1 ;; GCC machine description for SSE instructions
2 ;; Copyright (C) 2005-2014 Free Software Foundation, Inc.
4 ;; This file is part of GCC.
6 ;; GCC is free software; you can redistribute it and/or modify
7 ;; it under the terms of the GNU General Public License as published by
8 ;; the Free Software Foundation; either version 3, or (at your option)
11 ;; GCC is distributed in the hope that it will be useful,
12 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
13 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 ;; GNU General Public License for more details.
16 ;; You should have received a copy of the GNU General Public License
17 ;; along with GCC; see the file COPYING3. If not see
18 ;; <http://www.gnu.org/licenses/>.
20 (define_c_enum "unspec" [
55 UNSPEC_XOP_UNSIGNED_CMP
66 UNSPEC_AESKEYGENASSIST
87 ;; For AVX512F support
91 UNSPEC_UNSIGNED_FIX_NOTRUNC
106 UNSPEC_COMPRESS_STORE
111 ;; For embed. rounding feature
112 UNSPEC_EMBEDDED_ROUNDING
114 ;; For AVX512PF support
115 UNSPEC_GATHER_PREFETCH
116 UNSPEC_SCATTER_PREFETCH
118 ;; For AVX512ER support
133 (define_c_enum "unspecv" [
143 ;; All vector modes including V?TImode, used in move patterns.
;; 512-bit entries are gated on TARGET_AVX512F and 256-bit entries on
;; TARGET_AVX; the 128-bit entries carry no condition of their own.
144 (define_mode_iterator VMOVE
145 [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
146 (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
147 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
148 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
149 (V2TI "TARGET_AVX") V1TI
150 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
151 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF])
;; Vector modes iterated by V.  Compared with VMOVE, this list has no
;; V64QI, V32HI or V?TI entries, and V2DF is conditional on TARGET_SSE2.
154 (define_mode_iterator V
155 [(V32QI "TARGET_AVX") V16QI
156 (V16HI "TARGET_AVX") V8HI
157 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
158 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
159 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
160 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
162 ;; All 128bit vector modes
;; Integer and float; only V2DF carries a condition (TARGET_SSE2).
163 (define_mode_iterator V_128
164 [V16QI V8HI V4SI V2DI V4SF (V2DF "TARGET_SSE2")])
166 ;; All 256bit vector modes
;; Integer and float; no per-mode condition is attached to any entry.
167 (define_mode_iterator V_256
168 [V32QI V16HI V8SI V4DI V8SF V4DF])
170 ;; All 512bit vector modes
;; Integer and float; no per-mode condition is attached to any entry.
171 (define_mode_iterator V_512 [V64QI V32HI V16SI V8DI V16SF V8DF])
173 ;; All 256bit and 512bit vector modes
;; The 256-bit entries are unconditional; every 512-bit entry requires
;; TARGET_AVX512F.
174 (define_mode_iterator V_256_512
175 [V32QI V16HI V8SI V4DI V8SF V4DF
176 (V64QI "TARGET_AVX512F") (V32HI "TARGET_AVX512F") (V16SI "TARGET_AVX512F")
177 (V8DI "TARGET_AVX512F") (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")])
179 ;; All vector float modes
;; V16SF/V8DF require TARGET_AVX512F, V8SF/V4DF require TARGET_AVX and
;; V2DF requires TARGET_SSE2; V4SF is unconditional.
180 (define_mode_iterator VF
181 [(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
182 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
184 ;; 128- and 256-bit float vector modes
;; Same entries and conditions as VF minus the two 512-bit modes.
185 (define_mode_iterator VF_128_256
186 [(V8SF "TARGET_AVX") V4SF
187 (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
189 ;; All SFmode vector float modes
;; V16SF requires TARGET_AVX512F, V8SF requires TARGET_AVX, V4SF is
;; unconditional.
190 (define_mode_iterator VF1
191 [(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF])
193 ;; 128- and 256-bit SF vector modes
;; VF1 without the 512-bit V16SF entry.
194 (define_mode_iterator VF1_128_256
195 [(V8SF "TARGET_AVX") V4SF])
197 ;; All DFmode vector float modes
;; V8DF requires TARGET_AVX512F and V4DF requires TARGET_AVX.  Unlike in VF,
;; V2DF carries no TARGET_SSE2 condition here.
198 (define_mode_iterator VF2
199 [(V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF])
201 ;; 128- and 256-bit DF vector modes
;; VF2 without the 512-bit V8DF entry.
202 (define_mode_iterator VF2_128_256
203 [(V4DF "TARGET_AVX") V2DF])
;; 256- and 512-bit DF vector modes: VF2 without the 128-bit V2DF entry.
205 (define_mode_iterator VF2_512_256
206 [(V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX")])
208 ;; All 128bit vector float modes
;; V4SF is unconditional; V2DF requires TARGET_SSE2.
209 (define_mode_iterator VF_128
210 [V4SF (V2DF "TARGET_SSE2")])
212 ;; All 256bit vector float modes
213 (define_mode_iterator VF_256
216 ;; All 512bit vector float modes
217 (define_mode_iterator VF_512
220 ;; All vector integer modes
;; The only 512-bit entries are V16SI and V8DI (TARGET_AVX512F); V64QI and
;; V32HI are not listed.  256-bit entries require TARGET_AVX and the 128-bit
;; entries are unconditional.
221 (define_mode_iterator VI
222 [(V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")
223 (V32QI "TARGET_AVX") V16QI
224 (V16HI "TARGET_AVX") V8HI
225 (V8SI "TARGET_AVX") V4SI
226 (V4DI "TARGET_AVX") V2DI])
;; Same set of modes as VI, but the 256-bit entries are gated on TARGET_AVX2
;; instead of TARGET_AVX.
228 (define_mode_iterator VI_AVX2
229 [(V32QI "TARGET_AVX2") V16QI
230 (V16HI "TARGET_AVX2") V8HI
231 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI
232 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
234 ;; All QImode vector integer modes
;; Only the 128- and 256-bit modes are listed (no V64QI); V32QI requires
;; TARGET_AVX.
235 (define_mode_iterator VI1
236 [(V32QI "TARGET_AVX") V16QI])
;; Integer modes iterated by the *_loaddqu / *_storedqu patterns: the 128-
;; and 256-bit QImode vectors plus the 512-bit modes V16SI and V8DI.
238 (define_mode_iterator VI_UNALIGNED_LOADSTORE
239 [(V32QI "TARGET_AVX") V16QI
240 (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")])
242 ;; All DImode vector integer modes
;; V8DI requires TARGET_AVX512F, V4DI requires TARGET_AVX, V2DI is
;; unconditional.  Iterated by the <sse2>_movnt<mode> pattern.
243 (define_mode_iterator VI8
244 [(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI])
;; QImode vectors; the 256-bit mode requires TARGET_AVX2 (VI1 uses
;; TARGET_AVX instead).
246 (define_mode_iterator VI1_AVX2
247 [(V32QI "TARGET_AVX2") V16QI])
;; HImode vectors, 128-bit unconditional and 256-bit with TARGET_AVX2.
249 (define_mode_iterator VI2_AVX2
250 [(V16HI "TARGET_AVX2") V8HI])
;; As VI2_AVX2, plus the 512-bit V32HI under TARGET_AVX512F.
252 (define_mode_iterator VI2_AVX512F
253 [(V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX2") V8HI])
;; SImode vectors; the 256-bit mode requires only TARGET_AVX.
255 (define_mode_iterator VI4_AVX
256 [(V8SI "TARGET_AVX") V4SI])
;; SImode vectors; the 256-bit mode requires TARGET_AVX2.
258 (define_mode_iterator VI4_AVX2
259 [(V8SI "TARGET_AVX2") V4SI])
;; As VI4_AVX2, plus the 512-bit V16SI under TARGET_AVX512F.
261 (define_mode_iterator VI4_AVX512F
262 [(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI])
;; As VI4_AVX512F, plus V8DI under TARGET_AVX512F.  V8DI is the only DImode
;; entry: V4DI and V2DI are not listed.
264 (define_mode_iterator VI48_AVX512F
265 [(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI
266 (V8DI "TARGET_AVX512F")])
;; DImode vectors; the 256-bit mode requires TARGET_AVX2.
268 (define_mode_iterator VI8_AVX2
269 [(V4DI "TARGET_AVX2") V2DI])
;; As VI8_AVX2, plus the 512-bit V8DI under TARGET_AVX512F.
271 (define_mode_iterator VI8_AVX2_AVX512F
272 [(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
275 (define_mode_iterator V8FI
279 (define_mode_iterator V16FI
282 ;; ??? We should probably use TImode instead.
;; TImode-element vectors: V1TI is unconditional, V2TI requires TARGET_AVX2.
283 (define_mode_iterator VIMAX_AVX2
284 [(V2TI "TARGET_AVX2") V1TI])
286 ;; ??? This should probably be dropped in favor of VIMAX_AVX2.
;; Differs from VIMAX_AVX2 only in the 128-bit entry, which is scalar TI
;; here rather than V1TI.
287 (define_mode_iterator SSESCALARMODE
288 [(V2TI "TARGET_AVX2") TI])
;; QImode and HImode vectors; 128-bit entries are unconditional, 256-bit
;; entries require TARGET_AVX2.
290 (define_mode_iterator VI12_AVX2
291 [(V32QI "TARGET_AVX2") V16QI
292 (V16HI "TARGET_AVX2") V8HI])
;; HImode and SImode vectors; 128-bit entries are unconditional, 256-bit
;; entries require TARGET_AVX2.
294 (define_mode_iterator VI24_AVX2
295 [(V16HI "TARGET_AVX2") V8HI
296 (V8SI "TARGET_AVX2") V4SI])
;; QI/HI/SI vectors of 128 bits (unconditional) and 256 bits (TARGET_AVX2),
;; plus the 512-bit V16SI and V8DI under TARGET_AVX512F.  V8DI is the only
;; DImode entry.
298 (define_mode_iterator VI124_AVX2_48_AVX512F
299 [(V32QI "TARGET_AVX2") V16QI
300 (V16HI "TARGET_AVX2") V8HI
301 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI
302 (V8DI "TARGET_AVX512F")])
;; QI/HI/SI vectors of 128 bits (unconditional) and 256 bits (TARGET_AVX2),
;; plus the 512-bit V32HI and V16SI under TARGET_AVX512F.  There is no
;; V64QI entry.
304 (define_mode_iterator VI124_AVX512F
305 [(V32QI "TARGET_AVX2") V16QI
306 (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX2") V8HI
307 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI])
;; QI/HI/SI vectors; 128-bit entries are unconditional, 256-bit entries
;; require TARGET_AVX2.
309 (define_mode_iterator VI124_AVX2
310 [(V32QI "TARGET_AVX2") V16QI
311 (V16HI "TARGET_AVX2") V8HI
312 (V8SI "TARGET_AVX2") V4SI])
;; HI/SI/DI vectors; 128-bit entries are unconditional, 256-bit entries
;; require TARGET_AVX2.
314 (define_mode_iterator VI248_AVX2
315 [(V16HI "TARGET_AVX2") V8HI
316 (V8SI "TARGET_AVX2") V4SI
317 (V4DI "TARGET_AVX2") V2DI])
;; As VI248_AVX2, plus the 512-bit V8DI under TARGET_AVX512F.
319 (define_mode_iterator VI248_AVX2_8_AVX512F
320 [(V16HI "TARGET_AVX2") V8HI
321 (V8SI "TARGET_AVX2") V4SI
322 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
;; SImode and DImode vectors of all three widths: 128-bit unconditional,
;; 256-bit under TARGET_AVX2, 512-bit under TARGET_AVX512F.
324 (define_mode_iterator VI48_AVX2_48_AVX512F
325 [(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI
326 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
328 (define_mode_iterator V48_AVX2
331 (V4SI "TARGET_AVX2") (V2DI "TARGET_AVX2")
332 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")])
;; ISA-name prefix substituted into pattern names such as
;; <sse2_avx_avx512f>_loaddqu<mode> and <sse2_avx_avx512f>_storedqu<mode>.
;; Note that the 128-bit float modes V4SF and V2DF map to "avx" here.
334 (define_mode_attr sse2_avx_avx512f
335 [(V16QI "sse2") (V32QI "avx") (V64QI "avx512f")
336 (V4SI "sse2") (V8SI "avx") (V16SI "avx512f")
338 (V16SF "avx512f") (V8SF "avx") (V4SF "avx")
339 (V8DF "avx512f") (V4DF "avx") (V2DF "avx")])
;; ISA-name string per integer mode: "sse2" for the 128-bit modes, "avx2"
;; for the 256-bit modes and "avx512f" for V16SI and V8DI.
341 (define_mode_attr sse2_avx2
342 [(V16QI "sse2") (V32QI "avx2")
343 (V8HI "sse2") (V16HI "avx2")
344 (V4SI "sse2") (V8SI "avx2") (V16SI "avx512f")
345 (V2DI "sse2") (V4DI "avx2") (V8DI "avx512f")
346 (V1TI "sse2") (V2TI "avx2")])
;; ISA-name string per mode: "avx2" for the 256-bit modes, "ssse3" for all
;; other entries (the 128-bit vectors, the 64-bit V4HI and scalar TI).
348 (define_mode_attr ssse3_avx2
349 [(V16QI "ssse3") (V32QI "avx2")
350 (V4HI "ssse3") (V8HI "ssse3") (V16HI "avx2")
351 (V4SI "ssse3") (V8SI "avx2")
352 (V2DI "ssse3") (V4DI "avx2")
353 (TI "ssse3") (V2TI "avx2")])
;; ISA-name string per integer mode: "sse4_1" for the 128-bit modes, "avx2"
;; for the 256-bit modes and "avx512f" for V16SI and V8DI.
355 (define_mode_attr sse4_1_avx2
356 [(V16QI "sse4_1") (V32QI "avx2")
357 (V8HI "sse4_1") (V16HI "avx2")
358 (V4SI "sse4_1") (V8SI "avx2") (V16SI "avx512f")
359 (V2DI "sse4_1") (V4DI "avx2") (V8DI "avx512f")])
;; ISA-name string chosen by element kind rather than width: float modes
;; map to "avx" and integer modes to "avx2", for both 128 and 256 bits.
361 (define_mode_attr avx_avx2
362 [(V4SF "avx") (V2DF "avx")
363 (V8SF "avx") (V4DF "avx")
364 (V4SI "avx2") (V2DI "avx2")
365 (V8SI "avx2") (V4DI "avx2")])
;; Name prefix per integer mode: "vec" for the 128-bit modes and "avx2" for
;; the 256-bit modes.
367 (define_mode_attr vec_avx2
368 [(V16QI "vec") (V32QI "avx2")
369 (V8HI "vec") (V16HI "avx2")
370 (V4SI "vec") (V8SI "avx2")
371 (V2DI "vec") (V4DI "avx2")])
;; ISA-name string per mode: "avx512f" for the 512-bit modes and "avx2" for
;; every narrower entry.  There are no V4SF or V2DF entries.
373 (define_mode_attr avx2_avx512f
374 [(V4SI "avx2") (V8SI "avx2") (V16SI "avx512f")
375 (V2DI "avx2") (V4DI "avx2") (V8DI "avx512f")
376 (V8SF "avx2") (V16SF "avx512f")
377 (V4DF "avx2") (V8DF "avx512f")])
;; Letter spliced into shuffle-style mnemonics to select the float ("f") or
;; integer ("i") form, e.g. vextract<shuffletype>64x4 and
;; vbroadcast<shuffletype>64x4 in *mov<mode>_internal.  Float vector modes
;; map to "f"; every integer vector mode, including the V?TI modes, maps to
;; "i".  V16HI must be "i" like the other integer modes: any other letter
;; would produce a mnemonic that does not exist (only the f and i forms of
;; these instructions are defined).
379 (define_mode_attr shuffletype
380 [(V16SF "f") (V16SI "i") (V8DF "f") (V8DI "i")
381 (V8SF "f") (V8SI "i") (V4DF "f") (V4DI "i")
382 (V4SF "f") (V4SI "i") (V2DF "f") (V2DI "i")
383 (V32QI "i") (V16HI "i") (V16QI "i") (V8HI "i")
384 (V64QI "i") (V1TI "i") (V2TI "i")])
;; Maps each 512-bit SI/DI/SF/DF vector mode to the 128-bit mode with the
;; same element type (one quarter of the elements).
386 (define_mode_attr ssequartermode
387 [(V16SF "V4SF") (V8DF "V2DF") (V16SI "V4SI") (V8DI "V2DI")])
;; Maps a mode to one of twice the total size.  The 512-bit rows keep the
;; element type and double the element count (V16SF -> V32SF); the HI and QI
;; rows keep the element count and double the element width
;; (V16HI -> V16SI, V32QI -> V32HI).
389 (define_mode_attr ssedoublemode
390 [(V16SF "V32SF") (V16SI "V32SI") (V8DI "V16DI") (V8DF "V16DF")
391 (V16HI "V16SI") (V8HI "V8SI") (V4HI "V4SI")
392 (V32QI "V32HI") (V16QI "V16HI")])
;; Maps a DImode vector to the QImode vector of the same total size.
394 (define_mode_attr ssebytemode
395 [(V4DI "V32QI") (V2DI "V16QI")])
397 ;; All 128bit vector integer modes
;; QI, HI, SI and DI element types; no per-mode conditions.
398 (define_mode_iterator VI_128 [V16QI V8HI V4SI V2DI])
400 ;; All 256bit vector integer modes
;; QI, HI, SI and DI element types; no per-mode conditions.
401 (define_mode_iterator VI_256 [V32QI V16HI V8SI V4DI])
403 ;; All 512bit vector integer modes
;; QI, HI, SI and DI element types; no per-mode conditions.
404 (define_mode_iterator VI_512 [V64QI V32HI V16SI V8DI])
406 ;; Various 128bit vector integer mode combinations
;; The digits in each name list the element sizes in bytes that the iterator
;; covers (1 = QI, 2 = HI, 4 = SI, 8 = DI); none of the entries is
;; conditional.
407 (define_mode_iterator VI12_128 [V16QI V8HI])
408 (define_mode_iterator VI14_128 [V16QI V4SI])
409 (define_mode_iterator VI124_128 [V16QI V8HI V4SI])
410 (define_mode_iterator VI128_128 [V16QI V8HI V2DI])
411 (define_mode_iterator VI24_128 [V8HI V4SI])
412 (define_mode_iterator VI248_128 [V8HI V4SI V2DI])
413 (define_mode_iterator VI48_128 [V4SI V2DI])
415 ;; Various 256bit and 512bit vector integer mode combinations
;; In VI124_256_48_512 the 256-bit QI/HI/SI modes are unconditional and the
;; 512-bit SI/DI modes require TARGET_AVX512F.
416 (define_mode_iterator VI124_256_48_512
417 [V32QI V16HI V8SI (V8DI "TARGET_AVX512F") (V16SI "TARGET_AVX512F")])
;; The remaining iterators carry no per-mode conditions.
418 (define_mode_iterator VI48_256 [V8SI V4DI])
419 (define_mode_iterator VI48_512 [V16SI V8DI])
;; 256-bit SImode vector together with the 512-bit DImode vector; both have
;; eight elements.
420 (define_mode_iterator VI4_256_8_512 [V8SI V8DI])
422 ;; Int-float size matches
;; Each iterator pairs the integer and float vector modes that share both
;; element width (4 or 8 bytes) and total vector width.
423 (define_mode_iterator VI4F_128 [V4SI V4SF])
424 (define_mode_iterator VI8F_128 [V2DI V2DF])
425 (define_mode_iterator VI4F_256 [V8SI V8SF])
426 (define_mode_iterator VI8F_256 [V4DI V4DF])
;; 8-byte-element pairs of 256 bits (unconditional) and 512 bits
;; (TARGET_AVX512F).
427 (define_mode_iterator VI8F_256_512
428 [V4DI V4DF (V8DI "TARGET_AVX512F") (V8DF "TARGET_AVX512F")])
429 (define_mode_iterator VI48F_256_512
431 (V16SI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
432 (V8DI "TARGET_AVX512F") (V8DF "TARGET_AVX512F")])
;; 512-bit modes with 4- or 8-byte elements, integer and float.  Iterated by
;; the avx512f_load<mode>_mask, avx512f_blendm<mode> and
;; avx512f_store<mode>_mask patterns.
433 (define_mode_iterator VI48F_512 [V16SI V16SF V8DI V8DF])
435 ;; Mapping from float mode to required SSE level
;; Covers the scalar SF/DF modes as well as the float vector modes.  The
;; value is used as a pattern-name prefix, e.g. <sse>_movnt<mode>.
436 (define_mode_attr sse
437 [(SF "sse") (DF "sse2")
438 (V4SF "sse") (V2DF "sse2")
439 (V16SF "avx512f") (V8SF "avx")
440 (V8DF "avx512f") (V4DF "avx")])
;; ISA-name prefix for QImode and DImode vectors: "sse2", "avx" or "avx512f"
;; for 128-, 256- and 512-bit modes.  Used e.g. in <sse2>_movnt<mode>.
442 (define_mode_attr sse2
443 [(V16QI "sse2") (V32QI "avx") (V64QI "avx512f")
444 (V2DI "sse2") (V4DI "avx") (V8DI "avx512f")])
;; ISA-name prefix for the QImode vectors: "sse3" for V16QI and "avx" for
;; V32QI.  Used in the <sse3>_lddqu<avxsizesuffix> pattern name.
446 (define_mode_attr sse3
447 [(V16QI "sse3") (V32QI "avx")])
449 (define_mode_attr sse4_1
450 [(V4SF "sse4_1") (V2DF "sse4_1")
451 (V8SF "avx") (V4DF "avx")
;; Vector-width suffix for pattern names: "512" for 512-bit modes, "256" for
;; 256-bit modes and the empty string for 128-bit modes.
454 (define_mode_attr avxsizesuffix
455 [(V64QI "512") (V32HI "512") (V16SI "512") (V8DI "512")
456 (V32QI "256") (V16HI "256") (V8SI "256") (V4DI "256")
457 (V16QI "") (V8HI "") (V4SI "") (V2DI "")
458 (V16SF "512") (V8DF "512")
459 (V8SF "256") (V4DF "256")
460 (V4SF "") (V2DF "")])
462 ;; SSE instruction mode
463 (define_mode_attr sseinsnmode
464 [(V64QI "XI") (V32HI "XI") (V16SI "XI") (V8DI "XI")
465 (V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI") (V2TI "OI")
466 (V16QI "TI") (V8HI "TI") (V4SI "TI") (V2DI "TI") (V1TI "TI")
467 (V16SF "V16SF") (V8DF "V8DF")
468 (V8SF "V8SF") (V4DF "V4DF")
469 (V4SF "V4SF") (V2DF "V2DF")
472 ;; Mapping of vector modes to corresponding mask size
473 (define_mode_attr avx512fmaskmode
475 (V16HI "HI") (V8HI "QI")
476 (V16SI "HI") (V8SI "QI") (V4SI "QI")
477 (V8DI "QI") (V4DI "QI") (V2DI "QI")
478 (V16SF "HI") (V8SF "QI") (V4SF "QI")
479 (V8DF "QI") (V4DF "QI") (V2DF "QI")])
481 ;; Mapping of vector float modes to an integer mode of the same size
;; The result has the same element count and element width as the input.
;; Integer vector modes are also listed and map to themselves.
482 (define_mode_attr sseintvecmode
483 [(V16SF "V16SI") (V8DF "V8DI")
484 (V8SF "V8SI") (V4DF "V4DI")
485 (V4SF "V4SI") (V2DF "V2DI")
486 (V16SI "V16SI") (V8DI "V8DI")
487 (V8SI "V8SI") (V4DI "V4DI")
488 (V4SI "V4SI") (V2DI "V2DI")
489 (V16HI "V16HI") (V8HI "V8HI")
490 (V32QI "V32QI") (V16QI "V16QI")])
492 (define_mode_attr sseintvecmodelower
494 (V8SF "v8si") (V4DF "v4di")
495 (V4SF "v4si") (V2DF "v2di")
496 (V8SI "v8si") (V4DI "v4di")
497 (V4SI "v4si") (V2DI "v2di")
498 (V16HI "v16hi") (V8HI "v8hi")
499 (V32QI "v32qi") (V16QI "v16qi")])
501 ;; Mapping of vector modes to a vector mode of double size
;; The element type is kept and the element count doubles.  Only 128- and
;; 256-bit source modes are listed.
502 (define_mode_attr ssedoublevecmode
503 [(V32QI "V64QI") (V16HI "V32HI") (V8SI "V16SI") (V4DI "V8DI")
504 (V16QI "V32QI") (V8HI "V16HI") (V4SI "V8SI") (V2DI "V4DI")
505 (V8SF "V16SF") (V4DF "V8DF")
506 (V4SF "V8SF") (V2DF "V4DF")])
508 ;; Mapping of vector modes to a vector mode of half size
509 (define_mode_attr ssehalfvecmode
510 [(V64QI "V32QI") (V32HI "V16HI") (V16SI "V8SI") (V8DI "V4DI")
511 (V32QI "V16QI") (V16HI "V8HI") (V8SI "V4SI") (V4DI "V2DI")
512 (V16QI "V8QI") (V8HI "V4HI") (V4SI "V2SI")
513 (V16SF "V8SF") (V8DF "V4DF")
514 (V8SF "V4SF") (V4DF "V2DF")
517 ;; Mapping of vector modes to packed single mode of the same size
;; Every listed mode maps to the SFmode vector of equal total width:
;; V4SF for 128 bits, V8SF for 256 bits and V16SF for 512 bits.
518 (define_mode_attr ssePSmode
519 [(V16SI "V16SF") (V8DF "V16SF")
520 (V16SF "V16SF") (V8DI "V16SF")
521 (V64QI "V16SF") (V32QI "V8SF") (V16QI "V4SF")
522 (V32HI "V16SF") (V16HI "V8SF") (V8HI "V4SF")
523 (V8SI "V8SF") (V4SI "V4SF")
524 (V4DI "V8SF") (V2DI "V4SF")
525 (V2TI "V8SF") (V1TI "V4SF")
526 (V8SF "V8SF") (V4SF "V4SF")
527 (V4DF "V8SF") (V2DF "V4SF")])
529 ;; Mapping of vector modes back to the scalar modes
;; Gives the element mode (QI, HI, SI, DI, SF or DF) of each vector mode.
530 (define_mode_attr ssescalarmode
531 [(V64QI "QI") (V32QI "QI") (V16QI "QI")
532 (V32HI "HI") (V16HI "HI") (V8HI "HI")
533 (V16SI "SI") (V8SI "SI") (V4SI "SI")
534 (V8DI "DI") (V4DI "DI") (V2DI "DI")
535 (V16SF "SF") (V8SF "SF") (V4SF "SF")
536 (V8DF "DF") (V4DF "DF") (V2DF "DF")])
538 ;; Mapping of vector modes to the 128bit modes
;; The element type is kept; 128-bit modes map to themselves.
539 (define_mode_attr ssexmmmode
540 [(V64QI "V16QI") (V32QI "V16QI") (V16QI "V16QI")
541 (V32HI "V8HI") (V16HI "V8HI") (V8HI "V8HI")
542 (V16SI "V4SI") (V8SI "V4SI") (V4SI "V4SI")
543 (V8DI "V2DI") (V4DI "V2DI") (V2DI "V2DI")
544 (V16SF "V4SF") (V8SF "V4SF") (V4SF "V4SF")
545 (V8DF "V2DF") (V4DF "V2DF") (V2DF "V2DF")])
547 ;; Pointer size override for scalar modes (Intel asm dialect)
548 (define_mode_attr iptr
549 [(V32QI "b") (V16HI "w") (V8SI "k") (V4DI "q")
550 (V16QI "b") (V8HI "w") (V4SI "k") (V2DI "q")
551 (V8SF "k") (V4DF "q")
552 (V4SF "k") (V2DF "q")
555 ;; Number of scalar elements in each vector type
;; The value is the element count written as a decimal string.
;; NOTE(review): V32HI has no entry here although the other 512-bit modes
;; do -- confirm whether that omission is intentional.
556 (define_mode_attr ssescalarnum
557 [(V64QI "64") (V16SI "16") (V8DI "8")
558 (V32QI "32") (V16HI "16") (V8SI "8") (V4DI "4")
559 (V16QI "16") (V8HI "8") (V4SI "4") (V2DI "2")
560 (V16SF "16") (V8DF "8")
561 (V8SF "8") (V4DF "4")
562 (V4SF "4") (V2DF "2")])
564 ;; Mask of scalar elements in each vector type
;; Each value is the element count minus one.  Only 128- and 256-bit modes
;; are listed.
565 (define_mode_attr ssescalarnummask
566 [(V32QI "31") (V16HI "15") (V8SI "7") (V4DI "3")
567 (V16QI "15") (V8HI "7") (V4SI "3") (V2DI "1")
568 (V8SF "7") (V4DF "3")
569 (V4SF "3") (V2DF "1")])
;; Element width in bits, as a decimal string.  Spliced into mnemonics such
;; as vmovdqu<ssescalarsize> / vmovdqa<ssescalarsize>.  There are no QImode
;; entries, and the only float entries are V16SF and V8DF.
571 (define_mode_attr ssescalarsize
572 [(V8DI "64") (V4DI "64") (V2DI "64")
573 (V32HI "16") (V16HI "16") (V8HI "16")
574 (V16SI "32") (V8SI "32") (V4SI "32")
575 (V16SF "32") (V8DF "64")])
577 ;; SSE prefix for integer vector modes
;; Integer modes yield "p" and float modes the empty string, so a template
;; such as v<sseintprefix>blendm<ssemodesuffix> gets the "p" only in its
;; integer form.
578 (define_mode_attr sseintprefix
579 [(V2DI "p") (V2DF "")
584 (V16SI "p") (V16SF "")])
586 ;; SSE scalar suffix for vector modes
587 (define_mode_attr ssescalarmodesuffix
589 (V8SF "ss") (V4DF "sd")
590 (V4SF "ss") (V2DF "sd")
591 (V8SI "ss") (V4DI "sd")
594 ;; Pack/unpack vector modes
;; sseunpackmode: same total width, half as many elements, each twice as
;; wide (e.g. V16QI -> V8HI).
595 (define_mode_attr sseunpackmode
596 [(V16QI "V8HI") (V8HI "V4SI") (V4SI "V2DI")
597 (V32QI "V16HI") (V16HI "V8SI") (V8SI "V4DI")
598 (V32HI "V16SI") (V64QI "V32HI") (V16SI "V8DI")])
;; Inverse direction of sseunpackmode: same total width, twice as many
;; elements, each half as wide (e.g. V8HI -> V16QI).
600 (define_mode_attr ssepackmode
601 [(V8HI "V16QI") (V4SI "V8HI") (V2DI "V4SI")
602 (V16HI "V32QI") (V8SI "V16HI") (V4DI "V8SI")
603 (V32HI "V64QI") (V16SI "V32HI") (V8DI "V16SI")])
605 ;; Mapping of the max integer size for xop rotate immediate constraint
;; Each value is the element width in bits minus one.
606 (define_mode_attr sserotatemax
607 [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")])
609 ;; Mapping of mode to cast intrinsic name
;; Entries exist only for the three 256-bit modes V8SI, V8SF and V4DF.
610 (define_mode_attr castmode [(V8SI "si") (V8SF "ps") (V4DF "pd")])
612 ;; Instruction suffix for sign and zero extensions.
;; This is a code attribute (keyed on the RTL code, not on a mode):
;; sign_extend gives "sx" and zero_extend gives "zx".
613 (define_code_attr extsuffix [(sign_extend "sx") (zero_extend "zx")])
615 ;; i128 for integer vectors and TARGET_AVX2, f128 otherwise.
616 ;; i64x4 or f64x4 for 512bit modes.
;; The 256-bit integer rows use the "%~128" output escape instead of a fixed
;; letter.  NOTE(review): presumably "%~" is what prints "i" or "f"
;; depending on TARGET_AVX2, as the comment above describes -- confirm
;; against the operand-printing code.
617 (define_mode_attr i128
618 [(V16SF "f64x4") (V8SF "f128") (V8DF "f64x4") (V4DF "f128")
619 (V64QI "i64x4") (V32QI "%~128") (V32HI "i64x4") (V16HI "%~128")
620 (V16SI "i64x4") (V8SI "%~128") (V8DI "i64x4") (V4DI "%~128")])
;; The three 256-bit modes V8SI, V8SF and V4DF; these are exactly the modes
;; for which castmode has an entry.
623 (define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF])
625 ;; Mapping of immediate bits for blend instructions
;; Each value equals 2^N - 1 for a mode with N elements, i.e. one bit per
;; element with all bits set.
626 (define_mode_attr blendbits
627 [(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")])
629 ;; Mapping suffixes for broadcast
;; Only the 512-bit SI/SF/DI/DF vector modes are listed: "d" and "q" for the
;; integer modes, "ss" and "sd" for the float modes.
630 (define_mode_attr bcstscalarsuff
631 [(V16SI "d") (V16SF "ss") (V8DI "q") (V8DF "sd")])
633 ;; Include define_subst patterns for instructions with mask
636 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
638 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
642 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
644 ;; All of these patterns are enabled for SSE1 as well as SSE2.
645 ;; This is essential for maintaining stable calling conventions.
647 (define_expand "mov<mode>"
648 [(set (match_operand:VMOVE 0 "nonimmediate_operand")
649 (match_operand:VMOVE 1 "nonimmediate_operand"))]
652 ix86_expand_vector_move (<MODE>mode, operands);
656 (define_insn "*mov<mode>_internal"
657 [(set (match_operand:VMOVE 0 "nonimmediate_operand" "=v,v ,m")
658 (match_operand:VMOVE 1 "nonimmediate_or_sse_const_operand" "C ,vm,v"))]
660 && (register_operand (operands[0], <MODE>mode)
661 || register_operand (operands[1], <MODE>mode))"
663 int mode = get_attr_mode (insn);
664 switch (which_alternative)
667 return standard_sse_constant_opcode (insn, operands[1]);
670 /* There is no evex-encoded vmov* for sizes smaller than 64-bytes
671 in avx512f, so we need to use workarounds, to access sse registers
672 16-31, which are evex-only. */
673 if (TARGET_AVX512F && <MODE_SIZE> < 64
674 && ((REG_P (operands[0])
675 && EXT_REX_SSE_REGNO_P (REGNO (operands[0])))
676 || (REG_P (operands[1])
677 && EXT_REX_SSE_REGNO_P (REGNO (operands[1])))))
679 if (memory_operand (operands[0], <MODE>mode))
681 if (<MODE_SIZE> == 32)
682 return "vextract<shuffletype>64x4\t{$0x0, %g1, %0|%0, %g1, 0x0}";
683 else if (<MODE_SIZE> == 16)
684 return "vextract<shuffletype>32x4\t{$0x0, %g1, %0|%0, %g1, 0x0}";
688 else if (memory_operand (operands[1], <MODE>mode))
690 if (<MODE_SIZE> == 32)
691 return "vbroadcast<shuffletype>64x4\t{%1, %g0|%g0, %1}";
692 else if (<MODE_SIZE> == 16)
693 return "vbroadcast<shuffletype>32x4\t{%1, %g0|%g0, %1}";
698 /* Reg -> reg move is always aligned. Just use wider move. */
703 return "vmovaps\t{%g1, %g0|%g0, %g1}";
706 return "vmovapd\t{%g1, %g0|%g0, %g1}";
709 return "vmovdqa64\t{%g1, %g0|%g0, %g1}";
720 && (misaligned_operand (operands[0], <MODE>mode)
721 || misaligned_operand (operands[1], <MODE>mode)))
722 return "vmovups\t{%1, %0|%0, %1}";
724 return "%vmovaps\t{%1, %0|%0, %1}";
730 && (misaligned_operand (operands[0], <MODE>mode)
731 || misaligned_operand (operands[1], <MODE>mode)))
732 return "vmovupd\t{%1, %0|%0, %1}";
734 return "%vmovapd\t{%1, %0|%0, %1}";
739 && (misaligned_operand (operands[0], <MODE>mode)
740 || misaligned_operand (operands[1], <MODE>mode)))
741 return "vmovdqu\t{%1, %0|%0, %1}";
743 return "%vmovdqa\t{%1, %0|%0, %1}";
745 if (misaligned_operand (operands[0], <MODE>mode)
746 || misaligned_operand (operands[1], <MODE>mode))
747 return "vmovdqu64\t{%1, %0|%0, %1}";
749 return "vmovdqa64\t{%1, %0|%0, %1}";
758 [(set_attr "type" "sselog1,ssemov,ssemov")
759 (set_attr "prefix" "maybe_vex")
761 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
762 (const_string "<ssePSmode>")
763 (and (match_test "<MODE_SIZE> == 16")
764 (and (eq_attr "alternative" "2")
765 (match_test "TARGET_SSE_TYPELESS_STORES")))
766 (const_string "<ssePSmode>")
767 (match_test "TARGET_AVX")
768 (const_string "<sseinsnmode>")
769 (ior (not (match_test "TARGET_SSE2"))
770 (match_test "optimize_function_for_size_p (cfun)"))
771 (const_string "V4SF")
772 (and (eq_attr "alternative" "0")
773 (match_test "TARGET_SSE_LOAD0_BY_PXOR"))
776 (const_string "<sseinsnmode>")))])
778 (define_insn "avx512f_load<mode>_mask"
779 [(set (match_operand:VI48F_512 0 "register_operand" "=v,v")
781 (match_operand:VI48F_512 1 "nonimmediate_operand" "v,m")
782 (match_operand:VI48F_512 2 "vector_move_operand" "0C,0C")
783 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
786 switch (MODE_<sseinsnmode>)
790 if (misaligned_operand (operands[1], <MODE>mode))
791 return "vmovu<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}";
792 return "vmova<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}";
794 if (misaligned_operand (operands[1], <MODE>mode))
795 return "vmovdqu<ssescalarsize>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}";
796 return "vmovdqa<ssescalarsize>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}";
799 [(set_attr "type" "ssemov")
800 (set_attr "prefix" "evex")
801 (set_attr "memory" "none,load")
802 (set_attr "mode" "<sseinsnmode>")])
804 (define_insn "avx512f_blendm<mode>"
805 [(set (match_operand:VI48F_512 0 "register_operand" "=v")
807 (match_operand:VI48F_512 2 "nonimmediate_operand" "vm")
808 (match_operand:VI48F_512 1 "register_operand" "v")
809 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
811 "v<sseintprefix>blendm<ssemodesuffix>\t{%2, %1, %0%{%3%}|%0%{%3%}, %1, %2}"
812 [(set_attr "type" "ssemov")
813 (set_attr "prefix" "evex")
814 (set_attr "mode" "<sseinsnmode>")])
816 (define_insn "avx512f_store<mode>_mask"
817 [(set (match_operand:VI48F_512 0 "memory_operand" "=m")
819 (match_operand:VI48F_512 1 "register_operand" "v")
821 (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
824 switch (MODE_<sseinsnmode>)
828 return "vmova<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
830 return "vmovdqa<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
833 [(set_attr "type" "ssemov")
834 (set_attr "prefix" "evex")
835 (set_attr "memory" "store")
836 (set_attr "mode" "<sseinsnmode>")])
838 (define_insn "sse2_movq128"
839 [(set (match_operand:V2DI 0 "register_operand" "=x")
842 (match_operand:V2DI 1 "nonimmediate_operand" "xm")
843 (parallel [(const_int 0)]))
846 "%vmovq\t{%1, %0|%0, %q1}"
847 [(set_attr "type" "ssemov")
848 (set_attr "prefix" "maybe_vex")
849 (set_attr "mode" "TI")])
851 ;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
852 ;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
853 ;; from memory, we'd prefer to load the memory directly into the %xmm
854 ;; register. To facilitate this happy circumstance, this pattern won't
855 ;; split until after register allocation. If the 64-bit value didn't
856 ;; come from memory, this is the best we can do. This is much better
857 ;; than storing %edx:%eax into a stack temporary and loading an %xmm
860 (define_insn_and_split "movdi_to_sse"
862 [(set (match_operand:V4SI 0 "register_operand" "=?x,x")
863 (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
864 (clobber (match_scratch:V4SI 2 "=&x,X"))])]
865 "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_TO_VEC"
867 "&& reload_completed"
870 if (register_operand (operands[1], DImode))
872 /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
873 Assemble the 64-bit DImode value in an xmm register. */
874 emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
875 gen_rtx_SUBREG (SImode, operands[1], 0)));
876 emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
877 gen_rtx_SUBREG (SImode, operands[1], 4)));
878 emit_insn (gen_vec_interleave_lowv4si (operands[0], operands[0],
881 else if (memory_operand (operands[1], DImode))
883 rtx tmp = gen_reg_rtx (V2DImode);
884 emit_insn (gen_vec_concatv2di (tmp, operands[1], const0_rtx));
885 emit_move_insn (operands[0], gen_lowpart (V4SImode, tmp));
892 [(set (match_operand:V4SF 0 "register_operand")
893 (match_operand:V4SF 1 "zero_extended_scalar_load_operand"))]
894 "TARGET_SSE && reload_completed"
897 (vec_duplicate:V4SF (match_dup 1))
901 operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
902 operands[2] = CONST0_RTX (V4SFmode);
906 [(set (match_operand:V2DF 0 "register_operand")
907 (match_operand:V2DF 1 "zero_extended_scalar_load_operand"))]
908 "TARGET_SSE2 && reload_completed"
909 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
911 operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
912 operands[2] = CONST0_RTX (DFmode);
915 (define_expand "movmisalign<mode>"
916 [(set (match_operand:VMOVE 0 "nonimmediate_operand")
917 (match_operand:VMOVE 1 "nonimmediate_operand"))]
920 ix86_expand_vector_move_misalign (<MODE>mode, operands);
924 (define_expand "<sse>_loadu<ssemodesuffix><avxsizesuffix><mask_name>"
925 [(set (match_operand:VF 0 "register_operand")
926 (unspec:VF [(match_operand:VF 1 "nonimmediate_operand")]
928 "TARGET_SSE && <mask_mode512bit_condition>"
930 /* For AVX, normal *mov<mode>_internal pattern will handle unaligned loads
931 just fine if misaligned_operand is true, and without the UNSPEC it can
932 be combined with arithmetic instructions. If misaligned_operand is
933 false, still emit UNSPEC_LOADU insn to honor user's request for
936 && misaligned_operand (operands[1], <MODE>mode))
938 rtx src = operands[1];
940 src = gen_rtx_VEC_MERGE (<MODE>mode, operands[1],
941 operands[2 * <mask_applied>],
942 operands[3 * <mask_applied>]);
943 emit_insn (gen_rtx_SET (VOIDmode, operands[0], src));
948 (define_insn "*<sse>_loadu<ssemodesuffix><avxsizesuffix><mask_name>"
949 [(set (match_operand:VF 0 "register_operand" "=v")
951 [(match_operand:VF 1 "nonimmediate_operand" "vm")]
953 "TARGET_SSE && <mask_mode512bit_condition>"
955 switch (get_attr_mode (insn))
960 return "%vmovups\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
962 return "%vmovu<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
965 [(set_attr "type" "ssemov")
966 (set_attr "movu" "1")
967 (set_attr "ssememalign" "8")
968 (set_attr "prefix" "maybe_vex")
970 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
971 (const_string "<ssePSmode>")
972 (match_test "TARGET_AVX")
973 (const_string "<MODE>")
974 (match_test "optimize_function_for_size_p (cfun)")
975 (const_string "V4SF")
977 (const_string "<MODE>")))])
979 (define_insn "<sse>_storeu<ssemodesuffix><avxsizesuffix>"
980 [(set (match_operand:VF 0 "memory_operand" "=m")
982 [(match_operand:VF 1 "register_operand" "v")]
986 switch (get_attr_mode (insn))
991 return "%vmovups\t{%1, %0|%0, %1}";
993 return "%vmovu<ssemodesuffix>\t{%1, %0|%0, %1}";
996 [(set_attr "type" "ssemov")
997 (set_attr "movu" "1")
998 (set_attr "ssememalign" "8")
999 (set_attr "prefix" "maybe_vex")
1001 (cond [(and (match_test "<MODE_SIZE> == 16")
1002 (ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
1003 (match_test "TARGET_SSE_TYPELESS_STORES")))
1004 (const_string "<ssePSmode>")
1005 (match_test "TARGET_AVX")
1006 (const_string "<MODE>")
1007 (match_test "optimize_function_for_size_p (cfun)")
1008 (const_string "V4SF")
1010 (const_string "<MODE>")))])
1012 (define_insn "avx512f_storeu<ssemodesuffix>512_mask"
1013 [(set (match_operand:VF_512 0 "memory_operand" "=m")
1016 [(match_operand:VF_512 1 "register_operand" "v")]
1019 (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
1022 switch (get_attr_mode (insn))
1025 return "vmovups\t{%1, %0%{%2%}|%0%{%2%}, %1}";
1027 return "vmovu<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
1030 [(set_attr "type" "ssemov")
1031 (set_attr "movu" "1")
1032 (set_attr "memory" "store")
1033 (set_attr "prefix" "evex")
1034 (set_attr "mode" "<sseinsnmode>")])
1036 (define_expand "<sse2_avx_avx512f>_loaddqu<mode><mask_name>"
1037 [(set (match_operand:VI_UNALIGNED_LOADSTORE 0 "register_operand")
1038 (unspec:VI_UNALIGNED_LOADSTORE
1039 [(match_operand:VI_UNALIGNED_LOADSTORE 1 "nonimmediate_operand")]
1041 "TARGET_SSE2 && <mask_mode512bit_condition>"
1043 /* For AVX, normal *mov<mode>_internal pattern will handle unaligned loads
1044 just fine if misaligned_operand is true, and without the UNSPEC it can
1045 be combined with arithmetic instructions. If misaligned_operand is
1046 false, still emit UNSPEC_LOADU insn to honor user's request for
1049 && misaligned_operand (operands[1], <MODE>mode))
1051 rtx src = operands[1];
1053 src = gen_rtx_VEC_MERGE (<MODE>mode, operands[1],
1054 operands[2 * <mask_applied>],
1055 operands[3 * <mask_applied>]);
1056 emit_insn (gen_rtx_SET (VOIDmode, operands[0], src));
1061 (define_insn "*<sse2_avx_avx512f>_loaddqu<mode><mask_name>"
1062 [(set (match_operand:VI_UNALIGNED_LOADSTORE 0 "register_operand" "=v")
1063 (unspec:VI_UNALIGNED_LOADSTORE
1064 [(match_operand:VI_UNALIGNED_LOADSTORE 1 "nonimmediate_operand" "vm")]
1066 "TARGET_SSE2 && <mask_mode512bit_condition>"
1068 switch (get_attr_mode (insn))
1072 return "%vmovups\t{%1, %0|%0, %1}";
1074 if (<MODE>mode == V8DImode)
1075 return "vmovdqu64\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
1077 return "vmovdqu32\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
1079 return "%vmovdqu\t{%1, %0|%0, %1}";
1082 [(set_attr "type" "ssemov")
1083 (set_attr "movu" "1")
1084 (set_attr "ssememalign" "8")
1085 (set (attr "prefix_data16")
1087 (match_test "TARGET_AVX")
1089 (const_string "1")))
1090 (set_attr "prefix" "maybe_vex")
1092 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
1093 (const_string "<ssePSmode>")
1094 (match_test "TARGET_AVX")
1095 (const_string "<sseinsnmode>")
1096 (match_test "optimize_function_for_size_p (cfun)")
1097 (const_string "V4SF")
1099 (const_string "<sseinsnmode>")))])
1101 (define_insn "<sse2_avx_avx512f>_storedqu<mode>"
1102 [(set (match_operand:VI_UNALIGNED_LOADSTORE 0 "memory_operand" "=m")
1103 (unspec:VI_UNALIGNED_LOADSTORE
1104 [(match_operand:VI_UNALIGNED_LOADSTORE 1 "register_operand" "v")]
1108 switch (get_attr_mode (insn))
1113 return "%vmovups\t{%1, %0|%0, %1}";
1115 if (<MODE>mode == V8DImode)
1116 return "vmovdqu64\t{%1, %0|%0, %1}";
1118 return "vmovdqu32\t{%1, %0|%0, %1}";
1120 return "%vmovdqu\t{%1, %0|%0, %1}";
1123 [(set_attr "type" "ssemov")
1124 (set_attr "movu" "1")
1125 (set_attr "ssememalign" "8")
1126 (set (attr "prefix_data16")
1128 (match_test "TARGET_AVX")
1130 (const_string "1")))
1131 (set_attr "prefix" "maybe_vex")
1133 (cond [(and (match_test "<MODE_SIZE> == 16")
1134 (ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
1135 (match_test "TARGET_SSE_TYPELESS_STORES")))
1136 (const_string "<ssePSmode>")
1137 (match_test "TARGET_AVX")
1138 (const_string "<sseinsnmode>")
1139 (match_test "optimize_function_for_size_p (cfun)")
1140 (const_string "V4SF")
1142 (const_string "<sseinsnmode>")))])
1144 (define_insn "avx512f_storedqu<mode>_mask"
1145 [(set (match_operand:VI48_512 0 "memory_operand" "=m")
1148 [(match_operand:VI48_512 1 "register_operand" "v")]
1151 (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
1154 if (<MODE>mode == V8DImode)
1155 return "vmovdqu64\t{%1, %0%{%2%}|%0%{%2%}, %1}";
1157 return "vmovdqu32\t{%1, %0%{%2%}|%0%{%2%}, %1}";
1159 [(set_attr "type" "ssemov")
1160 (set_attr "movu" "1")
1161 (set_attr "memory" "store")
1162 (set_attr "prefix" "evex")
1163 (set_attr "mode" "<sseinsnmode>")])
1165 (define_insn "<sse3>_lddqu<avxsizesuffix>"
1166 [(set (match_operand:VI1 0 "register_operand" "=x")
1167 (unspec:VI1 [(match_operand:VI1 1 "memory_operand" "m")]
1170 "%vlddqu\t{%1, %0|%0, %1}"
1171 [(set_attr "type" "ssemov")
1172 (set_attr "movu" "1")
1173 (set_attr "ssememalign" "8")
1174 (set (attr "prefix_data16")
1176 (match_test "TARGET_AVX")
1178 (const_string "0")))
1179 (set (attr "prefix_rep")
1181 (match_test "TARGET_AVX")
1183 (const_string "1")))
1184 (set_attr "prefix" "maybe_vex")
1185 (set_attr "mode" "<sseinsnmode>")])
1187 (define_insn "sse2_movnti<mode>"
1188 [(set (match_operand:SWI48 0 "memory_operand" "=m")
1189 (unspec:SWI48 [(match_operand:SWI48 1 "register_operand" "r")]
1192 "movnti\t{%1, %0|%0, %1}"
1193 [(set_attr "type" "ssemov")
1194 (set_attr "prefix_data16" "0")
1195 (set_attr "mode" "<MODE>")])
1197 (define_insn "<sse>_movnt<mode>"
1198 [(set (match_operand:VF 0 "memory_operand" "=m")
1200 [(match_operand:VF 1 "register_operand" "v")]
1203 "%vmovnt<ssemodesuffix>\t{%1, %0|%0, %1}"
1204 [(set_attr "type" "ssemov")
1205 (set_attr "prefix" "maybe_vex")
1206 (set_attr "mode" "<MODE>")])
1208 (define_insn "<sse2>_movnt<mode>"
1209 [(set (match_operand:VI8 0 "memory_operand" "=m")
1210 (unspec:VI8 [(match_operand:VI8 1 "register_operand" "v")]
1213 "%vmovntdq\t{%1, %0|%0, %1}"
1214 [(set_attr "type" "ssecvt")
1215 (set (attr "prefix_data16")
1217 (match_test "TARGET_AVX")
1219 (const_string "1")))
1220 (set_attr "prefix" "maybe_vex")
1221 (set_attr "mode" "<sseinsnmode>")])
1223 ; Expand patterns for non-temporal stores. At the moment, only those
1224 ; that directly map to insns are defined; it would be possible to
1225 ; define patterns for other modes that would expand to several insns.
1227 ;; Modes handled by storent patterns.
;; Each entry pairs a mode with the target condition that enables it:
;;   scalar integer   DI (TARGET_SSE2 && TARGET_64BIT), SI (TARGET_SSE2)
;;   scalar float     SF, DF (TARGET_SSE4A)
;;   V2DI, V2DF       TARGET_SSE2
;;   V4SF             listed without a condition of its own
;;   V4DI, V8SF, V4DF TARGET_AVX
;;   V8DI, V16SF, V8DF TARGET_AVX512F
;; The "storent<mode>" expander that follows is instantiated once per entry.
1228 (define_mode_iterator STORENT_MODE
1229 [(DI "TARGET_SSE2 && TARGET_64BIT") (SI "TARGET_SSE2")
1230 (SF "TARGET_SSE4A") (DF "TARGET_SSE4A")
1231 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") (V2DI "TARGET_SSE2")
1232 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
1233 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
1235 (define_expand "storent<mode>"
1236 [(set (match_operand:STORENT_MODE 0 "memory_operand")
1237 (unspec:STORENT_MODE
1238 [(match_operand:STORENT_MODE 1 "register_operand")]
1242 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1244 ;; Parallel floating point arithmetic
1246 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1248 (define_expand "<code><mode>2"
1249 [(set (match_operand:VF 0 "register_operand")
1251 (match_operand:VF 1 "register_operand")))]
1253 "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
1255 (define_insn_and_split "*absneg<mode>2"
1256 [(set (match_operand:VF 0 "register_operand" "=x,x,v,v")
1257 (match_operator:VF 3 "absneg_operator"
1258 [(match_operand:VF 1 "nonimmediate_operand" "0, xm, v, m")]))
1259 (use (match_operand:VF 2 "nonimmediate_operand" "xm, 0, vm,v"))]
1262 "&& reload_completed"
1265 enum rtx_code absneg_op;
1271 if (MEM_P (operands[1]))
1272 op1 = operands[2], op2 = operands[1];
1274 op1 = operands[1], op2 = operands[2];
1279 if (rtx_equal_p (operands[0], operands[1]))
1285 absneg_op = GET_CODE (operands[3]) == NEG ? XOR : AND;
1286 t = gen_rtx_fmt_ee (absneg_op, <MODE>mode, op1, op2);
1287 t = gen_rtx_SET (VOIDmode, operands[0], t);
1291 [(set_attr "isa" "noavx,noavx,avx,avx")])
1293 (define_expand "<plusminus_insn><mode>3<mask_name><round_name>"
1294 [(set (match_operand:VF 0 "register_operand")
1296 (match_operand:VF 1 "<round_nimm_predicate>")
1297 (match_operand:VF 2 "<round_nimm_predicate>")))]
1298 "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1299 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
1301 (define_insn "*<plusminus_insn><mode>3<mask_name><round_name>"
1302 [(set (match_operand:VF 0 "register_operand" "=x,v")
1304 (match_operand:VF 1 "<round_nimm_predicate>" "<comm>0,v")
1305 (match_operand:VF 2 "<round_nimm_predicate>" "xm,<round_constraint>")))]
1306 "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands) && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1308 <plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
1309 v<plusminus_mnemonic><ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
1310 [(set_attr "isa" "noavx,avx")
1311 (set_attr "type" "sseadd")
1312 (set_attr "prefix" "<mask_prefix3>")
1313 (set_attr "mode" "<MODE>")])
1315 (define_insn "<sse>_vm<plusminus_insn><mode>3<round_name>"
1316 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1319 (match_operand:VF_128 1 "register_operand" "0,v")
1320 (match_operand:VF_128 2 "nonimmediate_operand" "xm,<round_constraint>"))
1325 <plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
1326 v<plusminus_mnemonic><ssescalarmodesuffix>\t{<round_op3>%2, %1, %0|%0, %1, %<iptr>2<round_op3>}"
1327 [(set_attr "isa" "noavx,avx")
1328 (set_attr "type" "sseadd")
1329 (set_attr "prefix" "<round_prefix>")
1330 (set_attr "mode" "<ssescalarmode>")])
1332 (define_expand "mul<mode>3<mask_name><round_name>"
1333 [(set (match_operand:VF 0 "register_operand")
1335 (match_operand:VF 1 "<round_nimm_predicate>")
1336 (match_operand:VF 2 "<round_nimm_predicate>")))]
1337 "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1338 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
1340 (define_insn "*mul<mode>3<mask_name><round_name>"
1341 [(set (match_operand:VF 0 "register_operand" "=x,v")
1343 (match_operand:VF 1 "<round_nimm_predicate>" "%0,v")
1344 (match_operand:VF 2 "<round_nimm_predicate>" "xm,<round_constraint>")))]
1345 "TARGET_SSE && ix86_binary_operator_ok (MULT, <MODE>mode, operands) && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1347 mul<ssemodesuffix>\t{%2, %0|%0, %2}
1348 vmul<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
1349 [(set_attr "isa" "noavx,avx")
1350 (set_attr "type" "ssemul")
1351 (set_attr "prefix" "<mask_prefix3>")
1352 (set_attr "btver2_decode" "direct,double")
1353 (set_attr "mode" "<MODE>")])
1355 (define_insn "<sse>_vm<multdiv_mnemonic><mode>3<round_name>"
1356 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1359 (match_operand:VF_128 1 "register_operand" "0,v")
1360 (match_operand:VF_128 2 "nonimmediate_operand" "xm,<round_constraint>"))
1365 <multdiv_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
1366 v<multdiv_mnemonic><ssescalarmodesuffix>\t{<round_op3>%2, %1, %0|%0, %1, %<iptr>2<round_op3>}"
1367 [(set_attr "isa" "noavx,avx")
1368 (set_attr "type" "sse<multdiv_mnemonic>")
1369 (set_attr "prefix" "<round_prefix>")
1370 (set_attr "btver2_decode" "direct,double")
1371 (set_attr "mode" "<ssescalarmode>")])
1373 (define_expand "div<mode>3"
1374 [(set (match_operand:VF2 0 "register_operand")
1375 (div:VF2 (match_operand:VF2 1 "register_operand")
1376 (match_operand:VF2 2 "nonimmediate_operand")))]
1378 "ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);")
1380 (define_expand "div<mode>3"
1381 [(set (match_operand:VF1 0 "register_operand")
1382 (div:VF1 (match_operand:VF1 1 "register_operand")
1383 (match_operand:VF1 2 "nonimmediate_operand")))]
1386 ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);
1389 && TARGET_RECIP_VEC_DIV
1390 && !optimize_insn_for_size_p ()
1391 && flag_finite_math_only && !flag_trapping_math
1392 && flag_unsafe_math_optimizations)
1394 ix86_emit_swdivsf (operands[0], operands[1], operands[2], <MODE>mode);
1399 (define_insn "<sse>_div<mode>3<mask_name><round_name>"
1400 [(set (match_operand:VF 0 "register_operand" "=x,v")
1402 (match_operand:VF 1 "register_operand" "0,v")
1403 (match_operand:VF 2 "<round_nimm_predicate>" "xm,<round_constraint>")))]
1404 "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1406 div<ssemodesuffix>\t{%2, %0|%0, %2}
1407 vdiv<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
1408 [(set_attr "isa" "noavx,avx")
1409 (set_attr "type" "ssediv")
1410 (set_attr "prefix" "<mask_prefix3>")
1411 (set_attr "mode" "<MODE>")])
1413 (define_insn "<sse>_rcp<mode>2"
1414 [(set (match_operand:VF1_128_256 0 "register_operand" "=x")
1416 [(match_operand:VF1_128_256 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
1418 "%vrcpps\t{%1, %0|%0, %1}"
1419 [(set_attr "type" "sse")
1420 (set_attr "atom_sse_attr" "rcp")
1421 (set_attr "btver2_sse_attr" "rcp")
1422 (set_attr "prefix" "maybe_vex")
1423 (set_attr "mode" "<MODE>")])
1425 (define_insn "sse_vmrcpv4sf2"
1426 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1428 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
1430 (match_operand:V4SF 2 "register_operand" "0,x")
1434 rcpss\t{%1, %0|%0, %k1}
1435 vrcpss\t{%1, %2, %0|%0, %2, %k1}"
1436 [(set_attr "isa" "noavx,avx")
1437 (set_attr "type" "sse")
1438 (set_attr "ssememalign" "32")
1439 (set_attr "atom_sse_attr" "rcp")
1440 (set_attr "btver2_sse_attr" "rcp")
1441 (set_attr "prefix" "orig,vex")
1442 (set_attr "mode" "SF")])
1444 (define_insn "<mask_codefor>rcp14<mode><mask_name>"
1445 [(set (match_operand:VF_512 0 "register_operand" "=v")
1447 [(match_operand:VF_512 1 "nonimmediate_operand" "vm")]
1450 "vrcp14<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
1451 [(set_attr "type" "sse")
1452 (set_attr "prefix" "evex")
1453 (set_attr "mode" "<MODE>")])
1455 (define_insn "srcp14<mode>"
1456 [(set (match_operand:VF_128 0 "register_operand" "=v")
1459 [(match_operand:VF_128 1 "nonimmediate_operand" "vm")]
1461 (match_operand:VF_128 2 "register_operand" "v")
1464 "vrcp14<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %1}"
1465 [(set_attr "type" "sse")
1466 (set_attr "prefix" "evex")
1467 (set_attr "mode" "<MODE>")])
1469 (define_expand "sqrt<mode>2"
1470 [(set (match_operand:VF2 0 "register_operand")
1471 (sqrt:VF2 (match_operand:VF2 1 "nonimmediate_operand")))]
1474 (define_expand "sqrt<mode>2"
1475 [(set (match_operand:VF1 0 "register_operand")
1476 (sqrt:VF1 (match_operand:VF1 1 "nonimmediate_operand")))]
1480 && TARGET_RECIP_VEC_SQRT
1481 && !optimize_insn_for_size_p ()
1482 && flag_finite_math_only && !flag_trapping_math
1483 && flag_unsafe_math_optimizations)
1485 ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, false);
;; Vector square root: sets operand 0 to (sqrt:VF operand 1) for every mode
;; in the VF iterator.
;;   operand 0 - destination, a register ("=v").
;;   operand 1 - source; its predicate and constraint are supplied by the
;;               <round_nimm_predicate> / <round_constraint> placeholders.
;; The insn condition requires TARGET_SSE plus whatever the
;; <mask_mode512bit_condition> and <round_mode512bit_condition> placeholders
;; expand to.  The output template is written in the two-dialect
;; "{AT&T|Intel}" form, with the mask operand attached to operand 0 and the
;; rounding operand placed next to operand 1 in each dialect.
;; NOTE(review): the <mask_name>/<round_name> substitutions and the
;; placeholders they define live outside this chunk -- presumably they
;; generate the masked and embedded-rounding variants; confirm against
;; their definitions.
1490 (define_insn "<sse>_sqrt<mode>2<mask_name><round_name>"
1491 [(set (match_operand:VF 0 "register_operand" "=v")
1492 (sqrt:VF (match_operand:VF 1 "<round_nimm_predicate>" "<round_constraint>")))]
1493 "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1494 "%vsqrt<ssemodesuffix>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
1495 [(set_attr "type" "sse")
1496 (set_attr "atom_sse_attr" "sqrt")
1497 (set_attr "btver2_sse_attr" "sqrt")
1498 (set_attr "prefix" "maybe_vex")
1499 (set_attr "mode" "<MODE>")])
1501 (define_insn "<sse>_vmsqrt<mode>2<round_name>"
1502 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1505 (match_operand:VF_128 1 "nonimmediate_operand" "xm,<round_constraint>"))
1506 (match_operand:VF_128 2 "register_operand" "0,v")
1510 sqrt<ssescalarmodesuffix>\t{%1, %0|%0, %<iptr>1}
1511 vsqrt<ssescalarmodesuffix>\t{<round_op3>%1, %2, %0|%0, %2, %<iptr>1<round_op3>}"
1512 [(set_attr "isa" "noavx,avx")
1513 (set_attr "type" "sse")
1514 (set_attr "atom_sse_attr" "sqrt")
1515 (set_attr "prefix" "<round_prefix>")
1516 (set_attr "btver2_sse_attr" "sqrt")
1517 (set_attr "mode" "<ssescalarmode>")])
1519 (define_expand "rsqrt<mode>2"
1520 [(set (match_operand:VF1_128_256 0 "register_operand")
1522 [(match_operand:VF1_128_256 1 "nonimmediate_operand")] UNSPEC_RSQRT))]
1525 ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, true);
1529 (define_insn "<sse>_rsqrt<mode>2"
1530 [(set (match_operand:VF1_128_256 0 "register_operand" "=x")
1532 [(match_operand:VF1_128_256 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
1534 "%vrsqrtps\t{%1, %0|%0, %1}"
1535 [(set_attr "type" "sse")
1536 (set_attr "prefix" "maybe_vex")
1537 (set_attr "mode" "<MODE>")])
1539 (define_insn "<mask_codefor>rsqrt14<mode><mask_name>"
1540 [(set (match_operand:VF_512 0 "register_operand" "=v")
1542 [(match_operand:VF_512 1 "nonimmediate_operand" "vm")]
1545 "vrsqrt14<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
1546 [(set_attr "type" "sse")
1547 (set_attr "prefix" "evex")
1548 (set_attr "mode" "<MODE>")])
1550 (define_insn "rsqrt14<mode>"
1551 [(set (match_operand:VF_128 0 "register_operand" "=v")
1554 [(match_operand:VF_128 1 "nonimmediate_operand" "vm")]
1556 (match_operand:VF_128 2 "register_operand" "v")
1559 "vrsqrt14<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %1}"
1560 [(set_attr "type" "sse")
1561 (set_attr "prefix" "evex")
1562 (set_attr "mode" "<MODE>")])
1564 (define_insn "sse_vmrsqrtv4sf2"
1565 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1567 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
1569 (match_operand:V4SF 2 "register_operand" "0,x")
1573 rsqrtss\t{%1, %0|%0, %k1}
1574 vrsqrtss\t{%1, %2, %0|%0, %2, %k1}"
1575 [(set_attr "isa" "noavx,avx")
1576 (set_attr "type" "sse")
1577 (set_attr "ssememalign" "32")
1578 (set_attr "prefix" "orig,vex")
1579 (set_attr "mode" "SF")])
1581 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
1582 ;; isn't really correct, as those rtl operators aren't defined when
1583 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
1585 (define_expand "<code><mode>3<mask_name><round_saeonly_name>"
1586 [(set (match_operand:VF 0 "register_operand")
1588 (match_operand:VF 1 "<round_saeonly_nimm_predicate>")
1589 (match_operand:VF 2 "<round_saeonly_nimm_predicate>")))]
1590 "TARGET_SSE && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
1592 if (!flag_finite_math_only)
1593 operands[1] = force_reg (<MODE>mode, operands[1]);
1594 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
1597 (define_insn "*<code><mode>3_finite<mask_name><round_saeonly_name>"
1598 [(set (match_operand:VF 0 "register_operand" "=x,v")
1600 (match_operand:VF 1 "<round_saeonly_nimm_predicate>" "%0,v")
1601 (match_operand:VF 2 "<round_saeonly_nimm_predicate>" "xm,<round_saeonly_constraint>")))]
1602 "TARGET_SSE && flag_finite_math_only
1603 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
1604 && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
1606 <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
1607 v<maxmin_float><ssemodesuffix>\t{<round_saeonly_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_saeonly_mask_op3>}"
1608 [(set_attr "isa" "noavx,avx")
1609 (set_attr "type" "sseadd")
1610 (set_attr "btver2_sse_attr" "maxmin")
1611 (set_attr "prefix" "<mask_prefix3>")
1612 (set_attr "mode" "<MODE>")])
1614 (define_insn "*<code><mode>3<mask_name><round_saeonly_name>"
1615 [(set (match_operand:VF 0 "register_operand" "=x,v")
1617 (match_operand:VF 1 "register_operand" "0,v")
1618 (match_operand:VF 2 "<round_saeonly_nimm_predicate>" "xm,<round_saeonly_constraint>")))]
1619 "TARGET_SSE && !flag_finite_math_only
1620 && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
1622 <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
1623 v<maxmin_float><ssemodesuffix>\t{<round_saeonly_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_saeonly_mask_op3>}"
1624 [(set_attr "isa" "noavx,avx")
1625 (set_attr "type" "sseadd")
1626 (set_attr "btver2_sse_attr" "maxmin")
1627 (set_attr "prefix" "<mask_prefix3>")
1628 (set_attr "mode" "<MODE>")])
1630 (define_insn "<sse>_vm<code><mode>3<round_saeonly_name>"
1631 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1634 (match_operand:VF_128 1 "register_operand" "0,v")
1635 (match_operand:VF_128 2 "nonimmediate_operand" "xm,<round_saeonly_constraint>"))
1640 <maxmin_float><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
1641 v<maxmin_float><ssescalarmodesuffix>\t{<round_saeonly_op3>%2, %1, %0|%0, %1, %<iptr>2<round_saeonly_op3>}"
1642 [(set_attr "isa" "noavx,avx")
1643 (set_attr "type" "sse")
1644 (set_attr "btver2_sse_attr" "maxmin")
1645 (set_attr "prefix" "<round_saeonly_prefix>")
1646 (set_attr "mode" "<ssescalarmode>")])
1648 ;; These versions of the min/max patterns implement exactly the operations
1649 ;; min = (op1 < op2 ? op1 : op2)
1650 ;; max = (!(op1 < op2) ? op1 : op2)
1651 ;; Their operands are not commutative, and thus they may be used in the
1652 ;; presence of -0.0 and NaN.
1654 (define_insn "*ieee_smin<mode>3"
1655 [(set (match_operand:VF 0 "register_operand" "=v,v")
1657 [(match_operand:VF 1 "register_operand" "0,v")
1658 (match_operand:VF 2 "nonimmediate_operand" "vm,vm")]
1662 min<ssemodesuffix>\t{%2, %0|%0, %2}
1663 vmin<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1664 [(set_attr "isa" "noavx,avx")
1665 (set_attr "type" "sseadd")
1666 (set_attr "prefix" "orig,vex")
1667 (set_attr "mode" "<MODE>")])
1669 (define_insn "*ieee_smax<mode>3"
1670 [(set (match_operand:VF 0 "register_operand" "=v,v")
1672 [(match_operand:VF 1 "register_operand" "0,v")
1673 (match_operand:VF 2 "nonimmediate_operand" "vm,vm")]
1677 max<ssemodesuffix>\t{%2, %0|%0, %2}
1678 vmax<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1679 [(set_attr "isa" "noavx,avx")
1680 (set_attr "type" "sseadd")
1681 (set_attr "prefix" "orig,vex")
1682 (set_attr "mode" "<MODE>")])
1684 (define_insn "avx_addsubv4df3"
1685 [(set (match_operand:V4DF 0 "register_operand" "=x")
1688 (match_operand:V4DF 1 "register_operand" "x")
1689 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
1690 (minus:V4DF (match_dup 1) (match_dup 2))
1693 "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1694 [(set_attr "type" "sseadd")
1695 (set_attr "prefix" "vex")
1696 (set_attr "mode" "V4DF")])
1698 (define_insn "sse3_addsubv2df3"
1699 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1702 (match_operand:V2DF 1 "register_operand" "0,x")
1703 (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm"))
1704 (minus:V2DF (match_dup 1) (match_dup 2))
1708 addsubpd\t{%2, %0|%0, %2}
1709 vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1710 [(set_attr "isa" "noavx,avx")
1711 (set_attr "type" "sseadd")
1712 (set_attr "atom_unit" "complex")
1713 (set_attr "prefix" "orig,vex")
1714 (set_attr "mode" "V2DF")])
1716 (define_insn "avx_addsubv8sf3"
1717 [(set (match_operand:V8SF 0 "register_operand" "=x")
1720 (match_operand:V8SF 1 "register_operand" "x")
1721 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
1722 (minus:V8SF (match_dup 1) (match_dup 2))
1725 "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1726 [(set_attr "type" "sseadd")
1727 (set_attr "prefix" "vex")
1728 (set_attr "mode" "V8SF")])
1730 (define_insn "sse3_addsubv4sf3"
1731 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1734 (match_operand:V4SF 1 "register_operand" "0,x")
1735 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
1736 (minus:V4SF (match_dup 1) (match_dup 2))
1740 addsubps\t{%2, %0|%0, %2}
1741 vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1742 [(set_attr "isa" "noavx,avx")
1743 (set_attr "type" "sseadd")
1744 (set_attr "prefix" "orig,vex")
1745 (set_attr "prefix_rep" "1,*")
1746 (set_attr "mode" "V4SF")])
1748 (define_insn "avx_h<plusminus_insn>v4df3"
1749 [(set (match_operand:V4DF 0 "register_operand" "=x")
1754 (match_operand:V4DF 1 "register_operand" "x")
1755 (parallel [(const_int 0)]))
1756 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1759 (match_operand:V4DF 2 "nonimmediate_operand" "xm")
1760 (parallel [(const_int 0)]))
1761 (vec_select:DF (match_dup 2) (parallel [(const_int 1)]))))
1764 (vec_select:DF (match_dup 1) (parallel [(const_int 2)]))
1765 (vec_select:DF (match_dup 1) (parallel [(const_int 3)])))
1767 (vec_select:DF (match_dup 2) (parallel [(const_int 2)]))
1768 (vec_select:DF (match_dup 2) (parallel [(const_int 3)]))))))]
1770 "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
1771 [(set_attr "type" "sseadd")
1772 (set_attr "prefix" "vex")
1773 (set_attr "mode" "V4DF")])
1775 (define_expand "sse3_haddv2df3"
1776 [(set (match_operand:V2DF 0 "register_operand")
1780 (match_operand:V2DF 1 "register_operand")
1781 (parallel [(const_int 0)]))
1782 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1785 (match_operand:V2DF 2 "nonimmediate_operand")
1786 (parallel [(const_int 0)]))
1787 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1790 (define_insn "*sse3_haddv2df3"
1791 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1795 (match_operand:V2DF 1 "register_operand" "0,x")
1796 (parallel [(match_operand:SI 3 "const_0_to_1_operand")]))
1799 (parallel [(match_operand:SI 4 "const_0_to_1_operand")])))
1802 (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm")
1803 (parallel [(match_operand:SI 5 "const_0_to_1_operand")]))
1806 (parallel [(match_operand:SI 6 "const_0_to_1_operand")])))))]
1808 && INTVAL (operands[3]) != INTVAL (operands[4])
1809 && INTVAL (operands[5]) != INTVAL (operands[6])"
1811 haddpd\t{%2, %0|%0, %2}
1812 vhaddpd\t{%2, %1, %0|%0, %1, %2}"
1813 [(set_attr "isa" "noavx,avx")
1814 (set_attr "type" "sseadd")
1815 (set_attr "prefix" "orig,vex")
1816 (set_attr "mode" "V2DF")])
1818 (define_insn "sse3_hsubv2df3"
1819 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1823 (match_operand:V2DF 1 "register_operand" "0,x")
1824 (parallel [(const_int 0)]))
1825 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1828 (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm")
1829 (parallel [(const_int 0)]))
1830 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1833 hsubpd\t{%2, %0|%0, %2}
1834 vhsubpd\t{%2, %1, %0|%0, %1, %2}"
1835 [(set_attr "isa" "noavx,avx")
1836 (set_attr "type" "sseadd")
1837 (set_attr "prefix" "orig,vex")
1838 (set_attr "mode" "V2DF")])
1840 (define_insn "*sse3_haddv2df3_low"
1841 [(set (match_operand:DF 0 "register_operand" "=x,x")
1844 (match_operand:V2DF 1 "register_operand" "0,x")
1845 (parallel [(match_operand:SI 2 "const_0_to_1_operand")]))
1848 (parallel [(match_operand:SI 3 "const_0_to_1_operand")]))))]
1850 && INTVAL (operands[2]) != INTVAL (operands[3])"
1852 haddpd\t{%0, %0|%0, %0}
1853 vhaddpd\t{%1, %1, %0|%0, %1, %1}"
1854 [(set_attr "isa" "noavx,avx")
1855 (set_attr "type" "sseadd1")
1856 (set_attr "prefix" "orig,vex")
1857 (set_attr "mode" "V2DF")])
1859 (define_insn "*sse3_hsubv2df3_low"
1860 [(set (match_operand:DF 0 "register_operand" "=x,x")
1863 (match_operand:V2DF 1 "register_operand" "0,x")
1864 (parallel [(const_int 0)]))
1867 (parallel [(const_int 1)]))))]
1870 hsubpd\t{%0, %0|%0, %0}
1871 vhsubpd\t{%1, %1, %0|%0, %1, %1}"
1872 [(set_attr "isa" "noavx,avx")
1873 (set_attr "type" "sseadd1")
1874 (set_attr "prefix" "orig,vex")
1875 (set_attr "mode" "V2DF")])
1877 (define_insn "avx_h<plusminus_insn>v8sf3"
1878 [(set (match_operand:V8SF 0 "register_operand" "=x")
1884 (match_operand:V8SF 1 "register_operand" "x")
1885 (parallel [(const_int 0)]))
1886 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1888 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1889 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1893 (match_operand:V8SF 2 "nonimmediate_operand" "xm")
1894 (parallel [(const_int 0)]))
1895 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1897 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1898 (vec_select:SF (match_dup 2) (parallel [(const_int 3)])))))
1902 (vec_select:SF (match_dup 1) (parallel [(const_int 4)]))
1903 (vec_select:SF (match_dup 1) (parallel [(const_int 5)])))
1905 (vec_select:SF (match_dup 1) (parallel [(const_int 6)]))
1906 (vec_select:SF (match_dup 1) (parallel [(const_int 7)]))))
1909 (vec_select:SF (match_dup 2) (parallel [(const_int 4)]))
1910 (vec_select:SF (match_dup 2) (parallel [(const_int 5)])))
1912 (vec_select:SF (match_dup 2) (parallel [(const_int 6)]))
1913 (vec_select:SF (match_dup 2) (parallel [(const_int 7)])))))))]
1915 "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1916 [(set_attr "type" "sseadd")
1917 (set_attr "prefix" "vex")
1918 (set_attr "mode" "V8SF")])
1920 (define_insn "sse3_h<plusminus_insn>v4sf3"
1921 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1926 (match_operand:V4SF 1 "register_operand" "0,x")
1927 (parallel [(const_int 0)]))
1928 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1930 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1931 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1935 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm")
1936 (parallel [(const_int 0)]))
1937 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1939 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1940 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
1943 h<plusminus_mnemonic>ps\t{%2, %0|%0, %2}
1944 vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1945 [(set_attr "isa" "noavx,avx")
1946 (set_attr "type" "sseadd")
1947 (set_attr "atom_unit" "complex")
1948 (set_attr "prefix" "orig,vex")
1949 (set_attr "prefix_rep" "1,*")
1950 (set_attr "mode" "V4SF")])
1952 (define_expand "reduc_splus_v8df"
1953 [(match_operand:V8DF 0 "register_operand")
1954 (match_operand:V8DF 1 "register_operand")]
1957 ix86_expand_reduc (gen_addv8df3, operands[0], operands[1]);
1961 (define_expand "reduc_splus_v4df"
1962 [(match_operand:V4DF 0 "register_operand")
1963 (match_operand:V4DF 1 "register_operand")]
1966 rtx tmp = gen_reg_rtx (V4DFmode);
1967 rtx tmp2 = gen_reg_rtx (V4DFmode);
1968 emit_insn (gen_avx_haddv4df3 (tmp, operands[1], operands[1]));
1969 emit_insn (gen_avx_vperm2f128v4df3 (tmp2, tmp, tmp, GEN_INT (1)));
1970 emit_insn (gen_addv4df3 (operands[0], tmp, tmp2));
1974 (define_expand "reduc_splus_v2df"
1975 [(match_operand:V2DF 0 "register_operand")
1976 (match_operand:V2DF 1 "register_operand")]
1979 emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
1983 (define_expand "reduc_splus_v16sf"
1984 [(match_operand:V16SF 0 "register_operand")
1985 (match_operand:V16SF 1 "register_operand")]
1988 ix86_expand_reduc (gen_addv16sf3, operands[0], operands[1]);
1992 (define_expand "reduc_splus_v8sf"
1993 [(match_operand:V8SF 0 "register_operand")
1994 (match_operand:V8SF 1 "register_operand")]
1997 rtx tmp = gen_reg_rtx (V8SFmode);
1998 rtx tmp2 = gen_reg_rtx (V8SFmode);
1999 emit_insn (gen_avx_haddv8sf3 (tmp, operands[1], operands[1]));
2000 emit_insn (gen_avx_haddv8sf3 (tmp2, tmp, tmp));
2001 emit_insn (gen_avx_vperm2f128v8sf3 (tmp, tmp2, tmp2, GEN_INT (1)));
2002 emit_insn (gen_addv8sf3 (operands[0], tmp, tmp2));
2006 (define_expand "reduc_splus_v4sf"
2007 [(match_operand:V4SF 0 "register_operand")
2008 (match_operand:V4SF 1 "register_operand")]
2013 rtx tmp = gen_reg_rtx (V4SFmode);
2014 emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
2015 emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
2018 ix86_expand_reduc (gen_addv4sf3, operands[0], operands[1]);
2022 ;; Modes handled by reduc_sm{in,ax}* patterns.
;; Each entry pairs a mode with the target condition that enables it:
;;   V32QI, V16HI, V8SI, V4DI   TARGET_AVX2
;;   V8SF, V4DF                 TARGET_AVX
;;   V4SF                       TARGET_SSE
;;   V16SI, V8DI, V16SF, V8DF   TARGET_AVX512F
;; V4SF is the only 128-bit mode listed here; V2DF and the 128-bit integer
;; modes are not in this iterator.
;; The "reduc_<code>_<mode>" expander that follows iterates over this list
;; together with the smaxmin code iterator.
2023 (define_mode_iterator REDUC_SMINMAX_MODE
2024 [(V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
2025 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
2026 (V8SF "TARGET_AVX") (V4DF "TARGET_AVX")
2027 (V4SF "TARGET_SSE") (V16SI "TARGET_AVX512F")
2028 (V8DI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
2029 (V8DF "TARGET_AVX512F")])
2031 (define_expand "reduc_<code>_<mode>"
2032 [(smaxmin:REDUC_SMINMAX_MODE
2033 (match_operand:REDUC_SMINMAX_MODE 0 "register_operand")
2034 (match_operand:REDUC_SMINMAX_MODE 1 "register_operand"))]
2037 ix86_expand_reduc (gen_<code><mode>3, operands[0], operands[1]);
2041 (define_expand "reduc_<code>_<mode>"
2043 (match_operand:VI48_512 0 "register_operand")
2044 (match_operand:VI48_512 1 "register_operand"))]
2047 ix86_expand_reduc (gen_<code><mode>3, operands[0], operands[1]);
2051 (define_expand "reduc_<code>_<mode>"
2053 (match_operand:VI_256 0 "register_operand")
2054 (match_operand:VI_256 1 "register_operand"))]
2057 ix86_expand_reduc (gen_<code><mode>3, operands[0], operands[1]);
2061 (define_expand "reduc_umin_v8hi"
2063 (match_operand:V8HI 0 "register_operand")
2064 (match_operand:V8HI 1 "register_operand"))]
2067 ix86_expand_reduc (gen_uminv8hi3, operands[0], operands[1]);
2071 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2073 ;; Parallel floating point comparisons
2075 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2077 (define_insn "avx_cmp<mode>3"
2078 [(set (match_operand:VF_128_256 0 "register_operand" "=x")
2080 [(match_operand:VF_128_256 1 "register_operand" "x")
2081 (match_operand:VF_128_256 2 "nonimmediate_operand" "xm")
2082 (match_operand:SI 3 "const_0_to_31_operand" "n")]
2085 "vcmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2086 [(set_attr "type" "ssecmp")
2087 (set_attr "length_immediate" "1")
2088 (set_attr "prefix" "vex")
2089 (set_attr "mode" "<MODE>")])
2091 (define_insn "avx_vmcmp<mode>3"
2092 [(set (match_operand:VF_128 0 "register_operand" "=x")
2095 [(match_operand:VF_128 1 "register_operand" "x")
2096 (match_operand:VF_128 2 "nonimmediate_operand" "xm")
2097 (match_operand:SI 3 "const_0_to_31_operand" "n")]
2102 "vcmp<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %3}"
2103 [(set_attr "type" "ssecmp")
2104 (set_attr "length_immediate" "1")
2105 (set_attr "prefix" "vex")
2106 (set_attr "mode" "<ssescalarmode>")])
2108 (define_insn "*<sse>_maskcmp<mode>3_comm"
2109 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
2110 (match_operator:VF_128_256 3 "sse_comparison_operator"
2111 [(match_operand:VF_128_256 1 "register_operand" "%0,x")
2112 (match_operand:VF_128_256 2 "nonimmediate_operand" "xm,xm")]))]
2114 && GET_RTX_CLASS (GET_CODE (operands[3])) == RTX_COMM_COMPARE"
2116 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
2117 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2118 [(set_attr "isa" "noavx,avx")
2119 (set_attr "type" "ssecmp")
2120 (set_attr "length_immediate" "1")
2121 (set_attr "prefix" "orig,vex")
2122 (set_attr "mode" "<MODE>")])
2124 (define_insn "<sse>_maskcmp<mode>3"
2125 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
2126 (match_operator:VF_128_256 3 "sse_comparison_operator"
2127 [(match_operand:VF_128_256 1 "register_operand" "0,x")
2128 (match_operand:VF_128_256 2 "nonimmediate_operand" "xm,xm")]))]
2131 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
2132 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2133 [(set_attr "isa" "noavx,avx")
2134 (set_attr "type" "ssecmp")
2135 (set_attr "length_immediate" "1")
2136 (set_attr "prefix" "orig,vex")
2137 (set_attr "mode" "<MODE>")])
2139 (define_insn "<sse>_vmmaskcmp<mode>3"
2140 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
2142 (match_operator:VF_128 3 "sse_comparison_operator"
2143 [(match_operand:VF_128 1 "register_operand" "0,x")
2144 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm")])
2149 cmp%D3<ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
2150 vcmp%D3<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %<iptr>2}"
2151 [(set_attr "isa" "noavx,avx")
2152 (set_attr "type" "ssecmp")
2153 (set_attr "length_immediate" "1,*")
2154 (set_attr "prefix" "orig,vex")
2155 (set_attr "mode" "<ssescalarmode>")])
;; Predicate applied to the comparison-selector immediate, chosen per mode:
;;   V16SF, V8DF  ->  const_0_to_31_operand
;;   V16SI, V8DI  ->  const_0_to_7_operand
;; Consumed as the predicate of operand 3 in "avx512f_cmp<mode>3..." below.
;; NOTE(review): the accepted ranges are inferred from the predicate names;
;; the predicate definitions are not part of this file chunk.
2157 (define_mode_attr cmp_imm_predicate
2158 [(V16SF "const_0_to_31_operand") (V8DF "const_0_to_31_operand")
2159 (V16SI "const_0_to_7_operand") (V8DI "const_0_to_7_operand")])
2161 (define_insn "avx512f_cmp<mode>3<mask_scalar_merge_name><round_saeonly_name>"
2162 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2163 (unspec:<avx512fmaskmode>
2164 [(match_operand:VI48F_512 1 "register_operand" "v")
2165 (match_operand:VI48F_512 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
2166 (match_operand:SI 3 "<cmp_imm_predicate>" "n")]
2168 "TARGET_AVX512F && <round_saeonly_mode512bit_condition>"
2169 "v<sseintprefix>cmp<ssemodesuffix>\t{%3, <round_saeonly_mask_scalar_merge_op4>%2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2<round_saeonly_mask_scalar_merge_op4>, %3}"
2170 [(set_attr "type" "ssecmp")
2171 (set_attr "length_immediate" "1")
2172 (set_attr "prefix" "evex")
2173 (set_attr "mode" "<sseinsnmode>")])
;; Unsigned integer compare (UNSPEC_UNSIGNED_PCMP) of two VI48_512 vectors
;; into a mask register ("Yk").  Emits vpcmpu<ssemodesuffix>; operand 3 is
;; the comparison immediate in the range 0..7.
2175 (define_insn "avx512f_ucmp<mode>3<mask_scalar_merge_name>"
2176 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2177 (unspec:<avx512fmaskmode>
2178 [(match_operand:VI48_512 1 "register_operand" "v")
2179 (match_operand:VI48_512 2 "nonimmediate_operand" "vm")
2180 (match_operand:SI 3 "const_0_to_7_operand" "n")]
2181 UNSPEC_UNSIGNED_PCMP))]
2183 "vpcmpu<ssemodesuffix>\t{%3, %2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2, %3}"
2184 [(set_attr "type" "ssecmp")
2185 (set_attr "length_immediate" "1")
2186 (set_attr "prefix" "evex")
2187 (set_attr "mode" "<sseinsnmode>")])
;; vcmp<ssescalarmodesuffix> on VF_128 operands with a 0..31 comparison
;; immediate (operand 3).  The unspec result is combined with an outer
;; (and ...) and written to a mask register ("Yk").
2189 (define_insn "avx512f_vmcmp<mode>3<round_saeonly_name>"
2190 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2191 (and:<avx512fmaskmode>
2192 (unspec:<avx512fmaskmode>
2193 [(match_operand:VF_128 1 "register_operand" "v")
2194 (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
2195 (match_operand:SI 3 "const_0_to_31_operand" "n")]
2199 "vcmp<ssescalarmodesuffix>\t{%3, <round_saeonly_op4>%2, %1, %0|%0, %1, %2<round_saeonly_op4>, %3}"
2200 [(set_attr "type" "ssecmp")
2201 (set_attr "length_immediate" "1")
2202 (set_attr "prefix" "evex")
2203 (set_attr "mode" "<ssescalarmode>")])
;; Variant of the vcmp<ssescalarmodesuffix> pattern that takes an additional
;; mask-register input (operand 4, constraint "Yk").  The mask is ANDed into
;; the result in the RTL and printed as {%4} after the destination.
2205 (define_insn "avx512f_vmcmp<mode>3_mask<round_saeonly_name>"
2206 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2207 (and:<avx512fmaskmode>
2208 (unspec:<avx512fmaskmode>
2209 [(match_operand:VF_128 1 "register_operand" "v")
2210 (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
2211 (match_operand:SI 3 "const_0_to_31_operand" "n")]
2213 (and:<avx512fmaskmode>
2214 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")
2217 "vcmp<ssescalarmodesuffix>\t{%3, <round_saeonly_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_saeonly_op5>, %3}"
2218 [(set_attr "type" "ssecmp")
2219 (set_attr "length_immediate" "1")
2220 (set_attr "prefix" "evex")
2221 (set_attr "mode" "<ssescalarmode>")])
;; Compare of two VF vectors whose comparison code is matched as operand 3
;; by sse_comparison_operator.  The result goes to a mask register ("Yk")
;; and the insn is emitted as vcmp%D3<ssemodesuffix> with an EVEX prefix.
2223 (define_insn "avx512f_maskcmp<mode>3"
2224 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2225 (match_operator:<avx512fmaskmode> 3 "sse_comparison_operator"
2226 [(match_operand:VF 1 "register_operand" "v")
2227 (match_operand:VF 2 "nonimmediate_operand" "vm")]))]
2229 "vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2230 [(set_attr "type" "ssecmp")
2231 (set_attr "length_immediate" "1")
2232 (set_attr "prefix" "evex")
2233 (set_attr "mode" "<sseinsnmode>")])
;; Compare of element 0 of operands 0 and 1 (each selected with
;; (parallel [(const_int 0)])) that sets FLAGS_REG in CCFP mode.  Enabled
;; when SSE_FLOAT_MODE_P holds for the mode; the output template is
;; %vcomi<ssemodesuffix>.  The prefix_data16 attribute is chosen by testing
;; whether the insn "mode" attribute is DF.
2235 (define_insn "<sse>_comi<round_saeonly_name>"
2236 [(set (reg:CCFP FLAGS_REG)
2239 (match_operand:<ssevecmode> 0 "register_operand" "v")
2240 (parallel [(const_int 0)]))
2242 (match_operand:<ssevecmode> 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
2243 (parallel [(const_int 0)]))))]
2244 "SSE_FLOAT_MODE_P (<MODE>mode)"
2245 "%vcomi<ssemodesuffix>\t{<round_saeonly_op2>%1, %0|%0, %<iptr>1<round_saeonly_op2>}"
2246 [(set_attr "type" "ssecomi")
2247 (set_attr "prefix" "maybe_vex")
2248 (set_attr "prefix_rep" "0")
2249 (set (attr "prefix_data16")
2250 (if_then_else (eq_attr "mode" "DF")
2252 (const_string "0")))
2253 (set_attr "mode" "<MODE>")])
;; Same shape as the comi pattern, but FLAGS_REG is set in CCFPU mode and
;; the output template is %vucomi<ssemodesuffix>.  Element 0 of each operand
;; is selected with (parallel [(const_int 0)]).
2255 (define_insn "<sse>_ucomi<round_saeonly_name>"
2256 [(set (reg:CCFPU FLAGS_REG)
2259 (match_operand:<ssevecmode> 0 "register_operand" "v")
2260 (parallel [(const_int 0)]))
2262 (match_operand:<ssevecmode> 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
2263 (parallel [(const_int 0)]))))]
2264 "SSE_FLOAT_MODE_P (<MODE>mode)"
2265 "%vucomi<ssemodesuffix>\t{<round_saeonly_op2>%1, %0|%0, %<iptr>1<round_saeonly_op2>}"
2266 [(set_attr "type" "ssecomi")
2267 (set_attr "prefix" "maybe_vex")
2268 (set_attr "prefix_rep" "0")
2269 (set (attr "prefix_data16")
2270 (if_then_else (eq_attr "mode" "DF")
2272 (const_string "0")))
2273 (set_attr "mode" "<MODE>")])
;; Expander for 512-bit modes: operand 3 is an operator applied to the
;; floating-point (VF_512) operands 4 and 5, and operands 1 and 2 are V_512
;; general operands.  The condition requires the V_512 and VF_512 modes to
;; have the same number of units.  Expansion is delegated to
;; ix86_expand_fp_vcond, whose result is captured in OK.
2275 (define_expand "vcond<V_512:mode><VF_512:mode>"
2276 [(set (match_operand:V_512 0 "register_operand")
2278 (match_operator 3 ""
2279 [(match_operand:VF_512 4 "nonimmediate_operand")
2280 (match_operand:VF_512 5 "nonimmediate_operand")])
2281 (match_operand:V_512 1 "general_operand")
2282 (match_operand:V_512 2 "general_operand")))]
2284 && (GET_MODE_NUNITS (<V_512:MODE>mode)
2285 == GET_MODE_NUNITS (<VF_512:MODE>mode))"
2287 bool ok = ix86_expand_fp_vcond (operands);
;; Expander for 256-bit modes: operand 3 is an operator applied to the
;; floating-point (VF_256) operands 4 and 5, and operands 1 and 2 are V_256
;; general operands.  The condition requires the V_256 and VF_256 modes to
;; have the same number of units.  Expansion is delegated to
;; ix86_expand_fp_vcond, whose result is captured in OK.
2292 (define_expand "vcond<V_256:mode><VF_256:mode>"
2293 [(set (match_operand:V_256 0 "register_operand")
2295 (match_operator 3 ""
2296 [(match_operand:VF_256 4 "nonimmediate_operand")
2297 (match_operand:VF_256 5 "nonimmediate_operand")])
2298 (match_operand:V_256 1 "general_operand")
2299 (match_operand:V_256 2 "general_operand")))]
2301 && (GET_MODE_NUNITS (<V_256:MODE>mode)
2302 == GET_MODE_NUNITS (<VF_256:MODE>mode))"
2304 bool ok = ix86_expand_fp_vcond (operands);
;; Expander for 128-bit modes: operand 3 is an operator applied to the
;; floating-point (VF_128) operands 4 and 5, and operands 1 and 2 are V_128
;; general operands.  The condition requires the V_128 and VF_128 modes to
;; have the same number of units.  Expansion is delegated to
;; ix86_expand_fp_vcond, whose result is captured in OK.
2309 (define_expand "vcond<V_128:mode><VF_128:mode>"
2310 [(set (match_operand:V_128 0 "register_operand")
2312 (match_operator 3 ""
2313 [(match_operand:VF_128 4 "nonimmediate_operand")
2314 (match_operand:VF_128 5 "nonimmediate_operand")])
2315 (match_operand:V_128 1 "general_operand")
2316 (match_operand:V_128 2 "general_operand")))]
2318 && (GET_MODE_NUNITS (<V_128:MODE>mode)
2319 == GET_MODE_NUNITS (<VF_128:MODE>mode))"
2321 bool ok = ix86_expand_fp_vcond (operands);
2326 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2328 ;; Parallel floating point logical operations
2330 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; AND-NOT on VF vectors.  The output template is assembled at run time
;; into a static buffer with snprintf:
;;   - the mnemonic suffix is selected from get_attr_mode (insn);
;;   - the operand layout comes from which_alternative: two-operand
;;     "andn%s" for alternative 0, three-operand "vandn%s" for alternative 1;
;;   - when <MODE_SIZE> is 64 the "vpandn%s" form is used instead, because
;;     there is no vandnp[sd] at that size.
;; The "mode" attribute is computed by the cond at the end from the
;; tuning/target tests listed there.
2332 (define_insn "<sse>_andnot<mode>3"
2333 [(set (match_operand:VF 0 "register_operand" "=x,v")
2336 (match_operand:VF 1 "register_operand" "0,v"))
2337 (match_operand:VF 2 "nonimmediate_operand" "xm,vm")))]
2340 static char buf[32];
2344 switch (get_attr_mode (insn))
2351 suffix = "<ssemodesuffix>";
2354 switch (which_alternative)
2357 ops = "andn%s\t{%%2, %%0|%%0, %%2}";
2360 ops = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
2366 /* There is no vandnp[sd]. Use vpandnq. */
2367 if (<MODE_SIZE> == 64)
2370 ops = "vpandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
2373 snprintf (buf, sizeof (buf), ops, suffix);
2376 [(set_attr "isa" "noavx,avx")
2377 (set_attr "type" "sselog")
2378 (set_attr "prefix" "orig,maybe_evex")
2380 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
2381 (const_string "<ssePSmode>")
2382 (match_test "TARGET_AVX")
2383 (const_string "<MODE>")
2384 (match_test "optimize_function_for_size_p (cfun)")
2385 (const_string "V4SF")
2387 (const_string "<MODE>")))])
;; Expander for the any_logic operations on 128/256-bit float vectors
;; (VF_128_256).  Both inputs may be in memory; the preparation statement
;; calls ix86_fixup_binary_operands_no_copy on the operands.
2389 (define_expand "<code><mode>3"
2390 [(set (match_operand:VF_128_256 0 "register_operand")
2391 (any_logic:VF_128_256
2392 (match_operand:VF_128_256 1 "nonimmediate_operand")
2393 (match_operand:VF_128_256 2 "nonimmediate_operand")))]
2395 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; 512-bit (VF_512) counterpart of the logical-operation expander.  Both
;; inputs may be in memory; the preparation statement calls
;; ix86_fixup_binary_operands_no_copy on the operands.
2397 (define_expand "<code><mode>3"
2398 [(set (match_operand:VF_512 0 "register_operand")
2400 (match_operand:VF_512 1 "nonimmediate_operand")
2401 (match_operand:VF_512 2 "nonimmediate_operand")))]
2403 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Logical operation on VF vectors; operands 1 and 2 are commutative ("%").
;; Enabled under TARGET_SSE when ix86_binary_operator_ok accepts the
;; operands.  The output template is assembled at run time with snprintf:
;;   - the suffix is selected from get_attr_mode (insn);
;;   - alternative 0 uses the two-operand "<logic>%s" form and
;;     alternative 1 the three-operand "v<logic>%s" form;
;;   - when <MODE_SIZE> is 64 the "vp<logic>%s" form is used instead,
;;     because there is no v<logic>p[sd] at that size.
2405 (define_insn "*<code><mode>3"
2406 [(set (match_operand:VF 0 "register_operand" "=x,v")
2408 (match_operand:VF 1 "nonimmediate_operand" "%0,v")
2409 (match_operand:VF 2 "nonimmediate_operand" "xm,vm")))]
2410 "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
2412 static char buf[32];
2416 switch (get_attr_mode (insn))
2423 suffix = "<ssemodesuffix>";
2426 switch (which_alternative)
2429 ops = "<logic>%s\t{%%2, %%0|%%0, %%2}";
2432 ops = "v<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
2438 /* There is no v<logic>p[sd]. Use vp<logic>q. */
2439 if (<MODE_SIZE> == 64)
2442 ops = "vp<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
2445 snprintf (buf, sizeof (buf), ops, suffix);
2448 [(set_attr "isa" "noavx,avx")
2449 (set_attr "type" "sselog")
2450 (set_attr "prefix" "orig,maybe_evex")
2452 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
2453 (const_string "<ssePSmode>")
2454 (match_test "TARGET_AVX")
2455 (const_string "<MODE>")
2456 (match_test "optimize_function_for_size_p (cfun)")
2457 (const_string "V4SF")
2459 (const_string "<MODE>")))])
;; copysign for VF vectors, built from logical operations on a mask.
;; operands[3] is the mask returned by ix86_build_signbit_mask; operands[4]
;; and operands[5] are fresh pseudo registers.  The visible RTL combines
;; (not mask) with operand 1 and mask with operand 2, then ORs the two
;; temporaries into operand 0.
;; NOTE(review): presumably operands 4 and 5 receive the two AND results
;; in that order -- confirm against the full pattern.
2461 (define_expand "copysign<mode>3"
2464 (not:VF (match_dup 3))
2465 (match_operand:VF 1 "nonimmediate_operand")))
2467 (and:VF (match_dup 3)
2468 (match_operand:VF 2 "nonimmediate_operand")))
2469 (set (match_operand:VF 0 "register_operand")
2470 (ior:VF (match_dup 4) (match_dup 5)))]
2473 operands[3] = ix86_build_signbit_mask (<MODE>mode, 1, 0);
2475 operands[4] = gen_reg_rtx (<MODE>mode);
2476 operands[5] = gen_reg_rtx (<MODE>mode);
2479 ;; Also define scalar versions. These are used for abs, neg, and
2480 ;; conditional move. Using subregs into vector modes causes register
2481 ;; allocation lossage. These patterns do not allow memory operands
2482 ;; because the native instructions read the full 128 bits.
;; Scalar (MODEF) AND-NOT.  All operands must be registers.  The mnemonic
;; suffix is "ps" when the insn "mode" attribute is V4SF and
;; <ssevecmodesuffix> otherwise; alternative 0 emits the two-operand
;; "andn%s" form and alternative 1 the three-operand "vandn%s" form.
2484 (define_insn "*andnot<mode>3"
2485 [(set (match_operand:MODEF 0 "register_operand" "=x,x")
2488 (match_operand:MODEF 1 "register_operand" "0,x"))
2489 (match_operand:MODEF 2 "register_operand" "x,x")))]
2490 "SSE_FLOAT_MODE_P (<MODE>mode)"
2492 static char buf[32];
2495 = (get_attr_mode (insn) == MODE_V4SF) ? "ps" : "<ssevecmodesuffix>";
2497 switch (which_alternative)
2500 ops = "andn%s\t{%%2, %%0|%%0, %%2}";
2503 ops = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
2509 snprintf (buf, sizeof (buf), ops, suffix);
2512 [(set_attr "isa" "noavx,avx")
2513 (set_attr "type" "sselog")
2514 (set_attr "prefix" "orig,vex")
2516 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
2517 (const_string "V4SF")
2518 (match_test "TARGET_AVX")
2519 (const_string "<ssevecmode>")
2520 (match_test "optimize_function_for_size_p (cfun)")
2521 (const_string "V4SF")
2523 (const_string "<ssevecmode>")))])
;; TFmode AND-NOT: operand 1 is complemented ((not:TF ...)) and operand 2
;; may be in memory.  The mnemonic is "andnps" when the insn "mode"
;; attribute is V4SF and "pandn" otherwise; alternative 1 prepends "v" and
;; uses the three-operand form.  prefix_data16 is selected by testing for
;; alternative 0 with mode TI.
2525 (define_insn "*andnottf3"
2526 [(set (match_operand:TF 0 "register_operand" "=x,x")
2528 (not:TF (match_operand:TF 1 "register_operand" "0,x"))
2529 (match_operand:TF 2 "nonimmediate_operand" "xm,xm")))]
2532 static char buf[32];
2535 = (get_attr_mode (insn) == MODE_V4SF) ? "andnps" : "pandn";
2537 switch (which_alternative)
2540 ops = "%s\t{%%2, %%0|%%0, %%2}";
2543 ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
2549 snprintf (buf, sizeof (buf), ops, tmp);
2552 [(set_attr "isa" "noavx,avx")
2553 (set_attr "type" "sselog")
2554 (set (attr "prefix_data16")
2556 (and (eq_attr "alternative" "0")
2557 (eq_attr "mode" "TI"))
2559 (const_string "*")))
2560 (set_attr "prefix" "orig,vex")
2562 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
2563 (const_string "V4SF")
2564 (match_test "TARGET_AVX")
2566 (ior (not (match_test "TARGET_SSE2"))
2567 (match_test "optimize_function_for_size_p (cfun)"))
2568 (const_string "V4SF")
2570 (const_string "TI")))])
;; Scalar (MODEF) logical operation; operands 1 and 2 are commutative ("%")
;; and all operands must be registers.  The suffix is "ps" when the insn
;; "mode" attribute is V4SF and <ssevecmodesuffix> otherwise; alternative 0
;; emits "<logic>%s" (two operands), alternative 1 "v<logic>%s" (three).
2572 (define_insn "*<code><mode>3"
2573 [(set (match_operand:MODEF 0 "register_operand" "=x,x")
2575 (match_operand:MODEF 1 "register_operand" "%0,x")
2576 (match_operand:MODEF 2 "register_operand" "x,x")))]
2577 "SSE_FLOAT_MODE_P (<MODE>mode)"
2579 static char buf[32];
2582 = (get_attr_mode (insn) == MODE_V4SF) ? "ps" : "<ssevecmodesuffix>";
2584 switch (which_alternative)
2587 ops = "<logic>%s\t{%%2, %%0|%%0, %%2}";
2590 ops = "v<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
2596 snprintf (buf, sizeof (buf), ops, suffix);
2599 [(set_attr "isa" "noavx,avx")
2600 (set_attr "type" "sselog")
2601 (set_attr "prefix" "orig,vex")
2603 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
2604 (const_string "V4SF")
2605 (match_test "TARGET_AVX")
2606 (const_string "<ssevecmode>")
2607 (match_test "optimize_function_for_size_p (cfun)")
2608 (const_string "V4SF")
2610 (const_string "<ssevecmode>")))])
;; Expander for TFmode logical operations.  Both inputs may be in memory;
;; the preparation statement calls ix86_fixup_binary_operands_no_copy.
2612 (define_expand "<code>tf3"
2613 [(set (match_operand:TF 0 "register_operand")
2615 (match_operand:TF 1 "nonimmediate_operand")
2616 (match_operand:TF 2 "nonimmediate_operand")))]
2618 "ix86_fixup_binary_operands_no_copy (<CODE>, TFmode, operands);")
;; TFmode logical operation; operands 1 and 2 are commutative ("%") and the
;; condition includes ix86_binary_operator_ok.  The mnemonic is
;; "<logic>ps" when the insn "mode" attribute is V4SF and "p<logic>"
;; otherwise; alternative 1 prepends "v" and uses the three-operand form.
;; prefix_data16 is selected by testing for alternative 0 with mode TI.
2620 (define_insn "*<code>tf3"
2621 [(set (match_operand:TF 0 "register_operand" "=x,x")
2623 (match_operand:TF 1 "nonimmediate_operand" "%0,x")
2624 (match_operand:TF 2 "nonimmediate_operand" "xm,xm")))]
2626 && ix86_binary_operator_ok (<CODE>, TFmode, operands)"
2628 static char buf[32];
2631 = (get_attr_mode (insn) == MODE_V4SF) ? "<logic>ps" : "p<logic>";
2633 switch (which_alternative)
2636 ops = "%s\t{%%2, %%0|%%0, %%2}";
2639 ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
2645 snprintf (buf, sizeof (buf), ops, tmp);
2648 [(set_attr "isa" "noavx,avx")
2649 (set_attr "type" "sselog")
2650 (set (attr "prefix_data16")
2652 (and (eq_attr "alternative" "0")
2653 (eq_attr "mode" "TI"))
2655 (const_string "*")))
2656 (set_attr "prefix" "orig,vex")
2658 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
2659 (const_string "V4SF")
2660 (match_test "TARGET_AVX")
2662 (ior (not (match_test "TARGET_SSE2"))
2663 (match_test "optimize_function_for_size_p (cfun)"))
2664 (const_string "V4SF")
2666 (const_string "TI")))])
2668 ;; There are no floating-point xor instructions for V16SF and V8DF in
2669 ;; AVX512F, but we need them for negation. Instead we use the integer
2670 ;; versions of xor. Maybe there could be a better way to do that.
;; Element-size suffix appended to the integer logical mnemonic for each
;; 512-bit float mode: "d" for V16SF, "q" for V8DF.
2672 (define_mode_attr avx512flogicsuff
2673 [(V16SF "d") (V8DF "q")])
;; Logical operation on 512-bit float vectors (VF_512), emitted with the
;; integer mnemonic vp<logic><avx512flogicsuff> and an EVEX prefix.
;; Operand 1 must be a register; operand 2 may be in memory.
2675 (define_insn "avx512f_<logic><mode>"
2676 [(set (match_operand:VF_512 0 "register_operand" "=v")
2678 (match_operand:VF_512 1 "register_operand" "v")
2679 (match_operand:VF_512 2 "nonimmediate_operand" "vm")))]
2681 "vp<logic><avx512flogicsuff>\t{%2, %1, %0|%0, %1, %2}"
2682 [(set_attr "type" "sselog")
2683 (set_attr "prefix" "evex")])
2685 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2687 ;; FMA floating point multiply/accumulate instructions. These include
2688 ;; scalar versions of the instructions as well as vector versions.
2690 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2692 ;; The standard names for scalar FMA are only available with SSE math enabled.
2693 ;; CPUID bit AVX512F enables evex encoded scalar and 512-bit fma. It doesn't
2694 ;; care about FMA bit, so we enable fma for TARGET_AVX512F even when TARGET_FMA
2695 ;; and TARGET_FMA4 are both false.
2696 ;; TODO: In theory AVX512F does not automatically imply FMA, and without FMA
2697 ;; one must force the EVEX encoding of the fma insns. Ideally we'd improve
2698 ;; GAS to allow proper prefix selection. However, for the moment all hardware
2699 ;; that supports AVX512F also supports FMA so we can ignore this for now.
;; Modes for the standard-name FMA expanders.  The scalar modes (SF, DF)
;; additionally require TARGET_SSE_MATH; the 128/256-bit vector modes need
;; TARGET_FMA or TARGET_FMA4; the 512-bit modes need TARGET_AVX512F.
2700 (define_mode_iterator FMAMODEM
2701 [(SF "TARGET_SSE_MATH && (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F)")
2702 (DF "TARGET_SSE_MATH && (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F)")
2703 (V4SF "TARGET_FMA || TARGET_FMA4")
2704 (V2DF "TARGET_FMA || TARGET_FMA4")
2705 (V8SF "TARGET_FMA || TARGET_FMA4")
2706 (V4DF "TARGET_FMA || TARGET_FMA4")
2707 (V16SF "TARGET_AVX512F")
2708 (V8DF "TARGET_AVX512F")])
;; Standard-name fused multiply-add expander over FMAMODEM; none of the
;; three inputs is negated, and each may be in memory.
2710 (define_expand "fma<mode>4"
2711 [(set (match_operand:FMAMODEM 0 "register_operand")
2713 (match_operand:FMAMODEM 1 "nonimmediate_operand")
2714 (match_operand:FMAMODEM 2 "nonimmediate_operand")
2715 (match_operand:FMAMODEM 3 "nonimmediate_operand")))])
;; Standard-name fused multiply-subtract expander over FMAMODEM: operand 3
;; is negated.
2717 (define_expand "fms<mode>4"
2718 [(set (match_operand:FMAMODEM 0 "register_operand")
2720 (match_operand:FMAMODEM 1 "nonimmediate_operand")
2721 (match_operand:FMAMODEM 2 "nonimmediate_operand")
2722 (neg:FMAMODEM (match_operand:FMAMODEM 3 "nonimmediate_operand"))))])
;; Standard-name negated fused multiply-add expander over FMAMODEM:
;; operand 1 is negated.
2724 (define_expand "fnma<mode>4"
2725 [(set (match_operand:FMAMODEM 0 "register_operand")
2727 (neg:FMAMODEM (match_operand:FMAMODEM 1 "nonimmediate_operand"))
2728 (match_operand:FMAMODEM 2 "nonimmediate_operand")
2729 (match_operand:FMAMODEM 3 "nonimmediate_operand")))])
;; Standard-name negated fused multiply-subtract expander over FMAMODEM:
;; both operand 1 and operand 3 are negated.
2731 (define_expand "fnms<mode>4"
2732 [(set (match_operand:FMAMODEM 0 "register_operand")
2734 (neg:FMAMODEM (match_operand:FMAMODEM 1 "nonimmediate_operand"))
2735 (match_operand:FMAMODEM 2 "nonimmediate_operand")
2736 (neg:FMAMODEM (match_operand:FMAMODEM 3 "nonimmediate_operand"))))])
2738 ;; The builtins for intrinsics are not constrained by SSE math enabled.
;; Modes for the FMA intrinsic patterns.  Unlike FMAMODEM, the scalar modes
;; here do not require TARGET_SSE_MATH.
2739 (define_mode_iterator FMAMODE
2740 [(SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
2741 (DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
2742 (V4SF "TARGET_FMA || TARGET_FMA4")
2743 (V2DF "TARGET_FMA || TARGET_FMA4")
2744 (V8SF "TARGET_FMA || TARGET_FMA4")
2745 (V4DF "TARGET_FMA || TARGET_FMA4")
2746 (V16SF "TARGET_AVX512F")
2747 (V8DF "TARGET_AVX512F")])
;; Fused multiply-add expander over FMAMODE (the iterator whose scalar
;; modes do not require TARGET_SSE_MATH).  All three inputs may be in
;; memory.
2749 (define_expand "fma4i_fmadd_<mode>"
2750 [(set (match_operand:FMAMODE 0 "register_operand")
2752 (match_operand:FMAMODE 1 "nonimmediate_operand")
2753 (match_operand:FMAMODE 2 "nonimmediate_operand")
2754 (match_operand:FMAMODE 3 "nonimmediate_operand")))])
;; Expander for the 512-bit fmadd "maskz" variant.  It forwards operands
;; 0..3 to gen_fma_fmadd_<mode>_maskz_1, passing CONST0_RTX (<MODE>mode) as
;; the extra vector operand followed by the mask register (operand 4).
2756 (define_expand "avx512f_fmadd_<mode>_maskz<round_expand_name>"
2757 [(match_operand:VF_512 0 "register_operand")
2758 (match_operand:VF_512 1 "<round_expand_nimm_predicate>")
2759 (match_operand:VF_512 2 "<round_expand_nimm_predicate>")
2760 (match_operand:VF_512 3 "<round_expand_nimm_predicate>")
2761 (match_operand:<avx512fmaskmode> 4 "register_operand")]
2764 emit_insn (gen_fma_fmadd_<mode>_maskz_1<round_expand_name> (
2765 operands[0], operands[1], operands[2], operands[3],
2766 CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>));
;; FMA modes without the 512-bit vector modes: scalar SF/DF plus the
;; 128/256-bit float vectors.  The 512-bit modes are handled by separate
;; VF_512 patterns.
2770 (define_mode_iterator FMAMODE_NOVF512
2771 [(SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
2772 (DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
2773 (V4SF "TARGET_FMA || TARGET_FMA4")
2774 (V2DF "TARGET_FMA || TARGET_FMA4")
2775 (V8SF "TARGET_FMA || TARGET_FMA4")
2776 (V4DF "TARGET_FMA || TARGET_FMA4")])
;; Fused multiply-add for scalar and 128/256-bit modes.  Alternatives 0-2
;; are the three-operand 132/213/231 forms (isa fma_avx512f); which one is
;; used depends on which input is tied to the destination by a "0"
;; constraint.  Alternatives 3-4 are the four-operand forms (isa fma4).
2778 (define_insn "<sd_mask_codefor>fma_fmadd_<mode><sd_maskz_name><round_name>"
2779 [(set (match_operand:FMAMODE_NOVF512 0 "register_operand" "=v,v,v,x,x")
2780 (fma:FMAMODE_NOVF512
2781 (match_operand:FMAMODE_NOVF512 1 "<round_nimm_predicate>" "%0,0,v,x,x")
2782 (match_operand:FMAMODE_NOVF512 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>,x,m")
2783 (match_operand:FMAMODE_NOVF512 3 "<round_nimm_predicate>" "v,<round_constraint>,0,xm,x")))]
2784 "<sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
2786 vfmadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
2787 vfmadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
2788 vfmadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}
2789 vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
2790 vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2791 [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f,fma4,fma4")
2792 (set_attr "type" "ssemuladd")
2793 (set_attr "mode" "<MODE>")])
;; 512-bit fused multiply-add.  Only the three-operand 132/213/231 forms
;; exist here (all isa fma_avx512f); the form is selected by which input is
;; tied to the destination by a "0" constraint.
2795 (define_insn "<sd_mask_codefor>fma_fmadd_<mode><sd_maskz_name><round_name>"
2796 [(set (match_operand:VF_512 0 "register_operand" "=v,v,v")
2798 (match_operand:VF_512 1 "<round_nimm_predicate>" "%0,0,v")
2799 (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
2800 (match_operand:VF_512 3 "<round_nimm_predicate>" "v,<round_constraint>,0")))]
2801 "<sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
2803 vfmadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
2804 vfmadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
2805 vfmadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
2806 [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f")
2807 (set_attr "type" "ssemuladd")
2808 (set_attr "mode" "<MODE>")])
;; Masked 512-bit fused multiply-add with operand 1 tied to the destination
;; ("0") in both alternatives.  Operand 4 is the mask register, printed as
;; {%4}.  Alternative 0 emits the 132 form, alternative 1 the 213 form.
2810 (define_insn "avx512f_fmadd_<mode>_mask<round_name>"
2811 [(set (match_operand:VF_512 0 "register_operand" "=v,v")
2814 (match_operand:VF_512 1 "register_operand" "0,0")
2815 (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>,v")
2816 (match_operand:VF_512 3 "<round_nimm_predicate>" "v,<round_constraint>"))
2818 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
2821 vfmadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
2822 vfmadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
2823 [(set_attr "isa" "fma_avx512f,fma_avx512f")
2824 (set_attr "type" "ssemuladd")
2825 (set_attr "mode" "<MODE>")])
;; Masked 512-bit fused multiply-add with operand 3 tied to the destination
;; ("0"), emitted as the 231 form.  Operand 4 is the mask register, printed
;; as {%4}.
;; Operands 0 and 1 use the "v" constraint, matching the fmsub/fnmadd/fnmsub
;; _mask3 patterns and the other EVEX FMA patterns; "x" presumably restricts
;; the allocator to a smaller register set than "v" -- confirm against the
;; i386 constraint definitions.
2827 (define_insn "avx512f_fmadd_<mode>_mask3<round_name>"
2828 [(set (match_operand:VF_512 0 "register_operand" "=v")
2831 (match_operand:VF_512 1 "register_operand" "v")
2832 (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>")
2833 (match_operand:VF_512 3 "register_operand" "0"))
2835 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
2837 "vfmadd231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
2838 [(set_attr "isa" "fma_avx512f")
2839 (set_attr "type" "ssemuladd")
2840 (set_attr "mode" "<MODE>")])
;; Fused multiply-subtract (operand 3 negated) for scalar and 128/256-bit
;; modes.  Alternatives 0-2 are the three-operand 132/213/231 forms (isa
;; fma_avx512f); alternatives 3-4 are the four-operand forms (isa fma4).
2842 (define_insn "<sd_mask_codefor>fma_fmsub_<mode><sd_maskz_name><round_name>"
2843 [(set (match_operand:FMAMODE_NOVF512 0 "register_operand" "=v,v,v,x,x")
2844 (fma:FMAMODE_NOVF512
2845 (match_operand:FMAMODE_NOVF512 1 "<round_nimm_predicate>" "%0,0,v,x,x")
2846 (match_operand:FMAMODE_NOVF512 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>,x,m")
2847 (neg:FMAMODE_NOVF512
2848 (match_operand:FMAMODE_NOVF512 3 "<round_nimm_predicate>" "v,<round_constraint>,0,xm,x"))))]
2849 "<sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
2851 vfmsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
2852 vfmsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
2853 vfmsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}
2854 vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
2855 vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2856 [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f,fma4,fma4")
2857 (set_attr "type" "ssemuladd")
2858 (set_attr "mode" "<MODE>")])
;; 512-bit fused multiply-subtract.  Only the three-operand 132/213/231
;; forms exist here (all isa fma_avx512f); the form is selected by which
;; input is tied to the destination by a "0" constraint.
2860 (define_insn "<sd_mask_codefor>fma_fmsub_<mode><sd_maskz_name><round_name>"
2861 [(set (match_operand:VF_512 0 "register_operand" "=v,v,v")
2863 (match_operand:VF_512 1 "<round_nimm_predicate>" "%0,0,v")
2864 (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
2866 (match_operand:VF_512 3 "<round_nimm_predicate>" "v,<round_constraint>,0"))))]
2867 "<sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
2869 vfmsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
2870 vfmsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
2871 vfmsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
2872 [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f")
2873 (set_attr "type" "ssemuladd")
2874 (set_attr "mode" "<MODE>")])
;; Masked 512-bit fused multiply-subtract with operand 1 tied to the
;; destination ("0") in both alternatives.  Operand 4 is the mask register,
;; printed as {%4}.  Alternative 0 emits the 132 form, alternative 1 the
;; 213 form.
2876 (define_insn "avx512f_fmsub_<mode>_mask<round_name>"
2877 [(set (match_operand:VF_512 0 "register_operand" "=v,v")
2880 (match_operand:VF_512 1 "register_operand" "0,0")
2881 (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>,v")
2883 (match_operand:VF_512 3 "<round_nimm_predicate>" "v,<round_constraint>")))
2885 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
2888 vfmsub132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
2889 vfmsub213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
2890 [(set_attr "isa" "fma_avx512f,fma_avx512f")
2891 (set_attr "type" "ssemuladd")
2892 (set_attr "mode" "<MODE>")])
;; Masked 512-bit fused multiply-subtract with operand 3 tied to the
;; destination ("0"), emitted as the 231 form.  Operand 4 is the mask
;; register, printed as {%4}.
2894 (define_insn "avx512f_fmsub_<mode>_mask3<round_name>"
2895 [(set (match_operand:VF_512 0 "register_operand" "=v")
2898 (match_operand:VF_512 1 "register_operand" "v")
2899 (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>")
2901 (match_operand:VF_512 3 "register_operand" "0")))
2903 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
2905 "vfmsub231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
2906 [(set_attr "isa" "fma_avx512f")
2907 (set_attr "type" "ssemuladd")
2908 (set_attr "mode" "<MODE>")])
;; Negated fused multiply-add (operand 1 negated) for scalar and
;; 128/256-bit modes.  Alternatives 0-2 are the three-operand 132/213/231
;; forms (isa fma_avx512f); alternatives 3-4 are the four-operand forms
;; (isa fma4).
2910 (define_insn "<sd_mask_codefor>fma_fnmadd_<mode><sd_maskz_name><round_name>"
2911 [(set (match_operand:FMAMODE_NOVF512 0 "register_operand" "=v,v,v,x,x")
2912 (fma:FMAMODE_NOVF512
2913 (neg:FMAMODE_NOVF512
2914 (match_operand:FMAMODE_NOVF512 1 "<round_nimm_predicate>" "%0,0,v,x,x"))
2915 (match_operand:FMAMODE_NOVF512 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>,x,m")
2916 (match_operand:FMAMODE_NOVF512 3 "<round_nimm_predicate>" "v,<round_constraint>,0,xm,x")))]
2917 "<sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
2919 vfnmadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
2920 vfnmadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
2921 vfnmadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}
2922 vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
2923 vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2924 [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f,fma4,fma4")
2925 (set_attr "type" "ssemuladd")
2926 (set_attr "mode" "<MODE>")])
;; 512-bit negated fused multiply-add.  Only the three-operand 132/213/231
;; forms exist here (all isa fma_avx512f); the form is selected by which
;; input is tied to the destination by a "0" constraint.
2928 (define_insn "<sd_mask_codefor>fma_fnmadd_<mode><sd_maskz_name><round_name>"
2929 [(set (match_operand:VF_512 0 "register_operand" "=v,v,v")
2932 (match_operand:VF_512 1 "<round_nimm_predicate>" "%0,0,v"))
2933 (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
2934 (match_operand:VF_512 3 "<round_nimm_predicate>" "v,<round_constraint>,0")))]
2935 "<sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
2937 vfnmadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
2938 vfnmadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
2939 vfnmadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
2940 [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f")
2941 (set_attr "type" "ssemuladd")
2942 (set_attr "mode" "<MODE>")])
;; Masked 512-bit negated fused multiply-add with operand 1 tied to the
;; destination ("0") in both alternatives.  Operand 4 is the mask register,
;; printed as {%4}.  Alternative 0 emits the 132 form, alternative 1 the
;; 213 form.
2944 (define_insn "avx512f_fnmadd_<mode>_mask<round_name>"
2945 [(set (match_operand:VF_512 0 "register_operand" "=v,v")
2949 (match_operand:VF_512 1 "register_operand" "0,0"))
2950 (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>,v")
2951 (match_operand:VF_512 3 "<round_nimm_predicate>" "v,<round_constraint>"))
2953 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
2956 vfnmadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
2957 vfnmadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
2958 [(set_attr "isa" "fma_avx512f,fma_avx512f")
2959 (set_attr "type" "ssemuladd")
2960 (set_attr "mode" "<MODE>")])
;; Masked 512-bit negated fused multiply-add with operand 3 tied to the
;; destination ("0"), emitted as the 231 form.  Operand 4 is the mask
;; register, printed as {%4}.
2962 (define_insn "avx512f_fnmadd_<mode>_mask3<round_name>"
2963 [(set (match_operand:VF_512 0 "register_operand" "=v")
2967 (match_operand:VF_512 1 "register_operand" "v"))
2968 (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>")
2969 (match_operand:VF_512 3 "register_operand" "0"))
2971 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
2973 "vfnmadd231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
2974 [(set_attr "isa" "fma_avx512f")
2975 (set_attr "type" "ssemuladd")
2976 (set_attr "mode" "<MODE>")])
;; Negated fused multiply-subtract (operands 1 and 3 both negated) for
;; scalar and 128/256-bit modes.  Alternatives 0-2 are the three-operand
;; 132/213/231 forms (isa fma_avx512f); alternatives 3-4 are the
;; four-operand forms (isa fma4).
2978 (define_insn "<sd_mask_codefor>fma_fnmsub_<mode><sd_maskz_name><round_name>"
2979 [(set (match_operand:FMAMODE_NOVF512 0 "register_operand" "=v,v,v,x,x")
2980 (fma:FMAMODE_NOVF512
2981 (neg:FMAMODE_NOVF512
2982 (match_operand:FMAMODE_NOVF512 1 "<round_nimm_predicate>" "%0,0,v,x,x"))
2983 (match_operand:FMAMODE_NOVF512 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>,x,m")
2984 (neg:FMAMODE_NOVF512
2985 (match_operand:FMAMODE_NOVF512 3 "<round_nimm_predicate>" "v,<round_constraint>,0,xm,x"))))]
2986 "<sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
2988 vfnmsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
2989 vfnmsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
2990 vfnmsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}
2991 vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
2992 vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2993 [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f,fma4,fma4")
2994 (set_attr "type" "ssemuladd")
2995 (set_attr "mode" "<MODE>")])
;; 512-bit negated fused multiply-subtract.  Only the three-operand
;; 132/213/231 forms exist here (all isa fma_avx512f); the form is selected
;; by which input is tied to the destination by a "0" constraint.
2997 (define_insn "<sd_mask_codefor>fma_fnmsub_<mode><sd_maskz_name><round_name>"
2998 [(set (match_operand:VF_512 0 "register_operand" "=v,v,v")
3001 (match_operand:VF_512 1 "<round_nimm_predicate>" "%0,0,v"))
3002 (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
3004 (match_operand:VF_512 3 "<round_nimm_predicate>" "v,<round_constraint>,0"))))]
3005 "<sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
3007 vfnmsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
3008 vfnmsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
3009 vfnmsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
3010 [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f")
3011 (set_attr "type" "ssemuladd")
3012 (set_attr "mode" "<MODE>")])
;; Masked 512-bit negated fused multiply-subtract with operand 1 tied to
;; the destination ("0") in both alternatives.  Operand 4 is the mask
;; register, printed as {%4}.  Alternative 0 emits the 132 form,
;; alternative 1 the 213 form.
3014 (define_insn "avx512f_fnmsub_<mode>_mask<round_name>"
3015 [(set (match_operand:VF_512 0 "register_operand" "=v,v")
3019 (match_operand:VF_512 1 "register_operand" "0,0"))
3020 (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>,v")
3022 (match_operand:VF_512 3 "<round_nimm_predicate>" "v,<round_constraint>")))
3024 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
3027 vfnmsub132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
3028 vfnmsub213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
3029 [(set_attr "isa" "fma_avx512f,fma_avx512f")
3030 (set_attr "type" "ssemuladd")
3031 (set_attr "mode" "<MODE>")])
;; Masked 512-bit negated fused multiply-subtract with operand 3 tied to
;; the destination ("0"), emitted as the 231 form.  Operand 4 is the mask
;; register, printed as {%4}.
3033 (define_insn "avx512f_fnmsub_<mode>_mask3<round_name>"
3034 [(set (match_operand:VF_512 0 "register_operand" "=v")
3038 (match_operand:VF_512 1 "register_operand" "v"))
3039 (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>")
3041 (match_operand:VF_512 3 "register_operand" "0")))
3043 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
3045 "vfnmsub231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
3046 [(set_attr "isa" "fma_avx512f")
3047 (set_attr "type" "ssemuladd")
3048 (set_attr "mode" "<MODE>")])
3050 ;; FMA parallel floating point multiply addsub and subadd operations.
3052 ;; It would be possible to represent these without the UNSPEC as a vec_merge of
3055 ;; (fma op1 op2 op3)
3056 ;; (fma op1 op2 (neg op3))
3059 ;; But this doesn't seem useful in practice.
;; Expander for the multiply add/sub operation on VF vectors.  The three
;; inputs may be in memory.  Enabled when any of TARGET_FMA, TARGET_FMA4 or
;; TARGET_AVX512F is set.
3061 (define_expand "fmaddsub_<mode>"
3062 [(set (match_operand:VF 0 "register_operand")
3064 [(match_operand:VF 1 "nonimmediate_operand")
3065 (match_operand:VF 2 "nonimmediate_operand")
3066 (match_operand:VF 3 "nonimmediate_operand")]
3068 "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
;; Expander for the 512-bit fmaddsub "maskz" variant.  It forwards operands
;; 0..3 to gen_fma_fmaddsub_<mode>_maskz_1, passing CONST0_RTX (<MODE>mode)
;; as the extra vector operand followed by the mask register (operand 4).
3070 (define_expand "avx512f_fmaddsub_<mode>_maskz<round_expand_name>"
3071 [(match_operand:VF_512 0 "register_operand")
3072 (match_operand:VF_512 1 "<round_expand_nimm_predicate>")
3073 (match_operand:VF_512 2 "<round_expand_nimm_predicate>")
3074 (match_operand:VF_512 3 "<round_expand_nimm_predicate>")
3075 (match_operand:<avx512fmaskmode> 4 "register_operand")]
3078 emit_insn (gen_fma_fmaddsub_<mode>_maskz_1<round_expand_name> (
3079 operands[0], operands[1], operands[2], operands[3],
3080 CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>));
;; fmaddsub for the VF_128_256 modes, five alternatives.  Alternatives 0-2
;; (isa fma_avx512f) emit the three-operand vfmaddsub132/213/231 forms, in
;; which one input is tied to the destination.  Alternatives 3-4 (isa fma4)
;; emit the four-operand vfmaddsub form with "x" registers and at most one
;; memory input.
3084 (define_insn "<sd_mask_codefor>fma_fmaddsub_<mode><sd_maskz_name><round_name>"
3085 [(set (match_operand:VF_128_256 0 "register_operand" "=v,v,v,x,x")
3087 [(match_operand:VF_128_256 1 "<round_nimm_predicate>" "%0,0,v,x,x")
3088 (match_operand:VF_128_256 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>,x,m")
3089 (match_operand:VF_128_256 3 "<round_nimm_predicate>" "v,<round_constraint>,0,xm,x")]
3091 "(TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F) && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
3093 vfmaddsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
3094 vfmaddsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
3095 vfmaddsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}
3096 vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
3097 vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
3098 [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f,fma4,fma4")
3099 (set_attr "type" "ssemuladd")
3100 (set_attr "mode" "<MODE>")])
;; fmaddsub for the VF_512 modes; requires AVX512F.  The three alternatives
;; pick the vfmaddsub132, 213 and 231 encodings according to which input
;; (operand 1 or operand 3) shares the destination register.
3102 (define_insn "<sd_mask_codefor>fma_fmaddsub_<mode><sd_maskz_name><round_name>"
3103 [(set (match_operand:VF_512 0 "register_operand" "=v,v,v")
3105 [(match_operand:VF_512 1 "<round_nimm_predicate>" "%0,0,v")
3106 (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
3107 (match_operand:VF_512 3 "<round_nimm_predicate>" "v,<round_constraint>,0")]
3109 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
3111 vfmaddsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
3112 vfmaddsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
3113 vfmaddsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
3114 [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f")
3115 (set_attr "type" "ssemuladd")
3116 (set_attr "mode" "<MODE>")])
;; Masked fmaddsub for the VF_512 modes.  Operand 1 is tied to the
;; destination in both alternatives, so only the 132 and 213 forms are
;; emitted.  Operand 4 is the mask register ("Yk"), printed as %{%4%}.
3118 (define_insn "avx512f_fmaddsub_<mode>_mask<round_name>"
3119 [(set (match_operand:VF_512 0 "register_operand" "=v,v")
3122 [(match_operand:VF_512 1 "register_operand" "0,0")
3123 (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>,v")
3124 (match_operand:VF_512 3 "<round_nimm_predicate>" "v,<round_constraint>")]
3127 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
3130 vfmaddsub132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
3131 vfmaddsub213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
3132 [(set_attr "isa" "fma_avx512f,fma_avx512f")
3133 (set_attr "type" "ssemuladd")
3134 (set_attr "mode" "<MODE>")])
;; Masked fmaddsub for the VF_512 modes, 231 form only.  Operand 3 is tied
;; to the destination (constraint "0"); operand 4 is the mask register
;; ("Yk"), printed as %{%4%} right after the destination.
3136 (define_insn "avx512f_fmaddsub_<mode>_mask3<round_name>"
3137 [(set (match_operand:VF_512 0 "register_operand" "=v")
3140 [(match_operand:VF_512 1 "register_operand" "v")
3141 (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>")
3142 (match_operand:VF_512 3 "register_operand" "0")]
3145 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
3147 "vfmaddsub231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
3148 [(set_attr "isa" "fma_avx512f")
3149 (set_attr "type" "ssemuladd")
3150 (set_attr "mode" "<MODE>")])
;; fmsubadd for the VF_128_256 modes, five alternatives.  Alternatives 0-2
;; (isa fma_avx512f) emit the three-operand vfmsubadd132/213/231 forms;
;; alternatives 3-4 (isa fma4) emit the four-operand vfmsubadd form with
;; "x" registers and at most one memory input.
3152 (define_insn "<sd_mask_codefor>fma_fmsubadd_<mode><sd_maskz_name><round_name>"
3153 [(set (match_operand:VF_128_256 0 "register_operand" "=v,v,v,x,x")
3155 [(match_operand:VF_128_256 1 "<round_nimm_predicate>" "%0,0,v,x,x")
3156 (match_operand:VF_128_256 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>,x,m")
3158 (match_operand:VF_128_256 3 "<round_nimm_predicate>" "v,<round_constraint>,0,xm,x"))]
3160 "(TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F) && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
3162 vfmsubadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
3163 vfmsubadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
3164 vfmsubadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}
3165 vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
3166 vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
3167 [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f,fma4,fma4")
3168 (set_attr "type" "ssemuladd")
3169 (set_attr "mode" "<MODE>")])
;; fmsubadd for the VF_512 modes; requires AVX512F.  The three alternatives
;; pick the vfmsubadd132, 213 and 231 encodings according to which input
;; (operand 1 or operand 3) shares the destination register.
3171 (define_insn "<sd_mask_codefor>fma_fmsubadd_<mode><sd_maskz_name><round_name>"
3172 [(set (match_operand:VF_512 0 "register_operand" "=v,v,v")
3174 [(match_operand:VF_512 1 "<round_nimm_predicate>" "%0,0,v")
3175 (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
3177 (match_operand:VF_512 3 "<round_nimm_predicate>" "v,<round_constraint>,0"))]
3179 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
3181 vfmsubadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
3182 vfmsubadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
3183 vfmsubadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
3184 [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f")
3185 (set_attr "type" "ssemuladd")
3186 (set_attr "mode" "<MODE>")])
;; Masked fmsubadd for the VF_512 modes.  Operand 1 is tied to the
;; destination in both alternatives, so only the 132 and 213 forms are
;; emitted.  Operand 4 is the mask register ("Yk"), printed as %{%4%}.
3188 (define_insn "avx512f_fmsubadd_<mode>_mask<round_name>"
3189 [(set (match_operand:VF_512 0 "register_operand" "=v,v")
3192 [(match_operand:VF_512 1 "register_operand" "0,0")
3193 (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>,v")
3195 (match_operand:VF_512 3 "<round_nimm_predicate>" "v,<round_constraint>"))]
3198 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
3201 vfmsubadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
3202 vfmsubadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
3203 [(set_attr "isa" "fma_avx512f,fma_avx512f")
3204 (set_attr "type" "ssemuladd")
3205 (set_attr "mode" "<MODE>")])
;; Masked fmsubadd for the VF_512 modes, 231 form only.  Operand 3 is tied
;; to the destination (constraint "0"); operand 4 is the mask register
;; ("Yk"), printed as %{%4%} right after the destination.
3207 (define_insn "avx512f_fmsubadd_<mode>_mask3<round_name>"
3208 [(set (match_operand:VF_512 0 "register_operand" "=v")
3211 [(match_operand:VF_512 1 "register_operand" "v")
3212 (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>")
3214 (match_operand:VF_512 3 "register_operand" "0"))]
3217 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
3219 "vfmsubadd231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
3220 [(set_attr "isa" "fma_avx512f")
3221 (set_attr "type" "ssemuladd")
3222 (set_attr "mode" "<MODE>")])
3224 ;; FMA3 floating point scalar intrinsics. These merge result with
3225 ;; high-order elements from the destination register.
;; Expander for the FMA3 scalar-intrinsic fmadd over the VF_128 modes.
;; The three inputs take their predicate from <round_nimm_predicate>.
3227 (define_expand "fmai_vmfmadd_<mode><round_name>"
3228 [(set (match_operand:VF_128 0 "register_operand")
3231 (match_operand:VF_128 1 "<round_nimm_predicate>")
3232 (match_operand:VF_128 2 "<round_nimm_predicate>")
3233 (match_operand:VF_128 3 "<round_nimm_predicate>"))
;; Scalar vfmadd for the VF_128 modes.  Operand 1 is tied to the
;; destination in both alternatives; the 132 form is used when operand 2
;; carries <round_constraint>, the 213 form when operand 3 does.
;; NOTE(review): the pattern name has no <round_name> suffix although the
;; operands and templates use <round_*> attributes, unlike the
;; *fmai_fnmadd/*fmai_fnmsub patterns -- confirm this is intended.
3238 (define_insn "*fmai_fmadd_<mode>"
3239 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
3242 (match_operand:VF_128 1 "<round_nimm_predicate>" " 0, 0")
3243 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>, v")
3244 (match_operand:VF_128 3 "<round_nimm_predicate>" " v,<round_constraint>"))
3247 "TARGET_FMA || TARGET_AVX512F"
3249 vfmadd132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
3250 vfmadd213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
3251 [(set_attr "type" "ssemuladd")
3252 (set_attr "mode" "<MODE>")])
;; Scalar vfmsub for the VF_128 modes.  Operand 1 is tied to the
;; destination in both alternatives; the 132 form is used when operand 2
;; carries <round_constraint>, the 213 form when operand 3 does.
;; NOTE(review): the pattern name has no <round_name> suffix although the
;; operands and templates use <round_*> attributes, unlike the
;; *fmai_fnmadd/*fmai_fnmsub patterns -- confirm this is intended.
3254 (define_insn "*fmai_fmsub_<mode>"
3255 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
3258 (match_operand:VF_128 1 "<round_nimm_predicate>" "0,0")
3259 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>,v")
3261 (match_operand:VF_128 3 "<round_nimm_predicate>" " v,<round_constraint>")))
3264 "TARGET_FMA || TARGET_AVX512F"
3266 vfmsub132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
3267 vfmsub213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
3268 [(set_attr "type" "ssemuladd")
3269 (set_attr "mode" "<MODE>")])
;; Scalar vfnmadd for the VF_128 modes.  Operand 2 appears first in the
;; RTL, but operand 1 is the one tied to the destination.  Emits the 132
;; or the 213 form depending on the alternative.
3271 (define_insn "*fmai_fnmadd_<mode><round_name>"
3272 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
3276 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>,v"))
3277 (match_operand:VF_128 1 "<round_nimm_predicate>" "0,0")
3278 (match_operand:VF_128 3 "<round_nimm_predicate>" "v,<round_constraint>"))
3281 "TARGET_FMA || TARGET_AVX512F"
3283 vfnmadd132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
3284 vfnmadd213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
3285 [(set_attr "type" "ssemuladd")
3286 (set_attr "mode" "<MODE>")])
;; Scalar vfnmsub for the VF_128 modes.  Operand 2 appears first in the
;; RTL, but operand 1 is the one tied to the destination.  Emits the 132
;; or the 213 form depending on the alternative.
3288 (define_insn "*fmai_fnmsub_<mode><round_name>"
3289 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
3293 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>, v"))
3294 (match_operand:VF_128 1 "<round_nimm_predicate>" " 0, 0")
3296 (match_operand:VF_128 3 "<round_nimm_predicate>" " v,<round_constraint>")))
3299 "TARGET_FMA || TARGET_AVX512F"
3301 vfnmsub132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
3302 vfnmsub213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
3303 [(set_attr "type" "ssemuladd")
3304 (set_attr "mode" "<MODE>")])
3306 ;; FMA4 floating point scalar intrinsics. These write the
3307 ;; entire destination register, with the high-order elements zeroed.
;; Expander for the FMA4 scalar-intrinsic fmadd over the VF_128 modes.
;; The preparation statement sets operands[4] to the zero vector of the
;; pattern's mode; the caller supplies operands 0-3 only.
3309 (define_expand "fma4i_vmfmadd_<mode>"
3310 [(set (match_operand:VF_128 0 "register_operand")
3313 (match_operand:VF_128 1 "nonimmediate_operand")
3314 (match_operand:VF_128 2 "nonimmediate_operand")
3315 (match_operand:VF_128 3 "nonimmediate_operand"))
3319 "operands[4] = CONST0_RTX (<MODE>mode);")
;; FMA4 scalar vfmadd, four-operand form.  Operand 4 must be a zero
;; constant (const0_operand).  The constraints allow memory for operand 3
;; (alternative 0) or operand 2 (alternative 1), never both.
3321 (define_insn "*fma4i_vmfmadd_<mode>"
3322 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
3325 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
3326 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
3327 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
3328 (match_operand:VF_128 4 "const0_operand")
3331 "vfmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
3332 [(set_attr "type" "ssemuladd")
3333 (set_attr "mode" "<MODE>")])
;; FMA4 scalar vfmsub, four-operand form.  Operand 4 must be a zero
;; constant (const0_operand).  The constraints allow memory for operand 3
;; (alternative 0) or operand 2 (alternative 1), never both.
3335 (define_insn "*fma4i_vmfmsub_<mode>"
3336 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
3339 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
3340 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
3342 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
3343 (match_operand:VF_128 4 "const0_operand")
3346 "vfmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
3347 [(set_attr "type" "ssemuladd")
3348 (set_attr "mode" "<MODE>")])
;; FMA4 scalar vfnmadd, four-operand form.  Operand 4 must be a zero
;; constant (const0_operand).  The constraints allow memory for operand 3
;; (alternative 0) or operand 2 (alternative 1), never both.
3350 (define_insn "*fma4i_vmfnmadd_<mode>"
3351 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
3355 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
3356 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
3357 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
3358 (match_operand:VF_128 4 "const0_operand")
3361 "vfnmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
3362 [(set_attr "type" "ssemuladd")
3363 (set_attr "mode" "<MODE>")])
;; FMA4 scalar vfnmsub, four-operand form.  Operand 4 must be a zero
;; constant (const0_operand).  The constraints allow memory for operand 3
;; (alternative 0) or operand 2 (alternative 1), never both.
3365 (define_insn "*fma4i_vmfnmsub_<mode>"
3366 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
3370 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
3371 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
3373 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
3374 (match_operand:VF_128 4 "const0_operand")
3377 "vfnmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
3378 [(set_attr "type" "ssemuladd")
3379 (set_attr "mode" "<MODE>")])
3381 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3383 ;; Parallel single-precision floating point conversion operations
3385 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2ps: converts the V2SI operand 2 ("y" register or memory) to V2SF.
;; Operand 1 is a V4SF register tied to the destination, so the
;; instruction is emitted with two operands only.
3387 (define_insn "sse_cvtpi2ps"
3388 [(set (match_operand:V4SF 0 "register_operand" "=x")
3391 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
3392 (match_operand:V4SF 1 "register_operand" "0")
3395 "cvtpi2ps\t{%2, %0|%0, %2}"
3396 [(set_attr "type" "ssecvt")
3397 (set_attr "mode" "V4SF")])
;; cvtps2pi: V4SF source (register or memory) to a V2SI result in a "y"
;; register.  The RTL wraps the source in a V4SI unspec and selects
;; elements 0 and 1 of it.
3399 (define_insn "sse_cvtps2pi"
3400 [(set (match_operand:V2SI 0 "register_operand" "=y")
3402 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
3404 (parallel [(const_int 0) (const_int 1)])))]
3406 "cvtps2pi\t{%1, %0|%0, %q1}"
3407 [(set_attr "type" "ssecvt")
3408 (set_attr "unit" "mmx")
3409 (set_attr "mode" "DI")])
;; cvttps2pi: V4SF source (register or memory) to a V2SI result in a "y"
;; register.  The RTL applies (fix:V4SI ...) to the source and selects
;; elements 0 and 1 of the result.
3411 (define_insn "sse_cvttps2pi"
3412 [(set (match_operand:V2SI 0 "register_operand" "=y")
3414 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
3415 (parallel [(const_int 0) (const_int 1)])))]
3417 "cvttps2pi\t{%1, %0|%0, %q1}"
3418 [(set_attr "type" "ssecvt")
3419 (set_attr "unit" "mmx")
3420 (set_attr "prefix_rep" "0")
3421 (set_attr "mode" "SF")])
;; Converts SImode operand 2 to SF (cvtsi2ss / vcvtsi2ss).  Alternatives
;; 0-1 (isa noavx) are the two-operand forms with operand 1 tied to the
;; destination; alternative 2 (isa avx) is the three-operand form and is
;; the only one whose template carries <round_op3>.
3423 (define_insn "sse_cvtsi2ss<round_name>"
3424 [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
3427 (float:SF (match_operand:SI 2 "<round_nimm_predicate>" "r,m,<round_constraint3>")))
3428 (match_operand:V4SF 1 "register_operand" "0,0,v")
3432 cvtsi2ss\t{%2, %0|%0, %2}
3433 cvtsi2ss\t{%2, %0|%0, %2}
3434 vcvtsi2ss\t{<round_op3>%2, %1, %0|%0, %1, %2<round_op3>}"
3435 [(set_attr "isa" "noavx,noavx,avx")
3436 (set_attr "type" "sseicvt")
3437 (set_attr "athlon_decode" "vector,double,*")
3438 (set_attr "amdfam10_decode" "vector,double,*")
3439 (set_attr "bdver1_decode" "double,direct,*")
3440 (set_attr "btver2_decode" "double,double,double")
3441 (set_attr "prefix" "orig,orig,maybe_evex")
3442 (set_attr "mode" "SF")])
;; DImode-source variant of cvtsi2ss (cvtsi2ssq / vcvtsi2ssq); requires
;; TARGET_64BIT.  Alternatives 0-1 (isa noavx) tie operand 1 to the
;; destination; alternative 2 (isa avx) is the three-operand form and the
;; only one whose template carries <round_op3>.
3444 (define_insn "sse_cvtsi2ssq<round_name>"
3445 [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
3448 (float:SF (match_operand:DI 2 "<round_nimm_predicate>" "r,m,<round_constraint3>")))
3449 (match_operand:V4SF 1 "register_operand" "0,0,v")
3451 "TARGET_SSE && TARGET_64BIT"
3453 cvtsi2ssq\t{%2, %0|%0, %2}
3454 cvtsi2ssq\t{%2, %0|%0, %2}
3455 vcvtsi2ssq\t{<round_op3>%2, %1, %0|%0, %1, %2<round_op3>}"
3456 [(set_attr "isa" "noavx,noavx,avx")
3457 (set_attr "type" "sseicvt")
3458 (set_attr "athlon_decode" "vector,double,*")
3459 (set_attr "amdfam10_decode" "vector,double,*")
3460 (set_attr "bdver1_decode" "double,direct,*")
3461 (set_attr "btver2_decode" "double,double,double")
3462 (set_attr "length_vex" "*,*,4")
3463 (set_attr "prefix_rex" "1,1,*")
3464 (set_attr "prefix" "orig,orig,maybe_evex")
3465 (set_attr "mode" "SF")])
;; cvtss2si: converts element 0 of V4SF operand 1 to an SImode register
;; via UNSPEC_FIX_NOTRUNC.  Intel syntax prints the source with the %k
;; operand modifier.
3467 (define_insn "sse_cvtss2si<round_name>"
3468 [(set (match_operand:SI 0 "register_operand" "=r,r")
3471 (match_operand:V4SF 1 "<round_nimm_predicate>" "v,<round_constraint2>")
3472 (parallel [(const_int 0)]))]
3473 UNSPEC_FIX_NOTRUNC))]
3475 "%vcvtss2si\t{<round_op2>%1, %0|%0, %k1<round_op2>}"
3476 [(set_attr "type" "sseicvt")
3477 (set_attr "athlon_decode" "double,vector")
3478 (set_attr "bdver1_decode" "double,double")
3479 (set_attr "prefix_rep" "1")
3480 (set_attr "prefix" "maybe_vex")
3481 (set_attr "mode" "SI")])
;; cvtss2si on a scalar SFmode operand (register or memory) rather than a
;; vector element; the result is an SImode register, modelled with
;; UNSPEC_FIX_NOTRUNC.
3483 (define_insn "sse_cvtss2si_2"
3484 [(set (match_operand:SI 0 "register_operand" "=r,r")
3485 (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "v,m")]
3486 UNSPEC_FIX_NOTRUNC))]
3488 "%vcvtss2si\t{%1, %0|%0, %k1}"
3489 [(set_attr "type" "sseicvt")
3490 (set_attr "athlon_decode" "double,vector")
3491 (set_attr "amdfam10_decode" "double,double")
3492 (set_attr "bdver1_decode" "double,double")
3493 (set_attr "prefix_rep" "1")
3494 (set_attr "prefix" "maybe_vex")
3495 (set_attr "mode" "SI")])
;; DImode-result variant of cvtss2si; requires TARGET_64BIT.  Converts
;; element 0 of V4SF operand 1 via UNSPEC_FIX_NOTRUNC.  The {q} in the
;; template is a syntax-dependent mnemonic suffix.
3497 (define_insn "sse_cvtss2siq<round_name>"
3498 [(set (match_operand:DI 0 "register_operand" "=r,r")
3501 (match_operand:V4SF 1 "<round_nimm_predicate>" "v,<round_constraint2>")
3502 (parallel [(const_int 0)]))]
3503 UNSPEC_FIX_NOTRUNC))]
3504 "TARGET_SSE && TARGET_64BIT"
3505 "%vcvtss2si{q}\t{<round_op2>%1, %0|%0, %k1<round_op2>}"
3506 [(set_attr "type" "sseicvt")
3507 (set_attr "athlon_decode" "double,vector")
3508 (set_attr "bdver1_decode" "double,double")
3509 (set_attr "prefix_rep" "1")
3510 (set_attr "prefix" "maybe_vex")
3511 (set_attr "mode" "DI")])
;; DImode-result cvtss2si on a scalar SFmode operand (register or memory);
;; requires TARGET_64BIT.  Modelled with UNSPEC_FIX_NOTRUNC.
3513 (define_insn "sse_cvtss2siq_2"
3514 [(set (match_operand:DI 0 "register_operand" "=r,r")
3515 (unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "v,m")]
3516 UNSPEC_FIX_NOTRUNC))]
3517 "TARGET_SSE && TARGET_64BIT"
3518 "%vcvtss2si{q}\t{%1, %0|%0, %k1}"
3519 [(set_attr "type" "sseicvt")
3520 (set_attr "athlon_decode" "double,vector")
3521 (set_attr "amdfam10_decode" "double,double")
3522 (set_attr "bdver1_decode" "double,double")
3523 (set_attr "prefix_rep" "1")
3524 (set_attr "prefix" "maybe_vex")
3525 (set_attr "mode" "DI")])
;; cvttss2si: converts element 0 of V4SF operand 1 to an SImode register.
;; Uses the <round_saeonly_*> attribute family instead of the <round_*>
;; family used by the non-truncating cvtss2si pattern.
3527 (define_insn "sse_cvttss2si<round_saeonly_name>"
3528 [(set (match_operand:SI 0 "register_operand" "=r,r")
3531 (match_operand:V4SF 1 "<round_saeonly_nimm_predicate>" "v,<round_saeonly_constraint2>")
3532 (parallel [(const_int 0)]))))]
3534 "%vcvttss2si\t{<round_saeonly_op2>%1, %0|%0, %k1<round_saeonly_op2>}"
3535 [(set_attr "type" "sseicvt")
3536 (set_attr "athlon_decode" "double,vector")
3537 (set_attr "amdfam10_decode" "double,double")
3538 (set_attr "bdver1_decode" "double,double")
3539 (set_attr "prefix_rep" "1")
3540 (set_attr "prefix" "maybe_vex")
3541 (set_attr "mode" "SI")])
;; DImode-result variant of cvttss2si; requires TARGET_64BIT.  Converts
;; element 0 of V4SF operand 1.
;; NOTE(review): the second alternative uses <round_saeonly_constraint>,
;; whereas sse_cvttss2si (the SImode pattern) uses
;; <round_saeonly_constraint2> -- confirm the difference is intended.
3543 (define_insn "sse_cvttss2siq<round_saeonly_name>"
3544 [(set (match_operand:DI 0 "register_operand" "=r,r")
3547 (match_operand:V4SF 1 "<round_saeonly_nimm_predicate>" "v,<round_saeonly_constraint>")
3548 (parallel [(const_int 0)]))))]
3549 "TARGET_SSE && TARGET_64BIT"
3550 "%vcvttss2si{q}\t{<round_saeonly_op2>%1, %0|%0, %k1<round_saeonly_op2>}"
3551 [(set_attr "type" "sseicvt")
3552 (set_attr "athlon_decode" "double,vector")
3553 (set_attr "amdfam10_decode" "double,double")
3554 (set_attr "bdver1_decode" "double,double")
3555 (set_attr "prefix_rep" "1")
3556 (set_attr "prefix" "maybe_vex")
3557 (set_attr "mode" "DI")])
;; vcvtusi2ss/sd with an SImode source: unsigned conversion of operand 2
;; to the scalar mode of the VF_128 pattern mode, duplicated across a
;; vector.  Operand 1 is a separate source register (three-operand form).
;; Requires AVX512F and <round_modev4sf_condition>.
3559 (define_insn "cvtusi2<ssescalarmodesuffix>32<round_name>"
3560 [(set (match_operand:VF_128 0 "register_operand" "=v")
3562 (vec_duplicate:VF_128
3563 (unsigned_float:<ssescalarmode>
3564 (match_operand:SI 2 "<round_nimm_predicate>" "<round_constraint3>")))
3565 (match_operand:VF_128 1 "register_operand" "v")
3567 "TARGET_AVX512F && <round_modev4sf_condition>"
3568 "vcvtusi2<ssescalarmodesuffix>\t{<round_op3>%2, %1, %0|%0, %1, %2<round_op3>}"
3569 [(set_attr "type" "sseicvt")
3570 (set_attr "prefix" "evex")
3571 (set_attr "mode" "<ssescalarmode>")])
;; vcvtusi2ss/sd with a DImode source: unsigned conversion of operand 2 to
;; the scalar mode of the VF_128 pattern mode, duplicated across a vector.
;; Operand 1 is a separate source register.  Requires AVX512F and
;; TARGET_64BIT.
3573 (define_insn "cvtusi2<ssescalarmodesuffix>64<round_name>"
3574 [(set (match_operand:VF_128 0 "register_operand" "=v")
3576 (vec_duplicate:VF_128
3577 (unsigned_float:<ssescalarmode>
3578 (match_operand:DI 2 "<round_nimm_predicate>" "<round_constraint3>")))
3579 (match_operand:VF_128 1 "register_operand" "v")
3581 "TARGET_AVX512F && TARGET_64BIT"
3582 "vcvtusi2<ssescalarmodesuffix>\t{<round_op3>%2, %1, %0|%0, %1, %2<round_op3>}"
3583 [(set_attr "type" "sseicvt")
3584 (set_attr "prefix" "evex")
3585 (set_attr "mode" "<ssescalarmode>")])
;; cvtdq2ps: converts the integer vector of mode <sseintvecmode> to the
;; VF1 pattern mode.  The name carries both <mask_name> and <round_name>,
;; and the condition adds their 512-bit-mode checks to TARGET_SSE2.
3587 (define_insn "float<sseintvecmodelower><mode>2<mask_name><round_name>"
3588 [(set (match_operand:VF1 0 "register_operand" "=v")
3590 (match_operand:<sseintvecmode> 1 "<round_nimm_predicate>" "<round_constraint>")))]
3591 "TARGET_SSE2 && <mask_mode512bit_condition> && <round_mode512bit_condition>"
3592 "%vcvtdq2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
3593 [(set_attr "type" "ssecvt")
3594 (set_attr "prefix" "maybe_vex")
3595 (set_attr "mode" "<sseinsnmode>")])
;; vcvtudq2ps: unsigned conversion of a V16SI operand to V16SF.  EVEX
;; encoded; the name carries the <mask_name> and <round_name> suffixes.
3597 (define_insn "ufloatv16siv16sf2<mask_name><round_name>"
3598 [(set (match_operand:V16SF 0 "register_operand" "=v")
3599 (unsigned_float:V16SF
3600 (match_operand:V16SI 1 "<round_nimm_predicate>" "<round_constraint>")))]
3602 "vcvtudq2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
3603 [(set_attr "type" "ssecvt")
3604 (set_attr "prefix" "evex")
3605 (set_attr "mode" "V16SF")])
;; Expander for unsigned integer vector to VF1 conversion.  V16SF goes
;; through gen_ufloatv16siv16sf2; ix86_expand_vector_convert_uns_vsivsf is
;; called for the other modes (presumably the else branch -- TODO confirm
;; against the full pattern body).  The condition requires SSE2, plus AVX2
;; for any mode other than V4SF.
3607 (define_expand "floatuns<sseintvecmodelower><mode>2"
3608 [(match_operand:VF1 0 "register_operand")
3609 (match_operand:<sseintvecmode> 1 "register_operand")]
3610 "TARGET_SSE2 && (<MODE>mode == V4SFmode || TARGET_AVX2)"
3612 if (<MODE>mode == V16SFmode)
3613 emit_insn (gen_ufloatv16siv16sf2 (operands[0], operands[1]));
3615 ix86_expand_vector_convert_uns_vsivsf (operands[0], operands[1]);
3621 ;; For <sse2_avx_avx512f>_fix_notrunc<sf2simodelower><mode> insn pattern
;; Maps each SImode vector mode to the lower-case name of the SFmode
;; vector mode with the same element count (V16SI -> v16sf, and so on).
3622 (define_mode_attr sf2simodelower
3623 [(V16SI "v16sf") (V8SI "v8sf") (V4SI "v4sf")])
;; cvtps2dq for the VI4_AVX modes: converts the <ssePSmode> operand
;; (register or memory) via UNSPEC_FIX_NOTRUNC.  The prefix_data16
;; attribute is computed from a TARGET_AVX test instead of being fixed.
3625 (define_insn "<sse2_avx_avx512f>_fix_notrunc<sf2simodelower><mode>"
3626 [(set (match_operand:VI4_AVX 0 "register_operand" "=v")
3628 [(match_operand:<ssePSmode> 1 "nonimmediate_operand" "vm")]
3629 UNSPEC_FIX_NOTRUNC))]
3631 "%vcvtps2dq\t{%1, %0|%0, %1}"
3632 [(set_attr "type" "ssecvt")
3633 (set (attr "prefix_data16")
3635 (match_test "TARGET_AVX")
3637 (const_string "1")))
3638 (set_attr "prefix" "maybe_vex")
3639 (set_attr "mode" "<sseinsnmode>")])
;; vcvtps2dq on V16SF -> V16SI via UNSPEC_FIX_NOTRUNC.  EVEX encoded; the
;; name carries the <mask_name> and <round_name> suffixes.
3641 (define_insn "<mask_codefor>avx512f_fix_notruncv16sfv16si<mask_name><round_name>"
3642 [(set (match_operand:V16SI 0 "register_operand" "=v")
3644 [(match_operand:V16SF 1 "<round_nimm_predicate>" "<round_constraint>")]
3645 UNSPEC_FIX_NOTRUNC))]
3647 "vcvtps2dq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
3648 [(set_attr "type" "ssecvt")
3649 (set_attr "prefix" "evex")
3650 (set_attr "mode" "XI")])
;; vcvtps2udq on V16SF -> V16SI via UNSPEC_UNSIGNED_FIX_NOTRUNC.  EVEX
;; encoded; the name carries the <mask_name> and <round_name> suffixes.
3652 (define_insn "<mask_codefor>avx512f_ufix_notruncv16sfv16si<mask_name><round_name>"
3653 [(set (match_operand:V16SI 0 "register_operand" "=v")
3655 [(match_operand:V16SF 1 "<round_nimm_predicate>" "<round_constraint>")]
3656 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
3658 "vcvtps2udq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
3659 [(set_attr "type" "ssecvt")
3660 (set_attr "prefix" "evex")
3661 (set_attr "mode" "XI")])
;; V16SF -> V16SI conversion emitted as vcvttps2<fixsuffix>dq; the
;; <fixsuffix> attribute also prefixes the pattern name.  EVEX encoded,
;; using the <mask_name> and <round_saeonly_name> suffixes.
3663 (define_insn "<fixsuffix>fix_truncv16sfv16si2<mask_name><round_saeonly_name>"
3664 [(set (match_operand:V16SI 0 "register_operand" "=v")
3666 (match_operand:V16SF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
3668 "vcvttps2<fixsuffix>dq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
3669 [(set_attr "type" "ssecvt")
3670 (set_attr "prefix" "evex")
3671 (set_attr "mode" "XI")])
;; vcvttps2dq: (fix:V8SI ...) of a V8SF operand in a register or memory.
;; Always VEX encoded.
3673 (define_insn "fix_truncv8sfv8si2"
3674 [(set (match_operand:V8SI 0 "register_operand" "=x")
3675 (fix:V8SI (match_operand:V8SF 1 "nonimmediate_operand" "xm")))]
3677 "vcvttps2dq\t{%1, %0|%0, %1}"
3678 [(set_attr "type" "ssecvt")
3679 (set_attr "prefix" "vex")
3680 (set_attr "mode" "OI")])
;; cvttps2dq / vcvttps2dq: (fix:V4SI ...) of a V4SF operand in a register
;; or memory.  prefix_rep and prefix_data16 are computed from a TARGET_AVX
;; test.
;; NOTE(review): prefix_data16 is set twice below -- once by the
;; conditional (set (attr ...)) and once by a plain set_attr "0".  Confirm
;; which one is intended; the plain one looks redundant.
3682 (define_insn "fix_truncv4sfv4si2"
3683 [(set (match_operand:V4SI 0 "register_operand" "=x")
3684 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
3686 "%vcvttps2dq\t{%1, %0|%0, %1}"
3687 [(set_attr "type" "ssecvt")
3688 (set (attr "prefix_rep")
3690 (match_test "TARGET_AVX")
3692 (const_string "1")))
3693 (set (attr "prefix_data16")
3695 (match_test "TARGET_AVX")
3697 (const_string "0")))
3698 (set_attr "prefix_data16" "0")
3699 (set_attr "prefix" "maybe_vex")
3700 (set_attr "mode" "TI")])
;; Expander for unsigned VF1 -> integer vector conversion.  V16SF is
;; handled by gen_ufix_truncv16sfv16si2.  On the other path,
;; ix86_expand_adjust_ufix_to_sfix_si produces an adjusted input (tmp[0])
;; and fills tmp[2]; the signed fix_trunc pattern converts tmp[0] into
;; tmp[1], and the final result is tmp[1] XOR tmp[2].
3702 (define_expand "fixuns_trunc<mode><sseintvecmodelower>2"
3703 [(match_operand:<sseintvecmode> 0 "register_operand")
3704 (match_operand:VF1 1 "register_operand")]
3707 if (<MODE>mode == V16SFmode)
3708 emit_insn (gen_ufix_truncv16sfv16si2 (operands[0],
3713 tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]);
3714 tmp[1] = gen_reg_rtx (<sseintvecmode>mode);
3715 emit_insn (gen_fix_trunc<mode><sseintvecmodelower>2 (tmp[1], tmp[0]));
3716 emit_insn (gen_xor<sseintvecmodelower>3 (operands[0], tmp[1], tmp[2]));
3721 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3723 ;; Parallel double-precision floating point conversion operations
3725 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2pd: (float:V2DF ...) of a V2SI operand.  Alternative 0 takes the
;; source in a "y" register (unit mmx, prefix_data16 1); alternative 1
;; takes it from memory and leaves both attributes at their defaults.
3727 (define_insn "sse2_cvtpi2pd"
3728 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
3729 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
3731 "cvtpi2pd\t{%1, %0|%0, %1}"
3732 [(set_attr "type" "ssecvt")
3733 (set_attr "unit" "mmx,*")
3734 (set_attr "prefix_data16" "1,*")
3735 (set_attr "mode" "V2DF")])
;; cvtpd2pi: V2DF operand (register or memory) to a V2SI result in a "y"
;; register, modelled with UNSPEC_FIX_NOTRUNC.
3737 (define_insn "sse2_cvtpd2pi"
3738 [(set (match_operand:V2SI 0 "register_operand" "=y")
3739 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
3740 UNSPEC_FIX_NOTRUNC))]
3742 "cvtpd2pi\t{%1, %0|%0, %1}"
3743 [(set_attr "type" "ssecvt")
3744 (set_attr "unit" "mmx")
3745 (set_attr "bdver1_decode" "double")
3746 (set_attr "btver2_decode" "direct")
3747 (set_attr "prefix_data16" "1")
3748 (set_attr "mode" "DI")])
;; cvttpd2pi: (fix:V2SI ...) of a V2DF operand (register or memory), with
;; the result in a "y" register.
3750 (define_insn "sse2_cvttpd2pi"
3751 [(set (match_operand:V2SI 0 "register_operand" "=y")
3752 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
3754 "cvttpd2pi\t{%1, %0|%0, %1}"
3755 [(set_attr "type" "ssecvt")
3756 (set_attr "unit" "mmx")
3757 (set_attr "bdver1_decode" "double")
3758 (set_attr "prefix_data16" "1")
3759 (set_attr "mode" "TI")])
;; Converts SImode operand 2 to DF (cvtsi2sd / vcvtsi2sd).  Alternatives
;; 0-1 (isa noavx) are two-operand forms with operand 1 tied to the
;; destination; alternative 2 (isa avx) is the three-operand VEX form.
;; This pattern has no rounding variant.
3761 (define_insn "sse2_cvtsi2sd"
3762 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
3765 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m,rm")))
3766 (match_operand:V2DF 1 "register_operand" "0,0,x")
3770 cvtsi2sd\t{%2, %0|%0, %2}
3771 cvtsi2sd\t{%2, %0|%0, %2}
3772 vcvtsi2sd\t{%2, %1, %0|%0, %1, %2}"
3773 [(set_attr "isa" "noavx,noavx,avx")
3774 (set_attr "type" "sseicvt")
3775 (set_attr "athlon_decode" "double,direct,*")
3776 (set_attr "amdfam10_decode" "vector,double,*")
3777 (set_attr "bdver1_decode" "double,direct,*")
3778 (set_attr "btver2_decode" "double,double,double")
3779 (set_attr "prefix" "orig,orig,vex")
3780 (set_attr "mode" "DF")])
;; DImode-source variant of cvtsi2sd (cvtsi2sdq / vcvtsi2sdq); requires
;; TARGET_64BIT.  Alternatives 0-1 (isa noavx) tie operand 1 to the
;; destination; alternative 2 (isa avx) is the three-operand form and the
;; only one whose template carries <round_op3>.
3782 (define_insn "sse2_cvtsi2sdq<round_name>"
3783 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
3786 (float:DF (match_operand:DI 2 "<round_nimm_predicate>" "r,m,<round_constraint3>")))
3787 (match_operand:V2DF 1 "register_operand" "0,0,v")
3789 "TARGET_SSE2 && TARGET_64BIT"
3791 cvtsi2sdq\t{%2, %0|%0, %2}
3792 cvtsi2sdq\t{%2, %0|%0, %2}
3793 vcvtsi2sdq\t{<round_op3>%2, %1, %0|%0, %1, %2<round_op3>}"
3794 [(set_attr "isa" "noavx,noavx,avx")
3795 (set_attr "type" "sseicvt")
3796 (set_attr "athlon_decode" "double,direct,*")
3797 (set_attr "amdfam10_decode" "vector,double,*")
3798 (set_attr "bdver1_decode" "double,direct,*")
3799 (set_attr "length_vex" "*,*,4")
3800 (set_attr "prefix_rex" "1,1,*")
3801 (set_attr "prefix" "orig,orig,maybe_evex")
3802 (set_attr "mode" "DF")])
;; vcvtss2usi: converts element 0 of V4SF operand 1 to an SImode register
;; via UNSPEC_UNSIGNED_FIX_NOTRUNC.  EVEX encoded.
3804 (define_insn "avx512f_vcvtss2usi<round_name>"
3805 [(set (match_operand:SI 0 "register_operand" "=r")
3808 (match_operand:V4SF 1 "<round_nimm_predicate>" "<round_constraint>")
3809 (parallel [(const_int 0)]))]
3810 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
3812 "vcvtss2usi\t{<round_op2>%1, %0|%0, %1<round_op2>}"
3813 [(set_attr "type" "sseicvt")
3814 (set_attr "prefix" "evex")
3815 (set_attr "mode" "SI")])
;; DImode-result variant of vcvtss2usi; requires AVX512F and TARGET_64BIT.
;; The mnemonic in the template is the same as for the SImode pattern.
3817 (define_insn "avx512f_vcvtss2usiq<round_name>"
3818 [(set (match_operand:DI 0 "register_operand" "=r")
3821 (match_operand:V4SF 1 "<round_nimm_predicate>" "<round_constraint>")
3822 (parallel [(const_int 0)]))]
3823 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
3824 "TARGET_AVX512F && TARGET_64BIT"
3825 "vcvtss2usi\t{<round_op2>%1, %0|%0, %1<round_op2>}"
3826 [(set_attr "type" "sseicvt")
3827 (set_attr "prefix" "evex")
3828 (set_attr "mode" "DI")])
;; vcvttss2usi: converts element 0 of V4SF operand 1 to an SImode
;; register.  EVEX encoded; uses the <round_saeonly_*> attribute family.
3830 (define_insn "avx512f_vcvttss2usi<round_saeonly_name>"
3831 [(set (match_operand:SI 0 "register_operand" "=r")
3834 (match_operand:V4SF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
3835 (parallel [(const_int 0)]))))]
3837 "vcvttss2usi\t{<round_saeonly_op2>%1, %0|%0, %1<round_saeonly_op2>}"
3838 [(set_attr "type" "sseicvt")
3839 (set_attr "prefix" "evex")
3840 (set_attr "mode" "SI")])
;; DImode-result variant of vcvttss2usi; requires AVX512F and
;; TARGET_64BIT.  The mnemonic is the same as for the SImode pattern.
3842 (define_insn "avx512f_vcvttss2usiq<round_saeonly_name>"
3843 [(set (match_operand:DI 0 "register_operand" "=r")
3846 (match_operand:V4SF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
3847 (parallel [(const_int 0)]))))]
3848 "TARGET_AVX512F && TARGET_64BIT"
3849 "vcvttss2usi\t{<round_saeonly_op2>%1, %0|%0, %1<round_saeonly_op2>}"
3850 [(set_attr "type" "sseicvt")
3851 (set_attr "prefix" "evex")
3852 (set_attr "mode" "DI")])
;; vcvtsd2usi: converts element 0 of V2DF operand 1 to an SImode register
;; via UNSPEC_UNSIGNED_FIX_NOTRUNC.  EVEX encoded.
3854 (define_insn "avx512f_vcvtsd2usi<round_name>"
3855 [(set (match_operand:SI 0 "register_operand" "=r")
3858 (match_operand:V2DF 1 "<round_nimm_predicate>" "<round_constraint>")
3859 (parallel [(const_int 0)]))]
3860 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
3862 "vcvtsd2usi\t{<round_op2>%1, %0|%0, %1<round_op2>}"
3863 [(set_attr "type" "sseicvt")
3864 (set_attr "prefix" "evex")
3865 (set_attr "mode" "SI")])
;; DImode-result variant of vcvtsd2usi; requires AVX512F and TARGET_64BIT.
;; The mnemonic in the template is the same as for the SImode pattern.
3867 (define_insn "avx512f_vcvtsd2usiq<round_name>"
3868 [(set (match_operand:DI 0 "register_operand" "=r")
3871 (match_operand:V2DF 1 "<round_nimm_predicate>" "<round_constraint>")
3872 (parallel [(const_int 0)]))]
3873 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
3874 "TARGET_AVX512F && TARGET_64BIT"
3875 "vcvtsd2usi\t{<round_op2>%1, %0|%0, %1<round_op2>}"
3876 [(set_attr "type" "sseicvt")
3877 (set_attr "prefix" "evex")
3878 (set_attr "mode" "DI")])
;; vcvttsd2usi: converts element 0 of V2DF operand 1 to an SImode
;; register.  EVEX encoded; uses the <round_saeonly_*> attribute family.
3880 (define_insn "avx512f_vcvttsd2usi<round_saeonly_name>"
3881 [(set (match_operand:SI 0 "register_operand" "=r")
3884 (match_operand:V2DF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
3885 (parallel [(const_int 0)]))))]
3887 "vcvttsd2usi\t{<round_saeonly_op2>%1, %0|%0, %1<round_saeonly_op2>}"
3888 [(set_attr "type" "sseicvt")
3889 (set_attr "prefix" "evex")
3890 (set_attr "mode" "SI")])
;; DImode-result variant of vcvttsd2usi; requires AVX512F and
;; TARGET_64BIT.  The mnemonic is the same as for the SImode pattern.
3892 (define_insn "avx512f_vcvttsd2usiq<round_saeonly_name>"
3893 [(set (match_operand:DI 0 "register_operand" "=r")
3896 (match_operand:V2DF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
3897 (parallel [(const_int 0)]))))]
3898 "TARGET_AVX512F && TARGET_64BIT"
3899 "vcvttsd2usi\t{<round_saeonly_op2>%1, %0|%0, %1<round_saeonly_op2>}"
3900 [(set_attr "type" "sseicvt")
3901 (set_attr "prefix" "evex")
3902 (set_attr "mode" "DI")])
;; cvtsd2si: converts element 0 of V2DF operand 1 to an SImode register
;; via UNSPEC_FIX_NOTRUNC.  Intel syntax prints the source with the %q
;; operand modifier.
3904 (define_insn "sse2_cvtsd2si<round_name>"
3905 [(set (match_operand:SI 0 "register_operand" "=r,r")
3908 (match_operand:V2DF 1 "<round_nimm_predicate>" "v,<round_constraint2>")
3909 (parallel [(const_int 0)]))]
3910 UNSPEC_FIX_NOTRUNC))]
3912 "%vcvtsd2si\t{<round_op2>%1, %0|%0, %q1<round_op2>}"
3913 [(set_attr "type" "sseicvt")
3914 (set_attr "athlon_decode" "double,vector")
3915 (set_attr "bdver1_decode" "double,double")
3916 (set_attr "btver2_decode" "double,double")
3917 (set_attr "prefix_rep" "1")
3918 (set_attr "prefix" "maybe_vex")
3919 (set_attr "mode" "SI")])
;; cvtsd2si on a scalar DFmode operand (register or memory) rather than a
;; vector element; the result is an SImode register, modelled with
;; UNSPEC_FIX_NOTRUNC.
3921 (define_insn "sse2_cvtsd2si_2"
3922 [(set (match_operand:SI 0 "register_operand" "=r,r")
3923 (unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "v,m")]
3924 UNSPEC_FIX_NOTRUNC))]
3926 "%vcvtsd2si\t{%1, %0|%0, %q1}"
3927 [(set_attr "type" "sseicvt")
3928 (set_attr "athlon_decode" "double,vector")
3929 (set_attr "amdfam10_decode" "double,double")
3930 (set_attr "bdver1_decode" "double,double")
3931 (set_attr "prefix_rep" "1")
3932 (set_attr "prefix" "maybe_vex")
3933 (set_attr "mode" "SI")])
;; DImode-result variant of cvtsd2si; requires TARGET_64BIT.  Converts
;; element 0 of V2DF operand 1 via UNSPEC_FIX_NOTRUNC.
3935 (define_insn "sse2_cvtsd2siq<round_name>"
3936 [(set (match_operand:DI 0 "register_operand" "=r,r")
3939 (match_operand:V2DF 1 "<round_nimm_predicate>" "v,<round_constraint2>")
3940 (parallel [(const_int 0)]))]
3941 UNSPEC_FIX_NOTRUNC))]
3942 "TARGET_SSE2 && TARGET_64BIT"
3943 "%vcvtsd2si{q}\t{<round_op2>%1, %0|%0, %q1<round_op2>}"
3944 [(set_attr "type" "sseicvt")
3945 (set_attr "athlon_decode" "double,vector")
3946 (set_attr "bdver1_decode" "double,double")
3947 (set_attr "prefix_rep" "1")
3948 (set_attr "prefix" "maybe_vex")
3949 (set_attr "mode" "DI")])
;; DImode variant of the cvtsd2si pattern taking a plain DFmode operand
;; (register "v" or memory "m"), wrapped in UNSPEC_FIX_NOTRUNC.
;; Enabled only for TARGET_SSE2 && TARGET_64BIT.
3951 (define_insn "sse2_cvtsd2siq_2"
3952 [(set (match_operand:DI 0 "register_operand" "=r,r")
3953 (unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "v,m")]
3954 UNSPEC_FIX_NOTRUNC))]
3955 "TARGET_SSE2 && TARGET_64BIT"
3956 "%vcvtsd2si{q}\t{%1, %0|%0, %q1}"
3957 [(set_attr "type" "sseicvt")
3958 (set_attr "athlon_decode" "double,vector")
3959 (set_attr "amdfam10_decode" "double,double")
3960 (set_attr "bdver1_decode" "double,double")
3961 (set_attr "prefix_rep" "1")
3962 (set_attr "prefix" "maybe_vex")
3963 (set_attr "mode" "DI")])
;; Emits cvttsd2si: SImode general-register result computed from element 0
;; of the V2DF operand 1.  Two alternatives: a "v" register source or
;; <round_saeonly_constraint2>; <round_saeonly_op2> is added to the template.
3965 (define_insn "sse2_cvttsd2si<round_saeonly_name>"
3966 [(set (match_operand:SI 0 "register_operand" "=r,r")
3969 (match_operand:V2DF 1 "<round_saeonly_nimm_predicate>" "v,<round_saeonly_constraint2>")
3970 (parallel [(const_int 0)]))))]
3972 "%vcvttsd2si\t{<round_saeonly_op2>%1, %0|%0, %q1<round_saeonly_op2>}"
3973 [(set_attr "type" "sseicvt")
3974 (set_attr "athlon_decode" "double,vector")
3975 (set_attr "amdfam10_decode" "double,double")
3976 (set_attr "bdver1_decode" "double,double")
3977 (set_attr "btver2_decode" "double,double")
3978 (set_attr "prefix_rep" "1")
3979 (set_attr "prefix" "maybe_vex")
3980 (set_attr "mode" "SI")])
;; Emits cvttsd2si{q}: DImode general-register result computed from
;; element 0 of the V2DF operand 1.  Enabled only for
;; TARGET_SSE2 && TARGET_64BIT.
3982 (define_insn "sse2_cvttsd2siq<round_saeonly_name>"
3983 [(set (match_operand:DI 0 "register_operand" "=r,r")
3986 (match_operand:V2DF 1 "<round_saeonly_nimm_predicate>" "v,<round_saeonly_constraint2>")
3987 (parallel [(const_int 0)]))))]
3988 "TARGET_SSE2 && TARGET_64BIT"
3989 "%vcvttsd2si{q}\t{<round_saeonly_op2>%1, %0|%0, %q1<round_saeonly_op2>}"
3990 [(set_attr "type" "sseicvt")
3991 (set_attr "athlon_decode" "double,vector")
3992 (set_attr "amdfam10_decode" "double,double")
3993 (set_attr "bdver1_decode" "double,double")
3994 (set_attr "prefix_rep" "1")
3995 (set_attr "prefix" "maybe_vex")
3996 (set_attr "mode" "DI")])
;; Maps a double-precision vector mode to the SImode vector mode with the
;; same element count (V8DF -> V8SI, V4DF -> V4SI).
3998 ;; For float<si2dfmode><mode>2 insn pattern
3999 (define_mode_attr si2dfmode
4000 [(V8DF "V8SI") (V4DF "V4SI")])
;; Lower-case spelling of the same mapping (V8DF -> v8si, V4DF -> v4si),
;; used when the mode name is spliced into a pattern name.
4001 (define_mode_attr si2dfmodelower
4002 [(V8DF "v8si") (V4DF "v4si")])
;; Signed integer vector -> double vector conversion (RTL "float") for the
;; modes in VF2_512_256; the source has mode <si2dfmode>.  Emits vcvtdq2pd
;; with the optional <mask_operand2> on the destination.
4004 (define_insn "float<si2dfmodelower><mode>2<mask_name>"
4005 [(set (match_operand:VF2_512_256 0 "register_operand" "=v")
4006 (float:VF2_512_256 (match_operand:<si2dfmode> 1 "nonimmediate_operand" "vm")))]
4007 "TARGET_AVX && <mask_mode512bit_condition>"
4008 "vcvtdq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4009 [(set_attr "type" "ssecvt")
4010 (set_attr "prefix" "maybe_vex")
4011 (set_attr "mode" "<MODE>")])
;; Unsigned V8SI -> V8DF conversion (RTL "unsigned_float").  Emits
;; vcvtudq2pd with the optional <mask_operand2>; EVEX-encoded.
4013 (define_insn "ufloatv8siv8df<mask_name>"
4014 [(set (match_operand:V8DF 0 "register_operand" "=v")
4015 (unsigned_float:V8DF
4016 (match_operand:V8SI 1 "nonimmediate_operand" "vm")))]
4018 "vcvtudq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4019 [(set_attr "type" "ssecvt")
4020 (set_attr "prefix" "evex")
4021 (set_attr "mode" "V8DF")])
;; Produces a V8DF result from elements 0..7 of a V16SI operand using
;; vcvtdq2pd.  Operand 1 is printed with the %t modifier in the template.
4023 (define_insn "avx512f_cvtdq2pd512_2"
4024 [(set (match_operand:V8DF 0 "register_operand" "=v")
4027 (match_operand:V16SI 1 "nonimmediate_operand" "vm")
4028 (parallel [(const_int 0) (const_int 1)
4029 (const_int 2) (const_int 3)
4030 (const_int 4) (const_int 5)
4031 (const_int 6) (const_int 7)]))))]
4033 "vcvtdq2pd\t{%t1, %0|%0, %t1}"
4034 [(set_attr "type" "ssecvt")
4035 (set_attr "prefix" "evex")
4036 (set_attr "mode" "V8DF")])
;; Produces a V4DF result from elements 0..3 of a V8SI operand using
;; vcvtdq2pd.  Operand 1 is printed with the %x modifier in the template.
4038 (define_insn "avx_cvtdq2pd256_2"
4039 [(set (match_operand:V4DF 0 "register_operand" "=x")
4042 (match_operand:V8SI 1 "nonimmediate_operand" "xm")
4043 (parallel [(const_int 0) (const_int 1)
4044 (const_int 2) (const_int 3)]))))]
4046 "vcvtdq2pd\t{%x1, %0|%0, %x1}"
4047 [(set_attr "type" "ssecvt")
4048 (set_attr "prefix" "vex")
4049 (set_attr "mode" "V4DF")])
;; Produces a V2DF result from elements 0 and 1 of a V4SI operand using
;; cvtdq2pd (with the %v prefix escape).  The Intel-syntax operand uses
;; the %q modifier; "ssememalign" is set to 64.
4051 (define_insn "sse2_cvtdq2pd"
4052 [(set (match_operand:V2DF 0 "register_operand" "=x")
4055 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
4056 (parallel [(const_int 0) (const_int 1)]))))]
4058 "%vcvtdq2pd\t{%1, %0|%0, %q1}"
4059 [(set_attr "type" "ssecvt")
4060 (set_attr "prefix" "maybe_vex")
4061 (set_attr "ssememalign" "64")
4062 (set_attr "mode" "V2DF")])
;; V8DF -> V8SI conversion under UNSPEC_FIX_NOTRUNC, emitted as vcvtpd2dq.
;; The pattern name and template carry the mask (<mask_name>,
;; <mask_operand2>) and rounding (<round_name>, <round_mask_op2>) pieces.
4064 (define_insn "<mask_codefor>avx512f_cvtpd2dq512<mask_name><round_name>"
4065 [(set (match_operand:V8SI 0 "register_operand" "=v")
4067 [(match_operand:V8DF 1 "<round_nimm_predicate>" "<round_constraint>")]
4068 UNSPEC_FIX_NOTRUNC))]
4070 "vcvtpd2dq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4071 [(set_attr "type" "ssecvt")
4072 (set_attr "prefix" "evex")
4073 (set_attr "mode" "OI")])
;; V4DF -> V4SI conversion under UNSPEC_FIX_NOTRUNC, emitted as
;; vcvtpd2dq with the {y} AT&T suffix.
4075 (define_insn "avx_cvtpd2dq256"
4076 [(set (match_operand:V4SI 0 "register_operand" "=x")
4077 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")]
4078 UNSPEC_FIX_NOTRUNC))]
4080 "vcvtpd2dq{y}\t{%1, %0|%0, %1}"
4081 [(set_attr "type" "ssecvt")
4082 (set_attr "prefix" "vex")
4083 (set_attr "mode" "OI")])
;; Expander with a V8SI destination built around the V4DF -> V4SI unspec
;; conversion.  The preparation statement sets operands[2] to the V4SI
;; zero vector.
4085 (define_expand "avx_cvtpd2dq256_2"
4086 [(set (match_operand:V8SI 0 "register_operand")
4088 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand")]
4092 "operands[2] = CONST0_RTX (V4SImode);")
;; Insn matched for the V8SI-destination form of the V4DF -> V4SI unspec
;; conversion; operand 2 must satisfy const0_operand.  The destination is
;; printed with the %x modifier and the mnemonic is vcvtpd2dq{y}.
4094 (define_insn "*avx_cvtpd2dq256_2"
4095 [(set (match_operand:V8SI 0 "register_operand" "=x")
4097 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")]
4099 (match_operand:V4SI 2 "const0_operand")))]
4101 "vcvtpd2dq{y}\t{%1, %x0|%x0, %1}"
4102 [(set_attr "type" "ssecvt")
4103 (set_attr "prefix" "vex")
4104 (set_attr "btver2_decode" "vector")
4105 (set_attr "mode" "OI")])
;; Expander with a V4SI destination built around the V2DF -> V2SI unspec
;; conversion.  The preparation statement sets operands[2] to the V2SI
;; zero vector.
4107 (define_expand "sse2_cvtpd2dq"
4108 [(set (match_operand:V4SI 0 "register_operand")
4110 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand")]
4114 "operands[2] = CONST0_RTX (V2SImode);")
;; Insn matched for the V4SI-destination form of the V2DF -> V2SI unspec
;; conversion; operand 2 must satisfy const0_operand.  The output code
;; returns either the "vcvtpd2dq{x}" or the "cvtpd2dq" template.
4116 (define_insn "*sse2_cvtpd2dq"
4117 [(set (match_operand:V4SI 0 "register_operand" "=x")
4119 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
4121 (match_operand:V2SI 2 "const0_operand")))]
4125 return "vcvtpd2dq{x}\t{%1, %0|%0, %1}";
4127 return "cvtpd2dq\t{%1, %0|%0, %1}";
4129 [(set_attr "type" "ssecvt")
4130 (set_attr "prefix_rep" "1")
4131 (set_attr "prefix_data16" "0")
4132 (set_attr "prefix" "maybe_vex")
4133 (set_attr "mode" "TI")
4134 (set_attr "amdfam10_decode" "double")
4135 (set_attr "athlon_decode" "vector")
4136 (set_attr "bdver1_decode" "double")])
;; V8DF -> V8SI conversion under UNSPEC_UNSIGNED_FIX_NOTRUNC, emitted as
;; vcvtpd2udq with the mask and rounding template pieces.
4138 (define_insn "avx512f_ufix_notruncv8dfv8si<mask_name><round_name>"
4139 [(set (match_operand:V8SI 0 "register_operand" "=v")
4141 [(match_operand:V8DF 1 "<round_nimm_predicate>" "<round_constraint>")]
4142 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4144 "vcvtpd2udq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4145 [(set_attr "type" "ssecvt")
4146 (set_attr "prefix" "evex")
4147 (set_attr "mode" "OI")])
;; V8DF -> V8SI conversion emitted as vcvttpd2<fixsuffix>dq; <fixsuffix>
;; is spliced into both the pattern name and the mnemonic.  The template
;; also carries the mask and <round_saeonly_mask_op2> pieces.
4149 (define_insn "<fixsuffix>fix_truncv8dfv8si2<mask_name><round_saeonly_name>"
4150 [(set (match_operand:V8SI 0 "register_operand" "=v")
4152 (match_operand:V8DF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
4154 "vcvttpd2<fixsuffix>dq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
4155 [(set_attr "type" "ssecvt")
4156 (set_attr "prefix" "evex")
4157 (set_attr "mode" "OI")])
;; V4DF -> V4SI conversion (RTL "fix"), emitted as vcvttpd2dq with the
;; {y} AT&T suffix.
4159 (define_insn "fix_truncv4dfv4si2"
4160 [(set (match_operand:V4SI 0 "register_operand" "=x")
4161 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
4163 "vcvttpd2dq{y}\t{%1, %0|%0, %1}"
4164 [(set_attr "type" "ssecvt")
4165 (set_attr "prefix" "vex")
4166 (set_attr "mode" "OI")])
;; Expander with a V8SI destination built around (fix:V4SI <V4DF operand>).
;; The preparation statement sets operands[2] to the V4SI zero vector.
4168 (define_expand "avx_cvttpd2dq256_2"
4169 [(set (match_operand:V8SI 0 "register_operand")
4171 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand"))
4174 "operands[2] = CONST0_RTX (V4SImode);")
;; Insn matched for the V8SI-destination form of (fix:V4SI <V4DF>);
;; operand 2 must satisfy const0_operand.  The destination is printed with
;; the %x modifier and the mnemonic is vcvttpd2dq{y}.
4176 (define_insn "*avx_cvttpd2dq256_2"
4177 [(set (match_operand:V8SI 0 "register_operand" "=x")
4179 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "xm"))
4180 (match_operand:V4SI 2 "const0_operand")))]
4182 "vcvttpd2dq{y}\t{%1, %x0|%x0, %1}"
4183 [(set_attr "type" "ssecvt")
4184 (set_attr "prefix" "vex")
4185 (set_attr "btver2_decode" "vector")
4186 (set_attr "mode" "OI")])
;; Expander with a V4SI destination built around (fix:V2SI <V2DF operand>).
;; The preparation statement sets operands[2] to the V2SI zero vector.
4188 (define_expand "sse2_cvttpd2dq"
4189 [(set (match_operand:V4SI 0 "register_operand")
4191 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand"))
4194 "operands[2] = CONST0_RTX (V2SImode);")
;; Insn matched for the V4SI-destination form of (fix:V2SI <V2DF>);
;; operand 2 must satisfy const0_operand.  The output code returns either
;; the "vcvttpd2dq{x}" or the "cvttpd2dq" template.
4196 (define_insn "*sse2_cvttpd2dq"
4197 [(set (match_operand:V4SI 0 "register_operand" "=x")
4199 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
4200 (match_operand:V2SI 2 "const0_operand")))]
4204 return "vcvttpd2dq{x}\t{%1, %0|%0, %1}";
4206 return "cvttpd2dq\t{%1, %0|%0, %1}";
4208 [(set_attr "type" "ssecvt")
4209 (set_attr "amdfam10_decode" "double")
4210 (set_attr "athlon_decode" "vector")
4211 (set_attr "bdver1_decode" "double")
4212 (set_attr "prefix" "maybe_vex")
4213 (set_attr "mode" "TI")])
;; cvtsd2ss: V4SF destination built from the float_truncate:V2SF of the
;; V2DF operand 2 together with the V4SF operand 1.  Three alternatives:
;; two "noavx" forms where operand 1 is tied to the destination ("0") and
;; operand 2 is a register or memory, and one "avx" three-operand form
;; that takes <round_op3>.
4215 (define_insn "sse2_cvtsd2ss<round_name>"
4216 [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
4219 (float_truncate:V2SF
4220 (match_operand:V2DF 2 "nonimmediate_operand" "x,m,<round_constraint>")))
4221 (match_operand:V4SF 1 "register_operand" "0,0,v")
4225 cvtsd2ss\t{%2, %0|%0, %2}
4226 cvtsd2ss\t{%2, %0|%0, %q2}
4227 vcvtsd2ss\t{<round_op3>%2, %1, %0|%0, %1, %q2<round_op3>}"
4228 [(set_attr "isa" "noavx,noavx,avx")
4229 (set_attr "type" "ssecvt")
4230 (set_attr "athlon_decode" "vector,double,*")
4231 (set_attr "amdfam10_decode" "vector,double,*")
4232 (set_attr "bdver1_decode" "direct,direct,*")
4233 (set_attr "btver2_decode" "double,double,double")
4234 (set_attr "prefix" "orig,orig,<round_prefix>")
4235 (set_attr "mode" "SF")])
;; cvtss2sd: V2DF destination built from elements 0 and 1 of the V4SF
;; operand 2 together with the V2DF operand 1.  Three alternatives: two
;; "noavx" forms where operand 1 is tied to the destination ("0") and
;; operand 2 is a register or memory, and one "avx" three-operand form
;; that takes <round_saeonly_op3>.
4237 (define_insn "sse2_cvtss2sd<round_saeonly_name>"
4238 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
4242 (match_operand:V4SF 2 "nonimmediate_operand" "x,m,<round_saeonly_constraint>")
4243 (parallel [(const_int 0) (const_int 1)])))
4244 (match_operand:V2DF 1 "register_operand" "0,0,v")
4248 cvtss2sd\t{%2, %0|%0, %2}
4249 cvtss2sd\t{%2, %0|%0, %k2}
4250 vcvtss2sd\t{<round_saeonly_op3>%2, %1, %0|%0, %1, %k2<round_saeonly_op3>}"
4251 [(set_attr "isa" "noavx,noavx,avx")
4252 (set_attr "type" "ssecvt")
4253 (set_attr "amdfam10_decode" "vector,double,*")
4254 (set_attr "athlon_decode" "direct,direct,*")
4255 (set_attr "bdver1_decode" "direct,direct,*")
4256 (set_attr "btver2_decode" "double,double,double")
4257 (set_attr "prefix" "orig,orig,<round_saeonly_prefix>")
4258 (set_attr "mode" "DF")])
;; V8DF -> V8SF narrowing (RTL "float_truncate"), emitted as vcvtpd2ps
;; with the mask and rounding template pieces.
4260 (define_insn "<mask_codefor>avx512f_cvtpd2ps512<mask_name><round_name>"
4261 [(set (match_operand:V8SF 0 "register_operand" "=v")
4262 (float_truncate:V8SF
4263 (match_operand:V8DF 1 "<round_nimm_predicate>" "<round_constraint>")))]
4265 "vcvtpd2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4266 [(set_attr "type" "ssecvt")
4267 (set_attr "prefix" "evex")
4268 (set_attr "mode" "V8SF")])
;; V4DF -> V4SF narrowing (RTL "float_truncate"), emitted as vcvtpd2ps
;; with the {y} AT&T suffix.
4270 (define_insn "avx_cvtpd2ps256"
4271 [(set (match_operand:V4SF 0 "register_operand" "=x")
4272 (float_truncate:V4SF
4273 (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
4275 "vcvtpd2ps{y}\t{%1, %0|%0, %1}"
4276 [(set_attr "type" "ssecvt")
4277 (set_attr "prefix" "vex")
4278 (set_attr "btver2_decode" "vector")
4279 (set_attr "mode" "V4SF")])
;; Expander with a V4SF destination built around the float_truncate:V2SF
;; of the V2DF operand 1.  The preparation statement sets operands[2] to
;; the V2SF zero vector.
4281 (define_expand "sse2_cvtpd2ps"
4282 [(set (match_operand:V4SF 0 "register_operand")
4284 (float_truncate:V2SF
4285 (match_operand:V2DF 1 "nonimmediate_operand"))
4288 "operands[2] = CONST0_RTX (V2SFmode);")
;; Insn matched for the V4SF-destination form of the V2DF -> V2SF
;; float_truncate; operand 2 must satisfy const0_operand.  The output code
;; returns either the "vcvtpd2ps{x}" or the "cvtpd2ps" template.
4290 (define_insn "*sse2_cvtpd2ps"
4291 [(set (match_operand:V4SF 0 "register_operand" "=x")
4293 (float_truncate:V2SF
4294 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
4295 (match_operand:V2SF 2 "const0_operand")))]
4299 return "vcvtpd2ps{x}\t{%1, %0|%0, %1}";
4301 return "cvtpd2ps\t{%1, %0|%0, %1}";
4303 [(set_attr "type" "ssecvt")
4304 (set_attr "amdfam10_decode" "double")
4305 (set_attr "athlon_decode" "vector")
4306 (set_attr "bdver1_decode" "double")
4307 (set_attr "prefix_data16" "1")
4308 (set_attr "prefix" "maybe_vex")
4309 (set_attr "mode" "V4SF")])
;; Maps a double-precision vector mode to the single-precision vector mode
;; with the same element count (V8DF -> V8SF, V4DF -> V4SF).
4311 ;; For <sse2_avx_avx512f>_cvtps2pd<avxsizesuffix> insn pattern
4312 (define_mode_attr sf2dfmode
4313 [(V8DF "V8SF") (V4DF "V4SF")])
;; Single -> double widening (RTL "float_extend") for the modes in
;; VF2_512_256; the source has mode <sf2dfmode>.  Emits vcvtps2pd with the
;; mask and <round_saeonly_mask_op2> template pieces.
4315 (define_insn "<sse2_avx_avx512f>_cvtps2pd<avxsizesuffix><mask_name><round_saeonly_name>"
4316 [(set (match_operand:VF2_512_256 0 "register_operand" "=v")
4317 (float_extend:VF2_512_256
4318 (match_operand:<sf2dfmode> 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
4319 "TARGET_AVX && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
4320 "vcvtps2pd\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
4321 [(set_attr "type" "ssecvt")
4322 (set_attr "prefix" "maybe_vex")
4323 (set_attr "mode" "<MODE>")])
;; Produces a V4DF result from elements 0..3 of a V8SF operand using
;; vcvtps2pd.  Operand 1 is printed with the %x modifier in the template.
4325 (define_insn "*avx_cvtps2pd256_2"
4326 [(set (match_operand:V4DF 0 "register_operand" "=x")
4329 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
4330 (parallel [(const_int 0) (const_int 1)
4331 (const_int 2) (const_int 3)]))))]
4333 "vcvtps2pd\t{%x1, %0|%0, %x1}"
4334 [(set_attr "type" "ssecvt")
4335 (set_attr "prefix" "vex")
4336 (set_attr "mode" "V4DF")])
;; Produces a V8DF result from elements 0..7 of a V16SF operand using
;; vcvtps2pd.  Operand 1 is printed with the %t modifier in the template.
4338 (define_insn "vec_unpacks_lo_v16sf"
4339 [(set (match_operand:V8DF 0 "register_operand" "=v")
4342 (match_operand:V16SF 1 "nonimmediate_operand" "vm")
4343 (parallel [(const_int 0) (const_int 1)
4344 (const_int 2) (const_int 3)
4345 (const_int 4) (const_int 5)
4346 (const_int 6) (const_int 7)]))))]
4348 "vcvtps2pd\t{%t1, %0|%0, %t1}"
4349 [(set_attr "type" "ssecvt")
4350 (set_attr "prefix" "evex")
4351 (set_attr "mode" "V8DF")])
;; Produces a V2DF result from elements 0 and 1 of a V4SF operand using
;; cvtps2pd (with the %v prefix escape).  The Intel-syntax operand uses
;; the %q modifier.
4353 (define_insn "sse2_cvtps2pd"
4354 [(set (match_operand:V2DF 0 "register_operand" "=x")
4357 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
4358 (parallel [(const_int 0) (const_int 1)]))))]
4360 "%vcvtps2pd\t{%1, %0|%0, %q1}"
4361 [(set_attr "type" "ssecvt")
4362 (set_attr "amdfam10_decode" "direct")
4363 (set_attr "athlon_decode" "double")
4364 (set_attr "bdver1_decode" "double")
4365 (set_attr "prefix_data16" "0")
4366 (set_attr "prefix" "maybe_vex")
4367 (set_attr "mode" "V2DF")])
;; Two-step expander for the high half of a V4SF: first a selection with
;; indices 6, 7, 2, 3 is formed from operand 1, then the V2DF destination
;; is produced from elements 0 and 1.  operands[2] is a fresh V4SF pseudo
;; (presumably the intermediate holding the shuffled vector -- confirm).
4369 (define_expand "vec_unpacks_hi_v4sf"
4374 (match_operand:V4SF 1 "nonimmediate_operand"))
4375 (parallel [(const_int 6) (const_int 7)
4376 (const_int 2) (const_int 3)])))
4377 (set (match_operand:V2DF 0 "register_operand")
4381 (parallel [(const_int 0) (const_int 1)]))))]
4383 "operands[2] = gen_reg_rtx (V4SFmode);")
;; Two-step expander for the high half of a V8SF: elements 4..7 of the
;; register operand 1 are selected, then the V4DF destination is set.
;; operands[2] is a fresh V4SF pseudo (presumably the intermediate holding
;; the selected half -- confirm).
4385 (define_expand "vec_unpacks_hi_v8sf"
4388 (match_operand:V8SF 1 "register_operand")
4389 (parallel [(const_int 4) (const_int 5)
4390 (const_int 6) (const_int 7)])))
4391 (set (match_operand:V4DF 0 "register_operand")
4395 "operands[2] = gen_reg_rtx (V4SFmode);")
;; Two-step expander for the high half of a V16SF: elements 8..15 of the
;; register operand 1 are selected, then the V8DF destination is set.
;; operands[2] is a fresh V8SF pseudo (presumably the intermediate holding
;; the selected half -- confirm).
4397 (define_expand "vec_unpacks_hi_v16sf"
4400 (match_operand:V16SF 1 "register_operand")
4401 (parallel [(const_int 8) (const_int 9)
4402 (const_int 10) (const_int 11)
4403 (const_int 12) (const_int 13)
4404 (const_int 14) (const_int 15)])))
4405 (set (match_operand:V8DF 0 "register_operand")
4409 "operands[2] = gen_reg_rtx (V8SFmode);")
;; Expander producing a V2DF destination from elements 0 and 1 of the
;; V4SF operand 1.
4411 (define_expand "vec_unpacks_lo_v4sf"
4412 [(set (match_operand:V2DF 0 "register_operand")
4415 (match_operand:V4SF 1 "nonimmediate_operand")
4416 (parallel [(const_int 0) (const_int 1)]))))]
;; Expander producing a V4DF destination from elements 0..3 of the
;; V8SF operand 1.
4419 (define_expand "vec_unpacks_lo_v8sf"
4420 [(set (match_operand:V4DF 0 "register_operand")
4423 (match_operand:V8SF 1 "nonimmediate_operand")
4424 (parallel [(const_int 0) (const_int 1)
4425 (const_int 2) (const_int 3)]))))]
;; Maps an integer vector mode to the floating-point vector mode with half
;; as many, twice-as-wide elements (e.g. V8HI -> V4SF, V4SI -> V2DF).
;; Used as the result mode of the vec_unpack*_float_* expanders.
4428 (define_mode_attr sseunpackfltmode
4429 [(V8HI "V4SF") (V4SI "V2DF") (V16HI "V8SF")
4430 (V8SI "V4DF") (V32HI "V16SF") (V16SI "V8DF")])
;; Expander over VI2_AVX512F: unpacks operand 1 with
;; gen_vec_unpacks_hi_<mode> into a fresh <sseunpackmode> pseudo, then
;; sets operand 0 to the FLOAT of that pseudo in <sseunpackfltmode>.
4432 (define_expand "vec_unpacks_float_hi_<mode>"
4433 [(match_operand:<sseunpackfltmode> 0 "register_operand")
4434 (match_operand:VI2_AVX512F 1 "register_operand")]
4437 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
4439 emit_insn (gen_vec_unpacks_hi_<mode> (tmp, operands[1]));
4440 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
4441 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
;; Expander over VI2_AVX512F: unpacks operand 1 with
;; gen_vec_unpacks_lo_<mode> into a fresh <sseunpackmode> pseudo, then
;; sets operand 0 to the FLOAT of that pseudo in <sseunpackfltmode>.
4445 (define_expand "vec_unpacks_float_lo_<mode>"
4446 [(match_operand:<sseunpackfltmode> 0 "register_operand")
4447 (match_operand:VI2_AVX512F 1 "register_operand")]
4450 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
4452 emit_insn (gen_vec_unpacks_lo_<mode> (tmp, operands[1]));
4453 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
4454 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
;; Expander over VI2_AVX512F: unpacks operand 1 with
;; gen_vec_unpacku_hi_<mode> into a fresh <sseunpackmode> pseudo, then
;; sets operand 0 to the FLOAT of that pseudo in <sseunpackfltmode>.
;; Note that the final conversion uses FLOAT, as in the signed variants.
4458 (define_expand "vec_unpacku_float_hi_<mode>"
4459 [(match_operand:<sseunpackfltmode> 0 "register_operand")
4460 (match_operand:VI2_AVX512F 1 "register_operand")]
4463 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
4465 emit_insn (gen_vec_unpacku_hi_<mode> (tmp, operands[1]));
4466 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
4467 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
;; Expander over VI2_AVX512F: unpacks operand 1 with
;; gen_vec_unpacku_lo_<mode> into a fresh <sseunpackmode> pseudo, then
;; sets operand 0 to the FLOAT of that pseudo in <sseunpackfltmode>.
;; Note that the final conversion uses FLOAT, as in the signed variants.
4471 (define_expand "vec_unpacku_float_lo_<mode>"
4472 [(match_operand:<sseunpackfltmode> 0 "register_operand")
4473 (match_operand:VI2_AVX512F 1 "register_operand")]
4476 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
4478 emit_insn (gen_vec_unpacku_lo_<mode> (tmp, operands[1]));
4479 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
4480 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
;; Two-step expander for the high half of a V4SI: a selection with
;; indices 2, 3, 2, 3 is formed from operand 1, then the V2DF destination
;; is produced from elements 0 and 1.  operands[2] is a fresh V4SI pseudo
;; (presumably the intermediate holding the shuffled vector -- confirm).
4484 (define_expand "vec_unpacks_float_hi_v4si"
4487 (match_operand:V4SI 1 "nonimmediate_operand")
4488 (parallel [(const_int 2) (const_int 3)
4489 (const_int 2) (const_int 3)])))
4490 (set (match_operand:V2DF 0 "register_operand")
4494 (parallel [(const_int 0) (const_int 1)]))))]
4496 "operands[2] = gen_reg_rtx (V4SImode);")
;; Expander producing a V2DF destination from elements 0 and 1 of the
;; V4SI operand 1.
4498 (define_expand "vec_unpacks_float_lo_v4si"
4499 [(set (match_operand:V2DF 0 "register_operand")
4502 (match_operand:V4SI 1 "nonimmediate_operand")
4503 (parallel [(const_int 0) (const_int 1)]))))]
;; Two-step expander for the high half of a V8SI: elements 4..7 of
;; operand 1 are selected, then the V4DF destination is set.
;; operands[2] is a fresh V4SI pseudo (presumably the intermediate holding
;; the selected half -- confirm).
4506 (define_expand "vec_unpacks_float_hi_v8si"
4509 (match_operand:V8SI 1 "nonimmediate_operand")
4510 (parallel [(const_int 4) (const_int 5)
4511 (const_int 6) (const_int 7)])))
4512 (set (match_operand:V4DF 0 "register_operand")
4516 "operands[2] = gen_reg_rtx (V4SImode);")
;; Expander producing a V4DF destination from elements 0..3 of the
;; V8SI operand 1.
4518 (define_expand "vec_unpacks_float_lo_v8si"
4519 [(set (match_operand:V4DF 0 "register_operand")
4522 (match_operand:V8SI 1 "nonimmediate_operand")
4523 (parallel [(const_int 0) (const_int 1)
4524 (const_int 2) (const_int 3)]))))]
;; Two-step expander for the high half of a V16SI: elements 8..15 of
;; operand 1 are selected, then the V8DF destination is set.
;; operands[2] is a fresh V8SI pseudo (presumably the intermediate holding
;; the selected half -- confirm).
4527 (define_expand "vec_unpacks_float_hi_v16si"
4530 (match_operand:V16SI 1 "nonimmediate_operand")
4531 (parallel [(const_int 8) (const_int 9)
4532 (const_int 10) (const_int 11)
4533 (const_int 12) (const_int 13)
4534 (const_int 14) (const_int 15)])))
4535 (set (match_operand:V8DF 0 "register_operand")
4539 "operands[2] = gen_reg_rtx (V8SImode);")
;; Expander producing a V8DF destination from elements 0..7 of the
;; V16SI operand 1.
4541 (define_expand "vec_unpacks_float_lo_v16si"
4542 [(set (match_operand:V8DF 0 "register_operand")
4545 (match_operand:V16SI 1 "nonimmediate_operand")
4546 (parallel [(const_int 0) (const_int 1)
4547 (const_int 2) (const_int 3)
4548 (const_int 4) (const_int 5)
4549 (const_int 6) (const_int 7)]))))]
;; Unsigned V4SI (high half) -> V2DF expander.  A selection with indices
;; 2, 3, 2, 3 is taken from operand 1 and elements 0 and 1 are converted;
;; the converted value (operand 6) is compared "lt" against operand 3,
;; an "and" combines operand 7 with operand 4, and operand 0 receives
;; operand 6 plus operand 8.
;; Preparation: operands[3] is the V2DF zero vector, operands[4] a V2DF
;; constant vector built from 2^32 (real_ldexp of dconst1 by 32),
;; operands[5] a V4SI pseudo and operands[6..8] V2DF pseudos.
;; NOTE(review): presumably operands 7 and 8 hold the compare mask and the
;; masked 2^32 correction respectively -- confirm against the full pattern.
4552 (define_expand "vec_unpacku_float_hi_v4si"
4555 (match_operand:V4SI 1 "nonimmediate_operand")
4556 (parallel [(const_int 2) (const_int 3)
4557 (const_int 2) (const_int 3)])))
4562 (parallel [(const_int 0) (const_int 1)]))))
4564 (lt:V2DF (match_dup 6) (match_dup 3)))
4566 (and:V2DF (match_dup 7) (match_dup 4)))
4567 (set (match_operand:V2DF 0 "register_operand")
4568 (plus:V2DF (match_dup 6) (match_dup 8)))]
4571 REAL_VALUE_TYPE TWO32r;
4575 real_ldexp (&TWO32r, &dconst1, 32);
4576 x = const_double_from_real_value (TWO32r, DFmode);
4578 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
4579 operands[4] = force_reg (V2DFmode,
4580 ix86_build_const_vector (V2DFmode, 1, x));
4582 operands[5] = gen_reg_rtx (V4SImode);
4584 for (i = 6; i < 9; i++)
4585 operands[i] = gen_reg_rtx (V2DFmode);
;; Unsigned V4SI (low half) -> V2DF expander.  Elements 0 and 1 of
;; operand 1 are converted; the converted value (operand 5) is compared
;; "lt" against operand 3, an "and" combines operand 6 with operand 4, and
;; operand 0 receives operand 5 plus operand 7.
;; Preparation: operands[3] is the V2DF zero vector, operands[4] a V2DF
;; constant vector built from 2^32 (real_ldexp of dconst1 by 32), and
;; operands[5..7] are V2DF pseudos.
;; NOTE(review): presumably operands 6 and 7 hold the compare mask and the
;; masked 2^32 correction respectively -- confirm against the full pattern.
4588 (define_expand "vec_unpacku_float_lo_v4si"
4592 (match_operand:V4SI 1 "nonimmediate_operand")
4593 (parallel [(const_int 0) (const_int 1)]))))
4595 (lt:V2DF (match_dup 5) (match_dup 3)))
4597 (and:V2DF (match_dup 6) (match_dup 4)))
4598 (set (match_operand:V2DF 0 "register_operand")
4599 (plus:V2DF (match_dup 5) (match_dup 7)))]
4602 REAL_VALUE_TYPE TWO32r;
4606 real_ldexp (&TWO32r, &dconst1, 32);
4607 x = const_double_from_real_value (TWO32r, DFmode);
4609 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
4610 operands[4] = force_reg (V2DFmode,
4611 ix86_build_const_vector (V2DFmode, 1, x));
4613 for (i = 5; i < 8; i++)
4614 operands[i] = gen_reg_rtx (V2DFmode);
;; Unsigned V8SI (high half) -> V4DF expander, written as C code.
;; tmp[0] is the V4DF zero vector and tmp[1] a V4DF constant vector built
;; from 2^32.  The high half of operand 1 is extracted into tmp[5] and
;; converted with gen_floatv4siv4df2 into tmp[2]; tmp[3] = (tmp[2] < tmp[0]),
;; tmp[4] = tmp[3] & tmp[1], and operand 0 = tmp[2] + tmp[4], i.e. 2^32 is
;; added to the lanes whose signed conversion came out negative.
4617 (define_expand "vec_unpacku_float_hi_v8si"
4618 [(match_operand:V4DF 0 "register_operand")
4619 (match_operand:V8SI 1 "register_operand")]
4622 REAL_VALUE_TYPE TWO32r;
4626 real_ldexp (&TWO32r, &dconst1, 32);
4627 x = const_double_from_real_value (TWO32r, DFmode);
4629 tmp[0] = force_reg (V4DFmode, CONST0_RTX (V4DFmode));
4630 tmp[1] = force_reg (V4DFmode, ix86_build_const_vector (V4DFmode, 1, x));
4631 tmp[5] = gen_reg_rtx (V4SImode);
4633 for (i = 2; i < 5; i++)
4634 tmp[i] = gen_reg_rtx (V4DFmode);
4635 emit_insn (gen_vec_extract_hi_v8si (tmp[5], operands[1]));
4636 emit_insn (gen_floatv4siv4df2 (tmp[2], tmp[5]));
4637 emit_insn (gen_rtx_SET (VOIDmode, tmp[3],
4638 gen_rtx_LT (V4DFmode, tmp[2], tmp[0])));
4639 emit_insn (gen_andv4df3 (tmp[4], tmp[3], tmp[1]));
4640 emit_insn (gen_addv4df3 (operands[0], tmp[2], tmp[4]));
;; Unsigned V16SI (high half) -> V8DF expander, written as C code.
;; tmp[0] is the V8DF zero vector and tmp[1] a V8DF constant vector built
;; from 2^32.  The high half of operand 1 is extracted into tmp[3] and
;; converted with gen_floatv8siv8df2 into tmp[2].  A QImode register k is
;; set to (tmp[2] < tmp[0]) and passed as the last argument of
;; gen_addv8df3_mask, which adds tmp[1] to tmp[2]; the result is then
;; moved to operand 0.
4644 (define_expand "vec_unpacku_float_hi_v16si"
4645 [(match_operand:V8DF 0 "register_operand")
4646 (match_operand:V16SI 1 "register_operand")]
4649 REAL_VALUE_TYPE TWO32r;
4652 real_ldexp (&TWO32r, &dconst1, 32);
4653 x = const_double_from_real_value (TWO32r, DFmode);
4655 tmp[0] = force_reg (V8DFmode, CONST0_RTX (V8DFmode));
4656 tmp[1] = force_reg (V8DFmode, ix86_build_const_vector (V8DFmode, 1, x));
4657 tmp[2] = gen_reg_rtx (V8DFmode);
4658 tmp[3] = gen_reg_rtx (V8SImode);
4659 k = gen_reg_rtx (QImode);
4661 emit_insn (gen_vec_extract_hi_v16si (tmp[3], operands[1]));
4662 emit_insn (gen_floatv8siv8df2 (tmp[2], tmp[3]));
4663 emit_insn (gen_rtx_SET (VOIDmode, k,
4664 gen_rtx_LT (QImode, tmp[2], tmp[0])));
4665 emit_insn (gen_addv8df3_mask (tmp[2], tmp[2], tmp[1], tmp[2], k));
4666 emit_move_insn (operands[0], tmp[2]);
;; Unsigned V8SI (low half) -> V4DF expander, written as C code.
;; tmp[0] is the V4DF zero vector and tmp[1] a V4DF constant vector built
;; from 2^32.  Operand 1 is converted with gen_avx_cvtdq2pd256_2 into
;; tmp[2]; tmp[3] = (tmp[2] < tmp[0]), tmp[4] = tmp[3] & tmp[1], and
;; operand 0 = tmp[2] + tmp[4].
4670 (define_expand "vec_unpacku_float_lo_v8si"
4671 [(match_operand:V4DF 0 "register_operand")
4672 (match_operand:V8SI 1 "nonimmediate_operand")]
4675 REAL_VALUE_TYPE TWO32r;
4679 real_ldexp (&TWO32r, &dconst1, 32);
4680 x = const_double_from_real_value (TWO32r, DFmode);
4682 tmp[0] = force_reg (V4DFmode, CONST0_RTX (V4DFmode));
4683 tmp[1] = force_reg (V4DFmode, ix86_build_const_vector (V4DFmode, 1, x));
4685 for (i = 2; i < 5; i++)
4686 tmp[i] = gen_reg_rtx (V4DFmode);
4687 emit_insn (gen_avx_cvtdq2pd256_2 (tmp[2], operands[1]));
4688 emit_insn (gen_rtx_SET (VOIDmode, tmp[3],
4689 gen_rtx_LT (V4DFmode, tmp[2], tmp[0])));
4690 emit_insn (gen_andv4df3 (tmp[4], tmp[3], tmp[1]));
4691 emit_insn (gen_addv4df3 (operands[0], tmp[2], tmp[4]));
;; Unsigned V16SI (low half) -> V8DF expander, written as C code.
;; tmp[0] is the V8DF zero vector and tmp[1] a V8DF constant vector built
;; from 2^32.  Operand 1 is converted with gen_avx512f_cvtdq2pd512_2 into
;; tmp[2].  A QImode register k is set to (tmp[2] < tmp[0]) and passed as
;; the last argument of gen_addv8df3_mask, which adds tmp[1] to tmp[2];
;; the result is then moved to operand 0.
4695 (define_expand "vec_unpacku_float_lo_v16si"
4696 [(match_operand:V8DF 0 "register_operand")
4697 (match_operand:V16SI 1 "nonimmediate_operand")]
4700 REAL_VALUE_TYPE TWO32r;
4703 real_ldexp (&TWO32r, &dconst1, 32);
4704 x = const_double_from_real_value (TWO32r, DFmode);
4706 tmp[0] = force_reg (V8DFmode, CONST0_RTX (V8DFmode));
4707 tmp[1] = force_reg (V8DFmode, ix86_build_const_vector (V8DFmode, 1, x));
4708 tmp[2] = gen_reg_rtx (V8DFmode);
4709 k = gen_reg_rtx (QImode);
4711 emit_insn (gen_avx512f_cvtdq2pd512_2 (tmp[2], operands[1]));
4712 emit_insn (gen_rtx_SET (VOIDmode, k,
4713 gen_rtx_LT (QImode, tmp[2], tmp[0])));
4714 emit_insn (gen_addv8df3_mask (tmp[2], tmp[2], tmp[1], tmp[2], k));
4715 emit_move_insn (operands[0], tmp[2]);
;; Expander over VF2_512_256: operands 1 and 2 are each narrowed with
;; float_truncate to <sf2dfmode>, and operand 0 (mode <ssePSmode>) is set
;; to a vec_concat.  operands[3] and operands[4] are fresh <sf2dfmode>
;; pseudos (presumably the two narrowed halves -- confirm).
4719 (define_expand "vec_pack_trunc_<mode>"
4721 (float_truncate:<sf2dfmode>
4722 (match_operand:VF2_512_256 1 "nonimmediate_operand")))
4724 (float_truncate:<sf2dfmode>
4725 (match_operand:VF2_512_256 2 "nonimmediate_operand")))
4726 (set (match_operand:<ssePSmode> 0 "register_operand")
4727 (vec_concat:<ssePSmode>
4732 operands[3] = gen_reg_rtx (<sf2dfmode>mode);
4733 operands[4] = gen_reg_rtx (<sf2dfmode>mode);
;; Packs two V2DF operands into one V4SF result.
;; When TARGET_AVX && !TARGET_PREFER_AVX128, operand 1 is forced into a
;; register, concatenated with operand 2 into a V4DF and converted by
;; gen_avx_cvtpd2ps256.  The remaining sequence converts each operand
;; separately with gen_sse2_cvtpd2ps and merges the two results with
;; gen_sse_movlhps.
4736 (define_expand "vec_pack_trunc_v2df"
4737 [(match_operand:V4SF 0 "register_operand")
4738 (match_operand:V2DF 1 "nonimmediate_operand")
4739 (match_operand:V2DF 2 "nonimmediate_operand")]
4744 if (TARGET_AVX && !TARGET_PREFER_AVX128)
4746 tmp0 = gen_reg_rtx (V4DFmode);
4747 tmp1 = force_reg (V2DFmode, operands[1]);
4749 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
4750 emit_insn (gen_avx_cvtpd2ps256 (operands[0], tmp0));
4754 tmp0 = gen_reg_rtx (V4SFmode);
4755 tmp1 = gen_reg_rtx (V4SFmode);
4757 emit_insn (gen_sse2_cvtpd2ps (tmp0, operands[1]));
4758 emit_insn (gen_sse2_cvtpd2ps (tmp1, operands[2]));
4759 emit_insn (gen_sse_movlhps (operands[0], tmp0, tmp1));
;; Packs two V8DF operands into one V16SI result: each operand is
;; converted with gen_fix_truncv8dfv8si2 into a V8SI pseudo and the two
;; halves are joined with gen_avx_vec_concatv16si.
4764 (define_expand "vec_pack_sfix_trunc_v8df"
4765 [(match_operand:V16SI 0 "register_operand")
4766 (match_operand:V8DF 1 "nonimmediate_operand")
4767 (match_operand:V8DF 2 "nonimmediate_operand")]
4772 r1 = gen_reg_rtx (V8SImode);
4773 r2 = gen_reg_rtx (V8SImode);
4775 emit_insn (gen_fix_truncv8dfv8si2 (r1, operands[1]));
4776 emit_insn (gen_fix_truncv8dfv8si2 (r2, operands[2]));
4777 emit_insn (gen_avx_vec_concatv16si (operands[0], r1, r2));
;; Packs two V4DF operands into one V8SI result: each operand is
;; converted with gen_fix_truncv4dfv4si2 into a V4SI pseudo and the two
;; halves are joined with gen_avx_vec_concatv8si.
4781 (define_expand "vec_pack_sfix_trunc_v4df"
4782 [(match_operand:V8SI 0 "register_operand")
4783 (match_operand:V4DF 1 "nonimmediate_operand")
4784 (match_operand:V4DF 2 "nonimmediate_operand")]
4789 r1 = gen_reg_rtx (V4SImode);
4790 r2 = gen_reg_rtx (V4SImode);
4792 emit_insn (gen_fix_truncv4dfv4si2 (r1, operands[1]));
4793 emit_insn (gen_fix_truncv4dfv4si2 (r2, operands[2]));
4794 emit_insn (gen_avx_vec_concatv8si (operands[0], r1, r2));
;; Packs two V2DF operands into one V4SI result using truncating
;; conversion.
;; When TARGET_AVX && !TARGET_PREFER_AVX128, the operands are concatenated
;; into a V4DF and converted by gen_fix_truncv4dfv4si2.  The remaining
;; sequence converts each operand with gen_sse2_cvttpd2dq, interleaves the
;; low V2DI elements of the two results, and moves the V4SI view of that
;; into operand 0.
4798 (define_expand "vec_pack_sfix_trunc_v2df"
4799 [(match_operand:V4SI 0 "register_operand")
4800 (match_operand:V2DF 1 "nonimmediate_operand")
4801 (match_operand:V2DF 2 "nonimmediate_operand")]
4804 rtx tmp0, tmp1, tmp2;
4806 if (TARGET_AVX && !TARGET_PREFER_AVX128)
4808 tmp0 = gen_reg_rtx (V4DFmode);
4809 tmp1 = force_reg (V2DFmode, operands[1]);
4811 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
4812 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp0));
4816 tmp0 = gen_reg_rtx (V4SImode);
4817 tmp1 = gen_reg_rtx (V4SImode);
4818 tmp2 = gen_reg_rtx (V2DImode);
4820 emit_insn (gen_sse2_cvttpd2dq (tmp0, operands[1]));
4821 emit_insn (gen_sse2_cvttpd2dq (tmp1, operands[2]));
4822 emit_insn (gen_vec_interleave_lowv2di (tmp2,
4823 gen_lowpart (V2DImode, tmp0),
4824 gen_lowpart (V2DImode, tmp1)));
4825 emit_move_insn (operands[0], gen_lowpart (V4SImode, tmp2));
;; Maps a double-precision vector mode to the SImode vector mode with
;; twice as many elements (V8DF -> V16SI, V4DF -> V8SI, V2DF -> V4SI),
;; i.e. the result mode of packing two such vectors.
4830 (define_mode_attr ssepackfltmode
4831 [(V8DF "V16SI") (V4DF "V8SI") (V2DF "V4SI")])
;; Packs two VF2 operands into one <ssepackfltmode> result using unsigned
;; truncating conversion.
;; For V8DFmode each operand is converted with gen_ufix_truncv8dfv8si2 and
;; the halves are joined with gen_avx_vec_concatv16si.
;; For the other modes each operand is first adjusted by
;; ix86_expand_adjust_ufix_to_sfix_si (which also fills tmp[2] / tmp[3]),
;; the adjusted values are packed with gen_vec_pack_sfix_trunc_<mode> into
;; tmp[4], tmp[5] is formed by ix86_expand_vec_extract_even_odd (..., 0)
;; on tmp[2] and tmp[3] (through V8SFmode lowparts unless the result mode
;; is V4SImode or TARGET_AVX2 holds), and operand 0 is tmp[4] XOR tmp[5].
4833 (define_expand "vec_pack_ufix_trunc_<mode>"
4834 [(match_operand:<ssepackfltmode> 0 "register_operand")
4835 (match_operand:VF2 1 "register_operand")
4836 (match_operand:VF2 2 "register_operand")]
4839 if (<MODE>mode == V8DFmode)
4843 r1 = gen_reg_rtx (V8SImode);
4844 r2 = gen_reg_rtx (V8SImode);
4846 emit_insn (gen_ufix_truncv8dfv8si2 (r1, operands[1]));
4847 emit_insn (gen_ufix_truncv8dfv8si2 (r2, operands[2]));
4848 emit_insn (gen_avx_vec_concatv16si (operands[0], r1, r2));
4853 tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]);
4854 tmp[1] = ix86_expand_adjust_ufix_to_sfix_si (operands[2], &tmp[3]);
4855 tmp[4] = gen_reg_rtx (<ssepackfltmode>mode);
4856 emit_insn (gen_vec_pack_sfix_trunc_<mode> (tmp[4], tmp[0], tmp[1]));
4857 if (<ssepackfltmode>mode == V4SImode || TARGET_AVX2)
4859 tmp[5] = gen_reg_rtx (<ssepackfltmode>mode);
4860 ix86_expand_vec_extract_even_odd (tmp[5], tmp[2], tmp[3], 0);
4864 tmp[5] = gen_reg_rtx (V8SFmode);
4865 ix86_expand_vec_extract_even_odd (tmp[5], gen_lowpart (V8SFmode, tmp[2]),
4866 gen_lowpart (V8SFmode, tmp[3]), 0);
4867 tmp[5] = gen_lowpart (V8SImode, tmp[5]);
4869 tmp[6] = expand_simple_binop (<ssepackfltmode>mode, XOR, tmp[4], tmp[5],
4870 operands[0], 0, OPTAB_DIRECT);
4871 if (tmp[6] != operands[0])
4872 emit_move_insn (operands[0], tmp[6]);
;; Packs two V4DF operands into one V8SI result: each operand is
;; converted with gen_avx_cvtpd2dq256 into a V4SI pseudo and the two
;; halves are joined with gen_avx_vec_concatv8si.
4878 (define_expand "vec_pack_sfix_v4df"
4879 [(match_operand:V8SI 0 "register_operand")
4880 (match_operand:V4DF 1 "nonimmediate_operand")
4881 (match_operand:V4DF 2 "nonimmediate_operand")]
4886 r1 = gen_reg_rtx (V4SImode);
4887 r2 = gen_reg_rtx (V4SImode);
4889 emit_insn (gen_avx_cvtpd2dq256 (r1, operands[1]));
4890 emit_insn (gen_avx_cvtpd2dq256 (r2, operands[2]));
4891 emit_insn (gen_avx_vec_concatv8si (operands[0], r1, r2));
;; Packs two V2DF operands into one V4SI result using the cvtpd2dq
;; conversion.
;; When TARGET_AVX && !TARGET_PREFER_AVX128, the operands are concatenated
;; into a V4DF and converted by gen_avx_cvtpd2dq256.  The remaining
;; sequence converts each operand with gen_sse2_cvtpd2dq, interleaves the
;; low V2DI elements of the two results, and moves the V4SI view of that
;; into operand 0.
4895 (define_expand "vec_pack_sfix_v2df"
4896 [(match_operand:V4SI 0 "register_operand")
4897 (match_operand:V2DF 1 "nonimmediate_operand")
4898 (match_operand:V2DF 2 "nonimmediate_operand")]
4901 rtx tmp0, tmp1, tmp2;
4903 if (TARGET_AVX && !TARGET_PREFER_AVX128)
4905 tmp0 = gen_reg_rtx (V4DFmode);
4906 tmp1 = force_reg (V2DFmode, operands[1]);
4908 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
4909 emit_insn (gen_avx_cvtpd2dq256 (operands[0], tmp0));
4913 tmp0 = gen_reg_rtx (V4SImode);
4914 tmp1 = gen_reg_rtx (V4SImode);
4915 tmp2 = gen_reg_rtx (V2DImode);
4917 emit_insn (gen_sse2_cvtpd2dq (tmp0, operands[1]));
4918 emit_insn (gen_sse2_cvtpd2dq (tmp1, operands[2]));
4919 emit_insn (gen_vec_interleave_lowv2di (tmp2,
4920 gen_lowpart (V2DImode, tmp0),
4921 gen_lowpart (V2DImode, tmp1)));
4922 emit_move_insn (operands[0], gen_lowpart (V4SImode, tmp2));
4927 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4929 ;; Parallel single-precision floating point element swizzling
4931 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander wrapper around sse_movhlps for V4SF operands that may be in
;; memory.  ix86_fixup_binary_operands returns the destination to use;
;; gen_sse_movhlps is emitted into it, and the result is copied to
;; operand 0 when the fixed-up destination differs from it.
4933 (define_expand "sse_movhlps_exp"
4934 [(set (match_operand:V4SF 0 "nonimmediate_operand")
4937 (match_operand:V4SF 1 "nonimmediate_operand")
4938 (match_operand:V4SF 2 "nonimmediate_operand"))
4939 (parallel [(const_int 6)
4945 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
4947 emit_insn (gen_sse_movhlps (dst, operands[1], operands[2]));
4949 /* Fix up the destination if needed. */
4950 if (dst != operands[0])
4951 emit_move_insn (operands[0], dst);
;; movhlps-style shuffle of two V4SF operands; requires TARGET_SSE and
;; rejects the case where operands 1 and 2 are both memory.
;; Five alternatives: movhlps / vmovhlps for register sources, movlps /
;; vmovlps reading %H2 when operand 2 is offsettable memory ("o"), and
;; %vmovhps storing to a memory destination.
4956 (define_insn "sse_movhlps"
4957 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,m")
4960 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
4961 (match_operand:V4SF 2 "nonimmediate_operand" " x,x,o,o,x"))
4962 (parallel [(const_int 6)
4966 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4968 movhlps\t{%2, %0|%0, %2}
4969 vmovhlps\t{%2, %1, %0|%0, %1, %2}
4970 movlps\t{%H2, %0|%0, %H2}
4971 vmovlps\t{%H2, %1, %0|%0, %1, %H2}
4972 %vmovhps\t{%2, %0|%q0, %2}"
4973 [(set_attr "isa" "noavx,avx,noavx,avx,*")
4974 (set_attr "type" "ssemov")
4975 (set_attr "ssememalign" "64")
4976 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
4977 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
;; Expander wrapper around sse_movlhps for V4SF operands that may be in
;; memory.  ix86_fixup_binary_operands returns the destination to use;
;; gen_sse_movlhps is emitted into it, and the result is copied to
;; operand 0 when the fixed-up destination differs from it.
4979 (define_expand "sse_movlhps_exp"
4980 [(set (match_operand:V4SF 0 "nonimmediate_operand")
4983 (match_operand:V4SF 1 "nonimmediate_operand")
4984 (match_operand:V4SF 2 "nonimmediate_operand"))
4985 (parallel [(const_int 0)
4991 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
4993 emit_insn (gen_sse_movlhps (dst, operands[1], operands[2]));
4995 /* Fix up the destination if needed. */
4996 if (dst != operands[0])
4997 emit_move_insn (operands[0], dst);
;; movlhps-style shuffle of two V4SF operands; requires TARGET_SSE and
;; ix86_binary_operator_ok.
;; Five alternatives: movlhps / vmovlhps for register sources, movhps /
;; vmovhps when operand 2 is memory, and %vmovlps storing to %H0 of an
;; offsettable memory destination ("o").
5002 (define_insn "sse_movlhps"
5003 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,o")
5006 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
5007 (match_operand:V4SF 2 "nonimmediate_operand" " x,x,m,m,x"))
5008 (parallel [(const_int 0)
5012 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
5014 movlhps\t{%2, %0|%0, %2}
5015 vmovlhps\t{%2, %1, %0|%0, %1, %2}
5016 movhps\t{%2, %0|%0, %q2}
5017 vmovhps\t{%2, %1, %0|%0, %1, %q2}
5018 %vmovlps\t{%2, %H0|%H0, %2}"
5019 [(set_attr "isa" "noavx,avx,noavx,avx,*")
5020 (set_attr "type" "ssemov")
5021 (set_attr "ssememalign" "64")
5022 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
5023 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
;; 512-bit vunpckhps on V16SF operands, with optional <mask_operand3>.
;; The selector interleaves indices 2, 3 of each group of four from
;; operand 1 (indices 0..15) with the matching indices of operand 2
;; (indices 16..31): 2,18,3,19, 6,22,7,23, 10,26,11,27, 14,30,15,31.
5025 (define_insn "<mask_codefor>avx512f_unpckhps512<mask_name>"
5026 [(set (match_operand:V16SF 0 "register_operand" "=v")
5029 (match_operand:V16SF 1 "register_operand" "v")
5030 (match_operand:V16SF 2 "nonimmediate_operand" "vm"))
5031 (parallel [(const_int 2) (const_int 18)
5032 (const_int 3) (const_int 19)
5033 (const_int 6) (const_int 22)
5034 (const_int 7) (const_int 23)
5035 (const_int 10) (const_int 26)
5036 (const_int 11) (const_int 27)
5037 (const_int 14) (const_int 30)
5038 (const_int 15) (const_int 31)])))]
5040 "vunpckhps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
5041 [(set_attr "type" "sselog")
5042 (set_attr "prefix" "evex")
5043 (set_attr "mode" "V16SF")])
5045 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; Accordingly the selection vector below pairs index N with index N+8
;; for N = 2, 3 and then N = 6, 7.  Emitted as a VEX-encoded
;; three-operand vunpckhps.
5046 (define_insn "avx_unpckhps256"
5047 [(set (match_operand:V8SF 0 "register_operand" "=x")
5050 (match_operand:V8SF 1 "register_operand" "x")
5051 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
5052 (parallel [(const_int 2) (const_int 10)
5053 (const_int 3) (const_int 11)
5054 (const_int 6) (const_int 14)
5055 (const_int 7) (const_int 15)])))]
5057 "vunpckhps\t{%2, %1, %0|%0, %1, %2}"
5058 [(set_attr "type" "sselog")
5059 (set_attr "prefix" "vex")
5060 (set_attr "mode" "V8SF")])
;; Expander for the V8SF high interleave.  The visible RTL has three
;; selection steps: {0,8,1,9,4,12,5,13}, then {2,10,3,11,6,14,7,15}, and
;; finally {4,5,6,7,12,13,14,15} whose result is stored in operand 0.
;; Operands 3 and 4 are fresh V8SF pseudos created in the preparation
;; code.
5062 (define_expand "vec_interleave_highv8sf"
5066 (match_operand:V8SF 1 "register_operand" "x")
5067 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
5068 (parallel [(const_int 0) (const_int 8)
5069 (const_int 1) (const_int 9)
5070 (const_int 4) (const_int 12)
5071 (const_int 5) (const_int 13)])))
5077 (parallel [(const_int 2) (const_int 10)
5078 (const_int 3) (const_int 11)
5079 (const_int 6) (const_int 14)
5080 (const_int 7) (const_int 15)])))
5081 (set (match_operand:V8SF 0 "register_operand")
5086 (parallel [(const_int 4) (const_int 5)
5087 (const_int 6) (const_int 7)
5088 (const_int 12) (const_int 13)
5089 (const_int 14) (const_int 15)])))]
5092 operands[3] = gen_reg_rtx (V8SFmode);
5093 operands[4] = gen_reg_rtx (V8SFmode);
;; V4SF high interleave: the selection vector is {2, 6, 3, 7}.
;; Alternative 0 (noavx) ties operand 1 to the destination and emits
;; unpckhps; alternative 1 (avx) emits the three-operand vunpckhps.
5096 (define_insn "vec_interleave_highv4sf"
5097 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
5100 (match_operand:V4SF 1 "register_operand" "0,x")
5101 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
5102 (parallel [(const_int 2) (const_int 6)
5103 (const_int 3) (const_int 7)])))]
5106 unpckhps\t{%2, %0|%0, %2}
5107 vunpckhps\t{%2, %1, %0|%0, %1, %2}"
5108 [(set_attr "isa" "noavx,avx")
5109 (set_attr "type" "sselog")
5110 (set_attr "prefix" "orig,vex")
5111 (set_attr "mode" "V4SF")])
;; 512-bit vunpcklps on V16SF, with the optional masking supplied by the
;; <mask_name>/<mask_operand3> substitution.  The selection vector pairs
;; index N with index N+16 for N = 0, 1, 4, 5, 8, 9, 12, 13.
5113 (define_insn "<mask_codefor>avx512f_unpcklps512<mask_name>"
5114 [(set (match_operand:V16SF 0 "register_operand" "=v")
5117 (match_operand:V16SF 1 "register_operand" "v")
5118 (match_operand:V16SF 2 "nonimmediate_operand" "vm"))
5119 (parallel [(const_int 0) (const_int 16)
5120 (const_int 1) (const_int 17)
5121 (const_int 4) (const_int 20)
5122 (const_int 5) (const_int 21)
5123 (const_int 8) (const_int 24)
5124 (const_int 9) (const_int 25)
5125 (const_int 12) (const_int 28)
5126 (const_int 13) (const_int 29)])))]
5128 "vunpcklps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
5129 [(set_attr "type" "sselog")
5130 (set_attr "prefix" "evex")
5131 (set_attr "mode" "V16SF")])
5133 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; Accordingly the selection vector below pairs index N with index N+8
;; for N = 0, 1 and then N = 4, 5.  Emitted as a VEX-encoded
;; three-operand vunpcklps.
5134 (define_insn "avx_unpcklps256"
5135 [(set (match_operand:V8SF 0 "register_operand" "=x")
5138 (match_operand:V8SF 1 "register_operand" "x")
5139 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
5140 (parallel [(const_int 0) (const_int 8)
5141 (const_int 1) (const_int 9)
5142 (const_int 4) (const_int 12)
5143 (const_int 5) (const_int 13)])))]
5145 "vunpcklps\t{%2, %1, %0|%0, %1, %2}"
5146 [(set_attr "type" "sselog")
5147 (set_attr "prefix" "vex")
5148 (set_attr "mode" "V8SF")])
;; Expander for the V8SF low interleave.  The visible RTL has three
;; selection steps: {0,8,1,9,4,12,5,13}, then {2,10,3,11,6,14,7,15}, and
;; finally {0,1,2,3,8,9,10,11} whose result is stored in operand 0.
;; Operands 3 and 4 are fresh V8SF pseudos created in the preparation
;; code.
5150 (define_expand "vec_interleave_lowv8sf"
5154 (match_operand:V8SF 1 "register_operand" "x")
5155 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
5156 (parallel [(const_int 0) (const_int 8)
5157 (const_int 1) (const_int 9)
5158 (const_int 4) (const_int 12)
5159 (const_int 5) (const_int 13)])))
5165 (parallel [(const_int 2) (const_int 10)
5166 (const_int 3) (const_int 11)
5167 (const_int 6) (const_int 14)
5168 (const_int 7) (const_int 15)])))
5169 (set (match_operand:V8SF 0 "register_operand")
5174 (parallel [(const_int 0) (const_int 1)
5175 (const_int 2) (const_int 3)
5176 (const_int 8) (const_int 9)
5177 (const_int 10) (const_int 11)])))]
5180 operands[3] = gen_reg_rtx (V8SFmode);
5181 operands[4] = gen_reg_rtx (V8SFmode);
;; V4SF low interleave: the selection vector is {0, 4, 1, 5}.
;; Alternative 0 (noavx) ties operand 1 to the destination and emits
;; unpcklps; alternative 1 (avx) emits the three-operand vunpcklps.
5184 (define_insn "vec_interleave_lowv4sf"
5185 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
5188 (match_operand:V4SF 1 "register_operand" "0,x")
5189 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
5190 (parallel [(const_int 0) (const_int 4)
5191 (const_int 1) (const_int 5)])))]
5194 unpcklps\t{%2, %0|%0, %2}
5195 vunpcklps\t{%2, %1, %0|%0, %1, %2}"
5196 [(set_attr "isa" "noavx,avx")
5197 (set_attr "type" "sselog")
5198 (set_attr "prefix" "orig,vex")
5199 (set_attr "mode" "V4SF")])
5201 ;; These are modeled with the same vec_concat as the others so that we
5202 ;; capture users of shufps that can use the new instructions
;; 256-bit vmovshdup: the selection vector duplicates each odd index
;; ({1,1,3,3,5,5,7,7}).  Single-source insn; operand 1 may be a register
;; or memory.
5203 (define_insn "avx_movshdup256"
5204 [(set (match_operand:V8SF 0 "register_operand" "=x")
5207 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
5209 (parallel [(const_int 1) (const_int 1)
5210 (const_int 3) (const_int 3)
5211 (const_int 5) (const_int 5)
5212 (const_int 7) (const_int 7)])))]
5214 "vmovshdup\t{%1, %0|%0, %1}"
5215 [(set_attr "type" "sse")
5216 (set_attr "prefix" "vex")
5217 (set_attr "mode" "V8SF")])
;; 128-bit movshdup on V4SF.  A single template with the %v prefix covers
;; both the legacy and VEX encodings ("prefix" is maybe_vex), and
;; "prefix_rep" is set to 1.  Only the first selector index (1) is
;; visible in this excerpt.
5219 (define_insn "sse3_movshdup"
5220 [(set (match_operand:V4SF 0 "register_operand" "=x")
5223 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
5225 (parallel [(const_int 1)
5230 "%vmovshdup\t{%1, %0|%0, %1}"
5231 [(set_attr "type" "sse")
5232 (set_attr "prefix_rep" "1")
5233 (set_attr "prefix" "maybe_vex")
5234 (set_attr "mode" "V4SF")])
;; 512-bit vmovshdup on V16SF, optionally masked through the
;; <mask_name>/<mask_operand2> substitution.  The selection vector
;; duplicates every odd index from 1 through 15.
5236 (define_insn "<mask_codefor>avx512f_movshdup512<mask_name>"
5237 [(set (match_operand:V16SF 0 "register_operand" "=v")
5240 (match_operand:V16SF 1 "nonimmediate_operand" "vm")
5242 (parallel [(const_int 1) (const_int 1)
5243 (const_int 3) (const_int 3)
5244 (const_int 5) (const_int 5)
5245 (const_int 7) (const_int 7)
5246 (const_int 9) (const_int 9)
5247 (const_int 11) (const_int 11)
5248 (const_int 13) (const_int 13)
5249 (const_int 15) (const_int 15)])))]
5251 "vmovshdup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5252 [(set_attr "type" "sse")
5253 (set_attr "prefix" "evex")
5254 (set_attr "mode" "V16SF")])
;; 256-bit vmovsldup: the selection vector duplicates each even index
;; ({0,0,2,2,4,4,6,6}).  Single-source insn; operand 1 may be a register
;; or memory.
5256 (define_insn "avx_movsldup256"
5257 [(set (match_operand:V8SF 0 "register_operand" "=x")
5260 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
5262 (parallel [(const_int 0) (const_int 0)
5263 (const_int 2) (const_int 2)
5264 (const_int 4) (const_int 4)
5265 (const_int 6) (const_int 6)])))]
5267 "vmovsldup\t{%1, %0|%0, %1}"
5268 [(set_attr "type" "sse")
5269 (set_attr "prefix" "vex")
5270 (set_attr "mode" "V8SF")])
;; 128-bit movsldup on V4SF.  A single template with the %v prefix covers
;; both the legacy and VEX encodings ("prefix" is maybe_vex), and
;; "prefix_rep" is set to 1.  Only the first selector index (0) is
;; visible in this excerpt.
5272 (define_insn "sse3_movsldup"
5273 [(set (match_operand:V4SF 0 "register_operand" "=x")
5276 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
5278 (parallel [(const_int 0)
5283 "%vmovsldup\t{%1, %0|%0, %1}"
5284 [(set_attr "type" "sse")
5285 (set_attr "prefix_rep" "1")
5286 (set_attr "prefix" "maybe_vex")
5287 (set_attr "mode" "V4SF")])
;; 512-bit vmovsldup on V16SF, optionally masked through the
;; <mask_name>/<mask_operand2> substitution.  The selection vector
;; duplicates every even index from 0 through 14.
5289 (define_insn "<mask_codefor>avx512f_movsldup512<mask_name>"
5290 [(set (match_operand:V16SF 0 "register_operand" "=v")
5293 (match_operand:V16SF 1 "nonimmediate_operand" "vm")
5295 (parallel [(const_int 0) (const_int 0)
5296 (const_int 2) (const_int 2)
5297 (const_int 4) (const_int 4)
5298 (const_int 6) (const_int 6)
5299 (const_int 8) (const_int 8)
5300 (const_int 10) (const_int 10)
5301 (const_int 12) (const_int 12)
5302 (const_int 14) (const_int 14)])))]
5304 "vmovsldup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5305 [(set_attr "type" "sse")
5306 (set_attr "prefix" "evex")
5307 (set_attr "mode" "V16SF")])
;; Expander that turns the 8-bit shuffle immediate in operand 3 into the
;; eight explicit element indices expected by avx_shufps256_1.  Each
;; 2-bit field of the immediate is used twice: fields 0 and 1 yield
;; indices biased by 0 and then by 4, fields 2 and 3 yield indices biased
;; by 8 and then by 12.
5309 (define_expand "avx_shufps256"
5310 [(match_operand:V8SF 0 "register_operand")
5311 (match_operand:V8SF 1 "register_operand")
5312 (match_operand:V8SF 2 "nonimmediate_operand")
5313 (match_operand:SI 3 "const_int_operand")]
5316 int mask = INTVAL (operands[3]);
5317 emit_insn (gen_avx_shufps256_1 (operands[0], operands[1], operands[2],
5318 GEN_INT ((mask >> 0) & 3),
5319 GEN_INT ((mask >> 2) & 3),
5320 GEN_INT (((mask >> 4) & 3) + 8),
5321 GEN_INT (((mask >> 6) & 3) + 8),
5322 GEN_INT (((mask >> 0) & 3) + 4),
5323 GEN_INT (((mask >> 2) & 3) + 4),
5324 GEN_INT (((mask >> 4) & 3) + 12),
5325 GEN_INT (((mask >> 6) & 3) + 12)));
5329 ;; One bit in mask selects 2 elements.
;; 256-bit vshufps with the shuffle spelled out as eight index operands.
;; The insn condition requires operands 7..10 to equal operands 3..6
;; plus 4.  The output code rebuilds the 8-bit immediate from operands
;; 3..6 (removing the bias of 8 from operands 5 and 6) and stores it in
;; operands[3] before emitting vshufps.
5330 (define_insn "avx_shufps256_1"
5331 [(set (match_operand:V8SF 0 "register_operand" "=x")
5334 (match_operand:V8SF 1 "register_operand" "x")
5335 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
5336 (parallel [(match_operand 3 "const_0_to_3_operand" )
5337 (match_operand 4 "const_0_to_3_operand" )
5338 (match_operand 5 "const_8_to_11_operand" )
5339 (match_operand 6 "const_8_to_11_operand" )
5340 (match_operand 7 "const_4_to_7_operand" )
5341 (match_operand 8 "const_4_to_7_operand" )
5342 (match_operand 9 "const_12_to_15_operand")
5343 (match_operand 10 "const_12_to_15_operand")])))]
5345 && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
5346 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
5347 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
5348 && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4))"
5351 mask = INTVAL (operands[3]);
5352 mask |= INTVAL (operands[4]) << 2;
5353 mask |= (INTVAL (operands[5]) - 8) << 4;
5354 mask |= (INTVAL (operands[6]) - 8) << 6;
5355 operands[3] = GEN_INT (mask);
5357 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
5359 [(set_attr "type" "sseshuf")
5360 (set_attr "length_immediate" "1")
5361 (set_attr "prefix" "vex")
5362 (set_attr "mode" "V8SF")])
;; Expander that splits the 8-bit shuffle immediate in operand 3 into the
;; four explicit element indices expected by sse_shufps_v4sf.  The two
;; low 2-bit fields are passed unbiased; the two high fields are biased
;; by 4.
5364 (define_expand "sse_shufps"
5365 [(match_operand:V4SF 0 "register_operand")
5366 (match_operand:V4SF 1 "register_operand")
5367 (match_operand:V4SF 2 "nonimmediate_operand")
5368 (match_operand:SI 3 "const_int_operand")]
5371 int mask = INTVAL (operands[3]);
5372 emit_insn (gen_sse_shufps_v4sf (operands[0], operands[1], operands[2],
5373 GEN_INT ((mask >> 0) & 3),
5374 GEN_INT ((mask >> 2) & 3),
5375 GEN_INT (((mask >> 4) & 3) + 4),
5376 GEN_INT (((mask >> 6) & 3) + 4)));
;; 128-bit shufps, instantiated for every mode in VI4F_128.  Operands 3
;; and 4 select from indices 0..3 and operands 5 and 6 from indices 4..7
;; of the concatenated sources.  The output code folds the four indices
;; back into one immediate (removing the bias of 4 from operands 5 and
;; 6), stores it in operands[3], and then emits shufps or vshufps
;; depending on which_alternative.
5380 (define_insn "sse_shufps_<mode>"
5381 [(set (match_operand:VI4F_128 0 "register_operand" "=x,x")
5382 (vec_select:VI4F_128
5383 (vec_concat:<ssedoublevecmode>
5384 (match_operand:VI4F_128 1 "register_operand" "0,x")
5385 (match_operand:VI4F_128 2 "nonimmediate_operand" "xm,xm"))
5386 (parallel [(match_operand 3 "const_0_to_3_operand")
5387 (match_operand 4 "const_0_to_3_operand")
5388 (match_operand 5 "const_4_to_7_operand")
5389 (match_operand 6 "const_4_to_7_operand")])))]
5393 mask |= INTVAL (operands[3]) << 0;
5394 mask |= INTVAL (operands[4]) << 2;
5395 mask |= (INTVAL (operands[5]) - 4) << 4;
5396 mask |= (INTVAL (operands[6]) - 4) << 6;
5397 operands[3] = GEN_INT (mask);
5399 switch (which_alternative)
5402 return "shufps\t{%3, %2, %0|%0, %2, %3}";
5404 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
5409 [(set_attr "isa" "noavx,avx")
5410 (set_attr "type" "sseshuf")
5411 (set_attr "length_immediate" "1")
5412 (set_attr "prefix" "orig,vex")
5413 (set_attr "mode" "V4SF")])
;; Extract elements 2 and 3 of a V4SF value into a V2SF destination.
;; Three alternatives:
;;   0  register -> memory          : %vmovhps
;;   1  register -> register        : %vmovhlps
;;   2  offsettable memory -> reg   : %vmovlps reading %H1
5415 (define_insn "sse_storehps"
5416 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
5418 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
5419 (parallel [(const_int 2) (const_int 3)])))]
5422 %vmovhps\t{%1, %0|%q0, %1}
5423 %vmovhlps\t{%1, %d0|%d0, %1}
5424 %vmovlps\t{%H1, %d0|%d0, %H1}"
5425 [(set_attr "type" "ssemov")
5426 (set_attr "ssememalign" "64")
5427 (set_attr "prefix" "maybe_vex")
5428 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander wrapping sse_loadhps.  The RTL keeps elements 0 and 1 of
;; operand 1 and combines them with the V2SF operand 2.  The preparation
;; code legitimizes the operands through ix86_fixup_binary_operands,
;; emits sse_loadhps into the destination that helper returns, and copies
;; the result back to operand 0 when the destinations differ.
5430 (define_expand "sse_loadhps_exp"
5431 [(set (match_operand:V4SF 0 "nonimmediate_operand")
5434 (match_operand:V4SF 1 "nonimmediate_operand")
5435 (parallel [(const_int 0) (const_int 1)]))
5436 (match_operand:V2SF 2 "nonimmediate_operand")))]
5439 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
5441 emit_insn (gen_sse_loadhps (dst, operands[1], operands[2]));
5443 /* Fix up the destination if needed. */
5444 if (dst != operands[0])
5445 emit_move_insn (operands[0], dst);
;; Replace the high half of a V4SF value: elements 0 and 1 of operand 1
;; are kept and the V2SF operand 2 supplies the rest.  Five alternatives:
;;   0/1  memory operand 2        -> movhps / vmovhps
;;   2/3  register operand 2      -> movlhps / vmovlhps
;;   4    offsettable memory dest -> %vmovlps written to %H0
5450 (define_insn "sse_loadhps"
5451 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,o")
5454 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
5455 (parallel [(const_int 0) (const_int 1)]))
5456 (match_operand:V2SF 2 "nonimmediate_operand" " m,m,x,x,x")))]
5459 movhps\t{%2, %0|%0, %q2}
5460 vmovhps\t{%2, %1, %0|%0, %1, %q2}
5461 movlhps\t{%2, %0|%0, %2}
5462 vmovlhps\t{%2, %1, %0|%0, %1, %2}
5463 %vmovlps\t{%2, %H0|%H0, %2}"
5464 [(set_attr "isa" "noavx,avx,noavx,avx,*")
5465 (set_attr "type" "ssemov")
5466 (set_attr "ssememalign" "64")
5467 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
5468 (set_attr "mode" "V2SF,V2SF,V4SF,V4SF,V2SF")])
;; Extract elements 0 and 1 of a V4SF value into a V2SF destination.
;; Three alternatives:
;;   0  register -> memory   : %vmovlps
;;   1  register -> register : %vmovaps (whole-register copy)
;;   2  memory -> register   : %vmovlps
5470 (define_insn "sse_storelps"
5471 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
5473 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,m")
5474 (parallel [(const_int 0) (const_int 1)])))]
5477 %vmovlps\t{%1, %0|%q0, %1}
5478 %vmovaps\t{%1, %0|%0, %1}
5479 %vmovlps\t{%1, %d0|%d0, %q1}"
5480 [(set_attr "type" "ssemov")
5481 (set_attr "prefix" "maybe_vex")
5482 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander wrapping sse_loadlps.  The RTL combines the V2SF operand 2
;; with elements 2 and 3 of operand 1.  The preparation code legitimizes
;; the operands through ix86_fixup_binary_operands, emits sse_loadlps
;; into the destination that helper returns, and copies the result back
;; to operand 0 when the destinations differ.
5484 (define_expand "sse_loadlps_exp"
5485 [(set (match_operand:V4SF 0 "nonimmediate_operand")
5487 (match_operand:V2SF 2 "nonimmediate_operand")
5489 (match_operand:V4SF 1 "nonimmediate_operand")
5490 (parallel [(const_int 2) (const_int 3)]))))]
5493 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
5495 emit_insn (gen_sse_loadlps (dst, operands[1], operands[2]));
5497 /* Fix up the destination if needed. */
5498 if (dst != operands[0])
5499 emit_move_insn (operands[0], dst);
;; Replace the low half of a V4SF value: the V2SF operand 2 is combined
;; with elements 2 and 3 of operand 1.  Five alternatives:
;;   0/1  register operand 2 -> shufps / vshufps with immediate 0xe4
;;   2/3  memory operand 2   -> movlps / vmovlps
;;   4    memory destination -> %vmovlps store
;; "length_immediate" is 1 only for the two shuffle alternatives.
5504 (define_insn "sse_loadlps"
5505 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,m")
5507 (match_operand:V2SF 2 "nonimmediate_operand" " 0,x,m,m,x")
5509 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,0,x,0")
5510 (parallel [(const_int 2) (const_int 3)]))))]
5513 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
5514 vshufps\t{$0xe4, %1, %2, %0|%0, %2, %1, 0xe4}
5515 movlps\t{%2, %0|%0, %q2}
5516 vmovlps\t{%2, %1, %0|%0, %1, %q2}
5517 %vmovlps\t{%2, %0|%q0, %2}"
5518 [(set_attr "isa" "noavx,avx,noavx,avx,*")
5519 (set_attr "type" "sseshuf,sseshuf,ssemov,ssemov,ssemov")
5520 (set_attr "ssememalign" "64")
5521 (set_attr "length_immediate" "1,1,*,*,*")
5522 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
5523 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
;; Register-only movss on V4SF operands.  Alternative 0 (noavx) ties
;; operand 1 to the destination and emits movss; alternative 1 (avx)
;; emits the three-operand vmovss.  The "mode" attribute is SF.
5525 (define_insn "sse_movss"
5526 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
5528 (match_operand:V4SF 2 "register_operand" " x,x")
5529 (match_operand:V4SF 1 "register_operand" " 0,x")
5533 movss\t{%2, %0|%0, %2}
5534 vmovss\t{%2, %1, %0|%0, %1, %2}"
5535 [(set_attr "isa" "noavx,avx")
5536 (set_attr "type" "ssemov")
5537 (set_attr "prefix" "orig,vex")
5538 (set_attr "mode" "SF")])
;; Broadcast element 0 of a V4SF register to every element of the
;; destination, for each mode in VF1_128_256.  Emitted as a VEX-encoded
;; vbroadcastss with a register source.
5540 (define_insn "avx2_vec_dup<mode>"
5541 [(set (match_operand:VF1_128_256 0 "register_operand" "=x")
5542 (vec_duplicate:VF1_128_256
5544 (match_operand:V4SF 1 "register_operand" "x")
5545 (parallel [(const_int 0)]))))]
5547 "vbroadcastss\t{%1, %0|%0, %1}"
5548 [(set_attr "type" "sselog1")
5549 (set_attr "prefix" "vex")
5550 (set_attr "mode" "<MODE>")])
;; Broadcast element 0 of a V8SF register into a V8SF destination.  The
;; template prints the source operand with the %x1 modifier and emits
;; vbroadcastss.
5552 (define_insn "avx2_vec_dupv8sf_1"
5553 [(set (match_operand:V8SF 0 "register_operand" "=x")
5556 (match_operand:V8SF 1 "register_operand" "x")
5557 (parallel [(const_int 0)]))))]
5559 "vbroadcastss\t{%x1, %0|%0, %x1}"
5560 [(set_attr "type" "sselog1")
5561 (set_attr "prefix" "vex")
5562 (set_attr "mode" "V8SF")])
;; Duplicate an SF scalar into a V4SF register.  Three alternatives:
;;   0  avx,   register source         : vshufps with immediate 0
;;   1  avx,   memory source           : vbroadcastss
;;   2  noavx, source tied to the dest : shufps with immediate 0
5564 (define_insn "vec_dupv4sf"
5565 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
5567 (match_operand:SF 1 "nonimmediate_operand" "x,m,0")))]
5570 vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0}
5571 vbroadcastss\t{%1, %0|%0, %1}
5572 shufps\t{$0, %0, %0|%0, %0, 0}"
5573 [(set_attr "isa" "avx,avx,noavx")
5574 (set_attr "type" "sseshuf1,ssemov,sseshuf1")
5575 (set_attr "length_immediate" "1,0,1")
5576 (set_attr "prefix_extra" "0,1,*")
5577 (set_attr "prefix" "vex,vex,orig")
5578 (set_attr "mode" "V4SF")])
5580 ;; Although insertps takes register source, we prefer
5581 ;; unpcklps with register source since it is shorter.
;; Build a V2SF value from two SF operands.  Seven alternatives:
;;   0/1  register operand 2 -> unpcklps / vunpcklps
;;   2/3  memory operand 2   -> insertps / vinsertps with immediate 0x10
;;   4    operand 2 is "C"   -> %vmovss load of operand 1
;;   5/6  MMX ("*y") dests   -> punpckldq / movd
5582 (define_insn "*vec_concatv2sf_sse4_1"
5583 [(set (match_operand:V2SF 0 "register_operand" "=x,x,x,x,x,*y ,*y")
5585 (match_operand:SF 1 "nonimmediate_operand" " 0,x,0,x,m, 0 , m")
5586 (match_operand:SF 2 "vector_move_operand" " x,x,m,m,C,*ym, C")))]
5589 unpcklps\t{%2, %0|%0, %2}
5590 vunpcklps\t{%2, %1, %0|%0, %1, %2}
5591 insertps\t{$0x10, %2, %0|%0, %2, 0x10}
5592 vinsertps\t{$0x10, %2, %1, %0|%0, %1, %2, 0x10}
5593 %vmovss\t{%1, %0|%0, %1}
5594 punpckldq\t{%2, %0|%0, %2}
5595 movd\t{%1, %0|%0, %1}"
5596 [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
5597 (set_attr "type" "sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov")
5598 (set_attr "prefix_data16" "*,*,1,*,*,*,*")
5599 (set_attr "prefix_extra" "*,*,1,1,*,*,*")
5600 (set_attr "length_immediate" "*,*,1,1,*,*,*")
5601 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig")
5602 (set_attr "mode" "V4SF,V4SF,V4SF,V4SF,SF,DI,DI")])
5604 ;; ??? In theory we can match memory for the MMX alternative, but allowing
5605 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
5606 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Build a V2SF value from two SF operands; operand 2 is restricted to a
;; register or zero (reg_or_0_operand).  Four alternatives:
;;   0  SSE register pair   -> unpcklps
;;   1  operand 2 is "C"    -> movss load of operand 1
;;   2  MMX register pair   -> punpckldq
;;   3  MMX, operand 2 "C"  -> movd load of operand 1
5607 (define_insn "*vec_concatv2sf_sse"
5608 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
5610 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
5611 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
5614 unpcklps\t{%2, %0|%0, %2}
5615 movss\t{%1, %0|%0, %1}
5616 punpckldq\t{%2, %0|%0, %2}
5617 movd\t{%1, %0|%0, %1}"
5618 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
5619 (set_attr "mode" "V4SF,SF,DI,DI")])
;; Build a V4SF value from two V2SF halves.  Four alternatives:
;;   0/1  register operand 2 -> movlhps / vmovlhps
;;   2/3  memory operand 2   -> movhps / vmovhps
5621 (define_insn "*vec_concatv4sf"
5622 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x,x")
5624 (match_operand:V2SF 1 "register_operand" " 0,x,0,x")
5625 (match_operand:V2SF 2 "nonimmediate_operand" " x,x,m,m")))]
5628 movlhps\t{%2, %0|%0, %2}
5629 vmovlhps\t{%2, %1, %0|%0, %1, %2}
5630 movhps\t{%2, %0|%0, %q2}
5631 vmovhps\t{%2, %1, %0|%0, %1, %q2}"
5632 [(set_attr "isa" "noavx,avx,noavx,avx")
5633 (set_attr "type" "ssemov")
5634 (set_attr "prefix" "orig,vex,orig,vex")
5635 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF")])
;; Vector-initialization expander for every mode in V_128.  All of the
;; work is delegated to ix86_expand_vector_init, called with false as its
;; first argument, operand 0 as the destination and operand 1 as the
;; initializer.
5637 (define_expand "vec_init<mode>"
5638 [(match_operand:V_128 0 "register_operand")
5642 ix86_expand_vector_init (false, operands[0], operands[1]);
5646 ;; Avoid combining registers from different units in a single alternative,
5647 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Merge a duplicated scalar (operand 2) with vector operand 1 for the
;; VI4F_128 modes.  Eleven alternatives are declared.  The first four
;; take operand 1 as "C", the next four take a register operand 1, and
;; the last three have a memory destination with operand 1 tied to it.
;; The "type" attribute is sselog for alternatives 0, 6 and 7, imov for
;; 9, fmov for 10 and ssemov for the rest.
;; NOTE(review): the templates for the memory-destination alternatives
;; are not visible in this excerpt.
5648 (define_insn "vec_set<mode>_0"
5649 [(set (match_operand:VI4F_128 0 "nonimmediate_operand"
5650 "=x,x,x ,x,x,x,x ,x ,m ,m ,m")
5652 (vec_duplicate:VI4F_128
5653 (match_operand:<ssescalarmode> 2 "general_operand"
5654 " x,m,*r,m,x,x,*rm,*rm,!x,!*re,!*fF"))
5655 (match_operand:VI4F_128 1 "vector_move_operand"
5656 " C,C,C ,C,0,x,0 ,x ,0 ,0 ,0")
5660 %vinsertps\t{$0xe, %d2, %0|%0, %d2, 0xe}
5661 %vmov<ssescalarmodesuffix>\t{%2, %0|%0, %2}
5662 %vmovd\t{%2, %0|%0, %2}
5663 movss\t{%2, %0|%0, %2}
5664 movss\t{%2, %0|%0, %2}
5665 vmovss\t{%2, %1, %0|%0, %1, %2}
5666 pinsrd\t{$0, %2, %0|%0, %2, 0}
5667 vpinsrd\t{$0, %2, %1, %0|%0, %1, %2, 0}
5671 [(set_attr "isa" "sse4,sse2,sse2,noavx,noavx,avx,sse4_noavx,avx,*,*,*")
5673 (cond [(eq_attr "alternative" "0,6,7")
5674 (const_string "sselog")
5675 (eq_attr "alternative" "9")
5676 (const_string "imov")
5677 (eq_attr "alternative" "10")
5678 (const_string "fmov")
5680 (const_string "ssemov")))
5681 (set_attr "prefix_extra" "*,*,*,*,*,*,1,1,*,*,*")
5682 (set_attr "length_immediate" "*,*,*,*,*,*,1,1,*,*,*")
5683 (set_attr "prefix" "maybe_vex,maybe_vex,maybe_vex,orig,orig,vex,orig,vex,*,*,*")
5684 (set_attr "mode" "SF,<ssescalarmode>,SI,SF,SF,SF,TI,TI,*,*,*")])
5686 ;; A subset is vec_setv4sf.
;; Insert the SF operand 2 into one element of the V4SF operand 1 with
;; insertps/vinsertps.  Operand 3 must be a constant whose exact_log2 is
;; below the number of V4SF units.  The output code rewrites operand 3 to
;; (exact_log2 << 4) for use as the instruction immediate, then picks
;; the template by which_alternative.
5687 (define_insn "*vec_setv4sf_sse4_1"
5688 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
5691 (match_operand:SF 2 "nonimmediate_operand" "xm,xm"))
5692 (match_operand:V4SF 1 "register_operand" "0,x")
5693 (match_operand:SI 3 "const_int_operand")))]
5695 && ((unsigned) exact_log2 (INTVAL (operands[3]))
5696 < GET_MODE_NUNITS (V4SFmode))"
5698 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
5699 switch (which_alternative)
5702 return "insertps\t{%3, %2, %0|%0, %2, %3}";
5704 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
5709 [(set_attr "isa" "noavx,avx")
5710 (set_attr "type" "sselog")
5711 (set_attr "prefix_data16" "1,*")
5712 (set_attr "prefix_extra" "1")
5713 (set_attr "length_immediate" "1")
5714 (set_attr "prefix" "orig,vex")
5715 (set_attr "mode" "V4SF")])
;; insertps/vinsertps modeled as an unspec with an 8-bit immediate in
;; operand 3.  When operand 2 is in memory, the output code takes the
;; immediate's top two bits (count_s), clears them from the immediate
;; (& 0x3f), and re-addresses the memory operand as an SFmode reference
;; at byte offset count_s * 4.  The template is then chosen by
;; which_alternative.
5717 (define_insn "sse4_1_insertps"
5718 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
5719 (unspec:V4SF [(match_operand:V4SF 2 "nonimmediate_operand" "xm,xm")
5720 (match_operand:V4SF 1 "register_operand" "0,x")
5721 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
5725 if (MEM_P (operands[2]))
5727 unsigned count_s = INTVAL (operands[3]) >> 6;
5729 operands[3] = GEN_INT (INTVAL (operands[3]) & 0x3f);
5730 operands[2] = adjust_address_nv (operands[2], SFmode, count_s * 4);
5732 switch (which_alternative)
5735 return "insertps\t{%3, %2, %0|%0, %2, %3}";
5737 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
5742 [(set_attr "isa" "noavx,avx")
5743 (set_attr "type" "sselog")
5744 (set_attr "prefix_data16" "1,*")
5745 (set_attr "prefix_extra" "1")
5746 (set_attr "length_immediate" "1")
5747 (set_attr "prefix" "orig,vex")
5748 (set_attr "mode" "V4SF")])
;; Post-reload split (TARGET_SSE) for a VI4F_128 memory destination that
;; receives a duplicated non-memory scalar.  The replacement is a plain
;; scalar store: operand 0 is re-addressed in <ssescalarmode> at offset 0
;; and set directly from operand 1.
5751 [(set (match_operand:VI4F_128 0 "memory_operand")
5753 (vec_duplicate:VI4F_128
5754 (match_operand:<ssescalarmode> 1 "nonmemory_operand"))
5757 "TARGET_SSE && reload_completed"
5758 [(set (match_dup 0) (match_dup 1))]
5759 "operands[0] = adjust_address (operands[0], <ssescalarmode>mode, 0);")
;; Element-insert expander for every mode in the V iterator.  It
;; delegates to ix86_expand_vector_set, passing false as the first
;; argument, operand 0 as the vector, operand 1 as the scalar and the
;; integer value of operand 2 as the element index.
5761 (define_expand "vec_set<mode>"
5762 [(match_operand:V 0 "register_operand")
5763 (match_operand:<ssescalarmode> 1 "register_operand")
5764 (match_operand 2 "const_int_operand")]
5767 ix86_expand_vector_set (false, operands[0], operands[1],
5768 INTVAL (operands[2]));
;; Extract element 0 of a V4SF value as an SF scalar.  Valid under
;; TARGET_SSE unless both operands are memory.  After reload the pattern
;; is split into a plain SF move: a register source is rewritten as an
;; SFmode REG with the same register number, and the other visible
;; statement re-addresses operand 1 as an SFmode reference at offset 0.
5772 (define_insn_and_split "*vec_extractv4sf_0"
5773 [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,f,r")
5775 (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m,m")
5776 (parallel [(const_int 0)])))]
5777 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
5779 "&& reload_completed"
5780 [(set (match_dup 0) (match_dup 1))]
5782 if (REG_P (operands[1]))
5783 operands[1] = gen_rtx_REG (SFmode, REGNO (operands[1]));
5785 operands[1] = adjust_address (operands[1], SFmode, 0);
;; Extract one element of a V4SF register, selected by operand 2, as an
;; SF scalar.  The only template visible here is %vextractps for
;; alternative 0 (destination "rm").  After reload, when the destination
;; is an SSE register, the insn is split: the destination is viewed as a
;; V4SF register and, depending on the element index, the split emits
;; either sse_shufps_v4sf with the index replicated (biased by 4 for the
;; upper selectors) or vec_interleave_highv4sf of the source with itself.
5788 (define_insn_and_split "*sse4_1_extractps"
5789 [(set (match_operand:SF 0 "nonimmediate_operand" "=rm,x,x")
5791 (match_operand:V4SF 1 "register_operand" "x,0,x")
5792 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n,n,n")])))]
5795 %vextractps\t{%2, %1, %0|%0, %1, %2}
5798 "&& reload_completed && SSE_REG_P (operands[0])"
5801 rtx dest = gen_rtx_REG (V4SFmode, REGNO (operands[0]));
5802 switch (INTVAL (operands[2]))
5806 emit_insn (gen_sse_shufps_v4sf (dest, operands[1], operands[1],
5807 operands[2], operands[2],
5808 GEN_INT (INTVAL (operands[2]) + 4),
5809 GEN_INT (INTVAL (operands[2]) + 4)));
5812 emit_insn (gen_vec_interleave_highv4sf (dest, operands[1], operands[1]));
5815 /* 0 should be handled by the *vec_extractv4sf_0 pattern above. */
5820 [(set_attr "isa" "*,noavx,avx")
5821 (set_attr "type" "sselog,*,*")
5822 (set_attr "prefix_data16" "1,*,*")
5823 (set_attr "prefix_extra" "1,*,*")
5824 (set_attr "length_immediate" "1,*,*")
5825 (set_attr "prefix" "maybe_vex,*,*")
5826 (set_attr "mode" "V4SF,*,*")])
;; Extract one element of a V4SF value held in offsettable memory.  After
;; reload the pattern is split into a plain scalar load: operand 1 is
;; re-addressed as an SFmode reference at byte offset (element index * 4).
5828 (define_insn_and_split "*vec_extractv4sf_mem"
5829 [(set (match_operand:SF 0 "register_operand" "=x,*r,f")
5831 (match_operand:V4SF 1 "memory_operand" "o,o,o")
5832 (parallel [(match_operand 2 "const_0_to_3_operand" "n,n,n")])))]
5835 "&& reload_completed"
5836 [(set (match_dup 0) (match_dup 1))]
5838 operands[1] = adjust_address (operands[1], SFmode, INTVAL (operands[2]) * 4);
;; Expander for the masked 32x4 extract from a V16FI register.  Operand 2
;; (0..3) chooses the 128-bit quarter, operand 3 is the merge source and
;; operand 4 the QImode mask.  If operand 0 is memory while operand 3 is
;; a CONST_VECTOR, operand 0 is first passed through force_reg.  The
;; switch then emits the _1_mask insn with four consecutive element
;; indices starting at 0, 4, 8 or 12.
5841 (define_expand "avx512f_vextract<shuffletype>32x4_mask"
5842 [(match_operand:<ssequartermode> 0 "nonimmediate_operand")
5843 (match_operand:V16FI 1 "register_operand")
5844 (match_operand:SI 2 "const_0_to_3_operand")
5845 (match_operand:<ssequartermode> 3 "nonimmediate_operand")
5846 (match_operand:QI 4 "register_operand")]
5849 if (MEM_P (operands[0]) && GET_CODE (operands[3]) == CONST_VECTOR)
5850 operands[0] = force_reg (<ssequartermode>mode, operands[0]);
5851 switch (INTVAL (operands[2]))
5854 emit_insn (gen_avx512f_vextract<shuffletype>32x4_1_mask (operands[0],
5855 operands[1], GEN_INT (0), GEN_INT (1), GEN_INT (2),
5856 GEN_INT (3), operands[3], operands[4]));
5859 emit_insn (gen_avx512f_vextract<shuffletype>32x4_1_mask (operands[0],
5860 operands[1], GEN_INT (4), GEN_INT (5), GEN_INT (6),
5861 GEN_INT (7), operands[3], operands[4]));
5864 emit_insn (gen_avx512f_vextract<shuffletype>32x4_1_mask (operands[0],
5865 operands[1], GEN_INT (8), GEN_INT (9), GEN_INT (10),
5866 GEN_INT (11), operands[3], operands[4]));
5869 emit_insn (gen_avx512f_vextract<shuffletype>32x4_1_mask (operands[0],
5870 operands[1], GEN_INT (12), GEN_INT (13), GEN_INT (14),
5871 GEN_INT (15), operands[3], operands[4]));
;; Masked 32x4 extract whose destination is memory and whose merge source
;; (operand 6) is that same memory.  The condition requires operands 2..5
;; to be four consecutive indices and operand 6 to be rtx_equal_p to
;; operand 0.  The output code converts the first index into the
;; instruction immediate by shifting it right by 2.
5879 (define_insn "avx512f_vextract<shuffletype>32x4_1_maskm"
5880 [(set (match_operand:<ssequartermode> 0 "memory_operand" "=m")
5881 (vec_merge:<ssequartermode>
5882 (vec_select:<ssequartermode>
5883 (match_operand:V16FI 1 "register_operand" "v")
5884 (parallel [(match_operand 2 "const_0_to_15_operand")
5885 (match_operand 3 "const_0_to_15_operand")
5886 (match_operand 4 "const_0_to_15_operand")
5887 (match_operand 5 "const_0_to_15_operand")]))
5888 (match_operand:<ssequartermode> 6 "memory_operand" "0")
5889 (match_operand:QI 7 "register_operand" "Yk")))]
5891 && (INTVAL (operands[2]) == (INTVAL (operands[3]) - 1)
5892 && INTVAL (operands[3]) == (INTVAL (operands[4]) - 1)
5893 && INTVAL (operands[4]) == (INTVAL (operands[5]) - 1))
5894 && rtx_equal_p (operands[6], operands[0])"
5896 operands[2] = GEN_INT ((INTVAL (operands[2])) >> 2);
5897 return "vextract<shuffletype>32x4\t{%2, %1, %0%{%7%}|%0%{%7%}, %1, %2}";
5899 [(set_attr "type" "sselog")
5900 (set_attr "prefix_extra" "1")
5901 (set_attr "length_immediate" "1")
5902 (set_attr "memory" "store")
5903 (set_attr "prefix" "evex")
5904 (set_attr "mode" "<sseinsnmode>")])
;; 32x4 extract from a V16FI register, optionally masked through the
;; <mask_name>/<mask_operand6> substitution.  The condition requires
;; operands 2..5 to be four consecutive indices.  The output code
;; converts the first index into the instruction immediate by shifting it
;; right by 2.  The "memory" attribute is "store" when operand 0 is a MEM
;; and "none" otherwise.
5906 (define_insn "<mask_codefor>avx512f_vextract<shuffletype>32x4_1<mask_name>"
5907 [(set (match_operand:<ssequartermode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
5908 (vec_select:<ssequartermode>
5909 (match_operand:V16FI 1 "register_operand" "v")
5910 (parallel [(match_operand 2 "const_0_to_15_operand")
5911 (match_operand 3 "const_0_to_15_operand")
5912 (match_operand 4 "const_0_to_15_operand")
5913 (match_operand 5 "const_0_to_15_operand")])))]
5915 && (INTVAL (operands[2]) == (INTVAL (operands[3]) - 1)
5916 && INTVAL (operands[3]) == (INTVAL (operands[4]) - 1)
5917 && INTVAL (operands[4]) == (INTVAL (operands[5]) - 1))"
5919 operands[2] = GEN_INT ((INTVAL (operands[2])) >> 2);
5920 return "vextract<shuffletype>32x4\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
5922 [(set_attr "type" "sselog")
5923 (set_attr "prefix_extra" "1")
5924 (set_attr "length_immediate" "1")
5925 (set (attr "memory")
5926 (if_then_else (match_test "MEM_P (operands[0])")
5927 (const_string "store")
5928 (const_string "none")))
5929 (set_attr "prefix" "evex")
5930 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the masked 64x4 extract from a V8FI register.  Operand 2
;; (0 or 1) chooses the 256-bit half, operand 3 is the merge source and
;; operand 4 the QImode mask.  The switch picks
;; gen_vec_extract_lo_<mode>_mask or gen_vec_extract_hi_<mode>_mask, and
;; the chosen generator is called with operands 0, 1, 3 and 4.
;; If operand 0 is memory while operand 3 is a CONST_VECTOR, operand 0 is
;; first passed through force_reg.  Operand 0 is declared in
;; <ssehalfvecmode>, so force_reg must be given that same mode; passing
;; <ssequartermode> would not match the operand.
5932 (define_expand "avx512f_vextract<shuffletype>64x4_mask"
5933 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
5934 (match_operand:V8FI 1 "register_operand")
5935 (match_operand:SI 2 "const_0_to_1_operand")
5936 (match_operand:<ssehalfvecmode> 3 "nonimmediate_operand")
5937 (match_operand:QI 4 "register_operand")]
5940 rtx (*insn)(rtx, rtx, rtx, rtx);
5942 if (MEM_P (operands[0]) && GET_CODE (operands[3]) == CONST_VECTOR)
5943 operands[0] = force_reg (<ssehalfvecmode>mode, operands[0]);
5945 switch (INTVAL (operands[2]))
5948 insn = gen_vec_extract_lo_<mode>_mask;
5951 insn = gen_vec_extract_hi_<mode>_mask;
5957 emit_insn (insn (operands[0], operands[1], operands[3], operands[4]));
;; Post-reload split (TARGET_AVX512F, not both operands in memory) for
;; selecting elements 0..3 of a V8FI value.  The visible statements
;; re-express operand 1 in <ssehalfvecmode>, either as a REG with the
;; same register number or through gen_lowpart, and emit an ordinary
;; move into operand 0.
5962 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
5963 (vec_select:<ssehalfvecmode>
5964 (match_operand:V8FI 1 "nonimmediate_operand")
5965 (parallel [(const_int 0) (const_int 1)
5966 (const_int 2) (const_int 3)])))]
5967 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))
5968 && reload_completed"
5971 rtx op1 = operands[1];
5973 op1 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op1));
5975 op1 = gen_lowpart (<ssehalfvecmode>mode, op1);
5976 emit_move_insn (operands[0], op1);
;; Masked extract of elements 0..3 of a V8FI register into memory, where
;; the merge source (operand 2) is that same memory; the condition checks
;; this with rtx_equal_p.  Emits vextract<shuffletype>64x4 with immediate
;; 0x0 and operand 3 as the write mask.
5980 (define_insn "vec_extract_lo_<mode>_maskm"
5981 [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
5982 (vec_merge:<ssehalfvecmode>
5983 (vec_select:<ssehalfvecmode>
5984 (match_operand:V8FI 1 "register_operand" "v")
5985 (parallel [(const_int 0) (const_int 1)
5986 (const_int 2) (const_int 3)]))
5987 (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
5988 (match_operand:QI 3 "register_operand" "Yk")))]
5990 && rtx_equal_p (operands[2], operands[0])"
5991 "vextract<shuffletype>64x4\t{$0x0, %1, %0%{%3%}|%0%{%3%}, %1, 0x0}"
5992 [(set_attr "type" "sselog")
5993 (set_attr "prefix_extra" "1")
5994 (set_attr "length_immediate" "1")
5995 (set_attr "prefix" "evex")
5996 (set_attr "mode" "<sseinsnmode>")])
;; Extract elements 0..3 of a V8FI value, optionally masked through the
;; <mask_name>/<mask_operand2> substitution.  Valid under TARGET_AVX512F
;; unless both operands are memory.  The visible return statement emits
;; vextract<shuffletype>64x4 with immediate 0x0.  The "memory" attribute
;; is "store" when operand 0 is a MEM and "none" otherwise.
;; NOTE(review): the rest of the output code is not visible in this
;; excerpt.
5998 (define_insn "vec_extract_lo_<mode><mask_name>"
5999 [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=<store_mask_constraint>,v")
6000 (vec_select:<ssehalfvecmode>
6001 (match_operand:V8FI 1 "nonimmediate_operand" "v,m")
6002 (parallel [(const_int 0) (const_int 1)
6003 (const_int 2) (const_int 3)])))]
6004 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6007 return "vextract<shuffletype>64x4\t{$0x0, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x0}";
6011 [(set_attr "type" "sselog")
6012 (set_attr "prefix_extra" "1")
6013 (set_attr "length_immediate" "1")
6014 (set (attr "memory")
6015 (if_then_else (match_test "MEM_P (operands[0])")
6016 (const_string "store")
6017 (const_string "none")))
6018 (set_attr "prefix" "evex")
6019 (set_attr "mode" "<sseinsnmode>")])
;; Masked extract of elements 4..7 of a V8FI register into memory, where
;; the merge source (operand 2) is that same memory; the condition checks
;; this with rtx_equal_p.  Emits vextract<shuffletype>64x4 with immediate
;; 0x1 and operand 3 as the write mask.
6021 (define_insn "vec_extract_hi_<mode>_maskm"
6022 [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
6023 (vec_merge:<ssehalfvecmode>
6024 (vec_select:<ssehalfvecmode>
6025 (match_operand:V8FI 1 "register_operand" "v")
6026 (parallel [(const_int 4) (const_int 5)
6027 (const_int 6) (const_int 7)]))
6028 (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
6029 (match_operand:QI 3 "register_operand" "Yk")))]
6031 && rtx_equal_p (operands[2], operands[0])"
6032 "vextract<shuffletype>64x4\t{$0x1, %1, %0%{%3%}|%0%{%3%}, %1, 0x1}"
6033 [(set_attr "type" "sselog")
6034 (set_attr "prefix_extra" "1")
6035 (set_attr "length_immediate" "1")
6036 (set_attr "memory" "store")
6037 (set_attr "prefix" "evex")
6038 (set_attr "mode" "<sseinsnmode>")])
;; Extract elements 4..7 of a V8FI register, optionally masked through
;; the <mask_name>/<mask_operand2> substitution.  Emits
;; vextract<shuffletype>64x4 with immediate 0x1.  The "memory" attribute
;; is "store" when operand 0 is a MEM and "none" otherwise.
6040 (define_insn "vec_extract_hi_<mode><mask_name>"
6041 [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
6042 (vec_select:<ssehalfvecmode>
6043 (match_operand:V8FI 1 "register_operand" "v")
6044 (parallel [(const_int 4) (const_int 5)
6045 (const_int 6) (const_int 7)])))]
6047 "vextract<shuffletype>64x4\t{$0x1, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x1}"
6048 [(set_attr "type" "sselog")
6049 (set_attr "prefix_extra" "1")
6050 (set_attr "length_immediate" "1")
6051 (set (attr "memory")
6052 (if_then_else (match_test "MEM_P (operands[0])")
6053 (const_string "store")
6054 (const_string "none")))
6055 (set_attr "prefix" "evex")
6056 (set_attr "mode" "<sseinsnmode>")])
;; Expander that extracts one half of a V_256 register.  Operand 2 (0 or
;; 1) is switched on to pick gen_vec_extract_lo_<mode> or
;; gen_vec_extract_hi_<mode>, and the chosen generator is called with
;; operands 0 and 1.
6058 (define_expand "avx_vextractf128<mode>"
6059 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
6060 (match_operand:V_256 1 "register_operand")
6061 (match_operand:SI 2 "const_0_to_1_operand")]
6064 rtx (*insn)(rtx, rtx);
6066 switch (INTVAL (operands[2]))
6069 insn = gen_vec_extract_lo_<mode>;
6072 insn = gen_vec_extract_hi_<mode>;
6078 emit_insn (insn (operands[0], operands[1]));
;; Extract elements 0..7 of a V16FI value.  Valid under TARGET_AVX512F
;; unless both operands are memory.  After reload the pattern is split:
;; the visible statements re-express operand 1 in <ssehalfvecmode>,
;; either as a REG with the same register number or through gen_lowpart,
;; and emit an ordinary move into operand 0.
6082 (define_insn_and_split "vec_extract_lo_<mode>"
6083 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,m")
6084 (vec_select:<ssehalfvecmode>
6085 (match_operand:V16FI 1 "nonimmediate_operand" "vm,v")
6086 (parallel [(const_int 0) (const_int 1)
6087 (const_int 2) (const_int 3)
6088 (const_int 4) (const_int 5)
6089 (const_int 6) (const_int 7)])))]
6090 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6092 "&& reload_completed"
6095 rtx op1 = operands[1];
6097 op1 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op1));
6099 op1 = gen_lowpart (<ssehalfvecmode>mode, op1);
6100 emit_move_insn (operands[0], op1);
;; High half (elements 8..15) of a V16FI register, written to a register or
;; to memory with vextracti64x4 and immediate 0x1.
;; Operand 1 uses register_operand: both constraint alternatives are "v", so
;; a memory source can never be matched directly, and the 256-bit
;; vec_extract_hi patterns use register_operand for the same situation.
6104 (define_insn "vec_extract_hi_<mode>"
6105 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,m")
6106 (vec_select:<ssehalfvecmode>
6107 (match_operand:V16FI 1 "register_operand" "v,v")
6108 (parallel [(const_int 8) (const_int 9)
6109 (const_int 10) (const_int 11)
6110 (const_int 12) (const_int 13)
6111 (const_int 14) (const_int 15)])))]
6113 "vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
6114 [(set_attr "type" "sselog")
6115 (set_attr "prefix_extra" "1")
6116 (set_attr "length_immediate" "1")
6117 (set_attr "memory" "none,store")
6118 (set_attr "prefix" "evex")
6119 (set_attr "mode" "XI")])
;; Low half (elements 0..1) of a VI8F_256 value; requires TARGET_AVX and
;; rejects memory-to-memory.  After reload it is split into a plain
;; (set op0 op1): a register source is re-expressed as the half-mode REG with
;; the same REGNO, and adjust_address narrows the source to the half mode at
;; offset 0.
6121 (define_insn_and_split "vec_extract_lo_<mode>"
6122 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
6123 (vec_select:<ssehalfvecmode>
6124 (match_operand:VI8F_256 1 "nonimmediate_operand" "xm,x")
6125 (parallel [(const_int 0) (const_int 1)])))]
6126 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6128 "&& reload_completed"
6129 [(set (match_dup 0) (match_dup 1))]
6131 if (REG_P (operands[1]))
6132 operands[1] = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (operands[1]));
6134 operands[1] = adjust_address (operands[1], <ssehalfvecmode>mode, 0);
;; High half (elements 2..3) of a VI8F_256 register, to a register or to
;; memory.  Emits vextract<i128> with immediate 0x1 using the VEX prefix.
6137 (define_insn "vec_extract_hi_<mode>"
6138 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
6139 (vec_select:<ssehalfvecmode>
6140 (match_operand:VI8F_256 1 "register_operand" "x,x")
6141 (parallel [(const_int 2) (const_int 3)])))]
6143 "vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}"
6144 [(set_attr "type" "sselog")
6145 (set_attr "prefix_extra" "1")
6146 (set_attr "length_immediate" "1")
6147 (set_attr "memory" "none,store")
6148 (set_attr "prefix" "vex")
6149 (set_attr "mode" "<sseinsnmode>")])
;; Low half (elements 0..3) of a VI4F_256 value; requires TARGET_AVX and
;; rejects memory-to-memory.  After reload it is split into a plain
;; (set op0 op1): a register source is re-expressed as the half-mode REG with
;; the same REGNO, and adjust_address narrows the source to the half mode at
;; offset 0.
6151 (define_insn_and_split "vec_extract_lo_<mode>"
6152 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
6153 (vec_select:<ssehalfvecmode>
6154 (match_operand:VI4F_256 1 "nonimmediate_operand" "xm,x")
6155 (parallel [(const_int 0) (const_int 1)
6156 (const_int 2) (const_int 3)])))]
6157 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6159 "&& reload_completed"
6160 [(set (match_dup 0) (match_dup 1))]
6162 if (REG_P (operands[1]))
6163 operands[1] = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (operands[1]));
6165 operands[1] = adjust_address (operands[1], <ssehalfvecmode>mode, 0);
;; High half (elements 4..7) of a VI4F_256 register, to a register or to
;; memory.  Emits vextract<i128> with immediate 0x1 using the VEX prefix.
6168 (define_insn "vec_extract_hi_<mode>"
6169 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
6170 (vec_select:<ssehalfvecmode>
6171 (match_operand:VI4F_256 1 "register_operand" "x,x")
6172 (parallel [(const_int 4) (const_int 5)
6173 (const_int 6) (const_int 7)])))]
6175 "vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}"
6176 [(set_attr "type" "sselog")
6177 (set_attr "prefix_extra" "1")
6178 (set_attr "length_immediate" "1")
6179 (set_attr "memory" "none,store")
6180 (set_attr "prefix" "vex")
6181 (set_attr "mode" "<sseinsnmode>")])
;; Low half (elements 0..15) of a V32HI value as V16HI; requires
;; TARGET_AVX512F and rejects memory-to-memory.  After reload it is split
;; into a plain (set op0 op1): a register source is re-expressed as the
;; V16HI REG with the same REGNO, and adjust_address narrows the source to
;; V16HImode at offset 0.
6183 (define_insn_and_split "vec_extract_lo_v32hi"
6184 [(set (match_operand:V16HI 0 "nonimmediate_operand" "=v,m")
6186 (match_operand:V32HI 1 "nonimmediate_operand" "vm,v")
6187 (parallel [(const_int 0) (const_int 1)
6188 (const_int 2) (const_int 3)
6189 (const_int 4) (const_int 5)
6190 (const_int 6) (const_int 7)
6191 (const_int 8) (const_int 9)
6192 (const_int 10) (const_int 11)
6193 (const_int 12) (const_int 13)
6194 (const_int 14) (const_int 15)])))]
6195 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6197 "&& reload_completed"
6198 [(set (match_dup 0) (match_dup 1))]
6200 if (REG_P (operands[1]))
6201 operands[1] = gen_rtx_REG (V16HImode, REGNO (operands[1]));
6203 operands[1] = adjust_address (operands[1], V16HImode, 0);
;; High half (elements 16..31) of a V32HI register as V16HI, written to a
;; register or to memory with vextracti64x4 and immediate 0x1.
;; Operand 1 uses register_operand: both constraint alternatives are "v", so
;; a memory source can never be matched directly, and vec_extract_hi_v16hi
;; uses register_operand for the same situation.
6206 (define_insn "vec_extract_hi_v32hi"
6207 [(set (match_operand:V16HI 0 "nonimmediate_operand" "=v,m")
6209 (match_operand:V32HI 1 "register_operand" "v,v")
6210 (parallel [(const_int 16) (const_int 17)
6211 (const_int 18) (const_int 19)
6212 (const_int 20) (const_int 21)
6213 (const_int 22) (const_int 23)
6214 (const_int 24) (const_int 25)
6215 (const_int 26) (const_int 27)
6216 (const_int 28) (const_int 29)
6217 (const_int 30) (const_int 31)])))]
6219 "vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
6220 [(set_attr "type" "sselog")
6221 (set_attr "prefix_extra" "1")
6222 (set_attr "length_immediate" "1")
6223 (set_attr "memory" "none,store")
6224 (set_attr "prefix" "evex")
6225 (set_attr "mode" "XI")])
;; Low half (elements 0..7) of a V16HI value as V8HI; requires TARGET_AVX
;; and rejects memory-to-memory.  After reload it is split into a plain
;; (set op0 op1): a register source is re-expressed as the V8HI REG with the
;; same REGNO, and adjust_address narrows the source to V8HImode at offset 0.
6227 (define_insn_and_split "vec_extract_lo_v16hi"
6228 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
6230 (match_operand:V16HI 1 "nonimmediate_operand" "xm,x")
6231 (parallel [(const_int 0) (const_int 1)
6232 (const_int 2) (const_int 3)
6233 (const_int 4) (const_int 5)
6234 (const_int 6) (const_int 7)])))]
6235 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6237 "&& reload_completed"
6238 [(set (match_dup 0) (match_dup 1))]
6240 if (REG_P (operands[1]))
6241 operands[1] = gen_rtx_REG (V8HImode, REGNO (operands[1]));
6243 operands[1] = adjust_address (operands[1], V8HImode, 0);
;; High half (elements 8..15) of a V16HI register as V8HI, to a register or
;; to memory.  Emits vextract%~128 with immediate 0x1 using the VEX prefix.
;; NOTE(review): "%~" is an output-template modifier resolved when the
;; instruction is printed; presumably it selects the i/f mnemonic letter --
;; confirm against the i386 operand-printing code.
6246 (define_insn "vec_extract_hi_v16hi"
6247 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
6249 (match_operand:V16HI 1 "register_operand" "x,x")
6250 (parallel [(const_int 8) (const_int 9)
6251 (const_int 10) (const_int 11)
6252 (const_int 12) (const_int 13)
6253 (const_int 14) (const_int 15)])))]
6255 "vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}"
6256 [(set_attr "type" "sselog")
6257 (set_attr "prefix_extra" "1")
6258 (set_attr "length_immediate" "1")
6259 (set_attr "memory" "none,store")
6260 (set_attr "prefix" "vex")
6261 (set_attr "mode" "OI")])
;; Low half (elements 0..31) of a V64QI value as V32QI; requires
;; TARGET_AVX512F and rejects memory-to-memory.  After reload it is split
;; into a plain (set op0 op1): a register source is re-expressed as the
;; V32QI REG with the same REGNO, and adjust_address narrows the source to
;; V32QImode at offset 0.
6263 (define_insn_and_split "vec_extract_lo_v64qi"
6264 [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m")
6266 (match_operand:V64QI 1 "nonimmediate_operand" "vm,v")
6267 (parallel [(const_int 0) (const_int 1)
6268 (const_int 2) (const_int 3)
6269 (const_int 4) (const_int 5)
6270 (const_int 6) (const_int 7)
6271 (const_int 8) (const_int 9)
6272 (const_int 10) (const_int 11)
6273 (const_int 12) (const_int 13)
6274 (const_int 14) (const_int 15)
6275 (const_int 16) (const_int 17)
6276 (const_int 18) (const_int 19)
6277 (const_int 20) (const_int 21)
6278 (const_int 22) (const_int 23)
6279 (const_int 24) (const_int 25)
6280 (const_int 26) (const_int 27)
6281 (const_int 28) (const_int 29)
6282 (const_int 30) (const_int 31)])))]
6283 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6285 "&& reload_completed"
6286 [(set (match_dup 0) (match_dup 1))]
6288 if (REG_P (operands[1]))
6289 operands[1] = gen_rtx_REG (V32QImode, REGNO (operands[1]));
6291 operands[1] = adjust_address (operands[1], V32QImode, 0);
;; High half (elements 32..63) of a V64QI register as V32QI, written to a
;; register or to memory with vextracti64x4 and immediate 0x1.
;; Operand 1 uses register_operand: both constraint alternatives are "v", so
;; a memory source can never be matched directly, and vec_extract_hi_v32qi
;; uses register_operand for the same situation.
6294 (define_insn "vec_extract_hi_v64qi"
6295 [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m")
6297 (match_operand:V64QI 1 "register_operand" "v,v")
6298 (parallel [(const_int 32) (const_int 33)
6299 (const_int 34) (const_int 35)
6300 (const_int 36) (const_int 37)
6301 (const_int 38) (const_int 39)
6302 (const_int 40) (const_int 41)
6303 (const_int 42) (const_int 43)
6304 (const_int 44) (const_int 45)
6305 (const_int 46) (const_int 47)
6306 (const_int 48) (const_int 49)
6307 (const_int 50) (const_int 51)
6308 (const_int 52) (const_int 53)
6309 (const_int 54) (const_int 55)
6310 (const_int 56) (const_int 57)
6311 (const_int 58) (const_int 59)
6312 (const_int 60) (const_int 61)
6313 (const_int 62) (const_int 63)])))]
6315 "vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
6316 [(set_attr "type" "sselog")
6317 (set_attr "prefix_extra" "1")
6318 (set_attr "length_immediate" "1")
6319 (set_attr "memory" "none,store")
6320 (set_attr "prefix" "evex")
6321 (set_attr "mode" "XI")])
;; Low half (elements 0..15) of a V32QI value as V16QI; requires TARGET_AVX
;; and rejects memory-to-memory.  After reload it is split into a plain
;; (set op0 op1): a register source is re-expressed as the V16QI REG with
;; the same REGNO, and adjust_address narrows the source to V16QImode at
;; offset 0.
6323 (define_insn_and_split "vec_extract_lo_v32qi"
6324 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
6326 (match_operand:V32QI 1 "nonimmediate_operand" "xm,x")
6327 (parallel [(const_int 0) (const_int 1)
6328 (const_int 2) (const_int 3)
6329 (const_int 4) (const_int 5)
6330 (const_int 6) (const_int 7)
6331 (const_int 8) (const_int 9)
6332 (const_int 10) (const_int 11)
6333 (const_int 12) (const_int 13)
6334 (const_int 14) (const_int 15)])))]
6335 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6337 "&& reload_completed"
6338 [(set (match_dup 0) (match_dup 1))]
6340 if (REG_P (operands[1]))
6341 operands[1] = gen_rtx_REG (V16QImode, REGNO (operands[1]));
6343 operands[1] = adjust_address (operands[1], V16QImode, 0);
;; High half (elements 16..31) of a V32QI register as V16QI, to a register
;; or to memory.  Emits vextract%~128 with immediate 0x1 using the VEX
;; prefix.
;; NOTE(review): "%~" is an output-template modifier resolved when the
;; instruction is printed; presumably it selects the i/f mnemonic letter --
;; confirm against the i386 operand-printing code.
6346 (define_insn "vec_extract_hi_v32qi"
6347 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
6349 (match_operand:V32QI 1 "register_operand" "x,x")
6350 (parallel [(const_int 16) (const_int 17)
6351 (const_int 18) (const_int 19)
6352 (const_int 20) (const_int 21)
6353 (const_int 22) (const_int 23)
6354 (const_int 24) (const_int 25)
6355 (const_int 26) (const_int 27)
6356 (const_int 28) (const_int 29)
6357 (const_int 30) (const_int 31)])))]
6359 "vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}"
6360 [(set_attr "type" "sselog")
6361 (set_attr "prefix_extra" "1")
6362 (set_attr "length_immediate" "1")
6363 (set_attr "memory" "none,store")
6364 (set_attr "prefix" "vex")
6365 (set_attr "mode" "OI")])
6367 ;; Modes handled by vec_extract patterns.
;; The 128-bit modes are listed unconditionally, the 256-bit modes are
;; guarded by TARGET_AVX, and the 512-bit SI/DI/SF/DF modes are guarded by
;; TARGET_AVX512F.  No 512-bit QI or HI mode is listed.
6368 (define_mode_iterator VEC_EXTRACT_MODE
6369 [(V32QI "TARGET_AVX") V16QI
6370 (V16HI "TARGET_AVX") V8HI
6371 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
6372 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
6373 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
6374 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF])
;; Scalar element extraction for every mode in VEC_EXTRACT_MODE.  Operand 0
;; is the scalar destination register, operand 1 the vector register and
;; operand 2 a constant element index.  All work is delegated to
;; ix86_expand_vector_extract, called with a leading "false" argument whose
;; meaning is defined by that helper.
6376 (define_expand "vec_extract<mode>"
6377 [(match_operand:<ssescalarmode> 0 "register_operand")
6378 (match_operand:VEC_EXTRACT_MODE 1 "register_operand")
6379 (match_operand 2 "const_int_operand")]
6382 ix86_expand_vector_extract (false, operands[0], operands[1],
6383 INTVAL (operands[2]));
6387 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6389 ;; Parallel double-precision floating point element swizzling
6391 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; 512-bit vunpckhpd on V8DF operands 1 and 2, with optional masking from the
;; <mask_name> substitution.  The selection list is (1 9 3 11 5 13 7 15).
;; NOTE(review): the RTL wrapper that combines operands 1 and 2 before the
;; selection is not visible here; indices 8..15 presumably address
;; operand 2 -- confirm.
6393 (define_insn "<mask_codefor>avx512f_unpckhpd512<mask_name>"
6394 [(set (match_operand:V8DF 0 "register_operand" "=v")
6397 (match_operand:V8DF 1 "nonimmediate_operand" "v")
6398 (match_operand:V8DF 2 "nonimmediate_operand" "vm"))
6399 (parallel [(const_int 1) (const_int 9)
6400 (const_int 3) (const_int 11)
6401 (const_int 5) (const_int 13)
6402 (const_int 7) (const_int 15)])))]
6404 "vunpckhpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
6405 [(set_attr "type" "sselog")
6406 (set_attr "prefix" "evex")
6407 (set_attr "mode" "V8DF")])
6409 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; 256-bit vunpckhpd on V4DF operands 1 (register) and 2 (register or
;; memory).  The selection list is (1 5 3 7).
6410 (define_insn "avx_unpckhpd256"
6411 [(set (match_operand:V4DF 0 "register_operand" "=x")
6414 (match_operand:V4DF 1 "register_operand" "x")
6415 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
6416 (parallel [(const_int 1) (const_int 5)
6417 (const_int 3) (const_int 7)])))]
6419 "vunpckhpd\t{%2, %1, %0|%0, %1, %2}"
6420 [(set_attr "type" "sselog")
6421 (set_attr "prefix" "vex")
6422 (set_attr "mode" "V4DF")])
;; Three-step expansion of a V4DF high interleave.  Selections (0 4 2 6) and
;; (1 5 3 7) over operands 1 and 2 are followed by a final (2 3 6 7)
;; selection into operand 0.  Operands 3 and 4 are fresh V4DF pseudos
;; created in the preparation code.
;; The match_operands carry no constraint strings: define_expand does not
;; use constraints, so the former "x"/"xm" strings had no effect.
6424 (define_expand "vec_interleave_highv4df"
6428 (match_operand:V4DF 1 "register_operand")
6429 (match_operand:V4DF 2 "nonimmediate_operand"))
6430 (parallel [(const_int 0) (const_int 4)
6431 (const_int 2) (const_int 6)])))
6437 (parallel [(const_int 1) (const_int 5)
6438 (const_int 3) (const_int 7)])))
6439 (set (match_operand:V4DF 0 "register_operand")
6444 (parallel [(const_int 2) (const_int 3)
6445 (const_int 6) (const_int 7)])))]
6448 operands[3] = gen_reg_rtx (V4DFmode);
6449 operands[4] = gen_reg_rtx (V4DFmode);
;; Expander for the V2DF high interleave.  Both sources may be register or
;; memory; when ix86_vec_interleave_v2df_operator_ok (operands, 1) rejects
;; the operand combination, operand 2 is forced into a register.
6453 (define_expand "vec_interleave_highv2df"
6454 [(set (match_operand:V2DF 0 "register_operand")
6457 (match_operand:V2DF 1 "nonimmediate_operand")
6458 (match_operand:V2DF 2 "nonimmediate_operand"))
6459 (parallel [(const_int 1)
6463 if (!ix86_vec_interleave_v2df_operator_ok (operands, 1))
6464 operands[2] = force_reg (V2DFmode, operands[2]);
;; Matcher for the V2DF high interleave, gated on TARGET_SSE2 and
;; ix86_vec_interleave_v2df_operator_ok (operands, 1).  Six alternatives, in
;; order: unpckhpd (noavx), vunpckhpd (avx), movddup of %H1 (sse3),
;; movlpd from %H1 (noavx), vmovlpd from %H1 (avx), and movhpd storing to
;; memory.  The per-alternative "isa", "type", "prefix" and "mode"
;; attributes follow the same order.
6467 (define_insn "*vec_interleave_highv2df"
6468 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m")
6471 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,o,o,o,x")
6472 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,1,0,x,0"))
6473 (parallel [(const_int 1)
6475 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
6477 unpckhpd\t{%2, %0|%0, %2}
6478 vunpckhpd\t{%2, %1, %0|%0, %1, %2}
6479 %vmovddup\t{%H1, %0|%0, %H1}
6480 movlpd\t{%H1, %0|%0, %H1}
6481 vmovlpd\t{%H1, %2, %0|%0, %2, %H1}
6482 %vmovhpd\t{%1, %0|%q0, %1}"
6483 [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
6484 (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
6485 (set_attr "ssememalign" "64")
6486 (set_attr "prefix_data16" "*,*,*,1,*,1")
6487 (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex")
6488 (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,V1DF")])
;; Named expander for the 512-bit movddup form.  It has a single V8DF source
;; (register or memory) and the selection list (0 8 2 10 4 12 6 14), the
;; same list used by the avx512f_unpcklpd512 patterns.
6490 (define_expand "avx512f_movddup512<mask_name>"
6491 [(set (match_operand:V8DF 0 "register_operand")
6494 (match_operand:V8DF 1 "nonimmediate_operand")
6496 (parallel [(const_int 0) (const_int 8)
6497 (const_int 2) (const_int 10)
6498 (const_int 4) (const_int 12)
6499 (const_int 6) (const_int 14)])))]
;; Named expander for 512-bit unpcklpd.  Operand 1 must be a register,
;; operand 2 may be register or memory; the selection list is
;; (0 8 2 10 4 12 6 14).
6502 (define_expand "avx512f_unpcklpd512<mask_name>"
6503 [(set (match_operand:V8DF 0 "register_operand")
6506 (match_operand:V8DF 1 "register_operand")
6507 (match_operand:V8DF 2 "nonimmediate_operand"))
6508 (parallel [(const_int 0) (const_int 8)
6509 (const_int 2) (const_int 10)
6510 (const_int 4) (const_int 12)
6511 (const_int 6) (const_int 14)])))]
;; Matcher shared by the 512-bit movddup and unpcklpd expanders.
;; Alternative 0 ties operand 2 to operand 1 (constraint "1") and emits
;; vmovddup from %1; alternative 1 takes two distinct sources and emits
;; vunpcklpd.
6514 (define_insn "*avx512f_unpcklpd512<mask_name>"
6515 [(set (match_operand:V8DF 0 "register_operand" "=v,v")
6518 (match_operand:V8DF 1 "nonimmediate_operand" "vm, v")
6519 (match_operand:V8DF 2 "nonimmediate_operand" "1 ,vm"))
6520 (parallel [(const_int 0) (const_int 8)
6521 (const_int 2) (const_int 10)
6522 (const_int 4) (const_int 12)
6523 (const_int 6) (const_int 14)])))]
6526 vmovddup\t{%1, %0<mask_operand3>|%0<mask_operand3>, %1}
6527 vunpcklpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
6528 [(set_attr "type" "sselog")
6529 (set_attr "prefix" "evex")
6530 (set_attr "mode" "V8DF")])
6532 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; Named expander for the 256-bit movddup form.  It has a single V4DF source
;; (register or memory) and the selection list (0 4 2 6), the same list used
;; by the avx_unpcklpd256 patterns.
6533 (define_expand "avx_movddup256"
6534 [(set (match_operand:V4DF 0 "register_operand")
6537 (match_operand:V4DF 1 "nonimmediate_operand")
6539 (parallel [(const_int 0) (const_int 4)
6540 (const_int 2) (const_int 6)])))]
;; Named expander for 256-bit unpcklpd.  Operand 1 must be a register,
;; operand 2 may be register or memory; the selection list is (0 4 2 6).
6543 (define_expand "avx_unpcklpd256"
6544 [(set (match_operand:V4DF 0 "register_operand")
6547 (match_operand:V4DF 1 "register_operand")
6548 (match_operand:V4DF 2 "nonimmediate_operand"))
6549 (parallel [(const_int 0) (const_int 4)
6550 (const_int 2) (const_int 6)])))]
;; Matcher shared by the 256-bit movddup and unpcklpd expanders.
;; Alternative 0 takes a register operand 1 and a register-or-memory
;; operand 2 and emits vunpcklpd; alternative 1 has a memory operand 1 with
;; operand 2 tied to it (constraint "1") and emits vmovddup from %1.
6553 (define_insn "*avx_unpcklpd256"
6554 [(set (match_operand:V4DF 0 "register_operand" "=x,x")
6557 (match_operand:V4DF 1 "nonimmediate_operand" " x,m")
6558 (match_operand:V4DF 2 "nonimmediate_operand" "xm,1"))
6559 (parallel [(const_int 0) (const_int 4)
6560 (const_int 2) (const_int 6)])))]
6563 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
6564 vmovddup\t{%1, %0|%0, %1}"
6565 [(set_attr "type" "sselog")
6566 (set_attr "prefix" "vex")
6567 (set_attr "mode" "V4DF")])
;; Three-step expansion of a V4DF low interleave.  Selections (0 4 2 6) and
;; (1 5 3 7) over operands 1 and 2 are followed by a final (0 1 4 5)
;; selection into operand 0.  Operands 3 and 4 are fresh V4DF pseudos
;; created in the preparation code.
;; The match_operands carry no constraint strings: define_expand does not
;; use constraints, so the former "x"/"xm" strings had no effect.
6569 (define_expand "vec_interleave_lowv4df"
6573 (match_operand:V4DF 1 "register_operand")
6574 (match_operand:V4DF 2 "nonimmediate_operand"))
6575 (parallel [(const_int 0) (const_int 4)
6576 (const_int 2) (const_int 6)])))
6582 (parallel [(const_int 1) (const_int 5)
6583 (const_int 3) (const_int 7)])))
6584 (set (match_operand:V4DF 0 "register_operand")
6589 (parallel [(const_int 0) (const_int 1)
6590 (const_int 4) (const_int 5)])))]
6593 operands[3] = gen_reg_rtx (V4DFmode);
6594 operands[4] = gen_reg_rtx (V4DFmode);
;; Expander for the V2DF low interleave.  Both sources may be register or
;; memory; when ix86_vec_interleave_v2df_operator_ok (operands, 0) rejects
;; the operand combination, operand 1 is forced into a register.
6597 (define_expand "vec_interleave_lowv2df"
6598 [(set (match_operand:V2DF 0 "register_operand")
6601 (match_operand:V2DF 1 "nonimmediate_operand")
6602 (match_operand:V2DF 2 "nonimmediate_operand"))
6603 (parallel [(const_int 0)
6607 if (!ix86_vec_interleave_v2df_operator_ok (operands, 0))
6608 operands[1] = force_reg (V2DFmode, operands[1]);
;; Matcher for the V2DF low interleave, gated on TARGET_SSE2 and
;; ix86_vec_interleave_v2df_operator_ok (operands, 0).  Six alternatives, in
;; order: unpcklpd (noavx), vunpcklpd (avx), movddup of %1 (sse3),
;; movhpd from memory %2 (noavx), vmovhpd from memory %2 (avx), and movlpd
;; storing %2 to %H0.  The per-alternative "isa", "type", "prefix" and
;; "mode" attributes follow the same order.
6611 (define_insn "*vec_interleave_lowv2df"
6612 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,o")
6615 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,m,0,x,0")
6616 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,1,m,m,x"))
6617 (parallel [(const_int 0)
6619 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
6621 unpcklpd\t{%2, %0|%0, %2}
6622 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
6623 %vmovddup\t{%1, %0|%0, %q1}
6624 movhpd\t{%2, %0|%0, %q2}
6625 vmovhpd\t{%2, %1, %0|%0, %1, %q2}
6626 %vmovlpd\t{%2, %H0|%H0, %2}"
6627 [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
6628 (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
6629 (set_attr "ssememalign" "64")
6630 (set_attr "prefix_data16" "*,*,*,1,*,1")
6631 (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex")
6632 (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,V1DF")])
;; Post-reload rewrite (TARGET_SSE3) for a V2DF memory destination fed from
;; a V2DF register.  The low DFmode part of the source register is stored
;; twice, at byte offsets 0 and 8 of the destination.
;; NOTE(review): the opening form of this definition is not visible here;
;; it reads like a define_split -- confirm.
6635 [(set (match_operand:V2DF 0 "memory_operand")
6638 (match_operand:V2DF 1 "register_operand")
6640 (parallel [(const_int 0)
6642 "TARGET_SSE3 && reload_completed"
6645 rtx low = gen_rtx_REG (DFmode, REGNO (operands[1]));
6646 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
6647 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
;; TARGET_SSE3 rewrite for a V2DF register loaded from a V2DF memory source
;; with selectors op2 (0 or 1) and op3, where op3 must equal op2 + 2.  The
;; result is a vec_duplicate of the DFmode element at byte offset op2 * 8
;; of the memory operand.
;; NOTE(review): the opening form of this definition is not visible here;
;; it reads like a define_split -- confirm.
6652 [(set (match_operand:V2DF 0 "register_operand")
6655 (match_operand:V2DF 1 "memory_operand")
6657 (parallel [(match_operand:SI 2 "const_0_to_1_operand")
6658 (match_operand:SI 3 "const_int_operand")])))]
6659 "TARGET_SSE3 && INTVAL (operands[2]) + 2 == INTVAL (operands[3])"
6660 [(set (match_dup 0) (vec_duplicate:V2DF (match_dup 1)))]
6662 operands[1] = adjust_address (operands[1], DFmode, INTVAL (operands[2]) * 8);
;; Scalar vscalef on VF_128 operands.  Operand 2's predicate and constraint,
;; and the <round_op3> text in the template, come from the <round_name>
;; substitution.  The mnemonic suffix is <ssescalarmodesuffix> and the
;; "mode" attribute is the scalar mode.
6665 (define_insn "avx512f_vmscalef<mode><round_name>"
6666 [(set (match_operand:VF_128 0 "register_operand" "=v")
6669 [(match_operand:VF_128 1 "register_operand" "v")
6670 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>")]
6675 "vscalef<ssescalarmodesuffix>\t{<round_op3>%2, %1, %0|%0, %1, %2<round_op3>}"
6676 [(set_attr "prefix" "evex")
6677 (set_attr "mode" "<ssescalarmode>")])
;; Packed vscalef on VF_512 operands.  Masking text comes from the
;; <mask_name> substitution and operand 2's predicate, constraint and
;; <round_mask_op3> text from the <round_name> substitution.
6679 (define_insn "avx512f_scalef<mode><mask_name><round_name>"
6680 [(set (match_operand:VF_512 0 "register_operand" "=v")
6682 [(match_operand:VF_512 1 "register_operand" "v")
6683 (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>")]
6686 "vscalef<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
6687 [(set_attr "prefix" "evex")
6688 (set_attr "mode" "<MODE>")])
;; Zero-masking entry point for vpternlog.  It forwards operands 0..4 to
;; gen_avx512f_vternlog<mode>_maskz_1, inserting CONST0_RTX (<MODE>mode)
;; ahead of the mask register (operand 5).
6690 (define_expand "avx512f_vternlog<mode>_maskz"
6691 [(match_operand:VI48_512 0 "register_operand")
6692 (match_operand:VI48_512 1 "register_operand")
6693 (match_operand:VI48_512 2 "register_operand")
6694 (match_operand:VI48_512 3 "nonimmediate_operand")
6695 (match_operand:SI 4 "const_0_to_255_operand")
6696 (match_operand:<avx512fmaskmode> 5 "register_operand")]
6699 emit_insn (gen_avx512f_vternlog<mode>_maskz_1 (
6700 operands[0], operands[1], operands[2], operands[3],
6701 operands[4], CONST0_RTX (<MODE>mode), operands[5]));
;; vpternlog on VI48_512 operands.  Operand 1 is tied to the destination
;; (constraint "0") and therefore does not appear in the template; operand 4
;; is an immediate in 0..255.  The optional mask text comes from the
;; <sd_maskz_name> substitution.
6705 (define_insn "avx512f_vternlog<mode><sd_maskz_name>"
6706 [(set (match_operand:VI48_512 0 "register_operand" "=v")
6708 [(match_operand:VI48_512 1 "register_operand" "0")
6709 (match_operand:VI48_512 2 "register_operand" "v")
6710 (match_operand:VI48_512 3 "nonimmediate_operand" "vm")
6711 (match_operand:SI 4 "const_0_to_255_operand")]
6714 "vpternlog<ssemodesuffix>\t{%4, %3, %2, %0<sd_mask_op5>|%0<sd_mask_op5>, %2, %3, %4}"
6715 [(set_attr "type" "sselog")
6716 (set_attr "prefix" "evex")
6717 (set_attr "mode" "<sseinsnmode>")])
;; Masked vpternlog on VI48_512 operands.  Operand 1 is tied to the
;; destination (constraint "0"); operand 5 is the mask register
;; (constraint "Yk"), printed as {%5} after the destination.
6719 (define_insn "avx512f_vternlog<mode>_mask"
6720 [(set (match_operand:VI48_512 0 "register_operand" "=v")
6723 [(match_operand:VI48_512 1 "register_operand" "0")
6724 (match_operand:VI48_512 2 "register_operand" "v")
6725 (match_operand:VI48_512 3 "nonimmediate_operand" "vm")
6726 (match_operand:SI 4 "const_0_to_255_operand")]
6729 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
6731 "vpternlog<ssemodesuffix>\t{%4, %3, %2, %0%{%5%}|%0%{%5%}, %2, %3, %4}"
6732 [(set_attr "type" "sselog")
6733 (set_attr "prefix" "evex")
6734 (set_attr "mode" "<sseinsnmode>")])
;; Packed vgetexp on a VF_512 operand.  Masking text comes from the
;; <mask_name> substitution; operand 1's predicate, constraint and the
;; <round_saeonly_mask_op2> text come from the <round_saeonly_name>
;; substitution.
;; The output template is not followed by a ';': in a machine description
;; that character only starts a comment.
6736 (define_insn "avx512f_getexp<mode><mask_name><round_saeonly_name>"
6737 [(set (match_operand:VF_512 0 "register_operand" "=v")
6738 (unspec:VF_512 [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
6741 "vgetexp<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
6742 [(set_attr "prefix" "evex")
6743 (set_attr "mode" "<MODE>")])
;; Scalar vgetexp on VF_128 operands.  Operand 2's predicate, constraint and
;; the <round_saeonly_op3> text come from the <round_saeonly_name>
;; substitution; the "mode" attribute is the scalar mode.
;; The output template is not followed by a ';': in a machine description
;; that character only starts a comment.
6745 (define_insn "avx512f_sgetexp<mode><round_saeonly_name>"
6746 [(set (match_operand:VF_128 0 "register_operand" "=v")
6749 [(match_operand:VF_128 1 "register_operand" "v")
6750 (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
6755 "vgetexp<ssescalarmodesuffix>\t{<round_saeonly_op3>%2, %1, %0|%0, %1, %2<round_saeonly_op3>}"
6756 [(set_attr "prefix" "evex")
6757 (set_attr "mode" "<ssescalarmode>")])
;; valign on VI48_512 operands: operand 1 is a register, operand 2 is
;; register or memory, and operand 3 is an immediate in 0..255.  Masking
;; text comes from the <mask_name> substitution.
;; The output template is not followed by a ';': in a machine description
;; that character only starts a comment.
6759 (define_insn "<mask_codefor>avx512f_align<mode><mask_name>"
6760 [(set (match_operand:VI48_512 0 "register_operand" "=v")
6761 (unspec:VI48_512 [(match_operand:VI48_512 1 "register_operand" "v")
6762 (match_operand:VI48_512 2 "nonimmediate_operand" "vm")
6763 (match_operand:SI 3 "const_0_to_255_operand")]
6766 "valign<ssemodesuffix>\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}"
6767 [(set_attr "prefix" "evex")
6768 (set_attr "mode" "<sseinsnmode>")])
;; Masked 512-bit shufps entry point.  The 8-bit immediate (operand 3) is
;; decoded into sixteen element selectors for gen_avx512f_shufps512_1_mask.
;; For each group of four result elements (bases 0, 4, 8, 12) the fields at
;; bits 0 and 2 are offset by the base only, while the fields at bits 4 and
;; 6 are additionally offset by 16.  Operand 4 and the HImode mask
;; (operand 5) are passed through unchanged.
6770 (define_expand "avx512f_shufps512_mask"
6771 [(match_operand:V16SF 0 "register_operand")
6772 (match_operand:V16SF 1 "register_operand")
6773 (match_operand:V16SF 2 "nonimmediate_operand")
6774 (match_operand:SI 3 "const_0_to_255_operand")
6775 (match_operand:V16SF 4 "register_operand")
6776 (match_operand:HI 5 "register_operand")]
6779 int mask = INTVAL (operands[3]);
6780 emit_insn (gen_avx512f_shufps512_1_mask (operands[0], operands[1], operands[2],
6781 GEN_INT ((mask >> 0) & 3),
6782 GEN_INT ((mask >> 2) & 3),
6783 GEN_INT (((mask >> 4) & 3) + 16),
6784 GEN_INT (((mask >> 6) & 3) + 16),
6785 GEN_INT (((mask >> 0) & 3) + 4),
6786 GEN_INT (((mask >> 2) & 3) + 4),
6787 GEN_INT (((mask >> 4) & 3) + 20),
6788 GEN_INT (((mask >> 6) & 3) + 20),
6789 GEN_INT (((mask >> 0) & 3) + 8),
6790 GEN_INT (((mask >> 2) & 3) + 8),
6791 GEN_INT (((mask >> 4) & 3) + 24),
6792 GEN_INT (((mask >> 6) & 3) + 24),
6793 GEN_INT (((mask >> 0) & 3) + 12),
6794 GEN_INT (((mask >> 2) & 3) + 12),
6795 GEN_INT (((mask >> 4) & 3) + 28),
6796 GEN_INT (((mask >> 6) & 3) + 28),
6797 operands[4], operands[5]));
;; Zero-masking entry point for packed vfixupimm.  It forwards operands 0..4
;; to gen_avx512f_fixupimm<mode>_maskz_1<round_saeonly_expand_name>,
;; inserting CONST0_RTX (<MODE>mode) ahead of the mask register (operand 5)
;; and appending whatever <round_saeonly_expand_operand6> expands to.
6802 (define_expand "avx512f_fixupimm<mode>_maskz<round_saeonly_expand_name>"
6803 [(match_operand:VF_512 0 "register_operand")
6804 (match_operand:VF_512 1 "register_operand")
6805 (match_operand:VF_512 2 "register_operand")
6806 (match_operand:<sseintvecmode> 3 "<round_saeonly_expand_nimm_predicate>")
6807 (match_operand:SI 4 "const_0_to_255_operand")
6808 (match_operand:<avx512fmaskmode> 5 "register_operand")]
6811 emit_insn (gen_avx512f_fixupimm<mode>_maskz_1<round_saeonly_expand_name> (
6812 operands[0], operands[1], operands[2], operands[3],
6813 operands[4], CONST0_RTX (<MODE>mode), operands[5]
6814 <round_saeonly_expand_operand6>));
;; Packed vfixupimm on VF_512 operands.  Operand 1 is tied to the
;; destination (constraint "0"), operand 3 is an integer-vector operand
;; whose predicate and constraint come from the <round_saeonly_name>
;; substitution, and operand 4 is an immediate in 0..255.
;; The output template is not followed by a ';': in a machine description
;; that character only starts a comment.
6818 (define_insn "avx512f_fixupimm<mode><sd_maskz_name><round_saeonly_name>"
6819 [(set (match_operand:VF_512 0 "register_operand" "=v")
6821 [(match_operand:VF_512 1 "register_operand" "0")
6822 (match_operand:VF_512 2 "register_operand" "v")
6823 (match_operand:<sseintvecmode> 3 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
6824 (match_operand:SI 4 "const_0_to_255_operand")]
6827 "vfixupimm<ssemodesuffix>\t{%4, <round_saeonly_sd_mask_op5>%3, %2, %0<sd_mask_op5>|%0<sd_mask_op5>, %2, %3<round_saeonly_sd_mask_op5>, %4}"
6828 [(set_attr "prefix" "evex")
6829 (set_attr "mode" "<MODE>")])
;; Masked packed vfixupimm on VF_512 operands.  Operand 1 is tied to the
;; destination (constraint "0"); operand 5 is the mask register
;; (constraint "Yk"), printed as {%5} after the destination.
;; The output template is not followed by a ';': in a machine description
;; that character only starts a comment.
6831 (define_insn "avx512f_fixupimm<mode>_mask<round_saeonly_name>"
6832 [(set (match_operand:VF_512 0 "register_operand" "=v")
6835 [(match_operand:VF_512 1 "register_operand" "0")
6836 (match_operand:VF_512 2 "register_operand" "v")
6837 (match_operand:<sseintvecmode> 3 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
6838 (match_operand:SI 4 "const_0_to_255_operand")]
6841 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
6843 "vfixupimm<ssemodesuffix>\t{%4, <round_saeonly_op6>%3, %2, %0%{%5%}|%0%{%5%}, %2, %3<round_saeonly_op6>, %4}"
6844 [(set_attr "prefix" "evex")
6845 (set_attr "mode" "<MODE>")])
;; Zero-masking entry point for scalar vfixupimm.  It forwards operands 0..4
;; to gen_avx512f_sfixupimm<mode>_maskz_1<round_saeonly_expand_name>,
;; inserting CONST0_RTX (<MODE>mode) ahead of the mask register (operand 5)
;; and appending whatever <round_saeonly_expand_operand6> expands to.
6847 (define_expand "avx512f_sfixupimm<mode>_maskz<round_saeonly_expand_name>"
6848 [(match_operand:VF_128 0 "register_operand")
6849 (match_operand:VF_128 1 "register_operand")
6850 (match_operand:VF_128 2 "register_operand")
6851 (match_operand:<sseintvecmode> 3 "<round_saeonly_expand_nimm_predicate>")
6852 (match_operand:SI 4 "const_0_to_255_operand")
6853 (match_operand:<avx512fmaskmode> 5 "register_operand")]
6856 emit_insn (gen_avx512f_sfixupimm<mode>_maskz_1<round_saeonly_expand_name> (
6857 operands[0], operands[1], operands[2], operands[3],
6858 operands[4], CONST0_RTX (<MODE>mode), operands[5]
6859 <round_saeonly_expand_operand6>));
;; Scalar vfixupimm on VF_128 operands.  Operand 1 is tied to the
;; destination (constraint "0"), operand 3 is an integer-vector operand
;; whose predicate and constraint come from the <round_saeonly_name>
;; substitution, and operand 4 is an immediate in 0..255.  The "mode"
;; attribute is the scalar mode.
;; The output template is not followed by a ';': in a machine description
;; that character only starts a comment.
6863 (define_insn "avx512f_sfixupimm<mode><sd_maskz_name><round_saeonly_name>"
6864 [(set (match_operand:VF_128 0 "register_operand" "=v")
6867 [(match_operand:VF_128 1 "register_operand" "0")
6868 (match_operand:VF_128 2 "register_operand" "v")
6869 (match_operand:<sseintvecmode> 3 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
6870 (match_operand:SI 4 "const_0_to_255_operand")]
6875 "vfixupimm<ssescalarmodesuffix>\t{%4, <round_saeonly_sd_mask_op5>%3, %2, %0<sd_mask_op5>|%0<sd_mask_op5>, %2, %3<round_saeonly_sd_mask_op5>, %4}"
6876 [(set_attr "prefix" "evex")
6877 (set_attr "mode" "<ssescalarmode>")])
;; Masked scalar vfixupimm on VF_128 operands.  Operand 1 is tied to the
;; destination (constraint "0"); operand 5 is the mask register
;; (constraint "Yk"), printed as {%5} after the destination.  The "mode"
;; attribute is the scalar mode.
;; The output template is not followed by a ';': in a machine description
;; that character only starts a comment.
6879 (define_insn "avx512f_sfixupimm<mode>_mask<round_saeonly_name>"
6880 [(set (match_operand:VF_128 0 "register_operand" "=v")
6884 [(match_operand:VF_128 1 "register_operand" "0")
6885 (match_operand:VF_128 2 "register_operand" "v")
6886 (match_operand:<sseintvecmode> 3 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
6887 (match_operand:SI 4 "const_0_to_255_operand")]
6892 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
6894 "vfixupimm<ssescalarmodesuffix>\t{%4, <round_saeonly_op6>%3, %2, %0%{%5%}|%0%{%5%}, %2, %3<round_saeonly_op6>, %4}"
6895 [(set_attr "prefix" "evex")
6896 (set_attr "mode" "<ssescalarmode>")])
;; Packed vrndscale on a VF_512 operand with an immediate in 0..255
;; (operand 2).  Masking text comes from the <mask_name> substitution;
;; operand 1's predicate, constraint and the <round_saeonly_mask_op3> text
;; come from the <round_saeonly_name> substitution.
6898 (define_insn "avx512f_rndscale<mode><mask_name><round_saeonly_name>"
6899 [(set (match_operand:VF_512 0 "register_operand" "=v")
6901 [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
6902 (match_operand:SI 2 "const_0_to_255_operand")]
6905 "vrndscale<ssemodesuffix>\t{%2, <round_saeonly_mask_op3>%1, %0<mask_operand3>|%0<mask_operand3>, %1<round_saeonly_mask_op3>, %2}"
6906 [(set_attr "length_immediate" "1")
6907 (set_attr "prefix" "evex")
6908 (set_attr "mode" "<MODE>")])
;; Scalar vrndscale on VF_128 operands with an immediate in 0..255
;; (operand 3).  Operand 2's predicate, constraint and the
;; <round_saeonly_op4> text come from the <round_saeonly_name> substitution.
;; NOTE(review): the "mode" attribute here is <MODE>, whereas the other
;; scalar VF_128 patterns nearby use <ssescalarmode> -- confirm this is
;; intended.
6910 (define_insn "avx512f_rndscale<mode><round_saeonly_name>"
6911 [(set (match_operand:VF_128 0 "register_operand" "=v")
6914 [(match_operand:VF_128 1 "register_operand" "v")
6915 (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
6916 (match_operand:SI 3 "const_0_to_255_operand")]
6921 "vrndscale<ssescalarmodesuffix>\t{%3, <round_saeonly_op4>%2, %1, %0|%0, %1, %2<round_saeonly_op4>, %3}"
6922 [(set_attr "length_immediate" "1")
6923 (set_attr "prefix" "evex")
6924 (set_attr "mode" "<MODE>")])
6926 ;; One bit in mask selects 2 elements.
;; 512-bit vshufps matcher taking sixteen explicit element selectors
;; (operands 3..18), each restricted to a four-value range by its predicate.
;; The condition requires selectors 7..10, 11..14 and 15..18 to equal
;; selectors 3..6 plus 4, 8 and 12 respectively, so every group of four
;; uses the same pattern.  The output code rebuilds the 8-bit immediate from
;; operands 3..6 (two bits each, after removing the +16 bias from operands
;; 5 and 6) and stores it back into operands[3] for printing.
6927 (define_insn "avx512f_shufps512_1<mask_name>"
6928 [(set (match_operand:V16SF 0 "register_operand" "=v")
6931 (match_operand:V16SF 1 "register_operand" "v")
6932 (match_operand:V16SF 2 "nonimmediate_operand" "vm"))
6933 (parallel [(match_operand 3 "const_0_to_3_operand")
6934 (match_operand 4 "const_0_to_3_operand")
6935 (match_operand 5 "const_16_to_19_operand")
6936 (match_operand 6 "const_16_to_19_operand")
6937 (match_operand 7 "const_4_to_7_operand")
6938 (match_operand 8 "const_4_to_7_operand")
6939 (match_operand 9 "const_20_to_23_operand")
6940 (match_operand 10 "const_20_to_23_operand")
6941 (match_operand 11 "const_8_to_11_operand")
6942 (match_operand 12 "const_8_to_11_operand")
6943 (match_operand 13 "const_24_to_27_operand")
6944 (match_operand 14 "const_24_to_27_operand")
6945 (match_operand 15 "const_12_to_15_operand")
6946 (match_operand 16 "const_12_to_15_operand")
6947 (match_operand 17 "const_28_to_31_operand")
6948 (match_operand 18 "const_28_to_31_operand")])))]
6950 && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
6951 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
6952 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
6953 && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4)
6954 && INTVAL (operands[3]) == (INTVAL (operands[11]) - 8)
6955 && INTVAL (operands[4]) == (INTVAL (operands[12]) - 8)
6956 && INTVAL (operands[5]) == (INTVAL (operands[13]) - 8)
6957 && INTVAL (operands[6]) == (INTVAL (operands[14]) - 8)
6958 && INTVAL (operands[3]) == (INTVAL (operands[15]) - 12)
6959 && INTVAL (operands[4]) == (INTVAL (operands[16]) - 12)
6960 && INTVAL (operands[5]) == (INTVAL (operands[17]) - 12)
6961 && INTVAL (operands[6]) == (INTVAL (operands[18]) - 12))"
6964 mask = INTVAL (operands[3]);
6965 mask |= INTVAL (operands[4]) << 2;
6966 mask |= (INTVAL (operands[5]) - 16) << 4;
6967 mask |= (INTVAL (operands[6]) - 16) << 6;
6968 operands[3] = GEN_INT (mask);
6970 return "vshufps\t{%3, %2, %1, %0<mask_operand19>|%0<mask_operand19>, %1, %2, %3}";
6972 [(set_attr "type" "sselog")
6973 (set_attr "length_immediate" "1")
6974 (set_attr "prefix" "evex")
6975 (set_attr "mode" "V16SF")])
;; Masked 512-bit shufpd entry point.  Each tested bit of the immediate
;; (operand 3) chooses between two adjacent element indices for one
;; selector passed to gen_avx512f_shufpd512_1_mask; operand 4 and the QImode
;; mask (operand 5) are passed through unchanged.
;; NOTE(review): the selector derived from bit 0 is not visible here --
;; presumably "mask & 1 ? 1 : 0"; confirm.
6977 (define_expand "avx512f_shufpd512_mask"
6978 [(match_operand:V8DF 0 "register_operand")
6979 (match_operand:V8DF 1 "register_operand")
6980 (match_operand:V8DF 2 "nonimmediate_operand")
6981 (match_operand:SI 3 "const_0_to_255_operand")
6982 (match_operand:V8DF 4 "register_operand")
6983 (match_operand:QI 5 "register_operand")]
6986 int mask = INTVAL (operands[3]);
6987 emit_insn (gen_avx512f_shufpd512_1_mask (operands[0], operands[1], operands[2],
6989 GEN_INT (mask & 2 ? 9 : 8),
6990 GEN_INT (mask & 4 ? 3 : 2),
6991 GEN_INT (mask & 8 ? 11 : 10),
6992 GEN_INT (mask & 16 ? 5 : 4),
6993 GEN_INT (mask & 32 ? 13 : 12),
6994 GEN_INT (mask & 64 ? 7 : 6),
6995 GEN_INT (mask & 128 ? 15 : 14),
6996 operands[4], operands[5]));
;; 512-bit vshufpd matcher taking eight explicit element selectors
;; (operands 3..10), each restricted to two adjacent values by its
;; predicate.  The output code subtracts each selector's lower bound to get
;; one bit per selector, packs those bits into an 8-bit immediate (bit 0
;; from operand 3 up to bit 7 from operand 10) and stores it back into
;; operands[3] for printing.
7000 (define_insn "avx512f_shufpd512_1<mask_name>"
7001 [(set (match_operand:V8DF 0 "register_operand" "=v")
7004 (match_operand:V8DF 1 "register_operand" "v")
7005 (match_operand:V8DF 2 "nonimmediate_operand" "vm"))
7006 (parallel [(match_operand 3 "const_0_to_1_operand")
7007 (match_operand 4 "const_8_to_9_operand")
7008 (match_operand 5 "const_2_to_3_operand")
7009 (match_operand 6 "const_10_to_11_operand")
7010 (match_operand 7 "const_4_to_5_operand")
7011 (match_operand 8 "const_12_to_13_operand")
7012 (match_operand 9 "const_6_to_7_operand")
7013 (match_operand 10 "const_14_to_15_operand")])))]
7017 mask = INTVAL (operands[3]);
7018 mask |= (INTVAL (operands[4]) - 8) << 1;
7019 mask |= (INTVAL (operands[5]) - 2) << 2;
7020 mask |= (INTVAL (operands[6]) - 10) << 3;
7021 mask |= (INTVAL (operands[7]) - 4) << 4;
7022 mask |= (INTVAL (operands[8]) - 12) << 5;
7023 mask |= (INTVAL (operands[9]) - 6) << 6;
7024 mask |= (INTVAL (operands[10]) - 14) << 7;
7025 operands[3] = GEN_INT (mask);
7027 return "vshufpd\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
7029 [(set_attr "type" "sselog")
7030 (set_attr "length_immediate" "1")
7031 (set_attr "prefix" "evex")
7032 (set_attr "mode" "V8DF")])
;; Expander for the 256-bit shufpd form.  Operand 3 is an integer
;; immediate.  The visible GEN_INT arguments map bits 1..3 of it onto the
;; index pairs 4/5, 2/3 and 6/7 and pass them to gen_avx_shufpd256_1.
7034 (define_expand "avx_shufpd256"
7035 [(match_operand:V4DF 0 "register_operand")
7036 (match_operand:V4DF 1 "register_operand")
7037 (match_operand:V4DF 2 "nonimmediate_operand")
7038 (match_operand:SI 3 "const_int_operand")]
7041 int mask = INTVAL (operands[3]);
7042 emit_insn (gen_avx_shufpd256_1 (operands[0], operands[1], operands[2],
7044 GEN_INT (mask & 2 ? 5 : 4),
7045 GEN_INT (mask & 4 ? 3 : 2),
7046 GEN_INT (mask & 8 ? 7 : 6)));
;; V4DF shuffle insn with four selector operands (3..6), each limited by
;; its predicate to two adjacent indices.  The output code folds them back
;; into a 4-bit immediate: operand 3 is bit 0, and operands 4..6 give bits
;; 1..3 after subtracting 4, 2 and 6.  The immediate is stored in
;; operands[3] before the vshufpd template is returned.
7050 (define_insn "avx_shufpd256_1"
7051 [(set (match_operand:V4DF 0 "register_operand" "=x")
7054 (match_operand:V4DF 1 "register_operand" "x")
7055 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
7056 (parallel [(match_operand 3 "const_0_to_1_operand")
7057 (match_operand 4 "const_4_to_5_operand")
7058 (match_operand 5 "const_2_to_3_operand")
7059 (match_operand 6 "const_6_to_7_operand")])))]
7063 mask = INTVAL (operands[3]);
7064 mask |= (INTVAL (operands[4]) - 4) << 1;
7065 mask |= (INTVAL (operands[5]) - 2) << 2;
7066 mask |= (INTVAL (operands[6]) - 6) << 3;
7067 operands[3] = GEN_INT (mask);
7069 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
7071 [(set_attr "type" "sseshuf")
7072 (set_attr "length_immediate" "1")
7073 (set_attr "prefix" "vex")
7074 (set_attr "mode" "V4DF")])
;; Expander for the 128-bit shufpd form.  Operand 3 is an integer
;; immediate.  The visible GEN_INT argument maps bit 1 of it onto index
;; 2 or 3 and passes it to gen_sse2_shufpd_v2df.
7076 (define_expand "sse2_shufpd"
7077 [(match_operand:V2DF 0 "register_operand")
7078 (match_operand:V2DF 1 "register_operand")
7079 (match_operand:V2DF 2 "nonimmediate_operand")
7080 (match_operand:SI 3 "const_int_operand")]
7083 int mask = INTVAL (operands[3]);
7084 emit_insn (gen_sse2_shufpd_v2df (operands[0], operands[1], operands[2],
7086 GEN_INT (mask & 2 ? 3 : 2)));
7090 ;; punpcklqdq and punpckhqdq are shorter than shufpd.
;; V4DI high interleave, emitted as vpunpckhqdq with a VEX prefix and
;; insn mode OI.  Operand 2 may be a register or memory.
7091 (define_insn "avx2_interleave_highv4di"
7092 [(set (match_operand:V4DI 0 "register_operand" "=x")
7095 (match_operand:V4DI 1 "register_operand" "x")
7096 (match_operand:V4DI 2 "nonimmediate_operand" "xm"))
7097 (parallel [(const_int 1)
7102 "vpunpckhqdq\t{%2, %1, %0|%0, %1, %2}"
7103 [(set_attr "type" "sselog")
7104 (set_attr "prefix" "vex")
7105 (set_attr "mode" "OI")])
;; V8DI high interleave with the fixed selector list 1, 9, 3, 11, 5, 13,
;; 7, 15.  Emitted as vpunpckhqdq with an EVEX prefix and insn mode XI.
;; The <mask_operand3> text is appended to the destination in the template.
7107 (define_insn "<mask_codefor>avx512f_interleave_highv8di<mask_name>"
7108 [(set (match_operand:V8DI 0 "register_operand" "=v")
7111 (match_operand:V8DI 1 "register_operand" "v")
7112 (match_operand:V8DI 2 "nonimmediate_operand" "vm"))
7113 (parallel [(const_int 1) (const_int 9)
7114 (const_int 3) (const_int 11)
7115 (const_int 5) (const_int 13)
7116 (const_int 7) (const_int 15)])))]
7118 "vpunpckhqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7119 [(set_attr "type" "sselog")
7120 (set_attr "prefix" "evex")
7121 (set_attr "mode" "XI")])
;; V2DI high interleave with two alternatives selected by the "isa"
;; attribute.  The non-AVX form is two-operand punpckhqdq, with operand 1
;; tied to the destination by constraint "0".  The AVX form is
;; three-operand vpunpckhqdq.
7123 (define_insn "vec_interleave_highv2di"
7124 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
7127 (match_operand:V2DI 1 "register_operand" "0,x")
7128 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm"))
7129 (parallel [(const_int 1)
7133 punpckhqdq\t{%2, %0|%0, %2}
7134 vpunpckhqdq\t{%2, %1, %0|%0, %1, %2}"
7135 [(set_attr "isa" "noavx,avx")
7136 (set_attr "type" "sselog")
7137 (set_attr "prefix_data16" "1,*")
7138 (set_attr "prefix" "orig,vex")
7139 (set_attr "mode" "TI")])
;; V4DI low interleave, emitted as vpunpcklqdq with a VEX prefix and
;; insn mode OI.  Operand 2 may be a register or memory.
7141 (define_insn "avx2_interleave_lowv4di"
7142 [(set (match_operand:V4DI 0 "register_operand" "=x")
7145 (match_operand:V4DI 1 "register_operand" "x")
7146 (match_operand:V4DI 2 "nonimmediate_operand" "xm"))
7147 (parallel [(const_int 0)
7152 "vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}"
7153 [(set_attr "type" "sselog")
7154 (set_attr "prefix" "vex")
7155 (set_attr "mode" "OI")])
;; V8DI low interleave with the fixed selector list 0, 8, 2, 10, 4, 12,
;; 6, 14.  Emitted as vpunpcklqdq with an EVEX prefix and insn mode XI.
;; The <mask_operand3> text is appended to the destination in the template.
7157 (define_insn "<mask_codefor>avx512f_interleave_lowv8di<mask_name>"
7158 [(set (match_operand:V8DI 0 "register_operand" "=v")
7161 (match_operand:V8DI 1 "register_operand" "v")
7162 (match_operand:V8DI 2 "nonimmediate_operand" "vm"))
7163 (parallel [(const_int 0) (const_int 8)
7164 (const_int 2) (const_int 10)
7165 (const_int 4) (const_int 12)
7166 (const_int 6) (const_int 14)])))]
7168 "vpunpcklqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7169 [(set_attr "type" "sselog")
7170 (set_attr "prefix" "evex")
7171 (set_attr "mode" "XI")])
;; V2DI low interleave with two alternatives selected by the "isa"
;; attribute.  The non-AVX form is two-operand punpcklqdq, with operand 1
;; tied to the destination by constraint "0".  The AVX form is
;; three-operand vpunpcklqdq.
7173 (define_insn "vec_interleave_lowv2di"
7174 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
7177 (match_operand:V2DI 1 "register_operand" "0,x")
7178 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm"))
7179 (parallel [(const_int 0)
7183 punpcklqdq\t{%2, %0|%0, %2}
7184 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}"
7185 [(set_attr "isa" "noavx,avx")
7186 (set_attr "type" "sselog")
7187 (set_attr "prefix_data16" "1,*")
7188 (set_attr "prefix" "orig,vex")
7189 (set_attr "mode" "TI")])
;; Two-element shuffle over the VI8F_128 modes.  It selects one element
;; from each half of the concatenation of operands 1 and 2: selector 3 is
;; 0 or 1, selector 4 is 2 or 3.  The output code packs both selectors into
;; a 2-bit immediate (operand 3 is bit 0, operand 4 minus 2 is bit 1) and
;; stores it in operands[3].  The switch on which_alternative returns
;; shufpd or vshufpd.
7191 (define_insn "sse2_shufpd_<mode>"
7192 [(set (match_operand:VI8F_128 0 "register_operand" "=x,x")
7193 (vec_select:VI8F_128
7194 (vec_concat:<ssedoublevecmode>
7195 (match_operand:VI8F_128 1 "register_operand" "0,x")
7196 (match_operand:VI8F_128 2 "nonimmediate_operand" "xm,xm"))
7197 (parallel [(match_operand 3 "const_0_to_1_operand")
7198 (match_operand 4 "const_2_to_3_operand")])))]
7202 mask = INTVAL (operands[3]);
7203 mask |= (INTVAL (operands[4]) - 2) << 1;
7204 operands[3] = GEN_INT (mask);
7206 switch (which_alternative)
7209 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
7211 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
7216 [(set_attr "isa" "noavx,avx")
7217 (set_attr "type" "sseshuf")
7218 (set_attr "length_immediate" "1")
7219 (set_attr "prefix" "orig,vex")
7220 (set_attr "mode" "V2DF")])
7222 ;; Avoid combining registers from different units in a single alternative,
7223 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Moves the element picked by selector (const_int 1) of a V2DF source
;; into a DF destination.  Six alternatives; the last three are typed
;; ssemov, fmov and imov and take an "o" memory source.  The insn
;; condition requires SSE2 and rejects the case where both operands are
;; memory.
7224 (define_insn "sse2_storehpd"
7225 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,x,*f,r")
7227 (match_operand:V2DF 1 "nonimmediate_operand" " x,0,x,o,o,o")
7228 (parallel [(const_int 1)])))]
7229 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7231 %vmovhpd\t{%1, %0|%0, %1}
7233 vunpckhpd\t{%d1, %0|%0, %d1}
7237 [(set_attr "isa" "*,noavx,avx,*,*,*")
7238 (set_attr "type" "ssemov,sselog1,sselog1,ssemov,fmov,imov")
7239 (set (attr "prefix_data16")
7241 (and (eq_attr "alternative" "0")
7242 (not (match_test "TARGET_AVX")))
7244 (const_string "*")))
7245 (set_attr "prefix" "maybe_vex,orig,vex,*,*,*")
7246 (set_attr "mode" "V1DF,V1DF,V2DF,DF,DF,DF")])
;; NOTE(review): the opening line of this definition is not visible in
;; this excerpt; it reads like a post-reload split -- confirm.
;; Visible behaviour: under TARGET_SSE2 && reload_completed, extracting
;; element 1 of a V2DF memory operand into a DF register is replaced by a
;; plain (set (match_dup 0) (match_dup 1)).  Before that, operands[1] is
;; re-addressed as DFmode at byte offset 8.
7249 [(set (match_operand:DF 0 "register_operand")
7251 (match_operand:V2DF 1 "memory_operand")
7252 (parallel [(const_int 1)])))]
7253 "TARGET_SSE2 && reload_completed"
7254 [(set (match_dup 0) (match_dup 1))]
7255 "operands[1] = adjust_address (operands[1], DFmode, 8);")
;; SSE-without-SSE2 variant (condition "!TARGET_SSE2 && TARGET_SSE") of
;; the selector-1 extraction from V2DF into DF.  The three alternatives
;; emit movhps, movhlps and movlps; the last reads the %H1 form of a
;; memory source.  The case where both operands are memory is rejected.
7257 (define_insn "*vec_extractv2df_1_sse"
7258 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
7260 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
7261 (parallel [(const_int 1)])))]
7262 "!TARGET_SSE2 && TARGET_SSE
7263 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7265 movhps\t{%1, %0|%q0, %1}
7266 movhlps\t{%1, %0|%0, %1}
7267 movlps\t{%H1, %0|%0, %H1}"
7268 [(set_attr "type" "ssemov")
7269 (set_attr "ssememalign" "64")
7270 (set_attr "mode" "V2SF,V4SF,V2SF")])
7272 ;; Avoid combining registers from different units in a single alternative,
7273 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Moves the element picked by selector (const_int 0) of a V2DF source
;; into a DF destination.  Five alternatives; the last two are typed fmov
;; and imov.  The insn condition requires SSE2 and rejects the case where
;; both operands are memory.
7274 (define_insn "sse2_storelpd"
7275 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
7277 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m,m,m")
7278 (parallel [(const_int 0)])))]
7279 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7281 %vmovlpd\t{%1, %0|%0, %1}
7286 [(set_attr "type" "ssemov,ssemov,ssemov,fmov,imov")
7287 (set_attr "prefix_data16" "1,*,*,*,*")
7288 (set_attr "prefix" "maybe_vex")
7289 (set_attr "mode" "V1DF,DF,DF,DF,DF")])
;; NOTE(review): the opening line of this definition is not visible in
;; this excerpt; it reads like a post-reload split -- confirm.
;; Visible behaviour: under TARGET_SSE2 && reload_completed, extracting
;; element 0 of a V2DF operand into a DF register becomes a plain
;; (set (match_dup 0) (match_dup 1)).  operands[1] is first rewritten:
;; a register becomes a DFmode REG with the same REGNO, and the other
;; visible assignment re-addresses it as DFmode at byte offset 0.
7292 [(set (match_operand:DF 0 "register_operand")
7294 (match_operand:V2DF 1 "nonimmediate_operand")
7295 (parallel [(const_int 0)])))]
7296 "TARGET_SSE2 && reload_completed"
7297 [(set (match_dup 0) (match_dup 1))]
7299 if (REG_P (operands[1]))
7300 operands[1] = gen_rtx_REG (DFmode, REGNO (operands[1]));
7302 operands[1] = adjust_address (operands[1], DFmode, 0);
;; SSE-without-SSE2 variant (condition "!TARGET_SSE2 && TARGET_SSE") of
;; the selector-0 extraction from V2DF into DF.  The three alternatives
;; emit movlps (store), movaps (register copy) and movlps (load).  The
;; case where both operands are memory is rejected.
7305 (define_insn "*vec_extractv2df_0_sse"
7306 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
7308 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
7309 (parallel [(const_int 0)])))]
7310 "!TARGET_SSE2 && TARGET_SSE
7311 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7313 movlps\t{%1, %0|%0, %1}
7314 movaps\t{%1, %0|%0, %1}
7315 movlps\t{%1, %0|%0, %q1}"
7316 [(set_attr "type" "ssemov")
7317 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander wrapper around sse2_loadhpd.  ix86_fixup_binary_operands
;; returns the destination to use; the real insn is emitted into it via
;; gen_sse2_loadhpd.  If that destination differs from operands[0], the
;; result is copied back with emit_move_insn.
7319 (define_expand "sse2_loadhpd_exp"
7320 [(set (match_operand:V2DF 0 "nonimmediate_operand")
7323 (match_operand:V2DF 1 "nonimmediate_operand")
7324 (parallel [(const_int 0)]))
7325 (match_operand:DF 2 "nonimmediate_operand")))]
7328 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
7330 emit_insn (gen_sse2_loadhpd (dst, operands[1], operands[2]));
7332 /* Fix up the destination if needed. */
7333 if (dst != operands[0])
7334 emit_move_insn (operands[0], dst);
7339 ;; Avoid combining registers from different units in a single alternative,
7340 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Combines the selector-0 element of V2DF operand 1 with DF operand 2.
;; Seven alternatives: movhpd/vmovhpd when operand 2 is memory,
;; unpcklpd/vunpcklpd when it is a register, then three more typed
;; ssemov, fmov and imov.  The condition requires SSE2 and rejects the
;; case where operands 1 and 2 are both memory.
;; NOTE(review): the constraint strings for operands 0 and 1 are not
;; visible in this excerpt.
7341 (define_insn "sse2_loadhpd"
7342 [(set (match_operand:V2DF 0 "nonimmediate_operand"
7346 (match_operand:V2DF 1 "nonimmediate_operand"
7348 (parallel [(const_int 0)]))
7349 (match_operand:DF 2 "nonimmediate_operand"
7350 " m,m,x,x,x,*f,r")))]
7351 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
7353 movhpd\t{%2, %0|%0, %2}
7354 vmovhpd\t{%2, %1, %0|%0, %1, %2}
7355 unpcklpd\t{%2, %0|%0, %2}
7356 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
7360 [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
7361 (set_attr "type" "ssemov,ssemov,sselog,sselog,ssemov,fmov,imov")
7362 (set_attr "ssememalign" "64")
7363 (set_attr "prefix_data16" "1,*,*,*,*,*,*")
7364 (set_attr "prefix" "orig,vex,orig,vex,*,*,*")
7365 (set_attr "mode" "V1DF,V1DF,V2DF,V2DF,DF,DF,DF")])
;; NOTE(review): the opening line of this definition is not visible in
;; this excerpt; it reads like a post-reload split -- confirm.
;; Visible behaviour: under TARGET_SSE2 && reload_completed, a V2DF memory
;; destination that keeps its own element 0 and takes DF register operand
;; 1 as the other element becomes a plain DFmode store.  operands[0] is
;; re-addressed as DFmode at byte offset 8.
7368 [(set (match_operand:V2DF 0 "memory_operand")
7370 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
7371 (match_operand:DF 1 "register_operand")))]
7372 "TARGET_SSE2 && reload_completed"
7373 [(set (match_dup 0) (match_dup 1))]
7374 "operands[0] = adjust_address (operands[0], DFmode, 8);")
;; Expander wrapper around sse2_loadlpd.  ix86_fixup_binary_operands
;; returns the destination to use; the real insn is emitted into it via
;; gen_sse2_loadlpd.  If that destination differs from operands[0], the
;; result is copied back with emit_move_insn.
7376 (define_expand "sse2_loadlpd_exp"
7377 [(set (match_operand:V2DF 0 "nonimmediate_operand")
7379 (match_operand:DF 2 "nonimmediate_operand")
7381 (match_operand:V2DF 1 "nonimmediate_operand")
7382 (parallel [(const_int 1)]))))]
7385 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
7387 emit_insn (gen_sse2_loadlpd (dst, operands[1], operands[2]));
7389 /* Fix up the destination if needed. */
7390 if (dst != operands[0])
7391 emit_move_insn (operands[0], dst);
7396 ;; Avoid combining registers from different units in a single alternative,
7397 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Combines DF operand 2 with the selector-1 element of V2DF operand 1.
;; Eleven alternatives.  The first eight have visible templates: movsd,
;; movlpd and their v-prefixed forms, shufpd with immediate 2, and
;; movhpd/vmovhpd reading %H1.  Alternatives 8..10 store to memory with
;; operand 1 tied to the destination; 9 and 10 are typed fmov and imov.
;; Operand 1 may also be the "C" constant (alternative 0).  The condition
;; requires SSE2 and rejects the case where operands 1 and 2 are both
;; memory.
7398 (define_insn "sse2_loadlpd"
7399 [(set (match_operand:V2DF 0 "nonimmediate_operand"
7400 "=x,x,x,x,x,x,x,x,m,m ,m")
7402 (match_operand:DF 2 "nonimmediate_operand"
7403 " m,m,m,x,x,0,0,x,x,*f,r")
7405 (match_operand:V2DF 1 "vector_move_operand"
7406 " C,0,x,0,x,x,o,o,0,0 ,0")
7407 (parallel [(const_int 1)]))))]
7408 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
7410 %vmovsd\t{%2, %0|%0, %2}
7411 movlpd\t{%2, %0|%0, %2}
7412 vmovlpd\t{%2, %1, %0|%0, %1, %2}
7413 movsd\t{%2, %0|%0, %2}
7414 vmovsd\t{%2, %1, %0|%0, %1, %2}
7415 shufpd\t{$2, %1, %0|%0, %1, 2}
7416 movhpd\t{%H1, %0|%0, %H1}
7417 vmovhpd\t{%H1, %2, %0|%0, %2, %H1}
7421 [(set_attr "isa" "*,noavx,avx,noavx,avx,noavx,noavx,avx,*,*,*")
7423 (cond [(eq_attr "alternative" "5")
7424 (const_string "sselog")
7425 (eq_attr "alternative" "9")
7426 (const_string "fmov")
7427 (eq_attr "alternative" "10")
7428 (const_string "imov")
7430 (const_string "ssemov")))
7431 (set_attr "ssememalign" "64")
7432 (set_attr "prefix_data16" "*,1,*,*,*,*,1,*,*,*,*")
7433 (set_attr "length_immediate" "*,*,*,*,*,1,*,*,*,*,*")
7434 (set_attr "prefix" "maybe_vex,orig,vex,orig,vex,orig,orig,vex,*,*,*")
7435 (set_attr "mode" "DF,V1DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,DF,DF,DF")])
;; NOTE(review): the opening line of this definition is not visible in
;; this excerpt; it reads like a post-reload split -- confirm.
;; Visible behaviour: under TARGET_SSE2 && reload_completed, a V2DF memory
;; destination that takes DF register operand 1 and keeps its own element
;; 1 becomes a plain DFmode store.  operands[0] is re-addressed as DFmode
;; at byte offset 0.
7438 [(set (match_operand:V2DF 0 "memory_operand")
7440 (match_operand:DF 1 "register_operand")
7441 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
7442 "TARGET_SSE2 && reload_completed"
7443 [(set (match_dup 0) (match_dup 1))]
7444 "operands[0] = adjust_address (operands[0], DFmode, 0);")
;; V2DF merge of operands 2 and 1 with nine alternatives: movsd/vmovsd
;; (register source), movlpd/vmovlpd (memory source), a %vmovlpd store,
;; shufpd with immediate 2, movhps/vmovhps reading %H1, and a %vmovhps
;; store to %H0.  Only alternative 5 (shufpd) is typed sselog and carries
;; an immediate.
;; NOTE(review): the combining RTL operator and the insn condition are
;; not visible in this excerpt.
7446 (define_insn "sse2_movsd"
7447 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,m,x,x,x,o")
7449 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,m,m,x,0,0,x,0")
7450 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,0,x,0,x,o,o,x")
7454 movsd\t{%2, %0|%0, %2}
7455 vmovsd\t{%2, %1, %0|%0, %1, %2}
7456 movlpd\t{%2, %0|%0, %q2}
7457 vmovlpd\t{%2, %1, %0|%0, %1, %q2}
7458 %vmovlpd\t{%2, %0|%q0, %2}
7459 shufpd\t{$2, %1, %0|%0, %1, 2}
7460 movhps\t{%H1, %0|%0, %H1}
7461 vmovhps\t{%H1, %2, %0|%0, %2, %H1}
7462 %vmovhps\t{%1, %H0|%H0, %1}"
7463 [(set_attr "isa" "noavx,avx,noavx,avx,*,noavx,noavx,avx,*")
7466 (eq_attr "alternative" "5")
7467 (const_string "sselog")
7468 (const_string "ssemov")))
7469 (set (attr "prefix_data16")
7471 (and (eq_attr "alternative" "2,4")
7472 (not (match_test "TARGET_AVX")))
7474 (const_string "*")))
7475 (set_attr "length_immediate" "*,*,*,*,*,1,*,*,*")
7476 (set_attr "ssememalign" "64")
7477 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig,vex,maybe_vex")
7478 (set_attr "mode" "DF,DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,V1DF")])
;; Builds a V2DF from a single DF operand.  Two alternatives per the "isa"
;; attribute: a non-AVX form with the source tied to the destination
;; (its template is not visible in this excerpt), and an SSE3 form that
;; emits %vmovddup from a register or memory source.
7480 (define_insn "vec_dupv2df"
7481 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
7483 (match_operand:DF 1 "nonimmediate_operand" " 0,xm")))]
7487 %vmovddup\t{%1, %0|%0, %1}"
7488 [(set_attr "isa" "noavx,sse3")
7489 (set_attr "type" "sselog1")
7490 (set_attr "prefix" "orig,maybe_vex")
7491 (set_attr "mode" "V2DF,DF")])
;; Builds a V2DF from two DF operands, with eight alternatives:
;;   0,1  unpcklpd / vunpcklpd  (both in registers)
;;   2    %vmovddup             (operand 2 constrained to equal operand 1)
;;   3,4  movhpd / vmovhpd      (operand 2 in memory)
;;   5    %vmovsd               (operand 2 is the "C" constant)
;;   6,7  movlhps / movhps      (isa "noavx")
;; Alternatives 0..2 are typed sselog, the rest ssemov.
7493 (define_insn "*vec_concatv2df"
7494 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x,x,x,x,x,x")
7496 (match_operand:DF 1 "nonimmediate_operand" " 0,x,m,0,x,m,0,0")
7497 (match_operand:DF 2 "vector_move_operand" " x,x,1,m,m,C,x,m")))]
7500 unpcklpd\t{%2, %0|%0, %2}
7501 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
7502 %vmovddup\t{%1, %0|%0, %1}
7503 movhpd\t{%2, %0|%0, %2}
7504 vmovhpd\t{%2, %1, %0|%0, %1, %2}
7505 %vmovsd\t{%1, %0|%0, %1}
7506 movlhps\t{%2, %0|%0, %2}
7507 movhps\t{%2, %0|%0, %2}"
7508 [(set_attr "isa" "sse2_noavx,avx,sse3,sse2_noavx,avx,sse2,noavx,noavx")
7511 (eq_attr "alternative" "0,1,2")
7512 (const_string "sselog")
7513 (const_string "ssemov")))
7514 (set_attr "prefix_data16" "*,*,*,1,*,*,*,*")
7515 (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex,orig,orig")
7516 (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,DF,V4SF,V2SF")])
7518 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7520 ;; Parallel integer down-conversion operations
7522 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Destination modes for the truncating vpmov* patterns that follow,
;; plus three per-mode attributes: pmov_src_mode and pmov_src_lower give
;; the wider source mode (upper and lower case), and pmov_suff gives the
;; mnemonic suffix appended after "vpmov<trunsuffix>".
7524 (define_mode_iterator PMOV_DST_MODE [V16QI V16HI V8SI V8HI])
7525 (define_mode_attr pmov_src_mode
7526 [(V16QI "V16SI") (V16HI "V16SI") (V8SI "V8DI") (V8HI "V8DI")])
7527 (define_mode_attr pmov_src_lower
7528 [(V16QI "v16si") (V16HI "v16si") (V8SI "v8di") (V8HI "v8di")])
7529 (define_mode_attr pmov_suff
7530 [(V16QI "db") (V16HI "dw") (V8SI "qd") (V8HI "qw")])
;; Unmasked truncation (any_truncate) from <pmov_src_mode> to each
;; PMOV_DST_MODE.  Two alternatives: register destination and memory
;; destination (memory attribute "none,store").  Both emit
;; vpmov<trunsuffix><pmov_suff> with an EVEX prefix.
7532 (define_insn "*avx512f_<code><pmov_src_lower><mode>2"
7533 [(set (match_operand:PMOV_DST_MODE 0 "nonimmediate_operand" "=v,m")
7534 (any_truncate:PMOV_DST_MODE
7535 (match_operand:<pmov_src_mode> 1 "register_operand" "v,v")))]
7537 "vpmov<trunsuffix><pmov_suff>\t{%1, %0|%0, %1}"
7538 [(set_attr "type" "ssemov")
7539 (set_attr "memory" "none,store")
7540 (set_attr "prefix" "evex")
7541 (set_attr "mode" "<sseinsnmode>")])
;; Masked truncation: vec_merge of the truncated source with operand 2
;; under mask register operand 3 (constraint "Yk").  Operand 2 is either
;; tied to the destination or, in the register alternative only, the "C"
;; constant.  The template prints the mask as {%3} followed by %N2.
7543 (define_insn "avx512f_<code><pmov_src_lower><mode>2_mask"
7544 [(set (match_operand:PMOV_DST_MODE 0 "nonimmediate_operand" "=v,m")
7545 (vec_merge:PMOV_DST_MODE
7546 (any_truncate:PMOV_DST_MODE
7547 (match_operand:<pmov_src_mode> 1 "register_operand" "v,v"))
7548 (match_operand:PMOV_DST_MODE 2 "vector_move_operand" "0C,0")
7549 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
7551 "vpmov<trunsuffix><pmov_suff>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
7552 [(set_attr "type" "ssemov")
7553 (set_attr "memory" "none,store")
7554 (set_attr "prefix" "evex")
7555 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the masked truncating store.  The destination must be
;; memory, the source a <pmov_src_mode> register, and operand 2 the mask
;; register.
;; NOTE(review): the merge-source line of the vec_merge and the expander
;; condition are not visible in this excerpt.
7557 (define_expand "avx512f_<code><pmov_src_lower><mode>2_mask_store"
7558 [(set (match_operand:PMOV_DST_MODE 0 "memory_operand")
7559 (vec_merge:PMOV_DST_MODE
7560 (any_truncate:PMOV_DST_MODE
7561 (match_operand:<pmov_src_mode> 1 "register_operand"))
7563 (match_operand:<avx512fmaskmode> 2 "register_operand")))]
;; V8DI -> V16QI register form of vpmov<trunsuffix>qb.  The pattern also
;; contains an all-zero V8QI const_vector, i.e. eight zero bytes alongside
;; the converted value.
7566 (define_insn "*avx512f_<code>v8div16qi2"
7567 [(set (match_operand:V16QI 0 "register_operand" "=v")
7570 (match_operand:V8DI 1 "register_operand" "v"))
7571 (const_vector:V8QI [(const_int 0) (const_int 0)
7572 (const_int 0) (const_int 0)
7573 (const_int 0) (const_int 0)
7574 (const_int 0) (const_int 0)])))]
7576 "vpmov<trunsuffix>qb\t{%1, %0|%0, %1}"
7577 [(set_attr "type" "ssemov")
7578 (set_attr "prefix" "evex")
7579 (set_attr "mode" "TI")])
;; Memory-destination form of vpmov<trunsuffix>qb (memory attribute
;; "store").  The visible selector list 8..15 names the second eight
;; bytes of the V16QI destination.
;; NOTE(review): presumably those bytes are re-read from operand 0 to
;; express that the store leaves them unchanged -- the operand line is
;; not visible here.
7581 (define_insn "*avx512f_<code>v8div16qi2_store"
7582 [(set (match_operand:V16QI 0 "memory_operand" "=m")
7585 (match_operand:V8DI 1 "register_operand" "v"))
7588 (parallel [(const_int 8) (const_int 9)
7589 (const_int 10) (const_int 11)
7590 (const_int 12) (const_int 13)
7591 (const_int 14) (const_int 15)]))))]
7593 "vpmov<trunsuffix>qb\t{%1, %0|%0, %1}"
7594 [(set_attr "type" "ssemov")
7595 (set_attr "memory" "store")
7596 (set_attr "prefix" "evex")
7597 (set_attr "mode" "TI")])
;; Masked register form of vpmov<trunsuffix>qb.  Operand 2 (tied to the
;; destination, or the "C" constant) is indexed with selectors 0..7, and
;; operand 3 is the QI mask register.  An all-zero V8QI const_vector
;; supplies the remaining eight bytes.  The template prints the mask as
;; {%3} followed by %N2.
7599 (define_insn "avx512f_<code>v8div16qi2_mask"
7600 [(set (match_operand:V16QI 0 "register_operand" "=v")
7604 (match_operand:V8DI 1 "register_operand" "v"))
7606 (match_operand:V16QI 2 "vector_move_operand" "0C")
7607 (parallel [(const_int 0) (const_int 1)
7608 (const_int 2) (const_int 3)
7609 (const_int 4) (const_int 5)
7610 (const_int 6) (const_int 7)]))
7611 (match_operand:QI 3 "register_operand" "Yk"))
7612 (const_vector:V8QI [(const_int 0) (const_int 0)
7613 (const_int 0) (const_int 0)
7614 (const_int 0) (const_int 0)
7615 (const_int 0) (const_int 0)])))]
7617 "vpmov<trunsuffix>qb\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
7618 [(set_attr "type" "ssemov")
7619 (set_attr "prefix" "evex")
7620 (set_attr "mode" "TI")])
;; Masked memory-destination form of vpmov<trunsuffix>qb (memory
;; attribute "store").  Operand 2 is the QI mask register, printed as
;; {%2}.  The pattern carries two selector lists, 0..7 and 8..15.
;; NOTE(review): the operands those selectors index are not visible in
;; this excerpt; presumably both re-read operand 0 -- confirm.
7622 (define_insn "avx512f_<code>v8div16qi2_mask_store"
7623 [(set (match_operand:V16QI 0 "memory_operand" "=m")
7627 (match_operand:V8DI 1 "register_operand" "v"))
7630 (parallel [(const_int 0) (const_int 1)
7631 (const_int 2) (const_int 3)
7632 (const_int 4) (const_int 5)
7633 (const_int 6) (const_int 7)]))
7634 (match_operand:QI 2 "register_operand" "Yk"))
7637 (parallel [(const_int 8) (const_int 9)
7638 (const_int 10) (const_int 11)
7639 (const_int 12) (const_int 13)
7640 (const_int 14) (const_int 15)]))))]
7642 "vpmov<trunsuffix>qb\t{%1, %0%{%2%}|%0%{%2%}, %1}"
7643 [(set_attr "type" "ssemov")
7644 (set_attr "memory" "store")
7645 (set_attr "prefix" "evex")
7646 (set_attr "mode" "TI")])
7648 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7650 ;; Parallel integral arithmetic
7652 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Vector integer negation expander over VI_AVX2.  The preparation
;; statement creates operand 2 as a register holding the zero vector
;; (CONST0_RTX of the mode).
;; NOTE(review): the RTL line that consumes operand 2 is not visible
;; here; presumably the negation is expressed as zero minus operand 1.
7654 (define_expand "neg<mode>2"
7655 [(set (match_operand:VI_AVX2 0 "register_operand")
7658 (match_operand:VI_AVX2 1 "nonimmediate_operand")))]
7660 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
;; Vector integer add/sub expander over VI_AVX2.  It is enabled under
;; TARGET_SSE2 together with <mask_mode512bit_condition>.  The operands
;; are legitimised in place with ix86_fixup_binary_operands_no_copy
;; before the pattern is emitted.
7662 (define_expand "<plusminus_insn><mode>3<mask_name>"
7663 [(set (match_operand:VI_AVX2 0 "register_operand")
7665 (match_operand:VI_AVX2 1 "nonimmediate_operand")
7666 (match_operand:VI_AVX2 2 "nonimmediate_operand")))]
7667 "TARGET_SSE2 && <mask_mode512bit_condition>"
7668 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Vector integer add/sub insn over VI_AVX2.  The non-AVX alternative is
;; the two-operand p<op><suffix> form with operand 1 tied to the
;; destination; <comm> supplies the commutativity marker.  The AVX
;; alternative is the three-operand vp<op><suffix> form with
;; <mask_operand3> appended to the destination.
7670 (define_insn "*<plusminus_insn><mode>3<mask_name>"
7671 [(set (match_operand:VI_AVX2 0 "register_operand" "=x,v")
7673 (match_operand:VI_AVX2 1 "nonimmediate_operand" "<comm>0,v")
7674 (match_operand:VI_AVX2 2 "nonimmediate_operand" "xm,vm")))]
7675 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands) && <mask_mode512bit_condition>"
7677 p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
7678 vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7679 [(set_attr "isa" "noavx,avx")
7680 (set_attr "type" "sseiadd")
7681 (set_attr "prefix_data16" "1,*")
7682 (set_attr "prefix" "<mask_prefix3>")
7683 (set_attr "mode" "<sseinsnmode>")])
;; Saturating add/sub expander (sat_plusminus) over VI12_AVX2.  The
;; operands are legitimised in place with
;; ix86_fixup_binary_operands_no_copy.
7685 (define_expand "<sse2_avx2>_<plusminus_insn><mode>3"
7686 [(set (match_operand:VI12_AVX2 0 "register_operand")
7687 (sat_plusminus:VI12_AVX2
7688 (match_operand:VI12_AVX2 1 "nonimmediate_operand")
7689 (match_operand:VI12_AVX2 2 "nonimmediate_operand")))]
7691 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Saturating add/sub insn (sat_plusminus) over VI12_AVX2.  The non-AVX
;; alternative is the two-operand p<op><suffix> form with operand 1 tied
;; to the destination.  The AVX alternative is the three-operand
;; vp<op><suffix> form.
7693 (define_insn "*<sse2_avx2>_<plusminus_insn><mode>3"
7694 [(set (match_operand:VI12_AVX2 0 "register_operand" "=x,v")
7695 (sat_plusminus:VI12_AVX2
7696 (match_operand:VI12_AVX2 1 "nonimmediate_operand" "<comm>0,v")
7697 (match_operand:VI12_AVX2 2 "nonimmediate_operand" "xm,vm")))]
7698 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
7700 p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
7701 vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
7702 [(set_attr "isa" "noavx,avx")
7703 (set_attr "type" "sseiadd")
7704 (set_attr "prefix_data16" "1,*")
7705 (set_attr "prefix" "orig,vex")
7706 (set_attr "mode" "TI")])
;; Multiply expander for the VI1_AVX2 modes.  No insn is matched
;; directly; the work is delegated to ix86_expand_vecop_qihi with code
;; MULT and the three operands.
7708 (define_expand "mul<mode>3"
7709 [(set (match_operand:VI1_AVX2 0 "register_operand")
7710 (mult:VI1_AVX2 (match_operand:VI1_AVX2 1 "register_operand")
7711 (match_operand:VI1_AVX2 2 "register_operand")))]
7714 ix86_expand_vecop_qihi (MULT, operands[0], operands[1], operands[2]);
;; Multiply expander for the VI2_AVX2 modes.  The operands are
;; legitimised in place with ix86_fixup_binary_operands_no_copy.
7718 (define_expand "mul<mode>3"
7719 [(set (match_operand:VI2_AVX2 0 "register_operand")
7720 (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "nonimmediate_operand")
7721 (match_operand:VI2_AVX2 2 "nonimmediate_operand")))]
7723 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; Multiply insn for the VI2_AVX2 modes, emitted as pmullw (non-AVX,
;; operand 1 tied to the destination) or vpmullw (AVX).  Operand 1 is
;; marked commutative with "%".
7725 (define_insn "*mul<mode>3"
7726 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
7727 (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,x")
7728 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,xm")))]
7729 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
7731 pmullw\t{%2, %0|%0, %2}
7732 vpmullw\t{%2, %1, %0|%0, %1, %2}"
7733 [(set_attr "isa" "noavx,avx")
7734 (set_attr "type" "sseimul")
7735 (set_attr "prefix_data16" "1,*")
7736 (set_attr "prefix" "orig,vex")
7737 (set_attr "mode" "<sseinsnmode>")])
;; High-part multiply expander for the VI2_AVX2 modes.  Both inputs are
;; widened with any_extend to <ssedoublemode>, multiplied, and the
;; product is shifted right logically.  The operands are legitimised in
;; place with ix86_fixup_binary_operands_no_copy.
;; NOTE(review): the shift count and the outer narrowing operator are
;; not visible in this excerpt.
7739 (define_expand "<s>mul<mode>3_highpart"
7740 [(set (match_operand:VI2_AVX2 0 "register_operand")
7742 (lshiftrt:<ssedoublemode>
7743 (mult:<ssedoublemode>
7744 (any_extend:<ssedoublemode>
7745 (match_operand:VI2_AVX2 1 "nonimmediate_operand"))
7746 (any_extend:<ssedoublemode>
7747 (match_operand:VI2_AVX2 2 "nonimmediate_operand")))
7750 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; High-part multiply insn for the VI2_AVX2 modes, emitted as pmulh<u>w
;; (non-AVX, operand 1 tied to the destination) or vpmulh<u>w (AVX).
;; Operand 1 is marked commutative with "%".
7752 (define_insn "*<s>mul<mode>3_highpart"
7753 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
7755 (lshiftrt:<ssedoublemode>
7756 (mult:<ssedoublemode>
7757 (any_extend:<ssedoublemode>
7758 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,x"))
7759 (any_extend:<ssedoublemode>
7760 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,xm")))
7762 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
7764 pmulh<u>w\t{%2, %0|%0, %2}
7765 vpmulh<u>w\t{%2, %1, %0|%0, %1, %2}"
7766 [(set_attr "isa" "noavx,avx")
7767 (set_attr "type" "sseimul")
7768 (set_attr "prefix_data16" "1,*")
7769 (set_attr "prefix" "orig,vex")
7770 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the V16SI -> V8DI widening multiply of the even-indexed
;; elements (selectors 0, 2, ..., 14 on both inputs).  The operands are
;; legitimised in place with ix86_fixup_binary_operands_no_copy.
7772 (define_expand "vec_widen_umult_even_v16si<mask_name>"
7773 [(set (match_operand:V8DI 0 "register_operand")
7777 (match_operand:V16SI 1 "nonimmediate_operand")
7778 (parallel [(const_int 0) (const_int 2)
7779 (const_int 4) (const_int 6)
7780 (const_int 8) (const_int 10)
7781 (const_int 12) (const_int 14)])))
7784 (match_operand:V16SI 2 "nonimmediate_operand")
7785 (parallel [(const_int 0) (const_int 2)
7786 (const_int 4) (const_int 6)
7787 (const_int 8) (const_int 10)
7788 (const_int 12) (const_int 14)])))))]
7790 "ix86_fixup_binary_operands_no_copy (MULT, V16SImode, operands);")
;; AVX-512F insn for the even-element V16SI -> V8DI widening multiply,
;; emitted as vpmuludq with <mask_operand3> appended to the destination.
;; Operand 1 is marked commutative with "%".
7792 (define_insn "*vec_widen_umult_even_v16si<mask_name>"
7793 [(set (match_operand:V8DI 0 "register_operand" "=v")
7797 (match_operand:V16SI 1 "nonimmediate_operand" "%v")
7798 (parallel [(const_int 0) (const_int 2)
7799 (const_int 4) (const_int 6)
7800 (const_int 8) (const_int 10)
7801 (const_int 12) (const_int 14)])))
7804 (match_operand:V16SI 2 "nonimmediate_operand" "vm")
7805 (parallel [(const_int 0) (const_int 2)
7806 (const_int 4) (const_int 6)
7807 (const_int 8) (const_int 10)
7808 (const_int 12) (const_int 14)])))))]
7809 "TARGET_AVX512F && ix86_binary_operator_ok (MULT, V16SImode, operands)"
7810 "vpmuludq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7811 [(set_attr "isa" "avx512f")
7812 (set_attr "type" "sseimul")
7813 (set_attr "prefix_extra" "1")
7814 (set_attr "prefix" "evex")
7815 (set_attr "mode" "XI")])
;; Expander for the V8SI -> V4DI widening multiply of the even-indexed
;; elements (selectors 0, 2, 4, 6 on both inputs).  The operands are
;; legitimised in place with ix86_fixup_binary_operands_no_copy.
7817 (define_expand "vec_widen_umult_even_v8si"
7818 [(set (match_operand:V4DI 0 "register_operand")
7822 (match_operand:V8SI 1 "nonimmediate_operand")
7823 (parallel [(const_int 0) (const_int 2)
7824 (const_int 4) (const_int 6)])))
7827 (match_operand:V8SI 2 "nonimmediate_operand")
7828 (parallel [(const_int 0) (const_int 2)
7829 (const_int 4) (const_int 6)])))))]
7831 "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")
;; AVX2 insn for the even-element V8SI -> V4DI widening multiply,
;; emitted as vpmuludq.  Operand 1 is marked commutative with "%".
7833 (define_insn "*vec_widen_umult_even_v8si"
7834 [(set (match_operand:V4DI 0 "register_operand" "=x")
7838 (match_operand:V8SI 1 "nonimmediate_operand" "%x")
7839 (parallel [(const_int 0) (const_int 2)
7840 (const_int 4) (const_int 6)])))
7843 (match_operand:V8SI 2 "nonimmediate_operand" "xm")
7844 (parallel [(const_int 0) (const_int 2)
7845 (const_int 4) (const_int 6)])))))]
7846 "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V8SImode, operands)"
7847 "vpmuludq\t{%2, %1, %0|%0, %1, %2}"
7848 [(set_attr "type" "sseimul")
7849 (set_attr "prefix" "vex")
7850 (set_attr "mode" "OI")])
;; Expander for the V4SI -> V2DI widening multiply of the even-indexed
;; elements (selectors 0 and 2 on both inputs).  The operands are
;; legitimised in place with ix86_fixup_binary_operands_no_copy.
7852 (define_expand "vec_widen_umult_even_v4si"
7853 [(set (match_operand:V2DI 0 "register_operand")
7857 (match_operand:V4SI 1 "nonimmediate_operand")
7858 (parallel [(const_int 0) (const_int 2)])))
7861 (match_operand:V4SI 2 "nonimmediate_operand")
7862 (parallel [(const_int 0) (const_int 2)])))))]
7864 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; SSE2 insn for the even-element V4SI -> V2DI widening multiply,
;; emitted as pmuludq (non-AVX, operand 1 tied to the destination) or
;; vpmuludq (AVX).  Operand 1 is marked commutative with "%".
7866 (define_insn "*vec_widen_umult_even_v4si"
7867 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
7871 (match_operand:V4SI 1 "nonimmediate_operand" "%0,x")
7872 (parallel [(const_int 0) (const_int 2)])))
7875 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
7876 (parallel [(const_int 0) (const_int 2)])))))]
7877 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
7879 pmuludq\t{%2, %0|%0, %2}
7880 vpmuludq\t{%2, %1, %0|%0, %1, %2}"
7881 [(set_attr "isa" "noavx,avx")
7882 (set_attr "type" "sseimul")
7883 (set_attr "prefix_data16" "1,*")
7884 (set_attr "prefix" "orig,vex")
7885 (set_attr "mode" "TI")])
;; Expander for the vec_widen_smult_even V16SI -> V8DI multiply of the
;; even-indexed elements (selectors 0, 2, ..., 14 on both inputs).  The
;; operands are legitimised in place with
;; ix86_fixup_binary_operands_no_copy.
7887 (define_expand "vec_widen_smult_even_v16si<mask_name>"
7888 [(set (match_operand:V8DI 0 "register_operand")
7892 (match_operand:V16SI 1 "nonimmediate_operand")
7893 (parallel [(const_int 0) (const_int 2)
7894 (const_int 4) (const_int 6)
7895 (const_int 8) (const_int 10)
7896 (const_int 12) (const_int 14)])))
7899 (match_operand:V16SI 2 "nonimmediate_operand")
7900 (parallel [(const_int 0) (const_int 2)
7901 (const_int 4) (const_int 6)
7902 (const_int 8) (const_int 10)
7903 (const_int 12) (const_int 14)])))))]
7905 "ix86_fixup_binary_operands_no_copy (MULT, V16SImode, operands);")
;; AVX-512F insn for the even-element V16SI -> V8DI widening multiply,
;; emitted as vpmuldq with <mask_operand3> appended to the destination.
;; Operand 1 is marked commutative with "%".
7907 (define_insn "*vec_widen_smult_even_v16si<mask_name>"
7908 [(set (match_operand:V8DI 0 "register_operand" "=v")
7912 (match_operand:V16SI 1 "nonimmediate_operand" "%v")
7913 (parallel [(const_int 0) (const_int 2)
7914 (const_int 4) (const_int 6)
7915 (const_int 8) (const_int 10)
7916 (const_int 12) (const_int 14)])))
7919 (match_operand:V16SI 2 "nonimmediate_operand" "vm")
7920 (parallel [(const_int 0) (const_int 2)
7921 (const_int 4) (const_int 6)
7922 (const_int 8) (const_int 10)
7923 (const_int 12) (const_int 14)])))))]
7924 "TARGET_AVX512F && ix86_binary_operator_ok (MULT, V16SImode, operands)"
7925 "vpmuldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7926 [(set_attr "isa" "avx512f")
7927 (set_attr "type" "sseimul")
7928 (set_attr "prefix_extra" "1")
7929 (set_attr "prefix" "evex")
7930 (set_attr "mode" "XI")])
;; Expander for the vec_widen_smult_even V8SI -> V4DI multiply of the
;; even-indexed elements (selectors 0, 2, 4, 6 on both inputs).  The
;; operands are legitimised in place with
;; ix86_fixup_binary_operands_no_copy.
7932 (define_expand "vec_widen_smult_even_v8si"
7933 [(set (match_operand:V4DI 0 "register_operand")
7937 (match_operand:V8SI 1 "nonimmediate_operand")
7938 (parallel [(const_int 0) (const_int 2)
7939 (const_int 4) (const_int 6)])))
7942 (match_operand:V8SI 2 "nonimmediate_operand")
7943 (parallel [(const_int 0) (const_int 2)
7944 (const_int 4) (const_int 6)])))))]
7946 "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")
;; AVX2 insn for the even-element V8SI -> V4DI widening multiply
;; (selectors 0, 2, 4, 6 on both inputs), emitted as vpmuldq.
;; Operand 1 carries the "%" commutative marker, as in the other
;; *vec_widen_[us]mult_even_* insns.  Its predicate is
;; nonimmediate_operand, so without "%" a memory operand 1 could not be
;; swapped with a register operand 2 and would need a reload instead.
7948 (define_insn "*vec_widen_smult_even_v8si"
7949 [(set (match_operand:V4DI 0 "register_operand" "=x")
7953 (match_operand:V8SI 1 "nonimmediate_operand" "%x")
7954 (parallel [(const_int 0) (const_int 2)
7955 (const_int 4) (const_int 6)])))
7958 (match_operand:V8SI 2 "nonimmediate_operand" "xm")
7959 (parallel [(const_int 0) (const_int 2)
7960 (const_int 4) (const_int 6)])))))]
7961 "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V8SImode, operands)"
7962 "vpmuldq\t{%2, %1, %0|%0, %1, %2}"
7963 [(set_attr "type" "sseimul")
7964 (set_attr "prefix_extra" "1")
7965 (set_attr "prefix" "vex")
7966 (set_attr "mode" "OI")])
;; Expander for the V4SI -> V2DI widening multiply of the even-indexed
;; elements (selectors 0 and 2 on both inputs).  The operands are
;; legitimised in place with ix86_fixup_binary_operands_no_copy.
7968 (define_expand "sse4_1_mulv2siv2di3"
7969 [(set (match_operand:V2DI 0 "register_operand")
7973 (match_operand:V4SI 1 "nonimmediate_operand")
7974 (parallel [(const_int 0) (const_int 2)])))
7977 (match_operand:V4SI 2 "nonimmediate_operand")
7978 (parallel [(const_int 0) (const_int 2)])))))]
7980 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; SSE4.1 insn for the even-element V4SI -> V2DI widening multiply,
;; emitted as pmuldq (non-AVX, operand 1 tied to the destination) or
;; vpmuldq (AVX).  Operand 1 is marked commutative with "%".
7982 (define_insn "*sse4_1_mulv2siv2di3"
7983 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
7987 (match_operand:V4SI 1 "nonimmediate_operand" "%0,x")
7988 (parallel [(const_int 0) (const_int 2)])))
7991 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
7992 (parallel [(const_int 0) (const_int 2)])))))]
7993 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
7995 pmuldq\t{%2, %0|%0, %2}
7996 vpmuldq\t{%2, %1, %0|%0, %1, %2}"
7997 [(set_attr "isa" "noavx,avx")
7998 (set_attr "type" "sseimul")
7999 (set_attr "prefix_data16" "1,*")
8000 (set_attr "prefix_extra" "1")
8001 (set_attr "prefix" "orig,vex")
8002 (set_attr "mode" "TI")])
;; Expander for the 256-bit pmaddwd pattern (V16HI inputs, V8SI result).
;; The template selects the even-indexed halfwords (0, 2, ..., 14) of
;; operands 1 and 2, and then the odd-indexed halfwords (1, 3, ..., 15)
;; of the same operands via match_dup.  The operands are legitimised in
;; place with ix86_fixup_binary_operands_no_copy.
;; NOTE(review): the arithmetic operators wrapping the selections are
;; not visible in this excerpt.
8004 (define_expand "avx2_pmaddwd"
8005 [(set (match_operand:V8SI 0 "register_operand")
8010 (match_operand:V16HI 1 "nonimmediate_operand")
8011 (parallel [(const_int 0) (const_int 2)
8012 (const_int 4) (const_int 6)
8013 (const_int 8) (const_int 10)
8014 (const_int 12) (const_int 14)])))
8017 (match_operand:V16HI 2 "nonimmediate_operand")
8018 (parallel [(const_int 0) (const_int 2)
8019 (const_int 4) (const_int 6)
8020 (const_int 8) (const_int 10)
8021 (const_int 12) (const_int 14)]))))
8024 (vec_select:V8HI (match_dup 1)
8025 (parallel [(const_int 1) (const_int 3)
8026 (const_int 5) (const_int 7)
8027 (const_int 9) (const_int 11)
8028 (const_int 13) (const_int 15)])))
8030 (vec_select:V8HI (match_dup 2)
8031 (parallel [(const_int 1) (const_int 3)
8032 (const_int 5) (const_int 7)
8033 (const_int 9) (const_int 11)
8034 (const_int 13) (const_int 15)]))))))]
8036 "ix86_fixup_binary_operands_no_copy (MULT, V16HImode, operands);")
;; AVX2 insn matching the even/odd halfword selection pattern on V16HI
;; operands, emitted as vpmaddwd with a V8SI result.  Operand 1 is marked
;; commutative with "%".
8038 (define_insn "*avx2_pmaddwd"
8039 [(set (match_operand:V8SI 0 "register_operand" "=x")
8044 (match_operand:V16HI 1 "nonimmediate_operand" "%x")
8045 (parallel [(const_int 0) (const_int 2)
8046 (const_int 4) (const_int 6)
8047 (const_int 8) (const_int 10)
8048 (const_int 12) (const_int 14)])))
8051 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
8052 (parallel [(const_int 0) (const_int 2)
8053 (const_int 4) (const_int 6)
8054 (const_int 8) (const_int 10)
8055 (const_int 12) (const_int 14)]))))
8058 (vec_select:V8HI (match_dup 1)
8059 (parallel [(const_int 1) (const_int 3)
8060 (const_int 5) (const_int 7)
8061 (const_int 9) (const_int 11)
8062 (const_int 13) (const_int 15)])))
8064 (vec_select:V8HI (match_dup 2)
8065 (parallel [(const_int 1) (const_int 3)
8066 (const_int 5) (const_int 7)
8067 (const_int 9) (const_int 11)
8068 (const_int 13) (const_int 15)]))))))]
8069 "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V16HImode, operands)"
8070 "vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
8071 [(set_attr "type" "sseiadd")
8072 (set_attr "prefix" "vex")
8073 (set_attr "mode" "OI")])
;; Expander for the 128-bit pmaddwd pattern: V4SI result from two V8HI
;; inputs.  The template selects the even-indexed (0,2,4,6) and the
;; odd-indexed (1,3,5,7) elements of operands 1 and 2.
;; NOTE(review): the arithmetic wrappers around the vec_selects are not
;; visible in this listing -- confirm against the full pattern.
;; The preparation statement runs ix86_fixup_binary_operands_no_copy with
;; MULT on the operands.
8075 (define_expand "sse2_pmaddwd"
8076 [(set (match_operand:V4SI 0 "register_operand")
8081 (match_operand:V8HI 1 "nonimmediate_operand")
8082 (parallel [(const_int 0) (const_int 2)
8083 (const_int 4) (const_int 6)])))
8086 (match_operand:V8HI 2 "nonimmediate_operand")
8087 (parallel [(const_int 0) (const_int 2)
8088 (const_int 4) (const_int 6)]))))
8091 (vec_select:V4HI (match_dup 1)
8092 (parallel [(const_int 1) (const_int 3)
8093 (const_int 5) (const_int 7)])))
8095 (vec_select:V4HI (match_dup 2)
8096 (parallel [(const_int 1) (const_int 3)
8097 (const_int 5) (const_int 7)]))))))]
8099 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; Matching insn for the 128-bit pmaddwd pattern.  Two alternatives:
;;   noavx: two-operand pmaddwd, operand 1 tied to the destination ("0");
;;   avx:   three-operand vpmaddwd.
;; Requires TARGET_SSE2 and that ix86_binary_operator_ok accepts the
;; operands for MULT.
8101 (define_insn "*sse2_pmaddwd"
8102 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
8107 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x")
8108 (parallel [(const_int 0) (const_int 2)
8109 (const_int 4) (const_int 6)])))
8112 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
8113 (parallel [(const_int 0) (const_int 2)
8114 (const_int 4) (const_int 6)]))))
8117 (vec_select:V4HI (match_dup 1)
8118 (parallel [(const_int 1) (const_int 3)
8119 (const_int 5) (const_int 7)])))
8121 (vec_select:V4HI (match_dup 2)
8122 (parallel [(const_int 1) (const_int 3)
8123 (const_int 5) (const_int 7)]))))))]
8124 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
8126 pmaddwd\t{%2, %0|%0, %2}
8127 vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
8128 [(set_attr "isa" "noavx,avx")
8129 (set_attr "type" "sseiadd")
8130 (set_attr "atom_unit" "simul")
8131 (set_attr "prefix_data16" "1,*")
8132 (set_attr "prefix" "orig,vex")
8133 (set_attr "mode" "TI")])
;; Expander over the VI4_AVX512F modes.  The visible preparation
;; code has two paths: one forces any operand that is not a
;; nonimmediate_operand into a register and then calls
;; ix86_fixup_binary_operands_no_copy; the other hands the operands to
;; ix86_expand_sse2_mulv4si3.
;; NOTE(review): the operation code and the guard choosing between the
;; paths are not visible in this listing -- confirm against the full pattern.
8135 (define_expand "mul<mode>3<mask_name>"
8136 [(set (match_operand:VI4_AVX512F 0 "register_operand")
8138 (match_operand:VI4_AVX512F 1 "general_vector_operand")
8139 (match_operand:VI4_AVX512F 2 "general_vector_operand")))]
8140 "TARGET_SSE2 && <mask_mode512bit_condition>"
8144 if (!nonimmediate_operand (operands[1], <MODE>mode))
8145 operands[1] = force_reg (<MODE>mode, operands[1]);
8146 if (!nonimmediate_operand (operands[2], <MODE>mode))
8147 operands[2] = force_reg (<MODE>mode, operands[2]);
8148 ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
8152 ix86_expand_sse2_mulv4si3 (operands[0], operands[1], operands[2]);
;; Insn over the VI4_AVX512F modes emitting pmulld (noavx alternative,
;; operand 1 tied to the destination) or vpmulld (avx alternative, with
;; optional mask operand text).  Requires TARGET_SSE4_1.
8157 (define_insn "*<sse4_1_avx2>_mul<mode>3<mask_name>"
8158 [(set (match_operand:VI4_AVX512F 0 "register_operand" "=x,v")
8160 (match_operand:VI4_AVX512F 1 "nonimmediate_operand" "%0,v")
8161 (match_operand:VI4_AVX512F 2 "nonimmediate_operand" "xm,vm")))]
8162 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, <MODE>mode, operands) && <mask_mode512bit_condition>"
8164 pmulld\t{%2, %0|%0, %2}
8165 vpmulld\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8166 [(set_attr "isa" "noavx,avx")
8167 (set_attr "type" "sseimul")
8168 (set_attr "prefix_extra" "1")
8169 (set_attr "prefix" "<mask_prefix3>")
8170 (set_attr "btver2_decode" "vector,vector")
8171 (set_attr "mode" "<sseinsnmode>")])
;; Vector multiply for the VI8_AVX2_AVX512F modes.  Both
;; inputs must be registers; the expansion is delegated entirely to
;; ix86_expand_sse2_mulvxdi3.
8173 (define_expand "mul<mode>3"
8174 [(set (match_operand:VI8_AVX2_AVX512F 0 "register_operand")
8175 (mult:VI8_AVX2_AVX512F
8176 (match_operand:VI8_AVX2_AVX512F 1 "register_operand")
8177 (match_operand:VI8_AVX2_AVX512F 2 "register_operand")))]
8180 ix86_expand_sse2_mulvxdi3 (operands[0], operands[1], operands[2]);
;; Widening multiply, "hi" variant, for the VI124_AVX2 modes; the result
;; has mode <sseunpackmode>.  The any_extend iterator provides the
;; signed and unsigned forms.  Expansion is delegated to
;; ix86_expand_mul_widen_hilo.
;; NOTE(review): the trailing arguments of that call are not visible in
;; this listing -- confirm how signedness and hi/lo are passed.
8184 (define_expand "vec_widen_<s>mult_hi_<mode>"
8185 [(match_operand:<sseunpackmode> 0 "register_operand")
8186 (any_extend:<sseunpackmode>
8187 (match_operand:VI124_AVX2 1 "register_operand"))
8188 (match_operand:VI124_AVX2 2 "register_operand")]
8191 ix86_expand_mul_widen_hilo (operands[0], operands[1], operands[2],
;; Widening multiply, "lo" variant, for the VI124_AVX2 modes; the result
;; has mode <sseunpackmode>.  Expansion is delegated to
;; ix86_expand_mul_widen_hilo.
;; NOTE(review): the trailing arguments of that call are not visible in
;; this listing -- confirm how signedness and hi/lo are passed.
8196 (define_expand "vec_widen_<s>mult_lo_<mode>"
8197 [(match_operand:<sseunpackmode> 0 "register_operand")
8198 (any_extend:<sseunpackmode>
8199 (match_operand:VI124_AVX2 1 "register_operand"))
8200 (match_operand:VI124_AVX2 2 "register_operand")]
8203 ix86_expand_mul_widen_hilo (operands[0], operands[1], operands[2],
8208 ;; Most widen_<s>mult_even_<mode> can be handled directly from other
8209 ;; named patterns, but signed V4SI needs special help for plain SSE2.
;; V4SI x V4SI -> V2DI expander; both inputs may be registers or memory.
;; Expansion is delegated to ix86_expand_mul_widen_evenodd.
;; NOTE(review): the trailing arguments of that call are not visible in
;; this listing.
8210 (define_expand "vec_widen_smult_even_v4si"
8211 [(match_operand:V2DI 0 "register_operand")
8212 (match_operand:V4SI 1 "nonimmediate_operand")
8213 (match_operand:V4SI 2 "nonimmediate_operand")]
8216 ix86_expand_mul_widen_evenodd (operands[0], operands[1], operands[2],
;; Widening multiply, "odd" variant, for the VI4_AVX512F modes; the
;; result has mode <sseunpackmode>.  Inputs use general_vector_operand.
;; Expansion is delegated to ix86_expand_mul_widen_evenodd.
;; NOTE(review): the trailing arguments of that call are not visible in
;; this listing.
8221 (define_expand "vec_widen_<s>mult_odd_<mode>"
8222 [(match_operand:<sseunpackmode> 0 "register_operand")
8223 (any_extend:<sseunpackmode>
8224 (match_operand:VI4_AVX512F 1 "general_vector_operand"))
8225 (match_operand:VI4_AVX512F 2 "general_vector_operand")]
8228 ix86_expand_mul_widen_evenodd (operands[0], operands[1], operands[2],
;; Dot-product expander for the VI2_AVX2 modes.  It allocates a
;; temporary T in <sseunpackmode>, emits the pmaddwd pattern on operands
;; 1 and 2 into T, and then sets operand 0 to a PLUS in <sseunpackmode>.
;; NOTE(review): the PLUS operands are not visible in this listing;
;; presumably T and the accumulator operand 3 -- confirm.
8233 (define_expand "sdot_prod<mode>"
8234 [(match_operand:<sseunpackmode> 0 "register_operand")
8235 (match_operand:VI2_AVX2 1 "register_operand")
8236 (match_operand:VI2_AVX2 2 "register_operand")
8237 (match_operand:<sseunpackmode> 3 "register_operand")]
8240 rtx t = gen_reg_rtx (<sseunpackmode>mode);
8241 emit_insn (gen_<sse2_avx2>_pmaddwd (t, operands[1], operands[2]));
8242 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
8243 gen_rtx_PLUS (<sseunpackmode>mode,
8248 ;; Normally we use widen_mul_even/odd, but combine can't quite get it all
8249 ;; back together when madd is available.
;; V4SI dot product accumulating into V2DI.  Emits xop_pmacsdqh on
;; operands 1, 2 and the accumulator (operand 3) into a fresh V2DI
;; temporary, then xop_pmacsdql on operands 1, 2 and that temporary into
;; operand 0.
8250 (define_expand "sdot_prodv4si"
8251 [(match_operand:V2DI 0 "register_operand")
8252 (match_operand:V4SI 1 "register_operand")
8253 (match_operand:V4SI 2 "register_operand")
8254 (match_operand:V2DI 3 "register_operand")]
8257 rtx t = gen_reg_rtx (V2DImode);
8258 emit_insn (gen_xop_pmacsdqh (t, operands[1], operands[2], operands[3]));
8259 emit_insn (gen_xop_pmacsdql (operands[0], operands[1], operands[2], t));
;; Shift insn for the VI24_AVX2 modes emitting psra<suffix>
;; (noavx, operand 1 tied to the destination) or vpsra<suffix> (avx).
;; The shift count is an SImode operand with constraint "xN": an SSE
;; register or a constant.  length_immediate is computed from whether
;; operand 2 is a const_int.
8263 (define_insn "ashr<mode>3"
8264 [(set (match_operand:VI24_AVX2 0 "register_operand" "=x,x")
8266 (match_operand:VI24_AVX2 1 "register_operand" "0,x")
8267 (match_operand:SI 2 "nonmemory_operand" "xN,xN")))]
8270 psra<ssemodesuffix>\t{%2, %0|%0, %2}
8271 vpsra<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
8272 [(set_attr "isa" "noavx,avx")
8273 (set_attr "type" "sseishft")
8274 (set (attr "length_immediate")
8275 (if_then_else (match_operand 2 "const_int_operand")
8277 (const_string "0")))
8278 (set_attr "prefix_data16" "1,*")
8279 (set_attr "prefix" "orig,vex")
8280 (set_attr "mode" "<sseinsnmode>")])
;; AVX-512F shift insn for the VI48_512 modes, emitting vpsra<suffix>
;; with optional mask operand text.  Alternative 0 takes the source and
;; the count in registers; alternative 1 takes a register-or-memory
;; source with a constant count ("N").
8282 (define_insn "ashr<mode>3<mask_name>"
8283 [(set (match_operand:VI48_512 0 "register_operand" "=v,v")
8285 (match_operand:VI48_512 1 "nonimmediate_operand" "v,vm")
8286 (match_operand:SI 2 "nonmemory_operand" "v,N")))]
8287 "TARGET_AVX512F && <mask_mode512bit_condition>"
8288 "vpsra<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8289 [(set_attr "type" "sseishft")
8290 (set (attr "length_immediate")
8291 (if_then_else (match_operand 2 "const_int_operand")
8293 (const_string "0")))
8294 (set_attr "mode" "<sseinsnmode>")])
;; Shift insn generated from the any_lshift code iterator for the
;; VI248_AVX2 modes.  Emits p<vshift><suffix> (noavx, operand 1 tied to
;; the destination) or vp<vshift><suffix> (avx).  The count is an SImode
;; register or constant ("xN").
8296 (define_insn "<shift_insn><mode>3"
8297 [(set (match_operand:VI248_AVX2 0 "register_operand" "=x,x")
8298 (any_lshift:VI248_AVX2
8299 (match_operand:VI248_AVX2 1 "register_operand" "0,x")
8300 (match_operand:SI 2 "nonmemory_operand" "xN,xN")))]
8303 p<vshift><ssemodesuffix>\t{%2, %0|%0, %2}
8304 vp<vshift><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
8305 [(set_attr "isa" "noavx,avx")
8306 (set_attr "type" "sseishft")
8307 (set (attr "length_immediate")
8308 (if_then_else (match_operand 2 "const_int_operand")
8310 (const_string "0")))
8311 (set_attr "prefix_data16" "1,*")
8312 (set_attr "prefix" "orig,vex")
8313 (set_attr "mode" "<sseinsnmode>")])
;; AVX-512F variant of the any_lshift shift insn for the VI48_512 modes,
;; with optional mask operand text.  Alternative 0: register source with
;; a register or constant count ("vN"); alternative 1: memory source
;; with a constant count ("N").
8315 (define_insn "<shift_insn><mode>3<mask_name>"
8316 [(set (match_operand:VI48_512 0 "register_operand" "=v,v")
8317 (any_lshift:VI48_512
8318 (match_operand:VI48_512 1 "nonimmediate_operand" "v,m")
8319 (match_operand:SI 2 "nonmemory_operand" "vN,N")))]
8320 "TARGET_AVX512F && <mask_mode512bit_condition>"
8321 "vp<vshift><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8322 [(set_attr "isa" "avx512f")
8323 (set_attr "type" "sseishft")
8324 (set (attr "length_immediate")
8325 (if_then_else (match_operand 2 "const_int_operand")
8327 (const_string "0")))
8328 (set_attr "prefix" "evex")
8329 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the VI_128 modes with a constant count (operand 2,
;; const_0_to_255_mul_8_operand).  The preparation code re-views operand
;; 1 as V1TImode, allocates a fresh V1TImode register as operand 3, and
;; makes operand 4 the <MODE> lowpart of operand 3, which the final set
;; copies into operand 0.
;; NOTE(review): the first set (the shift into operand 3) is only partly
;; visible in this listing.
8332 (define_expand "vec_shl_<mode>"
8335 (match_operand:VI_128 1 "register_operand")
8336 (match_operand:SI 2 "const_0_to_255_mul_8_operand")))
8337 (set (match_operand:VI_128 0 "register_operand") (match_dup 4))]
8340 operands[1] = gen_lowpart (V1TImode, operands[1]);
8341 operands[3] = gen_reg_rtx (V1TImode);
8342 operands[4] = gen_lowpart (<MODE>mode, operands[3]);
;; Insn for the VIMAX_AVX2 modes emitting pslldq (noavx)
;; or vpslldq (avx).  The RTL count (operand 2) must satisfy
;; const_0_to_255_mul_8_operand; the output code divides it by 8 before
;; printing, so the assembler sees count / 8.
8345 (define_insn "<sse2_avx2>_ashl<mode>3"
8346 [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,x")
8348 (match_operand:VIMAX_AVX2 1 "register_operand" "0,x")
8349 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
8352 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
8354 switch (which_alternative)
8357 return "pslldq\t{%2, %0|%0, %2}";
8359 return "vpslldq\t{%2, %1, %0|%0, %1, %2}";
8364 [(set_attr "isa" "noavx,avx")
8365 (set_attr "type" "sseishft")
8366 (set_attr "length_immediate" "1")
8367 (set_attr "prefix_data16" "1,*")
8368 (set_attr "prefix" "orig,vex")
8369 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the VI_128 modes with a constant count (operand 2,
;; const_0_to_255_mul_8_operand).  The preparation code re-views operand
;; 1 as V1TImode, allocates a fresh V1TImode register as operand 3, and
;; makes operand 4 the <MODE> lowpart of operand 3, which the final set
;; copies into operand 0.
;; NOTE(review): the first set (the shift into operand 3) is only partly
;; visible in this listing.
8371 (define_expand "vec_shr_<mode>"
8374 (match_operand:VI_128 1 "register_operand")
8375 (match_operand:SI 2 "const_0_to_255_mul_8_operand")))
8376 (set (match_operand:VI_128 0 "register_operand") (match_dup 4))]
8379 operands[1] = gen_lowpart (V1TImode, operands[1]);
8380 operands[3] = gen_reg_rtx (V1TImode);
8381 operands[4] = gen_lowpart (<MODE>mode, operands[3]);
;; Logical right shift (lshiftrt) insn for the VIMAX_AVX2 modes,
;; emitting psrldq (noavx) or vpsrldq (avx).  The RTL count (operand 2)
;; must satisfy const_0_to_255_mul_8_operand; the output code divides it
;; by 8 before printing, so the assembler sees count / 8.
8384 (define_insn "<sse2_avx2>_lshr<mode>3"
8385 [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,x")
8386 (lshiftrt:VIMAX_AVX2
8387 (match_operand:VIMAX_AVX2 1 "register_operand" "0,x")
8388 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
8391 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
8393 switch (which_alternative)
8396 return "psrldq\t{%2, %0|%0, %2}";
8398 return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
8403 [(set_attr "isa" "noavx,avx")
8404 (set_attr "type" "sseishft")
8405 (set_attr "length_immediate" "1")
8406 (set_attr "atom_unit" "sishuf")
8407 (set_attr "prefix_data16" "1,*")
8408 (set_attr "prefix" "orig,vex")
8409 (set_attr "mode" "<sseinsnmode>")])
;; Rotate insn (any_rotate) for the VI48_512 modes where the rotate
;; amount is itself a vector operand (register or memory).  Emits
;; vp<rotate>v<suffix> with optional mask operand text.
8411 (define_insn "avx512f_<rotate>v<mode><mask_name>"
8412 [(set (match_operand:VI48_512 0 "register_operand" "=v")
8413 (any_rotate:VI48_512
8414 (match_operand:VI48_512 1 "register_operand" "v")
8415 (match_operand:VI48_512 2 "nonimmediate_operand" "vm")))]
8417 "vp<rotate>v<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8418 [(set_attr "prefix" "evex")
8419 (set_attr "mode" "<sseinsnmode>")])
;; Rotate insn (any_rotate) for the VI48_512 modes where the rotate
;; amount is an SImode constant accepted by const_0_to_255_operand.  The
;; source may be a register or memory.  Emits vp<rotate><suffix> with
;; optional mask operand text.
8421 (define_insn "avx512f_<rotate><mode><mask_name>"
8422 [(set (match_operand:VI48_512 0 "register_operand" "=v")
8423 (any_rotate:VI48_512
8424 (match_operand:VI48_512 1 "nonimmediate_operand" "vm")
8425 (match_operand:SI 2 "const_0_to_255_operand")))]
8427 "vp<rotate><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8428 [(set_attr "prefix" "evex")
8429 (set_attr "mode" "<sseinsnmode>")])
;; Integer max/min expander (maxmin code iterator) for the
;; VI124_256_48_512 modes.  Requires TARGET_AVX2 plus the mask and
;; rounding substitution conditions; the preparation statement runs
;; ix86_fixup_binary_operands_no_copy on the operands.
8431 (define_expand "<code><mode>3<mask_name><round_name>"
8432 [(set (match_operand:VI124_256_48_512 0 "register_operand")
8433 (maxmin:VI124_256_48_512
8434 (match_operand:VI124_256_48_512 1 "<round_nimm_predicate>")
8435 (match_operand:VI124_256_48_512 2 "<round_nimm_predicate>")))]
8436 "TARGET_AVX2 && <mask_mode512bit_condition> && <round_mode512bit_condition>"
8437 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Integer max/min insn (maxmin code iterator) for the VI124_256_48_512
;; modes, emitting vp<maxmin_int><suffix> with optional mask and rounding
;; operand text.  Operand 1 is marked commutative ("%v").
;; The "mode" attribute is taken from <sseinsnmode> rather than being
;; hard-coded to "OI", because the mode iterator also covers 512-bit
;; modes; the other VI48_512 patterns here use <sseinsnmode> as well.
8439 (define_insn "*avx2_<code><mode>3<mask_name><round_name>"
8440 [(set (match_operand:VI124_256_48_512 0 "register_operand" "=v")
8441 (maxmin:VI124_256_48_512
8442 (match_operand:VI124_256_48_512 1 "<round_nimm_predicate>" "%v")
8443 (match_operand:VI124_256_48_512 2 "<round_nimm_predicate>" "<round_constraint>")))]
8444 "TARGET_AVX2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
8445 && <mask_mode512bit_condition> && <round_mode512bit_condition>"
8446 "vp<maxmin_int><ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
8447 [(set_attr "type" "sseiadd")
8448 (set_attr "prefix_extra" "1")
8449 (set_attr "prefix" "maybe_evex")
8450 (set_attr "mode" "<sseinsnmode>")])
;; Max/min expander for the VI8_AVX2 modes, implemented as a vector
;; conditional.  The preparation code fills xops[] for
;; ix86_expand_int_vcond: the two value operands are taken in the given
;; order for SMAX/UMAX and swapped otherwise, and the comparison is
;; operand 1 > operand 2 using GTU for UMAX/UMIN and GT for the rest.
8452 (define_expand "<code><mode>3"
8453 [(set (match_operand:VI8_AVX2 0 "register_operand")
8455 (match_operand:VI8_AVX2 1 "register_operand")
8456 (match_operand:VI8_AVX2 2 "register_operand")))]
8463 xops[0] = operands[0];
8465 if (<CODE> == SMAX || <CODE> == UMAX)
8467 xops[1] = operands[1];
8468 xops[2] = operands[2];
8472 xops[1] = operands[2];
8473 xops[2] = operands[1];
8476 code = (<CODE> == UMAX || <CODE> == UMIN) ? GTU : GT;
8478 xops[3] = gen_rtx_fmt_ee (code, VOIDmode, operands[1], operands[2]);
8479 xops[4] = operands[1];
8480 xops[5] = operands[2];
8482 ok = ix86_expand_int_vcond (xops);
;; Max/min expander for the VI124_128 modes using a signed GT
;; comparison.  When TARGET_SSE4_1 is set or the mode is V8HImode, the
;; operands are only fixed up with ix86_fixup_binary_operands_no_copy.
;; The remaining visible code forces both inputs into registers and
;; builds an xops[] vector conditional on operand 1 > operand 2 for
;; ix86_expand_int_vcond.
;; NOTE(review): the code iterator and the test that picks the xops[1]/
;; xops[2] order are not visible in this listing.
8487 (define_expand "<code><mode>3"
8488 [(set (match_operand:VI124_128 0 "register_operand")
8490 (match_operand:VI124_128 1 "nonimmediate_operand")
8491 (match_operand:VI124_128 2 "nonimmediate_operand")))]
8494 if (TARGET_SSE4_1 || <MODE>mode == V8HImode)
8495 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
8501 xops[0] = operands[0];
8502 operands[1] = force_reg (<MODE>mode, operands[1]);
8503 operands[2] = force_reg (<MODE>mode, operands[2]);
8507 xops[1] = operands[1];
8508 xops[2] = operands[2];
8512 xops[1] = operands[2];
8513 xops[2] = operands[1];
8516 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
8517 xops[4] = operands[1];
8518 xops[5] = operands[2];
8520 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 max/min insn for the VI14_128 modes, emitting
;; p<maxmin_int><suffix> (noavx, operand 1 tied to the destination) or
;; vp<maxmin_int><suffix> (avx).
8526 (define_insn "*sse4_1_<code><mode>3"
8527 [(set (match_operand:VI14_128 0 "register_operand" "=x,x")
8529 (match_operand:VI14_128 1 "nonimmediate_operand" "%0,x")
8530 (match_operand:VI14_128 2 "nonimmediate_operand" "xm,xm")))]
8531 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
8533 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
8534 vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
8535 [(set_attr "isa" "noavx,avx")
8536 (set_attr "type" "sseiadd")
8537 (set_attr "prefix_extra" "1,*")
8538 (set_attr "prefix" "orig,vex")
8539 (set_attr "mode" "TI")])
;; SSE2 max/min insn for V8HI, emitting p<maxmin_int>w (noavx, operand 1
;; tied to the destination) or vp<maxmin_int>w (avx).
8541 (define_insn "*<code>v8hi3"
8542 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
8544 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x")
8545 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")))]
8546 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V8HImode, operands)"
8548 p<maxmin_int>w\t{%2, %0|%0, %2}
8549 vp<maxmin_int>w\t{%2, %1, %0|%0, %1, %2}"
8550 [(set_attr "isa" "noavx,avx")
8551 (set_attr "type" "sseiadd")
8552 (set_attr "prefix_data16" "1,*")
8553 (set_attr "prefix_extra" "*,1")
8554 (set_attr "prefix" "orig,vex")
8555 (set_attr "mode" "TI")])
;; Max/min expander for the VI124_128 modes using an unsigned GTU
;; comparison.  Three visible paths:
;;  - TARGET_SSE4_1 or V16QImode: only fix up the operands with
;;    ix86_fixup_binary_operands_no_copy;
;;  - UMAX in V8HImode: emit sse2_ussubv8hi3 (op3 = f(op1, op2)) followed
;;    by addv8hi3 (op0 = op3 + op2), using a fresh register for op3 when
;;    the destination equals operand 2;
;;  - otherwise: force both inputs into registers and build an xops[]
;;    vector conditional on operand 1 >u operand 2 for
;;    ix86_expand_int_vcond.
;; NOTE(review): the code iterator and the test that picks the xops[1]/
;; xops[2] order are not visible in this listing.
8557 (define_expand "<code><mode>3"
8558 [(set (match_operand:VI124_128 0 "register_operand")
8560 (match_operand:VI124_128 1 "nonimmediate_operand")
8561 (match_operand:VI124_128 2 "nonimmediate_operand")))]
8564 if (TARGET_SSE4_1 || <MODE>mode == V16QImode)
8565 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
8566 else if (<CODE> == UMAX && <MODE>mode == V8HImode)
8568 rtx op0 = operands[0], op2 = operands[2], op3 = op0;
8569 operands[1] = force_reg (<MODE>mode, operands[1]);
8570 if (rtx_equal_p (op3, op2))
8571 op3 = gen_reg_rtx (V8HImode);
8572 emit_insn (gen_sse2_ussubv8hi3 (op3, operands[1], op2));
8573 emit_insn (gen_addv8hi3 (op0, op3, op2));
8581 operands[1] = force_reg (<MODE>mode, operands[1]);
8582 operands[2] = force_reg (<MODE>mode, operands[2]);
8584 xops[0] = operands[0];
8588 xops[1] = operands[1];
8589 xops[2] = operands[2];
8593 xops[1] = operands[2];
8594 xops[2] = operands[1];
8597 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
8598 xops[4] = operands[1];
8599 xops[5] = operands[2];
8601 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 max/min insn for the VI24_128 modes, emitting
;; p<maxmin_int><suffix> (noavx, operand 1 tied to the destination) or
;; vp<maxmin_int><suffix> (avx).
8607 (define_insn "*sse4_1_<code><mode>3"
8608 [(set (match_operand:VI24_128 0 "register_operand" "=x,x")
8610 (match_operand:VI24_128 1 "nonimmediate_operand" "%0,x")
8611 (match_operand:VI24_128 2 "nonimmediate_operand" "xm,xm")))]
8612 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
8614 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
8615 vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
8616 [(set_attr "isa" "noavx,avx")
8617 (set_attr "type" "sseiadd")
8618 (set_attr "prefix_extra" "1,*")
8619 (set_attr "prefix" "orig,vex")
8620 (set_attr "mode" "TI")])
;; SSE2 max/min insn for V16QI, emitting p<maxmin_int>b (noavx, operand
;; 1 tied to the destination) or vp<maxmin_int>b (avx).
8622 (define_insn "*<code>v16qi3"
8623 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
8625 (match_operand:V16QI 1 "nonimmediate_operand" "%0,x")
8626 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")))]
8627 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V16QImode, operands)"
8629 p<maxmin_int>b\t{%2, %0|%0, %2}
8630 vp<maxmin_int>b\t{%2, %1, %0|%0, %1, %2}"
8631 [(set_attr "isa" "noavx,avx")
8632 (set_attr "type" "sseiadd")
8633 (set_attr "prefix_data16" "1,*")
8634 (set_attr "prefix_extra" "*,1")
8635 (set_attr "prefix" "orig,vex")
8636 (set_attr "mode" "TI")])
8638 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8640 ;; Parallel integral comparisons
8642 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Equality-compare expander for the VI_256 modes.  The preparation
;; statement runs ix86_fixup_binary_operands_no_copy with EQ.
8644 (define_expand "avx2_eq<mode>3"
8645 [(set (match_operand:VI_256 0 "register_operand")
8647 (match_operand:VI_256 1 "nonimmediate_operand")
8648 (match_operand:VI_256 2 "nonimmediate_operand")))]
8650 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
;; AVX2 equality-compare insn for the VI_256 modes, emitting
;; vpcmpeq<suffix>.  Operand 1 is marked commutative ("%x").
8652 (define_insn "*avx2_eq<mode>3"
8653 [(set (match_operand:VI_256 0 "register_operand" "=x")
8655 (match_operand:VI_256 1 "nonimmediate_operand" "%x")
8656 (match_operand:VI_256 2 "nonimmediate_operand" "xm")))]
8657 "TARGET_AVX2 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
8658 "vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
8659 [(set_attr "type" "ssecmp")
8660 (set_attr "prefix_extra" "1")
8661 (set_attr "prefix" "vex")
8662 (set_attr "mode" "OI")])
;; Equality-compare expander for the VI48_512 modes.  The result is in
;; <avx512fmaskmode> and is expressed as an unspec of the two inputs.
;; The preparation statement runs ix86_fixup_binary_operands_no_copy
;; with EQ.
8664 (define_expand "avx512f_eq<mode>3<mask_scalar_merge_name>"
8665 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
8666 (unspec:<avx512fmaskmode>
8667 [(match_operand:VI48_512 1 "register_operand")
8668 (match_operand:VI48_512 2 "nonimmediate_operand")]
8671 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
;; AVX-512F equality-compare insn for the VI48_512 modes.  The result
;; goes to an operand with constraint "Yk" in <avx512fmaskmode>; emits
;; vpcmpeq<suffix> with optional mask-merge operand text.
8673 (define_insn "avx512f_eq<mode>3<mask_scalar_merge_name>_1"
8674 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
8675 (unspec:<avx512fmaskmode>
8676 [(match_operand:VI48_512 1 "register_operand" "%v")
8677 (match_operand:VI48_512 2 "nonimmediate_operand" "vm")]
8679 "TARGET_AVX512F && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
8680 "vpcmpeq<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
8681 [(set_attr "type" "ssecmp")
8682 (set_attr "prefix_extra" "1")
8683 (set_attr "prefix" "evex")
8684 (set_attr "mode" "<sseinsnmode>")])
;; SSE4.1 equality-compare insn for V2DI, emitting pcmpeqq (noavx,
;; operand 1 tied to the destination) or vpcmpeqq (avx).
8686 (define_insn "*sse4_1_eqv2di3"
8687 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
8689 (match_operand:V2DI 1 "nonimmediate_operand" "%0,x")
8690 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")))]
8691 "TARGET_SSE4_1 && ix86_binary_operator_ok (EQ, V2DImode, operands)"
8693 pcmpeqq\t{%2, %0|%0, %2}
8694 vpcmpeqq\t{%2, %1, %0|%0, %1, %2}"
8695 [(set_attr "isa" "noavx,avx")
8696 (set_attr "type" "ssecmp")
8697 (set_attr "prefix_extra" "1")
8698 (set_attr "prefix" "orig,vex")
8699 (set_attr "mode" "TI")])
;; SSE2 equality-compare insn for the VI124_128 modes, emitting
;; pcmpeq<suffix> (noavx) or vpcmpeq<suffix> (avx).  Disabled when
;; TARGET_XOP is set.
8701 (define_insn "*sse2_eq<mode>3"
8702 [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
8704 (match_operand:VI124_128 1 "nonimmediate_operand" "%0,x")
8705 (match_operand:VI124_128 2 "nonimmediate_operand" "xm,xm")))]
8706 "TARGET_SSE2 && !TARGET_XOP
8707 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
8709 pcmpeq<ssemodesuffix>\t{%2, %0|%0, %2}
8710 vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
8711 [(set_attr "isa" "noavx,avx")
8712 (set_attr "type" "ssecmp")
8713 (set_attr "prefix_data16" "1,*")
8714 (set_attr "prefix" "orig,vex")
8715 (set_attr "mode" "TI")])
;; Equality-compare expander for the VI124_128 modes, enabled for
;; TARGET_SSE2 without TARGET_XOP.  The preparation statement runs
;; ix86_fixup_binary_operands_no_copy with EQ.
8717 (define_expand "sse2_eq<mode>3"
8718 [(set (match_operand:VI124_128 0 "register_operand")
8720 (match_operand:VI124_128 1 "nonimmediate_operand")
8721 (match_operand:VI124_128 2 "nonimmediate_operand")))]
8722 "TARGET_SSE2 && !TARGET_XOP "
8723 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
;; Equality-compare expander for V2DI.  The preparation statement runs
;; ix86_fixup_binary_operands_no_copy with EQ.
8725 (define_expand "sse4_1_eqv2di3"
8726 [(set (match_operand:V2DI 0 "register_operand")
8728 (match_operand:V2DI 1 "nonimmediate_operand")
8729 (match_operand:V2DI 2 "nonimmediate_operand")))]
8731 "ix86_fixup_binary_operands_no_copy (EQ, V2DImode, operands);")
;; Greater-than compare insn for V2DI, emitting pcmpgtq (noavx, operand
;; 1 tied to the destination) or vpcmpgtq (avx).  Operand 1 must be a
;; register and carries no commutative marker.
8733 (define_insn "sse4_2_gtv2di3"
8734 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
8736 (match_operand:V2DI 1 "register_operand" "0,x")
8737 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")))]
8740 pcmpgtq\t{%2, %0|%0, %2}
8741 vpcmpgtq\t{%2, %1, %0|%0, %1, %2}"
8742 [(set_attr "isa" "noavx,avx")
8743 (set_attr "type" "ssecmp")
8744 (set_attr "prefix_extra" "1")
8745 (set_attr "prefix" "orig,vex")
8746 (set_attr "mode" "TI")])
;; Greater-than compare insn for the VI_256 modes, emitting
;; vpcmpgt<suffix>.  Operand 1 must be a register; operand 2 may be a
;; register or memory.
8748 (define_insn "avx2_gt<mode>3"
8749 [(set (match_operand:VI_256 0 "register_operand" "=x")
8751 (match_operand:VI_256 1 "register_operand" "x")
8752 (match_operand:VI_256 2 "nonimmediate_operand" "xm")))]
8754 "vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
8755 [(set_attr "type" "ssecmp")
8756 (set_attr "prefix_extra" "1")
8757 (set_attr "prefix" "vex")
8758 (set_attr "mode" "OI")])
;; Greater-than compare insn for the VI48_512 modes, modelled as
;; UNSPEC_MASKED_GT with the result in <avx512fmaskmode> (constraint
;; "Yk").  Emits vpcmpgt<suffix> with optional mask-merge operand text.
8760 (define_insn "avx512f_gt<mode>3<mask_scalar_merge_name>"
8761 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
8762 (unspec:<avx512fmaskmode>
8763 [(match_operand:VI48_512 1 "register_operand" "v")
8764 (match_operand:VI48_512 2 "nonimmediate_operand" "vm")] UNSPEC_MASKED_GT))]
8766 "vpcmpgt<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
8767 [(set_attr "type" "ssecmp")
8768 (set_attr "prefix_extra" "1")
8769 (set_attr "prefix" "evex")
8770 (set_attr "mode" "<sseinsnmode>")])
;; SSE2 greater-than compare insn for the VI124_128 modes, emitting
;; pcmpgt<suffix> (noavx, operand 1 tied to the destination) or
;; vpcmpgt<suffix> (avx).  Disabled when TARGET_XOP is set.
8772 (define_insn "sse2_gt<mode>3"
8773 [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
8775 (match_operand:VI124_128 1 "register_operand" "0,x")
8776 (match_operand:VI124_128 2 "nonimmediate_operand" "xm,xm")))]
8777 "TARGET_SSE2 && !TARGET_XOP"
8779 pcmpgt<ssemodesuffix>\t{%2, %0|%0, %2}
8780 vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
8781 [(set_attr "isa" "noavx,avx")
8782 (set_attr "type" "ssecmp")
8783 (set_attr "prefix_data16" "1,*")
8784 (set_attr "prefix" "orig,vex")
8785 (set_attr "mode" "TI")])
;; Vector conditional for V_512 data selected by a comparison
;; (operator 3) of VI_512 operands 4 and 5.  Enabled only when the data
;; mode and the comparison mode have the same number of elements;
;; expansion is delegated to ix86_expand_int_vcond.
8787 (define_expand "vcond<V_512:mode><VI_512:mode>"
8788 [(set (match_operand:V_512 0 "register_operand")
8790 (match_operator 3 ""
8791 [(match_operand:VI_512 4 "nonimmediate_operand")
8792 (match_operand:VI_512 5 "general_operand")])
8793 (match_operand:V_512 1)
8794 (match_operand:V_512 2)))]
8796 && (GET_MODE_NUNITS (<V_512:MODE>mode)
8797 == GET_MODE_NUNITS (<VI_512:MODE>mode))"
8799 bool ok = ix86_expand_int_vcond (operands);
;; Vector conditional for V_256 data selected by a comparison
;; (operator 3) of VI_256 operands 4 and 5.  Enabled only when the data
;; mode and the comparison mode have the same number of elements;
;; expansion is delegated to ix86_expand_int_vcond.
8804 (define_expand "vcond<V_256:mode><VI_256:mode>"
8805 [(set (match_operand:V_256 0 "register_operand")
8807 (match_operator 3 ""
8808 [(match_operand:VI_256 4 "nonimmediate_operand")
8809 (match_operand:VI_256 5 "general_operand")])
8810 (match_operand:V_256 1)
8811 (match_operand:V_256 2)))]
8813 && (GET_MODE_NUNITS (<V_256:MODE>mode)
8814 == GET_MODE_NUNITS (<VI_256:MODE>mode))"
8816 bool ok = ix86_expand_int_vcond (operands);
;; Vector conditional for V_128 data selected by a comparison
;; (operator 3) of VI124_128 operands 4 and 5.  Enabled only when the
;; data mode and the comparison mode have the same number of elements;
;; expansion is delegated to ix86_expand_int_vcond.
8821 (define_expand "vcond<V_128:mode><VI124_128:mode>"
8822 [(set (match_operand:V_128 0 "register_operand")
8824 (match_operator 3 ""
8825 [(match_operand:VI124_128 4 "nonimmediate_operand")
8826 (match_operand:VI124_128 5 "general_operand")])
8827 (match_operand:V_128 1)
8828 (match_operand:V_128 2)))]
8830 && (GET_MODE_NUNITS (<V_128:MODE>mode)
8831 == GET_MODE_NUNITS (<VI124_128:MODE>mode))"
8833 bool ok = ix86_expand_int_vcond (operands);
;; Vector conditional for VI8F_128 data selected by a comparison
;; (operator 3) of V2DI operands 4 and 5.  Expansion is delegated to
;; ix86_expand_int_vcond.
8838 (define_expand "vcond<VI8F_128:mode>v2di"
8839 [(set (match_operand:VI8F_128 0 "register_operand")
8840 (if_then_else:VI8F_128
8841 (match_operator 3 ""
8842 [(match_operand:V2DI 4 "nonimmediate_operand")
8843 (match_operand:V2DI 5 "general_operand")])
8844 (match_operand:VI8F_128 1)
8845 (match_operand:VI8F_128 2)))]
8848 bool ok = ix86_expand_int_vcond (operands);
;; "vcondu" counterpart of the V_512/VI_512 vector conditional
;; (presumably the unsigned-comparison form -- confirm).  Both compared
;; operands use nonimmediate_operand and both data operands use
;; general_operand.  Enabled only when data and comparison modes have
;; the same number of elements; expansion is delegated to
;; ix86_expand_int_vcond.
8853 (define_expand "vcondu<V_512:mode><VI_512:mode>"
8854 [(set (match_operand:V_512 0 "register_operand")
8856 (match_operator 3 ""
8857 [(match_operand:VI_512 4 "nonimmediate_operand")
8858 (match_operand:VI_512 5 "nonimmediate_operand")])
8859 (match_operand:V_512 1 "general_operand")
8860 (match_operand:V_512 2 "general_operand")))]
8862 && (GET_MODE_NUNITS (<V_512:MODE>mode)
8863 == GET_MODE_NUNITS (<VI_512:MODE>mode))"
8865 bool ok = ix86_expand_int_vcond (operands);
;; "vcondu" counterpart of the V_256/VI_256 vector conditional
;; (presumably the unsigned-comparison form -- confirm).  Enabled only
;; when data and comparison modes have the same number of elements;
;; expansion is delegated to ix86_expand_int_vcond.
8870 (define_expand "vcondu<V_256:mode><VI_256:mode>"
8871 [(set (match_operand:V_256 0 "register_operand")
8873 (match_operator 3 ""
8874 [(match_operand:VI_256 4 "nonimmediate_operand")
8875 (match_operand:VI_256 5 "nonimmediate_operand")])
8876 (match_operand:V_256 1 "general_operand")
8877 (match_operand:V_256 2 "general_operand")))]
8879 && (GET_MODE_NUNITS (<V_256:MODE>mode)
8880 == GET_MODE_NUNITS (<VI_256:MODE>mode))"
8882 bool ok = ix86_expand_int_vcond (operands);
;; "vcondu" counterpart of the V_128/VI124_128 vector conditional
;; (presumably the unsigned-comparison form -- confirm).  Enabled only
;; when data and comparison modes have the same number of elements;
;; expansion is delegated to ix86_expand_int_vcond.
8887 (define_expand "vcondu<V_128:mode><VI124_128:mode>"
8888 [(set (match_operand:V_128 0 "register_operand")
8890 (match_operator 3 ""
8891 [(match_operand:VI124_128 4 "nonimmediate_operand")
8892 (match_operand:VI124_128 5 "nonimmediate_operand")])
8893 (match_operand:V_128 1 "general_operand")
8894 (match_operand:V_128 2 "general_operand")))]
8896 && (GET_MODE_NUNITS (<V_128:MODE>mode)
8897 == GET_MODE_NUNITS (<VI124_128:MODE>mode))"
8899 bool ok = ix86_expand_int_vcond (operands);
;; "vcondu" counterpart of the VI8F_128/V2DI vector conditional
;; (presumably the unsigned-comparison form -- confirm).  Expansion is
;; delegated to ix86_expand_int_vcond.
8904 (define_expand "vcondu<VI8F_128:mode>v2di"
8905 [(set (match_operand:VI8F_128 0 "register_operand")
8906 (if_then_else:VI8F_128
8907 (match_operator 3 ""
8908 [(match_operand:V2DI 4 "nonimmediate_operand")
8909 (match_operand:V2DI 5 "nonimmediate_operand")])
8910 (match_operand:VI8F_128 1 "general_operand")
8911 (match_operand:VI8F_128 2 "general_operand")))]
8914 bool ok = ix86_expand_int_vcond (operands);
;; Modes accepted by the vec_perm expander: all 128-bit modes
;; unconditionally, 256-bit modes under TARGET_AVX2, and the listed
;; 512-bit modes under TARGET_AVX512F.
8919 (define_mode_iterator VEC_PERM_AVX2
8920 [V16QI V8HI V4SI V2DI V4SF V2DF
8921 (V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
8922 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
8923 (V8SF "TARGET_AVX2") (V4DF "TARGET_AVX2")
8924 (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")
8925 (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")])
;; Permute expander with a selector held in a register (operand 3, mode
;; <sseintvecmode>).  Available with SSSE3, AVX or XOP; expansion is
;; delegated to ix86_expand_vec_perm.
8927 (define_expand "vec_perm<mode>"
8928 [(match_operand:VEC_PERM_AVX2 0 "register_operand")
8929 (match_operand:VEC_PERM_AVX2 1 "register_operand")
8930 (match_operand:VEC_PERM_AVX2 2 "register_operand")
8931 (match_operand:<sseintvecmode> 3 "register_operand")]
8932 "TARGET_SSSE3 || TARGET_AVX || TARGET_XOP"
8934 ix86_expand_vec_perm (operands);
;; Modes accepted by the vec_perm_const expander, each gated on the ISA
;; flag listed next to it.
8938 (define_mode_iterator VEC_PERM_CONST
8939 [(V4SF "TARGET_SSE") (V4SI "TARGET_SSE")
8940 (V2DF "TARGET_SSE") (V2DI "TARGET_SSE")
8941 (V16QI "TARGET_SSE2") (V8HI "TARGET_SSE2")
8942 (V8SF "TARGET_AVX") (V4DF "TARGET_AVX")
8943 (V8SI "TARGET_AVX") (V4DI "TARGET_AVX")
8944 (V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
8945 (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")
8946 (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")])
;; Permute expander whose selector (operand 3) has no predicate.  The
;; preparation code branches on the result of
;; ix86_expand_vec_perm_const.
;; NOTE(review): the actions taken on success and failure are not
;; visible in this listing.
8948 (define_expand "vec_perm_const<mode>"
8949 [(match_operand:VEC_PERM_CONST 0 "register_operand")
8950 (match_operand:VEC_PERM_CONST 1 "register_operand")
8951 (match_operand:VEC_PERM_CONST 2 "register_operand")
8952 (match_operand:<sseintvecmode> 3)]
8955 if (ix86_expand_vec_perm_const (operands))
8961 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8963 ;; Parallel bitwise logical operations
8965 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; One's complement for the VI modes, expressed as an XOR of operand 1.
;; The preparation code builds a CONST_VECTOR whose every element is
;; constm1_rtx, forces it into a register, and stores it as operand 2.
8967 (define_expand "one_cmpl<mode>2"
8968 [(set (match_operand:VI 0 "register_operand")
8969 (xor:VI (match_operand:VI 1 "nonimmediate_operand")
8973 int i, n = GET_MODE_NUNITS (<MODE>mode);
8974 rtvec v = rtvec_alloc (n);
8976 for (i = 0; i < n; ++i)
8977 RTVEC_ELT (v, i) = constm1_rtx;
8979 operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v));
;; Expander for the VI_AVX2 modes combining the bitwise NOT of operand 1
;; (a register) with operand 2 (register or memory).  It has no
;; preparation code; it only requires TARGET_SSE2 and the mask
;; substitution condition.
8982 (define_expand "<sse2_avx2>_andnot<mode>3<mask_name>"
8983 [(set (match_operand:VI_AVX2 0 "register_operand")
8985 (not:VI_AVX2 (match_operand:VI_AVX2 1 "register_operand"))
8986 (match_operand:VI_AVX2 2 "nonimmediate_operand")))]
8987 "TARGET_SSE2 && <mask_mode512bit_condition>")
;; Insn for the VI modes combining the bitwise NOT of operand 1 with
;; operand 2.  The output code switches on the insn's "mode" attribute
;; to choose a mnemonic stem (tmp), asserting the ISA flag each case
;; needs, then switches on which_alternative to choose a two-operand or
;; a "v"-prefixed three-operand template (ops), and formats the two
;; into a static buffer with snprintf.
;; The "mode" attribute itself is computed from target tests: packed-
;; single tuning, AVX2, AVX, and a V4SF fallback when SSE2 is not
;; available or the function is optimized for size.
8989 (define_insn "*andnot<mode>3<mask_name>"
8990 [(set (match_operand:VI 0 "register_operand" "=x,v")
8992 (not:VI (match_operand:VI 1 "register_operand" "0,v"))
8993 (match_operand:VI 2 "nonimmediate_operand" "xm,vm")))]
8994 "TARGET_SSE && <mask_mode512bit_condition>"
8996 static char buf[64];
9000 switch (get_attr_mode (insn))
9003 gcc_assert (TARGET_AVX512F);
9005 tmp = "pandn<ssemodesuffix>";
9009 gcc_assert (TARGET_AVX2);
9011 gcc_assert (TARGET_SSE2);
9017 gcc_assert (TARGET_AVX512F);
9019 gcc_assert (TARGET_AVX);
9021 gcc_assert (TARGET_SSE);
9030 switch (which_alternative)
9033 ops = "%s\t{%%2, %%0|%%0, %%2}";
9036 ops = "v%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
9042 snprintf (buf, sizeof (buf), ops, tmp);
9045 [(set_attr "isa" "noavx,avx")
9046 (set_attr "type" "sselog")
9047 (set (attr "prefix_data16")
9049 (and (eq_attr "alternative" "0")
9050 (eq_attr "mode" "TI"))
9052 (const_string "*")))
9053 (set_attr "prefix" "<mask_prefix3>")
9055 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
9056 (const_string "<ssePSmode>")
9057 (match_test "TARGET_AVX2")
9058 (const_string "<sseinsnmode>")
9059 (match_test "TARGET_AVX")
9061 (match_test "<MODE_SIZE> > 16")
9062 (const_string "V8SF")
9063 (const_string "<sseinsnmode>"))
9064 (ior (not (match_test "TARGET_SSE2"))
9065 (match_test "optimize_function_for_size_p (cfun)"))
9066 (const_string "V4SF")
9068 (const_string "<sseinsnmode>")))])
;; Expander for the VI modes whose inputs may be registers, memory or
;; constant vectors.  All work is delegated to
;; ix86_expand_vector_logical_operator.
;; NOTE(review): the code iterator line is not visible in this listing.
9070 (define_expand "<code><mode>3"
9071 [(set (match_operand:VI 0 "register_operand")
9073 (match_operand:VI 1 "nonimmediate_or_const_vector_operand")
9074 (match_operand:VI 2 "nonimmediate_or_const_vector_operand")))]
9077 ix86_expand_vector_logical_operator (<CODE>, <MODE>mode, operands);
;; Insn for the VI modes emitting p<logic><suffix> style mnemonics.
;; The output code switches on the insn's "mode" attribute to choose a
;; mnemonic stem (tmp), asserting the ISA flag each case needs, then
;; switches on which_alternative to choose a two-operand or a
;; "v"-prefixed three-operand template (ops), and formats the two into a
;; static buffer with snprintf.
;; The "mode" attribute itself is computed from target tests: packed-
;; single tuning, AVX2, AVX, and a V4SF fallback when SSE2 is not
;; available or the function is optimized for size.
9081 (define_insn "<mask_codefor><code><mode>3<mask_name>"
9082 [(set (match_operand:VI 0 "register_operand" "=x,v")
9084 (match_operand:VI 1 "nonimmediate_operand" "%0,v")
9085 (match_operand:VI 2 "nonimmediate_operand" "xm,vm")))]
9086 "TARGET_SSE && <mask_mode512bit_condition>
9087 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
9089 static char buf[64];
9093 switch (get_attr_mode (insn))
9096 gcc_assert (TARGET_AVX512F);
9098 tmp = "p<logic><ssemodesuffix>";
9102 gcc_assert (TARGET_AVX2);
9104 gcc_assert (TARGET_SSE2);
9110 gcc_assert (TARGET_AVX512F);
9112 gcc_assert (TARGET_AVX);
9114 gcc_assert (TARGET_SSE);
9123 switch (which_alternative)
9126 ops = "%s\t{%%2, %%0|%%0, %%2}";
9129 ops = "v%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
9135 snprintf (buf, sizeof (buf), ops, tmp);
9138 [(set_attr "isa" "noavx,avx")
9139 (set_attr "type" "sselog")
9140 (set (attr "prefix_data16")
9142 (and (eq_attr "alternative" "0")
9143 (eq_attr "mode" "TI"))
9145 (const_string "*")))
9146 (set_attr "prefix" "<mask_prefix3>")
9148 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
9149 (const_string "<ssePSmode>")
9150 (match_test "TARGET_AVX2")
9151 (const_string "<sseinsnmode>")
9152 (match_test "TARGET_AVX")
9154 (match_test "<MODE_SIZE> > 16")
9155 (const_string "V8SF")
9156 (const_string "<sseinsnmode>"))
9157 (ior (not (match_test "TARGET_SSE2"))
9158 (match_test "optimize_function_for_size_p (cfun)"))
9159 (const_string "V4SF")
9161 (const_string "<sseinsnmode>")))])
;; vptestm for the VI48_512 modes.  The destination has mode
;; <avx512fmaskmode> and constraint "Yk"; its value is an unspec of the two
;; vector operands.  Operand 1 must be a register, operand 2 may be memory.
;; The <mask_scalar_merge_name> substitution provides the masked variant.
9163 (define_insn "avx512f_testm<mode>3<mask_scalar_merge_name>"
9164 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
9165 (unspec:<avx512fmaskmode>
9166 [(match_operand:VI48_512 1 "register_operand" "v")
9167 (match_operand:VI48_512 2 "nonimmediate_operand" "vm")]
9170 "vptestm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
9171 [(set_attr "prefix" "evex")
9172 (set_attr "mode" "<sseinsnmode>")])
;; vptestnm for the VI48_512 modes.  Same operand layout as the vptestm
;; pattern: a <avx512fmaskmode> destination (constraint "Yk") set from an
;; unspec of a register operand and a register-or-memory operand.
9174 (define_insn "avx512f_testnm<mode>3<mask_scalar_merge_name>"
9175 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
9176 (unspec:<avx512fmaskmode>
9177 [(match_operand:VI48_512 1 "register_operand" "v")
9178 (match_operand:VI48_512 2 "nonimmediate_operand" "vm")]
9181 "vptestnm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
9182 [(set_attr "prefix" "evex")
9183 (set_attr "mode" "<sseinsnmode>")])
9185 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9187 ;; Parallel integral element swizzling
9189 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander for vec_pack_trunc_<mode>.  Both inputs are reinterpreted in
;; <ssepackmode> (the mode of the result, operand 0) with gen_lowpart, and
;; the result is produced by ix86_expand_vec_extract_even_odd called with a
;; last argument of 0.
;; NOTE(review): the 0 presumably selects the even-numbered elements, i.e.
;; the low part of every wide input element -- confirm against
;; ix86_expand_vec_extract_even_odd.
9191 (define_expand "vec_pack_trunc_<mode>"
9192 [(match_operand:<ssepackmode> 0 "register_operand")
9193 (match_operand:VI248_AVX2_8_AVX512F 1 "register_operand")
9194 (match_operand:VI248_AVX2_8_AVX512F 2 "register_operand")]
9197 rtx op1 = gen_lowpart (<ssepackmode>mode, operands[1]);
9198 rtx op2 = gen_lowpart (<ssepackmode>mode, operands[2]);
9199 ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
;; packsswb / vpacksswb: the result is the concatenation of the
;; signed-saturating truncations (ss_truncate) of operands 1 and 2.
;; Alternative 0 (noavx) ties operand 1 to the destination and prints two
;; operands; alternative 1 (avx) prints all three.
9203 (define_insn "<sse2_avx2>_packsswb"
9204 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
9205 (vec_concat:VI1_AVX2
9206 (ss_truncate:<ssehalfvecmode>
9207 (match_operand:<sseunpackmode> 1 "register_operand" "0,x"))
9208 (ss_truncate:<ssehalfvecmode>
9209 (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,xm"))))]
9212 packsswb\t{%2, %0|%0, %2}
9213 vpacksswb\t{%2, %1, %0|%0, %1, %2}"
9214 [(set_attr "isa" "noavx,avx")
9215 (set_attr "type" "sselog")
9216 (set_attr "prefix_data16" "1,*")
9217 (set_attr "prefix" "orig,vex")
9218 (set_attr "mode" "<sseinsnmode>")])
;; packssdw / vpackssdw: the result is the concatenation of the
;; signed-saturating truncations (ss_truncate) of operands 1 and 2, here
;; for the VI2_AVX2 result modes.  Alternative 0 (noavx) ties operand 1 to
;; the destination; alternative 1 (avx) is the three-operand form.
9220 (define_insn "<sse2_avx2>_packssdw"
9221 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
9222 (vec_concat:VI2_AVX2
9223 (ss_truncate:<ssehalfvecmode>
9224 (match_operand:<sseunpackmode> 1 "register_operand" "0,x"))
9225 (ss_truncate:<ssehalfvecmode>
9226 (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,xm"))))]
9229 packssdw\t{%2, %0|%0, %2}
9230 vpackssdw\t{%2, %1, %0|%0, %1, %2}"
9231 [(set_attr "isa" "noavx,avx")
9232 (set_attr "type" "sselog")
9233 (set_attr "prefix_data16" "1,*")
9234 (set_attr "prefix" "orig,vex")
9235 (set_attr "mode" "<sseinsnmode>")])
;; packuswb / vpackuswb: the result is the concatenation of the
;; unsigned-saturating truncations (us_truncate) of operands 1 and 2.
;; Alternative 0 (noavx) ties operand 1 to the destination; alternative 1
;; (avx) is the three-operand form.
9237 (define_insn "<sse2_avx2>_packuswb"
9238 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
9239 (vec_concat:VI1_AVX2
9240 (us_truncate:<ssehalfvecmode>
9241 (match_operand:<sseunpackmode> 1 "register_operand" "0,x"))
9242 (us_truncate:<ssehalfvecmode>
9243 (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,xm"))))]
9246 packuswb\t{%2, %0|%0, %2}
9247 vpackuswb\t{%2, %1, %0|%0, %1, %2}"
9248 [(set_attr "isa" "noavx,avx")
9249 (set_attr "type" "sselog")
9250 (set_attr "prefix_data16" "1,*")
9251 (set_attr "prefix" "orig,vex")
9252 (set_attr "mode" "<sseinsnmode>")])
;; vpunpckhbw on V32QI.  The selection list pairs index i with index i+32
;; for i = 8..15 and then for i = 24..31, so the two runs of eight pairs
;; come from the upper eight bytes of each 16-byte group of the inputs.
;; Single three-operand VEX alternative; operand 2 may be memory.
9254 (define_insn "avx2_interleave_highv32qi"
9255 [(set (match_operand:V32QI 0 "register_operand" "=x")
9258 (match_operand:V32QI 1 "register_operand" "x")
9259 (match_operand:V32QI 2 "nonimmediate_operand" "xm"))
9260 (parallel [(const_int 8) (const_int 40)
9261 (const_int 9) (const_int 41)
9262 (const_int 10) (const_int 42)
9263 (const_int 11) (const_int 43)
9264 (const_int 12) (const_int 44)
9265 (const_int 13) (const_int 45)
9266 (const_int 14) (const_int 46)
9267 (const_int 15) (const_int 47)
9268 (const_int 24) (const_int 56)
9269 (const_int 25) (const_int 57)
9270 (const_int 26) (const_int 58)
9271 (const_int 27) (const_int 59)
9272 (const_int 28) (const_int 60)
9273 (const_int 29) (const_int 61)
9274 (const_int 30) (const_int 62)
9275 (const_int 31) (const_int 63)])))]
9277 "vpunpckhbw\t{%2, %1, %0|%0, %1, %2}"
9278 [(set_attr "type" "sselog")
9279 (set_attr "prefix" "vex")
9280 (set_attr "mode" "OI")])
;; punpckhbw / vpunpckhbw on V16QI.  The selection list pairs index i with
;; index i+16 for i = 8..15.  Alternative 0 (noavx) ties operand 1 to the
;; destination; alternative 1 (avx) is the three-operand form.
9282 (define_insn "vec_interleave_highv16qi"
9283 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
9286 (match_operand:V16QI 1 "register_operand" "0,x")
9287 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm"))
9288 (parallel [(const_int 8) (const_int 24)
9289 (const_int 9) (const_int 25)
9290 (const_int 10) (const_int 26)
9291 (const_int 11) (const_int 27)
9292 (const_int 12) (const_int 28)
9293 (const_int 13) (const_int 29)
9294 (const_int 14) (const_int 30)
9295 (const_int 15) (const_int 31)])))]
9298 punpckhbw\t{%2, %0|%0, %2}
9299 vpunpckhbw\t{%2, %1, %0|%0, %1, %2}"
9300 [(set_attr "isa" "noavx,avx")
9301 (set_attr "type" "sselog")
9302 (set_attr "prefix_data16" "1,*")
9303 (set_attr "prefix" "orig,vex")
9304 (set_attr "mode" "TI")])
;; vpunpcklbw on V32QI.  The selection list pairs index i with index i+32
;; for i = 0..7 and then for i = 16..23, i.e. the lower eight bytes of each
;; 16-byte group of the inputs.  Single three-operand VEX alternative.
9306 (define_insn "avx2_interleave_lowv32qi"
9307 [(set (match_operand:V32QI 0 "register_operand" "=x")
9310 (match_operand:V32QI 1 "register_operand" "x")
9311 (match_operand:V32QI 2 "nonimmediate_operand" "xm"))
9312 (parallel [(const_int 0) (const_int 32)
9313 (const_int 1) (const_int 33)
9314 (const_int 2) (const_int 34)
9315 (const_int 3) (const_int 35)
9316 (const_int 4) (const_int 36)
9317 (const_int 5) (const_int 37)
9318 (const_int 6) (const_int 38)
9319 (const_int 7) (const_int 39)
9320 (const_int 16) (const_int 48)
9321 (const_int 17) (const_int 49)
9322 (const_int 18) (const_int 50)
9323 (const_int 19) (const_int 51)
9324 (const_int 20) (const_int 52)
9325 (const_int 21) (const_int 53)
9326 (const_int 22) (const_int 54)
9327 (const_int 23) (const_int 55)])))]
9329 "vpunpcklbw\t{%2, %1, %0|%0, %1, %2}"
9330 [(set_attr "type" "sselog")
9331 (set_attr "prefix" "vex")
9332 (set_attr "mode" "OI")])
;; punpcklbw / vpunpcklbw on V16QI.  The selection list pairs index i with
;; index i+16 for i = 0..7.  Alternative 0 (noavx) ties operand 1 to the
;; destination; alternative 1 (avx) is the three-operand form.
9334 (define_insn "vec_interleave_lowv16qi"
9335 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
9338 (match_operand:V16QI 1 "register_operand" "0,x")
9339 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm"))
9340 (parallel [(const_int 0) (const_int 16)
9341 (const_int 1) (const_int 17)
9342 (const_int 2) (const_int 18)
9343 (const_int 3) (const_int 19)
9344 (const_int 4) (const_int 20)
9345 (const_int 5) (const_int 21)
9346 (const_int 6) (const_int 22)
9347 (const_int 7) (const_int 23)])))]
9350 punpcklbw\t{%2, %0|%0, %2}
9351 vpunpcklbw\t{%2, %1, %0|%0, %1, %2}"
9352 [(set_attr "isa" "noavx,avx")
9353 (set_attr "type" "sselog")
9354 (set_attr "prefix_data16" "1,*")
9355 (set_attr "prefix" "orig,vex")
9356 (set_attr "mode" "TI")])
;; vpunpckhwd on V16HI.  The selection list pairs index i with index i+16
;; for i = 4..7 and then for i = 12..15, i.e. the upper four halfwords of
;; each group of eight.  Single three-operand VEX alternative.
9358 (define_insn "avx2_interleave_highv16hi"
9359 [(set (match_operand:V16HI 0 "register_operand" "=x")
9362 (match_operand:V16HI 1 "register_operand" "x")
9363 (match_operand:V16HI 2 "nonimmediate_operand" "xm"))
9364 (parallel [(const_int 4) (const_int 20)
9365 (const_int 5) (const_int 21)
9366 (const_int 6) (const_int 22)
9367 (const_int 7) (const_int 23)
9368 (const_int 12) (const_int 28)
9369 (const_int 13) (const_int 29)
9370 (const_int 14) (const_int 30)
9371 (const_int 15) (const_int 31)])))]
9373 "vpunpckhwd\t{%2, %1, %0|%0, %1, %2}"
9374 [(set_attr "type" "sselog")
9375 (set_attr "prefix" "vex")
9376 (set_attr "mode" "OI")])
;; punpckhwd / vpunpckhwd on V8HI.  The selection list pairs index i with
;; index i+8 for i = 4..7.  Alternative 0 (noavx) ties operand 1 to the
;; destination; alternative 1 (avx) is the three-operand form.
9378 (define_insn "vec_interleave_highv8hi"
9379 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
9382 (match_operand:V8HI 1 "register_operand" "0,x")
9383 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm"))
9384 (parallel [(const_int 4) (const_int 12)
9385 (const_int 5) (const_int 13)
9386 (const_int 6) (const_int 14)
9387 (const_int 7) (const_int 15)])))]
9390 punpckhwd\t{%2, %0|%0, %2}
9391 vpunpckhwd\t{%2, %1, %0|%0, %1, %2}"
9392 [(set_attr "isa" "noavx,avx")
9393 (set_attr "type" "sselog")
9394 (set_attr "prefix_data16" "1,*")
9395 (set_attr "prefix" "orig,vex")
9396 (set_attr "mode" "TI")])
;; vpunpcklwd on V16HI.  The selection list pairs index i with index i+16
;; for i = 0..3 and then for i = 8..11, i.e. the lower four halfwords of
;; each group of eight.  Single three-operand VEX alternative.
9398 (define_insn "avx2_interleave_lowv16hi"
9399 [(set (match_operand:V16HI 0 "register_operand" "=x")
9402 (match_operand:V16HI 1 "register_operand" "x")
9403 (match_operand:V16HI 2 "nonimmediate_operand" "xm"))
9404 (parallel [(const_int 0) (const_int 16)
9405 (const_int 1) (const_int 17)
9406 (const_int 2) (const_int 18)
9407 (const_int 3) (const_int 19)
9408 (const_int 8) (const_int 24)
9409 (const_int 9) (const_int 25)
9410 (const_int 10) (const_int 26)
9411 (const_int 11) (const_int 27)])))]
9413 "vpunpcklwd\t{%2, %1, %0|%0, %1, %2}"
9414 [(set_attr "type" "sselog")
9415 (set_attr "prefix" "vex")
9416 (set_attr "mode" "OI")])
;; punpcklwd / vpunpcklwd on V8HI.  The selection list pairs index i with
;; index i+8 for i = 0..3.  Alternative 0 (noavx) ties operand 1 to the
;; destination; alternative 1 (avx) is the three-operand form.
9418 (define_insn "vec_interleave_lowv8hi"
9419 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
9422 (match_operand:V8HI 1 "register_operand" "0,x")
9423 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm"))
9424 (parallel [(const_int 0) (const_int 8)
9425 (const_int 1) (const_int 9)
9426 (const_int 2) (const_int 10)
9427 (const_int 3) (const_int 11)])))]
9430 punpcklwd\t{%2, %0|%0, %2}
9431 vpunpcklwd\t{%2, %1, %0|%0, %1, %2}"
9432 [(set_attr "isa" "noavx,avx")
9433 (set_attr "type" "sselog")
9434 (set_attr "prefix_data16" "1,*")
9435 (set_attr "prefix" "orig,vex")
9436 (set_attr "mode" "TI")])
;; vpunpckhdq on V8SI.  The selection list pairs index i with index i+8 for
;; i = 2, 3 and then for i = 6, 7, i.e. the upper two dwords of each group
;; of four.  Single three-operand VEX alternative.
9438 (define_insn "avx2_interleave_highv8si"
9439 [(set (match_operand:V8SI 0 "register_operand" "=x")
9442 (match_operand:V8SI 1 "register_operand" "x")
9443 (match_operand:V8SI 2 "nonimmediate_operand" "xm"))
9444 (parallel [(const_int 2) (const_int 10)
9445 (const_int 3) (const_int 11)
9446 (const_int 6) (const_int 14)
9447 (const_int 7) (const_int 15)])))]
9449 "vpunpckhdq\t{%2, %1, %0|%0, %1, %2}"
9450 [(set_attr "type" "sselog")
9451 (set_attr "prefix" "vex")
9452 (set_attr "mode" "OI")])
;; vpunpckhdq on V16SI (EVEX), with an optional masked variant through the
;; <mask_name> substitution.  The selection list pairs index i with index
;; i+16 for i = 2, 3, 6, 7, 10, 11, 14, 15, i.e. the upper two dwords of
;; each group of four.
9454 (define_insn "<mask_codefor>avx512f_interleave_highv16si<mask_name>"
9455 [(set (match_operand:V16SI 0 "register_operand" "=v")
9458 (match_operand:V16SI 1 "register_operand" "v")
9459 (match_operand:V16SI 2 "nonimmediate_operand" "vm"))
9460 (parallel [(const_int 2) (const_int 18)
9461 (const_int 3) (const_int 19)
9462 (const_int 6) (const_int 22)
9463 (const_int 7) (const_int 23)
9464 (const_int 10) (const_int 26)
9465 (const_int 11) (const_int 27)
9466 (const_int 14) (const_int 30)
9467 (const_int 15) (const_int 31)])))]
9469 "vpunpckhdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9470 [(set_attr "type" "sselog")
9471 (set_attr "prefix" "evex")
9472 (set_attr "mode" "XI")])
;; punpckhdq / vpunpckhdq on V4SI.  The selection list pairs index i with
;; index i+4 for i = 2, 3.  Alternative 0 (noavx) ties operand 1 to the
;; destination; alternative 1 (avx) is the three-operand form.
9475 (define_insn "vec_interleave_highv4si"
9476 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
9479 (match_operand:V4SI 1 "register_operand" "0,x")
9480 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm"))
9481 (parallel [(const_int 2) (const_int 6)
9482 (const_int 3) (const_int 7)])))]
9485 punpckhdq\t{%2, %0|%0, %2}
9486 vpunpckhdq\t{%2, %1, %0|%0, %1, %2}"
9487 [(set_attr "isa" "noavx,avx")
9488 (set_attr "type" "sselog")
9489 (set_attr "prefix_data16" "1,*")
9490 (set_attr "prefix" "orig,vex")
9491 (set_attr "mode" "TI")])
;; vpunpckldq on V8SI.  The selection list pairs index i with index i+8 for
;; i = 0, 1 and then for i = 4, 5, i.e. the lower two dwords of each group
;; of four.  Single three-operand VEX alternative.
9493 (define_insn "avx2_interleave_lowv8si"
9494 [(set (match_operand:V8SI 0 "register_operand" "=x")
9497 (match_operand:V8SI 1 "register_operand" "x")
9498 (match_operand:V8SI 2 "nonimmediate_operand" "xm"))
9499 (parallel [(const_int 0) (const_int 8)
9500 (const_int 1) (const_int 9)
9501 (const_int 4) (const_int 12)
9502 (const_int 5) (const_int 13)])))]
9504 "vpunpckldq\t{%2, %1, %0|%0, %1, %2}"
9505 [(set_attr "type" "sselog")
9506 (set_attr "prefix" "vex")
9507 (set_attr "mode" "OI")])
;; vpunpckldq on V16SI (EVEX), with an optional masked variant through the
;; <mask_name> substitution.  The selection list pairs index i with index
;; i+16 for i = 0, 1, 4, 5, 8, 9, 12, 13, i.e. the lower two dwords of each
;; group of four.
9509 (define_insn "<mask_codefor>avx512f_interleave_lowv16si<mask_name>"
9510 [(set (match_operand:V16SI 0 "register_operand" "=v")
9513 (match_operand:V16SI 1 "register_operand" "v")
9514 (match_operand:V16SI 2 "nonimmediate_operand" "vm"))
9515 (parallel [(const_int 0) (const_int 16)
9516 (const_int 1) (const_int 17)
9517 (const_int 4) (const_int 20)
9518 (const_int 5) (const_int 21)
9519 (const_int 8) (const_int 24)
9520 (const_int 9) (const_int 25)
9521 (const_int 12) (const_int 28)
9522 (const_int 13) (const_int 29)])))]
9524 "vpunpckldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9525 [(set_attr "type" "sselog")
9526 (set_attr "prefix" "evex")
9527 (set_attr "mode" "XI")])
;; punpckldq / vpunpckldq on V4SI.  The selection list pairs index i with
;; index i+4 for i = 0, 1.  Alternative 0 (noavx) ties operand 1 to the
;; destination; alternative 1 (avx) is the three-operand form.
9529 (define_insn "vec_interleave_lowv4si"
9530 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
9533 (match_operand:V4SI 1 "register_operand" "0,x")
9534 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm"))
9535 (parallel [(const_int 0) (const_int 4)
9536 (const_int 1) (const_int 5)])))]
9539 punpckldq\t{%2, %0|%0, %2}
9540 vpunpckldq\t{%2, %1, %0|%0, %1, %2}"
9541 [(set_attr "isa" "noavx,avx")
9542 (set_attr "type" "sselog")
9543 (set_attr "prefix_data16" "1,*")
9544 (set_attr "prefix" "orig,vex")
9545 (set_attr "mode" "TI")])
;; Full-width "interleave high" for the 256-bit integer modes (VI_256).
;; The avx2_interleave_low/high<mode> insns interleave within each group
;; of elements separately, so both are emitted into temporaries (t1 = low,
;; t2 = high) and the result is assembled with avx2_permv2ti on V4DI views
;; of the temporaries.  The selector 1 + (3 << 4) takes the upper 128-bit
;; half of t1 for the low half of the result and the upper 128-bit half of
;; t2 for the high half.
;;
;; Operands carry predicates only: constraint strings are ignored in a
;; define_expand, so none are written here.
9547 (define_expand "vec_interleave_high<mode>"
9548 [(match_operand:VI_256 0 "register_operand")
9549 (match_operand:VI_256 1 "register_operand")
9550 (match_operand:VI_256 2 "nonimmediate_operand")]
9553 rtx t1 = gen_reg_rtx (<MODE>mode);
9554 rtx t2 = gen_reg_rtx (<MODE>mode);
9555 rtx t3 = gen_reg_rtx (V4DImode);
9556 emit_insn (gen_avx2_interleave_low<mode> (t1, operands[1], operands[2]));
9557 emit_insn (gen_avx2_interleave_high<mode> (t2, operands[1], operands[2]));
9558 emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, t1),
9559 gen_lowpart (V4DImode, t2),
9560 GEN_INT (1 + (3 << 4))));
9561 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, t3));
;; Full-width "interleave low" for the 256-bit integer modes (VI_256).
;; The avx2_interleave_low/high<mode> insns interleave within each group
;; of elements separately, so both are emitted into temporaries (t1 = low,
;; t2 = high) and the result is assembled with avx2_permv2ti on V4DI views
;; of the temporaries.  The selector 0 + (2 << 4) takes the lower 128-bit
;; half of t1 for the low half of the result and the lower 128-bit half of
;; t2 for the high half.
;;
;; Operands carry predicates only: constraint strings are ignored in a
;; define_expand, so none are written here.
9565 (define_expand "vec_interleave_low<mode>"
9566 [(match_operand:VI_256 0 "register_operand")
9567 (match_operand:VI_256 1 "register_operand")
9568 (match_operand:VI_256 2 "nonimmediate_operand")]
9571 rtx t1 = gen_reg_rtx (<MODE>mode);
9572 rtx t2 = gen_reg_rtx (<MODE>mode);
9573 rtx t3 = gen_reg_rtx (V4DImode);
9574 emit_insn (gen_avx2_interleave_low<mode> (t1, operands[1], operands[2]));
9575 emit_insn (gen_avx2_interleave_high<mode> (t2, operands[1], operands[2]));
9576 emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, t1),
9577 gen_lowpart (V4DImode, t2),
9578 GEN_INT (0 + (2 << 4))));
9579 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, t3));
9583 ;; Modes handled by pinsr patterns.
;; V8HI is listed unconditionally; V16QI and V4SI are enabled only under
;; TARGET_SSE4_1, and V2DI additionally requires TARGET_64BIT.
9584 (define_mode_iterator PINSR_MODE
9585 [(V16QI "TARGET_SSE4_1") V8HI
9586 (V4SI "TARGET_SSE4_1")
9587 (V2DI "TARGET_SSE4_1 && TARGET_64BIT")])
;; Name prefix used for the pinsr pattern of each PINSR_MODE mode:
;; "sse2" for V8HI and "sse4_1" for V16QI, V4SI and V2DI.
9589 (define_mode_attr sse2p4_1
9590 [(V16QI "sse4_1") (V8HI "sse2")
9591 (V4SI "sse4_1") (V2DI "sse4_1")])
9593 ;; sse4_1_pinsrd must come before sse2_loadld since it is preferred.
;; pinsr{b,w,d,q} / vpinsr*: replace one element of vector operand 1 with
;; scalar operand 2 (general register or memory).
;;
;; Operand 3 is the vec_merge selector.  The insn condition requires
;; exact_log2 of it to be a valid element number (the unsigned comparison
;; also rejects the -1 returned for values that are not a power of two),
;; and the output code rewrites operand 3 to that element number before
;; printing it as the immediate.
;;
;; Alternatives 0-1 (noavx) tie operand 1 to the destination; alternatives
;; 2-3 (avx) print it as a separate source.  When the element mode is
;; narrower than SImode, the source is printed with the %k modifier.
;;
;; The prefix_rex, prefix_data16 and prefix_extra attributes are computed
;; from TARGET_AVX and from whether the mode is V2DI or V8HI.
9594 (define_insn "<sse2p4_1>_pinsr<ssemodesuffix>"
9595 [(set (match_operand:PINSR_MODE 0 "register_operand" "=x,x,x,x")
9596 (vec_merge:PINSR_MODE
9597 (vec_duplicate:PINSR_MODE
9598 (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "r,m,r,m"))
9599 (match_operand:PINSR_MODE 1 "register_operand" "0,0,x,x")
9600 (match_operand:SI 3 "const_int_operand")))]
9602 && ((unsigned) exact_log2 (INTVAL (operands[3]))
9603 < GET_MODE_NUNITS (<MODE>mode))"
9605 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
9607 switch (which_alternative)
9610 if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
9611 return "pinsr<ssemodesuffix>\t{%3, %k2, %0|%0, %k2, %3}";
9614 return "pinsr<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}";
9616 if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
9617 return "vpinsr<ssemodesuffix>\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
9620 return "vpinsr<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
9625 [(set_attr "isa" "noavx,noavx,avx,avx")
9626 (set_attr "type" "sselog")
9627 (set (attr "prefix_rex")
9629 (and (not (match_test "TARGET_AVX"))
9630 (eq (const_string "<MODE>mode") (const_string "V2DImode")))
9632 (const_string "*")))
9633 (set (attr "prefix_data16")
9635 (and (not (match_test "TARGET_AVX"))
9636 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
9638 (const_string "*")))
9639 (set (attr "prefix_extra")
9641 (and (not (match_test "TARGET_AVX"))
9642 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
9644 (const_string "1")))
9645 (set_attr "length_immediate" "1")
9646 (set_attr "prefix" "orig,orig,vex,vex")
9647 (set_attr "mode" "TI")])
;; vinsert{f,i}32x4: insert the 128-bit operand 2 into one of the four
;; 128-bit slots of the 512-bit operand 1.
;;
;; The expander takes the slot number (0..3) as operand 3 and converts it
;; to the 16-bit vec_merge selector used by the ..._1 insn below it.  In a
;; vec_merge a SET bit keeps the element of the first vector (operand 1)
;; and a CLEAR bit takes the element of the second vector (the duplicated
;; operand 2), with bit N describing element N.  The selector for slot S
;; therefore has exactly bits 4*S .. 4*S+3 clear:
;;
;;   slot 0 -> 0xFFF0   slot 1 -> 0xFF0F   slot 2 -> 0xF0FF   slot 3 -> 0x0FFF
;;
;; The insn recognizes the same four selectors, tested in slot order, and
;; turns them back into the immediate printed for the instruction.  The
;; expander and the insn must always use the same table.
9649 (define_expand "avx512f_vinsert<shuffletype>32x4_mask"
9650 [(match_operand:V16FI 0 "register_operand")
9651 (match_operand:V16FI 1 "register_operand")
9652 (match_operand:<ssequartermode> 2 "nonimmediate_operand")
9653 (match_operand:SI 3 "const_0_to_3_operand")
9654 (match_operand:V16FI 4 "register_operand")
9655 (match_operand:<avx512fmaskmode> 5 "register_operand")]
9658 switch (INTVAL (operands[3]))
9661 emit_insn (gen_avx512f_vinsert<shuffletype>32x4_1_mask (operands[0],
9662 operands[1], operands[2], GEN_INT (0xFFF0), operands[4],
9666 emit_insn (gen_avx512f_vinsert<shuffletype>32x4_1_mask (operands[0],
9667 operands[1], operands[2], GEN_INT (0xFF0F), operands[4],
9671 emit_insn (gen_avx512f_vinsert<shuffletype>32x4_1_mask (operands[0],
9672 operands[1], operands[2], GEN_INT (0xF0FF), operands[4],
9676 emit_insn (gen_avx512f_vinsert<shuffletype>32x4_1_mask (operands[0],
9677 operands[1], operands[2], GEN_INT (0x0FFF), operands[4],
9687 (define_insn "<mask_codefor>avx512f_vinsert<shuffletype>32x4_1<mask_name>"
9688 [(set (match_operand:V16FI 0 "register_operand" "=v")
9690 (match_operand:V16FI 1 "register_operand" "v")
9691 (vec_duplicate:V16FI
9692 (match_operand:<ssequartermode> 2 "nonimmediate_operand" "vm"))
9693 (match_operand:SI 3 "const_int_operand" "n")))]
9697 if (INTVAL (operands[3]) == 0xFFF0)
9699 else if (INTVAL (operands[3]) == 0xFF0F)
9701 else if (INTVAL (operands[3]) == 0xF0FF)
9703 else if (INTVAL (operands[3]) == 0x0FFF)
9708 operands[3] = GEN_INT (mask);
9710 return "vinsert<shuffletype>32x4\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}";
9712 [(set_attr "type" "sselog")
9713 (set_attr "length_immediate" "1")
9714 (set_attr "prefix" "evex")
9715 (set_attr "mode" "<sseinsnmode>")])
;; Masked vinsert{f,i}64x4 expander.  Operand 3 is the half selector
;; (predicate const_0_to_1_operand); based on its value the expander emits
;; either vec_set_lo_<mode>_mask or vec_set_hi_<mode>_mask, forwarding
;; operands 0, 1, 2, 4 and 5 unchanged.
9717 (define_expand "avx512f_vinsert<shuffletype>64x4_mask"
9718 [(match_operand:V8FI 0 "register_operand")
9719 (match_operand:V8FI 1 "register_operand")
9720 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand")
9721 (match_operand:SI 3 "const_0_to_1_operand")
9722 (match_operand:V8FI 4 "register_operand")
9723 (match_operand:<avx512fmaskmode> 5 "register_operand")]
9726 int mask = INTVAL (operands[3]);
9728 emit_insn (gen_vec_set_lo_<mode>_mask
9729 (operands[0], operands[1], operands[2],
9730 operands[4], operands[5]));
9732 emit_insn (gen_vec_set_hi_<mode>_mask
9733 (operands[0], operands[1], operands[2],
9734 operands[4], operands[5]));
;; vinsert{f,i}64x4 with immediate 0: the result combines operand 2 (a
;; <ssehalfvecmode> value, register or memory) with elements 4..7 selected
;; from operand 1.  An optional masked variant comes from <mask_name>.
;;
;; The two halves of the template differ in how the immediate is spelled:
;; AT&T syntax needs the "$" prefix, Intel syntax must not have it.
9738 (define_insn "vec_set_lo_<mode><mask_name>"
9739 [(set (match_operand:V8FI 0 "register_operand" "=v")
9741 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
9742 (vec_select:<ssehalfvecmode>
9743 (match_operand:V8FI 1 "register_operand" "v")
9744 (parallel [(const_int 4) (const_int 5)
9745 (const_int 6) (const_int 7)]))))]
9747 "vinsert<shuffletype>64x4\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}"
9748 [(set_attr "type" "sselog")
9749 (set_attr "length_immediate" "1")
9750 (set_attr "prefix" "evex")
9751 (set_attr "mode" "XI")])
;; vinsert{f,i}64x4 with immediate 1: the result combines elements 0..3
;; selected from operand 1 with operand 2 (a <ssehalfvecmode> value,
;; register or memory).  An optional masked variant comes from <mask_name>.
;;
;; The two halves of the template differ in how the immediate is spelled:
;; AT&T syntax needs the "$" prefix, Intel syntax must not have it.
9753 (define_insn "vec_set_hi_<mode><mask_name>"
9754 [(set (match_operand:V8FI 0 "register_operand" "=v")
9756 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
9757 (vec_select:<ssehalfvecmode>
9758 (match_operand:V8FI 1 "register_operand" "v")
9759 (parallel [(const_int 0) (const_int 1)
9760 (const_int 2) (const_int 3)]))))]
9762 "vinsert<shuffletype>64x4\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}"
9763 [(set_attr "type" "sselog")
9764 (set_attr "length_immediate" "1")
9765 (set_attr "prefix" "evex")
9766 (set_attr "mode" "XI")])
;; Masked vshuf{f,i}64x2 expander.  The 8-bit immediate (operand 3) is
;; split into four 2-bit fields.  Each field f becomes the consecutive
;; element pair (2*f, 2*f + 1); for the two upper fields 8 is added, so
;; those pairs fall in the 8..15 range.  The eight indices, the merge
;; source (operand 4) and the QImode mask (operand 5) are passed to
;; avx512f_shuf_<shuffletype>64x2_1_mask.
9768 (define_expand "avx512f_shuf_<shuffletype>64x2_mask"
9769 [(match_operand:V8FI 0 "register_operand")
9770 (match_operand:V8FI 1 "register_operand")
9771 (match_operand:V8FI 2 "nonimmediate_operand")
9772 (match_operand:SI 3 "const_0_to_255_operand")
9773 (match_operand:V8FI 4 "register_operand")
9774 (match_operand:QI 5 "register_operand")]
9777 int mask = INTVAL (operands[3]);
9778 emit_insn (gen_avx512f_shuf_<shuffletype>64x2_1_mask
9779 (operands[0], operands[1], operands[2],
9780 GEN_INT (((mask >> 0) & 3) * 2),
9781 GEN_INT (((mask >> 0) & 3) * 2 + 1),
9782 GEN_INT (((mask >> 2) & 3) * 2),
9783 GEN_INT (((mask >> 2) & 3) * 2 + 1),
9784 GEN_INT (((mask >> 4) & 3) * 2 + 8),
9785 GEN_INT (((mask >> 4) & 3) * 2 + 9),
9786 GEN_INT (((mask >> 6) & 3) * 2 + 8),
9787 GEN_INT (((mask >> 6) & 3) * 2 + 9),
9788 operands[4], operands[5]));
;; vshuf{f,i}64x2 insn.  Operands 3..10 are eight element indices into the
;; concatenation of operands 1 and 2: operands 3..6 must lie in 0..7 and
;; operands 7..10 in 8..15.  The condition additionally requires the
;; indices to form four consecutive pairs (3,4), (5,6), (7,8), (9,10).
;; The output code rebuilds the 8-bit immediate from the first index of
;; each pair (halved, after subtracting 8 for the upper two pairs) and
;; stores it in operand 3 for printing.
9792 (define_insn "avx512f_shuf_<shuffletype>64x2_1<mask_name>"
9793 [(set (match_operand:V8FI 0 "register_operand" "=v")
9795 (vec_concat:<ssedoublemode>
9796 (match_operand:V8FI 1 "register_operand" "v")
9797 (match_operand:V8FI 2 "nonimmediate_operand" "vm"))
9798 (parallel [(match_operand 3 "const_0_to_7_operand")
9799 (match_operand 4 "const_0_to_7_operand")
9800 (match_operand 5 "const_0_to_7_operand")
9801 (match_operand 6 "const_0_to_7_operand")
9802 (match_operand 7 "const_8_to_15_operand")
9803 (match_operand 8 "const_8_to_15_operand")
9804 (match_operand 9 "const_8_to_15_operand")
9805 (match_operand 10 "const_8_to_15_operand")])))]
9807 && (INTVAL (operands[3]) == (INTVAL (operands[4]) - 1)
9808 && INTVAL (operands[5]) == (INTVAL (operands[6]) - 1)
9809 && INTVAL (operands[7]) == (INTVAL (operands[8]) - 1)
9810 && INTVAL (operands[9]) == (INTVAL (operands[10]) - 1))"
9813 mask = INTVAL (operands[3]) / 2;
9814 mask |= INTVAL (operands[5]) / 2 << 2;
9815 mask |= (INTVAL (operands[7]) - 8) / 2 << 4;
9816 mask |= (INTVAL (operands[9]) - 8) / 2 << 6;
9817 operands[3] = GEN_INT (mask);
9819 return "vshuf<shuffletype>64x2\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
9821 [(set_attr "type" "sselog")
9822 (set_attr "length_immediate" "1")
9823 (set_attr "prefix" "evex")
9824 (set_attr "mode" "<sseinsnmode>")])
;; Masked vshuf{f,i}32x4 expander.  The 8-bit immediate (operand 3) is
;; split into four 2-bit fields.  Each field f becomes the four consecutive
;; element indices 4*f .. 4*f + 3; for the two upper fields 16 is added, so
;; those indices fall in the 16..31 range.  The sixteen indices, the merge
;; source (operand 4) and the HImode mask (operand 5) are passed to
;; avx512f_shuf_<shuffletype>32x4_1_mask.
9826 (define_expand "avx512f_shuf_<shuffletype>32x4_mask"
9827 [(match_operand:V16FI 0 "register_operand")
9828 (match_operand:V16FI 1 "register_operand")
9829 (match_operand:V16FI 2 "nonimmediate_operand")
9830 (match_operand:SI 3 "const_0_to_255_operand")
9831 (match_operand:V16FI 4 "register_operand")
9832 (match_operand:HI 5 "register_operand")]
9835 int mask = INTVAL (operands[3]);
9836 emit_insn (gen_avx512f_shuf_<shuffletype>32x4_1_mask
9837 (operands[0], operands[1], operands[2],
9838 GEN_INT (((mask >> 0) & 3) * 4),
9839 GEN_INT (((mask >> 0) & 3) * 4 + 1),
9840 GEN_INT (((mask >> 0) & 3) * 4 + 2),
9841 GEN_INT (((mask >> 0) & 3) * 4 + 3),
9842 GEN_INT (((mask >> 2) & 3) * 4),
9843 GEN_INT (((mask >> 2) & 3) * 4 + 1),
9844 GEN_INT (((mask >> 2) & 3) * 4 + 2),
9845 GEN_INT (((mask >> 2) & 3) * 4 + 3),
9846 GEN_INT (((mask >> 4) & 3) * 4 + 16),
9847 GEN_INT (((mask >> 4) & 3) * 4 + 17),
9848 GEN_INT (((mask >> 4) & 3) * 4 + 18),
9849 GEN_INT (((mask >> 4) & 3) * 4 + 19),
9850 GEN_INT (((mask >> 6) & 3) * 4 + 16),
9851 GEN_INT (((mask >> 6) & 3) * 4 + 17),
9852 GEN_INT (((mask >> 6) & 3) * 4 + 18),
9853 GEN_INT (((mask >> 6) & 3) * 4 + 19),
9854 operands[4], operands[5]));
;; vshuf{f,i}32x4 insn.  Operands 3..18 are sixteen element indices into
;; the concatenation of operands 1 and 2: operands 3..10 must lie in 0..15
;; and operands 11..18 in 16..31.  The condition additionally requires the
;; indices to form four runs of four consecutive values, starting at
;; operands 3, 7, 11 and 15.  The output code rebuilds the 8-bit immediate
;; from the first index of each run (divided by 4, after subtracting 16 for
;; the upper two runs) and stores it in operand 3 for printing.
9858 (define_insn "avx512f_shuf_<shuffletype>32x4_1<mask_name>"
9859 [(set (match_operand:V16FI 0 "register_operand" "=v")
9861 (vec_concat:<ssedoublemode>
9862 (match_operand:V16FI 1 "register_operand" "v")
9863 (match_operand:V16FI 2 "nonimmediate_operand" "vm"))
9864 (parallel [(match_operand 3 "const_0_to_15_operand")
9865 (match_operand 4 "const_0_to_15_operand")
9866 (match_operand 5 "const_0_to_15_operand")
9867 (match_operand 6 "const_0_to_15_operand")
9868 (match_operand 7 "const_0_to_15_operand")
9869 (match_operand 8 "const_0_to_15_operand")
9870 (match_operand 9 "const_0_to_15_operand")
9871 (match_operand 10 "const_0_to_15_operand")
9872 (match_operand 11 "const_16_to_31_operand")
9873 (match_operand 12 "const_16_to_31_operand")
9874 (match_operand 13 "const_16_to_31_operand")
9875 (match_operand 14 "const_16_to_31_operand")
9876 (match_operand 15 "const_16_to_31_operand")
9877 (match_operand 16 "const_16_to_31_operand")
9878 (match_operand 17 "const_16_to_31_operand")
9879 (match_operand 18 "const_16_to_31_operand")])))]
9881 && (INTVAL (operands[3]) == (INTVAL (operands[4]) - 1)
9882 && INTVAL (operands[3]) == (INTVAL (operands[5]) - 2)
9883 && INTVAL (operands[3]) == (INTVAL (operands[6]) - 3)
9884 && INTVAL (operands[7]) == (INTVAL (operands[8]) - 1)
9885 && INTVAL (operands[7]) == (INTVAL (operands[9]) - 2)
9886 && INTVAL (operands[7]) == (INTVAL (operands[10]) - 3)
9887 && INTVAL (operands[11]) == (INTVAL (operands[12]) - 1)
9888 && INTVAL (operands[11]) == (INTVAL (operands[13]) - 2)
9889 && INTVAL (operands[11]) == (INTVAL (operands[14]) - 3)
9890 && INTVAL (operands[15]) == (INTVAL (operands[16]) - 1)
9891 && INTVAL (operands[15]) == (INTVAL (operands[17]) - 2)
9892 && INTVAL (operands[15]) == (INTVAL (operands[18]) - 3))"
9895 mask = INTVAL (operands[3]) / 4;
9896 mask |= INTVAL (operands[7]) / 4 << 2;
9897 mask |= (INTVAL (operands[11]) - 16) / 4 << 4;
9898 mask |= (INTVAL (operands[15]) - 16) / 4 << 6;
9899 operands[3] = GEN_INT (mask);
9901 return "vshuf<shuffletype>32x4\t{%3, %2, %1, %0<mask_operand19>|%0<mask_operand19>, %1, %2, %3}";
9903 [(set_attr "type" "sselog")
9904 (set_attr "length_immediate" "1")
9905 (set_attr "prefix" "evex")
9906 (set_attr "mode" "<sseinsnmode>")])
;; Masked 512-bit vpshufd expander.  The 8-bit immediate (operand 2) is
;; split into four 2-bit selectors, and the same four selectors are passed
;; four times to avx512f_pshufd_1_mask with offsets 0, 4, 8 and 12 added,
;; followed by the merge source (operand 3) and the HImode mask
;; (operand 4).
9908 (define_expand "avx512f_pshufdv3_mask"
9909 [(match_operand:V16SI 0 "register_operand")
9910 (match_operand:V16SI 1 "nonimmediate_operand")
9911 (match_operand:SI 2 "const_0_to_255_operand")
9912 (match_operand:V16SI 3 "register_operand")
9913 (match_operand:HI 4 "register_operand")]
9916 int mask = INTVAL (operands[2]);
9917 emit_insn (gen_avx512f_pshufd_1_mask (operands[0], operands[1],
9918 GEN_INT ((mask >> 0) & 3),
9919 GEN_INT ((mask >> 2) & 3),
9920 GEN_INT ((mask >> 4) & 3),
9921 GEN_INT ((mask >> 6) & 3),
9922 GEN_INT (((mask >> 0) & 3) + 4),
9923 GEN_INT (((mask >> 2) & 3) + 4),
9924 GEN_INT (((mask >> 4) & 3) + 4),
9925 GEN_INT (((mask >> 6) & 3) + 4),
9926 GEN_INT (((mask >> 0) & 3) + 8),
9927 GEN_INT (((mask >> 2) & 3) + 8),
9928 GEN_INT (((mask >> 4) & 3) + 8),
9929 GEN_INT (((mask >> 6) & 3) + 8),
9930 GEN_INT (((mask >> 0) & 3) + 12),
9931 GEN_INT (((mask >> 2) & 3) + 12),
9932 GEN_INT (((mask >> 4) & 3) + 12),
9933 GEN_INT (((mask >> 6) & 3) + 12),
9934 operands[3], operands[4]));
;; 512-bit vpshufd insn.  Operands 2..17 are sixteen element indices into
;; operand 1, in four groups of four with ranges 0..3, 4..7, 8..11 and
;; 12..15.  The condition requires every index of the later groups to equal
;; the matching index of the first group plus 4, 8 or 12, i.e. the same
;; shuffle is applied to each group.  The output code packs operands 2..5
;; (two bits each) into the immediate and stores it in operand 2.
9938 (define_insn "avx512f_pshufd_1<mask_name>"
9939 [(set (match_operand:V16SI 0 "register_operand" "=v")
9941 (match_operand:V16SI 1 "nonimmediate_operand" "vm")
9942 (parallel [(match_operand 2 "const_0_to_3_operand")
9943 (match_operand 3 "const_0_to_3_operand")
9944 (match_operand 4 "const_0_to_3_operand")
9945 (match_operand 5 "const_0_to_3_operand")
9946 (match_operand 6 "const_4_to_7_operand")
9947 (match_operand 7 "const_4_to_7_operand")
9948 (match_operand 8 "const_4_to_7_operand")
9949 (match_operand 9 "const_4_to_7_operand")
9950 (match_operand 10 "const_8_to_11_operand")
9951 (match_operand 11 "const_8_to_11_operand")
9952 (match_operand 12 "const_8_to_11_operand")
9953 (match_operand 13 "const_8_to_11_operand")
9954 (match_operand 14 "const_12_to_15_operand")
9955 (match_operand 15 "const_12_to_15_operand")
9956 (match_operand 16 "const_12_to_15_operand")
9957 (match_operand 17 "const_12_to_15_operand")])))]
9959 && INTVAL (operands[2]) + 4 == INTVAL (operands[6])
9960 && INTVAL (operands[3]) + 4 == INTVAL (operands[7])
9961 && INTVAL (operands[4]) + 4 == INTVAL (operands[8])
9962 && INTVAL (operands[5]) + 4 == INTVAL (operands[9])
9963 && INTVAL (operands[2]) + 8 == INTVAL (operands[10])
9964 && INTVAL (operands[3]) + 8 == INTVAL (operands[11])
9965 && INTVAL (operands[4]) + 8 == INTVAL (operands[12])
9966 && INTVAL (operands[5]) + 8 == INTVAL (operands[13])
9967 && INTVAL (operands[2]) + 12 == INTVAL (operands[14])
9968 && INTVAL (operands[3]) + 12 == INTVAL (operands[15])
9969 && INTVAL (operands[4]) + 12 == INTVAL (operands[16])
9970 && INTVAL (operands[5]) + 12 == INTVAL (operands[17])"
9973 mask |= INTVAL (operands[2]) << 0;
9974 mask |= INTVAL (operands[3]) << 2;
9975 mask |= INTVAL (operands[4]) << 4;
9976 mask |= INTVAL (operands[5]) << 6;
9977 operands[2] = GEN_INT (mask);
9979 return "vpshufd\t{%2, %1, %0<mask_operand18>|%0<mask_operand18>, %1, %2}";
9981 [(set_attr "type" "sselog1")
9982 (set_attr "prefix" "evex")
9983 (set_attr "length_immediate" "1")
9984 (set_attr "mode" "XI")])
;; 256-bit vpshufd expander.  The 8-bit immediate (operand 2) is split into
;; four 2-bit selectors, which are passed to avx2_pshufd_1 twice: once as
;; they are and once with 4 added.
9986 (define_expand "avx2_pshufdv3"
9987 [(match_operand:V8SI 0 "register_operand")
9988 (match_operand:V8SI 1 "nonimmediate_operand")
9989 (match_operand:SI 2 "const_0_to_255_operand")]
9992 int mask = INTVAL (operands[2]);
9993 emit_insn (gen_avx2_pshufd_1 (operands[0], operands[1],
9994 GEN_INT ((mask >> 0) & 3),
9995 GEN_INT ((mask >> 2) & 3),
9996 GEN_INT ((mask >> 4) & 3),
9997 GEN_INT ((mask >> 6) & 3),
9998 GEN_INT (((mask >> 0) & 3) + 4),
9999 GEN_INT (((mask >> 2) & 3) + 4),
10000 GEN_INT (((mask >> 4) & 3) + 4),
10001 GEN_INT (((mask >> 6) & 3) + 4)));
;; 256-bit vpshufd insn.  Operands 2..5 are indices in 0..3 and operands
;; 6..9 indices in 4..7; the condition requires each of operands 6..9 to
;; equal the matching one of operands 2..5 plus 4.  The output code packs
;; operands 2..5 (two bits each) into the immediate and stores it in
;; operand 2.
10005 (define_insn "avx2_pshufd_1"
10006 [(set (match_operand:V8SI 0 "register_operand" "=x")
10008 (match_operand:V8SI 1 "nonimmediate_operand" "xm")
10009 (parallel [(match_operand 2 "const_0_to_3_operand")
10010 (match_operand 3 "const_0_to_3_operand")
10011 (match_operand 4 "const_0_to_3_operand")
10012 (match_operand 5 "const_0_to_3_operand")
10013 (match_operand 6 "const_4_to_7_operand")
10014 (match_operand 7 "const_4_to_7_operand")
10015 (match_operand 8 "const_4_to_7_operand")
10016 (match_operand 9 "const_4_to_7_operand")])))]
10018 && INTVAL (operands[2]) + 4 == INTVAL (operands[6])
10019 && INTVAL (operands[3]) + 4 == INTVAL (operands[7])
10020 && INTVAL (operands[4]) + 4 == INTVAL (operands[8])
10021 && INTVAL (operands[5]) + 4 == INTVAL (operands[9])"
10024 mask |= INTVAL (operands[2]) << 0;
10025 mask |= INTVAL (operands[3]) << 2;
10026 mask |= INTVAL (operands[4]) << 4;
10027 mask |= INTVAL (operands[5]) << 6;
10028 operands[2] = GEN_INT (mask);
10030 return "vpshufd\t{%2, %1, %0|%0, %1, %2}";
10032 [(set_attr "type" "sselog1")
10033 (set_attr "prefix" "vex")
10034 (set_attr "length_immediate" "1")
10035 (set_attr "mode" "OI")])
;; 128-bit pshufd expander.  Operand 2 is an integer constant whose low
;; eight bits are split into four 2-bit selectors and passed to
;; sse2_pshufd_1.  Any higher bits of the constant are ignored by the
;; masking with 3.
10037 (define_expand "sse2_pshufd"
10038 [(match_operand:V4SI 0 "register_operand")
10039 (match_operand:V4SI 1 "nonimmediate_operand")
10040 (match_operand:SI 2 "const_int_operand")]
10043 int mask = INTVAL (operands[2]);
10044 emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
10045 GEN_INT ((mask >> 0) & 3),
10046 GEN_INT ((mask >> 2) & 3),
10047 GEN_INT ((mask >> 4) & 3),
10048 GEN_INT ((mask >> 6) & 3)));
;; 128-bit pshufd insn.  Operands 2..5 are four element indices in 0..3
;; into operand 1 (register or memory).  The output code packs them, two
;; bits each, into the immediate stored in operand 2.  The "%v" in the
;; template together with prefix "maybe_vex" lets the same pattern print
;; either the legacy or the VEX-prefixed mnemonic.
10052 (define_insn "sse2_pshufd_1"
10053 [(set (match_operand:V4SI 0 "register_operand" "=x")
10055 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
10056 (parallel [(match_operand 2 "const_0_to_3_operand")
10057 (match_operand 3 "const_0_to_3_operand")
10058 (match_operand 4 "const_0_to_3_operand")
10059 (match_operand 5 "const_0_to_3_operand")])))]
10063 mask |= INTVAL (operands[2]) << 0;
10064 mask |= INTVAL (operands[3]) << 2;
10065 mask |= INTVAL (operands[4]) << 4;
10066 mask |= INTVAL (operands[5]) << 6;
10067 operands[2] = GEN_INT (mask);
10069 return "%vpshufd\t{%2, %1, %0|%0, %1, %2}";
10071 [(set_attr "type" "sselog1")
10072 (set_attr "prefix_data16" "1")
10073 (set_attr "prefix" "maybe_vex")
10074 (set_attr "length_immediate" "1")
10075 (set_attr "mode" "TI")])
;; 256-bit vpshuflw expander.  The 8-bit immediate (operand 2) is split
;; into four 2-bit selectors, which are passed to avx2_pshuflw_1 twice:
;; once as they are and once with 8 added.
10077 (define_expand "avx2_pshuflwv3"
10078 [(match_operand:V16HI 0 "register_operand")
10079 (match_operand:V16HI 1 "nonimmediate_operand")
10080 (match_operand:SI 2 "const_0_to_255_operand")]
10083 int mask = INTVAL (operands[2]);
10084 emit_insn (gen_avx2_pshuflw_1 (operands[0], operands[1],
10085 GEN_INT ((mask >> 0) & 3),
10086 GEN_INT ((mask >> 2) & 3),
10087 GEN_INT ((mask >> 4) & 3),
10088 GEN_INT ((mask >> 6) & 3),
10089 GEN_INT (((mask >> 0) & 3) + 8),
10090 GEN_INT (((mask >> 2) & 3) + 8),
10091 GEN_INT (((mask >> 4) & 3) + 8),
10092 GEN_INT (((mask >> 6) & 3) + 8)));
;; Insn for vpshuflw on V16HI.  Operands 2-5 are selectors in 0..3 and
;; operands 6-9 are selectors in 8..11.  The insn condition requires each
;; of operands 6-9 to equal the corresponding operand 2-5 plus 8, so a
;; single immediate describes both groups.  The output code packs
;; operands 2-5 (2 bits each, operand 2 lowest) into operands[2] and
;; emits vpshuflw.
10096 (define_insn "avx2_pshuflw_1"
10097 [(set (match_operand:V16HI 0 "register_operand" "=x")
10099 (match_operand:V16HI 1 "nonimmediate_operand" "xm")
10100 (parallel [(match_operand 2 "const_0_to_3_operand")
10101 (match_operand 3 "const_0_to_3_operand")
10102 (match_operand 4 "const_0_to_3_operand")
10103 (match_operand 5 "const_0_to_3_operand")
10108 (match_operand 6 "const_8_to_11_operand")
10109 (match_operand 7 "const_8_to_11_operand")
10110 (match_operand 8 "const_8_to_11_operand")
10111 (match_operand 9 "const_8_to_11_operand")
10115 (const_int 15)])))]
10117 && INTVAL (operands[2]) + 8 == INTVAL (operands[6])
10118 && INTVAL (operands[3]) + 8 == INTVAL (operands[7])
10119 && INTVAL (operands[4]) + 8 == INTVAL (operands[8])
10120 && INTVAL (operands[5]) + 8 == INTVAL (operands[9])"
10123 mask |= INTVAL (operands[2]) << 0;
10124 mask |= INTVAL (operands[3]) << 2;
10125 mask |= INTVAL (operands[4]) << 4;
10126 mask |= INTVAL (operands[5]) << 6;
10127 operands[2] = GEN_INT (mask);
10129 return "vpshuflw\t{%2, %1, %0|%0, %1, %2}";
10131 [(set_attr "type" "sselog")
10132 (set_attr "prefix" "vex")
10133 (set_attr "length_immediate" "1")
10134 (set_attr "mode" "OI")])
;; Expander for pshuflw on V8HI.  The const_int immediate in operand 2 is
;; split into four 2-bit selectors (values 0..3), which are passed with
;; operands 0 and 1 to gen_sse2_pshuflw_1.
10136 (define_expand "sse2_pshuflw"
10137 [(match_operand:V8HI 0 "register_operand")
10138 (match_operand:V8HI 1 "nonimmediate_operand")
10139 (match_operand:SI 2 "const_int_operand")]
10142 int mask = INTVAL (operands[2]);
10143 emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
10144 GEN_INT ((mask >> 0) & 3),
10145 GEN_INT ((mask >> 2) & 3),
10146 GEN_INT ((mask >> 4) & 3),
10147 GEN_INT ((mask >> 6) & 3)));
;; Insn for pshuflw on V8HI.  Operands 2-5 are per-element selectors in
;; 0..3; the output code packs them (2 bits each, operand 2 lowest) into
;; operands[2] and emits %vpshuflw.  The attributes request a rep prefix
;; and no data16 prefix for length computation.
10151 (define_insn "sse2_pshuflw_1"
10152 [(set (match_operand:V8HI 0 "register_operand" "=x")
10154 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10155 (parallel [(match_operand 2 "const_0_to_3_operand")
10156 (match_operand 3 "const_0_to_3_operand")
10157 (match_operand 4 "const_0_to_3_operand")
10158 (match_operand 5 "const_0_to_3_operand")
10166 mask |= INTVAL (operands[2]) << 0;
10167 mask |= INTVAL (operands[3]) << 2;
10168 mask |= INTVAL (operands[4]) << 4;
10169 mask |= INTVAL (operands[5]) << 6;
10170 operands[2] = GEN_INT (mask);
10172 return "%vpshuflw\t{%2, %1, %0|%0, %1, %2}";
10174 [(set_attr "type" "sselog")
10175 (set_attr "prefix_data16" "0")
10176 (set_attr "prefix_rep" "1")
10177 (set_attr "prefix" "maybe_vex")
10178 (set_attr "length_immediate" "1")
10179 (set_attr "mode" "TI")])
;; Expander for pshufhw on V16HI.  Operand 2 is an immediate in 0..255.
;; Each of its four 2-bit fields is passed to gen_avx2_pshufhw_1 twice:
;; once biased by 4 (giving 4..7) and once biased by 12 (giving 12..15).
10181 (define_expand "avx2_pshufhwv3"
10182 [(match_operand:V16HI 0 "register_operand")
10183 (match_operand:V16HI 1 "nonimmediate_operand")
10184 (match_operand:SI 2 "const_0_to_255_operand")]
10187 int mask = INTVAL (operands[2]);
10188 emit_insn (gen_avx2_pshufhw_1 (operands[0], operands[1],
10189 GEN_INT (((mask >> 0) & 3) + 4),
10190 GEN_INT (((mask >> 2) & 3) + 4),
10191 GEN_INT (((mask >> 4) & 3) + 4),
10192 GEN_INT (((mask >> 6) & 3) + 4),
10193 GEN_INT (((mask >> 0) & 3) + 12),
10194 GEN_INT (((mask >> 2) & 3) + 12),
10195 GEN_INT (((mask >> 4) & 3) + 12),
10196 GEN_INT (((mask >> 6) & 3) + 12)));
;; Insn for vpshufhw on V16HI.  Operands 2-5 are selectors in 4..7 and
;; operands 6-9 are selectors in 12..15; the insn condition requires each
;; of operands 6-9 to equal the corresponding operand 2-5 plus 8.  The
;; output code subtracts 4 from operands 2-5 to recover the 2-bit fields,
;; packs them into operands[2] (operand 2 lowest) and emits vpshufhw.
10200 (define_insn "avx2_pshufhw_1"
10201 [(set (match_operand:V16HI 0 "register_operand" "=x")
10203 (match_operand:V16HI 1 "nonimmediate_operand" "xm")
10204 (parallel [(const_int 0)
10208 (match_operand 2 "const_4_to_7_operand")
10209 (match_operand 3 "const_4_to_7_operand")
10210 (match_operand 4 "const_4_to_7_operand")
10211 (match_operand 5 "const_4_to_7_operand")
10216 (match_operand 6 "const_12_to_15_operand")
10217 (match_operand 7 "const_12_to_15_operand")
10218 (match_operand 8 "const_12_to_15_operand")
10219 (match_operand 9 "const_12_to_15_operand")])))]
10221 && INTVAL (operands[2]) + 8 == INTVAL (operands[6])
10222 && INTVAL (operands[3]) + 8 == INTVAL (operands[7])
10223 && INTVAL (operands[4]) + 8 == INTVAL (operands[8])
10224 && INTVAL (operands[5]) + 8 == INTVAL (operands[9])"
10227 mask |= (INTVAL (operands[2]) - 4) << 0;
10228 mask |= (INTVAL (operands[3]) - 4) << 2;
10229 mask |= (INTVAL (operands[4]) - 4) << 4;
10230 mask |= (INTVAL (operands[5]) - 4) << 6;
10231 operands[2] = GEN_INT (mask);
10233 return "vpshufhw\t{%2, %1, %0|%0, %1, %2}";
10235 [(set_attr "type" "sselog")
10236 (set_attr "prefix" "vex")
10237 (set_attr "length_immediate" "1")
10238 (set_attr "mode" "OI")])
;; Expander for pshufhw on V8HI.  The const_int immediate in operand 2 is
;; split into four 2-bit fields; each is biased by 4 (giving 4..7) before
;; being passed with operands 0 and 1 to gen_sse2_pshufhw_1.
10240 (define_expand "sse2_pshufhw"
10241 [(match_operand:V8HI 0 "register_operand")
10242 (match_operand:V8HI 1 "nonimmediate_operand")
10243 (match_operand:SI 2 "const_int_operand")]
10246 int mask = INTVAL (operands[2]);
10247 emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
10248 GEN_INT (((mask >> 0) & 3) + 4),
10249 GEN_INT (((mask >> 2) & 3) + 4),
10250 GEN_INT (((mask >> 4) & 3) + 4),
10251 GEN_INT (((mask >> 6) & 3) + 4)));
;; Insn for pshufhw on V8HI.  Operands 2-5 are selectors in 4..7.  The
;; output code subtracts 4 from each, packs the resulting 2-bit fields
;; into operands[2] (operand 2 lowest) and emits %vpshufhw.
10255 (define_insn "sse2_pshufhw_1"
10256 [(set (match_operand:V8HI 0 "register_operand" "=x")
10258 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10259 (parallel [(const_int 0)
10263 (match_operand 2 "const_4_to_7_operand")
10264 (match_operand 3 "const_4_to_7_operand")
10265 (match_operand 4 "const_4_to_7_operand")
10266 (match_operand 5 "const_4_to_7_operand")])))]
10270 mask |= (INTVAL (operands[2]) - 4) << 0;
10271 mask |= (INTVAL (operands[3]) - 4) << 2;
10272 mask |= (INTVAL (operands[4]) - 4) << 4;
10273 mask |= (INTVAL (operands[5]) - 4) << 6;
10274 operands[2] = GEN_INT (mask);
10276 return "%vpshufhw\t{%2, %1, %0|%0, %1, %2}";
10278 [(set_attr "type" "sselog")
10279 (set_attr "prefix_rep" "1")
10280 (set_attr "prefix_data16" "0")
10281 (set_attr "prefix" "maybe_vex")
10282 (set_attr "length_immediate" "1")
10283 (set_attr "mode" "TI")])
;; Expander that sets a V4SI register from a pattern involving a
;; vec_duplicate of the SI operand 1.  The preparation statement supplies
;; operands[2] as the all-zero V4SI constant.
;; NOTE(review): the operator combining the duplicate with operands[2] is
;; not visible in this excerpt -- confirm before relying on this summary.
10285 (define_expand "sse2_loadd"
10286 [(set (match_operand:V4SI 0 "register_operand")
10288 (vec_duplicate:V4SI
10289 (match_operand:SI 1 "nonimmediate_operand"))
10293 "operands[2] = CONST0_RTX (V4SImode);")
;; Insn with five alternatives built around a vec_duplicate of the SI
;; operand 2 and the V4SI operand 1 (a register or constant zero).
;;   0, 1: operand 1 is zero, source is memory or a general register;
;;         emitted as %vmovd.
;;   2, 3: non-AVX movss from memory (operand 1 zero) or from an SSE
;;         register with operand 1 tied to the destination.
;;   4:    AVX three-operand vmovss with operand 1 in its own register.
10295 (define_insn "sse2_loadld"
10296 [(set (match_operand:V4SI 0 "register_operand" "=x,Yi,x,x,x")
10298 (vec_duplicate:V4SI
10299 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x,x"))
10300 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,C,0,x")
10304 %vmovd\t{%2, %0|%0, %2}
10305 %vmovd\t{%2, %0|%0, %2}
10306 movss\t{%2, %0|%0, %2}
10307 movss\t{%2, %0|%0, %2}
10308 vmovss\t{%2, %1, %0|%0, %1, %2}"
10309 [(set_attr "isa" "sse2,sse2,noavx,noavx,avx")
10310 (set_attr "type" "ssemov")
10311 (set_attr "prefix" "maybe_vex,maybe_vex,orig,orig,vex")
10312 (set_attr "mode" "TI,TI,V4SF,SF,SF")])
;; Extract one element, selected by the constant operand 2, from a
;; VI12_128 vector register using %vpextr<ssemodesuffix>.  Alternative 0
;; writes a general register (printed with %k0); alternative 1 stores
;; straight to memory.  The prefix_data16 and prefix_extra attributes are
;; special-cased for alternative 0 in V8HImode.
10314 (define_insn "*vec_extract<mode>"
10315 [(set (match_operand:<ssescalarmode> 0 "nonimmediate_operand" "=r,m")
10316 (vec_select:<ssescalarmode>
10317 (match_operand:VI12_128 1 "register_operand" "x,x")
10319 [(match_operand:SI 2 "const_0_to_<ssescalarnummask>_operand")])))]
10322 %vpextr<ssemodesuffix>\t{%2, %1, %k0|%k0, %1, %2}
10323 %vpextr<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10324 [(set_attr "type" "sselog1")
10325 (set (attr "prefix_data16")
10327 (and (eq_attr "alternative" "0")
10328 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
10330 (const_string "*")))
10331 (set (attr "prefix_extra")
10333 (and (eq_attr "alternative" "0")
10334 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
10336 (const_string "1")))
10337 (set_attr "length_immediate" "1")
10338 (set_attr "prefix" "maybe_vex")
10339 (set_attr "mode" "TI")])
;; Extract an HI element (index 0..7) from a V8HI register into a general
;; register with pextrw.  Enabled only when SSE2 is available and SSE4.1
;; is not; the destination is printed with %k0.
10341 (define_insn "*vec_extractv8hi_sse2"
10342 [(set (match_operand:HI 0 "register_operand" "=r")
10344 (match_operand:V8HI 1 "register_operand" "x")
10346 [(match_operand:SI 2 "const_0_to_7_operand")])))]
10347 "TARGET_SSE2 && !TARGET_SSE4_1"
10348 "pextrw\t{%2, %1, %k0|%k0, %1, %2}"
10349 [(set_attr "type" "sselog1")
10350 (set_attr "prefix_data16" "1")
10351 (set_attr "length_immediate" "1")
10352 (set_attr "mode" "TI")])
;; Extract a byte element (index 0..15) from a V16QI register into an
;; SWI48 general register with %vpextrb; the destination is printed
;; with %k0.
;; NOTE(review): the name suggests the result is zero-extended, but the
;; extension operator is not visible in this excerpt -- confirm.
10354 (define_insn "*vec_extractv16qi_zext"
10355 [(set (match_operand:SWI48 0 "register_operand" "=r")
10358 (match_operand:V16QI 1 "register_operand" "x")
10360 [(match_operand:SI 2 "const_0_to_15_operand")]))))]
10362 "%vpextrb\t{%2, %1, %k0|%k0, %1, %2}"
10363 [(set_attr "type" "sselog1")
10364 (set_attr "prefix_extra" "1")
10365 (set_attr "length_immediate" "1")
10366 (set_attr "prefix" "maybe_vex")
10367 (set_attr "mode" "TI")])
;; Extract a word element (index 0..7) from a V8HI register into an SWI48
;; general register with %vpextrw; the destination is printed with %k0.
;; NOTE(review): the name suggests the result is zero-extended, but the
;; extension operator is not visible in this excerpt -- confirm.
10369 (define_insn "*vec_extractv8hi_zext"
10370 [(set (match_operand:SWI48 0 "register_operand" "=r")
10373 (match_operand:V8HI 1 "register_operand" "x")
10375 [(match_operand:SI 2 "const_0_to_7_operand")]))))]
10377 "%vpextrw\t{%2, %1, %k0|%k0, %1, %2}"
10378 [(set_attr "type" "sselog1")
10379 (set_attr "prefix_data16" "1")
10380 (set_attr "length_immediate" "1")
10381 (set_attr "prefix" "maybe_vex")
10382 (set_attr "mode" "TI")])
;; Element extract where the VI12_128 vector source is an offsettable
;; memory operand ("o") and the destination is a general register.
;; NOTE(review): the condition and output template are not visible in
;; this excerpt.
10384 (define_insn "*vec_extract<mode>_mem"
10385 [(set (match_operand:<ssescalarmode> 0 "register_operand" "=r")
10386 (vec_select:<ssescalarmode>
10387 (match_operand:VI12_128 1 "memory_operand" "o")
10389 [(match_operand 2 "const_0_to_<ssescalarnummask>_operand")])))]
;; Extract element 0 of a <ssevecmode> vector into an SWI48 destination.
;; Four alternatives cover general-register, SSE-register and memory
;; destinations; the condition rejects memory-to-memory operand pairs and
;; the isa attribute limits alternative 1 to sse4.
10393 (define_insn "*vec_extract<ssevecmodelower>_0"
10394 [(set (match_operand:SWI48 0 "nonimmediate_operand" "=r ,r,x ,m")
10396 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "mYj,x,xm,x")
10397 (parallel [(const_int 0)])))]
10398 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
10400 [(set_attr "isa" "*,sse4,*,*")])
;; Element 0 of a V4SI register extracted into a DI general register.
;; Requires 64-bit mode, SSE2 and inter-unit moves out of vector
;; registers.  After reload it is split into a zero_extend:DI of
;; operand 1, which is rewritten as an SImode register with the same
;; register number.
10402 (define_insn_and_split "*vec_extractv4si_0_zext"
10403 [(set (match_operand:DI 0 "register_operand" "=r")
10406 (match_operand:V4SI 1 "register_operand" "x")
10407 (parallel [(const_int 0)]))))]
10408 "TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_FROM_VEC"
10410 "&& reload_completed"
10411 [(set (match_dup 0) (zero_extend:DI (match_dup 1)))]
10412 "operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));")
;; Element 0 of a V2DI operand extracted into a DI SSE register or
;; memory.  Applies only to 32-bit targets with SSE and rejects
;; memory-to-memory operand pairs.
10414 (define_insn "*vec_extractv2di_0_sse"
10415 [(set (match_operand:DI 0 "nonimmediate_operand" "=x,m")
10417 (match_operand:V2DI 1 "nonimmediate_operand" "xm,x")
10418 (parallel [(const_int 0)])))]
10419 "TARGET_SSE && !TARGET_64BIT
10420 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
;; After reload, extracting element 0 from a vector register becomes a
;; plain move: operand 1 is replaced by a <MODE>mode register with the
;; same register number and the pattern is rewritten as (set op0 op1).
10424 [(set (match_operand:SWI48x 0 "nonimmediate_operand")
10426 (match_operand:<ssevecmode> 1 "register_operand")
10427 (parallel [(const_int 0)])))]
10428 "TARGET_SSE && reload_completed"
10429 [(set (match_dup 0) (match_dup 1))]
10430 "operands[1] = gen_rtx_REG (<MODE>mode, REGNO (operands[1]));")
;; Extract SI element 0..3 from a V4SI register.  Three alternatives:
;; one emitting %vpextrd into a general register or memory, and two that
;; shift the whole vector right by bytes (psrldq in place, or the
;; three-operand vpsrldq).  For the shift forms the element index is
;; multiplied by 4 to obtain the byte count.
;; NOTE(review): the case labels are not visible here; the mapping of
;; alternatives to templates is inferred from the isa/type attributes.
10432 (define_insn "*vec_extractv4si"
10433 [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,x,x")
10435 (match_operand:V4SI 1 "register_operand" "x,0,x")
10436 (parallel [(match_operand:SI 2 "const_0_to_3_operand")])))]
10439 switch (which_alternative)
10442 return "%vpextrd\t{%2, %1, %0|%0, %1, %2}";
10445 operands [2] = GEN_INT (INTVAL (operands[2]) * 4);
10446 return "psrldq\t{%2, %0|%0, %2}";
10449 operands [2] = GEN_INT (INTVAL (operands[2]) * 4);
10450 return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
10453 gcc_unreachable ();
10456 [(set_attr "isa" "*,noavx,avx")
10457 (set_attr "type" "sselog1,sseishft1,sseishft1")
10458 (set_attr "prefix_extra" "1,*,*")
10459 (set_attr "length_immediate" "1")
10460 (set_attr "prefix" "maybe_vex,orig,vex")
10461 (set_attr "mode" "TI")])
;; Extract SI element 0..3 from a V4SI register into a DI general
;; register with %vpextrd, printing the destination with %k0.  Requires
;; 64-bit mode and SSE4.1.
10463 (define_insn "*vec_extractv4si_zext"
10464 [(set (match_operand:DI 0 "register_operand" "=r")
10467 (match_operand:V4SI 1 "register_operand" "x")
10468 (parallel [(match_operand:SI 2 "const_0_to_3_operand")]))))]
10469 "TARGET_64BIT && TARGET_SSE4_1"
10470 "%vpextrd\t{%2, %1, %k0|%k0, %1, %2}"
10471 [(set_attr "type" "sselog1")
10472 (set_attr "prefix_extra" "1")
10473 (set_attr "length_immediate" "1")
10474 (set_attr "prefix" "maybe_vex")
10475 (set_attr "mode" "TI")])
;; Extract SI element 0..3 from a V4SI value in offsettable memory into
;; either an SSE register or a general register.
;; NOTE(review): the condition and output template are not visible in
;; this excerpt.
10477 (define_insn "*vec_extractv4si_mem"
10478 [(set (match_operand:SI 0 "register_operand" "=x,r")
10480 (match_operand:V4SI 1 "memory_operand" "o,o")
10481 (parallel [(match_operand 2 "const_0_to_3_operand")])))]
;; Extract SI element 0..3 from a V4SI value in offsettable memory into a
;; DI register (64-bit targets with SSE).  After reload it is split into
;; a zero_extend:DI of an SImode memory reference at byte offset
;; index * 4 from the original address.
10485 (define_insn_and_split "*vec_extractv4si_zext_mem"
10486 [(set (match_operand:DI 0 "register_operand" "=x,r")
10489 (match_operand:V4SI 1 "memory_operand" "o,o")
10490 (parallel [(match_operand:SI 2 "const_0_to_3_operand")]))))]
10491 "TARGET_64BIT && TARGET_SSE"
10493 "&& reload_completed"
10494 [(set (match_dup 0) (zero_extend:DI (match_dup 1)))]
10496 operands[1] = adjust_address (operands[1], SImode, INTVAL (operands[2]) * 4);
;; Extract element 1 (the high quadword) of a V2DI operand into a DI
;; destination.  Seven alternatives; the templates visible here are
;; %vpextrq $1, %vmovhps to memory, psrldq/vpsrldq by 8 bytes, and
;; movhlps.  The last two alternatives take the source from offsettable
;; memory.  Memory-to-memory operand pairs are rejected by the condition.
;; NOTE(review): the templates for the two memory-source alternatives
;; are not visible in this excerpt.
10499 (define_insn "*vec_extractv2di_1"
10500 [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,m,x,x,x,x,r")
10502 (match_operand:V2DI 1 "nonimmediate_operand" "x ,x,0,x,x,o,o")
10503 (parallel [(const_int 1)])))]
10504 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
10506 %vpextrq\t{$1, %1, %0|%0, %1, 1}
10507 %vmovhps\t{%1, %0|%0, %1}
10508 psrldq\t{$8, %0|%0, 8}
10509 vpsrldq\t{$8, %1, %0|%0, %1, 8}
10510 movhlps\t{%1, %0|%0, %1}
10513 [(set_attr "isa" "x64_sse4,*,sse2_noavx,avx,noavx,*,x64")
10514 (set_attr "type" "sselog1,ssemov,sseishft1,sseishft1,ssemov,ssemov,imov")
10515 (set_attr "length_immediate" "1,*,1,1,*,*,*")
10516 (set_attr "prefix_rex" "1,*,*,*,*,*,*")
10517 (set_attr "prefix_extra" "1,*,*,*,*,*,*")
10518 (set_attr "prefix" "maybe_vex,maybe_vex,orig,vex,orig,*,*")
10519 (set_attr "mode" "TI,V2SF,TI,TI,V4SF,DI,DI")])
;; After reload, extracting a constant-indexed element from a VI_128
;; vector in memory becomes a scalar load: the byte offset is the index
;; times the element size, and operand 1 is re-addressed in the scalar
;; mode at that offset.
10522 [(set (match_operand:<ssescalarmode> 0 "register_operand")
10523 (vec_select:<ssescalarmode>
10524 (match_operand:VI_128 1 "memory_operand")
10526 [(match_operand 2 "const_0_to_<ssescalarnummask>_operand")])))]
10527 "TARGET_SSE && reload_completed"
10528 [(set (match_dup 0) (match_dup 1))]
10530 int offs = INTVAL (operands[2]) * GET_MODE_SIZE (<ssescalarmode>mode);
10532 operands[1] = adjust_address (operands[1], <ssescalarmode>mode, offs);
;; Broadcast an SI value to all four elements of a V4SI register.
;;   0: source in an SSE register, %vpshufd with immediate 0 (sse2).
;;   1: source in memory, vbroadcastss (avx).
;;   2: source tied to the destination, shufps with immediate 0 (noavx).
10535 (define_insn "*vec_dupv4si"
10536 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
10537 (vec_duplicate:V4SI
10538 (match_operand:SI 1 "nonimmediate_operand" " x,m,0")))]
10541 %vpshufd\t{$0, %1, %0|%0, %1, 0}
10542 vbroadcastss\t{%1, %0|%0, %1}
10543 shufps\t{$0, %0, %0|%0, %0, 0}"
10544 [(set_attr "isa" "sse2,avx,noavx")
10545 (set_attr "type" "sselog1,ssemov,sselog1")
10546 (set_attr "length_immediate" "1,0,1")
10547 (set_attr "prefix_extra" "0,1,*")
10548 (set_attr "prefix" "maybe_vex,vex,orig")
10549 (set_attr "mode" "TI,V4SF,V4SF")])
;; Broadcast a DI value to both elements of a V2DI register.  Four
;; alternatives gated by isa (sse2_noavx, avx, sse3, noavx); the
;; templates visible here are the AVX vpunpcklqdq, which names operand 1
;; twice via %d1, and %vmovddup from memory.
;; NOTE(review): the templates for alternatives 0 and 3 are not visible
;; in this excerpt.
10551 (define_insn "*vec_dupv2di"
10552 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x,x")
10553 (vec_duplicate:V2DI
10554 (match_operand:DI 1 "nonimmediate_operand" " 0,x,m,0")))]
10558 vpunpcklqdq\t{%d1, %0|%0, %d1}
10559 %vmovddup\t{%1, %0|%0, %1}
10561 [(set_attr "isa" "sse2_noavx,avx,sse3,noavx")
10562 (set_attr "type" "sselog1,sselog1,sselog1,ssemov")
10563 (set_attr "prefix" "orig,vex,maybe_vex,orig")
10564 (set_attr "mode" "TI,TI,DF,V4SF")])
;; Build a V2SI from two SI operands (operand 1, operand 2).
;;   0, 1: insert operand 2 at index 1 with pinsrd / vpinsrd.
;;   2, 3: interleave two SSE registers with punpckldq / vpunpckldq.
;;   4:    operand 2 is constant zero, so a %vmovd of operand 1 suffices.
;;   5, 6: MMX register forms (punpckldq, and movd when operand 2 is
;;         zero).
10566 (define_insn "*vec_concatv2si_sse4_1"
10567 [(set (match_operand:V2SI 0 "register_operand" "=x, x,x,x, x, *y,*y")
10569 (match_operand:SI 1 "nonimmediate_operand" " 0, x,0,x,rm, 0,rm")
10570 (match_operand:SI 2 "vector_move_operand" "rm,rm,x,x, C,*ym, C")))]
10573 pinsrd\t{$1, %2, %0|%0, %2, 1}
10574 vpinsrd\t{$1, %2, %1, %0|%0, %1, %2, 1}
10575 punpckldq\t{%2, %0|%0, %2}
10576 vpunpckldq\t{%2, %1, %0|%0, %1, %2}
10577 %vmovd\t{%1, %0|%0, %1}
10578 punpckldq\t{%2, %0|%0, %2}
10579 movd\t{%1, %0|%0, %1}"
10580 [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
10581 (set_attr "type" "sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov")
10582 (set_attr "prefix_extra" "1,1,*,*,*,*,*")
10583 (set_attr "length_immediate" "1,1,*,*,*,*,*")
10584 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig")
10585 (set_attr "mode" "TI,TI,TI,TI,TI,DI,DI")])
10587 ;; ??? In theory we can match memory for the MMX alternative, but allowing
10588 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
10589 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Variant of the V2SI concat used when SSE is available but SSE4.1 is
;; not.  Operand 2 is a register or constant zero.  The first three
;; alternatives need sse2 (punpckldq, movd, movd); the next two use
;; unpcklps / movss; the last two are the MMX punpckldq / movd forms.
10590 (define_insn "*vec_concatv2si"
10591 [(set (match_operand:V2SI 0 "register_operand" "=x,x ,*y,x,x,*y,*y")
10593 (match_operand:SI 1 "nonimmediate_operand" " 0,rm,rm,0,m, 0,*rm")
10594 (match_operand:SI 2 "reg_or_0_operand" " x,C ,C, x,C,*y,C")))]
10595 "TARGET_SSE && !TARGET_SSE4_1"
10597 punpckldq\t{%2, %0|%0, %2}
10598 movd\t{%1, %0|%0, %1}
10599 movd\t{%1, %0|%0, %1}
10600 unpcklps\t{%2, %0|%0, %2}
10601 movss\t{%1, %0|%0, %1}
10602 punpckldq\t{%2, %0|%0, %2}
10603 movd\t{%1, %0|%0, %1}"
10604 [(set_attr "isa" "sse2,sse2,sse2,*,*,*,*")
10605 (set_attr "type" "sselog,ssemov,mmxmov,sselog,ssemov,mmxcvt,mmxmov")
10606 (set_attr "mode" "TI,TI,DI,V4SF,SF,DI,DI")])
;; Build a V4SI from two V2SI halves (operand 1, operand 2).
;;   0, 1: register halves joined with punpcklqdq / vpunpcklqdq.
;;   2:    register halves joined with movlhps (noavx).
;;   3, 4: operand 2 in memory, loaded with movhps / vmovhps; the Intel
;;         syntax side prints it with %q2.
10608 (define_insn "*vec_concatv4si"
10609 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x,x,x")
10611 (match_operand:V2SI 1 "register_operand" " 0,x,0,0,x")
10612 (match_operand:V2SI 2 "nonimmediate_operand" " x,x,x,m,m")))]
10615 punpcklqdq\t{%2, %0|%0, %2}
10616 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
10617 movlhps\t{%2, %0|%0, %2}
10618 movhps\t{%2, %0|%0, %q2}
10619 vmovhps\t{%2, %1, %0|%0, %1, %q2}"
10620 [(set_attr "isa" "sse2_noavx,avx,noavx,noavx,avx")
10621 (set_attr "type" "sselog,sselog,ssemov,ssemov,ssemov")
10622 (set_attr "prefix" "orig,vex,orig,orig,vex")
10623 (set_attr "mode" "TI,TI,V4SF,V2SF,V2SF")])
10625 ;; movd instead of movq is required to handle broken assemblers.
;; Build a V2DI from two DI operands (operand 1, operand 2).  Ten
;; alternatives:
;;   0, 1: insert operand 2 at index 1 with pinsrq / vpinsrq.
;;   2:    operand 2 zero, operand 1 in a general register; emits %vmovq
;;         or %vmovd depending on HAVE_AS_IX86_INTERUNIT_MOVQ.
;;   3:    operand 2 zero, operand 1 in an SSE register or memory; %vmovq.
;;   4:    operand 2 zero, operand 1 in an MMX register; movq2dq.
;;   5, 6: two SSE registers joined with punpcklqdq / vpunpcklqdq.
;;   7:    two SSE registers joined with movlhps.
;;   8, 9: operand 2 in memory, loaded with movhps / vmovhps.
10626 (define_insn "vec_concatv2di"
10627 [(set (match_operand:V2DI 0 "register_operand"
10628 "=x,x ,Yi,x ,!x,x,x,x,x,x")
10630 (match_operand:DI 1 "nonimmediate_operand"
10631 " 0,x ,r ,xm,*y,0,x,0,0,x")
10632 (match_operand:DI 2 "vector_move_operand"
10633 "rm,rm,C ,C ,C ,x,x,x,m,m")))]
10636 pinsrq\t{$1, %2, %0|%0, %2, 1}
10637 vpinsrq\t{$1, %2, %1, %0|%0, %1, %2, 1}
10638 * return HAVE_AS_IX86_INTERUNIT_MOVQ ? \"%vmovq\t{%1, %0|%0, %1}\" : \"%vmovd\t{%1, %0|%0, %1}\";
10639 %vmovq\t{%1, %0|%0, %1}
10640 movq2dq\t{%1, %0|%0, %1}
10641 punpcklqdq\t{%2, %0|%0, %2}
10642 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
10643 movlhps\t{%2, %0|%0, %2}
10644 movhps\t{%2, %0|%0, %2}
10645 vmovhps\t{%2, %1, %0|%0, %1, %2}"
10646 [(set_attr "isa" "x64_sse4_noavx,x64_avx,x64,sse2,sse2,sse2_noavx,avx,noavx,noavx,avx")
10649 (eq_attr "alternative" "0,1,5,6")
10650 (const_string "sselog")
10651 (const_string "ssemov")))
10652 (set_attr "prefix_rex" "1,1,1,*,*,*,*,*,*,*")
10653 (set_attr "prefix_extra" "1,1,*,*,*,*,*,*,*,*")
10654 (set_attr "length_immediate" "1,1,*,*,*,*,*,*,*,*")
10655 (set_attr "prefix" "orig,vex,maybe_vex,maybe_vex,orig,orig,vex,orig,orig,vex")
10656 (set_attr "mode" "TI,TI,TI,TI,TI,TI,TI,V4SF,V2SF,V2SF")])
;; Expander for the signed, low-half widening unpack of a VI124_AVX512F
;; vector into <sseunpackmode>.  All work is delegated to
;; ix86_expand_sse_unpack, called with both flag arguments false.
;; NOTE(review): judging by the four sibling expanders, the first flag
;; selects the unsigned variant and the second the high half -- confirm
;; against the helper's definition.
10658 (define_expand "vec_unpacks_lo_<mode>"
10659 [(match_operand:<sseunpackmode> 0 "register_operand")
10660 (match_operand:VI124_AVX512F 1 "register_operand")]
10662 "ix86_expand_sse_unpack (operands[0], operands[1], false, false); DONE;")
;; Expander for the signed, high-half widening unpack of a VI124_AVX512F
;; vector into <sseunpackmode>.  Delegates to ix86_expand_sse_unpack with
;; flag arguments (false, true).
;; NOTE(review): the flags presumably mean (unsigned, high half) --
;; confirm against the helper's definition.
10664 (define_expand "vec_unpacks_hi_<mode>"
10665 [(match_operand:<sseunpackmode> 0 "register_operand")
10666 (match_operand:VI124_AVX512F 1 "register_operand")]
10668 "ix86_expand_sse_unpack (operands[0], operands[1], false, true); DONE;")
;; Expander for the unsigned, low-half widening unpack of a VI124_AVX512F
;; vector into <sseunpackmode>.  Delegates to ix86_expand_sse_unpack with
;; flag arguments (true, false).
;; NOTE(review): the flags presumably mean (unsigned, high half) --
;; confirm against the helper's definition.
10670 (define_expand "vec_unpacku_lo_<mode>"
10671 [(match_operand:<sseunpackmode> 0 "register_operand")
10672 (match_operand:VI124_AVX512F 1 "register_operand")]
10674 "ix86_expand_sse_unpack (operands[0], operands[1], true, false); DONE;")
;; Expander for the unsigned, high-half widening unpack of a
;; VI124_AVX512F vector into <sseunpackmode>.  Delegates to
;; ix86_expand_sse_unpack with flag arguments (true, true).
;; NOTE(review): the flags presumably mean (unsigned, high half) --
;; confirm against the helper's definition.
10676 (define_expand "vec_unpacku_hi_<mode>"
10677 [(match_operand:<sseunpackmode> 0 "register_operand")
10678 (match_operand:VI124_AVX512F 1 "register_operand")]
10680 "ix86_expand_sse_unpack (operands[0], operands[1], true, true); DONE;")
10682 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10686 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander for the unsigned average of two VI12_AVX2 vectors.  The RTL
;; zero-extends both inputs to <ssedoublemode>, adds them, shifts the sum
;; right and truncates back to the element mode.  The preparation code
;; sets operands[3] to the all-ones-valued constant vector
;; CONST1_RTX (<MODE>mode) and then runs the usual PLUS operand fixup.
;; NOTE(review): how operands[3] and the shift count enter the pattern is
;; not visible in this excerpt.
10688 (define_expand "<sse2_avx2>_uavg<mode>3"
10689 [(set (match_operand:VI12_AVX2 0 "register_operand")
10690 (truncate:VI12_AVX2
10691 (lshiftrt:<ssedoublemode>
10692 (plus:<ssedoublemode>
10693 (plus:<ssedoublemode>
10694 (zero_extend:<ssedoublemode>
10695 (match_operand:VI12_AVX2 1 "nonimmediate_operand"))
10696 (zero_extend:<ssedoublemode>
10697 (match_operand:VI12_AVX2 2 "nonimmediate_operand")))
10702 operands[3] = CONST1_RTX(<MODE>mode);
10703 ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);
;; Insn matching the unsigned-average RTL: the zero-extended sum of
;; operands 1 and 2 plus the constant-one vector operand 3, shifted right
;; and truncated.  Operand 1 is marked commutative ("%").  Emits
;; pavg<ssemodesuffix> (two-operand, noavx) or vpavg<ssemodesuffix>
;; (three-operand, avx).
10706 (define_insn "*<sse2_avx2>_uavg<mode>3"
10707 [(set (match_operand:VI12_AVX2 0 "register_operand" "=x,x")
10708 (truncate:VI12_AVX2
10709 (lshiftrt:<ssedoublemode>
10710 (plus:<ssedoublemode>
10711 (plus:<ssedoublemode>
10712 (zero_extend:<ssedoublemode>
10713 (match_operand:VI12_AVX2 1 "nonimmediate_operand" "%0,x"))
10714 (zero_extend:<ssedoublemode>
10715 (match_operand:VI12_AVX2 2 "nonimmediate_operand" "xm,xm")))
10716 (match_operand:VI12_AVX2 3 "const1_operand"))
10718 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
10720 pavg<ssemodesuffix>\t{%2, %0|%0, %2}
10721 vpavg<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10722 [(set_attr "isa" "noavx,avx")
10723 (set_attr "type" "sseiadd")
10724 (set_attr "prefix_data16" "1,*")
10725 (set_attr "prefix" "orig,vex")
10726 (set_attr "mode" "<sseinsnmode>")])
10728 ;; The correct representation for this is absolutely enormous, and
10729 ;; surely not generally useful.
;; psadbw / vpsadbw: takes two <ssebytemode> inputs and produces a
;; VI8_AVX2 result.  The operation is kept opaque (operands are wrapped
;; in a bracketed operand list rather than spelled out in RTL).
;; Alternative 0 is the two-operand non-AVX form with operand 1 tied to
;; the destination; alternative 1 is the three-operand AVX form.
10730 (define_insn "<sse2_avx2>_psadbw"
10731 [(set (match_operand:VI8_AVX2 0 "register_operand" "=x,x")
10733 [(match_operand:<ssebytemode> 1 "register_operand" "0,x")
10734 (match_operand:<ssebytemode> 2 "nonimmediate_operand" "xm,xm")]
10738 psadbw\t{%2, %0|%0, %2}
10739 vpsadbw\t{%2, %1, %0|%0, %1, %2}"
10740 [(set_attr "isa" "noavx,avx")
10741 (set_attr "type" "sseiadd")
10742 (set_attr "atom_unit" "simul")
10743 (set_attr "prefix_data16" "1,*")
10744 (set_attr "prefix" "orig,vex")
10745 (set_attr "mode" "<sseinsnmode>")])
;; %vmovmsk<ssemodesuffix>: produces an SI general-register result from a
;; VF_128_256 floating-point vector register.
10747 (define_insn "<sse>_movmsk<ssemodesuffix><avxsizesuffix>"
10748 [(set (match_operand:SI 0 "register_operand" "=r")
10750 [(match_operand:VF_128_256 1 "register_operand" "x")]
10753 "%vmovmsk<ssemodesuffix>\t{%1, %0|%0, %1}"
10754 [(set_attr "type" "ssemov")
10755 (set_attr "prefix" "maybe_vex")
10756 (set_attr "mode" "<MODE>")])
;; vpmovmskb on a V32QI register: an unspec producing an SI result in a
;; general register.  Always VEX-encoded.
10758 (define_insn "avx2_pmovmskb"
10759 [(set (match_operand:SI 0 "register_operand" "=r")
10760 (unspec:SI [(match_operand:V32QI 1 "register_operand" "x")]
10763 "vpmovmskb\t{%1, %0|%0, %1}"
10764 [(set_attr "type" "ssemov")
10765 (set_attr "prefix" "vex")
10766 (set_attr "mode" "DI")])
;; %vpmovmskb on a V16QI register: an unspec producing an SI result in a
;; general register.  VEX-encoded when AVX is enabled (maybe_vex).
10768 (define_insn "sse2_pmovmskb"
10769 [(set (match_operand:SI 0 "register_operand" "=r")
10770 (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
10773 "%vpmovmskb\t{%1, %0|%0, %1}"
10774 [(set_attr "type" "ssemov")
10775 (set_attr "prefix_data16" "1")
10776 (set_attr "prefix" "maybe_vex")
10777 (set_attr "mode" "SI")])
;; Named expander for maskmovdqu: the destination is a V16QI memory
;; operand and the sources are two V16QI registers wrapped in an unspec.
;; NOTE(review): the remaining unspec element and the condition are not
;; visible in this excerpt.
10779 (define_expand "sse2_maskmovdqu"
10780 [(set (match_operand:V16QI 0 "memory_operand")
10781 (unspec:V16QI [(match_operand:V16QI 1 "register_operand")
10782 (match_operand:V16QI 2 "register_operand")
;; Insn for %vmaskmovdqu.  The store address is operand 0, a Pmode
;; register forced into the "D" register class; the unspec also reads
;; the old memory contents at that address.  When Pmode differs from
;; word_mode the output code writes an addr32 prefix directly to the asm
;; file before returning the template, and length_address accounts for
;; it.  length_vex is computed by hand from operand 2 because the
;; implicit address register is not part of the printed operands.
10787 (define_insn "*sse2_maskmovdqu"
10788 [(set (mem:V16QI (match_operand:P 0 "register_operand" "D"))
10789 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
10790 (match_operand:V16QI 2 "register_operand" "x")
10791 (mem:V16QI (match_dup 0))]
10795 /* We can't use %^ here due to ASM_OUTPUT_OPCODE processing
10796 that requires %v to be at the beginning of the opcode name. */
10797 if (Pmode != word_mode)
10798 fputs ("\taddr32", asm_out_file);
10799 return "%vmaskmovdqu\t{%2, %1|%1, %2}";
10801 [(set_attr "type" "ssemov")
10802 (set_attr "prefix_data16" "1")
10803 (set (attr "length_address")
10804 (symbol_ref ("Pmode != word_mode")))
10805 ;; The implicit %rdi operand confuses default length_vex computation.
10806 (set (attr "length_vex")
10807 (symbol_ref ("3 + REX_SSE_REGNO_P (REGNO (operands[2]))")))
10808 (set_attr "prefix" "maybe_vex")
10809 (set_attr "mode" "TI")])
;; unspec_volatile insn taking a single SI memory operand; its attributes
;; classify it as an MXCSR operation that loads from memory.
;; NOTE(review): the unspec code, condition and output template are not
;; visible in this excerpt.
10811 (define_insn "sse_ldmxcsr"
10812 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
10816 [(set_attr "type" "sse")
10817 (set_attr "atom_sse_attr" "mxcsr")
10818 (set_attr "prefix" "maybe_vex")
10819 (set_attr "memory" "load")])
;; Sets an SI memory operand from the volatile unspec UNSPECV_STMXCSR;
;; its attributes classify it as an MXCSR operation that stores to
;; memory.
;; NOTE(review): the condition and output template are not visible in
;; this excerpt.
10821 (define_insn "sse_stmxcsr"
10822 [(set (match_operand:SI 0 "memory_operand" "=m")
10823 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
10826 [(set_attr "type" "sse")
10827 (set_attr "atom_sse_attr" "mxcsr")
10828 (set_attr "prefix" "maybe_vex")
10829 (set_attr "memory" "store")])
;; unspec_volatile insn taking an address operand ("p").  The memory
;; attribute is "unknown" and the Atom scheduling attribute is "fence".
;; NOTE(review): the unspec code, condition and output template are not
;; visible in this excerpt.
10831 (define_insn "sse2_clflush"
10832 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
10836 [(set_attr "type" "sse")
10837 (set_attr "atom_sse_attr" "fence")
10838 (set_attr "memory" "unknown")])
10840 ;; As per AMD and Intel ISA manuals, the first operand is extensions
10841 ;; and it goes to %ecx. The second operand received is hints and it goes to %eax.
;; mwait as a volatile unspec.  Operand 0 is an SI register pinned to the
;; "c" class and operand 1 an SI register pinned to the "a" class.  The
;; insn length is fixed at 3 bytes.
10843 (define_insn "sse3_mwait"
10844 [(unspec_volatile [(match_operand:SI 0 "register_operand" "c")
10845 (match_operand:SI 1 "register_operand" "a")]
10848 ;; 64bit version is "mwait %rax,%rcx". But only lower 32bits are used.
10849 ;; Since 32bit register operands are implicitly zero extended to 64bit,
10850 ;; we only need to set up 32bit registers.
10852 [(set_attr "length" "3")])
;; monitor as a volatile unspec, instantiated per pointer mode (P).
;; Operand 0 is a pointer-mode register pinned to the "a" class;
;; operands 1 and 2 are SI registers pinned to "c" and "d".  The length
;; is 3 bytes, plus one when Pmode differs from word_mode.
10854 (define_insn "sse3_monitor_<mode>"
10855 [(unspec_volatile [(match_operand:P 0 "register_operand" "a")
10856 (match_operand:SI 1 "register_operand" "c")
10857 (match_operand:SI 2 "register_operand" "d")]
10860 ;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in
10861 ;; RCX and RDX are used. Since 32bit register operands are implicitly
10862 ;; zero extended to 64bit, we only need to set up 32bit registers.
10864 [(set (attr "length")
10865 (symbol_ref ("(Pmode != word_mode) + 3")))])
10867 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10869 ;; SSSE3 instructions
10871 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Code iterator over add and subtract, each in plain and
;; signed-saturating form; used to instantiate the horizontal
;; add/subtract patterns that follow.
10873 (define_code_iterator ssse3_plusminus [plus ss_plus minus ss_minus])
;; Horizontal add/subtract of 16-bit elements on V16HI, emitted as
;; vph<plusminus_mnemonic>w.  Every scalar term applies ssse3_plusminus
;; to a pair of adjacent elements (2k, 2k+1): first the eight pairs of
;; operand 1, then the eight pairs of operand 2.
;; NOTE(review): the operators that assemble these sixteen terms into
;; the result vector are not visible in this excerpt, so the final
;; element order cannot be confirmed from here.
10875 (define_insn "avx2_ph<plusminus_mnemonic>wv16hi3"
10876 [(set (match_operand:V16HI 0 "register_operand" "=x")
10881 (ssse3_plusminus:HI
10883 (match_operand:V16HI 1 "register_operand" "x")
10884 (parallel [(const_int 0)]))
10885 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
10886 (ssse3_plusminus:HI
10887 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
10888 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
10890 (ssse3_plusminus:HI
10891 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
10892 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
10893 (ssse3_plusminus:HI
10894 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
10895 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
10898 (ssse3_plusminus:HI
10899 (vec_select:HI (match_dup 1) (parallel [(const_int 8)]))
10900 (vec_select:HI (match_dup 1) (parallel [(const_int 9)])))
10901 (ssse3_plusminus:HI
10902 (vec_select:HI (match_dup 1) (parallel [(const_int 10)]))
10903 (vec_select:HI (match_dup 1) (parallel [(const_int 11)]))))
10905 (ssse3_plusminus:HI
10906 (vec_select:HI (match_dup 1) (parallel [(const_int 12)]))
10907 (vec_select:HI (match_dup 1) (parallel [(const_int 13)])))
10908 (ssse3_plusminus:HI
10909 (vec_select:HI (match_dup 1) (parallel [(const_int 14)]))
10910 (vec_select:HI (match_dup 1) (parallel [(const_int 15)]))))))
10914 (ssse3_plusminus:HI
10916 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
10917 (parallel [(const_int 0)]))
10918 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
10919 (ssse3_plusminus:HI
10920 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
10921 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
10923 (ssse3_plusminus:HI
10924 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
10925 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
10926 (ssse3_plusminus:HI
10927 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
10928 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))
10931 (ssse3_plusminus:HI
10932 (vec_select:HI (match_dup 2) (parallel [(const_int 8)]))
10933 (vec_select:HI (match_dup 2) (parallel [(const_int 9)])))
10934 (ssse3_plusminus:HI
10935 (vec_select:HI (match_dup 2) (parallel [(const_int 10)]))
10936 (vec_select:HI (match_dup 2) (parallel [(const_int 11)]))))
10938 (ssse3_plusminus:HI
10939 (vec_select:HI (match_dup 2) (parallel [(const_int 12)]))
10940 (vec_select:HI (match_dup 2) (parallel [(const_int 13)])))
10941 (ssse3_plusminus:HI
10942 (vec_select:HI (match_dup 2) (parallel [(const_int 14)]))
10943 (vec_select:HI (match_dup 2) (parallel [(const_int 15)]))))))))]
10945 "vph<plusminus_mnemonic>w\t{%2, %1, %0|%0, %1, %2}"
10946 [(set_attr "type" "sseiadd")
10947 (set_attr "prefix_extra" "1")
10948 (set_attr "prefix" "vex")
10949 (set_attr "mode" "OI")])
;; Horizontal add/subtract of 16-bit elements on V8HI.  Every scalar term
;; applies ssse3_plusminus to a pair of adjacent elements (2k, 2k+1):
;; the four pairs of operand 1 followed by the four pairs of operand 2.
;; Alternative 0 is the two-operand ph<plusminus_mnemonic>w with operand
;; 1 tied to the destination (noavx); alternative 1 is the three-operand
;; vph<plusminus_mnemonic>w (avx).
10951 (define_insn "ssse3_ph<plusminus_mnemonic>wv8hi3"
10952 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
10956 (ssse3_plusminus:HI
10958 (match_operand:V8HI 1 "register_operand" "0,x")
10959 (parallel [(const_int 0)]))
10960 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
10961 (ssse3_plusminus:HI
10962 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
10963 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
10965 (ssse3_plusminus:HI
10966 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
10967 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
10968 (ssse3_plusminus:HI
10969 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
10970 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
10973 (ssse3_plusminus:HI
10975 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
10976 (parallel [(const_int 0)]))
10977 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
10978 (ssse3_plusminus:HI
10979 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
10980 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
10982 (ssse3_plusminus:HI
10983 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
10984 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
10985 (ssse3_plusminus:HI
10986 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
10987 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
10990 ph<plusminus_mnemonic>w\t{%2, %0|%0, %2}
10991 vph<plusminus_mnemonic>w\t{%2, %1, %0|%0, %1, %2}"
10992 [(set_attr "isa" "noavx,avx")
10993 (set_attr "type" "sseiadd")
10994 (set_attr "atom_unit" "complex")
10995 (set_attr "prefix_data16" "1,*")
10996 (set_attr "prefix_extra" "1")
10997 (set_attr "prefix" "orig,vex")
10998 (set_attr "mode" "TI")])
;; Horizontal add/subtract of 16-bit elements on V4HI in MMX registers
;; ("y" constraints).  Every scalar term applies ssse3_plusminus to a
;; pair of adjacent elements: the two pairs of operand 1 followed by the
;; two pairs of operand 2.  Operand 1 is tied to the destination.
;; prefix_rex is computed from whether the insn mentions an extended
;; register.
11000 (define_insn "ssse3_ph<plusminus_mnemonic>wv4hi3"
11001 [(set (match_operand:V4HI 0 "register_operand" "=y")
11004 (ssse3_plusminus:HI
11006 (match_operand:V4HI 1 "register_operand" "0")
11007 (parallel [(const_int 0)]))
11008 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
11009 (ssse3_plusminus:HI
11010 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
11011 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
11013 (ssse3_plusminus:HI
11015 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
11016 (parallel [(const_int 0)]))
11017 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
11018 (ssse3_plusminus:HI
11019 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
11020 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
11022 "ph<plusminus_mnemonic>w\t{%2, %0|%0, %2}"
11023 [(set_attr "type" "sseiadd")
11024 (set_attr "atom_unit" "complex")
11025 (set_attr "prefix_extra" "1")
11026 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
11027 (set_attr "mode" "DI")])
;; Horizontal add/subtract of 32-bit elements on V8SI, emitted as
;; vph<plusminus_mnemonic>d.  The visible terms select adjacent element
;; pairs (2k, 2k+1): the four pairs of operand 1 followed by the four
;; pairs of operand 2.
;; NOTE(review): the arithmetic operator applied to each pair and the
;; operators assembling the result are not visible in this excerpt.
11029 (define_insn "avx2_ph<plusminus_mnemonic>dv8si3"
11030 [(set (match_operand:V8SI 0 "register_operand" "=x")
11036 (match_operand:V8SI 1 "register_operand" "x")
11037 (parallel [(const_int 0)]))
11038 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
11040 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
11041 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
11044 (vec_select:SI (match_dup 1) (parallel [(const_int 4)]))
11045 (vec_select:SI (match_dup 1) (parallel [(const_int 5)])))
11047 (vec_select:SI (match_dup 1) (parallel [(const_int 6)]))
11048 (vec_select:SI (match_dup 1) (parallel [(const_int 7)])))))
11053 (match_operand:V8SI 2 "nonimmediate_operand" "xm")
11054 (parallel [(const_int 0)]))
11055 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
11057 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
11058 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))
11061 (vec_select:SI (match_dup 2) (parallel [(const_int 4)]))
11062 (vec_select:SI (match_dup 2) (parallel [(const_int 5)])))
11064 (vec_select:SI (match_dup 2) (parallel [(const_int 6)]))
11065 (vec_select:SI (match_dup 2) (parallel [(const_int 7)])))))))]
11067 "vph<plusminus_mnemonic>d\t{%2, %1, %0|%0, %1, %2}"
11068 [(set_attr "type" "sseiadd")
11069 (set_attr "prefix_extra" "1")
11070 (set_attr "prefix" "vex")
11071 (set_attr "mode" "OI")])
;; 128-bit (V4SI) horizontal add/subtract of doublewords.
;; Element pairs (0,1) and (2,3) of operand 1 and of operand 2 are selected.
;; Two alternatives: the "noavx" one ties operand 1 to the destination and
;; prints the two-operand form; the "avx" one prints the three-operand
;; v-prefixed form.
11073 (define_insn "ssse3_ph<plusminus_mnemonic>dv4si3"
11074 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
11079 (match_operand:V4SI 1 "register_operand" "0,x")
11080 (parallel [(const_int 0)]))
11081 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
11083 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
11084 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
11088 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
11089 (parallel [(const_int 0)]))
11090 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
11092 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
11093 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
11096 ph<plusminus_mnemonic>d\t{%2, %0|%0, %2}
11097 vph<plusminus_mnemonic>d\t{%2, %1, %0|%0, %1, %2}"
11098 [(set_attr "isa" "noavx,avx")
11099 (set_attr "type" "sseiadd")
11100 (set_attr "atom_unit" "complex")
11101 (set_attr "prefix_data16" "1,*")
11102 (set_attr "prefix_extra" "1")
11103 (set_attr "prefix" "orig,vex")
11104 (set_attr "mode" "TI")])
;; MMX (V2SI in "y" registers) horizontal add/subtract of doublewords.
;; Elements 0 and 1 of operand 1, then of operand 2, are selected.
;; Operand 1 is tied to the destination, hence the two-operand template.
;; prefix_rex is computed per insn by x86_extended_reg_mentioned_p.
11106 (define_insn "ssse3_ph<plusminus_mnemonic>dv2si3"
11107 [(set (match_operand:V2SI 0 "register_operand" "=y")
11111 (match_operand:V2SI 1 "register_operand" "0")
11112 (parallel [(const_int 0)]))
11113 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
11116 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
11117 (parallel [(const_int 0)]))
11118 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
11120 "ph<plusminus_mnemonic>d\t{%2, %0|%0, %2}"
11121 [(set_attr "type" "sseiadd")
11122 (set_attr "atom_unit" "complex")
11123 (set_attr "prefix_extra" "1")
11124 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
11125 (set_attr "mode" "DI")])
;; 256-bit vpmaddubsw: V32QI sources, V16HI result.
;; The pattern first selects the even-indexed bytes (0, 2, ..., 30) of
;; operands 1 and 2, then the odd-indexed bytes (1, 3, ..., 31) of the same
;; two operands (via match_dup), each selection being a V16QI.
;; Operand 2 may be a register or memory; the template is three-operand VEX.
11127 (define_insn "avx2_pmaddubsw256"
11128 [(set (match_operand:V16HI 0 "register_operand" "=x")
11133 (match_operand:V32QI 1 "register_operand" "x")
11134 (parallel [(const_int 0) (const_int 2)
11135 (const_int 4) (const_int 6)
11136 (const_int 8) (const_int 10)
11137 (const_int 12) (const_int 14)
11138 (const_int 16) (const_int 18)
11139 (const_int 20) (const_int 22)
11140 (const_int 24) (const_int 26)
11141 (const_int 28) (const_int 30)])))
11144 (match_operand:V32QI 2 "nonimmediate_operand" "xm")
11145 (parallel [(const_int 0) (const_int 2)
11146 (const_int 4) (const_int 6)
11147 (const_int 8) (const_int 10)
11148 (const_int 12) (const_int 14)
11149 (const_int 16) (const_int 18)
11150 (const_int 20) (const_int 22)
11151 (const_int 24) (const_int 26)
11152 (const_int 28) (const_int 30)]))))
11155 (vec_select:V16QI (match_dup 1)
11156 (parallel [(const_int 1) (const_int 3)
11157 (const_int 5) (const_int 7)
11158 (const_int 9) (const_int 11)
11159 (const_int 13) (const_int 15)
11160 (const_int 17) (const_int 19)
11161 (const_int 21) (const_int 23)
11162 (const_int 25) (const_int 27)
11163 (const_int 29) (const_int 31)])))
11165 (vec_select:V16QI (match_dup 2)
11166 (parallel [(const_int 1) (const_int 3)
11167 (const_int 5) (const_int 7)
11168 (const_int 9) (const_int 11)
11169 (const_int 13) (const_int 15)
11170 (const_int 17) (const_int 19)
11171 (const_int 21) (const_int 23)
11172 (const_int 25) (const_int 27)
11173 (const_int 29) (const_int 31)]))))))]
11175 "vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
11176 [(set_attr "type" "sseiadd")
11177 (set_attr "prefix_extra" "1")
11178 (set_attr "prefix" "vex")
11179 (set_attr "mode" "OI")])
;; 128-bit pmaddubsw: V16QI sources, V8HI result.
;; Even-indexed bytes (0, 2, ..., 14) of operands 1 and 2 are selected
;; first, then the odd-indexed bytes (1, 3, ..., 15) of the same operands.
;; Alternative 0 ("noavx") ties operand 1 to the destination; alternative 1
;; ("avx") uses the three-operand v-prefixed mnemonic.
11181 (define_insn "ssse3_pmaddubsw128"
11182 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
11187 (match_operand:V16QI 1 "register_operand" "0,x")
11188 (parallel [(const_int 0) (const_int 2)
11189 (const_int 4) (const_int 6)
11190 (const_int 8) (const_int 10)
11191 (const_int 12) (const_int 14)])))
11194 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")
11195 (parallel [(const_int 0) (const_int 2)
11196 (const_int 4) (const_int 6)
11197 (const_int 8) (const_int 10)
11198 (const_int 12) (const_int 14)]))))
11201 (vec_select:V8QI (match_dup 1)
11202 (parallel [(const_int 1) (const_int 3)
11203 (const_int 5) (const_int 7)
11204 (const_int 9) (const_int 11)
11205 (const_int 13) (const_int 15)])))
11207 (vec_select:V8QI (match_dup 2)
11208 (parallel [(const_int 1) (const_int 3)
11209 (const_int 5) (const_int 7)
11210 (const_int 9) (const_int 11)
11211 (const_int 13) (const_int 15)]))))))]
11214 pmaddubsw\t{%2, %0|%0, %2}
11215 vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
11216 [(set_attr "isa" "noavx,avx")
11217 (set_attr "type" "sseiadd")
11218 (set_attr "atom_unit" "simul")
11219 (set_attr "prefix_data16" "1,*")
11220 (set_attr "prefix_extra" "1")
11221 (set_attr "prefix" "orig,vex")
11222 (set_attr "mode" "TI")])
;; MMX pmaddubsw: V8QI sources in "y" registers, V4HI result.
;; Even-indexed bytes (0, 2, 4, 6) of operands 1 and 2 are selected first,
;; then the odd-indexed bytes (1, 3, 5, 7) of the same operands.
;; Operand 1 is tied to the destination; prefix_rex is computed per insn by
;; x86_extended_reg_mentioned_p.
11224 (define_insn "ssse3_pmaddubsw"
11225 [(set (match_operand:V4HI 0 "register_operand" "=y")
11230 (match_operand:V8QI 1 "register_operand" "0")
11231 (parallel [(const_int 0) (const_int 2)
11232 (const_int 4) (const_int 6)])))
11235 (match_operand:V8QI 2 "nonimmediate_operand" "ym")
11236 (parallel [(const_int 0) (const_int 2)
11237 (const_int 4) (const_int 6)]))))
11240 (vec_select:V4QI (match_dup 1)
11241 (parallel [(const_int 1) (const_int 3)
11242 (const_int 5) (const_int 7)])))
11244 (vec_select:V4QI (match_dup 2)
11245 (parallel [(const_int 1) (const_int 3)
11246 (const_int 5) (const_int 7)]))))))]
11248 "pmaddubsw\t{%2, %0|%0, %2}"
11249 [(set_attr "type" "sseiadd")
11250 (set_attr "atom_unit" "simul")
11251 (set_attr "prefix_extra" "1")
11252 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
11253 (set_attr "mode" "DI")])
;; Modes accepted by the pmulhrsw expander below: V4HI and V8HI are always
;; included, V16HI only when TARGET_AVX2 holds.
11255 (define_mode_iterator PMULHRSW
11256 [V4HI V8HI (V16HI "TARGET_AVX2")])
;; Expander for pmulhrsw over the PMULHRSW modes.
;; The RTL widens both sources with sign_extend to <ssedoublemode>,
;; multiplies them, and applies a right shift, an addition and a further
;; right shift.  The preparation code supplies operand 3 as the all-ones
;; constant vector (CONST1_RTX) and lets
;; ix86_fixup_binary_operands_no_copy legitimize the MULT operands.
11258 (define_expand "<ssse3_avx2>_pmulhrsw<mode>3"
11259 [(set (match_operand:PMULHRSW 0 "register_operand")
11261 (lshiftrt:<ssedoublemode>
11262 (plus:<ssedoublemode>
11263 (lshiftrt:<ssedoublemode>
11264 (mult:<ssedoublemode>
11265 (sign_extend:<ssedoublemode>
11266 (match_operand:PMULHRSW 1 "nonimmediate_operand"))
11267 (sign_extend:<ssedoublemode>
11268 (match_operand:PMULHRSW 2 "nonimmediate_operand")))
11274 operands[3] = CONST1_RTX(<MODE>mode);
11275 ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
;; SSE/AVX2 pmulhrsw insn over VI2_AVX2 modes, matching the RTL produced by
;; the expander of the same name.  Operand 1 carries the "%" commutative
;; marker, operand 3 must satisfy const1_operand, and the insn is only valid
;; when ix86_binary_operator_ok accepts the MULT operands.
;; Alternative 0 is the two-operand legacy form, alternative 1 the VEX form.
11278 (define_insn "*<ssse3_avx2>_pmulhrsw<mode>3"
11279 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
11281 (lshiftrt:<ssedoublemode>
11282 (plus:<ssedoublemode>
11283 (lshiftrt:<ssedoublemode>
11284 (mult:<ssedoublemode>
11285 (sign_extend:<ssedoublemode>
11286 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,x"))
11287 (sign_extend:<ssedoublemode>
11288 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,xm")))
11290 (match_operand:VI2_AVX2 3 "const1_operand"))
11292 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
11294 pmulhrsw\t{%2, %0|%0, %2}
11295 vpmulhrsw\t{%2, %1, %0|%0, %1, %2}"
11296 [(set_attr "isa" "noavx,avx")
11297 (set_attr "type" "sseimul")
11298 (set_attr "prefix_data16" "1,*")
11299 (set_attr "prefix_extra" "1")
11300 (set_attr "prefix" "orig,vex")
11301 (set_attr "mode" "<sseinsnmode>")])
;; MMX (V4HI, "y" registers) pmulhrsw insn.  Operand 1 is commutative ("%")
;; and tied to the destination; operand 3 must satisfy const1_operand.
;; Enabled under TARGET_SSSE3 when ix86_binary_operator_ok accepts the
;; MULT operands.  prefix_rex is computed per insn.
11303 (define_insn "*ssse3_pmulhrswv4hi3"
11304 [(set (match_operand:V4HI 0 "register_operand" "=y")
11311 (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
11313 (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
11315 (match_operand:V4HI 3 "const1_operand"))
11317 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
11318 "pmulhrsw\t{%2, %0|%0, %2}"
11319 [(set_attr "type" "sseimul")
11320 (set_attr "prefix_extra" "1")
11321 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
11322 (set_attr "mode" "DI")])
;; pshufb / vpshufb over VI1_AVX2 modes.  Operand 1 is a register (tied to
;; the destination in the "noavx" alternative) and operand 2 may be a
;; register or memory.  Both alternatives are marked "vector" for
;; btver2_decode.
11324 (define_insn "<ssse3_avx2>_pshufb<mode>3"
11325 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
11327 [(match_operand:VI1_AVX2 1 "register_operand" "0,x")
11328 (match_operand:VI1_AVX2 2 "nonimmediate_operand" "xm,xm")]
11332 pshufb\t{%2, %0|%0, %2}
11333 vpshufb\t{%2, %1, %0|%0, %1, %2}"
11334 [(set_attr "isa" "noavx,avx")
11335 (set_attr "type" "sselog1")
11336 (set_attr "prefix_data16" "1,*")
11337 (set_attr "prefix_extra" "1")
11338 (set_attr "prefix" "orig,vex")
11339 (set_attr "btver2_decode" "vector,vector")
11340 (set_attr "mode" "<sseinsnmode>")])
;; MMX (V8QI, "y" registers) pshufb, expressed as an unspec of operand 1
;; (tied to the destination) and operand 2 (register or memory).
;; prefix_rex is computed per insn by x86_extended_reg_mentioned_p.
;; The output template previously ended in a stray C-style ';', which the
;; md reader treats as the start of an empty comment; it has been removed.
11342 (define_insn "ssse3_pshufbv8qi3"
11343 [(set (match_operand:V8QI 0 "register_operand" "=y")
11344 (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0")
11345 (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
11348 "pshufb\t{%2, %0|%0, %2}"
11349 [(set_attr "type" "sselog1")
11350 (set_attr "prefix_extra" "1")
11351 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
11352 (set_attr "mode" "DI")])
;; psign / vpsign over VI124_AVX2 modes; the element-size suffix comes from
;; <ssemodesuffix>.  Operand 1 is a register (tied to the destination in
;; the "noavx" alternative); operand 2 may be a register or memory.
11354 (define_insn "<ssse3_avx2>_psign<mode>3"
11355 [(set (match_operand:VI124_AVX2 0 "register_operand" "=x,x")
11357 [(match_operand:VI124_AVX2 1 "register_operand" "0,x")
11358 (match_operand:VI124_AVX2 2 "nonimmediate_operand" "xm,xm")]
11362 psign<ssemodesuffix>\t{%2, %0|%0, %2}
11363 vpsign<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11364 [(set_attr "isa" "noavx,avx")
11365 (set_attr "type" "sselog1")
11366 (set_attr "prefix_data16" "1,*")
11367 (set_attr "prefix_extra" "1")
11368 (set_attr "prefix" "orig,vex")
11369 (set_attr "mode" "<sseinsnmode>")])
;; MMX psign over the MMXMODEI modes ("y" registers); the element-size
;; suffix comes from <mmxvecsize>.  Operand 1 is tied to the destination and
;; operand 2 may be a register or memory.  prefix_rex is computed per insn.
;; The output template previously ended in a stray C-style ';', which the
;; md reader treats as the start of an empty comment; it has been removed.
11371 (define_insn "ssse3_psign<mode>3"
11372 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
11374 [(match_operand:MMXMODEI 1 "register_operand" "0")
11375 (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
11378 "psign<mmxvecsize>\t{%2, %0|%0, %2}"
11379 [(set_attr "type" "sselog1")
11380 (set_attr "prefix_extra" "1")
11381 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
11382 (set_attr "mode" "DI")])
;; palignr / vpalignr over SSESCALARMODE.  Operand 3 must satisfy
;; const_0_to_255_mul_8_operand; the output code divides it by 8 before
;; printing, so the value in the RTL is eight times the immediate that is
;; emitted in the assembly.  The mnemonic is chosen by which_alternative:
;; two-operand legacy form or three-operand VEX form.
11384 (define_insn "<ssse3_avx2>_palignr<mode>"
11385 [(set (match_operand:SSESCALARMODE 0 "register_operand" "=x,x")
11386 (unspec:SSESCALARMODE
11387 [(match_operand:SSESCALARMODE 1 "register_operand" "0,x")
11388 (match_operand:SSESCALARMODE 2 "nonimmediate_operand" "xm,xm")
11389 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n,n")]
11393 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
11395 switch (which_alternative)
11398 return "palignr\t{%3, %2, %0|%0, %2, %3}";
11400 return "vpalignr\t{%3, %2, %1, %0|%0, %1, %2, %3}";
11402 gcc_unreachable ();
11405 [(set_attr "isa" "noavx,avx")
11406 (set_attr "type" "sseishft")
11407 (set_attr "atom_unit" "sishuf")
11408 (set_attr "prefix_data16" "1,*")
11409 (set_attr "prefix_extra" "1")
11410 (set_attr "length_immediate" "1")
11411 (set_attr "prefix" "orig,vex")
11412 (set_attr "mode" "<sseinsnmode>")])
;; MMX (DImode, "y" registers) palignr.  Operand 1 is tied to the
;; destination.  Operand 3 must satisfy const_0_to_255_mul_8_operand and is
;; divided by 8 by the output code before the template is printed.
;; prefix_rex is computed per insn by x86_extended_reg_mentioned_p.
11414 (define_insn "ssse3_palignrdi"
11415 [(set (match_operand:DI 0 "register_operand" "=y")
11416 (unspec:DI [(match_operand:DI 1 "register_operand" "0")
11417 (match_operand:DI 2 "nonimmediate_operand" "ym")
11418 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
11422 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
11423 return "palignr\t{%3, %2, %0|%0, %2, %3}";
11425 [(set_attr "type" "sseishft")
11426 (set_attr "atom_unit" "sishuf")
11427 (set_attr "prefix_extra" "1")
11428 (set_attr "length_immediate" "1")
11429 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
11430 (set_attr "mode" "DI")])
;; Vector absolute value (pabs) over VI124_AVX2_48_AVX512F modes, with the
;; optional masking supplied through <mask_name> / <mask_operand2>.
;; Requires TARGET_SSSE3 together with <mask_mode512bit_condition>.
;; The "%v" in the template pairs with the "maybe_vex" prefix attribute.
11432 (define_insn "<mask_codefor>abs<mode>2<mask_name>"
11433 [(set (match_operand:VI124_AVX2_48_AVX512F 0 "register_operand" "=v")
11434 (abs:VI124_AVX2_48_AVX512F
11435 (match_operand:VI124_AVX2_48_AVX512F 1 "nonimmediate_operand" "vm")))]
11436 "TARGET_SSSE3 && <mask_mode512bit_condition>"
11437 "%vpabs<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
11438 [(set_attr "type" "sselog1")
11439 (set_attr "prefix_data16" "1")
11440 (set_attr "prefix_extra" "1")
11441 (set_attr "prefix" "maybe_vex")
11442 (set_attr "mode" "<sseinsnmode>")])
;; Expander for vector absolute value over VI124_AVX2_48_AVX512F modes.
;; Its preparation code calls ix86_expand_sse2_abs (operands[0],
;; operands[1]) to emit an alternative sequence.
11444 (define_expand "abs<mode>2"
11445 [(set (match_operand:VI124_AVX2_48_AVX512F 0 "register_operand")
11446 (abs:VI124_AVX2_48_AVX512F
11447 (match_operand:VI124_AVX2_48_AVX512F 1 "nonimmediate_operand")))]
11452 ix86_expand_sse2_abs (operands[0], operands[1]);
;; MMX pabs over the MMXMODEI modes ("y" registers); the element-size suffix
;; comes from <mmxvecsize>.  The source may be a register or memory and is
;; not tied to the destination.  prefix_rex is computed per insn.
;; The output template previously ended in a stray C-style ';', which the
;; md reader treats as the start of an empty comment; it has been removed.
11457 (define_insn "abs<mode>2"
11458 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
11460 (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
11462 "pabs<mmxvecsize>\t{%1, %0|%0, %1}"
11463 [(set_attr "type" "sselog1")
11464 (set_attr "prefix_rep" "0")
11465 (set_attr "prefix_extra" "1")
11466 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
11467 (set_attr "mode" "DI")])
11469 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
11471 ;; AMD SSE4A instructions
11473 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; SSE4A movnt<ssemodesuffix>: stores the MODEF value held in an SSE
;; register (operand 1) to the memory destination (operand 0).
11475 (define_insn "sse4a_movnt<mode>"
11476 [(set (match_operand:MODEF 0 "memory_operand" "=m")
11478 [(match_operand:MODEF 1 "register_operand" "x")]
11481 "movnt<ssemodesuffix>\t{%1, %0|%0, %1}"
11482 [(set_attr "type" "ssemov")
11483 (set_attr "mode" "<MODE>")])
;; Vector-operand variant of the SSE4A movnt store: element 0 of the VF_128
;; register operand 1 is selected and stored to the scalar-mode memory
;; destination (operand 0).
11485 (define_insn "sse4a_vmmovnt<mode>"
11486 [(set (match_operand:<ssescalarmode> 0 "memory_operand" "=m")
11487 (unspec:<ssescalarmode>
11488 [(vec_select:<ssescalarmode>
11489 (match_operand:VF_128 1 "register_operand" "x")
11490 (parallel [(const_int 0)]))]
11493 "movnt<ssescalarmodesuffix>\t{%1, %0|%0, %1}"
11494 [(set_attr "type" "ssemov")
11495 (set_attr "mode" "<ssescalarmode>")])
;; SSE4A extrq, immediate form.  Operand 1 is tied to the destination;
;; operands 2 and 3 must each satisfy const_0_to_255_operand and are both
;; printed as immediates, hence length_immediate "2".
11497 (define_insn "sse4a_extrqi"
11498 [(set (match_operand:V2DI 0 "register_operand" "=x")
11499 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11500 (match_operand 2 "const_0_to_255_operand")
11501 (match_operand 3 "const_0_to_255_operand")]
11504 "extrq\t{%3, %2, %0|%0, %2, %3}"
11505 [(set_attr "type" "sse")
11506 (set_attr "prefix_data16" "1")
11507 (set_attr "length_immediate" "2")
11508 (set_attr "mode" "TI")])
;; SSE4A extrq, register form.  Operand 1 is tied to the destination and
;; operand 2 is a V16QI register.
11510 (define_insn "sse4a_extrq"
11511 [(set (match_operand:V2DI 0 "register_operand" "=x")
11512 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11513 (match_operand:V16QI 2 "register_operand" "x")]
11516 "extrq\t{%2, %0|%0, %2}"
11517 [(set_attr "type" "sse")
11518 (set_attr "prefix_data16" "1")
11519 (set_attr "mode" "TI")])
;; SSE4A insertq, immediate form.  Operand 1 is tied to the destination,
;; operand 2 is a V2DI register, and operands 3 and 4 must each satisfy
;; const_0_to_255_operand; both are printed as immediates
;; (length_immediate "2").
11521 (define_insn "sse4a_insertqi"
11522 [(set (match_operand:V2DI 0 "register_operand" "=x")
11523 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11524 (match_operand:V2DI 2 "register_operand" "x")
11525 (match_operand 3 "const_0_to_255_operand")
11526 (match_operand 4 "const_0_to_255_operand")]
11529 "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
11530 [(set_attr "type" "sseins")
11531 (set_attr "prefix_data16" "0")
11532 (set_attr "prefix_rep" "1")
11533 (set_attr "length_immediate" "2")
11534 (set_attr "mode" "TI")])
;; SSE4A insertq, register form.  Operand 1 is tied to the destination and
;; operand 2 is a V2DI register.
11536 (define_insn "sse4a_insertq"
11537 [(set (match_operand:V2DI 0 "register_operand" "=x")
11538 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11539 (match_operand:V2DI 2 "register_operand" "x")]
11542 "insertq\t{%2, %0|%0, %2}"
11543 [(set_attr "type" "sseins")
11544 (set_attr "prefix_data16" "0")
11545 (set_attr "prefix_rep" "1")
11546 (set_attr "mode" "TI")])
11548 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
11550 ;; Intel SSE4.1 instructions
11552 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Immediate-controlled floating-point blend over VF_128_256 modes,
;; expressed as a vec_merge of operand 2 and operand 1 under the mask in
;; operand 3 (predicate const_0_to_<blendbits>_operand).
;; Alternative 0 is the legacy two-source form with operand 1 tied to the
;; destination; alternative 1 is the VEX form.
11554 (define_insn "<sse4_1>_blend<ssemodesuffix><avxsizesuffix>"
11555 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
11556 (vec_merge:VF_128_256
11557 (match_operand:VF_128_256 2 "nonimmediate_operand" "xm,xm")
11558 (match_operand:VF_128_256 1 "register_operand" "0,x")
11559 (match_operand:SI 3 "const_0_to_<blendbits>_operand")))]
11562 blend<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
11563 vblend<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11564 [(set_attr "isa" "noavx,avx")
11565 (set_attr "type" "ssemov")
11566 (set_attr "length_immediate" "1")
11567 (set_attr "prefix_data16" "1,*")
11568 (set_attr "prefix_extra" "1")
11569 (set_attr "prefix" "orig,vex")
11570 (set_attr "mode" "<MODE>")])
;; Variable (register-controlled) floating-point blend over VF_128_256
;; modes.  Operand 3 is the selector register: the legacy alternative
;; constrains it with "Yz" and ties operand 1 to the destination, while the
;; VEX alternative accepts any "x" register.
;; NOTE(review): length_immediate is "1" for both alternatives here, whereas
;; the pblendvb pattern uses "*,1" -- confirm which is intended.
11572 (define_insn "<sse4_1>_blendv<ssemodesuffix><avxsizesuffix>"
11573 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
11575 [(match_operand:VF_128_256 1 "register_operand" "0,x")
11576 (match_operand:VF_128_256 2 "nonimmediate_operand" "xm,xm")
11577 (match_operand:VF_128_256 3 "register_operand" "Yz,x")]
11581 blendv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
11582 vblendv<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11583 [(set_attr "isa" "noavx,avx")
11584 (set_attr "type" "ssemov")
11585 (set_attr "length_immediate" "1")
11586 (set_attr "prefix_data16" "1,*")
11587 (set_attr "prefix_extra" "1")
11588 (set_attr "prefix" "orig,vex")
11589 (set_attr "btver2_decode" "vector,vector")
11590 (set_attr "mode" "<MODE>")])
;; dp<ssemodesuffix> / vdp<ssemodesuffix> over VF_128_256 modes.
;; Operand 1 is marked commutative ("%"); operand 3 is an immediate that
;; must satisfy const_0_to_255_operand.
11592 (define_insn "<sse4_1>_dp<ssemodesuffix><avxsizesuffix>"
11593 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
11595 [(match_operand:VF_128_256 1 "nonimmediate_operand" "%0,x")
11596 (match_operand:VF_128_256 2 "nonimmediate_operand" "xm,xm")
11597 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
11601 dp<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
11602 vdp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11603 [(set_attr "isa" "noavx,avx")
11604 (set_attr "type" "ssemul")
11605 (set_attr "length_immediate" "1")
11606 (set_attr "prefix_data16" "1,*")
11607 (set_attr "prefix_extra" "1")
11608 (set_attr "prefix" "orig,vex")
11609 (set_attr "btver2_decode" "vector,vector")
11610 (set_attr "mode" "<MODE>")])
;; movntdqa load over VI8_AVX2_AVX512F modes: the source (operand 1) must be
;; memory.  Alternative 0 targets an "x" register with a maybe_vex prefix;
;; alternative 1 targets a "v" register with an evex prefix.
11612 (define_insn "<sse4_1_avx2>_movntdqa"
11613 [(set (match_operand:VI8_AVX2_AVX512F 0 "register_operand" "=x, v")
11614 (unspec:VI8_AVX2_AVX512F [(match_operand:VI8_AVX2_AVX512F 1 "memory_operand" "m, m")]
11617 "%vmovntdqa\t{%1, %0|%0, %1}"
11618 [(set_attr "type" "ssemov")
11619 (set_attr "prefix_extra" "1, *")
11620 (set_attr "prefix" "maybe_vex, evex")
11621 (set_attr "mode" "<sseinsnmode>")])
;; mpsadbw / vmpsadbw over VI1_AVX2 modes.  Operand 1 is a register (tied to
;; the destination in the legacy alternative), operand 2 a register or
;; memory, and operand 3 an immediate satisfying const_0_to_255_operand.
11623 (define_insn "<sse4_1_avx2>_mpsadbw"
11624 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
11626 [(match_operand:VI1_AVX2 1 "register_operand" "0,x")
11627 (match_operand:VI1_AVX2 2 "nonimmediate_operand" "xm,xm")
11628 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
11632 mpsadbw\t{%3, %2, %0|%0, %2, %3}
11633 vmpsadbw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11634 [(set_attr "isa" "noavx,avx")
11635 (set_attr "type" "sselog1")
11636 (set_attr "length_immediate" "1")
11637 (set_attr "prefix_extra" "1")
11638 (set_attr "prefix" "orig,vex")
11639 (set_attr "btver2_decode" "vector,vector")
11640 (set_attr "mode" "<sseinsnmode>")])
;; 256-bit vpackusdw: two V8SI sources (operand 2 may be memory) produce a
;; V16HI destination.  Three-operand VEX form only.
11642 (define_insn "avx2_packusdw"
11643 [(set (match_operand:V16HI 0 "register_operand" "=x")
11646 (match_operand:V8SI 1 "register_operand" "x"))
11648 (match_operand:V8SI 2 "nonimmediate_operand" "xm"))))]
11650 "vpackusdw\t{%2, %1, %0|%0, %1, %2}"
11651 [(set_attr "type" "sselog")
11652 (set_attr "prefix_extra" "1")
11653 (set_attr "prefix" "vex")
11654 (set_attr "mode" "OI")])
;; 128-bit packusdw / vpackusdw: two V4SI sources produce a V8HI
;; destination.  The legacy alternative ties operand 1 to the destination;
;; the AVX alternative is three-operand.
11656 (define_insn "sse4_1_packusdw"
11657 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
11660 (match_operand:V4SI 1 "register_operand" "0,x"))
11662 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm"))))]
11665 packusdw\t{%2, %0|%0, %2}
11666 vpackusdw\t{%2, %1, %0|%0, %1, %2}"
11667 [(set_attr "isa" "noavx,avx")
11668 (set_attr "type" "sselog")
11669 (set_attr "prefix_extra" "1")
11670 (set_attr "prefix" "orig,vex")
11671 (set_attr "mode" "TI")])
;; Variable byte blend (pblendvb / vpblendvb) over VI1_AVX2 modes.
;; Operand 3 is the selector register: constrained to "Yz" in the legacy
;; alternative and to any "x" register in the VEX alternative.
;; length_immediate is "*,1": only the VEX alternative is given an
;; explicit one-byte immediate length.
11673 (define_insn "<sse4_1_avx2>_pblendvb"
11674 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
11676 [(match_operand:VI1_AVX2 1 "register_operand" "0,x")
11677 (match_operand:VI1_AVX2 2 "nonimmediate_operand" "xm,xm")
11678 (match_operand:VI1_AVX2 3 "register_operand" "Yz,x")]
11682 pblendvb\t{%3, %2, %0|%0, %2, %3}
11683 vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11684 [(set_attr "isa" "noavx,avx")
11685 (set_attr "type" "ssemov")
11686 (set_attr "prefix_extra" "1")
11687 (set_attr "length_immediate" "*,1")
11688 (set_attr "prefix" "orig,vex")
11689 (set_attr "btver2_decode" "vector,vector")
11690 (set_attr "mode" "<sseinsnmode>")])
;; 128-bit pblendw / vpblendw on V8HI.  Operand 2 (register or memory) and
;; operand 1 (register) are the two data sources and operand 3 is an
;; immediate satisfying const_0_to_255_operand.
11692 (define_insn "sse4_1_pblendw"
11693 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
11695 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
11696 (match_operand:V8HI 1 "register_operand" "0,x")
11697 (match_operand:SI 3 "const_0_to_255_operand" "n,n")))]
11700 pblendw\t{%3, %2, %0|%0, %2, %3}
11701 vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11702 [(set_attr "isa" "noavx,avx")
11703 (set_attr "type" "ssemov")
11704 (set_attr "prefix_extra" "1")
11705 (set_attr "length_immediate" "1")
11706 (set_attr "prefix" "orig,vex")
11707 (set_attr "mode" "TI")])
11709 ;; The builtin uses an 8-bit immediate. Expand that.
;; The preparation code keeps the low 8 bits of operand 3 and replicates
;; them into bits 8..15 (val << 8 | val), so that the mask handed to the
;; V16HI pattern covers all sixteen elements.
11710 (define_expand "avx2_pblendw"
11711 [(set (match_operand:V16HI 0 "register_operand")
11713 (match_operand:V16HI 2 "nonimmediate_operand")
11714 (match_operand:V16HI 1 "register_operand")
11715 (match_operand:SI 3 "const_0_to_255_operand")))]
11718 HOST_WIDE_INT val = INTVAL (operands[3]) & 0xff;
11719 operands[3] = GEN_INT (val << 8 | val);
;; 256-bit vpblendw insn on V16HI.  Operand 3 must satisfy
;; avx2_pblendw_operand; the output code masks it back down to its low
;; 8 bits before printing, since that is what the instruction encodes.
11722 (define_insn "*avx2_pblendw"
11723 [(set (match_operand:V16HI 0 "register_operand" "=x")
11725 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
11726 (match_operand:V16HI 1 "register_operand" "x")
11727 (match_operand:SI 3 "avx2_pblendw_operand" "n")))]
11730 operands[3] = GEN_INT (INTVAL (operands[3]) & 0xff);
11731 return "vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}";
11733 [(set_attr "type" "ssemov")
11734 (set_attr "prefix_extra" "1")
11735 (set_attr "length_immediate" "1")
11736 (set_attr "prefix" "vex")
11737 (set_attr "mode" "OI")])
;; vpblendd over VI4_AVX2 modes, expressed as a vec_merge of operand 2
;; (register or memory) and operand 1 (register) under the immediate mask
;; in operand 3 (const_0_to_255_operand).  VEX three-operand form only.
11739 (define_insn "avx2_pblendd<mode>"
11740 [(set (match_operand:VI4_AVX2 0 "register_operand" "=x")
11741 (vec_merge:VI4_AVX2
11742 (match_operand:VI4_AVX2 2 "nonimmediate_operand" "xm")
11743 (match_operand:VI4_AVX2 1 "register_operand" "x")
11744 (match_operand:SI 3 "const_0_to_255_operand" "n")))]
11746 "vpblendd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11747 [(set_attr "type" "ssemov")
11748 (set_attr "prefix_extra" "1")
11749 (set_attr "length_immediate" "1")
11750 (set_attr "prefix" "vex")
11751 (set_attr "mode" "<sseinsnmode>")])
;; phminposuw on V8HI, modelled as UNSPEC_PHMINPOSUW of a single source
;; that may be a register or memory.  "%v" lets the same template emit the
;; VEX-prefixed mnemonic (prefix attribute "maybe_vex").
11753 (define_insn "sse4_1_phminposuw"
11754 [(set (match_operand:V8HI 0 "register_operand" "=x")
11755 (unspec:V8HI [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
11756 UNSPEC_PHMINPOSUW))]
11758 "%vphminposuw\t{%1, %0|%0, %1}"
11759 [(set_attr "type" "sselog1")
11760 (set_attr "prefix_extra" "1")
11761 (set_attr "prefix" "maybe_vex")
11762 (set_attr "mode" "TI")])
;; AVX2 byte-to-word extension: the whole V16QI source (register or memory)
;; is widened to V16HI.  <extsuffix> supplies the extension-kind part of
;; the vpmov..bw mnemonic.
11764 (define_insn "avx2_<code>v16qiv16hi2"
11765 [(set (match_operand:V16HI 0 "register_operand" "=x")
11767 (match_operand:V16QI 1 "nonimmediate_operand" "xm")))]
11769 "vpmov<extsuffix>bw\t{%1, %0|%0, %1}"
11770 [(set_attr "type" "ssemov")
11771 (set_attr "prefix_extra" "1")
11772 (set_attr "prefix" "vex")
11773 (set_attr "mode" "OI")])
;; SSE4.1 byte-to-word extension: elements 0..7 of the V16QI source are
;; widened to V8HI.  Only 8 bytes are consumed, so the Intel-syntax operand
;; uses the %q (quadword) modifier and ssememalign is 64.
11775 (define_insn "sse4_1_<code>v8qiv8hi2"
11776 [(set (match_operand:V8HI 0 "register_operand" "=x")
11779 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
11780 (parallel [(const_int 0) (const_int 1)
11781 (const_int 2) (const_int 3)
11782 (const_int 4) (const_int 5)
11783 (const_int 6) (const_int 7)]))))]
11785 "%vpmov<extsuffix>bw\t{%1, %0|%0, %q1}"
11786 [(set_attr "type" "ssemov")
11787 (set_attr "ssememalign" "64")
11788 (set_attr "prefix_extra" "1")
11789 (set_attr "prefix" "maybe_vex")
11790 (set_attr "mode" "TI")])
;; AVX-512F byte-to-doubleword extension: the whole V16QI source (register
;; or memory) is widened to V16SI, optionally under a mask
;; (<mask_name> / <mask_operand2>).
;; All sixteen source bytes are consumed (there is no element selection),
;; so the Intel-syntax operand is printed with its natural V16QI size (%1).
;; It was previously forced to a quadword with %q1, which mis-sizes a
;; memory operand.
11792 (define_insn "<mask_codefor>avx512f_<code>v16qiv16si2<mask_name>"
11793 [(set (match_operand:V16SI 0 "register_operand" "=v")
11795 (match_operand:V16QI 1 "nonimmediate_operand" "vm")))]
11797 "vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
11798 [(set_attr "type" "ssemov")
11799 (set_attr "prefix" "evex")
11800 (set_attr "mode" "XI")])
;; AVX2 byte-to-doubleword extension: elements 0..7 of the V16QI source are
;; widened to V8SI.  Only 8 bytes are consumed, so the Intel-syntax operand
;; uses the %q (quadword) modifier.
11802 (define_insn "avx2_<code>v8qiv8si2"
11803 [(set (match_operand:V8SI 0 "register_operand" "=x")
11806 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
11807 (parallel [(const_int 0) (const_int 1)
11808 (const_int 2) (const_int 3)
11809 (const_int 4) (const_int 5)
11810 (const_int 6) (const_int 7)]))))]
11812 "vpmov<extsuffix>bd\t{%1, %0|%0, %q1}"
11813 [(set_attr "type" "ssemov")
11814 (set_attr "prefix_extra" "1")
11815 (set_attr "prefix" "vex")
11816 (set_attr "mode" "OI")])
;; SSE4.1 byte-to-doubleword extension: elements 0..3 of the V16QI source
;; are widened to V4SI.  Only 4 bytes are consumed, so the Intel-syntax
;; operand uses the %k (doubleword) modifier and ssememalign is 32.
11818 (define_insn "sse4_1_<code>v4qiv4si2"
11819 [(set (match_operand:V4SI 0 "register_operand" "=x")
11822 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
11823 (parallel [(const_int 0) (const_int 1)
11824 (const_int 2) (const_int 3)]))))]
11826 "%vpmov<extsuffix>bd\t{%1, %0|%0, %k1}"
11827 [(set_attr "type" "ssemov")
11828 (set_attr "ssememalign" "32")
11829 (set_attr "prefix_extra" "1")
11830 (set_attr "prefix" "maybe_vex")
11831 (set_attr "mode" "TI")])
;; AVX-512F word-to-doubleword extension: the whole V16HI source (register
;; or memory) is widened to V16SI, optionally under a mask.  The source is
;; printed with its natural size (%1).
11833 (define_insn "avx512f_<code>v16hiv16si2<mask_name>"
11834 [(set (match_operand:V16SI 0 "register_operand" "=v")
11836 (match_operand:V16HI 1 "nonimmediate_operand" "vm")))]
11838 "vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
11839 [(set_attr "type" "ssemov")
11840 (set_attr "prefix" "evex")
11841 (set_attr "mode" "XI")])
;; AVX2 word-to-doubleword extension: the whole V8HI source (register or
;; memory) is widened to V8SI and printed with its natural size (%1).
11843 (define_insn "avx2_<code>v8hiv8si2"
11844 [(set (match_operand:V8SI 0 "register_operand" "=x")
11846 (match_operand:V8HI 1 "nonimmediate_operand" "xm")))]
11848 "vpmov<extsuffix>wd\t{%1, %0|%0, %1}"
11849 [(set_attr "type" "ssemov")
11850 (set_attr "prefix_extra" "1")
11851 (set_attr "prefix" "vex")
11852 (set_attr "mode" "OI")])
;; SSE4.1 word-to-doubleword extension: elements 0..3 of the V8HI source
;; are widened to V4SI.  Only 8 bytes are consumed, so the Intel-syntax
;; operand uses the %q (quadword) modifier and ssememalign is 64.
11854 (define_insn "sse4_1_<code>v4hiv4si2"
11855 [(set (match_operand:V4SI 0 "register_operand" "=x")
11858 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
11859 (parallel [(const_int 0) (const_int 1)
11860 (const_int 2) (const_int 3)]))))]
11862 "%vpmov<extsuffix>wd\t{%1, %0|%0, %q1}"
11863 [(set_attr "type" "ssemov")
11864 (set_attr "ssememalign" "64")
11865 (set_attr "prefix_extra" "1")
11866 (set_attr "prefix" "maybe_vex")
11867 (set_attr "mode" "TI")])
;; AVX-512F byte-to-quadword extension: elements 0..7 of the V16QI source
;; are widened to V8DI, optionally under a mask.
;; Eight source bytes are consumed, so the Intel-syntax operand must be
;; printed as a quadword (%q1), exactly as the other 8-byte-select patterns
;; in this file do (avx2_<code>v8qiv8si2, sse4_1_<code>v8qiv8hi2).  It was
;; previously printed with %k1 (doubleword), which under-sizes a memory
;; operand.
11869 (define_insn "avx512f_<code>v8qiv8di2<mask_name>"
11870 [(set (match_operand:V8DI 0 "register_operand" "=v")
11873 (match_operand:V16QI 1 "nonimmediate_operand" "vm")
11874 (parallel [(const_int 0) (const_int 1)
11875 (const_int 2) (const_int 3)
11876 (const_int 4) (const_int 5)
11877 (const_int 6) (const_int 7)]))))]
11879 "vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
11880 [(set_attr "type" "ssemov")
11881 (set_attr "prefix" "evex")
11882 (set_attr "mode" "XI")])
;; AVX2 byte-to-quadword extension: elements 0..3 of the V16QI source are
;; widened to V4DI.  Only 4 bytes are consumed, so the Intel-syntax operand
;; uses the %k (doubleword) modifier.
11884 (define_insn "avx2_<code>v4qiv4di2"
11885 [(set (match_operand:V4DI 0 "register_operand" "=x")
11888 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
11889 (parallel [(const_int 0) (const_int 1)
11890 (const_int 2) (const_int 3)]))))]
11892 "vpmov<extsuffix>bq\t{%1, %0|%0, %k1}"
11893 [(set_attr "type" "ssemov")
11894 (set_attr "prefix_extra" "1")
11895 (set_attr "prefix" "vex")
11896 (set_attr "mode" "OI")])
;; SSE4.1 byte-to-quadword extension: elements 0..1 of the V16QI source are
;; widened to V2DI.  Only 2 bytes are consumed, so the Intel-syntax operand
;; uses the %w (word) modifier and ssememalign is 16.
11898 (define_insn "sse4_1_<code>v2qiv2di2"
11899 [(set (match_operand:V2DI 0 "register_operand" "=x")
11902 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
11903 (parallel [(const_int 0) (const_int 1)]))))]
11905 "%vpmov<extsuffix>bq\t{%1, %0|%0, %w1}"
11906 [(set_attr "type" "ssemov")
11907 (set_attr "ssememalign" "16")
11908 (set_attr "prefix_extra" "1")
11909 (set_attr "prefix" "maybe_vex")
11910 (set_attr "mode" "TI")])
;; AVX-512F word-to-quadword extension: the whole V8HI source (register or
;; memory) is widened to V8DI, optionally under a mask.
;; All eight source words (16 bytes) are consumed -- there is no element
;; selection -- so the Intel-syntax operand is printed with its natural V8HI
;; size (%1), as avx2_<code>v8hiv8si2 does for the same source mode.  It was
;; previously forced to a quadword with %q1, which under-sizes a memory
;; operand.
11912 (define_insn "avx512f_<code>v8hiv8di2<mask_name>"
11913 [(set (match_operand:V8DI 0 "register_operand" "=v")
11915 (match_operand:V8HI 1 "nonimmediate_operand" "vm")))]
11917 "vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
11918 [(set_attr "type" "ssemov")
11919 (set_attr "prefix" "evex")
11920 (set_attr "mode" "XI")])
;; AVX2 word-to-quadword extension: elements 0..3 of the V8HI source are
;; widened to V4DI.  Only 8 bytes are consumed, so the Intel-syntax operand
;; uses the %q (quadword) modifier.
11922 (define_insn "avx2_<code>v4hiv4di2"
11923 [(set (match_operand:V4DI 0 "register_operand" "=x")
11926 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
11927 (parallel [(const_int 0) (const_int 1)
11928 (const_int 2) (const_int 3)]))))]
11930 "vpmov<extsuffix>wq\t{%1, %0|%0, %q1}"
11931 [(set_attr "type" "ssemov")
11932 (set_attr "prefix_extra" "1")
11933 (set_attr "prefix" "vex")
11934 (set_attr "mode" "OI")])
;; SSE4.1 word-to-quadword extension: elements 0..1 of the V8HI source are
;; widened to V2DI.  Only 4 bytes are consumed, so the Intel-syntax operand
;; uses the %k (doubleword) modifier and ssememalign is 32.
11936 (define_insn "sse4_1_<code>v2hiv2di2"
11937 [(set (match_operand:V2DI 0 "register_operand" "=x")
11940 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
11941 (parallel [(const_int 0) (const_int 1)]))))]
11943 "%vpmov<extsuffix>wq\t{%1, %0|%0, %k1}"
11944 [(set_attr "type" "ssemov")
11945 (set_attr "ssememalign" "32")
11946 (set_attr "prefix_extra" "1")
11947 (set_attr "prefix" "maybe_vex")
11948 (set_attr "mode" "TI")])
;; AVX-512F doubleword-to-quadword extension: the whole V8SI source
;; (register or memory) is widened to V8DI, optionally under a mask, and is
;; printed with its natural size (%1).
11950 (define_insn "avx512f_<code>v8siv8di2<mask_name>"
11951 [(set (match_operand:V8DI 0 "register_operand" "=v")
11953 (match_operand:V8SI 1 "nonimmediate_operand" "vm")))]
11955 "vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
11956 [(set_attr "type" "ssemov")
11957 (set_attr "prefix" "evex")
11958 (set_attr "mode" "XI")])
;; AVX2 doubleword-to-quadword extension: the whole V4SI source (register or
;; memory) is widened to V4DI and printed with its natural size (%1).
;; The "prefix" attribute is now stated explicitly as "vex", matching every
;; other avx2_<code>... extension pattern in this file, which all emit a
;; VEX-only vpmov mnemonic.
;; NOTE(review): the attribute's default for mode "OI" is presumably "vex"
;; as well, in which case this is a consistency change only -- confirm
;; against the "prefix" attribute definition.
11960 (define_insn "avx2_<code>v4siv4di2"
11961 [(set (match_operand:V4DI 0 "register_operand" "=x")
11963 (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
11965 "vpmov<extsuffix>dq\t{%1, %0|%0, %1}"
11966 [(set_attr "type" "ssemov")
11967 (set_attr "prefix_extra" "1")
(set_attr "prefix" "vex")
11968 (set_attr "mode" "OI")])
;; SSE4.1 doubleword-to-quadword extension: elements 0..1 of the V4SI source
;; are widened to V2DI.  Only 8 bytes are consumed, so the Intel-syntax
;; operand uses the %q (quadword) modifier and ssememalign is 64.
11970 (define_insn "sse4_1_<code>v2siv2di2"
11971 [(set (match_operand:V2DI 0 "register_operand" "=x")
11974 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
11975 (parallel [(const_int 0) (const_int 1)]))))]
11977 "%vpmov<extsuffix>dq\t{%1, %0|%0, %q1}"
11978 [(set_attr "type" "ssemov")
11979 (set_attr "ssememalign" "64")
11980 (set_attr "prefix_extra" "1")
11981 (set_attr "prefix" "maybe_vex")
11982 (set_attr "mode" "TI")])
11984 ;; vtestps/vtestpd are very similar to comiss and ucomiss when
11985 ;; setting FLAGS_REG, but they are not really compare instructions.
;; The pattern sets (reg:CC FLAGS_REG) from an unspec of a register
;; (operand 0) and a register-or-memory operand (operand 1), both of a
;; VF_128_256 mode; no vector register is written.
11986 (define_insn "avx_vtest<ssemodesuffix><avxsizesuffix>"
11987 [(set (reg:CC FLAGS_REG)
11988 (unspec:CC [(match_operand:VF_128_256 0 "register_operand" "x")
11989 (match_operand:VF_128_256 1 "nonimmediate_operand" "xm")]
11992 "vtest<ssemodesuffix>\t{%1, %0|%0, %1}"
11993 [(set_attr "type" "ssecomi")
11994 (set_attr "prefix_extra" "1")
11995 (set_attr "prefix" "vex")
11996 (set_attr "mode" "<MODE>")])
11998 ;; ptest is very similar to comiss and ucomiss when setting FLAGS_REG,
11999 ;; but it is not really a compare instruction.
;; 256-bit form: sets (reg:CC FLAGS_REG) from an unspec of two V4DI
;; operands (operand 1 may be memory).  VEX-only; btver2_decode "vector".
12000 (define_insn "avx_ptest256"
12001 [(set (reg:CC FLAGS_REG)
12002 (unspec:CC [(match_operand:V4DI 0 "register_operand" "x")
12003 (match_operand:V4DI 1 "nonimmediate_operand" "xm")]
12006 "vptest\t{%1, %0|%0, %1}"
12007 [(set_attr "type" "ssecomi")
12008 (set_attr "prefix_extra" "1")
12009 (set_attr "prefix" "vex")
12010 (set_attr "btver2_decode" "vector")
12011 (set_attr "mode" "OI")])
;; 128-bit ptest: sets (reg:CC FLAGS_REG) from an unspec of two V2DI
;; operands (operand 1 may be memory).  "%v" lets the same template emit
;; the VEX-prefixed mnemonic (prefix attribute "maybe_vex").
12013 (define_insn "sse4_1_ptest"
12014 [(set (reg:CC FLAGS_REG)
12015 (unspec:CC [(match_operand:V2DI 0 "register_operand" "x")
12016 (match_operand:V2DI 1 "nonimmediate_operand" "xm")]
12019 "%vptest\t{%1, %0|%0, %1}"
12020 [(set_attr "type" "ssecomi")
12021 (set_attr "prefix_extra" "1")
12022 (set_attr "prefix" "maybe_vex")
12023 (set_attr "mode" "TI")])
;; Packed rounding (round<ssemodesuffix>) over VF_128_256 modes.
;; Operand 1 is the source (register or memory) and operand 2 an immediate
;; satisfying const_0_to_15_operand.  The prefix_data16 attribute is
;; computed from a TARGET_AVX test rather than given as a constant.
12025 (define_insn "<sse4_1>_round<ssemodesuffix><avxsizesuffix>"
12026 [(set (match_operand:VF_128_256 0 "register_operand" "=x")
12028 [(match_operand:VF_128_256 1 "nonimmediate_operand" "xm")
12029 (match_operand:SI 2 "const_0_to_15_operand" "n")]
12032 "%vround<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
12033 [(set_attr "type" "ssecvt")
12034 (set (attr "prefix_data16")
12036 (match_test "TARGET_AVX")
12038 (const_string "1")))
12039 (set_attr "prefix_extra" "1")
12040 (set_attr "length_immediate" "1")
12041 (set_attr "prefix" "maybe_vex")
12042 (set_attr "mode" "<MODE>")])
;; Round-then-convert expander for VF1_128_256 modes: operand 1 is rounded
;; (mode selected by operand 2) into a fresh temporary of the same float
;; mode, and the temporary is then converted with fix_trunc into the
;; integer vector destination (operand 0).
12044 (define_expand "<sse4_1>_round<ssemodesuffix>_sfix<avxsizesuffix>"
12045 [(match_operand:<sseintvecmode> 0 "register_operand")
12046 (match_operand:VF1_128_256 1 "nonimmediate_operand")
12047 (match_operand:SI 2 "const_0_to_15_operand")]
12050 rtx tmp = gen_reg_rtx (<MODE>mode);
12053 (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp, operands[1],
12056 (gen_fix_trunc<mode><sseintvecmodelower>2 (operands[0], tmp));
;; V8DF round expander: it simply forwards its three operands, unchanged and
;; in order, to avx512f_rndscalev8df.
12060 (define_expand "avx512f_roundpd512"
12061 [(match_operand:V8DF 0 "register_operand")
12062 (match_operand:V8DF 1 "nonimmediate_operand")
12063 (match_operand:SI 2 "const_0_to_15_operand")]
12066 emit_insn (gen_avx512f_rndscalev8df (operands[0], operands[1], operands[2]));
;; Round two VF2 vectors (operands 1 and 2) under the immediate in operand 3
;; and pack the converted integers into operand 0.
;; For V2DF with TARGET_AVX and without TARGET_PREFER_AVX128, both inputs are
;; concatenated into one V4DF (operand 1 is forced into a register first),
;; rounded once with avx_roundpd256 and converted with fix_truncv4dfv4si2.
;; Otherwise each input is rounded into its own temporary and the two
;; temporaries are combined by vec_pack_sfix_trunc_<mode>.
12070 (define_expand "<sse4_1>_round<ssemodesuffix>_vec_pack_sfix<avxsizesuffix>"
12071 [(match_operand:<ssepackfltmode> 0 "register_operand")
12072 (match_operand:VF2 1 "nonimmediate_operand")
12073 (match_operand:VF2 2 "nonimmediate_operand")
12074 (match_operand:SI 3 "const_0_to_15_operand")]
12079 if (<MODE>mode == V2DFmode
12080 && TARGET_AVX && !TARGET_PREFER_AVX128)
12082 rtx tmp2 = gen_reg_rtx (V4DFmode);
12084 tmp0 = gen_reg_rtx (V4DFmode);
12085 tmp1 = force_reg (V2DFmode, operands[1]);
12087 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
12088 emit_insn (gen_avx_roundpd256 (tmp2, tmp0, operands[3]));
12089 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp2));
12093 tmp0 = gen_reg_rtx (<MODE>mode);
12094 tmp1 = gen_reg_rtx (<MODE>mode);
12097 (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp0, operands[1],
12100 (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp1, operands[2],
12103 (gen_vec_pack_sfix_trunc_<mode> (operands[0], tmp0, tmp1));
;; Scalar round<ssescalarmodesuffix> with two alternatives selected by the
;; "isa" attribute:
;;   noavx - operand 1 is tied to the destination ("0"); two-register form.
;;   avx   - operand 1 is an independent register; three-register vround.
;; Operand 2 is the value being rounded and operand 3 the control immediate.
12108 (define_insn "sse4_1_round<ssescalarmodesuffix>"
12109 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
12112 [(match_operand:VF_128 2 "register_operand" "x,x")
12113 (match_operand:SI 3 "const_0_to_15_operand" "n,n")]
12115 (match_operand:VF_128 1 "register_operand" "0,x")
12119 round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
12120 vround<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
12121 [(set_attr "isa" "noavx,avx")
12122 (set_attr "type" "ssecvt")
12123 (set_attr "length_immediate" "1")
12124 (set_attr "prefix_data16" "1,*")
12125 (set_attr "prefix_extra" "1")
12126 (set_attr "prefix" "orig,vex")
12127 (set_attr "mode" "<MODE>")])
;; round<mode>2 expander, enabled for TARGET_ROUND && !flag_trapping_math.
;; The preparation code builds a vector whose elements are
;; nextafter (0.5, 0.0), computed as 0.5 minus 2**(-p - 1) where p is the
;; precision of the scalar format.  It then copies the sign of operand 1
;; onto that vector (operands[3]), allocates the intermediate operands[4]
;; and sets the rounding immediate operands[5] to ROUND_TRUNC.
;; NOTE(review): the RTL that combines operand 1 with operands[3] is not
;; visible in this excerpt -- presumably an addition; confirm.
12129 (define_expand "round<mode>2"
12130 [(set (match_dup 4)
12132 (match_operand:VF 1 "register_operand")
12134 (set (match_operand:VF 0 "register_operand")
12136 [(match_dup 4) (match_dup 5)]
12138 "TARGET_ROUND && !flag_trapping_math"
12140 enum machine_mode scalar_mode;
12141 const struct real_format *fmt;
12142 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
12143 rtx half, vec_half;
12145 scalar_mode = GET_MODE_INNER (<MODE>mode);
12147 /* load nextafter (0.5, 0.0) */
12148 fmt = REAL_MODE_FORMAT (scalar_mode);
12149 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, scalar_mode);
12150 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
12151 half = const_double_from_real_value (pred_half, scalar_mode);
12153 vec_half = ix86_build_const_vector (<MODE>mode, true, half);
12154 vec_half = force_reg (<MODE>mode, vec_half);
12156 operands[3] = gen_reg_rtx (<MODE>mode);
12157 emit_insn (gen_copysign<mode>3 (operands[3], vec_half, operands[1]));
12159 operands[4] = gen_reg_rtx (<MODE>mode);
12160 operands[5] = GEN_INT (ROUND_TRUNC);
;; round<mode>2 followed by an integer conversion: operand 1 is rounded into
;; a fresh temporary via round<mode>2, and the temporary is converted into
;; operand 0 by fix_trunc<mode><sseintvecmodelower>2.  Same enabling
;; condition as round<mode>2.
12163 (define_expand "round<mode>2_sfix"
12164 [(match_operand:<sseintvecmode> 0 "register_operand")
12165 (match_operand:VF1_128_256 1 "register_operand")]
12166 "TARGET_ROUND && !flag_trapping_math"
12168 rtx tmp = gen_reg_rtx (<MODE>mode);
12170 emit_insn (gen_round<mode>2 (tmp, operands[1]));
12173 (gen_fix_trunc<mode><sseintvecmodelower>2 (operands[0], tmp));
;; Round two VF2 vectors with round<mode>2 and pack the converted integers
;; into operand 0.
;; For V2DF with TARGET_AVX and without TARGET_PREFER_AVX128, the inputs are
;; concatenated into one V4DF, rounded once with roundv4df2 and converted
;; with fix_truncv4dfv4si2.  Otherwise each input is rounded separately and
;; vec_pack_sfix_trunc_<mode> combines the two temporaries.
12177 (define_expand "round<mode>2_vec_pack_sfix"
12178 [(match_operand:<ssepackfltmode> 0 "register_operand")
12179 (match_operand:VF2 1 "register_operand")
12180 (match_operand:VF2 2 "register_operand")]
12181 "TARGET_ROUND && !flag_trapping_math"
12185 if (<MODE>mode == V2DFmode
12186 && TARGET_AVX && !TARGET_PREFER_AVX128)
12188 rtx tmp2 = gen_reg_rtx (V4DFmode);
12190 tmp0 = gen_reg_rtx (V4DFmode);
12191 tmp1 = force_reg (V2DFmode, operands[1]);
12193 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
12194 emit_insn (gen_roundv4df2 (tmp2, tmp0));
12195 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp2));
12199 tmp0 = gen_reg_rtx (<MODE>mode);
12200 tmp1 = gen_reg_rtx (<MODE>mode);
12202 emit_insn (gen_round<mode>2 (tmp0, operands[1]));
12203 emit_insn (gen_round<mode>2 (tmp1, operands[2]));
12206 (gen_vec_pack_sfix_trunc_<mode> (operands[0], tmp0, tmp1));
12211 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
12213 ;; Intel SSE4.2 string/text processing instructions
12215 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Combined pcmpestr pattern with three results: the SI index in operand 0
;; (constraint "c"), the V16QI mask in operand 1 (constraint "Yz") and
;; FLAGS_REG.  It is split while pseudos can still be created.
;; The split reads REG_UNUSED notes on curr_insn to learn which results are
;; live (ecx, xmm0, flags) and emits pcmpestri and/or pcmpestrm.  When only
;; the flags are live it emits the cconly form, and when nothing is live it
;; emits a NOTE_INSN_DELETED.
;; NOTE(review): the conditions guarding the pcmpestri/pcmpestrm emits are
;; not visible in this excerpt -- presumably `ecx' and `xmm0'; confirm.
12217 (define_insn_and_split "sse4_2_pcmpestr"
12218 [(set (match_operand:SI 0 "register_operand" "=c,c")
12220 [(match_operand:V16QI 2 "register_operand" "x,x")
12221 (match_operand:SI 3 "register_operand" "a,a")
12222 (match_operand:V16QI 4 "nonimmediate_operand" "x,m")
12223 (match_operand:SI 5 "register_operand" "d,d")
12224 (match_operand:SI 6 "const_0_to_255_operand" "n,n")]
12226 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
12234 (set (reg:CC FLAGS_REG)
12243 && can_create_pseudo_p ()"
12248 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
12249 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
12250 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
12253 emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
12254 operands[3], operands[4],
12255 operands[5], operands[6]));
12257 emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
12258 operands[3], operands[4],
12259 operands[5], operands[6]));
12260 if (flags && !(ecx || xmm0))
12261 emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
12262 operands[2], operands[3],
12263 operands[4], operands[5],
12265 if (!(flags || ecx || xmm0))
12266 emit_note (NOTE_INSN_DELETED);
12270 [(set_attr "type" "sselog")
12271 (set_attr "prefix_data16" "1")
12272 (set_attr "prefix_extra" "1")
12273 (set_attr "ssememalign" "8")
12274 (set_attr "length_immediate" "1")
12275 (set_attr "memory" "none,load")
12276 (set_attr "mode" "TI")])
;; Variant of the combined pcmpestr pattern whose second string (operand 4)
;; is a memory operand wrapped in UNSPEC_LOADU; the "memory" attribute is
;; therefore always "load".  The split body has the same shape as
;; sse4_2_pcmpestr: REG_UNUSED notes decide whether pcmpestri, pcmpestrm,
;; the cconly form or just a NOTE_INSN_DELETED is emitted, and operand 4 is
;; passed on to the generated insns directly.
12278 (define_insn_and_split "*sse4_2_pcmpestr_unaligned"
12279 [(set (match_operand:SI 0 "register_operand" "=c")
12281 [(match_operand:V16QI 2 "register_operand" "x")
12282 (match_operand:SI 3 "register_operand" "a")
12284 [(match_operand:V16QI 4 "memory_operand" "m")]
12286 (match_operand:SI 5 "register_operand" "d")
12287 (match_operand:SI 6 "const_0_to_255_operand" "n")]
12289 (set (match_operand:V16QI 1 "register_operand" "=Yz")
12293 (unspec:V16QI [(match_dup 4)] UNSPEC_LOADU)
12297 (set (reg:CC FLAGS_REG)
12301 (unspec:V16QI [(match_dup 4)] UNSPEC_LOADU)
12306 && can_create_pseudo_p ()"
12311 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
12312 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
12313 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
12316 emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
12317 operands[3], operands[4],
12318 operands[5], operands[6]));
12320 emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
12321 operands[3], operands[4],
12322 operands[5], operands[6]));
12323 if (flags && !(ecx || xmm0))
12324 emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
12325 operands[2], operands[3],
12326 operands[4], operands[5],
12328 if (!(flags || ecx || xmm0))
12329 emit_note (NOTE_INSN_DELETED);
12333 [(set_attr "type" "sselog")
12334 (set_attr "prefix_data16" "1")
12335 (set_attr "prefix_extra" "1")
12336 (set_attr "ssememalign" "8")
12337 (set_attr "length_immediate" "1")
12338 (set_attr "memory" "load")
12339 (set_attr "mode" "TI")])
;; pcmpestri: the SI result lands in operand 0 (constraint "c") and
;; FLAGS_REG is set as well.  Operands 1 and 3 are the V16QI inputs (operand
;; 3 may be memory), SI operands 2 and 4 use the "a" and "d" constraints,
;; and operand 5 is the 8-bit control immediate.  Only operands 1, 3 and 5
;; appear in the assembler template.
12341 (define_insn "sse4_2_pcmpestri"
12342 [(set (match_operand:SI 0 "register_operand" "=c,c")
12344 [(match_operand:V16QI 1 "register_operand" "x,x")
12345 (match_operand:SI 2 "register_operand" "a,a")
12346 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
12347 (match_operand:SI 4 "register_operand" "d,d")
12348 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
12350 (set (reg:CC FLAGS_REG)
12359 "%vpcmpestri\t{%5, %3, %1|%1, %3, %5}"
12360 [(set_attr "type" "sselog")
12361 (set_attr "prefix_data16" "1")
12362 (set_attr "prefix_extra" "1")
12363 (set_attr "prefix" "maybe_vex")
12364 (set_attr "ssememalign" "8")
12365 (set_attr "length_immediate" "1")
12366 (set_attr "btver2_decode" "vector")
12367 (set_attr "memory" "none,load")
12368 (set_attr "mode" "TI")])
;; pcmpestrm: the V16QI result lands in operand 0 (constraint "Yz") and
;; FLAGS_REG is set as well.  The input operands are laid out as in
;; sse4_2_pcmpestri: V16QI operands 1 and 3, SI operands 2 ("a") and 4
;; ("d"), and the control immediate in operand 5.
12370 (define_insn "sse4_2_pcmpestrm"
12371 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
12373 [(match_operand:V16QI 1 "register_operand" "x,x")
12374 (match_operand:SI 2 "register_operand" "a,a")
12375 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
12376 (match_operand:SI 4 "register_operand" "d,d")
12377 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
12379 (set (reg:CC FLAGS_REG)
12388 "%vpcmpestrm\t{%5, %3, %1|%1, %3, %5}"
12389 [(set_attr "type" "sselog")
12390 (set_attr "prefix_data16" "1")
12391 (set_attr "prefix_extra" "1")
12392 (set_attr "ssememalign" "8")
12393 (set_attr "length_immediate" "1")
12394 (set_attr "prefix" "maybe_vex")
12395 (set_attr "btver2_decode" "vector")
12396 (set_attr "memory" "none,load")
12397 (set_attr "mode" "TI")])
;; Flags-only pcmpestr: FLAGS_REG is the sole result, and the register the
;; chosen instruction would otherwise write is declared as a clobbered
;; scratch.  There are four alternatives:
;;   0,1 - emitted as pcmpestrm; V16QI scratch 0 uses "Yz", SI scratch 1 "X".
;;   2,3 - emitted as pcmpestri; SI scratch 1 uses "c", V16QI scratch 0 "X".
;; Within each pair the second alternative takes operand 4 from memory.
12399 (define_insn "sse4_2_pcmpestr_cconly"
12400 [(set (reg:CC FLAGS_REG)
12402 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
12403 (match_operand:SI 3 "register_operand" "a,a,a,a")
12404 (match_operand:V16QI 4 "nonimmediate_operand" "x,m,x,m")
12405 (match_operand:SI 5 "register_operand" "d,d,d,d")
12406 (match_operand:SI 6 "const_0_to_255_operand" "n,n,n,n")]
12408 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
12409 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
12412 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
12413 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
12414 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}
12415 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}"
12416 [(set_attr "type" "sselog")
12417 (set_attr "prefix_data16" "1")
12418 (set_attr "prefix_extra" "1")
12419 (set_attr "ssememalign" "8")
12420 (set_attr "length_immediate" "1")
12421 (set_attr "memory" "none,load,none,load")
12422 (set_attr "btver2_decode" "vector,vector,vector,vector")
12423 (set_attr "prefix" "maybe_vex")
12424 (set_attr "mode" "TI")])
;; Combined pcmpistr pattern: the SI index goes to operand 0 ("c"), the
;; V16QI mask to operand 1 ("Yz"), and FLAGS_REG is set.  Unlike the
;; pcmpestr patterns it has no SI length operands.  It is split while
;; pseudos can still be created.
;; The split reads REG_UNUSED notes on curr_insn and emits pcmpistri and/or
;; pcmpistrm, the cconly form when only the flags are live, or a
;; NOTE_INSN_DELETED when nothing is live.
;; NOTE(review): the conditions guarding the pcmpistri/pcmpistrm emits are
;; not visible in this excerpt -- presumably `ecx' and `xmm0'; confirm.
12426 (define_insn_and_split "sse4_2_pcmpistr"
12427 [(set (match_operand:SI 0 "register_operand" "=c,c")
12429 [(match_operand:V16QI 2 "register_operand" "x,x")
12430 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
12431 (match_operand:SI 4 "const_0_to_255_operand" "n,n")]
12433 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
12439 (set (reg:CC FLAGS_REG)
12446 && can_create_pseudo_p ()"
12451 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
12452 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
12453 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
12456 emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
12457 operands[3], operands[4]));
12459 emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
12460 operands[3], operands[4]));
12461 if (flags && !(ecx || xmm0))
12462 emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
12463 operands[2], operands[3],
12465 if (!(flags || ecx || xmm0))
12466 emit_note (NOTE_INSN_DELETED);
12470 [(set_attr "type" "sselog")
12471 (set_attr "prefix_data16" "1")
12472 (set_attr "prefix_extra" "1")
12473 (set_attr "ssememalign" "8")
12474 (set_attr "length_immediate" "1")
12475 (set_attr "memory" "none,load")
12476 (set_attr "mode" "TI")])
;; Variant of the combined pcmpistr pattern whose second string (operand 3)
;; is a memory operand wrapped in UNSPEC_LOADU; the "memory" attribute is
;; always "load".  The split body has the same shape as sse4_2_pcmpistr and
;; passes operand 3 on to the generated insns directly.
12478 (define_insn_and_split "*sse4_2_pcmpistr_unaligned"
12479 [(set (match_operand:SI 0 "register_operand" "=c")
12481 [(match_operand:V16QI 2 "register_operand" "x")
12483 [(match_operand:V16QI 3 "memory_operand" "m")]
12485 (match_operand:SI 4 "const_0_to_255_operand" "n")]
12487 (set (match_operand:V16QI 1 "register_operand" "=Yz")
12490 (unspec:V16QI [(match_dup 3)] UNSPEC_LOADU)
12493 (set (reg:CC FLAGS_REG)
12496 (unspec:V16QI [(match_dup 3)] UNSPEC_LOADU)
12500 && can_create_pseudo_p ()"
12505 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
12506 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
12507 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
12510 emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
12511 operands[3], operands[4]));
12513 emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
12514 operands[3], operands[4]));
12515 if (flags && !(ecx || xmm0))
12516 emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
12517 operands[2], operands[3],
12519 if (!(flags || ecx || xmm0))
12520 emit_note (NOTE_INSN_DELETED);
12524 [(set_attr "type" "sselog")
12525 (set_attr "prefix_data16" "1")
12526 (set_attr "prefix_extra" "1")
12527 (set_attr "ssememalign" "8")
12528 (set_attr "length_immediate" "1")
12529 (set_attr "memory" "load")
12530 (set_attr "mode" "TI")])
;; pcmpistri: the SI result goes to operand 0 (constraint "c") and
;; FLAGS_REG is set.  Inputs are the V16QI register operand 1, the V16QI
;; register-or-memory operand 2 and the 8-bit control immediate operand 3.
12532 (define_insn "sse4_2_pcmpistri"
12533 [(set (match_operand:SI 0 "register_operand" "=c,c")
12535 [(match_operand:V16QI 1 "register_operand" "x,x")
12536 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
12537 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
12539 (set (reg:CC FLAGS_REG)
12546 "%vpcmpistri\t{%3, %2, %1|%1, %2, %3}"
12547 [(set_attr "type" "sselog")
12548 (set_attr "prefix_data16" "1")
12549 (set_attr "prefix_extra" "1")
12550 (set_attr "ssememalign" "8")
12551 (set_attr "length_immediate" "1")
12552 (set_attr "prefix" "maybe_vex")
12553 (set_attr "memory" "none,load")
12554 (set_attr "btver2_decode" "vector")
12555 (set_attr "mode" "TI")])
;; pcmpistrm: the V16QI result goes to operand 0 (constraint "Yz") and
;; FLAGS_REG is set.  Inputs match sse4_2_pcmpistri: V16QI operands 1 and 2
;; (operand 2 may be memory) plus the control immediate operand 3.
12557 (define_insn "sse4_2_pcmpistrm"
12558 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
12560 [(match_operand:V16QI 1 "register_operand" "x,x")
12561 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
12562 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
12564 (set (reg:CC FLAGS_REG)
12571 "%vpcmpistrm\t{%3, %2, %1|%1, %2, %3}"
12572 [(set_attr "type" "sselog")
12573 (set_attr "prefix_data16" "1")
12574 (set_attr "prefix_extra" "1")
12575 (set_attr "ssememalign" "8")
12576 (set_attr "length_immediate" "1")
12577 (set_attr "prefix" "maybe_vex")
12578 (set_attr "memory" "none,load")
12579 (set_attr "btver2_decode" "vector")
12580 (set_attr "mode" "TI")])
;; Flags-only pcmpistr: FLAGS_REG is the sole result and the register the
;; chosen instruction would otherwise write is a clobbered scratch.
;;   alternatives 0,1 - pcmpistrm, clobbering the "Yz" V16QI scratch 0.
;;   alternatives 2,3 - pcmpistri, clobbering the "c" SI scratch 1.
;; Within each pair the second alternative takes operand 3 from memory.
12582 (define_insn "sse4_2_pcmpistr_cconly"
12583 [(set (reg:CC FLAGS_REG)
12585 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
12586 (match_operand:V16QI 3 "nonimmediate_operand" "x,m,x,m")
12587 (match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")]
12589 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
12590 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
12593 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
12594 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
12595 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}
12596 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}"
12597 [(set_attr "type" "sselog")
12598 (set_attr "prefix_data16" "1")
12599 (set_attr "prefix_extra" "1")
12600 (set_attr "ssememalign" "8")
12601 (set_attr "length_immediate" "1")
12602 (set_attr "memory" "none,load,none,load")
12603 (set_attr "prefix" "maybe_vex")
12604 (set_attr "btver2_decode" "vector,vector,vector,vector")
12605 (set_attr "mode" "TI")])
12607 ;; Packed float variants
;; Maps the index vector mode of a packed-float gather/scatter prefetch to
;; the mode used for its memory reference: V8DI -> V8SF, V16SI -> V16SF.
12608 (define_mode_attr GATHER_SCATTER_SF_MEM_MODE
12609 [(V8DI "V8SF") (V16SI "V16SF")])
;; Expander for the packed-float gather prefetch.  Operand 0 is a mask
;; register or constant -1, operand 2 the base address, operand 1 the
;; VI48_512 index vector, operand 3 the scale (1, 2, 4 or 8) and operand 4
;; a hint restricted to 2..3.  The preparation code wraps operands 2, 1
;; and 3 into a Pmode UNSPEC_VSIBADDR.
;; NOTE(review): the operand slot that receives the UNSPEC_VSIBADDR is not
;; visible in this excerpt.
12611 (define_expand "avx512pf_gatherpf<mode>sf"
12613 [(match_operand:<avx512fmaskmode> 0 "register_or_constm1_operand")
12614 (mem:<GATHER_SCATTER_SF_MEM_MODE>
12616 [(match_operand 2 "vsib_address_operand")
12617 (match_operand:VI48_512 1 "register_operand")
12618 (match_operand:SI 3 "const1248_operand")]))
12619 (match_operand:SI 4 "const_2_to_3_operand")]
12620 UNSPEC_GATHER_PREFETCH)]
12624 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
12625 operands[3]), UNSPEC_VSIBADDR);
;; Masked packed-float gather prefetch.  Operand 0 is the mask register
;; ("Yk"), printed in braces after the VSIB memory operand %5.  The value
;; of operand 4 selects between the vgatherpf0 and vgatherpf1 templates
;; (the case labels are not visible in this excerpt); any other value
;; reaches gcc_unreachable.
12628 (define_insn "*avx512pf_gatherpf<mode>sf_mask"
12630 [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
12631 (match_operator:<GATHER_SCATTER_SF_MEM_MODE> 5 "vsib_mem_operator"
12633 [(match_operand:P 2 "vsib_address_operand" "Tv")
12634 (match_operand:VI48_512 1 "register_operand" "v")
12635 (match_operand:SI 3 "const1248_operand" "n")]
12637 (match_operand:SI 4 "const_2_to_3_operand" "n")]
12638 UNSPEC_GATHER_PREFETCH)]
12641 switch (INTVAL (operands[4]))
12644 return "vgatherpf0<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}";
12646 return "vgatherpf1<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}";
12648 gcc_unreachable ();
12651 [(set_attr "type" "sse")
12652 (set_attr "prefix" "evex")
12653 (set_attr "mode" "XI")])
;; Packed-float gather prefetch without a mask register operand: operand
;; numbers are shifted down by one relative to the _mask pattern, and the
;; template prints only the VSIB memory operand %4.  The value of operand 3
;; selects between the vgatherpf0 and vgatherpf1 templates.
12655 (define_insn "*avx512pf_gatherpf<mode>sf"
12658 (match_operator:<GATHER_SCATTER_SF_MEM_MODE> 4 "vsib_mem_operator"
12660 [(match_operand:P 1 "vsib_address_operand" "Tv")
12661 (match_operand:VI48_512 0 "register_operand" "v")
12662 (match_operand:SI 2 "const1248_operand" "n")]
12664 (match_operand:SI 3 "const_2_to_3_operand" "n")]
12665 UNSPEC_GATHER_PREFETCH)]
12668 switch (INTVAL (operands[3]))
12671 return "vgatherpf0<ssemodesuffix>ps\t{%4|%4}";
12673 return "vgatherpf1<ssemodesuffix>ps\t{%4|%4}";
12675 gcc_unreachable ();
12678 [(set_attr "type" "sse")
12679 (set_attr "prefix" "evex")
12680 (set_attr "mode" "XI")])
12682 ;; Packed double variants
;; Expander for the packed-double gather prefetch.  Same operand layout as
;; the packed-float expander, but the index vector (operand 1) iterates
;; over VI4_256_8_512.  The preparation code wraps operands 2, 1 and 3 into
;; a Pmode UNSPEC_VSIBADDR.
;; NOTE(review): the operand slot that receives the UNSPEC_VSIBADDR is not
;; visible in this excerpt.
12683 (define_expand "avx512pf_gatherpf<mode>df"
12685 [(match_operand:<avx512fmaskmode> 0 "register_or_constm1_operand")
12688 [(match_operand 2 "vsib_address_operand")
12689 (match_operand:VI4_256_8_512 1 "register_operand")
12690 (match_operand:SI 3 "const1248_operand")]))
12691 (match_operand:SI 4 "const_2_to_3_operand")]
12692 UNSPEC_GATHER_PREFETCH)]
12696 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
12697 operands[3]), UNSPEC_VSIBADDR);
;; Masked packed-double gather prefetch.  The memory operator is V8DF and
;; the mask register operand 0 ("Yk") is printed in braces after the VSIB
;; memory operand %5.  The value of operand 4 selects between the
;; vgatherpf0 and vgatherpf1 "pd" templates.
12700 (define_insn "*avx512pf_gatherpf<mode>df_mask"
12702 [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
12703 (match_operator:V8DF 5 "vsib_mem_operator"
12705 [(match_operand:P 2 "vsib_address_operand" "Tv")
12706 (match_operand:VI4_256_8_512 1 "register_operand" "v")
12707 (match_operand:SI 3 "const1248_operand" "n")]
12709 (match_operand:SI 4 "const_2_to_3_operand" "n")]
12710 UNSPEC_GATHER_PREFETCH)]
12713 switch (INTVAL (operands[4]))
12716 return "vgatherpf0<ssemodesuffix>pd\t{%5%{%0%}|%5%{%0%}}";
12718 return "vgatherpf1<ssemodesuffix>pd\t{%5%{%0%}|%5%{%0%}}";
12720 gcc_unreachable ();
12723 [(set_attr "type" "sse")
12724 (set_attr "prefix" "evex")
12725 (set_attr "mode" "XI")])
;; Packed-double gather prefetch without a mask register operand: the
;; template prints only the V8DF VSIB memory operand %4.  The value of
;; operand 3 selects between the vgatherpf0 and vgatherpf1 "pd" templates.
12727 (define_insn "*avx512pf_gatherpf<mode>df"
12730 (match_operator:V8DF 4 "vsib_mem_operator"
12732 [(match_operand:P 1 "vsib_address_operand" "Tv")
12733 (match_operand:VI4_256_8_512 0 "register_operand" "v")
12734 (match_operand:SI 2 "const1248_operand" "n")]
12736 (match_operand:SI 3 "const_2_to_3_operand" "n")]
12737 UNSPEC_GATHER_PREFETCH)]
12740 switch (INTVAL (operands[3]))
12743 return "vgatherpf0<ssemodesuffix>pd\t{%4|%4}";
12745 return "vgatherpf1<ssemodesuffix>pd\t{%4|%4}";
12747 gcc_unreachable ();
12750 [(set_attr "type" "sse")
12751 (set_attr "prefix" "evex")
12752 (set_attr "mode" "XI")])
12754 ;; Packed float variants
;; Expander for the packed-float scatter prefetch.  The layout mirrors the
;; gather expander: mask-or-constant -1 operand 0, base address operand 2,
;; VI48_512 index operand 1 and scale operand 3.  The hint operand 4 must
;; satisfy const2367_operand and the unspec is UNSPEC_SCATTER_PREFETCH.
;; The preparation code wraps operands 2, 1 and 3 into a Pmode
;; UNSPEC_VSIBADDR.
;; NOTE(review): the operand slot that receives the UNSPEC_VSIBADDR is not
;; visible in this excerpt.
12755 (define_expand "avx512pf_scatterpf<mode>sf"
12757 [(match_operand:<avx512fmaskmode> 0 "register_or_constm1_operand")
12758 (mem:<GATHER_SCATTER_SF_MEM_MODE>
12760 [(match_operand 2 "vsib_address_operand")
12761 (match_operand:VI48_512 1 "register_operand")
12762 (match_operand:SI 3 "const1248_operand")]))
12763 (match_operand:SI 4 "const2367_operand")]
12764 UNSPEC_SCATTER_PREFETCH)]
12768 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
12769 operands[3]), UNSPEC_VSIBADDR);
;; Masked packed-float scatter prefetch.  The mask register operand 0
;; ("Yk") is printed in braces after the VSIB memory operand %5.  The value
;; of operand 4 (const2367_operand) selects between the vscatterpf0 and
;; vscatterpf1 templates (the case labels are not visible in this excerpt).
12772 (define_insn "*avx512pf_scatterpf<mode>sf_mask"
12774 [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
12775 (match_operator:<GATHER_SCATTER_SF_MEM_MODE> 5 "vsib_mem_operator"
12777 [(match_operand:P 2 "vsib_address_operand" "Tv")
12778 (match_operand:VI48_512 1 "register_operand" "v")
12779 (match_operand:SI 3 "const1248_operand" "n")]
12781 (match_operand:SI 4 "const2367_operand" "n")]
12782 UNSPEC_SCATTER_PREFETCH)]
12785 switch (INTVAL (operands[4]))
12789 return "vscatterpf0<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}";
12792 return "vscatterpf1<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}";
12794 gcc_unreachable ();
12797 [(set_attr "type" "sse")
12798 (set_attr "prefix" "evex")
12799 (set_attr "mode" "XI")])
;; Packed-float scatter prefetch without a mask register operand: the
;; template prints only the VSIB memory operand %4.  The value of operand 3
;; selects between the vscatterpf0 and vscatterpf1 templates.
12801 (define_insn "*avx512pf_scatterpf<mode>sf"
12804 (match_operator:<GATHER_SCATTER_SF_MEM_MODE> 4 "vsib_mem_operator"
12806 [(match_operand:P 1 "vsib_address_operand" "Tv")
12807 (match_operand:VI48_512 0 "register_operand" "v")
12808 (match_operand:SI 2 "const1248_operand" "n")]
12810 (match_operand:SI 3 "const2367_operand" "n")]
12811 UNSPEC_SCATTER_PREFETCH)]
12814 switch (INTVAL (operands[3]))
12818 return "vscatterpf0<ssemodesuffix>ps\t{%4|%4}";
12821 return "vscatterpf1<ssemodesuffix>ps\t{%4|%4}";
12823 gcc_unreachable ();
12826 [(set_attr "type" "sse")
12827 (set_attr "prefix" "evex")
12828 (set_attr "mode" "XI")])
12830 ;; Packed double variants
;; Expander for the packed-double scatter prefetch.  Same layout as the
;; packed-float scatter expander, with the index vector (operand 1)
;; iterating over VI4_256_8_512.  The preparation code wraps operands 2, 1
;; and 3 into a Pmode UNSPEC_VSIBADDR.
;; NOTE(review): the operand slot that receives the UNSPEC_VSIBADDR is not
;; visible in this excerpt.
12831 (define_expand "avx512pf_scatterpf<mode>df"
12833 [(match_operand:<avx512fmaskmode> 0 "register_or_constm1_operand")
12836 [(match_operand 2 "vsib_address_operand")
12837 (match_operand:VI4_256_8_512 1 "register_operand")
12838 (match_operand:SI 3 "const1248_operand")]))
12839 (match_operand:SI 4 "const2367_operand")]
12840 UNSPEC_SCATTER_PREFETCH)]
12844 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
12845 operands[3]), UNSPEC_VSIBADDR);
;; Masked packed-double scatter prefetch.  The memory operator is V8DF and
;; the mask register operand 0 ("Yk") is printed in braces after the VSIB
;; memory operand %5.  The value of operand 4 selects between the
;; vscatterpf0 and vscatterpf1 "pd" templates.
12848 (define_insn "*avx512pf_scatterpf<mode>df_mask"
12850 [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
12851 (match_operator:V8DF 5 "vsib_mem_operator"
12853 [(match_operand:P 2 "vsib_address_operand" "Tv")
12854 (match_operand:VI4_256_8_512 1 "register_operand" "v")
12855 (match_operand:SI 3 "const1248_operand" "n")]
12857 (match_operand:SI 4 "const2367_operand" "n")]
12858 UNSPEC_SCATTER_PREFETCH)]
12861 switch (INTVAL (operands[4]))
12865 return "vscatterpf0<ssemodesuffix>pd\t{%5%{%0%}|%5%{%0%}}";
12868 return "vscatterpf1<ssemodesuffix>pd\t{%5%{%0%}|%5%{%0%}}";
12870 gcc_unreachable ();
12873 [(set_attr "type" "sse")
12874 (set_attr "prefix" "evex")
12875 (set_attr "mode" "XI")])
;; Packed-double scatter prefetch without a mask register operand: the
;; template prints only the V8DF VSIB memory operand %4.  The value of
;; operand 3 selects between the vscatterpf0 and vscatterpf1 "pd" templates.
12877 (define_insn "*avx512pf_scatterpf<mode>df"
12880 (match_operator:V8DF 4 "vsib_mem_operator"
12882 [(match_operand:P 1 "vsib_address_operand" "Tv")
12883 (match_operand:VI4_256_8_512 0 "register_operand" "v")
12884 (match_operand:SI 2 "const1248_operand" "n")]
12886 (match_operand:SI 3 "const2367_operand" "n")]
12887 UNSPEC_SCATTER_PREFETCH)]
12890 switch (INTVAL (operands[3]))
12894 return "vscatterpf0<ssemodesuffix>pd\t{%4|%4}";
12897 return "vscatterpf1<ssemodesuffix>pd\t{%4|%4}";
12899 gcc_unreachable ();
12902 [(set_attr "type" "sse")
12903 (set_attr "prefix" "evex")
12904 (set_attr "mode" "XI")])
;; vexp2<ssemodesuffix> on VF_512 vectors: operand 1 is the source and
;; operand 0 the destination.  The <mask_name> and <round_saeonly_name>
;; parts of the name, together with the matching placeholders in the
;; predicate, constraint and template, are filled in per generated variant.
12906 (define_insn "avx512er_exp2<mode><mask_name><round_saeonly_name>"
12907 [(set (match_operand:VF_512 0 "register_operand" "=v")
12909 [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
12912 "vexp2<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
12913 [(set_attr "prefix" "evex")
12914 (set_attr "type" "sse")
12915 (set_attr "mode" "<MODE>")])
;; vrcp28<ssemodesuffix> on VF_512 vectors: operand 1 is the source and
;; operand 0 the destination.  The name and template carry the same
;; mask / round_saeonly placeholders as avx512er_exp2.
12917 (define_insn "<mask_codefor>avx512er_rcp28<mode><mask_name><round_saeonly_name>"
12918 [(set (match_operand:VF_512 0 "register_operand" "=v")
12920 [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
12923 "vrcp28<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
12924 [(set_attr "prefix" "evex")
12925 (set_attr "type" "sse")
12926 (set_attr "mode" "<MODE>")])
;; Scalar vrcp28<ssescalarmodesuffix> on VF_128 operands.  Operand 1 is the
;; source of the operation and operand 2 a second register input that also
;; appears in the template.
;; NOTE(review): length_immediate is set to "1" although the template has
;; no immediate operand -- confirm this is intended.
12928 (define_insn "avx512er_vmrcp28<mode><round_saeonly_name>"
12929 [(set (match_operand:VF_128 0 "register_operand" "=v")
12932 [(match_operand:VF_128 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
12934 (match_operand:VF_128 2 "register_operand" "v")
12937 "vrcp28<ssescalarmodesuffix>\t{<round_saeonly_op3>%1, %2, %0|%0, %2, %1<round_saeonly_op3>}"
12938 [(set_attr "length_immediate" "1")
12939 (set_attr "prefix" "evex")
12940 (set_attr "type" "sse")
12941 (set_attr "mode" "<MODE>")])
;; vrsqrt28<ssemodesuffix> on VF_512 vectors: operand 1 is the source and
;; operand 0 the destination.  The name and template carry the same
;; mask / round_saeonly placeholders as avx512er_rcp28.
12943 (define_insn "<mask_codefor>avx512er_rsqrt28<mode><mask_name><round_saeonly_name>"
12944 [(set (match_operand:VF_512 0 "register_operand" "=v")
12946 [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
12949 "vrsqrt28<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
12950 [(set_attr "prefix" "evex")
12951 (set_attr "type" "sse")
12952 (set_attr "mode" "<MODE>")])
;; Scalar vrsqrt28<ssescalarmodesuffix> on VF_128 operands.  Operand 1 is
;; the source of the operation and operand 2 a second register input that
;; also appears in the template.
;; NOTE(review): length_immediate is set to "1" although the template has
;; no immediate operand -- confirm this is intended.
12954 (define_insn "avx512er_vmrsqrt28<mode><round_saeonly_name>"
12955 [(set (match_operand:VF_128 0 "register_operand" "=v")
12958 [(match_operand:VF_128 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
12960 (match_operand:VF_128 2 "register_operand" "v")
12963 "vrsqrt28<ssescalarmodesuffix>\t{<round_saeonly_op3>%1, %2, %0|%0, %2, %1<round_saeonly_op3>}"
12964 [(set_attr "length_immediate" "1")
12965 (set_attr "type" "sse")
12966 (set_attr "prefix" "evex")
12967 (set_attr "mode" "<MODE>")])
12969 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
12971 ;; XOP instructions
12973 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; xop_plus iterates over the plus and ss_plus codes; the macs and madcs
;; code attributes supply the mnemonic stem for each code ("macs"/"macss"
;; and "madcs"/"madcss").
12975 (define_code_iterator xop_plus [plus ss_plus])
12977 (define_code_attr macs [(plus "macs") (ss_plus "macss")])
12978 (define_code_attr madcs [(plus "madcs") (ss_plus "madcss")])
12980 ;; XOP parallel integer multiply/add instructions.
;; vp<macs> with the same element size for source and destination, over the
;; VI24_128 modes.  Operands 1 and 2 form a commutative pair ("%"; operand
;; 2 may be memory) and operand 3 is a further register input; all four
;; operands appear in the template.
12982 (define_insn "xop_p<macs><ssemodesuffix><ssemodesuffix>"
12983 [(set (match_operand:VI24_128 0 "register_operand" "=x")
12986 (match_operand:VI24_128 1 "nonimmediate_operand" "%x")
12987 (match_operand:VI24_128 2 "nonimmediate_operand" "xm"))
12988 (match_operand:VI24_128 3 "register_operand" "x")))]
12990 "vp<macs><ssemodesuffix><ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
12991 [(set_attr "type" "ssemuladd")
12992 (set_attr "mode" "TI")])
;; vp<macs>dql: uses elements 0 and 2 of the V4SI operands 1 and 2,
;; together with the V2DI register operand 3, to produce the V2DI result
;; in operand 0.
12994 (define_insn "xop_p<macs>dql"
12995 [(set (match_operand:V2DI 0 "register_operand" "=x")
13000 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
13001 (parallel [(const_int 0) (const_int 2)])))
13004 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
13005 (parallel [(const_int 0) (const_int 2)]))))
13006 (match_operand:V2DI 3 "register_operand" "x")))]
13008 "vp<macs>dql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
13009 [(set_attr "type" "ssemuladd")
13010 (set_attr "mode" "TI")])
;; vp<macs>dqh: counterpart of vp<macs>dql that uses elements 1 and 3 of
;; the V4SI operands 1 and 2, together with the V2DI register operand 3,
;; to produce the V2DI result in operand 0.
13012 (define_insn "xop_p<macs>dqh"
13013 [(set (match_operand:V2DI 0 "register_operand" "=x")
13018 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
13019 (parallel [(const_int 1) (const_int 3)])))
13022 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
13023 (parallel [(const_int 1) (const_int 3)]))))
13024 (match_operand:V2DI 3 "register_operand" "x")))]
13026 "vp<macs>dqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
13027 [(set_attr "type" "ssemuladd")
13028 (set_attr "mode" "TI")])
13030 ;; XOP parallel integer multiply/add instructions for the intrinsics
;; vp<macs>wd: uses the odd-numbered elements (1, 3, 5, 7) of the V8HI
;; operands 1 and 2, together with the V4SI register operand 3, to produce
;; the V4SI result in operand 0.
13031 (define_insn "xop_p<macs>wd"
13032 [(set (match_operand:V4SI 0 "register_operand" "=x")
13037 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
13038 (parallel [(const_int 1) (const_int 3)
13039 (const_int 5) (const_int 7)])))
13042 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
13043 (parallel [(const_int 1) (const_int 3)
13044 (const_int 5) (const_int 7)]))))
13045 (match_operand:V4SI 3 "register_operand" "x")))]
13047 "vp<macs>wd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
13048 [(set_attr "type" "ssemuladd")
13049 (set_attr "mode" "TI")])
;; vp<madcs>wd: the pattern selects both the even-numbered (0, 2, 4, 6) and
;; the odd-numbered (1, 3, 5, 7) elements of its V8HI inputs and combines
;; them with the V4SI register operand 3 into the V4SI result in operand 0.
13051 (define_insn "xop_p<madcs>wd"
13052 [(set (match_operand:V4SI 0 "register_operand" "=x")
13058 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
13059 (parallel [(const_int 0) (const_int 2)
13060 (const_int 4) (const_int 6)])))
13063 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
13064 (parallel [(const_int 0) (const_int 2)
13065 (const_int 4) (const_int 6)]))))
13070 (parallel [(const_int 1) (const_int 3)
13071 (const_int 5) (const_int 7)])))
13075 (parallel [(const_int 1) (const_int 3)
13076 (const_int 5) (const_int 7)])))))
13077 (match_operand:V4SI 3 "register_operand" "x")))]
13079 "vp<madcs>wd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
13080 [(set_attr "type" "ssemuladd")
13081 (set_attr "mode" "TI")])
13083 ;; XOP parallel XMM conditional moves
;; vpcmov over every mode of the V iterator.  The two constraint
;; alternatives allow a memory operand in either operand 2 (first
;; alternative) or operand 3 (second alternative), never in both; operand 1
;; is always a register.
13084 (define_insn "xop_pcmov_<mode><avxsizesuffix>"
13085 [(set (match_operand:V 0 "register_operand" "=x,x")
13087 (match_operand:V 3 "nonimmediate_operand" "x,m")
13088 (match_operand:V 1 "register_operand" "x,x")
13089 (match_operand:V 2 "nonimmediate_operand" "xm,x")))]
13091 "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
13092 [(set_attr "type" "sse4arg")])
13094 ;; XOP horizontal add/subtract instructions
;; vphadd<u>bw: horizontal add from the V16QI operand 1 into the V8HI
;; operand 0.  The pattern pairs the even-numbered elements (0, 2, ... 14)
;; with the odd-numbered elements (1, 3, ... 15) of the source.
13095 (define_insn "xop_phadd<u>bw"
13096 [(set (match_operand:V8HI 0 "register_operand" "=x")
13100 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
13101 (parallel [(const_int 0) (const_int 2)
13102 (const_int 4) (const_int 6)
13103 (const_int 8) (const_int 10)
13104 (const_int 12) (const_int 14)])))
13108 (parallel [(const_int 1) (const_int 3)
13109 (const_int 5) (const_int 7)
13110 (const_int 9) (const_int 11)
13111 (const_int 13) (const_int 15)])))))]
13113 "vphadd<u>bw\t{%1, %0|%0, %1}"
13114 [(set_attr "type" "sseiadd1")])
;; vphadd<u>bd: horizontal add from the V16QI operand 1 into the V4SI
;; operand 0.  The pattern uses four element selections with a stride of 4,
;; starting at elements 0, 1, 2 and 3 of the source.
13116 (define_insn "xop_phadd<u>bd"
13117 [(set (match_operand:V4SI 0 "register_operand" "=x")
13122 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
13123 (parallel [(const_int 0) (const_int 4)
13124 (const_int 8) (const_int 12)])))
13128 (parallel [(const_int 1) (const_int 5)
13129 (const_int 9) (const_int 13)]))))
13134 (parallel [(const_int 2) (const_int 6)
13135 (const_int 10) (const_int 14)])))
13139 (parallel [(const_int 3) (const_int 7)
13140 (const_int 11) (const_int 15)]))))))]
13142 "vphadd<u>bd\t{%1, %0|%0, %1}"
13143 [(set_attr "type" "sseiadd1")])
;; vphadd<u>bq: horizontal add from the V16QI operand 1 into the V2DI
;; operand 0.  The pattern uses eight two-element selections (n, n + 8)
;; for n = 0 .. 7.
13145 (define_insn "xop_phadd<u>bq"
13146 [(set (match_operand:V2DI 0 "register_operand" "=x")
13152 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
13153 (parallel [(const_int 0) (const_int 8)])))
13157 (parallel [(const_int 1) (const_int 9)]))))
13162 (parallel [(const_int 2) (const_int 10)])))
13166 (parallel [(const_int 3) (const_int 11)])))))
13172 (parallel [(const_int 4) (const_int 12)])))
13176 (parallel [(const_int 5) (const_int 13)]))))
13181 (parallel [(const_int 6) (const_int 14)])))
13185 (parallel [(const_int 7) (const_int 15)])))))))]
13187 "vphadd<u>bq\t{%1, %0|%0, %1}"
13188 [(set_attr "type" "sseiadd1")])
;; vphadd<u>wd: horizontal add from the V8HI operand 1 into the V4SI
;; operand 0.  The pattern pairs the even-numbered elements (0, 2, 4, 6)
;; with the odd-numbered elements (1, 3, 5, 7) of the source.
13190 (define_insn "xop_phadd<u>wd"
13191 [(set (match_operand:V4SI 0 "register_operand" "=x")
13195 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
13196 (parallel [(const_int 0) (const_int 2)
13197 (const_int 4) (const_int 6)])))
13201 (parallel [(const_int 1) (const_int 3)
13202 (const_int 5) (const_int 7)])))))]
13204 "vphadd<u>wd\t{%1, %0|%0, %1}"
13205 [(set_attr "type" "sseiadd1")])
;; vphadd<u>wq: horizontal add from the V8HI operand 1 into the V2DI
;; operand 0.  The pattern uses four two-element selections (n, n + 4)
;; for n = 0 .. 3.
13207 (define_insn "xop_phadd<u>wq"
13208 [(set (match_operand:V2DI 0 "register_operand" "=x")
13213 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
13214 (parallel [(const_int 0) (const_int 4)])))
13218 (parallel [(const_int 1) (const_int 5)]))))
13223 (parallel [(const_int 2) (const_int 6)])))
13227 (parallel [(const_int 3) (const_int 7)]))))))]
13229 "vphadd<u>wq\t{%1, %0|%0, %1}"
13230 [(set_attr "type" "sseiadd1")])
;; vphadd<u>dq: horizontal add from the V4SI operand 1 into the V2DI
;; operand 0.  The pattern pairs elements (0, 2) with elements (1, 3) of
;; the source.
13232 (define_insn "xop_phadd<u>dq"
13233 [(set (match_operand:V2DI 0 "register_operand" "=x")
13237 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
13238 (parallel [(const_int 0) (const_int 2)])))
13242 (parallel [(const_int 1) (const_int 3)])))))]
13244 "vphadd<u>dq\t{%1, %0|%0, %1}"
13245 [(set_attr "type" "sseiadd1")])
;; vphsubbw: horizontal subtract from the V16QI operand 1 into the V8HI
;; operand 0.  The pattern pairs the even-numbered elements (0, 2, ... 14)
;; with the odd-numbered elements (1, 3, ... 15) of the source.
13247 (define_insn "xop_phsubbw"
13248 [(set (match_operand:V8HI 0 "register_operand" "=x")
13252 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
13253 (parallel [(const_int 0) (const_int 2)
13254 (const_int 4) (const_int 6)
13255 (const_int 8) (const_int 10)
13256 (const_int 12) (const_int 14)])))
13260 (parallel [(const_int 1) (const_int 3)
13261 (const_int 5) (const_int 7)
13262 (const_int 9) (const_int 11)
13263 (const_int 13) (const_int 15)])))))]
13265 "vphsubbw\t{%1, %0|%0, %1}"
13266 [(set_attr "type" "sseiadd1")])
;; XOP horizontal subtract, words to doublewords: emits vphsubwd.
;; Operand 1 is V8HI (register or memory), operand 0 is a V4SI register.
;; Even words (0,2,4,6) and odd words (1,3,5,7) are selected separately, so
;; each result element comes from one adjacent pair of source words.
13268 (define_insn "xop_phsubwd"
13269 [(set (match_operand:V4SI 0 "register_operand" "=x")
13273 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
13274 (parallel [(const_int 0) (const_int 2)
13275 (const_int 4) (const_int 6)])))
13279 (parallel [(const_int 1) (const_int 3)
13280 (const_int 5) (const_int 7)])))))]
13282 "vphsubwd\t{%1, %0|%0, %1}"
13283 [(set_attr "type" "sseiadd1")])
;; XOP horizontal subtract, doublewords to quadwords: emits vphsubdq.
;; Operand 1 is V4SI (register or memory), operand 0 is a V2DI register.
;; The selections (0,2) and (1,3) pair each even dword with its odd neighbour.
13285 (define_insn "xop_phsubdq"
13286 [(set (match_operand:V2DI 0 "register_operand" "=x")
13290 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
13291 (parallel [(const_int 0) (const_int 2)])))
13295 (parallel [(const_int 1) (const_int 3)])))))]
13297 "vphsubdq\t{%1, %0|%0, %1}"
13298 [(set_attr "type" "sseiadd1")])
13300 ;; XOP permute instructions
;; Byte permute modelled as UNSPEC_XOP_PERMUTE over three V16QI inputs;
;; emits vpperm with operands 1, 2 and 3 as sources.
;; The two alternatives let either operand 2 or operand 3 come from memory,
;; and the insn condition rejects the case where both are memory.
13301 (define_insn "xop_pperm"
13302 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
13304 [(match_operand:V16QI 1 "register_operand" "x,x")
13305 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
13306 (match_operand:V16QI 3 "nonimmediate_operand" "xm,x")]
13307 UNSPEC_XOP_PERMUTE))]
13308 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
13309 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
13310 [(set_attr "type" "sse4arg")
13311 (set_attr "mode" "TI")])
13313 ;; XOP pack instructions that combine two vectors into a smaller vector
;; Pack two V2DI inputs (operands 1 and 2) into one V4SI result with vpperm.
;; Operand 3 (V16QI) is attached through a `use' and is printed as the last
;; vpperm source; presumably it holds the byte selector -- confirm at callers.
;; At most one of operands 2 and 3 may be a memory reference.
13314 (define_insn "xop_pperm_pack_v2di_v4si"
13315 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
13318 (match_operand:V2DI 1 "register_operand" "x,x"))
13320 (match_operand:V2DI 2 "nonimmediate_operand" "x,m"))))
13321 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
13322 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
13323 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
13324 [(set_attr "type" "sse4arg")
13325 (set_attr "mode" "TI")])
;; Pack two V4SI inputs (operands 1 and 2) into one V8HI result with vpperm.
;; Operand 3 (V16QI) is attached through a `use' and is printed as the last
;; vpperm source; presumably it holds the byte selector -- confirm at callers.
;; At most one of operands 2 and 3 may be a memory reference.
13327 (define_insn "xop_pperm_pack_v4si_v8hi"
13328 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
13331 (match_operand:V4SI 1 "register_operand" "x,x"))
13333 (match_operand:V4SI 2 "nonimmediate_operand" "x,m"))))
13334 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
13335 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
13336 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
13337 [(set_attr "type" "sse4arg")
13338 (set_attr "mode" "TI")])
;; Pack two V8HI inputs (operands 1 and 2) into one V16QI result with vpperm.
;; Operand 3 (V16QI) is attached through a `use' and is printed as the last
;; vpperm source; presumably it holds the byte selector -- confirm at callers.
;; At most one of operands 2 and 3 may be a memory reference.
13340 (define_insn "xop_pperm_pack_v8hi_v16qi"
13341 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
13344 (match_operand:V8HI 1 "register_operand" "x,x"))
13346 (match_operand:V8HI 2 "nonimmediate_operand" "x,m"))))
13347 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
13348 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
13349 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
13350 [(set_attr "type" "sse4arg")
13351 (set_attr "mode" "TI")])
13353 ;; XOP packed rotate instructions
;; Expander for a left rotate of a 128-bit integer vector (VI_128) by an
;; SImode count (operand 2).
;; When the count does not satisfy const_0_to_<sserotatemax>_operand, the
;; preparation code converts it to the element mode if needed, fills a
;; PARALLEL with <ssescalarnum> copies of it, builds a vector register with
;; vec_init<mode>, and emits xop_vrotl<mode>3 with that vector as the count.
;; In-range constants are left to the pattern itself.
13354 (define_expand "rotl<mode>3"
13355 [(set (match_operand:VI_128 0 "register_operand")
13357 (match_operand:VI_128 1 "nonimmediate_operand")
13358 (match_operand:SI 2 "general_operand")))]
13361 /* If we were given a scalar, convert it to parallel */
13362 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
13364 rtvec vs = rtvec_alloc (<ssescalarnum>);
13365 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
13366 rtx reg = gen_reg_rtx (<MODE>mode);
13367 rtx op2 = operands[2];
13370 if (GET_MODE (op2) != <ssescalarmode>mode)
13372 op2 = gen_reg_rtx (<ssescalarmode>mode);
13373 convert_move (op2, operands[2], false);
13376 for (i = 0; i < <ssescalarnum>; i++)
13377 RTVEC_ELT (vs, i) = op2;
13379 emit_insn (gen_vec_init<mode> (reg, par));
13380 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
;; Expander for a right rotate of a 128-bit integer vector (VI_128) by an
;; SImode count (operand 2).
;; When the count does not satisfy const_0_to_<sserotatemax>_operand, the
;; preparation code broadcasts it into a vector register exactly as
;; rotl<mode>3 does, negates that vector with neg<mode>2, and emits
;; xop_vrotl<mode>3 with the negated counts.
13385 (define_expand "rotr<mode>3"
13386 [(set (match_operand:VI_128 0 "register_operand")
13388 (match_operand:VI_128 1 "nonimmediate_operand")
13389 (match_operand:SI 2 "general_operand")))]
13392 /* If we were given a scalar, convert it to parallel */
13393 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
13395 rtvec vs = rtvec_alloc (<ssescalarnum>);
13396 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
13397 rtx neg = gen_reg_rtx (<MODE>mode);
13398 rtx reg = gen_reg_rtx (<MODE>mode);
13399 rtx op2 = operands[2];
13402 if (GET_MODE (op2) != <ssescalarmode>mode)
13404 op2 = gen_reg_rtx (<ssescalarmode>mode);
13405 convert_move (op2, operands[2], false);
13408 for (i = 0; i < <ssescalarnum>; i++)
13409 RTVEC_ELT (vs, i) = op2;
13411 emit_insn (gen_vec_init<mode> (reg, par));
13412 emit_insn (gen_neg<mode>2 (neg, reg));
13413 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], neg));
;; Left rotate of a VI_128 vector by an immediate count: emits
;; vprot<ssemodesuffix> with operand 2 as the immediate.
;; Operand 2 must satisfy const_0_to_<sserotatemax>_operand; operand 1 may be
;; a register or memory.  The encoding carries a one-byte immediate.
13418 (define_insn "xop_rotl<mode>3"
13419 [(set (match_operand:VI_128 0 "register_operand" "=x")
13421 (match_operand:VI_128 1 "nonimmediate_operand" "xm")
13422 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
13424 "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
13425 [(set_attr "type" "sseishft")
13426 (set_attr "length_immediate" "1")
13427 (set_attr "mode" "TI")])
;; Right rotate of a VI_128 vector by an immediate count.
;; The output code computes a replacement immediate equal to the element bit
;; width minus operand 2 and prints it as %3 in a vprot<ssemodesuffix>
;; instruction, i.e. the right rotate is emitted as the complementary
;; left rotate.
13429 (define_insn "xop_rotr<mode>3"
13430 [(set (match_operand:VI_128 0 "register_operand" "=x")
13432 (match_operand:VI_128 1 "nonimmediate_operand" "xm")
13433 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
13437 = GEN_INT (GET_MODE_BITSIZE (<ssescalarmode>mode) - INTVAL (operands[2]));
13438 return \"vprot<ssemodesuffix>\t{%3, %1, %0|%0, %1, %3}\";
13440 [(set_attr "type" "sseishft")
13441 (set_attr "length_immediate" "1")
13442 (set_attr "mode" "TI")])
;; Expander for a per-element right rotate with a vector count (operand 2).
;; It negates the count vector into a fresh register with neg<mode>2 and
;; emits xop_vrotl<mode>3 on the negated counts.
13444 (define_expand "vrotr<mode>3"
13445 [(match_operand:VI_128 0 "register_operand")
13446 (match_operand:VI_128 1 "register_operand")
13447 (match_operand:VI_128 2 "register_operand")]
13450 rtx reg = gen_reg_rtx (<MODE>mode);
13451 emit_insn (gen_neg<mode>2 (reg, operands[2]));
13452 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
;; Expander for a per-element left rotate with a vector count (operand 2).
;; It forwards all three operands unchanged to xop_vrotl<mode>3.
13456 (define_expand "vrotl<mode>3"
13457 [(match_operand:VI_128 0 "register_operand")
13458 (match_operand:VI_128 1 "register_operand")
13459 (match_operand:VI_128 2 "register_operand")]
13462 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], operands[2]));
;; Per-element variable rotate: emits vprot<ssemodesuffix> with operand 1 as
;; the data and operand 2 as the vector of counts.
;; The RTL is an if_then_else keyed on operand 2, and one arm uses the
;; negation of operand 2.  NOTE(review): the comparison and the rotate codes
;; of the two arms are not visible in this excerpt.
;; Either operand 1 or operand 2 may be memory, but not both.
13466 (define_insn "xop_vrotl<mode>3"
13467 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
13468 (if_then_else:VI_128
13470 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
13473 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
13477 (neg:VI_128 (match_dup 2)))))]
13478 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
13479 "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
13480 [(set_attr "type" "sseishft")
13481 (set_attr "prefix_data16" "0")
13482 (set_attr "prefix_extra" "2")
13483 (set_attr "mode" "TI")])
13485 ;; XOP packed shift instructions.
;; Expander for a per-element shift of VI12_128 vectors by a vector count.
;; It negates the count vector (operand 2) with neg<mode>2 and emits
;; xop_shl<mode>3 on the negated counts.
;; NOTE(review): the shift rtx code and the condition are not visible here;
;; the name indicates a logical right shift.
13486 (define_expand "vlshr<mode>3"
13487 [(set (match_operand:VI12_128 0 "register_operand")
13489 (match_operand:VI12_128 1 "register_operand")
13490 (match_operand:VI12_128 2 "nonimmediate_operand")))]
13493 rtx neg = gen_reg_rtx (<MODE>mode);
13494 emit_insn (gen_neg<mode>2 (neg, operands[2]));
13495 emit_insn (gen_xop_shl<mode>3 (operands[0], operands[1], neg));
;; Variant of vlshr<mode>3 for VI48_128 vectors, enabled for AVX2 or XOP.
;; The visible preparation code negates the count vector and emits
;; xop_shl<mode>3.  NOTE(review): the guard that chooses this XOP route over
;; the plain pattern is not visible in this excerpt -- confirm.
13499 (define_expand "vlshr<mode>3"
13500 [(set (match_operand:VI48_128 0 "register_operand")
13502 (match_operand:VI48_128 1 "register_operand")
13503 (match_operand:VI48_128 2 "nonimmediate_operand")))]
13504 "TARGET_AVX2 || TARGET_XOP"
13508 rtx neg = gen_reg_rtx (<MODE>mode);
13509 emit_insn (gen_neg<mode>2 (neg, operands[2]));
13510 emit_insn (gen_xop_shl<mode>3 (operands[0], operands[1], neg));
;; Variant of vlshr<mode>3 for VI48_512 vectors.  Only the operand pattern is
;; visible here; no preparation code is shown, so the expander presumably
;; just produces the pattern as written -- confirm.
13515 (define_expand "vlshr<mode>3"
13516 [(set (match_operand:VI48_512 0 "register_operand")
13518 (match_operand:VI48_512 1 "register_operand")
13519 (match_operand:VI48_512 2 "nonimmediate_operand")))]
;; Variant of vlshr<mode>3 for VI48_256 vectors.  Only the operand pattern is
;; visible here; no preparation code is shown, so the expander presumably
;; just produces the pattern as written -- confirm.
13522 (define_expand "vlshr<mode>3"
13523 [(set (match_operand:VI48_256 0 "register_operand")
13525 (match_operand:VI48_256 1 "register_operand")
13526 (match_operand:VI48_256 2 "nonimmediate_operand")))]
;; Expander for a per-element arithmetic right shift (ashiftrt) of VI128_128
;; vectors by a vector count.
;; It negates the count vector with neg<mode>2 and emits xop_sha<mode>3 on
;; the negated counts.
13529 (define_expand "vashr<mode>3"
13530 [(set (match_operand:VI128_128 0 "register_operand")
13531 (ashiftrt:VI128_128
13532 (match_operand:VI128_128 1 "register_operand")
13533 (match_operand:VI128_128 2 "nonimmediate_operand")))]
13536 rtx neg = gen_reg_rtx (<MODE>mode);
13537 emit_insn (gen_neg<mode>2 (neg, operands[2]));
13538 emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], neg));
;; Expander for a per-element arithmetic right shift of V4SI by a V4SI count,
;; enabled for AVX2 or XOP.
;; The visible preparation code negates the count vector and emits
;; xop_shav4si3.  NOTE(review): the guard that chooses this XOP route over
;; the plain pattern is not visible in this excerpt -- confirm.
13542 (define_expand "vashrv4si3"
13543 [(set (match_operand:V4SI 0 "register_operand")
13544 (ashiftrt:V4SI (match_operand:V4SI 1 "register_operand")
13545 (match_operand:V4SI 2 "nonimmediate_operand")))]
13546 "TARGET_AVX2 || TARGET_XOP"
13550 rtx neg = gen_reg_rtx (V4SImode);
13551 emit_insn (gen_negv4si2 (neg, operands[2]));
13552 emit_insn (gen_xop_shav4si3 (operands[0], operands[1], neg));
;; Expander for a per-element arithmetic right shift of V16SI by a V16SI
;; count.  Only the pattern is visible; the condition is not shown here.
13557 (define_expand "vashrv16si3"
13558 [(set (match_operand:V16SI 0 "register_operand")
13559 (ashiftrt:V16SI (match_operand:V16SI 1 "register_operand")
13560 (match_operand:V16SI 2 "nonimmediate_operand")))]
;; Expander for a per-element arithmetic right shift of V8SI by a V8SI
;; count.  Only the pattern is visible; the condition is not shown here.
13563 (define_expand "vashrv8si3"
13564 [(set (match_operand:V8SI 0 "register_operand")
13565 (ashiftrt:V8SI (match_operand:V8SI 1 "register_operand")
13566 (match_operand:V8SI 2 "nonimmediate_operand")))]
;; Expander for a per-element shift of VI12_128 vectors by a vector count.
;; It passes the count vector unchanged (no negation) to xop_sha<mode>3.
;; NOTE(review): the shift rtx code is not visible here; the name indicates
;; a left shift.
13569 (define_expand "vashl<mode>3"
13570 [(set (match_operand:VI12_128 0 "register_operand")
13572 (match_operand:VI12_128 1 "register_operand")
13573 (match_operand:VI12_128 2 "nonimmediate_operand")))]
13576 emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], operands[2]));
;; Variant of vashl<mode>3 for VI48_128 vectors, enabled for AVX2 or XOP.
;; The visible preparation code forces the count vector into a register and
;; emits xop_sha<mode>3.  NOTE(review): the guard that chooses this XOP
;; route over the plain pattern is not visible in this excerpt -- confirm.
13580 (define_expand "vashl<mode>3"
13581 [(set (match_operand:VI48_128 0 "register_operand")
13583 (match_operand:VI48_128 1 "register_operand")
13584 (match_operand:VI48_128 2 "nonimmediate_operand")))]
13585 "TARGET_AVX2 || TARGET_XOP"
13589 operands[2] = force_reg (<MODE>mode, operands[2]);
13590 emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], operands[2]));
;; Variant of vashl<mode>3 for VI48_512 vectors.  Only the operand pattern is
;; visible here; no preparation code is shown.
13595 (define_expand "vashl<mode>3"
13596 [(set (match_operand:VI48_512 0 "register_operand")
13598 (match_operand:VI48_512 1 "register_operand")
13599 (match_operand:VI48_512 2 "nonimmediate_operand")))]
;; Variant of vashl<mode>3 for VI48_256 vectors.  Only the operand pattern is
;; visible here; no preparation code is shown.
13602 (define_expand "vashl<mode>3"
13603 [(set (match_operand:VI48_256 0 "register_operand")
13605 (match_operand:VI48_256 1 "register_operand")
13606 (match_operand:VI48_256 2 "nonimmediate_operand")))]
;; Per-element variable shift: emits vpsha<ssemodesuffix> with operand 1 as
;; the data and operand 2 as the vector of counts.
;; The RTL is an if_then_else keyed on operand 2, and one arm uses the
;; negation of operand 2.  NOTE(review): the comparison and the shift codes
;; of the two arms are not visible in this excerpt.
;; Either operand 1 or operand 2 may be memory, but not both.
13609 (define_insn "xop_sha<mode>3"
13610 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
13611 (if_then_else:VI_128
13613 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
13616 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
13620 (neg:VI_128 (match_dup 2)))))]
13621 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
13622 "vpsha<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
13623 [(set_attr "type" "sseishft")
13624 (set_attr "prefix_data16" "0")
13625 (set_attr "prefix_extra" "2")
13626 (set_attr "mode" "TI")])
;; Per-element variable shift: emits vpshl<ssemodesuffix> with operand 1 as
;; the data and operand 2 as the vector of counts.
;; The RTL is an if_then_else keyed on operand 2, and one arm uses the
;; negation of operand 2.  NOTE(review): the comparison and the shift codes
;; of the two arms are not visible in this excerpt.
;; Either operand 1 or operand 2 may be memory, but not both.
13628 (define_insn "xop_shl<mode>3"
13629 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
13630 (if_then_else:VI_128
13632 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
13635 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
13639 (neg:VI_128 (match_dup 2)))))]
13640 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
13641 "vpshl<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
13642 [(set_attr "type" "sseishft")
13643 (set_attr "prefix_data16" "0")
13644 (set_attr "prefix_extra" "2")
13645 (set_attr "mode" "TI")])
;; Expander for shifting a byte vector (VI1_AVX2) by a scalar SImode count,
;; for every code in the any_shift iterator.
;; With XOP and V16QImode: for codes other than ASHIFT a constant count is
;; negated up front; the count is then broadcast into a V16QI register with
;; vec_initv16qi, and the shift is emitted through xop_shlv16qi3 for LSHIFTRT
;; or xop_shav16qi3 otherwise.  The gen_negv16qi2 call is presumably guarded
;; by the `negate' flag for non-constant counts (guard not visible here).
;; In the remaining cases the work is delegated to ix86_expand_vecop_qihi.
13647 (define_expand "<shift_insn><mode>3"
13648 [(set (match_operand:VI1_AVX2 0 "register_operand")
13649 (any_shift:VI1_AVX2
13650 (match_operand:VI1_AVX2 1 "register_operand")
13651 (match_operand:SI 2 "nonmemory_operand")))]
13654 if (TARGET_XOP && <MODE>mode == V16QImode)
13656 bool negate = false;
13657 rtx (*gen) (rtx, rtx, rtx);
13661 if (<CODE> != ASHIFT)
13663 if (CONST_INT_P (operands[2]))
13664 operands[2] = GEN_INT (-INTVAL (operands[2]));
13668 par = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
13669 for (i = 0; i < 16; i++)
13670 XVECEXP (par, 0, i) = operands[2];
13672 tmp = gen_reg_rtx (V16QImode);
13673 emit_insn (gen_vec_initv16qi (tmp, par));
13676 emit_insn (gen_negv16qi2 (tmp, tmp));
13678 gen = (<CODE> == LSHIFTRT ? gen_xop_shlv16qi3 : gen_xop_shav16qi3);
13679 emit_insn (gen (operands[0], operands[1], tmp));
13682 ix86_expand_vecop_qihi (<CODE>, operands[0], operands[1], operands[2]);
;; Expander for shifting a V2DI vector by a scalar DImode count; the name
;; indicates an arithmetic right shift (the rtx code line is not visible).
;; A constant count is negated up front; the count is then broadcast into a
;; V2DI register with vec_initv2di and the shift is emitted through
;; xop_shav2di3.  The gen_negv2di2 call is presumably guarded by the
;; `negate' flag for non-constant counts (guard not visible here).
13686 (define_expand "ashrv2di3"
13687 [(set (match_operand:V2DI 0 "register_operand")
13689 (match_operand:V2DI 1 "register_operand")
13690 (match_operand:DI 2 "nonmemory_operand")))]
13693 rtx reg = gen_reg_rtx (V2DImode);
13695 bool negate = false;
13698 if (CONST_INT_P (operands[2]))
13699 operands[2] = GEN_INT (-INTVAL (operands[2]));
13703 par = gen_rtx_PARALLEL (V2DImode, rtvec_alloc (2));
13704 for (i = 0; i < 2; i++)
13705 XVECEXP (par, 0, i) = operands[2];
13707 emit_insn (gen_vec_initv2di (reg, par));
13710 emit_insn (gen_negv2di2 (reg, reg));
13712 emit_insn (gen_xop_shav2di3 (operands[0], operands[1], reg));
13716 ;; XOP FRCZ support
;; One-input XOP operation over every mode in FMAMODE: emits
;; vfrcz<ssemodesuffix> from operand 1 (register or memory) into operand 0.
;; NOTE(review): the unspec code and the condition are not visible here.
13717 (define_insn "xop_frcz<mode>2"
13718 [(set (match_operand:FMAMODE 0 "register_operand" "=x")
13720 [(match_operand:FMAMODE 1 "nonimmediate_operand" "xm")]
13723 "vfrcz<ssemodesuffix>\t{%1, %0|%0, %1}"
13724 [(set_attr "type" "ssecvt1")
13725 (set_attr "mode" "<MODE>")])
;; Expander for the scalar-in-vector form of frcz on VF_128 modes.
;; Its preparation statement sets operand 2 to the zero vector of the mode.
;; NOTE(review): how operand 2 is used in the pattern is not visible here;
;; presumably it supplies the remaining elements of the result -- confirm.
13727 (define_expand "xop_vmfrcz<mode>2"
13728 [(set (match_operand:VF_128 0 "register_operand")
13731 [(match_operand:VF_128 1 "nonimmediate_operand")]
13736 "operands[2] = CONST0_RTX (<MODE>mode);")
;; Insn matched by the xop_vmfrcz<mode>2 expansion: operand 2 must be a zero
;; constant (const0_operand).  Emits vfrcz<ssescalarmodesuffix>; in Intel
;; syntax a memory source is printed with the <iptr> size prefix.
13738 (define_insn "*xop_vmfrcz<mode>2"
13739 [(set (match_operand:VF_128 0 "register_operand" "=x")
13742 [(match_operand:VF_128 1 "nonimmediate_operand" "xm")]
13744 (match_operand:VF_128 2 "const0_operand")
13747 "vfrcz<ssescalarmodesuffix>\t{%1, %0|%0, %<iptr>1}"
13748 [(set_attr "type" "ssecvt1")
13749 (set_attr "mode" "<MODE>")])
;; Element-wise integer comparison of two VI_128 vectors producing a VI_128
;; result.  Operand 1 is the comparison operator, restricted by
;; ix86_comparison_int_operator; it is printed through %Y1 to form the
;; vpcom<cond><ssemodesuffix> mnemonic.  Operand 3 may be memory.
13751 (define_insn "xop_maskcmp<mode>3"
13752 [(set (match_operand:VI_128 0 "register_operand" "=x")
13753 (match_operator:VI_128 1 "ix86_comparison_int_operator"
13754 [(match_operand:VI_128 2 "register_operand" "x")
13755 (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
13757 "vpcom%Y1<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
13758 [(set_attr "type" "sse4arg")
13759 (set_attr "prefix_data16" "0")
13760 (set_attr "prefix_rep" "0")
13761 (set_attr "prefix_extra" "2")
13762 (set_attr "length_immediate" "1")
13763 (set_attr "mode" "TI")])
;; Unsigned counterpart of xop_maskcmp<mode>3: the operator must satisfy
;; ix86_comparison_uns_operator and the mnemonic carries a `u' before the
;; element suffix (vpcom<cond>u<ssemodesuffix>).
13765 (define_insn "xop_maskcmp_uns<mode>3"
13766 [(set (match_operand:VI_128 0 "register_operand" "=x")
13767 (match_operator:VI_128 1 "ix86_comparison_uns_operator"
13768 [(match_operand:VI_128 2 "register_operand" "x")
13769 (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
13771 "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
13772 [(set_attr "type" "ssecmp")
13773 (set_attr "prefix_data16" "0")
13774 (set_attr "prefix_rep" "0")
13775 (set_attr "prefix_extra" "2")
13776 (set_attr "length_immediate" "1")
13777 (set_attr "mode" "TI")])
13779 ;; Version of pcom*u* that is called from the intrinsics that allows pcomequ*
13780 ;; and pcomneu* not to be converted to the signed ones in case somebody needs
13781 ;; the exact instruction generated for the intrinsic.
;; Same unsigned comparison as xop_maskcmp_uns<mode>3, but the comparison is
;; wrapped in UNSPEC_XOP_UNSIGNED_CMP, which keeps it opaque so the exact
;; vpcom<cond>u<ssemodesuffix> instruction requested is the one emitted.
13782 (define_insn "xop_maskcmp_uns2<mode>3"
13783 [(set (match_operand:VI_128 0 "register_operand" "=x")
13785 [(match_operator:VI_128 1 "ix86_comparison_uns_operator"
13786 [(match_operand:VI_128 2 "register_operand" "x")
13787 (match_operand:VI_128 3 "nonimmediate_operand" "xm")])]
13788 UNSPEC_XOP_UNSIGNED_CMP))]
13790 "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
13791 [(set_attr "type" "ssecmp")
13792 (set_attr "prefix_data16" "0")
13793 (set_attr "prefix_extra" "2")
13794 (set_attr "length_immediate" "1")
13795 (set_attr "mode" "TI")])
13797 ;; Pcomtrue and pcomfalse support. These are useless instructions, but are
13798 ;; being added here to be complete.
;; Constant-result compare modelled as UNSPEC_XOP_TRUEFALSE.
;; Operand 3 is an integer constant that picks the mnemonic: a nonzero value
;; emits vpcomtrue<ssemodesuffix>, zero emits vpcomfalse<ssemodesuffix>.
;; Operands 1 and 2 are printed as the instruction's two sources.
13799 (define_insn "xop_pcom_tf<mode>3"
13800 [(set (match_operand:VI_128 0 "register_operand" "=x")
13802 [(match_operand:VI_128 1 "register_operand" "x")
13803 (match_operand:VI_128 2 "nonimmediate_operand" "xm")
13804 (match_operand:SI 3 "const_int_operand" "n")]
13805 UNSPEC_XOP_TRUEFALSE))]
13808 return ((INTVAL (operands[3]) != 0)
13809 ? "vpcomtrue<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
13810 : "vpcomfalse<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}");
13812 [(set_attr "type" "ssecmp")
13813 (set_attr "prefix_data16" "0")
13814 (set_attr "prefix_extra" "2")
13815 (set_attr "length_immediate" "1")
13816 (set_attr "mode" "TI")])
;; Two-source permute for 128- and 256-bit float vectors (VF_128_256):
;; emits vpermil2<ssemodesuffix>.
;; Operands 1 and 2 are the data sources, operand 3 is a vector in the
;; matching integer vector mode (register or memory), and operand 4 is an
;; immediate in the range 0..3.
;; NOTE(review): operand 2 uses the predicate nonimmediate_operand but the
;; constraint "%x", which only admits registers -- confirm this is intended.
13818 (define_insn "xop_vpermil2<mode>3"
13819 [(set (match_operand:VF_128_256 0 "register_operand" "=x")
13821 [(match_operand:VF_128_256 1 "register_operand" "x")
13822 (match_operand:VF_128_256 2 "nonimmediate_operand" "%x")
13823 (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "xm")
13824 (match_operand:SI 4 "const_0_to_3_operand" "n")]
13827 "vpermil2<ssemodesuffix>\t{%4, %3, %2, %1, %0|%0, %1, %2, %3, %4}"
13828 [(set_attr "type" "sse4arg")
13829 (set_attr "length_immediate" "1")
13830 (set_attr "mode" "<MODE>")])
13832 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; AES encryption instruction on V2DI, modelled as an unspec of operands 1
;; and 2.  Two alternatives, selected by the "isa" attribute:
;;   noavx: two-operand aesenc, operand 1 tied to the destination ("0");
;;   avx:   three-operand vaesenc with a separate destination.
13834 (define_insn "aesenc"
13835 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
13836 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
13837 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
13841 aesenc\t{%2, %0|%0, %2}
13842 vaesenc\t{%2, %1, %0|%0, %1, %2}"
13843 [(set_attr "isa" "noavx,avx")
13844 (set_attr "type" "sselog1")
13845 (set_attr "prefix_extra" "1")
13846 (set_attr "prefix" "orig,vex")
13847 (set_attr "btver2_decode" "double,double")
13848 (set_attr "mode" "TI")])
;; AES last-round encryption instruction on V2DI, modelled as
;; UNSPEC_AESENCLAST.  Two alternatives, selected by the "isa" attribute:
;;   noavx: two-operand aesenclast, operand 1 tied to the destination;
;;   avx:   three-operand vaesenclast with a separate destination.
13850 (define_insn "aesenclast"
13851 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
13852 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
13853 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
13854 UNSPEC_AESENCLAST))]
13857 aesenclast\t{%2, %0|%0, %2}
13858 vaesenclast\t{%2, %1, %0|%0, %1, %2}"
13859 [(set_attr "isa" "noavx,avx")
13860 (set_attr "type" "sselog1")
13861 (set_attr "prefix_extra" "1")
13862 (set_attr "prefix" "orig,vex")
13863 (set_attr "btver2_decode" "double,double")
13864 (set_attr "mode" "TI")])
;; AES decryption instruction on V2DI, modelled as an unspec of operands 1
;; and 2.  Two alternatives, selected by the "isa" attribute:
;;   noavx: two-operand aesdec, operand 1 tied to the destination ("0");
;;   avx:   three-operand vaesdec with a separate destination.
13866 (define_insn "aesdec"
13867 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
13868 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
13869 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
13873 aesdec\t{%2, %0|%0, %2}
13874 vaesdec\t{%2, %1, %0|%0, %1, %2}"
13875 [(set_attr "isa" "noavx,avx")
13876 (set_attr "type" "sselog1")
13877 (set_attr "prefix_extra" "1")
13878 (set_attr "prefix" "orig,vex")
13879 (set_attr "btver2_decode" "double,double")
13880 (set_attr "mode" "TI")])
;; AES last-round decryption instruction on V2DI, modelled as
;; UNSPEC_AESDECLAST.  Two alternatives, selected by the "isa" attribute:
;;   noavx: two-operand aesdeclast, operand 1 tied to the destination;
;;   avx:   three-operand vaesdeclast with a separate destination.
13882 (define_insn "aesdeclast"
13883 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
13884 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
13885 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
13886 UNSPEC_AESDECLAST))]
13889 aesdeclast\t{%2, %0|%0, %2}
13890 vaesdeclast\t{%2, %1, %0|%0, %1, %2}"
13891 [(set_attr "isa" "noavx,avx")
13892 (set_attr "type" "sselog1")
13893 (set_attr "prefix_extra" "1")
13894 (set_attr "prefix" "orig,vex")
13895 (set_attr "btver2_decode" "double,double")
13896 (set_attr "mode" "TI")])
;; Single-source AES instruction on V2DI: emits aesimc, with the %v output
;; modifier and the "maybe_vex" prefix attribute covering both the legacy
;; and the VEX-encoded spelling.  Operand 1 may be a register or memory.
13898 (define_insn "aesimc"
13899 [(set (match_operand:V2DI 0 "register_operand" "=x")
13900 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")]
13903 "%vaesimc\t{%1, %0|%0, %1}"
13904 [(set_attr "type" "sselog1")
13905 (set_attr "prefix_extra" "1")
13906 (set_attr "prefix" "maybe_vex")
13907 (set_attr "mode" "TI")])
;; AES key-generation helper on V2DI, modelled as UNSPEC_AESKEYGENASSIST.
;; Operand 1 is the source (register or memory) and operand 2 an 8-bit
;; immediate (0..255).  The %v modifier and "maybe_vex" prefix cover both
;; the legacy and the VEX-encoded spelling.
13909 (define_insn "aeskeygenassist"
13910 [(set (match_operand:V2DI 0 "register_operand" "=x")
13911 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")
13912 (match_operand:SI 2 "const_0_to_255_operand" "n")]
13913 UNSPEC_AESKEYGENASSIST))]
13915 "%vaeskeygenassist\t{%2, %1, %0|%0, %1, %2}"
13916 [(set_attr "type" "sselog1")
13917 (set_attr "prefix_extra" "1")
13918 (set_attr "length_immediate" "1")
13919 (set_attr "prefix" "maybe_vex")
13920 (set_attr "mode" "TI")])
;; pclmulqdq / vpclmulqdq on V2DI, modelled as an unspec of two vector
;; sources and an 8-bit immediate (operand 3, 0..255).
;; Two alternatives, selected by the "isa" attribute:
;;   noavx: two-operand form, operand 1 tied to the destination ("0");
;;   avx:   three-operand VEX form with a separate destination.
13922 (define_insn "pclmulqdq"
13923 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
13924 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
13925 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")
13926 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
13930 pclmulqdq\t{%3, %2, %0|%0, %2, %3}
13931 vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}"
13932 [(set_attr "isa" "noavx,avx")
13933 (set_attr "type" "sselog1")
13934 (set_attr "prefix_extra" "1")
13935 (set_attr "length_immediate" "1")
13936 (set_attr "prefix" "orig,vex")
13937 (set_attr "mode" "TI")])
;; Expander that builds the PARALLEL describing a zero-all operation.
;; Element 0 is an unspec_volatile marker; it is followed by one SET per SSE
;; register (16 registers when TARGET_64BIT, otherwise 8) that stores the
;; V8SImode zero constant into that register.
13939 (define_expand "avx_vzeroall"
13940 [(match_par_dup 0 [(const_int 0)])]
13943 int nregs = TARGET_64BIT ? 16 : 8;
13946 operands[0] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs + 1));
13948 XVECEXP (operands[0], 0, 0)
13949 = gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, const0_rtx),
13952 for (regno = 0; regno < nregs; regno++)
13953 XVECEXP (operands[0], 0, regno + 1)
13954 = gen_rtx_SET (VOIDmode,
13955 gen_rtx_REG (V8SImode, SSE_REGNO (regno)),
13956 CONST0_RTX (V8SImode));
;; Insn that recognises a PARALLEL accepted by the vzeroall_operation
;; predicate whose first element is the UNSPECV_VZEROALL marker.
;; It has no ModRM byte and no memory access, and is VEX-encoded.
;; NOTE(review): the condition and output template are not visible here.
13959 (define_insn "*avx_vzeroall"
13960 [(match_parallel 0 "vzeroall_operation"
13961 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROALL)])]
13964 [(set_attr "type" "sse")
13965 (set_attr "modrm" "0")
13966 (set_attr "memory" "none")
13967 (set_attr "prefix" "vex")
13968 (set_attr "btver2_decode" "vector")
13969 (set_attr "mode" "OI")])
13971 ;; Clear the upper 128bits of AVX registers, equivalent to a NOP
13972 ;; if the upper 128bits are unused.
;; Modelled purely as the volatile unspec UNSPECV_VZEROUPPER, with no
;; operands.  It has no ModRM byte and no memory access, and is VEX-encoded.
;; NOTE(review): the condition and output template are not visible here.
13973 (define_insn "avx_vzeroupper"
13974 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)]
13977 [(set_attr "type" "sse")
13978 (set_attr "modrm" "0")
13979 (set_attr "memory" "none")
13980 (set_attr "prefix" "vex")
13981 (set_attr "btver2_decode" "vector")
13982 (set_attr "mode" "OI")])
;; Integer broadcast for every mode in VI: emits vpbroadcast<ssemodesuffix>.
;; The source is element 0 (vec_select with index 0) of operand 1, which has
;; the <ssexmmmode> mode and may be a register or memory.  In Intel syntax
;; the source is printed with the <iptr> size prefix.
13984 (define_insn "avx2_pbroadcast<mode>"
13985 [(set (match_operand:VI 0 "register_operand" "=x")
13987 (vec_select:<ssescalarmode>
13988 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "xm")
13989 (parallel [(const_int 0)]))))]
13991 "vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}"
13992 [(set_attr "type" "ssemov")
13993 (set_attr "prefix_extra" "1")
13994 (set_attr "prefix" "vex")
13995 (set_attr "mode" "<sseinsnmode>")])
;; Integer broadcast of element 0 of a 256-bit source (VI_256) to every
;; element of a 256-bit destination: emits vpbroadcast<ssemodesuffix>.
;; Alternative 0 takes the source from memory; alternative 1 takes it from a
;; register, printed with the %x modifier.
13997 (define_insn "avx2_pbroadcast<mode>_1"
13998 [(set (match_operand:VI_256 0 "register_operand" "=x,x")
13999 (vec_duplicate:VI_256
14000 (vec_select:<ssescalarmode>
14001 (match_operand:VI_256 1 "nonimmediate_operand" "m,x")
14002 (parallel [(const_int 0)]))))]
14005 vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}
14006 vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %x1}"
14007 [(set_attr "type" "ssemov")
14008 (set_attr "prefix_extra" "1")
14009 (set_attr "prefix" "vex")
14010 (set_attr "mode" "<sseinsnmode>")])
;; Variable permute for the VI48F_256_512 modes, modelled as an unspec:
;; emits vperm<ssemodesuffix>.
;; Operand 1 is the data (register or memory) and operand 2 a register in
;; the matching integer vector mode.  <mask_name>/<mask_operand3> provide the
;; optional masked form.
14012 (define_insn "<avx2_avx512f>_permvar<mode><mask_name>"
14013 [(set (match_operand:VI48F_256_512 0 "register_operand" "=v")
14014 (unspec:VI48F_256_512
14015 [(match_operand:VI48F_256_512 1 "nonimmediate_operand" "vm")
14016 (match_operand:<sseintvecmode> 2 "register_operand" "v")]
14018 "TARGET_AVX2 && <mask_mode512bit_condition>"
14019 "vperm<ssemodesuffix>\t{%1, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1}"
14020 [(set_attr "type" "sselog")
14021 (set_attr "prefix" "<mask_prefix2>")
14022 (set_attr "mode" "<sseinsnmode>")])
;; Expander for an immediate-controlled permute on the VI8F_256_512 modes.
;; It splits the 8-bit immediate (operand 2) into four 2-bit selectors,
;; lowest bits first, and passes them to <avx2_avx512f>_perm<mode>_1.
14024 (define_expand "<avx2_avx512f>_perm<mode>"
14025 [(match_operand:VI8F_256_512 0 "register_operand")
14026 (match_operand:VI8F_256_512 1 "nonimmediate_operand")
14027 (match_operand:SI 2 "const_0_to_255_operand")]
14030 int mask = INTVAL (operands[2]);
14031 emit_insn (gen_<avx2_avx512f>_perm<mode>_1 (operands[0], operands[1],
14032 GEN_INT ((mask >> 0) & 3),
14033 GEN_INT ((mask >> 2) & 3),
14034 GEN_INT ((mask >> 4) & 3),
14035 GEN_INT ((mask >> 6) & 3)));
;; Masked form of the immediate-controlled permute for the V8FI modes.
;; It splits the 8-bit immediate (operand 2) into four 2-bit selectors and
;; passes them, followed by operand 3 (a vector_move_operand) and operand 4
;; (a register in <avx512fmaskmode>), to the *_perm<mode>_1_mask generator.
14039 (define_expand "avx512f_perm<mode>_mask"
14040 [(match_operand:V8FI 0 "register_operand")
14041 (match_operand:V8FI 1 "nonimmediate_operand")
14042 (match_operand:SI 2 "const_0_to_255_operand")
14043 (match_operand:V8FI 3 "vector_move_operand")
14044 (match_operand:<avx512fmaskmode> 4 "register_operand")]
14047 int mask = INTVAL (operands[2]);
14048 emit_insn (gen_<avx2_avx512f>_perm<mode>_1_mask (operands[0], operands[1],
14049 GEN_INT ((mask >> 0) & 3),
14050 GEN_INT ((mask >> 2) & 3),
14051 GEN_INT ((mask >> 4) & 3),
14052 GEN_INT ((mask >> 6) & 3),
14053 operands[3], operands[4]));
;; Immediate-controlled permute expressed as a vec_select with four
;; selectors, each in the range 0..3 (operands 2 to 5).
;; The output code packs the four selectors back into one 8-bit immediate,
;; two bits each with operand 2 in the lowest bits, stores it in operand 2,
;; and emits vperm<ssemodesuffix> with that immediate.
14057 (define_insn "<avx2_avx512f>_perm<mode>_1<mask_name>"
14058 [(set (match_operand:VI8F_256_512 0 "register_operand" "=v")
14059 (vec_select:VI8F_256_512
14060 (match_operand:VI8F_256_512 1 "nonimmediate_operand" "vm")
14061 (parallel [(match_operand 2 "const_0_to_3_operand")
14062 (match_operand 3 "const_0_to_3_operand")
14063 (match_operand 4 "const_0_to_3_operand")
14064 (match_operand 5 "const_0_to_3_operand")])))]
14065 "TARGET_AVX2 && <mask_mode512bit_condition>"
14068 mask |= INTVAL (operands[2]) << 0;
14069 mask |= INTVAL (operands[3]) << 2;
14070 mask |= INTVAL (operands[4]) << 4;
14071 mask |= INTVAL (operands[5]) << 6;
14072 operands[2] = GEN_INT (mask);
14073 return "vperm<ssemodesuffix>\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
14075 [(set_attr "type" "sselog")
14076 (set_attr "prefix" "<mask_prefix2>")
14077 (set_attr "mode" "<sseinsnmode>")])
;; Two-source permute on V4DI controlled by an 8-bit immediate (operand 3):
;; emits vperm2i128.  Operand 2 may be a register or memory.
;; NOTE(review): the unspec code and the condition are not visible here.
14079 (define_insn "avx2_permv2ti"
14080 [(set (match_operand:V4DI 0 "register_operand" "=x")
14082 [(match_operand:V4DI 1 "register_operand" "x")
14083 (match_operand:V4DI 2 "nonimmediate_operand" "xm")
14084 (match_operand:SI 3 "const_0_to_255_operand" "n")]
14087 "vperm2i128\t{%3, %2, %1, %0|%0, %1, %2, %3}"
14088 [(set_attr "type" "sselog")
14089 (set_attr "prefix" "vex")
14090 (set_attr "mode" "OI")])
;; Broadcast element 0 of a V2DF register (operand 1) to all four elements
;; of a V4DF register: emits vbroadcastsd with a register source.
14092 (define_insn "avx2_vec_dupv4df"
14093 [(set (match_operand:V4DF 0 "register_operand" "=x")
14094 (vec_duplicate:V4DF
14096 (match_operand:V2DF 1 "register_operand" "x")
14097 (parallel [(const_int 0)]))))]
14099 "vbroadcastsd\t{%1, %0|%0, %1}"
14100 [(set_attr "type" "sselog1")
14101 (set_attr "prefix" "vex")
14102 (set_attr "mode" "V4DF")])
14104 ;; Modes handled by AVX vec_dup patterns.
;; Iterates over four vector modes: V8SI, V8SF, V4DI and V4DF.
;; No per-mode enabling condition is attached.
14105 (define_mode_iterator AVX_VEC_DUP_MODE
14106 [V8SI V8SF V4DI V4DF])
;; Broadcast a scalar (operand 1) to every element of an AVX_VEC_DUP_MODE
;; vector.  Three alternatives, qualified by the "isa" attribute:
;;   0 (any ISA): memory source, vbroadcast<ssescalarmodesuffix>;
;;   1 (avx2):    register source, same mnemonic, printed with %x;
;;   2 (noavx2):  register source; its output template is not visible in
;;                this excerpt.
14108 (define_insn "vec_dup<mode>"
14109 [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand" "=x,x,x")
14110 (vec_duplicate:AVX_VEC_DUP_MODE
14111 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "m,x,?x")))]
14114 vbroadcast<ssescalarmodesuffix>\t{%1, %0|%0, %1}
14115 vbroadcast<ssescalarmodesuffix>\t{%x1, %0|%0, %x1}
14117 [(set_attr "type" "ssemov")
14118 (set_attr "prefix_extra" "1")
14119 (set_attr "prefix" "vex")
14120 (set_attr "isa" "*,avx2,noavx2")
14121 (set_attr "mode" "V8SF")])
;; 512-bit broadcast (VI48F_512) of element 0 of operand 1, which has the
;; <ssexmmmode> mode and may be a register or memory.
;; Emits v<sseintprefix>broadcast<bcstscalarsuff>; <mask_name> and
;; <mask_operand2> provide the optional masked form.  EVEX-encoded.
14123 (define_insn "<mask_codefor>avx512f_vec_dup<mode><mask_name>"
14124 [(set (match_operand:VI48F_512 0 "register_operand" "=v")
14125 (vec_duplicate:VI48F_512
14126 (vec_select:<ssescalarmode>
14127 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "vm")
14128 (parallel [(const_int 0)]))))]
14130 "v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
14131 [(set_attr "type" "ssemov")
14132 (set_attr "prefix" "evex")
14133 (set_attr "mode" "<sseinsnmode>")])
;; Duplicate an <ssexmmmode> value (operand 1) across a V16FI destination.
;; Alternative 0 (register source) emits vshuf<shuffletype>32x4 with
;; immediate 0 and the source printed with %g as both inputs; alternative 1
;; (memory source) emits vbroadcast<shuffletype>32x4.
14135 (define_insn "<mask_codefor>avx512f_broadcast<mode><mask_name>"
14136 [(set (match_operand:V16FI 0 "register_operand" "=v,v")
14137 (vec_duplicate:V16FI
14138 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "v,m")))]
14141 vshuf<shuffletype>32x4\t{$0x0, %g1, %g1, %0<mask_operand2>|%0<mask_operand2>, %g1, %g1, 0x0}
14142 vbroadcast<shuffletype>32x4\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
14143 [(set_attr "type" "ssemov")
14144 (set_attr "prefix" "evex")
14145 (set_attr "mode" "<sseinsnmode>")])
;; Duplicate a half-width vector (operand 1, <ssehalfvecmode>) across a V8FI
;; destination.
;; Alternative 0 (register source) emits vshuf<shuffletype>64x2 with
;; immediate 0x44 and the source printed with %g as both inputs;
;; alternative 1 (memory source) emits vbroadcast<shuffletype>64x4.
14147 (define_insn "<mask_codefor>avx512f_broadcast<mode><mask_name>"
14148 [(set (match_operand:V8FI 0 "register_operand" "=v,v")
14149 (vec_duplicate:V8FI
14150 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "v,m")))]
14153 vshuf<shuffletype>64x2\t{$0x44, %g1, %g1, %0<mask_operand2>|%0<mask_operand2>, %g1, %g1, 0x44}
14154 vbroadcast<shuffletype>64x4\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
14155 [(set_attr "type" "ssemov")
14156 (set_attr "prefix" "evex")
14157 (set_attr "mode" "<sseinsnmode>")])
;; Broadcast a scalar held in a general register ("r") to every element of
;; a VI48_512 vector: emits vpbroadcast<bcstscalarsuff>.
;; The condition requires AVX512F, and for V8DImode additionally
;; TARGET_64BIT.
14159 (define_insn "<mask_codefor>avx512f_vec_dup_gpr<mode><mask_name>"
14160 [(set (match_operand:VI48_512 0 "register_operand" "=v")
14161 (vec_duplicate:VI48_512
14162 (match_operand:<ssescalarmode> 1 "register_operand" "r")))]
14163 "TARGET_AVX512F && (<MODE>mode != V8DImode || TARGET_64BIT)"
14164 "vpbroadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
14165 [(set_attr "type" "ssemov")
14166 (set_attr "prefix" "evex")
14167 (set_attr "mode" "<sseinsnmode>")])
;; Broadcast a scalar operand (register "v" or memory) to every element of a
;; VI48F_512 vector: emits v<sseintprefix>broadcast<bcstscalarsuff>.
;; Unlike avx512f_vec_dup<mode>, the source here is a plain scalar-mode
;; operand rather than element 0 selected from a vector.
14169 (define_insn "<mask_codefor>avx512f_vec_dup_mem<mode><mask_name>"
14170 [(set (match_operand:VI48F_512 0 "register_operand" "=v")
14171 (vec_duplicate:VI48F_512
14172 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "vm")))]
14174 "v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
14175 [(set_attr "type" "ssemov")
14176 (set_attr "prefix" "evex")
14177 (set_attr "mode" "<sseinsnmode>")])
;; Emits vbroadcasti128 into a 256-bit integer vector (VI_256).
;; Operand 1 is the half-width vector and must be in memory
;; (memory_operand, constraint "m").
;; NOTE(review): the RTL wrapper around operand 1 is not visible here.
14179 (define_insn "avx2_vbroadcasti128_<mode>"
14180 [(set (match_operand:VI_256 0 "register_operand" "=x")
14182 (match_operand:<ssehalfvecmode> 1 "memory_operand" "m")
14185 "vbroadcasti128\t{%1, %0|%0, %1}"
14186 [(set_attr "type" "ssemov")
14187 (set_attr "prefix_extra" "1")
14188 (set_attr "prefix" "vex")
14189 (set_attr "mode" "OI")])
;; NOTE(review): the opening line of this definition is not visible in this
;; excerpt; its shape (pattern, condition, replacement, preparation string)
;; looks like a define_split -- confirm.
;; After reload, with AVX but not AVX2, a vec_duplicate of a scalar register
;; into an AVX_VEC_DUP_MODE register is replaced by two steps: duplicate the
;; scalar into operand 2, then vec_concat operand 2 with itself.  Operand 2
;; is the half-width-mode view of the destination's own hard register.
14192 [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand")
14193 (vec_duplicate:AVX_VEC_DUP_MODE
14194 (match_operand:<ssescalarmode> 1 "register_operand")))]
14195 "TARGET_AVX && !TARGET_AVX2 && reload_completed"
14196 [(set (match_dup 2)
14197 (vec_duplicate:<ssehalfvecmode> (match_dup 1)))
14199 (vec_concat:AVX_VEC_DUP_MODE (match_dup 2) (match_dup 2)))]
14200 "operands[2] = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (operands[0]));")
;; Fill a 256-bit vector (V_256) from a half-width vector (operand 1).
;; Three alternatives:
;;   0: memory source, vbroadcast<i128>;
;;   1: source tied to the destination ("0"), vinsert<i128> with immediate 1;
;;   2: other register source, vperm2<i128> with immediate 0 and the source
;;      printed with %t as both inputs.
14202 (define_insn "avx_vbroadcastf128_<mode>"
14203 [(set (match_operand:V_256 0 "register_operand" "=x,x,x")
14205 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "m,0,?x")
14209 vbroadcast<i128>\t{%1, %0|%0, %1}
14210 vinsert<i128>\t{$1, %1, %0, %0|%0, %0, %1, 1}
14211 vperm2<i128>\t{$0, %t1, %t1, %0|%0, %t1, %t1, 0}"
14212 [(set_attr "type" "ssemov,sselog1,sselog1")
14213 (set_attr "prefix_extra" "1")
14214 (set_attr "length_immediate" "0,1,1")
14215 (set_attr "prefix" "vex")
14216 (set_attr "mode" "<sseinsnmode>")])
;; Broadcast a QImode register operand with the "Yk" constraint to every
;; element of a V8DI vector: emits vpbroadcastmb2q.
;; NOTE(review): the extension wrapper and the condition are not visible.
14218 (define_insn "avx512cd_maskb_vec_dupv8di"
14219 [(set (match_operand:V8DI 0 "register_operand" "=v")
14220 (vec_duplicate:V8DI
14222 (match_operand:QI 1 "register_operand" "Yk"))))]
14224 "vpbroadcastmb2q\t{%1, %0|%0, %1}"
14225 [(set_attr "type" "mskmov")
14226 (set_attr "prefix" "evex")
14227 (set_attr "mode" "XI")])
;; Broadcast an HImode register operand with the "Yk" constraint to every
;; element of a V16SI vector: emits vpbroadcastmw2d.
;; NOTE(review): the extension wrapper and the condition are not visible.
14229 (define_insn "avx512cd_maskw_vec_dupv16si"
14230 [(set (match_operand:V16SI 0 "register_operand" "=v")
14231 (vec_duplicate:V16SI
14233 (match_operand:HI 1 "register_operand" "Yk"))))]
14235 "vpbroadcastmw2d\t{%1, %0|%0, %1}"
14236 [(set_attr "type" "mskmov")
14237 (set_attr "prefix" "evex")
14238 (set_attr "mode" "XI")])
14240 ;; Recognize broadcast as a vec_select as produced by builtin_vec_perm.
14241 ;; If it so happens that the input is in memory, use vbroadcast.
14242 ;; Otherwise use vpermilp (and in the case of 256-bit modes, vperm2f128).
;; V4SF broadcast expressed as a selection whose index list satisfies
;; avx_vbroadcast_operand; operand 3 is the first index (the element to
;; replicate).
;; The output code switches on the alternative.  The memory path re-addresses
;; operand 1 as an SFmode reference at byte offset elt * 4 and emits
;; vbroadcastss; the register path emits vpermilps with the immediate
;; elt * 0x55, which repeats the 2-bit element index in all four fields.
14243 (define_insn "*avx_vperm_broadcast_v4sf"
14244 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
14246 (match_operand:V4SF 1 "nonimmediate_operand" "m,o,x")
14247 (match_parallel 2 "avx_vbroadcast_operand"
14248 [(match_operand 3 "const_int_operand" "C,n,n")])))]
14251 int elt = INTVAL (operands[3]);
14252 switch (which_alternative)
14256 operands[1] = adjust_address_nv (operands[1], SFmode, elt * 4);
14257 return "vbroadcastss\t{%1, %0|%0, %k1}";
14259 operands[2] = GEN_INT (elt * 0x55);
14260 return "vpermilps\t{%2, %1, %0|%0, %1, %2}";
14262 gcc_unreachable ();
14265 [(set_attr "type" "ssemov,ssemov,sselog1")
14266 (set_attr "prefix_extra" "1")
14267 (set_attr "length_immediate" "0,0,1")
14268 (set_attr "prefix" "vex")
14269 (set_attr "mode" "SF,SF,V4SF")])
;; 256-bit float (VF_256) broadcast-by-permutation.  It is split after
;; reload (and not for V4DF when TARGET_AVX2) into a vec_duplicate of
;; operand 1.  The preparation code, with elt = INTVAL (operands[3]):
;;   - with TARGET_AVX2 and elt == 0, emits gen_vec_dup<mode> on the low
;;     scalar part of the source;
;;   - otherwise first emits vpermil with a mask that replicates the wanted
;;     element within each 128-bit lane (V4DF: 15 if elt is odd, else 0;
;;     other modes: (elt & 3) * 0x55), then emits vperm2f128 with mask
;;     (elt / (<ssescalarnum> / 2)) * 0x11 to copy the wanted lane to both
;;     halves of the destination;
;;   - for the remaining case, narrows operand 1 to a scalar-mode memory
;;     reference at offset elt * GET_MODE_SIZE (<ssescalarmode>mode), so
;;     that the vec_duplicate replacement pattern applies.
14271 (define_insn_and_split "*avx_vperm_broadcast_<mode>"
14272 [(set (match_operand:VF_256 0 "register_operand" "=x,x,x")
14274 (match_operand:VF_256 1 "nonimmediate_operand" "m,o,?x")
14275 (match_parallel 2 "avx_vbroadcast_operand"
14276 [(match_operand 3 "const_int_operand" "C,n,n")])))]
14279 "&& reload_completed && (<MODE>mode != V4DFmode || !TARGET_AVX2)"
14280 [(set (match_dup 0) (vec_duplicate:VF_256 (match_dup 1)))]
14282 rtx op0 = operands[0], op1 = operands[1];
14283 int elt = INTVAL (operands[3]);
14289 if (TARGET_AVX2 && elt == 0)
14291 emit_insn (gen_vec_dup<mode> (op0, gen_lowpart (<ssescalarmode>mode,
14296 /* Shuffle element we care about into all elements of the 128-bit lane.
14297 The other lane gets shuffled too, but we don't care. */
14298 if (<MODE>mode == V4DFmode)
14299 mask = (elt & 1 ? 15 : 0);
14301 mask = (elt & 3) * 0x55;
14302 emit_insn (gen_avx_vpermil<mode> (op0, op1, GEN_INT (mask)));
14304 /* Shuffle the lane we care about into both lanes of the dest. */
14305 mask = (elt / (<ssescalarnum> / 2)) * 0x11;
14306 emit_insn (gen_avx_vperm2f128<mode>3 (op0, op0, op0, GEN_INT (mask)));
14310 operands[1] = adjust_address (op1, <ssescalarmode>mode,
14311 elt * GET_MODE_SIZE (<ssescalarmode>mode));
;; Expander for immediate-controlled vpermil on double-precision vectors
;; (VF2).  The imm8 in operand 2 is decoded into an explicit selection
;; vector: elements are handled in pairs starting at even index i, and
;; bits i and i + 1 of the mask each choose element i or i + 1 of that
;; pair.  The indices are then wrapped in a PARALLEL.
14314 (define_expand "<sse2_avx_avx512f>_vpermil<mode><mask_name>"
14315 [(set (match_operand:VF2 0 "register_operand")
14317 (match_operand:VF2 1 "nonimmediate_operand")
14318 (match_operand:SI 2 "const_0_to_255_operand")))]
14319 "TARGET_AVX && <mask_mode512bit_condition>"
14321 int mask = INTVAL (operands[2]);
14322 rtx perm[<ssescalarnum>];
14325 for (i = 0; i < <ssescalarnum>; i = i + 2)
14327 perm[i] = GEN_INT (((mask >> i) & 1) + i);
14328 perm[i + 1] = GEN_INT (((mask >> (i + 1)) & 1) + i);
14332 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
;; Expander for immediate-controlled vpermil on single-precision vectors
;; (VF1).  The imm8 in operand 2 holds four 2-bit selectors.  Elements are
;; handled in groups of four starting at index i, and the same four
;; selectors are applied to every group, each offset by the group base i.
;; The indices are then wrapped in a PARALLEL.
14335 (define_expand "<sse2_avx_avx512f>_vpermil<mode><mask_name>"
14336 [(set (match_operand:VF1 0 "register_operand")
14338 (match_operand:VF1 1 "nonimmediate_operand")
14339 (match_operand:SI 2 "const_0_to_255_operand")))]
14340 "TARGET_AVX && <mask_mode512bit_condition>"
14342 int mask = INTVAL (operands[2]);
14343 rtx perm[<ssescalarnum>];
14346 for (i = 0; i < <ssescalarnum>; i = i + 4)
14348 perm[i] = GEN_INT (((mask >> 0) & 3) + i);
14349 perm[i + 1] = GEN_INT (((mask >> 2) & 3) + i);
14350 perm[i + 2] = GEN_INT (((mask >> 4) & 3) + i);
14351 perm[i + 3] = GEN_INT (((mask >> 6) & 3) + i);
14355 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
;; Matcher for the selection-vector form of vpermilps/vpermilpd.  The insn
;; condition requires avx_vpermilp_parallel to return nonzero for the
;; parallel in operand 2.  The output code calls it again and uses its
;; return value minus one as the imm8 printed in the instruction.
14358 (define_insn "*<sse2_avx_avx512f>_vpermilp<mode><mask_name>"
14359 [(set (match_operand:VF 0 "register_operand" "=v")
14361 (match_operand:VF 1 "nonimmediate_operand" "vm")
14362 (match_parallel 2 ""
14363 [(match_operand 3 "const_int_operand")])))]
14364 "TARGET_AVX && <mask_mode512bit_condition>
14365 && avx_vpermilp_parallel (operands[2], <MODE>mode)"
14367 int mask = avx_vpermilp_parallel (operands[2], <MODE>mode) - 1;
14368 operands[2] = GEN_INT (mask);
14369 return "vpermil<ssemodesuffix>\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}";
14371 [(set_attr "type" "sselog")
14372 (set_attr "prefix_extra" "1")
14373 (set_attr "length_immediate" "1")
14374 (set_attr "prefix" "<mask_prefix>")
14375 (set_attr "mode" "<sseinsnmode>")])
;; Variable-control vpermil: operand 1 is the float source register and
;; operand 2 is an integer control vector of the matching
;; <sseintvecmode>, in a register or in memory.
14377 (define_insn "<sse2_avx_avx512f>_vpermilvar<mode>3<mask_name>"
14378 [(set (match_operand:VF 0 "register_operand" "=v")
14380 [(match_operand:VF 1 "register_operand" "v")
14381 (match_operand:<sseintvecmode> 2 "nonimmediate_operand" "vm")]
14383 "TARGET_AVX && <mask_mode512bit_condition>"
14384 "vpermil<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14385 [(set_attr "type" "sselog")
14386 (set_attr "prefix_extra" "1")
14387 (set_attr "btver2_decode" "vector")
14388 (set_attr "prefix" "<mask_prefix>")
14389 (set_attr "mode" "<sseinsnmode>")])
;; Zero-masking entry point for vpermi2.  It forwards operands 0-3 to the
;; _maskz_1 generator, inserting CONST0_RTX (<MODE>mode) ahead of the mask
;; register operand 4.
14391 (define_expand "avx512f_vpermi2var<mode>3_maskz"
14392 [(match_operand:VI48F_512 0 "register_operand" "=v")
14393 (match_operand:VI48F_512 1 "register_operand" "v")
14394 (match_operand:<sseintvecmode> 2 "register_operand" "0")
14395 (match_operand:VI48F_512 3 "nonimmediate_operand" "vm")
14396 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")]
14399 emit_insn (gen_avx512f_vpermi2var<mode>3_maskz_1 (
14400 operands[0], operands[1], operands[2], operands[3],
14401 CONST0_RTX (<MODE>mode), operands[4]));
;; vpermi2 on 512-bit vectors.  Operand 2 (the integer-mode vector) is
;; tied to the destination by constraint "0", so only operands 3, 1 and 0
;; appear in the assembler template.
14405 (define_insn "avx512f_vpermi2var<mode>3<sd_maskz_name>"
14406 [(set (match_operand:VI48F_512 0 "register_operand" "=v")
14408 [(match_operand:VI48F_512 1 "register_operand" "v")
14409 (match_operand:<sseintvecmode> 2 "register_operand" "0")
14410 (match_operand:VI48F_512 3 "nonimmediate_operand" "vm")]
14413 "vpermi2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}"
14414 [(set_attr "type" "sselog")
14415 (set_attr "prefix" "evex")
14416 (set_attr "mode" "<sseinsnmode>")])
;; Merge-masking vpermi2.  The permutation (UNSPEC_VPERMI2_MASK) is
;; combined through vec_merge under the mask register operand 4, which is
;; printed as the {%4} decoration on the destination.  Operand 2 is tied
;; to the destination.
14418 (define_insn "avx512f_vpermi2var<mode>3_mask"
14419 [(set (match_operand:VI48F_512 0 "register_operand" "=v")
14420 (vec_merge:VI48F_512
14422 [(match_operand:VI48F_512 1 "register_operand" "v")
14423 (match_operand:<sseintvecmode> 2 "register_operand" "0")
14424 (match_operand:VI48F_512 3 "nonimmediate_operand" "vm")]
14425 UNSPEC_VPERMI2_MASK)
14427 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
14429 "vpermi2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
14430 [(set_attr "type" "sselog")
14431 (set_attr "prefix" "evex")
14432 (set_attr "mode" "<sseinsnmode>")])
;; Zero-masking entry point for vpermt2.  It forwards operands 0-3 to the
;; _maskz_1 generator, inserting CONST0_RTX (<MODE>mode) ahead of the mask
;; register operand 4.  Here operand 1 is the integer-mode vector and
;; operand 2 is the data vector tied to the destination.
14434 (define_expand "avx512f_vpermt2var<mode>3_maskz"
14435 [(match_operand:VI48F_512 0 "register_operand" "=v")
14436 (match_operand:<sseintvecmode> 1 "register_operand" "v")
14437 (match_operand:VI48F_512 2 "register_operand" "0")
14438 (match_operand:VI48F_512 3 "nonimmediate_operand" "vm")
14439 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")]
14442 emit_insn (gen_avx512f_vpermt2var<mode>3_maskz_1 (
14443 operands[0], operands[1], operands[2], operands[3],
14444 CONST0_RTX (<MODE>mode), operands[4]));
;; vpermt2 on 512-bit vectors.  Operand 1 is the integer-mode vector and
;; operand 2 is a data vector tied to the destination by constraint "0";
;; operand 3 may be a register or memory.
14448 (define_insn "avx512f_vpermt2var<mode>3<sd_maskz_name>"
14449 [(set (match_operand:VI48F_512 0 "register_operand" "=v")
14451 [(match_operand:<sseintvecmode> 1 "register_operand" "v")
14452 (match_operand:VI48F_512 2 "register_operand" "0")
14453 (match_operand:VI48F_512 3 "nonimmediate_operand" "vm")]
14456 "vpermt2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}"
14457 [(set_attr "type" "sselog")
14458 (set_attr "prefix" "evex")
14459 (set_attr "mode" "<sseinsnmode>")])
;; Merge-masking vpermt2.  The permutation result is combined through
;; vec_merge under the mask register operand 4, which is printed as the
;; {%4} decoration on the destination.  Operand 2 is tied to the
;; destination.
14461 (define_insn "avx512f_vpermt2var<mode>3_mask"
14462 [(set (match_operand:VI48F_512 0 "register_operand" "=v")
14463 (vec_merge:VI48F_512
14465 [(match_operand:<sseintvecmode> 1 "register_operand" "v")
14466 (match_operand:VI48F_512 2 "register_operand" "0")
14467 (match_operand:VI48F_512 3 "nonimmediate_operand" "vm")]
14470 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
14472 "vpermt2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
14473 [(set_attr "type" "sselog")
14474 (set_attr "prefix" "evex")
14475 (set_attr "mode" "<sseinsnmode>")])
;; Expander for vperm2f128 with an imm8 in operand 3.  The default pattern
;; is an opaque UNSPEC_VPERMIL2F128.  When neither zeroing bit is set
;; ((mask & 0x88) == 0), the preparation code instead builds an explicit
;; vec_select from the vec_concat of operands 1 and 2: the low half of the
;; result takes nelt2 consecutive elements starting at (mask & 3) * nelt2,
;; and the high half takes nelt2 elements starting at
;; ((mask >> 4) & 3) * nelt2.  That select is wrapped in a SET of
;; operand 0.
14477 (define_expand "avx_vperm2f128<mode>3"
14478 [(set (match_operand:AVX256MODE2P 0 "register_operand")
14479 (unspec:AVX256MODE2P
14480 [(match_operand:AVX256MODE2P 1 "register_operand")
14481 (match_operand:AVX256MODE2P 2 "nonimmediate_operand")
14482 (match_operand:SI 3 "const_0_to_255_operand")]
14483 UNSPEC_VPERMIL2F128))]
14486 int mask = INTVAL (operands[3]);
14487 if ((mask & 0x88) == 0)
14489 rtx perm[<ssescalarnum>], t1, t2;
14490 int i, base, nelt = <ssescalarnum>, nelt2 = nelt / 2;
14492 base = (mask & 3) * nelt2;
14493 for (i = 0; i < nelt2; ++i)
14494 perm[i] = GEN_INT (base + i);
14496 base = ((mask >> 4) & 3) * nelt2;
14497 for (i = 0; i < nelt2; ++i)
14498 perm[i + nelt2] = GEN_INT (base + i);
14500 t2 = gen_rtx_VEC_CONCAT (<ssedoublevecmode>mode,
14501 operands[1], operands[2]);
14502 t1 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, perm));
14503 t2 = gen_rtx_VEC_SELECT (<MODE>mode, t2, t1);
14504 t2 = gen_rtx_SET (VOIDmode, operands[0], t2);
14510 ;; Note that bits 7 and 3 of the imm8 allow lanes to be zeroed, which
14511 ;; means that in order to represent this properly in rtl we'd have to
14512 ;; nest *another* vec_concat with a zero operand and do the select from
14513 ;; a 4x wide vector. That doesn't seem very nice.
;;
;; Hence this pattern keeps the operation as UNSPEC_VPERMIL2F128 and simply
;; prints the full imm8 (operand 3) in a vperm2<i128> instruction.
14514 (define_insn "*avx_vperm2f128<mode>_full"
14515 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
14516 (unspec:AVX256MODE2P
14517 [(match_operand:AVX256MODE2P 1 "register_operand" "x")
14518 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm")
14519 (match_operand:SI 3 "const_0_to_255_operand" "n")]
14520 UNSPEC_VPERMIL2F128))]
14522 "vperm2<i128>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
14523 [(set_attr "type" "sselog")
14524 (set_attr "prefix_extra" "1")
14525 (set_attr "length_immediate" "1")
14526 (set_attr "prefix" "vex")
14527 (set_attr "mode" "<sseinsnmode>")])
;; vperm2f128 expressed as a vec_select over the vec_concat of operands 1
;; and 2 (the form with no zeroed lanes).  The condition requires
;; avx_vperm2f128_parallel to accept the parallel in operand 3; its return
;; value minus one is the imm8.  The output code can return a
;; vinsert<i128> with immediate 0 or 1 for particular masks; otherwise it
;; prints vperm2<i128> with the computed mask.
14529 (define_insn "*avx_vperm2f128<mode>_nozero"
14530 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
14531 (vec_select:AVX256MODE2P
14532 (vec_concat:<ssedoublevecmode>
14533 (match_operand:AVX256MODE2P 1 "register_operand" "x")
14534 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm"))
14535 (match_parallel 3 ""
14536 [(match_operand 4 "const_int_operand")])))]
14538 && avx_vperm2f128_parallel (operands[3], <MODE>mode)"
14540 int mask = avx_vperm2f128_parallel (operands[3], <MODE>mode) - 1;
14542 return "vinsert<i128>\t{$0, %x2, %1, %0|%0, %1, %x2, 0}";
14544 return "vinsert<i128>\t{$1, %x2, %1, %0|%0, %1, %x2, 1}";
14545 operands[3] = GEN_INT (mask);
14546 return "vperm2<i128>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
14548 [(set_attr "type" "sselog")
14549 (set_attr "prefix_extra" "1")
14550 (set_attr "length_immediate" "1")
14551 (set_attr "prefix" "vex")
14552 (set_attr "mode" "<sseinsnmode>")])
;; Expander that inserts the half-width vector operand 2 into the 256-bit
;; vector operand 1, storing to operand 0.  The 0/1 immediate in operand 3
;; selects between the gen_vec_set_lo_<mode> and gen_vec_set_hi_<mode>
;; generators; any other value is unreachable.
14554 (define_expand "avx_vinsertf128<mode>"
14555 [(match_operand:V_256 0 "register_operand")
14556 (match_operand:V_256 1 "register_operand")
14557 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand")
14558 (match_operand:SI 3 "const_0_to_1_operand")]
14561 rtx (*insn)(rtx, rtx, rtx);
14563 switch (INTVAL (operands[3]))
14566 insn = gen_vec_set_lo_<mode>;
14569 insn = gen_vec_set_hi_<mode>;
14572 gcc_unreachable ();
14575 emit_insn (insn (operands[0], operands[1], operands[2]));
;; AVX2 low-half insert for V4DI: combines the V2DI operand 2 with
;; elements 2 and 3 selected from operand 1, emitted as vinserti128 with
;; immediate 0.
14579 (define_insn "avx2_vec_set_lo_v4di"
14580 [(set (match_operand:V4DI 0 "register_operand" "=x")
14582 (match_operand:V2DI 2 "nonimmediate_operand" "xm")
14584 (match_operand:V4DI 1 "register_operand" "x")
14585 (parallel [(const_int 2) (const_int 3)]))))]
14587 "vinserti128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
14588 [(set_attr "type" "sselog")
14589 (set_attr "prefix_extra" "1")
14590 (set_attr "length_immediate" "1")
14591 (set_attr "prefix" "vex")
14592 (set_attr "mode" "OI")])
;; AVX2 high-half insert for V4DI: combines elements 0 and 1 selected from
;; operand 1 with the V2DI operand 2, emitted as vinserti128 with
;; immediate 1.
14594 (define_insn "avx2_vec_set_hi_v4di"
14595 [(set (match_operand:V4DI 0 "register_operand" "=x")
14598 (match_operand:V4DI 1 "register_operand" "x")
14599 (parallel [(const_int 0) (const_int 1)]))
14600 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
14602 "vinserti128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
14603 [(set_attr "type" "sselog")
14604 (set_attr "prefix_extra" "1")
14605 (set_attr "length_immediate" "1")
14606 (set_attr "prefix" "vex")
14607 (set_attr "mode" "OI")])
;; Replace the low half of a 4-element 256-bit vector (VI8F_256).  The
;; result is the vec_concat of operand 2 (the new low half) and elements 2
;; and 3 of operand 1, emitted as vinsert<i128> with immediate 0.
14609 (define_insn "vec_set_lo_<mode>"
14610 [(set (match_operand:VI8F_256 0 "register_operand" "=x")
14611 (vec_concat:VI8F_256
14612 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")
14613 (vec_select:<ssehalfvecmode>
14614 (match_operand:VI8F_256 1 "register_operand" "x")
14615 (parallel [(const_int 2) (const_int 3)]))))]
14617 "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
14618 [(set_attr "type" "sselog")
14619 (set_attr "prefix_extra" "1")
14620 (set_attr "length_immediate" "1")
14621 (set_attr "prefix" "vex")
14622 (set_attr "mode" "<sseinsnmode>")])
;; Replace the high half of a 4-element 256-bit vector (VI8F_256).  The
;; result is the vec_concat of elements 0 and 1 of operand 1 and operand 2
;; (the new high half), emitted as vinsert<i128> with immediate 1.
14624 (define_insn "vec_set_hi_<mode>"
14625 [(set (match_operand:VI8F_256 0 "register_operand" "=x")
14626 (vec_concat:VI8F_256
14627 (vec_select:<ssehalfvecmode>
14628 (match_operand:VI8F_256 1 "register_operand" "x")
14629 (parallel [(const_int 0) (const_int 1)]))
14630 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")))]
14632 "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
14633 [(set_attr "type" "sselog")
14634 (set_attr "prefix_extra" "1")
14635 (set_attr "length_immediate" "1")
14636 (set_attr "prefix" "vex")
14637 (set_attr "mode" "<sseinsnmode>")])
;; Replace the low half of an 8-element 256-bit vector (VI4F_256).  The
;; result is the vec_concat of operand 2 (the new low half) and elements
;; 4-7 of operand 1, emitted as vinsert<i128> with immediate 0.
14639 (define_insn "vec_set_lo_<mode>"
14640 [(set (match_operand:VI4F_256 0 "register_operand" "=x")
14641 (vec_concat:VI4F_256
14642 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")
14643 (vec_select:<ssehalfvecmode>
14644 (match_operand:VI4F_256 1 "register_operand" "x")
14645 (parallel [(const_int 4) (const_int 5)
14646 (const_int 6) (const_int 7)]))))]
14648 "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
14649 [(set_attr "type" "sselog")
14650 (set_attr "prefix_extra" "1")
14651 (set_attr "length_immediate" "1")
14652 (set_attr "prefix" "vex")
14653 (set_attr "mode" "<sseinsnmode>")])
;; Replace the high half of an 8-element 256-bit vector (VI4F_256).  The
;; result is the vec_concat of elements 0-3 of operand 1 and operand 2
;; (the new high half), emitted as vinsert<i128> with immediate 1.
14655 (define_insn "vec_set_hi_<mode>"
14656 [(set (match_operand:VI4F_256 0 "register_operand" "=x")
14657 (vec_concat:VI4F_256
14658 (vec_select:<ssehalfvecmode>
14659 (match_operand:VI4F_256 1 "register_operand" "x")
14660 (parallel [(const_int 0) (const_int 1)
14661 (const_int 2) (const_int 3)]))
14662 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")))]
14664 "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
14665 [(set_attr "type" "sselog")
14666 (set_attr "prefix_extra" "1")
14667 (set_attr "length_immediate" "1")
14668 (set_attr "prefix" "vex")
14669 (set_attr "mode" "<sseinsnmode>")])
;; Low-half insert for V16HI: combines the V8HI operand 2 with elements
;; 8-15 selected from operand 1, emitted as vinsert%~128 with immediate 0.
14671 (define_insn "vec_set_lo_v16hi"
14672 [(set (match_operand:V16HI 0 "register_operand" "=x")
14674 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
14676 (match_operand:V16HI 1 "register_operand" "x")
14677 (parallel [(const_int 8) (const_int 9)
14678 (const_int 10) (const_int 11)
14679 (const_int 12) (const_int 13)
14680 (const_int 14) (const_int 15)]))))]
14682 "vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
14683 [(set_attr "type" "sselog")
14684 (set_attr "prefix_extra" "1")
14685 (set_attr "length_immediate" "1")
14686 (set_attr "prefix" "vex")
14687 (set_attr "mode" "OI")])
;; High-half insert for V16HI: combines elements 0-7 selected from operand
;; 1 with the V8HI operand 2, emitted as vinsert%~128 with immediate 1.
14689 (define_insn "vec_set_hi_v16hi"
14690 [(set (match_operand:V16HI 0 "register_operand" "=x")
14693 (match_operand:V16HI 1 "register_operand" "x")
14694 (parallel [(const_int 0) (const_int 1)
14695 (const_int 2) (const_int 3)
14696 (const_int 4) (const_int 5)
14697 (const_int 6) (const_int 7)]))
14698 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
14700 "vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
14701 [(set_attr "type" "sselog")
14702 (set_attr "prefix_extra" "1")
14703 (set_attr "length_immediate" "1")
14704 (set_attr "prefix" "vex")
14705 (set_attr "mode" "OI")])
;; Low-half insert for V32QI: combines the V16QI operand 2 with elements
;; 16-31 selected from operand 1, emitted as vinsert%~128 with immediate 0.
14707 (define_insn "vec_set_lo_v32qi"
14708 [(set (match_operand:V32QI 0 "register_operand" "=x")
14710 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
14712 (match_operand:V32QI 1 "register_operand" "x")
14713 (parallel [(const_int 16) (const_int 17)
14714 (const_int 18) (const_int 19)
14715 (const_int 20) (const_int 21)
14716 (const_int 22) (const_int 23)
14717 (const_int 24) (const_int 25)
14718 (const_int 26) (const_int 27)
14719 (const_int 28) (const_int 29)
14720 (const_int 30) (const_int 31)]))))]
14722 "vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
14723 [(set_attr "type" "sselog")
14724 (set_attr "prefix_extra" "1")
14725 (set_attr "length_immediate" "1")
14726 (set_attr "prefix" "vex")
14727 (set_attr "mode" "OI")])
;; High-half insert for V32QI: combines elements 0-15 selected from operand
;; 1 with the V16QI operand 2, emitted as vinsert%~128 with immediate 1.
14729 (define_insn "vec_set_hi_v32qi"
14730 [(set (match_operand:V32QI 0 "register_operand" "=x")
14733 (match_operand:V32QI 1 "register_operand" "x")
14734 (parallel [(const_int 0) (const_int 1)
14735 (const_int 2) (const_int 3)
14736 (const_int 4) (const_int 5)
14737 (const_int 6) (const_int 7)
14738 (const_int 8) (const_int 9)
14739 (const_int 10) (const_int 11)
14740 (const_int 12) (const_int 13)
14741 (const_int 14) (const_int 15)]))
14742 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
14744 "vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
14745 [(set_attr "type" "sselog")
14746 (set_attr "prefix_extra" "1")
14747 (set_attr "length_immediate" "1")
14748 (set_attr "prefix" "vex")
14749 (set_attr "mode" "OI")])
;; Masked vector load.  Operand 2 is the integer-mode mask register and
;; operand 1 the memory source; the template prints memory, mask, then
;; destination (AT&T order).
14751 (define_insn "<avx_avx2>_maskload<ssemodesuffix><avxsizesuffix>"
14752 [(set (match_operand:V48_AVX2 0 "register_operand" "=x")
14754 [(match_operand:<sseintvecmode> 2 "register_operand" "x")
14755 (match_operand:V48_AVX2 1 "memory_operand" "m")]
14758 "v<sseintprefix>maskmov<ssemodesuffix>\t{%1, %2, %0|%0, %2, %1}"
14759 [(set_attr "type" "sselog1")
14760 (set_attr "prefix_extra" "1")
14761 (set_attr "prefix" "vex")
14762 (set_attr "btver2_decode" "vector")
14763 (set_attr "mode" "<sseinsnmode>")])
;; Masked vector store.  Operand 0 is the memory destination, marked "+m"
;; (read-write).  Operand 1 is the integer-mode mask register and operand 2
;; the data register.
14765 (define_insn "<avx_avx2>_maskstore<ssemodesuffix><avxsizesuffix>"
14766 [(set (match_operand:V48_AVX2 0 "memory_operand" "+m")
14768 [(match_operand:<sseintvecmode> 1 "register_operand" "x")
14769 (match_operand:V48_AVX2 2 "register_operand" "x")
14773 "v<sseintprefix>maskmov<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
14774 [(set_attr "type" "sselog1")
14775 (set_attr "prefix_extra" "1")
14776 (set_attr "prefix" "vex")
14777 (set_attr "btver2_decode" "vector")
14778 (set_attr "mode" "<sseinsnmode>")])
;; Expander named maskload<mode>: operand 0 is the destination register,
;; operand 1 the memory source and operand 2 the integer-mode mask
;; register.
14780 (define_expand "maskload<mode>"
14781 [(set (match_operand:V48_AVX2 0 "register_operand")
14783 [(match_operand:<sseintvecmode> 2 "register_operand")
14784 (match_operand:V48_AVX2 1 "memory_operand")]
;; Expander named maskstore<mode>: operand 0 is the memory destination,
;; operand 1 the data register and operand 2 the integer-mode mask
;; register.  This numbering differs from the
;; <avx_avx2>_maskstore insn, where the mask is operand 1 and the data is
;; operand 2.
14788 (define_expand "maskstore<mode>"
14789 [(set (match_operand:V48_AVX2 0 "memory_operand")
14791 [(match_operand:<sseintvecmode> 2 "register_operand")
14792 (match_operand:V48_AVX2 1 "register_operand")
;; Cast of a half-width vector (operand 1) to a 256-bit mode (operand 0),
;; represented as an unspec until reload.  After reload it is split into a
;; plain move: the preparation code re-creates one of the operands as a
;; hard register in the other operand's mode (operand 0 in the half mode,
;; or operand 1 in the full mode) and then calls emit_move_insn.
14797 (define_insn_and_split "avx_<castmode><avxsizesuffix>_<castmode>"
14798 [(set (match_operand:AVX256MODE2P 0 "nonimmediate_operand" "=x,m")
14799 (unspec:AVX256MODE2P
14800 [(match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "xm,x")]
14804 "&& reload_completed"
14807 rtx op0 = operands[0];
14808 rtx op1 = operands[1];
14810 op0 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op0));
14812 op1 = gen_rtx_REG (<MODE>mode, REGNO (op1));
14813 emit_move_insn (op0, op1);
;; vec_init expander for 256-bit vector modes (V_256).  It delegates to
;; ix86_expand_vector_init with a first argument of false.
14817 (define_expand "vec_init<mode>"
14818 [(match_operand:V_256 0 "register_operand")
14822 ix86_expand_vector_init (false, operands[0], operands[1]);
;; vec_init expander for the VI48F_512 modes.  It delegates to
;; ix86_expand_vector_init with a first argument of false.
14826 (define_expand "vec_init<mode>"
14827 [(match_operand:VI48F_512 0 "register_operand")
14831 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Extract one V2DI half of the V4DI operand 1 into operand 0.  The 0/1
;; immediate in operand 2 selects gen_vec_extract_lo_v4di or
;; gen_vec_extract_hi_v4di; any other value is unreachable.
14835 (define_expand "avx2_extracti128"
14836 [(match_operand:V2DI 0 "nonimmediate_operand")
14837 (match_operand:V4DI 1 "register_operand")
14838 (match_operand:SI 2 "const_0_to_1_operand")]
14841 rtx (*insn)(rtx, rtx);
14843 switch (INTVAL (operands[2]))
14846 insn = gen_vec_extract_lo_v4di;
14849 insn = gen_vec_extract_hi_v4di;
14852 gcc_unreachable ();
14855 emit_insn (insn (operands[0], operands[1]));
;; Insert the V2DI operand 2 into one half of the V4DI operand 1, storing
;; to operand 0.  The 0/1 immediate in operand 3 selects
;; gen_avx2_vec_set_lo_v4di or gen_avx2_vec_set_hi_v4di; any other value
;; is unreachable.
14859 (define_expand "avx2_inserti128"
14860 [(match_operand:V4DI 0 "register_operand")
14861 (match_operand:V4DI 1 "register_operand")
14862 (match_operand:V2DI 2 "nonimmediate_operand")
14863 (match_operand:SI 3 "const_0_to_1_operand")]
14866 rtx (*insn)(rtx, rtx, rtx);
14868 switch (INTVAL (operands[3]))
14871 insn = gen_avx2_vec_set_lo_v4di;
14874 insn = gen_avx2_vec_set_hi_v4di;
14877 gcc_unreachable ();
14880 emit_insn (insn (operands[0], operands[1], operands[2]));
;; Arithmetic right shift (ashiftrt) of operand 1 by the vector of counts
;; in operand 2, which may be a register or memory; emitted as vpsrav.
14884 (define_insn "<avx2_avx512f>_ashrv<mode><mask_name>"
14885 [(set (match_operand:VI48_AVX512F 0 "register_operand" "=v")
14886 (ashiftrt:VI48_AVX512F
14887 (match_operand:VI48_AVX512F 1 "register_operand" "v")
14888 (match_operand:VI48_AVX512F 2 "nonimmediate_operand" "vm")))]
14889 "TARGET_AVX2 && <mask_mode512bit_condition>"
14890 "vpsrav<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14891 [(set_attr "type" "sseishft")
14892 (set_attr "prefix" "maybe_evex")
14893 (set_attr "mode" "<sseinsnmode>")])
;; Shift of operand 1 by the vector of counts in operand 2, for each code
;; in the any_lshift iterator; emitted as vp<vshift>v.
14895 (define_insn "<avx2_avx512f>_<shift_insn>v<mode><mask_name>"
14896 [(set (match_operand:VI48_AVX2_48_AVX512F 0 "register_operand" "=v")
14897 (any_lshift:VI48_AVX2_48_AVX512F
14898 (match_operand:VI48_AVX2_48_AVX512F 1 "register_operand" "v")
14899 (match_operand:VI48_AVX2_48_AVX512F 2 "nonimmediate_operand" "vm")))]
14900 "TARGET_AVX2 && <mask_mode512bit_condition>"
14901 "vp<vshift>v<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14902 [(set_attr "type" "sseishft")
14903 (set_attr "prefix" "maybe_evex")
14904 (set_attr "mode" "<sseinsnmode>")])
14906 ;; For avx_vec_concat<mode> insn pattern
;; Maps each 256-bit mode to "t" and each 512-bit mode to "g".  The
;; avx_vec_concat<mode> template uses the result as the operand-print
;; modifier on operand 1 (%<concat_tg_mode>1).
14907 (define_mode_attr concat_tg_mode
14908 [(V32QI "t") (V16HI "t") (V8SI "t") (V4DI "t") (V8SF "t") (V4DF "t")
14909 (V64QI "g") (V32HI "g") (V16SI "g") (V8DI "g") (V16SF "g") (V8DF "g")])
;; Concatenate two half-width vectors into a 256- or 512-bit register.
;; There are two alternatives:
;;   - operand 2 in a register or memory ("xm"): vinsert<i128> with
;;     immediate 1, reading operand 1 through the <concat_tg_mode>
;;     modifier;
;;   - operand 2 matching constraint "C": a single move of operand 1 into
;;     operand 0, printed with the %t or %x modifier; the mnemonic
;;     (vmovaps, vmovapd or vmovdqa) is chosen by get_attr_mode (insn).
14911 (define_insn "avx_vec_concat<mode>"
14912 [(set (match_operand:V_256_512 0 "register_operand" "=x,x")
14913 (vec_concat:V_256_512
14914 (match_operand:<ssehalfvecmode> 1 "register_operand" "x,x")
14915 (match_operand:<ssehalfvecmode> 2 "vector_move_operand" "xm,C")))]
14918 switch (which_alternative)
14921 return "vinsert<i128>\t{$0x1, %2, %<concat_tg_mode>1, %0|%0, %<concat_tg_mode>1, %2, 0x1}";
14923 switch (get_attr_mode (insn))
14926 return "vmovaps\t{%1, %t0|%t0, %1}";
14928 return "vmovapd\t{%1, %t0|%t0, %1}";
14930 return "vmovaps\t{%1, %x0|%x0, %1}";
14932 return "vmovapd\t{%1, %x0|%x0, %1}";
14934 return "vmovdqa\t{%1, %t0|%t0, %1}";
14936 return "vmovdqa\t{%1, %x0|%x0, %1}";
14938 gcc_unreachable ();
14941 gcc_unreachable ();
14944 [(set_attr "type" "sselog,ssemov")
14945 (set_attr "prefix_extra" "1,*")
14946 (set_attr "length_immediate" "1,*")
14947 (set_attr "prefix" "maybe_evex")
14948 (set_attr "mode" "<sseinsnmode>")])
;; vcvtph2ps from a V8HI register to a V4SF result.  The pattern selects
;; elements 0-3 of a V8SF unspec of operand 1.
14950 (define_insn "vcvtph2ps"
14951 [(set (match_operand:V4SF 0 "register_operand" "=x")
14953 (unspec:V8SF [(match_operand:V8HI 1 "register_operand" "x")]
14955 (parallel [(const_int 0) (const_int 1)
14956 (const_int 2) (const_int 3)])))]
14958 "vcvtph2ps\t{%1, %0|%0, %1}"
14959 [(set_attr "type" "ssecvt")
14960 (set_attr "prefix" "vex")
14961 (set_attr "mode" "V4SF")])
;; vcvtph2ps from a V4HI memory operand to a V4SF register.
;; NOTE(review): the "mode" attribute is V8SF although the destination is
;; V4SF, and the register form "vcvtph2ps" uses V4SF.  Confirm whether
;; this is intentional.
14963 (define_insn "*vcvtph2ps_load"
14964 [(set (match_operand:V4SF 0 "register_operand" "=x")
14965 (unspec:V4SF [(match_operand:V4HI 1 "memory_operand" "m")]
14966 UNSPEC_VCVTPH2PS))]
14968 "vcvtph2ps\t{%1, %0|%0, %1}"
14969 [(set_attr "type" "ssecvt")
14970 (set_attr "prefix" "vex")
14971 (set_attr "mode" "V8SF")])
;; 256-bit vcvtph2ps: converts the V8HI operand 1 (register or memory)
;; into a V8SF register.
14973 (define_insn "vcvtph2ps256"
14974 [(set (match_operand:V8SF 0 "register_operand" "=x")
14975 (unspec:V8SF [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
14976 UNSPEC_VCVTPH2PS))]
14978 "vcvtph2ps\t{%1, %0|%0, %1}"
14979 [(set_attr "type" "ssecvt")
14980 (set_attr "prefix" "vex")
14981 (set_attr "btver2_decode" "double")
14982 (set_attr "mode" "V8SF")])
;; 512-bit vcvtph2ps: converts the V16HI operand 1 into a V16SF register.
;; The operand predicate and constraint, and the extra text in the
;; template, come from the <round_saeonly_*> and <mask_*> substitutions.
14984 (define_insn "<mask_codefor>avx512f_vcvtph2ps512<mask_name><round_saeonly_name>"
14985 [(set (match_operand:V16SF 0 "register_operand" "=v")
14987 [(match_operand:V16HI 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
14988 UNSPEC_VCVTPH2PS))]
14990 "vcvtph2ps\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
14991 [(set_attr "type" "ssecvt")
14992 (set_attr "prefix" "evex")
14993 (set_attr "mode" "V16SF")])
;; Expander for 128-bit vcvtps2ph, producing a V8HI register from the V4SF
;; operand 1 and the imm8 operand 2.  The preparation statement sets
;; operand 3 to the V4HImode zero vector.
14995 (define_expand "vcvtps2ph"
14996 [(set (match_operand:V8HI 0 "register_operand")
14998 (unspec:V4HI [(match_operand:V4SF 1 "register_operand")
14999 (match_operand:SI 2 "const_0_to_255_operand")]
15003 "operands[3] = CONST0_RTX (V4HImode);")
;; Register form of 128-bit vcvtps2ph.  The V4HI conversion unspec is
;; paired with operand 3, which must satisfy const0_operand, to form the
;; V8HI result.
15005 (define_insn "*vcvtps2ph"
15006 [(set (match_operand:V8HI 0 "register_operand" "=x")
15008 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")
15009 (match_operand:SI 2 "const_0_to_255_operand" "N")]
15011 (match_operand:V4HI 3 "const0_operand")))]
15013 "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
15014 [(set_attr "type" "ssecvt")
15015 (set_attr "prefix" "vex")
15016 (set_attr "mode" "V4SF")])
;; Store form of 128-bit vcvtps2ph: writes the V4HI conversion result
;; directly to the memory operand 0.
15018 (define_insn "*vcvtps2ph_store"
15019 [(set (match_operand:V4HI 0 "memory_operand" "=m")
15020 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")
15021 (match_operand:SI 2 "const_0_to_255_operand" "N")]
15022 UNSPEC_VCVTPS2PH))]
15024 "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
15025 [(set_attr "type" "ssecvt")
15026 (set_attr "prefix" "vex")
15027 (set_attr "mode" "V4SF")])
;; 256-bit vcvtps2ph: converts the V8SF register operand 1 to V8HI using
;; the imm8 operand 2.  The destination may be a register or memory.
15029 (define_insn "vcvtps2ph256"
15030 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=xm")
15031 (unspec:V8HI [(match_operand:V8SF 1 "register_operand" "x")
15032 (match_operand:SI 2 "const_0_to_255_operand" "N")]
15033 UNSPEC_VCVTPS2PH))]
15035 "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
15036 [(set_attr "type" "ssecvt")
15037 (set_attr "prefix" "vex")
15038 (set_attr "btver2_decode" "vector")
15039 (set_attr "mode" "V8SF")])
;; 512-bit vcvtps2ph: converts the V16SF register operand 1 to V16HI using
;; the imm8 operand 2.  The destination may be a register or memory, and
;; the mask decoration comes from <mask_operand3>.
15041 (define_insn "<mask_codefor>avx512f_vcvtps2ph512<mask_name>"
15042 [(set (match_operand:V16HI 0 "nonimmediate_operand" "=vm")
15044 [(match_operand:V16SF 1 "register_operand" "v")
15045 (match_operand:SI 2 "const_0_to_255_operand" "N")]
15046 UNSPEC_VCVTPS2PH))]
15048 "vcvtps2ph\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
15049 [(set_attr "type" "ssecvt")
15050 (set_attr "prefix" "evex")
15051 (set_attr "mode" "V16SF")])
15053 ;; For gather* insn patterns
;; The 128- and 256-bit element modes iterated over by the avx2_gather*
;; patterns.
15054 (define_mode_iterator VEC_GATHER_MODE
15055 [V2DI V2DF V4DI V4DF V4SI V4SF V8SI V8SF])
;; Maps a gathered data mode to the SImode-element vector mode used for
;; its index operand in the gathersi patterns.
15056 (define_mode_attr VEC_GATHER_IDXSI
15057 [(V2DI "V4SI") (V4DI "V4SI") (V8DI "V8SI")
15058 (V2DF "V4SI") (V4DF "V4SI") (V8DF "V8SI")
15059 (V4SI "V4SI") (V8SI "V8SI") (V16SI "V16SI")
15060 (V4SF "V4SI") (V8SF "V8SI") (V16SF "V16SI")])
;; Maps a gathered data mode to the DImode-element vector mode used for
;; its index operand in the gatherdi patterns.
15062 (define_mode_attr VEC_GATHER_IDXDI
15063 [(V2DI "V2DI") (V4DI "V4DI") (V8DI "V8DI")
15064 (V2DF "V2DI") (V4DF "V4DI") (V8DF "V8DI")
15065 (V4SI "V2DI") (V8SI "V4DI") (V16SI "V8DI")
15066 (V4SF "V2DI") (V8SF "V4DI") (V16SF "V8DI")])
;; Maps a gathered data mode to the mode of the source and mask operands
;; in the gatherdi patterns.  Modes with 64-bit elements map to
;; themselves.  Of the modes with 32-bit elements, V4SI and V4SF map to
;; themselves and the wider ones map to the half-width mode (V8SI to V4SI,
;; V16SF to V8SF, and so on).
15068 (define_mode_attr VEC_GATHER_SRCDI
15069 [(V2DI "V2DI") (V4DI "V4DI") (V8DI "V8DI")
15070 (V2DF "V2DF") (V4DF "V4DF") (V8DF "V8DF")
15071 (V4SI "V4SI") (V8SI "V4SI") (V16SI "V8SI")
15072 (V4SF "V4SF") (V8SF "V4SF") (V16SF "V8SF")])
;; Expander for AVX2 gathers with SImode-element index vectors.
;;   operand 0: destination register
;;   operand 1: register operand of the destination mode (first unspec input)
;;   operand 2: base address (vsib_address_operand)
;;   operand 3: index vector, mode <VEC_GATHER_IDXSI>
;;   operand 4: register operand of the destination mode (last unspec input)
;;   operand 5: scale, restricted by const1248_operand
;;   operand 6: scratch register clobbered by the insn
;; The preparation code wraps operands 2, 3 and 5 in an UNSPEC_VSIBADDR of
;; mode Pmode.
;; The predicate name for operand 5 is written without a trailing blank so
;; that it matches the spelling used by every gather insn pattern.
15074 (define_expand "avx2_gathersi<mode>"
15075 [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand")
15076 (unspec:VEC_GATHER_MODE
15077 [(match_operand:VEC_GATHER_MODE 1 "register_operand")
15078 (mem:<ssescalarmode>
15080 [(match_operand 2 "vsib_address_operand")
15081 (match_operand:<VEC_GATHER_IDXSI>
15082 3 "register_operand")
15083 (match_operand:SI 5 "const1248_operand")]))
15084 (mem:BLK (scratch))
15085 (match_operand:VEC_GATHER_MODE 4 "register_operand")]
15087 (clobber (match_scratch:VEC_GATHER_MODE 6))])]
15091 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
15092 operands[5]), UNSPEC_VSIBADDR);
;; AVX2 gather with SImode-element indices.  Operand 2 is tied to the
;; destination ("0") and operand 5 is tied to the scratch operand 1
;; ("1"); both destination and scratch are earlyclobber ("=&x").  The
;; memory reference is the vsib_mem_operator operand 7, built from address
;; operand 3, index vector operand 4 and scale operand 6.  The template
;; prints operand 1, the memory operand and the destination.
15095 (define_insn "*avx2_gathersi<mode>"
15096 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
15097 (unspec:VEC_GATHER_MODE
15098 [(match_operand:VEC_GATHER_MODE 2 "register_operand" "0")
15099 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
15101 [(match_operand:P 3 "vsib_address_operand" "Tv")
15102 (match_operand:<VEC_GATHER_IDXSI> 4 "register_operand" "x")
15103 (match_operand:SI 6 "const1248_operand" "n")]
15105 (mem:BLK (scratch))
15106 (match_operand:VEC_GATHER_MODE 5 "register_operand" "1")]
15108 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
15110 "v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %7, %0|%0, %7, %1}"
15111 [(set_attr "type" "ssemov")
15112 (set_attr "prefix" "vex")
15113 (set_attr "mode" "<sseinsnmode>")])
;; Variant of the SImode-index gather with shifted operand numbering.  The
;; memory operator is operand 6 (address operand 2, index operand 3, scale
;; operand 5), and operand 4 is tied to the scratch operand 1.  The
;; template prints operand 1, the memory operand and the destination.
15115 (define_insn "*avx2_gathersi<mode>_2"
15116 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
15117 (unspec:VEC_GATHER_MODE
15119 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
15121 [(match_operand:P 2 "vsib_address_operand" "Tv")
15122 (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand" "x")
15123 (match_operand:SI 5 "const1248_operand" "n")]
15125 (mem:BLK (scratch))
15126 (match_operand:VEC_GATHER_MODE 4 "register_operand" "1")]
15128 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
15130 "v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %6, %0|%0, %6, %1}"
15131 [(set_attr "type" "ssemov")
15132 (set_attr "prefix" "vex")
15133 (set_attr "mode" "<sseinsnmode>")])
;; Expander for AVX2 gathers with DImode-element index vectors.
;;   operand 0: destination register
;;   operand 1: register operand of mode <VEC_GATHER_SRCDI> (first unspec
;;              input)
;;   operand 2: base address (vsib_address_operand)
;;   operand 3: index vector, mode <VEC_GATHER_IDXDI>
;;   operand 4: register operand of mode <VEC_GATHER_SRCDI> (last unspec
;;              input)
;;   operand 5: scale, restricted by const1248_operand
;;   operand 6: scratch register clobbered by the insn
;; The preparation code wraps operands 2, 3 and 5 in an UNSPEC_VSIBADDR of
;; mode Pmode.
;; The predicate name for operand 5 is written without a trailing blank so
;; that it matches the spelling used by every gather insn pattern.
15135 (define_expand "avx2_gatherdi<mode>"
15136 [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand")
15137 (unspec:VEC_GATHER_MODE
15138 [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand")
15139 (mem:<ssescalarmode>
15141 [(match_operand 2 "vsib_address_operand")
15142 (match_operand:<VEC_GATHER_IDXDI>
15143 3 "register_operand")
15144 (match_operand:SI 5 "const1248_operand")]))
15145 (mem:BLK (scratch))
15146 (match_operand:<VEC_GATHER_SRCDI>
15147 4 "register_operand")]
15149 (clobber (match_scratch:VEC_GATHER_MODE 6))])]
15153 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
15154 operands[5]), UNSPEC_VSIBADDR);
;; AVX2 gather with DImode-element indices.  Operands 2 and 5 have mode
;; <VEC_GATHER_SRCDI> and are tied to the destination (operand 0) and the
;; scratch (operand 1) respectively.  The template prints the tied
;; operands %5 and %2, not %1 and %0, so the register names follow the
;; <VEC_GATHER_SRCDI> mode.
15157 (define_insn "*avx2_gatherdi<mode>"
15158 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
15159 (unspec:VEC_GATHER_MODE
15160 [(match_operand:<VEC_GATHER_SRCDI> 2 "register_operand" "0")
15161 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
15163 [(match_operand:P 3 "vsib_address_operand" "Tv")
15164 (match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x")
15165 (match_operand:SI 6 "const1248_operand" "n")]
15167 (mem:BLK (scratch))
15168 (match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" "1")]
15170 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
15172 "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %2|%2, %7, %5}"
15173 [(set_attr "type" "ssemov")
15174 (set_attr "prefix" "vex")
15175 (set_attr "mode" "<sseinsnmode>")])
;; Variant of the DImode-index gather with shifted operand numbering: the
;; memory operator is operand 6 and operand 4 is tied to the scratch.
;; When <MODE> differs from <VEC_GATHER_SRCDI>, the output code prints the
;; destination with the %x modifier; otherwise it prints it as %0.
15177 (define_insn "*avx2_gatherdi<mode>_2"
15178 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
15179 (unspec:VEC_GATHER_MODE
15181 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
15183 [(match_operand:P 2 "vsib_address_operand" "Tv")
15184 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x")
15185 (match_operand:SI 5 "const1248_operand" "n")]
15187 (mem:BLK (scratch))
15188 (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")]
15190 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
15193 if (<MODE>mode != <VEC_GATHER_SRCDI>mode)
15194 return "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %x0|%x0, %6, %4}";
15195 return "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}";
15197 [(set_attr "type" "ssemov")
15198 (set_attr "prefix" "vex")
15199 (set_attr "mode" "<sseinsnmode>")])
;; DImode-index gather whose result is the vec_select of elements 0-3 of
;; the gather.  The destination has mode <VEC_GATHER_SRCDI> and the
;; scratch is a VI4F_256 register.  Operand 2 is tied to the destination
;; and operand 5 to the scratch.
15201 (define_insn "*avx2_gatherdi<mode>_3"
15202 [(set (match_operand:<VEC_GATHER_SRCDI> 0 "register_operand" "=&x")
15203 (vec_select:<VEC_GATHER_SRCDI>
15205 [(match_operand:<VEC_GATHER_SRCDI> 2 "register_operand" "0")
15206 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
15208 [(match_operand:P 3 "vsib_address_operand" "Tv")
15209 (match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x")
15210 (match_operand:SI 6 "const1248_operand" "n")]
15212 (mem:BLK (scratch))
15213 (match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" "1")]
15215 (parallel [(const_int 0) (const_int 1)
15216 (const_int 2) (const_int 3)])))
15217 (clobber (match_scratch:VI4F_256 1 "=&x"))]
15219 "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %0|%0, %7, %5}"
15220 [(set_attr "type" "ssemov")
15221 (set_attr "prefix" "vex")
15222 (set_attr "mode" "<sseinsnmode>")])
;; Variant of the element 0-3 selecting DImode-index gather with shifted
;; operand numbering.  The memory operator is operand 6 (address operand
;; 2, index operand 3, scale operand 5), and operand 4 is tied to the
;; VI4F_256 scratch operand 1.
15224 (define_insn "*avx2_gatherdi<mode>_4"
15225 [(set (match_operand:<VEC_GATHER_SRCDI> 0 "register_operand" "=&x")
15226 (vec_select:<VEC_GATHER_SRCDI>
15229 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
15231 [(match_operand:P 2 "vsib_address_operand" "Tv")
15232 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x")
15233 (match_operand:SI 5 "const1248_operand" "n")]
15235 (mem:BLK (scratch))
15236 (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")]
15238 (parallel [(const_int 0) (const_int 1)
15239 (const_int 2) (const_int 3)])))
15240 (clobber (match_scratch:VI4F_256 1 "=&x"))]
15242 "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}"
15243 [(set_attr "type" "ssemov")
15244 (set_attr "prefix" "vex")
15245 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the 512-bit gather with an index vector of mode
;; <VEC_GATHER_IDXSI>, iterated over VI48F_512.
;;   operand 0 - destination register.
;;   operand 1 - VI48F_512 register input.
;;   operand 4 - mask register of mode <avx512fmaskmode>.
;;   operand 2 - base address (vsib_address_operand).
;;   operand 3 - index vector register.
;;   operand 5 - const1248_operand immediate.
;;   operand 7 - scratch of mode <avx512fmaskmode>, clobbered.
;; The preparation code wraps operands 2, 3 and 5 in a Pmode
;; UNSPEC_VSIBADDR expression.
15247 (define_expand "avx512f_gathersi<mode>"
15248 [(parallel [(set (match_operand:VI48F_512 0 "register_operand")
15250 [(match_operand:VI48F_512 1 "register_operand")
15251 (match_operand:<avx512fmaskmode> 4 "register_operand")
15252 (mem:<ssescalarmode>
15254 [(match_operand 2 "vsib_address_operand")
15255 (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand")
15256 (match_operand:SI 5 "const1248_operand")]))]
15258 (clobber (match_scratch:<avx512fmaskmode> 7))])]
15262 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
15263 operands[5]), UNSPEC_VSIBADDR);
;; Matcher that emits v<sseintprefix>gatherd<ssemodesuffix> for VI48F_512.
;;   operand 0 - destination register (early-clobber "=&v").
;;   operand 1 - register input tied to the destination (constraint "0").
;;   operand 7 - mask input, tied to scratch operand 2 (constraint "2").
;;   operand 6 - VSIB memory reference formed from address operand 4,
;;               index register operand 3 and const1248_operand operand 5.
;;   operand 2 - "Yk" scratch clobbered by the insn; the template prints
;;               it inside braces after the destination.
15266 (define_insn "*avx512f_gathersi<mode>"
15267 [(set (match_operand:VI48F_512 0 "register_operand" "=&v")
15269 [(match_operand:VI48F_512 1 "register_operand" "0")
15270 (match_operand:<avx512fmaskmode> 7 "register_operand" "2")
15271 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
15273 [(match_operand:P 4 "vsib_address_operand" "Tv")
15274 (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand" "v")
15275 (match_operand:SI 5 "const1248_operand" "n")]
15276 UNSPEC_VSIBADDR)])]
15278 (clobber (match_scratch:<avx512fmaskmode> 2 "=&Yk"))]
15280 "v<sseintprefix>gatherd<ssemodesuffix>\t{%6, %0%{%2%}|%0%{%2%}, %g6}"
15281 [(set_attr "type" "ssemov")
15282 (set_attr "prefix" "evex")
15283 (set_attr "mode" "<sseinsnmode>")])
;; Second matcher for v<sseintprefix>gatherd<ssemodesuffix>; among the
;; operands shown here there is no register input tied to the destination.
;;   operand 0 - destination register (early-clobber "=&v").
;;   operand 6 - mask input, tied to scratch operand 1 (constraint "1").
;;   operand 5 - VSIB memory reference formed from address operand 3,
;;               index register operand 2 and const1248_operand operand 4.
;;   operand 1 - "Yk" scratch clobbered by the insn and printed in braces
;;               after the destination.
15285 (define_insn "*avx512f_gathersi<mode>_2"
15286 [(set (match_operand:VI48F_512 0 "register_operand" "=&v")
15289 (match_operand:<avx512fmaskmode> 6 "register_operand" "1")
15290 (match_operator:<ssescalarmode> 5 "vsib_mem_operator"
15292 [(match_operand:P 3 "vsib_address_operand" "Tv")
15293 (match_operand:<VEC_GATHER_IDXSI> 2 "register_operand" "v")
15294 (match_operand:SI 4 "const1248_operand" "n")]
15295 UNSPEC_VSIBADDR)])]
15297 (clobber (match_scratch:<avx512fmaskmode> 1 "=&Yk"))]
15299 "v<sseintprefix>gatherd<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %g5}"
15300 [(set_attr "type" "ssemov")
15301 (set_attr "prefix" "evex")
15302 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the 512-bit gather with an index vector of mode
;; <VEC_GATHER_IDXDI>, iterated over VI48F_512.
;;   operand 0 - destination register (VI48F_512).
;;   operand 1 - register input of mode <VEC_GATHER_SRCDI>.
;;   operand 4 - QImode mask register.
;;   operand 2 - base address (vsib_address_operand).
;;   operand 3 - index vector register.
;;   operand 5 - const1248_operand immediate.
;;   operand 7 - QImode scratch, clobbered.
;; The preparation code wraps operands 2, 3 and 5 in a Pmode
;; UNSPEC_VSIBADDR expression.
15305 (define_expand "avx512f_gatherdi<mode>"
15306 [(parallel [(set (match_operand:VI48F_512 0 "register_operand")
15308 [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand")
15309 (match_operand:QI 4 "register_operand")
15310 (mem:<ssescalarmode>
15312 [(match_operand 2 "vsib_address_operand")
15313 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand")
15314 (match_operand:SI 5 "const1248_operand")]))]
15316 (clobber (match_scratch:QI 7))])]
15320 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
15321 operands[5]), UNSPEC_VSIBADDR);
;; Matcher that emits v<sseintprefix>gatherq<ssemodesuffix> for VI48F_512.
;;   operand 0 - destination register (early-clobber "=&v").
;;   operand 1 - <VEC_GATHER_SRCDI> register input tied to the destination
;;               (constraint "0").
;;   operand 7 - QImode mask input, tied to scratch operand 2.
;;   operand 6 - VSIB memory reference formed from address operand 4,
;;               index register operand 3 and const1248_operand operand 5.
;;   operand 2 - QImode "Yk" scratch clobbered by the insn.
;; The template prints the destination as %1 rather than %0, i.e. through
;; the <VEC_GATHER_SRCDI>-mode operand that shares operand 0's register.
15324 (define_insn "*avx512f_gatherdi<mode>"
15325 [(set (match_operand:VI48F_512 0 "register_operand" "=&v")
15327 [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand" "0")
15328 (match_operand:QI 7 "register_operand" "2")
15329 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
15331 [(match_operand:P 4 "vsib_address_operand" "Tv")
15332 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "v")
15333 (match_operand:SI 5 "const1248_operand" "n")]
15334 UNSPEC_VSIBADDR)])]
15336 (clobber (match_scratch:QI 2 "=&Yk"))]
15338 "v<sseintprefix>gatherq<ssemodesuffix>\t{%6, %1%{%2%}|%1%{%2%}, %g6}"
15339 [(set_attr "type" "ssemov")
15340 (set_attr "prefix" "evex")
15341 (set_attr "mode" "<sseinsnmode>")])
;; Second matcher for v<sseintprefix>gatherq<ssemodesuffix>; among the
;; operands shown here there is no register input tied to the destination.
;;   operand 0 - destination register (early-clobber "=&v").
;;   operand 6 - QImode mask input, tied to scratch operand 1.
;;   operand 5 - VSIB memory reference formed from address operand 3,
;;               index register operand 2 and const1248_operand operand 4.
;;   operand 1 - QImode "Yk" scratch clobbered by the insn.
;; The output code picks the template at run time: when <MODE>mode differs
;; from <VEC_GATHER_SRCDI>mode the destination is printed with the %t
;; operand modifier, otherwise as plain %0.
15343 (define_insn "*avx512f_gatherdi<mode>_2"
15344 [(set (match_operand:VI48F_512 0 "register_operand" "=&v")
15347 (match_operand:QI 6 "register_operand" "1")
15348 (match_operator:<ssescalarmode> 5 "vsib_mem_operator"
15350 [(match_operand:P 3 "vsib_address_operand" "Tv")
15351 (match_operand:<VEC_GATHER_IDXDI> 2 "register_operand" "v")
15352 (match_operand:SI 4 "const1248_operand" "n")]
15353 UNSPEC_VSIBADDR)])]
15355 (clobber (match_scratch:QI 1 "=&Yk"))]
15358 if (<MODE>mode != <VEC_GATHER_SRCDI>mode)
15359 return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %t0%{%1%}|%t0%{%1%}, %g5}";
15360 return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %g5}";
15362 [(set_attr "type" "ssemov")
15363 (set_attr "prefix" "evex")
15364 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the 512-bit scatter with an index vector of mode
;; <VEC_GATHER_IDXSI>, iterated over VI48F_512.
;;   operand 0 - base address (vsib_address_operand) of the destination.
;;   operand 2 - index vector register.
;;   operand 4 - const1248_operand immediate.
;;   operand 1 - mask register of mode <avx512fmaskmode>.
;;   operand 3 - VI48F_512 register holding the data to store.
;;   operand 6 - scratch of mode <avx512fmaskmode>, clobbered.
;; The preparation code wraps operands 0, 2 and 4 in a Pmode
;; UNSPEC_VSIBADDR expression.
15366 (define_expand "avx512f_scattersi<mode>"
15367 [(parallel [(set (mem:VI48F_512
15369 [(match_operand 0 "vsib_address_operand")
15370 (match_operand:<VEC_GATHER_IDXSI> 2 "register_operand")
15371 (match_operand:SI 4 "const1248_operand")]))
15373 [(match_operand:<avx512fmaskmode> 1 "register_operand")
15374 (match_operand:VI48F_512 3 "register_operand")]
15376 (clobber (match_scratch:<avx512fmaskmode> 6))])]
15380 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[0], operands[2],
15381 operands[4]), UNSPEC_VSIBADDR);
;; Matcher that emits v<sseintprefix>scatterd<ssemodesuffix> for VI48F_512.
;;   operand 5 - destination VSIB memory reference formed from address
;;               operand 0, index register operand 2 and
;;               const1248_operand operand 4.
;;   operand 6 - mask input, tied to scratch operand 1 (constraint "1").
;;   operand 3 - register holding the data to store ("v").
;;   operand 1 - "Yk" scratch clobbered by the insn and printed in braces
;;               after the memory operand.
15384 (define_insn "*avx512f_scattersi<mode>"
15385 [(set (match_operator:VI48F_512 5 "vsib_mem_operator"
15387 [(match_operand:P 0 "vsib_address_operand" "Tv")
15388 (match_operand:<VEC_GATHER_IDXSI> 2 "register_operand" "v")
15389 (match_operand:SI 4 "const1248_operand" "n")]
15392 [(match_operand:<avx512fmaskmode> 6 "register_operand" "1")
15393 (match_operand:VI48F_512 3 "register_operand" "v")]
15395 (clobber (match_scratch:<avx512fmaskmode> 1 "=&Yk"))]
15397 "v<sseintprefix>scatterd<ssemodesuffix>\t{%3, %5%{%1%}|%5%{%1%}, %3}"
15398 [(set_attr "type" "ssemov")
15399 (set_attr "prefix" "evex")
15400 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the 512-bit scatter with a V8DI index vector, iterated
;; over VI48F_512.
;;   operand 0 - base address (vsib_address_operand) of the destination.
;;   operand 2 - V8DI index vector register.
;;   operand 4 - const1248_operand immediate.
;;   operand 1 - QImode mask register.
;;   operand 3 - <VEC_GATHER_SRCDI> register holding the data to store.
;;   operand 6 - QImode scratch, clobbered.
;; The preparation code wraps operands 0, 2 and 4 in a Pmode
;; UNSPEC_VSIBADDR expression.
15402 (define_expand "avx512f_scatterdi<mode>"
15403 [(parallel [(set (mem:VI48F_512
15405 [(match_operand 0 "vsib_address_operand")
15406 (match_operand:V8DI 2 "register_operand")
15407 (match_operand:SI 4 "const1248_operand")]))
15409 [(match_operand:QI 1 "register_operand")
15410 (match_operand:<VEC_GATHER_SRCDI> 3 "register_operand")]
15412 (clobber (match_scratch:QI 6))])]
15416 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[0], operands[2],
15417 operands[4]), UNSPEC_VSIBADDR);
;; Matcher that emits v<sseintprefix>scatterq<ssemodesuffix> for VI48F_512.
;;   operand 5 - destination VSIB memory reference formed from address
;;               operand 0, V8DI index register operand 2 and
;;               const1248_operand operand 4.
;;   operand 6 - QImode mask input, tied to scratch operand 1.
;;   operand 3 - <VEC_GATHER_SRCDI> register holding the data to store.
;;   operand 1 - QImode "Yk" scratch clobbered by the insn and printed in
;;               braces after the memory operand.
15420 (define_insn "*avx512f_scatterdi<mode>"
15421 [(set (match_operator:VI48F_512 5 "vsib_mem_operator"
15423 [(match_operand:P 0 "vsib_address_operand" "Tv")
15424 (match_operand:V8DI 2 "register_operand" "v")
15425 (match_operand:SI 4 "const1248_operand" "n")]
15428 [(match_operand:QI 6 "register_operand" "1")
15429 (match_operand:<VEC_GATHER_SRCDI> 3 "register_operand" "v")]
15431 (clobber (match_scratch:QI 1 "=&Yk"))]
15433 "v<sseintprefix>scatterq<ssemodesuffix>\t{%3, %5%{%1%}|%5%{%1%}, %3}"
15434 [(set_attr "type" "ssemov")
15435 (set_attr "prefix" "evex")
15436 (set_attr "mode" "<sseinsnmode>")])
;; Masked register-to-register v<sseintprefix>compress<ssemodesuffix>
;; for VI48F_512.
;;   operand 0 - destination register.
;;   operand 1 - source register.
;;   operand 2 - vector_move_operand with constraint "0C": either tied to
;;               the destination or a constant accepted by "C".
;;   operand 3 - mask register ("Yk"), printed in braces after operand 0.
;; %N2 follows the mask in the template; presumably it emits the zeroing
;; marker when operand 2 is the constant alternative -- TODO confirm
;; against the i386 operand printer.
15438 (define_insn "avx512f_compress<mode>_mask"
15439 [(set (match_operand:VI48F_512 0 "register_operand" "=v")
15441 [(match_operand:VI48F_512 1 "register_operand" "v")
15442 (match_operand:VI48F_512 2 "vector_move_operand" "0C")
15443 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")]
15446 "v<sseintprefix>compress<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
15447 [(set_attr "type" "ssemov")
15448 (set_attr "prefix" "evex")
15449 (set_attr "mode" "<sseinsnmode>")])
;; Masked v<sseintprefix>compress<ssemodesuffix> with a memory destination
;; (UNSPEC_COMPRESS_STORE); the "memory" attribute is "store".
;;   operand 0 - destination memory ("=m").
;;   operand 1 - source register.
;;   operand 2 - mask register ("Yk"), printed in braces after operand 0.
;; NOTE(review): operand 1 uses constraint "x" while the register form
;; avx512f_compress<mode>_mask uses "v" for its source -- confirm whether
;; the narrower register class is intentional.
15451 (define_insn "avx512f_compressstore<mode>_mask"
15452 [(set (match_operand:VI48F_512 0 "memory_operand" "=m")
15454 [(match_operand:VI48F_512 1 "register_operand" "x")
15456 (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")]
15457 UNSPEC_COMPRESS_STORE))]
15459 "v<sseintprefix>compress<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
15460 [(set_attr "type" "ssemov")
15461 (set_attr "prefix" "evex")
15462 (set_attr "memory" "store")
15463 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the "maskz" form of the VI48F_512 expand operation.
;;   operand 0 - destination register.
;;   operand 1 - source, register or memory (nonimmediate_operand).
;;   operand 2 - vector_move_operand; the preparation statement overwrites
;;               it with CONST0_RTX (<MODE>mode), i.e. the zero vector.
;;   operand 3 - mask register of mode <avx512fmaskmode>.
15465 (define_expand "avx512f_expand<mode>_maskz"
15466 [(set (match_operand:VI48F_512 0 "register_operand")
15468 [(match_operand:VI48F_512 1 "nonimmediate_operand")
15469 (match_operand:VI48F_512 2 "vector_move_operand")
15470 (match_operand:<avx512fmaskmode> 3 "register_operand")]
15473 "operands[2] = CONST0_RTX (<MODE>mode);")
;; Masked v<sseintprefix>expand<ssemodesuffix> for VI48F_512, with two
;; alternatives: register source ("v") and memory source ("m"). The
;; "memory" attribute is "none" / "load" for them respectively.
;;   operand 0 - destination register.
;;   operand 1 - source, register or memory.
;;   operand 2 - vector_move_operand with constraint "0C": either tied to
;;               the destination or a constant accepted by "C".
;;   operand 3 - mask register ("Yk"), printed in braces after operand 0.
15475 (define_insn "avx512f_expand<mode>_mask"
15476 [(set (match_operand:VI48F_512 0 "register_operand" "=v,v")
15478 [(match_operand:VI48F_512 1 "nonimmediate_operand" "v,m")
15479 (match_operand:VI48F_512 2 "vector_move_operand" "0C,0C")
15480 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")]
15483 "v<sseintprefix>expand<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
15484 [(set_attr "type" "ssemov")
15485 (set_attr "prefix" "evex")
15486 (set_attr "memory" "none,load")
15487 (set_attr "mode" "<sseinsnmode>")])
;; Packed vgetmant<ssemodesuffix> for VF_512; the pattern name carries the
;; <mask_name> and <round_saeonly_name> substitution suffixes.
;;   operand 0 - destination register.
;;   operand 1 - source; predicate and constraint are supplied by the
;;               round_saeonly substitution attributes.
;;   operand 2 - SImode immediate accepted by const_0_to_15_operand.
;; Only the "prefix" and "mode" attributes are set here.
15489 (define_insn "avx512f_getmant<mode><mask_name><round_saeonly_name>"
15490 [(set (match_operand:VF_512 0 "register_operand" "=v")
15492 [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
15493 (match_operand:SI 2 "const_0_to_15_operand")]
15496 "vgetmant<ssemodesuffix>\t{%2, <round_saeonly_mask_op3>%1, %0<mask_operand3>|%0<mask_operand3>, %1<round_saeonly_mask_op3>, %2}";
15497 [(set_attr "prefix" "evex")
15498 (set_attr "mode" "<MODE>")])
;; vgetmant<ssescalarmodesuffix> on VF_128 operands; the "mode" attribute
;; is <ssescalarmode>.
;;   operand 0 - destination register.
;;   operand 1 - register input, printed as the first source.
;;   operand 2 - second source; predicate and constraint are supplied by
;;               the round_saeonly substitution attributes.
;;   operand 3 - SImode immediate accepted by const_0_to_15_operand.
15500 (define_insn "avx512f_getmant<mode><round_saeonly_name>"
15501 [(set (match_operand:VF_128 0 "register_operand" "=v")
15504 [(match_operand:VF_128 1 "register_operand" "v")
15505 (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
15506 (match_operand:SI 3 "const_0_to_15_operand")]
15511 "vgetmant<ssescalarmodesuffix>\t{%3, <round_saeonly_op4>%2, %1, %0|%0, %1, %2<round_saeonly_op4>, %3}";
15512 [(set_attr "prefix" "evex")
15513 (set_attr "mode" "<ssescalarmode>")])
;; Named pattern clz<mode>2 (with <mask_name> variants) for VI48_512,
;; emitted as vplzcnt<ssemodesuffix>.
;;   operand 0 - destination register.
;;   operand 1 - source, register or memory ("vm").
15515 (define_insn "clz<mode>2<mask_name>"
15516 [(set (match_operand:VI48_512 0 "register_operand" "=v")
15518 (match_operand:VI48_512 1 "nonimmediate_operand" "vm")))]
15520 "vplzcnt<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
15521 [(set_attr "type" "sse")
15522 (set_attr "prefix" "evex")
15523 (set_attr "mode" "<sseinsnmode>")])
;; vpconflict<ssemodesuffix> for VI48_512, with <mask_name> variants.
;;   operand 0 - destination register.
;;   operand 1 - source, register or memory ("vm").
15525 (define_insn "<mask_codefor>conflict<mode><mask_name>"
15526 [(set (match_operand:VI48_512 0 "register_operand" "=v")
15528 [(match_operand:VI48_512 1 "nonimmediate_operand" "vm")]
15531 "vpconflict<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
15532 [(set_attr "type" "sse")
15533 (set_attr "prefix" "evex")
15534 (set_attr "mode" "<sseinsnmode>")])
;; sha1msg1 on V4SI operands.
;;   operand 0 - destination register ("=x").
;;   operand 1 - register input tied to the destination (constraint "0").
;;   operand 2 - second source, register or memory ("xm").
15536 (define_insn "sha1msg1"
15537 [(set (match_operand:V4SI 0 "register_operand" "=x")
15539 [(match_operand:V4SI 1 "register_operand" "0")
15540 (match_operand:V4SI 2 "nonimmediate_operand" "xm")]
15543 "sha1msg1\t{%2, %0|%0, %2}"
15544 [(set_attr "type" "sselog1")
15545 (set_attr "mode" "TI")])
;; sha1msg2 on V4SI operands.
;;   operand 0 - destination register ("=x").
;;   operand 1 - register input tied to the destination (constraint "0").
;;   operand 2 - second source, register or memory ("xm").
15547 (define_insn "sha1msg2"
15548 [(set (match_operand:V4SI 0 "register_operand" "=x")
15550 [(match_operand:V4SI 1 "register_operand" "0")
15551 (match_operand:V4SI 2 "nonimmediate_operand" "xm")]
15554 "sha1msg2\t{%2, %0|%0, %2}"
15555 [(set_attr "type" "sselog1")
15556 (set_attr "mode" "TI")])
;; sha1nexte on V4SI operands, modelled as UNSPEC_SHA1NEXTE.
;;   operand 0 - destination register ("=x").
;;   operand 1 - register input tied to the destination (constraint "0").
;;   operand 2 - second source, register or memory ("xm").
15558 (define_insn "sha1nexte"
15559 [(set (match_operand:V4SI 0 "register_operand" "=x")
15561 [(match_operand:V4SI 1 "register_operand" "0")
15562 (match_operand:V4SI 2 "nonimmediate_operand" "xm")]
15563 UNSPEC_SHA1NEXTE))]
15565 "sha1nexte\t{%2, %0|%0, %2}"
15566 [(set_attr "type" "sselog1")
15567 (set_attr "mode" "TI")])
;; sha1rnds4 on V4SI operands, modelled as UNSPEC_SHA1RNDS4.
;;   operand 0 - destination register ("=x").
;;   operand 1 - register input tied to the destination (constraint "0").
;;   operand 2 - second source, register or memory ("xm").
;;   operand 3 - SImode immediate accepted by const_0_to_3_operand;
;;               "length_immediate" is set to 1 for it.
15569 (define_insn "sha1rnds4"
15570 [(set (match_operand:V4SI 0 "register_operand" "=x")
15572 [(match_operand:V4SI 1 "register_operand" "0")
15573 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
15574 (match_operand:SI 3 "const_0_to_3_operand" "n")]
15575 UNSPEC_SHA1RNDS4))]
15577 "sha1rnds4\t{%3, %2, %0|%0, %2, %3}"
15578 [(set_attr "type" "sselog1")
15579 (set_attr "length_immediate" "1")
15580 (set_attr "mode" "TI")])
;; sha256msg1 on V4SI operands, modelled as UNSPEC_SHA256MSG1.
;;   operand 0 - destination register ("=x").
;;   operand 1 - register input tied to the destination (constraint "0").
;;   operand 2 - second source, register or memory ("xm").
15582 (define_insn "sha256msg1"
15583 [(set (match_operand:V4SI 0 "register_operand" "=x")
15585 [(match_operand:V4SI 1 "register_operand" "0")
15586 (match_operand:V4SI 2 "nonimmediate_operand" "xm")]
15587 UNSPEC_SHA256MSG1))]
15589 "sha256msg1\t{%2, %0|%0, %2}"
15590 [(set_attr "type" "sselog1")
15591 (set_attr "mode" "TI")])
;; sha256msg2 on V4SI operands, modelled as UNSPEC_SHA256MSG2.
;;   operand 0 - destination register ("=x").
;;   operand 1 - register input tied to the destination (constraint "0").
;;   operand 2 - second source, register or memory ("xm").
15593 (define_insn "sha256msg2"
15594 [(set (match_operand:V4SI 0 "register_operand" "=x")
15596 [(match_operand:V4SI 1 "register_operand" "0")
15597 (match_operand:V4SI 2 "nonimmediate_operand" "xm")]
15598 UNSPEC_SHA256MSG2))]
15600 "sha256msg2\t{%2, %0|%0, %2}"
15601 [(set_attr "type" "sselog1")
15602 (set_attr "mode" "TI")])
;; sha256rnds2 on V4SI operands, modelled as UNSPEC_SHA256RNDS2.
;;   operand 0 - destination register ("=x").
;;   operand 1 - register input tied to the destination (constraint "0").
;;   operand 2 - second source, register or memory ("xm").
;;   operand 3 - V4SI register with constraint "Yz"; presumably this pins
;;               the operand to the first SSE register -- TODO confirm
;;               against the i386 constraint definitions.
;; NOTE(review): "length_immediate" is "1" although none of the operands
;; is an immediate -- confirm whether this attribute is intended here.
15604 (define_insn "sha256rnds2"
15605 [(set (match_operand:V4SI 0 "register_operand" "=x")
15607 [(match_operand:V4SI 1 "register_operand" "0")
15608 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
15609 (match_operand:V4SI 3 "register_operand" "Yz")]
15610 UNSPEC_SHA256RNDS2))]
15612 "sha256rnds2\t{%3, %2, %0|%0, %2, %3}"
15613 [(set_attr "type" "sselog1")
15614 (set_attr "length_immediate" "1")
15615 (set_attr "mode" "TI")])