1 ;; GCC machine description for SSE instructions
2 ;; Copyright (C) 2005-2014 Free Software Foundation, Inc.
4 ;; This file is part of GCC.
6 ;; GCC is free software; you can redistribute it and/or modify
7 ;; it under the terms of the GNU General Public License as published by
8 ;; the Free Software Foundation; either version 3, or (at your option)
11 ;; GCC is distributed in the hope that it will be useful,
12 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
13 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 ;; GNU General Public License for more details.
16 ;; You should have received a copy of the GNU General Public License
17 ;; along with GCC; see the file COPYING3. If not see
18 ;; <http://www.gnu.org/licenses/>.
20 (define_c_enum "unspec" [
55 UNSPEC_XOP_UNSIGNED_CMP
66 UNSPEC_AESKEYGENASSIST
87 ;; For AVX512F support
91 UNSPEC_UNSIGNED_FIX_NOTRUNC
106 UNSPEC_COMPRESS_STORE
111 ;; For the embedded rounding feature
112 UNSPEC_EMBEDDED_ROUNDING
114 ;; For AVX512PF support
115 UNSPEC_GATHER_PREFETCH
116 UNSPEC_SCATTER_PREFETCH
118 ;; For AVX512ER support
133 (define_c_enum "unspecv" [
143 ;; All vector modes including V?TImode, used in move patterns.
;; VMOVE lists every 128-, 256- and 512-bit vector mode (integer and float)
;; plus V1TI/V2TI.  512-bit entries are enabled only under TARGET_AVX512F and
;; 256-bit entries only under TARGET_AVX; the 128-bit entries carry no
;; per-mode condition.  It drives mov<mode>, *mov<mode>_internal,
;; push<mode>1 and movmisalign<mode>.
144 (define_mode_iterator VMOVE
145 [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
146 (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
147 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
148 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
149 (V2TI "TARGET_AVX") V1TI
150 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
151 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF])
;; V: general vector-mode iterator over QI/HI/SI/DI integer vectors and
;; SF/DF float vectors.  Compared with VMOVE it has no V?TI entries and no
;; 512-bit QI/HI modes, and V2DF is additionally gated on TARGET_SSE2.
154 (define_mode_iterator V
155 [(V32QI "TARGET_AVX") V16QI
156 (V16HI "TARGET_AVX") V8HI
157 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
158 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
159 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
160 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
162 ;; All 128bit vector modes
;; Four integer modes plus V4SF are unconditional; V2DF is enabled only
;; under TARGET_SSE2.
163 (define_mode_iterator V_128
164 [V16QI V8HI V4SI V2DI V4SF (V2DF "TARGET_SSE2")])
166 ;; All 256bit vector modes
;; Four integer and two float 256-bit modes; no entry carries a per-mode
;; target condition.
167 (define_mode_iterator V_256
168 [V32QI V16HI V8SI V4DI V8SF V4DF])
170 ;; All 512bit vector modes
;; Four integer and two float 512-bit modes; no entry carries a per-mode
;; target condition.
171 (define_mode_iterator V_512 [V64QI V32HI V16SI V8DI V16SF V8DF])
173 ;; All 256bit and 512bit vector modes
;; The six 256-bit modes are unconditional here; each of the six 512-bit
;; modes is enabled only under TARGET_AVX512F.
174 (define_mode_iterator V_256_512
175 [V32QI V16HI V8SI V4DI V8SF V4DF
176 (V64QI "TARGET_AVX512F") (V32HI "TARGET_AVX512F") (V16SI "TARGET_AVX512F")
177 (V8DI "TARGET_AVX512F") (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")])
179 ;; All vector float modes
;; Per-mode gates: V16SF/V8DF need TARGET_AVX512F, V8SF/V4DF need TARGET_AVX,
;; V2DF needs TARGET_SSE2, and V4SF is unconditional.  Used by the
;; loadu/storeu and float movnt patterns.
180 (define_mode_iterator VF
181 [(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
182 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
184 ;; 128- and 256-bit float vector modes
;; Same entries and gates as VF minus the 512-bit modes: 256-bit modes need
;; TARGET_AVX, V2DF needs TARGET_SSE2, V4SF is unconditional.
185 (define_mode_iterator VF_128_256
186 [(V8SF "TARGET_AVX") V4SF
187 (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
189 ;; All SFmode vector float modes
;; Single-precision vectors of all three widths: V16SF under TARGET_AVX512F,
;; V8SF under TARGET_AVX, V4SF unconditionally.
190 (define_mode_iterator VF1
191 [(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF])
193 ;; 128- and 256-bit SF vector modes
;; VF1 without the 512-bit entry: V8SF under TARGET_AVX, V4SF unconditionally.
194 (define_mode_iterator VF1_128_256
195 [(V8SF "TARGET_AVX") V4SF])
197 ;; All DFmode vector float modes
;; Double-precision vectors of all three widths.  Note that V2DF carries no
;; TARGET_SSE2 gate here, unlike its entry in VF.
198 (define_mode_iterator VF2
199 [(V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF])
201 ;; 128- and 256-bit DF vector modes
;; VF2 without the 512-bit entry: V4DF under TARGET_AVX, V2DF unconditionally.
202 (define_mode_iterator VF2_128_256
203 [(V4DF "TARGET_AVX") V2DF])
;; 512- and 256-bit DF vector modes only; the 128-bit V2DF is not included.
;; V8DF needs TARGET_AVX512F, V4DF needs TARGET_AVX.
205 (define_mode_iterator VF2_512_256
206 [(V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX")])
208 ;; All 128bit vector float modes
;; V4SF is unconditional; V2DF is enabled only under TARGET_SSE2.
209 (define_mode_iterator VF_128
210 [V4SF (V2DF "TARGET_SSE2")])
212 ;; All 256bit vector float modes
213 (define_mode_iterator VF_256
216 ;; All 512bit vector float modes
217 (define_mode_iterator VF_512
220 ;; All vector integer modes
;; QI/HI/SI/DI integer vectors.  The only 512-bit entries are V16SI and V8DI
;; (TARGET_AVX512F); 256-bit entries need TARGET_AVX; 128-bit entries are
;; unconditional.
221 (define_mode_iterator VI
222 [(V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")
223 (V32QI "TARGET_AVX") V16QI
224 (V16HI "TARGET_AVX") V8HI
225 (V8SI "TARGET_AVX") V4SI
226 (V4DI "TARGET_AVX") V2DI])
;; Same set of modes as VI, but the 256-bit entries are gated on TARGET_AVX2
;; instead of TARGET_AVX.
228 (define_mode_iterator VI_AVX2
229 [(V32QI "TARGET_AVX2") V16QI
230 (V16HI "TARGET_AVX2") V8HI
231 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI
232 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
234 ;; All QImode vector integer modes
;; Byte-element vectors: V32QI under TARGET_AVX, V16QI unconditionally.
;; There is no 512-bit (V64QI) entry.  Used by the lddqu pattern.
235 (define_mode_iterator VI1
236 [(V32QI "TARGET_AVX") V16QI])
;; Integer modes for the loaddqu/storedqu patterns: byte vectors (V32QI
;; under TARGET_AVX, V16QI) plus the 512-bit V16SI and V8DI under
;; TARGET_AVX512F.
238 (define_mode_iterator VI_UNALIGNED_LOADSTORE
239 [(V32QI "TARGET_AVX") V16QI
240 (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")])
242 ;; All DImode vector integer modes
;; V8DI under TARGET_AVX512F, V4DI under TARGET_AVX, V2DI unconditionally.
;; Used by the integer movnt pattern.
243 (define_mode_iterator VI8
244 [(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI])
;; Byte-element vectors with the 256-bit mode gated on TARGET_AVX2.
246 (define_mode_iterator VI1_AVX2
247 [(V32QI "TARGET_AVX2") V16QI])
;; HI-element vectors with the 256-bit mode gated on TARGET_AVX2.
249 (define_mode_iterator VI2_AVX2
250 [(V16HI "TARGET_AVX2") V8HI])
;; As VI2_AVX2, plus the 512-bit V32HI under TARGET_AVX512F.
252 (define_mode_iterator VI2_AVX512F
253 [(V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX2") V8HI])
;; SI-element vectors; the 256-bit mode only needs TARGET_AVX.
255 (define_mode_iterator VI4_AVX
256 [(V8SI "TARGET_AVX") V4SI])
;; SI-element vectors; the 256-bit mode needs TARGET_AVX2.
258 (define_mode_iterator VI4_AVX2
259 [(V8SI "TARGET_AVX2") V4SI])
;; As VI4_AVX2, plus the 512-bit V16SI under TARGET_AVX512F.
261 (define_mode_iterator VI4_AVX512F
262 [(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI])
;; As VI4_AVX512F, plus V8DI under TARGET_AVX512F.  V8DI is the only DI
;; vector mode in this iterator (no V4DI/V2DI).
264 (define_mode_iterator VI48_AVX512F
265 [(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI
266 (V8DI "TARGET_AVX512F")])
;; DI-element vectors; the 256-bit mode needs TARGET_AVX2.
268 (define_mode_iterator VI8_AVX2
269 [(V4DI "TARGET_AVX2") V2DI])
;; As VI8_AVX2, plus the 512-bit V8DI under TARGET_AVX512F.
271 (define_mode_iterator VI8_AVX2_AVX512F
272 [(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
275 (define_mode_iterator V8FI
279 (define_mode_iterator V16FI
282 ;; ??? We should probably use TImode instead.
;; Whole-register TI vectors: V2TI only under TARGET_AVX2, V1TI
;; unconditionally.
283 (define_mode_iterator VIMAX_AVX2
284 [(V2TI "TARGET_AVX2") V1TI])
286 ;; ??? This should probably be dropped in favor of VIMAX_AVX2.
;; V2TI under TARGET_AVX2 plus the scalar TImode.  Differs from VIMAX_AVX2
;; only in using TI where that iterator uses V1TI.
287 (define_mode_iterator SSESCALARMODE
288 [(V2TI "TARGET_AVX2") TI])
;; QI- and HI-element vectors; 256-bit modes need TARGET_AVX2.
290 (define_mode_iterator VI12_AVX2
291 [(V32QI "TARGET_AVX2") V16QI
292 (V16HI "TARGET_AVX2") V8HI])
;; HI- and SI-element vectors; 256-bit modes need TARGET_AVX2.
294 (define_mode_iterator VI24_AVX2
295 [(V16HI "TARGET_AVX2") V8HI
296 (V8SI "TARGET_AVX2") V4SI])
;; QI/HI/SI vectors at 128 bits (unconditional) and 256 bits (TARGET_AVX2),
;; plus the 512-bit V16SI and V8DI under TARGET_AVX512F.  V8DI is the only
;; DI-element mode listed.
298 (define_mode_iterator VI124_AVX2_48_AVX512F
299 [(V32QI "TARGET_AVX2") V16QI
300 (V16HI "TARGET_AVX2") V8HI
301 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI
302 (V8DI "TARGET_AVX512F")])
;; QI/HI/SI vectors; the 512-bit entries are V32HI and V16SI only (there is
;; no V64QI entry).
304 (define_mode_iterator VI124_AVX512F
305 [(V32QI "TARGET_AVX2") V16QI
306 (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX2") V8HI
307 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI])
;; QI/HI/SI vectors at 128 bits and, under TARGET_AVX2, 256 bits.
309 (define_mode_iterator VI124_AVX2
310 [(V32QI "TARGET_AVX2") V16QI
311 (V16HI "TARGET_AVX2") V8HI
312 (V8SI "TARGET_AVX2") V4SI])
;; HI/SI/DI vectors at 128 bits and, under TARGET_AVX2, 256 bits.
314 (define_mode_iterator VI248_AVX2
315 [(V16HI "TARGET_AVX2") V8HI
316 (V8SI "TARGET_AVX2") V4SI
317 (V4DI "TARGET_AVX2") V2DI])
;; As VI248_AVX2, plus the 512-bit V8DI under TARGET_AVX512F.
319 (define_mode_iterator VI248_AVX2_8_AVX512F
320 [(V16HI "TARGET_AVX2") V8HI
321 (V8SI "TARGET_AVX2") V4SI
322 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
;; SI and DI vectors of all three widths: 512-bit under TARGET_AVX512F,
;; 256-bit under TARGET_AVX2, 128-bit unconditionally.
324 (define_mode_iterator VI48_AVX2_48_AVX512F
325 [(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI
326 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
328 (define_mode_iterator V48_AVX2
331 (V4SI "TARGET_AVX2") (V2DI "TARGET_AVX2")
332 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")])
;; ISA-name prefix spliced into pattern names such as
;; <sse2_avx_avx512f>_loaddqu<mode> and <sse2_avx_avx512f>_storedqu<mode>.
;; 128-bit integer modes give "sse2", 256-bit modes "avx", 512-bit modes
;; "avx512f".  Note that the 128-bit float modes V4SF and V2DF map to "avx",
;; not "sse2".
334 (define_mode_attr sse2_avx_avx512f
335 [(V16QI "sse2") (V32QI "avx") (V64QI "avx512f")
336 (V4SI "sse2") (V8SI "avx") (V16SI "avx512f")
338 (V16SF "avx512f") (V8SF "avx") (V4SF "avx")
339 (V8DF "avx512f") (V4DF "avx") (V2DF "avx")])
;; ISA-name string per integer mode: "sse2" for 128-bit modes (including
;; V1TI), "avx2" for 256-bit modes (including V2TI), and "avx512f" for
;; V16SI/V8DI.  Presumably spliced into pattern names; no use is visible in
;; this part of the file.
341 (define_mode_attr sse2_avx2
342 [(V16QI "sse2") (V32QI "avx2")
343 (V8HI "sse2") (V16HI "avx2")
344 (V4SI "sse2") (V8SI "avx2") (V16SI "avx512f")
345 (V2DI "sse2") (V4DI "avx2") (V8DI "avx512f")
346 (V1TI "sse2") (V2TI "avx2")])
;; ISA-name string per mode: "ssse3" for the 64-bit V4HI, the 128-bit
;; integer vectors and scalar TI; "avx2" for the 256-bit modes (including
;; V2TI).  Note the 128-bit whole-register key is TI here, not V1TI.
348 (define_mode_attr ssse3_avx2
349 [(V16QI "ssse3") (V32QI "avx2")
350 (V4HI "ssse3") (V8HI "ssse3") (V16HI "avx2")
351 (V4SI "ssse3") (V8SI "avx2")
352 (V2DI "ssse3") (V4DI "avx2")
353 (TI "ssse3") (V2TI "avx2")])
;; ISA-name string per integer mode: "sse4_1" for 128-bit modes, "avx2" for
;; 256-bit modes, "avx512f" for V16SI/V8DI.
355 (define_mode_attr sse4_1_avx2
356 [(V16QI "sse4_1") (V32QI "avx2")
357 (V8HI "sse4_1") (V16HI "avx2")
358 (V4SI "sse4_1") (V8SI "avx2") (V16SI "avx512f")
359 (V2DI "sse4_1") (V4DI "avx2") (V8DI "avx512f")])
;; ISA-name string chosen by element kind rather than width: the SF/DF
;; vector modes give "avx", the SI/DI vector modes give "avx2" (both at 128
;; and 256 bits).
361 (define_mode_attr avx_avx2
362 [(V4SF "avx") (V2DF "avx")
363 (V8SF "avx") (V4DF "avx")
364 (V4SI "avx2") (V2DI "avx2")
365 (V8SI "avx2") (V4DI "avx2")])
;; Name prefix per integer mode: "vec" for 128-bit modes, "avx2" for 256-bit
;; modes.
367 (define_mode_attr vec_avx2
368 [(V16QI "vec") (V32QI "avx2")
369 (V8HI "vec") (V16HI "avx2")
370 (V4SI "vec") (V8SI "avx2")
371 (V2DI "vec") (V4DI "avx2")])
;; ISA-name string: "avx512f" for 512-bit modes, "avx2" for everything
;; narrower that is listed.  There are no V4SF/V2DF entries.
373 (define_mode_attr avx2_avx512f
374 [(V4SI "avx2") (V8SI "avx2") (V16SI "avx512f")
375 (V2DI "avx2") (V4DI "avx2") (V8DI "avx512f")
376 (V8SF "avx2") (V16SF "avx512f")
377 (V4DF "avx2") (V8DF "avx512f")])
;; Type letter spliced into AVX-512 shuffle-style mnemonics, e.g.
;; vextract<shuffletype>64x4 and vbroadcast<shuffletype>32x4 in
;; *mov<mode>_internal: "f" selects the floating-point form and "i" the
;; integer form.  Every integer vector mode must map to "i"; those
;; instructions have no other type letter, so any different value would
;; produce a mnemonic the assembler rejects (e.g. "vextractu64x4").
379 (define_mode_attr shuffletype
380 [(V16SF "f") (V16SI "i") (V8DF "f") (V8DI "i")
381 (V8SF "f") (V8SI "i") (V4DF "f") (V4DI "i")
382 (V4SF "f") (V4SI "i") (V2DF "f") (V2DI "i")
383 (V32QI "i") (V16HI "i") (V16QI "i") (V8HI "i")
384 (V64QI "i") (V1TI "i") (V2TI "i")])
;; Maps a 512-bit SI/DI/SF/DF vector mode to the 128-bit vector mode with
;; the same element type (one quarter of the elements).
386 (define_mode_attr ssequartermode
387 [(V16SF "V4SF") (V8DF "V2DF") (V16SI "V4SI") (V8DI "V2DI")])
;; Maps a mode to one of twice the total size, in two different ways:
;; the 512-bit SI/DI/SF/DF modes keep their element type and double the
;; element count, while the QI/HI modes keep their element count and double
;; the element width.
389 (define_mode_attr ssedoublemode
390 [(V16SF "V32SF") (V16SI "V32SI") (V8DI "V16DI") (V8DF "V16DF")
391 (V16HI "V16SI") (V8HI "V8SI") (V4HI "V4SI")
392 (V32QI "V32HI") (V16QI "V16HI")])
;; Maps a DI-element vector mode to the QI-element vector mode of the same
;; total size.
394 (define_mode_attr ssebytemode
395 [(V4DI "V32QI") (V2DI "V16QI")])
397 ;; All 128bit vector integer modes
;; QI, HI, SI and DI element types; no per-mode target condition attached.
398 (define_mode_iterator VI_128 [V16QI V8HI V4SI V2DI])
400 ;; All 256bit vector integer modes
;; QI, HI, SI and DI element types; no per-mode target condition attached.
401 (define_mode_iterator VI_256 [V32QI V16HI V8SI V4DI])
403 ;; All 512bit vector integer modes
;; QI, HI, SI and DI element types; no per-mode target condition attached.
404 (define_mode_iterator VI_512 [V64QI V32HI V16SI V8DI])
406 ;; Various 128bit vector integer mode combinations
;; The digits in each name list the element sizes in bytes that the
;; iterator contains (1 = QI, 2 = HI, 4 = SI, 8 = DI); all entries are
;; unconditional 128-bit modes.
407 (define_mode_iterator VI12_128 [V16QI V8HI])
408 (define_mode_iterator VI14_128 [V16QI V4SI])
409 (define_mode_iterator VI124_128 [V16QI V8HI V4SI])
410 (define_mode_iterator VI128_128 [V16QI V8HI V2DI])
411 (define_mode_iterator VI24_128 [V8HI V4SI])
412 (define_mode_iterator VI248_128 [V8HI V4SI V2DI])
413 (define_mode_iterator VI48_128 [V4SI V2DI])
415 ;; Various 256bit and 512bit vector integer mode combinations
;; QI/HI/SI 256-bit vectors (unconditional) plus the 512-bit V8DI and V16SI,
;; each enabled only under TARGET_AVX512F.
416 (define_mode_iterator VI124_256_48_512
417 [V32QI V16HI V8SI (V8DI "TARGET_AVX512F") (V16SI "TARGET_AVX512F")])
;; SI/DI vector pairs with no per-mode condition: both 256-bit, both
;; 512-bit, and the mixed pair of 256-bit V8SI with 512-bit V8DI (both have
;; eight elements).
418 (define_mode_iterator VI48_256 [V8SI V4DI])
419 (define_mode_iterator VI48_512 [V16SI V8DI])
420 (define_mode_iterator VI4_256_8_512 [V8SI V8DI])
422 ;; Int-float size matches
;; Each iterator pairs an integer vector mode with the float vector mode of
;; identical element width and element count (SI with SF, DI with DF).
;; None of the entries carries a per-mode condition.
423 (define_mode_iterator VI4F_128 [V4SI V4SF])
424 (define_mode_iterator VI8F_128 [V2DI V2DF])
425 (define_mode_iterator VI4F_256 [V8SI V8SF])
426 (define_mode_iterator VI8F_256 [V4DI V4DF])
;; 64-bit-element integer/float pairs: the 256-bit V4DI/V4DF are
;; unconditional, the 512-bit V8DI/V8DF need TARGET_AVX512F.
427 (define_mode_iterator VI8F_256_512
428 [V4DI V4DF (V8DI "TARGET_AVX512F") (V8DF "TARGET_AVX512F")])
429 (define_mode_iterator VI48F_256_512
431 (V16SI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
432 (V8DI "TARGET_AVX512F") (V8DF "TARGET_AVX512F")])
;; The four 512-bit modes with 32- or 64-bit elements, integer and float.
;; Used by the avx512f_load<mode>_mask, avx512f_blendm<mode> and
;; avx512f_store<mode>_mask patterns.
433 (define_mode_iterator VI48F_512 [V16SI V16SF V8DI V8DF])
435 ;; Mapping from float mode to required SSE level
;; The resulting string is spliced into pattern names, e.g.
;; <sse>_loadu..., <sse>_storeu... and <sse>_movnt<mode>.  SF-based 128-bit
;; and scalar modes give "sse", DF-based ones "sse2"; 256-bit modes give
;; "avx" and 512-bit modes "avx512f".
436 (define_mode_attr sse
437 [(SF "sse") (DF "sse2")
438 (V4SF "sse") (V2DF "sse2")
439 (V16SF "avx512f") (V8SF "avx")
440 (V8DF "avx512f") (V4DF "avx")])
;; ISA-name prefix for QI- and DI-element integer vectors: "sse2" at 128
;; bits, "avx" at 256 bits, "avx512f" at 512 bits.  Spliced into
;; <sse2>_movnt<mode>.
442 (define_mode_attr sse2
443 [(V16QI "sse2") (V32QI "avx") (V64QI "avx512f")
444 (V2DI "sse2") (V4DI "avx") (V8DI "avx512f")])
;; ISA-name prefix for byte vectors: "sse3" for V16QI, "avx" for V32QI.
;; Spliced into <sse3>_lddqu<avxsizesuffix>.
446 (define_mode_attr sse3
447 [(V16QI "sse3") (V32QI "avx")])
449 (define_mode_attr sse4_1
450 [(V4SF "sse4_1") (V2DF "sse4_1")
451 (V8SF "avx") (V4DF "avx")
;; Vector-width suffix for pattern names (e.g. <sse>_loadu...<avxsizesuffix>):
;; "512" for 512-bit modes, "256" for 256-bit modes and the empty string for
;; 128-bit modes.
454 (define_mode_attr avxsizesuffix
455 [(V64QI "512") (V32HI "512") (V16SI "512") (V8DI "512")
456 (V32QI "256") (V16HI "256") (V8SI "256") (V4DI "256")
457 (V16QI "") (V8HI "") (V4SI "") (V2DI "")
458 (V16SF "512") (V8DF "512")
459 (V8SF "256") (V4DF "256")
460 (V4SF "") (V2DF "")])
462 ;; SSE instruction mode
463 (define_mode_attr sseinsnmode
464 [(V64QI "XI") (V32HI "XI") (V16SI "XI") (V8DI "XI")
465 (V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI") (V2TI "OI")
466 (V16QI "TI") (V8HI "TI") (V4SI "TI") (V2DI "TI") (V1TI "TI")
467 (V16SF "V16SF") (V8DF "V8DF")
468 (V8SF "V8SF") (V4DF "V4DF")
469 (V4SF "V4SF") (V2DF "V2DF")
472 ;; Mapping of vector modes to corresponding mask size
473 (define_mode_attr avx512fmaskmode
475 (V16HI "HI") (V8HI "QI")
476 (V16SI "HI") (V8SI "QI") (V4SI "QI")
477 (V8DI "QI") (V4DI "QI") (V2DI "QI")
478 (V16SF "HI") (V8SF "QI") (V4SF "QI")
479 (V8DF "QI") (V4DF "QI") (V2DF "QI")])
481 ;; Mapping of vector float modes to an integer mode of the same size
;; SF vectors map to the SI vector with the same element count, DF vectors
;; to the DI vector with the same element count.  Integer vector modes are
;; listed too and map to themselves; there are no 512-bit QI/HI entries.
482 (define_mode_attr sseintvecmode
483 [(V16SF "V16SI") (V8DF "V8DI")
484 (V8SF "V8SI") (V4DF "V4DI")
485 (V4SF "V4SI") (V2DF "V2DI")
486 (V16SI "V16SI") (V8DI "V8DI")
487 (V8SI "V8SI") (V4DI "V4DI")
488 (V4SI "V4SI") (V2DI "V2DI")
489 (V16HI "V16HI") (V8HI "V8HI")
490 (V32QI "V32QI") (V16QI "V16QI")])
492 (define_mode_attr sseintvecmodelower
494 (V8SF "v8si") (V4DF "v4di")
495 (V4SF "v4si") (V2DF "v2di")
496 (V8SI "v8si") (V4DI "v4di")
497 (V4SI "v4si") (V2DI "v2di")
498 (V16HI "v16hi") (V8HI "v8hi")
499 (V32QI "v32qi") (V16QI "v16qi")])
501 ;; Mapping of vector modes to a vector mode of double size
;; The result keeps the element type and doubles the element count, so
;; 128-bit modes map to 256-bit modes and 256-bit modes to 512-bit modes.
;; 512-bit modes have no entry.
502 (define_mode_attr ssedoublevecmode
503 [(V32QI "V64QI") (V16HI "V32HI") (V8SI "V16SI") (V4DI "V8DI")
504 (V16QI "V32QI") (V8HI "V16HI") (V4SI "V8SI") (V2DI "V4DI")
505 (V8SF "V16SF") (V4DF "V8DF")
506 (V4SF "V8SF") (V2DF "V4DF")])
508 ;; Mapping of vector modes to a vector mode of half size
509 (define_mode_attr ssehalfvecmode
510 [(V64QI "V32QI") (V32HI "V16HI") (V16SI "V8SI") (V8DI "V4DI")
511 (V32QI "V16QI") (V16HI "V8HI") (V8SI "V4SI") (V4DI "V2DI")
512 (V16QI "V8QI") (V8HI "V4HI") (V4SI "V2SI")
513 (V16SF "V8SF") (V8DF "V4DF")
514 (V8SF "V4SF") (V4DF "V2DF")
517 ;; Mapping of vector modes to packed single mode of the same size
;; Every vector mode maps to the SF vector mode of equal total size:
;; 512-bit -> V16SF, 256-bit -> V8SF, 128-bit -> V4SF.  The move, loadu and
;; storeu patterns use it as their "mode" attribute value when
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL (or, for 16-byte stores,
;; TARGET_SSE_TYPELESS_STORES) holds.
518 (define_mode_attr ssePSmode
519 [(V16SI "V16SF") (V8DF "V16SF")
520 (V16SF "V16SF") (V8DI "V16SF")
521 (V64QI "V16SF") (V32QI "V8SF") (V16QI "V4SF")
522 (V32HI "V16SF") (V16HI "V8SF") (V8HI "V4SF")
523 (V8SI "V8SF") (V4SI "V4SF")
524 (V4DI "V8SF") (V2DI "V4SF")
525 (V2TI "V8SF") (V1TI "V4SF")
526 (V8SF "V8SF") (V4SF "V4SF")
527 (V4DF "V8SF") (V2DF "V4SF")])
529 ;; Mapping of vector modes back to the scalar modes
;; Yields the element mode of each vector mode (QI, HI, SI, DI, SF or DF),
;; independent of vector width.
530 (define_mode_attr ssescalarmode
531 [(V64QI "QI") (V32QI "QI") (V16QI "QI")
532 (V32HI "HI") (V16HI "HI") (V8HI "HI")
533 (V16SI "SI") (V8SI "SI") (V4SI "SI")
534 (V8DI "DI") (V4DI "DI") (V2DI "DI")
535 (V16SF "SF") (V8SF "SF") (V4SF "SF")
536 (V8DF "DF") (V4DF "DF") (V2DF "DF")])
538 ;; Mapping of vector modes to the 128bit modes
;; Yields the 128-bit vector mode with the same element type; 128-bit modes
;; map to themselves.
539 (define_mode_attr ssexmmmode
540 [(V64QI "V16QI") (V32QI "V16QI") (V16QI "V16QI")
541 (V32HI "V8HI") (V16HI "V8HI") (V8HI "V8HI")
542 (V16SI "V4SI") (V8SI "V4SI") (V4SI "V4SI")
543 (V8DI "V2DI") (V4DI "V2DI") (V2DI "V2DI")
544 (V16SF "V4SF") (V8SF "V4SF") (V4SF "V4SF")
545 (V8DF "V2DF") (V4DF "V2DF") (V2DF "V2DF")])
547 ;; Pointer size override for scalar modes (Intel asm dialect)
548 (define_mode_attr iptr
549 [(V32QI "b") (V16HI "w") (V8SI "k") (V4DI "q")
550 (V16QI "b") (V8HI "w") (V4SI "k") (V2DI "q")
551 (V8SF "k") (V4DF "q")
552 (V4SF "k") (V2DF "q")
555 ;; Number of scalar elements in each vector type
;; The element count as a decimal string.  Note that V32HI has no entry in
;; this table.
556 (define_mode_attr ssescalarnum
557 [(V64QI "64") (V16SI "16") (V8DI "8")
558 (V32QI "32") (V16HI "16") (V8SI "8") (V4DI "4")
559 (V16QI "16") (V8HI "8") (V4SI "4") (V2DI "2")
560 (V16SF "16") (V8DF "8")
561 (V8SF "8") (V4DF "4")
562 (V4SF "4") (V2DF "2")])
564 ;; Mask of scalar elements in each vector type
;; The element count minus one, as a decimal string (i.e. the largest valid
;; element index).  Only 128- and 256-bit modes are listed.
565 (define_mode_attr ssescalarnummask
566 [(V32QI "31") (V16HI "15") (V8SI "7") (V4DI "3")
567 (V16QI "15") (V8HI "7") (V4SI "3") (V2DI "1")
568 (V8SF "7") (V4DF "3")
569 (V4SF "3") (V2DF "1")])
;; Element width in bits, as a decimal string.  Spliced into mnemonics such
;; as vmovdqu<ssescalarsize> / vmovdqa<ssescalarsize> in the AVX-512 masked
;; load/store patterns.  Covers the DI, HI and SI integer vectors plus
;; V16SF and V8DF only.
571 (define_mode_attr ssescalarsize
572 [(V8DI "64") (V4DI "64") (V2DI "64")
573 (V32HI "16") (V16HI "16") (V8HI "16")
574 (V16SI "32") (V8SI "32") (V4SI "32")
575 (V16SF "32") (V8DF "64")])
577 ;; SSE prefix for integer vector modes
;; "p" for integer vector modes and the empty string for float vector
;; modes; spliced into mnemonics such as v<sseintprefix>blendm... in
;; avx512f_blendm<mode>.
578 (define_mode_attr sseintprefix
579 [(V2DI "p") (V2DF "")
584 (V16SI "p") (V16SF "")])
586 ;; SSE scalar suffix for vector modes
587 (define_mode_attr ssescalarmodesuffix
589 (V8SF "ss") (V4DF "sd")
590 (V4SF "ss") (V2DF "sd")
591 (V8SI "ss") (V4DI "sd")
594 ;; Pack/unpack vector modes
;; Unpack direction: maps an integer vector mode to the mode of the same
;; total size with elements twice as wide and half as many of them.
595 (define_mode_attr sseunpackmode
596 [(V16QI "V8HI") (V8HI "V4SI") (V4SI "V2DI")
597 (V32QI "V16HI") (V16HI "V8SI") (V8SI "V4DI")
598 (V32HI "V16SI") (V64QI "V32HI") (V16SI "V8DI")])
;; Pack direction (inverse of sseunpackmode): maps an integer vector mode to
;; the mode of the same total size with elements half as wide and twice as
;; many of them.
600 (define_mode_attr ssepackmode
601 [(V8HI "V16QI") (V4SI "V8HI") (V2DI "V4SI")
602 (V16HI "V32QI") (V8SI "V16HI") (V4DI "V8SI")
603 (V32HI "V64QI") (V16SI "V32HI") (V8DI "V16SI")])
605 ;; Mapping of the max integer size for xop rotate immediate constraint
;; Each value is the element width in bits minus one, for the four 128-bit
;; integer vector modes.
606 (define_mode_attr sserotatemax
607 [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")])
609 ;; Mapping of mode to cast intrinsic name
;; Suffixes: "si" for V8SI, "ps" for V8SF, "pd" for V4DF.  Only these three
;; 256-bit modes are covered.
610 (define_mode_attr castmode [(V8SI "si") (V8SF "ps") (V4DF "pd")])
612 ;; Instruction suffix for sign and zero extensions.
;; A code attribute (keyed on the RTL code, not the mode): sign_extend
;; gives "sx", zero_extend gives "zx".
613 (define_code_attr extsuffix [(sign_extend "sx") (zero_extend "zx")])
615 ;; i128 for integer vectors and TARGET_AVX2, f128 otherwise.
616 ;; i64x4 or f64x4 for 512bit modes.
;; Float modes give a fixed "f128"/"f64x4" and 512-bit integer modes a fixed
;; "i64x4".  The 256-bit integer modes give "%~128": the type letter is not
;; fixed in this table but left to the "%~" output escape (NOTE(review):
;; presumably resolved to "i" or "f" at output time according to
;; TARGET_AVX2, as the comment above describes; the escape is implemented
;; outside this file).
617 (define_mode_attr i128
618 [(V16SF "f64x4") (V8SF "f128") (V8DF "f64x4") (V4DF "f128")
619 (V64QI "i64x4") (V32QI "%~128") (V32HI "i64x4") (V16HI "%~128")
620 (V16SI "i64x4") (V8SI "%~128") (V8DI "i64x4") (V4DI "%~128")])
;; Three 256-bit modes with no per-mode condition; the same three modes are
;; the keys of the castmode attribute.  NOTE(review): presumably used by the
;; 256-bit cast patterns, which are not visible in this part of the file.
623 (define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF])
625 ;; Mapping of immediate bits for blend instructions
;; Each value is the all-ones mask with one bit per vector element:
;; 8 elements -> 255, 4 elements -> 15, 2 elements -> 3.
626 (define_mode_attr blendbits
627 [(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")])
629 ;; Mapping suffixes for broadcast
;; Per 512-bit mode: "d" for V16SI, "q" for V8DI, "ss" for V16SF and "sd"
;; for V8DF.
630 (define_mode_attr bcstscalarsuff
631 [(V16SI "d") (V16SF "ss") (V8DI "q") (V8DF "sd")])
633 ;; Include define_subst patterns for instructions with mask
636 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
638 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
642 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
644 ;; All of these patterns are enabled for SSE1 as well as SSE2.
645 ;; This is essential for maintaining stable calling conventions.
647 (define_expand "mov<mode>"
648 [(set (match_operand:VMOVE 0 "nonimmediate_operand")
649 (match_operand:VMOVE 1 "nonimmediate_operand"))]
652 ix86_expand_vector_move (<MODE>mode, operands);
656 (define_insn "*mov<mode>_internal"
657 [(set (match_operand:VMOVE 0 "nonimmediate_operand" "=v,v ,m")
658 (match_operand:VMOVE 1 "nonimmediate_or_sse_const_operand" "C ,vm,v"))]
660 && (register_operand (operands[0], <MODE>mode)
661 || register_operand (operands[1], <MODE>mode))"
663 int mode = get_attr_mode (insn);
664 switch (which_alternative)
667 return standard_sse_constant_opcode (insn, operands[1]);
670 /* There is no evex-encoded vmov* for sizes smaller than 64-bytes
671 in avx512f, so we need to use workarounds, to access sse registers
672 16-31, which are evex-only. */
673 if (TARGET_AVX512F && <MODE_SIZE> < 64
674 && ((REG_P (operands[0])
675 && EXT_REX_SSE_REGNO_P (REGNO (operands[0])))
676 || (REG_P (operands[1])
677 && EXT_REX_SSE_REGNO_P (REGNO (operands[1])))))
679 if (memory_operand (operands[0], <MODE>mode))
681 if (<MODE_SIZE> == 32)
682 return "vextract<shuffletype>64x4\t{$0x0, %g1, %0|%0, %g1, 0x0}";
683 else if (<MODE_SIZE> == 16)
684 return "vextract<shuffletype>32x4\t{$0x0, %g1, %0|%0, %g1, 0x0}";
688 else if (memory_operand (operands[1], <MODE>mode))
690 if (<MODE_SIZE> == 32)
691 return "vbroadcast<shuffletype>64x4\t{%1, %g0|%g0, %1}";
692 else if (<MODE_SIZE> == 16)
693 return "vbroadcast<shuffletype>32x4\t{%1, %g0|%g0, %1}";
698 /* Reg -> reg move is always aligned. Just use wider move. */
703 return "vmovaps\t{%g1, %g0|%g0, %g1}";
706 return "vmovapd\t{%g1, %g0|%g0, %g1}";
709 return "vmovdqa64\t{%g1, %g0|%g0, %g1}";
720 && (misaligned_operand (operands[0], <MODE>mode)
721 || misaligned_operand (operands[1], <MODE>mode)))
722 return "vmovups\t{%1, %0|%0, %1}";
724 return "%vmovaps\t{%1, %0|%0, %1}";
730 && (misaligned_operand (operands[0], <MODE>mode)
731 || misaligned_operand (operands[1], <MODE>mode)))
732 return "vmovupd\t{%1, %0|%0, %1}";
734 return "%vmovapd\t{%1, %0|%0, %1}";
739 && (misaligned_operand (operands[0], <MODE>mode)
740 || misaligned_operand (operands[1], <MODE>mode)))
741 return "vmovdqu\t{%1, %0|%0, %1}";
743 return "%vmovdqa\t{%1, %0|%0, %1}";
745 if (misaligned_operand (operands[0], <MODE>mode)
746 || misaligned_operand (operands[1], <MODE>mode))
747 return "vmovdqu64\t{%1, %0|%0, %1}";
749 return "vmovdqa64\t{%1, %0|%0, %1}";
758 [(set_attr "type" "sselog1,ssemov,ssemov")
759 (set_attr "prefix" "maybe_vex")
761 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
762 (const_string "<ssePSmode>")
763 (and (match_test "<MODE_SIZE> == 16")
764 (and (eq_attr "alternative" "2")
765 (match_test "TARGET_SSE_TYPELESS_STORES")))
766 (const_string "<ssePSmode>")
767 (match_test "TARGET_AVX")
768 (const_string "<sseinsnmode>")
769 (ior (not (match_test "TARGET_SSE2"))
770 (match_test "optimize_function_for_size_p (cfun)"))
771 (const_string "V4SF")
772 (and (eq_attr "alternative" "0")
773 (match_test "TARGET_SSE_LOAD0_BY_PXOR"))
776 (const_string "<sseinsnmode>")))])
778 (define_insn "avx512f_load<mode>_mask"
779 [(set (match_operand:VI48F_512 0 "register_operand" "=v,v")
781 (match_operand:VI48F_512 1 "nonimmediate_operand" "v,m")
782 (match_operand:VI48F_512 2 "vector_move_operand" "0C,0C")
783 (match_operand:<avx512fmaskmode> 3 "register_operand" "k,k")))]
786 switch (MODE_<sseinsnmode>)
790 if (misaligned_operand (operands[1], <MODE>mode))
791 return "vmovu<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}";
792 return "vmova<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}";
794 if (misaligned_operand (operands[1], <MODE>mode))
795 return "vmovdqu<ssescalarsize>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}";
796 return "vmovdqa<ssescalarsize>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}";
799 [(set_attr "type" "ssemov")
800 (set_attr "prefix" "evex")
801 (set_attr "memory" "none,load")
802 (set_attr "mode" "<sseinsnmode>")])
804 (define_insn "avx512f_blendm<mode>"
805 [(set (match_operand:VI48F_512 0 "register_operand" "=v")
807 (match_operand:VI48F_512 2 "nonimmediate_operand" "vm")
808 (match_operand:VI48F_512 1 "register_operand" "v")
809 (match_operand:<avx512fmaskmode> 3 "register_operand" "k")))]
811 "v<sseintprefix>blendm<ssemodesuffix>\t{%2, %1, %0%{%3%}|%0%{%3%}, %1, %2}"
812 [(set_attr "type" "ssemov")
813 (set_attr "prefix" "evex")
814 (set_attr "mode" "<sseinsnmode>")])
816 (define_insn "avx512f_store<mode>_mask"
817 [(set (match_operand:VI48F_512 0 "memory_operand" "=m")
819 (match_operand:VI48F_512 1 "register_operand" "v")
821 (match_operand:<avx512fmaskmode> 2 "register_operand" "k")))]
824 switch (MODE_<sseinsnmode>)
828 return "vmova<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
830 return "vmovdqa<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
833 [(set_attr "type" "ssemov")
834 (set_attr "prefix" "evex")
835 (set_attr "memory" "store")
836 (set_attr "mode" "<sseinsnmode>")])
838 (define_insn "sse2_movq128"
839 [(set (match_operand:V2DI 0 "register_operand" "=x")
842 (match_operand:V2DI 1 "nonimmediate_operand" "xm")
843 (parallel [(const_int 0)]))
846 "%vmovq\t{%1, %0|%0, %q1}"
847 [(set_attr "type" "ssemov")
848 (set_attr "prefix" "maybe_vex")
849 (set_attr "mode" "TI")])
851 ;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
852 ;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
853 ;; from memory, we'd prefer to load the memory directly into the %xmm
854 ;; register. To facilitate this happy circumstance, this pattern won't
855 ;; split until after register allocation. If the 64-bit value didn't
856 ;; come from memory, this is the best we can do. This is much better
857 ;; than storing %edx:%eax into a stack temporary and loading an %xmm
860 (define_insn_and_split "movdi_to_sse"
862 [(set (match_operand:V4SI 0 "register_operand" "=?x,x")
863 (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
864 (clobber (match_scratch:V4SI 2 "=&x,X"))])]
865 "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_TO_VEC"
867 "&& reload_completed"
870 if (register_operand (operands[1], DImode))
872 /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
873 Assemble the 64-bit DImode value in an xmm register. */
874 emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
875 gen_rtx_SUBREG (SImode, operands[1], 0)));
876 emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
877 gen_rtx_SUBREG (SImode, operands[1], 4)));
878 emit_insn (gen_vec_interleave_lowv4si (operands[0], operands[0],
881 else if (memory_operand (operands[1], DImode))
883 rtx tmp = gen_reg_rtx (V2DImode);
884 emit_insn (gen_vec_concatv2di (tmp, operands[1], const0_rtx));
885 emit_move_insn (operands[0], gen_lowpart (V4SImode, tmp));
892 [(set (match_operand:V4SF 0 "register_operand")
893 (match_operand:V4SF 1 "zero_extended_scalar_load_operand"))]
894 "TARGET_SSE && reload_completed"
897 (vec_duplicate:V4SF (match_dup 1))
901 operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
902 operands[2] = CONST0_RTX (V4SFmode);
906 [(set (match_operand:V2DF 0 "register_operand")
907 (match_operand:V2DF 1 "zero_extended_scalar_load_operand"))]
908 "TARGET_SSE2 && reload_completed"
909 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
911 operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
912 operands[2] = CONST0_RTX (DFmode);
915 (define_expand "push<mode>1"
916 [(match_operand:VMOVE 0 "register_operand")]
919 ix86_expand_push (<MODE>mode, operands[0]);
923 (define_expand "movmisalign<mode>"
924 [(set (match_operand:VMOVE 0 "nonimmediate_operand")
925 (match_operand:VMOVE 1 "nonimmediate_operand"))]
928 ix86_expand_vector_move_misalign (<MODE>mode, operands);
932 (define_expand "<sse>_loadu<ssemodesuffix><avxsizesuffix><mask_name>"
933 [(set (match_operand:VF 0 "register_operand")
934 (unspec:VF [(match_operand:VF 1 "nonimmediate_operand")]
936 "TARGET_SSE && <mask_mode512bit_condition>"
938 /* For AVX, normal *mov<mode>_internal pattern will handle unaligned loads
939 just fine if misaligned_operand is true, and without the UNSPEC it can
940 be combined with arithmetic instructions. If misaligned_operand is
941 false, still emit UNSPEC_LOADU insn to honor user's request for
944 && misaligned_operand (operands[1], <MODE>mode))
946 rtx src = operands[1];
948 src = gen_rtx_VEC_MERGE (<MODE>mode, operands[1],
949 operands[2 * <mask_applied>],
950 operands[3 * <mask_applied>]);
951 emit_insn (gen_rtx_SET (VOIDmode, operands[0], src));
956 (define_insn "*<sse>_loadu<ssemodesuffix><avxsizesuffix><mask_name>"
957 [(set (match_operand:VF 0 "register_operand" "=v")
959 [(match_operand:VF 1 "nonimmediate_operand" "vm")]
961 "TARGET_SSE && <mask_mode512bit_condition>"
963 switch (get_attr_mode (insn))
968 return "%vmovups\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
970 return "%vmovu<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
973 [(set_attr "type" "ssemov")
974 (set_attr "movu" "1")
975 (set_attr "ssememalign" "8")
976 (set_attr "prefix" "maybe_vex")
978 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
979 (const_string "<ssePSmode>")
980 (match_test "TARGET_AVX")
981 (const_string "<MODE>")
982 (match_test "optimize_function_for_size_p (cfun)")
983 (const_string "V4SF")
985 (const_string "<MODE>")))])
987 (define_insn "<sse>_storeu<ssemodesuffix><avxsizesuffix>"
988 [(set (match_operand:VF 0 "memory_operand" "=m")
990 [(match_operand:VF 1 "register_operand" "v")]
994 switch (get_attr_mode (insn))
999 return "%vmovups\t{%1, %0|%0, %1}";
1001 return "%vmovu<ssemodesuffix>\t{%1, %0|%0, %1}";
1004 [(set_attr "type" "ssemov")
1005 (set_attr "movu" "1")
1006 (set_attr "ssememalign" "8")
1007 (set_attr "prefix" "maybe_vex")
1009 (cond [(and (match_test "<MODE_SIZE> == 16")
1010 (ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
1011 (match_test "TARGET_SSE_TYPELESS_STORES")))
1012 (const_string "<ssePSmode>")
1013 (match_test "TARGET_AVX")
1014 (const_string "<MODE>")
1015 (match_test "optimize_function_for_size_p (cfun)")
1016 (const_string "V4SF")
1018 (const_string "<MODE>")))])
1020 (define_insn "avx512f_storeu<ssemodesuffix>512_mask"
1021 [(set (match_operand:VF_512 0 "memory_operand" "=m")
1024 [(match_operand:VF_512 1 "register_operand" "v")]
1027 (match_operand:<avx512fmaskmode> 2 "register_operand" "k")))]
1030 switch (get_attr_mode (insn))
1033 return "vmovups\t{%1, %0%{%2%}|%0%{%2%}, %1}";
1035 return "vmovu<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
1038 [(set_attr "type" "ssemov")
1039 (set_attr "movu" "1")
1040 (set_attr "memory" "store")
1041 (set_attr "prefix" "evex")
1042 (set_attr "mode" "<sseinsnmode>")])
1044 (define_expand "<sse2_avx_avx512f>_loaddqu<mode><mask_name>"
1045 [(set (match_operand:VI_UNALIGNED_LOADSTORE 0 "register_operand")
1046 (unspec:VI_UNALIGNED_LOADSTORE
1047 [(match_operand:VI_UNALIGNED_LOADSTORE 1 "nonimmediate_operand")]
1049 "TARGET_SSE2 && <mask_mode512bit_condition>"
1051 /* For AVX, normal *mov<mode>_internal pattern will handle unaligned loads
1052 just fine if misaligned_operand is true, and without the UNSPEC it can
1053 be combined with arithmetic instructions. If misaligned_operand is
1054 false, still emit UNSPEC_LOADU insn to honor user's request for
1057 && misaligned_operand (operands[1], <MODE>mode))
1059 rtx src = operands[1];
1061 src = gen_rtx_VEC_MERGE (<MODE>mode, operands[1],
1062 operands[2 * <mask_applied>],
1063 operands[3 * <mask_applied>]);
1064 emit_insn (gen_rtx_SET (VOIDmode, operands[0], src));
1069 (define_insn "*<sse2_avx_avx512f>_loaddqu<mode><mask_name>"
1070 [(set (match_operand:VI_UNALIGNED_LOADSTORE 0 "register_operand" "=v")
1071 (unspec:VI_UNALIGNED_LOADSTORE
1072 [(match_operand:VI_UNALIGNED_LOADSTORE 1 "nonimmediate_operand" "vm")]
1074 "TARGET_SSE2 && <mask_mode512bit_condition>"
1076 switch (get_attr_mode (insn))
1080 return "%vmovups\t{%1, %0|%0, %1}";
1082 if (<MODE>mode == V8DImode)
1083 return "vmovdqu64\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
1085 return "vmovdqu32\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
1087 return "%vmovdqu\t{%1, %0|%0, %1}";
1090 [(set_attr "type" "ssemov")
1091 (set_attr "movu" "1")
1092 (set_attr "ssememalign" "8")
1093 (set (attr "prefix_data16")
1095 (match_test "TARGET_AVX")
1097 (const_string "1")))
1098 (set_attr "prefix" "maybe_vex")
1100 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
1101 (const_string "<ssePSmode>")
1102 (match_test "TARGET_AVX")
1103 (const_string "<sseinsnmode>")
1104 (match_test "optimize_function_for_size_p (cfun)")
1105 (const_string "V4SF")
1107 (const_string "<sseinsnmode>")))])
1109 (define_insn "<sse2_avx_avx512f>_storedqu<mode>"
1110 [(set (match_operand:VI_UNALIGNED_LOADSTORE 0 "memory_operand" "=m")
1111 (unspec:VI_UNALIGNED_LOADSTORE
1112 [(match_operand:VI_UNALIGNED_LOADSTORE 1 "register_operand" "v")]
1116 switch (get_attr_mode (insn))
1121 return "%vmovups\t{%1, %0|%0, %1}";
1123 if (<MODE>mode == V8DImode)
1124 return "vmovdqu64\t{%1, %0|%0, %1}";
1126 return "vmovdqu32\t{%1, %0|%0, %1}";
1128 return "%vmovdqu\t{%1, %0|%0, %1}";
1131 [(set_attr "type" "ssemov")
1132 (set_attr "movu" "1")
1133 (set_attr "ssememalign" "8")
1134 (set (attr "prefix_data16")
1136 (match_test "TARGET_AVX")
1138 (const_string "1")))
1139 (set_attr "prefix" "maybe_vex")
1141 (cond [(and (match_test "<MODE_SIZE> == 16")
1142 (ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
1143 (match_test "TARGET_SSE_TYPELESS_STORES")))
1144 (const_string "<ssePSmode>")
1145 (match_test "TARGET_AVX")
1146 (const_string "<sseinsnmode>")
1147 (match_test "optimize_function_for_size_p (cfun)")
1148 (const_string "V4SF")
1150 (const_string "<sseinsnmode>")))])
1152 (define_insn "avx512f_storedqu<mode>_mask"
1153 [(set (match_operand:VI48_512 0 "memory_operand" "=m")
1156 [(match_operand:VI48_512 1 "register_operand" "v")]
1159 (match_operand:<avx512fmaskmode> 2 "register_operand" "k")))]
1162 if (<MODE>mode == V8DImode)
1163 return "vmovdqu64\t{%1, %0%{%2%}|%0%{%2%}, %1}";
1165 return "vmovdqu32\t{%1, %0%{%2%}|%0%{%2%}, %1}";
1167 [(set_attr "type" "ssemov")
1168 (set_attr "movu" "1")
1169 (set_attr "memory" "store")
1170 (set_attr "prefix" "evex")
1171 (set_attr "mode" "<sseinsnmode>")])
;; Load of a VI1 vector from memory operand 1 into register operand 0,
;; emitted as %vlddqu.  The source must be memory (constraint "m"); the
;; prefix_data16 / prefix_rep attributes are computed from TARGET_AVX.
1173 (define_insn "<sse3>_lddqu<avxsizesuffix>"
1174 [(set (match_operand:VI1 0 "register_operand" "=x")
1175 (unspec:VI1 [(match_operand:VI1 1 "memory_operand" "m")]
1178 "%vlddqu\t{%1, %0|%0, %1}"
1179 [(set_attr "type" "ssemov")
1180 (set_attr "movu" "1")
1181 (set_attr "ssememalign" "8")
1182 (set (attr "prefix_data16")
1184 (match_test "TARGET_AVX")
1186 (const_string "0")))
1187 (set (attr "prefix_rep")
1189 (match_test "TARGET_AVX")
1191 (const_string "1")))
1192 (set_attr "prefix" "maybe_vex")
1193 (set_attr "mode" "<sseinsnmode>")])
;; movnti store of a general register (constraint "r", SWI48 modes) from
;; operand 1 to memory operand 0.  No data16 prefix is accounted for.
1195 (define_insn "sse2_movnti<mode>"
1196 [(set (match_operand:SWI48 0 "memory_operand" "=m")
1197 (unspec:SWI48 [(match_operand:SWI48 1 "register_operand" "r")]
1200 "movnti\t{%1, %0|%0, %1}"
1201 [(set_attr "type" "ssemov")
1202 (set_attr "prefix_data16" "0")
1203 (set_attr "mode" "<MODE>")])
;; movnt store of a floating-point vector (VF modes): register operand 1
;; to memory operand 0, emitted as %vmovnt<ssemodesuffix>.
1205 (define_insn "<sse>_movnt<mode>"
1206 [(set (match_operand:VF 0 "memory_operand" "=m")
1208 [(match_operand:VF 1 "register_operand" "v")]
1211 "%vmovnt<ssemodesuffix>\t{%1, %0|%0, %1}"
1212 [(set_attr "type" "ssemov")
1213 (set_attr "prefix" "maybe_vex")
1214 (set_attr "mode" "<MODE>")])
;; movntdq store of a VI8 integer vector: register operand 1 to memory
;; operand 0, emitted as %vmovntdq.  The prefix_data16 attribute is
;; computed from TARGET_AVX.
1216 (define_insn "<sse2>_movnt<mode>"
1217 [(set (match_operand:VI8 0 "memory_operand" "=m")
1218 (unspec:VI8 [(match_operand:VI8 1 "register_operand" "v")]
1221 "%vmovntdq\t{%1, %0|%0, %1}"
1222 [(set_attr "type" "ssecvt")
1223 (set (attr "prefix_data16")
1225 (match_test "TARGET_AVX")
1227 (const_string "1")))
1228 (set_attr "prefix" "maybe_vex")
1229 (set_attr "mode" "<sseinsnmode>")])
1231 ; Expand patterns for non-temporal stores. At the moment, only those
1232 ; that directly map to insns are defined; it would be possible to
1233 ; define patterns for other modes that would expand to several insns.
1235 ;; Modes handled by storent patterns.
;; Every entry except V4SF carries its own enabling condition: DI needs
;; TARGET_SSE2 && TARGET_64BIT, SI needs TARGET_SSE2, scalar SF/DF need
;; TARGET_SSE4A, and the vector modes are gated on TARGET_SSE2,
;; TARGET_AVX or TARGET_AVX512F according to their width.
1236 (define_mode_iterator STORENT_MODE
1237 [(DI "TARGET_SSE2 && TARGET_64BIT") (SI "TARGET_SSE2")
1238 (SF "TARGET_SSE4A") (DF "TARGET_SSE4A")
1239 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") (V2DI "TARGET_SSE2")
1240 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
1241 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
;; Expander "storent<mode>" for every mode in STORENT_MODE: sets memory
;; operand 0 to an unspec wrapping register operand 1.
1243 (define_expand "storent<mode>"
1244 [(set (match_operand:STORENT_MODE 0 "memory_operand")
1245 (unspec:STORENT_MODE
1246 [(match_operand:STORENT_MODE 1 "register_operand")]
1250 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1252 ;; Parallel floating point arithmetic
1254 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Unary expander on VF modes, iterated over <code>.  All the work is
;; delegated to ix86_expand_fp_absneg_operator, after which the expander
;; finishes with DONE (the RTL template itself is not emitted).
1256 (define_expand "<code><mode>2"
1257 [(set (match_operand:VF 0 "register_operand")
1259 (match_operand:VF 1 "register_operand")))]
1261 "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
;; Combined abs/neg insn for VF modes.  Operand 3 is an absneg_operator
;; applied to operand 1; operand 2 is only `use'd by the pattern
;; (presumably the sign-bit mask constant -- confirm against
;; ix86_expand_fp_absneg_operator).  After reload the insn is split into
;; a plain SET of operand 0: XOR of the two sources when operand 3 is
;; NEG, AND otherwise.  When operand 1 is in memory the two sources are
;; exchanged so that the memory operand comes second.
1263 (define_insn_and_split "*absneg<mode>2"
1264 [(set (match_operand:VF 0 "register_operand" "=x,x,v,v")
1265 (match_operator:VF 3 "absneg_operator"
1266 [(match_operand:VF 1 "nonimmediate_operand" "0, xm, v, m")]))
1267 (use (match_operand:VF 2 "nonimmediate_operand" "xm, 0, vm,v"))]
1270 "&& reload_completed"
1273 enum rtx_code absneg_op;
1279 if (MEM_P (operands[1]))
1280 op1 = operands[2], op2 = operands[1];
1282 op1 = operands[1], op2 = operands[2];
1287 if (rtx_equal_p (operands[0], operands[1]))
1293 absneg_op = GET_CODE (operands[3]) == NEG ? XOR : AND;
1294 t = gen_rtx_fmt_ee (absneg_op, <MODE>mode, op1, op2);
1295 t = gen_rtx_SET (VOIDmode, operands[0], t);
1299 [(set_attr "isa" "noavx,noavx,avx,avx")])
;; Vector FP add/sub expander (VF modes, optionally with the mask and
;; rounding substitutions).  Enabled under TARGET_SSE together with the
;; mask/round 512-bit conditions; the preparation statement only
;; canonicalizes the operands via ix86_fixup_binary_operands_no_copy.
1301 (define_expand "<plusminus_insn><mode>3<mask_name><round_name>"
1302 [(set (match_operand:VF 0 "register_operand")
1304 (match_operand:VF 1 "<round_nimm_predicate>")
1305 (match_operand:VF 2 "<round_nimm_predicate>")))]
1306 "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1307 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Vector FP add/sub insn.  Alternative 0 (noavx) is the two-operand form
;; with operand 1 tied to the destination; alternative 1 (avx) is the
;; three-operand v-prefixed form, which also prints the optional rounding
;; and mask operands.  Operands must satisfy ix86_binary_operator_ok.
1309 (define_insn "*<plusminus_insn><mode>3<mask_name><round_name>"
1310 [(set (match_operand:VF 0 "register_operand" "=x,v")
1312 (match_operand:VF 1 "<round_nimm_predicate>" "<comm>0,v")
1313 (match_operand:VF 2 "<round_nimm_predicate>" "xm,<round_constraint>")))]
1314 "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands) && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1316 <plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
1317 v<plusminus_mnemonic><ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
1318 [(set_attr "isa" "noavx,avx")
1319 (set_attr "type" "sseadd")
1320 (set_attr "prefix" "<mask_prefix3>")
1321 (set_attr "mode" "<MODE>")])
;; Scalar-in-vector FP add/sub on VF_128 operands, emitted with the
;; scalar mnemonic suffix (<ssescalarmodesuffix>).  Alternative 0 is the
;; two-operand non-AVX form, alternative 1 the three-operand AVX form
;; with optional rounding operand.  The insn mode is <ssescalarmode>.
1323 (define_insn "<sse>_vm<plusminus_insn><mode>3<round_name>"
1324 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1327 (match_operand:VF_128 1 "register_operand" "0,v")
1328 (match_operand:VF_128 2 "nonimmediate_operand" "xm,<round_constraint>"))
1333 <plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
1334 v<plusminus_mnemonic><ssescalarmodesuffix>\t{<round_op3>%2, %1, %0|%0, %1, %<iptr>2<round_op3>}"
1335 [(set_attr "isa" "noavx,avx")
1336 (set_attr "type" "sseadd")
1337 (set_attr "prefix" "<round_prefix>")
1338 (set_attr "mode" "<ssescalarmode>")])
;; Vector FP multiply expander (VF modes, optional mask/rounding).  The
;; preparation statement canonicalizes the operands for MULT via
;; ix86_fixup_binary_operands_no_copy.
1340 (define_expand "mul<mode>3<mask_name><round_name>"
1341 [(set (match_operand:VF 0 "register_operand")
1343 (match_operand:VF 1 "<round_nimm_predicate>")
1344 (match_operand:VF 2 "<round_nimm_predicate>")))]
1345 "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1346 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; Vector FP multiply insn.  Operand 1 is marked commutative ("%") in the
;; non-AVX alternative, where it is tied to the destination; the AVX
;; alternative is the three-operand vmul form with optional rounding and
;; mask operands.  Operands must satisfy ix86_binary_operator_ok.
1348 (define_insn "*mul<mode>3<mask_name><round_name>"
1349 [(set (match_operand:VF 0 "register_operand" "=x,v")
1351 (match_operand:VF 1 "<round_nimm_predicate>" "%0,v")
1352 (match_operand:VF 2 "<round_nimm_predicate>" "xm,<round_constraint>")))]
1353 "TARGET_SSE && ix86_binary_operator_ok (MULT, <MODE>mode, operands) && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1355 mul<ssemodesuffix>\t{%2, %0|%0, %2}
1356 vmul<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
1357 [(set_attr "isa" "noavx,avx")
1358 (set_attr "type" "ssemul")
1359 (set_attr "prefix" "<mask_prefix3>")
1360 (set_attr "btver2_decode" "direct,double")
1361 (set_attr "mode" "<MODE>")])
;; Scalar-in-vector FP multiply/divide on VF_128 operands; the mnemonic
;; comes from <multdiv_mnemonic> and the "type" attribute is derived from
;; it as well ("sse<multdiv_mnemonic>").  Alternative 0 is the non-AVX
;; two-operand form, alternative 1 the AVX three-operand form.
1363 (define_insn "<sse>_vm<multdiv_mnemonic><mode>3<round_name>"
1364 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1367 (match_operand:VF_128 1 "register_operand" "0,v")
1368 (match_operand:VF_128 2 "nonimmediate_operand" "xm,<round_constraint>"))
1373 <multdiv_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
1374 v<multdiv_mnemonic><ssescalarmodesuffix>\t{<round_op3>%2, %1, %0|%0, %1, %<iptr>2<round_op3>}"
1375 [(set_attr "isa" "noavx,avx")
1376 (set_attr "type" "sse<multdiv_mnemonic>")
1377 (set_attr "prefix" "<round_prefix>")
1378 (set_attr "btver2_decode" "direct,double")
1379 (set_attr "mode" "<ssescalarmode>")])
;; Vector division expander for the VF2 modes.  Operand 1 must be a
;; register; the preparation statement canonicalizes the operands for
;; DIV via ix86_fixup_binary_operands_no_copy.
1381 (define_expand "div<mode>3"
1382 [(set (match_operand:VF2 0 "register_operand")
1383 (div:VF2 (match_operand:VF2 1 "register_operand")
1384 (match_operand:VF2 2 "nonimmediate_operand")))]
1386 "ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);")
;; Vector division expander for the VF1 modes.  After canonicalizing the
;; operands it can emit a software division sequence through
;; ix86_emit_swdivsf instead of a divide insn.  The guard on that path
;; includes TARGET_RECIP_VEC_DIV, not optimizing the insn for size,
;; flag_finite_math_only, !flag_trapping_math and
;; flag_unsafe_math_optimizations.
1388 (define_expand "div<mode>3"
1389 [(set (match_operand:VF1 0 "register_operand")
1390 (div:VF1 (match_operand:VF1 1 "register_operand")
1391 (match_operand:VF1 2 "nonimmediate_operand")))]
1394 ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);
1397 && TARGET_RECIP_VEC_DIV
1398 && !optimize_insn_for_size_p ()
1399 && flag_finite_math_only && !flag_trapping_math
1400 && flag_unsafe_math_optimizations)
1402 ix86_emit_swdivsf (operands[0], operands[1], operands[2], <MODE>mode);
;; Vector FP divide insn.  Operand 1 (the dividend) must be a register:
;; tied to the destination in the non-AVX alternative, free in the AVX
;; three-operand vdiv form, which also prints optional rounding and mask
;; operands.
1407 (define_insn "<sse>_div<mode>3<mask_name><round_name>"
1408 [(set (match_operand:VF 0 "register_operand" "=x,v")
1410 (match_operand:VF 1 "register_operand" "0,v")
1411 (match_operand:VF 2 "<round_nimm_predicate>" "xm,<round_constraint>")))]
1412 "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1414 div<ssemodesuffix>\t{%2, %0|%0, %2}
1415 vdiv<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
1416 [(set_attr "isa" "noavx,avx")
1417 (set_attr "type" "ssediv")
1418 (set_attr "prefix" "<mask_prefix3>")
1419 (set_attr "mode" "<MODE>")])
;; Packed reciprocal (UNSPEC_RCP) for the VF1_128_256 modes: operand 1
;; (register or memory) to register operand 0, emitted as %vrcpps.
1421 (define_insn "<sse>_rcp<mode>2"
1422 [(set (match_operand:VF1_128_256 0 "register_operand" "=x")
1424 [(match_operand:VF1_128_256 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
1426 "%vrcpps\t{%1, %0|%0, %1}"
1427 [(set_attr "type" "sse")
1428 (set_attr "atom_sse_attr" "rcp")
1429 (set_attr "btver2_sse_attr" "rcp")
1430 (set_attr "prefix" "maybe_vex")
1431 (set_attr "mode" "<MODE>")])
;; Scalar reciprocal on V4SF: rcpss / vrcpss.  Operand 1 is the source
;; (register or memory, printed as %k1 in Intel syntax); operand 2 is a
;; register that is tied to the destination in the non-AVX alternative
;; and appears as the extra source in the AVX one.  Insn mode is SF.
1433 (define_insn "sse_vmrcpv4sf2"
1434 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1436 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
1438 (match_operand:V4SF 2 "register_operand" "0,x")
1442 rcpss\t{%1, %0|%0, %k1}
1443 vrcpss\t{%1, %2, %0|%0, %2, %k1}"
1444 [(set_attr "isa" "noavx,avx")
1445 (set_attr "type" "sse")
1446 (set_attr "ssememalign" "32")
1447 (set_attr "atom_sse_attr" "rcp")
1448 (set_attr "btver2_sse_attr" "rcp")
1449 (set_attr "prefix" "orig,vex")
1450 (set_attr "mode" "SF")])
;; vrcp14 on a VF_512 vector: operand 1 (register or memory) to register
;; operand 0, with an optional mask operand printed after the
;; destination.  EVEX encoded.
1452 (define_insn "<mask_codefor>rcp14<mode><mask_name>"
1453 [(set (match_operand:VF_512 0 "register_operand" "=v")
1455 [(match_operand:VF_512 1 "nonimmediate_operand" "vm")]
1458 "vrcp14<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
1459 [(set_attr "type" "sse")
1460 (set_attr "prefix" "evex")
1461 (set_attr "mode" "<MODE>")])
;; Scalar vrcp14 on VF_128 operands: operand 1 (register or memory) is
;; the value operated on, operand 2 is a second register source, and the
;; result goes to register operand 0.  EVEX encoded.
1463 (define_insn "srcp14<mode>"
1464 [(set (match_operand:VF_128 0 "register_operand" "=v")
1467 [(match_operand:VF_128 1 "nonimmediate_operand" "vm")]
1469 (match_operand:VF_128 2 "register_operand" "v")
1472 "vrcp14<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %1}"
1473 [(set_attr "type" "sse")
1474 (set_attr "prefix" "evex")
1475 (set_attr "mode" "<MODE>")])
;; Vector square-root expander for the VF2 modes: a plain (sqrt ...) of
;; operand 1 (register or memory) into register operand 0.
1477 (define_expand "sqrt<mode>2"
1478 [(set (match_operand:VF2 0 "register_operand")
1479 (sqrt:VF2 (match_operand:VF2 1 "nonimmediate_operand")))]
;; Vector square-root expander for the VF1 modes.  It can emit a software
;; sequence through ix86_emit_swsqrtsf (last argument false) instead of a
;; sqrt insn.  The guard on that path includes TARGET_RECIP_VEC_SQRT, not
;; optimizing the insn for size, flag_finite_math_only,
;; !flag_trapping_math and flag_unsafe_math_optimizations.
1482 (define_expand "sqrt<mode>2"
1483 [(set (match_operand:VF1 0 "register_operand")
1484 (sqrt:VF1 (match_operand:VF1 1 "nonimmediate_operand")))]
1488 && TARGET_RECIP_VEC_SQRT
1489 && !optimize_insn_for_size_p ()
1490 && flag_finite_math_only && !flag_trapping_math
1491 && flag_unsafe_math_optimizations)
1493 ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, false);
;; Vector FP square-root insn for VF modes, emitted as
;; %vsqrt<ssemodesuffix> with optional rounding and mask operands.
;; Single alternative; the prefix is "maybe_vex".
1498 (define_insn "<sse>_sqrt<mode>2<mask_name><round_name>"
1499 [(set (match_operand:VF 0 "register_operand" "=v")
1500 (sqrt:VF (match_operand:VF 1 "<round_nimm_predicate>" "<round_constraint>")))]
1501 "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1502 "%vsqrt<ssemodesuffix>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
1503 [(set_attr "type" "sse")
1504 (set_attr "atom_sse_attr" "sqrt")
1505 (set_attr "btver2_sse_attr" "sqrt")
1506 (set_attr "prefix" "maybe_vex")
1507 (set_attr "mode" "<MODE>")])
;; Scalar-in-vector square root on VF_128 operands.  Operand 1 is the
;; value operated on (register or memory); operand 2 is a register tied
;; to the destination in the non-AVX alternative and printed as the
;; extra source of the AVX three-operand form.
1509 (define_insn "<sse>_vmsqrt<mode>2<round_name>"
1510 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1513 (match_operand:VF_128 1 "nonimmediate_operand" "xm,<round_constraint>"))
1514 (match_operand:VF_128 2 "register_operand" "0,v")
1518 sqrt<ssescalarmodesuffix>\t{%1, %0|%0, %<iptr>1}
1519 vsqrt<ssescalarmodesuffix>\t{<round_op3>%1, %2, %0|%0, %2, %<iptr>1<round_op3>}"
1520 [(set_attr "isa" "noavx,avx")
1521 (set_attr "type" "sse")
1522 (set_attr "atom_sse_attr" "sqrt")
1523 (set_attr "prefix" "<round_prefix>")
1524 (set_attr "btver2_sse_attr" "sqrt")
1525 (set_attr "mode" "<ssescalarmode>")])
;; Reciprocal square-root expander (UNSPEC_RSQRT) for the VF1_128_256
;; modes.  The preparation code calls ix86_emit_swsqrtsf with its last
;; argument set to true.
1527 (define_expand "rsqrt<mode>2"
1528 [(set (match_operand:VF1_128_256 0 "register_operand")
1530 [(match_operand:VF1_128_256 1 "nonimmediate_operand")] UNSPEC_RSQRT))]
1533 ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, true);
;; Packed reciprocal square root (UNSPEC_RSQRT) for the VF1_128_256
;; modes: operand 1 (register or memory) to register operand 0, emitted
;; as %vrsqrtps.
1537 (define_insn "<sse>_rsqrt<mode>2"
1538 [(set (match_operand:VF1_128_256 0 "register_operand" "=x")
1540 [(match_operand:VF1_128_256 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
1542 "%vrsqrtps\t{%1, %0|%0, %1}"
1543 [(set_attr "type" "sse")
1544 (set_attr "prefix" "maybe_vex")
1545 (set_attr "mode" "<MODE>")])
;; vrsqrt14 on a VF_512 vector: operand 1 (register or memory) to
;; register operand 0, with an optional mask operand printed after the
;; destination.  EVEX encoded.
1547 (define_insn "<mask_codefor>rsqrt14<mode><mask_name>"
1548 [(set (match_operand:VF_512 0 "register_operand" "=v")
1550 [(match_operand:VF_512 1 "nonimmediate_operand" "vm")]
1553 "vrsqrt14<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
1554 [(set_attr "type" "sse")
1555 (set_attr "prefix" "evex")
1556 (set_attr "mode" "<MODE>")])
;; Scalar vrsqrt14 on VF_128 operands.  Note the operand roles differ
;; from srcp14<mode>: here operand 1 is the register source and operand 2
;; is the register-or-memory source, and the template prints
;; "%2, %1, %0" in AT&T order.  EVEX encoded.
1558 (define_insn "rsqrt14<mode>"
1559 [(set (match_operand:VF_128 0 "register_operand" "=v")
1562 [(match_operand:VF_128 1 "register_operand" "v")
1563 (match_operand:VF_128 2 "nonimmediate_operand" "vm")]
1568 "vrsqrt14<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1569 [(set_attr "type" "sse")
1570 (set_attr "prefix" "evex")
1571 (set_attr "mode" "<MODE>")])
;; Scalar reciprocal square root on V4SF: rsqrtss / vrsqrtss.  Operand 1
;; is the source (register or memory, printed as %k1 in Intel syntax);
;; operand 2 is a register tied to the destination in the non-AVX
;; alternative and used as the extra source in the AVX one.
1573 (define_insn "sse_vmrsqrtv4sf2"
1574 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1576 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
1578 (match_operand:V4SF 2 "register_operand" "0,x")
1582 rsqrtss\t{%1, %0|%0, %k1}
1583 vrsqrtss\t{%1, %2, %0|%0, %2, %k1}"
1584 [(set_attr "isa" "noavx,avx")
1585 (set_attr "type" "sse")
1586 (set_attr "ssememalign" "32")
1587 (set_attr "prefix" "orig,vex")
1588 (set_attr "mode" "SF")])
1590 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
1591 ;; isn't really correct, as those rtl operators aren't defined when
1592 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
;; Vector FP min/max expander (VF modes, optional mask / SAE-only
;; rounding).  When flag_finite_math_only is off, operand 1 is forced
;; into a register first, matching the non-commutative insn whose
;; operand 1 is register-only; then the operands are canonicalized with
;; ix86_fixup_binary_operands_no_copy.
1594 (define_expand "<code><mode>3<mask_name><round_saeonly_name>"
1595 [(set (match_operand:VF 0 "register_operand")
1597 (match_operand:VF 1 "<round_saeonly_nimm_predicate>")
1598 (match_operand:VF 2 "<round_saeonly_nimm_predicate>")))]
1599 "TARGET_SSE && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
1601 if (!flag_finite_math_only)
1602 operands[1] = force_reg (<MODE>mode, operands[1]);
1603 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
;; Vector FP min/max insn used only when flag_finite_math_only is set.
;; In this variant operand 1 is marked commutative ("%") and the
;; operands must satisfy ix86_binary_operator_ok.
1606 (define_insn "*<code><mode>3_finite<mask_name><round_saeonly_name>"
1607 [(set (match_operand:VF 0 "register_operand" "=x,v")
1609 (match_operand:VF 1 "<round_saeonly_nimm_predicate>" "%0,v")
1610 (match_operand:VF 2 "<round_saeonly_nimm_predicate>" "xm,<round_saeonly_constraint>")))]
1611 "TARGET_SSE && flag_finite_math_only
1612 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
1613 && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
1615 <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
1616 v<maxmin_float><ssemodesuffix>\t{<round_saeonly_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_saeonly_mask_op3>}"
1617 [(set_attr "isa" "noavx,avx")
1618 (set_attr "type" "sseadd")
1619 (set_attr "btver2_sse_attr" "maxmin")
1620 (set_attr "prefix" "<mask_prefix3>")
1621 (set_attr "mode" "<MODE>")])
;; Vector FP min/max insn used when flag_finite_math_only is NOT set.
;; Unlike the _finite variant, operand 1 must be a register and carries
;; no commutative marker, so the operand order is preserved.
1623 (define_insn "*<code><mode>3<mask_name><round_saeonly_name>"
1624 [(set (match_operand:VF 0 "register_operand" "=x,v")
1626 (match_operand:VF 1 "register_operand" "0,v")
1627 (match_operand:VF 2 "<round_saeonly_nimm_predicate>" "xm,<round_saeonly_constraint>")))]
1628 "TARGET_SSE && !flag_finite_math_only
1629 && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
1631 <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
1632 v<maxmin_float><ssemodesuffix>\t{<round_saeonly_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_saeonly_mask_op3>}"
1633 [(set_attr "isa" "noavx,avx")
1634 (set_attr "type" "sseadd")
1635 (set_attr "btver2_sse_attr" "maxmin")
1636 (set_attr "prefix" "<mask_prefix3>")
1637 (set_attr "mode" "<MODE>")])
;; Scalar-in-vector FP min/max on VF_128 operands, emitted with the
;; scalar mnemonic suffix.  Alternative 0 is the non-AVX two-operand
;; form, alternative 1 the AVX three-operand form with optional SAE
;; operand.  Insn mode is <ssescalarmode>.
1639 (define_insn "<sse>_vm<code><mode>3<round_saeonly_name>"
1640 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1643 (match_operand:VF_128 1 "register_operand" "0,v")
1644 (match_operand:VF_128 2 "nonimmediate_operand" "xm,<round_saeonly_constraint>"))
1649 <maxmin_float><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
1650 v<maxmin_float><ssescalarmodesuffix>\t{<round_saeonly_op3>%2, %1, %0|%0, %1, %<iptr>2<round_saeonly_op3>}"
1651 [(set_attr "isa" "noavx,avx")
1652 (set_attr "type" "sse")
1653 (set_attr "btver2_sse_attr" "maxmin")
1654 (set_attr "prefix" "<round_saeonly_prefix>")
1655 (set_attr "mode" "<ssescalarmode>")])
1657 ;; These versions of the min/max patterns implement exactly the operations
1658 ;; min = (op1 < op2 ? op1 : op2)
1659 ;; max = (!(op1 < op2) ? op1 : op2)
1660 ;; Their operands are not commutative, and thus they may be used in the
1661 ;; presence of -0.0 and NaN.
;; Non-commutative vector minimum for VF modes: operand 1 must be a
;; register and is never swapped with operand 2.  Emits
;; min<ssemodesuffix> (non-AVX, operand 1 tied to the destination) or
;; the three-operand vmin<ssemodesuffix> (AVX).
1663 (define_insn "*ieee_smin<mode>3"
1664 [(set (match_operand:VF 0 "register_operand" "=v,v")
1666 [(match_operand:VF 1 "register_operand" "0,v")
1667 (match_operand:VF 2 "nonimmediate_operand" "vm,vm")]
1671 min<ssemodesuffix>\t{%2, %0|%0, %2}
1672 vmin<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1673 [(set_attr "isa" "noavx,avx")
1674 (set_attr "type" "sseadd")
1675 (set_attr "prefix" "orig,vex")
1676 (set_attr "mode" "<MODE>")])
;; Non-commutative vector maximum for VF modes: operand 1 must be a
;; register and is never swapped with operand 2.  Emits
;; max<ssemodesuffix> (non-AVX, operand 1 tied to the destination) or
;; the three-operand vmax<ssemodesuffix> (AVX).
1678 (define_insn "*ieee_smax<mode>3"
1679 [(set (match_operand:VF 0 "register_operand" "=v,v")
1681 [(match_operand:VF 1 "register_operand" "0,v")
1682 (match_operand:VF 2 "nonimmediate_operand" "vm,vm")]
1686 max<ssemodesuffix>\t{%2, %0|%0, %2}
1687 vmax<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1688 [(set_attr "isa" "noavx,avx")
1689 (set_attr "type" "sseadd")
1690 (set_attr "prefix" "orig,vex")
1691 (set_attr "mode" "<MODE>")])
;; vaddsubpd on V4DF.  The pattern combines an operation on operands 1
;; and 2 with (minus op1 op2) on the same operands (match_dup).
;; AVX-only, three-operand form.
1693 (define_insn "avx_addsubv4df3"
1694 [(set (match_operand:V4DF 0 "register_operand" "=x")
1697 (match_operand:V4DF 1 "register_operand" "x")
1698 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
1699 (minus:V4DF (match_dup 1) (match_dup 2))
1702 "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1703 [(set_attr "type" "sseadd")
1704 (set_attr "prefix" "vex")
1705 (set_attr "mode" "V4DF")])
;; addsubpd / vaddsubpd on V2DF.  Same RTL shape as the V4DF variant,
;; with a non-AVX alternative that ties operand 1 to the destination and
;; an AVX three-operand alternative.
1707 (define_insn "sse3_addsubv2df3"
1708 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1711 (match_operand:V2DF 1 "register_operand" "0,x")
1712 (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm"))
1713 (minus:V2DF (match_dup 1) (match_dup 2))
1717 addsubpd\t{%2, %0|%0, %2}
1718 vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1719 [(set_attr "isa" "noavx,avx")
1720 (set_attr "type" "sseadd")
1721 (set_attr "atom_unit" "complex")
1722 (set_attr "prefix" "orig,vex")
1723 (set_attr "mode" "V2DF")])
;; vaddsubps on V8SF.  The pattern combines an operation on operands 1
;; and 2 with (minus op1 op2) on the same operands (match_dup).
;; AVX-only, three-operand form.
1725 (define_insn "avx_addsubv8sf3"
1726 [(set (match_operand:V8SF 0 "register_operand" "=x")
1729 (match_operand:V8SF 1 "register_operand" "x")
1730 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
1731 (minus:V8SF (match_dup 1) (match_dup 2))
1734 "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1735 [(set_attr "type" "sseadd")
1736 (set_attr "prefix" "vex")
1737 (set_attr "mode" "V8SF")])
;; addsubps / vaddsubps on V4SF, with a non-AVX alternative (operand 1
;; tied to the destination) and an AVX three-operand alternative.  The
;; prefix_rep attribute is "1" for the non-AVX encoding only.
1739 (define_insn "sse3_addsubv4sf3"
1740 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1743 (match_operand:V4SF 1 "register_operand" "0,x")
1744 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
1745 (minus:V4SF (match_dup 1) (match_dup 2))
1749 addsubps\t{%2, %0|%0, %2}
1750 vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1751 [(set_attr "isa" "noavx,avx")
1752 (set_attr "type" "sseadd")
1753 (set_attr "prefix" "orig,vex")
1754 (set_attr "prefix_rep" "1,*")
1755 (set_attr "mode" "V4SF")])
;; Horizontal add/sub on V4DF (vhaddpd / vhsubpd).  The RTL pairs
;; adjacent elements in this order: elements 0 and 1 of operand 1,
;; elements 0 and 1 of operand 2, elements 2 and 3 of operand 1, and
;; elements 2 and 3 of operand 2.
1757 (define_insn "avx_h<plusminus_insn>v4df3"
1758 [(set (match_operand:V4DF 0 "register_operand" "=x")
1763 (match_operand:V4DF 1 "register_operand" "x")
1764 (parallel [(const_int 0)]))
1765 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1768 (match_operand:V4DF 2 "nonimmediate_operand" "xm")
1769 (parallel [(const_int 0)]))
1770 (vec_select:DF (match_dup 2) (parallel [(const_int 1)]))))
1773 (vec_select:DF (match_dup 1) (parallel [(const_int 2)]))
1774 (vec_select:DF (match_dup 1) (parallel [(const_int 3)])))
1776 (vec_select:DF (match_dup 2) (parallel [(const_int 2)]))
1777 (vec_select:DF (match_dup 2) (parallel [(const_int 3)]))))))]
1779 "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
1780 [(set_attr "type" "sseadd")
1781 (set_attr "prefix" "vex")
1782 (set_attr "mode" "V4DF")])
;; Expander for the V2DF horizontal add.  It generates the canonical RTL
;; pairing elements 0 and 1 of operand 1 and elements 0 and 1 of
;; operand 2; the "*sse3_haddv2df3" insn accepts the element indices in
;; either order.
1784 (define_expand "sse3_haddv2df3"
1785 [(set (match_operand:V2DF 0 "register_operand")
1789 (match_operand:V2DF 1 "register_operand")
1790 (parallel [(const_int 0)]))
1791 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1794 (match_operand:V2DF 2 "nonimmediate_operand")
1795 (parallel [(const_int 0)]))
1796 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
;; haddpd / vhaddpd on V2DF.  The selected element indices are matched
;; as operands 3..6 (each 0 or 1) rather than fixed constants; the
;; condition only requires that the two indices taken from the same
;; source differ (3 != 4 and 5 != 6), so either element order is
;; recognized.
1799 (define_insn "*sse3_haddv2df3"
1800 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1804 (match_operand:V2DF 1 "register_operand" "0,x")
1805 (parallel [(match_operand:SI 3 "const_0_to_1_operand")]))
1808 (parallel [(match_operand:SI 4 "const_0_to_1_operand")])))
1811 (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm")
1812 (parallel [(match_operand:SI 5 "const_0_to_1_operand")]))
1815 (parallel [(match_operand:SI 6 "const_0_to_1_operand")])))))]
1817 && INTVAL (operands[3]) != INTVAL (operands[4])
1818 && INTVAL (operands[5]) != INTVAL (operands[6])"
1820 haddpd\t{%2, %0|%0, %2}
1821 vhaddpd\t{%2, %1, %0|%0, %1, %2}"
1822 [(set_attr "isa" "noavx,avx")
1823 (set_attr "type" "sseadd")
1824 (set_attr "prefix" "orig,vex")
1825 (set_attr "mode" "V2DF")])
;; hsubpd / vhsubpd on V2DF.  Unlike the hadd insn, the element indices
;; are fixed constants (element 0 then element 1 of each source), so the
;; element order is significant.
1827 (define_insn "sse3_hsubv2df3"
1828 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1832 (match_operand:V2DF 1 "register_operand" "0,x")
1833 (parallel [(const_int 0)]))
1834 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1837 (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm")
1838 (parallel [(const_int 0)]))
1839 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1842 hsubpd\t{%2, %0|%0, %2}
1843 vhsubpd\t{%2, %1, %0|%0, %1, %2}"
1844 [(set_attr "isa" "noavx,avx")
1845 (set_attr "type" "sseadd")
1846 (set_attr "prefix" "orig,vex")
1847 (set_attr "mode" "V2DF")])
;; Scalar DF result formed from two elements of the single V2DF source
;; operand 1.  The element indices are operands 2 and 3 (each 0 or 1)
;; and must differ.  Emitted as haddpd / vhaddpd with the same register
;; used for both sources.
1849 (define_insn "*sse3_haddv2df3_low"
1850 [(set (match_operand:DF 0 "register_operand" "=x,x")
1853 (match_operand:V2DF 1 "register_operand" "0,x")
1854 (parallel [(match_operand:SI 2 "const_0_to_1_operand")]))
1857 (parallel [(match_operand:SI 3 "const_0_to_1_operand")]))))]
1859 && INTVAL (operands[2]) != INTVAL (operands[3])"
1861 haddpd\t{%0, %0|%0, %0}
1862 vhaddpd\t{%1, %1, %0|%0, %1, %1}"
1863 [(set_attr "isa" "noavx,avx")
1864 (set_attr "type" "sseadd1")
1865 (set_attr "prefix" "orig,vex")
1866 (set_attr "mode" "V2DF")])
;; Scalar DF result formed from element 0 and element 1 of the single
;; V2DF source operand 1 (fixed indices).  Emitted as hsubpd / vhsubpd
;; with the same register used for both sources.
1868 (define_insn "*sse3_hsubv2df3_low"
1869 [(set (match_operand:DF 0 "register_operand" "=x,x")
1872 (match_operand:V2DF 1 "register_operand" "0,x")
1873 (parallel [(const_int 0)]))
1876 (parallel [(const_int 1)]))))]
1879 hsubpd\t{%0, %0|%0, %0}
1880 vhsubpd\t{%1, %1, %0|%0, %1, %1}"
1881 [(set_attr "isa" "noavx,avx")
1882 (set_attr "type" "sseadd1")
1883 (set_attr "prefix" "orig,vex")
1884 (set_attr "mode" "V2DF")])
;; Horizontal add/sub on V8SF (vhaddps / vhsubps).  The RTL pairs
;; adjacent elements in this order: elements 0-3 of operand 1, elements
;; 0-3 of operand 2, elements 4-7 of operand 1, then elements 4-7 of
;; operand 2.
1886 (define_insn "avx_h<plusminus_insn>v8sf3"
1887 [(set (match_operand:V8SF 0 "register_operand" "=x")
1893 (match_operand:V8SF 1 "register_operand" "x")
1894 (parallel [(const_int 0)]))
1895 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1897 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1898 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1902 (match_operand:V8SF 2 "nonimmediate_operand" "xm")
1903 (parallel [(const_int 0)]))
1904 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1906 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1907 (vec_select:SF (match_dup 2) (parallel [(const_int 3)])))))
1911 (vec_select:SF (match_dup 1) (parallel [(const_int 4)]))
1912 (vec_select:SF (match_dup 1) (parallel [(const_int 5)])))
1914 (vec_select:SF (match_dup 1) (parallel [(const_int 6)]))
1915 (vec_select:SF (match_dup 1) (parallel [(const_int 7)]))))
1918 (vec_select:SF (match_dup 2) (parallel [(const_int 4)]))
1919 (vec_select:SF (match_dup 2) (parallel [(const_int 5)])))
1921 (vec_select:SF (match_dup 2) (parallel [(const_int 6)]))
1922 (vec_select:SF (match_dup 2) (parallel [(const_int 7)])))))))]
1924 "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1925 [(set_attr "type" "sseadd")
1926 (set_attr "prefix" "vex")
1927 (set_attr "mode" "V8SF")])
;; Horizontal add/sub on V4SF (haddps / hsubps and their v-prefixed AVX
;; forms).  The RTL pairs elements 0-1 and 2-3 of operand 1, followed by
;; elements 0-1 and 2-3 of operand 2.  The prefix_rep attribute is "1"
;; for the non-AVX encoding only.
1929 (define_insn "sse3_h<plusminus_insn>v4sf3"
1930 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1935 (match_operand:V4SF 1 "register_operand" "0,x")
1936 (parallel [(const_int 0)]))
1937 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1939 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1940 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1944 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm")
1945 (parallel [(const_int 0)]))
1946 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1948 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1949 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
1952 h<plusminus_mnemonic>ps\t{%2, %0|%0, %2}
1953 vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1954 [(set_attr "isa" "noavx,avx")
1955 (set_attr "type" "sseadd")
1956 (set_attr "atom_unit" "complex")
1957 (set_attr "prefix" "orig,vex")
1958 (set_attr "prefix_rep" "1,*")
1959 (set_attr "mode" "V4SF")])
;; Sum reduction of a V8DF vector (operand 1) into operand 0, delegated
;; to ix86_expand_reduc with gen_addv8df3 as the combining operation.
1961 (define_expand "reduc_splus_v8df"
1962 [(match_operand:V8DF 0 "register_operand")
1963 (match_operand:V8DF 1 "register_operand")]
1966 ix86_expand_reduc (gen_addv8df3, operands[0], operands[1]);
;; Sum reduction of a V4DF vector, open-coded as three insns on two
;; fresh pseudos: a horizontal add of operand 1 with itself, a
;; vperm2f128 of that result with immediate 1 (presumably exchanging the
;; 128-bit halves -- confirm against the vperm2f128 pattern), and a
;; vector add of the two temporaries into operand 0.
1970 (define_expand "reduc_splus_v4df"
1971 [(match_operand:V4DF 0 "register_operand")
1972 (match_operand:V4DF 1 "register_operand")]
1975 rtx tmp = gen_reg_rtx (V4DFmode);
1976 rtx tmp2 = gen_reg_rtx (V4DFmode);
1977 emit_insn (gen_avx_haddv4df3 (tmp, operands[1], operands[1]));
1978 emit_insn (gen_avx_vperm2f128v4df3 (tmp2, tmp, tmp, GEN_INT (1)));
1979 emit_insn (gen_addv4df3 (operands[0], tmp, tmp2));
;; Sum reduction of a V2DF vector: a single horizontal add of operand 1
;; with itself, written directly to operand 0.
1983 (define_expand "reduc_splus_v2df"
1984 [(match_operand:V2DF 0 "register_operand")
1985 (match_operand:V2DF 1 "register_operand")]
1988 emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
;; Sum reduction of a V16SF vector (operand 1) into operand 0, delegated
;; to ix86_expand_reduc with gen_addv16sf3 as the combining operation.
1992 (define_expand "reduc_splus_v16sf"
1993 [(match_operand:V16SF 0 "register_operand")
1994 (match_operand:V16SF 1 "register_operand")]
1997 ix86_expand_reduc (gen_addv16sf3, operands[0], operands[1]);
;; Sum reduction of a V8SF vector, open-coded as two successive
;; horizontal adds (operand 1 with itself, then the result with itself),
;; a vperm2f128 of that result with immediate 1 (presumably exchanging
;; the 128-bit halves -- confirm), and a final vector add into
;; operand 0.  Note that `tmp' is reused as the vperm2f128 destination
;; after its first value has been consumed.
2001 (define_expand "reduc_splus_v8sf"
2002 [(match_operand:V8SF 0 "register_operand")
2003 (match_operand:V8SF 1 "register_operand")]
2006 rtx tmp = gen_reg_rtx (V8SFmode);
2007 rtx tmp2 = gen_reg_rtx (V8SFmode);
2008 emit_insn (gen_avx_haddv8sf3 (tmp, operands[1], operands[1]));
2009 emit_insn (gen_avx_haddv8sf3 (tmp2, tmp, tmp));
2010 emit_insn (gen_avx_vperm2f128v8sf3 (tmp, tmp2, tmp2, GEN_INT (1)));
2011 emit_insn (gen_addv8sf3 (operands[0], tmp, tmp2));
;; Sum reduction of a V4SF vector.  The preparation code has two ways to
;; do it: two successive sse3 horizontal adds through a temporary
;; (operand 1 with itself, then the result with itself into operand 0),
;; or the generic ix86_expand_reduc with gen_addv4sf3.
2015 (define_expand "reduc_splus_v4sf"
2016 [(match_operand:V4SF 0 "register_operand")
2017 (match_operand:V4SF 1 "register_operand")]
2022 rtx tmp = gen_reg_rtx (V4SFmode);
2023 emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
2024 emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
2027 ix86_expand_reduc (gen_addv4sf3, operands[0], operands[1]);
2031 ;; Modes handled by reduc_sm{in,ax}* patterns.
;; Each mode carries its own enabling condition: the 256-bit integer
;; modes need TARGET_AVX2, the 256-bit float modes TARGET_AVX, V4SF
;; TARGET_SSE, and all 512-bit modes TARGET_AVX512F.
2032 (define_mode_iterator REDUC_SMINMAX_MODE
2033 [(V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
2034 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
2035 (V8SF "TARGET_AVX") (V4DF "TARGET_AVX")
2036 (V4SF "TARGET_SSE") (V16SI "TARGET_AVX512F")
2037 (V8DI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
2038 (V8DF "TARGET_AVX512F")])
;; Signed min/max reduction (smaxmin codes) over REDUC_SMINMAX_MODE:
;; delegated to ix86_expand_reduc with the matching element-wise
;; gen_<code><mode>3 generator.
2040 (define_expand "reduc_<code>_<mode>"
2041 [(smaxmin:REDUC_SMINMAX_MODE
2042 (match_operand:REDUC_SMINMAX_MODE 0 "register_operand")
2043 (match_operand:REDUC_SMINMAX_MODE 1 "register_operand"))]
2046 ix86_expand_reduc (gen_<code><mode>3, operands[0], operands[1]);
;; Min/max reduction expander for the VI48_512 modes: delegated to
;; ix86_expand_reduc with the matching gen_<code><mode>3 generator.
2050 (define_expand "reduc_<code>_<mode>"
2052 (match_operand:VI48_512 0 "register_operand")
2053 (match_operand:VI48_512 1 "register_operand"))]
2056 ix86_expand_reduc (gen_<code><mode>3, operands[0], operands[1]);
;; Min/max reduction expander for the VI_256 modes: delegated to
;; ix86_expand_reduc with the matching gen_<code><mode>3 generator.
2060 (define_expand "reduc_<code>_<mode>"
2062 (match_operand:VI_256 0 "register_operand")
2063 (match_operand:VI_256 1 "register_operand"))]
2066 ix86_expand_reduc (gen_<code><mode>3, operands[0], operands[1]);
;; Unsigned-minimum reduction of a V8HI vector: delegated to
;; ix86_expand_reduc with gen_uminv8hi3.
2070 (define_expand "reduc_umin_v8hi"
2072 (match_operand:V8HI 0 "register_operand")
2073 (match_operand:V8HI 1 "register_operand"))]
2076 ix86_expand_reduc (gen_uminv8hi3, operands[0], operands[1]);
2080 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2082 ;; Parallel floating point comparisons
2084 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; AVX vector FP compare for the VF_128_256 modes.  Operand 3 is an
;; immediate in the range 0..31 that is passed through to the vcmp
;; instruction.
2086 (define_insn "avx_cmp<mode>3"
2087 [(set (match_operand:VF_128_256 0 "register_operand" "=x")
2089 [(match_operand:VF_128_256 1 "register_operand" "x")
2090 (match_operand:VF_128_256 2 "nonimmediate_operand" "xm")
2091 (match_operand:SI 3 "const_0_to_31_operand" "n")]
2094 "vcmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2095 [(set_attr "type" "ssecmp")
2096 (set_attr "length_immediate" "1")
2097 (set_attr "prefix" "vex")
2098 (set_attr "mode" "<MODE>")])
;; AVX scalar-in-vector FP compare on VF_128 operands, emitted with the
;; scalar mnemonic suffix.  Operand 3 is an immediate in the range
;; 0..31.  Insn mode is <ssescalarmode>.
2100 (define_insn "avx_vmcmp<mode>3"
2101 [(set (match_operand:VF_128 0 "register_operand" "=x")
2104 [(match_operand:VF_128 1 "register_operand" "x")
2105 (match_operand:VF_128 2 "nonimmediate_operand" "xm")
2106 (match_operand:SI 3 "const_0_to_31_operand" "n")]
2111 "vcmp<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %3}"
2112 [(set_attr "type" "ssecmp")
2113 (set_attr "length_immediate" "1")
2114 (set_attr "prefix" "vex")
2115 (set_attr "mode" "<ssescalarmode>")])
;; Vector FP compare for commutative comparison codes only: the
;; condition requires operand 3's code to be in class RTX_COMM_COMPARE,
;; which is what permits the "%" commutative marker on operand 1.  The
;; comparison code is printed into the mnemonic via %D3.
2117 (define_insn "*<sse>_maskcmp<mode>3_comm"
2118 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
2119 (match_operator:VF_128_256 3 "sse_comparison_operator"
2120 [(match_operand:VF_128_256 1 "register_operand" "%0,x")
2121 (match_operand:VF_128_256 2 "nonimmediate_operand" "xm,xm")]))]
2123 && GET_RTX_CLASS (GET_CODE (operands[3])) == RTX_COMM_COMPARE"
2125 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
2126 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2127 [(set_attr "isa" "noavx,avx")
2128 (set_attr "type" "ssecmp")
2129 (set_attr "length_immediate" "1")
2130 (set_attr "prefix" "orig,vex")
2131 (set_attr "mode" "<MODE>")])
;; General vector FP compare for any sse_comparison_operator (operand
;; 3).  Operand 1 carries no commutative marker, so operand order is
;; preserved.  The comparison code is printed into the mnemonic via
;; %D3.
2133 (define_insn "<sse>_maskcmp<mode>3"
2134 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
2135 (match_operator:VF_128_256 3 "sse_comparison_operator"
2136 [(match_operand:VF_128_256 1 "register_operand" "0,x")
2137 (match_operand:VF_128_256 2 "nonimmediate_operand" "xm,xm")]))]
2140 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
2141 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2142 [(set_attr "isa" "noavx,avx")
2143 (set_attr "type" "ssecmp")
2144 (set_attr "length_immediate" "1")
2145 (set_attr "prefix" "orig,vex")
2146 (set_attr "mode" "<MODE>")])
;; Scalar-in-vector FP compare on VF_128 operands for any
;; sse_comparison_operator (operand 3), emitted with the scalar
;; mnemonic suffix and the comparison code printed via %D3.  Insn mode
;; is <ssescalarmode>.
2148 (define_insn "<sse>_vmmaskcmp<mode>3"
2149 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
2151 (match_operator:VF_128 3 "sse_comparison_operator"
2152 [(match_operand:VF_128 1 "register_operand" "0,x")
2153 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm")])
2158 cmp%D3<ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
2159 vcmp%D3<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %<iptr>2}"
2160 [(set_attr "isa" "noavx,avx")
2161 (set_attr "type" "ssecmp")
2162 (set_attr "length_immediate" "1,*")
2163 (set_attr "prefix" "orig,vex")
2164 (set_attr "mode" "<ssescalarmode>")])
;; Predicate for the comparison immediate of the 512-bit compare insn:
;; the float modes (V16SF, V8DF) accept 0..31, the integer modes
;; (V16SI, V8DI) accept 0..7.
2166 (define_mode_attr cmp_imm_predicate
2167 [(V16SF "const_0_to_31_operand") (V8DF "const_0_to_31_operand")
2168 (V16SI "const_0_to_7_operand") (V8DI "const_0_to_7_operand")])
;; AVX-512F vector compare for the VI48F_512 modes.  The result is
;; written to a mask register (operand 0, constraint "k").  Operand 3 is
;; the comparison immediate, whose allowed range depends on the mode via
;; <cmp_imm_predicate>.  The mnemonic gets <sseintprefix> prepended
;; before "cmp".
2170 (define_insn "avx512f_cmp<mode>3<mask_scalar_merge_name><round_saeonly_name>"
2171 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
2172 (unspec:<avx512fmaskmode>
2173 [(match_operand:VI48F_512 1 "register_operand" "v")
2174 (match_operand:VI48F_512 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
2175 (match_operand:SI 3 "<cmp_imm_predicate>" "n")]
2177 "TARGET_AVX512F && <round_saeonly_mode512bit_condition>"
2178 "v<sseintprefix>cmp<ssemodesuffix>\t{%3, <round_saeonly_mask_scalar_merge_op4>%2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2<round_saeonly_mask_scalar_merge_op4>, %3}"
2179 [(set_attr "type" "ssecmp")
2180 (set_attr "length_immediate" "1")
2181 (set_attr "prefix" "evex")
2182 (set_attr "mode" "<sseinsnmode>")])
;; AVX-512F unsigned integer compare (UNSPEC_UNSIGNED_PCMP) for the
;; VI48_512 modes, emitted as vpcmpu<ssemodesuffix>.  The result goes to
;; a mask register; operand 3 is an immediate in the range 0..7.
2184 (define_insn "avx512f_ucmp<mode>3<mask_scalar_merge_name>"
2185 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
2186 (unspec:<avx512fmaskmode>
2187 [(match_operand:VI48_512 1 "register_operand" "v")
2188 (match_operand:VI48_512 2 "nonimmediate_operand" "vm")
2189 (match_operand:SI 3 "const_0_to_7_operand" "n")]
2190 UNSPEC_UNSIGNED_PCMP))]
2192 "vpcmpu<ssemodesuffix>\t{%3, %2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2, %3}"
2193 [(set_attr "type" "ssecmp")
2194 (set_attr "length_immediate" "1")
2195 (set_attr "prefix" "evex")
2196 (set_attr "mode" "<sseinsnmode>")])
;; Scalar-suffixed compare of VF_128 operands into a mask register.  The
;; compare unspec is wrapped in an `and' on the mask mode.  Operand 3 is
;; a 0..31 comparison immediate.
2198 (define_insn "avx512f_vmcmp<mode>3<round_saeonly_name>"
2199 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
2200 (and:<avx512fmaskmode>
2201 (unspec:<avx512fmaskmode>
2202 [(match_operand:VF_128 1 "register_operand" "v")
2203 (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
2204 (match_operand:SI 3 "const_0_to_31_operand" "n")]
2208 "vcmp<ssescalarmodesuffix>\t{%3, <round_saeonly_op4>%2, %1, %0|%0, %1, %2<round_saeonly_op4>, %3}"
2209 [(set_attr "type" "ssecmp")
2210 (set_attr "length_immediate" "1")
2211 (set_attr "prefix" "evex")
2212 (set_attr "mode" "<ssescalarmode>")])
;; Variant of avx512f_vmcmp<mode>3 that additionally takes a mask register
;; in operand 4; the template prints it as {%4} after the destination.
2214 (define_insn "avx512f_vmcmp<mode>3_mask<round_saeonly_name>"
2215 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
2216 (and:<avx512fmaskmode>
2217 (unspec:<avx512fmaskmode>
2218 [(match_operand:VF_128 1 "register_operand" "v")
2219 (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
2220 (match_operand:SI 3 "const_0_to_31_operand" "n")]
2222 (and:<avx512fmaskmode>
2223 (match_operand:<avx512fmaskmode> 4 "register_operand" "k")
2226 "vcmp<ssescalarmodesuffix>\t{%3, <round_saeonly_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_saeonly_op5>, %3}"
2227 [(set_attr "type" "ssecmp")
2228 (set_attr "length_immediate" "1")
2229 (set_attr "prefix" "evex")
2230 (set_attr "mode" "<ssescalarmode>")])
;; FP vector compare into a mask register, expressed with a comparison
;; operator (operand 3) rather than an immediate; the comparison code is
;; printed into the mnemonic via %D3.
2232 (define_insn "avx512f_maskcmp<mode>3"
2233 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
2234 (match_operator:<avx512fmaskmode> 3 "sse_comparison_operator"
2235 [(match_operand:VF 1 "register_operand" "v")
2236 (match_operand:VF 2 "nonimmediate_operand" "vm")]))]
2238 "vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2239 [(set_attr "type" "ssecmp")
2240 (set_attr "length_immediate" "1")
2241 (set_attr "prefix" "evex")
2242 (set_attr "mode" "<sseinsnmode>")])
;; Sets FLAGS_REG (CCFP mode) from element 0 of vector operands 0 and 1.
;; Emits the comi template; the prefix_data16 attribute is selected by
;; whether the insn mode is DF.
2244 (define_insn "<sse>_comi<round_saeonly_name>"
2245 [(set (reg:CCFP FLAGS_REG)
2248 (match_operand:<ssevecmode> 0 "register_operand" "v")
2249 (parallel [(const_int 0)]))
2251 (match_operand:<ssevecmode> 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
2252 (parallel [(const_int 0)]))))]
2253 "SSE_FLOAT_MODE_P (<MODE>mode)"
2254 "%vcomi<ssemodesuffix>\t{<round_saeonly_op2>%1, %0|%0, %<iptr>1<round_saeonly_op2>}"
2255 [(set_attr "type" "ssecomi")
2256 (set_attr "prefix" "maybe_vex")
2257 (set_attr "prefix_rep" "0")
2258 (set (attr "prefix_data16")
2259 (if_then_else (eq_attr "mode" "DF")
2261 (const_string "0")))
2262 (set_attr "mode" "<MODE>")])
;; Same shape as <sse>_comi, but FLAGS_REG is set in CCFPU mode and the
;; ucomi mnemonic is emitted.
2264 (define_insn "<sse>_ucomi<round_saeonly_name>"
2265 [(set (reg:CCFPU FLAGS_REG)
2268 (match_operand:<ssevecmode> 0 "register_operand" "v")
2269 (parallel [(const_int 0)]))
2271 (match_operand:<ssevecmode> 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
2272 (parallel [(const_int 0)]))))]
2273 "SSE_FLOAT_MODE_P (<MODE>mode)"
2274 "%vucomi<ssemodesuffix>\t{<round_saeonly_op2>%1, %0|%0, %<iptr>1<round_saeonly_op2>}"
2275 [(set_attr "type" "ssecomi")
2276 (set_attr "prefix" "maybe_vex")
2277 (set_attr "prefix_rep" "0")
2278 (set (attr "prefix_data16")
2279 (if_then_else (eq_attr "mode" "DF")
2281 (const_string "0")))
2282 (set_attr "mode" "<MODE>")])
;; Vector conditional select for 512-bit modes with an FP comparison
;; (operator 3 on operands 4 and 5) choosing between operands 1 and 2.
;; Only enabled when the data mode and the compare mode have the same
;; number of elements; expansion is delegated to ix86_expand_fp_vcond.
2284 (define_expand "vcond<V_512:mode><VF_512:mode>"
2285 [(set (match_operand:V_512 0 "register_operand")
2287 (match_operator 3 ""
2288 [(match_operand:VF_512 4 "nonimmediate_operand")
2289 (match_operand:VF_512 5 "nonimmediate_operand")])
2290 (match_operand:V_512 1 "general_operand")
2291 (match_operand:V_512 2 "general_operand")))]
2293 && (GET_MODE_NUNITS (<V_512:MODE>mode)
2294 == GET_MODE_NUNITS (<VF_512:MODE>mode))"
2296 bool ok = ix86_expand_fp_vcond (operands);
;; 256-bit counterpart of the vcond expander: FP comparison (operator 3
;; on operands 4 and 5) selects between operands 1 and 2.  Requires equal
;; element counts in both modes and expands via ix86_expand_fp_vcond.
2301 (define_expand "vcond<V_256:mode><VF_256:mode>"
2302 [(set (match_operand:V_256 0 "register_operand")
2304 (match_operator 3 ""
2305 [(match_operand:VF_256 4 "nonimmediate_operand")
2306 (match_operand:VF_256 5 "nonimmediate_operand")])
2307 (match_operand:V_256 1 "general_operand")
2308 (match_operand:V_256 2 "general_operand")))]
2310 && (GET_MODE_NUNITS (<V_256:MODE>mode)
2311 == GET_MODE_NUNITS (<VF_256:MODE>mode))"
2313 bool ok = ix86_expand_fp_vcond (operands);
;; 128-bit counterpart of the vcond expander: FP comparison (operator 3
;; on operands 4 and 5) selects between operands 1 and 2.  Requires equal
;; element counts in both modes and expands via ix86_expand_fp_vcond.
2318 (define_expand "vcond<V_128:mode><VF_128:mode>"
2319 [(set (match_operand:V_128 0 "register_operand")
2321 (match_operator 3 ""
2322 [(match_operand:VF_128 4 "nonimmediate_operand")
2323 (match_operand:VF_128 5 "nonimmediate_operand")])
2324 (match_operand:V_128 1 "general_operand")
2325 (match_operand:V_128 2 "general_operand")))]
2327 && (GET_MODE_NUNITS (<V_128:MODE>mode)
2328 == GET_MODE_NUNITS (<VF_128:MODE>mode))"
2330 bool ok = ix86_expand_fp_vcond (operands);
2335 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2337 ;; Parallel floating point logical operations
2339 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; FP vector and-not.  The output template is assembled at output time
;; into a static buffer with snprintf: the mnemonic stem is andn or vandn
;; depending on the alternative, and vpandn is used when <MODE_SIZE> is
;; 64.  The suffix is picked under a switch on get_attr_mode (insn).
;; The "mode" attribute itself is computed from target tuning flags.
2341 (define_insn "<sse>_andnot<mode>3"
2342 [(set (match_operand:VF 0 "register_operand" "=x,v")
2345 (match_operand:VF 1 "register_operand" "0,v"))
2346 (match_operand:VF 2 "nonimmediate_operand" "xm,vm")))]
2349 static char buf[32];
2353 switch (get_attr_mode (insn))
2360 suffix = "<ssemodesuffix>";
2363 switch (which_alternative)
2366 ops = "andn%s\t{%%2, %%0|%%0, %%2}";
2369 ops = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
2375 /* There is no vandnp[sd]. Use vpandnq. */
2376 if (<MODE_SIZE> == 64)
2379 ops = "vpandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
2382 snprintf (buf, sizeof (buf), ops, suffix);
2385 [(set_attr "isa" "noavx,avx")
2386 (set_attr "type" "sselog")
2387 (set_attr "prefix" "orig,maybe_evex")
2389 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
2390 (const_string "<ssePSmode>")
2391 (match_test "TARGET_AVX")
2392 (const_string "<MODE>")
2393 (match_test "optimize_function_for_size_p (cfun)")
2394 (const_string "V4SF")
2396 (const_string "<MODE>")))])
;; Expander for the any_logic operations on 128/256-bit FP vectors.  The
;; preparation statement only canonicalizes the operands through
;; ix86_fixup_binary_operands_no_copy.
2398 (define_expand "<code><mode>3"
2399 [(set (match_operand:VF_128_256 0 "register_operand")
2400 (any_logic:VF_128_256
2401 (match_operand:VF_128_256 1 "nonimmediate_operand")
2402 (match_operand:VF_128_256 2 "nonimmediate_operand")))]
2404 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; 512-bit FP vector counterpart of the <code><mode>3 expander; operands
;; are likewise fixed up with ix86_fixup_binary_operands_no_copy.
2406 (define_expand "<code><mode>3"
2407 [(set (match_operand:VF_512 0 "register_operand")
2409 (match_operand:VF_512 1 "nonimmediate_operand")
2410 (match_operand:VF_512 2 "nonimmediate_operand")))]
2412 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; FP vector logical operation; operand 1 is marked commutative ("%").
;; The output template is built into a static buffer with snprintf:
;; <logic> or v<logic> depending on the alternative, and vp<logic> when
;; <MODE_SIZE> is 64.  The suffix is picked under a switch on
;; get_attr_mode (insn).
2414 (define_insn "*<code><mode>3"
2415 [(set (match_operand:VF 0 "register_operand" "=x,v")
2417 (match_operand:VF 1 "nonimmediate_operand" "%0,v")
2418 (match_operand:VF 2 "nonimmediate_operand" "xm,vm")))]
2419 "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
2421 static char buf[32];
2425 switch (get_attr_mode (insn))
2432 suffix = "<ssemodesuffix>";
2435 switch (which_alternative)
2438 ops = "<logic>%s\t{%%2, %%0|%%0, %%2}";
2441 ops = "v<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
2447 /* There is no v<logic>p[sd]. Use vp<logic>q. */
2448 if (<MODE_SIZE> == 64)
2451 ops = "vp<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
2454 snprintf (buf, sizeof (buf), ops, suffix);
2457 [(set_attr "isa" "noavx,avx")
2458 (set_attr "type" "sselog")
2459 (set_attr "prefix" "orig,maybe_evex")
2461 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
2462 (const_string "<ssePSmode>")
2463 (match_test "TARGET_AVX")
2464 (const_string "<MODE>")
2465 (match_test "optimize_function_for_size_p (cfun)")
2466 (const_string "V4SF")
2468 (const_string "<MODE>")))])
;; copysign for FP vectors, expanded as bitwise operations with a mask
;; built by ix86_build_signbit_mask (operand 3).  The visible pieces
;; combine (not mask) with operand 1 and (and mask operand 2); the two
;; intermediate results live in fresh pseudos (operands 4 and 5) and are
;; ior-ed into operand 0.
2470 (define_expand "copysign<mode>3"
2473 (not:VF (match_dup 3))
2474 (match_operand:VF 1 "nonimmediate_operand")))
2476 (and:VF (match_dup 3)
2477 (match_operand:VF 2 "nonimmediate_operand")))
2478 (set (match_operand:VF 0 "register_operand")
2479 (ior:VF (match_dup 4) (match_dup 5)))]
2482 operands[3] = ix86_build_signbit_mask (<MODE>mode, 1, 0);
2484 operands[4] = gen_reg_rtx (<MODE>mode);
2485 operands[5] = gen_reg_rtx (<MODE>mode);
2488 ;; Also define scalar versions. These are used for abs, neg, and
2489 ;; conditional move. Using subregs into vector modes causes register
2490 ;; allocation lossage. These patterns do not allow memory operands
2491 ;; because the native instructions read the full 128-bits.
;; Scalar-mode (MODEF) and-not on register-only operands.  The mnemonic
;; suffix is "ps" when the insn's mode attribute is V4SF and
;; <ssevecmodesuffix> otherwise; alternative 0 prints andn, alternative 1
;; the three-operand vandn.
2493 (define_insn "*andnot<mode>3"
2494 [(set (match_operand:MODEF 0 "register_operand" "=x,x")
2497 (match_operand:MODEF 1 "register_operand" "0,x"))
2498 (match_operand:MODEF 2 "register_operand" "x,x")))]
2499 "SSE_FLOAT_MODE_P (<MODE>mode)"
2501 static char buf[32];
2504 = (get_attr_mode (insn) == MODE_V4SF) ? "ps" : "<ssevecmodesuffix>";
2506 switch (which_alternative)
2509 ops = "andn%s\t{%%2, %%0|%%0, %%2}";
2512 ops = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
2518 snprintf (buf, sizeof (buf), ops, suffix);
2521 [(set_attr "isa" "noavx,avx")
2522 (set_attr "type" "sselog")
2523 (set_attr "prefix" "orig,vex")
2525 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
2526 (const_string "V4SF")
2527 (match_test "TARGET_AVX")
2528 (const_string "<ssevecmode>")
2529 (match_test "optimize_function_for_size_p (cfun)")
2530 (const_string "V4SF")
2532 (const_string "<ssevecmode>")))])
;; TFmode and-not.  The mnemonic is "andnps" when the insn's mode
;; attribute is V4SF and "pandn" otherwise; alternative 1 prepends "v"
;; and uses the three-operand form.  prefix_data16 is conditioned on
;; alternative 0 together with mode TI.
2534 (define_insn "*andnottf3"
2535 [(set (match_operand:TF 0 "register_operand" "=x,x")
2537 (not:TF (match_operand:TF 1 "register_operand" "0,x"))
2538 (match_operand:TF 2 "nonimmediate_operand" "xm,xm")))]
2541 static char buf[32];
2544 = (get_attr_mode (insn) == MODE_V4SF) ? "andnps" : "pandn";
2546 switch (which_alternative)
2549 ops = "%s\t{%%2, %%0|%%0, %%2}";
2552 ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
2558 snprintf (buf, sizeof (buf), ops, tmp);
2561 [(set_attr "isa" "noavx,avx")
2562 (set_attr "type" "sselog")
2563 (set (attr "prefix_data16")
2565 (and (eq_attr "alternative" "0")
2566 (eq_attr "mode" "TI"))
2568 (const_string "*")))
2569 (set_attr "prefix" "orig,vex")
2571 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
2572 (const_string "V4SF")
2573 (match_test "TARGET_AVX")
2575 (ior (not (match_test "TARGET_SSE2"))
2576 (match_test "optimize_function_for_size_p (cfun)"))
2577 (const_string "V4SF")
2579 (const_string "TI")))])
;; Scalar-mode (MODEF) logical operation on register-only operands;
;; operand 1 is marked commutative ("%").  The suffix is "ps" when the
;; insn's mode attribute is V4SF and <ssevecmodesuffix> otherwise;
;; alternative 1 uses the three-operand v<logic> form.
2581 (define_insn "*<code><mode>3"
2582 [(set (match_operand:MODEF 0 "register_operand" "=x,x")
2584 (match_operand:MODEF 1 "register_operand" "%0,x")
2585 (match_operand:MODEF 2 "register_operand" "x,x")))]
2586 "SSE_FLOAT_MODE_P (<MODE>mode)"
2588 static char buf[32];
2591 = (get_attr_mode (insn) == MODE_V4SF) ? "ps" : "<ssevecmodesuffix>";
2593 switch (which_alternative)
2596 ops = "<logic>%s\t{%%2, %%0|%%0, %%2}";
2599 ops = "v<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
2605 snprintf (buf, sizeof (buf), ops, suffix);
2608 [(set_attr "isa" "noavx,avx")
2609 (set_attr "type" "sselog")
2610 (set_attr "prefix" "orig,vex")
2612 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
2613 (const_string "V4SF")
2614 (match_test "TARGET_AVX")
2615 (const_string "<ssevecmode>")
2616 (match_test "optimize_function_for_size_p (cfun)")
2617 (const_string "V4SF")
2619 (const_string "<ssevecmode>")))])
;; Expander for TFmode logical operations; operands are canonicalized
;; with ix86_fixup_binary_operands_no_copy.
2621 (define_expand "<code>tf3"
2622 [(set (match_operand:TF 0 "register_operand")
2624 (match_operand:TF 1 "nonimmediate_operand")
2625 (match_operand:TF 2 "nonimmediate_operand")))]
2627 "ix86_fixup_binary_operands_no_copy (<CODE>, TFmode, operands);")
;; TFmode logical operation; operand 1 is marked commutative ("%").  The
;; mnemonic is "<logic>ps" when the insn's mode attribute is V4SF and
;; "p<logic>" otherwise; alternative 1 prepends "v" and uses the
;; three-operand form.
2629 (define_insn "*<code>tf3"
2630 [(set (match_operand:TF 0 "register_operand" "=x,x")
2632 (match_operand:TF 1 "nonimmediate_operand" "%0,x")
2633 (match_operand:TF 2 "nonimmediate_operand" "xm,xm")))]
2635 && ix86_binary_operator_ok (<CODE>, TFmode, operands)"
2637 static char buf[32];
2640 = (get_attr_mode (insn) == MODE_V4SF) ? "<logic>ps" : "p<logic>";
2642 switch (which_alternative)
2645 ops = "%s\t{%%2, %%0|%%0, %%2}";
2648 ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
2654 snprintf (buf, sizeof (buf), ops, tmp);
2657 [(set_attr "isa" "noavx,avx")
2658 (set_attr "type" "sselog")
2659 (set (attr "prefix_data16")
2661 (and (eq_attr "alternative" "0")
2662 (eq_attr "mode" "TI"))
2664 (const_string "*")))
2665 (set_attr "prefix" "orig,vex")
2667 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
2668 (const_string "V4SF")
2669 (match_test "TARGET_AVX")
2671 (ior (not (match_test "TARGET_SSE2"))
2672 (match_test "optimize_function_for_size_p (cfun)"))
2673 (const_string "V4SF")
2675 (const_string "TI")))])
2677 ;; There is no floating-point xor for V16SF and V8DF in AVX512F,
2678 ;; but we need one for negation. Instead we use the integer versions
2679 ;; of xor. Maybe there could be a better way to do that.
;; Element-size suffix appended to vp<logic> by avx512f_<logic><mode>:
;; "d" for V16SF and "q" for V8DF.
2681 (define_mode_attr avx512flogicsuff
2682 [(V16SF "d") (V8DF "q")])
;; Logical operation on 512-bit FP vectors, emitted with the integer
;; mnemonic vp<logic> plus the <avx512flogicsuff> element suffix.
2684 (define_insn "avx512f_<logic><mode>"
2685 [(set (match_operand:VF_512 0 "register_operand" "=v")
2687 (match_operand:VF_512 1 "register_operand" "v")
2688 (match_operand:VF_512 2 "nonimmediate_operand" "vm")))]
2690 "vp<logic><avx512flogicsuff>\t{%2, %1, %0|%0, %1, %2}"
2691 [(set_attr "type" "sselog")
2692 (set_attr "prefix" "evex")])
2694 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2696 ;; FMA floating point multiply/accumulate instructions. These include
2697 ;; scalar versions of the instructions as well as vector versions.
2699 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2701 ;; The standard names for scalar FMA are only available with SSE math enabled.
2702 ;; CPUID bit AVX512F enables evex encoded scalar and 512-bit fma. It doesn't
2703 ;; care about FMA bit, so we enable fma for TARGET_AVX512F even when TARGET_FMA
2704 ;; and TARGET_FMA4 are both false.
2705 ;; TODO: In theory AVX512F does not automatically imply FMA, and without FMA
2706 ;; one must force the EVEX encoding of the fma insns. Ideally we'd improve
2707 ;; GAS to allow proper prefix selection. However, for the moment all hardware
2708 ;; that supports AVX512F also supports FMA so we can ignore this for now.
;; Modes for the standard-named FMA expanders.  The scalar SF/DF entries
;; additionally require TARGET_SSE_MATH; 128/256-bit vectors need FMA or
;; FMA4; 512-bit vectors need AVX512F.
2709 (define_mode_iterator FMAMODEM
2710 [(SF "TARGET_SSE_MATH && (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F)")
2711 (DF "TARGET_SSE_MATH && (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F)")
2712 (V4SF "TARGET_FMA || TARGET_FMA4")
2713 (V2DF "TARGET_FMA || TARGET_FMA4")
2714 (V8SF "TARGET_FMA || TARGET_FMA4")
2715 (V4DF "TARGET_FMA || TARGET_FMA4")
2716 (V16SF "TARGET_AVX512F")
2717 (V8DF "TARGET_AVX512F")])
;; Standard-name fused multiply-add expander over FMAMODEM; all three
;; inputs (operands 1, 2 and 3) are used un-negated.
2719 (define_expand "fma<mode>4"
2720 [(set (match_operand:FMAMODEM 0 "register_operand")
2722 (match_operand:FMAMODEM 1 "nonimmediate_operand")
2723 (match_operand:FMAMODEM 2 "nonimmediate_operand")
2724 (match_operand:FMAMODEM 3 "nonimmediate_operand")))]
;; Standard-name fms expander over FMAMODEM; operand 3 is negated.
2727 (define_expand "fms<mode>4"
2728 [(set (match_operand:FMAMODEM 0 "register_operand")
2730 (match_operand:FMAMODEM 1 "nonimmediate_operand")
2731 (match_operand:FMAMODEM 2 "nonimmediate_operand")
2732 (neg:FMAMODEM (match_operand:FMAMODEM 3 "nonimmediate_operand"))))]
;; Standard-name fnma expander over FMAMODEM; operand 1 is negated.
2735 (define_expand "fnma<mode>4"
2736 [(set (match_operand:FMAMODEM 0 "register_operand")
2738 (neg:FMAMODEM (match_operand:FMAMODEM 1 "nonimmediate_operand"))
2739 (match_operand:FMAMODEM 2 "nonimmediate_operand")
2740 (match_operand:FMAMODEM 3 "nonimmediate_operand")))]
;; Standard-name fnms expander over FMAMODEM; operands 1 and 3 are both
;; negated.
2743 (define_expand "fnms<mode>4"
2744 [(set (match_operand:FMAMODEM 0 "register_operand")
2746 (neg:FMAMODEM (match_operand:FMAMODEM 1 "nonimmediate_operand"))
2747 (match_operand:FMAMODEM 2 "nonimmediate_operand")
2748 (neg:FMAMODEM (match_operand:FMAMODEM 3 "nonimmediate_operand"))))]
2751 ;; The builtins for intrinsics are not constrained by SSE math enabled.
;; Modes for the FMA builtin/insn patterns.  Same mode list as FMAMODEM,
;; but the scalar SF/DF entries do not require TARGET_SSE_MATH.
2752 (define_mode_iterator FMAMODE [(SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
2753 (DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
2754 (V4SF "TARGET_FMA || TARGET_FMA4")
2755 (V2DF "TARGET_FMA || TARGET_FMA4")
2756 (V8SF "TARGET_FMA || TARGET_FMA4")
2757 (V4DF "TARGET_FMA || TARGET_FMA4")
2758 (V16SF "TARGET_AVX512F")
2759 (V8DF "TARGET_AVX512F")])
;; Named expander for the fmadd builtin over FMAMODE; operands 1, 2 and 3
;; are used un-negated.
2761 (define_expand "fma4i_fmadd_<mode>"
2762 [(set (match_operand:FMAMODE 0 "register_operand")
2764 (match_operand:FMAMODE 1 "nonimmediate_operand")
2765 (match_operand:FMAMODE 2 "nonimmediate_operand")
2766 (match_operand:FMAMODE 3 "nonimmediate_operand")))]
;; Expander for the "maskz" 512-bit fmadd.  It forwards operands 0..3 to
;; gen_fma_fmadd_<mode>_maskz_1, inserting CONST0_RTX (<MODE>mode) as the
;; extra vector operand ahead of the mask register (operand 4).
2769 (define_expand "avx512f_fmadd_<mode>_maskz<round_expand_name>"
2770 [(match_operand:VF_512 0 "register_operand")
2771 (match_operand:VF_512 1 "<round_expand_nimm_predicate>")
2772 (match_operand:VF_512 2 "<round_expand_nimm_predicate>")
2773 (match_operand:VF_512 3 "<round_expand_nimm_predicate>")
2774 (match_operand:<avx512fmaskmode> 4 "register_operand")]
2777 emit_insn (gen_fma_fmadd_<mode>_maskz_1<round_expand_name> (
2778 operands[0], operands[1], operands[2], operands[3],
2779 CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>));
;; fmadd insn over FMAMODE with five alternatives.  The first three (isa
;; fma_avx512f) are the 132, 213 and 231 forms, distinguished by which
;; input is tied to the destination; the last two (isa fma4) are the
;; four-operand vfmadd forms.
2783 (define_insn "<sd_mask_codefor>fma_fmadd_<mode><sd_maskz_name><round_name>"
2784 [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
2786 (match_operand:FMAMODE 1 "<round_nimm_predicate>" "%0,0,v,x,x")
2787 (match_operand:FMAMODE 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>,x,m")
2788 (match_operand:FMAMODE 3 "<round_nimm_predicate>" "v,<round_constraint>,0,xm,x")))]
2789 "<sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
2791 vfmadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
2792 vfmadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
2793 vfmadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}
2794 vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
2795 vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2796 [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f,fma4,fma4")
2797 (set_attr "type" "ssemuladd")
2798 (set_attr "mode" "<MODE>")])
;; Masked 512-bit fmadd with operand 1 tied to the destination in both
;; alternatives (132 and 213 forms).  Operand 4 is the mask register,
;; printed as {%4} after the destination.
2800 (define_insn "avx512f_fmadd_<mode>_mask<round_name>"
2801 [(set (match_operand:VF_512 0 "register_operand" "=v,v")
2804 (match_operand:VF_512 1 "register_operand" "0,0")
2805 (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>,v")
2806 (match_operand:VF_512 3 "<round_nimm_predicate>" "v,<round_constraint>"))
2808 (match_operand:<avx512fmaskmode> 4 "register_operand" "k,k")))]
2811 vfmadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
2812 vfmadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
2813 [(set_attr "isa" "fma_avx512f,fma_avx512f")
2814 (set_attr "type" "ssemuladd")
2815 (set_attr "mode" "<MODE>")])
;; Masked 512-bit fmadd with the addend (operand 3) tied to the
;; destination (231 form).  Operand 4 is the mask register, printed as
;; {%4} after the destination.
;; Operands 0 and 1 use the "v" constraint, matching the sibling _mask3
;; patterns (fmsub, fnmadd, fnmsub, fmaddsub, fmsubadd); this pattern is
;; EVEX-only (isa fma_avx512f), so the narrower "x" class is not needed.
2817 (define_insn "avx512f_fmadd_<mode>_mask3<round_name>"
2818 [(set (match_operand:VF_512 0 "register_operand" "=v")
2821 (match_operand:VF_512 1 "register_operand" "v")
2822 (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>")
2823 (match_operand:VF_512 3 "register_operand" "0"))
2825 (match_operand:<avx512fmaskmode> 4 "register_operand" "k")))]
2827 "vfmadd231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
2828 [(set_attr "isa" "fma_avx512f")
2829 (set_attr "type" "ssemuladd")
2830 (set_attr "mode" "<MODE>")])
;; fmsub insn over FMAMODE with five alternatives: the 132, 213 and 231
;; forms (isa fma_avx512f) followed by two four-operand vfmsub forms
;; (isa fma4).
2832 (define_insn "<sd_mask_codefor>fma_fmsub_<mode><sd_maskz_name><round_name>"
2833 [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
2835 (match_operand:FMAMODE 1 "<round_nimm_predicate>" "%0, 0, v, x,x")
2836 (match_operand:FMAMODE 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>,x,m")
2838 (match_operand:FMAMODE 3 "<round_nimm_predicate>" "v,<round_constraint>,0,xm,x"))))]
2839 "<sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
2841 vfmsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
2842 vfmsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
2843 vfmsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}
2844 vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
2845 vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2846 [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f,fma4,fma4")
2847 (set_attr "type" "ssemuladd")
2848 (set_attr "mode" "<MODE>")])
;; Masked 512-bit fmsub with operand 1 tied to the destination (132 and
;; 213 forms); operand 4 is the mask register printed as {%4}.
2850 (define_insn "avx512f_fmsub_<mode>_mask<round_name>"
2851 [(set (match_operand:VF_512 0 "register_operand" "=v,v")
2854 (match_operand:VF_512 1 "register_operand" "0,0")
2855 (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>,v")
2857 (match_operand:VF_512 3 "<round_nimm_predicate>" "v,<round_constraint>")))
2859 (match_operand:<avx512fmaskmode> 4 "register_operand" "k,k")))]
2862 vfmsub132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
2863 vfmsub213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
2864 [(set_attr "isa" "fma_avx512f,fma_avx512f")
2865 (set_attr "type" "ssemuladd")
2866 (set_attr "mode" "<MODE>")])
;; Masked 512-bit fmsub with operand 3 tied to the destination (231
;; form); operand 4 is the mask register printed as {%4}.
2868 (define_insn "avx512f_fmsub_<mode>_mask3<round_name>"
2869 [(set (match_operand:VF_512 0 "register_operand" "=v")
2872 (match_operand:VF_512 1 "register_operand" "v")
2873 (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>")
2875 (match_operand:VF_512 3 "register_operand" "0")))
2877 (match_operand:<avx512fmaskmode> 4 "register_operand" "k")))]
2879 "vfmsub231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
2880 [(set_attr "isa" "fma_avx512f")
2881 (set_attr "type" "ssemuladd")
2882 (set_attr "mode" "<MODE>")])
;; fnmadd insn over FMAMODE with five alternatives: the 132, 213 and 231
;; forms (isa fma_avx512f) followed by two four-operand vfnmadd forms
;; (isa fma4).
2884 (define_insn "<sd_mask_codefor>fma_fnmadd_<mode><sd_maskz_name><round_name>"
2885 [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
2888 (match_operand:FMAMODE 1 "<round_nimm_predicate>" "%0,0,v,x,x"))
2889 (match_operand:FMAMODE 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>,x,m")
2890 (match_operand:FMAMODE 3 "<round_nimm_predicate>" "v,<round_constraint>,0,xm,x")))]
2891 "<sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
2893 vfnmadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
2894 vfnmadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
2895 vfnmadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}
2896 vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
2897 vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2898 [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f,fma4,fma4")
2899 (set_attr "type" "ssemuladd")
2900 (set_attr "mode" "<MODE>")])
;; Masked 512-bit fnmadd with operand 1 tied to the destination (132 and
;; 213 forms); operand 4 is the mask register printed as {%4}.
2902 (define_insn "avx512f_fnmadd_<mode>_mask<round_name>"
2903 [(set (match_operand:VF_512 0 "register_operand" "=v,v")
2907 (match_operand:VF_512 1 "register_operand" "0,0"))
2908 (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>,v")
2909 (match_operand:VF_512 3 "<round_nimm_predicate>" "v,<round_constraint>"))
2911 (match_operand:<avx512fmaskmode> 4 "register_operand" "k,k")))]
2914 vfnmadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
2915 vfnmadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
2916 [(set_attr "isa" "fma_avx512f,fma_avx512f")
2917 (set_attr "type" "ssemuladd")
2918 (set_attr "mode" "<MODE>")])
;; Masked 512-bit fnmadd with operand 3 tied to the destination (231
;; form); operand 4 is the mask register printed as {%4}.
2920 (define_insn "avx512f_fnmadd_<mode>_mask3<round_name>"
2921 [(set (match_operand:VF_512 0 "register_operand" "=v")
2925 (match_operand:VF_512 1 "register_operand" "v"))
2926 (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>")
2927 (match_operand:VF_512 3 "register_operand" "0"))
2929 (match_operand:<avx512fmaskmode> 4 "register_operand" "k")))]
2931 "vfnmadd231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
2932 [(set_attr "isa" "fma_avx512f")
2933 (set_attr "type" "ssemuladd")
2934 (set_attr "mode" "<MODE>")])
;; fnmsub insn over FMAMODE with five alternatives: the 132, 213 and 231
;; forms (isa fma_avx512f) followed by two four-operand vfnmsub forms
;; (isa fma4).
2936 (define_insn "<sd_mask_codefor>fma_fnmsub_<mode><sd_maskz_name><round_name>"
2937 [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
2940 (match_operand:FMAMODE 1 "<round_nimm_predicate>" "%0,0,v,x,x"))
2941 (match_operand:FMAMODE 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>,x,m")
2943 (match_operand:FMAMODE 3 "<round_nimm_predicate>" "v,<round_constraint>,0,xm,x"))))]
2944 "<sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
2946 vfnmsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
2947 vfnmsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
2948 vfnmsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}
2949 vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
2950 vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2951 [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f,fma4,fma4")
2952 (set_attr "type" "ssemuladd")
2953 (set_attr "mode" "<MODE>")])
;; Masked 512-bit fnmsub with operand 1 tied to the destination (132 and
;; 213 forms); operand 4 is the mask register printed as {%4}.
2955 (define_insn "avx512f_fnmsub_<mode>_mask<round_name>"
2956 [(set (match_operand:VF_512 0 "register_operand" "=v,v")
2960 (match_operand:VF_512 1 "register_operand" "0,0"))
2961 (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>,v")
2963 (match_operand:VF_512 3 "<round_nimm_predicate>" "v,<round_constraint>")))
2965 (match_operand:<avx512fmaskmode> 4 "register_operand" "k,k")))]
2968 vfnmsub132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
2969 vfnmsub213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
2970 [(set_attr "isa" "fma_avx512f,fma_avx512f")
2971 (set_attr "type" "ssemuladd")
2972 (set_attr "mode" "<MODE>")])
;; Masked 512-bit fnmsub with operand 3 tied to the destination (231
;; form); operand 4 is the mask register printed as {%4}.
2974 (define_insn "avx512f_fnmsub_<mode>_mask3<round_name>"
2975 [(set (match_operand:VF_512 0 "register_operand" "=v")
2979 (match_operand:VF_512 1 "register_operand" "v"))
2980 (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>")
2982 (match_operand:VF_512 3 "register_operand" "0")))
2984 (match_operand:<avx512fmaskmode> 4 "register_operand" "k")))]
2986 "vfnmsub231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
2987 [(set_attr "isa" "fma_avx512f")
2988 (set_attr "type" "ssemuladd")
2989 (set_attr "mode" "<MODE>")])
2991 ;; FMA parallel floating point multiply addsub and subadd operations.
2993 ;; It would be possible to represent these without the UNSPEC as
2996 ;; (fma op1 op2 op3)
2997 ;; (fma op1 op2 (neg op3))
3000 ;; But this doesn't seem useful in practice.
;; Named expander for fmaddsub over all VF modes, enabled when any of
;; FMA, FMA4 or AVX512F is available.
3002 (define_expand "fmaddsub_<mode>"
3003 [(set (match_operand:VF 0 "register_operand")
3005 [(match_operand:VF 1 "nonimmediate_operand")
3006 (match_operand:VF 2 "nonimmediate_operand")
3007 (match_operand:VF 3 "nonimmediate_operand")]
3009 "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
;; Expander for the "maskz" 512-bit fmaddsub.  It forwards operands 0..3
;; to gen_fma_fmaddsub_<mode>_maskz_1, inserting CONST0_RTX (<MODE>mode)
;; as the extra vector operand ahead of the mask register (operand 4).
3011 (define_expand "avx512f_fmaddsub_<mode>_maskz<round_expand_name>"
3012 [(match_operand:VF_512 0 "register_operand")
3013 (match_operand:VF_512 1 "<round_expand_nimm_predicate>")
3014 (match_operand:VF_512 2 "<round_expand_nimm_predicate>")
3015 (match_operand:VF_512 3 "<round_expand_nimm_predicate>")
3016 (match_operand:<avx512fmaskmode> 4 "register_operand")]
3019 emit_insn (gen_fma_fmaddsub_<mode>_maskz_1<round_expand_name> (
3020 operands[0], operands[1], operands[2], operands[3],
3021 CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>));
;; fmaddsub insn over VF with five alternatives: the 132, 213 and 231
;; forms (isa fma_avx512f) followed by two four-operand vfmaddsub forms
;; (isa fma4).
3025 (define_insn "<sd_mask_codefor>fma_fmaddsub_<mode><sd_maskz_name><round_name>"
3026 [(set (match_operand:VF 0 "register_operand" "=v,v,v,x,x")
3028 [(match_operand:VF 1 "<round_nimm_predicate>" "%0,0,v,x,x")
3029 (match_operand:VF 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>,x,m")
3030 (match_operand:VF 3 "<round_nimm_predicate>" "v,<round_constraint>,0,xm,x")]
3032 "(TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F) && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
3034 vfmaddsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
3035 vfmaddsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
3036 vfmaddsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}
3037 vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
3038 vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
3039 [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f,fma4,fma4")
3040 (set_attr "type" "ssemuladd")
3041 (set_attr "mode" "<MODE>")])
;; Masked 512-bit fmaddsub with operand 1 tied to the destination (132
;; and 213 forms); operand 4 is the mask register printed as {%4}.
3043 (define_insn "avx512f_fmaddsub_<mode>_mask<round_name>"
3044 [(set (match_operand:VF_512 0 "register_operand" "=v,v")
3047 [(match_operand:VF_512 1 "register_operand" "0,0")
3048 (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>,v")
3049 (match_operand:VF_512 3 "<round_nimm_predicate>" "v,<round_constraint>")]
3052 (match_operand:<avx512fmaskmode> 4 "register_operand" "k,k")))]
3055 vfmaddsub132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
3056 vfmaddsub213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
3057 [(set_attr "isa" "fma_avx512f,fma_avx512f")
3058 (set_attr "type" "ssemuladd")
3059 (set_attr "mode" "<MODE>")])
;; Masked 512-bit fmaddsub with operand 3 tied to the destination (231
;; form); operand 4 is the mask register printed as {%4}.
3061 (define_insn "avx512f_fmaddsub_<mode>_mask3<round_name>"
3062 [(set (match_operand:VF_512 0 "register_operand" "=v")
3065 [(match_operand:VF_512 1 "register_operand" "v")
3066 (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>")
3067 (match_operand:VF_512 3 "register_operand" "0")]
3070 (match_operand:<avx512fmaskmode> 4 "register_operand" "k")))]
3072 "vfmaddsub231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
3073 [(set_attr "isa" "fma_avx512f")
3074 (set_attr "type" "ssemuladd")
3075 (set_attr "mode" "<MODE>")])
;; fmsubadd insn over VF with five alternatives: the 132, 213 and 231
;; forms (isa fma_avx512f) followed by two four-operand vfmsubadd forms
;; (isa fma4).
3077 (define_insn "<sd_mask_codefor>fma_fmsubadd_<mode><sd_maskz_name><round_name>"
3078 [(set (match_operand:VF 0 "register_operand" "=v,v,v,x,x")
3080 [(match_operand:VF 1 "<round_nimm_predicate>" "%0,0,v,x,x")
3081 (match_operand:VF 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>,x,m")
3083 (match_operand:VF 3 "<round_nimm_predicate>" "v,<round_constraint>,0,xm,x"))]
3085 "(TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F) && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
3087 vfmsubadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
3088 vfmsubadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
3089 vfmsubadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}
3090 vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
3091 vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
3092 [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f,fma4,fma4")
3093 (set_attr "type" "ssemuladd")
3094 (set_attr "mode" "<MODE>")])
;; Masked 512-bit fmsubadd with operand 1 tied to the destination (132
;; and 213 forms); operand 4 is the mask register printed as {%4}.
3096 (define_insn "avx512f_fmsubadd_<mode>_mask<round_name>"
3097 [(set (match_operand:VF_512 0 "register_operand" "=v,v")
3100 [(match_operand:VF_512 1 "register_operand" "0,0")
3101 (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>,v")
3103 (match_operand:VF_512 3 "<round_nimm_predicate>" "v,<round_constraint>"))]
3106 (match_operand:<avx512fmaskmode> 4 "register_operand" "k,k")))]
3109 vfmsubadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
3110 vfmsubadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
3111 [(set_attr "isa" "fma_avx512f,fma_avx512f")
3112 (set_attr "type" "ssemuladd")
3113 (set_attr "mode" "<MODE>")])
;; Masked 512-bit fmsubadd with operand 3 tied to the destination (231
;; form); operand 4 is the mask register printed as {%4}.
3115 (define_insn "avx512f_fmsubadd_<mode>_mask3<round_name>"
3116 [(set (match_operand:VF_512 0 "register_operand" "=v")
3119 [(match_operand:VF_512 1 "register_operand" "v")
3120 (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>")
3122 (match_operand:VF_512 3 "register_operand" "0"))]
3125 (match_operand:<avx512fmaskmode> 4 "register_operand" "k")))]
3127 "vfmsubadd231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
3128 [(set_attr "isa" "fma_avx512f")
3129 (set_attr "type" "ssemuladd")
3130 (set_attr "mode" "<MODE>")])
3132 ;; FMA3 floating point scalar intrinsics. These merge result with
3133 ;; high-order elements from the destination register.
;; Named expander for the scalar FMA3 fmadd intrinsic on the 128-bit float
;; vector modes (VF_128).  It takes a register destination and three
;; inputs that all use <round_nimm_predicate>.
3135 (define_expand "fmai_vmfmadd_<mode><round_name>"
3136 [(set (match_operand:VF_128 0 "register_operand")
3139 (match_operand:VF_128 1 "<round_nimm_predicate>")
3140 (match_operand:VF_128 2 "<round_nimm_predicate>")
3141 (match_operand:VF_128 3 "<round_nimm_predicate>"))
;; Scalar FMA3 fmadd on VF_128.  Operand 1 is tied to the destination in
;; both alternatives; alternative 0 emits vfmadd132 and alternative 1
;; vfmadd213, each with the scalar mode suffix.  Enabled for TARGET_FMA or
;; TARGET_AVX512F.
;; NOTE(review): the pattern name has no <round_name> although the
;; operands and templates use <round_*> attributes; the fnmadd/fnmsub
;; siblings do carry <round_name>.  Confirm this is intentional.
3146 (define_insn "*fmai_fmadd_<mode>"
3147 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
3150 (match_operand:VF_128 1 "<round_nimm_predicate>" " 0, 0")
3151 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>, v")
3152 (match_operand:VF_128 3 "<round_nimm_predicate>" " v,<round_constraint>"))
3155 "TARGET_FMA || TARGET_AVX512F"
3157 vfmadd132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
3158 vfmadd213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
3159 [(set_attr "type" "ssemuladd")
3160 (set_attr "mode" "<MODE>")])
;; Scalar FMA3 fmsub on VF_128.  Operand 1 is tied to the destination;
;; alternative 0 emits vfmsub132 and alternative 1 vfmsub213.  Enabled for
;; TARGET_FMA or TARGET_AVX512F.
;; NOTE(review): as with *fmai_fmadd_<mode>, the name has no <round_name>
;; even though <round_*> attributes are used in the body.  Confirm this is
;; intentional.
3162 (define_insn "*fmai_fmsub_<mode>"
3163 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
3166 (match_operand:VF_128 1 "<round_nimm_predicate>" "0,0")
3167 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>,v")
3169 (match_operand:VF_128 3 "<round_nimm_predicate>" " v,<round_constraint>")))
3172 "TARGET_FMA || TARGET_AVX512F"
3174 vfmsub132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
3175 vfmsub213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
3176 [(set_attr "type" "ssemuladd")
3177 (set_attr "mode" "<MODE>")])
;; Scalar FMA3 fnmadd on VF_128.  Operand 2 is listed first in the RTL,
;; followed by operand 1, which is tied to the destination.  Alternative 0
;; emits vfnmadd132 and alternative 1 vfnmadd213.  Enabled for TARGET_FMA
;; or TARGET_AVX512F.
3179 (define_insn "*fmai_fnmadd_<mode><round_name>"
3180 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
3184 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>,v"))
3185 (match_operand:VF_128 1 "<round_nimm_predicate>" "0,0")
3186 (match_operand:VF_128 3 "<round_nimm_predicate>" "v,<round_constraint>"))
3189 "TARGET_FMA || TARGET_AVX512F"
3191 vfnmadd132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
3192 vfnmadd213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
3193 [(set_attr "type" "ssemuladd")
3194 (set_attr "mode" "<MODE>")])
;; Scalar FMA3 fnmsub on VF_128.  Same operand layout as the fnmadd
;; pattern (operand 2 first, operand 1 tied to the destination);
;; alternative 0 emits vfnmsub132 and alternative 1 vfnmsub213.  Enabled
;; for TARGET_FMA or TARGET_AVX512F.
3196 (define_insn "*fmai_fnmsub_<mode><round_name>"
3197 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
3201 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>, v"))
3202 (match_operand:VF_128 1 "<round_nimm_predicate>" " 0, 0")
3204 (match_operand:VF_128 3 "<round_nimm_predicate>" " v,<round_constraint>")))
3207 "TARGET_FMA || TARGET_AVX512F"
3209 vfnmsub132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
3210 vfnmsub213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
3211 [(set_attr "type" "ssemuladd")
3212 (set_attr "mode" "<MODE>")])
3214 ;; FMA4 floating point scalar intrinsics. These write the
3215 ;; entire destination register, with the high-order elements zeroed.
;; Named expander for the scalar FMA4 fmadd intrinsic on VF_128.  The
;; preparation statement supplies operands[4] as the all-zero constant of
;; the vector mode, which the matching insns require as operand 4.
3217 (define_expand "fma4i_vmfmadd_<mode>"
3218 [(set (match_operand:VF_128 0 "register_operand")
3221 (match_operand:VF_128 1 "nonimmediate_operand")
3222 (match_operand:VF_128 2 "nonimmediate_operand")
3223 (match_operand:VF_128 3 "nonimmediate_operand"))
3227 "operands[4] = CONST0_RTX (<MODE>mode);")
;; Scalar FMA4 fmadd: four-operand vfmadd with a separate destination.
;; Operand 4 must be a zero constant (const0_operand).  The two
;; alternatives allow either operand 3 or operand 2 to be in memory.
3229 (define_insn "*fma4i_vmfmadd_<mode>"
3230 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
3233 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
3234 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
3235 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
3236 (match_operand:VF_128 4 "const0_operand")
3239 "vfmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
3240 [(set_attr "type" "ssemuladd")
3241 (set_attr "mode" "<MODE>")])
;; Scalar FMA4 fmsub: four-operand vfmsub.  Operand 4 must be a zero
;; constant; either operand 3 or operand 2 may be in memory.
3243 (define_insn "*fma4i_vmfmsub_<mode>"
3244 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
3247 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
3248 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
3250 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
3251 (match_operand:VF_128 4 "const0_operand")
3254 "vfmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
3255 [(set_attr "type" "ssemuladd")
3256 (set_attr "mode" "<MODE>")])
;; Scalar FMA4 fnmadd: four-operand vfnmadd.  Operand 4 must be a zero
;; constant; either operand 3 or operand 2 may be in memory.
3258 (define_insn "*fma4i_vmfnmadd_<mode>"
3259 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
3263 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
3264 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
3265 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
3266 (match_operand:VF_128 4 "const0_operand")
3269 "vfnmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
3270 [(set_attr "type" "ssemuladd")
3271 (set_attr "mode" "<MODE>")])
;; Scalar FMA4 fnmsub: four-operand vfnmsub.  Operand 4 must be a zero
;; constant; either operand 3 or operand 2 may be in memory.
3273 (define_insn "*fma4i_vmfnmsub_<mode>"
3274 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
3278 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
3279 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
3281 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
3282 (match_operand:VF_128 4 "const0_operand")
3285 "vfnmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
3286 [(set_attr "type" "ssemuladd")
3287 (set_attr "mode" "<MODE>")])
3289 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3291 ;; Parallel single-precision floating point conversion operations
3293 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2ps: convert the V2SI operand 2 (MMX register or memory, "ym") to
;; floating point.  Operand 1 is the V4SF value tied to the destination.
3295 (define_insn "sse_cvtpi2ps"
3296 [(set (match_operand:V4SF 0 "register_operand" "=x")
3299 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
3300 (match_operand:V4SF 1 "register_operand" "0")
3303 "cvtpi2ps\t{%2, %0|%0, %2}"
3304 [(set_attr "type" "ssecvt")
3305 (set_attr "mode" "V4SF")])
;; cvtps2pi: convert a V4SF source to integers and keep elements 0 and 1
;; as a V2SI result in an MMX register ("=y").  The Intel-syntax source is
;; printed with the %q modifier.
3307 (define_insn "sse_cvtps2pi"
3308 [(set (match_operand:V2SI 0 "register_operand" "=y")
3310 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
3312 (parallel [(const_int 0) (const_int 1)])))]
3314 "cvtps2pi\t{%1, %0|%0, %q1}"
3315 [(set_attr "type" "ssecvt")
3316 (set_attr "unit" "mmx")
3317 (set_attr "mode" "DI")])
;; cvttps2pi: truncating (fix) conversion of a V4SF source, keeping
;; elements 0 and 1 as a V2SI result in an MMX register.  prefix_rep is
;; explicitly set to "0" for this insn.
3319 (define_insn "sse_cvttps2pi"
3320 [(set (match_operand:V2SI 0 "register_operand" "=y")
3322 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
3323 (parallel [(const_int 0) (const_int 1)])))]
3325 "cvttps2pi\t{%1, %0|%0, %q1}"
3326 [(set_attr "type" "ssecvt")
3327 (set_attr "unit" "mmx")
3328 (set_attr "prefix_rep" "0")
3329 (set_attr "mode" "SF")])
;; cvtsi2ss: convert the SImode operand 2 to SFmode.  Alternatives 0 and 1
;; are the legacy two-operand forms (isa noavx) with operand 1 tied to the
;; destination, taking the integer from a register or from memory.
;; Alternative 2 is the three-operand vcvtsi2ss form (isa avx), which may
;; carry an embedded rounding operand.
3331 (define_insn "sse_cvtsi2ss<round_name>"
3332 [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
3335 (float:SF (match_operand:SI 2 "<round_nimm_predicate>" "r,m,<round_constraint3>")))
3336 (match_operand:V4SF 1 "register_operand" "0,0,v")
3340 cvtsi2ss\t{%2, %0|%0, %2}
3341 cvtsi2ss\t{%2, %0|%0, %2}
3342 vcvtsi2ss\t{<round_op3>%2, %1, %0|%0, %1, %2<round_op3>}"
3343 [(set_attr "isa" "noavx,noavx,avx")
3344 (set_attr "type" "sseicvt")
3345 (set_attr "athlon_decode" "vector,double,*")
3346 (set_attr "amdfam10_decode" "vector,double,*")
3347 (set_attr "bdver1_decode" "double,direct,*")
3348 (set_attr "btver2_decode" "double,double,double")
3349 (set_attr "prefix" "orig,orig,maybe_evex")
3350 (set_attr "mode" "SF")])
;; 64-bit variant of sse_cvtsi2ss: operand 2 is DImode and the pattern
;; additionally requires TARGET_64BIT.  The legacy alternatives set
;; prefix_rex to 1 and the AVX alternative sets length_vex to 4.
3352 (define_insn "sse_cvtsi2ssq<round_name>"
3353 [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
3356 (float:SF (match_operand:DI 2 "<round_nimm_predicate>" "r,m,<round_constraint3>")))
3357 (match_operand:V4SF 1 "register_operand" "0,0,v")
3359 "TARGET_SSE && TARGET_64BIT"
3361 cvtsi2ssq\t{%2, %0|%0, %2}
3362 cvtsi2ssq\t{%2, %0|%0, %2}
3363 vcvtsi2ssq\t{<round_op3>%2, %1, %0|%0, %1, %2<round_op3>}"
3364 [(set_attr "isa" "noavx,noavx,avx")
3365 (set_attr "type" "sseicvt")
3366 (set_attr "athlon_decode" "vector,double,*")
3367 (set_attr "amdfam10_decode" "vector,double,*")
3368 (set_attr "bdver1_decode" "double,direct,*")
3369 (set_attr "btver2_decode" "double,double,double")
3370 (set_attr "length_vex" "*,*,4")
3371 (set_attr "prefix_rex" "1,1,*")
3372 (set_attr "prefix" "orig,orig,maybe_evex")
3373 (set_attr "mode" "SF")])
;; cvtss2si: convert element 0 of the V4SF operand 1 to an SImode integer
;; register using UNSPEC_FIX_NOTRUNC.  The Intel-syntax source is printed
;; with the %k modifier.
3375 (define_insn "sse_cvtss2si<round_name>"
3376 [(set (match_operand:SI 0 "register_operand" "=r,r")
3379 (match_operand:V4SF 1 "<round_nimm_predicate>" "v,<round_constraint2>")
3380 (parallel [(const_int 0)]))]
3381 UNSPEC_FIX_NOTRUNC))]
3383 "%vcvtss2si\t{<round_op2>%1, %0|%0, %k1<round_op2>}"
3384 [(set_attr "type" "sseicvt")
3385 (set_attr "athlon_decode" "double,vector")
3386 (set_attr "bdver1_decode" "double,double")
3387 (set_attr "prefix_rep" "1")
3388 (set_attr "prefix" "maybe_vex")
3389 (set_attr "mode" "SI")])
;; Variant of sse_cvtss2si whose source is a scalar SFmode operand
;; (register or memory) rather than element 0 of a V4SF vector.
3391 (define_insn "sse_cvtss2si_2"
3392 [(set (match_operand:SI 0 "register_operand" "=r,r")
3393 (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "v,m")]
3394 UNSPEC_FIX_NOTRUNC))]
3396 "%vcvtss2si\t{%1, %0|%0, %k1}"
3397 [(set_attr "type" "sseicvt")
3398 (set_attr "athlon_decode" "double,vector")
3399 (set_attr "amdfam10_decode" "double,double")
3400 (set_attr "bdver1_decode" "double,double")
3401 (set_attr "prefix_rep" "1")
3402 (set_attr "prefix" "maybe_vex")
3403 (set_attr "mode" "SI")])
;; 64-bit variant of sse_cvtss2si: the destination is a DImode register
;; and the pattern requires TARGET_64BIT.  The AT&T mnemonic carries the
;; {q} suffix.
3405 (define_insn "sse_cvtss2siq<round_name>"
3406 [(set (match_operand:DI 0 "register_operand" "=r,r")
3409 (match_operand:V4SF 1 "<round_nimm_predicate>" "v,<round_constraint2>")
3410 (parallel [(const_int 0)]))]
3411 UNSPEC_FIX_NOTRUNC))]
3412 "TARGET_SSE && TARGET_64BIT"
3413 "%vcvtss2si{q}\t{<round_op2>%1, %0|%0, %k1<round_op2>}"
3414 [(set_attr "type" "sseicvt")
3415 (set_attr "athlon_decode" "double,vector")
3416 (set_attr "bdver1_decode" "double,double")
3417 (set_attr "prefix_rep" "1")
3418 (set_attr "prefix" "maybe_vex")
3419 (set_attr "mode" "DI")])
;; 64-bit variant of sse_cvtss2si_2: scalar SFmode source, DImode
;; destination, requires TARGET_64BIT.
3421 (define_insn "sse_cvtss2siq_2"
3422 [(set (match_operand:DI 0 "register_operand" "=r,r")
3423 (unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "v,m")]
3424 UNSPEC_FIX_NOTRUNC))]
3425 "TARGET_SSE && TARGET_64BIT"
3426 "%vcvtss2si{q}\t{%1, %0|%0, %k1}"
3427 [(set_attr "type" "sseicvt")
3428 (set_attr "athlon_decode" "double,vector")
3429 (set_attr "amdfam10_decode" "double,double")
3430 (set_attr "bdver1_decode" "double,double")
3431 (set_attr "prefix_rep" "1")
3432 (set_attr "prefix" "maybe_vex")
3433 (set_attr "mode" "DI")])
;; cvttss2si: conversion of element 0 of the V4SF operand 1 to an SImode
;; integer register.  Uses the <round_saeonly_*> attribute family rather
;; than <round_*>.
3435 (define_insn "sse_cvttss2si<round_saeonly_name>"
3436 [(set (match_operand:SI 0 "register_operand" "=r,r")
3439 (match_operand:V4SF 1 "<round_saeonly_nimm_predicate>" "v,<round_saeonly_constraint2>")
3440 (parallel [(const_int 0)]))))]
3442 "%vcvttss2si\t{<round_saeonly_op2>%1, %0|%0, %k1<round_saeonly_op2>}"
3443 [(set_attr "type" "sseicvt")
3444 (set_attr "athlon_decode" "double,vector")
3445 (set_attr "amdfam10_decode" "double,double")
3446 (set_attr "bdver1_decode" "double,double")
3447 (set_attr "prefix_rep" "1")
3448 (set_attr "prefix" "maybe_vex")
3449 (set_attr "mode" "SI")])
;; 64-bit variant of sse_cvttss2si: DImode destination, requires
;; TARGET_64BIT, AT&T mnemonic carries the {q} suffix.
;; NOTE(review): the second alternative of operand 1 uses
;; <round_saeonly_constraint>, whereas sse_cvttss2si and both
;; sse2_cvttsd2si patterns use <round_saeonly_constraint2>.  Confirm
;; whether the difference is intended.
3451 (define_insn "sse_cvttss2siq<round_saeonly_name>"
3452 [(set (match_operand:DI 0 "register_operand" "=r,r")
3455 (match_operand:V4SF 1 "<round_saeonly_nimm_predicate>" "v,<round_saeonly_constraint>")
3456 (parallel [(const_int 0)]))))]
3457 "TARGET_SSE && TARGET_64BIT"
3458 "%vcvttss2si{q}\t{<round_saeonly_op2>%1, %0|%0, %k1<round_saeonly_op2>}"
3459 [(set_attr "type" "sseicvt")
3460 (set_attr "athlon_decode" "double,vector")
3461 (set_attr "amdfam10_decode" "double,double")
3462 (set_attr "bdver1_decode" "double,double")
3463 (set_attr "prefix_rep" "1")
3464 (set_attr "prefix" "maybe_vex")
3465 (set_attr "mode" "DI")])
;; vcvtusi2ss/vcvtusi2sd from a 32-bit source: the unsigned SImode operand
;; 2 is converted to the scalar float mode (unsigned_float) and
;; duplicated across a VF_128 vector; operand 1 is a separate VF_128
;; register input.  EVEX-only; requires TARGET_AVX512F together with
;; <round_modev4sf_condition>.
3467 (define_insn "cvtusi2<ssescalarmodesuffix>32<round_name>"
3468 [(set (match_operand:VF_128 0 "register_operand" "=v")
3470 (vec_duplicate:VF_128
3471 (unsigned_float:<ssescalarmode>
3472 (match_operand:SI 2 "<round_nimm_predicate>" "<round_constraint3>")))
3473 (match_operand:VF_128 1 "register_operand" "v")
3475 "TARGET_AVX512F && <round_modev4sf_condition>"
3476 "vcvtusi2<ssescalarmodesuffix>\t{<round_op3>%2, %1, %0|%0, %1, %2<round_op3>}"
3477 [(set_attr "type" "sseicvt")
3478 (set_attr "prefix" "evex")
3479 (set_attr "mode" "<ssescalarmode>")])
;; 64-bit-source variant of the vcvtusi2ss/vcvtusi2sd pattern: operand 2
;; is DImode and the pattern requires TARGET_AVX512F && TARGET_64BIT.
3481 (define_insn "cvtusi2<ssescalarmodesuffix>64<round_name>"
3482 [(set (match_operand:VF_128 0 "register_operand" "=v")
3484 (vec_duplicate:VF_128
3485 (unsigned_float:<ssescalarmode>
3486 (match_operand:DI 2 "<round_nimm_predicate>" "<round_constraint3>")))
3487 (match_operand:VF_128 1 "register_operand" "v")
3489 "TARGET_AVX512F && TARGET_64BIT"
3490 "vcvtusi2<ssescalarmodesuffix>\t{<round_op3>%2, %1, %0|%0, %1, %2<round_op3>}"
3491 [(set_attr "type" "sseicvt")
3492 (set_attr "prefix" "evex")
3493 (set_attr "mode" "<ssescalarmode>")])
;; cvtdq2ps: convert the integer vector operand 1 (mode <sseintvecmode>)
;; to the single-precision vector mode of the VF1 iterator.  Requires
;; TARGET_SSE2 plus the masking and rounding 512-bit-mode conditions.
3495 (define_insn "float<sseintvecmodelower><mode>2<mask_name><round_name>"
3496 [(set (match_operand:VF1 0 "register_operand" "=v")
3498 (match_operand:<sseintvecmode> 1 "<round_nimm_predicate>" "<round_constraint>")))]
3499 "TARGET_SSE2 && <mask_mode512bit_condition> && <round_mode512bit_condition>"
3500 "%vcvtdq2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
3501 [(set_attr "type" "ssecvt")
3502 (set_attr "prefix" "maybe_vex")
3503 (set_attr "mode" "<sseinsnmode>")])
;; vcvtudq2ps: unsigned V16SI to V16SF conversion (unsigned_float),
;; EVEX-encoded, with optional masking and embedded rounding.
3505 (define_insn "ufloatv16siv16sf2<mask_name><round_name>"
3506 [(set (match_operand:V16SF 0 "register_operand" "=v")
3507 (unsigned_float:V16SF
3508 (match_operand:V16SI 1 "<round_nimm_predicate>" "<round_constraint>")))]
3510 "vcvtudq2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
3511 [(set_attr "type" "ssecvt")
3512 (set_attr "prefix" "evex")
3513 (set_attr "mode" "V16SF")])
;; Standard-name expander for the unsigned integer vector to
;; single-precision vector conversion.  Available for TARGET_SSE2 when the
;; mode is V4SF, or for any VF1 mode when TARGET_AVX2 is set.  V16SFmode
;; is routed to the ufloatv16siv16sf2 insn;
;; ix86_expand_vector_convert_uns_vsivsf performs the helper-based
;; expansion.
3515 (define_expand "floatuns<sseintvecmodelower><mode>2"
3516 [(match_operand:VF1 0 "register_operand")
3517 (match_operand:<sseintvecmode> 1 "register_operand")]
3518 "TARGET_SSE2 && (<MODE>mode == V4SFmode || TARGET_AVX2)"
3520 if (<MODE>mode == V16SFmode)
3521 emit_insn (gen_ufloatv16siv16sf2 (operands[0], operands[1]));
3523 ix86_expand_vector_convert_uns_vsivsf (operands[0], operands[1]);
3529 ;; For <sse2_avx_avx512f>_fix_notrunc<sf2simodelower><mode> insn pattern
;; Maps each SImode vector mode to the lower-case name of the SFmode
;; vector mode with the same number of elements.
3530 (define_mode_attr sf2simodelower
3531 [(V16SI "v16sf") (V8SI "v8sf") (V4SI "v4sf")])
;; cvtps2dq over the VI4_AVX modes: conversion of the <ssePSmode> operand
;; 1 (register or memory) using UNSPEC_FIX_NOTRUNC.  prefix_data16 is
;; computed from a TARGET_AVX test instead of being a fixed string.
3533 (define_insn "<sse2_avx_avx512f>_fix_notrunc<sf2simodelower><mode>"
3534 [(set (match_operand:VI4_AVX 0 "register_operand" "=v")
3536 [(match_operand:<ssePSmode> 1 "nonimmediate_operand" "vm")]
3537 UNSPEC_FIX_NOTRUNC))]
3539 "%vcvtps2dq\t{%1, %0|%0, %1}"
3540 [(set_attr "type" "ssecvt")
3541 (set (attr "prefix_data16")
3543 (match_test "TARGET_AVX")
3545 (const_string "1")))
3546 (set_attr "prefix" "maybe_vex")
3547 (set_attr "mode" "<sseinsnmode>")])
;; vcvtps2dq, 512-bit form: V16SF to V16SI using UNSPEC_FIX_NOTRUNC,
;; EVEX-encoded, with optional masking and embedded rounding.
3549 (define_insn "<mask_codefor>avx512f_fix_notruncv16sfv16si<mask_name><round_name>"
3550 [(set (match_operand:V16SI 0 "register_operand" "=v")
3552 [(match_operand:V16SF 1 "<round_nimm_predicate>" "<round_constraint>")]
3553 UNSPEC_FIX_NOTRUNC))]
3555 "vcvtps2dq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
3556 [(set_attr "type" "ssecvt")
3557 (set_attr "prefix" "evex")
3558 (set_attr "mode" "XI")])
;; vcvtps2udq: unsigned counterpart of the pattern above, using
;; UNSPEC_UNSIGNED_FIX_NOTRUNC for the V16SF to V16SI conversion.
3560 (define_insn "<mask_codefor>avx512f_ufix_notruncv16sfv16si<mask_name><round_name>"
3561 [(set (match_operand:V16SI 0 "register_operand" "=v")
3563 [(match_operand:V16SF 1 "<round_nimm_predicate>" "<round_constraint>")]
3564 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
3566 "vcvtps2udq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
3567 [(set_attr "type" "ssecvt")
3568 (set_attr "prefix" "evex")
3569 (set_attr "mode" "XI")])
;; vcvttps2dq / vcvttps2udq, 512-bit form: V16SF to V16SI conversion.  The
;; <fixsuffix> attribute selects both the pattern-name prefix and the
;; mnemonic infix.  EVEX-encoded; uses the <round_saeonly_*> attributes.
3571 (define_insn "<fixsuffix>fix_truncv16sfv16si2<mask_name><round_saeonly_name>"
3572 [(set (match_operand:V16SI 0 "register_operand" "=v")
3574 (match_operand:V16SF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
3576 "vcvttps2<fixsuffix>dq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
3577 [(set_attr "type" "ssecvt")
3578 (set_attr "prefix" "evex")
3579 (set_attr "mode" "XI")])
;; vcvttps2dq, 256-bit form: truncating (fix) V8SF to V8SI conversion,
;; VEX-encoded.
3581 (define_insn "fix_truncv8sfv8si2"
3582 [(set (match_operand:V8SI 0 "register_operand" "=x")
3583 (fix:V8SI (match_operand:V8SF 1 "nonimmediate_operand" "xm")))]
3585 "vcvttps2dq\t{%1, %0|%0, %1}"
3586 [(set_attr "type" "ssecvt")
3587 (set_attr "prefix" "vex")
3588 (set_attr "mode" "OI")])
;; cvttps2dq, 128-bit form: truncating (fix) V4SF to V4SI conversion.
;; prefix_rep and prefix_data16 are computed from TARGET_AVX tests.
;; NOTE(review): prefix_data16 is specified twice below, once through the
;; conditional (set (attr ...)) form and once through a plain set_attr
;; with value "0".  One of the two looks redundant; confirm which is
;; intended and remove the other.
3590 (define_insn "fix_truncv4sfv4si2"
3591 [(set (match_operand:V4SI 0 "register_operand" "=x")
3592 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
3594 "%vcvttps2dq\t{%1, %0|%0, %1}"
3595 [(set_attr "type" "ssecvt")
3596 (set (attr "prefix_rep")
3598 (match_test "TARGET_AVX")
3600 (const_string "1")))
3601 (set (attr "prefix_data16")
3603 (match_test "TARGET_AVX")
3605 (const_string "0")))
3606 (set_attr "prefix_data16" "0")
3607 (set_attr "prefix" "maybe_vex")
3608 (set_attr "mode" "TI")])
;; Standard-name expander for the unsigned truncating conversion from a
;; VF1 vector to the matching integer vector.  V16SFmode is routed to the
;; ufix_truncv16sfv16si2 insn.  The helper-based path calls
;; ix86_expand_adjust_ufix_to_sfix_si, which returns an adjusted input and
;; fills tmp[2]; it then does a signed fix_trunc into a fresh register and
;; XORs that result with tmp[2] to produce operand 0.
3610 (define_expand "fixuns_trunc<mode><sseintvecmodelower>2"
3611 [(match_operand:<sseintvecmode> 0 "register_operand")
3612 (match_operand:VF1 1 "register_operand")]
3615 if (<MODE>mode == V16SFmode)
3616 emit_insn (gen_ufix_truncv16sfv16si2 (operands[0],
3621 tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]);
3622 tmp[1] = gen_reg_rtx (<sseintvecmode>mode);
3623 emit_insn (gen_fix_trunc<mode><sseintvecmodelower>2 (tmp[1], tmp[0]));
3624 emit_insn (gen_xor<sseintvecmodelower>3 (operands[0], tmp[1], tmp[2]));
3629 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3631 ;; Parallel double-precision floating point conversion operations
3633 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2pd: V2SI to V2DF conversion.  Alternative 0 takes the source from
;; an MMX register (unit mmx, prefix_data16 1); alternative 1 takes it
;; from memory.
3635 (define_insn "sse2_cvtpi2pd"
3636 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
3637 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
3639 "cvtpi2pd\t{%1, %0|%0, %1}"
3640 [(set_attr "type" "ssecvt")
3641 (set_attr "unit" "mmx,*")
3642 (set_attr "prefix_data16" "1,*")
3643 (set_attr "mode" "V2DF")])
;; cvtpd2pi: V2DF to V2SI conversion using UNSPEC_FIX_NOTRUNC, with the
;; result in an MMX register ("=y").
3645 (define_insn "sse2_cvtpd2pi"
3646 [(set (match_operand:V2SI 0 "register_operand" "=y")
3647 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
3648 UNSPEC_FIX_NOTRUNC))]
3650 "cvtpd2pi\t{%1, %0|%0, %1}"
3651 [(set_attr "type" "ssecvt")
3652 (set_attr "unit" "mmx")
3653 (set_attr "bdver1_decode" "double")
3654 (set_attr "btver2_decode" "direct")
3655 (set_attr "prefix_data16" "1")
3656 (set_attr "mode" "DI")])
;; cvttpd2pi: truncating (fix) V2DF to V2SI conversion, with the result in
;; an MMX register ("=y").
3658 (define_insn "sse2_cvttpd2pi"
3659 [(set (match_operand:V2SI 0 "register_operand" "=y")
3660 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
3662 "cvttpd2pi\t{%1, %0|%0, %1}"
3663 [(set_attr "type" "ssecvt")
3664 (set_attr "unit" "mmx")
3665 (set_attr "bdver1_decode" "double")
3666 (set_attr "prefix_data16" "1")
3667 (set_attr "mode" "TI")])
;; cvtsi2sd: convert the SImode operand 2 to DFmode.  Alternatives 0 and 1
;; are the legacy two-operand forms (isa noavx) with operand 1 tied to the
;; destination.  Alternative 2 is the three-operand VEX vcvtsi2sd form
;; (isa avx).
3669 (define_insn "sse2_cvtsi2sd"
3670 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
3673 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m,rm")))
3674 (match_operand:V2DF 1 "register_operand" "0,0,x")
3678 cvtsi2sd\t{%2, %0|%0, %2}
3679 cvtsi2sd\t{%2, %0|%0, %2}
3680 vcvtsi2sd\t{%2, %1, %0|%0, %1, %2}"
3681 [(set_attr "isa" "noavx,noavx,avx")
3682 (set_attr "type" "sseicvt")
3683 (set_attr "athlon_decode" "double,direct,*")
3684 (set_attr "amdfam10_decode" "vector,double,*")
3685 (set_attr "bdver1_decode" "double,direct,*")
3686 (set_attr "btver2_decode" "double,double,double")
3687 (set_attr "prefix" "orig,orig,vex")
3688 (set_attr "mode" "DF")])
;; 64-bit variant of sse2_cvtsi2sd: operand 2 is DImode, the pattern
;; requires TARGET_64BIT, and the AVX alternative may carry an embedded
;; rounding operand (prefix maybe_evex).
3690 (define_insn "sse2_cvtsi2sdq<round_name>"
3691 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
3694 (float:DF (match_operand:DI 2 "<round_nimm_predicate>" "r,m,<round_constraint3>")))
3695 (match_operand:V2DF 1 "register_operand" "0,0,v")
3697 "TARGET_SSE2 && TARGET_64BIT"
3699 cvtsi2sdq\t{%2, %0|%0, %2}
3700 cvtsi2sdq\t{%2, %0|%0, %2}
3701 vcvtsi2sdq\t{<round_op3>%2, %1, %0|%0, %1, %2<round_op3>}"
3702 [(set_attr "isa" "noavx,noavx,avx")
3703 (set_attr "type" "sseicvt")
3704 (set_attr "athlon_decode" "double,direct,*")
3705 (set_attr "amdfam10_decode" "vector,double,*")
3706 (set_attr "bdver1_decode" "double,direct,*")
3707 (set_attr "length_vex" "*,*,4")
3708 (set_attr "prefix_rex" "1,1,*")
3709 (set_attr "prefix" "orig,orig,maybe_evex")
3710 (set_attr "mode" "DF")])
;; vcvtss2usi: convert element 0 of the V4SF operand 1 to an unsigned
;; SImode integer using UNSPEC_UNSIGNED_FIX_NOTRUNC.  EVEX-only.
3712 (define_insn "avx512f_vcvtss2usi<round_name>"
3713 [(set (match_operand:SI 0 "register_operand" "=r")
3716 (match_operand:V4SF 1 "<round_nimm_predicate>" "<round_constraint>")
3717 (parallel [(const_int 0)]))]
3718 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
3720 "vcvtss2usi\t{<round_op2>%1, %0|%0, %1<round_op2>}"
3721 [(set_attr "type" "sseicvt")
3722 (set_attr "prefix" "evex")
3723 (set_attr "mode" "SI")])
;; 64-bit variant of avx512f_vcvtss2usi: DImode destination, requires
;; TARGET_AVX512F && TARGET_64BIT.  The mnemonic is the same as for the
;; SImode pattern.
3725 (define_insn "avx512f_vcvtss2usiq<round_name>"
3726 [(set (match_operand:DI 0 "register_operand" "=r")
3729 (match_operand:V4SF 1 "<round_nimm_predicate>" "<round_constraint>")
3730 (parallel [(const_int 0)]))]
3731 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
3732 "TARGET_AVX512F && TARGET_64BIT"
3733 "vcvtss2usi\t{<round_op2>%1, %0|%0, %1<round_op2>}"
3734 [(set_attr "type" "sseicvt")
3735 (set_attr "prefix" "evex")
3736 (set_attr "mode" "DI")])
;; vcvttss2usi: conversion of element 0 of the V4SF operand 1 to an SImode
;; integer register.  EVEX-only; uses the <round_saeonly_*> attributes.
3738 (define_insn "avx512f_vcvttss2usi<round_saeonly_name>"
3739 [(set (match_operand:SI 0 "register_operand" "=r")
3742 (match_operand:V4SF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
3743 (parallel [(const_int 0)]))))]
3745 "vcvttss2usi\t{<round_saeonly_op2>%1, %0|%0, %1<round_saeonly_op2>}"
3746 [(set_attr "type" "sseicvt")
3747 (set_attr "prefix" "evex")
3748 (set_attr "mode" "SI")])
;; 64-bit variant of avx512f_vcvttss2usi: DImode destination, requires
;; TARGET_AVX512F && TARGET_64BIT.
3750 (define_insn "avx512f_vcvttss2usiq<round_saeonly_name>"
3751 [(set (match_operand:DI 0 "register_operand" "=r")
3754 (match_operand:V4SF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
3755 (parallel [(const_int 0)]))))]
3756 "TARGET_AVX512F && TARGET_64BIT"
3757 "vcvttss2usi\t{<round_saeonly_op2>%1, %0|%0, %1<round_saeonly_op2>}"
3758 [(set_attr "type" "sseicvt")
3759 (set_attr "prefix" "evex")
3760 (set_attr "mode" "DI")])
;; vcvtsd2usi: convert element 0 of the V2DF operand 1 to an unsigned
;; SImode integer using UNSPEC_UNSIGNED_FIX_NOTRUNC.  EVEX-only.
3762 (define_insn "avx512f_vcvtsd2usi<round_name>"
3763 [(set (match_operand:SI 0 "register_operand" "=r")
3766 (match_operand:V2DF 1 "<round_nimm_predicate>" "<round_constraint>")
3767 (parallel [(const_int 0)]))]
3768 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
3770 "vcvtsd2usi\t{<round_op2>%1, %0|%0, %1<round_op2>}"
3771 [(set_attr "type" "sseicvt")
3772 (set_attr "prefix" "evex")
3773 (set_attr "mode" "SI")])
;; 64-bit variant of avx512f_vcvtsd2usi: DImode destination, requires
;; TARGET_AVX512F && TARGET_64BIT.
3775 (define_insn "avx512f_vcvtsd2usiq<round_name>"
3776 [(set (match_operand:DI 0 "register_operand" "=r")
3779 (match_operand:V2DF 1 "<round_nimm_predicate>" "<round_constraint>")
3780 (parallel [(const_int 0)]))]
3781 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
3782 "TARGET_AVX512F && TARGET_64BIT"
3783 "vcvtsd2usi\t{<round_op2>%1, %0|%0, %1<round_op2>}"
3784 [(set_attr "type" "sseicvt")
3785 (set_attr "prefix" "evex")
3786 (set_attr "mode" "DI")])
;; vcvttsd2usi: conversion of element 0 of the V2DF operand 1 to an SImode
;; integer register.  EVEX-only; uses the <round_saeonly_*> attributes.
3788 (define_insn "avx512f_vcvttsd2usi<round_saeonly_name>"
3789 [(set (match_operand:SI 0 "register_operand" "=r")
3792 (match_operand:V2DF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
3793 (parallel [(const_int 0)]))))]
3795 "vcvttsd2usi\t{<round_saeonly_op2>%1, %0|%0, %1<round_saeonly_op2>}"
3796 [(set_attr "type" "sseicvt")
3797 (set_attr "prefix" "evex")
3798 (set_attr "mode" "SI")])
;; 64-bit variant of avx512f_vcvttsd2usi: DImode destination, requires
;; TARGET_AVX512F && TARGET_64BIT.
3800 (define_insn "avx512f_vcvttsd2usiq<round_saeonly_name>"
3801 [(set (match_operand:DI 0 "register_operand" "=r")
3804 (match_operand:V2DF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
3805 (parallel [(const_int 0)]))))]
3806 "TARGET_AVX512F && TARGET_64BIT"
3807 "vcvttsd2usi\t{<round_saeonly_op2>%1, %0|%0, %1<round_saeonly_op2>}"
3808 [(set_attr "type" "sseicvt")
3809 (set_attr "prefix" "evex")
3810 (set_attr "mode" "DI")])
;; cvtsd2si: convert element 0 of the V2DF operand 1 to an SImode integer
;; register using UNSPEC_FIX_NOTRUNC.  The Intel-syntax source is printed
;; with the %q modifier.
3812 (define_insn "sse2_cvtsd2si<round_name>"
3813 [(set (match_operand:SI 0 "register_operand" "=r,r")
3816 (match_operand:V2DF 1 "<round_nimm_predicate>" "v,<round_constraint2>")
3817 (parallel [(const_int 0)]))]
3818 UNSPEC_FIX_NOTRUNC))]
3820 "%vcvtsd2si\t{<round_op2>%1, %0|%0, %q1<round_op2>}"
3821 [(set_attr "type" "sseicvt")
3822 (set_attr "athlon_decode" "double,vector")
3823 (set_attr "bdver1_decode" "double,double")
3824 (set_attr "btver2_decode" "double,double")
3825 (set_attr "prefix_rep" "1")
3826 (set_attr "prefix" "maybe_vex")
3827 (set_attr "mode" "SI")])
;; Variant of sse2_cvtsd2si whose source is a scalar DFmode operand
;; (register or memory) rather than element 0 of a V2DF vector.
3829 (define_insn "sse2_cvtsd2si_2"
3830 [(set (match_operand:SI 0 "register_operand" "=r,r")
3831 (unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "v,m")]
3832 UNSPEC_FIX_NOTRUNC))]
3834 "%vcvtsd2si\t{%1, %0|%0, %q1}"
3835 [(set_attr "type" "sseicvt")
3836 (set_attr "athlon_decode" "double,vector")
3837 (set_attr "amdfam10_decode" "double,double")
3838 (set_attr "bdver1_decode" "double,double")
3839 (set_attr "prefix_rep" "1")
3840 (set_attr "prefix" "maybe_vex")
3841 (set_attr "mode" "SI")])
;; 64-bit variant of sse2_cvtsd2si: DImode destination, requires
;; TARGET_SSE2 && TARGET_64BIT, AT&T mnemonic carries the {q} suffix.
3843 (define_insn "sse2_cvtsd2siq<round_name>"
3844 [(set (match_operand:DI 0 "register_operand" "=r,r")
3847 (match_operand:V2DF 1 "<round_nimm_predicate>" "v,<round_constraint2>")
3848 (parallel [(const_int 0)]))]
3849 UNSPEC_FIX_NOTRUNC))]
3850 "TARGET_SSE2 && TARGET_64BIT"
3851 "%vcvtsd2si{q}\t{<round_op2>%1, %0|%0, %q1<round_op2>}"
3852 [(set_attr "type" "sseicvt")
3853 (set_attr "athlon_decode" "double,vector")
3854 (set_attr "bdver1_decode" "double,double")
3855 (set_attr "prefix_rep" "1")
3856 (set_attr "prefix" "maybe_vex")
3857 (set_attr "mode" "DI")])
;; 64-bit variant of sse2_cvtsd2si_2: scalar DFmode source, DImode
;; destination, requires TARGET_SSE2 && TARGET_64BIT.
3859 (define_insn "sse2_cvtsd2siq_2"
3860 [(set (match_operand:DI 0 "register_operand" "=r,r")
3861 (unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "v,m")]
3862 UNSPEC_FIX_NOTRUNC))]
3863 "TARGET_SSE2 && TARGET_64BIT"
3864 "%vcvtsd2si{q}\t{%1, %0|%0, %q1}"
3865 [(set_attr "type" "sseicvt")
3866 (set_attr "athlon_decode" "double,vector")
3867 (set_attr "amdfam10_decode" "double,double")
3868 (set_attr "bdver1_decode" "double,double")
3869 (set_attr "prefix_rep" "1")
3870 (set_attr "prefix" "maybe_vex")
3871 (set_attr "mode" "DI")])
;; cvttsd2si: conversion of element 0 of the V2DF operand 1 to an SImode
;; integer register.  Uses the <round_saeonly_*> attribute family.
3873 (define_insn "sse2_cvttsd2si<round_saeonly_name>"
3874 [(set (match_operand:SI 0 "register_operand" "=r,r")
3877 (match_operand:V2DF 1 "<round_saeonly_nimm_predicate>" "v,<round_saeonly_constraint2>")
3878 (parallel [(const_int 0)]))))]
3880 "%vcvttsd2si\t{<round_saeonly_op2>%1, %0|%0, %q1<round_saeonly_op2>}"
3881 [(set_attr "type" "sseicvt")
3882 (set_attr "athlon_decode" "double,vector")
3883 (set_attr "amdfam10_decode" "double,double")
3884 (set_attr "bdver1_decode" "double,double")
3885 (set_attr "btver2_decode" "double,double")
3886 (set_attr "prefix_rep" "1")
3887 (set_attr "prefix" "maybe_vex")
3888 (set_attr "mode" "SI")])
;; 64-bit variant of sse2_cvttsd2si: DImode destination, requires
;; TARGET_SSE2 && TARGET_64BIT, AT&T mnemonic carries the {q} suffix.
3890 (define_insn "sse2_cvttsd2siq<round_saeonly_name>"
3891 [(set (match_operand:DI 0 "register_operand" "=r,r")
3894 (match_operand:V2DF 1 "<round_saeonly_nimm_predicate>" "v,<round_saeonly_constraint2>")
3895 (parallel [(const_int 0)]))))]
3896 "TARGET_SSE2 && TARGET_64BIT"
3897 "%vcvttsd2si{q}\t{<round_saeonly_op2>%1, %0|%0, %q1<round_saeonly_op2>}"
3898 [(set_attr "type" "sseicvt")
3899 (set_attr "athlon_decode" "double,vector")
3900 (set_attr "amdfam10_decode" "double,double")
3901 (set_attr "bdver1_decode" "double,double")
3902 (set_attr "prefix_rep" "1")
3903 (set_attr "prefix" "maybe_vex")
3904 (set_attr "mode" "DI")])
3906 ;; For float<si2dfmode><mode>2 insn pattern
;; si2dfmode maps each DFmode vector mode to the SImode vector mode with
;; the same number of elements; si2dfmodelower gives the same mapping in
;; lower case for use in pattern names.
3907 (define_mode_attr si2dfmode
3908 [(V8DF "V8SI") (V4DF "V4SI")])
3909 (define_mode_attr si2dfmodelower
3910 [(V8DF "v8si") (V4DF "v4si")])
;; vcvtdq2pd for the 512- and 256-bit double modes (VF2_512_256): signed
;; conversion of the <si2dfmode> operand 1 to double precision.  Requires
;; TARGET_AVX plus the masking 512-bit-mode condition.
3912 (define_insn "float<si2dfmodelower><mode>2<mask_name>"
3913 [(set (match_operand:VF2_512_256 0 "register_operand" "=v")
3914 (float:VF2_512_256 (match_operand:<si2dfmode> 1 "nonimmediate_operand" "vm")))]
3915 "TARGET_AVX && <mask_mode512bit_condition>"
3916 "vcvtdq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
3917 [(set_attr "type" "ssecvt")
3918 (set_attr "prefix" "maybe_vex")
3919 (set_attr "mode" "<MODE>")])
;; vcvtudq2pd: unsigned V8SI to V8DF conversion (unsigned_float),
;; EVEX-encoded, with optional masking.
3921 (define_insn "ufloatv8siv8df<mask_name>"
3922 [(set (match_operand:V8DF 0 "register_operand" "=v")
3923 (unsigned_float:V8DF
3924 (match_operand:V8SI 1 "nonimmediate_operand" "vm")))]
3926 "vcvtudq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
3927 [(set_attr "type" "ssecvt")
3928 (set_attr "prefix" "evex")
3929 (set_attr "mode" "V8DF")])
;; vcvtdq2pd producing V8DF from elements 0-7 of a V16SI source.  The
;; source operand is printed with the %t modifier.
;; NOTE(review): %t presumably prints the 256-bit view of the operand;
;; confirm against the i386 operand-printing code.
3931 (define_insn "avx512f_cvtdq2pd512_2"
3932 [(set (match_operand:V8DF 0 "register_operand" "=v")
3935 (match_operand:V16SI 1 "nonimmediate_operand" "vm")
3936 (parallel [(const_int 0) (const_int 1)
3937 (const_int 2) (const_int 3)
3938 (const_int 4) (const_int 5)
3939 (const_int 6) (const_int 7)]))))]
3941 "vcvtdq2pd\t{%t1, %0|%0, %t1}"
3942 [(set_attr "type" "ssecvt")
3943 (set_attr "prefix" "evex")
3944 (set_attr "mode" "V8DF")])
;; vcvtdq2pd producing V4DF from elements 0-3 of a V8SI source.  The
;; source operand is printed with the %x modifier.
;; NOTE(review): %x presumably prints the 128-bit view of the operand;
;; confirm against the i386 operand-printing code.
3946 (define_insn "avx_cvtdq2pd256_2"
3947 [(set (match_operand:V4DF 0 "register_operand" "=x")
3950 (match_operand:V8SI 1 "nonimmediate_operand" "xm")
3951 (parallel [(const_int 0) (const_int 1)
3952 (const_int 2) (const_int 3)]))))]
3954 "vcvtdq2pd\t{%x1, %0|%0, %x1}"
3955 [(set_attr "type" "ssecvt")
3956 (set_attr "prefix" "vex")
3957 (set_attr "mode" "V4DF")])
;; cvtdq2pd producing V2DF from elements 0 and 1 of a V4SI source.  The
;; Intel-syntax source is printed with the %q modifier, and ssememalign is
;; set to 64 for this insn.
3959 (define_insn "sse2_cvtdq2pd"
3960 [(set (match_operand:V2DF 0 "register_operand" "=x")
3963 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
3964 (parallel [(const_int 0) (const_int 1)]))))]
3966 "%vcvtdq2pd\t{%1, %0|%0, %q1}"
3967 [(set_attr "type" "ssecvt")
3968 (set_attr "prefix" "maybe_vex")
3969 (set_attr "ssememalign" "64")
3970 (set_attr "mode" "V2DF")])
;; vcvtpd2dq, 512-bit form: V8DF to V8SI using UNSPEC_FIX_NOTRUNC,
;; EVEX-encoded, with optional masking and embedded rounding.
3972 (define_insn "<mask_codefor>avx512f_cvtpd2dq512<mask_name><round_name>"
3973 [(set (match_operand:V8SI 0 "register_operand" "=v")
3975 [(match_operand:V8DF 1 "<round_nimm_predicate>" "<round_constraint>")]
3976 UNSPEC_FIX_NOTRUNC))]
3978 "vcvtpd2dq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
3979 [(set_attr "type" "ssecvt")
3980 (set_attr "prefix" "evex")
3981 (set_attr "mode" "OI")])
;; vcvtpd2dq, 256-bit source: V4DF to V4SI using UNSPEC_FIX_NOTRUNC.  The
;; AT&T mnemonic carries the {y} suffix.
3983 (define_insn "avx_cvtpd2dq256"
3984 [(set (match_operand:V4SI 0 "register_operand" "=x")
3985 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")]
3986 UNSPEC_FIX_NOTRUNC))]
3988 "vcvtpd2dq{y}\t{%1, %0|%0, %1}"
3989 [(set_attr "type" "ssecvt")
3990 (set_attr "prefix" "vex")
3991 (set_attr "mode" "OI")])
;; Expander that builds a V8SI result from an unspec-based V4DF to V4SI
;; conversion.  The preparation statement supplies operands[2] as the
;; all-zero V4SI constant that the matching insn requires.
3993 (define_expand "avx_cvtpd2dq256_2"
3994 [(set (match_operand:V8SI 0 "register_operand")
3996 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand")]
4000 "operands[2] = CONST0_RTX (V4SImode);")
;; Insn matched by the avx_cvtpd2dq256_2 expander: emits vcvtpd2dq{y} with
;; the V8SI destination printed through the %x modifier.  Operand 2 must
;; be a zero V4SI constant.
4002 (define_insn "*avx_cvtpd2dq256_2"
4003 [(set (match_operand:V8SI 0 "register_operand" "=x")
4005 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")]
4007 (match_operand:V4SI 2 "const0_operand")))]
4009 "vcvtpd2dq{y}\t{%1, %x0|%x0, %1}"
4010 [(set_attr "type" "ssecvt")
4011 (set_attr "prefix" "vex")
4012 (set_attr "btver2_decode" "vector")
4013 (set_attr "mode" "OI")])
;; Expander that builds a V4SI result from an unspec-based V2DF to V2SI
;; conversion.  The preparation statement supplies operands[2] as the
;; all-zero V2SI constant that the matching insn requires.
4015 (define_expand "sse2_cvtpd2dq"
4016 [(set (match_operand:V4SI 0 "register_operand")
4018 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand")]
4022 "operands[2] = CONST0_RTX (V2SImode);")
;; Insn matched by the sse2_cvtpd2dq expander.  The output code returns
;; either the vcvtpd2dq{x} template or the legacy cvtpd2dq template
;; (prefix attribute maybe_vex).  Operand 2 must be a zero V2SI constant.
4024 (define_insn "*sse2_cvtpd2dq"
4025 [(set (match_operand:V4SI 0 "register_operand" "=x")
4027 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
4029 (match_operand:V2SI 2 "const0_operand")))]
4033 return "vcvtpd2dq{x}\t{%1, %0|%0, %1}";
4035 return "cvtpd2dq\t{%1, %0|%0, %1}";
4037 [(set_attr "type" "ssecvt")
4038 (set_attr "prefix_rep" "1")
4039 (set_attr "prefix_data16" "0")
4040 (set_attr "prefix" "maybe_vex")
4041 (set_attr "mode" "TI")
4042 (set_attr "amdfam10_decode" "double")
4043 (set_attr "athlon_decode" "vector")
4044 (set_attr "bdver1_decode" "double")])
;; vcvtpd2udq: V8DF to V8SI using UNSPEC_UNSIGNED_FIX_NOTRUNC,
;; EVEX-encoded, with optional masking and embedded rounding.
4046 (define_insn "avx512f_ufix_notruncv8dfv8si<mask_name><round_name>"
4047 [(set (match_operand:V8SI 0 "register_operand" "=v")
4049 [(match_operand:V8DF 1 "<round_nimm_predicate>" "<round_constraint>")]
4050 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4052 "vcvtpd2udq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4053 [(set_attr "type" "ssecvt")
4054 (set_attr "prefix" "evex")
4055 (set_attr "mode" "OI")])
;; V8DF -> V8SI conversion emitting vcvttpd2<fixsuffix>dq with EVEX prefix;
;; <fixsuffix> appears in both the pattern name and the mnemonic.  Operand 1
;; uses the <round_saeonly_*> predicate and constraint.  The rtx code applied
;; to operand 1 is on a line not visible in this excerpt.
4057 (define_insn "<fixsuffix>fix_truncv8dfv8si2<mask_name><round_saeonly_name>"
4058 [(set (match_operand:V8SI 0 "register_operand" "=v")
4060 (match_operand:V8DF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
4062 "vcvttpd2<fixsuffix>dq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
4063 [(set_attr "type" "ssecvt")
4064 (set_attr "prefix" "evex")
4065 (set_attr "mode" "OI")])
;; (fix:V4SI V4DF): operand 1 may be a register or memory; emits
;; vcvttpd2dq{y} with VEX prefix into V4SI operand 0.
4067 (define_insn "fix_truncv4dfv4si2"
4068 [(set (match_operand:V4SI 0 "register_operand" "=x")
4069 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
4071 "vcvttpd2dq{y}\t{%1, %0|%0, %1}"
4072 [(set_attr "type" "ssecvt")
4073 (set_attr "prefix" "vex")
4074 (set_attr "mode" "OI")])
;; Expander: V8SI operand 0 is built from (fix:V4SI V4DF operand 1).
;; operands[2] is set to the V4SI zero constant (presumably the other half
;; of the result; the combining rtx is not visible here).
4076 (define_expand "avx_cvttpd2dq256_2"
4077 [(set (match_operand:V8SI 0 "register_operand")
4079 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand"))
4082 "operands[2] = CONST0_RTX (V4SImode);")
;; Insn form: (fix:V4SI V4DF operand 1) paired with a V4SI zero constant
;; (operand 2, const0_operand) in a V8SI destination.  Emits vcvttpd2dq{y}
;; with the destination printed through the %x modifier (presumably the
;; 128-bit register name -- TODO confirm).
4084 (define_insn "*avx_cvttpd2dq256_2"
4085 [(set (match_operand:V8SI 0 "register_operand" "=x")
4087 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "xm"))
4088 (match_operand:V4SI 2 "const0_operand")))]
4090 "vcvttpd2dq{y}\t{%1, %x0|%x0, %1}"
4091 [(set_attr "type" "ssecvt")
4092 (set_attr "prefix" "vex")
4093 (set_attr "btver2_decode" "vector")
4094 (set_attr "mode" "OI")])
;; Expander: V4SI operand 0 is built from (fix:V2SI V2DF operand 1).
;; operands[2] is set to the V2SI zero constant (presumably the other half
;; of the result; the combining rtx is not visible here).
4096 (define_expand "sse2_cvttpd2dq"
4097 [(set (match_operand:V4SI 0 "register_operand")
4099 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand"))
4102 "operands[2] = CONST0_RTX (V2SImode);")
;; Insn form of (fix:V2SI V2DF) with a V2SI zero operand 2.  The output code
;; returns either vcvttpd2dq{x} or the legacy cvttpd2dq; the test selecting
;; between them is not visible in this excerpt.
4104 (define_insn "*sse2_cvttpd2dq"
4105 [(set (match_operand:V4SI 0 "register_operand" "=x")
4107 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
4108 (match_operand:V2SI 2 "const0_operand")))]
4112 return "vcvttpd2dq{x}\t{%1, %0|%0, %1}";
4114 return "cvttpd2dq\t{%1, %0|%0, %1}";
4116 [(set_attr "type" "ssecvt")
4117 (set_attr "amdfam10_decode" "double")
4118 (set_attr "athlon_decode" "vector")
4119 (set_attr "bdver1_decode" "double")
4120 (set_attr "prefix" "maybe_vex")
4121 (set_attr "mode" "TI")])
;; Double -> single conversion merged into a V4SF: (float_truncate:V2SF) of
;; V2DF operand 2 is combined with V4SF operand 1 (the merging rtx is on
;; lines not visible here).  Three alternatives:
;;   0/1 (isa noavx): two-operand cvtsd2ss, operand 1 tied to operand 0;
;;                    alternative 1 takes operand 2 from memory.
;;   2   (isa avx):   three-operand vcvtsd2ss, optionally with <round_op3>.
4123 (define_insn "sse2_cvtsd2ss<round_name>"
4124 [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
4127 (float_truncate:V2SF
4128 (match_operand:V2DF 2 "nonimmediate_operand" "x,m,<round_constraint>")))
4129 (match_operand:V4SF 1 "register_operand" "0,0,v")
4133 cvtsd2ss\t{%2, %0|%0, %2}
4134 cvtsd2ss\t{%2, %0|%0, %q2}
4135 vcvtsd2ss\t{<round_op3>%2, %1, %0|%0, %1, %q2<round_op3>}"
4136 [(set_attr "isa" "noavx,noavx,avx")
4137 (set_attr "type" "ssecvt")
4138 (set_attr "athlon_decode" "vector,double,*")
4139 (set_attr "amdfam10_decode" "vector,double,*")
4140 (set_attr "bdver1_decode" "direct,direct,*")
4141 (set_attr "btver2_decode" "double,double,double")
4142 (set_attr "prefix" "orig,orig,<round_prefix>")
4143 (set_attr "mode" "SF")])
;; Single -> double conversion merged into a V2DF: elements 0 and 1 of V4SF
;; operand 2 are selected (the converting and merging rtx are on lines not
;; visible here) and combined with V2DF operand 1.  Three alternatives:
;;   0/1 (isa noavx): two-operand cvtss2sd, operand 1 tied to operand 0;
;;                    alternative 1 takes operand 2 from memory.
;;   2   (isa avx):   three-operand vcvtss2sd, optionally with
;;                    <round_saeonly_op3>.
4145 (define_insn "sse2_cvtss2sd<round_saeonly_name>"
4146 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
4150 (match_operand:V4SF 2 "nonimmediate_operand" "x,m,<round_saeonly_constraint>")
4151 (parallel [(const_int 0) (const_int 1)])))
4152 (match_operand:V2DF 1 "register_operand" "0,0,v")
4156 cvtss2sd\t{%2, %0|%0, %2}
4157 cvtss2sd\t{%2, %0|%0, %k2}
4158 vcvtss2sd\t{<round_saeonly_op3>%2, %1, %0|%0, %1, %k2<round_saeonly_op3>}"
4159 [(set_attr "isa" "noavx,noavx,avx")
4160 (set_attr "type" "ssecvt")
4161 (set_attr "amdfam10_decode" "vector,double,*")
4162 (set_attr "athlon_decode" "direct,direct,*")
4163 (set_attr "bdver1_decode" "direct,direct,*")
4164 (set_attr "btver2_decode" "double,double,double")
4165 (set_attr "prefix" "orig,orig,<round_saeonly_prefix>")
4166 (set_attr "mode" "DF")])
;; (float_truncate:V8SF V8DF): emits vcvtpd2ps with EVEX prefix.  Masking and
;; rounding variants come from the <mask_name>/<round_name> substitutions.
4168 (define_insn "<mask_codefor>avx512f_cvtpd2ps512<mask_name><round_name>"
4169 [(set (match_operand:V8SF 0 "register_operand" "=v")
4170 (float_truncate:V8SF
4171 (match_operand:V8DF 1 "<round_nimm_predicate>" "<round_constraint>")))]
4173 "vcvtpd2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4174 [(set_attr "type" "ssecvt")
4175 (set_attr "prefix" "evex")
4176 (set_attr "mode" "V8SF")])
;; (float_truncate:V4SF V4DF): operand 1 may be a register or memory; emits
;; vcvtpd2ps{y} with VEX prefix.
4178 (define_insn "avx_cvtpd2ps256"
4179 [(set (match_operand:V4SF 0 "register_operand" "=x")
4180 (float_truncate:V4SF
4181 (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
4183 "vcvtpd2ps{y}\t{%1, %0|%0, %1}"
4184 [(set_attr "type" "ssecvt")
4185 (set_attr "prefix" "vex")
4186 (set_attr "btver2_decode" "vector")
4187 (set_attr "mode" "V4SF")])
;; Expander: V4SF operand 0 is built from (float_truncate:V2SF V2DF
;; operand 1).  operands[2] is set to the V2SF zero constant (presumably the
;; other half of the result; the combining rtx is not visible here).
4189 (define_expand "sse2_cvtpd2ps"
4190 [(set (match_operand:V4SF 0 "register_operand")
4192 (float_truncate:V2SF
4193 (match_operand:V2DF 1 "nonimmediate_operand"))
4196 "operands[2] = CONST0_RTX (V2SFmode);")
;; Insn form of (float_truncate:V2SF V2DF) with a V2SF zero operand 2.  The
;; output code returns either vcvtpd2ps{x} or the legacy cvtpd2ps; the test
;; selecting between them is not visible in this excerpt.
4198 (define_insn "*sse2_cvtpd2ps"
4199 [(set (match_operand:V4SF 0 "register_operand" "=x")
4201 (float_truncate:V2SF
4202 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
4203 (match_operand:V2SF 2 "const0_operand")))]
4207 return "vcvtpd2ps{x}\t{%1, %0|%0, %1}";
4209 return "cvtpd2ps\t{%1, %0|%0, %1}";
4211 [(set_attr "type" "ssecvt")
4212 (set_attr "amdfam10_decode" "double")
4213 (set_attr "athlon_decode" "vector")
4214 (set_attr "bdver1_decode" "double")
4215 (set_attr "prefix_data16" "1")
4216 (set_attr "prefix" "maybe_vex")
4217 (set_attr "mode" "V4SF")])
4219 ;; For <sse2_avx_avx512f>_cvtps2pd<avxsizesuffix> insn pattern
;; Maps a double-precision vector mode to the single-precision vector mode
;; with the same number of elements (V8DF -> V8SF, V4DF -> V4SF).
4220 (define_mode_attr sf2dfmode
4221 [(V8DF "V8SF") (V4DF "V4SF")])
;; (float_extend) from the <sf2dfmode> single-precision vector to each mode
;; of the VF2_512_256 iterator; emits vcvtps2pd.  Enabled under TARGET_AVX
;; together with the mask and round-saeonly 512-bit conditions.
4223 (define_insn "<sse2_avx_avx512f>_cvtps2pd<avxsizesuffix><mask_name><round_saeonly_name>"
4224 [(set (match_operand:VF2_512_256 0 "register_operand" "=v")
4225 (float_extend:VF2_512_256
4226 (match_operand:<sf2dfmode> 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
4227 "TARGET_AVX && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
4228 "vcvtps2pd\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
4229 [(set_attr "type" "ssecvt")
4230 (set_attr "prefix" "maybe_vex")
4231 (set_attr "mode" "<MODE>")])
;; V4DF result from elements 0..3 of V8SF operand 1 (the selecting and
;; converting rtx codes are on lines not visible here).  Emits vcvtps2pd with
;; the source printed through the %x modifier (presumably its 128-bit
;; register name -- TODO confirm).
4233 (define_insn "*avx_cvtps2pd256_2"
4234 [(set (match_operand:V4DF 0 "register_operand" "=x")
4237 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
4238 (parallel [(const_int 0) (const_int 1)
4239 (const_int 2) (const_int 3)]))))]
4241 "vcvtps2pd\t{%x1, %0|%0, %x1}"
4242 [(set_attr "type" "ssecvt")
4243 (set_attr "prefix" "vex")
4244 (set_attr "mode" "V4DF")])
;; V8DF result from elements 0..7 of V16SF operand 1 (the selecting and
;; converting rtx codes are on lines not visible here).  Emits vcvtps2pd with
;; the source printed through the %t modifier (presumably its 256-bit
;; register name -- TODO confirm).
4246 (define_insn "vec_unpacks_lo_v16sf"
4247 [(set (match_operand:V8DF 0 "register_operand" "=v")
4250 (match_operand:V16SF 1 "nonimmediate_operand" "vm")
4251 (parallel [(const_int 0) (const_int 1)
4252 (const_int 2) (const_int 3)
4253 (const_int 4) (const_int 5)
4254 (const_int 6) (const_int 7)]))))]
4256 "vcvtps2pd\t{%t1, %0|%0, %t1}"
4257 [(set_attr "type" "ssecvt")
4258 (set_attr "prefix" "evex")
4259 (set_attr "mode" "V8DF")])
;; V2DF result from elements 0 and 1 of V4SF operand 1 (the selecting and
;; converting rtx codes are on lines not visible here).  Emits cvtps2pd, with
;; the %v template prefix and "maybe_vex" allowing the VEX spelling.
4261 (define_insn "sse2_cvtps2pd"
4262 [(set (match_operand:V2DF 0 "register_operand" "=x")
4265 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
4266 (parallel [(const_int 0) (const_int 1)]))))]
4268 "%vcvtps2pd\t{%1, %0|%0, %q1}"
4269 [(set_attr "type" "ssecvt")
4270 (set_attr "amdfam10_decode" "direct")
4271 (set_attr "athlon_decode" "double")
4272 (set_attr "bdver1_decode" "double")
4273 (set_attr "prefix_data16" "0")
4274 (set_attr "prefix" "maybe_vex")
4275 (set_attr "mode" "V2DF")])
;; Two-step expansion for the high half of a V4SF.  The first step selects
;; indices 6, 7, 2, 3 (of a combination involving operand 1 whose full form
;; is not visible here), presumably into the V4SF temporary allocated as
;; operands[2].  The second step sets V2DF operand 0 from elements 0 and 1.
4277 (define_expand "vec_unpacks_hi_v4sf"
4282 (match_operand:V4SF 1 "nonimmediate_operand"))
4283 (parallel [(const_int 6) (const_int 7)
4284 (const_int 2) (const_int 3)])))
4285 (set (match_operand:V2DF 0 "register_operand")
4289 (parallel [(const_int 0) (const_int 1)]))))]
4291 "operands[2] = gen_reg_rtx (V4SFmode);")
;; Two-step expansion for the high half of a V8SF: elements 4..7 of operand 1
;; are selected (presumably into the V4SF temporary operands[2]), then V4DF
;; operand 0 is set from that value.  The second step's source rtx is not
;; visible in this excerpt.
4293 (define_expand "vec_unpacks_hi_v8sf"
4296 (match_operand:V8SF 1 "nonimmediate_operand")
4297 (parallel [(const_int 4) (const_int 5)
4298 (const_int 6) (const_int 7)])))
4299 (set (match_operand:V4DF 0 "register_operand")
4303 "operands[2] = gen_reg_rtx (V4SFmode);")
;; Two-step expansion for the high half of a V16SF: elements 8..15 of
;; operand 1 are selected (presumably into the V8SF temporary operands[2]),
;; then V8DF operand 0 is set from that value.  The second step's source rtx
;; is not visible in this excerpt.
4305 (define_expand "vec_unpacks_hi_v16sf"
4308 (match_operand:V16SF 1 "nonimmediate_operand")
4309 (parallel [(const_int 8) (const_int 9)
4310 (const_int 10) (const_int 11)
4311 (const_int 12) (const_int 13)
4312 (const_int 14) (const_int 15)])))
4313 (set (match_operand:V8DF 0 "register_operand")
4317 "operands[2] = gen_reg_rtx (V8SFmode);")
;; Low-half unpack: V2DF operand 0 is set from elements 0 and 1 of V4SF
;; operand 1.
4319 (define_expand "vec_unpacks_lo_v4sf"
4320 [(set (match_operand:V2DF 0 "register_operand")
4323 (match_operand:V4SF 1 "nonimmediate_operand")
4324 (parallel [(const_int 0) (const_int 1)]))))]
;; Low-half unpack: V4DF operand 0 is set from elements 0..3 of V8SF
;; operand 1.
4327 (define_expand "vec_unpacks_lo_v8sf"
4328 [(set (match_operand:V4DF 0 "register_operand")
4331 (match_operand:V8SF 1 "nonimmediate_operand")
4332 (parallel [(const_int 0) (const_int 1)
4333 (const_int 2) (const_int 3)]))))]
;; Maps an integer vector mode to the floating-point vector mode used for the
;; result of the vec_unpack*_float_* expanders: half as many elements, each
;; twice as wide (HI -> SF, SI -> DF).
4336 (define_mode_attr sseunpackfltmode
4337 [(V8HI "V4SF") (V4SI "V2DF") (V16HI "V8SF")
4338 (V8SI "V4DF") (V32HI "V16SF") (V16SI "V8DF")])
;; Signed unpack-high then convert to float, for the VI2_AVX512F modes:
;; widen the high half of operand 1 into a <sseunpackmode> temporary with
;; gen_vec_unpacks_hi_<mode>, then set operand 0 to (float tmp) in
;; <sseunpackfltmode>.
4340 (define_expand "vec_unpacks_float_hi_<mode>"
4341 [(match_operand:<sseunpackfltmode> 0 "register_operand")
4342 (match_operand:VI2_AVX512F 1 "register_operand")]
4345 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
4347 emit_insn (gen_vec_unpacks_hi_<mode> (tmp, operands[1]));
4348 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
4349 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
;; Signed unpack-low then convert to float, for the VI2_AVX512F modes:
;; widen the low half of operand 1 into a <sseunpackmode> temporary with
;; gen_vec_unpacks_lo_<mode>, then set operand 0 to (float tmp) in
;; <sseunpackfltmode>.
4353 (define_expand "vec_unpacks_float_lo_<mode>"
4354 [(match_operand:<sseunpackfltmode> 0 "register_operand")
4355 (match_operand:VI2_AVX512F 1 "register_operand")]
4358 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
4360 emit_insn (gen_vec_unpacks_lo_<mode> (tmp, operands[1]));
4361 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
4362 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
;; Unsigned unpack-high then convert to float, for the VI2_AVX512F modes:
;; widen the high half of operand 1 with gen_vec_unpacku_hi_<mode>, then set
;; operand 0 to (float tmp).  A signed FLOAT rtx is used on the widened
;; value; presumably safe because an unsigned-widened element is
;; non-negative in the wider mode -- TODO confirm.
4366 (define_expand "vec_unpacku_float_hi_<mode>"
4367 [(match_operand:<sseunpackfltmode> 0 "register_operand")
4368 (match_operand:VI2_AVX512F 1 "register_operand")]
4371 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
4373 emit_insn (gen_vec_unpacku_hi_<mode> (tmp, operands[1]));
4374 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
4375 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
;; Unsigned unpack-low then convert to float, for the VI2_AVX512F modes:
;; widen the low half of operand 1 with gen_vec_unpacku_lo_<mode>, then set
;; operand 0 to (float tmp).  A signed FLOAT rtx is used on the widened
;; value; presumably safe because an unsigned-widened element is
;; non-negative in the wider mode -- TODO confirm.
4379 (define_expand "vec_unpacku_float_lo_<mode>"
4380 [(match_operand:<sseunpackfltmode> 0 "register_operand")
4381 (match_operand:VI2_AVX512F 1 "register_operand")]
4384 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
4386 emit_insn (gen_vec_unpacku_lo_<mode> (tmp, operands[1]));
4387 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
4388 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
;; Two-step expansion: elements 2, 3, 2, 3 of V4SI operand 1 are selected
;; (presumably into the V4SI temporary operands[2]), which moves the high
;; pair into positions 0 and 1; V2DF operand 0 is then set from elements 0
;; and 1.  The converting rtx of the second step is not visible here.
4392 (define_expand "vec_unpacks_float_hi_v4si"
4395 (match_operand:V4SI 1 "nonimmediate_operand")
4396 (parallel [(const_int 2) (const_int 3)
4397 (const_int 2) (const_int 3)])))
4398 (set (match_operand:V2DF 0 "register_operand")
4402 (parallel [(const_int 0) (const_int 1)]))))]
4404 "operands[2] = gen_reg_rtx (V4SImode);")
;; V2DF operand 0 is set from elements 0 and 1 of V4SI operand 1 (the
;; converting rtx is on a line not visible here).
4406 (define_expand "vec_unpacks_float_lo_v4si"
4407 [(set (match_operand:V2DF 0 "register_operand")
4410 (match_operand:V4SI 1 "nonimmediate_operand")
4411 (parallel [(const_int 0) (const_int 1)]))))]
;; Two-step expansion: elements 4..7 of V8SI operand 1 are selected
;; (presumably into the V4SI temporary operands[2]), then V4DF operand 0 is
;; set from that value.  The second step's source rtx is not visible here.
4414 (define_expand "vec_unpacks_float_hi_v8si"
4417 (match_operand:V8SI 1 "nonimmediate_operand")
4418 (parallel [(const_int 4) (const_int 5)
4419 (const_int 6) (const_int 7)])))
4420 (set (match_operand:V4DF 0 "register_operand")
4424 "operands[2] = gen_reg_rtx (V4SImode);")
;; V4DF operand 0 is set from elements 0..3 of V8SI operand 1 (the
;; converting rtx is on a line not visible here).
4426 (define_expand "vec_unpacks_float_lo_v8si"
4427 [(set (match_operand:V4DF 0 "register_operand")
4430 (match_operand:V8SI 1 "nonimmediate_operand")
4431 (parallel [(const_int 0) (const_int 1)
4432 (const_int 2) (const_int 3)]))))]
;; Two-step expansion: elements 8..15 of V16SI operand 1 are selected
;; (presumably into the V8SI temporary operands[2]), then V8DF operand 0 is
;; set from that value.  The second step's source rtx is not visible here.
4435 (define_expand "vec_unpacks_float_hi_v16si"
4438 (match_operand:V16SI 1 "nonimmediate_operand")
4439 (parallel [(const_int 8) (const_int 9)
4440 (const_int 10) (const_int 11)
4441 (const_int 12) (const_int 13)
4442 (const_int 14) (const_int 15)])))
4443 (set (match_operand:V8DF 0 "register_operand")
4447 "operands[2] = gen_reg_rtx (V8SImode);")
;; V8DF operand 0 is set from elements 0..7 of V16SI operand 1 (the
;; converting rtx is on a line not visible here).
4449 (define_expand "vec_unpacks_float_lo_v16si"
4450 [(set (match_operand:V8DF 0 "register_operand")
4453 (match_operand:V16SI 1 "nonimmediate_operand")
4454 (parallel [(const_int 0) (const_int 1)
4455 (const_int 2) (const_int 3)
4456 (const_int 4) (const_int 5)
4457 (const_int 6) (const_int 7)]))))]
;; Unsigned high-half V4SI -> V2DF conversion as a sequence of sets.
;; Visible steps: select elements 2, 3, 2, 3 of operand 1; take elements 0
;; and 1 of the result; compare operands[6] < operands[3]; AND operands[7]
;; with operands[4]; finally operand 0 = operands[6] + operands[8].  The
;; destinations of the intermediate sets are on lines not visible here
;; (presumably operands[5] through operands[8] in order).
;; Preparation: operands[3] is the V2DF zero vector; operands[4] is a V2DF
;; constant vector built from x, where x comes from TWO32r =
;; real_ldexp (dconst1, 32) (presumably 2^32); operands[5] is a V4SI
;; temporary and operands[6..8] are V2DF temporaries.
;; NOTE(review): this looks like "signed convert, then add 2^32 to the lanes
;; that came out negative" -- confirm against the full pattern.
4460 (define_expand "vec_unpacku_float_hi_v4si"
4463 (match_operand:V4SI 1 "nonimmediate_operand")
4464 (parallel [(const_int 2) (const_int 3)
4465 (const_int 2) (const_int 3)])))
4470 (parallel [(const_int 0) (const_int 1)]))))
4472 (lt:V2DF (match_dup 6) (match_dup 3)))
4474 (and:V2DF (match_dup 7) (match_dup 4)))
4475 (set (match_operand:V2DF 0 "register_operand")
4476 (plus:V2DF (match_dup 6) (match_dup 8)))]
4479 REAL_VALUE_TYPE TWO32r;
4483 real_ldexp (&TWO32r, &dconst1, 32);
4484 x = const_double_from_real_value (TWO32r, DFmode);
4486 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
4487 operands[4] = force_reg (V2DFmode,
4488 ix86_build_const_vector (V2DFmode, 1, x));
4490 operands[5] = gen_reg_rtx (V4SImode);
4492 for (i = 6; i < 9; i++)
4493 operands[i] = gen_reg_rtx (V2DFmode);
;; Unsigned low-half V4SI -> V2DF conversion as a sequence of sets.
;; Visible steps: take elements 0 and 1 of operand 1; compare
;; operands[5] < operands[3]; AND operands[6] with operands[4]; finally
;; operand 0 = operands[5] + operands[7].  The destinations of the
;; intermediate sets are on lines not visible here (presumably operands[5]
;; through operands[7] in order).
;; Preparation: operands[3] is the V2DF zero vector; operands[4] is a V2DF
;; constant vector built from x, where x comes from TWO32r =
;; real_ldexp (dconst1, 32) (presumably 2^32); operands[5..7] are V2DF
;; temporaries.
4496 (define_expand "vec_unpacku_float_lo_v4si"
4500 (match_operand:V4SI 1 "nonimmediate_operand")
4501 (parallel [(const_int 0) (const_int 1)]))))
4503 (lt:V2DF (match_dup 5) (match_dup 3)))
4505 (and:V2DF (match_dup 6) (match_dup 4)))
4506 (set (match_operand:V2DF 0 "register_operand")
4507 (plus:V2DF (match_dup 5) (match_dup 7)))]
4510 REAL_VALUE_TYPE TWO32r;
4514 real_ldexp (&TWO32r, &dconst1, 32);
4515 x = const_double_from_real_value (TWO32r, DFmode);
4517 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
4518 operands[4] = force_reg (V2DFmode,
4519 ix86_build_const_vector (V2DFmode, 1, x));
4521 for (i = 5; i < 8; i++)
4522 operands[i] = gen_reg_rtx (V2DFmode);
;; Unsigned high-half V8SI -> V4DF conversion, emitted from C code:
;;   tmp[5] = high V4SI half of operand 1 (gen_vec_extract_hi_v8si)
;;   tmp[2] = float conversion of tmp[5]   (gen_floatv4siv4df2)
;;   tmp[3] = (tmp[2] < tmp[0]), tmp[0] being the V4DF zero vector
;;   tmp[4] = tmp[3] & tmp[1], tmp[1] being a V4DF constant vector built
;;            from TWO32r = real_ldexp (dconst1, 32) (presumably 2^32)
;;   operand 0 = tmp[2] + tmp[4]
;; NOTE(review): i.e. lanes whose signed conversion is negative get the
;; constant added -- confirm the constant is 2^32 in every lane.
4525 (define_expand "vec_unpacku_float_hi_v8si"
4526 [(match_operand:V4DF 0 "register_operand")
4527 (match_operand:V8SI 1 "register_operand")]
4530 REAL_VALUE_TYPE TWO32r;
4534 real_ldexp (&TWO32r, &dconst1, 32);
4535 x = const_double_from_real_value (TWO32r, DFmode);
4537 tmp[0] = force_reg (V4DFmode, CONST0_RTX (V4DFmode));
4538 tmp[1] = force_reg (V4DFmode, ix86_build_const_vector (V4DFmode, 1, x));
4539 tmp[5] = gen_reg_rtx (V4SImode);
4541 for (i = 2; i < 5; i++)
4542 tmp[i] = gen_reg_rtx (V4DFmode);
4543 emit_insn (gen_vec_extract_hi_v8si (tmp[5], operands[1]));
4544 emit_insn (gen_floatv4siv4df2 (tmp[2], tmp[5]));
4545 emit_insn (gen_rtx_SET (VOIDmode, tmp[3],
4546 gen_rtx_LT (V4DFmode, tmp[2], tmp[0])));
4547 emit_insn (gen_andv4df3 (tmp[4], tmp[3], tmp[1]));
4548 emit_insn (gen_addv4df3 (operands[0], tmp[2], tmp[4]));
;; Unsigned high-half V16SI -> V8DF conversion, emitted from C code:
;;   tmp[3] = high V8SI half of operand 1 (gen_vec_extract_hi_v16si)
;;   tmp[2] = float conversion of tmp[3]   (gen_floatv8siv8df2)
;;   k      = (tmp[2] < tmp[0]) as a QImode value, tmp[0] being V8DF zero
;;   tmp[2] = masked add of tmp[2] and tmp[1] under k (gen_addv8df3_mask);
;;            tmp[1] is a V8DF constant vector built from TWO32r =
;;            real_ldexp (dconst1, 32) (presumably 2^32)
;;   operand 0 = tmp[2]
;; NOTE(review): the argument order of gen_addv8df3_mask is presumably
;; (dest, src1, src2, merge source, mask) -- confirm against its definition.
4552 (define_expand "vec_unpacku_float_hi_v16si"
4553 [(match_operand:V8DF 0 "register_operand")
4554 (match_operand:V16SI 1 "register_operand")]
4557 REAL_VALUE_TYPE TWO32r;
4560 real_ldexp (&TWO32r, &dconst1, 32);
4561 x = const_double_from_real_value (TWO32r, DFmode);
4563 tmp[0] = force_reg (V8DFmode, CONST0_RTX (V8DFmode));
4564 tmp[1] = force_reg (V8DFmode, ix86_build_const_vector (V8DFmode, 1, x));
4565 tmp[2] = gen_reg_rtx (V8DFmode);
4566 tmp[3] = gen_reg_rtx (V8SImode);
4567 k = gen_reg_rtx (QImode);
4569 emit_insn (gen_vec_extract_hi_v16si (tmp[3], operands[1]));
4570 emit_insn (gen_floatv8siv8df2 (tmp[2], tmp[3]));
4571 emit_insn (gen_rtx_SET (VOIDmode, k,
4572 gen_rtx_LT (QImode, tmp[2], tmp[0])));
4573 emit_insn (gen_addv8df3_mask (tmp[2], tmp[2], tmp[1], tmp[2], k));
4574 emit_move_insn (operands[0], tmp[2]);
;; Unsigned low-half V8SI -> V4DF conversion, emitted from C code:
;;   tmp[2] = result of gen_avx_cvtdq2pd256_2 on operand 1
;;   tmp[3] = (tmp[2] < tmp[0]), tmp[0] being the V4DF zero vector
;;   tmp[4] = tmp[3] & tmp[1], tmp[1] being a V4DF constant vector built
;;            from TWO32r = real_ldexp (dconst1, 32) (presumably 2^32)
;;   operand 0 = tmp[2] + tmp[4]
4578 (define_expand "vec_unpacku_float_lo_v8si"
4579 [(match_operand:V4DF 0 "register_operand")
4580 (match_operand:V8SI 1 "nonimmediate_operand")]
4583 REAL_VALUE_TYPE TWO32r;
4587 real_ldexp (&TWO32r, &dconst1, 32);
4588 x = const_double_from_real_value (TWO32r, DFmode);
4590 tmp[0] = force_reg (V4DFmode, CONST0_RTX (V4DFmode));
4591 tmp[1] = force_reg (V4DFmode, ix86_build_const_vector (V4DFmode, 1, x));
4593 for (i = 2; i < 5; i++)
4594 tmp[i] = gen_reg_rtx (V4DFmode);
4595 emit_insn (gen_avx_cvtdq2pd256_2 (tmp[2], operands[1]));
4596 emit_insn (gen_rtx_SET (VOIDmode, tmp[3],
4597 gen_rtx_LT (V4DFmode, tmp[2], tmp[0])));
4598 emit_insn (gen_andv4df3 (tmp[4], tmp[3], tmp[1]));
4599 emit_insn (gen_addv4df3 (operands[0], tmp[2], tmp[4]));
;; Unsigned low-half V16SI -> V8DF conversion, emitted from C code:
;;   tmp[2] = result of gen_avx512f_cvtdq2pd512_2 on operand 1
;;   k      = (tmp[2] < tmp[0]) as a QImode value, tmp[0] being V8DF zero
;;   tmp[2] = masked add of tmp[2] and tmp[1] under k (gen_addv8df3_mask);
;;            tmp[1] is a V8DF constant vector built from TWO32r =
;;            real_ldexp (dconst1, 32) (presumably 2^32)
;;   operand 0 = tmp[2]
4603 (define_expand "vec_unpacku_float_lo_v16si"
4604 [(match_operand:V8DF 0 "register_operand")
4605 (match_operand:V16SI 1 "nonimmediate_operand")]
4608 REAL_VALUE_TYPE TWO32r;
4611 real_ldexp (&TWO32r, &dconst1, 32);
4612 x = const_double_from_real_value (TWO32r, DFmode);
4614 tmp[0] = force_reg (V8DFmode, CONST0_RTX (V8DFmode));
4615 tmp[1] = force_reg (V8DFmode, ix86_build_const_vector (V8DFmode, 1, x));
4616 tmp[2] = gen_reg_rtx (V8DFmode);
4617 k = gen_reg_rtx (QImode);
4619 emit_insn (gen_avx512f_cvtdq2pd512_2 (tmp[2], operands[1]));
4620 emit_insn (gen_rtx_SET (VOIDmode, k,
4621 gen_rtx_LT (QImode, tmp[2], tmp[0])));
4622 emit_insn (gen_addv8df3_mask (tmp[2], tmp[2], tmp[1], tmp[2], k));
4623 emit_move_insn (operands[0], tmp[2]);
;; Pack-with-truncation for the VF2_512_256 modes: operands 1 and 2 are each
;; float_truncate'd to <sf2dfmode> (presumably into the temporaries
;; operands[3] and operands[4] allocated below), and <ssePSmode> operand 0 is
;; set to a vec_concat whose arguments are on lines not visible here.
4627 (define_expand "vec_pack_trunc_<mode>"
4629 (float_truncate:<sf2dfmode>
4630 (match_operand:VF2_512_256 1 "nonimmediate_operand")))
4632 (float_truncate:<sf2dfmode>
4633 (match_operand:VF2_512_256 2 "nonimmediate_operand")))
4634 (set (match_operand:<ssePSmode> 0 "register_operand")
4635 (vec_concat:<ssePSmode>
4640 operands[3] = gen_reg_rtx (<sf2dfmode>mode);
4641 operands[4] = gen_reg_rtx (<sf2dfmode>mode);
;; Pack two V2DF operands into one V4SF.  Two strategies are visible:
;;  - under TARGET_AVX && !TARGET_PREFER_AVX128: concatenate operand 1
;;    (forced into a register) and operand 2 into a V4DF temporary, then
;;    convert once with gen_avx_cvtpd2ps256;
;;  - the other sequence (presumably the else arm; the branch lines are not
;;    visible here): convert each operand with gen_sse2_cvtpd2ps into its own
;;    V4SF temporary and combine the two with gen_sse_movlhps.
4644 (define_expand "vec_pack_trunc_v2df"
4645 [(match_operand:V4SF 0 "register_operand")
4646 (match_operand:V2DF 1 "nonimmediate_operand")
4647 (match_operand:V2DF 2 "nonimmediate_operand")]
4652 if (TARGET_AVX && !TARGET_PREFER_AVX128)
4654 tmp0 = gen_reg_rtx (V4DFmode);
4655 tmp1 = force_reg (V2DFmode, operands[1]);
4657 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
4658 emit_insn (gen_avx_cvtpd2ps256 (operands[0], tmp0));
4662 tmp0 = gen_reg_rtx (V4SFmode);
4663 tmp1 = gen_reg_rtx (V4SFmode);
4665 emit_insn (gen_sse2_cvtpd2ps (tmp0, operands[1]));
4666 emit_insn (gen_sse2_cvtpd2ps (tmp1, operands[2]));
4667 emit_insn (gen_sse_movlhps (operands[0], tmp0, tmp1));
;; Pack two V8DF operands into one V16SI: each operand is converted into a
;; V8SI temporary with gen_fix_truncv8dfv8si2, then the two halves are joined
;; into operand 0 with gen_avx_vec_concatv16si.
4672 (define_expand "vec_pack_sfix_trunc_v8df"
4673 [(match_operand:V16SI 0 "register_operand")
4674 (match_operand:V8DF 1 "nonimmediate_operand")
4675 (match_operand:V8DF 2 "nonimmediate_operand")]
4680 r1 = gen_reg_rtx (V8SImode);
4681 r2 = gen_reg_rtx (V8SImode);
4683 emit_insn (gen_fix_truncv8dfv8si2 (r1, operands[1]));
4684 emit_insn (gen_fix_truncv8dfv8si2 (r2, operands[2]));
4685 emit_insn (gen_avx_vec_concatv16si (operands[0], r1, r2));
;; Pack two V4DF operands into one V8SI: each operand is converted into a
;; V4SI temporary with gen_fix_truncv4dfv4si2, then the two halves are joined
;; into operand 0 with gen_avx_vec_concatv8si.
4689 (define_expand "vec_pack_sfix_trunc_v4df"
4690 [(match_operand:V8SI 0 "register_operand")
4691 (match_operand:V4DF 1 "nonimmediate_operand")
4692 (match_operand:V4DF 2 "nonimmediate_operand")]
4697 r1 = gen_reg_rtx (V4SImode);
4698 r2 = gen_reg_rtx (V4SImode);
4700 emit_insn (gen_fix_truncv4dfv4si2 (r1, operands[1]));
4701 emit_insn (gen_fix_truncv4dfv4si2 (r2, operands[2]));
4702 emit_insn (gen_avx_vec_concatv8si (operands[0], r1, r2));
;; Pack two V2DF operands into one V4SI with truncating conversion.
;;  - under TARGET_AVX && !TARGET_PREFER_AVX128: concatenate operand 1
;;    (forced into a register) and operand 2 into a V4DF temporary and convert
;;    once with gen_fix_truncv4dfv4si2;
;;  - the other sequence (presumably the else arm; the branch lines are not
;;    visible here): convert each operand with gen_sse2_cvttpd2dq, combine the
;;    two results with gen_vec_interleave_lowv2di on their V2DI lowparts, and
;;    move the V4SI lowpart of that into operand 0.
4706 (define_expand "vec_pack_sfix_trunc_v2df"
4707 [(match_operand:V4SI 0 "register_operand")
4708 (match_operand:V2DF 1 "nonimmediate_operand")
4709 (match_operand:V2DF 2 "nonimmediate_operand")]
4712 rtx tmp0, tmp1, tmp2;
4714 if (TARGET_AVX && !TARGET_PREFER_AVX128)
4716 tmp0 = gen_reg_rtx (V4DFmode);
4717 tmp1 = force_reg (V2DFmode, operands[1]);
4719 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
4720 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp0));
4724 tmp0 = gen_reg_rtx (V4SImode);
4725 tmp1 = gen_reg_rtx (V4SImode);
4726 tmp2 = gen_reg_rtx (V2DImode);
4728 emit_insn (gen_sse2_cvttpd2dq (tmp0, operands[1]));
4729 emit_insn (gen_sse2_cvttpd2dq (tmp1, operands[2]));
4730 emit_insn (gen_vec_interleave_lowv2di (tmp2,
4731 gen_lowpart (V2DImode, tmp0),
4732 gen_lowpart (V2DImode, tmp1)));
4733 emit_move_insn (operands[0], gen_lowpart (V4SImode, tmp2));
;; Maps a double-precision vector mode to the SImode vector mode with twice
;; as many elements, i.e. the result mode of packing two such vectors.
4738 (define_mode_attr ssepackfltmode
4739 [(V8DF "V16SI") (V4DF "V8SI") (V2DF "V4SI")])
;; Pack two VF2 operands into one <ssepackfltmode> vector with unsigned
;; truncating conversion.
;;  - V8DFmode: convert each operand with gen_ufix_truncv8dfv8si2 and join
;;    the halves with gen_avx_vec_concatv16si.
;;  - other modes (presumably the else arm; the branch lines are not visible
;;    here): ix86_expand_adjust_ufix_to_sfix_si returns an adjusted input in
;;    tmp[0]/tmp[1] and a second value through tmp[2]/tmp[3]; the adjusted
;;    inputs go through the signed vec_pack_sfix_trunc_<mode> expander into
;;    tmp[4]; ix86_expand_vec_extract_even_odd combines tmp[2] and tmp[3]
;;    into tmp[5], done through a V8SF view when the result mode is not
;;    V4SImode and TARGET_AVX2 is off; operand 0 = tmp[4] XOR tmp[5].
;; NOTE(review): tmp[2]/tmp[3] presumably hold the per-element correction
;; undone by the XOR -- confirm in ix86_expand_adjust_ufix_to_sfix_si.
4741 (define_expand "vec_pack_ufix_trunc_<mode>"
4742 [(match_operand:<ssepackfltmode> 0 "register_operand")
4743 (match_operand:VF2 1 "register_operand")
4744 (match_operand:VF2 2 "register_operand")]
4747 if (<MODE>mode == V8DFmode)
4751 r1 = gen_reg_rtx (V8SImode);
4752 r2 = gen_reg_rtx (V8SImode);
4754 emit_insn (gen_ufix_truncv8dfv8si2 (r1, operands[1]));
4755 emit_insn (gen_ufix_truncv8dfv8si2 (r2, operands[2]));
4756 emit_insn (gen_avx_vec_concatv16si (operands[0], r1, r2));
4761 tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]);
4762 tmp[1] = ix86_expand_adjust_ufix_to_sfix_si (operands[2], &tmp[3]);
4763 tmp[4] = gen_reg_rtx (<ssepackfltmode>mode);
4764 emit_insn (gen_vec_pack_sfix_trunc_<mode> (tmp[4], tmp[0], tmp[1]));
4765 if (<ssepackfltmode>mode == V4SImode || TARGET_AVX2)
4767 tmp[5] = gen_reg_rtx (<ssepackfltmode>mode);
4768 ix86_expand_vec_extract_even_odd (tmp[5], tmp[2], tmp[3], 0);
4772 tmp[5] = gen_reg_rtx (V8SFmode);
4773 ix86_expand_vec_extract_even_odd (tmp[5], gen_lowpart (V8SFmode, tmp[2]),
4774 gen_lowpart (V8SFmode, tmp[3]), 0);
4775 tmp[5] = gen_lowpart (V8SImode, tmp[5]);
4777 tmp[6] = expand_simple_binop (<ssepackfltmode>mode, XOR, tmp[4], tmp[5],
4778 operands[0], 0, OPTAB_DIRECT);
4779 if (tmp[6] != operands[0])
4780 emit_move_insn (operands[0], tmp[6]);
;; Pack two V4DF operands into one V8SI: each operand is converted into a
;; V4SI temporary with gen_avx_cvtpd2dq256, then the two halves are joined
;; into operand 0 with gen_avx_vec_concatv8si.
4786 (define_expand "vec_pack_sfix_v4df"
4787 [(match_operand:V8SI 0 "register_operand")
4788 (match_operand:V4DF 1 "nonimmediate_operand")
4789 (match_operand:V4DF 2 "nonimmediate_operand")]
4794 r1 = gen_reg_rtx (V4SImode);
4795 r2 = gen_reg_rtx (V4SImode);
4797 emit_insn (gen_avx_cvtpd2dq256 (r1, operands[1]));
4798 emit_insn (gen_avx_cvtpd2dq256 (r2, operands[2]));
4799 emit_insn (gen_avx_vec_concatv8si (operands[0], r1, r2));
;; Pack two V2DF operands into one V4SI using the cvtpd2dq conversions.
;;  - under TARGET_AVX && !TARGET_PREFER_AVX128: concatenate operand 1
;;    (forced into a register) and operand 2 into a V4DF temporary and convert
;;    once with gen_avx_cvtpd2dq256;
;;  - the other sequence (presumably the else arm; the branch lines are not
;;    visible here): convert each operand with gen_sse2_cvtpd2dq, combine the
;;    two results with gen_vec_interleave_lowv2di on their V2DI lowparts, and
;;    move the V4SI lowpart of that into operand 0.
4803 (define_expand "vec_pack_sfix_v2df"
4804 [(match_operand:V4SI 0 "register_operand")
4805 (match_operand:V2DF 1 "nonimmediate_operand")
4806 (match_operand:V2DF 2 "nonimmediate_operand")]
4809 rtx tmp0, tmp1, tmp2;
4811 if (TARGET_AVX && !TARGET_PREFER_AVX128)
4813 tmp0 = gen_reg_rtx (V4DFmode);
4814 tmp1 = force_reg (V2DFmode, operands[1]);
4816 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
4817 emit_insn (gen_avx_cvtpd2dq256 (operands[0], tmp0));
4821 tmp0 = gen_reg_rtx (V4SImode);
4822 tmp1 = gen_reg_rtx (V4SImode);
4823 tmp2 = gen_reg_rtx (V2DImode);
4825 emit_insn (gen_sse2_cvtpd2dq (tmp0, operands[1]));
4826 emit_insn (gen_sse2_cvtpd2dq (tmp1, operands[2]));
4827 emit_insn (gen_vec_interleave_lowv2di (tmp2,
4828 gen_lowpart (V2DImode, tmp0),
4829 gen_lowpart (V2DImode, tmp1)));
4830 emit_move_insn (operands[0], gen_lowpart (V4SImode, tmp2));
4835 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4837 ;; Parallel single-precision floating point element swizzling
4839 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander front end for sse_movhlps that accepts nonimmediate operands.
;; ix86_fixup_binary_operands returns the destination to use (dst); the insn
;; is emitted into dst via gen_sse_movhlps, and the result is copied to
;; operands[0] when dst is a different rtx.
4841 (define_expand "sse_movhlps_exp"
4842 [(set (match_operand:V4SF 0 "nonimmediate_operand")
4845 (match_operand:V4SF 1 "nonimmediate_operand")
4846 (match_operand:V4SF 2 "nonimmediate_operand"))
4847 (parallel [(const_int 6)
4853 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
4855 emit_insn (gen_sse_movhlps (dst, operands[1], operands[2]));
4857 /* Fix up the destination if needed. */
4858 if (dst != operands[0])
4859 emit_move_insn (operands[0], dst);
;; movhlps-style selection on V4SF operands 1 and 2 (selector list starts
;; with index 6; the remaining indices are not visible here).  Rejected when
;; operands 1 and 2 are both memory.  Five alternatives:
;;   0: movhlps  (noavx, operand 1 tied to operand 0, register operand 2)
;;   1: vmovhlps (avx, three-operand)
;;   2: movlps   (noavx, operand 2 is offsettable memory, printed as %H2)
;;   3: vmovlps  (avx, operand 2 is offsettable memory, printed as %H2)
;;   4: movhps store, memory destination, operand 1 tied to operand 0
4864 (define_insn "sse_movhlps"
4865 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,m")
4868 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
4869 (match_operand:V4SF 2 "nonimmediate_operand" " x,x,o,o,x"))
4870 (parallel [(const_int 6)
4874 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4876 movhlps\t{%2, %0|%0, %2}
4877 vmovhlps\t{%2, %1, %0|%0, %1, %2}
4878 movlps\t{%H2, %0|%0, %H2}
4879 vmovlps\t{%H2, %1, %0|%0, %1, %H2}
4880 %vmovhps\t{%2, %0|%q0, %2}"
4881 [(set_attr "isa" "noavx,avx,noavx,avx,*")
4882 (set_attr "type" "ssemov")
4883 (set_attr "ssememalign" "64")
4884 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
4885 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
;; Expander front end for sse_movlhps that accepts nonimmediate operands.
;; ix86_fixup_binary_operands returns the destination to use (dst); the insn
;; is emitted into dst via gen_sse_movlhps, and the result is copied to
;; operands[0] when dst is a different rtx.
4887 (define_expand "sse_movlhps_exp"
4888 [(set (match_operand:V4SF 0 "nonimmediate_operand")
4891 (match_operand:V4SF 1 "nonimmediate_operand")
4892 (match_operand:V4SF 2 "nonimmediate_operand"))
4893 (parallel [(const_int 0)
4899 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
4901 emit_insn (gen_sse_movlhps (dst, operands[1], operands[2]));
4903 /* Fix up the destination if needed. */
4904 if (dst != operands[0])
4905 emit_move_insn (operands[0], dst);
;; movlhps-style selection on V4SF operands 1 and 2 (selector list starts
;; with index 0; the remaining indices are not visible here).  Gated on
;; ix86_binary_operator_ok.  Five alternatives:
;;   0: movlhps  (noavx, operand 1 tied to operand 0, register operand 2)
;;   1: vmovlhps (avx, three-operand)
;;   2: movhps   (noavx, operand 2 in memory)
;;   3: vmovhps  (avx, operand 2 in memory)
;;   4: movlps store to %H0, offsettable memory destination
4910 (define_insn "sse_movlhps"
4911 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,o")
4914 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
4915 (match_operand:V4SF 2 "nonimmediate_operand" " x,x,m,m,x"))
4916 (parallel [(const_int 0)
4920 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
4922 movlhps\t{%2, %0|%0, %2}
4923 vmovlhps\t{%2, %1, %0|%0, %1, %2}
4924 movhps\t{%2, %0|%0, %q2}
4925 vmovhps\t{%2, %1, %0|%0, %1, %q2}
4926 %vmovlps\t{%2, %H0|%H0, %2}"
4927 [(set_attr "isa" "noavx,avx,noavx,avx,*")
4928 (set_attr "type" "ssemov")
4929 (set_attr "ssememalign" "64")
4930 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
4931 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
;; 512-bit vunpckhps.  The selector pairs index i with index i + 16 for
;; i = 2, 3, 6, 7, 10, 11, 14, 15, i.e. the upper two elements of each group
;; of four.  Indices >= 16 presumably address operand 2 in a concatenation
;; of operands 1 and 2 (the concatenating rtx is not visible here).
4933 (define_insn "<mask_codefor>avx512f_unpckhps512<mask_name>"
4934 [(set (match_operand:V16SF 0 "register_operand" "=v")
4937 (match_operand:V16SF 1 "register_operand" "v")
4938 (match_operand:V16SF 2 "nonimmediate_operand" "vm"))
4939 (parallel [(const_int 2) (const_int 18)
4940 (const_int 3) (const_int 19)
4941 (const_int 6) (const_int 22)
4942 (const_int 7) (const_int 23)
4943 (const_int 10) (const_int 26)
4944 (const_int 11) (const_int 27)
4945 (const_int 14) (const_int 30)
4946 (const_int 15) (const_int 31)])))]
4948 "vunpckhps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
4949 [(set_attr "type" "sselog")
4950 (set_attr "prefix" "evex")
4951 (set_attr "mode" "V16SF")])
4953 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; 256-bit vunpckhps.  The selector pairs index i with index i + 8 for
;; i = 2, 3, 6, 7.  Indices >= 8 presumably address operand 2 in a
;; concatenation of operands 1 and 2 (the concatenating rtx is not visible
;; here).
4954 (define_insn "avx_unpckhps256"
4955 [(set (match_operand:V8SF 0 "register_operand" "=x")
4958 (match_operand:V8SF 1 "register_operand" "x")
4959 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
4960 (parallel [(const_int 2) (const_int 10)
4961 (const_int 3) (const_int 11)
4962 (const_int 6) (const_int 14)
4963 (const_int 7) (const_int 15)])))]
4965 "vunpckhps\t{%2, %1, %0|%0, %1, %2}"
4966 [(set_attr "type" "sselog")
4967 (set_attr "prefix" "vex")
4968 (set_attr "mode" "V8SF")])
;; Full-width high interleave of two V8SF vectors, built in three steps:
;;   1. selection {0,8,1,9,4,12,5,13} of operands 1 and 2 (the same selector
;;      as avx_unpcklps256),
;;   2. selection {2,10,3,11,6,14,7,15} (the same selector as
;;      avx_unpckhps256),
;;   3. operand 0 = selection {4,5,6,7,12,13,14,15} of the two intermediates.
;; operands[3] and operands[4] are fresh V8SF temporaries (presumably the
;; destinations of steps 1 and 2; those set lines are not visible here).
4970 (define_expand "vec_interleave_highv8sf"
4974 (match_operand:V8SF 1 "register_operand" "x")
4975 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
4976 (parallel [(const_int 0) (const_int 8)
4977 (const_int 1) (const_int 9)
4978 (const_int 4) (const_int 12)
4979 (const_int 5) (const_int 13)])))
4985 (parallel [(const_int 2) (const_int 10)
4986 (const_int 3) (const_int 11)
4987 (const_int 6) (const_int 14)
4988 (const_int 7) (const_int 15)])))
4989 (set (match_operand:V8SF 0 "register_operand")
4994 (parallel [(const_int 4) (const_int 5)
4995 (const_int 6) (const_int 7)
4996 (const_int 12) (const_int 13)
4997 (const_int 14) (const_int 15)])))]
5000 operands[3] = gen_reg_rtx (V8SFmode);
5001 operands[4] = gen_reg_rtx (V8SFmode);
;; 128-bit high interleave: selector {2,6,3,7} over V4SF operands 1 and 2.
;; Alternative 0 (noavx) is two-operand unpckhps with operand 1 tied to
;; operand 0; alternative 1 (avx) is three-operand vunpckhps.
5004 (define_insn "vec_interleave_highv4sf"
5005 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
5008 (match_operand:V4SF 1 "register_operand" "0,x")
5009 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
5010 (parallel [(const_int 2) (const_int 6)
5011 (const_int 3) (const_int 7)])))]
5014 unpckhps\t{%2, %0|%0, %2}
5015 vunpckhps\t{%2, %1, %0|%0, %1, %2}"
5016 [(set_attr "isa" "noavx,avx")
5017 (set_attr "type" "sselog")
5018 (set_attr "prefix" "orig,vex")
5019 (set_attr "mode" "V4SF")])
;; 512-bit vunpcklps.  The selector pairs index i with index i + 16 for
;; i = 0, 1, 4, 5, 8, 9, 12, 13, i.e. the lower two elements of each group
;; of four.  Indices >= 16 presumably address operand 2 in a concatenation
;; of operands 1 and 2 (the concatenating rtx is not visible here).
5021 (define_insn "<mask_codefor>avx512f_unpcklps512<mask_name>"
5022 [(set (match_operand:V16SF 0 "register_operand" "=v")
5025 (match_operand:V16SF 1 "register_operand" "v")
5026 (match_operand:V16SF 2 "nonimmediate_operand" "vm"))
5027 (parallel [(const_int 0) (const_int 16)
5028 (const_int 1) (const_int 17)
5029 (const_int 4) (const_int 20)
5030 (const_int 5) (const_int 21)
5031 (const_int 8) (const_int 24)
5032 (const_int 9) (const_int 25)
5033 (const_int 12) (const_int 28)
5034 (const_int 13) (const_int 29)])))]
5036 "vunpcklps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
5037 [(set_attr "type" "sselog")
5038 (set_attr "prefix" "evex")
5039 (set_attr "mode" "V16SF")])
5041 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; 256-bit vunpcklps.  The selector pairs index i with index i + 8 for
;; i = 0, 1, 4, 5.  Indices >= 8 presumably address operand 2 in a
;; concatenation of operands 1 and 2 (the concatenating rtx is not visible
;; here).
5042 (define_insn "avx_unpcklps256"
5043 [(set (match_operand:V8SF 0 "register_operand" "=x")
5046 (match_operand:V8SF 1 "register_operand" "x")
5047 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
5048 (parallel [(const_int 0) (const_int 8)
5049 (const_int 1) (const_int 9)
5050 (const_int 4) (const_int 12)
5051 (const_int 5) (const_int 13)])))]
5053 "vunpcklps\t{%2, %1, %0|%0, %1, %2}"
5054 [(set_attr "type" "sselog")
5055 (set_attr "prefix" "vex")
5056 (set_attr "mode" "V8SF")])
;; Full-width low interleave of two V8SF vectors, built in three steps:
;;   1. selection {0,8,1,9,4,12,5,13} of operands 1 and 2 (the same selector
;;      as avx_unpcklps256),
;;   2. selection {2,10,3,11,6,14,7,15} (the same selector as
;;      avx_unpckhps256),
;;   3. operand 0 = selection {0,1,2,3,8,9,10,11} of the two intermediates.
;; operands[3] and operands[4] are fresh V8SF temporaries (presumably the
;; destinations of steps 1 and 2; those set lines are not visible here).
5058 (define_expand "vec_interleave_lowv8sf"
5062 (match_operand:V8SF 1 "register_operand" "x")
5063 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
5064 (parallel [(const_int 0) (const_int 8)
5065 (const_int 1) (const_int 9)
5066 (const_int 4) (const_int 12)
5067 (const_int 5) (const_int 13)])))
5073 (parallel [(const_int 2) (const_int 10)
5074 (const_int 3) (const_int 11)
5075 (const_int 6) (const_int 14)
5076 (const_int 7) (const_int 15)])))
5077 (set (match_operand:V8SF 0 "register_operand")
5082 (parallel [(const_int 0) (const_int 1)
5083 (const_int 2) (const_int 3)
5084 (const_int 8) (const_int 9)
5085 (const_int 10) (const_int 11)])))]
5088 operands[3] = gen_reg_rtx (V8SFmode);
5089 operands[4] = gen_reg_rtx (V8SFmode);
;; 128-bit low interleave: selector {0,4,1,5} over V4SF operands 1 and 2.
;; Alternative 0 (noavx) is two-operand unpcklps with operand 1 tied to
;; operand 0; alternative 1 (avx) is three-operand vunpcklps.
5092 (define_insn "vec_interleave_lowv4sf"
5093 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
5096 (match_operand:V4SF 1 "register_operand" "0,x")
5097 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
5098 (parallel [(const_int 0) (const_int 4)
5099 (const_int 1) (const_int 5)])))]
5102 unpcklps\t{%2, %0|%0, %2}
5103 vunpcklps\t{%2, %1, %0|%0, %1, %2}"
5104 [(set_attr "isa" "noavx,avx")
5105 (set_attr "type" "sselog")
5106 (set_attr "prefix" "orig,vex")
5107 (set_attr "mode" "V4SF")])
5109 ;; These are modeled with the same vec_concat as the others so that we
5110 ;; capture users of shufps that can use the new instructions
;; 256-bit vmovshdup: the selector {1,1,3,3,5,5,7,7} duplicates each
;; odd-indexed element of V8SF operand 1 into the adjacent even slot.
5111 (define_insn "avx_movshdup256"
5112 [(set (match_operand:V8SF 0 "register_operand" "=x")
5115 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
5117 (parallel [(const_int 1) (const_int 1)
5118 (const_int 3) (const_int 3)
5119 (const_int 5) (const_int 5)
5120 (const_int 7) (const_int 7)])))]
5122 "vmovshdup\t{%1, %0|%0, %1}"
5123 [(set_attr "type" "sse")
5124 (set_attr "prefix" "vex")
5125 (set_attr "mode" "V8SF")])
;; 128-bit movshdup on V4SF operand 1; the %v template prefix and
;; "maybe_vex" allow the VEX spelling.  Only the first selector index (1) is
;; visible in this excerpt; the rest are presumably the 128-bit analogue of
;; the 256-bit selector {1,1,3,3,...} -- TODO confirm.
5127 (define_insn "sse3_movshdup"
5128 [(set (match_operand:V4SF 0 "register_operand" "=x")
5131 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
5133 (parallel [(const_int 1)
5138 "%vmovshdup\t{%1, %0|%0, %1}"
5139 [(set_attr "type" "sse")
5140 (set_attr "prefix_rep" "1")
5141 (set_attr "prefix" "maybe_vex")
5142 (set_attr "mode" "V4SF")])
;; 512-bit vmovshdup (EVEX, optional masking via <mask_name>): the selector
;; {1,1,3,3,...,15,15} duplicates each odd-indexed element of V16SF
;; operand 1 into the adjacent even slot.
5144 (define_insn "<mask_codefor>avx512f_movshdup512<mask_name>"
5145 [(set (match_operand:V16SF 0 "register_operand" "=v")
5148 (match_operand:V16SF 1 "nonimmediate_operand" "vm")
5150 (parallel [(const_int 1) (const_int 1)
5151 (const_int 3) (const_int 3)
5152 (const_int 5) (const_int 5)
5153 (const_int 7) (const_int 7)
5154 (const_int 9) (const_int 9)
5155 (const_int 11) (const_int 11)
5156 (const_int 13) (const_int 13)
5157 (const_int 15) (const_int 15)])))]
5159 "vmovshdup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5160 [(set_attr "type" "sse")
5161 (set_attr "prefix" "evex")
5162 (set_attr "mode" "V16SF")])
;; 256-bit vmovsldup: the selector {0,0,2,2,4,4,6,6} duplicates each
;; even-indexed element of V8SF operand 1 into the adjacent odd slot.
5164 (define_insn "avx_movsldup256"
5165 [(set (match_operand:V8SF 0 "register_operand" "=x")
5168 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
5170 (parallel [(const_int 0) (const_int 0)
5171 (const_int 2) (const_int 2)
5172 (const_int 4) (const_int 4)
5173 (const_int 6) (const_int 6)])))]
5175 "vmovsldup\t{%1, %0|%0, %1}"
5176 [(set_attr "type" "sse")
5177 (set_attr "prefix" "vex")
5178 (set_attr "mode" "V8SF")])
;; 128-bit movsldup on V4SF operand 1; the %v template prefix and
;; "maybe_vex" allow the VEX spelling.  Only the first selector index (0) is
;; visible in this excerpt; the rest are presumably the 128-bit analogue of
;; the 256-bit selector {0,0,2,2,...} -- TODO confirm.
5180 (define_insn "sse3_movsldup"
5181 [(set (match_operand:V4SF 0 "register_operand" "=x")
5184 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
5186 (parallel [(const_int 0)
5191 "%vmovsldup\t{%1, %0|%0, %1}"
5192 [(set_attr "type" "sse")
5193 (set_attr "prefix_rep" "1")
5194 (set_attr "prefix" "maybe_vex")
5195 (set_attr "mode" "V4SF")])
;; 512-bit vmovsldup on V16SF: selects source elements 0,0,2,2,...,14,14.
;; The <mask_*> placeholders are filled in elsewhere in the file (presumably
;; by the masking define_subst -- confirm) to produce the masked variant.
5197 (define_insn "<mask_codefor>avx512f_movsldup512<mask_name>"
5198 [(set (match_operand:V16SF 0 "register_operand" "=v")
5201 (match_operand:V16SF 1 "nonimmediate_operand" "vm")
5203 (parallel [(const_int 0) (const_int 0)
5204 (const_int 2) (const_int 2)
5205 (const_int 4) (const_int 4)
5206 (const_int 6) (const_int 6)
5207 (const_int 8) (const_int 8)
5208 (const_int 10) (const_int 10)
5209 (const_int 12) (const_int 12)
5210 (const_int 14) (const_int 14)])))]
5212 "vmovsldup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5213 [(set_attr "type" "sse")
5214 (set_attr "prefix" "evex")
5215 (set_attr "mode" "V16SF")])
;; Expander for the 256-bit shufps builtin form: operand 3 is the 8-bit
;; immediate.  It is decoded into the eight explicit element indices that
;; avx_shufps256_1 takes.  Each 2-bit field is used twice: once as is (fields
;; 2 and 3 biased by 8) and once more biased by a further 4, which yields
;; values in the 0..3, 8..11, 4..7 and 12..15 ranges respectively.
5217 (define_expand "avx_shufps256"
5218 [(match_operand:V8SF 0 "register_operand")
5219 (match_operand:V8SF 1 "register_operand")
5220 (match_operand:V8SF 2 "nonimmediate_operand")
5221 (match_operand:SI 3 "const_int_operand")]
5224 int mask = INTVAL (operands[3]);
5225 emit_insn (gen_avx_shufps256_1 (operands[0], operands[1], operands[2],
5226 GEN_INT ((mask >> 0) & 3),
5227 GEN_INT ((mask >> 2) & 3),
5228 GEN_INT (((mask >> 4) & 3) + 8),
5229 GEN_INT (((mask >> 6) & 3) + 8),
5230 GEN_INT (((mask >> 0) & 3) + 4),
5231 GEN_INT (((mask >> 2) & 3) + 4),
5232 GEN_INT (((mask >> 4) & 3) + 12),
5233 GEN_INT (((mask >> 6) & 3) + 12)));
5237 ;; One bit in mask selects 2 elements.
;; 256-bit vshufps with the shuffle spelled out as eight element indices
;; (operands 3..10).  The insn condition only accepts index sets in which
;; operands 7..10 equal operands 3..6 plus 4, because a single immediate has
;; to describe both halves.  The output code packs operands 3..6 back into
;; that immediate (two bits each, after removing the bias of 8 from operands
;; 5 and 6) and stores it in operands[3] for the template.
5238 (define_insn "avx_shufps256_1"
5239 [(set (match_operand:V8SF 0 "register_operand" "=x")
5242 (match_operand:V8SF 1 "register_operand" "x")
5243 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
5244 (parallel [(match_operand 3 "const_0_to_3_operand" )
5245 (match_operand 4 "const_0_to_3_operand" )
5246 (match_operand 5 "const_8_to_11_operand" )
5247 (match_operand 6 "const_8_to_11_operand" )
5248 (match_operand 7 "const_4_to_7_operand" )
5249 (match_operand 8 "const_4_to_7_operand" )
5250 (match_operand 9 "const_12_to_15_operand")
5251 (match_operand 10 "const_12_to_15_operand")])))]
5253 && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
5254 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
5255 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
5256 && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4))"
5259 mask = INTVAL (operands[3]);
5260 mask |= INTVAL (operands[4]) << 2;
5261 mask |= (INTVAL (operands[5]) - 8) << 4;
5262 mask |= (INTVAL (operands[6]) - 8) << 6;
5263 operands[3] = GEN_INT (mask);
5265 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
5267 [(set_attr "type" "sseshuf")
5268 (set_attr "length_immediate" "1")
5269 (set_attr "prefix" "vex")
5270 (set_attr "mode" "V8SF")])
;; Expander for 128-bit shufps with an 8-bit immediate (operand 3).  The
;; immediate is split into four 2-bit fields and handed to sse_shufps_v4sf
;; as explicit indices; the upper two fields are biased by 4 so they land in
;; the 4..7 range.
5272 (define_expand "sse_shufps"
5273 [(match_operand:V4SF 0 "register_operand")
5274 (match_operand:V4SF 1 "register_operand")
5275 (match_operand:V4SF 2 "nonimmediate_operand")
5276 (match_operand:SI 3 "const_int_operand")]
5279 int mask = INTVAL (operands[3]);
5280 emit_insn (gen_sse_shufps_v4sf (operands[0], operands[1], operands[2],
5281 GEN_INT ((mask >> 0) & 3),
5282 GEN_INT ((mask >> 2) & 3),
5283 GEN_INT (((mask >> 4) & 3) + 4),
5284 GEN_INT (((mask >> 6) & 3) + 4)));
;; shufps/vshufps on the VI4F_128 modes, written as a vec_select from the
;; concatenation of operands 1 and 2.  Operands 3 and 4 index into operand 1
;; (0..3), operands 5 and 6 into operand 2 (4..7).  The output code packs
;; the four indices into the instruction immediate, two bits each.
;; Alternative 0 is the two-operand legacy form (operand 1 tied to the
;; destination), alternative 1 the three-operand AVX form.
5288 (define_insn "sse_shufps_<mode>"
5289 [(set (match_operand:VI4F_128 0 "register_operand" "=x,x")
5290 (vec_select:VI4F_128
5291 (vec_concat:<ssedoublevecmode>
5292 (match_operand:VI4F_128 1 "register_operand" "0,x")
5293 (match_operand:VI4F_128 2 "nonimmediate_operand" "xm,xm"))
5294 (parallel [(match_operand 3 "const_0_to_3_operand")
5295 (match_operand 4 "const_0_to_3_operand")
5296 (match_operand 5 "const_4_to_7_operand")
5297 (match_operand 6 "const_4_to_7_operand")])))]
5301 mask |= INTVAL (operands[3]) << 0;
5302 mask |= INTVAL (operands[4]) << 2;
5303 mask |= (INTVAL (operands[5]) - 4) << 4;
5304 mask |= (INTVAL (operands[6]) - 4) << 6;
5305 operands[3] = GEN_INT (mask);
5307 switch (which_alternative)
5310 return "shufps\t{%3, %2, %0|%0, %2, %3}";
5312 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
5317 [(set_attr "isa" "noavx,avx")
5318 (set_attr "type" "sseshuf")
5319 (set_attr "length_immediate" "1")
5320 (set_attr "prefix" "orig,vex")
5321 (set_attr "mode" "V4SF")])
;; Extract elements 2 and 3 of a V4SF into a V2SF destination.
;; Alternatives: store to memory (movhps), register to register (movhlps),
;; and load from offsettable memory (movlps using %H1 -- presumably the
;; address of the upper half of operand 1; confirm against the i386
;; operand-print codes).
5323 (define_insn "sse_storehps"
5324 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
5326 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
5327 (parallel [(const_int 2) (const_int 3)])))]
5330 %vmovhps\t{%1, %0|%q0, %1}
5331 %vmovhlps\t{%1, %d0|%d0, %1}
5332 %vmovlps\t{%H1, %d0|%d0, %H1}"
5333 [(set_attr "type" "ssemov")
5334 (set_attr "ssememalign" "64")
5335 (set_attr "prefix" "maybe_vex")
5336 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander wrapper around sse_loadhps.  The operands are first run through
;; ix86_fixup_binary_operands, which returns the destination to use; the
;; real insn is emitted into that destination and, if it differs from
;; operands[0], the result is copied back with an ordinary move.
5338 (define_expand "sse_loadhps_exp"
5339 [(set (match_operand:V4SF 0 "nonimmediate_operand")
5342 (match_operand:V4SF 1 "nonimmediate_operand")
5343 (parallel [(const_int 0) (const_int 1)]))
5344 (match_operand:V2SF 2 "nonimmediate_operand")))]
5347 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
5349 emit_insn (gen_sse_loadhps (dst, operands[1], operands[2]));
5351 /* Fix up the destination if needed. */
5352 if (dst != operands[0])
5353 emit_move_insn (operands[0], dst);
;; Combine elements 0 and 1 of operand 1 (V4SF) with the V2SF operand 2.
;; Five alternatives: operand 2 from memory (movhps / vmovhps), operand 2
;; from a register (movlhps / vmovlhps), and a memory destination tied to
;; operand 1, which is emitted as a movlps store to %H0.
5358 (define_insn "sse_loadhps"
5359 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,o")
5362 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
5363 (parallel [(const_int 0) (const_int 1)]))
5364 (match_operand:V2SF 2 "nonimmediate_operand" " m,m,x,x,x")))]
5367 movhps\t{%2, %0|%0, %q2}
5368 vmovhps\t{%2, %1, %0|%0, %1, %q2}
5369 movlhps\t{%2, %0|%0, %2}
5370 vmovlhps\t{%2, %1, %0|%0, %1, %2}
5371 %vmovlps\t{%2, %H0|%H0, %2}"
5372 [(set_attr "isa" "noavx,avx,noavx,avx,*")
5373 (set_attr "type" "ssemov")
5374 (set_attr "ssememalign" "64")
5375 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
5376 (set_attr "mode" "V2SF,V2SF,V4SF,V4SF,V2SF")])
;; Extract elements 0 and 1 of a V4SF into a V2SF destination.
;; Alternatives: store to memory (movlps), register to register (a full
;; movaps copy), and load from memory (movlps).
5378 (define_insn "sse_storelps"
5379 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
5381 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,m")
5382 (parallel [(const_int 0) (const_int 1)])))]
5385 %vmovlps\t{%1, %0|%q0, %1}
5386 %vmovaps\t{%1, %0|%0, %1}
5387 %vmovlps\t{%1, %d0|%d0, %q1}"
5388 [(set_attr "type" "ssemov")
5389 (set_attr "prefix" "maybe_vex")
5390 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander wrapper around sse_loadlps.  The operands are fixed up with
;; ix86_fixup_binary_operands, sse_loadlps is emitted into the destination
;; that call returns, and the result is moved to operands[0] when the two
;; differ.
5392 (define_expand "sse_loadlps_exp"
5393 [(set (match_operand:V4SF 0 "nonimmediate_operand")
5395 (match_operand:V2SF 2 "nonimmediate_operand")
5397 (match_operand:V4SF 1 "nonimmediate_operand")
5398 (parallel [(const_int 2) (const_int 3)]))))]
5401 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
5403 emit_insn (gen_sse_loadlps (dst, operands[1], operands[2]));
5405 /* Fix up the destination if needed. */
5406 if (dst != operands[0])
5407 emit_move_insn (operands[0], dst);
;; Combine the V2SF operand 2 with elements 2 and 3 of operand 1 (V4SF).
;; Five alternatives: operand 2 in a register (shufps / vshufps with the
;; fixed immediate 0xe4), operand 2 in memory (movlps / vmovlps), and a
;; memory destination tied to operand 1 (movlps store).  Only the two
;; shuffle alternatives carry an immediate byte (length_immediate "1,1,*,*,*").
5412 (define_insn "sse_loadlps"
5413 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,m")
5415 (match_operand:V2SF 2 "nonimmediate_operand" " 0,x,m,m,x")
5417 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,0,x,0")
5418 (parallel [(const_int 2) (const_int 3)]))))]
5421 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
5422 vshufps\t{$0xe4, %1, %2, %0|%0, %2, %1, 0xe4}
5423 movlps\t{%2, %0|%0, %q2}
5424 vmovlps\t{%2, %1, %0|%0, %1, %q2}
5425 %vmovlps\t{%2, %0|%q0, %2}"
5426 [(set_attr "isa" "noavx,avx,noavx,avx,*")
5427 (set_attr "type" "sseshuf,sseshuf,ssemov,ssemov,ssemov")
5428 (set_attr "ssememalign" "64")
5429 (set_attr "length_immediate" "1,1,*,*,*")
5430 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
5431 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
;; Register-only movss/vmovss between V4SF values: operand 2 is the source
;; named in the template and operand 1 the other input (tied to the
;; destination in the non-AVX alternative).  The RTL operator and selector
;; that combine the two inputs are not shown in this listing.
5433 (define_insn "sse_movss"
5434 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
5436 (match_operand:V4SF 2 "register_operand" " x,x")
5437 (match_operand:V4SF 1 "register_operand" " 0,x")
5441 movss\t{%2, %0|%0, %2}
5442 vmovss\t{%2, %1, %0|%0, %1, %2}"
5443 [(set_attr "isa" "noavx,avx")
5444 (set_attr "type" "ssemov")
5445 (set_attr "prefix" "orig,vex")
5446 (set_attr "mode" "SF")])
;; Broadcast element 0 of a V4SF register to every element of a
;; VF1_128_256 destination using the register-source form of vbroadcastss.
5448 (define_insn "avx2_vec_dup<mode>"
5449 [(set (match_operand:VF1_128_256 0 "register_operand" "=x")
5450 (vec_duplicate:VF1_128_256
5452 (match_operand:V4SF 1 "register_operand" "x")
5453 (parallel [(const_int 0)]))))]
5455 "vbroadcastss\t{%1, %0|%0, %1}"
5456 [(set_attr "type" "sselog1")
5457 (set_attr "prefix" "vex")
5458 (set_attr "mode" "<MODE>")])
;; Variant of the broadcast above whose source is a V8SF register; element
;; 0 is selected.  The template prints the source with %x1 (presumably the
;; 128-bit name of the same register -- confirm against the i386
;; operand-print codes).
5460 (define_insn "avx2_vec_dupv8sf_1"
5461 [(set (match_operand:V8SF 0 "register_operand" "=x")
5464 (match_operand:V8SF 1 "register_operand" "x")
5465 (parallel [(const_int 0)]))))]
5467 "vbroadcastss\t{%x1, %0|%0, %x1}"
5468 [(set_attr "type" "sselog1")
5469 (set_attr "prefix" "vex")
5470 (set_attr "mode" "V8SF")])
;; Build a V4SF from a single SF operand.  Alternatives: AVX with a
;; register source (vshufps with immediate 0), AVX with a memory source
;; (vbroadcastss), and non-AVX with the source tied to the destination
;; (shufps with immediate 0).
5472 (define_insn "vec_dupv4sf"
5473 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
5475 (match_operand:SF 1 "nonimmediate_operand" "x,m,0")))]
5478 vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0}
5479 vbroadcastss\t{%1, %0|%0, %1}
5480 shufps\t{$0, %0, %0|%0, %0, 0}"
5481 [(set_attr "isa" "avx,avx,noavx")
5482 (set_attr "type" "sseshuf1,ssemov,sseshuf1")
5483 (set_attr "length_immediate" "1,0,1")
5484 (set_attr "prefix_extra" "0,1,*")
5485 (set_attr "prefix" "vex,vex,orig")
5486 (set_attr "mode" "V4SF")])
5488 ;; Although insertps takes register source, we prefer
5489 ;; unpcklps with register source since it is shorter.
;; Form a V2SF from two SF operands.  Seven alternatives, in order:
;; unpcklps / vunpcklps (operand 2 in a register), insertps / vinsertps
;; (operand 2 in memory), movss (operand 1 from memory, operand 2 matching
;; constraint C), then two MMX-register alternatives (punpckldq, movd).
5490 (define_insn "*vec_concatv2sf_sse4_1"
5491 [(set (match_operand:V2SF 0 "register_operand" "=x,x,x,x,x,*y ,*y")
5493 (match_operand:SF 1 "nonimmediate_operand" " 0,x,0,x,m, 0 , m")
5494 (match_operand:SF 2 "vector_move_operand" " x,x,m,m,C,*ym, C")))]
5497 unpcklps\t{%2, %0|%0, %2}
5498 vunpcklps\t{%2, %1, %0|%0, %1, %2}
5499 insertps\t{$0x10, %2, %0|%0, %2, 0x10}
5500 vinsertps\t{$0x10, %2, %1, %0|%0, %1, %2, 0x10}
5501 %vmovss\t{%1, %0|%0, %1}
5502 punpckldq\t{%2, %0|%0, %2}
5503 movd\t{%1, %0|%0, %1}"
5504 [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
5505 (set_attr "type" "sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov")
5506 (set_attr "prefix_data16" "*,*,1,*,*,*,*")
5507 (set_attr "prefix_extra" "*,*,1,1,*,*,*")
5508 (set_attr "length_immediate" "*,*,1,1,*,*,*")
5509 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig")
5510 (set_attr "mode" "V4SF,V4SF,V4SF,V4SF,SF,DI,DI")])
5512 ;; ??? In theory we can match memory for the MMX alternative, but allowing
5513 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
5514 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Plain-SSE version of the V2SF concat: operand 2 must be a register or
;; match constraint C (reg_or_0_operand).  Alternatives: unpcklps, movss
;; from memory, and the two MMX-register forms (punpckldq, movd).
5515 (define_insn "*vec_concatv2sf_sse"
5516 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
5518 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
5519 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
5522 unpcklps\t{%2, %0|%0, %2}
5523 movss\t{%1, %0|%0, %1}
5524 punpckldq\t{%2, %0|%0, %2}
5525 movd\t{%1, %0|%0, %1}"
5526 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
5527 (set_attr "mode" "V4SF,SF,DI,DI")])
;; Form a V4SF from two V2SF halves.  Operand 2 in a register uses
;; movlhps / vmovlhps; operand 2 in memory uses movhps / vmovhps.  In the
;; non-AVX alternatives operand 1 is tied to the destination.
5529 (define_insn "*vec_concatv4sf"
5530 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x,x")
5532 (match_operand:V2SF 1 "register_operand" " 0,x,0,x")
5533 (match_operand:V2SF 2 "nonimmediate_operand" " x,x,m,m")))]
5536 movlhps\t{%2, %0|%0, %2}
5537 vmovlhps\t{%2, %1, %0|%0, %1, %2}
5538 movhps\t{%2, %0|%0, %q2}
5539 vmovhps\t{%2, %1, %0|%0, %1, %q2}"
5540 [(set_attr "isa" "noavx,avx,noavx,avx")
5541 (set_attr "type" "ssemov")
5542 (set_attr "prefix" "orig,vex,orig,vex")
5543 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF")])
;; vec_init expander for the 128-bit vector modes (V_128).  All the work is
;; delegated to ix86_expand_vector_init, called with `false' as its first
;; argument, operands[0] as the target and operands[1] as the initializer.
5545 (define_expand "vec_init<mode>"
5546 [(match_operand:V_128 0 "register_operand")
5550 ix86_expand_vector_init (false, operands[0], operands[1]);
5554 ;; Avoid combining registers from different units in a single alternative,
5555 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Set element 0 of a VI4F_128 vector from the scalar operand 2, taking the
;; remaining elements from operand 1.  There are eleven alternatives:
;;   0-3   operand 1 matches constraint C
;;   4-7   operand 1 is a register (movss/vmovss, pinsrd/vpinsrd)
;;   8-10  memory destination tied to operand 1, scalar taken from an SSE,
;;         general or x87 register (constraints !x, !*re, !*fF)
;; Only the first eight output templates are visible here.  The "type"
;; attribute is computed per alternative by the cond expression below.
5556 (define_insn "vec_set<mode>_0"
5557 [(set (match_operand:VI4F_128 0 "nonimmediate_operand"
5558 "=x,x,x ,x,x,x,x ,x ,m ,m ,m")
5560 (vec_duplicate:VI4F_128
5561 (match_operand:<ssescalarmode> 2 "general_operand"
5562 " x,m,*r,m,x,x,*rm,*rm,!x,!*re,!*fF"))
5563 (match_operand:VI4F_128 1 "vector_move_operand"
5564 " C,C,C ,C,0,x,0 ,x ,0 ,0 ,0")
5568 %vinsertps\t{$0xe, %d2, %0|%0, %d2, 0xe}
5569 %vmov<ssescalarmodesuffix>\t{%2, %0|%0, %2}
5570 %vmovd\t{%2, %0|%0, %2}
5571 movss\t{%2, %0|%0, %2}
5572 movss\t{%2, %0|%0, %2}
5573 vmovss\t{%2, %1, %0|%0, %1, %2}
5574 pinsrd\t{$0, %2, %0|%0, %2, 0}
5575 vpinsrd\t{$0, %2, %1, %0|%0, %1, %2, 0}
5579 [(set_attr "isa" "sse4,sse2,sse2,noavx,noavx,avx,sse4_noavx,avx,*,*,*")
5581 (cond [(eq_attr "alternative" "0,6,7")
5582 (const_string "sselog")
5583 (eq_attr "alternative" "9")
5584 (const_string "imov")
5585 (eq_attr "alternative" "10")
5586 (const_string "fmov")
5588 (const_string "ssemov")))
5589 (set_attr "prefix_extra" "*,*,*,*,*,*,1,1,*,*,*")
5590 (set_attr "length_immediate" "*,*,*,*,*,*,1,1,*,*,*")
5591 (set_attr "prefix" "maybe_vex,maybe_vex,maybe_vex,orig,orig,vex,orig,vex,*,*,*")
5592 (set_attr "mode" "SF,<ssescalarmode>,SI,SF,SF,SF,TI,TI,*,*,*")])
5594 ;; A subset is vec_setv4sf.
;; Insert the SF operand 2 into one element of the V4SF operand 1 with
;; insertps/vinsertps.  Operand 3 is a one-hot element mask: the condition
;; requires exact_log2 of it to be a valid V4SF element number (the unsigned
;; cast also rejects the -1 returned for non-powers of two).  The output
;; code turns it into the instruction immediate by placing that element
;; number at bit 4.
5595 (define_insn "*vec_setv4sf_sse4_1"
5596 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
5599 (match_operand:SF 2 "nonimmediate_operand" "xm,xm"))
5600 (match_operand:V4SF 1 "register_operand" "0,x")
5601 (match_operand:SI 3 "const_int_operand")))]
5603 && ((unsigned) exact_log2 (INTVAL (operands[3]))
5604 < GET_MODE_NUNITS (V4SFmode))"
5606 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
5607 switch (which_alternative)
5610 return "insertps\t{%3, %2, %0|%0, %2, %3}";
5612 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
5617 [(set_attr "isa" "noavx,avx")
5618 (set_attr "type" "sselog")
5619 (set_attr "prefix_data16" "1,*")
5620 (set_attr "prefix_extra" "1")
5621 (set_attr "length_immediate" "1")
5622 (set_attr "prefix" "orig,vex")
5623 (set_attr "mode" "V4SF")])
;; insertps/vinsertps with an explicit 8-bit immediate (operand 3), modelled
;; as an unspec.  When operand 2 is in memory, the top two immediate bits
;; (count_s) are folded into the address instead: the memory reference is
;; narrowed to SFmode at byte offset count_s * 4 and those bits are cleared
;; from the immediate (& 0x3f).
5625 (define_insn "sse4_1_insertps"
5626 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
5627 (unspec:V4SF [(match_operand:V4SF 2 "nonimmediate_operand" "xm,xm")
5628 (match_operand:V4SF 1 "register_operand" "0,x")
5629 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
5633 if (MEM_P (operands[2]))
5635 unsigned count_s = INTVAL (operands[3]) >> 6;
5637 operands[3] = GEN_INT (INTVAL (operands[3]) & 0x3f);
5638 operands[2] = adjust_address_nv (operands[2], SFmode, count_s * 4);
5640 switch (which_alternative)
5643 return "insertps\t{%3, %2, %0|%0, %2, %3}";
5645 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
5650 [(set_attr "isa" "noavx,avx")
5651 (set_attr "type" "sselog")
5652 (set_attr "prefix_data16" "1,*")
5653 (set_attr "prefix_extra" "1")
5654 (set_attr "length_immediate" "1")
5655 (set_attr "prefix" "orig,vex")
5656 (set_attr "mode" "V4SF")])
;; Post-reload split (TARGET_SSE): a memory VI4F_128 destination set from a
;; duplicated non-memory scalar is rewritten as a plain scalar store.  The
;; preparation statement re-types the destination as a scalar-mode memory
;; reference at offset 0.
5659 [(set (match_operand:VI4F_128 0 "memory_operand")
5661 (vec_duplicate:VI4F_128
5662 (match_operand:<ssescalarmode> 1 "nonmemory_operand"))
5665 "TARGET_SSE && reload_completed"
5666 [(set (match_dup 0) (match_dup 1))]
5667 "operands[0] = adjust_address (operands[0], <ssescalarmode>mode, 0);")
;; vec_set expander for every mode in iterator V.  Operand 0 is the vector,
;; operand 1 the scalar and operand 2 a constant element index; the work is
;; delegated to ix86_expand_vector_set, called with `false' as its first
;; argument.
5669 (define_expand "vec_set<mode>"
5670 [(match_operand:V 0 "register_operand")
5671 (match_operand:<ssescalarmode> 1 "register_operand")
5672 (match_operand 2 "const_int_operand")]
5675 ix86_expand_vector_set (false, operands[0], operands[1],
5676 INTVAL (operands[2]));
;; Extract element 0 of a V4SF.  The insn condition rejects memory-to-memory.
;; After reload the insn is split into an ordinary SF move: a register
;; source is re-expressed as the same register number in SFmode, a memory
;; source as an SFmode reference at offset 0.
5680 (define_insn_and_split "*vec_extractv4sf_0"
5681 [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,f,r")
5683 (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m,m")
5684 (parallel [(const_int 0)])))]
5685 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
5687 "&& reload_completed"
5688 [(set (match_dup 0) (match_dup 1))]
5690 if (REG_P (operands[1]))
5691 operands[1] = gen_rtx_REG (SFmode, REGNO (operands[1]));
5693 operands[1] = adjust_address (operands[1], SFmode, 0);
;; Extract element 0..3 (operand 2) of a V4SF register.  Alternative 0
;; targets a general register or memory and emits extractps directly.  The
;; other two alternatives target an SSE register and are split after reload:
;; the destination is viewed as V4SF and, depending on the index, filled
;; either by a shufps that replicates the requested element or by
;; vec_interleave_highv4sf of the source with itself.  Index 0 is expected
;; to be matched by *vec_extractv4sf_0 instead.
5696 (define_insn_and_split "*sse4_1_extractps"
5697 [(set (match_operand:SF 0 "nonimmediate_operand" "=rm,x,x")
5699 (match_operand:V4SF 1 "register_operand" "x,0,x")
5700 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n,n,n")])))]
5703 %vextractps\t{%2, %1, %0|%0, %1, %2}
5706 "&& reload_completed && SSE_REG_P (operands[0])"
5709 rtx dest = gen_rtx_REG (V4SFmode, REGNO (operands[0]));
5710 switch (INTVAL (operands[2]))
5714 emit_insn (gen_sse_shufps_v4sf (dest, operands[1], operands[1],
5715 operands[2], operands[2],
5716 GEN_INT (INTVAL (operands[2]) + 4),
5717 GEN_INT (INTVAL (operands[2]) + 4)));
5720 emit_insn (gen_vec_interleave_highv4sf (dest, operands[1], operands[1]));
5723 /* 0 should be handled by the *vec_extractv4sf_0 pattern above. */
5728 [(set_attr "isa" "*,noavx,avx")
5729 (set_attr "type" "sselog,*,*")
5730 (set_attr "prefix_data16" "1,*,*")
5731 (set_attr "prefix_extra" "1,*,*")
5732 (set_attr "length_immediate" "1,*,*")
5733 (set_attr "prefix" "maybe_vex,*,*")
5734 (set_attr "mode" "V4SF,*,*")])
;; Extract element 0..3 of a V4SF that lives in offsettable memory.  After
;; reload this is split into a plain SF load from byte offset index * 4.
5736 (define_insn_and_split "*vec_extractv4sf_mem"
5737 [(set (match_operand:SF 0 "register_operand" "=x,*r,f")
5739 (match_operand:V4SF 1 "memory_operand" "o,o,o")
5740 (parallel [(match_operand 2 "const_0_to_3_operand" "n,n,n")])))]
5743 "&& reload_completed"
5744 [(set (match_dup 0) (match_dup 1))]
5746 operands[1] = adjust_address (operands[1], SFmode, INTVAL (operands[2]) * 4);
;; Expander for the masked vextract{f,i}32x4 builtin form.  Operand 2 picks
;; one of the four quarter-width groups; it is translated into the four
;; consecutive element indices (0-3, 4-7, 8-11 or 12-15) passed to the
;; ..._32x4_1_mask pattern, with operand 3 as the merge source and operand 4
;; as the mask register.
;; NOTE(review): when operand 0 is memory and operand 3 is a CONST_VECTOR,
;; operand 0 is replaced by force_reg of itself; no store back to the
;; original memory is visible in this listing -- confirm that is intended.
5749 (define_expand "avx512f_vextract<shuffletype>32x4_mask"
5750 [(match_operand:<ssequartermode> 0 "nonimmediate_operand")
5751 (match_operand:V16FI 1 "register_operand")
5752 (match_operand:SI 2 "const_0_to_3_operand")
5753 (match_operand:<ssequartermode> 3 "nonimmediate_operand")
5754 (match_operand:QI 4 "register_operand")]
5757 if (MEM_P (operands[0]) && GET_CODE (operands[3]) == CONST_VECTOR)
5758 operands[0] = force_reg (<ssequartermode>mode, operands[0]);
5759 switch (INTVAL (operands[2]))
5762 emit_insn (gen_avx512f_vextract<shuffletype>32x4_1_mask (operands[0],
5763 operands[1], GEN_INT (0), GEN_INT (1), GEN_INT (2),
5764 GEN_INT (3), operands[3], operands[4]));
5767 emit_insn (gen_avx512f_vextract<shuffletype>32x4_1_mask (operands[0],
5768 operands[1], GEN_INT (4), GEN_INT (5), GEN_INT (6),
5769 GEN_INT (7), operands[3], operands[4]));
5772 emit_insn (gen_avx512f_vextract<shuffletype>32x4_1_mask (operands[0],
5773 operands[1], GEN_INT (8), GEN_INT (9), GEN_INT (10),
5774 GEN_INT (11), operands[3], operands[4]));
5777 emit_insn (gen_avx512f_vextract<shuffletype>32x4_1_mask (operands[0],
5778 operands[1], GEN_INT (12), GEN_INT (13), GEN_INT (14),
5779 GEN_INT (15), operands[3], operands[4]));
;; Masked-store form of vextract{f,i}32x4: four elements of the V16FI
;; register operand 1 are merged into the memory destination (operand 6 is
;; the same memory, constraint "0") under mask register operand 7.
;;
;; The insn condition accepts only four consecutive element indices
;; (operands 2..5) whose first index is a multiple of 4, because the output
;; code derives the instruction immediate as operands[2] >> 2.
;;
;; The index comparisons must use `=='.  The earlier text used `=', which
;; is accepted because INTVAL yields an lvalue: it overwrote the CONST_INT
;; operands and tested the assigned value instead of comparing.
5787 (define_insn "avx512f_vextract<shuffletype>32x4_1_maskm"
5788 [(set (match_operand:<ssequartermode> 0 "memory_operand" "=m")
5789 (vec_merge:<ssequartermode>
5790 (vec_select:<ssequartermode>
5791 (match_operand:V16FI 1 "register_operand" "v")
5792 (parallel [(match_operand 2 "const_0_to_15_operand")
5793 (match_operand 3 "const_0_to_15_operand")
5794 (match_operand 4 "const_0_to_15_operand")
5795 (match_operand 5 "const_0_to_15_operand")]))
5796 (match_operand:<ssequartermode> 6 "memory_operand" "0")
5797 (match_operand:QI 7 "register_operand" "k")))]
5798 "TARGET_AVX512F && (INTVAL (operands[2]) % 4 == 0) && (INTVAL (operands[2]) == INTVAL (operands[3]) - 1)
5799 && (INTVAL (operands[3]) == INTVAL (operands[4]) - 1)
5800 && (INTVAL (operands[4]) == INTVAL (operands[5]) - 1)"
5802 operands[2] = GEN_INT ((INTVAL (operands[2])) >> 2);
5803 return "vextract<shuffletype>32x4\t{%2, %1, %0%{%7%}|%0%{%7%}, %1, %2}";
5805 [(set_attr "type" "sselog")
5806 (set_attr "prefix_extra" "1")
5807 (set_attr "length_immediate" "1")
5808 (set_attr "memory" "store")
5809 (set_attr "prefix" "evex")
5810 (set_attr "mode" "<sseinsnmode>")])
;; vextract{f,i}32x4: select four elements of the V16FI register operand 1
;; into a quarter-width destination.  The <mask_*> and <store_mask_*>
;; placeholders are filled in elsewhere in the file to produce the masked
;; variant.
;;
;; The insn condition accepts only four consecutive element indices
;; (operands 2..5) whose first index is a multiple of 4, because the output
;; code derives the instruction immediate as operands[2] >> 2.
;;
;; The index comparisons must use `=='.  The earlier text used `=', which
;; is accepted because INTVAL yields an lvalue: it overwrote the CONST_INT
;; operands and tested the assigned value instead of comparing.
5812 (define_insn "<mask_codefor>avx512f_vextract<shuffletype>32x4_1<mask_name>"
5813 [(set (match_operand:<ssequartermode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
5814 (vec_select:<ssequartermode>
5815 (match_operand:V16FI 1 "register_operand" "v")
5816 (parallel [(match_operand 2 "const_0_to_15_operand")
5817 (match_operand 3 "const_0_to_15_operand")
5818 (match_operand 4 "const_0_to_15_operand")
5819 (match_operand 5 "const_0_to_15_operand")])))]
5820 "TARGET_AVX512F && (INTVAL (operands[2]) % 4 == 0) && (INTVAL (operands[2]) == INTVAL (operands[3]) - 1)
5821 && (INTVAL (operands[3]) == INTVAL (operands[4]) - 1)
5822 && (INTVAL (operands[4]) == INTVAL (operands[5]) - 1)"
5824 operands[2] = GEN_INT ((INTVAL (operands[2])) >> 2);
5825 return "vextract<shuffletype>32x4\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
5827 [(set_attr "type" "sselog")
5828 (set_attr "prefix_extra" "1")
5829 (set_attr "length_immediate" "1")
5830 (set (attr "memory")
5831 (if_then_else (match_test "MEM_P (operands[0])")
5832 (const_string "store")
5833 (const_string "none")))
5834 (set_attr "prefix" "evex")
5835 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the masked vextract{f,i}64x4 builtin form.  Operand 2 (0 or
;; 1) selects between the vec_extract_lo_<mode>_mask and
;; vec_extract_hi_<mode>_mask generators; operand 3 is the merge source and
;; operand 4 the mask register.
;;
;; Operand 0 is a half-width vector (<ssehalfvecmode>), so the force_reg
;; below must use that mode.  The earlier text passed <ssequartermode>mode,
;; which does not match the operand being forced.
5837 (define_expand "avx512f_vextract<shuffletype>64x4_mask"
5838 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
5839 (match_operand:V8FI 1 "register_operand")
5840 (match_operand:SI 2 "const_0_to_1_operand")
5841 (match_operand:<ssehalfvecmode> 3 "nonimmediate_operand")
5842 (match_operand:QI 4 "register_operand")]
5845 rtx (*insn)(rtx, rtx, rtx, rtx);
5847 if (MEM_P (operands[0]) && GET_CODE (operands[3]) == CONST_VECTOR)
5848 operands[0] = force_reg (<ssehalfvecmode>mode, operands[0]);
5850 switch (INTVAL (operands[2]))
5853 insn = gen_vec_extract_lo_<mode>_mask;
5856 insn = gen_vec_extract_hi_<mode>_mask;
5862 emit_insn (insn (operands[0], operands[1], operands[3], operands[4]));
;; Post-reload split (TARGET_AVX512F, not memory-to-memory): extracting
;; elements 0..3 of a V8FI value is rewritten as a plain half-width move,
;; either from the same register number viewed in the half-width mode or
;; from gen_lowpart of the source.
5867 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
5868 (vec_select:<ssehalfvecmode>
5869 (match_operand:V8FI 1 "nonimmediate_operand")
5870 (parallel [(const_int 0) (const_int 1)
5871 (const_int 2) (const_int 3)])))]
5872 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))
5873 && reload_completed"
5876 rtx op1 = operands[1];
5878 op1 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op1));
5880 op1 = gen_lowpart (<ssehalfvecmode>mode, op1);
5881 emit_move_insn (operands[0], op1);
;; Masked store of the low half (elements 0..3) of a V8FI register:
;; vextract{f,i}64x4 with immediate 0 into memory, merging with the existing
;; memory contents (operand 2, tied to operand 0) under mask register
;; operand 3.
5885 (define_insn "vec_extract_lo_<mode>_maskm"
5886 [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
5887 (vec_merge:<ssehalfvecmode>
5888 (vec_select:<ssehalfvecmode>
5889 (match_operand:V8FI 1 "register_operand" "v")
5890 (parallel [(const_int 0) (const_int 1)
5891 (const_int 2) (const_int 3)]))
5892 (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
5893 (match_operand:QI 3 "register_operand" "k")))]
5895 "vextract<shuffletype>64x4\t{$0x0, %1, %0%{%3%}|%0%{%3%}, %1, 0x0}"
5896 [(set_attr "type" "sselog")
5897 (set_attr "prefix_extra" "1")
5898 (set_attr "length_immediate" "1")
5899 (set_attr "prefix" "evex")
5900 (set_attr "mode" "<sseinsnmode>")])
;; Extract the low half (elements 0..3) of a V8FI value with
;; vextract{f,i}64x4 immediate 0.  Memory-to-memory is rejected by the insn
;; condition; the "memory" attribute is "store" when the destination is
;; memory and "none" otherwise.
5902 (define_insn "vec_extract_lo_<mode><mask_name>"
5903 [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
5904 (vec_select:<ssehalfvecmode>
5905 (match_operand:V8FI 1 "nonimmediate_operand" "vm")
5906 (parallel [(const_int 0) (const_int 1)
5907 (const_int 2) (const_int 3)])))]
5908 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
5911 return "vextract<shuffletype>64x4\t{$0x0, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x0}";
5915 [(set_attr "type" "sselog")
5916 (set_attr "prefix_extra" "1")
5917 (set_attr "length_immediate" "1")
5918 (set (attr "memory")
5919 (if_then_else (match_test "MEM_P (operands[0])")
5920 (const_string "store")
5921 (const_string "none")))
5922 (set_attr "prefix" "evex")
5923 (set_attr "mode" "<sseinsnmode>")])
;; Masked store of the high half (elements 4..7) of a V8FI register:
;; vextract{f,i}64x4 with immediate 1 into memory, merging with the existing
;; memory contents (operand 2, tied to operand 0) under mask register
;; operand 3.
5925 (define_insn "vec_extract_hi_<mode>_maskm"
5926 [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
5927 (vec_merge:<ssehalfvecmode>
5928 (vec_select:<ssehalfvecmode>
5929 (match_operand:V8FI 1 "register_operand" "v")
5930 (parallel [(const_int 4) (const_int 5)
5931 (const_int 6) (const_int 7)]))
5932 (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
5933 (match_operand:QI 3 "register_operand" "k")))]
5935 "vextract<shuffletype>64x4\t{$0x1, %1, %0%{%3%}|%0%{%3%}, %1, 0x1}"
5936 [(set_attr "type" "sselog")
5937 (set_attr "prefix_extra" "1")
5938 (set_attr "length_immediate" "1")
5939 (set_attr "memory" "store")
5940 (set_attr "prefix" "evex")
5941 (set_attr "mode" "<sseinsnmode>")])
;; Extract the high half (elements 4..7) of a V8FI register with
;; vextract{f,i}64x4 immediate 1.  The "memory" attribute is "store" when
;; the destination is memory and "none" otherwise.
5943 (define_insn "vec_extract_hi_<mode><mask_name>"
5944 [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
5945 (vec_select:<ssehalfvecmode>
5946 (match_operand:V8FI 1 "register_operand" "v")
5947 (parallel [(const_int 4) (const_int 5)
5948 (const_int 6) (const_int 7)])))]
5950 "vextract<shuffletype>64x4\t{$0x1, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x1}"
5951 [(set_attr "type" "sselog")
5952 (set_attr "prefix_extra" "1")
5953 (set_attr "length_immediate" "1")
5954 (set (attr "memory")
5955 (if_then_else (match_test "MEM_P (operands[0])")
5956 (const_string "store")
5957 (const_string "none")))
5958 (set_attr "prefix" "evex")
5959 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the 128-bit extract builtin form on 256-bit vectors
;; (V_256).  Operand 2 (0 or 1) selects between the vec_extract_lo_<mode>
;; and vec_extract_hi_<mode> generators, which is then called with the
;; destination and source.
5961 (define_expand "avx_vextractf128<mode>"
5962 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
5963 (match_operand:V_256 1 "register_operand")
5964 (match_operand:SI 2 "const_0_to_1_operand")]
5967 rtx (*insn)(rtx, rtx);
5969 switch (INTVAL (operands[2]))
5972 insn = gen_vec_extract_lo_<mode>;
5975 insn = gen_vec_extract_hi_<mode>;
5981 emit_insn (insn (operands[0], operands[1]));
;; Low half (elements 0..7) of a V16FI value.  Memory-to-memory is rejected.
;; After reload the insn is split into a plain half-width move, either from
;; the same register number viewed in the half-width mode or from
;; gen_lowpart of the source.
5985 (define_insn_and_split "vec_extract_lo_<mode>"
5986 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,m")
5987 (vec_select:<ssehalfvecmode>
5988 (match_operand:V16FI 1 "nonimmediate_operand" "vm,v")
5989 (parallel [(const_int 0) (const_int 1)
5990 (const_int 2) (const_int 3)
5991 (const_int 4) (const_int 5)
5992 (const_int 6) (const_int 7)])))]
5993 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
5995 "&& reload_completed"
5998 rtx op1 = operands[1];
6000 op1 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op1));
6002 op1 = gen_lowpart (<ssehalfvecmode>mode, op1);
6003 emit_move_insn (operands[0], op1);
;; High half (elements 8..15) of a V16FI value via vextracti64x4 immediate
;; 1, to a register or to memory.  The integer mnemonic is used for every
;; mode of the iterator and the "mode" attribute is XI.
;; NOTE(review): operand 1's predicate is nonimmediate_operand although both
;; constraint alternatives are "v" -- confirm whether register_operand was
;; intended.
6007 (define_insn "vec_extract_hi_<mode>"
6008 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,m")
6009 (vec_select:<ssehalfvecmode>
6010 (match_operand:V16FI 1 "nonimmediate_operand" "v,v")
6011 (parallel [(const_int 8) (const_int 9)
6012 (const_int 10) (const_int 11)
6013 (const_int 12) (const_int 13)
6014 (const_int 14) (const_int 15)])))]
6016 "vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
6017 [(set_attr "type" "sselog")
6018 (set_attr "prefix_extra" "1")
6019 (set_attr "length_immediate" "1")
6020 (set_attr "memory" "none,store")
6021 (set_attr "prefix" "evex")
6022 (set_attr "mode" "XI")])
;; Low half (elements 0..1) of a VI8F_256 value.  Memory-to-memory is
;; rejected.  After reload the insn is split into a plain half-width move:
;; a register source becomes the same register number in the half-width
;; mode, a memory source a half-width reference at offset 0.
6024 (define_insn_and_split "vec_extract_lo_<mode>"
6025 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
6026 (vec_select:<ssehalfvecmode>
6027 (match_operand:VI8F_256 1 "nonimmediate_operand" "xm,x")
6028 (parallel [(const_int 0) (const_int 1)])))]
6029 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6031 "&& reload_completed"
6032 [(set (match_dup 0) (match_dup 1))]
6034 if (REG_P (operands[1]))
6035 operands[1] = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (operands[1]));
6037 operands[1] = adjust_address (operands[1], <ssehalfvecmode>mode, 0);
;; High half (elements 2..3) of a VI8F_256 register, to a register or to
;; memory, using vextract<i128> with immediate 1.
6040 (define_insn "vec_extract_hi_<mode>"
6041 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
6042 (vec_select:<ssehalfvecmode>
6043 (match_operand:VI8F_256 1 "register_operand" "x,x")
6044 (parallel [(const_int 2) (const_int 3)])))]
6046 "vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}"
6047 [(set_attr "type" "sselog")
6048 (set_attr "prefix_extra" "1")
6049 (set_attr "length_immediate" "1")
6050 (set_attr "memory" "none,store")
6051 (set_attr "prefix" "vex")
6052 (set_attr "mode" "<sseinsnmode>")])
;; Low half (elements 0..3) of a VI4F_256 value.  Memory-to-memory is
;; rejected.  After reload the insn is split into a plain half-width move:
;; a register source becomes the same register number in the half-width
;; mode, a memory source a half-width reference at offset 0.
6054 (define_insn_and_split "vec_extract_lo_<mode>"
6055 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
6056 (vec_select:<ssehalfvecmode>
6057 (match_operand:VI4F_256 1 "nonimmediate_operand" "xm,x")
6058 (parallel [(const_int 0) (const_int 1)
6059 (const_int 2) (const_int 3)])))]
6060 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6062 "&& reload_completed"
6063 [(set (match_dup 0) (match_dup 1))]
6065 if (REG_P (operands[1]))
6066 operands[1] = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (operands[1]));
6068 operands[1] = adjust_address (operands[1], <ssehalfvecmode>mode, 0);
;; High half (elements 4..7) of a VI4F_256 register, to a register or to
;; memory, using vextract<i128> with immediate 1.
6071 (define_insn "vec_extract_hi_<mode>"
6072 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
6073 (vec_select:<ssehalfvecmode>
6074 (match_operand:VI4F_256 1 "register_operand" "x,x")
6075 (parallel [(const_int 4) (const_int 5)
6076 (const_int 6) (const_int 7)])))]
6078 "vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}"
6079 [(set_attr "type" "sselog")
6080 (set_attr "prefix_extra" "1")
6081 (set_attr "length_immediate" "1")
6082 (set_attr "memory" "none,store")
6083 (set_attr "prefix" "vex")
6084 (set_attr "mode" "<sseinsnmode>")])
;; Low half (elements 0..15) of a V32HI value as V16HI.  Memory-to-memory
;; is rejected.  After reload the insn is split into a plain V16HI move: a
;; register source becomes the same register number in V16HImode, a memory
;; source a V16HImode reference at offset 0.
6086 (define_insn_and_split "vec_extract_lo_v32hi"
6087 [(set (match_operand:V16HI 0 "nonimmediate_operand" "=v,m")
6089 (match_operand:V32HI 1 "nonimmediate_operand" "vm,v")
6090 (parallel [(const_int 0) (const_int 1)
6091 (const_int 2) (const_int 3)
6092 (const_int 4) (const_int 5)
6093 (const_int 6) (const_int 7)
6094 (const_int 8) (const_int 9)
6095 (const_int 10) (const_int 11)
6096 (const_int 12) (const_int 13)
6097 (const_int 14) (const_int 15)])))]
6098 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6100 "&& reload_completed"
6101 [(set (match_dup 0) (match_dup 1))]
6103 if (REG_P (operands[1]))
6104 operands[1] = gen_rtx_REG (V16HImode, REGNO (operands[1]));
6106 operands[1] = adjust_address (operands[1], V16HImode, 0);
;; High half (elements 16..31) of a V32HI value as V16HI, to a register or
;; to memory, using vextracti64x4 with immediate 1.
;; NOTE(review): operand 1's predicate is nonimmediate_operand although both
;; constraint alternatives are "v" -- confirm whether register_operand was
;; intended.
6109 (define_insn "vec_extract_hi_v32hi"
6110 [(set (match_operand:V16HI 0 "nonimmediate_operand" "=v,m")
6112 (match_operand:V32HI 1 "nonimmediate_operand" "v,v")
6113 (parallel [(const_int 16) (const_int 17)
6114 (const_int 18) (const_int 19)
6115 (const_int 20) (const_int 21)
6116 (const_int 22) (const_int 23)
6117 (const_int 24) (const_int 25)
6118 (const_int 26) (const_int 27)
6119 (const_int 28) (const_int 29)
6120 (const_int 30) (const_int 31)])))]
6122 "vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
6123 [(set_attr "type" "sselog")
6124 (set_attr "prefix_extra" "1")
6125 (set_attr "length_immediate" "1")
6126 (set_attr "memory" "none,store")
6127 (set_attr "prefix" "evex")
6128 (set_attr "mode" "XI")])
;; Low half (elements 0..7) of a V16HI value as V8HI.  Memory-to-memory is
;; rejected.  After reload the insn is split into a plain V8HI move: a
;; register source becomes the same register number in V8HImode, a memory
;; source a V8HImode reference at offset 0.
6130 (define_insn_and_split "vec_extract_lo_v16hi"
6131 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
6133 (match_operand:V16HI 1 "nonimmediate_operand" "xm,x")
6134 (parallel [(const_int 0) (const_int 1)
6135 (const_int 2) (const_int 3)
6136 (const_int 4) (const_int 5)
6137 (const_int 6) (const_int 7)])))]
6138 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6140 "&& reload_completed"
6141 [(set (match_dup 0) (match_dup 1))]
6143 if (REG_P (operands[1]))
6144 operands[1] = gen_rtx_REG (V8HImode, REGNO (operands[1]));
6146 operands[1] = adjust_address (operands[1], V8HImode, 0);
;; High half (elements 8..15) of a V16HI register as V8HI, to a register or
;; to memory.  The mnemonic is spelled "vextract%~128"; %~ presumably
;; expands to the f/i infix chosen by the target -- confirm against the
;; i386 operand-print codes.
6149 (define_insn "vec_extract_hi_v16hi"
6150 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
6152 (match_operand:V16HI 1 "register_operand" "x,x")
6153 (parallel [(const_int 8) (const_int 9)
6154 (const_int 10) (const_int 11)
6155 (const_int 12) (const_int 13)
6156 (const_int 14) (const_int 15)])))]
6158 "vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}"
6159 [(set_attr "type" "sselog")
6160 (set_attr "prefix_extra" "1")
6161 (set_attr "length_immediate" "1")
6162 (set_attr "memory" "none,store")
6163 (set_attr "prefix" "vex")
6164 (set_attr "mode" "OI")])
;; Low half (elements 0..31) of a V64QI value as V32QI.  Memory-to-memory
;; is rejected.  After reload the insn is split into a plain V32QI move: a
;; register source becomes the same register number in V32QImode, a memory
;; source a V32QImode reference at offset 0.
6166 (define_insn_and_split "vec_extract_lo_v64qi"
6167 [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m")
6169 (match_operand:V64QI 1 "nonimmediate_operand" "vm,v")
6170 (parallel [(const_int 0) (const_int 1)
6171 (const_int 2) (const_int 3)
6172 (const_int 4) (const_int 5)
6173 (const_int 6) (const_int 7)
6174 (const_int 8) (const_int 9)
6175 (const_int 10) (const_int 11)
6176 (const_int 12) (const_int 13)
6177 (const_int 14) (const_int 15)
6178 (const_int 16) (const_int 17)
6179 (const_int 18) (const_int 19)
6180 (const_int 20) (const_int 21)
6181 (const_int 22) (const_int 23)
6182 (const_int 24) (const_int 25)
6183 (const_int 26) (const_int 27)
6184 (const_int 28) (const_int 29)
6185 (const_int 30) (const_int 31)])))]
6186 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6188 "&& reload_completed"
6189 [(set (match_dup 0) (match_dup 1))]
6191 if (REG_P (operands[1]))
6192 operands[1] = gen_rtx_REG (V32QImode, REGNO (operands[1]));
6194 operands[1] = adjust_address (operands[1], V32QImode, 0);
;; Extract the high half (elements 32..63) of a V64QI value as V32QI,
;; into a register or memory.  Emitted as vextracti64x4 with immediate 1
;; (EVEX encoded, XImode).
;; NOTE(review): operand 1 uses the predicate nonimmediate_operand while
;; both of its constraints are "v"; the other vec_extract_hi_* patterns
;; use register_operand here -- confirm whether this is intentional.
6197 (define_insn "vec_extract_hi_v64qi"
6198 [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m")
6200 (match_operand:V64QI 1 "nonimmediate_operand" "v,v")
6201 (parallel [(const_int 32) (const_int 33)
6202 (const_int 34) (const_int 35)
6203 (const_int 36) (const_int 37)
6204 (const_int 38) (const_int 39)
6205 (const_int 40) (const_int 41)
6206 (const_int 42) (const_int 43)
6207 (const_int 44) (const_int 45)
6208 (const_int 46) (const_int 47)
6209 (const_int 48) (const_int 49)
6210 (const_int 50) (const_int 51)
6211 (const_int 52) (const_int 53)
6212 (const_int 54) (const_int 55)
6213 (const_int 56) (const_int 57)
6214 (const_int 58) (const_int 59)
6215 (const_int 60) (const_int 61)
6216 (const_int 62) (const_int 63)])))]
6218 "vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
6219 [(set_attr "type" "sselog")
6220 (set_attr "prefix_extra" "1")
6221 (set_attr "length_immediate" "1")
6222 (set_attr "memory" "none,store")
6223 (set_attr "prefix" "evex")
6224 (set_attr "mode" "XI")])
;; Extract the low half (elements 0..15) of a V32QI value as V16QI.
;; Needs TARGET_AVX and rejects the memory-to-memory combination.
;; After reload the insn is split into a plain move: a register source
;; is replaced by the V16QImode register with the same register number,
;; a memory source is re-addressed in V16QImode at offset 0.
6226 (define_insn_and_split "vec_extract_lo_v32qi"
6227 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
6229 (match_operand:V32QI 1 "nonimmediate_operand" "xm,x")
6230 (parallel [(const_int 0) (const_int 1)
6231 (const_int 2) (const_int 3)
6232 (const_int 4) (const_int 5)
6233 (const_int 6) (const_int 7)
6234 (const_int 8) (const_int 9)
6235 (const_int 10) (const_int 11)
6236 (const_int 12) (const_int 13)
6237 (const_int 14) (const_int 15)])))]
6238 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6240 "&& reload_completed"
6241 [(set (match_dup 0) (match_dup 1))]
6243 if (REG_P (operands[1]))
6244 operands[1] = gen_rtx_REG (V16QImode, REGNO (operands[1]));
6246 operands[1] = adjust_address (operands[1], V16QImode, 0);
;; Extract the high half (elements 16..31) of a V32QI register as V16QI.
;; The destination is either a register or memory; the second
;; alternative is flagged as a store through the "memory" attribute.
;; Emitted as vextract%~128 with immediate 1 (VEX encoded, OImode).
6249 (define_insn "vec_extract_hi_v32qi"
6250 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
6252 (match_operand:V32QI 1 "register_operand" "x,x")
6253 (parallel [(const_int 16) (const_int 17)
6254 (const_int 18) (const_int 19)
6255 (const_int 20) (const_int 21)
6256 (const_int 22) (const_int 23)
6257 (const_int 24) (const_int 25)
6258 (const_int 26) (const_int 27)
6259 (const_int 28) (const_int 29)
6260 (const_int 30) (const_int 31)])))]
6262 "vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}"
6263 [(set_attr "type" "sselog")
6264 (set_attr "prefix_extra" "1")
6265 (set_attr "length_immediate" "1")
6266 (set_attr "memory" "none,store")
6267 (set_attr "prefix" "vex")
6268 (set_attr "mode" "OI")])
6270 ;; Modes handled by vec_extract patterns.
;; The 128-bit modes are listed unconditionally, the 256-bit modes are
;; guarded by TARGET_AVX and the 512-bit SI/DI/SF/DF modes by
;; TARGET_AVX512F.  No 512-bit QI or HI vector mode is listed.
6271 (define_mode_iterator VEC_EXTRACT_MODE
6272 [(V32QI "TARGET_AVX") V16QI
6273 (V16HI "TARGET_AVX") V8HI
6274 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
6275 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
6276 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
6277 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF])
;; Standard-name expander: extract the element of vector operand 1
;; selected by the constant operand 2 into scalar register operand 0.
;; All the work is delegated to ix86_expand_vector_extract, which is
;; called with `false' as its first argument (presumably the flag that
;; permits MMX code -- confirm against i386.c).
6279 (define_expand "vec_extract<mode>"
6280 [(match_operand:<ssescalarmode> 0 "register_operand")
6281 (match_operand:VEC_EXTRACT_MODE 1 "register_operand")
6282 (match_operand 2 "const_int_operand")]
6285 ix86_expand_vector_extract (false, operands[0], operands[1],
6286 INTVAL (operands[2]));
6290 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6292 ;; Parallel double-precision floating point element swizzling
6294 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; 512-bit vunpckhpd, optionally masked (<mask_name> variants).
;; The selector {1,9,3,11,5,13,7,15} picks the odd-numbered double of
;; every pair, alternating between operand 1 (indices 0..7) and
;; operand 2 (indices 8..15).
6296 (define_insn "<mask_codefor>avx512f_unpckhpd512<mask_name>"
6297 [(set (match_operand:V8DF 0 "register_operand" "=v")
6300 (match_operand:V8DF 1 "nonimmediate_operand" "v")
6301 (match_operand:V8DF 2 "nonimmediate_operand" "vm"))
6302 (parallel [(const_int 1) (const_int 9)
6303 (const_int 3) (const_int 11)
6304 (const_int 5) (const_int 13)
6305 (const_int 7) (const_int 15)])))]
6307 "vunpckhpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
6308 [(set_attr "type" "sselog")
6309 (set_attr "prefix" "evex")
6310 (set_attr "mode" "V8DF")])
6312 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; 256-bit vunpckhpd: the selector {1,5,3,7} takes the odd-numbered
;; double of each pair from operand 1 (indices 0..3) and operand 2
;; (indices 4..7).
6313 (define_insn "avx_unpckhpd256"
6314 [(set (match_operand:V4DF 0 "register_operand" "=x")
6317 (match_operand:V4DF 1 "register_operand" "x")
6318 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
6319 (parallel [(const_int 1) (const_int 5)
6320 (const_int 3) (const_int 7)])))]
6322 "vunpckhpd\t{%2, %1, %0|%0, %1, %2}"
6323 [(set_attr "type" "sselog")
6324 (set_attr "prefix" "vex")
6325 (set_attr "mode" "V4DF")])
;; Standard-name expander for a full-width high interleave of two V4DF
;; values.  It is built from three sets: two in-lane interleaves of
;; operands 1 and 2 (selectors {0,4,2,6} and {1,5,3,7}) followed by a
;; final selection {2,3,6,7} into operand 0.  operands[3] and
;; operands[4] are fresh V4DF pseudos created in the preparation code,
;; presumably holding the two intermediate results.
6327 (define_expand "vec_interleave_highv4df"
6331 (match_operand:V4DF 1 "register_operand" "x")
6332 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
6333 (parallel [(const_int 0) (const_int 4)
6334 (const_int 2) (const_int 6)])))
6340 (parallel [(const_int 1) (const_int 5)
6341 (const_int 3) (const_int 7)])))
6342 (set (match_operand:V4DF 0 "register_operand")
6347 (parallel [(const_int 2) (const_int 3)
6348 (const_int 6) (const_int 7)])))]
6351 operands[3] = gen_reg_rtx (V4DFmode);
6352 operands[4] = gen_reg_rtx (V4DFmode);
;; Standard-name expander for the V2DF high interleave.  Both inputs may
;; be memory; when ix86_vec_interleave_v2df_operator_ok (operands, 1)
;; rejects the operand combination, operand 2 is forced into a register.
6356 (define_expand "vec_interleave_highv2df"
6357 [(set (match_operand:V2DF 0 "register_operand")
6360 (match_operand:V2DF 1 "nonimmediate_operand")
6361 (match_operand:V2DF 2 "nonimmediate_operand"))
6362 (parallel [(const_int 1)
6366 if (!ix86_vec_interleave_v2df_operator_ok (operands, 1))
6367 operands[2] = force_reg (V2DFmode, operands[2]);
;; Matcher for the V2DF high interleave.  Valid under TARGET_SSE2 when
;; ix86_vec_interleave_v2df_operator_ok (operands, 1) accepts the
;; operands.  Six alternatives, in order:
;;   0/1  register forms: unpckhpd (non-AVX) / vunpckhpd (AVX)
;;   2    operand 2 tied to memory operand 1: movddup from %H1 (SSE3)
;;   3/4  movlpd / vmovlpd loading from %H1 of memory operand 1
;;   5    memory destination: movhpd store of register operand 1
6370 (define_insn "*vec_interleave_highv2df"
6371 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m")
6374 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,o,o,o,x")
6375 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,1,0,x,0"))
6376 (parallel [(const_int 1)
6378 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
6380 unpckhpd\t{%2, %0|%0, %2}
6381 vunpckhpd\t{%2, %1, %0|%0, %1, %2}
6382 %vmovddup\t{%H1, %0|%0, %H1}
6383 movlpd\t{%H1, %0|%0, %H1}
6384 vmovlpd\t{%H1, %2, %0|%0, %2, %H1}
6385 %vmovhpd\t{%1, %0|%q0, %1}"
6386 [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
6387 (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
6388 (set_attr "ssememalign" "64")
6389 (set_attr "prefix_data16" "*,*,*,1,*,1")
6390 (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex")
6391 (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,V1DF")])
;; Expander for the 512-bit movddup (optionally masked).  It takes a
;; single V8DF input, operand 1, and uses the even-element selector
;; {0,8,2,10,4,12,6,14}, i.e. the same selector as the unpcklpd512
;; patterns (presumably with operand 1 supplying both halves).
6393 (define_expand "avx512f_movddup512<mask_name>"
6394 [(set (match_operand:V8DF 0 "register_operand")
6397 (match_operand:V8DF 1 "nonimmediate_operand")
6399 (parallel [(const_int 0) (const_int 8)
6400 (const_int 2) (const_int 10)
6401 (const_int 4) (const_int 12)
6402 (const_int 6) (const_int 14)])))]
;; Expander for the 512-bit vunpcklpd (optionally masked).  The selector
;; {0,8,2,10,4,12,6,14} takes the even-numbered double of each pair
;; from register operand 1 and register-or-memory operand 2.
6405 (define_expand "avx512f_unpcklpd512<mask_name>"
6406 [(set (match_operand:V8DF 0 "register_operand")
6409 (match_operand:V8DF 1 "register_operand")
6410 (match_operand:V8DF 2 "nonimmediate_operand"))
6411 (parallel [(const_int 0) (const_int 8)
6412 (const_int 2) (const_int 10)
6413 (const_int 4) (const_int 12)
6414 (const_int 6) (const_int 14)])))]
;; Matcher shared by the 512-bit movddup and unpcklpd expanders.
;;   alternative 0: operand 2 is tied to operand 1 (constraint "1"),
;;                  so the operation is a vmovddup of operand 1;
;;   alternative 1: two distinct inputs, emitted as vunpcklpd.
6417 (define_insn "*avx512f_unpcklpd512<mask_name>"
6418 [(set (match_operand:V8DF 0 "register_operand" "=v,v")
6421 (match_operand:V8DF 1 "nonimmediate_operand" "vm, v")
6422 (match_operand:V8DF 2 "nonimmediate_operand" "1 ,vm"))
6423 (parallel [(const_int 0) (const_int 8)
6424 (const_int 2) (const_int 10)
6425 (const_int 4) (const_int 12)
6426 (const_int 6) (const_int 14)])))]
6429 vmovddup\t{%1, %0<mask_operand3>|%0<mask_operand3>, %1}
6430 vunpcklpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
6431 [(set_attr "type" "sselog")
6432 (set_attr "prefix" "evex")
6433 (set_attr "mode" "V8DF")])
6435 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; Expander for the 256-bit movddup: a single V4DF input, operand 1,
;; with the even-element selector {0,4,2,6} (presumably operand 1
;; supplies both halves of the selected-from value).
6436 (define_expand "avx_movddup256"
6437 [(set (match_operand:V4DF 0 "register_operand")
6440 (match_operand:V4DF 1 "nonimmediate_operand")
6442 (parallel [(const_int 0) (const_int 4)
6443 (const_int 2) (const_int 6)])))]
;; Expander for the 256-bit vunpcklpd: selector {0,4,2,6} over register
;; operand 1 (indices 0..3) and register-or-memory operand 2 (4..7).
6446 (define_expand "avx_unpcklpd256"
6447 [(set (match_operand:V4DF 0 "register_operand")
6450 (match_operand:V4DF 1 "register_operand")
6451 (match_operand:V4DF 2 "nonimmediate_operand"))
6452 (parallel [(const_int 0) (const_int 4)
6453 (const_int 2) (const_int 6)])))]
;; Matcher shared by the 256-bit movddup and unpcklpd expanders.
;;   alternative 0: two distinct inputs, emitted as vunpcklpd;
;;   alternative 1: memory operand 1 with operand 2 tied to it
;;                  (constraint "1"), emitted as vmovddup.
6456 (define_insn "*avx_unpcklpd256"
6457 [(set (match_operand:V4DF 0 "register_operand" "=x,x")
6460 (match_operand:V4DF 1 "nonimmediate_operand" " x,m")
6461 (match_operand:V4DF 2 "nonimmediate_operand" "xm,1"))
6462 (parallel [(const_int 0) (const_int 4)
6463 (const_int 2) (const_int 6)])))]
6466 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
6467 vmovddup\t{%1, %0|%0, %1}"
6468 [(set_attr "type" "sselog")
6469 (set_attr "prefix" "vex")
6470 (set_attr "mode" "V4DF")])
;; Standard-name expander for a full-width low interleave of two V4DF
;; values.  It is built from three sets: two in-lane interleaves of
;; operands 1 and 2 (selectors {0,4,2,6} and {1,5,3,7}) followed by a
;; final selection {0,1,4,5} into operand 0.  operands[3] and
;; operands[4] are fresh V4DF pseudos created in the preparation code,
;; presumably holding the two intermediate results.
6472 (define_expand "vec_interleave_lowv4df"
6476 (match_operand:V4DF 1 "register_operand" "x")
6477 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
6478 (parallel [(const_int 0) (const_int 4)
6479 (const_int 2) (const_int 6)])))
6485 (parallel [(const_int 1) (const_int 5)
6486 (const_int 3) (const_int 7)])))
6487 (set (match_operand:V4DF 0 "register_operand")
6492 (parallel [(const_int 0) (const_int 1)
6493 (const_int 4) (const_int 5)])))]
6496 operands[3] = gen_reg_rtx (V4DFmode);
6497 operands[4] = gen_reg_rtx (V4DFmode);
;; Standard-name expander for the V2DF low interleave.  Both inputs may
;; be memory; when ix86_vec_interleave_v2df_operator_ok (operands, 0)
;; rejects the operand combination, operand 1 is forced into a register.
6500 (define_expand "vec_interleave_lowv2df"
6501 [(set (match_operand:V2DF 0 "register_operand")
6504 (match_operand:V2DF 1 "nonimmediate_operand")
6505 (match_operand:V2DF 2 "nonimmediate_operand"))
6506 (parallel [(const_int 0)
6510 if (!ix86_vec_interleave_v2df_operator_ok (operands, 0))
6511 operands[1] = force_reg (V2DFmode, operands[1]);
;; Matcher for the V2DF low interleave.  Valid under TARGET_SSE2 when
;; ix86_vec_interleave_v2df_operator_ok (operands, 0) accepts the
;; operands.  Six alternatives, in order:
;;   0/1  register forms: unpcklpd (non-AVX) / vunpcklpd (AVX)
;;   2    operand 2 tied to memory operand 1: movddup (SSE3)
;;   3/4  movhpd / vmovhpd loading memory operand 2
;;   5    memory destination: movlpd store of register operand 2 to %H0
6514 (define_insn "*vec_interleave_lowv2df"
6515 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,o")
6518 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,m,0,x,0")
6519 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,1,m,m,x"))
6520 (parallel [(const_int 0)
6522 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
6524 unpcklpd\t{%2, %0|%0, %2}
6525 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
6526 %vmovddup\t{%1, %0|%0, %q1}
6527 movhpd\t{%2, %0|%0, %q2}
6528 vmovhpd\t{%2, %1, %0|%0, %1, %q2}
6529 %vmovlpd\t{%2, %H0|%H0, %2}"
6530 [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
6531 (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
6532 (set_attr "ssememalign" "64")
6533 (set_attr "prefix_data16" "*,*,*,1,*,1")
6534 (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex")
6535 (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,V1DF")])
;; Post-reload split (TARGET_SSE3) for a V2DF memory destination fed
;; from a register source: the preparation code takes the DFmode view
;; of operand 1's register and stores it twice, to byte offsets 0 and 8
;; of the destination, i.e. the low double is duplicated in memory.
6538 [(set (match_operand:V2DF 0 "memory_operand")
6541 (match_operand:V2DF 1 "register_operand")
6543 (parallel [(const_int 0)
6545 "TARGET_SSE3 && reload_completed"
6548 rtx low = gen_rtx_REG (DFmode, REGNO (operands[1]));
6549 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
6550 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
;; Split (TARGET_SSE3) for a V2DF register loaded from a memory source
;; whose two selector indices are operand 2 (0 or 1) and operand 3 with
;; operand 3 == operand 2 + 2.  The result is rewritten as a
;; vec_duplicate of the single DFmode element found at byte offset
;; operand 2 * 8 of the memory operand.
6555 [(set (match_operand:V2DF 0 "register_operand")
6558 (match_operand:V2DF 1 "memory_operand")
6560 (parallel [(match_operand:SI 2 "const_0_to_1_operand")
6561 (match_operand:SI 3 "const_int_operand")])))]
6562 "TARGET_SSE3 && INTVAL (operands[2]) + 2 == INTVAL (operands[3])"
6563 [(set (match_dup 0) (vec_duplicate:V2DF (match_dup 1)))]
6565 operands[1] = adjust_address (operands[1], DFmode, INTVAL (operands[2]) * 8);
;; Scalar vscalefss/vscalefsd on 128-bit vectors (VF_128), with optional
;; embedded rounding (<round_name> variants).  Operand 1 is a register,
;; operand 2 follows the rounding-dependent predicate and constraint.
;; The instruction is EVEX-only (see the "prefix" attribute), so the
;; mnemonic is spelled with a literal "v" like the other EVEX patterns
;; in this file rather than with the conditional "%v" prefix, which
;; would drop the "v" if AVX were ever disabled.
6568 (define_insn "avx512f_vmscalef<mode><round_name>"
6569 [(set (match_operand:VF_128 0 "register_operand" "=v")
6572 [(match_operand:VF_128 1 "register_operand" "v")
6573 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>")]
6578 "vscalef<ssescalarmodesuffix>\t{<round_op3>%2, %1, %0|%0, %1, %2<round_op3>}"
6579 [(set_attr "prefix" "evex")
6580 (set_attr "mode" "<ssescalarmode>")])
;; Packed vscalefps/vscalefpd on 512-bit vectors (VF_512), with optional
;; masking (<mask_name>) and embedded rounding (<round_name>).
;; The instruction is EVEX-only (see the "prefix" attribute), so the
;; mnemonic is spelled with a literal "v" like the other EVEX patterns
;; in this file rather than with the conditional "%v" prefix, which
;; would drop the "v" if AVX were ever disabled.
6582 (define_insn "avx512f_scalef<mode><mask_name><round_name>"
6583 [(set (match_operand:VF_512 0 "register_operand" "=v")
6585 [(match_operand:VF_512 1 "register_operand" "v")
6586 (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>")]
6589 "vscalef<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
6590 [(set_attr "prefix" "evex")
6591 (set_attr "mode" "<MODE>")])
;; Zero-masking front end for vpternlog on 512-bit integer vectors.
;; Operands: 0 destination, 1-3 the three sources, 4 the 8-bit
;; immediate, 5 the mask register.  It forwards everything to the
;; _maskz_1 insn, inserting a zero vector (CONST0_RTX) ahead of the
;; mask operand.
6593 (define_expand "avx512f_vternlog<mode>_maskz"
6594 [(match_operand:VI48_512 0 "register_operand")
6595 (match_operand:VI48_512 1 "register_operand")
6596 (match_operand:VI48_512 2 "register_operand")
6597 (match_operand:VI48_512 3 "nonimmediate_operand")
6598 (match_operand:SI 4 "const_0_to_255_operand")
6599 (match_operand:<avx512fmaskmode> 5 "register_operand")]
6602 emit_insn (gen_avx512f_vternlog<mode>_maskz_1 (
6603 operands[0], operands[1], operands[2], operands[3],
6604 operands[4], CONST0_RTX (<MODE>mode), operands[5]));
;; vpternlogd/vpternlogq on 512-bit integer vectors, plain and
;; zero-masked (<sd_maskz_name>) forms.  Operand 1 is tied to the
;; destination (constraint "0"), operand 3 may be memory, operand 4 is
;; an immediate in the range 0..255.
6608 (define_insn "avx512f_vternlog<mode><sd_maskz_name>"
6609 [(set (match_operand:VI48_512 0 "register_operand" "=v")
6611 [(match_operand:VI48_512 1 "register_operand" "0")
6612 (match_operand:VI48_512 2 "register_operand" "v")
6613 (match_operand:VI48_512 3 "nonimmediate_operand" "vm")
6614 (match_operand:SI 4 "const_0_to_255_operand")]
6617 "vpternlog<ssemodesuffix>\t{%4, %3, %2, %0<sd_mask_op5>|%0<sd_mask_op5>, %2, %3, %4}"
6618 [(set_attr "type" "sselog")
6619 (set_attr "prefix" "evex")
6620 (set_attr "mode" "<sseinsnmode>")])
;; Masked form of vpternlog on 512-bit integer vectors.  Operand 1 is
;; tied to the destination, operand 5 is the mask register (constraint
;; "k") and is printed inside braces after the destination as {%5}.
6622 (define_insn "avx512f_vternlog<mode>_mask"
6623 [(set (match_operand:VI48_512 0 "register_operand" "=v")
6626 [(match_operand:VI48_512 1 "register_operand" "0")
6627 (match_operand:VI48_512 2 "register_operand" "v")
6628 (match_operand:VI48_512 3 "nonimmediate_operand" "vm")
6629 (match_operand:SI 4 "const_0_to_255_operand")]
6632 (match_operand:<avx512fmaskmode> 5 "register_operand" "k")))]
6634 "vpternlog<ssemodesuffix>\t{%4, %3, %2, %0%{%5%}|%0%{%5%}, %2, %3, %4}"
6635 [(set_attr "type" "sselog")
6636 (set_attr "prefix" "evex")
6637 (set_attr "mode" "<sseinsnmode>")])
;; Packed vgetexpps/vgetexppd on 512-bit float vectors, with optional
;; masking and SAE-only rounding variants.  Single source operand 1.
;; The ";" after the output template starts an (empty) comment.
6639 (define_insn "avx512f_getexp<mode><mask_name><round_saeonly_name>"
6640 [(set (match_operand:VF_512 0 "register_operand" "=v")
6641 (unspec:VF_512 [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
6644 "vgetexp<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}";
6645 [(set_attr "prefix" "evex")
6646 (set_attr "mode" "<MODE>")])
;; Scalar vgetexpss/vgetexpsd on 128-bit vectors, with an SAE-only
;; rounding variant.  Operand 1 is a register, operand 2 follows the
;; rounding-dependent predicate and constraint.
6648 (define_insn "avx512f_sgetexp<mode><round_saeonly_name>"
6649 [(set (match_operand:VF_128 0 "register_operand" "=v")
6652 [(match_operand:VF_128 1 "register_operand" "v")
6653 (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
6658 "vgetexp<ssescalarmodesuffix>\t{<round_saeonly_op3>%2, %1, %0|%0, %1, %2<round_saeonly_op3>}";
6659 [(set_attr "prefix" "evex")
6660 (set_attr "mode" "<ssescalarmode>")])
;; valignd/valignq on 512-bit integer vectors, optionally masked.
;; Operand 1 is a register, operand 2 may be memory and operand 3 is an
;; immediate in the range 0..255.
6662 (define_insn "<mask_codefor>avx512f_align<mode><mask_name>"
6663 [(set (match_operand:VI48_512 0 "register_operand" "=v")
6664 (unspec:VI48_512 [(match_operand:VI48_512 1 "register_operand" "v")
6665 (match_operand:VI48_512 2 "nonimmediate_operand" "vm")
6666 (match_operand:SI 3 "const_0_to_255_operand")]
6669 "valign<ssemodesuffix>\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}";
6670 [(set_attr "prefix" "evex")
6671 (set_attr "mode" "<sseinsnmode>")])
;; Masked 512-bit vshufps front end.  The 8-bit immediate (operand 3) is
;; split into four 2-bit fields and expanded into the 16 explicit
;; selector indices expected by avx512f_shufps512_1_mask: fields 0 and 1
;; index operand 1, fields 2 and 3 are biased by 16 to index operand 2,
;; and the same four choices are repeated with offsets +4, +8 and +12
;; for the remaining three groups of four elements.  Operand 4 and
;; operand 5 (an HImode mask) are passed through unchanged.
6673 (define_expand "avx512f_shufps512_mask"
6674 [(match_operand:V16SF 0 "register_operand")
6675 (match_operand:V16SF 1 "register_operand")
6676 (match_operand:V16SF 2 "nonimmediate_operand")
6677 (match_operand:SI 3 "const_0_to_255_operand")
6678 (match_operand:V16SF 4 "register_operand")
6679 (match_operand:HI 5 "register_operand")]
6682 int mask = INTVAL (operands[3]);
6683 emit_insn (gen_avx512f_shufps512_1_mask (operands[0], operands[1], operands[2],
6684 GEN_INT ((mask >> 0) & 3),
6685 GEN_INT ((mask >> 2) & 3),
6686 GEN_INT (((mask >> 4) & 3) + 16),
6687 GEN_INT (((mask >> 6) & 3) + 16),
6688 GEN_INT (((mask >> 0) & 3) + 4),
6689 GEN_INT (((mask >> 2) & 3) + 4),
6690 GEN_INT (((mask >> 4) & 3) + 20),
6691 GEN_INT (((mask >> 6) & 3) + 20),
6692 GEN_INT (((mask >> 0) & 3) + 8),
6693 GEN_INT (((mask >> 2) & 3) + 8),
6694 GEN_INT (((mask >> 4) & 3) + 24),
6695 GEN_INT (((mask >> 6) & 3) + 24),
6696 GEN_INT (((mask >> 0) & 3) + 12),
6697 GEN_INT (((mask >> 2) & 3) + 12),
6698 GEN_INT (((mask >> 4) & 3) + 28),
6699 GEN_INT (((mask >> 6) & 3) + 28),
6700 operands[4], operands[5]));
;; Zero-masking front end for packed vfixupimm on 512-bit float vectors,
;; including the SAE-only rounding variant.  Operand 3 is the integer
;; vector, operand 4 the 8-bit immediate, operand 5 the mask register.
;; Everything is forwarded to the _maskz_1 insn with a zero vector
;; (CONST0_RTX) inserted ahead of the mask operand.
6705 (define_expand "avx512f_fixupimm<mode>_maskz<round_saeonly_expand_name>"
6706 [(match_operand:VF_512 0 "register_operand")
6707 (match_operand:VF_512 1 "register_operand")
6708 (match_operand:VF_512 2 "register_operand")
6709 (match_operand:<sseintvecmode> 3 "<round_saeonly_expand_nimm_predicate>")
6710 (match_operand:SI 4 "const_0_to_255_operand")
6711 (match_operand:<avx512fmaskmode> 5 "register_operand")]
6714 emit_insn (gen_avx512f_fixupimm<mode>_maskz_1<round_saeonly_expand_name> (
6715 operands[0], operands[1], operands[2], operands[3],
6716 operands[4], CONST0_RTX (<MODE>mode), operands[5]
6717 <round_saeonly_expand_operand6>));
;; Packed vfixupimmps/vfixupimmpd on 512-bit float vectors, plain and
;; zero-masked forms, with SAE-only rounding variants.  Operand 1 is
;; tied to the destination, operand 3 is the integer vector and operand
;; 4 an immediate in the range 0..255.
6721 (define_insn "avx512f_fixupimm<mode><sd_maskz_name><round_saeonly_name>"
6722 [(set (match_operand:VF_512 0 "register_operand" "=v")
6724 [(match_operand:VF_512 1 "register_operand" "0")
6725 (match_operand:VF_512 2 "register_operand" "v")
6726 (match_operand:<sseintvecmode> 3 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
6727 (match_operand:SI 4 "const_0_to_255_operand")]
6730 "vfixupimm<ssemodesuffix>\t{%4, <round_saeonly_sd_mask_op5>%3, %2, %0<sd_mask_op5>|%0<sd_mask_op5>, %2, %3<round_saeonly_sd_mask_op5>, %4}";
6731 [(set_attr "prefix" "evex")
6732 (set_attr "mode" "<MODE>")])
;; Masked form of packed vfixupimm on 512-bit float vectors.  Operand 1
;; is tied to the destination; operand 5 is the mask register
;; (constraint "k"), printed in braces after the destination as {%5}.
6734 (define_insn "avx512f_fixupimm<mode>_mask<round_saeonly_name>"
6735 [(set (match_operand:VF_512 0 "register_operand" "=v")
6738 [(match_operand:VF_512 1 "register_operand" "0")
6739 (match_operand:VF_512 2 "register_operand" "v")
6740 (match_operand:<sseintvecmode> 3 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
6741 (match_operand:SI 4 "const_0_to_255_operand")]
6744 (match_operand:<avx512fmaskmode> 5 "register_operand" "k")))]
6746 "vfixupimm<ssemodesuffix>\t{%4, <round_saeonly_op6>%3, %2, %0%{%5%}|%0%{%5%}, %2, %3<round_saeonly_op6>, %4}";
6747 [(set_attr "prefix" "evex")
6748 (set_attr "mode" "<MODE>")])
;; Zero-masking front end for scalar vfixupimm on 128-bit vectors,
;; including the SAE-only rounding variant.  Everything is forwarded to
;; the _maskz_1 insn with a zero vector (CONST0_RTX) inserted ahead of
;; the mask operand.
6750 (define_expand "avx512f_sfixupimm<mode>_maskz<round_saeonly_expand_name>"
6751 [(match_operand:VF_128 0 "register_operand")
6752 (match_operand:VF_128 1 "register_operand")
6753 (match_operand:VF_128 2 "register_operand")
6754 (match_operand:<sseintvecmode> 3 "<round_saeonly_expand_nimm_predicate>")
6755 (match_operand:SI 4 "const_0_to_255_operand")
6756 (match_operand:<avx512fmaskmode> 5 "register_operand")]
6759 emit_insn (gen_avx512f_sfixupimm<mode>_maskz_1<round_saeonly_expand_name> (
6760 operands[0], operands[1], operands[2], operands[3],
6761 operands[4], CONST0_RTX (<MODE>mode), operands[5]
6762 <round_saeonly_expand_operand6>));
;; Scalar vfixupimmss/vfixupimmsd on 128-bit vectors, plain and
;; zero-masked forms, with SAE-only rounding variants.  Operand 1 is
;; tied to the destination, operand 3 is the integer vector and operand
;; 4 an immediate in the range 0..255.
6766 (define_insn "avx512f_sfixupimm<mode><sd_maskz_name><round_saeonly_name>"
6767 [(set (match_operand:VF_128 0 "register_operand" "=v")
6770 [(match_operand:VF_128 1 "register_operand" "0")
6771 (match_operand:VF_128 2 "register_operand" "v")
6772 (match_operand:<sseintvecmode> 3 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
6773 (match_operand:SI 4 "const_0_to_255_operand")]
6778 "vfixupimm<ssescalarmodesuffix>\t{%4, <round_saeonly_sd_mask_op5>%3, %2, %0<sd_mask_op5>|%0<sd_mask_op5>, %2, %3<round_saeonly_sd_mask_op5>, %4}";
6779 [(set_attr "prefix" "evex")
6780 (set_attr "mode" "<ssescalarmode>")])
;; Masked form of scalar vfixupimm on 128-bit vectors.  Operand 1 is
;; tied to the destination; operand 5 is the mask register (constraint
;; "k"), printed in braces after the destination as {%5}.
6782 (define_insn "avx512f_sfixupimm<mode>_mask<round_saeonly_name>"
6783 [(set (match_operand:VF_128 0 "register_operand" "=v")
6787 [(match_operand:VF_128 1 "register_operand" "0")
6788 (match_operand:VF_128 2 "register_operand" "v")
6789 (match_operand:<sseintvecmode> 3 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
6790 (match_operand:SI 4 "const_0_to_255_operand")]
6795 (match_operand:<avx512fmaskmode> 5 "register_operand" "k")))]
6797 "vfixupimm<ssescalarmodesuffix>\t{%4, <round_saeonly_op6>%3, %2, %0%{%5%}|%0%{%5%}, %2, %3<round_saeonly_op6>, %4}";
6798 [(set_attr "prefix" "evex")
6799 (set_attr "mode" "<ssescalarmode>")])
;; Packed vrndscaleps/vrndscalepd on 512-bit float vectors, with
;; optional masking and SAE-only rounding variants.  Operand 1 is the
;; source and operand 2 an immediate in the range 0..255.
6801 (define_insn "avx512f_rndscale<mode><mask_name><round_saeonly_name>"
6802 [(set (match_operand:VF_512 0 "register_operand" "=v")
6804 [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
6805 (match_operand:SI 2 "const_0_to_255_operand")]
6808 "vrndscale<ssemodesuffix>\t{%2, <round_saeonly_mask_op3>%1, %0<mask_operand3>|%0<mask_operand3>, %1<round_saeonly_mask_op3>, %2}"
6809 [(set_attr "length_immediate" "1")
6810 (set_attr "prefix" "evex")
6811 (set_attr "mode" "<MODE>")])
;; Scalar vrndscaless/vrndscalesd on 128-bit vectors, with an SAE-only
;; rounding variant.  Operand 1 is a register, operand 2 follows the
;; rounding-dependent predicate and constraint, and operand 3 is an
;; immediate in the range 0..255.
6813 (define_insn "avx512f_rndscale<mode><round_saeonly_name>"
6814 [(set (match_operand:VF_128 0 "register_operand" "=v")
6817 [(match_operand:VF_128 1 "register_operand" "v")
6818 (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
6819 (match_operand:SI 3 "const_0_to_255_operand")]
6824 "vrndscale<ssescalarmodesuffix>\t{%3, <round_saeonly_op4>%2, %1, %0|%0, %1, %2<round_saeonly_op4>, %3}"
6825 [(set_attr "length_immediate" "1")
6826 (set_attr "prefix" "evex")
6827 (set_attr "mode" "<MODE>")])
6829 ;; One bit in mask selects 2 elements.
;; 512-bit vshufps with the selection spelled out as 16 constant
;; indices (operands 3..18), optionally masked.  The insn condition
;; requires the indices of the second, third and fourth group of four
;; to equal those of the first group (operands 3..6) plus 4, 8 and 12.
;; The output code therefore rebuilds the 8-bit immediate from operands
;; 3..6 only (two bits each, operand-2 indices rebased by 16) and
;; stores it back into operands[3] for printing.
6830 (define_insn "avx512f_shufps512_1<mask_name>"
6831 [(set (match_operand:V16SF 0 "register_operand" "=v")
6834 (match_operand:V16SF 1 "register_operand" "v")
6835 (match_operand:V16SF 2 "nonimmediate_operand" "vm"))
6836 (parallel [(match_operand 3 "const_0_to_3_operand")
6837 (match_operand 4 "const_0_to_3_operand")
6838 (match_operand 5 "const_16_to_19_operand")
6839 (match_operand 6 "const_16_to_19_operand")
6840 (match_operand 7 "const_4_to_7_operand")
6841 (match_operand 8 "const_4_to_7_operand")
6842 (match_operand 9 "const_20_to_23_operand")
6843 (match_operand 10 "const_20_to_23_operand")
6844 (match_operand 11 "const_8_to_11_operand")
6845 (match_operand 12 "const_8_to_11_operand")
6846 (match_operand 13 "const_24_to_27_operand")
6847 (match_operand 14 "const_24_to_27_operand")
6848 (match_operand 15 "const_12_to_15_operand")
6849 (match_operand 16 "const_12_to_15_operand")
6850 (match_operand 17 "const_28_to_31_operand")
6851 (match_operand 18 "const_28_to_31_operand")])))]
6853 && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
6854 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
6855 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
6856 && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4)
6857 && INTVAL (operands[3]) == (INTVAL (operands[11]) - 8)
6858 && INTVAL (operands[4]) == (INTVAL (operands[12]) - 8)
6859 && INTVAL (operands[5]) == (INTVAL (operands[13]) - 8)
6860 && INTVAL (operands[6]) == (INTVAL (operands[14]) - 8)
6861 && INTVAL (operands[3]) == (INTVAL (operands[15]) - 12)
6862 && INTVAL (operands[4]) == (INTVAL (operands[16]) - 12)
6863 && INTVAL (operands[5]) == (INTVAL (operands[17]) - 12)
6864 && INTVAL (operands[6]) == (INTVAL (operands[18]) - 12))"
6867 mask = INTVAL (operands[3]);
6868 mask |= INTVAL (operands[4]) << 2;
6869 mask |= (INTVAL (operands[5]) - 16) << 4;
6870 mask |= (INTVAL (operands[6]) - 16) << 6;
6871 operands[3] = GEN_INT (mask);
6873 return "vshufps\t{%3, %2, %1, %0<mask_operand19>|%0<mask_operand19>, %1, %2, %3}";
6875 [(set_attr "type" "sselog")
6876 (set_attr "length_immediate" "1")
6877 (set_attr "prefix" "evex")
6878 (set_attr "mode" "V16SF")])
;; Masked 512-bit vshufpd front end.  Each bit of the 8-bit immediate
;; (operand 3) is turned into one explicit selector index for
;; avx512f_shufpd512_1_mask: a set bit picks the odd element of the
;; pair, a clear bit the even one, with the odd-numbered bits addressing
;; operand 2 (indices 8..15).  Operand 4 and operand 5 (a QImode mask)
;; are passed through unchanged.
6880 (define_expand "avx512f_shufpd512_mask"
6881 [(match_operand:V8DF 0 "register_operand")
6882 (match_operand:V8DF 1 "register_operand")
6883 (match_operand:V8DF 2 "nonimmediate_operand")
6884 (match_operand:SI 3 "const_0_to_255_operand")
6885 (match_operand:V8DF 4 "register_operand")
6886 (match_operand:QI 5 "register_operand")]
6889 int mask = INTVAL (operands[3]);
6890 emit_insn (gen_avx512f_shufpd512_1_mask (operands[0], operands[1], operands[2],
6892 GEN_INT (mask & 2 ? 9 : 8),
6893 GEN_INT (mask & 4 ? 3 : 2),
6894 GEN_INT (mask & 8 ? 11 : 10),
6895 GEN_INT (mask & 16 ? 5 : 4),
6896 GEN_INT (mask & 32 ? 13 : 12),
6897 GEN_INT (mask & 64 ? 7 : 6),
6898 GEN_INT (mask & 128 ? 15 : 14),
6899 operands[4], operands[5]));
;; 512-bit vshufpd with the selection spelled out as eight constant
;; indices (operands 3..10), optionally masked.  Each index may take
;; one of two adjacent values; the output code subtracts the base of
;; each range to recover one immediate bit per element, assembles the
;; 8-bit immediate and stores it back into operands[3] for printing.
6903 (define_insn "avx512f_shufpd512_1<mask_name>"
6904 [(set (match_operand:V8DF 0 "register_operand" "=v")
6907 (match_operand:V8DF 1 "register_operand" "v")
6908 (match_operand:V8DF 2 "nonimmediate_operand" "vm"))
6909 (parallel [(match_operand 3 "const_0_to_1_operand")
6910 (match_operand 4 "const_8_to_9_operand")
6911 (match_operand 5 "const_2_to_3_operand")
6912 (match_operand 6 "const_10_to_11_operand")
6913 (match_operand 7 "const_4_to_5_operand")
6914 (match_operand 8 "const_12_to_13_operand")
6915 (match_operand 9 "const_6_to_7_operand")
6916 (match_operand 10 "const_14_to_15_operand")])))]
6920 mask = INTVAL (operands[3]);
6921 mask |= (INTVAL (operands[4]) - 8) << 1;
6922 mask |= (INTVAL (operands[5]) - 2) << 2;
6923 mask |= (INTVAL (operands[6]) - 10) << 3;
6924 mask |= (INTVAL (operands[7]) - 4) << 4;
6925 mask |= (INTVAL (operands[8]) - 12) << 5;
6926 mask |= (INTVAL (operands[9]) - 6) << 6;
6927 mask |= (INTVAL (operands[10]) - 14) << 7;
6928 operands[3] = GEN_INT (mask);
6930 return "vshufpd\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
6932 [(set_attr "type" "sselog")
6933 (set_attr "length_immediate" "1")
6934 (set_attr "prefix" "evex")
6935 (set_attr "mode" "V8DF")])
;; 256-bit vshufpd front end.  Each low bit of the immediate (operand 3)
;; is converted into one explicit selector index for avx_shufpd256_1:
;; a set bit picks the odd element of the pair, a clear bit the even
;; one; the indices 4..7 address operand 2.
6937 (define_expand "avx_shufpd256"
6938 [(match_operand:V4DF 0 "register_operand")
6939 (match_operand:V4DF 1 "register_operand")
6940 (match_operand:V4DF 2 "nonimmediate_operand")
6941 (match_operand:SI 3 "const_int_operand")]
6944 int mask = INTVAL (operands[3]);
6945 emit_insn (gen_avx_shufpd256_1 (operands[0], operands[1], operands[2],
6947 GEN_INT (mask & 2 ? 5 : 4),
6948 GEN_INT (mask & 4 ? 3 : 2),
6949 GEN_INT (mask & 8 ? 7 : 6)));
;; 256-bit vshufpd with the selection spelled out as four constant
;; indices (operands 3..6).  The output code subtracts the base of each
;; two-value range to recover one bit per element, assembles the 4-bit
;; immediate and stores it back into operands[3] for printing.
6953 (define_insn "avx_shufpd256_1"
6954 [(set (match_operand:V4DF 0 "register_operand" "=x")
6957 (match_operand:V4DF 1 "register_operand" "x")
6958 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
6959 (parallel [(match_operand 3 "const_0_to_1_operand")
6960 (match_operand 4 "const_4_to_5_operand")
6961 (match_operand 5 "const_2_to_3_operand")
6962 (match_operand 6 "const_6_to_7_operand")])))]
6966 mask = INTVAL (operands[3]);
6967 mask |= (INTVAL (operands[4]) - 4) << 1;
6968 mask |= (INTVAL (operands[5]) - 2) << 2;
6969 mask |= (INTVAL (operands[6]) - 6) << 3;
6970 operands[3] = GEN_INT (mask);
6972 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
6974 [(set_attr "type" "sseshuf")
6975 (set_attr "length_immediate" "1")
6976 (set_attr "prefix" "vex")
6977 (set_attr "mode" "V4DF")])
;; 128-bit shufpd front end.  The immediate (operand 3) is converted
;; into explicit selector indices for sse2_shufpd_v2df; bit 1 chooses
;; between elements 2 and 3, i.e. the two doubles of operand 2.
6979 (define_expand "sse2_shufpd"
6980 [(match_operand:V2DF 0 "register_operand")
6981 (match_operand:V2DF 1 "register_operand")
6982 (match_operand:V2DF 2 "nonimmediate_operand")
6983 (match_operand:SI 3 "const_int_operand")]
6986 int mask = INTVAL (operands[3]);
6987 emit_insn (gen_sse2_shufpd_v2df (operands[0], operands[1], operands[2],
6989 GEN_INT (mask & 2 ? 3 : 2)));
6993 ;; punpcklqdq and punpckhqdq are shorter than shufpd.
;; 256-bit vpunpckhqdq on V4DI: register operand 1 and
;; register-or-memory operand 2, VEX encoded, OImode.
6994 (define_insn "avx2_interleave_highv4di"
6995 [(set (match_operand:V4DI 0 "register_operand" "=x")
6998 (match_operand:V4DI 1 "register_operand" "x")
6999 (match_operand:V4DI 2 "nonimmediate_operand" "xm"))
7000 (parallel [(const_int 1)
7005 "vpunpckhqdq\t{%2, %1, %0|%0, %1, %2}"
7006 [(set_attr "type" "sselog")
7007 (set_attr "prefix" "vex")
7008 (set_attr "mode" "OI")])
;; 512-bit vpunpckhqdq on V8DI, optionally masked.  The selector
;; {1,9,3,11,5,13,7,15} takes the odd-numbered quadword of every pair,
;; alternating between operand 1 (indices 0..7) and operand 2 (8..15).
7010 (define_insn "<mask_codefor>avx512f_interleave_highv8di<mask_name>"
7011 [(set (match_operand:V8DI 0 "register_operand" "=v")
7014 (match_operand:V8DI 1 "register_operand" "v")
7015 (match_operand:V8DI 2 "nonimmediate_operand" "vm"))
7016 (parallel [(const_int 1) (const_int 9)
7017 (const_int 3) (const_int 11)
7018 (const_int 5) (const_int 13)
7019 (const_int 7) (const_int 15)])))]
7021 "vpunpckhqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7022 [(set_attr "type" "sselog")
7023 (set_attr "prefix" "evex")
7024 (set_attr "mode" "XI")])
;; 128-bit high quadword interleave on V2DI.  Two alternatives:
;;   0  non-AVX punpckhqdq, operand 1 tied to the destination;
;;   1  AVX three-operand vpunpckhqdq.
7026 (define_insn "vec_interleave_highv2di"
7027 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
7030 (match_operand:V2DI 1 "register_operand" "0,x")
7031 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm"))
7032 (parallel [(const_int 1)
7036 punpckhqdq\t{%2, %0|%0, %2}
7037 vpunpckhqdq\t{%2, %1, %0|%0, %1, %2}"
7038 [(set_attr "isa" "noavx,avx")
7039 (set_attr "type" "sselog")
7040 (set_attr "prefix_data16" "1,*")
7041 (set_attr "prefix" "orig,vex")
7042 (set_attr "mode" "TI")])
;; 256-bit vpunpcklqdq on V4DI: register operand 1 and
;; register-or-memory operand 2, VEX encoded, OImode.
7044 (define_insn "avx2_interleave_lowv4di"
7045 [(set (match_operand:V4DI 0 "register_operand" "=x")
7048 (match_operand:V4DI 1 "register_operand" "x")
7049 (match_operand:V4DI 2 "nonimmediate_operand" "xm"))
7050 (parallel [(const_int 0)
7055 "vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}"
7056 [(set_attr "type" "sselog")
7057 (set_attr "prefix" "vex")
7058 (set_attr "mode" "OI")])
;; 512-bit vpunpcklqdq on V8DI, optionally masked.  The selector
;; {0,8,2,10,4,12,6,14} takes the even-numbered quadword of every pair,
;; alternating between operand 1 (indices 0..7) and operand 2 (8..15).
7060 (define_insn "<mask_codefor>avx512f_interleave_lowv8di<mask_name>"
7061 [(set (match_operand:V8DI 0 "register_operand" "=v")
7064 (match_operand:V8DI 1 "register_operand" "v")
7065 (match_operand:V8DI 2 "nonimmediate_operand" "vm"))
7066 (parallel [(const_int 0) (const_int 8)
7067 (const_int 2) (const_int 10)
7068 (const_int 4) (const_int 12)
7069 (const_int 6) (const_int 14)])))]
7071 "vpunpcklqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7072 [(set_attr "type" "sselog")
7073 (set_attr "prefix" "evex")
7074 (set_attr "mode" "XI")])
;; 128-bit low quadword interleave on V2DI.  Two alternatives:
;;   0  non-AVX punpcklqdq, operand 1 tied to the destination;
;;   1  AVX three-operand vpunpcklqdq.
7076 (define_insn "vec_interleave_lowv2di"
7077 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
7080 (match_operand:V2DI 1 "register_operand" "0,x")
7081 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm"))
7082 (parallel [(const_int 0)
7086 punpcklqdq\t{%2, %0|%0, %2}
7087 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}"
7088 [(set_attr "isa" "noavx,avx")
7089 (set_attr "type" "sselog")
7090 (set_attr "prefix_data16" "1,*")
7091 (set_attr "prefix" "orig,vex")
7092 (set_attr "mode" "TI")])
;; 128-bit shufpd for the VI8F_128 modes, expressed as a vec_select from
;; the concatenation of operands 1 and 2.  Operand 3 (0 or 1) selects
;; from operand 1, operand 4 (2 or 3) from operand 2.  The output code
;; folds both indices into a 2-bit immediate, stores it into
;; operands[3] and returns the non-AVX two-operand shufpd or the AVX
;; three-operand vshufpd depending on the matched alternative.
7094 (define_insn "sse2_shufpd_<mode>"
7095 [(set (match_operand:VI8F_128 0 "register_operand" "=x,x")
7096 (vec_select:VI8F_128
7097 (vec_concat:<ssedoublevecmode>
7098 (match_operand:VI8F_128 1 "register_operand" "0,x")
7099 (match_operand:VI8F_128 2 "nonimmediate_operand" "xm,xm"))
7100 (parallel [(match_operand 3 "const_0_to_1_operand")
7101 (match_operand 4 "const_2_to_3_operand")])))]
7105 mask = INTVAL (operands[3]);
7106 mask |= (INTVAL (operands[4]) - 2) << 1;
7107 operands[3] = GEN_INT (mask);
7109 switch (which_alternative)
7112 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
7114 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
7119 [(set_attr "isa" "noavx,avx")
7120 (set_attr "type" "sseshuf")
7121 (set_attr "length_immediate" "1")
7122 (set_attr "prefix" "orig,vex")
7123 (set_attr "mode" "V2DF")])
7125 ;; Avoid combining registers from different units in a single alternative,
7126 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Extract element 1 (the high double) of a V2DF value into a DFmode
;; destination.  Needs TARGET_SSE2 and rejects memory-to-memory.
;; Six alternatives: a store to memory from an SSE register, two
;; SSE-register forms (non-AVX with operand 1 tied to the destination,
;; and AVX), and three loads from offsettable memory into an SSE, x87
;; ("*f") or general ("r") register.
7127 (define_insn "sse2_storehpd"
7128 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,x,*f,r")
7130 (match_operand:V2DF 1 "nonimmediate_operand" " x,0,x,o,o,o")
7131 (parallel [(const_int 1)])))]
7132 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7134 %vmovhpd\t{%1, %0|%0, %1}
7136 vunpckhpd\t{%d1, %0|%0, %d1}
7140 [(set_attr "isa" "*,noavx,avx,*,*,*")
7141 (set_attr "type" "ssemov,sselog1,sselog1,ssemov,fmov,imov")
7142 (set (attr "prefix_data16")
7144 (and (eq_attr "alternative" "0")
7145 (not (match_test "TARGET_AVX")))
7147 (const_string "*")))
7148 (set_attr "prefix" "maybe_vex,orig,vex,*,*,*")
7149 (set_attr "mode" "V1DF,V1DF,V2DF,DF,DF,DF")])
;; Post-reload split (TARGET_SSE2): extracting element 1 of a V2DF
;; memory operand into a register becomes a plain DFmode load from
;; byte offset 8 of that memory operand.
7152 [(set (match_operand:DF 0 "register_operand")
7154 (match_operand:V2DF 1 "memory_operand")
7155 (parallel [(const_int 1)])))]
7156 "TARGET_SSE2 && reload_completed"
7157 [(set (match_dup 0) (match_dup 1))]
7158 "operands[1] = adjust_address (operands[1], DFmode, 8);")
;; SSE1-only fallback (TARGET_SSE without TARGET_SSE2) for extracting
;; element 1 of a V2DF value; memory-to-memory is rejected.
;; Alternatives: movhps store to memory, movhlps register to register,
;; and movlps load from %H1 of an offsettable memory operand.
7160 (define_insn "*vec_extractv2df_1_sse"
7161 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
7163 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
7164 (parallel [(const_int 1)])))]
7165 "!TARGET_SSE2 && TARGET_SSE
7166 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7168 movhps\t{%1, %0|%q0, %1}
7169 movhlps\t{%1, %0|%0, %1}
7170 movlps\t{%H1, %0|%0, %H1}"
7171 [(set_attr "type" "ssemov")
7172 (set_attr "ssememalign" "64")
7173 (set_attr "mode" "V2SF,V4SF,V2SF")])
7175 ;; Avoid combining registers from different units in a single alternative,
7176 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Element index 0 of V2DF operand 1 delivered as DF operand 0 (no
;; memory-to-memory).  Alternative 0 stores an SSE register to memory with
;; (v)movlpd and is the only alternative with prefix_data16 set; the other
;; four are DF-mode moves typed ssemov/ssemov/fmov/imov.
7177 (define_insn "sse2_storelpd"
7178 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
7180 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m,m,m")
7181 (parallel [(const_int 0)])))]
7182 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7184 %vmovlpd\t{%1, %0|%0, %1}
7189 [(set_attr "type" "ssemov,ssemov,ssemov,fmov,imov")
7190 (set_attr "prefix_data16" "1,*,*,*,*")
7191 (set_attr "prefix" "maybe_vex")
7192 (set_attr "mode" "V1DF,DF,DF,DF,DF")])
;; After reload, with SSE2: taking element index 0 into a register is
;; rewritten as a plain DF move.  A register source is replaced by the same
;; register number viewed in DFmode; the memory form is re-addressed as
;; DFmode at byte offset 0.
7195 [(set (match_operand:DF 0 "register_operand")
7197 (match_operand:V2DF 1 "nonimmediate_operand")
7198 (parallel [(const_int 0)])))]
7199 "TARGET_SSE2 && reload_completed"
7200 [(set (match_dup 0) (match_dup 1))]
7202 if (REG_P (operands[1]))
7203 operands[1] = gen_rtx_REG (DFmode, REGNO (operands[1]));
7205 operands[1] = adjust_address (operands[1], DFmode, 0);
;; Element-index-0 extract for SSE without SSE2 (no memory-to-memory):
;; movlps to memory, movaps between registers, movlps from memory.
7208 (define_insn "*vec_extractv2df_0_sse"
7209 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
7211 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
7212 (parallel [(const_int 0)])))]
7213 "!TARGET_SSE2 && TARGET_SSE
7214 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7216 movlps\t{%1, %0|%0, %1}
7217 movaps\t{%1, %0|%0, %1}
7218 movlps\t{%1, %0|%0, %q1}"
7219 [(set_attr "type" "ssemov")
7220 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander wrapper around sse2_loadhpd.  ix86_fixup_binary_operands may
;; return a destination other than operands[0]; the insn is emitted into that
;; destination and the result is then copied to operands[0].
7222 (define_expand "sse2_loadhpd_exp"
7223 [(set (match_operand:V2DF 0 "nonimmediate_operand")
7226 (match_operand:V2DF 1 "nonimmediate_operand")
7227 (parallel [(const_int 0)]))
7228 (match_operand:DF 2 "nonimmediate_operand")))]
7231 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
7233 emit_insn (gen_sse2_loadhpd (dst, operands[1], operands[2]));
7235 /* Fix up the destination if needed. */
7236 if (dst != operands[0])
7237 emit_move_insn (operands[0], dst);
7242 ;; Avoid combining registers from different units in a single alternative,
7243 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Pairs element index 0 of V2DF operand 1 with DF operand 2 to form V2DF
;; operand 0; operands 1 and 2 may not both be in memory.  The visible
;; templates are the legacy/VEX movhpd forms (operand 2 in memory) and the
;; legacy/VEX unpcklpd forms (operand 2 in an SSE register).  The last three
;; alternatives take operand 2 from an "x", "*f" or "r" register and are
;; typed ssemov/fmov/imov.
7244 (define_insn "sse2_loadhpd"
7245 [(set (match_operand:V2DF 0 "nonimmediate_operand"
7249 (match_operand:V2DF 1 "nonimmediate_operand"
7251 (parallel [(const_int 0)]))
7252 (match_operand:DF 2 "nonimmediate_operand"
7253 " m,m,x,x,x,*f,r")))]
7254 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
7256 movhpd\t{%2, %0|%0, %2}
7257 vmovhpd\t{%2, %1, %0|%0, %1, %2}
7258 unpcklpd\t{%2, %0|%0, %2}
7259 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
7263 [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
7264 (set_attr "type" "ssemov,ssemov,sselog,sselog,ssemov,fmov,imov")
7265 (set_attr "ssememalign" "64")
7266 (set_attr "prefix_data16" "1,*,*,*,*,*,*")
7267 (set_attr "prefix" "orig,vex,orig,vex,*,*,*")
7268 (set_attr "mode" "V1DF,V1DF,V2DF,V2DF,DF,DF,DF")])
;; After reload, with SSE2: when the destination is V2DF memory and its own
;; element index 0 is reused unchanged, the operation reduces to a DF store
;; of register operand 1 at byte offset 8 of that memory.
7271 [(set (match_operand:V2DF 0 "memory_operand")
7273 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
7274 (match_operand:DF 1 "register_operand")))]
7275 "TARGET_SSE2 && reload_completed"
7276 [(set (match_dup 0) (match_dup 1))]
7277 "operands[0] = adjust_address (operands[0], DFmode, 8);")
;; Expander wrapper around sse2_loadlpd.  ix86_fixup_binary_operands may
;; return a destination other than operands[0]; the insn is emitted into that
;; destination and the result is then copied to operands[0].
7279 (define_expand "sse2_loadlpd_exp"
7280 [(set (match_operand:V2DF 0 "nonimmediate_operand")
7282 (match_operand:DF 2 "nonimmediate_operand")
7284 (match_operand:V2DF 1 "nonimmediate_operand")
7285 (parallel [(const_int 1)]))))]
7288 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
7290 emit_insn (gen_sse2_loadlpd (dst, operands[1], operands[2]));
7292 /* Fix up the destination if needed. */
7293 if (dst != operands[0])
7294 emit_move_insn (operands[0], dst);
7299 ;; Avoid combining registers from different units in a single alternative,
7300 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Pairs DF operand 2 with element index 1 of operand 1 to form V2DF
;; operand 0; operands 1 and 2 may not both be in memory.  Operand 1 is a
;; vector_move_operand, and alternative 0 admits it through the "C" constant
;; constraint (emitted as a bare (v)movsd load).  Alternative 5 is the only
;; one with an immediate (shufpd $2, type sselog); alternatives 9 and 10 are
;; typed fmov and imov; all others are ssemov.  Alternatives 6/7 read the
;; %H1 form of an offsettable operand 1 with movhpd/vmovhpd.
7301 (define_insn "sse2_loadlpd"
7302 [(set (match_operand:V2DF 0 "nonimmediate_operand"
7303 "=x,x,x,x,x,x,x,x,m,m ,m")
7305 (match_operand:DF 2 "nonimmediate_operand"
7306 " m,m,m,x,x,0,0,x,x,*f,r")
7308 (match_operand:V2DF 1 "vector_move_operand"
7309 " C,0,x,0,x,x,o,o,0,0 ,0")
7310 (parallel [(const_int 1)]))))]
7311 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
7313 %vmovsd\t{%2, %0|%0, %2}
7314 movlpd\t{%2, %0|%0, %2}
7315 vmovlpd\t{%2, %1, %0|%0, %1, %2}
7316 movsd\t{%2, %0|%0, %2}
7317 vmovsd\t{%2, %1, %0|%0, %1, %2}
7318 shufpd\t{$2, %1, %0|%0, %1, 2}
7319 movhpd\t{%H1, %0|%0, %H1}
7320 vmovhpd\t{%H1, %2, %0|%0, %2, %H1}
7324 [(set_attr "isa" "*,noavx,avx,noavx,avx,noavx,noavx,avx,*,*,*")
7326 (cond [(eq_attr "alternative" "5")
7327 (const_string "sselog")
7328 (eq_attr "alternative" "9")
7329 (const_string "fmov")
7330 (eq_attr "alternative" "10")
7331 (const_string "imov")
7333 (const_string "ssemov")))
7334 (set_attr "ssememalign" "64")
7335 (set_attr "prefix_data16" "*,1,*,*,*,*,1,*,*,*,*")
7336 (set_attr "length_immediate" "*,*,*,*,*,1,*,*,*,*,*")
7337 (set_attr "prefix" "maybe_vex,orig,vex,orig,vex,orig,orig,vex,*,*,*")
7338 (set_attr "mode" "DF,V1DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,DF,DF,DF")])
;; After reload, with SSE2: when the destination is V2DF memory and its own
;; element index 1 is reused unchanged, the operation reduces to a DF store
;; of register operand 1 at byte offset 0 of that memory.
7341 [(set (match_operand:V2DF 0 "memory_operand")
7343 (match_operand:DF 1 "register_operand")
7344 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
7345 "TARGET_SSE2 && reload_completed"
7346 [(set (match_dup 0) (match_dup 1))]
7347 "operands[0] = adjust_address (operands[0], DFmode, 0);")
;; Nine alternatives combining V2DF operands 2 and 1 into operand 0:
;;   0/1  movsd / vmovsd, everything in SSE registers;
;;   2/3  movlpd / vmovlpd with operand 2 in memory;
;;   4    (v)movlpd store to a memory destination (operand 1 tied to it);
;;   5    shufpd $2 with operand 2 tied to the destination (type sselog);
;;   6/7  movhps / vmovhps reading the %H1 form of an offsettable operand 1;
;;   8    (v)movhps store to the %H0 form of an offsettable destination.
;; prefix_data16 is special-cased for alternatives 2 and 4 when AVX is off.
7349 (define_insn "sse2_movsd"
7350 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,m,x,x,x,o")
7352 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,m,m,x,0,0,x,0")
7353 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,0,x,0,x,o,o,x")
7357 movsd\t{%2, %0|%0, %2}
7358 vmovsd\t{%2, %1, %0|%0, %1, %2}
7359 movlpd\t{%2, %0|%0, %q2}
7360 vmovlpd\t{%2, %1, %0|%0, %1, %q2}
7361 %vmovlpd\t{%2, %0|%q0, %2}
7362 shufpd\t{$2, %1, %0|%0, %1, 2}
7363 movhps\t{%H1, %0|%0, %H1}
7364 vmovhps\t{%H1, %2, %0|%0, %2, %H1}
7365 %vmovhps\t{%1, %H0|%H0, %1}"
7366 [(set_attr "isa" "noavx,avx,noavx,avx,*,noavx,noavx,avx,*")
7369 (eq_attr "alternative" "5")
7370 (const_string "sselog")
7371 (const_string "ssemov")))
7372 (set (attr "prefix_data16")
7374 (and (eq_attr "alternative" "2,4")
7375 (not (match_test "TARGET_AVX")))
7377 (const_string "*")))
7378 (set_attr "length_immediate" "*,*,*,*,*,1,*,*,*")
7379 (set_attr "ssememalign" "64")
7380 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig,vex,maybe_vex")
7381 (set_attr "mode" "DF,DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,V1DF")])
;; V2DF register result built from DF operand 1.  Two alternatives: a noavx
;; form with operand 1 tied to the destination register, and an SSE3
;; (v)movddup form that accepts a register or memory source.
7383 (define_insn "vec_dupv2df"
7384 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
7386 (match_operand:DF 1 "nonimmediate_operand" " 0,xm")))]
7390 %vmovddup\t{%1, %0|%0, %1}"
7391 [(set_attr "isa" "noavx,sse3")
7392 (set_attr "type" "sselog1")
7393 (set_attr "prefix" "orig,maybe_vex")
7394 (set_attr "mode" "V2DF,DF")])
;; V2DF register result built from DF operands 1 and 2.  Alternatives:
;;   0/1  unpcklpd / vunpcklpd, both inputs in SSE registers;
;;   2    (v)movddup when operand 2 matches memory operand 1 (SSE3);
;;   3/4  movhpd / vmovhpd with operand 2 in memory;
;;   5    (v)movsd from memory operand 1 when operand 2 is the "C" constant;
;;   6/7  movlhps / movhps, the noavx forms with no SSE2 isa requirement.
7396 (define_insn "*vec_concatv2df"
7397 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x,x,x,x,x,x")
7399 (match_operand:DF 1 "nonimmediate_operand" " 0,x,m,0,x,m,0,0")
7400 (match_operand:DF 2 "vector_move_operand" " x,x,1,m,m,C,x,m")))]
7403 unpcklpd\t{%2, %0|%0, %2}
7404 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
7405 %vmovddup\t{%1, %0|%0, %1}
7406 movhpd\t{%2, %0|%0, %2}
7407 vmovhpd\t{%2, %1, %0|%0, %1, %2}
7408 %vmovsd\t{%1, %0|%0, %1}
7409 movlhps\t{%2, %0|%0, %2}
7410 movhps\t{%2, %0|%0, %2}"
7411 [(set_attr "isa" "sse2_noavx,avx,sse3,sse2_noavx,avx,sse2,noavx,noavx")
7414 (eq_attr "alternative" "0,1,2")
7415 (const_string "sselog")
7416 (const_string "ssemov")))
7417 (set_attr "prefix_data16" "*,*,*,1,*,*,*,*")
7418 (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex,orig,orig")
7419 (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,DF,V4SF,V2SF")])
7421 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7423 ;; Parallel integer down-conversion operations
7425 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Destination modes of the AVX512F truncating moves below, together with,
;; per destination mode: the source vector mode (upper-case for use as an
;; operand mode, lower-case for use in pattern names) and the suffix appended
;; to the vpmov mnemonic.
7427 (define_mode_iterator PMOV_DST_MODE [V16QI V16HI V8SI V8HI])
7428 (define_mode_attr pmov_src_mode
7429 [(V16QI "V16SI") (V16HI "V16SI") (V8SI "V8DI") (V8HI "V8DI")])
7430 (define_mode_attr pmov_src_lower
7431 [(V16QI "v16si") (V16HI "v16si") (V8SI "v8di") (V8HI "v8di")])
7432 (define_mode_attr pmov_suff
7433 [(V16QI "db") (V16HI "dw") (V8SI "qd") (V8HI "qw")])
;; Truncating move (any_truncate) from <pmov_src_mode> register operand 1 to
;; a register or memory destination, emitted as vpmov<trunsuffix><pmov_suff>.
;; The "memory" attribute marks alternative 1 as a store.
7435 (define_insn "*avx512f_<code><pmov_src_lower><mode>2"
7436 [(set (match_operand:PMOV_DST_MODE 0 "nonimmediate_operand" "=v,m")
7437 (any_truncate:PMOV_DST_MODE
7438 (match_operand:<pmov_src_mode> 1 "register_operand" "v,v")))]
7440 "vpmov<trunsuffix><pmov_suff>\t{%1, %0|%0, %1}"
7441 [(set_attr "type" "ssemov")
7442 (set_attr "memory" "none,store")
7443 (set_attr "prefix" "evex")
7444 (set_attr "mode" "<sseinsnmode>")])
;; Masked truncating move: the truncated operand 1 is vec_merge'd with
;; operand 2 under mask register operand 3.  In the register alternative
;; operand 2 is either tied to the destination or the "C" constant (printed
;; through %N2); the store alternative requires it to be tied to the
;; destination.
7446 (define_insn "avx512f_<code><pmov_src_lower><mode>2_mask"
7447 [(set (match_operand:PMOV_DST_MODE 0 "nonimmediate_operand" "=v,m")
7448 (vec_merge:PMOV_DST_MODE
7449 (any_truncate:PMOV_DST_MODE
7450 (match_operand:<pmov_src_mode> 1 "register_operand" "v,v"))
7451 (match_operand:PMOV_DST_MODE 2 "vector_move_operand" "0C,0")
7452 (match_operand:<avx512fmaskmode> 3 "register_operand" "k,k")))]
7454 "vpmov<trunsuffix><pmov_suff>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
7455 [(set_attr "type" "ssemov")
7456 (set_attr "memory" "none,store")
7457 (set_attr "prefix" "evex")
7458 (set_attr "mode" "<sseinsnmode>")])
;; vpmov<trunsuffix>qb from V8DI register operand 1 into a V16QI register.
;; The pattern pairs the converted value with an all-zero V8QI constant,
;; which accounts for the remaining eight bytes of the 16-byte destination.
7460 (define_insn "*avx512f_<code>v8div16qi2"
7461 [(set (match_operand:V16QI 0 "register_operand" "=v")
7464 (match_operand:V8DI 1 "register_operand" "v"))
7465 (const_vector:V8QI [(const_int 0) (const_int 0)
7466 (const_int 0) (const_int 0)
7467 (const_int 0) (const_int 0)
7468 (const_int 0) (const_int 0)])))]
7470 "vpmov<trunsuffix>qb\t{%1, %0|%0, %1}"
7471 [(set_attr "type" "ssemov")
7472 (set_attr "prefix" "evex")
7473 (set_attr "mode" "TI")])
;; Memory-destination form of vpmov<trunsuffix>qb.  The pattern also carries
;; a selection of byte indices 8..15.
;; NOTE(review): presumably these are the destination's own upper eight
;; bytes, i.e. left unmodified by the store -- confirm against the full
;; pattern.
7475 (define_insn "*avx512f_<code>v8div16qi2_store"
7476 [(set (match_operand:V16QI 0 "memory_operand" "=m")
7479 (match_operand:V8DI 1 "register_operand" "v"))
7482 (parallel [(const_int 8) (const_int 9)
7483 (const_int 10) (const_int 11)
7484 (const_int 12) (const_int 13)
7485 (const_int 14) (const_int 15)]))))]
7487 "vpmov<trunsuffix>qb\t{%1, %0|%0, %1}"
7488 [(set_attr "type" "ssemov")
7489 (set_attr "memory" "store")
7490 (set_attr "prefix" "evex")
7491 (set_attr "mode" "TI")])
;; Masked register form of vpmov<trunsuffix>qb.  QImode mask register
;; operand 3 is printed as {%3}; operand 2 (tied to the destination or the
;; "C" constant, printed through %N2) contributes its byte indices 0..7, and
;; an all-zero V8QI constant accounts for the remaining eight bytes.
7493 (define_insn "avx512f_<code>v8div16qi2_mask"
7494 [(set (match_operand:V16QI 0 "register_operand" "=v")
7498 (match_operand:V8DI 1 "register_operand" "v"))
7500 (match_operand:V16QI 2 "vector_move_operand" "0C")
7501 (parallel [(const_int 0) (const_int 1)
7502 (const_int 2) (const_int 3)
7503 (const_int 4) (const_int 5)
7504 (const_int 6) (const_int 7)]))
7505 (match_operand:QI 3 "register_operand" "k"))
7506 (const_vector:V8QI [(const_int 0) (const_int 0)
7507 (const_int 0) (const_int 0)
7508 (const_int 0) (const_int 0)
7509 (const_int 0) (const_int 0)])))]
7511 "vpmov<trunsuffix>qb\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
7512 [(set_attr "type" "ssemov")
7513 (set_attr "prefix" "evex")
7514 (set_attr "mode" "TI")])
;; Masked memory-destination form of vpmov<trunsuffix>qb, with QImode mask
;; register operand 2 printed as {%2}.  The pattern carries selections of
;; byte indices 0..7 and 8..15.
;; NOTE(review): presumably both selections read the destination itself
;; (masked-off and untouched bytes keep their memory contents) -- confirm
;; against the full pattern.
7516 (define_insn "*avx512f_<code>v8div16qi2_store_mask"
7517 [(set (match_operand:V16QI 0 "memory_operand" "=m")
7521 (match_operand:V8DI 1 "register_operand" "v"))
7524 (parallel [(const_int 0) (const_int 1)
7525 (const_int 2) (const_int 3)
7526 (const_int 4) (const_int 5)
7527 (const_int 6) (const_int 7)]))
7528 (match_operand:QI 2 "register_operand" "k"))
7531 (parallel [(const_int 8) (const_int 9)
7532 (const_int 10) (const_int 11)
7533 (const_int 12) (const_int 13)
7534 (const_int 14) (const_int 15)]))))]
7536 "vpmov<trunsuffix>qb\t{%1, %0%{%2%}|%0%{%2%}, %1}"
7537 [(set_attr "type" "ssemov")
7538 (set_attr "memory" "store")
7539 (set_attr "prefix" "evex")
7540 (set_attr "mode" "TI")])
7542 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7544 ;; Parallel integral arithmetic
7546 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander for integer vector negation over VI_AVX2.  The preparation
;; statement creates operand 2 as a register holding the <MODE>mode zero
;; vector.
7548 (define_expand "neg<mode>2"
7549 [(set (match_operand:VI_AVX2 0 "register_operand")
7552 (match_operand:VI_AVX2 1 "nonimmediate_operand")))]
7554 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
;; Expander for integer vector add/subtract over VI_AVX2, optionally masked
;; (<mask_name>).  Operands are legitimised in place by
;; ix86_fixup_binary_operands_no_copy before the pattern is emitted.
7556 (define_expand "<plusminus_insn><mode>3<mask_name>"
7557 [(set (match_operand:VI_AVX2 0 "register_operand")
7559 (match_operand:VI_AVX2 1 "nonimmediate_operand")
7560 (match_operand:VI_AVX2 2 "nonimmediate_operand")))]
7561 "TARGET_SSE2 && <mask_mode512bit_condition>"
7562 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Matching insn for the add/subtract expander.  Alternative 0 is the legacy
;; two-operand form with operand 1 tied to the destination (commutativity
;; comes from <comm>); alternative 1 is the three-operand VEX/EVEX form,
;; which is the only one that prints the optional mask operand.
7564 (define_insn "*<plusminus_insn><mode>3<mask_name>"
7565 [(set (match_operand:VI_AVX2 0 "register_operand" "=x,v")
7567 (match_operand:VI_AVX2 1 "nonimmediate_operand" "<comm>0,v")
7568 (match_operand:VI_AVX2 2 "nonimmediate_operand" "xm,vm")))]
7569 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands) && <mask_mode512bit_condition>"
7571 p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
7572 vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7573 [(set_attr "isa" "noavx,avx")
7574 (set_attr "type" "sseiadd")
7575 (set_attr "prefix_data16" "1,*")
7576 (set_attr "prefix" "<mask_prefix3>")
7577 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the saturating add/subtract codes (sat_plusminus) over
;; VI12_AVX2; operands are legitimised by
;; ix86_fixup_binary_operands_no_copy.
7579 (define_expand "<sse2_avx2>_<plusminus_insn><mode>3"
7580 [(set (match_operand:VI12_AVX2 0 "register_operand")
7581 (sat_plusminus:VI12_AVX2
7582 (match_operand:VI12_AVX2 1 "nonimmediate_operand")
7583 (match_operand:VI12_AVX2 2 "nonimmediate_operand")))]
7585 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Matching insn for the saturating add/subtract expander: legacy
;; two-operand form (operand 1 tied to the destination) and three-operand
;; VEX form.
;; NOTE(review): the "mode" attribute is a fixed "TI" although VI12_AVX2 is
;; an iterator; sibling insns use <sseinsnmode> -- confirm this is intended.
7587 (define_insn "*<sse2_avx2>_<plusminus_insn><mode>3"
7588 [(set (match_operand:VI12_AVX2 0 "register_operand" "=x,v")
7589 (sat_plusminus:VI12_AVX2
7590 (match_operand:VI12_AVX2 1 "nonimmediate_operand" "<comm>0,v")
7591 (match_operand:VI12_AVX2 2 "nonimmediate_operand" "xm,vm")))]
7592 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
7594 p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
7595 vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
7596 [(set_attr "isa" "noavx,avx")
7597 (set_attr "type" "sseiadd")
7598 (set_attr "prefix_data16" "1,*")
7599 (set_attr "prefix" "orig,vex")
7600 (set_attr "mode" "TI")])
;; Byte-element vector multiply.  No insn pattern is matched here; the
;; expansion is delegated entirely to ix86_expand_vecop_qihi with MULT.
7602 (define_expand "mul<mode>3"
7603 [(set (match_operand:VI1_AVX2 0 "register_operand")
7604 (mult:VI1_AVX2 (match_operand:VI1_AVX2 1 "register_operand")
7605 (match_operand:VI1_AVX2 2 "register_operand")))]
7608 ix86_expand_vecop_qihi (MULT, operands[0], operands[1], operands[2]);
;; Vector multiply over VI2_AVX2; operands are legitimised by
;; ix86_fixup_binary_operands_no_copy and the pattern is matched by
;; *mul<mode>3.
7612 (define_expand "mul<mode>3"
7613 [(set (match_operand:VI2_AVX2 0 "register_operand")
7614 (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "nonimmediate_operand")
7615 (match_operand:VI2_AVX2 2 "nonimmediate_operand")))]
7617 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; pmullw / vpmullw.  Operand 1 is marked commutative ("%"); the legacy form
;; ties it to the destination, the VEX form is three-operand.
7619 (define_insn "*mul<mode>3"
7620 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
7621 (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,x")
7622 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,xm")))]
7623 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
7625 pmullw\t{%2, %0|%0, %2}
7626 vpmullw\t{%2, %1, %0|%0, %1, %2}"
7627 [(set_attr "isa" "noavx,avx")
7628 (set_attr "type" "sseimul")
7629 (set_attr "prefix_data16" "1,*")
7630 (set_attr "prefix" "orig,vex")
7631 (set_attr "mode" "<sseinsnmode>")])
;; High-part multiply over VI2_AVX2: both operands are extended
;; (any_extend) to <ssedoublemode>, multiplied, and the product is shifted
;; right logically.  Operands are legitimised by
;; ix86_fixup_binary_operands_no_copy.
7633 (define_expand "<s>mul<mode>3_highpart"
7634 [(set (match_operand:VI2_AVX2 0 "register_operand")
7636 (lshiftrt:<ssedoublemode>
7637 (mult:<ssedoublemode>
7638 (any_extend:<ssedoublemode>
7639 (match_operand:VI2_AVX2 1 "nonimmediate_operand"))
7640 (any_extend:<ssedoublemode>
7641 (match_operand:VI2_AVX2 2 "nonimmediate_operand")))
7644 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; pmulh<u>w / vpmulh<u>w: matching insn for the high-part multiply
;; expander.  Operand 1 is marked commutative ("%"); the legacy form ties it
;; to the destination.
7646 (define_insn "*<s>mul<mode>3_highpart"
7647 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
7649 (lshiftrt:<ssedoublemode>
7650 (mult:<ssedoublemode>
7651 (any_extend:<ssedoublemode>
7652 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,x"))
7653 (any_extend:<ssedoublemode>
7654 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,xm")))
7656 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
7658 pmulh<u>w\t{%2, %0|%0, %2}
7659 vpmulh<u>w\t{%2, %1, %0|%0, %1, %2}"
7660 [(set_attr "isa" "noavx,avx")
7661 (set_attr "type" "sseimul")
7662 (set_attr "prefix_data16" "1,*")
7663 (set_attr "prefix" "orig,vex")
7664 (set_attr "mode" "<sseinsnmode>")])
;; Unsigned widening multiply of the even-indexed (0,2,...,14) elements of
;; two V16SI operands into V8DI, optionally masked.  Operands are
;; legitimised by ix86_fixup_binary_operands_no_copy.
7666 (define_expand "vec_widen_umult_even_v16si<mask_name>"
7667 [(set (match_operand:V8DI 0 "register_operand")
7671 (match_operand:V16SI 1 "nonimmediate_operand")
7672 (parallel [(const_int 0) (const_int 2)
7673 (const_int 4) (const_int 6)
7674 (const_int 8) (const_int 10)
7675 (const_int 12) (const_int 14)])))
7678 (match_operand:V16SI 2 "nonimmediate_operand")
7679 (parallel [(const_int 0) (const_int 2)
7680 (const_int 4) (const_int 6)
7681 (const_int 8) (const_int 10)
7682 (const_int 12) (const_int 14)])))))]
7684 "ix86_fixup_binary_operands_no_copy (MULT, V16SImode, operands);")
;; EVEX vpmuludq on the even-indexed elements of two V16SI operands, with
;; the optional mask printed after the destination.  Operand 1 is marked
;; commutative ("%").
7686 (define_insn "*vec_widen_umult_even_v16si<mask_name>"
7687 [(set (match_operand:V8DI 0 "register_operand" "=v")
7691 (match_operand:V16SI 1 "nonimmediate_operand" "%v")
7692 (parallel [(const_int 0) (const_int 2)
7693 (const_int 4) (const_int 6)
7694 (const_int 8) (const_int 10)
7695 (const_int 12) (const_int 14)])))
7698 (match_operand:V16SI 2 "nonimmediate_operand" "vm")
7699 (parallel [(const_int 0) (const_int 2)
7700 (const_int 4) (const_int 6)
7701 (const_int 8) (const_int 10)
7702 (const_int 12) (const_int 14)])))))]
7703 "TARGET_AVX512F && ix86_binary_operator_ok (MULT, V16SImode, operands)"
7704 "vpmuludq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7705 [(set_attr "isa" "avx512f")
7706 (set_attr "type" "sseimul")
7707 (set_attr "prefix_extra" "1")
7708 (set_attr "prefix" "evex")
7709 (set_attr "mode" "XI")])
;; Unsigned widening multiply of the even-indexed (0,2,4,6) elements of two
;; V8SI operands into V4DI.  Operands are legitimised by
;; ix86_fixup_binary_operands_no_copy.
7711 (define_expand "vec_widen_umult_even_v8si"
7712 [(set (match_operand:V4DI 0 "register_operand")
7716 (match_operand:V8SI 1 "nonimmediate_operand")
7717 (parallel [(const_int 0) (const_int 2)
7718 (const_int 4) (const_int 6)])))
7721 (match_operand:V8SI 2 "nonimmediate_operand")
7722 (parallel [(const_int 0) (const_int 2)
7723 (const_int 4) (const_int 6)])))))]
7725 "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")
;; AVX2 vpmuludq on the even-indexed elements of two V8SI operands.
;; Operand 1 is marked commutative ("%").
7727 (define_insn "*vec_widen_umult_even_v8si"
7728 [(set (match_operand:V4DI 0 "register_operand" "=x")
7732 (match_operand:V8SI 1 "nonimmediate_operand" "%x")
7733 (parallel [(const_int 0) (const_int 2)
7734 (const_int 4) (const_int 6)])))
7737 (match_operand:V8SI 2 "nonimmediate_operand" "xm")
7738 (parallel [(const_int 0) (const_int 2)
7739 (const_int 4) (const_int 6)])))))]
7740 "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V8SImode, operands)"
7741 "vpmuludq\t{%2, %1, %0|%0, %1, %2}"
7742 [(set_attr "type" "sseimul")
7743 (set_attr "prefix" "vex")
7744 (set_attr "mode" "OI")])
;; Unsigned widening multiply of elements 0 and 2 of two V4SI operands into
;; V2DI.  Operands are legitimised by ix86_fixup_binary_operands_no_copy.
7746 (define_expand "vec_widen_umult_even_v4si"
7747 [(set (match_operand:V2DI 0 "register_operand")
7751 (match_operand:V4SI 1 "nonimmediate_operand")
7752 (parallel [(const_int 0) (const_int 2)])))
7755 (match_operand:V4SI 2 "nonimmediate_operand")
7756 (parallel [(const_int 0) (const_int 2)])))))]
7758 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; pmuludq / vpmuludq on elements 0 and 2 of two V4SI operands.  Operand 1
;; is marked commutative ("%"); the legacy form ties it to the destination.
7760 (define_insn "*vec_widen_umult_even_v4si"
7761 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
7765 (match_operand:V4SI 1 "nonimmediate_operand" "%0,x")
7766 (parallel [(const_int 0) (const_int 2)])))
7769 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
7770 (parallel [(const_int 0) (const_int 2)])))))]
7771 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
7773 pmuludq\t{%2, %0|%0, %2}
7774 vpmuludq\t{%2, %1, %0|%0, %1, %2}"
7775 [(set_attr "isa" "noavx,avx")
7776 (set_attr "type" "sseimul")
7777 (set_attr "prefix_data16" "1,*")
7778 (set_attr "prefix" "orig,vex")
7779 (set_attr "mode" "TI")])
;; Signed counterpart of vec_widen_umult_even_v16si: widening multiply of
;; the even-indexed elements of two V16SI operands into V8DI, optionally
;; masked.  Operands are legitimised by ix86_fixup_binary_operands_no_copy.
7781 (define_expand "vec_widen_smult_even_v16si<mask_name>"
7782 [(set (match_operand:V8DI 0 "register_operand")
7786 (match_operand:V16SI 1 "nonimmediate_operand")
7787 (parallel [(const_int 0) (const_int 2)
7788 (const_int 4) (const_int 6)
7789 (const_int 8) (const_int 10)
7790 (const_int 12) (const_int 14)])))
7793 (match_operand:V16SI 2 "nonimmediate_operand")
7794 (parallel [(const_int 0) (const_int 2)
7795 (const_int 4) (const_int 6)
7796 (const_int 8) (const_int 10)
7797 (const_int 12) (const_int 14)])))))]
7799 "ix86_fixup_binary_operands_no_copy (MULT, V16SImode, operands);")
;; EVEX vpmuldq on the even-indexed elements of two V16SI operands, with
;; the optional mask printed after the destination.  Operand 1 is marked
;; commutative ("%").
7801 (define_insn "*vec_widen_smult_even_v16si<mask_name>"
7802 [(set (match_operand:V8DI 0 "register_operand" "=v")
7806 (match_operand:V16SI 1 "nonimmediate_operand" "%v")
7807 (parallel [(const_int 0) (const_int 2)
7808 (const_int 4) (const_int 6)
7809 (const_int 8) (const_int 10)
7810 (const_int 12) (const_int 14)])))
7813 (match_operand:V16SI 2 "nonimmediate_operand" "vm")
7814 (parallel [(const_int 0) (const_int 2)
7815 (const_int 4) (const_int 6)
7816 (const_int 8) (const_int 10)
7817 (const_int 12) (const_int 14)])))))]
7818 "TARGET_AVX512F && ix86_binary_operator_ok (MULT, V16SImode, operands)"
7819 "vpmuldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7820 [(set_attr "isa" "avx512f")
7821 (set_attr "type" "sseimul")
7822 (set_attr "prefix_extra" "1")
7823 (set_attr "prefix" "evex")
7824 (set_attr "mode" "XI")])
;; Signed widening multiply of the even-indexed (0,2,4,6) elements of two
;; V8SI operands into V4DI.  Operands are legitimised by
;; ix86_fixup_binary_operands_no_copy.
7826 (define_expand "vec_widen_smult_even_v8si"
7827 [(set (match_operand:V4DI 0 "register_operand")
7831 (match_operand:V8SI 1 "nonimmediate_operand")
7832 (parallel [(const_int 0) (const_int 2)
7833 (const_int 4) (const_int 6)])))
7836 (match_operand:V8SI 2 "nonimmediate_operand")
7837 (parallel [(const_int 0) (const_int 2)
7838 (const_int 4) (const_int 6)])))))]
7840 "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")
;; AVX2 vpmuldq on the even-indexed (0,2,4,6) elements of two V8SI operands.
;; Operand 1 carries the "%" commutative modifier, like the unsigned V8SI
;; insn and the signed V16SI / V4SI insns.  The insn condition accepts
;; either multiplicand in memory (ix86_binary_operator_ok with MULT), so
;; without "%" a memory operand 1 had to be reloaded into a register instead
;; of simply being swapped with operand 2.
7842 (define_insn "*vec_widen_smult_even_v8si"
7843 [(set (match_operand:V4DI 0 "register_operand" "=x")
7847 (match_operand:V8SI 1 "nonimmediate_operand" "%x")
7848 (parallel [(const_int 0) (const_int 2)
7849 (const_int 4) (const_int 6)])))
7852 (match_operand:V8SI 2 "nonimmediate_operand" "xm")
7853 (parallel [(const_int 0) (const_int 2)
7854 (const_int 4) (const_int 6)])))))]
7855 "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V8SImode, operands)"
7856 "vpmuldq\t{%2, %1, %0|%0, %1, %2}"
7857 [(set_attr "type" "sseimul")
7858 (set_attr "prefix_extra" "1")
7859 (set_attr "prefix" "vex")
7860 (set_attr "mode" "OI")])
;; Widening multiply of elements 0 and 2 of two V4SI operands into V2DI
;; (matched by the pmuldq insn that follows).  Operands are legitimised by
;; ix86_fixup_binary_operands_no_copy.
7862 (define_expand "sse4_1_mulv2siv2di3"
7863 [(set (match_operand:V2DI 0 "register_operand")
7867 (match_operand:V4SI 1 "nonimmediate_operand")
7868 (parallel [(const_int 0) (const_int 2)])))
7871 (match_operand:V4SI 2 "nonimmediate_operand")
7872 (parallel [(const_int 0) (const_int 2)])))))]
7874 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; SSE4.1 pmuldq / vpmuldq on elements 0 and 2 of two V4SI operands.
;; Operand 1 is marked commutative ("%"); the legacy form ties it to the
;; destination.
7876 (define_insn "*sse4_1_mulv2siv2di3"
7877 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
7881 (match_operand:V4SI 1 "nonimmediate_operand" "%0,x")
7882 (parallel [(const_int 0) (const_int 2)])))
7885 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
7886 (parallel [(const_int 0) (const_int 2)])))))]
7887 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
7889 pmuldq\t{%2, %0|%0, %2}
7890 vpmuldq\t{%2, %1, %0|%0, %1, %2}"
7891 [(set_attr "isa" "noavx,avx")
7892 (set_attr "type" "sseimul")
7893 (set_attr "prefix_data16" "1,*")
7894 (set_attr "prefix_extra" "1")
7895 (set_attr "prefix" "orig,vex")
7896 (set_attr "mode" "TI")])
;; Expander for the 256-bit multiply-add of V16HI operands into V8SI.  The
;; pattern combines the even-indexed (0,2,...,14) elements of operands 1 and
;; 2 with their odd-indexed (1,3,...,15) elements.  Operands are legitimised
;; by ix86_fixup_binary_operands_no_copy.
7898 (define_expand "avx2_pmaddwd"
7899 [(set (match_operand:V8SI 0 "register_operand")
7904 (match_operand:V16HI 1 "nonimmediate_operand")
7905 (parallel [(const_int 0) (const_int 2)
7906 (const_int 4) (const_int 6)
7907 (const_int 8) (const_int 10)
7908 (const_int 12) (const_int 14)])))
7911 (match_operand:V16HI 2 "nonimmediate_operand")
7912 (parallel [(const_int 0) (const_int 2)
7913 (const_int 4) (const_int 6)
7914 (const_int 8) (const_int 10)
7915 (const_int 12) (const_int 14)]))))
7918 (vec_select:V8HI (match_dup 1)
7919 (parallel [(const_int 1) (const_int 3)
7920 (const_int 5) (const_int 7)
7921 (const_int 9) (const_int 11)
7922 (const_int 13) (const_int 15)])))
7924 (vec_select:V8HI (match_dup 2)
7925 (parallel [(const_int 1) (const_int 3)
7926 (const_int 5) (const_int 7)
7927 (const_int 9) (const_int 11)
7928 (const_int 13) (const_int 15)]))))))]
7930 "ix86_fixup_binary_operands_no_copy (MULT, V16HImode, operands);")
;; AVX2 vpmaddwd: matching insn for avx2_pmaddwd, same even/odd element
;; structure over V16HI operands.  Operand 1 is marked commutative ("%").
7932 (define_insn "*avx2_pmaddwd"
7933 [(set (match_operand:V8SI 0 "register_operand" "=x")
7938 (match_operand:V16HI 1 "nonimmediate_operand" "%x")
7939 (parallel [(const_int 0) (const_int 2)
7940 (const_int 4) (const_int 6)
7941 (const_int 8) (const_int 10)
7942 (const_int 12) (const_int 14)])))
7945 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
7946 (parallel [(const_int 0) (const_int 2)
7947 (const_int 4) (const_int 6)
7948 (const_int 8) (const_int 10)
7949 (const_int 12) (const_int 14)]))))
7952 (vec_select:V8HI (match_dup 1)
7953 (parallel [(const_int 1) (const_int 3)
7954 (const_int 5) (const_int 7)
7955 (const_int 9) (const_int 11)
7956 (const_int 13) (const_int 15)])))
7958 (vec_select:V8HI (match_dup 2)
7959 (parallel [(const_int 1) (const_int 3)
7960 (const_int 5) (const_int 7)
7961 (const_int 9) (const_int 11)
7962 (const_int 13) (const_int 15)]))))))]
7963 "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V16HImode, operands)"
7964 "vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
7965 [(set_attr "type" "sseiadd")
7966 (set_attr "prefix" "vex")
7967 (set_attr "mode" "OI")])
;; Expander for the 128-bit multiply-add of V8HI operands into V4SI.  The
;; pattern combines the even-indexed (0,2,4,6) elements of operands 1 and 2
;; with their odd-indexed (1,3,5,7) elements.  Operands are legitimised by
;; ix86_fixup_binary_operands_no_copy.
7969 (define_expand "sse2_pmaddwd"
7970 [(set (match_operand:V4SI 0 "register_operand")
7975 (match_operand:V8HI 1 "nonimmediate_operand")
7976 (parallel [(const_int 0) (const_int 2)
7977 (const_int 4) (const_int 6)])))
7980 (match_operand:V8HI 2 "nonimmediate_operand")
7981 (parallel [(const_int 0) (const_int 2)
7982 (const_int 4) (const_int 6)]))))
7985 (vec_select:V4HI (match_dup 1)
7986 (parallel [(const_int 1) (const_int 3)
7987 (const_int 5) (const_int 7)])))
7989 (vec_select:V4HI (match_dup 2)
7990 (parallel [(const_int 1) (const_int 3)
7991 (const_int 5) (const_int 7)]))))))]
7993 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; pmaddwd / vpmaddwd: matching insn for sse2_pmaddwd.  Operand 1 is marked
;; commutative ("%"); the legacy form ties it to the destination.
7995 (define_insn "*sse2_pmaddwd"
7996 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
8001 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x")
8002 (parallel [(const_int 0) (const_int 2)
8003 (const_int 4) (const_int 6)])))
8006 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
8007 (parallel [(const_int 0) (const_int 2)
8008 (const_int 4) (const_int 6)]))))
8011 (vec_select:V4HI (match_dup 1)
8012 (parallel [(const_int 1) (const_int 3)
8013 (const_int 5) (const_int 7)])))
8015 (vec_select:V4HI (match_dup 2)
8016 (parallel [(const_int 1) (const_int 3)
8017 (const_int 5) (const_int 7)]))))))]
8018 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
8020 pmaddwd\t{%2, %0|%0, %2}
8021 vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
8022 [(set_attr "isa" "noavx,avx")
8023 (set_attr "type" "sseiadd")
8024 (set_attr "atom_unit" "simul")
8025 (set_attr "prefix_data16" "1,*")
8026 (set_attr "prefix" "orig,vex")
8027 (set_attr "mode" "TI")])
;; Vector multiply over VI4_AVX512F, optionally masked.  Two expansion
;; strategies are visible: one forces any operand that is not a
;; nonimmediate_operand into a register and then legitimises the operands
;; for the insn pattern; the other hands the whole multiply to
;; ix86_expand_sse2_mulv4si3.
;; NOTE(review): presumably the choice is made on SSE4.1 availability (the
;; matching insn requires TARGET_SSE4_1) -- confirm against the full body.
8029 (define_expand "mul<mode>3<mask_name>"
8030 [(set (match_operand:VI4_AVX512F 0 "register_operand")
8032 (match_operand:VI4_AVX512F 1 "general_vector_operand")
8033 (match_operand:VI4_AVX512F 2 "general_vector_operand")))]
8034 "TARGET_SSE2 && <mask_mode512bit_condition>"
8038 if (!nonimmediate_operand (operands[1], <MODE>mode))
8039 operands[1] = force_reg (<MODE>mode, operands[1]);
8040 if (!nonimmediate_operand (operands[2], <MODE>mode))
8041 operands[2] = force_reg (<MODE>mode, operands[2]);
8042 ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
8046 ix86_expand_sse2_mulv4si3 (operands[0], operands[1], operands[2]);
;; pmulld / vpmulld (SSE4.1 and later).  Operand 1 is marked commutative
;; ("%"); the legacy form ties it to the destination, and only the
;; three-operand form prints the optional mask operand.
8051 (define_insn "*<sse4_1_avx2>_mul<mode>3<mask_name>"
8052 [(set (match_operand:VI4_AVX512F 0 "register_operand" "=x,v")
8054 (match_operand:VI4_AVX512F 1 "nonimmediate_operand" "%0,v")
8055 (match_operand:VI4_AVX512F 2 "nonimmediate_operand" "xm,vm")))]
8056 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, <MODE>mode, operands) && <mask_mode512bit_condition>"
8058 pmulld\t{%2, %0|%0, %2}
8059 vpmulld\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8060 [(set_attr "isa" "noavx,avx")
8061 (set_attr "type" "sseimul")
8062 (set_attr "prefix_extra" "1")
8063 (set_attr "prefix" "<mask_prefix3>")
8064 (set_attr "btver2_decode" "vector,vector")
8065 (set_attr "mode" "<sseinsnmode>")])
;; Vector multiply over VI8_AVX2_AVX512F with register operands only; the
;; expansion is delegated to ix86_expand_sse2_mulvxdi3.
8067 (define_expand "mul<mode>3"
8068 [(set (match_operand:VI8_AVX2_AVX512F 0 "register_operand")
8069 (mult:VI8_AVX2_AVX512F
8070 (match_operand:VI8_AVX2_AVX512F 1 "register_operand")
8071 (match_operand:VI8_AVX2_AVX512F 2 "register_operand")))]
8074 ix86_expand_sse2_mulvxdi3 (operands[0], operands[1], operands[2]);
;; Widening multiply ("hi" variant) of two VI124_AVX2 register operands
;; into <sseunpackmode>; expansion is delegated to
;; ix86_expand_mul_widen_hilo.  The any_extend wrapper exists only to
;; generate the signed and unsigned pattern names.
8078 (define_expand "vec_widen_<s>mult_hi_<mode>"
8079 [(match_operand:<sseunpackmode> 0 "register_operand")
8080 (any_extend:<sseunpackmode>
8081 (match_operand:VI124_AVX2 1 "register_operand"))
8082 (match_operand:VI124_AVX2 2 "register_operand")]
8085 ix86_expand_mul_widen_hilo (operands[0], operands[1], operands[2],
;; Widening multiply ("lo" variant) of two VI124_AVX2 register operands
;; into <sseunpackmode>; expansion is delegated to
;; ix86_expand_mul_widen_hilo.
8090 (define_expand "vec_widen_<s>mult_lo_<mode>"
8091 [(match_operand:<sseunpackmode> 0 "register_operand")
8092 (any_extend:<sseunpackmode>
8093 (match_operand:VI124_AVX2 1 "register_operand"))
8094 (match_operand:VI124_AVX2 2 "register_operand")]
8097 ix86_expand_mul_widen_hilo (operands[0], operands[1], operands[2],
8102 ;; Most widen_<s>mult_even_<mode> can be handled directly from other
8103 ;; named patterns, but signed V4SI needs special help for plain SSE2.
;; Signed even-element widening multiply, V4SI -> V2DI, expanded through
;; ix86_expand_mul_widen_evenodd rather than matched by an insn pattern.
8104 (define_expand "vec_widen_smult_even_v4si"
8105 [(match_operand:V2DI 0 "register_operand")
8106 (match_operand:V4SI 1 "nonimmediate_operand")
8107 (match_operand:V4SI 2 "nonimmediate_operand")]
8110 ix86_expand_mul_widen_evenodd (operands[0], operands[1], operands[2],
;; Odd-element widening multiply (signed and unsigned names) over
;; VI4_AVX512F into <sseunpackmode>; expansion is delegated to
;; ix86_expand_mul_widen_evenodd.
8115 (define_expand "vec_widen_<s>mult_odd_<mode>"
8116 [(match_operand:<sseunpackmode> 0 "register_operand")
8117 (any_extend:<sseunpackmode>
8118 (match_operand:VI4_AVX512F 1 "general_vector_operand"))
8119 (match_operand:VI4_AVX512F 2 "general_vector_operand")]
8122 ix86_expand_mul_widen_evenodd (operands[0], operands[1], operands[2],
;; Signed dot product over VI2_AVX2: a pmaddwd of operands 1 and 2 goes into
;; a fresh <sseunpackmode> temporary, then operands[0] is set to a PLUS in
;; <sseunpackmode>.
;; NOTE(review): the PLUS operands are not visible here; presumably the
;; temporary and accumulator operand 3 -- confirm.
8127 (define_expand "sdot_prod<mode>"
8128 [(match_operand:<sseunpackmode> 0 "register_operand")
8129 (match_operand:VI2_AVX2 1 "register_operand")
8130 (match_operand:VI2_AVX2 2 "register_operand")
8131 (match_operand:<sseunpackmode> 3 "register_operand")]
8134 rtx t = gen_reg_rtx (<sseunpackmode>mode);
8135 emit_insn (gen_<sse2_avx2>_pmaddwd (t, operands[1], operands[2]));
8136 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
8137 gen_rtx_PLUS (<sseunpackmode>mode,
8142 ;; Normally we use widen_mul_even/odd, but combine can't quite get it all
8143 ;; back together when madd is available.
;; V4SI signed dot product into V2DI built from two XOP multiply-accumulate
;; insns: xop_pmacsdqh accumulates onto operand 3 into a temporary, then
;; xop_pmacsdql accumulates onto that temporary into operands[0].
8144 (define_expand "sdot_prodv4si"
8145 [(match_operand:V2DI 0 "register_operand")
8146 (match_operand:V4SI 1 "register_operand")
8147 (match_operand:V4SI 2 "register_operand")
8148 (match_operand:V2DI 3 "register_operand")]
8151 rtx t = gen_reg_rtx (V2DImode);
8152 emit_insn (gen_xop_pmacsdqh (t, operands[1], operands[2], operands[3]));
8153 emit_insn (gen_xop_pmacsdql (operands[0], operands[1], operands[2], t));
;; psra / vpsra over VI24_AVX2.  The SImode shift count is either an SSE
;; register or an "N" constant; length_immediate is computed from whether
;; operand 2 is a const_int.
8157 (define_insn "ashr<mode>3"
8158 [(set (match_operand:VI24_AVX2 0 "register_operand" "=x,x")
8160 (match_operand:VI24_AVX2 1 "register_operand" "0,x")
8161 (match_operand:SI 2 "nonmemory_operand" "xN,xN")))]
8164 psra<ssemodesuffix>\t{%2, %0|%0, %2}
8165 vpsra<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
8166 [(set_attr "isa" "noavx,avx")
8167 (set_attr "type" "sseishft")
8168 (set (attr "length_immediate")
8169 (if_then_else (match_operand 2 "const_int_operand")
8171 (const_string "0")))
8172 (set_attr "prefix_data16" "1,*")
8173 (set_attr "prefix" "orig,vex")
8174 (set_attr "mode" "<sseinsnmode>")])
;; AVX-512F "ashr" shift for VI48_512 vectors, optionally masked through the
;; <mask_name> substitution.  Emits vpsra<ssemodesuffix>.
;;   alternative 0: source in a register, count in a register ("v" / "v")
;;   alternative 1: source in a register or memory, constant count
;;                  ("vm" / "N")
;; length_immediate is selected by whether operand 2 is a const_int.
8176 (define_insn "ashr<mode>3<mask_name>"
8177 [(set (match_operand:VI48_512 0 "register_operand" "=v,v")
8179 (match_operand:VI48_512 1 "nonimmediate_operand" "v,vm")
8180 (match_operand:SI 2 "nonmemory_operand" "v,N")))]
8181 "TARGET_AVX512F && <mask_mode512bit_condition>"
8182 "vpsra<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8183 [(set_attr "type" "sseishft")
8184 (set (attr "length_immediate")
8185 (if_then_else (match_operand 2 "const_int_operand")
8187 (const_string "0")))
8188 (set_attr "mode" "<sseinsnmode>")])
;; Shifts selected by the any_lshift code iterator on VI248_AVX2 vectors,
;; with an SImode count in operand 2 (constraint "xN").
;;   alternative 0 (noavx): two-operand p<vshift><ssemodesuffix>, operand 1
;;                          tied to the destination
;;   alternative 1 (avx):   three-operand vp<vshift><ssemodesuffix>
;; length_immediate is selected by whether operand 2 is a const_int.
8190 (define_insn "<shift_insn><mode>3"
8191 [(set (match_operand:VI248_AVX2 0 "register_operand" "=x,x")
8192 (any_lshift:VI248_AVX2
8193 (match_operand:VI248_AVX2 1 "register_operand" "0,x")
8194 (match_operand:SI 2 "nonmemory_operand" "xN,xN")))]
8197 p<vshift><ssemodesuffix>\t{%2, %0|%0, %2}
8198 vp<vshift><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
8199 [(set_attr "isa" "noavx,avx")
8200 (set_attr "type" "sseishft")
8201 (set (attr "length_immediate")
8202 (if_then_else (match_operand 2 "const_int_operand")
8204 (const_string "0")))
8205 (set_attr "prefix_data16" "1,*")
8206 (set_attr "prefix" "orig,vex")
8207 (set_attr "mode" "<sseinsnmode>")])
;; AVX-512F shifts selected by the any_lshift code iterator on VI48_512
;; vectors, optionally masked through the <mask_name> substitution.  Emits
;; vp<vshift><ssemodesuffix>.
;;   alternative 0: source in a register, count in a register or constant
;;                  ("v" / "vN")
;;   alternative 1: source in memory, constant count ("m" / "N")
;; Operand 1 uses nonimmediate_operand so that the predicate admits the
;; memory source which the second alternative's "m" constraint describes;
;; with register_operand that alternative could never be matched directly.
;; length_immediate is selected by whether operand 2 is a const_int.
8209 (define_insn "<shift_insn><mode>3<mask_name>"
8210 [(set (match_operand:VI48_512 0 "register_operand" "=v,v")
8211 (any_lshift:VI48_512
8212 (match_operand:VI48_512 1 "nonimmediate_operand" "v,m")
8213 (match_operand:SI 2 "nonmemory_operand" "vN,N")))]
8214 "TARGET_AVX512F && <mask_mode512bit_condition>"
8215 "vp<vshift><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8216 [(set_attr "isa" "avx512f")
8217 (set_attr "type" "sseishft")
8218 (set (attr "length_immediate")
8219 (if_then_else (match_operand 2 "const_int_operand")
8221 (const_string "0")))
8222 (set_attr "prefix" "evex")
8223 (set_attr "mode" "<sseinsnmode>")])
;; Whole-vector "shl" expander for VI_128 modes.  The count (operand 2) must
;; satisfy const_0_to_255_mul_8_operand.  Operand 1 is reinterpreted as V1TI
;; with gen_lowpart, operand 3 is a fresh V1TI pseudo, and operand 0 is
;; finally set from operand 4, the <MODE>mode lowpart of that pseudo.
;; NOTE(review): operand 3 is presumably the destination of the V1TI shift
;; -- confirm against the full pattern.
8226 (define_expand "vec_shl_<mode>"
8229 (match_operand:VI_128 1 "register_operand")
8230 (match_operand:SI 2 "const_0_to_255_mul_8_operand")))
8231 (set (match_operand:VI_128 0 "register_operand") (match_dup 4))]
8234 operands[1] = gen_lowpart (V1TImode, operands[1]);
8235 operands[3] = gen_reg_rtx (V1TImode);
8236 operands[4] = gen_lowpart (<MODE>mode, operands[3]);
;; Full-width "ashl" shift of a VIMAX_AVX2 value, emitted as pslldq
;; (alternative 0, operand 1 tied to the destination) or vpslldq
;; (alternative 1).  Operand 2 must be a multiple of 8
;; (const_0_to_255_mul_8_operand); the output code divides it by 8 before it
;; is printed as the instruction's immediate.
8239 (define_insn "<sse2_avx2>_ashl<mode>3"
8240 [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,x")
8242 (match_operand:VIMAX_AVX2 1 "register_operand" "0,x")
8243 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
8246 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
8248 switch (which_alternative)
8251 return "pslldq\t{%2, %0|%0, %2}";
8253 return "vpslldq\t{%2, %1, %0|%0, %1, %2}";
8258 [(set_attr "isa" "noavx,avx")
8259 (set_attr "type" "sseishft")
8260 (set_attr "length_immediate" "1")
8261 (set_attr "prefix_data16" "1,*")
8262 (set_attr "prefix" "orig,vex")
8263 (set_attr "mode" "<sseinsnmode>")])
;; Whole-vector "shr" expander for VI_128 modes.  The count (operand 2) must
;; satisfy const_0_to_255_mul_8_operand.  Operand 1 is reinterpreted as V1TI
;; with gen_lowpart, operand 3 is a fresh V1TI pseudo, and operand 0 is
;; finally set from operand 4, the <MODE>mode lowpart of that pseudo.
;; NOTE(review): operand 3 is presumably the destination of the V1TI shift
;; -- confirm against the full pattern.
8265 (define_expand "vec_shr_<mode>"
8268 (match_operand:VI_128 1 "register_operand")
8269 (match_operand:SI 2 "const_0_to_255_mul_8_operand")))
8270 (set (match_operand:VI_128 0 "register_operand") (match_dup 4))]
8273 operands[1] = gen_lowpart (V1TImode, operands[1]);
8274 operands[3] = gen_reg_rtx (V1TImode);
8275 operands[4] = gen_lowpart (<MODE>mode, operands[3]);
;; Full-width logical right shift (lshiftrt) of a VIMAX_AVX2 value, emitted
;; as psrldq (alternative 0, operand 1 tied to the destination) or vpsrldq
;; (alternative 1).  Operand 2 must be a multiple of 8
;; (const_0_to_255_mul_8_operand); the output code divides it by 8 before it
;; is printed as the instruction's immediate.
8278 (define_insn "<sse2_avx2>_lshr<mode>3"
8279 [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,x")
8280 (lshiftrt:VIMAX_AVX2
8281 (match_operand:VIMAX_AVX2 1 "register_operand" "0,x")
8282 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
8285 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
8287 switch (which_alternative)
8290 return "psrldq\t{%2, %0|%0, %2}";
8292 return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
8297 [(set_attr "isa" "noavx,avx")
8298 (set_attr "type" "sseishft")
8299 (set_attr "length_immediate" "1")
8300 (set_attr "atom_unit" "sishuf")
8301 (set_attr "prefix_data16" "1,*")
8302 (set_attr "prefix" "orig,vex")
8303 (set_attr "mode" "<sseinsnmode>")])
;; AVX-512F rotate (any_rotate) of VI48_512 vectors where the count is itself
;; a VI48_512 vector in operand 2 (register or memory).  Emits
;; vp<rotate>v<ssemodesuffix>; masking is available through <mask_name>.
8305 (define_insn "avx512f_<rotate>v<mode><mask_name>"
8306 [(set (match_operand:VI48_512 0 "register_operand" "=v")
8307 (any_rotate:VI48_512
8308 (match_operand:VI48_512 1 "register_operand" "v")
8309 (match_operand:VI48_512 2 "nonimmediate_operand" "vm")))]
8311 "vp<rotate>v<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8312 [(set_attr "prefix" "evex")
8313 (set_attr "mode" "<sseinsnmode>")])
;; AVX-512F rotate (any_rotate) of VI48_512 vectors by a constant SImode
;; count satisfying const_0_to_255_operand.  The source (operand 1) may be a
;; register or memory.  Emits vp<rotate><ssemodesuffix>; masking is available
;; through <mask_name>.
8315 (define_insn "avx512f_<rotate><mode><mask_name>"
8316 [(set (match_operand:VI48_512 0 "register_operand" "=v")
8317 (any_rotate:VI48_512
8318 (match_operand:VI48_512 1 "nonimmediate_operand" "vm")
8319 (match_operand:SI 2 "const_0_to_255_operand")))]
8321 "vp<rotate><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8322 [(set_attr "prefix" "evex")
8323 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the maxmin codes on VI124_256_48_512 modes, with the
;; <mask_name> and <round_name> substitutions applied.  Requires TARGET_AVX2
;; plus the substitution-supplied 512-bit conditions.  Its only preparation
;; step is ix86_fixup_binary_operands_no_copy on the operands.
8325 (define_expand "<code><mode>3<mask_name><round_name>"
8326 [(set (match_operand:VI124_256_48_512 0 "register_operand")
8327 (maxmin:VI124_256_48_512
8328 (match_operand:VI124_256_48_512 1 "<round_nimm_predicate>")
8329 (match_operand:VI124_256_48_512 2 "<round_nimm_predicate>")))]
8330 "TARGET_AVX2 && <mask_mode512bit_condition> && <round_mode512bit_condition>"
8331 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Matching insn for the maxmin codes on VI124_256_48_512 modes; emits
;; vp<maxmin_int><ssemodesuffix>.  Operand 1 is marked commutative ("%v") and
;; ix86_binary_operator_ok gates the operand combination.  The mask and
;; rounding substitutions add their own operands and 512-bit conditions.
;; The "mode" attribute is derived from <sseinsnmode> because the iterator
;; covers 512-bit as well as 256-bit vectors; a fixed "OI" would mislabel the
;; 512-bit variants.
8333 (define_insn "*avx2_<code><mode>3<mask_name><round_name>"
8334 [(set (match_operand:VI124_256_48_512 0 "register_operand" "=v")
8335 (maxmin:VI124_256_48_512
8336 (match_operand:VI124_256_48_512 1 "<round_nimm_predicate>" "%v")
8337 (match_operand:VI124_256_48_512 2 "<round_nimm_predicate>" "<round_constraint>")))]
8338 "TARGET_AVX2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
8339 && <mask_mode512bit_condition> && <round_mode512bit_condition>"
8340 "vp<maxmin_int><ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
8341 [(set_attr "type" "sseiadd")
8342 (set_attr "prefix_extra" "1")
8343 (set_attr "prefix" "maybe_evex")
8344 (set_attr "mode" "<sseinsnmode>")])
;; Max/min expander for VI8_AVX2 vectors, open-coded as a compare-and-select
;; through ix86_expand_int_vcond.  The xops array is filled as follows:
;;   xops[0]        destination
;;   xops[1], [2]   the two select arms: (op1, op2) for SMAX/UMAX, swapped
;;                  to (op2, op1) for the min codes
;;   xops[3]        comparison of op1 against op2, GTU for UMAX/UMIN and GT
;;                  otherwise
;;   xops[4], [5]   the comparison operands, op1 and op2
8346 (define_expand "<code><mode>3"
8347 [(set (match_operand:VI8_AVX2 0 "register_operand")
8349 (match_operand:VI8_AVX2 1 "register_operand")
8350 (match_operand:VI8_AVX2 2 "register_operand")))]
8357 xops[0] = operands[0];
8359 if (<CODE> == SMAX || <CODE> == UMAX)
8361 xops[1] = operands[1];
8362 xops[2] = operands[2];
8366 xops[1] = operands[2];
8367 xops[2] = operands[1];
8370 code = (<CODE> == UMAX || <CODE> == UMIN) ? GTU : GT;
8372 xops[3] = gen_rtx_fmt_ee (code, VOIDmode, operands[1], operands[2]);
8373 xops[4] = operands[1];
8374 xops[5] = operands[2];
8376 ok = ix86_expand_int_vcond (xops);
;; Max/min expander for VI124_128 vectors that compares with GT.
;; With SSE4.1, or for V8HImode regardless, the operands are only
;; canonicalized by ix86_fixup_binary_operands_no_copy.  Otherwise both
;; inputs are forced into registers and the operation is open-coded as a
;; compare-and-select through ix86_expand_int_vcond: xops[3] is op1 GT op2,
;; xops[4]/[5] are the comparison operands, and xops[1]/[2] hold the select
;; arms in either (op1, op2) or (op2, op1) order.
8381 (define_expand "<code><mode>3"
8382 [(set (match_operand:VI124_128 0 "register_operand")
8384 (match_operand:VI124_128 1 "nonimmediate_operand")
8385 (match_operand:VI124_128 2 "nonimmediate_operand")))]
8388 if (TARGET_SSE4_1 || <MODE>mode == V8HImode)
8389 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
8395 xops[0] = operands[0];
8396 operands[1] = force_reg (<MODE>mode, operands[1]);
8397 operands[2] = force_reg (<MODE>mode, operands[2]);
8401 xops[1] = operands[1];
8402 xops[2] = operands[2];
8406 xops[1] = operands[2];
8407 xops[2] = operands[1];
8410 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
8411 xops[4] = operands[1];
8412 xops[5] = operands[2];
8414 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 max/min insn for VI14_128 vectors.  Operand 1 is commutative ("%")
;; and ix86_binary_operator_ok gates the operand combination.
;;   alternative 0 (noavx): p<maxmin_int><ssemodesuffix>, operand 1 tied to
;;                          the destination
;;   alternative 1 (avx):   three-operand vp<maxmin_int><ssemodesuffix>
8420 (define_insn "*sse4_1_<code><mode>3"
8421 [(set (match_operand:VI14_128 0 "register_operand" "=x,x")
8423 (match_operand:VI14_128 1 "nonimmediate_operand" "%0,x")
8424 (match_operand:VI14_128 2 "nonimmediate_operand" "xm,xm")))]
8425 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
8427 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
8428 vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
8429 [(set_attr "isa" "noavx,avx")
8430 (set_attr "type" "sseiadd")
8431 (set_attr "prefix_extra" "1,*")
8432 (set_attr "prefix" "orig,vex")
8433 (set_attr "mode" "TI")])
;; SSE2 max/min insn for V8HI vectors; needs only TARGET_SSE2.  Operand 1 is
;; commutative ("%") and ix86_binary_operator_ok gates the operands.
;;   alternative 0 (noavx): p<maxmin_int>w, operand 1 tied to the destination
;;   alternative 1 (avx):   three-operand vp<maxmin_int>w
8435 (define_insn "*<code>v8hi3"
8436 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
8438 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x")
8439 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")))]
8440 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V8HImode, operands)"
8442 p<maxmin_int>w\t{%2, %0|%0, %2}
8443 vp<maxmin_int>w\t{%2, %1, %0|%0, %1, %2}"
8444 [(set_attr "isa" "noavx,avx")
8445 (set_attr "type" "sseiadd")
8446 (set_attr "prefix_data16" "1,*")
8447 (set_attr "prefix_extra" "*,1")
8448 (set_attr "prefix" "orig,vex")
8449 (set_attr "mode" "TI")])
;; Max/min expander for VI124_128 vectors that compares with GTU.  Three
;; strategies are used:
;;  1. With SSE4.1, or for V16QImode regardless, the operands are only
;;     canonicalized by ix86_fixup_binary_operands_no_copy.
;;  2. UMAX on V8HImode is computed as ussub (op1, op2) + op2.  The
;;     intermediate normally reuses operand 0, but a fresh V8HI pseudo is
;;     taken when operand 0 is the same rtx as operand 2, so that operand 2
;;     is still intact for the final add.
;;  3. Otherwise both inputs are forced into registers and the operation is
;;     open-coded as a compare-and-select through ix86_expand_int_vcond:
;;     xops[3] is op1 GTU op2, xops[4]/[5] are the comparison operands, and
;;     xops[1]/[2] hold the select arms in (op1, op2) or (op2, op1) order.
8451 (define_expand "<code><mode>3"
8452 [(set (match_operand:VI124_128 0 "register_operand")
8454 (match_operand:VI124_128 1 "nonimmediate_operand")
8455 (match_operand:VI124_128 2 "nonimmediate_operand")))]
8458 if (TARGET_SSE4_1 || <MODE>mode == V16QImode)
8459 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
8460 else if (<CODE> == UMAX && <MODE>mode == V8HImode)
8462 rtx op0 = operands[0], op2 = operands[2], op3 = op0;
8463 operands[1] = force_reg (<MODE>mode, operands[1]);
8464 if (rtx_equal_p (op3, op2))
8465 op3 = gen_reg_rtx (V8HImode);
8466 emit_insn (gen_sse2_ussubv8hi3 (op3, operands[1], op2));
8467 emit_insn (gen_addv8hi3 (op0, op3, op2));
8475 operands[1] = force_reg (<MODE>mode, operands[1]);
8476 operands[2] = force_reg (<MODE>mode, operands[2]);
8478 xops[0] = operands[0];
8482 xops[1] = operands[1];
8483 xops[2] = operands[2];
8487 xops[1] = operands[2];
8488 xops[2] = operands[1];
8491 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
8492 xops[4] = operands[1];
8493 xops[5] = operands[2];
8495 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 max/min insn for VI24_128 vectors.  Operand 1 is commutative ("%")
;; and ix86_binary_operator_ok gates the operand combination.
;;   alternative 0 (noavx): p<maxmin_int><ssemodesuffix>, operand 1 tied to
;;                          the destination
;;   alternative 1 (avx):   three-operand vp<maxmin_int><ssemodesuffix>
8501 (define_insn "*sse4_1_<code><mode>3"
8502 [(set (match_operand:VI24_128 0 "register_operand" "=x,x")
8504 (match_operand:VI24_128 1 "nonimmediate_operand" "%0,x")
8505 (match_operand:VI24_128 2 "nonimmediate_operand" "xm,xm")))]
8506 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
8508 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
8509 vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
8510 [(set_attr "isa" "noavx,avx")
8511 (set_attr "type" "sseiadd")
8512 (set_attr "prefix_extra" "1,*")
8513 (set_attr "prefix" "orig,vex")
8514 (set_attr "mode" "TI")])
;; SSE2 max/min insn for V16QI vectors; needs only TARGET_SSE2.  Operand 1 is
;; commutative ("%") and ix86_binary_operator_ok gates the operands.
;;   alternative 0 (noavx): p<maxmin_int>b, operand 1 tied to the destination
;;   alternative 1 (avx):   three-operand vp<maxmin_int>b
8516 (define_insn "*<code>v16qi3"
8517 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
8519 (match_operand:V16QI 1 "nonimmediate_operand" "%0,x")
8520 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")))]
8521 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V16QImode, operands)"
8523 p<maxmin_int>b\t{%2, %0|%0, %2}
8524 vp<maxmin_int>b\t{%2, %1, %0|%0, %1, %2}"
8525 [(set_attr "isa" "noavx,avx")
8526 (set_attr "type" "sseiadd")
8527 (set_attr "prefix_data16" "1,*")
8528 (set_attr "prefix_extra" "*,1")
8529 (set_attr "prefix" "orig,vex")
8530 (set_attr "mode" "TI")])
8532 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8534 ;; Parallel integral comparisons
8536 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Named expander for equality comparison of VI_256 vectors.  Its only
;; preparation step is ix86_fixup_binary_operands_no_copy with code EQ.
8538 (define_expand "avx2_eq<mode>3"
8539 [(set (match_operand:VI_256 0 "register_operand")
8541 (match_operand:VI_256 1 "nonimmediate_operand")
8542 (match_operand:VI_256 2 "nonimmediate_operand")))]
8544 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
;; AVX2 equality comparison insn for VI_256 vectors; emits the three-operand
;; vpcmpeq<ssemodesuffix>.  Operand 1 is commutative ("%x") and
;; ix86_binary_operator_ok gates the operand combination.
8546 (define_insn "*avx2_eq<mode>3"
8547 [(set (match_operand:VI_256 0 "register_operand" "=x")
8549 (match_operand:VI_256 1 "nonimmediate_operand" "%x")
8550 (match_operand:VI_256 2 "nonimmediate_operand" "xm")))]
8551 "TARGET_AVX2 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
8552 "vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
8553 [(set_attr "type" "ssecmp")
8554 (set_attr "prefix_extra" "1")
8555 (set_attr "prefix" "vex")
8556 (set_attr "mode" "OI")])
;; Named expander for the AVX-512F equality comparison of VI48_512 vectors.
;; The result is an <avx512fmaskmode> value expressed as an unspec over the
;; two inputs.  Its only preparation step is
;; ix86_fixup_binary_operands_no_copy with code EQ.
8558 (define_expand "avx512f_eq<mode>3<mask_scalar_merge_name>"
8559 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
8560 (unspec:<avx512fmaskmode>
8561 [(match_operand:VI48_512 1 "register_operand")
8562 (match_operand:VI48_512 2 "nonimmediate_operand")]
8565 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
;; AVX-512F equality comparison insn for VI48_512 vectors.  The result goes
;; to a "k" register in <avx512fmaskmode>; operand 1 is commutative ("%v")
;; and operand 2 may be memory.  Emits vpcmpeq<ssemodesuffix>, with the
;; <mask_scalar_merge_name> substitution supplying the optional mask operand.
8567 (define_insn "avx512f_eq<mode>3<mask_scalar_merge_name>_1"
8568 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
8569 (unspec:<avx512fmaskmode>
8570 [(match_operand:VI48_512 1 "register_operand" "%v")
8571 (match_operand:VI48_512 2 "nonimmediate_operand" "vm")]
8573 "TARGET_AVX512F && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
8574 "vpcmpeq<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
8575 [(set_attr "type" "ssecmp")
8576 (set_attr "prefix_extra" "1")
8577 (set_attr "prefix" "evex")
8578 (set_attr "mode" "<sseinsnmode>")])
;; SSE4.1 equality comparison insn for V2DI vectors.  Operand 1 is
;; commutative ("%") and ix86_binary_operator_ok gates the operands.
;;   alternative 0 (noavx): pcmpeqq, operand 1 tied to the destination
;;   alternative 1 (avx):   three-operand vpcmpeqq
8580 (define_insn "*sse4_1_eqv2di3"
8581 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
8583 (match_operand:V2DI 1 "nonimmediate_operand" "%0,x")
8584 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")))]
8585 "TARGET_SSE4_1 && ix86_binary_operator_ok (EQ, V2DImode, operands)"
8587 pcmpeqq\t{%2, %0|%0, %2}
8588 vpcmpeqq\t{%2, %1, %0|%0, %1, %2}"
8589 [(set_attr "isa" "noavx,avx")
8590 (set_attr "type" "ssecmp")
8591 (set_attr "prefix_extra" "1")
8592 (set_attr "prefix" "orig,vex")
8593 (set_attr "mode" "TI")])
;; SSE2 equality comparison insn for VI124_128 vectors, disabled when
;; TARGET_XOP is set.  Operand 1 is commutative ("%") and
;; ix86_binary_operator_ok gates the operand combination.
;;   alternative 0 (noavx): pcmpeq<ssemodesuffix>, operand 1 tied to the
;;                          destination
;;   alternative 1 (avx):   three-operand vpcmpeq<ssemodesuffix>
8595 (define_insn "*sse2_eq<mode>3"
8596 [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
8598 (match_operand:VI124_128 1 "nonimmediate_operand" "%0,x")
8599 (match_operand:VI124_128 2 "nonimmediate_operand" "xm,xm")))]
8600 "TARGET_SSE2 && !TARGET_XOP
8601 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
8603 pcmpeq<ssemodesuffix>\t{%2, %0|%0, %2}
8604 vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
8605 [(set_attr "isa" "noavx,avx")
8606 (set_attr "type" "ssecmp")
8607 (set_attr "prefix_data16" "1,*")
8608 (set_attr "prefix" "orig,vex")
8609 (set_attr "mode" "TI")])
;; Named expander for equality comparison of VI124_128 vectors, available
;; with SSE2 when TARGET_XOP is not set.  Its only preparation step is
;; ix86_fixup_binary_operands_no_copy with code EQ.
8611 (define_expand "sse2_eq<mode>3"
8612 [(set (match_operand:VI124_128 0 "register_operand")
8614 (match_operand:VI124_128 1 "nonimmediate_operand")
8615 (match_operand:VI124_128 2 "nonimmediate_operand")))]
8616 "TARGET_SSE2 && !TARGET_XOP "
8617 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
;; Named expander for equality comparison of V2DI vectors.  Its only
;; preparation step is ix86_fixup_binary_operands_no_copy with code EQ.
8619 (define_expand "sse4_1_eqv2di3"
8620 [(set (match_operand:V2DI 0 "register_operand")
8622 (match_operand:V2DI 1 "nonimmediate_operand")
8623 (match_operand:V2DI 2 "nonimmediate_operand")))]
8625 "ix86_fixup_binary_operands_no_copy (EQ, V2DImode, operands);")
;; Greater-than comparison insn for V2DI vectors.  Unlike the equality
;; patterns, operand 1 is a plain register_operand with no commutative
;; marker.
;;   alternative 0 (noavx): pcmpgtq, operand 1 tied to the destination
;;   alternative 1 (avx):   three-operand vpcmpgtq
8627 (define_insn "sse4_2_gtv2di3"
8628 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
8630 (match_operand:V2DI 1 "register_operand" "0,x")
8631 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")))]
8634 pcmpgtq\t{%2, %0|%0, %2}
8635 vpcmpgtq\t{%2, %1, %0|%0, %1, %2}"
8636 [(set_attr "isa" "noavx,avx")
8637 (set_attr "type" "ssecmp")
8638 (set_attr "prefix_extra" "1")
8639 (set_attr "prefix" "orig,vex")
8640 (set_attr "mode" "TI")])
;; Greater-than comparison insn for VI_256 vectors; emits the three-operand
;; vpcmpgt<ssemodesuffix>.  Operand 1 must be a register, operand 2 may be
;; memory.
8642 (define_insn "avx2_gt<mode>3"
8643 [(set (match_operand:VI_256 0 "register_operand" "=x")
8645 (match_operand:VI_256 1 "register_operand" "x")
8646 (match_operand:VI_256 2 "nonimmediate_operand" "xm")))]
8648 "vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
8649 [(set_attr "type" "ssecmp")
8650 (set_attr "prefix_extra" "1")
8651 (set_attr "prefix" "vex")
8652 (set_attr "mode" "OI")])
;; AVX-512F greater-than comparison insn for VI48_512 vectors, modelled as
;; UNSPEC_MASKED_GT.  The result goes to a "k" register in
;; <avx512fmaskmode>; operand 2 may be memory.  Emits
;; vpcmpgt<ssemodesuffix>, with the <mask_scalar_merge_name> substitution
;; supplying the optional mask operand.
8654 (define_insn "avx512f_gt<mode>3<mask_scalar_merge_name>"
8655 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
8656 (unspec:<avx512fmaskmode>
8657 [(match_operand:VI48_512 1 "register_operand" "v")
8658 (match_operand:VI48_512 2 "nonimmediate_operand" "vm")] UNSPEC_MASKED_GT))]
8660 "vpcmpgt<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
8661 [(set_attr "type" "ssecmp")
8662 (set_attr "prefix_extra" "1")
8663 (set_attr "prefix" "evex")
8664 (set_attr "mode" "<sseinsnmode>")])
;; SSE2 greater-than comparison insn for VI124_128 vectors, disabled when
;; TARGET_XOP is set.  Operand 1 must be a register.
;;   alternative 0 (noavx): pcmpgt<ssemodesuffix>, operand 1 tied to the
;;                          destination
;;   alternative 1 (avx):   three-operand vpcmpgt<ssemodesuffix>
8666 (define_insn "sse2_gt<mode>3"
8667 [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
8669 (match_operand:VI124_128 1 "register_operand" "0,x")
8670 (match_operand:VI124_128 2 "nonimmediate_operand" "xm,xm")))]
8671 "TARGET_SSE2 && !TARGET_XOP"
8673 pcmpgt<ssemodesuffix>\t{%2, %0|%0, %2}
8674 vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
8675 [(set_attr "isa" "noavx,avx")
8676 (set_attr "type" "ssecmp")
8677 (set_attr "prefix_data16" "1,*")
8678 (set_attr "prefix" "orig,vex")
8679 (set_attr "mode" "TI")])
;; vcond expander pairing a V_512 data mode with a VI_512 comparison mode.
;; Operator 3 compares operands 4 and 5; operands 1 and 2 are the V_512
;; values being chosen between, and operand 0 receives the result.  The
;; pattern is only enabled when both modes have the same number of elements.
;; Expansion is delegated to ix86_expand_int_vcond.
8681 (define_expand "vcond<V_512:mode><VI_512:mode>"
8682 [(set (match_operand:V_512 0 "register_operand")
8684 (match_operator 3 ""
8685 [(match_operand:VI_512 4 "nonimmediate_operand")
8686 (match_operand:VI_512 5 "general_operand")])
8687 (match_operand:V_512 1)
8688 (match_operand:V_512 2)))]
8690 && (GET_MODE_NUNITS (<V_512:MODE>mode)
8691 == GET_MODE_NUNITS (<VI_512:MODE>mode))"
8693 bool ok = ix86_expand_int_vcond (operands);
;; vcond expander pairing a V_256 data mode with a VI_256 comparison mode.
;; Operator 3 compares operands 4 and 5; operands 1 and 2 are the V_256
;; values being chosen between, and operand 0 receives the result.  The
;; pattern is only enabled when both modes have the same number of elements.
;; Expansion is delegated to ix86_expand_int_vcond.
8698 (define_expand "vcond<V_256:mode><VI_256:mode>"
8699 [(set (match_operand:V_256 0 "register_operand")
8701 (match_operator 3 ""
8702 [(match_operand:VI_256 4 "nonimmediate_operand")
8703 (match_operand:VI_256 5 "general_operand")])
8704 (match_operand:V_256 1)
8705 (match_operand:V_256 2)))]
8707 && (GET_MODE_NUNITS (<V_256:MODE>mode)
8708 == GET_MODE_NUNITS (<VI_256:MODE>mode))"
8710 bool ok = ix86_expand_int_vcond (operands);
;; vcond expander pairing a V_128 data mode with a VI124_128 comparison mode.
;; Operator 3 compares operands 4 and 5; operands 1 and 2 are the V_128
;; values being chosen between, and operand 0 receives the result.  The
;; pattern is only enabled when both modes have the same number of elements.
;; Expansion is delegated to ix86_expand_int_vcond.
8715 (define_expand "vcond<V_128:mode><VI124_128:mode>"
8716 [(set (match_operand:V_128 0 "register_operand")
8718 (match_operator 3 ""
8719 [(match_operand:VI124_128 4 "nonimmediate_operand")
8720 (match_operand:VI124_128 5 "general_operand")])
8721 (match_operand:V_128 1)
8722 (match_operand:V_128 2)))]
8724 && (GET_MODE_NUNITS (<V_128:MODE>mode)
8725 == GET_MODE_NUNITS (<VI124_128:MODE>mode))"
8727 bool ok = ix86_expand_int_vcond (operands);
;; vcond expander for VI8F_128 data selected by a V2DI comparison.  The RTL
;; is an if_then_else: operator 3 compares operands 4 and 5, and operand 1 or
;; operand 2 is chosen accordingly.  Expansion is delegated to
;; ix86_expand_int_vcond.
8732 (define_expand "vcond<VI8F_128:mode>v2di"
8733 [(set (match_operand:VI8F_128 0 "register_operand")
8734 (if_then_else:VI8F_128
8735 (match_operator 3 ""
8736 [(match_operand:V2DI 4 "nonimmediate_operand")
8737 (match_operand:V2DI 5 "general_operand")])
8738 (match_operand:VI8F_128 1)
8739 (match_operand:VI8F_128 2)))]
8742 bool ok = ix86_expand_int_vcond (operands);
;; vcondu expander pairing a V_512 data mode with a VI_512 comparison mode.
;; Operator 3 compares operands 4 and 5 (both nonimmediate_operand here);
;; operands 1 and 2 are the general_operand values being chosen between.
;; The pattern is only enabled when both modes have the same number of
;; elements.  Expansion is delegated to ix86_expand_int_vcond.
8747 (define_expand "vcondu<V_512:mode><VI_512:mode>"
8748 [(set (match_operand:V_512 0 "register_operand")
8750 (match_operator 3 ""
8751 [(match_operand:VI_512 4 "nonimmediate_operand")
8752 (match_operand:VI_512 5 "nonimmediate_operand")])
8753 (match_operand:V_512 1 "general_operand")
8754 (match_operand:V_512 2 "general_operand")))]
8756 && (GET_MODE_NUNITS (<V_512:MODE>mode)
8757 == GET_MODE_NUNITS (<VI_512:MODE>mode))"
8759 bool ok = ix86_expand_int_vcond (operands);
;; vcondu expander pairing a V_256 data mode with a VI_256 comparison mode.
;; Operator 3 compares operands 4 and 5 (both nonimmediate_operand here);
;; operands 1 and 2 are the general_operand values being chosen between.
;; The pattern is only enabled when both modes have the same number of
;; elements.  Expansion is delegated to ix86_expand_int_vcond.
8764 (define_expand "vcondu<V_256:mode><VI_256:mode>"
8765 [(set (match_operand:V_256 0 "register_operand")
8767 (match_operator 3 ""
8768 [(match_operand:VI_256 4 "nonimmediate_operand")
8769 (match_operand:VI_256 5 "nonimmediate_operand")])
8770 (match_operand:V_256 1 "general_operand")
8771 (match_operand:V_256 2 "general_operand")))]
8773 && (GET_MODE_NUNITS (<V_256:MODE>mode)
8774 == GET_MODE_NUNITS (<VI_256:MODE>mode))"
8776 bool ok = ix86_expand_int_vcond (operands);
;; vcondu expander pairing a V_128 data mode with a VI124_128 comparison
;; mode.  Operator 3 compares operands 4 and 5 (both nonimmediate_operand
;; here); operands 1 and 2 are the general_operand values being chosen
;; between.  The pattern is only enabled when both modes have the same number
;; of elements.  Expansion is delegated to ix86_expand_int_vcond.
8781 (define_expand "vcondu<V_128:mode><VI124_128:mode>"
8782 [(set (match_operand:V_128 0 "register_operand")
8784 (match_operator 3 ""
8785 [(match_operand:VI124_128 4 "nonimmediate_operand")
8786 (match_operand:VI124_128 5 "nonimmediate_operand")])
8787 (match_operand:V_128 1 "general_operand")
8788 (match_operand:V_128 2 "general_operand")))]
8790 && (GET_MODE_NUNITS (<V_128:MODE>mode)
8791 == GET_MODE_NUNITS (<VI124_128:MODE>mode))"
8793 bool ok = ix86_expand_int_vcond (operands);
;; vcondu expander for VI8F_128 data selected by a V2DI comparison.  The RTL
;; is an if_then_else: operator 3 compares operands 4 and 5 (both
;; nonimmediate_operand), and operand 1 or operand 2 is chosen accordingly.
;; Expansion is delegated to ix86_expand_int_vcond.
8798 (define_expand "vcondu<VI8F_128:mode>v2di"
8799 [(set (match_operand:VI8F_128 0 "register_operand")
8800 (if_then_else:VI8F_128
8801 (match_operator 3 ""
8802 [(match_operand:V2DI 4 "nonimmediate_operand")
8803 (match_operand:V2DI 5 "nonimmediate_operand")])
8804 (match_operand:VI8F_128 1 "general_operand")
8805 (match_operand:VI8F_128 2 "general_operand")))]
8808 bool ok = ix86_expand_int_vcond (operands);
;; Modes handled by the variable-permute expander.  The six 128-bit modes
;; carry no per-mode condition, the 256-bit modes require TARGET_AVX2 and
;; the 512-bit modes require TARGET_AVX512F.
8813 (define_mode_iterator VEC_PERM_AVX2
8814 [V16QI V8HI V4SI V2DI V4SF V2DF
8815 (V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
8816 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
8817 (V8SF "TARGET_AVX2") (V4DF "TARGET_AVX2")
8818 (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")
8819 (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")])
;; Permute expander over VEC_PERM_AVX2 modes with a selector held in a
;; register (operand 3, in <sseintvecmode>).  Enabled when any of SSSE3, AVX
;; or XOP is available; the work is done by ix86_expand_vec_perm.
8821 (define_expand "vec_perm<mode>"
8822 [(match_operand:VEC_PERM_AVX2 0 "register_operand")
8823 (match_operand:VEC_PERM_AVX2 1 "register_operand")
8824 (match_operand:VEC_PERM_AVX2 2 "register_operand")
8825 (match_operand:<sseintvecmode> 3 "register_operand")]
8826 "TARGET_SSSE3 || TARGET_AVX || TARGET_XOP"
8828 ix86_expand_vec_perm (operands);
;; Modes handled by the constant-permute expander, each with its own target
;; requirement: TARGET_SSE for the 4- and 2-element 128-bit modes,
;; TARGET_SSE2 for V16QI/V8HI, TARGET_AVX for the 32- and 64-bit-element
;; 256-bit modes, TARGET_AVX2 for V32QI/V16HI, and TARGET_AVX512F for the
;; 512-bit modes.
8832 (define_mode_iterator VEC_PERM_CONST
8833 [(V4SF "TARGET_SSE") (V4SI "TARGET_SSE")
8834 (V2DF "TARGET_SSE") (V2DI "TARGET_SSE")
8835 (V16QI "TARGET_SSE2") (V8HI "TARGET_SSE2")
8836 (V8SF "TARGET_AVX") (V4DF "TARGET_AVX")
8837 (V8SI "TARGET_AVX") (V4DI "TARGET_AVX")
8838 (V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
8839 (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")
8840 (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")])
;; Permute expander over VEC_PERM_CONST modes.  Operand 3 (in
;; <sseintvecmode>) has no predicate.  The outcome of the expansion depends
;; on the value returned by ix86_expand_vec_perm_const.
;; NOTE(review): presumably a true return means the permutation was emitted
;; and a false return makes the expander fail -- confirm.
8842 (define_expand "vec_perm_const<mode>"
8843 [(match_operand:VEC_PERM_CONST 0 "register_operand")
8844 (match_operand:VEC_PERM_CONST 1 "register_operand")
8845 (match_operand:VEC_PERM_CONST 2 "register_operand")
8846 (match_operand:<sseintvecmode> 3)]
8849 if (ix86_expand_vec_perm_const (operands))
8855 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8857 ;; Parallel bitwise logical operations
8859 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; One's complement expander for VI modes, expressed as an XOR with an
;; all-ones vector.  The preparation code builds a CONST_VECTOR whose
;; GET_MODE_NUNITS elements are all constm1_rtx, forces it into a register
;; and stores that register as operand 2.
8861 (define_expand "one_cmpl<mode>2"
8862 [(set (match_operand:VI 0 "register_operand")
8863 (xor:VI (match_operand:VI 1 "nonimmediate_operand")
8867 int i, n = GET_MODE_NUNITS (<MODE>mode);
8868 rtvec v = rtvec_alloc (n);
8870 for (i = 0; i < n; ++i)
8871 RTVEC_ELT (v, i) = constm1_rtx;
8873 operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v));
;; Named and-not expander for VI_AVX2 modes: operand 1 (register only) is
;; the complemented input, operand 2 may be memory.  Enabled with SSE2 plus
;; the <mask_name> 512-bit condition.  There is no preparation code.
8876 (define_expand "<sse2_avx2>_andnot<mode>3<mask_name>"
8877 [(set (match_operand:VI_AVX2 0 "register_operand")
8879 (not:VI_AVX2 (match_operand:VI_AVX2 1 "register_operand"))
8880 (match_operand:VI_AVX2 2 "nonimmediate_operand")))]
8881 "TARGET_SSE2 && <mask_mode512bit_condition>")
;; And-not insn for all VI modes.  The mnemonic is picked at output time
;; from the insn's "mode" attribute, with gcc_asserts documenting the ISA
;; each attribute value needs; the integer form is "pandn<ssemodesuffix>".
;; The operand template then depends on the alternative: alternative 0 is
;; the two-operand form, alternative 1 prepends "v" and uses the
;; three-operand form with the optional mask operand.  The final text is
;; assembled with snprintf into a static 64-byte buffer.
;;
;; The "mode" attribute is itself computed: <ssePSmode> when
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL, otherwise <sseinsnmode> with AVX2;
;; with plain AVX, V8SF for modes wider than 16 bytes and <sseinsnmode>
;; otherwise; V4SF without SSE2 or when optimizing for size; and
;; <sseinsnmode> as the default.  prefix_data16 is set only for alternative
;; 0 in TI mode.
8883 (define_insn "*andnot<mode>3<mask_name>"
8884 [(set (match_operand:VI 0 "register_operand" "=x,v")
8886 (not:VI (match_operand:VI 1 "register_operand" "0,v"))
8887 (match_operand:VI 2 "nonimmediate_operand" "xm,vm")))]
8888 "TARGET_SSE && <mask_mode512bit_condition>"
8890 static char buf[64];
8894 switch (get_attr_mode (insn))
8897 gcc_assert (TARGET_AVX512F);
8899 tmp = "pandn<ssemodesuffix>";
8903 gcc_assert (TARGET_AVX2);
8905 gcc_assert (TARGET_SSE2);
8911 gcc_assert (TARGET_AVX512F);
8913 gcc_assert (TARGET_AVX);
8915 gcc_assert (TARGET_SSE);
8924 switch (which_alternative)
8927 ops = "%s\t{%%2, %%0|%%0, %%2}";
8930 ops = "v%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
8936 snprintf (buf, sizeof (buf), ops, tmp);
8939 [(set_attr "isa" "noavx,avx")
8940 (set_attr "type" "sselog")
8941 (set (attr "prefix_data16")
8943 (and (eq_attr "alternative" "0")
8944 (eq_attr "mode" "TI"))
8946 (const_string "*")))
8947 (set_attr "prefix" "<mask_prefix3>")
8949 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
8950 (const_string "<ssePSmode>")
8951 (match_test "TARGET_AVX2")
8952 (const_string "<sseinsnmode>")
8953 (match_test "TARGET_AVX")
8955 (match_test "<MODE_SIZE> > 16")
8956 (const_string "V8SF")
8957 (const_string "<sseinsnmode>"))
8958 (ior (not (match_test "TARGET_SSE2"))
8959 (match_test "optimize_function_for_size_p (cfun)"))
8960 (const_string "V4SF")
8962 (const_string "<sseinsnmode>")))])
;; Expander for the bitwise logical codes on VI modes.  Either input may be
;; a register, memory or constant vector
;; (nonimmediate_or_const_vector_operand); the expansion is delegated to
;; ix86_expand_vector_logical_operator.
8964 (define_expand "<code><mode>3"
8965 [(set (match_operand:VI 0 "register_operand")
8967 (match_operand:VI 1 "nonimmediate_or_const_vector_operand")
8968 (match_operand:VI 2 "nonimmediate_or_const_vector_operand")))]
8971 ix86_expand_vector_logical_operator (<CODE>, <MODE>mode, operands);
;; Bitwise logical insn for all VI modes; operand 1 is commutative ("%") and
;; ix86_binary_operator_ok gates the operand combination.  The mnemonic is
;; picked at output time from the insn's "mode" attribute, with gcc_asserts
;; documenting the ISA each attribute value needs; the integer form is
;; "p<logic><ssemodesuffix>".  The operand template then depends on the
;; alternative: alternative 0 is the two-operand form, alternative 1
;; prepends "v" and uses the three-operand form with the optional mask
;; operand.  The final text is assembled with snprintf into a static
;; 64-byte buffer.
;;
;; The "mode" attribute is itself computed: <ssePSmode> when
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL, otherwise <sseinsnmode> with AVX2;
;; with plain AVX, V8SF for modes wider than 16 bytes and <sseinsnmode>
;; otherwise; V4SF without SSE2 or when optimizing for size; and
;; <sseinsnmode> as the default.  prefix_data16 is set only for alternative
;; 0 in TI mode.
8975 (define_insn "<mask_codefor><code><mode>3<mask_name>"
8976 [(set (match_operand:VI 0 "register_operand" "=x,v")
8978 (match_operand:VI 1 "nonimmediate_operand" "%0,v")
8979 (match_operand:VI 2 "nonimmediate_operand" "xm,vm")))]
8980 "TARGET_SSE && <mask_mode512bit_condition>
8981 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
8983 static char buf[64];
8987 switch (get_attr_mode (insn))
8990 gcc_assert (TARGET_AVX512F);
8992 tmp = "p<logic><ssemodesuffix>";
8996 gcc_assert (TARGET_AVX2);
8998 gcc_assert (TARGET_SSE2);
9004 gcc_assert (TARGET_AVX512F);
9006 gcc_assert (TARGET_AVX);
9008 gcc_assert (TARGET_SSE);
9017 switch (which_alternative)
9020 ops = "%s\t{%%2, %%0|%%0, %%2}";
9023 ops = "v%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
9029 snprintf (buf, sizeof (buf), ops, tmp);
9032 [(set_attr "isa" "noavx,avx")
9033 (set_attr "type" "sselog")
9034 (set (attr "prefix_data16")
9036 (and (eq_attr "alternative" "0")
9037 (eq_attr "mode" "TI"))
9039 (const_string "*")))
9040 (set_attr "prefix" "<mask_prefix3>")
9042 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
9043 (const_string "<ssePSmode>")
9044 (match_test "TARGET_AVX2")
9045 (const_string "<sseinsnmode>")
9046 (match_test "TARGET_AVX")
9048 (match_test "<MODE_SIZE> > 16")
9049 (const_string "V8SF")
9050 (const_string "<sseinsnmode>"))
9051 (ior (not (match_test "TARGET_SSE2"))
9052 (match_test "optimize_function_for_size_p (cfun)"))
9053 (const_string "V4SF")
9055 (const_string "<sseinsnmode>")))])
;; AVX-512F vptestm insn for VI48_512 vectors, modelled as an unspec whose
;; result goes to a "k" register in <avx512fmaskmode>.  Operand 1 must be a
;; register, operand 2 may be memory; the <mask_scalar_merge_name>
;; substitution supplies the optional mask operand.
9057 (define_insn "avx512f_testm<mode>3<mask_scalar_merge_name>"
9058 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
9059 (unspec:<avx512fmaskmode>
9060 [(match_operand:VI48_512 1 "register_operand" "v")
9061 (match_operand:VI48_512 2 "nonimmediate_operand" "vm")]
9064 "vptestm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
9065 [(set_attr "prefix" "evex")
9066 (set_attr "mode" "<sseinsnmode>")])
;; AVX-512F vptestnm insn for VI48_512 vectors, modelled as an unspec whose
;; result goes to a "k" register in <avx512fmaskmode>.  Operand 1 must be a
;; register, operand 2 may be memory; the <mask_scalar_merge_name>
;; substitution supplies the optional mask operand.
;; The mnemonic is spelled with a literal "v": the insn is EVEX-only
;; (prefix attribute "evex"), so the optional-VEX "%v" modifier is not
;; appropriate here.
9068 (define_insn "avx512f_testnm<mode>3<mask_scalar_merge_name>"
9069 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
9070 (unspec:<avx512fmaskmode>
9071 [(match_operand:VI48_512 1 "register_operand" "v")
9072 (match_operand:VI48_512 2 "nonimmediate_operand" "vm")]
9075 "vptestnm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
9076 [(set_attr "prefix" "evex")
9077 (set_attr "mode" "<sseinsnmode>")])
9079 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9081 ;; Parallel integral element swizzling
9083 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Pack-with-truncation expander for VI248_AVX2_8_AVX512F inputs producing
;; an <ssepackmode> result.  Both inputs are reinterpreted in <ssepackmode>
;; with gen_lowpart and handed to ix86_expand_vec_extract_even_odd with a
;; final argument of 0.
;; NOTE(review): the 0 presumably selects the even-numbered elements, i.e.
;; the low half of each wide element -- confirm against the helper.
9085 (define_expand "vec_pack_trunc_<mode>"
9086 [(match_operand:<ssepackmode> 0 "register_operand")
9087 (match_operand:VI248_AVX2_8_AVX512F 1 "register_operand")
9088 (match_operand:VI248_AVX2_8_AVX512F 2 "register_operand")]
9091 rtx op1 = gen_lowpart (<ssepackmode>mode, operands[1]);
9092 rtx op2 = gen_lowpart (<ssepackmode>mode, operands[2]);
9093 ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
;; Pack with signed saturation into VI1_AVX2: the result is the vec_concat
;; of the ss_truncate of operand 1 and the ss_truncate of operand 2, each an
;; <sseunpackmode> value narrowed to <ssehalfvecmode>.
;;   alternative 0 (noavx): packsswb, operand 1 tied to the destination
;;   alternative 1 (avx):   three-operand vpacksswb
9097 (define_insn "<sse2_avx2>_packsswb"
9098 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
9099 (vec_concat:VI1_AVX2
9100 (ss_truncate:<ssehalfvecmode>
9101 (match_operand:<sseunpackmode> 1 "register_operand" "0,x"))
9102 (ss_truncate:<ssehalfvecmode>
9103 (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,xm"))))]
9106 packsswb\t{%2, %0|%0, %2}
9107 vpacksswb\t{%2, %1, %0|%0, %1, %2}"
9108 [(set_attr "isa" "noavx,avx")
9109 (set_attr "type" "sselog")
9110 (set_attr "prefix_data16" "1,*")
9111 (set_attr "prefix" "orig,vex")
9112 (set_attr "mode" "<sseinsnmode>")])
;; Pack with signed saturation into VI2_AVX2: the result is the vec_concat
;; of the ss_truncate of operand 1 and the ss_truncate of operand 2, each an
;; <sseunpackmode> value narrowed to <ssehalfvecmode>.
;;   alternative 0 (noavx): packssdw, operand 1 tied to the destination
;;   alternative 1 (avx):   three-operand vpackssdw
9114 (define_insn "<sse2_avx2>_packssdw"
9115 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
9116 (vec_concat:VI2_AVX2
9117 (ss_truncate:<ssehalfvecmode>
9118 (match_operand:<sseunpackmode> 1 "register_operand" "0,x"))
9119 (ss_truncate:<ssehalfvecmode>
9120 (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,xm"))))]
9123 packssdw\t{%2, %0|%0, %2}
9124 vpackssdw\t{%2, %1, %0|%0, %1, %2}"
9125 [(set_attr "isa" "noavx,avx")
9126 (set_attr "type" "sselog")
9127 (set_attr "prefix_data16" "1,*")
9128 (set_attr "prefix" "orig,vex")
9129 (set_attr "mode" "<sseinsnmode>")])
;; Pack with unsigned saturation into VI1_AVX2: the result is the vec_concat
;; of the us_truncate of operand 1 and the us_truncate of operand 2, each an
;; <sseunpackmode> value narrowed to <ssehalfvecmode>.
;;   alternative 0 (noavx): packuswb, operand 1 tied to the destination
;;   alternative 1 (avx):   three-operand vpackuswb
9131 (define_insn "<sse2_avx2>_packuswb"
9132 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
9133 (vec_concat:VI1_AVX2
9134 (us_truncate:<ssehalfvecmode>
9135 (match_operand:<sseunpackmode> 1 "register_operand" "0,x"))
9136 (us_truncate:<ssehalfvecmode>
9137 (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,xm"))))]
9140 packuswb\t{%2, %0|%0, %2}
9141 vpackuswb\t{%2, %1, %0|%0, %1, %2}"
9142 [(set_attr "isa" "noavx,avx")
9143 (set_attr "type" "sselog")
9144 (set_attr "prefix_data16" "1,*")
9145 (set_attr "prefix" "orig,vex")
9146 (set_attr "mode" "<sseinsnmode>")])
;; 256-bit byte interleave, emitted as vpunpckhbw.  The selection list pairs
;; index i with index i+32 for i in 8..15 and then for i in 24..31.
;; NOTE(review): indices 32..63 presumably address operand 2 within the
;; concatenation of operands 1 and 2 -- confirm.
9148 (define_insn "avx2_interleave_highv32qi"
9149 [(set (match_operand:V32QI 0 "register_operand" "=x")
9152 (match_operand:V32QI 1 "register_operand" "x")
9153 (match_operand:V32QI 2 "nonimmediate_operand" "xm"))
9154 (parallel [(const_int 8) (const_int 40)
9155 (const_int 9) (const_int 41)
9156 (const_int 10) (const_int 42)
9157 (const_int 11) (const_int 43)
9158 (const_int 12) (const_int 44)
9159 (const_int 13) (const_int 45)
9160 (const_int 14) (const_int 46)
9161 (const_int 15) (const_int 47)
9162 (const_int 24) (const_int 56)
9163 (const_int 25) (const_int 57)
9164 (const_int 26) (const_int 58)
9165 (const_int 27) (const_int 59)
9166 (const_int 28) (const_int 60)
9167 (const_int 29) (const_int 61)
9168 (const_int 30) (const_int 62)
9169 (const_int 31) (const_int 63)])))]
9171 "vpunpckhbw\t{%2, %1, %0|%0, %1, %2}"
9172 [(set_attr "type" "sselog")
9173 (set_attr "prefix" "vex")
9174 (set_attr "mode" "OI")])
;; 128-bit byte interleave of the high halves, emitted as punpckhbw
;; (alternative 0, operand 1 tied to the destination) or vpunpckhbw
;; (alternative 1).  The selection list pairs index i with index i+16 for i
;; in 8..15.
;; NOTE(review): indices 16..31 presumably address operand 2 within the
;; concatenation of operands 1 and 2 -- confirm.
9176 (define_insn "vec_interleave_highv16qi"
9177 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
9180 (match_operand:V16QI 1 "register_operand" "0,x")
9181 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm"))
9182 (parallel [(const_int 8) (const_int 24)
9183 (const_int 9) (const_int 25)
9184 (const_int 10) (const_int 26)
9185 (const_int 11) (const_int 27)
9186 (const_int 12) (const_int 28)
9187 (const_int 13) (const_int 29)
9188 (const_int 14) (const_int 30)
9189 (const_int 15) (const_int 31)])))]
9192 punpckhbw\t{%2, %0|%0, %2}
9193 vpunpckhbw\t{%2, %1, %0|%0, %1, %2}"
9194 [(set_attr "isa" "noavx,avx")
9195 (set_attr "type" "sselog")
9196 (set_attr "prefix_data16" "1,*")
9197 (set_attr "prefix" "orig,vex")
9198 (set_attr "mode" "TI")])
;; 256-bit byte interleave, emitted as vpunpcklbw.  The selection list pairs
;; index i with index i+32 for i in 0..7 and then for i in 16..23.
;; NOTE(review): indices 32..63 presumably address operand 2 within the
;; concatenation of operands 1 and 2 -- confirm.
9200 (define_insn "avx2_interleave_lowv32qi"
9201 [(set (match_operand:V32QI 0 "register_operand" "=x")
9204 (match_operand:V32QI 1 "register_operand" "x")
9205 (match_operand:V32QI 2 "nonimmediate_operand" "xm"))
9206 (parallel [(const_int 0) (const_int 32)
9207 (const_int 1) (const_int 33)
9208 (const_int 2) (const_int 34)
9209 (const_int 3) (const_int 35)
9210 (const_int 4) (const_int 36)
9211 (const_int 5) (const_int 37)
9212 (const_int 6) (const_int 38)
9213 (const_int 7) (const_int 39)
9214 (const_int 16) (const_int 48)
9215 (const_int 17) (const_int 49)
9216 (const_int 18) (const_int 50)
9217 (const_int 19) (const_int 51)
9218 (const_int 20) (const_int 52)
9219 (const_int 21) (const_int 53)
9220 (const_int 22) (const_int 54)
9221 (const_int 23) (const_int 55)])))]
9223 "vpunpcklbw\t{%2, %1, %0|%0, %1, %2}"
9224 [(set_attr "type" "sselog")
9225 (set_attr "prefix" "vex")
9226 (set_attr "mode" "OI")])
;; 128-bit byte interleave of the low halves, emitted as punpcklbw
;; (alternative 0, operand 1 tied to the destination) or vpunpcklbw
;; (alternative 1).  The selection list pairs index i with index i+16 for i
;; in 0..7.
;; NOTE(review): indices 16..31 presumably address operand 2 within the
;; concatenation of operands 1 and 2 -- confirm.
9228 (define_insn "vec_interleave_lowv16qi"
9229 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
9232 (match_operand:V16QI 1 "register_operand" "0,x")
9233 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm"))
9234 (parallel [(const_int 0) (const_int 16)
9235 (const_int 1) (const_int 17)
9236 (const_int 2) (const_int 18)
9237 (const_int 3) (const_int 19)
9238 (const_int 4) (const_int 20)
9239 (const_int 5) (const_int 21)
9240 (const_int 6) (const_int 22)
9241 (const_int 7) (const_int 23)])))]
9244 punpcklbw\t{%2, %0|%0, %2}
9245 vpunpcklbw\t{%2, %1, %0|%0, %1, %2}"
9246 [(set_attr "isa" "noavx,avx")
9247 (set_attr "type" "sselog")
9248 (set_attr "prefix_data16" "1,*")
9249 (set_attr "prefix" "orig,vex")
9250 (set_attr "mode" "TI")])
;; 256-bit word interleave, emitted as vpunpckhwd (VEX only).
;; The index list pairs elements 4-7 with 20-23 and 12-15 with 28-31,
;; i.e. the upper four words of each 8-word group of both inputs.
9252 (define_insn "avx2_interleave_highv16hi"
9253 [(set (match_operand:V16HI 0 "register_operand" "=x")
9256 (match_operand:V16HI 1 "register_operand" "x")
9257 (match_operand:V16HI 2 "nonimmediate_operand" "xm"))
9258 (parallel [(const_int 4) (const_int 20)
9259 (const_int 5) (const_int 21)
9260 (const_int 6) (const_int 22)
9261 (const_int 7) (const_int 23)
9262 (const_int 12) (const_int 28)
9263 (const_int 13) (const_int 29)
9264 (const_int 14) (const_int 30)
9265 (const_int 15) (const_int 31)])))]
9267 "vpunpckhwd\t{%2, %1, %0|%0, %1, %2}"
9268 [(set_attr "type" "sselog")
9269 (set_attr "prefix" "vex")
9270 (set_attr "mode" "OI")])
;; 128-bit word interleave of the upper four elements (indices 4-7 paired
;; with 12-15).  Alternative 0: legacy punpckhwd with operand 1 tied to the
;; output (isa noavx); alternative 1: three-operand vpunpckhwd (isa avx).
9272 (define_insn "vec_interleave_highv8hi"
9273 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
9276 (match_operand:V8HI 1 "register_operand" "0,x")
9277 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm"))
9278 (parallel [(const_int 4) (const_int 12)
9279 (const_int 5) (const_int 13)
9280 (const_int 6) (const_int 14)
9281 (const_int 7) (const_int 15)])))]
9284 punpckhwd\t{%2, %0|%0, %2}
9285 vpunpckhwd\t{%2, %1, %0|%0, %1, %2}"
9286 [(set_attr "isa" "noavx,avx")
9287 (set_attr "type" "sselog")
9288 (set_attr "prefix_data16" "1,*")
9289 (set_attr "prefix" "orig,vex")
9290 (set_attr "mode" "TI")])
;; 256-bit word interleave, emitted as vpunpcklwd (VEX only).
;; The index list pairs elements 0-3 with 16-19 and 8-11 with 24-27,
;; i.e. the lower four words of each 8-word group of both inputs.
9292 (define_insn "avx2_interleave_lowv16hi"
9293 [(set (match_operand:V16HI 0 "register_operand" "=x")
9296 (match_operand:V16HI 1 "register_operand" "x")
9297 (match_operand:V16HI 2 "nonimmediate_operand" "xm"))
9298 (parallel [(const_int 0) (const_int 16)
9299 (const_int 1) (const_int 17)
9300 (const_int 2) (const_int 18)
9301 (const_int 3) (const_int 19)
9302 (const_int 8) (const_int 24)
9303 (const_int 9) (const_int 25)
9304 (const_int 10) (const_int 26)
9305 (const_int 11) (const_int 27)])))]
9307 "vpunpcklwd\t{%2, %1, %0|%0, %1, %2}"
9308 [(set_attr "type" "sselog")
9309 (set_attr "prefix" "vex")
9310 (set_attr "mode" "OI")])
;; 128-bit word interleave of the lower four elements (indices 0-3 paired
;; with 8-11).  Alternative 0: legacy punpcklwd with operand 1 tied to the
;; output (isa noavx); alternative 1: three-operand vpunpcklwd (isa avx).
9312 (define_insn "vec_interleave_lowv8hi"
9313 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
9316 (match_operand:V8HI 1 "register_operand" "0,x")
9317 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm"))
9318 (parallel [(const_int 0) (const_int 8)
9319 (const_int 1) (const_int 9)
9320 (const_int 2) (const_int 10)
9321 (const_int 3) (const_int 11)])))]
9324 punpcklwd\t{%2, %0|%0, %2}
9325 vpunpcklwd\t{%2, %1, %0|%0, %1, %2}"
9326 [(set_attr "isa" "noavx,avx")
9327 (set_attr "type" "sselog")
9328 (set_attr "prefix_data16" "1,*")
9329 (set_attr "prefix" "orig,vex")
9330 (set_attr "mode" "TI")])
;; 256-bit doubleword interleave, emitted as vpunpckhdq (VEX only).
;; The index list pairs elements 2-3 with 10-11 and 6-7 with 14-15,
;; i.e. the upper two dwords of each 4-dword group of both inputs.
9332 (define_insn "avx2_interleave_highv8si"
9333 [(set (match_operand:V8SI 0 "register_operand" "=x")
9336 (match_operand:V8SI 1 "register_operand" "x")
9337 (match_operand:V8SI 2 "nonimmediate_operand" "xm"))
9338 (parallel [(const_int 2) (const_int 10)
9339 (const_int 3) (const_int 11)
9340 (const_int 6) (const_int 14)
9341 (const_int 7) (const_int 15)])))]
9343 "vpunpckhdq\t{%2, %1, %0|%0, %1, %2}"
9344 [(set_attr "type" "sselog")
9345 (set_attr "prefix" "vex")
9346 (set_attr "mode" "OI")])
;; 512-bit doubleword interleave, emitted as vpunpckhdq with EVEX prefix.
;; The <mask_codefor>/<mask_name> substitutions in the name generate the
;; masked variant; <mask_operand3> prints the mask decoration on operand 0.
;; The index list pairs the upper two dwords of every 4-dword group of the
;; first input (2-3, 6-7, 10-11, 14-15) with the same positions + 16.
9348 (define_insn "<mask_codefor>avx512f_interleave_highv16si<mask_name>"
9349 [(set (match_operand:V16SI 0 "register_operand" "=v")
9352 (match_operand:V16SI 1 "register_operand" "v")
9353 (match_operand:V16SI 2 "nonimmediate_operand" "vm"))
9354 (parallel [(const_int 2) (const_int 18)
9355 (const_int 3) (const_int 19)
9356 (const_int 6) (const_int 22)
9357 (const_int 7) (const_int 23)
9358 (const_int 10) (const_int 26)
9359 (const_int 11) (const_int 27)
9360 (const_int 14) (const_int 30)
9361 (const_int 15) (const_int 31)])))]
9363 "vpunpckhdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9364 [(set_attr "type" "sselog")
9365 (set_attr "prefix" "evex")
9366 (set_attr "mode" "XI")])
;; 128-bit doubleword interleave of the upper two elements (indices 2-3
;; paired with 6-7).  Alternative 0: legacy punpckhdq with operand 1 tied
;; to the output (isa noavx); alternative 1: three-operand vpunpckhdq.
9369 (define_insn "vec_interleave_highv4si"
9370 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
9373 (match_operand:V4SI 1 "register_operand" "0,x")
9374 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm"))
9375 (parallel [(const_int 2) (const_int 6)
9376 (const_int 3) (const_int 7)])))]
9379 punpckhdq\t{%2, %0|%0, %2}
9380 vpunpckhdq\t{%2, %1, %0|%0, %1, %2}"
9381 [(set_attr "isa" "noavx,avx")
9382 (set_attr "type" "sselog")
9383 (set_attr "prefix_data16" "1,*")
9384 (set_attr "prefix" "orig,vex")
9385 (set_attr "mode" "TI")])
;; 256-bit doubleword interleave, emitted as vpunpckldq (VEX only).
;; The index list pairs elements 0-1 with 8-9 and 4-5 with 12-13,
;; i.e. the lower two dwords of each 4-dword group of both inputs.
9387 (define_insn "avx2_interleave_lowv8si"
9388 [(set (match_operand:V8SI 0 "register_operand" "=x")
9391 (match_operand:V8SI 1 "register_operand" "x")
9392 (match_operand:V8SI 2 "nonimmediate_operand" "xm"))
9393 (parallel [(const_int 0) (const_int 8)
9394 (const_int 1) (const_int 9)
9395 (const_int 4) (const_int 12)
9396 (const_int 5) (const_int 13)])))]
9398 "vpunpckldq\t{%2, %1, %0|%0, %1, %2}"
9399 [(set_attr "type" "sselog")
9400 (set_attr "prefix" "vex")
9401 (set_attr "mode" "OI")])
;; 512-bit doubleword interleave, emitted as vpunpckldq with EVEX prefix.
;; The <mask_codefor>/<mask_name> substitutions in the name generate the
;; masked variant; <mask_operand3> prints the mask decoration on operand 0.
;; The index list pairs the lower two dwords of every 4-dword group of the
;; first input (0-1, 4-5, 8-9, 12-13) with the same positions + 16.
9403 (define_insn "<mask_codefor>avx512f_interleave_lowv16si<mask_name>"
9404 [(set (match_operand:V16SI 0 "register_operand" "=v")
9407 (match_operand:V16SI 1 "register_operand" "v")
9408 (match_operand:V16SI 2 "nonimmediate_operand" "vm"))
9409 (parallel [(const_int 0) (const_int 16)
9410 (const_int 1) (const_int 17)
9411 (const_int 4) (const_int 20)
9412 (const_int 5) (const_int 21)
9413 (const_int 8) (const_int 24)
9414 (const_int 9) (const_int 25)
9415 (const_int 12) (const_int 28)
9416 (const_int 13) (const_int 29)])))]
9418 "vpunpckldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9419 [(set_attr "type" "sselog")
9420 (set_attr "prefix" "evex")
9421 (set_attr "mode" "XI")])
;; 128-bit doubleword interleave of the lower two elements (indices 0-1
;; paired with 4-5).  Alternative 0: legacy punpckldq with operand 1 tied
;; to the output (isa noavx); alternative 1: three-operand vpunpckldq.
9423 (define_insn "vec_interleave_lowv4si"
9424 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
9427 (match_operand:V4SI 1 "register_operand" "0,x")
9428 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm"))
9429 (parallel [(const_int 0) (const_int 4)
9430 (const_int 1) (const_int 5)])))]
9433 punpckldq\t{%2, %0|%0, %2}
9434 vpunpckldq\t{%2, %1, %0|%0, %1, %2}"
9435 [(set_attr "isa" "noavx,avx")
9436 (set_attr "type" "sselog")
9437 (set_attr "prefix_data16" "1,*")
9438 (set_attr "prefix" "orig,vex")
9439 (set_attr "mode" "TI")])
;; Expander for the high interleave on VI_256 modes.  It emits both
;; avx2_interleave_low<mode> (into t1) and avx2_interleave_high<mode>
;; (into t2), then combines the two results, viewed as V4DI, with
;; avx2_permv2ti using immediate 1 + (3 << 4), and finally moves the
;; permuted value back into operand 0 in <MODE>mode.
;; NOTE(review): the immediate presumably selects the upper 128-bit lane of
;; t1 and of t2 -- confirm against the avx2_permv2ti pattern.
9441 (define_expand "vec_interleave_high<mode>"
9442 [(match_operand:VI_256 0 "register_operand" "=x")
9443 (match_operand:VI_256 1 "register_operand" "x")
9444 (match_operand:VI_256 2 "nonimmediate_operand" "xm")]
9447 rtx t1 = gen_reg_rtx (<MODE>mode);
9448 rtx t2 = gen_reg_rtx (<MODE>mode);
9449 rtx t3 = gen_reg_rtx (V4DImode);
9450 emit_insn (gen_avx2_interleave_low<mode> (t1, operands[1], operands[2]));
9451 emit_insn (gen_avx2_interleave_high<mode> (t2, operands[1], operands[2]));
9452 emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, t1),
9453 gen_lowpart (V4DImode, t2),
9454 GEN_INT (1 + (3 << 4))));
9455 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, t3));
;; Expander for the low interleave on VI_256 modes.  Same sequence as the
;; high variant -- avx2_interleave_low<mode> into t1, avx2_interleave_high
;; <mode> into t2, avx2_permv2ti on the V4DI views -- but with immediate
;; 0 + (2 << 4); the result is moved back into operand 0 in <MODE>mode.
;; NOTE(review): the immediate presumably selects the lower 128-bit lane of
;; t1 and of t2 -- confirm against the avx2_permv2ti pattern.
9459 (define_expand "vec_interleave_low<mode>"
9460 [(match_operand:VI_256 0 "register_operand" "=x")
9461 (match_operand:VI_256 1 "register_operand" "x")
9462 (match_operand:VI_256 2 "nonimmediate_operand" "xm")]
9465 rtx t1 = gen_reg_rtx (<MODE>mode);
9466 rtx t2 = gen_reg_rtx (<MODE>mode);
9467 rtx t3 = gen_reg_rtx (V4DImode);
9468 emit_insn (gen_avx2_interleave_low<mode> (t1, operands[1], operands[2]));
9469 emit_insn (gen_avx2_interleave_high<mode> (t2, operands[1], operands[2]));
9470 emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, t1),
9471 gen_lowpart (V4DImode, t2),
9472 GEN_INT (0 + (2 << 4))));
9473 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, t3));
9477 ;; Modes handled by pinsr patterns.
;; V8HI is unconditional here; V16QI and V4SI are enabled only with
;; TARGET_SSE4_1, and V2DI additionally requires TARGET_64BIT.
9478 (define_mode_iterator PINSR_MODE
9479 [(V16QI "TARGET_SSE4_1") V8HI
9480 (V4SI "TARGET_SSE4_1")
9481 (V2DI "TARGET_SSE4_1 && TARGET_64BIT")])
;; Name prefix used by the pinsr pattern for each mode: "sse2" for V8HI,
;; "sse4_1" for V16QI, V4SI and V2DI.
9483 (define_mode_attr sse2p4_1
9484 [(V16QI "sse4_1") (V8HI "sse2")
9485 (V4SI "sse4_1") (V2DI "sse4_1")])
9487 ;; sse4_1_pinsrd must come before sse2_loadld since it is preferred.
;; Insert scalar operand 2 into one element of vector operand 1.
;; Operand 3 arrives as a vec_merge mask; the insn condition requires
;; exact_log2 of it to be a valid element index for <MODE>mode, and the
;; output code replaces operand 3 with that index before printing.
;; Alternatives 0-1 (isa noavx) tie operand 1 to the output and print the
;; two-operand pinsr form; alternatives 2-3 (isa avx) print vpinsr.
;; When the element is narrower than SImode the source is printed with
;; the %k2 modifier.
9488 (define_insn "<sse2p4_1>_pinsr<ssemodesuffix>"
9489 [(set (match_operand:PINSR_MODE 0 "register_operand" "=x,x,x,x")
9490 (vec_merge:PINSR_MODE
9491 (vec_duplicate:PINSR_MODE
9492 (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "r,m,r,m"))
9493 (match_operand:PINSR_MODE 1 "register_operand" "0,0,x,x")
9494 (match_operand:SI 3 "const_int_operand")))]
9496 && ((unsigned) exact_log2 (INTVAL (operands[3]))
9497 < GET_MODE_NUNITS (<MODE>mode))"
9499 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
9501 switch (which_alternative)
9504 if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
9505 return "pinsr<ssemodesuffix>\t{%3, %k2, %0|%0, %k2, %3}";
9508 return "pinsr<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}";
9510 if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
9511 return "vpinsr<ssemodesuffix>\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
9514 return "vpinsr<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
9519 [(set_attr "isa" "noavx,noavx,avx,avx")
9520 (set_attr "type" "sselog")
9521 (set (attr "prefix_rex")
9523 (and (not (match_test "TARGET_AVX"))
9524 (eq (const_string "<MODE>mode") (const_string "V2DImode")))
9526 (const_string "*")))
9527 (set (attr "prefix_data16")
9529 (and (not (match_test "TARGET_AVX"))
9530 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
9532 (const_string "*")))
9533 (set (attr "prefix_extra")
9535 (and (not (match_test "TARGET_AVX"))
9536 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
9538 (const_string "1")))
9539 (set_attr "length_immediate" "1")
9540 (set_attr "prefix" "orig,orig,vex,vex")
9541 (set_attr "mode" "TI")])
;; Expander for the masked vinsert{f,i}32x4 builtin.  It switches on the
;; immediate in operand 3 (0..3 per its predicate) and emits the
;; ..._32x4_1_mask pattern with one of four selector constants:
;; 0xFFF, 0xF0FF, 0xFF0F or 0xFFF0.
;; NOTE(review): the case labels are not visible in this excerpt.  If the
;; calls are in case 0..3 order, immediate 0 maps to 0xFFF, whose clear
;; bits are 12-15 -- confirm this selector-to-lane mapping matches the
;; merge semantics of the insn pattern.
9543 (define_expand "avx512f_vinsert<shuffletype>32x4_mask"
9544 [(match_operand:V16FI 0 "register_operand")
9545 (match_operand:V16FI 1 "register_operand")
9546 (match_operand:<ssequartermode> 2 "nonimmediate_operand")
9547 (match_operand:SI 3 "const_0_to_3_operand")
9548 (match_operand:V16FI 4 "register_operand")
9549 (match_operand:<avx512fmaskmode> 5 "register_operand")]
9552 switch (INTVAL (operands[3]))
9555 emit_insn (gen_avx512f_vinsert<shuffletype>32x4_1_mask (operands[0],
9556 operands[1], operands[2], GEN_INT (0xFFF), operands[4],
9560 emit_insn (gen_avx512f_vinsert<shuffletype>32x4_1_mask (operands[0],
9561 operands[1], operands[2], GEN_INT (0xF0FF), operands[4],
9565 emit_insn (gen_avx512f_vinsert<shuffletype>32x4_1_mask (operands[0],
9566 operands[1], operands[2], GEN_INT (0xFF0F), operands[4],
9570 emit_insn (gen_avx512f_vinsert<shuffletype>32x4_1_mask (operands[0],
9571 operands[1], operands[2], GEN_INT (0xFFF0), operands[4],
;; Insn behind vinsert{f,i}32x4: combines 512-bit operand 1 with a
;; duplicated quarter-width operand 2 under the constant selector in
;; operand 3.  The output code recognises the selectors 0xFFF, 0xF0FF,
;; 0xFF0F and 0xFFF0, computes the instruction immediate into `mask'
;; (the assignments are not visible in this excerpt) and stores it back
;; into operand 3 before printing.
9581 (define_insn "<mask_codefor>avx512f_vinsert<shuffletype>32x4_1<mask_name>"
9582 [(set (match_operand:V16FI 0 "register_operand" "=v")
9584 (match_operand:V16FI 1 "register_operand" "v")
9585 (vec_duplicate:V16FI
9586 (match_operand:<ssequartermode> 2 "nonimmediate_operand" "vm"))
9587 (match_operand:SI 3 "const_int_operand" "n")))]
9591 if (INTVAL (operands[3]) == 0xFFF)
9593 else if ( INTVAL (operands[3]) == 0xF0FF)
9595 else if ( INTVAL (operands[3]) == 0xFF0F)
9597 else if ( INTVAL (operands[3]) == 0xFFF0)
9602 operands[3] = GEN_INT (mask);
9604 return "vinsert<shuffletype>32x4\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}";
9606 [(set_attr "type" "sselog")
9607 (set_attr "length_immediate" "1")
9608 (set_attr "prefix" "evex")
9609 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the masked vinsert{f,i}64x4 builtin.  Reads the 0/1
;; immediate from operand 3 and emits either vec_set_lo_<mode>_mask or
;; vec_set_hi_<mode>_mask, forwarding the destination, both sources, the
;; merge source (operand 4) and the mask register (operand 5).
9611 (define_expand "avx512f_vinsert<shuffletype>64x4_mask"
9612 [(match_operand:V8FI 0 "register_operand")
9613 (match_operand:V8FI 1 "register_operand")
9614 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand")
9615 (match_operand:SI 3 "const_0_to_1_operand")
9616 (match_operand:V8FI 4 "register_operand")
9617 (match_operand:<avx512fmaskmode> 5 "register_operand")]
9620 int mask = INTVAL (operands[3]);
9622 emit_insn (gen_vec_set_lo_<mode>_mask
9623 (operands[0], operands[1], operands[2],
9624 operands[4], operands[5]));
9626 emit_insn (gen_vec_set_hi_<mode>_mask
9627 (operands[0], operands[1], operands[2],
9628 operands[4], operands[5]));
;; Replace the low half of 512-bit operand 1 with half-width operand 2:
;; the result is operand 2 followed by elements 4-7 of operand 1.
;; Emitted as vinsert{f,i}64x4 with immediate 0.
;; The Intel-dialect half of the template prints the immediate bare
;; (0x0); the `$' prefix belongs only to the AT&T dialect.
9632 (define_insn "vec_set_lo_<mode><mask_name>"
9633 [(set (match_operand:V8FI 0 "register_operand" "=v")
9635 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
9636 (vec_select:<ssehalfvecmode>
9637 (match_operand:V8FI 1 "register_operand" "v")
9638 (parallel [(const_int 4) (const_int 5)
9639 (const_int 6) (const_int 7)]))))]
9641 "vinsert<shuffletype>64x4\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}"
9642 [(set_attr "type" "sselog")
9643 (set_attr "length_immediate" "1")
9644 (set_attr "prefix" "evex")
9645 (set_attr "mode" "XI")])
;; Replace the high half of 512-bit operand 1 with half-width operand 2:
;; the result is elements 0-3 of operand 1 followed by operand 2.
;; Emitted as vinsert{f,i}64x4 with immediate 1.
;; The kept low half of operand 1 must come FIRST in the concatenation
;; and operand 2 second; the previous order described operand 2 landing
;; in the low half, which contradicts the instruction that is emitted.
;; The Intel-dialect half of the template prints the immediate bare
;; (0x1); the `$' prefix belongs only to the AT&T dialect.
9647 (define_insn "vec_set_hi_<mode><mask_name>"
9648 [(set (match_operand:V8FI 0 "register_operand" "=v")
9650 (vec_select:<ssehalfvecmode>
9651 (match_operand:V8FI 1 "register_operand" "v")
9652 (parallel [(const_int 0) (const_int 1)
9653 (const_int 2) (const_int 3)]))
9654 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))]
9656 "vinsert<shuffletype>64x4\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}"
9657 [(set_attr "type" "sselog")
9658 (set_attr "length_immediate" "1")
9659 (set_attr "prefix" "evex")
9660 (set_attr "mode" "XI")])
;; Expander for the masked vshuf{f,i}64x2 builtin.  Splits the 8-bit
;; immediate (operand 3) into four 2-bit fields; each field F becomes the
;; consecutive index pair (F*2, F*2+1).  The fields at bits 0 and 2 yield
;; indices in 0-7, the fields at bits 4 and 6 are offset by 8.  The eight
;; indices are passed to ..._64x2_1_mask together with the merge source
;; (operand 4) and the mask register (operand 5).
9662 (define_expand "avx512f_shuf_<shuffletype>64x2_mask"
9663 [(match_operand:V8FI 0 "register_operand")
9664 (match_operand:V8FI 1 "register_operand")
9665 (match_operand:V8FI 2 "nonimmediate_operand")
9666 (match_operand:SI 3 "const_0_to_255_operand")
9667 (match_operand:V8FI 4 "register_operand")
9668 (match_operand:QI 5 "register_operand")]
9671 int mask = INTVAL (operands[3]);
9672 emit_insn (gen_avx512f_shuf_<shuffletype>64x2_1_mask
9673 (operands[0], operands[1], operands[2],
9674 GEN_INT (((mask >> 0) & 3) * 2),
9675 GEN_INT (((mask >> 0) & 3) * 2 + 1),
9676 GEN_INT (((mask >> 2) & 3) * 2),
9677 GEN_INT (((mask >> 2) & 3) * 2 + 1),
9678 GEN_INT (((mask >> 4) & 3) * 2 + 8),
9679 GEN_INT (((mask >> 4) & 3) * 2 + 9),
9680 GEN_INT (((mask >> 6) & 3) * 2 + 8),
9681 GEN_INT (((mask >> 6) & 3) * 2 + 9),
9682 operands[4], operands[5]));
;; Insn behind vshuf{f,i}64x2.  Operands 3-10 are element indices into the
;; concatenation of operands 1 and 2: the first four must be in 0-7, the
;; last four in 8-15.  The condition requires every index pair
;; (3,4), (5,6), (7,8), (9,10) to be consecutive.  The output code rebuilds
;; the 8-bit immediate from the first index of each pair (two bits per
;; pair, subtracting 8 for the upper pairs) and stores it in operand 3.
9686 (define_insn "avx512f_shuf_<shuffletype>64x2_1<mask_name>"
9687 [(set (match_operand:V8FI 0 "register_operand" "=v")
9689 (vec_concat:<ssedoublemode>
9690 (match_operand:V8FI 1 "register_operand" "v")
9691 (match_operand:V8FI 2 "nonimmediate_operand" "vm"))
9692 (parallel [(match_operand 3 "const_0_to_7_operand")
9693 (match_operand 4 "const_0_to_7_operand")
9694 (match_operand 5 "const_0_to_7_operand")
9695 (match_operand 6 "const_0_to_7_operand")
9696 (match_operand 7 "const_8_to_15_operand")
9697 (match_operand 8 "const_8_to_15_operand")
9698 (match_operand 9 "const_8_to_15_operand")
9699 (match_operand 10 "const_8_to_15_operand")])))]
9701 && (INTVAL (operands[3]) == (INTVAL (operands[4]) - 1)
9702 && INTVAL (operands[5]) == (INTVAL (operands[6]) - 1)
9703 && INTVAL (operands[7]) == (INTVAL (operands[8]) - 1)
9704 && INTVAL (operands[9]) == (INTVAL (operands[10]) - 1))"
9707 mask = INTVAL (operands[3]) / 2;
9708 mask |= INTVAL (operands[5]) / 2 << 2;
9709 mask |= (INTVAL (operands[7]) - 8) / 2 << 4;
9710 mask |= (INTVAL (operands[9]) - 8) / 2 << 6;
9711 operands[3] = GEN_INT (mask);
9713 return "vshuf<shuffletype>64x2\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
9715 [(set_attr "type" "sselog")
9716 (set_attr "length_immediate" "1")
9717 (set_attr "prefix" "evex")
9718 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the masked vshuf{f,i}32x4 builtin.  Splits the 8-bit
;; immediate (operand 3) into four 2-bit fields; each field F becomes the
;; four consecutive indices F*4 .. F*4+3.  The fields at bits 0 and 2
;; yield indices in 0-15, the fields at bits 4 and 6 are offset by 16.
;; The sixteen indices are passed to ..._32x4_1_mask together with the
;; merge source (operand 4) and the mask register (operand 5).
9720 (define_expand "avx512f_shuf_<shuffletype>32x4_mask"
9721 [(match_operand:V16FI 0 "register_operand")
9722 (match_operand:V16FI 1 "register_operand")
9723 (match_operand:V16FI 2 "nonimmediate_operand")
9724 (match_operand:SI 3 "const_0_to_255_operand")
9725 (match_operand:V16FI 4 "register_operand")
9726 (match_operand:HI 5 "register_operand")]
9729 int mask = INTVAL (operands[3]);
9730 emit_insn (gen_avx512f_shuf_<shuffletype>32x4_1_mask
9731 (operands[0], operands[1], operands[2],
9732 GEN_INT (((mask >> 0) & 3) * 4),
9733 GEN_INT (((mask >> 0) & 3) * 4 + 1),
9734 GEN_INT (((mask >> 0) & 3) * 4 + 2),
9735 GEN_INT (((mask >> 0) & 3) * 4 + 3),
9736 GEN_INT (((mask >> 2) & 3) * 4),
9737 GEN_INT (((mask >> 2) & 3) * 4 + 1),
9738 GEN_INT (((mask >> 2) & 3) * 4 + 2),
9739 GEN_INT (((mask >> 2) & 3) * 4 + 3),
9740 GEN_INT (((mask >> 4) & 3) * 4 + 16),
9741 GEN_INT (((mask >> 4) & 3) * 4 + 17),
9742 GEN_INT (((mask >> 4) & 3) * 4 + 18),
9743 GEN_INT (((mask >> 4) & 3) * 4 + 19),
9744 GEN_INT (((mask >> 6) & 3) * 4 + 16),
9745 GEN_INT (((mask >> 6) & 3) * 4 + 17),
9746 GEN_INT (((mask >> 6) & 3) * 4 + 18),
9747 GEN_INT (((mask >> 6) & 3) * 4 + 19),
9748 operands[4], operands[5]));
;; Insn behind vshuf{f,i}32x4.  Operands 3-18 are element indices into the
;; concatenation of operands 1 and 2: the first eight must be in 0-15, the
;; last eight in 16-31.  The condition requires each group of four
;; (3-6, 7-10, 11-14, 15-18) to be consecutive starting from its first
;; index.  The output code rebuilds the 8-bit immediate from the first
;; index of each group (two bits per group, subtracting 16 for the upper
;; groups) and stores it in operand 3.
9752 (define_insn "avx512f_shuf_<shuffletype>32x4_1<mask_name>"
9753 [(set (match_operand:V16FI 0 "register_operand" "=v")
9755 (vec_concat:<ssedoublemode>
9756 (match_operand:V16FI 1 "register_operand" "v")
9757 (match_operand:V16FI 2 "nonimmediate_operand" "vm"))
9758 (parallel [(match_operand 3 "const_0_to_15_operand")
9759 (match_operand 4 "const_0_to_15_operand")
9760 (match_operand 5 "const_0_to_15_operand")
9761 (match_operand 6 "const_0_to_15_operand")
9762 (match_operand 7 "const_0_to_15_operand")
9763 (match_operand 8 "const_0_to_15_operand")
9764 (match_operand 9 "const_0_to_15_operand")
9765 (match_operand 10 "const_0_to_15_operand")
9766 (match_operand 11 "const_16_to_31_operand")
9767 (match_operand 12 "const_16_to_31_operand")
9768 (match_operand 13 "const_16_to_31_operand")
9769 (match_operand 14 "const_16_to_31_operand")
9770 (match_operand 15 "const_16_to_31_operand")
9771 (match_operand 16 "const_16_to_31_operand")
9772 (match_operand 17 "const_16_to_31_operand")
9773 (match_operand 18 "const_16_to_31_operand")])))]
9775 && (INTVAL (operands[3]) == (INTVAL (operands[4]) - 1)
9776 && INTVAL (operands[3]) == (INTVAL (operands[5]) - 2)
9777 && INTVAL (operands[3]) == (INTVAL (operands[6]) - 3)
9778 && INTVAL (operands[7]) == (INTVAL (operands[8]) - 1)
9779 && INTVAL (operands[7]) == (INTVAL (operands[9]) - 2)
9780 && INTVAL (operands[7]) == (INTVAL (operands[10]) - 3)
9781 && INTVAL (operands[11]) == (INTVAL (operands[12]) - 1)
9782 && INTVAL (operands[11]) == (INTVAL (operands[13]) - 2)
9783 && INTVAL (operands[11]) == (INTVAL (operands[14]) - 3)
9784 && INTVAL (operands[15]) == (INTVAL (operands[16]) - 1)
9785 && INTVAL (operands[15]) == (INTVAL (operands[17]) - 2)
9786 && INTVAL (operands[15]) == (INTVAL (operands[18]) - 3))"
9789 mask = INTVAL (operands[3]) / 4;
9790 mask |= INTVAL (operands[7]) / 4 << 2;
9791 mask |= (INTVAL (operands[11]) - 16) / 4 << 4;
9792 mask |= (INTVAL (operands[15]) - 16) / 4 << 6;
9793 operands[3] = GEN_INT (mask);
9795 return "vshuf<shuffletype>32x4\t{%3, %2, %1, %0<mask_operand19>|%0<mask_operand19>, %1, %2, %3}";
9797 [(set_attr "type" "sselog")
9798 (set_attr "length_immediate" "1")
9799 (set_attr "prefix" "evex")
9800 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the masked 512-bit vpshufd builtin.  Splits the 8-bit
;; immediate (operand 2) into four 2-bit selectors and replicates them for
;; each group of four dwords by adding 0, 4, 8 and 12.  The sixteen
;; resulting indices go to avx512f_pshufd_1_mask together with the merge
;; source (operand 3) and the mask register (operand 4).
9802 (define_expand "avx512f_pshufdv3_mask"
9803 [(match_operand:V16SI 0 "register_operand")
9804 (match_operand:V16SI 1 "nonimmediate_operand")
9805 (match_operand:SI 2 "const_0_to_255_operand")
9806 (match_operand:V16SI 3 "register_operand")
9807 (match_operand:HI 4 "register_operand")]
9810 int mask = INTVAL (operands[2]);
9811 emit_insn (gen_avx512f_pshufd_1_mask (operands[0], operands[1],
9812 GEN_INT ((mask >> 0) & 3),
9813 GEN_INT ((mask >> 2) & 3),
9814 GEN_INT ((mask >> 4) & 3),
9815 GEN_INT ((mask >> 6) & 3),
9816 GEN_INT (((mask >> 0) & 3) + 4),
9817 GEN_INT (((mask >> 2) & 3) + 4),
9818 GEN_INT (((mask >> 4) & 3) + 4),
9819 GEN_INT (((mask >> 6) & 3) + 4),
9820 GEN_INT (((mask >> 0) & 3) + 8),
9821 GEN_INT (((mask >> 2) & 3) + 8),
9822 GEN_INT (((mask >> 4) & 3) + 8),
9823 GEN_INT (((mask >> 6) & 3) + 8),
9824 GEN_INT (((mask >> 0) & 3) + 12),
9825 GEN_INT (((mask >> 2) & 3) + 12),
9826 GEN_INT (((mask >> 4) & 3) + 12),
9827 GEN_INT (((mask >> 6) & 3) + 12),
9828 operands[3], operands[4]));
;; Insn behind the 512-bit vpshufd.  Operands 2-17 are sixteen element
;; indices, four per group of four dwords (ranges 0-3, 4-7, 8-11, 12-15).
;; The condition requires the indices of the second, third and fourth
;; groups to equal those of the first group plus 4, 8 and 12.
;; The output code packs operands 2-5 into the 8-bit immediate (two bits
;; each) and stores it in operand 2.
9832 (define_insn "avx512f_pshufd_1<mask_name>"
9833 [(set (match_operand:V16SI 0 "register_operand" "=v")
9835 (match_operand:V16SI 1 "nonimmediate_operand" "vm")
9836 (parallel [(match_operand 2 "const_0_to_3_operand")
9837 (match_operand 3 "const_0_to_3_operand")
9838 (match_operand 4 "const_0_to_3_operand")
9839 (match_operand 5 "const_0_to_3_operand")
9840 (match_operand 6 "const_4_to_7_operand")
9841 (match_operand 7 "const_4_to_7_operand")
9842 (match_operand 8 "const_4_to_7_operand")
9843 (match_operand 9 "const_4_to_7_operand")
9844 (match_operand 10 "const_8_to_11_operand")
9845 (match_operand 11 "const_8_to_11_operand")
9846 (match_operand 12 "const_8_to_11_operand")
9847 (match_operand 13 "const_8_to_11_operand")
9848 (match_operand 14 "const_12_to_15_operand")
9849 (match_operand 15 "const_12_to_15_operand")
9850 (match_operand 16 "const_12_to_15_operand")
9851 (match_operand 17 "const_12_to_15_operand")])))]
9853 && INTVAL (operands[2]) + 4 == INTVAL (operands[6])
9854 && INTVAL (operands[3]) + 4 == INTVAL (operands[7])
9855 && INTVAL (operands[4]) + 4 == INTVAL (operands[8])
9856 && INTVAL (operands[5]) + 4 == INTVAL (operands[9])
9857 && INTVAL (operands[2]) + 8 == INTVAL (operands[10])
9858 && INTVAL (operands[3]) + 8 == INTVAL (operands[11])
9859 && INTVAL (operands[4]) + 8 == INTVAL (operands[12])
9860 && INTVAL (operands[5]) + 8 == INTVAL (operands[13])
9861 && INTVAL (operands[2]) + 12 == INTVAL (operands[14])
9862 && INTVAL (operands[3]) + 12 == INTVAL (operands[15])
9863 && INTVAL (operands[4]) + 12 == INTVAL (operands[16])
9864 && INTVAL (operands[5]) + 12 == INTVAL (operands[17])"
9867 mask |= INTVAL (operands[2]) << 0;
9868 mask |= INTVAL (operands[3]) << 2;
9869 mask |= INTVAL (operands[4]) << 4;
9870 mask |= INTVAL (operands[5]) << 6;
9871 operands[2] = GEN_INT (mask);
9873 return "vpshufd\t{%2, %1, %0<mask_operand18>|%0<mask_operand18>, %1, %2}";
9875 [(set_attr "type" "sselog1")
9876 (set_attr "prefix" "evex")
9877 (set_attr "length_immediate" "1")
9878 (set_attr "mode" "XI")])
;; Expander for the 256-bit vpshufd builtin.  Splits the 8-bit immediate
;; (operand 2) into four 2-bit selectors and passes them to
;; avx2_pshufd_1 twice: as-is for the first four indices and with 4
;; added for the second four.
9880 (define_expand "avx2_pshufdv3"
9881 [(match_operand:V8SI 0 "register_operand")
9882 (match_operand:V8SI 1 "nonimmediate_operand")
9883 (match_operand:SI 2 "const_0_to_255_operand")]
9886 int mask = INTVAL (operands[2]);
9887 emit_insn (gen_avx2_pshufd_1 (operands[0], operands[1],
9888 GEN_INT ((mask >> 0) & 3),
9889 GEN_INT ((mask >> 2) & 3),
9890 GEN_INT ((mask >> 4) & 3),
9891 GEN_INT ((mask >> 6) & 3),
9892 GEN_INT (((mask >> 0) & 3) + 4),
9893 GEN_INT (((mask >> 2) & 3) + 4),
9894 GEN_INT (((mask >> 4) & 3) + 4),
9895 GEN_INT (((mask >> 6) & 3) + 4)));
;; Insn behind the 256-bit vpshufd.  Operands 2-5 are indices in 0-3 and
;; operands 6-9 indices in 4-7; the condition requires each of the latter
;; to equal the corresponding former plus 4.  The output code packs
;; operands 2-5 into the 8-bit immediate (two bits each) and stores it in
;; operand 2.
9899 (define_insn "avx2_pshufd_1"
9900 [(set (match_operand:V8SI 0 "register_operand" "=x")
9902 (match_operand:V8SI 1 "nonimmediate_operand" "xm")
9903 (parallel [(match_operand 2 "const_0_to_3_operand")
9904 (match_operand 3 "const_0_to_3_operand")
9905 (match_operand 4 "const_0_to_3_operand")
9906 (match_operand 5 "const_0_to_3_operand")
9907 (match_operand 6 "const_4_to_7_operand")
9908 (match_operand 7 "const_4_to_7_operand")
9909 (match_operand 8 "const_4_to_7_operand")
9910 (match_operand 9 "const_4_to_7_operand")])))]
9912 && INTVAL (operands[2]) + 4 == INTVAL (operands[6])
9913 && INTVAL (operands[3]) + 4 == INTVAL (operands[7])
9914 && INTVAL (operands[4]) + 4 == INTVAL (operands[8])
9915 && INTVAL (operands[5]) + 4 == INTVAL (operands[9])"
9918 mask |= INTVAL (operands[2]) << 0;
9919 mask |= INTVAL (operands[3]) << 2;
9920 mask |= INTVAL (operands[4]) << 4;
9921 mask |= INTVAL (operands[5]) << 6;
9922 operands[2] = GEN_INT (mask);
9924 return "vpshufd\t{%2, %1, %0|%0, %1, %2}";
9926 [(set_attr "type" "sselog1")
9927 (set_attr "prefix" "vex")
9928 (set_attr "length_immediate" "1")
9929 (set_attr "mode" "OI")])
;; Expander for the 128-bit pshufd builtin.  Splits the immediate in
;; operand 2 into four 2-bit selectors (bits 0-1, 2-3, 4-5, 6-7) and
;; passes them as separate index operands to sse2_pshufd_1.
9931 (define_expand "sse2_pshufd"
9932 [(match_operand:V4SI 0 "register_operand")
9933 (match_operand:V4SI 1 "nonimmediate_operand")
9934 (match_operand:SI 2 "const_int_operand")]
9937 int mask = INTVAL (operands[2]);
9938 emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
9939 GEN_INT ((mask >> 0) & 3),
9940 GEN_INT ((mask >> 2) & 3),
9941 GEN_INT ((mask >> 4) & 3),
9942 GEN_INT ((mask >> 6) & 3)));
;; Insn behind the 128-bit pshufd.  Operands 2-5 are four element indices
;; in 0-3; the output code packs them into the 8-bit immediate (two bits
;; each), stores it in operand 2 and prints %vpshufd, so the same pattern
;; serves the legacy and VEX encodings (prefix maybe_vex).
9946 (define_insn "sse2_pshufd_1"
9947 [(set (match_operand:V4SI 0 "register_operand" "=x")
9949 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
9950 (parallel [(match_operand 2 "const_0_to_3_operand")
9951 (match_operand 3 "const_0_to_3_operand")
9952 (match_operand 4 "const_0_to_3_operand")
9953 (match_operand 5 "const_0_to_3_operand")])))]
9957 mask |= INTVAL (operands[2]) << 0;
9958 mask |= INTVAL (operands[3]) << 2;
9959 mask |= INTVAL (operands[4]) << 4;
9960 mask |= INTVAL (operands[5]) << 6;
9961 operands[2] = GEN_INT (mask);
9963 return "%vpshufd\t{%2, %1, %0|%0, %1, %2}";
9965 [(set_attr "type" "sselog1")
9966 (set_attr "prefix_data16" "1")
9967 (set_attr "prefix" "maybe_vex")
9968 (set_attr "length_immediate" "1")
9969 (set_attr "mode" "TI")])
;; Expander for the 256-bit vpshuflw builtin.  Splits the 8-bit immediate
;; (operand 2) into four 2-bit selectors and passes them to
;; avx2_pshuflw_1 twice: as-is for the first four indices and with 8
;; added for the second four.
9971 (define_expand "avx2_pshuflwv3"
9972 [(match_operand:V16HI 0 "register_operand")
9973 (match_operand:V16HI 1 "nonimmediate_operand")
9974 (match_operand:SI 2 "const_0_to_255_operand")]
9977 int mask = INTVAL (operands[2]);
9978 emit_insn (gen_avx2_pshuflw_1 (operands[0], operands[1],
9979 GEN_INT ((mask >> 0) & 3),
9980 GEN_INT ((mask >> 2) & 3),
9981 GEN_INT ((mask >> 4) & 3),
9982 GEN_INT ((mask >> 6) & 3),
9983 GEN_INT (((mask >> 0) & 3) + 8),
9984 GEN_INT (((mask >> 2) & 3) + 8),
9985 GEN_INT (((mask >> 4) & 3) + 8),
9986 GEN_INT (((mask >> 6) & 3) + 8)));
;; Insn behind the 256-bit vpshuflw.  Operands 2-5 are indices in 0-3 and
;; operands 6-9 indices in 8-11; the condition requires each of the latter
;; to equal the corresponding former plus 8.  The output code packs
;; operands 2-5 into the 8-bit immediate (two bits each) and stores it in
;; operand 2.
;; NOTE(review): the fixed indices between the two variable groups are
;; only partly visible in this excerpt.
9990 (define_insn "avx2_pshuflw_1"
9991 [(set (match_operand:V16HI 0 "register_operand" "=x")
9993 (match_operand:V16HI 1 "nonimmediate_operand" "xm")
9994 (parallel [(match_operand 2 "const_0_to_3_operand")
9995 (match_operand 3 "const_0_to_3_operand")
9996 (match_operand 4 "const_0_to_3_operand")
9997 (match_operand 5 "const_0_to_3_operand")
10002 (match_operand 6 "const_8_to_11_operand")
10003 (match_operand 7 "const_8_to_11_operand")
10004 (match_operand 8 "const_8_to_11_operand")
10005 (match_operand 9 "const_8_to_11_operand")
10009 (const_int 15)])))]
10011 && INTVAL (operands[2]) + 8 == INTVAL (operands[6])
10012 && INTVAL (operands[3]) + 8 == INTVAL (operands[7])
10013 && INTVAL (operands[4]) + 8 == INTVAL (operands[8])
10014 && INTVAL (operands[5]) + 8 == INTVAL (operands[9])"
10017 mask |= INTVAL (operands[2]) << 0;
10018 mask |= INTVAL (operands[3]) << 2;
10019 mask |= INTVAL (operands[4]) << 4;
10020 mask |= INTVAL (operands[5]) << 6;
10021 operands[2] = GEN_INT (mask);
10023 return "vpshuflw\t{%2, %1, %0|%0, %1, %2}";
10025 [(set_attr "type" "sselog")
10026 (set_attr "prefix" "vex")
10027 (set_attr "length_immediate" "1")
10028 (set_attr "mode" "OI")])
;; Expander for the 128-bit pshuflw builtin.  Splits the immediate in
;; operand 2 into four 2-bit selectors (bits 0-1, 2-3, 4-5, 6-7) and
;; passes them as separate index operands to sse2_pshuflw_1.
10030 (define_expand "sse2_pshuflw"
10031 [(match_operand:V8HI 0 "register_operand")
10032 (match_operand:V8HI 1 "nonimmediate_operand")
10033 (match_operand:SI 2 "const_int_operand")]
10036 int mask = INTVAL (operands[2]);
10037 emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
10038 GEN_INT ((mask >> 0) & 3),
10039 GEN_INT ((mask >> 2) & 3),
10040 GEN_INT ((mask >> 4) & 3),
10041 GEN_INT ((mask >> 6) & 3)));
;; Insn behind the 128-bit pshuflw.  Operands 2-5 are four element indices
;; in 0-3; the output code packs them into the 8-bit immediate (two bits
;; each), stores it in operand 2 and prints %vpshuflw (prefix maybe_vex).
;; NOTE(review): the remainder of the index list and the insn condition
;; are not visible in this excerpt.
10045 (define_insn "sse2_pshuflw_1"
10046 [(set (match_operand:V8HI 0 "register_operand" "=x")
10048 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10049 (parallel [(match_operand 2 "const_0_to_3_operand")
10050 (match_operand 3 "const_0_to_3_operand")
10051 (match_operand 4 "const_0_to_3_operand")
10052 (match_operand 5 "const_0_to_3_operand")
10060 mask |= INTVAL (operands[2]) << 0;
10061 mask |= INTVAL (operands[3]) << 2;
10062 mask |= INTVAL (operands[4]) << 4;
10063 mask |= INTVAL (operands[5]) << 6;
10064 operands[2] = GEN_INT (mask);
10066 return "%vpshuflw\t{%2, %1, %0|%0, %1, %2}";
10068 [(set_attr "type" "sselog")
10069 (set_attr "prefix_data16" "0")
10070 (set_attr "prefix_rep" "1")
10071 (set_attr "prefix" "maybe_vex")
10072 (set_attr "length_immediate" "1")
10073 (set_attr "mode" "TI")])
;; Expander for the 256-bit vpshufhw builtin.  Splits the 8-bit immediate
;; (operand 2) into four 2-bit selectors and passes them to
;; avx2_pshufhw_1 twice: offset by 4 for the first four indices and by 12
;; for the second four.
10075 (define_expand "avx2_pshufhwv3"
10076 [(match_operand:V16HI 0 "register_operand")
10077 (match_operand:V16HI 1 "nonimmediate_operand")
10078 (match_operand:SI 2 "const_0_to_255_operand")]
10081 int mask = INTVAL (operands[2]);
10082 emit_insn (gen_avx2_pshufhw_1 (operands[0], operands[1],
10083 GEN_INT (((mask >> 0) & 3) + 4),
10084 GEN_INT (((mask >> 2) & 3) + 4),
10085 GEN_INT (((mask >> 4) & 3) + 4),
10086 GEN_INT (((mask >> 6) & 3) + 4),
10087 GEN_INT (((mask >> 0) & 3) + 12),
10088 GEN_INT (((mask >> 2) & 3) + 12),
10089 GEN_INT (((mask >> 4) & 3) + 12),
10090 GEN_INT (((mask >> 6) & 3) + 12)));
;; Insn behind the 256-bit vpshufhw.  Operands 2-5 are indices in 4-7 and
;; operands 6-9 indices in 12-15; the condition requires each of the
;; latter to equal the corresponding former plus 8.  The output code
;; subtracts 4 from operands 2-5, packs them into the 8-bit immediate (two
;; bits each) and stores it in operand 2.
;; NOTE(review): the fixed indices preceding each variable group are only
;; partly visible in this excerpt.
10094 (define_insn "avx2_pshufhw_1"
10095 [(set (match_operand:V16HI 0 "register_operand" "=x")
10097 (match_operand:V16HI 1 "nonimmediate_operand" "xm")
10098 (parallel [(const_int 0)
10102 (match_operand 2 "const_4_to_7_operand")
10103 (match_operand 3 "const_4_to_7_operand")
10104 (match_operand 4 "const_4_to_7_operand")
10105 (match_operand 5 "const_4_to_7_operand")
10110 (match_operand 6 "const_12_to_15_operand")
10111 (match_operand 7 "const_12_to_15_operand")
10112 (match_operand 8 "const_12_to_15_operand")
10113 (match_operand 9 "const_12_to_15_operand")])))]
10115 && INTVAL (operands[2]) + 8 == INTVAL (operands[6])
10116 && INTVAL (operands[3]) + 8 == INTVAL (operands[7])
10117 && INTVAL (operands[4]) + 8 == INTVAL (operands[8])
10118 && INTVAL (operands[5]) + 8 == INTVAL (operands[9])"
10121 mask |= (INTVAL (operands[2]) - 4) << 0;
10122 mask |= (INTVAL (operands[3]) - 4) << 2;
10123 mask |= (INTVAL (operands[4]) - 4) << 4;
10124 mask |= (INTVAL (operands[5]) - 4) << 6;
10125 operands[2] = GEN_INT (mask);
10127 return "vpshufhw\t{%2, %1, %0|%0, %1, %2}";
10129 [(set_attr "type" "sselog")
10130 (set_attr "prefix" "vex")
10131 (set_attr "length_immediate" "1")
10132 (set_attr "mode" "OI")])
;; Expander for the 128-bit pshufhw builtin.  Splits the immediate in
;; operand 2 into four 2-bit selectors, adds 4 to each and passes them as
;; separate index operands to sse2_pshufhw_1.
10134 (define_expand "sse2_pshufhw"
10135 [(match_operand:V8HI 0 "register_operand")
10136 (match_operand:V8HI 1 "nonimmediate_operand")
10137 (match_operand:SI 2 "const_int_operand")]
10140 int mask = INTVAL (operands[2]);
10141 emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
10142 GEN_INT (((mask >> 0) & 3) + 4),
10143 GEN_INT (((mask >> 2) & 3) + 4),
10144 GEN_INT (((mask >> 4) & 3) + 4),
10145 GEN_INT (((mask >> 6) & 3) + 4)));
;; Insn behind the 128-bit pshufhw.  Operands 2-5 are four element indices
;; in 4-7; the output code subtracts 4 from each, packs them into the
;; 8-bit immediate (two bits each), stores it in operand 2 and prints
;; %vpshufhw (prefix maybe_vex).
;; NOTE(review): the fixed leading indices are only partly visible in this
;; excerpt.
10149 (define_insn "sse2_pshufhw_1"
10150 [(set (match_operand:V8HI 0 "register_operand" "=x")
10152 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10153 (parallel [(const_int 0)
10157 (match_operand 2 "const_4_to_7_operand")
10158 (match_operand 3 "const_4_to_7_operand")
10159 (match_operand 4 "const_4_to_7_operand")
10160 (match_operand 5 "const_4_to_7_operand")])))]
10164 mask |= (INTVAL (operands[2]) - 4) << 0;
10165 mask |= (INTVAL (operands[3]) - 4) << 2;
10166 mask |= (INTVAL (operands[4]) - 4) << 4;
10167 mask |= (INTVAL (operands[5]) - 4) << 6;
10168 operands[2] = GEN_INT (mask);
10170 return "%vpshufhw\t{%2, %1, %0|%0, %1, %2}";
10172 [(set_attr "type" "sselog")
10173 (set_attr "prefix_rep" "1")
10174 (set_attr "prefix_data16" "0")
10175 (set_attr "prefix" "maybe_vex")
10176 (set_attr "length_immediate" "1")
10177 (set_attr "mode" "TI")])
;; Expander that builds a V4SI from SImode operand 1 (duplicated across
;; the vector in the visible RTL).  The preparation statement sets
;; operands[2] to the V4SI zero constant.
;; NOTE(review): how operand 2 is merged is not visible in this excerpt;
;; presumably the upper elements are zeroed -- confirm.
10179 (define_expand "sse2_loadd"
10180 [(set (match_operand:V4SI 0 "register_operand")
10182 (vec_duplicate:V4SI
10183 (match_operand:SI 1 "nonimmediate_operand"))
10187 "operands[2] = CONST0_RTX (V4SImode);")
;; Load SImode operand 2 into a V4SI, combined with operand 1 (a register
;; or zero per its predicate).  Five alternatives:
;;   0: memory source, %vmovd (isa sse2)
;;   1: general-register source, %vmovd (output constraint Yi)
;;   2: memory source, movss (isa noavx)
;;   3: SSE-register source with operand 1 tied to the output, movss
;;   4: SSE-register source, three-operand vmovss (isa avx)
;; Alternatives 0-2 use constraint "C" for operand 1 (presumably the
;; constant-zero case -- confirm against the constraint definitions).
10189 (define_insn "sse2_loadld"
10190 [(set (match_operand:V4SI 0 "register_operand" "=x,Yi,x,x,x")
10192 (vec_duplicate:V4SI
10193 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x,x"))
10194 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,C,0,x")
10198 %vmovd\t{%2, %0|%0, %2}
10199 %vmovd\t{%2, %0|%0, %2}
10200 movss\t{%2, %0|%0, %2}
10201 movss\t{%2, %0|%0, %2}
10202 vmovss\t{%2, %1, %0|%0, %1, %2}"
10203 [(set_attr "isa" "sse2,*,noavx,noavx,avx")
10204 (set_attr "type" "ssemov")
10205 (set_attr "prefix" "maybe_vex,maybe_vex,orig,orig,vex")
10206 (set_attr "mode" "TI,TI,V4SF,SF,SF")])
;; Extract one element, selected by constant operand 2, from a VI12_128
;; vector register into a general register (alternative 0) or memory
;; (alternative 1) using %vpextr<ssemodesuffix>.  The register form prints
;; the destination with the %k0 modifier.  prefix_data16 and prefix_extra
;; are special-cased for alternative 0 in V8HImode.
10208 (define_insn "*vec_extract<mode>"
10209 [(set (match_operand:<ssescalarmode> 0 "nonimmediate_operand" "=r,m")
10210 (vec_select:<ssescalarmode>
10211 (match_operand:VI12_128 1 "register_operand" "x,x")
10213 [(match_operand:SI 2 "const_0_to_<ssescalarnummask>_operand")])))]
10216 %vpextr<ssemodesuffix>\t{%2, %1, %k0|%k0, %1, %2}
10217 %vpextr<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10218 [(set_attr "type" "sselog1")
10219 (set (attr "prefix_data16")
10221 (and (eq_attr "alternative" "0")
10222 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
10224 (const_string "*")))
10225 (set (attr "prefix_extra")
10227 (and (eq_attr "alternative" "0")
10228 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
10230 (const_string "1")))
10231 (set_attr "length_immediate" "1")
10232 (set_attr "prefix" "maybe_vex")
10233 (set_attr "mode" "TI")])
;; Halfword extract for targets that have SSE2 but not SSE4.1 (see the
;; condition).  Prints the non-VEX pextrw with an SSE register source and
;; a general register destination written through %k0; operand 2 is the
;; element index constant (const_0_to_7_operand).
10235 (define_insn "*vec_extractv8hi_sse2"
10236 [(set (match_operand:HI 0 "register_operand" "=r")
10238 (match_operand:V8HI 1 "register_operand" "x")
10240 [(match_operand:SI 2 "const_0_to_7_operand")])))]
10241 "TARGET_SSE2 && !TARGET_SSE4_1"
10242 "pextrw\t{%2, %1, %k0|%k0, %1, %2}"
10243 [(set_attr "type" "sselog1")
10244 (set_attr "prefix_data16" "1")
10245 (set_attr "length_immediate" "1")
10246 (set_attr "mode" "TI")])
;; Byte extract from a V16QI register whose result is an SWI48 general
;; register; prints %vpextrb with the destination written through %k0.
;; NOTE(review): the lines carrying the extension operator and the insn
;; condition are not visible here; the "_zext" name suggests a zero
;; extension -- confirm against the full pattern.
10248 (define_insn "*vec_extractv16qi_zext"
10249 [(set (match_operand:SWI48 0 "register_operand" "=r")
10252 (match_operand:V16QI 1 "register_operand" "x")
10254 [(match_operand:SI 2 "const_0_to_15_operand")]))))]
10256 "%vpextrb\t{%2, %1, %k0|%k0, %1, %2}"
10257 [(set_attr "type" "sselog1")
10258 (set_attr "prefix_extra" "1")
10259 (set_attr "length_immediate" "1")
10260 (set_attr "prefix" "maybe_vex")
10261 (set_attr "mode" "TI")])
;; Halfword extract from a V8HI register whose result is an SWI48 general
;; register; prints %vpextrw with the destination written through %k0.
;; NOTE(review): the lines carrying the extension operator and the insn
;; condition are not visible here; the "_zext" name suggests a zero
;; extension -- confirm against the full pattern.
10263 (define_insn "*vec_extractv8hi_zext"
10264 [(set (match_operand:SWI48 0 "register_operand" "=r")
10267 (match_operand:V8HI 1 "register_operand" "x")
10269 [(match_operand:SI 2 "const_0_to_7_operand")]))))]
10271 "%vpextrw\t{%2, %1, %k0|%k0, %1, %2}"
10272 [(set_attr "type" "sselog1")
10273 (set_attr "prefix_data16" "1")
10274 (set_attr "length_immediate" "1")
10275 (set_attr "prefix" "maybe_vex")
10276 (set_attr "mode" "TI")])
;; Element extract where the VI12_128 vector is a memory operand
;; (constraint "o") and the scalar result goes to a general register.
;; The condition and output template are on lines not visible here.
10278 (define_insn "*vec_extract<mode>_mem"
10279 [(set (match_operand:<ssescalarmode> 0 "register_operand" "=r")
10280 (vec_select:<ssescalarmode>
10281 (match_operand:VI12_128 1 "memory_operand" "o")
10283 [(match_operand 2 "const_0_to_<ssescalarnummask>_operand")])))]
;; Extracts element 0 of operand 1 (<ssevecmode>) into an SWI48
;; destination.  Four alternatives; the condition requires SSE and rejects
;; the case where both operands are memory.  The "isa" attribute restricts
;; alternative 1 to sse4.  The output template is not visible here.
10287 (define_insn "*vec_extract<ssevecmodelower>_0"
10288 [(set (match_operand:SWI48 0 "nonimmediate_operand" "=r ,r,x ,m")
10290 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "mYj,x,xm,x")
10291 (parallel [(const_int 0)])))]
10292 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
10294 [(set_attr "isa" "*,sse4,*,*")])
;; Element 0 of a V4SI register delivered to a DImode general register.
;; Enabled only for 64-bit SSE2 targets with
;; TARGET_INTER_UNIT_MOVES_FROM_VEC.  After reload it is split into a
;; DImode zero_extend of operand 1, where operand 1 has been replaced by
;; the same hard register number viewed in SImode.
10296 (define_insn_and_split "*vec_extractv4si_0_zext"
10297 [(set (match_operand:DI 0 "register_operand" "=r")
10300 (match_operand:V4SI 1 "register_operand" "x")
10301 (parallel [(const_int 0)]))))]
10302 "TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_FROM_VEC"
10304 "&& reload_completed"
10305 [(set (match_dup 0) (zero_extend:DI (match_dup 1)))]
10306 "operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));")
;; Element 0 of a V2DI operand into an SSE register (alternative 0) or
;; memory (alternative 1).  Only for SSE targets that are not 64-bit, and
;; never with both operands in memory.  The output template is on lines
;; not visible here.
10308 (define_insn "*vec_extractv2di_0_sse"
10309 [(set (match_operand:DI 0 "nonimmediate_operand" "=x,m")
10311 (match_operand:V2DI 1 "nonimmediate_operand" "xm,x")
10312 (parallel [(const_int 0)])))]
10313 "TARGET_SSE && !TARGET_64BIT
10314 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
;; Post-reload rewrite: extracting element 0 from a vector held in a
;; register becomes a plain move, with operand 1 replaced by the same hard
;; register number expressed in <MODE>mode.
;; NOTE(review): the line that opens this definition is not visible here;
;; the shape (pattern, condition, replacement, preparation code) looks
;; like a define_split -- confirm.
10318 [(set (match_operand:SWI48x 0 "nonimmediate_operand")
10320 (match_operand:<ssevecmode> 1 "register_operand")
10321 (parallel [(const_int 0)])))]
10322 "TARGET_SSE && reload_completed"
10323 [(set (match_dup 0) (match_dup 1))]
10324 "operands[1] = gen_rtx_REG (<MODE>mode, REGNO (operands[1]));")
;; Extracts the V4SI element selected by constant operand 2.
;; Alternative 0 prints %vpextrd to a general register or memory.
;; Alternatives 1 (noavx) and 2 (avx) keep the result in an SSE register:
;; the output code multiplies operand 2 by 4, turning the element index
;; into the byte count used by psrldq / vpsrldq.
10326 (define_insn "*vec_extractv4si"
10327 [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,x,x")
10329 (match_operand:V4SI 1 "register_operand" "x,0,x")
10330 (parallel [(match_operand:SI 2 "const_0_to_3_operand")])))]
10333 switch (which_alternative)
10336 return "%vpextrd\t{%2, %1, %0|%0, %1, %2}";
10339 operands [2] = GEN_INT (INTVAL (operands[2]) * 4);
10340 return "psrldq\t{%2, %0|%0, %2}";
10343 operands [2] = GEN_INT (INTVAL (operands[2]) * 4);
10344 return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
10347 gcc_unreachable ();
10350 [(set_attr "isa" "*,noavx,avx")
10351 (set_attr "type" "sselog1,sseishft1,sseishft1")
10352 (set_attr "prefix_extra" "1,*,*")
10353 (set_attr "length_immediate" "1")
10354 (set_attr "prefix" "maybe_vex,orig,vex")
10355 (set_attr "mode" "TI")])
;; V4SI element extract with a DImode general-register destination, for
;; 64-bit SSE4.1 targets.  Prints %vpextrd and writes the destination
;; through %k0.
;; NOTE(review): the line carrying the extension operator is not visible
;; here; the "_zext" name suggests zero_extend -- confirm.
10357 (define_insn "*vec_extractv4si_zext"
10358 [(set (match_operand:DI 0 "register_operand" "=r")
10361 (match_operand:V4SI 1 "register_operand" "x")
10362 (parallel [(match_operand:SI 2 "const_0_to_3_operand")]))))]
10363 "TARGET_64BIT && TARGET_SSE4_1"
10364 "%vpextrd\t{%2, %1, %k0|%k0, %1, %2}"
10365 [(set_attr "type" "sselog1")
10366 (set_attr "prefix_extra" "1")
10367 (set_attr "length_immediate" "1")
10368 (set_attr "prefix" "maybe_vex")
10369 (set_attr "mode" "TI")])
;; V4SI element extract where the vector is a memory operand (constraint
;; "o") and the SImode result goes to an SSE register (alternative 0) or
;; a general register (alternative 1).  The condition and output template
;; are on lines not visible here.
10371 (define_insn "*vec_extractv4si_mem"
10372 [(set (match_operand:SI 0 "register_operand" "=x,r")
10374 (match_operand:V4SI 1 "memory_operand" "o,o")
10375 (parallel [(match_operand 2 "const_0_to_3_operand")])))]
;; V4SI element extract from memory into a DImode register, for 64-bit
;; SSE targets.  After reload it is split into a DImode zero_extend of
;; operand 1, where operand 1 has been re-addressed as the SImode word at
;; byte offset 4 * operand 2 inside the original vector.
10379 (define_insn_and_split "*vec_extractv4si_zext_mem"
10380 [(set (match_operand:DI 0 "register_operand" "=x,r")
10383 (match_operand:V4SI 1 "memory_operand" "o,o")
10384 (parallel [(match_operand:SI 2 "const_0_to_3_operand")]))))]
10385 "TARGET_64BIT && TARGET_SSE"
10387 "&& reload_completed"
10388 [(set (match_dup 0) (zero_extend:DI (match_dup 1)))]
10390 operands[1] = adjust_address (operands[1], SImode, INTVAL (operands[2]) * 4);
;; Extracts element 1 of a V2DI operand.  Seven alternatives; the
;; condition requires SSE and rejects memory-to-memory.  Visible templates
;; in order: %vpextrq $1 (x64_sse4), %vmovhps to memory, psrldq $8
;; (sse2_noavx), vpsrldq $8 (avx) and movhlps (noavx).  The last two
;; alternatives take the vector from memory (constraint "o"); their
;; templates are on lines not visible here.
10393 (define_insn "*vec_extractv2di_1"
10394 [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,m,x,x,x,x,r")
10396 (match_operand:V2DI 1 "nonimmediate_operand" "x ,x,0,x,x,o,o")
10397 (parallel [(const_int 1)])))]
10398 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
10400 %vpextrq\t{$1, %1, %0|%0, %1, 1}
10401 %vmovhps\t{%1, %0|%0, %1}
10402 psrldq\t{$8, %0|%0, 8}
10403 vpsrldq\t{$8, %1, %0|%0, %1, 8}
10404 movhlps\t{%1, %0|%0, %1}
10407 [(set_attr "isa" "x64_sse4,*,sse2_noavx,avx,noavx,*,x64")
10408 (set_attr "type" "sselog1,ssemov,sseishft1,sseishft1,ssemov,ssemov,imov")
10409 (set_attr "length_immediate" "1,*,1,1,*,*,*")
10410 (set_attr "prefix_rex" "1,*,*,*,*,*,*")
10411 (set_attr "prefix_extra" "1,*,*,*,*,*,*")
10412 (set_attr "prefix" "maybe_vex,maybe_vex,orig,vex,orig,*,*")
10413 (set_attr "mode" "TI,V2SF,TI,TI,V4SF,DI,DI")])
;; Post-reload rewrite: extracting a constant-indexed element from a
;; VI_128 vector in memory becomes a plain scalar move.  The byte offset
;; is the element index (operand 2) times the size of <ssescalarmode>, and
;; operand 1 is re-addressed to that scalar inside the vector.
;; NOTE(review): the line that opens this definition is not visible here;
;; the shape looks like a define_split -- confirm.
10416 [(set (match_operand:<ssescalarmode> 0 "register_operand")
10417 (vec_select:<ssescalarmode>
10418 (match_operand:VI_128 1 "memory_operand")
10420 [(match_operand 2 "const_0_to_<ssescalarnummask>_operand")])))]
10421 "TARGET_SSE && reload_completed"
10422 [(set (match_dup 0) (match_dup 1))]
10424 int offs = INTVAL (operands[2]) * GET_MODE_SIZE (<ssescalarmode>mode);
10426 operands[1] = adjust_address (operands[1], <ssescalarmode>mode, offs);
;; Duplicates an SImode value into every element of a V4SI register.
;; Alternative 0 (sse2): %vpshufd with immediate 0 from an SSE register.
;; Alternative 1 (avx): vbroadcastss from memory.
;; Alternative 2 (noavx): shufps with immediate 0, source tied to the
;; destination.
10429 (define_insn "*vec_dupv4si"
10430 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
10431 (vec_duplicate:V4SI
10432 (match_operand:SI 1 "nonimmediate_operand" " x,m,0")))]
10435 %vpshufd\t{$0, %1, %0|%0, %1, 0}
10436 vbroadcastss\t{%1, %0|%0, %1}
10437 shufps\t{$0, %0, %0|%0, %0, 0}"
10438 [(set_attr "isa" "sse2,avx,noavx")
10439 (set_attr "type" "sselog1,ssemov,sselog1")
10440 (set_attr "length_immediate" "1,0,1")
10441 (set_attr "prefix_extra" "0,1,*")
10442 (set_attr "prefix" "maybe_vex,vex,orig")
10443 (set_attr "mode" "TI,V4SF,V4SF")])
;; Duplicates a DImode value into both elements of a V2DI register.
;; Four alternatives, gated by "isa" as sse2_noavx, avx, sse3 and noavx.
;; Only two templates are visible here: vpunpcklqdq using the %d1
;; modifier, and %vmovddup; the others are on lines not visible.
10445 (define_insn "*vec_dupv2di"
10446 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x,x")
10447 (vec_duplicate:V2DI
10448 (match_operand:DI 1 "nonimmediate_operand" " 0,x,m,0")))]
10452 vpunpcklqdq\t{%d1, %0|%0, %d1}
10453 %vmovddup\t{%1, %0|%0, %1}
10455 [(set_attr "isa" "sse2_noavx,avx,sse3,noavx")
10456 (set_attr "type" "sselog1,sselog1,sselog1,ssemov")
10457 (set_attr "prefix" "orig,vex,maybe_vex,orig")
10458 (set_attr "mode" "TI,TI,DF,V4SF")])
;; Builds a V2SI from two SImode operands; seven alternatives.
;; 0/1: pinsrd / vpinsrd place operand 2 at index 1.
;; 2/3: punpckldq / vpunpckldq combine two SSE registers.
;; 4:   %vmovd of operand 1 when operand 2 is the "C" constant.
;; 5/6: MMX register ("y") forms, punpckldq and movd.
;; The insn condition is on a line not visible here.
10460 (define_insn "*vec_concatv2si_sse4_1"
10461 [(set (match_operand:V2SI 0 "register_operand" "=x, x,x,x, x, *y,*y")
10463 (match_operand:SI 1 "nonimmediate_operand" " 0, x,0,x,rm, 0,rm")
10464 (match_operand:SI 2 "vector_move_operand" "rm,rm,x,x, C,*ym, C")))]
10467 pinsrd\t{$1, %2, %0|%0, %2, 1}
10468 vpinsrd\t{$1, %2, %1, %0|%0, %1, %2, 1}
10469 punpckldq\t{%2, %0|%0, %2}
10470 vpunpckldq\t{%2, %1, %0|%0, %1, %2}
10471 %vmovd\t{%1, %0|%0, %1}
10472 punpckldq\t{%2, %0|%0, %2}
10473 movd\t{%1, %0|%0, %1}"
10474 [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
10475 (set_attr "type" "sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov")
10476 (set_attr "prefix_extra" "1,1,*,*,*,*,*")
10477 (set_attr "length_immediate" "1,1,*,*,*,*,*")
10478 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig")
10479 (set_attr "mode" "TI,TI,TI,TI,TI,DI,DI")])
10481 ;; ??? In theory we can match memory for the MMX alternative, but allowing
10482 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
10483 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; V2SI construction for targets with SSE but without SSE4.1 (see the
;; condition).  Seven alternatives; the first three need sse2 per "isa".
;; Where operand 2 is a register the template is an unpack (punpckldq or
;; unpcklps); where operand 2 is the "C" constant only operand 1 is moved
;; (movd or movss).  Alternatives 2, 5 and 6 use MMX registers ("y").
10484 (define_insn "*vec_concatv2si"
10485 [(set (match_operand:V2SI 0 "register_operand" "=x,x ,*y,x,x,*y,*y")
10487 (match_operand:SI 1 "nonimmediate_operand" " 0,rm,rm,0,m, 0,*rm")
10488 (match_operand:SI 2 "reg_or_0_operand" " x,C ,C, x,C,*y,C")))]
10489 "TARGET_SSE && !TARGET_SSE4_1"
10491 punpckldq\t{%2, %0|%0, %2}
10492 movd\t{%1, %0|%0, %1}
10493 movd\t{%1, %0|%0, %1}
10494 unpcklps\t{%2, %0|%0, %2}
10495 movss\t{%1, %0|%0, %1}
10496 punpckldq\t{%2, %0|%0, %2}
10497 movd\t{%1, %0|%0, %1}"
10498 [(set_attr "isa" "sse2,sse2,sse2,*,*,*,*")
10499 (set_attr "type" "sselog,ssemov,mmxmov,sselog,ssemov,mmxcvt,mmxmov")
10500 (set_attr "mode" "TI,TI,DI,V4SF,SF,DI,DI")])
;; Builds a V4SI from two V2SI halves; five alternatives.
;; 0/1: punpcklqdq / vpunpcklqdq with both halves in SSE registers.
;; 2:   movlhps (noavx, register source).
;; 3/4: movhps / vmovhps with operand 2 in memory, printed with the %q2
;;      modifier in the Intel-syntax half of the template.
;; The insn condition is on a line not visible here.
10502 (define_insn "*vec_concatv4si"
10503 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x,x,x")
10505 (match_operand:V2SI 1 "register_operand" " 0,x,0,0,x")
10506 (match_operand:V2SI 2 "nonimmediate_operand" " x,x,x,m,m")))]
10509 punpcklqdq\t{%2, %0|%0, %2}
10510 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
10511 movlhps\t{%2, %0|%0, %2}
10512 movhps\t{%2, %0|%0, %q2}
10513 vmovhps\t{%2, %1, %0|%0, %1, %q2}"
10514 [(set_attr "isa" "sse2_noavx,avx,noavx,noavx,avx")
10515 (set_attr "type" "sselog,sselog,ssemov,ssemov,ssemov")
10516 (set_attr "prefix" "orig,vex,orig,orig,vex")
10517 (set_attr "mode" "TI,TI,V4SF,V2SF,V2SF")])
10519 ;; movd instead of movq is required to handle broken assemblers.
;; Builds a V2DI from two DImode operands; ten alternatives.
;; 0/1: pinsrq / vpinsrq place operand 2 at index 1.
;; 2:   general register source; the template is chosen in C: %vmovq when
;;      HAVE_AS_IX86_INTERUNIT_MOVQ is nonzero, %vmovd otherwise.
;; 3:   %vmovq from an SSE register or memory.
;; 4:   movq2dq from an MMX register.
;; 5/6: punpcklqdq / vpunpcklqdq.
;; 7:   movlhps.
;; 8/9: movhps / vmovhps with operand 2 in memory.
;; The visible attribute fragment selects "sselog" for alternatives
;; 0, 1, 5 and 6 and "ssemov" for the rest.
10520 (define_insn "vec_concatv2di"
10521 [(set (match_operand:V2DI 0 "register_operand"
10522 "=x,x ,Yi,x ,!x,x,x,x,x,x")
10524 (match_operand:DI 1 "nonimmediate_operand"
10525 " 0,x ,r ,xm,*y,0,x,0,0,x")
10526 (match_operand:DI 2 "vector_move_operand"
10527 "rm,rm,C ,C ,C ,x,x,x,m,m")))]
10530 pinsrq\t{$1, %2, %0|%0, %2, 1}
10531 vpinsrq\t{$1, %2, %1, %0|%0, %1, %2, 1}
10532 * return HAVE_AS_IX86_INTERUNIT_MOVQ ? \"%vmovq\t{%1, %0|%0, %1}\" : \"%vmovd\t{%1, %0|%0, %1}\";
10533 %vmovq\t{%1, %0|%0, %1}
10534 movq2dq\t{%1, %0|%0, %1}
10535 punpcklqdq\t{%2, %0|%0, %2}
10536 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
10537 movlhps\t{%2, %0|%0, %2}
10538 movhps\t{%2, %0|%0, %2}
10539 vmovhps\t{%2, %1, %0|%0, %1, %2}"
10540 [(set_attr "isa" "x64_sse4_noavx,x64_avx,x64,sse2,sse2,sse2_noavx,avx,noavx,noavx,avx")
10543 (eq_attr "alternative" "0,1,5,6")
10544 (const_string "sselog")
10545 (const_string "ssemov")))
10546 (set_attr "prefix_rex" "1,1,1,*,*,*,*,*,*,*")
10547 (set_attr "prefix_extra" "1,1,*,*,*,*,*,*,*,*")
10548 (set_attr "length_immediate" "1,1,*,*,*,*,*,*,*,*")
10549 (set_attr "prefix" "orig,vex,maybe_vex,maybe_vex,orig,orig,vex,orig,orig,vex")
10550 (set_attr "mode" "TI,TI,TI,TI,TI,TI,TI,V4SF,V2SF,V2SF")])
;; Expander that emits nothing itself: it hands operands 0 and 1 to
;; ix86_expand_sse_unpack with both flag arguments false and finishes
;; with DONE.  Among the four vec_unpack expanders here, the first flag
;; is true only for the "u" names and the second only for the "hi" names.
10552 (define_expand "vec_unpacks_lo_<mode>"
10553 [(match_operand:<sseunpackmode> 0 "register_operand")
10554 (match_operand:VI124_AVX512F 1 "register_operand")]
10556 "ix86_expand_sse_unpack (operands[0], operands[1], false, false); DONE;")
;; Expander that emits nothing itself: it hands operands 0 and 1 to
;; ix86_expand_sse_unpack with flag arguments (false, true) and finishes
;; with DONE.  Among the four vec_unpack expanders here, the first flag
;; is true only for the "u" names and the second only for the "hi" names.
10558 (define_expand "vec_unpacks_hi_<mode>"
10559 [(match_operand:<sseunpackmode> 0 "register_operand")
10560 (match_operand:VI124_AVX512F 1 "register_operand")]
10562 "ix86_expand_sse_unpack (operands[0], operands[1], false, true); DONE;")
;; Expander that emits nothing itself: it hands operands 0 and 1 to
;; ix86_expand_sse_unpack with flag arguments (true, false) and finishes
;; with DONE.  Among the four vec_unpack expanders here, the first flag
;; is true only for the "u" names and the second only for the "hi" names.
10564 (define_expand "vec_unpacku_lo_<mode>"
10565 [(match_operand:<sseunpackmode> 0 "register_operand")
10566 (match_operand:VI124_AVX512F 1 "register_operand")]
10568 "ix86_expand_sse_unpack (operands[0], operands[1], true, false); DONE;")
;; Expander that emits nothing itself: it hands operands 0 and 1 to
;; ix86_expand_sse_unpack with both flag arguments true and finishes with
;; DONE.  Among the four vec_unpack expanders here, the first flag is
;; true only for the "u" names and the second only for the "hi" names.
10570 (define_expand "vec_unpacku_hi_<mode>"
10571 [(match_operand:<sseunpackmode> 0 "register_operand")
10572 (match_operand:VI124_AVX512F 1 "register_operand")]
10574 "ix86_expand_sse_unpack (operands[0], operands[1], true, true); DONE;")
10576 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10580 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander for the unsigned average of two VI12_AVX2 vectors.  The
;; visible RTL zero-extends both inputs to <ssedoublemode>, adds them
;; inside a further addition, shifts right logically and truncates back.
;; The preparation code sets operands[3] to the all-ones-valued constant
;; CONST1_RTX of the vector mode and then calls
;; ix86_fixup_binary_operands_no_copy for PLUS.
;; The remaining pattern lines and the condition are not visible here.
10582 (define_expand "<sse2_avx2>_uavg<mode>3"
10583 [(set (match_operand:VI12_AVX2 0 "register_operand")
10584 (truncate:VI12_AVX2
10585 (lshiftrt:<ssedoublemode>
10586 (plus:<ssedoublemode>
10587 (plus:<ssedoublemode>
10588 (zero_extend:<ssedoublemode>
10589 (match_operand:VI12_AVX2 1 "nonimmediate_operand"))
10590 (zero_extend:<ssedoublemode>
10591 (match_operand:VI12_AVX2 2 "nonimmediate_operand")))
10596 operands[3] = CONST1_RTX(<MODE>mode);
10597 ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);
;; Matching insn for the unsigned-average RTL: zero-extended operands 1
;; and 2 are added, operand 3 (matched by const1_operand) is added to the
;; sum, and the result is shifted right and truncated.  Operand 1 is
;; marked commutative ("%").  Requires SSE2 and
;; ix86_binary_operator_ok for PLUS.  Prints pavg<ssemodesuffix> (noavx,
;; destination tied to operand 1) or the three-operand vpavg (avx).
10600 (define_insn "*<sse2_avx2>_uavg<mode>3"
10601 [(set (match_operand:VI12_AVX2 0 "register_operand" "=x,x")
10602 (truncate:VI12_AVX2
10603 (lshiftrt:<ssedoublemode>
10604 (plus:<ssedoublemode>
10605 (plus:<ssedoublemode>
10606 (zero_extend:<ssedoublemode>
10607 (match_operand:VI12_AVX2 1 "nonimmediate_operand" "%0,x"))
10608 (zero_extend:<ssedoublemode>
10609 (match_operand:VI12_AVX2 2 "nonimmediate_operand" "xm,xm")))
10610 (match_operand:VI12_AVX2 3 "const1_operand"))
10612 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
10614 pavg<ssemodesuffix>\t{%2, %0|%0, %2}
10615 vpavg<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10616 [(set_attr "isa" "noavx,avx")
10617 (set_attr "type" "sseiadd")
10618 (set_attr "prefix_data16" "1,*")
10619 (set_attr "prefix" "orig,vex")
10620 (set_attr "mode" "<sseinsnmode>")])
10622 ;; The correct representation for this is absolutely enormous, and
10623 ;; surely not generally useful.
;; psadbw / vpsadbw.  The result has mode VI8_AVX2 while both inputs use
;; <ssebytemode>; the operation is wrapped in an operand vector whose
;; enclosing code and name are on lines not visible here (the comment
;; above this insn explains why a full RTL description is not used).
;; Alternative 0 (noavx) ties operand 1 to the destination; alternative 1
;; (avx) is the three-operand form.
10624 (define_insn "<sse2_avx2>_psadbw"
10625 [(set (match_operand:VI8_AVX2 0 "register_operand" "=x,x")
10627 [(match_operand:<ssebytemode> 1 "register_operand" "0,x")
10628 (match_operand:<ssebytemode> 2 "nonimmediate_operand" "xm,xm")]
10632 psadbw\t{%2, %0|%0, %2}
10633 vpsadbw\t{%2, %1, %0|%0, %1, %2}"
10634 [(set_attr "isa" "noavx,avx")
10635 (set_attr "type" "sseiadd")
10636 (set_attr "atom_unit" "simul")
10637 (set_attr "prefix_data16" "1,*")
10638 (set_attr "prefix" "orig,vex")
10639 (set_attr "mode" "<sseinsnmode>")])
;; %vmovmsk<ssemodesuffix>: takes a VF_128_256 register and produces an
;; SImode result in a general register.  The enclosing operation code and
;; the insn condition are on lines not visible here.
10641 (define_insn "<sse>_movmsk<ssemodesuffix><avxsizesuffix>"
10642 [(set (match_operand:SI 0 "register_operand" "=r")
10644 [(match_operand:VF_128_256 1 "register_operand" "x")]
10647 "%vmovmsk<ssemodesuffix>\t{%1, %0|%0, %1}"
10648 [(set_attr "type" "ssemov")
10649 (set_attr "prefix" "maybe_vex")
10650 (set_attr "mode" "<MODE>")])
;; vpmovmskb on a V32QI register, expressed as an SImode unspec whose
;; result goes to a general register.  Always VEX-encoded per the
;; "prefix" attribute.  The unspec name and the insn condition are on
;; lines not visible here.
10652 (define_insn "avx2_pmovmskb"
10653 [(set (match_operand:SI 0 "register_operand" "=r")
10654 (unspec:SI [(match_operand:V32QI 1 "register_operand" "x")]
10657 "vpmovmskb\t{%1, %0|%0, %1}"
10658 [(set_attr "type" "ssemov")
10659 (set_attr "prefix" "vex")
10660 (set_attr "mode" "DI")])
;; %vpmovmskb on a V16QI register, expressed as an SImode unspec whose
;; result goes to a general register.  The unspec name and the insn
;; condition are on lines not visible here.
10662 (define_insn "sse2_pmovmskb"
10663 [(set (match_operand:SI 0 "register_operand" "=r")
10664 (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
10667 "%vpmovmskb\t{%1, %0|%0, %1}"
10668 [(set_attr "type" "ssemov")
10669 (set_attr "prefix_data16" "1")
10670 (set_attr "prefix" "maybe_vex")
10671 (set_attr "mode" "SI")])
;; Expander: a V16QI memory destination is set from an unspec over two
;; V16QI register operands (operands 1 and 2).  The rest of the unspec
;; and the condition are on lines not visible here.
10673 (define_expand "sse2_maskmovdqu"
10674 [(set (match_operand:V16QI 0 "memory_operand")
10675 (unspec:V16QI [(match_operand:V16QI 1 "register_operand")
10676 (match_operand:V16QI 2 "register_operand")
;; %vmaskmovdqu.  The destination is the V16QI memory addressed by pointer
;; register operand 0 (constraint "D"); the unspec reads register
;; operands 1 and 2 and the previous contents of that same memory.
;; When Pmode differs from word_mode the output code writes an "addr32"
;; prefix to the assembler file by hand before returning the template
;; (the in-code comment explains why %^ cannot be used).  length_address
;; is 1 in that case, and length_vex is computed explicitly from
;; operand 2's register number.
10681 (define_insn "*sse2_maskmovdqu"
10682 [(set (mem:V16QI (match_operand:P 0 "register_operand" "D"))
10683 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
10684 (match_operand:V16QI 2 "register_operand" "x")
10685 (mem:V16QI (match_dup 0))]
10689 /* We can't use %^ here due to ASM_OUTPUT_OPCODE processing
10690 that requires %v to be at the beginning of the opcode name. */
10691 if (Pmode != word_mode)
10692 fputs ("\taddr32", asm_out_file);
10693 return "%vmaskmovdqu\t{%2, %1|%1, %2}";
10695 [(set_attr "type" "ssemov")
10696 (set_attr "prefix_data16" "1")
10697 (set (attr "length_address")
10698 (symbol_ref ("Pmode != word_mode")))
10699 ;; The implicit %rdi operand confuses default length_vex computation.
10700 (set (attr "length_vex")
10701 (symbol_ref ("3 + REX_SSE_REGNO_P (REGNO (operands[2]))")))
10702 (set_attr "prefix" "maybe_vex")
10703 (set_attr "mode" "TI")])
;; Volatile unspec taking one SImode memory operand.  The attributes mark
;; it as an mxcsr operation that loads from memory.  The unspec name,
;; condition and output template are on lines not visible here.
10705 (define_insn "sse_ldmxcsr"
10706 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
10710 [(set_attr "type" "sse")
10711 (set_attr "atom_sse_attr" "mxcsr")
10712 (set_attr "prefix" "maybe_vex")
10713 (set_attr "memory" "load")])
;; Sets an SImode memory operand from the volatile unspec
;; UNSPECV_STMXCSR.  The attributes mark it as an mxcsr operation that
;; stores to memory.  The condition and output template are on lines not
;; visible here.
10715 (define_insn "sse_stmxcsr"
10716 [(set (match_operand:SI 0 "memory_operand" "=m")
10717 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
10720 [(set_attr "type" "sse")
10721 (set_attr "atom_sse_attr" "mxcsr")
10722 (set_attr "prefix" "maybe_vex")
10723 (set_attr "memory" "store")])
;; Volatile unspec over a single address operand (constraint "p").  The
;; attributes classify it as a fence for Atom scheduling and declare its
;; memory behaviour as unknown.  The unspec name, condition and output
;; template are on lines not visible here.
10725 (define_insn "sse2_clflush"
10726 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
10730 [(set_attr "type" "sse")
10731 (set_attr "atom_sse_attr" "fence")
10732 (set_attr "memory" "unknown")])
;; Volatile unspec over two SImode register operands pinned by the "a"
;; and "c" constraints.  The length is fixed at 3 bytes.  The unspec
;; name, condition and output template are on lines not visible here.
10735 (define_insn "sse3_mwait"
10736 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
10737 (match_operand:SI 1 "register_operand" "c")]
10740 ;; 64bit version is "mwait %rax,%rcx". But only lower 32bits are used.
10741 ;; Since 32bit register operands are implicitly zero extended to 64bit,
10742 ;; we only need to set up 32bit registers.
10744 [(set_attr "length" "3")])
;; Volatile unspec over a pointer-mode register (constraint "a") and two
;; SImode registers (constraints "c" and "d").  The length is 3 bytes,
;; plus 1 when Pmode differs from word_mode.  The unspec name, condition
;; and output template are on lines not visible here.
10746 (define_insn "sse3_monitor_<mode>"
10747 [(unspec_volatile [(match_operand:P 0 "register_operand" "a")
10748 (match_operand:SI 1 "register_operand" "c")
10749 (match_operand:SI 2 "register_operand" "d")]
10752 ;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in
10753 ;; RCX and RDX are used. Since 32bit register operands are implicitly
10754 ;; zero extended to 64bit, we only need to set up 32bit registers.
10756 [(set (attr "length")
10757 (symbol_ref ("(Pmode != word_mode) + 3")))])
10759 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10761 ;; SSSE3 instructions
10763 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Code iterator over the four operations plus, ss_plus, minus and
;; ss_minus; the ph<plusminus_mnemonic>w patterns below are written once
;; in terms of it.
10765 (define_code_iterator ssse3_plusminus [plus ss_plus minus ss_minus])
;; 256-bit horizontal halfword operation, printed as
;; vph<plusminus_mnemonic>w.  Every visible term applies the
;; ssse3_plusminus code to an adjacent element pair (2k, 2k+1): first the
;; pairs of operand 1 (indices 0..15), then the same pairs of operand 2.
;; The vec_concat scaffolding that orders the terms and the insn condition
;; are on lines not visible here.
10767 (define_insn "avx2_ph<plusminus_mnemonic>wv16hi3"
10768 [(set (match_operand:V16HI 0 "register_operand" "=x")
10773 (ssse3_plusminus:HI
10775 (match_operand:V16HI 1 "register_operand" "x")
10776 (parallel [(const_int 0)]))
10777 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
10778 (ssse3_plusminus:HI
10779 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
10780 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
10782 (ssse3_plusminus:HI
10783 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
10784 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
10785 (ssse3_plusminus:HI
10786 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
10787 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
10790 (ssse3_plusminus:HI
10791 (vec_select:HI (match_dup 1) (parallel [(const_int 8)]))
10792 (vec_select:HI (match_dup 1) (parallel [(const_int 9)])))
10793 (ssse3_plusminus:HI
10794 (vec_select:HI (match_dup 1) (parallel [(const_int 10)]))
10795 (vec_select:HI (match_dup 1) (parallel [(const_int 11)]))))
10797 (ssse3_plusminus:HI
10798 (vec_select:HI (match_dup 1) (parallel [(const_int 12)]))
10799 (vec_select:HI (match_dup 1) (parallel [(const_int 13)])))
10800 (ssse3_plusminus:HI
10801 (vec_select:HI (match_dup 1) (parallel [(const_int 14)]))
10802 (vec_select:HI (match_dup 1) (parallel [(const_int 15)]))))))
10806 (ssse3_plusminus:HI
10808 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
10809 (parallel [(const_int 0)]))
10810 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
10811 (ssse3_plusminus:HI
10812 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
10813 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
10815 (ssse3_plusminus:HI
10816 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
10817 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
10818 (ssse3_plusminus:HI
10819 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
10820 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))
10823 (ssse3_plusminus:HI
10824 (vec_select:HI (match_dup 2) (parallel [(const_int 8)]))
10825 (vec_select:HI (match_dup 2) (parallel [(const_int 9)])))
10826 (ssse3_plusminus:HI
10827 (vec_select:HI (match_dup 2) (parallel [(const_int 10)]))
10828 (vec_select:HI (match_dup 2) (parallel [(const_int 11)]))))
10830 (ssse3_plusminus:HI
10831 (vec_select:HI (match_dup 2) (parallel [(const_int 12)]))
10832 (vec_select:HI (match_dup 2) (parallel [(const_int 13)])))
10833 (ssse3_plusminus:HI
10834 (vec_select:HI (match_dup 2) (parallel [(const_int 14)]))
10835 (vec_select:HI (match_dup 2) (parallel [(const_int 15)]))))))))]
10837 "vph<plusminus_mnemonic>w\t{%2, %1, %0|%0, %1, %2}"
10838 [(set_attr "type" "sseiadd")
10839 (set_attr "prefix_extra" "1")
10840 (set_attr "prefix" "vex")
10841 (set_attr "mode" "OI")])
;; 128-bit horizontal halfword operation: ph<plusminus_mnemonic>w (noavx,
;; operand 1 tied to the destination) or the three-operand VEX form
;; (avx).  Every visible term applies the ssse3_plusminus code to an
;; adjacent element pair (2k, 2k+1): the four pairs of operand 1, then
;; the four pairs of operand 2.  The vec_concat scaffolding and the insn
;; condition are on lines not visible here.
10843 (define_insn "ssse3_ph<plusminus_mnemonic>wv8hi3"
10844 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
10848 (ssse3_plusminus:HI
10850 (match_operand:V8HI 1 "register_operand" "0,x")
10851 (parallel [(const_int 0)]))
10852 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
10853 (ssse3_plusminus:HI
10854 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
10855 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
10857 (ssse3_plusminus:HI
10858 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
10859 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
10860 (ssse3_plusminus:HI
10861 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
10862 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
10865 (ssse3_plusminus:HI
10867 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
10868 (parallel [(const_int 0)]))
10869 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
10870 (ssse3_plusminus:HI
10871 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
10872 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
10874 (ssse3_plusminus:HI
10875 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
10876 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
10877 (ssse3_plusminus:HI
10878 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
10879 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
10882 ph<plusminus_mnemonic>w\t{%2, %0|%0, %2}
10883 vph<plusminus_mnemonic>w\t{%2, %1, %0|%0, %1, %2}"
10884 [(set_attr "isa" "noavx,avx")
10885 (set_attr "type" "sseiadd")
10886 (set_attr "atom_unit" "complex")
10887 (set_attr "prefix_data16" "1,*")
10888 (set_attr "prefix_extra" "1")
10889 (set_attr "prefix" "orig,vex")
10890 (set_attr "mode" "TI")])
;; 64-bit (MMX register, constraint "y") horizontal halfword operation,
;; printed as ph<plusminus_mnemonic>w with operand 1 tied to the
;; destination.  Visible terms apply the ssse3_plusminus code to pairs
;; (0,1) and (2,3) of operand 1, then of operand 2.  prefix_rex is
;; computed per insn by x86_extended_reg_mentioned_p.  The vec_concat
;; scaffolding and the insn condition are on lines not visible here.
10892 (define_insn "ssse3_ph<plusminus_mnemonic>wv4hi3"
10893 [(set (match_operand:V4HI 0 "register_operand" "=y")
10896 (ssse3_plusminus:HI
10898 (match_operand:V4HI 1 "register_operand" "0")
10899 (parallel [(const_int 0)]))
10900 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
10901 (ssse3_plusminus:HI
10902 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
10903 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
10905 (ssse3_plusminus:HI
10907 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
10908 (parallel [(const_int 0)]))
10909 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
10910 (ssse3_plusminus:HI
10911 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
10912 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
10914 "ph<plusminus_mnemonic>w\t{%2, %0|%0, %2}"
10915 [(set_attr "type" "sseiadd")
10916 (set_attr "atom_unit" "complex")
10917 (set_attr "prefix_extra" "1")
10918 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
10919 (set_attr "mode" "DI")])
;; 256-bit horizontal doubleword operation, printed as
;; vph<plusminus_mnemonic>d.  The visible lines select adjacent element
;; pairs (0,1), (2,3), (4,5), (6,7) of operand 1 and then of operand 2.
;; The operator applied to each pair, the vec_concat scaffolding and the
;; insn condition are on lines not visible here.
10921 (define_insn "avx2_ph<plusminus_mnemonic>dv8si3"
10922 [(set (match_operand:V8SI 0 "register_operand" "=x")
10928 (match_operand:V8SI 1 "register_operand" "x")
10929 (parallel [(const_int 0)]))
10930 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
10932 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
10933 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
10936 (vec_select:SI (match_dup 1) (parallel [(const_int 4)]))
10937 (vec_select:SI (match_dup 1) (parallel [(const_int 5)])))
10939 (vec_select:SI (match_dup 1) (parallel [(const_int 6)]))
10940 (vec_select:SI (match_dup 1) (parallel [(const_int 7)])))))
10945 (match_operand:V8SI 2 "nonimmediate_operand" "xm")
10946 (parallel [(const_int 0)]))
10947 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
10949 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
10950 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))
10953 (vec_select:SI (match_dup 2) (parallel [(const_int 4)]))
10954 (vec_select:SI (match_dup 2) (parallel [(const_int 5)])))
10956 (vec_select:SI (match_dup 2) (parallel [(const_int 6)]))
10957 (vec_select:SI (match_dup 2) (parallel [(const_int 7)])))))))]
10959 "vph<plusminus_mnemonic>d\t{%2, %1, %0|%0, %1, %2}"
10960 [(set_attr "type" "sseiadd")
10961 (set_attr "prefix_extra" "1")
10962 (set_attr "prefix" "vex")
10963 (set_attr "mode" "OI")])
;; 128-bit horizontal doubleword operation: ph<plusminus_mnemonic>d
;; (noavx, operand 1 tied to the destination) or the three-operand VEX
;; form (avx).  The visible lines select adjacent element pairs (0,1) and
;; (2,3) of operand 1 and then of operand 2.  The operator applied to
;; each pair, the vec_concat scaffolding and the insn condition are on
;; lines not visible here.
10965 (define_insn "ssse3_ph<plusminus_mnemonic>dv4si3"
10966 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
10971 (match_operand:V4SI 1 "register_operand" "0,x")
10972 (parallel [(const_int 0)]))
10973 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
10975 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
10976 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
10980 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
10981 (parallel [(const_int 0)]))
10982 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
10984 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
10985 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
10988 ph<plusminus_mnemonic>d\t{%2, %0|%0, %2}
10989 vph<plusminus_mnemonic>d\t{%2, %1, %0|%0, %1, %2}"
10990 [(set_attr "isa" "noavx,avx")
10991 (set_attr "type" "sseiadd")
10992 (set_attr "atom_unit" "complex")
10993 (set_attr "prefix_data16" "1,*")
10994 (set_attr "prefix_extra" "1")
10995 (set_attr "prefix" "orig,vex")
10996 (set_attr "mode" "TI")])
;; 64-bit (MMX register, constraint "y") horizontal doubleword operation,
;; printed as ph<plusminus_mnemonic>d with operand 1 tied to the
;; destination.  The visible lines select the element pair (0,1) of
;; operand 1 and of operand 2.  prefix_rex is computed per insn by
;; x86_extended_reg_mentioned_p.  The operator, vec_concat scaffolding
;; and insn condition are on lines not visible here.
10998 (define_insn "ssse3_ph<plusminus_mnemonic>dv2si3"
10999 [(set (match_operand:V2SI 0 "register_operand" "=y")
11003 (match_operand:V2SI 1 "register_operand" "0")
11004 (parallel [(const_int 0)]))
11005 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
11008 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
11009 (parallel [(const_int 0)]))
11010 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
11012 "ph<plusminus_mnemonic>d\t{%2, %0|%0, %2}"
11013 [(set_attr "type" "sseiadd")
11014 (set_attr "atom_unit" "complex")
11015 (set_attr "prefix_extra" "1")
11016 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
11017 (set_attr "mode" "DI")])
;; 256-bit vpmaddubsw producing V16HI from two V32QI inputs.  The visible
;; selections split each input into its even-indexed bytes (0,2,...,30)
;; in the first half of the pattern and its odd-indexed bytes
;; (1,3,...,31) in the second half.  The extension, multiply and add
;; codes that wrap these selections, and the insn condition, are on lines
;; not visible here.
11019 (define_insn "avx2_pmaddubsw256"
11020 [(set (match_operand:V16HI 0 "register_operand" "=x")
11025 (match_operand:V32QI 1 "register_operand" "x")
11026 (parallel [(const_int 0) (const_int 2)
11027 (const_int 4) (const_int 6)
11028 (const_int 8) (const_int 10)
11029 (const_int 12) (const_int 14)
11030 (const_int 16) (const_int 18)
11031 (const_int 20) (const_int 22)
11032 (const_int 24) (const_int 26)
11033 (const_int 28) (const_int 30)])))
11036 (match_operand:V32QI 2 "nonimmediate_operand" "xm")
11037 (parallel [(const_int 0) (const_int 2)
11038 (const_int 4) (const_int 6)
11039 (const_int 8) (const_int 10)
11040 (const_int 12) (const_int 14)
11041 (const_int 16) (const_int 18)
11042 (const_int 20) (const_int 22)
11043 (const_int 24) (const_int 26)
11044 (const_int 28) (const_int 30)]))))
11047 (vec_select:V16QI (match_dup 1)
11048 (parallel [(const_int 1) (const_int 3)
11049 (const_int 5) (const_int 7)
11050 (const_int 9) (const_int 11)
11051 (const_int 13) (const_int 15)
11052 (const_int 17) (const_int 19)
11053 (const_int 21) (const_int 23)
11054 (const_int 25) (const_int 27)
11055 (const_int 29) (const_int 31)])))
11057 (vec_select:V16QI (match_dup 2)
11058 (parallel [(const_int 1) (const_int 3)
11059 (const_int 5) (const_int 7)
11060 (const_int 9) (const_int 11)
11061 (const_int 13) (const_int 15)
11062 (const_int 17) (const_int 19)
11063 (const_int 21) (const_int 23)
11064 (const_int 25) (const_int 27)
11065 (const_int 29) (const_int 31)]))))))]
11067 "vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
11068 [(set_attr "type" "sseiadd")
11069 (set_attr "prefix_extra" "1")
11070 (set_attr "prefix" "vex")
11071 (set_attr "mode" "OI")])
;; 128-bit pmaddubsw (noavx, operand 1 tied to the destination) or
;; vpmaddubsw (avx), producing V8HI from two V16QI inputs.  The visible
;; selections take the even-indexed bytes (0,2,...,14) of each input in
;; the first half of the pattern and the odd-indexed bytes (1,3,...,15)
;; in the second half.  The extension, multiply and add codes that wrap
;; these selections, and the insn condition, are on lines not visible
;; here.
11073 (define_insn "ssse3_pmaddubsw128"
11074 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
11079 (match_operand:V16QI 1 "register_operand" "0,x")
11080 (parallel [(const_int 0) (const_int 2)
11081 (const_int 4) (const_int 6)
11082 (const_int 8) (const_int 10)
11083 (const_int 12) (const_int 14)])))
11086 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")
11087 (parallel [(const_int 0) (const_int 2)
11088 (const_int 4) (const_int 6)
11089 (const_int 8) (const_int 10)
11090 (const_int 12) (const_int 14)]))))
11093 (vec_select:V8QI (match_dup 1)
11094 (parallel [(const_int 1) (const_int 3)
11095 (const_int 5) (const_int 7)
11096 (const_int 9) (const_int 11)
11097 (const_int 13) (const_int 15)])))
11099 (vec_select:V8QI (match_dup 2)
11100 (parallel [(const_int 1) (const_int 3)
11101 (const_int 5) (const_int 7)
11102 (const_int 9) (const_int 11)
11103 (const_int 13) (const_int 15)]))))))]
11106 pmaddubsw\t{%2, %0|%0, %2}
11107 vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
11108 [(set_attr "isa" "noavx,avx")
11109 (set_attr "type" "sseiadd")
11110 (set_attr "atom_unit" "simul")
11111 (set_attr "prefix_data16" "1,*")
11112 (set_attr "prefix_extra" "1")
11113 (set_attr "prefix" "orig,vex")
11114 (set_attr "mode" "TI")])
;; 64-bit (MMX register, constraint "y") pmaddubsw producing V4HI from
;; two V8QI inputs, operand 1 tied to the destination.  The visible
;; selections take the even-indexed bytes (0,2,4,6) of each input in the
;; first half of the pattern and the odd-indexed bytes (1,3,5,7) in the
;; second half.  prefix_rex is computed per insn by
;; x86_extended_reg_mentioned_p.  The wrapping arithmetic codes and the
;; insn condition are on lines not visible here.
11116 (define_insn "ssse3_pmaddubsw"
11117 [(set (match_operand:V4HI 0 "register_operand" "=y")
11122 (match_operand:V8QI 1 "register_operand" "0")
11123 (parallel [(const_int 0) (const_int 2)
11124 (const_int 4) (const_int 6)])))
11127 (match_operand:V8QI 2 "nonimmediate_operand" "ym")
11128 (parallel [(const_int 0) (const_int 2)
11129 (const_int 4) (const_int 6)]))))
11132 (vec_select:V4QI (match_dup 1)
11133 (parallel [(const_int 1) (const_int 3)
11134 (const_int 5) (const_int 7)])))
11136 (vec_select:V4QI (match_dup 2)
11137 (parallel [(const_int 1) (const_int 3)
11138 (const_int 5) (const_int 7)]))))))]
11140 "pmaddubsw\t{%2, %0|%0, %2}"
11141 [(set_attr "type" "sseiadd")
11142 (set_attr "atom_unit" "simul")
11143 (set_attr "prefix_extra" "1")
11144 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
11145 (set_attr "mode" "DI")])
;; Mode iterator for the pmulhrsw expander below: V4HI and V8HI
;; unconditionally, V16HI only when TARGET_AVX2 holds.
11147 (define_mode_iterator PMULHRSW
11148 [V4HI V8HI (V16HI "TARGET_AVX2")])
;; Expander over the PMULHRSW modes.  The visible RTL sign-extends both
;; inputs to <ssedoublemode>, multiplies them, shifts the product right,
;; adds to it and shifts right again.  The preparation code sets
;; operands[3] to CONST1_RTX of the vector mode and then calls
;; ix86_fixup_binary_operands_no_copy for MULT.
;; The shift counts, the outer truncate and the condition are on lines
;; not visible here.
11150 (define_expand "<ssse3_avx2>_pmulhrsw<mode>3"
11151 [(set (match_operand:PMULHRSW 0 "register_operand")
11153 (lshiftrt:<ssedoublemode>
11154 (plus:<ssedoublemode>
11155 (lshiftrt:<ssedoublemode>
11156 (mult:<ssedoublemode>
11157 (sign_extend:<ssedoublemode>
11158 (match_operand:PMULHRSW 1 "nonimmediate_operand"))
11159 (sign_extend:<ssedoublemode>
11160 (match_operand:PMULHRSW 2 "nonimmediate_operand")))
11166 operands[3] = CONST1_RTX(<MODE>mode);
11167 ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
;; Matching insn for the VI2_AVX2 modes: sign-extended operands 1 and 2
;; are multiplied, the product is shifted right, operand 3 (matched by
;; const1_operand) is added and the sum is shifted right again.  Operand
;; 1 is marked commutative ("%").  Requires SSSE3 and
;; ix86_binary_operator_ok for MULT.  Prints pmulhrsw (noavx, destination
;; tied to operand 1) or the three-operand vpmulhrsw (avx).
11170 (define_insn "*<ssse3_avx2>_pmulhrsw<mode>3"
11171 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
11173 (lshiftrt:<ssedoublemode>
11174 (plus:<ssedoublemode>
11175 (lshiftrt:<ssedoublemode>
11176 (mult:<ssedoublemode>
11177 (sign_extend:<ssedoublemode>
11178 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,x"))
11179 (sign_extend:<ssedoublemode>
11180 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,xm")))
11182 (match_operand:VI2_AVX2 3 "const1_operand"))
11184 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
11186 pmulhrsw\t{%2, %0|%0, %2}
11187 vpmulhrsw\t{%2, %1, %0|%0, %1, %2}"
11188 [(set_attr "isa" "noavx,avx")
11189 (set_attr "type" "sseimul")
11190 (set_attr "prefix_data16" "1,*")
11191 (set_attr "prefix_extra" "1")
11192 (set_attr "prefix" "orig,vex")
11193 (set_attr "mode" "<sseinsnmode>")])
;; MMX-register (V4HI) variant of pmulhrsw.  Operand 1 is commutative and
;; tied to the destination ("%0"); operand 2 is an MMX register or memory.
;; Operand 3 must satisfy const1_operand.  Requires TARGET_SSSE3 and
;; ix86_binary_operator_ok for MULT in V4HImode.
11195 (define_insn "*ssse3_pmulhrswv4hi3"
11196 [(set (match_operand:V4HI 0 "register_operand" "=y")
11203 (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
11205 (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
11207 (match_operand:V4HI 3 "const1_operand"))
11209 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
11210 "pmulhrsw\t{%2, %0|%0, %2}"
11211 [(set_attr "type" "sseimul")
11212 (set_attr "prefix_extra" "1")
11213 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
11214 (set_attr "mode" "DI")])
;; pshufb / vpshufb for the VI1_AVX2 byte-vector modes.  Alternative 0
;; (noavx) ties operand 1 to the destination; alternative 1 (avx) is the
;; three-operand VEX form.  Operand 2 may be a register or memory.
11216 (define_insn "<ssse3_avx2>_pshufb<mode>3"
11217 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
11219 [(match_operand:VI1_AVX2 1 "register_operand" "0,x")
11220 (match_operand:VI1_AVX2 2 "nonimmediate_operand" "xm,xm")]
11224 pshufb\t{%2, %0|%0, %2}
11225 vpshufb\t{%2, %1, %0|%0, %1, %2}"
11226 [(set_attr "isa" "noavx,avx")
11227 (set_attr "type" "sselog1")
11228 (set_attr "prefix_data16" "1,*")
11229 (set_attr "prefix_extra" "1")
11230 (set_attr "prefix" "orig,vex")
11231 (set_attr "btver2_decode" "vector,vector")
11232 (set_attr "mode" "<sseinsnmode>")])
;; MMX-register (V8QI) pshufb.  Operand 1 is tied to the destination;
;; operand 2 is an MMX register or memory.  The ';' after the output
;; template merely starts an empty .md comment.
11234 (define_insn "ssse3_pshufbv8qi3"
11235 [(set (match_operand:V8QI 0 "register_operand" "=y")
11236 (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0")
11237 (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
11240 "pshufb\t{%2, %0|%0, %2}";
11241 [(set_attr "type" "sselog1")
11242 (set_attr "prefix_extra" "1")
11243 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
11244 (set_attr "mode" "DI")])
;; psign{b,w,d} / vpsign{b,w,d} for the VI124_AVX2 modes; the element-size
;; suffix comes from <ssemodesuffix>.  Alternative 0 (noavx) ties operand 1
;; to the destination; alternative 1 (avx) is the three-operand VEX form.
11246 (define_insn "<ssse3_avx2>_psign<mode>3"
11247 [(set (match_operand:VI124_AVX2 0 "register_operand" "=x,x")
11249 [(match_operand:VI124_AVX2 1 "register_operand" "0,x")
11250 (match_operand:VI124_AVX2 2 "nonimmediate_operand" "xm,xm")]
11254 psign<ssemodesuffix>\t{%2, %0|%0, %2}
11255 vpsign<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11256 [(set_attr "isa" "noavx,avx")
11257 (set_attr "type" "sselog1")
11258 (set_attr "prefix_data16" "1,*")
11259 (set_attr "prefix_extra" "1")
11260 (set_attr "prefix" "orig,vex")
11261 (set_attr "mode" "<sseinsnmode>")])
;; MMX-register psign over the MMXMODEI modes; the mnemonic suffix comes
;; from <mmxvecsize>.  Operand 1 is tied to the destination; operand 2 is an
;; MMX register or memory.  The ';' after the template is an empty comment.
11263 (define_insn "ssse3_psign<mode>3"
11264 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
11266 [(match_operand:MMXMODEI 1 "register_operand" "0")
11267 (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
11270 "psign<mmxvecsize>\t{%2, %0|%0, %2}";
11271 [(set_attr "type" "sselog1")
11272 (set_attr "prefix_extra" "1")
11273 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
11274 (set_attr "mode" "DI")])
;; palignr / vpalignr for the SSESCALARMODE modes.  Operand 3 is accepted
;; by const_0_to_255_mul_8_operand and is divided by 8 before printing
;; (presumably a bit count converted to the byte count the instruction
;; encodes -- confirm against the predicate).  The output code then picks
;; the legacy two-operand or the VEX three-operand template by
;; which_alternative.
11276 (define_insn "<ssse3_avx2>_palignr<mode>"
11277 [(set (match_operand:SSESCALARMODE 0 "register_operand" "=x,x")
11278 (unspec:SSESCALARMODE
11279 [(match_operand:SSESCALARMODE 1 "register_operand" "0,x")
11280 (match_operand:SSESCALARMODE 2 "nonimmediate_operand" "xm,xm")
11281 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n,n")]
11285 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
11287 switch (which_alternative)
11290 return "palignr\t{%3, %2, %0|%0, %2, %3}";
11292 return "vpalignr\t{%3, %2, %1, %0|%0, %1, %2, %3}";
11294 gcc_unreachable ();
11297 [(set_attr "isa" "noavx,avx")
11298 (set_attr "type" "sseishft")
11299 (set_attr "atom_unit" "sishuf")
11300 (set_attr "prefix_data16" "1,*")
11301 (set_attr "prefix_extra" "1")
11302 (set_attr "length_immediate" "1")
11303 (set_attr "prefix" "orig,vex")
11304 (set_attr "mode" "<sseinsnmode>")])
;; MMX-register (DImode) palignr.  Operand 1 is tied to the destination;
;; operand 2 is an MMX register or memory.  As in the SSE form, operand 3
;; is divided by 8 before the immediate is printed.
11306 (define_insn "ssse3_palignrdi"
11307 [(set (match_operand:DI 0 "register_operand" "=y")
11308 (unspec:DI [(match_operand:DI 1 "register_operand" "0")
11309 (match_operand:DI 2 "nonimmediate_operand" "ym")
11310 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
11314 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
11315 return "palignr\t{%3, %2, %0|%0, %2, %3}";
11317 [(set_attr "type" "sseishft")
11318 (set_attr "atom_unit" "sishuf")
11319 (set_attr "prefix_extra" "1")
11320 (set_attr "length_immediate" "1")
11321 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
11322 (set_attr "mode" "DI")])
;; Vector integer absolute value (pabs{b,w,d,q}) over the
;; VI124_AVX2_48_AVX512F modes, with an optional masked variant supplied by
;; the <mask_name> substitution.  Requires TARGET_SSSE3 plus
;; <mask_mode512bit_condition>.  The source may be a register or memory.
11324 (define_insn "<mask_codefor>abs<mode>2<mask_name>"
11325 [(set (match_operand:VI124_AVX2_48_AVX512F 0 "register_operand" "=v")
11326 (abs:VI124_AVX2_48_AVX512F
11327 (match_operand:VI124_AVX2_48_AVX512F 1 "nonimmediate_operand" "vm")))]
11328 "TARGET_SSSE3 && <mask_mode512bit_condition>"
11329 "%vpabs<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
11330 [(set_attr "type" "sselog1")
11331 (set_attr "prefix_data16" "1")
11332 (set_attr "prefix_extra" "1")
11333 (set_attr "prefix" "maybe_vex")
11334 (set_attr "mode" "<sseinsnmode>")])
;; Standard-name expander for vector integer abs over the
;; VI124_AVX2_48_AVX512F modes.  Its preparation code can hand the work to
;; ix86_expand_sse2_abs (operands[0], operands[1]).
11336 (define_expand "abs<mode>2"
11337 [(set (match_operand:VI124_AVX2_48_AVX512F 0 "register_operand")
11338 (abs:VI124_AVX2_48_AVX512F
11339 (match_operand:VI124_AVX2_48_AVX512F 1 "nonimmediate_operand")))]
11344 ix86_expand_sse2_abs (operands[0], operands[1]);
;; MMX-register pabs over the MMXMODEI modes; the mnemonic suffix comes
;; from <mmxvecsize>.  The source is an MMX register or memory.  The ';'
;; after the template is an empty comment.
11349 (define_insn "abs<mode>2"
11350 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
11352 (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
11354 "pabs<mmxvecsize>\t{%1, %0|%0, %1}";
11355 [(set_attr "type" "sselog1")
11356 (set_attr "prefix_rep" "0")
11357 (set_attr "prefix_extra" "1")
11358 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
11359 (set_attr "mode" "DI")])
11361 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
11363 ;; AMD SSE4A instructions
11365 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; SSE4A scalar store movnts{s,d}: writes a MODEF (scalar float) value from
;; an SSE register to memory.  The destination must be a memory operand.
11367 (define_insn "sse4a_movnt<mode>"
11368 [(set (match_operand:MODEF 0 "memory_operand" "=m")
11370 [(match_operand:MODEF 1 "register_operand" "x")]
11373 "movnt<ssemodesuffix>\t{%1, %0|%0, %1}"
11374 [(set_attr "type" "ssemov")
11375 (set_attr "mode" "<MODE>")])
;; Vector-operand variant of the SSE4A scalar movnt store: element 0 of a
;; VF_128 register is selected and written to a scalar memory destination.
11377 (define_insn "sse4a_vmmovnt<mode>"
11378 [(set (match_operand:<ssescalarmode> 0 "memory_operand" "=m")
11379 (unspec:<ssescalarmode>
11380 [(vec_select:<ssescalarmode>
11381 (match_operand:VF_128 1 "register_operand" "x")
11382 (parallel [(const_int 0)]))]
11385 "movnt<ssescalarmodesuffix>\t{%1, %0|%0, %1}"
11386 [(set_attr "type" "ssemov")
11387 (set_attr "mode" "<ssescalarmode>")])
;; SSE4A extrq, immediate form.  Operand 1 is tied to the destination;
;; operands 2 and 3 are two 8-bit immediates (const_0_to_255_operand), hence
;; length_immediate is 2.
11389 (define_insn "sse4a_extrqi"
11390 [(set (match_operand:V2DI 0 "register_operand" "=x")
11391 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11392 (match_operand 2 "const_0_to_255_operand")
11393 (match_operand 3 "const_0_to_255_operand")]
11396 "extrq\t{%3, %2, %0|%0, %2, %3}"
11397 [(set_attr "type" "sse")
11398 (set_attr "prefix_data16" "1")
11399 (set_attr "length_immediate" "2")
11400 (set_attr "mode" "TI")])
;; SSE4A extrq, register form.  Operand 1 is tied to the destination and
;; operand 2 is a V16QI SSE register.
11402 (define_insn "sse4a_extrq"
11403 [(set (match_operand:V2DI 0 "register_operand" "=x")
11404 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11405 (match_operand:V16QI 2 "register_operand" "x")]
11408 "extrq\t{%2, %0|%0, %2}"
11409 [(set_attr "type" "sse")
11410 (set_attr "prefix_data16" "1")
11411 (set_attr "mode" "TI")])
;; SSE4A insertq, immediate form.  Operand 1 is tied to the destination,
;; operand 2 is an SSE register, and operands 3 and 4 are two 8-bit
;; immediates (length_immediate 2).
11413 (define_insn "sse4a_insertqi"
11414 [(set (match_operand:V2DI 0 "register_operand" "=x")
11415 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11416 (match_operand:V2DI 2 "register_operand" "x")
11417 (match_operand 3 "const_0_to_255_operand")
11418 (match_operand 4 "const_0_to_255_operand")]
11421 "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
11422 [(set_attr "type" "sseins")
11423 (set_attr "prefix_data16" "0")
11424 (set_attr "prefix_rep" "1")
11425 (set_attr "length_immediate" "2")
11426 (set_attr "mode" "TI")])
;; SSE4A insertq, register form.  Operand 1 is tied to the destination and
;; operand 2 is a V2DI SSE register.
11428 (define_insn "sse4a_insertq"
11429 [(set (match_operand:V2DI 0 "register_operand" "=x")
11430 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11431 (match_operand:V2DI 2 "register_operand" "x")]
11434 "insertq\t{%2, %0|%0, %2}"
11435 [(set_attr "type" "sseins")
11436 (set_attr "prefix_data16" "0")
11437 (set_attr "prefix_rep" "1")
11438 (set_attr "mode" "TI")])
11440 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
11442 ;; Intel SSE4.1 instructions
11444 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; blendp{s,d} / vblendp{s,d} with an immediate mask, modelled as a
;; vec_merge of operand 2 and operand 1 under the constant operand 3.
;; Note that operand 2 is listed first in the vec_merge.  Alternative 0
;; (noavx) ties operand 1 to the destination; alternative 1 is the VEX form.
11446 (define_insn "<sse4_1>_blend<ssemodesuffix><avxsizesuffix>"
11447 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
11448 (vec_merge:VF_128_256
11449 (match_operand:VF_128_256 2 "nonimmediate_operand" "xm,xm")
11450 (match_operand:VF_128_256 1 "register_operand" "0,x")
11451 (match_operand:SI 3 "const_0_to_<blendbits>_operand")))]
11454 blend<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
11455 vblend<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11456 [(set_attr "isa" "noavx,avx")
11457 (set_attr "type" "ssemov")
11458 (set_attr "length_immediate" "1")
11459 (set_attr "prefix_data16" "1,*")
11460 (set_attr "prefix_extra" "1")
11461 (set_attr "prefix" "orig,vex")
11462 (set_attr "mode" "<MODE>")])
;; blendvp{s,d} / vblendvp{s,d} with a register selector (operand 3).
;; In the noavx alternative the selector uses the "Yz" constraint and
;; operand 1 is tied to the destination; the avx alternative accepts any
;; SSE register for all three inputs.
11464 (define_insn "<sse4_1>_blendv<ssemodesuffix><avxsizesuffix>"
11465 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
11467 [(match_operand:VF_128_256 1 "register_operand" "0,x")
11468 (match_operand:VF_128_256 2 "nonimmediate_operand" "xm,xm")
11469 (match_operand:VF_128_256 3 "register_operand" "Yz,x")]
11473 blendv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
11474 vblendv<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11475 [(set_attr "isa" "noavx,avx")
11476 (set_attr "type" "ssemov")
11477 (set_attr "length_immediate" "1")
11478 (set_attr "prefix_data16" "1,*")
11479 (set_attr "prefix_extra" "1")
11480 (set_attr "prefix" "orig,vex")
11481 (set_attr "btver2_decode" "vector,vector")
11482 (set_attr "mode" "<MODE>")])
;; dpp{s,d} / vdpp{s,d} with an 8-bit immediate control (operand 3).
;; Operand 1 is marked commutative ("%"); in the noavx alternative it is
;; tied to the destination.
11484 (define_insn "<sse4_1>_dp<ssemodesuffix><avxsizesuffix>"
11485 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
11487 [(match_operand:VF_128_256 1 "nonimmediate_operand" "%0,x")
11488 (match_operand:VF_128_256 2 "nonimmediate_operand" "xm,xm")
11489 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
11493 dp<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
11494 vdp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11495 [(set_attr "isa" "noavx,avx")
11496 (set_attr "type" "ssemul")
11497 (set_attr "length_immediate" "1")
11498 (set_attr "prefix_data16" "1,*")
11499 (set_attr "prefix_extra" "1")
11500 (set_attr "prefix" "orig,vex")
11501 (set_attr "btver2_decode" "vector,vector")
11502 (set_attr "mode" "<MODE>")])
;; movntdqa / vmovntdqa load over the VI8_AVX2_AVX512F modes.  The source
;; must be memory.  Alternative 0 targets "x" registers (maybe_vex);
;; alternative 1 targets "v" registers with an EVEX prefix.
11504 (define_insn "<sse4_1_avx2>_movntdqa"
11505 [(set (match_operand:VI8_AVX2_AVX512F 0 "register_operand" "=x, v")
11506 (unspec:VI8_AVX2_AVX512F [(match_operand:VI8_AVX2_AVX512F 1 "memory_operand" "m, m")]
11509 "%vmovntdqa\t{%1, %0|%0, %1}"
11510 [(set_attr "type" "ssemov")
11511 (set_attr "prefix_extra" "1, *")
11512 (set_attr "prefix" "maybe_vex, evex")
11513 (set_attr "mode" "<sseinsnmode>")])
;; mpsadbw / vmpsadbw over the VI1_AVX2 modes with an 8-bit immediate
;; (operand 3).  Alternative 0 (noavx) ties operand 1 to the destination;
;; alternative 1 is the three-operand VEX form.
11515 (define_insn "<sse4_1_avx2>_mpsadbw"
11516 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
11518 [(match_operand:VI1_AVX2 1 "register_operand" "0,x")
11519 (match_operand:VI1_AVX2 2 "nonimmediate_operand" "xm,xm")
11520 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
11524 mpsadbw\t{%3, %2, %0|%0, %2, %3}
11525 vmpsadbw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11526 [(set_attr "isa" "noavx,avx")
11527 (set_attr "type" "sselog1")
11528 (set_attr "length_immediate" "1")
11529 (set_attr "prefix_extra" "1")
11530 (set_attr "prefix" "orig,vex")
11531 (set_attr "btver2_decode" "vector,vector")
11532 (set_attr "mode" "<sseinsnmode>")])
;; 256-bit vpackusdw: two V8SI inputs produce a V16HI result.  Operand 1
;; must be a register; operand 2 may be a register or memory.  VEX only.
11534 (define_insn "avx2_packusdw"
11535 [(set (match_operand:V16HI 0 "register_operand" "=x")
11538 (match_operand:V8SI 1 "register_operand" "x"))
11540 (match_operand:V8SI 2 "nonimmediate_operand" "xm"))))]
11542 "vpackusdw\t{%2, %1, %0|%0, %1, %2}"
11543 [(set_attr "type" "sselog")
11544 (set_attr "prefix_extra" "1")
11545 (set_attr "prefix" "vex")
11546 (set_attr "mode" "OI")])
;; 128-bit packusdw / vpackusdw: two V4SI inputs produce a V8HI result.
;; Alternative 0 (noavx) ties operand 1 to the destination; alternative 1
;; is the three-operand VEX form.
11548 (define_insn "sse4_1_packusdw"
11549 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
11552 (match_operand:V4SI 1 "register_operand" "0,x"))
11554 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm"))))]
11557 packusdw\t{%2, %0|%0, %2}
11558 vpackusdw\t{%2, %1, %0|%0, %1, %2}"
11559 [(set_attr "isa" "noavx,avx")
11560 (set_attr "type" "sselog")
11561 (set_attr "prefix_extra" "1")
11562 (set_attr "prefix" "orig,vex")
11563 (set_attr "mode" "TI")])
;; pblendvb / vpblendvb over the VI1_AVX2 modes with a register selector
;; (operand 3).  In the noavx alternative the selector uses the "Yz"
;; constraint and operand 1 is tied to the destination.  length_immediate
;; is "*" for the legacy form and 1 for the VEX form.
11565 (define_insn "<sse4_1_avx2>_pblendvb"
11566 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
11568 [(match_operand:VI1_AVX2 1 "register_operand" "0,x")
11569 (match_operand:VI1_AVX2 2 "nonimmediate_operand" "xm,xm")
11570 (match_operand:VI1_AVX2 3 "register_operand" "Yz,x")]
11574 pblendvb\t{%3, %2, %0|%0, %2, %3}
11575 vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11576 [(set_attr "isa" "noavx,avx")
11577 (set_attr "type" "ssemov")
11578 (set_attr "prefix_extra" "1")
11579 (set_attr "length_immediate" "*,1")
11580 (set_attr "prefix" "orig,vex")
11581 (set_attr "btver2_decode" "vector,vector")
11582 (set_attr "mode" "<sseinsnmode>")])
;; 128-bit pblendw / vpblendw with an 8-bit immediate mask (operand 3).
;; Operand 2 is listed before operand 1 in the RTL.  Alternative 0 (noavx)
;; ties operand 1 to the destination; alternative 1 is the VEX form.
11584 (define_insn "sse4_1_pblendw"
11585 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
11587 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
11588 (match_operand:V8HI 1 "register_operand" "0,x")
11589 (match_operand:SI 3 "const_0_to_255_operand" "n,n")))]
11592 pblendw\t{%3, %2, %0|%0, %2, %3}
11593 vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11594 [(set_attr "isa" "noavx,avx")
11595 (set_attr "type" "ssemov")
11596 (set_attr "prefix_extra" "1")
11597 (set_attr "length_immediate" "1")
11598 (set_attr "prefix" "orig,vex")
11599 (set_attr "mode" "TI")])
11601 ;; The builtin uses an 8-bit immediate. Expand that.
;; Expander for the 256-bit pblendw builtin.  The 8-bit immediate is
;; masked to its low byte and replicated into bits 8..15
;; (val << 8 | val), giving a 16-bit mask that covers all 16 halfword
;; elements of the V16HI operands.
11602 (define_expand "avx2_pblendw"
11603 [(set (match_operand:V16HI 0 "register_operand")
11605 (match_operand:V16HI 2 "nonimmediate_operand")
11606 (match_operand:V16HI 1 "register_operand")
11607 (match_operand:SI 3 "const_0_to_255_operand")))]
11610 HOST_WIDE_INT val = INTVAL (operands[3]) & 0xff;
11611 operands[3] = GEN_INT (val << 8 | val);
;; Matcher for the 256-bit vpblendw.  Operand 3 must satisfy
;; avx2_pblendw_operand; the output code reduces it to its low 8 bits
;; before printing the immediate.
11614 (define_insn "*avx2_pblendw"
11615 [(set (match_operand:V16HI 0 "register_operand" "=x")
11617 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
11618 (match_operand:V16HI 1 "register_operand" "x")
11619 (match_operand:SI 3 "avx2_pblendw_operand" "n")))]
11622 operands[3] = GEN_INT (INTVAL (operands[3]) & 0xff);
11623 return "vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}";
11625 [(set_attr "type" "ssemov")
11626 (set_attr "prefix_extra" "1")
11627 (set_attr "length_immediate" "1")
11628 (set_attr "prefix" "vex")
11629 (set_attr "mode" "OI")])
;; vpblendd over the VI4_AVX2 modes, modelled as a vec_merge of operand 2
;; and operand 1 under the 8-bit immediate operand 3.  VEX only.
11631 (define_insn "avx2_pblendd<mode>"
11632 [(set (match_operand:VI4_AVX2 0 "register_operand" "=x")
11633 (vec_merge:VI4_AVX2
11634 (match_operand:VI4_AVX2 2 "nonimmediate_operand" "xm")
11635 (match_operand:VI4_AVX2 1 "register_operand" "x")
11636 (match_operand:SI 3 "const_0_to_255_operand" "n")))]
11638 "vpblendd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11639 [(set_attr "type" "ssemov")
11640 (set_attr "prefix_extra" "1")
11641 (set_attr "length_immediate" "1")
11642 (set_attr "prefix" "vex")
11643 (set_attr "mode" "<sseinsnmode>")])
;; phminposuw / vphminposuw on V8HI, expressed as UNSPEC_PHMINPOSUW.
;; The single source operand may be a register or memory.
11645 (define_insn "sse4_1_phminposuw"
11646 [(set (match_operand:V8HI 0 "register_operand" "=x")
11647 (unspec:V8HI [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
11648 UNSPEC_PHMINPOSUW))]
11650 "%vphminposuw\t{%1, %0|%0, %1}"
11651 [(set_attr "type" "sselog1")
11652 (set_attr "prefix_extra" "1")
11653 (set_attr "prefix" "maybe_vex")
11654 (set_attr "mode" "TI")])
;; 256-bit vpmov{sx,zx}bw: extends all 16 bytes of a V16QI source to V16HI.
;; <extsuffix> selects the sign- or zero-extending mnemonic.  The whole
;; 128-bit source is consumed, so the Intel operand is printed unmodified.
11656 (define_insn "avx2_<code>v16qiv16hi2"
11657 [(set (match_operand:V16HI 0 "register_operand" "=x")
11659 (match_operand:V16QI 1 "nonimmediate_operand" "xm")))]
11661 "vpmov<extsuffix>bw\t{%1, %0|%0, %1}"
11662 [(set_attr "type" "ssemov")
11663 (set_attr "prefix_extra" "1")
11664 (set_attr "prefix" "vex")
11665 (set_attr "mode" "OI")])
;; 128-bit pmov{sx,zx}bw: extends the low 8 bytes (lanes 0..7) of a V16QI
;; source to V8HI.  Only 64 bits are read, hence %q1 in the Intel template
;; and ssememalign 64.
11667 (define_insn "sse4_1_<code>v8qiv8hi2"
11668 [(set (match_operand:V8HI 0 "register_operand" "=x")
11671 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
11672 (parallel [(const_int 0) (const_int 1)
11673 (const_int 2) (const_int 3)
11674 (const_int 4) (const_int 5)
11675 (const_int 6) (const_int 7)]))))]
11677 "%vpmov<extsuffix>bw\t{%1, %0|%0, %q1}"
11678 [(set_attr "type" "ssemov")
11679 (set_attr "ssememalign" "64")
11680 (set_attr "prefix_extra" "1")
11681 (set_attr "prefix" "maybe_vex")
11682 (set_attr "mode" "TI")])
;; 512-bit vpmov{sx,zx}bd: extends all 16 bytes of a V16QI source to V16SI,
;; with an optional masked variant supplied by <mask_name>.
;; The whole 128-bit source is consumed, so the Intel-syntax operand is
;; printed unmodified (%1); a %q1 modifier would print a memory source as
;; QWORD PTR, which does not match the instruction's 128-bit memory form.
11684 (define_insn "<mask_codefor>avx512f_<code>v16qiv16si2<mask_name>"
11685 [(set (match_operand:V16SI 0 "register_operand" "=v")
11687 (match_operand:V16QI 1 "nonimmediate_operand" "vm")))]
11689 "vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
11690 [(set_attr "type" "ssemov")
11691 (set_attr "prefix" "evex")
11692 (set_attr "mode" "XI")])
;; 256-bit vpmov{sx,zx}bd: extends the low 8 bytes (lanes 0..7) of a V16QI
;; source to V8SI.  Only 64 bits are read, hence %q1 in the Intel template.
11694 (define_insn "avx2_<code>v8qiv8si2"
11695 [(set (match_operand:V8SI 0 "register_operand" "=x")
11698 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
11699 (parallel [(const_int 0) (const_int 1)
11700 (const_int 2) (const_int 3)
11701 (const_int 4) (const_int 5)
11702 (const_int 6) (const_int 7)]))))]
11704 "vpmov<extsuffix>bd\t{%1, %0|%0, %q1}"
11705 [(set_attr "type" "ssemov")
11706 (set_attr "prefix_extra" "1")
11707 (set_attr "prefix" "vex")
11708 (set_attr "mode" "OI")])
;; 128-bit pmov{sx,zx}bd: extends the low 4 bytes (lanes 0..3) of a V16QI
;; source to V4SI.  Only 32 bits are read, hence %k1 in the Intel template
;; and ssememalign 32.
11710 (define_insn "sse4_1_<code>v4qiv4si2"
11711 [(set (match_operand:V4SI 0 "register_operand" "=x")
11714 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
11715 (parallel [(const_int 0) (const_int 1)
11716 (const_int 2) (const_int 3)]))))]
11718 "%vpmov<extsuffix>bd\t{%1, %0|%0, %k1}"
11719 [(set_attr "type" "ssemov")
11720 (set_attr "ssememalign" "32")
11721 (set_attr "prefix_extra" "1")
11722 (set_attr "prefix" "maybe_vex")
11723 (set_attr "mode" "TI")])
;; 512-bit vpmov{sx,zx}wd: extends all 16 halfwords of a V16HI source to
;; V16SI, with an optional masked variant supplied by <mask_name>.  The
;; full source operand is consumed, so it is printed unmodified.
11725 (define_insn "avx512f_<code>v16hiv16si2<mask_name>"
11726 [(set (match_operand:V16SI 0 "register_operand" "=v")
11728 (match_operand:V16HI 1 "nonimmediate_operand" "vm")))]
11730 "vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
11731 [(set_attr "type" "ssemov")
11732 (set_attr "prefix" "evex")
11733 (set_attr "mode" "XI")])
;; 256-bit vpmov{sx,zx}wd: extends all 8 halfwords of a V8HI source to
;; V8SI.  The full 128-bit source is consumed, so it is printed unmodified.
11735 (define_insn "avx2_<code>v8hiv8si2"
11736 [(set (match_operand:V8SI 0 "register_operand" "=x")
11738 (match_operand:V8HI 1 "nonimmediate_operand" "xm")))]
11740 "vpmov<extsuffix>wd\t{%1, %0|%0, %1}"
11741 [(set_attr "type" "ssemov")
11742 (set_attr "prefix_extra" "1")
11743 (set_attr "prefix" "vex")
11744 (set_attr "mode" "OI")])
;; 128-bit pmov{sx,zx}wd: extends the low 4 halfwords (lanes 0..3) of a
;; V8HI source to V4SI.  Only 64 bits are read, hence %q1 in the Intel
;; template and ssememalign 64.
11746 (define_insn "sse4_1_<code>v4hiv4si2"
11747 [(set (match_operand:V4SI 0 "register_operand" "=x")
11750 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
11751 (parallel [(const_int 0) (const_int 1)
11752 (const_int 2) (const_int 3)]))))]
11754 "%vpmov<extsuffix>wd\t{%1, %0|%0, %q1}"
11755 [(set_attr "type" "ssemov")
11756 (set_attr "ssememalign" "64")
11757 (set_attr "prefix_extra" "1")
11758 (set_attr "prefix" "maybe_vex")
11759 (set_attr "mode" "TI")])
;; 512-bit vpmov{sx,zx}bq: extends the low 8 bytes (lanes 0..7) of a V16QI
;; source to V8DI, with an optional masked variant supplied by <mask_name>.
;; Eight bytes (64 bits) are read, so the Intel-syntax operand uses %q1;
;; %k1 would print a memory source as DWORD PTR, which is only half of what
;; the instruction loads.
11761 (define_insn "avx512f_<code>v8qiv8di2<mask_name>"
11762 [(set (match_operand:V8DI 0 "register_operand" "=v")
11765 (match_operand:V16QI 1 "nonimmediate_operand" "vm")
11766 (parallel [(const_int 0) (const_int 1)
11767 (const_int 2) (const_int 3)
11768 (const_int 4) (const_int 5)
11769 (const_int 6) (const_int 7)]))))]
11771 "vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
11772 [(set_attr "type" "ssemov")
11773 (set_attr "prefix" "evex")
11774 (set_attr "mode" "XI")])
;; 256-bit vpmov{sx,zx}bq: extends the low 4 bytes (lanes 0..3) of a V16QI
;; source to V4DI.  Only 32 bits are read, hence %k1 in the Intel template.
11776 (define_insn "avx2_<code>v4qiv4di2"
11777 [(set (match_operand:V4DI 0 "register_operand" "=x")
11780 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
11781 (parallel [(const_int 0) (const_int 1)
11782 (const_int 2) (const_int 3)]))))]
11784 "vpmov<extsuffix>bq\t{%1, %0|%0, %k1}"
11785 [(set_attr "type" "ssemov")
11786 (set_attr "prefix_extra" "1")
11787 (set_attr "prefix" "vex")
11788 (set_attr "mode" "OI")])
;; 128-bit pmov{sx,zx}bq: extends the low 2 bytes (lanes 0..1) of a V16QI
;; source to V2DI.  Only 16 bits are read, hence %w1 in the Intel template
;; and ssememalign 16.
11790 (define_insn "sse4_1_<code>v2qiv2di2"
11791 [(set (match_operand:V2DI 0 "register_operand" "=x")
11794 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
11795 (parallel [(const_int 0) (const_int 1)]))))]
11797 "%vpmov<extsuffix>bq\t{%1, %0|%0, %w1}"
11798 [(set_attr "type" "ssemov")
11799 (set_attr "ssememalign" "16")
11800 (set_attr "prefix_extra" "1")
11801 (set_attr "prefix" "maybe_vex")
11802 (set_attr "mode" "TI")])
;; 512-bit vpmov{sx,zx}wq: extends all 8 halfwords of a V8HI source to
;; V8DI, with an optional masked variant supplied by <mask_name>.
;; The whole 128-bit source is consumed, so the Intel-syntax operand is
;; printed unmodified (%1); a %q1 modifier would print a memory source as
;; QWORD PTR, which is only half of what the instruction loads.
11804 (define_insn "avx512f_<code>v8hiv8di2<mask_name>"
11805 [(set (match_operand:V8DI 0 "register_operand" "=v")
11807 (match_operand:V8HI 1 "nonimmediate_operand" "vm")))]
11809 "vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
11810 [(set_attr "type" "ssemov")
11811 (set_attr "prefix" "evex")
11812 (set_attr "mode" "XI")])
;; 256-bit vpmov{sx,zx}wq: extends the low 4 halfwords (lanes 0..3) of a
;; V8HI source to V4DI.  Only 64 bits are read, hence %q1 in the Intel
;; template.
11814 (define_insn "avx2_<code>v4hiv4di2"
11815 [(set (match_operand:V4DI 0 "register_operand" "=x")
11818 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
11819 (parallel [(const_int 0) (const_int 1)
11820 (const_int 2) (const_int 3)]))))]
11822 "vpmov<extsuffix>wq\t{%1, %0|%0, %q1}"
11823 [(set_attr "type" "ssemov")
11824 (set_attr "prefix_extra" "1")
11825 (set_attr "prefix" "vex")
11826 (set_attr "mode" "OI")])
;; 128-bit pmov{sx,zx}wq: extends the low 2 halfwords (lanes 0..1) of a
;; V8HI source to V2DI.  Only 32 bits are read, hence %k1 in the Intel
;; template and ssememalign 32.
11828 (define_insn "sse4_1_<code>v2hiv2di2"
11829 [(set (match_operand:V2DI 0 "register_operand" "=x")
11832 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
11833 (parallel [(const_int 0) (const_int 1)]))))]
11835 "%vpmov<extsuffix>wq\t{%1, %0|%0, %k1}"
11836 [(set_attr "type" "ssemov")
11837 (set_attr "ssememalign" "32")
11838 (set_attr "prefix_extra" "1")
11839 (set_attr "prefix" "maybe_vex")
11840 (set_attr "mode" "TI")])
;; 512-bit vpmov{sx,zx}dq: extends all 8 doublewords of a V8SI source to
;; V8DI, with an optional masked variant supplied by <mask_name>.  The full
;; source operand is consumed, so it is printed unmodified.
11842 (define_insn "avx512f_<code>v8siv8di2<mask_name>"
11843 [(set (match_operand:V8DI 0 "register_operand" "=v")
11845 (match_operand:V8SI 1 "nonimmediate_operand" "vm")))]
11847 "vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
11848 [(set_attr "type" "ssemov")
11849 (set_attr "prefix" "evex")
11850 (set_attr "mode" "XI")])
;; 256-bit vpmov{sx,zx}dq: extends all 4 doublewords of a V4SI source to
;; V4DI.  The full 128-bit source is consumed, so it is printed unmodified.
;; The template is VEX-only, so the "prefix" attribute is stated explicitly,
;; as every other avx2_* extension pattern in this group does.
11852 (define_insn "avx2_<code>v4siv4di2"
11853 [(set (match_operand:V4DI 0 "register_operand" "=x")
11855 (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
11857 "vpmov<extsuffix>dq\t{%1, %0|%0, %1}"
11858 [(set_attr "type" "ssemov")
11859 (set_attr "prefix_extra" "1")
 (set_attr "prefix" "vex")
11860 (set_attr "mode" "OI")])
;; 128-bit pmov{sx,zx}dq: extends the low 2 doublewords (lanes 0..1) of a
;; V4SI source to V2DI.  Only 64 bits are read, hence %q1 in the Intel
;; template and ssememalign 64.
11862 (define_insn "sse4_1_<code>v2siv2di2"
11863 [(set (match_operand:V2DI 0 "register_operand" "=x")
11866 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
11867 (parallel [(const_int 0) (const_int 1)]))))]
11869 "%vpmov<extsuffix>dq\t{%1, %0|%0, %q1}"
11870 [(set_attr "type" "ssemov")
11871 (set_attr "ssememalign" "64")
11872 (set_attr "prefix_extra" "1")
11873 (set_attr "prefix" "maybe_vex")
11874 (set_attr "mode" "TI")])
11876 ;; vtestps/vtestpd are very similar to comiss and ucomiss when
11877 ;; setting FLAGS_REG, but they are not really compare instructions.
;; vtestp{s,d}: sets FLAGS_REG (CC mode) from two VF_128_256 operands.
;; Operand 0 must be a register; operand 1 may be a register or memory.
;; No general or vector register is written.
11878 (define_insn "avx_vtest<ssemodesuffix><avxsizesuffix>"
11879 [(set (reg:CC FLAGS_REG)
11880 (unspec:CC [(match_operand:VF_128_256 0 "register_operand" "x")
11881 (match_operand:VF_128_256 1 "nonimmediate_operand" "xm")]
11884 "vtest<ssemodesuffix>\t{%1, %0|%0, %1}"
11885 [(set_attr "type" "ssecomi")
11886 (set_attr "prefix_extra" "1")
11887 (set_attr "prefix" "vex")
11888 (set_attr "mode" "<MODE>")])
11890 ;; ptest is very similar to comiss and ucomiss when setting FLAGS_REG,
11891 ;; but it is not really a compare instruction.
;; 256-bit vptest: sets FLAGS_REG (CC mode) from two V4DI operands.
;; Operand 0 must be a register; operand 1 may be a register or memory.
11892 (define_insn "avx_ptest256"
11893 [(set (reg:CC FLAGS_REG)
11894 (unspec:CC [(match_operand:V4DI 0 "register_operand" "x")
11895 (match_operand:V4DI 1 "nonimmediate_operand" "xm")]
11898 "vptest\t{%1, %0|%0, %1}"
11899 [(set_attr "type" "ssecomi")
11900 (set_attr "prefix_extra" "1")
11901 (set_attr "prefix" "vex")
11902 (set_attr "btver2_decode" "vector")
11903 (set_attr "mode" "OI")])
;; 128-bit ptest / vptest: sets FLAGS_REG (CC mode) from two V2DI operands.
;; Operand 0 must be a register; operand 1 may be a register or memory.
11905 (define_insn "sse4_1_ptest"
11906 [(set (reg:CC FLAGS_REG)
11907 (unspec:CC [(match_operand:V2DI 0 "register_operand" "x")
11908 (match_operand:V2DI 1 "nonimmediate_operand" "xm")]
11911 "%vptest\t{%1, %0|%0, %1}"
11912 [(set_attr "type" "ssecomi")
11913 (set_attr "prefix_extra" "1")
11914 (set_attr "prefix" "maybe_vex")
11915 (set_attr "mode" "TI")])
;; Packed roundp{s,d} / vroundp{s,d} over the VF_128_256 modes.  Operand 1
;; is the value to round (register or memory) and operand 2 is a 4-bit
;; immediate (const_0_to_15_operand).  prefix_data16 is chosen by a test
;; on TARGET_AVX.
11917 (define_insn "<sse4_1>_round<ssemodesuffix><avxsizesuffix>"
11918 [(set (match_operand:VF_128_256 0 "register_operand" "=x")
11920 [(match_operand:VF_128_256 1 "nonimmediate_operand" "xm")
11921 (match_operand:SI 2 "const_0_to_15_operand" "n")]
11924 "%vround<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11925 [(set_attr "type" "ssecvt")
11926 (set (attr "prefix_data16")
11928 (match_test "TARGET_AVX")
11930 (const_string "1")))
11931 (set_attr "prefix_extra" "1")
11932 (set_attr "length_immediate" "1")
11933 (set_attr "prefix" "maybe_vex")
11934 (set_attr "mode" "<MODE>")])
;; Round-then-convert expander for VF1_128_256: operand 1 is rounded into a
;; fresh temporary using the rounding immediate, then fix_trunc converts
;; the temporary to the integer vector result in operand 0.
11936 (define_expand "<sse4_1>_round<ssemodesuffix>_sfix<avxsizesuffix>"
11937 [(match_operand:<sseintvecmode> 0 "register_operand")
11938 (match_operand:VF1_128_256 1 "nonimmediate_operand")
11939 (match_operand:SI 2 "const_0_to_15_operand")]
11942 rtx tmp = gen_reg_rtx (<MODE>mode);
11945 (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp, operands[1],
11948 (gen_fix_trunc<mode><sseintvecmodelower>2 (operands[0], tmp));
;; 512-bit roundpd expander: forwards its three operands unchanged to
;; gen_avx512f_rndscalev8df.
11952 (define_expand "avx512f_roundpd512"
11953 [(match_operand:V8DF 0 "register_operand")
11954 (match_operand:V8DF 1 "nonimmediate_operand")
11955 (match_operand:SI 2 "const_0_to_15_operand")]
11958 emit_insn (gen_avx512f_rndscalev8df (operands[0], operands[1], operands[2]));
;; Rounds two VF2 inputs with the immediate in operand 3 and packs the
;; truncated integer results into operand 0.
;; For V2DF with AVX available and 128-bit AVX not preferred, the two
;; inputs are concatenated into one V4DF, rounded with a single 256-bit
;; roundpd, and converted with fix_truncv4dfv4si2.  Otherwise each input is
;; rounded separately and vec_pack_sfix_trunc_<mode> combines the results.
11962 (define_expand "<sse4_1>_round<ssemodesuffix>_vec_pack_sfix<avxsizesuffix>"
11963 [(match_operand:<ssepackfltmode> 0 "register_operand")
11964 (match_operand:VF2 1 "nonimmediate_operand")
11965 (match_operand:VF2 2 "nonimmediate_operand")
11966 (match_operand:SI 3 "const_0_to_15_operand")]
11971 if (<MODE>mode == V2DFmode
11972 && TARGET_AVX && !TARGET_PREFER_AVX128)
11974 rtx tmp2 = gen_reg_rtx (V4DFmode);
11976 tmp0 = gen_reg_rtx (V4DFmode);
11977 tmp1 = force_reg (V2DFmode, operands[1]);
11979 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
11980 emit_insn (gen_avx_roundpd256 (tmp2, tmp0, operands[3]));
11981 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp2));
11985 tmp0 = gen_reg_rtx (<MODE>mode);
11986 tmp1 = gen_reg_rtx (<MODE>mode);
11989 (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp0, operands[1],
11992 (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp1, operands[2],
11995 (gen_vec_pack_sfix_trunc_<mode> (operands[0], tmp0, tmp1));
;; Scalar rounds{s,d} / vrounds{s,d} on VF_128 registers.  Operand 2 is
;; the value to round under the immediate in operand 3.  Operand 1 is the
;; other vector input, tied to the destination in the noavx alternative.
12000 (define_insn "sse4_1_round<ssescalarmodesuffix>"
12001 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
12004 [(match_operand:VF_128 2 "register_operand" "x,x")
12005 (match_operand:SI 3 "const_0_to_15_operand" "n,n")]
12007 (match_operand:VF_128 1 "register_operand" "0,x")
12011 round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
12012 vround<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
12013 [(set_attr "isa" "noavx,avx")
12014 (set_attr "type" "ssecvt")
12015 (set_attr "length_immediate" "1")
12016 (set_attr "prefix_data16" "1,*")
12017 (set_attr "prefix_extra" "1")
12018 (set_attr "prefix" "orig,vex")
12019 (set_attr "mode" "<MODE>")])
;; Standard-name round expander for the VF modes, enabled only with
;; TARGET_ROUND and when trapping math is off.
;; The preparation code builds pred_half = 0.5 - 2**(-p-1), i.e.
;; nextafter (0.5, 0.0) for the element format, broadcasts it into a vector
;; register, and gives it the sign of operand 1 via copysign (operands[3]).
;; operands[4] is a fresh intermediate register, set by the first pattern
;; from operand 1 (presumably operand 1 plus operands[3] -- confirm).
;; The second pattern rounds operands[4] with the ROUND_TRUNC immediate held
;; in operands[5].
12021 (define_expand "round<mode>2"
12022 [(set (match_dup 4)
12024 (match_operand:VF 1 "register_operand")
12026 (set (match_operand:VF 0 "register_operand")
12028 [(match_dup 4) (match_dup 5)]
12030 "TARGET_ROUND && !flag_trapping_math"
12032 enum machine_mode scalar_mode;
12033 const struct real_format *fmt;
12034 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
12035 rtx half, vec_half;
12037 scalar_mode = GET_MODE_INNER (<MODE>mode);
12039 /* load nextafter (0.5, 0.0) */
12040 fmt = REAL_MODE_FORMAT (scalar_mode);
12041 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, scalar_mode);
12042 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
12043 half = const_double_from_real_value (pred_half, scalar_mode);
12045 vec_half = ix86_build_const_vector (<MODE>mode, true, half);
12046 vec_half = force_reg (<MODE>mode, vec_half);
12048 operands[3] = gen_reg_rtx (<MODE>mode);
12049 emit_insn (gen_copysign<mode>3 (operands[3], vec_half, operands[1]));
12051 operands[4] = gen_reg_rtx (<MODE>mode);
12052 operands[5] = GEN_INT (ROUND_TRUNC);
;; round-then-convert for VF1_128_256: rounds operand 1 into a temporary
;; with round<mode>2, then converts it to the integer vector in operand 0
;; with fix_trunc.  Same enabling condition as round<mode>2.
12055 (define_expand "round<mode>2_sfix"
12056 [(match_operand:<sseintvecmode> 0 "register_operand")
12057 (match_operand:VF1_128_256 1 "register_operand")]
12058 "TARGET_ROUND && !flag_trapping_math"
12060 rtx tmp = gen_reg_rtx (<MODE>mode);
12062 emit_insn (gen_round<mode>2 (tmp, operands[1]));
12065 (gen_fix_trunc<mode><sseintvecmodelower>2 (operands[0], tmp));
;; Rounds two VF2 inputs and packs the truncated integer results into
;; operand 0.  For V2DF with AVX available and 128-bit AVX not preferred,
;; the inputs are concatenated into one V4DF, rounded once with roundv4df2,
;; and converted with fix_truncv4dfv4si2.  Otherwise each input is rounded
;; separately and vec_pack_sfix_trunc_<mode> combines the results.
12069 (define_expand "round<mode>2_vec_pack_sfix"
12070 [(match_operand:<ssepackfltmode> 0 "register_operand")
12071 (match_operand:VF2 1 "register_operand")
12072 (match_operand:VF2 2 "register_operand")]
12073 "TARGET_ROUND && !flag_trapping_math"
12077 if (<MODE>mode == V2DFmode
12078 && TARGET_AVX && !TARGET_PREFER_AVX128)
12080 rtx tmp2 = gen_reg_rtx (V4DFmode);
12082 tmp0 = gen_reg_rtx (V4DFmode);
12083 tmp1 = force_reg (V2DFmode, operands[1]);
12085 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
12086 emit_insn (gen_roundv4df2 (tmp2, tmp0));
12087 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp2));
12091 tmp0 = gen_reg_rtx (<MODE>mode);
12092 tmp1 = gen_reg_rtx (<MODE>mode);
12094 emit_insn (gen_round<mode>2 (tmp0, operands[1]));
12095 emit_insn (gen_round<mode>2 (tmp1, operands[2]));
12098 (gen_vec_pack_sfix_trunc_<mode> (operands[0], tmp0, tmp1));
12103 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
12105 ;; Intel SSE4.2 string/text processing instructions
12107 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Combined pcmpestr pattern producing three results at once: an index in
;; operand 0 ("c"), a mask in operand 1 ("Yz") and FLAGS_REG.  It is split
;; while pseudos can still be created.
;; The split code reads the REG_UNUSED notes on curr_insn to see which
;; results are live.  It can emit pcmpestri for the index and pcmpestrm for
;; the mask (presumably guarded by the ecx / xmm0 liveness flags).  The
;; flags-only pattern is emitted when just FLAGS_REG is live, and a
;; NOTE_INSN_DELETED when nothing is live.
12109 (define_insn_and_split "sse4_2_pcmpestr"
12110 [(set (match_operand:SI 0 "register_operand" "=c,c")
12112 [(match_operand:V16QI 2 "register_operand" "x,x")
12113 (match_operand:SI 3 "register_operand" "a,a")
12114 (match_operand:V16QI 4 "nonimmediate_operand" "x,m")
12115 (match_operand:SI 5 "register_operand" "d,d")
12116 (match_operand:SI 6 "const_0_to_255_operand" "n,n")]
12118 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
12126 (set (reg:CC FLAGS_REG)
12135 && can_create_pseudo_p ()"
12140 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
12141 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
12142 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
12145 emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
12146 operands[3], operands[4],
12147 operands[5], operands[6]));
12149 emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
12150 operands[3], operands[4],
12151 operands[5], operands[6]));
12152 if (flags && !(ecx || xmm0))
12153 emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
12154 operands[2], operands[3],
12155 operands[4], operands[5],
12157 if (!(flags || ecx || xmm0))
12158 emit_note (NOTE_INSN_DELETED);
12162 [(set_attr "type" "sselog")
12163 (set_attr "prefix_data16" "1")
12164 (set_attr "prefix_extra" "1")
12165 (set_attr "ssememalign" "8")
12166 (set_attr "length_immediate" "1")
12167 (set_attr "memory" "none,load")
12168 (set_attr "mode" "TI")])
;; Variant of the combined pcmpestr pattern whose second string operand
;; (operand 4) is a memory operand wrapped in UNSPEC_LOADU.  The split code
;; is the same liveness-driven dispatch as in sse4_2_pcmpestr and passes the
;; bare memory operand to the individual pcmpestri / pcmpestrm / cconly
;; generators.
12170 (define_insn_and_split "*sse4_2_pcmpestr_unaligned"
12171 [(set (match_operand:SI 0 "register_operand" "=c")
12173 [(match_operand:V16QI 2 "register_operand" "x")
12174 (match_operand:SI 3 "register_operand" "a")
12176 [(match_operand:V16QI 4 "memory_operand" "m")]
12178 (match_operand:SI 5 "register_operand" "d")
12179 (match_operand:SI 6 "const_0_to_255_operand" "n")]
12181 (set (match_operand:V16QI 1 "register_operand" "=Yz")
12185 (unspec:V16QI [(match_dup 4)] UNSPEC_LOADU)
12189 (set (reg:CC FLAGS_REG)
12193 (unspec:V16QI [(match_dup 4)] UNSPEC_LOADU)
12198 && can_create_pseudo_p ()"
12203 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
12204 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
12205 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
12208 emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
12209 operands[3], operands[4],
12210 operands[5], operands[6]));
12212 emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
12213 operands[3], operands[4],
12214 operands[5], operands[6]));
12215 if (flags && !(ecx || xmm0))
12216 emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
12217 operands[2], operands[3],
12218 operands[4], operands[5],
12220 if (!(flags || ecx || xmm0))
12221 emit_note (NOTE_INSN_DELETED);
12225 [(set_attr "type" "sselog")
12226 (set_attr "prefix_data16" "1")
12227 (set_attr "prefix_extra" "1")
12228 (set_attr "ssememalign" "8")
12229 (set_attr "length_immediate" "1")
12230 (set_attr "memory" "load")
12231 (set_attr "mode" "TI")])
;; Emits %vpcmpestri.  Sets SImode operand 0 (constraint "c") and FLAGS_REG.
;; Inputs: V16QI operands 1 and 3 (operand 3 may be a register or memory),
;; SImode operands 2 and 4 tied to constraints "a" and "d", and a 0..255
;; immediate (operand 5).  The two alternatives differ only in whether
;; operand 3 is a register ("memory" = none) or memory ("memory" = load).
12233 (define_insn "sse4_2_pcmpestri"
12234 [(set (match_operand:SI 0 "register_operand" "=c,c")
12236 [(match_operand:V16QI 1 "register_operand" "x,x")
12237 (match_operand:SI 2 "register_operand" "a,a")
12238 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
12239 (match_operand:SI 4 "register_operand" "d,d")
12240 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
12242 (set (reg:CC FLAGS_REG)
12251 "%vpcmpestri\t{%5, %3, %1|%1, %3, %5}"
12252 [(set_attr "type" "sselog")
12253 (set_attr "prefix_data16" "1")
12254 (set_attr "prefix_extra" "1")
12255 (set_attr "prefix" "maybe_vex")
12256 (set_attr "ssememalign" "8")
12257 (set_attr "length_immediate" "1")
12258 (set_attr "btver2_decode" "vector")
12259 (set_attr "memory" "none,load")
12260 (set_attr "mode" "TI")])
;; Emits %vpcmpestrm.  Sets V16QI operand 0 (constraint "Yz") and FLAGS_REG.
;; Inputs mirror sse4_2_pcmpestri: V16QI operands 1 and 3 (operand 3 may be
;; a register or memory), SImode operands 2 and 4 tied to constraints "a"
;; and "d", and a 0..255 immediate (operand 5).
12262 (define_insn "sse4_2_pcmpestrm"
12263 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
12265 [(match_operand:V16QI 1 "register_operand" "x,x")
12266 (match_operand:SI 2 "register_operand" "a,a")
12267 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
12268 (match_operand:SI 4 "register_operand" "d,d")
12269 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
12271 (set (reg:CC FLAGS_REG)
12280 "%vpcmpestrm\t{%5, %3, %1|%1, %3, %5}"
12281 [(set_attr "type" "sselog")
12282 (set_attr "prefix_data16" "1")
12283 (set_attr "prefix_extra" "1")
12284 (set_attr "ssememalign" "8")
12285 (set_attr "length_immediate" "1")
12286 (set_attr "prefix" "maybe_vex")
12287 (set_attr "btver2_decode" "vector")
12288 (set_attr "memory" "none,load")
12289 (set_attr "mode" "TI")])
;; Flags-only form: only FLAGS_REG is set; the register results are scratch
;; clobbers.  Four alternatives: the first two clobber a V16QI scratch
;; ("Yz") and print %vpcmpestrm, the last two clobber an SImode scratch
;; ("c") and print %vpcmpestri.  Within each pair, operand 4 is a register
;; in one alternative and memory in the other.
12291 (define_insn "sse4_2_pcmpestr_cconly"
12292 [(set (reg:CC FLAGS_REG)
12294 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
12295 (match_operand:SI 3 "register_operand" "a,a,a,a")
12296 (match_operand:V16QI 4 "nonimmediate_operand" "x,m,x,m")
12297 (match_operand:SI 5 "register_operand" "d,d,d,d")
12298 (match_operand:SI 6 "const_0_to_255_operand" "n,n,n,n")]
12300 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
12301 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
12304 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
12305 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
12306 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}
12307 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}"
12308 [(set_attr "type" "sselog")
12309 (set_attr "prefix_data16" "1")
12310 (set_attr "prefix_extra" "1")
12311 (set_attr "ssememalign" "8")
12312 (set_attr "length_immediate" "1")
12313 (set_attr "memory" "none,load,none,load")
12314 (set_attr "btver2_decode" "vector,vector,vector,vector")
12315 (set_attr "prefix" "maybe_vex")
12316 (set_attr "mode" "TI")])
;; Combined PCMPISTR pattern: sets operand 0 (constraint "c"), operand 1
;; (constraint "Yz") and FLAGS_REG from V16QI operands 2 and 3 plus a 0..255
;; immediate (operand 4).  The split code reads REG_UNUSED notes on
;; curr_insn to learn which results are live.  gen_sse4_2_pcmpistri produces
;; operand 0 and gen_sse4_2_pcmpistrm produces operand 1; the flags-only
;; form is emitted when FLAGS_REG is live but neither register result is,
;; and only a NOTE_INSN_DELETED is emitted when none of the three is live.
12318 (define_insn_and_split "sse4_2_pcmpistr"
12319 [(set (match_operand:SI 0 "register_operand" "=c,c")
12321 [(match_operand:V16QI 2 "register_operand" "x,x")
12322 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
12323 (match_operand:SI 4 "const_0_to_255_operand" "n,n")]
12325 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
12331 (set (reg:CC FLAGS_REG)
12338 && can_create_pseudo_p ()"
12343 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
12344 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
12345 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
12348 emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
12349 operands[3], operands[4]));
12351 emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
12352 operands[3], operands[4]));
12353 if (flags && !(ecx || xmm0))
12354 emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
12355 operands[2], operands[3],
12357 if (!(flags || ecx || xmm0))
12358 emit_note (NOTE_INSN_DELETED);
12362 [(set_attr "type" "sselog")
12363 (set_attr "prefix_data16" "1")
12364 (set_attr "prefix_extra" "1")
12365 (set_attr "ssememalign" "8")
12366 (set_attr "length_immediate" "1")
12367 (set_attr "memory" "none,load")
12368 (set_attr "mode" "TI")])
;; Variant of the combined PCMPISTR pattern whose second string (operand 3)
;; is a memory operand wrapped in UNSPEC_LOADU.  Liveness of operand 0,
;; operand 1 and FLAGS_REG is read from REG_UNUSED notes on curr_insn.
;; gen_sse4_2_pcmpistri produces operand 0 and gen_sse4_2_pcmpistrm produces
;; operand 1; the flags-only form is emitted when FLAGS_REG is live but
;; neither register result is, and only a NOTE_INSN_DELETED is emitted when
;; none of the three is live.
12370 (define_insn_and_split "*sse4_2_pcmpistr_unaligned"
12371 [(set (match_operand:SI 0 "register_operand" "=c")
12373 [(match_operand:V16QI 2 "register_operand" "x")
12375 [(match_operand:V16QI 3 "memory_operand" "m")]
12377 (match_operand:SI 4 "const_0_to_255_operand" "n")]
12379 (set (match_operand:V16QI 1 "register_operand" "=Yz")
12382 (unspec:V16QI [(match_dup 3)] UNSPEC_LOADU)
12385 (set (reg:CC FLAGS_REG)
12388 (unspec:V16QI [(match_dup 3)] UNSPEC_LOADU)
12392 && can_create_pseudo_p ()"
12397 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
12398 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
12399 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
12402 emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
12403 operands[3], operands[4]));
12405 emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
12406 operands[3], operands[4]));
12407 if (flags && !(ecx || xmm0))
12408 emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
12409 operands[2], operands[3],
12411 if (!(flags || ecx || xmm0))
12412 emit_note (NOTE_INSN_DELETED);
12416 [(set_attr "type" "sselog")
12417 (set_attr "prefix_data16" "1")
12418 (set_attr "prefix_extra" "1")
12419 (set_attr "ssememalign" "8")
12420 (set_attr "length_immediate" "1")
12421 (set_attr "memory" "load")
12422 (set_attr "mode" "TI")])
;; Emits %vpcmpistri.  Sets SImode operand 0 (constraint "c") and FLAGS_REG
;; from V16QI operands 1 and 2 (operand 2 may be a register or memory) and a
;; 0..255 immediate (operand 3).
12424 (define_insn "sse4_2_pcmpistri"
12425 [(set (match_operand:SI 0 "register_operand" "=c,c")
12427 [(match_operand:V16QI 1 "register_operand" "x,x")
12428 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
12429 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
12431 (set (reg:CC FLAGS_REG)
12438 "%vpcmpistri\t{%3, %2, %1|%1, %2, %3}"
12439 [(set_attr "type" "sselog")
12440 (set_attr "prefix_data16" "1")
12441 (set_attr "prefix_extra" "1")
12442 (set_attr "ssememalign" "8")
12443 (set_attr "length_immediate" "1")
12444 (set_attr "prefix" "maybe_vex")
12445 (set_attr "memory" "none,load")
12446 (set_attr "btver2_decode" "vector")
12447 (set_attr "mode" "TI")])
;; Emits %vpcmpistrm.  Sets V16QI operand 0 (constraint "Yz") and FLAGS_REG
;; from V16QI operands 1 and 2 (operand 2 may be a register or memory) and a
;; 0..255 immediate (operand 3).
12449 (define_insn "sse4_2_pcmpistrm"
12450 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
12452 [(match_operand:V16QI 1 "register_operand" "x,x")
12453 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
12454 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
12456 (set (reg:CC FLAGS_REG)
12463 "%vpcmpistrm\t{%3, %2, %1|%1, %2, %3}"
12464 [(set_attr "type" "sselog")
12465 (set_attr "prefix_data16" "1")
12466 (set_attr "prefix_extra" "1")
12467 (set_attr "ssememalign" "8")
12468 (set_attr "length_immediate" "1")
12469 (set_attr "prefix" "maybe_vex")
12470 (set_attr "memory" "none,load")
12471 (set_attr "btver2_decode" "vector")
12472 (set_attr "mode" "TI")])
;; Flags-only form: only FLAGS_REG is set; the register results are scratch
;; clobbers.  Four alternatives: the first two clobber a V16QI scratch
;; ("Yz") and print %vpcmpistrm, the last two clobber an SImode scratch
;; ("c") and print %vpcmpistri.  Within each pair, operand 3 is a register
;; in one alternative and memory in the other.
12474 (define_insn "sse4_2_pcmpistr_cconly"
12475 [(set (reg:CC FLAGS_REG)
12477 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
12478 (match_operand:V16QI 3 "nonimmediate_operand" "x,m,x,m")
12479 (match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")]
12481 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
12482 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
12485 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
12486 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
12487 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}
12488 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}"
12489 [(set_attr "type" "sselog")
12490 (set_attr "prefix_data16" "1")
12491 (set_attr "prefix_extra" "1")
12492 (set_attr "ssememalign" "8")
12493 (set_attr "length_immediate" "1")
12494 (set_attr "memory" "none,load,none,load")
12495 (set_attr "prefix" "maybe_vex")
12496 (set_attr "btver2_decode" "vector,vector,vector,vector")
12497 (set_attr "mode" "TI")])
12499 ;; Packed float variants
;; Maps the index-vector mode to the mode given to the prefetched memory
;; reference in the "sf" gather/scatter prefetch patterns: V8DI -> V8SF,
;; V16SI -> V16SF.
12500 (define_mode_attr GATHER_SCATTER_SF_MEM_MODE
12501 [(V8DI "V8SF") (V16SI "V16SF")])
;; Expander for the single-precision gather prefetch.  Operand 0 is the mask
;; (a register or constant -1), operand 1 the VI48_512 index vector,
;; operand 2 the address, operand 3 the scale (1, 2, 4 or 8) and operand 4 a
;; 0/1 selector.  The preparation code packs operands 2, 1 and 3 into a
;; Pmode UNSPEC_VSIBADDR.
12503 (define_expand "avx512pf_gatherpf<mode>sf"
12505 [(match_operand:<avx512fmaskmode> 0 "register_or_constm1_operand")
12506 (mem:<GATHER_SCATTER_SF_MEM_MODE>
12508 [(match_operand 2 "vsib_address_operand")
12509 (match_operand:VI48_512 1 "register_operand")
12510 (match_operand:SI 3 "const1248_operand")]))
12511 (match_operand:SI 4 "const_0_to_1_operand")]
12512 UNSPEC_GATHER_PREFETCH)]
12516 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
12517 operands[3]), UNSPEC_VSIBADDR);
;; Masked single-precision gather prefetch.  Operand 0 is a mask register
;; ("k"); operand 5 is the VSIB memory reference built from operands 2, 1
;; and 3.  Operand 4 (0 or 1) selects between the vgatherpf0 and vgatherpf1
;; mnemonics; the mask is printed in braces after the memory operand.
12520 (define_insn "*avx512pf_gatherpf<mode>sf_mask"
12522 [(match_operand:<avx512fmaskmode> 0 "register_operand" "k")
12523 (match_operator:<GATHER_SCATTER_SF_MEM_MODE> 5 "vsib_mem_operator"
12525 [(match_operand:P 2 "vsib_address_operand" "Tv")
12526 (match_operand:VI48_512 1 "register_operand" "v")
12527 (match_operand:SI 3 "const1248_operand" "n")]
12529 (match_operand:SI 4 "const_0_to_1_operand" "n")]
12530 UNSPEC_GATHER_PREFETCH)]
12533 switch (INTVAL (operands[4]))
12536 return "vgatherpf0<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}";
12538 return "vgatherpf1<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}";
12540 gcc_unreachable ();
12543 [(set_attr "type" "sse")
12544 (set_attr "prefix" "evex")
12545 (set_attr "mode" "XI")])
;; Single-precision gather prefetch printed without a mask.  Operand 4 is
;; the VSIB memory reference built from operands 1, 0 and 2.  Operand 3
;; (0 or 1) selects between the vgatherpf0 and vgatherpf1 mnemonics.
12547 (define_insn "*avx512pf_gatherpf<mode>sf"
12550 (match_operator:<GATHER_SCATTER_SF_MEM_MODE> 4 "vsib_mem_operator"
12552 [(match_operand:P 1 "vsib_address_operand" "Tv")
12553 (match_operand:VI48_512 0 "register_operand" "v")
12554 (match_operand:SI 2 "const1248_operand" "n")]
12556 (match_operand:SI 3 "const_0_to_1_operand" "n")]
12557 UNSPEC_GATHER_PREFETCH)]
12560 switch (INTVAL (operands[3]))
12563 return "vgatherpf0<ssemodesuffix>ps\t{%4|%4}";
12565 return "vgatherpf1<ssemodesuffix>ps\t{%4|%4}";
12567 gcc_unreachable ();
12570 [(set_attr "type" "sse")
12571 (set_attr "prefix" "evex")
12572 (set_attr "mode" "XI")])
12574 ;; Packed double variants
;; Expander for the double-precision gather prefetch.  Operand 0 is the mask
;; (a register or constant -1), operand 1 the VI4_256_8_512 index vector,
;; operand 2 the address, operand 3 the scale (1, 2, 4 or 8) and operand 4 a
;; 0/1 selector.  The preparation code packs operands 2, 1 and 3 into a
;; Pmode UNSPEC_VSIBADDR.
12575 (define_expand "avx512pf_gatherpf<mode>df"
12577 [(match_operand:<avx512fmaskmode> 0 "register_or_constm1_operand")
12580 [(match_operand 2 "vsib_address_operand")
12581 (match_operand:VI4_256_8_512 1 "register_operand")
12582 (match_operand:SI 3 "const1248_operand")]))
12583 (match_operand:SI 4 "const_0_to_1_operand")]
12584 UNSPEC_GATHER_PREFETCH)]
12588 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
12589 operands[3]), UNSPEC_VSIBADDR);
;; Masked double-precision gather prefetch.  Operand 0 is a mask register
;; ("k"); operand 5 is the V8DF VSIB memory reference built from operands 2,
;; 1 and 3.  Operand 4 (0 or 1) selects between the vgatherpf0 and
;; vgatherpf1 mnemonics; the mask is printed in braces after the memory
;; operand.
12592 (define_insn "*avx512pf_gatherpf<mode>df_mask"
12594 [(match_operand:<avx512fmaskmode> 0 "register_operand" "k")
12595 (match_operator:V8DF 5 "vsib_mem_operator"
12597 [(match_operand:P 2 "vsib_address_operand" "Tv")
12598 (match_operand:VI4_256_8_512 1 "register_operand" "v")
12599 (match_operand:SI 3 "const1248_operand" "n")]
12601 (match_operand:SI 4 "const_0_to_1_operand" "n")]
12602 UNSPEC_GATHER_PREFETCH)]
12605 switch (INTVAL (operands[4]))
12608 return "vgatherpf0<ssemodesuffix>pd\t{%5%{%0%}|%5%{%0%}}";
12610 return "vgatherpf1<ssemodesuffix>pd\t{%5%{%0%}|%5%{%0%}}";
12612 gcc_unreachable ();
12615 [(set_attr "type" "sse")
12616 (set_attr "prefix" "evex")
12617 (set_attr "mode" "XI")])
;; Double-precision gather prefetch printed without a mask.  Operand 4 is
;; the V8DF VSIB memory reference built from operands 1, 0 and 2.  Operand 3
;; (0 or 1) selects between the vgatherpf0 and vgatherpf1 mnemonics.
12619 (define_insn "*avx512pf_gatherpf<mode>df"
12622 (match_operator:V8DF 4 "vsib_mem_operator"
12624 [(match_operand:P 1 "vsib_address_operand" "Tv")
12625 (match_operand:VI4_256_8_512 0 "register_operand" "v")
12626 (match_operand:SI 2 "const1248_operand" "n")]
12628 (match_operand:SI 3 "const_0_to_1_operand" "n")]
12629 UNSPEC_GATHER_PREFETCH)]
12632 switch (INTVAL (operands[3]))
12635 return "vgatherpf0<ssemodesuffix>pd\t{%4|%4}";
12637 return "vgatherpf1<ssemodesuffix>pd\t{%4|%4}";
12639 gcc_unreachable ();
12642 [(set_attr "type" "sse")
12643 (set_attr "prefix" "evex")
12644 (set_attr "mode" "XI")])
12646 ;; Packed float variants
;; Expander for the single-precision scatter prefetch.  Operand 0 is the
;; mask (a register or constant -1), operand 1 the VI48_512 index vector,
;; operand 2 the address, operand 3 the scale (1, 2, 4 or 8) and operand 4 a
;; 0/1 selector.  The preparation code packs operands 2, 1 and 3 into a
;; Pmode UNSPEC_VSIBADDR.
12647 (define_expand "avx512pf_scatterpf<mode>sf"
12649 [(match_operand:<avx512fmaskmode> 0 "register_or_constm1_operand")
12650 (mem:<GATHER_SCATTER_SF_MEM_MODE>
12652 [(match_operand 2 "vsib_address_operand")
12653 (match_operand:VI48_512 1 "register_operand")
12654 (match_operand:SI 3 "const1248_operand")]))
12655 (match_operand:SI 4 "const_0_to_1_operand")]
12656 UNSPEC_SCATTER_PREFETCH)]
12660 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
12661 operands[3]), UNSPEC_VSIBADDR);
;; Masked single-precision scatter prefetch.  Operand 0 is a mask register
;; ("k"); operand 5 is the VSIB memory reference built from operands 2, 1
;; and 3.  Operand 4 (0 or 1) selects between the vscatterpf0 and
;; vscatterpf1 mnemonics; the mask is printed in braces after the memory
;; operand.
12664 (define_insn "*avx512pf_scatterpf<mode>sf_mask"
12666 [(match_operand:<avx512fmaskmode> 0 "register_operand" "k")
12667 (match_operator:<GATHER_SCATTER_SF_MEM_MODE> 5 "vsib_mem_operator"
12669 [(match_operand:P 2 "vsib_address_operand" "Tv")
12670 (match_operand:VI48_512 1 "register_operand" "v")
12671 (match_operand:SI 3 "const1248_operand" "n")]
12673 (match_operand:SI 4 "const_0_to_1_operand" "n")]
12674 UNSPEC_SCATTER_PREFETCH)]
12677 switch (INTVAL (operands[4]))
12680 return "vscatterpf0<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}";
12682 return "vscatterpf1<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}";
12684 gcc_unreachable ();
12687 [(set_attr "type" "sse")
12688 (set_attr "prefix" "evex")
12689 (set_attr "mode" "XI")])
;; Single-precision scatter prefetch printed without a mask.  Operand 4 is
;; the VSIB memory reference built from operands 1, 0 and 2.  Operand 3
;; (0 or 1) selects between the vscatterpf0 and vscatterpf1 mnemonics.
12691 (define_insn "*avx512pf_scatterpf<mode>sf"
12694 (match_operator:<GATHER_SCATTER_SF_MEM_MODE> 4 "vsib_mem_operator"
12696 [(match_operand:P 1 "vsib_address_operand" "Tv")
12697 (match_operand:VI48_512 0 "register_operand" "v")
12698 (match_operand:SI 2 "const1248_operand" "n")]
12700 (match_operand:SI 3 "const_0_to_1_operand" "n")]
12701 UNSPEC_SCATTER_PREFETCH)]
12704 switch (INTVAL (operands[3]))
12707 return "vscatterpf0<ssemodesuffix>ps\t{%4|%4}";
12709 return "vscatterpf1<ssemodesuffix>ps\t{%4|%4}";
12711 gcc_unreachable ();
12714 [(set_attr "type" "sse")
12715 (set_attr "prefix" "evex")
12716 (set_attr "mode" "XI")])
12718 ;; Packed double variants
;; Expander for the double-precision scatter prefetch.  Operand 0 is the
;; mask (a register or constant -1), operand 1 the VI4_256_8_512 index
;; vector, operand 2 the address, operand 3 the scale (1, 2, 4 or 8) and
;; operand 4 a 0/1 selector.  The preparation code packs operands 2, 1 and 3
;; into a Pmode UNSPEC_VSIBADDR.
12719 (define_expand "avx512pf_scatterpf<mode>df"
12721 [(match_operand:<avx512fmaskmode> 0 "register_or_constm1_operand")
12724 [(match_operand 2 "vsib_address_operand")
12725 (match_operand:VI4_256_8_512 1 "register_operand")
12726 (match_operand:SI 3 "const1248_operand")]))
12727 (match_operand:SI 4 "const_0_to_1_operand")]
12728 UNSPEC_SCATTER_PREFETCH)]
12732 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
12733 operands[3]), UNSPEC_VSIBADDR);
;; Masked double-precision scatter prefetch.  Operand 0 is a mask register
;; ("k"); operand 5 is the V8DF VSIB memory reference built from operands 2,
;; 1 and 3.  Operand 4 (0 or 1) selects between the vscatterpf0 and
;; vscatterpf1 mnemonics; the mask is printed in braces after the memory
;; operand.
12736 (define_insn "*avx512pf_scatterpf<mode>df_mask"
12738 [(match_operand:<avx512fmaskmode> 0 "register_operand" "k")
12739 (match_operator:V8DF 5 "vsib_mem_operator"
12741 [(match_operand:P 2 "vsib_address_operand" "Tv")
12742 (match_operand:VI4_256_8_512 1 "register_operand" "v")
12743 (match_operand:SI 3 "const1248_operand" "n")]
12745 (match_operand:SI 4 "const_0_to_1_operand" "n")]
12746 UNSPEC_SCATTER_PREFETCH)]
12749 switch (INTVAL (operands[4]))
12752 return "vscatterpf0<ssemodesuffix>pd\t{%5%{%0%}|%5%{%0%}}";
12754 return "vscatterpf1<ssemodesuffix>pd\t{%5%{%0%}|%5%{%0%}}";
12756 gcc_unreachable ();
12759 [(set_attr "type" "sse")
12760 (set_attr "prefix" "evex")
12761 (set_attr "mode" "XI")])
;; Double-precision scatter prefetch printed without a mask.  Operand 4 is
;; the V8DF VSIB memory reference built from operands 1, 0 and 2.  Operand 3
;; (0 or 1) selects between the vscatterpf0 and vscatterpf1 mnemonics.
12763 (define_insn "*avx512pf_scatterpf<mode>df"
12766 (match_operator:V8DF 4 "vsib_mem_operator"
12768 [(match_operand:P 1 "vsib_address_operand" "Tv")
12769 (match_operand:VI4_256_8_512 0 "register_operand" "v")
12770 (match_operand:SI 2 "const1248_operand" "n")]
12772 (match_operand:SI 3 "const_0_to_1_operand" "n")]
12773 UNSPEC_SCATTER_PREFETCH)]
12776 switch (INTVAL (operands[3]))
12779 return "vscatterpf0<ssemodesuffix>pd\t{%4|%4}";
12781 return "vscatterpf1<ssemodesuffix>pd\t{%4|%4}";
12783 gcc_unreachable ();
12786 [(set_attr "type" "sse")
12787 (set_attr "prefix" "evex")
12788 (set_attr "mode" "XI")])
;; Emits vexp2<ssemodesuffix> on a VF_512 operand.  The <mask_name> and
;; <round_saeonly_name> parts of the pattern name, together with the
;; <mask_operand2> / <round_saeonly_mask_op2> fragments in the template,
;; supply the masked and SAE variants.
12790 (define_insn "avx512er_exp2<mode><mask_name><round_saeonly_name>"
12791 [(set (match_operand:VF_512 0 "register_operand" "=v")
12793 [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
12796 "vexp2<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
12797 [(set_attr "prefix" "evex")
12798 (set_attr "mode" "<MODE>")])
;; Emits vrcp28<ssemodesuffix> on a VF_512 operand, with masked and SAE
;; variants supplied through <mask_name> / <round_saeonly_name>.
12800 (define_insn "<mask_codefor>avx512er_rcp28<mode><mask_name><round_saeonly_name>"
12801 [(set (match_operand:VF_512 0 "register_operand" "=v")
12803 [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
12806 "vrcp28<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
12807 [(set_attr "prefix" "evex")
12808 (set_attr "mode" "<MODE>")])
;; Scalar form: emits vrcp28<ssescalarmodesuffix> with three VF_128
;; operands.  Operand 2 is always a register; operand 1 takes its predicate
;; and constraint from the <round_saeonly_*> substitution.
12810 (define_insn "avx512er_vmrcp28<mode><round_saeonly_name>"
12811 [(set (match_operand:VF_128 0 "register_operand" "=v")
12814 [(match_operand:VF_128 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
12816 (match_operand:VF_128 2 "register_operand" "v")
12819 "vrcp28<ssescalarmodesuffix>\t{<round_saeonly_op3>%2, %1, %0|%0, %1, %2<round_saeonly_op3>}"
12820 [(set_attr "length_immediate" "1")
12821 (set_attr "prefix" "evex")
12822 (set_attr "mode" "<MODE>")])
;; Emits vrsqrt28<ssemodesuffix> on a VF_512 operand, with masked and SAE
;; variants supplied through <mask_name> / <round_saeonly_name>.
12824 (define_insn "<mask_codefor>avx512er_rsqrt28<mode><mask_name><round_saeonly_name>"
12825 [(set (match_operand:VF_512 0 "register_operand" "=v")
12827 [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
12830 "vrsqrt28<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
12831 [(set_attr "prefix" "evex")
12832 (set_attr "mode" "<MODE>")])
;; Scalar form: emits vrsqrt28<ssescalarmodesuffix> with three VF_128
;; operands.  Operand 2 is always a register; operand 1 takes its predicate
;; and constraint from the <round_saeonly_*> substitution.
12834 (define_insn "avx512er_vmrsqrt28<mode><round_saeonly_name>"
12835 [(set (match_operand:VF_128 0 "register_operand" "=v")
12838 [(match_operand:VF_128 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
12840 (match_operand:VF_128 2 "register_operand" "v")
12843 "vrsqrt28<ssescalarmodesuffix>\t{<round_saeonly_op3>%2, %1, %0|%0, %1, %2<round_saeonly_op3>}"
12844 [(set_attr "length_immediate" "1")
12845 (set_attr "prefix" "evex")
12846 (set_attr "mode" "<MODE>")])
12848 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
12850 ;; XOP instructions
12852 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; xop_plus iterates over plain (plus) and signed-saturating (ss_plus)
;; addition.  The macs / madcs attributes give the matching mnemonic stem
;; for each code, e.g. "macs" for plus and "macss" for ss_plus.
12854 (define_code_iterator xop_plus [plus ss_plus])
12856 (define_code_attr macs [(plus "macs") (ss_plus "macss")])
12857 (define_code_attr madcs [(plus "madcs") (ss_plus "madcss")])
12859 ;; XOP parallel integer multiply/add instructions.
;; Four-operand vp<macs> on VI24_128 modes, where the mode suffix appears
;; twice in the mnemonic.  Operands 1 and 2 are marked commutative ("%");
;; only operand 2 may be in memory; operand 3 is a register.
12861 (define_insn "xop_p<macs><ssemodesuffix><ssemodesuffix>"
12862 [(set (match_operand:VI24_128 0 "register_operand" "=x")
12865 (match_operand:VI24_128 1 "nonimmediate_operand" "%x")
12866 (match_operand:VI24_128 2 "nonimmediate_operand" "xm"))
12867 (match_operand:VI24_128 3 "register_operand" "x")))]
12869 "vp<macs><ssemodesuffix><ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
12870 [(set_attr "type" "ssemuladd")
12871 (set_attr "mode" "TI")])
;; vp<macs>dql: uses elements 0 and 2 of the V4SI operands 1 and 2, with a
;; V2DI operand 3 and a V2DI result.
12873 (define_insn "xop_p<macs>dql"
12874 [(set (match_operand:V2DI 0 "register_operand" "=x")
12879 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
12880 (parallel [(const_int 0) (const_int 2)])))
12883 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
12884 (parallel [(const_int 0) (const_int 2)]))))
12885 (match_operand:V2DI 3 "register_operand" "x")))]
12887 "vp<macs>dql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
12888 [(set_attr "type" "ssemuladd")
12889 (set_attr "mode" "TI")])
;; vp<macs>dqh: uses elements 1 and 3 of the V4SI operands 1 and 2, with a
;; V2DI operand 3 and a V2DI result.
12891 (define_insn "xop_p<macs>dqh"
12892 [(set (match_operand:V2DI 0 "register_operand" "=x")
12897 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
12898 (parallel [(const_int 1) (const_int 3)])))
12901 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
12902 (parallel [(const_int 1) (const_int 3)]))))
12903 (match_operand:V2DI 3 "register_operand" "x")))]
12905 "vp<macs>dqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
12906 [(set_attr "type" "ssemuladd")
12907 (set_attr "mode" "TI")])
12909 ;; XOP parallel integer multiply/add instructions for the intrinsics
;; vp<macs>wd: uses the odd elements (1, 3, 5, 7) of the V8HI operands 1
;; and 2, with a V4SI operand 3 and a V4SI result.
12910 (define_insn "xop_p<macs>wd"
12911 [(set (match_operand:V4SI 0 "register_operand" "=x")
12916 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
12917 (parallel [(const_int 1) (const_int 3)
12918 (const_int 5) (const_int 7)])))
12921 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
12922 (parallel [(const_int 1) (const_int 3)
12923 (const_int 5) (const_int 7)]))))
12924 (match_operand:V4SI 3 "register_operand" "x")))]
12926 "vp<macs>wd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
12927 [(set_attr "type" "ssemuladd")
12928 (set_attr "mode" "TI")])
;; vp<madcs>wd: uses both the even elements (0, 2, 4, 6) and the odd
;; elements (1, 3, 5, 7) of the V8HI operands 1 and 2, with a V4SI operand 3
;; and a V4SI result.
12930 (define_insn "xop_p<madcs>wd"
12931 [(set (match_operand:V4SI 0 "register_operand" "=x")
12937 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
12938 (parallel [(const_int 0) (const_int 2)
12939 (const_int 4) (const_int 6)])))
12942 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
12943 (parallel [(const_int 0) (const_int 2)
12944 (const_int 4) (const_int 6)]))))
12949 (parallel [(const_int 1) (const_int 3)
12950 (const_int 5) (const_int 7)])))
12954 (parallel [(const_int 1) (const_int 3)
12955 (const_int 5) (const_int 7)])))))
12956 (match_operand:V4SI 3 "register_operand" "x")))]
12958 "vp<madcs>wd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
12959 [(set_attr "type" "ssemuladd")
12960 (set_attr "mode" "TI")])
12962 ;; XOP parallel XMM conditional moves
;; vpcmov for every mode in iterator V.  Operand 1 is always a register.
;; The two alternatives allow either operand 2 or operand 3 to be in memory,
;; but never both.
12963 (define_insn "xop_pcmov_<mode><avxsizesuffix>"
12964 [(set (match_operand:V 0 "register_operand" "=x,x")
12966 (match_operand:V 3 "nonimmediate_operand" "x,m")
12967 (match_operand:V 1 "register_operand" "x,x")
12968 (match_operand:V 2 "nonimmediate_operand" "xm,x")))]
12970 "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
12971 [(set_attr "type" "sse4arg")])
12973 ;; XOP horizontal add/subtract instructions
;; vphadd<u>bw: pairs the even-indexed bytes (0, 2, ..., 14) of V16QI
;; operand 1 with the odd-indexed bytes (1, 3, ..., 15); V8HI result.
12974 (define_insn "xop_phadd<u>bw"
12975 [(set (match_operand:V8HI 0 "register_operand" "=x")
12979 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
12980 (parallel [(const_int 0) (const_int 2)
12981 (const_int 4) (const_int 6)
12982 (const_int 8) (const_int 10)
12983 (const_int 12) (const_int 14)])))
12987 (parallel [(const_int 1) (const_int 3)
12988 (const_int 5) (const_int 7)
12989 (const_int 9) (const_int 11)
12990 (const_int 13) (const_int 15)])))))]
12992 "vphadd<u>bw\t{%1, %0|%0, %1}"
12993 [(set_attr "type" "sseiadd1")])
;; vphadd<u>bd: combines four byte selections of V16QI operand 1, with
;; indices {0,4,8,12}, {1,5,9,13}, {2,6,10,14} and {3,7,11,15}; V4SI result.
12995 (define_insn "xop_phadd<u>bd"
12996 [(set (match_operand:V4SI 0 "register_operand" "=x")
13001 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
13002 (parallel [(const_int 0) (const_int 4)
13003 (const_int 8) (const_int 12)])))
13007 (parallel [(const_int 1) (const_int 5)
13008 (const_int 9) (const_int 13)]))))
13013 (parallel [(const_int 2) (const_int 6)
13014 (const_int 10) (const_int 14)])))
13018 (parallel [(const_int 3) (const_int 7)
13019 (const_int 11) (const_int 15)]))))))]
13021 "vphadd<u>bd\t{%1, %0|%0, %1}"
13022 [(set_attr "type" "sseiadd1")])
;; vphadd<u>bq: combines eight two-byte selections of V16QI operand 1, with
;; indices {k, k+8} for k = 0..7; V2DI result.
13024 (define_insn "xop_phadd<u>bq"
13025 [(set (match_operand:V2DI 0 "register_operand" "=x")
13031 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
13032 (parallel [(const_int 0) (const_int 8)])))
13036 (parallel [(const_int 1) (const_int 9)]))))
13041 (parallel [(const_int 2) (const_int 10)])))
13045 (parallel [(const_int 3) (const_int 11)])))))
13051 (parallel [(const_int 4) (const_int 12)])))
13055 (parallel [(const_int 5) (const_int 13)]))))
13060 (parallel [(const_int 6) (const_int 14)])))
13064 (parallel [(const_int 7) (const_int 15)])))))))]
13066 "vphadd<u>bq\t{%1, %0|%0, %1}"
13067 [(set_attr "type" "sseiadd1")])
;; vphadd<u>wd: pairs the even-indexed words (0, 2, 4, 6) of V8HI operand 1
;; with the odd-indexed words (1, 3, 5, 7); V4SI result.
13069 (define_insn "xop_phadd<u>wd"
13070 [(set (match_operand:V4SI 0 "register_operand" "=x")
13074 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
13075 (parallel [(const_int 0) (const_int 2)
13076 (const_int 4) (const_int 6)])))
13080 (parallel [(const_int 1) (const_int 3)
13081 (const_int 5) (const_int 7)])))))]
13083 "vphadd<u>wd\t{%1, %0|%0, %1}"
13084 [(set_attr "type" "sseiadd1")])
;; vphadd<u>wq: combines four two-word selections of V8HI operand 1, with
;; indices {0,4}, {1,5}, {2,6} and {3,7}; V2DI result.
13086 (define_insn "xop_phadd<u>wq"
13087 [(set (match_operand:V2DI 0 "register_operand" "=x")
13092 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
13093 (parallel [(const_int 0) (const_int 4)])))
13097 (parallel [(const_int 1) (const_int 5)]))))
13102 (parallel [(const_int 2) (const_int 6)])))
13106 (parallel [(const_int 3) (const_int 7)]))))))]
13108 "vphadd<u>wq\t{%1, %0|%0, %1}"
13109 [(set_attr "type" "sseiadd1")])
;; vphadd<u>dq: pairs elements {0,2} of V4SI operand 1 with elements {1,3};
;; V2DI result.
13111 (define_insn "xop_phadd<u>dq"
13112 [(set (match_operand:V2DI 0 "register_operand" "=x")
13116 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
13117 (parallel [(const_int 0) (const_int 2)])))
13121 (parallel [(const_int 1) (const_int 3)])))))]
13123 "vphadd<u>dq\t{%1, %0|%0, %1}"
13124 [(set_attr "type" "sseiadd1")])
;; vphsubbw: pairs the even-indexed bytes (0, 2, ..., 14) of V16QI
;; operand 1 with the odd-indexed bytes (1, 3, ..., 15); V8HI result.
13126 (define_insn "xop_phsubbw"
13127 [(set (match_operand:V8HI 0 "register_operand" "=x")
13131 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
13132 (parallel [(const_int 0) (const_int 2)
13133 (const_int 4) (const_int 6)
13134 (const_int 8) (const_int 10)
13135 (const_int 12) (const_int 14)])))
13139 (parallel [(const_int 1) (const_int 3)
13140 (const_int 5) (const_int 7)
13141 (const_int 9) (const_int 11)
13142 (const_int 13) (const_int 15)])))))]
13144 "vphsubbw\t{%1, %0|%0, %1}"
13145 [(set_attr "type" "sseiadd1")])
;; vphsubwd: pairs the even-indexed words (0, 2, 4, 6) of V8HI operand 1
;; with the odd-indexed words (1, 3, 5, 7); V4SI result.
13147 (define_insn "xop_phsubwd"
13148 [(set (match_operand:V4SI 0 "register_operand" "=x")
13152 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
13153 (parallel [(const_int 0) (const_int 2)
13154 (const_int 4) (const_int 6)])))
13158 (parallel [(const_int 1) (const_int 3)
13159 (const_int 5) (const_int 7)])))))]
13161 "vphsubwd\t{%1, %0|%0, %1}"
13162 [(set_attr "type" "sseiadd1")])
;; vphsubdq: pairs elements {0,2} of V4SI operand 1 with elements {1,3};
;; V2DI result.
13164 (define_insn "xop_phsubdq"
13165 [(set (match_operand:V2DI 0 "register_operand" "=x")
13169 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
13170 (parallel [(const_int 0) (const_int 2)])))
13174 (parallel [(const_int 1) (const_int 3)])))))]
13176 "vphsubdq\t{%1, %0|%0, %1}"
13177 [(set_attr "type" "sseiadd1")])
13179 ;; XOP permute instructions
;; vpperm as an UNSPEC_XOP_PERMUTE of three V16QI operands.  Operand 1 is
;; always a register; the insn condition rejects operands 2 and 3 both being
;; in memory, matching the two alternatives ("x,m" / "xm,x").
13180 (define_insn "xop_pperm"
13181 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
13183 [(match_operand:V16QI 1 "register_operand" "x,x")
13184 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
13185 (match_operand:V16QI 3 "nonimmediate_operand" "xm,x")]
13186 UNSPEC_XOP_PERMUTE))]
13187 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
13188 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
13189 [(set_attr "type" "sse4arg")
13190 (set_attr "mode" "TI")])
13192 ;; XOP pack instructions that combine two vectors into a smaller vector
;; Packs two V2DI inputs (operands 1 and 2) into a V4SI result with vpperm.
;; The V16QI operand 3 appears only in a (use ...).  Operands 2 and 3 may
;; not both be in memory.
13193 (define_insn "xop_pperm_pack_v2di_v4si"
13194 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
13197 (match_operand:V2DI 1 "register_operand" "x,x"))
13199 (match_operand:V2DI 2 "nonimmediate_operand" "x,m"))))
13200 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
13201 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
13202 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
13203 [(set_attr "type" "sse4arg")
13204 (set_attr "mode" "TI")])
;; Packs two V4SI inputs (operands 1 and 2) into a V8HI result with vpperm.
;; The V16QI operand 3 appears only in a (use ...).  Operands 2 and 3 may
;; not both be in memory.
13206 (define_insn "xop_pperm_pack_v4si_v8hi"
13207 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
13210 (match_operand:V4SI 1 "register_operand" "x,x"))
13212 (match_operand:V4SI 2 "nonimmediate_operand" "x,m"))))
13213 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
13214 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
13215 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
13216 [(set_attr "type" "sse4arg")
13217 (set_attr "mode" "TI")])
;; Packs two V8HI inputs (operands 1 and 2) into a V16QI result with
;; vpperm.  The V16QI operand 3 appears only in a (use ...).  Operands 2
;; and 3 may not both be in memory.
13219 (define_insn "xop_pperm_pack_v8hi_v16qi"
13220 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
13223 (match_operand:V8HI 1 "register_operand" "x,x"))
13225 (match_operand:V8HI 2 "nonimmediate_operand" "x,m"))))
13226 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
13227 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
13228 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
13229 [(set_attr "type" "sse4arg")
13230 (set_attr "mode" "TI")])
13232 ;; XOP packed rotate instructions
;; Rotate-left expander for VI_128 modes with an SImode count (operand 2).
;; When the count is not a constant accepted by
;; const_0_to_<sserotatemax>_operand, it is first converted to the element
;; mode if its mode differs, then replicated into every element of a vector
;; via gen_vec_init<mode>, and the rotate is emitted with
;; gen_xop_vrotl<mode>3.
13233 (define_expand "rotl<mode>3"
13234 [(set (match_operand:VI_128 0 "register_operand")
13236 (match_operand:VI_128 1 "nonimmediate_operand")
13237 (match_operand:SI 2 "general_operand")))]
13240 /* If we were given a scalar, convert it to parallel */
13241 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
13243 rtvec vs = rtvec_alloc (<ssescalarnum>);
13244 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
13245 rtx reg = gen_reg_rtx (<MODE>mode);
13246 rtx op2 = operands[2];
13249 if (GET_MODE (op2) != <ssescalarmode>mode)
13251 op2 = gen_reg_rtx (<ssescalarmode>mode);
13252 convert_move (op2, operands[2], false);
13255 for (i = 0; i < <ssescalarnum>; i++)
13256 RTVEC_ELT (vs, i) = op2;
13258 emit_insn (gen_vec_init<mode> (reg, par));
13259 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
;; Rotate-right expander for VI_128 modes with an SImode count (operand 2).
;; When the count is not a constant accepted by
;; const_0_to_<sserotatemax>_operand, it is converted to the element mode if
;; its mode differs and replicated into a vector via gen_vec_init<mode>.
;; That vector is negated with gen_neg<mode>2, and the rotate is emitted as
;; a gen_xop_vrotl<mode>3 by the negated counts.
13264 (define_expand "rotr<mode>3"
13265 [(set (match_operand:VI_128 0 "register_operand")
13267 (match_operand:VI_128 1 "nonimmediate_operand")
13268 (match_operand:SI 2 "general_operand")))]
13271 /* If we were given a scalar, convert it to parallel */
13272 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
13274 rtvec vs = rtvec_alloc (<ssescalarnum>);
13275 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
13276 rtx neg = gen_reg_rtx (<MODE>mode);
13277 rtx reg = gen_reg_rtx (<MODE>mode);
13278 rtx op2 = operands[2];
13281 if (GET_MODE (op2) != <ssescalarmode>mode)
13283 op2 = gen_reg_rtx (<ssescalarmode>mode);
13284 convert_move (op2, operands[2], false);
13287 for (i = 0; i < <ssescalarnum>; i++)
13288 RTVEC_ELT (vs, i) = op2;
13290 emit_insn (gen_vec_init<mode> (reg, par));
13291 emit_insn (gen_neg<mode>2 (neg, reg));
13292 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], neg));
;; Rotate by an immediate count: emits vprot<ssemodesuffix> with operand 2
;; restricted to 0..<sserotatemax>.  Operand 1 may be a register or memory.
13297 (define_insn "xop_rotl<mode>3"
13298 [(set (match_operand:VI_128 0 "register_operand" "=x")
13300 (match_operand:VI_128 1 "nonimmediate_operand" "xm")
13301 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
13303 "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
13304 [(set_attr "type" "sseishft")
13305 (set_attr "length_immediate" "1")
13306 (set_attr "mode" "TI")])
;; Rotate-right by an immediate count.  The output code computes (element
;; bit size - operand 2) and prints that value as %3 in a
;; vprot<ssemodesuffix>.
13308 (define_insn "xop_rotr<mode>3"
13309 [(set (match_operand:VI_128 0 "register_operand" "=x")
13311 (match_operand:VI_128 1 "nonimmediate_operand" "xm")
13312 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
13316 = GEN_INT (GET_MODE_BITSIZE (<ssescalarmode>mode) - INTVAL (operands[2]));
13317 return \"vprot<ssemodesuffix>\t{%3, %1, %0|%0, %1, %3}\";
13319 [(set_attr "type" "sseishft")
13320 (set_attr "length_immediate" "1")
13321 (set_attr "mode" "TI")])
;; Per-element variable rotate-right: negates the count vector (operand 2)
;; into a fresh register and emits gen_xop_vrotl<mode>3 with the negated
;; counts.
13323 (define_expand "vrotr<mode>3"
13324 [(match_operand:VI_128 0 "register_operand")
13325 (match_operand:VI_128 1 "register_operand")
13326 (match_operand:VI_128 2 "register_operand")]
13329 rtx reg = gen_reg_rtx (<MODE>mode);
13330 emit_insn (gen_neg<mode>2 (reg, operands[2]));
13331 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
;; Per-element variable rotate-left: forwards its three operands unchanged
;; to gen_xop_vrotl<mode>3.
13335 (define_expand "vrotl<mode>3"
13336 [(match_operand:VI_128 0 "register_operand")
13337 (match_operand:VI_128 1 "register_operand")
13338 (match_operand:VI_128 2 "register_operand")]
13341 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], operands[2]));
;; vprot<ssemodesuffix> with a vector of counts (operand 2).  The RTL is an
;; if_then_else on operand 2, one arm of which uses (neg (match_dup 2)).
;; The insn condition rejects operands 1 and 2 both being in memory,
;; matching the two alternatives ("xm,x" / "x,m").
13345 (define_insn "xop_vrotl<mode>3"
13346 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
13347 (if_then_else:VI_128
13349 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
13352 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
13356 (neg:VI_128 (match_dup 2)))))]
13357 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
13358 "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
13359 [(set_attr "type" "sseishft")
13360 (set_attr "prefix_data16" "0")
13361 (set_attr "prefix_extra" "2")
13362 (set_attr "mode" "TI")])
13364 ;; XOP packed shift instructions.
;; vlshr<mode>3 for VI12_128 modes: negates the count vector (operand 2)
;; into a fresh register and emits gen_xop_shl<mode>3 with the negated
;; counts.
13365 (define_expand "vlshr<mode>3"
13366 [(set (match_operand:VI12_128 0 "register_operand")
13368 (match_operand:VI12_128 1 "register_operand")
13369 (match_operand:VI12_128 2 "nonimmediate_operand")))]
13372 rtx neg = gen_reg_rtx (<MODE>mode);
13373 emit_insn (gen_neg<mode>2 (neg, operands[2]));
13374 emit_insn (gen_xop_shl<mode>3 (operands[0], operands[1], neg));
;; Variable right shift for VI48_128 modes, enabled for AVX2 or XOP.
;; The visible preparation code negates the count vector and emits
;; xop_shl<mode>3.
;; NOTE(review): the guard selecting this path is not visible in this
;; listing -- presumably it is taken only when AVX2 is unavailable; confirm.
13378 (define_expand "vlshr<mode>3"
13379 [(set (match_operand:VI48_128 0 "register_operand")
13381 (match_operand:VI48_128 1 "register_operand")
13382 (match_operand:VI48_128 2 "nonimmediate_operand")))]
13383 "TARGET_AVX2 || TARGET_XOP"
13387 rtx neg = gen_reg_rtx (<MODE>mode);
13388 emit_insn (gen_neg<mode>2 (neg, operands[2]));
13389 emit_insn (gen_xop_shl<mode>3 (operands[0], operands[1], neg));
;; vlshr expander for the VI48_512 modes.  Only the operand declarations
;; are visible here (register destination and source, reg/mem count);
;; no preparation code is shown in this listing.
13394 (define_expand "vlshr<mode>3"
13395 [(set (match_operand:VI48_512 0 "register_operand")
13397 (match_operand:VI48_512 1 "register_operand")
13398 (match_operand:VI48_512 2 "nonimmediate_operand")))]
;; vlshr expander for the VI48_256 modes.  Only the operand declarations
;; are visible here (register destination and source, reg/mem count);
;; no preparation code is shown in this listing.
13401 (define_expand "vlshr<mode>3"
13402 [(set (match_operand:VI48_256 0 "register_operand")
13404 (match_operand:VI48_256 1 "register_operand")
13405 (match_operand:VI48_256 2 "nonimmediate_operand")))]
;; Variable arithmetic right shift (ashiftrt) for VI128_128 modes.
;; The expander negates the count vector into a new register and emits
;; xop_sha<mode>3 with the negated counts.
13408 (define_expand "vashr<mode>3"
13409 [(set (match_operand:VI128_128 0 "register_operand")
13410 (ashiftrt:VI128_128
13411 (match_operand:VI128_128 1 "register_operand")
13412 (match_operand:VI128_128 2 "nonimmediate_operand")))]
13415 rtx neg = gen_reg_rtx (<MODE>mode);
13416 emit_insn (gen_neg<mode>2 (neg, operands[2]));
13417 emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], neg));
;; Variable arithmetic right shift for V4SI, enabled for AVX2 or XOP.
;; The visible preparation code negates the count vector and emits
;; xop_shav4si3.
;; NOTE(review): the guard selecting this path is not visible in this
;; listing -- presumably it is taken only when AVX2 is unavailable; confirm.
13421 (define_expand "vashrv4si3"
13422 [(set (match_operand:V4SI 0 "register_operand")
13423 (ashiftrt:V4SI (match_operand:V4SI 1 "register_operand")
13424 (match_operand:V4SI 2 "nonimmediate_operand")))]
13425 "TARGET_AVX2 || TARGET_XOP"
13429 rtx neg = gen_reg_rtx (V4SImode);
13430 emit_insn (gen_negv4si2 (neg, operands[2]));
13431 emit_insn (gen_xop_shav4si3 (operands[0], operands[1], neg));
;; Variable arithmetic right shift for V16SI.  Only the RTL pattern is
;; visible; no preparation code is shown in this listing.
13436 (define_expand "vashrv16si3"
13437 [(set (match_operand:V16SI 0 "register_operand")
13438 (ashiftrt:V16SI (match_operand:V16SI 1 "register_operand")
13439 (match_operand:V16SI 2 "nonimmediate_operand")))]
;; Variable arithmetic right shift for V8SI.  Only the RTL pattern is
;; visible; no preparation code is shown in this listing.
13442 (define_expand "vashrv8si3"
13443 [(set (match_operand:V8SI 0 "register_operand")
13444 (ashiftrt:V8SI (match_operand:V8SI 1 "register_operand")
13445 (match_operand:V8SI 2 "nonimmediate_operand")))]
;; Variable left shift for VI12_128 modes: emits xop_sha<mode>3 with the
;; count vector passed through unchanged (no negation, unlike the right
;; shift expanders).
;; NOTE(review): the RTL code and condition lines are not visible here.
13448 (define_expand "vashl<mode>3"
13449 [(set (match_operand:VI12_128 0 "register_operand")
13451 (match_operand:VI12_128 1 "register_operand")
13452 (match_operand:VI12_128 2 "nonimmediate_operand")))]
13455 emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], operands[2]));
;; Variable left shift for VI48_128 modes, enabled for AVX2 or XOP.
;; The visible preparation code forces the count into a register and
;; emits xop_sha<mode>3.
;; NOTE(review): the guard selecting this path is not visible in this
;; listing -- presumably it is taken only when AVX2 is unavailable; confirm.
13459 (define_expand "vashl<mode>3"
13460 [(set (match_operand:VI48_128 0 "register_operand")
13462 (match_operand:VI48_128 1 "register_operand")
13463 (match_operand:VI48_128 2 "nonimmediate_operand")))]
13464 "TARGET_AVX2 || TARGET_XOP"
13468 operands[2] = force_reg (<MODE>mode, operands[2]);
13469 emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], operands[2]));
;; vashl expander for the VI48_512 modes.  Only the operand declarations
;; are visible here; no preparation code is shown in this listing.
13474 (define_expand "vashl<mode>3"
13475 [(set (match_operand:VI48_512 0 "register_operand")
13477 (match_operand:VI48_512 1 "register_operand")
13478 (match_operand:VI48_512 2 "nonimmediate_operand")))]
;; vashl expander for the VI48_256 modes.  Only the operand declarations
;; are visible here; no preparation code is shown in this listing.
13481 (define_expand "vashl<mode>3"
13482 [(set (match_operand:VI48_256 0 "register_operand")
13484 (match_operand:VI48_256 1 "register_operand")
13485 (match_operand:VI48_256 2 "nonimmediate_operand")))]
;; XOP vpsha with a vector count (operand 2).  The RTL is an if_then_else
;; whose visible arm uses the negated count; the remaining lines of the
;; expression are not visible in this listing.
;; Alternatives: count in register with source reg/mem, or count in memory
;; with source in register.  The condition rejects two memory operands.
13488 (define_insn "xop_sha<mode>3"
13489 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
13490 (if_then_else:VI_128
13492 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
13495 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
13499 (neg:VI_128 (match_dup 2)))))]
13500 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
13501 "vpsha<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
13502 [(set_attr "type" "sseishft")
13503 (set_attr "prefix_data16" "0")
13504 (set_attr "prefix_extra" "2")
13505 (set_attr "mode" "TI")])
;; XOP vpshl with a vector count (operand 2).  Same operand layout,
;; alternatives and condition as xop_sha<mode>3; only the mnemonic differs
;; in the visible lines.  Parts of the if_then_else expression are not
;; visible in this listing.
13507 (define_insn "xop_shl<mode>3"
13508 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
13509 (if_then_else:VI_128
13511 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
13514 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
13518 (neg:VI_128 (match_dup 2)))))]
13519 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
13520 "vpshl<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
13521 [(set_attr "type" "sseishft")
13522 (set_attr "prefix_data16" "0")
13523 (set_attr "prefix_extra" "2")
13524 (set_attr "mode" "TI")])
;; Byte-element vector shift by a scalar SImode count (any_shift codes).
;; With XOP and V16QImode the expander broadcasts the count into a V16QI
;; register (vec_initv16qi over a 16-element PARALLEL) and emits
;; xop_shlv16qi3 for LSHIFTRT or xop_shav16qi3 for the other codes.
;; For codes other than ASHIFT the count is negated: a constant count is
;; negated directly with GEN_INT, and a negv16qi2 on the broadcast vector
;; is also visible.
;; NOTE(review): the braces/else lines and the use of the `negate' flag are
;; not visible in this listing -- presumably negv16qi2 runs only for
;; non-constant counts; confirm.
;; The remaining visible path calls ix86_expand_vecop_qihi.
13526 (define_expand "<shift_insn><mode>3"
13527 [(set (match_operand:VI1_AVX2 0 "register_operand")
13528 (any_shift:VI1_AVX2
13529 (match_operand:VI1_AVX2 1 "register_operand")
13530 (match_operand:SI 2 "nonmemory_operand")))]
13533 if (TARGET_XOP && <MODE>mode == V16QImode)
13535 bool negate = false;
13536 rtx (*gen) (rtx, rtx, rtx);
13540 if (<CODE> != ASHIFT)
13542 if (CONST_INT_P (operands[2]))
13543 operands[2] = GEN_INT (-INTVAL (operands[2]));
13547 par = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
13548 for (i = 0; i < 16; i++)
13549 XVECEXP (par, 0, i) = operands[2];
13551 tmp = gen_reg_rtx (V16QImode);
13552 emit_insn (gen_vec_initv16qi (tmp, par));
13555 emit_insn (gen_negv16qi2 (tmp, tmp));
13557 gen = (<CODE> == LSHIFTRT ? gen_xop_shlv16qi3 : gen_xop_shav16qi3);
13558 emit_insn (gen (operands[0], operands[1], tmp));
13561 ix86_expand_vecop_qihi (<CODE>, operands[0], operands[1], operands[2]);
;; V2DI right shift by a scalar DImode count, implemented with XOP vpsha:
;; the count is broadcast into a V2DI register (vec_initv2di over a
;; 2-element PARALLEL) and xop_shav2di3 is emitted with it.
;; A constant count is negated up front with GEN_INT; a negv2di2 on the
;; broadcast vector is also visible.
;; NOTE(review): the branch structure around `negate' and the insn
;; condition are not visible in this listing -- confirm against full file.
13565 (define_expand "ashrv2di3"
13566 [(set (match_operand:V2DI 0 "register_operand")
13568 (match_operand:V2DI 1 "register_operand")
13569 (match_operand:DI 2 "nonmemory_operand")))]
13572 rtx reg = gen_reg_rtx (V2DImode);
13574 bool negate = false;
13577 if (CONST_INT_P (operands[2]))
13578 operands[2] = GEN_INT (-INTVAL (operands[2]));
13582 par = gen_rtx_PARALLEL (V2DImode, rtvec_alloc (2));
13583 for (i = 0; i < 2; i++)
13584 XVECEXP (par, 0, i) = operands[2];
13586 emit_insn (gen_vec_initv2di (reg, par));
13589 emit_insn (gen_negv2di2 (reg, reg));
13591 emit_insn (gen_xop_shav2di3 (operands[0], operands[1], reg));
13595 ;; XOP FRCZ support
;; vfrcz on a full FMAMODE operand: one reg/mem source, register
;; destination.  The unspec code and condition lines are not visible in
;; this listing.
13596 (define_insn "xop_frcz<mode>2"
13597 [(set (match_operand:FMAMODE 0 "register_operand" "=x")
13599 [(match_operand:FMAMODE 1 "nonimmediate_operand" "xm")]
13602 "vfrcz<ssemodesuffix>\t{%1, %0|%0, %1}"
13603 [(set_attr "type" "ssecvt1")
13604 (set_attr "mode" "<MODE>")])
;; Expander for the scalar vfrcz form on VF_128 modes.  The preparation
;; statement sets operands[3] to the all-zero constant of the vector mode.
;; NOTE(review): where operand 3 is used in the RTL template is not visible
;; in this listing.
13606 (define_expand "xop_vmfrcz<mode>2"
13607 [(set (match_operand:VF_128 0 "register_operand")
13610 [(match_operand:VF_128 1 "nonimmediate_operand")]
13615 "operands[3] = CONST0_RTX (<MODE>mode);")
;; Matcher for the scalar vfrcz form: operand 1 is the reg/mem source and
;; operand 2 must be a zero constant (const0_operand).  Emits
;; vfrcz<ssescalarmodesuffix>; the Intel-syntax operand uses <iptr>.
;; The enclosing RTL codes are not visible in this listing.
13617 (define_insn "*xop_vmfrcz<mode>2"
13618 [(set (match_operand:VF_128 0 "register_operand" "=x")
13621 [(match_operand:VF_128 1 "nonimmediate_operand" "xm")]
13623 (match_operand:VF_128 2 "const0_operand")
13626 "vfrcz<ssescalarmodesuffix>\t{%1, %0|%0, %<iptr>1}"
13627 [(set_attr "type" "ssecvt1")
13628 (set_attr "mode" "<MODE>")])
;; XOP vpcom for comparisons accepted by ix86_comparison_int_operator.
;; The comparison code is printed through %Y1; operand 2 must be a
;; register, operand 3 may be memory.
;; NOTE(review): "type" is sse4arg here while the unsigned variants of this
;; pattern use ssecmp -- confirm the difference is intentional.
13630 (define_insn "xop_maskcmp<mode>3"
13631 [(set (match_operand:VI_128 0 "register_operand" "=x")
13632 (match_operator:VI_128 1 "ix86_comparison_int_operator"
13633 [(match_operand:VI_128 2 "register_operand" "x")
13634 (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
13636 "vpcom%Y1<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
13637 [(set_attr "type" "sse4arg")
13638 (set_attr "prefix_data16" "0")
13639 (set_attr "prefix_rep" "0")
13640 (set_attr "prefix_extra" "2")
13641 (set_attr "length_immediate" "1")
13642 (set_attr "mode" "TI")])
;; XOP vpcom...u for comparisons accepted by ix86_comparison_uns_operator.
;; Same operand layout as xop_maskcmp<mode>3; the template appends "u"
;; after the %Y1 comparison code.
13644 (define_insn "xop_maskcmp_uns<mode>3"
13645 [(set (match_operand:VI_128 0 "register_operand" "=x")
13646 (match_operator:VI_128 1 "ix86_comparison_uns_operator"
13647 [(match_operand:VI_128 2 "register_operand" "x")
13648 (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
13650 "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
13651 [(set_attr "type" "ssecmp")
13652 (set_attr "prefix_data16" "0")
13653 (set_attr "prefix_rep" "0")
13654 (set_attr "prefix_extra" "2")
13655 (set_attr "length_immediate" "1")
13656 (set_attr "mode" "TI")])
13658 ;; Version of pcom*u* that is called from the intrinsics that allows pcomequ*
13659 ;; and pcomneu* not to be converted to the signed ones in case somebody needs
13660 ;; the exact instruction generated for the intrinsic.
;; Same comparison operator and output template as xop_maskcmp_uns<mode>3,
;; but the comparison is wrapped in UNSPEC_XOP_UNSIGNED_CMP.
13661 (define_insn "xop_maskcmp_uns2<mode>3"
13662 [(set (match_operand:VI_128 0 "register_operand" "=x")
13664 [(match_operator:VI_128 1 "ix86_comparison_uns_operator"
13665 [(match_operand:VI_128 2 "register_operand" "x")
13666 (match_operand:VI_128 3 "nonimmediate_operand" "xm")])]
13667 UNSPEC_XOP_UNSIGNED_CMP))]
13669 "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
13670 [(set_attr "type" "ssecmp")
13671 (set_attr "prefix_data16" "0")
13672 (set_attr "prefix_extra" "2")
13673 (set_attr "length_immediate" "1")
13674 (set_attr "mode" "TI")])
13676 ;; Pcomtrue and pcomfalse support. These are useless instructions, but are
13677 ;; being added here to be complete.
;; Operand 3 is a constant selector: a nonzero value emits vpcomtrue,
;; zero emits vpcomfalse.  Operands 1 and 2 are passed to the instruction
;; unchanged inside UNSPEC_XOP_TRUEFALSE.
13678 (define_insn "xop_pcom_tf<mode>3"
13679 [(set (match_operand:VI_128 0 "register_operand" "=x")
13681 [(match_operand:VI_128 1 "register_operand" "x")
13682 (match_operand:VI_128 2 "nonimmediate_operand" "xm")
13683 (match_operand:SI 3 "const_int_operand" "n")]
13684 UNSPEC_XOP_TRUEFALSE))]
13687 return ((INTVAL (operands[3]) != 0)
13688 ? "vpcomtrue<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
13689 : "vpcomfalse<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}");
13691 [(set_attr "type" "ssecmp")
13692 (set_attr "prefix_data16" "0")
13693 (set_attr "prefix_extra" "2")
13694 (set_attr "length_immediate" "1")
13695 (set_attr "mode" "TI")])
;; XOP vpermil2 for 128/256-bit float vectors: two data sources
;; (operands 1 and 2), an integer-vector selector (operand 3, reg/mem) and
;; a 2-bit immediate (operand 4, range 0..3).
;; NOTE(review): the "%" in operand 2's constraint marks it as commutative
;; with operand 3, which has a different mode (<sseintvecmode>) -- confirm
;; this is intended.  The unspec code and condition are not visible here.
13697 (define_insn "xop_vpermil2<mode>3"
13698 [(set (match_operand:VF_128_256 0 "register_operand" "=x")
13700 [(match_operand:VF_128_256 1 "register_operand" "x")
13701 (match_operand:VF_128_256 2 "nonimmediate_operand" "%x")
13702 (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "xm")
13703 (match_operand:SI 4 "const_0_to_3_operand" "n")]
13706 "vpermil2<ssemodesuffix>\t{%4, %3, %2, %1, %0|%0, %1, %2, %3, %4}"
13707 [(set_attr "type" "sse4arg")
13708 (set_attr "length_immediate" "1")
13709 (set_attr "mode" "<MODE>")])
13711 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; aesenc / vaesenc.  Alternative 0 (isa noavx) is the two-operand form
;; with operand 1 tied to the destination; alternative 1 (isa avx) is the
;; three-operand VEX form.  The unspec code and condition lines are not
;; visible in this listing.
13713 (define_insn "aesenc"
13714 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
13715 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
13716 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
13720 aesenc\t{%2, %0|%0, %2}
13721 vaesenc\t{%2, %1, %0|%0, %1, %2}"
13722 [(set_attr "isa" "noavx,avx")
13723 (set_attr "type" "sselog1")
13724 (set_attr "prefix_extra" "1")
13725 (set_attr "prefix" "orig,vex")
13726 (set_attr "btver2_decode" "double,double")
13727 (set_attr "mode" "TI")])
;; aesenclast / vaesenclast (UNSPEC_AESENCLAST).  Alternative 0 (isa noavx)
;; is the two-operand form with operand 1 tied to the destination;
;; alternative 1 (isa avx) is the three-operand VEX form.
13729 (define_insn "aesenclast"
13730 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
13731 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
13732 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
13733 UNSPEC_AESENCLAST))]
13736 aesenclast\t{%2, %0|%0, %2}
13737 vaesenclast\t{%2, %1, %0|%0, %1, %2}"
13738 [(set_attr "isa" "noavx,avx")
13739 (set_attr "type" "sselog1")
13740 (set_attr "prefix_extra" "1")
13741 (set_attr "prefix" "orig,vex")
13742 (set_attr "btver2_decode" "double,double")
13743 (set_attr "mode" "TI")])
;; aesdec / vaesdec.  Alternative 0 (isa noavx) is the two-operand form
;; with operand 1 tied to the destination; alternative 1 (isa avx) is the
;; three-operand VEX form.  The unspec code and condition lines are not
;; visible in this listing.
13745 (define_insn "aesdec"
13746 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
13747 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
13748 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
13752 aesdec\t{%2, %0|%0, %2}
13753 vaesdec\t{%2, %1, %0|%0, %1, %2}"
13754 [(set_attr "isa" "noavx,avx")
13755 (set_attr "type" "sselog1")
13756 (set_attr "prefix_extra" "1")
13757 (set_attr "prefix" "orig,vex")
13758 (set_attr "btver2_decode" "double,double")
13759 (set_attr "mode" "TI")])
;; aesdeclast / vaesdeclast (UNSPEC_AESDECLAST).  Alternative 0 (isa noavx)
;; is the two-operand form with operand 1 tied to the destination;
;; alternative 1 (isa avx) is the three-operand VEX form.
13761 (define_insn "aesdeclast"
13762 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
13763 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
13764 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
13765 UNSPEC_AESDECLAST))]
13768 aesdeclast\t{%2, %0|%0, %2}
13769 vaesdeclast\t{%2, %1, %0|%0, %1, %2}"
13770 [(set_attr "isa" "noavx,avx")
13771 (set_attr "type" "sselog1")
13772 (set_attr "prefix_extra" "1")
13773 (set_attr "prefix" "orig,vex")
13774 (set_attr "btver2_decode" "double,double")
13775 (set_attr "mode" "TI")])
;; aesimc with a single reg/mem source.  One alternative only: the
;; template uses the %v prefix and "prefix" is maybe_vex, so the same
;; pattern serves both encodings.  The unspec code and condition lines are
;; not visible in this listing.
13777 (define_insn "aesimc"
13778 [(set (match_operand:V2DI 0 "register_operand" "=x")
13779 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")]
13782 "%vaesimc\t{%1, %0|%0, %1}"
13783 [(set_attr "type" "sselog1")
13784 (set_attr "prefix_extra" "1")
13785 (set_attr "prefix" "maybe_vex")
13786 (set_attr "mode" "TI")])
;; aeskeygenassist: reg/mem source (operand 1) plus an 8-bit immediate
;; (operand 2, range 0..255), wrapped in UNSPEC_AESKEYGENASSIST.  Uses the
;; %v / maybe_vex scheme, so one alternative covers both encodings.
13788 (define_insn "aeskeygenassist"
13789 [(set (match_operand:V2DI 0 "register_operand" "=x")
13790 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")
13791 (match_operand:SI 2 "const_0_to_255_operand" "n")]
13792 UNSPEC_AESKEYGENASSIST))]
13794 "%vaeskeygenassist\t{%2, %1, %0|%0, %1, %2}"
13795 [(set_attr "type" "sselog1")
13796 (set_attr "prefix_extra" "1")
13797 (set_attr "length_immediate" "1")
13798 (set_attr "prefix" "maybe_vex")
13799 (set_attr "mode" "TI")])
;; pclmulqdq / vpclmulqdq with an 8-bit immediate (operand 3, 0..255).
;; Alternative 0 (isa noavx) ties operand 1 to the destination;
;; alternative 1 (isa avx) is the VEX form with a separate destination.
;; The unspec code and condition lines are not visible in this listing.
13801 (define_insn "pclmulqdq"
13802 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
13803 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
13804 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")
13805 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
13809 pclmulqdq\t{%3, %2, %0|%0, %2, %3}
13810 vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}"
13811 [(set_attr "isa" "noavx,avx")
13812 (set_attr "type" "sselog1")
13813 (set_attr "prefix_extra" "1")
13814 (set_attr "length_immediate" "1")
13815 (set_attr "prefix" "orig,vex")
13816 (set_attr "mode" "TI")])
;; Builds the PARALLEL for vzeroall in operands[0]: element 0 is an
;; UNSPEC_VOLATILE, followed by one SET per SSE register that stores the
;; V8SI zero constant into it.  nregs is 16 when TARGET_64BIT, else 8.
;; NOTE(review): the unspec code passed to gen_rtx_UNSPEC_VOLATILE and the
;; condition are not visible in this listing.
13818 (define_expand "avx_vzeroall"
13819 [(match_par_dup 0 [(const_int 0)])]
13822 int nregs = TARGET_64BIT ? 16 : 8;
13825 operands[0] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs + 1));
13827 XVECEXP (operands[0], 0, 0)
13828 = gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, const0_rtx),
13831 for (regno = 0; regno < nregs; regno++)
13832 XVECEXP (operands[0], 0, regno + 1)
13833 = gen_rtx_SET (VOIDmode,
13834 gen_rtx_REG (V8SImode, SSE_REGNO (regno)),
13835 CONST0_RTX (V8SImode));
;; Matches a PARALLEL accepted by the vzeroall_operation predicate whose
;; first element is the UNSPECV_VZEROALL unspec_volatile.
;; The condition and output template are not visible in this listing.
13838 (define_insn "*avx_vzeroall"
13839 [(match_parallel 0 "vzeroall_operation"
13840 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROALL)])]
13843 [(set_attr "type" "sse")
13844 (set_attr "modrm" "0")
13845 (set_attr "memory" "none")
13846 (set_attr "prefix" "vex")
13847 (set_attr "btver2_decode" "vector")
13848 (set_attr "mode" "OI")])
13850 ;; Clear the upper 128bits of AVX registers, equivalent to a NOP
13851 ;; if the upper 128bits are unused.
;; Modelled as a bare unspec_volatile (UNSPECV_VZEROUPPER) with no operands.
;; The condition and output template are not visible in this listing.
13852 (define_insn "avx_vzeroupper"
13853 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)]
13856 [(set_attr "type" "sse")
13857 (set_attr "modrm" "0")
13858 (set_attr "memory" "none")
13859 (set_attr "prefix" "vex")
13860 (set_attr "btver2_decode" "vector")
13861 (set_attr "mode" "OI")])
;; vpbroadcast from element 0 of an xmm-mode (<ssexmmmode>) reg/mem
;; operand into a VI-mode destination.  The Intel-syntax source uses
;; <iptr>.
;; NOTE(review): the RTL code wrapping the vec_select (presumably
;; vec_duplicate) and the condition are not visible in this listing.
13863 (define_insn "avx2_pbroadcast<mode>"
13864 [(set (match_operand:VI 0 "register_operand" "=x")
13866 (vec_select:<ssescalarmode>
13867 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "xm")
13868 (parallel [(const_int 0)]))))]
13870 "vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}"
13871 [(set_attr "type" "ssemov")
13872 (set_attr "prefix_extra" "1")
13873 (set_attr "prefix" "vex")
13874 (set_attr "mode" "<sseinsnmode>")])
;; Broadcast of element 0 where the source has the same 256-bit mode as
;; the destination.  Alternative 0 takes the source from memory;
;; alternative 1 takes it from a register and prints it with the %x
;; modifier.  The condition line is not visible in this listing.
13876 (define_insn "avx2_pbroadcast<mode>_1"
13877 [(set (match_operand:VI_256 0 "register_operand" "=x,x")
13878 (vec_duplicate:VI_256
13879 (vec_select:<ssescalarmode>
13880 (match_operand:VI_256 1 "nonimmediate_operand" "m,x")
13881 (parallel [(const_int 0)]))))]
13884 vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}
13885 vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %x1}"
13886 [(set_attr "type" "ssemov")
13887 (set_attr "prefix_extra" "1")
13888 (set_attr "prefix" "vex")
13889 (set_attr "mode" "<sseinsnmode>")])
;; Variable permute (vperm<suffix>) for 256/512-bit modes: operand 1 is the
;; reg/mem data source and operand 2 an integer-vector register; in AT&T
;; order the template prints %1, %2, %0.  The <mask_name> suffix and
;; <mask_operand3> hook add the masked variant.
;; The unspec code line is not visible in this listing.
13891 (define_insn "<avx2_avx512f>_permvar<mode><mask_name>"
13892 [(set (match_operand:VI48F_256_512 0 "register_operand" "=v")
13893 (unspec:VI48F_256_512
13894 [(match_operand:VI48F_256_512 1 "nonimmediate_operand" "vm")
13895 (match_operand:<sseintvecmode> 2 "register_operand" "v")]
13897 "TARGET_AVX2 && <mask_mode512bit_condition>"
13898 "vperm<ssemodesuffix>\t{%1, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1}"
13899 [(set_attr "type" "sselog")
13900 (set_attr "prefix" "<mask_prefix2>")
13901 (set_attr "mode" "<sseinsnmode>")])
;; Immediate permute expander: splits the 8-bit immediate (operand 2) into
;; four 2-bit selectors (bits 0-1, 2-3, 4-5, 6-7) and passes them as
;; separate operands to the <avx2_avx512f>_perm<mode>_1 pattern.
13903 (define_expand "<avx2_avx512f>_perm<mode>"
13904 [(match_operand:VI8F_256_512 0 "register_operand")
13905 (match_operand:VI8F_256_512 1 "nonimmediate_operand")
13906 (match_operand:SI 2 "const_0_to_255_operand")]
13909 int mask = INTVAL (operands[2]);
13910 emit_insn (gen_<avx2_avx512f>_perm<mode>_1 (operands[0], operands[1],
13911 GEN_INT ((mask >> 0) & 3),
13912 GEN_INT ((mask >> 2) & 3),
13913 GEN_INT ((mask >> 4) & 3),
13914 GEN_INT ((mask >> 6) & 3)));
;; Masked immediate permute expander for V8FI modes.  Splits the 8-bit
;; immediate into four 2-bit selectors as the unmasked expander does, and
;; additionally forwards operand 3 (vector_move_operand) and operand 4
;; (mask register) to the _1_mask pattern.
13918 (define_expand "avx512f_perm<mode>_mask"
13919 [(match_operand:V8FI 0 "register_operand")
13920 (match_operand:V8FI 1 "nonimmediate_operand")
13921 (match_operand:SI 2 "const_0_to_255_operand")
13922 (match_operand:V8FI 3 "vector_move_operand")
13923 (match_operand:<avx512fmaskmode> 4 "register_operand")]
13926 int mask = INTVAL (operands[2]);
13927 emit_insn (gen_<avx2_avx512f>_perm<mode>_1_mask (operands[0], operands[1],
13928 GEN_INT ((mask >> 0) & 3),
13929 GEN_INT ((mask >> 2) & 3),
13930 GEN_INT ((mask >> 4) & 3),
13931 GEN_INT ((mask >> 6) & 3),
13932 operands[3], operands[4]));
;; Immediate permute expressed as a vec_select with four selector operands
;; (2..5), each limited to 0..3.  The output code packs them back into one
;; immediate (operand N shifted left by 0/2/4/6 bits), stores it in
;; operands[2] and prints vperm<suffix>.
;; NOTE(review): the declaration/initialisation of `mask' is not visible in
;; this listing.
13936 (define_insn "<avx2_avx512f>_perm<mode>_1<mask_name>"
13937 [(set (match_operand:VI8F_256_512 0 "register_operand" "=v")
13938 (vec_select:VI8F_256_512
13939 (match_operand:VI8F_256_512 1 "nonimmediate_operand" "vm")
13940 (parallel [(match_operand 2 "const_0_to_3_operand")
13941 (match_operand 3 "const_0_to_3_operand")
13942 (match_operand 4 "const_0_to_3_operand")
13943 (match_operand 5 "const_0_to_3_operand")])))]
13944 "TARGET_AVX2 && <mask_mode512bit_condition>"
13947 mask |= INTVAL (operands[2]) << 0;
13948 mask |= INTVAL (operands[3]) << 2;
13949 mask |= INTVAL (operands[4]) << 4;
13950 mask |= INTVAL (operands[5]) << 6;
13951 operands[2] = GEN_INT (mask);
13952 return "vperm<ssemodesuffix>\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
13954 [(set_attr "type" "sselog")
13955 (set_attr "prefix" "<mask_prefix2>")
13956 (set_attr "mode" "<sseinsnmode>")])
;; vperm2i128 on V4DI: register source (operand 1), reg/mem source
;; (operand 2) and an 8-bit immediate (operand 3, 0..255).
;; The unspec code and condition lines are not visible in this listing.
13958 (define_insn "avx2_permv2ti"
13959 [(set (match_operand:V4DI 0 "register_operand" "=x")
13961 [(match_operand:V4DI 1 "register_operand" "x")
13962 (match_operand:V4DI 2 "nonimmediate_operand" "xm")
13963 (match_operand:SI 3 "const_0_to_255_operand" "n")]
13966 "vperm2i128\t{%3, %2, %1, %0|%0, %1, %2, %3}"
13967 [(set_attr "type" "sselog")
13968 (set_attr "prefix" "vex")
13969 (set_attr "mode" "OI")])
;; vbroadcastsd into V4DF from element 0 of a V2DF register (register
;; source only).  The vec_select line and the condition are not visible in
;; this listing.
13971 (define_insn "avx2_vec_dupv4df"
13972 [(set (match_operand:V4DF 0 "register_operand" "=x")
13973 (vec_duplicate:V4DF
13975 (match_operand:V2DF 1 "register_operand" "x")
13976 (parallel [(const_int 0)]))))]
13978 "vbroadcastsd\t{%1, %0|%0, %1}"
13979 [(set_attr "type" "sselog1")
13980 (set_attr "prefix" "vex")
13981 (set_attr "mode" "V4DF")])
13983 ;; Modes handled by AVX vec_dup patterns.
;; Covers the four 256-bit modes with 32- or 64-bit elements; none of the
;; entries carries a target condition of its own.
13984 (define_mode_iterator AVX_VEC_DUP_MODE
13985 [V8SI V8SF V4DI V4DF])
;; Scalar-to-256-bit broadcast.  Three alternatives: memory source (any
;; isa), register source (isa avx2, printed with %x1), and a discouraged
;; register source (isa noavx2).
;; NOTE(review): the template for the third alternative and the condition
;; are not visible in this listing.  "mode" is fixed to V8SF for all
;; iterator modes -- confirm that is intended.
13987 (define_insn "vec_dup<mode>"
13988 [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand" "=x,x,x")
13989 (vec_duplicate:AVX_VEC_DUP_MODE
13990 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "m,x,?x")))]
13993 vbroadcast<ssescalarmodesuffix>\t{%1, %0|%0, %1}
13994 vbroadcast<ssescalarmodesuffix>\t{%x1, %0|%0, %x1}
13996 [(set_attr "type" "ssemov")
13997 (set_attr "prefix_extra" "1")
13998 (set_attr "prefix" "vex")
13999 (set_attr "isa" "*,avx2,noavx2")
14000 (set_attr "mode" "V8SF")])
;; 512-bit broadcast of element 0 of an xmm-mode reg/mem operand.  The
;; mnemonic is assembled from <sseintprefix> and <bcstscalarsuff>;
;; <mask_operand2> adds the masked variant.  The condition line is not
;; visible in this listing.
14002 (define_insn "<mask_codefor>avx512f_vec_dup<mode><mask_name>"
14003 [(set (match_operand:VI48F_512 0 "register_operand" "=v")
14004 (vec_duplicate:VI48F_512
14005 (vec_select:<ssescalarmode>
14006 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "vm")
14007 (parallel [(const_int 0)]))))]
14009 "v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
14010 [(set_attr "type" "ssemov")
14011 (set_attr "prefix" "evex")
14012 (set_attr "mode" "<sseinsnmode>")])
;; Broadcast of a 128-bit (<ssexmmmode>) operand into a V16FI vector.
;; Register source: vshuf...32x4 with immediate 0 and the source printed
;; via %g1 for both inputs.  Memory source: vbroadcast...32x4.
;; The condition line is not visible in this listing.
14014 (define_insn "<mask_codefor>avx512f_broadcast<mode><mask_name>"
14015 [(set (match_operand:V16FI 0 "register_operand" "=v,v")
14016 (vec_duplicate:V16FI
14017 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "v,m")))]
14020 vshuf<shuffletype>32x4\t{$0x0, %g1, %g1, %0<mask_operand2>|%0<mask_operand2>, %g1, %g1, 0x0}
14021 vbroadcast<shuffletype>32x4\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
14022 [(set_attr "type" "ssemov")
14023 (set_attr "prefix" "evex")
14024 (set_attr "mode" "<sseinsnmode>")])
;; Broadcast of a half-width (<ssehalfvecmode>) operand into a V8FI vector.
;; Register source: vshuf...64x2 with immediate 0x44 and the source printed
;; via %g1 for both inputs.  Memory source: vbroadcast...64x4.
;; The condition line is not visible in this listing.
14026 (define_insn "<mask_codefor>avx512f_broadcast<mode><mask_name>"
14027 [(set (match_operand:V8FI 0 "register_operand" "=v,v")
14028 (vec_duplicate:V8FI
14029 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "v,m")))]
14032 vshuf<shuffletype>64x2\t{$0x44, %g1, %g1, %0<mask_operand2>|%0<mask_operand2>, %g1, %g1, 0x44}
14033 vbroadcast<shuffletype>64x4\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
14034 [(set_attr "type" "ssemov")
14035 (set_attr "prefix" "evex")
14036 (set_attr "mode" "<sseinsnmode>")])
;; vpbroadcast from a general-purpose register ("r") into a 512-bit integer
;; vector.  The condition excludes V8DImode unless TARGET_64BIT.
14038 (define_insn "<mask_codefor>avx512f_vec_dup_gpr<mode><mask_name>"
14039 [(set (match_operand:VI48_512 0 "register_operand" "=v")
14040 (vec_duplicate:VI48_512
14041 (match_operand:<ssescalarmode> 1 "register_operand" "r")))]
14042 "TARGET_AVX512F && (<MODE>mode != V8DImode || TARGET_64BIT)"
14043 "vpbroadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
14044 [(set_attr "type" "ssemov")
14045 (set_attr "prefix" "evex")
14046 (set_attr "mode" "<sseinsnmode>")])
;; 512-bit broadcast of a scalar-mode operand; despite the _mem name the
;; constraint "vm" accepts a vector register as well as memory.
;; The condition line is not visible in this listing.
14048 (define_insn "<mask_codefor>avx512f_vec_dup_mem<mode><mask_name>"
14049 [(set (match_operand:VI48F_512 0 "register_operand" "=v")
14050 (vec_duplicate:VI48F_512
14051 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "vm")))]
14053 "v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
14054 [(set_attr "type" "ssemov")
14055 (set_attr "prefix" "evex")
14056 (set_attr "mode" "<sseinsnmode>")])
;; vbroadcasti128 into a 256-bit integer vector from a half-width operand
;; that must be in memory (memory_operand, "m").
;; The RTL codes around operand 1 and the condition are not visible in this
;; listing.
14058 (define_insn "avx2_vbroadcasti128_<mode>"
14059 [(set (match_operand:VI_256 0 "register_operand" "=x")
14061 (match_operand:<ssehalfvecmode> 1 "memory_operand" "m")
14064 "vbroadcasti128\t{%1, %0|%0, %1}"
14065 [(set_attr "type" "ssemov")
14066 (set_attr "prefix_extra" "1")
14067 (set_attr "prefix" "vex")
14068 (set_attr "mode" "OI")])
;; NOTE(review): the opening form of this pattern is not visible in this
;; listing; the shape (match pattern, condition, replacement, preparation
;; statement) looks like a splitter -- confirm against the full file.
;; After reload, on AVX without AVX2, a vec_duplicate from a scalar register
;; is replaced by a half-width vec_duplicate into operand 2 followed by a
;; vec_concat of operand 2 with itself.  Operand 2 is the destination's
;; register number viewed in <ssehalfvecmode>.
14071 [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand")
14072 (vec_duplicate:AVX_VEC_DUP_MODE
14073 (match_operand:<ssescalarmode> 1 "register_operand")))]
14074 "TARGET_AVX && !TARGET_AVX2 && reload_completed"
14075 [(set (match_dup 2)
14076 (vec_duplicate:<ssehalfvecmode> (match_dup 1)))
14078 (vec_concat:AVX_VEC_DUP_MODE (match_dup 2) (match_dup 2)))]
14079 "operands[2] = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (operands[0]));")
;; 128-bit to 256-bit broadcast with three alternatives:
;;   memory source               -> vbroadcast<i128>
;;   source tied to destination  -> vinsert<i128> with immediate 1
;;   other register (discouraged)-> vperm2<i128> with immediate 0, source
;;                                  printed via %t1
;; The RTL codes around operand 1 and the condition are not visible in this
;; listing.
14081 (define_insn "avx_vbroadcastf128_<mode>"
14082 [(set (match_operand:V_256 0 "register_operand" "=x,x,x")
14084 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "m,0,?x")
14088 vbroadcast<i128>\t{%1, %0|%0, %1}
14089 vinsert<i128>\t{$1, %1, %0, %0|%0, %0, %1, 1}
14090 vperm2<i128>\t{$0, %t1, %t1, %0|%0, %t1, %t1, 0}"
14091 [(set_attr "type" "ssemov,sselog1,sselog1")
14092 (set_attr "prefix_extra" "1")
14093 (set_attr "length_immediate" "0,1,1")
14094 (set_attr "prefix" "vex")
14095 (set_attr "mode" "<sseinsnmode>")])
;; vpbroadcastmb2q: broadcasts a QImode mask register ("k") into V8DI.
;; The RTL code applied to operand 1 inside the vec_duplicate and the
;; condition are not visible in this listing.
14097 (define_insn "avx512cd_maskb_vec_dupv8di"
14098 [(set (match_operand:V8DI 0 "register_operand" "=v")
14099 (vec_duplicate:V8DI
14101 (match_operand:QI 1 "register_operand" "k"))))]
14103 "vpbroadcastmb2q\t{%1, %0|%0, %1}"
14104 [(set_attr "type" "mskmov")
14105 (set_attr "prefix" "evex")
14106 (set_attr "mode" "XI")])
;; vpbroadcastmw2d: broadcasts an HImode mask register ("k") into V16SI.
;; The RTL code applied to operand 1 inside the vec_duplicate and the
;; condition are not visible in this listing.
14108 (define_insn "avx512cd_maskw_vec_dupv16si"
14109 [(set (match_operand:V16SI 0 "register_operand" "=v")
14110 (vec_duplicate:V16SI
14112 (match_operand:HI 1 "register_operand" "k"))))]
14114 "vpbroadcastmw2d\t{%1, %0|%0, %1}"
14115 [(set_attr "type" "mskmov")
14116 (set_attr "prefix" "evex")
14117 (set_attr "mode" "XI")])
14119 ;; Recognize broadcast as a vec_select as produced by builtin_vec_perm.
14120 ;; If it so happens that the input is in memory, use vbroadcast.
14121 ;; Otherwise use vpermilp (and in the case of 256-bit modes, vperm2f128).
;; V4SF case.  The broadcast element index is read from operand 3.  Two
;; output paths are visible: one re-addresses the memory operand to the
;; selected SFmode element (byte offset elt * 4) and emits vbroadcastss;
;; the other emits vpermilps with immediate elt * 0x55.
;; NOTE(review): the case labels mapping the three alternatives (m / o / x)
;; onto these paths are not visible in this listing.
14122 (define_insn "*avx_vperm_broadcast_v4sf"
14123 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
14125 (match_operand:V4SF 1 "nonimmediate_operand" "m,o,x")
14126 (match_parallel 2 "avx_vbroadcast_operand"
14127 [(match_operand 3 "const_int_operand" "C,n,n")])))]
14130 int elt = INTVAL (operands[3]);
14131 switch (which_alternative)
14135 operands[1] = adjust_address_nv (operands[1], SFmode, elt * 4);
14136 return "vbroadcastss\t{%1, %0|%0, %k1}";
14138 operands[2] = GEN_INT (elt * 0x55);
14139 return "vpermilps\t{%2, %1, %0|%0, %1, %2}";
14141 gcc_unreachable ();
14144 [(set_attr "type" "ssemov,ssemov,sselog1")
14145 (set_attr "prefix_extra" "1")
14146 (set_attr "length_immediate" "0,0,1")
14147 (set_attr "prefix" "vex")
14148 (set_attr "mode" "SF,SF,V4SF")])
;; 256-bit float broadcast-by-permute, split after reload (the split is
;; disabled for V4DFmode when AVX2 is available).  The replacement pattern
;; is a vec_duplicate of operand 1.  Visible preparation code:
;;  - with AVX2 and element 0, emits vec_dup<mode> on the low part;
;;  - otherwise emits vpermil to replicate the element within each 128-bit
;;    lane (mask 15 or 0 for V4DF, (elt & 3) * 0x55 otherwise), then
;;    vperm2f128 with mask (elt / (nelts / 2)) * 0x11 to copy the wanted
;;    lane into both halves;
;;  - operand 1 is re-addressed to the selected scalar element.
;; NOTE(review): the if/else structure selecting between these steps
;; (presumably register vs. memory source) is not visible in this listing.
14150 (define_insn_and_split "*avx_vperm_broadcast_<mode>"
14151 [(set (match_operand:VF_256 0 "register_operand" "=x,x,x")
14153 (match_operand:VF_256 1 "nonimmediate_operand" "m,o,?x")
14154 (match_parallel 2 "avx_vbroadcast_operand"
14155 [(match_operand 3 "const_int_operand" "C,n,n")])))]
14158 "&& reload_completed && (<MODE>mode != V4DFmode || !TARGET_AVX2)"
14159 [(set (match_dup 0) (vec_duplicate:VF_256 (match_dup 1)))]
14161 rtx op0 = operands[0], op1 = operands[1];
14162 int elt = INTVAL (operands[3]);
14168 if (TARGET_AVX2 && elt == 0)
14170 emit_insn (gen_vec_dup<mode> (op0, gen_lowpart (<ssescalarmode>mode,
14175 /* Shuffle element we care about into all elements of the 128-bit lane.
14176 The other lane gets shuffled too, but we don't care. */
14177 if (<MODE>mode == V4DFmode)
14178 mask = (elt & 1 ? 15 : 0);
14180 mask = (elt & 3) * 0x55;
14181 emit_insn (gen_avx_vpermil<mode> (op0, op1, GEN_INT (mask)));
14183 /* Shuffle the lane we care about into both lanes of the dest. */
14184 mask = (elt / (<ssescalarnum> / 2)) * 0x11;
14185 emit_insn (gen_avx_vperm2f128<mode>3 (op0, op0, op0, GEN_INT (mask)));
14189 operands[1] = adjust_address (op1, <ssescalarmode>mode,
14190 elt * GET_MODE_SIZE (<ssescalarmode>mode));
;; vpermil expander for VF2 modes: converts the immediate (operand 2) into
;; an explicit selector PARALLEL.  Elements are handled in pairs; for the
;; pair starting at i, mask bit i picks the first element and mask bit
;; i + 1 the second, each offset by i so selection stays within the pair.
;; NOTE(review): the statement receiving the PARALLEL is not visible in
;; this listing.
14193 (define_expand "<sse2_avx_avx512f>_vpermil<mode><mask_name>"
14194 [(set (match_operand:VF2 0 "register_operand")
14196 (match_operand:VF2 1 "nonimmediate_operand")
14197 (match_operand:SI 2 "const_0_to_255_operand")))]
14198 "TARGET_AVX && <mask_mode512bit_condition>"
14200 int mask = INTVAL (operands[2]);
14201 rtx perm[<ssescalarnum>];
14204 for (i = 0; i < <ssescalarnum>; i = i + 2)
14206 perm[i] = GEN_INT (((mask >> i) & 1) + i);
14207 perm[i + 1] = GEN_INT (((mask >> (i + 1)) & 1) + i);
14211 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
;; vpermil expander for VF1 modes: converts the immediate (operand 2) into
;; an explicit selector PARALLEL.  Elements are handled in groups of four;
;; the same four 2-bit fields of the immediate are reused for every group,
;; each offset by the group's base index i.
;; NOTE(review): the statement receiving the PARALLEL is not visible in
;; this listing.
14214 (define_expand "<sse2_avx_avx512f>_vpermil<mode><mask_name>"
14215 [(set (match_operand:VF1 0 "register_operand")
14217 (match_operand:VF1 1 "nonimmediate_operand")
14218 (match_operand:SI 2 "const_0_to_255_operand")))]
14219 "TARGET_AVX && <mask_mode512bit_condition>"
14221 int mask = INTVAL (operands[2]);
14222 rtx perm[<ssescalarnum>];
14225 for (i = 0; i < <ssescalarnum>; i = i + 4)
14227 perm[i] = GEN_INT (((mask >> 0) & 3) + i);
14228 perm[i + 1] = GEN_INT (((mask >> 2) & 3) + i);
14229 perm[i + 2] = GEN_INT (((mask >> 4) & 3) + i);
14230 perm[i + 3] = GEN_INT (((mask >> 6) & 3) + i);
14234 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
;; Matches a selector PARALLEL (operand 2) that avx_vpermilp_parallel
;; accepts for this mode.  That helper's return value minus one is used as
;; the immediate: the output code stores it in operands[2] and prints
;; vpermil<suffix>.
14237 (define_insn "*<sse2_avx_avx512f>_vpermilp<mode><mask_name>"
14238 [(set (match_operand:VF 0 "register_operand" "=v")
14240 (match_operand:VF 1 "nonimmediate_operand" "vm")
14241 (match_parallel 2 ""
14242 [(match_operand 3 "const_int_operand")])))]
14243 "TARGET_AVX && <mask_mode512bit_condition>
14244 && avx_vpermilp_parallel (operands[2], <MODE>mode)"
14246 int mask = avx_vpermilp_parallel (operands[2], <MODE>mode) - 1;
14247 operands[2] = GEN_INT (mask);
14248 return "vpermil<ssemodesuffix>\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}";
14250 [(set_attr "type" "sselog")
14251 (set_attr "prefix_extra" "1")
14252 (set_attr "length_immediate" "1")
14253 (set_attr "prefix" "<mask_prefix>")
14254 (set_attr "mode" "<sseinsnmode>")])
;; Variable-control vpermil: register data source (operand 1) and an
;; integer-vector control operand (operand 2) that may be in memory.
;; The unspec code line is not visible in this listing.
14256 (define_insn "<sse2_avx_avx512f>_vpermilvar<mode>3<mask_name>"
14257 [(set (match_operand:VF 0 "register_operand" "=v")
14259 [(match_operand:VF 1 "register_operand" "v")
14260 (match_operand:<sseintvecmode> 2 "nonimmediate_operand" "vm")]
14262 "TARGET_AVX && <mask_mode512bit_condition>"
14263 "vpermil<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14264 [(set_attr "type" "sselog")
14265 (set_attr "prefix_extra" "1")
14266 (set_attr "btver2_decode" "vector")
14267 (set_attr "prefix" "<mask_prefix>")
14268 (set_attr "mode" "<sseinsnmode>")])
;; Zero-masking vpermi2 expander: forwards operands 0-3 to the _maskz_1
;; pattern, inserting the all-zero vector constant before the mask
;; register (operand 4).
;; NOTE(review): the operands carry constraint strings although this is a
;; define_expand -- confirm whether they have any effect here.
14270 (define_expand "avx512f_vpermi2var<mode>3_maskz"
14271 [(match_operand:VI48F_512 0 "register_operand" "=v")
14272 (match_operand:VI48F_512 1 "register_operand" "v")
14273 (match_operand:<sseintvecmode> 2 "register_operand" "0")
14274 (match_operand:VI48F_512 3 "nonimmediate_operand" "vm")
14275 (match_operand:<avx512fmaskmode> 4 "register_operand" "k")]
14278 emit_insn (gen_avx512f_vpermi2var<mode>3_maskz_1 (
14279 operands[0], operands[1], operands[2], operands[3],
14280 CONST0_RTX (<MODE>mode), operands[4]));
;; vpermi2: the integer index vector (operand 2) is tied to the destination
;; ("0"); operand 1 is a register source and operand 3 a reg/mem source.
;; The template prints only %3, %1 and %0, since operand 2 shares the
;; destination register.  The unspec code and condition are not visible in
;; this listing.
14284 (define_insn "avx512f_vpermi2var<mode>3<sd_maskz_name>"
14285 [(set (match_operand:VI48F_512 0 "register_operand" "=v")
14287 [(match_operand:VI48F_512 1 "register_operand" "v")
14288 (match_operand:<sseintvecmode> 2 "register_operand" "0")
14289 (match_operand:VI48F_512 3 "nonimmediate_operand" "vm")]
14292 "vpermi2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}"
14293 [(set_attr "type" "sselog")
14294 (set_attr "prefix" "evex")
14295 (set_attr "mode" "<sseinsnmode>")])
;; Merge-masking vpermi2: a vec_merge of the UNSPEC_VPERMI2_MASK result
;; under mask register operand 4, printed as {%4} after the destination.
;; The second vec_merge input and the condition are not visible in this
;; listing.
14297 (define_insn "avx512f_vpermi2var<mode>3_mask"
14298 [(set (match_operand:VI48F_512 0 "register_operand" "=v")
14299 (vec_merge:VI48F_512
14301 [(match_operand:VI48F_512 1 "register_operand" "v")
14302 (match_operand:<sseintvecmode> 2 "register_operand" "0")
14303 (match_operand:VI48F_512 3 "nonimmediate_operand" "vm")]
14304 UNSPEC_VPERMI2_MASK)
14306 (match_operand:<avx512fmaskmode> 4 "register_operand" "k")))]
14308 "vpermi2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
14309 [(set_attr "type" "sselog")
14310 (set_attr "prefix" "evex")
14311 (set_attr "mode" "<sseinsnmode>")])
;; Zero-masking vpermt2 expander: forwards operands 0-3 to the _maskz_1
;; pattern, inserting the all-zero vector constant before the mask
;; register (operand 4).  Here operand 1 is the integer index vector and
;; operand 2 the data vector.
;; NOTE(review): the operands carry constraint strings although this is a
;; define_expand -- confirm whether they have any effect here.
14313 (define_expand "avx512f_vpermt2var<mode>3_maskz"
14314 [(match_operand:VI48F_512 0 "register_operand" "=v")
14315 (match_operand:<sseintvecmode> 1 "register_operand" "v")
14316 (match_operand:VI48F_512 2 "register_operand" "0")
14317 (match_operand:VI48F_512 3 "nonimmediate_operand" "vm")
14318 (match_operand:<avx512fmaskmode> 4 "register_operand" "k")]
14321 emit_insn (gen_avx512f_vpermt2var<mode>3_maskz_1 (
14322 operands[0], operands[1], operands[2], operands[3],
14323 CONST0_RTX (<MODE>mode), operands[4]));
;; vpermt2: the data vector (operand 2) is tied to the destination ("0");
;; operand 1 is the integer index register and operand 3 a reg/mem source.
;; The template prints only %3, %1 and %0.  The unspec code and condition
;; are not visible in this listing.
14327 (define_insn "avx512f_vpermt2var<mode>3<sd_maskz_name>"
14328 [(set (match_operand:VI48F_512 0 "register_operand" "=v")
14330 [(match_operand:<sseintvecmode> 1 "register_operand" "v")
14331 (match_operand:VI48F_512 2 "register_operand" "0")
14332 (match_operand:VI48F_512 3 "nonimmediate_operand" "vm")]
14335 "vpermt2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}"
14336 [(set_attr "type" "sselog")
14337 (set_attr "prefix" "evex")
14338 (set_attr "mode" "<sseinsnmode>")])
;; Merge-masking vpermt2: a vec_merge under mask register operand 4,
;; printed as {%4} after the destination.  Operand 2 (data vector) is tied
;; to the destination.  The unspec code, the second vec_merge input and the
;; condition are not visible in this listing.
14340 (define_insn "avx512f_vpermt2var<mode>3_mask"
14341 [(set (match_operand:VI48F_512 0 "register_operand" "=v")
14342 (vec_merge:VI48F_512
14344 [(match_operand:<sseintvecmode> 1 "register_operand" "v")
14345 (match_operand:VI48F_512 2 "register_operand" "0")
14346 (match_operand:VI48F_512 3 "nonimmediate_operand" "vm")]
14349 (match_operand:<avx512fmaskmode> 4 "register_operand" "k")))]
14351 "vpermt2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
14352 [(set_attr "type" "sselog")
14353 (set_attr "prefix" "evex")
14354 (set_attr "mode" "<sseinsnmode>")])
;; Expander for vperm2f128 with an 8-bit immediate (operand 3).
;; The default pattern is an UNSPEC_VPERMIL2F128 of operands 1, 2 and 3.
;; When neither bit 7 nor bit 3 of the immediate is set
;; ((mask & 0x88) == 0) the preparation code instead builds an explicit
;; vec_select over the vec_concat of operands 1 and 2:
;;   - the first nelt/2 selected indices start at (mask & 3) * nelt/2,
;;   - the second nelt/2 indices start at ((mask >> 4) & 3) * nelt/2,
;; and wraps the selection in a SET of operand 0.
14356 (define_expand "avx_vperm2f128<mode>3"
14357 [(set (match_operand:AVX256MODE2P 0 "register_operand")
14358 (unspec:AVX256MODE2P
14359 [(match_operand:AVX256MODE2P 1 "register_operand")
14360 (match_operand:AVX256MODE2P 2 "nonimmediate_operand")
14361 (match_operand:SI 3 "const_0_to_255_operand")]
14362 UNSPEC_VPERMIL2F128))]
14365 int mask = INTVAL (operands[3]);
14366 if ((mask & 0x88) == 0)
14368 rtx perm[<ssescalarnum>], t1, t2;
14369 int i, base, nelt = <ssescalarnum>, nelt2 = nelt / 2;
14371 base = (mask & 3) * nelt2;
14372 for (i = 0; i < nelt2; ++i)
14373 perm[i] = GEN_INT (base + i);
14375 base = ((mask >> 4) & 3) * nelt2;
14376 for (i = 0; i < nelt2; ++i)
14377 perm[i + nelt2] = GEN_INT (base + i);
14379 t2 = gen_rtx_VEC_CONCAT (<ssedoublevecmode>mode,
14380 operands[1], operands[2]);
14381 t1 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, perm));
14382 t2 = gen_rtx_VEC_SELECT (<MODE>mode, t2, t1);
14383 t2 = gen_rtx_SET (VOIDmode, operands[0], t2);
14389 ;; Note that bits 7 and 3 of the imm8 allow lanes to be zeroed, which
14390 ;; means that in order to represent this properly in rtl we'd have to
14391 ;; nest *another* vec_concat with a zero operand and do the select from
14392 ;; a 4x wide vector. That doesn't seem very nice.
;; Unspec form of vperm2<i128>: accepts any immediate in 0..255
;; (operand 3) and prints it unchanged as the first AT&T operand.
;; Operand 2 may be a register or memory.
14393 (define_insn "*avx_vperm2f128<mode>_full"
14394 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
14395 (unspec:AVX256MODE2P
14396 [(match_operand:AVX256MODE2P 1 "register_operand" "x")
14397 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm")
14398 (match_operand:SI 3 "const_0_to_255_operand" "n")]
14399 UNSPEC_VPERMIL2F128))]
14401 "vperm2<i128>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
14402 [(set_attr "type" "sselog")
14403 (set_attr "prefix_extra" "1")
14404 (set_attr "length_immediate" "1")
14405 (set_attr "prefix" "vex")
14406 (set_attr "mode" "<sseinsnmode>")])
;; vec_select form of the 128-bit lane permute: selects from the
;; vec_concat of operands 1 and 2 using a parallel (operand 3) that
;; avx_vperm2f128_parallel accepts.  The output code recovers the
;; immediate as that helper's return value minus one and returns either
;; a vinsert<i128> template (immediate 0 or 1, using the %x2 form of
;; operand 2) or a vperm2<i128> template with operand 3 replaced by the
;; recovered immediate.
14408 (define_insn "*avx_vperm2f128<mode>_nozero"
14409 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
14410 (vec_select:AVX256MODE2P
14411 (vec_concat:<ssedoublevecmode>
14412 (match_operand:AVX256MODE2P 1 "register_operand" "x")
14413 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm"))
14414 (match_parallel 3 ""
14415 [(match_operand 4 "const_int_operand")])))]
14417 && avx_vperm2f128_parallel (operands[3], <MODE>mode)"
14419 int mask = avx_vperm2f128_parallel (operands[3], <MODE>mode) - 1;
14421 return "vinsert<i128>\t{$0, %x2, %1, %0|%0, %1, %x2, 0}";
14423 return "vinsert<i128>\t{$1, %x2, %1, %0|%0, %1, %x2, 1}";
14424 operands[3] = GEN_INT (mask);
14425 return "vperm2<i128>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
14427 [(set_attr "type" "sselog")
14428 (set_attr "prefix_extra" "1")
14429 (set_attr "length_immediate" "1")
14430 (set_attr "prefix" "vex")
14431 (set_attr "mode" "<sseinsnmode>")])
;; Expander for inserting a half-width vector (operand 2) into the
;; 256-bit-mode operand 1, giving operand 0.  The 0/1 immediate
;; (operand 3) selects between gen_vec_set_lo_<mode> and
;; gen_vec_set_hi_<mode>; any other value is gcc_unreachable.
14433 (define_expand "avx_vinsertf128<mode>"
14434 [(match_operand:V_256 0 "register_operand")
14435 (match_operand:V_256 1 "register_operand")
14436 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand")
14437 (match_operand:SI 3 "const_0_to_1_operand")]
14440 rtx (*insn)(rtx, rtx, rtx);
14442 switch (INTVAL (operands[3]))
14445 insn = gen_vec_set_lo_<mode>;
14448 insn = gen_vec_set_hi_<mode>;
14451 gcc_unreachable ();
14454 emit_insn (insn (operands[0], operands[1], operands[2]));
;; vinserti128 with immediate 0x0: combines the V2DI operand 2 with
;; elements 2 and 3 selected from the V4DI operand 1.
14458 (define_insn "avx2_vec_set_lo_v4di"
14459 [(set (match_operand:V4DI 0 "register_operand" "=x")
14461 (match_operand:V2DI 2 "nonimmediate_operand" "xm")
14463 (match_operand:V4DI 1 "register_operand" "x")
14464 (parallel [(const_int 2) (const_int 3)]))))]
14466 "vinserti128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
14467 [(set_attr "type" "sselog")
14468 (set_attr "prefix_extra" "1")
14469 (set_attr "length_immediate" "1")
14470 (set_attr "prefix" "vex")
14471 (set_attr "mode" "OI")])
;; vinserti128 with immediate 0x1: combines elements 0 and 1 selected
;; from the V4DI operand 1 with the V2DI operand 2.
14473 (define_insn "avx2_vec_set_hi_v4di"
14474 [(set (match_operand:V4DI 0 "register_operand" "=x")
14477 (match_operand:V4DI 1 "register_operand" "x")
14478 (parallel [(const_int 0) (const_int 1)]))
14479 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
14481 "vinserti128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
14482 [(set_attr "type" "sselog")
14483 (set_attr "prefix_extra" "1")
14484 (set_attr "length_immediate" "1")
14485 (set_attr "prefix" "vex")
14486 (set_attr "mode" "OI")])
;; Four-element (VI8F_256) low-half insert: the result is the vec_concat
;; of the half-width operand 2 (low part) with elements 2 and 3 of
;; operand 1 (high part).  Emitted as vinsert<i128> with immediate 0x0.
14488 (define_insn "vec_set_lo_<mode>"
14489 [(set (match_operand:VI8F_256 0 "register_operand" "=x")
14490 (vec_concat:VI8F_256
14491 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")
14492 (vec_select:<ssehalfvecmode>
14493 (match_operand:VI8F_256 1 "register_operand" "x")
14494 (parallel [(const_int 2) (const_int 3)]))))]
14496 "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
14497 [(set_attr "type" "sselog")
14498 (set_attr "prefix_extra" "1")
14499 (set_attr "length_immediate" "1")
14500 (set_attr "prefix" "vex")
14501 (set_attr "mode" "<sseinsnmode>")])
;; Four-element (VI8F_256) high-half insert: the result is the
;; vec_concat of elements 0 and 1 of operand 1 (low part) with the
;; half-width operand 2 (high part).  Emitted as vinsert<i128> with
;; immediate 0x1.
14503 (define_insn "vec_set_hi_<mode>"
14504 [(set (match_operand:VI8F_256 0 "register_operand" "=x")
14505 (vec_concat:VI8F_256
14506 (vec_select:<ssehalfvecmode>
14507 (match_operand:VI8F_256 1 "register_operand" "x")
14508 (parallel [(const_int 0) (const_int 1)]))
14509 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")))]
14511 "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
14512 [(set_attr "type" "sselog")
14513 (set_attr "prefix_extra" "1")
14514 (set_attr "length_immediate" "1")
14515 (set_attr "prefix" "vex")
14516 (set_attr "mode" "<sseinsnmode>")])
;; Eight-element (VI4F_256) low-half insert: the result is the
;; vec_concat of the half-width operand 2 (low part) with elements 4..7
;; of operand 1 (high part).  Emitted as vinsert<i128> with
;; immediate 0x0.
14518 (define_insn "vec_set_lo_<mode>"
14519 [(set (match_operand:VI4F_256 0 "register_operand" "=x")
14520 (vec_concat:VI4F_256
14521 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")
14522 (vec_select:<ssehalfvecmode>
14523 (match_operand:VI4F_256 1 "register_operand" "x")
14524 (parallel [(const_int 4) (const_int 5)
14525 (const_int 6) (const_int 7)]))))]
14527 "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
14528 [(set_attr "type" "sselog")
14529 (set_attr "prefix_extra" "1")
14530 (set_attr "length_immediate" "1")
14531 (set_attr "prefix" "vex")
14532 (set_attr "mode" "<sseinsnmode>")])
;; Eight-element (VI4F_256) high-half insert: the result is the
;; vec_concat of elements 0..3 of operand 1 (low part) with the
;; half-width operand 2 (high part).  Emitted as vinsert<i128> with
;; immediate 0x1.
14534 (define_insn "vec_set_hi_<mode>"
14535 [(set (match_operand:VI4F_256 0 "register_operand" "=x")
14536 (vec_concat:VI4F_256
14537 (vec_select:<ssehalfvecmode>
14538 (match_operand:VI4F_256 1 "register_operand" "x")
14539 (parallel [(const_int 0) (const_int 1)
14540 (const_int 2) (const_int 3)]))
14541 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")))]
14543 "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
14544 [(set_attr "type" "sselog")
14545 (set_attr "prefix_extra" "1")
14546 (set_attr "length_immediate" "1")
14547 (set_attr "prefix" "vex")
14548 (set_attr "mode" "<sseinsnmode>")])
;; V16HI low-half insert: combines the V8HI operand 2 with elements
;; 8..15 selected from operand 1.  The mnemonic is written
;; "vinsert%~128" with immediate 0x0; the %~ output modifier is resolved
;; when the template is printed.
14550 (define_insn "vec_set_lo_v16hi"
14551 [(set (match_operand:V16HI 0 "register_operand" "=x")
14553 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
14555 (match_operand:V16HI 1 "register_operand" "x")
14556 (parallel [(const_int 8) (const_int 9)
14557 (const_int 10) (const_int 11)
14558 (const_int 12) (const_int 13)
14559 (const_int 14) (const_int 15)]))))]
14561 "vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
14562 [(set_attr "type" "sselog")
14563 (set_attr "prefix_extra" "1")
14564 (set_attr "length_immediate" "1")
14565 (set_attr "prefix" "vex")
14566 (set_attr "mode" "OI")])
;; V16HI high-half insert: combines elements 0..7 selected from
;; operand 1 with the V8HI operand 2.  Emitted as "vinsert%~128" with
;; immediate 0x1.
14568 (define_insn "vec_set_hi_v16hi"
14569 [(set (match_operand:V16HI 0 "register_operand" "=x")
14572 (match_operand:V16HI 1 "register_operand" "x")
14573 (parallel [(const_int 0) (const_int 1)
14574 (const_int 2) (const_int 3)
14575 (const_int 4) (const_int 5)
14576 (const_int 6) (const_int 7)]))
14577 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
14579 "vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
14580 [(set_attr "type" "sselog")
14581 (set_attr "prefix_extra" "1")
14582 (set_attr "length_immediate" "1")
14583 (set_attr "prefix" "vex")
14584 (set_attr "mode" "OI")])
;; V32QI low-half insert: combines the V16QI operand 2 with elements
;; 16..31 selected from operand 1.  Emitted as "vinsert%~128" with
;; immediate 0x0.
14586 (define_insn "vec_set_lo_v32qi"
14587 [(set (match_operand:V32QI 0 "register_operand" "=x")
14589 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
14591 (match_operand:V32QI 1 "register_operand" "x")
14592 (parallel [(const_int 16) (const_int 17)
14593 (const_int 18) (const_int 19)
14594 (const_int 20) (const_int 21)
14595 (const_int 22) (const_int 23)
14596 (const_int 24) (const_int 25)
14597 (const_int 26) (const_int 27)
14598 (const_int 28) (const_int 29)
14599 (const_int 30) (const_int 31)]))))]
14601 "vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
14602 [(set_attr "type" "sselog")
14603 (set_attr "prefix_extra" "1")
14604 (set_attr "length_immediate" "1")
14605 (set_attr "prefix" "vex")
14606 (set_attr "mode" "OI")])
;; V32QI high-half insert: combines elements 0..15 selected from
;; operand 1 with the V16QI operand 2.  Emitted as "vinsert%~128" with
;; immediate 0x1.
14608 (define_insn "vec_set_hi_v32qi"
14609 [(set (match_operand:V32QI 0 "register_operand" "=x")
14612 (match_operand:V32QI 1 "register_operand" "x")
14613 (parallel [(const_int 0) (const_int 1)
14614 (const_int 2) (const_int 3)
14615 (const_int 4) (const_int 5)
14616 (const_int 6) (const_int 7)
14617 (const_int 8) (const_int 9)
14618 (const_int 10) (const_int 11)
14619 (const_int 12) (const_int 13)
14620 (const_int 14) (const_int 15)]))
14621 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
14623 "vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
14624 [(set_attr "type" "sselog")
14625 (set_attr "prefix_extra" "1")
14626 (set_attr "length_immediate" "1")
14627 (set_attr "prefix" "vex")
14628 (set_attr "mode" "OI")])
;; Masked vector load: register operand 0 is set from memory operand 1
;; under control of the integer-vector mask in operand 2.  Emitted as
;; v<sseintprefix>maskmov<ssemodesuffix>.
14630 (define_insn "<avx_avx2>_maskload<ssemodesuffix><avxsizesuffix>"
14631 [(set (match_operand:V48_AVX2 0 "register_operand" "=x")
14633 [(match_operand:<sseintvecmode> 2 "register_operand" "x")
14634 (match_operand:V48_AVX2 1 "memory_operand" "m")]
14637 "v<sseintprefix>maskmov<ssemodesuffix>\t{%1, %2, %0|%0, %2, %1}"
14638 [(set_attr "type" "sselog1")
14639 (set_attr "prefix_extra" "1")
14640 (set_attr "prefix" "vex")
14641 (set_attr "btver2_decode" "vector")
14642 (set_attr "mode" "<sseinsnmode>")])
;; Masked vector store: memory operand 0 is a read-write destination
;; (constraint "+m"), operand 1 is the integer-vector mask and operand 2
;; the data register.  Emitted as v<sseintprefix>maskmov<ssemodesuffix>.
14644 (define_insn "<avx_avx2>_maskstore<ssemodesuffix><avxsizesuffix>"
14645 [(set (match_operand:V48_AVX2 0 "memory_operand" "+m")
14647 [(match_operand:<sseintvecmode> 1 "register_operand" "x")
14648 (match_operand:V48_AVX2 2 "register_operand" "x")
14652 "v<sseintprefix>maskmov<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
14653 [(set_attr "type" "sselog1")
14654 (set_attr "prefix_extra" "1")
14655 (set_attr "prefix" "vex")
14656 (set_attr "btver2_decode" "vector")
14657 (set_attr "mode" "<sseinsnmode>")])
;; Expander named maskload<mode>: operand 0 is the destination
;; register, operand 1 the memory source and operand 2 the
;; integer-vector mask.
14659 (define_expand "maskload<mode>"
14660 [(set (match_operand:V48_AVX2 0 "register_operand")
14662 [(match_operand:<sseintvecmode> 2 "register_operand")
14663 (match_operand:V48_AVX2 1 "memory_operand")]
;; Expander named maskstore<mode>: operand 0 is the memory destination,
;; operand 1 the data register and operand 2 the integer-vector mask.
;; Note that the mask is listed first inside the pattern even though it
;; carries the higher operand number.
14667 (define_expand "maskstore<mode>"
14668 [(set (match_operand:V48_AVX2 0 "memory_operand")
14670 [(match_operand:<sseintvecmode> 2 "register_operand")
14671 (match_operand:V48_AVX2 1 "register_operand")
;; Cast from a half-width vector (operand 1) to the full AVX256MODE2P
;; mode (operand 0), expressed as an unspec.  Two alternatives exist:
;; register destination with register-or-memory source, and memory
;; destination with register source.  The split runs once
;; reload_completed holds; its code can re-create operand 0 as a
;; half-width register, or operand 1 as a full-width register, keeping
;; the same register number, and then emits an ordinary move.
14676 (define_insn_and_split "avx_<castmode><avxsizesuffix>_<castmode>"
14677 [(set (match_operand:AVX256MODE2P 0 "nonimmediate_operand" "=x,m")
14678 (unspec:AVX256MODE2P
14679 [(match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "xm,x")]
14683 "&& reload_completed"
14686 rtx op0 = operands[0];
14687 rtx op1 = operands[1];
14689 op0 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op0));
14691 op1 = gen_rtx_REG (<MODE>mode, REGNO (op1));
14692 emit_move_insn (op0, op1);
;; vec_init expander for the V_256 modes: delegates to
;; ix86_expand_vector_init with its first argument false, passing
;; operands 0 and 1.
14696 (define_expand "vec_init<mode>"
14697 [(match_operand:V_256 0 "register_operand")
14701 ix86_expand_vector_init (false, operands[0], operands[1]);
;; vec_init expander for the VI48F_512 modes: delegates to
;; ix86_expand_vector_init with its first argument false, passing
;; operands 0 and 1.
14705 (define_expand "vec_init<mode>"
14706 [(match_operand:VI48F_512 0 "register_operand")
14710 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Expander that extracts a V2DI (operand 0) from the V4DI operand 1.
;; The 0/1 immediate (operand 2) selects between
;; gen_vec_extract_lo_v4di and gen_vec_extract_hi_v4di; any other value
;; is gcc_unreachable.
14714 (define_expand "avx2_extracti128"
14715 [(match_operand:V2DI 0 "nonimmediate_operand")
14716 (match_operand:V4DI 1 "register_operand")
14717 (match_operand:SI 2 "const_0_to_1_operand")]
14720 rtx (*insn)(rtx, rtx);
14722 switch (INTVAL (operands[2]))
14725 insn = gen_vec_extract_lo_v4di;
14728 insn = gen_vec_extract_hi_v4di;
14731 gcc_unreachable ();
14734 emit_insn (insn (operands[0], operands[1]));
;; Expander that inserts the V2DI operand 2 into the V4DI operand 1,
;; giving operand 0.  The 0/1 immediate (operand 3) selects between
;; gen_avx2_vec_set_lo_v4di and gen_avx2_vec_set_hi_v4di; any other
;; value is gcc_unreachable.
14738 (define_expand "avx2_inserti128"
14739 [(match_operand:V4DI 0 "register_operand")
14740 (match_operand:V4DI 1 "register_operand")
14741 (match_operand:V2DI 2 "nonimmediate_operand")
14742 (match_operand:SI 3 "const_0_to_1_operand")]
14745 rtx (*insn)(rtx, rtx, rtx);
14747 switch (INTVAL (operands[3]))
14750 insn = gen_avx2_vec_set_lo_v4di;
14753 insn = gen_avx2_vec_set_hi_v4di;
14756 gcc_unreachable ();
14759 emit_insn (insn (operands[0], operands[1], operands[2]));
;; Arithmetic right shift (ashiftrt) of operand 1 by the vector of
;; counts in operand 2, which may be a register or memory.  Emitted as
;; vpsrav<ssemodesuffix>.  Enabled when TARGET_AVX2 and the
;; <mask_mode512bit_condition> both hold.
14763 (define_insn "<avx2_avx512f>_ashrv<mode><mask_name>"
14764 [(set (match_operand:VI48_AVX512F 0 "register_operand" "=v")
14765 (ashiftrt:VI48_AVX512F
14766 (match_operand:VI48_AVX512F 1 "register_operand" "v")
14767 (match_operand:VI48_AVX512F 2 "nonimmediate_operand" "vm")))]
14768 "TARGET_AVX2 && <mask_mode512bit_condition>"
14769 "vpsrav<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14770 [(set_attr "type" "sseishft")
14771 (set_attr "prefix" "maybe_evex")
14772 (set_attr "mode" "<sseinsnmode>")])
;; Shifts of operand 1 by the vector of counts in operand 2, one insn
;; per code in the any_lshift iterator.  Emitted as
;; vp<vshift>v<ssemodesuffix>.  Enabled when TARGET_AVX2 and the
;; <mask_mode512bit_condition> both hold.
14774 (define_insn "<avx2_avx512f>_<shift_insn>v<mode><mask_name>"
14775 [(set (match_operand:VI48_AVX2_48_AVX512F 0 "register_operand" "=v")
14776 (any_lshift:VI48_AVX2_48_AVX512F
14777 (match_operand:VI48_AVX2_48_AVX512F 1 "register_operand" "v")
14778 (match_operand:VI48_AVX2_48_AVX512F 2 "nonimmediate_operand" "vm")))]
14779 "TARGET_AVX2 && <mask_mode512bit_condition>"
14780 "vp<vshift>v<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14781 [(set_attr "type" "sseishft")
14782 (set_attr "prefix" "maybe_evex")
14783 (set_attr "mode" "<sseinsnmode>")])
14785 ;; For avx_vec_concat<mode> insn pattern
;; Maps the 32-byte vector modes to "t" and the 64-byte vector modes to
;; "g".  The value is spliced in as the operand-modifier letter for
;; operand 1 in the vinsert template of avx_vec_concat<mode>.
14786 (define_mode_attr concat_tg_mode
14787 [(V32QI "t") (V16HI "t") (V8SI "t") (V4DI "t") (V8SF "t") (V4DF "t")
14788 (V64QI "g") (V32HI "g") (V16SI "g") (V8DI "g") (V16SF "g") (V8DF "g")])
;; Concatenates two half-width vectors (operands 1 and 2) into a
;; V_256_512 result.  There are two alternatives, as the per-alternative
;; attributes show:
;;   - operand 2 in a register or memory ("xm", type sselog): emits
;;     vinsert<i128> with immediate 0x1, printing operand 1 through the
;;     <concat_tg_mode> modifier;
;;   - operand 2 matching constraint "C" (type ssemov): emits a plain
;;     vmovaps/vmovapd/vmovdqa of operand 1 into the %t0 or %x0 view of
;;     the destination, chosen by get_attr_mode (insn).
14790 (define_insn "avx_vec_concat<mode>"
14791 [(set (match_operand:V_256_512 0 "register_operand" "=x,x")
14792 (vec_concat:V_256_512
14793 (match_operand:<ssehalfvecmode> 1 "register_operand" "x,x")
14794 (match_operand:<ssehalfvecmode> 2 "vector_move_operand" "xm,C")))]
14797 switch (which_alternative)
14800 return "vinsert<i128>\t{$0x1, %2, %<concat_tg_mode>1, %0|%0, %<concat_tg_mode>1, %2, 0x1}";
14802 switch (get_attr_mode (insn))
14805 return "vmovaps\t{%1, %t0|%t0, %1}";
14807 return "vmovapd\t{%1, %t0|%t0, %1}";
14809 return "vmovaps\t{%1, %x0|%x0, %1}";
14811 return "vmovapd\t{%1, %x0|%x0, %1}";
14813 return "vmovdqa\t{%1, %t0|%t0, %1}";
14815 return "vmovdqa\t{%1, %x0|%x0, %1}";
14817 gcc_unreachable ();
14820 gcc_unreachable ();
14823 [(set_attr "type" "sselog,ssemov")
14824 (set_attr "prefix_extra" "1,*")
14825 (set_attr "length_immediate" "1,*")
14826 (set_attr "prefix" "maybe_evex")
14827 (set_attr "mode" "<sseinsnmode>")])
;; Register form of vcvtph2ps producing a V4SF: the V8HI register
;; operand 1 goes through a V8SF unspec, from which elements 0..3 are
;; selected.
14829 (define_insn "vcvtph2ps"
14830 [(set (match_operand:V4SF 0 "register_operand" "=x")
14832 (unspec:V8SF [(match_operand:V8HI 1 "register_operand" "x")]
14834 (parallel [(const_int 0) (const_int 1)
14835 (const_int 2) (const_int 3)])))]
14837 "vcvtph2ps\t{%1, %0|%0, %1}"
14838 [(set_attr "type" "ssecvt")
14839 (set_attr "prefix" "vex")
14840 (set_attr "mode" "V4SF")])
;; Memory-source form of vcvtph2ps: converts a V4HI memory operand
;; directly into a V4SF register via UNSPEC_VCVTPH2PS.
;; NOTE(review): the "mode" attribute is V8SF although the result is
;; V4SF -- confirm that this is intentional.
14842 (define_insn "*vcvtph2ps_load"
14843 [(set (match_operand:V4SF 0 "register_operand" "=x")
14844 (unspec:V4SF [(match_operand:V4HI 1 "memory_operand" "m")]
14845 UNSPEC_VCVTPH2PS))]
14847 "vcvtph2ps\t{%1, %0|%0, %1}"
14848 [(set_attr "type" "ssecvt")
14849 (set_attr "prefix" "vex")
14850 (set_attr "mode" "V8SF")])
;; 256-bit vcvtph2ps: converts a V8HI register-or-memory operand into a
;; V8SF register via UNSPEC_VCVTPH2PS.
14852 (define_insn "vcvtph2ps256"
14853 [(set (match_operand:V8SF 0 "register_operand" "=x")
14854 (unspec:V8SF [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
14855 UNSPEC_VCVTPH2PS))]
14857 "vcvtph2ps\t{%1, %0|%0, %1}"
14858 [(set_attr "type" "ssecvt")
14859 (set_attr "prefix" "vex")
14860 (set_attr "btver2_decode" "double")
14861 (set_attr "mode" "V8SF")])
;; 512-bit vcvtph2ps: converts a V16HI operand into a V16SF register via
;; UNSPEC_VCVTPH2PS.  The predicate and constraint of operand 1, and the
;; mask / round_saeonly parts of the template, come from substitution
;; attributes defined elsewhere in the file.
14863 (define_insn "<mask_codefor>avx512f_vcvtph2ps512<mask_name><round_saeonly_name>"
14864 [(set (match_operand:V16SF 0 "register_operand" "=v")
14866 [(match_operand:V16HI 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
14867 UNSPEC_VCVTPH2PS))]
14869 "vcvtph2ps\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
14870 [(set_attr "type" "ssecvt")
14871 (set_attr "prefix" "evex")
14872 (set_attr "mode" "V16SF")])
;; Expander for the 128-bit vcvtps2ph: a V4HI unspec of the V4SF
;; operand 1 and the 8-bit immediate operand 2 feeds a V8HI destination.
;; The preparation statement sets operands[3] to the V4HI zero vector.
14874 (define_expand "vcvtps2ph"
14875 [(set (match_operand:V8HI 0 "register_operand")
14877 (unspec:V4HI [(match_operand:V4SF 1 "register_operand")
14878 (match_operand:SI 2 "const_0_to_255_operand")]
14882 "operands[3] = CONST0_RTX (V4HImode);")
;; Register form of vcvtps2ph: the V8HI destination is built from a V4HI
;; unspec of operand 1 and immediate operand 2, together with operand 3,
;; which must be a zero constant (const0_operand).
14884 (define_insn "*vcvtps2ph"
14885 [(set (match_operand:V8HI 0 "register_operand" "=x")
14887 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")
14888 (match_operand:SI 2 "const_0_to_255_operand" "N")]
14890 (match_operand:V4HI 3 "const0_operand")))]
14892 "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
14893 [(set_attr "type" "ssecvt")
14894 (set_attr "prefix" "vex")
14895 (set_attr "mode" "V4SF")])
;; Store form of vcvtps2ph: converts the V4SF register operand 1, with
;; immediate operand 2, straight into a V4HI memory destination.
14897 (define_insn "*vcvtps2ph_store"
14898 [(set (match_operand:V4HI 0 "memory_operand" "=m")
14899 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")
14900 (match_operand:SI 2 "const_0_to_255_operand" "N")]
14901 UNSPEC_VCVTPS2PH))]
14903 "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
14904 [(set_attr "type" "ssecvt")
14905 (set_attr "prefix" "vex")
14906 (set_attr "mode" "V4SF")])
;; 256-bit vcvtps2ph: converts the V8SF register operand 1, with
;; immediate operand 2, into a V8HI destination that may be a register
;; or memory.
14908 (define_insn "vcvtps2ph256"
14909 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=xm")
14910 (unspec:V8HI [(match_operand:V8SF 1 "register_operand" "x")
14911 (match_operand:SI 2 "const_0_to_255_operand" "N")]
14912 UNSPEC_VCVTPS2PH))]
14914 "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
14915 [(set_attr "type" "ssecvt")
14916 (set_attr "prefix" "vex")
14917 (set_attr "btver2_decode" "vector")
14918 (set_attr "mode" "V8SF")])
;; 512-bit vcvtps2ph: converts the V16SF register operand 1, with
;; immediate operand 2, into a V16HI destination that may be a register
;; or memory.  The <mask_operand3> placeholder after %0 is supplied by a
;; substitution defined elsewhere in the file.
14920 (define_insn "<mask_codefor>avx512f_vcvtps2ph512<mask_name>"
14921 [(set (match_operand:V16HI 0 "nonimmediate_operand" "=vm")
14923 [(match_operand:V16SF 1 "register_operand" "v")
14924 (match_operand:SI 2 "const_0_to_255_operand" "N")]
14925 UNSPEC_VCVTPS2PH))]
14927 "vcvtps2ph\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14928 [(set_attr "type" "ssecvt")
14929 (set_attr "prefix" "evex")
14930 (set_attr "mode" "V16SF")])
14932 ;; For gather* insn patterns
;; Result modes iterated over by the avx2_gather* patterns.
14933 (define_mode_iterator VEC_GATHER_MODE
14934 [V2DI V2DF V4DI V4DF V4SI V4SF V8SI V8SF])
;; Mode of the index-vector operand in the *gathersi / *scattersi
;; patterns, keyed by the data mode.
14935 (define_mode_attr VEC_GATHER_IDXSI
14936 [(V2DI "V4SI") (V4DI "V4SI") (V8DI "V8SI")
14937 (V2DF "V4SI") (V4DF "V4SI") (V8DF "V8SI")
14938 (V4SI "V4SI") (V8SI "V8SI") (V16SI "V16SI")
14939 (V4SF "V4SI") (V8SF "V8SI") (V16SF "V16SI")])
;; Mode of the index-vector operand in the *gatherdi patterns, keyed by
;; the data mode.
14941 (define_mode_attr VEC_GATHER_IDXDI
14942 [(V2DI "V2DI") (V4DI "V4DI") (V8DI "V8DI")
14943 (V2DF "V2DI") (V4DF "V4DI") (V8DF "V8DI")
14944 (V4SI "V2DI") (V8SI "V4DI") (V16SI "V8DI")
14945 (V4SF "V2DI") (V8SF "V4DI") (V16SF "V8DI")])
;; Mode of the source and mask data operands in the *gatherdi and
;; *scatterdi patterns, keyed by the data mode.  It equals the data mode
;; except for V8SI/V8SF and V16SI/V16SF, where it is the vector with
;; half as many elements.
14947 (define_mode_attr VEC_GATHER_SRCDI
14948 [(V2DI "V2DI") (V4DI "V4DI") (V8DI "V8DI")
14949 (V2DF "V2DF") (V4DF "V4DF") (V8DF "V8DF")
14950 (V4SI "V4SI") (V8SI "V4SI") (V16SI "V8SI")
14951 (V4SF "V4SF") (V8SF "V4SF") (V16SF "V8SF")])
;; Expander for the AVX2 gather with 32-bit indices.
;;   operand 0 - destination vector
;;   operand 1 - source vector (same mode as the destination)
;;   operand 2 - base address (vsib_address_operand)
;;   operand 3 - index vector, mode <VEC_GATHER_IDXSI>
;;   operand 4 - mask vector
;;   operand 5 - scale, one of 1/2/4/8 (const1248_operand)
;;   operand 6 - scratch with the destination mode, clobbered
;; The preparation code wraps base, index and scale into a Pmode
;; UNSPEC_VSIBADDR.
;; The scale predicate is spelled "const1248_operand" with no trailing
;; blank so that it names the same predicate as the matching insns.
14953 (define_expand "avx2_gathersi<mode>"
14954 [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand")
14955 (unspec:VEC_GATHER_MODE
14956 [(match_operand:VEC_GATHER_MODE 1 "register_operand")
14957 (mem:<ssescalarmode>
14959 [(match_operand 2 "vsib_address_operand")
14960 (match_operand:<VEC_GATHER_IDXSI>
14961 3 "register_operand")
14962 (match_operand:SI 5 "const1248_operand")]))
14963 (mem:BLK (scratch))
14964 (match_operand:VEC_GATHER_MODE 4 "register_operand")]
14966 (clobber (match_scratch:VEC_GATHER_MODE 6))])]
14970 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
14971 operands[5]), UNSPEC_VSIBADDR);
;; AVX2 gather with 32-bit indices, merge-source form.
;; Source operand 2 is tied to the destination (constraint "0") and the
;; mask operand 5 is tied to the clobbered scratch operand 1
;; (constraint "1").  Both outputs are earlyclobber ("=&x").  Operand 7
;; is the VSIB memory reference built from base 3, index 4 and scale 6.
;; The template prints the mask/scratch (%1), the memory (%7) and the
;; destination (%0).
14974 (define_insn "*avx2_gathersi<mode>"
14975 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
14976 (unspec:VEC_GATHER_MODE
14977 [(match_operand:VEC_GATHER_MODE 2 "register_operand" "0")
14978 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
14980 [(match_operand:P 3 "vsib_address_operand" "Tv")
14981 (match_operand:<VEC_GATHER_IDXSI> 4 "register_operand" "x")
14982 (match_operand:SI 6 "const1248_operand" "n")]
14984 (mem:BLK (scratch))
14985 (match_operand:VEC_GATHER_MODE 5 "register_operand" "1")]
14987 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
14989 "v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %7, %0|%0, %7, %1}"
14990 [(set_attr "type" "ssemov")
14991 (set_attr "prefix" "vex")
14992 (set_attr "mode" "<sseinsnmode>")])
;; Variant of the 32-bit-index AVX2 gather that has no register operand
;; tied to the destination.  The mask operand 4 is tied to the clobbered
;; scratch operand 1; operand 6 is the VSIB memory reference built from
;; base 2, index 3 and scale 5.
14994 (define_insn "*avx2_gathersi<mode>_2"
14995 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
14996 (unspec:VEC_GATHER_MODE
14998 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
15000 [(match_operand:P 2 "vsib_address_operand" "Tv")
15001 (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand" "x")
15002 (match_operand:SI 5 "const1248_operand" "n")]
15004 (mem:BLK (scratch))
15005 (match_operand:VEC_GATHER_MODE 4 "register_operand" "1")]
15007 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
15009 "v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %6, %0|%0, %6, %1}"
15010 [(set_attr "type" "ssemov")
15011 (set_attr "prefix" "vex")
15012 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the AVX2 gather with 64-bit indices.
;;   operand 0 - destination vector
;;   operand 1 - source vector, mode <VEC_GATHER_SRCDI>
;;   operand 2 - base address (vsib_address_operand)
;;   operand 3 - index vector, mode <VEC_GATHER_IDXDI>
;;   operand 4 - mask vector, mode <VEC_GATHER_SRCDI>
;;   operand 5 - scale, one of 1/2/4/8 (const1248_operand)
;;   operand 6 - scratch with the destination mode, clobbered
;; The preparation code wraps base, index and scale into a Pmode
;; UNSPEC_VSIBADDR.
;; The scale predicate is spelled "const1248_operand" with no trailing
;; blank so that it names the same predicate as the matching insns.
15014 (define_expand "avx2_gatherdi<mode>"
15015 [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand")
15016 (unspec:VEC_GATHER_MODE
15017 [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand")
15018 (mem:<ssescalarmode>
15020 [(match_operand 2 "vsib_address_operand")
15021 (match_operand:<VEC_GATHER_IDXDI>
15022 3 "register_operand")
15023 (match_operand:SI 5 "const1248_operand")]))
15024 (mem:BLK (scratch))
15025 (match_operand:<VEC_GATHER_SRCDI>
15026 4 "register_operand")]
15028 (clobber (match_scratch:VEC_GATHER_MODE 6))])]
15032 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
15033 operands[5]), UNSPEC_VSIBADDR);
;; AVX2 gather with 64-bit indices, merge-source form.
;; Source operand 2 (mode <VEC_GATHER_SRCDI>) is tied to the destination
;; and mask operand 5 is tied to the clobbered scratch operand 1.  The
;; template prints the tied operands themselves: %5 for the mask and %2
;; for the destination, with %7 as the VSIB memory reference.
15036 (define_insn "*avx2_gatherdi<mode>"
15037 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
15038 (unspec:VEC_GATHER_MODE
15039 [(match_operand:<VEC_GATHER_SRCDI> 2 "register_operand" "0")
15040 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
15042 [(match_operand:P 3 "vsib_address_operand" "Tv")
15043 (match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x")
15044 (match_operand:SI 6 "const1248_operand" "n")]
15046 (mem:BLK (scratch))
15047 (match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" "1")]
15049 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
15051 "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %2|%2, %7, %5}"
15052 [(set_attr "type" "ssemov")
15053 (set_attr "prefix" "vex")
15054 (set_attr "mode" "<sseinsnmode>")])
;; Variant of the 64-bit-index AVX2 gather that has no register operand
;; tied to the destination.  The mask operand 4 is tied to the clobbered
;; scratch operand 1.  When the data mode differs from
;; <VEC_GATHER_SRCDI> the destination is printed through the %x0
;; modifier; otherwise it is printed as plain %0.
15056 (define_insn "*avx2_gatherdi<mode>_2"
15057 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
15058 (unspec:VEC_GATHER_MODE
15060 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
15062 [(match_operand:P 2 "vsib_address_operand" "Tv")
15063 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x")
15064 (match_operand:SI 5 "const1248_operand" "n")]
15066 (mem:BLK (scratch))
15067 (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")]
15069 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
15072 if (<MODE>mode != <VEC_GATHER_SRCDI>mode)
15073 return "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %x0|%x0, %6, %4}";
15074 return "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}";
15076 [(set_attr "type" "ssemov")
15077 (set_attr "prefix" "vex")
15078 (set_attr "mode" "<sseinsnmode>")])
;; 64-bit-index gather whose destination has mode <VEC_GATHER_SRCDI>:
;; elements 0..3 are selected (vec_select) from the gather result.
;; Source operand 2 is tied to the destination, mask operand 5 is tied
;; to the clobbered VI4F_256 scratch operand 1, and operand 7 is the
;; VSIB memory reference.
15080 (define_insn "*avx2_gatherdi<mode>_3"
15081 [(set (match_operand:<VEC_GATHER_SRCDI> 0 "register_operand" "=&x")
15082 (vec_select:<VEC_GATHER_SRCDI>
15084 [(match_operand:<VEC_GATHER_SRCDI> 2 "register_operand" "0")
15085 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
15087 [(match_operand:P 3 "vsib_address_operand" "Tv")
15088 (match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x")
15089 (match_operand:SI 6 "const1248_operand" "n")]
15091 (mem:BLK (scratch))
15092 (match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" "1")]
15094 (parallel [(const_int 0) (const_int 1)
15095 (const_int 2) (const_int 3)])))
15096 (clobber (match_scratch:VI4F_256 1 "=&x"))]
15098 "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %0|%0, %7, %5}"
15099 [(set_attr "type" "ssemov")
15100 (set_attr "prefix" "vex")
15101 (set_attr "mode" "<sseinsnmode>")])
;; Like *avx2_gatherdi<mode>_3 (elements 0..3 selected into a
;; <VEC_GATHER_SRCDI> destination) but with no register operand tied to
;; the destination.  Mask operand 4 is tied to the clobbered VI4F_256
;; scratch operand 1; operand 6 is the VSIB memory reference.
15103 (define_insn "*avx2_gatherdi<mode>_4"
15104 [(set (match_operand:<VEC_GATHER_SRCDI> 0 "register_operand" "=&x")
15105 (vec_select:<VEC_GATHER_SRCDI>
15108 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
15110 [(match_operand:P 2 "vsib_address_operand" "Tv")
15111 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x")
15112 (match_operand:SI 5 "const1248_operand" "n")]
15114 (mem:BLK (scratch))
15115 (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")]
15117 (parallel [(const_int 0) (const_int 1)
15118 (const_int 2) (const_int 3)])))
15119 (clobber (match_scratch:VI4F_256 1 "=&x"))]
15121 "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}"
15122 [(set_attr "type" "ssemov")
15123 (set_attr "prefix" "vex")
15124 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the AVX-512 gather with 32-bit indices.
;;   operand 0 - destination vector
;;   operand 1 - source vector
;;   operand 2 - base address (vsib_address_operand)
;;   operand 3 - index vector, mode <VEC_GATHER_IDXSI>
;;   operand 4 - mask register, mode <avx512fmaskmode>
;;   operand 5 - scale, one of 1/2/4/8
;;   operand 7 - mask-mode scratch, clobbered
;; The preparation code wraps base, index and scale into a Pmode
;; UNSPEC_VSIBADDR.
15126 (define_expand "avx512f_gathersi<mode>"
15127 [(parallel [(set (match_operand:VI48F_512 0 "register_operand")
15129 [(match_operand:VI48F_512 1 "register_operand")
15130 (match_operand:<avx512fmaskmode> 4 "register_operand")
15131 (mem:<ssescalarmode>
15133 [(match_operand 2 "vsib_address_operand")
15134 (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand")
15135 (match_operand:SI 5 "const1248_operand")]))]
15137 (clobber (match_scratch:<avx512fmaskmode> 7))])]
15141 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
15142 operands[5]), UNSPEC_VSIBADDR);
;; AVX-512 gather with 32-bit indices, merge-source form.
;; Source operand 1 is tied to the destination and mask operand 7 is
;; tied to the clobbered "k" scratch operand 2.  Operand 6 is the VSIB
;; memory reference built from base 4, index 3 and scale 5.  The
;; template prints the mask as {%2} after the destination and uses the
;; %g6 form of the memory operand in the Intel-syntax half.
15145 (define_insn "*avx512f_gathersi<mode>"
15146 [(set (match_operand:VI48F_512 0 "register_operand" "=&v")
15148 [(match_operand:VI48F_512 1 "register_operand" "0")
15149 (match_operand:<avx512fmaskmode> 7 "register_operand" "2")
15150 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
15152 [(match_operand:P 4 "vsib_address_operand" "Tv")
15153 (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand" "v")
15154 (match_operand:SI 5 "const1248_operand" "n")]
15155 UNSPEC_VSIBADDR)])]
15157 (clobber (match_scratch:<avx512fmaskmode> 2 "=&k"))]
15159 "v<sseintprefix>gatherd<ssemodesuffix>\t{%6, %0%{%2%}|%0%{%2%}, %g6}"
15160 [(set_attr "type" "ssemov")
15161 (set_attr "prefix" "evex")
15162 (set_attr "mode" "<sseinsnmode>")])
;; Variant of the 32-bit-index AVX-512 gather that has no register
;; operand tied to the destination.  Mask operand 6 is tied to the
;; clobbered "k" scratch operand 1; operand 5 is the VSIB memory
;; reference built from base 3, index 2 and scale 4.
15164 (define_insn "*avx512f_gathersi<mode>_2"
15165 [(set (match_operand:VI48F_512 0 "register_operand" "=&v")
15168 (match_operand:<avx512fmaskmode> 6 "register_operand" "1")
15169 (match_operator:<ssescalarmode> 5 "vsib_mem_operator"
15171 [(match_operand:P 3 "vsib_address_operand" "Tv")
15172 (match_operand:<VEC_GATHER_IDXSI> 2 "register_operand" "v")
15173 (match_operand:SI 4 "const1248_operand" "n")]
15174 UNSPEC_VSIBADDR)])]
15176 (clobber (match_scratch:<avx512fmaskmode> 1 "=&k"))]
15178 "v<sseintprefix>gatherd<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %g5}"
15179 [(set_attr "type" "ssemov")
15180 (set_attr "prefix" "evex")
15181 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the AVX-512 gather with 64-bit indices.
;;   operand 0 - destination vector
;;   operand 1 - source vector, mode <VEC_GATHER_SRCDI>
;;   operand 2 - base address (vsib_address_operand)
;;   operand 3 - index vector, mode <VEC_GATHER_IDXDI>
;;   operand 4 - QImode mask register
;;   operand 5 - scale, one of 1/2/4/8
;;   operand 7 - QImode scratch, clobbered
;; The preparation code wraps base, index and scale into a Pmode
;; UNSPEC_VSIBADDR.
15184 (define_expand "avx512f_gatherdi<mode>"
15185 [(parallel [(set (match_operand:VI48F_512 0 "register_operand")
15187 [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand")
15188 (match_operand:QI 4 "register_operand")
15189 (mem:<ssescalarmode>
15191 [(match_operand 2 "vsib_address_operand")
15192 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand")
15193 (match_operand:SI 5 "const1248_operand")]))]
15195 (clobber (match_scratch:QI 7))])]
15199 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
15200 operands[5]), UNSPEC_VSIBADDR);
;; AVX-512 gather with 64-bit indices, merge-source form.
;; Source operand 1 (mode <VEC_GATHER_SRCDI>) is tied to the destination
;; and the QImode mask operand 7 is tied to the clobbered "k" scratch
;; operand 2.  The template prints the tied source operand %1 as the
;; destination register, followed by the mask as {%2}.
15203 (define_insn "*avx512f_gatherdi<mode>"
15204 [(set (match_operand:VI48F_512 0 "register_operand" "=&v")
15206 [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand" "0")
15207 (match_operand:QI 7 "register_operand" "2")
15208 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
15210 [(match_operand:P 4 "vsib_address_operand" "Tv")
15211 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "v")
15212 (match_operand:SI 5 "const1248_operand" "n")]
15213 UNSPEC_VSIBADDR)])]
15215 (clobber (match_scratch:QI 2 "=&k"))]
15217 "v<sseintprefix>gatherq<ssemodesuffix>\t{%6, %1%{%2%}|%1%{%2%}, %g6}"
15218 [(set_attr "type" "ssemov")
15219 (set_attr "prefix" "evex")
15220 (set_attr "mode" "<sseinsnmode>")])
;; Variant of the 64-bit-index AVX-512 gather that has no register
;; operand tied to the destination.  The QImode mask operand 6 is tied
;; to the clobbered "k" scratch operand 1.  When the data mode differs
;; from <VEC_GATHER_SRCDI> the destination is printed through the %t0
;; modifier; otherwise it is printed as plain %0.
15222 (define_insn "*avx512f_gatherdi<mode>_2"
15223 [(set (match_operand:VI48F_512 0 "register_operand" "=&v")
15226 (match_operand:QI 6 "register_operand" "1")
15227 (match_operator:<ssescalarmode> 5 "vsib_mem_operator"
15229 [(match_operand:P 3 "vsib_address_operand" "Tv")
15230 (match_operand:<VEC_GATHER_IDXDI> 2 "register_operand" "v")
15231 (match_operand:SI 4 "const1248_operand" "n")]
15232 UNSPEC_VSIBADDR)])]
15234 (clobber (match_scratch:QI 1 "=&k"))]
15237 if (<MODE>mode != <VEC_GATHER_SRCDI>mode)
15238 return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %t0%{%1%}|%t0%{%1%}, %g5}";
15239 return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %g5}";
15241 [(set_attr "type" "ssemov")
15242 (set_attr "prefix" "evex")
15243 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the AVX-512 scatter with 32-bit indices.
;;   operand 0 - base address (vsib_address_operand)
;;   operand 1 - mask register, mode <avx512fmaskmode>
;;   operand 2 - index vector, mode <VEC_GATHER_IDXSI>
;;   operand 3 - data vector to store
;;   operand 4 - scale, one of 1/2/4/8
;;   operand 6 - mask-mode scratch, clobbered
;; The preparation code wraps base, index and scale into a Pmode
;; UNSPEC_VSIBADDR.
15245 (define_expand "avx512f_scattersi<mode>"
15246 [(parallel [(set (mem:VI48F_512
15248 [(match_operand 0 "vsib_address_operand")
15249 (match_operand:<VEC_GATHER_IDXSI> 2 "register_operand")
15250 (match_operand:SI 4 "const1248_operand")]))
15252 [(match_operand:<avx512fmaskmode> 1 "register_operand")
15253 (match_operand:VI48F_512 3 "register_operand")]
15255 (clobber (match_scratch:<avx512fmaskmode> 6))])]
15259 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[0], operands[2],
15260 operands[4]), UNSPEC_VSIBADDR);
;; AVX-512 scatter with 32-bit indices.  The destination is the VSIB
;; memory reference, operand 5, built from base 0, index 2 and scale 4.
;; Mask operand 6 is tied to the clobbered "k" scratch operand 1, and
;; operand 3 is the data vector.  The template prints the mask as {%1}
;; after the memory operand.
15263 (define_insn "*avx512f_scattersi<mode>"
15264 [(set (match_operator:VI48F_512 5 "vsib_mem_operator"
15266 [(match_operand:P 0 "vsib_address_operand" "Tv")
15267 (match_operand:<VEC_GATHER_IDXSI> 2 "register_operand" "v")
15268 (match_operand:SI 4 "const1248_operand" "n")]
15271 [(match_operand:<avx512fmaskmode> 6 "register_operand" "1")
15272 (match_operand:VI48F_512 3 "register_operand" "v")]
15274 (clobber (match_scratch:<avx512fmaskmode> 1 "=&k"))]
15276 "v<sseintprefix>scatterd<ssemodesuffix>\t{%3, %5%{%1%}|%5%{%1%}, %3}"
15277 [(set_attr "type" "ssemov")
15278 (set_attr "prefix" "evex")
15279 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the AVX-512 scatter with 64-bit indices.
;;   operand 0 - base address (vsib_address_operand)
;;   operand 1 - QImode mask register
;;   operand 2 - V8DI index vector
;;   operand 3 - data vector to store, mode <VEC_GATHER_SRCDI>
;;   operand 4 - scale, one of 1/2/4/8
;;   operand 6 - QImode scratch, clobbered
;; The preparation code wraps base, index and scale into a Pmode
;; UNSPEC_VSIBADDR.
15281 (define_expand "avx512f_scatterdi<mode>"
15282 [(parallel [(set (mem:VI48F_512
15284 [(match_operand 0 "vsib_address_operand")
15285 (match_operand:V8DI 2 "register_operand")
15286 (match_operand:SI 4 "const1248_operand")]))
15288 [(match_operand:QI 1 "register_operand")
15289 (match_operand:<VEC_GATHER_SRCDI> 3 "register_operand")]
15291 (clobber (match_scratch:QI 6))])]
15295 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[0], operands[2],
15296 operands[4]), UNSPEC_VSIBADDR);
;; AVX-512 scatter with 64-bit (V8DI) indices.  The destination is the
;; VSIB memory reference, operand 5, built from base 0, index 2 and
;; scale 4.  The QImode mask operand 6 is tied to the clobbered "k"
;; scratch operand 1, and operand 3 (mode <VEC_GATHER_SRCDI>) is the
;; data vector.
15299 (define_insn "*avx512f_scatterdi<mode>"
15300 [(set (match_operator:VI48F_512 5 "vsib_mem_operator"
15302 [(match_operand:P 0 "vsib_address_operand" "Tv")
15303 (match_operand:V8DI 2 "register_operand" "v")
15304 (match_operand:SI 4 "const1248_operand" "n")]
15307 [(match_operand:QI 6 "register_operand" "1")
15308 (match_operand:<VEC_GATHER_SRCDI> 3 "register_operand" "v")]
15310 (clobber (match_scratch:QI 1 "=&k"))]
15312 "v<sseintprefix>scatterq<ssemodesuffix>\t{%3, %5%{%1%}|%5%{%1%}, %3}"
15313 [(set_attr "type" "ssemov")
15314 (set_attr "prefix" "evex")
15315 (set_attr "mode" "<sseinsnmode>")])
;; Masked compress into a register.  Operand 1 is the source vector and
;; operand 3 the "k" mask register.  Operand 2 is either tied to the
;; destination or matches constraint "C" (alternatives "0C").  The
;; template prints the mask as {%3} followed by the %N2 modifier of
;; operand 2.
15317 (define_insn "avx512f_compress<mode>_mask"
15318 [(set (match_operand:VI48F_512 0 "register_operand" "=v")
15320 [(match_operand:VI48F_512 1 "register_operand" "v")
15321 (match_operand:VI48F_512 2 "vector_move_operand" "0C")
15322 (match_operand:<avx512fmaskmode> 3 "register_operand" "k")]
15325 "v<sseintprefix>compress<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
15326 [(set_attr "type" "ssemov")
15327 (set_attr "prefix" "evex")
15328 (set_attr "mode" "<sseinsnmode>")])
;; Masked compress straight to memory (UNSPEC_COMPRESS_STORE): operand 0
;; is the memory destination, operand 1 the source register and
;; operand 2 the "k" mask register.
;; NOTE(review): operand 1 uses constraint "x" whereas the register form
;; avx512f_compress<mode>_mask uses "v" -- confirm whether the narrower
;; register class is intended.
15330 (define_insn "avx512f_compressstore<mode>_mask"
15331 [(set (match_operand:VI48F_512 0 "memory_operand" "=m")
15333 [(match_operand:VI48F_512 1 "register_operand" "x")
15335 (match_operand:<avx512fmaskmode> 2 "register_operand" "k")]
15336 UNSPEC_COMPRESS_STORE))]
15338 "v<sseintprefix>compress<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
15339 [(set_attr "type" "ssemov")
15340 (set_attr "prefix" "evex")
15341 (set_attr "memory" "store")
15342 (set_attr "mode" "<sseinsnmode>")])
;; Zero-masking entry point for the expand operation.
;; The preparation statement overwrites operand 2 with the all-zero
;; constant of <MODE>mode (CONST0_RTX), so the value supplied for operand 2
;; by the caller is not used.
;; Operand 1 is the source (register or memory), operand 3 the mask
;; register, operand 0 the destination register.
15344 (define_expand "avx512f_expand<mode>_maskz"
15345 [(set (match_operand:VI48F_512 0 "register_operand")
15347 [(match_operand:VI48F_512 1 "nonimmediate_operand")
15348 (match_operand:VI48F_512 2 "vector_move_operand")
15349 (match_operand:<avx512fmaskmode> 3 "register_operand")]
15352 "operands[2] = CONST0_RTX (<MODE>mode);")
;; Masked expand: emits v<sseintprefix>expand<ssemodesuffix> from operand 1
;; into operand 0 under mask operand 3.
;; There are two alternatives: the source in a register ("v") or in memory
;; ("m").  The "memory" attribute is "none" and "load" respectively.
;; Operand 2 is either the destination register itself ("0") or a constant
;; satisfying "C", and is printed through the %N modifier (presumably
;; selecting merge- vs. zero-masking -- confirm in the operand printer).
15354 (define_insn "avx512f_expand<mode>_mask"
15355 [(set (match_operand:VI48F_512 0 "register_operand" "=v,v")
15357 [(match_operand:VI48F_512 1 "nonimmediate_operand" "v,m")
15358 (match_operand:VI48F_512 2 "vector_move_operand" "0C,0C")
15359 (match_operand:<avx512fmaskmode> 3 "register_operand" "k,k")]
15362 "v<sseintprefix>expand<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
15363 [(set_attr "type" "ssemov")
15364 (set_attr "prefix" "evex")
15365 (set_attr "memory" "none,load")
15366 (set_attr "mode" "<sseinsnmode>")])
;; Packed vgetmant<ssemodesuffix> on VF_512.  The <mask_name> and
;; <round_saeonly_name> parts of the pattern name, and the
;; <mask_operand3> / <round_saeonly_mask_op3> pieces of the template, are
;; filled in by the corresponding substitutions defined elsewhere.
;; Operand 1 is the source; its predicate and constraint come from the
;; round_saeonly substitution.  Operand 2 is an SImode operand accepted by
;; const_0_to_15_operand and is printed first in AT&T syntax.
;; NOTE: the ';' following the template string begins an md comment, so it
;; has no effect on the pattern.
;; NOTE(review): no "type" attribute is set here, unlike the neighbouring
;; patterns -- confirm whether that is intentional.
15368 (define_insn "avx512f_getmant<mode><mask_name><round_saeonly_name>"
15369 [(set (match_operand:VF_512 0 "register_operand" "=v")
15371 [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
15372 (match_operand:SI 2 "const_0_to_15_operand")]
15375 "vgetmant<ssemodesuffix>\t{%2, <round_saeonly_mask_op3>%1, %0<mask_operand3>|%0<mask_operand3>, %1<round_saeonly_mask_op3>, %2}";
15376 [(set_attr "prefix" "evex")
15377 (set_attr "mode" "<MODE>")])
;; Scalar form, vgetmant<ssescalarmodesuffix>, on VF_128 operands.
;; Operand 1 is a register source, operand 2 is the second source whose
;; predicate and constraint come from the round_saeonly substitution, and
;; operand 3 is an SImode operand accepted by const_0_to_15_operand.
;; The "mode" attribute is the scalar mode (<ssescalarmode>).
;; NOTE: the ';' following the template string begins an md comment, so it
;; has no effect on the pattern.
15379 (define_insn "avx512f_getmant<mode><round_saeonly_name>"
15380 [(set (match_operand:VF_128 0 "register_operand" "=v")
15383 [(match_operand:VF_128 1 "register_operand" "v")
15384 (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
15385 (match_operand:SI 3 "const_0_to_15_operand")]
15390 "vgetmant<ssescalarmodesuffix>\t{%3, <round_saeonly_op4>%2, %1, %0|%0, %1, %2<round_saeonly_op4>, %3}";
15391 [(set_attr "prefix" "evex")
15392 (set_attr "mode" "<ssescalarmode>")])
;; Provides the clz<mode>2 named pattern for the VI48_512 modes, with an
;; optional masked variant via <mask_name>.  Emits vplzcnt<ssemodesuffix>
;; with operand 1 (register or memory) as source and operand 0 as
;; destination.
15394 (define_insn "clz<mode>2<mask_name>"
15395 [(set (match_operand:VI48_512 0 "register_operand" "=v")
15397 (match_operand:VI48_512 1 "nonimmediate_operand" "vm")))]
15399 "vplzcnt<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
15400 [(set_attr "type" "sse")
15401 (set_attr "prefix" "evex")
15402 (set_attr "mode" "<sseinsnmode>")])
;; Emits vpconflict<ssemodesuffix> on the VI48_512 modes, with an optional
;; masked variant via <mask_name>.  Operand 1 (register or memory) is the
;; source and operand 0 the destination register.
15404 (define_insn "<mask_codefor>conflict<mode><mask_name>"
15405 [(set (match_operand:VI48_512 0 "register_operand" "=v")
15407 [(match_operand:VI48_512 1 "nonimmediate_operand" "vm")]
15410 "vpconflict<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
15411 [(set_attr "type" "sse")
15412 (set_attr "prefix" "evex")
15413 (set_attr "mode" "<sseinsnmode>")])
;; sha1msg1 on V4SI operands.  Operand 1 is tied to the destination by the
;; "0" constraint, so the template prints only operand 2 (register or
;; memory) and operand 0.
15415 (define_insn "sha1msg1"
15416 [(set (match_operand:V4SI 0 "register_operand" "=x")
15418 [(match_operand:V4SI 1 "register_operand" "0")
15419 (match_operand:V4SI 2 "nonimmediate_operand" "xm")]
15422 "sha1msg1\t{%2, %0|%0, %2}"
15423 [(set_attr "type" "sselog1")
15424 (set_attr "mode" "TI")])
;; sha1msg2 on V4SI operands.  Operand 1 is tied to the destination by the
;; "0" constraint, so the template prints only operand 2 (register or
;; memory) and operand 0.
15426 (define_insn "sha1msg2"
15427 [(set (match_operand:V4SI 0 "register_operand" "=x")
15429 [(match_operand:V4SI 1 "register_operand" "0")
15430 (match_operand:V4SI 2 "nonimmediate_operand" "xm")]
15433 "sha1msg2\t{%2, %0|%0, %2}"
15434 [(set_attr "type" "sselog1")
15435 (set_attr "mode" "TI")])
;; sha1nexte on V4SI operands, modelled as UNSPEC_SHA1NEXTE.  Operand 1 is
;; tied to the destination by the "0" constraint, so the template prints
;; only operand 2 (register or memory) and operand 0.
15437 (define_insn "sha1nexte"
15438 [(set (match_operand:V4SI 0 "register_operand" "=x")
15440 [(match_operand:V4SI 1 "register_operand" "0")
15441 (match_operand:V4SI 2 "nonimmediate_operand" "xm")]
15442 UNSPEC_SHA1NEXTE))]
15444 "sha1nexte\t{%2, %0|%0, %2}"
15445 [(set_attr "type" "sselog1")
15446 (set_attr "mode" "TI")])
;; sha1rnds4 on V4SI operands, modelled as UNSPEC_SHA1RNDS4.  Operand 1 is
;; tied to the destination by the "0" constraint.  Operand 3 is an SImode
;; constant accepted by const_0_to_3_operand and is emitted as the
;; instruction's immediate, hence length_immediate is 1.
15448 (define_insn "sha1rnds4"
15449 [(set (match_operand:V4SI 0 "register_operand" "=x")
15451 [(match_operand:V4SI 1 "register_operand" "0")
15452 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
15453 (match_operand:SI 3 "const_0_to_3_operand" "n")]
15454 UNSPEC_SHA1RNDS4))]
15456 "sha1rnds4\t{%3, %2, %0|%0, %2, %3}"
15457 [(set_attr "type" "sselog1")
15458 (set_attr "length_immediate" "1")
15459 (set_attr "mode" "TI")])
;; sha256msg1 on V4SI operands, modelled as UNSPEC_SHA256MSG1.  Operand 1
;; is tied to the destination by the "0" constraint, so the template prints
;; only operand 2 (register or memory) and operand 0.
15461 (define_insn "sha256msg1"
15462 [(set (match_operand:V4SI 0 "register_operand" "=x")
15464 [(match_operand:V4SI 1 "register_operand" "0")
15465 (match_operand:V4SI 2 "nonimmediate_operand" "xm")]
15466 UNSPEC_SHA256MSG1))]
15468 "sha256msg1\t{%2, %0|%0, %2}"
15469 [(set_attr "type" "sselog1")
15470 (set_attr "mode" "TI")])
;; sha256msg2 on V4SI operands, modelled as UNSPEC_SHA256MSG2.  Operand 1
;; is tied to the destination by the "0" constraint, so the template prints
;; only operand 2 (register or memory) and operand 0.
15472 (define_insn "sha256msg2"
15473 [(set (match_operand:V4SI 0 "register_operand" "=x")
15475 [(match_operand:V4SI 1 "register_operand" "0")
15476 (match_operand:V4SI 2 "nonimmediate_operand" "xm")]
15477 UNSPEC_SHA256MSG2))]
15479 "sha256msg2\t{%2, %0|%0, %2}"
15480 [(set_attr "type" "sselog1")
15481 (set_attr "mode" "TI")])
;; sha256rnds2 on V4SI operands, modelled as UNSPEC_SHA256RNDS2.  Operand 1
;; is tied to the destination by the "0" constraint; operand 2 may be a
;; register or memory.  Operand 3 is a V4SI register restricted by the "Yz"
;; constraint.
;; No length_immediate attribute is set: every operand printed by the
;; template is a register or memory reference, so the instruction carries
;; no immediate byte.  (A length_immediate of 1, as on sha1rnds4, would
;; overstate the insn length by one byte.)
15483 (define_insn "sha256rnds2"
15484 [(set (match_operand:V4SI 0 "register_operand" "=x")
15486 [(match_operand:V4SI 1 "register_operand" "0")
15487 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
15488 (match_operand:V4SI 3 "register_operand" "Yz")]
15489 UNSPEC_SHA256RNDS2))]
15491 "sha256rnds2\t{%3, %2, %0|%0, %2, %3}"
15492 [(set_attr "type" "sselog1")
15494 (set_attr "mode" "TI")])