1 ;; GCC machine description for SSE instructions
2 ;; Copyright (C) 2005-2014 Free Software Foundation, Inc.
4 ;; This file is part of GCC.
6 ;; GCC is free software; you can redistribute it and/or modify
7 ;; it under the terms of the GNU General Public License as published by
8 ;; the Free Software Foundation; either version 3, or (at your option)
11 ;; GCC is distributed in the hope that it will be useful,
12 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
13 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 ;; GNU General Public License for more details.
16 ;; You should have received a copy of the GNU General Public License
17 ;; along with GCC; see the file COPYING3. If not see
18 ;; <http://www.gnu.org/licenses/>.
20 (define_c_enum "unspec" [
55 UNSPEC_XOP_UNSIGNED_CMP
66 UNSPEC_AESKEYGENASSIST
87 ;; For AVX512F support
91 UNSPEC_UNSIGNED_FIX_NOTRUNC
106 UNSPEC_COMPRESS_STORE
111 ;; For embedded rounding feature
112 UNSPEC_EMBEDDED_ROUNDING
114 ;; For AVX512PF support
115 UNSPEC_GATHER_PREFETCH
116 UNSPEC_SCATTER_PREFETCH
118 ;; For AVX512ER support
132 ;; For AVX512BW support
136 ;; For AVX512DQ support
142 (define_c_enum "unspecv" [
152 ;; All vector modes including V?TImode, used in move patterns.
;; Each 512-bit mode is enabled by TARGET_AVX512F (V4TI by TARGET_AVX512BW),
;; each 256-bit mode by TARGET_AVX; the 128-bit modes carry no condition here.
153 (define_mode_iterator VMOVE
154 [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
155 (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
156 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
157 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
158 (V4TI "TARGET_AVX512BW") (V2TI "TARGET_AVX") V1TI
159 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
160 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF])
162 ;; All AVX-512{F,VL} vector modes with SI, DI, SF or DF elements.
;; Assumes TARGET_AVX512F as the baseline: the 512-bit modes are listed
;; unconditionally, the 256- and 128-bit modes also need TARGET_AVX512VL.
163 (define_mode_iterator V48_AVX512VL
164 [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
165 V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")
166 V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
167 V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
169 ;; 1- and 2-byte element AVX-512{BW,VL} vector modes.
;; Assumes TARGET_AVX512BW as the baseline: V64QI and V32HI are listed
;; unconditionally, the narrower modes also need TARGET_AVX512VL.
170 (define_mode_iterator VI12_AVX512VL
171 [V64QI (V16QI "TARGET_AVX512VL") (V32QI "TARGET_AVX512VL")
172 V32HI (V16HI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL")])
;; Vector modes without the V?TImode entries and without 512-bit QI/HI modes.
;; 256-bit modes need TARGET_AVX, 512-bit modes TARGET_AVX512F, and V2DF
;; needs TARGET_SSE2; the remaining 128-bit modes are unconditional.
175 (define_mode_iterator V
176 [(V32QI "TARGET_AVX") V16QI
177 (V16HI "TARGET_AVX") V8HI
178 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
179 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
180 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
181 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
183 ;; All 128bit vector modes (integer and float); V2DF needs TARGET_SSE2.
184 (define_mode_iterator V_128
185 [V16QI V8HI V4SI V2DI V4SF (V2DF "TARGET_SSE2")])
187 ;; All 256bit vector modes (integer and float); no per-mode condition.
188 (define_mode_iterator V_256
189 [V32QI V16HI V8SI V4DI V8SF V4DF])
191 ;; All 512bit vector modes (integer and float); no per-mode condition.
192 (define_mode_iterator V_512 [V64QI V32HI V16SI V8DI V16SF V8DF])
194 ;; All 256bit and 512bit vector modes.
;; The 256-bit modes are unconditional; every 512-bit mode (including the
;; QI and HI ones) is gated on TARGET_AVX512F.
195 (define_mode_iterator V_256_512
196 [V32QI V16HI V8SI V4DI V8SF V4DF
197 (V64QI "TARGET_AVX512F") (V32HI "TARGET_AVX512F") (V16SI "TARGET_AVX512F")
198 (V8DI "TARGET_AVX512F") (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")])
200 ;; All vector float modes (SF and DF elements, 128 to 512 bits).
;; V4SF is unconditional; V2DF needs TARGET_SSE2, the 256-bit modes
;; TARGET_AVX and the 512-bit modes TARGET_AVX512F.
201 (define_mode_iterator VF
202 [(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
203 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
205 ;; 128- and 256-bit float vector modes
;; Same conditions as VF, minus the 512-bit entries.
206 (define_mode_iterator VF_128_256
207 [(V8SF "TARGET_AVX") V4SF
208 (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
210 ;; All SFmode vector float modes
;; V4SF unconditional, V8SF with TARGET_AVX, V16SF with TARGET_AVX512F.
211 (define_mode_iterator VF1
212 [(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF])
214 ;; 128- and 256-bit SF vector modes
215 (define_mode_iterator VF1_128_256
216 [(V8SF "TARGET_AVX") V4SF])
;; 256- and 128-bit SF vector modes where V8SF is the unconditional entry
;; and V4SF is only enabled with TARGET_AVX512VL.
218 (define_mode_iterator VF1_128_256VL
219 [V8SF (V4SF "TARGET_AVX512VL")])
221 ;; All DFmode vector float modes
;; V2DF unconditional, V4DF with TARGET_AVX, V8DF with TARGET_AVX512F.
222 (define_mode_iterator VF2
223 [(V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF])
225 ;; 128- and 256-bit DF vector modes
226 (define_mode_iterator VF2_128_256
227 [(V4DF "TARGET_AVX") V2DF])
;; 256- and 512-bit DF vector modes; V4DF is the unconditional entry and
;; V8DF needs TARGET_AVX512F.
229 (define_mode_iterator VF2_512_256
230 [(V8DF "TARGET_AVX512F") V4DF])
;; 512- and 256-bit DF vector modes; here V8DF is the unconditional entry
;; and V4DF needs TARGET_AVX512VL.
232 (define_mode_iterator VF2_512_256VL
233 [V8DF (V4DF "TARGET_AVX512VL")])
235 ;; All 128bit vector float modes; V2DF needs TARGET_SSE2.
236 (define_mode_iterator VF_128
237 [V4SF (V2DF "TARGET_SSE2")])
239 ;; All 256bit vector float modes
240 (define_mode_iterator VF_256
243 ;; All 512bit vector float modes
244 (define_mode_iterator VF_512
;; The four iterators below share one scheme: the 512-bit mode is listed
;; unconditionally and each 256-/128-bit mode needs TARGET_AVX512VL.

;; SI and DI element integer vector modes.
247 (define_mode_iterator VI48_AVX512VL
248 [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
249 V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
;; SF and DF element float vector modes.
251 (define_mode_iterator VF_AVX512VL
252 [V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
253 V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
;; DF element float vector modes only.
255 (define_mode_iterator VF2_AVX512VL
256 [V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
;; SF element float vector modes only.
258 (define_mode_iterator VF1_AVX512VL
259 [V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")])
261 ;; All vector integer modes
;; Of the 512-bit modes only V16SI and V8DI are included (TARGET_AVX512F);
;; 256-bit modes need TARGET_AVX, 128-bit modes are unconditional.
262 (define_mode_iterator VI
263 [(V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")
264 (V32QI "TARGET_AVX") V16QI
265 (V16HI "TARGET_AVX") V8HI
266 (V8SI "TARGET_AVX") V4SI
267 (V4DI "TARGET_AVX") V2DI])
;; Vector integer modes where the 256-bit entries need TARGET_AVX2 (not
;; just TARGET_AVX).  V16SI and V8DI are the only 512-bit entries and need
;; TARGET_AVX512F; the 128-bit modes are unconditional.
269 (define_mode_iterator VI_AVX2
270 [(V32QI "TARGET_AVX2") V16QI
271 (V16HI "TARGET_AVX2") V8HI
272 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI
273 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
275 ;; All QImode vector integer modes (128- and 256-bit only; V32QI needs
;; TARGET_AVX, V64QI is not part of this iterator).
276 (define_mode_iterator VI1
277 [(V32QI "TARGET_AVX") V16QI])
279 (define_mode_iterator VI_ULOADSTORE_BW_AVX512VL
281 V32HI (V8HI "TARGET_AVX512VL") (V16HI "TARGET_AVX512VL")])
;; SI and DI element integer vector modes; 512-bit entries unconditional,
;; 256-/128-bit entries need TARGET_AVX512VL.  The mode list is the same as
;; VI48_AVX512VL.  NOTE(review): presumably for the unaligned load/store
;; patterns, going by the name -- those patterns are not visible here.
283 (define_mode_iterator VI_ULOADSTORE_F_AVX512VL
284 [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
285 V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
287 ;; All DImode vector integer modes
;; V2DI unconditional, V4DI with TARGET_AVX, V8DI with TARGET_AVX512F.
288 (define_mode_iterator VI8
289 [(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI])
;; DImode vector modes with V8DI unconditional and the narrower modes
;; gated on TARGET_AVX512VL.
291 (define_mode_iterator VI8_AVX512VL
292 [V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
;; 512- and 256-bit DImode vector modes; V4DI needs TARGET_AVX512VL.
294 (define_mode_iterator VI8_256_512
295 [V8DI (V4DI "TARGET_AVX512VL")])
;; QImode vector modes: V16QI unconditional, V32QI with TARGET_AVX2.
297 (define_mode_iterator VI1_AVX2
298 [(V32QI "TARGET_AVX2") V16QI])
;; HImode vector modes: V8HI unconditional, V16HI with TARGET_AVX2.
300 (define_mode_iterator VI2_AVX2
301 [(V16HI "TARGET_AVX2") V8HI])
;; As VI2_AVX2, plus V32HI gated on TARGET_AVX512F.
303 (define_mode_iterator VI2_AVX512F
304 [(V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX2") V8HI])
;; SImode vector modes: V4SI unconditional, V8SI with TARGET_AVX.
306 (define_mode_iterator VI4_AVX
307 [(V8SI "TARGET_AVX") V4SI])
;; SImode vector modes: V4SI unconditional, V8SI with TARGET_AVX2.
309 (define_mode_iterator VI4_AVX2
310 [(V8SI "TARGET_AVX2") V4SI])
;; As VI4_AVX2, plus V16SI gated on TARGET_AVX512F.
312 (define_mode_iterator VI4_AVX512F
313 [(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI])
;; SImode vector modes with V16SI unconditional and the narrower modes
;; gated on TARGET_AVX512VL.
315 (define_mode_iterator VI4_AVX512VL
316 [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")])
;; SI and DI element vector modes.  V4SI and V8SI are unconditional, the
;; 512-bit modes need TARGET_AVX512F, and V2DI/V4DI need TARGET_AVX512VL.
318 (define_mode_iterator VI48_AVX512F_AVX512VL
319 [V4SI V8SI (V16SI "TARGET_AVX512F")
320 (V2DI "TARGET_AVX512VL") (V4DI "TARGET_AVX512VL") (V8DI "TARGET_AVX512F")])
;; HImode vector modes with V32HI unconditional and the narrower modes
;; gated on TARGET_AVX512VL.
322 (define_mode_iterator VI2_AVX512VL
323 [(V8HI "TARGET_AVX512VL") (V16HI "TARGET_AVX512VL") V32HI])
;; DImode vector modes: V2DI unconditional, V4DI with TARGET_AVX2 and
;; V8DI with TARGET_AVX512BW.
325 (define_mode_iterator VI8_AVX2_AVX512BW
326 [(V8DI "TARGET_AVX512BW") (V4DI "TARGET_AVX2") V2DI])
;; DImode vector modes: V2DI unconditional, V4DI with TARGET_AVX2.
328 (define_mode_iterator VI8_AVX2
329 [(V4DI "TARGET_AVX2") V2DI])
;; As VI8_AVX2, plus V8DI gated on TARGET_AVX512F.
331 (define_mode_iterator VI8_AVX2_AVX512F
332 [(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
334 (define_mode_iterator VI4_128_8_256
338 (define_mode_iterator V8FI
342 (define_mode_iterator V16FI
;; Vector-of-TImode modes: V1TI unconditional, V2TI with TARGET_AVX2 and
;; V4TI with TARGET_AVX512BW.
345 ;; ??? We should probably use TImode instead.
346 (define_mode_iterator VIMAX_AVX2
347 [(V4TI "TARGET_AVX512BW") (V2TI "TARGET_AVX2") V1TI])
;; Scalar TImode plus V2TI (the latter with TARGET_AVX2).
349 ;; ??? This should probably be dropped in favor of VIMAX_AVX2.
350 (define_mode_iterator SSESCALARMODE
351 [(V2TI "TARGET_AVX2") TI])
;; QI and HI element vector modes: 128-bit unconditional, 256-bit with
;; TARGET_AVX2, 512-bit with TARGET_AVX512BW.
353 (define_mode_iterator VI12_AVX2
354 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
355 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI])
;; HI and SI element vector modes: 128-bit unconditional, 256-bit with
;; TARGET_AVX2.
357 (define_mode_iterator VI24_AVX2
358 [(V16HI "TARGET_AVX2") V8HI
359 (V8SI "TARGET_AVX2") V4SI])
;; QI, HI and SI element modes at 128 bits (unconditional) and 256 bits
;; (TARGET_AVX2), plus the 512-bit V16SI and V8DI with TARGET_AVX512F.
;; V8DI is the only DI-element mode in the list.
361 (define_mode_iterator VI124_AVX2_48_AVX512F
362 [(V32QI "TARGET_AVX2") V16QI
363 (V16HI "TARGET_AVX2") V8HI
364 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI
365 (V8DI "TARGET_AVX512F")])
;; QI, HI and SI element modes at 128 bits (unconditional) and 256 bits
;; (TARGET_AVX2), plus V32HI and V16SI with TARGET_AVX512F.  No V64QI entry.
367 (define_mode_iterator VI124_AVX512F
368 [(V32QI "TARGET_AVX2") V16QI
369 (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX2") V8HI
370 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI])
;; QI, HI and SI element modes at 128 bits (unconditional) and 256 bits
;; (TARGET_AVX2) only.
372 (define_mode_iterator VI124_AVX2
373 [(V32QI "TARGET_AVX2") V16QI
374 (V16HI "TARGET_AVX2") V8HI
375 (V8SI "TARGET_AVX2") V4SI])
;; HImode vector modes: V8HI unconditional, V16HI with TARGET_AVX2 and
;; V32HI with TARGET_AVX512BW.
377 (define_mode_iterator VI2_AVX2_AVX512BW
378 [(V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI])
;; SI and DI element modes: 128-bit unconditional, 256-bit with TARGET_AVX2.
380 (define_mode_iterator VI48_AVX2
381 [(V8SI "TARGET_AVX2") V4SI
382 (V4DI "TARGET_AVX2") V2DI])
;; HI, SI and DI element modes at 128 bits (unconditional) and 256 bits
;; (TARGET_AVX2); the only 512-bit entry is V8DI with TARGET_AVX512F.
384 (define_mode_iterator VI248_AVX2_8_AVX512F
385 [(V16HI "TARGET_AVX2") V8HI
386 (V8SI "TARGET_AVX2") V4SI
387 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
;; V16SI and V8DI unconditionally, V32HI with TARGET_AVX512BW and V4DI
;; with TARGET_AVX512VL.
389 (define_mode_iterator VI248_AVX512BW_AVX512VL
390 [(V32HI "TARGET_AVX512BW")
391 (V4DI "TARGET_AVX512VL") V16SI V8DI])
393 ;; Assumes TARGET_AVX512VL as the baseline
394 (define_mode_iterator VI24_AVX512BW_1
395 [(V16HI "TARGET_AVX512BW") (V8HI "TARGET_AVX512BW")
;; SI and DI element vector modes; the 128- and 256-bit modes carry no
;; condition here, the 512-bit modes need TARGET_AVX512F.
398 (define_mode_iterator VI48_AVX512F
399 [(V16SI "TARGET_AVX512F") V8SI V4SI
400 (V8DI "TARGET_AVX512F") V4DI V2DI])
402 (define_mode_iterator V48_AVX2
405 (V4SI "TARGET_AVX2") (V2DI "TARGET_AVX2")
406 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")])
;; ISA-name string per vector mode, used as a pattern-name prefix (e.g.
;; <avx512>_load<mode>_mask).  512-bit SI/DI/SF/DF modes map to "avx512f",
;; 512-bit QI/HI modes to "avx512bw", all 128-/256-bit modes to "avx512vl".
408 (define_mode_attr avx512
409 [(V16QI "avx512vl") (V32QI "avx512vl") (V64QI "avx512bw")
410 (V8HI "avx512vl") (V16HI "avx512vl") (V32HI "avx512bw")
411 (V4SI "avx512vl") (V8SI "avx512vl") (V16SI "avx512f")
412 (V2DI "avx512vl") (V4DI "avx512vl") (V8DI "avx512f")
413 (V4SF "avx512vl") (V8SF "avx512vl") (V16SF "avx512f")
414 (V2DF "avx512vl") (V4DF "avx512vl") (V8DF "avx512f")])
;; ISA-name string per vector mode.  The QI and SI rows follow the
;; sse2 / avx / avx512f progression by vector width; the float rows use
;; "avx" for both 128- and 256-bit modes.
;; NOTE(review): the HI and DI rows map their 128-/256-bit modes to
;; "avx512vl" (and V32HI to "avx512bw") instead of sse2/avx -- confirm this
;; is intended before relying on the attribute for those modes.
416 (define_mode_attr sse2_avx_avx512f
417 [(V16QI "sse2") (V32QI "avx") (V64QI "avx512f")
418 (V8HI "avx512vl") (V16HI "avx512vl") (V32HI "avx512bw")
419 (V4SI "sse2") (V8SI "avx") (V16SI "avx512f")
420 (V2DI "avx512vl") (V4DI "avx512vl") (V8DI "avx512f")
421 (V16SF "avx512f") (V8SF "avx") (V4SF "avx")
422 (V8DF "avx512f") (V4DF "avx") (V2DF "avx")])
;; ISA-name string per integer vector mode: "sse2" for 128-bit modes,
;; "avx2" for 256-bit modes, and for 512-bit modes "avx512bw" (QI, HI, TI
;; elements) or "avx512f" (SI, DI elements).
424 (define_mode_attr sse2_avx2
425 [(V16QI "sse2") (V32QI "avx2") (V64QI "avx512bw")
426 (V8HI "sse2") (V16HI "avx2") (V32HI "avx512bw")
427 (V4SI "sse2") (V8SI "avx2") (V16SI "avx512f")
428 (V2DI "sse2") (V4DI "avx2") (V8DI "avx512f")
429 (V1TI "sse2") (V2TI "avx2") (V4TI "avx512bw")])
;; ISA-name string per integer mode: "ssse3" up to 128 bits (including the
;; 64-bit V4HI and scalar TI), "avx2" for 256-bit modes and "avx512bw" for
;; the 512-bit QI, HI and TI vector modes.  No 512-bit SI/DI entries.
431 (define_mode_attr ssse3_avx2
432 [(V16QI "ssse3") (V32QI "avx2") (V64QI "avx512bw")
433 (V4HI "ssse3") (V8HI "ssse3") (V16HI "avx2") (V32HI "avx512bw")
434 (V4SI "ssse3") (V8SI "avx2")
435 (V2DI "ssse3") (V4DI "avx2")
436 (TI "ssse3") (V2TI "avx2") (V4TI "avx512bw")])
;; ISA-name string per integer vector mode: "sse4_1" for 128-bit modes,
;; "avx2" for 256-bit modes; for 512-bit modes "avx512bw" (QI, HI),
;; "avx512f" (SI) and "avx512dq" (DI).
438 (define_mode_attr sse4_1_avx2
439 [(V16QI "sse4_1") (V32QI "avx2") (V64QI "avx512bw")
440 (V8HI "sse4_1") (V16HI "avx2") (V32HI "avx512bw")
441 (V4SI "sse4_1") (V8SI "avx2") (V16SI "avx512f")
442 (V2DI "sse4_1") (V4DI "avx2") (V8DI "avx512dq")])
;; ISA-name string for 128-/256-bit modes with 4- or 8-byte elements:
;; "avx" for the float modes, "avx2" for the integer modes.
444 (define_mode_attr avx_avx2
445 [(V4SF "avx") (V2DF "avx")
446 (V8SF "avx") (V4DF "avx")
447 (V4SI "avx2") (V2DI "avx2")
448 (V8SI "avx2") (V4DI "avx2")])
;; Name string per integer vector mode: "vec" for 128-bit modes and
;; "avx2" for 256-bit modes.
450 (define_mode_attr vec_avx2
451 [(V16QI "vec") (V32QI "avx2")
452 (V8HI "vec") (V16HI "avx2")
453 (V4SI "vec") (V8SI "avx2")
454 (V2DI "vec") (V4DI "avx2")])
;; ISA-name string: "avx512f" for 512-bit modes, "avx2" for the listed
;; narrower modes.  128-bit float modes (V4SF, V2DF) have no entry.
456 (define_mode_attr avx2_avx512f
457 [(V4SI "avx2") (V8SI "avx2") (V16SI "avx512f")
458 (V2DI "avx2") (V4DI "avx2") (V8DI "avx512f")
459 (V8SF "avx2") (V16SF "avx512f")
460 (V4DF "avx2") (V8DF "avx512f")])
;; ISA-name string.  For SI/DI/SF/DF element modes: "avx2" at 128 and 256
;; bits, "avx512f" at 512 bits.  For HI element modes: "avx512vl" at 128
;; and 256 bits, "avx512bw" at 512 bits.
462 (define_mode_attr avx2_avx512
463 [(V4SI "avx2") (V8SI "avx2") (V16SI "avx512f")
464 (V2DI "avx2") (V4DI "avx2") (V8DI "avx512f")
465 (V4SF "avx2") (V8SF "avx2") (V16SF "avx512f")
466 (V2DF "avx2") (V4DF "avx2") (V8DF "avx512f")
467 (V8HI "avx512vl") (V16HI "avx512vl") (V32HI "avx512bw")])
;; Letter selecting the float ("f") or integer ("i") form of an AVX-512
;; extract/broadcast mnemonic, e.g. vextract<shuffletype>64x4 and
;; vbroadcast<shuffletype>32x4 in the vector move pattern.  Only "f" and
;; "i" forms of those instructions exist, so every integer mode must map
;; to "i".  V32HI and V4TI are listed so that every mode of the VMOVE
;; iterator has an entry.
469 (define_mode_attr shuffletype
470 [(V16SF "f") (V16SI "i") (V8DF "f") (V8DI "i")
471 (V8SF "f") (V8SI "i") (V4DF "f") (V4DI "i")
472 (V4SF "f") (V4SI "i") (V2DF "f") (V2DI "i")
473 (V32QI "i") (V16HI "i") (V16QI "i") (V8HI "i")
474 (V64QI "i") (V32HI "i") (V1TI "i") (V2TI "i") (V4TI "i")])
;; Maps a 512-bit mode with SI/DI/SF/DF elements to the 128-bit mode with
;; the same element type (one quarter of the elements).
476 (define_mode_attr ssequartermode
477 [(V16SF "V4SF") (V8DF "V2DF") (V16SI "V4SI") (V8DI "V2DI")])
;; Lower-case name of the mode with the same number of elements but
;; elements twice as wide (QI->HI, HI->SI, SI->DI).
479 (define_mode_attr ssedoublemodelower
480 [(V16QI "v16hi") (V32QI "v32hi") (V64QI "v64hi")
481 (V8HI "v8si") (V16HI "v16si") (V32HI "v32si")
482 (V4SI "v4di") (V8SI "v8di") (V16SI "v16di")])
;; Maps a vector mode to a mode of twice its total size.  The doubling is
;; not uniform: for the float modes, the DI modes and V8SI/V16SI the
;; element count doubles; for the QI and HI modes and for V4SI the
;; element width doubles instead.
484 (define_mode_attr ssedoublemode
485 [(V16SF "V32SF") (V16SI "V32SI") (V8DI "V16DI") (V8DF "V16DF")
486 (V8SF "V16SF") (V8SI "V16SI") (V4DI "V8DI") (V4DF "V8DF")
487 (V16HI "V16SI") (V8HI "V8SI") (V4HI "V4SI") (V4SI "V4DI")
488 (V32HI "V32SI") (V32QI "V32HI") (V16QI "V16HI") (V64QI "V64HI")])
;; Maps a DImode-element vector mode to the QImode-element vector mode of
;; the same total size.
490 (define_mode_attr ssebytemode
491 [(V8DI "V64QI") (V4DI "V32QI") (V2DI "V16QI")])
493 ;; All 128bit vector integer modes (QI, HI, SI and DI elements)
494 (define_mode_iterator VI_128 [V16QI V8HI V4SI V2DI])
496 ;; All 256bit vector integer modes (QI, HI, SI and DI elements)
497 (define_mode_iterator VI_256 [V32QI V16HI V8SI V4DI])
499 ;; All 512bit vector integer modes (QI, HI, SI and DI elements)
500 (define_mode_iterator VI_512 [V64QI V32HI V16SI V8DI])
502 ;; Various 128bit vector integer mode combinations
;; The digits in each name give the element sizes in bytes that the
;; iterator covers (1 = QI, 2 = HI, 4 = SI, 8 = DI).  Note that VI128_128
;; covers QI, HI and DI elements.
503 (define_mode_iterator VI12_128 [V16QI V8HI])
504 (define_mode_iterator VI14_128 [V16QI V4SI])
505 (define_mode_iterator VI124_128 [V16QI V8HI V4SI])
506 (define_mode_iterator VI128_128 [V16QI V8HI V2DI])
507 (define_mode_iterator VI24_128 [V8HI V4SI])
508 (define_mode_iterator VI248_128 [V8HI V4SI V2DI])
509 (define_mode_iterator VI48_128 [V4SI V2DI])
511 ;; Various 256bit and 512bit vector integer mode combinations
;; 256-bit modes with QI, HI and SI elements.
512 (define_mode_iterator VI124_256 [V32QI V16HI V8SI])
513 (define_mode_iterator VI124_256_AVX512F_AVX512BW
515 (V64QI "TARGET_AVX512BW")
516 (V32HI "TARGET_AVX512BW")
517 (V16SI "TARGET_AVX512F")])
;; 256-bit modes with SI and DI elements.
518 (define_mode_iterator VI48_256 [V8SI V4DI])
;; 512-bit modes with SI and DI elements.
519 (define_mode_iterator VI48_512 [V16SI V8DI])
;; The 256-bit SI mode together with the 512-bit DI mode (both have
;; eight elements).
520 (define_mode_iterator VI4_256_8_512 [V8SI V8DI])
522 ;; Int-float size matches
;; Each iterator pairs the integer and float vector modes that share the
;; same element size and vector width.
523 (define_mode_iterator VI4F_128 [V4SI V4SF])
524 (define_mode_iterator VI8F_128 [V2DI V2DF])
525 (define_mode_iterator VI4F_256 [V8SI V8SF])
526 (define_mode_iterator VI8F_256 [V4DI V4DF])
;; 8-byte element modes: 256-bit unconditional, 512-bit with TARGET_AVX512F.
527 (define_mode_iterator VI8F_256_512
528 [V4DI V4DF (V8DI "TARGET_AVX512F") (V8DF "TARGET_AVX512F")])
529 (define_mode_iterator VI48F_256_512
531 (V16SI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
532 (V8DI "TARGET_AVX512F") (V8DF "TARGET_AVX512F")
533 (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")])
;; 512-bit integer and float modes with 4- or 8-byte elements.
534 (define_mode_iterator VI48F_512 [V16SI V16SF V8DI V8DF])
;; Integer and float modes with 4- or 8-byte elements at every width:
;; the 512-bit modes are unconditional, the 256- and 128-bit modes need
;; TARGET_AVX512VL.
535 (define_mode_iterator VI48F
536 [V16SI V16SF V8DI V8DF
537 (V8SI "TARGET_AVX512VL") (V8SF "TARGET_AVX512VL")
538 (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")
539 (V4SI "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
540 (V2DI "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
;; 256-bit integer and float modes with 4- or 8-byte elements.
541 (define_mode_iterator VI48F_256 [V8SI V8SF V4DI V4DF])
543 ;; Mapping from float mode to required SSE level
;; Covers the scalar SF/DF modes as well as the float vector modes;
;; the result is used as a pattern-name prefix (e.g. <sse>_loadu...).
544 (define_mode_attr sse
545 [(SF "sse") (DF "sse2")
546 (V4SF "sse") (V2DF "sse2")
547 (V16SF "avx512f") (V8SF "avx")
548 (V8DF "avx512f") (V4DF "avx")])
;; ISA-name string for QI and DI element vector modes: "sse2" at 128 bits,
;; "avx" at 256 bits and "avx512f" at 512 bits.
550 (define_mode_attr sse2
551 [(V16QI "sse2") (V32QI "avx") (V64QI "avx512f")
552 (V2DI "sse2") (V4DI "avx") (V8DI "avx512f")])
;; ISA-name string for QI element vector modes: "sse3" at 128 bits and
;; "avx" at 256 bits.
554 (define_mode_attr sse3
555 [(V16QI "sse3") (V32QI "avx")])
557 (define_mode_attr sse4_1
558 [(V4SF "sse4_1") (V2DF "sse4_1")
559 (V8SF "avx") (V4DF "avx")
;; Vector-width suffix used in pattern names (e.g.
;; <sse>_loadu<ssemodesuffix><avxsizesuffix>): "512" and "256" for the
;; wide modes, the empty string for 128-bit modes.
562 (define_mode_attr avxsizesuffix
563 [(V64QI "512") (V32HI "512") (V16SI "512") (V8DI "512")
564 (V32QI "256") (V16HI "256") (V8SI "256") (V4DI "256")
565 (V16QI "") (V8HI "") (V4SI "") (V2DI "")
566 (V16SF "512") (V8DF "512")
567 (V8SF "256") (V4DF "256")
568 (V4SF "") (V2DF "")])
570 ;; SSE instruction mode
571 (define_mode_attr sseinsnmode
572 [(V64QI "XI") (V32HI "XI") (V16SI "XI") (V8DI "XI") (V4TI "XI")
573 (V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI") (V2TI "OI")
574 (V16QI "TI") (V8HI "TI") (V4SI "TI") (V2DI "TI") (V1TI "TI")
575 (V16SF "V16SF") (V8DF "V8DF")
576 (V8SF "V8SF") (V4DF "V4DF")
577 (V4SF "V4SF") (V2DF "V2DF")
580 ;; Mapping of vector modes to corresponding mask size
;; The result is the integer mode of the mask operand (see the
;; <avx512fmaskmode> operands of the *_mask patterns).  Vectors with 64,
;; 32 and 16 elements map to DI, SI and HI; vectors with 8 or fewer
;; elements all map to QI.
581 (define_mode_attr avx512fmaskmode
582 [(V64QI "DI") (V32QI "SI") (V16QI "HI")
583 (V32HI "SI") (V16HI "HI") (V8HI "QI") (V4HI "QI")
584 (V16SI "HI") (V8SI "QI") (V4SI "QI")
585 (V8DI "QI") (V4DI "QI") (V2DI "QI")
586 (V16SF "HI") (V8SF "QI") (V4SF "QI")
587 (V8DF "QI") (V4DF "QI") (V2DF "QI")])
589 ;; Mapping of vector float modes to an integer mode of the same size
;; (same element count and element width).  Integer vector modes are
;; also listed and map to themselves.
590 (define_mode_attr sseintvecmode
591 [(V16SF "V16SI") (V8DF "V8DI")
592 (V8SF "V8SI") (V4DF "V4DI")
593 (V4SF "V4SI") (V2DF "V2DI")
594 (V16SI "V16SI") (V8DI "V8DI")
595 (V8SI "V8SI") (V4DI "V4DI")
596 (V4SI "V4SI") (V2DI "V2DI")
597 (V16HI "V16HI") (V8HI "V8HI")
598 (V32HI "V32HI") (V64QI "V64QI")
599 (V32QI "V32QI") (V16QI "V16QI")])
;; Maps a float vector mode to XI, OI or TI according to its width
;; (512, 256 or 128 bits).  V16SF has no entry.
601 (define_mode_attr sseintvecmode2
602 [(V8DF "XI") (V4DF "OI") (V2DF "TI")
603 (V8SF "OI") (V4SF "TI")])
;; Lower-case name of the same-shaped integer vector mode.  Float modes
;; map to the integer mode with equal element count and width; the listed
;; integer modes map to themselves.  There are no entries for 512-bit
;; integer modes.
605 (define_mode_attr sseintvecmodelower
606 [(V16SF "v16si") (V8DF "v8di")
607 (V8SF "v8si") (V4DF "v4di")
608 (V4SF "v4si") (V2DF "v2di")
609 (V8SI "v8si") (V4DI "v4di")
610 (V4SI "v4si") (V2DI "v2di")
611 (V16HI "v16hi") (V8HI "v8hi")
612 (V32QI "v32qi") (V16QI "v16qi")])
614 ;; Mapping of vector modes to a vector mode of double size
;; The element type is kept and the element count doubles, so only
;; 128- and 256-bit source modes are listed.
615 (define_mode_attr ssedoublevecmode
616 [(V32QI "V64QI") (V16HI "V32HI") (V8SI "V16SI") (V4DI "V8DI")
617 (V16QI "V32QI") (V8HI "V16HI") (V4SI "V8SI") (V2DI "V4DI")
618 (V8SF "V16SF") (V4DF "V8DF")
619 (V4SF "V8SF") (V2DF "V4DF")])
621 ;; Mapping of vector modes to a vector mode of half size
622 (define_mode_attr ssehalfvecmode
623 [(V64QI "V32QI") (V32HI "V16HI") (V16SI "V8SI") (V8DI "V4DI")
624 (V32QI "V16QI") (V16HI "V8HI") (V8SI "V4SI") (V4DI "V2DI")
625 (V16QI "V8QI") (V8HI "V4HI") (V4SI "V2SI")
626 (V16SF "V8SF") (V8DF "V4DF")
627 (V8SF "V4SF") (V4DF "V2DF")
630 ;; Mapping of vector modes to packed single mode of the same size
;; Every 512-bit mode maps to V16SF, every 256-bit mode to V8SF and every
;; 128-bit mode to V4SF.  The move patterns use it as the "mode"
;; attribute value when the packed-single form is preferred.
631 (define_mode_attr ssePSmode
632 [(V16SI "V16SF") (V8DF "V16SF")
633 (V16SF "V16SF") (V8DI "V16SF")
634 (V64QI "V16SF") (V32QI "V8SF") (V16QI "V4SF")
635 (V32HI "V16SF") (V16HI "V8SF") (V8HI "V4SF")
636 (V8SI "V8SF") (V4SI "V4SF")
637 (V4DI "V8SF") (V2DI "V4SF")
638 (V4TI "V16SF") (V2TI "V8SF") (V1TI "V4SF")
639 (V8SF "V8SF") (V4SF "V4SF")
640 (V4DF "V8SF") (V2DF "V4SF")])
;; Maps a DI element vector mode to the SF vector mode with the same
;; number of elements (i.e. half the total size).
642 (define_mode_attr ssePSmode2
643 [(V8DI "V8SF") (V4DI "V4SF")])
645 ;; Mapping of vector modes back to the scalar modes
;; i.e. each vector mode maps to the mode of one of its elements.
646 (define_mode_attr ssescalarmode
647 [(V64QI "QI") (V32QI "QI") (V16QI "QI")
648 (V32HI "HI") (V16HI "HI") (V8HI "HI")
649 (V16SI "SI") (V8SI "SI") (V4SI "SI")
650 (V8DI "DI") (V4DI "DI") (V2DI "DI")
651 (V16SF "SF") (V8SF "SF") (V4SF "SF")
652 (V8DF "DF") (V4DF "DF") (V2DF "DF")])
654 ;; Mapping of vector modes to the 128bit modes
;; The element type is kept; 128-bit modes map to themselves.
655 (define_mode_attr ssexmmmode
656 [(V64QI "V16QI") (V32QI "V16QI") (V16QI "V16QI")
657 (V32HI "V8HI") (V16HI "V8HI") (V8HI "V8HI")
658 (V16SI "V4SI") (V8SI "V4SI") (V4SI "V4SI")
659 (V8DI "V2DI") (V4DI "V2DI") (V2DI "V2DI")
660 (V16SF "V4SF") (V8SF "V4SF") (V4SF "V4SF")
661 (V8DF "V2DF") (V4DF "V2DF") (V2DF "V2DF")])
663 ;; Pointer size override for scalar modes (Intel asm dialect)
664 (define_mode_attr iptr
665 [(V32QI "b") (V16HI "w") (V8SI "k") (V4DI "q")
666 (V16QI "b") (V8HI "w") (V4SI "k") (V2DI "q")
667 (V8SF "k") (V4DF "q")
668 (V4SF "k") (V2DF "q")
671 ;; Number of scalar elements in each vector type
;; Given as a decimal string.  V32HI has no entry in this table.
672 (define_mode_attr ssescalarnum
673 [(V64QI "64") (V16SI "16") (V8DI "8")
674 (V32QI "32") (V16HI "16") (V8SI "8") (V4DI "4")
675 (V16QI "16") (V8HI "8") (V4SI "4") (V2DI "2")
676 (V16SF "16") (V8DF "8")
677 (V8SF "8") (V4DF "4")
678 (V4SF "4") (V2DF "2")])
680 ;; Mask of scalar elements in each vector type
;; The value is the element count minus one, as a decimal string.  Only
;; 128- and 256-bit modes are listed.
681 (define_mode_attr ssescalarnummask
682 [(V32QI "31") (V16HI "15") (V8SI "7") (V4DI "3")
683 (V16QI "15") (V8HI "7") (V4SI "3") (V2DI "1")
684 (V8SF "7") (V4DF "3")
685 (V4SF "3") (V2DF "1")])
;; Element size in bits, as a decimal string; used as an instruction
;; suffix (e.g. vmovdqu<ssescalarsize>).  All integer vector modes are
;; listed; among the float modes only V16SF and V8DF have entries.
687 (define_mode_attr ssescalarsize
688 [(V8DI "64") (V4DI "64") (V2DI "64")
689 (V64QI "8") (V32QI "8") (V16QI "8")
690 (V32HI "16") (V16HI "16") (V8HI "16")
691 (V16SI "32") (V8SI "32") (V4SI "32")
692 (V16SF "32") (V8DF "64")])
694 ;; SSE prefix for integer vector modes
695 (define_mode_attr sseintprefix
696 [(V2DI "p") (V2DF "")
701 (V16SI "p") (V16SF "")
702 (V16QI "p") (V8HI "p")
703 (V32QI "p") (V16HI "p")
704 (V64QI "p") (V32HI "p")])
706 ;; SSE scalar suffix for vector modes
707 (define_mode_attr ssescalarmodesuffix
709 (V8SF "ss") (V4DF "sd")
710 (V4SF "ss") (V2DF "sd")
711 (V8SI "ss") (V4DI "sd")
714 ;; Pack/unpack vector modes
;; sseunpackmode: same total size, elements twice as wide and half as many.
715 (define_mode_attr sseunpackmode
716 [(V16QI "V8HI") (V8HI "V4SI") (V4SI "V2DI")
717 (V32QI "V16HI") (V16HI "V8SI") (V8SI "V4DI")
718 (V32HI "V16SI") (V64QI "V32HI") (V16SI "V8DI")])
;; ssepackmode: the inverse mapping -- same total size, elements half as
;; wide and twice as many.
720 (define_mode_attr ssepackmode
721 [(V8HI "V16QI") (V4SI "V8HI") (V2DI "V4SI")
722 (V16HI "V32QI") (V8SI "V16HI") (V4DI "V8SI")
723 (V32HI "V64QI") (V16SI "V32HI") (V8DI "V16SI")])
725 ;; Mapping of the max integer size for xop rotate immediate constraint
;; The value is the element width in bits minus one, for 128-bit modes.
726 (define_mode_attr sserotatemax
727 [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")])
729 ;; Mapping of mode to cast intrinsic name
;; "si" for SI element modes, "ps" for SF and "pd" for DF, for 256- and
;; 512-bit modes only.
730 (define_mode_attr castmode
731 [(V8SI "si") (V8SF "ps") (V4DF "pd")
732 (V16SI "si") (V16SF "ps") (V8DF "pd")])
734 ;; Instruction suffix for sign and zero extensions.
;; This is a code attribute (keyed on the RTL code, not on a mode):
;; sign_extend gives "sx", zero_extend gives "zx".
735 (define_code_attr extsuffix [(sign_extend "sx") (zero_extend "zx")])
737 ;; i128 for integer vectors and TARGET_AVX2, f128 otherwise.
738 ;; i64x4 or f64x4 for 512bit modes.
;; In this table the 256-bit integer modes map to "%~128" rather than a
;; fixed letter.  NOTE(review): presumably "%~" is the output-template
;; escape that makes the i/f choice described above -- confirm against the
;; operand-printing code, which is not visible here.
739 (define_mode_attr i128
740 [(V16SF "f64x4") (V8SF "f128") (V8DF "f64x4") (V4DF "f128")
741 (V64QI "i64x4") (V32QI "%~128") (V32HI "i64x4") (V16HI "%~128")
742 (V16SI "i64x4") (V8SI "%~128") (V8DI "i64x4") (V4DI "%~128")])
;; 256-bit and 512-bit vector modes with SI, SF and DF elements.  These
;; are exactly the modes for which the castmode attribute has entries.
745 (define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF])
746 (define_mode_iterator AVX512MODE2P [V16SI V16SF V8DF])
748 ;; Mapping for dbpsadbw modes
;; Maps an HI element vector mode to the QI element vector mode of the
;; same total size.
749 (define_mode_attr dbpsadbwmode
750 [(V32HI "V64QI") (V16HI "V32QI") (V8HI "V16QI")])
752 ;; Mapping suffixes for broadcast
;; Keyed on the element type only: "b", "w", "d", "q" for QI, HI, SI, DI
;; elements and "ss", "sd" for SF, DF elements, at every vector width.
753 (define_mode_attr bcstscalarsuff
754 [(V64QI "b") (V32QI "b") (V16QI "b")
755 (V32HI "w") (V16HI "w") (V8HI "w")
756 (V16SI "d") (V8SI "d") (V4SI "d")
757 (V8DI "q") (V4DI "q") (V2DI "q")
758 (V16SF "ss") (V8SF "ss") (V4SF "ss")
759 (V8DF "sd") (V4DF "sd") (V2DF "sd")])
761 ;; Tie mode of assembler operand to mode iterator
;; 256-bit modes map to "t" and 512-bit modes to "g".
;; NOTE(review): presumably these are operand-modifier letters (compare
;; the %g0/%g1 operands in the move pattern's templates) -- confirm.
762 (define_mode_attr concat_tg_mode
763 [(V32QI "t") (V16HI "t") (V8SI "t") (V4DI "t") (V8SF "t") (V4DF "t")
764 (V64QI "g") (V32HI "g") (V16SI "g") (V8DI "g") (V16SF "g") (V8DF "g")])
767 ;; Include define_subst patterns for instructions with mask
770 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
772 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
776 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
778 ;; All of these patterns are enabled for SSE1 as well as SSE2.
779 ;; This is essential for maintaining stable calling conventions.
781 (define_expand "mov<mode>"
782 [(set (match_operand:VMOVE 0 "nonimmediate_operand")
783 (match_operand:VMOVE 1 "nonimmediate_operand"))]
786 ix86_expand_vector_move (<MODE>mode, operands);
790 (define_insn "*mov<mode>_internal"
791 [(set (match_operand:VMOVE 0 "nonimmediate_operand" "=v,v ,m")
792 (match_operand:VMOVE 1 "nonimmediate_or_sse_const_operand" "C ,vm,v"))]
794 && (register_operand (operands[0], <MODE>mode)
795 || register_operand (operands[1], <MODE>mode))"
797 int mode = get_attr_mode (insn);
798 switch (which_alternative)
801 return standard_sse_constant_opcode (insn, operands[1]);
804 /* There is no evex-encoded vmov* for sizes smaller than 64-bytes
805 in avx512f, so we need to use workarounds, to access sse registers
806 16-31, which are evex-only. In avx512vl we don't need workarounds. */
807 if (TARGET_AVX512F && GET_MODE_SIZE (<MODE>mode) < 64 && !TARGET_AVX512VL
808 && ((REG_P (operands[0]) && EXT_REX_SSE_REGNO_P (REGNO (operands[0])))
809 || (REG_P (operands[1]) && EXT_REX_SSE_REGNO_P (REGNO (operands[1])))))
811 if (memory_operand (operands[0], <MODE>mode))
813 if (<MODE_SIZE> == 32)
814 return "vextract<shuffletype>64x4\t{$0x0, %g1, %0|%0, %g1, 0x0}";
815 else if (<MODE_SIZE> == 16)
816 return "vextract<shuffletype>32x4\t{$0x0, %g1, %0|%0, %g1, 0x0}";
820 else if (memory_operand (operands[1], <MODE>mode))
822 if (<MODE_SIZE> == 32)
823 return "vbroadcast<shuffletype>64x4\t{%1, %g0|%g0, %1}";
824 else if (<MODE_SIZE> == 16)
825 return "vbroadcast<shuffletype>32x4\t{%1, %g0|%g0, %1}";
830 /* Reg -> reg move is always aligned. Just use wider move. */
835 return "vmovaps\t{%g1, %g0|%g0, %g1}";
838 return "vmovapd\t{%g1, %g0|%g0, %g1}";
841 return "vmovdqa64\t{%g1, %g0|%g0, %g1}";
852 && (misaligned_operand (operands[0], <MODE>mode)
853 || misaligned_operand (operands[1], <MODE>mode)))
854 return "vmovups\t{%1, %0|%0, %1}";
856 return "%vmovaps\t{%1, %0|%0, %1}";
862 && (misaligned_operand (operands[0], <MODE>mode)
863 || misaligned_operand (operands[1], <MODE>mode)))
864 return "vmovupd\t{%1, %0|%0, %1}";
866 return "%vmovapd\t{%1, %0|%0, %1}";
871 && (misaligned_operand (operands[0], <MODE>mode)
872 || misaligned_operand (operands[1], <MODE>mode)))
873 return TARGET_AVX512VL ? "vmovdqu64\t{%1, %0|%0, %1}"
874 : "vmovdqu\t{%1, %0|%0, %1}";
876 return TARGET_AVX512VL ? "vmovdqa64\t{%1, %0|%0, %1}"
877 : "%vmovdqa\t{%1, %0|%0, %1}";
879 if (misaligned_operand (operands[0], <MODE>mode)
880 || misaligned_operand (operands[1], <MODE>mode))
881 return "vmovdqu64\t{%1, %0|%0, %1}";
883 return "vmovdqa64\t{%1, %0|%0, %1}";
892 [(set_attr "type" "sselog1,ssemov,ssemov")
893 (set_attr "prefix" "maybe_vex")
895 (cond [(and (match_test "<MODE_SIZE> == 16")
896 (ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
897 (and (eq_attr "alternative" "2")
898 (match_test "TARGET_SSE_TYPELESS_STORES"))))
899 (const_string "<ssePSmode>")
900 (match_test "TARGET_AVX")
901 (const_string "<sseinsnmode>")
902 (ior (not (match_test "TARGET_SSE2"))
903 (match_test "optimize_function_for_size_p (cfun)"))
904 (const_string "V4SF")
905 (and (eq_attr "alternative" "0")
906 (match_test "TARGET_SSE_LOAD0_BY_PXOR"))
909 (const_string "<sseinsnmode>")))])
911 (define_insn "<avx512>_load<mode>_mask"
912 [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v,v")
913 (vec_merge:V48_AVX512VL
914 (match_operand:V48_AVX512VL 1 "nonimmediate_operand" "v,m")
915 (match_operand:V48_AVX512VL 2 "vector_move_operand" "0C,0C")
916 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
919 static char buf [64];
922 const char *sse_suffix;
924 if (FLOAT_MODE_P (GET_MODE_INNER (<MODE>mode)))
927 sse_suffix = "<ssemodesuffix>";
932 sse_suffix = "<ssescalarsize>";
935 if (misaligned_operand (operands[1], <MODE>mode))
940 snprintf (buf, sizeof (buf), "%s%s%s\t{%%1, %%0%%{%%3%%}%%N2|%%0%%{%%3%%}%%N2, %%1}",
941 insn_op, align, sse_suffix);
944 [(set_attr "type" "ssemov")
945 (set_attr "prefix" "evex")
946 (set_attr "memory" "none,load")
947 (set_attr "mode" "<sseinsnmode>")])
949 (define_insn "<avx512>_load<mode>_mask"
950 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v,v")
951 (vec_merge:VI12_AVX512VL
952 (match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "v,m")
953 (match_operand:VI12_AVX512VL 2 "vector_move_operand" "0C,0C")
954 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
956 "vmovdqu<ssescalarsize>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
957 [(set_attr "type" "ssemov")
958 (set_attr "prefix" "evex")
959 (set_attr "memory" "none,load")
960 (set_attr "mode" "<sseinsnmode>")])
962 (define_insn "avx512f_blendm<mode>"
963 [(set (match_operand:VI48F_512 0 "register_operand" "=v")
965 (match_operand:VI48F_512 2 "nonimmediate_operand" "vm")
966 (match_operand:VI48F_512 1 "register_operand" "v")
967 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
969 "v<sseintprefix>blendm<ssemodesuffix>\t{%2, %1, %0%{%3%}|%0%{%3%}, %1, %2}"
970 [(set_attr "type" "ssemov")
971 (set_attr "prefix" "evex")
972 (set_attr "mode" "<sseinsnmode>")])
974 (define_insn "<avx512>_store<mode>_mask"
975 [(set (match_operand:V48_AVX512VL 0 "memory_operand" "=m")
976 (vec_merge:V48_AVX512VL
977 (match_operand:V48_AVX512VL 1 "register_operand" "v")
979 (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
982 static char buf [64];
985 const char *sse_suffix;
987 if (FLOAT_MODE_P (GET_MODE_INNER (<MODE>mode)))
990 sse_suffix = "<ssemodesuffix>";
995 sse_suffix = "<ssescalarsize>";
998 if (misaligned_operand (operands[1], <MODE>mode))
1003 snprintf (buf, sizeof (buf), "%s%s%s\t{%%1, %%0%%{%%2%%}|%%0%%{%%2%%}, %%1}",
1004 insn_op, align, sse_suffix);
1007 [(set_attr "type" "ssemov")
1008 (set_attr "prefix" "evex")
1009 (set_attr "memory" "store")
1010 (set_attr "mode" "<sseinsnmode>")])
1012 (define_insn "<avx512>_store<mode>_mask"
1013 [(set (match_operand:VI12_AVX512VL 0 "memory_operand" "=m")
1014 (vec_merge:VI12_AVX512VL
1015 (match_operand:VI12_AVX512VL 1 "register_operand" "v")
1017 (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
1019 "vmovdqu<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
1020 [(set_attr "type" "ssemov")
1021 (set_attr "prefix" "evex")
1022 (set_attr "memory" "store")
1023 (set_attr "mode" "<sseinsnmode>")])
1025 (define_insn "sse2_movq128"
1026 [(set (match_operand:V2DI 0 "register_operand" "=x")
1029 (match_operand:V2DI 1 "nonimmediate_operand" "xm")
1030 (parallel [(const_int 0)]))
1033 "%vmovq\t{%1, %0|%0, %q1}"
1034 [(set_attr "type" "ssemov")
1035 (set_attr "prefix" "maybe_vex")
1036 (set_attr "mode" "TI")])
1038 ;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
1039 ;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
1040 ;; from memory, we'd prefer to load the memory directly into the %xmm
1041 ;; register. To facilitate this happy circumstance, this pattern won't
1042 ;; split until after register allocation. If the 64-bit value didn't
1043 ;; come from memory, this is the best we can do. This is much better
1044 ;; than storing %edx:%eax into a stack temporary and loading an %xmm
1047 (define_insn_and_split "movdi_to_sse"
1049 [(set (match_operand:V4SI 0 "register_operand" "=?x,x")
1050 (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
1051 (clobber (match_scratch:V4SI 2 "=&x,X"))])]
1052 "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_TO_VEC"
1054 "&& reload_completed"
1057 if (register_operand (operands[1], DImode))
1059 /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
1060 Assemble the 64-bit DImode value in an xmm register. */
1061 emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
1062 gen_rtx_SUBREG (SImode, operands[1], 0)));
1063 emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
1064 gen_rtx_SUBREG (SImode, operands[1], 4)));
1065 emit_insn (gen_vec_interleave_lowv4si (operands[0], operands[0],
1068 else if (memory_operand (operands[1], DImode))
1070 rtx tmp = gen_reg_rtx (V2DImode);
1071 emit_insn (gen_vec_concatv2di (tmp, operands[1], const0_rtx));
1072 emit_move_insn (operands[0], gen_lowpart (V4SImode, tmp));
1079 [(set (match_operand:V4SF 0 "register_operand")
1080 (match_operand:V4SF 1 "zero_extended_scalar_load_operand"))]
1081 "TARGET_SSE && reload_completed"
1084 (vec_duplicate:V4SF (match_dup 1))
1088 operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
1089 operands[2] = CONST0_RTX (V4SFmode);
1093 [(set (match_operand:V2DF 0 "register_operand")
1094 (match_operand:V2DF 1 "zero_extended_scalar_load_operand"))]
1095 "TARGET_SSE2 && reload_completed"
1096 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
1098 operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
1099 operands[2] = CONST0_RTX (DFmode);
1102 (define_expand "movmisalign<mode>"
1103 [(set (match_operand:VMOVE 0 "nonimmediate_operand")
1104 (match_operand:VMOVE 1 "nonimmediate_operand"))]
1107 ix86_expand_vector_move_misalign (<MODE>mode, operands);
;; Expander for unaligned packed-float loads (VF modes), with an optional
;; masked variant through <mask_name>.  When operand 1 is a
;; misaligned_operand the preparation code emits a plain SET of operand 0
;; instead of the unspec form; the SET source is operand 1, or a VEC_MERGE
;; of operand 1 with operands[2 * <mask_applied>] and
;; operands[3 * <mask_applied>].
1111 (define_expand "<sse>_loadu<ssemodesuffix><avxsizesuffix><mask_name>"
1112 [(set (match_operand:VF 0 "register_operand")
1113 (unspec:VF [(match_operand:VF 1 "nonimmediate_operand")]
1115 "TARGET_SSE && <mask_mode512bit_condition>"
1117 /* For AVX, normal *mov<mode>_internal pattern will handle unaligned loads
1118 just fine if misaligned_operand is true, and without the UNSPEC it can
1119 be combined with arithmetic instructions. If misaligned_operand is
1120 false, still emit UNSPEC_LOADU insn to honor user's request for
1123 && misaligned_operand (operands[1], <MODE>mode))
1125 rtx src = operands[1];
1127 src = gen_rtx_VEC_MERGE (<MODE>mode, operands[1],
1128 operands[2 * <mask_applied>],
1129 operands[3 * <mask_applied>]);
1130 emit_insn (gen_rtx_SET (VOIDmode, operands[0], src));
;; Unaligned packed-float load insn (VF modes, optionally masked).
;; The output routine switches on the insn's "mode" attribute and returns
;; either the movups template or the movu<ssemodesuffix> template.
;; The "mode" attribute is <ssePSmode> for 16-byte modes when
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL, <MODE> under TARGET_AVX,
;; V4SF when optimizing the function for size, and <MODE> otherwise.
1135 (define_insn "*<sse>_loadu<ssemodesuffix><avxsizesuffix><mask_name>"
1136 [(set (match_operand:VF 0 "register_operand" "=v")
1138 [(match_operand:VF 1 "nonimmediate_operand" "vm")]
1140 "TARGET_SSE && <mask_mode512bit_condition>"
1142 switch (get_attr_mode (insn))
1147 return "%vmovups\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
1149 return "%vmovu<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
1152 [(set_attr "type" "ssemov")
1153 (set_attr "movu" "1")
1154 (set_attr "ssememalign" "8")
1155 (set_attr "prefix" "maybe_vex")
1157 (cond [(and (match_test "<MODE_SIZE> == 16")
1158 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
1159 (const_string "<ssePSmode>")
1160 (match_test "TARGET_AVX")
1161 (const_string "<MODE>")
1162 (match_test "optimize_function_for_size_p (cfun)")
1163 (const_string "V4SF")
1165 (const_string "<MODE>")))])
;; Unaligned packed-float store insn (VF modes): register operand 1 is
;; written to memory operand 0.  The output routine switches on the
;; insn's "mode" attribute and returns either the movups template or the
;; movu<ssemodesuffix> template.  The "mode" attribute is <ssePSmode> for
;; 16-byte modes when TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL or
;; TARGET_SSE_TYPELESS_STORES, <MODE> under TARGET_AVX, V4SF when
;; optimizing the function for size, and <MODE> otherwise.
1167 (define_insn "<sse>_storeu<ssemodesuffix><avxsizesuffix>"
1168 [(set (match_operand:VF 0 "memory_operand" "=m")
1170 [(match_operand:VF 1 "register_operand" "v")]
1174 switch (get_attr_mode (insn))
1179 return "%vmovups\t{%1, %0|%0, %1}";
1181 return "%vmovu<ssemodesuffix>\t{%1, %0|%0, %1}";
1184 [(set_attr "type" "ssemov")
1185 (set_attr "movu" "1")
1186 (set_attr "ssememalign" "8")
1187 (set_attr "prefix" "maybe_vex")
1189 (cond [(and (match_test "<MODE_SIZE> == 16")
1190 (ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
1191 (match_test "TARGET_SSE_TYPELESS_STORES")))
1192 (const_string "<ssePSmode>")
1193 (match_test "TARGET_AVX")
1194 (const_string "<MODE>")
1195 (match_test "optimize_function_for_size_p (cfun)")
1196 (const_string "V4SF")
1198 (const_string "<MODE>")))])
;; Masked unaligned packed-float store (VF_AVX512VL modes).  Register
;; operand 1 is merged into memory operand 0 under the mask register in
;; operand 2 (constraint Yk), printed as %0{%2} in the template.  The
;; output routine switches on the insn's "mode" attribute between the
;; vmovups and vmovu<ssemodesuffix> templates.
1200 (define_insn "<avx512>_storeu<ssemodesuffix><avxsizesuffix>_mask"
1201 [(set (match_operand:VF_AVX512VL 0 "memory_operand" "=m")
1202 (vec_merge:VF_AVX512VL
1204 [(match_operand:VF_AVX512VL 1 "register_operand" "v")]
1207 (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
1210 switch (get_attr_mode (insn))
1215 return "vmovups\t{%1, %0%{%2%}|%0%{%2%}, %1}";
1217 return "vmovu<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
1220 [(set_attr "type" "ssemov")
1221 (set_attr "movu" "1")
1222 (set_attr "memory" "store")
1223 (set_attr "prefix" "evex")
1224 (set_attr "mode" "<sseinsnmode>")])
1226 /* For AVX, normal *mov<mode>_internal pattern will handle unaligned loads
1227 just fine if misaligned_operand is true, and without the UNSPEC it can
1228 be combined with arithmetic instructions. If misaligned_operand is
1229 false, still emit UNSPEC_LOADU insn to honor user's request for
;; Expander for unaligned integer loads in the VI1 modes (optionally
;; masked).  When operand 1 is a misaligned_operand the preparation code
;; emits a plain SET of operand 0; its source is operand 1, or a VEC_MERGE
;; of operand 1 with operands[2 * <mask_applied>] and
;; operands[3 * <mask_applied>].
1231 (define_expand "<sse2_avx_avx512f>_loaddqu<mode><mask_name>"
1232 [(set (match_operand:VI1 0 "register_operand")
1234 [(match_operand:VI1 1 "nonimmediate_operand")]
1236 "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
1239 && misaligned_operand (operands[1], <MODE>mode))
1241 rtx src = operands[1];
1243 src = gen_rtx_VEC_MERGE (<MODE>mode, operands[1],
1244 operands[2 * <mask_applied>],
1245 operands[3 * <mask_applied>]);
1246 emit_insn (gen_rtx_SET (VOIDmode, operands[0], src));
;; Expander for unaligned integer loads in the VI_ULOADSTORE_BW_AVX512VL
;; modes (optionally masked).  When operand 1 is a misaligned_operand the
;; preparation code emits a plain SET of operand 0; its source is
;; operand 1, or a VEC_MERGE of operand 1 with
;; operands[2 * <mask_applied>] and operands[3 * <mask_applied>].
1251 (define_expand "<sse2_avx_avx512f>_loaddqu<mode><mask_name>"
1252 [(set (match_operand:VI_ULOADSTORE_BW_AVX512VL 0 "register_operand")
1253 (unspec:VI_ULOADSTORE_BW_AVX512VL
1254 [(match_operand:VI_ULOADSTORE_BW_AVX512VL 1 "nonimmediate_operand")]
1258 if (misaligned_operand (operands[1], <MODE>mode))
1260 rtx src = operands[1];
1262 src = gen_rtx_VEC_MERGE (<MODE>mode, operands[1],
1263 operands[2 * <mask_applied>],
1264 operands[3 * <mask_applied>]);
1265 emit_insn (gen_rtx_SET (VOIDmode, operands[0], src));
;; Expander for unaligned integer loads in the VI_ULOADSTORE_F_AVX512VL
;; modes (optionally masked).  When operand 1 is a misaligned_operand the
;; preparation code emits a plain SET of operand 0; its source is
;; operand 1, or a VEC_MERGE of operand 1 with
;; operands[2 * <mask_applied>] and operands[3 * <mask_applied>].
1270 (define_expand "<sse2_avx_avx512f>_loaddqu<mode><mask_name>"
1271 [(set (match_operand:VI_ULOADSTORE_F_AVX512VL 0 "register_operand")
1272 (unspec:VI_ULOADSTORE_F_AVX512VL
1273 [(match_operand:VI_ULOADSTORE_F_AVX512VL 1 "nonimmediate_operand")]
1277 if (misaligned_operand (operands[1], <MODE>mode))
1279 rtx src = operands[1];
1281 src = gen_rtx_VEC_MERGE (<MODE>mode, operands[1],
1282 operands[2 * <mask_applied>],
1283 operands[3 * <mask_applied>]);
1284 emit_insn (gen_rtx_SET (VOIDmode, operands[0], src));
;; Unaligned integer load insn for the VI1 modes (optionally masked).
;; The output routine switches on the insn's "mode" attribute: one arm
;; returns the movups template; another returns plain %vmovdqu unless
;; both TARGET_AVX512VL and TARGET_AVX512BW are enabled, in which case
;; the vmovdqu<ssescalarsize> form with the mask operand is used.
;; The "mode" attribute is <ssePSmode> for 16-byte modes when
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL, <sseinsnmode> under TARGET_AVX,
;; V4SF when optimizing the function for size, and <sseinsnmode> otherwise.
1289 (define_insn "*<sse2_avx_avx512f>_loaddqu<mode><mask_name>"
1290 [(set (match_operand:VI1 0 "register_operand" "=v")
1292 [(match_operand:VI1 1 "nonimmediate_operand" "vm")]
1294 "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
1296 switch (get_attr_mode (insn))
1300 return "%vmovups\t{%1, %0|%0, %1}";
1302 if (!(TARGET_AVX512VL && TARGET_AVX512BW))
1303 return "%vmovdqu\t{%1, %0|%0, %1}";
1305 return "vmovdqu<ssescalarsize>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
1308 [(set_attr "type" "ssemov")
1309 (set_attr "movu" "1")
1310 (set_attr "ssememalign" "8")
1311 (set (attr "prefix_data16")
1313 (match_test "TARGET_AVX")
1315 (const_string "1")))
1316 (set_attr "prefix" "maybe_vex")
1318 (cond [(and (match_test "<MODE_SIZE> == 16")
1319 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
1320 (const_string "<ssePSmode>")
1321 (match_test "TARGET_AVX")
1322 (const_string "<sseinsnmode>")
1323 (match_test "optimize_function_for_size_p (cfun)")
1324 (const_string "V4SF")
1326 (const_string "<sseinsnmode>")))])
;; Unaligned integer load insn for the VI_ULOADSTORE_BW_AVX512VL modes
;; (optionally masked).  Always emits vmovdqu<ssescalarsize>, with the
;; mask operand appended to the destination when the masked variant is
;; instantiated.  The output template is a plain string; it takes no
;; trailing ';' (in a .md file ';' only starts a comment).
1328 (define_insn "*<sse2_avx_avx512f>_loaddqu<mode><mask_name>"
1329 [(set (match_operand:VI_ULOADSTORE_BW_AVX512VL 0 "register_operand" "=v")
1330 (unspec:VI_ULOADSTORE_BW_AVX512VL
1331 [(match_operand:VI_ULOADSTORE_BW_AVX512VL 1 "nonimmediate_operand" "vm")]
1334 "vmovdqu<ssescalarsize>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
1335 [(set_attr "type" "ssemov")
1336 (set_attr "movu" "1")
1337 (set_attr "ssememalign" "8")
1338 (set_attr "prefix" "maybe_evex")])
;; Unaligned integer load insn for the VI_ULOADSTORE_F_AVX512VL modes
;; (optionally masked).  Always emits vmovdqu<ssescalarsize>, with the
;; mask operand appended to the destination when the masked variant is
;; instantiated.  The output template is a plain string; it takes no
;; trailing ';' (in a .md file ';' only starts a comment).
1340 (define_insn "*<sse2_avx_avx512f>_loaddqu<mode><mask_name>"
1341 [(set (match_operand:VI_ULOADSTORE_F_AVX512VL 0 "register_operand" "=v")
1342 (unspec:VI_ULOADSTORE_F_AVX512VL
1343 [(match_operand:VI_ULOADSTORE_F_AVX512VL 1 "nonimmediate_operand" "vm")]
1346 "vmovdqu<ssescalarsize>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
1347 [(set_attr "type" "ssemov")
1348 (set_attr "movu" "1")
1349 (set_attr "ssememalign" "8")
1350 (set_attr "prefix" "maybe_evex")])
;; Unaligned integer store insn for the VI1 modes: register operand 1 is
;; written to memory operand 0.  The output routine switches on the
;; insn's "mode" attribute: one arm returns the movups template; another
;; returns plain %vmovdqu unless both TARGET_AVX512VL and TARGET_AVX512BW
;; are enabled, in which case vmovdqu<ssescalarsize> is used.
;; The "mode" attribute is <ssePSmode> for 16-byte modes when
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL or TARGET_SSE_TYPELESS_STORES,
;; <sseinsnmode> under TARGET_AVX, V4SF when optimizing the function for
;; size, and <sseinsnmode> otherwise.
1352 (define_insn "<sse2_avx_avx512f>_storedqu<mode>"
1353 [(set (match_operand:VI1 0 "memory_operand" "=m")
1355 [(match_operand:VI1 1 "register_operand" "v")]
1359 switch (get_attr_mode (insn))
1364 return "%vmovups\t{%1, %0|%0, %1}";
1370 if (!(TARGET_AVX512VL && TARGET_AVX512BW))
1371 return "%vmovdqu\t{%1, %0|%0, %1}";
1373 return "vmovdqu<ssescalarsize>\t{%1, %0|%0, %1}";
1377 [(set_attr "type" "ssemov")
1378 (set_attr "movu" "1")
1379 (set_attr "ssememalign" "8")
1380 (set (attr "prefix_data16")
1382 (match_test "TARGET_AVX")
1384 (const_string "1")))
1385 (set_attr "prefix" "maybe_vex")
1387 (cond [(and (match_test "<MODE_SIZE> == 16")
1388 (ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
1389 (match_test "TARGET_SSE_TYPELESS_STORES")))
1390 (const_string "<ssePSmode>")
1391 (match_test "TARGET_AVX")
1392 (const_string "<sseinsnmode>")
1393 (match_test "optimize_function_for_size_p (cfun)")
1394 (const_string "V4SF")
1396 (const_string "<sseinsnmode>")))])
;; Unaligned integer store insn for the VI_ULOADSTORE_BW_AVX512VL modes:
;; register operand 1 is written to memory operand 0 with a single
;; vmovdqu<ssescalarsize> template.
1398 (define_insn "<sse2_avx_avx512f>_storedqu<mode>"
1399 [(set (match_operand:VI_ULOADSTORE_BW_AVX512VL 0 "memory_operand" "=m")
1400 (unspec:VI_ULOADSTORE_BW_AVX512VL
1401 [(match_operand:VI_ULOADSTORE_BW_AVX512VL 1 "register_operand" "v")]
1404 "vmovdqu<ssescalarsize>\t{%1, %0|%0, %1}"
1405 [(set_attr "type" "ssemov")
1406 (set_attr "movu" "1")
1407 (set_attr "ssememalign" "8")
1408 (set_attr "prefix" "maybe_evex")])
;; Unaligned integer store insn for the VI_ULOADSTORE_F_AVX512VL modes:
;; register operand 1 is written to memory operand 0 with a single
;; vmovdqu<ssescalarsize> template.  The "prefix" attribute is
;; maybe_evex, the same value used by the VI_ULOADSTORE_BW_AVX512VL store
;; and by both vmovdqu<ssescalarsize> load insns, so that all four
;; patterns sharing this mnemonic describe their encoding identically.
1410 (define_insn "<sse2_avx_avx512f>_storedqu<mode>"
1411 [(set (match_operand:VI_ULOADSTORE_F_AVX512VL 0 "memory_operand" "=m")
1412 (unspec:VI_ULOADSTORE_F_AVX512VL
1413 [(match_operand:VI_ULOADSTORE_F_AVX512VL 1 "register_operand" "v")]
1416 "vmovdqu<ssescalarsize>\t{%1, %0|%0, %1}"
1417 [(set_attr "type" "ssemov")
1418 (set_attr "movu" "1")
1419 (set_attr "ssememalign" "8")
1420 (set_attr "prefix" "maybe_evex")])
;; Masked unaligned integer store for the VI48_AVX512VL modes.  Register
;; operand 1 is merged into memory operand 0 under the mask register in
;; operand 2 (constraint Yk), printed as %0{%2} in the template.
1422 (define_insn "<avx512>_storedqu<mode>_mask"
1423 [(set (match_operand:VI48_AVX512VL 0 "memory_operand" "=m")
1424 (vec_merge:VI48_AVX512VL
1425 (unspec:VI48_AVX512VL
1426 [(match_operand:VI48_AVX512VL 1 "register_operand" "v")]
1429 (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
1431 "vmovdqu<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
1432 [(set_attr "type" "ssemov")
1433 (set_attr "movu" "1")
1434 (set_attr "memory" "store")
1435 (set_attr "prefix" "evex")
1436 (set_attr "mode" "<sseinsnmode>")])
;; Masked unaligned integer store for the VI12_AVX512VL modes.  Register
;; operand 1 is merged into memory operand 0 under the mask register in
;; operand 2 (constraint Yk), printed as %0{%2} in the template.
1438 (define_insn "<avx512>_storedqu<mode>_mask"
1439 [(set (match_operand:VI12_AVX512VL 0 "memory_operand" "=m")
1440 (vec_merge:VI12_AVX512VL
1441 (unspec:VI12_AVX512VL
1442 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")]
1445 (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
1447 "vmovdqu<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
1448 [(set_attr "type" "ssemov")
1449 (set_attr "movu" "1")
1450 (set_attr "memory" "store")
1451 (set_attr "prefix" "evex")
1452 (set_attr "mode" "<sseinsnmode>")])
;; lddqu load for the VI1 modes.  The source (operand 1) must be a
;; memory_operand and the destination uses the "x" register constraint.
;; prefix_data16 and prefix_rep are both selected by a TARGET_AVX test;
;; their non-AVX values are "0" and "1" respectively.
1454 (define_insn "<sse3>_lddqu<avxsizesuffix>"
1455 [(set (match_operand:VI1 0 "register_operand" "=x")
1456 (unspec:VI1 [(match_operand:VI1 1 "memory_operand" "m")]
1459 "%vlddqu\t{%1, %0|%0, %1}"
1460 [(set_attr "type" "ssemov")
1461 (set_attr "movu" "1")
1462 (set_attr "ssememalign" "8")
1463 (set (attr "prefix_data16")
1465 (match_test "TARGET_AVX")
1467 (const_string "0")))
1468 (set (attr "prefix_rep")
1470 (match_test "TARGET_AVX")
1472 (const_string "1")))
1473 (set_attr "prefix" "maybe_vex")
1474 (set_attr "mode" "<sseinsnmode>")])
;; movnti store for the SWI48 modes: a general register (constraint "r")
;; in operand 1 is written to memory operand 0.
1476 (define_insn "sse2_movnti<mode>"
1477 [(set (match_operand:SWI48 0 "memory_operand" "=m")
1478 (unspec:SWI48 [(match_operand:SWI48 1 "register_operand" "r")]
1481 "movnti\t{%1, %0|%0, %1}"
1482 [(set_attr "type" "ssemov")
1483 (set_attr "prefix_data16" "0")
1484 (set_attr "mode" "<MODE>")])
;; movnt<ssemodesuffix> store for the VF modes: vector register operand 1
;; is written to memory operand 0.
1486 (define_insn "<sse>_movnt<mode>"
1487 [(set (match_operand:VF 0 "memory_operand" "=m")
1489 [(match_operand:VF 1 "register_operand" "v")]
1492 "%vmovnt<ssemodesuffix>\t{%1, %0|%0, %1}"
1493 [(set_attr "type" "ssemov")
1494 (set_attr "prefix" "maybe_vex")
1495 (set_attr "mode" "<MODE>")])
;; movntdq store for the VI8 modes: vector register operand 1 is written
;; to memory operand 0.
;; NOTE(review): "type" is ssecvt here while the VF movnt pattern uses
;; ssemov -- confirm this is intentional before changing it.
1497 (define_insn "<sse2>_movnt<mode>"
1498 [(set (match_operand:VI8 0 "memory_operand" "=m")
1499 (unspec:VI8 [(match_operand:VI8 1 "register_operand" "v")]
1502 "%vmovntdq\t{%1, %0|%0, %1}"
1503 [(set_attr "type" "ssecvt")
1504 (set (attr "prefix_data16")
1506 (match_test "TARGET_AVX")
1508 (const_string "1")))
1509 (set_attr "prefix" "maybe_vex")
1510 (set_attr "mode" "<sseinsnmode>")])
1512 ; Expand patterns for non-temporal stores. At the moment, only those
1513 ; that directly map to insns are defined; it would be possible to
1514 ; define patterns for other modes that would expand to several insns.
1516 ;; Modes handled by storent patterns.
;; Each entry carries its own enabling condition: DI needs SSE2 on a
;; 64-bit target, SI needs SSE2, scalar SF/DF need SSE4A, the 512-bit
;; vectors need AVX512F, the 256-bit vectors need AVX, V2DI/V2DF need
;; SSE2, and V4SF has no extra condition.
1517 (define_mode_iterator STORENT_MODE
1518 [(DI "TARGET_SSE2 && TARGET_64BIT") (SI "TARGET_SSE2")
1519 (SF "TARGET_SSE4A") (DF "TARGET_SSE4A")
1520 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") (V2DI "TARGET_SSE2")
1521 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
1522 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
;; Named expander "storent<mode>" for every mode in STORENT_MODE:
;; register operand 1 is stored to memory operand 0 through an unspec.
1524 (define_expand "storent<mode>"
1525 [(set (match_operand:STORENT_MODE 0 "memory_operand")
1526 (unspec:STORENT_MODE
1527 [(match_operand:STORENT_MODE 1 "register_operand")]
1531 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1533 ;; Parallel floating point arithmetic
1535 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander for the unary <code> operations on VF modes.  All work is
;; delegated to ix86_expand_fp_absneg_operator, after which expansion is
;; finished (DONE).
1537 (define_expand "<code><mode>2"
1538 [(set (match_operand:VF 0 "register_operand")
1540 (match_operand:VF 1 "register_operand")))]
1542 "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
;; abs/neg on VF modes, matched through absneg_operator (operand 3) with
;; an auxiliary (use ...) of operand 2.  After reload the insn is split
;; into a single logical operation on op1 and op2: XOR when operand 3 is
;; NEG, AND otherwise.  If operand 1 is a MEM the two inputs are swapped
;; so that operand 2 becomes the first input and the MEM the second.
;; Alternatives 0-1 are non-AVX, alternatives 2-3 are AVX.
1544 (define_insn_and_split "*absneg<mode>2"
1545 [(set (match_operand:VF 0 "register_operand" "=x,x,v,v")
1546 (match_operator:VF 3 "absneg_operator"
1547 [(match_operand:VF 1 "nonimmediate_operand" "0, xm, v, m")]))
1548 (use (match_operand:VF 2 "nonimmediate_operand" "xm, 0, vm,v"))]
1551 "&& reload_completed"
1554 enum rtx_code absneg_op;
1560 if (MEM_P (operands[1]))
1561 op1 = operands[2], op2 = operands[1];
1563 op1 = operands[1], op2 = operands[2];
1568 if (rtx_equal_p (operands[0], operands[1]))
1574 absneg_op = GET_CODE (operands[3]) == NEG ? XOR : AND;
1575 t = gen_rtx_fmt_ee (absneg_op, <MODE>mode, op1, op2);
1576 t = gen_rtx_SET (VOIDmode, operands[0], t);
1580 [(set_attr "isa" "noavx,noavx,avx,avx")])
;; Expander for packed-float add/subtract on VF modes, with optional
;; mask and rounding variants.  The preparation statement hands the
;; operands to ix86_fixup_binary_operands_no_copy.
1582 (define_expand "<plusminus_insn><mode>3<mask_name><round_name>"
1583 [(set (match_operand:VF 0 "register_operand")
1585 (match_operand:VF 1 "<round_nimm_predicate>")
1586 (match_operand:VF 2 "<round_nimm_predicate>")))]
1587 "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1588 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Packed-float add/subtract insn on VF modes (optional mask/rounding).
;; Alternative 0 (isa noavx) is the two-operand form that overwrites
;; operand 0; alternative 1 (isa avx) is the three-operand v-prefixed
;; form.  The insn condition also requires ix86_binary_operator_ok.
1590 (define_insn "*<plusminus_insn><mode>3<mask_name><round_name>"
1591 [(set (match_operand:VF 0 "register_operand" "=x,v")
1593 (match_operand:VF 1 "<round_nimm_predicate>" "<comm>0,v")
1594 (match_operand:VF 2 "<round_nimm_predicate>" "xm,<round_constraint>")))]
1595 "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands) && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1597 <plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
1598 v<plusminus_mnemonic><ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
1599 [(set_attr "isa" "noavx,avx")
1600 (set_attr "type" "sseadd")
1601 (set_attr "prefix" "<mask_prefix3>")
1602 (set_attr "mode" "<MODE>")])
;; Scalar ("vm") add/subtract insn on VF_128 modes with an optional
;; rounding variant; the mnemonic uses <ssescalarmodesuffix> and the
;; "mode" attribute is <ssescalarmode>.  Alternative 0 (isa noavx) is
;; the two-operand form, alternative 1 (isa avx) the three-operand
;; v-prefixed form.
1604 (define_insn "<sse>_vm<plusminus_insn><mode>3<round_name>"
1605 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1608 (match_operand:VF_128 1 "register_operand" "0,v")
1609 (match_operand:VF_128 2 "nonimmediate_operand" "xm,<round_constraint>"))
1614 <plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
1615 v<plusminus_mnemonic><ssescalarmodesuffix>\t{<round_op3>%2, %1, %0|%0, %1, %<iptr>2<round_op3>}"
1616 [(set_attr "isa" "noavx,avx")
1617 (set_attr "type" "sseadd")
1618 (set_attr "prefix" "<round_prefix>")
1619 (set_attr "mode" "<ssescalarmode>")])
;; Expander for packed-float multiply on VF modes, with optional mask
;; and rounding variants.  The preparation statement hands the operands
;; to ix86_fixup_binary_operands_no_copy with code MULT.
1621 (define_expand "mul<mode>3<mask_name><round_name>"
1622 [(set (match_operand:VF 0 "register_operand")
1624 (match_operand:VF 1 "<round_nimm_predicate>")
1625 (match_operand:VF 2 "<round_nimm_predicate>")))]
1626 "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1627 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; Packed-float multiply insn on VF modes (optional mask/rounding).
;; Operand 1 carries the commutative marker '%' in alternative 0.
;; Alternative 0 (isa noavx) is the two-operand mul form; alternative 1
;; (isa avx) is the three-operand vmul form.  The insn condition also
;; requires ix86_binary_operator_ok for MULT.
1629 (define_insn "*mul<mode>3<mask_name><round_name>"
1630 [(set (match_operand:VF 0 "register_operand" "=x,v")
1632 (match_operand:VF 1 "<round_nimm_predicate>" "%0,v")
1633 (match_operand:VF 2 "<round_nimm_predicate>" "xm,<round_constraint>")))]
1634 "TARGET_SSE && ix86_binary_operator_ok (MULT, <MODE>mode, operands) && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1636 mul<ssemodesuffix>\t{%2, %0|%0, %2}
1637 vmul<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
1638 [(set_attr "isa" "noavx,avx")
1639 (set_attr "type" "ssemul")
1640 (set_attr "prefix" "<mask_prefix3>")
1641 (set_attr "btver2_decode" "direct,double")
1642 (set_attr "mode" "<MODE>")])
;; Scalar ("vm") multiply/divide insn on VF_128 modes with an optional
;; rounding variant.  The "type" attribute is built from the mnemonic
;; ("sse<multdiv_mnemonic>") and "mode" is <ssescalarmode>.
;; Alternative 0 (isa noavx) is the two-operand form, alternative 1
;; (isa avx) the three-operand v-prefixed form.
1644 (define_insn "<sse>_vm<multdiv_mnemonic><mode>3<round_name>"
1645 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1648 (match_operand:VF_128 1 "register_operand" "0,v")
1649 (match_operand:VF_128 2 "nonimmediate_operand" "xm,<round_constraint>"))
1654 <multdiv_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
1655 v<multdiv_mnemonic><ssescalarmodesuffix>\t{<round_op3>%2, %1, %0|%0, %1, %<iptr>2<round_op3>}"
1656 [(set_attr "isa" "noavx,avx")
1657 (set_attr "type" "sse<multdiv_mnemonic>")
1658 (set_attr "prefix" "<round_prefix>")
1659 (set_attr "btver2_decode" "direct,double")
1660 (set_attr "mode" "<ssescalarmode>")])
;; Expander for packed-float divide on the VF2 modes.  Operand 1 must be
;; a register; the preparation statement hands the operands to
;; ix86_fixup_binary_operands_no_copy with code DIV.
1662 (define_expand "div<mode>3"
1663 [(set (match_operand:VF2 0 "register_operand")
1664 (div:VF2 (match_operand:VF2 1 "register_operand")
1665 (match_operand:VF2 2 "nonimmediate_operand")))]
1667 "ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);")
;; Expander for packed-float divide on the VF1 modes.  After
;; ix86_fixup_binary_operands_no_copy, a guarded branch calls
;; ix86_emit_swdivsf instead of using the div pattern.  The visible
;; guard terms are TARGET_RECIP_VEC_DIV, not optimizing the insn for
;; size, flag_finite_math_only, !flag_trapping_math and
;; flag_unsafe_math_optimizations.
1669 (define_expand "div<mode>3"
1670 [(set (match_operand:VF1 0 "register_operand")
1671 (div:VF1 (match_operand:VF1 1 "register_operand")
1672 (match_operand:VF1 2 "nonimmediate_operand")))]
1675 ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);
1678 && TARGET_RECIP_VEC_DIV
1679 && !optimize_insn_for_size_p ()
1680 && flag_finite_math_only && !flag_trapping_math
1681 && flag_unsafe_math_optimizations)
1683 ix86_emit_swdivsf (operands[0], operands[1], operands[2], <MODE>mode);
;; Packed-float divide insn on VF modes (optional mask/rounding).
;; Operand 1 must be a register.  Alternative 0 (isa noavx) is the
;; two-operand div form; alternative 1 (isa avx) is the three-operand
;; vdiv form.
1688 (define_insn "<sse>_div<mode>3<mask_name><round_name>"
1689 [(set (match_operand:VF 0 "register_operand" "=x,v")
1691 (match_operand:VF 1 "register_operand" "0,v")
1692 (match_operand:VF 2 "<round_nimm_predicate>" "xm,<round_constraint>")))]
1693 "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1695 div<ssemodesuffix>\t{%2, %0|%0, %2}
1696 vdiv<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
1697 [(set_attr "isa" "noavx,avx")
1698 (set_attr "type" "ssediv")
1699 (set_attr "prefix" "<mask_prefix3>")
1700 (set_attr "mode" "<MODE>")])
;; Packed rcpps insn for the VF1_128_256 modes, represented as
;; UNSPEC_RCP of operand 1 (register or memory).
1702 (define_insn "<sse>_rcp<mode>2"
1703 [(set (match_operand:VF1_128_256 0 "register_operand" "=x")
1705 [(match_operand:VF1_128_256 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
1707 "%vrcpps\t{%1, %0|%0, %1}"
1708 [(set_attr "type" "sse")
1709 (set_attr "atom_sse_attr" "rcp")
1710 (set_attr "btver2_sse_attr" "rcp")
1711 (set_attr "prefix" "maybe_vex")
1712 (set_attr "mode" "<MODE>")])
;; Scalar rcpss insn on V4SF.  Operand 1 is the source (register or
;; memory); operand 2 is a register that is tied to the destination in
;; the non-AVX alternative ("0") and passed as the extra source of
;; vrcpss in the AVX alternative.  "mode" is SF.
1714 (define_insn "sse_vmrcpv4sf2"
1715 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1717 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
1719 (match_operand:V4SF 2 "register_operand" "0,x")
1723 rcpss\t{%1, %0|%0, %k1}
1724 vrcpss\t{%1, %2, %0|%0, %2, %k1}"
1725 [(set_attr "isa" "noavx,avx")
1726 (set_attr "type" "sse")
1727 (set_attr "ssememalign" "32")
1728 (set_attr "atom_sse_attr" "rcp")
1729 (set_attr "btver2_sse_attr" "rcp")
1730 (set_attr "prefix" "orig,vex")
1731 (set_attr "mode" "SF")])
;; Packed vrcp14 insn for the VF_AVX512VL modes, optionally masked
;; (<mask_operand2> is appended to the destination).  EVEX prefix only.
1733 (define_insn "<mask_codefor>rcp14<mode><mask_name>"
1734 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
1736 [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "vm")]
1739 "vrcp14<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
1740 [(set_attr "type" "sse")
1741 (set_attr "prefix" "evex")
1742 (set_attr "mode" "<MODE>")])
;; Scalar vrcp14 insn for the VF_128 modes.  Operand 1 is the source
;; (register or memory) and operand 2 is a register passed as the
;; additional source operand of the instruction.
;; NOTE(review): "mode" is <MODE> here although the mnemonic uses the
;; scalar suffix -- confirm whether <ssescalarmode> was intended.
1744 (define_insn "srcp14<mode>"
1745 [(set (match_operand:VF_128 0 "register_operand" "=v")
1748 [(match_operand:VF_128 1 "nonimmediate_operand" "vm")]
1750 (match_operand:VF_128 2 "register_operand" "v")
1753 "vrcp14<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %1}"
1754 [(set_attr "type" "sse")
1755 (set_attr "prefix" "evex")
1756 (set_attr "mode" "<MODE>")])
;; Expander for packed-float square root on the VF2 modes; the pattern
;; is a plain (sqrt ...) of operand 1 (register or memory).
1758 (define_expand "sqrt<mode>2"
1759 [(set (match_operand:VF2 0 "register_operand")
1760 (sqrt:VF2 (match_operand:VF2 1 "nonimmediate_operand")))]
;; Expander for packed-float square root on the VF1 modes.  A guarded
;; branch calls ix86_emit_swsqrtsf (last argument false) instead of
;; using the sqrt pattern.  The visible guard terms are
;; TARGET_RECIP_VEC_SQRT, not optimizing the insn for size,
;; flag_finite_math_only, !flag_trapping_math and
;; flag_unsafe_math_optimizations.
1763 (define_expand "sqrt<mode>2"
1764 [(set (match_operand:VF1 0 "register_operand")
1765 (sqrt:VF1 (match_operand:VF1 1 "nonimmediate_operand")))]
1769 && TARGET_RECIP_VEC_SQRT
1770 && !optimize_insn_for_size_p ()
1771 && flag_finite_math_only && !flag_trapping_math
1772 && flag_unsafe_math_optimizations)
1774 ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, false);
;; Packed-float square-root insn on VF modes with optional mask and
;; rounding variants; a single alternative emitting
;; %vsqrt<ssemodesuffix>.
1779 (define_insn "<sse>_sqrt<mode>2<mask_name><round_name>"
1780 [(set (match_operand:VF 0 "register_operand" "=v")
1781 (sqrt:VF (match_operand:VF 1 "<round_nimm_predicate>" "<round_constraint>")))]
1782 "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1783 "%vsqrt<ssemodesuffix>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
1784 [(set_attr "type" "sse")
1785 (set_attr "atom_sse_attr" "sqrt")
1786 (set_attr "btver2_sse_attr" "sqrt")
1787 (set_attr "prefix" "maybe_vex")
1788 (set_attr "mode" "<MODE>")])
;; Scalar ("vm") square-root insn on VF_128 modes with an optional
;; rounding variant.  Operand 1 is the source; operand 2 is a register
;; tied to the destination in the non-AVX alternative ("0") and passed
;; as the extra source in the AVX alternative.  "mode" is
;; <ssescalarmode>.
1790 (define_insn "<sse>_vmsqrt<mode>2<round_name>"
1791 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1794 (match_operand:VF_128 1 "nonimmediate_operand" "xm,<round_constraint>"))
1795 (match_operand:VF_128 2 "register_operand" "0,v")
1799 sqrt<ssescalarmodesuffix>\t{%1, %0|%0, %<iptr>1}
1800 vsqrt<ssescalarmodesuffix>\t{<round_op3>%1, %2, %0|%0, %2, %<iptr>1<round_op3>}"
1801 [(set_attr "isa" "noavx,avx")
1802 (set_attr "type" "sse")
1803 (set_attr "atom_sse_attr" "sqrt")
1804 (set_attr "prefix" "<round_prefix>")
1805 (set_attr "btver2_sse_attr" "sqrt")
1806 (set_attr "mode" "<ssescalarmode>")])
;; Expander for reciprocal square root on the VF1_128_256 modes
;; (UNSPEC_RSQRT of operand 1).  The preparation code calls
;; ix86_emit_swsqrtsf with its last argument set to true.
1808 (define_expand "rsqrt<mode>2"
1809 [(set (match_operand:VF1_128_256 0 "register_operand")
1811 [(match_operand:VF1_128_256 1 "nonimmediate_operand")] UNSPEC_RSQRT))]
1814 ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, true);
;; Packed rsqrtps insn for the VF1_128_256 modes, represented as
;; UNSPEC_RSQRT of operand 1 (register or memory).
1818 (define_insn "<sse>_rsqrt<mode>2"
1819 [(set (match_operand:VF1_128_256 0 "register_operand" "=x")
1821 [(match_operand:VF1_128_256 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
1823 "%vrsqrtps\t{%1, %0|%0, %1}"
1824 [(set_attr "type" "sse")
1825 (set_attr "prefix" "maybe_vex")
1826 (set_attr "mode" "<MODE>")])
;; Packed vrsqrt14 insn for the VF_AVX512VL modes, optionally masked
;; (<mask_operand2> is appended to the destination).  EVEX prefix only.
1828 (define_insn "<mask_codefor>rsqrt14<mode><mask_name>"
1829 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
1831 [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "vm")]
1834 "vrsqrt14<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
1835 [(set_attr "type" "sse")
1836 (set_attr "prefix" "evex")
1837 (set_attr "mode" "<MODE>")])
;; Scalar vrsqrt14 insn for the VF_128 modes.  Operand 1 is the source
;; (register or memory) and operand 2 is a register passed as the
;; additional source operand of the instruction.
1839 (define_insn "rsqrt14<mode>"
1840 [(set (match_operand:VF_128 0 "register_operand" "=v")
1843 [(match_operand:VF_128 1 "nonimmediate_operand" "vm")]
1845 (match_operand:VF_128 2 "register_operand" "v")
1848 "vrsqrt14<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %1}"
1849 [(set_attr "type" "sse")
1850 (set_attr "prefix" "evex")
1851 (set_attr "mode" "<MODE>")])
;; Scalar rsqrtss insn on V4SF.  Operand 1 is the source (register or
;; memory); operand 2 is a register that is tied to the destination in
;; the non-AVX alternative ("0") and passed as the extra source of
;; vrsqrtss in the AVX alternative.  "mode" is SF.
1853 (define_insn "sse_vmrsqrtv4sf2"
1854 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1856 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
1858 (match_operand:V4SF 2 "register_operand" "0,x")
1862 rsqrtss\t{%1, %0|%0, %k1}
1863 vrsqrtss\t{%1, %2, %0|%0, %2, %k1}"
1864 [(set_attr "isa" "noavx,avx")
1865 (set_attr "type" "sse")
1866 (set_attr "ssememalign" "32")
1867 (set_attr "prefix" "orig,vex")
1868 (set_attr "mode" "SF")])
1870 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
1871 ;; isn't really correct, as those rtl operators aren't defined when
1872 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
;; Expander for packed-float <code> (max/min) on VF modes with optional
;; mask and SAE-only rounding variants.  Unless flag_finite_math_only is
;; set, operand 1 is first forced into a register; the operands are then
;; passed to ix86_fixup_binary_operands_no_copy.
1874 (define_expand "<code><mode>3<mask_name><round_saeonly_name>"
1875 [(set (match_operand:VF 0 "register_operand")
1877 (match_operand:VF 1 "<round_saeonly_nimm_predicate>")
1878 (match_operand:VF 2 "<round_saeonly_nimm_predicate>")))]
1879 "TARGET_SSE && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
1881 if (!flag_finite_math_only)
1882 operands[1] = force_reg (<MODE>mode, operands[1]);
1883 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
;; Packed-float max/min insn used only when flag_finite_math_only is
;; set.  Operand 1 carries the commutative marker '%' in alternative 0,
;; and the condition also requires ix86_binary_operator_ok.
;; Alternative 0 (isa noavx) is the two-operand form; alternative 1
;; (isa avx) is the three-operand v-prefixed form.
1886 (define_insn "*<code><mode>3_finite<mask_name><round_saeonly_name>"
1887 [(set (match_operand:VF 0 "register_operand" "=x,v")
1889 (match_operand:VF 1 "<round_saeonly_nimm_predicate>" "%0,v")
1890 (match_operand:VF 2 "<round_saeonly_nimm_predicate>" "xm,<round_saeonly_constraint>")))]
1891 "TARGET_SSE && flag_finite_math_only
1892 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
1893 && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
1895 <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
1896 v<maxmin_float><ssemodesuffix>\t{<round_saeonly_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_saeonly_mask_op3>}"
1897 [(set_attr "isa" "noavx,avx")
1898 (set_attr "type" "sseadd")
1899 (set_attr "btver2_sse_attr" "maxmin")
1900 (set_attr "prefix" "<mask_prefix3>")
1901 (set_attr "mode" "<MODE>")])
;; Packed-float max/min insn used when flag_finite_math_only is not set.
;; Unlike the _finite variant, operand 1 must be a register and has no
;; commutative marker, so the operand order is preserved.
;; Alternative 0 (isa noavx) is the two-operand form; alternative 1
;; (isa avx) is the three-operand v-prefixed form.
1903 (define_insn "*<code><mode>3<mask_name><round_saeonly_name>"
1904 [(set (match_operand:VF 0 "register_operand" "=x,v")
1906 (match_operand:VF 1 "register_operand" "0,v")
1907 (match_operand:VF 2 "<round_saeonly_nimm_predicate>" "xm,<round_saeonly_constraint>")))]
1908 "TARGET_SSE && !flag_finite_math_only
1909 && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
1911 <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
1912 v<maxmin_float><ssemodesuffix>\t{<round_saeonly_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_saeonly_mask_op3>}"
1913 [(set_attr "isa" "noavx,avx")
1914 (set_attr "type" "sseadd")
1915 (set_attr "btver2_sse_attr" "maxmin")
1916 (set_attr "prefix" "<mask_prefix3>")
1917 (set_attr "mode" "<MODE>")])
;; Scalar ("vm") max/min insn on VF_128 modes with an optional SAE-only
;; rounding variant; the mnemonic uses <ssescalarmodesuffix> and "mode"
;; is <ssescalarmode>.  Alternative 0 (isa noavx) is the two-operand
;; form, alternative 1 (isa avx) the three-operand v-prefixed form.
1919 (define_insn "<sse>_vm<code><mode>3<round_saeonly_name>"
1920 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1923 (match_operand:VF_128 1 "register_operand" "0,v")
1924 (match_operand:VF_128 2 "nonimmediate_operand" "xm,<round_saeonly_constraint>"))
1929 <maxmin_float><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
1930 v<maxmin_float><ssescalarmodesuffix>\t{<round_saeonly_op3>%2, %1, %0|%0, %1, %<iptr>2<round_saeonly_op3>}"
1931 [(set_attr "isa" "noavx,avx")
1932 (set_attr "type" "sse")
1933 (set_attr "btver2_sse_attr" "maxmin")
1934 (set_attr "prefix" "<round_saeonly_prefix>")
1935 (set_attr "mode" "<ssescalarmode>")])
1937 ;; These versions of the min/max patterns implement exactly the operations
1938 ;; min = (op1 < op2 ? op1 : op2)
1939 ;; max = (!(op1 < op2) ? op1 : op2)
1940 ;; Their operands are not commutative, and thus they may be used in the
1941 ;; presence of -0.0 and NaN.
;; Order-preserving packed-float minimum on VF modes, expressed as an
;; unspec over operands 1 and 2 so the operands are never commuted.
;; Alternative 0 (isa noavx) emits the two-operand min form with
;; operand 1 tied to the destination; alternative 1 (isa avx) emits the
;; three-operand vmin form.
1943 (define_insn "*ieee_smin<mode>3"
1944 [(set (match_operand:VF 0 "register_operand" "=v,v")
1946 [(match_operand:VF 1 "register_operand" "0,v")
1947 (match_operand:VF 2 "nonimmediate_operand" "vm,vm")]
1951 min<ssemodesuffix>\t{%2, %0|%0, %2}
1952 vmin<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1953 [(set_attr "isa" "noavx,avx")
1954 (set_attr "type" "sseadd")
1955 (set_attr "prefix" "orig,vex")
1956 (set_attr "mode" "<MODE>")])
;; Order-preserving packed-float maximum on VF modes, expressed as an
;; unspec over operands 1 and 2 so the operands are never commuted.
;; Alternative 0 (isa noavx) emits the two-operand max form with
;; operand 1 tied to the destination; alternative 1 (isa avx) emits the
;; three-operand vmax form.
1958 (define_insn "*ieee_smax<mode>3"
1959 [(set (match_operand:VF 0 "register_operand" "=v,v")
1961 [(match_operand:VF 1 "register_operand" "0,v")
1962 (match_operand:VF 2 "nonimmediate_operand" "vm,vm")]
1966 max<ssemodesuffix>\t{%2, %0|%0, %2}
1967 vmax<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1968 [(set_attr "isa" "noavx,avx")
1969 (set_attr "type" "sseadd")
1970 (set_attr "prefix" "orig,vex")
1971 (set_attr "mode" "<MODE>")])
;; vaddsubpd on V4DF (VEX only).  The RTL combines an operation on
;; operands 1 and 2 with (minus operand 1 operand 2) on the same
;; operands (match_dup); operand 2 may be a register or memory.
1973 (define_insn "avx_addsubv4df3"
1974 [(set (match_operand:V4DF 0 "register_operand" "=x")
1977 (match_operand:V4DF 1 "register_operand" "x")
1978 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
1979 (minus:V4DF (match_dup 1) (match_dup 2))
1982 "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1983 [(set_attr "type" "sseadd")
1984 (set_attr "prefix" "vex")
1985 (set_attr "mode" "V4DF")])
;; addsubpd on V2DF.  The RTL combines an operation on operands 1 and 2
;; with (minus operand 1 operand 2) on the same operands (match_dup).
;; Alternative 0 (isa noavx) is the two-operand addsubpd with operand 1
;; tied to the destination; alternative 1 (isa avx) is vaddsubpd.
1987 (define_insn "sse3_addsubv2df3"
1988 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1991 (match_operand:V2DF 1 "register_operand" "0,x")
1992 (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm"))
1993 (minus:V2DF (match_dup 1) (match_dup 2))
1997 addsubpd\t{%2, %0|%0, %2}
1998 vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1999 [(set_attr "isa" "noavx,avx")
2000 (set_attr "type" "sseadd")
2001 (set_attr "atom_unit" "complex")
2002 (set_attr "prefix" "orig,vex")
2003 (set_attr "mode" "V2DF")])
;; vaddsubps on V8SF (VEX only).  The RTL combines an operation on
;; operands 1 and 2 with (minus operand 1 operand 2) on the same
;; operands (match_dup); operand 2 may be a register or memory.
2005 (define_insn "avx_addsubv8sf3"
2006 [(set (match_operand:V8SF 0 "register_operand" "=x")
2009 (match_operand:V8SF 1 "register_operand" "x")
2010 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
2011 (minus:V8SF (match_dup 1) (match_dup 2))
2014 "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
2015 [(set_attr "type" "sseadd")
2016 (set_attr "prefix" "vex")
2017 (set_attr "mode" "V8SF")])
;; addsubps on V4SF.  The RTL combines an operation on operands 1 and 2
;; with (minus operand 1 operand 2) on the same operands (match_dup).
;; Alternative 0 (isa noavx) is the two-operand addsubps with operand 1
;; tied to the destination and prefix_rep "1"; alternative 1 (isa avx)
;; is vaddsubps.
2019 (define_insn "sse3_addsubv4sf3"
2020 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2023 (match_operand:V4SF 1 "register_operand" "0,x")
2024 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
2025 (minus:V4SF (match_dup 1) (match_dup 2))
2029 addsubps\t{%2, %0|%0, %2}
2030 vaddsubps\t{%2, %1, %0|%0, %1, %2}"
2031 [(set_attr "isa" "noavx,avx")
2032 (set_attr "type" "sseadd")
2033 (set_attr "prefix" "orig,vex")
2034 (set_attr "prefix_rep" "1,*")
2035 (set_attr "mode" "V4SF")])
;; Horizontal add/subtract on V4DF (vhaddpd / vhsubpd, VEX only).
;; The RTL pairs adjacent DF lanes in this order: lanes 0,1 of
;; operand 1, lanes 0,1 of operand 2, lanes 2,3 of operand 1, and
;; lanes 2,3 of operand 2.
2037 (define_insn "avx_h<plusminus_insn>v4df3"
2038 [(set (match_operand:V4DF 0 "register_operand" "=x")
2043 (match_operand:V4DF 1 "register_operand" "x")
2044 (parallel [(const_int 0)]))
2045 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
2048 (match_operand:V4DF 2 "nonimmediate_operand" "xm")
2049 (parallel [(const_int 0)]))
2050 (vec_select:DF (match_dup 2) (parallel [(const_int 1)]))))
2053 (vec_select:DF (match_dup 1) (parallel [(const_int 2)]))
2054 (vec_select:DF (match_dup 1) (parallel [(const_int 3)])))
2056 (vec_select:DF (match_dup 2) (parallel [(const_int 2)]))
2057 (vec_select:DF (match_dup 2) (parallel [(const_int 3)]))))))]
2059 "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
2060 [(set_attr "type" "sseadd")
2061 (set_attr "prefix" "vex")
2062 (set_attr "mode" "V4DF")])
;; Expander for horizontal add on V2DF.  It generates the canonical
;; form that selects lane 0 then lane 1 of operand 1, followed by
;; lane 0 then lane 1 of operand 2.
2064 (define_expand "sse3_haddv2df3"
2065 [(set (match_operand:V2DF 0 "register_operand")
2069 (match_operand:V2DF 1 "register_operand")
2070 (parallel [(const_int 0)]))
2071 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
2074 (match_operand:V2DF 2 "nonimmediate_operand")
2075 (parallel [(const_int 0)]))
2076 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
;; Horizontal add insn on V2DF.  The lane selectors are operands 3-6
;; (each const_0_to_1_operand) instead of fixed constants; the insn
;; condition requires operand 3 != operand 4 and operand 5 != operand 6,
;; so each pair covers both lanes in either order.
;; Alternative 0 (isa noavx) emits haddpd, alternative 1 (isa avx)
;; emits vhaddpd.
2079 (define_insn "*sse3_haddv2df3"
2080 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2084 (match_operand:V2DF 1 "register_operand" "0,x")
2085 (parallel [(match_operand:SI 3 "const_0_to_1_operand")]))
2088 (parallel [(match_operand:SI 4 "const_0_to_1_operand")])))
2091 (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm")
2092 (parallel [(match_operand:SI 5 "const_0_to_1_operand")]))
2095 (parallel [(match_operand:SI 6 "const_0_to_1_operand")])))))]
2097 && INTVAL (operands[3]) != INTVAL (operands[4])
2098 && INTVAL (operands[5]) != INTVAL (operands[6])"
2100 haddpd\t{%2, %0|%0, %2}
2101 vhaddpd\t{%2, %1, %0|%0, %1, %2}"
2102 [(set_attr "isa" "noavx,avx")
2103 (set_attr "type" "sseadd")
2104 (set_attr "prefix" "orig,vex")
2105 (set_attr "mode" "V2DF")])
;; Horizontal subtract insn on V2DF.  Lane order is fixed: lane 0 then
;; lane 1 of operand 1, followed by lane 0 then lane 1 of operand 2.
;; Alternative 0 (isa noavx) emits hsubpd, alternative 1 (isa avx)
;; emits vhsubpd.
2107 (define_insn "sse3_hsubv2df3"
2108 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2112 (match_operand:V2DF 1 "register_operand" "0,x")
2113 (parallel [(const_int 0)]))
2114 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
2117 (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm")
2118 (parallel [(const_int 0)]))
2119 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
2122 hsubpd\t{%2, %0|%0, %2}
2123 vhsubpd\t{%2, %1, %0|%0, %1, %2}"
2124 [(set_attr "isa" "noavx,avx")
2125 (set_attr "type" "sseadd")
2126 (set_attr "prefix" "orig,vex")
2127 (set_attr "mode" "V2DF")])
;; DFmode result built from the two lanes of a single V2DF register
;; (operand 1).  The lane selectors are operands 2 and 3
;; (const_0_to_1_operand) and must differ.  The template passes the
;; same register as both sources of haddpd / vhaddpd.
2129 (define_insn "*sse3_haddv2df3_low"
2130 [(set (match_operand:DF 0 "register_operand" "=x,x")
2133 (match_operand:V2DF 1 "register_operand" "0,x")
2134 (parallel [(match_operand:SI 2 "const_0_to_1_operand")]))
2137 (parallel [(match_operand:SI 3 "const_0_to_1_operand")]))))]
2139 && INTVAL (operands[2]) != INTVAL (operands[3])"
2141 haddpd\t{%0, %0|%0, %0}
2142 vhaddpd\t{%1, %1, %0|%0, %1, %1}"
2143 [(set_attr "isa" "noavx,avx")
2144 (set_attr "type" "sseadd1")
2145 (set_attr "prefix" "orig,vex")
2146 (set_attr "mode" "V2DF")])
;; DFmode result built from lane 0 and lane 1 of a single V2DF register
;; (operand 1), with fixed lane order.  The template passes the same
;; register as both sources of hsubpd / vhsubpd.
2148 (define_insn "*sse3_hsubv2df3_low"
2149 [(set (match_operand:DF 0 "register_operand" "=x,x")
2152 (match_operand:V2DF 1 "register_operand" "0,x")
2153 (parallel [(const_int 0)]))
2156 (parallel [(const_int 1)]))))]
2159 hsubpd\t{%0, %0|%0, %0}
2160 vhsubpd\t{%1, %1, %0|%0, %1, %1}"
2161 [(set_attr "isa" "noavx,avx")
2162 (set_attr "type" "sseadd1")
2163 (set_attr "prefix" "orig,vex")
2164 (set_attr "mode" "V2DF")])
;; 256-bit horizontal add/subtract on V8SF (VEX-only, single alternative).
;; The pattern selects elements 0-3 of operand 1, then 0-3 of operand 2,
;; then elements 4-7 of operand 1, then 4-7 of operand 2, in adjacent pairs.
2166 (define_insn "avx_h<plusminus_insn>v8sf3"
2167 [(set (match_operand:V8SF 0 "register_operand" "=x")
2173 (match_operand:V8SF 1 "register_operand" "x")
2174 (parallel [(const_int 0)]))
2175 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
2177 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
2178 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
2182 (match_operand:V8SF 2 "nonimmediate_operand" "xm")
2183 (parallel [(const_int 0)]))
2184 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
2186 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
2187 (vec_select:SF (match_dup 2) (parallel [(const_int 3)])))))
2191 (vec_select:SF (match_dup 1) (parallel [(const_int 4)]))
2192 (vec_select:SF (match_dup 1) (parallel [(const_int 5)])))
2194 (vec_select:SF (match_dup 1) (parallel [(const_int 6)]))
2195 (vec_select:SF (match_dup 1) (parallel [(const_int 7)]))))
2198 (vec_select:SF (match_dup 2) (parallel [(const_int 4)]))
2199 (vec_select:SF (match_dup 2) (parallel [(const_int 5)])))
2201 (vec_select:SF (match_dup 2) (parallel [(const_int 6)]))
2202 (vec_select:SF (match_dup 2) (parallel [(const_int 7)])))))))]
2204 "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
2205 [(set_attr "type" "sseadd")
2206 (set_attr "prefix" "vex")
2207 (set_attr "mode" "V8SF")])
;; 128-bit horizontal add/subtract on V4SF: adjacent element pairs of
;; operand 1 (elements 0-3) followed by those of operand 2.
;; Alternative 0 is the two-operand SSE form, alternative 1 the VEX form;
;; prefix_rep is set only for the SSE alternative.
2209 (define_insn "sse3_h<plusminus_insn>v4sf3"
2210 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2215 (match_operand:V4SF 1 "register_operand" "0,x")
2216 (parallel [(const_int 0)]))
2217 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
2219 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
2220 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
2224 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm")
2225 (parallel [(const_int 0)]))
2226 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
2228 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
2229 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
2232 h<plusminus_mnemonic>ps\t{%2, %0|%0, %2}
2233 vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
2234 [(set_attr "isa" "noavx,avx")
2235 (set_attr "type" "sseadd")
2236 (set_attr "atom_unit" "complex")
2237 (set_attr "prefix" "orig,vex")
2238 (set_attr "prefix_rep" "1,*")
2239 (set_attr "mode" "V4SF")])
;; Sum reduction of a V8DF vector: hands the V8DF add generator and both
;; operands to ix86_expand_reduc.
2241 (define_expand "reduc_splus_v8df"
2242 [(match_operand:V8DF 0 "register_operand")
2243 (match_operand:V8DF 1 "register_operand")]
2246 ix86_expand_reduc (gen_addv8df3, operands[0], operands[1]);
;; Sum reduction of a V4DF vector in three insns: a horizontal add of
;; operand 1 with itself, a vperm2f128 of that result with immediate 1,
;; and a vector add of the two temporaries into operand 0.
;; NOTE(review): immediate 1 presumably swaps the two 128-bit halves --
;; confirm against avx_vperm2f128v4df3.
2250 (define_expand "reduc_splus_v4df"
2251 [(match_operand:V4DF 0 "register_operand")
2252 (match_operand:V4DF 1 "register_operand")]
2255 rtx tmp = gen_reg_rtx (V4DFmode);
2256 rtx tmp2 = gen_reg_rtx (V4DFmode);
2257 emit_insn (gen_avx_haddv4df3 (tmp, operands[1], operands[1]));
2258 emit_insn (gen_avx_vperm2f128v4df3 (tmp2, tmp, tmp, GEN_INT (1)));
2259 emit_insn (gen_addv4df3 (operands[0], tmp, tmp2));
;; Sum reduction of a V2DF vector: a single horizontal add of operand 1
;; with itself, written directly to operand 0.
2263 (define_expand "reduc_splus_v2df"
2264 [(match_operand:V2DF 0 "register_operand")
2265 (match_operand:V2DF 1 "register_operand")]
2268 emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
;; Sum reduction of a V16SF vector: hands the V16SF add generator and both
;; operands to ix86_expand_reduc.
2272 (define_expand "reduc_splus_v16sf"
2273 [(match_operand:V16SF 0 "register_operand")
2274 (match_operand:V16SF 1 "register_operand")]
2277 ix86_expand_reduc (gen_addv16sf3, operands[0], operands[1]);
;; Sum reduction of a V8SF vector: two successive horizontal adds (each of
;; a value with itself), a vperm2f128 with immediate 1, then a vector add.
;; Note that tmp is reused as the destination of the vperm2f128 step.
2281 (define_expand "reduc_splus_v8sf"
2282 [(match_operand:V8SF 0 "register_operand")
2283 (match_operand:V8SF 1 "register_operand")]
2286 rtx tmp = gen_reg_rtx (V8SFmode);
2287 rtx tmp2 = gen_reg_rtx (V8SFmode);
2288 emit_insn (gen_avx_haddv8sf3 (tmp, operands[1], operands[1]));
2289 emit_insn (gen_avx_haddv8sf3 (tmp2, tmp, tmp));
2290 emit_insn (gen_avx_vperm2f128v8sf3 (tmp, tmp2, tmp2, GEN_INT (1)));
2291 emit_insn (gen_addv4sf3 (operands[0], tmp, tmp2));
;; Sum reduction of a V4SF vector.  Uses either two sse3_haddv4sf3 insns
;; through a temporary, or ix86_expand_reduc with the V4SF add generator.
2295 (define_expand "reduc_splus_v4sf"
2296 [(match_operand:V4SF 0 "register_operand")
2297 (match_operand:V4SF 1 "register_operand")]
2302 rtx tmp = gen_reg_rtx (V4SFmode);
2303 emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
2304 emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
2307 ix86_expand_reduc (gen_addv4sf3, operands[0], operands[1]);
2311 ;; Modes handled by reduc_sm{in,ax}* patterns.
;; Every entry is conditional: 256-bit integer modes need AVX2, 256-bit
;; float modes need AVX, V4SF needs SSE, and all 512-bit modes need AVX512F.
2312 (define_mode_iterator REDUC_SMINMAX_MODE
2313 [(V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
2314 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
2315 (V8SF "TARGET_AVX") (V4DF "TARGET_AVX")
2316 (V4SF "TARGET_SSE") (V16SI "TARGET_AVX512F")
2317 (V8DI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
2318 (V8DF "TARGET_AVX512F")])
;; Signed min/max reduction (smaxmin code iterator) over the modes of
;; REDUC_SMINMAX_MODE; expanded by ix86_expand_reduc with the matching
;; <code><mode>3 generator.
2320 (define_expand "reduc_<code>_<mode>"
2321 [(smaxmin:REDUC_SMINMAX_MODE
2322 (match_operand:REDUC_SMINMAX_MODE 0 "register_operand")
2323 (match_operand:REDUC_SMINMAX_MODE 1 "register_operand"))]
2326 ix86_expand_reduc (gen_<code><mode>3, operands[0], operands[1]);
;; Reduction expander for the VI48_512 modes; expanded by
;; ix86_expand_reduc with the matching <code><mode>3 generator.
2330 (define_expand "reduc_<code>_<mode>"
2332 (match_operand:VI48_512 0 "register_operand")
2333 (match_operand:VI48_512 1 "register_operand"))]
2336 ix86_expand_reduc (gen_<code><mode>3, operands[0], operands[1]);
;; Reduction expander for the VI_256 modes; expanded by
;; ix86_expand_reduc with the matching <code><mode>3 generator.
2340 (define_expand "reduc_<code>_<mode>"
2342 (match_operand:VI_256 0 "register_operand")
2343 (match_operand:VI_256 1 "register_operand"))]
2346 ix86_expand_reduc (gen_<code><mode>3, operands[0], operands[1]);
;; Unsigned-minimum reduction for V8HI; expanded by ix86_expand_reduc
;; with gen_uminv8hi3.
2350 (define_expand "reduc_umin_v8hi"
2352 (match_operand:V8HI 0 "register_operand")
2353 (match_operand:V8HI 1 "register_operand"))]
2356 ix86_expand_reduc (gen_uminv8hi3, operands[0], operands[1]);
;; Packed vreduce<ssemodesuffix>: operand 1 is a register or memory
;; source and operand 2 an immediate in 0..255.  The name and template are
;; parameterised by the <mask_name> / <mask_operand3> substitutions.
2360 (define_insn "<mask_codefor>reducep<mode><mask_name>"
2361 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
2363 [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "vm")
2364 (match_operand:SI 2 "const_0_to_255_operand")]
2367 "vreduce<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
2368 [(set_attr "type" "sse")
2369 (set_attr "prefix" "evex")
2370 (set_attr "mode" "<MODE>")])
;; Scalar vreduce<ssescalarmodesuffix> on VF_128 operands: two vector
;; sources (operand 2 may be memory) plus an immediate in 0..255.
2372 (define_insn "reduces<mode>"
2373 [(set (match_operand:VF_128 0 "register_operand" "=v")
2376 [(match_operand:VF_128 1 "register_operand" "v")
2377 (match_operand:VF_128 2 "nonimmediate_operand" "vm")
2378 (match_operand:SI 3 "const_0_to_255_operand")]
2383 "vreduce<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2384 [(set_attr "type" "sse")
2385 (set_attr "prefix" "evex")
2386 (set_attr "mode" "<MODE>")])
2388 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2390 ;; Parallel floating point comparisons
2392 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; VEX packed compare for 128/256-bit float vectors; operand 3 is the
;; comparison immediate, restricted to 0..31.
2394 (define_insn "avx_cmp<mode>3"
2395 [(set (match_operand:VF_128_256 0 "register_operand" "=x")
2397 [(match_operand:VF_128_256 1 "register_operand" "x")
2398 (match_operand:VF_128_256 2 "nonimmediate_operand" "xm")
2399 (match_operand:SI 3 "const_0_to_31_operand" "n")]
2402 "vcmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2403 [(set_attr "type" "ssecmp")
2404 (set_attr "length_immediate" "1")
2405 (set_attr "prefix" "vex")
2406 (set_attr "mode" "<MODE>")])
;; VEX scalar compare on VF_128 operands (scalar mnemonic suffix, insn
;; mode <ssescalarmode>); operand 3 is the comparison immediate, 0..31.
2408 (define_insn "avx_vmcmp<mode>3"
2409 [(set (match_operand:VF_128 0 "register_operand" "=x")
2412 [(match_operand:VF_128 1 "register_operand" "x")
2413 (match_operand:VF_128 2 "nonimmediate_operand" "xm")
2414 (match_operand:SI 3 "const_0_to_31_operand" "n")]
2419 "vcmp<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %3}"
2420 [(set_attr "type" "ssecmp")
2421 (set_attr "length_immediate" "1")
2422 (set_attr "prefix" "vex")
2423 (set_attr "mode" "<ssescalarmode>")])
;; Vector compare whose comparison operator (operand 3) is commutative:
;; the insn condition requires RTX_COMM_COMPARE class and operand 1
;; carries the '%' commutativity marker.  The mnemonic is built from
;; operator 3 via %D3.
2425 (define_insn "*<sse>_maskcmp<mode>3_comm"
2426 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
2427 (match_operator:VF_128_256 3 "sse_comparison_operator"
2428 [(match_operand:VF_128_256 1 "register_operand" "%0,x")
2429 (match_operand:VF_128_256 2 "nonimmediate_operand" "xm,xm")]))]
2431 && GET_RTX_CLASS (GET_CODE (operands[3])) == RTX_COMM_COMPARE"
2433 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
2434 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2435 [(set_attr "isa" "noavx,avx")
2436 (set_attr "type" "ssecmp")
2437 (set_attr "length_immediate" "1")
2438 (set_attr "prefix" "orig,vex")
2439 (set_attr "mode" "<MODE>")])
;; Vector compare for any sse_comparison_operator (operand 3); operand 1
;; is not marked commutative here.  Alternative 0 is the SSE form with
;; operand 1 tied to the destination, alternative 1 the VEX form.
2441 (define_insn "<sse>_maskcmp<mode>3"
2442 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
2443 (match_operator:VF_128_256 3 "sse_comparison_operator"
2444 [(match_operand:VF_128_256 1 "register_operand" "0,x")
2445 (match_operand:VF_128_256 2 "nonimmediate_operand" "xm,xm")]))]
2448 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
2449 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2450 [(set_attr "isa" "noavx,avx")
2451 (set_attr "type" "ssecmp")
2452 (set_attr "length_immediate" "1")
2453 (set_attr "prefix" "orig,vex")
2454 (set_attr "mode" "<MODE>")])
;; Scalar variant of the operator-based compare on VF_128 operands:
;; uses the scalar mnemonic suffix and insn mode <ssescalarmode>.
2456 (define_insn "<sse>_vmmaskcmp<mode>3"
2457 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
2459 (match_operator:VF_128 3 "sse_comparison_operator"
2460 [(match_operand:VF_128 1 "register_operand" "0,x")
2461 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm")])
2466 cmp%D3<ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
2467 vcmp%D3<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %<iptr>2}"
2468 [(set_attr "isa" "noavx,avx")
2469 (set_attr "type" "ssecmp")
2470 (set_attr "length_immediate" "1,*")
2471 (set_attr "prefix" "orig,vex")
2472 (set_attr "mode" "<ssescalarmode>")])
;; Predicate for the comparison immediate of a 512-bit compare:
;; 0..31 for the float modes, 0..7 for the integer modes.
2474 (define_mode_attr cmp_imm_predicate
2475 [(V16SF "const_0_to_31_operand") (V8DF "const_0_to_31_operand")
2476 (V16SI "const_0_to_7_operand") (V8DI "const_0_to_7_operand")])
;; 512-bit compare that writes a mask register (constraint Yk).
;; Operand 3 is the comparison immediate; its predicate comes from
;; <cmp_imm_predicate>, so the accepted range depends on the mode.
2478 (define_insn "avx512f_cmp<mode>3<mask_scalar_merge_name><round_saeonly_name>"
2479 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2480 (unspec:<avx512fmaskmode>
2481 [(match_operand:VI48F_512 1 "register_operand" "v")
2482 (match_operand:VI48F_512 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
2483 (match_operand:SI 3 "<cmp_imm_predicate>" "n")]
2485 "TARGET_AVX512F && <round_saeonly_mode512bit_condition>"
2486 "v<sseintprefix>cmp<ssemodesuffix>\t{%3, <round_saeonly_mask_scalar_merge_op4>%2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2<round_saeonly_mask_scalar_merge_op4>, %3}"
2487 [(set_attr "type" "ssecmp")
2488 (set_attr "length_immediate" "1")
2489 (set_attr "prefix" "evex")
2490 (set_attr "mode" "<sseinsnmode>")])
;; 512-bit unsigned integer compare (UNSPEC_UNSIGNED_PCMP, vpcmpu*)
;; writing a mask register; operand 3 is an immediate in 0..7.
2492 (define_insn "avx512f_ucmp<mode>3<mask_scalar_merge_name>"
2493 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2494 (unspec:<avx512fmaskmode>
2495 [(match_operand:VI48_512 1 "register_operand" "v")
2496 (match_operand:VI48_512 2 "nonimmediate_operand" "vm")
2497 (match_operand:SI 3 "const_0_to_7_operand" "n")]
2498 UNSPEC_UNSIGNED_PCMP))]
2500 "vpcmpu<ssemodesuffix>\t{%3, %2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2, %3}"
2501 [(set_attr "type" "ssecmp")
2502 (set_attr "length_immediate" "1")
2503 (set_attr "prefix" "evex")
2504 (set_attr "mode" "<sseinsnmode>")])
;; EVEX scalar compare on VF_128 operands writing a mask register; the
;; unspec result is combined with an `and' in the pattern.  Operand 3 is
;; the comparison immediate, 0..31.
2506 (define_insn "avx512f_vmcmp<mode>3<round_saeonly_name>"
2507 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2508 (and:<avx512fmaskmode>
2509 (unspec:<avx512fmaskmode>
2510 [(match_operand:VF_128 1 "register_operand" "v")
2511 (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
2512 (match_operand:SI 3 "const_0_to_31_operand" "n")]
2516 "vcmp<ssescalarmodesuffix>\t{%3, <round_saeonly_op4>%2, %1, %0|%0, %1, %2<round_saeonly_op4>, %3}"
2517 [(set_attr "type" "ssecmp")
2518 (set_attr "length_immediate" "1")
2519 (set_attr "prefix" "evex")
2520 (set_attr "mode" "<ssescalarmode>")])
;; Masked form of the EVEX scalar compare: operand 4 is an input mask
;; register, and'ed into the pattern and printed as {%4} after the
;; destination in the output template.
2522 (define_insn "avx512f_vmcmp<mode>3_mask<round_saeonly_name>"
2523 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2524 (and:<avx512fmaskmode>
2525 (unspec:<avx512fmaskmode>
2526 [(match_operand:VF_128 1 "register_operand" "v")
2527 (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
2528 (match_operand:SI 3 "const_0_to_31_operand" "n")]
2530 (and:<avx512fmaskmode>
2531 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")
2534 "vcmp<ssescalarmodesuffix>\t{%3, <round_saeonly_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_saeonly_op5>, %3}"
2535 [(set_attr "type" "ssecmp")
2536 (set_attr "length_immediate" "1")
2537 (set_attr "prefix" "evex")
2538 (set_attr "mode" "<ssescalarmode>")])
;; Operator-based float compare (sse_comparison_operator as operand 3)
;; whose result is a mask register rather than a vector.
2540 (define_insn "avx512f_maskcmp<mode>3"
2541 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2542 (match_operator:<avx512fmaskmode> 3 "sse_comparison_operator"
2543 [(match_operand:VF 1 "register_operand" "v")
2544 (match_operand:VF 2 "nonimmediate_operand" "vm")]))]
2546 "vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2547 [(set_attr "type" "ssecmp")
2548 (set_attr "length_immediate" "1")
2549 (set_attr "prefix" "evex")
2550 (set_attr "mode" "<sseinsnmode>")])
;; Sets the flags register in CCFP mode from element 0 of operand 0 and
;; element 0 of operand 1.  Enabled when SSE_FLOAT_MODE_P holds for the
;; scalar mode; prefix_data16 depends on whether the mode is DF.
2552 (define_insn "<sse>_comi<round_saeonly_name>"
2553 [(set (reg:CCFP FLAGS_REG)
2556 (match_operand:<ssevecmode> 0 "register_operand" "v")
2557 (parallel [(const_int 0)]))
2559 (match_operand:<ssevecmode> 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
2560 (parallel [(const_int 0)]))))]
2561 "SSE_FLOAT_MODE_P (<MODE>mode)"
2562 "%vcomi<ssemodesuffix>\t{<round_saeonly_op2>%1, %0|%0, %<iptr>1<round_saeonly_op2>}"
2563 [(set_attr "type" "ssecomi")
2564 (set_attr "prefix" "maybe_vex")
2565 (set_attr "prefix_rep" "0")
2566 (set (attr "prefix_data16")
2567 (if_then_else (eq_attr "mode" "DF")
2569 (const_string "0")))
2570 (set_attr "mode" "<MODE>")])
;; Sets the flags register in CCFPU mode from element 0 of operand 0 and
;; element 0 of operand 1, using the ucomi mnemonic.  Enabled when
;; SSE_FLOAT_MODE_P holds for the scalar mode.
2572 (define_insn "<sse>_ucomi<round_saeonly_name>"
2573 [(set (reg:CCFPU FLAGS_REG)
2576 (match_operand:<ssevecmode> 0 "register_operand" "v")
2577 (parallel [(const_int 0)]))
2579 (match_operand:<ssevecmode> 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
2580 (parallel [(const_int 0)]))))]
2581 "SSE_FLOAT_MODE_P (<MODE>mode)"
2582 "%vucomi<ssemodesuffix>\t{<round_saeonly_op2>%1, %0|%0, %<iptr>1<round_saeonly_op2>}"
2583 [(set_attr "type" "ssecomi")
2584 (set_attr "prefix" "maybe_vex")
2585 (set_attr "prefix_rep" "0")
2586 (set (attr "prefix_data16")
2587 (if_then_else (eq_attr "mode" "DF")
2589 (const_string "0")))
2590 (set_attr "mode" "<MODE>")])
;; Vector conditional for 512-bit modes: operator 3 compares the VF_512
;; operands 4 and 5; operands 1 and 2 are the V_512 value operands.
;; The data and comparison modes must have the same number of elements.
;; Expansion is delegated to ix86_expand_fp_vcond.
2592 (define_expand "vcond<V_512:mode><VF_512:mode>"
2593 [(set (match_operand:V_512 0 "register_operand")
2595 (match_operator 3 ""
2596 [(match_operand:VF_512 4 "nonimmediate_operand")
2597 (match_operand:VF_512 5 "nonimmediate_operand")])
2598 (match_operand:V_512 1 "general_operand")
2599 (match_operand:V_512 2 "general_operand")))]
2601 && (GET_MODE_NUNITS (<V_512:MODE>mode)
2602 == GET_MODE_NUNITS (<VF_512:MODE>mode))"
2604 bool ok = ix86_expand_fp_vcond (operands);
;; Vector conditional for 256-bit modes: operator 3 compares the VF_256
;; operands 4 and 5; operands 1 and 2 are the V_256 value operands.
;; The data and comparison modes must have the same number of elements.
;; Expansion is delegated to ix86_expand_fp_vcond.
2609 (define_expand "vcond<V_256:mode><VF_256:mode>"
2610 [(set (match_operand:V_256 0 "register_operand")
2612 (match_operator 3 ""
2613 [(match_operand:VF_256 4 "nonimmediate_operand")
2614 (match_operand:VF_256 5 "nonimmediate_operand")])
2615 (match_operand:V_256 1 "general_operand")
2616 (match_operand:V_256 2 "general_operand")))]
2618 && (GET_MODE_NUNITS (<V_256:MODE>mode)
2619 == GET_MODE_NUNITS (<VF_256:MODE>mode))"
2621 bool ok = ix86_expand_fp_vcond (operands);
;; Vector conditional for 128-bit modes: operator 3 compares the VF_128
;; operands 4 and 5; operands 1 and 2 are the V_128 value operands.
;; The data and comparison modes must have the same number of elements.
;; Expansion is delegated to ix86_expand_fp_vcond.
2626 (define_expand "vcond<V_128:mode><VF_128:mode>"
2627 [(set (match_operand:V_128 0 "register_operand")
2629 (match_operator 3 ""
2630 [(match_operand:VF_128 4 "nonimmediate_operand")
2631 (match_operand:VF_128 5 "nonimmediate_operand")])
2632 (match_operand:V_128 1 "general_operand")
2633 (match_operand:V_128 2 "general_operand")))]
2635 && (GET_MODE_NUNITS (<V_128:MODE>mode)
2636 == GET_MODE_NUNITS (<VF_128:MODE>mode))"
2638 bool ok = ix86_expand_fp_vcond (operands);
2643 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2645 ;; Parallel floating point logical operations
2647 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; And-not on float vector modes.  The C output code picks a mnemonic
;; suffix from the insn's mode attribute and a template from the
;; alternative, then formats the result into a static buffer with
;; snprintf.  For 64-byte modes the integer vpandn form is used instead.
;; The mode attribute falls back to V4SF when optimizing for size
;; without AVX.
2649 (define_insn "<sse>_andnot<mode>3"
2650 [(set (match_operand:VF 0 "register_operand" "=x,v")
2653 (match_operand:VF 1 "register_operand" "0,v"))
2654 (match_operand:VF 2 "nonimmediate_operand" "xm,vm")))]
2657 static char buf[32];
2661 switch (get_attr_mode (insn))
2668 suffix = "<ssemodesuffix>";
2671 switch (which_alternative)
2674 ops = "andn%s\t{%%2, %%0|%%0, %%2}";
2677 ops = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
2683 /* There is no vandnp[sd]. Use vpandnq. */
2684 if (<MODE_SIZE> == 64)
2687 ops = "vpandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
2690 snprintf (buf, sizeof (buf), ops, suffix);
2693 [(set_attr "isa" "noavx,avx")
2694 (set_attr "type" "sselog")
2695 (set_attr "prefix" "orig,maybe_evex")
2697 (cond [(and (match_test "<MODE_SIZE> == 16")
2698 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
2699 (const_string "<ssePSmode>")
2700 (match_test "TARGET_AVX")
2701 (const_string "<MODE>")
2702 (match_test "optimize_function_for_size_p (cfun)")
2703 (const_string "V4SF")
2705 (const_string "<MODE>")))])
;; Expander for the any_logic operations on 128/256-bit float vectors;
;; the preparation statement only calls
;; ix86_fixup_binary_operands_no_copy on the operands.
2707 (define_expand "<code><mode>3"
2708 [(set (match_operand:VF_128_256 0 "register_operand")
2709 (any_logic:VF_128_256
2710 (match_operand:VF_128_256 1 "nonimmediate_operand")
2711 (match_operand:VF_128_256 2 "nonimmediate_operand")))]
2713 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Binary-operation expander for 512-bit float vectors (VF_512); the
;; preparation statement calls ix86_fixup_binary_operands_no_copy.
2715 (define_expand "<code><mode>3"
2716 [(set (match_operand:VF_512 0 "register_operand")
2718 (match_operand:VF_512 1 "nonimmediate_operand")
2719 (match_operand:VF_512 2 "nonimmediate_operand")))]
2721 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Logic operation on float vector modes; operand 1 is marked commutative
;; and the insn requires ix86_binary_operator_ok.  The C output code
;; builds the template from a mode-dependent suffix and the alternative,
;; formatting it into a static buffer; 64-byte modes use the integer
;; vp<logic> form instead.
2723 (define_insn "*<code><mode>3"
2724 [(set (match_operand:VF 0 "register_operand" "=x,v")
2726 (match_operand:VF 1 "nonimmediate_operand" "%0,v")
2727 (match_operand:VF 2 "nonimmediate_operand" "xm,vm")))]
2728 "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
2730 static char buf[32];
2734 switch (get_attr_mode (insn))
2741 suffix = "<ssemodesuffix>";
2744 switch (which_alternative)
2747 ops = "<logic>%s\t{%%2, %%0|%%0, %%2}";
2750 ops = "v<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
2756 /* There is no v<logic>p[sd]. Use vp<logic>q. */
2757 if (<MODE_SIZE> == 64)
2760 ops = "vp<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
2763 snprintf (buf, sizeof (buf), ops, suffix);
2766 [(set_attr "isa" "noavx,avx")
2767 (set_attr "type" "sselog")
2768 (set_attr "prefix" "orig,maybe_evex")
2770 (cond [(and (match_test "<MODE_SIZE> == 16")
2771 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
2772 (const_string "<ssePSmode>")
2773 (match_test "TARGET_AVX")
2774 (const_string "<MODE>")
2775 (match_test "optimize_function_for_size_p (cfun)")
2776 (const_string "V4SF")
2778 (const_string "<MODE>")))])
;; copysign for float vector modes.  operands[3] is the mask returned by
;; ix86_build_signbit_mask; operands[4] and operands[5] are fresh
;; temporaries.  One temporary is formed from the inverted mask and
;; operand 1, the other from the mask and operand 2, and the result is
;; their inclusive OR.
2780 (define_expand "copysign<mode>3"
2783 (not:VF (match_dup 3))
2784 (match_operand:VF 1 "nonimmediate_operand")))
2786 (and:VF (match_dup 3)
2787 (match_operand:VF 2 "nonimmediate_operand")))
2788 (set (match_operand:VF 0 "register_operand")
2789 (ior:VF (match_dup 4) (match_dup 5)))]
2792 operands[3] = ix86_build_signbit_mask (<MODE>mode, 1, 0);
2794 operands[4] = gen_reg_rtx (<MODE>mode);
2795 operands[5] = gen_reg_rtx (<MODE>mode);
2798 ;; Also define scalar versions. These are used for abs, neg, and
2799 ;; conditional move. Using subregs into vector modes causes register
2800 ;; allocation lossage. These patterns do not allow memory operands
2801 ;; because the native instructions read the full 128 bits.
;; Scalar (MODEF) and-not, register operands only.  The mnemonic suffix is
;; "ps" when the insn's mode attribute is V4SF, otherwise
;; <ssevecmodesuffix>; the template is formatted into a static buffer.
2803 (define_insn "*andnot<mode>3"
2804 [(set (match_operand:MODEF 0 "register_operand" "=x,x")
2807 (match_operand:MODEF 1 "register_operand" "0,x"))
2808 (match_operand:MODEF 2 "register_operand" "x,x")))]
2809 "SSE_FLOAT_MODE_P (<MODE>mode)"
2811 static char buf[32];
2814 = (get_attr_mode (insn) == MODE_V4SF) ? "ps" : "<ssevecmodesuffix>";
2816 switch (which_alternative)
2819 ops = "andn%s\t{%%2, %%0|%%0, %%2}";
2822 ops = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
2828 snprintf (buf, sizeof (buf), ops, suffix);
2831 [(set_attr "isa" "noavx,avx")
2832 (set_attr "type" "sselog")
2833 (set_attr "prefix" "orig,vex")
2835 (cond [(and (match_test "<MODE_SIZE> == 16")
2836 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
2837 (const_string "V4SF")
2838 (match_test "TARGET_AVX")
2839 (const_string "<ssevecmode>")
2840 (match_test "optimize_function_for_size_p (cfun)")
2841 (const_string "V4SF")
2843 (const_string "<ssevecmode>")))])
;; TFmode and-not.  The base mnemonic is "andnps" when the insn's mode
;; attribute is V4SF and "pandn" otherwise; alternative 1 prepends "v"
;; and uses three operands.  The mode attribute selects V4SF when SSE2
;; is unavailable or when optimizing for size, and ends in TI.
2845 (define_insn "*andnottf3"
2846 [(set (match_operand:TF 0 "register_operand" "=x,x")
2848 (not:TF (match_operand:TF 1 "register_operand" "0,x"))
2849 (match_operand:TF 2 "nonimmediate_operand" "xm,xm")))]
2852 static char buf[32];
2855 = (get_attr_mode (insn) == MODE_V4SF) ? "andnps" : "pandn";
2857 switch (which_alternative)
2860 ops = "%s\t{%%2, %%0|%%0, %%2}";
2863 ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
2869 snprintf (buf, sizeof (buf), ops, tmp);
2872 [(set_attr "isa" "noavx,avx")
2873 (set_attr "type" "sselog")
2874 (set (attr "prefix_data16")
2876 (and (eq_attr "alternative" "0")
2877 (eq_attr "mode" "TI"))
2879 (const_string "*")))
2880 (set_attr "prefix" "orig,vex")
2882 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
2883 (const_string "V4SF")
2884 (match_test "TARGET_AVX")
2886 (ior (not (match_test "TARGET_SSE2"))
2887 (match_test "optimize_function_for_size_p (cfun)"))
2888 (const_string "V4SF")
2890 (const_string "TI")))])
;; Scalar (MODEF) logic operation, register operands only, operand 1
;; marked commutative.  The mnemonic suffix is "ps" when the insn's mode
;; attribute is V4SF, otherwise <ssevecmodesuffix>.
2892 (define_insn "*<code><mode>3"
2893 [(set (match_operand:MODEF 0 "register_operand" "=x,x")
2895 (match_operand:MODEF 1 "register_operand" "%0,x")
2896 (match_operand:MODEF 2 "register_operand" "x,x")))]
2897 "SSE_FLOAT_MODE_P (<MODE>mode)"
2899 static char buf[32];
2902 = (get_attr_mode (insn) == MODE_V4SF) ? "ps" : "<ssevecmodesuffix>";
2904 switch (which_alternative)
2907 ops = "<logic>%s\t{%%2, %%0|%%0, %%2}";
2910 ops = "v<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
2916 snprintf (buf, sizeof (buf), ops, suffix);
2919 [(set_attr "isa" "noavx,avx")
2920 (set_attr "type" "sselog")
2921 (set_attr "prefix" "orig,vex")
2923 (cond [(and (match_test "<MODE_SIZE> == 16")
2924 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
2925 (const_string "V4SF")
2926 (match_test "TARGET_AVX")
2927 (const_string "<ssevecmode>")
2928 (match_test "optimize_function_for_size_p (cfun)")
2929 (const_string "V4SF")
2931 (const_string "<ssevecmode>")))])
;; TFmode binary-operation expander; the preparation statement calls
;; ix86_fixup_binary_operands_no_copy with TFmode.
2933 (define_expand "<code>tf3"
2934 [(set (match_operand:TF 0 "register_operand")
2936 (match_operand:TF 1 "nonimmediate_operand")
2937 (match_operand:TF 2 "nonimmediate_operand")))]
2939 "ix86_fixup_binary_operands_no_copy (<CODE>, TFmode, operands);")
;; TFmode logic operation; requires ix86_binary_operator_ok.  The base
;; mnemonic is "<logic>ps" when the insn's mode attribute is V4SF and
;; "p<logic>" otherwise; alternative 1 prepends "v" and uses three
;; operands.
2941 (define_insn "*<code>tf3"
2942 [(set (match_operand:TF 0 "register_operand" "=x,x")
2944 (match_operand:TF 1 "nonimmediate_operand" "%0,x")
2945 (match_operand:TF 2 "nonimmediate_operand" "xm,xm")))]
2947 && ix86_binary_operator_ok (<CODE>, TFmode, operands)"
2949 static char buf[32];
2952 = (get_attr_mode (insn) == MODE_V4SF) ? "<logic>ps" : "p<logic>";
2954 switch (which_alternative)
2957 ops = "%s\t{%%2, %%0|%%0, %%2}";
2960 ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
2966 snprintf (buf, sizeof (buf), ops, tmp);
2969 [(set_attr "isa" "noavx,avx")
2970 (set_attr "type" "sselog")
2971 (set (attr "prefix_data16")
2973 (and (eq_attr "alternative" "0")
2974 (eq_attr "mode" "TI"))
2976 (const_string "*")))
2977 (set_attr "prefix" "orig,vex")
2979 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
2980 (const_string "V4SF")
2981 (match_test "TARGET_AVX")
2983 (ior (not (match_test "TARGET_SSE2"))
2984 (match_test "optimize_function_for_size_p (cfun)"))
2985 (const_string "V4SF")
2987 (const_string "TI")))])
2989 ;; There is no floating-point xor for V16SF and V8DF in AVX512F,
2990 ;; but we need one for negation. Instead we use the integer versions
2991 ;; of xor. Maybe there could be a better way to do that.
;; Suffix appended to the vp<logic> mnemonic by avx512f_<logic><mode>:
;; "d" for V16SF and "q" for V8DF.
2993 (define_mode_attr avx512flogicsuff
2994 [(V16SF "d") (V8DF "q")])
;; Logic operation on 512-bit float vectors, emitted as the integer
;; instruction vp<logic><avx512flogicsuff> with EVEX prefix.
2996 (define_insn "avx512f_<logic><mode>"
2997 [(set (match_operand:VF_512 0 "register_operand" "=v")
2999 (match_operand:VF_512 1 "register_operand" "v")
3000 (match_operand:VF_512 2 "nonimmediate_operand" "vm")))]
3002 "vp<logic><avx512flogicsuff>\t{%2, %1, %0|%0, %1, %2}"
3003 [(set_attr "type" "sselog")
3004 (set_attr "prefix" "evex")])
3006 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3008 ;; FMA floating point multiply/accumulate instructions. These include
3009 ;; scalar versions of the instructions as well as vector versions.
3011 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3013 ;; The standard names for scalar FMA are only available with SSE math enabled.
3014 ;; CPUID bit AVX512F enables evex encoded scalar and 512-bit fma. It doesn't
3015 ;; care about FMA bit, so we enable fma for TARGET_AVX512F even when TARGET_FMA
3016 ;; and TARGET_FMA4 are both false.
3017 ;; TODO: In theory AVX512F does not automatically imply FMA, and without FMA
3018 ;; one must force the EVEX encoding of the fma insns. Ideally we'd improve
3019 ;; GAS to allow proper prefix selection. However, for the moment all hardware
3020 ;; that supports AVX512F also supports FMA so we can ignore this for now.
;; Modes for the standard-name FMA expanders.  The scalar SF/DF entries
;; additionally require TARGET_SSE_MATH; 128/256-bit vectors accept FMA,
;; FMA4 or AVX512VL; 512-bit vectors require AVX512F.
3021 (define_mode_iterator FMAMODEM
3022 [(SF "TARGET_SSE_MATH && (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F)")
3023 (DF "TARGET_SSE_MATH && (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F)")
3024 (V4SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3025 (V2DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3026 (V8SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3027 (V4DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3028 (V16SF "TARGET_AVX512F")
3029 (V8DF "TARGET_AVX512F")])
;; Standard-name fma expander over FMAMODEM: three nonimmediate inputs,
;; none negated; no preparation statements.
3031 (define_expand "fma<mode>4"
3032 [(set (match_operand:FMAMODEM 0 "register_operand")
3034 (match_operand:FMAMODEM 1 "nonimmediate_operand")
3035 (match_operand:FMAMODEM 2 "nonimmediate_operand")
3036 (match_operand:FMAMODEM 3 "nonimmediate_operand")))])
;; Standard-name fms expander over FMAMODEM: operand 3 is wrapped in neg.
3038 (define_expand "fms<mode>4"
3039 [(set (match_operand:FMAMODEM 0 "register_operand")
3041 (match_operand:FMAMODEM 1 "nonimmediate_operand")
3042 (match_operand:FMAMODEM 2 "nonimmediate_operand")
3043 (neg:FMAMODEM (match_operand:FMAMODEM 3 "nonimmediate_operand"))))])
;; Standard-name fnma expander over FMAMODEM: operand 1 is wrapped in neg.
3045 (define_expand "fnma<mode>4"
3046 [(set (match_operand:FMAMODEM 0 "register_operand")
3048 (neg:FMAMODEM (match_operand:FMAMODEM 1 "nonimmediate_operand"))
3049 (match_operand:FMAMODEM 2 "nonimmediate_operand")
3050 (match_operand:FMAMODEM 3 "nonimmediate_operand")))])
;; Standard-name fnms expander over FMAMODEM: both operand 1 and
;; operand 3 are wrapped in neg.
3052 (define_expand "fnms<mode>4"
3053 [(set (match_operand:FMAMODEM 0 "register_operand")
3055 (neg:FMAMODEM (match_operand:FMAMODEM 1 "nonimmediate_operand"))
3056 (match_operand:FMAMODEM 2 "nonimmediate_operand")
3057 (neg:FMAMODEM (match_operand:FMAMODEM 3 "nonimmediate_operand"))))])
3059 ;; The builtins for intrinsics are not constrained by SSE math enabled.
;; Same mode list as FMAMODEM, but the scalar SF/DF entries carry no
;; TARGET_SSE_MATH requirement.
3060 (define_mode_iterator FMAMODE_AVX512
3061 [(SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
3062 (DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
3063 (V4SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3064 (V2DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3065 (V8SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3066 (V4DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3067 (V16SF "TARGET_AVX512F")
3068 (V8DF "TARGET_AVX512F")])
;; Unconditional scalar, 128-bit and 256-bit float modes (no 512-bit
;; entries); used by the *fma_* insns guarded by TARGET_FMA || TARGET_FMA4.
3070 (define_mode_iterator FMAMODE
3071 [SF DF V4SF V2DF V8SF V4DF])
;; Named fmadd expander over FMAMODE_AVX512 (the iterator without the
;; SSE-math requirement); no preparation statements.
3073 (define_expand "fma4i_fmadd_<mode>"
3074 [(set (match_operand:FMAMODE_AVX512 0 "register_operand")
3076 (match_operand:FMAMODE_AVX512 1 "nonimmediate_operand")
3077 (match_operand:FMAMODE_AVX512 2 "nonimmediate_operand")
3078 (match_operand:FMAMODE_AVX512 3 "nonimmediate_operand")))])
;; Expander for the maskz fmadd form: forwards operands 0-3 to
;; gen_fma_fmadd_<mode>_maskz_1, inserting CONST0_RTX (<MODE>mode) as an
;; extra argument ahead of the mask (operand 4).
3080 (define_expand "<avx512>_fmadd_<mode>_maskz<round_expand_name>"
3081 [(match_operand:VF_AVX512VL 0 "register_operand")
3082 (match_operand:VF_AVX512VL 1 "<round_expand_nimm_predicate>")
3083 (match_operand:VF_AVX512VL 2 "<round_expand_nimm_predicate>")
3084 (match_operand:VF_AVX512VL 3 "<round_expand_nimm_predicate>")
3085 (match_operand:<avx512fmaskmode> 4 "register_operand")]
3086 "TARGET_AVX512F && <round_mode512bit_condition>"
3088 emit_insn (gen_fma_fmadd_<mode>_maskz_1<round_expand_name> (
3089 operands[0], operands[1], operands[2], operands[3],
3090 CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>));
;; Unmasked fused multiply-add for FMA/FMA4 targets.  Alternatives 0-2 are
;; the three-operand 132/213/231 forms (operand 1 tied to the destination
;; in the first two, operand 3 in the third); alternatives 3-4 are the
;; four-operand form.  The isa attribute assigns each alternative to
;; fma or fma4.
3094 (define_insn "*fma_fmadd_<mode>"
3095 [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
3097 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x")
3098 (match_operand:FMAMODE 2 "nonimmediate_operand" "vm,v,vm,x,m")
3099 (match_operand:FMAMODE 3 "nonimmediate_operand" "v,vm,0,xm,x")))]
3100 "TARGET_FMA || TARGET_FMA4"
3102 vfmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
3103 vfmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
3104 vfmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
3105 vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
3106 vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
3107 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
3108 (set_attr "type" "ssemuladd")
3109 (set_attr "mode" "<MODE>")])
3111 ;; Assumes AVX-512F as the baseline.
;; Scalar SF/DF and 512-bit modes are unconditional here; the 128-bit and
;; 256-bit vector modes additionally require TARGET_AVX512VL.
3112 (define_mode_iterator VF_SF_AVX512VL
3113 [SF V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
3114 DF V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
;; AVX512F fused multiply-add over VF_SF_AVX512VL, parameterised by the
;; <sd_maskz_name> and <round_name> substitutions.  The three
;; alternatives give the 132, 213 and 231 operand orders (operand 1 tied
;; to the destination in the first two, operand 3 in the third).
3116 (define_insn "<sd_mask_codefor>fma_fmadd_<mode><sd_maskz_name><round_name>"
3117 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
3119 (match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v")
3120 (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
3121 (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0")))]
3122 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
3124 vfmadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
3125 vfmadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
3126 vfmadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
3127 [(set_attr "type" "ssemuladd")
3128 (set_attr "mode" "<MODE>")])
;; Masked fmadd (vec_merge under mask operand 4, printed as {%4}).
;; Operand 1 is tied to the destination in both alternatives:
;; alternative 0 emits the 132 form, alternative 1 the 213 form.
3130 (define_insn "<avx512>_fmadd_<mode>_mask<round_name>"
3131 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
3132 (vec_merge:VF_AVX512VL
3134 (match_operand:VF_AVX512VL 1 "register_operand" "0,0")
3135 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v")
3136 (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>"))
3138 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
3139 "TARGET_AVX512F && <round_mode512bit_condition>"
3141 vfmadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
3142 vfmadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
3143 [(set_attr "isa" "fma_avx512f,fma_avx512f")
3144 (set_attr "type" "ssemuladd")
3145 (set_attr "mode" "<MODE>")])
;; Masked fmadd, 231 form (vec_merge under mask operand 4, printed as
;; {%4}): the addend, operand 3, is tied to the destination.
;; Register operands use the "v" constraint, matching the other masked
;; FMA patterns in this file, so the register allocator is not limited
;; to the "x" register class for this insn.
3147 (define_insn "<avx512>_fmadd_<mode>_mask3<round_name>"
3148 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
3149 (vec_merge:VF_AVX512VL
3151 (match_operand:VF_AVX512VL 1 "register_operand" "v")
3152 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")
3153 (match_operand:VF_AVX512VL 3 "register_operand" "0"))
3155 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
3157 "vfmadd231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
3158 [(set_attr "isa" "fma_avx512f")
3159 (set_attr "type" "ssemuladd")
3160 (set_attr "mode" "<MODE>")])
;; Unmasked fused multiply-subtract for FMA/FMA4 targets.  Alternatives
;; 0-2 are the three-operand 132/213/231 forms; alternatives 3-4 are the
;; four-operand form.  The isa attribute assigns each alternative to
;; fma or fma4.
3162 (define_insn "*fma_fmsub_<mode>"
3163 [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
3165 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x")
3166 (match_operand:FMAMODE 2 "nonimmediate_operand" "vm,v,vm,x,m")
3168 (match_operand:FMAMODE 3 "nonimmediate_operand" "v,vm,0,xm,x"))))]
3169 "TARGET_FMA || TARGET_FMA4"
3171 vfmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
3172 vfmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
3173 vfmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
3174 vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
3175 vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
3176 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
3177 (set_attr "type" "ssemuladd")
3178 (set_attr "mode" "<MODE>")])
;; AVX-512 fused multiply-subtract on the VF_SF_AVX512VL modes.
;; The <sd_mask_*> and <round_*> placeholders in the name, predicates,
;; constraints, condition and templates are defined outside this section.
;; The three alternatives emit the 132, 213 and 231 forms.
3180 (define_insn "<sd_mask_codefor>fma_fmsub_<mode><sd_maskz_name><round_name>"
3181 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
3183 (match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v")
3184 (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
3186 (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0"))))]
3187 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
3189 vfmsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
3190 vfmsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
3191 vfmsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
3192 [(set_attr "type" "ssemuladd")
3193 (set_attr "mode" "<MODE>")])
;; Masked fused multiply-subtract on the VF_AVX512VL modes.
;; Operand 1 is tied to the destination ("0") in both alternatives and
;; operand 4 (constraint "Yk") is printed as the %{%4%} write-mask.
;; The alternatives emit the 132 and 213 forms.
3195 (define_insn "<avx512>_fmsub_<mode>_mask<round_name>"
3196 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
3197 (vec_merge:VF_AVX512VL
3199 (match_operand:VF_AVX512VL 1 "register_operand" "0,0")
3200 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v")
3202 (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>")))
3204 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
3207 vfmsub132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
3208 vfmsub213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
3209 [(set_attr "isa" "fma_avx512f,fma_avx512f")
3210 (set_attr "type" "ssemuladd")
3211 (set_attr "mode" "<MODE>")])
;; Masked fused multiply-subtract, "mask3" form: operand 3 is tied to the
;; destination ("0") and the 231 form is emitted, with operand 4
;; (constraint "Yk") printed as the %{%4%} write-mask.
3213 (define_insn "<avx512>_fmsub_<mode>_mask3<round_name>"
3214 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
3215 (vec_merge:VF_AVX512VL
3217 (match_operand:VF_AVX512VL 1 "register_operand" "v")
3218 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")
3220 (match_operand:VF_AVX512VL 3 "register_operand" "0")))
3222 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
3223 "TARGET_AVX512F && <round_mode512bit_condition>"
3224 "vfmsub231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
3225 [(set_attr "isa" "fma_avx512f")
3226 (set_attr "type" "ssemuladd")
3227 (set_attr "mode" "<MODE>")])
;; Unmasked negated fused multiply-add for the FMAMODE modes, enabled for
;; FMA or FMA4.  Per the "isa" attribute, alternatives 0-2 emit the FMA3
;; forms (132/213/231) and alternatives 3-4 the four-operand FMA4 form.
3229 (define_insn "*fma_fnmadd_<mode>"
3230 [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
3233 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x"))
3234 (match_operand:FMAMODE 2 "nonimmediate_operand" "vm,v,vm,x,m")
3235 (match_operand:FMAMODE 3 "nonimmediate_operand" "v,vm,0,xm,x")))]
3236 "TARGET_FMA || TARGET_FMA4"
3238 vfnmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
3239 vfnmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
3240 vfnmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
3241 vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
3242 vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
3243 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
3244 (set_attr "type" "ssemuladd")
3245 (set_attr "mode" "<MODE>")])
;; AVX-512 negated fused multiply-add on the VF_SF_AVX512VL modes.
;; The <sd_mask_*> and <round_*> placeholders are defined outside this
;; section.  The three alternatives emit the 132, 213 and 231 forms.
3247 (define_insn "<sd_mask_codefor>fma_fnmadd_<mode><sd_maskz_name><round_name>"
3248 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
3251 (match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v"))
3252 (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
3253 (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0")))]
3254 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
3256 vfnmadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
3257 vfnmadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
3258 vfnmadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
3259 [(set_attr "type" "ssemuladd")
3260 (set_attr "mode" "<MODE>")])
;; Masked negated fused multiply-add on the VF_AVX512VL modes.
;; Operand 1 is tied to the destination ("0") in both alternatives and
;; operand 4 (constraint "Yk") is printed as the %{%4%} write-mask.
;; The alternatives emit the 132 and 213 forms.
3262 (define_insn "<avx512>_fnmadd_<mode>_mask<round_name>"
3263 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
3264 (vec_merge:VF_AVX512VL
3267 (match_operand:VF_AVX512VL 1 "register_operand" "0,0"))
3268 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v")
3269 (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>"))
3271 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
3272 "TARGET_AVX512F && <round_mode512bit_condition>"
3274 vfnmadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
3275 vfnmadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
3276 [(set_attr "isa" "fma_avx512f,fma_avx512f")
3277 (set_attr "type" "ssemuladd")
3278 (set_attr "mode" "<MODE>")])
;; Masked negated fused multiply-add, "mask3" form: operand 3 is tied to
;; the destination ("0") and the 231 form is emitted, with operand 4
;; (constraint "Yk") printed as the %{%4%} write-mask.
3280 (define_insn "<avx512>_fnmadd_<mode>_mask3<round_name>"
3281 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
3282 (vec_merge:VF_AVX512VL
3285 (match_operand:VF_AVX512VL 1 "register_operand" "v"))
3286 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")
3287 (match_operand:VF_AVX512VL 3 "register_operand" "0"))
3289 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
3290 "TARGET_AVX512F && <round_mode512bit_condition>"
3291 "vfnmadd231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
3292 [(set_attr "isa" "fma_avx512f")
3293 (set_attr "type" "ssemuladd")
3294 (set_attr "mode" "<MODE>")])
;; Unmasked negated fused multiply-subtract for the FMAMODE modes,
;; enabled for FMA or FMA4.  Per the "isa" attribute, alternatives 0-2
;; emit the FMA3 forms (132/213/231) and alternatives 3-4 the
;; four-operand FMA4 form.
;; The FMA3 templates are plain, matching *fma_fmsub_<mode> and
;; *fma_fnmadd_<mode>: this pattern has no mask or rounding operand, so
;; the <round_sd_mask_op4>/<sd_mask_op4> placeholders used by the AVX-512
;; variant do not belong here.
3296 (define_insn "*fma_fnmsub_<mode>"
3297 [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
3300 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x"))
3301 (match_operand:FMAMODE 2 "nonimmediate_operand" "vm,v,vm,x,m")
3303 (match_operand:FMAMODE 3 "nonimmediate_operand" "v,vm,0,xm,x"))))]
3304 "TARGET_FMA || TARGET_FMA4"
3306 vfnmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
3307 vfnmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
3308 vfnmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
3309 vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
3310 vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
3311 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
3312 (set_attr "type" "ssemuladd")
3313 (set_attr "mode" "<MODE>")])
;; AVX-512 negated fused multiply-subtract on the VF_SF_AVX512VL modes.
;; The <sd_mask_*> and <round_*> placeholders are defined outside this
;; section.  The three alternatives emit the 132, 213 and 231 forms.
3315 (define_insn "<sd_mask_codefor>fma_fnmsub_<mode><sd_maskz_name><round_name>"
3316 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
3319 (match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v"))
3320 (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
3322 (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0"))))]
3323 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
3325 vfnmsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
3326 vfnmsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
3327 vfnmsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
3328 [(set_attr "type" "ssemuladd")
3329 (set_attr "mode" "<MODE>")])
;; Masked negated fused multiply-subtract on the VF_AVX512VL modes.
;; Operand 1 is tied to the destination ("0") in both alternatives and
;; operand 4 (constraint "Yk") is printed as the %{%4%} write-mask.
;; The alternatives emit the 132 and 213 forms.
3331 (define_insn "<avx512>_fnmsub_<mode>_mask<round_name>"
3332 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
3333 (vec_merge:VF_AVX512VL
3336 (match_operand:VF_AVX512VL 1 "register_operand" "0,0"))
3337 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v")
3339 (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>")))
3341 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
3342 "TARGET_AVX512F && <round_mode512bit_condition>"
3344 vfnmsub132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
3345 vfnmsub213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
3346 [(set_attr "isa" "fma_avx512f,fma_avx512f")
3347 (set_attr "type" "ssemuladd")
3348 (set_attr "mode" "<MODE>")])
;; Masked negated fused multiply-subtract, "mask3" form: operand 3 is
;; tied to the destination ("0") and the 231 form is emitted, with
;; operand 4 (constraint "Yk") printed as the %{%4%} write-mask.
3350 (define_insn "<avx512>_fnmsub_<mode>_mask3<round_name>"
3351 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
3352 (vec_merge:VF_AVX512VL
3355 (match_operand:VF_AVX512VL 1 "register_operand" "v"))
3356 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")
3358 (match_operand:VF_AVX512VL 3 "register_operand" "0")))
3360 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
3362 "vfnmsub231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
3363 [(set_attr "isa" "fma_avx512f")
3364 (set_attr "type" "ssemuladd")
3365 (set_attr "mode" "<MODE>")])
3367 ;; FMA parallel floating point multiply addsub and subadd operations.
3369 ;; It would be possible to represent these without the UNSPEC as
3372 ;; (fma op1 op2 op3)
3373 ;; (fma op1 op2 (neg op3))
3376 ;; But this doesn't seem useful in practice.
;; Named expander for the multiply addsub operation on the VF modes.
;; It takes three nonimmediate inputs and a register destination, and is
;; available when any of FMA, FMA4 or AVX512F is enabled.
3378 (define_expand "fmaddsub_<mode>"
3379 [(set (match_operand:VF 0 "register_operand")
3381 [(match_operand:VF 1 "nonimmediate_operand")
3382 (match_operand:VF 2 "nonimmediate_operand")
3383 (match_operand:VF 3 "nonimmediate_operand")]
3385 "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
;; Zero-masking expander for fmaddsub on the VF_AVX512VL modes.
;; It forwards operands 0-3 to gen_fma_fmaddsub_<mode>_maskz_1, passing
;; CONST0_RTX (<MODE>mode) ahead of the mask operand 4.
3387 (define_expand "<avx512>_fmaddsub_<mode>_maskz<round_expand_name>"
3388 [(match_operand:VF_AVX512VL 0 "register_operand")
3389 (match_operand:VF_AVX512VL 1 "<round_expand_nimm_predicate>")
3390 (match_operand:VF_AVX512VL 2 "<round_expand_nimm_predicate>")
3391 (match_operand:VF_AVX512VL 3 "<round_expand_nimm_predicate>")
3392 (match_operand:<avx512fmaskmode> 4 "register_operand")]
3395 emit_insn (gen_fma_fmaddsub_<mode>_maskz_1<round_expand_name> (
3396 operands[0], operands[1], operands[2], operands[3],
3397 CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>));
;; Unmasked multiply addsub for the VF_128_256 modes, enabled for FMA or
;; FMA4.  Per the "isa" attribute, alternatives 0-2 emit the FMA3 forms
;; (132/213/231) and alternatives 3-4 the four-operand FMA4 form.
3401 (define_insn "*fma_fmaddsub_<mode>"
3402 [(set (match_operand:VF_128_256 0 "register_operand" "=v,v,v,x,x")
3404 [(match_operand:VF_128_256 1 "nonimmediate_operand" "%0,0,v,x,x")
3405 (match_operand:VF_128_256 2 "nonimmediate_operand" "vm,v,vm,x,m")
3406 (match_operand:VF_128_256 3 "nonimmediate_operand" "v,vm,0,xm,x")]
3408 "TARGET_FMA || TARGET_FMA4"
3410 vfmaddsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
3411 vfmaddsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
3412 vfmaddsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
3413 vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
3414 vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
3415 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
3416 (set_attr "type" "ssemuladd")
3417 (set_attr "mode" "<MODE>")])
;; AVX-512 multiply addsub on the VF_SF_AVX512VL modes, expressed as an
;; unspec over operands 1-3.  The <sd_mask_*> and <round_*> placeholders
;; are defined outside this section.  The three alternatives emit the
;; 132, 213 and 231 forms.
3419 (define_insn "<sd_mask_codefor>fma_fmaddsub_<mode><sd_maskz_name><round_name>"
3420 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
3421 (unspec:VF_SF_AVX512VL
3422 [(match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v")
3423 (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
3424 (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0")]
3426 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
3428 vfmaddsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
3429 vfmaddsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
3430 vfmaddsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
3431 [(set_attr "type" "ssemuladd")
3432 (set_attr "mode" "<MODE>")])
;; Masked multiply addsub on the VF_AVX512VL modes.
;; Operand 1 is tied to the destination ("0") in both alternatives and
;; operand 4 (constraint "Yk") is printed as the %{%4%} write-mask.
;; The alternatives emit the 132 and 213 forms.
3434 (define_insn "<avx512>_fmaddsub_<mode>_mask<round_name>"
3435 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
3436 (vec_merge:VF_AVX512VL
3438 [(match_operand:VF_AVX512VL 1 "register_operand" "0,0")
3439 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v")
3440 (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>")]
3443 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
3446 vfmaddsub132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
3447 vfmaddsub213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
3448 [(set_attr "isa" "fma_avx512f,fma_avx512f")
3449 (set_attr "type" "ssemuladd")
3450 (set_attr "mode" "<MODE>")])
;; Masked multiply addsub, "mask3" form: operand 3 is tied to the
;; destination ("0") and the 231 form is emitted, with operand 4
;; (constraint "Yk") printed as the %{%4%} write-mask.
3452 (define_insn "<avx512>_fmaddsub_<mode>_mask3<round_name>"
3453 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
3454 (vec_merge:VF_AVX512VL
3456 [(match_operand:VF_AVX512VL 1 "register_operand" "v")
3457 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")
3458 (match_operand:VF_AVX512VL 3 "register_operand" "0")]
3461 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
3463 "vfmaddsub231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
3464 [(set_attr "isa" "fma_avx512f")
3465 (set_attr "type" "ssemuladd")
3466 (set_attr "mode" "<MODE>")])
;; Unmasked multiply subadd for the VF_128_256 modes, enabled for FMA or
;; FMA4.  Per the "isa" attribute, alternatives 0-2 emit the FMA3 forms
;; (132/213/231) and alternatives 3-4 the four-operand FMA4 form.
3468 (define_insn "*fma_fmsubadd_<mode>"
3469 [(set (match_operand:VF_128_256 0 "register_operand" "=v,v,v,x,x")
3471 [(match_operand:VF_128_256 1 "nonimmediate_operand" "%0,0,v,x,x")
3472 (match_operand:VF_128_256 2 "nonimmediate_operand" "vm,v,vm,x,m")
3474 (match_operand:VF_128_256 3 "nonimmediate_operand" "v,vm,0,xm,x"))]
3476 "TARGET_FMA || TARGET_FMA4"
3478 vfmsubadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
3479 vfmsubadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
3480 vfmsubadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
3481 vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
3482 vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
3483 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
3484 (set_attr "type" "ssemuladd")
3485 (set_attr "mode" "<MODE>")])
;; AVX-512 multiply subadd on the VF_SF_AVX512VL modes, expressed as an
;; unspec over operands 1-3.  The <sd_mask_*> and <round_*> placeholders
;; are defined outside this section.  The three alternatives emit the
;; 132, 213 and 231 forms.
3487 (define_insn "<sd_mask_codefor>fma_fmsubadd_<mode><sd_maskz_name><round_name>"
3488 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
3489 (unspec:VF_SF_AVX512VL
3490 [(match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v")
3491 (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
3493 (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0"))]
3495 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
3497 vfmsubadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
3498 vfmsubadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
3499 vfmsubadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
3500 [(set_attr "type" "ssemuladd")
3501 (set_attr "mode" "<MODE>")])
;; Masked multiply subadd on the VF_AVX512VL modes.
;; Operand 1 is tied to the destination ("0") in both alternatives and
;; operand 4 (constraint "Yk") is printed as the %{%4%} write-mask.
;; The alternatives emit the 132 and 213 forms.
3503 (define_insn "<avx512>_fmsubadd_<mode>_mask<round_name>"
3504 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
3505 (vec_merge:VF_AVX512VL
3507 [(match_operand:VF_AVX512VL 1 "register_operand" "0,0")
3508 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v")
3510 (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>"))]
3513 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
3516 vfmsubadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
3517 vfmsubadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
3518 [(set_attr "isa" "fma_avx512f,fma_avx512f")
3519 (set_attr "type" "ssemuladd")
3520 (set_attr "mode" "<MODE>")])
;; Masked multiply subadd, "mask3" form: operand 3 is tied to the
;; destination ("0") and the 231 form is emitted, with operand 4
;; (constraint "Yk") printed as the %{%4%} write-mask.
3522 (define_insn "<avx512>_fmsubadd_<mode>_mask3<round_name>"
3523 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
3524 (vec_merge:VF_AVX512VL
3526 [(match_operand:VF_AVX512VL 1 "register_operand" "v")
3527 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")
3529 (match_operand:VF_AVX512VL 3 "register_operand" "0"))]
3532 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
3534 "vfmsubadd231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
3535 [(set_attr "isa" "fma_avx512f")
3536 (set_attr "type" "ssemuladd")
3537 (set_attr "mode" "<MODE>")])
3539 ;; FMA3 floating point scalar intrinsics. These merge result with
3540 ;; high-order elements from the destination register.
;; Named expander for the scalar FMA3 multiply-add intrinsic on the
;; VF_128 modes.  All three inputs use <round_nimm_predicate>, which is
;; defined outside this section.
3542 (define_expand "fmai_vmfmadd_<mode><round_name>"
3543 [(set (match_operand:VF_128 0 "register_operand")
3546 (match_operand:VF_128 1 "<round_nimm_predicate>")
3547 (match_operand:VF_128 2 "<round_nimm_predicate>")
3548 (match_operand:VF_128 3 "<round_nimm_predicate>"))
;; Scalar FMA3 multiply-add on the VF_128 modes (scalar mnemonic suffix).
;; Operand 1 is tied to the destination in both alternatives, which emit
;; the 132 and 213 forms.
;; NOTE(review): the name has no <round_name> suffix although the body
;; uses <round_*> placeholders; the fnmadd/fnmsub siblings carry the
;; suffix -- confirm this is intentional.
3553 (define_insn "*fmai_fmadd_<mode>"
3554 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
3557 (match_operand:VF_128 1 "<round_nimm_predicate>" " 0, 0")
3558 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>, v")
3559 (match_operand:VF_128 3 "<round_nimm_predicate>" " v,<round_constraint>"))
3562 "TARGET_FMA || TARGET_AVX512F"
3564 vfmadd132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
3565 vfmadd213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
3566 [(set_attr "type" "ssemuladd")
3567 (set_attr "mode" "<MODE>")])
;; Scalar FMA3 multiply-subtract on the VF_128 modes (scalar mnemonic
;; suffix).  Operand 1 is tied to the destination in both alternatives,
;; which emit the 132 and 213 forms.
;; NOTE(review): the name has no <round_name> suffix although the body
;; uses <round_*> placeholders; the fnmadd/fnmsub siblings carry the
;; suffix -- confirm this is intentional.
3569 (define_insn "*fmai_fmsub_<mode>"
3570 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
3573 (match_operand:VF_128 1 "<round_nimm_predicate>" "0,0")
3574 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>,v")
3576 (match_operand:VF_128 3 "<round_nimm_predicate>" " v,<round_constraint>")))
3579 "TARGET_FMA || TARGET_AVX512F"
3581 vfmsub132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
3582 vfmsub213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
3583 [(set_attr "type" "ssemuladd")
3584 (set_attr "mode" "<MODE>")])
;; Scalar FMA3 negated multiply-add on the VF_128 modes.
;; Operand 2 is listed first in the RTL here, but operand 1 is still the
;; one tied to the destination ("0,0"); the alternatives emit the 132 and
;; 213 forms.
3586 (define_insn "*fmai_fnmadd_<mode><round_name>"
3587 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
3591 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>,v"))
3592 (match_operand:VF_128 1 "<round_nimm_predicate>" "0,0")
3593 (match_operand:VF_128 3 "<round_nimm_predicate>" "v,<round_constraint>"))
3596 "TARGET_FMA || TARGET_AVX512F"
3598 vfnmadd132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
3599 vfnmadd213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
3600 [(set_attr "type" "ssemuladd")
3601 (set_attr "mode" "<MODE>")])
;; Scalar FMA3 negated multiply-subtract on the VF_128 modes.
;; Operand 2 is listed first in the RTL here, but operand 1 is still the
;; one tied to the destination (" 0, 0"); the alternatives emit the 132
;; and 213 forms.
3603 (define_insn "*fmai_fnmsub_<mode><round_name>"
3604 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
3608 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>, v"))
3609 (match_operand:VF_128 1 "<round_nimm_predicate>" " 0, 0")
3611 (match_operand:VF_128 3 "<round_nimm_predicate>" " v,<round_constraint>")))
3614 "TARGET_FMA || TARGET_AVX512F"
3616 vfnmsub132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
3617 vfnmsub213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
3618 [(set_attr "type" "ssemuladd")
3619 (set_attr "mode" "<MODE>")])
3621 ;; FMA4 floating point scalar intrinsics. These write the
3622 ;; entire destination register, with the high-order elements zeroed.
;; Named expander for the scalar FMA4 multiply-add intrinsic on the
;; VF_128 modes.  The preparation statement sets operands[4] to the zero
;; constant of the vector mode (CONST0_RTX).
3624 (define_expand "fma4i_vmfmadd_<mode>"
3625 [(set (match_operand:VF_128 0 "register_operand")
3628 (match_operand:VF_128 1 "nonimmediate_operand")
3629 (match_operand:VF_128 2 "nonimmediate_operand")
3630 (match_operand:VF_128 3 "nonimmediate_operand"))
3634 "operands[4] = CONST0_RTX (<MODE>mode);")
;; Scalar FMA4 multiply-add on the VF_128 modes: four-operand vfmadd
;; with the scalar mnemonic suffix.  Operand 4 must be a zero constant
;; (const0_operand).  The two alternatives allow the memory operand to
;; be either operand 3 or operand 2.
3636 (define_insn "*fma4i_vmfmadd_<mode>"
3637 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
3640 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
3641 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
3642 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
3643 (match_operand:VF_128 4 "const0_operand")
3646 "vfmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
3647 [(set_attr "type" "ssemuladd")
3648 (set_attr "mode" "<MODE>")])
;; Scalar FMA4 multiply-subtract on the VF_128 modes: four-operand vfmsub
;; with the scalar mnemonic suffix.  Operand 4 must be a zero constant
;; (const0_operand).  The two alternatives allow the memory operand to
;; be either operand 3 or operand 2.
3650 (define_insn "*fma4i_vmfmsub_<mode>"
3651 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
3654 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
3655 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
3657 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
3658 (match_operand:VF_128 4 "const0_operand")
3661 "vfmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
3662 [(set_attr "type" "ssemuladd")
3663 (set_attr "mode" "<MODE>")])
;; Scalar FMA4 negated multiply-add on the VF_128 modes: four-operand
;; vfnmadd with the scalar mnemonic suffix.  Operand 4 must be a zero
;; constant (const0_operand).  The two alternatives allow the memory
;; operand to be either operand 3 or operand 2.
3665 (define_insn "*fma4i_vmfnmadd_<mode>"
3666 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
3670 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
3671 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
3672 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
3673 (match_operand:VF_128 4 "const0_operand")
3676 "vfnmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
3677 [(set_attr "type" "ssemuladd")
3678 (set_attr "mode" "<MODE>")])
;; Scalar FMA4 negated multiply-subtract on the VF_128 modes:
;; four-operand vfnmsub with the scalar mnemonic suffix.  Operand 4 must
;; be a zero constant (const0_operand).  The two alternatives allow the
;; memory operand to be either operand 3 or operand 2.
3680 (define_insn "*fma4i_vmfnmsub_<mode>"
3681 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
3685 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
3686 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
3688 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
3689 (match_operand:VF_128 4 "const0_operand")
3692 "vfnmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
3693 [(set_attr "type" "ssemuladd")
3694 (set_attr "mode" "<MODE>")])
3696 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3698 ;; Parallel single-precision floating point conversion operations
3700 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2ps: converts the V2SI operand 2 (constraint "ym") to V2SF with
;; `float' and writes into the V4SF destination, which is tied to
;; operand 1 (constraint "0").
3702 (define_insn "sse_cvtpi2ps"
3703 [(set (match_operand:V4SF 0 "register_operand" "=x")
3706 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
3707 (match_operand:V4SF 1 "register_operand" "0")
3710 "cvtpi2ps\t{%2, %0|%0, %2}"
3711 [(set_attr "type" "ssecvt")
3712 (set_attr "mode" "V4SF")])
;; cvtps2pi: converts V4SF operand 1 through a V4SI unspec and keeps
;; elements 0 and 1 (the `parallel' selector) as the V2SI result in
;; operand 0 (constraint "y", unit "mmx").
3714 (define_insn "sse_cvtps2pi"
3715 [(set (match_operand:V2SI 0 "register_operand" "=y")
3717 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
3719 (parallel [(const_int 0) (const_int 1)])))]
3721 "cvtps2pi\t{%1, %0|%0, %q1}"
3722 [(set_attr "type" "ssecvt")
3723 (set_attr "unit" "mmx")
3724 (set_attr "mode" "DI")])
;; cvttps2pi: converts V4SF operand 1 with `fix' to V4SI and keeps
;; elements 0 and 1 (the `parallel' selector) as the V2SI result in
;; operand 0 (constraint "y", unit "mmx").
3726 (define_insn "sse_cvttps2pi"
3727 [(set (match_operand:V2SI 0 "register_operand" "=y")
3729 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
3730 (parallel [(const_int 0) (const_int 1)])))]
3732 "cvttps2pi\t{%1, %0|%0, %q1}"
3733 [(set_attr "type" "ssecvt")
3734 (set_attr "unit" "mmx")
3735 (set_attr "prefix_rep" "0")
3736 (set_attr "mode" "SF")])
;; Converts the SI operand 2 to SF (`float') into a V4SF destination.
;; Alternatives 0-1 (isa "noavx") emit the two-operand cvtsi2ss with
;; operand 1 tied to the destination, for a register or memory source.
;; Alternative 2 (isa "avx") emits the three-operand vcvtsi2ss with the
;; <round_op3> placeholder.
3738 (define_insn "sse_cvtsi2ss<round_name>"
3739 [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
3742 (float:SF (match_operand:SI 2 "<round_nimm_predicate>" "r,m,<round_constraint3>")))
3743 (match_operand:V4SF 1 "register_operand" "0,0,v")
3747 cvtsi2ss\t{%2, %0|%0, %2}
3748 cvtsi2ss\t{%2, %0|%0, %2}
3749 vcvtsi2ss\t{<round_op3>%2, %1, %0|%0, %1, %2<round_op3>}"
3750 [(set_attr "isa" "noavx,noavx,avx")
3751 (set_attr "type" "sseicvt")
3752 (set_attr "athlon_decode" "vector,double,*")
3753 (set_attr "amdfam10_decode" "vector,double,*")
3754 (set_attr "bdver1_decode" "double,direct,*")
3755 (set_attr "btver2_decode" "double,double,double")
3756 (set_attr "prefix" "orig,orig,maybe_evex")
3757 (set_attr "mode" "SF")])
;; 64-bit-only variant: converts the DI operand 2 to SF (`float') into a
;; V4SF destination.  Alternatives 0-1 (isa "noavx") emit cvtsi2ssq with
;; operand 1 tied to the destination; alternative 2 (isa "avx") emits
;; the three-operand vcvtsi2ssq with the <round_op3> placeholder.
3759 (define_insn "sse_cvtsi2ssq<round_name>"
3760 [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
3763 (float:SF (match_operand:DI 2 "<round_nimm_predicate>" "r,m,<round_constraint3>")))
3764 (match_operand:V4SF 1 "register_operand" "0,0,v")
3766 "TARGET_SSE && TARGET_64BIT"
3768 cvtsi2ssq\t{%2, %0|%0, %2}
3769 cvtsi2ssq\t{%2, %0|%0, %2}
3770 vcvtsi2ssq\t{<round_op3>%2, %1, %0|%0, %1, %2<round_op3>}"
3771 [(set_attr "isa" "noavx,noavx,avx")
3772 (set_attr "type" "sseicvt")
3773 (set_attr "athlon_decode" "vector,double,*")
3774 (set_attr "amdfam10_decode" "vector,double,*")
3775 (set_attr "bdver1_decode" "double,direct,*")
3776 (set_attr "btver2_decode" "double,double,double")
3777 (set_attr "length_vex" "*,*,4")
3778 (set_attr "prefix_rex" "1,1,*")
3779 (set_attr "prefix" "orig,orig,maybe_evex")
3780 (set_attr "mode" "SF")])
;; cvtss2si from a vector source: element 0 of V4SF operand 1 is
;; converted to the SI destination via UNSPEC_FIX_NOTRUNC.
;; The <round_*> placeholders are defined outside this section.
3782 (define_insn "sse_cvtss2si<round_name>"
3783 [(set (match_operand:SI 0 "register_operand" "=r,r")
3786 (match_operand:V4SF 1 "<round_nimm_predicate>" "v,<round_constraint2>")
3787 (parallel [(const_int 0)]))]
3788 UNSPEC_FIX_NOTRUNC))]
3790 "%vcvtss2si\t{<round_op2>%1, %0|%0, %k1<round_op2>}"
3791 [(set_attr "type" "sseicvt")
3792 (set_attr "athlon_decode" "double,vector")
3793 (set_attr "bdver1_decode" "double,double")
3794 (set_attr "prefix_rep" "1")
3795 (set_attr "prefix" "maybe_vex")
3796 (set_attr "mode" "SI")])
;; cvtss2si from a scalar source: SF operand 1 (register or memory) is
;; converted to the SI destination via UNSPEC_FIX_NOTRUNC.
3798 (define_insn "sse_cvtss2si_2"
3799 [(set (match_operand:SI 0 "register_operand" "=r,r")
3800 (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "v,m")]
3801 UNSPEC_FIX_NOTRUNC))]
3803 "%vcvtss2si\t{%1, %0|%0, %k1}"
3804 [(set_attr "type" "sseicvt")
3805 (set_attr "athlon_decode" "double,vector")
3806 (set_attr "amdfam10_decode" "double,double")
3807 (set_attr "bdver1_decode" "double,double")
3808 (set_attr "prefix_rep" "1")
3809 (set_attr "prefix" "maybe_vex")
3810 (set_attr "mode" "SI")])
;; 64-bit-only cvtss2si from a vector source: element 0 of V4SF
;; operand 1 is converted to the DI destination via UNSPEC_FIX_NOTRUNC.
;; The <round_*> placeholders are defined outside this section.
3812 (define_insn "sse_cvtss2siq<round_name>"
3813 [(set (match_operand:DI 0 "register_operand" "=r,r")
3816 (match_operand:V4SF 1 "<round_nimm_predicate>" "v,<round_constraint2>")
3817 (parallel [(const_int 0)]))]
3818 UNSPEC_FIX_NOTRUNC))]
3819 "TARGET_SSE && TARGET_64BIT"
3820 "%vcvtss2si{q}\t{<round_op2>%1, %0|%0, %k1<round_op2>}"
3821 [(set_attr "type" "sseicvt")
3822 (set_attr "athlon_decode" "double,vector")
3823 (set_attr "bdver1_decode" "double,double")
3824 (set_attr "prefix_rep" "1")
3825 (set_attr "prefix" "maybe_vex")
3826 (set_attr "mode" "DI")])
;; 64-bit-only cvtss2si from a scalar source: SF operand 1 (register or
;; memory) is converted to the DI destination via UNSPEC_FIX_NOTRUNC.
3828 (define_insn "sse_cvtss2siq_2"
3829 [(set (match_operand:DI 0 "register_operand" "=r,r")
3830 (unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "v,m")]
3831 UNSPEC_FIX_NOTRUNC))]
3832 "TARGET_SSE && TARGET_64BIT"
3833 "%vcvtss2si{q}\t{%1, %0|%0, %k1}"
3834 [(set_attr "type" "sseicvt")
3835 (set_attr "athlon_decode" "double,vector")
3836 (set_attr "amdfam10_decode" "double,double")
3837 (set_attr "bdver1_decode" "double,double")
3838 (set_attr "prefix_rep" "1")
3839 (set_attr "prefix" "maybe_vex")
3840 (set_attr "mode" "DI")])
;; cvttss2si: converts element 0 of V4SF operand 1 to the SI
;; destination.  The <round_saeonly_*> placeholders are defined outside
;; this section.
3842 (define_insn "sse_cvttss2si<round_saeonly_name>"
3843 [(set (match_operand:SI 0 "register_operand" "=r,r")
3846 (match_operand:V4SF 1 "<round_saeonly_nimm_predicate>" "v,<round_saeonly_constraint2>")
3847 (parallel [(const_int 0)]))))]
3849 "%vcvttss2si\t{<round_saeonly_op2>%1, %0|%0, %k1<round_saeonly_op2>}"
3850 [(set_attr "type" "sseicvt")
3851 (set_attr "athlon_decode" "double,vector")
3852 (set_attr "amdfam10_decode" "double,double")
3853 (set_attr "bdver1_decode" "double,double")
3854 (set_attr "prefix_rep" "1")
3855 (set_attr "prefix" "maybe_vex")
3856 (set_attr "mode" "SI")])
;; 64-bit-only cvttss2si: converts element 0 of V4SF operand 1 to the DI
;; destination.  The <round_saeonly_*> placeholders are defined outside
;; this section.
;; NOTE(review): the second alternative uses <round_saeonly_constraint>
;; while the SI variant uses <round_saeonly_constraint2> -- confirm the
;; difference is intentional.
3858 (define_insn "sse_cvttss2siq<round_saeonly_name>"
3859 [(set (match_operand:DI 0 "register_operand" "=r,r")
3862 (match_operand:V4SF 1 "<round_saeonly_nimm_predicate>" "v,<round_saeonly_constraint>")
3863 (parallel [(const_int 0)]))))]
3864 "TARGET_SSE && TARGET_64BIT"
3865 "%vcvttss2si{q}\t{<round_saeonly_op2>%1, %0|%0, %k1<round_saeonly_op2>}"
3866 [(set_attr "type" "sseicvt")
3867 (set_attr "athlon_decode" "double,vector")
3868 (set_attr "amdfam10_decode" "double,double")
3869 (set_attr "bdver1_decode" "double,double")
3870 (set_attr "prefix_rep" "1")
3871 (set_attr "prefix" "maybe_vex")
3872 (set_attr "mode" "DI")])
;; vcvtusi2{ss,sd} from a 32-bit source: unsigned_float of the SI
;; operand 2 is duplicated into a VF_128 vector; operand 1 is the other
;; VF_128 register input.  Requires AVX512F; EVEX-encoded.
3874 (define_insn "cvtusi2<ssescalarmodesuffix>32<round_name>"
3875 [(set (match_operand:VF_128 0 "register_operand" "=v")
3877 (vec_duplicate:VF_128
3878 (unsigned_float:<ssescalarmode>
3879 (match_operand:SI 2 "<round_nimm_predicate>" "<round_constraint3>")))
3880 (match_operand:VF_128 1 "register_operand" "v")
3882 "TARGET_AVX512F && <round_modev4sf_condition>"
3883 "vcvtusi2<ssescalarmodesuffix>\t{<round_op3>%2, %1, %0|%0, %1, %2<round_op3>}"
3884 [(set_attr "type" "sseicvt")
3885 (set_attr "prefix" "evex")
3886 (set_attr "mode" "<ssescalarmode>")])
;; vcvtusi2{ss,sd} from a 64-bit source: unsigned_float of the DI
;; operand 2 is duplicated into a VF_128 vector; operand 1 is the other
;; VF_128 register input.  Requires AVX512F and 64-bit mode;
;; EVEX-encoded.
3888 (define_insn "cvtusi2<ssescalarmodesuffix>64<round_name>"
3889 [(set (match_operand:VF_128 0 "register_operand" "=v")
3891 (vec_duplicate:VF_128
3892 (unsigned_float:<ssescalarmode>
3893 (match_operand:DI 2 "<round_nimm_predicate>" "<round_constraint3>")))
3894 (match_operand:VF_128 1 "register_operand" "v")
3896 "TARGET_AVX512F && TARGET_64BIT"
3897 "vcvtusi2<ssescalarmodesuffix>\t{<round_op3>%2, %1, %0|%0, %1, %2<round_op3>}"
3898 [(set_attr "type" "sseicvt")
3899 (set_attr "prefix" "evex")
3900 (set_attr "mode" "<ssescalarmode>")])
;; Integer vector to float vector conversion for the VF1 modes: emits
;; (v)cvtdq2ps from the <sseintvecmode> operand 1.  The <mask_*> and
;; <round_*> placeholders are defined outside this section.
3902 (define_insn "float<sseintvecmodelower><mode>2<mask_name><round_name>"
3903 [(set (match_operand:VF1 0 "register_operand" "=v")
3905 (match_operand:<sseintvecmode> 1 "<round_nimm_predicate>" "<round_constraint>")))]
3906 "TARGET_SSE2 && <mask_mode512bit_condition> && <round_mode512bit_condition>"
3907 "%vcvtdq2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
3908 [(set_attr "type" "ssecvt")
3909 (set_attr "prefix" "maybe_vex")
3910 (set_attr "mode" "<sseinsnmode>")])
;; Unsigned integer vector to float vector conversion (unsigned_float)
;; for the VF1_AVX512VL modes: emits the EVEX-encoded vcvtudq2ps.
3912 (define_insn "ufloat<sseintvecmodelower><mode>2<mask_name><round_name>"
3913 [(set (match_operand:VF1_AVX512VL 0 "register_operand" "=v")
3914 (unsigned_float:VF1_AVX512VL
3915 (match_operand:<sseintvecmode> 1 "nonimmediate_operand" "<round_constraint>")))]
3917 "vcvtudq2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
3918 [(set_attr "type" "ssecvt")
3919 (set_attr "prefix" "evex")
3920 (set_attr "mode" "<MODE>")])
;; Named expander for unsigned integer vector to float vector conversion
;; on the VF1 modes.  V16SFmode is handled by emitting ufloatv16siv16sf2;
;; the expander also calls ix86_expand_vector_convert_uns_vsivsf
;; (presumably on the else path -- the `else' line is not visible in
;; this section).
3922 (define_expand "floatuns<sseintvecmodelower><mode>2"
3923 [(match_operand:VF1 0 "register_operand")
3924 (match_operand:<sseintvecmode> 1 "register_operand")]
3925 "TARGET_SSE2 && (<MODE>mode == V4SFmode || TARGET_AVX2)"
3927 if (<MODE>mode == V16SFmode)
3928 emit_insn (gen_ufloatv16siv16sf2 (operands[0], operands[1]));
3930 ix86_expand_vector_convert_uns_vsivsf (operands[0], operands[1]);
;; Maps each SImode integer vector mode to the lower-case name of the
;; SFmode vector mode with the same element count.
3936 ;; For <sse2_avx_avx512f>_fix_notrunc<sf2simodelower><mode> insn pattern
3937 (define_mode_attr sf2simodelower
3938 [(V16SI "v16sf") (V8SI "v8sf") (V4SI "v4sf")])
;; Float vector to integer vector conversion (UNSPEC_FIX_NOTRUNC) for
;; the VI4_AVX modes: emits (v)cvtps2dq from the <ssePSmode> operand 1.
;; The prefix_data16 attribute is computed from a TARGET_AVX test.
3940 (define_insn "<sse2_avx_avx512f>_fix_notrunc<sf2simodelower><mode><mask_name>"
3941 [(set (match_operand:VI4_AVX 0 "register_operand" "=v")
3943 [(match_operand:<ssePSmode> 1 "nonimmediate_operand" "vm")]
3944 UNSPEC_FIX_NOTRUNC))]
3945 "TARGET_SSE2 && <mask_mode512bit_condition>"
3946 "%vcvtps2dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
3947 [(set_attr "type" "ssecvt")
3948 (set (attr "prefix_data16")
3950 (match_test "TARGET_AVX")
3952 (const_string "1")))
3953 (set_attr "prefix" "maybe_vex")
3954 (set_attr "mode" "<sseinsnmode>")])
;; 512-bit float to integer conversion (UNSPEC_FIX_NOTRUNC): V16SF
;; operand 1 to the V16SI destination via the EVEX-encoded vcvtps2dq,
;; with optional mask and rounding placeholders.
3956 (define_insn "<mask_codefor>avx512f_fix_notruncv16sfv16si<mask_name><round_name>"
3957 [(set (match_operand:V16SI 0 "register_operand" "=v")
3959 [(match_operand:V16SF 1 "<round_nimm_predicate>" "<round_constraint>")]
3960 UNSPEC_FIX_NOTRUNC))]
3962 "vcvtps2dq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
3963 [(set_attr "type" "ssecvt")
3964 (set_attr "prefix" "evex")
3965 (set_attr "mode" "XI")])
;; Float to unsigned integer vector conversion
;; (UNSPEC_UNSIGNED_FIX_NOTRUNC) for the VI4_AVX512VL modes: emits the
;; EVEX-encoded vcvtps2udq from the <ssePSmode> operand 1.
3967 (define_insn "<mask_codefor><avx512>_ufix_notrunc<sf2simodelower><mode><mask_name><round_name>"
3968 [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
3969 (unspec:VI4_AVX512VL
3970 [(match_operand:<ssePSmode> 1 "nonimmediate_operand" "<round_constraint>")]
3971 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
3973 "vcvtps2udq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
3974 [(set_attr "type" "ssecvt")
3975 (set_attr "prefix" "evex")
3976 (set_attr "mode" "<sseinsnmode>")])
;; Packed-single (<ssePSmode2>) to signed 64-bit integer conversion for the
;; VI8_256_512 modes (UNSPEC_FIX_NOTRUNC).  Requires AVX512DQ; emits vcvtps2qq
;; with optional mask and rounding operands.
3978 (define_insn "<mask_codefor>avx512dq_cvtps2qq<mode><mask_name><round_name>"
3979 [(set (match_operand:VI8_256_512 0 "register_operand" "=v")
3980 (unspec:VI8_256_512 [(match_operand:<ssePSmode2> 1 "nonimmediate_operand" "<round_constraint>")]
3981 UNSPEC_FIX_NOTRUNC))]
3982 "TARGET_AVX512DQ && <round_mode512bit_condition>"
3983 "vcvtps2qq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
3984 [(set_attr "type" "ssecvt")
3985 (set_attr "prefix" "evex")
3986 (set_attr "mode" "<sseinsnmode>")])
;; 128-bit vcvtps2qq: V2DI result from elements 0 and 1 of a V4SF source
;; (UNSPEC_FIX_NOTRUNC).  Requires both AVX512DQ and AVX512VL; no rounding
;; variant.
3988 (define_insn "<mask_codefor>avx512dq_cvtps2qqv2di<mask_name>"
3989 [(set (match_operand:V2DI 0 "register_operand" "=v")
3992 (match_operand:V4SF 1 "nonimmediate_operand" "vm")
3993 (parallel [(const_int 0) (const_int 1)]))]
3994 UNSPEC_FIX_NOTRUNC))]
3995 "TARGET_AVX512DQ && TARGET_AVX512VL"
3996 "vcvtps2qq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
3997 [(set_attr "type" "ssecvt")
3998 (set_attr "prefix" "evex")
3999 (set_attr "mode" "TI")])
;; Unsigned counterpart of avx512dq_cvtps2qq<mode>: packed single to unsigned
;; 64-bit integers (UNSPEC_UNSIGNED_FIX_NOTRUNC) for VI8_256_512.  Requires
;; AVX512DQ; emits vcvtps2uqq.
4001 (define_insn "<mask_codefor>avx512dq_cvtps2uqq<mode><mask_name><round_name>"
4002 [(set (match_operand:VI8_256_512 0 "register_operand" "=v")
4003 (unspec:VI8_256_512 [(match_operand:<ssePSmode2> 1 "nonimmediate_operand" "<round_constraint>")]
4004 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4005 "TARGET_AVX512DQ && <round_mode512bit_condition>"
4006 "vcvtps2uqq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4007 [(set_attr "type" "ssecvt")
4008 (set_attr "prefix" "evex")
4009 (set_attr "mode" "<sseinsnmode>")])
;; 128-bit vcvtps2uqq: V2DI result from elements 0 and 1 of a V4SF source
;; (UNSPEC_UNSIGNED_FIX_NOTRUNC).  Requires both AVX512DQ and AVX512VL.
4011 (define_insn "<mask_codefor>avx512dq_cvtps2uqqv2di<mask_name>"
4012 [(set (match_operand:V2DI 0 "register_operand" "=v")
4015 (match_operand:V4SF 1 "nonimmediate_operand" "vm")
4016 (parallel [(const_int 0) (const_int 1)]))]
4017 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4018 "TARGET_AVX512DQ && TARGET_AVX512VL"
4019 "vcvtps2uqq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4020 [(set_attr "type" "ssecvt")
4021 (set_attr "prefix" "evex")
4022 (set_attr "mode" "TI")])
;; Truncating V16SF to V16SI conversion; <fixsuffix> selects between the
;; vcvttps2dq and vcvttps2udq mnemonics.  Supports the optional mask and
;; SAE-only (<round_saeonly_name>) operands.
4024 (define_insn "<fixsuffix>fix_truncv16sfv16si2<mask_name><round_saeonly_name>"
4025 [(set (match_operand:V16SI 0 "register_operand" "=v")
4027 (match_operand:V16SF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
4029 "vcvttps2<fixsuffix>dq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
4030 [(set_attr "type" "ssecvt")
4031 (set_attr "prefix" "evex")
4032 (set_attr "mode" "XI")])
;; Truncating signed conversion V8SF -> V8SI (RTL `fix').  Needs AVX, plus
;; <mask_avx512vl_condition> for the masked variant; emits vcvttps2dq.
4034 (define_insn "fix_truncv8sfv8si2<mask_name>"
4035 [(set (match_operand:V8SI 0 "register_operand" "=v")
4036 (fix:V8SI (match_operand:V8SF 1 "nonimmediate_operand" "vm")))]
4037 "TARGET_AVX && <mask_avx512vl_condition>"
4038 "vcvttps2dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4039 [(set_attr "type" "ssecvt")
4040 (set_attr "prefix" "<mask_prefix>")
4041 (set_attr "mode" "OI")])
;; Truncating signed conversion V4SF -> V4SI (RTL `fix').  Needs SSE2, plus
;; <mask_avx512vl_condition> for the masked variant; emits (v)cvttps2dq.
;; prefix_rep and prefix_data16 are selected by TARGET_AVX tests.
;; NOTE(review): prefix_data16 is set twice below -- once conditionally and
;; once unconditionally to "0".  Confirm which setting is intended and drop
;; the other.
4043 (define_insn "fix_truncv4sfv4si2<mask_name>"
4044 [(set (match_operand:V4SI 0 "register_operand" "=v")
4045 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "vm")))]
4046 "TARGET_SSE2 && <mask_avx512vl_condition>"
4047 "%vcvttps2dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4048 [(set_attr "type" "ssecvt")
4049 (set (attr "prefix_rep")
4051 (match_test "TARGET_AVX")
4053 (const_string "1")))
4054 (set (attr "prefix_data16")
4056 (match_test "TARGET_AVX")
4058 (const_string "0")))
4059 (set_attr "prefix_data16" "0")
4060 (set_attr "prefix" "<mask_prefix2>")
4061 (set_attr "mode" "TI")])
;; Expander for the unsigned truncating conversion of a VF1 vector to its
;; integer vector mode.  V16SFmode is handed to gen_ufix_truncv16sfv16si2.
;; Otherwise the input is adjusted by ix86_expand_adjust_ufix_to_sfix_si
;; (which also fills tmp[2]), converted with the signed fix_trunc pattern, and
;; the result is XORed with tmp[2] into operands[0].
4063 (define_expand "fixuns_trunc<mode><sseintvecmodelower>2"
4064 [(match_operand:<sseintvecmode> 0 "register_operand")
4065 (match_operand:VF1 1 "register_operand")]
4068 if (<MODE>mode == V16SFmode)
4069 emit_insn (gen_ufix_truncv16sfv16si2 (operands[0],
4074 tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]);
4075 tmp[1] = gen_reg_rtx (<sseintvecmode>mode);
4076 emit_insn (gen_fix_trunc<mode><sseintvecmodelower>2 (tmp[1], tmp[0]));
4077 emit_insn (gen_xor<sseintvecmodelower>3 (operands[0], tmp[1], tmp[2]));
4082 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4084 ;; Parallel double-precision floating point conversion operations
4086 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; V2SI -> V2DF signed conversion (RTL `float'); emits cvtpi2pd.  The source
;; is either a "y" register (first alternative, unit "mmx") or memory.
4088 (define_insn "sse2_cvtpi2pd"
4089 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
4090 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
4092 "cvtpi2pd\t{%1, %0|%0, %1}"
4093 [(set_attr "type" "ssecvt")
4094 (set_attr "unit" "mmx,*")
4095 (set_attr "prefix_data16" "1,*")
4096 (set_attr "mode" "V2DF")])
;; V2DF -> V2SI conversion expressed as UNSPEC_FIX_NOTRUNC, with the result in
;; a "y" register; emits cvtpd2pi.
4098 (define_insn "sse2_cvtpd2pi"
4099 [(set (match_operand:V2SI 0 "register_operand" "=y")
4100 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
4101 UNSPEC_FIX_NOTRUNC))]
4103 "cvtpd2pi\t{%1, %0|%0, %1}"
4104 [(set_attr "type" "ssecvt")
4105 (set_attr "unit" "mmx")
4106 (set_attr "bdver1_decode" "double")
4107 (set_attr "btver2_decode" "direct")
4108 (set_attr "prefix_data16" "1")
4109 (set_attr "mode" "DI")])
;; Truncating V2DF -> V2SI conversion (RTL `fix'), with the result in a "y"
;; register; emits cvttpd2pi.
4111 (define_insn "sse2_cvttpd2pi"
4112 [(set (match_operand:V2SI 0 "register_operand" "=y")
4113 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
4115 "cvttpd2pi\t{%1, %0|%0, %1}"
4116 [(set_attr "type" "ssecvt")
4117 (set_attr "unit" "mmx")
4118 (set_attr "bdver1_decode" "double")
4119 (set_attr "prefix_data16" "1")
4120 (set_attr "mode" "TI")])
;; Converts SI operand 2 to DF, with V2DF operand 1 as the other vector input.
;; Alternatives 0 and 1 are the two-operand cvtsi2sd forms (isa "noavx",
;; operand 1 tied to the destination); alternative 2 is the three-operand
;; vcvtsi2sd (isa "avx").
4122 (define_insn "sse2_cvtsi2sd"
4123 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
4126 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m,rm")))
4127 (match_operand:V2DF 1 "register_operand" "0,0,x")
4131 cvtsi2sd\t{%2, %0|%0, %2}
4132 cvtsi2sd\t{%2, %0|%0, %2}
4133 vcvtsi2sd\t{%2, %1, %0|%0, %1, %2}"
4134 [(set_attr "isa" "noavx,noavx,avx")
4135 (set_attr "type" "sseicvt")
4136 (set_attr "athlon_decode" "double,direct,*")
4137 (set_attr "amdfam10_decode" "vector,double,*")
4138 (set_attr "bdver1_decode" "double,direct,*")
4139 (set_attr "btver2_decode" "double,double,double")
4140 (set_attr "prefix" "orig,orig,vex")
4141 (set_attr "mode" "DF")])
;; 64-bit variant of sse2_cvtsi2sd: converts DI operand 2 to DF.  Only
;; available when TARGET_64BIT.  The third (AVX) alternative uses "v"
;; registers and accepts the optional rounding operand (<round_op3>).
4143 (define_insn "sse2_cvtsi2sdq<round_name>"
4144 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
4147 (float:DF (match_operand:DI 2 "<round_nimm_predicate>" "r,m,<round_constraint3>")))
4148 (match_operand:V2DF 1 "register_operand" "0,0,v")
4150 "TARGET_SSE2 && TARGET_64BIT"
4152 cvtsi2sdq\t{%2, %0|%0, %2}
4153 cvtsi2sdq\t{%2, %0|%0, %2}
4154 vcvtsi2sdq\t{<round_op3>%2, %1, %0|%0, %1, %2<round_op3>}"
4155 [(set_attr "isa" "noavx,noavx,avx")
4156 (set_attr "type" "sseicvt")
4157 (set_attr "athlon_decode" "double,direct,*")
4158 (set_attr "amdfam10_decode" "vector,double,*")
4159 (set_attr "bdver1_decode" "double,direct,*")
4160 (set_attr "length_vex" "*,*,4")
4161 (set_attr "prefix_rex" "1,1,*")
4162 (set_attr "prefix" "orig,orig,maybe_evex")
4163 (set_attr "mode" "DF")])
;; Element 0 of a V4SF operand to an unsigned SI general register
;; (UNSPEC_UNSIGNED_FIX_NOTRUNC); emits vcvtss2usi with the optional rounding
;; operand.
4165 (define_insn "avx512f_vcvtss2usi<round_name>"
4166 [(set (match_operand:SI 0 "register_operand" "=r")
4169 (match_operand:V4SF 1 "<round_nimm_predicate>" "<round_constraint>")
4170 (parallel [(const_int 0)]))]
4171 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4173 "vcvtss2usi\t{<round_op2>%1, %0|%0, %1<round_op2>}"
4174 [(set_attr "type" "sseicvt")
4175 (set_attr "prefix" "evex")
4176 (set_attr "mode" "SI")])
;; DI-destination variant of avx512f_vcvtss2usi; requires AVX512F and
;; TARGET_64BIT.  The same vcvtss2usi mnemonic is emitted.
4178 (define_insn "avx512f_vcvtss2usiq<round_name>"
4179 [(set (match_operand:DI 0 "register_operand" "=r")
4182 (match_operand:V4SF 1 "<round_nimm_predicate>" "<round_constraint>")
4183 (parallel [(const_int 0)]))]
4184 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4185 "TARGET_AVX512F && TARGET_64BIT"
4186 "vcvtss2usi\t{<round_op2>%1, %0|%0, %1<round_op2>}"
4187 [(set_attr "type" "sseicvt")
4188 (set_attr "prefix" "evex")
4189 (set_attr "mode" "DI")])
;; Truncating conversion of element 0 of a V4SF operand to an SI general
;; register; emits vcvttss2usi with the optional SAE-only operand.
4191 (define_insn "avx512f_vcvttss2usi<round_saeonly_name>"
4192 [(set (match_operand:SI 0 "register_operand" "=r")
4195 (match_operand:V4SF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
4196 (parallel [(const_int 0)]))))]
4198 "vcvttss2usi\t{<round_saeonly_op2>%1, %0|%0, %1<round_saeonly_op2>}"
4199 [(set_attr "type" "sseicvt")
4200 (set_attr "prefix" "evex")
4201 (set_attr "mode" "SI")])
;; DI-destination variant of avx512f_vcvttss2usi; requires AVX512F and
;; TARGET_64BIT.
4203 (define_insn "avx512f_vcvttss2usiq<round_saeonly_name>"
4204 [(set (match_operand:DI 0 "register_operand" "=r")
4207 (match_operand:V4SF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
4208 (parallel [(const_int 0)]))))]
4209 "TARGET_AVX512F && TARGET_64BIT"
4210 "vcvttss2usi\t{<round_saeonly_op2>%1, %0|%0, %1<round_saeonly_op2>}"
4211 [(set_attr "type" "sseicvt")
4212 (set_attr "prefix" "evex")
4213 (set_attr "mode" "DI")])
;; Element 0 of a V2DF operand to an unsigned SI general register
;; (UNSPEC_UNSIGNED_FIX_NOTRUNC); emits vcvtsd2usi with the optional rounding
;; operand.
4215 (define_insn "avx512f_vcvtsd2usi<round_name>"
4216 [(set (match_operand:SI 0 "register_operand" "=r")
4219 (match_operand:V2DF 1 "<round_nimm_predicate>" "<round_constraint>")
4220 (parallel [(const_int 0)]))]
4221 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4223 "vcvtsd2usi\t{<round_op2>%1, %0|%0, %1<round_op2>}"
4224 [(set_attr "type" "sseicvt")
4225 (set_attr "prefix" "evex")
4226 (set_attr "mode" "SI")])
;; DI-destination variant of avx512f_vcvtsd2usi; requires AVX512F and
;; TARGET_64BIT.
4228 (define_insn "avx512f_vcvtsd2usiq<round_name>"
4229 [(set (match_operand:DI 0 "register_operand" "=r")
4232 (match_operand:V2DF 1 "<round_nimm_predicate>" "<round_constraint>")
4233 (parallel [(const_int 0)]))]
4234 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4235 "TARGET_AVX512F && TARGET_64BIT"
4236 "vcvtsd2usi\t{<round_op2>%1, %0|%0, %1<round_op2>}"
4237 [(set_attr "type" "sseicvt")
4238 (set_attr "prefix" "evex")
4239 (set_attr "mode" "DI")])
;; Truncating conversion of element 0 of a V2DF operand to an SI general
;; register; emits vcvttsd2usi with the optional SAE-only operand.
4241 (define_insn "avx512f_vcvttsd2usi<round_saeonly_name>"
4242 [(set (match_operand:SI 0 "register_operand" "=r")
4245 (match_operand:V2DF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
4246 (parallel [(const_int 0)]))))]
4248 "vcvttsd2usi\t{<round_saeonly_op2>%1, %0|%0, %1<round_saeonly_op2>}"
4249 [(set_attr "type" "sseicvt")
4250 (set_attr "prefix" "evex")
4251 (set_attr "mode" "SI")])
;; DI-destination variant of avx512f_vcvttsd2usi; requires AVX512F and
;; TARGET_64BIT.
4253 (define_insn "avx512f_vcvttsd2usiq<round_saeonly_name>"
4254 [(set (match_operand:DI 0 "register_operand" "=r")
4257 (match_operand:V2DF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
4258 (parallel [(const_int 0)]))))]
4259 "TARGET_AVX512F && TARGET_64BIT"
4260 "vcvttsd2usi\t{<round_saeonly_op2>%1, %0|%0, %1<round_saeonly_op2>}"
4261 [(set_attr "type" "sseicvt")
4262 (set_attr "prefix" "evex")
4263 (set_attr "mode" "DI")])
;; Element 0 of a V2DF operand to an SI general register
;; (UNSPEC_FIX_NOTRUNC); emits (v)cvtsd2si.  Two alternatives: a "v" register
;; source and the <round_constraint2> source.
4265 (define_insn "sse2_cvtsd2si<round_name>"
4266 [(set (match_operand:SI 0 "register_operand" "=r,r")
4269 (match_operand:V2DF 1 "<round_nimm_predicate>" "v,<round_constraint2>")
4270 (parallel [(const_int 0)]))]
4271 UNSPEC_FIX_NOTRUNC))]
4273 "%vcvtsd2si\t{<round_op2>%1, %0|%0, %q1<round_op2>}"
4274 [(set_attr "type" "sseicvt")
4275 (set_attr "athlon_decode" "double,vector")
4276 (set_attr "bdver1_decode" "double,double")
4277 (set_attr "btver2_decode" "double,double")
4278 (set_attr "prefix_rep" "1")
4279 (set_attr "prefix" "maybe_vex")
4280 (set_attr "mode" "SI")])
;; Same (v)cvtsd2si conversion, but matching a plain DF operand (register or
;; memory) rather than an element selected from a V2DF vector.
4282 (define_insn "sse2_cvtsd2si_2"
4283 [(set (match_operand:SI 0 "register_operand" "=r,r")
4284 (unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "v,m")]
4285 UNSPEC_FIX_NOTRUNC))]
4287 "%vcvtsd2si\t{%1, %0|%0, %q1}"
4288 [(set_attr "type" "sseicvt")
4289 (set_attr "athlon_decode" "double,vector")
4290 (set_attr "amdfam10_decode" "double,double")
4291 (set_attr "bdver1_decode" "double,double")
4292 (set_attr "prefix_rep" "1")
4293 (set_attr "prefix" "maybe_vex")
4294 (set_attr "mode" "SI")])
;; DI-destination variant of sse2_cvtsd2si; requires SSE2 and TARGET_64BIT.
;; Emits (v)cvtsd2si with the {q} suffix in the template.
4296 (define_insn "sse2_cvtsd2siq<round_name>"
4297 [(set (match_operand:DI 0 "register_operand" "=r,r")
4300 (match_operand:V2DF 1 "<round_nimm_predicate>" "v,<round_constraint2>")
4301 (parallel [(const_int 0)]))]
4302 UNSPEC_FIX_NOTRUNC))]
4303 "TARGET_SSE2 && TARGET_64BIT"
4304 "%vcvtsd2si{q}\t{<round_op2>%1, %0|%0, %q1<round_op2>}"
4305 [(set_attr "type" "sseicvt")
4306 (set_attr "athlon_decode" "double,vector")
4307 (set_attr "bdver1_decode" "double,double")
4308 (set_attr "prefix_rep" "1")
4309 (set_attr "prefix" "maybe_vex")
4310 (set_attr "mode" "DI")])
;; DI-destination variant of sse2_cvtsd2si_2 (plain DF operand); requires
;; SSE2 and TARGET_64BIT.
4312 (define_insn "sse2_cvtsd2siq_2"
4313 [(set (match_operand:DI 0 "register_operand" "=r,r")
4314 (unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "v,m")]
4315 UNSPEC_FIX_NOTRUNC))]
4316 "TARGET_SSE2 && TARGET_64BIT"
4317 "%vcvtsd2si{q}\t{%1, %0|%0, %q1}"
4318 [(set_attr "type" "sseicvt")
4319 (set_attr "athlon_decode" "double,vector")
4320 (set_attr "amdfam10_decode" "double,double")
4321 (set_attr "bdver1_decode" "double,double")
4322 (set_attr "prefix_rep" "1")
4323 (set_attr "prefix" "maybe_vex")
4324 (set_attr "mode" "DI")])
;; Truncating conversion of element 0 of a V2DF operand to an SI general
;; register; emits (v)cvttsd2si with the optional SAE-only operand.
4326 (define_insn "sse2_cvttsd2si<round_saeonly_name>"
4327 [(set (match_operand:SI 0 "register_operand" "=r,r")
4330 (match_operand:V2DF 1 "<round_saeonly_nimm_predicate>" "v,<round_saeonly_constraint2>")
4331 (parallel [(const_int 0)]))))]
4333 "%vcvttsd2si\t{<round_saeonly_op2>%1, %0|%0, %q1<round_saeonly_op2>}"
4334 [(set_attr "type" "sseicvt")
4335 (set_attr "athlon_decode" "double,vector")
4336 (set_attr "amdfam10_decode" "double,double")
4337 (set_attr "bdver1_decode" "double,double")
4338 (set_attr "btver2_decode" "double,double")
4339 (set_attr "prefix_rep" "1")
4340 (set_attr "prefix" "maybe_vex")
4341 (set_attr "mode" "SI")])
;; DI-destination variant of sse2_cvttsd2si; requires SSE2 and TARGET_64BIT.
4343 (define_insn "sse2_cvttsd2siq<round_saeonly_name>"
4344 [(set (match_operand:DI 0 "register_operand" "=r,r")
4347 (match_operand:V2DF 1 "<round_saeonly_nimm_predicate>" "v,<round_saeonly_constraint2>")
4348 (parallel [(const_int 0)]))))]
4349 "TARGET_SSE2 && TARGET_64BIT"
4350 "%vcvttsd2si{q}\t{<round_saeonly_op2>%1, %0|%0, %q1<round_saeonly_op2>}"
4351 [(set_attr "type" "sseicvt")
4352 (set_attr "athlon_decode" "double,vector")
4353 (set_attr "amdfam10_decode" "double,double")
4354 (set_attr "bdver1_decode" "double,double")
4355 (set_attr "prefix_rep" "1")
4356 (set_attr "prefix" "maybe_vex")
4357 (set_attr "mode" "DI")])
4359 ;; For float<si2dfmodelower><mode>2 insn pattern
;; SI vector mode with the same element count as each DF vector mode.
4360 (define_mode_attr si2dfmode
4361 [(V8DF "V8SI") (V4DF "V4SI")])
;; Lower-case spelling of si2dfmode, for use inside pattern names.
4362 (define_mode_attr si2dfmodelower
4363 [(V8DF "v8si") (V4DF "v4si")])
;; Signed SI vector (<si2dfmode>) to DF vector conversion for the
;; VF2_512_256 modes (RTL `float'); emits vcvtdq2pd with an optional mask.
4365 (define_insn "float<si2dfmodelower><mode>2<mask_name>"
4366 [(set (match_operand:VF2_512_256 0 "register_operand" "=v")
4367 (float:VF2_512_256 (match_operand:<si2dfmode> 1 "nonimmediate_operand" "vm")))]
4368 "TARGET_AVX && <mask_mode512bit_condition>"
4369 "vcvtdq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4370 [(set_attr "type" "ssecvt")
4371 (set_attr "prefix" "maybe_vex")
4372 (set_attr "mode" "<MODE>")])
;; Signed/unsigned (selected by <floatsuffix>) conversion of a 64-bit integer
;; vector (<sseintvecmode>) to the VF2_AVX512VL double vector modes; emits
;; vcvt[u]qq2pd with optional mask and rounding operands.
;; The source constraint comes from <round_constraint> instead of a fixed
;; "vm": this pattern is also instantiated through <round_name>, and like the
;; other rounding-capable patterns in this file it must let the rounding
;; variant narrow the constraint, since embedded rounding cannot be combined
;; with a memory source.
4374 (define_insn "<floatsuffix>float<sseintvecmodelower><mode>2<mask_name><round_name>"
4375 [(set (match_operand:VF2_AVX512VL 0 "register_operand" "=v")
4376 (any_float:VF2_AVX512VL
4377 (match_operand:<sseintvecmode> 1 "nonimmediate_operand" "<round_constraint>")))]
4379 "vcvt<floatsuffix>qq2pd\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4380 [(set_attr "type" "ssecvt")
4381 (set_attr "prefix" "evex")
4382 (set_attr "mode" "<MODE>")])
4384 ;; For <floatsuffix>float<sselongvecmodelower><mode>2 insn patterns
;; Mnemonic suffix appended to vcvt[u]qq2ps: empty for V8SF, "{y}" for V4SF.
4385 (define_mode_attr qq2pssuff
4386 [(V8SF "") (V4SF "{y}")])
;; DI vector mode with the same element count as each SF vector mode.
4388 (define_mode_attr sselongvecmode
4389 [(V8SF "V8DI") (V4SF "V4DI")])
;; Lower-case spelling of sselongvecmode, for use inside pattern names.
4391 (define_mode_attr sselongvecmodelower
4392 [(V8SF "v8di") (V4SF "v4di")])
;; Integer mode name (XI/OI/TI) associated with each listed float vector
;; mode; used as the "mode" attribute value of conversion insns.
4394 (define_mode_attr sseintvecmode3
4395 [(V8SF "XI") (V4SF "OI")
4396 (V8DF "OI") (V4DF "TI")])
;; Signed/unsigned (selected by <floatsuffix>) conversion of a 64-bit integer
;; vector (<sselongvecmode>) to the VF1_128_256VL single-precision modes.
;; Requires AVX512DQ; emits vcvt[u]qq2ps with the <qq2pssuff> size suffix.
4398 (define_insn "<floatsuffix>float<sselongvecmodelower><mode>2<mask_name><round_name>"
4399 [(set (match_operand:VF1_128_256VL 0 "register_operand" "=v")
4400 (any_float:VF1_128_256VL
4401 (match_operand:<sselongvecmode> 1 "nonimmediate_operand" "<round_constraint>")))]
4402 "TARGET_AVX512DQ && <round_modev8sf_condition>"
4403 "vcvt<floatsuffix>qq2ps<qq2pssuff>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4404 [(set_attr "type" "ssecvt")
4405 (set_attr "prefix" "evex")
4406 (set_attr "mode" "<MODE>")])
;; V2DI -> V2SF conversion whose V4SF destination also carries a zero V2SF
;; const_vector.  Requires AVX512DQ and AVX512VL; emits vcvt[u]qq2ps{x}.
4408 (define_insn "*<floatsuffix>floatv2div2sf2"
4409 [(set (match_operand:V4SF 0 "register_operand" "=v")
4411 (any_float:V2SF (match_operand:V2DI 1 "nonimmediate_operand" "vm"))
4412 (const_vector:V2SF [(const_int 0) (const_int 0)])))]
4413 "TARGET_AVX512DQ && TARGET_AVX512VL"
4414 "vcvt<floatsuffix>qq2ps{x}\t{%1, %0|%0, %1}"
4415 [(set_attr "type" "ssecvt")
4416 (set_attr "prefix" "evex")
4417 (set_attr "mode" "V4SF")])
;; Masked form of the V2DI -> V2SF conversion.  Operand 2 ("0C") is the V4SF
;; merge source, of which elements 0 and 1 are used; operand 3 is the QI mask
;; register ("Yk").  The template prints the mask as %{%3%} followed by %N2.
4419 (define_insn "<floatsuffix>floatv2div2sf2_mask"
4420 [(set (match_operand:V4SF 0 "register_operand" "=v")
4423 (any_float:V2SF (match_operand:V2DI 1 "nonimmediate_operand" "vm"))
4425 (match_operand:V4SF 2 "vector_move_operand" "0C")
4426 (parallel [(const_int 0) (const_int 1)]))
4427 (match_operand:QI 3 "register_operand" "Yk"))
4428 (const_vector:V2SF [(const_int 0) (const_int 0)])))]
4429 "TARGET_AVX512DQ && TARGET_AVX512VL"
4430 "vcvt<floatsuffix>qq2ps{x}\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
4431 [(set_attr "type" "ssecvt")
4432 (set_attr "prefix" "evex")
4433 (set_attr "mode" "V4SF")])
;; Unsigned SI vector (<si2dfmode>) to DF vector conversion for the
;; VF2_512_256VL modes; emits vcvtudq2pd with an optional mask.
4435 (define_insn "ufloat<si2dfmodelower><mode>2<mask_name>"
4436 [(set (match_operand:VF2_512_256VL 0 "register_operand" "=v")
4437 (unsigned_float:VF2_512_256VL
4438 (match_operand:<si2dfmode> 1 "nonimmediate_operand" "vm")))]
4440 "vcvtudq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4441 [(set_attr "type" "ssecvt")
4442 (set_attr "prefix" "evex")
4443 (set_attr "mode" "<MODE>")])
;; Unsigned conversion of elements 0 and 1 of a V4SI operand to V2DF; emits
;; vcvtudq2pd with an optional mask.
4445 (define_insn "ufloatv2siv2df2<mask_name>"
4446 [(set (match_operand:V2DF 0 "register_operand" "=v")
4447 (unsigned_float:V2DF
4449 (match_operand:V4SI 1 "nonimmediate_operand" "vm")
4450 (parallel [(const_int 0) (const_int 1)]))))]
4452 "vcvtudq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4453 [(set_attr "type" "ssecvt")
4454 (set_attr "prefix" "evex")
4455 (set_attr "mode" "V2DF")])
;; Converts elements 0..7 of a V16SI operand to V8DF; emits vcvtdq2pd with
;; the source printed through the %t operand modifier.
4457 (define_insn "avx512f_cvtdq2pd512_2"
4458 [(set (match_operand:V8DF 0 "register_operand" "=v")
4461 (match_operand:V16SI 1 "nonimmediate_operand" "vm")
4462 (parallel [(const_int 0) (const_int 1)
4463 (const_int 2) (const_int 3)
4464 (const_int 4) (const_int 5)
4465 (const_int 6) (const_int 7)]))))]
4467 "vcvtdq2pd\t{%t1, %0|%0, %t1}"
4468 [(set_attr "type" "ssecvt")
4469 (set_attr "prefix" "evex")
4470 (set_attr "mode" "V8DF")])
;; Converts elements 0..3 of a V8SI operand to V4DF; emits vcvtdq2pd with the
;; source printed through the %x operand modifier.
4472 (define_insn "avx_cvtdq2pd256_2"
4473 [(set (match_operand:V4DF 0 "register_operand" "=v")
4476 (match_operand:V8SI 1 "nonimmediate_operand" "vm")
4477 (parallel [(const_int 0) (const_int 1)
4478 (const_int 2) (const_int 3)]))))]
4480 "vcvtdq2pd\t{%x1, %0|%0, %x1}"
4481 [(set_attr "type" "ssecvt")
4482 (set_attr "prefix" "maybe_evex")
4483 (set_attr "mode" "V4DF")])
;; Converts elements 0 and 1 of a V4SI operand to V2DF; emits (v)cvtdq2pd
;; with an optional mask.  Needs SSE2, plus <mask_avx512vl_condition> for the
;; masked variant.  ssememalign is set to 64.
4485 (define_insn "sse2_cvtdq2pd<mask_name>"
4486 [(set (match_operand:V2DF 0 "register_operand" "=v")
4489 (match_operand:V4SI 1 "nonimmediate_operand" "vm")
4490 (parallel [(const_int 0) (const_int 1)]))))]
4491 "TARGET_SSE2 && <mask_avx512vl_condition>"
4492 "%vcvtdq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
4493 [(set_attr "type" "ssecvt")
4494 (set_attr "prefix" "maybe_vex")
4495 (set_attr "ssememalign" "64")
4496 (set_attr "mode" "V2DF")])
;; V8DF -> V8SI conversion expressed as UNSPEC_FIX_NOTRUNC; emits vcvtpd2dq
;; with optional mask and rounding operands.
4498 (define_insn "<mask_codefor>avx512f_cvtpd2dq512<mask_name><round_name>"
4499 [(set (match_operand:V8SI 0 "register_operand" "=v")
4501 [(match_operand:V8DF 1 "<round_nimm_predicate>" "<round_constraint>")]
4502 UNSPEC_FIX_NOTRUNC))]
4504 "vcvtpd2dq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4505 [(set_attr "type" "ssecvt")
4506 (set_attr "prefix" "evex")
4507 (set_attr "mode" "OI")])
;; V4DF -> V4SI conversion expressed as UNSPEC_FIX_NOTRUNC; emits
;; vcvtpd2dq{y} with an optional mask.  Needs AVX, plus
;; <mask_avx512vl_condition> for the masked variant.
4509 (define_insn "avx_cvtpd2dq256<mask_name>"
4510 [(set (match_operand:V4SI 0 "register_operand" "=v")
4511 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "vm")]
4512 UNSPEC_FIX_NOTRUNC))]
4513 "TARGET_AVX && <mask_avx512vl_condition>"
4514 "vcvtpd2dq{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4515 [(set_attr "type" "ssecvt")
4516 (set_attr "prefix" "<mask_prefix>")
4517 (set_attr "mode" "OI")])
;; Expander producing a V8SI destination from the conversion of a V4DF
;; operand to V4SI; the preparation statement sets operands[2] to the V4SI
;; zero vector.
4519 (define_expand "avx_cvtpd2dq256_2"
4520 [(set (match_operand:V8SI 0 "register_operand")
4522 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand")]
4526 "operands[2] = CONST0_RTX (V4SImode);")
;; Insn for the V8SI-destination form: operand 2 must satisfy const0_operand.
;; Emits vcvtpd2dq{y}, printing the destination through the %x operand
;; modifier.
4528 (define_insn "*avx_cvtpd2dq256_2"
4529 [(set (match_operand:V8SI 0 "register_operand" "=x")
4531 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")]
4533 (match_operand:V4SI 2 "const0_operand")))]
4535 "vcvtpd2dq{y}\t{%1, %x0|%x0, %1}"
4536 [(set_attr "type" "ssecvt")
4537 (set_attr "prefix" "vex")
4538 (set_attr "btver2_decode" "vector")
4539 (set_attr "mode" "OI")])
;; V2DF -> V2SI conversion whose V4SI destination also carries a zero V2SI
;; const_vector.  The output code returns either the vcvtpd2dq{x} template
;; (with optional mask operand) or the plain cvtpd2dq template.
4541 (define_insn "sse2_cvtpd2dq<mask_name>"
4542 [(set (match_operand:V4SI 0 "register_operand" "=v")
4544 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "vm")]
4546 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
4547 "TARGET_SSE2 && <mask_avx512vl_condition>"
4550 return "vcvtpd2dq{x}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
4552 return "cvtpd2dq\t{%1, %0|%0, %1}";
4554 [(set_attr "type" "ssecvt")
4555 (set_attr "prefix_rep" "1")
4556 (set_attr "prefix_data16" "0")
4557 (set_attr "prefix" "maybe_vex")
4558 (set_attr "mode" "TI")
4559 (set_attr "amdfam10_decode" "double")
4560 (set_attr "athlon_decode" "vector")
4561 (set_attr "bdver1_decode" "double")])
4563 ;; For ufix_notrunc* insn patterns
;; Mnemonic suffix appended to vcvtpd2udq: empty for V8DF, "{y}" for V4DF.
4564 (define_mode_attr pd2udqsuff
4565 [(V8DF "") (V4DF "{y}")])
;; VF2_512_256VL double vector to unsigned SI vector (<si2dfmode>) conversion
;; expressed as UNSPEC_UNSIGNED_FIX_NOTRUNC; emits vcvtpd2udq with the
;; <pd2udqsuff> suffix and optional mask/rounding operands.
4567 (define_insn "ufix_notrunc<mode><si2dfmodelower>2<mask_name><round_name>"
4568 [(set (match_operand:<si2dfmode> 0 "register_operand" "=v")
4570 [(match_operand:VF2_512_256VL 1 "nonimmediate_operand" "<round_constraint>")]
4571 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4573 "vcvtpd2udq<pd2udqsuff>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4574 [(set_attr "type" "ssecvt")
4575 (set_attr "prefix" "evex")
4576 (set_attr "mode" "<sseinsnmode>")])
;; 128-bit form: V2DF operand converted through UNSPEC_UNSIGNED_FIX_NOTRUNC,
;; with a zero V2SI const_vector filling out the V4SI destination; emits
;; vcvtpd2udq{x}.
4578 (define_insn "ufix_notruncv2dfv2si2<mask_name>"
4579 [(set (match_operand:V4SI 0 "register_operand" "=v")
4582 [(match_operand:V2DF 1 "nonimmediate_operand" "vm")]
4583 UNSPEC_UNSIGNED_FIX_NOTRUNC)
4584 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
4586 "vcvtpd2udq{x}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4587 [(set_attr "type" "ssecvt")
4588 (set_attr "prefix" "evex")
4589 (set_attr "mode" "TI")])
;; Truncating V8DF -> V8SI conversion; <fixsuffix> selects between the
;; vcvttpd2dq and vcvttpd2udq mnemonics.  Supports optional mask and SAE-only
;; operands.
4591 (define_insn "<fixsuffix>fix_truncv8dfv8si2<mask_name><round_saeonly_name>"
4592 [(set (match_operand:V8SI 0 "register_operand" "=v")
4594 (match_operand:V8DF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
4596 "vcvttpd2<fixsuffix>dq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
4597 [(set_attr "type" "ssecvt")
4598 (set_attr "prefix" "evex")
4599 (set_attr "mode" "OI")])
;; Unsigned truncating V2DF -> V2SI conversion, with a zero V2SI const_vector
;; filling out the V4SI destination; emits vcvttpd2udq{x}.
4601 (define_insn "ufix_truncv2dfv2si2<mask_name>"
4602 [(set (match_operand:V4SI 0 "register_operand" "=v")
4604 (unsigned_fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "vm"))
4605 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
4607 "vcvttpd2udq{x}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4608 [(set_attr "type" "ssecvt")
4609 (set_attr "prefix" "evex")
4610 (set_attr "mode" "TI")])
;; Signed truncating V4DF -> V4SI conversion (RTL `fix'); emits vcvttpd2dq{y}
;; with an optional mask.
;; NOTE(review): unlike the neighbouring <mask_name> patterns, the condition
;; here does not include <mask_avx512vl_condition> -- confirm that the masked
;; variant is gated on AVX512VL as intended.
4612 (define_insn "fix_truncv4dfv4si2<mask_name>"
4613 [(set (match_operand:V4SI 0 "register_operand" "=v")
4614 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "vm")))]
4615 "TARGET_AVX || (TARGET_AVX512VL && TARGET_AVX512F)"
4616 "vcvttpd2dq{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4617 [(set_attr "type" "ssecvt")
4618 (set_attr "prefix" "maybe_evex")
4619 (set_attr "mode" "OI")])
;; Unsigned truncating V4DF -> V4SI conversion; requires AVX512VL and
;; AVX512F; emits vcvttpd2udq{y} with an optional mask.
4621 (define_insn "ufix_truncv4dfv4si2<mask_name>"
4622 [(set (match_operand:V4SI 0 "register_operand" "=v")
4623 (unsigned_fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "vm")))]
4624 "TARGET_AVX512VL && TARGET_AVX512F"
4625 "vcvttpd2udq{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4626 [(set_attr "type" "ssecvt")
4627 (set_attr "prefix" "maybe_evex")
4628 (set_attr "mode" "OI")])
;; Signed/unsigned (any_fix) truncating conversion of the VF2_AVX512VL double
;; vector modes to <sseintvecmode>.  Requires AVX512DQ; emits vcvttpd2[u]qq
;; with optional mask and SAE-only operands.
4630 (define_insn "<fixsuffix>fix_trunc<mode><sseintvecmodelower>2<mask_name><round_saeonly_name>"
4631 [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
4632 (any_fix:<sseintvecmode>
4633 (match_operand:VF2_AVX512VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
4634 "TARGET_AVX512DQ && <round_saeonly_mode512bit_condition>"
4635 "vcvttpd2<fixsuffix>qq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
4636 [(set_attr "type" "ssecvt")
4637 (set_attr "prefix" "evex")
4638 (set_attr "mode" "<sseintvecmode2>")])
;; VF2_AVX512VL double vector to <sseintvecmode> conversion expressed as
;; UNSPEC_FIX_NOTRUNC.  Requires AVX512DQ; emits vcvtpd2qq with optional mask
;; and rounding operands.
4640 (define_insn "fix_notrunc<mode><sseintvecmodelower>2<mask_name><round_name>"
4641 [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
4642 (unspec:<sseintvecmode>
4643 [(match_operand:VF2_AVX512VL 1 "<round_nimm_predicate>" "<round_constraint>")]
4644 UNSPEC_FIX_NOTRUNC))]
4645 "TARGET_AVX512DQ && <round_mode512bit_condition>"
4646 "vcvtpd2qq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4647 [(set_attr "type" "ssecvt")
4648 (set_attr "prefix" "evex")
4649 (set_attr "mode" "<sseintvecmode2>")])
;; Unsigned counterpart of the vcvtpd2qq pattern: UNSPEC_UNSIGNED_FIX_NOTRUNC
;; on a VF2_AVX512VL operand.  Requires AVX512DQ; emits vcvtpd2uqq.
4651 (define_insn "ufix_notrunc<mode><sseintvecmodelower>2<mask_name><round_name>"
4652 [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
4653 (unspec:<sseintvecmode>
4654 [(match_operand:VF2_AVX512VL 1 "nonimmediate_operand" "<round_constraint>")]
4655 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4656 "TARGET_AVX512DQ && <round_mode512bit_condition>"
4657 "vcvtpd2uqq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4658 [(set_attr "type" "ssecvt")
4659 (set_attr "prefix" "evex")
4660 (set_attr "mode" "<sseintvecmode2>")])
;; Signed/unsigned (any_fix) truncating conversion of the VF1_128_256VL
;; single-precision modes to <sselongvecmode>.  Requires AVX512DQ; emits
;; vcvttps2[u]qq with optional mask and SAE-only operands.
4662 (define_insn "<fixsuffix>fix_trunc<mode><sselongvecmodelower>2<mask_name><round_saeonly_name>"
4663 [(set (match_operand:<sselongvecmode> 0 "register_operand" "=v")
4664 (any_fix:<sselongvecmode>
4665 (match_operand:VF1_128_256VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
4666 "TARGET_AVX512DQ && <round_saeonly_modev8sf_condition>"
4667 "vcvttps2<fixsuffix>qq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
4668 [(set_attr "type" "ssecvt")
4669 (set_attr "prefix" "evex")
4670 (set_attr "mode" "<sseintvecmode3>")])
;; 128-bit form: truncating conversion of elements 0 and 1 of a V4SF operand
;; to V2DI.  Requires AVX512DQ and AVX512VL; emits vcvttps2[u]qq.
4672 (define_insn "<fixsuffix>fix_truncv2sfv2di2<mask_name>"
4673 [(set (match_operand:V2DI 0 "register_operand" "=v")
4676 (match_operand:V4SF 1 "nonimmediate_operand" "vm")
4677 (parallel [(const_int 0) (const_int 1)]))))]
4678 "TARGET_AVX512DQ && TARGET_AVX512VL"
4679 "vcvttps2<fixsuffix>qq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4680 [(set_attr "type" "ssecvt")
4681 (set_attr "prefix" "evex")
4682 (set_attr "mode" "TI")])
;; Unsigned truncating conversion of the VF1_128_256VL single-precision modes
;; to <sseintvecmode>; emits vcvttps2udq with an optional mask.
4684 (define_insn "ufix_trunc<mode><sseintvecmodelower>2<mask_name>"
4685 [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
4686 (unsigned_fix:<sseintvecmode>
4687 (match_operand:VF1_128_256VL 1 "nonimmediate_operand" "vm")))]
4689 "vcvttps2udq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4690 [(set_attr "type" "ssecvt")
4691 (set_attr "prefix" "evex")
4692 (set_attr "mode" "<sseintvecmode2>")])
;; Expander producing a V8SI destination from the truncating conversion of a
;; V4DF operand to V4SI; the preparation statement sets operands[2] to the
;; V4SI zero vector.
4694 (define_expand "avx_cvttpd2dq256_2"
4695 [(set (match_operand:V8SI 0 "register_operand")
4697 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand"))
4700 "operands[2] = CONST0_RTX (V4SImode);")
;; Signed truncating V2DF -> V2SI conversion, with a zero V2SI const_vector
;; filling out the V4SI destination.  The output code returns either the
;; vcvttpd2dq{x} template (with optional mask operand) or the plain cvttpd2dq
;; template.
4702 (define_insn "sse2_cvttpd2dq<mask_name>"
4703 [(set (match_operand:V4SI 0 "register_operand" "=v")
4705 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "vm"))
4706 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
4707 "TARGET_SSE2 && <mask_avx512vl_condition>"
4710 return "vcvttpd2dq{x}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
4712 return "cvttpd2dq\t{%1, %0|%0, %1}";
4714 [(set_attr "type" "ssecvt")
4715 (set_attr "amdfam10_decode" "double")
4716 (set_attr "athlon_decode" "vector")
4717 (set_attr "bdver1_decode" "double")
4718 (set_attr "prefix" "maybe_vex")
4719 (set_attr "mode" "TI")])
;; Double-to-single narrowing (float_truncate) of V2DF operand 2, with V4SF
;; operand 1 as the other vector input.  Alternatives 0 and 1 are the
;; two-operand cvtsd2ss forms (isa "noavx", operand 1 tied to the
;; destination); alternative 2 is vcvtsd2ss with the optional rounding
;; operand.
4721 (define_insn "sse2_cvtsd2ss<round_name>"
4722 [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
4725 (float_truncate:V2SF
4726 (match_operand:V2DF 2 "nonimmediate_operand" "x,m,<round_constraint>")))
4727 (match_operand:V4SF 1 "register_operand" "0,0,v")
4731 cvtsd2ss\t{%2, %0|%0, %2}
4732 cvtsd2ss\t{%2, %0|%0, %q2}
4733 vcvtsd2ss\t{<round_op3>%2, %1, %0|%0, %1, %q2<round_op3>}"
4734 [(set_attr "isa" "noavx,noavx,avx")
4735 (set_attr "type" "ssecvt")
4736 (set_attr "athlon_decode" "vector,double,*")
4737 (set_attr "amdfam10_decode" "vector,double,*")
4738 (set_attr "bdver1_decode" "direct,direct,*")
4739 (set_attr "btver2_decode" "double,double,double")
4740 (set_attr "prefix" "orig,orig,<round_prefix>")
4741 (set_attr "mode" "SF")])
;; Single-to-double widening using elements 0 and 1 of V4SF operand 2, with
;; V2DF operand 1 as the other vector input.  Alternatives 0 and 1 are the
;; two-operand cvtss2sd forms (isa "noavx"); alternative 2 is vcvtss2sd with
;; the optional SAE-only operand.
4743 (define_insn "sse2_cvtss2sd<round_saeonly_name>"
4744 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
4748 (match_operand:V4SF 2 "<round_saeonly_nimm_predicate>" "x,m,<round_saeonly_constraint>")
4749 (parallel [(const_int 0) (const_int 1)])))
4750 (match_operand:V2DF 1 "register_operand" "0,0,v")
4754 cvtss2sd\t{%2, %0|%0, %2}
4755 cvtss2sd\t{%2, %0|%0, %k2}
4756 vcvtss2sd\t{<round_saeonly_op3>%2, %1, %0|%0, %1, %k2<round_saeonly_op3>}"
4757 [(set_attr "isa" "noavx,noavx,avx")
4758 (set_attr "type" "ssecvt")
4759 (set_attr "amdfam10_decode" "vector,double,*")
4760 (set_attr "athlon_decode" "direct,direct,*")
4761 (set_attr "bdver1_decode" "direct,direct,*")
4762 (set_attr "btver2_decode" "double,double,double")
4763 (set_attr "prefix" "orig,orig,<round_saeonly_prefix>")
4764 (set_attr "mode" "DF")])
;; V8DF -> V8SF narrowing (float_truncate); emits vcvtpd2ps with optional
;; mask and rounding operands.
4766 (define_insn "<mask_codefor>avx512f_cvtpd2ps512<mask_name><round_name>"
4767 [(set (match_operand:V8SF 0 "register_operand" "=v")
4768 (float_truncate:V8SF
4769 (match_operand:V8DF 1 "<round_nimm_predicate>" "<round_constraint>")))]
4771 "vcvtpd2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4772 [(set_attr "type" "ssecvt")
4773 (set_attr "prefix" "evex")
4774 (set_attr "mode" "V8SF")])
;; V4DF -> V4SF narrowing (float_truncate); emits vcvtpd2ps{y} with an
;; optional mask.  Needs AVX, plus <mask_avx512vl_condition> for the masked
;; variant.
4776 (define_insn "avx_cvtpd2ps256<mask_name>"
4777 [(set (match_operand:V4SF 0 "register_operand" "=v")
4778 (float_truncate:V4SF
4779 (match_operand:V4DF 1 "nonimmediate_operand" "vm")))]
4780 "TARGET_AVX && <mask_avx512vl_condition>"
4781 "vcvtpd2ps{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4782 [(set_attr "type" "ssecvt")
4783 (set_attr "prefix" "maybe_evex")
4784 (set_attr "btver2_decode" "vector")
4785 (set_attr "mode" "V4SF")])
;; Expander for the V2DF -> V2SF narrowing with a V4SF destination; the
;; preparation statement sets operands[2] to the V2SF zero vector.
4787 (define_expand "sse2_cvtpd2ps"
4788 [(set (match_operand:V4SF 0 "register_operand")
4790 (float_truncate:V2SF
4791 (match_operand:V2DF 1 "nonimmediate_operand"))
4794 "operands[2] = CONST0_RTX (V2SFmode);")
;; Masked expander for the V2DF -> V2SF narrowing: operand 2 is a V4SF
;; register, operand 3 a QI mask register, and operands[4] is set to the V2SF
;; zero vector.
4796 (define_expand "sse2_cvtpd2ps_mask"
4797 [(set (match_operand:V4SF 0 "register_operand")
4800 (float_truncate:V2SF
4801 (match_operand:V2DF 1 "nonimmediate_operand"))
4803 (match_operand:V4SF 2 "register_operand")
4804 (match_operand:QI 3 "register_operand")))]
4806 "operands[4] = CONST0_RTX (V2SFmode);")
;; Insn for the V2DF -> V2SF narrowing: operand 2 must satisfy const0_operand.
;; The output code returns either the vcvtpd2ps{x} template (mask printed via
;; <mask_operand3>) or the plain cvtpd2ps template.
4808 (define_insn "*sse2_cvtpd2ps<mask_name>"
4809 [(set (match_operand:V4SF 0 "register_operand" "=v")
4811 (float_truncate:V2SF
4812 (match_operand:V2DF 1 "nonimmediate_operand" "vm"))
4813 (match_operand:V2SF 2 "const0_operand")))]
4814 "TARGET_SSE2 && <mask_avx512vl_condition>"
4817 return "vcvtpd2ps{x}\t{%1, %0<mask_operand3>|%0<mask_operand3>, %1}";
4819 return "cvtpd2ps\t{%1, %0|%0, %1}";
4821 [(set_attr "type" "ssecvt")
4822 (set_attr "amdfam10_decode" "double")
4823 (set_attr "athlon_decode" "vector")
4824 (set_attr "bdver1_decode" "double")
4825 (set_attr "prefix_data16" "1")
4826 (set_attr "prefix" "maybe_vex")
4827 (set_attr "mode" "V4SF")])
4829 ;; For <sse2_avx_avx512f>_cvtps2pd<avxsizesuffix> insn pattern
;; SF vector mode with the same element count as each DF vector mode.
4830 (define_mode_attr sf2dfmode
4831 [(V8DF "V8SF") (V4DF "V4SF")])
;; SF vector (<sf2dfmode>) to DF vector widening (float_extend) for the
;; VF2_512_256 modes; emits vcvtps2pd with optional mask and SAE-only
;; operands.
4833 (define_insn "<sse2_avx_avx512f>_cvtps2pd<avxsizesuffix><mask_name><round_saeonly_name>"
4834 [(set (match_operand:VF2_512_256 0 "register_operand" "=v")
4835 (float_extend:VF2_512_256
4836 (match_operand:<sf2dfmode> 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
4837 "TARGET_AVX && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
4838 "vcvtps2pd\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
4839 [(set_attr "type" "ssecvt")
4840 (set_attr "prefix" "maybe_vex")
4841 (set_attr "mode" "<MODE>")])
;; Widens elements 0..3 of a V8SF operand to V4DF; emits vcvtps2pd with the
;; source printed through the %x operand modifier.
4843 (define_insn "*avx_cvtps2pd256_2"
4844 [(set (match_operand:V4DF 0 "register_operand" "=x")
4847 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
4848 (parallel [(const_int 0) (const_int 1)
4849 (const_int 2) (const_int 3)]))))]
4851 "vcvtps2pd\t{%x1, %0|%0, %x1}"
4852 [(set_attr "type" "ssecvt")
4853 (set_attr "prefix" "vex")
4854 (set_attr "mode" "V4DF")])
;; Low-half unpack for V16SF: produces a V8DF result from elements 0..7 of
;; operand 1 and emits "vcvtps2pd" with operand 1 printed through the %t
;; modifier.
;; NOTE(review): the wrapping float_extend/vec_select lines and the insn
;; condition are not visible here; %t presumably prints the 256-bit
;; register name -- confirm.
4856 (define_insn "vec_unpacks_lo_v16sf"
4857 [(set (match_operand:V8DF 0 "register_operand" "=v")
4860 (match_operand:V16SF 1 "nonimmediate_operand" "vm")
4861 (parallel [(const_int 0) (const_int 1)
4862 (const_int 2) (const_int 3)
4863 (const_int 4) (const_int 5)
4864 (const_int 6) (const_int 7)]))))]
4866 "vcvtps2pd\t{%t1, %0|%0, %t1}"
4867 [(set_attr "type" "ssecvt")
4868 (set_attr "prefix" "evex")
4869 (set_attr "mode" "V8DF")])
;; 128-bit cvtps2pd: produces a V2DF result from elements 0 and 1 of the
;; V4SF operand 1.  Enabled under TARGET_SSE2 && <mask_avx512vl_condition>;
;; the template carries the optional mask operand, and the Intel-syntax
;; side prints operand 1 with the %q modifier.
;; NOTE(review): the wrapping float_extend/vec_select lines are not visible
;; in this excerpt.
4871 (define_insn "sse2_cvtps2pd<mask_name>"
4872 [(set (match_operand:V2DF 0 "register_operand" "=v")
4875 (match_operand:V4SF 1 "nonimmediate_operand" "vm")
4876 (parallel [(const_int 0) (const_int 1)]))))]
4877 "TARGET_SSE2 && <mask_avx512vl_condition>"
4878 "%vcvtps2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
4879 [(set_attr "type" "ssecvt")
4880 (set_attr "amdfam10_decode" "direct")
4881 (set_attr "athlon_decode" "double")
4882 (set_attr "bdver1_decode" "double")
4883 (set_attr "prefix_data16" "0")
4884 (set_attr "prefix" "maybe_vex")
4885 (set_attr "mode" "V2DF")])
;; High-half unpack for V4SF, done in two steps through a fresh V4SF
;; scratch register (operands[2]): the first set selects elements 6,7,2,3
;; from a vector built with operand 1, and the second set produces the V2DF
;; result from elements 0 and 1.
;; NOTE(review): the vec_concat feeding the first select is not visible;
;; with operand 1 as its second half, indices 6 and 7 would be operand 1's
;; elements 2 and 3 -- confirm.
4887 (define_expand "vec_unpacks_hi_v4sf"
4892 (match_operand:V4SF 1 "nonimmediate_operand"))
4893 (parallel [(const_int 6) (const_int 7)
4894 (const_int 2) (const_int 3)])))
4895 (set (match_operand:V2DF 0 "register_operand")
4899 (parallel [(const_int 0) (const_int 1)]))))]
4901 "operands[2] = gen_reg_rtx (V4SFmode);")
;; High-half unpack for V8SF: elements 4..7 of operand 1 are selected
;; (into the fresh V4SF scratch operands[2], presumably via match_dup 2 on
;; a line not shown) and a second set then produces the V4DF result.
4903 (define_expand "vec_unpacks_hi_v8sf"
4906 (match_operand:V8SF 1 "nonimmediate_operand")
4907 (parallel [(const_int 4) (const_int 5)
4908 (const_int 6) (const_int 7)])))
4909 (set (match_operand:V4DF 0 "register_operand")
4913 "operands[2] = gen_reg_rtx (V4SFmode);")
;; High-half unpack for V16SF: elements 8..15 of operand 1 are selected
;; (into the fresh V8SF scratch operands[2], presumably via match_dup 2 on
;; a line not shown) and a second set then produces the V8DF result.
4915 (define_expand "vec_unpacks_hi_v16sf"
4918 (match_operand:V16SF 1 "nonimmediate_operand")
4919 (parallel [(const_int 8) (const_int 9)
4920 (const_int 10) (const_int 11)
4921 (const_int 12) (const_int 13)
4922 (const_int 14) (const_int 15)])))
4923 (set (match_operand:V8DF 0 "register_operand")
4927 "operands[2] = gen_reg_rtx (V8SFmode);")
;; Low-half unpack for V4SF: the V2DF result is formed from elements 0 and
;; 1 of operand 1.  The conversion wrapper and the condition are on lines
;; not visible in this excerpt.
4929 (define_expand "vec_unpacks_lo_v4sf"
4930 [(set (match_operand:V2DF 0 "register_operand")
4933 (match_operand:V4SF 1 "nonimmediate_operand")
4934 (parallel [(const_int 0) (const_int 1)]))))]
;; Low-half unpack for V8SF: the V4DF result is formed from elements 0..3
;; of operand 1.  The conversion wrapper and the condition are on lines
;; not visible in this excerpt.
4937 (define_expand "vec_unpacks_lo_v8sf"
4938 [(set (match_operand:V4DF 0 "register_operand")
4941 (match_operand:V8SF 1 "nonimmediate_operand")
4942 (parallel [(const_int 0) (const_int 1)
4943 (const_int 2) (const_int 3)]))))]
;; For an integer vector mode, the floating-point vector mode with half as
;; many elements, each twice as wide (HI elements -> SF, SI elements -> DF).
;; Used as the result mode of the vec_unpack*_float_* expanders below.
4946 (define_mode_attr sseunpackfltmode
4947 [(V8HI "V4SF") (V4SI "V2DF") (V16HI "V8SF")
4948 (V8SI "V4DF") (V32HI "V16SF") (V16SI "V8DF")])
;; Signed high-half unpack-and-convert for the VI2_AVX512F modes.  Calls
;; gen_vec_unpacks_hi_<mode> into a temporary of mode <sseunpackmode>, then
;; emits a SET of operand 0 to a FLOAT of that temporary in mode
;; <sseunpackfltmode>.
4950 (define_expand "vec_unpacks_float_hi_<mode>"
4951 [(match_operand:<sseunpackfltmode> 0 "register_operand")
4952 (match_operand:VI2_AVX512F 1 "register_operand")]
4955 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
4957 emit_insn (gen_vec_unpacks_hi_<mode> (tmp, operands[1]));
4958 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
4959 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
;; Signed low-half unpack-and-convert for the VI2_AVX512F modes.  Calls
;; gen_vec_unpacks_lo_<mode> into a temporary of mode <sseunpackmode>, then
;; emits a SET of operand 0 to a FLOAT of that temporary in mode
;; <sseunpackfltmode>.
4963 (define_expand "vec_unpacks_float_lo_<mode>"
4964 [(match_operand:<sseunpackfltmode> 0 "register_operand")
4965 (match_operand:VI2_AVX512F 1 "register_operand")]
4968 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
4970 emit_insn (gen_vec_unpacks_lo_<mode> (tmp, operands[1]));
4971 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
4972 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
;; Unsigned high-half unpack-and-convert for the VI2_AVX512F modes.  Calls
;; gen_vec_unpacku_hi_<mode> into a temporary of mode <sseunpackmode>, then
;; emits a SET of operand 0 to a FLOAT of that temporary in mode
;; <sseunpackfltmode>.
4976 (define_expand "vec_unpacku_float_hi_<mode>"
4977 [(match_operand:<sseunpackfltmode> 0 "register_operand")
4978 (match_operand:VI2_AVX512F 1 "register_operand")]
4981 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
4983 emit_insn (gen_vec_unpacku_hi_<mode> (tmp, operands[1]));
4984 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
4985 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
;; Unsigned low-half unpack-and-convert for the VI2_AVX512F modes.  Calls
;; gen_vec_unpacku_lo_<mode> into a temporary of mode <sseunpackmode>, then
;; emits a SET of operand 0 to a FLOAT of that temporary in mode
;; <sseunpackfltmode>.
4989 (define_expand "vec_unpacku_float_lo_<mode>"
4990 [(match_operand:<sseunpackfltmode> 0 "register_operand")
4991 (match_operand:VI2_AVX512F 1 "register_operand")]
4994 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
4996 emit_insn (gen_vec_unpacku_lo_<mode> (tmp, operands[1]));
4997 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
4998 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
;; Signed int -> double conversion of the high half of a V4SI.  The first
;; set selects elements 2,3,2,3 of operand 1 (into the fresh V4SI scratch
;; operands[2], presumably via match_dup 2 on a line not shown); the second
;; set produces the V2DF result from elements 0 and 1.
5002 (define_expand "vec_unpacks_float_hi_v4si"
5005 (match_operand:V4SI 1 "nonimmediate_operand")
5006 (parallel [(const_int 2) (const_int 3)
5007 (const_int 2) (const_int 3)])))
5008 (set (match_operand:V2DF 0 "register_operand")
5012 (parallel [(const_int 0) (const_int 1)]))))]
5014 "operands[2] = gen_reg_rtx (V4SImode);")
;; Signed int -> double conversion of the low half of a V4SI: the V2DF
;; result is formed from elements 0 and 1 of operand 1.  The conversion
;; wrapper and the condition are on lines not visible in this excerpt.
5016 (define_expand "vec_unpacks_float_lo_v4si"
5017 [(set (match_operand:V2DF 0 "register_operand")
5020 (match_operand:V4SI 1 "nonimmediate_operand")
5021 (parallel [(const_int 0) (const_int 1)]))))]
;; Signed int -> double conversion of the high half of a V8SI: elements
;; 4..7 of operand 1 are selected (into the fresh V4SI scratch operands[2],
;; presumably via match_dup 2 on a line not shown) and a second set then
;; produces the V4DF result.
5024 (define_expand "vec_unpacks_float_hi_v8si"
5027 (match_operand:V8SI 1 "nonimmediate_operand")
5028 (parallel [(const_int 4) (const_int 5)
5029 (const_int 6) (const_int 7)])))
5030 (set (match_operand:V4DF 0 "register_operand")
5034 "operands[2] = gen_reg_rtx (V4SImode);")
;; Signed int -> double conversion of the low half of a V8SI: the V4DF
;; result is formed from elements 0..3 of operand 1.  The conversion
;; wrapper and the condition are on lines not visible in this excerpt.
5036 (define_expand "vec_unpacks_float_lo_v8si"
5037 [(set (match_operand:V4DF 0 "register_operand")
5040 (match_operand:V8SI 1 "nonimmediate_operand")
5041 (parallel [(const_int 0) (const_int 1)
5042 (const_int 2) (const_int 3)]))))]
;; Signed int -> double conversion of the high half of a V16SI: elements
;; 8..15 of operand 1 are selected (into the fresh V8SI scratch
;; operands[2], presumably via match_dup 2 on a line not shown) and a
;; second set then produces the V8DF result.
5045 (define_expand "vec_unpacks_float_hi_v16si"
5048 (match_operand:V16SI 1 "nonimmediate_operand")
5049 (parallel [(const_int 8) (const_int 9)
5050 (const_int 10) (const_int 11)
5051 (const_int 12) (const_int 13)
5052 (const_int 14) (const_int 15)])))
5053 (set (match_operand:V8DF 0 "register_operand")
5057 "operands[2] = gen_reg_rtx (V8SImode);")
;; Signed int -> double conversion of the low half of a V16SI: the V8DF
;; result is formed from elements 0..7 of operand 1.  The conversion
;; wrapper and the condition are on lines not visible in this excerpt.
5059 (define_expand "vec_unpacks_float_lo_v16si"
5060 [(set (match_operand:V8DF 0 "register_operand")
5063 (match_operand:V16SI 1 "nonimmediate_operand")
5064 (parallel [(const_int 0) (const_int 1)
5065 (const_int 2) (const_int 3)
5066 (const_int 4) (const_int 5)
5067 (const_int 6) (const_int 7)]))))]
;; Unsigned int -> double conversion of the high half of a V4SI.
;; Visible steps: select elements 2,3,2,3 of operand 1 into the V4SI
;; scratch operands[5]; form a V2DF value (operands[6]) from its elements
;; 0 and 1; compare operands[6] < operands[3] (a zero vector); AND that
;; result with operands[4] (a vector holding 2^32, built with real_ldexp
;; of 1.0 by 32); and add the masked constant to operands[6].  The effect
;; is to add 2^32 to every lane whose converted value came out negative.
;; NOTE(review): the conversion producing operands[6] is on a line not
;; shown -- presumably a signed float:V2DF; confirm.
5070 (define_expand "vec_unpacku_float_hi_v4si"
5073 (match_operand:V4SI 1 "nonimmediate_operand")
5074 (parallel [(const_int 2) (const_int 3)
5075 (const_int 2) (const_int 3)])))
5080 (parallel [(const_int 0) (const_int 1)]))))
5082 (lt:V2DF (match_dup 6) (match_dup 3)))
5084 (and:V2DF (match_dup 7) (match_dup 4)))
5085 (set (match_operand:V2DF 0 "register_operand")
5086 (plus:V2DF (match_dup 6) (match_dup 8)))]
5089 REAL_VALUE_TYPE TWO32r;
5093 real_ldexp (&TWO32r, &dconst1, 32);
5094 x = const_double_from_real_value (TWO32r, DFmode);
5096 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
5097 operands[4] = force_reg (V2DFmode,
5098 ix86_build_const_vector (V2DFmode, 1, x));
5100 operands[5] = gen_reg_rtx (V4SImode);
5102 for (i = 6; i < 9; i++)
5103 operands[i] = gen_reg_rtx (V2DFmode);
;; Unsigned int -> double conversion of the low half of a V4SI.
;; Visible steps: form a V2DF value (operands[5]) from elements 0 and 1 of
;; operand 1; compare it < operands[3] (a zero vector); AND the comparison
;; result with operands[4] (a vector holding 2^32, built with real_ldexp
;; of 1.0 by 32); and add the masked constant to operands[5], i.e. add
;; 2^32 to every lane whose converted value came out negative.
;; NOTE(review): the conversion producing operands[5] is on a line not
;; shown -- presumably a signed float:V2DF; confirm.
5106 (define_expand "vec_unpacku_float_lo_v4si"
5110 (match_operand:V4SI 1 "nonimmediate_operand")
5111 (parallel [(const_int 0) (const_int 1)]))))
5113 (lt:V2DF (match_dup 5) (match_dup 3)))
5115 (and:V2DF (match_dup 6) (match_dup 4)))
5116 (set (match_operand:V2DF 0 "register_operand")
5117 (plus:V2DF (match_dup 5) (match_dup 7)))]
5120 REAL_VALUE_TYPE TWO32r;
5124 real_ldexp (&TWO32r, &dconst1, 32);
5125 x = const_double_from_real_value (TWO32r, DFmode);
5127 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
5128 operands[4] = force_reg (V2DFmode,
5129 ix86_build_const_vector (V2DFmode, 1, x));
5131 for (i = 5; i < 8; i++)
5132 operands[i] = gen_reg_rtx (V2DFmode);
;; Unsigned int -> double conversion of the high half of a V8SI, emitted
;; procedurally:
;;   tmp[0] = zero V4DF, tmp[1] = V4DF vector of 2^32 (real_ldexp 1.0, 32)
;;   tmp[5] = high V4SI half of operand 1 (gen_vec_extract_hi_v8si)
;;   tmp[2] = gen_floatv4siv4df2 (tmp[5])
;;   tmp[3] = tmp[2] < tmp[0]
;;   tmp[4] = tmp[3] & tmp[1]
;;   operand 0 = tmp[2] + tmp[4]
;; so 2^32 is added to each lane whose converted value is negative.
5135 (define_expand "vec_unpacku_float_hi_v8si"
5136 [(match_operand:V4DF 0 "register_operand")
5137 (match_operand:V8SI 1 "register_operand")]
5140 REAL_VALUE_TYPE TWO32r;
5144 real_ldexp (&TWO32r, &dconst1, 32);
5145 x = const_double_from_real_value (TWO32r, DFmode);
5147 tmp[0] = force_reg (V4DFmode, CONST0_RTX (V4DFmode));
5148 tmp[1] = force_reg (V4DFmode, ix86_build_const_vector (V4DFmode, 1, x));
5149 tmp[5] = gen_reg_rtx (V4SImode);
5151 for (i = 2; i < 5; i++)
5152 tmp[i] = gen_reg_rtx (V4DFmode);
5153 emit_insn (gen_vec_extract_hi_v8si (tmp[5], operands[1]));
5154 emit_insn (gen_floatv4siv4df2 (tmp[2], tmp[5]));
5155 emit_insn (gen_rtx_SET (VOIDmode, tmp[3],
5156 gen_rtx_LT (V4DFmode, tmp[2], tmp[0])));
5157 emit_insn (gen_andv4df3 (tmp[4], tmp[3], tmp[1]));
5158 emit_insn (gen_addv4df3 (operands[0], tmp[2], tmp[4]));
;; Unsigned int -> double conversion of the high half of a V16SI, emitted
;; procedurally:
;;   tmp[0] = zero V8DF, tmp[1] = V8DF vector of 2^32 (real_ldexp 1.0, 32)
;;   tmp[3] = high V8SI half of operand 1 (gen_vec_extract_hi_v16si)
;;   tmp[2] = gen_floatv8siv8df2 (tmp[3])
;;   k      = tmp[2] < tmp[0], as a QImode value
;;   tmp[2] = gen_addv8df3_mask (tmp[2], tmp[2], tmp[1], tmp[2], k)
;;   operand 0 = tmp[2]
;; NOTE(review): the masked add presumably takes (dest, src1, src2, merge,
;; mask), adding 2^32 only in lanes selected by k -- confirm against the
;; addv8df3_mask pattern.
5162 (define_expand "vec_unpacku_float_hi_v16si"
5163 [(match_operand:V8DF 0 "register_operand")
5164 (match_operand:V16SI 1 "register_operand")]
5167 REAL_VALUE_TYPE TWO32r;
5170 real_ldexp (&TWO32r, &dconst1, 32);
5171 x = const_double_from_real_value (TWO32r, DFmode);
5173 tmp[0] = force_reg (V8DFmode, CONST0_RTX (V8DFmode));
5174 tmp[1] = force_reg (V8DFmode, ix86_build_const_vector (V8DFmode, 1, x));
5175 tmp[2] = gen_reg_rtx (V8DFmode);
5176 tmp[3] = gen_reg_rtx (V8SImode);
5177 k = gen_reg_rtx (QImode);
5179 emit_insn (gen_vec_extract_hi_v16si (tmp[3], operands[1]));
5180 emit_insn (gen_floatv8siv8df2 (tmp[2], tmp[3]));
5181 emit_insn (gen_rtx_SET (VOIDmode, k,
5182 gen_rtx_LT (QImode, tmp[2], tmp[0])));
5183 emit_insn (gen_addv8df3_mask (tmp[2], tmp[2], tmp[1], tmp[2], k));
5184 emit_move_insn (operands[0], tmp[2]);
;; Unsigned int -> double conversion of the low half of a V8SI, emitted
;; procedurally:
;;   tmp[0] = zero V4DF, tmp[1] = V4DF vector of 2^32 (real_ldexp 1.0, 32)
;;   tmp[2] = gen_avx_cvtdq2pd256_2 (operand 1)
;;   tmp[3] = tmp[2] < tmp[0]
;;   tmp[4] = tmp[3] & tmp[1]
;;   operand 0 = tmp[2] + tmp[4]
;; so 2^32 is added to each lane whose converted value is negative.
5188 (define_expand "vec_unpacku_float_lo_v8si"
5189 [(match_operand:V4DF 0 "register_operand")
5190 (match_operand:V8SI 1 "nonimmediate_operand")]
5193 REAL_VALUE_TYPE TWO32r;
5197 real_ldexp (&TWO32r, &dconst1, 32);
5198 x = const_double_from_real_value (TWO32r, DFmode);
5200 tmp[0] = force_reg (V4DFmode, CONST0_RTX (V4DFmode));
5201 tmp[1] = force_reg (V4DFmode, ix86_build_const_vector (V4DFmode, 1, x));
5203 for (i = 2; i < 5; i++)
5204 tmp[i] = gen_reg_rtx (V4DFmode);
5205 emit_insn (gen_avx_cvtdq2pd256_2 (tmp[2], operands[1]));
5206 emit_insn (gen_rtx_SET (VOIDmode, tmp[3],
5207 gen_rtx_LT (V4DFmode, tmp[2], tmp[0])));
5208 emit_insn (gen_andv4df3 (tmp[4], tmp[3], tmp[1]));
5209 emit_insn (gen_addv4df3 (operands[0], tmp[2], tmp[4]));
;; Unsigned int -> double conversion of the low half of a V16SI, emitted
;; procedurally:
;;   tmp[0] = zero V8DF, tmp[1] = V8DF vector of 2^32 (real_ldexp 1.0, 32)
;;   tmp[2] = gen_avx512f_cvtdq2pd512_2 (operand 1)
;;   k      = tmp[2] < tmp[0], as a QImode value
;;   tmp[2] = gen_addv8df3_mask (tmp[2], tmp[2], tmp[1], tmp[2], k)
;;   operand 0 = tmp[2]
;; NOTE(review): the masked add presumably takes (dest, src1, src2, merge,
;; mask), adding 2^32 only in lanes selected by k -- confirm against the
;; addv8df3_mask pattern.
5213 (define_expand "vec_unpacku_float_lo_v16si"
5214 [(match_operand:V8DF 0 "register_operand")
5215 (match_operand:V16SI 1 "nonimmediate_operand")]
5218 REAL_VALUE_TYPE TWO32r;
5221 real_ldexp (&TWO32r, &dconst1, 32);
5222 x = const_double_from_real_value (TWO32r, DFmode);
5224 tmp[0] = force_reg (V8DFmode, CONST0_RTX (V8DFmode));
5225 tmp[1] = force_reg (V8DFmode, ix86_build_const_vector (V8DFmode, 1, x));
5226 tmp[2] = gen_reg_rtx (V8DFmode);
5227 k = gen_reg_rtx (QImode);
5229 emit_insn (gen_avx512f_cvtdq2pd512_2 (tmp[2], operands[1]));
5230 emit_insn (gen_rtx_SET (VOIDmode, k,
5231 gen_rtx_LT (QImode, tmp[2], tmp[0])));
5232 emit_insn (gen_addv8df3_mask (tmp[2], tmp[2], tmp[1], tmp[2], k));
5233 emit_move_insn (operands[0], tmp[2]);
;; Pack-with-truncation for the VF2_512_256 modes: operands 1 and 2 are
;; each narrowed with float_truncate to <sf2dfmode>, and the <ssePSmode>
;; result is a vec_concat.  operands[3] and operands[4] are fresh
;; <sf2dfmode> scratch registers.
;; NOTE(review): the destinations of the two truncations and the
;; vec_concat operands are on lines not shown -- presumably match_dup 3
;; and match_dup 4; confirm.
5237 (define_expand "vec_pack_trunc_<mode>"
5239 (float_truncate:<sf2dfmode>
5240 (match_operand:VF2_512_256 1 "nonimmediate_operand")))
5242 (float_truncate:<sf2dfmode>
5243 (match_operand:VF2_512_256 2 "nonimmediate_operand")))
5244 (set (match_operand:<ssePSmode> 0 "register_operand")
5245 (vec_concat:<ssePSmode>
5250 operands[3] = gen_reg_rtx (<sf2dfmode>mode);
5251 operands[4] = gen_reg_rtx (<sf2dfmode>mode);
;; Packs two V2DF operands into one V4SF.  Two strategies are visible:
;;  - when TARGET_AVX && !TARGET_PREFER_AVX128: concatenate the operands
;;    into a V4DF temporary (operand 1 is forced into a register first) and
;;    convert it with gen_avx_cvtpd2ps256;
;;  - the remaining statements convert each operand with gen_sse2_cvtpd2ps
;;    into its own V4SF temporary and combine them with gen_sse_movlhps.
;; NOTE(review): the else/brace lines separating the two paths are not
;; visible in this excerpt.
5254 (define_expand "vec_pack_trunc_v2df"
5255 [(match_operand:V4SF 0 "register_operand")
5256 (match_operand:V2DF 1 "nonimmediate_operand")
5257 (match_operand:V2DF 2 "nonimmediate_operand")]
5262 if (TARGET_AVX && !TARGET_PREFER_AVX128)
5264 tmp0 = gen_reg_rtx (V4DFmode);
5265 tmp1 = force_reg (V2DFmode, operands[1]);
5267 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
5268 emit_insn (gen_avx_cvtpd2ps256 (operands[0], tmp0));
5272 tmp0 = gen_reg_rtx (V4SFmode);
5273 tmp1 = gen_reg_rtx (V4SFmode);
5275 emit_insn (gen_sse2_cvtpd2ps (tmp0, operands[1]));
5276 emit_insn (gen_sse2_cvtpd2ps (tmp1, operands[2]));
5277 emit_insn (gen_sse_movlhps (operands[0], tmp0, tmp1));
;; Packs two V8DF operands into one V16SI: each operand is converted with
;; gen_fix_truncv8dfv8si2 into a V8SI temporary, and the two temporaries
;; are concatenated (r1 first, then r2) with gen_avx_vec_concatv16si.
5282 (define_expand "vec_pack_sfix_trunc_v8df"
5283 [(match_operand:V16SI 0 "register_operand")
5284 (match_operand:V8DF 1 "nonimmediate_operand")
5285 (match_operand:V8DF 2 "nonimmediate_operand")]
5290 r1 = gen_reg_rtx (V8SImode);
5291 r2 = gen_reg_rtx (V8SImode);
5293 emit_insn (gen_fix_truncv8dfv8si2 (r1, operands[1]));
5294 emit_insn (gen_fix_truncv8dfv8si2 (r2, operands[2]));
5295 emit_insn (gen_avx_vec_concatv16si (operands[0], r1, r2));
;; Packs two V4DF operands into one V8SI: each operand is converted with
;; gen_fix_truncv4dfv4si2 into a V4SI temporary, and the two temporaries
;; are concatenated (r1 first, then r2) with gen_avx_vec_concatv8si.
5299 (define_expand "vec_pack_sfix_trunc_v4df"
5300 [(match_operand:V8SI 0 "register_operand")
5301 (match_operand:V4DF 1 "nonimmediate_operand")
5302 (match_operand:V4DF 2 "nonimmediate_operand")]
5307 r1 = gen_reg_rtx (V4SImode);
5308 r2 = gen_reg_rtx (V4SImode);
5310 emit_insn (gen_fix_truncv4dfv4si2 (r1, operands[1]));
5311 emit_insn (gen_fix_truncv4dfv4si2 (r2, operands[2]));
5312 emit_insn (gen_avx_vec_concatv8si (operands[0], r1, r2));
;; Packs two V2DF operands into one V4SI using the truncating conversion.
;;  - when TARGET_AVX && !TARGET_PREFER_AVX128: concatenate the operands
;;    into a V4DF temporary and convert once with gen_fix_truncv4dfv4si2;
;;  - the remaining statements convert each operand with
;;    gen_sse2_cvttpd2dq into a V4SI temporary, interleave the low V2DI
;;    halves of the two results, and move the V2DI temporary into operand 0
;;    viewed as V4SI.
;; NOTE(review): the else/brace lines separating the two paths are not
;; visible in this excerpt.
5316 (define_expand "vec_pack_sfix_trunc_v2df"
5317 [(match_operand:V4SI 0 "register_operand")
5318 (match_operand:V2DF 1 "nonimmediate_operand")
5319 (match_operand:V2DF 2 "nonimmediate_operand")]
5322 rtx tmp0, tmp1, tmp2;
5324 if (TARGET_AVX && !TARGET_PREFER_AVX128)
5326 tmp0 = gen_reg_rtx (V4DFmode);
5327 tmp1 = force_reg (V2DFmode, operands[1]);
5329 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
5330 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp0));
5334 tmp0 = gen_reg_rtx (V4SImode);
5335 tmp1 = gen_reg_rtx (V4SImode);
5336 tmp2 = gen_reg_rtx (V2DImode);
5338 emit_insn (gen_sse2_cvttpd2dq (tmp0, operands[1]));
5339 emit_insn (gen_sse2_cvttpd2dq (tmp1, operands[2]));
5340 emit_insn (gen_vec_interleave_lowv2di (tmp2,
5341 gen_lowpart (V2DImode, tmp0),
5342 gen_lowpart (V2DImode, tmp1)));
5343 emit_move_insn (operands[0], gen_lowpart (V4SImode, tmp2));
;; For a double-precision vector mode, the SI integer vector mode with
;; twice as many elements; the result mode of packing two such vectors.
5348 (define_mode_attr ssepackfltmode
5349 [(V8DF "V16SI") (V4DF "V8SI") (V2DF "V4SI")])
;; Packs two VF2 operands into one <ssepackfltmode> vector of unsigned
;; truncated integers.
;;  - For V8DFmode each operand is converted with gen_ufix_truncv8dfv8si2
;;    and the two V8SI results are concatenated.
;;  - Otherwise each operand goes through
;;    ix86_expand_adjust_ufix_to_sfix_si, which returns an adjusted value
;;    (tmp[0], tmp[1]) and fills a second output (tmp[2], tmp[3]).  The
;;    adjusted values are packed with the signed vec_pack_sfix_trunc
;;    expander into tmp[4]; tmp[5] is built from tmp[2]/tmp[3] by
;;    ix86_expand_vec_extract_even_odd (in V8SF when the result mode is
;;    not V4SI and AVX2 is unavailable); the final result is
;;    tmp[4] XOR tmp[5].
;; NOTE(review): the helper presumably returns a per-lane correction in
;; its second output, and the trailing 0 presumably selects even elements;
;; confirm against i386.c.
5351 (define_expand "vec_pack_ufix_trunc_<mode>"
5352 [(match_operand:<ssepackfltmode> 0 "register_operand")
5353 (match_operand:VF2 1 "register_operand")
5354 (match_operand:VF2 2 "register_operand")]
5357 if (<MODE>mode == V8DFmode)
5361 r1 = gen_reg_rtx (V8SImode);
5362 r2 = gen_reg_rtx (V8SImode);
5364 emit_insn (gen_ufix_truncv8dfv8si2 (r1, operands[1]));
5365 emit_insn (gen_ufix_truncv8dfv8si2 (r2, operands[2]));
5366 emit_insn (gen_avx_vec_concatv16si (operands[0], r1, r2));
5371 tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]);
5372 tmp[1] = ix86_expand_adjust_ufix_to_sfix_si (operands[2], &tmp[3]);
5373 tmp[4] = gen_reg_rtx (<ssepackfltmode>mode);
5374 emit_insn (gen_vec_pack_sfix_trunc_<mode> (tmp[4], tmp[0], tmp[1]));
5375 if (<ssepackfltmode>mode == V4SImode || TARGET_AVX2)
5377 tmp[5] = gen_reg_rtx (<ssepackfltmode>mode);
5378 ix86_expand_vec_extract_even_odd (tmp[5], tmp[2], tmp[3], 0);
5382 tmp[5] = gen_reg_rtx (V8SFmode);
5383 ix86_expand_vec_extract_even_odd (tmp[5], gen_lowpart (V8SFmode, tmp[2]),
5384 gen_lowpart (V8SFmode, tmp[3]), 0);
5385 tmp[5] = gen_lowpart (V8SImode, tmp[5]);
5387 tmp[6] = expand_simple_binop (<ssepackfltmode>mode, XOR, tmp[4], tmp[5],
5388 operands[0], 0, OPTAB_DIRECT);
5389 if (tmp[6] != operands[0])
5390 emit_move_insn (operands[0], tmp[6]);
;; Packs two V4DF operands into one V8SI using the non-truncating
;; conversion: each operand is converted with gen_avx_cvtpd2dq256 into a
;; V4SI temporary, and the two temporaries are concatenated (r1 first,
;; then r2) with gen_avx_vec_concatv8si.
5396 (define_expand "vec_pack_sfix_v4df"
5397 [(match_operand:V8SI 0 "register_operand")
5398 (match_operand:V4DF 1 "nonimmediate_operand")
5399 (match_operand:V4DF 2 "nonimmediate_operand")]
5404 r1 = gen_reg_rtx (V4SImode);
5405 r2 = gen_reg_rtx (V4SImode);
5407 emit_insn (gen_avx_cvtpd2dq256 (r1, operands[1]));
5408 emit_insn (gen_avx_cvtpd2dq256 (r2, operands[2]));
5409 emit_insn (gen_avx_vec_concatv8si (operands[0], r1, r2));
;; Packs two V2DF operands into one V4SI using the non-truncating
;; conversion.
;;  - when TARGET_AVX && !TARGET_PREFER_AVX128: concatenate the operands
;;    into a V4DF temporary and convert once with gen_avx_cvtpd2dq256;
;;  - the remaining statements convert each operand with gen_sse2_cvtpd2dq
;;    into a V4SI temporary, interleave the low V2DI halves of the two
;;    results, and move the V2DI temporary into operand 0 viewed as V4SI.
;; NOTE(review): the else/brace lines separating the two paths are not
;; visible in this excerpt.
5413 (define_expand "vec_pack_sfix_v2df"
5414 [(match_operand:V4SI 0 "register_operand")
5415 (match_operand:V2DF 1 "nonimmediate_operand")
5416 (match_operand:V2DF 2 "nonimmediate_operand")]
5419 rtx tmp0, tmp1, tmp2;
5421 if (TARGET_AVX && !TARGET_PREFER_AVX128)
5423 tmp0 = gen_reg_rtx (V4DFmode);
5424 tmp1 = force_reg (V2DFmode, operands[1]);
5426 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
5427 emit_insn (gen_avx_cvtpd2dq256 (operands[0], tmp0));
5431 tmp0 = gen_reg_rtx (V4SImode);
5432 tmp1 = gen_reg_rtx (V4SImode);
5433 tmp2 = gen_reg_rtx (V2DImode);
5435 emit_insn (gen_sse2_cvtpd2dq (tmp0, operands[1]));
5436 emit_insn (gen_sse2_cvtpd2dq (tmp1, operands[2]));
5437 emit_insn (gen_vec_interleave_lowv2di (tmp2,
5438 gen_lowpart (V2DImode, tmp0),
5439 gen_lowpart (V2DImode, tmp1)));
5440 emit_move_insn (operands[0], gen_lowpart (V4SImode, tmp2));
5445 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5447 ;; Parallel single-precision floating point element swizzling
5449 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander wrapper around sse_movhlps that accepts nonimmediate operands
;; everywhere.  It lets ix86_fixup_binary_operands pick a usable
;; destination, emits gen_sse_movhlps into it, and copies the result back
;; to operand 0 when a different destination was chosen.
5451 (define_expand "sse_movhlps_exp"
5452 [(set (match_operand:V4SF 0 "nonimmediate_operand")
5455 (match_operand:V4SF 1 "nonimmediate_operand")
5456 (match_operand:V4SF 2 "nonimmediate_operand"))
5457 (parallel [(const_int 6)
5463 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
5465 emit_insn (gen_sse_movhlps (dst, operands[1], operands[2]));
5467 /* Fix up the destination if needed. */
5468 if (dst != operands[0])
5469 emit_move_insn (operands[0], dst);
;; movhlps-style shuffle of two V4SF operands, with five alternatives:
;; register/register (movhlps, vmovhlps), offsettable-memory second source
;; (movlps/vmovlps reading %H2), and memory destination (%vmovhps store).
;; The condition requires TARGET_SSE and rejects operands 1 and 2 both
;; being memory.  The "isa" attribute ties alternatives 0/2 to non-AVX and
;; 1/3 to AVX.
;; NOTE(review): only the first selector index (6) is visible; the rest of
;; the parallel is on lines not shown.
5474 (define_insn "sse_movhlps"
5475 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,m")
5478 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
5479 (match_operand:V4SF 2 "nonimmediate_operand" " x,x,o,o,x"))
5480 (parallel [(const_int 6)
5484 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
5486 movhlps\t{%2, %0|%0, %2}
5487 vmovhlps\t{%2, %1, %0|%0, %1, %2}
5488 movlps\t{%H2, %0|%0, %H2}
5489 vmovlps\t{%H2, %1, %0|%0, %1, %H2}
5490 %vmovhps\t{%2, %0|%q0, %2}"
5491 [(set_attr "isa" "noavx,avx,noavx,avx,*")
5492 (set_attr "type" "ssemov")
5493 (set_attr "ssememalign" "64")
5494 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
5495 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
;; Expander wrapper around sse_movlhps that accepts nonimmediate operands
;; everywhere.  It lets ix86_fixup_binary_operands pick a usable
;; destination, emits gen_sse_movlhps into it, and copies the result back
;; to operand 0 when a different destination was chosen.
5497 (define_expand "sse_movlhps_exp"
5498 [(set (match_operand:V4SF 0 "nonimmediate_operand")
5501 (match_operand:V4SF 1 "nonimmediate_operand")
5502 (match_operand:V4SF 2 "nonimmediate_operand"))
5503 (parallel [(const_int 0)
5509 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
5511 emit_insn (gen_sse_movlhps (dst, operands[1], operands[2]));
5513 /* Fix up the destination if needed. */
5514 if (dst != operands[0])
5515 emit_move_insn (operands[0], dst);
;; movlhps-style shuffle of two V4SF operands, with five alternatives:
;; register/register (movlhps, vmovlhps), memory second source
;; (movhps/vmovhps), and offsettable-memory destination (%vmovlps storing
;; to %H0).  Requires TARGET_SSE and ix86_binary_operator_ok.  The "isa"
;; attribute ties alternatives 0/2 to non-AVX and 1/3 to AVX.
;; NOTE(review): only the first selector index (0) is visible; the rest of
;; the parallel is on lines not shown.
5520 (define_insn "sse_movlhps"
5521 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,o")
5524 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
5525 (match_operand:V4SF 2 "nonimmediate_operand" " x,x,m,m,x"))
5526 (parallel [(const_int 0)
5530 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
5532 movlhps\t{%2, %0|%0, %2}
5533 vmovlhps\t{%2, %1, %0|%0, %1, %2}
5534 movhps\t{%2, %0|%0, %q2}
5535 vmovhps\t{%2, %1, %0|%0, %1, %q2}
5536 %vmovlps\t{%2, %H0|%H0, %2}"
5537 [(set_attr "isa" "noavx,avx,noavx,avx,*")
5538 (set_attr "type" "ssemov")
5539 (set_attr "ssememalign" "64")
5540 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
5541 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
;; 512-bit vunpckhps.  The selector interleaves elements 2,3 of each group
;; of four from operand 1 with the matching elements of operand 2 (indices
;; offset by 16), group by group: 2,18,3,19, 6,22,7,23, ...  Emitted with
;; the optional mask operand.
;; NOTE(review): the vec_select/vec_concat header and the condition are on
;; lines not visible in this excerpt.
5543 (define_insn "<mask_codefor>avx512f_unpckhps512<mask_name>"
5544 [(set (match_operand:V16SF 0 "register_operand" "=v")
5547 (match_operand:V16SF 1 "register_operand" "v")
5548 (match_operand:V16SF 2 "nonimmediate_operand" "vm"))
5549 (parallel [(const_int 2) (const_int 18)
5550 (const_int 3) (const_int 19)
5551 (const_int 6) (const_int 22)
5552 (const_int 7) (const_int 23)
5553 (const_int 10) (const_int 26)
5554 (const_int 11) (const_int 27)
5555 (const_int 14) (const_int 30)
5556 (const_int 15) (const_int 31)])))]
5558 "vunpckhps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
5559 [(set_attr "type" "sselog")
5560 (set_attr "prefix" "evex")
5561 (set_attr "mode" "V16SF")])
5563 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; 256-bit vunpckhps: the selector takes elements 2,3 and 6,7 of operand 1
;; interleaved with the matching elements of operand 2 (indices offset by
;; 8).  Enabled under TARGET_AVX && <mask_avx512vl_condition>.
;; NOTE(review): "prefix" is the fixed value "vex" although the pattern
;; has a <mask_name> variant; avx_shufps256_1 uses <mask_prefix> -- confirm
;; which is intended.
5564 (define_insn "avx_unpckhps256<mask_name>"
5565 [(set (match_operand:V8SF 0 "register_operand" "=v")
5568 (match_operand:V8SF 1 "register_operand" "v")
5569 (match_operand:V8SF 2 "nonimmediate_operand" "vm"))
5570 (parallel [(const_int 2) (const_int 10)
5571 (const_int 3) (const_int 11)
5572 (const_int 6) (const_int 14)
5573 (const_int 7) (const_int 15)])))]
5574 "TARGET_AVX && <mask_avx512vl_condition>"
5575 "vunpckhps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
5576 [(set_attr "type" "sselog")
5577 (set_attr "prefix" "vex")
5578 (set_attr "mode" "V8SF")])
;; Full (cross-lane) high interleave of two V8SF operands, built from
;; three selects: one with the in-lane "low" pattern 0,8,1,9,4,12,5,13,
;; one with the in-lane "high" pattern 2,10,3,11,6,14,7,15, and a final
;; select with indices 4..7 and 12..15.  operands[3] and operands[4] are
;; fresh V8SF scratch registers.
;; NOTE(review): the set destinations and the inputs of the second and
;; third selects are on lines not shown -- presumably match_dup 3 /
;; match_dup 4, with the last select taking the upper 128 bits of each
;; intermediate; confirm.
5580 (define_expand "vec_interleave_highv8sf"
5584 (match_operand:V8SF 1 "register_operand" "x")
5585 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
5586 (parallel [(const_int 0) (const_int 8)
5587 (const_int 1) (const_int 9)
5588 (const_int 4) (const_int 12)
5589 (const_int 5) (const_int 13)])))
5595 (parallel [(const_int 2) (const_int 10)
5596 (const_int 3) (const_int 11)
5597 (const_int 6) (const_int 14)
5598 (const_int 7) (const_int 15)])))
5599 (set (match_operand:V8SF 0 "register_operand")
5604 (parallel [(const_int 4) (const_int 5)
5605 (const_int 6) (const_int 7)
5606 (const_int 12) (const_int 13)
5607 (const_int 14) (const_int 15)])))]
5610 operands[3] = gen_reg_rtx (V8SFmode);
5611 operands[4] = gen_reg_rtx (V8SFmode);
;; 128-bit high interleave (selector 2,6,3,7).  Alternative 0 is the
;; two-operand legacy "unpckhps" with operand 1 tied to the destination;
;; alternative 1 is the three-operand "vunpckhps" with the optional mask
;; operand.  Enabled under TARGET_SSE && <mask_avx512vl_condition>.
5614 (define_insn "vec_interleave_highv4sf<mask_name>"
5615 [(set (match_operand:V4SF 0 "register_operand" "=x,v")
5618 (match_operand:V4SF 1 "register_operand" "0,v")
5619 (match_operand:V4SF 2 "nonimmediate_operand" "xm,vm"))
5620 (parallel [(const_int 2) (const_int 6)
5621 (const_int 3) (const_int 7)])))]
5622 "TARGET_SSE && <mask_avx512vl_condition>"
5624 unpckhps\t{%2, %0|%0, %2}
5625 vunpckhps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
5626 [(set_attr "isa" "noavx,avx")
5627 (set_attr "type" "sselog")
5628 (set_attr "prefix" "orig,vex")
5629 (set_attr "mode" "V4SF")])
;; 512-bit vunpcklps.  The selector interleaves elements 0,1 of each group
;; of four from operand 1 with the matching elements of operand 2 (indices
;; offset by 16), group by group: 0,16,1,17, 4,20,5,21, ...  Emitted with
;; the optional mask operand.
;; NOTE(review): the vec_select/vec_concat header and the condition are on
;; lines not visible in this excerpt.
5631 (define_insn "<mask_codefor>avx512f_unpcklps512<mask_name>"
5632 [(set (match_operand:V16SF 0 "register_operand" "=v")
5635 (match_operand:V16SF 1 "register_operand" "v")
5636 (match_operand:V16SF 2 "nonimmediate_operand" "vm"))
5637 (parallel [(const_int 0) (const_int 16)
5638 (const_int 1) (const_int 17)
5639 (const_int 4) (const_int 20)
5640 (const_int 5) (const_int 21)
5641 (const_int 8) (const_int 24)
5642 (const_int 9) (const_int 25)
5643 (const_int 12) (const_int 28)
5644 (const_int 13) (const_int 29)])))]
5646 "vunpcklps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
5647 [(set_attr "type" "sselog")
5648 (set_attr "prefix" "evex")
5649 (set_attr "mode" "V16SF")])
5651 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; 256-bit vunpcklps: the selector takes elements 0,1 and 4,5 of operand 1
;; interleaved with the matching elements of operand 2 (indices offset by
;; 8).  Enabled under TARGET_AVX && <mask_avx512vl_condition>.
;; NOTE(review): "prefix" is the fixed value "vex" although the pattern
;; has a <mask_name> variant; avx_shufps256_1 uses <mask_prefix> -- confirm
;; which is intended.
5652 (define_insn "avx_unpcklps256<mask_name>"
5653 [(set (match_operand:V8SF 0 "register_operand" "=v")
5656 (match_operand:V8SF 1 "register_operand" "v")
5657 (match_operand:V8SF 2 "nonimmediate_operand" "vm"))
5658 (parallel [(const_int 0) (const_int 8)
5659 (const_int 1) (const_int 9)
5660 (const_int 4) (const_int 12)
5661 (const_int 5) (const_int 13)])))]
5662 "TARGET_AVX && <mask_avx512vl_condition>"
5663 "vunpcklps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
5664 [(set_attr "type" "sselog")
5665 (set_attr "prefix" "vex")
5666 (set_attr "mode" "V8SF")])
;; Explicitly masked 128-bit vunpcklps (selector 0,4,1,5).  Operand 3 is a
;; vector_move_operand constrained to "0C" and operand 4 is a QImode
;; register constrained to "Yk"; the template prints operand 4 in braces
;; after the destination, followed by %N3.
;; NOTE(review): operands 3 and 4 are presumably the merge source and mask
;; of a vec_merge whose header line, like the condition, is not visible
;; here; confirm.
5668 (define_insn "unpcklps128_mask"
5669 [(set (match_operand:V4SF 0 "register_operand" "=v")
5673 (match_operand:V4SF 1 "register_operand" "v")
5674 (match_operand:V4SF 2 "nonimmediate_operand" "vm"))
5675 (parallel [(const_int 0) (const_int 4)
5676 (const_int 1) (const_int 5)]))
5677 (match_operand:V4SF 3 "vector_move_operand" "0C")
5678 (match_operand:QI 4 "register_operand" "Yk")))]
5680 "vunpcklps\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
5681 [(set_attr "type" "sselog")
5682 (set_attr "prefix" "evex")
5683 (set_attr "mode" "V4SF")])
;; Full (cross-lane) low interleave of two V8SF operands, built from
;; three selects: one with the in-lane "low" pattern 0,8,1,9,4,12,5,13,
;; one with the in-lane "high" pattern 2,10,3,11,6,14,7,15, and a final
;; select with indices 0..3 and 8..11.  operands[3] and operands[4] are
;; fresh V8SF scratch registers.
;; NOTE(review): the set destinations and the inputs of the second and
;; third selects are on lines not shown -- presumably match_dup 3 /
;; match_dup 4, with the last select taking the lower 128 bits of each
;; intermediate; confirm.
5685 (define_expand "vec_interleave_lowv8sf"
5689 (match_operand:V8SF 1 "register_operand" "x")
5690 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
5691 (parallel [(const_int 0) (const_int 8)
5692 (const_int 1) (const_int 9)
5693 (const_int 4) (const_int 12)
5694 (const_int 5) (const_int 13)])))
5700 (parallel [(const_int 2) (const_int 10)
5701 (const_int 3) (const_int 11)
5702 (const_int 6) (const_int 14)
5703 (const_int 7) (const_int 15)])))
5704 (set (match_operand:V8SF 0 "register_operand")
5709 (parallel [(const_int 0) (const_int 1)
5710 (const_int 2) (const_int 3)
5711 (const_int 8) (const_int 9)
5712 (const_int 10) (const_int 11)])))]
5715 operands[3] = gen_reg_rtx (V8SFmode);
5716 operands[4] = gen_reg_rtx (V8SFmode);
;; 128-bit low interleave (selector 0,4,1,5).  Alternative 0 is the
;; two-operand legacy "unpcklps" with operand 1 tied to the destination;
;; alternative 1 is the three-operand "vunpcklps".  Unlike
;; vec_interleave_highv4sf this pattern has no <mask_name> variant; the
;; masked form is the separate unpcklps128_mask insn.
5719 (define_insn "vec_interleave_lowv4sf"
5720 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
5723 (match_operand:V4SF 1 "register_operand" "0,x")
5724 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
5725 (parallel [(const_int 0) (const_int 4)
5726 (const_int 1) (const_int 5)])))]
5729 unpcklps\t{%2, %0|%0, %2}
5730 vunpcklps\t{%2, %1, %0|%0, %1, %2}"
5731 [(set_attr "isa" "noavx,avx")
5732 (set_attr "type" "sselog")
5733 (set_attr "prefix" "orig,vex")
5734 (set_attr "mode" "V4SF")])
5736 ;; These are modeled with the same vec_concat as the others so that we
5737 ;; capture users of shufps that can use the new instructions
;; 256-bit vmovshdup: the selector duplicates each odd-indexed element
;; (1,1,3,3,5,5,7,7).  Enabled under TARGET_AVX &&
;; <mask_avx512vl_condition>; emitted with the optional mask operand.
;; NOTE(review): the second vec_concat input is on a line not shown --
;; presumably match_dup 1; confirm.
5738 (define_insn "avx_movshdup256<mask_name>"
5739 [(set (match_operand:V8SF 0 "register_operand" "=v")
5742 (match_operand:V8SF 1 "nonimmediate_operand" "vm")
5744 (parallel [(const_int 1) (const_int 1)
5745 (const_int 3) (const_int 3)
5746 (const_int 5) (const_int 5)
5747 (const_int 7) (const_int 7)])))]
5748 "TARGET_AVX && <mask_avx512vl_condition>"
5749 "vmovshdup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5750 [(set_attr "type" "sse")
5751 (set_attr "prefix" "vex")
5752 (set_attr "mode" "V8SF")])
;; 128-bit movshdup, enabled under TARGET_SSE3 &&
;; <mask_avx512vl_condition> and emitted as "%vmovshdup" with the optional
;; mask operand.
;; NOTE(review): only the first selector index (1) is visible; the rest of
;; the parallel is on lines not shown.
5754 (define_insn "sse3_movshdup<mask_name>"
5755 [(set (match_operand:V4SF 0 "register_operand" "=v")
5758 (match_operand:V4SF 1 "nonimmediate_operand" "vm")
5760 (parallel [(const_int 1)
5764 "TARGET_SSE3 && <mask_avx512vl_condition>"
5765 "%vmovshdup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5766 [(set_attr "type" "sse")
5767 (set_attr "prefix_rep" "1")
5768 (set_attr "prefix" "maybe_vex")
5769 (set_attr "mode" "V4SF")])
;; 512-bit vmovshdup: the selector duplicates each odd-indexed element
;; (1,1,3,3,...,15,15).  Emitted with the optional mask operand.
;; NOTE(review): the second vec_concat input and the condition are on
;; lines not visible in this excerpt.
5771 (define_insn "<mask_codefor>avx512f_movshdup512<mask_name>"
5772 [(set (match_operand:V16SF 0 "register_operand" "=v")
5775 (match_operand:V16SF 1 "nonimmediate_operand" "vm")
5777 (parallel [(const_int 1) (const_int 1)
5778 (const_int 3) (const_int 3)
5779 (const_int 5) (const_int 5)
5780 (const_int 7) (const_int 7)
5781 (const_int 9) (const_int 9)
5782 (const_int 11) (const_int 11)
5783 (const_int 13) (const_int 13)
5784 (const_int 15) (const_int 15)])))]
5786 "vmovshdup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5787 [(set_attr "type" "sse")
5788 (set_attr "prefix" "evex")
5789 (set_attr "mode" "V16SF")])
;; 256-bit vmovsldup: the selector duplicates each even-indexed element
;; (0,0,2,2,4,4,6,6).  Enabled under TARGET_AVX &&
;; <mask_avx512vl_condition>; emitted with the optional mask operand.
;; NOTE(review): the second vec_concat input is on a line not shown --
;; presumably match_dup 1; confirm.
5791 (define_insn "avx_movsldup256<mask_name>"
5792 [(set (match_operand:V8SF 0 "register_operand" "=v")
5795 (match_operand:V8SF 1 "nonimmediate_operand" "vm")
5797 (parallel [(const_int 0) (const_int 0)
5798 (const_int 2) (const_int 2)
5799 (const_int 4) (const_int 4)
5800 (const_int 6) (const_int 6)])))]
5801 "TARGET_AVX && <mask_avx512vl_condition>"
5802 "vmovsldup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5803 [(set_attr "type" "sse")
5804 (set_attr "prefix" "vex")
5805 (set_attr "mode" "V8SF")])
;; 128-bit movsldup, enabled under TARGET_SSE3 &&
;; <mask_avx512vl_condition> and emitted as "%vmovsldup" with the optional
;; mask operand.
;; NOTE(review): only the first selector index (0) is visible; the rest of
;; the parallel is on lines not shown.
5807 (define_insn "sse3_movsldup<mask_name>"
5808 [(set (match_operand:V4SF 0 "register_operand" "=v")
5811 (match_operand:V4SF 1 "nonimmediate_operand" "vm")
5813 (parallel [(const_int 0)
5817 "TARGET_SSE3 && <mask_avx512vl_condition>"
5818 "%vmovsldup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5819 [(set_attr "type" "sse")
5820 (set_attr "prefix_rep" "1")
5821 (set_attr "prefix" "maybe_vex")
5822 (set_attr "mode" "V4SF")])
;; 512-bit vmovsldup: the selector duplicates each even-indexed element
;; (0,0,2,2,...,14,14).  Emitted with the optional mask operand.
;; NOTE(review): the second vec_concat input and the condition are on
;; lines not visible in this excerpt.
5824 (define_insn "<mask_codefor>avx512f_movsldup512<mask_name>"
5825 [(set (match_operand:V16SF 0 "register_operand" "=v")
5828 (match_operand:V16SF 1 "nonimmediate_operand" "vm")
5830 (parallel [(const_int 0) (const_int 0)
5831 (const_int 2) (const_int 2)
5832 (const_int 4) (const_int 4)
5833 (const_int 6) (const_int 6)
5834 (const_int 8) (const_int 8)
5835 (const_int 10) (const_int 10)
5836 (const_int 12) (const_int 12)
5837 (const_int 14) (const_int 14)])))]
5839 "vmovsldup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5840 [(set_attr "type" "sse")
5841 (set_attr "prefix" "evex")
5842 (set_attr "mode" "V16SF")])
;; Expander taking the shuffle control as a single integer (operand 3) and
;; decoding it into the eight explicit element indices expected by
;; avx_shufps256_1.  Each 2-bit field of the control is used twice: fields
;; 0 and 1 index 0..3 and again +4; fields 2 and 3 are offset by +8 and
;; again by +12.  <mask_expand4_args> appends the extra operands of the
;; masked variant.
5844 (define_expand "avx_shufps256<mask_expand4_name>"
5845 [(match_operand:V8SF 0 "register_operand")
5846 (match_operand:V8SF 1 "register_operand")
5847 (match_operand:V8SF 2 "nonimmediate_operand")
5848 (match_operand:SI 3 "const_int_operand")]
5851 int mask = INTVAL (operands[3]);
5852 emit_insn (gen_avx_shufps256_1<mask_expand4_name> (operands[0],
5855 GEN_INT ((mask >> 0) & 3),
5856 GEN_INT ((mask >> 2) & 3),
5857 GEN_INT (((mask >> 4) & 3) + 8),
5858 GEN_INT (((mask >> 6) & 3) + 8),
5859 GEN_INT (((mask >> 0) & 3) + 4),
5860 GEN_INT (((mask >> 2) & 3) + 4),
5861 GEN_INT (((mask >> 4) & 3) + 12),
5862 GEN_INT (((mask >> 6) & 3) + 12)
5863 <mask_expand4_args>));
5867 ;; Each 2-bit field of the mask selects 2 elements, one in each 128-bit lane.
;; 256-bit vshufps with the selection spelled out as eight index operands
;; (3..10).  The condition requires operands 7..10 to equal operands 3..6
;; plus 4, i.e. both 128-bit lanes must use the same in-lane selection.
;; The output code rebuilds the 8-bit immediate from operands 3..6
;; (operands 5 and 6 are rebased by -8) and stores it in operands[3]
;; before returning the template.
5868 (define_insn "avx_shufps256_1<mask_name>"
5869 [(set (match_operand:V8SF 0 "register_operand" "=v")
5872 (match_operand:V8SF 1 "register_operand" "v")
5873 (match_operand:V8SF 2 "nonimmediate_operand" "vm"))
5874 (parallel [(match_operand 3 "const_0_to_3_operand" )
5875 (match_operand 4 "const_0_to_3_operand" )
5876 (match_operand 5 "const_8_to_11_operand" )
5877 (match_operand 6 "const_8_to_11_operand" )
5878 (match_operand 7 "const_4_to_7_operand" )
5879 (match_operand 8 "const_4_to_7_operand" )
5880 (match_operand 9 "const_12_to_15_operand")
5881 (match_operand 10 "const_12_to_15_operand")])))]
5883 && <mask_avx512vl_condition>
5884 && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
5885 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
5886 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
5887 && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4))"
5890 mask = INTVAL (operands[3]);
5891 mask |= INTVAL (operands[4]) << 2;
5892 mask |= (INTVAL (operands[5]) - 8) << 4;
5893 mask |= (INTVAL (operands[6]) - 8) << 6;
5894 operands[3] = GEN_INT (mask);
5896 return "vshufps\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
5898 [(set_attr "type" "sseshuf")
5899 (set_attr "length_immediate" "1")
5900 (set_attr "prefix" "<mask_prefix>")
5901 (set_attr "mode" "V8SF")])
;; Expander taking the shuffle control as a single integer (operand 3) and
;; decoding it into the four explicit element indices expected by
;; sse_shufps_v4sf: fields 0 and 1 index 0..3, fields 2 and 3 are offset
;; by +4.  <mask_expand4_args> appends the extra operands of the masked
;; variant.
5903 (define_expand "sse_shufps<mask_expand4_name>"
5904 [(match_operand:V4SF 0 "register_operand")
5905 (match_operand:V4SF 1 "register_operand")
5906 (match_operand:V4SF 2 "nonimmediate_operand")
5907 (match_operand:SI 3 "const_int_operand")]
5910 int mask = INTVAL (operands[3]);
5911 emit_insn (gen_sse_shufps_v4sf<mask_expand4_name> (operands[0],
5914 GEN_INT ((mask >> 0) & 3),
5915 GEN_INT ((mask >> 2) & 3),
5916 GEN_INT (((mask >> 4) & 3) + 4),
5917 GEN_INT (((mask >> 6) & 3) + 4)
5918 <mask_expand4_args>));
;; Explicitly masked 128-bit vshufps.  Operands 3..6 are the four element
;; indices; operand 7 is a vector_move_operand constrained to "0C" and
;; operand 8 a QImode register constrained to "Yk".  The output code
;; rebuilds the 8-bit immediate from operands 3..6 (operands 5 and 6 are
;; rebased by -4) and stores it in operands[3]; the template prints
;; operand 8 in braces after the destination, followed by %N7.
;; NOTE(review): the vec_merge header, the condition and the declaration
;; of the local "mask" are on lines not visible in this excerpt.
5922 (define_insn "sse_shufps_v4sf_mask"
5923 [(set (match_operand:V4SF 0 "register_operand" "=v")
5927 (match_operand:V4SF 1 "register_operand" "v")
5928 (match_operand:V4SF 2 "nonimmediate_operand" "vm"))
5929 (parallel [(match_operand 3 "const_0_to_3_operand")
5930 (match_operand 4 "const_0_to_3_operand")
5931 (match_operand 5 "const_4_to_7_operand")
5932 (match_operand 6 "const_4_to_7_operand")]))
5933 (match_operand:V4SF 7 "vector_move_operand" "0C")
5934 (match_operand:QI 8 "register_operand" "Yk")))]
5938 mask |= INTVAL (operands[3]) << 0;
5939 mask |= INTVAL (operands[4]) << 2;
5940 mask |= (INTVAL (operands[5]) - 4) << 4;
5941 mask |= (INTVAL (operands[6]) - 4) << 6;
5942 operands[3] = GEN_INT (mask);
5944 return "vshufps\t{%3, %2, %1, %0%{%8%}%N7|%0%{%8%}%N7, %1, %2, %3}";
5946 [(set_attr "type" "sseshuf")
5947 (set_attr "length_immediate" "1")
5948 (set_attr "prefix" "evex")
5949 (set_attr "mode" "V4SF")])
;; Unmasked 128-bit shufps for the VI4F_128 modes.  Selects two elements
;; from operand 1 (indices 0..3) and two from operand 2 (indices 4..7) out
;; of their concatenation.  The output code rebuilds the 8-bit immediate
;; from operands 3..6 (operands 5 and 6 are rebased by -4), stores it in
;; operands[3], and then switches on which_alternative to return either
;; the two-operand "shufps" or the three-operand "vshufps" template.
;; NOTE(review): the condition, the declaration of the local "mask" and
;; the case labels are on lines not visible in this excerpt.
5951 (define_insn "sse_shufps_<mode>"
5952 [(set (match_operand:VI4F_128 0 "register_operand" "=x,x")
5953 (vec_select:VI4F_128
5954 (vec_concat:<ssedoublevecmode>
5955 (match_operand:VI4F_128 1 "register_operand" "0,x")
5956 (match_operand:VI4F_128 2 "nonimmediate_operand" "xm,xm"))
5957 (parallel [(match_operand 3 "const_0_to_3_operand")
5958 (match_operand 4 "const_0_to_3_operand")
5959 (match_operand 5 "const_4_to_7_operand")
5960 (match_operand 6 "const_4_to_7_operand")])))]
5964 mask |= INTVAL (operands[3]) << 0;
5965 mask |= INTVAL (operands[4]) << 2;
5966 mask |= (INTVAL (operands[5]) - 4) << 4;
5967 mask |= (INTVAL (operands[6]) - 4) << 6;
5968 operands[3] = GEN_INT (mask);
5970 switch (which_alternative)
5973 return "shufps\t{%3, %2, %0|%0, %2, %3}";
5975 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
5980 [(set_attr "isa" "noavx,avx")
5981 (set_attr "type" "sseshuf")
5982 (set_attr "length_immediate" "1")
5983 (set_attr "prefix" "orig,vex")
5984 (set_attr "mode" "V4SF")])
;; Extracts elements 2 and 3 of V4SF operand 1 into a V2SF destination.
;; Alternatives: register to memory (movhps), register to register (movhlps),
;; and offsettable memory to register (movlps reading through %H1).
5986 (define_insn "sse_storehps"
5987 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
5989 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
5990 (parallel [(const_int 2) (const_int 3)])))]
5993 %vmovhps\t{%1, %0|%q0, %1}
5994 %vmovhlps\t{%1, %d0|%d0, %1}
5995 %vmovlps\t{%H1, %d0|%d0, %H1}"
5996 [(set_attr "type" "ssemov")
5997 (set_attr "ssememalign" "64")
5998 (set_attr "prefix" "maybe_vex")
5999 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander wrapper around sse_loadhps.  ix86_fixup_binary_operands returns
;; the destination to use; the insn is emitted into it and, if that differs
;; from operand 0, the result is moved to operand 0 afterwards.
6001 (define_expand "sse_loadhps_exp"
6002 [(set (match_operand:V4SF 0 "nonimmediate_operand")
6005 (match_operand:V4SF 1 "nonimmediate_operand")
6006 (parallel [(const_int 0) (const_int 1)]))
6007 (match_operand:V2SF 2 "nonimmediate_operand")))]
6010 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
6012 emit_insn (gen_sse_loadhps (dst, operands[1], operands[2]));
6014 /* Fix up the destination if needed. */
6015 if (dst != operands[0])
6016 emit_move_insn (operands[0], dst);
;; Combines elements 0 and 1 of V4SF operand 1 with V2SF operand 2.
;; Alternatives 0-1 take operand 2 from memory (movhps / vmovhps),
;; alternatives 2-3 take it from a register (movlhps / vmovlhps), and
;; alternative 4 has an offsettable memory destination tied to operand 1 and
;; stores operand 2 through %H0 with movlps.
6021 (define_insn "sse_loadhps"
6022 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,o")
6025 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
6026 (parallel [(const_int 0) (const_int 1)]))
6027 (match_operand:V2SF 2 "nonimmediate_operand" " m,m,x,x,x")))]
6030 movhps\t{%2, %0|%0, %q2}
6031 vmovhps\t{%2, %1, %0|%0, %1, %q2}
6032 movlhps\t{%2, %0|%0, %2}
6033 vmovlhps\t{%2, %1, %0|%0, %1, %2}
6034 %vmovlps\t{%2, %H0|%H0, %2}"
6035 [(set_attr "isa" "noavx,avx,noavx,avx,*")
6036 (set_attr "type" "ssemov")
6037 (set_attr "ssememalign" "64")
6038 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
6039 (set_attr "mode" "V2SF,V2SF,V4SF,V4SF,V2SF")])
;; Extracts elements 0 and 1 of V4SF operand 1 into a V2SF destination.
;; Alternatives: register to memory (movlps), register to register (movaps),
;; and memory to register (movlps).
6041 (define_insn "sse_storelps"
6042 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
6044 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,m")
6045 (parallel [(const_int 0) (const_int 1)])))]
6048 %vmovlps\t{%1, %0|%q0, %1}
6049 %vmovaps\t{%1, %0|%0, %1}
6050 %vmovlps\t{%1, %d0|%d0, %q1}"
6051 [(set_attr "type" "ssemov")
6052 (set_attr "prefix" "maybe_vex")
6053 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander wrapper around sse_loadlps.  ix86_fixup_binary_operands returns
;; the destination to use; the insn is emitted into it and, if that differs
;; from operand 0, the result is moved to operand 0 afterwards.
6055 (define_expand "sse_loadlps_exp"
6056 [(set (match_operand:V4SF 0 "nonimmediate_operand")
6058 (match_operand:V2SF 2 "nonimmediate_operand")
6060 (match_operand:V4SF 1 "nonimmediate_operand")
6061 (parallel [(const_int 2) (const_int 3)]))))]
6064 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
6066 emit_insn (gen_sse_loadlps (dst, operands[1], operands[2]));
6068 /* Fix up the destination if needed. */
6069 if (dst != operands[0])
6070 emit_move_insn (operands[0], dst);
;; Combines V2SF operand 2 with elements 2 and 3 of V4SF operand 1.
;; Alternatives 0-1 (register operand 2) use shufps with immediate 0xe4,
;; alternatives 2-3 (memory operand 2) use movlps / vmovlps, and
;; alternative 4 stores register operand 2 to a memory destination that is
;; tied to operand 1.
6075 (define_insn "sse_loadlps"
6076 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,m")
6078 (match_operand:V2SF 2 "nonimmediate_operand" " 0,x,m,m,x")
6080 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,0,x,0")
6081 (parallel [(const_int 2) (const_int 3)]))))]
6084 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
6085 vshufps\t{$0xe4, %1, %2, %0|%0, %2, %1, 0xe4}
6086 movlps\t{%2, %0|%0, %q2}
6087 vmovlps\t{%2, %1, %0|%0, %1, %q2}
6088 %vmovlps\t{%2, %0|%q0, %2}"
6089 [(set_attr "isa" "noavx,avx,noavx,avx,*")
6090 (set_attr "type" "sseshuf,sseshuf,ssemov,ssemov,ssemov")
6091 (set_attr "ssememalign" "64")
6092 (set_attr "length_immediate" "1,1,*,*,*")
6093 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
6094 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
;; Register-only movss / vmovss on V4SF operands.  Alternative 0 is the
;; non-AVX form with operand 1 tied to the destination; alternative 1 is the
;; three-operand AVX form.
6096 (define_insn "sse_movss"
6097 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
6099 (match_operand:V4SF 2 "register_operand" " x,x")
6100 (match_operand:V4SF 1 "register_operand" " 0,x")
6104 movss\t{%2, %0|%0, %2}
6105 vmovss\t{%2, %1, %0|%0, %1, %2}"
6106 [(set_attr "isa" "noavx,avx")
6107 (set_attr "type" "ssemov")
6108 (set_attr "prefix" "orig,vex")
6109 (set_attr "mode" "SF")])
;; Duplicates element 0 of V4SF register operand 1 across a VF1_128_256
;; destination using vbroadcastss.
6111 (define_insn "avx2_vec_dup<mode>"
6112 [(set (match_operand:VF1_128_256 0 "register_operand" "=x")
6113 (vec_duplicate:VF1_128_256
6115 (match_operand:V4SF 1 "register_operand" "x")
6116 (parallel [(const_int 0)]))))]
6118 "vbroadcastss\t{%1, %0|%0, %1}"
6119 [(set_attr "type" "sselog1")
6120 (set_attr "prefix" "vex")
6121 (set_attr "mode" "<MODE>")])
;; vbroadcastss into a V8SF destination from element 0 of a V8SF register
;; source; the source is printed with the %x operand modifier.
6123 (define_insn "avx2_vec_dupv8sf_1"
6124 [(set (match_operand:V8SF 0 "register_operand" "=x")
6127 (match_operand:V8SF 1 "register_operand" "x")
6128 (parallel [(const_int 0)]))))]
6130 "vbroadcastss\t{%x1, %0|%0, %x1}"
6131 [(set_attr "type" "sselog1")
6132 (set_attr "prefix" "vex")
6133 (set_attr "mode" "V8SF")])
;; Produces a V4SF from SF operand 1.  Alternatives: vshufps $0 from a
;; register (AVX), vbroadcastss from memory (AVX), and in-place shufps $0
;; with operand 1 tied to the destination (non-AVX).
6135 (define_insn "vec_dupv4sf"
6136 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
6138 (match_operand:SF 1 "nonimmediate_operand" "x,m,0")))]
6141 vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0}
6142 vbroadcastss\t{%1, %0|%0, %1}
6143 shufps\t{$0, %0, %0|%0, %0, 0}"
6144 [(set_attr "isa" "avx,avx,noavx")
6145 (set_attr "type" "sseshuf1,ssemov,sseshuf1")
6146 (set_attr "length_immediate" "1,0,1")
6147 (set_attr "prefix_extra" "0,1,*")
6148 (set_attr "prefix" "vex,vex,orig")
6149 (set_attr "mode" "V4SF")])
6151 ;; Although insertps takes register source, we prefer
6152 ;; unpcklps with register source since it is shorter.
;; Builds a V2SF from SF operands 1 and 2.  Alternatives 0-1 use unpcklps
;; with a register operand 2, alternatives 2-3 use insertps $0x10 with a
;; memory operand 2, alternative 4 loads operand 1 with movss when operand 2
;; is the constant C, and alternatives 5-6 are the MMX-typed forms
;; (punpckldq / movd).
6153 (define_insn "*vec_concatv2sf_sse4_1"
6154 [(set (match_operand:V2SF 0 "register_operand" "=x,x,x,x,x,*y ,*y")
6156 (match_operand:SF 1 "nonimmediate_operand" " 0,x,0,x,m, 0 , m")
6157 (match_operand:SF 2 "vector_move_operand" " x,x,m,m,C,*ym, C")))]
6160 unpcklps\t{%2, %0|%0, %2}
6161 vunpcklps\t{%2, %1, %0|%0, %1, %2}
6162 insertps\t{$0x10, %2, %0|%0, %2, 0x10}
6163 vinsertps\t{$0x10, %2, %1, %0|%0, %1, %2, 0x10}
6164 %vmovss\t{%1, %0|%0, %1}
6165 punpckldq\t{%2, %0|%0, %2}
6166 movd\t{%1, %0|%0, %1}"
6167 [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
6168 (set_attr "type" "sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov")
6169 (set_attr "prefix_data16" "*,*,1,*,*,*,*")
6170 (set_attr "prefix_extra" "*,*,1,1,*,*,*")
6171 (set_attr "length_immediate" "*,*,1,1,*,*,*")
6172 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig")
6173 (set_attr "mode" "V4SF,V4SF,V4SF,V4SF,SF,DI,DI")])
6175 ;; ??? In theory we can match memory for the MMX alternative, but allowing
6176 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
6177 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; V2SF concatenation variant without insertps alternatives.  Operand 2 is
;; limited to reg_or_0_operand: unpcklps for a register, movss from memory
;; when operand 2 is the constant C, plus the two MMX-typed forms.
6178 (define_insn "*vec_concatv2sf_sse"
6179 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
6181 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
6182 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
6185 unpcklps\t{%2, %0|%0, %2}
6186 movss\t{%1, %0|%0, %1}
6187 punpckldq\t{%2, %0|%0, %2}
6188 movd\t{%1, %0|%0, %1}"
6189 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
6190 (set_attr "mode" "V4SF,SF,DI,DI")])
;; Builds a V4SF from two V2SF operands: movlhps / vmovlhps when operand 2
;; is a register, movhps / vmovhps when it is in memory.
6192 (define_insn "*vec_concatv4sf"
6193 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x,x")
6195 (match_operand:V2SF 1 "register_operand" " 0,x,0,x")
6196 (match_operand:V2SF 2 "nonimmediate_operand" " x,x,m,m")))]
6199 movlhps\t{%2, %0|%0, %2}
6200 vmovlhps\t{%2, %1, %0|%0, %1, %2}
6201 movhps\t{%2, %0|%0, %q2}
6202 vmovhps\t{%2, %1, %0|%0, %1, %q2}"
6203 [(set_attr "isa" "noavx,avx,noavx,avx")
6204 (set_attr "type" "ssemov")
6205 (set_attr "prefix" "orig,vex,orig,vex")
6206 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF")])
;; Vector initialization expander for the V_128 modes; all work is delegated
;; to ix86_expand_vector_init with operands 0 and 1.
6208 (define_expand "vec_init<mode>"
6209 [(match_operand:V_128 0 "register_operand")
6213 ix86_expand_vector_init (false, operands[0], operands[1]);
6217 ;; Avoid combining registers from different units in a single alternative,
6218 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Combines a duplicated scalar (operand 2) with vector operand 1 for the
;; VI4F_128 modes.  Alternatives 0-3 take the constant C as operand 1,
;; alternatives 4-7 merge into a register operand 1 (movss / pinsrd forms),
;; and alternatives 8-10 have a memory destination tied to operand 1 with
;; the scalar coming from an SSE, integer or x87 source (their types are
;; ssemov, imov and fmov respectively).
6219 (define_insn "vec_set<mode>_0"
6220 [(set (match_operand:VI4F_128 0 "nonimmediate_operand"
6221 "=v,v,v ,x,x,v,x ,x ,m ,m ,m")
6223 (vec_duplicate:VI4F_128
6224 (match_operand:<ssescalarmode> 2 "general_operand"
6225 " v,m,*r,m,x,v,*rm,*rm,!x,!*re,!*fF"))
6226 (match_operand:VI4F_128 1 "vector_move_operand"
6227 " C,C,C ,C,0,v,0 ,x ,0 ,0 ,0")
6231 %vinsertps\t{$0xe, %d2, %0|%0, %d2, 0xe}
6232 %vmov<ssescalarmodesuffix>\t{%2, %0|%0, %2}
6233 %vmovd\t{%2, %0|%0, %2}
6234 movss\t{%2, %0|%0, %2}
6235 movss\t{%2, %0|%0, %2}
6236 vmovss\t{%2, %1, %0|%0, %1, %2}
6237 pinsrd\t{$0, %2, %0|%0, %2, 0}
6238 vpinsrd\t{$0, %2, %1, %0|%0, %1, %2, 0}
6242 [(set_attr "isa" "sse4,sse2,sse2,noavx,noavx,avx,sse4_noavx,avx,*,*,*")
6244 (cond [(eq_attr "alternative" "0,6,7")
6245 (const_string "sselog")
6246 (eq_attr "alternative" "9")
6247 (const_string "imov")
6248 (eq_attr "alternative" "10")
6249 (const_string "fmov")
6251 (const_string "ssemov")))
6252 (set_attr "prefix_extra" "*,*,*,*,*,*,1,1,*,*,*")
6253 (set_attr "length_immediate" "*,*,*,*,*,*,1,1,*,*,*")
6254 (set_attr "prefix" "maybe_vex,maybe_vex,maybe_vex,orig,orig,vex,orig,vex,*,*,*")
6255 (set_attr "mode" "SF,<ssescalarmode>,SI,SF,SF,SF,TI,TI,*,*,*")])
6257 ;; A subset is vec_setv4sf.
;; insertps of SF operand 2 into V4SF operand 1.  Operand 3 must be a power
;; of two whose log2 is below the number of V4SF elements; the output code
;; turns it into the instruction immediate by shifting that log2 left by 4.
6258 (define_insn "*vec_setv4sf_sse4_1"
6259 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
6262 (match_operand:SF 2 "nonimmediate_operand" "xm,xm"))
6263 (match_operand:V4SF 1 "register_operand" "0,x")
6264 (match_operand:SI 3 "const_int_operand")))]
6266 && ((unsigned) exact_log2 (INTVAL (operands[3]))
6267 < GET_MODE_NUNITS (V4SFmode))"
6269 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
6270 switch (which_alternative)
6273 return "insertps\t{%3, %2, %0|%0, %2, %3}";
6275 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
6280 [(set_attr "isa" "noavx,avx")
6281 (set_attr "type" "sselog")
6282 (set_attr "prefix_data16" "1,*")
6283 (set_attr "prefix_extra" "1")
6284 (set_attr "length_immediate" "1")
6285 (set_attr "prefix" "orig,vex")
6286 (set_attr "mode" "V4SF")])
;; insertps expressed as an unspec with an 8-bit immediate (operand 3).
;; When operand 2 is in memory, the top two bits of the immediate
;; (count_s = imm >> 6) are folded into the address as a byte offset of
;; count_s * 4 on an SFmode reference, and the immediate is reduced to its
;; low six bits.
6288 (define_insn "sse4_1_insertps"
6289 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
6290 (unspec:V4SF [(match_operand:V4SF 2 "nonimmediate_operand" "xm,xm")
6291 (match_operand:V4SF 1 "register_operand" "0,x")
6292 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
6296 if (MEM_P (operands[2]))
6298 unsigned count_s = INTVAL (operands[3]) >> 6;
6300 operands[3] = GEN_INT (INTVAL (operands[3]) & 0x3f);
6301 operands[2] = adjust_address_nv (operands[2], SFmode, count_s * 4);
6303 switch (which_alternative)
6306 return "insertps\t{%3, %2, %0|%0, %2, %3}";
6308 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
6313 [(set_attr "isa" "noavx,avx")
6314 (set_attr "type" "sselog")
6315 (set_attr "prefix_data16" "1,*")
6316 (set_attr "prefix_extra" "1")
6317 (set_attr "length_immediate" "1")
6318 (set_attr "prefix" "orig,vex")
6319 (set_attr "mode" "V4SF")])
;; Post-reload split for a memory destination and a non-memory scalar
;; operand 1: the pattern is replaced by a plain scalar store of operand 1
;; to offset 0 of operand 0, re-viewed in the scalar mode.
6322 [(set (match_operand:VI4F_128 0 "memory_operand")
6324 (vec_duplicate:VI4F_128
6325 (match_operand:<ssescalarmode> 1 "nonmemory_operand"))
6328 "TARGET_SSE && reload_completed"
6329 [(set (match_dup 0) (match_dup 1))]
6330 "operands[0] = adjust_address (operands[0], <ssescalarmode>mode, 0);")
;; Element-set expander for all V modes: operand 1 is the scalar value and
;; operand 2 the constant element index.  All work is delegated to
;; ix86_expand_vector_set.
6332 (define_expand "vec_set<mode>"
6333 [(match_operand:V 0 "register_operand")
6334 (match_operand:<ssescalarmode> 1 "register_operand")
6335 (match_operand 2 "const_int_operand")]
6338 ix86_expand_vector_set (false, operands[0], operands[1],
6339 INTVAL (operands[2]));
;; Extraction of element 0 from a V4SF.  After reload it is split into a
;; plain SF move: a register source is re-created as an SFmode register with
;; the same register number, any other source is narrowed with
;; adjust_address at offset 0.
6343 (define_insn_and_split "*vec_extractv4sf_0"
6344 [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,f,r")
6346 (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m,m")
6347 (parallel [(const_int 0)])))]
6348 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6350 "&& reload_completed"
6351 [(set (match_dup 0) (match_dup 1))]
6353 if (REG_P (operands[1]))
6354 operands[1] = gen_rtx_REG (SFmode, REGNO (operands[1]));
6356 operands[1] = adjust_address (operands[1], SFmode, 0);
;; Extraction of the V4SF element selected by operand 2 (0-3).
;; Alternative 0 (integer register or memory destination) emits extractps.
;; When the destination is an SSE register the insn is split after reload:
;; the destination is re-viewed as V4SF and, depending on the index, filled
;; either by a shufps that selects the requested element in every position
;; or by vec_interleave_highv4sf of operand 1 with itself.
6359 (define_insn_and_split "*sse4_1_extractps"
6360 [(set (match_operand:SF 0 "nonimmediate_operand" "=rm,x,x")
6362 (match_operand:V4SF 1 "register_operand" "x,0,x")
6363 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n,n,n")])))]
6366 %vextractps\t{%2, %1, %0|%0, %1, %2}
6369 "&& reload_completed && SSE_REG_P (operands[0])"
6372 rtx dest = gen_rtx_REG (V4SFmode, REGNO (operands[0]));
6373 switch (INTVAL (operands[2]))
6377 emit_insn (gen_sse_shufps_v4sf (dest, operands[1], operands[1],
6378 operands[2], operands[2],
6379 GEN_INT (INTVAL (operands[2]) + 4),
6380 GEN_INT (INTVAL (operands[2]) + 4)));
6383 emit_insn (gen_vec_interleave_highv4sf (dest, operands[1], operands[1]));
6386 /* 0 should be handled by the *vec_extractv4sf_0 pattern above. */
6391 [(set_attr "isa" "*,noavx,avx")
6392 (set_attr "type" "sselog,*,*")
6393 (set_attr "prefix_data16" "1,*,*")
6394 (set_attr "prefix_extra" "1,*,*")
6395 (set_attr "length_immediate" "1,*,*")
6396 (set_attr "prefix" "maybe_vex,*,*")
6397 (set_attr "mode" "V4SF,*,*")])
;; Extraction of element operand 2 from a V4SF in offsettable memory.
;; After reload it becomes a scalar SF load from byte offset index * 4.
6399 (define_insn_and_split "*vec_extractv4sf_mem"
6400 [(set (match_operand:SF 0 "register_operand" "=x,*r,f")
6402 (match_operand:V4SF 1 "memory_operand" "o,o,o")
6403 (parallel [(match_operand 2 "const_0_to_3_operand" "n,n,n")])))]
6406 "&& reload_completed"
6407 [(set (match_dup 0) (match_dup 1))]
6409 operands[1] = adjust_address (operands[1], SFmode, INTVAL (operands[2]) * 4);
;; Name prefix used by the quarter-vector extract expander for each
;; 512-bit mode.
6412 (define_mode_attr extract_type
6413 [(V16SF "avx512f") (V16SI "avx512f") (V8DF "avx512dq") (V8DI "avx512dq")])
;; Element-layout suffix (32x4 or 64x2) used in the same expander name.
6415 (define_mode_attr extract_suf
6416 [(V16SF "32x4") (V16SI "32x4") (V8DF "64x2") (V8DI "64x2")])
;; Modes handled by the quarter-vector extract expander; the 64-bit element
;; modes are only enabled under TARGET_AVX512DQ.
6418 (define_mode_iterator AVX512_VEC
6419 [(V8DF "TARGET_AVX512DQ") (V8DI "TARGET_AVX512DQ") V16SF V16SI])
;; Masked quarter-vector extract expander.  Operand 2 is the quarter index
;; (0-3), operand 3 the merge source and operand 4 the mask register.  The
;; index is expanded into explicit element selectors: mask*4 .. mask*4+3 for
;; the V16SI/V16SF (32x4) form, and mask*2, mask*2+1 for the 64x2 form.
;; A memory destination combined with a CONST_VECTOR operand 3 is first
;; passed through force_reg.
6421 (define_expand "<extract_type>_vextract<shuffletype><extract_suf>_mask"
6422 [(match_operand:<ssequartermode> 0 "nonimmediate_operand")
6423 (match_operand:AVX512_VEC 1 "register_operand")
6424 (match_operand:SI 2 "const_0_to_3_operand")
6425 (match_operand:<ssequartermode> 3 "nonimmediate_operand")
6426 (match_operand:QI 4 "register_operand")]
6430 mask = INTVAL (operands[2]);
6432 if (MEM_P (operands[0]) && GET_CODE (operands[3]) == CONST_VECTOR)
6433 operands[0] = force_reg (<ssequartermode>mode, operands[0]);
6435 if (<MODE>mode == V16SImode || <MODE>mode == V16SFmode)
6436 emit_insn (gen_avx512f_vextract<shuffletype>32x4_1_mask (operands[0],
6437 operands[1], GEN_INT (mask * 4), GEN_INT (mask * 4 + 1),
6438 GEN_INT (mask * 4 + 2), GEN_INT (mask * 4 + 3), operands[3],
6441 emit_insn (gen_avx512dq_vextract<shuffletype>64x2_1_mask (operands[0],
6442 operands[1], GEN_INT (mask * 2), GEN_INT (mask * 2 + 1), operands[3],
;; Masked 64x2 extract that stores to memory; the destination memory is
;; also the merge source (operand 4 is tied to operand 0).  The two
;; selectors must be consecutive and start on an even element; the emitted
;; immediate is the start index shifted right by 1.
;; The write mask uses the "Yk" constraint, as the sibling maskm patterns
;; do, rather than plain "k".
6447 (define_insn "avx512dq_vextract<shuffletype>64x2_1_maskm"
6448 [(set (match_operand:<ssequartermode> 0 "memory_operand" "=m")
6449 (vec_merge:<ssequartermode>
6450 (vec_select:<ssequartermode>
6451 (match_operand:V8FI 1 "register_operand" "v")
6452 (parallel [(match_operand 2 "const_0_to_7_operand")
6453 (match_operand 3 "const_0_to_7_operand")]))
6454 (match_operand:<ssequartermode> 4 "memory_operand" "0")
6455 (match_operand:QI 5 "register_operand" "Yk")))]
6457 && (INTVAL (operands[2]) % 2 == 0)
6458 && (INTVAL (operands[2]) == INTVAL (operands[3]) - 1 )"
6460 operands[2] = GEN_INT ((INTVAL (operands[2])) >> 1);
6461 return "vextract<shuffletype>64x2\t{%2, %1, %0%{%5%}|%0%{%5%}, %1, %2}";
6463 [(set_attr "type" "sselog")
6464 (set_attr "prefix_extra" "1")
6465 (set_attr "length_immediate" "1")
6466 (set_attr "memory" "store")
6467 (set_attr "prefix" "evex")
6468 (set_attr "mode" "<sseinsnmode>")])
;; Masked 32x4 extract that stores to memory; the destination memory is
;; also the merge source (operand 6 is tied to operand 0) and operand 7 is
;; the mask register.  The four selectors must be consecutive and start at
;; a multiple of 4; the emitted immediate is the start index shifted right
;; by 2.
6470 (define_insn "avx512f_vextract<shuffletype>32x4_1_maskm"
6471 [(set (match_operand:<ssequartermode> 0 "memory_operand" "=m")
6472 (vec_merge:<ssequartermode>
6473 (vec_select:<ssequartermode>
6474 (match_operand:V16FI 1 "register_operand" "v")
6475 (parallel [(match_operand 2 "const_0_to_15_operand")
6476 (match_operand 3 "const_0_to_15_operand")
6477 (match_operand 4 "const_0_to_15_operand")
6478 (match_operand 5 "const_0_to_15_operand")]))
6479 (match_operand:<ssequartermode> 6 "memory_operand" "0")
6480 (match_operand:QI 7 "register_operand" "Yk")))]
6482 && ((INTVAL (operands[2]) % 4 == 0)
6483 && INTVAL (operands[2]) == (INTVAL (operands[3]) - 1)
6484 && INTVAL (operands[3]) == (INTVAL (operands[4]) - 1)
6485 && INTVAL (operands[4]) == (INTVAL (operands[5]) - 1))"
6487 operands[2] = GEN_INT ((INTVAL (operands[2])) >> 2);
6488 return "vextract<shuffletype>32x4\t{%2, %1, %0%{%7%}|%0%{%7%}, %1, %2}";
6490 [(set_attr "type" "sselog")
6491 (set_attr "prefix_extra" "1")
6492 (set_attr "length_immediate" "1")
6493 (set_attr "memory" "store")
6494 (set_attr "prefix" "evex")
6495 (set_attr "mode" "<sseinsnmode>")])
;; 64x2 extract of two consecutive elements from a V8FI register, with the
;; optional masking supplied by <mask_name>.  The emitted immediate is the
;; start index shifted right by 1, so the start index must be even: an odd
;; start (e.g. selectors 1,2) would otherwise match and be encoded as lane
;; 0.  The condition therefore requires operands[2] % 2 == 0, matching the
;; _maskm variant of this pattern.
6497 (define_insn "<mask_codefor>avx512dq_vextract<shuffletype>64x2_1<mask_name>"
6498 [(set (match_operand:<ssequartermode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
6499 (vec_select:<ssequartermode>
6500 (match_operand:V8FI 1 "register_operand" "v")
6501 (parallel [(match_operand 2 "const_0_to_7_operand")
6502 (match_operand 3 "const_0_to_7_operand")])))]
6503 "TARGET_AVX512DQ && (INTVAL (operands[2]) % 2 == 0) && (INTVAL (operands[2]) == INTVAL (operands[3]) - 1)"
6505 operands[2] = GEN_INT ((INTVAL (operands[2])) >> 1);
6506 return "vextract<shuffletype>64x2\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}";
6508 [(set_attr "type" "sselog1")
6509 (set_attr "prefix_extra" "1")
6510 (set_attr "length_immediate" "1")
6511 (set_attr "prefix" "evex")
6512 (set_attr "mode" "<sseinsnmode>")])
;; 32x4 extract of four consecutive elements from a V16FI register, with
;; the optional masking supplied by <mask_name>.  The emitted immediate is
;; the start index shifted right by 2, so the start index must be a multiple
;; of 4: a misaligned run (e.g. 1,2,3,4) would otherwise match and be
;; encoded as lane 0.  The condition therefore requires
;; operands[2] % 4 == 0, matching the _maskm variant of this pattern.
6514 (define_insn "<mask_codefor>avx512f_vextract<shuffletype>32x4_1<mask_name>"
6515 [(set (match_operand:<ssequartermode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
6516 (vec_select:<ssequartermode>
6517 (match_operand:V16FI 1 "register_operand" "v")
6518 (parallel [(match_operand 2 "const_0_to_15_operand")
6519 (match_operand 3 "const_0_to_15_operand")
6520 (match_operand 4 "const_0_to_15_operand")
6521 (match_operand 5 "const_0_to_15_operand")])))]
6523 && (INTVAL (operands[2]) % 4 == 0 && INTVAL (operands[2]) == (INTVAL (operands[3]) - 1)
6524 && INTVAL (operands[3]) == (INTVAL (operands[4]) - 1)
6525 && INTVAL (operands[4]) == (INTVAL (operands[5]) - 1))"
6527 operands[2] = GEN_INT ((INTVAL (operands[2])) >> 2);
6528 return "vextract<shuffletype>32x4\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
6530 [(set_attr "type" "sselog1")
6531 (set_attr "prefix_extra" "1")
6532 (set_attr "length_immediate" "1")
6533 (set_attr "prefix" "evex")
6534 (set_attr "mode" "<sseinsnmode>")])
;; Name prefix used by the half-vector extract expander for each 512-bit
;; mode.
6536 (define_mode_attr extract_type_2
6537 [(V16SF "avx512dq") (V16SI "avx512dq") (V8DF "avx512f") (V8DI "avx512f")])
;; Element-layout suffix (32x8 or 64x4) used in the same expander name.
6539 (define_mode_attr extract_suf_2
6540 [(V16SF "32x8") (V16SI "32x8") (V8DF "64x4") (V8DI "64x4")])
;; Modes handled by the half-vector extract expander; the 32-bit element
;; modes are only enabled under TARGET_AVX512DQ.
6542 (define_mode_iterator AVX512_VEC_2
6543 [(V16SF "TARGET_AVX512DQ") (V16SI "TARGET_AVX512DQ") V8DF V8DI])
;; Masked half-vector extract expander.  Operand 2 (0 or 1) chooses between
;; gen_vec_extract_lo_<mode>_mask and gen_vec_extract_hi_<mode>_mask, which
;; is then emitted with the destination, source, merge source (operand 3)
;; and mask register (operand 4).
;; Operand 0 has mode <ssehalfvecmode>, so the force_reg call must use that
;; mode as well (the sibling avx512vl_vextractf128<mode> expander does the
;; same); passing <ssequartermode>mode mismatched the operand's mode.
6545 (define_expand "<extract_type_2>_vextract<shuffletype><extract_suf_2>_mask"
6546 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
6547 (match_operand:AVX512_VEC_2 1 "register_operand")
6548 (match_operand:SI 2 "const_0_to_1_operand")
6549 (match_operand:<ssehalfvecmode> 3 "nonimmediate_operand")
6550 (match_operand:QI 4 "register_operand")]
6553 rtx (*insn)(rtx, rtx, rtx, rtx);
6555 if (MEM_P (operands[0]) && GET_CODE (operands[3]) == CONST_VECTOR)
6556 operands[0] = force_reg (<ssehalfvecmode>mode, operands[0]);
6558 switch (INTVAL (operands[2]))
6561 insn = gen_vec_extract_lo_<mode>_mask;
6564 insn = gen_vec_extract_hi_<mode>_mask;
6570 emit_insn (insn (operands[0], operands[1], operands[3], operands[4]));
;; Post-reload split of the low-half extract (elements 0-3) of a V8FI
;; operand: the source is re-viewed in <ssehalfvecmode> (a new REG with the
;; same register number, or gen_lowpart) and a plain move is emitted.
6575 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
6576 (vec_select:<ssehalfvecmode>
6577 (match_operand:V8FI 1 "nonimmediate_operand")
6578 (parallel [(const_int 0) (const_int 1)
6579 (const_int 2) (const_int 3)])))]
6580 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))
6581 && reload_completed"
6584 rtx op1 = operands[1];
6586 op1 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op1));
6588 op1 = gen_lowpart (<ssehalfvecmode>mode, op1);
6589 emit_move_insn (operands[0], op1);
;; Masked store of the low half (elements 0-3) of a V8FI register.  The
;; destination memory is also the merge source (operand 2 is tied to
;; operand 0) and operand 3 is the mask register.  Emits the 64x4 extract
;; with immediate 0.
6593 (define_insn "vec_extract_lo_<mode>_maskm"
6594 [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
6595 (vec_merge:<ssehalfvecmode>
6596 (vec_select:<ssehalfvecmode>
6597 (match_operand:V8FI 1 "register_operand" "v")
6598 (parallel [(const_int 0) (const_int 1)
6599 (const_int 2) (const_int 3)]))
6600 (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
6601 (match_operand:QI 3 "register_operand" "Yk")))]
6603 "vextract<shuffletype>64x4\t{$0x0, %1, %0%{%3%}|%0%{%3%}, %1, 0x0}"
6604 [(set_attr "type" "sselog1")
6605 (set_attr "prefix_extra" "1")
6606 (set_attr "length_immediate" "1")
6607 (set_attr "prefix" "evex")
6608 (set_attr "mode" "<sseinsnmode>")])
;; Low-half extract (elements 0-3) of a V8FI operand, with the optional
;; masking supplied by <mask_name>.  The template visible here is the 64x4
;; extract with immediate 0 and the <mask_operand2> suffix on the
;; destination.
6610 (define_insn "vec_extract_lo_<mode><mask_name>"
6611 [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=<store_mask_constraint>,v")
6612 (vec_select:<ssehalfvecmode>
6613 (match_operand:V8FI 1 "nonimmediate_operand" "v,m")
6614 (parallel [(const_int 0) (const_int 1)
6615 (const_int 2) (const_int 3)])))]
6616 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6619 return "vextract<shuffletype>64x4\t{$0x0, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x0}";
6623 [(set_attr "type" "sselog1")
6624 (set_attr "prefix_extra" "1")
6625 (set_attr "length_immediate" "1")
6626 (set_attr "prefix" "evex")
6627 (set_attr "mode" "<sseinsnmode>")])
;; Masked store of the high half (elements 4-7) of a V8FI register.  The
;; destination memory is also the merge source (operand 2 is tied to
;; operand 0) and operand 3 is the mask register.  Emits the 64x4 extract
;; with immediate 1.
6629 (define_insn "vec_extract_hi_<mode>_maskm"
6630 [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
6631 (vec_merge:<ssehalfvecmode>
6632 (vec_select:<ssehalfvecmode>
6633 (match_operand:V8FI 1 "register_operand" "v")
6634 (parallel [(const_int 4) (const_int 5)
6635 (const_int 6) (const_int 7)]))
6636 (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
6637 (match_operand:QI 3 "register_operand" "Yk")))]
6639 "vextract<shuffletype>64x4\t{$0x1, %1, %0%{%3%}|%0%{%3%}, %1, 0x1}"
6640 [(set_attr "type" "sselog")
6641 (set_attr "prefix_extra" "1")
6642 (set_attr "length_immediate" "1")
6643 (set_attr "memory" "store")
6644 (set_attr "prefix" "evex")
6645 (set_attr "mode" "<sseinsnmode>")])
;; High-half extract (elements 4-7) of a V8FI register, with the optional
;; masking supplied by <mask_name>.  Emits the 64x4 extract with
;; immediate 1.
6647 (define_insn "vec_extract_hi_<mode><mask_name>"
6648 [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
6649 (vec_select:<ssehalfvecmode>
6650 (match_operand:V8FI 1 "register_operand" "v")
6651 (parallel [(const_int 4) (const_int 5)
6652 (const_int 6) (const_int 7)])))]
6654 "vextract<shuffletype>64x4\t{$0x1, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x1}"
6655 [(set_attr "type" "sselog1")
6656 (set_attr "prefix_extra" "1")
6657 (set_attr "length_immediate" "1")
6658 (set_attr "prefix" "evex")
6659 (set_attr "mode" "<sseinsnmode>")])
;; Masked store of the high half (elements 8-15) of a V16FI register.  The
;; destination memory is also the merge source (operand 2 is tied to
;; operand 0) and operand 3 is the mask register.  Emits the 32x8 extract
;; with immediate 1.
;; The write mask uses the "Yk" constraint, as the V8FI maskm patterns do,
;; rather than plain "k".
6661 (define_insn "vec_extract_hi_<mode>_maskm"
6662 [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
6663 (vec_merge:<ssehalfvecmode>
6664 (vec_select:<ssehalfvecmode>
6665 (match_operand:V16FI 1 "register_operand" "v")
6666 (parallel [(const_int 8) (const_int 9)
6667 (const_int 10) (const_int 11)
6668 (const_int 12) (const_int 13)
6669 (const_int 14) (const_int 15)]))
6670 (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
6671 (match_operand:QI 3 "register_operand" "Yk")))]
6673 "vextract<shuffletype>32x8\t{$0x1, %1, %0%{%3%}|%0%{%3%}, %1, 0x1}"
6674 [(set_attr "type" "sselog1")
6675 (set_attr "prefix_extra" "1")
6676 (set_attr "length_immediate" "1")
6677 (set_attr "prefix" "evex")
6678 (set_attr "mode" "<sseinsnmode>")])
;; High-half extract (elements 8-15) of a V16FI register.  Two alternatives
;; are distinguished by the isa attribute: the 32x8 extract (with optional
;; masking) when AVX512DQ is available, otherwise an unmasked
;; vextracti64x4.  Both use immediate 1.
6680 (define_insn "vec_extract_hi_<mode><mask_name>"
6681 [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=<store_mask_constraint>,vm")
6682 (vec_select:<ssehalfvecmode>
6683 (match_operand:V16FI 1 "register_operand" "v,v")
6684 (parallel [(const_int 8) (const_int 9)
6685 (const_int 10) (const_int 11)
6686 (const_int 12) (const_int 13)
6687 (const_int 14) (const_int 15)])))]
6688 "TARGET_AVX512F && <mask_avx512dq_condition>"
6690 vextract<shuffletype>32x8\t{$0x1, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x1}
6691 vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
6692 [(set_attr "type" "sselog1")
6693 (set_attr "prefix_extra" "1")
6694 (set_attr "isa" "avx512dq,noavx512dq")
6695 (set_attr "length_immediate" "1")
6696 (set_attr "prefix" "evex")
6697 (set_attr "mode" "<sseinsnmode>")])
;; Masked half-vector extract expander for the VI48F_256 modes, enabled
;; under AVX512DQ and AVX512VL.  Operand 2 (0 or 1) chooses between
;; gen_vec_extract_lo_<mode>_mask and gen_vec_extract_hi_<mode>_mask;
;; operand 3 is the merge source and operand 4 the mask register.  A memory
;; destination combined with a CONST_VECTOR operand 3 is first passed
;; through force_reg.
6699 (define_expand "avx512vl_vextractf128<mode>"
6700 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
6701 (match_operand:VI48F_256 1 "register_operand")
6702 (match_operand:SI 2 "const_0_to_1_operand")
6703 (match_operand:<ssehalfvecmode> 3 "vector_move_operand")
6704 (match_operand:QI 4 "register_operand")]
6705 "TARGET_AVX512DQ && TARGET_AVX512VL"
6707 rtx (*insn)(rtx, rtx, rtx, rtx);
6709 if (MEM_P (operands[0]) && GET_CODE (operands[3]) == CONST_VECTOR)
6710 operands[0] = force_reg (<ssehalfvecmode>mode, operands[0]);
6712 switch (INTVAL (operands[2]))
6715 insn = gen_vec_extract_lo_<mode>_mask;
6718 insn = gen_vec_extract_hi_<mode>_mask;
6724 emit_insn (insn (operands[0], operands[1], operands[3], operands[4]));
;; Unmasked half-vector extract expander for the V_256 modes.  The value of
;; operand 2 (0 or 1) chooses between gen_vec_extract_lo_<mode> and
;; gen_vec_extract_hi_<mode>, which is emitted with operands 0 and 1.
6728 (define_expand "avx_vextractf128<mode>"
6729 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
6730 (match_operand:V_256 1 "register_operand")
6731 (match_operand:SI 2 "const_0_to_1_operand")]
6734 rtx (*insn)(rtx, rtx);
6736 switch (INTVAL (operands[2]))
6739 insn = gen_vec_extract_lo_<mode>;
6742 insn = gen_vec_extract_hi_<mode>;
6748 emit_insn (insn (operands[0], operands[1]));
;; Low-half extract (elements 0-7) of a V16FI operand, with the optional
;; masking supplied by <mask_name>.  Source and destination may not both be
;; memory.  The template visible here is the 32x8 extract with immediate 0.
6752 (define_insn "vec_extract_lo_<mode><mask_name>"
6753 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,m")
6754 (vec_select:<ssehalfvecmode>
6755 (match_operand:V16FI 1 "nonimmediate_operand" "vm,v")
6756 (parallel [(const_int 0) (const_int 1)
6757 (const_int 2) (const_int 3)
6758 (const_int 4) (const_int 5)
6759 (const_int 6) (const_int 7)])))]
6761 && <mask_mode512bit_condition>
6762 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6765 return "vextract<shuffletype>32x8\t{$0x0, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x0}";
;; Post-reload split of the low-half extract (elements 0-7) of a V16FI
;; operand: the source is re-viewed in <ssehalfvecmode> (a new REG with the
;; same register number, or gen_lowpart) and a plain move is emitted.
6771 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
6772 (vec_select:<ssehalfvecmode>
6773 (match_operand:V16FI 1 "nonimmediate_operand")
6774 (parallel [(const_int 0) (const_int 1)
6775 (const_int 2) (const_int 3)
6776 (const_int 4) (const_int 5)
6777 (const_int 6) (const_int 7)])))]
6778 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))
6779 && reload_completed"
6782 rtx op1 = operands[1];
6784 op1 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op1));
6786 op1 = gen_lowpart (<ssehalfvecmode>mode, op1);
6787 emit_move_insn (operands[0], op1);
;; Low-half extract (elements 0-1) of a VI8F_256 operand, with the optional
;; masking supplied by <mask_name>.  Source and destination may not both be
;; memory.  The template visible here is the 64x2 extract with immediate 0.
;; NOTE(review): the template hard-codes %{%3%} where the sibling patterns
;; use <mask_operand2> -- confirm this is intended.
;; NOTE(review): the mode attribute is "XI" while the sibling 256-bit
;; patterns use <sseinsnmode> -- confirm.
6791 (define_insn "vec_extract_lo_<mode><mask_name>"
6792 [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=v,m")
6793 (vec_select:<ssehalfvecmode>
6794 (match_operand:VI8F_256 1 "nonimmediate_operand" "vm,v")
6795 (parallel [(const_int 0) (const_int 1)])))]
6797 && <mask_avx512vl_condition> && <mask_avx512dq_condition>
6798 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6801 return "vextract<shuffletype>64x2\t{$0x0, %1, %0%{%3%}|%0%{%3%}, %1, 0x0}";
6805 [(set_attr "type" "sselog")
6806 (set_attr "prefix_extra" "1")
6807 (set_attr "length_immediate" "1")
6808 (set_attr "memory" "none,store")
6809 (set_attr "prefix" "evex")
6810 (set_attr "mode" "XI")])
;; Post-reload split of the low-half extract (elements 0-1) of a VI8F_256
;; operand: the source is re-viewed in <ssehalfvecmode> (a new REG with the
;; same register number, or gen_lowpart) and a plain move is emitted.
6813 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
6814 (vec_select:<ssehalfvecmode>
6815 (match_operand:VI8F_256 1 "nonimmediate_operand")
6816 (parallel [(const_int 0) (const_int 1)])))]
6817 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))
6818 && reload_completed"
6821 rtx op1 = operands[1];
6823 op1 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op1));
6825 op1 = gen_lowpart (<ssehalfvecmode>mode, op1);
6826 emit_move_insn (operands[0], op1);
;; High-half extract (elements 2-3) of a VI8F_256 register, with the
;; optional masking supplied by <mask_name>.  When both AVX512DQ and
;; AVX512VL are enabled the 64x2 extract is emitted, otherwise
;; vextract<i128>; both use immediate 1.
;; NOTE(review): the prefix attribute is fixed to "vex" although one of the
;; two templates is the 64x2 form; the VI4F_256 sibling selects evex/vex
;; with a match_test -- confirm whether this should do the same.
6830 (define_insn "vec_extract_hi_<mode><mask_name>"
6831 [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=v,<store_mask_constraint>")
6832 (vec_select:<ssehalfvecmode>
6833 (match_operand:VI8F_256 1 "register_operand" "v,v")
6834 (parallel [(const_int 2) (const_int 3)])))]
6837 if (TARGET_AVX512DQ && TARGET_AVX512VL)
6838 return "vextract<shuffletype>64x2\t{$0x1, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x1}";
6840 return "vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}";
6842 [(set_attr "type" "sselog")
6843 (set_attr "prefix_extra" "1")
6844 (set_attr "length_immediate" "1")
6845 (set_attr "memory" "none,store")
6846 (set_attr "prefix" "vex")
6847 (set_attr "mode" "<sseinsnmode>")])
;; Post-reload split of the low-half extract (elements 0-3) of a VI4F_256
;; operand: the source is re-viewed in <ssehalfvecmode> (a new REG with the
;; same register number, or gen_lowpart) and a plain move is emitted.
6850 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
6851 (vec_select:<ssehalfvecmode>
6852 (match_operand:VI4F_256 1 "nonimmediate_operand")
6853 (parallel [(const_int 0) (const_int 1)
6854 (const_int 2) (const_int 3)])))]
6855 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1])) && reload_completed"
6858 rtx op1 = operands[1];
6860 op1 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op1));
6862 op1 = gen_lowpart (<ssehalfvecmode>mode, op1);
6863 emit_move_insn (operands[0], op1);
;; Low-half extract (elements 0-3) of a VI4F_256 operand, with the optional
;; masking supplied by <mask_name>.  The template visible here is the 32x4
;; extract with immediate 0 and the <mask_operand2> suffix on the
;; destination.
6868 (define_insn "vec_extract_lo_<mode><mask_name>"
6869 [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
6870 (vec_select:<ssehalfvecmode>
6871 (match_operand:VI4F_256 1 "nonimmediate_operand" "v")
6872 (parallel [(const_int 0) (const_int 1)
6873 (const_int 2) (const_int 3)])))]
6874 "TARGET_AVX && <mask_avx512vl_condition> && <mask_avx512dq_condition>"
6877 return "vextract<shuffletype>32x4\t{$0x0, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x0}";
6881 [(set_attr "type" "sselog1")
6882 (set_attr "prefix_extra" "1")
6883 (set_attr "length_immediate" "1")
6884 (set_attr "prefix" "evex")
6885 (set_attr "mode" "<sseinsnmode>")])
;; Masked store of the low half (elements 0-3) of a VI4F_256 register.  The
;; destination memory is also the merge source (operand 2 is tied to
;; operand 0) and operand 3 is the mask register.  Emits the 32x4 extract
;; with immediate 0.
;; The AT&T half of the template must print the mask as %{%3%}; the former
;; "%{3%}" emitted a literal "3" inside the braces instead of the mask
;; register.  The write mask also uses the "Yk" constraint, as the V8FI
;; maskm patterns do, rather than plain "k".
6887 (define_insn "vec_extract_lo_<mode>_maskm"
6888 [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
6889 (vec_merge:<ssehalfvecmode>
6890 (vec_select:<ssehalfvecmode>
6891 (match_operand:VI4F_256 1 "register_operand" "v")
6892 (parallel [(const_int 0) (const_int 1)
6893 (const_int 2) (const_int 3)]))
6894 (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
6895 (match_operand:QI 3 "register_operand" "Yk")))]
6896 "TARGET_AVX512VL && TARGET_AVX512F"
6897 "vextract<shuffletype>32x4\t{$0x0, %1, %0%{%3%}|%0%{%3%}, %1, 0x0}"
6898 [(set_attr "type" "sselog1")
6899 (set_attr "prefix_extra" "1")
6900 (set_attr "length_immediate" "1")
6901 (set_attr "prefix" "evex")
6902 (set_attr "mode" "<sseinsnmode>")])
;; Masked store of the high half (elements 4-7) of a VI4F_256 register.
;; The destination memory is also the merge source (operand 2 is tied to
;; operand 0) and operand 3 is the mask register.  Emits the 32x4 extract
;; with immediate 1.
;; The mask operand is a QImode mask register with the "Yk" constraint, as
;; in the matching vec_extract_lo_<mode>_maskm pattern; it was previously
;; declared with the vector mode <ssehalfvecmode>.
6904 (define_insn "vec_extract_hi_<mode>_maskm"
6905 [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
6906 (vec_merge:<ssehalfvecmode>
6907 (vec_select:<ssehalfvecmode>
6908 (match_operand:VI4F_256 1 "register_operand" "v")
6909 (parallel [(const_int 4) (const_int 5)
6910 (const_int 6) (const_int 7)]))
6911 (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
6912 (match_operand:QI 3 "register_operand" "Yk")))]
6913 "TARGET_AVX512F && TARGET_AVX512VL"
6915 return "vextract<shuffletype>32x4\t{$0x1, %1, %0%{%3%}|%0%{%3%}, %1, 0x1}";
6917 [(set_attr "type" "sselog1")
6918 (set_attr "prefix_extra" "1")
6919 (set_attr "length_immediate" "1")
6920 (set_attr "prefix" "evex")
6921 (set_attr "mode" "<sseinsnmode>")])
;; High-half extract (elements 4-7) of a VI4F_256 register, with the
;; optional masking supplied by <mask_name>.  Under AVX512VL the 32x4
;; extract is emitted, otherwise vextract<i128>; both use immediate 1.  The
;; prefix attribute follows the same TARGET_AVX512VL test (evex vs. vex).
6923 (define_insn "vec_extract_hi_<mode><mask_name>"
6924 [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
6925 (vec_select:<ssehalfvecmode>
6926 (match_operand:VI4F_256 1 "register_operand" "v")
6927 (parallel [(const_int 4) (const_int 5)
6928 (const_int 6) (const_int 7)])))]
6929 "TARGET_AVX && <mask_avx512vl_condition>"
6931 if (TARGET_AVX512VL)
6932 return "vextract<shuffletype>32x4\t{$0x1, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x1}";
6934 return "vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}";
6936 [(set_attr "type" "sselog1")
6937 (set_attr "prefix_extra" "1")
6938 (set_attr "length_immediate" "1")
6939 (set (attr "prefix")
6941 (match_test "TARGET_AVX512VL")
6942 (const_string "evex")
6943 (const_string "vex")))
6944 (set_attr "mode" "<sseinsnmode>")])
;; Low half (elements 0-15) of a V32HI as a V16HI.  Requires
;; TARGET_AVX512F and rejects the memory-to-memory combination.
;; After reload the insn is split into a plain move of operand 1
;; re-typed as V16HImode: a register source is rebuilt with gen_rtx_REG
;; on the same REGNO; the other assignment re-addresses the operand at
;; offset 0 with adjust_address (presumably the memory case -- the
;; branch keyword is not visible in this listing).
6946 (define_insn_and_split "vec_extract_lo_v32hi"
6947 [(set (match_operand:V16HI 0 "nonimmediate_operand" "=v,m")
6949 (match_operand:V32HI 1 "nonimmediate_operand" "vm,v")
6950 (parallel [(const_int 0) (const_int 1)
6951 (const_int 2) (const_int 3)
6952 (const_int 4) (const_int 5)
6953 (const_int 6) (const_int 7)
6954 (const_int 8) (const_int 9)
6955 (const_int 10) (const_int 11)
6956 (const_int 12) (const_int 13)
6957 (const_int 14) (const_int 15)])))]
6958 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6960 "&& reload_completed"
6961 [(set (match_dup 0) (match_dup 1))]
6963 if (REG_P (operands[1]))
6964 operands[1] = gen_rtx_REG (V16HImode, REGNO (operands[1]));
6966 operands[1] = adjust_address (operands[1], V16HImode, 0);
;; Extract the upper half (elements 16-31) of a V32HI register into a
;; V16HI register or memory, emitted as vextracti64x4 with immediate 1.
;; Both alternatives of operand 1 are register-only ("v,v"), so its
;; predicate is register_operand, in line with vec_extract_hi_v16hi;
;; a memory source could never satisfy the constraints.
6969 (define_insn "vec_extract_hi_v32hi"
6970 [(set (match_operand:V16HI 0 "nonimmediate_operand" "=v,m")
6972 (match_operand:V32HI 1 "register_operand" "v,v")
6973 (parallel [(const_int 16) (const_int 17)
6974 (const_int 18) (const_int 19)
6975 (const_int 20) (const_int 21)
6976 (const_int 22) (const_int 23)
6977 (const_int 24) (const_int 25)
6978 (const_int 26) (const_int 27)
6979 (const_int 28) (const_int 29)
6980 (const_int 30) (const_int 31)])))]
6982 "vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
6983 [(set_attr "type" "sselog")
6984 (set_attr "prefix_extra" "1")
6985 (set_attr "length_immediate" "1")
6986 (set_attr "memory" "none,store")
6987 (set_attr "prefix" "evex")
6988 (set_attr "mode" "XI")])
;; Low half (elements 0-7) of a V16HI as a V8HI.  Requires TARGET_AVX
;; and rejects the memory-to-memory combination.  After reload it is
;; split into a plain move of operand 1 re-typed as V8HImode
;; (gen_rtx_REG on the same REGNO for a register; adjust_address at
;; offset 0 in the other visible assignment).
6990 (define_insn_and_split "vec_extract_lo_v16hi"
6991 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
6993 (match_operand:V16HI 1 "nonimmediate_operand" "xm,x")
6994 (parallel [(const_int 0) (const_int 1)
6995 (const_int 2) (const_int 3)
6996 (const_int 4) (const_int 5)
6997 (const_int 6) (const_int 7)])))]
6998 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7000 "&& reload_completed"
7001 [(set (match_dup 0) (match_dup 1))]
7003 if (REG_P (operands[1]))
7004 operands[1] = gen_rtx_REG (V8HImode, REGNO (operands[1]));
7006 operands[1] = adjust_address (operands[1], V8HImode, 0);
;; Extract the upper half (elements 8-15) of a V16HI register into a
;; V8HI register or memory, emitted as vextract%~128 with immediate 1.
;; The "%~" escape is expanded by the backend's operand printer, which
;; is defined outside this listing.
7009 (define_insn "vec_extract_hi_v16hi"
7010 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
7012 (match_operand:V16HI 1 "register_operand" "x,x")
7013 (parallel [(const_int 8) (const_int 9)
7014 (const_int 10) (const_int 11)
7015 (const_int 12) (const_int 13)
7016 (const_int 14) (const_int 15)])))]
7018 "vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}"
7019 [(set_attr "type" "sselog")
7020 (set_attr "prefix_extra" "1")
7021 (set_attr "length_immediate" "1")
7022 (set_attr "memory" "none,store")
7023 (set_attr "prefix" "vex")
7024 (set_attr "mode" "OI")])
;; Low half (elements 0-31) of a V64QI as a V32QI.  Requires
;; TARGET_AVX512F and rejects the memory-to-memory combination.  After
;; reload it is split into a plain move of operand 1 re-typed as
;; V32QImode (gen_rtx_REG on the same REGNO for a register;
;; adjust_address at offset 0 in the other visible assignment).
7026 (define_insn_and_split "vec_extract_lo_v64qi"
7027 [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m")
7029 (match_operand:V64QI 1 "nonimmediate_operand" "vm,v")
7030 (parallel [(const_int 0) (const_int 1)
7031 (const_int 2) (const_int 3)
7032 (const_int 4) (const_int 5)
7033 (const_int 6) (const_int 7)
7034 (const_int 8) (const_int 9)
7035 (const_int 10) (const_int 11)
7036 (const_int 12) (const_int 13)
7037 (const_int 14) (const_int 15)
7038 (const_int 16) (const_int 17)
7039 (const_int 18) (const_int 19)
7040 (const_int 20) (const_int 21)
7041 (const_int 22) (const_int 23)
7042 (const_int 24) (const_int 25)
7043 (const_int 26) (const_int 27)
7044 (const_int 28) (const_int 29)
7045 (const_int 30) (const_int 31)])))]
7046 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7048 "&& reload_completed"
7049 [(set (match_dup 0) (match_dup 1))]
7051 if (REG_P (operands[1]))
7052 operands[1] = gen_rtx_REG (V32QImode, REGNO (operands[1]));
7054 operands[1] = adjust_address (operands[1], V32QImode, 0);
;; Extract the upper half (elements 32-63) of a V64QI register into a
;; V32QI register or memory, emitted as vextracti64x4 with immediate 1.
;; Both alternatives of operand 1 are register-only ("v,v"), so its
;; predicate is register_operand, in line with vec_extract_hi_v32qi;
;; a memory source could never satisfy the constraints.
7057 (define_insn "vec_extract_hi_v64qi"
7058 [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m")
7060 (match_operand:V64QI 1 "register_operand" "v,v")
7061 (parallel [(const_int 32) (const_int 33)
7062 (const_int 34) (const_int 35)
7063 (const_int 36) (const_int 37)
7064 (const_int 38) (const_int 39)
7065 (const_int 40) (const_int 41)
7066 (const_int 42) (const_int 43)
7067 (const_int 44) (const_int 45)
7068 (const_int 46) (const_int 47)
7069 (const_int 48) (const_int 49)
7070 (const_int 50) (const_int 51)
7071 (const_int 52) (const_int 53)
7072 (const_int 54) (const_int 55)
7073 (const_int 56) (const_int 57)
7074 (const_int 58) (const_int 59)
7075 (const_int 60) (const_int 61)
7076 (const_int 62) (const_int 63)])))]
7078 "vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
7079 [(set_attr "type" "sselog")
7080 (set_attr "prefix_extra" "1")
7081 (set_attr "length_immediate" "1")
7082 (set_attr "memory" "none,store")
7083 (set_attr "prefix" "evex")
7084 (set_attr "mode" "XI")])
;; Low half (elements 0-15) of a V32QI as a V16QI.  Requires TARGET_AVX
;; and rejects the memory-to-memory combination.  After reload it is
;; split into a plain move of operand 1 re-typed as V16QImode
;; (gen_rtx_REG on the same REGNO for a register; adjust_address at
;; offset 0 in the other visible assignment).
7086 (define_insn_and_split "vec_extract_lo_v32qi"
7087 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
7089 (match_operand:V32QI 1 "nonimmediate_operand" "xm,x")
7090 (parallel [(const_int 0) (const_int 1)
7091 (const_int 2) (const_int 3)
7092 (const_int 4) (const_int 5)
7093 (const_int 6) (const_int 7)
7094 (const_int 8) (const_int 9)
7095 (const_int 10) (const_int 11)
7096 (const_int 12) (const_int 13)
7097 (const_int 14) (const_int 15)])))]
7098 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7100 "&& reload_completed"
7101 [(set (match_dup 0) (match_dup 1))]
7103 if (REG_P (operands[1]))
7104 operands[1] = gen_rtx_REG (V16QImode, REGNO (operands[1]));
7106 operands[1] = adjust_address (operands[1], V16QImode, 0);
;; Extract the upper half (elements 16-31) of a V32QI register into a
;; V16QI register or memory, emitted as vextract%~128 with immediate 1.
;; The "%~" escape is expanded by the backend's operand printer, which
;; is defined outside this listing.
7109 (define_insn "vec_extract_hi_v32qi"
7110 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
7112 (match_operand:V32QI 1 "register_operand" "x,x")
7113 (parallel [(const_int 16) (const_int 17)
7114 (const_int 18) (const_int 19)
7115 (const_int 20) (const_int 21)
7116 (const_int 22) (const_int 23)
7117 (const_int 24) (const_int 25)
7118 (const_int 26) (const_int 27)
7119 (const_int 28) (const_int 29)
7120 (const_int 30) (const_int 31)])))]
7122 "vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}"
7123 [(set_attr "type" "sselog")
7124 (set_attr "prefix_extra" "1")
7125 (set_attr "length_immediate" "1")
7126 (set_attr "memory" "none,store")
7127 (set_attr "prefix" "vex")
7128 (set_attr "mode" "OI")])
7130 ;; Modes handled by vec_extract patterns.
;; Per-mode enabling conditions: the 512-bit QI/HI modes need
;; TARGET_AVX512BW, the other 512-bit modes TARGET_AVX512F, all 256-bit
;; modes TARGET_AVX; the 128-bit modes carry no extra condition.
7131 (define_mode_iterator VEC_EXTRACT_MODE
7132 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX") V16QI
7133 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX") V8HI
7134 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
7135 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
7136 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
7137 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF])
;; Expander for every mode in VEC_EXTRACT_MODE: operand 0 is the scalar
;; destination register, operand 1 the source vector register and
;; operand 2 a const_int element index.  All work is delegated to
;; ix86_expand_vector_extract, called with `false' as first argument
;; and the index taken via INTVAL (operands[2]).
7139 (define_expand "vec_extract<mode>"
7140 [(match_operand:<ssescalarmode> 0 "register_operand")
7141 (match_operand:VEC_EXTRACT_MODE 1 "register_operand")
7142 (match_operand 2 "const_int_operand")]
7145 ix86_expand_vector_extract (false, operands[0], operands[1],
7146 INTVAL (operands[2]));
7150 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7152 ;; Parallel double-precision floating point element swizzling
7154 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; 512-bit vunpckhpd: from the pair (operand 1, operand 2) pick elements
;; 1 9 3 11 5 13 7 15, i.e. the odd-indexed element of each operand in
;; turn.  <mask_name> also yields a masked variant; <mask_operand3>
;; prints its mask suffix.
;; Operand 1 has only a register constraint ("v"), so its predicate is
;; register_operand, in line with avx_unpckhpd256; only operand 2 may be
;; a memory reference.
7156 (define_insn "<mask_codefor>avx512f_unpckhpd512<mask_name>"
7157 [(set (match_operand:V8DF 0 "register_operand" "=v")
7160 (match_operand:V8DF 1 "register_operand" "v")
7161 (match_operand:V8DF 2 "nonimmediate_operand" "vm"))
7162 (parallel [(const_int 1) (const_int 9)
7163 (const_int 3) (const_int 11)
7164 (const_int 5) (const_int 13)
7165 (const_int 7) (const_int 15)])))]
7167 "vunpckhpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7168 [(set_attr "type" "sselog")
7169 (set_attr "prefix" "evex")
7170 (set_attr "mode" "V8DF")])
7172 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; 256-bit vunpckhpd: from the pair (operand 1, operand 2) pick elements
;; 1 5 3 7.  Enabled for TARGET_AVX; the masked variant generated through
;; <mask_name> is additionally gated by <mask_avx512vl_condition>.
7173 (define_insn "avx_unpckhpd256<mask_name>"
7174 [(set (match_operand:V4DF 0 "register_operand" "=v")
7177 (match_operand:V4DF 1 "register_operand" "v")
7178 (match_operand:V4DF 2 "nonimmediate_operand" "vm"))
7179 (parallel [(const_int 1) (const_int 5)
7180 (const_int 3) (const_int 7)])))]
7181 "TARGET_AVX && <mask_avx512vl_condition>"
7182 "vunpckhpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7183 [(set_attr "type" "sselog")
7184 (set_attr "prefix" "vex")
7185 (set_attr "mode" "V4DF")])
;; Expander built from three selections: 0 4 2 6 and 1 5 3 7 over
;; operands 1 and 2, then a final 2 3 6 7 selection that sets operand 0.
;; operands[3] and operands[4] are fresh V4DF pseudos created in the
;; preparation code; presumably they hold the two intermediate results
;; (the lines that reference them are not visible in this listing).
7187 (define_expand "vec_interleave_highv4df"
7191 (match_operand:V4DF 1 "register_operand" "x")
7192 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
7193 (parallel [(const_int 0) (const_int 4)
7194 (const_int 2) (const_int 6)])))
7200 (parallel [(const_int 1) (const_int 5)
7201 (const_int 3) (const_int 7)])))
7202 (set (match_operand:V4DF 0 "register_operand")
7207 (parallel [(const_int 2) (const_int 3)
7208 (const_int 6) (const_int 7)])))]
7211 operands[3] = gen_reg_rtx (V4DFmode);
7212 operands[4] = gen_reg_rtx (V4DFmode);
;; Masked 128-bit vunpckhpd: picks elements 1 and 3 from the pair
;; (operand 1, operand 2).  Operand 3 is the merge source (constraint
;; "0C": tied to the destination, or a constant accepted by "C") and
;; operand 4 is the QImode mask register ("Yk").  The template prints
;; the mask as {%4} followed by the %N3 modifier.
7216 (define_insn "avx512vl_unpckhpd128_mask"
7217 [(set (match_operand:V2DF 0 "register_operand" "=v")
7221 (match_operand:V2DF 1 "register_operand" "v")
7222 (match_operand:V2DF 2 "nonimmediate_operand" "vm"))
7223 (parallel [(const_int 1) (const_int 3)]))
7224 (match_operand:V2DF 3 "vector_move_operand" "0C")
7225 (match_operand:QI 4 "register_operand" "Yk")))]
7227 "vunpckhpd\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
7228 [(set_attr "type" "sselog")
7229 (set_attr "prefix" "evex")
7230 (set_attr "mode" "V2DF")])
;; Expander for the V2DF high interleave.  Both inputs may be registers
;; or memory; when ix86_vec_interleave_v2df_operator_ok (operands, 1)
;; rejects the operand combination, operand 2 is forced into a V2DF
;; register.
7232 (define_expand "vec_interleave_highv2df"
7233 [(set (match_operand:V2DF 0 "register_operand")
7236 (match_operand:V2DF 1 "nonimmediate_operand")
7237 (match_operand:V2DF 2 "nonimmediate_operand"))
7238 (parallel [(const_int 1)
7242 if (!ix86_vec_interleave_v2df_operator_ok (operands, 1))
7243 operands[2] = force_reg (V2DFmode, operands[2]);
;; Matcher for the V2DF high interleave, valid for TARGET_SSE2 when
;; ix86_vec_interleave_v2df_operator_ok (operands, 1) holds.
;; Six alternatives, selected by the "isa" attribute
;; (noavx, avx, sse3, noavx, avx, any):
;;   0/1  unpckhpd / vunpckhpd on registers
;;   2    movddup from the high part (%H1) of an offsettable memory
;;        operand 1, with operand 2 tied to operand 1
;;   3/4  movlpd / vmovlpd loading %H1 into the low part
;;   5    movhpd store of register operand 1 to the memory destination
7246 (define_insn "*vec_interleave_highv2df"
7247 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m")
7250 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,o,o,o,x")
7251 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,1,0,x,0"))
7252 (parallel [(const_int 1)
7254 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
7256 unpckhpd\t{%2, %0|%0, %2}
7257 vunpckhpd\t{%2, %1, %0|%0, %1, %2}
7258 %vmovddup\t{%H1, %0|%0, %H1}
7259 movlpd\t{%H1, %0|%0, %H1}
7260 vmovlpd\t{%H1, %2, %0|%0, %2, %H1}
7261 %vmovhpd\t{%1, %0|%q0, %1}"
7262 [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
7263 (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
7264 (set_attr "ssememalign" "64")
7265 (set_attr "prefix_data16" "*,*,*,1,*,1")
7266 (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex")
7267 (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,V1DF")])
;; Expander for the 512-bit movddup form: a single visible input
;; (operand 1, register or memory) with the selection 0 8 2 10 4 12 6 14,
;; the same selection used by the unpcklpd512 patterns in this file.
;; <mask_name> also yields a masked variant.
7269 (define_expand "avx512f_movddup512<mask_name>"
7270 [(set (match_operand:V8DF 0 "register_operand")
7273 (match_operand:V8DF 1 "nonimmediate_operand")
7275 (parallel [(const_int 0) (const_int 8)
7276 (const_int 2) (const_int 10)
7277 (const_int 4) (const_int 12)
7278 (const_int 6) (const_int 14)])))]
;; Expander for 512-bit unpcklpd: from the pair (operand 1, operand 2)
;; pick elements 0 8 2 10 4 12 6 14.  Operand 1 must be a register,
;; operand 2 may be memory.  <mask_name> also yields a masked variant.
7281 (define_expand "avx512f_unpcklpd512<mask_name>"
7282 [(set (match_operand:V8DF 0 "register_operand")
7285 (match_operand:V8DF 1 "register_operand")
7286 (match_operand:V8DF 2 "nonimmediate_operand"))
7287 (parallel [(const_int 0) (const_int 8)
7288 (const_int 2) (const_int 10)
7289 (const_int 4) (const_int 12)
7290 (const_int 6) (const_int 14)])))]
;; Matcher shared by the 512-bit movddup and unpcklpd expanders
;; (selection 0 8 2 10 4 12 6 14).  Two alternatives:
;;   0  operand 2 tied to operand 1 ("1")  -> vmovddup %1
;;   1  distinct operands                   -> vunpcklpd %2, %1
7293 (define_insn "*avx512f_unpcklpd512<mask_name>"
7294 [(set (match_operand:V8DF 0 "register_operand" "=v,v")
7297 (match_operand:V8DF 1 "nonimmediate_operand" "vm, v")
7298 (match_operand:V8DF 2 "nonimmediate_operand" "1 ,vm"))
7299 (parallel [(const_int 0) (const_int 8)
7300 (const_int 2) (const_int 10)
7301 (const_int 4) (const_int 12)
7302 (const_int 6) (const_int 14)])))]
7305 vmovddup\t{%1, %0<mask_operand3>|%0<mask_operand3>, %1}
7306 vunpcklpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7307 [(set_attr "type" "sselog")
7308 (set_attr "prefix" "evex")
7309 (set_attr "mode" "V8DF")])
7311 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; Expander for the 256-bit movddup form: a single visible input
;; (operand 1, register or memory) with the selection 0 4 2 6, the same
;; selection used by the unpcklpd256 patterns in this file.  Enabled for
;; TARGET_AVX, with <mask_avx512vl_condition> gating the masked variant.
7312 (define_expand "avx_movddup256<mask_name>"
7313 [(set (match_operand:V4DF 0 "register_operand")
7316 (match_operand:V4DF 1 "nonimmediate_operand")
7318 (parallel [(const_int 0) (const_int 4)
7319 (const_int 2) (const_int 6)])))]
7320 "TARGET_AVX && <mask_avx512vl_condition>")
;; Expander for 256-bit unpcklpd: from the pair (operand 1, operand 2)
;; pick elements 0 4 2 6.  Operand 1 must be a register, operand 2 may
;; be memory.  Enabled for TARGET_AVX, with <mask_avx512vl_condition>
;; gating the masked variant.
7322 (define_expand "avx_unpcklpd256<mask_name>"
7323 [(set (match_operand:V4DF 0 "register_operand")
7326 (match_operand:V4DF 1 "register_operand")
7327 (match_operand:V4DF 2 "nonimmediate_operand"))
7328 (parallel [(const_int 0) (const_int 4)
7329 (const_int 2) (const_int 6)])))]
7330 "TARGET_AVX && <mask_avx512vl_condition>")
;; Matcher shared by the 256-bit movddup and unpcklpd expanders
;; (selection 0 4 2 6).  Two alternatives:
;;   0  register operand 1, register-or-memory operand 2 -> vunpcklpd
;;   1  memory operand 1 with operand 2 tied to it ("1") -> vmovddup %1
7332 (define_insn "*avx_unpcklpd256<mask_name>"
7333 [(set (match_operand:V4DF 0 "register_operand" "=v,v")
7336 (match_operand:V4DF 1 "nonimmediate_operand" " v,m")
7337 (match_operand:V4DF 2 "nonimmediate_operand" "vm,1"))
7338 (parallel [(const_int 0) (const_int 4)
7339 (const_int 2) (const_int 6)])))]
7340 "TARGET_AVX && <mask_avx512vl_condition>"
7342 vunpcklpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
7343 vmovddup\t{%1, %0<mask_operand3>|%0<mask_operand3>, %1}"
7344 [(set_attr "type" "sselog")
7345 (set_attr "prefix" "vex")
7346 (set_attr "mode" "V4DF")])
;; Expander built from three selections: 0 4 2 6 and 1 5 3 7 over
;; operands 1 and 2, then a final 0 1 4 5 selection that sets operand 0.
;; operands[3] and operands[4] are fresh V4DF pseudos created in the
;; preparation code; presumably they hold the two intermediate results
;; (the lines that reference them are not visible in this listing).
7348 (define_expand "vec_interleave_lowv4df"
7352 (match_operand:V4DF 1 "register_operand" "x")
7353 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
7354 (parallel [(const_int 0) (const_int 4)
7355 (const_int 2) (const_int 6)])))
7361 (parallel [(const_int 1) (const_int 5)
7362 (const_int 3) (const_int 7)])))
7363 (set (match_operand:V4DF 0 "register_operand")
7368 (parallel [(const_int 0) (const_int 1)
7369 (const_int 4) (const_int 5)])))]
7372 operands[3] = gen_reg_rtx (V4DFmode);
7373 operands[4] = gen_reg_rtx (V4DFmode);
;; Masked 128-bit vunpcklpd: picks elements 0 and 2 from the pair
;; (operand 1, operand 2).  Operand 3 is the merge source (constraint
;; "0C": tied to the destination, or a constant accepted by "C") and
;; operand 4 is the QImode mask register ("Yk").  The template prints
;; the mask as {%4} followed by the %N3 modifier.
7376 (define_insn "avx512vl_unpcklpd128_mask"
7377 [(set (match_operand:V2DF 0 "register_operand" "=v")
7381 (match_operand:V2DF 1 "register_operand" "v")
7382 (match_operand:V2DF 2 "nonimmediate_operand" "vm"))
7383 (parallel [(const_int 0) (const_int 2)]))
7384 (match_operand:V2DF 3 "vector_move_operand" "0C")
7385 (match_operand:QI 4 "register_operand" "Yk")))]
7387 "vunpcklpd\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
7388 [(set_attr "type" "sselog")
7389 (set_attr "prefix" "evex")
7390 (set_attr "mode" "V2DF")])
;; Expander for the V2DF low interleave.  Both inputs may be registers
;; or memory; when ix86_vec_interleave_v2df_operator_ok (operands, 0)
;; rejects the operand combination, operand 1 is forced into a V2DF
;; register.
7392 (define_expand "vec_interleave_lowv2df"
7393 [(set (match_operand:V2DF 0 "register_operand")
7396 (match_operand:V2DF 1 "nonimmediate_operand")
7397 (match_operand:V2DF 2 "nonimmediate_operand"))
7398 (parallel [(const_int 0)
7402 if (!ix86_vec_interleave_v2df_operator_ok (operands, 0))
7403 operands[1] = force_reg (V2DFmode, operands[1]);
;; Matcher for the V2DF low interleave, valid for TARGET_SSE2 when
;; ix86_vec_interleave_v2df_operator_ok (operands, 0) holds.
;; Six alternatives, selected by the "isa" attribute
;; (noavx, avx, sse3, noavx, avx, any):
;;   0/1  unpcklpd / vunpcklpd on registers
;;   2    movddup from memory operand 1, with operand 2 tied to it
;;   3/4  movhpd / vmovhpd loading memory operand 2
;;   5    movlpd store of register operand 2 to the high part (%H0) of
;;        the offsettable memory destination
7406 (define_insn "*vec_interleave_lowv2df"
7407 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,o")
7410 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,m,0,x,0")
7411 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,1,m,m,x"))
7412 (parallel [(const_int 0)
7414 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
7416 unpcklpd\t{%2, %0|%0, %2}
7417 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
7418 %vmovddup\t{%1, %0|%0, %q1}
7419 movhpd\t{%2, %0|%0, %q2}
7420 vmovhpd\t{%2, %1, %0|%0, %1, %q2}
7421 %vmovlpd\t{%2, %H0|%H0, %2}"
7422 [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
7423 (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
7424 (set_attr "ssememalign" "64")
7425 (set_attr "prefix_data16" "*,*,*,1,*,1")
7426 (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex")
7427 (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,V1DF")])
;; Split, active for TARGET_SSE3 once reload has completed: a V2DF
;; memory destination written from register operand 1 with a selection
;; starting at element 0.  The replacement takes the DFmode view of the
;; source register (same REGNO) and stores it twice, to byte offsets 0
;; and 8 of the destination.
7430 [(set (match_operand:V2DF 0 "memory_operand")
7433 (match_operand:V2DF 1 "register_operand")
7435 (parallel [(const_int 0)
7437 "TARGET_SSE3 && reload_completed"
7440 rtx low = gen_rtx_REG (DFmode, REGNO (operands[1]));
7441 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
7442 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
;; Split, active for TARGET_SSE3: a V2DF register set from memory
;; operand 1 with selector operands 2 (0 or 1) and 3, where
;; operands[3] == operands[2] + 2.  It is rewritten as a vec_duplicate
;; of the single DFmode element found at byte offset operands[2] * 8 of
;; the memory operand.
7447 [(set (match_operand:V2DF 0 "register_operand")
7450 (match_operand:V2DF 1 "memory_operand")
7452 (parallel [(match_operand:SI 2 "const_0_to_1_operand")
7453 (match_operand:SI 3 "const_int_operand")])))]
7454 "TARGET_SSE3 && INTVAL (operands[2]) + 2 == INTVAL (operands[3])"
7455 [(set (match_dup 0) (vec_duplicate:V2DF (match_dup 1)))]
7457 operands[1] = adjust_address (operands[1], DFmode, INTVAL (operands[2]) * 8);
;; Scalar vscalef on VF_128 operands.  Operand 1 is a register; operand 2
;; takes its predicate and constraint from the <round_*> attributes, and
;; <round_op3> prints the rounding operand in the template.  The "mode"
;; attribute is the scalar mode.
7460 (define_insn "avx512f_vmscalef<mode><round_name>"
7461 [(set (match_operand:VF_128 0 "register_operand" "=v")
7464 [(match_operand:VF_128 1 "register_operand" "v")
7465 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>")]
7470 "vscalef<ssescalarmodesuffix>\t{<round_op3>%2, %1, %0|%0, %1, %2<round_op3>}"
7471 [(set_attr "prefix" "evex")
7472 (set_attr "mode" "<ssescalarmode>")])
;; Packed vscalef over the VF_AVX512VL modes, with masked and
;; embedded-rounding variants generated through <mask_name> and
;; <round_name>.  Operand 1 is a register; operand 2 is a
;; nonimmediate_operand whose constraint comes from <round_constraint>.
7474 (define_insn "<avx512>_scalef<mode><mask_name><round_name>"
7475 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
7477 [(match_operand:VF_AVX512VL 1 "register_operand" "v")
7478 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")]
7481 "vscalef<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
7482 [(set_attr "prefix" "evex")
7483 (set_attr "mode" "<MODE>")])
;; Zero-masking vpternlog expander.  It forwards operands 0-4 to the
;; _maskz_1 generator, inserting CONST0_RTX (<MODE>mode) before the mask
;; register operand 5.
7485 (define_expand "<avx512>_vternlog<mode>_maskz"
7486 [(match_operand:VI48_AVX512VL 0 "register_operand")
7487 (match_operand:VI48_AVX512VL 1 "register_operand")
7488 (match_operand:VI48_AVX512VL 2 "register_operand")
7489 (match_operand:VI48_AVX512VL 3 "nonimmediate_operand")
7490 (match_operand:SI 4 "const_0_to_255_operand")
7491 (match_operand:<avx512fmaskmode> 5 "register_operand")]
7494 emit_insn (gen_<avx512>_vternlog<mode>_maskz_1 (
7495 operands[0], operands[1], operands[2], operands[3],
7496 operands[4], CONST0_RTX (<MODE>mode), operands[5]));
;; vpternlog over the VI48_AVX512VL modes.  Operand 1 is tied to the
;; destination ("0"), which is why the template prints only %0, %2, %3
;; and the 8-bit immediate %4.  <sd_maskz_name> also yields the
;; zero-masked variant; <sd_mask_op5> prints its mask suffix.
7500 (define_insn "<avx512>_vternlog<mode><sd_maskz_name>"
7501 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
7502 (unspec:VI48_AVX512VL
7503 [(match_operand:VI48_AVX512VL 1 "register_operand" "0")
7504 (match_operand:VI48_AVX512VL 2 "register_operand" "v")
7505 (match_operand:VI48_AVX512VL 3 "nonimmediate_operand" "vm")
7506 (match_operand:SI 4 "const_0_to_255_operand")]
7509 "vpternlog<ssemodesuffix>\t{%4, %3, %2, %0<sd_mask_op5>|%0<sd_mask_op5>, %2, %3, %4}"
7510 [(set_attr "type" "sselog")
7511 (set_attr "prefix" "evex")
7512 (set_attr "mode" "<sseinsnmode>")])
;; Merge-masking vpternlog: the unspec result is combined through
;; vec_merge under mask register operand 5 ("Yk").  Operand 1 is tied to
;; the destination ("0"); the template prints the mask as {%5} after %0.
7514 (define_insn "<avx512>_vternlog<mode>_mask"
7515 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
7516 (vec_merge:VI48_AVX512VL
7517 (unspec:VI48_AVX512VL
7518 [(match_operand:VI48_AVX512VL 1 "register_operand" "0")
7519 (match_operand:VI48_AVX512VL 2 "register_operand" "v")
7520 (match_operand:VI48_AVX512VL 3 "nonimmediate_operand" "vm")
7521 (match_operand:SI 4 "const_0_to_255_operand")]
7524 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
7526 "vpternlog<ssemodesuffix>\t{%4, %3, %2, %0%{%5%}|%0%{%5%}, %2, %3, %4}"
7527 [(set_attr "type" "sselog")
7528 (set_attr "prefix" "evex")
7529 (set_attr "mode" "<sseinsnmode>")])
;; Packed vgetexp over the VF_AVX512VL modes, with masked and SAE-only
;; variants generated through <mask_name> and <round_saeonly_name>.
;; The single source operand takes its predicate and constraint from the
;; <round_saeonly_*> attributes.  The ";" after the template string
;; starts an (empty) comment in this file's syntax.
7531 (define_insn "<avx512>_getexp<mode><mask_name><round_saeonly_name>"
7532 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
7533 (unspec:VF_AVX512VL [(match_operand:VF_AVX512VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
7536 "vgetexp<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}";
7537 [(set_attr "prefix" "evex")
7538 (set_attr "mode" "<MODE>")])
;; Scalar vgetexp on VF_128 operands.  Operand 1 is a register; operand 2
;; takes its predicate and constraint from the <round_saeonly_*>
;; attributes.  The "mode" attribute is the scalar mode.  The ";" after
;; the template string starts an (empty) comment in this file's syntax.
7540 (define_insn "avx512f_sgetexp<mode><round_saeonly_name>"
7541 [(set (match_operand:VF_128 0 "register_operand" "=v")
7544 [(match_operand:VF_128 1 "register_operand" "v")
7545 (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
7550 "vgetexp<ssescalarmodesuffix>\t{<round_saeonly_op3>%2, %1, %0|%0, %1, %2<round_saeonly_op3>}";
7551 [(set_attr "prefix" "evex")
7552 (set_attr "mode" "<ssescalarmode>")])
;; valign over the VI48_AVX512VL modes: register operand 1,
;; register-or-memory operand 2 and an 8-bit immediate operand 3.
;; <mask_name> also yields a masked variant; <mask_operand4> prints its
;; mask suffix.  The ";" after the template string starts an (empty)
;; comment in this file's syntax.
7554 (define_insn "<mask_codefor><avx512>_align<mode><mask_name>"
7555 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
7556 (unspec:VI48_AVX512VL [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
7557 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")
7558 (match_operand:SI 3 "const_0_to_255_operand")]
7561 "valign<ssemodesuffix>\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}";
7562 [(set_attr "prefix" "evex")
7563 (set_attr "mode" "<sseinsnmode>")])
;; Masked 512-bit shufps expander.  The 8-bit immediate (operand 3) is
;; decoded into sixteen explicit element indices for
;; gen_avx512f_shufps512_1_mask.  The four 2-bit fields of the immediate
;; are reused for each group of four results, with base offsets
;;   +0,  +0,  +16, +16
;;   +4,  +4,  +20, +20
;;   +8,  +8,  +24, +24
;;   +12, +12, +28, +28
;; so fields 0-1 index the first input and fields 2-3 the second
;; (indices 16 and up).  Operand 4 is the merge source and operand 5 the
;; HImode mask; both are passed through unchanged.
7565 (define_expand "avx512f_shufps512_mask"
7566 [(match_operand:V16SF 0 "register_operand")
7567 (match_operand:V16SF 1 "register_operand")
7568 (match_operand:V16SF 2 "nonimmediate_operand")
7569 (match_operand:SI 3 "const_0_to_255_operand")
7570 (match_operand:V16SF 4 "register_operand")
7571 (match_operand:HI 5 "register_operand")]
7574 int mask = INTVAL (operands[3]);
7575 emit_insn (gen_avx512f_shufps512_1_mask (operands[0], operands[1], operands[2],
7576 GEN_INT ((mask >> 0) & 3),
7577 GEN_INT ((mask >> 2) & 3),
7578 GEN_INT (((mask >> 4) & 3) + 16),
7579 GEN_INT (((mask >> 6) & 3) + 16),
7580 GEN_INT (((mask >> 0) & 3) + 4),
7581 GEN_INT (((mask >> 2) & 3) + 4),
7582 GEN_INT (((mask >> 4) & 3) + 20),
7583 GEN_INT (((mask >> 6) & 3) + 20),
7584 GEN_INT (((mask >> 0) & 3) + 8),
7585 GEN_INT (((mask >> 2) & 3) + 8),
7586 GEN_INT (((mask >> 4) & 3) + 24),
7587 GEN_INT (((mask >> 6) & 3) + 24),
7588 GEN_INT (((mask >> 0) & 3) + 12),
7589 GEN_INT (((mask >> 2) & 3) + 12),
7590 GEN_INT (((mask >> 4) & 3) + 28),
7591 GEN_INT (((mask >> 6) & 3) + 28),
7592 operands[4], operands[5]));
;; Zero-masking vfixupimm expander (packed, VF_AVX512VL).  It forwards
;; operands 0-4 to the _maskz_1 generator, inserting
;; CONST0_RTX (<MODE>mode) before the mask register operand 5;
;; <round_saeonly_expand_operand6> appends the extra argument of the
;; SAE variant.
7597 (define_expand "<avx512>_fixupimm<mode>_maskz<round_saeonly_expand_name>"
7598 [(match_operand:VF_AVX512VL 0 "register_operand")
7599 (match_operand:VF_AVX512VL 1 "register_operand")
7600 (match_operand:VF_AVX512VL 2 "register_operand")
7601 (match_operand:<sseintvecmode> 3 "<round_saeonly_expand_nimm_predicate>")
7602 (match_operand:SI 4 "const_0_to_255_operand")
7603 (match_operand:<avx512fmaskmode> 5 "register_operand")]
7606 emit_insn (gen_<avx512>_fixupimm<mode>_maskz_1<round_saeonly_expand_name> (
7607 operands[0], operands[1], operands[2], operands[3],
7608 operands[4], CONST0_RTX (<MODE>mode), operands[5]
7609 <round_saeonly_expand_operand6>));
;; Packed vfixupimm over the VF_AVX512VL modes.  Operand 1 is tied to
;; the destination ("0"), operand 2 is a register, operand 3 is the
;; integer-vector operand (<sseintvecmode>) and operand 4 the 8-bit
;; immediate.  <sd_maskz_name> also yields the zero-masked variant.
;; The ";" after the template string starts an (empty) comment in this
;; file's syntax.
7613 (define_insn "<avx512>_fixupimm<mode><sd_maskz_name><round_saeonly_name>"
7614 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
7616 [(match_operand:VF_AVX512VL 1 "register_operand" "0")
7617 (match_operand:VF_AVX512VL 2 "register_operand" "v")
7618 (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "<round_saeonly_constraint>")
7619 (match_operand:SI 4 "const_0_to_255_operand")]
7622 "vfixupimm<ssemodesuffix>\t{%4, <round_saeonly_sd_mask_op5>%3, %2, %0<sd_mask_op5>|%0<sd_mask_op5>, %2, %3<round_saeonly_sd_mask_op5>, %4}";
7623 [(set_attr "prefix" "evex")
7624 (set_attr "mode" "<MODE>")])
;; Merge-masking packed vfixupimm: the result is combined through
;; vec_merge under mask register operand 5 ("Yk").  Operand 1 is tied to
;; the destination ("0"); the template prints the mask as {%5} after %0
;; and the SAE operand through <round_saeonly_op6>.  The ";" after the
;; template string starts an (empty) comment in this file's syntax.
7626 (define_insn "<avx512>_fixupimm<mode>_mask<round_saeonly_name>"
7627 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
7628 (vec_merge:VF_AVX512VL
7630 [(match_operand:VF_AVX512VL 1 "register_operand" "0")
7631 (match_operand:VF_AVX512VL 2 "register_operand" "v")
7632 (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "<round_saeonly_constraint>")
7633 (match_operand:SI 4 "const_0_to_255_operand")]
7636 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
7638 "vfixupimm<ssemodesuffix>\t{%4, <round_saeonly_op6>%3, %2, %0%{%5%}|%0%{%5%}, %2, %3<round_saeonly_op6>, %4}";
7639 [(set_attr "prefix" "evex")
7640 (set_attr "mode" "<MODE>")])
;; Zero-masking vfixupimm expander for the scalar (VF_128) form.  It
;; forwards operands 0-4 to the _maskz_1 generator, inserting
;; CONST0_RTX (<MODE>mode) before the mask register operand 5;
;; <round_saeonly_expand_operand6> appends the extra argument of the
;; SAE variant.
7642 (define_expand "avx512f_sfixupimm<mode>_maskz<round_saeonly_expand_name>"
7643 [(match_operand:VF_128 0 "register_operand")
7644 (match_operand:VF_128 1 "register_operand")
7645 (match_operand:VF_128 2 "register_operand")
7646 (match_operand:<sseintvecmode> 3 "<round_saeonly_expand_nimm_predicate>")
7647 (match_operand:SI 4 "const_0_to_255_operand")
7648 (match_operand:<avx512fmaskmode> 5 "register_operand")]
7651 emit_insn (gen_avx512f_sfixupimm<mode>_maskz_1<round_saeonly_expand_name> (
7652 operands[0], operands[1], operands[2], operands[3],
7653 operands[4], CONST0_RTX (<MODE>mode), operands[5]
7654 <round_saeonly_expand_operand6>));
;; Scalar vfixupimm on VF_128 operands.  Operand 1 is tied to the
;; destination ("0"), operand 2 is a register, operand 3 is the
;; integer-vector operand whose predicate and constraint come from the
;; <round_saeonly_*> attributes, and operand 4 is the 8-bit immediate.
;; The "mode" attribute is the scalar mode.  The ";" after the template
;; string starts an (empty) comment in this file's syntax.
7658 (define_insn "avx512f_sfixupimm<mode><sd_maskz_name><round_saeonly_name>"
7659 [(set (match_operand:VF_128 0 "register_operand" "=v")
7662 [(match_operand:VF_128 1 "register_operand" "0")
7663 (match_operand:VF_128 2 "register_operand" "v")
7664 (match_operand:<sseintvecmode> 3 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
7665 (match_operand:SI 4 "const_0_to_255_operand")]
7670 "vfixupimm<ssescalarmodesuffix>\t{%4, <round_saeonly_sd_mask_op5>%3, %2, %0<sd_mask_op5>|%0<sd_mask_op5>, %2, %3<round_saeonly_sd_mask_op5>, %4}";
7671 [(set_attr "prefix" "evex")
7672 (set_attr "mode" "<ssescalarmode>")])
;; Merge-masking scalar vfixupimm on VF_128 operands, under mask register
;; operand 5 ("Yk").  Operand 1 is tied to the destination ("0"); the
;; template prints the mask as {%5} after %0 and the SAE operand through
;; <round_saeonly_op6>.  The ";" after the template string starts an
;; (empty) comment in this file's syntax.
7674 (define_insn "avx512f_sfixupimm<mode>_mask<round_saeonly_name>"
7675 [(set (match_operand:VF_128 0 "register_operand" "=v")
7679 [(match_operand:VF_128 1 "register_operand" "0")
7680 (match_operand:VF_128 2 "register_operand" "v")
7681 (match_operand:<sseintvecmode> 3 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
7682 (match_operand:SI 4 "const_0_to_255_operand")]
7687 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
7689 "vfixupimm<ssescalarmodesuffix>\t{%4, <round_saeonly_op6>%3, %2, %0%{%5%}|%0%{%5%}, %2, %3<round_saeonly_op6>, %4}";
7690 [(set_attr "prefix" "evex")
7691 (set_attr "mode" "<ssescalarmode>")])
;; Packed vrndscale over the VF_AVX512VL modes: source operand 1
;; (constraint from <round_saeonly_constraint>) and an 8-bit immediate
;; operand 2.  <mask_name> and <round_saeonly_name> also yield masked
;; and SAE variants.
7693 (define_insn "<avx512>_rndscale<mode><mask_name><round_saeonly_name>"
7694 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
7696 [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "<round_saeonly_constraint>")
7697 (match_operand:SI 2 "const_0_to_255_operand")]
7700 "vrndscale<ssemodesuffix>\t{%2, <round_saeonly_mask_op3>%1, %0<mask_operand3>|%0<mask_operand3>, %1<round_saeonly_mask_op3>, %2}"
7701 [(set_attr "length_immediate" "1")
7702 (set_attr "prefix" "evex")
7703 (set_attr "mode" "<MODE>")])
;; Scalar vrndscale on VF_128 operands: register operand 1, operand 2
;; with predicate and constraint from the <round_saeonly_*> attributes,
;; and an 8-bit immediate operand 3.
;; NOTE(review): the "mode" attribute is <MODE> here, whereas the other
;; scalar VF_128 patterns in this listing use <ssescalarmode> -- confirm
;; that this is intended.
7705 (define_insn "avx512f_rndscale<mode><round_saeonly_name>"
7706 [(set (match_operand:VF_128 0 "register_operand" "=v")
7709 [(match_operand:VF_128 1 "register_operand" "v")
7710 (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
7711 (match_operand:SI 3 "const_0_to_255_operand")]
7716 "vrndscale<ssescalarmodesuffix>\t{%3, <round_saeonly_op4>%2, %1, %0|%0, %1, %2<round_saeonly_op4>, %3}"
7717 [(set_attr "length_immediate" "1")
7718 (set_attr "prefix" "evex")
7719 (set_attr "mode" "<MODE>")])
7721 ;; One bit in mask selects 2 elements.
;; 512-bit vshufps with the selection spelled out as sixteen element
;; indices (operands 3-18), each restricted to its own range by a
;; const_N_to_M predicate.  The insn condition additionally requires
;; that the groups in operands 7-10, 11-14 and 15-18 repeat the
;; selection of operands 3-6 shifted by 4, 8 and 12 respectively.
;; The output code packs operands 3-6 back into an 8-bit immediate (two
;; bits each, operands 5 and 6 rebased by 16) and stores it in
;; operands[3] before printing.  <mask_name> also yields a masked
;; variant, whose mask suffix is printed by <mask_operand19>.
7722 (define_insn "avx512f_shufps512_1<mask_name>"
7723 [(set (match_operand:V16SF 0 "register_operand" "=v")
7726 (match_operand:V16SF 1 "register_operand" "v")
7727 (match_operand:V16SF 2 "nonimmediate_operand" "vm"))
7728 (parallel [(match_operand 3 "const_0_to_3_operand")
7729 (match_operand 4 "const_0_to_3_operand")
7730 (match_operand 5 "const_16_to_19_operand")
7731 (match_operand 6 "const_16_to_19_operand")
7732 (match_operand 7 "const_4_to_7_operand")
7733 (match_operand 8 "const_4_to_7_operand")
7734 (match_operand 9 "const_20_to_23_operand")
7735 (match_operand 10 "const_20_to_23_operand")
7736 (match_operand 11 "const_8_to_11_operand")
7737 (match_operand 12 "const_8_to_11_operand")
7738 (match_operand 13 "const_24_to_27_operand")
7739 (match_operand 14 "const_24_to_27_operand")
7740 (match_operand 15 "const_12_to_15_operand")
7741 (match_operand 16 "const_12_to_15_operand")
7742 (match_operand 17 "const_28_to_31_operand")
7743 (match_operand 18 "const_28_to_31_operand")])))]
7745 && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
7746 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
7747 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
7748 && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4)
7749 && INTVAL (operands[3]) == (INTVAL (operands[11]) - 8)
7750 && INTVAL (operands[4]) == (INTVAL (operands[12]) - 8)
7751 && INTVAL (operands[5]) == (INTVAL (operands[13]) - 8)
7752 && INTVAL (operands[6]) == (INTVAL (operands[14]) - 8)
7753 && INTVAL (operands[3]) == (INTVAL (operands[15]) - 12)
7754 && INTVAL (operands[4]) == (INTVAL (operands[16]) - 12)
7755 && INTVAL (operands[5]) == (INTVAL (operands[17]) - 12)
7756 && INTVAL (operands[6]) == (INTVAL (operands[18]) - 12))"
7759 mask = INTVAL (operands[3]);
7760 mask |= INTVAL (operands[4]) << 2;
7761 mask |= (INTVAL (operands[5]) - 16) << 4;
7762 mask |= (INTVAL (operands[6]) - 16) << 6;
7763 operands[3] = GEN_INT (mask);
7765 return "vshufps\t{%3, %2, %1, %0<mask_operand19>|%0<mask_operand19>, %1, %2, %3}";
7767 [(set_attr "type" "sselog")
7768 (set_attr "length_immediate" "1")
7769 (set_attr "prefix" "evex")
7770 (set_attr "mode" "V16SF")])
;; Masked 512-bit shufpd expander.  Each bit of the 8-bit immediate
;; (operand 3) is turned into an explicit element index for
;; gen_avx512f_shufpd512_1_mask: for the bits visible here, a set bit
;; selects the odd element of the pair (9, 3, 11, 5, 13, 7, 15) and a
;; clear bit the even one (8, 2, 10, 4, 12, 6, 14).  Operand 4 is the
;; merge source and operand 5 the QImode mask; both are passed through
;; unchanged.
7772 (define_expand "avx512f_shufpd512_mask"
7773 [(match_operand:V8DF 0 "register_operand")
7774 (match_operand:V8DF 1 "register_operand")
7775 (match_operand:V8DF 2 "nonimmediate_operand")
7776 (match_operand:SI 3 "const_0_to_255_operand")
7777 (match_operand:V8DF 4 "register_operand")
7778 (match_operand:QI 5 "register_operand")]
7781 int mask = INTVAL (operands[3]);
7782 emit_insn (gen_avx512f_shufpd512_1_mask (operands[0], operands[1], operands[2],
7784 GEN_INT (mask & 2 ? 9 : 8),
7785 GEN_INT (mask & 4 ? 3 : 2),
7786 GEN_INT (mask & 8 ? 11 : 10),
7787 GEN_INT (mask & 16 ? 5 : 4),
7788 GEN_INT (mask & 32 ? 13 : 12),
7789 GEN_INT (mask & 64 ? 7 : 6),
7790 GEN_INT (mask & 128 ? 15 : 14),
7791 operands[4], operands[5]));
;; 512-bit vshufpd with the selection spelled out as eight element
;; indices (operands 3-10), each restricted to a two-value range by its
;; predicate.  The output code rebuilds the 8-bit immediate, one bit per
;; selector (the selector minus the low end of its range, shifted to bit
;; position 0-7), and stores it in operands[3] before printing.
;; <mask_name> also yields a masked variant, whose mask suffix is
;; printed by <mask_operand11>.
7795 (define_insn "avx512f_shufpd512_1<mask_name>"
7796 [(set (match_operand:V8DF 0 "register_operand" "=v")
7799 (match_operand:V8DF 1 "register_operand" "v")
7800 (match_operand:V8DF 2 "nonimmediate_operand" "vm"))
7801 (parallel [(match_operand 3 "const_0_to_1_operand")
7802 (match_operand 4 "const_8_to_9_operand")
7803 (match_operand 5 "const_2_to_3_operand")
7804 (match_operand 6 "const_10_to_11_operand")
7805 (match_operand 7 "const_4_to_5_operand")
7806 (match_operand 8 "const_12_to_13_operand")
7807 (match_operand 9 "const_6_to_7_operand")
7808 (match_operand 10 "const_14_to_15_operand")])))]
7812 mask = INTVAL (operands[3]);
7813 mask |= (INTVAL (operands[4]) - 8) << 1;
7814 mask |= (INTVAL (operands[5]) - 2) << 2;
7815 mask |= (INTVAL (operands[6]) - 10) << 3;
7816 mask |= (INTVAL (operands[7]) - 4) << 4;
7817 mask |= (INTVAL (operands[8]) - 12) << 5;
7818 mask |= (INTVAL (operands[9]) - 6) << 6;
7819 mask |= (INTVAL (operands[10]) - 14) << 7;
7820 operands[3] = GEN_INT (mask);
7822 return "vshufpd\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
7824 [(set_attr "type" "sselog")
7825 (set_attr "length_immediate" "1")
7826 (set_attr "prefix" "evex")
7827 (set_attr "mode" "V8DF")])
;; 256-bit shufpd expander (plain and masked via <mask_expand4_name>).
;; Bits of the immediate (operand 3, any const_int) are turned into
;; explicit element indices for gen_avx_shufpd256_1: for the bits
;; visible here, a set bit selects 5, 3 or 7 and a clear bit 4, 2 or 6.
;; <mask_expand4_args> appends the extra arguments of the masked form.
7829 (define_expand "avx_shufpd256<mask_expand4_name>"
7830 [(match_operand:V4DF 0 "register_operand")
7831 (match_operand:V4DF 1 "register_operand")
7832 (match_operand:V4DF 2 "nonimmediate_operand")
7833 (match_operand:SI 3 "const_int_operand")]
7836 int mask = INTVAL (operands[3]);
7837 emit_insn (gen_avx_shufpd256_1<mask_expand4_name> (operands[0],
7841 GEN_INT (mask & 2 ? 5 : 4),
7842 GEN_INT (mask & 4 ? 3 : 2),
7843 GEN_INT (mask & 8 ? 7 : 6)
7844 <mask_expand4_args>));
;; 256-bit vshufpd with the selection spelled out as four element
;; indices (operands 3-6), each restricted to a two-value range by its
;; predicate.  The output code rebuilds the 4-bit immediate, one bit per
;; selector, and stores it in operands[3] before printing.  Enabled for
;; TARGET_AVX, with <mask_avx512vl_condition> gating the masked variant.
7848 (define_insn "avx_shufpd256_1<mask_name>"
7849 [(set (match_operand:V4DF 0 "register_operand" "=v")
7852 (match_operand:V4DF 1 "register_operand" "v")
7853 (match_operand:V4DF 2 "nonimmediate_operand" "vm"))
7854 (parallel [(match_operand 3 "const_0_to_1_operand")
7855 (match_operand 4 "const_4_to_5_operand")
7856 (match_operand 5 "const_2_to_3_operand")
7857 (match_operand 6 "const_6_to_7_operand")])))]
7858 "TARGET_AVX && <mask_avx512vl_condition>"
7861 mask = INTVAL (operands[3]);
7862 mask |= (INTVAL (operands[4]) - 4) << 1;
7863 mask |= (INTVAL (operands[5]) - 2) << 2;
7864 mask |= (INTVAL (operands[6]) - 6) << 3;
7865 operands[3] = GEN_INT (mask);
7867 return "vshufpd\t{%3, %2, %1, %0<mask_operand7>|%0<mask_operand7>, %1, %2, %3}";
7869 [(set_attr "type" "sseshuf")
7870 (set_attr "length_immediate" "1")
7871 (set_attr "prefix" "vex")
7872 (set_attr "mode" "V4DF")])
;; 128-bit shufpd expander (plain and masked via <mask_expand4_name>).
;; Bit 0 of the immediate (operand 3) is passed on as the first selector
;; (0 or 1); bit 1 chooses between 3 and 2 for the second.
;; <mask_expand4_args> appends the extra arguments of the masked form.
7874 (define_expand "sse2_shufpd<mask_expand4_name>"
7875 [(match_operand:V2DF 0 "register_operand")
7876 (match_operand:V2DF 1 "register_operand")
7877 (match_operand:V2DF 2 "nonimmediate_operand")
7878 (match_operand:SI 3 "const_int_operand")]
7881 int mask = INTVAL (operands[3]);
7882 emit_insn (gen_sse2_shufpd_v2df<mask_expand4_name> (operands[0], operands[1],
7883 operands[2], GEN_INT (mask & 1),
7884 GEN_INT (mask & 2 ? 3 : 2)
7885 <mask_expand4_args>));
;; Masked 128-bit vshufpd on V2DF.
;; Operands 3 and 4 are const_int selectors (ranges 0-1 and 2-3).  The
;; output code folds them into a 2-bit immediate: bit 0 is operand 3 and
;; bit 1 is operand 4 minus 2.  The result overwrites operands[3] so that
;; %3 prints the immediate.
;; Operand 5 is the merge source (tied to the destination, or constant
;; via "C") and operand 6 is the mask register ("Yk").
;; The mask register must be written as %{%6%} in BOTH dialect halves of
;; the template; %{ and %} only emit the literal braces, so a bare
;; "%{6%}" would print "{6}" instead of the register name.
;; NOTE(review): %N5 presumably appends the zero-masking marker when
;; operand 5 is the zero vector -- confirm against the i386 print_operand
;; codes.
7889 (define_insn "sse2_shufpd_v2df_mask"
7890 [(set (match_operand:V2DF 0 "register_operand" "=v")
7894 (match_operand:V2DF 1 "register_operand" "v")
7895 (match_operand:V2DF 2 "nonimmediate_operand" "vm"))
7896 (parallel [(match_operand 3 "const_0_to_1_operand")
7897 (match_operand 4 "const_2_to_3_operand")]))
7898 (match_operand:V2DF 5 "vector_move_operand" "0C")
7899 (match_operand:QI 6 "register_operand" "Yk")))]
7903 mask = INTVAL (operands[3]);
7904 mask |= (INTVAL (operands[4]) - 2) << 1;
7905 operands[3] = GEN_INT (mask);
7907 return "vshufpd\t{%3, %2, %1, %0%{%6%}%N5|%0%{%6%}%N5, %1, %2, %3}";
7909 [(set_attr "type" "sseshuf")
7910 (set_attr "length_immediate" "1")
7911 (set_attr "prefix" "evex")
7912 (set_attr "mode" "V2DF")])
7914 ;; punpcklqdq and punpckhqdq are shorter than shufpd.
;; 256-bit vpunpckhqdq on V4DI, optionally masked via <mask_name>.
;; The selector list starts with index 1; the rest of the list and the
;; RTL wrapping operands 1 and 2 are on lines not visible in this listing.
;; NOTE(review): presumably a vec_select over the concatenation of
;; operands 1 and 2 -- confirm against the full pattern.
7915 (define_insn "avx2_interleave_highv4di<mask_name>"
7916 [(set (match_operand:V4DI 0 "register_operand" "=v")
7919 (match_operand:V4DI 1 "register_operand" "v")
7920 (match_operand:V4DI 2 "nonimmediate_operand" "vm"))
7921 (parallel [(const_int 1)
7925 "TARGET_AVX2 && <mask_avx512vl_condition>"
7926 "vpunpckhqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7927 [(set_attr "type" "sselog")
7928 (set_attr "prefix" "vex")
7929 (set_attr "mode" "OI")])
;; 512-bit vpunpckhqdq on V8DI, optionally masked via <mask_name>.
;; The selector picks indices 1, 9, 3, 11, 5, 13, 7, 15, i.e. the
;; odd-numbered elements, alternating between the low (0-7) and high
;; (8-15) index ranges.
;; NOTE(review): indices 8-15 presumably address operand 2 within a
;; vec_concat of operands 1 and 2; that line is not visible here.
7931 (define_insn "<mask_codefor>avx512f_interleave_highv8di<mask_name>"
7932 [(set (match_operand:V8DI 0 "register_operand" "=v")
7935 (match_operand:V8DI 1 "register_operand" "v")
7936 (match_operand:V8DI 2 "nonimmediate_operand" "vm"))
7937 (parallel [(const_int 1) (const_int 9)
7938 (const_int 3) (const_int 11)
7939 (const_int 5) (const_int 13)
7940 (const_int 7) (const_int 15)])))]
7942 "vpunpckhqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7943 [(set_attr "type" "sselog")
7944 (set_attr "prefix" "evex")
7945 (set_attr "mode" "XI")])
;; 128-bit punpckhqdq / vpunpckhqdq on V2DI.
;; Alternative 0 (isa noavx) is the two-operand SSE2 form with operand 1
;; tied to the destination.  Alternative 1 (isa avx) is the three-operand
;; form and is the only one that prints the <mask_operand3> suffix.
;; The selector list starts with index 1; its remainder is on a line not
;; visible in this listing.
7947 (define_insn "vec_interleave_highv2di<mask_name>"
7948 [(set (match_operand:V2DI 0 "register_operand" "=x,v")
7951 (match_operand:V2DI 1 "register_operand" "0,v")
7952 (match_operand:V2DI 2 "nonimmediate_operand" "xm,vm"))
7953 (parallel [(const_int 1)
7955 "TARGET_SSE2 && <mask_avx512vl_condition>"
7957 punpckhqdq\t{%2, %0|%0, %2}
7958 vpunpckhqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7959 [(set_attr "isa" "noavx,avx")
7960 (set_attr "type" "sselog")
7961 (set_attr "prefix_data16" "1,*")
7962 (set_attr "prefix" "orig,<mask_prefix>")
7963 (set_attr "mode" "TI")])
;; 256-bit vpunpcklqdq on V4DI, optionally masked via <mask_name>.
;; The selector list starts with index 0; the rest of the list and the
;; RTL wrapping operands 1 and 2 are on lines not visible in this listing.
;; NOTE(review): presumably a vec_select over the concatenation of
;; operands 1 and 2 -- confirm against the full pattern.
7965 (define_insn "avx2_interleave_lowv4di<mask_name>"
7966 [(set (match_operand:V4DI 0 "register_operand" "=v")
7969 (match_operand:V4DI 1 "register_operand" "v")
7970 (match_operand:V4DI 2 "nonimmediate_operand" "vm"))
7971 (parallel [(const_int 0)
7975 "TARGET_AVX2 && <mask_avx512vl_condition>"
7976 "vpunpcklqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7977 [(set_attr "type" "sselog")
7978 (set_attr "prefix" "vex")
7979 (set_attr "mode" "OI")])
;; 512-bit vpunpcklqdq on V8DI, optionally masked via <mask_name>.
;; The selector picks indices 0, 8, 2, 10, 4, 12, 6, 14, i.e. the
;; even-numbered elements, alternating between the low (0-7) and high
;; (8-15) index ranges.
;; NOTE(review): indices 8-15 presumably address operand 2 within a
;; vec_concat of operands 1 and 2; that line is not visible here.
7981 (define_insn "<mask_codefor>avx512f_interleave_lowv8di<mask_name>"
7982 [(set (match_operand:V8DI 0 "register_operand" "=v")
7985 (match_operand:V8DI 1 "register_operand" "v")
7986 (match_operand:V8DI 2 "nonimmediate_operand" "vm"))
7987 (parallel [(const_int 0) (const_int 8)
7988 (const_int 2) (const_int 10)
7989 (const_int 4) (const_int 12)
7990 (const_int 6) (const_int 14)])))]
7992 "vpunpcklqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7993 [(set_attr "type" "sselog")
7994 (set_attr "prefix" "evex")
7995 (set_attr "mode" "XI")])
;; 128-bit punpcklqdq / vpunpcklqdq on V2DI.
;; Alternative 0 (isa noavx) is the two-operand SSE2 form with operand 1
;; tied to the destination.  Alternative 1 (isa avx) is the three-operand
;; form and is the only one that prints the <mask_operand3> suffix.
;; NOTE(review): the "prefix" attribute here is "orig,vex", whereas the
;; sibling vec_interleave_highv2di<mask_name> uses "orig,<mask_prefix>";
;; check whether the masked variant should report an EVEX prefix too.
7997 (define_insn "vec_interleave_lowv2di<mask_name>"
7998 [(set (match_operand:V2DI 0 "register_operand" "=x,v")
8001 (match_operand:V2DI 1 "register_operand" "0,v")
8002 (match_operand:V2DI 2 "nonimmediate_operand" "xm,vm"))
8003 (parallel [(const_int 0)
8005 "TARGET_SSE2 && <mask_avx512vl_condition>"
8007 punpcklqdq\t{%2, %0|%0, %2}
8008 vpunpcklqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8009 [(set_attr "isa" "noavx,avx")
8010 (set_attr "type" "sselog")
8011 (set_attr "prefix_data16" "1,*")
8012 (set_attr "prefix" "orig,vex")
8013 (set_attr "mode" "TI")])
;; Unmasked 128-bit shufpd / vshufpd over every mode in VI8F_128.
;; The result selects two elements from the concatenation of operands 1
;; and 2: operand 3 (0-1) picks from operand 1 and operand 4 (2-3) picks
;; from operand 2.  The output code folds them into a 2-bit immediate
;; (bit 0 = operand 3, bit 1 = operand 4 minus 2) stored back into
;; operands[3].  The visible return statements emit the two-operand SSE
;; form and the three-operand AVX form, chosen by which_alternative; the
;; "isa" attribute marks the alternatives noavx and avx respectively.
8015 (define_insn "sse2_shufpd_<mode>"
8016 [(set (match_operand:VI8F_128 0 "register_operand" "=x,x")
8017 (vec_select:VI8F_128
8018 (vec_concat:<ssedoublevecmode>
8019 (match_operand:VI8F_128 1 "register_operand" "0,x")
8020 (match_operand:VI8F_128 2 "nonimmediate_operand" "xm,xm"))
8021 (parallel [(match_operand 3 "const_0_to_1_operand")
8022 (match_operand 4 "const_2_to_3_operand")])))]
8026 mask = INTVAL (operands[3]);
8027 mask |= (INTVAL (operands[4]) - 2) << 1;
8028 operands[3] = GEN_INT (mask);
8030 switch (which_alternative)
8033 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
8035 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
8040 [(set_attr "isa" "noavx,avx")
8041 (set_attr "type" "sseshuf")
8042 (set_attr "length_immediate" "1")
8043 (set_attr "prefix" "orig,vex")
8044 (set_attr "mode" "V2DF")])
8046 ;; Avoid combining registers from different units in a single alternative,
8047 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Extract element 1 of a V2DF (operand 1) into a DF destination.
;; There are six alternatives.  Alternative 0 stores to memory with
;; %vmovhpd, and the AVX-only alternative 2 uses vunpckhpd with the source
;; duplicated (%d1).  The templates of the other alternatives are on lines
;; not visible in this listing.
;; prefix_data16 is computed: its condition tests alternative 0 and
;; !TARGET_AVX (the resulting values are partly on lines not shown).
;; The insn condition rejects memory-to-memory.
8048 (define_insn "sse2_storehpd"
8049 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,x,*f,r")
8051 (match_operand:V2DF 1 "nonimmediate_operand" " x,0,x,o,o,o")
8052 (parallel [(const_int 1)])))]
8053 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
8055 %vmovhpd\t{%1, %0|%0, %1}
8057 vunpckhpd\t{%d1, %0|%0, %d1}
8061 [(set_attr "isa" "*,noavx,avx,*,*,*")
8062 (set_attr "type" "ssemov,sselog1,sselog1,ssemov,fmov,imov")
8063 (set (attr "prefix_data16")
8065 (and (eq_attr "alternative" "0")
8066 (not (match_test "TARGET_AVX")))
8068 (const_string "*")))
8069 (set_attr "prefix" "maybe_vex,orig,vex,*,*,*")
8070 (set_attr "mode" "V1DF,V1DF,V2DF,DF,DF,DF")])
;; Post-reload rewrite: extracting element 1 of a V2DF held in memory into
;; a DF register becomes a plain DF load from the same address plus 8
;; bytes (adjust_address with offset 8).
;; NOTE(review): the header line of this form is not visible in this
;; listing; the shape (condition, replacement, preparation code) looks
;; like a define_split -- confirm.
8073 [(set (match_operand:DF 0 "register_operand")
8075 (match_operand:V2DF 1 "memory_operand")
8076 (parallel [(const_int 1)])))]
8077 "TARGET_SSE2 && reload_completed"
8078 [(set (match_dup 0) (match_dup 1))]
8079 "operands[1] = adjust_address (operands[1], DFmode, 8);")
;; Extract element 1 of a V2DF when only SSE1 is available (the condition
;; requires TARGET_SSE and !TARGET_SSE2).
;; Alternatives: register to memory via movhps, register to register via
;; movhlps, and offsettable memory to register via movlps on the %H1
;; address.  Memory-to-memory is rejected by the insn condition.
8081 (define_insn "*vec_extractv2df_1_sse"
8082 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
8084 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
8085 (parallel [(const_int 1)])))]
8086 "!TARGET_SSE2 && TARGET_SSE
8087 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
8089 movhps\t{%1, %0|%q0, %1}
8090 movhlps\t{%1, %0|%0, %1}
8091 movlps\t{%H1, %0|%0, %H1}"
8092 [(set_attr "type" "ssemov")
8093 (set_attr "ssememalign" "64")
8094 (set_attr "mode" "V2SF,V4SF,V2SF")])
8096 ;; Avoid combining registers from different units in a single alternative,
8097 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Extract element 0 of a V2DF (operand 1) into a DF destination.
;; There are five alternatives.  Alternative 0 stores to memory with
;; %vmovlpd; the templates of the others are on lines not visible in this
;; listing.  The insn condition rejects memory-to-memory.
8098 (define_insn "sse2_storelpd"
8099 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
8101 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m,m,m")
8102 (parallel [(const_int 0)])))]
8103 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
8105 %vmovlpd\t{%1, %0|%0, %1}
8110 [(set_attr "type" "ssemov,ssemov,ssemov,fmov,imov")
8111 (set_attr "prefix_data16" "1,*,*,*,*")
8112 (set_attr "prefix" "maybe_vex")
8113 (set_attr "mode" "V1DF,DF,DF,DF,DF")])
;; Post-reload rewrite: extracting element 0 of a V2DF into a DF register
;; becomes a plain DF move.  A register source is re-expressed as the same
;; hard register in DFmode; otherwise the memory source is re-addressed as
;; DFmode at offset 0.
;; NOTE(review): the header line of this form is not visible in this
;; listing; the shape looks like a define_split -- confirm.
8116 [(set (match_operand:DF 0 "register_operand")
8118 (match_operand:V2DF 1 "nonimmediate_operand")
8119 (parallel [(const_int 0)])))]
8120 "TARGET_SSE2 && reload_completed"
8121 [(set (match_dup 0) (match_dup 1))]
8123 if (REG_P (operands[1]))
8124 operands[1] = gen_rtx_REG (DFmode, REGNO (operands[1]));
8126 operands[1] = adjust_address (operands[1], DFmode, 0);
;; Extract element 0 of a V2DF when only SSE1 is available (the condition
;; requires TARGET_SSE and !TARGET_SSE2).
;; Alternatives: register to memory via movlps, register to register via
;; movaps, and memory to register via movlps.  Memory-to-memory is
;; rejected by the insn condition.
8129 (define_insn "*vec_extractv2df_0_sse"
8130 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
8132 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
8133 (parallel [(const_int 0)])))]
8134 "!TARGET_SSE2 && TARGET_SSE
8135 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
8137 movlps\t{%1, %0|%0, %1}
8138 movaps\t{%1, %0|%0, %1}
8139 movlps\t{%1, %0|%0, %q1}"
8140 [(set_attr "type" "ssemov")
8141 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander wrapper around sse2_loadhpd.
;; It first passes the operands through ix86_fixup_binary_operands, which
;; returns the destination to use (dst).  It then emits gen_sse2_loadhpd
;; into dst and, if dst differs from operands[0], copies the result back
;; with a move.
8143 (define_expand "sse2_loadhpd_exp"
8144 [(set (match_operand:V2DF 0 "nonimmediate_operand")
8147 (match_operand:V2DF 1 "nonimmediate_operand")
8148 (parallel [(const_int 0)]))
8149 (match_operand:DF 2 "nonimmediate_operand")))]
8152 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
8154 emit_insn (gen_sse2_loadhpd (dst, operands[1], operands[2]));
8156 /* Fix up the destination if needed. */
8157 if (dst != operands[0])
8158 emit_move_insn (operands[0], dst);
8163 ;; Avoid combining registers from different units in a single alternative,
8164 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Build a V2DF from element 0 of operand 1 and the DF operand 2.
;; There are seven alternatives (constraint strings for operands 0 and 1
;; are on lines not visible in this listing).  The visible templates are
;; movhpd / vmovhpd and unpcklpd / vunpcklpd, in noavx / avx pairs per the
;; "isa" attribute; the remaining templates are not shown.
;; NOTE(review): presumably operand 2 becomes the high element (the
;; combining RTL code is not visible) -- confirm against the full pattern.
8165 (define_insn "sse2_loadhpd"
8166 [(set (match_operand:V2DF 0 "nonimmediate_operand"
8170 (match_operand:V2DF 1 "nonimmediate_operand"
8172 (parallel [(const_int 0)]))
8173 (match_operand:DF 2 "nonimmediate_operand"
8174 " m,m,x,x,x,*f,r")))]
8175 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
8177 movhpd\t{%2, %0|%0, %2}
8178 vmovhpd\t{%2, %1, %0|%0, %1, %2}
8179 unpcklpd\t{%2, %0|%0, %2}
8180 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
8184 [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
8185 (set_attr "type" "ssemov,ssemov,sselog,sselog,ssemov,fmov,imov")
8186 (set_attr "ssememalign" "64")
8187 (set_attr "prefix_data16" "1,*,*,*,*,*,*")
8188 (set_attr "prefix" "orig,vex,orig,vex,*,*,*")
8189 (set_attr "mode" "V1DF,V1DF,V2DF,V2DF,DF,DF,DF")])
;; Post-reload rewrite for the in-memory case.  When the V2DF destination
;; is memory and its element 0 is kept (match_dup 0), only the other half
;; changes, so the operation becomes a DF store of register operand 1 to
;; the same address plus 8 bytes.
;; NOTE(review): the header line of this form is not visible in this
;; listing; the shape looks like a define_split -- confirm.
8192 [(set (match_operand:V2DF 0 "memory_operand")
8194 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
8195 (match_operand:DF 1 "register_operand")))]
8196 "TARGET_SSE2 && reload_completed"
8197 [(set (match_dup 0) (match_dup 1))]
8198 "operands[0] = adjust_address (operands[0], DFmode, 8);")
;; Expander wrapper around sse2_loadlpd.
;; It first passes the operands through ix86_fixup_binary_operands, which
;; returns the destination to use (dst).  It then emits gen_sse2_loadlpd
;; into dst and, if dst differs from operands[0], copies the result back
;; with a move.
8200 (define_expand "sse2_loadlpd_exp"
8201 [(set (match_operand:V2DF 0 "nonimmediate_operand")
8203 (match_operand:DF 2 "nonimmediate_operand")
8205 (match_operand:V2DF 1 "nonimmediate_operand")
8206 (parallel [(const_int 1)]))))]
8209 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
8211 emit_insn (gen_sse2_loadlpd (dst, operands[1], operands[2]));
8213 /* Fix up the destination if needed. */
8214 if (dst != operands[0])
8215 emit_move_insn (operands[0], dst);
8220 ;; Avoid combining registers from different units in a single alternative,
8221 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Build a V2DF from the DF operand 2 followed by element 1 of operand 1.
;; There are eleven alternatives.  Operand 1 may be the constant accepted
;; by "C" (alternative 0, emitted as %vmovsd from memory).  The visible
;; templates cover movlpd / vmovlpd, movsd / vmovsd, shufpd with immediate
;; 2, and movhpd / vmovhpd on the %H1 address; the templates for the last
;; three (memory-destination) alternatives are not visible in this listing.
;; The "type" attribute is sselog for alternative 5, fmov for 9 and imov
;; for 10, with ssemov as the fallback.
8222 (define_insn "sse2_loadlpd"
8223 [(set (match_operand:V2DF 0 "nonimmediate_operand"
8224 "=x,x,x,x,x,x,x,x,m,m ,m")
8226 (match_operand:DF 2 "nonimmediate_operand"
8227 " m,m,m,x,x,0,0,x,x,*f,r")
8229 (match_operand:V2DF 1 "vector_move_operand"
8230 " C,0,x,0,x,x,o,o,0,0 ,0")
8231 (parallel [(const_int 1)]))))]
8232 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
8234 %vmovsd\t{%2, %0|%0, %2}
8235 movlpd\t{%2, %0|%0, %2}
8236 vmovlpd\t{%2, %1, %0|%0, %1, %2}
8237 movsd\t{%2, %0|%0, %2}
8238 vmovsd\t{%2, %1, %0|%0, %1, %2}
8239 shufpd\t{$2, %1, %0|%0, %1, 2}
8240 movhpd\t{%H1, %0|%0, %H1}
8241 vmovhpd\t{%H1, %2, %0|%0, %2, %H1}
8245 [(set_attr "isa" "*,noavx,avx,noavx,avx,noavx,noavx,avx,*,*,*")
8247 (cond [(eq_attr "alternative" "5")
8248 (const_string "sselog")
8249 (eq_attr "alternative" "9")
8250 (const_string "fmov")
8251 (eq_attr "alternative" "10")
8252 (const_string "imov")
8254 (const_string "ssemov")))
8255 (set_attr "ssememalign" "64")
8256 (set_attr "prefix_data16" "*,1,*,*,*,*,1,*,*,*,*")
8257 (set_attr "length_immediate" "*,*,*,*,*,1,*,*,*,*,*")
8258 (set_attr "prefix" "maybe_vex,orig,vex,orig,vex,orig,orig,vex,*,*,*")
8259 (set_attr "mode" "DF,V1DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,DF,DF,DF")])
;; Post-reload rewrite for the in-memory case.  When the V2DF destination
;; is memory and its element 1 is kept (match_dup 0), only the other half
;; changes, so the operation becomes a DF store of register operand 1 to
;; the same address at offset 0.
;; NOTE(review): the header line of this form is not visible in this
;; listing; the shape looks like a define_split -- confirm.
8262 [(set (match_operand:V2DF 0 "memory_operand")
8264 (match_operand:DF 1 "register_operand")
8265 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
8266 "TARGET_SSE2 && reload_completed"
8267 [(set (match_dup 0) (match_dup 1))]
8268 "operands[0] = adjust_address (operands[0], DFmode, 0);")
;; Combine two V2DF operands (operand 2 listed first, then operand 1).
;; There are nine alternatives covering register, memory and
;; offsettable-memory forms.  The templates emit movsd / vmovsd, movlpd /
;; vmovlpd, a %vmovlpd store, shufpd with immediate 2, and movhps /
;; vmovhps on the %H1 address, plus a %vmovhps store to the %H0 address.
;; "type" is sselog for alternative 5 and ssemov otherwise; the
;; prefix_data16 condition tests alternatives 2 and 4 together with
;; !TARGET_AVX.
;; NOTE(review): the combining RTL code and the insn condition are on
;; lines not visible in this listing; presumably a vec_merge that takes
;; the low element from operand 2 -- confirm.
8270 (define_insn "sse2_movsd"
8271 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,m,x,x,x,o")
8273 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,m,m,x,0,0,x,0")
8274 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,0,x,0,x,o,o,x")
8278 movsd\t{%2, %0|%0, %2}
8279 vmovsd\t{%2, %1, %0|%0, %1, %2}
8280 movlpd\t{%2, %0|%0, %q2}
8281 vmovlpd\t{%2, %1, %0|%0, %1, %q2}
8282 %vmovlpd\t{%2, %0|%q0, %2}
8283 shufpd\t{$2, %1, %0|%0, %1, 2}
8284 movhps\t{%H1, %0|%0, %H1}
8285 vmovhps\t{%H1, %2, %0|%0, %2, %H1}
8286 %vmovhps\t{%1, %H0|%H0, %1}"
8287 [(set_attr "isa" "noavx,avx,noavx,avx,*,noavx,noavx,avx,*")
8290 (eq_attr "alternative" "5")
8291 (const_string "sselog")
8292 (const_string "ssemov")))
8293 (set (attr "prefix_data16")
8295 (and (eq_attr "alternative" "2,4")
8296 (not (match_test "TARGET_AVX")))
8298 (const_string "*")))
8299 (set_attr "length_immediate" "*,*,*,*,*,1,*,*,*")
8300 (set_attr "ssememalign" "64")
8301 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig,vex,maybe_vex")
8302 (set_attr "mode" "DF,DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,V1DF")])
;; V2DF result built from the single DF operand 1, optionally masked via
;; <mask_name>.
;; Alternative 0 (isa noavx) has operand 1 tied to the destination; its
;; template is on a line not visible in this listing.  Alternative 1 (isa
;; sse3) emits %vmovddup from a register or memory source and prints the
;; <mask_operand2> suffix.
;; NOTE(review): presumably a vec_duplicate, going by the pattern name;
;; the RTL code line is not visible -- confirm.
8304 (define_insn "vec_dupv2df<mask_name>"
8305 [(set (match_operand:V2DF 0 "register_operand" "=x,v")
8307 (match_operand:DF 1 "nonimmediate_operand" " 0,vm")))]
8308 "TARGET_SSE2 && <mask_avx512vl_condition>"
8311 %vmovddup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
8312 [(set_attr "isa" "noavx,sse3")
8313 (set_attr "type" "sselog1")
8314 (set_attr "prefix" "orig,maybe_vex")
8315 (set_attr "mode" "V2DF,DF")])
;; V2DF result built from two DF operands.  There are eight alternatives:
;;   0/1  unpcklpd / vunpcklpd, both operands in registers
;;   2    %vmovddup when operand 2 matches operand 1 (constraint "1")
;;   3/4  movhpd / vmovhpd with operand 2 in memory
;;   5    %vmovsd when operand 2 is the constant accepted by "C"
;;   6/7  movlhps / movhps for targets without AVX (isa noavx)
;; "type" is sselog for alternatives 0-2 and ssemov otherwise.
;; NOTE(review): the RTL code and the insn condition are on lines not
;; visible in this listing; presumably a vec_concat, going by the pattern
;; name -- confirm.
8317 (define_insn "*vec_concatv2df"
8318 [(set (match_operand:V2DF 0 "register_operand" "=x,v,v,x,x,v,x,x")
8320 (match_operand:DF 1 "nonimmediate_operand" " 0,v,m,0,x,m,0,0")
8321 (match_operand:DF 2 "vector_move_operand" " x,v,1,m,m,C,x,m")))]
8324 unpcklpd\t{%2, %0|%0, %2}
8325 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
8326 %vmovddup\t{%1, %0|%0, %1}
8327 movhpd\t{%2, %0|%0, %2}
8328 vmovhpd\t{%2, %1, %0|%0, %1, %2}
8329 %vmovsd\t{%1, %0|%0, %1}
8330 movlhps\t{%2, %0|%0, %2}
8331 movhps\t{%2, %0|%0, %2}"
8332 [(set_attr "isa" "sse2_noavx,avx,sse3,sse2_noavx,avx,sse2,noavx,noavx")
8335 (eq_attr "alternative" "0,1,2")
8336 (const_string "sselog")
8337 (const_string "ssemov")))
8338 (set_attr "prefix_data16" "*,*,*,1,*,*,*,*")
8339 (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex,orig,orig")
8340 (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,DF,V4SF,V2SF")])
8342 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8344 ;; Parallel integer down-conversion operations
8346 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Destination modes of the down-conversions from 512-bit sources.
8348 (define_mode_iterator PMOV_DST_MODE_1 [V16QI V16HI V8SI V8HI])
;; Source mode for each destination mode above: V16SI for the 16-element
;; destinations, V8DI for the 8-element ones.
8349 (define_mode_attr pmov_src_mode
8350 [(V16QI "V16SI") (V16HI "V16SI") (V8SI "V8DI") (V8HI "V8DI")])
;; Lower-case spelling of the source mode, used in pattern names.
8351 (define_mode_attr pmov_src_lower
8352 [(V16QI "v16si") (V16HI "v16si") (V8SI "v8di") (V8HI "v8di")])
;; Mnemonic suffix appended after vpmov<trunsuffix> for each destination.
8353 (define_mode_attr pmov_suff_1
8354 [(V16QI "db") (V16HI "dw") (V8SI "qd") (V8HI "qw")])
;; Unmasked down-conversion: any_truncate of the <pmov_src_mode> register
;; operand 1 into a PMOV_DST_MODE_1 destination.  Alternative 0 writes a
;; register and alternative 1 stores to memory (see the "memory"
;; attribute).  Emits vpmov<trunsuffix><pmov_suff_1>.  The insn condition
;; is on a line not visible in this listing.
8356 (define_insn "*avx512f_<code><pmov_src_lower><mode>2"
8357 [(set (match_operand:PMOV_DST_MODE_1 0 "nonimmediate_operand" "=v,m")
8358 (any_truncate:PMOV_DST_MODE_1
8359 (match_operand:<pmov_src_mode> 1 "register_operand" "v,v")))]
8361 "vpmov<trunsuffix><pmov_suff_1>\t{%1, %0|%0, %1}"
8362 [(set_attr "type" "ssemov")
8363 (set_attr "memory" "none,store")
8364 (set_attr "prefix" "evex")
8365 (set_attr "mode" "<sseinsnmode>")])
;; Masked down-conversion: vec_merge of the truncated operand 1 with
;; operand 2 under mask register operand 3.  Operand 2 is tied to the
;; destination, or (register alternative only) the constant accepted by
;; "C".  The template prints the mask as %{%3%} followed by %N2.
8367 (define_insn "avx512f_<code><pmov_src_lower><mode>2_mask"
8368 [(set (match_operand:PMOV_DST_MODE_1 0 "nonimmediate_operand" "=v,m")
8369 (vec_merge:PMOV_DST_MODE_1
8370 (any_truncate:PMOV_DST_MODE_1
8371 (match_operand:<pmov_src_mode> 1 "register_operand" "v,v"))
8372 (match_operand:PMOV_DST_MODE_1 2 "vector_move_operand" "0C,0")
8373 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
8375 "vpmov<trunsuffix><pmov_suff_1>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
8376 [(set_attr "type" "ssemov")
8377 (set_attr "memory" "none,store")
8378 (set_attr "prefix" "evex")
8379 (set_attr "mode" "<sseinsnmode>")])
;; Named expander for the masked down-converting store: memory
;; destination operand 0, source register operand 1, mask register
;; operand 2.
;; NOTE(review): the merge source is on a line not visible in this
;; listing; presumably (match_dup 0), so that unselected elements keep
;; their memory contents -- confirm.
8381 (define_expand "avx512f_<code><pmov_src_lower><mode>2_mask_store"
8382 [(set (match_operand:PMOV_DST_MODE_1 0 "memory_operand")
8383 (vec_merge:PMOV_DST_MODE_1
8384 (any_truncate:PMOV_DST_MODE_1
8385 (match_operand:<pmov_src_mode> 1 "register_operand"))
8387 (match_operand:<avx512fmaskmode> 2 "register_operand")))]
;; Unmasked V32HI -> V32QI down-conversion, emitted as
;; vpmov<trunsuffix>wb.  Alternative 0 writes a register and alternative 1
;; stores to memory.  The RTL code and the insn condition are on lines not
;; visible in this listing.
8390 (define_insn "*avx512bw_<code>v32hiv32qi2"
8391 [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m")
8393 (match_operand:V32HI 1 "register_operand" "v,v")))]
8395 "vpmov<trunsuffix>wb\t{%1, %0|%0, %1}"
8396 [(set_attr "type" "ssemov")
8397 (set_attr "memory" "none,store")
8398 (set_attr "prefix" "evex")
8399 (set_attr "mode" "XI")])
;; Masked V32HI -> V32QI down-conversion.  Operand 2 is the merge source
;; (tied to the destination, or the "C" constant in the register
;; alternative) and operand 3 is the SImode mask register.  The template
;; prints the mask as %{%3%} followed by %N2.
8401 (define_insn "avx512bw_<code>v32hiv32qi2_mask"
8402 [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m")
8405 (match_operand:V32HI 1 "register_operand" "v,v"))
8406 (match_operand:V32QI 2 "vector_move_operand" "0C,0")
8407 (match_operand:SI 3 "register_operand" "Yk,Yk")))]
8409 "vpmov<trunsuffix>wb\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
8410 [(set_attr "type" "ssemov")
8411 (set_attr "memory" "none,store")
8412 (set_attr "prefix" "evex")
8413 (set_attr "mode" "XI")])
;; Named expander for the masked V32HI -> V32QI store form: destination
;; operand 0, source register operand 1, SImode mask register operand 2.
;; NOTE(review): operand 0 uses nonimmediate_operand here, while the
;; PMOV_DST_MODE_1 *_mask_store expander uses memory_operand; the rest of
;; the RTL is on lines not visible in this listing -- confirm intent.
8415 (define_expand "avx512bw_<code>v32hiv32qi2_mask_store"
8416 [(set (match_operand:V32QI 0 "nonimmediate_operand")
8419 (match_operand:V32HI 1 "register_operand"))
8421 (match_operand:SI 2 "register_operand")))]
;; Destination modes for down-conversions whose source is the
;; double-width mode (<ssedoublemode>); V16QI is only enabled with
;; TARGET_AVX512BW.
8424 (define_mode_iterator PMOV_DST_MODE_2
8425 [V4SI V8HI (V16QI "TARGET_AVX512BW")])
;; Mnemonic suffix appended after vpmov<trunsuffix> for each destination.
8426 (define_mode_attr pmov_suff_2
8427 [(V16QI "wb") (V8HI "dw") (V4SI "qd")])
;; Unmasked down-conversion: any_truncate of the <ssedoublemode> register
;; operand 1 into a PMOV_DST_MODE_2 register (alternative 0) or memory
;; (alternative 1).  Emits vpmov<trunsuffix><pmov_suff_2>.  The insn
;; condition is on a line not visible in this listing.
8429 (define_insn "*avx512vl_<code><ssedoublemodelower><mode>2"
8430 [(set (match_operand:PMOV_DST_MODE_2 0 "nonimmediate_operand" "=v,m")
8431 (any_truncate:PMOV_DST_MODE_2
8432 (match_operand:<ssedoublemode> 1 "register_operand" "v,v")))]
8434 "vpmov<trunsuffix><pmov_suff_2>\t{%1, %0|%0, %1}"
8435 [(set_attr "type" "ssemov")
8436 (set_attr "memory" "none,store")
8437 (set_attr "prefix" "evex")
8438 (set_attr "mode" "<sseinsnmode>")])
;; Masked down-conversion: vec_merge of the truncated operand 1 with
;; operand 2 under mask register operand 3.  Operand 2 is tied to the
;; destination, or (register alternative only) the constant accepted by
;; "C".  The template prints the mask as %{%3%} followed by %N2.
8440 (define_insn "<avx512>_<code><ssedoublemodelower><mode>2_mask"
8441 [(set (match_operand:PMOV_DST_MODE_2 0 "nonimmediate_operand" "=v,m")
8442 (vec_merge:PMOV_DST_MODE_2
8443 (any_truncate:PMOV_DST_MODE_2
8444 (match_operand:<ssedoublemode> 1 "register_operand" "v,v"))
8445 (match_operand:PMOV_DST_MODE_2 2 "vector_move_operand" "0C,0")
8446 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
8448 "vpmov<trunsuffix><pmov_suff_2>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
8449 [(set_attr "type" "ssemov")
8450 (set_attr "memory" "none,store")
8451 (set_attr "prefix" "evex")
8452 (set_attr "mode" "<sseinsnmode>")])
;; Named expander for the masked down-converting store form: destination
;; operand 0, source register operand 1, mask register operand 2.
;; NOTE(review): the merge source is on a line not visible in this
;; listing; presumably (match_dup 0) -- confirm.
8454 (define_expand "<avx512>_<code><ssedoublemodelower><mode>2_mask_store"
8455 [(set (match_operand:PMOV_DST_MODE_2 0 "nonimmediate_operand")
8456 (vec_merge:PMOV_DST_MODE_2
8457 (any_truncate:PMOV_DST_MODE_2
8458 (match_operand:<ssedoublemode> 1 "register_operand"))
8460 (match_operand:<avx512fmaskmode> 2 "register_operand")))]
;; Source modes for down-conversions to bytes whose result fills only
;; part of a V16QI; V8HI is only enabled with TARGET_AVX512BW.
8463 (define_mode_iterator PMOV_SRC_MODE_3 [V4DI V2DI V8SI V4SI (V8HI "TARGET_AVX512BW")])
;; Mode of the truncated result (one QI per source element).
8464 (define_mode_attr pmov_dst_3
8465 [(V4DI "V4QI") (V2DI "V2QI") (V8SI "V8QI") (V4SI "V4QI") (V8HI "V8QI")])
;; Mode of the zero padding that completes the result to 16 bytes.
8466 (define_mode_attr pmov_dst_zeroed_3
8467 [(V4DI "V12QI") (V2DI "V14QI") (V8SI "V8QI") (V4SI "V12QI") (V8HI "V8QI")])
;; Mnemonic suffix appended after vpmov<trunsuffix> for each source mode.
8468 (define_mode_attr pmov_suff_3
8469 [(V4DI "qb") (V2DI "qb") (V8SI "db") (V4SI "db") (V8HI "wb")])
;; Unmasked register form of the byte down-conversion.  The V16QI
;; destination is built from the <pmov_dst_3> truncation of operand 1 and
;; the all-zero operand 2 of mode <pmov_dst_zeroed_3>.  Emits
;; vpmov<trunsuffix><pmov_suff_3>.
;; NOTE(review): the RTL code joining the two parts and the insn
;; condition are on lines not visible here; presumably vec_concat --
;; confirm.
8471 (define_insn "*avx512vl_<code><mode>v<ssescalarnum>qi2"
8472 [(set (match_operand:V16QI 0 "register_operand" "=v")
8474 (any_truncate:<pmov_dst_3>
8475 (match_operand:PMOV_SRC_MODE_3 1 "register_operand" "v"))
8476 (match_operand:<pmov_dst_zeroed_3> 2 "const0_operand")))]
8478 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0|%0, %1}"
8479 [(set_attr "type" "ssemov")
8480 (set_attr "prefix" "evex")
8481 (set_attr "mode" "TI")])
;; Unmasked store form of the V2DI -> byte down-conversion
;; (vpmov<trunsuffix>qb) into a V16QI memory operand.  The visible
;; selector lists byte indices 2..15, the part of the destination beyond
;; the two result bytes.
;; NOTE(review): the RTL around the selector is on lines not visible here;
;; presumably those bytes are selected from (match_dup 0) so the pattern
;; describes them as unchanged -- confirm.
8483 (define_insn "*avx512vl_<code>v2div2qi2_store"
8484 [(set (match_operand:V16QI 0 "memory_operand" "=m")
8487 (match_operand:V2DI 1 "register_operand" "v"))
8490 (parallel [(const_int 2) (const_int 3)
8491 (const_int 4) (const_int 5)
8492 (const_int 6) (const_int 7)
8493 (const_int 8) (const_int 9)
8494 (const_int 10) (const_int 11)
8495 (const_int 12) (const_int 13)
8496 (const_int 14) (const_int 15)]))))]
8498 "vpmov<trunsuffix>qb\t{%1, %0|%0, %1}"
8499 [(set_attr "type" "ssemov")
8500 (set_attr "memory" "store")
8501 (set_attr "prefix" "evex")
8502 (set_attr "mode" "TI")])
;; Masked register form of the V2DI -> byte down-conversion
;; (vpmov<trunsuffix>qb).  Bytes 0-1 of operand 2 (tied to the
;; destination, or the "C" constant) are the merge source under the QImode
;; mask operand 3.  The remaining 14 bytes of the V16QI result are the
;; zero const_vector.
;; NOTE(review): the vec_concat / vec_merge / vec_select codes are on
;; lines not visible in this listing -- confirm the nesting.
8504 (define_insn "avx512vl_<code>v2div2qi2_mask"
8505 [(set (match_operand:V16QI 0 "register_operand" "=v")
8509 (match_operand:V2DI 1 "register_operand" "v"))
8511 (match_operand:V16QI 2 "vector_move_operand" "0C")
8512 (parallel [(const_int 0) (const_int 1)]))
8513 (match_operand:QI 3 "register_operand" "Yk"))
8514 (const_vector:V14QI [(const_int 0) (const_int 0)
8515 (const_int 0) (const_int 0)
8516 (const_int 0) (const_int 0)
8517 (const_int 0) (const_int 0)
8518 (const_int 0) (const_int 0)
8519 (const_int 0) (const_int 0)
8520 (const_int 0) (const_int 0)])))]
8522 "vpmov<trunsuffix>qb\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
8523 [(set_attr "type" "ssemov")
8524 (set_attr "prefix" "evex")
8525 (set_attr "mode" "TI")])
;; Masked store form of the V2DI -> byte down-conversion
;; (vpmov<trunsuffix>qb) into a V16QI memory operand.  Operand 2 is the
;; QImode mask register.  The first selector covers bytes 0-1 (the merge
;; source for the two result bytes) and the second covers bytes 2..15.
;; NOTE(review): the operands of both selectors are on lines not visible
;; here; presumably (match_dup 0), i.e. existing memory contents --
;; confirm.
8527 (define_insn "avx512vl_<code>v2div2qi2_mask_store"
8528 [(set (match_operand:V16QI 0 "memory_operand" "=m")
8532 (match_operand:V2DI 1 "register_operand" "v"))
8535 (parallel [(const_int 0) (const_int 1)]))
8536 (match_operand:QI 2 "register_operand" "Yk"))
8539 (parallel [(const_int 2) (const_int 3)
8540 (const_int 4) (const_int 5)
8541 (const_int 6) (const_int 7)
8542 (const_int 8) (const_int 9)
8543 (const_int 10) (const_int 11)
8544 (const_int 12) (const_int 13)
8545 (const_int 14) (const_int 15)]))))]
8547 "vpmov<trunsuffix>qb\t{%1, %0%{%2%}|%0%{%2%}, %1}"
8548 [(set_attr "type" "ssemov")
8549 (set_attr "memory" "store")
8550 (set_attr "prefix" "evex")
8551 (set_attr "mode" "TI")])
;; Unmasked store form of the four-element -> byte down-conversion
;; (vpmov<trunsuffix><pmov_suff_3>) for each VI4_128_8_256 source mode,
;; into a V16QI memory operand.  The visible selector lists byte indices
;; 4..15, the part of the destination beyond the four result bytes.
;; NOTE(review): the operand of that selector is on a line not visible
;; here; presumably (match_dup 0) -- confirm.
8553 (define_insn "*avx512vl_<code><mode>v4qi2_store"
8554 [(set (match_operand:V16QI 0 "memory_operand" "=m")
8557 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
8560 (parallel [(const_int 4) (const_int 5)
8561 (const_int 6) (const_int 7)
8562 (const_int 8) (const_int 9)
8563 (const_int 10) (const_int 11)
8564 (const_int 12) (const_int 13)
8565 (const_int 14) (const_int 15)]))))]
8567 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0|%0, %1}"
8568 [(set_attr "type" "ssemov")
8569 (set_attr "memory" "store")
8570 (set_attr "prefix" "evex")
8571 (set_attr "mode" "TI")])
;; Masked register form of the four-element -> byte down-conversion.
;; Bytes 0-3 of operand 2 (tied to the destination, or the "C" constant)
;; are the merge source under the QImode mask operand 3.  The remaining 12
;; bytes of the V16QI result are the zero const_vector.
;; NOTE(review): the vec_concat / vec_merge / vec_select codes are on
;; lines not visible in this listing -- confirm the nesting.
8573 (define_insn "avx512vl_<code><mode>v4qi2_mask"
8574 [(set (match_operand:V16QI 0 "register_operand" "=v")
8578 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
8580 (match_operand:V16QI 2 "vector_move_operand" "0C")
8581 (parallel [(const_int 0) (const_int 1)
8582 (const_int 2) (const_int 3)]))
8583 (match_operand:QI 3 "register_operand" "Yk"))
8584 (const_vector:V12QI [(const_int 0) (const_int 0)
8585 (const_int 0) (const_int 0)
8586 (const_int 0) (const_int 0)
8587 (const_int 0) (const_int 0)
8588 (const_int 0) (const_int 0)
8589 (const_int 0) (const_int 0)])))]
8591 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
8592 [(set_attr "type" "ssemov")
8593 (set_attr "prefix" "evex")
8594 (set_attr "mode" "TI")])
;; Masked store form of the four-element -> byte down-conversion into a
;; V16QI memory operand.  Operand 2 is the QImode mask register.  The
;; first selector covers bytes 0-3 (the merge source for the four result
;; bytes) and the second covers bytes 4..15.
;; NOTE(review): the operands of both selectors are on lines not visible
;; here; presumably (match_dup 0) -- confirm.
8596 (define_insn "avx512vl_<code><mode>v4qi2_mask_store"
8597 [(set (match_operand:V16QI 0 "memory_operand" "=m")
8601 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
8604 (parallel [(const_int 0) (const_int 1)
8605 (const_int 2) (const_int 3)]))
8606 (match_operand:QI 2 "register_operand" "Yk"))
8609 (parallel [(const_int 4) (const_int 5)
8610 (const_int 6) (const_int 7)
8611 (const_int 8) (const_int 9)
8612 (const_int 10) (const_int 11)
8613 (const_int 12) (const_int 13)
8614 (const_int 14) (const_int 15)]))))]
8616 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
8617 [(set_attr "type" "ssemov")
8618 (set_attr "memory" "store")
8619 (set_attr "prefix" "evex")
8620 (set_attr "mode" "TI")])
;; Eight-element source modes for the down-conversions to eight bytes:
;; V8HI (only with TARGET_AVX512BW) and V8SI.
8622 (define_mode_iterator VI2_128_BW_4_256
8623 [(V8HI "TARGET_AVX512BW") V8SI])
;; Unmasked store form of the eight-element -> byte down-conversion
;; (vpmov<trunsuffix><pmov_suff_3>) into a V16QI memory operand.  The
;; visible selector lists byte indices 8..15, the part of the destination
;; beyond the eight result bytes.
;; NOTE(review): the operand of that selector is on a line not visible
;; here; presumably (match_dup 0) -- confirm.
8625 (define_insn "*avx512vl_<code><mode>v8qi2_store"
8626 [(set (match_operand:V16QI 0 "memory_operand" "=m")
8629 (match_operand:VI2_128_BW_4_256 1 "register_operand" "v"))
8632 (parallel [(const_int 8) (const_int 9)
8633 (const_int 10) (const_int 11)
8634 (const_int 12) (const_int 13)
8635 (const_int 14) (const_int 15)]))))]
8637 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0|%0, %1}"
8638 [(set_attr "type" "ssemov")
8639 (set_attr "memory" "store")
8640 (set_attr "prefix" "evex")
8641 (set_attr "mode" "TI")])
;; Masked register form of the eight-element -> byte down-conversion.
;; Bytes 0-7 of operand 2 (tied to the destination, or the "C" constant)
;; are the merge source under the QImode mask operand 3.  The upper 8
;; bytes of the V16QI result are the zero const_vector.
;; NOTE(review): the vec_concat / vec_merge / vec_select codes are on
;; lines not visible in this listing -- confirm the nesting.
8643 (define_insn "avx512vl_<code><mode>v8qi2_mask"
8644 [(set (match_operand:V16QI 0 "register_operand" "=v")
8648 (match_operand:VI2_128_BW_4_256 1 "register_operand" "v"))
8650 (match_operand:V16QI 2 "vector_move_operand" "0C")
8651 (parallel [(const_int 0) (const_int 1)
8652 (const_int 2) (const_int 3)
8653 (const_int 4) (const_int 5)
8654 (const_int 6) (const_int 7)]))
8655 (match_operand:QI 3 "register_operand" "Yk"))
8656 (const_vector:V8QI [(const_int 0) (const_int 0)
8657 (const_int 0) (const_int 0)
8658 (const_int 0) (const_int 0)
8659 (const_int 0) (const_int 0)])))]
8661 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
8662 [(set_attr "type" "ssemov")
8663 (set_attr "prefix" "evex")
8664 (set_attr "mode" "TI")])
;; Masked store form of the eight-element -> byte down-conversion into a
;; V16QI memory operand.  Operand 2 is the QImode mask register.  The
;; first selector covers bytes 0-7 (the merge source for the eight result
;; bytes) and the second covers bytes 8..15.
;; NOTE(review): the operands of both selectors are on lines not visible
;; here; presumably (match_dup 0) -- confirm.
8666 (define_insn "avx512vl_<code><mode>v8qi2_mask_store"
8667 [(set (match_operand:V16QI 0 "memory_operand" "=m")
8671 (match_operand:VI2_128_BW_4_256 1 "register_operand" "v"))
8674 (parallel [(const_int 0) (const_int 1)
8675 (const_int 2) (const_int 3)
8676 (const_int 4) (const_int 5)
8677 (const_int 6) (const_int 7)]))
8678 (match_operand:QI 2 "register_operand" "Yk"))
8681 (parallel [(const_int 8) (const_int 9)
8682 (const_int 10) (const_int 11)
8683 (const_int 12) (const_int 13)
8684 (const_int 14) (const_int 15)]))))]
8686 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
8687 [(set_attr "type" "ssemov")
8688 (set_attr "memory" "store")
8689 (set_attr "prefix" "evex")
8690 (set_attr "mode" "TI")])
;; Source modes for down-conversions to 16-bit elements whose result
;; fills only part of a V8HI.
8692 (define_mode_iterator PMOV_SRC_MODE_4 [V4DI V2DI V4SI])
;; Mode of the truncated result (one HI per source element).
8693 (define_mode_attr pmov_dst_4
8694 [(V4DI "V4HI") (V2DI "V2HI") (V4SI "V4HI")])
;; Mode of the zero padding that completes the result to eight HI elements.
8695 (define_mode_attr pmov_dst_zeroed_4
8696 [(V4DI "V4HI") (V2DI "V6HI") (V4SI "V4HI")])
;; Mnemonic suffix appended after vpmov<trunsuffix> for each source mode.
8697 (define_mode_attr pmov_suff_4
8698 [(V4DI "qw") (V2DI "qw") (V4SI "dw")])
;; Unmasked register form of the down-conversion to 16-bit elements.  The
;; V8HI destination is built from the <pmov_dst_4> truncation of operand 1
;; and the all-zero operand 2 of mode <pmov_dst_zeroed_4>.  Emits
;; vpmov<trunsuffix><pmov_suff_4>.
;; NOTE(review): the RTL code joining the two parts and the insn
;; condition are on lines not visible here; presumably vec_concat --
;; confirm.
8700 (define_insn "*avx512vl_<code><mode>v<ssescalarnum>hi2"
8701 [(set (match_operand:V8HI 0 "register_operand" "=v")
8703 (any_truncate:<pmov_dst_4>
8704 (match_operand:PMOV_SRC_MODE_4 1 "register_operand" "v"))
8705 (match_operand:<pmov_dst_zeroed_4> 2 "const0_operand")))]
8707 "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0|%0, %1}"
8708 [(set_attr "type" "ssemov")
8709 (set_attr "prefix" "evex")
8710 (set_attr "mode" "TI")])
;; Unmasked store form of the four-element -> 16-bit down-conversion
;; (vpmov<trunsuffix><pmov_suff_4>) into a V8HI memory operand.  The
;; visible selector lists element indices 4..7, the part of the
;; destination beyond the four result elements.
;; NOTE(review): the operand of that selector is on a line not visible
;; here; presumably (match_dup 0) -- confirm.
8712 (define_insn "*avx512vl_<code><mode>v4hi2_store"
8713 [(set (match_operand:V8HI 0 "memory_operand" "=m")
8716 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
8719 (parallel [(const_int 4) (const_int 5)
8720 (const_int 6) (const_int 7)]))))]
8722 "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0|%0, %1}"
8723 [(set_attr "type" "ssemov")
8724 (set_attr "memory" "store")
8725 (set_attr "prefix" "evex")
8726 (set_attr "mode" "TI")])
;; Masked register form of the four-element -> 16-bit down-conversion.
;; Elements 0-3 of operand 2 (tied to the destination, or the "C"
;; constant) are the merge source under the QImode mask operand 3.  The
;; upper four elements of the V8HI result are the zero const_vector.
;; NOTE(review): the vec_concat / vec_merge / vec_select codes are on
;; lines not visible in this listing -- confirm the nesting.
8728 (define_insn "avx512vl_<code><mode>v4hi2_mask"
8729 [(set (match_operand:V8HI 0 "register_operand" "=v")
8733 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
8735 (match_operand:V8HI 2 "vector_move_operand" "0C")
8736 (parallel [(const_int 0) (const_int 1)
8737 (const_int 2) (const_int 3)]))
8738 (match_operand:QI 3 "register_operand" "Yk"))
8739 (const_vector:V4HI [(const_int 0) (const_int 0)
8740 (const_int 0) (const_int 0)])))]
8742 "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
8743 [(set_attr "type" "ssemov")
8744 (set_attr "prefix" "evex")
8745 (set_attr "mode" "TI")])
;; Masked store form of the four-element -> 16-bit down-conversion into a
;; V8HI memory operand.  Operand 2 is the QImode mask register.  The first
;; selector covers elements 0-3 (the merge source for the result elements)
;; and the second covers elements 4..7.
;; NOTE(review): the operands of both selectors are on lines not visible
;; here; presumably (match_dup 0) -- confirm.
8747 (define_insn "avx512vl_<code><mode>v4hi2_mask_store"
8748 [(set (match_operand:V8HI 0 "memory_operand" "=m")
8752 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
8755 (parallel [(const_int 0) (const_int 1)
8756 (const_int 2) (const_int 3)]))
8757 (match_operand:QI 2 "register_operand" "Yk"))
8760 (parallel [(const_int 4) (const_int 5)
8761 (const_int 6) (const_int 7)]))))]
8763 "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
8764 [(set_attr "type" "ssemov")
8765 (set_attr "memory" "store")
8766 (set_attr "prefix" "evex")
8767 (set_attr "mode" "TI")])
;; Unmasked store form of the V2DI -> 16-bit down-conversion
;; (vpmov<trunsuffix>qw) into a V8HI memory operand.  The visible selector
;; lists element indices 2..7, the part of the destination beyond the two
;; result elements.
;; NOTE(review): the operand of that selector is on a line not visible
;; here; presumably (match_dup 0) -- confirm.
8769 (define_insn "*avx512vl_<code>v2div2hi2_store"
8770 [(set (match_operand:V8HI 0 "memory_operand" "=m")
8773 (match_operand:V2DI 1 "register_operand" "v"))
8776 (parallel [(const_int 2) (const_int 3)
8777 (const_int 4) (const_int 5)
8778 (const_int 6) (const_int 7)]))))]
8780 "vpmov<trunsuffix>qw\t{%1, %0|%0, %1}"
8781 [(set_attr "type" "ssemov")
8782 (set_attr "memory" "store")
8783 (set_attr "prefix" "evex")
8784 (set_attr "mode" "TI")])
;; Masked register form of the V2DI -> 16-bit down-conversion
;; (vpmov<trunsuffix>qw).  Elements 0-1 of operand 2 (tied to the
;; destination, or the "C" constant) are the merge source under the QImode
;; mask operand 3.  The remaining six elements of the V8HI result are the
;; zero const_vector.
;; NOTE(review): the vec_concat / vec_merge / vec_select codes are on
;; lines not visible in this listing -- confirm the nesting.
8786 (define_insn "avx512vl_<code>v2div2hi2_mask"
8787 [(set (match_operand:V8HI 0 "register_operand" "=v")
8791 (match_operand:V2DI 1 "register_operand" "v"))
8793 (match_operand:V8HI 2 "vector_move_operand" "0C")
8794 (parallel [(const_int 0) (const_int 1)]))
8795 (match_operand:QI 3 "register_operand" "Yk"))
8796 (const_vector:V6HI [(const_int 0) (const_int 0)
8797 (const_int 0) (const_int 0)
8798 (const_int 0) (const_int 0)])))]
8800 "vpmov<trunsuffix>qw\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
8801 [(set_attr "type" "ssemov")
8802 (set_attr "prefix" "evex")
8803 (set_attr "mode" "TI")])
;; Masked store form of the V2DI -> 16-bit down-conversion
;; (vpmov<trunsuffix>qw) into a V8HI memory operand.  Operand 2 is the
;; QImode mask register.  The first selector covers elements 0-1 (the
;; merge source for the result elements) and the second covers elements
;; 2..7.
;; NOTE(review): the operands of both selectors are on lines not visible
;; here; presumably (match_dup 0) -- confirm.
8805 (define_insn "avx512vl_<code>v2div2hi2_mask_store"
8806 [(set (match_operand:V8HI 0 "memory_operand" "=m")
8810 (match_operand:V2DI 1 "register_operand" "v"))
8813 (parallel [(const_int 0) (const_int 1)]))
8814 (match_operand:QI 2 "register_operand" "Yk"))
8817 (parallel [(const_int 2) (const_int 3)
8818 (const_int 4) (const_int 5)
8819 (const_int 6) (const_int 7)]))))]
8821 "vpmov<trunsuffix>qw\t{%1, %0%{%2%}|%0%{%2%}, %1}"
8822 [(set_attr "type" "ssemov")
8823 (set_attr "memory" "store")
8824 (set_attr "prefix" "evex")
8825 (set_attr "mode" "TI")])
;; Unmasked register form of the V2DI -> 32-bit down-conversion
;; (vpmov<trunsuffix>qd).  The V4SI destination is built from the
;; converted operand 1 and the all-zero V2SI operand 2.
;; NOTE(review): the RTL codes and the insn condition are on lines not
;; visible here; presumably vec_concat of the truncation with the zero
;; vector -- confirm.
8827 (define_insn "*avx512vl_<code>v2div2si2"
8828 [(set (match_operand:V4SI 0 "register_operand" "=v")
8831 (match_operand:V2DI 1 "register_operand" "v"))
8832 (match_operand:V2SI 2 "const0_operand")))]
8834 "vpmov<trunsuffix>qd\t{%1, %0|%0, %1}"
8835 [(set_attr "type" "ssemov")
8836 (set_attr "prefix" "evex")
8837 (set_attr "mode" "TI")])
;; Unmasked store form of the V2DI -> 32-bit down-conversion
;; (vpmov<trunsuffix>qd) into a V4SI memory operand.  The visible selector
;; lists element indices 2-3, the part of the destination beyond the two
;; result elements.
;; NOTE(review): the operand of that selector is on a line not visible
;; here; presumably (match_dup 0) -- confirm.
8839 (define_insn "*avx512vl_<code>v2div2si2_store"
8840 [(set (match_operand:V4SI 0 "memory_operand" "=m")
8843 (match_operand:V2DI 1 "register_operand" "v"))
8846 (parallel [(const_int 2) (const_int 3)]))))]
8848 "vpmov<trunsuffix>qd\t{%1, %0|%0, %1}"
8849 [(set_attr "type" "ssemov")
8850 (set_attr "memory" "store")
8851 (set_attr "prefix" "evex")
8852 (set_attr "mode" "TI")])
;; Masked register form of the V2DI -> 32-bit down-conversion
;; (vpmov<trunsuffix>qd).  Elements 0-1 of operand 2 (tied to the
;; destination, or the "C" constant) are the merge source under the QImode
;; mask operand 3.  The upper two elements of the V4SI result are the zero
;; const_vector.
;; NOTE(review): the vec_concat / vec_merge / vec_select codes are on
;; lines not visible in this listing -- confirm the nesting.
8854 (define_insn "avx512vl_<code>v2div2si2_mask"
8855 [(set (match_operand:V4SI 0 "register_operand" "=v")
8859 (match_operand:V2DI 1 "register_operand" "v"))
8861 (match_operand:V4SI 2 "vector_move_operand" "0C")
8862 (parallel [(const_int 0) (const_int 1)]))
8863 (match_operand:QI 3 "register_operand" "Yk"))
8864 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
8866 "vpmov<trunsuffix>qd\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
8867 [(set_attr "type" "ssemov")
8868 (set_attr "prefix" "evex")
8869 (set_attr "mode" "TI")])
;; Masked store form of the V2DI -> 32-bit down-conversion
;; (vpmov<trunsuffix>qd) into a V4SI memory operand.  Operand 2 is the
;; QImode mask register.  The first selector covers elements 0-1 (the
;; merge source for the result elements) and the second covers elements
;; 2-3.
;; NOTE(review): the operands of both selectors are on lines not visible
;; here; presumably (match_dup 0) -- confirm.
8871 (define_insn "avx512vl_<code>v2div2si2_mask_store"
8872 [(set (match_operand:V4SI 0 "memory_operand" "=m")
8876 (match_operand:V2DI 1 "register_operand" "v"))
8879 (parallel [(const_int 0) (const_int 1)]))
8880 (match_operand:QI 2 "register_operand" "Yk"))
8883 (parallel [(const_int 2) (const_int 3)]))))]
8885 "vpmov<trunsuffix>qd\t{%1, %0%{%2%}|%0%{%2%}, %1}"
8886 [(set_attr "type" "ssemov")
8887 (set_attr "memory" "store")
8888 (set_attr "prefix" "evex")
8889 (set_attr "mode" "TI")])
;; Unmasked register form of the V8DI -> byte down-conversion
;; (vpmov<trunsuffix>qb).  The V16QI destination is built from the
;; converted operand 1 and an explicit V8QI zero const_vector.
;; NOTE(review): the RTL codes and the insn condition are on lines not
;; visible here; presumably vec_concat of the truncation with the zero
;; vector -- confirm.
8891 (define_insn "*avx512f_<code>v8div16qi2"
8892 [(set (match_operand:V16QI 0 "register_operand" "=v")
8895 (match_operand:V8DI 1 "register_operand" "v"))
8896 (const_vector:V8QI [(const_int 0) (const_int 0)
8897 (const_int 0) (const_int 0)
8898 (const_int 0) (const_int 0)
8899 (const_int 0) (const_int 0)])))]
8901 "vpmov<trunsuffix>qb\t{%1, %0|%0, %1}"
8902 [(set_attr "type" "ssemov")
8903 (set_attr "prefix" "evex")
8904 (set_attr "mode" "TI")])
;; Unmasked store form of vpmov<trunsuffix>qb: V16QI memory destination and
;; V8DI register source.  The visible selector lists elements 8..15.
8906 (define_insn "*avx512f_<code>v8div16qi2_store"
8907 [(set (match_operand:V16QI 0 "memory_operand" "=m")
8910 (match_operand:V8DI 1 "register_operand" "v"))
8913 (parallel [(const_int 8) (const_int 9)
8914 (const_int 10) (const_int 11)
8915 (const_int 12) (const_int 13)
8916 (const_int 14) (const_int 15)]))))]
8918 "vpmov<trunsuffix>qb\t{%1, %0|%0, %1}"
8919 [(set_attr "type" "ssemov")
8920 (set_attr "memory" "store")
8921 (set_attr "prefix" "evex")
8922 (set_attr "mode" "TI")])
;; Masked register form of vpmov<trunsuffix>qb.  Operand 2 (V16QI, "0C") is the
;; merge source, of which elements 0..7 are selected; operand 3 is the QI mask
;; register printed as {%3}.  An all-zero V8QI constant vector is also present.
8924 (define_insn "avx512f_<code>v8div16qi2_mask"
8925 [(set (match_operand:V16QI 0 "register_operand" "=v")
8929 (match_operand:V8DI 1 "register_operand" "v"))
8931 (match_operand:V16QI 2 "vector_move_operand" "0C")
8932 (parallel [(const_int 0) (const_int 1)
8933 (const_int 2) (const_int 3)
8934 (const_int 4) (const_int 5)
8935 (const_int 6) (const_int 7)]))
8936 (match_operand:QI 3 "register_operand" "Yk"))
8937 (const_vector:V8QI [(const_int 0) (const_int 0)
8938 (const_int 0) (const_int 0)
8939 (const_int 0) (const_int 0)
8940 (const_int 0) (const_int 0)])))]
8942 "vpmov<trunsuffix>qb\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
8943 [(set_attr "type" "ssemov")
8944 (set_attr "prefix" "evex")
8945 (set_attr "mode" "TI")])
;; Masked store form of vpmov<trunsuffix>qb: V16QI memory destination, V8DI
;; register source, QI mask register (operand 2) printed as {%2}.  The two
;; visible selectors cover elements 0..7 and 8..15 respectively.
8947 (define_insn "avx512f_<code>v8div16qi2_mask_store"
8948 [(set (match_operand:V16QI 0 "memory_operand" "=m")
8952 (match_operand:V8DI 1 "register_operand" "v"))
8955 (parallel [(const_int 0) (const_int 1)
8956 (const_int 2) (const_int 3)
8957 (const_int 4) (const_int 5)
8958 (const_int 6) (const_int 7)]))
8959 (match_operand:QI 2 "register_operand" "Yk"))
8962 (parallel [(const_int 8) (const_int 9)
8963 (const_int 10) (const_int 11)
8964 (const_int 12) (const_int 13)
8965 (const_int 14) (const_int 15)]))))]
8967 "vpmov<trunsuffix>qb\t{%1, %0%{%2%}|%0%{%2%}, %1}"
8968 [(set_attr "type" "ssemov")
8969 (set_attr "memory" "store")
8970 (set_attr "prefix" "evex")
8971 (set_attr "mode" "TI")])
8973 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8975 ;; Parallel integral arithmetic
8977 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Vector negation expander for the VI_AVX2 modes.  The preparation statement
;; loads an all-zero constant of <MODE>mode into a register as operands[2].
;; NOTE(review): the RTL that consumes operands[2] is not visible in this
;; excerpt; presumably it is a subtraction from zero -- confirm.
8979 (define_expand "neg<mode>2"
8980 [(set (match_operand:VI_AVX2 0 "register_operand")
8983 (match_operand:VI_AVX2 1 "nonimmediate_operand")))]
8985 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
;; Expander for the <plusminus_insn> operations on VI_AVX2 modes (optionally
;; masked via <mask_name>).  Requires SSE2 plus <mask_mode512bit_condition>,
;; and legitimizes the operands with ix86_fixup_binary_operands_no_copy.
8987 (define_expand "<plusminus_insn><mode>3<mask_name>"
8988 [(set (match_operand:VI_AVX2 0 "register_operand")
8990 (match_operand:VI_AVX2 1 "nonimmediate_operand")
8991 (match_operand:VI_AVX2 2 "nonimmediate_operand")))]
8992 "TARGET_SSE2 && <mask_mode512bit_condition>"
8993 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Matcher for the <plusminus_insn> operations on VI_AVX2 modes.
;; Alternative 0 ("noavx") is the two-operand form with operand 1 tied to the
;; destination; alternative 1 ("avx") is the three-operand form, which also
;; prints the optional mask via <mask_operand3>.
8995 (define_insn "*<plusminus_insn><mode>3<mask_name>"
8996 [(set (match_operand:VI_AVX2 0 "register_operand" "=x,v")
8998 (match_operand:VI_AVX2 1 "nonimmediate_operand" "<comm>0,v")
8999 (match_operand:VI_AVX2 2 "nonimmediate_operand" "xm,vm")))]
9000 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands) && <mask_mode512bit_condition>"
9002 p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
9003 vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9004 [(set_attr "isa" "noavx,avx")
9005 (set_attr "type" "sseiadd")
9006 (set_attr "prefix_data16" "1,*")
9007 (set_attr "prefix" "<mask_prefix3>")
9008 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the sat_plusminus (saturating add/subtract) codes on
;; VI12_AVX2 modes.  Requires SSE2 together with both mask conditions, and
;; legitimizes the operands with ix86_fixup_binary_operands_no_copy.
9010 (define_expand "<sse2_avx2>_<plusminus_insn><mode>3<mask_name>"
9011 [(set (match_operand:VI12_AVX2 0 "register_operand")
9012 (sat_plusminus:VI12_AVX2
9013 (match_operand:VI12_AVX2 1 "nonimmediate_operand")
9014 (match_operand:VI12_AVX2 2 "nonimmediate_operand")))]
9015 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
9016 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Matcher for the sat_plusminus codes on VI12_AVX2 modes.  Alternative 0 is
;; the two-operand "noavx" form, alternative 1 the three-operand "avx" form
;; with the optional <mask_operand3>; prefix is "orig" / "maybe_evex".
9018 (define_insn "*<sse2_avx2>_<plusminus_insn><mode>3<mask_name>"
9019 [(set (match_operand:VI12_AVX2 0 "register_operand" "=x,v")
9020 (sat_plusminus:VI12_AVX2
9021 (match_operand:VI12_AVX2 1 "nonimmediate_operand" "<comm>0,v")
9022 (match_operand:VI12_AVX2 2 "nonimmediate_operand" "xm,vm")))]
9023 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>
9024 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
9026 p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
9027 vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9028 [(set_attr "isa" "noavx,avx")
9029 (set_attr "type" "sseiadd")
9030 (set_attr "prefix_data16" "1,*")
9031 (set_attr "prefix" "orig,maybe_evex")
9032 (set_attr "mode" "TI")])
;; Multiply expander for the VI1_AVX2 modes; all operands are registers.
;; The expansion is delegated to ix86_expand_vecop_qihi with code MULT.
9034 (define_expand "mul<mode>3"
9035 [(set (match_operand:VI1_AVX2 0 "register_operand")
9036 (mult:VI1_AVX2 (match_operand:VI1_AVX2 1 "register_operand")
9037 (match_operand:VI1_AVX2 2 "register_operand")))]
9040 ix86_expand_vecop_qihi (MULT, operands[0], operands[1], operands[2]);
;; Multiply expander for the VI2_AVX2 modes.  Both inputs may be memory;
;; ix86_fixup_binary_operands_no_copy legitimizes them for the matcher.
9044 (define_expand "mul<mode>3"
9045 [(set (match_operand:VI2_AVX2 0 "register_operand")
9046 (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "nonimmediate_operand")
9047 (match_operand:VI2_AVX2 2 "nonimmediate_operand")))]
9049 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; Element-wise multiply (mult:VI2_AVX2), emitted as pmullw / vpmullw.
;; Alternative 0 ("noavx") is the two-operand form with operand 1 tied to the
;; destination; alternative 1 ("avx") is the three-operand VEX form.
;; Every operand is kept in the "x" class: the destination is "x", the prefix
;; attribute is "orig,vex", and there is no masked/EVEX variant of this
;; pattern, so the inputs must not be allocated in registers that only an
;; EVEX encoding can address.  This matches *<s>mul<mode>3_highpart.
9051 (define_insn "*mul<mode>3"
9052 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
9053 (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,x")
9054 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,xm")))]
9055 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
9057 pmullw\t{%2, %0|%0, %2}
9058 vpmullw\t{%2, %1, %0|%0, %1, %2}"
9059 [(set_attr "isa" "noavx,avx")
9060 (set_attr "type" "sseimul")
9061 (set_attr "prefix_data16" "1,*")
9062 (set_attr "prefix" "orig,vex")
9063 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the high-part multiply on VI2_AVX2 modes: both inputs are
;; extended (any_extend) to <ssedoublemode>, multiplied, and the product is
;; shifted right logically.  NOTE(review): the shift count and the outer
;; truncation are not visible in this excerpt.
9065 (define_expand "<s>mul<mode>3_highpart"
9066 [(set (match_operand:VI2_AVX2 0 "register_operand")
9068 (lshiftrt:<ssedoublemode>
9069 (mult:<ssedoublemode>
9070 (any_extend:<ssedoublemode>
9071 (match_operand:VI2_AVX2 1 "nonimmediate_operand"))
9072 (any_extend:<ssedoublemode>
9073 (match_operand:VI2_AVX2 2 "nonimmediate_operand")))
9076 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; Matcher for the high-part multiply, emitted as pmulh<u>w / vpmulh<u>w.
;; Operand 1 is marked commutative ("%"); alternative 0 is the two-operand
;; "noavx" form, alternative 1 the three-operand VEX form.
9078 (define_insn "*<s>mul<mode>3_highpart"
9079 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
9081 (lshiftrt:<ssedoublemode>
9082 (mult:<ssedoublemode>
9083 (any_extend:<ssedoublemode>
9084 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,x"))
9085 (any_extend:<ssedoublemode>
9086 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,xm")))
9088 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
9090 pmulh<u>w\t{%2, %0|%0, %2}
9091 vpmulh<u>w\t{%2, %1, %0|%0, %1, %2}"
9092 [(set_attr "isa" "noavx,avx")
9093 (set_attr "type" "sseimul")
9094 (set_attr "prefix_data16" "1,*")
9095 (set_attr "prefix" "orig,vex")
9096 (set_attr "mode" "<sseinsnmode>")])
;; Expander producing a V8DI result from the even-indexed elements
;; (0, 2, ..., 14) of two V16SI inputs.  Operands are legitimized with
;; ix86_fixup_binary_operands_no_copy for MULT in V16SImode.
9098 (define_expand "vec_widen_umult_even_v16si<mask_name>"
9099 [(set (match_operand:V8DI 0 "register_operand")
9103 (match_operand:V16SI 1 "nonimmediate_operand")
9104 (parallel [(const_int 0) (const_int 2)
9105 (const_int 4) (const_int 6)
9106 (const_int 8) (const_int 10)
9107 (const_int 12) (const_int 14)])))
9110 (match_operand:V16SI 2 "nonimmediate_operand")
9111 (parallel [(const_int 0) (const_int 2)
9112 (const_int 4) (const_int 6)
9113 (const_int 8) (const_int 10)
9114 (const_int 12) (const_int 14)])))))]
9116 "ix86_fixup_binary_operands_no_copy (MULT, V16SImode, operands);")
;; AVX512F matcher emitting vpmuludq on the even-indexed elements of two
;; V16SI inputs, with an optional mask (<mask_operand3>).  Operand 1 is
;; commutative ("%v"); operand 2 may be memory.
9118 (define_insn "*vec_widen_umult_even_v16si<mask_name>"
9119 [(set (match_operand:V8DI 0 "register_operand" "=v")
9123 (match_operand:V16SI 1 "nonimmediate_operand" "%v")
9124 (parallel [(const_int 0) (const_int 2)
9125 (const_int 4) (const_int 6)
9126 (const_int 8) (const_int 10)
9127 (const_int 12) (const_int 14)])))
9130 (match_operand:V16SI 2 "nonimmediate_operand" "vm")
9131 (parallel [(const_int 0) (const_int 2)
9132 (const_int 4) (const_int 6)
9133 (const_int 8) (const_int 10)
9134 (const_int 12) (const_int 14)])))))]
9135 "TARGET_AVX512F && ix86_binary_operator_ok (MULT, V16SImode, operands)"
9136 "vpmuludq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9137 [(set_attr "isa" "avx512f")
9138 (set_attr "type" "sseimul")
9139 (set_attr "prefix_extra" "1")
9140 (set_attr "prefix" "evex")
9141 (set_attr "mode" "XI")])
;; Expander producing a V4DI result from elements 0, 2, 4 and 6 of two V8SI
;; inputs; operands are legitimized for MULT in V8SImode.
9143 (define_expand "vec_widen_umult_even_v8si"
9144 [(set (match_operand:V4DI 0 "register_operand")
9148 (match_operand:V8SI 1 "nonimmediate_operand")
9149 (parallel [(const_int 0) (const_int 2)
9150 (const_int 4) (const_int 6)])))
9153 (match_operand:V8SI 2 "nonimmediate_operand")
9154 (parallel [(const_int 0) (const_int 2)
9155 (const_int 4) (const_int 6)])))))]
9157 "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")
;; AVX2 matcher emitting the VEX-encoded vpmuludq on elements 0, 2, 4 and 6
;; of two V8SI inputs.  Operand 1 is commutative ("%x").
9159 (define_insn "*vec_widen_umult_even_v8si"
9160 [(set (match_operand:V4DI 0 "register_operand" "=x")
9164 (match_operand:V8SI 1 "nonimmediate_operand" "%x")
9165 (parallel [(const_int 0) (const_int 2)
9166 (const_int 4) (const_int 6)])))
9169 (match_operand:V8SI 2 "nonimmediate_operand" "xm")
9170 (parallel [(const_int 0) (const_int 2)
9171 (const_int 4) (const_int 6)])))))]
9172 "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V8SImode, operands)"
9173 "vpmuludq\t{%2, %1, %0|%0, %1, %2}"
9174 [(set_attr "type" "sseimul")
9175 (set_attr "prefix" "vex")
9176 (set_attr "mode" "OI")])
;; Expander producing a V2DI result from elements 0 and 2 of two V4SI inputs;
;; operands are legitimized for MULT in V4SImode.
9178 (define_expand "vec_widen_umult_even_v4si"
9179 [(set (match_operand:V2DI 0 "register_operand")
9183 (match_operand:V4SI 1 "nonimmediate_operand")
9184 (parallel [(const_int 0) (const_int 2)])))
9187 (match_operand:V4SI 2 "nonimmediate_operand")
9188 (parallel [(const_int 0) (const_int 2)])))))]
9190 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; SSE2 matcher emitting pmuludq (two-operand "noavx" form) or vpmuludq
;; (three-operand VEX form) on elements 0 and 2 of two V4SI inputs.
9192 (define_insn "*vec_widen_umult_even_v4si"
9193 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
9197 (match_operand:V4SI 1 "nonimmediate_operand" "%0,x")
9198 (parallel [(const_int 0) (const_int 2)])))
9201 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
9202 (parallel [(const_int 0) (const_int 2)])))))]
9203 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
9205 pmuludq\t{%2, %0|%0, %2}
9206 vpmuludq\t{%2, %1, %0|%0, %1, %2}"
9207 [(set_attr "isa" "noavx,avx")
9208 (set_attr "type" "sseimul")
9209 (set_attr "prefix_data16" "1,*")
9210 (set_attr "prefix" "orig,vex")
9211 (set_attr "mode" "TI")])
;; Signed counterpart of the V16SI even-element widening multiply expander:
;; V8DI result from elements 0, 2, ..., 14 of two V16SI inputs.
9213 (define_expand "vec_widen_smult_even_v16si<mask_name>"
9214 [(set (match_operand:V8DI 0 "register_operand")
9218 (match_operand:V16SI 1 "nonimmediate_operand")
9219 (parallel [(const_int 0) (const_int 2)
9220 (const_int 4) (const_int 6)
9221 (const_int 8) (const_int 10)
9222 (const_int 12) (const_int 14)])))
9225 (match_operand:V16SI 2 "nonimmediate_operand")
9226 (parallel [(const_int 0) (const_int 2)
9227 (const_int 4) (const_int 6)
9228 (const_int 8) (const_int 10)
9229 (const_int 12) (const_int 14)])))))]
9231 "ix86_fixup_binary_operands_no_copy (MULT, V16SImode, operands);")
;; AVX512F matcher emitting vpmuldq on the even-indexed elements of two V16SI
;; inputs, with an optional mask.  Operand 1 is commutative ("%v").
9233 (define_insn "*vec_widen_smult_even_v16si<mask_name>"
9234 [(set (match_operand:V8DI 0 "register_operand" "=v")
9238 (match_operand:V16SI 1 "nonimmediate_operand" "%v")
9239 (parallel [(const_int 0) (const_int 2)
9240 (const_int 4) (const_int 6)
9241 (const_int 8) (const_int 10)
9242 (const_int 12) (const_int 14)])))
9245 (match_operand:V16SI 2 "nonimmediate_operand" "vm")
9246 (parallel [(const_int 0) (const_int 2)
9247 (const_int 4) (const_int 6)
9248 (const_int 8) (const_int 10)
9249 (const_int 12) (const_int 14)])))))]
9250 "TARGET_AVX512F && ix86_binary_operator_ok (MULT, V16SImode, operands)"
9251 "vpmuldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9252 [(set_attr "isa" "avx512f")
9253 (set_attr "type" "sseimul")
9254 (set_attr "prefix_extra" "1")
9255 (set_attr "prefix" "evex")
9256 (set_attr "mode" "XI")])
;; Signed V8SI even-element widening multiply expander: V4DI result from
;; elements 0, 2, 4 and 6 of both inputs.
9258 (define_expand "vec_widen_smult_even_v8si"
9259 [(set (match_operand:V4DI 0 "register_operand")
9263 (match_operand:V8SI 1 "nonimmediate_operand")
9264 (parallel [(const_int 0) (const_int 2)
9265 (const_int 4) (const_int 6)])))
9268 (match_operand:V8SI 2 "nonimmediate_operand")
9269 (parallel [(const_int 0) (const_int 2)
9270 (const_int 4) (const_int 6)])))))]
9272 "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")
;; AVX2 matcher emitting the VEX-encoded vpmuldq on elements 0, 2, 4 and 6 of
;; two V8SI inputs.  Operand 1 carries the "%" commutative modifier, like the
;; unsigned V8SI and signed V16SI siblings: its predicate admits memory and
;; the condition uses ix86_binary_operator_ok (MULT, ...), so the register
;; allocator must be allowed to swap the inputs rather than reload operand 1.
9274 (define_insn "*vec_widen_smult_even_v8si"
9275 [(set (match_operand:V4DI 0 "register_operand" "=x")
9279 (match_operand:V8SI 1 "nonimmediate_operand" "%x")
9280 (parallel [(const_int 0) (const_int 2)
9281 (const_int 4) (const_int 6)])))
9284 (match_operand:V8SI 2 "nonimmediate_operand" "xm")
9285 (parallel [(const_int 0) (const_int 2)
9286 (const_int 4) (const_int 6)])))))]
9287 "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V8SImode, operands)"
9288 "vpmuldq\t{%2, %1, %0|%0, %1, %2}"
9289 [(set_attr "type" "sseimul")
9290 (set_attr "prefix_extra" "1")
9291 (set_attr "prefix" "vex")
9292 (set_attr "mode" "OI")])
;; Expander producing a V2DI result from elements 0 and 2 of two V4SI inputs
;; (the matcher below emits pmuldq); operands are legitimized for MULT.
9294 (define_expand "sse4_1_mulv2siv2di3"
9295 [(set (match_operand:V2DI 0 "register_operand")
9299 (match_operand:V4SI 1 "nonimmediate_operand")
9300 (parallel [(const_int 0) (const_int 2)])))
9303 (match_operand:V4SI 2 "nonimmediate_operand")
9304 (parallel [(const_int 0) (const_int 2)])))))]
9306 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; SSE4.1 matcher emitting pmuldq (two-operand "noavx" form) or vpmuldq
;; (three-operand VEX form) on elements 0 and 2 of two V4SI inputs.
9308 (define_insn "*sse4_1_mulv2siv2di3"
9309 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
9313 (match_operand:V4SI 1 "nonimmediate_operand" "%0,x")
9314 (parallel [(const_int 0) (const_int 2)])))
9317 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
9318 (parallel [(const_int 0) (const_int 2)])))))]
9319 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
9321 pmuldq\t{%2, %0|%0, %2}
9322 vpmuldq\t{%2, %1, %0|%0, %1, %2}"
9323 [(set_attr "isa" "noavx,avx")
9324 (set_attr "type" "sseimul")
9325 (set_attr "prefix_data16" "1,*")
9326 (set_attr "prefix_extra" "1")
9327 (set_attr "prefix" "orig,vex")
9328 (set_attr "mode" "TI")])
;; Expander for the 256-bit pmaddwd: V8SI result built from the even-indexed
;; and the odd-indexed V8HI halves of two V16HI inputs.  The odd halves reuse
;; operands 1 and 2 through match_dup.
9330 (define_expand "avx2_pmaddwd"
9331 [(set (match_operand:V8SI 0 "register_operand")
9336 (match_operand:V16HI 1 "nonimmediate_operand")
9337 (parallel [(const_int 0) (const_int 2)
9338 (const_int 4) (const_int 6)
9339 (const_int 8) (const_int 10)
9340 (const_int 12) (const_int 14)])))
9343 (match_operand:V16HI 2 "nonimmediate_operand")
9344 (parallel [(const_int 0) (const_int 2)
9345 (const_int 4) (const_int 6)
9346 (const_int 8) (const_int 10)
9347 (const_int 12) (const_int 14)]))))
9350 (vec_select:V8HI (match_dup 1)
9351 (parallel [(const_int 1) (const_int 3)
9352 (const_int 5) (const_int 7)
9353 (const_int 9) (const_int 11)
9354 (const_int 13) (const_int 15)])))
9356 (vec_select:V8HI (match_dup 2)
9357 (parallel [(const_int 1) (const_int 3)
9358 (const_int 5) (const_int 7)
9359 (const_int 9) (const_int 11)
9360 (const_int 13) (const_int 15)]))))))]
9362 "ix86_fixup_binary_operands_no_copy (MULT, V16HImode, operands);")
;; AVX2 matcher emitting the VEX-encoded vpmaddwd for the even/odd element
;; structure set up by the avx2_pmaddwd expander.  Operand 1 is commutative.
9364 (define_insn "*avx2_pmaddwd"
9365 [(set (match_operand:V8SI 0 "register_operand" "=x")
9370 (match_operand:V16HI 1 "nonimmediate_operand" "%x")
9371 (parallel [(const_int 0) (const_int 2)
9372 (const_int 4) (const_int 6)
9373 (const_int 8) (const_int 10)
9374 (const_int 12) (const_int 14)])))
9377 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
9378 (parallel [(const_int 0) (const_int 2)
9379 (const_int 4) (const_int 6)
9380 (const_int 8) (const_int 10)
9381 (const_int 12) (const_int 14)]))))
9384 (vec_select:V8HI (match_dup 1)
9385 (parallel [(const_int 1) (const_int 3)
9386 (const_int 5) (const_int 7)
9387 (const_int 9) (const_int 11)
9388 (const_int 13) (const_int 15)])))
9390 (vec_select:V8HI (match_dup 2)
9391 (parallel [(const_int 1) (const_int 3)
9392 (const_int 5) (const_int 7)
9393 (const_int 9) (const_int 11)
9394 (const_int 13) (const_int 15)]))))))]
9395 "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V16HImode, operands)"
9396 "vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
9397 [(set_attr "type" "sseiadd")
9398 (set_attr "prefix" "vex")
9399 (set_attr "mode" "OI")])
;; Expander for the 128-bit pmaddwd: V4SI result built from the even-indexed
;; and the odd-indexed V4HI halves of two V8HI inputs (odd halves reuse
;; operands 1 and 2 through match_dup).
9401 (define_expand "sse2_pmaddwd"
9402 [(set (match_operand:V4SI 0 "register_operand")
9407 (match_operand:V8HI 1 "nonimmediate_operand")
9408 (parallel [(const_int 0) (const_int 2)
9409 (const_int 4) (const_int 6)])))
9412 (match_operand:V8HI 2 "nonimmediate_operand")
9413 (parallel [(const_int 0) (const_int 2)
9414 (const_int 4) (const_int 6)]))))
9417 (vec_select:V4HI (match_dup 1)
9418 (parallel [(const_int 1) (const_int 3)
9419 (const_int 5) (const_int 7)])))
9421 (vec_select:V4HI (match_dup 2)
9422 (parallel [(const_int 1) (const_int 3)
9423 (const_int 5) (const_int 7)]))))))]
9425 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; SSE2 matcher emitting pmaddwd (two-operand "noavx" form) or vpmaddwd
;; (three-operand VEX form) for the structure set up by sse2_pmaddwd.
9427 (define_insn "*sse2_pmaddwd"
9428 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
9433 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x")
9434 (parallel [(const_int 0) (const_int 2)
9435 (const_int 4) (const_int 6)])))
9438 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
9439 (parallel [(const_int 0) (const_int 2)
9440 (const_int 4) (const_int 6)]))))
9443 (vec_select:V4HI (match_dup 1)
9444 (parallel [(const_int 1) (const_int 3)
9445 (const_int 5) (const_int 7)])))
9447 (vec_select:V4HI (match_dup 2)
9448 (parallel [(const_int 1) (const_int 3)
9449 (const_int 5) (const_int 7)]))))))]
9450 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
9452 pmaddwd\t{%2, %0|%0, %2}
9453 vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
9454 [(set_attr "isa" "noavx,avx")
9455 (set_attr "type" "sseiadd")
9456 (set_attr "atom_unit" "simul")
9457 (set_attr "prefix_data16" "1,*")
9458 (set_attr "prefix" "orig,vex")
9459 (set_attr "mode" "TI")])
;; Multiply expander for the VI4_AVX512F modes.  Inputs accept any
;; general_vector_operand; those that are not nonimmediate operands are first
;; forced into registers, then the pair is legitimized for MULT.  A separate
;; path calls ix86_expand_sse2_mulv4si3.
;; NOTE(review): the condition choosing between the two paths is not visible
;; in this excerpt.
9461 (define_expand "mul<mode>3<mask_name>"
9462 [(set (match_operand:VI4_AVX512F 0 "register_operand")
9464 (match_operand:VI4_AVX512F 1 "general_vector_operand")
9465 (match_operand:VI4_AVX512F 2 "general_vector_operand")))]
9466 "TARGET_SSE2 && <mask_mode512bit_condition>"
9470 if (!nonimmediate_operand (operands[1], <MODE>mode))
9471 operands[1] = force_reg (<MODE>mode, operands[1]);
9472 if (!nonimmediate_operand (operands[2], <MODE>mode))
9473 operands[2] = force_reg (<MODE>mode, operands[2]);
9474 ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
9478 ix86_expand_sse2_mulv4si3 (operands[0], operands[1], operands[2]);
;; SSE4.1 matcher for the VI4_AVX512F multiply, emitted as pmulld (two-operand
;; "noavx" form) or vpmulld (three-operand form with optional mask).
9483 (define_insn "*<sse4_1_avx2>_mul<mode>3<mask_name>"
9484 [(set (match_operand:VI4_AVX512F 0 "register_operand" "=x,v")
9486 (match_operand:VI4_AVX512F 1 "nonimmediate_operand" "%0,v")
9487 (match_operand:VI4_AVX512F 2 "nonimmediate_operand" "xm,vm")))]
9488 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, <MODE>mode, operands) && <mask_mode512bit_condition>"
9490 pmulld\t{%2, %0|%0, %2}
9491 vpmulld\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9492 [(set_attr "isa" "noavx,avx")
9493 (set_attr "type" "sseimul")
9494 (set_attr "prefix_extra" "1")
9495 (set_attr "prefix" "<mask_prefix3>")
9496 (set_attr "btver2_decode" "vector,vector")
9497 (set_attr "mode" "<sseinsnmode>")])
;; Multiply expander for the VI8_AVX2_AVX512F modes; all operands are
;; registers and the expansion is delegated to ix86_expand_sse2_mulvxdi3.
9499 (define_expand "mul<mode>3"
9500 [(set (match_operand:VI8_AVX2_AVX512F 0 "register_operand")
9501 (mult:VI8_AVX2_AVX512F
9502 (match_operand:VI8_AVX2_AVX512F 1 "register_operand")
9503 (match_operand:VI8_AVX2_AVX512F 2 "register_operand")))]
9506 ix86_expand_sse2_mulvxdi3 (operands[0], operands[1], operands[2]);
;; Widening-multiply ("hi") expander over VI124_AVX2 modes; the result has
;; mode <sseunpackmode>.  Expansion is delegated to
;; ix86_expand_mul_widen_hilo (remaining arguments not visible here).
9510 (define_expand "vec_widen_<s>mult_hi_<mode>"
9511 [(match_operand:<sseunpackmode> 0 "register_operand")
9512 (any_extend:<sseunpackmode>
9513 (match_operand:VI124_AVX2 1 "register_operand"))
9514 (match_operand:VI124_AVX2 2 "register_operand")]
9517 ix86_expand_mul_widen_hilo (operands[0], operands[1], operands[2],
;; Widening-multiply ("lo") expander over VI124_AVX2 modes; the result has
;; mode <sseunpackmode>.  Expansion is delegated to
;; ix86_expand_mul_widen_hilo (remaining arguments not visible here).
9522 (define_expand "vec_widen_<s>mult_lo_<mode>"
9523 [(match_operand:<sseunpackmode> 0 "register_operand")
9524 (any_extend:<sseunpackmode>
9525 (match_operand:VI124_AVX2 1 "register_operand"))
9526 (match_operand:VI124_AVX2 2 "register_operand")]
9529 ix86_expand_mul_widen_hilo (operands[0], operands[1], operands[2],
9534 ;; Most widen_<s>mult_even_<mode> can be handled directly from other
9535 ;; named patterns, but signed V4SI needs special help for plain SSE2.
;; Operand-list-only expander (V2DI result, two V4SI inputs) whose body
;; calls ix86_expand_mul_widen_evenodd; trailing arguments are not visible.
9536 (define_expand "vec_widen_smult_even_v4si"
9537 [(match_operand:V2DI 0 "register_operand")
9538 (match_operand:V4SI 1 "nonimmediate_operand")
9539 (match_operand:V4SI 2 "nonimmediate_operand")]
9542 ix86_expand_mul_widen_evenodd (operands[0], operands[1], operands[2],
;; Widening multiply of the odd elements for VI4_AVX512F modes; inputs accept
;; any general_vector_operand and the expansion is delegated to
;; ix86_expand_mul_widen_evenodd (trailing arguments not visible here).
9547 (define_expand "vec_widen_<s>mult_odd_<mode>"
9548 [(match_operand:<sseunpackmode> 0 "register_operand")
9549 (any_extend:<sseunpackmode>
9550 (match_operand:VI4_AVX512F 1 "general_vector_operand"))
9551 (match_operand:VI4_AVX512F 2 "general_vector_operand")]
9554 ix86_expand_mul_widen_evenodd (operands[0], operands[1], operands[2],
;; Signed dot-product expander for VI2_AVX2 modes.  Emits the matching
;; <sse2_avx2>_pmaddwd into a fresh temporary t, then sets operands[0] to a
;; PLUS in <sseunpackmode>.
;; NOTE(review): the PLUS operands are not visible in this excerpt;
;; presumably t and the accumulator operands[3] -- confirm.
9559 (define_expand "sdot_prod<mode>"
9560 [(match_operand:<sseunpackmode> 0 "register_operand")
9561 (match_operand:VI2_AVX2 1 "register_operand")
9562 (match_operand:VI2_AVX2 2 "register_operand")
9563 (match_operand:<sseunpackmode> 3 "register_operand")]
9566 rtx t = gen_reg_rtx (<sseunpackmode>mode);
9567 emit_insn (gen_<sse2_avx2>_pmaddwd (t, operands[1], operands[2]));
9568 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9569 gen_rtx_PLUS (<sseunpackmode>mode,
9574 ;; Normally we use widen_mul_even/odd, but combine can't quite get it all
9575 ;; back together when madd is available.
;; V4SI signed dot product via two chained XOP multiply-accumulates:
;; xop_pmacsdqh accumulates onto operands[3] into temporary t, then
;; xop_pmacsdql accumulates onto t into operands[0].
9576 (define_expand "sdot_prodv4si"
9577 [(match_operand:V2DI 0 "register_operand")
9578 (match_operand:V4SI 1 "register_operand")
9579 (match_operand:V4SI 2 "register_operand")
9580 (match_operand:V2DI 3 "register_operand")]
9583 rtx t = gen_reg_rtx (V2DImode);
9584 emit_insn (gen_xop_pmacsdqh (t, operands[1], operands[2], operands[3]));
9585 emit_insn (gen_xop_pmacsdql (operands[0], operands[1], operands[2], t));
;; Sum-of-absolute-differences expander for V16QI: psadbw of operands 1 and 2
;; goes into a V2DI temporary, which is moved into a V4SI temporary with
;; convert_move and then added to operands[3] to form operands[0].
9589 (define_expand "usadv16qi"
9590 [(match_operand:V4SI 0 "register_operand")
9591 (match_operand:V16QI 1 "register_operand")
9592 (match_operand:V16QI 2 "nonimmediate_operand")
9593 (match_operand:V4SI 3 "nonimmediate_operand")]
9596 rtx t1 = gen_reg_rtx (V2DImode);
9597 rtx t2 = gen_reg_rtx (V4SImode);
9598 emit_insn (gen_sse2_psadbw (t1, operands[1], operands[2]));
9599 convert_move (t2, t1, 0);
9600 emit_insn (gen_addv4si3 (operands[0], t2, operands[3]));
;; 256-bit variant of the sum-of-absolute-differences expander: avx2_psadbw
;; into a V4DI temporary, convert_move into a V8SI temporary, then a V8SI add
;; with operands[3] into operands[0].
9604 (define_expand "usadv32qi"
9605 [(match_operand:V8SI 0 "register_operand")
9606 (match_operand:V32QI 1 "register_operand")
9607 (match_operand:V32QI 2 "nonimmediate_operand")
9608 (match_operand:V8SI 3 "nonimmediate_operand")]
9611 rtx t1 = gen_reg_rtx (V4DImode);
9612 rtx t2 = gen_reg_rtx (V8SImode);
9613 emit_insn (gen_avx2_psadbw (t1, operands[1], operands[2]));
9614 convert_move (t2, t1, 0);
9615 emit_insn (gen_addv8si3 (operands[0], t2, operands[3]));
;; Arithmetic right shift for VI24_AVX2 modes, emitted as psra<suffix>
;; (two-operand "noavx" form) or vpsra<suffix> (three-operand VEX form).
;; The SI count is a register or an "N" constant; length_immediate depends on
;; whether operand 2 is a const_int.
9619 (define_insn "ashr<mode>3"
9620 [(set (match_operand:VI24_AVX2 0 "register_operand" "=x,x")
9622 (match_operand:VI24_AVX2 1 "register_operand" "0,x")
9623 (match_operand:SI 2 "nonmemory_operand" "xN,xN")))]
9626 psra<ssemodesuffix>\t{%2, %0|%0, %2}
9627 vpsra<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
9628 [(set_attr "isa" "noavx,avx")
9629 (set_attr "type" "sseishft")
9630 (set (attr "length_immediate")
9631 (if_then_else (match_operand 2 "const_int_operand")
9633 (const_string "0")))
9634 (set_attr "prefix_data16" "1,*")
9635 (set_attr "prefix" "orig,vex")
9636 (set_attr "mode" "<sseinsnmode>")])
;; Arithmetic right shift for VI24_AVX512BW_1 modes with optional mask.
;; Alternative 0 takes a register source and register count; alternative 1
;; allows a memory source with an "N" constant count.
9638 (define_insn "<mask_codefor>ashr<mode>3<mask_name>"
9639 [(set (match_operand:VI24_AVX512BW_1 0 "register_operand" "=v,v")
9640 (ashiftrt:VI24_AVX512BW_1
9641 (match_operand:VI24_AVX512BW_1 1 "nonimmediate_operand" "v,vm")
9642 (match_operand:SI 2 "nonmemory_operand" "v,N")))]
9644 "vpsra<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9645 [(set_attr "type" "sseishft")
9646 (set (attr "length_immediate")
9647 (if_then_else (match_operand 2 "const_int_operand")
9649 (const_string "0")))
9650 (set_attr "mode" "<sseinsnmode>")])
;; V2DI shift emitted as vpsraq with optional mask; the count operand is
;; DImode (register, or "N" constant when the source may be memory).
9652 (define_insn "<mask_codefor>ashrv2di3<mask_name>"
9653 [(set (match_operand:V2DI 0 "register_operand" "=v,v")
9655 (match_operand:V2DI 1 "nonimmediate_operand" "v,vm")
9656 (match_operand:DI 2 "nonmemory_operand" "v,N")))]
9658 "vpsraq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9659 [(set_attr "type" "sseishft")
9660 (set (attr "length_immediate")
9661 (if_then_else (match_operand 2 "const_int_operand")
9663 (const_string "0")))
9664 (set_attr "mode" "TI")])
;; Arithmetic right shift for VI248_AVX512BW_AVX512VL modes with optional
;; mask, emitted as vpsra<suffix>; same operand alternatives as the other
;; EVEX ashr patterns (register/register or memory/constant).
9666 (define_insn "ashr<mode>3<mask_name>"
9667 [(set (match_operand:VI248_AVX512BW_AVX512VL 0 "register_operand" "=v,v")
9668 (ashiftrt:VI248_AVX512BW_AVX512VL
9669 (match_operand:VI248_AVX512BW_AVX512VL 1 "nonimmediate_operand" "v,vm")
9670 (match_operand:SI 2 "nonmemory_operand" "v,N")))]
9672 "vpsra<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9673 [(set_attr "type" "sseishft")
9674 (set (attr "length_immediate")
9675 (if_then_else (match_operand 2 "const_int_operand")
9677 (const_string "0")))
9678 (set_attr "mode" "<sseinsnmode>")])
;; Logical shifts (any_lshift) for VI2_AVX2_AVX512BW modes.  Alternative 0 is
;; the two-operand "noavx" form; alternative 1 is the three-operand form with
;; optional mask.  Gated on SSE2 and both mask conditions.
9680 (define_insn "<shift_insn><mode>3<mask_name>"
9681 [(set (match_operand:VI2_AVX2_AVX512BW 0 "register_operand" "=x,v")
9682 (any_lshift:VI2_AVX2_AVX512BW
9683 (match_operand:VI2_AVX2_AVX512BW 1 "register_operand" "0,v")
9684 (match_operand:SI 2 "nonmemory_operand" "xN,vN")))]
9685 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
9687 p<vshift><ssemodesuffix>\t{%2, %0|%0, %2}
9688 vp<vshift><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9689 [(set_attr "isa" "noavx,avx")
9690 (set_attr "type" "sseishft")
9691 (set (attr "length_immediate")
9692 (if_then_else (match_operand 2 "const_int_operand")
9694 (const_string "0")))
9695 (set_attr "prefix_data16" "1,*")
9696 (set_attr "prefix" "orig,vex")
9697 (set_attr "mode" "<sseinsnmode>")])
;; Logical shifts (any_lshift) for VI48_AVX2 modes; same two alternatives as
;; the 16-bit element pattern, gated on SSE2 and <mask_mode512bit_condition>.
9699 (define_insn "<shift_insn><mode>3<mask_name>"
9700 [(set (match_operand:VI48_AVX2 0 "register_operand" "=x,v")
9701 (any_lshift:VI48_AVX2
9702 (match_operand:VI48_AVX2 1 "register_operand" "0,v")
9703 (match_operand:SI 2 "nonmemory_operand" "xN,vN")))]
9704 "TARGET_SSE2 && <mask_mode512bit_condition>"
9706 p<vshift><ssemodesuffix>\t{%2, %0|%0, %2}
9707 vp<vshift><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9708 [(set_attr "isa" "noavx,avx")
9709 (set_attr "type" "sseishft")
9710 (set (attr "length_immediate")
9711 (if_then_else (match_operand 2 "const_int_operand")
9713 (const_string "0")))
9714 (set_attr "prefix_data16" "1,*")
9715 (set_attr "prefix" "orig,vex")
9716 (set_attr "mode" "<sseinsnmode>")])
;; Logical shifts (any_lshift) for the 512-bit VI48_512 modes (AVX512F).
;; Alternative 0: register source, register or "N" constant count.
;; Alternative 1: memory source, which requires an "N" constant count.
9718 (define_insn "<shift_insn><mode>3<mask_name>"
9719 [(set (match_operand:VI48_512 0 "register_operand" "=v,v")
9720 (any_lshift:VI48_512
9721 (match_operand:VI48_512 1 "nonimmediate_operand" "v,m")
9722 (match_operand:SI 2 "nonmemory_operand" "vN,N")))]
9723 "TARGET_AVX512F && <mask_mode512bit_condition>"
9724 "vp<vshift><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9725 [(set_attr "isa" "avx512f")
9726 (set_attr "type" "sseishft")
9727 (set (attr "length_immediate")
9728 (if_then_else (match_operand 2 "const_int_operand")
9730 (const_string "0")))
9731 (set_attr "prefix" "evex")
9732 (set_attr "mode" "<sseinsnmode>")])
;; Whole-vector shift expander for VI_128 modes.  The shift itself is done in
;; V1TImode: operand 1 is reinterpreted with gen_lowpart, operands[3] is a
;; fresh V1TI temporary, and operands[4] is that temporary viewed in
;; <MODE>mode, which the second set copies to operand 0.  The count must be a
;; multiple of 8 in 0..255.
9735 (define_expand "vec_shl_<mode>"
9738 (match_operand:VI_128 1 "register_operand")
9739 (match_operand:SI 2 "const_0_to_255_mul_8_operand")))
9740 (set (match_operand:VI_128 0 "register_operand") (match_dup 4))]
9743 operands[1] = gen_lowpart (V1TImode, operands[1]);
9744 operands[3] = gen_reg_rtx (V1TImode);
9745 operands[4] = gen_lowpart (<MODE>mode, operands[3]);
;; Full-register left shift for VIMAX_AVX2 modes, emitted as pslldq/vpslldq.
;; The RTL count (a multiple of 8) is divided by 8 before printing, so the
;; instruction receives the scaled-down value as its immediate.
9748 (define_insn "<sse2_avx2>_ashl<mode>3"
9749 [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,v")
9751 (match_operand:VIMAX_AVX2 1 "register_operand" "0,v")
9752 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
9755 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
9757 switch (which_alternative)
9760 return "pslldq\t{%2, %0|%0, %2}";
9762 return "vpslldq\t{%2, %1, %0|%0, %1, %2}";
9767 [(set_attr "isa" "noavx,avx")
9768 (set_attr "type" "sseishft")
9769 (set_attr "length_immediate" "1")
9770 (set_attr "prefix_data16" "1,*")
9771 (set_attr "prefix" "orig,vex")
9772 (set_attr "mode" "<sseinsnmode>")])
;; Whole-vector shift expander for VI_128 modes, mirroring vec_shl_<mode>:
;; the operation is performed in V1TImode on a gen_lowpart view of operand 1
;; and the V1TI temporary is copied back to operand 0 in <MODE>mode.
9774 (define_expand "vec_shr_<mode>"
9777 (match_operand:VI_128 1 "register_operand")
9778 (match_operand:SI 2 "const_0_to_255_mul_8_operand")))
9779 (set (match_operand:VI_128 0 "register_operand") (match_dup 4))]
9782 operands[1] = gen_lowpart (V1TImode, operands[1]);
9783 operands[3] = gen_reg_rtx (V1TImode);
9784 operands[4] = gen_lowpart (<MODE>mode, operands[3]);
;; Full-register logical right shift for VIMAX_AVX2 modes, emitted as
;; psrldq/vpsrldq.  The RTL count (a multiple of 8) is divided by 8 before
;; printing, so the instruction receives the scaled-down immediate.
9787 (define_insn "<sse2_avx2>_lshr<mode>3"
9788 [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,v")
9789 (lshiftrt:VIMAX_AVX2
9790 (match_operand:VIMAX_AVX2 1 "register_operand" "0,v")
9791 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
9794 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
9796 switch (which_alternative)
9799 return "psrldq\t{%2, %0|%0, %2}";
9801 return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
9806 [(set_attr "isa" "noavx,avx")
9807 (set_attr "type" "sseishft")
9808 (set_attr "length_immediate" "1")
9809 (set_attr "atom_unit" "sishuf")
9810 (set_attr "prefix_data16" "1,*")
9811 (set_attr "prefix" "orig,vex")
9812 (set_attr "mode" "<sseinsnmode>")])
;; Rotate (any_rotate) with a vector count for VI48_AVX512VL modes: operand 2
;; has the same vector mode as the data and may be memory.  Emits
;; vp<rotate>v<suffix> with optional mask, EVEX-encoded.
9814 (define_insn "<avx512>_<rotate>v<mode><mask_name>"
9815 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
9816 (any_rotate:VI48_AVX512VL
9817 (match_operand:VI48_AVX512VL 1 "register_operand" "v")
9818 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")))]
9820 "vp<rotate>v<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9821 [(set_attr "prefix" "evex")
9822 (set_attr "mode" "<sseinsnmode>")])
;; Rotate (any_rotate) by an immediate for VI48_AVX512VL modes: operand 2 is
;; an SI constant in 0..255 and the data operand may be memory.  Emits
;; vp<rotate><suffix> with optional mask, EVEX-encoded.
9824 (define_insn "<avx512>_<rotate><mode><mask_name>"
9825 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
9826 (any_rotate:VI48_AVX512VL
9827 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm")
9828 (match_operand:SI 2 "const_0_to_255_operand")))]
9830 "vp<rotate><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9831 [(set_attr "prefix" "evex")
9832 (set_attr "mode" "<sseinsnmode>")])
;; Integer min/max (maxmin) expander for VI124_256_AVX512F_AVX512BW modes;
;; operands are legitimized with ix86_fixup_binary_operands_no_copy.
9834 (define_expand "<code><mode>3"
9835 [(set (match_operand:VI124_256_AVX512F_AVX512BW 0 "register_operand")
9836 (maxmin:VI124_256_AVX512F_AVX512BW
9837 (match_operand:VI124_256_AVX512F_AVX512BW 1 "nonimmediate_operand")
9838 (match_operand:VI124_256_AVX512F_AVX512BW 2 "nonimmediate_operand")))]
9840 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; AVX2 matcher for 256-bit integer min/max (VI124_256), emitted as
;; vp<maxmin_int><suffix> with a VEX prefix.  Operand 1 is commutative.
9842 (define_insn "*avx2_<code><mode>3"
9843 [(set (match_operand:VI124_256 0 "register_operand" "=v")
9845 (match_operand:VI124_256 1 "nonimmediate_operand" "%v")
9846 (match_operand:VI124_256 2 "nonimmediate_operand" "vm")))]
9847 "TARGET_AVX2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
9848 "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
9849 [(set_attr "type" "sseiadd")
9850 (set_attr "prefix_extra" "1")
9851 (set_attr "prefix" "vex")
9852 (set_attr "mode" "OI")])
;; Explicitly masked min/max expander for VI48_AVX512VL modes: the maxmin
;; result is vec_merge'd with operand 3 under the <avx512fmaskmode> mask in
;; operand 4.  Operands are legitimized for the binary operation first.
9854 (define_expand "<code><mode>3_mask"
9855 [(set (match_operand:VI48_AVX512VL 0 "register_operand")
9856 (vec_merge:VI48_AVX512VL
9857 (maxmin:VI48_AVX512VL
9858 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand")
9859 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand"))
9860 (match_operand:VI48_AVX512VL 3 "vector_move_operand")
9861 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
9863 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; AVX512F matcher for integer min/max on VI48_AVX512VL modes with optional
;; mask, emitted as vp<maxmin_int><suffix>.  Operand 1 is commutative.
9865 (define_insn "*avx512bw_<code><mode>3<mask_name>"
9866 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
9867 (maxmin:VI48_AVX512VL
9868 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "%v")
9869 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")))]
9870 "TARGET_AVX512F && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
9871 "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9872 [(set_attr "type" "sseiadd")
9873 (set_attr "prefix_extra" "1")
9874 (set_attr "prefix" "maybe_evex")
9875 (set_attr "mode" "<sseinsnmode>")])
;; Integer min/max for VI12_AVX512VL modes with optional mask, EVEX-encoded.
;; Operand 1 must be a register (no commutative modifier); operand 2 may be
;; memory.  NOTE(review): the insn condition is not visible in this excerpt.
9877 (define_insn "<mask_codefor><code><mode>3<mask_name>"
9878 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
9879 (maxmin:VI12_AVX512VL
9880 (match_operand:VI12_AVX512VL 1 "register_operand" "v")
9881 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")))]
9883 "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9884 [(set_attr "type" "sseiadd")
9885 (set_attr "prefix" "evex")
9886 (set_attr "mode" "<sseinsnmode>")])
;; Expander for maxmin over the VI8_AVX2_AVX512BW modes.
;; Two paths are visible:
;;  - when the mode is V8DImode or TARGET_AVX512VL holds (part of a larger
;;    condition), only ix86_fixup_binary_operands_no_copy is called;
;;  - otherwise a six-entry xops array is built for ix86_expand_int_vcond:
;;    xops[1]/xops[2] are operands 1/2 for SMAX/UMAX and swapped for the
;;    other codes, xops[3] is "operand 1 > operand 2" using GTU for
;;    UMAX/UMIN and GT otherwise, and xops[4]/xops[5] are the compared
;;    operands.
9888 (define_expand "<code><mode>3"
9889 [(set (match_operand:VI8_AVX2_AVX512BW 0 "register_operand")
9890 (maxmin:VI8_AVX2_AVX512BW
9891 (match_operand:VI8_AVX2_AVX512BW 1 "register_operand")
9892 (match_operand:VI8_AVX2_AVX512BW 2 "register_operand")))]
9896 && (<MODE>mode == V8DImode || TARGET_AVX512VL))
9897 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
9905 xops[0] = operands[0];
9907 if (<CODE> == SMAX || <CODE> == UMAX)
9909 xops[1] = operands[1];
9910 xops[2] = operands[2];
9914 xops[1] = operands[2];
9915 xops[2] = operands[1];
9918 code = (<CODE> == UMAX || <CODE> == UMIN) ? GTU : GT;
9920 xops[3] = gen_rtx_fmt_ee (code, VOIDmode, operands[1], operands[2]);
9921 xops[4] = operands[1];
9922 xops[5] = operands[2];
9924 ok = ix86_expand_int_vcond (xops);
;; Expander over the VI124_128 modes that falls back to a GT comparison.
;; With TARGET_SSE4_1, or for V8HImode, the operands are only fixed up.
;; Otherwise both inputs are forced into registers and the operation is
;; expanded through ix86_expand_int_vcond as a select on
;; "operand 1 > operand 2" (GT); the two visible assignments to
;; xops[1]/xops[2] choose which operand is taken when the comparison holds.
9930 (define_expand "<code><mode>3"
9931 [(set (match_operand:VI124_128 0 "register_operand")
9933 (match_operand:VI124_128 1 "nonimmediate_operand")
9934 (match_operand:VI124_128 2 "nonimmediate_operand")))]
9937 if (TARGET_SSE4_1 || <MODE>mode == V8HImode)
9938 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
9944 xops[0] = operands[0];
9945 operands[1] = force_reg (<MODE>mode, operands[1]);
9946 operands[2] = force_reg (<MODE>mode, operands[2]);
9950 xops[1] = operands[1];
9951 xops[2] = operands[2];
9955 xops[1] = operands[2];
9956 xops[2] = operands[1];
9959 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
9960 xops[4] = operands[1];
9961 xops[5] = operands[2];
9963 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 insn over the VI14_128 modes.  Alternative 0 (isa "noavx") is the
;; two-operand legacy form with operand 1 tied to the destination;
;; alternative 1 (isa "avx") is the three-operand VEX form.
9969 (define_insn "*sse4_1_<code><mode>3"
9970 [(set (match_operand:VI14_128 0 "register_operand" "=x,x")
9972 (match_operand:VI14_128 1 "nonimmediate_operand" "%0,x")
9973 (match_operand:VI14_128 2 "nonimmediate_operand" "xm,xm")))]
9974 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
9976 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
9977 vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
9978 [(set_attr "isa" "noavx,avx")
9979 (set_attr "type" "sseiadd")
9980 (set_attr "prefix_extra" "1,*")
9981 (set_attr "prefix" "orig,vex")
9982 (set_attr "mode" "TI")])
;; SSE2 insn for V8HI, emitting p<maxmin_int>w (legacy, alternative 0) or
;; vp<maxmin_int>w (VEX, alternative 1).  Requires only TARGET_SSE2 plus
;; ix86_binary_operator_ok.
9984 (define_insn "*<code>v8hi3"
9985 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
9987 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x")
9988 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")))]
9989 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V8HImode, operands)"
9991 p<maxmin_int>w\t{%2, %0|%0, %2}
9992 vp<maxmin_int>w\t{%2, %1, %0|%0, %1, %2}"
9993 [(set_attr "isa" "noavx,avx")
9994 (set_attr "type" "sseiadd")
9995 (set_attr "prefix_data16" "1,*")
9996 (set_attr "prefix_extra" "*,1")
9997 (set_attr "prefix" "orig,vex")
9998 (set_attr "mode" "TI")])
;; Expander over the VI124_128 modes that falls back to a GTU comparison.
;; Three paths are visible:
;;  - TARGET_SSE4_1 or V16QImode: operands are only fixed up;
;;  - UMAX on V8HImode: computed as (op1 -us op2) + op2 using
;;    gen_sse2_ussubv8hi3 followed by gen_addv8hi3.  A fresh temporary is
;;    used for the intermediate when the destination equals operand 2, so
;;    operand 2 is still intact for the add;
;;  - otherwise: both inputs are forced into registers and the operation is
;;    expanded through ix86_expand_int_vcond as a select on
;;    "operand 1 >u operand 2" (GTU), with the select operands in natural
;;    order for UMAX and swapped for the remaining code.
10000 (define_expand "<code><mode>3"
10001 [(set (match_operand:VI124_128 0 "register_operand")
10003 (match_operand:VI124_128 1 "nonimmediate_operand")
10004 (match_operand:VI124_128 2 "nonimmediate_operand")))]
10007 if (TARGET_SSE4_1 || <MODE>mode == V16QImode)
10008 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
10009 else if (<CODE> == UMAX && <MODE>mode == V8HImode)
10011 rtx op0 = operands[0], op2 = operands[2], op3 = op0;
10012 operands[1] = force_reg (<MODE>mode, operands[1]);
10013 if (rtx_equal_p (op3, op2))
10014 op3 = gen_reg_rtx (V8HImode);
10015 emit_insn (gen_sse2_ussubv8hi3 (op3, operands[1], op2));
10016 emit_insn (gen_addv8hi3 (op0, op3, op2));
10024 operands[1] = force_reg (<MODE>mode, operands[1]);
10025 operands[2] = force_reg (<MODE>mode, operands[2]);
10027 xops[0] = operands[0];
10029 if (<CODE> == UMAX)
10031 xops[1] = operands[1];
10032 xops[2] = operands[2];
10036 xops[1] = operands[2];
10037 xops[2] = operands[1];
10040 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
10041 xops[4] = operands[1];
10042 xops[5] = operands[2];
10044 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 insn over the VI24_128 modes.  Alternative 0 (isa "noavx") is the
;; two-operand legacy form with operand 1 tied to the destination;
;; alternative 1 (isa "avx") is the three-operand VEX form.
10050 (define_insn "*sse4_1_<code><mode>3"
10051 [(set (match_operand:VI24_128 0 "register_operand" "=x,x")
10053 (match_operand:VI24_128 1 "nonimmediate_operand" "%0,x")
10054 (match_operand:VI24_128 2 "nonimmediate_operand" "xm,xm")))]
10055 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
10057 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
10058 vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10059 [(set_attr "isa" "noavx,avx")
10060 (set_attr "type" "sseiadd")
10061 (set_attr "prefix_extra" "1,*")
10062 (set_attr "prefix" "orig,vex")
10063 (set_attr "mode" "TI")])
;; SSE2 insn for V16QI, emitting p<maxmin_int>b (legacy, alternative 0) or
;; vp<maxmin_int>b (VEX, alternative 1).  Requires only TARGET_SSE2 plus
;; ix86_binary_operator_ok.
10065 (define_insn "*<code>v16qi3"
10066 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
10068 (match_operand:V16QI 1 "nonimmediate_operand" "%0,x")
10069 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")))]
10070 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V16QImode, operands)"
10072 p<maxmin_int>b\t{%2, %0|%0, %2}
10073 vp<maxmin_int>b\t{%2, %1, %0|%0, %1, %2}"
10074 [(set_attr "isa" "noavx,avx")
10075 (set_attr "type" "sseiadd")
10076 (set_attr "prefix_data16" "1,*")
10077 (set_attr "prefix_extra" "*,1")
10078 (set_attr "prefix" "orig,vex")
10079 (set_attr "mode" "TI")])
10081 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10083 ;; Parallel integral comparisons
10085 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Named expander for equality comparison over the VI_256 modes.  It only
;; fixes up the operands (code EQ) before the template is emitted.
10087 (define_expand "avx2_eq<mode>3"
10088 [(set (match_operand:VI_256 0 "register_operand")
10090 (match_operand:VI_256 1 "nonimmediate_operand")
10091 (match_operand:VI_256 2 "nonimmediate_operand")))]
10093 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
;; AVX2 insn emitting vpcmpeq<ssemodesuffix> for the VI_256 modes.
;; Operand 1 is commutative ("%x"); operand 2 may be memory.
10095 (define_insn "*avx2_eq<mode>3"
10096 [(set (match_operand:VI_256 0 "register_operand" "=x")
10098 (match_operand:VI_256 1 "nonimmediate_operand" "%x")
10099 (match_operand:VI_256 2 "nonimmediate_operand" "xm")))]
10100 "TARGET_AVX2 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
10101 "vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10102 [(set_attr "type" "ssecmp")
10103 (set_attr "prefix_extra" "1")
10104 (set_attr "prefix" "vex")
10105 (set_attr "mode" "OI")])
;; Named expander for the AVX-512 equality compare over the VI48_512 modes.
;; The result is a mask-mode value (<avx512fmaskmode>) expressed as
;; UNSPEC_MASKED_EQ.  Operands are fixed up for code EQ first.
10107 (define_expand "avx512f_eq<mode>3<mask_scalar_merge_name>"
10108 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
10109 (unspec:<avx512fmaskmode>
10110 [(match_operand:VI48_512 1 "register_operand")
10111 (match_operand:VI48_512 2 "nonimmediate_operand")]
10112 UNSPEC_MASKED_EQ))]
10114 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
;; Insn matching the UNSPEC_MASKED_EQ form: vpcmpeq<ssemodesuffix> writing
;; a mask register ("=Yk").  <mask_scalar_merge_operand3> in the template
;; supplies the optional mask operand of the substituted variant.
10116 (define_insn "avx512f_eq<mode>3<mask_scalar_merge_name>_1"
10117 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
10118 (unspec:<avx512fmaskmode>
10119 [(match_operand:VI48_512 1 "register_operand" "%v")
10120 (match_operand:VI48_512 2 "nonimmediate_operand" "vm")]
10121 UNSPEC_MASKED_EQ))]
10122 "TARGET_AVX512F && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
10123 "vpcmpeq<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
10124 [(set_attr "type" "ssecmp")
10125 (set_attr "prefix_extra" "1")
10126 (set_attr "prefix" "evex")
10127 (set_attr "mode" "<sseinsnmode>")])
;; SSE4.1 insn for V2DI equality: pcmpeqq (legacy, alternative 0) or
;; vpcmpeqq (VEX, alternative 1).
10129 (define_insn "*sse4_1_eqv2di3"
10130 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
10132 (match_operand:V2DI 1 "nonimmediate_operand" "%0,x")
10133 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")))]
10134 "TARGET_SSE4_1 && ix86_binary_operator_ok (EQ, V2DImode, operands)"
10136 pcmpeqq\t{%2, %0|%0, %2}
10137 vpcmpeqq\t{%2, %1, %0|%0, %1, %2}"
10138 [(set_attr "isa" "noavx,avx")
10139 (set_attr "type" "ssecmp")
10140 (set_attr "prefix_extra" "1")
10141 (set_attr "prefix" "orig,vex")
10142 (set_attr "mode" "TI")])
;; SSE2 insn for equality over the VI124_128 modes: pcmpeq<ssemodesuffix>
;; (legacy) or vpcmpeq<ssemodesuffix> (VEX).  Disabled when TARGET_XOP is
;; set.
10144 (define_insn "*sse2_eq<mode>3"
10145 [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
10147 (match_operand:VI124_128 1 "nonimmediate_operand" "%0,x")
10148 (match_operand:VI124_128 2 "nonimmediate_operand" "xm,xm")))]
10149 "TARGET_SSE2 && !TARGET_XOP
10150 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
10152 pcmpeq<ssemodesuffix>\t{%2, %0|%0, %2}
10153 vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10154 [(set_attr "isa" "noavx,avx")
10155 (set_attr "type" "ssecmp")
10156 (set_attr "prefix_data16" "1,*")
10157 (set_attr "prefix" "orig,vex")
10158 (set_attr "mode" "TI")])
;; Named expander for equality over the VI124_128 modes, available with
;; TARGET_SSE2 and without TARGET_XOP.  It only fixes up the operands for
;; code EQ.
10160 (define_expand "sse2_eq<mode>3"
10161 [(set (match_operand:VI124_128 0 "register_operand")
10163 (match_operand:VI124_128 1 "nonimmediate_operand")
10164 (match_operand:VI124_128 2 "nonimmediate_operand")))]
10165 "TARGET_SSE2 && !TARGET_XOP "
10166 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
;; Named expander for V2DI equality.  It only fixes up the operands for
;; code EQ; the enabling condition line is not visible in this view.
10168 (define_expand "sse4_1_eqv2di3"
10169 [(set (match_operand:V2DI 0 "register_operand")
10171 (match_operand:V2DI 1 "nonimmediate_operand")
10172 (match_operand:V2DI 2 "nonimmediate_operand")))]
10174 "ix86_fixup_binary_operands_no_copy (EQ, V2DImode, operands);")
;; Insn for V2DI greater-than: pcmpgtq (legacy) or vpcmpgtq (VEX).
;; Operand 1 must be a register and carries no "%" marker, i.e. the
;; operands are not interchangeable.
10176 (define_insn "sse4_2_gtv2di3"
10177 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
10179 (match_operand:V2DI 1 "register_operand" "0,x")
10180 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")))]
10183 pcmpgtq\t{%2, %0|%0, %2}
10184 vpcmpgtq\t{%2, %1, %0|%0, %1, %2}"
10185 [(set_attr "isa" "noavx,avx")
10186 (set_attr "type" "ssecmp")
10187 (set_attr "prefix_extra" "1")
10188 (set_attr "prefix" "orig,vex")
10189 (set_attr "mode" "TI")])
;; Insn emitting vpcmpgt<ssemodesuffix> for the VI_256 modes.  Operand 1
;; must be a register; operand 2 may be memory.
10191 (define_insn "avx2_gt<mode>3"
10192 [(set (match_operand:VI_256 0 "register_operand" "=x")
10194 (match_operand:VI_256 1 "register_operand" "x")
10195 (match_operand:VI_256 2 "nonimmediate_operand" "xm")))]
10197 "vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10198 [(set_attr "type" "ssecmp")
10199 (set_attr "prefix_extra" "1")
10200 (set_attr "prefix" "vex")
10201 (set_attr "mode" "OI")])
;; Insn for the AVX-512 greater-than compare over the VI48_512 modes,
;; expressed as UNSPEC_MASKED_GT and writing a mask register ("=Yk").
;; <mask_scalar_merge_operand3> supplies the optional mask operand of the
;; substituted variant.
10203 (define_insn "avx512f_gt<mode>3<mask_scalar_merge_name>"
10204 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
10205 (unspec:<avx512fmaskmode>
10206 [(match_operand:VI48_512 1 "register_operand" "v")
10207 (match_operand:VI48_512 2 "nonimmediate_operand" "vm")] UNSPEC_MASKED_GT))]
10209 "vpcmpgt<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
10210 [(set_attr "type" "ssecmp")
10211 (set_attr "prefix_extra" "1")
10212 (set_attr "prefix" "evex")
10213 (set_attr "mode" "<sseinsnmode>")])
;; SSE2 insn for greater-than over the VI124_128 modes:
;; pcmpgt<ssemodesuffix> (legacy) or vpcmpgt<ssemodesuffix> (VEX).
;; Disabled when TARGET_XOP is set.
10215 (define_insn "sse2_gt<mode>3"
10216 [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
10218 (match_operand:VI124_128 1 "register_operand" "0,x")
10219 (match_operand:VI124_128 2 "nonimmediate_operand" "xm,xm")))]
10220 "TARGET_SSE2 && !TARGET_XOP"
10222 pcmpgt<ssemodesuffix>\t{%2, %0|%0, %2}
10223 vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10224 [(set_attr "isa" "noavx,avx")
10225 (set_attr "type" "ssecmp")
10226 (set_attr "prefix_data16" "1,*")
10227 (set_attr "prefix" "orig,vex")
10228 (set_attr "mode" "TI")])
;; Vector conditional select: operand 0 receives operand 1 or operand 2
;; according to comparison operator 3 applied to operands 4 and 5.
;; The data mode (V_512) and comparison mode (VI_512) iterate independently
;; but must have the same number of units.  Expansion is delegated to
;; ix86_expand_int_vcond.
10230 (define_expand "vcond<V_512:mode><VI_512:mode>"
10231 [(set (match_operand:V_512 0 "register_operand")
10232 (if_then_else:V_512
10233 (match_operator 3 ""
10234 [(match_operand:VI_512 4 "nonimmediate_operand")
10235 (match_operand:VI_512 5 "general_operand")])
10236 (match_operand:V_512 1)
10237 (match_operand:V_512 2)))]
10239 && (GET_MODE_NUNITS (<V_512:MODE>mode)
10240 == GET_MODE_NUNITS (<VI_512:MODE>mode))"
10242 bool ok = ix86_expand_int_vcond (operands);
;; Vector conditional select for V_256 data with a VI_256 comparison.
;; Both modes must have the same number of units; expansion is delegated
;; to ix86_expand_int_vcond.
10247 (define_expand "vcond<V_256:mode><VI_256:mode>"
10248 [(set (match_operand:V_256 0 "register_operand")
10249 (if_then_else:V_256
10250 (match_operator 3 ""
10251 [(match_operand:VI_256 4 "nonimmediate_operand")
10252 (match_operand:VI_256 5 "general_operand")])
10253 (match_operand:V_256 1)
10254 (match_operand:V_256 2)))]
10256 && (GET_MODE_NUNITS (<V_256:MODE>mode)
10257 == GET_MODE_NUNITS (<VI_256:MODE>mode))"
10259 bool ok = ix86_expand_int_vcond (operands);
;; Vector conditional select for V_128 data with a VI124_128 comparison.
;; Both modes must have the same number of units; expansion is delegated
;; to ix86_expand_int_vcond.
10264 (define_expand "vcond<V_128:mode><VI124_128:mode>"
10265 [(set (match_operand:V_128 0 "register_operand")
10266 (if_then_else:V_128
10267 (match_operator 3 ""
10268 [(match_operand:VI124_128 4 "nonimmediate_operand")
10269 (match_operand:VI124_128 5 "general_operand")])
10270 (match_operand:V_128 1)
10271 (match_operand:V_128 2)))]
10273 && (GET_MODE_NUNITS (<V_128:MODE>mode)
10274 == GET_MODE_NUNITS (<VI124_128:MODE>mode))"
10276 bool ok = ix86_expand_int_vcond (operands);
;; Vector conditional select for VI8F_128 data with a V2DI comparison.
;; Expansion is delegated to ix86_expand_int_vcond; the enabling condition
;; line is not visible in this view.
10281 (define_expand "vcond<VI8F_128:mode>v2di"
10282 [(set (match_operand:VI8F_128 0 "register_operand")
10283 (if_then_else:VI8F_128
10284 (match_operator 3 ""
10285 [(match_operand:V2DI 4 "nonimmediate_operand")
10286 (match_operand:V2DI 5 "general_operand")])
10287 (match_operand:VI8F_128 1)
10288 (match_operand:VI8F_128 2)))]
10291 bool ok = ix86_expand_int_vcond (operands);
;; "vcondu" counterpart of the 512-bit vcond expander.  Here both compared
;; operands use nonimmediate_operand and the selected values use
;; general_operand.  The two modes must have the same number of units;
;; expansion is delegated to ix86_expand_int_vcond.
10296 (define_expand "vcondu<V_512:mode><VI_512:mode>"
10297 [(set (match_operand:V_512 0 "register_operand")
10298 (if_then_else:V_512
10299 (match_operator 3 ""
10300 [(match_operand:VI_512 4 "nonimmediate_operand")
10301 (match_operand:VI_512 5 "nonimmediate_operand")])
10302 (match_operand:V_512 1 "general_operand")
10303 (match_operand:V_512 2 "general_operand")))]
10305 && (GET_MODE_NUNITS (<V_512:MODE>mode)
10306 == GET_MODE_NUNITS (<VI_512:MODE>mode))"
10308 bool ok = ix86_expand_int_vcond (operands);
;; "vcondu" expander for V_256 data with a VI_256 comparison.  The two
;; modes must have the same number of units; expansion is delegated to
;; ix86_expand_int_vcond.
10313 (define_expand "vcondu<V_256:mode><VI_256:mode>"
10314 [(set (match_operand:V_256 0 "register_operand")
10315 (if_then_else:V_256
10316 (match_operator 3 ""
10317 [(match_operand:VI_256 4 "nonimmediate_operand")
10318 (match_operand:VI_256 5 "nonimmediate_operand")])
10319 (match_operand:V_256 1 "general_operand")
10320 (match_operand:V_256 2 "general_operand")))]
10322 && (GET_MODE_NUNITS (<V_256:MODE>mode)
10323 == GET_MODE_NUNITS (<VI_256:MODE>mode))"
10325 bool ok = ix86_expand_int_vcond (operands);
;; "vcondu" expander for V_128 data with a VI124_128 comparison.  The two
;; modes must have the same number of units; expansion is delegated to
;; ix86_expand_int_vcond.
10330 (define_expand "vcondu<V_128:mode><VI124_128:mode>"
10331 [(set (match_operand:V_128 0 "register_operand")
10332 (if_then_else:V_128
10333 (match_operator 3 ""
10334 [(match_operand:VI124_128 4 "nonimmediate_operand")
10335 (match_operand:VI124_128 5 "nonimmediate_operand")])
10336 (match_operand:V_128 1 "general_operand")
10337 (match_operand:V_128 2 "general_operand")))]
10339 && (GET_MODE_NUNITS (<V_128:MODE>mode)
10340 == GET_MODE_NUNITS (<VI124_128:MODE>mode))"
10342 bool ok = ix86_expand_int_vcond (operands);
;; "vcondu" expander for VI8F_128 data with a V2DI comparison.  Expansion
;; is delegated to ix86_expand_int_vcond; the enabling condition line is
;; not visible in this view.
10347 (define_expand "vcondu<VI8F_128:mode>v2di"
10348 [(set (match_operand:VI8F_128 0 "register_operand")
10349 (if_then_else:VI8F_128
10350 (match_operator 3 ""
10351 [(match_operand:V2DI 4 "nonimmediate_operand")
10352 (match_operand:V2DI 5 "nonimmediate_operand")])
10353 (match_operand:VI8F_128 1 "general_operand")
10354 (match_operand:VI8F_128 2 "general_operand")))]
10357 bool ok = ix86_expand_int_vcond (operands);
;; Modes handled by the variable vec_perm expander.  The six 128-bit modes
;; are unconditional; the 256-bit modes require TARGET_AVX2 and the listed
;; 512-bit modes require TARGET_AVX512F.
10362 (define_mode_iterator VEC_PERM_AVX2
10363 [V16QI V8HI V4SI V2DI V4SF V2DF
10364 (V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
10365 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
10366 (V8SF "TARGET_AVX2") (V4DF "TARGET_AVX2")
10367 (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")
10368 (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")])
;; Variable permutation expander: operands 1 and 2 are the input vectors
;; and operand 3 is a register of the matching integer vector mode
;; (<sseintvecmode>).  Available with SSSE3, AVX or XOP; all work is done
;; by ix86_expand_vec_perm.
10370 (define_expand "vec_perm<mode>"
10371 [(match_operand:VEC_PERM_AVX2 0 "register_operand")
10372 (match_operand:VEC_PERM_AVX2 1 "register_operand")
10373 (match_operand:VEC_PERM_AVX2 2 "register_operand")
10374 (match_operand:<sseintvecmode> 3 "register_operand")]
10375 "TARGET_SSSE3 || TARGET_AVX || TARGET_XOP"
10377 ix86_expand_vec_perm (operands);
;; Modes handled by the constant-selector vec_perm_const expander, each
;; gated on the ISA flag shown next to it.
10381 (define_mode_iterator VEC_PERM_CONST
10382 [(V4SF "TARGET_SSE") (V4SI "TARGET_SSE")
10383 (V2DF "TARGET_SSE") (V2DI "TARGET_SSE")
10384 (V16QI "TARGET_SSE2") (V8HI "TARGET_SSE2")
10385 (V8SF "TARGET_AVX") (V4DF "TARGET_AVX")
10386 (V8SI "TARGET_AVX") (V4DI "TARGET_AVX")
10387 (V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
10388 (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")
10389 (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")])
;; Permutation expander whose selector (operand 3) has no predicate.
;; The expansion is attempted through ix86_expand_vec_perm_const and its
;; boolean result is tested; the statements taken on success or failure
;; are not visible in this view.
10391 (define_expand "vec_perm_const<mode>"
10392 [(match_operand:VEC_PERM_CONST 0 "register_operand")
10393 (match_operand:VEC_PERM_CONST 1 "register_operand")
10394 (match_operand:VEC_PERM_CONST 2 "register_operand")
10395 (match_operand:<sseintvecmode> 3)]
10398 if (ix86_expand_vec_perm_const (operands))
10404 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10406 ;; Parallel bitwise logical operations
10408 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; One's complement over the VI modes, implemented as an XOR.
;; The preparation code builds a constant vector whose every element is
;; constm1_rtx, forces it into a register and stores it as operands[2].
10410 (define_expand "one_cmpl<mode>2"
10411 [(set (match_operand:VI 0 "register_operand")
10412 (xor:VI (match_operand:VI 1 "nonimmediate_operand")
10416 int i, n = GET_MODE_NUNITS (<MODE>mode);
10417 rtvec v = rtvec_alloc (n);
10419 for (i = 0; i < n; ++i)
10420 RTVEC_ELT (v, i) = constm1_rtx;
10422 operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v));
;; Named expander for the and-not operation over the VI_AVX2 modes:
;; operand 1 is complemented (not:) before being combined with operand 2.
;; It has no preparation statements.
10425 (define_expand "<sse2_avx2>_andnot<mode>3<mask_name>"
10426 [(set (match_operand:VI_AVX2 0 "register_operand")
10428 (not:VI_AVX2 (match_operand:VI_AVX2 1 "register_operand"))
10429 (match_operand:VI_AVX2 2 "nonimmediate_operand")))]
10430 "TARGET_SSE2 && <mask_mode512bit_condition>")
;; Insn for the and-not operation over all VI modes.
;; The output code selects a mnemonic stem (tmp) from the insn's "mode"
;; attribute, asserting the ISA each case needs, then picks an operand
;; template by alternative: 0 is the two-operand legacy form, 1 is the
;; "v"-prefixed three-operand form carrying <mask_operand3_1>.
;; The final string is formatted into a static 64-byte buffer with
;; snprintf.
;; The "mode" attribute prefers <ssePSmode> for 16-byte modes when
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL is set; without SSE2, or when
;; optimizing for size, it selects V4SF.
10432 (define_insn "*andnot<mode>3<mask_name>"
10433 [(set (match_operand:VI 0 "register_operand" "=x,v")
10435 (not:VI (match_operand:VI 1 "register_operand" "0,v"))
10436 (match_operand:VI 2 "nonimmediate_operand" "xm,vm")))]
10437 "TARGET_SSE && <mask_mode512bit_condition>"
10439 static char buf[64];
10443 switch (get_attr_mode (insn))
10446 gcc_assert (TARGET_AVX512F);
10448 tmp = "pandn<ssemodesuffix>";
10452 gcc_assert (TARGET_AVX2);
10454 gcc_assert (TARGET_SSE2);
10460 gcc_assert (TARGET_AVX512F);
10462 gcc_assert (TARGET_AVX);
10464 gcc_assert (TARGET_SSE);
10470 gcc_unreachable ();
10473 switch (which_alternative)
10476 ops = "%s\t{%%2, %%0|%%0, %%2}";
10479 ops = "v%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
10482 gcc_unreachable ();
10485 snprintf (buf, sizeof (buf), ops, tmp);
10488 [(set_attr "isa" "noavx,avx")
10489 (set_attr "type" "sselog")
10490 (set (attr "prefix_data16")
10492 (and (eq_attr "alternative" "0")
10493 (eq_attr "mode" "TI"))
10495 (const_string "*")))
10496 (set_attr "prefix" "<mask_prefix3>")
10498 (cond [(and (match_test "<MODE_SIZE> == 16")
10499 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
10500 (const_string "<ssePSmode>")
10501 (match_test "TARGET_AVX2")
10502 (const_string "<sseinsnmode>")
10503 (match_test "TARGET_AVX")
10505 (match_test "<MODE_SIZE> > 16")
10506 (const_string "V8SF")
10507 (const_string "<sseinsnmode>"))
10508 (ior (not (match_test "TARGET_SSE2"))
10509 (match_test "optimize_function_for_size_p (cfun)"))
10510 (const_string "V4SF")
10512 (const_string "<sseinsnmode>")))])
;; Expander for the vector logical operations over all VI modes.  Either
;; input may be a constant vector; all work is done by
;; ix86_expand_vector_logical_operator.
10514 (define_expand "<code><mode>3"
10515 [(set (match_operand:VI 0 "register_operand")
10517 (match_operand:VI 1 "nonimmediate_or_const_vector_operand")
10518 (match_operand:VI 2 "nonimmediate_or_const_vector_operand")))]
10521 ix86_expand_vector_logical_operator (<CODE>, <MODE>mode, operands);
;; Insn for the vector logical operations over all VI modes; operand 1 is
;; commutative ("%").
;; The output code selects a mnemonic stem (tmp) from the insn's "mode"
;; attribute, asserting the ISA each case needs, then picks an operand
;; template by alternative: 0 is the two-operand legacy form, 1 is the
;; "v"-prefixed three-operand form carrying <mask_operand3_1>.
;; The final string is formatted into a static 64-byte buffer with
;; snprintf.  The "mode" attribute is computed by the same cond as the
;; and-not insn.
10525 (define_insn "<mask_codefor><code><mode>3<mask_name>"
10526 [(set (match_operand:VI 0 "register_operand" "=x,v")
10528 (match_operand:VI 1 "nonimmediate_operand" "%0,v")
10529 (match_operand:VI 2 "nonimmediate_operand" "xm,vm")))]
10530 "TARGET_SSE && <mask_mode512bit_condition>
10531 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
10533 static char buf[64];
10537 switch (get_attr_mode (insn))
10540 gcc_assert (TARGET_AVX512F);
10542 tmp = "p<logic><ssemodesuffix>";
10546 gcc_assert (TARGET_AVX2);
10548 gcc_assert (TARGET_SSE2);
10554 gcc_assert (TARGET_AVX512F);
10556 gcc_assert (TARGET_AVX);
10558 gcc_assert (TARGET_SSE);
10564 gcc_unreachable ();
10567 switch (which_alternative)
10570 ops = "%s\t{%%2, %%0|%%0, %%2}";
10573 ops = "v%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
10576 gcc_unreachable ();
10579 snprintf (buf, sizeof (buf), ops, tmp);
10582 [(set_attr "isa" "noavx,avx")
10583 (set_attr "type" "sselog")
10584 (set (attr "prefix_data16")
10586 (and (eq_attr "alternative" "0")
10587 (eq_attr "mode" "TI"))
10589 (const_string "*")))
10590 (set_attr "prefix" "<mask_prefix3>")
10592 (cond [(and (match_test "<MODE_SIZE> == 16")
10593 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
10594 (const_string "<ssePSmode>")
10595 (match_test "TARGET_AVX2")
10596 (const_string "<sseinsnmode>")
10597 (match_test "TARGET_AVX")
10599 (match_test "<MODE_SIZE> > 16")
10600 (const_string "V8SF")
10601 (const_string "<sseinsnmode>"))
10602 (ior (not (match_test "TARGET_SSE2"))
10603 (match_test "optimize_function_for_size_p (cfun)"))
10604 (const_string "V4SF")
10606 (const_string "<sseinsnmode>")))])
;; Insn emitting vptestm<ssemodesuffix> for the VI48_512 modes.  The result
;; is written to a mask register ("=Yk"); the unspec name and the insn
;; condition are on lines not visible in this view.
10608 (define_insn "avx512f_testm<mode>3<mask_scalar_merge_name>"
10609 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
10610 (unspec:<avx512fmaskmode>
10611 [(match_operand:VI48_512 1 "register_operand" "v")
10612 (match_operand:VI48_512 2 "nonimmediate_operand" "vm")]
10615 "vptestm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
10616 [(set_attr "prefix" "evex")
10617 (set_attr "mode" "<sseinsnmode>")])
;; Insn emitting vptestnm<ssemodesuffix> for the VI48_512 modes.  The
;; result is written to a mask register ("=Yk"); the unspec name and the
;; insn condition are on lines not visible in this view.
10619 (define_insn "avx512f_testnm<mode>3<mask_scalar_merge_name>"
10620 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
10621 (unspec:<avx512fmaskmode>
10622 [(match_operand:VI48_512 1 "register_operand" "v")
10623 (match_operand:VI48_512 2 "nonimmediate_operand" "vm")]
10626 "vptestnm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
10627 [(set_attr "prefix" "evex")
10628 (set_attr "mode" "<sseinsnmode>")])
10630 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10632 ;; Parallel integral element swizzling
10634 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Truncating pack of two VI248_AVX2_8_AVX512F vectors into one
;; <ssepackmode> vector.  Both inputs are reinterpreted in <ssepackmode>
;; with gen_lowpart and handed to ix86_expand_vec_extract_even_odd with a
;; final argument of 0.
;; NOTE(review): 0 presumably selects the even elements -- confirm against
;; the helper.
10636 (define_expand "vec_pack_trunc_<mode>"
10637 [(match_operand:<ssepackmode> 0 "register_operand")
10638 (match_operand:VI248_AVX2_8_AVX512F 1 "register_operand")
10639 (match_operand:VI248_AVX2_8_AVX512F 2 "register_operand")]
10642 rtx op1 = gen_lowpart (<ssepackmode>mode, operands[1]);
10643 rtx op2 = gen_lowpart (<ssepackmode>mode, operands[2]);
10644 ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
;; packsswb / vpacksswb: each <sseunpackmode> input is truncated with
;; signed saturation (ss_truncate) to a half vector and the two halves are
;; concatenated into the VI1_AVX2 result.
10648 (define_insn "<sse2_avx2>_packsswb"
10649 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
10650 (vec_concat:VI1_AVX2
10651 (ss_truncate:<ssehalfvecmode>
10652 (match_operand:<sseunpackmode> 1 "register_operand" "0,x"))
10653 (ss_truncate:<ssehalfvecmode>
10654 (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,xm"))))]
10657 packsswb\t{%2, %0|%0, %2}
10658 vpacksswb\t{%2, %1, %0|%0, %1, %2}"
10659 [(set_attr "isa" "noavx,avx")
10660 (set_attr "type" "sselog")
10661 (set_attr "prefix_data16" "1,*")
10662 (set_attr "prefix" "orig,vex")
10663 (set_attr "mode" "<sseinsnmode>")])
;; packssdw / vpackssdw: each <sseunpackmode> input is truncated with
;; signed saturation (ss_truncate) to a half vector and the two halves are
;; concatenated into the VI2_AVX2 result.
10665 (define_insn "<sse2_avx2>_packssdw"
10666 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
10667 (vec_concat:VI2_AVX2
10668 (ss_truncate:<ssehalfvecmode>
10669 (match_operand:<sseunpackmode> 1 "register_operand" "0,x"))
10670 (ss_truncate:<ssehalfvecmode>
10671 (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,xm"))))]
10674 packssdw\t{%2, %0|%0, %2}
10675 vpackssdw\t{%2, %1, %0|%0, %1, %2}"
10676 [(set_attr "isa" "noavx,avx")
10677 (set_attr "type" "sselog")
10678 (set_attr "prefix_data16" "1,*")
10679 (set_attr "prefix" "orig,vex")
10680 (set_attr "mode" "<sseinsnmode>")])
;; packuswb / vpackuswb: each <sseunpackmode> input is truncated with
;; unsigned saturation (us_truncate) to a half vector and the two halves
;; are concatenated into the VI1_AVX2 result.
10682 (define_insn "<sse2_avx2>_packuswb"
10683 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
10684 (vec_concat:VI1_AVX2
10685 (us_truncate:<ssehalfvecmode>
10686 (match_operand:<sseunpackmode> 1 "register_operand" "0,x"))
10687 (us_truncate:<ssehalfvecmode>
10688 (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,xm"))))]
10691 packuswb\t{%2, %0|%0, %2}
10692 vpackuswb\t{%2, %1, %0|%0, %1, %2}"
10693 [(set_attr "isa" "noavx,avx")
10694 (set_attr "type" "sselog")
10695 (set_attr "prefix_data16" "1,*")
10696 (set_attr "prefix" "orig,vex")
10697 (set_attr "mode" "<sseinsnmode>")])
;; vpunpckhbw on V64QI.  The selector lists, for each group of 16 source
;; elements, indices 8..15 of the group alternating with the same indices
;; plus 64.
;; NOTE(review): indices >= 64 presumably address operand 2 in the
;; concatenation of operands 1 and 2 -- the vec_select/vec_concat lines and
;; the insn condition are not visible in this view.
10699 (define_insn "avx512bw_interleave_highv64qi<mask_name>"
10700 [(set (match_operand:V64QI 0 "register_operand" "=v")
10703 (match_operand:V64QI 1 "register_operand" "v")
10704 (match_operand:V64QI 2 "nonimmediate_operand" "vm"))
10705 (parallel [(const_int 8) (const_int 72)
10706 (const_int 9) (const_int 73)
10707 (const_int 10) (const_int 74)
10708 (const_int 11) (const_int 75)
10709 (const_int 12) (const_int 76)
10710 (const_int 13) (const_int 77)
10711 (const_int 14) (const_int 78)
10712 (const_int 15) (const_int 79)
10713 (const_int 24) (const_int 88)
10714 (const_int 25) (const_int 89)
10715 (const_int 26) (const_int 90)
10716 (const_int 27) (const_int 91)
10717 (const_int 28) (const_int 92)
10718 (const_int 29) (const_int 93)
10719 (const_int 30) (const_int 94)
10720 (const_int 31) (const_int 95)
10721 (const_int 40) (const_int 104)
10722 (const_int 41) (const_int 105)
10723 (const_int 42) (const_int 106)
10724 (const_int 43) (const_int 107)
10725 (const_int 44) (const_int 108)
10726 (const_int 45) (const_int 109)
10727 (const_int 46) (const_int 110)
10728 (const_int 47) (const_int 111)
10729 (const_int 56) (const_int 120)
10730 (const_int 57) (const_int 121)
10731 (const_int 58) (const_int 122)
10732 (const_int 59) (const_int 123)
10733 (const_int 60) (const_int 124)
10734 (const_int 61) (const_int 125)
10735 (const_int 62) (const_int 126)
10736 (const_int 63) (const_int 127)])))]
10738 "vpunpckhbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10739 [(set_attr "type" "sselog")
10740 (set_attr "prefix" "evex")
10741 (set_attr "mode" "XI")])
;; vpunpckhbw on V32QI.  The selector lists, for each group of 16 source
;; elements, indices 8..15 of the group alternating with the same indices
;; plus 32.
;; The condition includes <mask_avx512bw_condition>, matching the other
;; byte/word interleave patterns in this file, so that the masked variant
;; generated through <mask_name> is not enabled by <mask_avx512vl_condition>
;; alone: the masked (EVEX) byte unpack is an AVX512BW instruction.
10743 (define_insn "avx2_interleave_highv32qi<mask_name>"
10744 [(set (match_operand:V32QI 0 "register_operand" "=v")
10747 (match_operand:V32QI 1 "register_operand" "v")
10748 (match_operand:V32QI 2 "nonimmediate_operand" "vm"))
10749 (parallel [(const_int 8) (const_int 40)
10750 (const_int 9) (const_int 41)
10751 (const_int 10) (const_int 42)
10752 (const_int 11) (const_int 43)
10753 (const_int 12) (const_int 44)
10754 (const_int 13) (const_int 45)
10755 (const_int 14) (const_int 46)
10756 (const_int 15) (const_int 47)
10757 (const_int 24) (const_int 56)
10758 (const_int 25) (const_int 57)
10759 (const_int 26) (const_int 58)
10760 (const_int 27) (const_int 59)
10761 (const_int 28) (const_int 60)
10762 (const_int 29) (const_int 61)
10763 (const_int 30) (const_int 62)
10764 (const_int 31) (const_int 63)])))]
10765 "TARGET_AVX2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
10766 "vpunpckhbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10767 [(set_attr "type" "sselog")
10768 (set_attr "prefix" "<mask_prefix>")
10769 (set_attr "mode" "OI")])
;; punpckhbw / vpunpckhbw on V16QI.  The selector alternates indices 8..15
;; with the same indices plus 16.  Alternative 0 is the legacy two-operand
;; form; alternative 1 is the "v"-prefixed form that can carry a mask
;; operand.
;; The condition includes <mask_avx512bw_condition>, matching the other
;; byte/word interleave patterns in this file, so that the masked variant
;; generated through <mask_name> is not enabled by <mask_avx512vl_condition>
;; alone: the masked (EVEX) byte unpack is an AVX512BW instruction.
10771 (define_insn "vec_interleave_highv16qi<mask_name>"
10772 [(set (match_operand:V16QI 0 "register_operand" "=x,v")
10775 (match_operand:V16QI 1 "register_operand" "0,v")
10776 (match_operand:V16QI 2 "nonimmediate_operand" "xm,vm"))
10777 (parallel [(const_int 8) (const_int 24)
10778 (const_int 9) (const_int 25)
10779 (const_int 10) (const_int 26)
10780 (const_int 11) (const_int 27)
10781 (const_int 12) (const_int 28)
10782 (const_int 13) (const_int 29)
10783 (const_int 14) (const_int 30)
10784 (const_int 15) (const_int 31)])))]
10785 "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
10787 punpckhbw\t{%2, %0|%0, %2}
10788 vpunpckhbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10789 [(set_attr "isa" "noavx,avx")
10790 (set_attr "type" "sselog")
10791 (set_attr "prefix_data16" "1,*")
10792 (set_attr "prefix" "orig,<mask_prefix>")
10793 (set_attr "mode" "TI")])
;; vpunpcklbw on V64QI.  The selector lists, for each group of 16 source
;; elements, indices 0..7 of the group alternating with the same indices
;; plus 64.  The vec_select/vec_concat lines and the insn condition are not
;; visible in this view.
10795 (define_insn "avx512bw_interleave_lowv64qi<mask_name>"
10796 [(set (match_operand:V64QI 0 "register_operand" "=v")
10799 (match_operand:V64QI 1 "register_operand" "v")
10800 (match_operand:V64QI 2 "nonimmediate_operand" "vm"))
10801 (parallel [(const_int 0) (const_int 64)
10802 (const_int 1) (const_int 65)
10803 (const_int 2) (const_int 66)
10804 (const_int 3) (const_int 67)
10805 (const_int 4) (const_int 68)
10806 (const_int 5) (const_int 69)
10807 (const_int 6) (const_int 70)
10808 (const_int 7) (const_int 71)
10809 (const_int 16) (const_int 80)
10810 (const_int 17) (const_int 81)
10811 (const_int 18) (const_int 82)
10812 (const_int 19) (const_int 83)
10813 (const_int 20) (const_int 84)
10814 (const_int 21) (const_int 85)
10815 (const_int 22) (const_int 86)
10816 (const_int 23) (const_int 87)
10817 (const_int 32) (const_int 96)
10818 (const_int 33) (const_int 97)
10819 (const_int 34) (const_int 98)
10820 (const_int 35) (const_int 99)
10821 (const_int 36) (const_int 100)
10822 (const_int 37) (const_int 101)
10823 (const_int 38) (const_int 102)
10824 (const_int 39) (const_int 103)
10825 (const_int 48) (const_int 112)
10826 (const_int 49) (const_int 113)
10827 (const_int 50) (const_int 114)
10828 (const_int 51) (const_int 115)
10829 (const_int 52) (const_int 116)
10830 (const_int 53) (const_int 117)
10831 (const_int 54) (const_int 118)
10832 (const_int 55) (const_int 119)])))]
10834 "vpunpcklbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10835 [(set_attr "type" "sselog")
10836 (set_attr "prefix" "evex")
10837 (set_attr "mode" "XI")])
;; vpunpcklbw on V32QI.  The selector lists, for each group of 16 source
;; elements, indices 0..7 of the group alternating with the same indices
;; plus 32.  The masked variant is additionally gated on both
;; <mask_avx512vl_condition> and <mask_avx512bw_condition>.
10839 (define_insn "avx2_interleave_lowv32qi<mask_name>"
10840 [(set (match_operand:V32QI 0 "register_operand" "=v")
10843 (match_operand:V32QI 1 "register_operand" "v")
10844 (match_operand:V32QI 2 "nonimmediate_operand" "vm"))
10845 (parallel [(const_int 0) (const_int 32)
10846 (const_int 1) (const_int 33)
10847 (const_int 2) (const_int 34)
10848 (const_int 3) (const_int 35)
10849 (const_int 4) (const_int 36)
10850 (const_int 5) (const_int 37)
10851 (const_int 6) (const_int 38)
10852 (const_int 7) (const_int 39)
10853 (const_int 16) (const_int 48)
10854 (const_int 17) (const_int 49)
10855 (const_int 18) (const_int 50)
10856 (const_int 19) (const_int 51)
10857 (const_int 20) (const_int 52)
10858 (const_int 21) (const_int 53)
10859 (const_int 22) (const_int 54)
10860 (const_int 23) (const_int 55)])))]
10861 "TARGET_AVX2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
10862 "vpunpcklbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10863 [(set_attr "type" "sselog")
10864 (set_attr "prefix" "maybe_vex")
10865 (set_attr "mode" "OI")])
;; punpcklbw / vpunpcklbw on V16QI.  The selector alternates indices 0..7
;; with the same indices plus 16.  Alternative 0 is the legacy two-operand
;; form; alternative 1 is the "v"-prefixed form that can carry a mask
;; operand.
10867 (define_insn "vec_interleave_lowv16qi<mask_name>"
10868 [(set (match_operand:V16QI 0 "register_operand" "=x,v")
10871 (match_operand:V16QI 1 "register_operand" "0,v")
10872 (match_operand:V16QI 2 "nonimmediate_operand" "xm,vm"))
10873 (parallel [(const_int 0) (const_int 16)
10874 (const_int 1) (const_int 17)
10875 (const_int 2) (const_int 18)
10876 (const_int 3) (const_int 19)
10877 (const_int 4) (const_int 20)
10878 (const_int 5) (const_int 21)
10879 (const_int 6) (const_int 22)
10880 (const_int 7) (const_int 23)])))]
10881 "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
10883 punpcklbw\t{%2, %0|%0, %2}
10884 vpunpcklbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10885 [(set_attr "isa" "noavx,avx")
10886 (set_attr "type" "sselog")
10887 (set_attr "prefix_data16" "1,*")
10888 (set_attr "prefix" "orig,vex")
10889 (set_attr "mode" "TI")])
;; vpunpckhwd on V32HI.  The selector lists, for each group of 8 source
;; elements, indices 4..7 of the group alternating with the same indices
;; plus 32.  The vec_select/vec_concat lines and the insn condition are not
;; visible in this view.
10891 (define_insn "avx512bw_interleave_highv32hi<mask_name>"
10892 [(set (match_operand:V32HI 0 "register_operand" "=v")
10895 (match_operand:V32HI 1 "register_operand" "v")
10896 (match_operand:V32HI 2 "nonimmediate_operand" "vm"))
10897 (parallel [(const_int 4) (const_int 36)
10898 (const_int 5) (const_int 37)
10899 (const_int 6) (const_int 38)
10900 (const_int 7) (const_int 39)
10901 (const_int 12) (const_int 44)
10902 (const_int 13) (const_int 45)
10903 (const_int 14) (const_int 46)
10904 (const_int 15) (const_int 47)
10905 (const_int 20) (const_int 52)
10906 (const_int 21) (const_int 53)
10907 (const_int 22) (const_int 54)
10908 (const_int 23) (const_int 55)
10909 (const_int 28) (const_int 60)
10910 (const_int 29) (const_int 61)
10911 (const_int 30) (const_int 62)
10912 (const_int 31) (const_int 63)])))]
10914 "vpunpckhwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10915 [(set_attr "type" "sselog")
10916 (set_attr "prefix" "evex")
10917 (set_attr "mode" "XI")])
;; High word interleave for V16HI, emitted as vpunpckhwd.  The selection
;; list takes elements 4..7 and 12..15, each paired with the element 16
;; positions higher.  Enabled for TARGET_AVX2; the masked variant
;; additionally needs the AVX512VL and AVX512BW mask conditions.
10919 (define_insn "avx2_interleave_highv16hi<mask_name>"
10920 [(set (match_operand:V16HI 0 "register_operand" "=v")
10923 (match_operand:V16HI 1 "register_operand" "v")
10924 (match_operand:V16HI 2 "nonimmediate_operand" "vm"))
10925 (parallel [(const_int 4) (const_int 20)
10926 (const_int 5) (const_int 21)
10927 (const_int 6) (const_int 22)
10928 (const_int 7) (const_int 23)
10929 (const_int 12) (const_int 28)
10930 (const_int 13) (const_int 29)
10931 (const_int 14) (const_int 30)
10932 (const_int 15) (const_int 31)])))]
10933 "TARGET_AVX2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
10934 "vpunpckhwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10935 [(set_attr "type" "sselog")
10936 (set_attr "prefix" "maybe_evex")
10937 (set_attr "mode" "OI")])
;; High word interleave for V8HI: elements 4..7, each paired with the
;; element 8 positions higher.  Alternative 0 (isa "noavx") emits the
;; two-operand punpckhwd with operand 1 tied to the destination;
;; alternative 1 (isa "avx") emits the three-operand vpunpckhwd.
10939 (define_insn "vec_interleave_highv8hi<mask_name>"
10940 [(set (match_operand:V8HI 0 "register_operand" "=x,v")
10943 (match_operand:V8HI 1 "register_operand" "0,v")
10944 (match_operand:V8HI 2 "nonimmediate_operand" "xm,vm"))
10945 (parallel [(const_int 4) (const_int 12)
10946 (const_int 5) (const_int 13)
10947 (const_int 6) (const_int 14)
10948 (const_int 7) (const_int 15)])))]
10949 "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
10951 punpckhwd\t{%2, %0|%0, %2}
10952 vpunpckhwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10953 [(set_attr "isa" "noavx,avx")
10954 (set_attr "type" "sselog")
10955 (set_attr "prefix_data16" "1,*")
10956 (set_attr "prefix" "orig,maybe_vex")
10957 (set_attr "mode" "TI")])
;; Low word interleave for V32HI, emitted as vpunpcklwd (EVEX prefix,
;; XI mode).  Within each group of eight elements the selection list takes
;; elements 0..3 of the group (0..3, 8..11, 16..19, 24..27), each paired
;; with the element 32 positions higher.
10959 (define_insn "<mask_codefor>avx512bw_interleave_lowv32hi<mask_name>"
10960 [(set (match_operand:V32HI 0 "register_operand" "=v")
10963 (match_operand:V32HI 1 "register_operand" "v")
10964 (match_operand:V32HI 2 "nonimmediate_operand" "vm"))
10965 (parallel [(const_int 0) (const_int 32)
10966 (const_int 1) (const_int 33)
10967 (const_int 2) (const_int 34)
10968 (const_int 3) (const_int 35)
10969 (const_int 8) (const_int 40)
10970 (const_int 9) (const_int 41)
10971 (const_int 10) (const_int 42)
10972 (const_int 11) (const_int 43)
10973 (const_int 16) (const_int 48)
10974 (const_int 17) (const_int 49)
10975 (const_int 18) (const_int 50)
10976 (const_int 19) (const_int 51)
10977 (const_int 24) (const_int 56)
10978 (const_int 25) (const_int 57)
10979 (const_int 26) (const_int 58)
10980 (const_int 27) (const_int 59)])))]
10982 "vpunpcklwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10983 [(set_attr "type" "sselog")
10984 (set_attr "prefix" "evex")
10985 (set_attr "mode" "XI")])
;; Low word interleave for V16HI, emitted as vpunpcklwd.  The selection
;; list takes elements 0..3 and 8..11, each paired with the element 16
;; positions higher.  Enabled for TARGET_AVX2; the masked variant
;; additionally needs the AVX512VL and AVX512BW mask conditions.
10987 (define_insn "avx2_interleave_lowv16hi<mask_name>"
10988 [(set (match_operand:V16HI 0 "register_operand" "=v")
10991 (match_operand:V16HI 1 "register_operand" "v")
10992 (match_operand:V16HI 2 "nonimmediate_operand" "vm"))
10993 (parallel [(const_int 0) (const_int 16)
10994 (const_int 1) (const_int 17)
10995 (const_int 2) (const_int 18)
10996 (const_int 3) (const_int 19)
10997 (const_int 8) (const_int 24)
10998 (const_int 9) (const_int 25)
10999 (const_int 10) (const_int 26)
11000 (const_int 11) (const_int 27)])))]
11001 "TARGET_AVX2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
11002 "vpunpcklwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11003 [(set_attr "type" "sselog")
11004 (set_attr "prefix" "maybe_evex")
11005 (set_attr "mode" "OI")])
;; Low word interleave for V8HI: elements 0..3, each paired with the
;; element 8 positions higher.  Alternative 0 (isa "noavx") emits the
;; two-operand punpcklwd with operand 1 tied to the destination;
;; alternative 1 (isa "avx") emits the three-operand vpunpcklwd.
11007 (define_insn "vec_interleave_lowv8hi<mask_name>"
11008 [(set (match_operand:V8HI 0 "register_operand" "=x,v")
11011 (match_operand:V8HI 1 "register_operand" "0,v")
11012 (match_operand:V8HI 2 "nonimmediate_operand" "xm,vm"))
11013 (parallel [(const_int 0) (const_int 8)
11014 (const_int 1) (const_int 9)
11015 (const_int 2) (const_int 10)
11016 (const_int 3) (const_int 11)])))]
11017 "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
11019 punpcklwd\t{%2, %0|%0, %2}
11020 vpunpcklwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11021 [(set_attr "isa" "noavx,avx")
11022 (set_attr "type" "sselog")
11023 (set_attr "prefix_data16" "1,*")
11024 (set_attr "prefix" "orig,maybe_evex")
11025 (set_attr "mode" "TI")])
;; High doubleword interleave for V8SI, emitted as vpunpckhdq.  The
;; selection list takes elements 2, 3, 6 and 7, each paired with the
;; element 8 positions higher.  Enabled for TARGET_AVX2; the masked variant
;; additionally needs the AVX512VL mask condition.
11027 (define_insn "avx2_interleave_highv8si<mask_name>"
11028 [(set (match_operand:V8SI 0 "register_operand" "=v")
11031 (match_operand:V8SI 1 "register_operand" "v")
11032 (match_operand:V8SI 2 "nonimmediate_operand" "vm"))
11033 (parallel [(const_int 2) (const_int 10)
11034 (const_int 3) (const_int 11)
11035 (const_int 6) (const_int 14)
11036 (const_int 7) (const_int 15)])))]
11037 "TARGET_AVX2 && <mask_avx512vl_condition>"
11038 "vpunpckhdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11039 [(set_attr "type" "sselog")
11040 (set_attr "prefix" "maybe_evex")
11041 (set_attr "mode" "OI")])
;; High doubleword interleave for V16SI, emitted as vpunpckhdq (EVEX
;; prefix, XI mode).  Within each group of four elements the selection list
;; takes the upper two (2, 3, 6, 7, 10, 11, 14, 15), each paired with the
;; element 16 positions higher.
11043 (define_insn "<mask_codefor>avx512f_interleave_highv16si<mask_name>"
11044 [(set (match_operand:V16SI 0 "register_operand" "=v")
11047 (match_operand:V16SI 1 "register_operand" "v")
11048 (match_operand:V16SI 2 "nonimmediate_operand" "vm"))
11049 (parallel [(const_int 2) (const_int 18)
11050 (const_int 3) (const_int 19)
11051 (const_int 6) (const_int 22)
11052 (const_int 7) (const_int 23)
11053 (const_int 10) (const_int 26)
11054 (const_int 11) (const_int 27)
11055 (const_int 14) (const_int 30)
11056 (const_int 15) (const_int 31)])))]
11058 "vpunpckhdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11059 [(set_attr "type" "sselog")
11060 (set_attr "prefix" "evex")
11061 (set_attr "mode" "XI")])
;; High doubleword interleave for V4SI: elements 2 and 3, each paired with
;; the element 4 positions higher.  Alternative 0 (isa "noavx") emits the
;; two-operand punpckhdq with operand 1 tied to the destination;
;; alternative 1 (isa "avx") emits the three-operand vpunpckhdq.
11064 (define_insn "vec_interleave_highv4si<mask_name>"
11065 [(set (match_operand:V4SI 0 "register_operand" "=x,v")
11068 (match_operand:V4SI 1 "register_operand" "0,v")
11069 (match_operand:V4SI 2 "nonimmediate_operand" "xm,vm"))
11070 (parallel [(const_int 2) (const_int 6)
11071 (const_int 3) (const_int 7)])))]
11072 "TARGET_SSE2 && <mask_avx512vl_condition>"
11074 punpckhdq\t{%2, %0|%0, %2}
11075 vpunpckhdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11076 [(set_attr "isa" "noavx,avx")
11077 (set_attr "type" "sselog")
11078 (set_attr "prefix_data16" "1,*")
11079 (set_attr "prefix" "orig,maybe_vex")
11080 (set_attr "mode" "TI")])
;; Low doubleword interleave for V8SI, emitted as vpunpckldq.  The
;; selection list takes elements 0, 1, 4 and 5, each paired with the
;; element 8 positions higher.  Enabled for TARGET_AVX2; the masked variant
;; additionally needs the AVX512VL mask condition.
11082 (define_insn "avx2_interleave_lowv8si<mask_name>"
11083 [(set (match_operand:V8SI 0 "register_operand" "=v")
11086 (match_operand:V8SI 1 "register_operand" "v")
11087 (match_operand:V8SI 2 "nonimmediate_operand" "vm"))
11088 (parallel [(const_int 0) (const_int 8)
11089 (const_int 1) (const_int 9)
11090 (const_int 4) (const_int 12)
11091 (const_int 5) (const_int 13)])))]
11092 "TARGET_AVX2 && <mask_avx512vl_condition>"
11093 "vpunpckldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11094 [(set_attr "type" "sselog")
11095 (set_attr "prefix" "maybe_evex")
11096 (set_attr "mode" "OI")])
;; Low doubleword interleave for V16SI, emitted as vpunpckldq (EVEX
;; prefix, XI mode).  Within each group of four elements the selection list
;; takes the lower two (0, 1, 4, 5, 8, 9, 12, 13), each paired with the
;; element 16 positions higher.
11098 (define_insn "<mask_codefor>avx512f_interleave_lowv16si<mask_name>"
11099 [(set (match_operand:V16SI 0 "register_operand" "=v")
11102 (match_operand:V16SI 1 "register_operand" "v")
11103 (match_operand:V16SI 2 "nonimmediate_operand" "vm"))
11104 (parallel [(const_int 0) (const_int 16)
11105 (const_int 1) (const_int 17)
11106 (const_int 4) (const_int 20)
11107 (const_int 5) (const_int 21)
11108 (const_int 8) (const_int 24)
11109 (const_int 9) (const_int 25)
11110 (const_int 12) (const_int 28)
11111 (const_int 13) (const_int 29)])))]
11113 "vpunpckldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11114 [(set_attr "type" "sselog")
11115 (set_attr "prefix" "evex")
11116 (set_attr "mode" "XI")])
;; Low doubleword interleave for V4SI: elements 0 and 1, each paired with
;; the element 4 positions higher.  Alternative 0 (isa "noavx") emits the
;; two-operand punpckldq with operand 1 tied to the destination;
;; alternative 1 (isa "avx") emits the three-operand vpunpckldq.
;; NOTE(review): "prefix" is "orig,vex" here but "orig,maybe_vex" on
;; vec_interleave_highv4si<mask_name> -- confirm which is intended for the
;; maskable "v" alternative.
11118 (define_insn "vec_interleave_lowv4si<mask_name>"
11119 [(set (match_operand:V4SI 0 "register_operand" "=x,v")
11122 (match_operand:V4SI 1 "register_operand" "0,v")
11123 (match_operand:V4SI 2 "nonimmediate_operand" "xm,vm"))
11124 (parallel [(const_int 0) (const_int 4)
11125 (const_int 1) (const_int 5)])))]
11126 "TARGET_SSE2 && <mask_avx512vl_condition>"
11128 punpckldq\t{%2, %0|%0, %2}
11129 vpunpckldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11130 [(set_attr "isa" "noavx,avx")
11131 (set_attr "type" "sselog")
11132 (set_attr "prefix_data16" "1,*")
11133 (set_attr "prefix" "orig,vex")
11134 (set_attr "mode" "TI")])
;; Expander for the high interleave of two 256-bit integer vectors
;; (VI_256).  It emits both the avx2_interleave_low and avx2_interleave_high
;; patterns into temporaries, combines the two results with avx2_permv2ti
;; on V4DImode views, and moves the combined value into operand 0.
11136 (define_expand "vec_interleave_high<mode>"
11137 [(match_operand:VI_256 0 "register_operand" "=x")
11138 (match_operand:VI_256 1 "register_operand" "x")
11139 (match_operand:VI_256 2 "nonimmediate_operand" "xm")]
11142 rtx t1 = gen_reg_rtx (<MODE>mode);
11143 rtx t2 = gen_reg_rtx (<MODE>mode);
11144 rtx t3 = gen_reg_rtx (V4DImode);
/* t1 receives the avx2_interleave_low result, t2 the avx2_interleave_high
   result.  */
11145 emit_insn (gen_avx2_interleave_low<mode> (t1, operands[1], operands[2]));
11146 emit_insn (gen_avx2_interleave_high<mode> (t2, operands[1], operands[2]));
/* Selector 1 + (3 << 4): presumably picks the upper 128-bit half of t1 and
   of t2 -- confirm against the avx2_permv2ti pattern.  */
11147 emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, t1),
11148 gen_lowpart (V4DImode, t2),
11149 GEN_INT (1 + (3 << 4))));
11150 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, t3));
;; Expander for the low interleave of two 256-bit integer vectors
;; (VI_256).  It emits both the avx2_interleave_low and avx2_interleave_high
;; patterns into temporaries, combines the two results with avx2_permv2ti
;; on V4DImode views, and moves the combined value into operand 0.
11154 (define_expand "vec_interleave_low<mode>"
11155 [(match_operand:VI_256 0 "register_operand" "=x")
11156 (match_operand:VI_256 1 "register_operand" "x")
11157 (match_operand:VI_256 2 "nonimmediate_operand" "xm")]
11160 rtx t1 = gen_reg_rtx (<MODE>mode);
11161 rtx t2 = gen_reg_rtx (<MODE>mode);
11162 rtx t3 = gen_reg_rtx (V4DImode);
/* t1 receives the avx2_interleave_low result, t2 the avx2_interleave_high
   result.  */
11163 emit_insn (gen_avx2_interleave_low<mode> (t1, operands[1], operands[2]));
11164 emit_insn (gen_avx2_interleave_high<mode> (t2, operands[1], operands[2]));
/* Selector 0 + (2 << 4): presumably picks the lower 128-bit half of t1 and
   of t2 -- confirm against the avx2_permv2ti pattern.  */
11165 emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, t1),
11166 gen_lowpart (V4DImode, t2),
11167 GEN_INT (0 + (2 << 4))));
11168 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, t3));
11172 ;; Modes handled by pinsr patterns.
;; V8HI is listed unconditionally; V16QI and V4SI require TARGET_SSE4_1;
;; V2DI requires TARGET_SSE4_1 and TARGET_64BIT.
11173 (define_mode_iterator PINSR_MODE
11174 [(V16QI "TARGET_SSE4_1") V8HI
11175 (V4SI "TARGET_SSE4_1")
11176 (V2DI "TARGET_SSE4_1 && TARGET_64BIT")])
;; Pattern-name prefix used by the pinsr pattern: "sse2" for V8HI and
;; "sse4_1" for V16QI, V4SI and V2DI.
11178 (define_mode_attr sse2p4_1
11179 [(V16QI "sse4_1") (V8HI "sse2")
11180 (V4SI "sse4_1") (V2DI "sse4_1")])
11182 ;; sse4_1_pinsrd must come before sse2_loadld since it is preferred.
;; Insert scalar operand 2 into one element of vector operand 1
;; (vec_merge of a vec_duplicate of operand 2 with operand 1, selected by
;; constant operand 3).  Alternatives 0/1 (isa "noavx") tie operand 1 to
;; the destination and emit pinsr<ssemodesuffix>; alternatives 2/3
;; (isa "avx") emit the three-operand vpinsr<ssemodesuffix>.  In each pair
;; the scalar comes from a general register ("r") or from memory ("m").
11183 (define_insn "<sse2p4_1>_pinsr<ssemodesuffix>"
11184 [(set (match_operand:PINSR_MODE 0 "register_operand" "=x,x,x,x")
11185 (vec_merge:PINSR_MODE
11186 (vec_duplicate:PINSR_MODE
11187 (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "r,m,r,m"))
11188 (match_operand:PINSR_MODE 1 "register_operand" "0,0,x,x")
11189 (match_operand:SI 3 "const_int_operand")))]
11191 && ((unsigned) exact_log2 (INTVAL (operands[3]))
11192 < GET_MODE_NUNITS (<MODE>mode))"
/* The insn condition only accepts operand 3 values whose exact_log2 is
   below the element count; that log2 is what gets printed as the
   immediate.  */
11194 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
11196 switch (which_alternative)
/* When the scalar mode is narrower than SImode, the register operand is
   printed with the %k modifier.  */
11199 if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
11200 return "pinsr<ssemodesuffix>\t{%3, %k2, %0|%0, %k2, %3}";
11203 return "pinsr<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}";
11205 if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
11206 return "vpinsr<ssemodesuffix>\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
11209 return "vpinsr<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
11211 gcc_unreachable ();
11214 [(set_attr "isa" "noavx,noavx,avx,avx")
11215 (set_attr "type" "sselog")
;; prefix_rex is keyed on the non-AVX V2DI case.
11216 (set (attr "prefix_rex")
11218 (and (not (match_test "TARGET_AVX"))
11219 (eq (const_string "<MODE>mode") (const_string "V2DImode")))
11221 (const_string "*")))
;; prefix_data16 and prefix_extra are both keyed on the non-AVX V8HI case.
11222 (set (attr "prefix_data16")
11224 (and (not (match_test "TARGET_AVX"))
11225 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
11227 (const_string "*")))
11228 (set (attr "prefix_extra")
11230 (and (not (match_test "TARGET_AVX"))
11231 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
11233 (const_string "1")))
11234 (set_attr "length_immediate" "1")
11235 (set_attr "prefix" "orig,orig,vex,vex")
11236 (set_attr "mode" "TI")])
;; Masked quarter-vector insert expander for AVX512_VEC modes.  Operand 2
;; is the quarter-width value, operand 3 the insert position (0..3),
;; operands 4 and 5 are passed through to the *_1_mask insn unchanged.
;; The position is converted into a vec_merge selector constant before the
;; insn is generated.
11238 (define_expand "<extract_type>_vinsert<shuffletype><extract_suf>_mask"
11239 [(match_operand:AVX512_VEC 0 "register_operand")
11240 (match_operand:AVX512_VEC 1 "register_operand")
11241 (match_operand:<ssequartermode> 2 "nonimmediate_operand")
11242 (match_operand:SI 3 "const_0_to_3_operand")
11243 (match_operand:AVX512_VEC 4 "register_operand")
11244 (match_operand:<avx512fmaskmode> 5 "register_operand")]
11248 mask = INTVAL (operands[3]);
/* 4-byte elements: 16-bit selector with one nibble cleared; otherwise an
   8-bit selector with one bit pair cleared.  Position 0 clears the TOP
   nibble/pair (0x0FFF / 0x3F), position 3 the bottom one (0xFFF0 / 0xFC).
   NOTE(review): vec_merge takes element N from its first operand when bit
   N is set, so position 0 as encoded here replaces the highest quarter in
   RTL terms -- confirm this agrees with the instruction's immediate, and
   keep the decoder in the *_1 insn in sync if it is changed.  */
11249 selector = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) == 4 ?
11250 0xFFFF ^ (0xF000 >> mask * 4)
11251 : 0xFF ^ (0xC0 >> mask * 2);
11252 emit_insn (gen_<extract_type>_vinsert<shuffletype><extract_suf>_1_mask
11253 (operands[0], operands[1], operands[2], GEN_INT (selector),
11254 operands[4], operands[5]));
;; Quarter-vector insert: vec_merge of operand 1 with a vec_duplicate of
;; the quarter-width operand 2, selected by constant operand 3.  The output
;; code recognizes exactly four selector values per element size (16-bit
;; form || 8-bit form), rewrites operand 3 to the value held in "mask", and
;; prints vinsert<shuffletype><extract_suf>.  Any other selector reaches
;; gcc_unreachable.
;; NOTE(review): the accepted selectors 0xFFF/0x3F clear the TOP bits of
;; the vec_merge selector; vec_merge takes element N from its first operand
;; when bit N is set -- confirm the selector-to-immediate mapping matches
;; the RTL semantics, together with the expander that builds the selector.
11258 (define_insn "<mask_codefor><extract_type>_vinsert<shuffletype><extract_suf>_1<mask_name>"
11259 [(set (match_operand:AVX512_VEC 0 "register_operand" "=v")
11260 (vec_merge:AVX512_VEC
11261 (match_operand:AVX512_VEC 1 "register_operand" "v")
11262 (vec_duplicate:AVX512_VEC
11263 (match_operand:<ssequartermode> 2 "nonimmediate_operand" "vm"))
11264 (match_operand:SI 3 "const_int_operand" "n")))]
11268 int selector = INTVAL (operands[3]);
11270 if (selector == 0xFFF || selector == 0x3F)
11272 else if ( selector == 0xF0FF || selector == 0xCF)
11274 else if ( selector == 0xFF0F || selector == 0xF3)
11276 else if ( selector == 0xFFF0 || selector == 0xFC)
11279 gcc_unreachable ();
11281 operands[3] = GEN_INT (mask);
11283 return "vinsert<shuffletype><extract_suf>\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}";
11285 [(set_attr "type" "sselog")
11286 (set_attr "length_immediate" "1")
11287 (set_attr "prefix" "evex")
11288 (set_attr "mode" "<sseinsnmode>")])
;; Masked half-vector insert expander for AVX512_VEC_2 modes.  Operand 2 is
;; the half-width value and operand 3 the insert position (0..1).  The
;; expander emits either vec_set_lo_<mode>_mask or vec_set_hi_<mode>_mask,
;; passing operands 0, 1, 2, 4 and 5 through; presumably the choice is
;; made on the position value read into "mask".
11290 (define_expand "<extract_type_2>_vinsert<shuffletype><extract_suf_2>_mask"
11291 [(match_operand:AVX512_VEC_2 0 "register_operand")
11292 (match_operand:AVX512_VEC_2 1 "register_operand")
11293 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand")
11294 (match_operand:SI 3 "const_0_to_1_operand")
11295 (match_operand:AVX512_VEC_2 4 "register_operand")
11296 (match_operand:<avx512fmaskmode> 5 "register_operand")]
11299 int mask = INTVAL (operands[3]);
11301 emit_insn (gen_vec_set_lo_<mode>_mask
11302 (operands[0], operands[1], operands[2],
11303 operands[4], operands[5]));
11305 emit_insn (gen_vec_set_hi_<mode>_mask
11306 (operands[0], operands[1], operands[2],
11307 operands[4], operands[5]));
;; Replace one half of a V16FI vector: the result is built from the
;; half-width operand 2 and elements 8..15 of operand 1, and is emitted as
;; vinsert<shuffletype>32x8 with immediate 0.
;; The immediate is written "$0x0" in the AT&T half of the template and as
;; a bare "0x0" in the Intel half, because '$' is the AT&T immediate
;; prefix and is not valid immediate syntax in the Intel dialect.
11311 (define_insn "vec_set_lo_<mode><mask_name>"
11312 [(set (match_operand:V16FI 0 "register_operand" "=v")
11314 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
11315 (vec_select:<ssehalfvecmode>
11316 (match_operand:V16FI 1 "register_operand" "v")
11317 (parallel [(const_int 8) (const_int 9)
11318 (const_int 10) (const_int 11)
11319 (const_int 12) (const_int 13)
11320 (const_int 14) (const_int 15)]))))]
11322 "vinsert<shuffletype>32x8\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}"
11323 [(set_attr "type" "sselog")
11324 (set_attr "length_immediate" "1")
11325 (set_attr "prefix" "evex")
11326 (set_attr "mode" "<sseinsnmode>")])
;; Replace one half of a V16FI vector: the result is built from the
;; half-width operand 2 and elements 0..7 of operand 1, and is emitted as
;; vinsert<shuffletype>32x8 with immediate 1.
;; The immediate is written "$0x1" in the AT&T half of the template and as
;; a bare "0x1" in the Intel half, because '$' is the AT&T immediate
;; prefix and is not valid immediate syntax in the Intel dialect.
;; NOTE(review): operand 2 is listed before the selected half of operand 1,
;; exactly as in vec_set_lo_<mode>.  If the enclosing expression is an
;; ordered concatenation, this places operand 2 in the low half even though
;; the template inserts at position 1 -- confirm the operand order.
11328 (define_insn "vec_set_hi_<mode><mask_name>"
11329 [(set (match_operand:V16FI 0 "register_operand" "=v")
11331 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
11332 (vec_select:<ssehalfvecmode>
11333 (match_operand:V16FI 1 "register_operand" "v")
11334 (parallel [(const_int 0) (const_int 1)
11335 (const_int 2) (const_int 3)
11336 (const_int 4) (const_int 5)
11337 (const_int 6) (const_int 7)]))))]
11339 "vinsert<shuffletype>32x8\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}"
11340 [(set_attr "type" "sselog")
11341 (set_attr "length_immediate" "1")
11342 (set_attr "prefix" "evex")
11343 (set_attr "mode" "<sseinsnmode>")])
;; Replace one half of a V8FI vector: the result is built from the
;; half-width operand 2 and elements 4..7 of operand 1, and is emitted as
;; vinsert<shuffletype>64x4 with immediate 0.
;; The immediate is written "$0x0" in the AT&T half of the template and as
;; a bare "0x0" in the Intel half, because '$' is the AT&T immediate
;; prefix and is not valid immediate syntax in the Intel dialect.
11345 (define_insn "vec_set_lo_<mode><mask_name>"
11346 [(set (match_operand:V8FI 0 "register_operand" "=v")
11348 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
11349 (vec_select:<ssehalfvecmode>
11350 (match_operand:V8FI 1 "register_operand" "v")
11351 (parallel [(const_int 4) (const_int 5)
11352 (const_int 6) (const_int 7)]))))]
11354 "vinsert<shuffletype>64x4\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}"
11355 [(set_attr "type" "sselog")
11356 (set_attr "length_immediate" "1")
11357 (set_attr "prefix" "evex")
11358 (set_attr "mode" "XI")])
;; Replace one half of a V8FI vector: the result is built from the
;; half-width operand 2 and elements 0..3 of operand 1, and is emitted as
;; vinsert<shuffletype>64x4 with immediate 1.
;; The immediate is written "$0x1" in the AT&T half of the template and as
;; a bare "0x1" in the Intel half, because '$' is the AT&T immediate
;; prefix and is not valid immediate syntax in the Intel dialect.
;; NOTE(review): operand 2 is listed before the selected half of operand 1,
;; exactly as in vec_set_lo_<mode>.  If the enclosing expression is an
;; ordered concatenation, this places operand 2 in the low half even though
;; the template inserts at position 1 -- confirm the operand order.
11360 (define_insn "vec_set_hi_<mode><mask_name>"
11361 [(set (match_operand:V8FI 0 "register_operand" "=v")
11363 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
11364 (vec_select:<ssehalfvecmode>
11365 (match_operand:V8FI 1 "register_operand" "v")
11366 (parallel [(const_int 0) (const_int 1)
11367 (const_int 2) (const_int 3)]))))]
11369 "vinsert<shuffletype>64x4\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}"
11370 [(set_attr "type" "sselog")
11371 (set_attr "length_immediate" "1")
11372 (set_attr "prefix" "evex")
11373 (set_attr "mode" "XI")])
;; Masked 64x2 shuffle expander for 256-bit modes (VI8F_256).  Operand 3 is
;; a 2-bit immediate (0..3) that is expanded into four explicit element
;; indices for the *_1_mask insn; operand 4 and the QImode operand 5 are
;; passed through unchanged.
11375 (define_expand "avx512dq_shuf_<shuffletype>64x2_mask"
11376 [(match_operand:VI8F_256 0 "register_operand")
11377 (match_operand:VI8F_256 1 "register_operand")
11378 (match_operand:VI8F_256 2 "nonimmediate_operand")
11379 (match_operand:SI 3 "const_0_to_3_operand")
11380 (match_operand:VI8F_256 4 "register_operand")
11381 (match_operand:QI 5 "register_operand")]
11384 int mask = INTVAL (operands[3]);
/* Bit 0 chooses the index pair (0,1) or (2,3); bit 1 chooses the pair
   (4,5) or (6,7).  */
11385 emit_insn (gen_avx512dq_shuf_<shuffletype>64x2_1_mask
11386 (operands[0], operands[1], operands[2],
11387 GEN_INT (((mask >> 0) & 1) * 2 + 0),
11388 GEN_INT (((mask >> 0) & 1) * 2 + 1),
11389 GEN_INT (((mask >> 1) & 1) * 2 + 4),
11390 GEN_INT (((mask >> 1) & 1) * 2 + 5),
11391 operands[4], operands[5]));
;; 64x2 shuffle on 256-bit modes: a vec_select over the vec_concat of
;; operands 1 and 2 with four explicit indices.  Operands 3/4 must lie in
;; 0..3 and operands 5/6 in 4..7, and the condition requires each pair to
;; be consecutive.  The output code folds the indices into a 2-bit
;; immediate: bit 0 = operands[3] / 2, bit 1 = (operands[5] - 4) / 2.
;; NOTE(review): "mode" is "XI" although the operands are VI8F_256; the
;; 256-bit 32x4 counterpart uses <sseinsnmode> -- confirm.
11395 (define_insn "<mask_codefor>avx512dq_shuf_<shuffletype>64x2_1<mask_name>"
11396 [(set (match_operand:VI8F_256 0 "register_operand" "=v")
11397 (vec_select:VI8F_256
11398 (vec_concat:<ssedoublemode>
11399 (match_operand:VI8F_256 1 "register_operand" "v")
11400 (match_operand:VI8F_256 2 "nonimmediate_operand" "vm"))
11401 (parallel [(match_operand 3 "const_0_to_3_operand")
11402 (match_operand 4 "const_0_to_3_operand")
11403 (match_operand 5 "const_4_to_7_operand")
11404 (match_operand 6 "const_4_to_7_operand")])))]
11406 && (INTVAL (operands[3]) == (INTVAL (operands[4]) - 1)
11407 && INTVAL (operands[5]) == (INTVAL (operands[6]) - 1))"
11410 mask = INTVAL (operands[3]) / 2;
11411 mask |= (INTVAL (operands[5]) - 4) / 2 << 1;
11412 operands[3] = GEN_INT (mask);
11413 return "vshuf<shuffletype>64x2\t{%3, %2, %1, %0<mask_operand7>|%0<mask_operand7>, %1, %2, %3}";
11415 [(set_attr "type" "sselog")
11416 (set_attr "length_immediate" "1")
11417 (set_attr "prefix" "evex")
11418 (set_attr "mode" "XI")])
;; Masked 64x2 shuffle expander for V8FI modes.  Operand 3 is an 8-bit
;; immediate made of four 2-bit fields; each field is expanded into a pair
;; of consecutive element indices for the *_1_mask insn.  Operand 4 and the
;; QImode operand 5 are passed through unchanged.
11420 (define_expand "avx512f_shuf_<shuffletype>64x2_mask"
11421 [(match_operand:V8FI 0 "register_operand")
11422 (match_operand:V8FI 1 "register_operand")
11423 (match_operand:V8FI 2 "nonimmediate_operand")
11424 (match_operand:SI 3 "const_0_to_255_operand")
11425 (match_operand:V8FI 4 "register_operand")
11426 (match_operand:QI 5 "register_operand")]
11429 int mask = INTVAL (operands[3]);
/* Fields at bits 0 and 2 give pairs starting at 0, 2, 4 or 6; fields at
   bits 4 and 6 give pairs starting at 8, 10, 12 or 14.  */
11430 emit_insn (gen_avx512f_shuf_<shuffletype>64x2_1_mask
11431 (operands[0], operands[1], operands[2],
11432 GEN_INT (((mask >> 0) & 3) * 2),
11433 GEN_INT (((mask >> 0) & 3) * 2 + 1),
11434 GEN_INT (((mask >> 2) & 3) * 2),
11435 GEN_INT (((mask >> 2) & 3) * 2 + 1),
11436 GEN_INT (((mask >> 4) & 3) * 2 + 8),
11437 GEN_INT (((mask >> 4) & 3) * 2 + 9),
11438 GEN_INT (((mask >> 6) & 3) * 2 + 8),
11439 GEN_INT (((mask >> 6) & 3) * 2 + 9),
11440 operands[4], operands[5]));
;; 64x2 shuffle on V8FI modes, selecting from the vec_concat of operands 1
;; and 2 with eight explicit indices.  Operands 3..6 must lie in 0..7 and
;; operands 7..10 in 8..15; the condition requires the pairs (3,4), (5,6),
;; (7,8) and (9,10) to be consecutive.  The output code packs the pair
;; starts into an 8-bit immediate, two bits per pair, and prints
;; vshuf<shuffletype>64x2.
11444 (define_insn "avx512f_shuf_<shuffletype>64x2_1<mask_name>"
11445 [(set (match_operand:V8FI 0 "register_operand" "=v")
11447 (vec_concat:<ssedoublemode>
11448 (match_operand:V8FI 1 "register_operand" "v")
11449 (match_operand:V8FI 2 "nonimmediate_operand" "vm"))
11450 (parallel [(match_operand 3 "const_0_to_7_operand")
11451 (match_operand 4 "const_0_to_7_operand")
11452 (match_operand 5 "const_0_to_7_operand")
11453 (match_operand 6 "const_0_to_7_operand")
11454 (match_operand 7 "const_8_to_15_operand")
11455 (match_operand 8 "const_8_to_15_operand")
11456 (match_operand 9 "const_8_to_15_operand")
11457 (match_operand 10 "const_8_to_15_operand")])))]
11459 && (INTVAL (operands[3]) == (INTVAL (operands[4]) - 1)
11460 && INTVAL (operands[5]) == (INTVAL (operands[6]) - 1)
11461 && INTVAL (operands[7]) == (INTVAL (operands[8]) - 1)
11462 && INTVAL (operands[9]) == (INTVAL (operands[10]) - 1))"
11465 mask = INTVAL (operands[3]) / 2;
11466 mask |= INTVAL (operands[5]) / 2 << 2;
11467 mask |= (INTVAL (operands[7]) - 8) / 2 << 4;
11468 mask |= (INTVAL (operands[9]) - 8) / 2 << 6;
11469 operands[3] = GEN_INT (mask);
11471 return "vshuf<shuffletype>64x2\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
11473 [(set_attr "type" "sselog")
11474 (set_attr "length_immediate" "1")
11475 (set_attr "prefix" "evex")
11476 (set_attr "mode" "<sseinsnmode>")])
;; Masked 32x4 shuffle expander for 256-bit modes (VI4F_256).  Operand 3 is
;; a 2-bit immediate (0..3) that is expanded into eight explicit element
;; indices for the *_1_mask insn; operand 4 and the QImode operand 5 are
;; passed through unchanged.
11478 (define_expand "avx512vl_shuf_<shuffletype>32x4_mask"
11479 [(match_operand:VI4F_256 0 "register_operand")
11480 (match_operand:VI4F_256 1 "register_operand")
11481 (match_operand:VI4F_256 2 "nonimmediate_operand")
11482 (match_operand:SI 3 "const_0_to_3_operand")
11483 (match_operand:VI4F_256 4 "register_operand")
11484 (match_operand:QI 5 "register_operand")]
11487 int mask = INTVAL (operands[3]);
/* Bit 0 chooses indices 0..3 or 4..7; bit 1 chooses 8..11 or 12..15.  */
11488 emit_insn (gen_avx512vl_shuf_<shuffletype>32x4_1_mask
11489 (operands[0], operands[1], operands[2],
11490 GEN_INT (((mask >> 0) & 1) * 4 + 0),
11491 GEN_INT (((mask >> 0) & 1) * 4 + 1),
11492 GEN_INT (((mask >> 0) & 1) * 4 + 2),
11493 GEN_INT (((mask >> 0) & 1) * 4 + 3),
11494 GEN_INT (((mask >> 1) & 1) * 4 + 8),
11495 GEN_INT (((mask >> 1) & 1) * 4 + 9),
11496 GEN_INT (((mask >> 1) & 1) * 4 + 10),
11497 GEN_INT (((mask >> 1) & 1) * 4 + 11),
11498 operands[4], operands[5]));
;; 32x4 shuffle on 256-bit modes: a vec_select over the vec_concat of
;; operands 1 and 2 with eight explicit indices.  Operands 3..6 must lie in
;; 0..7 and operands 7..10 in 8..15; the condition requires operands 3..6
;; and operands 7..10 to each form a run of four consecutive indices.  The
;; output code folds the run starts into a 2-bit immediate:
;; bit 0 = operands[3] / 4, bit 1 = (operands[7] - 8) / 4.
11502 (define_insn "<mask_codefor>avx512vl_shuf_<shuffletype>32x4_1<mask_name>"
11503 [(set (match_operand:VI4F_256 0 "register_operand" "=v")
11504 (vec_select:VI4F_256
11505 (vec_concat:<ssedoublemode>
11506 (match_operand:VI4F_256 1 "register_operand" "v")
11507 (match_operand:VI4F_256 2 "nonimmediate_operand" "vm"))
11508 (parallel [(match_operand 3 "const_0_to_7_operand")
11509 (match_operand 4 "const_0_to_7_operand")
11510 (match_operand 5 "const_0_to_7_operand")
11511 (match_operand 6 "const_0_to_7_operand")
11512 (match_operand 7 "const_8_to_15_operand")
11513 (match_operand 8 "const_8_to_15_operand")
11514 (match_operand 9 "const_8_to_15_operand")
11515 (match_operand 10 "const_8_to_15_operand")])))]
11517 && (INTVAL (operands[3]) == (INTVAL (operands[4]) - 1)
11518 && INTVAL (operands[3]) == (INTVAL (operands[5]) - 2)
11519 && INTVAL (operands[3]) == (INTVAL (operands[6]) - 3)
11520 && INTVAL (operands[7]) == (INTVAL (operands[8]) - 1)
11521 && INTVAL (operands[7]) == (INTVAL (operands[9]) - 2)
11522 && INTVAL (operands[7]) == (INTVAL (operands[10]) - 3))"
11525 mask = INTVAL (operands[3]) / 4;
11526 mask |= (INTVAL (operands[7]) - 8) / 4 << 1;
11527 operands[3] = GEN_INT (mask);
11529 return "vshuf<shuffletype>32x4\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
11531 [(set_attr "type" "sselog")
11532 (set_attr "length_immediate" "1")
11533 (set_attr "prefix" "evex")
11534 (set_attr "mode" "<sseinsnmode>")])
;; Masked 32x4 shuffle expander for V16FI modes.  Operand 3 is an 8-bit
;; immediate made of four 2-bit fields; each field is expanded into a run
;; of four consecutive element indices for the *_1_mask insn.  Operand 4
;; and the HImode operand 5 are passed through unchanged.
11536 (define_expand "avx512f_shuf_<shuffletype>32x4_mask"
11537 [(match_operand:V16FI 0 "register_operand")
11538 (match_operand:V16FI 1 "register_operand")
11539 (match_operand:V16FI 2 "nonimmediate_operand")
11540 (match_operand:SI 3 "const_0_to_255_operand")
11541 (match_operand:V16FI 4 "register_operand")
11542 (match_operand:HI 5 "register_operand")]
11545 int mask = INTVAL (operands[3]);
/* Fields at bits 0 and 2 give runs starting at 0, 4, 8 or 12; fields at
   bits 4 and 6 give runs starting at 16, 20, 24 or 28.  */
11546 emit_insn (gen_avx512f_shuf_<shuffletype>32x4_1_mask
11547 (operands[0], operands[1], operands[2],
11548 GEN_INT (((mask >> 0) & 3) * 4),
11549 GEN_INT (((mask >> 0) & 3) * 4 + 1),
11550 GEN_INT (((mask >> 0) & 3) * 4 + 2),
11551 GEN_INT (((mask >> 0) & 3) * 4 + 3),
11552 GEN_INT (((mask >> 2) & 3) * 4),
11553 GEN_INT (((mask >> 2) & 3) * 4 + 1),
11554 GEN_INT (((mask >> 2) & 3) * 4 + 2),
11555 GEN_INT (((mask >> 2) & 3) * 4 + 3),
11556 GEN_INT (((mask >> 4) & 3) * 4 + 16),
11557 GEN_INT (((mask >> 4) & 3) * 4 + 17),
11558 GEN_INT (((mask >> 4) & 3) * 4 + 18),
11559 GEN_INT (((mask >> 4) & 3) * 4 + 19),
11560 GEN_INT (((mask >> 6) & 3) * 4 + 16),
11561 GEN_INT (((mask >> 6) & 3) * 4 + 17),
11562 GEN_INT (((mask >> 6) & 3) * 4 + 18),
11563 GEN_INT (((mask >> 6) & 3) * 4 + 19),
11564 operands[4], operands[5]));
;; 32x4 shuffle on V16FI modes, selecting from the vec_concat of operands 1
;; and 2 with sixteen explicit indices.  Operands 3..10 must lie in 0..15
;; and operands 11..18 in 16..31; the condition requires operands 3..6,
;; 7..10, 11..14 and 15..18 to each form a run of four consecutive indices.
;; The output code packs the run starts into an 8-bit immediate, two bits
;; per run, and prints vshuf<shuffletype>32x4.
11568 (define_insn "avx512f_shuf_<shuffletype>32x4_1<mask_name>"
11569 [(set (match_operand:V16FI 0 "register_operand" "=v")
11571 (vec_concat:<ssedoublemode>
11572 (match_operand:V16FI 1 "register_operand" "v")
11573 (match_operand:V16FI 2 "nonimmediate_operand" "vm"))
11574 (parallel [(match_operand 3 "const_0_to_15_operand")
11575 (match_operand 4 "const_0_to_15_operand")
11576 (match_operand 5 "const_0_to_15_operand")
11577 (match_operand 6 "const_0_to_15_operand")
11578 (match_operand 7 "const_0_to_15_operand")
11579 (match_operand 8 "const_0_to_15_operand")
11580 (match_operand 9 "const_0_to_15_operand")
11581 (match_operand 10 "const_0_to_15_operand")
11582 (match_operand 11 "const_16_to_31_operand")
11583 (match_operand 12 "const_16_to_31_operand")
11584 (match_operand 13 "const_16_to_31_operand")
11585 (match_operand 14 "const_16_to_31_operand")
11586 (match_operand 15 "const_16_to_31_operand")
11587 (match_operand 16 "const_16_to_31_operand")
11588 (match_operand 17 "const_16_to_31_operand")
11589 (match_operand 18 "const_16_to_31_operand")])))]
11591 && (INTVAL (operands[3]) == (INTVAL (operands[4]) - 1)
11592 && INTVAL (operands[3]) == (INTVAL (operands[5]) - 2)
11593 && INTVAL (operands[3]) == (INTVAL (operands[6]) - 3)
11594 && INTVAL (operands[7]) == (INTVAL (operands[8]) - 1)
11595 && INTVAL (operands[7]) == (INTVAL (operands[9]) - 2)
11596 && INTVAL (operands[7]) == (INTVAL (operands[10]) - 3)
11597 && INTVAL (operands[11]) == (INTVAL (operands[12]) - 1)
11598 && INTVAL (operands[11]) == (INTVAL (operands[13]) - 2)
11599 && INTVAL (operands[11]) == (INTVAL (operands[14]) - 3)
11600 && INTVAL (operands[15]) == (INTVAL (operands[16]) - 1)
11601 && INTVAL (operands[15]) == (INTVAL (operands[17]) - 2)
11602 && INTVAL (operands[15]) == (INTVAL (operands[18]) - 3))"
11605 mask = INTVAL (operands[3]) / 4;
11606 mask |= INTVAL (operands[7]) / 4 << 2;
11607 mask |= (INTVAL (operands[11]) - 16) / 4 << 4;
11608 mask |= (INTVAL (operands[15]) - 16) / 4 << 6;
11609 operands[3] = GEN_INT (mask);
11611 return "vshuf<shuffletype>32x4\t{%3, %2, %1, %0<mask_operand19>|%0<mask_operand19>, %1, %2, %3}";
11613 [(set_attr "type" "sselog")
11614 (set_attr "length_immediate" "1")
11615 (set_attr "prefix" "evex")
11616 (set_attr "mode" "<sseinsnmode>")])
;; Masked pshufd expander for V16SI.  Operand 2 is an 8-bit immediate made
;; of four 2-bit fields.  The same four fields are emitted four times, with
;; offsets 0, 4, 8 and 12, giving the sixteen explicit indices expected by
;; avx512f_pshufd_1_mask.  Operand 3 and the HImode operand 4 are passed
;; through unchanged.
11618 (define_expand "avx512f_pshufdv3_mask"
11619 [(match_operand:V16SI 0 "register_operand")
11620 (match_operand:V16SI 1 "nonimmediate_operand")
11621 (match_operand:SI 2 "const_0_to_255_operand")
11622 (match_operand:V16SI 3 "register_operand")
11623 (match_operand:HI 4 "register_operand")]
11626 int mask = INTVAL (operands[2]);
11627 emit_insn (gen_avx512f_pshufd_1_mask (operands[0], operands[1],
11628 GEN_INT ((mask >> 0) & 3),
11629 GEN_INT ((mask >> 2) & 3),
11630 GEN_INT ((mask >> 4) & 3),
11631 GEN_INT ((mask >> 6) & 3),
11632 GEN_INT (((mask >> 0) & 3) + 4),
11633 GEN_INT (((mask >> 2) & 3) + 4),
11634 GEN_INT (((mask >> 4) & 3) + 4),
11635 GEN_INT (((mask >> 6) & 3) + 4),
11636 GEN_INT (((mask >> 0) & 3) + 8),
11637 GEN_INT (((mask >> 2) & 3) + 8),
11638 GEN_INT (((mask >> 4) & 3) + 8),
11639 GEN_INT (((mask >> 6) & 3) + 8),
11640 GEN_INT (((mask >> 0) & 3) + 12),
11641 GEN_INT (((mask >> 2) & 3) + 12),
11642 GEN_INT (((mask >> 4) & 3) + 12),
11643 GEN_INT (((mask >> 6) & 3) + 12),
11644 operands[3], operands[4]));
;; vpshufd on V16SI with sixteen explicit element indices taken from
;; operand 1.  Operands 2..5 must lie in 0..3, and the condition requires
;; operands 6..9, 10..13 and 14..17 to repeat operands 2..5 with offsets 4,
;; 8 and 12.  The output code packs operands 2..5 into an 8-bit immediate,
;; two bits each, and stores it back in operand 2.
11648 (define_insn "avx512f_pshufd_1<mask_name>"
11649 [(set (match_operand:V16SI 0 "register_operand" "=v")
11651 (match_operand:V16SI 1 "nonimmediate_operand" "vm")
11652 (parallel [(match_operand 2 "const_0_to_3_operand")
11653 (match_operand 3 "const_0_to_3_operand")
11654 (match_operand 4 "const_0_to_3_operand")
11655 (match_operand 5 "const_0_to_3_operand")
11656 (match_operand 6 "const_4_to_7_operand")
11657 (match_operand 7 "const_4_to_7_operand")
11658 (match_operand 8 "const_4_to_7_operand")
11659 (match_operand 9 "const_4_to_7_operand")
11660 (match_operand 10 "const_8_to_11_operand")
11661 (match_operand 11 "const_8_to_11_operand")
11662 (match_operand 12 "const_8_to_11_operand")
11663 (match_operand 13 "const_8_to_11_operand")
11664 (match_operand 14 "const_12_to_15_operand")
11665 (match_operand 15 "const_12_to_15_operand")
11666 (match_operand 16 "const_12_to_15_operand")
11667 (match_operand 17 "const_12_to_15_operand")])))]
11669 && INTVAL (operands[2]) + 4 == INTVAL (operands[6])
11670 && INTVAL (operands[3]) + 4 == INTVAL (operands[7])
11671 && INTVAL (operands[4]) + 4 == INTVAL (operands[8])
11672 && INTVAL (operands[5]) + 4 == INTVAL (operands[9])
11673 && INTVAL (operands[2]) + 8 == INTVAL (operands[10])
11674 && INTVAL (operands[3]) + 8 == INTVAL (operands[11])
11675 && INTVAL (operands[4]) + 8 == INTVAL (operands[12])
11676 && INTVAL (operands[5]) + 8 == INTVAL (operands[13])
11677 && INTVAL (operands[2]) + 12 == INTVAL (operands[14])
11678 && INTVAL (operands[3]) + 12 == INTVAL (operands[15])
11679 && INTVAL (operands[4]) + 12 == INTVAL (operands[16])
11680 && INTVAL (operands[5]) + 12 == INTVAL (operands[17])"
11683 mask |= INTVAL (operands[2]) << 0;
11684 mask |= INTVAL (operands[3]) << 2;
11685 mask |= INTVAL (operands[4]) << 4;
11686 mask |= INTVAL (operands[5]) << 6;
11687 operands[2] = GEN_INT (mask);
11689 return "vpshufd\t{%2, %1, %0<mask_operand18>|%0<mask_operand18>, %1, %2}";
11691 [(set_attr "type" "sselog1")
11692 (set_attr "prefix" "evex")
11693 (set_attr "length_immediate" "1")
11694 (set_attr "mode" "XI")])
;; Masked pshufd expander for V8SI.  Operand 2 is an 8-bit immediate made
;; of four 2-bit fields.  The same four fields are emitted twice, with
;; offsets 0 and 4, giving the eight explicit indices expected by
;; avx2_pshufd_1_mask.  Operand 3 and the QImode operand 4 are passed
;; through unchanged.
11696 (define_expand "avx512vl_pshufdv3_mask"
11697 [(match_operand:V8SI 0 "register_operand")
11698 (match_operand:V8SI 1 "nonimmediate_operand")
11699 (match_operand:SI 2 "const_0_to_255_operand")
11700 (match_operand:V8SI 3 "register_operand")
11701 (match_operand:QI 4 "register_operand")]
11704 int mask = INTVAL (operands[2]);
11705 emit_insn (gen_avx2_pshufd_1_mask (operands[0], operands[1],
11706 GEN_INT ((mask >> 0) & 3),
11707 GEN_INT ((mask >> 2) & 3),
11708 GEN_INT ((mask >> 4) & 3),
11709 GEN_INT ((mask >> 6) & 3),
11710 GEN_INT (((mask >> 0) & 3) + 4),
11711 GEN_INT (((mask >> 2) & 3) + 4),
11712 GEN_INT (((mask >> 4) & 3) + 4),
11713 GEN_INT (((mask >> 6) & 3) + 4),
11714 operands[3], operands[4]));
;; Unmasked pshufd expander for V8SI.  Operand 2 is an 8-bit immediate made
;; of four 2-bit fields.  The same four fields are emitted twice, with
;; offsets 0 and 4, giving the eight explicit indices expected by
;; avx2_pshufd_1.
11718 (define_expand "avx2_pshufdv3"
11719 [(match_operand:V8SI 0 "register_operand")
11720 (match_operand:V8SI 1 "nonimmediate_operand")
11721 (match_operand:SI 2 "const_0_to_255_operand")]
11724 int mask = INTVAL (operands[2]);
11725 emit_insn (gen_avx2_pshufd_1 (operands[0], operands[1],
11726 GEN_INT ((mask >> 0) & 3),
11727 GEN_INT ((mask >> 2) & 3),
11728 GEN_INT ((mask >> 4) & 3),
11729 GEN_INT ((mask >> 6) & 3),
11730 GEN_INT (((mask >> 0) & 3) + 4),
11731 GEN_INT (((mask >> 2) & 3) + 4),
11732 GEN_INT (((mask >> 4) & 3) + 4),
11733 GEN_INT (((mask >> 6) & 3) + 4)));
;; vpshufd on V8SI with eight explicit element indices taken from
;; operand 1.  Operands 2..5 must lie in 0..3, and the condition requires
;; operands 6..9 to repeat operands 2..5 with offset 4.  The output code
;; packs operands 2..5 into an 8-bit immediate, two bits each, and stores
;; it back in operand 2.
11737 (define_insn "avx2_pshufd_1<mask_name>"
11738 [(set (match_operand:V8SI 0 "register_operand" "=v")
11740 (match_operand:V8SI 1 "nonimmediate_operand" "vm")
11741 (parallel [(match_operand 2 "const_0_to_3_operand")
11742 (match_operand 3 "const_0_to_3_operand")
11743 (match_operand 4 "const_0_to_3_operand")
11744 (match_operand 5 "const_0_to_3_operand")
11745 (match_operand 6 "const_4_to_7_operand")
11746 (match_operand 7 "const_4_to_7_operand")
11747 (match_operand 8 "const_4_to_7_operand")
11748 (match_operand 9 "const_4_to_7_operand")])))]
11750 && <mask_avx512vl_condition>
11751 && INTVAL (operands[2]) + 4 == INTVAL (operands[6])
11752 && INTVAL (operands[3]) + 4 == INTVAL (operands[7])
11753 && INTVAL (operands[4]) + 4 == INTVAL (operands[8])
11754 && INTVAL (operands[5]) + 4 == INTVAL (operands[9])"
11757 mask |= INTVAL (operands[2]) << 0;
11758 mask |= INTVAL (operands[3]) << 2;
11759 mask |= INTVAL (operands[4]) << 4;
11760 mask |= INTVAL (operands[5]) << 6;
11761 operands[2] = GEN_INT (mask);
11763 return "vpshufd\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}";
11765 [(set_attr "type" "sselog1")
11766 (set_attr "prefix" "maybe_evex")
11767 (set_attr "length_immediate" "1")
11768 (set_attr "mode" "OI")])
;; Expander for the V4SI shuffle with two extra operands (3: V4SI
;; register, 4: QI register).  The 8-bit immediate in operand 2 is split
;; into four 2-bit selectors for gen_sse2_pshufd_1_mask; operands 3 and 4
;; are forwarded unchanged.
;; NOTE(review): operands 3/4 are presumably the merge source and the
;; write mask of the <mask_name> variant -- confirm against the subst.
11770 (define_expand "avx512vl_pshufd_mask"
11771 [(match_operand:V4SI 0 "register_operand")
11772 (match_operand:V4SI 1 "nonimmediate_operand")
11773 (match_operand:SI 2 "const_0_to_255_operand")
11774 (match_operand:V4SI 3 "register_operand")
11775 (match_operand:QI 4 "register_operand")]
11778 int mask = INTVAL (operands[2]);
11779 emit_insn (gen_sse2_pshufd_1_mask (operands[0], operands[1],
11780 GEN_INT ((mask >> 0) & 3),
11781 GEN_INT ((mask >> 2) & 3),
11782 GEN_INT ((mask >> 4) & 3),
11783 GEN_INT ((mask >> 6) & 3),
11784 operands[3], operands[4]));
;; Expander for the V4SI shuffle.  Operand 2 is accepted as any
;; const_int, but only its bits 0..7 are consulted: they are split into
;; four 2-bit selectors and passed to gen_sse2_pshufd_1.
11788 (define_expand "sse2_pshufd"
11789 [(match_operand:V4SI 0 "register_operand")
11790 (match_operand:V4SI 1 "nonimmediate_operand")
11791 (match_operand:SI 2 "const_int_operand")]
11794 int mask = INTVAL (operands[2]);
11795 emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
11796 GEN_INT ((mask >> 0) & 3),
11797 GEN_INT ((mask >> 2) & 3),
11798 GEN_INT ((mask >> 4) & 3),
11799 GEN_INT ((mask >> 6) & 3)));
;; V4SI shuffle insn with four explicit indices (operands 2..5, each in
;; 0..3).  The output code packs them into one immediate, 2 bits per
;; index with operand 2 in the low bits, overwrites operands[2] with it
;; and prints %vpshufd.
11803 (define_insn "sse2_pshufd_1<mask_name>"
11804 [(set (match_operand:V4SI 0 "register_operand" "=v")
11806 (match_operand:V4SI 1 "nonimmediate_operand" "vm")
11807 (parallel [(match_operand 2 "const_0_to_3_operand")
11808 (match_operand 3 "const_0_to_3_operand")
11809 (match_operand 4 "const_0_to_3_operand")
11810 (match_operand 5 "const_0_to_3_operand")])))]
11811 "TARGET_SSE2 && <mask_avx512vl_condition>"
11814 mask |= INTVAL (operands[2]) << 0;
11815 mask |= INTVAL (operands[3]) << 2;
11816 mask |= INTVAL (operands[4]) << 4;
11817 mask |= INTVAL (operands[5]) << 6;
11818 operands[2] = GEN_INT (mask);
11820 return "%vpshufd\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
11822 [(set_attr "type" "sselog1")
11823 (set_attr "prefix_data16" "1")
11824 (set_attr "prefix" "<mask_prefix2>")
11825 (set_attr "length_immediate" "1")
11826 (set_attr "mode" "TI")])
;; V32HI vpshuflw insn.  Unlike the narrower pshuflw patterns, the
;; shuffle control stays a single 0..255 immediate (operand 2) and is
;; printed as-is; no per-element indices are involved.
11828 (define_insn "<mask_codefor>avx512bw_pshuflwv32hi<mask_name>"
11829 [(set (match_operand:V32HI 0 "register_operand" "=v")
11831 [(match_operand:V32HI 1 "nonimmediate_operand" "vm")
11832 (match_operand:SI 2 "const_0_to_255_operand" "n")]
11835 "vpshuflw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11836 [(set_attr "type" "sselog")
11837 (set_attr "prefix" "evex")
11838 (set_attr "mode" "XI")])
;; Expander for the V16HI pshuflw with two extra operands (3: V16HI
;; register, 4: HI register); requires TARGET_AVX512VL && TARGET_AVX512BW.
;; The immediate is split into four 2-bit selectors, passed once as 0..3
;; and once with 8 added (8..11) to gen_avx2_pshuflw_1_mask; operands 3
;; and 4 are forwarded unchanged.
;; NOTE(review): operands 3/4 are presumably merge source and write
;; mask -- confirm against the <mask_name> subst.
11840 (define_expand "avx512vl_pshuflwv3_mask"
11841 [(match_operand:V16HI 0 "register_operand")
11842 (match_operand:V16HI 1 "nonimmediate_operand")
11843 (match_operand:SI 2 "const_0_to_255_operand")
11844 (match_operand:V16HI 3 "register_operand")
11845 (match_operand:HI 4 "register_operand")]
11846 "TARGET_AVX512VL && TARGET_AVX512BW"
11848 int mask = INTVAL (operands[2]);
11849 emit_insn (gen_avx2_pshuflw_1_mask (operands[0], operands[1],
11850 GEN_INT ((mask >> 0) & 3),
11851 GEN_INT ((mask >> 2) & 3),
11852 GEN_INT ((mask >> 4) & 3),
11853 GEN_INT ((mask >> 6) & 3),
11854 GEN_INT (((mask >> 0) & 3) + 8),
11855 GEN_INT (((mask >> 2) & 3) + 8),
11856 GEN_INT (((mask >> 4) & 3) + 8),
11857 GEN_INT (((mask >> 6) & 3) + 8),
11858 operands[3], operands[4]));
;; Expander for the V16HI pshuflw taking an 8-bit immediate (operand 2).
;; The four 2-bit selectors are passed to gen_avx2_pshuflw_1 as indices
;; 0..3 and again with 8 added (indices 8..11).
11862 (define_expand "avx2_pshuflwv3"
11863 [(match_operand:V16HI 0 "register_operand")
11864 (match_operand:V16HI 1 "nonimmediate_operand")
11865 (match_operand:SI 2 "const_0_to_255_operand")]
11868 int mask = INTVAL (operands[2]);
11869 emit_insn (gen_avx2_pshuflw_1 (operands[0], operands[1],
11870 GEN_INT ((mask >> 0) & 3),
11871 GEN_INT ((mask >> 2) & 3),
11872 GEN_INT ((mask >> 4) & 3),
11873 GEN_INT ((mask >> 6) & 3),
11874 GEN_INT (((mask >> 0) & 3) + 8),
11875 GEN_INT (((mask >> 2) & 3) + 8),
11876 GEN_INT (((mask >> 4) & 3) + 8),
11877 GEN_INT (((mask >> 6) & 3) + 8)));
;; V16HI pshuflw insn with explicit indices: operands 2..5 are in 0..3
;; and operands 6..9 in 8..11; the condition requires operand N+4 to be
;; operand N + 8 for N = 2..5.  The output code rebuilds the immediate
;; from operands 2..5 (2 bits each, operand 2 lowest), stores it in
;; operands[2] and prints vpshuflw.
11881 (define_insn "avx2_pshuflw_1<mask_name>"
11882 [(set (match_operand:V16HI 0 "register_operand" "=v")
11884 (match_operand:V16HI 1 "nonimmediate_operand" "vm")
11885 (parallel [(match_operand 2 "const_0_to_3_operand")
11886 (match_operand 3 "const_0_to_3_operand")
11887 (match_operand 4 "const_0_to_3_operand")
11888 (match_operand 5 "const_0_to_3_operand")
11893 (match_operand 6 "const_8_to_11_operand")
11894 (match_operand 7 "const_8_to_11_operand")
11895 (match_operand 8 "const_8_to_11_operand")
11896 (match_operand 9 "const_8_to_11_operand")
11900 (const_int 15)])))]
11902 && <mask_avx512bw_condition> && <mask_avx512vl_condition>
11903 && INTVAL (operands[2]) + 8 == INTVAL (operands[6])
11904 && INTVAL (operands[3]) + 8 == INTVAL (operands[7])
11905 && INTVAL (operands[4]) + 8 == INTVAL (operands[8])
11906 && INTVAL (operands[5]) + 8 == INTVAL (operands[9])"
11909 mask |= INTVAL (operands[2]) << 0;
11910 mask |= INTVAL (operands[3]) << 2;
11911 mask |= INTVAL (operands[4]) << 4;
11912 mask |= INTVAL (operands[5]) << 6;
11913 operands[2] = GEN_INT (mask);
11915 return "vpshuflw\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}";
11917 [(set_attr "type" "sselog")
11918 (set_attr "prefix" "maybe_evex")
11919 (set_attr "length_immediate" "1")
11920 (set_attr "mode" "OI")])
;; Expander for the V8HI pshuflw with two extra operands (3: V8HI
;; register, 4: QI register); requires TARGET_AVX512VL && TARGET_AVX512BW.
;; Splits the immediate into four 2-bit selectors for
;; gen_sse2_pshuflw_1_mask and forwards operands 3 and 4 unchanged.
;; NOTE(review): operands 3/4 are presumably merge source and write
;; mask -- confirm against the <mask_name> subst.
11922 (define_expand "avx512vl_pshuflw_mask"
11923 [(match_operand:V8HI 0 "register_operand")
11924 (match_operand:V8HI 1 "nonimmediate_operand")
11925 (match_operand:SI 2 "const_0_to_255_operand")
11926 (match_operand:V8HI 3 "register_operand")
11927 (match_operand:QI 4 "register_operand")]
11928 "TARGET_AVX512VL && TARGET_AVX512BW"
11930 int mask = INTVAL (operands[2]);
11931 emit_insn (gen_sse2_pshuflw_1_mask (operands[0], operands[1],
11932 GEN_INT ((mask >> 0) & 3),
11933 GEN_INT ((mask >> 2) & 3),
11934 GEN_INT ((mask >> 4) & 3),
11935 GEN_INT ((mask >> 6) & 3),
11936 operands[3], operands[4]));
;; Expander for the V8HI pshuflw.  Operand 2 may be any const_int, but
;; only bits 0..7 are used: they become four 2-bit selectors handed to
;; gen_sse2_pshuflw_1.
11940 (define_expand "sse2_pshuflw"
11941 [(match_operand:V8HI 0 "register_operand")
11942 (match_operand:V8HI 1 "nonimmediate_operand")
11943 (match_operand:SI 2 "const_int_operand")]
11946 int mask = INTVAL (operands[2]);
11947 emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
11948 GEN_INT ((mask >> 0) & 3),
11949 GEN_INT ((mask >> 2) & 3),
11950 GEN_INT ((mask >> 4) & 3),
11951 GEN_INT ((mask >> 6) & 3)));
;; V8HI pshuflw insn whose four variable indices (operands 2..5) are in
;; 0..3.  The output code packs them into one immediate (2 bits each,
;; operand 2 lowest), overwrites operands[2] and prints %vpshuflw.
11955 (define_insn "sse2_pshuflw_1<mask_name>"
11956 [(set (match_operand:V8HI 0 "register_operand" "=v")
11958 (match_operand:V8HI 1 "nonimmediate_operand" "vm")
11959 (parallel [(match_operand 2 "const_0_to_3_operand")
11960 (match_operand 3 "const_0_to_3_operand")
11961 (match_operand 4 "const_0_to_3_operand")
11962 (match_operand 5 "const_0_to_3_operand")
11967 "TARGET_SSE2 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
11970 mask |= INTVAL (operands[2]) << 0;
11971 mask |= INTVAL (operands[3]) << 2;
11972 mask |= INTVAL (operands[4]) << 4;
11973 mask |= INTVAL (operands[5]) << 6;
11974 operands[2] = GEN_INT (mask);
11976 return "%vpshuflw\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
11978 [(set_attr "type" "sselog")
11979 (set_attr "prefix_data16" "0")
11980 (set_attr "prefix_rep" "1")
11981 (set_attr "prefix" "maybe_vex")
11982 (set_attr "length_immediate" "1")
11983 (set_attr "mode" "TI")])
;; Expander for the V16HI pshufhw taking an 8-bit immediate (operand 2).
;; Each 2-bit selector is passed to gen_avx2_pshufhw_1 twice: biased by
;; 4 (indices 4..7) and biased by 12 (indices 12..15).
11985 (define_expand "avx2_pshufhwv3"
11986 [(match_operand:V16HI 0 "register_operand")
11987 (match_operand:V16HI 1 "nonimmediate_operand")
11988 (match_operand:SI 2 "const_0_to_255_operand")]
11991 int mask = INTVAL (operands[2]);
11992 emit_insn (gen_avx2_pshufhw_1 (operands[0], operands[1],
11993 GEN_INT (((mask >> 0) & 3) + 4),
11994 GEN_INT (((mask >> 2) & 3) + 4),
11995 GEN_INT (((mask >> 4) & 3) + 4),
11996 GEN_INT (((mask >> 6) & 3) + 4),
11997 GEN_INT (((mask >> 0) & 3) + 12),
11998 GEN_INT (((mask >> 2) & 3) + 12),
11999 GEN_INT (((mask >> 4) & 3) + 12),
12000 GEN_INT (((mask >> 6) & 3) + 12)));
;; V32HI vpshufhw insn.  The shuffle control is kept as one 0..255
;; immediate (operand 2) and printed directly.
12004 (define_insn "<mask_codefor>avx512bw_pshufhwv32hi<mask_name>"
12005 [(set (match_operand:V32HI 0 "register_operand" "=v")
12007 [(match_operand:V32HI 1 "nonimmediate_operand" "vm")
12008 (match_operand:SI 2 "const_0_to_255_operand" "n")]
12011 "vpshufhw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12012 [(set_attr "type" "sselog")
12013 (set_attr "prefix" "evex")
12014 (set_attr "mode" "XI")])
;; Expander for the V16HI pshufhw with two extra operands (3: V16HI
;; register, 4: HI register); requires TARGET_AVX512VL && TARGET_AVX512BW.
;; Selectors from the immediate are biased by 4 and by 12 before being
;; passed to gen_avx2_pshufhw_1_mask; operands 3 and 4 are forwarded.
;; NOTE(review): operands 3/4 are presumably merge source and write
;; mask -- confirm against the <mask_name> subst.
12016 (define_expand "avx512vl_pshufhwv3_mask"
12017 [(match_operand:V16HI 0 "register_operand")
12018 (match_operand:V16HI 1 "nonimmediate_operand")
12019 (match_operand:SI 2 "const_0_to_255_operand")
12020 (match_operand:V16HI 3 "register_operand")
12021 (match_operand:HI 4 "register_operand")]
12022 "TARGET_AVX512VL && TARGET_AVX512BW"
12024 int mask = INTVAL (operands[2]);
12025 emit_insn (gen_avx2_pshufhw_1_mask (operands[0], operands[1],
12026 GEN_INT (((mask >> 0) & 3) + 4),
12027 GEN_INT (((mask >> 2) & 3) + 4),
12028 GEN_INT (((mask >> 4) & 3) + 4),
12029 GEN_INT (((mask >> 6) & 3) + 4),
12030 GEN_INT (((mask >> 0) & 3) + 12),
12031 GEN_INT (((mask >> 2) & 3) + 12),
12032 GEN_INT (((mask >> 4) & 3) + 12),
12033 GEN_INT (((mask >> 6) & 3) + 12),
12034 operands[3], operands[4]));
;; V16HI pshufhw insn with explicit indices: operands 2..5 are in 4..7
;; and operands 6..9 in 12..15; the condition requires operand N+4 to be
;; operand N + 8 for N = 2..5.  The output code removes the bias of 4
;; from operands 2..5, packs the results into one immediate (2 bits each,
;; operand 2 lowest), stores it in operands[2] and prints vpshufhw.
12038 (define_insn "avx2_pshufhw_1<mask_name>"
12039 [(set (match_operand:V16HI 0 "register_operand" "=v")
12041 (match_operand:V16HI 1 "nonimmediate_operand" "vm")
12042 (parallel [(const_int 0)
12046 (match_operand 2 "const_4_to_7_operand")
12047 (match_operand 3 "const_4_to_7_operand")
12048 (match_operand 4 "const_4_to_7_operand")
12049 (match_operand 5 "const_4_to_7_operand")
12054 (match_operand 6 "const_12_to_15_operand")
12055 (match_operand 7 "const_12_to_15_operand")
12056 (match_operand 8 "const_12_to_15_operand")
12057 (match_operand 9 "const_12_to_15_operand")])))]
12059 && <mask_avx512bw_condition> && <mask_avx512vl_condition>
12060 && INTVAL (operands[2]) + 8 == INTVAL (operands[6])
12061 && INTVAL (operands[3]) + 8 == INTVAL (operands[7])
12062 && INTVAL (operands[4]) + 8 == INTVAL (operands[8])
12063 && INTVAL (operands[5]) + 8 == INTVAL (operands[9])"
12066 mask |= (INTVAL (operands[2]) - 4) << 0;
12067 mask |= (INTVAL (operands[3]) - 4) << 2;
12068 mask |= (INTVAL (operands[4]) - 4) << 4;
12069 mask |= (INTVAL (operands[5]) - 4) << 6;
12070 operands[2] = GEN_INT (mask);
12072 return "vpshufhw\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}";
12074 [(set_attr "type" "sselog")
12075 (set_attr "prefix" "maybe_evex")
12076 (set_attr "length_immediate" "1")
12077 (set_attr "mode" "OI")])
;; Expander for the V8HI pshufhw with two extra operands (3: V8HI
;; register, 4: QI register); requires TARGET_AVX512VL && TARGET_AVX512BW.
;; Each 2-bit selector from the immediate is biased by 4 and passed to
;; gen_sse2_pshufhw_1_mask; operands 3 and 4 are forwarded unchanged.
;; NOTE(review): operands 3/4 are presumably merge source and write
;; mask -- confirm against the <mask_name> subst.
12079 (define_expand "avx512vl_pshufhw_mask"
12080 [(match_operand:V8HI 0 "register_operand")
12081 (match_operand:V8HI 1 "nonimmediate_operand")
12082 (match_operand:SI 2 "const_0_to_255_operand")
12083 (match_operand:V8HI 3 "register_operand")
12084 (match_operand:QI 4 "register_operand")]
12085 "TARGET_AVX512VL && TARGET_AVX512BW"
12087 int mask = INTVAL (operands[2]);
12088 emit_insn (gen_sse2_pshufhw_1_mask (operands[0], operands[1],
12089 GEN_INT (((mask >> 0) & 3) + 4),
12090 GEN_INT (((mask >> 2) & 3) + 4),
12091 GEN_INT (((mask >> 4) & 3) + 4),
12092 GEN_INT (((mask >> 6) & 3) + 4),
12093 operands[3], operands[4]));
;; Expander for the V8HI pshufhw.  Operand 2 may be any const_int, but
;; only bits 0..7 are used: each 2-bit selector is biased by 4 (giving
;; indices 4..7) and passed to gen_sse2_pshufhw_1.
12097 (define_expand "sse2_pshufhw"
12098 [(match_operand:V8HI 0 "register_operand")
12099 (match_operand:V8HI 1 "nonimmediate_operand")
12100 (match_operand:SI 2 "const_int_operand")]
12103 int mask = INTVAL (operands[2]);
12104 emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
12105 GEN_INT (((mask >> 0) & 3) + 4),
12106 GEN_INT (((mask >> 2) & 3) + 4),
12107 GEN_INT (((mask >> 4) & 3) + 4),
12108 GEN_INT (((mask >> 6) & 3) + 4)));
;; V8HI pshufhw insn whose four variable indices (operands 2..5) are in
;; 4..7.  The output code subtracts 4 from each, packs the results into
;; one immediate (2 bits each, operand 2 lowest), overwrites operands[2]
;; and prints %vpshufhw.
12112 (define_insn "sse2_pshufhw_1<mask_name>"
12113 [(set (match_operand:V8HI 0 "register_operand" "=v")
12115 (match_operand:V8HI 1 "nonimmediate_operand" "vm")
12116 (parallel [(const_int 0)
12120 (match_operand 2 "const_4_to_7_operand")
12121 (match_operand 3 "const_4_to_7_operand")
12122 (match_operand 4 "const_4_to_7_operand")
12123 (match_operand 5 "const_4_to_7_operand")])))]
12124 "TARGET_SSE2 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
12127 mask |= (INTVAL (operands[2]) - 4) << 0;
12128 mask |= (INTVAL (operands[3]) - 4) << 2;
12129 mask |= (INTVAL (operands[4]) - 4) << 4;
12130 mask |= (INTVAL (operands[5]) - 4) << 6;
12131 operands[2] = GEN_INT (mask);
12133 return "%vpshufhw\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
12135 [(set_attr "type" "sselog")
12136 (set_attr "prefix_rep" "1")
12137 (set_attr "prefix_data16" "0")
12138 (set_attr "prefix" "maybe_vex")
12139 (set_attr "length_immediate" "1")
12140 (set_attr "mode" "TI")])
;; Expander building a V4SI value from an SI operand (operand 1, via
;; vec_duplicate).  The preparation statement supplies operands[2] as the
;; all-zero V4SI constant.
12142 (define_expand "sse2_loadd"
12143 [(set (match_operand:V4SI 0 "register_operand")
12145 (vec_duplicate:V4SI
12146 (match_operand:SI 1 "nonimmediate_operand"))
12150 "operands[2] = CONST0_RTX (V4SImode);")
;; Insn combining an SI value (operand 2) with a V4SI operand 1 that is
;; either a register or zero.  Five alternatives: the first two print
;; %vmovd, the next two movss (non-AVX only), the last the three-operand
;; vmovss (AVX only), as selected by the "isa" attribute.
12152 (define_insn "sse2_loadld"
12153 [(set (match_operand:V4SI 0 "register_operand" "=x,Yi,x,x,x")
12155 (vec_duplicate:V4SI
12156 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x,x"))
12157 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,C,0,x")
12161 %vmovd\t{%2, %0|%0, %2}
12162 %vmovd\t{%2, %0|%0, %2}
12163 movss\t{%2, %0|%0, %2}
12164 movss\t{%2, %0|%0, %2}
12165 vmovss\t{%2, %1, %0|%0, %1, %2}"
12166 [(set_attr "isa" "sse2,*,noavx,noavx,avx")
12167 (set_attr "type" "ssemov")
12168 (set_attr "prefix" "maybe_vex,maybe_vex,orig,orig,vex")
12169 (set_attr "mode" "TI,TI,V4SF,SF,SF")])
;; Extract one scalar element (index in operand 2) from a VI12_128
;; vector register using %vpextr<suffix>.  Alternative 0 writes a
;; general register (printed with the %k modifier), alternative 1 writes
;; memory.  prefix_data16 and prefix_extra are computed per alternative
;; and depend on whether the mode is V8HImode.
12171 (define_insn "*vec_extract<mode>"
12172 [(set (match_operand:<ssescalarmode> 0 "nonimmediate_operand" "=r,m")
12173 (vec_select:<ssescalarmode>
12174 (match_operand:VI12_128 1 "register_operand" "x,x")
12176 [(match_operand:SI 2 "const_0_to_<ssescalarnummask>_operand")])))]
12179 %vpextr<ssemodesuffix>\t{%2, %1, %k0|%k0, %1, %2}
12180 %vpextr<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
12181 [(set_attr "type" "sselog1")
12182 (set (attr "prefix_data16")
12184 (and (eq_attr "alternative" "0")
12185 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
12187 (const_string "*")))
12188 (set (attr "prefix_extra")
12190 (and (eq_attr "alternative" "0")
12191 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
12193 (const_string "1")))
12194 (set_attr "length_immediate" "1")
12195 (set_attr "prefix" "maybe_vex")
12196 (set_attr "mode" "TI")])
;; Extract one HI element (index 0..7 in operand 2) of a V8HI register
;; into a general register with pextrw.  Enabled only when SSE2 is
;; available and SSE4.1 is not.
12198 (define_insn "*vec_extractv8hi_sse2"
12199 [(set (match_operand:HI 0 "register_operand" "=r")
12201 (match_operand:V8HI 1 "register_operand" "x")
12203 [(match_operand:SI 2 "const_0_to_7_operand")])))]
12204 "TARGET_SSE2 && !TARGET_SSE4_1"
12205 "pextrw\t{%2, %1, %k0|%k0, %1, %2}"
12206 [(set_attr "type" "sselog1")
12207 (set_attr "prefix_data16" "1")
12208 (set_attr "length_immediate" "1")
12209 (set_attr "mode" "TI")])
;; Extract one element (index 0..15 in operand 2) of a V16QI register
;; into an SWI48 general register, printed as %vpextrb with the %k
;; (32-bit) form of the destination.
12211 (define_insn "*vec_extractv16qi_zext"
12212 [(set (match_operand:SWI48 0 "register_operand" "=r")
12215 (match_operand:V16QI 1 "register_operand" "x")
12217 [(match_operand:SI 2 "const_0_to_15_operand")]))))]
12219 "%vpextrb\t{%2, %1, %k0|%k0, %1, %2}"
12220 [(set_attr "type" "sselog1")
12221 (set_attr "prefix_extra" "1")
12222 (set_attr "length_immediate" "1")
12223 (set_attr "prefix" "maybe_vex")
12224 (set_attr "mode" "TI")])
;; Extract one element (index 0..7 in operand 2) of a V8HI register into
;; an SWI48 general register, printed as %vpextrw with the %k (32-bit)
;; form of the destination.
12226 (define_insn "*vec_extractv8hi_zext"
12227 [(set (match_operand:SWI48 0 "register_operand" "=r")
12230 (match_operand:V8HI 1 "register_operand" "x")
12232 [(match_operand:SI 2 "const_0_to_7_operand")]))))]
12234 "%vpextrw\t{%2, %1, %k0|%k0, %1, %2}"
12235 [(set_attr "type" "sselog1")
12236 (set_attr "prefix_data16" "1")
12237 (set_attr "length_immediate" "1")
12238 (set_attr "prefix" "maybe_vex")
12239 (set_attr "mode" "TI")])
;; Extract one scalar element of a VI12_128 vector that lives in memory
;; (constraint "o") into a general register.
12241 (define_insn "*vec_extract<mode>_mem"
12242 [(set (match_operand:<ssescalarmode> 0 "register_operand" "=r")
12243 (vec_select:<ssescalarmode>
12244 (match_operand:VI12_128 1 "memory_operand" "o")
12246 [(match_operand 2 "const_0_to_<ssescalarnummask>_operand")])))]
;; Extract element 0 of a vector into an SWI48 destination.  Four
;; alternatives cover register and memory combinations; the condition
;; rejects the case where source and destination are both memory.
12250 (define_insn "*vec_extract<ssevecmodelower>_0"
12251 [(set (match_operand:SWI48 0 "nonimmediate_operand" "=r ,r,x ,m")
12253 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "mYj,x,xm,x")
12254 (parallel [(const_int 0)])))]
12255 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
12257 [(set_attr "isa" "*,sse4,*,*")])
;; Element 0 of a V4SI register, widened into a DI general register.
;; After reload the pattern is split into a plain zero_extend:DI whose
;; source is the same hard register re-expressed in SImode.
12259 (define_insn_and_split "*vec_extractv4si_0_zext"
12260 [(set (match_operand:DI 0 "register_operand" "=r")
12263 (match_operand:V4SI 1 "register_operand" "x")
12264 (parallel [(const_int 0)]))))]
12265 "TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_FROM_VEC"
12267 "&& reload_completed"
12268 [(set (match_dup 0) (zero_extend:DI (match_dup 1)))]
12269 "operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));")
;; Extract element 0 of a V2DI operand into a DI destination on non-64-bit
;; SSE targets.  The destination is an SSE register or memory; source and
;; destination may not both be memory.
12271 (define_insn "*vec_extractv2di_0_sse"
12272 [(set (match_operand:DI 0 "nonimmediate_operand" "=x,m")
12274 (match_operand:V2DI 1 "nonimmediate_operand" "xm,x")
12275 (parallel [(const_int 0)])))]
12276 "TARGET_SSE && !TARGET_64BIT
12277 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
;; NOTE(review): the defining keyword for this pattern is not shown here;
;; it has the shape of a split -- confirm.
;; Once reload has completed, extraction of element 0 from a vector
;; register is replaced by a plain move whose source is the same hard
;; register re-expressed in the scalar mode.
12281 [(set (match_operand:SWI48x 0 "nonimmediate_operand")
12283 (match_operand:<ssevecmode> 1 "register_operand")
12284 (parallel [(const_int 0)])))]
12285 "TARGET_SSE && reload_completed"
12286 [(set (match_dup 0) (match_dup 1))]
12287 "operands[1] = gen_rtx_REG (<MODE>mode, REGNO (operands[1]));")
;; Extract one SI element (index 0..3 in operand 2) of a V4SI register.
;; Alternative 0 prints %vpextrd to a general register or memory.
;; Alternatives 1 (non-AVX) and 2 (AVX) target an SSE register instead:
;; they multiply the element index by 4 to form the immediate and print
;; psrldq / vpsrldq.
12289 (define_insn "*vec_extractv4si"
12290 [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,x,x")
12292 (match_operand:V4SI 1 "register_operand" "x,0,x")
12293 (parallel [(match_operand:SI 2 "const_0_to_3_operand")])))]
12296 switch (which_alternative)
12299 return "%vpextrd\t{%2, %1, %0|%0, %1, %2}";
12302 operands [2] = GEN_INT (INTVAL (operands[2]) * 4);
12303 return "psrldq\t{%2, %0|%0, %2}";
12306 operands [2] = GEN_INT (INTVAL (operands[2]) * 4);
12307 return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
12310 gcc_unreachable ();
12313 [(set_attr "isa" "*,noavx,avx")
12314 (set_attr "type" "sselog1,sseishft1,sseishft1")
12315 (set_attr "prefix_extra" "1,*,*")
12316 (set_attr "length_immediate" "1")
12317 (set_attr "prefix" "maybe_vex,orig,vex")
12318 (set_attr "mode" "TI")])
;; Extract one SI element (index 0..3) of a V4SI register into a DI
;; general register on 64-bit SSE4.1 targets.  Printed as %vpextrd with
;; the %k (32-bit) form of the destination.
12320 (define_insn "*vec_extractv4si_zext"
12321 [(set (match_operand:DI 0 "register_operand" "=r")
12324 (match_operand:V4SI 1 "register_operand" "x")
12325 (parallel [(match_operand:SI 2 "const_0_to_3_operand")]))))]
12326 "TARGET_64BIT && TARGET_SSE4_1"
12327 "%vpextrd\t{%2, %1, %k0|%k0, %1, %2}"
12328 [(set_attr "type" "sselog1")
12329 (set_attr "prefix_extra" "1")
12330 (set_attr "length_immediate" "1")
12331 (set_attr "prefix" "maybe_vex")
12332 (set_attr "mode" "TI")])
;; Extract one SI element (index 0..3) of a V4SI vector that lives in
;; memory (constraint "o") into an SSE or general register.
12334 (define_insn "*vec_extractv4si_mem"
12335 [(set (match_operand:SI 0 "register_operand" "=x,r")
12337 (match_operand:V4SI 1 "memory_operand" "o,o")
12338 (parallel [(match_operand 2 "const_0_to_3_operand")])))]
;; One SI element of an in-memory V4SI vector, widened into a DI
;; register (64-bit targets only).  After reload the pattern is split
;; into a zero_extend:DI of an SImode memory reference located at
;; index * 4 bytes from the original address.
12342 (define_insn_and_split "*vec_extractv4si_zext_mem"
12343 [(set (match_operand:DI 0 "register_operand" "=x,r")
12346 (match_operand:V4SI 1 "memory_operand" "o,o")
12347 (parallel [(match_operand:SI 2 "const_0_to_3_operand")]))))]
12348 "TARGET_64BIT && TARGET_SSE"
12350 "&& reload_completed"
12351 [(set (match_dup 0) (zero_extend:DI (match_dup 1)))]
12353 operands[1] = adjust_address (operands[1], SImode, INTVAL (operands[2]) * 4);
;; Extract element 1 of a V2DI operand into a DI destination.  Seven
;; alternatives, gated by the "isa" attribute, cover %vpextrq, %vmovhps,
;; psrldq / vpsrldq by 8, movhlps, and two alternatives that read the
;; vector from memory.  Source and destination may not both be memory.
12356 (define_insn "*vec_extractv2di_1"
12357 [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,m,x,x,x,x,r")
12359 (match_operand:V2DI 1 "nonimmediate_operand" "x ,x,0,x,x,o,o")
12360 (parallel [(const_int 1)])))]
12361 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
12363 %vpextrq\t{$1, %1, %0|%0, %1, 1}
12364 %vmovhps\t{%1, %0|%0, %1}
12365 psrldq\t{$8, %0|%0, 8}
12366 vpsrldq\t{$8, %1, %0|%0, %1, 8}
12367 movhlps\t{%1, %0|%0, %1}
12370 [(set_attr "isa" "x64_sse4,*,sse2_noavx,avx,noavx,*,x64")
12371 (set_attr "type" "sselog1,ssemov,sseishft1,sseishft1,ssemov,ssemov,imov")
12372 (set_attr "length_immediate" "1,*,1,1,*,*,*")
12373 (set_attr "prefix_rex" "1,*,*,*,*,*,*")
12374 (set_attr "prefix_extra" "1,*,*,*,*,*,*")
12375 (set_attr "prefix" "maybe_vex,maybe_vex,orig,vex,orig,*,*")
12376 (set_attr "mode" "TI,V2SF,TI,TI,V4SF,DI,DI")])
;; NOTE(review): the defining keyword for this pattern is not shown here;
;; it has the shape of a split -- confirm.
;; Once reload has completed, extraction of element N from a VI_128
;; vector in memory is replaced by a scalar load from the original
;; address plus N * GET_MODE_SIZE (scalar mode) bytes.
12379 [(set (match_operand:<ssescalarmode> 0 "register_operand")
12380 (vec_select:<ssescalarmode>
12381 (match_operand:VI_128 1 "memory_operand")
12383 [(match_operand 2 "const_0_to_<ssescalarnummask>_operand")])))]
12384 "TARGET_SSE && reload_completed"
12385 [(set (match_dup 0) (match_dup 1))]
12387 int offs = INTVAL (operands[2]) * GET_MODE_SIZE (<ssescalarmode>mode);
12389 operands[1] = adjust_address (operands[1], <ssescalarmode>mode, offs);
;; Broadcast an SI value into every element of a V4SI register.  Three
;; alternatives: %vpshufd with immediate 0 from a register (SSE2),
;; vbroadcastss from memory (AVX), and shufps with immediate 0 on the
;; destination itself (non-AVX).
12392 (define_insn "*vec_dupv4si"
12393 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
12394 (vec_duplicate:V4SI
12395 (match_operand:SI 1 "nonimmediate_operand" " x,m,0")))]
12398 %vpshufd\t{$0, %1, %0|%0, %1, 0}
12399 vbroadcastss\t{%1, %0|%0, %1}
12400 shufps\t{$0, %0, %0|%0, %0, 0}"
12401 [(set_attr "isa" "sse2,avx,noavx")
12402 (set_attr "type" "sselog1,ssemov,sselog1")
12403 (set_attr "length_immediate" "1,0,1")
12404 (set_attr "prefix_extra" "0,1,*")
12405 (set_attr "prefix" "maybe_vex,vex,orig")
12406 (set_attr "mode" "TI,V4SF,V4SF")])
;; Broadcast a DI value into both elements of a V2DI register.  Four
;; alternatives selected by the "isa" attribute (sse2_noavx, avx, sse3,
;; noavx); the visible templates are vpunpcklqdq and %vmovddup.
12408 (define_insn "*vec_dupv2di"
12409 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x,x")
12410 (vec_duplicate:V2DI
12411 (match_operand:DI 1 "nonimmediate_operand" " 0,x,m,0")))]
12415 vpunpcklqdq\t{%d1, %0|%0, %d1}
12416 %vmovddup\t{%1, %0|%0, %1}
12418 [(set_attr "isa" "sse2_noavx,avx,sse3,noavx")
12419 (set_attr "type" "sselog1,sselog1,sselog1,ssemov")
12420 (set_attr "prefix" "orig,vex,maybe_vex,orig")
12421 (set_attr "mode" "TI,TI,DF,V4SF")])
;; Build a V2SI value from two SI operands.  Seven alternatives:
;; pinsrd / vpinsrd (operand 2 in a general register or memory),
;; punpckldq / vpunpckldq (operand 2 in an SSE register), %vmovd when
;; operand 2 is the constant matched by "C", and two MMX-register
;; alternatives (punpckldq, movd).
12423 (define_insn "*vec_concatv2si_sse4_1"
12424 [(set (match_operand:V2SI 0 "register_operand" "=x, x,x,x, x, *y,*y")
12426 (match_operand:SI 1 "nonimmediate_operand" " 0, x,0,x,rm, 0,rm")
12427 (match_operand:SI 2 "vector_move_operand" "rm,rm,x,x, C,*ym, C")))]
12430 pinsrd\t{$1, %2, %0|%0, %2, 1}
12431 vpinsrd\t{$1, %2, %1, %0|%0, %1, %2, 1}
12432 punpckldq\t{%2, %0|%0, %2}
12433 vpunpckldq\t{%2, %1, %0|%0, %1, %2}
12434 %vmovd\t{%1, %0|%0, %1}
12435 punpckldq\t{%2, %0|%0, %2}
12436 movd\t{%1, %0|%0, %1}"
12437 [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
12438 (set_attr "type" "sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov")
12439 (set_attr "prefix_extra" "1,1,*,*,*,*,*")
12440 (set_attr "length_immediate" "1,1,*,*,*,*,*")
12441 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig")
12442 (set_attr "mode" "TI,TI,TI,TI,TI,DI,DI")])
12444 ;; ??? In theory we can match memory for the MMX alternative, but allowing
12445 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
12446 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Build a V2SI value from two SI operands when SSE4.1 is not available.
;; Operand 2 is restricted to a register or zero (reg_or_0_operand).
;; Seven alternatives: punpckldq / movd / movd gated on SSE2, unpcklps
;; and movss for plain SSE, and two MMX-register alternatives.
12447 (define_insn "*vec_concatv2si"
12448 [(set (match_operand:V2SI 0 "register_operand" "=x,x ,*y,x,x,*y,*y")
12450 (match_operand:SI 1 "nonimmediate_operand" " 0,rm,rm,0,m, 0,*rm")
12451 (match_operand:SI 2 "reg_or_0_operand" " x,C ,C, x,C,*y,C")))]
12452 "TARGET_SSE && !TARGET_SSE4_1"
12454 punpckldq\t{%2, %0|%0, %2}
12455 movd\t{%1, %0|%0, %1}
12456 movd\t{%1, %0|%0, %1}
12457 unpcklps\t{%2, %0|%0, %2}
12458 movss\t{%1, %0|%0, %1}
12459 punpckldq\t{%2, %0|%0, %2}
12460 movd\t{%1, %0|%0, %1}"
12461 [(set_attr "isa" "sse2,sse2,sse2,*,*,*,*")
12462 (set_attr "type" "sselog,ssemov,mmxmov,sselog,ssemov,mmxcvt,mmxmov")
12463 (set_attr "mode" "TI,TI,DI,V4SF,SF,DI,DI")])
;; Build a V4SI value from two V2SI operands.  Operand 1 is always a
;; register; operand 2 is a register (punpcklqdq, vpunpcklqdq, movlhps)
;; or memory (movhps, vmovhps).
12465 (define_insn "*vec_concatv4si"
12466 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x,x,x")
12468 (match_operand:V2SI 1 "register_operand" " 0,x,0,0,x")
12469 (match_operand:V2SI 2 "nonimmediate_operand" " x,x,x,m,m")))]
12472 punpcklqdq\t{%2, %0|%0, %2}
12473 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
12474 movlhps\t{%2, %0|%0, %2}
12475 movhps\t{%2, %0|%0, %q2}
12476 vmovhps\t{%2, %1, %0|%0, %1, %q2}"
12477 [(set_attr "isa" "sse2_noavx,avx,noavx,noavx,avx")
12478 (set_attr "type" "sselog,sselog,ssemov,ssemov,ssemov")
12479 (set_attr "prefix" "orig,vex,orig,orig,vex")
12480 (set_attr "mode" "TI,TI,V4SF,V2SF,V2SF")])
12482 ;; movd instead of movq is required to handle broken assemblers.
;; Build a V2DI value from two DI operands.  Ten alternatives selected
;; by the "isa" attribute.  Alternative 2 (operand 1 in a general
;; register, operand 2 the "C" constant) chooses between %vmovq and
;; %vmovd at output time depending on HAVE_AS_IX86_INTERUNIT_MOVQ.
12483 (define_insn "vec_concatv2di"
12484 [(set (match_operand:V2DI 0 "register_operand"
12485 "=x,x ,Yi,x ,!x,x,x,x,x,x")
12487 (match_operand:DI 1 "nonimmediate_operand"
12488 " 0,x ,r ,xm,*y,0,x,0,0,x")
12489 (match_operand:DI 2 "vector_move_operand"
12490 "rm,rm,C ,C ,C ,x,x,x,m,m")))]
12493 pinsrq\t{$1, %2, %0|%0, %2, 1}
12494 vpinsrq\t{$1, %2, %1, %0|%0, %1, %2, 1}
12495 * return HAVE_AS_IX86_INTERUNIT_MOVQ ? \"%vmovq\t{%1, %0|%0, %1}\" : \"%vmovd\t{%1, %0|%0, %1}\";
12496 %vmovq\t{%1, %0|%0, %1}
12497 movq2dq\t{%1, %0|%0, %1}
12498 punpcklqdq\t{%2, %0|%0, %2}
12499 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
12500 movlhps\t{%2, %0|%0, %2}
12501 movhps\t{%2, %0|%0, %2}
12502 vmovhps\t{%2, %1, %0|%0, %1, %2}"
12503 [(set_attr "isa" "x64_sse4_noavx,x64_avx,x64,sse2,sse2,sse2_noavx,avx,noavx,noavx,avx")
12506 (eq_attr "alternative" "0,1,5,6")
12507 (const_string "sselog")
12508 (const_string "ssemov")))
12509 (set_attr "prefix_rex" "1,1,1,*,*,*,*,*,*,*")
12510 (set_attr "prefix_extra" "1,1,*,*,*,*,*,*,*,*")
12511 (set_attr "length_immediate" "1,1,*,*,*,*,*,*,*,*")
12512 (set_attr "prefix" "orig,vex,maybe_vex,maybe_vex,orig,orig,vex,orig,orig,vex")
12513 (set_attr "mode" "TI,TI,TI,TI,TI,TI,TI,V4SF,V2SF,V2SF")])
;; Expander that delegates entirely to ix86_expand_sse_unpack, passing
;; (false, false) as the last two arguments, then finishes with DONE.
12515 (define_expand "vec_unpacks_lo_<mode>"
12516 [(match_operand:<sseunpackmode> 0 "register_operand")
12517 (match_operand:VI124_AVX512F 1 "register_operand")]
12519 "ix86_expand_sse_unpack (operands[0], operands[1], false, false); DONE;")
;; Expander that delegates entirely to ix86_expand_sse_unpack, passing
;; (false, true) as the last two arguments, then finishes with DONE.
12521 (define_expand "vec_unpacks_hi_<mode>"
12522 [(match_operand:<sseunpackmode> 0 "register_operand")
12523 (match_operand:VI124_AVX512F 1 "register_operand")]
12525 "ix86_expand_sse_unpack (operands[0], operands[1], false, true); DONE;")
;; Expander that delegates entirely to ix86_expand_sse_unpack, passing
;; (true, false) as the last two arguments, then finishes with DONE.
12527 (define_expand "vec_unpacku_lo_<mode>"
12528 [(match_operand:<sseunpackmode> 0 "register_operand")
12529 (match_operand:VI124_AVX512F 1 "register_operand")]
12531 "ix86_expand_sse_unpack (operands[0], operands[1], true, false); DONE;")
;; Expander that delegates entirely to ix86_expand_sse_unpack, passing
;; (true, true) as the last two arguments, then finishes with DONE.
12533 (define_expand "vec_unpacku_hi_<mode>"
12534 [(match_operand:<sseunpackmode> 0 "register_operand")
12535 (match_operand:VI124_AVX512F 1 "register_operand")]
12537 "ix86_expand_sse_unpack (operands[0], operands[1], true, true); DONE;")
12539 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
12543 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander for the unsigned average of two VI12_AVX2 vectors.  The RTL
;; zero-extends both inputs to the double-width mode, adds them, adds a
;; further operand (match_dup <mask_expand_op3>), shifts right and
;; truncates back.  The preparation code stores CONST1_RTX (<MODE>mode)
;; in operands[3] and calls ix86_fixup_binary_operands_no_copy for PLUS;
;; two pieces of it run only when <mask_applied> is set, one of which
;; copies operands[3] into operands[5].
12545 (define_expand "<sse2_avx2>_uavg<mode>3<mask_name>"
12546 [(set (match_operand:VI12_AVX2 0 "register_operand")
12547 (truncate:VI12_AVX2
12548 (lshiftrt:<ssedoublemode>
12549 (plus:<ssedoublemode>
12550 (plus:<ssedoublemode>
12551 (zero_extend:<ssedoublemode>
12552 (match_operand:VI12_AVX2 1 "nonimmediate_operand"))
12553 (zero_extend:<ssedoublemode>
12554 (match_operand:VI12_AVX2 2 "nonimmediate_operand")))
12555 (match_dup <mask_expand_op3>))
12557 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
12560 if (<mask_applied>)
12562 operands[3] = CONST1_RTX(<MODE>mode);
12563 ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);
12565 if (<mask_applied>)
12567 operands[5] = operands[3];
;; Insn matching the unsigned-average RTL: both inputs zero-extended to
;; the double-width mode, summed, a const1_operand added, the result
;; shifted right and truncated.  Operand 1 is marked commutative ("%").
;; Alternative 0 prints the two-operand pavg<suffix> (non-AVX),
;; alternative 1 the three-operand vpavg<suffix> (AVX).
12572 (define_insn "*<sse2_avx2>_uavg<mode>3<mask_name>"
12573 [(set (match_operand:VI12_AVX2 0 "register_operand" "=x,v")
12574 (truncate:VI12_AVX2
12575 (lshiftrt:<ssedoublemode>
12576 (plus:<ssedoublemode>
12577 (plus:<ssedoublemode>
12578 (zero_extend:<ssedoublemode>
12579 (match_operand:VI12_AVX2 1 "nonimmediate_operand" "%0,v"))
12580 (zero_extend:<ssedoublemode>
12581 (match_operand:VI12_AVX2 2 "nonimmediate_operand" "xm,vm")))
12582 (match_operand:VI12_AVX2 <mask_expand_op3> "const1_operand"))
12584 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>
12585 && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
12587 pavg<ssemodesuffix>\t{%2, %0|%0, %2}
12588 vpavg<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12589 [(set_attr "isa" "noavx,avx")
12590 (set_attr "type" "sseiadd")
12591 (set_attr "prefix_data16" "1,*")
12592 (set_attr "prefix" "orig,<mask_prefix>")
12593 (set_attr "mode" "<sseinsnmode>")])
12595 ;; The correct representation for this is absolutely enormous, and
12596 ;; surely not generally useful.
;; Hence the operation is modelled as an unspec over two byte-vector
;; operands.  Alternative 0 prints the two-operand psadbw (non-AVX),
;; alternative 1 the three-operand vpsadbw (AVX).
12597 (define_insn "<sse2_avx2>_psadbw"
12598 [(set (match_operand:VI8_AVX2_AVX512BW 0 "register_operand" "=x,v")
12599 (unspec:VI8_AVX2_AVX512BW
12600 [(match_operand:<ssebytemode> 1 "register_operand" "0,v")
12601 (match_operand:<ssebytemode> 2 "nonimmediate_operand" "xm,vm")]
12605 psadbw\t{%2, %0|%0, %2}
12606 vpsadbw\t{%2, %1, %0|%0, %1, %2}"
12607 [(set_attr "isa" "noavx,avx")
12608 (set_attr "type" "sseiadd")
12609 (set_attr "atom_unit" "simul")
12610 (set_attr "prefix_data16" "1,*")
12611 (set_attr "prefix" "orig,maybe_evex")
12612 (set_attr "mode" "<sseinsnmode>")])
;; %vmovmsk<suffix>: produce an SI general-register result from a
;; VF_128_256 vector register.
12614 (define_insn "<sse>_movmsk<ssemodesuffix><avxsizesuffix>"
12615 [(set (match_operand:SI 0 "register_operand" "=r")
12617 [(match_operand:VF_128_256 1 "register_operand" "x")]
12620 "%vmovmsk<ssemodesuffix>\t{%1, %0|%0, %1}"
12621 [(set_attr "type" "ssemov")
12622 (set_attr "prefix" "maybe_vex")
12623 (set_attr "mode" "<MODE>")])
;; vpmovmskb: produce an SI general-register result from a V32QI
;; register; modelled as an unspec.
12625 (define_insn "avx2_pmovmskb"
12626 [(set (match_operand:SI 0 "register_operand" "=r")
12627 (unspec:SI [(match_operand:V32QI 1 "register_operand" "x")]
12630 "vpmovmskb\t{%1, %0|%0, %1}"
12631 [(set_attr "type" "ssemov")
12632 (set_attr "prefix" "vex")
12633 (set_attr "mode" "DI")])
;; %vpmovmskb: produce an SI general-register result from a V16QI
;; register; modelled as an unspec.
12635 (define_insn "sse2_pmovmskb"
12636 [(set (match_operand:SI 0 "register_operand" "=r")
12637 (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
12640 "%vpmovmskb\t{%1, %0|%0, %1}"
12641 [(set_attr "type" "ssemov")
12642 (set_attr "prefix_data16" "1")
12643 (set_attr "prefix" "maybe_vex")
12644 (set_attr "mode" "SI")])
;; Expander for a V16QI store to memory (operand 0) computed, via an
;; unspec, from two V16QI register operands (1 and 2).
12646 (define_expand "sse2_maskmovdqu"
12647 [(set (match_operand:V16QI 0 "memory_operand")
12648 (unspec:V16QI [(match_operand:V16QI 1 "register_operand")
12649 (match_operand:V16QI 2 "register_operand")
;; Insn form of maskmovdqu.  The destination address must be in the
;; register selected by constraint "D", and the old memory contents are
;; listed as an unspec input.  When Pmode differs from word_mode the
;; output code writes an addr32 prefix directly to the assembly file
;; before returning the template; length_address and length_vex are
;; computed explicitly to match.
12654 (define_insn "*sse2_maskmovdqu"
12655 [(set (mem:V16QI (match_operand:P 0 "register_operand" "D"))
12656 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
12657 (match_operand:V16QI 2 "register_operand" "x")
12658 (mem:V16QI (match_dup 0))]
12662 /* We can't use %^ here due to ASM_OUTPUT_OPCODE processing
12663 that requires %v to be at the beginning of the opcode name. */
12664 if (Pmode != word_mode)
12665 fputs ("\taddr32", asm_out_file);
12666 return "%vmaskmovdqu\t{%2, %1|%1, %2}";
12668 [(set_attr "type" "ssemov")
12669 (set_attr "prefix_data16" "1")
12670 (set (attr "length_address")
12671 (symbol_ref ("Pmode != word_mode")))
12672 ;; The implicit %rdi operand confuses default length_vex computation.
12673 (set (attr "length_vex")
12674 (symbol_ref ("3 + REX_SSE_REGNO_P (REGNO (operands[2]))")))
12675 (set_attr "prefix" "maybe_vex")
12676 (set_attr "mode" "TI")])
;; unspec_volatile insn taking an SI memory operand; its attributes mark
;; it as an MXCSR operation that loads from memory.
12678 (define_insn "sse_ldmxcsr"
12679 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
12683 [(set_attr "type" "sse")
12684 (set_attr "atom_sse_attr" "mxcsr")
12685 (set_attr "prefix" "maybe_vex")
12686 (set_attr "memory" "load")])
;; Store to an SI memory operand the result of the UNSPECV_STMXCSR
;; unspec_volatile; its attributes mark it as an MXCSR operation that
;; stores to memory.
12688 (define_insn "sse_stmxcsr"
12689 [(set (match_operand:SI 0 "memory_operand" "=m")
12690 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
12693 [(set_attr "type" "sse")
12694 (set_attr "atom_sse_attr" "mxcsr")
12695 (set_attr "prefix" "maybe_vex")
12696 (set_attr "memory" "store")])
;; unspec_volatile insn taking an address operand (constraint "p").  Its
;; memory effect is declared "unknown" and it is classed as a fence for
;; the atom_sse_attr attribute.
12698 (define_insn "sse2_clflush"
12699 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
12703 [(set_attr "type" "sse")
12704 (set_attr "atom_sse_attr" "fence")
12705 (set_attr "memory" "unknown")])
;; unspec_volatile insn for mwait.  Both operands are SI registers pinned
;; by constraint: operand 0 to "a", operand 1 to "c".  The encoded length
;; is fixed at 3 bytes.
12708 (define_insn "sse3_mwait"
12709 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
12710 (match_operand:SI 1 "register_operand" "c")]
12713 ;; 64bit version is "mwait %rax,%rcx". But only lower 32bits are used.
12714 ;; Since 32bit register operands are implicitly zero extended to 64bit,
12715 ;; we only need to set up 32bit registers.
12717 [(set_attr "length" "3")])
;; unspec_volatile insn for monitor.  Operand 0 is a pointer-mode (P)
;; register pinned to "a"; operands 1 and 2 are SI registers pinned to
;; "c" and "d".  The length is 3 bytes, plus 1 when Pmode differs from
;; word_mode.
12719 (define_insn "sse3_monitor_<mode>"
12720 [(unspec_volatile [(match_operand:P 0 "register_operand" "a")
12721 (match_operand:SI 1 "register_operand" "c")
12722 (match_operand:SI 2 "register_operand" "d")]
12725 ;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in
12726 ;; RCX and RDX are used. Since 32bit register operands are implicitly
12727 ;; zero extended to 64bit, we only need to set up 32bit registers.
12729 [(set (attr "length")
12730 (symbol_ref ("(Pmode != word_mode) + 3")))])
12732 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
12734 ;; SSSE3 instructions
12736 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; RTL codes used by the horizontal add/subtract patterns: plain and
;; signed-saturating addition, plain and signed-saturating subtraction.
12738 (define_code_iterator ssse3_plusminus [plus ss_plus minus ss_minus])
;; 256-bit horizontal add/subtract on 16-bit elements (V16HI).  Each
;; ssse3_plusminus term combines an even-indexed element with its odd
;; neighbour: pairs (0,1) ... (14,15) of operand 1 are listed first, then
;; the same pairs of operand 2.  Operand 1 must be a register; operand 2
;; may be a register or memory.  Only a three-operand VEX form is emitted.
12740 (define_insn "avx2_ph<plusminus_mnemonic>wv16hi3"
12741 [(set (match_operand:V16HI 0 "register_operand" "=x")
12746 (ssse3_plusminus:HI
12748 (match_operand:V16HI 1 "register_operand" "x")
12749 (parallel [(const_int 0)]))
12750 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
12751 (ssse3_plusminus:HI
12752 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
12753 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
12755 (ssse3_plusminus:HI
12756 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
12757 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
12758 (ssse3_plusminus:HI
12759 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
12760 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
12763 (ssse3_plusminus:HI
12764 (vec_select:HI (match_dup 1) (parallel [(const_int 8)]))
12765 (vec_select:HI (match_dup 1) (parallel [(const_int 9)])))
12766 (ssse3_plusminus:HI
12767 (vec_select:HI (match_dup 1) (parallel [(const_int 10)]))
12768 (vec_select:HI (match_dup 1) (parallel [(const_int 11)]))))
12770 (ssse3_plusminus:HI
12771 (vec_select:HI (match_dup 1) (parallel [(const_int 12)]))
12772 (vec_select:HI (match_dup 1) (parallel [(const_int 13)])))
12773 (ssse3_plusminus:HI
12774 (vec_select:HI (match_dup 1) (parallel [(const_int 14)]))
12775 (vec_select:HI (match_dup 1) (parallel [(const_int 15)]))))))
12779 (ssse3_plusminus:HI
12781 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
12782 (parallel [(const_int 0)]))
12783 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
12784 (ssse3_plusminus:HI
12785 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
12786 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
12788 (ssse3_plusminus:HI
12789 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
12790 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
12791 (ssse3_plusminus:HI
12792 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
12793 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))
12796 (ssse3_plusminus:HI
12797 (vec_select:HI (match_dup 2) (parallel [(const_int 8)]))
12798 (vec_select:HI (match_dup 2) (parallel [(const_int 9)])))
12799 (ssse3_plusminus:HI
12800 (vec_select:HI (match_dup 2) (parallel [(const_int 10)]))
12801 (vec_select:HI (match_dup 2) (parallel [(const_int 11)]))))
12803 (ssse3_plusminus:HI
12804 (vec_select:HI (match_dup 2) (parallel [(const_int 12)]))
12805 (vec_select:HI (match_dup 2) (parallel [(const_int 13)])))
12806 (ssse3_plusminus:HI
12807 (vec_select:HI (match_dup 2) (parallel [(const_int 14)]))
12808 (vec_select:HI (match_dup 2) (parallel [(const_int 15)]))))))))]
12810 "vph<plusminus_mnemonic>w\t{%2, %1, %0|%0, %1, %2}"
12811 [(set_attr "type" "sseiadd")
12812 (set_attr "prefix_extra" "1")
12813 (set_attr "prefix" "vex")
12814 (set_attr "mode" "OI")])
;; 128-bit horizontal add/subtract on 16-bit elements (V8HI): pairs (0,1)
;; ... (6,7) of operand 1 are listed first, then the same pairs of
;; operand 2.  Alternative 0 (isa noavx) is the two-operand legacy form
;; with operand 1 tied to the destination; alternative 1 (isa avx) is the
;; three-operand VEX form.
12816 (define_insn "ssse3_ph<plusminus_mnemonic>wv8hi3"
12817 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
12821 (ssse3_plusminus:HI
12823 (match_operand:V8HI 1 "register_operand" "0,x")
12824 (parallel [(const_int 0)]))
12825 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
12826 (ssse3_plusminus:HI
12827 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
12828 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
12830 (ssse3_plusminus:HI
12831 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
12832 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
12833 (ssse3_plusminus:HI
12834 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
12835 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
12838 (ssse3_plusminus:HI
12840 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
12841 (parallel [(const_int 0)]))
12842 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
12843 (ssse3_plusminus:HI
12844 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
12845 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
12847 (ssse3_plusminus:HI
12848 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
12849 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
12850 (ssse3_plusminus:HI
12851 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
12852 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
12855 ph<plusminus_mnemonic>w\t{%2, %0|%0, %2}
12856 vph<plusminus_mnemonic>w\t{%2, %1, %0|%0, %1, %2}"
12857 [(set_attr "isa" "noavx,avx")
12858 (set_attr "type" "sseiadd")
12859 (set_attr "atom_unit" "complex")
12860 (set_attr "prefix_data16" "1,*")
12861 (set_attr "prefix_extra" "1")
12862 (set_attr "prefix" "orig,vex")
12863 (set_attr "mode" "TI")])
;; 64-bit horizontal add/subtract on 16-bit elements (V4HI) using the "y"
;; register constraint: pairs (0,1) and (2,3) of operand 1, then of
;; operand 2.  Two-operand form with operand 1 tied to the destination.
;; A REX prefix is accounted for only when
;; x86_extended_reg_mentioned_p (insn) is true.
12865 (define_insn "ssse3_ph<plusminus_mnemonic>wv4hi3"
12866 [(set (match_operand:V4HI 0 "register_operand" "=y")
12869 (ssse3_plusminus:HI
12871 (match_operand:V4HI 1 "register_operand" "0")
12872 (parallel [(const_int 0)]))
12873 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
12874 (ssse3_plusminus:HI
12875 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
12876 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
12878 (ssse3_plusminus:HI
12880 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
12881 (parallel [(const_int 0)]))
12882 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
12883 (ssse3_plusminus:HI
12884 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
12885 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
12887 "ph<plusminus_mnemonic>w\t{%2, %0|%0, %2}"
12888 [(set_attr "type" "sseiadd")
12889 (set_attr "atom_unit" "complex")
12890 (set_attr "prefix_extra" "1")
12891 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
12892 (set_attr "mode" "DI")])
;; 256-bit horizontal add/subtract on 32-bit elements (V8SI).  The selects
;; pair element 2k with element 2k+1: pairs (0,1) ... (6,7) of operand 1
;; are listed first, then the same pairs of operand 2.  Only a
;; three-operand VEX form is emitted.
12894 (define_insn "avx2_ph<plusminus_mnemonic>dv8si3"
12895 [(set (match_operand:V8SI 0 "register_operand" "=x")
12901 (match_operand:V8SI 1 "register_operand" "x")
12902 (parallel [(const_int 0)]))
12903 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
12905 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
12906 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
12909 (vec_select:SI (match_dup 1) (parallel [(const_int 4)]))
12910 (vec_select:SI (match_dup 1) (parallel [(const_int 5)])))
12912 (vec_select:SI (match_dup 1) (parallel [(const_int 6)]))
12913 (vec_select:SI (match_dup 1) (parallel [(const_int 7)])))))
12918 (match_operand:V8SI 2 "nonimmediate_operand" "xm")
12919 (parallel [(const_int 0)]))
12920 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
12922 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
12923 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))
12926 (vec_select:SI (match_dup 2) (parallel [(const_int 4)]))
12927 (vec_select:SI (match_dup 2) (parallel [(const_int 5)])))
12929 (vec_select:SI (match_dup 2) (parallel [(const_int 6)]))
12930 (vec_select:SI (match_dup 2) (parallel [(const_int 7)])))))))]
12932 "vph<plusminus_mnemonic>d\t{%2, %1, %0|%0, %1, %2}"
12933 [(set_attr "type" "sseiadd")
12934 (set_attr "prefix_extra" "1")
12935 (set_attr "prefix" "vex")
12936 (set_attr "mode" "OI")])
;; 128-bit horizontal add/subtract on 32-bit elements (V4SI): pairs (0,1)
;; and (2,3) of operand 1, then of operand 2.  Alternative 0 (isa noavx)
;; is the two-operand legacy form with operand 1 tied to the destination;
;; alternative 1 (isa avx) is the three-operand VEX form.
12938 (define_insn "ssse3_ph<plusminus_mnemonic>dv4si3"
12939 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
12944 (match_operand:V4SI 1 "register_operand" "0,x")
12945 (parallel [(const_int 0)]))
12946 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
12948 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
12949 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
12953 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
12954 (parallel [(const_int 0)]))
12955 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
12957 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
12958 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
12961 ph<plusminus_mnemonic>d\t{%2, %0|%0, %2}
12962 vph<plusminus_mnemonic>d\t{%2, %1, %0|%0, %1, %2}"
12963 [(set_attr "isa" "noavx,avx")
12964 (set_attr "type" "sseiadd")
12965 (set_attr "atom_unit" "complex")
12966 (set_attr "prefix_data16" "1,*")
12967 (set_attr "prefix_extra" "1")
12968 (set_attr "prefix" "orig,vex")
12969 (set_attr "mode" "TI")])
;; 64-bit horizontal add/subtract on 32-bit elements (V2SI) using the "y"
;; register constraint: elements (0,1) of operand 1, then of operand 2.
;; Two-operand form with operand 1 tied to the destination.  A REX prefix
;; is accounted for only when x86_extended_reg_mentioned_p (insn) is true.
12971 (define_insn "ssse3_ph<plusminus_mnemonic>dv2si3"
12972 [(set (match_operand:V2SI 0 "register_operand" "=y")
12976 (match_operand:V2SI 1 "register_operand" "0")
12977 (parallel [(const_int 0)]))
12978 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
12981 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
12982 (parallel [(const_int 0)]))
12983 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
12985 "ph<plusminus_mnemonic>d\t{%2, %0|%0, %2}"
12986 [(set_attr "type" "sseiadd")
12987 (set_attr "atom_unit" "complex")
12988 (set_attr "prefix_extra" "1")
12989 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
12990 (set_attr "mode" "DI")])
;; 256-bit vpmaddubsw: two V32QI inputs produce a V16HI result.  The
;; pattern selects the even-indexed bytes (0, 2, ..., 30) of operands 1 and
;; 2 first, then the odd-indexed bytes (1, 3, ..., 31) of the same
;; operands.  Operand 1 must be a register; operand 2 may be a register or
;; memory.  Only a three-operand VEX form is emitted.
12992 (define_insn "avx2_pmaddubsw256"
12993 [(set (match_operand:V16HI 0 "register_operand" "=x")
12998 (match_operand:V32QI 1 "register_operand" "x")
12999 (parallel [(const_int 0) (const_int 2)
13000 (const_int 4) (const_int 6)
13001 (const_int 8) (const_int 10)
13002 (const_int 12) (const_int 14)
13003 (const_int 16) (const_int 18)
13004 (const_int 20) (const_int 22)
13005 (const_int 24) (const_int 26)
13006 (const_int 28) (const_int 30)])))
13009 (match_operand:V32QI 2 "nonimmediate_operand" "xm")
13010 (parallel [(const_int 0) (const_int 2)
13011 (const_int 4) (const_int 6)
13012 (const_int 8) (const_int 10)
13013 (const_int 12) (const_int 14)
13014 (const_int 16) (const_int 18)
13015 (const_int 20) (const_int 22)
13016 (const_int 24) (const_int 26)
13017 (const_int 28) (const_int 30)]))))
13020 (vec_select:V16QI (match_dup 1)
13021 (parallel [(const_int 1) (const_int 3)
13022 (const_int 5) (const_int 7)
13023 (const_int 9) (const_int 11)
13024 (const_int 13) (const_int 15)
13025 (const_int 17) (const_int 19)
13026 (const_int 21) (const_int 23)
13027 (const_int 25) (const_int 27)
13028 (const_int 29) (const_int 31)])))
13030 (vec_select:V16QI (match_dup 2)
13031 (parallel [(const_int 1) (const_int 3)
13032 (const_int 5) (const_int 7)
13033 (const_int 9) (const_int 11)
13034 (const_int 13) (const_int 15)
13035 (const_int 17) (const_int 19)
13036 (const_int 21) (const_int 23)
13037 (const_int 25) (const_int 27)
13038 (const_int 29) (const_int 31)]))))))]
13040 "vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
13041 [(set_attr "type" "sseiadd")
13042 (set_attr "prefix_extra" "1")
13043 (set_attr "prefix" "vex")
13044 (set_attr "mode" "OI")])
;; 128-bit pmaddubsw: two V16QI inputs produce a V8HI result.  The pattern
;; selects the even-indexed bytes (0, 2, ..., 14) of operands 1 and 2
;; first, then the odd-indexed bytes (1, 3, ..., 15).  Alternative 0 (isa
;; noavx) is the two-operand legacy form with operand 1 tied to the
;; destination; alternative 1 (isa avx) is the three-operand VEX form.
13046 (define_insn "ssse3_pmaddubsw128"
13047 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
13052 (match_operand:V16QI 1 "register_operand" "0,x")
13053 (parallel [(const_int 0) (const_int 2)
13054 (const_int 4) (const_int 6)
13055 (const_int 8) (const_int 10)
13056 (const_int 12) (const_int 14)])))
13059 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")
13060 (parallel [(const_int 0) (const_int 2)
13061 (const_int 4) (const_int 6)
13062 (const_int 8) (const_int 10)
13063 (const_int 12) (const_int 14)]))))
13066 (vec_select:V8QI (match_dup 1)
13067 (parallel [(const_int 1) (const_int 3)
13068 (const_int 5) (const_int 7)
13069 (const_int 9) (const_int 11)
13070 (const_int 13) (const_int 15)])))
13072 (vec_select:V8QI (match_dup 2)
13073 (parallel [(const_int 1) (const_int 3)
13074 (const_int 5) (const_int 7)
13075 (const_int 9) (const_int 11)
13076 (const_int 13) (const_int 15)]))))))]
13079 pmaddubsw\t{%2, %0|%0, %2}
13080 vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
13081 [(set_attr "isa" "noavx,avx")
13082 (set_attr "type" "sseiadd")
13083 (set_attr "atom_unit" "simul")
13084 (set_attr "prefix_data16" "1,*")
13085 (set_attr "prefix_extra" "1")
13086 (set_attr "prefix" "orig,vex")
13087 (set_attr "mode" "TI")])
;; 64-bit pmaddubsw using the "y" register constraint: two V8QI inputs
;; produce a V4HI result.  The pattern selects the even-indexed bytes
;; (0, 2, 4, 6) of operands 1 and 2 first, then the odd-indexed bytes
;; (1, 3, 5, 7).  Two-operand form with operand 1 tied to the destination.
13089 (define_insn "ssse3_pmaddubsw"
13090 [(set (match_operand:V4HI 0 "register_operand" "=y")
13095 (match_operand:V8QI 1 "register_operand" "0")
13096 (parallel [(const_int 0) (const_int 2)
13097 (const_int 4) (const_int 6)])))
13100 (match_operand:V8QI 2 "nonimmediate_operand" "ym")
13101 (parallel [(const_int 0) (const_int 2)
13102 (const_int 4) (const_int 6)]))))
13105 (vec_select:V4QI (match_dup 1)
13106 (parallel [(const_int 1) (const_int 3)
13107 (const_int 5) (const_int 7)])))
13109 (vec_select:V4QI (match_dup 2)
13110 (parallel [(const_int 1) (const_int 3)
13111 (const_int 5) (const_int 7)]))))))]
13113 "pmaddubsw\t{%2, %0|%0, %2}"
13114 [(set_attr "type" "sseiadd")
13115 (set_attr "atom_unit" "simul")
13116 (set_attr "prefix_extra" "1")
13117 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
13118 (set_attr "mode" "DI")])
;; Modes handled by the pmulhrsw expander: V4HI and V8HI unconditionally,
;; V16HI only when TARGET_AVX2 holds.
13120 (define_mode_iterator PMULHRSW
13121 [V4HI V8HI (V16HI "TARGET_AVX2")])
;; Expander for pmulhrsw over the PMULHRSW modes.  The RTL sign-extends
;; both inputs to <ssedoublemode>, multiplies them, shifts right, adds, and
;; shifts right again.  The preparation code sets operands[3] to
;; CONST1_RTX (<MODE>mode) and then calls
;; ix86_fixup_binary_operands_no_copy for the MULT.
13123 (define_expand "<ssse3_avx2>_pmulhrsw<mode>3"
13124 [(set (match_operand:PMULHRSW 0 "register_operand")
13126 (lshiftrt:<ssedoublemode>
13127 (plus:<ssedoublemode>
13128 (lshiftrt:<ssedoublemode>
13129 (mult:<ssedoublemode>
13130 (sign_extend:<ssedoublemode>
13131 (match_operand:PMULHRSW 1 "nonimmediate_operand"))
13132 (sign_extend:<ssedoublemode>
13133 (match_operand:PMULHRSW 2 "nonimmediate_operand")))
13139 operands[3] = CONST1_RTX(<MODE>mode);
13140 ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
;; Insn matching the pmulhrsw shape for the VI2_AVX2 modes: sign-extended
;; multiply, right shift, add, right shift, with operand 3 required to
;; satisfy const1_operand.  Operands 1 and 2 are marked commutative ("%").
;; Enabled for TARGET_SSSE3 when ix86_binary_operator_ok accepts the MULT
;; operands.  Alternative 0 (isa noavx) is the two-operand legacy form;
;; alternative 1 (isa avx) is the three-operand VEX form.
13143 (define_insn "*<ssse3_avx2>_pmulhrsw<mode>3"
13144 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
13146 (lshiftrt:<ssedoublemode>
13147 (plus:<ssedoublemode>
13148 (lshiftrt:<ssedoublemode>
13149 (mult:<ssedoublemode>
13150 (sign_extend:<ssedoublemode>
13151 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,x"))
13152 (sign_extend:<ssedoublemode>
13153 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,xm")))
13155 (match_operand:VI2_AVX2 3 "const1_operand"))
13157 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
13159 pmulhrsw\t{%2, %0|%0, %2}
13160 vpmulhrsw\t{%2, %1, %0|%0, %1, %2}"
13161 [(set_attr "isa" "noavx,avx")
13162 (set_attr "type" "sseimul")
13163 (set_attr "prefix_data16" "1,*")
13164 (set_attr "prefix_extra" "1")
13165 (set_attr "prefix" "orig,vex")
13166 (set_attr "mode" "<sseinsnmode>")])
;; V4HI pmulhrsw using the "y" register constraint.  Operands 1 and 2 are
;; commutative ("%"), operand 1 is tied to the destination, and operand 3
;; must satisfy const1_operand.  Enabled for TARGET_SSSE3 when
;; ix86_binary_operator_ok accepts the MULT operands in V4HImode.
13168 (define_insn "*ssse3_pmulhrswv4hi3"
13169 [(set (match_operand:V4HI 0 "register_operand" "=y")
13176 (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
13178 (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
13180 (match_operand:V4HI 3 "const1_operand"))
13182 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
13183 "pmulhrsw\t{%2, %0|%0, %2}"
13184 [(set_attr "type" "sseimul")
13185 (set_attr "prefix_extra" "1")
13186 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
13187 (set_attr "mode" "DI")])
;; pshufb / vpshufb for the VI1_AVX2 modes.  Operand 1 must be a register;
;; operand 2 may be a register or memory.  Alternative 0 (isa noavx) is
;; the two-operand legacy form with operand 1 tied to the destination;
;; alternative 1 (isa avx) is the three-operand VEX form.  Both
;; alternatives are marked "vector" for btver2 decoding.
13189 (define_insn "<ssse3_avx2>_pshufb<mode>3"
13190 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
13192 [(match_operand:VI1_AVX2 1 "register_operand" "0,x")
13193 (match_operand:VI1_AVX2 2 "nonimmediate_operand" "xm,xm")]
13197 pshufb\t{%2, %0|%0, %2}
13198 vpshufb\t{%2, %1, %0|%0, %1, %2}"
13199 [(set_attr "isa" "noavx,avx")
13200 (set_attr "type" "sselog1")
13201 (set_attr "prefix_data16" "1,*")
13202 (set_attr "prefix_extra" "1")
13203 (set_attr "prefix" "orig,vex")
13204 (set_attr "btver2_decode" "vector,vector")
13205 (set_attr "mode" "<sseinsnmode>")])
;; V8QI pshufb using the "y" register constraint, expressed as an unspec
;; of operands 1 and 2.  Two-operand form with operand 1 tied to the
;; destination.
;; NOTE(review): the ';' after the output template only starts an empty
;; md comment; it looks like a stray character and could be dropped.
13207 (define_insn "ssse3_pshufbv8qi3"
13208 [(set (match_operand:V8QI 0 "register_operand" "=y")
13209 (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0")
13210 (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
13213 "pshufb\t{%2, %0|%0, %2}";
13214 [(set_attr "type" "sselog1")
13215 (set_attr "prefix_extra" "1")
13216 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
13217 (set_attr "mode" "DI")])
;; psign / vpsign for the VI124_AVX2 modes; the mnemonic suffix comes from
;; <ssemodesuffix>.  Alternative 0 (isa noavx) is the two-operand legacy
;; form with operand 1 tied to the destination; alternative 1 (isa avx) is
;; the three-operand VEX form.
13219 (define_insn "<ssse3_avx2>_psign<mode>3"
13220 [(set (match_operand:VI124_AVX2 0 "register_operand" "=x,x")
13222 [(match_operand:VI124_AVX2 1 "register_operand" "0,x")
13223 (match_operand:VI124_AVX2 2 "nonimmediate_operand" "xm,xm")]
13227 psign<ssemodesuffix>\t{%2, %0|%0, %2}
13228 vpsign<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
13229 [(set_attr "isa" "noavx,avx")
13230 (set_attr "type" "sselog1")
13231 (set_attr "prefix_data16" "1,*")
13232 (set_attr "prefix_extra" "1")
13233 (set_attr "prefix" "orig,vex")
13234 (set_attr "mode" "<sseinsnmode>")])
;; psign for the MMXMODEI modes using the "y" register constraint; the
;; mnemonic suffix comes from <mmxvecsize>.  Two-operand form with
;; operand 1 tied to the destination.
;; NOTE(review): the ';' after the output template only starts an empty
;; md comment; it looks like a stray character and could be dropped.
13236 (define_insn "ssse3_psign<mode>3"
13237 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
13239 [(match_operand:MMXMODEI 1 "register_operand" "0")
13240 (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
13243 "psign<mmxvecsize>\t{%2, %0|%0, %2}";
13244 [(set_attr "type" "sselog1")
13245 (set_attr "prefix_extra" "1")
13246 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
13247 (set_attr "mode" "DI")])
;; palignr / vpalignr for the SSESCALARMODE modes, expressed as an unspec.
;; Operand 3 must satisfy const_0_to_255_mul_8_operand; the output code
;; divides it by 8 before printing it as the instruction immediate.  The
;; template is chosen by which_alternative: the two-operand legacy form
;; (isa noavx, operand 1 tied to the destination) or the three-operand VEX
;; form (isa avx).  Any other alternative is gcc_unreachable.
13249 (define_insn "<ssse3_avx2>_palignr<mode>"
13250 [(set (match_operand:SSESCALARMODE 0 "register_operand" "=x,x")
13251 (unspec:SSESCALARMODE
13252 [(match_operand:SSESCALARMODE 1 "register_operand" "0,x")
13253 (match_operand:SSESCALARMODE 2 "nonimmediate_operand" "xm,xm")
13254 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n,n")]
13258 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
13260 switch (which_alternative)
13263 return "palignr\t{%3, %2, %0|%0, %2, %3}";
13265 return "vpalignr\t{%3, %2, %1, %0|%0, %1, %2, %3}";
13267 gcc_unreachable ();
13270 [(set_attr "isa" "noavx,avx")
13271 (set_attr "type" "sseishft")
13272 (set_attr "atom_unit" "sishuf")
13273 (set_attr "prefix_data16" "1,*")
13274 (set_attr "prefix_extra" "1")
13275 (set_attr "length_immediate" "1")
13276 (set_attr "prefix" "orig,vex")
13277 (set_attr "mode" "<sseinsnmode>")])
;; DImode palignr using the "y" register constraint, expressed as an
;; unspec.  Operand 3 must satisfy const_0_to_255_mul_8_operand; the
;; output code divides it by 8 before printing it as the instruction
;; immediate.  Two-operand form with operand 1 tied to the destination.
13279 (define_insn "ssse3_palignrdi"
13280 [(set (match_operand:DI 0 "register_operand" "=y")
13281 (unspec:DI [(match_operand:DI 1 "register_operand" "0")
13282 (match_operand:DI 2 "nonimmediate_operand" "ym")
13283 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
13287 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
13288 return "palignr\t{%3, %2, %0|%0, %2, %3}";
13290 [(set_attr "type" "sseishft")
13291 (set_attr "atom_unit" "sishuf")
13292 (set_attr "prefix_extra" "1")
13293 (set_attr "length_immediate" "1")
13294 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
13295 (set_attr "mode" "DI")])
;; Vector absolute value (RTL abs) for the VI124_AVX2_48_AVX512F modes,
;; with an optional masked variant generated through <mask_name>.
;; Enabled when TARGET_SSSE3 and <mask_mode512bit_condition> both hold.
;; Emits pabs<ssemodesuffix>, with the "v" prefix added by %v when
;; applicable; the source may be a register or memory.
13297 (define_insn "<mask_codefor>abs<mode>2<mask_name>"
13298 [(set (match_operand:VI124_AVX2_48_AVX512F 0 "register_operand" "=v")
13299 (abs:VI124_AVX2_48_AVX512F
13300 (match_operand:VI124_AVX2_48_AVX512F 1 "nonimmediate_operand" "vm")))]
13301 "TARGET_SSSE3 && <mask_mode512bit_condition>"
13302 "%vpabs<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
13303 [(set_attr "type" "sselog1")
13304 (set_attr "prefix_data16" "1")
13305 (set_attr "prefix_extra" "1")
13306 (set_attr "prefix" "maybe_vex")
13307 (set_attr "mode" "<sseinsnmode>")])
;; Expander for vector abs over the VI124_AVX2_48_AVX512F modes.  The
;; preparation code passes the destination and source to
;; ix86_expand_sse2_abs.
;; NOTE(review): the condition guarding that call is not shown here;
;; presumably it is the fallback when SSSE3 pabs is unavailable -- confirm.
13309 (define_expand "abs<mode>2"
13310 [(set (match_operand:VI124_AVX2_48_AVX512F 0 "register_operand")
13311 (abs:VI124_AVX2_48_AVX512F
13312 (match_operand:VI124_AVX2_48_AVX512F 1 "nonimmediate_operand")))]
13317 ix86_expand_sse2_abs (operands[0], operands[1]);
;; pabs for the MMXMODEI modes using the "y" register constraint; the
;; mnemonic suffix comes from <mmxvecsize>.  The source may be a register
;; or memory.  "prefix_rep" is explicitly set to "0".
;; NOTE(review): the ';' after the output template only starts an empty
;; md comment; it looks like a stray character and could be dropped.
13322 (define_insn "abs<mode>2"
13323 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
13325 (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
13327 "pabs<mmxvecsize>\t{%1, %0|%0, %1}";
13328 [(set_attr "type" "sselog1")
13329 (set_attr "prefix_rep" "0")
13330 (set_attr "prefix_extra" "1")
13331 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
13332 (set_attr "mode" "DI")])
13334 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
13336 ;; AMD SSE4A instructions
13338 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; movnt<ssemodesuffix> for the MODEF modes: writes register operand 1 to
;; memory operand 0.
13340 (define_insn "sse4a_movnt<mode>"
13341 [(set (match_operand:MODEF 0 "memory_operand" "=m")
13343 [(match_operand:MODEF 1 "register_operand" "x")]
13346 "movnt<ssemodesuffix>\t{%1, %0|%0, %1}"
13347 [(set_attr "type" "ssemov")
13348 (set_attr "mode" "<MODE>")])
;; movnt<ssescalarmodesuffix> for the VF_128 modes: writes element 0 of
;; vector register operand 1 to a scalar (<ssescalarmode>) memory operand.
13350 (define_insn "sse4a_vmmovnt<mode>"
13351 [(set (match_operand:<ssescalarmode> 0 "memory_operand" "=m")
13352 (unspec:<ssescalarmode>
13353 [(vec_select:<ssescalarmode>
13354 (match_operand:VF_128 1 "register_operand" "x")
13355 (parallel [(const_int 0)]))]
13358 "movnt<ssescalarmodesuffix>\t{%1, %0|%0, %1}"
13359 [(set_attr "type" "ssemov")
13360 (set_attr "mode" "<ssescalarmode>")])
;; Immediate form of extrq: V2DI operand 1 is tied to the destination, and
;; operands 2 and 3 are constants in the range 0..255.  The two immediates
;; account for "length_immediate" = 2.
13362 (define_insn "sse4a_extrqi"
13363 [(set (match_operand:V2DI 0 "register_operand" "=x")
13364 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
13365 (match_operand 2 "const_0_to_255_operand")
13366 (match_operand 3 "const_0_to_255_operand")]
13369 "extrq\t{%3, %2, %0|%0, %2, %3}"
13370 [(set_attr "type" "sse")
13371 (set_attr "prefix_data16" "1")
13372 (set_attr "length_immediate" "2")
13373 (set_attr "mode" "TI")])
;; Register form of extrq: V2DI operand 1 is tied to the destination and
;; operand 2 is a V16QI register.
13375 (define_insn "sse4a_extrq"
13376 [(set (match_operand:V2DI 0 "register_operand" "=x")
13377 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
13378 (match_operand:V16QI 2 "register_operand" "x")]
13381 "extrq\t{%2, %0|%0, %2}"
13382 [(set_attr "type" "sse")
13383 (set_attr "prefix_data16" "1")
13384 (set_attr "mode" "TI")])
;; Immediate form of insertq: V2DI operand 1 is tied to the destination,
;; operand 2 is a V2DI register, and operands 3 and 4 are constants in the
;; range 0..255 ("length_immediate" = 2).  The encoding attributes select a
;; rep prefix and no data16 prefix.
13386 (define_insn "sse4a_insertqi"
13387 [(set (match_operand:V2DI 0 "register_operand" "=x")
13388 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
13389 (match_operand:V2DI 2 "register_operand" "x")
13390 (match_operand 3 "const_0_to_255_operand")
13391 (match_operand 4 "const_0_to_255_operand")]
13394 "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
13395 [(set_attr "type" "sseins")
13396 (set_attr "prefix_data16" "0")
13397 (set_attr "prefix_rep" "1")
13398 (set_attr "length_immediate" "2")
13399 (set_attr "mode" "TI")])
;; Register form of insertq: V2DI operand 1 is tied to the destination and
;; operand 2 is a V2DI register.  The encoding attributes select a rep
;; prefix and no data16 prefix.
13401 (define_insn "sse4a_insertq"
13402 [(set (match_operand:V2DI 0 "register_operand" "=x")
13403 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
13404 (match_operand:V2DI 2 "register_operand" "x")]
13407 "insertq\t{%2, %0|%0, %2}"
13408 [(set_attr "type" "sseins")
13409 (set_attr "prefix_data16" "0")
13410 (set_attr "prefix_rep" "1")
13411 (set_attr "mode" "TI")])
13413 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
13415 ;; Intel SSE4.1 instructions
13417 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
13419 ;; Mapping of immediate bits for blend instructions
;; The value is the largest immediate accepted for each mode; it is used
;; to build the predicate name const_0_to_<blendbits>_operand.
13420 (define_mode_attr blendbits
13421 [(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")])
;; Immediate blend for the VF_128_256 modes, modelled as a vec_merge of
;; operand 2 and operand 1 under the constant mask in operand 3 (range
;; limited by <blendbits>).  Note that operand 2 is the first vec_merge
;; argument.  Alternative 0 (isa noavx) is the legacy form with operand 1
;; tied to the destination; alternative 1 (isa avx) is the VEX form.
13423 (define_insn "<sse4_1>_blend<ssemodesuffix><avxsizesuffix>"
13424 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
13425 (vec_merge:VF_128_256
13426 (match_operand:VF_128_256 2 "nonimmediate_operand" "xm,xm")
13427 (match_operand:VF_128_256 1 "register_operand" "0,x")
13428 (match_operand:SI 3 "const_0_to_<blendbits>_operand")))]
13431 blend<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
13432 vblend<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
13433 [(set_attr "isa" "noavx,avx")
13434 (set_attr "type" "ssemov")
13435 (set_attr "length_immediate" "1")
13436 (set_attr "prefix_data16" "1,*")
13437 (set_attr "prefix_extra" "1")
13438 (set_attr "prefix" "orig,vex")
13439 (set_attr "mode" "<MODE>")])
;; Variable blend for the VF_128_256 modes; the selector is register
;; operand 3.  Alternative 0 (isa noavx) ties operand 1 to the destination
;; and restricts operand 3 to the "Yz" constraint; alternative 1 (isa avx)
;; is the four-operand VEX form with operand 3 in any "x" register.
;; NOTE(review): "length_immediate" is "1" for both alternatives, whereas
;; the pblendvb pattern uses "*,1"; check whether the legacy alternative
;; really carries an immediate byte.
13441 (define_insn "<sse4_1>_blendv<ssemodesuffix><avxsizesuffix>"
13442 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
13444 [(match_operand:VF_128_256 1 "register_operand" "0,x")
13445 (match_operand:VF_128_256 2 "nonimmediate_operand" "xm,xm")
13446 (match_operand:VF_128_256 3 "register_operand" "Yz,x")]
13450 blendv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
13451 vblendv<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
13452 [(set_attr "isa" "noavx,avx")
13453 (set_attr "type" "ssemov")
13454 (set_attr "length_immediate" "1")
13455 (set_attr "prefix_data16" "1,*")
13456 (set_attr "prefix_extra" "1")
13457 (set_attr "prefix" "orig,vex")
13458 (set_attr "btver2_decode" "vector,vector")
13459 (set_attr "mode" "<MODE>")])
;; dp<ssemodesuffix> / vdp<ssemodesuffix> for the VF_128_256 modes, with an
;; 8-bit immediate in operand 3 (range 0..255).  Operands 1 and 2 are
;; marked commutative ("%").  Alternative 0 (isa noavx) is the legacy form
;; with operand 1 tied to the destination; alternative 1 (isa avx) is the
;; VEX form.
13461 (define_insn "<sse4_1>_dp<ssemodesuffix><avxsizesuffix>"
13462 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
13464 [(match_operand:VF_128_256 1 "nonimmediate_operand" "%0,x")
13465 (match_operand:VF_128_256 2 "nonimmediate_operand" "xm,xm")
13466 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
13470 dp<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
13471 vdp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
13472 [(set_attr "isa" "noavx,avx")
13473 (set_attr "type" "ssemul")
13474 (set_attr "length_immediate" "1")
13475 (set_attr "prefix_data16" "1,*")
13476 (set_attr "prefix_extra" "1")
13477 (set_attr "prefix" "orig,vex")
13478 (set_attr "btver2_decode" "vector,vector")
13479 (set_attr "mode" "<MODE>")])
13481 ;; Mode attribute used by `vmovntdqa' pattern
;; It supplies the pattern-name prefix for each mode: V2DI -> sse4_1,
;; V4DI -> avx2, V8DI -> avx512f.
13482 (define_mode_attr vi8_sse4_1_avx2_avx512
13483 [(V2DI "sse4_1") (V4DI "avx2") (V8DI "avx512f")])
;; movntdqa for the VI8_AVX2_AVX512F modes: loads memory operand 1 into
;; register operand 0.  Alternative 0 uses an "x" destination with a
;; maybe_vex prefix; alternative 1 uses a "v" destination with an EVEX
;; prefix.  %v adds the "v" mnemonic prefix when applicable.
13485 (define_insn "<vi8_sse4_1_avx2_avx512>_movntdqa"
13486 [(set (match_operand:VI8_AVX2_AVX512F 0 "register_operand" "=x, v")
13487 (unspec:VI8_AVX2_AVX512F [(match_operand:VI8_AVX2_AVX512F 1 "memory_operand" "m, m")]
13490 "%vmovntdqa\t{%1, %0|%0, %1}"
13491 [(set_attr "type" "ssemov")
13492 (set_attr "prefix_extra" "1, *")
13493 (set_attr "prefix" "maybe_vex, evex")
13494 (set_attr "mode" "<sseinsnmode>")])
;; mpsadbw / vmpsadbw for the VI1_AVX2 modes, with an 8-bit immediate in
;; operand 3 (range 0..255).  Alternative 0 (isa noavx) is the legacy form
;; with operand 1 tied to the destination; alternative 1 (isa avx) is the
;; VEX form.
13496 (define_insn "<sse4_1_avx2>_mpsadbw"
13497 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
13499 [(match_operand:VI1_AVX2 1 "register_operand" "0,x")
13500 (match_operand:VI1_AVX2 2 "nonimmediate_operand" "xm,xm")
13501 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
13505 mpsadbw\t{%3, %2, %0|%0, %2, %3}
13506 vmpsadbw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
13507 [(set_attr "isa" "noavx,avx")
13508 (set_attr "type" "sselog1")
13509 (set_attr "length_immediate" "1")
13510 (set_attr "prefix_extra" "1")
13511 (set_attr "prefix" "orig,vex")
13512 (set_attr "btver2_decode" "vector,vector")
13513 (set_attr "mode" "<sseinsnmode>")])
;; 256-bit vpackusdw: two V8SI inputs produce a V16HI result.  Operand 1
;; must be a register; operand 2 may be a register or memory.  Only a
;; three-operand VEX form is emitted.
13515 (define_insn "avx2_packusdw"
13516 [(set (match_operand:V16HI 0 "register_operand" "=x")
13519 (match_operand:V8SI 1 "register_operand" "x"))
13521 (match_operand:V8SI 2 "nonimmediate_operand" "xm"))))]
13523 "vpackusdw\t{%2, %1, %0|%0, %1, %2}"
13524 [(set_attr "type" "sselog")
13525 (set_attr "prefix_extra" "1")
13526 (set_attr "prefix" "vex")
13527 (set_attr "mode" "OI")])
;; 128-bit packusdw: two V4SI inputs produce a V8HI result.  Alternative 0
;; (isa noavx) is the two-operand legacy form with operand 1 tied to the
;; destination; alternative 1 (isa avx) is the three-operand VEX form.
13529 (define_insn "sse4_1_packusdw"
13530 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
13533 (match_operand:V4SI 1 "register_operand" "0,x"))
13535 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm"))))]
13538 packusdw\t{%2, %0|%0, %2}
13539 vpackusdw\t{%2, %1, %0|%0, %1, %2}"
13540 [(set_attr "isa" "noavx,avx")
13541 (set_attr "type" "sselog")
13542 (set_attr "prefix_extra" "1")
13543 (set_attr "prefix" "orig,vex")
13544 (set_attr "mode" "TI")])
;; Variable byte blend for the VI1_AVX2 modes; the selector is register
;; operand 3.  Alternative 0 (isa noavx) ties operand 1 to the destination
;; and restricts operand 3 to the "Yz" constraint; alternative 1 (isa avx)
;; is the four-operand VEX form.  Only the VEX alternative is counted as
;; having an immediate byte ("length_immediate" = "*,1").
13546 (define_insn "<sse4_1_avx2>_pblendvb"
13547 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
13549 [(match_operand:VI1_AVX2 1 "register_operand" "0,x")
13550 (match_operand:VI1_AVX2 2 "nonimmediate_operand" "xm,xm")
13551 (match_operand:VI1_AVX2 3 "register_operand" "Yz,x")]
13555 pblendvb\t{%3, %2, %0|%0, %2, %3}
13556 vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}"
13557 [(set_attr "isa" "noavx,avx")
13558 (set_attr "type" "ssemov")
13559 (set_attr "prefix_extra" "1")
13560 (set_attr "length_immediate" "*,1")
13561 (set_attr "prefix" "orig,vex")
13562 (set_attr "btver2_decode" "vector,vector")
13563 (set_attr "mode" "<sseinsnmode>")])
;; 128-bit pblendw on V8HI, with an 8-bit immediate mask in operand 3
;; (range 0..255).  Operand 2 is listed before operand 1 in the RTL.
;; Alternative 0 (isa noavx) is the legacy form with operand 1 tied to the
;; destination; alternative 1 (isa avx) is the VEX form.
13565 (define_insn "sse4_1_pblendw"
13566 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
13568 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
13569 (match_operand:V8HI 1 "register_operand" "0,x")
13570 (match_operand:SI 3 "const_0_to_255_operand" "n,n")))]
13573 pblendw\t{%3, %2, %0|%0, %2, %3}
13574 vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
13575 [(set_attr "isa" "noavx,avx")
13576 (set_attr "type" "ssemov")
13577 (set_attr "prefix_extra" "1")
13578 (set_attr "length_immediate" "1")
13579 (set_attr "prefix" "orig,vex")
13580 (set_attr "mode" "TI")])
13582 ;; The builtin uses an 8-bit immediate. Expand that.
;; The preparation code keeps the low 8 bits of operand 3 and replicates
;; them into bits 8..15 (val << 8 | val), giving a 16-bit mask for the 16
;; V16HI elements.
13583 (define_expand "avx2_pblendw"
13584 [(set (match_operand:V16HI 0 "register_operand")
13586 (match_operand:V16HI 2 "nonimmediate_operand")
13587 (match_operand:V16HI 1 "register_operand")
13588 (match_operand:SI 3 "const_0_to_255_operand")))]
13591 HOST_WIDE_INT val = INTVAL (operands[3]) & 0xff;
13592 operands[3] = GEN_INT (val << 8 | val);
;; 256-bit vpblendw on V16HI.  Operand 3 must satisfy
;; avx2_pblendw_operand; before output it is reduced to its low 8 bits so
;; that the instruction receives an 8-bit immediate.  Only a VEX form is
;; emitted.
13595 (define_insn "*avx2_pblendw"
13596 [(set (match_operand:V16HI 0 "register_operand" "=x")
13598 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
13599 (match_operand:V16HI 1 "register_operand" "x")
13600 (match_operand:SI 3 "avx2_pblendw_operand" "n")))]
13603 operands[3] = GEN_INT (INTVAL (operands[3]) & 0xff);
13604 return "vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}";
13606 [(set_attr "type" "ssemov")
13607 (set_attr "prefix_extra" "1")
13608 (set_attr "length_immediate" "1")
13609 (set_attr "prefix" "vex")
13610 (set_attr "mode" "OI")])
;; vpblendd for the VI4_AVX2 modes, modelled as a vec_merge of operand 2
;; and operand 1 under the constant mask in operand 3 (range 0..255).
;; Operand 2 is the first vec_merge argument.  Only a VEX form is emitted.
13612 (define_insn "avx2_pblendd<mode>"
13613 [(set (match_operand:VI4_AVX2 0 "register_operand" "=x")
13614 (vec_merge:VI4_AVX2
13615 (match_operand:VI4_AVX2 2 "nonimmediate_operand" "xm")
13616 (match_operand:VI4_AVX2 1 "register_operand" "x")
13617 (match_operand:SI 3 "const_0_to_255_operand" "n")))]
13619 "vpblendd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
13620 [(set_attr "type" "ssemov")
13621 (set_attr "prefix_extra" "1")
13622 (set_attr "length_immediate" "1")
13623 (set_attr "prefix" "vex")
13624 (set_attr "mode" "<sseinsnmode>")])
;; phminposuw on V8HI, expressed as the unspec UNSPEC_PHMINPOSUW of a
;; single register-or-memory operand.  %v adds the "v" mnemonic prefix
;; when applicable.
13626 (define_insn "sse4_1_phminposuw"
13627 [(set (match_operand:V8HI 0 "register_operand" "=x")
13628 (unspec:V8HI [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
13629 UNSPEC_PHMINPOSUW))]
13631 "%vphminposuw\t{%1, %0|%0, %1}"
13632 [(set_attr "type" "sselog1")
13633 (set_attr "prefix_extra" "1")
13634 (set_attr "prefix" "maybe_vex")
13635 (set_attr "mode" "TI")])
;; vpmov<extsuffix>bw from a whole V16QI operand to V16HI, with an optional
;; masked variant generated through <mask_name>.  Enabled for TARGET_AVX2
;; together with the mask-related AVX512BW and AVX512VL conditions.
13637 (define_insn "avx2_<code>v16qiv16hi2<mask_name>"
13638 [(set (match_operand:V16HI 0 "register_operand" "=v")
13640 (match_operand:V16QI 1 "nonimmediate_operand" "vm")))]
13641 "TARGET_AVX2 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
13642 "vpmov<extsuffix>bw\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
13643 [(set_attr "type" "ssemov")
13644 (set_attr "prefix_extra" "1")
13645 (set_attr "prefix" "maybe_evex")
13646 (set_attr "mode" "OI")])
;; vpmov<extsuffix>bw from a whole V32QI operand to V32HI, with an optional
;; masked variant generated through <mask_name>.  Always EVEX-encoded.
13648 (define_insn "avx512bw_<code>v32qiv32hi2<mask_name>"
13649 [(set (match_operand:V32HI 0 "register_operand" "=v")
13651 (match_operand:V32QI 1 "nonimmediate_operand" "vm")))]
13653 "vpmov<extsuffix>bw\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
13654 [(set_attr "type" "ssemov")
13655 (set_attr "prefix_extra" "1")
13656 (set_attr "prefix" "evex")
13657 (set_attr "mode" "XI")])
;; pmov<extsuffix>bw from elements 0..7 of a V16QI operand to V8HI, with an
;; optional masked variant generated through <mask_name>.  Enabled for
;; TARGET_SSE4_1 together with the mask-related AVX512BW and AVX512VL
;; conditions.  The Intel-syntax template prints the source with the %q
;; modifier, and "ssememalign" is 64.
13659 (define_insn "sse4_1_<code>v8qiv8hi2<mask_name>"
13660 [(set (match_operand:V8HI 0 "register_operand" "=v")
13663 (match_operand:V16QI 1 "nonimmediate_operand" "vm")
13664 (parallel [(const_int 0) (const_int 1)
13665 (const_int 2) (const_int 3)
13666 (const_int 4) (const_int 5)
13667 (const_int 6) (const_int 7)]))))]
13668 "TARGET_SSE4_1 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
13669 "%vpmov<extsuffix>bw\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
13670 [(set_attr "type" "ssemov")
13671 (set_attr "ssememalign" "64")
13672 (set_attr "prefix_extra" "1")
13673 (set_attr "prefix" "maybe_vex")
13674 (set_attr "mode" "TI")])
;; vpmov<extsuffix>bd from a whole V16QI operand to V16SI, with an optional
;; masked variant generated through <mask_name>.  Always EVEX-encoded; the
;; Intel-syntax template prints the source with the %q modifier.
13676 (define_insn "<mask_codefor>avx512f_<code>v16qiv16si2<mask_name>"
13677 [(set (match_operand:V16SI 0 "register_operand" "=v")
13679 (match_operand:V16QI 1 "nonimmediate_operand" "vm")))]
13681 "vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
13682 [(set_attr "type" "ssemov")
13683 (set_attr "prefix" "evex")
13684 (set_attr "mode" "XI")])
;; vpmov<extsuffix>bd from elements 0..7 of a V16QI operand to V8SI, with
;; an optional masked variant generated through <mask_name>.  Enabled for
;; TARGET_AVX2 together with the mask-related AVX512VL condition.  The
;; Intel-syntax template prints the source with the %q modifier.
13686 (define_insn "avx2_<code>v8qiv8si2<mask_name>"
13687 [(set (match_operand:V8SI 0 "register_operand" "=v")
13690 (match_operand:V16QI 1 "nonimmediate_operand" "vm")
13691 (parallel [(const_int 0) (const_int 1)
13692 (const_int 2) (const_int 3)
13693 (const_int 4) (const_int 5)
13694 (const_int 6) (const_int 7)]))))]
13695 "TARGET_AVX2 && <mask_avx512vl_condition>"
13696 "vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
13697 [(set_attr "type" "ssemov")
13698 (set_attr "prefix_extra" "1")
13699 (set_attr "prefix" "maybe_evex")
13700 (set_attr "mode" "OI")])
;; Byte-to-doubleword extension into a 128-bit destination.  The parallel
;; picks elements 0-3 of the V16QI source (its low 32 bits), matching the
;; %k1 modifier in the Intel-syntax alternative and ssememalign 32.
13702 (define_insn "sse4_1_<code>v4qiv4si2<mask_name>"
13703 [(set (match_operand:V4SI 0 "register_operand" "=v")
13706 (match_operand:V16QI 1 "nonimmediate_operand" "vm")
13707 (parallel [(const_int 0) (const_int 1)
13708 (const_int 2) (const_int 3)]))))]
13709 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
13710 "%vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %k1}"
13711 [(set_attr "type" "ssemov")
13712 (set_attr "ssememalign" "32")
13713 (set_attr "prefix_extra" "1")
13714 (set_attr "prefix" "maybe_vex")
13715 (set_attr "mode" "TI")])
;; Word-to-doubleword extension into a 512-bit destination: operand 1 is a
;; V16HI register or memory operand, operand 0 a V16SI register.
13717 (define_insn "avx512f_<code>v16hiv16si2<mask_name>"
13718 [(set (match_operand:V16SI 0 "register_operand" "=v")
13720 (match_operand:V16HI 1 "nonimmediate_operand" "vm")))]
13722 "vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
13723 [(set_attr "type" "ssemov")
13724 (set_attr "prefix" "evex")
13725 (set_attr "mode" "XI")])
;; Word-to-doubleword extension into a 256-bit destination: operand 1 is a
;; full V8HI register or memory operand, operand 0 a V8SI register.
13727 (define_insn "avx2_<code>v8hiv8si2<mask_name>"
13728 [(set (match_operand:V8SI 0 "register_operand" "=v")
13730 (match_operand:V8HI 1 "nonimmediate_operand" "vm")))]
13731 "TARGET_AVX2 && <mask_avx512vl_condition>"
13732 "vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
13733 [(set_attr "type" "ssemov")
13734 (set_attr "prefix_extra" "1")
13735 (set_attr "prefix" "maybe_evex")
13736 (set_attr "mode" "OI")])
;; Word-to-doubleword extension into a 128-bit destination.  The parallel
;; picks elements 0-3 of the V8HI source (its low 64 bits), matching the
;; %q1 modifier in the Intel-syntax alternative and ssememalign 64.
13738 (define_insn "sse4_1_<code>v4hiv4si2<mask_name>"
13739 [(set (match_operand:V4SI 0 "register_operand" "=v")
13742 (match_operand:V8HI 1 "nonimmediate_operand" "vm")
13743 (parallel [(const_int 0) (const_int 1)
13744 (const_int 2) (const_int 3)]))))]
13745 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
13746 "%vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
13747 [(set_attr "type" "ssemov")
13748 (set_attr "ssememalign" "64")
13749 (set_attr "prefix_extra" "1")
13750 (set_attr "prefix" "maybe_vex")
13751 (set_attr "mode" "TI")])
;; Byte-to-quadword extension into a 512-bit destination.  The parallel
;; picks elements 0-7 of the V16QI source, i.e. its low 64 bits, so the
;; Intel-syntax alternative prints the operand with %q1 (a %k1 modifier
;; would size a memory operand as 32 bits).  This agrees with the
;; eight-byte avx2_<code>v8qiv8si2 pattern, which also uses %q1.
13753 (define_insn "avx512f_<code>v8qiv8di2<mask_name>"
13754 [(set (match_operand:V8DI 0 "register_operand" "=v")
13757 (match_operand:V16QI 1 "nonimmediate_operand" "vm")
13758 (parallel [(const_int 0) (const_int 1)
13759 (const_int 2) (const_int 3)
13760 (const_int 4) (const_int 5)
13761 (const_int 6) (const_int 7)]))))]
13763 "vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
13764 [(set_attr "type" "ssemov")
13765 (set_attr "prefix" "evex")
13766 (set_attr "mode" "XI")])
;; Byte-to-quadword extension into a 256-bit destination.  The parallel
;; picks elements 0-3 of the V16QI source (its low 32 bits), matching the
;; %k1 modifier in the Intel-syntax alternative.
13768 (define_insn "avx2_<code>v4qiv4di2<mask_name>"
13769 [(set (match_operand:V4DI 0 "register_operand" "=v")
13772 (match_operand:V16QI 1 "nonimmediate_operand" "vm")
13773 (parallel [(const_int 0) (const_int 1)
13774 (const_int 2) (const_int 3)]))))]
13775 "TARGET_AVX2 && <mask_avx512vl_condition>"
13776 "vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %k1}"
13777 [(set_attr "type" "ssemov")
13778 (set_attr "prefix_extra" "1")
13779 (set_attr "prefix" "maybe_evex")
13780 (set_attr "mode" "OI")])
;; Byte-to-quadword extension into a 128-bit destination.  The parallel
;; picks elements 0-1 of the V16QI source (its low 16 bits), matching the
;; %w1 modifier in the Intel-syntax alternative and ssememalign 16.
13782 (define_insn "sse4_1_<code>v2qiv2di2<mask_name>"
13783 [(set (match_operand:V2DI 0 "register_operand" "=v")
13786 (match_operand:V16QI 1 "nonimmediate_operand" "vm")
13787 (parallel [(const_int 0) (const_int 1)]))))]
13788 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
13789 "%vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %w1}"
13790 [(set_attr "type" "ssemov")
13791 (set_attr "ssememalign" "16")
13792 (set_attr "prefix_extra" "1")
13793 (set_attr "prefix" "maybe_vex")
13794 (set_attr "mode" "TI")])
;; Word-to-quadword extension into a 512-bit destination: operand 1 is a
;; full V8HI register or memory operand, operand 0 a V8DI register.
;; All eight source words (128 bits) are consumed, so the Intel-syntax
;; alternative prints the operand with plain %1; a %q1 modifier would
;; size a memory operand as 64 bits.
13796 (define_insn "avx512f_<code>v8hiv8di2<mask_name>"
13797 [(set (match_operand:V8DI 0 "register_operand" "=v")
13799 (match_operand:V8HI 1 "nonimmediate_operand" "vm")))]
13801 "vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
13802 [(set_attr "type" "ssemov")
13803 (set_attr "prefix" "evex")
13804 (set_attr "mode" "XI")])
;; Word-to-quadword extension into a 256-bit destination.  The parallel
;; picks elements 0-3 of the V8HI source (its low 64 bits), matching the
;; %q1 modifier in the Intel-syntax alternative.
13806 (define_insn "avx2_<code>v4hiv4di2<mask_name>"
13807 [(set (match_operand:V4DI 0 "register_operand" "=v")
13810 (match_operand:V8HI 1 "nonimmediate_operand" "vm")
13811 (parallel [(const_int 0) (const_int 1)
13812 (const_int 2) (const_int 3)]))))]
13813 "TARGET_AVX2 && <mask_avx512vl_condition>"
13814 "vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
13815 [(set_attr "type" "ssemov")
13816 (set_attr "prefix_extra" "1")
13817 (set_attr "prefix" "maybe_evex")
13818 (set_attr "mode" "OI")])
;; Word-to-quadword extension into a 128-bit destination.  The parallel
;; picks elements 0-1 of the V8HI source (its low 32 bits), matching the
;; %k1 modifier in the Intel-syntax alternative and ssememalign 32.
13820 (define_insn "sse4_1_<code>v2hiv2di2<mask_name>"
13821 [(set (match_operand:V2DI 0 "register_operand" "=v")
13824 (match_operand:V8HI 1 "nonimmediate_operand" "vm")
13825 (parallel [(const_int 0) (const_int 1)]))))]
13826 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
13827 "%vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %k1}"
13828 [(set_attr "type" "ssemov")
13829 (set_attr "ssememalign" "32")
13830 (set_attr "prefix_extra" "1")
13831 (set_attr "prefix" "maybe_vex")
13832 (set_attr "mode" "TI")])
;; Doubleword-to-quadword extension into a 512-bit destination: operand 1
;; is a V8SI register or memory operand, operand 0 a V8DI register.
13834 (define_insn "avx512f_<code>v8siv8di2<mask_name>"
13835 [(set (match_operand:V8DI 0 "register_operand" "=v")
13837 (match_operand:V8SI 1 "nonimmediate_operand" "vm")))]
13839 "vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
13840 [(set_attr "type" "ssemov")
13841 (set_attr "prefix" "evex")
13842 (set_attr "mode" "XI")])
;; Doubleword-to-quadword extension into a 256-bit destination: operand 1
;; is a full V4SI register or memory operand, operand 0 a V4DI register.
13844 (define_insn "avx2_<code>v4siv4di2<mask_name>"
13845 [(set (match_operand:V4DI 0 "register_operand" "=v")
13847 (match_operand:V4SI 1 "nonimmediate_operand" "vm")))]
13848 "TARGET_AVX2 && <mask_avx512vl_condition>"
13849 "vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
13850 [(set_attr "type" "ssemov")
13851 (set_attr "prefix" "maybe_evex")
13852 (set_attr "prefix_extra" "1")
13853 (set_attr "mode" "OI")])
;; Doubleword-to-quadword extension into a 128-bit destination.  The
;; parallel picks elements 0-1 of the V4SI source (its low 64 bits),
;; matching the %q1 modifier in the Intel-syntax alternative and
;; ssememalign 64.
13855 (define_insn "sse4_1_<code>v2siv2di2<mask_name>"
13856 [(set (match_operand:V2DI 0 "register_operand" "=v")
13859 (match_operand:V4SI 1 "nonimmediate_operand" "vm")
13860 (parallel [(const_int 0) (const_int 1)]))))]
13861 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
13862 "%vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
13863 [(set_attr "type" "ssemov")
13864 (set_attr "ssememalign" "64")
13865 (set_attr "prefix_extra" "1")
13866 (set_attr "prefix" "maybe_vex")
13867 (set_attr "mode" "TI")])
13869 ;; vtestps/vtestpd are very similar to comiss and ucomiss when
13870 ;; setting FLAGS_REG, but they are not really compare instructions.
;; Operand 0 is a register and operand 1 a register or memory operand,
;; both in a VF_128_256 mode; the only result is FLAGS_REG in CC mode.
13871 (define_insn "avx_vtest<ssemodesuffix><avxsizesuffix>"
13872 [(set (reg:CC FLAGS_REG)
13873 (unspec:CC [(match_operand:VF_128_256 0 "register_operand" "x")
13874 (match_operand:VF_128_256 1 "nonimmediate_operand" "xm")]
13877 "vtest<ssemodesuffix>\t{%1, %0|%0, %1}"
13878 [(set_attr "type" "ssecomi")
13879 (set_attr "prefix_extra" "1")
13880 (set_attr "prefix" "vex")
13881 (set_attr "mode" "<MODE>")])
13883 ;; ptest is very similar to comiss and ucomiss when setting FLAGS_REG,
13884 ;; but it is not really a compare instruction.
;; 256-bit form: operand 0 is a V4DI register, operand 1 a V4DI register
;; or memory operand; the only result is FLAGS_REG in CC mode.
13885 (define_insn "avx_ptest256"
13886 [(set (reg:CC FLAGS_REG)
13887 (unspec:CC [(match_operand:V4DI 0 "register_operand" "x")
13888 (match_operand:V4DI 1 "nonimmediate_operand" "xm")]
13891 "vptest\t{%1, %0|%0, %1}"
13892 [(set_attr "type" "ssecomi")
13893 (set_attr "prefix_extra" "1")
13894 (set_attr "prefix" "vex")
13895 (set_attr "btver2_decode" "vector")
13896 (set_attr "mode" "OI")])
;; 128-bit ptest: operand 0 is a V2DI register, operand 1 a V2DI register
;; or memory operand; the only result is FLAGS_REG in CC mode.  The %v
;; prefix and maybe_vex attribute cover both legacy and VEX encodings.
13898 (define_insn "sse4_1_ptest"
13899 [(set (reg:CC FLAGS_REG)
13900 (unspec:CC [(match_operand:V2DI 0 "register_operand" "x")
13901 (match_operand:V2DI 1 "nonimmediate_operand" "xm")]
13904 "%vptest\t{%1, %0|%0, %1}"
13905 [(set_attr "type" "ssecomi")
13906 (set_attr "prefix_extra" "1")
13907 (set_attr "prefix" "maybe_vex")
13908 (set_attr "mode" "TI")])
;; Packed round: operand 1 (register or memory, VF_128_256) is rounded
;; into register operand 0 under control of operand 2, an immediate
;; accepted by const_0_to_15_operand.  The prefix_data16 attribute is
;; computed from a TARGET_AVX test rather than being a fixed value.
13910 (define_insn "<sse4_1>_round<ssemodesuffix><avxsizesuffix>"
13911 [(set (match_operand:VF_128_256 0 "register_operand" "=x")
13913 [(match_operand:VF_128_256 1 "nonimmediate_operand" "xm")
13914 (match_operand:SI 2 "const_0_to_15_operand" "n")]
13917 "%vround<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
13918 [(set_attr "type" "ssecvt")
13919 (set (attr "prefix_data16")
13921 (match_test "TARGET_AVX")
13923 (const_string "1")))
13924 (set_attr "prefix_extra" "1")
13925 (set_attr "length_immediate" "1")
13926 (set_attr "prefix" "maybe_vex")
13927 (set_attr "mode" "<MODE>")])
;; Round-then-convert expander: rounds operand 1 into a fresh temporary
;; with the packed round pattern, then converts the temporary to the
;; integer vector operand 0 with fix_trunc.
13929 (define_expand "<sse4_1>_round<ssemodesuffix>_sfix<avxsizesuffix>"
13930 [(match_operand:<sseintvecmode> 0 "register_operand")
13931 (match_operand:VF1_128_256 1 "nonimmediate_operand")
13932 (match_operand:SI 2 "const_0_to_15_operand")]
13935 rtx tmp = gen_reg_rtx (<MODE>mode);
13938 (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp, operands[1],
13941 (gen_fix_trunc<mode><sseintvecmodelower>2 (operands[0], tmp));
;; V8DF round expander: forwards its three operands unchanged to the
;; avx512f_rndscalev8df pattern.
13945 (define_expand "avx512f_roundpd512"
13946 [(match_operand:V8DF 0 "register_operand")
13947 (match_operand:V8DF 1 "nonimmediate_operand")
13948 (match_operand:SI 2 "const_0_to_15_operand")]
13951 emit_insn (gen_avx512f_rndscalev8df (operands[0], operands[1], operands[2]));
;; Round two VF2 inputs (operands 1 and 2) with the rounding immediate in
;; operand 3 and pack the converted results into operand 0.
;; For V2DF with AVX available and 128-bit AVX not preferred, the inputs
;; are concatenated into one V4DF value, rounded once with roundpd256 and
;; converted with fix_truncv4dfv4si2.  Otherwise each input is rounded
;; separately and the pair is combined by vec_pack_sfix_trunc.
13955 (define_expand "<sse4_1>_round<ssemodesuffix>_vec_pack_sfix<avxsizesuffix>"
13956 [(match_operand:<ssepackfltmode> 0 "register_operand")
13957 (match_operand:VF2 1 "nonimmediate_operand")
13958 (match_operand:VF2 2 "nonimmediate_operand")
13959 (match_operand:SI 3 "const_0_to_15_operand")]
13964 if (<MODE>mode == V2DFmode
13965 && TARGET_AVX && !TARGET_PREFER_AVX128)
13967 rtx tmp2 = gen_reg_rtx (V4DFmode);
13969 tmp0 = gen_reg_rtx (V4DFmode);
13970 tmp1 = force_reg (V2DFmode, operands[1]);
13972 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
13973 emit_insn (gen_avx_roundpd256 (tmp2, tmp0, operands[3]));
13974 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp2));
13978 tmp0 = gen_reg_rtx (<MODE>mode);
13979 tmp1 = gen_reg_rtx (<MODE>mode);
13982 (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp0, operands[1],
13985 (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp1, operands[2],
13988 (gen_vec_pack_sfix_trunc_<mode> (operands[0], tmp0, tmp1));
;; Scalar round with two alternatives selected by the isa attribute.
;; Alternative 0 (noavx) ties operand 1 to the destination and emits the
;; two-source-operand legacy form; alternative 1 (avx) keeps operand 1 in
;; its own register and emits the three-source-operand vround form.
;; Operand 2 is the value being rounded, operand 3 the rounding immediate.
13993 (define_insn "sse4_1_round<ssescalarmodesuffix>"
13994 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
13997 [(match_operand:VF_128 2 "register_operand" "x,x")
13998 (match_operand:SI 3 "const_0_to_15_operand" "n,n")]
14000 (match_operand:VF_128 1 "register_operand" "0,x")
14004 round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
14005 vround<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
14006 [(set_attr "isa" "noavx,avx")
14007 (set_attr "type" "ssecvt")
14008 (set_attr "length_immediate" "1")
14009 (set_attr "prefix_data16" "1,*")
14010 (set_attr "prefix_extra" "1")
14011 (set_attr "prefix" "orig,vex")
14012 (set_attr "mode" "<MODE>")])
;; Vector round expander, enabled only when TARGET_ROUND is set and
;; flag_trapping_math is clear.
;; The preparation code builds a vector whose elements are
;; 0.5 - 2**(-p - 1) for the scalar format precision p (the in-code
;; comment describes this as nextafter (0.5, 0.0)), and copies the sign
;; of operand 1 onto it to form operand 3.  Operand 4 is a fresh
;; temporary and operand 5 the ROUND_TRUNC immediate; the pattern then
;; sets operand 4 from an expression on operand 1 and sets operand 0 from
;; operands 4 and 5.
;; NOTE(review): operand 3 is presumably the second input of the first
;; set - confirm against the full pattern.
14014 (define_expand "round<mode>2"
14015 [(set (match_dup 4)
14017 (match_operand:VF 1 "register_operand")
14019 (set (match_operand:VF 0 "register_operand")
14021 [(match_dup 4) (match_dup 5)]
14023 "TARGET_ROUND && !flag_trapping_math"
14025 enum machine_mode scalar_mode;
14026 const struct real_format *fmt;
14027 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
14028 rtx half, vec_half;
14030 scalar_mode = GET_MODE_INNER (<MODE>mode);
14032 /* load nextafter (0.5, 0.0) */
14033 fmt = REAL_MODE_FORMAT (scalar_mode);
14034 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, scalar_mode);
14035 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
14036 half = const_double_from_real_value (pred_half, scalar_mode);
14038 vec_half = ix86_build_const_vector (<MODE>mode, true, half);
14039 vec_half = force_reg (<MODE>mode, vec_half);
14041 operands[3] = gen_reg_rtx (<MODE>mode);
14042 emit_insn (gen_copysign<mode>3 (operands[3], vec_half, operands[1]));
14044 operands[4] = gen_reg_rtx (<MODE>mode);
14045 operands[5] = GEN_INT (ROUND_TRUNC);
;; Round-then-convert expander: rounds operand 1 into a fresh temporary
;; via round<mode>2, then converts the temporary to the integer vector
;; operand 0 with fix_trunc.  Same enable condition as round<mode>2.
14048 (define_expand "round<mode>2_sfix"
14049 [(match_operand:<sseintvecmode> 0 "register_operand")
14050 (match_operand:VF1_128_256 1 "register_operand")]
14051 "TARGET_ROUND && !flag_trapping_math"
14053 rtx tmp = gen_reg_rtx (<MODE>mode);
14055 emit_insn (gen_round<mode>2 (tmp, operands[1]));
14058 (gen_fix_trunc<mode><sseintvecmodelower>2 (operands[0], tmp));
;; Round two VF2 inputs (operands 1 and 2) and pack the converted results
;; into operand 0.
;; For V2DF with AVX available and 128-bit AVX not preferred, the inputs
;; are concatenated into one V4DF value, rounded once with roundv4df2 and
;; converted with fix_truncv4dfv4si2.  Otherwise each input is rounded
;; separately via round<mode>2 and the pair is combined by
;; vec_pack_sfix_trunc.
14062 (define_expand "round<mode>2_vec_pack_sfix"
14063 [(match_operand:<ssepackfltmode> 0 "register_operand")
14064 (match_operand:VF2 1 "register_operand")
14065 (match_operand:VF2 2 "register_operand")]
14066 "TARGET_ROUND && !flag_trapping_math"
14070 if (<MODE>mode == V2DFmode
14071 && TARGET_AVX && !TARGET_PREFER_AVX128)
14073 rtx tmp2 = gen_reg_rtx (V4DFmode);
14075 tmp0 = gen_reg_rtx (V4DFmode);
14076 tmp1 = force_reg (V2DFmode, operands[1]);
14078 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
14079 emit_insn (gen_roundv4df2 (tmp2, tmp0));
14080 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp2));
14084 tmp0 = gen_reg_rtx (<MODE>mode);
14085 tmp1 = gen_reg_rtx (<MODE>mode);
14087 emit_insn (gen_round<mode>2 (tmp0, operands[1]));
14088 emit_insn (gen_round<mode>2 (tmp1, operands[2]));
14091 (gen_vec_pack_sfix_trunc_<mode> (operands[0], tmp0, tmp1));
14096 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
14098 ;; Intel SSE4.2 string/text processing instructions
14100 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Combined explicit-length string compare: sets SI operand 0 (class c),
;; V16QI operand 1 (class Yz) and FLAGS_REG from the same five inputs.
;; The split runs only while pseudos can still be created.  It reads the
;; REG_UNUSED notes on curr_insn for the three results: pcmpestri is
;; generated for operand 0, pcmpestrm for operand 1, the flags-only
;; pattern when FLAGS_REG is live but neither register result is, and
;; NOTE_INSN_DELETED when none of the three is live.
14102 (define_insn_and_split "sse4_2_pcmpestr"
14103 [(set (match_operand:SI 0 "register_operand" "=c,c")
14105 [(match_operand:V16QI 2 "register_operand" "x,x")
14106 (match_operand:SI 3 "register_operand" "a,a")
14107 (match_operand:V16QI 4 "nonimmediate_operand" "x,m")
14108 (match_operand:SI 5 "register_operand" "d,d")
14109 (match_operand:SI 6 "const_0_to_255_operand" "n,n")]
14111 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
14119 (set (reg:CC FLAGS_REG)
14128 && can_create_pseudo_p ()"
14133 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
14134 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
14135 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
14138 emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
14139 operands[3], operands[4],
14140 operands[5], operands[6]));
14142 emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
14143 operands[3], operands[4],
14144 operands[5], operands[6]));
14145 if (flags && !(ecx || xmm0))
14146 emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
14147 operands[2], operands[3],
14148 operands[4], operands[5],
14150 if (!(flags || ecx || xmm0))
14151 emit_note (NOTE_INSN_DELETED);
14155 [(set_attr "type" "sselog")
14156 (set_attr "prefix_data16" "1")
14157 (set_attr "prefix_extra" "1")
14158 (set_attr "ssememalign" "8")
14159 (set_attr "length_immediate" "1")
14160 (set_attr "memory" "none,load")
14161 (set_attr "mode" "TI")])
;; Variant of the combined pcmpestr pattern whose operand 4 is a memory
;; operand wrapped in an UNSPEC_LOADU unspec, with a single alternative.
;; The split code has the same shape as in sse4_2_pcmpestr: it reads the
;; REG_UNUSED notes for operand 0, operand 1 and FLAGS_REG and generates
;; pcmpestri, pcmpestrm, the flags-only pattern, or NOTE_INSN_DELETED,
;; passing the bare memory operand 4 to the generated patterns.
14163 (define_insn_and_split "*sse4_2_pcmpestr_unaligned"
14164 [(set (match_operand:SI 0 "register_operand" "=c")
14166 [(match_operand:V16QI 2 "register_operand" "x")
14167 (match_operand:SI 3 "register_operand" "a")
14169 [(match_operand:V16QI 4 "memory_operand" "m")]
14171 (match_operand:SI 5 "register_operand" "d")
14172 (match_operand:SI 6 "const_0_to_255_operand" "n")]
14174 (set (match_operand:V16QI 1 "register_operand" "=Yz")
14178 (unspec:V16QI [(match_dup 4)] UNSPEC_LOADU)
14182 (set (reg:CC FLAGS_REG)
14186 (unspec:V16QI [(match_dup 4)] UNSPEC_LOADU)
14191 && can_create_pseudo_p ()"
14196 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
14197 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
14198 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
14201 emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
14202 operands[3], operands[4],
14203 operands[5], operands[6]));
14205 emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
14206 operands[3], operands[4],
14207 operands[5], operands[6]));
14208 if (flags && !(ecx || xmm0))
14209 emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
14210 operands[2], operands[3],
14211 operands[4], operands[5],
14213 if (!(flags || ecx || xmm0))
14214 emit_note (NOTE_INSN_DELETED);
14218 [(set_attr "type" "sselog")
14219 (set_attr "prefix_data16" "1")
14220 (set_attr "prefix_extra" "1")
14221 (set_attr "ssememalign" "8")
14222 (set_attr "length_immediate" "1")
14223 (set_attr "memory" "load")
14224 (set_attr "mode" "TI")])
;; pcmpestri: the index result goes to SI operand 0 (class c) and
;; FLAGS_REG is set as well.  Operands 2 and 4 are the SI inputs tied to
;; classes a and d; they do not appear in the output template.  The two
;; alternatives differ only in whether operand 3 is a register or memory.
14226 (define_insn "sse4_2_pcmpestri"
14227 [(set (match_operand:SI 0 "register_operand" "=c,c")
14229 [(match_operand:V16QI 1 "register_operand" "x,x")
14230 (match_operand:SI 2 "register_operand" "a,a")
14231 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
14232 (match_operand:SI 4 "register_operand" "d,d")
14233 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
14235 (set (reg:CC FLAGS_REG)
14244 "%vpcmpestri\t{%5, %3, %1|%1, %3, %5}"
14245 [(set_attr "type" "sselog")
14246 (set_attr "prefix_data16" "1")
14247 (set_attr "prefix_extra" "1")
14248 (set_attr "prefix" "maybe_vex")
14249 (set_attr "ssememalign" "8")
14250 (set_attr "length_immediate" "1")
14251 (set_attr "btver2_decode" "vector")
14252 (set_attr "memory" "none,load")
14253 (set_attr "mode" "TI")])
;; pcmpestrm: the mask result goes to V16QI operand 0 (class Yz) and
;; FLAGS_REG is set as well.  Operands 2 and 4 are the SI inputs tied to
;; classes a and d; they do not appear in the output template.  The two
;; alternatives differ only in whether operand 3 is a register or memory.
14255 (define_insn "sse4_2_pcmpestrm"
14256 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
14258 [(match_operand:V16QI 1 "register_operand" "x,x")
14259 (match_operand:SI 2 "register_operand" "a,a")
14260 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
14261 (match_operand:SI 4 "register_operand" "d,d")
14262 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
14264 (set (reg:CC FLAGS_REG)
14273 "%vpcmpestrm\t{%5, %3, %1|%1, %3, %5}"
14274 [(set_attr "type" "sselog")
14275 (set_attr "prefix_data16" "1")
14276 (set_attr "prefix_extra" "1")
14277 (set_attr "ssememalign" "8")
14278 (set_attr "length_immediate" "1")
14279 (set_attr "prefix" "maybe_vex")
14280 (set_attr "btver2_decode" "vector")
14281 (set_attr "memory" "none,load")
14282 (set_attr "mode" "TI")])
;; Flags-only form: only FLAGS_REG is set; operands 0 and 1 are scratch
;; clobbers.  Alternatives 0-1 clobber the Yz-class V16QI scratch and
;; emit pcmpestrm; alternatives 2-3 clobber the c-class SI scratch and
;; emit pcmpestri.  Within each pair the second alternative takes
;; operand 4 from memory.
14284 (define_insn "sse4_2_pcmpestr_cconly"
14285 [(set (reg:CC FLAGS_REG)
14287 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
14288 (match_operand:SI 3 "register_operand" "a,a,a,a")
14289 (match_operand:V16QI 4 "nonimmediate_operand" "x,m,x,m")
14290 (match_operand:SI 5 "register_operand" "d,d,d,d")
14291 (match_operand:SI 6 "const_0_to_255_operand" "n,n,n,n")]
14293 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
14294 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
14297 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
14298 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
14299 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}
14300 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}"
14301 [(set_attr "type" "sselog")
14302 (set_attr "prefix_data16" "1")
14303 (set_attr "prefix_extra" "1")
14304 (set_attr "ssememalign" "8")
14305 (set_attr "length_immediate" "1")
14306 (set_attr "memory" "none,load,none,load")
14307 (set_attr "btver2_decode" "vector,vector,vector,vector")
14308 (set_attr "prefix" "maybe_vex")
14309 (set_attr "mode" "TI")])
;; Combined implicit-length string compare: sets SI operand 0 (class c),
;; V16QI operand 1 (class Yz) and FLAGS_REG from the same three inputs.
;; The split runs only while pseudos can still be created.  It reads the
;; REG_UNUSED notes on curr_insn for the three results: pcmpistri is
;; generated for operand 0, pcmpistrm for operand 1, the flags-only
;; pattern when FLAGS_REG is live but neither register result is, and
;; NOTE_INSN_DELETED when none of the three is live.
14311 (define_insn_and_split "sse4_2_pcmpistr"
14312 [(set (match_operand:SI 0 "register_operand" "=c,c")
14314 [(match_operand:V16QI 2 "register_operand" "x,x")
14315 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
14316 (match_operand:SI 4 "const_0_to_255_operand" "n,n")]
14318 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
14324 (set (reg:CC FLAGS_REG)
14331 && can_create_pseudo_p ()"
14336 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
14337 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
14338 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
14341 emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
14342 operands[3], operands[4]));
14344 emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
14345 operands[3], operands[4]));
14346 if (flags && !(ecx || xmm0))
14347 emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
14348 operands[2], operands[3],
14350 if (!(flags || ecx || xmm0))
14351 emit_note (NOTE_INSN_DELETED);
14355 [(set_attr "type" "sselog")
14356 (set_attr "prefix_data16" "1")
14357 (set_attr "prefix_extra" "1")
14358 (set_attr "ssememalign" "8")
14359 (set_attr "length_immediate" "1")
14360 (set_attr "memory" "none,load")
14361 (set_attr "mode" "TI")])
;; Variant of the combined pcmpistr pattern whose operand 3 is a memory
;; operand wrapped in an UNSPEC_LOADU unspec, with a single alternative.
;; The split code has the same shape as in sse4_2_pcmpistr: it reads the
;; REG_UNUSED notes for operand 0, operand 1 and FLAGS_REG and generates
;; pcmpistri, pcmpistrm, the flags-only pattern, or NOTE_INSN_DELETED,
;; passing the bare memory operand 3 to the generated patterns.
14363 (define_insn_and_split "*sse4_2_pcmpistr_unaligned"
14364 [(set (match_operand:SI 0 "register_operand" "=c")
14366 [(match_operand:V16QI 2 "register_operand" "x")
14368 [(match_operand:V16QI 3 "memory_operand" "m")]
14370 (match_operand:SI 4 "const_0_to_255_operand" "n")]
14372 (set (match_operand:V16QI 1 "register_operand" "=Yz")
14375 (unspec:V16QI [(match_dup 3)] UNSPEC_LOADU)
14378 (set (reg:CC FLAGS_REG)
14381 (unspec:V16QI [(match_dup 3)] UNSPEC_LOADU)
14385 && can_create_pseudo_p ()"
14390 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
14391 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
14392 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
14395 emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
14396 operands[3], operands[4]));
14398 emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
14399 operands[3], operands[4]));
14400 if (flags && !(ecx || xmm0))
14401 emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
14402 operands[2], operands[3],
14404 if (!(flags || ecx || xmm0))
14405 emit_note (NOTE_INSN_DELETED);
14409 [(set_attr "type" "sselog")
14410 (set_attr "prefix_data16" "1")
14411 (set_attr "prefix_extra" "1")
14412 (set_attr "ssememalign" "8")
14413 (set_attr "length_immediate" "1")
14414 (set_attr "memory" "load")
14415 (set_attr "mode" "TI")])
;; pcmpistri: the index result goes to SI operand 0 (class c) and
;; FLAGS_REG is set as well.  The two alternatives differ only in whether
;; operand 2 is a register or memory.
14417 (define_insn "sse4_2_pcmpistri"
14418 [(set (match_operand:SI 0 "register_operand" "=c,c")
14420 [(match_operand:V16QI 1 "register_operand" "x,x")
14421 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
14422 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
14424 (set (reg:CC FLAGS_REG)
14431 "%vpcmpistri\t{%3, %2, %1|%1, %2, %3}"
14432 [(set_attr "type" "sselog")
14433 (set_attr "prefix_data16" "1")
14434 (set_attr "prefix_extra" "1")
14435 (set_attr "ssememalign" "8")
14436 (set_attr "length_immediate" "1")
14437 (set_attr "prefix" "maybe_vex")
14438 (set_attr "memory" "none,load")
14439 (set_attr "btver2_decode" "vector")
14440 (set_attr "mode" "TI")])
;; pcmpistrm: the mask result goes to V16QI operand 0 (class Yz) and
;; FLAGS_REG is set as well.  The two alternatives differ only in whether
;; operand 2 is a register or memory.
14442 (define_insn "sse4_2_pcmpistrm"
14443 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
14445 [(match_operand:V16QI 1 "register_operand" "x,x")
14446 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
14447 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
14449 (set (reg:CC FLAGS_REG)
14456 "%vpcmpistrm\t{%3, %2, %1|%1, %2, %3}"
14457 [(set_attr "type" "sselog")
14458 (set_attr "prefix_data16" "1")
14459 (set_attr "prefix_extra" "1")
14460 (set_attr "ssememalign" "8")
14461 (set_attr "length_immediate" "1")
14462 (set_attr "prefix" "maybe_vex")
14463 (set_attr "memory" "none,load")
14464 (set_attr "btver2_decode" "vector")
14465 (set_attr "mode" "TI")])
;; Flags-only form: only FLAGS_REG is set; operands 0 and 1 are scratch
;; clobbers.  Alternatives 0-1 clobber the Yz-class V16QI scratch and
;; emit pcmpistrm; alternatives 2-3 clobber the c-class SI scratch and
;; emit pcmpistri.  Within each pair the second alternative takes
;; operand 3 from memory.
14467 (define_insn "sse4_2_pcmpistr_cconly"
14468 [(set (reg:CC FLAGS_REG)
14470 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
14471 (match_operand:V16QI 3 "nonimmediate_operand" "x,m,x,m")
14472 (match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")]
14474 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
14475 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
14478 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
14479 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
14480 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}
14481 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}"
14482 [(set_attr "type" "sselog")
14483 (set_attr "prefix_data16" "1")
14484 (set_attr "prefix_extra" "1")
14485 (set_attr "ssememalign" "8")
14486 (set_attr "length_immediate" "1")
14487 (set_attr "memory" "none,load,none,load")
14488 (set_attr "prefix" "maybe_vex")
14489 (set_attr "btver2_decode" "vector,vector,vector,vector")
14490 (set_attr "mode" "TI")])
14492 ;; Packed float variants
;; Maps an index vector mode to the SF vector mode given to the memory
;; reference in the single-precision gather/scatter prefetch patterns:
;; V8DI -> V8SF, V16SI -> V16SF.
14493 (define_mode_attr GATHER_SCATTER_SF_MEM_MODE
14494 [(V8DI "V8SF") (V16SI "V16SF")])
;; Expander for the single-precision gather prefetch.  Operand 0 is a
;; mask register or constant -1, operand 1 the index vector, operand 2
;; the base address, operand 3 the scale (const1248_operand) and operand
;; 4 the hint (const_2_to_3_operand).  The preparation code wraps
;; operands 2, 1 and 3 into a Pmode UNSPEC_VSIBADDR address.
14496 (define_expand "avx512pf_gatherpf<mode>sf"
14498 [(match_operand:<avx512fmaskmode> 0 "register_or_constm1_operand")
14499 (mem:<GATHER_SCATTER_SF_MEM_MODE>
14501 [(match_operand 2 "vsib_address_operand")
14502 (match_operand:VI48_512 1 "register_operand")
14503 (match_operand:SI 3 "const1248_operand")]))
14504 (match_operand:SI 4 "const_2_to_3_operand")]
14505 UNSPEC_GATHER_PREFETCH)]
14509 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
14510 operands[3]), UNSPEC_VSIBADDR);
;; Masked single-precision gather prefetch.  Operand 0 is the mask
;; register (class Yk), printed in braces after the memory operand 5.
;; The switch on operand 4 chooses between the vgatherpf0 and vgatherpf1
;; mnemonics; any other value reaches gcc_unreachable.
14513 (define_insn "*avx512pf_gatherpf<mode>sf_mask"
14515 [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
14516 (match_operator:<GATHER_SCATTER_SF_MEM_MODE> 5 "vsib_mem_operator"
14518 [(match_operand:P 2 "vsib_address_operand" "Tv")
14519 (match_operand:VI48_512 1 "register_operand" "v")
14520 (match_operand:SI 3 "const1248_operand" "n")]
14522 (match_operand:SI 4 "const_2_to_3_operand" "n")]
14523 UNSPEC_GATHER_PREFETCH)]
14526 switch (INTVAL (operands[4]))
14529 return "vgatherpf0<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}";
14531 return "vgatherpf1<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}";
14533 gcc_unreachable ();
14536 [(set_attr "type" "sse")
14537 (set_attr "prefix" "evex")
14538 (set_attr "mode" "XI")])
;; Single-precision gather prefetch without a mask register operand; only
;; the memory operand 4 is printed.  The switch on operand 3 chooses
;; between the vgatherpf0 and vgatherpf1 mnemonics; any other value
;; reaches gcc_unreachable.
14540 (define_insn "*avx512pf_gatherpf<mode>sf"
14543 (match_operator:<GATHER_SCATTER_SF_MEM_MODE> 4 "vsib_mem_operator"
14545 [(match_operand:P 1 "vsib_address_operand" "Tv")
14546 (match_operand:VI48_512 0 "register_operand" "v")
14547 (match_operand:SI 2 "const1248_operand" "n")]
14549 (match_operand:SI 3 "const_2_to_3_operand" "n")]
14550 UNSPEC_GATHER_PREFETCH)]
14553 switch (INTVAL (operands[3]))
14556 return "vgatherpf0<ssemodesuffix>ps\t{%4|%4}";
14558 return "vgatherpf1<ssemodesuffix>ps\t{%4|%4}";
14560 gcc_unreachable ();
14563 [(set_attr "type" "sse")
14564 (set_attr "prefix" "evex")
14565 (set_attr "mode" "XI")])
14567 ;; Packed double variants
;; Expander for the double-precision gather prefetch.  Operand 0 is a
;; mask register or constant -1, operand 1 the index vector
;; (VI4_256_8_512), operand 2 the base address, operand 3 the scale and
;; operand 4 the hint.  The preparation code wraps operands 2, 1 and 3
;; into a Pmode UNSPEC_VSIBADDR address.
14568 (define_expand "avx512pf_gatherpf<mode>df"
14570 [(match_operand:<avx512fmaskmode> 0 "register_or_constm1_operand")
14573 [(match_operand 2 "vsib_address_operand")
14574 (match_operand:VI4_256_8_512 1 "register_operand")
14575 (match_operand:SI 3 "const1248_operand")]))
14576 (match_operand:SI 4 "const_2_to_3_operand")]
14577 UNSPEC_GATHER_PREFETCH)]
14581 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
14582 operands[3]), UNSPEC_VSIBADDR);
;; Masked double-precision gather prefetch over a V8DF memory reference.
;; Operand 0 is the mask register (class Yk), printed in braces after the
;; memory operand 5.  The switch on operand 4 chooses between the
;; vgatherpf0 and vgatherpf1 mnemonics; any other value reaches
;; gcc_unreachable.
14585 (define_insn "*avx512pf_gatherpf<mode>df_mask"
14587 [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
14588 (match_operator:V8DF 5 "vsib_mem_operator"
14590 [(match_operand:P 2 "vsib_address_operand" "Tv")
14591 (match_operand:VI4_256_8_512 1 "register_operand" "v")
14592 (match_operand:SI 3 "const1248_operand" "n")]
14594 (match_operand:SI 4 "const_2_to_3_operand" "n")]
14595 UNSPEC_GATHER_PREFETCH)]
14598 switch (INTVAL (operands[4]))
14601 return "vgatherpf0<ssemodesuffix>pd\t{%5%{%0%}|%5%{%0%}}";
14603 return "vgatherpf1<ssemodesuffix>pd\t{%5%{%0%}|%5%{%0%}}";
14605 gcc_unreachable ();
14608 [(set_attr "type" "sse")
14609 (set_attr "prefix" "evex")
14610 (set_attr "mode" "XI")])
;; Double-precision gather prefetch without a mask register operand; only
;; the V8DF memory operand 4 is printed.  The switch on operand 3 chooses
;; between the vgatherpf0 and vgatherpf1 mnemonics; any other value
;; reaches gcc_unreachable.
14612 (define_insn "*avx512pf_gatherpf<mode>df"
14615 (match_operator:V8DF 4 "vsib_mem_operator"
14617 [(match_operand:P 1 "vsib_address_operand" "Tv")
14618 (match_operand:VI4_256_8_512 0 "register_operand" "v")
14619 (match_operand:SI 2 "const1248_operand" "n")]
14621 (match_operand:SI 3 "const_2_to_3_operand" "n")]
14622 UNSPEC_GATHER_PREFETCH)]
14625 switch (INTVAL (operands[3]))
14628 return "vgatherpf0<ssemodesuffix>pd\t{%4|%4}";
14630 return "vgatherpf1<ssemodesuffix>pd\t{%4|%4}";
14632 gcc_unreachable ();
14635 [(set_attr "type" "sse")
14636 (set_attr "prefix" "evex")
14637 (set_attr "mode" "XI")])
14639 ;; Packed float variants
;; Expander for the single-precision scatter prefetch.  Operand 0 is a
;; mask register or constant -1, operand 1 the index vector, operand 2
;; the base address, operand 3 the scale (const1248_operand) and operand
;; 4 the hint (const2367_operand).  The preparation code wraps operands
;; 2, 1 and 3 into a Pmode UNSPEC_VSIBADDR address.
14640 (define_expand "avx512pf_scatterpf<mode>sf"
14642 [(match_operand:<avx512fmaskmode> 0 "register_or_constm1_operand")
14643 (mem:<GATHER_SCATTER_SF_MEM_MODE>
14645 [(match_operand 2 "vsib_address_operand")
14646 (match_operand:VI48_512 1 "register_operand")
14647 (match_operand:SI 3 "const1248_operand")]))
14648 (match_operand:SI 4 "const2367_operand")]
14649 UNSPEC_SCATTER_PREFETCH)]
14653 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
14654 operands[3]), UNSPEC_VSIBADDR);
;; Masked single-precision scatter prefetch.  Operand 0 is the mask
;; register (class Yk), printed in braces after the memory operand 5.
;; The switch on operand 4 chooses between the vscatterpf0 and
;; vscatterpf1 mnemonics; any other value reaches gcc_unreachable.
14657 (define_insn "*avx512pf_scatterpf<mode>sf_mask"
14659 [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
14660 (match_operator:<GATHER_SCATTER_SF_MEM_MODE> 5 "vsib_mem_operator"
14662 [(match_operand:P 2 "vsib_address_operand" "Tv")
14663 (match_operand:VI48_512 1 "register_operand" "v")
14664 (match_operand:SI 3 "const1248_operand" "n")]
14666 (match_operand:SI 4 "const2367_operand" "n")]
14667 UNSPEC_SCATTER_PREFETCH)]
14670 switch (INTVAL (operands[4]))
14674 return "vscatterpf0<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}";
14677 return "vscatterpf1<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}";
14679 gcc_unreachable ();
14682 [(set_attr "type" "sse")
14683 (set_attr "prefix" "evex")
14684 (set_attr "mode" "XI")])
;; Single-precision scatter prefetch without a mask register operand;
;; only the memory operand 4 is printed.  The switch on operand 3 chooses
;; between the vscatterpf0 and vscatterpf1 mnemonics; any other value
;; reaches gcc_unreachable.
14686 (define_insn "*avx512pf_scatterpf<mode>sf"
14689 (match_operator:<GATHER_SCATTER_SF_MEM_MODE> 4 "vsib_mem_operator"
14691 [(match_operand:P 1 "vsib_address_operand" "Tv")
14692 (match_operand:VI48_512 0 "register_operand" "v")
14693 (match_operand:SI 2 "const1248_operand" "n")]
14695 (match_operand:SI 3 "const2367_operand" "n")]
14696 UNSPEC_SCATTER_PREFETCH)]
14699 switch (INTVAL (operands[3]))
14703 return "vscatterpf0<ssemodesuffix>ps\t{%4|%4}";
14706 return "vscatterpf1<ssemodesuffix>ps\t{%4|%4}";
14708 gcc_unreachable ();
14711 [(set_attr "type" "sse")
14712 (set_attr "prefix" "evex")
14713 (set_attr "mode" "XI")])
14715 ;; Packed double variants
;; Expander for the double-precision scatter prefetch.  Operand 0 is a
;; mask register or constant -1, operand 1 the index vector
;; (VI4_256_8_512), operand 2 the base address, operand 3 the scale and
;; operand 4 the hint (const2367_operand).  The preparation code wraps
;; operands 2, 1 and 3 into a Pmode UNSPEC_VSIBADDR address.
14716 (define_expand "avx512pf_scatterpf<mode>df"
14718 [(match_operand:<avx512fmaskmode> 0 "register_or_constm1_operand")
14721 [(match_operand 2 "vsib_address_operand")
14722 (match_operand:VI4_256_8_512 1 "register_operand")
14723 (match_operand:SI 3 "const1248_operand")]))
14724 (match_operand:SI 4 "const2367_operand")]
14725 UNSPEC_SCATTER_PREFETCH)]
14729 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
14730 operands[3]), UNSPEC_VSIBADDR);
;; Masked double-precision scatter prefetch over a V8DF memory reference.
;; Operand 0 is the mask register (class Yk), printed in braces after the
;; memory operand 5.  The switch on operand 4 chooses between the
;; vscatterpf0 and vscatterpf1 mnemonics; any other value reaches
;; gcc_unreachable.
14733 (define_insn "*avx512pf_scatterpf<mode>df_mask"
14735 [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
14736 (match_operator:V8DF 5 "vsib_mem_operator"
14738 [(match_operand:P 2 "vsib_address_operand" "Tv")
14739 (match_operand:VI4_256_8_512 1 "register_operand" "v")
14740 (match_operand:SI 3 "const1248_operand" "n")]
14742 (match_operand:SI 4 "const2367_operand" "n")]
14743 UNSPEC_SCATTER_PREFETCH)]
14746 switch (INTVAL (operands[4]))
14750 return "vscatterpf0<ssemodesuffix>pd\t{%5%{%0%}|%5%{%0%}}";
14753 return "vscatterpf1<ssemodesuffix>pd\t{%5%{%0%}|%5%{%0%}}";
14755 gcc_unreachable ();
14758 [(set_attr "type" "sse")
14759 (set_attr "prefix" "evex")
14760 (set_attr "mode" "XI")])
;; Double-precision scatter prefetch without a mask register operand;
;; only the V8DF memory operand 4 is printed.  The switch on operand 3
;; chooses between the vscatterpf0 and vscatterpf1 mnemonics; any other
;; value reaches gcc_unreachable.
14762 (define_insn "*avx512pf_scatterpf<mode>df"
14765 (match_operator:V8DF 4 "vsib_mem_operator"
14767 [(match_operand:P 1 "vsib_address_operand" "Tv")
14768 (match_operand:VI4_256_8_512 0 "register_operand" "v")
14769 (match_operand:SI 2 "const1248_operand" "n")]
14771 (match_operand:SI 3 "const2367_operand" "n")]
14772 UNSPEC_SCATTER_PREFETCH)]
14775 switch (INTVAL (operands[3]))
14779 return "vscatterpf0<ssemodesuffix>pd\t{%4|%4}";
14782 return "vscatterpf1<ssemodesuffix>pd\t{%4|%4}";
14784 gcc_unreachable ();
14787 [(set_attr "type" "sse")
14788 (set_attr "prefix" "evex")
14789 (set_attr "mode" "XI")])
;; vexp2 on a 512-bit float vector: operand 1 (VF_512) is the source and
;; operand 0 the destination register.  The predicate and constraint of
;; operand 1 and the extra template text come from the <round_saeonly_*>
;; and <mask_*> placeholders, which are defined elsewhere in the file.
14791 (define_insn "avx512er_exp2<mode><mask_name><round_saeonly_name>"
14792 [(set (match_operand:VF_512 0 "register_operand" "=v")
14794 [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
14797 "vexp2<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
14798 [(set_attr "prefix" "evex")
14799 (set_attr "type" "sse")
14800 (set_attr "mode" "<MODE>")])
;; Emits vrcp28<ssemodesuffix> on a VF_512 source (operand 1) into register
;; operand 0, with the same mask / SAE-only template plumbing as the
;; neighbouring AVX512ER patterns.
14802 (define_insn "<mask_codefor>avx512er_rcp28<mode><mask_name><round_saeonly_name>"
14803 [(set (match_operand:VF_512 0 "register_operand" "=v")
14805 [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
14808 "vrcp28<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
14809 [(set_attr "prefix" "evex")
14810 (set_attr "type" "sse")
14811 (set_attr "mode" "<MODE>")])
;; Scalar-suffix form of rcp28 on VF_128 modes: the template prints
;; vrcp28<ssescalarmodesuffix> with operand 1 and register operand 2 as
;; sources and operand 0 as destination.
;; NOTE(review): length_immediate is set to 1 although the visible template
;; prints no immediate operand -- confirm this attribute is intended.
14813 (define_insn "avx512er_vmrcp28<mode><round_saeonly_name>"
14814 [(set (match_operand:VF_128 0 "register_operand" "=v")
14817 [(match_operand:VF_128 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
14819 (match_operand:VF_128 2 "register_operand" "v")
14822 "vrcp28<ssescalarmodesuffix>\t{<round_saeonly_op3>%1, %2, %0|%0, %2, %1<round_saeonly_op3>}"
14823 [(set_attr "length_immediate" "1")
14824 (set_attr "prefix" "evex")
14825 (set_attr "type" "sse")
14826 (set_attr "mode" "<MODE>")])
;; Emits vrsqrt28<ssemodesuffix> on a VF_512 source (operand 1) into
;; register operand 0, with the same mask / SAE-only template plumbing as
;; the neighbouring AVX512ER patterns.
14828 (define_insn "<mask_codefor>avx512er_rsqrt28<mode><mask_name><round_saeonly_name>"
14829 [(set (match_operand:VF_512 0 "register_operand" "=v")
14831 [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
14834 "vrsqrt28<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
14835 [(set_attr "prefix" "evex")
14836 (set_attr "type" "sse")
14837 (set_attr "mode" "<MODE>")])
;; Scalar-suffix form of rsqrt28 on VF_128 modes: the template prints
;; vrsqrt28<ssescalarmodesuffix> with operand 1 and register operand 2 as
;; sources and operand 0 as destination.
;; NOTE(review): length_immediate is set to 1 although the visible template
;; prints no immediate operand -- confirm this attribute is intended.
14839 (define_insn "avx512er_vmrsqrt28<mode><round_saeonly_name>"
14840 [(set (match_operand:VF_128 0 "register_operand" "=v")
14843 [(match_operand:VF_128 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
14845 (match_operand:VF_128 2 "register_operand" "v")
14848 "vrsqrt28<ssescalarmodesuffix>\t{<round_saeonly_op3>%1, %2, %0|%0, %2, %1<round_saeonly_op3>}"
14849 [(set_attr "length_immediate" "1")
14850 (set_attr "type" "sse")
14851 (set_attr "prefix" "evex")
14852 (set_attr "mode" "<MODE>")])
14854 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
14856 ;; XOP instructions
14858 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; xop_plus iterates over the plus and ss_plus RTL codes.  The macs and
;; madcs code attributes map each code to the mnemonic stem used in the
;; pattern names and templates below ("macs"/"macss", "madcs"/"madcss").
14860 (define_code_iterator xop_plus [plus ss_plus])
14862 (define_code_attr macs [(plus "macs") (ss_plus "macss")])
14863 (define_code_attr madcs [(plus "madcs") (ss_plus "madcss")])
14865 ;; XOP parallel integer multiply/add instructions.
;; Four-operand vp<macs> insn on VI24_128 modes with same-size source and
;; destination elements.  Operands 1 and 2 are marked commutative ("%");
;; only operand 2 may be in memory.  Operand 3 is a register operand
;; printed last in the Intel-syntax template.
14867 (define_insn "xop_p<macs><ssemodesuffix><ssemodesuffix>"
14868 [(set (match_operand:VI24_128 0 "register_operand" "=x")
14871 (match_operand:VI24_128 1 "nonimmediate_operand" "%x")
14872 (match_operand:VI24_128 2 "nonimmediate_operand" "xm"))
14873 (match_operand:VI24_128 3 "register_operand" "x")))]
14875 "vp<macs><ssemodesuffix><ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
14876 [(set_attr "type" "ssemuladd")
14877 (set_attr "mode" "TI")])
;; vp<macs>dql: V4SI sources (operands 1 and 2) with elements 0 and 2
;; selected from each, a V2DI operand 3 and a V2DI destination.
14879 (define_insn "xop_p<macs>dql"
14880 [(set (match_operand:V2DI 0 "register_operand" "=x")
14885 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
14886 (parallel [(const_int 0) (const_int 2)])))
14889 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
14890 (parallel [(const_int 0) (const_int 2)]))))
14891 (match_operand:V2DI 3 "register_operand" "x")))]
14893 "vp<macs>dql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
14894 [(set_attr "type" "ssemuladd")
14895 (set_attr "mode" "TI")])
;; vp<macs>dqh: same shape as the dql pattern, but elements 1 and 3 of the
;; V4SI sources are selected instead of elements 0 and 2.
14897 (define_insn "xop_p<macs>dqh"
14898 [(set (match_operand:V2DI 0 "register_operand" "=x")
14903 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
14904 (parallel [(const_int 1) (const_int 3)])))
14907 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
14908 (parallel [(const_int 1) (const_int 3)]))))
14909 (match_operand:V2DI 3 "register_operand" "x")))]
14911 "vp<macs>dqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
14912 [(set_attr "type" "ssemuladd")
14913 (set_attr "mode" "TI")])
14915 ;; XOP parallel integer multiply/add instructions for the intrinsics
;; vp<macs>wd: V8HI sources (operands 1 and 2) with the odd-indexed
;; elements 1, 3, 5 and 7 selected from each, a V4SI operand 3 and a V4SI
;; destination.
14916 (define_insn "xop_p<macs>wd"
14917 [(set (match_operand:V4SI 0 "register_operand" "=x")
14922 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
14923 (parallel [(const_int 1) (const_int 3)
14924 (const_int 5) (const_int 7)])))
14927 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
14928 (parallel [(const_int 1) (const_int 3)
14929 (const_int 5) (const_int 7)]))))
14930 (match_operand:V4SI 3 "register_operand" "x")))]
14932 "vp<macs>wd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
14933 [(set_attr "type" "ssemuladd")
14934 (set_attr "mode" "TI")])
;; vp<madcs>wd: V8HI sources with both the even-indexed (0, 2, 4, 6) and
;; odd-indexed (1, 3, 5, 7) elements selected, a V4SI operand 3 and a V4SI
;; destination.
14936 (define_insn "xop_p<madcs>wd"
14937 [(set (match_operand:V4SI 0 "register_operand" "=x")
14943 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
14944 (parallel [(const_int 0) (const_int 2)
14945 (const_int 4) (const_int 6)])))
14948 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
14949 (parallel [(const_int 0) (const_int 2)
14950 (const_int 4) (const_int 6)]))))
14955 (parallel [(const_int 1) (const_int 3)
14956 (const_int 5) (const_int 7)])))
14960 (parallel [(const_int 1) (const_int 3)
14961 (const_int 5) (const_int 7)])))))
14962 (match_operand:V4SI 3 "register_operand" "x")))]
14964 "vp<madcs>wd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
14965 [(set_attr "type" "ssemuladd")
14966 (set_attr "mode" "TI")])
14968 ;; XOP parallel XMM conditional moves
;; vpcmov over every mode of the V iterator.  Two alternatives: either
;; operand 3 or operand 2 may be in memory, never both (constraints
;; "x,m" / "xm,x"); operand 1 is always a register.
14969 (define_insn "xop_pcmov_<mode><avxsizesuffix>"
14970 [(set (match_operand:V 0 "register_operand" "=x,x")
14972 (match_operand:V 3 "nonimmediate_operand" "x,m")
14973 (match_operand:V 1 "register_operand" "x,x")
14974 (match_operand:V 2 "nonimmediate_operand" "xm,x")))]
14976 "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
14977 [(set_attr "type" "sse4arg")])
14979 ;; XOP horizontal add/subtract instructions
;; vphadd<u>bw: V16QI source, V8HI destination.  The visible selections
;; pick the even-indexed bytes (0..14) and the odd-indexed bytes (1..15)
;; of operand 1.  <u> comes from an attribute defined outside this excerpt.
14980 (define_insn "xop_phadd<u>bw"
14981 [(set (match_operand:V8HI 0 "register_operand" "=x")
14985 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
14986 (parallel [(const_int 0) (const_int 2)
14987 (const_int 4) (const_int 6)
14988 (const_int 8) (const_int 10)
14989 (const_int 12) (const_int 14)])))
14993 (parallel [(const_int 1) (const_int 3)
14994 (const_int 5) (const_int 7)
14995 (const_int 9) (const_int 11)
14996 (const_int 13) (const_int 15)])))))]
14998 "vphadd<u>bw\t{%1, %0|%0, %1}"
14999 [(set_attr "type" "sseiadd1")])
;; vphadd<u>bd: V16QI source, V4SI destination.  Four selections of
;; operand 1 are visible, with byte indices congruent to 0, 1, 2 and 3
;; modulo 4 respectively.
15001 (define_insn "xop_phadd<u>bd"
15002 [(set (match_operand:V4SI 0 "register_operand" "=x")
15007 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
15008 (parallel [(const_int 0) (const_int 4)
15009 (const_int 8) (const_int 12)])))
15013 (parallel [(const_int 1) (const_int 5)
15014 (const_int 9) (const_int 13)]))))
15019 (parallel [(const_int 2) (const_int 6)
15020 (const_int 10) (const_int 14)])))
15024 (parallel [(const_int 3) (const_int 7)
15025 (const_int 11) (const_int 15)]))))))]
15027 "vphadd<u>bd\t{%1, %0|%0, %1}"
15028 [(set_attr "type" "sseiadd1")])
;; vphadd<u>bq: V16QI source, V2DI destination.  Eight two-element
;; selections of operand 1 are visible, pairing byte i with byte i + 8 for
;; i = 0..7.
15030 (define_insn "xop_phadd<u>bq"
15031 [(set (match_operand:V2DI 0 "register_operand" "=x")
15037 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
15038 (parallel [(const_int 0) (const_int 8)])))
15042 (parallel [(const_int 1) (const_int 9)]))))
15047 (parallel [(const_int 2) (const_int 10)])))
15051 (parallel [(const_int 3) (const_int 11)])))))
15057 (parallel [(const_int 4) (const_int 12)])))
15061 (parallel [(const_int 5) (const_int 13)]))))
15066 (parallel [(const_int 6) (const_int 14)])))
15070 (parallel [(const_int 7) (const_int 15)])))))))]
15072 "vphadd<u>bq\t{%1, %0|%0, %1}"
15073 [(set_attr "type" "sseiadd1")])
;; vphadd<u>wd: V8HI source, V4SI destination.  The visible selections
;; pick the even-indexed (0, 2, 4, 6) and odd-indexed (1, 3, 5, 7)
;; elements of operand 1.
15075 (define_insn "xop_phadd<u>wd"
15076 [(set (match_operand:V4SI 0 "register_operand" "=x")
15080 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
15081 (parallel [(const_int 0) (const_int 2)
15082 (const_int 4) (const_int 6)])))
15086 (parallel [(const_int 1) (const_int 3)
15087 (const_int 5) (const_int 7)])))))]
15089 "vphadd<u>wd\t{%1, %0|%0, %1}"
15090 [(set_attr "type" "sseiadd1")])
;; vphadd<u>wq: V8HI source, V2DI destination.  Four two-element
;; selections of operand 1 are visible, pairing element i with element
;; i + 4 for i = 0..3.
15092 (define_insn "xop_phadd<u>wq"
15093 [(set (match_operand:V2DI 0 "register_operand" "=x")
15098 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
15099 (parallel [(const_int 0) (const_int 4)])))
15103 (parallel [(const_int 1) (const_int 5)]))))
15108 (parallel [(const_int 2) (const_int 6)])))
15112 (parallel [(const_int 3) (const_int 7)]))))))]
15114 "vphadd<u>wq\t{%1, %0|%0, %1}"
15115 [(set_attr "type" "sseiadd1")])
;; vphadd<u>dq: V4SI source, V2DI destination.  The visible selections
;; pick elements (0, 2) and (1, 3) of operand 1.
15117 (define_insn "xop_phadd<u>dq"
15118 [(set (match_operand:V2DI 0 "register_operand" "=x")
15122 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
15123 (parallel [(const_int 0) (const_int 2)])))
15127 (parallel [(const_int 1) (const_int 3)])))))]
15129 "vphadd<u>dq\t{%1, %0|%0, %1}"
15130 [(set_attr "type" "sseiadd1")])
;; vphsubbw: V16QI source, V8HI destination.  The visible selections pick
;; the even-indexed bytes (0..14) and the odd-indexed bytes (1..15) of
;; operand 1.  Unlike the phadd patterns there is no <u> variant.
15132 (define_insn "xop_phsubbw"
15133 [(set (match_operand:V8HI 0 "register_operand" "=x")
15137 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
15138 (parallel [(const_int 0) (const_int 2)
15139 (const_int 4) (const_int 6)
15140 (const_int 8) (const_int 10)
15141 (const_int 12) (const_int 14)])))
15145 (parallel [(const_int 1) (const_int 3)
15146 (const_int 5) (const_int 7)
15147 (const_int 9) (const_int 11)
15148 (const_int 13) (const_int 15)])))))]
15150 "vphsubbw\t{%1, %0|%0, %1}"
15151 [(set_attr "type" "sseiadd1")])
;; vphsubwd: V8HI source, V4SI destination.  The visible selections pick
;; the even-indexed (0, 2, 4, 6) and odd-indexed (1, 3, 5, 7) elements of
;; operand 1.
15153 (define_insn "xop_phsubwd"
15154 [(set (match_operand:V4SI 0 "register_operand" "=x")
15158 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
15159 (parallel [(const_int 0) (const_int 2)
15160 (const_int 4) (const_int 6)])))
15164 (parallel [(const_int 1) (const_int 3)
15165 (const_int 5) (const_int 7)])))))]
15167 "vphsubwd\t{%1, %0|%0, %1}"
15168 [(set_attr "type" "sseiadd1")])
;; vphsubdq: V4SI source, V2DI destination.  The visible selections pick
;; elements (0, 2) and (1, 3) of operand 1.
15170 (define_insn "xop_phsubdq"
15171 [(set (match_operand:V2DI 0 "register_operand" "=x")
15175 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
15176 (parallel [(const_int 0) (const_int 2)])))
15180 (parallel [(const_int 1) (const_int 3)])))))]
15182 "vphsubdq\t{%1, %0|%0, %1}"
15183 [(set_attr "type" "sseiadd1")])
15185 ;; XOP permute instructions
;; vpperm on V16QI, modelled as UNSPEC_XOP_PERMUTE of operands 1-3.
;; Operand 1 must be a register; operands 2 and 3 may each be memory, but
;; the insn condition rejects the case where both are memory at once.
15186 (define_insn "xop_pperm"
15187 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
15189 [(match_operand:V16QI 1 "register_operand" "x,x")
15190 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
15191 (match_operand:V16QI 3 "nonimmediate_operand" "xm,x")]
15192 UNSPEC_XOP_PERMUTE))]
15193 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
15194 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15195 [(set_attr "type" "sse4arg")
15196 (set_attr "mode" "TI")])
15198 ;; XOP pack instructions that combine two vectors into a smaller vector
;; Pack two V2DI inputs (operands 1 and 2) into a V4SI result using vpperm.
;; Operand 3 is the V16QI selector, attached to the pattern with a (use ...).
;; Operands 2 and 3 may not both be in memory.
15199 (define_insn "xop_pperm_pack_v2di_v4si"
15200 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
15203 (match_operand:V2DI 1 "register_operand" "x,x"))
15205 (match_operand:V2DI 2 "nonimmediate_operand" "x,m"))))
15206 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
15207 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
15208 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15209 [(set_attr "type" "sse4arg")
15210 (set_attr "mode" "TI")])
;; Pack two V4SI inputs (operands 1 and 2) into a V8HI result using vpperm.
;; Operand 3 is the V16QI selector, attached to the pattern with a (use ...).
;; Operands 2 and 3 may not both be in memory.
15212 (define_insn "xop_pperm_pack_v4si_v8hi"
15213 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
15216 (match_operand:V4SI 1 "register_operand" "x,x"))
15218 (match_operand:V4SI 2 "nonimmediate_operand" "x,m"))))
15219 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
15220 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
15221 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15222 [(set_attr "type" "sse4arg")
15223 (set_attr "mode" "TI")])
;; Pack two V8HI inputs (operands 1 and 2) into a V16QI result using vpperm.
;; Operand 3 is the V16QI selector, attached to the pattern with a (use ...).
;; Operands 2 and 3 may not both be in memory.
15225 (define_insn "xop_pperm_pack_v8hi_v16qi"
15226 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
15229 (match_operand:V8HI 1 "register_operand" "x,x"))
15231 (match_operand:V8HI 2 "nonimmediate_operand" "x,m"))))
15232 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
15233 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
15234 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15235 [(set_attr "type" "sse4arg")
15236 (set_attr "mode" "TI")])
15238 ;; XOP packed rotate instructions
;; Expander for rotate-left of a VI_128 vector by an SImode count.
;; When the count is not accepted by const_0_to_<sserotatemax>_operand, it
;; is converted to the element mode if needed, replicated into every lane
;; of a fresh vector register via vec_init, and the variable-count
;; xop_vrotl<mode>3 pattern is emitted with that vector.
15239 (define_expand "rotl<mode>3"
15240 [(set (match_operand:VI_128 0 "register_operand")
15242 (match_operand:VI_128 1 "nonimmediate_operand")
15243 (match_operand:SI 2 "general_operand")))]
15246 /* If we were given a scalar, convert it to parallel */
15247 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
15249 rtvec vs = rtvec_alloc (<ssescalarnum>);
15250 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
15251 rtx reg = gen_reg_rtx (<MODE>mode);
15252 rtx op2 = operands[2];
15255 if (GET_MODE (op2) != <ssescalarmode>mode)
15257 op2 = gen_reg_rtx (<ssescalarmode>mode);
15258 convert_move (op2, operands[2], false);
15261 for (i = 0; i < <ssescalarnum>; i++)
15262 RTVEC_ELT (vs, i) = op2;
15264 emit_insn (gen_vec_init<mode> (reg, par));
15265 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
;; Expander for rotate-right of a VI_128 vector by an SImode count.
;; For counts not accepted by const_0_to_<sserotatemax>_operand the count
;; is broadcast into a vector as in rotl<mode>3, then negated with
;; neg<mode>2, and the rotate is emitted as xop_vrotl<mode>3 with the
;; negated count vector.
15270 (define_expand "rotr<mode>3"
15271 [(set (match_operand:VI_128 0 "register_operand")
15273 (match_operand:VI_128 1 "nonimmediate_operand")
15274 (match_operand:SI 2 "general_operand")))]
15277 /* If we were given a scalar, convert it to parallel */
15278 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
15280 rtvec vs = rtvec_alloc (<ssescalarnum>);
15281 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
15282 rtx neg = gen_reg_rtx (<MODE>mode);
15283 rtx reg = gen_reg_rtx (<MODE>mode);
15284 rtx op2 = operands[2];
15287 if (GET_MODE (op2) != <ssescalarmode>mode)
15289 op2 = gen_reg_rtx (<ssescalarmode>mode);
15290 convert_move (op2, operands[2], false);
15293 for (i = 0; i < <ssescalarnum>; i++)
15294 RTVEC_ELT (vs, i) = op2;
15296 emit_insn (gen_vec_init<mode> (reg, par));
15297 emit_insn (gen_neg<mode>2 (neg, reg));
15298 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], neg));
;; Rotate-left by an immediate count: emits vprot<ssemodesuffix> with the
;; immediate operand 2, a register-or-memory source (operand 1) and a
;; register destination (operand 0).
15303 (define_insn "xop_rotl<mode>3"
15304 [(set (match_operand:VI_128 0 "register_operand" "=x")
15306 (match_operand:VI_128 1 "nonimmediate_operand" "xm")
15307 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
15309 "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
15310 [(set_attr "type" "sseishft")
15311 (set_attr "length_immediate" "1")
15312 (set_attr "mode" "TI")])
;; Rotate-right by an immediate count, emitted with the same vprot mnemonic
;; as rotate-left: the output code computes (element bit size - operand 2)
;; and the template prints that value as %3.
;; NOTE(review): the assignment target line is not visible in this excerpt;
;; presumably it is operands[3] -- confirm.
15314 (define_insn "xop_rotr<mode>3"
15315 [(set (match_operand:VI_128 0 "register_operand" "=x")
15317 (match_operand:VI_128 1 "nonimmediate_operand" "xm")
15318 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
15322 = GEN_INT (GET_MODE_BITSIZE (<ssescalarmode>mode) - INTVAL (operands[2]));
15323 return \"vprot<ssemodesuffix>\t{%3, %1, %0|%0, %1, %3}\";
15325 [(set_attr "type" "sseishft")
15326 (set_attr "length_immediate" "1")
15327 (set_attr "mode" "TI")])
;; Expander for rotate-right by a per-element vector count (operand 2):
;; negates the count vector into a fresh register and emits
;; xop_vrotl<mode>3 with the negated counts.
15329 (define_expand "vrotr<mode>3"
15330 [(match_operand:VI_128 0 "register_operand")
15331 (match_operand:VI_128 1 "register_operand")
15332 (match_operand:VI_128 2 "register_operand")]
15335 rtx reg = gen_reg_rtx (<MODE>mode);
15336 emit_insn (gen_neg<mode>2 (reg, operands[2]));
15337 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
;; Expander for rotate-left by a per-element vector count: forwards its
;; three operands unchanged to xop_vrotl<mode>3.
15341 (define_expand "vrotl<mode>3"
15342 [(match_operand:VI_128 0 "register_operand")
15343 (match_operand:VI_128 1 "register_operand")
15344 (match_operand:VI_128 2 "register_operand")]
15347 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], operands[2]));
;; Variable-count vprot<ssemodesuffix>: operand 2 is the per-element count
;; vector and operand 1 the value.  The RTL is an if_then_else that also
;; refers to the negation of operand 2.  Either operand 1 or operand 2 may
;; be in memory, but the insn condition rejects both being memory.
15351 (define_insn "xop_vrotl<mode>3"
15352 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
15353 (if_then_else:VI_128
15355 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
15358 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
15362 (neg:VI_128 (match_dup 2)))))]
15363 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
15364 "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
15365 [(set_attr "type" "sseishft")
15366 (set_attr "prefix_data16" "0")
15367 (set_attr "prefix_extra" "2")
15368 (set_attr "mode" "TI")])
15370 ;; XOP packed shift instructions.
;; vlshr expander for VI12_128 modes: negates the count vector (operand 2)
;; into a fresh register and emits xop_shl<mode>3 with the negated counts.
15371 (define_expand "vlshr<mode>3"
15372 [(set (match_operand:VI12_128 0 "register_operand")
15374 (match_operand:VI12_128 1 "register_operand")
15375 (match_operand:VI12_128 2 "nonimmediate_operand")))]
15378 rtx neg = gen_reg_rtx (<MODE>mode);
15379 emit_insn (gen_neg<mode>2 (neg, operands[2]));
15380 emit_insn (gen_xop_shl<mode>3 (operands[0], operands[1], neg));
;; vlshr expander for VI48_128 modes, enabled for AVX2 or XOP.  The visible
;; preparation code negates the count vector and emits xop_shl<mode>3.
;; NOTE(review): the guard that selects this XOP path over the AVX2 one is
;; not visible in this excerpt -- confirm against the full file.
15384 (define_expand "vlshr<mode>3"
15385 [(set (match_operand:VI48_128 0 "register_operand")
15387 (match_operand:VI48_128 1 "register_operand")
15388 (match_operand:VI48_128 2 "nonimmediate_operand")))]
15389 "TARGET_AVX2 || TARGET_XOP"
15393 rtx neg = gen_reg_rtx (<MODE>mode);
15394 emit_insn (gen_neg<mode>2 (neg, operands[2]));
15395 emit_insn (gen_xop_shl<mode>3 (operands[0], operands[1], neg));
;; vlshr expander for VI48_512 modes.  No preparation code is visible in
;; this excerpt, so it presumably expands directly to its RTL template.
15400 (define_expand "vlshr<mode>3"
15401 [(set (match_operand:VI48_512 0 "register_operand")
15403 (match_operand:VI48_512 1 "register_operand")
15404 (match_operand:VI48_512 2 "nonimmediate_operand")))]
;; vlshr expander for VI48_256 modes.  No preparation code is visible in
;; this excerpt, so it presumably expands directly to its RTL template.
15407 (define_expand "vlshr<mode>3"
15408 [(set (match_operand:VI48_256 0 "register_operand")
15410 (match_operand:VI48_256 1 "register_operand")
15411 (match_operand:VI48_256 2 "nonimmediate_operand")))]
;; Arithmetic shift right by a per-element vector count for VI128_128
;; modes: negates the count vector into a fresh register and emits
;; xop_sha<mode>3 with the negated counts.
15414 (define_expand "vashr<mode>3"
15415 [(set (match_operand:VI128_128 0 "register_operand")
15416 (ashiftrt:VI128_128
15417 (match_operand:VI128_128 1 "register_operand")
15418 (match_operand:VI128_128 2 "nonimmediate_operand")))]
15421 rtx neg = gen_reg_rtx (<MODE>mode);
15422 emit_insn (gen_neg<mode>2 (neg, operands[2]));
15423 emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], neg));
;; V4SI arithmetic shift right by a vector count, enabled for AVX2 or XOP.
;; The visible preparation code negates the count vector and emits
;; xop_shav4si3.
;; NOTE(review): the guard that selects this XOP path over the AVX2 one is
;; not visible in this excerpt -- confirm against the full file.
15427 (define_expand "vashrv4si3"
15428 [(set (match_operand:V4SI 0 "register_operand")
15429 (ashiftrt:V4SI (match_operand:V4SI 1 "register_operand")
15430 (match_operand:V4SI 2 "nonimmediate_operand")))]
15431 "TARGET_AVX2 || TARGET_XOP"
15435 rtx neg = gen_reg_rtx (V4SImode);
15436 emit_insn (gen_negv4si2 (neg, operands[2]));
15437 emit_insn (gen_xop_shav4si3 (operands[0], operands[1], neg));
;; V16SI arithmetic shift right by a vector count.  No preparation code is
;; visible in this excerpt; the pattern is a plain ashiftrt set.
15442 (define_expand "vashrv16si3"
15443 [(set (match_operand:V16SI 0 "register_operand")
15444 (ashiftrt:V16SI (match_operand:V16SI 1 "register_operand")
15445 (match_operand:V16SI 2 "nonimmediate_operand")))]
;; V8SI arithmetic shift right by a vector count.  No preparation code is
;; visible in this excerpt; the pattern is a plain ashiftrt set.
15448 (define_expand "vashrv8si3"
15449 [(set (match_operand:V8SI 0 "register_operand")
15450 (ashiftrt:V8SI (match_operand:V8SI 1 "register_operand")
15451 (match_operand:V8SI 2 "nonimmediate_operand")))]
;; vashl expander for VI12_128 modes: forwards its operands unchanged to
;; xop_sha<mode>3 (no negation of the count vector).
15454 (define_expand "vashl<mode>3"
15455 [(set (match_operand:VI12_128 0 "register_operand")
15457 (match_operand:VI12_128 1 "register_operand")
15458 (match_operand:VI12_128 2 "nonimmediate_operand")))]
15461 emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], operands[2]));
;; vashl expander for VI48_128 modes, enabled for AVX2 or XOP.  The visible
;; preparation code forces the count vector into a register and emits
;; xop_sha<mode>3.
;; NOTE(review): the guard that selects this XOP path over the AVX2 one is
;; not visible in this excerpt -- confirm against the full file.
15465 (define_expand "vashl<mode>3"
15466 [(set (match_operand:VI48_128 0 "register_operand")
15468 (match_operand:VI48_128 1 "register_operand")
15469 (match_operand:VI48_128 2 "nonimmediate_operand")))]
15470 "TARGET_AVX2 || TARGET_XOP"
15474 operands[2] = force_reg (<MODE>mode, operands[2]);
15475 emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], operands[2]));
;; vashl expander for VI48_512 modes.  No preparation code is visible in
;; this excerpt, so it presumably expands directly to its RTL template.
15480 (define_expand "vashl<mode>3"
15481 [(set (match_operand:VI48_512 0 "register_operand")
15483 (match_operand:VI48_512 1 "register_operand")
15484 (match_operand:VI48_512 2 "nonimmediate_operand")))]
;; vashl expander for VI48_256 modes.  No preparation code is visible in
;; this excerpt, so it presumably expands directly to its RTL template.
15487 (define_expand "vashl<mode>3"
15488 [(set (match_operand:VI48_256 0 "register_operand")
15490 (match_operand:VI48_256 1 "register_operand")
15491 (match_operand:VI48_256 2 "nonimmediate_operand")))]
;; Variable-count vpsha<ssemodesuffix>: operand 2 is the per-element count
;; vector and operand 1 the value.  The RTL is an if_then_else that also
;; refers to the negation of operand 2; the expanders above pass negated
;; counts to obtain right shifts.  Operands 1 and 2 may not both be memory.
15494 (define_insn "xop_sha<mode>3"
15495 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
15496 (if_then_else:VI_128
15498 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
15501 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
15505 (neg:VI_128 (match_dup 2)))))]
15506 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
15507 "vpsha<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
15508 [(set_attr "type" "sseishft")
15509 (set_attr "prefix_data16" "0")
15510 (set_attr "prefix_extra" "2")
15511 (set_attr "mode" "TI")])
;; Variable-count vpshl<ssemodesuffix>: same operand layout and memory
;; restriction as xop_sha<mode>3.  The vlshr expanders above pass negated
;; counts to this pattern.
15513 (define_insn "xop_shl<mode>3"
15514 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
15515 (if_then_else:VI_128
15517 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
15520 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
15524 (neg:VI_128 (match_dup 2)))))]
15525 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
15526 "vpshl<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
15527 [(set_attr "type" "sseishft")
15528 (set_attr "prefix_data16" "0")
15529 (set_attr "prefix_extra" "2")
15530 (set_attr "mode" "TI")])
;; Shift of a byte-element vector (VI1_AVX2) by a scalar SImode count, for
;; every code in any_shift.
;; With XOP and V16QImode the count is replicated into a V16QI vector via
;; vec_init; for right shifts (<CODE> != ASHIFT) a constant count is negated
;; directly and the visible code also contains a vector negation step.  The
;; result is emitted through xop_shlv16qi3 for LSHIFTRT and xop_shav16qi3
;; otherwise.  The other visible path calls ix86_expand_vecop_qihi.
;; NOTE(review): the branch structure around `negate` is not fully visible
;; in this excerpt -- confirm against the full file.
15532 (define_expand "<shift_insn><mode>3"
15533 [(set (match_operand:VI1_AVX2 0 "register_operand")
15534 (any_shift:VI1_AVX2
15535 (match_operand:VI1_AVX2 1 "register_operand")
15536 (match_operand:SI 2 "nonmemory_operand")))]
15539 if (TARGET_XOP && <MODE>mode == V16QImode)
15541 bool negate = false;
15542 rtx (*gen) (rtx, rtx, rtx);
15546 if (<CODE> != ASHIFT)
15548 if (CONST_INT_P (operands[2]))
15549 operands[2] = GEN_INT (-INTVAL (operands[2]));
15553 par = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
15554 for (i = 0; i < 16; i++)
15555 XVECEXP (par, 0, i) = operands[2];
15557 tmp = gen_reg_rtx (V16QImode);
15558 emit_insn (gen_vec_initv16qi (tmp, par));
15561 emit_insn (gen_negv16qi2 (tmp, tmp));
15563 gen = (<CODE> == LSHIFTRT ? gen_xop_shlv16qi3 : gen_xop_shav16qi3);
15564 emit_insn (gen (operands[0], operands[1], tmp));
15567 ix86_expand_vecop_qihi (<CODE>, operands[0], operands[1], operands[2]);
;; V2DI shift by a scalar DImode count, enabled for XOP or AVX512VL.
;; Without AVX512VL the count is replicated into a V2DI vector via
;; vec_init (a constant count is negated directly; the visible code also
;; contains a vector negation step) and the shift is emitted through
;; xop_shav2di3.
;; NOTE(review): the branch structure around `negate` is not fully visible
;; in this excerpt -- confirm against the full file.
15571 (define_expand "ashrv2di3"
15572 [(set (match_operand:V2DI 0 "register_operand")
15574 (match_operand:V2DI 1 "register_operand")
15575 (match_operand:DI 2 "nonmemory_operand")))]
15576 "TARGET_XOP || TARGET_AVX512VL"
15578 if (!TARGET_AVX512VL)
15580 rtx reg = gen_reg_rtx (V2DImode);
15582 bool negate = false;
15585 if (CONST_INT_P (operands[2]))
15586 operands[2] = GEN_INT (-INTVAL (operands[2]));
15590 par = gen_rtx_PARALLEL (V2DImode, rtvec_alloc (2));
15591 for (i = 0; i < 2; i++)
15592 XVECEXP (par, 0, i) = operands[2];
15594 emit_insn (gen_vec_initv2di (reg, par));
15597 emit_insn (gen_negv2di2 (reg, reg));
15599 emit_insn (gen_xop_shav2di3 (operands[0], operands[1], reg));
15604 ;; XOP FRCZ support
;; vfrcz<ssemodesuffix> over the FMAMODE iterator: one register-or-memory
;; source (operand 1), register destination (operand 0).
15605 (define_insn "xop_frcz<mode>2"
15606 [(set (match_operand:FMAMODE 0 "register_operand" "=x")
15608 [(match_operand:FMAMODE 1 "nonimmediate_operand" "xm")]
15611 "vfrcz<ssemodesuffix>\t{%1, %0|%0, %1}"
15612 [(set_attr "type" "ssecvt1")
15613 (set_attr "mode" "<MODE>")])
;; Expander for the scalar-suffix vfrcz form on VF_128 modes.  Its
;; preparation statement sets operand 2 to the all-zero constant of the
;; vector mode, which matches the const0_operand in *xop_vmfrcz<mode>2.
15615 (define_expand "xop_vmfrcz<mode>2"
15616 [(set (match_operand:VF_128 0 "register_operand")
15619 [(match_operand:VF_128 1 "nonimmediate_operand")]
15624 "operands[2] = CONST0_RTX (<MODE>mode);")
;; Scalar-suffix vfrcz insn on VF_128 modes: emits
;; vfrcz<ssescalarmodesuffix> from operand 1 into operand 0.  Operand 2
;; must be a zero constant and is not printed in the template.
15626 (define_insn "*xop_vmfrcz<mode>2"
15627 [(set (match_operand:VF_128 0 "register_operand" "=x")
15630 [(match_operand:VF_128 1 "nonimmediate_operand" "xm")]
15632 (match_operand:VF_128 2 "const0_operand")
15635 "vfrcz<ssescalarmodesuffix>\t{%1, %0|%0, %<iptr>1}"
15636 [(set_attr "type" "ssecvt1")
15637 (set_attr "mode" "<MODE>")])
;; Signed vpcom<cond><ssemodesuffix> compare on VI_128 modes.  Operand 1 is
;; the comparison operator (ix86_comparison_int_operator), printed as the
;; condition through %Y1; operands 2 and 3 are the values compared.
;; NOTE(review): type is "sse4arg" here but "ssecmp" in the unsigned
;; variants of this pattern -- confirm the difference is intended.
15639 (define_insn "xop_maskcmp<mode>3"
15640 [(set (match_operand:VI_128 0 "register_operand" "=x")
15641 (match_operator:VI_128 1 "ix86_comparison_int_operator"
15642 [(match_operand:VI_128 2 "register_operand" "x")
15643 (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
15645 "vpcom%Y1<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
15646 [(set_attr "type" "sse4arg")
15647 (set_attr "prefix_data16" "0")
15648 (set_attr "prefix_rep" "0")
15649 (set_attr "prefix_extra" "2")
15650 (set_attr "length_immediate" "1")
15651 (set_attr "mode" "TI")])
;; Unsigned vpcom<cond>u<ssemodesuffix> compare on VI_128 modes.  Operand 1
;; is the comparison operator (ix86_comparison_uns_operator), printed as
;; the condition through %Y1; operands 2 and 3 are the values compared.
15653 (define_insn "xop_maskcmp_uns<mode>3"
15654 [(set (match_operand:VI_128 0 "register_operand" "=x")
15655 (match_operator:VI_128 1 "ix86_comparison_uns_operator"
15656 [(match_operand:VI_128 2 "register_operand" "x")
15657 (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
15659 "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
15660 [(set_attr "type" "ssecmp")
15661 (set_attr "prefix_data16" "0")
15662 (set_attr "prefix_rep" "0")
15663 (set_attr "prefix_extra" "2")
15664 (set_attr "length_immediate" "1")
15665 (set_attr "mode" "TI")])
15667 ;; Version of pcom*u* that is called from the intrinsics that allows pcomequ*
15668 ;; and pcomneu* not to be converted to the signed ones in case somebody needs
15669 ;; the exact instruction generated for the intrinsic.
;; Same template as xop_maskcmp_uns<mode>3, but the comparison is wrapped
;; in UNSPEC_XOP_UNSIGNED_CMP, so the RTL is opaque rather than a plain
;; comparison.
15670 (define_insn "xop_maskcmp_uns2<mode>3"
15671 [(set (match_operand:VI_128 0 "register_operand" "=x")
15673 [(match_operator:VI_128 1 "ix86_comparison_uns_operator"
15674 [(match_operand:VI_128 2 "register_operand" "x")
15675 (match_operand:VI_128 3 "nonimmediate_operand" "xm")])]
15676 UNSPEC_XOP_UNSIGNED_CMP))]
15678 "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
15679 [(set_attr "type" "ssecmp")
15680 (set_attr "prefix_data16" "0")
15681 (set_attr "prefix_extra" "2")
15682 (set_attr "length_immediate" "1")
15683 (set_attr "mode" "TI")])
15685 ;; Pcomtrue and pcomfalse support. These are useless instructions, but are
15686 ;; being added here to be complete.
;; vpcomtrue / vpcomfalse on VI_128 modes, modelled as
;; UNSPEC_XOP_TRUEFALSE.  Operand 3 is a compile-time integer selector: a
;; nonzero value emits vpcomtrue, zero emits vpcomfalse.
15687 (define_insn "xop_pcom_tf<mode>3"
15688 [(set (match_operand:VI_128 0 "register_operand" "=x")
15690 [(match_operand:VI_128 1 "register_operand" "x")
15691 (match_operand:VI_128 2 "nonimmediate_operand" "xm")
15692 (match_operand:SI 3 "const_int_operand" "n")]
15693 UNSPEC_XOP_TRUEFALSE))]
15696 return ((INTVAL (operands[3]) != 0)
15697 ? "vpcomtrue<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
15698 : "vpcomfalse<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}");
15700 [(set_attr "type" "ssecmp")
15701 (set_attr "prefix_data16" "0")
15702 (set_attr "prefix_extra" "2")
15703 (set_attr "length_immediate" "1")
15704 (set_attr "mode" "TI")])
;; vpermil2<ssemodesuffix> on VF_128_256 modes.  Operands 1 and 2 are the
;; data sources, operand 3 is a selector in the matching integer vector
;; mode (register or memory) and operand 4 is an immediate accepted by
;; const_0_to_3_operand.
15706 (define_insn "xop_vpermil2<mode>3"
15707 [(set (match_operand:VF_128_256 0 "register_operand" "=x")
15709 [(match_operand:VF_128_256 1 "register_operand" "x")
15710 (match_operand:VF_128_256 2 "nonimmediate_operand" "%x")
15711 (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "xm")
15712 (match_operand:SI 4 "const_0_to_3_operand" "n")]
15715 "vpermil2<ssemodesuffix>\t{%4, %3, %2, %1, %0|%0, %1, %2, %3, %4}"
15716 [(set_attr "type" "sse4arg")
15717 (set_attr "length_immediate" "1")
15718 (set_attr "mode" "<MODE>")])
15720 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; aesenc / vaesenc on V2DI.  Alternative 0 (isa "noavx") is the
;; two-operand form with operand 1 tied to the destination ("0");
;; alternative 1 (isa "avx") is the three-operand VEX form.
15722 (define_insn "aesenc"
15723 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
15724 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
15725 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
15729 aesenc\t{%2, %0|%0, %2}
15730 vaesenc\t{%2, %1, %0|%0, %1, %2}"
15731 [(set_attr "isa" "noavx,avx")
15732 (set_attr "type" "sselog1")
15733 (set_attr "prefix_extra" "1")
15734 (set_attr "prefix" "orig,vex")
15735 (set_attr "btver2_decode" "double,double")
15736 (set_attr "mode" "TI")])
;; aesenclast / vaesenclast on V2DI, modelled as UNSPEC_AESENCLAST.
;; Alternative 0 (isa "noavx") is the two-operand form with operand 1 tied
;; to the destination; alternative 1 (isa "avx") is the three-operand VEX
;; form.
15738 (define_insn "aesenclast"
15739 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
15740 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
15741 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
15742 UNSPEC_AESENCLAST))]
15745 aesenclast\t{%2, %0|%0, %2}
15746 vaesenclast\t{%2, %1, %0|%0, %1, %2}"
15747 [(set_attr "isa" "noavx,avx")
15748 (set_attr "type" "sselog1")
15749 (set_attr "prefix_extra" "1")
15750 (set_attr "prefix" "orig,vex")
15751 (set_attr "btver2_decode" "double,double")
15752 (set_attr "mode" "TI")])
;; aesdec / vaesdec on V2DI.  Alternative 0 (isa "noavx") is the
;; two-operand form with operand 1 tied to the destination ("0");
;; alternative 1 (isa "avx") is the three-operand VEX form.
15754 (define_insn "aesdec"
15755 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
15756 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
15757 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
15761 aesdec\t{%2, %0|%0, %2}
15762 vaesdec\t{%2, %1, %0|%0, %1, %2}"
15763 [(set_attr "isa" "noavx,avx")
15764 (set_attr "type" "sselog1")
15765 (set_attr "prefix_extra" "1")
15766 (set_attr "prefix" "orig,vex")
15767 (set_attr "btver2_decode" "double,double")
15768 (set_attr "mode" "TI")])
;; aesdeclast / vaesdeclast on V2DI, modelled as UNSPEC_AESDECLAST.
;; Alternative 0 (isa "noavx") is the two-operand form with operand 1 tied
;; to the destination; alternative 1 (isa "avx") is the three-operand VEX
;; form.
15770 (define_insn "aesdeclast"
15771 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
15772 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
15773 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
15774 UNSPEC_AESDECLAST))]
15777 aesdeclast\t{%2, %0|%0, %2}
15778 vaesdeclast\t{%2, %1, %0|%0, %1, %2}"
15779 [(set_attr "isa" "noavx,avx")
15780 (set_attr "type" "sselog1")
15781 (set_attr "prefix_extra" "1")
15782 (set_attr "prefix" "orig,vex")
15783 (set_attr "btver2_decode" "double,double")
15784 (set_attr "mode" "TI")])
;; aesimc on V2DI with a single register-or-memory source.  The template
;; starts with %v and the prefix attribute is "maybe_vex", so one
;; alternative covers both the legacy and the VEX spelling.
15786 (define_insn "aesimc"
15787 [(set (match_operand:V2DI 0 "register_operand" "=x")
15788 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")]
15791 "%vaesimc\t{%1, %0|%0, %1}"
15792 [(set_attr "type" "sselog1")
15793 (set_attr "prefix_extra" "1")
15794 (set_attr "prefix" "maybe_vex")
15795 (set_attr "mode" "TI")])
;; aeskeygenassist on V2DI, modelled as UNSPEC_AESKEYGENASSIST.  Operand 1
;; is the register-or-memory source and operand 2 an immediate accepted by
;; const_0_to_255_operand.  Uses %v / "maybe_vex" like aesimc.
15797 (define_insn "aeskeygenassist"
15798 [(set (match_operand:V2DI 0 "register_operand" "=x")
15799 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")
15800 (match_operand:SI 2 "const_0_to_255_operand" "n")]
15801 UNSPEC_AESKEYGENASSIST))]
15803 "%vaeskeygenassist\t{%2, %1, %0|%0, %1, %2}"
15804 [(set_attr "type" "sselog1")
15805 (set_attr "prefix_extra" "1")
15806 (set_attr "length_immediate" "1")
15807 (set_attr "prefix" "maybe_vex")
15808 (set_attr "mode" "TI")])
;; pclmulqdq / vpclmulqdq on V2DI with an 8-bit immediate (operand 3).
;; Alternative 0 (isa "noavx") is the two-operand form with operand 1 tied
;; to the destination; alternative 1 (isa "avx") is the three-operand VEX
;; form.
15810 (define_insn "pclmulqdq"
15811 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
15812 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
15813 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")
15814 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
15818 pclmulqdq\t{%3, %2, %0|%0, %2, %3}
15819 vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15820 [(set_attr "isa" "noavx,avx")
15821 (set_attr "type" "sselog1")
15822 (set_attr "prefix_extra" "1")
15823 (set_attr "length_immediate" "1")
15824 (set_attr "prefix" "orig,vex")
15825 (set_attr "mode" "TI")])
;; Expander that builds the PARALLEL matched by *avx_vzeroall.
;; nregs is 16 when TARGET_64BIT and 8 otherwise.  Element 0 of the
;; PARALLEL is an unspec_volatile; elements 1..nregs each set one SSE
;; register, viewed in V8SImode, to the zero vector.
15827 (define_expand "avx_vzeroall"
15828 [(match_par_dup 0 [(const_int 0)])]
15831 int nregs = TARGET_64BIT ? 16 : 8;
15834 operands[0] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs + 1));
15836 XVECEXP (operands[0], 0, 0)
15837 = gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, const0_rtx),
15840 for (regno = 0; regno < nregs; regno++)
15841 XVECEXP (operands[0], 0, regno + 1)
15842 = gen_rtx_SET (VOIDmode,
15843 gen_rtx_REG (V8SImode, SSE_REGNO (regno)),
15844 CONST0_RTX (V8SImode));
;; Insn matching a PARALLEL accepted by vzeroall_operation whose first
;; element is the UNSPECV_VZEROALL unspec_volatile.  Attributes mark it as
;; a VEX-prefixed insn with no modrm byte and no memory access.  The output
;; template is not visible in this excerpt.
15847 (define_insn "*avx_vzeroall"
15848 [(match_parallel 0 "vzeroall_operation"
15849 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROALL)])]
15852 [(set_attr "type" "sse")
15853 (set_attr "modrm" "0")
15854 (set_attr "memory" "none")
15855 (set_attr "prefix" "vex")
15856 (set_attr "btver2_decode" "vector")
15857 (set_attr "mode" "OI")])
15859 ;; Clear the upper 128bits of AVX registers, equivalent to a NOP
15860 ;; if the upper 128bits are unused.
;; Modelled as a bare UNSPECV_VZEROUPPER unspec_volatile with no operands.
;; Attributes mark it as a VEX-prefixed insn with no modrm byte and no
;; memory access.  The output template is not visible in this excerpt.
15861 (define_insn "avx_vzeroupper"
15862 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)]
15865 [(set_attr "type" "sse")
15866 (set_attr "modrm" "0")
15867 (set_attr "memory" "none")
15868 (set_attr "prefix" "vex")
15869 (set_attr "btver2_decode" "vector")
15870 (set_attr "mode" "OI")])
;; vpbroadcast<ssemodesuffix> for every VI mode: element 0 of operand 1
;; (an <ssexmmmode> register or memory operand) is selected as the scalar
;; source and the result is written to operand 0.
15872 (define_insn "avx2_pbroadcast<mode>"
15873 [(set (match_operand:VI 0 "register_operand" "=x")
15875 (vec_select:<ssescalarmode>
15876 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "xm")
15877 (parallel [(const_int 0)]))))]
15879 "vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}"
15880 [(set_attr "type" "ssemov")
15881 (set_attr "prefix_extra" "1")
15882 (set_attr "prefix" "vex")
15883 (set_attr "mode" "<sseinsnmode>")])
;; Broadcast of element 0 of a VI_256 operand (vec_duplicate of a
;; vec_select).  Alternative 0 takes the source from memory; alternative 1
;; takes it from a register and prints it with the %x operand modifier.
15885 (define_insn "avx2_pbroadcast<mode>_1"
15886 [(set (match_operand:VI_256 0 "register_operand" "=x,x")
15887 (vec_duplicate:VI_256
15888 (vec_select:<ssescalarmode>
15889 (match_operand:VI_256 1 "nonimmediate_operand" "m,x")
15890 (parallel [(const_int 0)]))))]
15893 vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}
15894 vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %x1}"
15895 [(set_attr "type" "ssemov")
15896 (set_attr "prefix_extra" "1")
15897 (set_attr "prefix" "vex")
15898 (set_attr "mode" "<sseinsnmode>")])
;; Variable permute vperm<ssemodesuffix> on VI48F_256_512 modes, modelled
;; as an unspec.  Operand 1 is the data (register or memory) and operand 2
;; the index vector in the matching integer vector mode (register only).
;; Requires AVX2 plus <mask_mode512bit_condition>.
15900 (define_insn "<avx2_avx512>_permvar<mode><mask_name>"
15901 [(set (match_operand:VI48F_256_512 0 "register_operand" "=v")
15902 (unspec:VI48F_256_512
15903 [(match_operand:VI48F_256_512 1 "nonimmediate_operand" "vm")
15904 (match_operand:<sseintvecmode> 2 "register_operand" "v")]
15906 "TARGET_AVX2 && <mask_mode512bit_condition>"
15907 "vperm<ssemodesuffix>\t{%1, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1}"
15908 [(set_attr "type" "sselog")
15909 (set_attr "prefix" "<mask_prefix2>")
15910 (set_attr "mode" "<sseinsnmode>")])
;; Variable permute for the VI2_AVX512VL modes.  Same operand layout as
;; the VI48F_256_512 permvar pattern: operand 1 is the data vector
;; (register or memory), operand 2 an <sseintvecmode> register.
;; Requires TARGET_AVX512BW in addition to <mask_mode512bit_condition>.
15912 (define_insn "<avx512>_permvar<mode><mask_name>"
15913 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
15914 (unspec:VI2_AVX512VL
15915 [(match_operand:VI2_AVX512VL 1 "nonimmediate_operand" "vm")
15916 (match_operand:<sseintvecmode> 2 "register_operand" "v")]
15918 "TARGET_AVX512BW && <mask_mode512bit_condition>"
15919 "vperm<ssemodesuffix>\t{%1, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1}"
15920 [(set_attr "type" "sselog")
15921 (set_attr "prefix" "<mask_prefix2>")
15922 (set_attr "mode" "<sseinsnmode>")])
;; Expander for an immediate-controlled permute of VI8F_256_512 vectors.
;; Operand 2 is an 8-bit immediate (const_0_to_255_operand).  It is split
;; into four 2-bit fields, lowest bits first, and each field is passed as
;; a separate constant to gen_<avx2_avx512>_perm<mode>_1 together with
;; operands 0 and 1.
15924 (define_expand "<avx2_avx512>_perm<mode>"
15925 [(match_operand:VI8F_256_512 0 "register_operand")
15926 (match_operand:VI8F_256_512 1 "nonimmediate_operand")
15927 (match_operand:SI 2 "const_0_to_255_operand")]
15930 int mask = INTVAL (operands[2]);
15931 emit_insn (gen_<avx2_avx512>_perm<mode>_1 (operands[0], operands[1],
15932 GEN_INT ((mask >> 0) & 3),
15933 GEN_INT ((mask >> 2) & 3),
15934 GEN_INT ((mask >> 4) & 3),
15935 GEN_INT ((mask >> 6) & 3)));
;; Masked form of the immediate-controlled permute expander.  The 8-bit
;; immediate in operand 2 is split into four 2-bit fields exactly as in
;; the unmasked expander.  Operand 3 (a vector_move_operand) and operand 4
;; (an <avx512fmaskmode> register) are forwarded unchanged as the last two
;; arguments of gen_<avx2_avx512>_perm<mode>_1_mask.
15939 (define_expand "<avx512>_perm<mode>_mask"
15940 [(match_operand:VI8F_256_512 0 "register_operand")
15941 (match_operand:VI8F_256_512 1 "nonimmediate_operand")
15942 (match_operand:SI 2 "const_0_to_255_operand")
15943 (match_operand:VI8F_256_512 3 "vector_move_operand")
15944 (match_operand:<avx512fmaskmode> 4 "register_operand")]
15947 int mask = INTVAL (operands[2]);
15948 emit_insn (gen_<avx2_avx512>_perm<mode>_1_mask (operands[0], operands[1],
15949 GEN_INT ((mask >> 0) & 3),
15950 GEN_INT ((mask >> 2) & 3),
15951 GEN_INT ((mask >> 4) & 3),
15952 GEN_INT ((mask >> 6) & 3),
15953 operands[3], operands[4]));
;; Insn matched by the perm expanders: a vec_select of operand 1 using
;; four selectors (operands 2..5), each in the range 0..3.  At output
;; time the four selectors are packed back into one immediate, two bits
;; each with operand 2 in the lowest bits.  That immediate replaces
;; operands[2] before the vperm<ssemodesuffix> template is returned.
15957 (define_insn "<avx2_avx512>_perm<mode>_1<mask_name>"
15958 [(set (match_operand:VI8F_256_512 0 "register_operand" "=v")
15959 (vec_select:VI8F_256_512
15960 (match_operand:VI8F_256_512 1 "nonimmediate_operand" "vm")
15961 (parallel [(match_operand 2 "const_0_to_3_operand")
15962 (match_operand 3 "const_0_to_3_operand")
15963 (match_operand 4 "const_0_to_3_operand")
15964 (match_operand 5 "const_0_to_3_operand")])))]
15965 "TARGET_AVX2 && <mask_mode512bit_condition>"
15968 mask |= INTVAL (operands[2]) << 0;
15969 mask |= INTVAL (operands[3]) << 2;
15970 mask |= INTVAL (operands[4]) << 4;
15971 mask |= INTVAL (operands[5]) << 6;
15972 operands[2] = GEN_INT (mask);
15973 return "vperm<ssemodesuffix>\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
15975 [(set_attr "type" "sselog")
15976 (set_attr "prefix" "<mask_prefix2>")
15977 (set_attr "mode" "<sseinsnmode>")])
;; Emits vperm2i128 on V4DI operands.  Operand 1 must be a register,
;; operand 2 may be a register or memory, and operand 3 is an 8-bit
;; immediate (const_0_to_255_operand) printed as the control byte.
15979 (define_insn "avx2_permv2ti"
15980 [(set (match_operand:V4DI 0 "register_operand" "=x")
15982 [(match_operand:V4DI 1 "register_operand" "x")
15983 (match_operand:V4DI 2 "nonimmediate_operand" "xm")
15984 (match_operand:SI 3 "const_0_to_255_operand" "n")]
15987 "vperm2i128\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15988 [(set_attr "type" "sselog")
15989 (set_attr "prefix" "vex")
15990 (set_attr "mode" "OI")])
;; Duplicates element 0 of the V2DF register in operand 1 into all
;; elements of the V4DF destination.  Emitted as vbroadcastsd with a
;; register source.
15992 (define_insn "avx2_vec_dupv4df"
15993 [(set (match_operand:V4DF 0 "register_operand" "=x")
15994 (vec_duplicate:V4DF
15996 (match_operand:V2DF 1 "register_operand" "x")
15997 (parallel [(const_int 0)]))))]
15999 "vbroadcastsd\t{%1, %0|%0, %1}"
16000 [(set_attr "type" "sselog1")
16001 (set_attr "prefix" "vex")
16002 (set_attr "mode" "V4DF")])
16004 ;; Modes handled by AVX vec_dup patterns.
;; Covers V8SI, V8SF, V4DI and V4DF.  Used by the vec_dup<mode> insn and
;; by the split whose condition is TARGET_AVX && !TARGET_AVX2.
16005 (define_mode_iterator AVX_VEC_DUP_MODE
16006 [V8SI V8SF V4DI V4DF])
;; Duplicates the scalar in operand 1 into every element of an
;; AVX_VEC_DUP_MODE register.  There are three alternatives: memory
;; source, register source restricted to isa avx2, and a discouraged
;; (?x) register source restricted to isa noavx2.  The first two are
;; emitted as vbroadcast<ssescalarmodesuffix>; the register form prints
;; the source with %x.
;; NOTE(review): the mode attribute is V8SF for every iterator mode --
;; confirm this is intended.
16008 (define_insn "vec_dup<mode>"
16009 [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand" "=x,x,x")
16010 (vec_duplicate:AVX_VEC_DUP_MODE
16011 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "m,x,?x")))]
16014 vbroadcast<ssescalarmodesuffix>\t{%1, %0|%0, %1}
16015 vbroadcast<ssescalarmodesuffix>\t{%x1, %0|%0, %x1}
16017 [(set_attr "type" "ssemov")
16018 (set_attr "prefix_extra" "1")
16019 (set_attr "prefix" "vex")
16020 (set_attr "isa" "*,avx2,noavx2")
16021 (set_attr "mode" "V8SF")])
;; Duplicates element 0 of the <ssexmmmode> operand 1 (register or
;; memory) into every element of a VI48F_512 register.  Emitted as
;; v<sseintprefix>broadcast<bcstscalarsuff>, EVEX-encoded, with an
;; optional mask suffix supplied through <mask_operand2>.
16023 (define_insn "<mask_codefor>avx512f_vec_dup<mode><mask_name>"
16024 [(set (match_operand:VI48F_512 0 "register_operand" "=v")
16025 (vec_duplicate:VI48F_512
16026 (vec_select:<ssescalarmode>
16027 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "vm")
16028 (parallel [(const_int 0)]))))]
16030 "v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
16031 [(set_attr "type" "ssemov")
16032 (set_attr "prefix" "evex")
16033 (set_attr "mode" "<sseinsnmode>")])
;; Duplicates an <ssexmmmode> vector (operand 1) across a V16FI register.
;; Alternative 0 (register source) emits vshuf<shuffletype>32x4 with
;; immediate 0, using operand 1 (printed with %g) for both inputs.
;; Alternative 1 (memory source) emits vbroadcast<shuffletype>32x4.
;; NOTE(review): %g presumably prints the zmm name of the register --
;; confirm against the i386 operand-printing code.
16035 (define_insn "<mask_codefor>avx512f_broadcast<mode><mask_name>"
16036 [(set (match_operand:V16FI 0 "register_operand" "=v,v")
16037 (vec_duplicate:V16FI
16038 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "v,m")))]
16041 vshuf<shuffletype>32x4\t{$0x0, %g1, %g1, %0<mask_operand2>|%0<mask_operand2>, %g1, %g1, 0x0}
16042 vbroadcast<shuffletype>32x4\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
16043 [(set_attr "type" "ssemov")
16044 (set_attr "prefix" "evex")
16045 (set_attr "mode" "<sseinsnmode>")])
;; Duplicates an <ssehalfvecmode> vector (operand 1) across a V8FI
;; register.  Alternative 0 (register source) emits
;; vshuf<shuffletype>64x2 with immediate 0x44, using operand 1 (printed
;; with %g) for both inputs.  Alternative 1 (memory source) emits
;; vbroadcast<shuffletype>64x4.
16047 (define_insn "<mask_codefor>avx512f_broadcast<mode><mask_name>"
16048 [(set (match_operand:V8FI 0 "register_operand" "=v,v")
16049 (vec_duplicate:V8FI
16050 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "v,m")))]
16053 vshuf<shuffletype>64x2\t{$0x44, %g1, %g1, %0<mask_operand2>|%0<mask_operand2>, %g1, %g1, 0x44}
16054 vbroadcast<shuffletype>64x4\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
16055 [(set_attr "type" "ssemov")
16056 (set_attr "prefix" "evex")
16057 (set_attr "mode" "<sseinsnmode>")])
;; Duplicates a scalar held in a general register (constraint "r") into
;; every element of a VI48_512 register; emitted as
;; vpbroadcast<bcstscalarsuff>.  The condition rejects V8DImode unless
;; TARGET_64BIT is set.
16059 (define_insn "<mask_codefor>avx512f_vec_dup_gpr<mode><mask_name>"
16060 [(set (match_operand:VI48_512 0 "register_operand" "=v")
16061 (vec_duplicate:VI48_512
16062 (match_operand:<ssescalarmode> 1 "register_operand" "r")))]
16063 "TARGET_AVX512F && (<MODE>mode != V8DImode || TARGET_64BIT)"
16064 "vpbroadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
16065 [(set_attr "type" "ssemov")
16066 (set_attr "prefix" "evex")
16067 (set_attr "mode" "<sseinsnmode>")])
;; Duplicates the scalar operand 1 into every element of a VI48F_512
;; register; emitted as v<sseintprefix>broadcast<bcstscalarsuff>.
;; Despite the _mem suffix in the name, the predicate and the "vm"
;; constraint accept a vector register as well as memory.
16069 (define_insn "<mask_codefor>avx512f_vec_dup_mem<mode><mask_name>"
16070 [(set (match_operand:VI48F_512 0 "register_operand" "=v")
16071 (vec_duplicate:VI48F_512
16072 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "vm")))]
16074 "v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
16075 [(set_attr "type" "ssemov")
16076 (set_attr "prefix" "evex")
16077 (set_attr "mode" "<sseinsnmode>")])
;; Emits vbroadcasti128 into a VI_256 register.  Operand 1 is an
;; <ssehalfvecmode> value and must be in memory (memory_operand, "m").
16079 (define_insn "avx2_vbroadcasti128_<mode>"
16080 [(set (match_operand:VI_256 0 "register_operand" "=x")
16082 (match_operand:<ssehalfvecmode> 1 "memory_operand" "m")
16085 "vbroadcasti128\t{%1, %0|%0, %1}"
16086 [(set_attr "type" "ssemov")
16087 (set_attr "prefix_extra" "1")
16088 (set_attr "prefix" "vex")
16089 (set_attr "mode" "OI")])
;; Post-reload split of a vec_duplicate from a register into an
;; AVX_VEC_DUP_MODE register, used only when TARGET_AVX && !TARGET_AVX2.
;; Operand 2 is created as the <ssehalfvecmode> register with the same
;; register number as operand 0.  The replacement first duplicates
;; operand 1 into operand 2, then forms a vec_concat of operand 2 with
;; itself.
;; NOTE(review): the destination of the vec_concat is presumably
;; operand 0 -- confirm against the complete pattern.
16092 [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand")
16093 (vec_duplicate:AVX_VEC_DUP_MODE
16094 (match_operand:<ssescalarmode> 1 "register_operand")))]
16095 "TARGET_AVX && !TARGET_AVX2 && reload_completed"
16096 [(set (match_dup 2)
16097 (vec_duplicate:<ssehalfvecmode> (match_dup 1)))
16099 (vec_concat:AVX_VEC_DUP_MODE (match_dup 2) (match_dup 2)))]
16100 "operands[2] = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (operands[0]));")
;; Fills both halves of a V_256 register from the <ssehalfvecmode>
;; operand 1.  Three alternatives:
;;   m   memory source: vbroadcast<i128>.
;;   0   source tied to the destination register: vinsert<i128> with
;;       immediate 1, inserting operand 1 into operand 0.
;;   ?x  other register (discouraged): vperm2<i128> with immediate 0,
;;       using operand 1 printed with %t for both inputs.
;; The length_immediate attribute (0,1,1) matches these templates.
16102 (define_insn "avx_vbroadcastf128_<mode>"
16103 [(set (match_operand:V_256 0 "register_operand" "=x,x,x")
16105 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "m,0,?x")
16109 vbroadcast<i128>\t{%1, %0|%0, %1}
16110 vinsert<i128>\t{$1, %1, %0, %0|%0, %0, %1, 1}
16111 vperm2<i128>\t{$0, %t1, %t1, %0|%0, %t1, %t1, 0}"
16112 [(set_attr "type" "ssemov,sselog1,sselog1")
16113 (set_attr "prefix_extra" "1")
16114 (set_attr "length_immediate" "0,1,1")
16115 (set_attr "prefix" "vex")
16116 (set_attr "mode" "<sseinsnmode>")])
16118 ;; For broadcast[i|f]32x2.  Note that there is no V4SF version, only V4SI.
;; V16SI and V16SF are unconditional; V8SI, V4SI and V8SF additionally
;; require TARGET_AVX512VL.
16119 (define_mode_iterator VI4F_BRCST32x2
16120 [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
16121 V16SF (V8SF "TARGET_AVX512VL")])
;; Maps each 64-bit-element vector mode to the two-element mode with the
;; same element type: V8DF/V4DF -> V2DF and V8DI/V4DI -> V2DI.
16123 (define_mode_attr 64x2mode
16124 [(V8DF "V2DF") (V8DI "V2DI") (V4DI "V2DI") (V4DF "V2DF")])
;; Maps each 32-bit-element vector mode to the two-element mode with the
;; same element type: V16SF/V8SF -> V2SF and V16SI/V8SI/V4SI -> V2SI.
16126 (define_mode_attr 32x2mode
16127 [(V16SF "V2SF") (V16SI "V2SI") (V8SI "V2SI")
16128 (V8SF "V2SF") (V4SI "V2SI")])
;; Duplicates the pair formed by elements 0 and 1 of the <ssexmmmode>
;; operand 1 (register or memory) across a VI4F_BRCST32x2 register.
;; Emitted as vbroadcast<shuffletype>32x2, EVEX-encoded, with an optional
;; mask suffix supplied through <mask_operand2>.
16130 (define_insn "<mask_codefor>avx512dq_broadcast<mode><mask_name>"
16131 [(set (match_operand:VI4F_BRCST32x2 0 "register_operand" "=v")
16132 (vec_duplicate:VI4F_BRCST32x2
16133 (vec_select:<32x2mode>
16134 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "vm")
16135 (parallel [(const_int 0) (const_int 1)]))))]
16137 "vbroadcast<shuffletype>32x2\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
16138 [(set_attr "type" "ssemov")
16139 (set_attr "prefix_extra" "1")
16140 (set_attr "prefix" "evex")
16141 (set_attr "mode" "<sseinsnmode>")])
;; Duplicates an <ssexmmmode> vector (operand 1) across a VI4F_256
;; register.  Alternative 0 (register source) emits
;; vshuf<shuffletype>32x4 with immediate 0, using operand 1 (printed with
;; %t) for both inputs.  Alternative 1 (memory source) emits
;; vbroadcast<shuffletype>32x4.
16143 (define_insn "<mask_codefor>avx512vl_broadcast<mode><mask_name>_1"
16144 [(set (match_operand:VI4F_256 0 "register_operand" "=v,v")
16145 (vec_duplicate:VI4F_256
16146 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "v,m")))]
16149 vshuf<shuffletype>32x4\t{$0x0, %t1, %t1, %0<mask_operand2>|%0<mask_operand2>, %t1, %t1, 0x0}
16150 vbroadcast<shuffletype>32x4\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
16151 [(set_attr "type" "ssemov")
16152 (set_attr "prefix_extra" "1")
16153 (set_attr "prefix" "evex")
16154 (set_attr "mode" "<sseinsnmode>")])
;; Duplicates an <ssehalfvecmode> vector (operand 1) across a V16FI
;; register.  Alternative 0 (register source) emits
;; vshuf<shuffletype>32x4 with immediate 0x44, using operand 1 (printed
;; with %g) for both inputs.  Alternative 1 (memory source) emits
;; vbroadcast<shuffletype>32x8.
16156 (define_insn "<mask_codefor>avx512dq_broadcast<mode><mask_name>_1"
16157 [(set (match_operand:V16FI 0 "register_operand" "=v,v")
16158 (vec_duplicate:V16FI
16159 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "v,m")))]
16162 vshuf<shuffletype>32x4\t{$0x44, %g1, %g1, %0<mask_operand2>|%0<mask_operand2>, %g1, %g1, 0x44}
16163 vbroadcast<shuffletype>32x8\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
16164 [(set_attr "type" "ssemov")
16165 (set_attr "prefix_extra" "1")
16166 (set_attr "prefix" "evex")
16167 (set_attr "mode" "<sseinsnmode>")])
16169 ;; For broadcast[i|f]64x2
;; V8DI and V8DF are unconditional; V4DI and V4DF additionally require
;; TARGET_AVX512VL.
16170 (define_mode_iterator VI8F_BRCST64x2
16171 [V8DI V8DF (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")])
;; Duplicates a <64x2mode> vector (operand 1) across a VI8F_BRCST64x2
;; register.  Alternative 0 (register source) emits
;; vshuf<shuffletype>64x2 with immediate 0, using operand 1 (printed with
;; the %<concat_tg_mode> modifier) for both inputs.  Alternative 1
;; (memory source) emits vbroadcast<shuffletype>64x2.
16173 (define_insn "<mask_codefor>avx512dq_broadcast<mode><mask_name>_1"
16174 [(set (match_operand:VI8F_BRCST64x2 0 "register_operand" "=v,v")
16175 (vec_duplicate:VI8F_BRCST64x2
16176 (match_operand:<64x2mode> 1 "nonimmediate_operand" "v,m")))]
16179 vshuf<shuffletype>64x2\t{$0x0, %<concat_tg_mode>1, %<concat_tg_mode>1, %0<mask_operand2>|%0<mask_operand2>, %<concat_tg_mode>1, %<concat_tg_mode>1, 0x0}
16180 vbroadcast<shuffletype>64x2\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
16181 [(set_attr "type" "ssemov")
16182 (set_attr "prefix_extra" "1")
16183 (set_attr "prefix" "evex")
16184 (set_attr "mode" "<sseinsnmode>")])
;; Emits vpbroadcastmb2q: operand 1 is a QImode value in a mask register
;; (constraint "Yk"), duplicated into a VI8_AVX512VL register.
;; NOTE(review): the mode attribute is XI for every iterator mode --
;; confirm this is intended for the 128/256-bit variants.
16186 (define_insn "avx512cd_maskb_vec_dup<mode>"
16187 [(set (match_operand:VI8_AVX512VL 0 "register_operand" "=v")
16188 (vec_duplicate:VI8_AVX512VL
16190 (match_operand:QI 1 "register_operand" "Yk"))))]
16192 "vpbroadcastmb2q\t{%1, %0|%0, %1}"
16193 [(set_attr "type" "mskmov")
16194 (set_attr "prefix" "evex")
16195 (set_attr "mode" "XI")])
;; Emits vpbroadcastmw2d: operand 1 is an HImode value in a mask register
;; (constraint "Yk"), duplicated into a VI4_AVX512VL register.
;; NOTE(review): the mode attribute is XI for every iterator mode --
;; confirm this is intended for the 128/256-bit variants.
16197 (define_insn "avx512cd_maskw_vec_dup<mode>"
16198 [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
16199 (vec_duplicate:VI4_AVX512VL
16201 (match_operand:HI 1 "register_operand" "Yk"))))]
16203 "vpbroadcastmw2d\t{%1, %0|%0, %1}"
16204 [(set_attr "type" "mskmov")
16205 (set_attr "prefix" "evex")
16206 (set_attr "mode" "XI")])
16208 ;; Recognize broadcast as a vec_select as produced by builtin_vec_perm.
16209 ;; If it so happens that the input is in memory, use vbroadcast.
16210 ;; Otherwise use vpermilp (and in the case of 256-bit modes, vperm2f128).
;; V4SF broadcast expressed through a parallel that satisfies
;; avx_vbroadcast_operand; operand 3 is the index of the broadcast
;; element.  The output code has two outcomes:
;;  - rewrite operand 1 as the SFmode memory at byte offset elt * 4 and
;;    emit vbroadcastss;
;;  - emit vpermilps with immediate elt * 0x55, i.e. the 2-bit element
;;    index replicated into all four selector fields.
;; NOTE(review): judging by the type and length_immediate attributes, the
;; two memory alternatives (m, o) take the first path and the register
;; alternative the second -- confirm against the complete switch.
16211 (define_insn "*avx_vperm_broadcast_v4sf"
16212 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
16214 (match_operand:V4SF 1 "nonimmediate_operand" "m,o,x")
16215 (match_parallel 2 "avx_vbroadcast_operand"
16216 [(match_operand 3 "const_int_operand" "C,n,n")])))]
16219 int elt = INTVAL (operands[3]);
16220 switch (which_alternative)
16224 operands[1] = adjust_address_nv (operands[1], SFmode, elt * 4);
16225 return "vbroadcastss\t{%1, %0|%0, %k1}";
16227 operands[2] = GEN_INT (elt * 0x55);
16228 return "vpermilps\t{%2, %1, %0|%0, %1, %2}";
16230 gcc_unreachable ();
16233 [(set_attr "type" "ssemov,ssemov,sselog1")
16234 (set_attr "prefix_extra" "1")
16235 (set_attr "length_immediate" "0,0,1")
16236 (set_attr "prefix" "vex")
16237 (set_attr "mode" "SF,SF,V4SF")])
;; VF_256 broadcast expressed through a parallel that satisfies
;; avx_vbroadcast_operand; operand 3 is the index of the broadcast
;; element.  The insn is split after reload, except for V4DFmode when
;; TARGET_AVX2 is set, into a plain vec_duplicate of operand 1.
;; The preparation code contains three visible actions:
;;  - with TARGET_AVX2 and elt == 0, emit gen_vec_dup<mode> from the low
;;    part of the source;
;;  - otherwise emit vpermil to replicate the element within its 128-bit
;;    lane, then vperm2f128 (immediate lane * 0x11) to copy that lane
;;    into both halves of the destination;
;;  - re-address operand 1 to the selected scalar element in memory.
;; NOTE(review): presumably the first two apply to register sources and
;; the last to memory sources -- confirm against the complete body.
16239 (define_insn_and_split "*avx_vperm_broadcast_<mode>"
16240 [(set (match_operand:VF_256 0 "register_operand" "=x,x,x")
16242 (match_operand:VF_256 1 "nonimmediate_operand" "m,o,?x")
16243 (match_parallel 2 "avx_vbroadcast_operand"
16244 [(match_operand 3 "const_int_operand" "C,n,n")])))]
16247 "&& reload_completed && (<MODE>mode != V4DFmode || !TARGET_AVX2)"
16248 [(set (match_dup 0) (vec_duplicate:VF_256 (match_dup 1)))]
16250 rtx op0 = operands[0], op1 = operands[1];
16251 int elt = INTVAL (operands[3]);
16257 if (TARGET_AVX2 && elt == 0)
16259 emit_insn (gen_vec_dup<mode> (op0, gen_lowpart (<ssescalarmode>mode,
16264 /* Shuffle element we care about into all elements of the 128-bit lane.
16265 The other lane gets shuffled too, but we don't care. */
16266 if (<MODE>mode == V4DFmode)
16267 mask = (elt & 1 ? 15 : 0);
16269 mask = (elt & 3) * 0x55;
16270 emit_insn (gen_avx_vpermil<mode> (op0, op1, GEN_INT (mask)));
16272 /* Shuffle the lane we care about into both lanes of the dest. */
16273 mask = (elt / (<ssescalarnum> / 2)) * 0x11;
16274 emit_insn (gen_avx_vperm2f128<mode>3 (op0, op0, op0, GEN_INT (mask)));
16278 operands[1] = adjust_address (op1, <ssescalarmode>mode,
16279 elt * GET_MODE_SIZE (<ssescalarmode>mode));
;; Expander for the immediate form of vpermil on VF2 modes.  It turns the
;; immediate in operand 2 into an explicit selector list, one entry per
;; element.  Elements are handled in pairs starting at index i: bit i of
;; the mask picks element i or i + 1 for the first slot of the pair, and
;; bit i + 1 does the same for the second slot.  The list is then wrapped
;; in a PARALLEL.
16282 (define_expand "<sse2_avx_avx512f>_vpermil<mode><mask_name>"
16283 [(set (match_operand:VF2 0 "register_operand")
16285 (match_operand:VF2 1 "nonimmediate_operand")
16286 (match_operand:SI 2 "const_0_to_255_operand")))]
16287 "TARGET_AVX && <mask_mode512bit_condition>"
16289 int mask = INTVAL (operands[2]);
16290 rtx perm[<ssescalarnum>];
16293 for (i = 0; i < <ssescalarnum>; i = i + 2)
16295 perm[i] = GEN_INT (((mask >> i) & 1) + i);
16296 perm[i + 1] = GEN_INT (((mask >> (i + 1)) & 1) + i);
16300 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
;; Expander for the immediate form of vpermil on VF1 modes.  It turns the
;; immediate in operand 2 into an explicit selector list, one entry per
;; element.  Elements are handled in groups of four starting at index i.
;; The same four 2-bit fields of the mask are reused for every group,
;; each offset by the group base i.  The list is then wrapped in a
;; PARALLEL.
16303 (define_expand "<sse2_avx_avx512f>_vpermil<mode><mask_name>"
16304 [(set (match_operand:VF1 0 "register_operand")
16306 (match_operand:VF1 1 "nonimmediate_operand")
16307 (match_operand:SI 2 "const_0_to_255_operand")))]
16308 "TARGET_AVX && <mask_mode512bit_condition>"
16310 int mask = INTVAL (operands[2]);
16311 rtx perm[<ssescalarnum>];
16314 for (i = 0; i < <ssescalarnum>; i = i + 4)
16316 perm[i] = GEN_INT (((mask >> 0) & 3) + i);
16317 perm[i + 1] = GEN_INT (((mask >> 2) & 3) + i);
16318 perm[i + 2] = GEN_INT (((mask >> 4) & 3) + i);
16319 perm[i + 3] = GEN_INT (((mask >> 6) & 3) + i);
16323 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
;; Insn for the immediate form of vpermil.  The pattern matches only when
;; avx_vpermilp_parallel accepts the selector parallel in operand 2 for
;; this mode (nonzero return).  At output time the immediate is that
;; return value minus one, stored into operands[2] before the
;; vpermil<ssemodesuffix> template is returned.
16326 (define_insn "*<sse2_avx_avx512f>_vpermilp<mode><mask_name>"
16327 [(set (match_operand:VF 0 "register_operand" "=v")
16329 (match_operand:VF 1 "nonimmediate_operand" "vm")
16330 (match_parallel 2 ""
16331 [(match_operand 3 "const_int_operand")])))]
16332 "TARGET_AVX && <mask_mode512bit_condition>
16333 && avx_vpermilp_parallel (operands[2], <MODE>mode)"
16335 int mask = avx_vpermilp_parallel (operands[2], <MODE>mode) - 1;
16336 operands[2] = GEN_INT (mask);
16337 return "vpermil<ssemodesuffix>\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}";
16339 [(set_attr "type" "sselog")
16340 (set_attr "prefix_extra" "1")
16341 (set_attr "length_immediate" "1")
16342 (set_attr "prefix" "<mask_prefix>")
16343 (set_attr "mode" "<sseinsnmode>")])
;; Variable-control form of vpermil on VF modes.  Operand 1 is the data
;; register and operand 2 is an <sseintvecmode> register or memory
;; operand.  Emits vpermil<ssemodesuffix>, with an optional mask suffix
;; supplied through <mask_operand3>.
16345 (define_insn "<sse2_avx_avx512f>_vpermilvar<mode>3<mask_name>"
16346 [(set (match_operand:VF 0 "register_operand" "=v")
16348 [(match_operand:VF 1 "register_operand" "v")
16349 (match_operand:<sseintvecmode> 2 "nonimmediate_operand" "vm")]
16351 "TARGET_AVX && <mask_mode512bit_condition>"
16352 "vpermil<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
16353 [(set_attr "type" "sselog")
16354 (set_attr "prefix_extra" "1")
16355 (set_attr "btver2_decode" "vector")
16356 (set_attr "prefix" "<mask_prefix>")
16357 (set_attr "mode" "<sseinsnmode>")])
;; Zero-masking expander for vpermi2var on VI48F modes.  It forwards
;; operands 0..3 to gen_<avx512>_vpermi2var<mode>3_maskz_1, inserting an
;; all-zero vector (CONST0_RTX of the vector mode) ahead of the mask
;; register in operand 4.
16359 (define_expand "<avx512>_vpermi2var<mode>3_maskz"
16360 [(match_operand:VI48F 0 "register_operand" "=v")
16361 (match_operand:VI48F 1 "register_operand" "v")
16362 (match_operand:<sseintvecmode> 2 "register_operand" "0")
16363 (match_operand:VI48F 3 "nonimmediate_operand" "vm")
16364 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")]
16367 emit_insn (gen_<avx512>_vpermi2var<mode>3_maskz_1 (
16368 operands[0], operands[1], operands[2], operands[3],
16369 CONST0_RTX (<MODE>mode), operands[4]));
;; Zero-masking expander for vpermi2var on VI2_AVX512VL modes.  It
;; forwards operands 0..3 to gen_<avx512>_vpermi2var<mode>3_maskz_1,
;; inserting an all-zero vector (CONST0_RTX of the vector mode) ahead of
;; the mask register in operand 4.
16373 (define_expand "<avx512>_vpermi2var<mode>3_maskz"
16374 [(match_operand:VI2_AVX512VL 0 "register_operand" "=v")
16375 (match_operand:VI2_AVX512VL 1 "register_operand" "v")
16376 (match_operand:<sseintvecmode> 2 "register_operand" "0")
16377 (match_operand:VI2_AVX512VL 3 "nonimmediate_operand" "vm")
16378 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")]
16381 emit_insn (gen_<avx512>_vpermi2var<mode>3_maskz_1 (
16382 operands[0], operands[1], operands[2], operands[3],
16383 CONST0_RTX (<MODE>mode), operands[4]));
;; vpermi2<ssemodesuffix> on VI48F modes.  Operand 2, the
;; <sseintvecmode> register, is tied to the destination by the "0"
;; constraint, so the template prints only operands 3, 1 and 0.
;; Operand 3 may be a register or memory.
16387 (define_insn "<avx512>_vpermi2var<mode>3<sd_maskz_name>"
16388 [(set (match_operand:VI48F 0 "register_operand" "=v")
16390 [(match_operand:VI48F 1 "register_operand" "v")
16391 (match_operand:<sseintvecmode> 2 "register_operand" "0")
16392 (match_operand:VI48F 3 "nonimmediate_operand" "vm")]
16395 "vpermi2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}"
16396 [(set_attr "type" "sselog")
16397 (set_attr "prefix" "evex")
16398 (set_attr "mode" "<sseinsnmode>")])
;; vpermi2<ssemodesuffix> on VI2_AVX512VL modes, modelled as an unspec.
;; Operand 2, the <sseintvecmode> register, is tied to the destination by
;; the "0" constraint, so the template prints only operands 3, 1 and 0.
16400 (define_insn "<avx512>_vpermi2var<mode>3<sd_maskz_name>"
16401 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
16402 (unspec:VI2_AVX512VL
16403 [(match_operand:VI2_AVX512VL 1 "register_operand" "v")
16404 (match_operand:<sseintvecmode> 2 "register_operand" "0")
16405 (match_operand:VI2_AVX512VL 3 "nonimmediate_operand" "vm")]
16408 "vpermi2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}"
16409 [(set_attr "type" "sselog")
16410 (set_attr "prefix" "evex")
16411 (set_attr "mode" "<sseinsnmode>")])
;; Masked vpermi2<ssemodesuffix> on VI48F modes, built on
;; UNSPEC_VPERMI2_MASK.  Operand 2 is tied to the destination by the "0"
;; constraint.  Operand 4 is the mask register (constraint "Yk") and is
;; printed inside braces after the destination.
16413 (define_insn "<avx512>_vpermi2var<mode>3_mask"
16414 [(set (match_operand:VI48F 0 "register_operand" "=v")
16417 [(match_operand:VI48F 1 "register_operand" "v")
16418 (match_operand:<sseintvecmode> 2 "register_operand" "0")
16419 (match_operand:VI48F 3 "nonimmediate_operand" "vm")]
16420 UNSPEC_VPERMI2_MASK)
16422 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
16424 "vpermi2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
16425 [(set_attr "type" "sselog")
16426 (set_attr "prefix" "evex")
16427 (set_attr "mode" "<sseinsnmode>")])
;; Masked vpermi2<ssemodesuffix> on VI2_AVX512VL modes: a vec_merge whose
;; first arm is the UNSPEC_VPERMI2_MASK permute and whose selector is the
;; mask register in operand 4 (constraint "Yk").  Operand 2 is tied to
;; the destination by the "0" constraint.
16429 (define_insn "<avx512>_vpermi2var<mode>3_mask"
16430 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
16431 (vec_merge:VI2_AVX512VL
16432 (unspec:VI2_AVX512VL
16433 [(match_operand:VI2_AVX512VL 1 "register_operand" "v")
16434 (match_operand:<sseintvecmode> 2 "register_operand" "0")
16435 (match_operand:VI2_AVX512VL 3 "nonimmediate_operand" "vm")]
16436 UNSPEC_VPERMI2_MASK)
16438 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
16440 "vpermi2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
16441 [(set_attr "type" "sselog")
16442 (set_attr "prefix" "evex")
16443 (set_attr "mode" "<sseinsnmode>")])
;; Zero-masking expander for vpermt2var on VI48F modes.  Here operand 1
;; is the <sseintvecmode> vector and operand 2 a data vector.  It
;; forwards operands 0..3 to gen_<avx512>_vpermt2var<mode>3_maskz_1,
;; inserting an all-zero vector (CONST0_RTX of the vector mode) ahead of
;; the mask register in operand 4.
16445 (define_expand "<avx512>_vpermt2var<mode>3_maskz"
16446 [(match_operand:VI48F 0 "register_operand" "=v")
16447 (match_operand:<sseintvecmode> 1 "register_operand" "v")
16448 (match_operand:VI48F 2 "register_operand" "0")
16449 (match_operand:VI48F 3 "nonimmediate_operand" "vm")
16450 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")]
16453 emit_insn (gen_<avx512>_vpermt2var<mode>3_maskz_1 (
16454 operands[0], operands[1], operands[2], operands[3],
16455 CONST0_RTX (<MODE>mode), operands[4]));
;; Zero-masking expander for vpermt2var on VI2_AVX512VL modes.  It
;; forwards operands 0..3 to gen_<avx512>_vpermt2var<mode>3_maskz_1,
;; inserting an all-zero vector (CONST0_RTX of the vector mode) ahead of
;; the mask register in operand 4.
16459 (define_expand "<avx512>_vpermt2var<mode>3_maskz"
16460 [(match_operand:VI2_AVX512VL 0 "register_operand" "=v")
16461 (match_operand:<sseintvecmode> 1 "register_operand" "v")
16462 (match_operand:VI2_AVX512VL 2 "register_operand" "0")
16463 (match_operand:VI2_AVX512VL 3 "nonimmediate_operand" "vm")
16464 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")]
16467 emit_insn (gen_<avx512>_vpermt2var<mode>3_maskz_1 (
16468 operands[0], operands[1], operands[2], operands[3],
16469 CONST0_RTX (<MODE>mode), operands[4]));
;; vpermt2<ssemodesuffix> on VI48F modes.  Operand 1 is the
;; <sseintvecmode> register.  Operand 2, a data vector, is tied to the
;; destination by the "0" constraint, so the template prints only
;; operands 3, 1 and 0.
16473 (define_insn "<avx512>_vpermt2var<mode>3<sd_maskz_name>"
16474 [(set (match_operand:VI48F 0 "register_operand" "=v")
16476 [(match_operand:<sseintvecmode> 1 "register_operand" "v")
16477 (match_operand:VI48F 2 "register_operand" "0")
16478 (match_operand:VI48F 3 "nonimmediate_operand" "vm")]
16481 "vpermt2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}"
16482 [(set_attr "type" "sselog")
16483 (set_attr "prefix" "evex")
16484 (set_attr "mode" "<sseinsnmode>")])
;; vpermt2<ssemodesuffix> on VI2_AVX512VL modes, modelled as an unspec.
;; Operand 1 is the <sseintvecmode> register.  Operand 2, a data vector,
;; is tied to the destination by the "0" constraint.
16486 (define_insn "<avx512>_vpermt2var<mode>3<sd_maskz_name>"
16487 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
16488 (unspec:VI2_AVX512VL
16489 [(match_operand:<sseintvecmode> 1 "register_operand" "v")
16490 (match_operand:VI2_AVX512VL 2 "register_operand" "0")
16491 (match_operand:VI2_AVX512VL 3 "nonimmediate_operand" "vm")]
16494 "vpermt2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}"
16495 [(set_attr "type" "sselog")
16496 (set_attr "prefix" "evex")
16497 (set_attr "mode" "<sseinsnmode>")])
;; Masked vpermt2<ssemodesuffix> on VI48F modes.  Operand 2 is tied to
;; the destination by the "0" constraint.  Operand 4 is the mask register
;; (constraint "Yk") and is printed inside braces after the destination.
16499 (define_insn "<avx512>_vpermt2var<mode>3_mask"
16500 [(set (match_operand:VI48F 0 "register_operand" "=v")
16503 [(match_operand:<sseintvecmode> 1 "register_operand" "v")
16504 (match_operand:VI48F 2 "register_operand" "0")
16505 (match_operand:VI48F 3 "nonimmediate_operand" "vm")]
16508 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
16510 "vpermt2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
16511 [(set_attr "type" "sselog")
16512 (set_attr "prefix" "evex")
16513 (set_attr "mode" "<sseinsnmode>")])
;; Masked vpermt2<ssemodesuffix> on VI2_AVX512VL modes: a vec_merge whose
;; first arm is the unspec permute and whose selector is the mask
;; register in operand 4 (constraint "Yk").  Operand 2 is tied to the
;; destination by the "0" constraint.
16515 (define_insn "<avx512>_vpermt2var<mode>3_mask"
16516 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
16517 (vec_merge:VI2_AVX512VL
16518 (unspec:VI2_AVX512VL
16519 [(match_operand:<sseintvecmode> 1 "register_operand" "v")
16520 (match_operand:VI2_AVX512VL 2 "register_operand" "0")
16521 (match_operand:VI2_AVX512VL 3 "nonimmediate_operand" "vm")]
16524 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
16526 "vpermt2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
16527 [(set_attr "type" "sselog")
16528 (set_attr "prefix" "evex")
16529 (set_attr "mode" "<sseinsnmode>")])
;; Expander for vperm2f128 with an 8-bit immediate in operand 3.  The
;; default pattern is the UNSPEC_VPERMIL2F128 form.  When neither bit 3
;; nor bit 7 of the immediate is set ((mask & 0x88) == 0), the preparation
;; code builds an explicit form instead: a vec_select over the vec_concat
;; of operands 1 and 2.  The low half of the result takes nelt2
;; consecutive elements starting at (mask & 3) * nelt2, and the high half
;; starts at ((mask >> 4) & 3) * nelt2.
;; NOTE(review): the built SET is presumably emitted followed by DONE --
;; confirm against the complete body.
16531 (define_expand "avx_vperm2f128<mode>3"
16532 [(set (match_operand:AVX256MODE2P 0 "register_operand")
16533 (unspec:AVX256MODE2P
16534 [(match_operand:AVX256MODE2P 1 "register_operand")
16535 (match_operand:AVX256MODE2P 2 "nonimmediate_operand")
16536 (match_operand:SI 3 "const_0_to_255_operand")]
16537 UNSPEC_VPERMIL2F128))]
16540 int mask = INTVAL (operands[3]);
16541 if ((mask & 0x88) == 0)
16543 rtx perm[<ssescalarnum>], t1, t2;
16544 int i, base, nelt = <ssescalarnum>, nelt2 = nelt / 2;
16546 base = (mask & 3) * nelt2;
16547 for (i = 0; i < nelt2; ++i)
16548 perm[i] = GEN_INT (base + i);
16550 base = ((mask >> 4) & 3) * nelt2;
16551 for (i = 0; i < nelt2; ++i)
16552 perm[i + nelt2] = GEN_INT (base + i);
16554 t2 = gen_rtx_VEC_CONCAT (<ssedoublevecmode>mode,
16555 operands[1], operands[2]);
16556 t1 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, perm));
16557 t2 = gen_rtx_VEC_SELECT (<MODE>mode, t2, t1);
16558 t2 = gen_rtx_SET (VOIDmode, operands[0], t2);
16564 ;; Note that bits 7 and 3 of the imm8 allow lanes to be zeroed, which
16565 ;; means that in order to represent this properly in rtl we'd have to
16566 ;; nest *another* vec_concat with a zero operand and do the select from
16567 ;; a 4x wide vector. That doesn't seem very nice.
;; Unspec (UNSPEC_VPERMIL2F128) form of vperm2<i128>.  It accepts any
;; 8-bit immediate in operand 3, which is printed unchanged as the
;; control byte.
16568 (define_insn "*avx_vperm2f128<mode>_full"
16569 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
16570 (unspec:AVX256MODE2P
16571 [(match_operand:AVX256MODE2P 1 "register_operand" "x")
16572 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm")
16573 (match_operand:SI 3 "const_0_to_255_operand" "n")]
16574 UNSPEC_VPERMIL2F128))]
16576 "vperm2<i128>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
16577 [(set_attr "type" "sselog")
16578 (set_attr "prefix_extra" "1")
16579 (set_attr "length_immediate" "1")
16580 (set_attr "prefix" "vex")
16581 (set_attr "mode" "<sseinsnmode>")])
;; Explicit form of vperm2<i128>: a vec_select over the vec_concat of
;; operands 1 and 2.  The pattern matches only when
;; avx_vperm2f128_parallel accepts the selector parallel in operand 3;
;; the immediate is that return value minus one.  The output code can
;; return a vinsert<i128> template (immediate 0 or 1, source operand 2
;; printed with %x) or the general vperm2<i128> template with the
;; computed immediate.
;; NOTE(review): the mask tests that choose the vinsert forms are not
;; shown here -- confirm which masks they cover.
16583 (define_insn "*avx_vperm2f128<mode>_nozero"
16584 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
16585 (vec_select:AVX256MODE2P
16586 (vec_concat:<ssedoublevecmode>
16587 (match_operand:AVX256MODE2P 1 "register_operand" "x")
16588 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm"))
16589 (match_parallel 3 ""
16590 [(match_operand 4 "const_int_operand")])))]
16592 && avx_vperm2f128_parallel (operands[3], <MODE>mode)"
16594 int mask = avx_vperm2f128_parallel (operands[3], <MODE>mode) - 1;
16596 return "vinsert<i128>\t{$0, %x2, %1, %0|%0, %1, %x2, 0}";
16598 return "vinsert<i128>\t{$1, %x2, %1, %0|%0, %1, %x2, 1}";
16599 operands[3] = GEN_INT (mask);
16600 return "vperm2<i128>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
16602 [(set_attr "type" "sselog")
16603 (set_attr "prefix_extra" "1")
16604 (set_attr "length_immediate" "1")
16605 (set_attr "prefix" "vex")
16606 (set_attr "mode" "<sseinsnmode>")])
;; Single-input palignr recognised from a parallel that satisfies
;; palignr_operand.  Operand 3 is an element count; the output code
;; multiplies it by the element size in bytes to form the immediate.
;; Two alternatives, restricted by isa to noavx and avx.  The templates
;; are palignr, with operand 1 tied to the destination, and vpalignr,
;; which uses operand 1 for both sources.
16608 (define_insn "*ssse3_palignr<mode>_perm"
16609 [(set (match_operand:V_128 0 "register_operand" "=x,x")
16611 (match_operand:V_128 1 "register_operand" "0,x")
16612 (match_parallel 2 "palignr_operand"
16613 [(match_operand 3 "const_int_operand" "n, n")])))]
16616 enum machine_mode imode = GET_MODE_INNER (GET_MODE (operands[0]));
16617 operands[2] = GEN_INT (INTVAL (operands[3]) * GET_MODE_SIZE (imode));
16619 switch (which_alternative)
16622 return "palignr\t{%2, %1, %0|%0, %1, %2}";
16624 return "vpalignr\t{%2, %1, %1, %0|%0, %1, %1, %2}";
16626 gcc_unreachable ();
16629 [(set_attr "isa" "noavx,avx")
16630 (set_attr "type" "sseishft")
16631 (set_attr "atom_unit" "sishuf")
16632 (set_attr "prefix_data16" "1,*")
16633 (set_attr "prefix_extra" "1")
16634 (set_attr "length_immediate" "1")
16635 (set_attr "prefix" "orig,vex")])
;; Masked half-vector insert for VI48F_256 modes.  Operand 3 is 0 or 1
;; (const_0_to_1_operand) and selects between gen_vec_set_lo_<mode>_mask
;; and gen_vec_set_hi_<mode>_mask.  The chosen generator takes five
;; arguments, beginning with operands 0, 1, 2 and 4.
;; NOTE(review): the fifth argument is presumably the mask register in
;; operand 5 -- confirm against the complete call.
16637 (define_expand "avx512vl_vinsert<mode>"
16638 [(match_operand:VI48F_256 0 "register_operand")
16639 (match_operand:VI48F_256 1 "register_operand")
16640 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand")
16641 (match_operand:SI 3 "const_0_to_1_operand")
16642 (match_operand:VI48F_256 4 "register_operand")
16643 (match_operand:<avx512fmaskmode> 5 "register_operand")]
16646 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
16648 switch (INTVAL (operands[3]))
16651 insn = gen_vec_set_lo_<mode>_mask;
16654 insn = gen_vec_set_hi_<mode>_mask;
16657 gcc_unreachable ();
16660 emit_insn (insn (operands[0], operands[1], operands[2], operands[4],
;; Half-vector insert for V_256 modes.  Operand 3 is 0 or 1
;; (const_0_to_1_operand) and selects between gen_vec_set_lo_<mode> and
;; gen_vec_set_hi_<mode>.  The chosen generator is called with operands
;; 0, 1 and 2.
16665 (define_expand "avx_vinsertf128<mode>"
16666 [(match_operand:V_256 0 "register_operand")
16667 (match_operand:V_256 1 "register_operand")
16668 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand")
16669 (match_operand:SI 3 "const_0_to_1_operand")]
16672 rtx (*insn)(rtx, rtx, rtx);
16674 switch (INTVAL (operands[3]))
16677 insn = gen_vec_set_lo_<mode>;
16680 insn = gen_vec_set_hi_<mode>;
16683 gcc_unreachable ();
16686 emit_insn (insn (operands[0], operands[1], operands[2]));
;; Replaces the low half of a VI8F_256 vector: the result is operand 2
;; concatenated with elements 2 and 3 of operand 1.  With TARGET_AVX512VL
;; the output is vinsert<shuffletype>64x2 with immediate 0 and an optional
;; mask suffix; otherwise it is vinsert<i128> with immediate 0.
;; NOTE(review): per the Intel SDM the 64x2 insert forms belong to
;; AVX512DQ, yet only TARGET_AVX512VL is tested here -- confirm.
;; NOTE(review): the prefix attribute is fixed at "vex" although the
;; AVX512VL path can emit a masked template -- confirm.
16690 (define_insn "vec_set_lo_<mode><mask_name>"
16691 [(set (match_operand:VI8F_256 0 "register_operand" "=v")
16692 (vec_concat:VI8F_256
16693 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
16694 (vec_select:<ssehalfvecmode>
16695 (match_operand:VI8F_256 1 "register_operand" "v")
16696 (parallel [(const_int 2) (const_int 3)]))))]
16699 if (TARGET_AVX512VL)
16700 return "vinsert<shuffletype>64x2\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}";
16702 return "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}";
16704 [(set_attr "type" "sselog")
16705 (set_attr "prefix_extra" "1")
16706 (set_attr "length_immediate" "1")
16707 (set_attr "prefix" "vex")
16708 (set_attr "mode" "<sseinsnmode>")])
;; Replaces the high half of a VI8F_256 vector: the result is elements 0
;; and 1 of operand 1 concatenated with operand 2.  With TARGET_AVX512VL
;; the output is vinsert<shuffletype>64x2 with immediate 1 and an optional
;; mask suffix; otherwise it is vinsert<i128> with immediate 1.
;; NOTE(review): per the Intel SDM the 64x2 insert forms belong to
;; AVX512DQ, yet only TARGET_AVX512VL is tested here -- confirm.
;; NOTE(review): the prefix attribute is fixed at "vex" although the
;; AVX512VL path can emit a masked template -- confirm.
16710 (define_insn "vec_set_hi_<mode><mask_name>"
16711 [(set (match_operand:VI8F_256 0 "register_operand" "=v")
16712 (vec_concat:VI8F_256
16713 (vec_select:<ssehalfvecmode>
16714 (match_operand:VI8F_256 1 "register_operand" "v")
16715 (parallel [(const_int 0) (const_int 1)]))
16716 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))]
16719 if (TARGET_AVX512VL)
16720 return "vinsert<shuffletype>64x2\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}";
16722 return "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}";
16724 [(set_attr "type" "sselog")
16725 (set_attr "prefix_extra" "1")
16726 (set_attr "length_immediate" "1")
16727 (set_attr "prefix" "vex")
16728 (set_attr "mode" "<sseinsnmode>")])
;; Replaces the low half of a VI4F_256 vector: the result is operand 2
;; concatenated with elements 4..7 of operand 1.  With TARGET_AVX512VL
;; the output is vinsert<shuffletype>32x4 with immediate 0 and an optional
;; mask suffix; otherwise it is vinsert<i128> with immediate 0.
;; NOTE(review): the prefix attribute is fixed at "vex" although the
;; AVX512VL path can emit a masked template -- confirm.
16730 (define_insn "vec_set_lo_<mode><mask_name>"
16731 [(set (match_operand:VI4F_256 0 "register_operand" "=v")
16732 (vec_concat:VI4F_256
16733 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
16734 (vec_select:<ssehalfvecmode>
16735 (match_operand:VI4F_256 1 "register_operand" "v")
16736 (parallel [(const_int 4) (const_int 5)
16737 (const_int 6) (const_int 7)]))))]
16740 if (TARGET_AVX512VL)
16741 return "vinsert<shuffletype>32x4\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}";
16743 return "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}";
16745 [(set_attr "type" "sselog")
16746 (set_attr "prefix_extra" "1")
16747 (set_attr "length_immediate" "1")
16748 (set_attr "prefix" "vex")
16749 (set_attr "mode" "<sseinsnmode>")])
;; Replaces the high half of a VI4F_256 vector: the result is elements
;; 0..3 of operand 1 concatenated with operand 2.  With TARGET_AVX512VL
;; the output is vinsert<shuffletype>32x4 with immediate 1 and an optional
;; mask suffix; otherwise it is vinsert<i128> with immediate 1.
;; NOTE(review): the prefix attribute is fixed at "vex" although the
;; AVX512VL path can emit a masked template -- confirm.
16751 (define_insn "vec_set_hi_<mode><mask_name>"
16752 [(set (match_operand:VI4F_256 0 "register_operand" "=v")
16753 (vec_concat:VI4F_256
16754 (vec_select:<ssehalfvecmode>
16755 (match_operand:VI4F_256 1 "register_operand" "v")
16756 (parallel [(const_int 0) (const_int 1)
16757 (const_int 2) (const_int 3)]))
16758 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))]
16761 if (TARGET_AVX512VL)
16762 return "vinsert<shuffletype>32x4\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}";
16764 return "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}";
16766 [(set_attr "type" "sselog")
16767 (set_attr "prefix_extra" "1")
16768 (set_attr "length_immediate" "1")
16769 (set_attr "prefix" "vex")
16770 (set_attr "mode" "<sseinsnmode>")])
;; Replaces the low half of a V16HI vector: operand 2 (V8HI, register or
;; memory) is combined with elements 8..15 of operand 1.  Emitted as
;; vinsert%~128 with immediate 0.
;; NOTE(review): %~ presumably expands to "i" or "f" depending on
;; TARGET_AVX2 -- confirm against the i386 operand-printing code.
16772 (define_insn "vec_set_lo_v16hi"
16773 [(set (match_operand:V16HI 0 "register_operand" "=x")
16775 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
16777 (match_operand:V16HI 1 "register_operand" "x")
16778 (parallel [(const_int 8) (const_int 9)
16779 (const_int 10) (const_int 11)
16780 (const_int 12) (const_int 13)
16781 (const_int 14) (const_int 15)]))))]
16783 "vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
16784 [(set_attr "type" "sselog")
16785 (set_attr "prefix_extra" "1")
16786 (set_attr "length_immediate" "1")
16787 (set_attr "prefix" "vex")
16788 (set_attr "mode" "OI")])
;; Replaces the high half of a V16HI vector: elements 0..7 of operand 1
;; are combined with operand 2 (V8HI, register or memory).  Emitted as
;; vinsert%~128 with immediate 1.
;; NOTE(review): %~ presumably expands to "i" or "f" depending on
;; TARGET_AVX2 -- confirm against the i386 operand-printing code.
16790 (define_insn "vec_set_hi_v16hi"
16791 [(set (match_operand:V16HI 0 "register_operand" "=x")
16794 (match_operand:V16HI 1 "register_operand" "x")
16795 (parallel [(const_int 0) (const_int 1)
16796 (const_int 2) (const_int 3)
16797 (const_int 4) (const_int 5)
16798 (const_int 6) (const_int 7)]))
16799 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
16801 "vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
16802 [(set_attr "type" "sselog")
16803 (set_attr "prefix_extra" "1")
16804 (set_attr "length_immediate" "1")
16805 (set_attr "prefix" "vex")
16806 (set_attr "mode" "OI")])
;; Replaces the low half of a V32QI vector: operand 2 (V16QI, register or
;; memory) is combined with elements 16..31 of operand 1.  Emitted as
;; vinsert%~128 with immediate 0.
;; NOTE(review): %~ presumably expands to "i" or "f" depending on
;; TARGET_AVX2 -- confirm against the i386 operand-printing code.
16808 (define_insn "vec_set_lo_v32qi"
16809 [(set (match_operand:V32QI 0 "register_operand" "=x")
16811 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
16813 (match_operand:V32QI 1 "register_operand" "x")
16814 (parallel [(const_int 16) (const_int 17)
16815 (const_int 18) (const_int 19)
16816 (const_int 20) (const_int 21)
16817 (const_int 22) (const_int 23)
16818 (const_int 24) (const_int 25)
16819 (const_int 26) (const_int 27)
16820 (const_int 28) (const_int 29)
16821 (const_int 30) (const_int 31)]))))]
16823 "vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
16824 [(set_attr "type" "sselog")
16825 (set_attr "prefix_extra" "1")
16826 (set_attr "length_immediate" "1")
16827 (set_attr "prefix" "vex")
16828 (set_attr "mode" "OI")])
;; vec_set_hi_v32qi: build the V32QI result (operand 0) from elements 0..15
;; selected from the V32QI operand 1 followed by the V16QI operand 2
;; (register or memory).  Emitted as vinsert%~128 with immediate 1.
16830 (define_insn "vec_set_hi_v32qi"
16831 [(set (match_operand:V32QI 0 "register_operand" "=x")
16834 (match_operand:V32QI 1 "register_operand" "x")
16835 (parallel [(const_int 0) (const_int 1)
16836 (const_int 2) (const_int 3)
16837 (const_int 4) (const_int 5)
16838 (const_int 6) (const_int 7)
16839 (const_int 8) (const_int 9)
16840 (const_int 10) (const_int 11)
16841 (const_int 12) (const_int 13)
16842 (const_int 14) (const_int 15)]))
16843 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
16845 "vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
16846 [(set_attr "type" "sselog")
16847 (set_attr "prefix_extra" "1")
16848 (set_attr "length_immediate" "1")
16849 (set_attr "prefix" "vex")
16850 (set_attr "mode" "OI")])
;; Masked load insn: register operand 0 is set from an unspec over the
;; integer-vector mask register (operand 2) and the memory source
;; (operand 1).  Emitted as v<sseintprefix>maskmov<ssemodesuffix>.
;; NOTE(review): the unspec code and the insn condition are not visible in
;; this listing.
16852 (define_insn "<avx_avx2>_maskload<ssemodesuffix><avxsizesuffix>"
16853 [(set (match_operand:V48_AVX2 0 "register_operand" "=x")
16855 [(match_operand:<sseintvecmode> 2 "register_operand" "x")
16856 (match_operand:V48_AVX2 1 "memory_operand" "m")]
16859 "v<sseintprefix>maskmov<ssemodesuffix>\t{%1, %2, %0|%0, %2, %1}"
16860 [(set_attr "type" "sselog1")
16861 (set_attr "prefix_extra" "1")
16862 (set_attr "prefix" "vex")
16863 (set_attr "btver2_decode" "vector")
16864 (set_attr "mode" "<sseinsnmode>")])
;; Masked store insn: memory operand 0 is a read-write ("+m") destination,
;; operand 1 is the integer-vector mask register and operand 2 the vector
;; register being stored.  Emitted as v<sseintprefix>maskmov<ssemodesuffix>.
;; NOTE(review): the unspec code and the insn condition are not visible in
;; this listing.
16866 (define_insn "<avx_avx2>_maskstore<ssemodesuffix><avxsizesuffix>"
16867 [(set (match_operand:V48_AVX2 0 "memory_operand" "+m")
16869 [(match_operand:<sseintvecmode> 1 "register_operand" "x")
16870 (match_operand:V48_AVX2 2 "register_operand" "x")
16874 "v<sseintprefix>maskmov<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
16875 [(set_attr "type" "sselog1")
16876 (set_attr "prefix_extra" "1")
16877 (set_attr "prefix" "vex")
16878 (set_attr "btver2_decode" "vector")
16879 (set_attr "mode" "<sseinsnmode>")])
;; maskload<mode> expander: operand 0 is the destination register,
;; operand 1 the memory source and operand 2 the integer-vector mask.
16881 (define_expand "maskload<mode>"
16882 [(set (match_operand:V48_AVX2 0 "register_operand")
16884 [(match_operand:<sseintvecmode> 2 "register_operand")
16885 (match_operand:V48_AVX2 1 "memory_operand")]
;; maskstore<mode> expander: operand 0 is the memory destination,
;; operand 1 the vector register to store and operand 2 the integer-vector
;; mask.
16889 (define_expand "maskstore<mode>"
16890 [(set (match_operand:V48_AVX2 0 "memory_operand")
16892 [(match_operand:<sseintvecmode> 2 "register_operand")
16893 (match_operand:V48_AVX2 1 "register_operand")
;; 256-bit cast pattern: operand 0 (AVX256MODE2P register or memory) is set
;; from an unspec wrapping the half-width operand 1.  Once reload has
;; completed the pattern is split: an operand is re-created as a hard
;; register in the other operand's mode and a plain move is emitted.
;; NOTE(review): the tests that choose which operand is re-moded are not
;; visible in this listing -- confirm against the full pattern.
16898 (define_insn_and_split "avx_<castmode><avxsizesuffix>_<castmode>"
16899 [(set (match_operand:AVX256MODE2P 0 "nonimmediate_operand" "=x,m")
16900 (unspec:AVX256MODE2P
16901 [(match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "xm,x")]
16905 "&& reload_completed"
16908 rtx op0 = operands[0];
16909 rtx op1 = operands[1];
16911 op0 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op0));
16913 op1 = gen_rtx_REG (<MODE>mode, REGNO (op1));
16914 emit_move_insn (op0, op1);
;; vec_init<mode> for V_256 modes: delegates to ix86_expand_vector_init
;; with operand 0 as the destination and operand 1 as the initializer.
16918 (define_expand "vec_init<mode>"
16919 [(match_operand:V_256 0 "register_operand")
16923 ix86_expand_vector_init (false, operands[0], operands[1]);
;; vec_init<mode> for VI48F_512 modes: delegates to ix86_expand_vector_init
;; with operand 0 as the destination and operand 1 as the initializer.
16927 (define_expand "vec_init<mode>"
16928 [(match_operand:VI48F_512 0 "register_operand")
16932 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Per-element variable arithmetic right shift for the
;; VI48_AVX512F_AVX512VL modes: operand 1 is shifted by the counts in
;; operand 2 (register or memory).  Emitted as vpsrav<ssemodesuffix>, with
;; an optional mask operand supplied through <mask_name>.
16936 (define_insn "<avx2_avx512>_ashrv<mode><mask_name>"
16937 [(set (match_operand:VI48_AVX512F_AVX512VL 0 "register_operand" "=v")
16938 (ashiftrt:VI48_AVX512F_AVX512VL
16939 (match_operand:VI48_AVX512F_AVX512VL 1 "register_operand" "v")
16940 (match_operand:VI48_AVX512F_AVX512VL 2 "nonimmediate_operand" "vm")))]
16941 "TARGET_AVX2 && <mask_mode512bit_condition>"
16942 "vpsrav<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
16943 [(set_attr "type" "sseishft")
16944 (set_attr "prefix" "maybe_evex")
16945 (set_attr "mode" "<sseinsnmode>")])
;; Per-element variable arithmetic right shift for the VI2_AVX512VL
;; (16-bit element) modes.  Emitted as vpsravw.
;; NOTE(review): the insn condition is not visible in this listing.
16947 (define_insn "<avx2_avx512>_ashrv<mode><mask_name>"
16948 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
16949 (ashiftrt:VI2_AVX512VL
16950 (match_operand:VI2_AVX512VL 1 "register_operand" "v")
16951 (match_operand:VI2_AVX512VL 2 "nonimmediate_operand" "vm")))]
16953 "vpsravw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
16954 [(set_attr "type" "sseishft")
16955 (set_attr "prefix" "maybe_evex")
16956 (set_attr "mode" "<sseinsnmode>")])
;; Per-element variable logical shifts (any_lshift code iterator) for the
;; VI48_AVX512F modes: operand 1 is shifted by the counts in operand 2.
;; Emitted as vp<vshift>v<ssemodesuffix>, optionally masked via <mask_name>.
16958 (define_insn "<avx2_avx512>_<shift_insn>v<mode><mask_name>"
16959 [(set (match_operand:VI48_AVX512F 0 "register_operand" "=v")
16960 (any_lshift:VI48_AVX512F
16961 (match_operand:VI48_AVX512F 1 "register_operand" "v")
16962 (match_operand:VI48_AVX512F 2 "nonimmediate_operand" "vm")))]
16963 "TARGET_AVX2 && <mask_mode512bit_condition>"
16964 "vp<vshift>v<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
16965 [(set_attr "type" "sseishft")
16966 (set_attr "prefix" "maybe_evex")
16967 (set_attr "mode" "<sseinsnmode>")])
;; Per-element variable logical shifts (any_lshift code iterator) for the
;; VI2_AVX512VL (16-bit element) modes.
;; NOTE(review): the insn condition is not visible in this listing.
16969 (define_insn "<avx2_avx512>_<shift_insn>v<mode><mask_name>"
16970 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
16971 (any_lshift:VI2_AVX512VL
16972 (match_operand:VI2_AVX512VL 1 "register_operand" "v")
16973 (match_operand:VI2_AVX512VL 2 "nonimmediate_operand" "vm")))]
16975 "vp<vshift>v<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
16976 [(set_attr "type" "sseishft")
16977 (set_attr "prefix" "maybe_evex")
16978 (set_attr "mode" "<sseinsnmode>")])
;; avx_vec_concat<mode>: concatenate two half-width vectors (operands 1
;; and 2) into the V_256_512 operand 0.  Two alternatives exist: operand 2
;; as register/memory ("xm", type sselog) or matching constraint "C"
;; (type ssemov).  The output code switches on which_alternative and, for
;; the move form, on the insn's mode attribute, writing operand 1 into a
;; narrower view of operand 0 (%t0 / %x0).
;; NOTE(review): the case labels that pair each template with an
;; alternative/mode are not visible in this listing -- confirm before
;; relying on the mapping.
16980 (define_insn "avx_vec_concat<mode>"
16981 [(set (match_operand:V_256_512 0 "register_operand" "=x,x")
16982 (vec_concat:V_256_512
16983 (match_operand:<ssehalfvecmode> 1 "register_operand" "x,x")
16984 (match_operand:<ssehalfvecmode> 2 "vector_move_operand" "xm,C")))]
16987 switch (which_alternative)
16990 return "vinsert<i128>\t{$0x1, %2, %<concat_tg_mode>1, %0|%0, %<concat_tg_mode>1, %2, 0x1}";
16992 switch (get_attr_mode (insn))
16995 return "vmovaps\t{%1, %t0|%t0, %1}";
16997 return "vmovapd\t{%1, %t0|%t0, %1}";
16999 return "vmovaps\t{%1, %x0|%x0, %1}";
17001 return "vmovapd\t{%1, %x0|%x0, %1}";
17003 return "vmovdqa\t{%1, %t0|%t0, %1}";
17005 return "vmovdqa\t{%1, %x0|%x0, %1}";
17007 gcc_unreachable ();
17010 gcc_unreachable ();
17013 [(set_attr "type" "sselog,ssemov")
17014 (set_attr "prefix_extra" "1,*")
17015 (set_attr "length_immediate" "1,*")
17016 (set_attr "prefix" "maybe_evex")
17017 (set_attr "mode" "<sseinsnmode>")])
;; vcvtph2ps (register form): the V4SF result is elements 0..3 selected
;; from a V8SF unspec over the V8HI register operand 1.  Available with
;; TARGET_F16C or TARGET_AVX512VL; optionally masked via <mask_name>.
17019 (define_insn "vcvtph2ps<mask_name>"
17020 [(set (match_operand:V4SF 0 "register_operand" "=v")
17022 (unspec:V8SF [(match_operand:V8HI 1 "register_operand" "v")]
17024 (parallel [(const_int 0) (const_int 1)
17025 (const_int 2) (const_int 3)])))]
17026 "TARGET_F16C || TARGET_AVX512VL"
17027 "vcvtph2ps\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17028 [(set_attr "type" "ssecvt")
17029 (set_attr "prefix" "maybe_evex")
17030 (set_attr "mode" "V4SF")])
;; vcvtph2ps (memory form): the V4SF result is an UNSPEC_VCVTPH2PS over the
;; V4HI memory operand 1.  Available with TARGET_F16C or TARGET_AVX512VL;
;; optionally masked via <mask_name>.
;; The "prefix" and "mode" attributes agree with the register form
;; vcvtph2ps<mask_name>: the destination uses the "v" constraint and may be
;; masked, so the encoding is not always VEX, and the result is V4SF.
17032 (define_insn "*vcvtph2ps_load<mask_name>"
17033 [(set (match_operand:V4SF 0 "register_operand" "=v")
17034 (unspec:V4SF [(match_operand:V4HI 1 "memory_operand" "m")]
17035 UNSPEC_VCVTPH2PS))]
17036 "TARGET_F16C || TARGET_AVX512VL"
17037 "vcvtph2ps\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17038 [(set_attr "type" "ssecvt")
17039 (set_attr "prefix" "maybe_evex")
17040 (set_attr "mode" "V4SF")])
;; vcvtph2ps (256-bit form): the V8SF result is an UNSPEC_VCVTPH2PS over the
;; V8HI operand 1 (register or memory).  Available with TARGET_F16C or
;; TARGET_AVX512VL; optionally masked via <mask_name>.
;; "prefix" is maybe_evex because the operands use the "v" constraint and
;; the pattern may carry a mask, so the encoding is not always VEX.
17042 (define_insn "vcvtph2ps256<mask_name>"
17043 [(set (match_operand:V8SF 0 "register_operand" "=v")
17044 (unspec:V8SF [(match_operand:V8HI 1 "nonimmediate_operand" "vm")]
17045 UNSPEC_VCVTPH2PS))]
17046 "TARGET_F16C || TARGET_AVX512VL"
17047 "vcvtph2ps\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17048 [(set_attr "type" "ssecvt")
17049 (set_attr "prefix" "maybe_evex")
17050 (set_attr "btver2_decode" "double")
17051 (set_attr "mode" "V8SF")])
;; vcvtph2ps (512-bit form): the V16SF result is an UNSPEC_VCVTPH2PS over
;; the V16HI operand 1.  Supports an optional mask (<mask_name>) and an
;; optional SAE-only rounding operand (<round_saeonly_name>).
;; NOTE(review): the insn condition is not visible in this listing.
17053 (define_insn "<mask_codefor>avx512f_vcvtph2ps512<mask_name><round_saeonly_name>"
17054 [(set (match_operand:V16SF 0 "register_operand" "=v")
17056 [(match_operand:V16HI 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
17057 UNSPEC_VCVTPH2PS))]
17059 "vcvtph2ps\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
17060 [(set_attr "type" "ssecvt")
17061 (set_attr "prefix" "evex")
17062 (set_attr "mode" "V16SF")])
;; vcvtps2ph_mask expander: converts V4SF operand 1 under the immediate
;; control in operand 2 (0..255) into a V4HI unspec; operand 3 is the merge
;; source and operand 4 the QI mask register.  The preparation statement
;; sets operands[5] to the V4HI zero vector.
17064 (define_expand "vcvtps2ph_mask"
17065 [(set (match_operand:V8HI 0 "register_operand")
17068 (unspec:V4HI [(match_operand:V4SF 1 "register_operand")
17069 (match_operand:SI 2 "const_0_to_255_operand")]
17072 (match_operand:V8HI 3 "vector_move_operand")
17073 (match_operand:QI 4 "register_operand")))]
17075 "operands[5] = CONST0_RTX (V4HImode);")
;; vcvtps2ph expander: converts V4SF operand 1 under the immediate control
;; in operand 2 (0..255) into a V4HI unspec placed in the V8HI operand 0.
;; The preparation statement sets operands[3] to the V4HI zero vector.
17077 (define_expand "vcvtps2ph"
17078 [(set (match_operand:V8HI 0 "register_operand")
17080 (unspec:V4HI [(match_operand:V4SF 1 "register_operand")
17081 (match_operand:SI 2 "const_0_to_255_operand")]
17085 "operands[3] = CONST0_RTX (V4HImode);")
;; vcvtps2ph (register destination): V8HI operand 0 is built from the V4HI
;; conversion of V4SF operand 1 (immediate control in operand 2) together
;; with operand 3, which must be a V4HI zero constant.  Optionally masked
;; via <mask_name>.
17087 (define_insn "*vcvtps2ph<mask_name>"
17088 [(set (match_operand:V8HI 0 "register_operand" "=v")
17090 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "v")
17091 (match_operand:SI 2 "const_0_to_255_operand" "N")]
17093 (match_operand:V4HI 3 "const0_operand")))]
17094 "(TARGET_F16C || TARGET_AVX512VL) && <mask_avx512vl_condition>"
17095 "vcvtps2ph\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}"
17096 [(set_attr "type" "ssecvt")
17097 (set_attr "prefix" "maybe_evex")
17098 (set_attr "mode" "V4SF")])
;; vcvtps2ph (memory destination): stores the V4HI conversion of V4SF
;; register operand 1, with the immediate control in operand 2, to memory
;; operand 0.
17100 (define_insn "*vcvtps2ph_store<mask_name>"
17101 [(set (match_operand:V4HI 0 "memory_operand" "=m")
17102 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")
17103 (match_operand:SI 2 "const_0_to_255_operand" "N")]
17104 UNSPEC_VCVTPS2PH))]
17105 "TARGET_F16C || TARGET_AVX512VL"
17106 "vcvtps2ph\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
17107 [(set_attr "type" "ssecvt")
17108 (set_attr "prefix" "maybe_evex")
17109 (set_attr "mode" "V4SF")])
;; vcvtps2ph (256-bit source): converts V8SF register operand 1, with the
;; immediate control in operand 2, into the V8HI operand 0 (register or
;; memory).
17111 (define_insn "vcvtps2ph256<mask_name>"
17112 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=xm")
17113 (unspec:V8HI [(match_operand:V8SF 1 "register_operand" "x")
17114 (match_operand:SI 2 "const_0_to_255_operand" "N")]
17115 UNSPEC_VCVTPS2PH))]
17116 "TARGET_F16C || TARGET_AVX512VL"
17117 "vcvtps2ph\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
17118 [(set_attr "type" "ssecvt")
17119 (set_attr "prefix" "maybe_evex")
17120 (set_attr "btver2_decode" "vector")
17121 (set_attr "mode" "V8SF")])
;; vcvtps2ph (512-bit source): converts V16SF register operand 1, with the
;; immediate control in operand 2, into the V16HI operand 0 (register or
;; memory).  Optionally masked via <mask_name>.
;; NOTE(review): the insn condition is not visible in this listing.
17123 (define_insn "<mask_codefor>avx512f_vcvtps2ph512<mask_name>"
17124 [(set (match_operand:V16HI 0 "nonimmediate_operand" "=vm")
17126 [(match_operand:V16SF 1 "register_operand" "v")
17127 (match_operand:SI 2 "const_0_to_255_operand" "N")]
17128 UNSPEC_VCVTPS2PH))]
17130 "vcvtps2ph\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
17131 [(set_attr "type" "ssecvt")
17132 (set_attr "prefix" "evex")
17133 (set_attr "mode" "V16SF")])
17135 ;; For gather* insn patterns
;; VEC_GATHER_MODE lists the 128- and 256-bit data modes iterated by the
;; avx2_gather* patterns below.
17136 (define_mode_iterator VEC_GATHER_MODE
17137 [V2DI V2DF V4DI V4DF V4SI V4SF V8SI V8SF])
;; VEC_GATHER_IDXSI maps a data mode to the index-vector mode used by the
;; *gathersi / *scattersi patterns (SImode index elements).
17138 (define_mode_attr VEC_GATHER_IDXSI
17139 [(V2DI "V4SI") (V4DI "V4SI") (V8DI "V8SI")
17140 (V2DF "V4SI") (V4DF "V4SI") (V8DF "V8SI")
17141 (V4SI "V4SI") (V8SI "V8SI") (V16SI "V16SI")
17142 (V4SF "V4SI") (V8SF "V8SI") (V16SF "V16SI")])
;; VEC_GATHER_IDXDI maps a data mode to the index-vector mode used by the
;; *gatherdi / *scatterdi patterns (DImode index elements).
17144 (define_mode_attr VEC_GATHER_IDXDI
17145 [(V2DI "V2DI") (V4DI "V4DI") (V8DI "V8DI")
17146 (V2DF "V2DI") (V4DF "V4DI") (V8DF "V8DI")
17147 (V4SI "V2DI") (V8SI "V4DI") (V16SI "V8DI")
17148 (V4SF "V2DI") (V8SF "V4DI") (V16SF "V8DI")])
;; VEC_GATHER_SRCDI maps a data mode to the mode of the source/merge
;; operands in the *gatherdi / *scatterdi patterns; for the 8- and
;; 16-element SI/SF modes it is the half-width vector.
17150 (define_mode_attr VEC_GATHER_SRCDI
17151 [(V2DI "V2DI") (V4DI "V4DI") (V8DI "V8DI")
17152 (V2DF "V2DF") (V4DF "V4DF") (V8DF "V8DF")
17153 (V4SI "V4SI") (V8SI "V4SI") (V16SI "V8SI")
17154 (V4SF "V4SF") (V8SF "V4SF") (V16SF "V8SF")])
;; avx2_gathersi<mode> expander.  Operands: 0 destination, 1 merge source,
;; 2 base address, 3 SImode-element index vector, 4 mask vector, 5 scale
;; (const1248_operand); operand 6 is a clobbered scratch in the data mode.
;; The preparation code wraps operands 2, 3 and 5 in a Pmode
;; UNSPEC_VSIBADDR address.
;; The operand 5 predicate name no longer carries a trailing space inside
;; the string.
17156 (define_expand "avx2_gathersi<mode>"
17157 [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand")
17158 (unspec:VEC_GATHER_MODE
17159 [(match_operand:VEC_GATHER_MODE 1 "register_operand")
17160 (mem:<ssescalarmode>
17162 [(match_operand 2 "vsib_address_operand")
17163 (match_operand:<VEC_GATHER_IDXSI>
17164 3 "register_operand")
17165 (match_operand:SI 5 "const1248_operand")]))
17166 (mem:BLK (scratch))
17167 (match_operand:VEC_GATHER_MODE 4 "register_operand")]
17169 (clobber (match_scratch:VEC_GATHER_MODE 6))])]
17173 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
17174 operands[5]), UNSPEC_VSIBADDR);
;; *avx2_gathersi<mode>: gather with SImode-element indices.  Operand 2
;; (merge source) is tied to the early-clobber destination 0; operand 5
;; (mask) is tied to the early-clobber scratch 1.  Operand 7 is the VSIB
;; memory reference built from base 3, index vector 4 and scale 6.
;; Emitted as v<sseintprefix>gatherd<ssemodesuffix>.
17177 (define_insn "*avx2_gathersi<mode>"
17178 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
17179 (unspec:VEC_GATHER_MODE
17180 [(match_operand:VEC_GATHER_MODE 2 "register_operand" "0")
17181 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
17183 [(match_operand:P 3 "vsib_address_operand" "Tv")
17184 (match_operand:<VEC_GATHER_IDXSI> 4 "register_operand" "x")
17185 (match_operand:SI 6 "const1248_operand" "n")]
17187 (mem:BLK (scratch))
17188 (match_operand:VEC_GATHER_MODE 5 "register_operand" "1")]
17190 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
17192 "v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %7, %0|%0, %7, %1}"
17193 [(set_attr "type" "ssemov")
17194 (set_attr "prefix" "vex")
17195 (set_attr "mode" "<sseinsnmode>")])
;; *avx2_gathersi<mode>_2: variant of the SImode-index gather with no
;; visible merge-source match_operand.  Operand 4 (mask) is tied to the
;; early-clobber scratch 1; operand 6 is the VSIB memory reference built
;; from base 2, index vector 3 and scale 5.
;; NOTE(review): the first element of the unspec vector is not visible in
;; this listing.
17197 (define_insn "*avx2_gathersi<mode>_2"
17198 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
17199 (unspec:VEC_GATHER_MODE
17201 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
17203 [(match_operand:P 2 "vsib_address_operand" "Tv")
17204 (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand" "x")
17205 (match_operand:SI 5 "const1248_operand" "n")]
17207 (mem:BLK (scratch))
17208 (match_operand:VEC_GATHER_MODE 4 "register_operand" "1")]
17210 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
17212 "v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %6, %0|%0, %6, %1}"
17213 [(set_attr "type" "ssemov")
17214 (set_attr "prefix" "vex")
17215 (set_attr "mode" "<sseinsnmode>")])
;; avx2_gatherdi<mode> expander.  Operands: 0 destination, 1 merge source
;; (<VEC_GATHER_SRCDI> mode), 2 base address, 3 DImode-element index vector,
;; 4 mask vector (<VEC_GATHER_SRCDI> mode), 5 scale (const1248_operand);
;; operand 6 is a clobbered scratch in the data mode.  The preparation code
;; wraps operands 2, 3 and 5 in a Pmode UNSPEC_VSIBADDR address.
;; The operand 5 predicate name no longer carries a trailing space inside
;; the string.
17217 (define_expand "avx2_gatherdi<mode>"
17218 [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand")
17219 (unspec:VEC_GATHER_MODE
17220 [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand")
17221 (mem:<ssescalarmode>
17223 [(match_operand 2 "vsib_address_operand")
17224 (match_operand:<VEC_GATHER_IDXDI>
17225 3 "register_operand")
17226 (match_operand:SI 5 "const1248_operand")]))
17227 (mem:BLK (scratch))
17228 (match_operand:<VEC_GATHER_SRCDI>
17229 4 "register_operand")]
17231 (clobber (match_scratch:VEC_GATHER_MODE 6))])]
17235 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
17236 operands[5]), UNSPEC_VSIBADDR);
;; *avx2_gatherdi<mode>: gather with DImode-element indices.  Operand 2
;; (merge source, <VEC_GATHER_SRCDI> mode) is tied to destination 0 and
;; operand 5 (mask) to the early-clobber scratch 1.  The template prints
;; operands 2 and 5 rather than 0 and 1, so the register names follow the
;; <VEC_GATHER_SRCDI> mode.
17239 (define_insn "*avx2_gatherdi<mode>"
17240 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
17241 (unspec:VEC_GATHER_MODE
17242 [(match_operand:<VEC_GATHER_SRCDI> 2 "register_operand" "0")
17243 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
17245 [(match_operand:P 3 "vsib_address_operand" "Tv")
17246 (match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x")
17247 (match_operand:SI 6 "const1248_operand" "n")]
17249 (mem:BLK (scratch))
17250 (match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" "1")]
17252 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
17254 "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %2|%2, %7, %5}"
17255 [(set_attr "type" "ssemov")
17256 (set_attr "prefix" "vex")
17257 (set_attr "mode" "<sseinsnmode>")])
;; *avx2_gatherdi<mode>_2: DImode-index gather variant with no visible
;; merge-source match_operand.  When the data mode differs from
;; <VEC_GATHER_SRCDI>, the destination is printed with the %x modifier
;; instead of at its full width.
;; NOTE(review): the first element of the unspec vector is not visible in
;; this listing.
17259 (define_insn "*avx2_gatherdi<mode>_2"
17260 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
17261 (unspec:VEC_GATHER_MODE
17263 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
17265 [(match_operand:P 2 "vsib_address_operand" "Tv")
17266 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x")
17267 (match_operand:SI 5 "const1248_operand" "n")]
17269 (mem:BLK (scratch))
17270 (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")]
17272 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
17275 if (<MODE>mode != <VEC_GATHER_SRCDI>mode)
17276 return "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %x0|%x0, %6, %4}";
17277 return "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}";
17279 [(set_attr "type" "ssemov")
17280 (set_attr "prefix" "vex")
17281 (set_attr "mode" "<sseinsnmode>")])
;; *avx2_gatherdi<mode>_3: DImode-index gather whose result is a
;; vec_select of elements 0..3, giving a <VEC_GATHER_SRCDI>-mode
;; destination.  Operand 2 (merge source) is tied to destination 0 and
;; operand 5 (mask) to the early-clobber VI4F_256 scratch 1.
17283 (define_insn "*avx2_gatherdi<mode>_3"
17284 [(set (match_operand:<VEC_GATHER_SRCDI> 0 "register_operand" "=&x")
17285 (vec_select:<VEC_GATHER_SRCDI>
17287 [(match_operand:<VEC_GATHER_SRCDI> 2 "register_operand" "0")
17288 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
17290 [(match_operand:P 3 "vsib_address_operand" "Tv")
17291 (match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x")
17292 (match_operand:SI 6 "const1248_operand" "n")]
17294 (mem:BLK (scratch))
17295 (match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" "1")]
17297 (parallel [(const_int 0) (const_int 1)
17298 (const_int 2) (const_int 3)])))
17299 (clobber (match_scratch:VI4F_256 1 "=&x"))]
17301 "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %0|%0, %7, %5}"
17302 [(set_attr "type" "ssemov")
17303 (set_attr "prefix" "vex")
17304 (set_attr "mode" "<sseinsnmode>")])
;; *avx2_gatherdi<mode>_4: like the _3 pattern (vec_select of elements 0..3
;; of a DImode-index gather) but with no visible merge-source
;; match_operand.  Operand 4 (mask) is tied to the early-clobber VI4F_256
;; scratch 1.
;; NOTE(review): the first element of the unspec vector is not visible in
;; this listing.
17306 (define_insn "*avx2_gatherdi<mode>_4"
17307 [(set (match_operand:<VEC_GATHER_SRCDI> 0 "register_operand" "=&x")
17308 (vec_select:<VEC_GATHER_SRCDI>
17311 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
17313 [(match_operand:P 2 "vsib_address_operand" "Tv")
17314 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x")
17315 (match_operand:SI 5 "const1248_operand" "n")]
17317 (mem:BLK (scratch))
17318 (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")]
17320 (parallel [(const_int 0) (const_int 1)
17321 (const_int 2) (const_int 3)])))
17322 (clobber (match_scratch:VI4F_256 1 "=&x"))]
17324 "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}"
17325 [(set_attr "type" "ssemov")
17326 (set_attr "prefix" "vex")
17327 (set_attr "mode" "<sseinsnmode>")])
;; <avx512>_gathersi<mode> expander.  Operands: 0 destination, 1 merge
;; source, 2 base address, 3 SImode-element index vector, 4 mask register
;; (<avx512fmaskmode>), 5 scale; operand 7 is a clobbered mask-mode
;; scratch.  The preparation code wraps operands 2, 3 and 5 in a Pmode
;; UNSPEC_VSIBADDR address.
17329 (define_expand "<avx512>_gathersi<mode>"
17330 [(parallel [(set (match_operand:VI48F 0 "register_operand")
17332 [(match_operand:VI48F 1 "register_operand")
17333 (match_operand:<avx512fmaskmode> 4 "register_operand")
17334 (mem:<ssescalarmode>
17336 [(match_operand 2 "vsib_address_operand")
17337 (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand")
17338 (match_operand:SI 5 "const1248_operand")]))]
17340 (clobber (match_scratch:<avx512fmaskmode> 7))])]
17344 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
17345 operands[5]), UNSPEC_VSIBADDR);
;; *avx512f_gathersi<mode>: masked gather with SImode-element indices.
;; Operand 1 (merge source) is tied to destination 0; operand 7 (input
;; mask) is tied to the early-clobber mask scratch 2, which is what the
;; template prints inside %{...%}.  Operand 6 is the VSIB memory reference
;; built from base 4, index vector 3 and scale 5.
17348 (define_insn "*avx512f_gathersi<mode>"
17349 [(set (match_operand:VI48F 0 "register_operand" "=&v")
17351 [(match_operand:VI48F 1 "register_operand" "0")
17352 (match_operand:<avx512fmaskmode> 7 "register_operand" "2")
17353 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
17355 [(match_operand:P 4 "vsib_address_operand" "Tv")
17356 (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand" "v")
17357 (match_operand:SI 5 "const1248_operand" "n")]
17358 UNSPEC_VSIBADDR)])]
17360 (clobber (match_scratch:<avx512fmaskmode> 2 "=&Yk"))]
17362 "v<sseintprefix>gatherd<ssemodesuffix>\t{%6, %0%{%2%}|%0%{%2%}, %g6}"
17363 [(set_attr "type" "ssemov")
17364 (set_attr "prefix" "evex")
17365 (set_attr "mode" "<sseinsnmode>")])
;; *avx512f_gathersi<mode>_2: masked SImode-index gather variant with no
;; visible merge-source match_operand.  Operand 6 (input mask) is tied to
;; the early-clobber mask scratch 1; operand 5 is the VSIB memory reference
;; built from base 3, index vector 2 and scale 4.
;; NOTE(review): the first element of the unspec vector is not visible in
;; this listing.
17367 (define_insn "*avx512f_gathersi<mode>_2"
17368 [(set (match_operand:VI48F 0 "register_operand" "=&v")
17371 (match_operand:<avx512fmaskmode> 6 "register_operand" "1")
17372 (match_operator:<ssescalarmode> 5 "vsib_mem_operator"
17374 [(match_operand:P 3 "vsib_address_operand" "Tv")
17375 (match_operand:<VEC_GATHER_IDXSI> 2 "register_operand" "v")
17376 (match_operand:SI 4 "const1248_operand" "n")]
17377 UNSPEC_VSIBADDR)])]
17379 (clobber (match_scratch:<avx512fmaskmode> 1 "=&Yk"))]
17381 "v<sseintprefix>gatherd<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %g5}"
17382 [(set_attr "type" "ssemov")
17383 (set_attr "prefix" "evex")
17384 (set_attr "mode" "<sseinsnmode>")])
;; <avx512>_gatherdi<mode> expander.  Operands: 0 destination, 1 merge
;; source (<VEC_GATHER_SRCDI> mode), 2 base address, 3 DImode-element index
;; vector, 4 QImode mask register, 5 scale; operand 7 is a clobbered QImode
;; scratch.  The preparation code wraps operands 2, 3 and 5 in a Pmode
;; UNSPEC_VSIBADDR address.
17387 (define_expand "<avx512>_gatherdi<mode>"
17388 [(parallel [(set (match_operand:VI48F 0 "register_operand")
17390 [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand")
17391 (match_operand:QI 4 "register_operand")
17392 (mem:<ssescalarmode>
17394 [(match_operand 2 "vsib_address_operand")
17395 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand")
17396 (match_operand:SI 5 "const1248_operand")]))]
17398 (clobber (match_scratch:QI 7))])]
17402 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
17403 operands[5]), UNSPEC_VSIBADDR);
;; *avx512f_gatherdi<mode>: masked gather with DImode-element indices.
;; Operand 1 (merge source, <VEC_GATHER_SRCDI> mode) is tied to destination
;; 0 and operand 7 (QImode input mask) to the early-clobber scratch 2.  The
;; template prints operand 1 rather than operand 0, so the register name
;; follows the <VEC_GATHER_SRCDI> mode.
17406 (define_insn "*avx512f_gatherdi<mode>"
17407 [(set (match_operand:VI48F 0 "register_operand" "=&v")
17409 [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand" "0")
17410 (match_operand:QI 7 "register_operand" "2")
17411 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
17413 [(match_operand:P 4 "vsib_address_operand" "Tv")
17414 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "v")
17415 (match_operand:SI 5 "const1248_operand" "n")]
17416 UNSPEC_VSIBADDR)])]
17418 (clobber (match_scratch:QI 2 "=&Yk"))]
17420 "v<sseintprefix>gatherq<ssemodesuffix>\t{%6, %1%{%2%}|%1%{%2%}, %g6}"
17421 [(set_attr "type" "ssemov")
17422 (set_attr "prefix" "evex")
17423 (set_attr "mode" "<sseinsnmode>")])
;; *avx512f_gatherdi<mode>_2: masked DImode-index gather variant with no
;; visible merge-source match_operand.  Operand 6 (QImode input mask) is
;; tied to the early-clobber scratch 1; operand 5 is the VSIB memory
;; reference built from base 3, index vector 2 and scale 4.
;; When the data mode differs from <VEC_GATHER_SRCDI>, the destination is
;; printed at reduced width: %x0 when the data mode is not 64 bytes wide,
;; %t0 when it is.  Both assembler dialects of the non-64-byte template now
;; use %x0; the Intel half previously printed %t0, disagreeing with the
;; AT&T half of the same string.
;; NOTE(review): the first element of the unspec vector is not visible in
;; this listing.
17425 (define_insn "*avx512f_gatherdi<mode>_2"
17426 [(set (match_operand:VI48F 0 "register_operand" "=&v")
17429 (match_operand:QI 6 "register_operand" "1")
17430 (match_operator:<ssescalarmode> 5 "vsib_mem_operator"
17432 [(match_operand:P 3 "vsib_address_operand" "Tv")
17433 (match_operand:<VEC_GATHER_IDXDI> 2 "register_operand" "v")
17434 (match_operand:SI 4 "const1248_operand" "n")]
17435 UNSPEC_VSIBADDR)])]
17437 (clobber (match_scratch:QI 1 "=&Yk"))]
17440 if (<MODE>mode != <VEC_GATHER_SRCDI>mode)
17442 if (GET_MODE_SIZE (<MODE>mode) != 64)
17443 return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %x0%{%1%}|%x0%{%1%}, %g5}";
17445 return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %t0%{%1%}|%t0%{%1%}, %g5}";
17447 return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %g5}";
17449 [(set_attr "type" "ssemov")
17450 (set_attr "prefix" "evex")
17451 (set_attr "mode" "<sseinsnmode>")])
;; <avx512>_scattersi<mode> expander.  Operands: 0 base address, 1 mask
;; register (<avx512fmaskmode>), 2 SImode-element index vector, 3 data
;; vector to store, 4 scale; operand 6 is a clobbered mask-mode scratch.
;; The preparation code wraps operands 0, 2 and 4 in a Pmode
;; UNSPEC_VSIBADDR address.
17453 (define_expand "<avx512>_scattersi<mode>"
17454 [(parallel [(set (mem:VI48F
17456 [(match_operand 0 "vsib_address_operand")
17457 (match_operand:<VEC_GATHER_IDXSI> 2 "register_operand")
17458 (match_operand:SI 4 "const1248_operand")]))
17460 [(match_operand:<avx512fmaskmode> 1 "register_operand")
17461 (match_operand:VI48F 3 "register_operand")]
17463 (clobber (match_scratch:<avx512fmaskmode> 6))])]
17467 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[0], operands[2],
17468 operands[4]), UNSPEC_VSIBADDR);
;; *avx512f_scattersi<mode>: masked scatter with SImode-element indices.
;; Operand 5 is the VSIB memory destination built from base 0, index vector
;; 2 and scale 4; operand 3 is the data vector.  Operand 6 (input mask) is
;; tied to the early-clobber mask scratch 1, which the template prints
;; inside %{...%}.
17471 (define_insn "*avx512f_scattersi<mode>"
17472 [(set (match_operator:VI48F 5 "vsib_mem_operator"
17474 [(match_operand:P 0 "vsib_address_operand" "Tv")
17475 (match_operand:<VEC_GATHER_IDXSI> 2 "register_operand" "v")
17476 (match_operand:SI 4 "const1248_operand" "n")]
17479 [(match_operand:<avx512fmaskmode> 6 "register_operand" "1")
17480 (match_operand:VI48F 3 "register_operand" "v")]
17482 (clobber (match_scratch:<avx512fmaskmode> 1 "=&Yk"))]
17484 "v<sseintprefix>scatterd<ssemodesuffix>\t{%3, %5%{%1%}|%5%{%1%}, %3}"
17485 [(set_attr "type" "ssemov")
17486 (set_attr "prefix" "evex")
17487 (set_attr "mode" "<sseinsnmode>")])
;; <avx512>_scatterdi<mode> expander.  Operands: 0 base address, 1 QImode
;; mask register, 2 DImode-element index vector, 3 data vector to store
;; (<VEC_GATHER_SRCDI> mode), 4 scale; operand 6 is a clobbered QImode
;; scratch.  The preparation code wraps operands 0, 2 and 4 in a Pmode
;; UNSPEC_VSIBADDR address.
17489 (define_expand "<avx512>_scatterdi<mode>"
17490 [(parallel [(set (mem:VI48F
17492 [(match_operand 0 "vsib_address_operand")
17493 (match_operand:<VEC_GATHER_IDXDI> 2 "register_operand")
17494 (match_operand:SI 4 "const1248_operand")]))
17496 [(match_operand:QI 1 "register_operand")
17497 (match_operand:<VEC_GATHER_SRCDI> 3 "register_operand")]
17499 (clobber (match_scratch:QI 6))])]
17503 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[0], operands[2],
17504 operands[4]), UNSPEC_VSIBADDR);
;; *avx512f_scatterdi<mode>: masked scatter with DImode-element indices.
;; Operand 5 is the VSIB memory destination built from base 0, index vector
;; 2 and scale 4; operand 3 is the data vector (<VEC_GATHER_SRCDI> mode).
;; Operand 6 (QImode input mask) is tied to the early-clobber scratch 1.
17507 (define_insn "*avx512f_scatterdi<mode>"
17508 [(set (match_operator:VI48F 5 "vsib_mem_operator"
17510 [(match_operand:P 0 "vsib_address_operand" "Tv")
17511 (match_operand:<VEC_GATHER_IDXDI> 2 "register_operand" "v")
17512 (match_operand:SI 4 "const1248_operand" "n")]
17515 [(match_operand:QI 6 "register_operand" "1")
17516 (match_operand:<VEC_GATHER_SRCDI> 3 "register_operand" "v")]
17518 (clobber (match_scratch:QI 1 "=&Yk"))]
17520 "v<sseintprefix>scatterq<ssemodesuffix>\t{%3, %5%{%1%}|%5%{%1%}, %3}"
17521 [(set_attr "type" "ssemov")
17522 (set_attr "prefix" "evex")
17523 (set_attr "mode" "<sseinsnmode>")])
;; <avx512>_compress<mode>_mask: register-destination compress.  Operand 1
;; is the source vector, operand 2 is either tied to the destination ("0")
;; or matches constraint "C", and operand 3 is the mask register.  The
;; template appends %N2 after the mask.
;; NOTE(review): the unspec code and the insn condition are not visible in
;; this listing.
17525 (define_insn "<avx512>_compress<mode>_mask"
17526 [(set (match_operand:VI48F 0 "register_operand" "=v")
17528 [(match_operand:VI48F 1 "register_operand" "v")
17529 (match_operand:VI48F 2 "vector_move_operand" "0C")
17530 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")]
17533 "v<sseintprefix>compress<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
17534 [(set_attr "type" "ssemov")
17535 (set_attr "prefix" "evex")
17536 (set_attr "mode" "<sseinsnmode>")])
;; <avx512>_compressstore<mode>_mask: memory-destination compress
;; (UNSPEC_COMPRESS_STORE).  Operand 1 is the source vector register and
;; operand 2 the mask register; operand 0 is the memory destination.
;; The source register uses the "v" constraint, matching the
;; register-destination <avx512>_compress<mode>_mask pattern; this insn is
;; marked prefix "evex", so nothing in it calls for the narrower "x" class.
;; NOTE(review): the insn condition is not visible in this listing.
17538 (define_insn "<avx512>_compressstore<mode>_mask"
17539 [(set (match_operand:VI48F 0 "memory_operand" "=m")
17541 [(match_operand:VI48F 1 "register_operand" "v")
17543 (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")]
17544 UNSPEC_COMPRESS_STORE))]
17546 "v<sseintprefix>compress<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
17547 [(set_attr "type" "ssemov")
17548 (set_attr "prefix" "evex")
17549 (set_attr "memory" "store")
17550 (set_attr "mode" "<sseinsnmode>")])
;; <avx512>_expand<mode>_maskz expander: operand 1 is the source (register
;; or memory) and operand 3 the mask register.  The preparation statement
;; overwrites operands[2] with the zero vector of <MODE>.
17552 (define_expand "<avx512>_expand<mode>_maskz"
17553 [(set (match_operand:VI48F 0 "register_operand")
17555 [(match_operand:VI48F 1 "nonimmediate_operand")
17556 (match_operand:VI48F 2 "vector_move_operand")
17557 (match_operand:<avx512fmaskmode> 3 "register_operand")]
17560 "operands[2] = CONST0_RTX (<MODE>mode);")
;; <avx512>_expand<mode>_mask: masked expand with two alternatives, a
;; register source ("memory" attr none) and a memory source ("memory" attr
;; load).  Operand 2 is either tied to the destination ("0") or matches
;; constraint "C"; operand 3 is the mask register.
;; NOTE(review): the unspec code and the insn condition are not visible in
;; this listing.
17562 (define_insn "<avx512>_expand<mode>_mask"
17563 [(set (match_operand:VI48F 0 "register_operand" "=v,v")
17565 [(match_operand:VI48F 1 "nonimmediate_operand" "v,m")
17566 (match_operand:VI48F 2 "vector_move_operand" "0C,0C")
17567 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")]
17570 "v<sseintprefix>expand<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
17571 [(set_attr "type" "ssemov")
17572 (set_attr "prefix" "evex")
17573 (set_attr "memory" "none,load")
17574 (set_attr "mode" "<sseinsnmode>")])
;; avx512dq_rangep<mode>: packed vrange over VF_AVX512VL operands 1 and 2
;; with a 0..15 immediate in operand 3.  Requires TARGET_AVX512DQ; supports
;; an optional mask (<mask_name>) and SAE-only rounding
;; (<round_saeonly_name>).
17576 (define_insn "avx512dq_rangep<mode><mask_name><round_saeonly_name>"
17577 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
17578 (unspec:VF_AVX512VL
17579 [(match_operand:VF_AVX512VL 1 "register_operand" "v")
17580 (match_operand:VF_AVX512VL 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
17581 (match_operand:SI 3 "const_0_to_15_operand")]
17583 "TARGET_AVX512DQ && <round_saeonly_mode512bit_condition>"
17584 "vrange<ssemodesuffix>\t{<round_saeonly_mask_op4>%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3<round_saeonly_mask_op4>}"
17585 [(set_attr "type" "sse")
17586 (set_attr "prefix" "evex")
17587 (set_attr "mode" "<MODE>")])
;; avx512dq_ranges<mode>: scalar vrange over VF_128 operands 1 and 2 with a
;; 0..15 immediate in operand 3; supports SAE-only rounding
;; (<round_saeonly_name>).
;; NOTE(review): the enclosing RTL around the unspec and the insn condition
;; are not visible in this listing.
17589 (define_insn "avx512dq_ranges<mode><round_saeonly_name>"
17590 [(set (match_operand:VF_128 0 "register_operand" "=v")
17593 [(match_operand:VF_128 1 "register_operand" "v")
17594 (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
17595 (match_operand:SI 3 "const_0_to_15_operand")]
17600 "vrange<ssescalarmodesuffix>\t{<round_saeonly_op4>%3, %2, %1, %0|%0, %1, %2, %3<round_saeonly_op4>}"
17601 [(set_attr "type" "sse")
17602 (set_attr "prefix" "evex")
17603 (set_attr "mode" "<MODE>")])
;; avx512dq_fpclass<mode>: packed vfpclass.  The mask-register result
;; (operand 0) is computed from vector operand 1 and the 0..255 immediate in
;; operand 2; <mask_scalar_merge_name> adds an optional merge mask.
;; The stray ';' that followed the output template has been removed: in a
;; .md file it only opens an empty comment.
;; NOTE(review): the unspec code and the insn condition are not visible in
;; this listing.
17605 (define_insn "avx512dq_fpclass<mode><mask_scalar_merge_name>"
17606 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
17607 (unspec:<avx512fmaskmode>
17608 [(match_operand:VF_AVX512VL 1 "register_operand" "v")
17609 (match_operand:QI 2 "const_0_to_255_operand" "n")]
17612 "vfpclass<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
17613 [(set_attr "type" "sse")
17614 (set_attr "length_immediate" "1")
17615 (set_attr "prefix" "evex")
17616 (set_attr "mode" "<MODE>")])
;; avx512dq_vmfpclass<mode>: scalar vfpclass.  The mask-register result
;; (operand 0) is an AND whose first arm is an unspec over the VF_128
;; operand 1 and the 0..255 immediate in operand 2.
;; The stray ';' that followed the output template has been removed: in a
;; .md file it only opens an empty comment.
;; NOTE(review): the second arm of the AND, the unspec code and the insn
;; condition are not visible in this listing.
17618 (define_insn "avx512dq_vmfpclass<mode>"
17619 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
17620 (and:<avx512fmaskmode>
17621 (unspec:<avx512fmaskmode>
17622 [(match_operand:VF_128 1 "register_operand" "v")
17623 (match_operand:QI 2 "const_0_to_255_operand" "n")]
17627 "vfpclass<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
17628 [(set_attr "type" "sse")
17629 (set_attr "length_immediate" "1")
17630 (set_attr "prefix" "evex")
17631 (set_attr "mode" "<MODE>")])
;; <avx512>_getmant<mode>: packed vgetmant over VF_AVX512VL operand 1 with a
;; 0..15 immediate in operand 2; supports an optional mask (<mask_name>) and
;; SAE-only rounding (<round_saeonly_name>).
;; The stray ';' that followed the output template has been removed: in a
;; .md file it only opens an empty comment.
;; NOTE(review): the unspec code and the insn condition are not visible in
;; this listing.
17633 (define_insn "<avx512>_getmant<mode><mask_name><round_saeonly_name>"
17634 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
17635 (unspec:VF_AVX512VL
17636 [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "<round_saeonly_constraint>")
17637 (match_operand:SI 2 "const_0_to_15_operand")]
17640 "vgetmant<ssemodesuffix>\t{%2, <round_saeonly_mask_op3>%1, %0<mask_operand3>|%0<mask_operand3>, %1<round_saeonly_mask_op3>, %2}"
17641 [(set_attr "prefix" "evex")
17642 (set_attr "mode" "<MODE>")])
;; avx512f_vgetmant<mode>: scalar vgetmant over VF_128 operands 1 and 2 with
;; a 0..15 immediate in operand 3; supports SAE-only rounding
;; (<round_saeonly_name>).
;; The stray ';' that followed the output template has been removed: in a
;; .md file it only opens an empty comment.
;; NOTE(review): the enclosing RTL around the unspec and the insn condition
;; are not visible in this listing.
17644 (define_insn "avx512f_vgetmant<mode><round_saeonly_name>"
17645 [(set (match_operand:VF_128 0 "register_operand" "=v")
17648 [(match_operand:VF_128 1 "register_operand" "v")
17649 (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
17650 (match_operand:SI 3 "const_0_to_15_operand")]
17655 "vgetmant<ssescalarmodesuffix>\t{%3, <round_saeonly_op4>%2, %1, %0|%0, %1, %2<round_saeonly_op4>, %3}"
17656 [(set_attr "prefix" "evex")
17657 (set_attr "mode" "<ssescalarmode>")])
;; clz<mode>2: per-element operation on VI48_AVX512VL operand 1 (register
;; or memory), emitted as vplzcnt<ssemodesuffix>; optionally masked via
;; <mask_name>.
;; NOTE(review): the RTL code wrapping operand 1 and the insn condition are
;; not visible in this listing.
17659 (define_insn "clz<mode>2<mask_name>"
17660 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
17662 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm")))]
17664 "vplzcnt<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17665 [(set_attr "type" "sse")
17666 (set_attr "prefix" "evex")
17667 (set_attr "mode" "<sseinsnmode>")])
;; conflict<mode>: unspec over VI48_AVX512VL operand 1 (register or memory),
;; emitted as vpconflict<ssemodesuffix>; optionally masked via <mask_name>.
;; NOTE(review): the unspec code and the insn condition are not visible in
;; this listing.
17669 (define_insn "<mask_codefor>conflict<mode><mask_name>"
17670 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
17671 (unspec:VI48_AVX512VL
17672 [(match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm")]
17675 "vpconflict<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17676 [(set_attr "type" "sse")
17677 (set_attr "prefix" "evex")
17678 (set_attr "mode" "<sseinsnmode>")])
;; sha1msg1: two-operand insn on V4SI.  Operand 1 is tied to the
;; destination ("0"); operand 2 may be a register or memory.
;; NOTE(review): the unspec code and the insn condition are not visible in
;; this listing.
17680 (define_insn "sha1msg1"
17681 [(set (match_operand:V4SI 0 "register_operand" "=x")
17683 [(match_operand:V4SI 1 "register_operand" "0")
17684 (match_operand:V4SI 2 "nonimmediate_operand" "xm")]
17687 "sha1msg1\t{%2, %0|%0, %2}"
17688 [(set_attr "type" "sselog1")
17689 (set_attr "mode" "TI")])
;; sha1msg2: two-operand insn on V4SI.  Operand 1 is tied to the
;; destination ("0"); operand 2 may be a register or memory.
;; NOTE(review): the unspec code and the insn condition are not visible in
;; this listing.
17691 (define_insn "sha1msg2"
17692 [(set (match_operand:V4SI 0 "register_operand" "=x")
17694 [(match_operand:V4SI 1 "register_operand" "0")
17695 (match_operand:V4SI 2 "nonimmediate_operand" "xm")]
17698 "sha1msg2\t{%2, %0|%0, %2}"
17699 [(set_attr "type" "sselog1")
17700 (set_attr "mode" "TI")])
;; sha1nexte: UNSPEC_SHA1NEXTE over V4SI operands.  Operand 1 is tied to
;; the destination ("0"); operand 2 may be a register or memory.
;; NOTE(review): the insn condition is not visible in this listing.
17702 (define_insn "sha1nexte"
17703 [(set (match_operand:V4SI 0 "register_operand" "=x")
17705 [(match_operand:V4SI 1 "register_operand" "0")
17706 (match_operand:V4SI 2 "nonimmediate_operand" "xm")]
17707 UNSPEC_SHA1NEXTE))]
17709 "sha1nexte\t{%2, %0|%0, %2}"
17710 [(set_attr "type" "sselog1")
17711 (set_attr "mode" "TI")])
;; sha1rnds4: UNSPEC_SHA1RNDS4 over V4SI operands 1 (tied to the
;; destination) and 2 (register or memory), with a 0..3 immediate in
;; operand 3, hence length_immediate 1.
;; NOTE(review): the insn condition is not visible in this listing.
17713 (define_insn "sha1rnds4"
17714 [(set (match_operand:V4SI 0 "register_operand" "=x")
17716 [(match_operand:V4SI 1 "register_operand" "0")
17717 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
17718 (match_operand:SI 3 "const_0_to_3_operand" "n")]
17719 UNSPEC_SHA1RNDS4))]
17721 "sha1rnds4\t{%3, %2, %0|%0, %2, %3}"
17722 [(set_attr "type" "sselog1")
17723 (set_attr "length_immediate" "1")
17724 (set_attr "mode" "TI")])
;; sha256msg1: UNSPEC_SHA256MSG1 over V4SI operands.  Operand 1 is tied to
;; the destination ("0"); operand 2 may be a register or memory.
;; NOTE(review): the insn condition is not visible in this listing.
17726 (define_insn "sha256msg1"
17727 [(set (match_operand:V4SI 0 "register_operand" "=x")
17729 [(match_operand:V4SI 1 "register_operand" "0")
17730 (match_operand:V4SI 2 "nonimmediate_operand" "xm")]
17731 UNSPEC_SHA256MSG1))]
17733 "sha256msg1\t{%2, %0|%0, %2}"
17734 [(set_attr "type" "sselog1")
17735 (set_attr "mode" "TI")])
;; sha256msg2: UNSPEC_SHA256MSG2 over V4SI operands.  Operand 1 is tied to
;; the destination ("0"); operand 2 may be a register or memory.
;; NOTE(review): the insn condition is not visible in this listing.
17737 (define_insn "sha256msg2"
17738 [(set (match_operand:V4SI 0 "register_operand" "=x")
17740 [(match_operand:V4SI 1 "register_operand" "0")
17741 (match_operand:V4SI 2 "nonimmediate_operand" "xm")]
17742 UNSPEC_SHA256MSG2))]
17744 "sha256msg2\t{%2, %0|%0, %2}"
17745 [(set_attr "type" "sselog1")
17746 (set_attr "mode" "TI")])
;; sha256rnds2: UNSPEC_SHA256RNDS2 over V4SI operands 1 (tied to the
;; destination), 2 (register or memory) and 3, a register restricted by the
;; "Yz" constraint.
;; No "length_immediate" attribute is set: every operand is a register or
;; memory reference and the template carries no immediate, unlike sha1rnds4.
;; NOTE(review): the insn condition is not visible in this listing.
17748 (define_insn "sha256rnds2"
17749 [(set (match_operand:V4SI 0 "register_operand" "=x")
17751 [(match_operand:V4SI 1 "register_operand" "0")
17752 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
17753 (match_operand:V4SI 3 "register_operand" "Yz")]
17754 UNSPEC_SHA256RNDS2))]
17756 "sha256rnds2\t{%3, %2, %0|%0, %2, %3}"
17757 [(set_attr "type" "sselog1")
17759 (set_attr "mode" "TI")])
;; 512-bit cast pattern from a quarter-width vector: operand 0
;; (AVX512MODE2P register or memory) is set from an unspec wrapping the
;; <ssequartermode> operand 1.  Once reload has completed the pattern is
;; split: an operand is re-created as a hard register in the other
;; operand's mode and a plain move is emitted.
;; NOTE(review): the tests that choose which operand is re-moded are not
;; visible in this listing -- confirm against the full pattern.
17761 (define_insn_and_split "avx512f_<castmode><avxsizesuffix>_<castmode>"
17762 [(set (match_operand:AVX512MODE2P 0 "nonimmediate_operand" "=x,m")
17763 (unspec:AVX512MODE2P
17764 [(match_operand:<ssequartermode> 1 "nonimmediate_operand" "xm,x")]
17768 "&& reload_completed"
17771 rtx op0 = operands[0];
17772 rtx op1 = operands[1];
17774 op0 = gen_rtx_REG (<ssequartermode>mode, REGNO (op0));
17776 op1 = gen_rtx_REG (<MODE>mode, REGNO (op1));
17777 emit_move_insn (op0, op1);
17781 (define_insn_and_split "avx512f_<castmode><avxsizesuffix>_256<castmode>"
17782 [(set (match_operand:AVX512MODE2P 0 "nonimmediate_operand" "=x,m")
17783 (unspec:AVX512MODE2P
17784 [(match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "xm,x")]
17788 "&& reload_completed"
17791 rtx op0 = operands[0];
17792 rtx op1 = operands[1];
17794 op0 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op0));
17796 op1 = gen_rtx_REG (<MODE>mode, REGNO (op1));
17797 emit_move_insn (op0, op1);