1 ;; GCC machine description for SSE instructions
2 ;; Copyright (C) 2005-2014 Free Software Foundation, Inc.
4 ;; This file is part of GCC.
6 ;; GCC is free software; you can redistribute it and/or modify
7 ;; it under the terms of the GNU General Public License as published by
8 ;; the Free Software Foundation; either version 3, or (at your option)
11 ;; GCC is distributed in the hope that it will be useful,
12 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
13 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 ;; GNU General Public License for more details.
16 ;; You should have received a copy of the GNU General Public License
17 ;; along with GCC; see the file COPYING3. If not see
18 ;; <http://www.gnu.org/licenses/>.
20 (define_c_enum "unspec" [
55 UNSPEC_XOP_UNSIGNED_CMP
66 UNSPEC_AESKEYGENASSIST
87 ;; For AVX512F support
91 UNSPEC_UNSIGNED_FIX_NOTRUNC
106 UNSPEC_COMPRESS_STORE
111 ;; For embed. rounding feature
112 UNSPEC_EMBEDDED_ROUNDING
114 ;; For AVX512PF support
115 UNSPEC_GATHER_PREFETCH
116 UNSPEC_SCATTER_PREFETCH
118 ;; For AVX512ER support
132 ;; For AVX512BW support
140 ;; For AVX512DQ support
143 UNSPEC_FPCLASS_SCALAR
148 (define_c_enum "unspecv" [
158 ;; All vector modes including V?TImode, used in move patterns.
;; 256-bit modes need TARGET_AVX and 512-bit modes need TARGET_AVX512F.
;; V4TI is a 512-bit mode (sseinsnmode maps it to XI), so it is gated on
;; TARGET_AVX512F like the other 512-bit modes rather than on TARGET_AVX.
159 (define_mode_iterator VMOVE
160 [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
161 (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
162 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
163 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
164 (V4TI "TARGET_AVX512F") (V2TI "TARGET_AVX") V1TI
165 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
166 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF])
168 ;; All AVX512VL vector modes
;; 512-bit modes need TARGET_AVX512BW (QI/HI elements) or TARGET_AVX512F
;; (SI/DI/SF/DF elements); 128- and 256-bit modes additionally need
;; TARGET_AVX512VL.
169 (define_mode_iterator V_AVX512VL
170 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX512VL && TARGET_AVX512BW")
171 (V16QI "TARGET_AVX512VL && TARGET_AVX512BW")
172 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX512VL && TARGET_AVX512BW")
173 (V8HI "TARGET_AVX512VL && TARGET_AVX512BW")
174 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
175 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")
176 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
177 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
;; Integer and float vector modes without the V?TImode, V64QI and V32HI
;; entries: 256-bit modes need TARGET_AVX, the 512-bit SI/DI/SF/DF modes
;; need TARGET_AVX512F, and V2DF needs TARGET_SSE2.
180 (define_mode_iterator V
181 [(V32QI "TARGET_AVX") V16QI
182 (V16HI "TARGET_AVX") V8HI
183 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
184 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
185 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
186 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
188 ;; All 128bit vector modes; V2DF is only enabled with TARGET_SSE2.
189 (define_mode_iterator V_128
190 [V16QI V8HI V4SI V2DI V4SF (V2DF "TARGET_SSE2")])
192 ;; All 256bit vector modes (integer and float), listed without conditions.
193 (define_mode_iterator V_256
194 [V32QI V16HI V8SI V4DI V8SF V4DF])
196 ;; All 512bit vector modes (integer and float), listed without conditions.
197 (define_mode_iterator V_512 [V64QI V32HI V16SI V8DI V16SF V8DF])
199 ;; All 256bit and 512bit vector modes; the 256bit modes are unconditional
;; and every 512bit mode needs TARGET_AVX512F.
200 (define_mode_iterator V_256_512
201 [V32QI V16HI V8SI V4DI V8SF V4DF
202 (V64QI "TARGET_AVX512F") (V32HI "TARGET_AVX512F") (V16SI "TARGET_AVX512F")
203 (V8DI "TARGET_AVX512F") (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")])
205 ;; All vector float modes: 512bit needs TARGET_AVX512F, 256bit needs
;; TARGET_AVX, V4SF is unconditional and V2DF needs TARGET_SSE2.
206 (define_mode_iterator VF
207 [(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
208 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
210 ;; 128- and 256-bit float vector modes (256-bit needs TARGET_AVX, V2DF
;; needs TARGET_SSE2).
211 (define_mode_iterator VF_128_256
212 [(V8SF "TARGET_AVX") V4SF
213 (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
215 ;; All SFmode vector float modes (V16SF with TARGET_AVX512F, V8SF with
;; TARGET_AVX, V4SF unconditionally).
216 (define_mode_iterator VF1
217 [(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF])
219 ;; 128- and 256-bit SF vector modes (V8SF needs TARGET_AVX).
220 (define_mode_iterator VF1_128_256
221 [(V8SF "TARGET_AVX") V4SF])
;; 256- and 128-bit SF vector modes: V8SF is listed unconditionally and
;; V4SF needs TARGET_AVX512VL.
223 (define_mode_iterator VF1_128_256VL
224 [V8SF (V4SF "TARGET_AVX512VL")])
226 ;; All DFmode vector float modes (V8DF with TARGET_AVX512F, V4DF with
;; TARGET_AVX, V2DF unconditionally).
227 (define_mode_iterator VF2
228 [(V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF])
230 ;; 128- and 256-bit DF vector modes (V4DF needs TARGET_AVX).
231 (define_mode_iterator VF2_128_256
232 [(V4DF "TARGET_AVX") V2DF])
;; 512- and 256-bit DF vector modes (V8DF with TARGET_AVX512F, V4DF with
;; TARGET_AVX).
234 (define_mode_iterator VF2_512_256
235 [(V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX")])
;; 512- and 256-bit DF vector modes: V8DF is listed unconditionally and
;; V4DF needs TARGET_AVX512VL.
237 (define_mode_iterator VF2_512_256VL
238 [V8DF (V4DF "TARGET_AVX512VL")])
240 ;; All 128bit vector float modes; V2DF is only enabled with TARGET_SSE2.
241 (define_mode_iterator VF_128
242 [V4SF (V2DF "TARGET_SSE2")])
244 ;; All 256bit vector float modes
245 (define_mode_iterator VF_256
248 ;; All 512bit vector float modes
249 (define_mode_iterator VF_512
;; Vector integer modes of all three widths: V16SI and V8DI are listed
;; unconditionally, V64QI and V32HI need TARGET_AVX512BW, and every 128-
;; and 256-bit mode needs TARGET_AVX512VL.
252 (define_mode_iterator VI_AVX512VL
253 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX512VL") (V16QI "TARGET_AVX512VL")
254 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL")
255 V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
256 V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
;; Vector float modes of all three widths: the 512-bit modes are listed
;; unconditionally, the 128- and 256-bit modes need TARGET_AVX512VL.
258 (define_mode_iterator VF_AVX512VL
259 [V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
260 V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
;; DF vector modes: V8DF unconditionally, V4DF and V2DF with
;; TARGET_AVX512VL.
262 (define_mode_iterator VF2_AVX512VL
263 [V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
;; SF vector modes: V16SF unconditionally, V8SF and V4SF with
;; TARGET_AVX512VL.
265 (define_mode_iterator VF1_AVX512VL
266 [V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")])
268 ;; All vector integer modes: 128-bit modes are unconditional, 256-bit
;; modes need TARGET_AVX, V16SI/V8DI need TARGET_AVX512F and V64QI/V32HI
;; need TARGET_AVX512BW.
269 (define_mode_iterator VI
270 [(V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")
271 (V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX") V16QI
272 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX") V8HI
273 (V8SI "TARGET_AVX") V4SI
274 (V4DI "TARGET_AVX") V2DI])
;; All vector integer modes with the 256-bit ones gated on TARGET_AVX2;
;; V64QI/V32HI need TARGET_AVX512BW and V16SI/V8DI need TARGET_AVX512F.
276 (define_mode_iterator VI_AVX2
277 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
278 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI
279 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI
280 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
282 ;; All QImode vector integer modes
;; Only the 128- and 256-bit modes are listed here (V32QI needs
;; TARGET_AVX); V64QI is not part of this iterator.
283 (define_mode_iterator VI1
284 [(V32QI "TARGET_AVX") V16QI])
;; Vector integer modes with per-element-size enabling conditions:
;; QI modes follow TARGET_AVX512BW / TARGET_AVX / unconditional, HI modes
;; need TARGET_AVX512BW (plus TARGET_AVX512VL below 512 bits), and SI/DI
;; modes need TARGET_AVX512F at 512 bits and TARGET_AVX512VL below.
;; NOTE(review): judging by the name this drives the unaligned integer
;; load/store patterns; the users are not visible in this part of the file.
286 (define_mode_iterator VI_UNALIGNED_LOADSTORE
287 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX") V16QI
288 (V32HI "TARGET_AVX512BW")
289 (V16HI "TARGET_AVX512BW && TARGET_AVX512VL")
290 (V8HI "TARGET_AVX512BW && TARGET_AVX512VL")
291 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
292 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
294 ;; All DImode vector integer modes (V8DI with TARGET_AVX512F, V4DI with
;; TARGET_AVX, V2DI unconditionally).
295 (define_mode_iterator VI8
296 [(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI])
;; DImode vector integer modes: V8DI unconditionally, V4DI and V2DI with
;; TARGET_AVX512VL.
298 (define_mode_iterator VI8_AVX512VL
299 [V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
;; 512- and 256-bit DImode vector modes: V8DI unconditionally, V4DI with
;; TARGET_AVX512VL.
301 (define_mode_iterator VI8_256_512
302 [V8DI (V4DI "TARGET_AVX512VL")])
;; V4DI, V2DI and V4SI (each needing TARGET_AVX512VL) plus V16QI and V8HI
;; (each needing TARGET_AVX512VL and TARGET_AVX512BW).
304 (define_mode_iterator VI128_256
305 [(V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
306 (V16QI "TARGET_AVX512VL && TARGET_AVX512BW")
307 (V8HI "TARGET_AVX512VL && TARGET_AVX512BW")])
;; QImode vector modes: V64QI with TARGET_AVX512BW, V32QI with TARGET_AVX2,
;; V16QI unconditionally.
309 (define_mode_iterator VI1_AVX2
310 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI])
;; HImode vector modes: V32HI with TARGET_AVX512BW, V16HI with TARGET_AVX2,
;; V8HI unconditionally.
312 (define_mode_iterator VI2_AVX2
313 [(V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI])
;; HImode vector modes: V32HI with TARGET_AVX512F (not AVX512BW as in
;; VI2_AVX2), V16HI with TARGET_AVX2, V8HI unconditionally.
315 (define_mode_iterator VI2_AVX512F
316 [(V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX2") V8HI])
;; SImode vector modes: V8SI with TARGET_AVX, V4SI unconditionally.
318 (define_mode_iterator VI4_AVX
319 [(V8SI "TARGET_AVX") V4SI])
;; SImode vector modes: V8SI with TARGET_AVX2, V4SI unconditionally.
321 (define_mode_iterator VI4_AVX2
322 [(V8SI "TARGET_AVX2") V4SI])
;; SImode vector modes: V16SI with TARGET_AVX512F, V8SI with TARGET_AVX2,
;; V4SI unconditionally.
324 (define_mode_iterator VI4_AVX512F
325 [(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI])
;; SImode vector modes: V16SI unconditionally, V8SI and V4SI with
;; TARGET_AVX512VL.
327 (define_mode_iterator VI4_AVX512VL
328 [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")])
;; HI, SI and DI vector modes.  SI: AVX512F at 512 bits, AVX2 below.
;; HI: AVX512BW at 512 bits, AVX512VL below.  DI: AVX512F at 512 bits,
;; AVX512VL below.
330 (define_mode_iterator VI248_AVX512
331 [(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") (V4SI "TARGET_AVX2")
332 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL")
333 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
;; SI and DI vector modes: 512-bit with TARGET_AVX512F, 128- and 256-bit
;; with TARGET_AVX512VL.
335 (define_mode_iterator VI48_AVX512VL
336 [(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
337 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
;; DImode vector modes: V8DI with TARGET_AVX512BW, V4DI with TARGET_AVX2,
;; V2DI unconditionally.
339 (define_mode_iterator VI8_AVX2_AVX512BW
340 [(V8DI "TARGET_AVX512BW") (V4DI "TARGET_AVX2") V2DI])
;; DImode vector modes: V4DI with TARGET_AVX2, V2DI unconditionally.
342 (define_mode_iterator VI8_AVX2
343 [(V4DI "TARGET_AVX2") V2DI])
;; DImode vector modes: V8DI with TARGET_AVX512F, V4DI with TARGET_AVX2,
;; V2DI unconditionally.
345 (define_mode_iterator VI8_AVX2_AVX512F
346 [(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
348 (define_mode_iterator VI4_128_8_256
351 (define_mode_iterator VI2_128_4_256
355 (define_mode_iterator V8FI
359 (define_mode_iterator V16FI
;; Vector-of-TImode modes: V4TI with TARGET_AVX512BW, V2TI with
;; TARGET_AVX2, V1TI unconditionally.
362 ;; ??? We should probably use TImode instead.
363 (define_mode_iterator VIMAX_AVX2
364 [(V4TI "TARGET_AVX512BW") (V2TI "TARGET_AVX2") V1TI])
;; Same conditions as VIMAX_AVX2, but the 128-bit entry is scalar TImode
;; instead of V1TI.
366 ;; ??? This should probably be dropped in favor of VIMAX_AVX2.
367 (define_mode_iterator SSESCALARMODE
368 [(V4TI "TARGET_AVX512BW") (V2TI "TARGET_AVX2") TI])
;; QI and HI vector modes: 512-bit with TARGET_AVX512BW, 256-bit with
;; TARGET_AVX2, 128-bit unconditionally.
370 (define_mode_iterator VI12_AVX2
371 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
372 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI])
;; HI and SI vector modes: V32HI with TARGET_AVX512BW, 256-bit modes with
;; TARGET_AVX2, 128-bit modes unconditionally.  There is no V16SI entry.
374 (define_mode_iterator VI24_AVX2
375 [(V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI
376 (V8SI "TARGET_AVX2") V4SI])
;; QI, HI and SI vector modes: V32HI and V16SI with TARGET_AVX512F,
;; 256-bit modes with TARGET_AVX2, 128-bit modes unconditionally.  There
;; is no V64QI entry.
378 (define_mode_iterator VI124_AVX512F
379 [(V32QI "TARGET_AVX2") V16QI
380 (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX2") V8HI
381 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI])
;; 128- and 256-bit QI, HI and SI vector modes; the 256-bit ones need
;; TARGET_AVX2.
383 (define_mode_iterator VI124_AVX2
384 [(V32QI "TARGET_AVX2") V16QI
385 (V16HI "TARGET_AVX2") V8HI
386 (V8SI "TARGET_AVX2") V4SI])
;; HI, SI and DI vector modes: V32HI with TARGET_AVX512BW, 256-bit modes
;; with TARGET_AVX2, 128-bit modes unconditionally.
388 (define_mode_iterator VI248_AVX2
389 [(V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI
390 (V8SI "TARGET_AVX2") V4SI
391 (V4DI "TARGET_AVX2") V2DI])
;; HI, SI and DI vector modes: 256-bit modes with TARGET_AVX2, 128-bit
;; modes unconditionally, and V8DI (the only 512-bit entry) with
;; TARGET_AVX512F.
393 (define_mode_iterator VI248_AVX2_8_AVX512F
394 [(V16HI "TARGET_AVX2") V8HI
395 (V8SI "TARGET_AVX2") V4SI
396 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
;; HI, SI and DI vector modes.  SI and DI: AVX512F at 512 bits, AVX2
;; below.  HI: AVX512BW at 512 bits, AVX512VL below.
398 (define_mode_iterator VI248_AVX512BW
399 [(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") (V4SI "TARGET_AVX2")
400 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL")
401 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") (V2DI "TARGET_AVX2")])
403 (define_mode_iterator V48_AVX2
406 (V4SI "TARGET_AVX2") (V2DI "TARGET_AVX2")
407 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")])
;; ISA name substituted for <avx512> in pattern names such as
;; "<avx512>_load<mode>_mask": avx512vl for 128- and 256-bit modes,
;; avx512bw for V64QI/V32HI and avx512f for the other 512-bit modes.
409 (define_mode_attr avx512
410 [(V16QI "avx512vl") (V32QI "avx512vl") (V64QI "avx512bw")
411 (V8HI "avx512vl") (V16HI "avx512vl") (V32HI "avx512bw")
412 (V4SI "avx512vl") (V8SI "avx512vl") (V16SI "avx512f")
413 (V2DI "avx512vl") (V4DI "avx512vl") (V8DI "avx512f")
414 (V4SF "avx512vl") (V8SF "avx512vl") (V16SF "avx512f")
415 (V2DF "avx512vl") (V4DF "avx512vl") (V8DF "avx512f")])
;; ISA name string per mode.  QI and SI modes use sse2/avx/avx512f by
;; width, HI and the 128/256-bit DI modes use avx512vl (V32HI: avx512bw),
;; and float modes use avx below 512 bits and avx512f at 512 bits.
417 (define_mode_attr sse2_avx_avx512f
418 [(V16QI "sse2") (V32QI "avx") (V64QI "avx512f")
419 (V8HI "avx512vl") (V16HI "avx512vl") (V32HI "avx512bw")
420 (V4SI "sse2") (V8SI "avx") (V16SI "avx512f")
421 (V2DI "avx512vl") (V4DI "avx512vl") (V8DI "avx512f")
422 (V16SF "avx512f") (V8SF "avx") (V4SF "avx")
423 (V8DF "avx512f") (V4DF "avx") (V2DF "avx")])
;; ISA name string for integer modes: sse2 at 128 bits, avx2 at 256 bits,
;; and avx512bw (QI/HI/TI elements) or avx512f (SI/DI elements) at 512 bits.
425 (define_mode_attr sse2_avx2
426 [(V16QI "sse2") (V32QI "avx2") (V64QI "avx512bw")
427 (V8HI "sse2") (V16HI "avx2") (V32HI "avx512bw")
428 (V4SI "sse2") (V8SI "avx2") (V16SI "avx512f")
429 (V2DI "sse2") (V4DI "avx2") (V8DI "avx512f")
430 (V1TI "sse2") (V2TI "avx2") (V4TI "avx512bw")])
;; ISA name string for integer modes: ssse3 for V4HI, TI and the 128-bit
;; vector modes, avx2 at 256 bits and avx512bw for V64QI/V32HI/V4TI.
432 (define_mode_attr ssse3_avx2
433 [(V16QI "ssse3") (V32QI "avx2") (V64QI "avx512bw")
434 (V4HI "ssse3") (V8HI "ssse3") (V16HI "avx2") (V32HI "avx512bw")
435 (V4SI "ssse3") (V8SI "avx2")
436 (V2DI "ssse3") (V4DI "avx2")
437 (TI "ssse3") (V2TI "avx2") (V4TI "avx512bw")])
;; ISA name string for integer modes: sse4_1 at 128 bits, avx2 at 256
;; bits; at 512 bits avx512bw (QI/HI), avx512f (V16SI) or avx512dq (V8DI).
439 (define_mode_attr sse4_1_avx2
440 [(V16QI "sse4_1") (V32QI "avx2") (V64QI "avx512bw")
441 (V8HI "sse4_1") (V16HI "avx2") (V32HI "avx512bw")
442 (V4SI "sse4_1") (V8SI "avx2") (V16SI "avx512f")
443 (V2DI "sse4_1") (V4DI "avx2") (V8DI "avx512dq")])
;; ISA name string for 128/256-bit SI/DI/SF/DF modes: avx for the float
;; modes, avx2 for the integer modes.
445 (define_mode_attr avx_avx2
446 [(V4SF "avx") (V2DF "avx")
447 (V8SF "avx") (V4DF "avx")
448 (V4SI "avx2") (V2DI "avx2")
449 (V8SI "avx2") (V4DI "avx2")])
;; Name prefix for integer modes: "vec" for the 128-bit modes and "avx2"
;; for the 256-bit modes.
451 (define_mode_attr vec_avx2
452 [(V16QI "vec") (V32QI "avx2")
453 (V8HI "vec") (V16HI "avx2")
454 (V4SI "vec") (V8SI "avx2")
455 (V2DI "vec") (V4DI "avx2")])
;; ISA name string: avx2 for 128/256-bit SI/DI/SF/DF modes, avx512f for
;; their 512-bit variants, avx512vl for V8HI/V16HI and avx512bw for V32HI.
457 (define_mode_attr avx2_avx512bw
458 [(V4SI "avx2") (V8SI "avx2") (V16SI "avx512f")
459 (V2DI "avx2") (V4DI "avx2") (V8DI "avx512f")
460 (V4SF "avx2") (V8SF "avx2") (V16SF "avx512f")
461 (V2DF "avx2") (V4DF "avx2") (V8DF "avx512f")
462 (V8HI "avx512vl") (V16HI "avx512vl") (V32HI "avx512bw")])
;; Letter selecting the float ("f") or integer ("i") form of mnemonics
;; built from this attribute, e.g. vextract<shuffletype>64x4 and
;; vbroadcast<shuffletype>32x4 in *mov<mode>_internal.  Every integer mode
;; must map to "i": there is no "u" form of these instructions, so any
;; other letter would make a V16HI move print a nonexistent mnemonic.
464 (define_mode_attr shuffletype
465 [(V16SF "f") (V16SI "i") (V8DF "f") (V8DI "i")
466 (V8SF "f") (V8SI "i") (V4DF "f") (V4DI "i")
467 (V4SF "f") (V4SI "i") (V2DF "f") (V2DI "i")
468 (V32QI "i") (V16HI "i") (V16QI "i") (V8HI "i")
469 (V64QI "i") (V1TI "i") (V2TI "i")])
;; Mapping of 512-bit SF/DF/SI/DI vector modes to the vector mode with
;; the same element type and a quarter of the elements.
471 (define_mode_attr ssequartermode
472 [(V16SF "V4SF") (V8DF "V2DF") (V16SI "V4SI") (V8DI "V2DI")])
;; Lower-case name of the vector mode with the same number of elements
;; and elements twice as wide (QI -> hi, HI -> si, SI -> di).
474 (define_mode_attr ssedoublemodelower
475 [(V16QI "v16hi") (V32QI "v32hi") (V64QI "v64hi")
476 (V8HI "v8si") (V16HI "v16si") (V32HI "v32si")
477 (V4SI "v4di") (V8SI "v8di") (V16SI "v16di")])
;; Mapping to a vector mode of twice the total size.  The float modes,
;; the DI modes and V16SI/V8SI map to the same element type with twice
;; the elements; the QI and HI modes and V4SI map to the same element
;; count with elements twice as wide.
479 (define_mode_attr ssedoublemode
480 [(V16SF "V32SF") (V16SI "V32SI") (V8DI "V16DI") (V8DF "V16DF")
481 (V8SF "V16SF") (V8SI "V16SI") (V4DI "V8DI") (V4DF "V8DF")
482 (V16HI "V16SI") (V8HI "V8SI") (V4HI "V4SI") (V4SI "V4DI")
483 (V32HI "V32SI") (V32QI "V32HI") (V16QI "V16HI") (V64QI "V64HI")])
;; Mapping of DImode vector modes to the QImode vector mode of the same
;; total size.
485 (define_mode_attr ssebytemode
486 [(V8DI "V64QI") (V4DI "V32QI") (V2DI "V16QI")])
488 ;; All 128bit vector integer modes (QI, HI, SI and DI elements).
489 (define_mode_iterator VI_128 [V16QI V8HI V4SI V2DI])
491 ;; All 256bit vector integer modes (QI, HI, SI and DI elements).
492 (define_mode_iterator VI_256 [V32QI V16HI V8SI V4DI])
494 ;; All 512bit vector integer modes (QI, HI, SI and DI elements).
495 (define_mode_iterator VI_512 [V64QI V32HI V16SI V8DI])
497 ;; Various 128bit vector integer mode combinations.  The digits after
;; "VI" give the element sizes in bytes that the iterator covers
;; (1 = QI, 2 = HI, 4 = SI, 8 = DI).
498 (define_mode_iterator VI12_128 [V16QI V8HI])
499 (define_mode_iterator VI14_128 [V16QI V4SI])
500 (define_mode_iterator VI124_128 [V16QI V8HI V4SI])
501 (define_mode_iterator VI128_128 [V16QI V8HI V2DI])
502 (define_mode_iterator VI24_128 [V8HI V4SI])
503 (define_mode_iterator VI248_128 [V8HI V4SI V2DI])
504 (define_mode_iterator VI48_128 [V4SI V2DI])
506 ;; Various 256bit and 512bit vector integer mode combinations.
;; In VI124_256_1248_512 the 256-bit QI/HI/SI modes are unconditional,
;; V8DI/V16SI need TARGET_AVX512F and V64QI/V32HI need TARGET_AVX512BW.
;; In VI512_48F_12BW the SI/DI modes are unconditional and the QI/HI
;; modes need TARGET_AVX512BW.
507 (define_mode_iterator VI124_256_1248_512
508 [V32QI V16HI V8SI (V8DI "TARGET_AVX512F")
509 (V16SI "TARGET_AVX512F") (V64QI "TARGET_AVX512BW")
510 (V32HI "TARGET_AVX512BW")])
511 (define_mode_iterator VI48_256 [V8SI V4DI])
512 (define_mode_iterator VI48_512 [V16SI V8DI])
513 (define_mode_iterator VI4_256_8_512 [V8SI V8DI])
514 (define_mode_iterator VI512_48F_12BW
515 [V16SI V8DI (V64QI "TARGET_AVX512BW") (V32HI "TARGET_AVX512BW")])
517 ;; Int-float size matches: each iterator pairs the integer and float
;; vector modes that share element size and vector width.  In
;; VI8F_256_512 the 512-bit pair needs TARGET_AVX512F.
518 (define_mode_iterator VI4F_128 [V4SI V4SF])
519 (define_mode_iterator VI8F_128 [V2DI V2DF])
520 (define_mode_iterator VI4F_256 [V8SI V8SF])
521 (define_mode_iterator VI8F_256 [V4DI V4DF])
522 (define_mode_iterator VI8F_256_512
523 [V4DI V4DF (V8DI "TARGET_AVX512F") (V8DF "TARGET_AVX512F")])
524 (define_mode_iterator VI48F_256_512_2I
526 (V32HI "TARGET_AVX512BW")
527 (V16HI "TARGET_AVX512BW && TARGET_AVX512VL")
528 (V8HI "TARGET_AVX512BW && TARGET_AVX512VL")
529 (V16SI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
530 (V8DI "TARGET_AVX512F") (V8DF "TARGET_AVX512F")
531 (V4DF "TARGET_AVX512VL") (V4DI "TARGET_AVX512VL")])
;; HI/SI/DI integer and SF/DF float vector modes.  128- and 256-bit SI,
;; DI, SF and DF modes need TARGET_AVX512VL; V32HI needs TARGET_AVX512BW
;; and the narrower HI modes need TARGET_AVX512BW and TARGET_AVX512VL;
;; the 512-bit SI, DI, SF and DF modes need TARGET_AVX512F.
532 (define_mode_iterator VI248F
533 [(V8SI "TARGET_AVX512VL") (V8SF "TARGET_AVX512VL")
534 (V4SF "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
535 (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")
536 (V2DF "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")
537 (V32HI "TARGET_AVX512BW")
538 (V16HI "TARGET_AVX512BW && TARGET_AVX512VL")
539 (V8HI "TARGET_AVX512BW && TARGET_AVX512VL")
540 (V16SI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
541 (V8DI "TARGET_AVX512F") (V8DF "TARGET_AVX512F")])
;; 512-bit modes only: SI/DI/SF/DF unconditionally, plus V32HI and V64QI
;; with TARGET_AVX512BW.
542 (define_mode_iterator VI48F_I12B_512
543 [V16SI V16SF V8DI V8DF
544 (V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512BW")])
;; SI/DI integer and SF/DF float vector modes: the 512-bit modes are
;; unconditional, the 128- and 256-bit modes need TARGET_AVX512VL.
545 (define_mode_iterator VI48F
546 [V16SI V16SF V8DI V8DF
547 (V8SI "TARGET_AVX512VL") (V8SF "TARGET_AVX512VL")
548 (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")
549 (V4SI "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
550 (V2DI "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
;; 256-bit SI/DI integer and SF/DF float vector modes, unconditionally.
551 (define_mode_iterator VI48F_256 [V8SI V8SF V4DI V4DF])
553 ;; Mapping from float mode to required SSE level: sse for SF and V4SF,
;; sse2 for DF and V2DF, avx for the 256-bit modes and avx512f for the
;; 512-bit modes.
554 (define_mode_attr sse
555 [(SF "sse") (DF "sse2")
556 (V4SF "sse") (V2DF "sse2")
557 (V16SF "avx512f") (V8SF "avx")
558 (V8DF "avx512f") (V4DF "avx")])
;; ISA name string for QI and DI vector modes: sse2 at 128 bits, avx at
;; 256 bits and avx512f at 512 bits.
560 (define_mode_attr sse2
561 [(V16QI "sse2") (V32QI "avx") (V64QI "avx512f")
562 (V2DI "sse2") (V4DI "avx") (V8DI "avx512f")])
;; ISA name string for QI vector modes: sse3 for V16QI, avx for V32QI.
564 (define_mode_attr sse3
565 [(V16QI "sse3") (V32QI "avx")])
567 (define_mode_attr sse4_1
568 [(V4SF "sse4_1") (V2DF "sse4_1")
569 (V8SF "avx") (V4DF "avx")
;; Vector-width suffix used in pattern names: "512" for 512-bit modes,
;; "256" for 256-bit modes and the empty string for 128-bit modes.
572 (define_mode_attr avxsizesuffix
573 [(V64QI "512") (V32HI "512") (V16SI "512") (V8DI "512")
574 (V32QI "256") (V16HI "256") (V8SI "256") (V4DI "256")
575 (V16QI "") (V8HI "") (V4SI "") (V2DI "")
576 (V16SF "512") (V8DF "512")
577 (V8SF "256") (V4DF "256")
578 (V4SF "") (V2DF "")])
580 ;; SSE instruction mode
581 (define_mode_attr sseinsnmode
582 [(V64QI "XI") (V32HI "XI") (V16SI "XI") (V8DI "XI") (V4TI "XI")
583 (V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI") (V2TI "OI")
584 (V16QI "TI") (V8HI "TI") (V4SI "TI") (V2DI "TI") (V1TI "TI")
585 (V16SF "V16SF") (V8DF "V8DF")
586 (V8SF "V8SF") (V4DF "V4DF")
587 (V4SF "V4SF") (V2DF "V2DF")
590 ;; Mapping of vector modes to corresponding mask size: the integer mode
;; whose bit count equals the number of vector elements, with QI used for
;; every vector of eight or fewer elements.
591 (define_mode_attr avx512fmaskmode
592 [(V64QI "DI") (V32QI "SI") (V16QI "HI")
593 (V32HI "SI") (V16HI "HI") (V8HI "QI") (V4HI "QI")
594 (V16SI "HI") (V8SI "QI") (V4SI "QI")
595 (V8DI "QI") (V4DI "QI") (V2DI "QI")
596 (V16SF "HI") (V8SF "QI") (V4SF "QI")
597 (V8DF "QI") (V4DF "QI") (V2DF "QI")])
599 ;; Mapping of vector float modes to an integer mode of the same size
;; and element count.  Integer vector modes map to themselves.
600 (define_mode_attr sseintvecmode
601 [(V16SF "V16SI") (V8DF "V8DI")
602 (V8SF "V8SI") (V4DF "V4DI")
603 (V4SF "V4SI") (V2DF "V2DI")
604 (V16SI "V16SI") (V8DI "V8DI")
605 (V8SI "V8SI") (V4DI "V4DI")
606 (V4SI "V4SI") (V2DI "V2DI")
607 (V16HI "V16HI") (V8HI "V8HI")
608 (V32HI "V32HI") (V64QI "V64QI")
609 (V32QI "V32QI") (V16QI "V16QI")])
;; Mapping of vector float modes to a single wide integer mode (TI, OI
;; or XI) rather than to an integer vector mode.
;; NOTE(review): there is no V16SF entry here -- confirm no user needs one.
611 (define_mode_attr sseintvecmode2
612 [(V8DF "XI") (V4DF "OI") (V2DF "TI")
613 (V8SF "OI") (V4SF "TI")])
;; Lower-case name of the integer vector mode with the same size and
;; element count; integer vector modes map to their own lower-case name.
615 (define_mode_attr sseintvecmodelower
616 [(V16SF "v16si") (V8DF "v8di")
617 (V8SF "v8si") (V4DF "v4di")
618 (V4SF "v4si") (V2DF "v2di")
619 (V8SI "v8si") (V4DI "v4di")
620 (V4SI "v4si") (V2DI "v2di")
621 (V16HI "v16hi") (V8HI "v8hi")
622 (V32QI "v32qi") (V16QI "v16qi")])
624 ;; Mapping of vector modes to a vector mode of double size: same
;; element type, twice as many elements.  Only 128- and 256-bit modes
;; have entries.
625 (define_mode_attr ssedoublevecmode
626 [(V32QI "V64QI") (V16HI "V32HI") (V8SI "V16SI") (V4DI "V8DI")
627 (V16QI "V32QI") (V8HI "V16HI") (V4SI "V8SI") (V2DI "V4DI")
628 (V8SF "V16SF") (V4DF "V8DF")
629 (V4SF "V8SF") (V2DF "V4DF")])
631 ;; Mapping of vector modes to a vector mode of half size
632 (define_mode_attr ssehalfvecmode
633 [(V64QI "V32QI") (V32HI "V16HI") (V16SI "V8SI") (V8DI "V4DI")
634 (V32QI "V16QI") (V16HI "V8HI") (V8SI "V4SI") (V4DI "V2DI")
635 (V16QI "V8QI") (V8HI "V4HI") (V4SI "V2SI")
636 (V16SF "V8SF") (V8DF "V4DF")
637 (V8SF "V4SF") (V4DF "V2DF")
640 ;; Mapping of vector modes to packed single mode of the same size
;; (V4SF for 128-bit, V8SF for 256-bit and V16SF for 512-bit modes).
641 (define_mode_attr ssePSmode
642 [(V16SI "V16SF") (V8DF "V16SF")
643 (V16SF "V16SF") (V8DI "V16SF")
644 (V64QI "V16SF") (V32QI "V8SF") (V16QI "V4SF")
645 (V32HI "V16SF") (V16HI "V8SF") (V8HI "V4SF")
646 (V8SI "V8SF") (V4SI "V4SF")
647 (V4DI "V8SF") (V2DI "V4SF")
648 (V4TI "V16SF") (V2TI "V8SF") (V1TI "V4SF")
649 (V8SF "V8SF") (V4SF "V4SF")
650 (V4DF "V8SF") (V2DF "V4SF")])
;; Mapping of V8DI/V4DI to the packed single mode with the same number
;; of elements (half the total size).
652 (define_mode_attr ssePSmode2
653 [(V8DI "V8SF") (V4DI "V4SF")])
655 ;; Mapping of vector modes back to the scalar modes (the mode of a
;; single element).
656 (define_mode_attr ssescalarmode
657 [(V64QI "QI") (V32QI "QI") (V16QI "QI")
658 (V32HI "HI") (V16HI "HI") (V8HI "HI")
659 (V16SI "SI") (V8SI "SI") (V4SI "SI")
660 (V8DI "DI") (V4DI "DI") (V2DI "DI")
661 (V16SF "SF") (V8SF "SF") (V4SF "SF")
662 (V8DF "DF") (V4DF "DF") (V2DF "DF")])
664 ;; Mapping of vector modes to the 128bit modes with the same element
;; type; 128-bit modes map to themselves.
665 (define_mode_attr ssexmmmode
666 [(V64QI "V16QI") (V32QI "V16QI") (V16QI "V16QI")
667 (V32HI "V8HI") (V16HI "V8HI") (V8HI "V8HI")
668 (V16SI "V4SI") (V8SI "V4SI") (V4SI "V4SI")
669 (V8DI "V2DI") (V4DI "V2DI") (V2DI "V2DI")
670 (V16SF "V4SF") (V8SF "V4SF") (V4SF "V4SF")
671 (V8DF "V2DF") (V4DF "V2DF") (V2DF "V2DF")])
673 ;; Pointer size override for scalar modes (Intel asm dialect)
674 (define_mode_attr iptr
675 [(V32QI "b") (V16HI "w") (V8SI "k") (V4DI "q")
676 (V16QI "b") (V8HI "w") (V4SI "k") (V2DI "q")
677 (V8SF "k") (V4DF "q")
678 (V4SF "k") (V2DF "q")
681 ;; Number of scalar elements in each vector type
;; NOTE(review): V32HI has no entry although the other 512-bit modes do;
;; confirm that no pattern iterating over V32HI uses <ssescalarnum>.
682 (define_mode_attr ssescalarnum
683 [(V64QI "64") (V16SI "16") (V8DI "8")
684 (V32QI "32") (V16HI "16") (V8SI "8") (V4DI "4")
685 (V16QI "16") (V8HI "8") (V4SI "4") (V2DI "2")
686 (V16SF "16") (V8DF "8")
687 (V8SF "8") (V4DF "4")
688 (V4SF "4") (V2DF "2")])
690 ;; Mask of scalar elements in each vector type: the element count minus
;; one.  Only 128- and 256-bit modes have entries.
691 (define_mode_attr ssescalarnummask
692 [(V32QI "31") (V16HI "15") (V8SI "7") (V4DI "3")
693 (V16QI "15") (V8HI "7") (V4SI "3") (V2DI "1")
694 (V8SF "7") (V4DF "3")
695 (V4SF "3") (V2DF "1")])
;; Size in bits of one element of each vector mode; used for the
;; vmovdqu<ssescalarsize> / vmovdqa<ssescalarsize> mnemonics.
;; Among the float modes only V16SF and V8DF have entries.
697 (define_mode_attr ssescalarsize
698 [(V8DI "64") (V4DI "64") (V2DI "64")
699 (V64QI "8") (V32QI "8") (V16QI "8")
700 (V32HI "16") (V16HI "16") (V8HI "16")
701 (V16SI "32") (V8SI "32") (V4SI "32")
702 (V16SF "32") (V8DF "64")])
704 ;; SSE prefix for integer vector modes
705 (define_mode_attr sseintprefix
706 [(V2DI "p") (V2DF "")
711 (V16SI "p") (V16SF "")
712 (V16QI "p") (V8HI "p")
713 (V32QI "p") (V16HI "p")
714 (V64QI "p") (V32HI "p")])
716 ;; SSE scalar suffix for vector modes
717 (define_mode_attr ssescalarmodesuffix
719 (V8SF "ss") (V4DF "sd")
720 (V4SF "ss") (V2DF "sd")
721 (V8SI "ss") (V4DI "sd")
724 ;; Pack/unpack vector modes
;; sseunpackmode: the mode with elements twice as wide and half as many
;; of them (same total size).
725 (define_mode_attr sseunpackmode
726 [(V16QI "V8HI") (V8HI "V4SI") (V4SI "V2DI")
727 (V32QI "V16HI") (V16HI "V8SI") (V8SI "V4DI")
728 (V32HI "V16SI") (V64QI "V32HI") (V16SI "V8DI")])
;; Inverse of sseunpackmode: the mode with elements half as wide and
;; twice as many of them (same total size).
730 (define_mode_attr ssepackmode
731 [(V8HI "V16QI") (V4SI "V8HI") (V2DI "V4SI")
732 (V16HI "V32QI") (V8SI "V16HI") (V4DI "V8SI")
733 (V32HI "V64QI") (V16SI "V32HI") (V8DI "V16SI")])
735 ;; Mapping of the max integer size for xop rotate immediate constraint:
;; the element width in bits minus one.
736 (define_mode_attr sserotatemax
737 [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")])
739 ;; Mapping of mode to cast intrinsic name ("si", "ps" or "pd") for the
;; 256-bit modes V8SI, V8SF and V4DF.
740 (define_mode_attr castmode [(V8SI "si") (V8SF "ps") (V4DF "pd")])
742 ;; Instruction suffix for sign and zero extensions: "sx" for
;; sign_extend, "zx" for zero_extend.
743 (define_code_attr extsuffix [(sign_extend "sx") (zero_extend "zx")])
745 ;; i128 for integer vectors and TARGET_AVX2, f128 otherwise.
746 ;; i64x4 or f64x4 for 512bit modes.
;; The 256-bit integer entries are written "%~128", leaving the i/f
;; choice to the "%~" output modifier.
;; NOTE(review): V16SF maps to "f64x4", not "f32x8" -- presumably
;; intentional since the entry selects a 256-bit half; confirm.
747 (define_mode_attr i128
748 [(V16SF "f64x4") (V8SF "f128") (V8DF "f64x4") (V4DF "f128")
749 (V64QI "i64x4") (V32QI "%~128") (V32HI "i64x4") (V16HI "%~128")
750 (V16SI "i64x4") (V8SI "%~128") (V8DI "i64x4") (V4DI "%~128")])
;; The three 256-bit modes V8SI, V8SF and V4DF (the same set that
;; castmode has entries for).
753 (define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF])
755 ;; Mapping of immediate bits for blend instructions: the all-ones mask
;; with one bit per vector element.
756 (define_mode_attr blendbits
757 [(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")])
759 ;; Mapping for dbpsadbw modes: each HImode vector mode maps to the
;; QImode vector mode of the same total size.
760 (define_mode_attr dbpsadbwmode
761 [(V32HI "V64QI") (V16HI "V32QI") (V8HI "V16QI")])
763 ;; Mapping suffixes for broadcast, chosen by element type: b/w/d/q for
;; QI/HI/SI/DI elements, ss for SF and sd for DF.
764 (define_mode_attr bcstscalarsuff
765 [(V64QI "b") (V32QI "b") (V16QI "b")
766 (V32HI "w") (V16HI "w") (V8HI "w")
767 (V16SI "d") (V8SI "d") (V4SI "d")
768 (V8DI "q") (V4DI "q") (V2DI "q")
769 (V16SF "ss") (V8SF "ss") (V4SF "ss")
770 (V8DF "sd") (V4DF "sd") (V2DF "sd")])
772 ;; Include define_subst patterns for instructions with mask
775 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
777 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
781 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
783 ;; All of these patterns are enabled for SSE1 as well as SSE2.
784 ;; This is essential for maintaining stable calling conventions.
786 (define_expand "mov<mode>"
787 [(set (match_operand:VMOVE 0 "nonimmediate_operand")
788 (match_operand:VMOVE 1 "nonimmediate_operand"))]
791 ix86_expand_vector_move (<MODE>mode, operands);
795 (define_insn "*mov<mode>_internal"
796 [(set (match_operand:VMOVE 0 "nonimmediate_operand" "=v,v ,m")
797 (match_operand:VMOVE 1 "nonimmediate_or_sse_const_operand" "C ,vm,v"))]
799 && (register_operand (operands[0], <MODE>mode)
800 || register_operand (operands[1], <MODE>mode))"
802 int mode = get_attr_mode (insn);
803 switch (which_alternative)
806 return standard_sse_constant_opcode (insn, operands[1]);
809 /* There is no evex-encoded vmov* for sizes smaller than 64-bytes
810 in avx512f, so we need to use workarounds, to access sse registers
811 16-31, which are evex-only. In avx512vl we don't need workarounds. */
812 if (TARGET_AVX512F && GET_MODE_SIZE (<MODE>mode) < 64 && !TARGET_AVX512VL
813 && ((REG_P (operands[0]) && EXT_REX_SSE_REGNO_P (REGNO (operands[0])))
814 || (REG_P (operands[1]) && EXT_REX_SSE_REGNO_P (REGNO (operands[1])))))
816 if (memory_operand (operands[0], <MODE>mode))
818 if (<MODE_SIZE> == 32)
819 return "vextract<shuffletype>64x4\t{$0x0, %g1, %0|%0, %g1, 0x0}";
820 else if (<MODE_SIZE> == 16)
821 return "vextract<shuffletype>32x4\t{$0x0, %g1, %0|%0, %g1, 0x0}";
825 else if (memory_operand (operands[1], <MODE>mode))
827 if (<MODE_SIZE> == 32)
828 return "vbroadcast<shuffletype>64x4\t{%1, %g0|%g0, %1}";
829 else if (<MODE_SIZE> == 16)
830 return "vbroadcast<shuffletype>32x4\t{%1, %g0|%g0, %1}";
835 /* Reg -> reg move is always aligned. Just use wider move. */
840 return "vmovaps\t{%g1, %g0|%g0, %g1}";
843 return "vmovapd\t{%g1, %g0|%g0, %g1}";
846 return "vmovdqa64\t{%g1, %g0|%g0, %g1}";
857 && (misaligned_operand (operands[0], <MODE>mode)
858 || misaligned_operand (operands[1], <MODE>mode)))
859 return "vmovups\t{%1, %0|%0, %1}";
861 return "%vmovaps\t{%1, %0|%0, %1}";
867 && (misaligned_operand (operands[0], <MODE>mode)
868 || misaligned_operand (operands[1], <MODE>mode)))
869 return "vmovupd\t{%1, %0|%0, %1}";
871 return "%vmovapd\t{%1, %0|%0, %1}";
876 && (misaligned_operand (operands[0], <MODE>mode)
877 || misaligned_operand (operands[1], <MODE>mode)))
878 return TARGET_AVX512VL ? "vmovdqu64\t{%1, %0|%0, %1}"
879 : "vmovdqu\t{%1, %0|%0, %1}";
881 return TARGET_AVX512VL ? "vmovdqa64\t{%1, %0|%0, %1}"
882 : "%vmovdqa\t{%1, %0|%0, %1}";
884 if (misaligned_operand (operands[0], <MODE>mode)
885 || misaligned_operand (operands[1], <MODE>mode))
886 return "vmovdqu64\t{%1, %0|%0, %1}";
888 return "vmovdqa64\t{%1, %0|%0, %1}";
897 [(set_attr "type" "sselog1,ssemov,ssemov")
898 (set_attr "prefix" "maybe_vex")
900 (cond [(and (match_test "<MODE_SIZE> == 16")
901 (ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
902 (and (eq_attr "alternative" "2")
903 (match_test "TARGET_SSE_TYPELESS_STORES"))))
904 (const_string "<ssePSmode>")
905 (match_test "TARGET_AVX")
906 (const_string "<sseinsnmode>")
907 (ior (not (match_test "TARGET_SSE2"))
908 (match_test "optimize_function_for_size_p (cfun)"))
909 (const_string "V4SF")
910 (and (eq_attr "alternative" "0")
911 (match_test "TARGET_SSE_LOAD0_BY_PXOR"))
914 (const_string "<sseinsnmode>")))])
916 (define_insn "<avx512>_load<mode>_mask"
917 [(set (match_operand:V_AVX512VL 0 "register_operand" "=v,v")
918 (vec_merge:V_AVX512VL
919 (match_operand:V_AVX512VL 1 "nonimmediate_operand" "v,m")
920 (match_operand:V_AVX512VL 2 "vector_move_operand" "0C,0C")
921 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
924 switch (MODE_<sseinsnmode>)
932 if (misaligned_operand (operands[1], <MODE>mode))
933 return "vmovu<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}";
934 return "vmova<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}";
936 /* There is no vmovdqa8/16 use vmovdqu8/16 instead. */
937 if (<MODE>mode == V64QImode
938 || <MODE>mode == V32QImode
939 || <MODE>mode == V16QImode
940 || <MODE>mode == V32HImode
941 || <MODE>mode == V16HImode
942 || <MODE>mode == V8HImode
943 || misaligned_operand (operands[1], <MODE>mode))
944 return "vmovdqu<ssescalarsize>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}";
946 return "vmovdqa<ssescalarsize>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}";
949 [(set_attr "type" "ssemov")
950 (set_attr "prefix" "evex")
951 (set_attr "memory" "none,load")
952 (set_attr "mode" "<sseinsnmode>")])
954 (define_insn "<avx512>_blendm<mode>"
955 [(set (match_operand:V_AVX512VL 0 "register_operand" "=v")
956 (vec_merge:V_AVX512VL
957 (match_operand:V_AVX512VL 2 "nonimmediate_operand" "vm")
958 (match_operand:V_AVX512VL 1 "register_operand" "v")
959 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
961 "v<sseintprefix>blendm<ssemodesuffix>\t{%2, %1, %0%{%3%}|%0%{%3%}, %1, %2}"
962 [(set_attr "type" "ssemov")
963 (set_attr "prefix" "evex")
964 (set_attr "mode" "<sseinsnmode>")])
966 (define_insn "<avx512>_store<mode>_mask"
967 [(set (match_operand:V_AVX512VL 0 "memory_operand" "=m")
968 (vec_merge:V_AVX512VL
969 (match_operand:V_AVX512VL 1 "register_operand" "v")
971 (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
974 switch (MODE_<sseinsnmode>)
982 return "vmova<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
984 /* There is no vmovdqa8/16 use vmovdqu8/16 instead. */
985 if (<MODE>mode == V64QImode
986 || <MODE>mode == V32QImode
987 || <MODE>mode == V16QImode
988 || <MODE>mode == V32HImode
989 || <MODE>mode == V16HImode
990 || <MODE>mode == V8HImode)
991 return "vmovdqu<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
993 return "vmovdqa<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
996 [(set_attr "type" "ssemov")
997 (set_attr "prefix" "evex")
998 (set_attr "memory" "store")
999 (set_attr "mode" "<sseinsnmode>")])
1001 (define_insn "sse2_movq128"
1002 [(set (match_operand:V2DI 0 "register_operand" "=x")
1005 (match_operand:V2DI 1 "nonimmediate_operand" "xm")
1006 (parallel [(const_int 0)]))
1009 "%vmovq\t{%1, %0|%0, %q1}"
1010 [(set_attr "type" "ssemov")
1011 (set_attr "prefix" "maybe_vex")
1012 (set_attr "mode" "TI")])
1014 ;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
1015 ;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
1016 ;; from memory, we'd prefer to load the memory directly into the %xmm
1017 ;; register. To facilitate this happy circumstance, this pattern won't
1018 ;; split until after register allocation. If the 64-bit value didn't
1019 ;; come from memory, this is the best we can do. This is much better
1020 ;; than storing %edx:%eax into a stack temporary and loading an %xmm
1023 (define_insn_and_split "movdi_to_sse"
1025 [(set (match_operand:V4SI 0 "register_operand" "=?x,x")
1026 (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
1027 (clobber (match_scratch:V4SI 2 "=&x,X"))])]
1028 "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_TO_VEC"
1030 "&& reload_completed"
1033 if (register_operand (operands[1], DImode))
1035 /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
1036 Assemble the 64-bit DImode value in an xmm register. */
1037 emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
1038 gen_rtx_SUBREG (SImode, operands[1], 0)));
1039 emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
1040 gen_rtx_SUBREG (SImode, operands[1], 4)));
1041 emit_insn (gen_vec_interleave_lowv4si (operands[0], operands[0],
1044 else if (memory_operand (operands[1], DImode))
1046 rtx tmp = gen_reg_rtx (V2DImode);
1047 emit_insn (gen_vec_concatv2di (tmp, operands[1], const0_rtx));
1048 emit_move_insn (operands[0], gen_lowpart (V4SImode, tmp));
1055 [(set (match_operand:V4SF 0 "register_operand")
1056 (match_operand:V4SF 1 "zero_extended_scalar_load_operand"))]
1057 "TARGET_SSE && reload_completed"
1060 (vec_duplicate:V4SF (match_dup 1))
1064 operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
1065 operands[2] = CONST0_RTX (V4SFmode);
1069 [(set (match_operand:V2DF 0 "register_operand")
1070 (match_operand:V2DF 1 "zero_extended_scalar_load_operand"))]
1071 "TARGET_SSE2 && reload_completed"
1072 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
1074 operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
1075 operands[2] = CONST0_RTX (DFmode);
;; Misaligned vector move expander for every VMOVE mode; both operands are
;; nonimmediate and all work is delegated to
;; ix86_expand_vector_move_misalign.
1078 (define_expand "movmisalign<mode>"
1079 [(set (match_operand:VMOVE 0 "nonimmediate_operand")
1080 (match_operand:VMOVE 1 "nonimmediate_operand"))]
1083 ix86_expand_vector_move_misalign (<MODE>mode, operands);
;; Expander for unaligned float-vector (VF) loads expressed as an unspec,
;; optionally masked through the <mask_name> substitution.
;; In the case guarded by misaligned_operand (operands[1]) the body emits
;; a plain SET of operand 0 instead; its source is operand 1, or a
;; VEC_MERGE built from operand 1 and the operands indexed by
;; 2 * <mask_applied> and 3 * <mask_applied>.  The rationale is given in
;; the comment inside the body.
1087 (define_expand "<sse>_loadu<ssemodesuffix><avxsizesuffix><mask_name>"
1088 [(set (match_operand:VF 0 "register_operand")
1089 (unspec:VF [(match_operand:VF 1 "nonimmediate_operand")]
1091 "TARGET_SSE && <mask_mode512bit_condition>"
1093 /* For AVX, normal *mov<mode>_internal pattern will handle unaligned loads
1094 just fine if misaligned_operand is true, and without the UNSPEC it can
1095 be combined with arithmetic instructions. If misaligned_operand is
1096 false, still emit UNSPEC_LOADU insn to honor user's request for
1099 && misaligned_operand (operands[1], <MODE>mode))
1101 rtx src = operands[1];
1103 src = gen_rtx_VEC_MERGE (<MODE>mode, operands[1],
1104 operands[2 * <mask_applied>],
1105 operands[3 * <mask_applied>]);
1106 emit_insn (gen_rtx_SET (VOIDmode, operands[0], src));
;; Insn for the unaligned float-vector load unspec.
;; The template is selected on get_attr_mode: either %vmovups or
;; %vmovu<ssemodesuffix>, both carrying <mask_operand2>.
;; The "mode" attribute resolves, in order, to:
;;   <ssePSmode> for 16-byte modes when
;;               TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL,
;;   <MODE>      when TARGET_AVX,
;;   V4SF        when optimizing the function for size,
;;   <MODE>      otherwise.
1111 (define_insn "*<sse>_loadu<ssemodesuffix><avxsizesuffix><mask_name>"
1112 [(set (match_operand:VF 0 "register_operand" "=v")
1114 [(match_operand:VF 1 "nonimmediate_operand" "vm")]
1116 "TARGET_SSE && <mask_mode512bit_condition>"
1118 switch (get_attr_mode (insn))
1123 return "%vmovups\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
1125 return "%vmovu<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
1128 [(set_attr "type" "ssemov")
1129 (set_attr "movu" "1")
1130 (set_attr "ssememalign" "8")
1131 (set_attr "prefix" "maybe_vex")
1133 (cond [(and (match_test "<MODE_SIZE> == 16")
1134 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
1135 (const_string "<ssePSmode>")
1136 (match_test "TARGET_AVX")
1137 (const_string "<MODE>")
1138 (match_test "optimize_function_for_size_p (cfun)")
1139 (const_string "V4SF")
1141 (const_string "<MODE>")))])
;; Unaligned float-vector store: register operand 1 to memory operand 0.
;; The template is selected on get_attr_mode: %vmovups or
;; %vmovu<ssemodesuffix>.
;; The "mode" attribute matches the unaligned-load insn except that, for
;; 16-byte modes, TARGET_SSE_TYPELESS_STORES also selects <ssePSmode>
;; (in addition to TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL).
1143 (define_insn "<sse>_storeu<ssemodesuffix><avxsizesuffix>"
1144 [(set (match_operand:VF 0 "memory_operand" "=m")
1146 [(match_operand:VF 1 "register_operand" "v")]
1150 switch (get_attr_mode (insn))
1155 return "%vmovups\t{%1, %0|%0, %1}";
1157 return "%vmovu<ssemodesuffix>\t{%1, %0|%0, %1}";
1160 [(set_attr "type" "ssemov")
1161 (set_attr "movu" "1")
1162 (set_attr "ssememalign" "8")
1163 (set_attr "prefix" "maybe_vex")
1165 (cond [(and (match_test "<MODE_SIZE> == 16")
1166 (ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
1167 (match_test "TARGET_SSE_TYPELESS_STORES")))
1168 (const_string "<ssePSmode>")
1169 (match_test "TARGET_AVX")
1170 (const_string "<MODE>")
1171 (match_test "optimize_function_for_size_p (cfun)")
1172 (const_string "V4SF")
1174 (const_string "<MODE>")))])
;; Masked unaligned float-vector store (VF_AVX512VL modes): a vec_merge
;; into memory operand 0 controlled by mask register operand 2
;; (constraint Yk).  Emits vmovups or vmovu<ssemodesuffix> with the
;; {%2} write-mask on the destination; always EVEX-prefixed and marked
;; as a store.
1176 (define_insn "<avx512>_storeu<ssemodesuffix><avxsizesuffix>_mask"
1177 [(set (match_operand:VF_AVX512VL 0 "memory_operand" "=m")
1178 (vec_merge:VF_AVX512VL
1180 [(match_operand:VF_AVX512VL 1 "register_operand" "v")]
1183 (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
1186 switch (get_attr_mode (insn))
1191 return "vmovups\t{%1, %0%{%2%}|%0%{%2%}, %1}";
1193 return "vmovu<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
1196 [(set_attr "type" "ssemov")
1197 (set_attr "movu" "1")
1198 (set_attr "memory" "store")
1199 (set_attr "prefix" "evex")
1200 (set_attr "mode" "<sseinsnmode>")])
;; Expander for unaligned integer-vector loads (VI_UNALIGNED_LOADSTORE
;; modes) expressed as an unspec, optionally masked via <mask_name>.
;; In the case guarded by misaligned_operand (operands[1]) the body emits
;; a plain SET of operand 0 instead; its source is operand 1, or a
;; VEC_MERGE built from operand 1 and the operands indexed by
;; 2 * <mask_applied> and 3 * <mask_applied>.  The rationale is given in
;; the comment inside the body.
1202 (define_expand "<sse2_avx_avx512f>_loaddqu<mode><mask_name>"
1203 [(set (match_operand:VI_UNALIGNED_LOADSTORE 0 "register_operand")
1204 (unspec:VI_UNALIGNED_LOADSTORE
1205 [(match_operand:VI_UNALIGNED_LOADSTORE 1 "nonimmediate_operand")]
1207 "TARGET_SSE2 && <mask_mode512bit_condition>"
1209 /* For AVX, normal *mov<mode>_internal pattern will handle unaligned loads
1210 just fine if misaligned_operand is true, and without the UNSPEC it can
1211 be combined with arithmetic instructions. If misaligned_operand is
1212 false, still emit UNSPEC_LOADU insn to honor user's request for
1215 && misaligned_operand (operands[1], <MODE>mode))
1217 rtx src = operands[1];
1219 src = gen_rtx_VEC_MERGE (<MODE>mode, operands[1],
1220 operands[2 * <mask_applied>],
1221 operands[3 * <mask_applied>]);
1222 emit_insn (gen_rtx_SET (VOIDmode, operands[0], src));
;; Insn for the unaligned integer-vector load unspec.
;; Templates, selected on get_attr_mode:
;;   %vmovups                  on one path;
;;   %vmovdqu                  when !(TARGET_AVX512VL && TARGET_AVX512BW);
;;   vmovdqu<ssescalarsize>    otherwise, with <mask_operand2>.
;; The "mode" attribute resolves like the float load: <ssePSmode> for
;; 16-byte modes with TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL,
;; <sseinsnmode> under TARGET_AVX, V4SF when optimizing for size, else
;; <sseinsnmode>.
;; NOTE(review): the %vmovups and %vmovdqu templates omit
;; <mask_operand2> although the pattern name carries <mask_name> --
;; confirm masked variants cannot reach those returns.
1227 (define_insn "*<sse2_avx_avx512f>_loaddqu<mode><mask_name>"
1228 [(set (match_operand:VI_UNALIGNED_LOADSTORE 0 "register_operand" "=v")
1229 (unspec:VI_UNALIGNED_LOADSTORE
1230 [(match_operand:VI_UNALIGNED_LOADSTORE 1 "nonimmediate_operand" "vm")]
1232 "TARGET_SSE2 && <mask_mode512bit_condition>"
1234 switch (get_attr_mode (insn))
1239 return "%vmovups\t{%1, %0|%0, %1}";
1245 if (!(TARGET_AVX512VL && TARGET_AVX512BW))
1246 return "%vmovdqu\t{%1, %0|%0, %1}";
1248 return "vmovdqu<ssescalarsize>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
1252 [(set_attr "type" "ssemov")
1253 (set_attr "movu" "1")
1254 (set_attr "ssememalign" "8")
1255 (set (attr "prefix_data16")
1257 (match_test "TARGET_AVX")
1259 (const_string "1")))
1260 (set_attr "prefix" "maybe_vex")
1262 (cond [(and (match_test "<MODE_SIZE> == 16")
1263 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
1264 (const_string "<ssePSmode>")
1265 (match_test "TARGET_AVX")
1266 (const_string "<sseinsnmode>")
1267 (match_test "optimize_function_for_size_p (cfun)")
1268 (const_string "V4SF")
1270 (const_string "<sseinsnmode>")))])
;; Unaligned integer-vector store: register operand 1 to memory operand 0.
;; Templates, selected on get_attr_mode:
;;   %vmovups                  on one path;
;;   %vmovdqu                  when !(TARGET_AVX512VL && TARGET_AVX512BW);
;;   vmovdqu<ssescalarsize>    otherwise.
;; For 16-byte modes the "mode" attribute becomes <ssePSmode> when either
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL or TARGET_SSE_TYPELESS_STORES is
;; set; then <sseinsnmode> under TARGET_AVX, V4SF when optimizing for
;; size, else <sseinsnmode>.
1272 (define_insn "<sse2_avx_avx512f>_storedqu<mode>"
1273 [(set (match_operand:VI_UNALIGNED_LOADSTORE 0 "memory_operand" "=m")
1274 (unspec:VI_UNALIGNED_LOADSTORE
1275 [(match_operand:VI_UNALIGNED_LOADSTORE 1 "register_operand" "v")]
1279 switch (get_attr_mode (insn))
1284 return "%vmovups\t{%1, %0|%0, %1}";
1290 if (!(TARGET_AVX512VL && TARGET_AVX512BW))
1291 return "%vmovdqu\t{%1, %0|%0, %1}";
1293 return "vmovdqu<ssescalarsize>\t{%1, %0|%0, %1}";
1297 [(set_attr "type" "ssemov")
1298 (set_attr "movu" "1")
1299 (set_attr "ssememalign" "8")
1300 (set (attr "prefix_data16")
1302 (match_test "TARGET_AVX")
1304 (const_string "1")))
1305 (set_attr "prefix" "maybe_vex")
1307 (cond [(and (match_test "<MODE_SIZE> == 16")
1308 (ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
1309 (match_test "TARGET_SSE_TYPELESS_STORES")))
1310 (const_string "<ssePSmode>")
1311 (match_test "TARGET_AVX")
1312 (const_string "<sseinsnmode>")
1313 (match_test "optimize_function_for_size_p (cfun)")
1314 (const_string "V4SF")
1316 (const_string "<sseinsnmode>")))])
;; Masked unaligned integer-vector store (VI_AVX512VL modes): a vec_merge
;; into memory operand 0 under mask register operand 2 (constraint Yk).
;; Always emits vmovdqu<ssescalarsize> with the {%2} write-mask; EVEX
;; prefix, marked as a store.
1318 (define_insn "<avx512>_storedqu<mode>_mask"
1319 [(set (match_operand:VI_AVX512VL 0 "memory_operand" "=m")
1320 (vec_merge:VI_AVX512VL
1322 [(match_operand:VI_AVX512VL 1 "register_operand" "v")]
1325 (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
1327 "vmovdqu<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
1328 [(set_attr "type" "ssemov")
1329 (set_attr "movu" "1")
1330 (set_attr "memory" "store")
1331 (set_attr "prefix" "evex")
1332 (set_attr "mode" "<sseinsnmode>")])
;; lddqu: load of a byte-element vector (VI1 modes) from memory operand 1
;; into register operand 0, modelled as an unspec.  Emits %vlddqu.
;; prefix_data16 and prefix_rep are both computed from TARGET_AVX.
1334 (define_insn "<sse3>_lddqu<avxsizesuffix>"
1335 [(set (match_operand:VI1 0 "register_operand" "=x")
1336 (unspec:VI1 [(match_operand:VI1 1 "memory_operand" "m")]
1339 "%vlddqu\t{%1, %0|%0, %1}"
1340 [(set_attr "type" "ssemov")
1341 (set_attr "movu" "1")
1342 (set_attr "ssememalign" "8")
1343 (set (attr "prefix_data16")
1345 (match_test "TARGET_AVX")
1347 (const_string "0")))
1348 (set (attr "prefix_rep")
1350 (match_test "TARGET_AVX")
1352 (const_string "1")))
1353 (set_attr "prefix" "maybe_vex")
1354 (set_attr "mode" "<sseinsnmode>")])
;; movnti: store of a general register (constraint "r", SWI48 modes) to
;; memory, modelled as an unspec so it stays distinct from an ordinary
;; move.
1356 (define_insn "sse2_movnti<mode>"
1357 [(set (match_operand:SWI48 0 "memory_operand" "=m")
1358 (unspec:SWI48 [(match_operand:SWI48 1 "register_operand" "r")]
1361 "movnti\t{%1, %0|%0, %1}"
1362 [(set_attr "type" "ssemov")
1363 (set_attr "prefix_data16" "0")
1364 (set_attr "mode" "<MODE>")])
;; %vmovnt<ssemodesuffix>: store of a float-vector register (VF modes) to
;; memory operand 0.
1366 (define_insn "<sse>_movnt<mode>"
1367 [(set (match_operand:VF 0 "memory_operand" "=m")
1369 [(match_operand:VF 1 "register_operand" "v")]
1372 "%vmovnt<ssemodesuffix>\t{%1, %0|%0, %1}"
1373 [(set_attr "type" "ssemov")
1374 (set_attr "prefix" "maybe_vex")
1375 (set_attr "mode" "<MODE>")])
;; %vmovntdq: store of a 64-bit-element integer vector register (VI8
;; modes) to memory operand 0, modelled as an unspec.
;; NOTE(review): "type" is "ssecvt" here while the neighbouring movnt
;; patterns use "ssemov" -- confirm this is deliberate.
1377 (define_insn "<sse2>_movnt<mode>"
1378 [(set (match_operand:VI8 0 "memory_operand" "=m")
1379 (unspec:VI8 [(match_operand:VI8 1 "register_operand" "v")]
1382 "%vmovntdq\t{%1, %0|%0, %1}"
1383 [(set_attr "type" "ssecvt")
1384 (set (attr "prefix_data16")
1386 (match_test "TARGET_AVX")
1388 (const_string "1")))
1389 (set_attr "prefix" "maybe_vex")
1390 (set_attr "mode" "<sseinsnmode>")])
1392 ; Expand patterns for non-temporal stores. At the moment, only those
1393 ; that directly map to insns are defined; it would be possible to
1394 ; define patterns for other modes that would expand to several insns.
1396 ;; Modes handled by storent patterns.
;; Each entry pairs a mode with the target condition under which it is
;; enabled; V4SF is the only entry without a condition of its own.
1397 (define_mode_iterator STORENT_MODE
1398 [(DI "TARGET_SSE2 && TARGET_64BIT") (SI "TARGET_SSE2")
1399 (SF "TARGET_SSE4A") (DF "TARGET_SSE4A")
1400 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") (V2DI "TARGET_SSE2")
1401 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
1402 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
;; Standard-name expander "storent<mode>" for every STORENT_MODE mode:
;; a set of memory operand 0 from an unspec wrapping register operand 1.
1404 (define_expand "storent<mode>"
1405 [(set (match_operand:STORENT_MODE 0 "memory_operand")
1406 (unspec:STORENT_MODE
1407 [(match_operand:STORENT_MODE 1 "register_operand")]
1411 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1413 ;; Parallel floating point arithmetic
1415 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Unary float-vector expander parameterised by a code iterator.  All
;; work is handed to ix86_expand_fp_absneg_operator (the helper name
;; indicates abs/neg), after which expansion is DONE.
1417 (define_expand "<code><mode>2"
1418 [(set (match_operand:VF 0 "register_operand")
1420 (match_operand:VF 1 "register_operand")))]
1422 "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
;; abs/neg on float vectors, kept as an absneg_operator (operand 3) with
;; an extra (use ...) operand 2 until reload has completed.
;; The split rewrites it as a SET of operand 0 from a bitwise operation:
;; XOR when operand 3 is NEG, otherwise AND.  The two inputs op1/op2 are
;; operands 1 and 2, swapped when operand 1 is a MEM.
;; NOTE(review): operand 2 is presumably the sign-bit mask constant
;; prepared by the expander -- confirm against
;; ix86_expand_fp_absneg_operator.
1424 (define_insn_and_split "*absneg<mode>2"
1425 [(set (match_operand:VF 0 "register_operand" "=x,x,v,v")
1426 (match_operator:VF 3 "absneg_operator"
1427 [(match_operand:VF 1 "nonimmediate_operand" "0, xm, v, m")]))
1428 (use (match_operand:VF 2 "nonimmediate_operand" "xm, 0, vm,v"))]
1431 "&& reload_completed"
1434 enum rtx_code absneg_op;
1440 if (MEM_P (operands[1]))
1441 op1 = operands[2], op2 = operands[1];
1443 op1 = operands[1], op2 = operands[2];
1448 if (rtx_equal_p (operands[0], operands[1]))
1454 absneg_op = GET_CODE (operands[3]) == NEG ? XOR : AND;
1455 t = gen_rtx_fmt_ee (absneg_op, <MODE>mode, op1, op2);
1456 t = gen_rtx_SET (VOIDmode, operands[0], t);
1460 [(set_attr "isa" "noavx,noavx,avx,avx")])
;; Float-vector add/sub expander (plusminus code iterator), with optional
;; masking and embedded-rounding substitutions.  Operands are normalised
;; by ix86_fixup_binary_operands_no_copy before the pattern is emitted.
1462 (define_expand "<plusminus_insn><mode>3<mask_name><round_name>"
1463 [(set (match_operand:VF 0 "register_operand")
1465 (match_operand:VF 1 "<round_nimm_predicate>")
1466 (match_operand:VF 2 "<round_nimm_predicate>")))]
1467 "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1468 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Float-vector add/sub insn.  Alternative 0 (noavx) is the two-operand
;; form with operand 1 tied to the destination; alternative 1 (avx) is
;; the three-operand v-prefixed form and carries the mask and rounding
;; operands.  The condition also requires ix86_binary_operator_ok.
1470 (define_insn "*<plusminus_insn><mode>3<mask_name><round_name>"
1471 [(set (match_operand:VF 0 "register_operand" "=x,v")
1473 (match_operand:VF 1 "<round_nimm_predicate>" "<comm>0,v")
1474 (match_operand:VF 2 "<round_nimm_predicate>" "xm,<round_constraint>")))]
1475 "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands) && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1477 <plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
1478 v<plusminus_mnemonic><ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
1479 [(set_attr "isa" "noavx,avx")
1480 (set_attr "type" "sseadd")
1481 (set_attr "prefix" "<mask_prefix3>")
1482 (set_attr "mode" "<MODE>")])
;; Scalar-suffixed (<ssescalarmodesuffix>) add/sub on 128-bit float
;; vectors (VF_128).  Alternative 0 is the two-operand legacy form,
;; alternative 1 the three-operand v-prefixed form with optional
;; embedded rounding.  The "mode" attribute is the scalar mode.
1484 (define_insn "<sse>_vm<plusminus_insn><mode>3<round_name>"
1485 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1488 (match_operand:VF_128 1 "register_operand" "0,v")
1489 (match_operand:VF_128 2 "nonimmediate_operand" "xm,<round_constraint>"))
1494 <plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
1495 v<plusminus_mnemonic><ssescalarmodesuffix>\t{<round_op3>%2, %1, %0|%0, %1, %<iptr>2<round_op3>}"
1496 [(set_attr "isa" "noavx,avx")
1497 (set_attr "type" "sseadd")
1498 (set_attr "prefix" "<round_prefix>")
1499 (set_attr "mode" "<ssescalarmode>")])
;; Float-vector multiply expander with optional masking and embedded
;; rounding.  Operands are normalised for MULT by
;; ix86_fixup_binary_operands_no_copy.
1501 (define_expand "mul<mode>3<mask_name><round_name>"
1502 [(set (match_operand:VF 0 "register_operand")
1504 (match_operand:VF 1 "<round_nimm_predicate>")
1505 (match_operand:VF 2 "<round_nimm_predicate>")))]
1506 "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1507 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; Float-vector multiply insn.  Operand 1 is marked commutative ("%").
;; Alternative 0 (noavx) emits mul<ssemodesuffix> with operand 1 tied to
;; the destination; alternative 1 (avx) emits the three-operand vmul form
;; with mask and rounding operands.
1509 (define_insn "*mul<mode>3<mask_name><round_name>"
1510 [(set (match_operand:VF 0 "register_operand" "=x,v")
1512 (match_operand:VF 1 "<round_nimm_predicate>" "%0,v")
1513 (match_operand:VF 2 "<round_nimm_predicate>" "xm,<round_constraint>")))]
1514 "TARGET_SSE && ix86_binary_operator_ok (MULT, <MODE>mode, operands) && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1516 mul<ssemodesuffix>\t{%2, %0|%0, %2}
1517 vmul<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
1518 [(set_attr "isa" "noavx,avx")
1519 (set_attr "type" "ssemul")
1520 (set_attr "prefix" "<mask_prefix3>")
1521 (set_attr "btver2_decode" "direct,double")
1522 (set_attr "mode" "<MODE>")])
;; Scalar-suffixed multiply/divide (<multdiv_mnemonic>) on 128-bit float
;; vectors.  Alternative 0 is the two-operand legacy form, alternative 1
;; the three-operand v-prefixed form with optional embedded rounding.
;; The "type" attribute is derived from the mnemonic
;; ("sse<multdiv_mnemonic>").
1524 (define_insn "<sse>_vm<multdiv_mnemonic><mode>3<round_name>"
1525 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1528 (match_operand:VF_128 1 "register_operand" "0,v")
1529 (match_operand:VF_128 2 "nonimmediate_operand" "xm,<round_constraint>"))
1534 <multdiv_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
1535 v<multdiv_mnemonic><ssescalarmodesuffix>\t{<round_op3>%2, %1, %0|%0, %1, %<iptr>2<round_op3>}"
1536 [(set_attr "isa" "noavx,avx")
1537 (set_attr "type" "sse<multdiv_mnemonic>")
1538 (set_attr "prefix" "<round_prefix>")
1539 (set_attr "btver2_decode" "direct,double")
1540 (set_attr "mode" "<ssescalarmode>")])
;; Division expander for the VF2 modes: only normalises the operands for
;; DIV via ix86_fixup_binary_operands_no_copy.
1542 (define_expand "div<mode>3"
1543 [(set (match_operand:VF2 0 "register_operand")
1544 (div:VF2 (match_operand:VF2 1 "register_operand")
1545 (match_operand:VF2 2 "nonimmediate_operand")))]
1547 "ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);")
;; Division expander for the VF1 modes.  After normalising the operands
;; it may replace the division with the sequence produced by
;; ix86_emit_swdivsf; the visible guards for that are
;; TARGET_RECIP_VEC_DIV, not optimizing the insn for size,
;; flag_finite_math_only, !flag_trapping_math and
;; flag_unsafe_math_optimizations.
1549 (define_expand "div<mode>3"
1550 [(set (match_operand:VF1 0 "register_operand")
1551 (div:VF1 (match_operand:VF1 1 "register_operand")
1552 (match_operand:VF1 2 "nonimmediate_operand")))]
1555 ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);
1558 && TARGET_RECIP_VEC_DIV
1559 && !optimize_insn_for_size_p ()
1560 && flag_finite_math_only && !flag_trapping_math
1561 && flag_unsafe_math_optimizations)
1563 ix86_emit_swdivsf (operands[0], operands[1], operands[2], <MODE>mode);
;; Float-vector divide insn.  Operand 1 must be a register (division is
;; not commutative, so there is no "%" marker).  Alternative 0 emits
;; div<ssemodesuffix>, alternative 1 the three-operand vdiv form with
;; mask and rounding operands.
1568 (define_insn "<sse>_div<mode>3<mask_name><round_name>"
1569 [(set (match_operand:VF 0 "register_operand" "=x,v")
1571 (match_operand:VF 1 "register_operand" "0,v")
1572 (match_operand:VF 2 "<round_nimm_predicate>" "xm,<round_constraint>")))]
1573 "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1575 div<ssemodesuffix>\t{%2, %0|%0, %2}
1576 vdiv<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
1577 [(set_attr "isa" "noavx,avx")
1578 (set_attr "type" "ssediv")
1579 (set_attr "prefix" "<mask_prefix3>")
1580 (set_attr "mode" "<MODE>")])
;; Packed single-precision reciprocal (UNSPEC_RCP) for the 128/256-bit
;; VF1 modes; emits %vrcpps.
1582 (define_insn "<sse>_rcp<mode>2"
1583 [(set (match_operand:VF1_128_256 0 "register_operand" "=x")
1585 [(match_operand:VF1_128_256 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
1587 "%vrcpps\t{%1, %0|%0, %1}"
1588 [(set_attr "type" "sse")
1589 (set_attr "atom_sse_attr" "rcp")
1590 (set_attr "btver2_sse_attr" "rcp")
1591 (set_attr "prefix" "maybe_vex")
1592 (set_attr "mode" "<MODE>")])
;; Scalar single-precision reciprocal on a V4SF value: rcpss (operand 2
;; tied to the destination) or three-operand vrcpss.  Operand 1 is the
;; unspec input, operand 2 the second register input; the Intel-syntax
;; template prints operand 1 with the %k modifier.
1594 (define_insn "sse_vmrcpv4sf2"
1595 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1597 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
1599 (match_operand:V4SF 2 "register_operand" "0,x")
1603 rcpss\t{%1, %0|%0, %k1}
1604 vrcpss\t{%1, %2, %0|%0, %2, %k1}"
1605 [(set_attr "isa" "noavx,avx")
1606 (set_attr "type" "sse")
1607 (set_attr "ssememalign" "32")
1608 (set_attr "atom_sse_attr" "rcp")
1609 (set_attr "btver2_sse_attr" "rcp")
1610 (set_attr "prefix" "orig,vex")
1611 (set_attr "mode" "SF")])
;; vrcp14<ssemodesuffix> for the VF_AVX512VL modes, with optional
;; write-mask (<mask_operand2>); EVEX-prefixed.
1613 (define_insn "<mask_codefor>rcp14<mode><mask_name>"
1614 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
1616 [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "vm")]
1619 "vrcp14<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
1620 [(set_attr "type" "sse")
1621 (set_attr "prefix" "evex")
1622 (set_attr "mode" "<MODE>")])
;; Scalar-suffixed vrcp14 on 128-bit float vectors: operand 1 is the
;; unspec input (register or memory), operand 2 the second register
;; input.  EVEX-prefixed.
;; NOTE(review): "mode" is "<MODE>" although the instruction carries the
;; scalar suffix; other scalar patterns in this file use
;; "<ssescalarmode>" -- confirm.
1624 (define_insn "srcp14<mode>"
1625 [(set (match_operand:VF_128 0 "register_operand" "=v")
1628 [(match_operand:VF_128 1 "nonimmediate_operand" "vm")]
1630 (match_operand:VF_128 2 "register_operand" "v")
1633 "vrcp14<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %1}"
1634 [(set_attr "type" "sse")
1635 (set_attr "prefix" "evex")
1636 (set_attr "mode" "<MODE>")])
;; Square-root expander for the VF2 modes; it only emits the sqrt RTL.
1638 (define_expand "sqrt<mode>2"
1639 [(set (match_operand:VF2 0 "register_operand")
1640 (sqrt:VF2 (match_operand:VF2 1 "nonimmediate_operand")))]
;; Square-root expander for the VF1 modes.  It may replace the sqrt with
;; the sequence from ix86_emit_swsqrtsf (last argument false); the
;; visible guards are TARGET_RECIP_VEC_SQRT, not optimizing the insn for
;; size, flag_finite_math_only, !flag_trapping_math and
;; flag_unsafe_math_optimizations.
1643 (define_expand "sqrt<mode>2"
1644 [(set (match_operand:VF1 0 "register_operand")
1645 (sqrt:VF1 (match_operand:VF1 1 "nonimmediate_operand")))]
1649 && TARGET_RECIP_VEC_SQRT
1650 && !optimize_insn_for_size_p ()
1651 && flag_finite_math_only && !flag_trapping_math
1652 && flag_unsafe_math_optimizations)
1654 ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, false);
;; Packed float square root with optional masking and embedded rounding;
;; a single alternative emitting %vsqrt<ssemodesuffix>.
1659 (define_insn "<sse>_sqrt<mode>2<mask_name><round_name>"
1660 [(set (match_operand:VF 0 "register_operand" "=v")
1661 (sqrt:VF (match_operand:VF 1 "<round_nimm_predicate>" "<round_constraint>")))]
1662 "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1663 "%vsqrt<ssemodesuffix>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
1664 [(set_attr "type" "sse")
1665 (set_attr "atom_sse_attr" "sqrt")
1666 (set_attr "btver2_sse_attr" "sqrt")
1667 (set_attr "prefix" "maybe_vex")
1668 (set_attr "mode" "<MODE>")])
;; Scalar-suffixed square root on 128-bit float vectors.  Operand 1 is
;; the sqrt input; operand 2 is the second register input (tied to the
;; destination in the legacy alternative, a separate source in the
;; v-prefixed one).
1670 (define_insn "<sse>_vmsqrt<mode>2<round_name>"
1671 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1674 (match_operand:VF_128 1 "nonimmediate_operand" "xm,<round_constraint>"))
1675 (match_operand:VF_128 2 "register_operand" "0,v")
1679 sqrt<ssescalarmodesuffix>\t{%1, %0|%0, %<iptr>1}
1680 vsqrt<ssescalarmodesuffix>\t{<round_op3>%1, %2, %0|%0, %2, %<iptr>1<round_op3>}"
1681 [(set_attr "isa" "noavx,avx")
1682 (set_attr "type" "sse")
1683 (set_attr "atom_sse_attr" "sqrt")
1684 (set_attr "prefix" "<round_prefix>")
1685 (set_attr "btver2_sse_attr" "sqrt")
1686 (set_attr "mode" "<ssescalarmode>")])
;; Reciprocal square-root expander (UNSPEC_RSQRT) for the 128/256-bit
;; VF1 modes; expansion goes through ix86_emit_swsqrtsf with its last
;; argument true.
1688 (define_expand "rsqrt<mode>2"
1689 [(set (match_operand:VF1_128_256 0 "register_operand")
1691 [(match_operand:VF1_128_256 1 "nonimmediate_operand")] UNSPEC_RSQRT))]
1694 ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, true);
;; Packed single-precision reciprocal square root (UNSPEC_RSQRT) for the
;; 128/256-bit VF1 modes; emits %vrsqrtps.
1698 (define_insn "<sse>_rsqrt<mode>2"
1699 [(set (match_operand:VF1_128_256 0 "register_operand" "=x")
1701 [(match_operand:VF1_128_256 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
1703 "%vrsqrtps\t{%1, %0|%0, %1}"
1704 [(set_attr "type" "sse")
1705 (set_attr "prefix" "maybe_vex")
1706 (set_attr "mode" "<MODE>")])
;; vrsqrt14<ssemodesuffix> for the VF_AVX512VL modes, with optional
;; write-mask (<mask_operand2>); EVEX-prefixed.
1708 (define_insn "<mask_codefor>rsqrt14<mode><mask_name>"
1709 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
1711 [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "vm")]
1714 "vrsqrt14<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
1715 [(set_attr "type" "sse")
1716 (set_attr "prefix" "evex")
1717 (set_attr "mode" "<MODE>")])
;; Scalar-suffixed vrsqrt14 on 128-bit float vectors: operand 1 is the
;; unspec input (register or memory), operand 2 the second register
;; input.  EVEX-prefixed.
;; NOTE(review): "mode" is "<MODE>" although the instruction carries the
;; scalar suffix; other scalar patterns in this file use
;; "<ssescalarmode>" -- confirm.
1719 (define_insn "rsqrt14<mode>"
1720 [(set (match_operand:VF_128 0 "register_operand" "=v")
1723 [(match_operand:VF_128 1 "nonimmediate_operand" "vm")]
1725 (match_operand:VF_128 2 "register_operand" "v")
1728 "vrsqrt14<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %1}"
1729 [(set_attr "type" "sse")
1730 (set_attr "prefix" "evex")
1731 (set_attr "mode" "<MODE>")])
;; Scalar single-precision reciprocal square root on a V4SF value:
;; rsqrtss (operand 2 tied to the destination) or three-operand vrsqrtss.
;; Operand 1 is the unspec input, operand 2 the second register input.
1733 (define_insn "sse_vmrsqrtv4sf2"
1734 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1736 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
1738 (match_operand:V4SF 2 "register_operand" "0,x")
1742 rsqrtss\t{%1, %0|%0, %k1}
1743 vrsqrtss\t{%1, %2, %0|%0, %2, %k1}"
1744 [(set_attr "isa" "noavx,avx")
1745 (set_attr "type" "sse")
1746 (set_attr "ssememalign" "32")
1747 (set_attr "prefix" "orig,vex")
1748 (set_attr "mode" "SF")])
1750 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
1751 ;; isn't really correct, as those rtl operators aren't defined when
1752 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
;; Float-vector min/max expander (code iterator) with optional masking
;; and SAE-only rounding.  When !flag_finite_math_only, operand 1 is
;; forced into a register before the operands are normalised with
;; ix86_fixup_binary_operands_no_copy.
1754 (define_expand "<code><mode>3<mask_name><round_saeonly_name>"
1755 [(set (match_operand:VF 0 "register_operand")
1757 (match_operand:VF 1 "<round_saeonly_nimm_predicate>")
1758 (match_operand:VF 2 "<round_saeonly_nimm_predicate>")))]
1759 "TARGET_SSE && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
1761 if (!flag_finite_math_only)
1762 operands[1] = force_reg (<MODE>mode, operands[1]);
1763 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
;; Float-vector min/max insn used only when flag_finite_math_only is
;; set.  Operand 1 is marked commutative ("%") and the condition also
;; requires ix86_binary_operator_ok.
1766 (define_insn "*<code><mode>3_finite<mask_name><round_saeonly_name>"
1767 [(set (match_operand:VF 0 "register_operand" "=x,v")
1769 (match_operand:VF 1 "<round_saeonly_nimm_predicate>" "%0,v")
1770 (match_operand:VF 2 "<round_saeonly_nimm_predicate>" "xm,<round_saeonly_constraint>")))]
1771 "TARGET_SSE && flag_finite_math_only
1772 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
1773 && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
1775 <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
1776 v<maxmin_float><ssemodesuffix>\t{<round_saeonly_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_saeonly_mask_op3>}"
1777 [(set_attr "isa" "noavx,avx")
1778 (set_attr "type" "sseadd")
1779 (set_attr "btver2_sse_attr" "maxmin")
1780 (set_attr "prefix" "<mask_prefix3>")
1781 (set_attr "mode" "<MODE>")])
;; Float-vector min/max insn used when !flag_finite_math_only.  Unlike
;; the finite variant, operand 1 must be a register and carries no
;; commutative marker, so operand order is preserved.
1783 (define_insn "*<code><mode>3<mask_name><round_saeonly_name>"
1784 [(set (match_operand:VF 0 "register_operand" "=x,v")
1786 (match_operand:VF 1 "register_operand" "0,v")
1787 (match_operand:VF 2 "<round_saeonly_nimm_predicate>" "xm,<round_saeonly_constraint>")))]
1788 "TARGET_SSE && !flag_finite_math_only
1789 && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
1791 <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
1792 v<maxmin_float><ssemodesuffix>\t{<round_saeonly_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_saeonly_mask_op3>}"
1793 [(set_attr "isa" "noavx,avx")
1794 (set_attr "type" "sseadd")
1795 (set_attr "btver2_sse_attr" "maxmin")
1796 (set_attr "prefix" "<mask_prefix3>")
1797 (set_attr "mode" "<MODE>")])
;; Scalar-suffixed min/max on 128-bit float vectors.  Alternative 0 is
;; the two-operand legacy form, alternative 1 the three-operand
;; v-prefixed form with optional SAE-only rounding operand.
1799 (define_insn "<sse>_vm<code><mode>3<round_saeonly_name>"
1800 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1803 (match_operand:VF_128 1 "register_operand" "0,v")
1804 (match_operand:VF_128 2 "nonimmediate_operand" "xm,<round_saeonly_constraint>"))
1809 <maxmin_float><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
1810 v<maxmin_float><ssescalarmodesuffix>\t{<round_saeonly_op3>%2, %1, %0|%0, %1, %<iptr>2<round_saeonly_op3>}"
1811 [(set_attr "isa" "noavx,avx")
1812 (set_attr "type" "sse")
1813 (set_attr "btver2_sse_attr" "maxmin")
1814 (set_attr "prefix" "<round_saeonly_prefix>")
1815 (set_attr "mode" "<ssescalarmode>")])
1817 ;; These versions of the min/max patterns implement exactly the operations
1818 ;; min = (op1 < op2 ? op1 : op2)
1819 ;; max = (!(op1 < op2) ? op1 : op2)
1820 ;; Their operands are not commutative, and thus they may be used in the
1821 ;; presence of -0.0 and NaN.
;; Non-commutative float-vector min: operand 1 is always a register and
;; operand order is kept as written.  Emits min<ssemodesuffix> (operand 1
;; tied to the destination) or three-operand vmin<ssemodesuffix>.
1823 (define_insn "*ieee_smin<mode>3"
1824 [(set (match_operand:VF 0 "register_operand" "=v,v")
1826 [(match_operand:VF 1 "register_operand" "0,v")
1827 (match_operand:VF 2 "nonimmediate_operand" "vm,vm")]
1831 min<ssemodesuffix>\t{%2, %0|%0, %2}
1832 vmin<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1833 [(set_attr "isa" "noavx,avx")
1834 (set_attr "type" "sseadd")
1835 (set_attr "prefix" "orig,vex")
1836 (set_attr "mode" "<MODE>")])
;; Non-commutative float-vector max: operand 1 is always a register and
;; operand order is kept as written.  Emits max<ssemodesuffix> (operand 1
;; tied to the destination) or three-operand vmax<ssemodesuffix>.
1838 (define_insn "*ieee_smax<mode>3"
1839 [(set (match_operand:VF 0 "register_operand" "=v,v")
1841 [(match_operand:VF 1 "register_operand" "0,v")
1842 (match_operand:VF 2 "nonimmediate_operand" "vm,vm")]
1846 max<ssemodesuffix>\t{%2, %0|%0, %2}
1847 vmax<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1848 [(set_attr "isa" "noavx,avx")
1849 (set_attr "type" "sseadd")
1850 (set_attr "prefix" "orig,vex")
1851 (set_attr "mode" "<MODE>")])
;; vaddsubpd on V4DF: the RTL combines an operation on operands 1 and 2
;; with (minus op1 op2) of the same operands; VEX-only, three-operand
;; form.
1853 (define_insn "avx_addsubv4df3"
1854 [(set (match_operand:V4DF 0 "register_operand" "=x")
1857 (match_operand:V4DF 1 "register_operand" "x")
1858 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
1859 (minus:V4DF (match_dup 1) (match_dup 2))
1862 "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1863 [(set_attr "type" "sseadd")
1864 (set_attr "prefix" "vex")
1865 (set_attr "mode" "V4DF")])
;; addsubpd / vaddsubpd on V2DF.  Alternative 0 ties operand 1 to the
;; destination (legacy encoding); alternative 1 is the VEX three-operand
;; form.
1867 (define_insn "sse3_addsubv2df3"
1868 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1871 (match_operand:V2DF 1 "register_operand" "0,x")
1872 (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm"))
1873 (minus:V2DF (match_dup 1) (match_dup 2))
1877 addsubpd\t{%2, %0|%0, %2}
1878 vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1879 [(set_attr "isa" "noavx,avx")
1880 (set_attr "type" "sseadd")
1881 (set_attr "atom_unit" "complex")
1882 (set_attr "prefix" "orig,vex")
1883 (set_attr "mode" "V2DF")])
;; vaddsubps on V8SF: the RTL combines an operation on operands 1 and 2
;; with (minus op1 op2) of the same operands; VEX-only, three-operand
;; form.
1885 (define_insn "avx_addsubv8sf3"
1886 [(set (match_operand:V8SF 0 "register_operand" "=x")
1889 (match_operand:V8SF 1 "register_operand" "x")
1890 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
1891 (minus:V8SF (match_dup 1) (match_dup 2))
1894 "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1895 [(set_attr "type" "sseadd")
1896 (set_attr "prefix" "vex")
1897 (set_attr "mode" "V8SF")])
;; addsubps / vaddsubps on V4SF.  Alternative 0 ties operand 1 to the
;; destination (legacy encoding, prefix_rep 1); alternative 1 is the VEX
;; three-operand form.
1899 (define_insn "sse3_addsubv4sf3"
1900 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1903 (match_operand:V4SF 1 "register_operand" "0,x")
1904 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
1905 (minus:V4SF (match_dup 1) (match_dup 2))
1909 addsubps\t{%2, %0|%0, %2}
1910 vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1911 [(set_attr "isa" "noavx,avx")
1912 (set_attr "type" "sseadd")
1913 (set_attr "prefix" "orig,vex")
1914 (set_attr "prefix_rep" "1,*")
1915 (set_attr "mode" "V4SF")])
;; Horizontal add/sub on V4DF (vhaddpd / vhsubpd).  The RTL pairs
;; adjacent DF elements: elements 0,1 of operand 1, elements 0,1 of
;; operand 2, then elements 2,3 of operand 1 and elements 2,3 of
;; operand 2.
1917 (define_insn "avx_h<plusminus_insn>v4df3"
1918 [(set (match_operand:V4DF 0 "register_operand" "=x")
1923 (match_operand:V4DF 1 "register_operand" "x")
1924 (parallel [(const_int 0)]))
1925 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1928 (match_operand:V4DF 2 "nonimmediate_operand" "xm")
1929 (parallel [(const_int 0)]))
1930 (vec_select:DF (match_dup 2) (parallel [(const_int 1)]))))
1933 (vec_select:DF (match_dup 1) (parallel [(const_int 2)]))
1934 (vec_select:DF (match_dup 1) (parallel [(const_int 3)])))
1936 (vec_select:DF (match_dup 2) (parallel [(const_int 2)]))
1937 (vec_select:DF (match_dup 2) (parallel [(const_int 3)]))))))]
1939 "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
1940 [(set_attr "type" "sseadd")
1941 (set_attr "prefix" "vex")
1942 (set_attr "mode" "V4DF")])
;; Expander generating the canonical horizontal-add RTL on V2DF: element
;; 0 paired with element 1 of operand 1, then element 0 paired with
;; element 1 of operand 2.
1944 (define_expand "sse3_haddv2df3"
1945 [(set (match_operand:V2DF 0 "register_operand")
1949 (match_operand:V2DF 1 "register_operand")
1950 (parallel [(const_int 0)]))
1951 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1954 (match_operand:V2DF 2 "nonimmediate_operand")
1955 (parallel [(const_int 0)]))
1956 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
;; haddpd / vhaddpd matcher on V2DF.  The element indices are matched as
;; operands 3-6 (each 0 or 1); the condition only requires the two
;; indices taken from the same source to differ, so either element order
;; within a pair is accepted.
1959 (define_insn "*sse3_haddv2df3"
1960 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1964 (match_operand:V2DF 1 "register_operand" "0,x")
1965 (parallel [(match_operand:SI 3 "const_0_to_1_operand")]))
1968 (parallel [(match_operand:SI 4 "const_0_to_1_operand")])))
1971 (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm")
1972 (parallel [(match_operand:SI 5 "const_0_to_1_operand")]))
1975 (parallel [(match_operand:SI 6 "const_0_to_1_operand")])))))]
1977 && INTVAL (operands[3]) != INTVAL (operands[4])
1978 && INTVAL (operands[5]) != INTVAL (operands[6])"
1980 haddpd\t{%2, %0|%0, %2}
1981 vhaddpd\t{%2, %1, %0|%0, %1, %2}"
1982 [(set_attr "isa" "noavx,avx")
1983 (set_attr "type" "sseadd")
1984 (set_attr "prefix" "orig,vex")
1985 (set_attr "mode" "V2DF")])
;; hsubpd / vhsubpd on V2DF.  The element indices are fixed constants
;; (element 0 first, element 1 second) for each source, since
;; subtraction is order-sensitive.
1987 (define_insn "sse3_hsubv2df3"
1988 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1992 (match_operand:V2DF 1 "register_operand" "0,x")
1993 (parallel [(const_int 0)]))
1994 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1997 (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm")
1998 (parallel [(const_int 0)]))
1999 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
2002 hsubpd\t{%2, %0|%0, %2}
2003 vhsubpd\t{%2, %1, %0|%0, %1, %2}"
2004 [(set_attr "isa" "noavx,avx")
2005 (set_attr "type" "sseadd")
2006 (set_attr "prefix" "orig,vex")
2007 (set_attr "mode" "V2DF")])
;; DFmode result of combining the two elements of one V2DF register with
;; haddpd / vhaddpd, using the same register for both instruction
;; sources.  The two element indices (operands 2 and 3, each 0 or 1)
;; only need to differ.
2009 (define_insn "*sse3_haddv2df3_low"
2010 [(set (match_operand:DF 0 "register_operand" "=x,x")
2013 (match_operand:V2DF 1 "register_operand" "0,x")
2014 (parallel [(match_operand:SI 2 "const_0_to_1_operand")]))
2017 (parallel [(match_operand:SI 3 "const_0_to_1_operand")]))))]
2019 && INTVAL (operands[2]) != INTVAL (operands[3])"
2021 haddpd\t{%0, %0|%0, %0}
2022 vhaddpd\t{%1, %1, %0|%0, %1, %1}"
2023 [(set_attr "isa" "noavx,avx")
2024 (set_attr "type" "sseadd1")
2025 (set_attr "prefix" "orig,vex")
2026 (set_attr "mode" "V2DF")])
;; DFmode result of combining element 0 and element 1 of one V2DF
;; register with hsubpd / vhsubpd, using the same register for both
;; instruction sources.  The indices are fixed constants.
2028 (define_insn "*sse3_hsubv2df3_low"
2029 [(set (match_operand:DF 0 "register_operand" "=x,x")
2032 (match_operand:V2DF 1 "register_operand" "0,x")
2033 (parallel [(const_int 0)]))
2036 (parallel [(const_int 1)]))))]
2039 hsubpd\t{%0, %0|%0, %0}
2040 vhsubpd\t{%1, %1, %0|%0, %1, %1}"
2041 [(set_attr "isa" "noavx,avx")
2042 (set_attr "type" "sseadd1")
2043 (set_attr "prefix" "orig,vex")
2044 (set_attr "mode" "V2DF")])
;; Horizontal add/sub on V8SF (vhaddps / vhsubps).  The RTL pairs
;; adjacent SF elements: (0,1) and (2,3) of operand 1, (0,1) and (2,3)
;; of operand 2, then (4,5) and (6,7) of operand 1 and (4,5) and (6,7)
;; of operand 2.
2046 (define_insn "avx_h<plusminus_insn>v8sf3"
2047 [(set (match_operand:V8SF 0 "register_operand" "=x")
2053 (match_operand:V8SF 1 "register_operand" "x")
2054 (parallel [(const_int 0)]))
2055 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
2057 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
2058 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
2062 (match_operand:V8SF 2 "nonimmediate_operand" "xm")
2063 (parallel [(const_int 0)]))
2064 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
2066 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
2067 (vec_select:SF (match_dup 2) (parallel [(const_int 3)])))))
2071 (vec_select:SF (match_dup 1) (parallel [(const_int 4)]))
2072 (vec_select:SF (match_dup 1) (parallel [(const_int 5)])))
2074 (vec_select:SF (match_dup 1) (parallel [(const_int 6)]))
2075 (vec_select:SF (match_dup 1) (parallel [(const_int 7)]))))
2078 (vec_select:SF (match_dup 2) (parallel [(const_int 4)]))
2079 (vec_select:SF (match_dup 2) (parallel [(const_int 5)])))
2081 (vec_select:SF (match_dup 2) (parallel [(const_int 6)]))
2082 (vec_select:SF (match_dup 2) (parallel [(const_int 7)])))))))]
2084 "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
2085 [(set_attr "type" "sseadd")
2086 (set_attr "prefix" "vex")
2087 (set_attr "mode" "V8SF")])
;; Horizontal add/sub on V4SF (h*ps / vh*ps).  The RTL pairs adjacent SF
;; elements: (0,1) and (2,3) of operand 1, then (0,1) and (2,3) of
;; operand 2.  Alternative 0 ties operand 1 to the destination.
2089 (define_insn "sse3_h<plusminus_insn>v4sf3"
2090 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2095 (match_operand:V4SF 1 "register_operand" "0,x")
2096 (parallel [(const_int 0)]))
2097 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
2099 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
2100 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
2104 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm")
2105 (parallel [(const_int 0)]))
2106 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
2108 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
2109 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
2112 h<plusminus_mnemonic>ps\t{%2, %0|%0, %2}
2113 vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
2114 [(set_attr "isa" "noavx,avx")
2115 (set_attr "type" "sseadd")
2116 (set_attr "atom_unit" "complex")
2117 (set_attr "prefix" "orig,vex")
2118 (set_attr "prefix_rep" "1,*")
2119 (set_attr "mode" "V4SF")])
;; Sum reduction of a V8DF register, delegated to ix86_expand_reduc with
;; gen_addv8df3 as the combining operation.
2121 (define_expand "reduc_splus_v8df"
2122 [(match_operand:V8DF 0 "register_operand")
2123 (match_operand:V8DF 1 "register_operand")]
2126 ix86_expand_reduc (gen_addv8df3, operands[0], operands[1]);
;; Sum reduction of a V4DF register in three steps: horizontal add of
;; the input with itself (tmp), vperm2f128 of tmp with itself and
;; immediate 1 (tmp2), then an ordinary add of tmp and tmp2 into
;; operand 0.
2130 (define_expand "reduc_splus_v4df"
2131 [(match_operand:V4DF 0 "register_operand")
2132 (match_operand:V4DF 1 "register_operand")]
2135 rtx tmp = gen_reg_rtx (V4DFmode);
2136 rtx tmp2 = gen_reg_rtx (V4DFmode);
2137 emit_insn (gen_avx_haddv4df3 (tmp, operands[1], operands[1]));
2138 emit_insn (gen_avx_vperm2f128v4df3 (tmp2, tmp, tmp, GEN_INT (1)));
2139 emit_insn (gen_addv4df3 (operands[0], tmp, tmp2));
;; V2DF plus-reduction expander: a single horizontal add of operand 1 with
;; itself, written directly to operand 0.
2143 (define_expand "reduc_splus_v2df"
2144 [(match_operand:V2DF 0 "register_operand")
2145 (match_operand:V2DF 1 "register_operand")]
2148 emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
;; V16SF plus-reduction expander: delegates to ix86_expand_reduc with
;; gen_addv16sf3 as the combining insn generator.
2152 (define_expand "reduc_splus_v16sf"
2153 [(match_operand:V16SF 0 "register_operand")
2154 (match_operand:V16SF 1 "register_operand")]
2157 ix86_expand_reduc (gen_addv16sf3, operands[0], operands[1]);
;; V8SF plus-reduction expander.  Emits four insns:
;;   tmp  = haddv8sf3 (op1, op1)
;;   tmp2 = haddv8sf3 (tmp, tmp)
;;   tmp  = vperm2f128v8sf3 (tmp2, tmp2, imm 1)   ; tmp is reused here
;;   op0  = addv8sf3 (tmp, tmp2)
;; NOTE(review): immediate 1 presumably selects the swapped 128-bit halves;
;; confirm against the avx_vperm2f128v8sf3 definition.
2161 (define_expand "reduc_splus_v8sf"
2162 [(match_operand:V8SF 0 "register_operand")
2163 (match_operand:V8SF 1 "register_operand")]
2166 rtx tmp = gen_reg_rtx (V8SFmode);
2167 rtx tmp2 = gen_reg_rtx (V8SFmode);
2168 emit_insn (gen_avx_haddv8sf3 (tmp, operands[1], operands[1]));
2169 emit_insn (gen_avx_haddv8sf3 (tmp2, tmp, tmp));
2170 emit_insn (gen_avx_vperm2f128v8sf3 (tmp, tmp2, tmp2, GEN_INT (1)));
2171 emit_insn (gen_addv8sf3 (operands[0], tmp, tmp2));
;; V4SF plus-reduction expander.  Two strategies appear in the body:
;;   - two chained haddv4sf3 insns (op1,op1 -> tmp; tmp,tmp -> op0), or
;;   - ix86_expand_reduc with gen_addv4sf3 as the combining generator.
;; NOTE(review): the test that selects between them is not visible in this
;; excerpt; presumably it checks for SSE3 availability -- confirm.
2175 (define_expand "reduc_splus_v4sf"
2176 [(match_operand:V4SF 0 "register_operand")
2177 (match_operand:V4SF 1 "register_operand")]
2182 rtx tmp = gen_reg_rtx (V4SFmode);
2183 emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
2184 emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
2187 ix86_expand_reduc (gen_addv4sf3, operands[0], operands[1]);
2191 ;; Modes handled by reduc_sm{in,ax}* patterns.
;; Each mode is enabled only under the target condition listed next to it:
;; 256-bit integer modes need AVX2, 256-bit float modes need AVX, V4SF needs
;; SSE, 512-bit byte/word modes need AVX512BW and the remaining 512-bit modes
;; need AVX512F.
2192 (define_mode_iterator REDUC_SMINMAX_MODE
2193 [(V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
2194 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
2195 (V8SF "TARGET_AVX") (V4DF "TARGET_AVX")
2196 (V4SF "TARGET_SSE") (V64QI "TARGET_AVX512BW")
2197 (V32HI "TARGET_AVX512BW") (V16SI "TARGET_AVX512F")
2198 (V8DI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
2199 (V8DF "TARGET_AVX512F")])
;; Signed min/max reduction expander, instantiated for every smaxmin code and
;; every REDUC_SMINMAX_MODE mode.  Delegates to ix86_expand_reduc with the
;; matching gen_<code><mode>3 as the combining insn generator.
2201 (define_expand "reduc_<code>_<mode>"
2202 [(smaxmin:REDUC_SMINMAX_MODE
2203 (match_operand:REDUC_SMINMAX_MODE 0 "register_operand")
2204 (match_operand:REDUC_SMINMAX_MODE 1 "register_operand"))]
2207 ix86_expand_reduc (gen_<code><mode>3, operands[0], operands[1]);
;; Unsigned min/max reduction expander for the VI512_48F_12BW modes.
;; Delegates to ix86_expand_reduc with gen_<code><mode>3 as the combining
;; insn generator.
2211 (define_expand "reduc_<code>_<mode>"
2212 [(umaxmin:VI512_48F_12BW
2213 (match_operand:VI512_48F_12BW 0 "register_operand")
2214 (match_operand:VI512_48F_12BW 1 "register_operand"))]
2217 ix86_expand_reduc (gen_<code><mode>3, operands[0], operands[1]);
;; Min/max reduction expander for the VI_256 modes; delegates to
;; ix86_expand_reduc with gen_<code><mode>3 as the combining insn generator.
;; NOTE(review): the code iterator that wraps the operands is on a line not
;; visible in this excerpt -- confirm which operator set this covers.
2221 (define_expand "reduc_<code>_<mode>"
2223 (match_operand:VI_256 0 "register_operand")
2224 (match_operand:VI_256 1 "register_operand"))]
2227 ix86_expand_reduc (gen_<code><mode>3, operands[0], operands[1]);
;; V8HI unsigned-min reduction expander: delegates to ix86_expand_reduc with
;; gen_uminv8hi3 as the combining insn generator.
2231 (define_expand "reduc_umin_v8hi"
2233 (match_operand:V8HI 0 "register_operand")
2234 (match_operand:V8HI 1 "register_operand"))]
2237 ix86_expand_reduc (gen_uminv8hi3, operands[0], operands[1]);
;; Packed vreduce<suffix> for the VF_AVX512VL modes.  Operand 1 is the source
;; (register or memory), operand 2 an immediate in the range 0..255.  The
;; insn is EVEX-encoded; <mask_operand3> is appended to the destination in
;; the output template.
2241 (define_insn "<mask_codefor>reducep<mode><mask_name>"
2242 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
2244 [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "vm")
2245 (match_operand:SI 2 "const_0_to_255_operand")]
2248 "vreduce<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
2249 [(set_attr "type" "sse")
2250 (set_attr "prefix" "evex")
2251 (set_attr "mode" "<MODE>")])
;; Scalar vreduce<scalar suffix> on the VF_128 modes.  Operand 1 is a
;; register, operand 2 a register or memory, operand 3 an immediate in the
;; range 0..255.  EVEX-encoded.
2253 (define_insn "reduces<mode>"
2254 [(set (match_operand:VF_128 0 "register_operand" "=v")
2257 [(match_operand:VF_128 1 "register_operand" "v")
2258 (match_operand:VF_128 2 "nonimmediate_operand" "vm")
2259 (match_operand:SI 3 "const_0_to_255_operand")]
2264 "vreduce<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2265 [(set_attr "type" "sse")
2266 (set_attr "prefix" "evex")
2267 (set_attr "mode" "<MODE>")])
2269 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2271 ;; Parallel floating point comparisons
2273 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; AVX packed floating-point compare (vcmp<suffix>) for the 128/256-bit float
;; vector modes.  Operand 3 is the comparison-predicate immediate, restricted
;; to 0..31; operand 2 may be a register or memory.  VEX-encoded, one
;; immediate byte.
2275 (define_insn "avx_cmp<mode>3"
2276 [(set (match_operand:VF_128_256 0 "register_operand" "=x")
2278 [(match_operand:VF_128_256 1 "register_operand" "x")
2279 (match_operand:VF_128_256 2 "nonimmediate_operand" "xm")
2280 (match_operand:SI 3 "const_0_to_31_operand" "n")]
2283 "vcmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2284 [(set_attr "type" "ssecmp")
2285 (set_attr "length_immediate" "1")
2286 (set_attr "prefix" "vex")
2287 (set_attr "mode" "<MODE>")])
;; AVX scalar floating-point compare (vcmp<scalar suffix>) on the VF_128
;; modes.  Operand 3 is the comparison-predicate immediate (0..31).  In the
;; Intel-syntax half of the template operand 2 is printed with the <iptr>
;; modifier.  The insn mode attribute is the scalar mode.
2289 (define_insn "avx_vmcmp<mode>3"
2290 [(set (match_operand:VF_128 0 "register_operand" "=x")
2293 [(match_operand:VF_128 1 "register_operand" "x")
2294 (match_operand:VF_128 2 "nonimmediate_operand" "xm")
2295 (match_operand:SI 3 "const_0_to_31_operand" "n")]
2300 "vcmp<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %3}"
2301 [(set_attr "type" "ssecmp")
2302 (set_attr "length_immediate" "1")
2303 (set_attr "prefix" "vex")
2304 (set_attr "mode" "<ssescalarmode>")])
;; Packed compare for commutative comparison codes.  The "%" on operand 1
;; marks operands 1 and 2 as interchangeable, and the insn condition only
;; accepts operators whose RTX class is RTX_COMM_COMPARE.  The comparison
;; name is printed from operator 3 via %D3.  Alternative 0 is the legacy
;; two-operand cmp form, alternative 1 the VEX three-operand vcmp form.
2306 (define_insn "*<sse>_maskcmp<mode>3_comm"
2307 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
2308 (match_operator:VF_128_256 3 "sse_comparison_operator"
2309 [(match_operand:VF_128_256 1 "register_operand" "%0,x")
2310 (match_operand:VF_128_256 2 "nonimmediate_operand" "xm,xm")]))]
2312 && GET_RTX_CLASS (GET_CODE (operands[3])) == RTX_COMM_COMPARE"
2314 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
2315 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2316 [(set_attr "isa" "noavx,avx")
2317 (set_attr "type" "ssecmp")
2318 (set_attr "length_immediate" "1")
2319 (set_attr "prefix" "orig,vex")
2320 (set_attr "mode" "<MODE>")])
;; Packed compare for any sse_comparison_operator (operand order is fixed;
;; there is no "%" commutativity marker here).  The comparison name is
;; printed from operator 3 via %D3.  Alternative 0 is the legacy two-operand
;; cmp form with operand 1 tied to the destination; alternative 1 is the VEX
;; three-operand vcmp form.
2322 (define_insn "<sse>_maskcmp<mode>3"
2323 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
2324 (match_operator:VF_128_256 3 "sse_comparison_operator"
2325 [(match_operand:VF_128_256 1 "register_operand" "0,x")
2326 (match_operand:VF_128_256 2 "nonimmediate_operand" "xm,xm")]))]
2329 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
2330 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2331 [(set_attr "isa" "noavx,avx")
2332 (set_attr "type" "ssecmp")
2333 (set_attr "length_immediate" "1")
2334 (set_attr "prefix" "orig,vex")
2335 (set_attr "mode" "<MODE>")])
;; Scalar compare on the VF_128 modes using an sse_comparison_operator
;; (printed via %D3).  Alternative 0 is the legacy cmp form, alternative 1
;; the VEX vcmp form; in Intel syntax operand 2 is printed with <iptr>.
;; Note that length_immediate is "1" only for alternative 0 and left at its
;; default ("*") for the VEX alternative.
;; NOTE(review): the wrapper around the comparison operator is on lines not
;; visible in this excerpt.
2337 (define_insn "<sse>_vmmaskcmp<mode>3"
2338 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
2340 (match_operator:VF_128 3 "sse_comparison_operator"
2341 [(match_operand:VF_128 1 "register_operand" "0,x")
2342 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm")])
2347 cmp%D3<ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
2348 vcmp%D3<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %<iptr>2}"
2349 [(set_attr "isa" "noavx,avx")
2350 (set_attr "type" "ssecmp")
2351 (set_attr "length_immediate" "1,*")
2352 (set_attr "prefix" "orig,vex")
2353 (set_attr "mode" "<ssescalarmode>")])
;; Maps a vector mode to the predicate used for the comparison immediate:
;; floating-point vector modes accept 0..31, integer vector modes accept
;; 0..7.
2355 (define_mode_attr cmp_imm_predicate
2356 [(V16SF "const_0_to_31_operand") (V8DF "const_0_to_31_operand")
2357 (V16SI "const_0_to_7_operand") (V8DI "const_0_to_7_operand")
2358 (V8SF "const_0_to_31_operand") (V4DF "const_0_to_31_operand")
2359 (V8SI "const_0_to_7_operand") (V4DI "const_0_to_7_operand")
2360 (V4SF "const_0_to_31_operand") (V2DF "const_0_to_31_operand")
2361 (V4SI "const_0_to_7_operand") (V2DI "const_0_to_7_operand")
2362 (V32HI "const_0_to_7_operand") (V64QI "const_0_to_7_operand")
2363 (V16HI "const_0_to_7_operand") (V32QI "const_0_to_7_operand")
2364 (V8HI "const_0_to_7_operand") (V16QI "const_0_to_7_operand")])
;; AVX-512 compare producing a mask register ("Yk") for every V_AVX512VL
;; mode.  The immediate operand 3 is validated by the mode-dependent
;; <cmp_imm_predicate>.  The mnemonic is v<sseintprefix>cmp<suffix>; the
;; SAE and mask-merge pieces of the template come from the
;; <round_saeonly_*> and <mask_scalar_merge_*> substitution attributes.
2366 (define_insn "<avx512>_cmp<mode>3<mask_scalar_merge_name><round_saeonly_name>"
2367 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2368 (unspec:<avx512fmaskmode>
2369 [(match_operand:V_AVX512VL 1 "register_operand" "v")
2370 (match_operand:V_AVX512VL 2 "nonimmediate_operand" "<round_saeonly_constraint>")
2371 (match_operand:SI 3 "<cmp_imm_predicate>" "n")]
2373 "TARGET_AVX512F && <round_saeonly_mode512bit_condition>"
2374 "v<sseintprefix>cmp<ssemodesuffix>\t{%3, <round_saeonly_mask_scalar_merge_op4>%2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2<round_saeonly_mask_scalar_merge_op4>, %3}"
2375 [(set_attr "type" "ssecmp")
2376 (set_attr "length_immediate" "1")
2377 (set_attr "prefix" "evex")
2378 (set_attr "mode" "<sseinsnmode>")])
;; AVX-512 unsigned integer compare (vpcmpu<suffix>) producing a mask
;; register, modelled as UNSPEC_UNSIGNED_PCMP.  Operand 3 is the comparison
;; immediate, restricted to 0..7.
2380 (define_insn "<avx512>_ucmp<mode>3<mask_scalar_merge_name>"
2381 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2382 (unspec:<avx512fmaskmode>
2383 [(match_operand:VI_AVX512VL 1 "register_operand" "v")
2384 (match_operand:VI_AVX512VL 2 "nonimmediate_operand" "vm")
2385 (match_operand:SI 3 "const_0_to_7_operand" "n")]
2386 UNSPEC_UNSIGNED_PCMP))]
2388 "vpcmpu<ssemodesuffix>\t{%3, %2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2, %3}"
2389 [(set_attr "type" "ssecmp")
2390 (set_attr "length_immediate" "1")
2391 (set_attr "prefix" "evex")
2392 (set_attr "mode" "<sseinsnmode>")])
;; AVX-512 scalar floating-point compare writing a mask register.  The
;; unspec result is combined through an (and ...) whose second arm is on
;; lines not visible in this excerpt.  Operand 3 is the comparison immediate
;; (0..31); operand 2's predicate and constraint come from the
;; <round_saeonly_*> substitution attributes.
2394 (define_insn "avx512f_vmcmp<mode>3<round_saeonly_name>"
2395 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2396 (and:<avx512fmaskmode>
2397 (unspec:<avx512fmaskmode>
2398 [(match_operand:VF_128 1 "register_operand" "v")
2399 (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
2400 (match_operand:SI 3 "const_0_to_31_operand" "n")]
2404 "vcmp<ssescalarmodesuffix>\t{%3, <round_saeonly_op4>%2, %1, %0|%0, %1, %2<round_saeonly_op4>, %3}"
2405 [(set_attr "type" "ssecmp")
2406 (set_attr "length_immediate" "1")
2407 (set_attr "prefix" "evex")
2408 (set_attr "mode" "<ssescalarmode>")])
;; Masked form of the AVX-512 scalar compare: in addition to the compare
;; operands, operand 4 is a mask register that takes part in a nested
;; (and ...) and is printed as {%4} after the destination in the template.
2410 (define_insn "avx512f_vmcmp<mode>3_mask<round_saeonly_name>"
2411 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2412 (and:<avx512fmaskmode>
2413 (unspec:<avx512fmaskmode>
2414 [(match_operand:VF_128 1 "register_operand" "v")
2415 (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
2416 (match_operand:SI 3 "const_0_to_31_operand" "n")]
2418 (and:<avx512fmaskmode>
2419 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")
2422 "vcmp<ssescalarmodesuffix>\t{%3, <round_saeonly_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_saeonly_op5>, %3}"
2423 [(set_attr "type" "ssecmp")
2424 (set_attr "length_immediate" "1")
2425 (set_attr "prefix" "evex")
2426 (set_attr "mode" "<ssescalarmode>")])
;; AVX-512 packed floating-point compare expressed with a real comparison
;; operator (sse_comparison_operator, printed via %D3) rather than an
;; immediate; the result is written to a mask register.
2428 (define_insn "avx512f_maskcmp<mode>3"
2429 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2430 (match_operator:<avx512fmaskmode> 3 "sse_comparison_operator"
2431 [(match_operand:VF 1 "register_operand" "v")
2432 (match_operand:VF 2 "nonimmediate_operand" "vm")]))]
2434 "vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2435 [(set_attr "type" "ssecmp")
2436 (set_attr "length_immediate" "1")
2437 (set_attr "prefix" "evex")
2438 (set_attr "mode" "<sseinsnmode>")])
;; [v]comi<suffix>: sets FLAGS_REG (mode CCFP) from element 0 of operand 0
;; and element 0 of operand 1.  Enabled when SSE_FLOAT_MODE_P holds for the
;; scalar mode.  The prefix_data16 attribute depends on whether the insn
;; mode is DF; prefix_rep is always "0".
2440 (define_insn "<sse>_comi<round_saeonly_name>"
2441 [(set (reg:CCFP FLAGS_REG)
2444 (match_operand:<ssevecmode> 0 "register_operand" "v")
2445 (parallel [(const_int 0)]))
2447 (match_operand:<ssevecmode> 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
2448 (parallel [(const_int 0)]))))]
2449 "SSE_FLOAT_MODE_P (<MODE>mode)"
2450 "%vcomi<ssemodesuffix>\t{<round_saeonly_op2>%1, %0|%0, %<iptr>1<round_saeonly_op2>}"
2451 [(set_attr "type" "ssecomi")
2452 (set_attr "prefix" "maybe_vex")
2453 (set_attr "prefix_rep" "0")
2454 (set (attr "prefix_data16")
2455 (if_then_else (eq_attr "mode" "DF")
2457 (const_string "0")))
2458 (set_attr "mode" "<MODE>")])
;; [v]ucomi<suffix>: same shape as the comi pattern, but FLAGS_REG is set in
;; CCFPU mode.  Compares element 0 of operand 0 with element 0 of operand 1.
2460 (define_insn "<sse>_ucomi<round_saeonly_name>"
2461 [(set (reg:CCFPU FLAGS_REG)
2464 (match_operand:<ssevecmode> 0 "register_operand" "v")
2465 (parallel [(const_int 0)]))
2467 (match_operand:<ssevecmode> 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
2468 (parallel [(const_int 0)]))))]
2469 "SSE_FLOAT_MODE_P (<MODE>mode)"
2470 "%vucomi<ssemodesuffix>\t{<round_saeonly_op2>%1, %0|%0, %<iptr>1<round_saeonly_op2>}"
2471 [(set_attr "type" "ssecomi")
2472 (set_attr "prefix" "maybe_vex")
2473 (set_attr "prefix_rep" "0")
2474 (set (attr "prefix_data16")
2475 (if_then_else (eq_attr "mode" "DF")
2477 (const_string "0")))
2478 (set_attr "mode" "<MODE>")])
;; Vector conditional select for 512-bit modes: operand 0 receives operand 1
;; or operand 2 depending on operator 3 applied to the floating-point
;; operands 4 and 5.  The data mode and the comparison mode may differ, but
;; the condition requires both to have the same number of elements.
;; Expansion is done by ix86_expand_fp_vcond.
2480 (define_expand "vcond<V_512:mode><VF_512:mode>"
2481 [(set (match_operand:V_512 0 "register_operand")
2483 (match_operator 3 ""
2484 [(match_operand:VF_512 4 "nonimmediate_operand")
2485 (match_operand:VF_512 5 "nonimmediate_operand")])
2486 (match_operand:V_512 1 "general_operand")
2487 (match_operand:V_512 2 "general_operand")))]
2489 && (GET_MODE_NUNITS (<V_512:MODE>mode)
2490 == GET_MODE_NUNITS (<VF_512:MODE>mode))"
2492 bool ok = ix86_expand_fp_vcond (operands);
;; Vector conditional select for 256-bit modes; same structure as the
;; 512-bit expander.  The data mode (V_256) and comparison mode (VF_256)
;; must have the same number of elements.  Expansion is done by
;; ix86_expand_fp_vcond.
2497 (define_expand "vcond<V_256:mode><VF_256:mode>"
2498 [(set (match_operand:V_256 0 "register_operand")
2500 (match_operator 3 ""
2501 [(match_operand:VF_256 4 "nonimmediate_operand")
2502 (match_operand:VF_256 5 "nonimmediate_operand")])
2503 (match_operand:V_256 1 "general_operand")
2504 (match_operand:V_256 2 "general_operand")))]
2506 && (GET_MODE_NUNITS (<V_256:MODE>mode)
2507 == GET_MODE_NUNITS (<VF_256:MODE>mode))"
2509 bool ok = ix86_expand_fp_vcond (operands);
;; Vector conditional select for 128-bit modes; same structure as the wider
;; expanders.  The data mode (V_128) and comparison mode (VF_128) must have
;; the same number of elements.  Expansion is done by ix86_expand_fp_vcond.
2514 (define_expand "vcond<V_128:mode><VF_128:mode>"
2515 [(set (match_operand:V_128 0 "register_operand")
2517 (match_operator 3 ""
2518 [(match_operand:VF_128 4 "nonimmediate_operand")
2519 (match_operand:VF_128 5 "nonimmediate_operand")])
2520 (match_operand:V_128 1 "general_operand")
2521 (match_operand:V_128 2 "general_operand")))]
2523 && (GET_MODE_NUNITS (<V_128:MODE>mode)
2524 == GET_MODE_NUNITS (<VF_128:MODE>mode))"
2526 bool ok = ix86_expand_fp_vcond (operands);
2531 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2533 ;; Parallel floating point logical operations
2535 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Packed floating-point and-not.  The output template is assembled at
;; output time into a static buffer: the mnemonic skeleton depends on the
;; alternative (legacy "andn%s" vs. VEX/EVEX "vandn%s") and the %s suffix on
;; the insn's mode attribute.  For 64-byte modes without AVX512DQ the
;; integer form "vpandn%s" is used instead.  The mode attribute itself is
;; chosen from the tuning tests in the trailing cond.
2537 (define_insn "<sse>_andnot<mode>3<mask_name>"
2538 [(set (match_operand:VF 0 "register_operand" "=x,v")
2541 (match_operand:VF 1 "register_operand" "0,v"))
2542 (match_operand:VF 2 "nonimmediate_operand" "xm,vm")))]
2545 || (TARGET_AVX512DQ && GET_MODE_SIZE (<MODE>mode) == 64)
2546 || (TARGET_AVX512DQ && TARGET_AVX512VL))"
2548 static char buf[64];
2552 switch (get_attr_mode (insn))
2560 suffix = "<ssemodesuffix>";
2563 switch (which_alternative)
2566 ops = "andn%s\t{%%2, %%0|%%0, %%2}";
2569 ops = "vandn%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
2575 /* There is no vandnp[sd] in avx512f. Use vpandnq. */
2576 if (GET_MODE_SIZE (<MODE>mode) == 64 && !TARGET_AVX512DQ)
2579 ops = "vpandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
2582 snprintf (buf, sizeof (buf), ops, suffix);
2585 [(set_attr "isa" "noavx,avx")
2586 (set_attr "type" "sselog")
2587 (set_attr "prefix" "orig,maybe_evex")
2589 (cond [(and (match_test "<MODE_SIZE> == 16")
2590 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
2591 (const_string "<ssePSmode>")
2592 (match_test "TARGET_AVX")
2593 (const_string "<MODE>")
2594 (match_test "optimize_function_for_size_p (cfun)")
2595 (const_string "V4SF")
2597 (const_string "<MODE>")))])
;; Expander for the packed floating-point logical operations on VF modes.
;; The masked variant is only enabled with AVX512VL or for 64-byte modes.
;; Operands are legitimized by ix86_fixup_binary_operands_no_copy.
2599 (define_expand "<code><mode>3<mask_name>"
2600 [(set (match_operand:VF 0 "register_operand")
2602 (match_operand:VF 1 "nonimmediate_operand")
2603 (match_operand:VF 2 "nonimmediate_operand")))]
2605 && (!<mask_applied> || TARGET_AVX512VL || GET_MODE_SIZE (<MODE>mode) == 64)"
2606 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Insn for the packed floating-point logical operations on VF modes.
;; Operands 1 and 2 are commutative ("%") and must satisfy
;; ix86_binary_operator_ok.  As in the and-not pattern, the template is
;; built at output time into a static buffer: "<logic>%s" for the legacy
;; alternative, "v<logic>%s" for the VEX/EVEX one, and the integer form
;; "vp<logic>%s" for 64-byte modes when AVX512DQ is unavailable.
2608 (define_insn "*<code><mode>3<mask_name>"
2609 [(set (match_operand:VF 0 "register_operand" "=x,v")
2611 (match_operand:VF 1 "nonimmediate_operand" "%0,v")
2612 (match_operand:VF 2 "nonimmediate_operand" "xm,vm")))]
2614 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
2615 && (!<mask_applied> || TARGET_AVX512VL || GET_MODE_SIZE (<MODE>mode) == 64)"
2617 static char buf[64];
2621 switch (get_attr_mode (insn))
2629 suffix = "<ssemodesuffix>";
2632 switch (which_alternative)
2635 ops = "<logic>%s\t{%%2, %%0|%%0, %%2}";
2638 ops = "v<logic>%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
2644 /* There is no v<logic>p[sd] in avx512f. Use vp<logic>q. */
2645 if (GET_MODE_SIZE (<MODE>mode) == 64 && !TARGET_AVX512DQ)
2648 ops = "vp<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
2651 snprintf (buf, sizeof (buf), ops, suffix);
2654 [(set_attr "isa" "noavx,avx")
2655 (set_attr "type" "sselog")
2656 (set_attr "prefix" "orig,maybe_evex")
2658 (cond [(and (match_test "<MODE_SIZE> == 16")
2659 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
2660 (const_string "<ssePSmode>")
2661 (match_test "TARGET_AVX")
2662 (const_string "<MODE>")
2663 (match_test "optimize_function_for_size_p (cfun)")
2664 (const_string "V4SF")
2666 (const_string "<MODE>")))])
;; Vector copysign expander built from three logical operations:
;;   (not mask) & operand 1,  mask & operand 2,  and an ior of the two
;; intermediate results (operands 4 and 5) into operand 0.
;; Operand 3 is the mask produced by ix86_build_signbit_mask; operands 4
;; and 5 are fresh pseudo registers.
;; NOTE(review): which of operands 4/5 receives which partial result is set
;; on lines not visible in this excerpt.
2668 (define_expand "copysign<mode>3"
2671 (not:VF (match_dup 3))
2672 (match_operand:VF 1 "nonimmediate_operand")))
2674 (and:VF (match_dup 3)
2675 (match_operand:VF 2 "nonimmediate_operand")))
2676 (set (match_operand:VF 0 "register_operand")
2677 (ior:VF (match_dup 4) (match_dup 5)))]
2680 operands[3] = ix86_build_signbit_mask (<MODE>mode, 1, 0);
2682 operands[4] = gen_reg_rtx (<MODE>mode);
2683 operands[5] = gen_reg_rtx (<MODE>mode);
2686 ;; Also define scalar versions. These are used for abs, neg, and
2687 ;; conditional move. Using subregs into vector modes causes register
2688 ;; allocation lossage. These patterns do not allow memory operands
2689 ;; because the native instructions read the full 128-bits.
;;
;; Scalar (MODEF) and-not.  All operands are registers.  The mnemonic
;; suffix is "ps" when the insn's mode attribute is V4SF and
;; <ssevecmodesuffix> otherwise; alternative 0 prints the legacy "andn"
;; form, alternative 1 the VEX "vandn" form.
2691 (define_insn "*andnot<mode>3"
2692 [(set (match_operand:MODEF 0 "register_operand" "=x,x")
2695 (match_operand:MODEF 1 "register_operand" "0,x"))
2696 (match_operand:MODEF 2 "register_operand" "x,x")))]
2697 "SSE_FLOAT_MODE_P (<MODE>mode)"
2699 static char buf[32];
2702 = (get_attr_mode (insn) == MODE_V4SF) ? "ps" : "<ssevecmodesuffix>";
2704 switch (which_alternative)
2707 ops = "andn%s\t{%%2, %%0|%%0, %%2}";
2710 ops = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
2716 snprintf (buf, sizeof (buf), ops, suffix);
2719 [(set_attr "isa" "noavx,avx")
2720 (set_attr "type" "sselog")
2721 (set_attr "prefix" "orig,vex")
2723 (cond [(and (match_test "<MODE_SIZE> == 16")
2724 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
2725 (const_string "V4SF")
2726 (match_test "TARGET_AVX")
2727 (const_string "<ssevecmode>")
2728 (match_test "optimize_function_for_size_p (cfun)")
2729 (const_string "V4SF")
2731 (const_string "<ssevecmode>")))])
;; TFmode and-not.  Operand 1 is complemented (not:TF) before being combined
;; with operand 2, which may be in memory.  The mnemonic is "andnps" when
;; the insn's mode attribute is V4SF and "pandn" otherwise, with a "v"
;; prefix for the VEX alternative.  prefix_data16 is special-cased for
;; alternative 0 in TI mode.
2733 (define_insn "*andnottf3"
2734 [(set (match_operand:TF 0 "register_operand" "=x,x")
2736 (not:TF (match_operand:TF 1 "register_operand" "0,x"))
2737 (match_operand:TF 2 "nonimmediate_operand" "xm,xm")))]
2740 static char buf[32];
2743 = (get_attr_mode (insn) == MODE_V4SF) ? "andnps" : "pandn";
2745 switch (which_alternative)
2748 ops = "%s\t{%%2, %%0|%%0, %%2}";
2751 ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
2757 snprintf (buf, sizeof (buf), ops, tmp);
2760 [(set_attr "isa" "noavx,avx")
2761 (set_attr "type" "sselog")
2762 (set (attr "prefix_data16")
2764 (and (eq_attr "alternative" "0")
2765 (eq_attr "mode" "TI"))
2767 (const_string "*")))
2768 (set_attr "prefix" "orig,vex")
2770 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
2771 (const_string "V4SF")
2772 (match_test "TARGET_AVX")
2774 (ior (not (match_test "TARGET_SSE2"))
2775 (match_test "optimize_function_for_size_p (cfun)"))
2776 (const_string "V4SF")
2778 (const_string "TI")))])
;; Scalar (MODEF) logical operations.  All operands are registers and
;; operands 1 and 2 are commutative ("%").  The mnemonic suffix is "ps" when
;; the insn's mode attribute is V4SF and <ssevecmodesuffix> otherwise;
;; alternative 0 prints the legacy "<logic>" form, alternative 1 the VEX
;; "v<logic>" form.
2780 (define_insn "*<code><mode>3"
2781 [(set (match_operand:MODEF 0 "register_operand" "=x,x")
2783 (match_operand:MODEF 1 "register_operand" "%0,x")
2784 (match_operand:MODEF 2 "register_operand" "x,x")))]
2785 "SSE_FLOAT_MODE_P (<MODE>mode)"
2787 static char buf[32];
2790 = (get_attr_mode (insn) == MODE_V4SF) ? "ps" : "<ssevecmodesuffix>";
2792 switch (which_alternative)
2795 ops = "<logic>%s\t{%%2, %%0|%%0, %%2}";
2798 ops = "v<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
2804 snprintf (buf, sizeof (buf), ops, suffix);
2807 [(set_attr "isa" "noavx,avx")
2808 (set_attr "type" "sselog")
2809 (set_attr "prefix" "orig,vex")
2811 (cond [(and (match_test "<MODE_SIZE> == 16")
2812 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
2813 (const_string "V4SF")
2814 (match_test "TARGET_AVX")
2815 (const_string "<ssevecmode>")
2816 (match_test "optimize_function_for_size_p (cfun)")
2817 (const_string "V4SF")
2819 (const_string "<ssevecmode>")))])
;; Expander for the TFmode logical operations; operands are legitimized by
;; ix86_fixup_binary_operands_no_copy.
2821 (define_expand "<code>tf3"
2822 [(set (match_operand:TF 0 "register_operand")
2824 (match_operand:TF 1 "nonimmediate_operand")
2825 (match_operand:TF 2 "nonimmediate_operand")))]
2827 "ix86_fixup_binary_operands_no_copy (<CODE>, TFmode, operands);")
;; Insn for the TFmode logical operations.  Operands 1 and 2 are commutative
;; ("%") and must satisfy ix86_binary_operator_ok.  The mnemonic is
;; "<logic>ps" when the insn's mode attribute is V4SF and "p<logic>"
;; otherwise, with a "v" prefix for the VEX alternative.  prefix_data16 is
;; special-cased for alternative 0 in TI mode.
2829 (define_insn "*<code>tf3"
2830 [(set (match_operand:TF 0 "register_operand" "=x,x")
2832 (match_operand:TF 1 "nonimmediate_operand" "%0,x")
2833 (match_operand:TF 2 "nonimmediate_operand" "xm,xm")))]
2835 && ix86_binary_operator_ok (<CODE>, TFmode, operands)"
2837 static char buf[32];
2840 = (get_attr_mode (insn) == MODE_V4SF) ? "<logic>ps" : "p<logic>";
2842 switch (which_alternative)
2845 ops = "%s\t{%%2, %%0|%%0, %%2}";
2848 ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
2854 snprintf (buf, sizeof (buf), ops, tmp);
2857 [(set_attr "isa" "noavx,avx")
2858 (set_attr "type" "sselog")
2859 (set (attr "prefix_data16")
2861 (and (eq_attr "alternative" "0")
2862 (eq_attr "mode" "TI"))
2864 (const_string "*")))
2865 (set_attr "prefix" "orig,vex")
2867 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
2868 (const_string "V4SF")
2869 (match_test "TARGET_AVX")
2871 (ior (not (match_test "TARGET_SSE2"))
2872 (match_test "optimize_function_for_size_p (cfun)"))
2873 (const_string "V4SF")
2875 (const_string "TI")))])
2877 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2879 ;; FMA floating point multiply/accumulate instructions. These include
2880 ;; scalar versions of the instructions as well as vector versions.
2882 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2884 ;; The standard names for scalar FMA are only available with SSE math enabled.
2885 ;; CPUID bit AVX512F enables evex encoded scalar and 512-bit fma. It doesn't
2886 ;; care about FMA bit, so we enable fma for TARGET_AVX512F even when TARGET_FMA
2887 ;; and TARGET_FMA4 are both false.
2888 ;; TODO: In theory AVX512F does not automatically imply FMA, and without FMA
2889 ;; one must force the EVEX encoding of the fma insns. Ideally we'd improve
2890 ;; GAS to allow proper prefix selection. However, for the moment all hardware
2891 ;; that supports AVX512F also supports FMA so we can ignore this for now.
;;
;; Per-mode enabling conditions: SF/DF additionally require TARGET_SSE_MATH;
;; the 128/256-bit vector modes accept AVX512F only together with AVX512VL;
;; the 512-bit modes require AVX512F alone.
2892 (define_mode_iterator FMAMODEM
2893 [(SF "TARGET_SSE_MATH && (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F)")
2894 (DF "TARGET_SSE_MATH && (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F)")
2895 (V4SF "TARGET_FMA || TARGET_FMA4 || (TARGET_AVX512F && TARGET_AVX512VL)")
2896 (V2DF "TARGET_FMA || TARGET_FMA4 || (TARGET_AVX512F && TARGET_AVX512VL)")
2897 (V8SF "TARGET_FMA || TARGET_FMA4 || (TARGET_AVX512F && TARGET_AVX512VL)")
2898 (V4DF "TARGET_FMA || TARGET_FMA4 || (TARGET_AVX512F && TARGET_AVX512VL)")
2899 (V16SF "TARGET_AVX512F")
2900 (V8DF "TARGET_AVX512F")])
;; Standard-name fused multiply-add expander over FMAMODEM: three source
;; operands (1, 2, 3), none negated, result in operand 0.  There is no
;; preparation code; the RTL template itself is emitted.
2902 (define_expand "fma<mode>4"
2903 [(set (match_operand:FMAMODEM 0 "register_operand")
2905 (match_operand:FMAMODEM 1 "nonimmediate_operand")
2906 (match_operand:FMAMODEM 2 "nonimmediate_operand")
2907 (match_operand:FMAMODEM 3 "nonimmediate_operand")))])
;; Standard-name fused multiply-subtract expander over FMAMODEM: like
;; fma<mode>4 but operand 3 (the addend) is negated.
2909 (define_expand "fms<mode>4"
2910 [(set (match_operand:FMAMODEM 0 "register_operand")
2912 (match_operand:FMAMODEM 1 "nonimmediate_operand")
2913 (match_operand:FMAMODEM 2 "nonimmediate_operand")
2914 (neg:FMAMODEM (match_operand:FMAMODEM 3 "nonimmediate_operand"))))])
;; Standard-name negated fused multiply-add expander over FMAMODEM: like
;; fma<mode>4 but operand 1 (a multiplicand) is negated.
2916 (define_expand "fnma<mode>4"
2917 [(set (match_operand:FMAMODEM 0 "register_operand")
2919 (neg:FMAMODEM (match_operand:FMAMODEM 1 "nonimmediate_operand"))
2920 (match_operand:FMAMODEM 2 "nonimmediate_operand")
2921 (match_operand:FMAMODEM 3 "nonimmediate_operand")))])
;; Standard-name negated fused multiply-subtract expander over FMAMODEM:
;; both operand 1 (a multiplicand) and operand 3 (the addend) are negated.
2923 (define_expand "fnms<mode>4"
2924 [(set (match_operand:FMAMODEM 0 "register_operand")
2926 (neg:FMAMODEM (match_operand:FMAMODEM 1 "nonimmediate_operand"))
2927 (match_operand:FMAMODEM 2 "nonimmediate_operand")
2928 (neg:FMAMODEM (match_operand:FMAMODEM 3 "nonimmediate_operand"))))])
2930 ;; The builtins for intrinsics are not constrained by SSE math enabled.
;; Same mode list as FMAMODEM, but the SF/DF entries do not require
;; TARGET_SSE_MATH.
2931 (define_mode_iterator FMAMODE
2932 [(SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
2933 (DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
2934 (V4SF "TARGET_FMA || TARGET_FMA4 || (TARGET_AVX512F && TARGET_AVX512VL)")
2935 (V2DF "TARGET_FMA || TARGET_FMA4 || (TARGET_AVX512F && TARGET_AVX512VL)")
2936 (V8SF "TARGET_FMA || TARGET_FMA4 || (TARGET_AVX512F && TARGET_AVX512VL)")
2937 (V4DF "TARGET_FMA || TARGET_FMA4 || (TARGET_AVX512F && TARGET_AVX512VL)")
2938 (V16SF "TARGET_AVX512F")
2939 (V8DF "TARGET_AVX512F")])
;; Fused multiply-add expander over FMAMODE (the iterator that is not gated
;; on SSE math).  Three unnegated source operands; no preparation code.
2941 (define_expand "fma4i_fmadd_<mode>"
2942 [(set (match_operand:FMAMODE 0 "register_operand")
2944 (match_operand:FMAMODE 1 "nonimmediate_operand")
2945 (match_operand:FMAMODE 2 "nonimmediate_operand")
2946 (match_operand:FMAMODE 3 "nonimmediate_operand")))])
;; Zero-masking fmadd expander.  Operands 1-3 are the FMA sources and
;; operand 4 is the mask register.  It forwards to
;; gen_fma_fmadd_<mode>_maskz_1, inserting CONST0_RTX of the vector mode
;; between the sources and the mask; <round_expand_operand> appends the
;; rounding operand when the rounding variant is instantiated.
2948 (define_expand "<avx512>_fmadd_<mode>_maskz<round_expand_name>"
2949 [(match_operand:VF_AVX512VL 0 "register_operand")
2950 (match_operand:VF_AVX512VL 1 "<round_expand_nimm_predicate>")
2951 (match_operand:VF_AVX512VL 2 "<round_expand_nimm_predicate>")
2952 (match_operand:VF_AVX512VL 3 "<round_expand_nimm_predicate>")
2953 (match_operand:<avx512fmaskmode> 4 "register_operand")]
2954 "TARGET_AVX512F && <round_mode512bit_condition>"
2956 emit_insn (gen_fma_fmadd_<mode>_maskz_1<round_expand_name> (
2957 operands[0], operands[1], operands[2], operands[3],
2958 CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>));
;; FMA modes excluding the 512-bit vectors.  Scalar SF/DF are enabled by
;; FMA, FMA4 or AVX512F; the 128/256-bit vector modes only by FMA or FMA4.
2962 (define_mode_iterator FMAMODE_NOVF512
2963 [(SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
2964 (DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
2965 (V4SF "TARGET_FMA || TARGET_FMA4")
2966 (V2DF "TARGET_FMA || TARGET_FMA4")
2967 (V8SF "TARGET_FMA || TARGET_FMA4")
2968 (V4DF "TARGET_FMA || TARGET_FMA4")])
;; fmadd insn for the FMAMODE_NOVF512 modes.  Five alternatives:
;;   0-2: three-operand vfmadd132 / vfmadd213 / vfmadd231 forms, selected by
;;        which source operand is tied to the destination (isa fma_avx512f);
;;   3-4: four-operand FMA4 vfmadd form, with the memory operand allowed in
;;        operand 3 or operand 2 respectively (isa fma4).
2970 (define_insn "<sd_mask_codefor>fma_fmadd_noavx512_<mode><sd_maskz_name><round_name>"
2971 [(set (match_operand:FMAMODE_NOVF512 0 "register_operand" "=v,v,v,x,x")
2972 (fma:FMAMODE_NOVF512
2973 (match_operand:FMAMODE_NOVF512 1 "<round_nimm_predicate>" "%0,0,v,x,x")
2974 (match_operand:FMAMODE_NOVF512 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>,x,m")
2975 (match_operand:FMAMODE_NOVF512 3 "<round_nimm_predicate>" "v,<round_constraint>,0,xm,x")))]
2976 "<sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
2978 vfmadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
2979 vfmadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
2980 vfmadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}
2981 vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
2982 vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2983 [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f,fma4,fma4")
2984 (set_attr "type" "ssemuladd")
2985 (set_attr "mode" "<MODE>")])
;; fmadd insn for the VF_AVX512VL modes.  Three alternatives select the
;; vfmadd132 / vfmadd213 / vfmadd231 forms according to which source operand
;; is tied to the destination.  There are no FMA4 alternatives here.
2987 (define_insn "<sd_mask_codefor>fma_fmadd_<mode><sd_maskz_name><round_name>"
2988 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v,v")
2990 (match_operand:VF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v")
2991 (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
2992 (match_operand:VF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0")))]
2993 "<sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
2995 vfmadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
2996 vfmadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
2997 vfmadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
2998 [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f")
2999 (set_attr "type" "ssemuladd")
3000 (set_attr "mode" "<MODE>")])
;; Masked fmadd (vec_merge under mask operand 4, printed as {%4}).
;; Operand 1 is tied to the destination in both alternatives, so only the
;; vfmadd132 and vfmadd213 forms are offered; the alternatives differ in
;; whether operand 2 or operand 3 may use <round_constraint>.
3002 (define_insn "<avx512>_fmadd_<mode>_mask<round_name>"
3003 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
3004 (vec_merge:VF_AVX512VL
3006 (match_operand:VF_AVX512VL 1 "register_operand" "0,0")
3007 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v")
3008 (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>"))
3010 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
3011 "TARGET_AVX512F && <round_mode512bit_condition>"
3013 vfmadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
3014 vfmadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
3015 [(set_attr "isa" "fma_avx512f,fma_avx512f")
3016 (set_attr "type" "ssemuladd")
3017 (set_attr "mode" "<MODE>")])
;; Masked fmadd where operand 3 (the addend) is tied to the destination, so
;; only the vfmadd231 form is offered.  Operand 4 is the mask register and
;; is printed as {%4} after the destination.
;;
;; The register operands use the "v" constraint, matching the sibling
;; <avx512>_fmsub_<mode>_mask3 pattern.  This insn is only ever emitted in
;; its masked form (isa fma_avx512f), so restricting the register allocator
;; to the narrower "x" register set would serve no purpose.
3019 (define_insn "<avx512>_fmadd_<mode>_mask3<round_name>"
3020 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
3021 (vec_merge:VF_AVX512VL
3023 (match_operand:VF_AVX512VL 1 "register_operand" "v")
3024 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")
3025 (match_operand:VF_AVX512VL 3 "register_operand" "0"))
3027 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
3029 "vfmadd231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
3030 [(set_attr "isa" "fma_avx512f")
3031 (set_attr "type" "ssemuladd")
3032 (set_attr "mode" "<MODE>")])
;; fmsub insn for the FMAMODE_NOVF512 modes: same layout as the fmadd
;; variant, with operand 3 negated.  Alternatives 0-2 are the three-operand
;; vfmsub132 / vfmsub213 / vfmsub231 forms (isa fma_avx512f); alternatives
;; 3-4 are the four-operand FMA4 vfmsub form (isa fma4).
;; NOTE(review): the pattern name has no underscore between "noavx512" and
;; <mode>, unlike the fmadd and fnmadd siblings ("..._noavx512_<mode>").
;; Renaming would change the generated gen_* function names, so check for
;; users before making the names consistent.
3034 (define_insn "<sd_mask_codefor>fma_fmsub_noavx512<mode><sd_maskz_name><round_name>"
3035 [(set (match_operand:FMAMODE_NOVF512 0 "register_operand" "=v,v,v,x,x")
3036 (fma:FMAMODE_NOVF512
3037 (match_operand:FMAMODE_NOVF512 1 "<round_nimm_predicate>" "%0,0,v,x,x")
3038 (match_operand:FMAMODE_NOVF512 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>,x,m")
3039 (neg:FMAMODE_NOVF512
3040 (match_operand:FMAMODE_NOVF512 3 "<round_nimm_predicate>" "v,<round_constraint>,0,xm,x"))))]
3041 "<sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
3043 vfmsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
3044 vfmsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
3045 vfmsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}
3046 vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
3047 vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
3048 [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f,fma4,fma4")
3049 (set_attr "type" "ssemuladd")
3050 (set_attr "mode" "<MODE>")])
;; fmsub insn for the VF_AVX512VL modes.  Three alternatives select the
;; vfmsub132 / vfmsub213 / vfmsub231 forms according to which source operand
;; is tied to the destination.
3052 (define_insn "<sd_mask_codefor>fma_fmsub_<mode><sd_maskz_name><round_name>"
3053 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v,v")
3055 (match_operand:VF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v")
3056 (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
3058 (match_operand:VF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0"))))]
3059 "<sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
3061 vfmsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
3062 vfmsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
3063 vfmsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
3064 [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f")
3065 (set_attr "type" "ssemuladd")
3066 (set_attr "mode" "<MODE>")])
;; Masked fmsub (vec_merge under mask operand 4, printed as {%4}).
;; Operand 1 is tied to the destination in both alternatives, so only the
;; vfmsub132 and vfmsub213 forms are offered.
;; NOTE(review): the insn condition is on a line not visible in this
;; excerpt.
3068 (define_insn "<avx512>_fmsub_<mode>_mask<round_name>"
3069 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
3070 (vec_merge:VF_AVX512VL
3072 (match_operand:VF_AVX512VL 1 "register_operand" "0,0")
3073 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v")
3075 (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>")))
3077 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
3080 vfmsub132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
3081 vfmsub213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
3082 [(set_attr "isa" "fma_avx512f,fma_avx512f")
3083 (set_attr "type" "ssemuladd")
3084 (set_attr "mode" "<MODE>")])
;; "mask3" variant of masked fused multiply-subtract: operand 3 (rather
;; than operand 1) is tied to the destination, so the single alternative
;; emits the 231 encoding.  Requires TARGET_AVX512F; operand 4 is the mask.
3086 (define_insn "<avx512>_fmsub_<mode>_mask3<round_name>"
3087 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
3088 (vec_merge:VF_AVX512VL
3090 (match_operand:VF_AVX512VL 1 "register_operand" "v")
3091 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")
3093 (match_operand:VF_AVX512VL 3 "register_operand" "0")))
3095 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
3096 "TARGET_AVX512F && <round_mode512bit_condition>"
3097 "vfmsub231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
3098 [(set_attr "isa" "fma_avx512f")
3099 (set_attr "type" "ssemuladd")
3100 (set_attr "mode" "<MODE>")])
;; Negated fused multiply-add, fma (neg op1) op2 op3, for the
;; FMAMODE_NOVF512 modes.  Alternatives 0-2 are the three-operand
;; vfnmadd132/213/231 forms (isa "fma_avx512f"); alternatives 3-4 are the
;; four-operand vfnmadd form (isa "fma4"), which differ only in whether
;; operand 2 or operand 3 may be a memory reference.
3102 (define_insn "<sd_mask_codefor>fma_fnmadd_noavx512_<mode><sd_maskz_name><round_name>"
3103 [(set (match_operand:FMAMODE_NOVF512 0 "register_operand" "=v,v,v,x,x")
3104 (fma:FMAMODE_NOVF512
3105 (neg:FMAMODE_NOVF512
3106 (match_operand:FMAMODE_NOVF512 1 "<round_nimm_predicate>" "%0,0,v,x,x"))
3107 (match_operand:FMAMODE_NOVF512 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>,x,m")
3108 (match_operand:FMAMODE_NOVF512 3 "<round_nimm_predicate>" "v,<round_constraint>,0,xm,x")))]
3109 "<sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
3111 vfnmadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
3112 vfnmadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
3113 vfnmadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}
3114 vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
3115 vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
3116 [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f,fma4,fma4")
3117 (set_attr "type" "ssemuladd")
3118 (set_attr "mode" "<MODE>")])
;; Packed negated fused multiply-add for the VF_AVX512VL modes.
;; Same three-alternative layout as the other three-operand FMA patterns:
;; the 132/213/231 encoding is chosen by which input is tied to operand 0.
3120 (define_insn "<sd_mask_codefor>fma_fnmadd_<mode><sd_maskz_name><round_name>"
3121 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v,v")
3124 (match_operand:VF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v"))
3125 (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
3126 (match_operand:VF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0")))]
3127 "<sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
3129 vfnmadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
3130 vfnmadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
3131 vfnmadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
3132 [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f")
3133 (set_attr "type" "ssemuladd")
3134 (set_attr "mode" "<MODE>")])
;; Masked packed negated fused multiply-add (vec_merge under mask
;; operand 4).  Operand 1 is tied to the destination, so only the 132 and
;; 213 encodings appear.  Requires TARGET_AVX512F.
3136 (define_insn "<avx512>_fnmadd_<mode>_mask<round_name>"
3137 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
3138 (vec_merge:VF_AVX512VL
3141 (match_operand:VF_AVX512VL 1 "register_operand" "0,0"))
3142 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v")
3143 (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>"))
3145 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
3146 "TARGET_AVX512F && <round_mode512bit_condition>"
3148 vfnmadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
3149 vfnmadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
3150 [(set_attr "isa" "fma_avx512f,fma_avx512f")
3151 (set_attr "type" "ssemuladd")
3152 (set_attr "mode" "<MODE>")])
;; "mask3" variant of masked negated fused multiply-add: operand 3 is
;; tied to the destination and the 231 encoding is emitted.  Operand 4 is
;; the mask register.  Requires TARGET_AVX512F.
3154 (define_insn "<avx512>_fnmadd_<mode>_mask3<round_name>"
3155 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
3156 (vec_merge:VF_AVX512VL
3159 (match_operand:VF_AVX512VL 1 "register_operand" "v"))
3160 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")
3161 (match_operand:VF_AVX512VL 3 "register_operand" "0"))
3163 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
3164 "TARGET_AVX512F && <round_mode512bit_condition>"
3165 "vfnmadd231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
3166 [(set_attr "isa" "fma_avx512f")
3167 (set_attr "type" "ssemuladd")
3168 (set_attr "mode" "<MODE>")])
;; Negated fused multiply-subtract, fma (neg op1) op2 (neg op3), for the
;; FMAMODE_NOVF512 modes.  Alternatives 0-2 are the three-operand
;; vfnmsub132/213/231 forms (isa "fma_avx512f"); alternatives 3-4 are the
;; four-operand vfnmsub form (isa "fma4").
3170 (define_insn "<sd_mask_codefor>fma_fnmsub_noavx512_<mode><sd_maskz_name><round_name>"
3171 [(set (match_operand:FMAMODE_NOVF512 0 "register_operand" "=v,v,v,x,x")
3172 (fma:FMAMODE_NOVF512
3173 (neg:FMAMODE_NOVF512
3174 (match_operand:FMAMODE_NOVF512 1 "<round_nimm_predicate>" "%0,0,v,x,x"))
3175 (match_operand:FMAMODE_NOVF512 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>,x,m")
3176 (neg:FMAMODE_NOVF512
3177 (match_operand:FMAMODE_NOVF512 3 "<round_nimm_predicate>" "v,<round_constraint>,0,xm,x"))))]
3178 "<sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
3180 vfnmsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
3181 vfnmsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
3182 vfnmsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}
3183 vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
3184 vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
3185 [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f,fma4,fma4")
3186 (set_attr "type" "ssemuladd")
3187 (set_attr "mode" "<MODE>")])
;; Packed negated fused multiply-subtract for the VF_AVX512VL modes.
;; The 132/213/231 encoding is selected by which input operand is tied to
;; the destination (operand 1 in alternatives 0-1, operand 3 in 2).
3189 (define_insn "<sd_mask_codefor>fma_fnmsub_<mode><sd_maskz_name><round_name>"
3190 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v,v")
3193 (match_operand:VF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v"))
3194 (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
3196 (match_operand:VF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0"))))]
3197 "<sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
3199 vfnmsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
3200 vfnmsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
3201 vfnmsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
3202 [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f")
3203 (set_attr "type" "ssemuladd")
3204 (set_attr "mode" "<MODE>")])
;; Masked packed negated fused multiply-subtract (vec_merge under mask
;; operand 4).  Operand 1 is tied to the destination; the two
;; alternatives emit the 132 and 213 encodings.  Requires TARGET_AVX512F.
3206 (define_insn "<avx512>_fnmsub_<mode>_mask<round_name>"
3207 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
3208 (vec_merge:VF_AVX512VL
3211 (match_operand:VF_AVX512VL 1 "register_operand" "0,0"))
3212 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v")
3214 (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>")))
3216 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
3217 "TARGET_AVX512F && <round_mode512bit_condition>"
3219 vfnmsub132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
3220 vfnmsub213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
3221 [(set_attr "isa" "fma_avx512f,fma_avx512f")
3222 (set_attr "type" "ssemuladd")
3223 (set_attr "mode" "<MODE>")])
;; "mask3" variant of masked negated fused multiply-subtract: operand 3
;; is tied to the destination and the 231 encoding is emitted, with the
;; mask (operand 4) printed as {%4} after the destination.
3225 (define_insn "<avx512>_fnmsub_<mode>_mask3<round_name>"
3226 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
3227 (vec_merge:VF_AVX512VL
3230 (match_operand:VF_AVX512VL 1 "register_operand" "v"))
3231 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")
3233 (match_operand:VF_AVX512VL 3 "register_operand" "0")))
3235 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
3237 "vfnmsub231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
3238 [(set_attr "isa" "fma_avx512f")
3239 (set_attr "type" "ssemuladd")
3240 (set_attr "mode" "<MODE>")])
3242 ;; FMA parallel floating point multiply addsub and subadd operations.
3244 ;; It would be possible to represent these without the UNSPEC as
3247 ;; (fma op1 op2 op3)
3248 ;; (fma op1 op2 (neg op3))
3251 ;; But this doesn't seem useful in practice.
;; Named expander for fmaddsub on the VF modes.  It takes three
;; nonimmediate inputs and is enabled when any of FMA, FMA4 or AVX512F is
;; available; there is no preparation code, so the pattern is emitted as
;; written and matched by one of the fmaddsub insns.
3253 (define_expand "fmaddsub_<mode>"
3254 [(set (match_operand:VF 0 "register_operand")
3256 [(match_operand:VF 1 "nonimmediate_operand")
3257 (match_operand:VF 2 "nonimmediate_operand")
3258 (match_operand:VF 3 "nonimmediate_operand")]
3260 "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
;; Zero-masking expander for fmaddsub.  It forwards operands 0-3 to the
;; generated fma_fmaddsub_<mode>_maskz_1 insn, passing an all-zero vector
;; (CONST0_RTX) as the merge source ahead of the mask in operand 4.
3262 (define_expand "<avx512>_fmaddsub_<mode>_maskz<round_expand_name>"
3263 [(match_operand:VF_AVX512VL 0 "register_operand")
3264 (match_operand:VF_AVX512VL 1 "<round_expand_nimm_predicate>")
3265 (match_operand:VF_AVX512VL 2 "<round_expand_nimm_predicate>")
3266 (match_operand:VF_AVX512VL 3 "<round_expand_nimm_predicate>")
3267 (match_operand:<avx512fmaskmode> 4 "register_operand")]
3270 emit_insn (gen_fma_fmaddsub_<mode>_maskz_1<round_expand_name> (
3271 operands[0], operands[1], operands[2], operands[3],
3272 CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>));
;; fmaddsub for the 128/256-bit float modes (VF_128_256).
;; Alternatives 0-2 are the three-operand vfmaddsub132/213/231 forms
;; (isa "fma_avx512f"); alternatives 3-4 are the four-operand vfmaddsub
;; form (isa "fma4"), differing in which of operands 2/3 may be memory.
3276 (define_insn "<sd_mask_codefor>fma_fmaddsub_noavx512_<mode><sd_maskz_name><round_name>"
3277 [(set (match_operand:VF_128_256 0 "register_operand" "=v,v,v,x,x")
3279 [(match_operand:VF_128_256 1 "<round_nimm_predicate>" "%0,0,v,x,x")
3280 (match_operand:VF_128_256 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>,x,m")
3281 (match_operand:VF_128_256 3 "<round_nimm_predicate>" "v,<round_constraint>,0,xm,x")]
3283 "(TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F) && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
3285 vfmaddsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
3286 vfmaddsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
3287 vfmaddsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}
3288 vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
3289 vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
3290 [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f,fma4,fma4")
3291 (set_attr "type" "ssemuladd")
3292 (set_attr "mode" "<MODE>")])
;; fmaddsub for the VF_AVX512VL modes; requires TARGET_AVX512F.
;; Only the three-operand vfmaddsub132/213/231 encodings are provided,
;; selected by which input is tied to operand 0.
3294 (define_insn "<sd_mask_codefor>fma_fmaddsub_<mode><sd_maskz_name><round_name>"
3295 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v,v")
3297 [(match_operand:VF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v")
3298 (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
3299 (match_operand:VF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0")]
3301 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
3303 vfmaddsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
3304 vfmaddsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
3305 vfmaddsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
3306 [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f")
3307 (set_attr "type" "ssemuladd")
3308 (set_attr "mode" "<MODE>")])
;; Masked fmaddsub (vec_merge under mask operand 4).  Operand 1 is tied
;; to the destination; the alternatives emit the 132 and 213 encodings
;; with the mask printed as {%4}.
3310 (define_insn "<avx512>_fmaddsub_<mode>_mask<round_name>"
3311 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
3312 (vec_merge:VF_AVX512VL
3314 [(match_operand:VF_AVX512VL 1 "register_operand" "0,0")
3315 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v")
3316 (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>")]
3319 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
3322 vfmaddsub132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
3323 vfmaddsub213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
3324 [(set_attr "isa" "fma_avx512f,fma_avx512f")
3325 (set_attr "type" "ssemuladd")
3326 (set_attr "mode" "<MODE>")])
;; "mask3" variant of masked fmaddsub: operand 3 is tied to the
;; destination and the single alternative emits vfmaddsub231 under mask
;; operand 4.
3328 (define_insn "<avx512>_fmaddsub_<mode>_mask3<round_name>"
3329 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
3330 (vec_merge:VF_AVX512VL
3332 [(match_operand:VF_AVX512VL 1 "register_operand" "v")
3333 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")
3334 (match_operand:VF_AVX512VL 3 "register_operand" "0")]
3337 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
3339 "vfmaddsub231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
3340 [(set_attr "isa" "fma_avx512f")
3341 (set_attr "type" "ssemuladd")
3342 (set_attr "mode" "<MODE>")])
;; fmsubadd for the 128/256-bit float modes (VF_128_256).
;; Alternatives 0-2 are the three-operand vfmsubadd132/213/231 forms
;; (isa "fma_avx512f"); alternatives 3-4 are the four-operand vfmsubadd
;; form (isa "fma4").
3344 (define_insn "<sd_mask_codefor>fma_fmsubadd_noavx512_<mode><sd_maskz_name><round_name>"
3345 [(set (match_operand:VF_128_256 0 "register_operand" "=v,v,v,x,x")
3347 [(match_operand:VF_128_256 1 "<round_nimm_predicate>" "%0,0,v,x,x")
3348 (match_operand:VF_128_256 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>,x,m")
3350 (match_operand:VF_128_256 3 "<round_nimm_predicate>" "v,<round_constraint>,0,xm,x"))]
3352 "(TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F) && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
3354 vfmsubadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
3355 vfmsubadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
3356 vfmsubadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}
3357 vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
3358 vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
3359 [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f,fma4,fma4")
3360 (set_attr "type" "ssemuladd")
3361 (set_attr "mode" "<MODE>")])
;; fmsubadd for the VF_AVX512VL modes; requires TARGET_AVX512F.
;; The three alternatives emit vfmsubadd132/213/231 according to which
;; input operand is tied to the destination.
3363 (define_insn "<sd_mask_codefor>fma_fmsubadd_<mode><sd_maskz_name><round_name>"
3364 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v,v")
3366 [(match_operand:VF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v")
3367 (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
3369 (match_operand:VF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0"))]
3371 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
3373 vfmsubadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
3374 vfmsubadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
3375 vfmsubadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
3376 [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f")
3377 (set_attr "type" "ssemuladd")
3378 (set_attr "mode" "<MODE>")])
;; Masked fmsubadd (vec_merge under mask operand 4).  Operand 1 is tied
;; to the destination; the alternatives emit the 132 and 213 encodings.
3380 (define_insn "<avx512>_fmsubadd_<mode>_mask<round_name>"
3381 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
3382 (vec_merge:VF_AVX512VL
3384 [(match_operand:VF_AVX512VL 1 "register_operand" "0,0")
3385 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v")
3387 (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>"))]
3390 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
3393 vfmsubadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
3394 vfmsubadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
3395 [(set_attr "isa" "fma_avx512f,fma_avx512f")
3396 (set_attr "type" "ssemuladd")
3397 (set_attr "mode" "<MODE>")])
;; "mask3" variant of masked fmsubadd: operand 3 is tied to the
;; destination and the single alternative emits vfmsubadd231 under mask
;; operand 4.
3399 (define_insn "<avx512>_fmsubadd_<mode>_mask3<round_name>"
3400 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
3401 (vec_merge:VF_AVX512VL
3403 [(match_operand:VF_AVX512VL 1 "register_operand" "v")
3404 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")
3406 (match_operand:VF_AVX512VL 3 "register_operand" "0"))]
3409 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
3411 "vfmsubadd231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
3412 [(set_attr "isa" "fma_avx512f")
3413 (set_attr "type" "ssemuladd")
3414 (set_attr "mode" "<MODE>")])
3416 ;; FMA3 floating point scalar intrinsics. These merge result with
3417 ;; high-order elements from the destination register.
;; Expander for the scalar FMA3 multiply-add intrinsic on the 128-bit
;; float modes (VF_128).  All three inputs use <round_nimm_predicate>.
;; NOTE(review): the condition and the remainder of the RTL template are
;; not visible in this excerpt.
3419 (define_expand "fmai_vmfmadd_<mode><round_name>"
3420 [(set (match_operand:VF_128 0 "register_operand")
3423 (match_operand:VF_128 1 "<round_nimm_predicate>")
3424 (match_operand:VF_128 2 "<round_nimm_predicate>")
3425 (match_operand:VF_128 3 "<round_nimm_predicate>"))
;; Scalar FMA3 multiply-add (vfmadd132/213 with the scalar suffix).
;; Operand 1 is tied to the destination in both alternatives; Intel-syntax
;; output prints operands 2 and 3 with the %<iptr> modifier.
;; NOTE(review): the pattern name has no <round_name> suffix although the
;; body uses <round_nimm_predicate>/<round_constraint>/<round_op4>, unlike
;; the *fmai_fnmadd/*fmai_fnmsub patterns -- confirm this is intentional.
3430 (define_insn "*fmai_fmadd_<mode>"
3431 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
3434 (match_operand:VF_128 1 "<round_nimm_predicate>" " 0, 0")
3435 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>, v")
3436 (match_operand:VF_128 3 "<round_nimm_predicate>" " v,<round_constraint>"))
3439 "TARGET_FMA || TARGET_AVX512F"
3441 vfmadd132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
3442 vfmadd213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
3443 [(set_attr "type" "ssemuladd")
3444 (set_attr "mode" "<MODE>")])
;; Scalar FMA3 multiply-subtract (vfmsub132/213 with the scalar suffix).
;; Operand 1 is tied to the destination in both alternatives.
;; NOTE(review): as with *fmai_fmadd, the name lacks <round_name> while
;; the body uses the <round_*> attributes -- confirm this is intentional.
3446 (define_insn "*fmai_fmsub_<mode>"
3447 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
3450 (match_operand:VF_128 1 "<round_nimm_predicate>" "0,0")
3451 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>,v")
3453 (match_operand:VF_128 3 "<round_nimm_predicate>" " v,<round_constraint>")))
3456 "TARGET_FMA || TARGET_AVX512F"
3458 vfmsub132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
3459 vfmsub213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
3460 [(set_attr "type" "ssemuladd")
3461 (set_attr "mode" "<MODE>")])
;; Scalar FMA3 negated multiply-add (vfnmadd132/213 with the scalar
;; suffix).  Operand 2 is listed first in the RTL, ahead of operand 1,
;; which is the operand tied to the destination.
3463 (define_insn "*fmai_fnmadd_<mode><round_name>"
3464 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
3468 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>,v"))
3469 (match_operand:VF_128 1 "<round_nimm_predicate>" "0,0")
3470 (match_operand:VF_128 3 "<round_nimm_predicate>" "v,<round_constraint>"))
3473 "TARGET_FMA || TARGET_AVX512F"
3475 vfnmadd132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
3476 vfnmadd213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
3477 [(set_attr "type" "ssemuladd")
3478 (set_attr "mode" "<MODE>")])
;; Scalar FMA3 negated multiply-subtract (vfnmsub132/213 with the scalar
;; suffix).  Operand 2 is listed first in the RTL, ahead of operand 1,
;; which is the operand tied to the destination.
3480 (define_insn "*fmai_fnmsub_<mode><round_name>"
3481 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
3485 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>, v"))
3486 (match_operand:VF_128 1 "<round_nimm_predicate>" " 0, 0")
3488 (match_operand:VF_128 3 "<round_nimm_predicate>" " v,<round_constraint>")))
3491 "TARGET_FMA || TARGET_AVX512F"
3493 vfnmsub132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
3494 vfnmsub213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
3495 [(set_attr "type" "ssemuladd")
3496 (set_attr "mode" "<MODE>")])
3498 ;; FMA4 floating point scalar intrinsics. These write the
3499 ;; entire destination register, with the high-order elements zeroed.
;; Expander for the scalar FMA4 multiply-add intrinsic on VF_128.
;; The preparation statement sets operands[4] to an all-zero vector of
;; the mode, which the *fma4i_* insns match with const0_operand.
3501 (define_expand "fma4i_vmfmadd_<mode>"
3502 [(set (match_operand:VF_128 0 "register_operand")
3505 (match_operand:VF_128 1 "nonimmediate_operand")
3506 (match_operand:VF_128 2 "nonimmediate_operand")
3507 (match_operand:VF_128 3 "nonimmediate_operand"))
3511 "operands[4] = CONST0_RTX (<MODE>mode);")
;; Scalar FMA4 multiply-add: four-operand vfmadd with the scalar suffix.
;; The two alternatives allow memory for operand 3 or operand 2
;; respectively; operand 4 must be the zero constant.
3513 (define_insn "*fma4i_vmfmadd_<mode>"
3514 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
3517 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
3518 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
3519 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
3520 (match_operand:VF_128 4 "const0_operand")
3523 "vfmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
3524 [(set_attr "type" "ssemuladd")
3525 (set_attr "mode" "<MODE>")])
;; Scalar FMA4 multiply-subtract: four-operand vfmsub with the scalar
;; suffix.  Same operand layout as *fma4i_vmfmadd; operand 4 must be the
;; zero constant.
3527 (define_insn "*fma4i_vmfmsub_<mode>"
3528 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
3531 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
3532 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
3534 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
3535 (match_operand:VF_128 4 "const0_operand")
3538 "vfmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
3539 [(set_attr "type" "ssemuladd")
3540 (set_attr "mode" "<MODE>")])
;; Scalar FMA4 negated multiply-add: four-operand vfnmadd with the scalar
;; suffix.  Memory is allowed for operand 3 (alternative 0) or operand 2
;; (alternative 1); operand 4 must be the zero constant.
3542 (define_insn "*fma4i_vmfnmadd_<mode>"
3543 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
3547 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
3548 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
3549 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
3550 (match_operand:VF_128 4 "const0_operand")
3553 "vfnmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
3554 [(set_attr "type" "ssemuladd")
3555 (set_attr "mode" "<MODE>")])
;; Scalar FMA4 negated multiply-subtract: four-operand vfnmsub with the
;; scalar suffix.  Memory is allowed for operand 3 (alternative 0) or
;; operand 2 (alternative 1); operand 4 must be the zero constant.
3557 (define_insn "*fma4i_vmfnmsub_<mode>"
3558 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
3562 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
3563 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
3565 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
3566 (match_operand:VF_128 4 "const0_operand")
3569 "vfnmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
3570 [(set_attr "type" "ssemuladd")
3571 (set_attr "mode" "<MODE>")])
3573 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3575 ;; Parallel single-precision floating point conversion operations
3577 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2ps: convert a V2SI source (operand 2, constraint "ym") to
;; floating point into a V4SF destination.  Operand 1 is tied to the
;; destination register.
3579 (define_insn "sse_cvtpi2ps"
3580 [(set (match_operand:V4SF 0 "register_operand" "=x")
3583 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
3584 (match_operand:V4SF 1 "register_operand" "0")
3587 "cvtpi2ps\t{%2, %0|%0, %2}"
3588 [(set_attr "type" "ssecvt")
3589 (set_attr "mode" "V4SF")])
;; cvtps2pi: non-truncating conversion, modelled as an unspec on the V4SF
;; source, of which elements 0 and 1 are selected into a V2SI destination
;; (constraint "=y").  Tagged as executing on the "mmx" unit.
3591 (define_insn "sse_cvtps2pi"
3592 [(set (match_operand:V2SI 0 "register_operand" "=y")
3594 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
3596 (parallel [(const_int 0) (const_int 1)])))]
3598 "cvtps2pi\t{%1, %0|%0, %q1}"
3599 [(set_attr "type" "ssecvt")
3600 (set_attr "unit" "mmx")
3601 (set_attr "mode" "DI")])
;; cvttps2pi: truncating conversion (RTL "fix") of the V4SF source, of
;; which elements 0 and 1 are selected into a V2SI destination
;; (constraint "=y").  prefix_rep is explicitly set to 0.
3603 (define_insn "sse_cvttps2pi"
3604 [(set (match_operand:V2SI 0 "register_operand" "=y")
3606 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
3607 (parallel [(const_int 0) (const_int 1)])))]
3609 "cvttps2pi\t{%1, %0|%0, %q1}"
3610 [(set_attr "type" "ssecvt")
3611 (set_attr "unit" "mmx")
3612 (set_attr "prefix_rep" "0")
3613 (set_attr "mode" "SF")])
;; Convert a 32-bit integer (operand 2) to SF inside a V4SF register.
;; Alternatives 0-1 (isa "noavx") are the two-operand cvtsi2ss with a
;; register or memory source and operand 1 tied to the destination;
;; alternative 2 (isa "avx") is the three-operand vcvtsi2ss.
;; The *_decode attributes give per-CPU decoder classes per alternative.
3615 (define_insn "sse_cvtsi2ss<round_name>"
3616 [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
3619 (float:SF (match_operand:SI 2 "<round_nimm_predicate>" "r,m,<round_constraint3>")))
3620 (match_operand:V4SF 1 "register_operand" "0,0,v")
3624 cvtsi2ss\t{%2, %0|%0, %2}
3625 cvtsi2ss\t{%2, %0|%0, %2}
3626 vcvtsi2ss\t{<round_op3>%2, %1, %0|%0, %1, %2<round_op3>}"
3627 [(set_attr "isa" "noavx,noavx,avx")
3628 (set_attr "type" "sseicvt")
3629 (set_attr "athlon_decode" "vector,double,*")
3630 (set_attr "amdfam10_decode" "vector,double,*")
3631 (set_attr "bdver1_decode" "double,direct,*")
3632 (set_attr "btver2_decode" "double,double,double")
3633 (set_attr "prefix" "orig,orig,maybe_evex")
3634 (set_attr "mode" "SF")])
;; 64-bit-integer counterpart of sse_cvtsi2ss: converts a DI source
;; (operand 2) to SF inside a V4SF register.  Requires TARGET_SSE and
;; TARGET_64BIT.  The noavx alternatives set prefix_rex to 1; the AVX
;; alternative sets length_vex to 4.
3636 (define_insn "sse_cvtsi2ssq<round_name>"
3637 [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
3640 (float:SF (match_operand:DI 2 "<round_nimm_predicate>" "r,m,<round_constraint3>")))
3641 (match_operand:V4SF 1 "register_operand" "0,0,v")
3643 "TARGET_SSE && TARGET_64BIT"
3645 cvtsi2ssq\t{%2, %0|%0, %2}
3646 cvtsi2ssq\t{%2, %0|%0, %2}
3647 vcvtsi2ssq\t{<round_op3>%2, %1, %0|%0, %1, %2<round_op3>}"
3648 [(set_attr "isa" "noavx,noavx,avx")
3649 (set_attr "type" "sseicvt")
3650 (set_attr "athlon_decode" "vector,double,*")
3651 (set_attr "amdfam10_decode" "vector,double,*")
3652 (set_attr "bdver1_decode" "double,direct,*")
3653 (set_attr "btver2_decode" "double,double,double")
3654 (set_attr "length_vex" "*,*,4")
3655 (set_attr "prefix_rex" "1,1,*")
3656 (set_attr "prefix" "orig,orig,maybe_evex")
3657 (set_attr "mode" "SF")])
;; cvtss2si (non-truncating, UNSPEC_FIX_NOTRUNC): converts element 0 of a
;; V4SF operand to a 32-bit integer register.  The %v template prefix
;; lets the same pattern print the VEX-prefixed mnemonic.
;; NOTE(review): unlike sse_cvtss2si_2 this pattern sets no
;; amdfam10_decode attribute -- confirm whether that is intended.
3659 (define_insn "sse_cvtss2si<round_name>"
3660 [(set (match_operand:SI 0 "register_operand" "=r,r")
3663 (match_operand:V4SF 1 "<round_nimm_predicate>" "v,<round_constraint2>")
3664 (parallel [(const_int 0)]))]
3665 UNSPEC_FIX_NOTRUNC))]
3667 "%vcvtss2si\t{<round_op2>%1, %0|%0, %k1<round_op2>}"
3668 [(set_attr "type" "sseicvt")
3669 (set_attr "athlon_decode" "double,vector")
3670 (set_attr "bdver1_decode" "double,double")
3671 (set_attr "prefix_rep" "1")
3672 (set_attr "prefix" "maybe_vex")
3673 (set_attr "mode" "SI")])
;; Variant of cvtss2si whose input is a plain SF operand (register or
;; memory) rather than an element selected from a V4SF vector.
;; Non-truncating conversion (UNSPEC_FIX_NOTRUNC) to a 32-bit integer.
3675 (define_insn "sse_cvtss2si_2"
3676 [(set (match_operand:SI 0 "register_operand" "=r,r")
3677 (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "v,m")]
3678 UNSPEC_FIX_NOTRUNC))]
3680 "%vcvtss2si\t{%1, %0|%0, %k1}"
3681 [(set_attr "type" "sseicvt")
3682 (set_attr "athlon_decode" "double,vector")
3683 (set_attr "amdfam10_decode" "double,double")
3684 (set_attr "bdver1_decode" "double,double")
3685 (set_attr "prefix_rep" "1")
3686 (set_attr "prefix" "maybe_vex")
3687 (set_attr "mode" "SI")])
;; 64-bit-destination form of cvtss2si: converts element 0 of a V4SF
;; operand to a DI register (UNSPEC_FIX_NOTRUNC).  Requires TARGET_SSE
;; and TARGET_64BIT.  The "{q}" in the template is a dialect-conditional
;; mnemonic suffix.
3689 (define_insn "sse_cvtss2siq<round_name>"
3690 [(set (match_operand:DI 0 "register_operand" "=r,r")
3693 (match_operand:V4SF 1 "<round_nimm_predicate>" "v,<round_constraint2>")
3694 (parallel [(const_int 0)]))]
3695 UNSPEC_FIX_NOTRUNC))]
3696 "TARGET_SSE && TARGET_64BIT"
3697 "%vcvtss2si{q}\t{<round_op2>%1, %0|%0, %k1<round_op2>}"
3698 [(set_attr "type" "sseicvt")
3699 (set_attr "athlon_decode" "double,vector")
3700 (set_attr "bdver1_decode" "double,double")
3701 (set_attr "prefix_rep" "1")
3702 (set_attr "prefix" "maybe_vex")
3703 (set_attr "mode" "DI")])
;; 64-bit-destination cvtss2si taking a plain SF operand (register or
;; memory).  Non-truncating conversion (UNSPEC_FIX_NOTRUNC); requires
;; TARGET_SSE and TARGET_64BIT.
3705 (define_insn "sse_cvtss2siq_2"
3706 [(set (match_operand:DI 0 "register_operand" "=r,r")
3707 (unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "v,m")]
3708 UNSPEC_FIX_NOTRUNC))]
3709 "TARGET_SSE && TARGET_64BIT"
3710 "%vcvtss2si{q}\t{%1, %0|%0, %k1}"
3711 [(set_attr "type" "sseicvt")
3712 (set_attr "athlon_decode" "double,vector")
3713 (set_attr "amdfam10_decode" "double,double")
3714 (set_attr "bdver1_decode" "double,double")
3715 (set_attr "prefix_rep" "1")
3716 (set_attr "prefix" "maybe_vex")
3717 (set_attr "mode" "DI")])
;; cvttss2si: converts element 0 of a V4SF operand to a 32-bit integer
;; register using the truncating mnemonic.  Parameterised by the
;; <round_saeonly_*> attributes (defined elsewhere in the file).
3719 (define_insn "sse_cvttss2si<round_saeonly_name>"
3720 [(set (match_operand:SI 0 "register_operand" "=r,r")
3723 (match_operand:V4SF 1 "<round_saeonly_nimm_predicate>" "v,<round_saeonly_constraint2>")
3724 (parallel [(const_int 0)]))))]
3726 "%vcvttss2si\t{<round_saeonly_op2>%1, %0|%0, %k1<round_saeonly_op2>}"
3727 [(set_attr "type" "sseicvt")
3728 (set_attr "athlon_decode" "double,vector")
3729 (set_attr "amdfam10_decode" "double,double")
3730 (set_attr "bdver1_decode" "double,double")
3731 (set_attr "prefix_rep" "1")
3732 (set_attr "prefix" "maybe_vex")
3733 (set_attr "mode" "SI")])
;; 64-bit-destination form of cvttss2si: converts element 0 of a V4SF
;; operand to a DI register.  Requires TARGET_SSE and TARGET_64BIT.
;; NOTE(review): the second alternative uses <round_saeonly_constraint>
;; whereas sse_cvttss2si uses <round_saeonly_constraint2> -- confirm the
;; difference is intentional.
3735 (define_insn "sse_cvttss2siq<round_saeonly_name>"
3736 [(set (match_operand:DI 0 "register_operand" "=r,r")
3739 (match_operand:V4SF 1 "<round_saeonly_nimm_predicate>" "v,<round_saeonly_constraint>")
3740 (parallel [(const_int 0)]))))]
3741 "TARGET_SSE && TARGET_64BIT"
3742 "%vcvttss2si{q}\t{<round_saeonly_op2>%1, %0|%0, %k1<round_saeonly_op2>}"
3743 [(set_attr "type" "sseicvt")
3744 (set_attr "athlon_decode" "double,vector")
3745 (set_attr "amdfam10_decode" "double,double")
3746 (set_attr "bdver1_decode" "double,double")
3747 (set_attr "prefix_rep" "1")
3748 (set_attr "prefix" "maybe_vex")
3749 (set_attr "mode" "DI")])
;; vcvtusi2ss/sd with a 32-bit source: unsigned_float of the SI operand 2
;; to the scalar element mode, wrapped in vec_duplicate.  Operand 1 is a
;; separate register input (three-operand form).  Requires TARGET_AVX512F
;; and <round_modev4sf_condition>.
3751 (define_insn "cvtusi2<ssescalarmodesuffix>32<round_name>"
3752 [(set (match_operand:VF_128 0 "register_operand" "=v")
3754 (vec_duplicate:VF_128
3755 (unsigned_float:<ssescalarmode>
3756 (match_operand:SI 2 "<round_nimm_predicate>" "<round_constraint3>")))
3757 (match_operand:VF_128 1 "register_operand" "v")
3759 "TARGET_AVX512F && <round_modev4sf_condition>"
3760 "vcvtusi2<ssescalarmodesuffix>\t{<round_op3>%2, %1, %0|%0, %1, %2<round_op3>}"
3761 [(set_attr "type" "sseicvt")
3762 (set_attr "prefix" "evex")
3763 (set_attr "mode" "<ssescalarmode>")])
;; vcvtusi2ss/sd with a 64-bit source: unsigned_float of the DI operand 2
;; to the scalar element mode, wrapped in vec_duplicate.  Requires
;; TARGET_AVX512F and TARGET_64BIT; unlike the 32-bit variant the
;; condition carries no <round_modev4sf_condition> term.
3765 (define_insn "cvtusi2<ssescalarmodesuffix>64<round_name>"
3766 [(set (match_operand:VF_128 0 "register_operand" "=v")
3768 (vec_duplicate:VF_128
3769 (unsigned_float:<ssescalarmode>
3770 (match_operand:DI 2 "<round_nimm_predicate>" "<round_constraint3>")))
3771 (match_operand:VF_128 1 "register_operand" "v")
3773 "TARGET_AVX512F && TARGET_64BIT"
3774 "vcvtusi2<ssescalarmodesuffix>\t{<round_op3>%2, %1, %0|%0, %1, %2<round_op3>}"
3775 [(set_attr "type" "sseicvt")
3776 (set_attr "prefix" "evex")
3777 (set_attr "mode" "<ssescalarmode>")])
;; Packed integer to single-precision conversion (cvtdq2ps) for the VF1
;; modes, from the matching <sseintvecmode> integer vector.  Requires
;; TARGET_SSE2 plus the masking/rounding mode conditions.
3779 (define_insn "float<sseintvecmodelower><mode>2<mask_name><round_name>"
3780 [(set (match_operand:VF1 0 "register_operand" "=v")
3782 (match_operand:<sseintvecmode> 1 "<round_nimm_predicate>" "<round_constraint>")))]
3783 "TARGET_SSE2 && <mask_mode512bit_condition> && <round_mode512bit_condition>"
3784 "%vcvtdq2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
3785 [(set_attr "type" "ssecvt")
3786 (set_attr "prefix" "maybe_vex")
3787 (set_attr "mode" "<sseinsnmode>")])
;; Packed unsigned integer to single-precision conversion (vcvtudq2ps)
;; for the VF1_AVX512VL modes; always EVEX-encoded (prefix "evex").
3789 (define_insn "ufloat<sseintvecmodelower><mode>2<mask_name><round_name>"
3790 [(set (match_operand:VF1_AVX512VL 0 "register_operand" "=v")
3791 (unsigned_float:VF1_AVX512VL
3792 (match_operand:<sseintvecmode> 1 "nonimmediate_operand" "<round_constraint>")))]
3794 "vcvtudq2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
3795 [(set_attr "type" "ssecvt")
3796 (set_attr "prefix" "evex")
3797 (set_attr "mode" "<MODE>")])
;; Expander for unsigned integer vector to float vector conversion.
;; Enabled for V4SF with SSE2, and for the other VF1 modes when AVX2 is
;; available.  V16SFmode is handled by emitting the ufloatv16siv16sf2
;; insn; ix86_expand_vector_convert_uns_vsivsf is the C helper for the
;; remaining case (presumably the else branch -- the connecting lines are
;; not visible in this excerpt).
3799 (define_expand "floatuns<sseintvecmodelower><mode>2"
3800 [(match_operand:VF1 0 "register_operand")
3801 (match_operand:<sseintvecmode> 1 "register_operand")]
3802 "TARGET_SSE2 && (<MODE>mode == V4SFmode || TARGET_AVX2)"
3804 if (<MODE>mode == V16SFmode)
3805 emit_insn (gen_ufloatv16siv16sf2 (operands[0], operands[1]));
3807 ix86_expand_vector_convert_uns_vsivsf (operands[0], operands[1]);
3813 ;; For <sse2_avx_avx512f>_fix_notrunc<sf2simodelower><mode> insn pattern
;; Maps each SI vector mode to the lower-case name of the SF vector mode
;; with the same element count, for use when building pattern names.
3814 (define_mode_attr sf2simodelower
3815 [(V16SI "v16sf") (V8SI "v8sf") (V4SI "v4sf")])
;; Packed single-precision to 32-bit integer conversion without
;; truncation (UNSPEC_FIX_NOTRUNC), emitted as cvtps2dq / vcvtps2dq for
;; the VI4_AVX modes.  The prefix_data16 attribute is computed from a
;; TARGET_AVX test rather than being a fixed string.
3817 (define_insn "<sse2_avx_avx512f>_fix_notrunc<sf2simodelower><mode><mask_name>"
3818 [(set (match_operand:VI4_AVX 0 "register_operand" "=v")
3820 [(match_operand:<ssePSmode> 1 "nonimmediate_operand" "vm")]
3821 UNSPEC_FIX_NOTRUNC))]
3822 "TARGET_SSE2 && <mask_mode512bit_condition>"
3823 "%vcvtps2dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
3824 [(set_attr "type" "ssecvt")
3825 (set (attr "prefix_data16")
3827 (match_test "TARGET_AVX")
3829 (const_string "1")))
3830 (set_attr "prefix" "maybe_vex")
3831 (set_attr "mode" "<sseinsnmode>")])
;; 512-bit form of the non-truncating float-to-int conversion: V16SF to
;; V16SI via vcvtps2dq (UNSPEC_FIX_NOTRUNC), EVEX-encoded, insn mode XI.
3833 (define_insn "<mask_codefor>avx512f_fix_notruncv16sfv16si<mask_name><round_name>"
3834 [(set (match_operand:V16SI 0 "register_operand" "=v")
3836 [(match_operand:V16SF 1 "<round_nimm_predicate>" "<round_constraint>")]
3837 UNSPEC_FIX_NOTRUNC))]
3839 "vcvtps2dq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
3840 [(set_attr "type" "ssecvt")
3841 (set_attr "prefix" "evex")
3842 (set_attr "mode" "XI")])
;; Packed single-precision to unsigned 32-bit integer conversion without
;; truncation (UNSPEC_UNSIGNED_FIX_NOTRUNC), emitted as vcvtps2udq for
;; the VI4_AVX512VL modes; EVEX-encoded.
3844 (define_insn "<mask_codefor><avx512>_ufix_notrunc<sf2simodelower><mode><mask_name><round_name>"
3845 [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
3846 (unspec:VI4_AVX512VL
3847 [(match_operand:<ssePSmode> 1 "nonimmediate_operand" "<round_constraint>")]
3848 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
3850 "vcvtps2udq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
3851 [(set_attr "type" "ssecvt")
3852 (set_attr "prefix" "evex")
3853 (set_attr "mode" "<sseinsnmode>")])
;; Single-float -> signed qword conversion (UNSPEC_FIX_NOTRUNC) for the
;; VI8_256_512 destination modes, reading an <ssePSmode2> operand.
;; Requires TARGET_AVX512DQ; emits vcvtps2qq.
3855 (define_insn "<mask_codefor>avx512dq_cvtps2qq<mode><mask_name><round_name>"
3856 [(set (match_operand:VI8_256_512 0 "register_operand" "=v")
3857 (unspec:VI8_256_512 [(match_operand:<ssePSmode2> 1 "nonimmediate_operand" "<round_constraint>")]
3858 UNSPEC_FIX_NOTRUNC))]
3859 "TARGET_AVX512DQ && <round_mode512bit_condition>"
3860 "vcvtps2qq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
3861 [(set_attr "type" "ssecvt")
3862 (set_attr "prefix" "evex")
3863 (set_attr "mode" "<sseinsnmode>")])
;; 128-bit form of cvtps2qq: V2DI result computed from elements 0 and 1 of a
;; V4SF operand (UNSPEC_FIX_NOTRUNC).  Requires both TARGET_AVX512DQ and
;; TARGET_AVX512VL; emits vcvtps2qq.
3865 (define_insn "<mask_codefor>avx512dq_cvtps2qqv2di<mask_name>"
3866 [(set (match_operand:V2DI 0 "register_operand" "=v")
3869 (match_operand:V4SF 1 "nonimmediate_operand" "vm")
3870 (parallel [(const_int 0) (const_int 1)]))]
3871 UNSPEC_FIX_NOTRUNC))]
3872 "TARGET_AVX512DQ && TARGET_AVX512VL"
3873 "vcvtps2qq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
3874 [(set_attr "type" "ssecvt")
3875 (set_attr "prefix" "evex")
3876 (set_attr "mode" "TI")])
;; Single-float -> unsigned qword conversion (UNSPEC_UNSIGNED_FIX_NOTRUNC)
;; for the VI8_256_512 destination modes.  Requires TARGET_AVX512DQ; emits
;; vcvtps2uqq.
3878 (define_insn "<mask_codefor>avx512dq_cvtps2uqq<mode><mask_name><round_name>"
3879 [(set (match_operand:VI8_256_512 0 "register_operand" "=v")
3880 (unspec:VI8_256_512 [(match_operand:<ssePSmode2> 1 "nonimmediate_operand" "<round_constraint>")]
3881 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
3882 "TARGET_AVX512DQ && <round_mode512bit_condition>"
3883 "vcvtps2uqq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
3884 [(set_attr "type" "ssecvt")
3885 (set_attr "prefix" "evex")
3886 (set_attr "mode" "<sseinsnmode>")])
;; 128-bit form of cvtps2uqq: V2DI result computed from elements 0 and 1 of
;; a V4SF operand (UNSPEC_UNSIGNED_FIX_NOTRUNC).  Requires both
;; TARGET_AVX512DQ and TARGET_AVX512VL; emits vcvtps2uqq.
3888 (define_insn "<mask_codefor>avx512dq_cvtps2uqqv2di<mask_name>"
3889 [(set (match_operand:V2DI 0 "register_operand" "=v")
3892 (match_operand:V4SF 1 "nonimmediate_operand" "vm")
3893 (parallel [(const_int 0) (const_int 1)]))]
3894 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
3895 "TARGET_AVX512DQ && TARGET_AVX512VL"
3896 "vcvtps2uqq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
3897 [(set_attr "type" "ssecvt")
3898 (set_attr "prefix" "evex")
3899 (set_attr "mode" "TI")])
;; V16SF -> V16SI conversion, EVEX-encoded.  The mnemonic is built as
;; vcvttps2<fixsuffix>dq, so <fixsuffix> selects the variant emitted.
3901 (define_insn "<fixsuffix>fix_truncv16sfv16si2<mask_name><round_saeonly_name>"
3902 [(set (match_operand:V16SI 0 "register_operand" "=v")
3904 (match_operand:V16SF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
3906 "vcvttps2<fixsuffix>dq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
3907 [(set_attr "type" "ssecvt")
3908 (set_attr "prefix" "evex")
3909 (set_attr "mode" "XI")])
;; Signed fix (RTL `fix') of a V8SF register/memory operand into a V8SI
;; register.  Requires TARGET_AVX; emits vcvttps2dq.
3911 (define_insn "fix_truncv8sfv8si2<mask_name>"
3912 [(set (match_operand:V8SI 0 "register_operand" "=v")
3913 (fix:V8SI (match_operand:V8SF 1 "nonimmediate_operand" "vm")))]
3914 "TARGET_AVX && <mask_mode512bit_condition>"
3915 "vcvttps2dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
3916 [(set_attr "type" "ssecvt")
3917 (set_attr "prefix" "<mask_prefix>")
3918 (set_attr "mode" "OI")])
;; Signed fix (RTL `fix') of a V4SF register/memory operand into a V4SI
;; register.  Requires TARGET_SSE2; emits "%vcvttps2dq".  prefix_rep and
;; prefix_data16 are selected by TARGET_AVX tests.
;; NOTE(review): "prefix_data16" is assigned twice below -- once through the
;; conditional (set (attr ...)) form and again through the unconditional
;; (set_attr "prefix_data16" "0").  One of the two looks redundant; confirm
;; which is intended before removing either.
3920 (define_insn "fix_truncv4sfv4si2<mask_name>"
3921 [(set (match_operand:V4SI 0 "register_operand" "=v")
3922 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "vm")))]
3923 "TARGET_SSE2 && <mask_mode512bit_condition>"
3924 "%vcvttps2dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
3925 [(set_attr "type" "ssecvt")
3926 (set (attr "prefix_rep")
3928 (match_test "TARGET_AVX")
3930 (const_string "1")))
3931 (set (attr "prefix_data16")
3933 (match_test "TARGET_AVX")
3935 (const_string "0")))
3936 (set_attr "prefix_data16" "0")
3937 (set_attr "prefix" "<mask_prefix2>")
3938 (set_attr "mode" "TI")])
;; Expander for float vector (VF1) -> unsigned integer vector conversion.
;; The V16SFmode test guards a call to gen_ufix_truncv16sfv16si2.  The other
;; visible sequence adjusts the input with
;; ix86_expand_adjust_ufix_to_sfix_si (which also fills tmp[2]), performs the
;; signed fix_trunc into a fresh register tmp[1], and XORs tmp[1] with tmp[2]
;; into operands[0].
3940 (define_expand "fixuns_trunc<mode><sseintvecmodelower>2"
3941 [(match_operand:<sseintvecmode> 0 "register_operand")
3942 (match_operand:VF1 1 "register_operand")]
3945 if (<MODE>mode == V16SFmode)
3946 emit_insn (gen_ufix_truncv16sfv16si2 (operands[0],
3951 tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]);
3952 tmp[1] = gen_reg_rtx (<sseintvecmode>mode);
3953 emit_insn (gen_fix_trunc<mode><sseintvecmodelower>2 (tmp[1], tmp[0]));
3954 emit_insn (gen_xor<sseintvecmodelower>3 (operands[0], tmp[1], tmp[2]));
3959 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3961 ;; Parallel double-precision floating point conversion operations
3963 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; V2SI -> V2DF signed conversion (RTL `float'); emits cvtpi2pd.
;; Two alternatives: source in a "y" register (tagged unit "mmx",
;; prefix_data16 "1") or in memory.
3965 (define_insn "sse2_cvtpi2pd"
3966 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
3967 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
3969 "cvtpi2pd\t{%1, %0|%0, %1}"
3970 [(set_attr "type" "ssecvt")
3971 (set_attr "unit" "mmx,*")
3972 (set_attr "prefix_data16" "1,*")
3973 (set_attr "mode" "V2DF")])
;; V2DF -> V2SI conversion tagged UNSPEC_FIX_NOTRUNC; the result goes to a
;; "y" register and the insn is tagged unit "mmx".  Emits cvtpd2pi.
3975 (define_insn "sse2_cvtpd2pi"
3976 [(set (match_operand:V2SI 0 "register_operand" "=y")
3977 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
3978 UNSPEC_FIX_NOTRUNC))]
3980 "cvtpd2pi\t{%1, %0|%0, %1}"
3981 [(set_attr "type" "ssecvt")
3982 (set_attr "unit" "mmx")
3983 (set_attr "bdver1_decode" "double")
3984 (set_attr "btver2_decode" "direct")
3985 (set_attr "prefix_data16" "1")
3986 (set_attr "mode" "DI")])
;; V2DF -> V2SI signed fix (RTL `fix'); the result goes to a "y" register
;; and the insn is tagged unit "mmx".  Emits cvttpd2pi.
3988 (define_insn "sse2_cvttpd2pi"
3989 [(set (match_operand:V2SI 0 "register_operand" "=y")
3990 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
3992 "cvttpd2pi\t{%1, %0|%0, %1}"
3993 [(set_attr "type" "ssecvt")
3994 (set_attr "unit" "mmx")
3995 (set_attr "bdver1_decode" "double")
3996 (set_attr "prefix_data16" "1")
3997 (set_attr "mode" "TI")])
;; Converts SImode operand 2 to DF (RTL `float') and combines it with the
;; V2DF operand 1 into the V2DF destination.
;; Alternatives 0/1 (isa noavx) tie operand 1 to the destination and emit
;; two-operand cvtsi2sd; alternative 2 (isa avx) emits three-operand
;; vcvtsi2sd.
3999 (define_insn "sse2_cvtsi2sd"
4000 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
4003 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m,rm")))
4004 (match_operand:V2DF 1 "register_operand" "0,0,x")
4008 cvtsi2sd\t{%2, %0|%0, %2}
4009 cvtsi2sd\t{%2, %0|%0, %2}
4010 vcvtsi2sd\t{%2, %1, %0|%0, %1, %2}"
4011 [(set_attr "isa" "noavx,noavx,avx")
4012 (set_attr "type" "sseicvt")
4013 (set_attr "athlon_decode" "double,direct,*")
4014 (set_attr "amdfam10_decode" "vector,double,*")
4015 (set_attr "bdver1_decode" "double,direct,*")
4016 (set_attr "btver2_decode" "double,double,double")
4017 (set_attr "prefix" "orig,orig,vex")
4018 (set_attr "mode" "DF")])
;; 64-bit-integer variant of sse2_cvtsi2sd: converts DImode operand 2 to DF
;; (RTL `float').  Requires TARGET_SSE2 && TARGET_64BIT.
;; Alternatives 0/1 (isa noavx) emit cvtsi2sdq with operand 1 tied to the
;; destination; alternative 2 (isa avx) emits vcvtsi2sdq and is the only one
;; whose template carries the <round_op3> rounding placeholder.
4020 (define_insn "sse2_cvtsi2sdq<round_name>"
4021 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
4024 (float:DF (match_operand:DI 2 "<round_nimm_predicate>" "r,m,<round_constraint3>")))
4025 (match_operand:V2DF 1 "register_operand" "0,0,v")
4027 "TARGET_SSE2 && TARGET_64BIT"
4029 cvtsi2sdq\t{%2, %0|%0, %2}
4030 cvtsi2sdq\t{%2, %0|%0, %2}
4031 vcvtsi2sdq\t{<round_op3>%2, %1, %0|%0, %1, %2<round_op3>}"
4032 [(set_attr "isa" "noavx,noavx,avx")
4033 (set_attr "type" "sseicvt")
4034 (set_attr "athlon_decode" "double,direct,*")
4035 (set_attr "amdfam10_decode" "vector,double,*")
4036 (set_attr "bdver1_decode" "double,direct,*")
4037 (set_attr "length_vex" "*,*,4")
4038 (set_attr "prefix_rex" "1,1,*")
4039 (set_attr "prefix" "orig,orig,maybe_evex")
4040 (set_attr "mode" "DF")])
;; Element 0 of a V4SF operand -> SImode general register, tagged
;; UNSPEC_UNSIGNED_FIX_NOTRUNC.  Emits vcvtss2usi (EVEX).
4042 (define_insn "avx512f_vcvtss2usi<round_name>"
4043 [(set (match_operand:SI 0 "register_operand" "=r")
4046 (match_operand:V4SF 1 "<round_nimm_predicate>" "<round_constraint>")
4047 (parallel [(const_int 0)]))]
4048 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4050 "vcvtss2usi\t{<round_op2>%1, %0|%0, %1<round_op2>}"
4051 [(set_attr "type" "sseicvt")
4052 (set_attr "prefix" "evex")
4053 (set_attr "mode" "SI")])
;; DImode-result variant of avx512f_vcvtss2usi: element 0 of a V4SF operand
;; -> DImode general register (UNSPEC_UNSIGNED_FIX_NOTRUNC).
;; Requires TARGET_AVX512F && TARGET_64BIT; emits vcvtss2usi.
4055 (define_insn "avx512f_vcvtss2usiq<round_name>"
4056 [(set (match_operand:DI 0 "register_operand" "=r")
4059 (match_operand:V4SF 1 "<round_nimm_predicate>" "<round_constraint>")
4060 (parallel [(const_int 0)]))]
4061 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4062 "TARGET_AVX512F && TARGET_64BIT"
4063 "vcvtss2usi\t{<round_op2>%1, %0|%0, %1<round_op2>}"
4064 [(set_attr "type" "sseicvt")
4065 (set_attr "prefix" "evex")
4066 (set_attr "mode" "DI")])
;; Element 0 of a V4SF operand -> SImode general register; emits
;; vcvttss2usi (EVEX) with the <round_saeonly_op2> placeholder.
4068 (define_insn "avx512f_vcvttss2usi<round_saeonly_name>"
4069 [(set (match_operand:SI 0 "register_operand" "=r")
4072 (match_operand:V4SF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
4073 (parallel [(const_int 0)]))))]
4075 "vcvttss2usi\t{<round_saeonly_op2>%1, %0|%0, %1<round_saeonly_op2>}"
4076 [(set_attr "type" "sseicvt")
4077 (set_attr "prefix" "evex")
4078 (set_attr "mode" "SI")])
;; DImode-result variant of avx512f_vcvttss2usi: element 0 of a V4SF operand
;; -> DImode general register.  Requires TARGET_AVX512F && TARGET_64BIT;
;; emits vcvttss2usi.
4080 (define_insn "avx512f_vcvttss2usiq<round_saeonly_name>"
4081 [(set (match_operand:DI 0 "register_operand" "=r")
4084 (match_operand:V4SF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
4085 (parallel [(const_int 0)]))))]
4086 "TARGET_AVX512F && TARGET_64BIT"
4087 "vcvttss2usi\t{<round_saeonly_op2>%1, %0|%0, %1<round_saeonly_op2>}"
4088 [(set_attr "type" "sseicvt")
4089 (set_attr "prefix" "evex")
4090 (set_attr "mode" "DI")])
;; Element 0 of a V2DF operand -> SImode general register, tagged
;; UNSPEC_UNSIGNED_FIX_NOTRUNC.  Emits vcvtsd2usi (EVEX).
4092 (define_insn "avx512f_vcvtsd2usi<round_name>"
4093 [(set (match_operand:SI 0 "register_operand" "=r")
4096 (match_operand:V2DF 1 "<round_nimm_predicate>" "<round_constraint>")
4097 (parallel [(const_int 0)]))]
4098 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4100 "vcvtsd2usi\t{<round_op2>%1, %0|%0, %1<round_op2>}"
4101 [(set_attr "type" "sseicvt")
4102 (set_attr "prefix" "evex")
4103 (set_attr "mode" "SI")])
;; DImode-result variant of avx512f_vcvtsd2usi: element 0 of a V2DF operand
;; -> DImode general register (UNSPEC_UNSIGNED_FIX_NOTRUNC).
;; Requires TARGET_AVX512F && TARGET_64BIT; emits vcvtsd2usi.
4105 (define_insn "avx512f_vcvtsd2usiq<round_name>"
4106 [(set (match_operand:DI 0 "register_operand" "=r")
4109 (match_operand:V2DF 1 "<round_nimm_predicate>" "<round_constraint>")
4110 (parallel [(const_int 0)]))]
4111 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4112 "TARGET_AVX512F && TARGET_64BIT"
4113 "vcvtsd2usi\t{<round_op2>%1, %0|%0, %1<round_op2>}"
4114 [(set_attr "type" "sseicvt")
4115 (set_attr "prefix" "evex")
4116 (set_attr "mode" "DI")])
;; Element 0 of a V2DF operand -> SImode general register; emits
;; vcvttsd2usi (EVEX) with the <round_saeonly_op2> placeholder.
4118 (define_insn "avx512f_vcvttsd2usi<round_saeonly_name>"
4119 [(set (match_operand:SI 0 "register_operand" "=r")
4122 (match_operand:V2DF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
4123 (parallel [(const_int 0)]))))]
4125 "vcvttsd2usi\t{<round_saeonly_op2>%1, %0|%0, %1<round_saeonly_op2>}"
4126 [(set_attr "type" "sseicvt")
4127 (set_attr "prefix" "evex")
4128 (set_attr "mode" "SI")])
;; DImode-result variant of avx512f_vcvttsd2usi: element 0 of a V2DF operand
;; -> DImode general register.  Requires TARGET_AVX512F && TARGET_64BIT;
;; emits vcvttsd2usi.
4130 (define_insn "avx512f_vcvttsd2usiq<round_saeonly_name>"
4131 [(set (match_operand:DI 0 "register_operand" "=r")
4134 (match_operand:V2DF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
4135 (parallel [(const_int 0)]))))]
4136 "TARGET_AVX512F && TARGET_64BIT"
4137 "vcvttsd2usi\t{<round_saeonly_op2>%1, %0|%0, %1<round_saeonly_op2>}"
4138 [(set_attr "type" "sseicvt")
4139 (set_attr "prefix" "evex")
4140 (set_attr "mode" "DI")])
;; Element 0 of a V2DF operand -> SImode general register, tagged
;; UNSPEC_FIX_NOTRUNC.  Emits "%vcvtsd2si"; two alternatives (register
;; source, or the <round_constraint2> source) with per-alternative decode
;; attributes.
4142 (define_insn "sse2_cvtsd2si<round_name>"
4143 [(set (match_operand:SI 0 "register_operand" "=r,r")
4146 (match_operand:V2DF 1 "<round_nimm_predicate>" "v,<round_constraint2>")
4147 (parallel [(const_int 0)]))]
4148 UNSPEC_FIX_NOTRUNC))]
4150 "%vcvtsd2si\t{<round_op2>%1, %0|%0, %q1<round_op2>}"
4151 [(set_attr "type" "sseicvt")
4152 (set_attr "athlon_decode" "double,vector")
4153 (set_attr "bdver1_decode" "double,double")
4154 (set_attr "btver2_decode" "double,double")
4155 (set_attr "prefix_rep" "1")
4156 (set_attr "prefix" "maybe_vex")
4157 (set_attr "mode" "SI")])
;; Same instruction as sse2_cvtsd2si, but matching a scalar DFmode operand
;; (register or memory) directly instead of an element of a V2DF vector.
;; UNSPEC_FIX_NOTRUNC; emits "%vcvtsd2si".
4159 (define_insn "sse2_cvtsd2si_2"
4160 [(set (match_operand:SI 0 "register_operand" "=r,r")
4161 (unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "v,m")]
4162 UNSPEC_FIX_NOTRUNC))]
4164 "%vcvtsd2si\t{%1, %0|%0, %q1}"
4165 [(set_attr "type" "sseicvt")
4166 (set_attr "athlon_decode" "double,vector")
4167 (set_attr "amdfam10_decode" "double,double")
4168 (set_attr "bdver1_decode" "double,double")
4169 (set_attr "prefix_rep" "1")
4170 (set_attr "prefix" "maybe_vex")
4171 (set_attr "mode" "SI")])
;; DImode-result variant of sse2_cvtsd2si: element 0 of a V2DF operand ->
;; DImode general register (UNSPEC_FIX_NOTRUNC).
;; Requires TARGET_SSE2 && TARGET_64BIT; emits "%vcvtsd2si{q}".
4173 (define_insn "sse2_cvtsd2siq<round_name>"
4174 [(set (match_operand:DI 0 "register_operand" "=r,r")
4177 (match_operand:V2DF 1 "<round_nimm_predicate>" "v,<round_constraint2>")
4178 (parallel [(const_int 0)]))]
4179 UNSPEC_FIX_NOTRUNC))]
4180 "TARGET_SSE2 && TARGET_64BIT"
4181 "%vcvtsd2si{q}\t{<round_op2>%1, %0|%0, %q1<round_op2>}"
4182 [(set_attr "type" "sseicvt")
4183 (set_attr "athlon_decode" "double,vector")
4184 (set_attr "bdver1_decode" "double,double")
4185 (set_attr "prefix_rep" "1")
4186 (set_attr "prefix" "maybe_vex")
4187 (set_attr "mode" "DI")])
;; DImode-result counterpart of sse2_cvtsd2si_2: matches a scalar DFmode
;; operand directly (UNSPEC_FIX_NOTRUNC).
;; Requires TARGET_SSE2 && TARGET_64BIT; emits "%vcvtsd2si{q}".
4189 (define_insn "sse2_cvtsd2siq_2"
4190 [(set (match_operand:DI 0 "register_operand" "=r,r")
4191 (unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "v,m")]
4192 UNSPEC_FIX_NOTRUNC))]
4193 "TARGET_SSE2 && TARGET_64BIT"
4194 "%vcvtsd2si{q}\t{%1, %0|%0, %q1}"
4195 [(set_attr "type" "sseicvt")
4196 (set_attr "athlon_decode" "double,vector")
4197 (set_attr "amdfam10_decode" "double,double")
4198 (set_attr "bdver1_decode" "double,double")
4199 (set_attr "prefix_rep" "1")
4200 (set_attr "prefix" "maybe_vex")
4201 (set_attr "mode" "DI")])
;; Element 0 of a V2DF operand -> SImode general register; emits
;; "%vcvttsd2si" with the <round_saeonly_op2> placeholder.
4203 (define_insn "sse2_cvttsd2si<round_saeonly_name>"
4204 [(set (match_operand:SI 0 "register_operand" "=r,r")
4207 (match_operand:V2DF 1 "<round_saeonly_nimm_predicate>" "v,<round_saeonly_constraint2>")
4208 (parallel [(const_int 0)]))))]
4210 "%vcvttsd2si\t{<round_saeonly_op2>%1, %0|%0, %q1<round_saeonly_op2>}"
4211 [(set_attr "type" "sseicvt")
4212 (set_attr "athlon_decode" "double,vector")
4213 (set_attr "amdfam10_decode" "double,double")
4214 (set_attr "bdver1_decode" "double,double")
4215 (set_attr "btver2_decode" "double,double")
4216 (set_attr "prefix_rep" "1")
4217 (set_attr "prefix" "maybe_vex")
4218 (set_attr "mode" "SI")])
;; DImode-result variant of sse2_cvttsd2si: element 0 of a V2DF operand ->
;; DImode general register.  Requires TARGET_SSE2 && TARGET_64BIT; emits
;; "%vcvttsd2si{q}".
4220 (define_insn "sse2_cvttsd2siq<round_saeonly_name>"
4221 [(set (match_operand:DI 0 "register_operand" "=r,r")
4224 (match_operand:V2DF 1 "<round_saeonly_nimm_predicate>" "v,<round_saeonly_constraint2>")
4225 (parallel [(const_int 0)]))))]
4226 "TARGET_SSE2 && TARGET_64BIT"
4227 "%vcvttsd2si{q}\t{<round_saeonly_op2>%1, %0|%0, %q1<round_saeonly_op2>}"
4228 [(set_attr "type" "sseicvt")
4229 (set_attr "athlon_decode" "double,vector")
4230 (set_attr "amdfam10_decode" "double,double")
4231 (set_attr "bdver1_decode" "double,double")
4232 (set_attr "prefix_rep" "1")
4233 (set_attr "prefix" "maybe_vex")
4234 (set_attr "mode" "DI")])
4236 ;; For float<si2dfmode><mode>2 insn pattern
;; si2dfmode maps a DF vector mode to the SI vector mode with the same
;; element count (V8DF -> V8SI, V4DF -> V4SI); si2dfmodelower is the same
;; mapping spelled in lowercase for use inside pattern names.
4237 (define_mode_attr si2dfmode
4238 [(V8DF "V8SI") (V4DF "V4SI")])
4239 (define_mode_attr si2dfmodelower
4240 [(V8DF "v8si") (V4DF "v4si")])
;; Signed SI vector -> DF vector conversion (RTL `float') for the
;; VF2_512_256 modes; the source has mode <si2dfmode>.  Emits vcvtdq2pd.
4242 (define_insn "float<si2dfmodelower><mode>2<mask_name>"
4243 [(set (match_operand:VF2_512_256 0 "register_operand" "=v")
4244 (float:VF2_512_256 (match_operand:<si2dfmode> 1 "nonimmediate_operand" "vm")))]
4245 "<mask_mode512bit_condition>"
4246 "vcvtdq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4247 [(set_attr "type" "ssecvt")
4248 (set_attr "prefix" "maybe_vex")
4249 (set_attr "mode" "<MODE>")])
;; Integer vector (<sseintvecmode>) -> DF vector conversion over the
;; VF2_AVX512VL modes, iterated over any_float.  The mnemonic is built as
;; vcvt<floatsuffix>qq2pd, so <floatsuffix> selects the variant emitted.
4251 (define_insn "<floatsuffix>float<sseintvecmodelower><mode>2<mask_name><round_name>"
4252 [(set (match_operand:VF2_AVX512VL 0 "register_operand" "=v")
4253 (any_float:VF2_AVX512VL
4254 (match_operand:<sseintvecmode> 1 "nonimmediate_operand" "vm")))]
4256 "vcvt<floatsuffix>qq2pd\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4257 [(set_attr "type" "ssecvt")
4258 (set_attr "prefix" "evex")
4259 (set_attr "mode" "<MODE>")])
4261 ;; For <floatsuffix>float<sselongvecmodelower><mode> insn patterns
;; qq2pssuff: mnemonic suffix per SF mode ("" for V8SF, "{y}" for V4SF).
4262 (define_mode_attr qq2pssuff
4263 [(V8SF "") (V4SF "{y}")])
;; sselongvecmode / sselongvecmodelower: DI vector mode with the same element
;; count as the SF mode, in upper- and lowercase spelling.
4265 (define_mode_attr sselongvecmode
4266 [(V8SF "V8DI") (V4SF "V4DI")])
4268 (define_mode_attr sselongvecmodelower
4269 [(V8SF "v8di") (V4SF "v4di")])
;; sseintvecmode3: value used for the insn "mode" attribute by patterns
;; iterating over these float modes.
4271 (define_mode_attr sseintvecmode3
4272 [(V8SF "XI") (V4SF "OI")
4273 (V8DF "OI") (V4DF "TI")])
;; DI vector (<sselongvecmode>) -> SF vector conversion over the
;; VF1_128_256VL modes, iterated over any_float.  Requires TARGET_AVX512DQ.
;; The mnemonic is vcvt<floatsuffix>qq2ps followed by the <qq2pssuff> suffix.
4275 (define_insn "<floatsuffix>float<sselongvecmodelower><mode>2<mask_name><round_name>"
4276 [(set (match_operand:VF1_128_256VL 0 "register_operand" "=v")
4277 (any_float:VF1_128_256VL
4278 (match_operand:<sselongvecmode> 1 "nonimmediate_operand" "<round_constraint>")))]
4279 "TARGET_AVX512DQ && <round_modev8sf_condition>"
4280 "vcvt<floatsuffix>qq2ps<qq2pssuff>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4281 [(set_attr "type" "ssecvt")
4282 (set_attr "prefix" "evex")
4283 (set_attr "mode" "<MODE>")])
;; V2DI -> V2SF conversion (any_float) producing a V4SF destination; the
;; pattern also carries an all-zero V2SF const_vector (presumably the upper
;; half of the result -- confirm against the full RTL).
;; Requires TARGET_AVX512DQ && TARGET_AVX512VL; emits
;; vcvt<floatsuffix>qq2ps{x}.
4285 (define_insn "*<floatsuffix>floatv2div2sf2"
4286 [(set (match_operand:V4SF 0 "register_operand" "=v")
4288 (any_float:V2SF (match_operand:V2DI 1 "nonimmediate_operand" "vm"))
4289 (const_vector:V2SF [(const_int 0) (const_int 0)])))]
4290 "TARGET_AVX512DQ && TARGET_AVX512VL"
4291 "vcvt<floatsuffix>qq2ps{x}\t{%1, %0|%0, %1}"
4292 [(set_attr "type" "ssecvt")
4293 (set_attr "prefix" "evex")
4294 (set_attr "mode" "V4SF")])
;; Masked form of the V2DI -> V2SF conversion.  Operand 2 is a V4SF
;; vector_move_operand (constraint "0C") of which elements 0 and 1 are
;; selected; operand 3 is the QImode mask in a "Yk" register.  The template
;; prints the mask as %{%3%} followed by %N2.
;; Requires TARGET_AVX512DQ && TARGET_AVX512VL.
4296 (define_insn "<floatsuffix>floatv2div2sf2_mask"
4297 [(set (match_operand:V4SF 0 "register_operand" "=v")
4300 (any_float:V2SF (match_operand:V2DI 1 "nonimmediate_operand" "vm"))
4302 (match_operand:V4SF 2 "vector_move_operand" "0C")
4303 (parallel [(const_int 0) (const_int 1)]))
4304 (match_operand:QI 3 "register_operand" "Yk"))
4305 (const_vector:V2SF [(const_int 0) (const_int 0)])))]
4306 "TARGET_AVX512DQ && TARGET_AVX512VL"
4307 "vcvt<floatsuffix>qq2ps{x}\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
4308 [(set_attr "type" "ssecvt")
4309 (set_attr "prefix" "evex")
4310 (set_attr "mode" "V4SF")])
;; Unsigned SI vector (<si2dfmode>) -> DF vector conversion
;; (RTL `unsigned_float') over the VF2_512_256VL modes; emits vcvtudq2pd.
4312 (define_insn "ufloat<si2dfmodelower><mode>2<mask_name>"
4313 [(set (match_operand:VF2_512_256VL 0 "register_operand" "=v")
4314 (unsigned_float:VF2_512_256VL
4315 (match_operand:<si2dfmode> 1 "nonimmediate_operand" "vm")))]
4317 "vcvtudq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4318 [(set_attr "type" "ssecvt")
4319 (set_attr "prefix" "evex")
4320 (set_attr "mode" "<MODE>")])
;; 128-bit unsigned conversion: V2DF result from elements 0 and 1 of a V4SI
;; operand (RTL `unsigned_float'); emits vcvtudq2pd.
4322 (define_insn "ufloatv2siv2df2<mask_name>"
4323 [(set (match_operand:V2DF 0 "register_operand" "=v")
4324 (unsigned_float:V2DF
4326 (match_operand:V4SI 1 "nonimmediate_operand" "vm")
4327 (parallel [(const_int 0) (const_int 1)]))))]
4329 "vcvtudq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4330 [(set_attr "type" "ssecvt")
4331 (set_attr "prefix" "evex")
4332 (set_attr "mode" "V2DF")])
;; V8DF result from elements 0..7 of a V16SI operand; emits vcvtdq2pd with
;; operand 1 printed through the %t modifier.
4334 (define_insn "avx512f_cvtdq2pd512_2"
4335 [(set (match_operand:V8DF 0 "register_operand" "=v")
4338 (match_operand:V16SI 1 "nonimmediate_operand" "vm")
4339 (parallel [(const_int 0) (const_int 1)
4340 (const_int 2) (const_int 3)
4341 (const_int 4) (const_int 5)
4342 (const_int 6) (const_int 7)]))))]
4344 "vcvtdq2pd\t{%t1, %0|%0, %t1}"
4345 [(set_attr "type" "ssecvt")
4346 (set_attr "prefix" "evex")
4347 (set_attr "mode" "V8DF")])
;; V4DF result from elements 0..3 of a V8SI operand; emits vcvtdq2pd with
;; operand 1 printed through the %x modifier.
4349 (define_insn "avx_cvtdq2pd256_2"
4350 [(set (match_operand:V4DF 0 "register_operand" "=v")
4353 (match_operand:V8SI 1 "nonimmediate_operand" "vm")
4354 (parallel [(const_int 0) (const_int 1)
4355 (const_int 2) (const_int 3)]))))]
4357 "vcvtdq2pd\t{%x1, %0|%0, %x1}"
4358 [(set_attr "type" "ssecvt")
4359 (set_attr "prefix" "maybe_evex")
4360 (set_attr "mode" "V4DF")])
;; V2DF result from elements 0 and 1 of a V4SI operand.  Requires
;; TARGET_SSE2; emits "%vcvtdq2pd" (Intel-syntax operand printed via %q1).
;; The "ssememalign" attribute is set to "64".
4362 (define_insn "sse2_cvtdq2pd<mask_name>"
4363 [(set (match_operand:V2DF 0 "register_operand" "=v")
4366 (match_operand:V4SI 1 "nonimmediate_operand" "vm")
4367 (parallel [(const_int 0) (const_int 1)]))))]
4368 "TARGET_SSE2 && <mask_mode512bit_condition>"
4369 "%vcvtdq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
4370 [(set_attr "type" "ssecvt")
4371 (set_attr "prefix" "maybe_vex")
4372 (set_attr "ssememalign" "64")
4373 (set_attr "mode" "V2DF")])
;; V8DF -> V8SI conversion tagged UNSPEC_FIX_NOTRUNC, EVEX-encoded; emits
;; vcvtpd2dq with optional mask/rounding placeholders.
4375 (define_insn "<mask_codefor>avx512f_cvtpd2dq512<mask_name><round_name>"
4376 [(set (match_operand:V8SI 0 "register_operand" "=v")
4378 [(match_operand:V8DF 1 "<round_nimm_predicate>" "<round_constraint>")]
4379 UNSPEC_FIX_NOTRUNC))]
4381 "vcvtpd2dq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4382 [(set_attr "type" "ssecvt")
4383 (set_attr "prefix" "evex")
4384 (set_attr "mode" "OI")])
;; V4DF -> V4SI conversion tagged UNSPEC_FIX_NOTRUNC.  Requires TARGET_AVX;
;; emits vcvtpd2dq{y}.
4386 (define_insn "avx_cvtpd2dq256<mask_name>"
4387 [(set (match_operand:V4SI 0 "register_operand" "=v")
4388 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "vm")]
4389 UNSPEC_FIX_NOTRUNC))]
4390 "TARGET_AVX && <mask_mode512bit_condition>"
4391 "vcvtpd2dq{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4392 [(set_attr "type" "ssecvt")
4393 (set_attr "prefix" "<mask_prefix>")
4394 (set_attr "mode" "OI")])
;; Expander with a V8SI destination built around the V4DF -> V4SI unspec
;; conversion.  The preparation statement supplies operands[2] as the V4SI
;; zero constant.
4396 (define_expand "avx_cvtpd2dq256_2"
4397 [(set (match_operand:V8SI 0 "register_operand")
4399 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand")]
4403 "operands[2] = CONST0_RTX (V4SImode);")
;; Insn matched for the avx_cvtpd2dq256_2 expansion: V8SI destination, a
;; V4DF -> V4SI unspec conversion, and a V4SI const0_operand as operand 2.
;; Emits vcvtpd2dq{y}, printing the destination through the %x modifier.
4405 (define_insn "*avx_cvtpd2dq256_2"
4406 [(set (match_operand:V8SI 0 "register_operand" "=x")
4408 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")]
4410 (match_operand:V4SI 2 "const0_operand")))]
4412 "vcvtpd2dq{y}\t{%1, %x0|%x0, %1}"
4413 [(set_attr "type" "ssecvt")
4414 (set_attr "prefix" "vex")
4415 (set_attr "btver2_decode" "vector")
4416 (set_attr "mode" "OI")])
;; V2DF -> V2SI unspec conversion into a V4SI destination, alongside an
;; all-zero V2SI const_vector.  Requires TARGET_SSE2.  The C output code
;; returns one of two templates: vcvtpd2dq{x} (with mask placeholders) or
;; plain cvtpd2dq.
4418 (define_insn "sse2_cvtpd2dq<mask_name>"
4419 [(set (match_operand:V4SI 0 "register_operand" "=v")
4421 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "vm")]
4423 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
4424 "TARGET_SSE2 && <mask_mode512bit_condition>"
4427 return "vcvtpd2dq{x}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
4429 return "cvtpd2dq\t{%1, %0|%0, %1}";
4431 [(set_attr "type" "ssecvt")
4432 (set_attr "prefix_rep" "1")
4433 (set_attr "prefix_data16" "0")
4434 (set_attr "prefix" "maybe_vex")
4435 (set_attr "mode" "TI")
4436 (set_attr "amdfam10_decode" "double")
4437 (set_attr "athlon_decode" "vector")
4438 (set_attr "bdver1_decode" "double")])
4440 ;; For ufix_notrunc* insn patterns
;; Mnemonic suffix per DF mode: "" for V8DF, "{y}" for V4DF.
4441 (define_mode_attr pd2udqsuff
4442 [(V8DF "") (V4DF "{y}")])
;; DF vector (VF2_512_256VL) -> unsigned SI vector (<si2dfmode>) conversion
;; tagged UNSPEC_UNSIGNED_FIX_NOTRUNC; emits vcvtpd2udq plus the
;; <pd2udqsuff> suffix.
;; NOTE(review): operand 1 pairs "nonimmediate_operand" with
;; <round_constraint>; avx512f_cvtpd2dq512 uses <round_nimm_predicate> --
;; confirm the predicate is intended.
4444 (define_insn "ufix_notrunc<mode><si2dfmodelower>2<mask_name><round_name>"
4445 [(set (match_operand:<si2dfmode> 0 "register_operand" "=v")
4447 [(match_operand:VF2_512_256VL 1 "nonimmediate_operand" "<round_constraint>")]
4448 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4450 "vcvtpd2udq<pd2udqsuff>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4451 [(set_attr "type" "ssecvt")
4452 (set_attr "prefix" "evex")
4453 (set_attr "mode" "<sseinsnmode>")])
;; 128-bit form: V2DF operand converted under UNSPEC_UNSIGNED_FIX_NOTRUNC
;; into a V4SI destination, alongside an all-zero V2SI const_vector.
;; Emits vcvtpd2udq{x}.
4455 (define_insn "ufix_notruncv2dfv2si2<mask_name>"
4456 [(set (match_operand:V4SI 0 "register_operand" "=v")
4459 [(match_operand:V2DF 1 "nonimmediate_operand" "vm")]
4460 UNSPEC_UNSIGNED_FIX_NOTRUNC)
4461 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
4463 "vcvtpd2udq{x}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4464 [(set_attr "type" "ssecvt")
4465 (set_attr "prefix" "evex")
4466 (set_attr "mode" "TI")])
;; V8DF -> V8SI conversion, EVEX-encoded.  The mnemonic is built as
;; vcvttpd2<fixsuffix>dq, so <fixsuffix> selects the variant emitted.
4468 (define_insn "<fixsuffix>fix_truncv8dfv8si2<mask_name><round_saeonly_name>"
4469 [(set (match_operand:V8SI 0 "register_operand" "=v")
4471 (match_operand:V8DF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
4473 "vcvttpd2<fixsuffix>dq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
4474 [(set_attr "type" "ssecvt")
4475 (set_attr "prefix" "evex")
4476 (set_attr "mode" "OI")])
;; 128-bit unsigned fix: (unsigned_fix:V2SI ...) of a V2DF operand into a
;; V4SI destination, alongside an all-zero V2SI const_vector.
;; Emits vcvttpd2udq{x}.
4478 (define_insn "ufix_truncv2dfv2si2<mask_name>"
4479 [(set (match_operand:V4SI 0 "register_operand" "=v")
4481 (unsigned_fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "vm"))
4482 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
4484 "vcvttpd2udq{x}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4485 [(set_attr "type" "ssecvt")
4486 (set_attr "prefix" "evex")
4487 (set_attr "mode" "TI")])
;; V4DF -> V4SI conversion iterated over any_fix.  The condition allows the
;; pattern when TARGET_AVX holds and <ufix_bool> is false, or when both
;; TARGET_AVX512VL and TARGET_AVX512F hold.  Emits vcvttpd2<fixsuffix>dq{y}.
4489 (define_insn "<fixsuffix>fix_truncv4dfv4si2<mask_name>"
4490 [(set (match_operand:V4SI 0 "register_operand" "=v")
4491 (any_fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "vm")))]
4492 "(TARGET_AVX && !<ufix_bool>) || (TARGET_AVX512VL && TARGET_AVX512F)"
4493 "vcvttpd2<fixsuffix>dq{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4494 [(set_attr "type" "ssecvt")
4495 (set_attr "prefix" "maybe_evex")
4496 (set_attr "mode" "OI")])
;; DF vector (VF2_AVX512VL) -> integer vector (<sseintvecmode>) conversion
;; iterated over any_fix.  Requires TARGET_AVX512DQ; emits
;; vcvttpd2<fixsuffix>qq.
4498 (define_insn "<fixsuffix>fix_trunc<mode><sseintvecmodelower>2<mask_name><round_saeonly_name>"
4499 [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
4500 (any_fix:<sseintvecmode>
4501 (match_operand:VF2_AVX512VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
4502 "TARGET_AVX512DQ && <round_saeonly_mode512bit_condition>"
4503 "vcvttpd2<fixsuffix>qq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
4504 [(set_attr "type" "ssecvt")
4505 (set_attr "prefix" "evex")
4506 (set_attr "mode" "<sseintvecmode2>")])
;; DF vector (VF2_AVX512VL) -> integer vector (<sseintvecmode>) conversion
;; tagged UNSPEC_FIX_NOTRUNC.  Requires TARGET_AVX512DQ; emits vcvtpd2qq.
4508 (define_insn "fix_notrunc<mode><sseintvecmodelower>2<mask_name><round_name>"
4509 [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
4510 (unspec:<sseintvecmode>
4511 [(match_operand:VF2_AVX512VL 1 "<round_nimm_predicate>" "<round_constraint>")]
4512 UNSPEC_FIX_NOTRUNC))]
4513 "TARGET_AVX512DQ && <round_mode512bit_condition>"
4514 "vcvtpd2qq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4515 [(set_attr "type" "ssecvt")
4516 (set_attr "prefix" "evex")
4517 (set_attr "mode" "<sseintvecmode2>")])
;; Unsigned counterpart of fix_notrunc<mode><sseintvecmodelower>2:
;; UNSPEC_UNSIGNED_FIX_NOTRUNC over VF2_AVX512VL.  Requires TARGET_AVX512DQ;
;; emits vcvtpd2uqq.
;; NOTE(review): operand 1 uses "nonimmediate_operand" where the signed
;; pattern uses <round_nimm_predicate> -- confirm the difference is intended.
4519 (define_insn "ufix_notrunc<mode><sseintvecmodelower>2<mask_name><round_name>"
4520 [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
4521 (unspec:<sseintvecmode>
4522 [(match_operand:VF2_AVX512VL 1 "nonimmediate_operand" "<round_constraint>")]
4523 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4524 "TARGET_AVX512DQ && <round_mode512bit_condition>"
4525 "vcvtpd2uqq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4526 [(set_attr "type" "ssecvt")
4527 (set_attr "prefix" "evex")
4528 (set_attr "mode" "<sseintvecmode2>")])
;; SF vector (VF1_128_256VL) -> DI vector (<sselongvecmode>) conversion
;; iterated over any_fix.  Requires TARGET_AVX512DQ; emits
;; vcvttps2<fixsuffix>qq.
4530 (define_insn "<fixsuffix>fix_trunc<mode><sselongvecmodelower>2<mask_name><round_saeonly_name>"
4531 [(set (match_operand:<sselongvecmode> 0 "register_operand" "=v")
4532 (any_fix:<sselongvecmode>
4533 (match_operand:VF1_128_256VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
4534 "TARGET_AVX512DQ && <round_saeonly_modev8sf_condition>"
4535 "vcvttps2<fixsuffix>qq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
4536 [(set_attr "type" "ssecvt")
4537 (set_attr "prefix" "evex")
4538 (set_attr "mode" "<sseintvecmode3>")])
;; 128-bit form: V2DI result from elements 0 and 1 of a V4SF operand.
;; Requires TARGET_AVX512DQ && TARGET_AVX512VL; emits vcvttps2<fixsuffix>qq.
4540 (define_insn "<fixsuffix>fix_truncv2sfv2di2<mask_name>"
4541 [(set (match_operand:V2DI 0 "register_operand" "=v")
4544 (match_operand:V4SF 1 "nonimmediate_operand" "vm")
4545 (parallel [(const_int 0) (const_int 1)]))))]
4546 "TARGET_AVX512DQ && TARGET_AVX512VL"
4547 "vcvttps2<fixsuffix>qq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4548 [(set_attr "type" "ssecvt")
4549 (set_attr "prefix" "evex")
4550 (set_attr "mode" "TI")])
;; SF vector (VF1_128_256VL) -> unsigned integer vector (<sseintvecmode>)
;; conversion (RTL `unsigned_fix'); emits vcvttps2udq.
4552 (define_insn "ufix_trunc<mode><sseintvecmodelower>2<mask_name>"
4553 [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
4554 (unsigned_fix:<sseintvecmode>
4555 (match_operand:VF1_128_256VL 1 "nonimmediate_operand" "vm")))]
4557 "vcvttps2udq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4558 [(set_attr "type" "ssecvt")
4559 (set_attr "prefix" "evex")
4560 (set_attr "mode" "<sseintvecmode2>")])
;; Expander with a V8SI destination built around (fix:V4SI ...) of a V4DF
;; operand.  The preparation statement supplies operands[2] as the V4SI zero
;; constant.
4562 (define_expand "avx_cvttpd2dq256_2"
4563 [(set (match_operand:V8SI 0 "register_operand")
4565 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand"))
4568 "operands[2] = CONST0_RTX (V4SImode);")
;; (fix:V2SI ...) of a V2DF operand into a V4SI destination, alongside an
;; all-zero V2SI const_vector.  Requires TARGET_SSE2.  The C output code
;; returns one of two templates: vcvttpd2dq{x} (with mask placeholders) or
;; plain cvttpd2dq.
4570 (define_insn "sse2_cvttpd2dq<mask_name>"
4571 [(set (match_operand:V4SI 0 "register_operand" "=v")
4573 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "vm"))
4574 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
4575 "TARGET_SSE2 && <mask_mode512bit_condition>"
4578 return "vcvttpd2dq{x}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
4580 return "cvttpd2dq\t{%1, %0|%0, %1}";
4582 [(set_attr "type" "ssecvt")
4583 (set_attr "amdfam10_decode" "double")
4584 (set_attr "athlon_decode" "vector")
4585 (set_attr "bdver1_decode" "double")
4586 (set_attr "prefix" "maybe_vex")
4587 (set_attr "mode" "TI")])
;; float_truncate of V2DF operand 2 (to V2SF) combined with V4SF operand 1
;; into the V4SF destination.  Alternatives 0/1 (isa noavx) tie operand 1 to
;; the destination and emit cvtsd2ss; alternative 2 (isa avx) emits
;; three-operand vcvtsd2ss and is the only one carrying <round_op3>.
;; NOTE(review): operand 2 pairs "nonimmediate_operand" with
;; <round_constraint>; sse2_cvtss2sd uses <round_saeonly_nimm_predicate> for
;; the analogous operand -- confirm the predicate is intended.
4589 (define_insn "sse2_cvtsd2ss<round_name>"
4590 [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
4593 (float_truncate:V2SF
4594 (match_operand:V2DF 2 "nonimmediate_operand" "x,m,<round_constraint>")))
4595 (match_operand:V4SF 1 "register_operand" "0,0,v")
4599 cvtsd2ss\t{%2, %0|%0, %2}
4600 cvtsd2ss\t{%2, %0|%0, %q2}
4601 vcvtsd2ss\t{<round_op3>%2, %1, %0|%0, %1, %q2<round_op3>}"
4602 [(set_attr "isa" "noavx,noavx,avx")
4603 (set_attr "type" "ssecvt")
4604 (set_attr "athlon_decode" "vector,double,*")
4605 (set_attr "amdfam10_decode" "vector,double,*")
4606 (set_attr "bdver1_decode" "direct,direct,*")
4607 (set_attr "btver2_decode" "double,double,double")
4608 (set_attr "prefix" "orig,orig,<round_prefix>")
4609 (set_attr "mode" "SF")])
;; Conversion from elements 0 and 1 of V4SF operand 2, combined with V2DF
;; operand 1 into the V2DF destination.  Alternatives 0/1 (isa noavx) tie
;; operand 1 to the destination and emit cvtss2sd; alternative 2 (isa avx)
;; emits three-operand vcvtss2sd and is the only one carrying
;; <round_saeonly_op3>.
4611 (define_insn "sse2_cvtss2sd<round_saeonly_name>"
4612 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
4616 (match_operand:V4SF 2 "<round_saeonly_nimm_predicate>" "x,m,<round_saeonly_constraint>")
4617 (parallel [(const_int 0) (const_int 1)])))
4618 (match_operand:V2DF 1 "register_operand" "0,0,v")
4622 cvtss2sd\t{%2, %0|%0, %2}
4623 cvtss2sd\t{%2, %0|%0, %k2}
4624 vcvtss2sd\t{<round_saeonly_op3>%2, %1, %0|%0, %1, %k2<round_saeonly_op3>}"
4625 [(set_attr "isa" "noavx,noavx,avx")
4626 (set_attr "type" "ssecvt")
4627 (set_attr "amdfam10_decode" "vector,double,*")
4628 (set_attr "athlon_decode" "direct,direct,*")
4629 (set_attr "bdver1_decode" "direct,direct,*")
4630 (set_attr "btver2_decode" "double,double,double")
4631 (set_attr "prefix" "orig,orig,<round_saeonly_prefix>")
4632 (set_attr "mode" "DF")])
;; V8DF -> V8SF narrowing (RTL `float_truncate'), EVEX-encoded; emits
;; vcvtpd2ps with optional mask/rounding placeholders.
4634 (define_insn "<mask_codefor>avx512f_cvtpd2ps512<mask_name><round_name>"
4635 [(set (match_operand:V8SF 0 "register_operand" "=v")
4636 (float_truncate:V8SF
4637 (match_operand:V8DF 1 "<round_nimm_predicate>" "<round_constraint>")))]
4639 "vcvtpd2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4640 [(set_attr "type" "ssecvt")
4641 (set_attr "prefix" "evex")
4642 (set_attr "mode" "V8SF")])
;; V4DF -> V4SF narrowing (RTL `float_truncate').  Requires TARGET_AVX;
;; emits vcvtpd2ps{y}.
4644 (define_insn "avx_cvtpd2ps256<mask_name>"
4645 [(set (match_operand:V4SF 0 "register_operand" "=v")
4646 (float_truncate:V4SF
4647 (match_operand:V4DF 1 "nonimmediate_operand" "vm")))]
4648 "TARGET_AVX && <mask_mode512bit_condition>"
4649 "vcvtpd2ps{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4650 [(set_attr "type" "ssecvt")
4651 (set_attr "prefix" "maybe_evex")
4652 (set_attr "btver2_decode" "vector")
4653 (set_attr "mode" "V4SF")])
;; Expander with a V4SF destination built around float_truncate:V2SF of a
;; V2DF operand.  The preparation statement supplies operands[2] as the V2SF
;; zero constant.
4655 (define_expand "sse2_cvtpd2ps"
4656 [(set (match_operand:V4SF 0 "register_operand")
4658 (float_truncate:V2SF
4659 (match_operand:V2DF 1 "nonimmediate_operand"))
4662 "operands[2] = CONST0_RTX (V2SFmode);")
;; Masked expander for the V2DF -> V2SF narrowing: operand 2 is a V4SF
;; register and operand 3 a QImode register (the mask).  The preparation
;; statement supplies operands[4] as the V2SF zero constant.
4664 (define_expand "sse2_cvtpd2ps_mask"
4665 [(set (match_operand:V4SF 0 "register_operand")
4668 (float_truncate:V2SF
4669 (match_operand:V2DF 1 "nonimmediate_operand"))
4671 (match_operand:V4SF 2 "register_operand")
4672 (match_operand:QI 3 "register_operand")))]
4674 "operands[4] = CONST0_RTX (V2SFmode);")
;; Insn for the V2DF -> V2SF narrowing into a V4SF destination, with a V2SF
;; const0_operand as operand 2.  Requires TARGET_SSE2.  The C output code
;; returns one of two templates: vcvtpd2ps{x} (using <mask_operand3>) or
;; plain cvtpd2ps.
4676 (define_insn "*sse2_cvtpd2ps<mask_name>"
4677 [(set (match_operand:V4SF 0 "register_operand" "=v")
4679 (float_truncate:V2SF
4680 (match_operand:V2DF 1 "nonimmediate_operand" "vm"))
4681 (match_operand:V2SF 2 "const0_operand")))]
4682 "TARGET_SSE2 && <mask_mode512bit_condition>"
4685 return "vcvtpd2ps{x}\t{%1, %0<mask_operand3>|%0<mask_operand3>, %1}";
4687 return "cvtpd2ps\t{%1, %0|%0, %1}";
4689 [(set_attr "type" "ssecvt")
4690 (set_attr "amdfam10_decode" "double")
4691 (set_attr "athlon_decode" "vector")
4692 (set_attr "bdver1_decode" "double")
4693 (set_attr "prefix_data16" "1")
4694 (set_attr "prefix" "maybe_vex")
4695 (set_attr "mode" "V4SF")])
4697 ;; For <sse2_avx_avx512f>_cvtps2pd<avxsizesuffix> insn pattern
;; Maps a DF vector mode to the SF vector mode with the same element count
;; (V8DF -> V8SF, V4DF -> V4SF).
4698 (define_mode_attr sf2dfmode
4699 [(V8DF "V8SF") (V4DF "V4SF")])
;; SF vector (<sf2dfmode>) -> DF vector widening (RTL `float_extend') over
;; the VF2_512_256 modes.  Requires TARGET_AVX plus the mask and SAE-only
;; substitution conditions; emits vcvtps2pd.
4701 (define_insn "<sse2_avx_avx512f>_cvtps2pd<avxsizesuffix><mask_name><round_saeonly_name>"
4702 [(set (match_operand:VF2_512_256 0 "register_operand" "=v")
4703 (float_extend:VF2_512_256
4704 (match_operand:<sf2dfmode> 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
4705 "TARGET_AVX && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
4706 "vcvtps2pd\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
4707 [(set_attr "type" "ssecvt")
4708 (set_attr "prefix" "maybe_vex")
4709 (set_attr "mode" "<MODE>")])
;; V4DF result from elements 0..3 of a V8SF operand; emits vcvtps2pd with
;; operand 1 printed through the %x modifier (VEX prefix).
4711 (define_insn "*avx_cvtps2pd256_2"
4712 [(set (match_operand:V4DF 0 "register_operand" "=x")
4715 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
4716 (parallel [(const_int 0) (const_int 1)
4717 (const_int 2) (const_int 3)]))))]
4719 "vcvtps2pd\t{%x1, %0|%0, %x1}"
4720 [(set_attr "type" "ssecvt")
4721 (set_attr "prefix" "vex")
4722 (set_attr "mode" "V4DF")])
;; Emits vcvtps2pd into V8DF register operand 0 from V16SF operand 1; the
;; parallel lists elements 0-7 of the source, and the template prints the
;; source with the %t operand modifier.  Marked with the "evex" prefix.
4724 (define_insn "vec_unpacks_lo_v16sf"
4725 [(set (match_operand:V8DF 0 "register_operand" "=v")
4728 (match_operand:V16SF 1 "nonimmediate_operand" "vm")
4729 (parallel [(const_int 0) (const_int 1)
4730 (const_int 2) (const_int 3)
4731 (const_int 4) (const_int 5)
4732 (const_int 6) (const_int 7)]))))]
4734 "vcvtps2pd\t{%t1, %0|%0, %t1}"
4735 [(set_attr "type" "ssecvt")
4736 (set_attr "prefix" "evex")
4737 (set_attr "mode" "V8DF")])
;; Vector-to-mask conversion: sets mask register operand 0 ("Yk", mode
;; <avx512fmaskmode>) to UNSPEC_CVTINT2MASK of VI_AVX512VL register operand 1
;; and emits vpmov<ssemodesuffix>2m.  The insn condition tests QImode/HImode
;; element modes in one group and SImode/DImode element modes in another.
4739 (define_insn "<avx512>_cvt<ssemodesuffix>2mask<mode>"
4740 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
4741 (unspec:<avx512fmaskmode>
4742 [(match_operand:VI_AVX512VL 1 "register_operand" "v")]
4743 UNSPEC_CVTINT2MASK))]
4745 && (<ssescalarmode>mode == QImode
4746 || <ssescalarmode>mode == HImode))
4748 && (<ssescalarmode>mode == SImode
4749 || <ssescalarmode>mode == DImode)))"
4750 "vpmov<ssemodesuffix>2m\t{%1, %0|%0, %1}"
4751 [(set_attr "prefix" "evex")
4752 (set_attr "mode" "<sseinsnmode>")])
;; Mask-to-vector conversion: sets VI_AVX512VL register operand 0 from mask
;; register operand 1 ("Yk", mode <avx512fmaskmode>) and emits
;; vpmovm2<ssemodesuffix>.  Note that this pattern is also tagged with
;; UNSPEC_CVTINT2MASK.  The insn condition tests QImode/HImode element modes
;; in one group and SImode/DImode element modes in another.
4754 (define_insn "<avx512>_cvtmask2<ssemodesuffix><mode>"
4755 [(set (match_operand:VI_AVX512VL 0 "register_operand" "=v")
4757 [(match_operand:<avx512fmaskmode> 1 "register_operand" "Yk")]
4758 UNSPEC_CVTINT2MASK))]
4760 && (<ssescalarmode>mode == QImode
4761 || <ssescalarmode>mode == HImode))
4763 && (<ssescalarmode>mode == SImode
4764 || <ssescalarmode>mode == DImode)))"
4765 "vpmovm2<ssemodesuffix>\t{%1, %0|%0, %1}"
4766 [(set_attr "prefix" "evex")
4767 (set_attr "mode" "<sseinsnmode>")])
;; Emits %vcvtps2pd into V2DF register operand 0 from V4SF operand 1; the
;; parallel lists elements 0 and 1 of the source.  In the Intel-syntax half
;; of the template the source is printed with the %q modifier.  Enabled for
;; TARGET_SSE2 together with <mask_mode512bit_condition>.
4769 (define_insn "sse2_cvtps2pd<mask_name>"
4770 [(set (match_operand:V2DF 0 "register_operand" "=v")
4773 (match_operand:V4SF 1 "nonimmediate_operand" "vm")
4774 (parallel [(const_int 0) (const_int 1)]))))]
4775 "TARGET_SSE2 && <mask_mode512bit_condition>"
4776 "%vcvtps2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
4777 [(set_attr "type" "ssecvt")
4778 (set_attr "amdfam10_decode" "direct")
4779 (set_attr "athlon_decode" "double")
4780 (set_attr "bdver1_decode" "double")
4781 (set_attr "prefix_data16" "0")
4782 (set_attr "prefix" "maybe_vex")
4783 (set_attr "mode" "V2DF")])
;; Two-step expander.  The first step applies the element selection
;; (6 7 2 3) involving V4SF operand 1; the second sets V2DF operand 0 using
;; elements 0 and 1.  The preparation statement allocates operands[2] as a
;; fresh V4SF pseudo register.
4785 (define_expand "vec_unpacks_hi_v4sf"
4790 (match_operand:V4SF 1 "nonimmediate_operand"))
4791 (parallel [(const_int 6) (const_int 7)
4792 (const_int 2) (const_int 3)])))
4793 (set (match_operand:V2DF 0 "register_operand")
4797 (parallel [(const_int 0) (const_int 1)]))))]
4799 "operands[2] = gen_reg_rtx (V4SFmode);")
;; Two-step expander.  The first step selects elements 4-7 of V8SF
;; operand 1; the second sets V4DF operand 0.  The preparation statement
;; allocates operands[2] as a fresh V4SF pseudo register.
4801 (define_expand "vec_unpacks_hi_v8sf"
4804 (match_operand:V8SF 1 "nonimmediate_operand")
4805 (parallel [(const_int 4) (const_int 5)
4806 (const_int 6) (const_int 7)])))
4807 (set (match_operand:V4DF 0 "register_operand")
4811 "operands[2] = gen_reg_rtx (V4SFmode);")
;; Two-step expander.  The first step selects elements 8-15 of V16SF
;; operand 1; the second sets V8DF operand 0.  The preparation statement
;; allocates operands[2] as a fresh V8SF pseudo register.
4813 (define_expand "vec_unpacks_hi_v16sf"
4816 (match_operand:V16SF 1 "nonimmediate_operand")
4817 (parallel [(const_int 8) (const_int 9)
4818 (const_int 10) (const_int 11)
4819 (const_int 12) (const_int 13)
4820 (const_int 14) (const_int 15)])))
4821 (set (match_operand:V8DF 0 "register_operand")
4825 "operands[2] = gen_reg_rtx (V8SFmode);")
;; Expander setting V2DF operand 0 from V4SF operand 1; the parallel lists
;; elements 0 and 1 of the source.
4827 (define_expand "vec_unpacks_lo_v4sf"
4828 [(set (match_operand:V2DF 0 "register_operand")
4831 (match_operand:V4SF 1 "nonimmediate_operand")
4832 (parallel [(const_int 0) (const_int 1)]))))]
;; Expander setting V4DF operand 0 from V8SF operand 1; the parallel lists
;; elements 0-3 of the source.
4835 (define_expand "vec_unpacks_lo_v8sf"
4836 [(set (match_operand:V4DF 0 "register_operand")
4839 (match_operand:V8SF 1 "nonimmediate_operand")
4840 (parallel [(const_int 0) (const_int 1)
4841 (const_int 2) (const_int 3)]))))]
;; Mode attribute giving, for an integer vector mode, the floating-point
;; vector mode used as the result of the vec_unpack{s,u}_float_{hi,lo}_<mode>
;; expanders: V8HI -> V4SF, V4SI -> V2DF, V16HI -> V8SF, V8SI -> V4DF,
;; V32HI -> V16SF, V16SI -> V8DF.
4844 (define_mode_attr sseunpackfltmode
4845 [(V8HI "V4SF") (V4SI "V2DF") (V16HI "V8SF")
4846 (V8SI "V4DF") (V32HI "V16SF") (V16SI "V8DF")])
;; Expander for VI2_AVX512F inputs: unpacks operand 1 with
;; gen_vec_unpacks_hi_<mode> into a temporary of <sseunpackmode>, then sets
;; operand 0 (mode <sseunpackfltmode>) to the FLOAT of that temporary.
4848 (define_expand "vec_unpacks_float_hi_<mode>"
4849 [(match_operand:<sseunpackfltmode> 0 "register_operand")
4850 (match_operand:VI2_AVX512F 1 "register_operand")]
4853 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
4855 emit_insn (gen_vec_unpacks_hi_<mode> (tmp, operands[1]));
4856 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
4857 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
;; Expander for VI2_AVX512F inputs: unpacks operand 1 with
;; gen_vec_unpacks_lo_<mode> into a temporary of <sseunpackmode>, then sets
;; operand 0 (mode <sseunpackfltmode>) to the FLOAT of that temporary.
4861 (define_expand "vec_unpacks_float_lo_<mode>"
4862 [(match_operand:<sseunpackfltmode> 0 "register_operand")
4863 (match_operand:VI2_AVX512F 1 "register_operand")]
4866 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
4868 emit_insn (gen_vec_unpacks_lo_<mode> (tmp, operands[1]));
4869 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
4870 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
;; Expander for VI2_AVX512F inputs: unpacks operand 1 with
;; gen_vec_unpacku_hi_<mode> into a temporary of <sseunpackmode>, then sets
;; operand 0 (mode <sseunpackfltmode>) to the FLOAT of that temporary (the
;; same FLOAT rtx code as the vec_unpacks_ variant).
4874 (define_expand "vec_unpacku_float_hi_<mode>"
4875 [(match_operand:<sseunpackfltmode> 0 "register_operand")
4876 (match_operand:VI2_AVX512F 1 "register_operand")]
4879 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
4881 emit_insn (gen_vec_unpacku_hi_<mode> (tmp, operands[1]));
4882 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
4883 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
;; Expander for VI2_AVX512F inputs: unpacks operand 1 with
;; gen_vec_unpacku_lo_<mode> into a temporary of <sseunpackmode>, then sets
;; operand 0 (mode <sseunpackfltmode>) to the FLOAT of that temporary (the
;; same FLOAT rtx code as the vec_unpacks_ variant).
4887 (define_expand "vec_unpacku_float_lo_<mode>"
4888 [(match_operand:<sseunpackfltmode> 0 "register_operand")
4889 (match_operand:VI2_AVX512F 1 "register_operand")]
4892 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
4894 emit_insn (gen_vec_unpacku_lo_<mode> (tmp, operands[1]));
4895 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
4896 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
;; Two-step expander.  The first step applies the element selection
;; (2 3 2 3) to V4SI operand 1; the second sets V2DF operand 0 using
;; elements 0 and 1.  The preparation statement allocates operands[2] as a
;; fresh V4SI pseudo register.
4900 (define_expand "vec_unpacks_float_hi_v4si"
4903 (match_operand:V4SI 1 "nonimmediate_operand")
4904 (parallel [(const_int 2) (const_int 3)
4905 (const_int 2) (const_int 3)])))
4906 (set (match_operand:V2DF 0 "register_operand")
4910 (parallel [(const_int 0) (const_int 1)]))))]
4912 "operands[2] = gen_reg_rtx (V4SImode);")
;; Expander setting V2DF operand 0 from V4SI operand 1; the parallel lists
;; elements 0 and 1 of the source.
4914 (define_expand "vec_unpacks_float_lo_v4si"
4915 [(set (match_operand:V2DF 0 "register_operand")
4918 (match_operand:V4SI 1 "nonimmediate_operand")
4919 (parallel [(const_int 0) (const_int 1)]))))]
;; Two-step expander.  The first step selects elements 4-7 of V8SI
;; operand 1; the second sets V4DF operand 0.  The preparation statement
;; allocates operands[2] as a fresh V4SI pseudo register.
4922 (define_expand "vec_unpacks_float_hi_v8si"
4925 (match_operand:V8SI 1 "nonimmediate_operand")
4926 (parallel [(const_int 4) (const_int 5)
4927 (const_int 6) (const_int 7)])))
4928 (set (match_operand:V4DF 0 "register_operand")
4932 "operands[2] = gen_reg_rtx (V4SImode);")
;; Expander setting V4DF operand 0 from V8SI operand 1; the parallel lists
;; elements 0-3 of the source.
4934 (define_expand "vec_unpacks_float_lo_v8si"
4935 [(set (match_operand:V4DF 0 "register_operand")
4938 (match_operand:V8SI 1 "nonimmediate_operand")
4939 (parallel [(const_int 0) (const_int 1)
4940 (const_int 2) (const_int 3)]))))]
;; Two-step expander.  The first step selects elements 8-15 of V16SI
;; operand 1; the second sets V8DF operand 0.  The preparation statement
;; allocates operands[2] as a fresh V8SI pseudo register.
4943 (define_expand "vec_unpacks_float_hi_v16si"
4946 (match_operand:V16SI 1 "nonimmediate_operand")
4947 (parallel [(const_int 8) (const_int 9)
4948 (const_int 10) (const_int 11)
4949 (const_int 12) (const_int 13)
4950 (const_int 14) (const_int 15)])))
4951 (set (match_operand:V8DF 0 "register_operand")
4955 "operands[2] = gen_reg_rtx (V8SImode);")
;; Expander setting V8DF operand 0 from V16SI operand 1; the parallel lists
;; elements 0-7 of the source.
4957 (define_expand "vec_unpacks_float_lo_v16si"
4958 [(set (match_operand:V8DF 0 "register_operand")
4961 (match_operand:V16SI 1 "nonimmediate_operand")
4962 (parallel [(const_int 0) (const_int 1)
4963 (const_int 2) (const_int 3)
4964 (const_int 4) (const_int 5)
4965 (const_int 6) (const_int 7)]))))]
;; Multi-step expander for the high half of V4SI operand 1 (selection
;; (2 3 2 3), then elements 0 and 1).  After the conversion the sequence
;; computes lt (match_dup 6) < (match_dup 3), ANDs (match_dup 7) with
;; (match_dup 4), and sets V2DF operand 0 to (match_dup 6) + (match_dup 8).
;; Preparation: operands[3] is a V2DF zero vector forced into a register,
;; operands[4] is a V2DF constant vector built from x, where x is the DFmode
;; constant obtained by real_ldexp on dconst1 with exponent 32 (2^32, going
;; by the TWO32r name), operands[5] is a V4SI temporary and operands[6..8]
;; are V2DF temporaries.
4968 (define_expand "vec_unpacku_float_hi_v4si"
4971 (match_operand:V4SI 1 "nonimmediate_operand")
4972 (parallel [(const_int 2) (const_int 3)
4973 (const_int 2) (const_int 3)])))
4978 (parallel [(const_int 0) (const_int 1)]))))
4980 (lt:V2DF (match_dup 6) (match_dup 3)))
4982 (and:V2DF (match_dup 7) (match_dup 4)))
4983 (set (match_operand:V2DF 0 "register_operand")
4984 (plus:V2DF (match_dup 6) (match_dup 8)))]
4987 REAL_VALUE_TYPE TWO32r;
4991 real_ldexp (&TWO32r, &dconst1, 32);
4992 x = const_double_from_real_value (TWO32r, DFmode);
4994 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
4995 operands[4] = force_reg (V2DFmode,
4996 ix86_build_const_vector (V2DFmode, 1, x));
4998 operands[5] = gen_reg_rtx (V4SImode);
5000 for (i = 6; i < 9; i++)
5001 operands[i] = gen_reg_rtx (V2DFmode);
;; Multi-step expander for the low half of V4SI operand 1 (elements 0 and
;; 1).  After the conversion the sequence computes lt (match_dup 5) <
;; (match_dup 3), ANDs (match_dup 6) with (match_dup 4), and sets V2DF
;; operand 0 to (match_dup 5) + (match_dup 7).
;; Preparation: operands[3] is a V2DF zero vector forced into a register,
;; operands[4] is a V2DF constant vector built from x, where x is the DFmode
;; constant obtained by real_ldexp on dconst1 with exponent 32 (2^32, going
;; by the TWO32r name), and operands[5..7] are V2DF temporaries.
5004 (define_expand "vec_unpacku_float_lo_v4si"
5008 (match_operand:V4SI 1 "nonimmediate_operand")
5009 (parallel [(const_int 0) (const_int 1)]))))
5011 (lt:V2DF (match_dup 5) (match_dup 3)))
5013 (and:V2DF (match_dup 6) (match_dup 4)))
5014 (set (match_operand:V2DF 0 "register_operand")
5015 (plus:V2DF (match_dup 5) (match_dup 7)))]
5018 REAL_VALUE_TYPE TWO32r;
5022 real_ldexp (&TWO32r, &dconst1, 32);
5023 x = const_double_from_real_value (TWO32r, DFmode);
5025 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
5026 operands[4] = force_reg (V2DFmode,
5027 ix86_build_const_vector (V2DFmode, 1, x));
5029 for (i = 5; i < 8; i++)
5030 operands[i] = gen_reg_rtx (V2DFmode);
;; C-coded expander: V4DF operand 0 from the high half of V8SI operand 1.
;; Steps: tmp[5] = high V4SI half of operand 1 (gen_vec_extract_hi_v8si);
;; tmp[2] = gen_floatv4siv4df2 of tmp[5]; tmp[3] = LT (tmp[2], tmp[0]) with
;; tmp[0] a zero vector; tmp[4] = tmp[3] AND tmp[1], where tmp[1] is the
;; constant vector built from x (real_ldexp of dconst1 with exponent 32,
;; i.e. 2^32 going by the TWO32r name); operand 0 = tmp[2] + tmp[4].
5033 (define_expand "vec_unpacku_float_hi_v8si"
5034 [(match_operand:V4DF 0 "register_operand")
5035 (match_operand:V8SI 1 "register_operand")]
5038 REAL_VALUE_TYPE TWO32r;
5042 real_ldexp (&TWO32r, &dconst1, 32);
5043 x = const_double_from_real_value (TWO32r, DFmode);
5045 tmp[0] = force_reg (V4DFmode, CONST0_RTX (V4DFmode));
5046 tmp[1] = force_reg (V4DFmode, ix86_build_const_vector (V4DFmode, 1, x));
5047 tmp[5] = gen_reg_rtx (V4SImode);
5049 for (i = 2; i < 5; i++)
5050 tmp[i] = gen_reg_rtx (V4DFmode);
5051 emit_insn (gen_vec_extract_hi_v8si (tmp[5], operands[1]));
5052 emit_insn (gen_floatv4siv4df2 (tmp[2], tmp[5]));
5053 emit_insn (gen_rtx_SET (VOIDmode, tmp[3],
5054 gen_rtx_LT (V4DFmode, tmp[2], tmp[0])));
5055 emit_insn (gen_andv4df3 (tmp[4], tmp[3], tmp[1]));
5056 emit_insn (gen_addv4df3 (operands[0], tmp[2], tmp[4]));
;; C-coded expander: V8DF operand 0 from the high half of V16SI operand 1.
;; Steps: tmp[3] = high V8SI half of operand 1 (gen_vec_extract_hi_v16si);
;; tmp[2] = gen_floatv8siv8df2 of tmp[3]; k (a QImode register) = LT (tmp[2],
;; tmp[0]) with tmp[0] a zero vector; then gen_addv8df3_mask adds tmp[1]
;; (constant vector built from x, real_ldexp of dconst1 with exponent 32) to
;; tmp[2] under mask k, with tmp[2] also passed as the merge source; finally
;; tmp[2] is moved to operand 0.
5060 (define_expand "vec_unpacku_float_hi_v16si"
5061 [(match_operand:V8DF 0 "register_operand")
5062 (match_operand:V16SI 1 "register_operand")]
5065 REAL_VALUE_TYPE TWO32r;
5068 real_ldexp (&TWO32r, &dconst1, 32);
5069 x = const_double_from_real_value (TWO32r, DFmode);
5071 tmp[0] = force_reg (V8DFmode, CONST0_RTX (V8DFmode));
5072 tmp[1] = force_reg (V8DFmode, ix86_build_const_vector (V8DFmode, 1, x));
5073 tmp[2] = gen_reg_rtx (V8DFmode);
5074 tmp[3] = gen_reg_rtx (V8SImode);
5075 k = gen_reg_rtx (QImode);
5077 emit_insn (gen_vec_extract_hi_v16si (tmp[3], operands[1]));
5078 emit_insn (gen_floatv8siv8df2 (tmp[2], tmp[3]));
5079 emit_insn (gen_rtx_SET (VOIDmode, k,
5080 gen_rtx_LT (QImode, tmp[2], tmp[0])));
5081 emit_insn (gen_addv8df3_mask (tmp[2], tmp[2], tmp[1], tmp[2], k));
5082 emit_move_insn (operands[0], tmp[2]);
;; C-coded expander: V4DF operand 0 from V8SI operand 1 via
;; gen_avx_cvtdq2pd256_2.  Steps: tmp[2] = converted value; tmp[3] =
;; LT (tmp[2], tmp[0]) with tmp[0] a zero vector; tmp[4] = tmp[3] AND tmp[1],
;; where tmp[1] is the constant vector built from x (real_ldexp of dconst1
;; with exponent 32); operand 0 = tmp[2] + tmp[4].
5086 (define_expand "vec_unpacku_float_lo_v8si"
5087 [(match_operand:V4DF 0 "register_operand")
5088 (match_operand:V8SI 1 "nonimmediate_operand")]
5091 REAL_VALUE_TYPE TWO32r;
5095 real_ldexp (&TWO32r, &dconst1, 32);
5096 x = const_double_from_real_value (TWO32r, DFmode);
5098 tmp[0] = force_reg (V4DFmode, CONST0_RTX (V4DFmode));
5099 tmp[1] = force_reg (V4DFmode, ix86_build_const_vector (V4DFmode, 1, x));
5101 for (i = 2; i < 5; i++)
5102 tmp[i] = gen_reg_rtx (V4DFmode);
5103 emit_insn (gen_avx_cvtdq2pd256_2 (tmp[2], operands[1]));
5104 emit_insn (gen_rtx_SET (VOIDmode, tmp[3],
5105 gen_rtx_LT (V4DFmode, tmp[2], tmp[0])));
5106 emit_insn (gen_andv4df3 (tmp[4], tmp[3], tmp[1]));
5107 emit_insn (gen_addv4df3 (operands[0], tmp[2], tmp[4]));
;; C-coded expander: V8DF operand 0 from V16SI operand 1 via
;; gen_avx512f_cvtdq2pd512_2.  Steps: tmp[2] = converted value; k (a QImode
;; register) = LT (tmp[2], tmp[0]) with tmp[0] a zero vector; then
;; gen_addv8df3_mask adds tmp[1] (constant vector built from x, real_ldexp
;; of dconst1 with exponent 32) to tmp[2] under mask k, with tmp[2] also
;; passed as the merge source; finally tmp[2] is moved to operand 0.
5111 (define_expand "vec_unpacku_float_lo_v16si"
5112 [(match_operand:V8DF 0 "register_operand")
5113 (match_operand:V16SI 1 "nonimmediate_operand")]
5116 REAL_VALUE_TYPE TWO32r;
5119 real_ldexp (&TWO32r, &dconst1, 32);
5120 x = const_double_from_real_value (TWO32r, DFmode);
5122 tmp[0] = force_reg (V8DFmode, CONST0_RTX (V8DFmode));
5123 tmp[1] = force_reg (V8DFmode, ix86_build_const_vector (V8DFmode, 1, x));
5124 tmp[2] = gen_reg_rtx (V8DFmode);
5125 k = gen_reg_rtx (QImode);
5127 emit_insn (gen_avx512f_cvtdq2pd512_2 (tmp[2], operands[1]));
5128 emit_insn (gen_rtx_SET (VOIDmode, k,
5129 gen_rtx_LT (QImode, tmp[2], tmp[0])));
5130 emit_insn (gen_addv8df3_mask (tmp[2], tmp[2], tmp[1], tmp[2], k));
5131 emit_move_insn (operands[0], tmp[2]);
;; Expander for VF2_512_256 inputs: float_truncate operands 1 and 2 to
;; <sf2dfmode>, then set operand 0 (mode <ssePSmode>) to a vec_concat.
;; The preparation statements allocate operands[3] and operands[4] as fresh
;; <sf2dfmode> pseudo registers.
5135 (define_expand "vec_pack_trunc_<mode>"
5137 (float_truncate:<sf2dfmode>
5138 (match_operand:VF2_512_256 1 "nonimmediate_operand")))
5140 (float_truncate:<sf2dfmode>
5141 (match_operand:VF2_512_256 2 "nonimmediate_operand")))
5142 (set (match_operand:<ssePSmode> 0 "register_operand")
5143 (vec_concat:<ssePSmode>
5148 operands[3] = gen_reg_rtx (<sf2dfmode>mode);
5149 operands[4] = gen_reg_rtx (<sf2dfmode>mode);
;; C-coded expander packing V2DF operands 1 and 2 into V4SF operand 0.
;; Under TARGET_AVX && !TARGET_PREFER_AVX128: operand 1 is forced into a
;; register, concatenated with operand 2 into a V4DF temporary
;; (gen_avx_vec_concatv4df) and converted with gen_avx_cvtpd2ps256.
;; The other sequence converts each input with gen_sse2_cvtpd2ps into a
;; V4SF temporary and combines the two with gen_sse_movlhps.
5152 (define_expand "vec_pack_trunc_v2df"
5153 [(match_operand:V4SF 0 "register_operand")
5154 (match_operand:V2DF 1 "nonimmediate_operand")
5155 (match_operand:V2DF 2 "nonimmediate_operand")]
5160 if (TARGET_AVX && !TARGET_PREFER_AVX128)
5162 tmp0 = gen_reg_rtx (V4DFmode);
5163 tmp1 = force_reg (V2DFmode, operands[1]);
5165 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
5166 emit_insn (gen_avx_cvtpd2ps256 (operands[0], tmp0));
5170 tmp0 = gen_reg_rtx (V4SFmode);
5171 tmp1 = gen_reg_rtx (V4SFmode);
5173 emit_insn (gen_sse2_cvtpd2ps (tmp0, operands[1]));
5174 emit_insn (gen_sse2_cvtpd2ps (tmp1, operands[2]));
5175 emit_insn (gen_sse_movlhps (operands[0], tmp0, tmp1));
;; C-coded expander: converts V8DF operands 1 and 2 with
;; gen_fix_truncv8dfv8si2 into V8SI temporaries r1 and r2, then concatenates
;; them into V16SI operand 0 with gen_avx_vec_concatv16si.
5180 (define_expand "vec_pack_sfix_trunc_v8df"
5181 [(match_operand:V16SI 0 "register_operand")
5182 (match_operand:V8DF 1 "nonimmediate_operand")
5183 (match_operand:V8DF 2 "nonimmediate_operand")]
5188 r1 = gen_reg_rtx (V8SImode);
5189 r2 = gen_reg_rtx (V8SImode);
5191 emit_insn (gen_fix_truncv8dfv8si2 (r1, operands[1]));
5192 emit_insn (gen_fix_truncv8dfv8si2 (r2, operands[2]));
5193 emit_insn (gen_avx_vec_concatv16si (operands[0], r1, r2));
;; C-coded expander: converts V4DF operands 1 and 2 with
;; gen_fix_truncv4dfv4si2 into V4SI temporaries r1 and r2, then concatenates
;; them into V8SI operand 0 with gen_avx_vec_concatv8si.
5197 (define_expand "vec_pack_sfix_trunc_v4df"
5198 [(match_operand:V8SI 0 "register_operand")
5199 (match_operand:V4DF 1 "nonimmediate_operand")
5200 (match_operand:V4DF 2 "nonimmediate_operand")]
5205 r1 = gen_reg_rtx (V4SImode);
5206 r2 = gen_reg_rtx (V4SImode);
5208 emit_insn (gen_fix_truncv4dfv4si2 (r1, operands[1]));
5209 emit_insn (gen_fix_truncv4dfv4si2 (r2, operands[2]));
5210 emit_insn (gen_avx_vec_concatv8si (operands[0], r1, r2));
;; C-coded expander packing V2DF operands 1 and 2 into V4SI operand 0.
;; Under TARGET_AVX && !TARGET_PREFER_AVX128: operand 1 is forced into a
;; register, concatenated with operand 2 into a V4DF temporary and converted
;; with gen_fix_truncv4dfv4si2.
;; The other sequence converts each input with gen_sse2_cvttpd2dq into a
;; V4SI temporary, interleaves the low V2DI parts of the two results
;; (gen_vec_interleave_lowv2di) and moves the V4SI view of that into
;; operand 0.
5214 (define_expand "vec_pack_sfix_trunc_v2df"
5215 [(match_operand:V4SI 0 "register_operand")
5216 (match_operand:V2DF 1 "nonimmediate_operand")
5217 (match_operand:V2DF 2 "nonimmediate_operand")]
5220 rtx tmp0, tmp1, tmp2;
5222 if (TARGET_AVX && !TARGET_PREFER_AVX128)
5224 tmp0 = gen_reg_rtx (V4DFmode);
5225 tmp1 = force_reg (V2DFmode, operands[1]);
5227 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
5228 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp0));
5232 tmp0 = gen_reg_rtx (V4SImode);
5233 tmp1 = gen_reg_rtx (V4SImode);
5234 tmp2 = gen_reg_rtx (V2DImode);
5236 emit_insn (gen_sse2_cvttpd2dq (tmp0, operands[1]));
5237 emit_insn (gen_sse2_cvttpd2dq (tmp1, operands[2]));
5238 emit_insn (gen_vec_interleave_lowv2di (tmp2,
5239 gen_lowpart (V2DImode, tmp0),
5240 gen_lowpart (V2DImode, tmp1)));
5241 emit_move_insn (operands[0], gen_lowpart (V4SImode, tmp2));
;; Mode attribute giving the SI vector mode that holds two packed DF vectors
;; of the given mode: V8DF -> V16SI, V4DF -> V8SI, V2DF -> V4SI.
5246 (define_mode_attr ssepackfltmode
5247 [(V8DF "V16SI") (V4DF "V8SI") (V2DF "V4SI")])
;; C-coded expander packing VF2 operands 1 and 2 into operand 0 (mode
;; <ssepackfltmode>).
;; For V8DFmode: each input is converted with gen_ufix_truncv8dfv8si2 into a
;; V8SI temporary and the two are joined with gen_avx_vec_concatv16si.
;; For the remaining modes: each input goes through
;; ix86_expand_adjust_ufix_to_sfix_si, which also fills tmp[2] / tmp[3];
;; the adjusted values are packed with gen_vec_pack_sfix_trunc_<mode> into
;; tmp[4]; tmp[5] is built from tmp[2] and tmp[3] by
;; ix86_expand_vec_extract_even_odd (last argument 0), directly in
;; <ssepackfltmode> when that is V4SImode or TARGET_AVX2 holds, otherwise via
;; V8SFmode views; the result is tmp[4] XOR tmp[5], moved into operand 0 if
;; expand_simple_binop did not already place it there.
5249 (define_expand "vec_pack_ufix_trunc_<mode>"
5250 [(match_operand:<ssepackfltmode> 0 "register_operand")
5251 (match_operand:VF2 1 "register_operand")
5252 (match_operand:VF2 2 "register_operand")]
5255 if (<MODE>mode == V8DFmode)
5259 r1 = gen_reg_rtx (V8SImode);
5260 r2 = gen_reg_rtx (V8SImode);
5262 emit_insn (gen_ufix_truncv8dfv8si2 (r1, operands[1]));
5263 emit_insn (gen_ufix_truncv8dfv8si2 (r2, operands[2]));
5264 emit_insn (gen_avx_vec_concatv16si (operands[0], r1, r2));
5269 tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]);
5270 tmp[1] = ix86_expand_adjust_ufix_to_sfix_si (operands[2], &tmp[3]);
5271 tmp[4] = gen_reg_rtx (<ssepackfltmode>mode);
5272 emit_insn (gen_vec_pack_sfix_trunc_<mode> (tmp[4], tmp[0], tmp[1]));
5273 if (<ssepackfltmode>mode == V4SImode || TARGET_AVX2)
5275 tmp[5] = gen_reg_rtx (<ssepackfltmode>mode);
5276 ix86_expand_vec_extract_even_odd (tmp[5], tmp[2], tmp[3], 0);
5280 tmp[5] = gen_reg_rtx (V8SFmode);
5281 ix86_expand_vec_extract_even_odd (tmp[5], gen_lowpart (V8SFmode, tmp[2]),
5282 gen_lowpart (V8SFmode, tmp[3]), 0);
5283 tmp[5] = gen_lowpart (V8SImode, tmp[5]);
5285 tmp[6] = expand_simple_binop (<ssepackfltmode>mode, XOR, tmp[4], tmp[5],
5286 operands[0], 0, OPTAB_DIRECT);
5287 if (tmp[6] != operands[0])
5288 emit_move_insn (operands[0], tmp[6]);
;; C-coded expander: converts V4DF operands 1 and 2 with gen_avx_cvtpd2dq256
;; into V4SI temporaries r1 and r2, then concatenates them into V8SI
;; operand 0 with gen_avx_vec_concatv8si.
5294 (define_expand "vec_pack_sfix_v4df"
5295 [(match_operand:V8SI 0 "register_operand")
5296 (match_operand:V4DF 1 "nonimmediate_operand")
5297 (match_operand:V4DF 2 "nonimmediate_operand")]
5302 r1 = gen_reg_rtx (V4SImode);
5303 r2 = gen_reg_rtx (V4SImode);
5305 emit_insn (gen_avx_cvtpd2dq256 (r1, operands[1]));
5306 emit_insn (gen_avx_cvtpd2dq256 (r2, operands[2]));
5307 emit_insn (gen_avx_vec_concatv8si (operands[0], r1, r2));
;; C-coded expander packing V2DF operands 1 and 2 into V4SI operand 0.
;; Under TARGET_AVX && !TARGET_PREFER_AVX128: operand 1 is forced into a
;; register, concatenated with operand 2 into a V4DF temporary and converted
;; with gen_avx_cvtpd2dq256.
;; The other sequence converts each input with gen_sse2_cvtpd2dq into a V4SI
;; temporary, interleaves the low V2DI parts of the two results
;; (gen_vec_interleave_lowv2di) and moves the V4SI view of that into
;; operand 0.
5311 (define_expand "vec_pack_sfix_v2df"
5312 [(match_operand:V4SI 0 "register_operand")
5313 (match_operand:V2DF 1 "nonimmediate_operand")
5314 (match_operand:V2DF 2 "nonimmediate_operand")]
5317 rtx tmp0, tmp1, tmp2;
5319 if (TARGET_AVX && !TARGET_PREFER_AVX128)
5321 tmp0 = gen_reg_rtx (V4DFmode);
5322 tmp1 = force_reg (V2DFmode, operands[1]);
5324 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
5325 emit_insn (gen_avx_cvtpd2dq256 (operands[0], tmp0));
5329 tmp0 = gen_reg_rtx (V4SImode);
5330 tmp1 = gen_reg_rtx (V4SImode);
5331 tmp2 = gen_reg_rtx (V2DImode);
5333 emit_insn (gen_sse2_cvtpd2dq (tmp0, operands[1]));
5334 emit_insn (gen_sse2_cvtpd2dq (tmp1, operands[2]));
5335 emit_insn (gen_vec_interleave_lowv2di (tmp2,
5336 gen_lowpart (V2DImode, tmp0),
5337 gen_lowpart (V2DImode, tmp1)));
5338 emit_move_insn (operands[0], gen_lowpart (V4SImode, tmp2));
5343 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5345 ;; Parallel single-precision floating point element swizzling
5347 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander wrapper around sse_movhlps for V4SF operands that may be in
;; memory.  The operands are first passed through ix86_fixup_binary_operands,
;; which returns the destination to use; gen_sse_movhlps is emitted on that
;; destination, and the result is copied to operand 0 if the two differ.
5349 (define_expand "sse_movhlps_exp"
5350 [(set (match_operand:V4SF 0 "nonimmediate_operand")
5353 (match_operand:V4SF 1 "nonimmediate_operand")
5354 (match_operand:V4SF 2 "nonimmediate_operand"))
5355 (parallel [(const_int 6)
5361 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
5363 emit_insn (gen_sse_movhlps (dst, operands[1], operands[2]));
5365 /* Fix up the destination if needed. */
5366 if (dst != operands[0])
5367 emit_move_insn (operands[0], dst);
;; V4SF insn with five alternatives, in order: movhlps (noavx), vmovhlps
;; (avx), movlps reading %H2 (noavx), vmovlps reading %H2 (avx), and %vmovhps
;; storing to a memory destination.  The condition requires TARGET_SSE and
;; rejects the case where operands 1 and 2 are both memory.
5372 (define_insn "sse_movhlps"
5373 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,m")
5376 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
5377 (match_operand:V4SF 2 "nonimmediate_operand" " x,x,o,o,x"))
5378 (parallel [(const_int 6)
5382 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
5384 movhlps\t{%2, %0|%0, %2}
5385 vmovhlps\t{%2, %1, %0|%0, %1, %2}
5386 movlps\t{%H2, %0|%0, %H2}
5387 vmovlps\t{%H2, %1, %0|%0, %1, %H2}
5388 %vmovhps\t{%2, %0|%q0, %2}"
5389 [(set_attr "isa" "noavx,avx,noavx,avx,*")
5390 (set_attr "type" "ssemov")
5391 (set_attr "ssememalign" "64")
5392 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
5393 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
;; Expander wrapper around sse_movlhps for V4SF operands that may be in
;; memory.  The operands are first passed through ix86_fixup_binary_operands,
;; which returns the destination to use; gen_sse_movlhps is emitted on that
;; destination, and the result is copied to operand 0 if the two differ.
5395 (define_expand "sse_movlhps_exp"
5396 [(set (match_operand:V4SF 0 "nonimmediate_operand")
5399 (match_operand:V4SF 1 "nonimmediate_operand")
5400 (match_operand:V4SF 2 "nonimmediate_operand"))
5401 (parallel [(const_int 0)
5407 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
5409 emit_insn (gen_sse_movlhps (dst, operands[1], operands[2]));
5411 /* Fix up the destination if needed. */
5412 if (dst != operands[0])
5413 emit_move_insn (operands[0], dst);
;; V4SF insn with five alternatives, in order: movlhps (noavx), vmovlhps
;; (avx), movhps from memory (noavx), vmovhps from memory (avx), and %vmovlps
;; storing to %H0 of a memory destination.  The condition requires
;; TARGET_SSE and ix86_binary_operator_ok for the operands.
5418 (define_insn "sse_movlhps"
5419 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,o")
5422 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
5423 (match_operand:V4SF 2 "nonimmediate_operand" " x,x,m,m,x"))
5424 (parallel [(const_int 0)
5428 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
5430 movlhps\t{%2, %0|%0, %2}
5431 vmovlhps\t{%2, %1, %0|%0, %1, %2}
5432 movhps\t{%2, %0|%0, %q2}
5433 vmovhps\t{%2, %1, %0|%0, %1, %q2}
5434 %vmovlps\t{%2, %H0|%H0, %2}"
5435 [(set_attr "isa" "noavx,avx,noavx,avx,*")
5436 (set_attr "type" "ssemov")
5437 (set_attr "ssememalign" "64")
5438 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
5439 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
;; 512-bit vunpckhps on V16SF operands 1 and 2.  The selection list pairs
;; element i of operand 1 with element i+16 (i.e. element i of operand 2 in
;; the concatenation) for i = 2, 3, 6, 7, 10, 11, 14, 15 -- the upper two
;; elements of every group of four.
5441 (define_insn "<mask_codefor>avx512f_unpckhps512<mask_name>"
5442 [(set (match_operand:V16SF 0 "register_operand" "=v")
5445 (match_operand:V16SF 1 "register_operand" "v")
5446 (match_operand:V16SF 2 "nonimmediate_operand" "vm"))
5447 (parallel [(const_int 2) (const_int 18)
5448 (const_int 3) (const_int 19)
5449 (const_int 6) (const_int 22)
5450 (const_int 7) (const_int 23)
5451 (const_int 10) (const_int 26)
5452 (const_int 11) (const_int 27)
5453 (const_int 14) (const_int 30)
5454 (const_int 15) (const_int 31)])))]
5456 "vunpckhps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
5457 [(set_attr "type" "sselog")
5458 (set_attr "prefix" "evex")
5459 (set_attr "mode" "V16SF")])
5461 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; 256-bit vunpckhps on V8SF operands 1 and 2.  The selection list pairs
;; element i of operand 1 with element i+8 for i = 2, 3, 6, 7 -- the upper
;; two elements of each group of four.  Enabled for TARGET_AVX together with
;; <mask_mode512bit_condition>.
5462 (define_insn "avx_unpckhps256<mask_name>"
5463 [(set (match_operand:V8SF 0 "register_operand" "=v")
5466 (match_operand:V8SF 1 "register_operand" "v")
5467 (match_operand:V8SF 2 "nonimmediate_operand" "vm"))
5468 (parallel [(const_int 2) (const_int 10)
5469 (const_int 3) (const_int 11)
5470 (const_int 6) (const_int 14)
5471 (const_int 7) (const_int 15)])))]
5472 "TARGET_AVX && <mask_mode512bit_condition>"
5473 "vunpckhps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
5474 [(set_attr "type" "sselog")
5475 (set_attr "prefix" "vex")
5476 (set_attr "mode" "V8SF")])
;; Three-step expander for V8SF operands 1 and 2.  Step one uses the
;; selection (0 8 1 9 4 12 5 13), step two the selection
;; (2 10 3 11 6 14 7 15) -- the same index lists as the avx_unpcklps256 and
;; avx_unpckhps256 patterns -- and step three sets operand 0 using the
;; selection (4 5 6 7 12 13 14 15).  operands[3] and operands[4] are fresh
;; V8SF pseudo registers allocated in the preparation statements.
5478 (define_expand "vec_interleave_highv8sf"
5482 (match_operand:V8SF 1 "register_operand" "x")
5483 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
5484 (parallel [(const_int 0) (const_int 8)
5485 (const_int 1) (const_int 9)
5486 (const_int 4) (const_int 12)
5487 (const_int 5) (const_int 13)])))
5493 (parallel [(const_int 2) (const_int 10)
5494 (const_int 3) (const_int 11)
5495 (const_int 6) (const_int 14)
5496 (const_int 7) (const_int 15)])))
5497 (set (match_operand:V8SF 0 "register_operand")
5502 (parallel [(const_int 4) (const_int 5)
5503 (const_int 6) (const_int 7)
5504 (const_int 12) (const_int 13)
5505 (const_int 14) (const_int 15)])))]
5508 operands[3] = gen_reg_rtx (V8SFmode);
5509 operands[4] = gen_reg_rtx (V8SFmode);
;; V4SF high interleave with selection (2 6 3 7) over operands 1 and 2.
;; Two alternatives: unpckhps with operand 1 tied to the destination (noavx)
;; and three-operand vunpckhps (avx), the latter carrying <mask_operand3>.
;; Enabled for TARGET_SSE together with <mask_mode512bit_condition>.
5512 (define_insn "vec_interleave_highv4sf<mask_name>"
5513 [(set (match_operand:V4SF 0 "register_operand" "=x,v")
5516 (match_operand:V4SF 1 "register_operand" "0,v")
5517 (match_operand:V4SF 2 "nonimmediate_operand" "xm,vm"))
5518 (parallel [(const_int 2) (const_int 6)
5519 (const_int 3) (const_int 7)])))]
5520 "TARGET_SSE && <mask_mode512bit_condition>"
5522 unpckhps\t{%2, %0|%0, %2}
5523 vunpckhps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
5524 [(set_attr "isa" "noavx,avx")
5525 (set_attr "type" "sselog")
5526 (set_attr "prefix" "orig,vex")
5527 (set_attr "mode" "V4SF")])
;; 512-bit vunpcklps on V16SF operands 1 and 2.  The selection list pairs
;; element i of operand 1 with element i+16 for i = 0, 1, 4, 5, 8, 9, 12,
;; 13 -- the lower two elements of every group of four.
5529 (define_insn "<mask_codefor>avx512f_unpcklps512<mask_name>"
5530 [(set (match_operand:V16SF 0 "register_operand" "=v")
5533 (match_operand:V16SF 1 "register_operand" "v")
5534 (match_operand:V16SF 2 "nonimmediate_operand" "vm"))
5535 (parallel [(const_int 0) (const_int 16)
5536 (const_int 1) (const_int 17)
5537 (const_int 4) (const_int 20)
5538 (const_int 5) (const_int 21)
5539 (const_int 8) (const_int 24)
5540 (const_int 9) (const_int 25)
5541 (const_int 12) (const_int 28)
5542 (const_int 13) (const_int 29)])))]
5544 "vunpcklps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
5545 [(set_attr "type" "sselog")
5546 (set_attr "prefix" "evex")
5547 (set_attr "mode" "V16SF")])
5549 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; 256-bit vunpcklps on V8SF operands 1 and 2.  The selection list pairs
;; element i of operand 1 with element i+8 for i = 0, 1, 4, 5 -- the lower
;; two elements of each group of four.  Enabled for TARGET_AVX together with
;; <mask_mode512bit_condition>.
5550 (define_insn "avx_unpcklps256<mask_name>"
5551 [(set (match_operand:V8SF 0 "register_operand" "=v")
5554 (match_operand:V8SF 1 "register_operand" "v")
5555 (match_operand:V8SF 2 "nonimmediate_operand" "vm"))
5556 (parallel [(const_int 0) (const_int 8)
5557 (const_int 1) (const_int 9)
5558 (const_int 4) (const_int 12)
5559 (const_int 5) (const_int 13)])))]
5560 "TARGET_AVX && <mask_mode512bit_condition>"
5561 "vunpcklps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
5562 [(set_attr "type" "sselog")
5563 (set_attr "prefix" "vex")
5564 (set_attr "mode" "V8SF")])
;; Explicitly masked 128-bit vunpcklps: selection (0 4 1 5) over V4SF
;; operands 1 and 2, with operand 3 ("0C": tied to the destination or a
;; constant accepted by vector_move_operand) and QImode mask register
;; operand 4 ("Yk").  Requires TARGET_AVX512F && TARGET_AVX512VL.  The
;; template prints the mask as %{%4%} followed by %N3.
5566 (define_insn "unpcklps128_mask"
5567 [(set (match_operand:V4SF 0 "register_operand" "=v")
5571 (match_operand:V4SF 1 "register_operand" "v")
5572 (match_operand:V4SF 2 "nonimmediate_operand" "vm"))
5573 (parallel [(const_int 0) (const_int 4)
5574 (const_int 1) (const_int 5)]))
5575 (match_operand:V4SF 3 "vector_move_operand" "0C")
5576 (match_operand:QI 4 "register_operand" "Yk")))]
5577 "TARGET_AVX512F && TARGET_AVX512VL"
5578 "vunpcklps\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
5579 [(set_attr "type" "sselog")
5580 (set_attr "prefix" "evex")
5581 (set_attr "mode" "V4SF")])
;; Three-step expander for V8SF operands 1 and 2.  Step one uses the
;; selection (0 8 1 9 4 12 5 13), step two the selection
;; (2 10 3 11 6 14 7 15) -- the same index lists as the avx_unpcklps256 and
;; avx_unpckhps256 patterns -- and step three sets operand 0 using the
;; selection (0 1 2 3 8 9 10 11).  operands[3] and operands[4] are fresh
;; V8SF pseudo registers allocated in the preparation statements.
5583 (define_expand "vec_interleave_lowv8sf"
5587 (match_operand:V8SF 1 "register_operand" "x")
5588 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
5589 (parallel [(const_int 0) (const_int 8)
5590 (const_int 1) (const_int 9)
5591 (const_int 4) (const_int 12)
5592 (const_int 5) (const_int 13)])))
5598 (parallel [(const_int 2) (const_int 10)
5599 (const_int 3) (const_int 11)
5600 (const_int 6) (const_int 14)
5601 (const_int 7) (const_int 15)])))
5602 (set (match_operand:V8SF 0 "register_operand")
5607 (parallel [(const_int 0) (const_int 1)
5608 (const_int 2) (const_int 3)
5609 (const_int 8) (const_int 9)
5610 (const_int 10) (const_int 11)])))]
5613 operands[3] = gen_reg_rtx (V8SFmode);
5614 operands[4] = gen_reg_rtx (V8SFmode);
;; V4SF low interleave with selection (0 4 1 5) over operands 1 and 2.
;; Two alternatives: unpcklps with operand 1 tied to the destination (noavx)
;; and three-operand vunpcklps (avx).  Unlike vec_interleave_highv4sf, this
;; pattern has no <mask_name> variant; the masked form is the separate
;; unpcklps128_mask pattern.
5617 (define_insn "vec_interleave_lowv4sf"
5618 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
5621 (match_operand:V4SF 1 "register_operand" "0,x")
5622 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
5623 (parallel [(const_int 0) (const_int 4)
5624 (const_int 1) (const_int 5)])))]
5627 unpcklps\t{%2, %0|%0, %2}
5628 vunpcklps\t{%2, %1, %0|%0, %1, %2}"
5629 [(set_attr "isa" "noavx,avx")
5630 (set_attr "type" "sselog")
5631 (set_attr "prefix" "orig,vex")
5632 (set_attr "mode" "V4SF")])
5634 ;; These are modeled with the same vec_concat as the others so that we
5635 ;; capture users of shufps that can use the new instructions
;; 256-bit vmovshdup on V8SF operand 1: the selection list duplicates each
;; odd-numbered element (1 1 3 3 5 5 7 7).  Enabled for TARGET_AVX together
;; with <mask_mode512bit_condition>.
5636 (define_insn "avx_movshdup256<mask_name>"
5637 [(set (match_operand:V8SF 0 "register_operand" "=v")
5640 (match_operand:V8SF 1 "nonimmediate_operand" "vm")
5642 (parallel [(const_int 1) (const_int 1)
5643 (const_int 3) (const_int 3)
5644 (const_int 5) (const_int 5)
5645 (const_int 7) (const_int 7)])))]
5646 "TARGET_AVX && <mask_mode512bit_condition>"
5647 "vmovshdup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5648 [(set_attr "type" "sse")
5649 (set_attr "prefix" "vex")
5650 (set_attr "mode" "V8SF")])
;; 128-bit %vmovshdup on V4SF operand 1; the selection list begins with
;; element 1.  Enabled for TARGET_SSE3 together with
;; <mask_mode512bit_condition>.
5652 (define_insn "sse3_movshdup<mask_name>"
5653 [(set (match_operand:V4SF 0 "register_operand" "=v")
5656 (match_operand:V4SF 1 "nonimmediate_operand" "vm")
5658 (parallel [(const_int 1)
5662 "TARGET_SSE3 && <mask_mode512bit_condition>"
5663 "%vmovshdup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5664 [(set_attr "type" "sse")
5665 (set_attr "prefix_rep" "1")
5666 (set_attr "prefix" "maybe_vex")
5667 (set_attr "mode" "V4SF")])
;; 512-bit vmovshdup on V16SF operand 1: the selection list duplicates each
;; odd-numbered element (1 1 3 3 ... 15 15).
5669 (define_insn "<mask_codefor>avx512f_movshdup512<mask_name>"
5670 [(set (match_operand:V16SF 0 "register_operand" "=v")
5673 (match_operand:V16SF 1 "nonimmediate_operand" "vm")
5675 (parallel [(const_int 1) (const_int 1)
5676 (const_int 3) (const_int 3)
5677 (const_int 5) (const_int 5)
5678 (const_int 7) (const_int 7)
5679 (const_int 9) (const_int 9)
5680 (const_int 11) (const_int 11)
5681 (const_int 13) (const_int 13)
5682 (const_int 15) (const_int 15)])))]
5684 "vmovshdup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5685 [(set_attr "type" "sse")
5686 (set_attr "prefix" "evex")
5687 (set_attr "mode" "V16SF")])
;; 256-bit vmovsldup on V8SF operand 1: the selection list duplicates each
;; even-numbered element (0 0 2 2 4 4 6 6).  Enabled for TARGET_AVX together
;; with <mask_mode512bit_condition>.
5689 (define_insn "avx_movsldup256<mask_name>"
5690 [(set (match_operand:V8SF 0 "register_operand" "=v")
5693 (match_operand:V8SF 1 "nonimmediate_operand" "vm")
5695 (parallel [(const_int 0) (const_int 0)
5696 (const_int 2) (const_int 2)
5697 (const_int 4) (const_int 4)
5698 (const_int 6) (const_int 6)])))]
5699 "TARGET_AVX && <mask_mode512bit_condition>"
5700 "vmovsldup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5701 [(set_attr "type" "sse")
5702 (set_attr "prefix" "vex")
5703 (set_attr "mode" "V8SF")])
;; 128-bit %vmovsldup on V4SF operand 1; the selection list begins with
;; element 0.  Enabled for TARGET_SSE3 together with
;; <mask_mode512bit_condition>.
5705 (define_insn "sse3_movsldup<mask_name>"
5706 [(set (match_operand:V4SF 0 "register_operand" "=v")
5709 (match_operand:V4SF 1 "nonimmediate_operand" "vm")
5711 (parallel [(const_int 0)
5715 "TARGET_SSE3 && <mask_mode512bit_condition>"
5716 "%vmovsldup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5717 [(set_attr "type" "sse")
5718 (set_attr "prefix_rep" "1")
5719 (set_attr "prefix" "maybe_vex")
5720 (set_attr "mode" "V4SF")])
;; 512-bit vmovsldup on V16SF operand 1: the selection list duplicates each
;; even-numbered element (0 0 2 2 ... 14 14).
5722 (define_insn "<mask_codefor>avx512f_movsldup512<mask_name>"
5723 [(set (match_operand:V16SF 0 "register_operand" "=v")
5726 (match_operand:V16SF 1 "nonimmediate_operand" "vm")
5728 (parallel [(const_int 0) (const_int 0)
5729 (const_int 2) (const_int 2)
5730 (const_int 4) (const_int 4)
5731 (const_int 6) (const_int 6)
5732 (const_int 8) (const_int 8)
5733 (const_int 10) (const_int 10)
5734 (const_int 12) (const_int 12)
5735 (const_int 14) (const_int 14)])))]
5737 "vmovsldup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5738 [(set_attr "type" "sse")
5739 (set_attr "prefix" "evex")
5740 (set_attr "mode" "V16SF")])
;; Expander that decodes the integer immediate in operand 3 into the eight
;; explicit element indices expected by avx_shufps256_1.  Each 2-bit field
;; of the mask is used twice: fields at bits 0 and 2 yield indices 0-3 and,
;; with +4, indices 4-7; fields at bits 4 and 6 yield, with +8, indices 8-11
;; and, with +12, indices 12-15.
5742 (define_expand "avx_shufps256<mask_expand4_name>"
5743 [(match_operand:V8SF 0 "register_operand")
5744 (match_operand:V8SF 1 "register_operand")
5745 (match_operand:V8SF 2 "nonimmediate_operand")
5746 (match_operand:SI 3 "const_int_operand")]
5749 int mask = INTVAL (operands[3]);
5750 emit_insn (gen_avx_shufps256_1<mask_expand4_name> (operands[0],
5753 GEN_INT ((mask >> 0) & 3),
5754 GEN_INT ((mask >> 2) & 3),
5755 GEN_INT (((mask >> 4) & 3) + 8),
5756 GEN_INT (((mask >> 6) & 3) + 8),
5757 GEN_INT (((mask >> 0) & 3) + 4),
5758 GEN_INT (((mask >> 2) & 3) + 4),
5759 GEN_INT (((mask >> 4) & 3) + 12),
5760 GEN_INT (((mask >> 6) & 3) + 12)
5761 <mask_expand4_args>));
5765 ;; One bit in mask selects 2 elements.
;; 256-bit vshufps with explicit element indices in operands 3-10.  The
;; predicates restrict operands 3/4 to 0-3, 5/6 to 8-11, 7/8 to 4-7 and 9/10
;; to 12-15, and the condition additionally requires operands 7-10 to equal
;; operands 3-6 plus 4, so both halves use the same selection.  The output
;; code rebuilds the 8-bit immediate from operands 3-6 (subtracting 8 from
;; operands 5 and 6), stores it back into operands[3] and returns the
;; vshufps template.
5766 (define_insn "avx_shufps256_1<mask_name>"
5767 [(set (match_operand:V8SF 0 "register_operand" "=v")
5770 (match_operand:V8SF 1 "register_operand" "v")
5771 (match_operand:V8SF 2 "nonimmediate_operand" "vm"))
5772 (parallel [(match_operand 3 "const_0_to_3_operand" )
5773 (match_operand 4 "const_0_to_3_operand" )
5774 (match_operand 5 "const_8_to_11_operand" )
5775 (match_operand 6 "const_8_to_11_operand" )
5776 (match_operand 7 "const_4_to_7_operand" )
5777 (match_operand 8 "const_4_to_7_operand" )
5778 (match_operand 9 "const_12_to_15_operand")
5779 (match_operand 10 "const_12_to_15_operand")])))]
5781 && <mask_mode512bit_condition>
5782 && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
5783 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
5784 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
5785 && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4))"
5788 mask = INTVAL (operands[3]);
5789 mask |= INTVAL (operands[4]) << 2;
5790 mask |= (INTVAL (operands[5]) - 8) << 4;
5791 mask |= (INTVAL (operands[6]) - 8) << 6;
5792 operands[3] = GEN_INT (mask);
5794 return "vshufps\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
5796 [(set_attr "type" "sseshuf")
5797 (set_attr "length_immediate" "1")
5798 (set_attr "prefix" "<mask_prefix>")
5799 (set_attr "mode" "V8SF")])
;; Expander that decodes the integer immediate in operand 3 into the four
;; explicit element indices expected by sse_shufps_v4sf: the 2-bit fields at
;; bits 0 and 2 yield indices 0-3, and the fields at bits 4 and 6 yield,
;; with +4, indices 4-7.
5801 (define_expand "sse_shufps<mask_expand4_name>"
5802 [(match_operand:V4SF 0 "register_operand")
5803 (match_operand:V4SF 1 "register_operand")
5804 (match_operand:V4SF 2 "nonimmediate_operand")
5805 (match_operand:SI 3 "const_int_operand")]
5808 int mask = INTVAL (operands[3]);
5809 emit_insn (gen_sse_shufps_v4sf<mask_expand4_name> (operands[0],
5812 GEN_INT ((mask >> 0) & 3),
5813 GEN_INT ((mask >> 2) & 3),
5814 GEN_INT (((mask >> 4) & 3) + 4),
5815 GEN_INT (((mask >> 6) & 3) + 4)
5816 <mask_expand4_args>));
;; Explicitly masked 128-bit vshufps.  Operands 3/4 are indices 0-3 and
;; operands 5/6 are indices 4-7 into the pair of V4SF operands 1 and 2;
;; operand 7 ("0C") and QImode mask register operand 8 ("Yk") supply the
;; masking.  Requires TARGET_AVX512F && TARGET_AVX512VL.  The output code
;; rebuilds the 8-bit immediate from operands 3-6 (subtracting 4 from
;; operands 5 and 6), stores it back into operands[3] and returns the
;; vshufps template with the mask printed as %{%8%}%N7.
5820 (define_insn "sse_shufps_v4sf_mask"
5821 [(set (match_operand:V4SF 0 "register_operand" "=v")
5825 (match_operand:V4SF 1 "register_operand" "v")
5826 (match_operand:V4SF 2 "nonimmediate_operand" "vm"))
5827 (parallel [(match_operand 3 "const_0_to_3_operand")
5828 (match_operand 4 "const_0_to_3_operand")
5829 (match_operand 5 "const_4_to_7_operand")
5830 (match_operand 6 "const_4_to_7_operand")]))
5831 (match_operand:V4SF 7 "vector_move_operand" "0C")
5832 (match_operand:QI 8 "register_operand" "Yk")))]
5833 "TARGET_AVX512F && TARGET_AVX512VL"
5836 mask |= INTVAL (operands[3]) << 0;
5837 mask |= INTVAL (operands[4]) << 2;
5838 mask |= (INTVAL (operands[5]) - 4) << 4;
5839 mask |= (INTVAL (operands[6]) - 4) << 6;
5840 operands[3] = GEN_INT (mask);
5842 return "vshufps\t{%3, %2, %1, %0%{%8%}%N7|%0%{%8%}%N7, %1, %2, %3}";
5844 [(set_attr "type" "sseshuf")
5845 (set_attr "length_immediate" "1")
5846 (set_attr "prefix" "evex")
5847 (set_attr "mode" "V4SF")])
;; shufps / vshufps on 128-bit 4-element vectors (VI4F_128).  The RTL is a
;; vec_select out of the concatenation of operands 1 and 2: two elements
;; come from operand 1 (indices 0..3) and two from operand 2 (indices 4..7).
;; The four selectors are recombined into the imm8 stored in operands[3].
;; Alternative 0 is the two-operand SSE form, alternative 1 the AVX form.
5849 (define_insn "sse_shufps_<mode>"
5850 [(set (match_operand:VI4F_128 0 "register_operand" "=x,x")
5851 (vec_select:VI4F_128
5852 (vec_concat:<ssedoublevecmode>
5853 (match_operand:VI4F_128 1 "register_operand" "0,x")
5854 (match_operand:VI4F_128 2 "nonimmediate_operand" "xm,xm"))
5855 (parallel [(match_operand 3 "const_0_to_3_operand")
5856 (match_operand 4 "const_0_to_3_operand")
5857 (match_operand 5 "const_4_to_7_operand")
5858 (match_operand 6 "const_4_to_7_operand")])))]
5862 mask |= INTVAL (operands[3]) << 0;
5863 mask |= INTVAL (operands[4]) << 2;
5864 mask |= (INTVAL (operands[5]) - 4) << 4;
5865 mask |= (INTVAL (operands[6]) - 4) << 6;
5866 operands[3] = GEN_INT (mask);
5868 switch (which_alternative)
5871 return "shufps\t{%3, %2, %0|%0, %2, %3}";
5873 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
5878 [(set_attr "isa" "noavx,avx")
5879 (set_attr "type" "sseshuf")
5880 (set_attr "length_immediate" "1")
5881 (set_attr "prefix" "orig,vex")
5882 (set_attr "mode" "V4SF")])
;; Extract elements 2 and 3 of V4SF operand 1 into a V2SF destination.
;; Alternatives: store to memory (movhps), register to register (movhlps),
;; and load from an offsettable memory source using its high part (%H1).
5884 (define_insn "sse_storehps"
5885 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
5887 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
5888 (parallel [(const_int 2) (const_int 3)])))]
5891 %vmovhps\t{%1, %0|%q0, %1}
5892 %vmovhlps\t{%1, %d0|%d0, %1}
5893 %vmovlps\t{%H1, %d0|%d0, %H1}"
5894 [(set_attr "type" "ssemov")
5895 (set_attr "ssememalign" "64")
5896 (set_attr "prefix" "maybe_vex")
5897 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander wrapper around sse_loadhps.  ix86_fixup_binary_operands picks
;; the destination actually used for the insn; if that differs from
;; operand 0 the result is copied back with an extra move.
5899 (define_expand "sse_loadhps_exp"
5900 [(set (match_operand:V4SF 0 "nonimmediate_operand")
5903 (match_operand:V4SF 1 "nonimmediate_operand")
5904 (parallel [(const_int 0) (const_int 1)]))
5905 (match_operand:V2SF 2 "nonimmediate_operand")))]
5908 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
5910 emit_insn (gen_sse_loadhps (dst, operands[1], operands[2]));
5912 /* Fix up the destination if needed. */
5913 if (dst != operands[0])
5914 emit_move_insn (operands[0], dst);
;; Combine elements 0 and 1 of V4SF operand 1 with V2SF operand 2.
;; Alternatives 0-3 are the SSE/AVX register-destination forms (movhps for
;; a memory operand 2, movlhps for a register one); alternative 4 targets
;; an offsettable memory destination and writes its high part (%H0).
5919 (define_insn "sse_loadhps"
5920 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,o")
5923 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
5924 (parallel [(const_int 0) (const_int 1)]))
5925 (match_operand:V2SF 2 "nonimmediate_operand" " m,m,x,x,x")))]
5928 movhps\t{%2, %0|%0, %q2}
5929 vmovhps\t{%2, %1, %0|%0, %1, %q2}
5930 movlhps\t{%2, %0|%0, %2}
5931 vmovlhps\t{%2, %1, %0|%0, %1, %2}
5932 %vmovlps\t{%2, %H0|%H0, %2}"
5933 [(set_attr "isa" "noavx,avx,noavx,avx,*")
5934 (set_attr "type" "ssemov")
5935 (set_attr "ssememalign" "64")
5936 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
5937 (set_attr "mode" "V2SF,V2SF,V4SF,V4SF,V2SF")])
;; Extract elements 0 and 1 of V4SF operand 1 into a V2SF destination.
;; Alternatives: store to memory (movlps), register copy (movaps), and
;; load from memory (movlps).
5939 (define_insn "sse_storelps"
5940 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
5942 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,m")
5943 (parallel [(const_int 0) (const_int 1)])))]
5946 %vmovlps\t{%1, %0|%q0, %1}
5947 %vmovaps\t{%1, %0|%0, %1}
5948 %vmovlps\t{%1, %d0|%d0, %q1}"
5949 [(set_attr "type" "ssemov")
5950 (set_attr "prefix" "maybe_vex")
5951 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander wrapper around sse_loadlps.  ix86_fixup_binary_operands picks
;; the destination actually used for the insn; if that differs from
;; operand 0 the result is copied back with an extra move.
5953 (define_expand "sse_loadlps_exp"
5954 [(set (match_operand:V4SF 0 "nonimmediate_operand")
5956 (match_operand:V2SF 2 "nonimmediate_operand")
5958 (match_operand:V4SF 1 "nonimmediate_operand")
5959 (parallel [(const_int 2) (const_int 3)]))))]
5962 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
5964 emit_insn (gen_sse_loadlps (dst, operands[1], operands[2]));
5966 /* Fix up the destination if needed. */
5967 if (dst != operands[0])
5968 emit_move_insn (operands[0], dst);
;; Combine V2SF operand 2 with elements 2 and 3 of V4SF operand 1.
;; Alternatives 0/1 use shufps with immediate 0xe4 when operand 2 is in a
;; register, 2/3 use movlps from memory, and alternative 4 stores operand 2
;; into a memory destination with movlps.
5973 (define_insn "sse_loadlps"
5974 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,m")
5976 (match_operand:V2SF 2 "nonimmediate_operand" " 0,x,m,m,x")
5978 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,0,x,0")
5979 (parallel [(const_int 2) (const_int 3)]))))]
5982 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
5983 vshufps\t{$0xe4, %1, %2, %0|%0, %2, %1, 0xe4}
5984 movlps\t{%2, %0|%0, %q2}
5985 vmovlps\t{%2, %1, %0|%0, %1, %q2}
5986 %vmovlps\t{%2, %0|%q0, %2}"
5987 [(set_attr "isa" "noavx,avx,noavx,avx,*")
5988 (set_attr "type" "sseshuf,sseshuf,ssemov,ssemov,ssemov")
5989 (set_attr "ssememalign" "64")
5990 (set_attr "length_immediate" "1,1,*,*,*")
5991 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
5992 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
;; Register-to-register movss (SSE) / vmovss (AVX) on V4SF operands.
;; NOTE(review): the lines carrying the merge operator and its selector are
;; not visible in this listing, so which elements come from operand 2 and
;; which from operand 1 should be confirmed against the full pattern.
5994 (define_insn "sse_movss"
5995 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
5997 (match_operand:V4SF 2 "register_operand" " x,x")
5998 (match_operand:V4SF 1 "register_operand" " 0,x")
6002 movss\t{%2, %0|%0, %2}
6003 vmovss\t{%2, %1, %0|%0, %1, %2}"
6004 [(set_attr "isa" "noavx,avx")
6005 (set_attr "type" "ssemov")
6006 (set_attr "prefix" "orig,vex")
6007 (set_attr "mode" "SF")])
;; Broadcast element 0 of a V4SF register into every element of a
;; VF1_128_256 destination using vbroadcastss (register source form).
6009 (define_insn "avx2_vec_dup<mode>"
6010 [(set (match_operand:VF1_128_256 0 "register_operand" "=x")
6011 (vec_duplicate:VF1_128_256
6013 (match_operand:V4SF 1 "register_operand" "x")
6014 (parallel [(const_int 0)]))))]
6016 "vbroadcastss\t{%1, %0|%0, %1}"
6017 [(set_attr "type" "sselog1")
6018 (set_attr "prefix" "vex")
6019 (set_attr "mode" "<MODE>")])
;; Broadcast element 0 of a V8SF register into a V8SF destination with
;; vbroadcastss; the source register is printed through the %x modifier.
6021 (define_insn "avx2_vec_dupv8sf_1"
6022 [(set (match_operand:V8SF 0 "register_operand" "=x")
6025 (match_operand:V8SF 1 "register_operand" "x")
6026 (parallel [(const_int 0)]))))]
6028 "vbroadcastss\t{%x1, %0|%0, %x1}"
6029 [(set_attr "type" "sselog1")
6030 (set_attr "prefix" "vex")
6031 (set_attr "mode" "V8SF")])
;; Duplicate SF operand 1 across a V4SF register.  Alternatives:
;;   0 (avx)   - vshufps $0 from a register source,
;;   1 (avx)   - vbroadcastss from memory,
;;   2 (noavx) - in-place shufps $0 (source tied to the destination).
6033 (define_insn "vec_dupv4sf"
6034 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
6036 (match_operand:SF 1 "nonimmediate_operand" "x,m,0")))]
6039 vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0}
6040 vbroadcastss\t{%1, %0|%0, %1}
6041 shufps\t{$0, %0, %0|%0, %0, 0}"
6042 [(set_attr "isa" "avx,avx,noavx")
6043 (set_attr "type" "sseshuf1,ssemov,sseshuf1")
6044 (set_attr "length_immediate" "1,0,1")
6045 (set_attr "prefix_extra" "0,1,*")
6046 (set_attr "prefix" "vex,vex,orig")
6047 (set_attr "mode" "V4SF")])
;; Build a V2SF from two SF operands.  Alternatives 0-4 use SSE registers
;; (unpcklps for a register operand 2, insertps for a memory operand 2,
;; movss when operand 2 matches constraint C); alternatives 5-6 use MMX
;; registers (punpckldq / movd).
6049 ;; Although insertps takes register source, we prefer
6050 ;; unpcklps with register source since it is shorter.
6051 (define_insn "*vec_concatv2sf_sse4_1"
6052 [(set (match_operand:V2SF 0 "register_operand" "=x,x,x,x,x,*y ,*y")
6054 (match_operand:SF 1 "nonimmediate_operand" " 0,x,0,x,m, 0 , m")
6055 (match_operand:SF 2 "vector_move_operand" " x,x,m,m,C,*ym, C")))]
6058 unpcklps\t{%2, %0|%0, %2}
6059 vunpcklps\t{%2, %1, %0|%0, %1, %2}
6060 insertps\t{$0x10, %2, %0|%0, %2, 0x10}
6061 vinsertps\t{$0x10, %2, %1, %0|%0, %1, %2, 0x10}
6062 %vmovss\t{%1, %0|%0, %1}
6063 punpckldq\t{%2, %0|%0, %2}
6064 movd\t{%1, %0|%0, %1}"
6065 [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
6066 (set_attr "type" "sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov")
6067 (set_attr "prefix_data16" "*,*,1,*,*,*,*")
6068 (set_attr "prefix_extra" "*,*,1,1,*,*,*")
6069 (set_attr "length_immediate" "*,*,1,1,*,*,*")
6070 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig")
6071 (set_attr "mode" "V4SF,V4SF,V4SF,V4SF,SF,DI,DI")])
;; Build a V2SF from two SF operands without insertps: unpcklps or movss
;; for SSE registers, punpckldq or movd for MMX registers.
6073 ;; ??? In theory we can match memory for the MMX alternative, but allowing
6074 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
6075 ;; alternatives pretty much forces the MMX alternative to be chosen.
6076 (define_insn "*vec_concatv2sf_sse"
6077 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
6079 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
6080 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
6083 unpcklps\t{%2, %0|%0, %2}
6084 movss\t{%1, %0|%0, %1}
6085 punpckldq\t{%2, %0|%0, %2}
6086 movd\t{%1, %0|%0, %1}"
6087 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
6088 (set_attr "mode" "V4SF,SF,DI,DI")])
;; Build a V4SF from two V2SF operands: movlhps / vmovlhps when operand 2
;; is a register, movhps / vmovhps when it is in memory.
6090 (define_insn "*vec_concatv4sf"
6091 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x,x")
6093 (match_operand:V2SF 1 "register_operand" " 0,x,0,x")
6094 (match_operand:V2SF 2 "nonimmediate_operand" " x,x,m,m")))]
6097 movlhps\t{%2, %0|%0, %2}
6098 vmovlhps\t{%2, %1, %0|%0, %1, %2}
6099 movhps\t{%2, %0|%0, %q2}
6100 vmovhps\t{%2, %1, %0|%0, %1, %q2}"
6101 [(set_attr "isa" "noavx,avx,noavx,avx")
6102 (set_attr "type" "ssemov")
6103 (set_attr "prefix" "orig,vex,orig,vex")
6104 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF")])
;; vec_init expander for the V_128 modes; all work is delegated to
;; ix86_expand_vector_init (operand 0 = destination, operand 1 = values).
6106 (define_expand "vec_init<mode>"
6107 [(match_operand:V_128 0 "register_operand")
6111 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Insert scalar operand 2 (duplicated to vector width) into VI4F_128
;; operand 1, which may also match constraint C.  The eleven alternatives
;; cover SSE-register, memory and general-register sources plus three
;; memory-destination forms (their output templates are on lines not
;; visible in this listing).
;; NOTE(review): the element being replaced is presumably element 0, as the
;; pattern name says; the merge selector line is not visible here.
6115 ;; Avoid combining registers from different units in a single alternative,
6116 ;; see comment above inline_secondary_memory_needed function in i386.c
6117 (define_insn "vec_set<mode>_0"
6118 [(set (match_operand:VI4F_128 0 "nonimmediate_operand"
6119 "=v,v,x ,x,x,v,x ,x ,m ,m ,m")
6121 (vec_duplicate:VI4F_128
6122 (match_operand:<ssescalarmode> 2 "general_operand"
6123 " v,m,*r,m,x,v,*rm,*rm,!x,!*re,!*fF"))
6124 (match_operand:VI4F_128 1 "vector_move_operand"
6125 " C,C,C ,C,0,v,0 ,x ,0 ,0 ,0")
6129 %vinsertps\t{$0xe, %d2, %0|%0, %d2, 0xe}
6130 %vmov<ssescalarmodesuffix>\t{%2, %0|%0, %2}
6131 %vmovd\t{%2, %0|%0, %2}
6132 movss\t{%2, %0|%0, %2}
6133 movss\t{%2, %0|%0, %2}
6134 vmovss\t{%2, %1, %0|%0, %1, %2}
6135 pinsrd\t{$0, %2, %0|%0, %2, 0}
6136 vpinsrd\t{$0, %2, %1, %0|%0, %1, %2, 0}
6140 [(set_attr "isa" "sse4,sse2,sse2,noavx,noavx,avx,sse4_noavx,avx,*,*,*")
6142 (cond [(eq_attr "alternative" "0,6,7")
6143 (const_string "sselog")
6144 (eq_attr "alternative" "9")
6145 (const_string "imov")
6146 (eq_attr "alternative" "10")
6147 (const_string "fmov")
6149 (const_string "ssemov")))
6150 (set_attr "prefix_extra" "*,*,*,*,*,*,1,1,*,*,*")
6151 (set_attr "length_immediate" "*,*,*,*,*,*,1,1,*,*,*")
6152 (set_attr "prefix" "maybe_vex,maybe_vex,maybe_vex,orig,orig,vex,orig,vex,*,*,*")
6153 (set_attr "mode" "SF,<ssescalarmode>,SI,SF,SF,SF,TI,TI,*,*,*")])
;; insertps / vinsertps of SF operand 2 into V4SF operand 1.  Operand 3
;; must be a power of two whose log2 is a valid V4SF element index; it is
;; converted to the insertps immediate as exact_log2 (operand 3) << 4.
6155 ;; A subset is vec_setv4sf.
6156 (define_insn "*vec_setv4sf_sse4_1"
6157 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
6160 (match_operand:SF 2 "nonimmediate_operand" "xm,xm"))
6161 (match_operand:V4SF 1 "register_operand" "0,x")
6162 (match_operand:SI 3 "const_int_operand")))]
6164 && ((unsigned) exact_log2 (INTVAL (operands[3]))
6165 < GET_MODE_NUNITS (V4SFmode))"
6167 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
6168 switch (which_alternative)
6171 return "insertps\t{%3, %2, %0|%0, %2, %3}";
6173 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
6178 [(set_attr "isa" "noavx,avx")
6179 (set_attr "type" "sselog")
6180 (set_attr "prefix_data16" "1,*")
6181 (set_attr "prefix_extra" "1")
6182 (set_attr "length_immediate" "1")
6183 (set_attr "prefix" "orig,vex")
6184 (set_attr "mode" "V4SF")])
;; insertps / vinsertps with an explicit 8-bit immediate (operand 3),
;; modelled as an unspec.  When operand 2 is in memory, bits 7:6 of the
;; immediate (count_s) are folded into the address as a count_s * 4 byte
;; offset on an SFmode reference, and the immediate is reduced to its low
;; six bits.
6186 (define_insn "sse4_1_insertps"
6187 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
6188 (unspec:V4SF [(match_operand:V4SF 2 "nonimmediate_operand" "xm,xm")
6189 (match_operand:V4SF 1 "register_operand" "0,x")
6190 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
6194 if (MEM_P (operands[2]))
6196 unsigned count_s = INTVAL (operands[3]) >> 6;
6198 operands[3] = GEN_INT (INTVAL (operands[3]) & 0x3f);
6199 operands[2] = adjust_address_nv (operands[2], SFmode, count_s * 4);
6201 switch (which_alternative)
6204 return "insertps\t{%3, %2, %0|%0, %2, %3}";
6206 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
6211 [(set_attr "isa" "noavx,avx")
6212 (set_attr "type" "sselog")
6213 (set_attr "prefix_data16" "1,*")
6214 (set_attr "prefix_extra" "1")
6215 (set_attr "length_immediate" "1")
6216 (set_attr "prefix" "orig,vex")
6217 (set_attr "mode" "V4SF")])
;; Post-reload split (TARGET_SSE): a scalar inserted into a VI4F_128 memory
;; destination becomes a plain scalar store of operand 1 at offset 0 of
;; that memory, re-addressed in the scalar mode.
6220 [(set (match_operand:VI4F_128 0 "memory_operand")
6222 (vec_duplicate:VI4F_128
6223 (match_operand:<ssescalarmode> 1 "nonmemory_operand"))
6226 "TARGET_SSE && reload_completed"
6227 [(set (match_dup 0) (match_dup 1))]
6228 "operands[0] = adjust_address (operands[0], <ssescalarmode>mode, 0);")
;; vec_set expander for all modes in iterator V: stores scalar operand 1
;; into element INTVAL (operand 2) of vector operand 0 by delegating to
;; ix86_expand_vector_set.
6230 (define_expand "vec_set<mode>"
6231 [(match_operand:V 0 "register_operand")
6232 (match_operand:<ssescalarmode> 1 "register_operand")
6233 (match_operand 2 "const_int_operand")]
6236 ix86_expand_vector_set (false, operands[0], operands[1],
6237 INTVAL (operands[2]));
;; Extract element 0 of a V4SF operand into an SF destination (SSE, x87 or
;; general register, or memory).  After reload it is split into a plain
;; SFmode move: a register source is re-typed to SFmode on the same
;; register number, a memory source is re-addressed at offset 0.
6241 (define_insn_and_split "*vec_extractv4sf_0"
6242 [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,f,r")
6244 (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m,m")
6245 (parallel [(const_int 0)])))]
6246 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6248 "&& reload_completed"
6249 [(set (match_dup 0) (match_dup 1))]
6251 if (REG_P (operands[1]))
6252 operands[1] = gen_rtx_REG (SFmode, REGNO (operands[1]));
6254 operands[1] = adjust_address (operands[1], SFmode, 0);
;; Extract element operand 2 (0..3) of a V4SF register.  Alternative 0
;; emits extractps to a general register or memory.  When the destination
;; is an SSE register the insn is split after reload: the destination is
;; viewed as V4SF and filled with a shufps that replicates the selected
;; element, or with an interleave-high of operand 1 with itself.
6257 (define_insn_and_split "*sse4_1_extractps"
6258 [(set (match_operand:SF 0 "nonimmediate_operand" "=rm,x,x")
6260 (match_operand:V4SF 1 "register_operand" "x,0,x")
6261 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n,n,n")])))]
6264 %vextractps\t{%2, %1, %0|%0, %1, %2}
6267 "&& reload_completed && SSE_REG_P (operands[0])"
6270 rtx dest = gen_rtx_REG (V4SFmode, REGNO (operands[0]));
6271 switch (INTVAL (operands[2]))
6275 emit_insn (gen_sse_shufps_v4sf (dest, operands[1], operands[1],
6276 operands[2], operands[2],
6277 GEN_INT (INTVAL (operands[2]) + 4),
6278 GEN_INT (INTVAL (operands[2]) + 4)));
6281 emit_insn (gen_vec_interleave_highv4sf (dest, operands[1], operands[1]));
6284 /* 0 should be handled by the *vec_extractv4sf_0 pattern above. */
6289 [(set_attr "isa" "*,noavx,avx")
6290 (set_attr "type" "sselog,*,*")
6291 (set_attr "prefix_data16" "1,*,*")
6292 (set_attr "prefix_extra" "1,*,*")
6293 (set_attr "length_immediate" "1,*,*")
6294 (set_attr "prefix" "maybe_vex,*,*")
6295 (set_attr "mode" "V4SF,*,*")])
;; Extract element operand 2 of a V4SF held in offsettable memory.  After
;; reload this becomes an SFmode load from byte offset index * 4.
6297 (define_insn_and_split "*vec_extractv4sf_mem"
6298 [(set (match_operand:SF 0 "register_operand" "=x,*r,f")
6300 (match_operand:V4SF 1 "memory_operand" "o,o,o")
6301 (parallel [(match_operand 2 "const_0_to_3_operand" "n,n,n")])))]
6304 "&& reload_completed"
6305 [(set (match_dup 0) (match_dup 1))]
6307 operands[1] = adjust_address (operands[1], SFmode, INTVAL (operands[2]) * 4);
;; Name fragments for the quarter-width (128-bit) extract expander:
;; extract_type gives the ISA prefix of the pattern name, extract_suf the
;; element-layout suffix of the instruction.
6310 (define_mode_attr extract_type
6311 [(V16SF "avx512f") (V16SI "avx512f") (V8DF "avx512dq") (V8DI "avx512dq")])
6313 (define_mode_attr extract_suf
6314 [(V16SF "32x4") (V16SI "32x4") (V8DF "64x2") (V8DI "64x2")])
;; 512-bit modes handled by that expander; the 64-bit-element modes are
;; only enabled with AVX512DQ.
6316 (define_mode_iterator AVX512_VEC
6317 [(V8DF "TARGET_AVX512DQ") (V8DI "TARGET_AVX512DQ") V16SF V16SI])
;; Masked extract of one quarter of a 512-bit vector.  Operand 2 (0..3)
;; selects the quarter and is expanded into explicit element indices:
;; four consecutive indices starting at mask * 4 for the 32x4 patterns
;; (V16SI / V16SF), two starting at mask * 2 for the 64x2 patterns.
;; Operand 3 and operand 4 (QI) are passed through to the masked insn.
;; A memory destination combined with a CONST_VECTOR operand 3 is first
;; passed through force_reg.
6319 (define_expand "<extract_type>_vextract<shuffletype><extract_suf>_mask"
6320 [(match_operand:<ssequartermode> 0 "nonimmediate_operand")
6321 (match_operand:AVX512_VEC 1 "register_operand")
6322 (match_operand:SI 2 "const_0_to_3_operand")
6323 (match_operand:<ssequartermode> 3 "nonimmediate_operand")
6324 (match_operand:QI 4 "register_operand")]
6328 mask = INTVAL (operands[2]);
6330 if (MEM_P (operands[0]) && GET_CODE (operands[3]) == CONST_VECTOR)
6331 operands[0] = force_reg (<ssequartermode>mode, operands[0]);
6333 if (<MODE>mode == V16SImode || <MODE>mode == V16SFmode)
6334 emit_insn (gen_avx512f_vextract<shuffletype>32x4_1_mask (operands[0],
6335 operands[1], GEN_INT (mask * 4), GEN_INT (mask * 4 + 1),
6336 GEN_INT (mask * 4 + 2), GEN_INT (mask * 4 + 3), operands[3],
6339 emit_insn (gen_avx512dq_vextract<shuffletype>64x2_1_mask (operands[0],
6340 operands[1], GEN_INT (mask * 2), GEN_INT (mask * 2 + 1), operands[3],
;; Masked store form of vextract{f,i}64x2: the two selected elements of
;; operand 1 are merged into memory operand 0 (tied to operand 4) under
;; write mask operand 5.
;; Operands 2 and 3 must form an aligned consecutive pair (even index,
;; then index + 1) because the emitted immediate is just operands[2] >> 1.
;; The condition compares with `=='; the former `=' assigned through
;; INTVAL, clobbering the CONST_INT and accepting nearly any index pair.
;; The mask uses "Yk", as the 32x4 sibling pattern does, instead of "k".
6345 (define_insn "avx512dq_vextract<shuffletype>64x2_1_maskm"
6346 [(set (match_operand:<ssequartermode> 0 "memory_operand" "=m")
6347 (vec_merge:<ssequartermode>
6348 (vec_select:<ssequartermode>
6349 (match_operand:V8FI 1 "register_operand" "v")
6350 (parallel [(match_operand 2 "const_0_to_7_operand")
6351 (match_operand 3 "const_0_to_7_operand")]))
6352 (match_operand:<ssequartermode> 4 "memory_operand" "0")
6353 (match_operand:QI 5 "register_operand" "Yk")))]
6354 "TARGET_AVX512DQ && INTVAL (operands[2]) % 2 == 0 && (INTVAL (operands[2]) == INTVAL (operands[3]) - 1)"
6356 operands[2] = GEN_INT ((INTVAL (operands[2])) >> 1);
6357 return "vextract<shuffletype>64x2\t{%2, %1, %0%{%5%}|%0%{%5%}, %1, %2}";
6359 [(set_attr "type" "sselog")
6360 (set_attr "prefix_extra" "1")
6361 (set_attr "length_immediate" "1")
6362 (set_attr "memory" "store")
6363 (set_attr "prefix" "evex")
6364 (set_attr "mode" "<sseinsnmode>")])
;; Masked store form of vextract{f,i}32x4: the four selected elements of
;; operand 1 are merged into memory operand 0 (tied to operand 6) under
;; write mask operand 7.
;; Operands 2-5 must be four consecutive indices starting at a multiple of
;; four, because the emitted immediate is just operands[2] >> 2.
;; The condition compares with `=='; the former `=' assigned through
;; INTVAL, clobbering the CONST_INTs and accepting nearly any selector.
6366 (define_insn "avx512f_vextract<shuffletype>32x4_1_maskm"
6367 [(set (match_operand:<ssequartermode> 0 "memory_operand" "=m")
6368 (vec_merge:<ssequartermode>
6369 (vec_select:<ssequartermode>
6370 (match_operand:V16FI 1 "register_operand" "v")
6371 (parallel [(match_operand 2 "const_0_to_15_operand")
6372 (match_operand 3 "const_0_to_15_operand")
6373 (match_operand 4 "const_0_to_15_operand")
6374 (match_operand 5 "const_0_to_15_operand")]))
6375 (match_operand:<ssequartermode> 6 "memory_operand" "0")
6376 (match_operand:QI 7 "register_operand" "Yk")))]
6377 "TARGET_AVX512F && INTVAL (operands[2]) % 4 == 0 && (INTVAL (operands[2]) == INTVAL (operands[3]) - 1)
6378 && (INTVAL (operands[3]) == INTVAL (operands[4]) - 1)
6379 && (INTVAL (operands[4]) == INTVAL (operands[5]) - 1)"
6381 operands[2] = GEN_INT ((INTVAL (operands[2])) >> 2);
6382 return "vextract<shuffletype>32x4\t{%2, %1, %0%{%7%}|%0%{%7%}, %1, %2}";
6384 [(set_attr "type" "sselog")
6385 (set_attr "prefix_extra" "1")
6386 (set_attr "length_immediate" "1")
6387 (set_attr "memory" "store")
6388 (set_attr "prefix" "evex")
6389 (set_attr "mode" "<sseinsnmode>")])
;; vextract{f,i}64x2 of two elements of a V8FI register, optionally
;; masked via the <mask_name> substitution.
;; Operands 2 and 3 must form an aligned consecutive pair (even index,
;; then index + 1) because the emitted immediate is just operands[2] >> 1.
;; The condition compares with `=='; the former `=' assigned through
;; INTVAL, clobbering the CONST_INT and accepting nearly any index pair.
6391 (define_insn "<mask_codefor>avx512dq_vextract<shuffletype>64x2_1<mask_name>"
6392 [(set (match_operand:<ssequartermode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
6393 (vec_select:<ssequartermode>
6394 (match_operand:V8FI 1 "register_operand" "v")
6395 (parallel [(match_operand 2 "const_0_to_7_operand")
6396 (match_operand 3 "const_0_to_7_operand")])))]
6397 "TARGET_AVX512DQ && INTVAL (operands[2]) % 2 == 0 && (INTVAL (operands[2]) == INTVAL (operands[3]) - 1)"
6399 operands[2] = GEN_INT ((INTVAL (operands[2])) >> 1);
6400 return "vextract<shuffletype>64x2\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}";
6402 [(set_attr "type" "sselog")
6403 (set_attr "prefix_extra" "1")
6404 (set_attr "length_immediate" "1")
6405 (set (attr "memory")
6406 (if_then_else (match_test "MEM_P (operands[0])")
6407 (const_string "store")
6408 (const_string "none")))
6409 (set_attr "prefix" "evex")
6410 (set_attr "mode" "<sseinsnmode>")])
;; vextract{f,i}32x4 of four elements of a V16FI register, optionally
;; masked via the <mask_name> substitution.
;; Operands 2-5 must be four consecutive indices starting at a multiple of
;; four, because the emitted immediate is just operands[2] >> 2.
;; The condition compares with `=='; the former `=' assigned through
;; INTVAL, clobbering the CONST_INTs and accepting nearly any selector.
6412 (define_insn "<mask_codefor>avx512f_vextract<shuffletype>32x4_1<mask_name>"
6413 [(set (match_operand:<ssequartermode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
6414 (vec_select:<ssequartermode>
6415 (match_operand:V16FI 1 "register_operand" "v")
6416 (parallel [(match_operand 2 "const_0_to_15_operand")
6417 (match_operand 3 "const_0_to_15_operand")
6418 (match_operand 4 "const_0_to_15_operand")
6419 (match_operand 5 "const_0_to_15_operand")])))]
6420 "TARGET_AVX512F && INTVAL (operands[2]) % 4 == 0 && (INTVAL (operands[2]) == INTVAL (operands[3]) - 1)
6421 && (INTVAL (operands[3]) == INTVAL (operands[4]) - 1)
6422 && (INTVAL (operands[4]) == INTVAL (operands[5]) - 1)"
6424 operands[2] = GEN_INT ((INTVAL (operands[2])) >> 2);
6425 return "vextract<shuffletype>32x4\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
6427 [(set_attr "type" "sselog")
6428 (set_attr "prefix_extra" "1")
6429 (set_attr "length_immediate" "1")
6430 (set (attr "memory")
6431 (if_then_else (match_test "MEM_P (operands[0])")
6432 (const_string "store")
6433 (const_string "none")))
6434 (set_attr "prefix" "evex")
6435 (set_attr "mode" "<sseinsnmode>")])
;; Name fragments for the half-width (256-bit) extract expander:
;; extract_type_2 gives the ISA prefix of the pattern name, extract_suf_2
;; the element-layout suffix of the instruction.
6437 (define_mode_attr extract_type_2
6438 [(V16SF "avx512dq") (V16SI "avx512dq") (V8DF "avx512f") (V8DI "avx512f")])
6440 (define_mode_attr extract_suf_2
6441 [(V16SF "32x8") (V16SI "32x8") (V8DF "64x4") (V8DI "64x4")])
;; 512-bit modes handled by that expander; the 32-bit-element modes are
;; only enabled with AVX512DQ.
6443 (define_mode_iterator AVX512_VEC_2
6444 [(V16SF "TARGET_AVX512DQ") (V16SI "TARGET_AVX512DQ") V8DF V8DI])
;; Masked extract of one half of a 512-bit vector.  Operand 2 (0 or 1)
;; selects gen_vec_extract_lo_<mode>_mask or gen_vec_extract_hi_<mode>_mask,
;; which is then called with operands 0, 1, 3 and 4.
;; Operand 0 is declared in <ssehalfvecmode>, so the force_reg below must
;; use that mode as well (it previously named <ssequartermode>, which does
;; not match the operand).
6446 (define_expand "<extract_type_2>_vextract<shuffletype><extract_suf_2>_mask"
6447 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
6448 (match_operand:AVX512_VEC_2 1 "register_operand")
6449 (match_operand:SI 2 "const_0_to_1_operand")
6450 (match_operand:<ssehalfvecmode> 3 "nonimmediate_operand")
6451 (match_operand:QI 4 "register_operand")]
6454 rtx (*insn)(rtx, rtx, rtx, rtx);
6456 if (MEM_P (operands[0]) && GET_CODE (operands[3]) == CONST_VECTOR)
6457 operands[0] = force_reg (<ssehalfvecmode>mode, operands[0]);
6459 switch (INTVAL (operands[2]))
6462 insn = gen_vec_extract_lo_<mode>_mask;
6465 insn = gen_vec_extract_hi_<mode>_mask;
6471 emit_insn (insn (operands[0], operands[1], operands[3], operands[4]));
;; Post-reload split (AVX512F): extracting elements 0-3 of a V8FI operand
;; is a plain half-width move.  Operand 1 is re-expressed in
;; <ssehalfvecmode> (gen_rtx_REG on its register number, or gen_lowpart)
;; and moved into operand 0.
6476 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
6477 (vec_select:<ssehalfvecmode>
6478 (match_operand:V8FI 1 "nonimmediate_operand")
6479 (parallel [(const_int 0) (const_int 1)
6480 (const_int 2) (const_int 3)])))]
6481 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))
6482 && reload_completed"
6485 rtx op1 = operands[1];
6487 op1 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op1));
6489 op1 = gen_lowpart (<ssehalfvecmode>mode, op1);
6490 emit_move_insn (operands[0], op1);
;; Masked store of the low half (elements 0-3) of a V8FI register: the
;; selection is merged into memory operand 0 (tied to operand 2) under
;; write mask operand 3, using vextract{f,i}64x4 with immediate 0.
6494 (define_insn "vec_extract_lo_<mode>_maskm"
6495 [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
6496 (vec_merge:<ssehalfvecmode>
6497 (vec_select:<ssehalfvecmode>
6498 (match_operand:V8FI 1 "register_operand" "v")
6499 (parallel [(const_int 0) (const_int 1)
6500 (const_int 2) (const_int 3)]))
6501 (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
6502 (match_operand:QI 3 "register_operand" "Yk")))]
6504 "vextract<shuffletype>64x4\t{$0x0, %1, %0%{%3%}|%0%{%3%}, %1, 0x0}"
6505 [(set_attr "type" "sselog")
6506 (set_attr "prefix_extra" "1")
6507 (set_attr "length_immediate" "1")
6508 (set_attr "prefix" "evex")
6509 (set_attr "mode" "<sseinsnmode>")])
;; Extract the low half (elements 0-3) of a V8FI operand, optionally
;; masked via <mask_name>.  The visible output is vextract{f,i}64x4 with
;; immediate 0; source and destination may not both be memory.
6511 (define_insn "vec_extract_lo_<mode><mask_name>"
6512 [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
6513 (vec_select:<ssehalfvecmode>
6514 (match_operand:V8FI 1 "nonimmediate_operand" "vm")
6515 (parallel [(const_int 0) (const_int 1)
6516 (const_int 2) (const_int 3)])))]
6517 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6520 return "vextract<shuffletype>64x4\t{$0x0, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x0}";
6524 [(set_attr "type" "sselog")
6525 (set_attr "prefix_extra" "1")
6526 (set_attr "length_immediate" "1")
6527 (set (attr "memory")
6528 (if_then_else (match_test "MEM_P (operands[0])")
6529 (const_string "store")
6530 (const_string "none")))
6531 (set_attr "prefix" "evex")
6532 (set_attr "mode" "<sseinsnmode>")])
;; Masked store of the high half (elements 4-7) of a V8FI register: the
;; selection is merged into memory operand 0 (tied to operand 2) under
;; write mask operand 3, using vextract{f,i}64x4 with immediate 1.
6534 (define_insn "vec_extract_hi_<mode>_maskm"
6535 [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
6536 (vec_merge:<ssehalfvecmode>
6537 (vec_select:<ssehalfvecmode>
6538 (match_operand:V8FI 1 "register_operand" "v")
6539 (parallel [(const_int 4) (const_int 5)
6540 (const_int 6) (const_int 7)]))
6541 (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
6542 (match_operand:QI 3 "register_operand" "Yk")))]
6544 "vextract<shuffletype>64x4\t{$0x1, %1, %0%{%3%}|%0%{%3%}, %1, 0x1}"
6545 [(set_attr "type" "sselog")
6546 (set_attr "prefix_extra" "1")
6547 (set_attr "length_immediate" "1")
6548 (set_attr "memory" "store")
6549 (set_attr "prefix" "evex")
6550 (set_attr "mode" "<sseinsnmode>")])
;; Extract the high half (elements 4-7) of a V8FI register, optionally
;; masked via <mask_name>, using vextract{f,i}64x4 with immediate 1.
6552 (define_insn "vec_extract_hi_<mode><mask_name>"
6553 [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
6554 (vec_select:<ssehalfvecmode>
6555 (match_operand:V8FI 1 "register_operand" "v")
6556 (parallel [(const_int 4) (const_int 5)
6557 (const_int 6) (const_int 7)])))]
6559 "vextract<shuffletype>64x4\t{$0x1, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x1}"
6560 [(set_attr "type" "sselog")
6561 (set_attr "prefix_extra" "1")
6562 (set_attr "length_immediate" "1")
6563 (set (attr "memory")
6564 (if_then_else (match_test "MEM_P (operands[0])")
6565 (const_string "store")
6566 (const_string "none")))
6567 (set_attr "prefix" "evex")
6568 (set_attr "mode" "<sseinsnmode>")])
;; Masked store of the high half (elements 8-15) of a V16FI register: the
;; selection is merged into memory operand 0 (tied to operand 2) under
;; write mask operand 3, using vextract{f,i}32x8 with immediate 1.
;; The mask operand uses "Yk", as the V8FI _maskm patterns do, rather than
;; "k": the register is printed as the {%k} write mask, so it must be one
;; that can serve as a predicate.
6570 (define_insn "vec_extract_hi_<mode>_maskm"
6571 [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
6572 (vec_merge:<ssehalfvecmode>
6573 (vec_select:<ssehalfvecmode>
6574 (match_operand:V16FI 1 "register_operand" "v")
6575 (parallel [(const_int 8) (const_int 9)
6576 (const_int 10) (const_int 11)
6577 (const_int 12) (const_int 13)
6578 (const_int 14) (const_int 15)]))
6579 (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
6580 (match_operand:QI 3 "register_operand" "Yk")))]
6582 "vextract<shuffletype>32x8\t{$0x1, %1, %0%{%3%}|%0%{%3%}, %1, 0x1}"
6583 [(set_attr "type" "sselog")
6584 (set_attr "prefix_extra" "1")
6585 (set_attr "length_immediate" "1")
6586 (set (attr "memory")
6587 (if_then_else (match_test "MEM_P (operands[0])")
6588 (const_string "store")
6589 (const_string "none")))
6590 (set_attr "prefix" "evex")
6591 (set_attr "mode" "<sseinsnmode>")])
;; Extract the high half (elements 8-15) of a V16FI register.
;; Alternative 0 (isa avx512dq) emits vextract{f,i}32x8 and carries the
;; optional mask; alternative 1 (isa noavx512dq) emits an unmasked
;; vextracti64x4.  The condition only allows masking with AVX512DQ.
6593 (define_insn "vec_extract_hi_<mode><mask_name>"
6594 [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=<store_mask_constraint>,vm")
6595 (vec_select:<ssehalfvecmode>
6596 (match_operand:V16FI 1 "register_operand" "v,v")
6597 (parallel [(const_int 8) (const_int 9)
6598 (const_int 10) (const_int 11)
6599 (const_int 12) (const_int 13)
6600 (const_int 14) (const_int 15)])))]
6601 "TARGET_AVX512F && (!<mask_applied> || TARGET_AVX512DQ)"
6603 vextract<shuffletype>32x8\t{$0x1, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x1}
6604 vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
6605 [(set_attr "type" "sselog")
6606 (set_attr "prefix_extra" "1")
6607 (set_attr "isa" "avx512dq,noavx512dq")
6608 (set_attr "length_immediate" "1")
6609 (set (attr "memory")
6610 (if_then_else (match_test "MEM_P (operands[0])")
6611 (const_string "store")
6612 (const_string "none")))
6613 (set_attr "prefix" "evex")
6614 (set_attr "mode" "<sseinsnmode>")])
;; Masked extract of one 128-bit half of a VI48F_256 register (requires
;; AVX512DQ and AVX512VL).  Operand 2 (0 or 1) selects
;; gen_vec_extract_lo_<mode>_mask or gen_vec_extract_hi_<mode>_mask, which
;; is called with operands 0, 1, 3 and 4.  A memory destination combined
;; with a CONST_VECTOR operand 3 is first passed through force_reg.
6616 (define_expand "avx512vl_vextractf128<mode>"
6617 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
6618 (match_operand:VI48F_256 1 "register_operand")
6619 (match_operand:SI 2 "const_0_to_1_operand")
6620 (match_operand:<ssehalfvecmode> 3 "vector_move_operand")
6621 (match_operand:QI 4 "register_operand")]
6622 "TARGET_AVX512DQ && TARGET_AVX512VL"
6624 rtx (*insn)(rtx, rtx, rtx, rtx);
6626 if (MEM_P (operands[0]) && GET_CODE (operands[3]) == CONST_VECTOR)
6627 operands[0] = force_reg (<ssehalfvecmode>mode, operands[0]);
6629 switch (INTVAL (operands[2]))
6632 insn = gen_vec_extract_lo_<mode>_mask;
6635 insn = gen_vec_extract_hi_<mode>_mask;
6641 emit_insn (insn (operands[0], operands[1], operands[3], operands[4]));
;; Unmasked extract of one 128-bit half of a V_256 register.  Operand 2
;; (0 or 1) selects gen_vec_extract_lo_<mode> or gen_vec_extract_hi_<mode>,
;; which is called with operands 0 and 1.
6645 (define_expand "avx_vextractf128<mode>"
6646 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
6647 (match_operand:V_256 1 "register_operand")
6648 (match_operand:SI 2 "const_0_to_1_operand")]
6651 rtx (*insn)(rtx, rtx);
6653 switch (INTVAL (operands[2]))
6656 insn = gen_vec_extract_lo_<mode>;
6659 insn = gen_vec_extract_hi_<mode>;
6665 emit_insn (insn (operands[0], operands[1]));
;; Extract the low half (elements 0-7) of a V16FI operand, optionally
;; masked via <mask_name>.  The visible output is vextract{f,i}32x8 with
;; immediate 0; source and destination may not both be memory.
6669 (define_insn "vec_extract_lo_<mode><mask_name>"
6670 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,m")
6671 (vec_select:<ssehalfvecmode>
6672 (match_operand:V16FI 1 "nonimmediate_operand" "vm,v")
6673 (parallel [(const_int 0) (const_int 1)
6674 (const_int 2) (const_int 3)
6675 (const_int 4) (const_int 5)
6676 (const_int 6) (const_int 7)])))]
6678 && <mask_mode512bit_condition>
6679 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6682 return "vextract<shuffletype>32x8\t{$0x0, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x0}";
;; Post-reload split (AVX512F): extracting elements 0-7 of a V16FI operand
;; is a plain half-width move.  Operand 1 is re-expressed in
;; <ssehalfvecmode> (gen_rtx_REG on its register number, or gen_lowpart)
;; and moved into operand 0.
6688 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
6689 (vec_select:<ssehalfvecmode>
6690 (match_operand:V16FI 1 "nonimmediate_operand")
6691 (parallel [(const_int 0) (const_int 1)
6692 (const_int 2) (const_int 3)
6693 (const_int 4) (const_int 5)
6694 (const_int 6) (const_int 7)])))]
6695 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))
6696 && reload_completed"
6699 rtx op1 = operands[1];
6701 op1 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op1));
6703 op1 = gen_lowpart (<ssehalfvecmode>mode, op1);
6704 emit_move_insn (operands[0], op1);
;; Extract the low half (elements 0 and 1) of a VI8F_256 operand,
;; optionally masked via <mask_name>.  Masking requires both AVX512VL and
;; AVX512DQ.  The visible output is vextract{f,i}64x2 with immediate 0
;; under write mask operand 3.
;; NOTE(review): the template names operand 3 directly, so it presumably
;; sits behind a mask-applied check on a line not visible here - confirm.
6708 (define_insn "vec_extract_lo_<mode><mask_name>"
6709 [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=v,m")
6710 (vec_select:<ssehalfvecmode>
6711 (match_operand:VI8F_256 1 "nonimmediate_operand" "vm,v")
6712 (parallel [(const_int 0) (const_int 1)])))]
6714 && (!<mask_applied> || (TARGET_AVX512VL && TARGET_AVX512DQ))
6715 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6718 return "vextract<shuffletype>64x2\t{$0x0, %1, %0%{%3%}|%0%{%3%}, %1, 0x0}";
6722 [(set_attr "type" "sselog")
6723 (set_attr "prefix_extra" "1")
6724 (set_attr "length_immediate" "1")
6725 (set_attr "memory" "none,store")
6726 (set_attr "prefix" "evex")
6727 (set_attr "mode" "XI")])
;; Post-reload split (AVX): extracting elements 0 and 1 of a VI8F_256
;; operand is a plain half-width move.  Operand 1 is re-expressed in
;; <ssehalfvecmode> (gen_rtx_REG on its register number, or gen_lowpart)
;; and moved into operand 0.
6730 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
6731 (vec_select:<ssehalfvecmode>
6732 (match_operand:VI8F_256 1 "nonimmediate_operand")
6733 (parallel [(const_int 0) (const_int 1)])))]
6734 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))
6735 && reload_completed"
6738 rtx op1 = operands[1];
6740 op1 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op1));
6742 op1 = gen_lowpart (<ssehalfvecmode>mode, op1);
6743 emit_move_insn (operands[0], op1);
;; Extract the high half (elements 2 and 3) of a VI8F_256 register,
;; optionally masked via <mask_name>.  With both AVX512DQ and AVX512VL the
;; output is vextract{f,i}64x2 with immediate 1 (carrying the mask);
;; otherwise it is an unmasked vextract<i128> with immediate 1.
6747 (define_insn "vec_extract_hi_<mode><mask_name>"
6748 [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=v,<store_mask_constraint>")
6749 (vec_select:<ssehalfvecmode>
6750 (match_operand:VI8F_256 1 "register_operand" "v,v")
6751 (parallel [(const_int 2) (const_int 3)])))]
6754 if (TARGET_AVX512DQ && TARGET_AVX512VL)
6755 return "vextract<shuffletype>64x2\t{$0x1, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x1}";
6757 return "vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}";
6759 [(set_attr "type" "sselog")
6760 (set_attr "prefix_extra" "1")
6761 (set_attr "length_immediate" "1")
6762 (set_attr "memory" "none,store")
6763 (set_attr "prefix" "vex")
6764 (set_attr "mode" "<sseinsnmode>")])
;; Post-reload split (AVX): extracting elements 0-3 of a VI4F_256 operand
;; is a plain half-width move.  Operand 1 is re-expressed in
;; <ssehalfvecmode> (gen_rtx_REG on its register number, or gen_lowpart)
;; and moved into operand 0.
6767 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
6768 (vec_select:<ssehalfvecmode>
6769 (match_operand:VI4F_256 1 "nonimmediate_operand")
6770 (parallel [(const_int 0) (const_int 1)
6771 (const_int 2) (const_int 3)])))]
6772 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1])) && reload_completed"
6775 rtx op1 = operands[1];
6777 op1 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op1));
6779 op1 = gen_lowpart (<ssehalfvecmode>mode, op1);
6780 emit_move_insn (operands[0], op1);
;; Extract the low half (elements 0-3) of a VI4F_256 operand, optionally
;; masked via <mask_name>.  Enabled for plain AVX when no mask is applied,
;; or whenever AVX512VL and AVX512DQ are both available.  The visible
;; output is vextract{f,i}32x4 with immediate 0.
6785 (define_insn "vec_extract_lo_<mode><mask_name>"
6786 [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
6787 (vec_select:<ssehalfvecmode>
6788 (match_operand:VI4F_256 1 "nonimmediate_operand" "v")
6789 (parallel [(const_int 0) (const_int 1)
6790 (const_int 2) (const_int 3)])))]
6791 "(!<mask_applied> && TARGET_AVX) || (TARGET_AVX512VL && TARGET_AVX512DQ)"
6794 return "vextract<shuffletype>32x4\t{$0x0, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x0}";
6798 [(set_attr "type" "sselog")
6799 (set_attr "prefix_extra" "1")
6800 (set_attr "length_immediate" "1")
6801 (set (attr "memory")
6802 (if_then_else (match_test "MEM_P (operands[0])")
6803 (const_string "store")
6804 (const_string "none")))
6805 (set_attr "prefix" "evex")
6806 (set_attr "mode" "<sseinsnmode>")])
;; Masked store of the low half (elements 0-3) of a VI4F_256 register: the
;; selection is merged into memory operand 0 (tied to operand 2) under
;; write mask operand 3, using vextract{f,i}32x4 with immediate 0.
;; Both dialects of the template must print the mask as %{%3%}; the AT&T
;; half used to read %{3%}, which emits a literal "3" instead of the mask
;; register.  The mask operand uses "Yk", like the V8FI _maskm patterns,
;; because it is printed as the {%k} write mask.
6808 (define_insn "vec_extract_lo_<mode>_maskm"
6809 [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
6810 (vec_merge:<ssehalfvecmode>
6811 (vec_select:<ssehalfvecmode>
6812 (match_operand:VI4F_256 1 "register_operand" "v")
6813 (parallel [(const_int 0) (const_int 1)
6814 (const_int 2) (const_int 3)]))
6815 (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
6816 (match_operand:QI 3 "register_operand" "Yk")))]
6817 "TARGET_AVX512VL && TARGET_AVX512F"
6818 "vextract<shuffletype>32x4\t{$0x0, %1, %0%{%3%}|%0%{%3%}, %1, 0x0}"
6819 [(set_attr "type" "sselog")
6820 (set_attr "length_immediate" "1")
6821 (set_attr "prefix" "evex")
6822 (set_attr "mode" "<sseinsnmode>")])
;; Masked store of the high half (elements 4..7) of a VI4F_256 register to
;; memory.  Operand 2 is the same memory as operand 0 (constraint "0") and
;; supplies the elements kept where the mask bit is clear.  Requires AVX512F
;; and AVX512VL; emits vextract<shuffletype>32x4 with immediate 1.
;; Operand 3 is the write mask.  It is a QImode value living in a mask
;; register (constraint "Yk", which excludes k0), matching the low-half
;; _maskm pattern; a vector-mode operand could never be allocated to a mask
;; register.
6824 (define_insn "vec_extract_hi_<mode>_maskm"
6825 [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
6826 (vec_merge:<ssehalfvecmode>
6827 (vec_select:<ssehalfvecmode>
6828 (match_operand:VI4F_256 1 "register_operand" "v")
6829 (parallel [(const_int 4) (const_int 5)
6830 (const_int 6) (const_int 7)]))
6831 (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
6832 (match_operand:QI 3 "register_operand" "Yk")))]
6833 "TARGET_AVX512F && TARGET_AVX512VL"
6835 return "vextract<shuffletype>32x4\t{$0x1, %1, %0%{%3%}|%0%{%3%}, %1, 0x1}";
6837 [(set_attr "type" "sselog")
6838 (set_attr "prefix_extra" "1")
6839 (set_attr "length_immediate" "1")
6840 (set_attr "memory" "store")
6841 (set_attr "prefix" "evex")
6842 (set_attr "mode" "<sseinsnmode>")])
;; Extract the high half (elements 4..7) of a VI4F_256 register, with
;; optional masking through the <mask_name> variants.  AVX is always
;; required; a masked variant additionally needs AVX512VL and AVX512F.
;; When AVX512VL and AVX512F are both enabled the template is
;; vextract<shuffletype>32x4 (carrying the optional mask operand); otherwise
;; it is vextract<i128>.  Both forms use immediate 1.  The "prefix"
;; attribute is "evex" when TARGET_AVX512VL holds and "vex" otherwise.
;; NOTE(review): "memory" is fixed to "none" although operand 0 uses
;; <store_mask_predicate>; confirm that a memory destination is impossible
;; or that the attribute is intentionally approximate.
6844 (define_insn "vec_extract_hi_<mode><mask_name>"
6845 [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
6846 (vec_select:<ssehalfvecmode>
6847 (match_operand:VI4F_256 1 "register_operand" "v")
6848 (parallel [(const_int 4) (const_int 5)
6849 (const_int 6) (const_int 7)])))]
6850 "TARGET_AVX && (!<mask_applied> || (TARGET_AVX512VL && TARGET_AVX512F))"
6852 if (TARGET_AVX512VL && TARGET_AVX512F)
6853 return "vextract<shuffletype>32x4\t{$0x1, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x1}";
6855 return "vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}";
6857 [(set_attr "type" "sselog")
6858 (set_attr "prefix_extra" "1")
6859 (set_attr "length_immediate" "1")
6860 (set_attr "memory" "none")
6861 (set (attr "prefix")
6863 (match_test "TARGET_AVX512VL")
6864 (const_string "evex")
6865 (const_string "vex")))
6866 (set_attr "mode" "<sseinsnmode>")])
;; Extract the low 16 HImode elements (indices 0..15) of a V32HI into a
;; V16HI register or memory.  Enabled for AVX512F when the two operands are
;; not both memory.  After reload the insn splits into a plain
;; (set op0 op1) in which operand 1 has been re-typed to V16HImode: a
;; register keeps its register number, and the adjust_address call re-types
;; a memory reference at offset 0.
;; NOTE(review): the line between the REG_P test and the adjust_address
;; call is not visible here (presumably "else") -- confirm.
6868 (define_insn_and_split "vec_extract_lo_v32hi"
6869 [(set (match_operand:V16HI 0 "nonimmediate_operand" "=v,m")
6871 (match_operand:V32HI 1 "nonimmediate_operand" "vm,v")
6872 (parallel [(const_int 0) (const_int 1)
6873 (const_int 2) (const_int 3)
6874 (const_int 4) (const_int 5)
6875 (const_int 6) (const_int 7)
6876 (const_int 8) (const_int 9)
6877 (const_int 10) (const_int 11)
6878 (const_int 12) (const_int 13)
6879 (const_int 14) (const_int 15)])))]
6880 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6882 "&& reload_completed"
6883 [(set (match_dup 0) (match_dup 1))]
6885 if (REG_P (operands[1]))
6886 operands[1] = gen_rtx_REG (V16HImode, REGNO (operands[1]));
6888 operands[1] = adjust_address (operands[1], V16HImode, 0);
;; Extract the high 16 HImode elements (indices 16..31) of a V32HI into a
;; V16HI register (alternative 0) or memory (alternative 1).  Both
;; alternatives emit vextracti64x4 with immediate 1; the "memory" attribute
;; is "none" for the register form and "store" for the memory form.
;; NOTE(review): the insn condition is not visible in this excerpt.
6891 (define_insn "vec_extract_hi_v32hi"
6892 [(set (match_operand:V16HI 0 "nonimmediate_operand" "=v,m")
6894 (match_operand:V32HI 1 "nonimmediate_operand" "v,v")
6895 (parallel [(const_int 16) (const_int 17)
6896 (const_int 18) (const_int 19)
6897 (const_int 20) (const_int 21)
6898 (const_int 22) (const_int 23)
6899 (const_int 24) (const_int 25)
6900 (const_int 26) (const_int 27)
6901 (const_int 28) (const_int 29)
6902 (const_int 30) (const_int 31)])))]
6904 "vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
6905 [(set_attr "type" "sselog")
6906 (set_attr "prefix_extra" "1")
6907 (set_attr "length_immediate" "1")
6908 (set_attr "memory" "none,store")
6909 (set_attr "prefix" "evex")
6910 (set_attr "mode" "XI")])
;; Extract the low 8 HImode elements (indices 0..7) of a V16HI into a V8HI
;; register or memory.  Enabled for AVX when the two operands are not both
;; memory.  After reload the insn splits into a plain (set op0 op1) in
;; which operand 1 has been re-typed to V8HImode: a register keeps its
;; register number, and the adjust_address call re-types a memory reference
;; at offset 0.
;; NOTE(review): the line between the REG_P test and the adjust_address
;; call is not visible here (presumably "else") -- confirm.
6912 (define_insn_and_split "vec_extract_lo_v16hi"
6913 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
6915 (match_operand:V16HI 1 "nonimmediate_operand" "xm,x")
6916 (parallel [(const_int 0) (const_int 1)
6917 (const_int 2) (const_int 3)
6918 (const_int 4) (const_int 5)
6919 (const_int 6) (const_int 7)])))]
6920 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6922 "&& reload_completed"
6923 [(set (match_dup 0) (match_dup 1))]
6925 if (REG_P (operands[1]))
6926 operands[1] = gen_rtx_REG (V8HImode, REGNO (operands[1]));
6928 operands[1] = adjust_address (operands[1], V8HImode, 0);
;; Extract the high 8 HImode elements (indices 8..15) of a V16HI register
;; into a V8HI register (alternative 0) or memory (alternative 1).  Both
;; alternatives emit vextract%~128 with immediate 1 and a VEX prefix.
;; NOTE(review): the insn condition is not visible in this excerpt.
6931 (define_insn "vec_extract_hi_v16hi"
6932 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
6934 (match_operand:V16HI 1 "register_operand" "x,x")
6935 (parallel [(const_int 8) (const_int 9)
6936 (const_int 10) (const_int 11)
6937 (const_int 12) (const_int 13)
6938 (const_int 14) (const_int 15)])))]
6940 "vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}"
6941 [(set_attr "type" "sselog")
6942 (set_attr "prefix_extra" "1")
6943 (set_attr "length_immediate" "1")
6944 (set_attr "memory" "none,store")
6945 (set_attr "prefix" "vex")
6946 (set_attr "mode" "OI")])
;; Extract the low 32 QImode elements (indices 0..31) of a V64QI into a
;; V32QI register or memory.  Enabled for AVX512F when the two operands are
;; not both memory.  After reload the insn splits into a plain
;; (set op0 op1) in which operand 1 has been re-typed to V32QImode: a
;; register keeps its register number, and the adjust_address call re-types
;; a memory reference at offset 0.
;; NOTE(review): the line between the REG_P test and the adjust_address
;; call is not visible here (presumably "else") -- confirm.
6948 (define_insn_and_split "vec_extract_lo_v64qi"
6949 [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m")
6951 (match_operand:V64QI 1 "nonimmediate_operand" "vm,v")
6952 (parallel [(const_int 0) (const_int 1)
6953 (const_int 2) (const_int 3)
6954 (const_int 4) (const_int 5)
6955 (const_int 6) (const_int 7)
6956 (const_int 8) (const_int 9)
6957 (const_int 10) (const_int 11)
6958 (const_int 12) (const_int 13)
6959 (const_int 14) (const_int 15)
6960 (const_int 16) (const_int 17)
6961 (const_int 18) (const_int 19)
6962 (const_int 20) (const_int 21)
6963 (const_int 22) (const_int 23)
6964 (const_int 24) (const_int 25)
6965 (const_int 26) (const_int 27)
6966 (const_int 28) (const_int 29)
6967 (const_int 30) (const_int 31)])))]
6968 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6970 "&& reload_completed"
6971 [(set (match_dup 0) (match_dup 1))]
6973 if (REG_P (operands[1]))
6974 operands[1] = gen_rtx_REG (V32QImode, REGNO (operands[1]));
6976 operands[1] = adjust_address (operands[1], V32QImode, 0);
;; Extract the high 32 QImode elements (indices 32..63) of a V64QI into a
;; V32QI register (alternative 0) or memory (alternative 1).  Both
;; alternatives emit vextracti64x4 with immediate 1; the "memory" attribute
;; is "none" for the register form and "store" for the memory form.
;; NOTE(review): the insn condition is not visible in this excerpt.
6979 (define_insn "vec_extract_hi_v64qi"
6980 [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m")
6982 (match_operand:V64QI 1 "nonimmediate_operand" "v,v")
6983 (parallel [(const_int 32) (const_int 33)
6984 (const_int 34) (const_int 35)
6985 (const_int 36) (const_int 37)
6986 (const_int 38) (const_int 39)
6987 (const_int 40) (const_int 41)
6988 (const_int 42) (const_int 43)
6989 (const_int 44) (const_int 45)
6990 (const_int 46) (const_int 47)
6991 (const_int 48) (const_int 49)
6992 (const_int 50) (const_int 51)
6993 (const_int 52) (const_int 53)
6994 (const_int 54) (const_int 55)
6995 (const_int 56) (const_int 57)
6996 (const_int 58) (const_int 59)
6997 (const_int 60) (const_int 61)
6998 (const_int 62) (const_int 63)])))]
7000 "vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
7001 [(set_attr "type" "sselog")
7002 (set_attr "prefix_extra" "1")
7003 (set_attr "length_immediate" "1")
7004 (set_attr "memory" "none,store")
7005 (set_attr "prefix" "evex")
7006 (set_attr "mode" "XI")])
;; Extract the low 16 QImode elements (indices 0..15) of a V32QI into a
;; V16QI register or memory.  Enabled for AVX when the two operands are not
;; both memory.  After reload the insn splits into a plain (set op0 op1) in
;; which operand 1 has been re-typed to V16QImode: a register keeps its
;; register number, and the adjust_address call re-types a memory reference
;; at offset 0.
;; NOTE(review): the line between the REG_P test and the adjust_address
;; call is not visible here (presumably "else") -- confirm.
7008 (define_insn_and_split "vec_extract_lo_v32qi"
7009 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
7011 (match_operand:V32QI 1 "nonimmediate_operand" "xm,x")
7012 (parallel [(const_int 0) (const_int 1)
7013 (const_int 2) (const_int 3)
7014 (const_int 4) (const_int 5)
7015 (const_int 6) (const_int 7)
7016 (const_int 8) (const_int 9)
7017 (const_int 10) (const_int 11)
7018 (const_int 12) (const_int 13)
7019 (const_int 14) (const_int 15)])))]
7020 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7022 "&& reload_completed"
7023 [(set (match_dup 0) (match_dup 1))]
7025 if (REG_P (operands[1]))
7026 operands[1] = gen_rtx_REG (V16QImode, REGNO (operands[1]));
7028 operands[1] = adjust_address (operands[1], V16QImode, 0);
;; Extract the high 16 QImode elements (indices 16..31) of a V32QI register
;; into a V16QI register (alternative 0) or memory (alternative 1).  Both
;; alternatives emit vextract%~128 with immediate 1 and a VEX prefix.
;; NOTE(review): the insn condition is not visible in this excerpt.
7031 (define_insn "vec_extract_hi_v32qi"
7032 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
7034 (match_operand:V32QI 1 "register_operand" "x,x")
7035 (parallel [(const_int 16) (const_int 17)
7036 (const_int 18) (const_int 19)
7037 (const_int 20) (const_int 21)
7038 (const_int 22) (const_int 23)
7039 (const_int 24) (const_int 25)
7040 (const_int 26) (const_int 27)
7041 (const_int 28) (const_int 29)
7042 (const_int 30) (const_int 31)])))]
7044 "vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}"
7045 [(set_attr "type" "sselog")
7046 (set_attr "prefix_extra" "1")
7047 (set_attr "length_immediate" "1")
7048 (set_attr "memory" "none,store")
7049 (set_attr "prefix" "vex")
7050 (set_attr "mode" "OI")])
;; Each row lists one element type at 512, 256 and 128 bits.  The 512-bit
;; QI and HI modes are gated on TARGET_AVX512BW, the remaining 512-bit modes
;; on TARGET_AVX512F, every 256-bit mode on TARGET_AVX, and the 128-bit
;; modes carry no extra condition.
7052 ;; Modes handled by vec_extract patterns.
7053 (define_mode_iterator VEC_EXTRACT_MODE
7054 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX") V16QI
7055 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX") V8HI
7056 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
7057 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
7058 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
7059 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF])
;; Expander for extracting one element from a vector in any
;; VEC_EXTRACT_MODE.  Operand 0 is the scalar destination register,
;; operand 1 the source vector register and operand 2 a constant element
;; index.  The work is delegated to ix86_expand_vector_extract, called with
;; false as its first argument and the index as a plain integer.
;; NOTE(review): the expander condition and the end of the preparation code
;; are not visible in this excerpt.
7061 (define_expand "vec_extract<mode>"
7062 [(match_operand:<ssescalarmode> 0 "register_operand")
7063 (match_operand:VEC_EXTRACT_MODE 1 "register_operand")
7064 (match_operand 2 "const_int_operand")]
7067 ix86_expand_vector_extract (false, operands[0], operands[1],
7068 INTVAL (operands[2]));
7072 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7074 ;; Parallel double-precision floating point element swizzling
7076 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; 512-bit vunpckhpd on V8DF, with optional masking through <mask_name>.
;; The selector picks indices 1,9,3,11,5,13,7,15; indices above 7 only make
;; sense against a 16-element source, presumably the concatenation of
;; operands 1 and 2 (the wrapping vec_select/vec_concat lines and the insn
;; condition are not visible in this excerpt -- confirm).
7078 (define_insn "<mask_codefor>avx512f_unpckhpd512<mask_name>"
7079 [(set (match_operand:V8DF 0 "register_operand" "=v")
7082 (match_operand:V8DF 1 "nonimmediate_operand" "v")
7083 (match_operand:V8DF 2 "nonimmediate_operand" "vm"))
7084 (parallel [(const_int 1) (const_int 9)
7085 (const_int 3) (const_int 11)
7086 (const_int 5) (const_int 13)
7087 (const_int 7) (const_int 15)])))]
7089 "vunpckhpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7090 [(set_attr "type" "sselog")
7091 (set_attr "prefix" "evex")
7092 (set_attr "mode" "V8DF")])
;; 256-bit vunpckhpd on V4DF, with optional masking through <mask_name>.
;; The selector picks indices 1,5,3,7.  Enabled for AVX subject to
;; <mask_mode512bit_condition>.
7094 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
7095 (define_insn "avx_unpckhpd256<mask_name>"
7096 [(set (match_operand:V4DF 0 "register_operand" "=v")
7099 (match_operand:V4DF 1 "register_operand" "v")
7100 (match_operand:V4DF 2 "nonimmediate_operand" "vm"))
7101 (parallel [(const_int 1) (const_int 5)
7102 (const_int 3) (const_int 7)])))]
7103 "TARGET_AVX && <mask_mode512bit_condition>"
7104 "vunpckhpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7105 [(set_attr "type" "sselog")
7106 (set_attr "prefix" "vex")
7107 (set_attr "mode" "V4DF")])
;; Expander for the high interleave of two V4DF vectors.  The visible
;; selectors are (0 4 2 6) and (1 5 3 7) applied to operands 1 and 2,
;; followed by a final selection (2 3 6 7) that sets operand 0.  The
;; preparation code allocates fresh V4DF pseudos for operands 3 and 4.
;; NOTE(review): the set destinations and the vec_select/vec_concat
;; wrappers are not visible in this excerpt; presumably operands 3 and 4
;; hold the two intermediate shuffles -- confirm.
7109 (define_expand "vec_interleave_highv4df"
7113 (match_operand:V4DF 1 "register_operand" "x")
7114 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
7115 (parallel [(const_int 0) (const_int 4)
7116 (const_int 2) (const_int 6)])))
7122 (parallel [(const_int 1) (const_int 5)
7123 (const_int 3) (const_int 7)])))
7124 (set (match_operand:V4DF 0 "register_operand")
7129 (parallel [(const_int 2) (const_int 3)
7130 (const_int 6) (const_int 7)])))]
7133 operands[3] = gen_reg_rtx (V4DFmode);
7134 operands[4] = gen_reg_rtx (V4DFmode);
;; Masked 128-bit vunpckhpd on V2DF; requires AVX512VL and AVX512F.  The
;; selector picks indices 1 and 3.  Operand 3 is the merge source (tied to
;; the destination, or a value accepted by constraint "C") and operand 4 is
;; the QImode mask in a "Yk" register.  The template prints the mask as
;; %{%4%} followed by the %N3 modifier applied to operand 3.
7138 (define_insn "avx512vl_unpckhpd128_mask"
7139 [(set (match_operand:V2DF 0 "register_operand" "=v")
7143 (match_operand:V2DF 1 "register_operand" "v")
7144 (match_operand:V2DF 2 "nonimmediate_operand" "vm"))
7145 (parallel [(const_int 1) (const_int 3)]))
7146 (match_operand:V2DF 3 "vector_move_operand" "0C")
7147 (match_operand:QI 4 "register_operand" "Yk")))]
7148 "TARGET_AVX512VL && TARGET_AVX512F"
7149 "vunpckhpd\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
7150 [(set_attr "type" "sselog")
7151 (set_attr "prefix" "evex")
7152 (set_attr "mode" "V2DF")])
;; Expander for the high interleave of two V2DF operands, either of which
;; may be a register or memory.  When
;; ix86_vec_interleave_v2df_operator_ok (operands, 1) rejects the operand
;; combination, operand 2 is forced into a register.
;; NOTE(review): the rest of the selector and the expander condition are
;; not visible in this excerpt.
7154 (define_expand "vec_interleave_highv2df"
7155 [(set (match_operand:V2DF 0 "register_operand")
7158 (match_operand:V2DF 1 "nonimmediate_operand")
7159 (match_operand:V2DF 2 "nonimmediate_operand"))
7160 (parallel [(const_int 1)
7164 if (!ix86_vec_interleave_v2df_operator_ok (operands, 1))
7165 operands[2] = force_reg (V2DFmode, operands[2]);
;; High interleave of two V2DF operands; requires SSE2 and an operand
;; combination accepted by ix86_vec_interleave_v2df_operator_ok (operands, 1).
;; There are six alternatives, in order: unpckhpd (noavx), vunpckhpd (avx),
;; movddup from the high half of a memory operand 1 (sse3), movlpd (noavx),
;; vmovlpd (avx), and movhpd storing to a memory destination.  The "isa",
;; "type", "prefix_data16", "prefix" and "mode" attributes are listed per
;; alternative in that same order.
7168 (define_insn "*vec_interleave_highv2df"
7169 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m")
7172 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,o,o,o,x")
7173 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,1,0,x,0"))
7174 (parallel [(const_int 1)
7176 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
7178 unpckhpd\t{%2, %0|%0, %2}
7179 vunpckhpd\t{%2, %1, %0|%0, %1, %2}
7180 %vmovddup\t{%H1, %0|%0, %H1}
7181 movlpd\t{%H1, %0|%0, %H1}
7182 vmovlpd\t{%H1, %2, %0|%0, %2, %H1}
7183 %vmovhpd\t{%1, %0|%q0, %1}"
7184 [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
7185 (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
7186 (set_attr "ssememalign" "64")
7187 (set_attr "prefix_data16" "*,*,*,1,*,1")
7188 (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex")
7189 (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,V1DF")])
;; Expander for a 512-bit movddup on V8DF, with optional masking through
;; <mask_name>.  Operand 1 may be a register or memory.  The selector picks
;; indices 0,8,2,10,4,12,6,14.
;; NOTE(review): the vec_select/vec_concat wrappers, the second
;; concatenated operand and the condition are not visible in this excerpt.
7191 (define_expand "avx512f_movddup512<mask_name>"
7192 [(set (match_operand:V8DF 0 "register_operand")
7195 (match_operand:V8DF 1 "nonimmediate_operand")
7197 (parallel [(const_int 0) (const_int 8)
7198 (const_int 2) (const_int 10)
7199 (const_int 4) (const_int 12)
7200 (const_int 6) (const_int 14)])))]
;; Expander for a 512-bit unpcklpd on V8DF, with optional masking through
;; <mask_name>.  Operand 1 is a register and operand 2 a register or
;; memory.  The selector picks indices 0,8,2,10,4,12,6,14.
;; NOTE(review): the vec_select/vec_concat wrappers and the condition are
;; not visible in this excerpt.
7203 (define_expand "avx512f_unpcklpd512<mask_name>"
7204 [(set (match_operand:V8DF 0 "register_operand")
7207 (match_operand:V8DF 1 "register_operand")
7208 (match_operand:V8DF 2 "nonimmediate_operand"))
7209 (parallel [(const_int 0) (const_int 8)
7210 (const_int 2) (const_int 10)
7211 (const_int 4) (const_int 12)
7212 (const_int 6) (const_int 14)])))]
;; Insn matching the 512-bit low-unpack selector 0,8,2,10,4,12,6,14 on
;; V8DF, with optional masking through <mask_name>.  In alternative 0
;; operand 2 is tied to operand 1 (constraint "1"), so the single-source
;; form vmovddup is emitted.  Alternative 1 takes two distinct sources and
;; emits vunpcklpd.
;; NOTE(review): the insn condition is not visible in this excerpt.
7215 (define_insn "*avx512f_unpcklpd512<mask_name>"
7216 [(set (match_operand:V8DF 0 "register_operand" "=v,v")
7219 (match_operand:V8DF 1 "nonimmediate_operand" "vm, v")
7220 (match_operand:V8DF 2 "nonimmediate_operand" "1 ,vm"))
7221 (parallel [(const_int 0) (const_int 8)
7222 (const_int 2) (const_int 10)
7223 (const_int 4) (const_int 12)
7224 (const_int 6) (const_int 14)])))]
7227 vmovddup\t{%1, %0<mask_operand3>|%0<mask_operand3>, %1}
7228 vunpcklpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7229 [(set_attr "type" "sselog")
7230 (set_attr "prefix" "evex")
7231 (set_attr "mode" "V8DF")])
;; Expander for a 256-bit movddup on V4DF, with optional masking through
;; <mask_name>.  Operand 1 may be a register or memory.  The selector picks
;; indices 0,4,2,6.  Enabled for AVX subject to <mask_mode512bit_condition>.
;; NOTE(review): the vec_select/vec_concat wrappers and the second
;; concatenated operand are not visible in this excerpt.
7233 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
7234 (define_expand "avx_movddup256<mask_name>"
7235 [(set (match_operand:V4DF 0 "register_operand")
7238 (match_operand:V4DF 1 "nonimmediate_operand")
7240 (parallel [(const_int 0) (const_int 4)
7241 (const_int 2) (const_int 6)])))]
7242 "TARGET_AVX && <mask_mode512bit_condition>")
;; Expander for a 256-bit unpcklpd on V4DF, with optional masking through
;; <mask_name>.  Operand 1 is a register and operand 2 a register or
;; memory.  The selector picks indices 0,4,2,6.  Enabled for AVX subject to
;; <mask_mode512bit_condition>.  There is no preparation code.
7244 (define_expand "avx_unpcklpd256<mask_name>"
7245 [(set (match_operand:V4DF 0 "register_operand")
7248 (match_operand:V4DF 1 "register_operand")
7249 (match_operand:V4DF 2 "nonimmediate_operand"))
7250 (parallel [(const_int 0) (const_int 4)
7251 (const_int 2) (const_int 6)])))]
7252 "TARGET_AVX && <mask_mode512bit_condition>")
;; Insn matching the 256-bit low-unpack selector 0,4,2,6 on V4DF, with
;; optional masking through <mask_name>.  Alternative 0 takes a register
;; operand 1 and a register-or-memory operand 2 and emits vunpcklpd.  In
;; alternative 1 operand 1 is memory and operand 2 is tied to it
;; (constraint "1"), so the single-source form vmovddup is emitted.
7254 (define_insn "*avx_unpcklpd256<mask_name>"
7255 [(set (match_operand:V4DF 0 "register_operand" "=v,v")
7258 (match_operand:V4DF 1 "nonimmediate_operand" " v,m")
7259 (match_operand:V4DF 2 "nonimmediate_operand" "vm,1"))
7260 (parallel [(const_int 0) (const_int 4)
7261 (const_int 2) (const_int 6)])))]
7262 "TARGET_AVX && <mask_mode512bit_condition>"
7264 vunpcklpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
7265 vmovddup\t{%1, %0<mask_operand3>|%0<mask_operand3>, %1}"
7266 [(set_attr "type" "sselog")
7267 (set_attr "prefix" "vex")
7268 (set_attr "mode" "V4DF")])
;; Expander for the low interleave of two V4DF vectors.  The visible
;; selectors are (0 4 2 6) and (1 5 3 7) applied to operands 1 and 2,
;; followed by a final selection (0 1 4 5) that sets operand 0.  The
;; preparation code allocates fresh V4DF pseudos for operands 3 and 4.
;; NOTE(review): the set destinations and the vec_select/vec_concat
;; wrappers are not visible in this excerpt; presumably operands 3 and 4
;; hold the two intermediate shuffles -- confirm.
7270 (define_expand "vec_interleave_lowv4df"
7274 (match_operand:V4DF 1 "register_operand" "x")
7275 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
7276 (parallel [(const_int 0) (const_int 4)
7277 (const_int 2) (const_int 6)])))
7283 (parallel [(const_int 1) (const_int 5)
7284 (const_int 3) (const_int 7)])))
7285 (set (match_operand:V4DF 0 "register_operand")
7290 (parallel [(const_int 0) (const_int 1)
7291 (const_int 4) (const_int 5)])))]
7294 operands[3] = gen_reg_rtx (V4DFmode);
7295 operands[4] = gen_reg_rtx (V4DFmode);
;; Masked 128-bit vunpcklpd on V2DF; requires AVX512VL and AVX512F.  The
;; selector picks indices 0 and 2.  Operand 3 is the merge source (tied to
;; the destination, or a value accepted by constraint "C") and operand 4 is
;; the QImode mask in a "Yk" register.  The template prints the mask as
;; %{%4%} followed by the %N3 modifier applied to operand 3.
7298 (define_insn "avx512vl_unpcklpd128_mask"
7299 [(set (match_operand:V2DF 0 "register_operand" "=v")
7303 (match_operand:V2DF 1 "register_operand" "v")
7304 (match_operand:V2DF 2 "nonimmediate_operand" "vm"))
7305 (parallel [(const_int 0) (const_int 2)]))
7306 (match_operand:V2DF 3 "vector_move_operand" "0C")
7307 (match_operand:QI 4 "register_operand" "Yk")))]
7308 "TARGET_AVX512VL && TARGET_AVX512F"
7309 "vunpcklpd\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
7310 [(set_attr "type" "sselog")
7311 (set_attr "prefix" "evex")
7312 (set_attr "mode" "V2DF")])
;; Expander for the low interleave of two V2DF operands, either of which
;; may be a register or memory.  When
;; ix86_vec_interleave_v2df_operator_ok (operands, 0) rejects the operand
;; combination, operand 1 is forced into a register.
;; NOTE(review): the rest of the selector and the expander condition are
;; not visible in this excerpt.
7314 (define_expand "vec_interleave_lowv2df"
7315 [(set (match_operand:V2DF 0 "register_operand")
7318 (match_operand:V2DF 1 "nonimmediate_operand")
7319 (match_operand:V2DF 2 "nonimmediate_operand"))
7320 (parallel [(const_int 0)
7324 if (!ix86_vec_interleave_v2df_operator_ok (operands, 0))
7325 operands[1] = force_reg (V2DFmode, operands[1]);
;; Low interleave of two V2DF operands; requires SSE2 and an operand
;; combination accepted by ix86_vec_interleave_v2df_operator_ok (operands, 0).
;; There are six alternatives, in order: unpcklpd (noavx), vunpcklpd (avx),
;; movddup of a memory operand 1 (sse3), movhpd loading operand 2 from
;; memory (noavx), vmovhpd (avx), and movlpd storing operand 2 into the
;; high half (%H0) of a memory destination.  The "isa", "type",
;; "prefix_data16", "prefix" and "mode" attributes are listed per
;; alternative in that same order.
7328 (define_insn "*vec_interleave_lowv2df"
7329 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,o")
7332 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,m,0,x,0")
7333 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,1,m,m,x"))
7334 (parallel [(const_int 0)
7336 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
7338 unpcklpd\t{%2, %0|%0, %2}
7339 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
7340 %vmovddup\t{%1, %0|%0, %q1}
7341 movhpd\t{%2, %0|%0, %q2}
7342 vmovhpd\t{%2, %1, %0|%0, %1, %q2}
7343 %vmovlpd\t{%2, %H0|%H0, %2}"
7344 [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
7345 (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
7346 (set_attr "ssememalign" "64")
7347 (set_attr "prefix_data16" "*,*,*,1,*,1")
7348 (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex")
7349 (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,V1DF")])
;; Pattern with a V2DF memory destination and a V2DF register source,
;; valid for SSE3 after reload.  The preparation code views the source
;; register as DFmode (same register number) and stores that scalar twice,
;; at byte offsets 0 and 8 of the destination.
;; NOTE(review): the definition header and most of the selector are not
;; visible in this excerpt -- confirm which selection is being matched.
7352 [(set (match_operand:V2DF 0 "memory_operand")
7355 (match_operand:V2DF 1 "register_operand")
7357 (parallel [(const_int 0)
7359 "TARGET_SSE3 && reload_completed"
7362 rtx low = gen_rtx_REG (DFmode, REGNO (operands[1]));
7363 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
7364 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
;; Pattern with a V2DF register destination and a V2DF memory source,
;; valid for SSE3 when the second selector index equals the first plus 2.
;; It is rewritten as a vec_duplicate of a single DFmode element: operand 1
;; is re-addressed as DFmode at byte offset (operand 2) * 8, where operand 2
;; is 0 or 1.
;; NOTE(review): the definition header and the vec_select/vec_concat
;; wrappers are not visible in this excerpt.
7369 [(set (match_operand:V2DF 0 "register_operand")
7372 (match_operand:V2DF 1 "memory_operand")
7374 (parallel [(match_operand:SI 2 "const_0_to_1_operand")
7375 (match_operand:SI 3 "const_int_operand")])))]
7376 "TARGET_SSE3 && INTVAL (operands[2]) + 2 == INTVAL (operands[3])"
7377 [(set (match_dup 0) (vec_duplicate:V2DF (match_dup 1)))]
7379 operands[1] = adjust_address (operands[1], DFmode, INTVAL (operands[2]) * 8);
;; vscalef<ssescalarmodesuffix> on VF_128 operands, with the <round_name>
;; variants adding the <round_op3> operand to the template.  Operand 1 is a
;; register; operand 2 uses the predicate and constraint supplied by the
;; round subst attributes.
;; NOTE(review): the unspec wrapper and the insn condition are not visible
;; in this excerpt.
7382 (define_insn "avx512f_vmscalef<mode><round_name>"
7383 [(set (match_operand:VF_128 0 "register_operand" "=v")
7386 [(match_operand:VF_128 1 "register_operand" "v")
7387 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>")]
7392 "vscalef<ssescalarmodesuffix>\t{<round_op3>%2, %1, %0|%0, %1, %2<round_op3>}"
7393 [(set_attr "prefix" "evex")
7394 (set_attr "mode" "<ssescalarmode>")])
;; vscalef<ssemodesuffix> over every VF_AVX512VL mode, with optional
;; masking (<mask_name>) and rounding (<round_name>) variants.  Operand 1
;; is a register; operand 2 is a nonimmediate operand constrained by
;; <round_constraint>.
;; NOTE(review): the unspec wrapper and the insn condition are not visible
;; in this excerpt.
7396 (define_insn "<avx512>_scalef<mode><mask_name><round_name>"
7397 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
7399 [(match_operand:VF_AVX512VL 1 "register_operand" "v")
7400 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")]
7403 "vscalef<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
7404 [(set_attr "prefix" "evex")
7405 (set_attr "mode" "<MODE>")])
;; Expander for the _maskz form of vternlog.  Operands 0..3 are the
;; destination and the three vector sources, operand 4 is an immediate in
;; 0..255 and operand 5 is the mask register.  It forwards to the
;; _maskz_1 generator, inserting CONST0_RTX (<MODE>mode) as an extra
;; argument ahead of the mask.
;; NOTE(review): the expander condition and the end of the preparation
;; code are not visible in this excerpt.
7407 (define_expand "<avx512>_vternlog<mode>_maskz"
7408 [(match_operand:VI48_AVX512VL 0 "register_operand")
7409 (match_operand:VI48_AVX512VL 1 "register_operand")
7410 (match_operand:VI48_AVX512VL 2 "register_operand")
7411 (match_operand:VI48_AVX512VL 3 "nonimmediate_operand")
7412 (match_operand:SI 4 "const_0_to_255_operand")
7413 (match_operand:<avx512fmaskmode> 5 "register_operand")]
7416 emit_insn (gen_<avx512>_vternlog<mode>_maskz_1 (
7417 operands[0], operands[1], operands[2], operands[3],
7418 operands[4], CONST0_RTX (<MODE>mode), operands[5]));
;; vpternlog<ssemodesuffix> over the VI48_AVX512VL modes, with the
;; <sd_maskz_name> variants adding <sd_mask_op5> to the template.
;; Operand 1 is tied to the destination (constraint "0") and is therefore
;; not printed; operands 2 and 3 are the other two sources and operand 4 is
;; an immediate in 0..255.
;; NOTE(review): the unspec code and the insn condition are not visible in
;; this excerpt.
7422 (define_insn "<avx512>_vternlog<mode><sd_maskz_name>"
7423 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
7424 (unspec:VI48_AVX512VL
7425 [(match_operand:VI48_AVX512VL 1 "register_operand" "0")
7426 (match_operand:VI48_AVX512VL 2 "register_operand" "v")
7427 (match_operand:VI48_AVX512VL 3 "nonimmediate_operand" "vm")
7428 (match_operand:SI 4 "const_0_to_255_operand")]
7431 "vpternlog<ssemodesuffix>\t{%4, %3, %2, %0<sd_mask_op5>|%0<sd_mask_op5>, %2, %3, %4}"
7432 [(set_attr "type" "sselog")
7433 (set_attr "prefix" "evex")
7434 (set_attr "mode" "<sseinsnmode>")])
;; Merge-masked vpternlog<ssemodesuffix>: a vec_merge of the unspec result
;; under the mask in operand 5 (a "Yk" register), printed as %{%5%}.
;; Operand 1 is tied to the destination (constraint "0").
;; NOTE(review): the unspec code, the second vec_merge arm and the insn
;; condition are not visible in this excerpt.
7436 (define_insn "<avx512>_vternlog<mode>_mask"
7437 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
7438 (vec_merge:VI48_AVX512VL
7439 (unspec:VI48_AVX512VL
7440 [(match_operand:VI48_AVX512VL 1 "register_operand" "0")
7441 (match_operand:VI48_AVX512VL 2 "register_operand" "v")
7442 (match_operand:VI48_AVX512VL 3 "nonimmediate_operand" "vm")
7443 (match_operand:SI 4 "const_0_to_255_operand")]
7446 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
7448 "vpternlog<ssemodesuffix>\t{%4, %3, %2, %0%{%5%}|%0%{%5%}, %2, %3, %4}"
7449 [(set_attr "type" "sselog")
7450 (set_attr "prefix" "evex")
7451 (set_attr "mode" "<sseinsnmode>")])
;; vgetexp<ssemodesuffix> over every VF_AVX512VL mode, with optional
;; masking (<mask_name>) and <round_saeonly_name> variants.  The single
;; source operand takes its predicate and constraint from the
;; round_saeonly subst attributes.
;; NOTE(review): the unspec code and the insn condition are not visible in
;; this excerpt.
7453 (define_insn "<avx512>_getexp<mode><mask_name><round_saeonly_name>"
7454 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
7455 (unspec:VF_AVX512VL [(match_operand:VF_AVX512VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
7458 "vgetexp<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}";
7459 [(set_attr "prefix" "evex")
7460 (set_attr "mode" "<MODE>")])
;; vgetexp<ssescalarmodesuffix> on VF_128 operands, with the
;; <round_saeonly_name> variants adding <round_saeonly_op3> to the
;; template.  Operand 1 is a register; operand 2 takes its predicate and
;; constraint from the round_saeonly subst attributes.
;; NOTE(review): the unspec/vec_merge wrappers and the insn condition are
;; not visible in this excerpt.
7462 (define_insn "avx512f_sgetexp<mode><round_saeonly_name>"
7463 [(set (match_operand:VF_128 0 "register_operand" "=v")
7466 [(match_operand:VF_128 1 "register_operand" "v")
7467 (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
7472 "vgetexp<ssescalarmodesuffix>\t{<round_saeonly_op3>%2, %1, %0|%0, %1, %2<round_saeonly_op3>}";
7473 [(set_attr "prefix" "evex")
7474 (set_attr "mode" "<ssescalarmode>")])
;; valign<ssemodesuffix> over the VI48_AVX512VL modes, with optional
;; masking through <mask_name>.  Operand 1 is a register, operand 2 a
;; register or memory, and operand 3 an immediate in 0..255.
;; NOTE(review): the unspec code and the insn condition are not visible in
;; this excerpt.
7476 (define_insn "<mask_codefor><avx512>_align<mode><mask_name>"
7477 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
7478 (unspec:VI48_AVX512VL [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
7479 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")
7480 (match_operand:SI 3 "const_0_to_255_operand")]
7483 "valign<ssemodesuffix>\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}";
7484 [(set_attr "prefix" "evex")
7485 (set_attr "mode" "<sseinsnmode>")])
;; Expander for masked 512-bit shufps.  Operand 3 is the 8-bit immediate,
;; operand 4 the merge source and operand 5 the HImode mask.  The immediate
;; is split into four 2-bit fields and expanded into the 16 explicit
;; element selectors expected by avx512f_shufps512_1_mask.  Within each
;; group of four selectors, fields 0 and 1 are offset by the group base
;; (0, 4, 8, 12) and fields 2 and 3 by the group base plus 16
;; (16, 20, 24, 28).
;; NOTE(review): the expander condition and the end of the preparation
;; code are not visible in this excerpt.
7487 (define_expand "avx512f_shufps512_mask"
7488 [(match_operand:V16SF 0 "register_operand")
7489 (match_operand:V16SF 1 "register_operand")
7490 (match_operand:V16SF 2 "nonimmediate_operand")
7491 (match_operand:SI 3 "const_0_to_255_operand")
7492 (match_operand:V16SF 4 "register_operand")
7493 (match_operand:HI 5 "register_operand")]
7496 int mask = INTVAL (operands[3]);
7497 emit_insn (gen_avx512f_shufps512_1_mask (operands[0], operands[1], operands[2],
7498 GEN_INT ((mask >> 0) & 3),
7499 GEN_INT ((mask >> 2) & 3),
7500 GEN_INT (((mask >> 4) & 3) + 16),
7501 GEN_INT (((mask >> 6) & 3) + 16),
7502 GEN_INT (((mask >> 0) & 3) + 4),
7503 GEN_INT (((mask >> 2) & 3) + 4),
7504 GEN_INT (((mask >> 4) & 3) + 20),
7505 GEN_INT (((mask >> 6) & 3) + 20),
7506 GEN_INT (((mask >> 0) & 3) + 8),
7507 GEN_INT (((mask >> 2) & 3) + 8),
7508 GEN_INT (((mask >> 4) & 3) + 24),
7509 GEN_INT (((mask >> 6) & 3) + 24),
7510 GEN_INT (((mask >> 0) & 3) + 12),
7511 GEN_INT (((mask >> 2) & 3) + 12),
7512 GEN_INT (((mask >> 4) & 3) + 28),
7513 GEN_INT (((mask >> 6) & 3) + 28),
7514 operands[4], operands[5]));
;; Expander for the _maskz form of packed fixupimm.  Operands 1 and 2 are
;; vector registers, operand 3 is an integer-vector operand whose predicate
;; comes from the round_saeonly expand attributes, operand 4 is an
;; immediate in 0..255 and operand 5 is the mask register.  It forwards to
;; the _maskz_1 generator, inserting CONST0_RTX (<MODE>mode) ahead of the
;; mask and appending <round_saeonly_expand_operand6>.
;; NOTE(review): the expander condition and the end of the preparation
;; code are not visible in this excerpt.
7519 (define_expand "<avx512>_fixupimm<mode>_maskz<round_saeonly_expand_name>"
7520 [(match_operand:VF_AVX512VL 0 "register_operand")
7521 (match_operand:VF_AVX512VL 1 "register_operand")
7522 (match_operand:VF_AVX512VL 2 "register_operand")
7523 (match_operand:<sseintvecmode> 3 "<round_saeonly_expand_nimm_predicate>")
7524 (match_operand:SI 4 "const_0_to_255_operand")
7525 (match_operand:<avx512fmaskmode> 5 "register_operand")]
7528 emit_insn (gen_<avx512>_fixupimm<mode>_maskz_1<round_saeonly_expand_name> (
7529 operands[0], operands[1], operands[2], operands[3],
7530 operands[4], CONST0_RTX (<MODE>mode), operands[5]
7531 <round_saeonly_expand_operand6>));
;; vfixupimm<ssemodesuffix> over every VF_AVX512VL mode, with
;; <sd_maskz_name> and <round_saeonly_name> variants.  Operand 1 is tied to
;; the destination (constraint "0") and is not printed; operand 2 is a
;; register, operand 3 an integer-vector operand and operand 4 an immediate
;; in 0..255.
;; NOTE(review): the unspec wrapper and the insn condition are not visible
;; in this excerpt.
7535 (define_insn "<avx512>_fixupimm<mode><sd_maskz_name><round_saeonly_name>"
7536 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
7538 [(match_operand:VF_AVX512VL 1 "register_operand" "0")
7539 (match_operand:VF_AVX512VL 2 "register_operand" "v")
7540 (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "<round_saeonly_constraint>")
7541 (match_operand:SI 4 "const_0_to_255_operand")]
7544 "vfixupimm<ssemodesuffix>\t{%4, <round_saeonly_sd_mask_op5>%3, %2, %0<sd_mask_op5>|%0<sd_mask_op5>, %2, %3<round_saeonly_sd_mask_op5>, %4}";
7545 [(set_attr "prefix" "evex")
7546 (set_attr "mode" "<MODE>")])
;; Merge-masked packed vfixupimm<ssemodesuffix>: a vec_merge under the mask
;; in operand 5 (a "Yk" register), printed as %{%5%}.  Operand 1 is tied to
;; the destination (constraint "0").  The <round_saeonly_name> variants add
;; <round_saeonly_op6> to the template.
;; NOTE(review): the unspec wrapper, the second vec_merge arm and the insn
;; condition are not visible in this excerpt.
7548 (define_insn "<avx512>_fixupimm<mode>_mask<round_saeonly_name>"
7549 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
7550 (vec_merge:VF_AVX512VL
7552 [(match_operand:VF_AVX512VL 1 "register_operand" "0")
7553 (match_operand:VF_AVX512VL 2 "register_operand" "v")
7554 (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "<round_saeonly_constraint>")
7555 (match_operand:SI 4 "const_0_to_255_operand")]
7558 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
7560 "vfixupimm<ssemodesuffix>\t{%4, <round_saeonly_op6>%3, %2, %0%{%5%}|%0%{%5%}, %2, %3<round_saeonly_op6>, %4}";
7561 [(set_attr "prefix" "evex")
7562 (set_attr "mode" "<MODE>")])
;; Expander for the _maskz form of scalar fixupimm on VF_128 modes.  It
;; takes the same operand layout as the packed _maskz expander and forwards
;; to the avx512f_sfixupimm _maskz_1 generator, inserting
;; CONST0_RTX (<MODE>mode) ahead of the mask and appending
;; <round_saeonly_expand_operand6>.
;; NOTE(review): the expander condition and the end of the preparation
;; code are not visible in this excerpt.
7564 (define_expand "avx512f_sfixupimm<mode>_maskz<round_saeonly_expand_name>"
7565 [(match_operand:VF_128 0 "register_operand")
7566 (match_operand:VF_128 1 "register_operand")
7567 (match_operand:VF_128 2 "register_operand")
7568 (match_operand:<sseintvecmode> 3 "<round_saeonly_expand_nimm_predicate>")
7569 (match_operand:SI 4 "const_0_to_255_operand")
7570 (match_operand:<avx512fmaskmode> 5 "register_operand")]
7573 emit_insn (gen_avx512f_sfixupimm<mode>_maskz_1<round_saeonly_expand_name> (
7574 operands[0], operands[1], operands[2], operands[3],
7575 operands[4], CONST0_RTX (<MODE>mode), operands[5]
7576 <round_saeonly_expand_operand6>));
;; Scalar vfixupimm<ssescalarmodesuffix> on VF_128 operands, with
;; <sd_maskz_name> and <round_saeonly_name> variants.  Operand 1 is tied to
;; the destination (constraint "0") and is not printed; operand 2 is a
;; register, operand 3 an integer-vector operand and operand 4 an immediate
;; in 0..255.
;; NOTE(review): the unspec/vec_merge wrappers and the insn condition are
;; not visible in this excerpt.
7580 (define_insn "avx512f_sfixupimm<mode><sd_maskz_name><round_saeonly_name>"
7581 [(set (match_operand:VF_128 0 "register_operand" "=v")
7584 [(match_operand:VF_128 1 "register_operand" "0")
7585 (match_operand:VF_128 2 "register_operand" "v")
7586 (match_operand:<sseintvecmode> 3 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
7587 (match_operand:SI 4 "const_0_to_255_operand")]
7592 "vfixupimm<ssescalarmodesuffix>\t{%4, <round_saeonly_sd_mask_op5>%3, %2, %0<sd_mask_op5>|%0<sd_mask_op5>, %2, %3<round_saeonly_sd_mask_op5>, %4}";
7593 [(set_attr "prefix" "evex")
7594 (set_attr "mode" "<ssescalarmode>")])
;; Merge-masked scalar vfixupimm<ssescalarmodesuffix> on VF_128 operands.
;; The mask is operand 5 (a "Yk" register), printed as %{%5%}; operand 1 is
;; tied to the destination (constraint "0").  The <round_saeonly_name>
;; variants add <round_saeonly_op6> to the template.
;; NOTE(review): the unspec/vec_merge wrappers and the insn condition are
;; not visible in this excerpt.
7596 (define_insn "avx512f_sfixupimm<mode>_mask<round_saeonly_name>"
7597 [(set (match_operand:VF_128 0 "register_operand" "=v")
7601 [(match_operand:VF_128 1 "register_operand" "0")
7602 (match_operand:VF_128 2 "register_operand" "v")
7603 (match_operand:<sseintvecmode> 3 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
7604 (match_operand:SI 4 "const_0_to_255_operand")]
7609 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
7611 "vfixupimm<ssescalarmodesuffix>\t{%4, <round_saeonly_op6>%3, %2, %0%{%5%}|%0%{%5%}, %2, %3<round_saeonly_op6>, %4}";
7612 [(set_attr "prefix" "evex")
7613 (set_attr "mode" "<ssescalarmode>")])
;; vrndscale<ssemodesuffix> over every VF_AVX512VL mode, with optional
;; masking (<mask_name>) and <round_saeonly_name> variants.  Operand 1 is
;; the source and operand 2 an immediate in 0..255.
;; NOTE(review): the unspec wrapper and the insn condition are not visible
;; in this excerpt.
7615 (define_insn "<avx512>_rndscale<mode><mask_name><round_saeonly_name>"
7616 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
7618 [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "<round_saeonly_constraint>")
7619 (match_operand:SI 2 "const_0_to_255_operand")]
7622 "vrndscale<ssemodesuffix>\t{%2, <round_saeonly_mask_op3>%1, %0<mask_operand3>|%0<mask_operand3>, %1<round_saeonly_mask_op3>, %2}"
7623 [(set_attr "length_immediate" "1")
7624 (set_attr "prefix" "evex")
7625 (set_attr "mode" "<MODE>")])
;; Scalar vrndscale<ssescalarmodesuffix> on VF_128 operands, with the
;; <round_saeonly_name> variants adding <round_saeonly_op4> to the
;; template.  Operand 1 is a register, operand 2 takes its predicate and
;; constraint from the round_saeonly subst attributes, and operand 3 is an
;; immediate in 0..255.
;; NOTE(review): the unspec/vec_merge wrappers and the insn condition are
;; not visible in this excerpt.
7627 (define_insn "avx512f_rndscale<mode><round_saeonly_name>"
7628 [(set (match_operand:VF_128 0 "register_operand" "=v")
7631 [(match_operand:VF_128 1 "register_operand" "v")
7632 (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
7633 (match_operand:SI 3 "const_0_to_255_operand")]
7638 "vrndscale<ssescalarmodesuffix>\t{%3, <round_saeonly_op4>%2, %1, %0|%0, %1, %2<round_saeonly_op4>, %3}"
7639 [(set_attr "length_immediate" "1")
7640 (set_attr "prefix" "evex")
7641 (set_attr "mode" "<MODE>")])
;; 512-bit vshufps on V16SF with 16 explicit element selectors
;; (operands 3..18), with optional masking through <mask_name>.  The
;; condition requires every selector in the second, third and fourth group
;; of four to equal the corresponding selector of the first group plus 4,
;; 8 and 12 respectively, so that one 8-bit immediate describes all groups.
;; The output code rebuilds that immediate from operands 3..6 (subtracting
;; 16 from operands 5 and 6) and overwrites operands[3] with it before
;; returning the template.
;; NOTE(review): the first line of the condition and the declaration of
;; "mask" are not visible in this excerpt.
7643 ;; One bit in mask selects 2 elements.
7644 (define_insn "avx512f_shufps512_1<mask_name>"
7645 [(set (match_operand:V16SF 0 "register_operand" "=v")
7648 (match_operand:V16SF 1 "register_operand" "v")
7649 (match_operand:V16SF 2 "nonimmediate_operand" "vm"))
7650 (parallel [(match_operand 3 "const_0_to_3_operand")
7651 (match_operand 4 "const_0_to_3_operand")
7652 (match_operand 5 "const_16_to_19_operand")
7653 (match_operand 6 "const_16_to_19_operand")
7654 (match_operand 7 "const_4_to_7_operand")
7655 (match_operand 8 "const_4_to_7_operand")
7656 (match_operand 9 "const_20_to_23_operand")
7657 (match_operand 10 "const_20_to_23_operand")
7658 (match_operand 11 "const_8_to_11_operand")
7659 (match_operand 12 "const_8_to_11_operand")
7660 (match_operand 13 "const_24_to_27_operand")
7661 (match_operand 14 "const_24_to_27_operand")
7662 (match_operand 15 "const_12_to_15_operand")
7663 (match_operand 16 "const_12_to_15_operand")
7664 (match_operand 17 "const_28_to_31_operand")
7665 (match_operand 18 "const_28_to_31_operand")])))]
7667 && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
7668 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
7669 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
7670 && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4)
7671 && INTVAL (operands[3]) == (INTVAL (operands[11]) - 8)
7672 && INTVAL (operands[4]) == (INTVAL (operands[12]) - 8)
7673 && INTVAL (operands[5]) == (INTVAL (operands[13]) - 8)
7674 && INTVAL (operands[6]) == (INTVAL (operands[14]) - 8)
7675 && INTVAL (operands[3]) == (INTVAL (operands[15]) - 12)
7676 && INTVAL (operands[4]) == (INTVAL (operands[16]) - 12)
7677 && INTVAL (operands[5]) == (INTVAL (operands[17]) - 12)
7678 && INTVAL (operands[6]) == (INTVAL (operands[18]) - 12))"
7681 mask = INTVAL (operands[3]);
7682 mask |= INTVAL (operands[4]) << 2;
7683 mask |= (INTVAL (operands[5]) - 16) << 4;
7684 mask |= (INTVAL (operands[6]) - 16) << 6;
7685 operands[3] = GEN_INT (mask);
7687 return "vshufps\t{%3, %2, %1, %0<mask_operand19>|%0<mask_operand19>, %1, %2, %3}";
7689 [(set_attr "type" "sselog")
7690 (set_attr "length_immediate" "1")
7691 (set_attr "prefix" "evex")
7692 (set_attr "mode" "V16SF")])
;; Expander for masked 512-bit shufpd.  Operand 3 is the 8-bit immediate,
;; operand 4 the merge source and operand 5 the QImode mask.  Each
;; immediate bit is turned into one explicit element selector for
;; avx512f_shufpd512_1_mask: in the visible lines, bits 1..7 choose between
;; 8/9, 2/3, 10/11, 4/5, 12/13, 6/7 and 14/15.
;; NOTE(review): the argument derived from bit 0, the expander condition
;; and the end of the preparation code are not visible in this excerpt.
7694 (define_expand "avx512f_shufpd512_mask"
7695 [(match_operand:V8DF 0 "register_operand")
7696 (match_operand:V8DF 1 "register_operand")
7697 (match_operand:V8DF 2 "nonimmediate_operand")
7698 (match_operand:SI 3 "const_0_to_255_operand")
7699 (match_operand:V8DF 4 "register_operand")
7700 (match_operand:QI 5 "register_operand")]
7703 int mask = INTVAL (operands[3]);
7704 emit_insn (gen_avx512f_shufpd512_1_mask (operands[0], operands[1], operands[2],
7706 GEN_INT (mask & 2 ? 9 : 8),
7707 GEN_INT (mask & 4 ? 3 : 2),
7708 GEN_INT (mask & 8 ? 11 : 10),
7709 GEN_INT (mask & 16 ? 5 : 4),
7710 GEN_INT (mask & 32 ? 13 : 12),
7711 GEN_INT (mask & 64 ? 7 : 6),
7712 GEN_INT (mask & 128 ? 15 : 14),
7713 operands[4], operands[5]));
;; 512-bit vshufpd on V8DF with eight explicit element selectors
;; (operands 3..10), with optional masking through <mask_name>.  Each
;; selector is restricted by its predicate to a pair of adjacent indices.
;; The output code subtracts the base of each pair (0, 8, 2, 10, 4, 12, 6,
;; 14) to get one bit per selector, packs the bits into an 8-bit immediate
;; and overwrites operands[3] with it before returning the template.
;; NOTE(review): the vec_select/vec_concat wrappers, the insn condition and
;; the declaration of "mask" are not visible in this excerpt.
7717 (define_insn "avx512f_shufpd512_1<mask_name>"
7718 [(set (match_operand:V8DF 0 "register_operand" "=v")
7721 (match_operand:V8DF 1 "register_operand" "v")
7722 (match_operand:V8DF 2 "nonimmediate_operand" "vm"))
7723 (parallel [(match_operand 3 "const_0_to_1_operand")
7724 (match_operand 4 "const_8_to_9_operand")
7725 (match_operand 5 "const_2_to_3_operand")
7726 (match_operand 6 "const_10_to_11_operand")
7727 (match_operand 7 "const_4_to_5_operand")
7728 (match_operand 8 "const_12_to_13_operand")
7729 (match_operand 9 "const_6_to_7_operand")
7730 (match_operand 10 "const_14_to_15_operand")])))]
7734 mask = INTVAL (operands[3]);
7735 mask |= (INTVAL (operands[4]) - 8) << 1;
7736 mask |= (INTVAL (operands[5]) - 2) << 2;
7737 mask |= (INTVAL (operands[6]) - 10) << 3;
7738 mask |= (INTVAL (operands[7]) - 4) << 4;
7739 mask |= (INTVAL (operands[8]) - 12) << 5;
7740 mask |= (INTVAL (operands[9]) - 6) << 6;
7741 mask |= (INTVAL (operands[10]) - 14) << 7;
7742 operands[3] = GEN_INT (mask);
7744 return "vshufpd\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
7746 [(set_attr "type" "sselog")
7747 (set_attr "length_immediate" "1")
7748 (set_attr "prefix" "evex")
7749 (set_attr "mode" "V8DF")])
;; Expander for 256-bit vshufpd (plain and <mask_expand4_name> variants).
;; Operand 3 is an integer immediate; its tested bits are converted into
;; explicit element indices (4/5, 2/3, 6/7 for bits 1, 2 and 3) and passed
;; to gen_avx_shufpd256_1<mask_expand4_name>, followed by whatever
;; <mask_expand4_args> expands to.
7751 (define_expand "avx_shufpd256<mask_expand4_name>"
7752 [(match_operand:V4DF 0 "register_operand")
7753 (match_operand:V4DF 1 "register_operand")
7754 (match_operand:V4DF 2 "nonimmediate_operand")
7755 (match_operand:SI 3 "const_int_operand")]
7758 int mask = INTVAL (operands[3]);
7759 emit_insn (gen_avx_shufpd256_1<mask_expand4_name> (operands[0],
7763 GEN_INT (mask & 2 ? 5 : 4),
7764 GEN_INT (mask & 4 ? 3 : 2),
7765 GEN_INT (mask & 8 ? 7 : 6)
7766 <mask_expand4_args>));
;; 256-bit vshufpd, optionally masked via <mask_name>.  Operands 3..6 are
;; the four element selectors (ranges 0-1, 4-5, 2-3, 6-7); the output code
;; packs them into a 4-bit immediate, stores it in operands[3] and emits
;; vshufpd with %3 as the immediate.
;; NOTE(review): the "prefix" attribute is "vex" even though <mask_name>
;; can add a mask operand to the template -- confirm whether <mask_prefix>
;; was intended.
7770 (define_insn "avx_shufpd256_1<mask_name>"
7771 [(set (match_operand:V4DF 0 "register_operand" "=v")
7774 (match_operand:V4DF 1 "register_operand" "v")
7775 (match_operand:V4DF 2 "nonimmediate_operand" "vm"))
7776 (parallel [(match_operand 3 "const_0_to_1_operand")
7777 (match_operand 4 "const_4_to_5_operand")
7778 (match_operand 5 "const_2_to_3_operand")
7779 (match_operand 6 "const_6_to_7_operand")])))]
7780 "TARGET_AVX && <mask_mode512bit_condition>"
7783 mask = INTVAL (operands[3]);
7784 mask |= (INTVAL (operands[4]) - 4) << 1;
7785 mask |= (INTVAL (operands[5]) - 2) << 2;
7786 mask |= (INTVAL (operands[6]) - 6) << 3;
7787 operands[3] = GEN_INT (mask);
7789 return "vshufpd\t{%3, %2, %1, %0<mask_operand7>|%0<mask_operand7>, %1, %2, %3}";
7791 [(set_attr "type" "sseshuf")
7792 (set_attr "length_immediate" "1")
7793 (set_attr "prefix" "vex")
7794 (set_attr "mode" "V4DF")])
;; Expander for 128-bit shufpd (plain and <mask_expand4_name> variants).
;; Bit 0 of the immediate becomes selector 0 (value 0 or 1) and bit 1
;; becomes selector 1 (value 2 or 3); both are passed to
;; gen_sse2_shufpd_v2df<mask_expand4_name> together with
;; <mask_expand4_args>.
7796 (define_expand "sse2_shufpd<mask_expand4_name>"
7797 [(match_operand:V2DF 0 "register_operand")
7798 (match_operand:V2DF 1 "register_operand")
7799 (match_operand:V2DF 2 "nonimmediate_operand")
7800 (match_operand:SI 3 "const_int_operand")]
7803 int mask = INTVAL (operands[3]);
7804 emit_insn (gen_sse2_shufpd_v2df<mask_expand4_name> (operands[0], operands[1],
7805 operands[2], GEN_INT (mask & 1),
7806 GEN_INT (mask & 2 ? 3 : 2)
7807 <mask_expand4_args>));
;; Masked 128-bit vshufpd.  Operands 3 and 4 are the two element selectors
;; (ranges 0-1 and 2-3); the output code packs them into a 2-bit immediate
;; and stores it in operands[3].  Operand 5 is the merge source (either the
;; destination itself or a constant accepted by "C") and operand 6 is the
;; mask register ("Yk").
;; Both dialect halves of the template must print the mask register as
;; %{%6%}: the Intel half previously read %{6%}, which emitted the literal
;; text "{6}" instead of the mask register.
7811 (define_insn "sse2_shufpd_v2df_mask"
7812 [(set (match_operand:V2DF 0 "register_operand" "=v")
7816 (match_operand:V2DF 1 "register_operand" "v")
7817 (match_operand:V2DF 2 "nonimmediate_operand" "vm"))
7818 (parallel [(match_operand 3 "const_0_to_1_operand")
7819 (match_operand 4 "const_2_to_3_operand")]))
7820 (match_operand:V2DF 5 "vector_move_operand" "0C")
7821 (match_operand:QI 6 "register_operand" "Yk")))]
7825 mask = INTVAL (operands[3]);
7826 mask |= (INTVAL (operands[4]) - 2) << 1;
7827 operands[3] = GEN_INT (mask);
7829 return "vshufpd\t{%3, %2, %1, %0%{%6%}%N5|%0%{%6%}%N5, %1, %2, %3}";
7831 [(set_attr "type" "sseshuf")
7832 (set_attr "length_immediate" "1")
7833 (set_attr "prefix" "evex")
7834 (set_attr "mode" "V2DF")])
7836 ;; punpcklqdq and punpckhqdq are shorter than shufpd.
;; 256-bit vpunpckhqdq on V4DI operands, optionally masked via <mask_name>.
;; Only the first selector index (1) is visible in this chunk.
;; NOTE(review): the "prefix" attribute is "vex" even though <mask_name>
;; can add a mask operand to the template -- confirm whether <mask_prefix>
;; (as used by vec_interleave_highv2di<mask_name>) was intended.
7837 (define_insn "avx2_interleave_highv4di<mask_name>"
7838 [(set (match_operand:V4DI 0 "register_operand" "=v")
7841 (match_operand:V4DI 1 "register_operand" "v")
7842 (match_operand:V4DI 2 "nonimmediate_operand" "vm"))
7843 (parallel [(const_int 1)
7847 "TARGET_AVX2 && <mask_mode512bit_condition>"
7848 "vpunpckhqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7849 [(set_attr "type" "sselog")
7850 (set_attr "prefix" "vex")
7851 (set_attr "mode" "OI")])
;; 512-bit vpunpckhqdq on V8DI operands, optionally masked.  The selector
;; list 1,9,3,11,5,13,7,15 alternates the odd-numbered elements of the two
;; inputs (indices 8..15 addressing operand 2, assuming operands 1 and 2
;; are concatenated in that order -- the enclosing expression lines are
;; not visible in this chunk).
7853 (define_insn "<mask_codefor>avx512f_interleave_highv8di<mask_name>"
7854 [(set (match_operand:V8DI 0 "register_operand" "=v")
7857 (match_operand:V8DI 1 "register_operand" "v")
7858 (match_operand:V8DI 2 "nonimmediate_operand" "vm"))
7859 (parallel [(const_int 1) (const_int 9)
7860 (const_int 3) (const_int 11)
7861 (const_int 5) (const_int 13)
7862 (const_int 7) (const_int 15)])))]
7864 "vpunpckhqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7865 [(set_attr "type" "sselog")
7866 (set_attr "prefix" "evex")
7867 (set_attr "mode" "XI")])
;; 128-bit punpckhqdq / vpunpckhqdq on V2DI operands.  Alternative 0 is the
;; two-operand legacy form (operand 1 tied to the destination, isa noavx);
;; alternative 1 is the three-operand AVX form, which may carry a mask
;; operand via <mask_name> and therefore takes its prefix attribute from
;; <mask_prefix>.
7869 (define_insn "vec_interleave_highv2di<mask_name>"
7870 [(set (match_operand:V2DI 0 "register_operand" "=x,v")
7873 (match_operand:V2DI 1 "register_operand" "0,v")
7874 (match_operand:V2DI 2 "nonimmediate_operand" "xm,vm"))
7875 (parallel [(const_int 1)
7877 "TARGET_SSE2 && <mask_mode512bit_condition>"
7879 punpckhqdq\t{%2, %0|%0, %2}
7880 vpunpckhqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7881 [(set_attr "isa" "noavx,avx")
7882 (set_attr "type" "sselog")
7883 (set_attr "prefix_data16" "1,*")
7884 (set_attr "prefix" "orig,<mask_prefix>")
7885 (set_attr "mode" "TI")])
;; 256-bit vpunpcklqdq on V4DI operands, optionally masked via <mask_name>.
;; Only the first selector index (0) is visible in this chunk.
;; NOTE(review): the "prefix" attribute is "vex" even though <mask_name>
;; can add a mask operand to the template -- confirm whether <mask_prefix>
;; was intended.
7887 (define_insn "avx2_interleave_lowv4di<mask_name>"
7888 [(set (match_operand:V4DI 0 "register_operand" "=v")
7891 (match_operand:V4DI 1 "register_operand" "v")
7892 (match_operand:V4DI 2 "nonimmediate_operand" "vm"))
7893 (parallel [(const_int 0)
7897 "TARGET_AVX2 && <mask_mode512bit_condition>"
7898 "vpunpcklqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7899 [(set_attr "type" "sselog")
7900 (set_attr "prefix" "vex")
7901 (set_attr "mode" "OI")])
;; 512-bit vpunpcklqdq on V8DI operands, optionally masked.  The selector
;; list 0,8,2,10,4,12,6,14 alternates the even-numbered elements of the two
;; inputs (indices 8..15 addressing operand 2, assuming operands 1 and 2
;; are concatenated in that order -- the enclosing expression lines are
;; not visible in this chunk).
7903 (define_insn "<mask_codefor>avx512f_interleave_lowv8di<mask_name>"
7904 [(set (match_operand:V8DI 0 "register_operand" "=v")
7907 (match_operand:V8DI 1 "register_operand" "v")
7908 (match_operand:V8DI 2 "nonimmediate_operand" "vm"))
7909 (parallel [(const_int 0) (const_int 8)
7910 (const_int 2) (const_int 10)
7911 (const_int 4) (const_int 12)
7912 (const_int 6) (const_int 14)])))]
7914 "vpunpcklqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7915 [(set_attr "type" "sselog")
7916 (set_attr "prefix" "evex")
7917 (set_attr "mode" "XI")])
;; 128-bit punpcklqdq / vpunpcklqdq on V2DI operands.  Alternative 0 is the
;; two-operand legacy form (operand 1 tied to the destination, isa noavx);
;; alternative 1 is the three-operand AVX form, which may carry a mask
;; operand via <mask_name>.
;; The prefix of alternative 1 comes from <mask_prefix>, matching
;; vec_interleave_highv2di<mask_name>, so that the masked variant is not
;; described with a hard-coded "vex" prefix.
7919 (define_insn "vec_interleave_lowv2di<mask_name>"
7920 [(set (match_operand:V2DI 0 "register_operand" "=x,v")
7923 (match_operand:V2DI 1 "register_operand" "0,v")
7924 (match_operand:V2DI 2 "nonimmediate_operand" "xm,vm"))
7925 (parallel [(const_int 0)
7927 "TARGET_SSE2 && <mask_mode512bit_condition>"
7929 punpcklqdq\t{%2, %0|%0, %2}
7930 vpunpcklqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7931 [(set_attr "isa" "noavx,avx")
7932 (set_attr "type" "sselog")
7933 (set_attr "prefix_data16" "1,*")
7934 (set_attr "prefix" "orig,<mask_prefix>")
7935 (set_attr "mode" "TI")])
;; Unmasked 128-bit shufpd for every mode in VI8F_128: selects two elements
;; from the concatenation of operands 1 and 2.  Operand 3 (0 or 1) and
;; operand 4 (2 or 3) are folded into a 2-bit immediate that replaces
;; operands[3]; the template is then chosen by which_alternative
;; (two-operand shufpd for the noavx alternative, three-operand vshufpd
;; for the avx one).
7937 (define_insn "sse2_shufpd_<mode>"
7938 [(set (match_operand:VI8F_128 0 "register_operand" "=x,x")
7939 (vec_select:VI8F_128
7940 (vec_concat:<ssedoublevecmode>
7941 (match_operand:VI8F_128 1 "register_operand" "0,x")
7942 (match_operand:VI8F_128 2 "nonimmediate_operand" "xm,xm"))
7943 (parallel [(match_operand 3 "const_0_to_1_operand")
7944 (match_operand 4 "const_2_to_3_operand")])))]
7948 mask = INTVAL (operands[3]);
7949 mask |= (INTVAL (operands[4]) - 2) << 1;
7950 operands[3] = GEN_INT (mask);
7952 switch (which_alternative)
7955 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
7957 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
7962 [(set_attr "isa" "noavx,avx")
7963 (set_attr "type" "sseshuf")
7964 (set_attr "length_immediate" "1")
7965 (set_attr "prefix" "orig,vex")
7966 (set_attr "mode" "V2DF")])
7968 ;; Avoid combining registers from different units in a single alternative,
7969 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Extract element 1 of a V2DF (operand 1) into a DF destination.  Six
;; alternatives: a store to memory (%vmovhpd), two register forms (noavx
;; with operand 1 tied to the destination, avx with a free source), and
;; three loads from an offsettable memory source ("o") into an SSE, x87
;; ("*f") or integer ("r") register.  The insn condition rejects the
;; memory-to-memory combination.  Only two of the output templates are
;; visible in this chunk.
7970 (define_insn "sse2_storehpd"
7971 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,x,*f,r")
7973 (match_operand:V2DF 1 "nonimmediate_operand" " x,0,x,o,o,o")
7974 (parallel [(const_int 1)])))]
7975 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7977 %vmovhpd\t{%1, %0|%0, %1}
7979 vunpckhpd\t{%d1, %0|%0, %d1}
7983 [(set_attr "isa" "*,noavx,avx,*,*,*")
7984 (set_attr "type" "ssemov,sselog1,sselog1,ssemov,fmov,imov")
7985 (set (attr "prefix_data16")
7987 (and (eq_attr "alternative" "0")
7988 (not (match_test "TARGET_AVX")))
7990 (const_string "*")))
7991 (set_attr "prefix" "maybe_vex,orig,vex,*,*,*")
7992 (set_attr "mode" "V1DF,V1DF,V2DF,DF,DF,DF")])
;; Post-reload rewrite (the header line of this definition is not visible
;; in this chunk): extracting element 1 of a V2DF memory operand into a DF
;; register becomes a plain DF move from the same address plus 8 bytes.
7995 [(set (match_operand:DF 0 "register_operand")
7997 (match_operand:V2DF 1 "memory_operand")
7998 (parallel [(const_int 1)])))]
7999 "TARGET_SSE2 && reload_completed"
8000 [(set (match_dup 0) (match_dup 1))]
8001 "operands[1] = adjust_address (operands[1], DFmode, 8);")
;; Element-1 extraction from V2DF for targets with SSE but without SSE2
;; (see the insn condition).  Alternatives: store to memory with movhps,
;; register-to-register with movhlps, and load from an offsettable memory
;; source with movlps using the %H1 operand modifier.  Memory-to-memory is
;; rejected by the condition.
8003 (define_insn "*vec_extractv2df_1_sse"
8004 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
8006 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
8007 (parallel [(const_int 1)])))]
8008 "!TARGET_SSE2 && TARGET_SSE
8009 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
8011 movhps\t{%1, %0|%q0, %1}
8012 movhlps\t{%1, %0|%0, %1}
8013 movlps\t{%H1, %0|%0, %H1}"
8014 [(set_attr "type" "ssemov")
8015 (set_attr "ssememalign" "64")
8016 (set_attr "mode" "V2SF,V4SF,V2SF")])
8018 ;; Avoid combining registers from different units in a single alternative,
8019 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Extract element 0 of a V2DF (operand 1) into a DF destination.  Five
;; alternatives: store to memory (%vmovlpd), register-to-register, and
;; loads from memory into an SSE, x87 ("*f") or integer ("r") register.
;; The insn condition rejects the memory-to-memory combination.  Only the
;; first output template is visible in this chunk.
8020 (define_insn "sse2_storelpd"
8021 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
8023 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m,m,m")
8024 (parallel [(const_int 0)])))]
8025 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
8027 %vmovlpd\t{%1, %0|%0, %1}
8032 [(set_attr "type" "ssemov,ssemov,ssemov,fmov,imov")
8033 (set_attr "prefix_data16" "1,*,*,*,*")
8034 (set_attr "prefix" "maybe_vex")
8035 (set_attr "mode" "V1DF,DF,DF,DF,DF")])
;; Post-reload rewrite (the header line of this definition is not visible
;; in this chunk): extracting element 0 of a V2DF operand into a DF
;; register becomes a plain DF move.  A register source is replaced by the
;; DFmode register with the same register number; adjust_address with
;; offset 0 produces the DFmode view used for a memory source.
8038 [(set (match_operand:DF 0 "register_operand")
8040 (match_operand:V2DF 1 "nonimmediate_operand")
8041 (parallel [(const_int 0)])))]
8042 "TARGET_SSE2 && reload_completed"
8043 [(set (match_dup 0) (match_dup 1))]
8045 if (REG_P (operands[1]))
8046 operands[1] = gen_rtx_REG (DFmode, REGNO (operands[1]));
8048 operands[1] = adjust_address (operands[1], DFmode, 0);
;; Element-0 extraction from V2DF for targets with SSE but without SSE2
;; (see the insn condition).  Alternatives: store to memory with movlps,
;; register-to-register with movaps, and load from memory with movlps.
;; Memory-to-memory is rejected by the condition.
8051 (define_insn "*vec_extractv2df_0_sse"
8052 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
8054 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
8055 (parallel [(const_int 0)])))]
8056 "!TARGET_SSE2 && TARGET_SSE
8057 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
8059 movlps\t{%1, %0|%0, %1}
8060 movaps\t{%1, %0|%0, %1}
8061 movlps\t{%1, %0|%0, %q1}"
8062 [(set_attr "type" "ssemov")
8063 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander wrapping sse2_loadhpd.  It calls ix86_fixup_binary_operands on
;; the operand array, emits gen_sse2_loadhpd with the destination that
;; call returns, and, when that destination differs from operands[0],
;; moves the result into operands[0] afterwards.
8065 (define_expand "sse2_loadhpd_exp"
8066 [(set (match_operand:V2DF 0 "nonimmediate_operand")
8069 (match_operand:V2DF 1 "nonimmediate_operand")
8070 (parallel [(const_int 0)]))
8071 (match_operand:DF 2 "nonimmediate_operand")))]
8074 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
8076 emit_insn (gen_sse2_loadhpd (dst, operands[1], operands[2]));
8078 /* Fix up the destination if needed. */
8079 if (dst != operands[0])
8080 emit_move_insn (operands[0], dst);
8085 ;; Avoid combining registers from different units in a single alternative,
8086 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Build a V2DF from element 0 of operand 1 and the DF operand 2.  Seven
;; alternatives (see the "isa" and "type" attributes): movhpd/vmovhpd with
;; a memory operand 2, unpcklpd/vunpcklpd with a register operand 2, and
;; three further alternatives typed ssemov, fmov and imov whose templates
;; are not visible in this chunk.  The constraint strings for operands 0
;; and 1 are also not visible here.  The condition rejects the case where
;; operands 1 and 2 are both memory.
8087 (define_insn "sse2_loadhpd"
8088 [(set (match_operand:V2DF 0 "nonimmediate_operand"
8092 (match_operand:V2DF 1 "nonimmediate_operand"
8094 (parallel [(const_int 0)]))
8095 (match_operand:DF 2 "nonimmediate_operand"
8096 " m,m,x,x,x,*f,r")))]
8097 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
8099 movhpd\t{%2, %0|%0, %2}
8100 vmovhpd\t{%2, %1, %0|%0, %1, %2}
8101 unpcklpd\t{%2, %0|%0, %2}
8102 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
8106 [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
8107 (set_attr "type" "ssemov,ssemov,sselog,sselog,ssemov,fmov,imov")
8108 (set_attr "ssememalign" "64")
8109 (set_attr "prefix_data16" "1,*,*,*,*,*,*")
8110 (set_attr "prefix" "orig,vex,orig,vex,*,*,*")
8111 (set_attr "mode" "V1DF,V1DF,V2DF,V2DF,DF,DF,DF")])
;; Post-reload rewrite (the header line of this definition is not visible
;; in this chunk): when the V2DF destination is memory and its element 0
;; is taken from that same memory (match_dup 0), only the other half
;; changes, so the insn becomes a DF store of operand 1 at offset 8.
8114 [(set (match_operand:V2DF 0 "memory_operand")
8116 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
8117 (match_operand:DF 1 "register_operand")))]
8118 "TARGET_SSE2 && reload_completed"
8119 [(set (match_dup 0) (match_dup 1))]
8120 "operands[0] = adjust_address (operands[0], DFmode, 8);")
;; Expander wrapping sse2_loadlpd.  It calls ix86_fixup_binary_operands on
;; the operand array, emits gen_sse2_loadlpd with the destination that
;; call returns, and, when that destination differs from operands[0],
;; moves the result into operands[0] afterwards.
8122 (define_expand "sse2_loadlpd_exp"
8123 [(set (match_operand:V2DF 0 "nonimmediate_operand")
8125 (match_operand:DF 2 "nonimmediate_operand")
8127 (match_operand:V2DF 1 "nonimmediate_operand")
8128 (parallel [(const_int 1)]))))]
8131 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
8133 emit_insn (gen_sse2_loadlpd (dst, operands[1], operands[2]));
8135 /* Fix up the destination if needed. */
8136 if (dst != operands[0])
8137 emit_move_insn (operands[0], dst);
8142 ;; Avoid combining registers from different units in a single alternative,
8143 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Build a V2DF from the DF operand 2 and element 1 of operand 1.  Eleven
;; alternatives, selected by the "isa" attribute: a scalar load
;; (%vmovsd, operand 1 matching "C"), movlpd/vmovlpd from memory,
;; movsd/vmovsd from a register, a shufpd form and movhpd/vmovhpd forms
;; for the cases where operand 2 is tied to the destination, and three
;; alternatives with a memory destination (types ssemov, fmov, imov) whose
;; templates are not visible in this chunk.  The condition rejects the
;; case where operands 1 and 2 are both memory.
8144 (define_insn "sse2_loadlpd"
8145 [(set (match_operand:V2DF 0 "nonimmediate_operand"
8146 "=x,x,x,x,x,x,x,x,m,m ,m")
8148 (match_operand:DF 2 "nonimmediate_operand"
8149 " m,m,m,x,x,0,0,x,x,*f,r")
8151 (match_operand:V2DF 1 "vector_move_operand"
8152 " C,0,x,0,x,x,o,o,0,0 ,0")
8153 (parallel [(const_int 1)]))))]
8154 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
8156 %vmovsd\t{%2, %0|%0, %2}
8157 movlpd\t{%2, %0|%0, %2}
8158 vmovlpd\t{%2, %1, %0|%0, %1, %2}
8159 movsd\t{%2, %0|%0, %2}
8160 vmovsd\t{%2, %1, %0|%0, %1, %2}
8161 shufpd\t{$2, %1, %0|%0, %1, 2}
8162 movhpd\t{%H1, %0|%0, %H1}
8163 vmovhpd\t{%H1, %2, %0|%0, %2, %H1}
8167 [(set_attr "isa" "*,noavx,avx,noavx,avx,noavx,noavx,avx,*,*,*")
8169 (cond [(eq_attr "alternative" "5")
8170 (const_string "sselog")
8171 (eq_attr "alternative" "9")
8172 (const_string "fmov")
8173 (eq_attr "alternative" "10")
8174 (const_string "imov")
8176 (const_string "ssemov")))
8177 (set_attr "ssememalign" "64")
8178 (set_attr "prefix_data16" "*,1,*,*,*,*,1,*,*,*,*")
8179 (set_attr "length_immediate" "*,*,*,*,*,1,*,*,*,*,*")
8180 (set_attr "prefix" "maybe_vex,orig,vex,orig,vex,orig,orig,vex,*,*,*")
8181 (set_attr "mode" "DF,V1DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,DF,DF,DF")])
;; Post-reload rewrite (the header line of this definition is not visible
;; in this chunk): when the V2DF destination is memory and its element 1
;; is taken from that same memory (match_dup 0), only the other half
;; changes, so the insn becomes a DF store of operand 1 at offset 0.
8184 [(set (match_operand:V2DF 0 "memory_operand")
8186 (match_operand:DF 1 "register_operand")
8187 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
8188 "TARGET_SSE2 && reload_completed"
8189 [(set (match_dup 0) (match_dup 1))]
8190 "operands[0] = adjust_address (operands[0], DFmode, 0);")
;; Nine-alternative pattern combining two V2DF operands (the combining
;; expression and the insn condition are not visible in this chunk).
;; Templates, in alternative order: movsd/vmovsd register forms,
;; movlpd/vmovlpd loading from memory operand 2, a %vmovlpd store to a
;; memory destination, shufpd with immediate 2, movhps/vmovhps loading
;; from the %H1 view of memory operand 1, and a %vmovhps store to the %H0
;; view of an offsettable memory destination.
8192 (define_insn "sse2_movsd"
8193 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,m,x,x,x,o")
8195 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,m,m,x,0,0,x,0")
8196 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,0,x,0,x,o,o,x")
8200 movsd\t{%2, %0|%0, %2}
8201 vmovsd\t{%2, %1, %0|%0, %1, %2}
8202 movlpd\t{%2, %0|%0, %q2}
8203 vmovlpd\t{%2, %1, %0|%0, %1, %q2}
8204 %vmovlpd\t{%2, %0|%q0, %2}
8205 shufpd\t{$2, %1, %0|%0, %1, 2}
8206 movhps\t{%H1, %0|%0, %H1}
8207 vmovhps\t{%H1, %2, %0|%0, %2, %H1}
8208 %vmovhps\t{%1, %H0|%H0, %1}"
8209 [(set_attr "isa" "noavx,avx,noavx,avx,*,noavx,noavx,avx,*")
8212 (eq_attr "alternative" "5")
8213 (const_string "sselog")
8214 (const_string "ssemov")))
8215 (set (attr "prefix_data16")
8217 (and (eq_attr "alternative" "2,4")
8218 (not (match_test "TARGET_AVX")))
8220 (const_string "*")))
8221 (set_attr "length_immediate" "*,*,*,*,*,1,*,*,*")
8222 (set_attr "ssememalign" "64")
8223 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig,vex,maybe_vex")
8224 (set_attr "mode" "DF,DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,V1DF")])
;; V2DF pattern built from the single DF operand 1, optionally masked via
;; <mask_name>.  Alternative 0 (isa noavx) ties operand 1 to the
;; destination; its template is not visible in this chunk.  Alternative 1
;; (isa sse3) accepts a register or memory source and emits %vmovddup.
8226 (define_insn "vec_dupv2df<mask_name>"
8227 [(set (match_operand:V2DF 0 "register_operand" "=x,v")
8229 (match_operand:DF 1 "nonimmediate_operand" " 0,vm")))]
8230 "TARGET_SSE2 && <mask_mode512bit_condition>"
8233 %vmovddup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
8234 [(set_attr "isa" "noavx,sse3")
8235 (set_attr "type" "sselog1")
8236 (set_attr "prefix" "orig,maybe_vex")
8237 (set_attr "mode" "V2DF,DF")])
;; V2DF pattern built from two DF operands, with eight alternatives
;; selected by the "isa" attribute: unpcklpd/vunpcklpd for two registers,
;; %vmovddup when operand 2 duplicates memory operand 1 (constraint "1"),
;; movhpd/vmovhpd when operand 2 is in memory, %vmovsd when operand 2
;; matches "C", and movlhps/movhps for the noavx fallbacks.
8239 (define_insn "*vec_concatv2df"
8240 [(set (match_operand:V2DF 0 "register_operand" "=x,v,v,x,x,v,x,x")
8242 (match_operand:DF 1 "nonimmediate_operand" " 0,v,m,0,x,m,0,0")
8243 (match_operand:DF 2 "vector_move_operand" " x,v,1,m,m,C,x,m")))]
8246 unpcklpd\t{%2, %0|%0, %2}
8247 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
8248 %vmovddup\t{%1, %0|%0, %1}
8249 movhpd\t{%2, %0|%0, %2}
8250 vmovhpd\t{%2, %1, %0|%0, %1, %2}
8251 %vmovsd\t{%1, %0|%0, %1}
8252 movlhps\t{%2, %0|%0, %2}
8253 movhps\t{%2, %0|%0, %2}"
8254 [(set_attr "isa" "sse2_noavx,avx,sse3,sse2_noavx,avx,sse2,noavx,noavx")
8257 (eq_attr "alternative" "0,1,2")
8258 (const_string "sselog")
8259 (const_string "ssemov")))
8260 (set_attr "prefix_data16" "*,*,*,1,*,*,*,*")
8261 (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex,orig,orig")
8262 (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,DF,V4SF,V2SF")])
8264 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8266 ;; Parallel integer down-conversion operations
8268 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Destination modes of the down-conversions whose source is a 512-bit
;; vector, together with per-destination attributes:
;;   pmov_src_mode  - the source mode (V16SI or V8DI),
;;   pmov_src_lower - the same source mode in lower case, for pattern names,
;;   pmov_suff_1    - the two-letter size suffix used in the vpmov mnemonic.
8270 (define_mode_iterator PMOV_DST_MODE_1 [V16QI V16HI V8SI V8HI])
8271 (define_mode_attr pmov_src_mode
8272 [(V16QI "V16SI") (V16HI "V16SI") (V8SI "V8DI") (V8HI "V8DI")])
8273 (define_mode_attr pmov_src_lower
8274 [(V16QI "v16si") (V16HI "v16si") (V8SI "v8di") (V8HI "v8di")])
8275 (define_mode_attr pmov_suff_1
8276 [(V16QI "db") (V16HI "dw") (V8SI "qd") (V8HI "qw")])
;; Unmasked down-conversion (any_truncate) from a <pmov_src_mode> register
;; to a PMOV_DST_MODE_1 destination.  Alternative 0 writes a register,
;; alternative 1 writes memory (see the "memory" attribute); both use the
;; same vpmov<trunsuffix><pmov_suff_1> template.
8278 (define_insn "*avx512f_<code><pmov_src_lower><mode>2"
8279 [(set (match_operand:PMOV_DST_MODE_1 0 "nonimmediate_operand" "=v,m")
8280 (any_truncate:PMOV_DST_MODE_1
8281 (match_operand:<pmov_src_mode> 1 "register_operand" "v,v")))]
8283 "vpmov<trunsuffix><pmov_suff_1>\t{%1, %0|%0, %1}"
8284 [(set_attr "type" "ssemov")
8285 (set_attr "memory" "none,store")
8286 (set_attr "prefix" "evex")
8287 (set_attr "mode" "<sseinsnmode>")])
;; Masked down-conversion: the truncated source is vec_merge'd with
;; operand 2 under mask register operand 3.  With a register destination
;; operand 2 is either the destination itself or a constant accepted by
;; "C"; with a memory destination it must be the destination ("0").
8289 (define_insn "avx512f_<code><pmov_src_lower><mode>2_mask"
8290 [(set (match_operand:PMOV_DST_MODE_1 0 "nonimmediate_operand" "=v,m")
8291 (vec_merge:PMOV_DST_MODE_1
8292 (any_truncate:PMOV_DST_MODE_1
8293 (match_operand:<pmov_src_mode> 1 "register_operand" "v,v"))
8294 (match_operand:PMOV_DST_MODE_1 2 "vector_move_operand" "0C,0")
8295 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
8297 "vpmov<trunsuffix><pmov_suff_1>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
8298 [(set_attr "type" "ssemov")
8299 (set_attr "memory" "none,store")
8300 (set_attr "prefix" "evex")
8301 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the masked down-converting store: operand 0 is the memory
;; destination, operand 1 the wide source register and operand 2 the mask
;; register.  The merge-source line of the vec_merge is not visible in
;; this chunk.
8303 (define_expand "avx512f_<code><pmov_src_lower><mode>2_mask_store"
8304 [(set (match_operand:PMOV_DST_MODE_1 0 "memory_operand")
8305 (vec_merge:PMOV_DST_MODE_1
8306 (any_truncate:PMOV_DST_MODE_1
8307 (match_operand:<pmov_src_mode> 1 "register_operand"))
8309 (match_operand:<avx512fmaskmode> 2 "register_operand")))]
;; Destination modes of the down-conversions whose source is
;; <ssedoublemode>, each enabled by the ISA condition given next to it,
;; and the matching vpmov size suffix per destination mode.
8312 (define_mode_iterator PMOV_DST_MODE_2
8313 [(V32QI "TARGET_AVX512BW") (V16QI "TARGET_AVX512BW && TARGET_AVX512VL")
8314 (V8HI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")])
8315 (define_mode_attr pmov_suff_2
8316 [(V32QI "wb") (V16QI "wb") (V8HI "dw") (V4SI "qd")])
;; Unmasked down-conversion (any_truncate) from an <ssedoublemode>
;; register to a PMOV_DST_MODE_2 destination; alternative 0 writes a
;; register, alternative 1 writes memory.
8318 (define_insn "*avx512vl_<code><ssedoublemodelower><mode>2"
8319 [(set (match_operand:PMOV_DST_MODE_2 0 "nonimmediate_operand" "=v,m")
8320 (any_truncate:PMOV_DST_MODE_2
8321 (match_operand:<ssedoublemode> 1 "register_operand" "v,v")))]
8323 "vpmov<trunsuffix><pmov_suff_2>\t{%1, %0|%0, %1}"
8324 [(set_attr "type" "ssemov")
8325 (set_attr "memory" "none,store")
8326 (set_attr "prefix" "evex")
8327 (set_attr "mode" "<sseinsnmode>")])
;; Masked down-conversion for PMOV_DST_MODE_2: the truncated source is
;; vec_merge'd with operand 2 under mask register operand 3.  With a
;; register destination operand 2 is either the destination itself or a
;; constant accepted by "C"; with a memory destination it must be the
;; destination ("0").
8329 (define_insn "<avx512>_<code><ssedoublemodelower><mode>2_mask"
8330 [(set (match_operand:PMOV_DST_MODE_2 0 "nonimmediate_operand" "=v,m")
8331 (vec_merge:PMOV_DST_MODE_2
8332 (any_truncate:PMOV_DST_MODE_2
8333 (match_operand:<ssedoublemode> 1 "register_operand" "v,v"))
8334 (match_operand:PMOV_DST_MODE_2 2 "vector_move_operand" "0C,0")
8335 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
8337 "vpmov<trunsuffix><pmov_suff_2>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
8338 [(set_attr "type" "ssemov")
8339 (set_attr "memory" "none,store")
8340 (set_attr "prefix" "evex")
8341 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the masked down-converting store for PMOV_DST_MODE_2:
;; operand 1 is the wide source register, operand 2 the mask register.
;; The merge-source line of the vec_merge is not visible in this chunk.
;; NOTE(review): operand 0 uses "nonimmediate_operand" and the name ends
;; in "_store_mask", whereas the 512-bit-source sibling expander uses
;; "memory_operand" and "_mask_store" -- confirm both are intentional.
8343 (define_expand "<avx512>_<code><ssedoublemodelower><mode>2_store_mask"
8344 [(set (match_operand:PMOV_DST_MODE_2 0 "nonimmediate_operand")
8345 (vec_merge:PMOV_DST_MODE_2
8346 (any_truncate:PMOV_DST_MODE_2
8347 (match_operand:<ssedoublemode> 1 "register_operand"))
8349 (match_operand:<avx512fmaskmode> 2 "register_operand")))]
;; Source modes that are down-converted to byte elements held in a V16QI
;; destination, with per-source attributes:
;;   pmov_dst_3        - mode of the truncated elements,
;;   pmov_dst_zeroed_3 - mode of the remaining bytes of the 16-byte
;;                       destination (16 minus the truncated element count),
;;   pmov_suff_3       - size suffix used in the vpmov mnemonic.
8352 (define_mode_iterator PMOV_SRC_MODE_3 [V4DI V2DI V8SI V4SI (V8HI "TARGET_AVX512BW")])
8353 (define_mode_attr pmov_dst_3
8354 [(V4DI "V4QI") (V2DI "V2QI") (V8SI "V8QI") (V4SI "V4QI") (V8HI "V8QI")])
8355 (define_mode_attr pmov_dst_zeroed_3
8356 [(V4DI "V12QI") (V2DI "V14QI") (V8SI "V8QI") (V4SI "V12QI") (V8HI "V8QI")])
8357 (define_mode_attr pmov_suff_3
8358 [(V4DI "qb") (V2DI "qb") (V8SI "db") (V4SI "db") (V8HI "wb")])
;; Unmasked register-to-register down-conversion to bytes for every mode
;; in PMOV_SRC_MODE_3.  The V16QI result consists of the truncated source
;; (mode <pmov_dst_3>) and operand 2, which must be an all-zero constant of
;; mode <pmov_dst_zeroed_3> (const0_operand).
8360 (define_insn "*avx512vl_<code><mode>v<ssescalarnum>qi2"
8361 [(set (match_operand:V16QI 0 "register_operand" "=v")
8363 (any_truncate:<pmov_dst_3>
8364 (match_operand:PMOV_SRC_MODE_3 1 "register_operand" "v"))
8365 (match_operand:<pmov_dst_zeroed_3> 2 "const0_operand")))]
8367 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0|%0, %1}"
8368 [(set_attr "type" "ssemov")
8369 (set_attr "prefix" "evex")
8370 (set_attr "mode" "TI")])
;; Store form of the V2DI -> 2 x QI down-conversion: the destination is a
;; V16QI memory operand.  The visible selector lists byte indices 2..15,
;; i.e. the bytes outside the two converted elements (presumably taken
;; from the destination itself so that they are left unchanged -- the
;; selecting expression is not visible in this chunk).
8372 (define_insn "*avx512vl_<code>v2div2qi2_store"
8373 [(set (match_operand:V16QI 0 "memory_operand" "=m")
8376 (match_operand:V2DI 1 "register_operand" "v"))
8379 (parallel [(const_int 2) (const_int 3)
8380 (const_int 4) (const_int 5)
8381 (const_int 6) (const_int 7)
8382 (const_int 8) (const_int 9)
8383 (const_int 10) (const_int 11)
8384 (const_int 12) (const_int 13)
8385 (const_int 14) (const_int 15)]))))]
8387 "vpmov<trunsuffix>qb\t{%1, %0|%0, %1}"
8388 [(set_attr "type" "ssemov")
8389 (set_attr "memory" "store")
8390 (set_attr "prefix" "evex")
8391 (set_attr "mode" "TI")])
;; Masked register form of the V2DI -> 2 x QI down-conversion.  Elements
;; 0-1 of operand 2 (the destination itself or a constant accepted by "C")
;; are the alternative to the converted values under mask register
;; operand 3; the other 14 bytes of the result are an explicit zero
;; const_vector.
8393 (define_insn "avx512vl_<code>v2div2qi2_mask"
8394 [(set (match_operand:V16QI 0 "register_operand" "=v")
8398 (match_operand:V2DI 1 "register_operand" "v"))
8400 (match_operand:V16QI 2 "vector_move_operand" "0C")
8401 (parallel [(const_int 0) (const_int 1)]))
8402 (match_operand:QI 3 "register_operand" "Yk"))
8403 (const_vector:V14QI [(const_int 0) (const_int 0)
8404 (const_int 0) (const_int 0)
8405 (const_int 0) (const_int 0)
8406 (const_int 0) (const_int 0)
8407 (const_int 0) (const_int 0)
8408 (const_int 0) (const_int 0)
8409 (const_int 0) (const_int 0)])))]
8411 "vpmov<trunsuffix>qb\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
8412 [(set_attr "type" "ssemov")
8413 (set_attr "prefix" "evex")
8414 (set_attr "mode" "TI")])
;; Masked store form of the V2DI -> 2 x QI down-conversion: V16QI memory
;; destination, mask register operand 2.  The first selector (indices
;; 0-1) covers the converted bytes and the second (indices 2..15) the
;; remaining bytes; the expressions they select from are not visible in
;; this chunk (presumably the destination itself -- confirm).
8416 (define_insn "avx512vl_<code>v2div2qi2_store_mask"
8417 [(set (match_operand:V16QI 0 "memory_operand" "=m")
8421 (match_operand:V2DI 1 "register_operand" "v"))
8424 (parallel [(const_int 0) (const_int 1)]))
8425 (match_operand:QI 2 "register_operand" "Yk"))
8428 (parallel [(const_int 2) (const_int 3)
8429 (const_int 4) (const_int 5)
8430 (const_int 6) (const_int 7)
8431 (const_int 8) (const_int 9)
8432 (const_int 10) (const_int 11)
8433 (const_int 12) (const_int 13)
8434 (const_int 14) (const_int 15)]))))]
8436 "vpmov<trunsuffix>qb\t{%1, %0%{%2%}|%0%{%2%}, %1}"
8437 [(set_attr "type" "ssemov")
8438 (set_attr "memory" "store")
8439 (set_attr "prefix" "evex")
8440 (set_attr "mode" "TI")])
;; Store form of the four-element down-conversion to bytes (source modes
;; from VI4_128_8_256): V16QI memory destination.  The visible selector
;; lists byte indices 4..15, i.e. the bytes outside the four converted
;; elements (presumably taken from the destination itself -- the
;; selecting expression is not visible in this chunk).
8442 (define_insn "*avx512vl_<code><mode>v4qi2_store"
8443 [(set (match_operand:V16QI 0 "memory_operand" "=m")
8446 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
8449 (parallel [(const_int 4) (const_int 5)
8450 (const_int 6) (const_int 7)
8451 (const_int 8) (const_int 9)
8452 (const_int 10) (const_int 11)
8453 (const_int 12) (const_int 13)
8454 (const_int 14) (const_int 15)]))))]
8456 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0|%0, %1}"
8457 [(set_attr "type" "ssemov")
8458 (set_attr "memory" "store")
8459 (set_attr "prefix" "evex")
8460 (set_attr "mode" "TI")])
;; Masked register form of the four-element down-conversion to bytes.
;; Elements 0-3 of operand 2 (the destination itself or a constant
;; accepted by "C") are the alternative to the converted values under mask
;; register operand 3; the other 12 bytes of the result are an explicit
;; zero const_vector.
8462 (define_insn "avx512vl_<code><mode>v4qi2_mask"
8463 [(set (match_operand:V16QI 0 "register_operand" "=v")
8467 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
8469 (match_operand:V16QI 2 "vector_move_operand" "0C")
8470 (parallel [(const_int 0) (const_int 1)
8471 (const_int 2) (const_int 3)]))
8472 (match_operand:QI 3 "register_operand" "Yk"))
8473 (const_vector:V12QI [(const_int 0) (const_int 0)
8474 (const_int 0) (const_int 0)
8475 (const_int 0) (const_int 0)
8476 (const_int 0) (const_int 0)
8477 (const_int 0) (const_int 0)
8478 (const_int 0) (const_int 0)])))]
8480 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
8481 [(set_attr "type" "ssemov")
8482 (set_attr "prefix" "evex")
8483 (set_attr "mode" "TI")])
;; Masked store form of the four-element down-conversion to bytes: V16QI
;; memory destination, mask register operand 2.  The first selector
;; (indices 0-3) covers the converted bytes and the second (indices 4..15)
;; the remaining bytes; the expressions they select from are not visible
;; in this chunk (presumably the destination itself -- confirm).
8485 (define_insn "avx512vl_<code><mode>v4qi2_store_mask"
8486 [(set (match_operand:V16QI 0 "memory_operand" "=m")
8490 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
8493 (parallel [(const_int 0) (const_int 1)
8494 (const_int 2) (const_int 3)]))
8495 (match_operand:QI 2 "register_operand" "Yk"))
8498 (parallel [(const_int 4) (const_int 5)
8499 (const_int 6) (const_int 7)
8500 (const_int 8) (const_int 9)
8501 (const_int 10) (const_int 11)
8502 (const_int 12) (const_int 13)
8503 (const_int 14) (const_int 15)]))))]
8505 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
8506 [(set_attr "type" "ssemov")
8507 (set_attr "memory" "store")
8508 (set_attr "prefix" "evex")
8509 (set_attr "mode" "TI")])
;; Store form of the eight-element down-conversion to bytes (source modes
;; from VI2_128_4_256): V16QI memory destination.  The visible selector
;; lists byte indices 8..15, i.e. the bytes outside the eight converted
;; elements (presumably taken from the destination itself -- the
;; selecting expression is not visible in this chunk).
8511 (define_insn "*avx512vl_<code><mode>v8qi2_store"
8512 [(set (match_operand:V16QI 0 "memory_operand" "=m")
8515 (match_operand:VI2_128_4_256 1 "register_operand" "v"))
8518 (parallel [(const_int 8) (const_int 9)
8519 (const_int 10) (const_int 11)
8520 (const_int 12) (const_int 13)
8521 (const_int 14) (const_int 15)]))))]
8523 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0|%0, %1}"
8524 [(set_attr "type" "ssemov")
8525 (set_attr "memory" "store")
8526 (set_attr "prefix" "evex")
8527 (set_attr "mode" "TI")])
;; Masked register form of the eight-element down-conversion to bytes.
;; Elements 0-7 of operand 2 (the destination itself or a constant
;; accepted by "C") are the alternative to the converted values under mask
;; register operand 3; the other 8 bytes of the result are an explicit
;; zero const_vector.
8529 (define_insn "avx512vl_<code><mode>v8qi2_mask"
8530 [(set (match_operand:V16QI 0 "register_operand" "=v")
8534 (match_operand:VI2_128_4_256 1 "register_operand" "v"))
8536 (match_operand:V16QI 2 "vector_move_operand" "0C")
8537 (parallel [(const_int 0) (const_int 1)
8538 (const_int 2) (const_int 3)
8539 (const_int 4) (const_int 5)
8540 (const_int 6) (const_int 7)]))
8541 (match_operand:QI 3 "register_operand" "Yk"))
8542 (const_vector:V8QI [(const_int 0) (const_int 0)
8543 (const_int 0) (const_int 0)
8544 (const_int 0) (const_int 0)
8545 (const_int 0) (const_int 0)])))]
8547 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
8548 [(set_attr "type" "ssemov")
8549 (set_attr "prefix" "evex")
8550 (set_attr "mode" "TI")])
;; Masked store form of the eight-element down-conversion to bytes: V16QI
;; memory destination, mask register operand 2.  The first selector
;; (indices 0-7) covers the converted bytes and the second (indices 8..15)
;; the remaining bytes; the expressions they select from are not visible
;; in this chunk (presumably the destination itself -- confirm).
8552 (define_insn "avx512vl_<code><mode>v8qi2_store_mask"
8553 [(set (match_operand:V16QI 0 "memory_operand" "=m")
8557 (match_operand:VI2_128_4_256 1 "register_operand" "v"))
8560 (parallel [(const_int 0) (const_int 1)
8561 (const_int 2) (const_int 3)
8562 (const_int 4) (const_int 5)
8563 (const_int 6) (const_int 7)]))
8564 (match_operand:QI 2 "register_operand" "Yk"))
8567 (parallel [(const_int 8) (const_int 9)
8568 (const_int 10) (const_int 11)
8569 (const_int 12) (const_int 13)
8570 (const_int 14) (const_int 15)]))))]
8572 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
8573 [(set_attr "type" "ssemov")
8574 (set_attr "memory" "store")
8575 (set_attr "prefix" "evex")
8576 (set_attr "mode" "TI")])
;; Source modes that are down-converted to 16-bit elements held in a V8HI
;; destination, with per-source attributes:
;;   pmov_dst_4        - mode of the truncated elements,
;;   pmov_dst_zeroed_4 - mode of the remaining elements of the 8-element
;;                       destination (8 minus the truncated element count),
;;   pmov_suff_4       - size suffix used in the vpmov mnemonic.
8578 (define_mode_iterator PMOV_SRC_MODE_4 [V4DI V2DI V4SI])
8579 (define_mode_attr pmov_dst_4
8580 [(V4DI "V4HI") (V2DI "V2HI") (V4SI "V4HI")])
8581 (define_mode_attr pmov_dst_zeroed_4
8582 [(V4DI "V4HI") (V2DI "V6HI") (V4SI "V4HI")])
8583 (define_mode_attr pmov_suff_4
8584 [(V4DI "qw") (V2DI "qw") (V4SI "dw")])
;; Unmasked register-to-register down-conversion to 16-bit elements for
;; every mode in PMOV_SRC_MODE_4.  The V8HI result consists of the
;; truncated source (mode <pmov_dst_4>) and operand 2, which must be an
;; all-zero constant of mode <pmov_dst_zeroed_4> (const0_operand).
8586 (define_insn "*avx512vl_<code><mode>v<ssescalarnum>hi2"
8587 [(set (match_operand:V8HI 0 "register_operand" "=v")
8589 (any_truncate:<pmov_dst_4>
8590 (match_operand:PMOV_SRC_MODE_4 1 "register_operand" "v"))
8591 (match_operand:<pmov_dst_zeroed_4> 2 "const0_operand")))]
8593 "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0|%0, %1}"
8594 [(set_attr "type" "ssemov")
8595 (set_attr "prefix" "evex")
8596 (set_attr "mode" "TI")])
;; Store form of the four-element down-conversion to 16-bit elements
;; (source modes from VI4_128_8_256): V8HI memory destination.  The
;; visible selector lists element indices 4..7, i.e. the elements outside
;; the four converted ones (presumably taken from the destination itself
;; -- the selecting expression is not visible in this chunk).
8598 (define_insn "*avx512vl_<code><mode>v4hi2_store"
8599 [(set (match_operand:V8HI 0 "memory_operand" "=m")
8602 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
8605 (parallel [(const_int 4) (const_int 5)
8606 (const_int 6) (const_int 7)]))))]
8608 "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0|%0, %1}"
8609 [(set_attr "type" "ssemov")
8610 (set_attr "memory" "store")
8611 (set_attr "prefix" "evex")
8612 (set_attr "mode" "TI")])
;; Masked register form of the four-element down-conversion to 16-bit
;; elements.  Elements 0-3 of operand 2 (the destination itself or a
;; constant accepted by "C") are the alternative to the converted values
;; under mask register operand 3; the other four elements of the result
;; are an explicit zero const_vector.
8614 (define_insn "avx512vl_<code><mode>v4hi2_mask"
8615 [(set (match_operand:V8HI 0 "register_operand" "=v")
8619 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
8621 (match_operand:V8HI 2 "vector_move_operand" "0C")
8622 (parallel [(const_int 0) (const_int 1)
8623 (const_int 2) (const_int 3)]))
8624 (match_operand:QI 3 "register_operand" "Yk"))
8625 (const_vector:V4HI [(const_int 0) (const_int 0)
8626 (const_int 0) (const_int 0)])))]
8628 "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
8629 [(set_attr "type" "ssemov")
8630 (set_attr "prefix" "evex")
8631 (set_attr "mode" "TI")])
;; Masked store form of the four-element down-conversion to 16-bit
;; elements: V8HI memory destination, mask register operand 2.  The first
;; selector (indices 0-3) covers the converted elements and the second
;; (indices 4..7) the remaining ones; the expressions they select from are
;; not visible in this chunk (presumably the destination itself --
;; confirm).
8633 (define_insn "avx512vl_<code><mode>v4hi2_store_mask"
8634 [(set (match_operand:V8HI 0 "memory_operand" "=m")
8638 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
8641 (parallel [(const_int 0) (const_int 1)
8642 (const_int 2) (const_int 3)]))
8643 (match_operand:QI 2 "register_operand" "Yk"))
8646 (parallel [(const_int 4) (const_int 5)
8647 (const_int 6) (const_int 7)]))))]
8649 "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
8650 [(set_attr "type" "ssemov")
8651 (set_attr "memory" "store")
8652 (set_attr "prefix" "evex")
8653 (set_attr "mode" "TI")])
;; Store form of the V2DI -> 2 x HI down-conversion: V8HI memory
;; destination.  The visible selector lists element indices 2..7, i.e. the
;; elements outside the two converted ones (presumably taken from the
;; destination itself -- the selecting expression is not visible in this
;; chunk).
8655 (define_insn "*avx512vl_<code>v2div2hi2_store"
8656 [(set (match_operand:V8HI 0 "memory_operand" "=m")
8659 (match_operand:V2DI 1 "register_operand" "v"))
8662 (parallel [(const_int 2) (const_int 3)
8663 (const_int 4) (const_int 5)
8664 (const_int 6) (const_int 7)]))))]
8666 "vpmov<trunsuffix>qw\t{%1, %0|%0, %1}"
8667 [(set_attr "type" "ssemov")
8668 (set_attr "memory" "store")
8669 (set_attr "prefix" "evex")
8670 (set_attr "mode" "TI")])
;; Masked register form of the V2DI -> 2 x HI down-conversion.  Elements
;; 0-1 of operand 2 (the destination itself or a constant accepted by "C")
;; are the alternative to the converted values under mask register
;; operand 3; the other six elements of the result are an explicit zero
;; const_vector.
8672 (define_insn "avx512vl_<code>v2div2hi2_mask"
8673 [(set (match_operand:V8HI 0 "register_operand" "=v")
8677 (match_operand:V2DI 1 "register_operand" "v"))
8679 (match_operand:V8HI 2 "vector_move_operand" "0C")
8680 (parallel [(const_int 0) (const_int 1)]))
8681 (match_operand:QI 3 "register_operand" "Yk"))
8682 (const_vector:V6HI [(const_int 0) (const_int 0)
8683 (const_int 0) (const_int 0)
8684 (const_int 0) (const_int 0)])))]
8686 "vpmov<trunsuffix>qw\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
8687 [(set_attr "type" "ssemov")
8688 (set_attr "prefix" "evex")
8689 (set_attr "mode" "TI")])
;; Masked store form of the V2DI -> 2 x HI down-conversion: V8HI memory
;; destination, mask register operand 2.  The first selector (indices
;; 0-1) covers the converted elements and the second (indices 2..7) the
;; remaining ones; the expressions they select from are not visible in
;; this chunk (presumably the destination itself -- confirm).
8691 (define_insn "avx512vl_<code>v2div2hi2_store_mask"
8692 [(set (match_operand:V8HI 0 "memory_operand" "=m")
8696 (match_operand:V2DI 1 "register_operand" "v"))
8699 (parallel [(const_int 0) (const_int 1)]))
8700 (match_operand:QI 2 "register_operand" "Yk"))
8703 (parallel [(const_int 2) (const_int 3)
8704 (const_int 4) (const_int 5)
8705 (const_int 6) (const_int 7)]))))]
8707 "vpmov<trunsuffix>qw\t{%1, %0%{%2%}|%0%{%2%}, %1}"
8708 [(set_attr "type" "ssemov")
8709 (set_attr "memory" "store")
8710 (set_attr "prefix" "evex")
8711 (set_attr "mode" "TI")])
;; Unmasked register-to-register V2DI -> 2 x SI down-conversion.  The V4SI
;; result consists of the converted source and operand 2, which must be an
;; all-zero V2SI constant (const0_operand).
8713 (define_insn "*avx512vl_<code>v2div2si2"
8714 [(set (match_operand:V4SI 0 "register_operand" "=v")
8717 (match_operand:V2DI 1 "register_operand" "v"))
8718 (match_operand:V2SI 2 "const0_operand")))]
8720 "vpmov<trunsuffix>qd\t{%1, %0|%0, %1}"
8721 [(set_attr "type" "ssemov")
8722 (set_attr "prefix" "evex")
8723 (set_attr "mode" "TI")])
;; Store form of the V2DI -> 2 x SI down-conversion: V4SI memory
;; destination.  The visible selector lists element indices 2-3, i.e. the
;; elements outside the two converted ones (presumably taken from the
;; destination itself -- the selecting expression is not visible in this
;; chunk).
8725 (define_insn "*avx512vl_<code>v2div2si2_store"
8726 [(set (match_operand:V4SI 0 "memory_operand" "=m")
8729 (match_operand:V2DI 1 "register_operand" "v"))
8732 (parallel [(const_int 2) (const_int 3)]))))]
8734 "vpmov<trunsuffix>qd\t{%1, %0|%0, %1}"
8735 [(set_attr "type" "ssemov")
8736 (set_attr "memory" "store")
8737 (set_attr "prefix" "evex")
8738 (set_attr "mode" "TI")])
;; Masked register form of the V2DI -> 2 x SI down-conversion.  Elements
;; 0-1 of operand 2 (the destination itself or a constant accepted by "C")
;; are the alternative to the converted values under mask register
;; operand 3; the other two elements of the result are an explicit zero
;; const_vector.
8740 (define_insn "avx512vl_<code>v2div2si2_mask"
8741 [(set (match_operand:V4SI 0 "register_operand" "=v")
8745 (match_operand:V2DI 1 "register_operand" "v"))
8747 (match_operand:V4SI 2 "vector_move_operand" "0C")
8748 (parallel [(const_int 0) (const_int 1)]))
8749 (match_operand:QI 3 "register_operand" "Yk"))
8750 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
8752 "vpmov<trunsuffix>qd\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
8753 [(set_attr "type" "ssemov")
8754 (set_attr "prefix" "evex")
8755 (set_attr "mode" "TI")])
;; Masked memory-destination form of vpmov<trunsuffix>qd.  Operand 1 is
;; the V2DI source register, operand 2 the QI mask register (Yk), and
;; operand 0 the V4SI memory destination.  Tagged as a store.
8757 (define_insn "avx512vl_<code>v2div2si2_store_mask"
8758 [(set (match_operand:V4SI 0 "memory_operand" "=m")
8762 (match_operand:V2DI 1 "register_operand" "v"))
8765 (parallel [(const_int 0) (const_int 1)]))
8766 (match_operand:QI 2 "register_operand" "Yk"))
8769 (parallel [(const_int 2) (const_int 3)]))))]
8771 "vpmov<trunsuffix>qd\t{%1, %0%{%2%}|%0%{%2%}, %1}"
8772 [(set_attr "type" "ssemov")
8773 (set_attr "memory" "store")
8774 (set_attr "prefix" "evex")
8775 (set_attr "mode" "TI")])
;; Register-destination form of vpmov<trunsuffix>qb: V8DI register
;; operand 1 into V16QI register operand 0.  The pattern also contains
;; an all-zero V8QI const_vector alongside the converted value.
8777 (define_insn "*avx512f_<code>v8div16qi2"
8778 [(set (match_operand:V16QI 0 "register_operand" "=v")
8781 (match_operand:V8DI 1 "register_operand" "v"))
8782 (const_vector:V8QI [(const_int 0) (const_int 0)
8783 (const_int 0) (const_int 0)
8784 (const_int 0) (const_int 0)
8785 (const_int 0) (const_int 0)])))]
8787 "vpmov<trunsuffix>qb\t{%1, %0|%0, %1}"
8788 [(set_attr "type" "ssemov")
8789 (set_attr "prefix" "evex")
8790 (set_attr "mode" "TI")])
;; Memory-destination form of vpmov<trunsuffix>qb: V8DI register
;; operand 1 into V16QI memory operand 0.  The pattern refers to element
;; indices 8..15 through a parallel; the insn is tagged as a store.
8792 (define_insn "*avx512f_<code>v8div16qi2_store"
8793 [(set (match_operand:V16QI 0 "memory_operand" "=m")
8796 (match_operand:V8DI 1 "register_operand" "v"))
8799 (parallel [(const_int 8) (const_int 9)
8800 (const_int 10) (const_int 11)
8801 (const_int 12) (const_int 13)
8802 (const_int 14) (const_int 15)]))))]
8804 "vpmov<trunsuffix>qb\t{%1, %0|%0, %1}"
8805 [(set_attr "type" "ssemov")
8806 (set_attr "memory" "store")
8807 (set_attr "prefix" "evex")
8808 (set_attr "mode" "TI")])
;; Masked register form of vpmov<trunsuffix>qb.  Operand 3 is the QI mask
;; register (Yk).  Operand 2 (vector_move_operand, constraints "0C") is
;; either tied to the destination or a "C" constant, with elements 0..7
;; selected from it; it is emitted via %N2 after the mask.
8810 (define_insn "avx512f_<code>v8div16qi2_mask"
8811 [(set (match_operand:V16QI 0 "register_operand" "=v")
8815 (match_operand:V8DI 1 "register_operand" "v"))
8817 (match_operand:V16QI 2 "vector_move_operand" "0C")
8818 (parallel [(const_int 0) (const_int 1)
8819 (const_int 2) (const_int 3)
8820 (const_int 4) (const_int 5)
8821 (const_int 6) (const_int 7)]))
8822 (match_operand:QI 3 "register_operand" "Yk"))
8823 (const_vector:V8QI [(const_int 0) (const_int 0)
8824 (const_int 0) (const_int 0)
8825 (const_int 0) (const_int 0)
8826 (const_int 0) (const_int 0)])))]
8828 "vpmov<trunsuffix>qb\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
8829 [(set_attr "type" "ssemov")
8830 (set_attr "prefix" "evex")
8831 (set_attr "mode" "TI")])
;; Masked memory-destination form of vpmov<trunsuffix>qb.  Operand 1 is
;; the V8DI source register, operand 2 the QI mask register (Yk), and
;; operand 0 the V16QI memory destination.  Tagged as a store.
8833 (define_insn "avx512f_<code>v8div16qi2_mask_store"
8834 [(set (match_operand:V16QI 0 "memory_operand" "=m")
8838 (match_operand:V8DI 1 "register_operand" "v"))
8841 (parallel [(const_int 0) (const_int 1)
8842 (const_int 2) (const_int 3)
8843 (const_int 4) (const_int 5)
8844 (const_int 6) (const_int 7)]))
8845 (match_operand:QI 2 "register_operand" "Yk"))
8848 (parallel [(const_int 8) (const_int 9)
8849 (const_int 10) (const_int 11)
8850 (const_int 12) (const_int 13)
8851 (const_int 14) (const_int 15)]))))]
8853 "vpmov<trunsuffix>qb\t{%1, %0%{%2%}|%0%{%2%}, %1}"
8854 [(set_attr "type" "ssemov")
8855 (set_attr "memory" "store")
8856 (set_attr "prefix" "evex")
8857 (set_attr "mode" "TI")])
8859 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8861 ;; Parallel integral arithmetic
8863 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander for integer vector negation over the VI_AVX2 modes.  The
;; preparation statement materialises an all-zero vector in a register
;; as operands[2].
;; NOTE(review): presumably operands[2] is the minuend of a 0 - x
;; subtraction -- confirm against the full pattern.
8865 (define_expand "neg<mode>2"
8866 [(set (match_operand:VI_AVX2 0 "register_operand")
8869 (match_operand:VI_AVX2 1 "nonimmediate_operand")))]
8871 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
;; Expander for vector integer add/subtract (optionally masked) over the
;; VI_AVX2 modes.  Requires SSE2 plus the mask-mode condition; operands
;; are legitimised with ix86_fixup_binary_operands_no_copy before the
;; pattern is emitted.
8873 (define_expand "<plusminus_insn><mode>3<mask_name>"
8874 [(set (match_operand:VI_AVX2 0 "register_operand")
8876 (match_operand:VI_AVX2 1 "nonimmediate_operand")
8877 (match_operand:VI_AVX2 2 "nonimmediate_operand")))]
8878 "TARGET_SSE2 && <mask_mode512bit_condition>"
8879 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Matcher for vector integer add/subtract.  Alternative 0 (isa noavx)
;; is the two-operand form with operand 1 tied to the destination;
;; alternative 1 (isa avx) is the three-operand vp... form, which also
;; prints the optional mask operand.
8881 (define_insn "*<plusminus_insn><mode>3<mask_name>"
8882 [(set (match_operand:VI_AVX2 0 "register_operand" "=x,v")
8884 (match_operand:VI_AVX2 1 "nonimmediate_operand" "<comm>0,v")
8885 (match_operand:VI_AVX2 2 "nonimmediate_operand" "xm,vm")))]
8886 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands) && <mask_mode512bit_condition>"
8888 p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
8889 vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8890 [(set_attr "isa" "noavx,avx")
8891 (set_attr "type" "sseiadd")
8892 (set_attr "prefix_data16" "1,*")
8893 (set_attr "prefix" "<mask_prefix3>")
8894 (set_attr "mode" "<sseinsnmode>")])
;; Expander for saturating add/subtract (sat_plusminus) over the
;; VI12_AVX2 modes.  Requires SSE2 plus the mask-mode condition;
;; operands are legitimised with ix86_fixup_binary_operands_no_copy.
8896 (define_expand "<sse2_avx2>_<plusminus_insn><mode>3<mask_name>"
8897 [(set (match_operand:VI12_AVX2 0 "register_operand")
8898 (sat_plusminus:VI12_AVX2
8899 (match_operand:VI12_AVX2 1 "nonimmediate_operand")
8900 (match_operand:VI12_AVX2 2 "nonimmediate_operand")))]
8901 "TARGET_SSE2 && <mask_mode512bit_condition>"
8902 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Matcher for saturating add/subtract over the VI12_AVX2 modes.
;; Alternative 0 (isa noavx) is the two-operand form; alternative 1
;; (isa avx) is the three-operand vp... form with the optional mask
;; operand.  The prefix attribute is "orig" / "maybe_evex" respectively.
8904 (define_insn "*<sse2_avx2>_<plusminus_insn><mode>3<mask_name>"
8905 [(set (match_operand:VI12_AVX2 0 "register_operand" "=x,v")
8906 (sat_plusminus:VI12_AVX2
8907 (match_operand:VI12_AVX2 1 "nonimmediate_operand" "<comm>0,v")
8908 (match_operand:VI12_AVX2 2 "nonimmediate_operand" "xm,vm")))]
8910 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
8911 && <mask_mode512bit_condition>"
8913 p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
8914 vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8915 [(set_attr "isa" "noavx,avx")
8916 (set_attr "type" "sseiadd")
8917 (set_attr "prefix_data16" "1,*")
8918 (set_attr "prefix" "orig,maybe_evex")
8919 (set_attr "mode" "TI")])
;; Expander for byte-element vector multiply over the VI1_AVX2 modes.
;; Both inputs must be registers; the expansion is delegated to
;; ix86_expand_vecop_qihi with MULT.
8921 (define_expand "mul<mode>3<mask_name>"
8922 [(set (match_operand:VI1_AVX2 0 "register_operand")
8923 (mult:VI1_AVX2 (match_operand:VI1_AVX2 1 "register_operand")
8924 (match_operand:VI1_AVX2 2 "register_operand")))]
8925 "TARGET_SSE2 && <mask_mode512bit_condition>"
8927 ix86_expand_vecop_qihi (MULT, operands[0], operands[1], operands[2]);
;; Expander for 16-bit-element vector multiply over the VI2_AVX2 modes.
;; Requires SSE2 plus the mask-mode condition; operands are legitimised
;; with ix86_fixup_binary_operands_no_copy (MULT).
8931 (define_expand "mul<mode>3<mask_name>"
8932 [(set (match_operand:VI2_AVX2 0 "register_operand")
8933 (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "nonimmediate_operand")
8934 (match_operand:VI2_AVX2 2 "nonimmediate_operand")))]
8935 "TARGET_SSE2 && <mask_mode512bit_condition>"
8936 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; Matcher for pmullw / vpmullw over the VI2_AVX2 modes.  Operand 1 is
;; marked commutative ("%").  Alternative 0 is the two-operand non-AVX
;; form; alternative 1 is the three-operand AVX form with the optional
;; mask operand.
8938 (define_insn "*mul<mode>3<mask_name>"
8939 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,v")
8940 (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,v")
8941 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,vm")))]
8943 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)
8944 && <mask_mode512bit_condition>"
8946 pmullw\t{%2, %0|%0, %2}
8947 vpmullw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8948 [(set_attr "isa" "noavx,avx")
8949 (set_attr "type" "sseimul")
8950 (set_attr "prefix_data16" "1,*")
8951 (set_attr "prefix" "orig,vex")
8952 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the high-part multiply over the VI2_AVX2 modes.  Both
;; inputs are extended (any_extend) to <ssedoublemode>, multiplied, and
;; the product is logically shifted right in that wider mode.  Operands
;; are legitimised with ix86_fixup_binary_operands_no_copy (MULT).
8954 (define_expand "<s>mul<mode>3_highpart<mask_name>"
8955 [(set (match_operand:VI2_AVX2 0 "register_operand")
8957 (lshiftrt:<ssedoublemode>
8958 (mult:<ssedoublemode>
8959 (any_extend:<ssedoublemode>
8960 (match_operand:VI2_AVX2 1 "nonimmediate_operand"))
8961 (any_extend:<ssedoublemode>
8962 (match_operand:VI2_AVX2 2 "nonimmediate_operand")))
8964 "TARGET_SSE2 && <mask_mode512bit_condition>"
8965 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; Matcher for pmulh<u>w / vpmulh<u>w: the widened product of the two
;; any_extend'ed VI2_AVX2 inputs, shifted right in <ssedoublemode>.
;; Operand 1 is commutative ("%").  Alternative 0 is the two-operand
;; non-AVX form, alternative 1 the three-operand AVX form with the
;; optional mask operand.
8967 (define_insn "*<s>mul<mode>3_highpart<mask_name>"
8968 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,v")
8970 (lshiftrt:<ssedoublemode>
8971 (mult:<ssedoublemode>
8972 (any_extend:<ssedoublemode>
8973 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,v"))
8974 (any_extend:<ssedoublemode>
8975 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,vm")))
8978 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)
8979 && <mask_mode512bit_condition>"
8981 pmulh<u>w\t{%2, %0|%0, %2}
8982 vpmulh<u>w\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8983 [(set_attr "isa" "noavx,avx")
8984 (set_attr "type" "sseimul")
8985 (set_attr "prefix_data16" "1,*")
8986 (set_attr "prefix" "orig,vex")
8987 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the widening multiply of the even-indexed elements
;; (0, 2, ..., 14) of two V16SI operands, producing a V8DI result.
;; Operands are legitimised with ix86_fixup_binary_operands_no_copy.
8989 (define_expand "vec_widen_umult_even_v16si<mask_name>"
8990 [(set (match_operand:V8DI 0 "register_operand")
8994 (match_operand:V16SI 1 "nonimmediate_operand")
8995 (parallel [(const_int 0) (const_int 2)
8996 (const_int 4) (const_int 6)
8997 (const_int 8) (const_int 10)
8998 (const_int 12) (const_int 14)])))
9001 (match_operand:V16SI 2 "nonimmediate_operand")
9002 (parallel [(const_int 0) (const_int 2)
9003 (const_int 4) (const_int 6)
9004 (const_int 8) (const_int 10)
9005 (const_int 12) (const_int 14)])))))]
9007 "ix86_fixup_binary_operands_no_copy (MULT, V16SImode, operands);")
;; Matcher emitting vpmuludq on the even-indexed elements (0, 2, ..., 14)
;; of two V16SI operands into a V8DI register.  Operand 1 is commutative
;; ("%v"); operand 2 may be memory.  Requires AVX512F and is always
;; EVEX-encoded.
9009 (define_insn "*vec_widen_umult_even_v16si<mask_name>"
9010 [(set (match_operand:V8DI 0 "register_operand" "=v")
9014 (match_operand:V16SI 1 "nonimmediate_operand" "%v")
9015 (parallel [(const_int 0) (const_int 2)
9016 (const_int 4) (const_int 6)
9017 (const_int 8) (const_int 10)
9018 (const_int 12) (const_int 14)])))
9021 (match_operand:V16SI 2 "nonimmediate_operand" "vm")
9022 (parallel [(const_int 0) (const_int 2)
9023 (const_int 4) (const_int 6)
9024 (const_int 8) (const_int 10)
9025 (const_int 12) (const_int 14)])))))]
9026 "TARGET_AVX512F && ix86_binary_operator_ok (MULT, V16SImode, operands)"
9027 "vpmuludq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9028 [(set_attr "isa" "avx512f")
9029 (set_attr "type" "sseimul")
9030 (set_attr "prefix_extra" "1")
9031 (set_attr "prefix" "evex")
9032 (set_attr "mode" "XI")])
;; Expander for the widening multiply of the even-indexed elements
;; (0, 2, 4, 6) of two V8SI operands, producing a V4DI result.  Requires
;; AVX2 plus the mask-mode condition; operands are legitimised with
;; ix86_fixup_binary_operands_no_copy.
9034 (define_expand "vec_widen_umult_even_v8si<mask_name>"
9035 [(set (match_operand:V4DI 0 "register_operand")
9039 (match_operand:V8SI 1 "nonimmediate_operand")
9040 (parallel [(const_int 0) (const_int 2)
9041 (const_int 4) (const_int 6)])))
9044 (match_operand:V8SI 2 "nonimmediate_operand")
9045 (parallel [(const_int 0) (const_int 2)
9046 (const_int 4) (const_int 6)])))))]
9047 "TARGET_AVX2 && <mask_mode512bit_condition>"
9048 "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")
;; Matcher emitting vpmuludq on the even-indexed elements (0, 2, 4, 6)
;; of two V8SI operands into a V4DI register.  Operand 1 is commutative
;; ("%v"); the prefix is "maybe_evex" because the pattern may carry a
;; mask operand.
9050 (define_insn "*vec_widen_umult_even_v8si<mask_name>"
9051 [(set (match_operand:V4DI 0 "register_operand" "=v")
9055 (match_operand:V8SI 1 "nonimmediate_operand" "%v")
9056 (parallel [(const_int 0) (const_int 2)
9057 (const_int 4) (const_int 6)])))
9060 (match_operand:V8SI 2 "nonimmediate_operand" "vm")
9061 (parallel [(const_int 0) (const_int 2)
9062 (const_int 4) (const_int 6)])))))]
9064 && ix86_binary_operator_ok (MULT, V8SImode, operands)
9065 && <mask_mode512bit_condition>"
9066 "vpmuludq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9067 [(set_attr "type" "sseimul")
9068 (set_attr "prefix" "maybe_evex")
9069 (set_attr "mode" "OI")])
;; Expander for the widening multiply of elements 0 and 2 of two V4SI
;; operands, producing a V2DI result.  Requires SSE2 plus the mask-mode
;; condition; operands are legitimised with
;; ix86_fixup_binary_operands_no_copy.
9071 (define_expand "vec_widen_umult_even_v4si<mask_name>"
9072 [(set (match_operand:V2DI 0 "register_operand")
9076 (match_operand:V4SI 1 "nonimmediate_operand")
9077 (parallel [(const_int 0) (const_int 2)])))
9080 (match_operand:V4SI 2 "nonimmediate_operand")
9081 (parallel [(const_int 0) (const_int 2)])))))]
9082 "TARGET_SSE2 && <mask_mode512bit_condition>"
9083 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; Matcher emitting pmuludq / vpmuludq on elements 0 and 2 of two V4SI
;; operands into a V2DI register.  Operand 1 is commutative ("%").
;; Alternative 0 is the two-operand non-AVX form, alternative 1 the
;; three-operand AVX form with the optional mask operand.
9085 (define_insn "*vec_widen_umult_even_v4si<mask_name>"
9086 [(set (match_operand:V2DI 0 "register_operand" "=x,v")
9090 (match_operand:V4SI 1 "nonimmediate_operand" "%0,v")
9091 (parallel [(const_int 0) (const_int 2)])))
9094 (match_operand:V4SI 2 "nonimmediate_operand" "xm,vm")
9095 (parallel [(const_int 0) (const_int 2)])))))]
9097 && ix86_binary_operator_ok (MULT, V4SImode, operands)
9098 && <mask_mode512bit_condition>"
9100 pmuludq\t{%2, %0|%0, %2}
9101 vpmuludq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9102 [(set_attr "isa" "noavx,avx")
9103 (set_attr "type" "sseimul")
9104 (set_attr "prefix_data16" "1,*")
9105 (set_attr "prefix" "orig,maybe_evex")
9106 (set_attr "mode" "TI")])
;; Expander for the signed-named widening multiply of the even-indexed
;; elements (0, 2, ..., 14) of two V16SI operands, producing a V8DI
;; result.  Operands are legitimised with
;; ix86_fixup_binary_operands_no_copy.
9108 (define_expand "vec_widen_smult_even_v16si<mask_name>"
9109 [(set (match_operand:V8DI 0 "register_operand")
9113 (match_operand:V16SI 1 "nonimmediate_operand")
9114 (parallel [(const_int 0) (const_int 2)
9115 (const_int 4) (const_int 6)
9116 (const_int 8) (const_int 10)
9117 (const_int 12) (const_int 14)])))
9120 (match_operand:V16SI 2 "nonimmediate_operand")
9121 (parallel [(const_int 0) (const_int 2)
9122 (const_int 4) (const_int 6)
9123 (const_int 8) (const_int 10)
9124 (const_int 12) (const_int 14)])))))]
9126 "ix86_fixup_binary_operands_no_copy (MULT, V16SImode, operands);")
;; Matcher emitting vpmuldq on the even-indexed elements (0, 2, ..., 14)
;; of two V16SI operands into a V8DI register.  Operand 1 is commutative
;; ("%v"); operand 2 may be memory.  Requires AVX512F and is always
;; EVEX-encoded.
9128 (define_insn "*vec_widen_smult_even_v16si<mask_name>"
9129 [(set (match_operand:V8DI 0 "register_operand" "=v")
9133 (match_operand:V16SI 1 "nonimmediate_operand" "%v")
9134 (parallel [(const_int 0) (const_int 2)
9135 (const_int 4) (const_int 6)
9136 (const_int 8) (const_int 10)
9137 (const_int 12) (const_int 14)])))
9140 (match_operand:V16SI 2 "nonimmediate_operand" "vm")
9141 (parallel [(const_int 0) (const_int 2)
9142 (const_int 4) (const_int 6)
9143 (const_int 8) (const_int 10)
9144 (const_int 12) (const_int 14)])))))]
9145 "TARGET_AVX512F && ix86_binary_operator_ok (MULT, V16SImode, operands)"
9146 "vpmuldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9147 [(set_attr "isa" "avx512f")
9148 (set_attr "type" "sseimul")
9149 (set_attr "prefix_extra" "1")
9150 (set_attr "prefix" "evex")
9151 (set_attr "mode" "XI")])
;; Expander for the signed-named widening multiply of the even-indexed
;; elements (0, 2, 4, 6) of two V8SI operands, producing a V4DI result.
;; Requires AVX2 plus the mask-mode condition; operands are legitimised
;; with ix86_fixup_binary_operands_no_copy.
9153 (define_expand "vec_widen_smult_even_v8si<mask_name>"
9154 [(set (match_operand:V4DI 0 "register_operand")
9158 (match_operand:V8SI 1 "nonimmediate_operand")
9159 (parallel [(const_int 0) (const_int 2)
9160 (const_int 4) (const_int 6)])))
9163 (match_operand:V8SI 2 "nonimmediate_operand")
9164 (parallel [(const_int 0) (const_int 2)
9165 (const_int 4) (const_int 6)])))))]
9166 "TARGET_AVX2 && <mask_mode512bit_condition>"
9167 "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")
;; Matcher emitting vpmuldq on the even-indexed elements (0, 2, 4, 6) of
;; two V8SI operands into a V4DI register.
;; Operand 1 carries the "%" commutative marker so the register
;; allocator may swap the two multiplicands, as in the other widening
;; multiply insns of this family.  The mask-mode condition mirrors the
;; one on the expander, so a masked variant is only available where that
;; condition holds.  The prefix is "maybe_evex" because the pattern may
;; carry a mask operand, which cannot be VEX-encoded.
9169 (define_insn "*vec_widen_smult_even_v8si<mask_name>"
9170 [(set (match_operand:V4DI 0 "register_operand" "=v")
9174 (match_operand:V8SI 1 "nonimmediate_operand" "%v")
9175 (parallel [(const_int 0) (const_int 2)
9176 (const_int 4) (const_int 6)])))
9179 (match_operand:V8SI 2 "nonimmediate_operand" "vm")
9180 (parallel [(const_int 0) (const_int 2)
9181 (const_int 4) (const_int 6)])))))]
9183 && ix86_binary_operator_ok (MULT, V8SImode, operands) && <mask_mode512bit_condition>"
9184 "vpmuldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9185 [(set_attr "type" "sseimul")
9186 (set_attr "prefix_extra" "1")
9187 (set_attr "prefix" "maybe_evex")
9188 (set_attr "mode" "OI")])
;; Expander for the SSE4.1 widening multiply of elements 0 and 2 of two
;; V4SI operands, producing a V2DI result.  Requires SSE4.1 plus the
;; mask-mode condition; operands are legitimised with
;; ix86_fixup_binary_operands_no_copy.
9190 (define_expand "sse4_1_mulv2siv2di3<mask_name>"
9191 [(set (match_operand:V2DI 0 "register_operand")
9195 (match_operand:V4SI 1 "nonimmediate_operand")
9196 (parallel [(const_int 0) (const_int 2)])))
9199 (match_operand:V4SI 2 "nonimmediate_operand")
9200 (parallel [(const_int 0) (const_int 2)])))))]
9201 "TARGET_SSE4_1 && <mask_mode512bit_condition>"
9202 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; Matcher emitting pmuldq / vpmuldq on elements 0 and 2 of two V4SI
;; operands into a V2DI register.  Operand 1 is commutative ("%").
;; Alternative 0 is the two-operand non-AVX form, alternative 1 the
;; three-operand AVX form with the optional mask operand.
9204 (define_insn "*sse4_1_mulv2siv2di3<mask_name>"
9205 [(set (match_operand:V2DI 0 "register_operand" "=x,v")
9209 (match_operand:V4SI 1 "nonimmediate_operand" "%0,v")
9210 (parallel [(const_int 0) (const_int 2)])))
9213 (match_operand:V4SI 2 "nonimmediate_operand" "xm,vm")
9214 (parallel [(const_int 0) (const_int 2)])))))]
9216 && ix86_binary_operator_ok (MULT, V4SImode, operands)
9217 && <mask_mode512bit_condition>"
9219 pmuldq\t{%2, %0|%0, %2}
9220 vpmuldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9221 [(set_attr "isa" "noavx,avx")
9222 (set_attr "type" "sseimul")
9223 (set_attr "prefix_data16" "1,*")
9224 (set_attr "prefix_extra" "1")
9225 (set_attr "prefix" "orig,vex")
9226 (set_attr "mode" "TI")])
;; vpmaddwd expressed as an unspec (UNSPEC_PMADDWD512) over the VI2_AVX2
;; modes, with the result in <sseunpackmode>.  Operand 1 must be a
;; register and operand 2 may be memory.  Requires AVX512BW plus the
;; mask-mode condition and is always EVEX-encoded.
9228 (define_insn "avx512bw_pmaddwd512<mode><mask_name>"
9229 [(set (match_operand:<sseunpackmode> 0 "register_operand" "=v")
9230 (unspec:<sseunpackmode>
9231 [(match_operand:VI2_AVX2 1 "register_operand" "v")
9232 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "vm")]
9233 UNSPEC_PMADDWD512))]
9234 "TARGET_AVX512BW && <mask_mode512bit_condition>"
9235 "vpmaddwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9236 [(set_attr "type" "sseiadd")
9237 (set_attr "prefix" "evex")
9238 (set_attr "mode" "XI")])
;; Expander for the AVX2 pmaddwd pattern.  The even-indexed elements
;; (0, 2, ..., 14) of the two V16HI operands are combined in one term and
;; the odd-indexed elements (1, 3, ..., 15) of the same operands (via
;; match_dup) in a second term; the result is V8SI.  Operands are
;; legitimised with ix86_fixup_binary_operands_no_copy (MULT).
9240 (define_expand "avx2_pmaddwd"
9241 [(set (match_operand:V8SI 0 "register_operand")
9246 (match_operand:V16HI 1 "nonimmediate_operand")
9247 (parallel [(const_int 0) (const_int 2)
9248 (const_int 4) (const_int 6)
9249 (const_int 8) (const_int 10)
9250 (const_int 12) (const_int 14)])))
9253 (match_operand:V16HI 2 "nonimmediate_operand")
9254 (parallel [(const_int 0) (const_int 2)
9255 (const_int 4) (const_int 6)
9256 (const_int 8) (const_int 10)
9257 (const_int 12) (const_int 14)]))))
9260 (vec_select:V8HI (match_dup 1)
9261 (parallel [(const_int 1) (const_int 3)
9262 (const_int 5) (const_int 7)
9263 (const_int 9) (const_int 11)
9264 (const_int 13) (const_int 15)])))
9266 (vec_select:V8HI (match_dup 2)
9267 (parallel [(const_int 1) (const_int 3)
9268 (const_int 5) (const_int 7)
9269 (const_int 9) (const_int 11)
9270 (const_int 13) (const_int 15)]))))))]
9272 "ix86_fixup_binary_operands_no_copy (MULT, V16HImode, operands);")
;; Matcher emitting vpmaddwd for V16HI inputs and a V8SI result.  The
;; pattern pairs the even-indexed elements of operands 1 and 2 and,
;; through match_dup, the odd-indexed elements of the same operands.
;; Operand 1 is commutative ("%x"); operand 2 may be memory.  Requires
;; AVX2 and is VEX-encoded.
9274 (define_insn "*avx2_pmaddwd"
9275 [(set (match_operand:V8SI 0 "register_operand" "=x")
9280 (match_operand:V16HI 1 "nonimmediate_operand" "%x")
9281 (parallel [(const_int 0) (const_int 2)
9282 (const_int 4) (const_int 6)
9283 (const_int 8) (const_int 10)
9284 (const_int 12) (const_int 14)])))
9287 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
9288 (parallel [(const_int 0) (const_int 2)
9289 (const_int 4) (const_int 6)
9290 (const_int 8) (const_int 10)
9291 (const_int 12) (const_int 14)]))))
9294 (vec_select:V8HI (match_dup 1)
9295 (parallel [(const_int 1) (const_int 3)
9296 (const_int 5) (const_int 7)
9297 (const_int 9) (const_int 11)
9298 (const_int 13) (const_int 15)])))
9300 (vec_select:V8HI (match_dup 2)
9301 (parallel [(const_int 1) (const_int 3)
9302 (const_int 5) (const_int 7)
9303 (const_int 9) (const_int 11)
9304 (const_int 13) (const_int 15)]))))))]
9305 "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V16HImode, operands)"
9306 "vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
9307 [(set_attr "type" "sseiadd")
9308 (set_attr "prefix" "vex")
9309 (set_attr "mode" "OI")])
;; Expander for the SSE2 pmaddwd pattern.  The even-indexed elements
;; (0, 2, 4, 6) of the two V8HI operands are combined in one term and the
;; odd-indexed elements (1, 3, 5, 7) of the same operands (via match_dup)
;; in a second term; the result is V4SI.  Operands are legitimised with
;; ix86_fixup_binary_operands_no_copy (MULT).
9311 (define_expand "sse2_pmaddwd"
9312 [(set (match_operand:V4SI 0 "register_operand")
9317 (match_operand:V8HI 1 "nonimmediate_operand")
9318 (parallel [(const_int 0) (const_int 2)
9319 (const_int 4) (const_int 6)])))
9322 (match_operand:V8HI 2 "nonimmediate_operand")
9323 (parallel [(const_int 0) (const_int 2)
9324 (const_int 4) (const_int 6)]))))
9327 (vec_select:V4HI (match_dup 1)
9328 (parallel [(const_int 1) (const_int 3)
9329 (const_int 5) (const_int 7)])))
9331 (vec_select:V4HI (match_dup 2)
9332 (parallel [(const_int 1) (const_int 3)
9333 (const_int 5) (const_int 7)]))))))]
9335 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; Matcher emitting pmaddwd / vpmaddwd for V8HI inputs and a V4SI result.
;; Operand 1 is commutative ("%").  Alternative 0 is the two-operand
;; non-AVX form with operand 1 tied to the destination; alternative 1 is
;; the three-operand VEX form.
9337 (define_insn "*sse2_pmaddwd"
9338 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
9343 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x")
9344 (parallel [(const_int 0) (const_int 2)
9345 (const_int 4) (const_int 6)])))
9348 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
9349 (parallel [(const_int 0) (const_int 2)
9350 (const_int 4) (const_int 6)]))))
9353 (vec_select:V4HI (match_dup 1)
9354 (parallel [(const_int 1) (const_int 3)
9355 (const_int 5) (const_int 7)])))
9357 (vec_select:V4HI (match_dup 2)
9358 (parallel [(const_int 1) (const_int 3)
9359 (const_int 5) (const_int 7)]))))))]
9360 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
9362 pmaddwd\t{%2, %0|%0, %2}
9363 vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
9364 [(set_attr "isa" "noavx,avx")
9365 (set_attr "type" "sseiadd")
9366 (set_attr "atom_unit" "simul")
9367 (set_attr "prefix_data16" "1,*")
9368 (set_attr "prefix" "orig,vex")
9369 (set_attr "mode" "TI")])
;; vpmullq over the VI8 modes, optionally masked.  Operand 1 must be a
;; register and operand 2 may be memory.  Requires AVX512DQ plus the
;; mask-mode condition and is always EVEX-encoded.
9371 (define_insn "avx512dq_mul<mode>3<mask_name>"
9372 [(set (match_operand:VI8 0 "register_operand" "=v")
9374 (match_operand:VI8 1 "register_operand" "v")
9375 (match_operand:VI8 2 "nonimmediate_operand" "vm")))]
9376 "TARGET_AVX512DQ && <mask_mode512bit_condition>"
9377 "vpmullq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9378 [(set_attr "type" "sseimul")
9379 (set_attr "prefix" "evex")
9380 (set_attr "mode" "<sseinsnmode>")])
;; Expander for 32-bit-element vector multiply over the VI4_AVX512F
;; modes.  The preparation code contains two strategies: one forces any
;; operand that is not a nonimmediate_operand into a register and then
;; applies ix86_fixup_binary_operands_no_copy (MULT); the other hands
;; the whole expansion to ix86_expand_sse2_mulv4si3.
9382 (define_expand "mul<mode>3<mask_name>"
9383 [(set (match_operand:VI4_AVX512F 0 "register_operand")
9385 (match_operand:VI4_AVX512F 1 "general_vector_operand")
9386 (match_operand:VI4_AVX512F 2 "general_vector_operand")))]
9387 "TARGET_SSE2 && <mask_mode512bit_condition>"
9391 if (!nonimmediate_operand (operands[1], <MODE>mode))
9392 operands[1] = force_reg (<MODE>mode, operands[1]);
9393 if (!nonimmediate_operand (operands[2], <MODE>mode))
9394 operands[2] = force_reg (<MODE>mode, operands[2]);
9395 ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
9399 ix86_expand_sse2_mulv4si3 (operands[0], operands[1], operands[2]);
;; Matcher emitting pmulld / vpmulld over the VI4_AVX512F modes.
;; Operand 1 is commutative ("%").  Alternative 0 is the two-operand
;; non-AVX form, alternative 1 the three-operand AVX form with the
;; optional mask operand.  Requires SSE4.1 plus the mask-mode condition.
9404 (define_insn "*<sse4_1_avx2>_mul<mode>3<mask_name>"
9405 [(set (match_operand:VI4_AVX512F 0 "register_operand" "=x,v")
9407 (match_operand:VI4_AVX512F 1 "nonimmediate_operand" "%0,v")
9408 (match_operand:VI4_AVX512F 2 "nonimmediate_operand" "xm,vm")))]
9409 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, <MODE>mode, operands) && <mask_mode512bit_condition>"
9411 pmulld\t{%2, %0|%0, %2}
9412 vpmulld\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9413 [(set_attr "isa" "noavx,avx")
9414 (set_attr "type" "sseimul")
9415 (set_attr "prefix_extra" "1")
9416 (set_attr "prefix" "<mask_prefix3>")
9417 (set_attr "btver2_decode" "vector,vector")
9418 (set_attr "mode" "<sseinsnmode>")])
;; Expander for 64-bit-element vector multiply over the
;; VI8_AVX2_AVX512F modes.  All operands must be registers; the
;; expansion is delegated to ix86_expand_sse2_mulvxdi3.
9420 (define_expand "mul<mode>3"
9421 [(set (match_operand:VI8_AVX2_AVX512F 0 "register_operand")
9422 (mult:VI8_AVX2_AVX512F
9423 (match_operand:VI8_AVX2_AVX512F 1 "register_operand")
9424 (match_operand:VI8_AVX2_AVX512F 2 "register_operand")))]
9427 ix86_expand_sse2_mulvxdi3 (operands[0], operands[1], operands[2]);
;; Expander for the "hi" widening multiply over the VI124_AVX2 modes,
;; with the result in <sseunpackmode>.  Operands 1 and 2 must be
;; registers; the work is delegated to ix86_expand_mul_widen_hilo.
9431 (define_expand "vec_widen_<s>mult_hi_<mode>"
9432 [(match_operand:<sseunpackmode> 0 "register_operand")
9433 (any_extend:<sseunpackmode>
9434 (match_operand:VI124_AVX2 1 "register_operand"))
9435 (match_operand:VI124_AVX2 2 "register_operand")]
9438 ix86_expand_mul_widen_hilo (operands[0], operands[1], operands[2],
;; Expander for the "lo" widening multiply over the VI124_AVX2 modes,
;; with the result in <sseunpackmode>.  Operands 1 and 2 must be
;; registers; the work is delegated to ix86_expand_mul_widen_hilo.
9443 (define_expand "vec_widen_<s>mult_lo_<mode>"
9444 [(match_operand:<sseunpackmode> 0 "register_operand")
9445 (any_extend:<sseunpackmode>
9446 (match_operand:VI124_AVX2 1 "register_operand"))
9447 (match_operand:VI124_AVX2 2 "register_operand")]
9450 ix86_expand_mul_widen_hilo (operands[0], operands[1], operands[2],
9455 ;; Most widen_<s>mult_even_<mode> can be handled directly from other
9456 ;; named patterns, but signed V4SI needs special help for plain SSE2.
;; Operand 0 is the V2DI result; operands 1 and 2 are the V4SI inputs.
;; The expansion is delegated to ix86_expand_mul_widen_evenodd.
9457 (define_expand "vec_widen_smult_even_v4si"
9458 [(match_operand:V2DI 0 "register_operand")
9459 (match_operand:V4SI 1 "nonimmediate_operand")
9460 (match_operand:V4SI 2 "nonimmediate_operand")]
9463 ix86_expand_mul_widen_evenodd (operands[0], operands[1], operands[2],
;; Expander for the "odd" widening multiply over the VI4_AVX512F modes,
;; with the result in <sseunpackmode>.  Inputs are accepted as
;; general_vector_operand; the expansion is delegated to
;; ix86_expand_mul_widen_evenodd.
9468 (define_expand "vec_widen_<s>mult_odd_<mode>"
9469 [(match_operand:<sseunpackmode> 0 "register_operand")
9470 (any_extend:<sseunpackmode>
9471 (match_operand:VI4_AVX512F 1 "general_vector_operand"))
9472 (match_operand:VI4_AVX512F 2 "general_vector_operand")]
9475 ix86_expand_mul_widen_evenodd (operands[0], operands[1], operands[2],
;; Suffix appended to "..._pmaddwd" when building a generator name:
;; "512v32hi" for V32HI, empty for V16HI and V8HI.
9480 (define_mode_attr SDOT_PMADD_SUF
9481 [(V32HI "512v32hi") (V16HI "") (V8HI "")])
;; Signed dot-product expander over the VI2_AVX2 modes.  Operands 1 and
;; 2 are the inputs, operand 3 is an <sseunpackmode> register and
;; operand 0 the <sseunpackmode> result.  The expansion emits the
;; matching pmaddwd insn into a fresh temporary and then sets
;; operands[0] from a PLUS in <sseunpackmode>.
9483 (define_expand "sdot_prod<mode>"
9484 [(match_operand:<sseunpackmode> 0 "register_operand")
9485 (match_operand:VI2_AVX2 1 "register_operand")
9486 (match_operand:VI2_AVX2 2 "register_operand")
9487 (match_operand:<sseunpackmode> 3 "register_operand")]
9490 rtx t = gen_reg_rtx (<sseunpackmode>mode);
9491 emit_insn (gen_<sse2_avx2>_pmaddwd<SDOT_PMADD_SUF> (t, operands[1], operands[2]));
9492 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9493 gen_rtx_PLUS (<sseunpackmode>mode,
9498 ;; Normally we use widen_mul_even/odd, but combine can't quite get it all
9499 ;; back together when madd is available.
;; Expansion: xop_pmacsdqh combines operands 1, 2 and 3 into a V2DI
;; temporary, then xop_pmacsdql combines operands 1 and 2 with that
;; temporary into operands[0].
9500 (define_expand "sdot_prodv4si"
9501 [(match_operand:V2DI 0 "register_operand")
9502 (match_operand:V4SI 1 "register_operand")
9503 (match_operand:V4SI 2 "register_operand")
9504 (match_operand:V2DI 3 "register_operand")]
9507 rtx t = gen_reg_rtx (V2DImode);
9508 emit_insn (gen_xop_pmacsdqh (t, operands[1], operands[2], operands[3]));
9509 emit_insn (gen_xop_pmacsdql (operands[0], operands[1], operands[2], t));
;; Expander for V16QI inputs with a V4SI result.  sse2_psadbw of
;; operands 1 and 2 goes into a V2DI temporary, which is moved into a
;; V4SI temporary with convert_move and then added to operand 3 with
;; addv4si3 to form operand 0.
9513 (define_expand "usadv16qi"
9514 [(match_operand:V4SI 0 "register_operand")
9515 (match_operand:V16QI 1 "register_operand")
9516 (match_operand:V16QI 2 "nonimmediate_operand")
9517 (match_operand:V4SI 3 "nonimmediate_operand")]
9520 rtx t1 = gen_reg_rtx (V2DImode);
9521 rtx t2 = gen_reg_rtx (V4SImode);
9522 emit_insn (gen_sse2_psadbw (t1, operands[1], operands[2]));
9523 convert_move (t2, t1, 0);
9524 emit_insn (gen_addv4si3 (operands[0], t2, operands[3]));
;; Expander for V32QI inputs with a V8SI result.  avx2_psadbw of
;; operands 1 and 2 goes into a V4DI temporary, which is moved into a
;; V8SI temporary with convert_move and then added to operand 3 with
;; addv8si3 to form operand 0.
9528 (define_expand "usadv32qi"
9529 [(match_operand:V8SI 0 "register_operand")
9530 (match_operand:V32QI 1 "register_operand")
9531 (match_operand:V32QI 2 "nonimmediate_operand")
9532 (match_operand:V8SI 3 "nonimmediate_operand")]
9535 rtx t1 = gen_reg_rtx (V4DImode);
9536 rtx t2 = gen_reg_rtx (V8SImode);
9537 emit_insn (gen_avx2_psadbw (t1, operands[1], operands[2]));
9538 convert_move (t2, t1, 0);
9539 emit_insn (gen_addv8si3 (operands[0], t2, operands[3]));
;; psra / vpsra over the VI24_AVX2 modes.  Operand 2 is the SI shift
;; count, either a register or an "N" constant.  Alternative 0 is the
;; two-operand non-AVX form, alternative 1 the three-operand AVX form
;; with the optional mask operand.  length_immediate depends on whether
;; operand 2 is a const_int.
9543 (define_insn "ashr<mode>3<mask_name>"
9544 [(set (match_operand:VI24_AVX2 0 "register_operand" "=x,v")
9546 (match_operand:VI24_AVX2 1 "register_operand" "0,v")
9547 (match_operand:SI 2 "nonmemory_operand" "xN,vN")))]
9548 "TARGET_SSE2 && <mask_mode512bit_condition>"
9550 psra<ssemodesuffix>\t{%2, %0|%0, %2}
9551 vpsra<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9552 [(set_attr "isa" "noavx,avx")
9553 (set_attr "type" "sseishft")
9554 (set (attr "length_immediate")
9555 (if_then_else (match_operand 2 "const_int_operand")
9557 (const_string "0")))
9558 (set_attr "prefix_data16" "1,*")
9559 (set_attr "prefix" "orig,maybe_evex")
9560 (set_attr "mode" "<sseinsnmode>")])
;; vpsraq on V4DI.  Alternative 0 takes a register source and a register
;; count; alternative 1 allows a memory source with an "N" constant
;; count.  length_immediate depends on whether operand 2 is a const_int.
9562 (define_insn "ashrv4di3<mask_name>"
9563 [(set (match_operand:V4DI 0 "register_operand" "=v,v")
9565 (match_operand:V4DI 1 "nonimmediate_operand" "v,vm")
9566 (match_operand:SI 2 "nonmemory_operand" "v,N")))]
9568 "vpsraq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9569 [(set_attr "type" "sseishft")
9570 (set (attr "length_immediate")
9571 (if_then_else (match_operand 2 "const_int_operand")
9573 (const_string "0")))
9574 (set_attr "mode" "OI")])
;; vpsraq on V2DI.  The shift count (operand 2) is in DImode here.
;; Alternative 0 takes a register source and a register count;
;; alternative 1 allows a memory source with an "N" constant count.
9576 (define_insn "ashrv2di3<mask_name>_1"
9577 [(set (match_operand:V2DI 0 "register_operand" "=v,v")
9579 (match_operand:V2DI 1 "nonimmediate_operand" "v,vm")
9580 (match_operand:DI 2 "nonmemory_operand" "v,N")))]
9582 "vpsraq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9583 [(set_attr "type" "sseishft")
9584 (set (attr "length_immediate")
9585 (if_then_else (match_operand 2 "const_int_operand")
9587 (const_string "0")))
9588 (set_attr "mode" "TI")])
;; vpsra over the VI48_512 modes.  Requires AVX512F plus the mask-mode
;; condition.  Alternative 0 takes a register source and a register
;; count; alternative 1 allows a memory source with an "N" constant
;; count.
9590 (define_insn "ashr<mode>3<mask_name>"
9591 [(set (match_operand:VI48_512 0 "register_operand" "=v,v")
9593 (match_operand:VI48_512 1 "nonimmediate_operand" "v,vm")
9594 (match_operand:SI 2 "nonmemory_operand" "v,N")))]
9595 "TARGET_AVX512F && <mask_mode512bit_condition>"
9596 "vpsra<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9597 [(set_attr "type" "sseishft")
9598 (set (attr "length_immediate")
9599 (if_then_else (match_operand 2 "const_int_operand")
9601 (const_string "0")))
9602 (set_attr "mode" "<sseinsnmode>")])
;; Logical shifts (any_lshift) over the VI248_AVX2 modes.  Operand 2 is
;; the SI shift count, a register or an "N" constant.  When a mask is
;; applied the insn additionally needs AVX512VL or V32HImode.
;; Alternative 0 is the two-operand non-AVX form, alternative 1 the
;; three-operand AVX form with the optional mask operand.
9604 (define_insn "<shift_insn><mode>3<mask_name>"
9605 [(set (match_operand:VI248_AVX2 0 "register_operand" "=x,v")
9606 (any_lshift:VI248_AVX2
9607 (match_operand:VI248_AVX2 1 "register_operand" "0,v")
9608 (match_operand:SI 2 "nonmemory_operand" "xN,vN")))]
9610 && (!<mask_applied> || TARGET_AVX512VL || <MODE>mode == V32HImode)"
9612 p<vshift><ssemodesuffix>\t{%2, %0|%0, %2}
9613 vp<vshift><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9614 [(set_attr "isa" "noavx,avx")
9615 (set_attr "type" "sseishft")
9616 (set (attr "length_immediate")
9617 (if_then_else (match_operand 2 "const_int_operand")
9619 (const_string "0")))
9620 (set_attr "prefix_data16" "1,*")
9621 (set_attr "prefix" "orig,vex")
9622 (set_attr "mode" "<sseinsnmode>")])
;; Logical shifts (any_lshift) over the VI48_512 modes.  Requires
;; AVX512F plus the mask-mode condition.  Alternative 0 takes a register
;; source with a register or "N" constant count; alternative 1 takes a
;; memory source, which is only allowed with an "N" constant count.
9624 (define_insn "<shift_insn><mode>3<mask_name>"
9625 [(set (match_operand:VI48_512 0 "register_operand" "=v,v")
9626 (any_lshift:VI48_512
9627 (match_operand:VI48_512 1 "nonimmediate_operand" "v,m")
9628 (match_operand:SI 2 "nonmemory_operand" "vN,N")))]
9629 "TARGET_AVX512F && <mask_mode512bit_condition>"
9630 "vp<vshift><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9631 [(set_attr "isa" "avx512f")
9632 (set_attr "type" "sseishft")
9633 (set (attr "length_immediate")
9634 (if_then_else (match_operand 2 "const_int_operand")
9636 (const_string "0")))
9637 (set_attr "prefix" "evex")
9638 (set_attr "mode" "<sseinsnmode>")])
;; Whole-vector left shift expander over the VI_128 modes.  The shift
;; amount (operand 2) must satisfy const_0_to_255_mul_8_operand.  The
;; preparation code views operand 1 as V1TI, allocates a V1TI temporary
;; as operands[3], and makes operands[4] the <MODE> view of that
;; temporary, which the second set copies into operand 0.
9641 (define_expand "vec_shl_<mode>"
9644 (match_operand:VI_128 1 "register_operand")
9645 (match_operand:SI 2 "const_0_to_255_mul_8_operand")))
9646 (set (match_operand:VI_128 0 "register_operand") (match_dup 4))]
9649 operands[1] = gen_lowpart (V1TImode, operands[1]);
9650 operands[3] = gen_reg_rtx (V1TImode);
9651 operands[4] = gen_lowpart (<MODE>mode, operands[3]);
;; pslldq / vpslldq over the VIMAX_AVX2 modes.  Operand 2 arrives as a
;; constant satisfying const_0_to_255_mul_8_operand and is divided by 8
;; before printing; the instruction's immediate is a byte count.  The
;; mnemonic is chosen by which_alternative (non-AVX two-operand form
;; versus AVX three-operand form).
9654 (define_insn "<sse2_avx2>_ashl<mode>3"
9655 [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,v")
9657 (match_operand:VIMAX_AVX2 1 "register_operand" "0,v")
9658 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
9661 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
9663 switch (which_alternative)
9666 return "pslldq\t{%2, %0|%0, %2}";
9668 return "vpslldq\t{%2, %1, %0|%0, %1, %2}";
9673 [(set_attr "isa" "noavx,avx")
9674 (set_attr "type" "sseishft")
9675 (set_attr "length_immediate" "1")
9676 (set_attr "prefix_data16" "1,*")
9677 (set_attr "prefix" "orig,vex")
9678 (set_attr "mode" "<sseinsnmode>")])
;; Whole-vector right shift expander over the VI_128 modes.  The shift
;; amount (operand 2) must satisfy const_0_to_255_mul_8_operand.  The
;; preparation code views operand 1 as V1TI, allocates a V1TI temporary
;; as operands[3], and makes operands[4] the <MODE> view of that
;; temporary, which the second set copies into operand 0.
9680 (define_expand "vec_shr_<mode>"
9683 (match_operand:VI_128 1 "register_operand")
9684 (match_operand:SI 2 "const_0_to_255_mul_8_operand")))
9685 (set (match_operand:VI_128 0 "register_operand") (match_dup 4))]
9688 operands[1] = gen_lowpart (V1TImode, operands[1]);
9689 operands[3] = gen_reg_rtx (V1TImode);
9690 operands[4] = gen_lowpart (<MODE>mode, operands[3]);
;; psrldq / vpsrldq over the VIMAX_AVX2 modes.  Operand 2 arrives as a
;; constant satisfying const_0_to_255_mul_8_operand and is divided by 8
;; before printing; the instruction's immediate is a byte count.  The
;; mnemonic is chosen by which_alternative (non-AVX two-operand form
;; versus AVX three-operand form).
9693 (define_insn "<sse2_avx2>_lshr<mode>3"
9694 [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,v")
9695 (lshiftrt:VIMAX_AVX2
9696 (match_operand:VIMAX_AVX2 1 "register_operand" "0,v")
9697 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
9700 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
9702 switch (which_alternative)
9705 return "psrldq\t{%2, %0|%0, %2}";
9707 return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
9712 [(set_attr "isa" "noavx,avx")
9713 (set_attr "type" "sseishft")
9714 (set_attr "length_immediate" "1")
9715 (set_attr "atom_unit" "sishuf")
9716 (set_attr "prefix_data16" "1,*")
9717 (set_attr "prefix" "orig,vex")
9718 (set_attr "mode" "<sseinsnmode>")])
;; Variable-count rotate (vp<rotate>v...) over the VI48_AVX512VL modes.
;; Operand 2 is a vector of the same mode supplying the counts and may
;; be memory.  EVEX-encoded, optionally masked.
9720 (define_insn "<avx512>_<rotate>v<mode><mask_name>"
9721 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
9722 (any_rotate:VI48_AVX512VL
9723 (match_operand:VI48_AVX512VL 1 "register_operand" "v")
9724 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")))]
9726 "vp<rotate>v<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9727 [(set_attr "prefix" "evex")
9728 (set_attr "mode" "<sseinsnmode>")])
;; Immediate-count rotate (vp<rotate>...) over the VI48_AVX512VL modes.
;; Operand 1 may be memory; operand 2 is an SI constant satisfying
;; const_0_to_255_operand.  EVEX-encoded, optionally masked.
9730 (define_insn "<avx512>_<rotate><mode><mask_name>"
9731 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
9732 (any_rotate:VI48_AVX512VL
9733 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm")
9734 (match_operand:SI 2 "const_0_to_255_operand")))]
9736 "vp<rotate><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9737 [(set_attr "prefix" "evex")
9738 (set_attr "mode" "<sseinsnmode>")])
;; Expander for integer max/min (maxmin) over the VI124_256_1248_512
;; modes.  Requires AVX2 plus the mask-mode and round-mode conditions;
;; operands are legitimised with ix86_fixup_binary_operands_no_copy.
9740 (define_expand "<code><mode>3<mask_name><round_name>"
9741 [(set (match_operand:VI124_256_1248_512 0 "register_operand")
9742 (maxmin:VI124_256_1248_512
9743 (match_operand:VI124_256_1248_512 1 "nonimmediate_operand")
9744 (match_operand:VI124_256_1248_512 2 "nonimmediate_operand")))]
9745 "TARGET_AVX2 && <mask_mode512bit_condition> && <round_mode512bit_condition>"
9746 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Integer max/min instruction for VI124_256_1248_512 modes.  Operand 1 is
;; marked commutative ("%"); ix86_binary_operator_ok must accept the
;; operand combination.  Emits vp<maxmin_int><ssemodesuffix> with the
;; optional mask (<mask_operand3>) and rounding (<round_mask_op3>)
;; decorations supplied by the substitutions.
;; The "mode" attribute uses <sseinsnmode> rather than a hard-coded "OI":
;; the iterator name indicates that 512-bit modes are included, and those
;; must not be described as 256-bit (OI) operations.  For 256-bit modes
;; <sseinsnmode> is expected to still yield OI (NOTE(review): confirm
;; against the sseinsnmode definition).
9748 (define_insn "*avx2_<code><mode>3<mask_name><round_name>"
9749 [(set (match_operand:VI124_256_1248_512 0 "register_operand" "=v")
9750 (maxmin:VI124_256_1248_512
9751 (match_operand:VI124_256_1248_512 1 "nonimmediate_operand" "%v")
9752 (match_operand:VI124_256_1248_512 2 "nonimmediate_operand" "<round_constraint>")))]
9753 "TARGET_AVX2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
9754 && <mask_mode512bit_condition> && <round_mode512bit_condition>"
9755 "vp<maxmin_int><ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
9756 [(set_attr "type" "sseiadd")
9757 (set_attr "prefix_extra" "1")
9758 (set_attr "prefix" "maybe_evex")
9759 (set_attr "mode" "<sseinsnmode>")])
;; EVEX-encoded integer max/min on VI128_256 modes: register operand 1,
;; register-or-memory operand 2, optional mask via <mask_operand3>.
;; (The rtx code line and the enabling condition are not visible in this
;; excerpt.)
9761 (define_insn "<mask_codefor><code><mode>3<mask_name>"
9762 [(set (match_operand:VI128_256 0 "register_operand" "=v")
9764 (match_operand:VI128_256 1 "register_operand" "v")
9765 (match_operand:VI128_256 2 "nonimmediate_operand" "vm")))]
9767 "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9768 [(set_attr "type" "sseiadd")
9769 (set_attr "prefix" "evex")
9770 (set_attr "mode" "<sseinsnmode>")])
;; Expander for max/min on VI8_AVX2 (64-bit element) vectors.
;; With TARGET_AVX512VL the operands are only passed through
;; ix86_fixup_binary_operands_no_copy.  In the other visible path the
;; operation is lowered to a vector conditional select through
;; ix86_expand_int_vcond, using the vcond operand layout:
;;   xops[0]    destination
;;   xops[1/2]  values selected when the comparison is true/false
;;              (operand 1 then 2 for SMAX/UMAX, swapped for the min codes)
;;   xops[3]    comparison operand1 > operand2, GTU for UMAX/UMIN, else GT
;;   xops[4/5]  the two comparison operands
9772 (define_expand "<code><mode>3"
9773 [(set (match_operand:VI8_AVX2 0 "register_operand")
9775 (match_operand:VI8_AVX2 1 "register_operand")
9776 (match_operand:VI8_AVX2 2 "register_operand")))]
9783 if (TARGET_AVX512VL)
9784 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
9787 xops[0] = operands[0];
9789 if (<CODE> == SMAX || <CODE> == UMAX)
9791 xops[1] = operands[1];
9792 xops[2] = operands[2];
9796 xops[1] = operands[2];
9797 xops[2] = operands[1];
9800 code = (<CODE> == UMAX || <CODE> == UMIN) ? GTU : GT;
9802 xops[3] = gen_rtx_fmt_ee (code, VOIDmode, operands[1], operands[2]);
9803 xops[4] = operands[1];
9804 xops[5] = operands[2];
9806 ok = ix86_expand_int_vcond (xops);
;; Expander for max/min on VI124_128 vectors that falls back to a signed
;; (GT) compare-and-select.  When TARGET_SSE4_1 is set, or the mode is
;; V8HImode, the operands are only passed through
;; ix86_fixup_binary_operands_no_copy.  In the other visible path both
;; inputs are forced into registers and the operation is lowered through
;; ix86_expand_int_vcond: xops[1]/xops[2] are the selected values (in
;; operand order or swapped), xops[3] is (gt operand1 operand2) and
;; xops[4]/xops[5] are the compared operands.
;; (The rtx code line and the test choosing between the two xops
;; orderings are not visible in this excerpt.)
9812 (define_expand "<code><mode>3"
9813 [(set (match_operand:VI124_128 0 "register_operand")
9815 (match_operand:VI124_128 1 "nonimmediate_operand")
9816 (match_operand:VI124_128 2 "nonimmediate_operand")))]
9819 if (TARGET_SSE4_1 || <MODE>mode == V8HImode)
9820 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
9826 xops[0] = operands[0];
9827 operands[1] = force_reg (<MODE>mode, operands[1]);
9828 operands[2] = force_reg (<MODE>mode, operands[2]);
9832 xops[1] = operands[1];
9833 xops[2] = operands[2];
9837 xops[1] = operands[2];
9838 xops[2] = operands[1];
9841 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
9842 xops[4] = operands[1];
9843 xops[5] = operands[2];
9845 ok = ix86_expand_int_vcond (xops);
;; Max/min instruction on VI14_128 vectors (rtx code line not visible in
;; this excerpt).  Operand 1 is commutative ("%").  Alternative 0 is the
;; legacy two-operand p<maxmin_int> form with operand 1 tied to the
;; destination; alternative 1 is the three-operand vp<maxmin_int> form,
;; which can carry a mask via <mask_operand3>.
;; NOTE(review): alternative 1 is maskable but its "prefix" is "vex";
;; confirm whether an EVEX-aware value is intended.
9851 (define_insn "*sse4_1_<code><mode>3<mask_name>"
9852 [(set (match_operand:VI14_128 0 "register_operand" "=x,v")
9854 (match_operand:VI14_128 1 "nonimmediate_operand" "%0,v")
9855 (match_operand:VI14_128 2 "nonimmediate_operand" "xm,vm")))]
9857 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
9858 && <mask_mode512bit_condition>"
9860 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
9861 vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9862 [(set_attr "isa" "noavx,avx")
9863 (set_attr "type" "sseiadd")
9864 (set_attr "prefix_extra" "1,*")
9865 (set_attr "prefix" "orig,vex")
9866 (set_attr "mode" "TI")])
;; V8HI max/min instruction available from TARGET_SSE2 (rtx code line not
;; visible in this excerpt).  Operand 1 is commutative; alternative 0 is
;; the two-operand p<maxmin_int>w form, alternative 1 the three-operand
;; vp<maxmin_int>w form.
9868 (define_insn "*<code>v8hi3"
9869 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
9871 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x")
9872 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")))]
9873 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V8HImode, operands)"
9875 p<maxmin_int>w\t{%2, %0|%0, %2}
9876 vp<maxmin_int>w\t{%2, %1, %0|%0, %1, %2}"
9877 [(set_attr "isa" "noavx,avx")
9878 (set_attr "type" "sseiadd")
9879 (set_attr "prefix_data16" "1,*")
9880 (set_attr "prefix_extra" "*,1")
9881 (set_attr "prefix" "orig,vex")
9882 (set_attr "mode" "TI")])
;; Expander for max/min on VI124_128 vectors that falls back to an
;; unsigned (GTU) compare-and-select.  Three visible paths:
;;  * TARGET_SSE4_1, or mode V16QImode: operands are only passed through
;;    ix86_fixup_binary_operands_no_copy.
;;  * UMAX on V8HImode: computed as
;;      op3 = operand1 -(unsigned saturating) operand2;  op0 = op3 + operand2
;;    using gen_sse2_ussubv8hi3 and gen_addv8hi3.  op3 is the destination
;;    itself unless the destination equals operand 2, in which case a fresh
;;    register is used so operand 2 is still intact for the add.
;;  * Otherwise: both inputs are forced into registers and the operation is
;;    lowered through ix86_expand_int_vcond with xops[3] =
;;    (gtu operand1 operand2), xops[1]/xops[2] the selected values (in
;;    operand order or swapped) and xops[4]/xops[5] the compared operands.
;; (The rtx code line and the test choosing between the two xops
;; orderings are not visible in this excerpt.)
9884 (define_expand "<code><mode>3"
9885 [(set (match_operand:VI124_128 0 "register_operand")
9887 (match_operand:VI124_128 1 "nonimmediate_operand")
9888 (match_operand:VI124_128 2 "nonimmediate_operand")))]
9891 if (TARGET_SSE4_1 || <MODE>mode == V16QImode)
9892 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
9893 else if (<CODE> == UMAX && <MODE>mode == V8HImode)
9895 rtx op0 = operands[0], op2 = operands[2], op3 = op0;
9896 operands[1] = force_reg (<MODE>mode, operands[1]);
9897 if (rtx_equal_p (op3, op2))
9898 op3 = gen_reg_rtx (V8HImode);
9899 emit_insn (gen_sse2_ussubv8hi3 (op3, operands[1], op2));
9900 emit_insn (gen_addv8hi3 (op0, op3, op2));
9908 operands[1] = force_reg (<MODE>mode, operands[1]);
9909 operands[2] = force_reg (<MODE>mode, operands[2]);
9911 xops[0] = operands[0];
9915 xops[1] = operands[1];
9916 xops[2] = operands[2];
9920 xops[1] = operands[2];
9921 xops[2] = operands[1];
9924 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
9925 xops[4] = operands[1];
9926 xops[5] = operands[2];
9928 ok = ix86_expand_int_vcond (xops);
;; Max/min instruction on VI24_128 vectors (rtx code line not visible in
;; this excerpt).  Operand 1 is commutative ("%").  Alternative 0 is the
;; legacy two-operand p<maxmin_int> form; alternative 1 is the
;; three-operand vp<maxmin_int> form, optionally masked.
;; NOTE(review): alternative 1 is maskable but its "prefix" is "vex";
;; confirm whether an EVEX-aware value is intended.
9934 (define_insn "*sse4_1_<code><mode>3<mask_name>"
9935 [(set (match_operand:VI24_128 0 "register_operand" "=x,v")
9937 (match_operand:VI24_128 1 "nonimmediate_operand" "%0,v")
9938 (match_operand:VI24_128 2 "nonimmediate_operand" "xm,vm")))]
9940 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
9941 && <mask_mode512bit_condition>"
9943 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
9944 vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9945 [(set_attr "isa" "noavx,avx")
9946 (set_attr "type" "sseiadd")
9947 (set_attr "prefix_extra" "1,*")
9948 (set_attr "prefix" "orig,vex")
9949 (set_attr "mode" "TI")])
;; V16QI max/min instruction available from TARGET_SSE2 (rtx code line not
;; visible in this excerpt).  Operand 1 is commutative; alternative 0 is
;; the two-operand p<maxmin_int>b form, alternative 1 the three-operand
;; vp<maxmin_int>b form.
9951 (define_insn "*<code>v16qi3"
9952 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
9954 (match_operand:V16QI 1 "nonimmediate_operand" "%0,x")
9955 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")))]
9956 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V16QImode, operands)"
9958 p<maxmin_int>b\t{%2, %0|%0, %2}
9959 vp<maxmin_int>b\t{%2, %1, %0|%0, %1, %2}"
9960 [(set_attr "isa" "noavx,avx")
9961 (set_attr "type" "sseiadd")
9962 (set_attr "prefix_data16" "1,*")
9963 (set_attr "prefix_extra" "*,1")
9964 (set_attr "prefix" "orig,vex")
9965 (set_attr "mode" "TI")])
9967 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9969 ;; Parallel integral comparisons
9971 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Named expander for equality comparison of VI_256 vectors.  The
;; preparation statement only calls ix86_fixup_binary_operands_no_copy with
;; code EQ.  (The rtx code line and the enabling condition are not visible
;; in this excerpt.)
9973 (define_expand "avx2_eq<mode>3"
9974 [(set (match_operand:VI_256 0 "register_operand")
9976 (match_operand:VI_256 1 "nonimmediate_operand")
9977 (match_operand:VI_256 2 "nonimmediate_operand")))]
9979 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
;; VEX-encoded equality comparison of VI_256 vectors, requiring
;; TARGET_AVX2.  Operand 1 is commutative ("%"); operand 2 may be memory.
;; Emits vpcmpeq<ssemodesuffix>.
9981 (define_insn "*avx2_eq<mode>3"
9982 [(set (match_operand:VI_256 0 "register_operand" "=x")
9984 (match_operand:VI_256 1 "nonimmediate_operand" "%x")
9985 (match_operand:VI_256 2 "nonimmediate_operand" "xm")))]
9986 "TARGET_AVX2 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
9987 "vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
9988 [(set_attr "type" "ssecmp")
9989 (set_attr "prefix_extra" "1")
9990 (set_attr "prefix" "vex")
9991 (set_attr "mode" "OI")])
;; Named expander for an equality comparison of VI_AVX512VL vectors whose
;; result is written to an <avx512fmaskmode> operand (a mask rather than a
;; vector).  The comparison is expressed as an unspec; the unspec name and
;; the enabling condition are not visible in this excerpt.  The preparation
;; statement only calls ix86_fixup_binary_operands_no_copy with code EQ.
9993 (define_expand "<avx512>_eq<mode>3<mask_scalar_merge_name>"
9994 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
9995 (unspec:<avx512fmaskmode>
9996 [(match_operand:VI_AVX512VL 1 "register_operand")
9997 (match_operand:VI_AVX512VL 2 "nonimmediate_operand")]
10000 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
;; Equality comparison of VI_AVX512VL vectors producing a mask
;; (UNSPEC_MASKED_EQ) in a "Yk" register.  Requires TARGET_AVX512F and an
;; operand combination accepted by ix86_binary_operator_ok.  Operand 1 is
;; commutative ("%").  Emits vpcmpeq<ssemodesuffix>;
;; <mask_scalar_merge_operand3> appends the optional input mask.
10002 (define_insn "<avx512>_eq<mode>3<mask_scalar_merge_name>_1"
10003 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
10004 (unspec:<avx512fmaskmode>
10005 [(match_operand:VI_AVX512VL 1 "register_operand" "%v")
10006 (match_operand:VI_AVX512VL 2 "nonimmediate_operand" "vm")]
10007 UNSPEC_MASKED_EQ))]
10008 "TARGET_AVX512F && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
10009 "vpcmpeq<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
10010 [(set_attr "type" "ssecmp")
10011 (set_attr "prefix_extra" "1")
10012 (set_attr "prefix" "evex")
10013 (set_attr "mode" "<sseinsnmode>")])
;; V2DI equality comparison, requiring TARGET_SSE4_1.  Operand 1 is
;; commutative; alternative 0 emits the two-operand pcmpeqq, alternative 1
;; the three-operand vpcmpeqq.
10015 (define_insn "*sse4_1_eqv2di3"
10016 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
10018 (match_operand:V2DI 1 "nonimmediate_operand" "%0,x")
10019 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")))]
10020 "TARGET_SSE4_1 && ix86_binary_operator_ok (EQ, V2DImode, operands)"
10022 pcmpeqq\t{%2, %0|%0, %2}
10023 vpcmpeqq\t{%2, %1, %0|%0, %1, %2}"
10024 [(set_attr "isa" "noavx,avx")
10025 (set_attr "type" "ssecmp")
10026 (set_attr "prefix_extra" "1")
10027 (set_attr "prefix" "orig,vex")
10028 (set_attr "mode" "TI")])
;; Equality comparison of VI124_128 vectors, enabled for TARGET_SSE2 when
;; TARGET_XOP is off.  Operand 1 is commutative; alternative 0 emits the
;; two-operand pcmpeq<ssemodesuffix>, alternative 1 the three-operand
;; vpcmpeq<ssemodesuffix>.
10030 (define_insn "*sse2_eq<mode>3"
10031 [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
10033 (match_operand:VI124_128 1 "nonimmediate_operand" "%0,x")
10034 (match_operand:VI124_128 2 "nonimmediate_operand" "xm,xm")))]
10035 "TARGET_SSE2 && !TARGET_XOP
10036 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
10038 pcmpeq<ssemodesuffix>\t{%2, %0|%0, %2}
10039 vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10040 [(set_attr "isa" "noavx,avx")
10041 (set_attr "type" "ssecmp")
10042 (set_attr "prefix_data16" "1,*")
10043 (set_attr "prefix" "orig,vex")
10044 (set_attr "mode" "TI")])
;; Named expander for equality comparison of VI124_128 vectors, enabled
;; for TARGET_SSE2 when TARGET_XOP is off.  The preparation statement only
;; calls ix86_fixup_binary_operands_no_copy with code EQ.
10046 (define_expand "sse2_eq<mode>3"
10047 [(set (match_operand:VI124_128 0 "register_operand")
10049 (match_operand:VI124_128 1 "nonimmediate_operand")
10050 (match_operand:VI124_128 2 "nonimmediate_operand")))]
10051 "TARGET_SSE2 && !TARGET_XOP "
10052 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
;; Named expander for V2DI equality comparison.  The preparation statement
;; only calls ix86_fixup_binary_operands_no_copy with code EQ.  (The rtx
;; code line and the enabling condition are not visible in this excerpt.)
10054 (define_expand "sse4_1_eqv2di3"
10055 [(set (match_operand:V2DI 0 "register_operand")
10057 (match_operand:V2DI 1 "nonimmediate_operand")
10058 (match_operand:V2DI 2 "nonimmediate_operand")))]
10060 "ix86_fixup_binary_operands_no_copy (EQ, V2DImode, operands);")
;; V2DI greater-than comparison emitting pcmpgtq (alternative 0, operand 1
;; tied to the destination) or vpcmpgtq (alternative 1).  Operand 1 has no
;; "%" marker and must be a register, so the operands are not treated as
;; interchangeable.  (The rtx code line and the enabling condition are not
;; visible in this excerpt.)
10062 (define_insn "sse4_2_gtv2di3"
10063 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
10065 (match_operand:V2DI 1 "register_operand" "0,x")
10066 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")))]
10069 pcmpgtq\t{%2, %0|%0, %2}
10070 vpcmpgtq\t{%2, %1, %0|%0, %1, %2}"
10071 [(set_attr "isa" "noavx,avx")
10072 (set_attr "type" "ssecmp")
10073 (set_attr "prefix_extra" "1")
10074 (set_attr "prefix" "orig,vex")
10075 (set_attr "mode" "TI")])
;; VEX-encoded greater-than comparison of VI_256 vectors emitting
;; vpcmpgt<ssemodesuffix>.  Operand 1 must be a register; operand 2 may be
;; memory.  (The rtx code line and the enabling condition are not visible
;; in this excerpt.)
10077 (define_insn "avx2_gt<mode>3"
10078 [(set (match_operand:VI_256 0 "register_operand" "=x")
10080 (match_operand:VI_256 1 "register_operand" "x")
10081 (match_operand:VI_256 2 "nonimmediate_operand" "xm")))]
10083 "vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10084 [(set_attr "type" "ssecmp")
10085 (set_attr "prefix_extra" "1")
10086 (set_attr "prefix" "vex")
10087 (set_attr "mode" "OI")])
;; Greater-than comparison of VI_AVX512VL vectors producing a mask
;; (UNSPEC_MASKED_GT) in a "Yk" register.  Emits vpcmpgt<ssemodesuffix>;
;; <mask_scalar_merge_operand3> appends the optional input mask.
;; (The enabling condition is not visible in this excerpt.)
10089 (define_insn "<avx512>_gt<mode>3<mask_scalar_merge_name>"
10090 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
10091 (unspec:<avx512fmaskmode>
10092 [(match_operand:VI_AVX512VL 1 "register_operand" "v")
10093 (match_operand:VI_AVX512VL 2 "nonimmediate_operand" "vm")] UNSPEC_MASKED_GT))]
10095 "vpcmpgt<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
10096 [(set_attr "type" "ssecmp")
10097 (set_attr "prefix_extra" "1")
10098 (set_attr "prefix" "evex")
10099 (set_attr "mode" "<sseinsnmode>")])
;; Greater-than comparison of VI124_128 vectors, enabled for TARGET_SSE2
;; when TARGET_XOP is off.  Alternative 0 emits the two-operand
;; pcmpgt<ssemodesuffix> (operand 1 tied to the destination), alternative 1
;; the three-operand vpcmpgt<ssemodesuffix>.
10101 (define_insn "sse2_gt<mode>3"
10102 [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
10104 (match_operand:VI124_128 1 "register_operand" "0,x")
10105 (match_operand:VI124_128 2 "nonimmediate_operand" "xm,xm")))]
10106 "TARGET_SSE2 && !TARGET_XOP"
10108 pcmpgt<ssemodesuffix>\t{%2, %0|%0, %2}
10109 vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10110 [(set_attr "isa" "noavx,avx")
10111 (set_attr "type" "ssecmp")
10112 (set_attr "prefix_data16" "1,*")
10113 (set_attr "prefix" "orig,vex")
10114 (set_attr "mode" "TI")])
;; Vector conditional select for V_512 data with a VI_512 integer
;; comparison: operand 0 = (operator 3 on operands 4 and 5) ? operand 1
;; : operand 2.  The visible part of the condition requires both modes to
;; have the same number of elements.  Expansion is delegated to
;; ix86_expand_int_vcond, whose result is kept in `ok'.
10116 (define_expand "vcond<V_512:mode><VI_512:mode>"
10117 [(set (match_operand:V_512 0 "register_operand")
10118 (if_then_else:V_512
10119 (match_operator 3 ""
10120 [(match_operand:VI_512 4 "nonimmediate_operand")
10121 (match_operand:VI_512 5 "general_operand")])
10122 (match_operand:V_512 1)
10123 (match_operand:V_512 2)))]
10125 && (GET_MODE_NUNITS (<V_512:MODE>mode)
10126 == GET_MODE_NUNITS (<VI_512:MODE>mode))"
10128 bool ok = ix86_expand_int_vcond (operands);
;; Vector conditional select for V_256 data with a VI_256 integer
;; comparison: operand 0 = (operator 3 on operands 4 and 5) ? operand 1
;; : operand 2.  The visible part of the condition requires both modes to
;; have the same number of elements.  Expansion is delegated to
;; ix86_expand_int_vcond, whose result is kept in `ok'.
10133 (define_expand "vcond<V_256:mode><VI_256:mode>"
10134 [(set (match_operand:V_256 0 "register_operand")
10135 (if_then_else:V_256
10136 (match_operator 3 ""
10137 [(match_operand:VI_256 4 "nonimmediate_operand")
10138 (match_operand:VI_256 5 "general_operand")])
10139 (match_operand:V_256 1)
10140 (match_operand:V_256 2)))]
10142 && (GET_MODE_NUNITS (<V_256:MODE>mode)
10143 == GET_MODE_NUNITS (<VI_256:MODE>mode))"
10145 bool ok = ix86_expand_int_vcond (operands);
;; Vector conditional select for V_128 data with a VI124_128 integer
;; comparison: operand 0 = (operator 3 on operands 4 and 5) ? operand 1
;; : operand 2.  The visible part of the condition requires both modes to
;; have the same number of elements.  Expansion is delegated to
;; ix86_expand_int_vcond, whose result is kept in `ok'.
10150 (define_expand "vcond<V_128:mode><VI124_128:mode>"
10151 [(set (match_operand:V_128 0 "register_operand")
10152 (if_then_else:V_128
10153 (match_operator 3 ""
10154 [(match_operand:VI124_128 4 "nonimmediate_operand")
10155 (match_operand:VI124_128 5 "general_operand")])
10156 (match_operand:V_128 1)
10157 (match_operand:V_128 2)))]
10159 && (GET_MODE_NUNITS (<V_128:MODE>mode)
10160 == GET_MODE_NUNITS (<VI124_128:MODE>mode))"
10162 bool ok = ix86_expand_int_vcond (operands);
;; Vector conditional select for VI8F_128 data with a V2DI comparison:
;; operand 0 = (operator 3 on operands 4 and 5) ? operand 1 : operand 2.
;; Expansion is delegated to ix86_expand_int_vcond, whose result is kept
;; in `ok'.  (The enabling condition is not visible in this excerpt.)
10167 (define_expand "vcond<VI8F_128:mode>v2di"
10168 [(set (match_operand:VI8F_128 0 "register_operand")
10169 (if_then_else:VI8F_128
10170 (match_operator 3 ""
10171 [(match_operand:V2DI 4 "nonimmediate_operand")
10172 (match_operand:V2DI 5 "general_operand")])
10173 (match_operand:VI8F_128 1)
10174 (match_operand:VI8F_128 2)))]
10177 bool ok = ix86_expand_int_vcond (operands);
;; "vcondu" counterpart of the V_512/VI_512 conditional select.  Unlike the
;; vcond pattern of the same modes, both comparison operands use
;; nonimmediate_operand and the two selected values use general_operand.
;; The visible part of the condition requires equal element counts;
;; expansion is delegated to ix86_expand_int_vcond.
10182 (define_expand "vcondu<V_512:mode><VI_512:mode>"
10183 [(set (match_operand:V_512 0 "register_operand")
10184 (if_then_else:V_512
10185 (match_operator 3 ""
10186 [(match_operand:VI_512 4 "nonimmediate_operand")
10187 (match_operand:VI_512 5 "nonimmediate_operand")])
10188 (match_operand:V_512 1 "general_operand")
10189 (match_operand:V_512 2 "general_operand")))]
10191 && (GET_MODE_NUNITS (<V_512:MODE>mode)
10192 == GET_MODE_NUNITS (<VI_512:MODE>mode))"
10194 bool ok = ix86_expand_int_vcond (operands);
;; "vcondu" counterpart of the V_256/VI_256 conditional select.  Both
;; comparison operands use nonimmediate_operand and the two selected values
;; use general_operand.  The visible part of the condition requires equal
;; element counts; expansion is delegated to ix86_expand_int_vcond.
10199 (define_expand "vcondu<V_256:mode><VI_256:mode>"
10200 [(set (match_operand:V_256 0 "register_operand")
10201 (if_then_else:V_256
10202 (match_operator 3 ""
10203 [(match_operand:VI_256 4 "nonimmediate_operand")
10204 (match_operand:VI_256 5 "nonimmediate_operand")])
10205 (match_operand:V_256 1 "general_operand")
10206 (match_operand:V_256 2 "general_operand")))]
10208 && (GET_MODE_NUNITS (<V_256:MODE>mode)
10209 == GET_MODE_NUNITS (<VI_256:MODE>mode))"
10211 bool ok = ix86_expand_int_vcond (operands);
;; "vcondu" counterpart of the V_128/VI124_128 conditional select.  Both
;; comparison operands use nonimmediate_operand and the two selected values
;; use general_operand.  The visible part of the condition requires equal
;; element counts; expansion is delegated to ix86_expand_int_vcond.
10216 (define_expand "vcondu<V_128:mode><VI124_128:mode>"
10217 [(set (match_operand:V_128 0 "register_operand")
10218 (if_then_else:V_128
10219 (match_operator 3 ""
10220 [(match_operand:VI124_128 4 "nonimmediate_operand")
10221 (match_operand:VI124_128 5 "nonimmediate_operand")])
10222 (match_operand:V_128 1 "general_operand")
10223 (match_operand:V_128 2 "general_operand")))]
10225 && (GET_MODE_NUNITS (<V_128:MODE>mode)
10226 == GET_MODE_NUNITS (<VI124_128:MODE>mode))"
10228 bool ok = ix86_expand_int_vcond (operands);
;; "vcondu" conditional select for VI8F_128 data with a V2DI comparison.
;; Both comparison operands use nonimmediate_operand and the two selected
;; values use general_operand.  Expansion is delegated to
;; ix86_expand_int_vcond.  (The enabling condition is not visible in this
;; excerpt.)
10233 (define_expand "vcondu<VI8F_128:mode>v2di"
10234 [(set (match_operand:VI8F_128 0 "register_operand")
10235 (if_then_else:VI8F_128
10236 (match_operator 3 ""
10237 [(match_operand:V2DI 4 "nonimmediate_operand")
10238 (match_operand:V2DI 5 "nonimmediate_operand")])
10239 (match_operand:VI8F_128 1 "general_operand")
10240 (match_operand:VI8F_128 2 "general_operand")))]
10243 bool ok = ix86_expand_int_vcond (operands);
;; Modes for which the variable vec_perm expander is provided.  The six
;; 128-bit modes are unconditional; the 256-bit modes require TARGET_AVX2,
;; the 512-bit SF/DF/SI/DI modes TARGET_AVX512F, and V32HI
;; TARGET_AVX512BW.
10248 (define_mode_iterator VEC_PERM_AVX2
10249 [V16QI V8HI V4SI V2DI V4SF V2DF
10250 (V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
10251 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
10252 (V8SF "TARGET_AVX2") (V4DF "TARGET_AVX2")
10253 (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")
10254 (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")
10255 (V32HI "TARGET_AVX512BW")])
;; Variable permutation expander: operand 0 is the destination, operands 1
;; and 2 the source vectors and operand 3 a selector in the integer vector
;; mode <sseintvecmode>; all must be registers.  Enabled for TARGET_SSSE3,
;; TARGET_AVX or TARGET_XOP.  All work is done by ix86_expand_vec_perm.
10257 (define_expand "vec_perm<mode>"
10258 [(match_operand:VEC_PERM_AVX2 0 "register_operand")
10259 (match_operand:VEC_PERM_AVX2 1 "register_operand")
10260 (match_operand:VEC_PERM_AVX2 2 "register_operand")
10261 (match_operand:<sseintvecmode> 3 "register_operand")]
10262 "TARGET_SSSE3 || TARGET_AVX || TARGET_XOP"
10264 ix86_expand_vec_perm (operands);
;; Modes for which the constant-selector vec_perm_const expander is
;; provided, each guarded by its own target flag: TARGET_SSE for the
;; 4x32/2x64 128-bit modes, TARGET_SSE2 for V16QI/V8HI, TARGET_AVX for the
;; 32/64-bit-element 256-bit modes, TARGET_AVX2 for V32QI/V16HI,
;; TARGET_AVX512F for the listed 512-bit modes and TARGET_AVX512BW for
;; V32HI.
10268 (define_mode_iterator VEC_PERM_CONST
10269 [(V4SF "TARGET_SSE") (V4SI "TARGET_SSE")
10270 (V2DF "TARGET_SSE") (V2DI "TARGET_SSE")
10271 (V16QI "TARGET_SSE2") (V8HI "TARGET_SSE2")
10272 (V8SF "TARGET_AVX") (V4DF "TARGET_AVX")
10273 (V8SI "TARGET_AVX") (V4DI "TARGET_AVX")
10274 (V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
10275 (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")
10276 (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")
10277 (V32HI "TARGET_AVX512BW")])
;; Constant-selector permutation expander: operand 0 is the destination,
;; operands 1 and 2 the source vectors, operand 3 the selector in
;; <sseintvecmode> (no predicate).  The body branches on the result of
;; ix86_expand_vec_perm_const; the statements on each branch are not
;; visible in this excerpt.
10279 (define_expand "vec_perm_const<mode>"
10280 [(match_operand:VEC_PERM_CONST 0 "register_operand")
10281 (match_operand:VEC_PERM_CONST 1 "register_operand")
10282 (match_operand:VEC_PERM_CONST 2 "register_operand")
10283 (match_operand:<sseintvecmode> 3)]
10286 if (ix86_expand_vec_perm_const (operands))
10292 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10294 ;; Parallel bitwise logical operations
10296 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; One's complement of a VI vector, expressed as XOR of operand 1 with a
;; second operand.  The preparation code builds a CONST_VECTOR whose
;; GET_MODE_NUNITS elements are all constm1_rtx (all bits set), forces it
;; into a register and stores it as operands[2].
10298 (define_expand "one_cmpl<mode>2"
10299 [(set (match_operand:VI 0 "register_operand")
10300 (xor:VI (match_operand:VI 1 "nonimmediate_operand")
10304 int i, n = GET_MODE_NUNITS (<MODE>mode);
10305 rtvec v = rtvec_alloc (n);
10307 for (i = 0; i < n; ++i)
10308 RTVEC_ELT (v, i) = constm1_rtx;
10310 operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v));
;; Named expander for and-not on VI_AVX2 vectors: the complement of
;; register operand 1 combined with operand 2 (the combining rtx code line
;; is not visible in this excerpt).  Requires TARGET_SSE2 and the mask
;; substitution condition; there is no preparation code.
10313 (define_expand "<sse2_avx2>_andnot<mode>3<mask_name>"
10314 [(set (match_operand:VI_AVX2 0 "register_operand")
10316 (not:VI_AVX2 (match_operand:VI_AVX2 1 "register_operand"))
10317 (match_operand:VI_AVX2 2 "nonimmediate_operand")))]
10318 "TARGET_SSE2 && <mask_mode512bit_condition>")
;; And-not instruction for all VI modes: the complement of operand 1
;; combined with operand 2 (the combining rtx code line is not visible in
;; this excerpt).
;; The assembler template is built at output time:
;;  * `tmp' (the mnemonic) is chosen by a switch on the insn's "mode"
;;    attribute, with a nested switch on <MODE>mode; gcc_asserts check the
;;    TARGET_* flag each case needs.  The visible integer mnemonics are
;;    "pandn<ssemodesuffix>", "pandnq" and "pandn".
;;  * `ops' (the operand format) is chosen by which_alternative: the
;;    two-operand form for alternative 0, the "v"-prefixed three-operand
;;    form with optional mask (<mask_operand3_1>) for alternative 1.
;;  * Both are combined with snprintf into a static 64-byte buffer.
;; The "mode" attribute is itself computed: <ssePSmode> for 16-byte modes
;; when TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL, <sseinsnmode> under
;; TARGET_AVX2, V8SF for modes wider than 16 bytes under plain TARGET_AVX,
;; V4SF without TARGET_SSE2 or when optimizing for size, otherwise
;; <sseinsnmode>.
10320 (define_insn "*andnot<mode>3<mask_name>"
10321 [(set (match_operand:VI 0 "register_operand" "=x,v")
10323 (not:VI (match_operand:VI 1 "register_operand" "0,v"))
10324 (match_operand:VI 2 "nonimmediate_operand" "xm,vm")))]
10325 "TARGET_SSE && <mask_mode512bit_condition>"
10327 static char buf[64];
10331 switch (get_attr_mode (insn))
10334 gcc_assert (TARGET_AVX512F);
10336 gcc_assert (TARGET_AVX2 || TARGET_AVX512VL);
10338 gcc_assert (TARGET_SSE2 || TARGET_AVX512VL);
10339 switch (<MODE>mode)
10343 if (TARGET_AVX512F)
10345 tmp = "pandn<ssemodesuffix>";
10352 if (TARGET_AVX512VL)
10354 tmp = "pandn<ssemodesuffix>";
10358 tmp = TARGET_AVX512VL ? "pandnq" : "pandn";
10363 gcc_assert (TARGET_AVX512F);
10365 gcc_assert (TARGET_AVX);
10367 gcc_assert (TARGET_SSE);
10373 gcc_unreachable ();
10376 switch (which_alternative)
10379 ops = "%s\t{%%2, %%0|%%0, %%2}";
10382 ops = "v%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
10385 gcc_unreachable ();
10388 snprintf (buf, sizeof (buf), ops, tmp);
10391 [(set_attr "isa" "noavx,avx")
10392 (set_attr "type" "sselog")
10393 (set (attr "prefix_data16")
10395 (and (eq_attr "alternative" "0")
10396 (eq_attr "mode" "TI"))
10398 (const_string "*")))
10399 (set_attr "prefix" "<mask_prefix3>")
10401 (cond [(and (match_test "<MODE_SIZE> == 16")
10402 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
10403 (const_string "<ssePSmode>")
10404 (match_test "TARGET_AVX2")
10405 (const_string "<sseinsnmode>")
10406 (match_test "TARGET_AVX")
10408 (match_test "<MODE_SIZE> > 16")
10409 (const_string "V8SF")
10410 (const_string "<sseinsnmode>"))
10411 (ior (not (match_test "TARGET_SSE2"))
10412 (match_test "optimize_function_for_size_p (cfun)"))
10413 (const_string "V4SF")
10415 (const_string "<sseinsnmode>")))])
;; Expander for a bitwise logical operation on VI vectors (the rtx code
;; line is not visible in this excerpt).  Either input may be a
;; non-immediate operand or a constant vector.  The preparation code calls
;; ix86_expand_vector_logical_operator with the operation code, the mode
;; and the operands.
10417 (define_expand "<code><mode>3"
10418 [(set (match_operand:VI 0 "register_operand")
10420 (match_operand:VI 1 "nonimmediate_or_const_vector_operand")
10421 (match_operand:VI 2 "nonimmediate_or_const_vector_operand")))]
10424 ix86_expand_vector_logical_operator (<CODE>, <MODE>mode, operands);
;; Bitwise logical instruction for all VI modes (the rtx code line is not
;; visible in this excerpt).  Operand 1 is commutative ("%"); requires
;; TARGET_SSE, the mask substitution condition and an operand combination
;; accepted by ix86_binary_operator_ok.
;; The assembler template is built at output time:
;;  * `tmp' (the mnemonic) is chosen by a switch on the insn's "mode"
;;    attribute, with a nested switch on <MODE>mode; gcc_asserts check the
;;    TARGET_* flag each case needs.  The visible integer mnemonics are
;;    "p<logic><ssemodesuffix>", "p<logic>q" and "p<logic>".
;;  * `ops' (the operand format) is chosen by which_alternative: the
;;    two-operand form for alternative 0, the "v"-prefixed three-operand
;;    form with optional mask (<mask_operand3_1>) for alternative 1.
;;  * Both are combined with snprintf into a static 64-byte buffer.
;; The "mode" attribute is computed from the same conditions as the
;; and-not instruction: <ssePSmode>, <sseinsnmode>, V8SF or V4SF depending
;; on mode size and target flags.
10428 (define_insn "<mask_codefor><code><mode>3<mask_name>"
10429 [(set (match_operand:VI 0 "register_operand" "=x,v")
10431 (match_operand:VI 1 "nonimmediate_operand" "%0,v")
10432 (match_operand:VI 2 "nonimmediate_operand" "xm,vm")))]
10433 "TARGET_SSE && <mask_mode512bit_condition>
10434 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
10436 static char buf[64];
10440 switch (get_attr_mode (insn))
10443 gcc_assert (TARGET_AVX512F);
10445 gcc_assert (TARGET_AVX2 || TARGET_AVX512VL);
10447 gcc_assert (TARGET_SSE2 || TARGET_AVX512VL);
10448 switch (<MODE>mode)
10452 if (TARGET_AVX512F)
10454 tmp = "p<logic><ssemodesuffix>";
10461 if (TARGET_AVX512VL)
10463 tmp = "p<logic><ssemodesuffix>";
10467 tmp = TARGET_AVX512VL ? "p<logic>q" : "p<logic>";
10472 gcc_assert (TARGET_AVX512F);
10474 gcc_assert (TARGET_AVX);
10476 gcc_assert (TARGET_SSE);
10482 gcc_unreachable ();
10485 switch (which_alternative)
10488 ops = "%s\t{%%2, %%0|%%0, %%2}";
10491 ops = "v%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
10494 gcc_unreachable ();
10497 snprintf (buf, sizeof (buf), ops, tmp);
10500 [(set_attr "isa" "noavx,avx")
10501 (set_attr "type" "sselog")
10502 (set (attr "prefix_data16")
10504 (and (eq_attr "alternative" "0")
10505 (eq_attr "mode" "TI"))
10507 (const_string "*")))
10508 (set_attr "prefix" "<mask_prefix3>")
10510 (cond [(and (match_test "<MODE_SIZE> == 16")
10511 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
10512 (const_string "<ssePSmode>")
10513 (match_test "TARGET_AVX2")
10514 (const_string "<sseinsnmode>")
10515 (match_test "TARGET_AVX")
10517 (match_test "<MODE_SIZE> > 16")
10518 (const_string "V8SF")
10519 (const_string "<sseinsnmode>"))
10520 (ior (not (match_test "TARGET_SSE2"))
10521 (match_test "optimize_function_for_size_p (cfun)"))
10522 (const_string "V4SF")
10524 (const_string "<sseinsnmode>")))])
;; vptestm<ssemodesuffix>: takes two VI_AVX512VL vectors (operand 2 may be
;; memory) and writes an <avx512fmaskmode> mask to a "Yk" register.  The
;; operation is modelled as an unspec; its name and the enabling condition
;; are not visible in this excerpt.
10526 (define_insn "<avx512>_testm<mode>3<mask_scalar_merge_name>"
10527 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
10528 (unspec:<avx512fmaskmode>
10529 [(match_operand:VI_AVX512VL 1 "register_operand" "v")
10530 (match_operand:VI_AVX512VL 2 "nonimmediate_operand" "vm")]
10533 "vptestm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
10534 [(set_attr "prefix" "evex")
10535 (set_attr "mode" "<sseinsnmode>")])
;; vptestnm<ssemodesuffix>: takes two VI_AVX512VL vectors (operand 2 may be
;; memory) and writes an <avx512fmaskmode> mask to a "Yk" register.  The
;; operation is modelled as an unspec; its name and the enabling condition
;; are not visible in this excerpt.
10537 (define_insn "<avx512>_testnm<mode>3<mask_scalar_merge_name>"
10538 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
10539 (unspec:<avx512fmaskmode>
10540 [(match_operand:VI_AVX512VL 1 "register_operand" "v")
10541 (match_operand:VI_AVX512VL 2 "nonimmediate_operand" "vm")]
10544 "vptestnm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
10545 [(set_attr "prefix" "evex")
10546 (set_attr "mode" "<sseinsnmode>")])
10548 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10550 ;; Parallel integral element swizzling
10552 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Truncating pack of two VI248_AVX2_8_AVX512F vectors into one
;; <ssepackmode> vector.  Both inputs are reinterpreted in the packed mode
;; with gen_lowpart and handed to ix86_expand_vec_extract_even_odd with a
;; final argument of 0 (presumably selecting the even-numbered elements,
;; i.e. the low halves of the wide elements -- confirm against the
;; helper).  (The enabling condition is not visible in this excerpt.)
10554 (define_expand "vec_pack_trunc_<mode>"
10555 [(match_operand:<ssepackmode> 0 "register_operand")
10556 (match_operand:VI248_AVX2_8_AVX512F 1 "register_operand")
10557 (match_operand:VI248_AVX2_8_AVX512F 2 "register_operand")]
10560 rtx op1 = gen_lowpart (<ssepackmode>mode, operands[1]);
10561 rtx op2 = gen_lowpart (<ssepackmode>mode, operands[2]);
10562 ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
;; Pack with signed saturation, word to byte: the result is the
;; concatenation of the signed-saturating truncations (ss_truncate) of
;; operands 1 and 2, each from <sseunpackmode> to <ssehalfvecmode>.
;; Alternative 0 emits the two-operand packsswb, alternative 1 the
;; three-operand vpacksswb, optionally masked.
10566 (define_insn "<sse2_avx2>_packsswb<mask_name>"
10567 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,v")
10568 (vec_concat:VI1_AVX2
10569 (ss_truncate:<ssehalfvecmode>
10570 (match_operand:<sseunpackmode> 1 "register_operand" "0,v"))
10571 (ss_truncate:<ssehalfvecmode>
10572 (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,vm"))))]
10573 "TARGET_SSE2 && <mask_mode512bit_condition>"
10575 packsswb\t{%2, %0|%0, %2}
10576 vpacksswb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10577 [(set_attr "isa" "noavx,avx")
10578 (set_attr "type" "sselog")
10579 (set_attr "prefix_data16" "1,*")
10580 (set_attr "prefix" "orig,maybe_evex")
10581 (set_attr "mode" "<sseinsnmode>")])
;; Pack with signed saturation, doubleword to word: the result is the
;; concatenation of the signed-saturating truncations (ss_truncate) of
;; operands 1 and 2, each from <sseunpackmode> to <ssehalfvecmode>.
;; Alternative 0 emits the two-operand packssdw, alternative 1 the
;; three-operand vpackssdw, optionally masked.
;; Alternative 1 accepts "v" registers and a mask operand, so its
;; "prefix" is "maybe_evex" rather than "vex", the same value the
;; packsswb pattern uses.
10583 (define_insn "<sse2_avx2>_packssdw<mask_name>"
10584 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,v")
10585 (vec_concat:VI2_AVX2
10586 (ss_truncate:<ssehalfvecmode>
10587 (match_operand:<sseunpackmode> 1 "register_operand" "0,v"))
10588 (ss_truncate:<ssehalfvecmode>
10589 (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,vm"))))]
10590 "TARGET_SSE2 && <mask_mode512bit_condition>"
10592 packssdw\t{%2, %0|%0, %2}
10593 vpackssdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10594 [(set_attr "isa" "noavx,avx")
10595 (set_attr "type" "sselog")
10596 (set_attr "prefix_data16" "1,*")
10597 (set_attr "prefix" "orig,maybe_evex")
10598 (set_attr "mode" "<sseinsnmode>")])
;; Pack with unsigned saturation, word to byte: the result is the
;; concatenation of the unsigned-saturating truncations (us_truncate) of
;; operands 1 and 2, each from <sseunpackmode> to <ssehalfvecmode>.
;; Alternative 0 emits the two-operand packuswb, alternative 1 the
;; three-operand vpackuswb, optionally masked.
;; Alternative 1 accepts "v" registers and a mask operand, so its
;; "prefix" is "maybe_evex" rather than "vex", the same value the
;; packsswb pattern uses.
10600 (define_insn "<sse2_avx2>_packuswb<mask_name>"
10601 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,v")
10602 (vec_concat:VI1_AVX2
10603 (us_truncate:<ssehalfvecmode>
10604 (match_operand:<sseunpackmode> 1 "register_operand" "0,v"))
10605 (us_truncate:<ssehalfvecmode>
10606 (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,vm"))))]
10607 "TARGET_SSE2 && <mask_mode512bit_condition>"
10609 packuswb\t{%2, %0|%0, %2}
10610 vpackuswb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10611 [(set_attr "isa" "noavx,avx")
10612 (set_attr "type" "sselog")
10613 (set_attr "prefix_data16" "1,*")
10614 (set_attr "prefix" "orig,maybe_evex")
10615 (set_attr "mode" "<sseinsnmode>")])
;; vpunpckhbw on V64QI.  The selection list pairs byte i with byte i+64
;; for i in 8..15, 24..31, 40..47 and 56..63, i.e. the upper eight bytes
;; of each 16-byte group.  Indices 64 and above presumably address operand
;; 2 through a vec_concat of the two inputs; the vec_select/vec_concat
;; lines and the enabling condition are not visible in this excerpt.
10617 (define_insn "avx512bw_interleave_highv64qi<mask_name>"
10618 [(set (match_operand:V64QI 0 "register_operand" "=v")
10621 (match_operand:V64QI 1 "register_operand" "v")
10622 (match_operand:V64QI 2 "nonimmediate_operand" "vm"))
10623 (parallel [(const_int 8) (const_int 72)
10624 (const_int 9) (const_int 73)
10625 (const_int 10) (const_int 74)
10626 (const_int 11) (const_int 75)
10627 (const_int 12) (const_int 76)
10628 (const_int 13) (const_int 77)
10629 (const_int 14) (const_int 78)
10630 (const_int 15) (const_int 79)
10631 (const_int 24) (const_int 88)
10632 (const_int 25) (const_int 89)
10633 (const_int 26) (const_int 90)
10634 (const_int 27) (const_int 91)
10635 (const_int 28) (const_int 92)
10636 (const_int 29) (const_int 93)
10637 (const_int 30) (const_int 94)
10638 (const_int 31) (const_int 95)
10639 (const_int 40) (const_int 104)
10640 (const_int 41) (const_int 105)
10641 (const_int 42) (const_int 106)
10642 (const_int 43) (const_int 107)
10643 (const_int 44) (const_int 108)
10644 (const_int 45) (const_int 109)
10645 (const_int 46) (const_int 110)
10646 (const_int 47) (const_int 111)
10647 (const_int 56) (const_int 120)
10648 (const_int 57) (const_int 121)
10649 (const_int 58) (const_int 122)
10650 (const_int 59) (const_int 123)
10651 (const_int 60) (const_int 124)
10652 (const_int 61) (const_int 125)
10653 (const_int 62) (const_int 126)
10654 (const_int 63) (const_int 127)])))]
10656 "vpunpckhbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10657 [(set_attr "type" "sselog")
10658 (set_attr "prefix" "evex")
10659 (set_attr "mode" "XI")])
;; vpunpckhbw on V32QI, requiring TARGET_AVX2 and the mask substitution
;; condition.  The selection list pairs byte i with byte i+32 for i in
;; 8..15 and 24..31, i.e. the upper eight bytes of each 16-byte group.
;; Indices 32 and above presumably address operand 2 through a vec_concat
;; of the two inputs; the vec_select/vec_concat lines are not visible in
;; this excerpt.
10661 (define_insn "avx2_interleave_highv32qi<mask_name>"
10662 [(set (match_operand:V32QI 0 "register_operand" "=v")
10665 (match_operand:V32QI 1 "register_operand" "v")
10666 (match_operand:V32QI 2 "nonimmediate_operand" "vm"))
10667 (parallel [(const_int 8) (const_int 40)
10668 (const_int 9) (const_int 41)
10669 (const_int 10) (const_int 42)
10670 (const_int 11) (const_int 43)
10671 (const_int 12) (const_int 44)
10672 (const_int 13) (const_int 45)
10673 (const_int 14) (const_int 46)
10674 (const_int 15) (const_int 47)
10675 (const_int 24) (const_int 56)
10676 (const_int 25) (const_int 57)
10677 (const_int 26) (const_int 58)
10678 (const_int 27) (const_int 59)
10679 (const_int 28) (const_int 60)
10680 (const_int 29) (const_int 61)
10681 (const_int 30) (const_int 62)
10682 (const_int 31) (const_int 63)])))]
10683 "TARGET_AVX2 && <mask_mode512bit_condition>"
10684 "vpunpckhbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10685 [(set_attr "type" "sselog")
10686 (set_attr "prefix" "<mask_prefix>")
10687 (set_attr "mode" "OI")])
;; punpckhbw/vpunpckhbw on V16QI, requiring TARGET_SSE2 and the mask
;; substitution condition.  The selection list pairs byte i with byte
;; i+16 for i in 8..15.  Indices 16 and above presumably address operand 2
;; through a vec_concat of the two inputs; the vec_select/vec_concat lines
;; are not visible in this excerpt.  Alternative 0 is the two-operand
;; legacy form, alternative 1 the three-operand form, optionally masked.
10689 (define_insn "vec_interleave_highv16qi<mask_name>"
10690 [(set (match_operand:V16QI 0 "register_operand" "=x,v")
10693 (match_operand:V16QI 1 "register_operand" "0,v")
10694 (match_operand:V16QI 2 "nonimmediate_operand" "xm,vm"))
10695 (parallel [(const_int 8) (const_int 24)
10696 (const_int 9) (const_int 25)
10697 (const_int 10) (const_int 26)
10698 (const_int 11) (const_int 27)
10699 (const_int 12) (const_int 28)
10700 (const_int 13) (const_int 29)
10701 (const_int 14) (const_int 30)
10702 (const_int 15) (const_int 31)])))]
10703 "TARGET_SSE2 && <mask_mode512bit_condition>"
10705 punpckhbw\t{%2, %0|%0, %2}
10706 vpunpckhbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10707 [(set_attr "isa" "noavx,avx")
10708 (set_attr "type" "sselog")
10709 (set_attr "prefix_data16" "1,*")
10710 (set_attr "prefix" "orig,<mask_prefix>")
10711 (set_attr "mode" "TI")])
;; vpunpcklbw on V64QI.  The selection list pairs byte i with byte i+64
;; for i in 0..7, 16..23, 32..39 and 48..55, i.e. the lower eight bytes of
;; each 16-byte group.  Indices 64 and above presumably address operand 2
;; through a vec_concat of the two inputs; the vec_select/vec_concat lines
;; and the enabling condition are not visible in this excerpt.
10713 (define_insn "avx512bw_interleave_lowv64qi<mask_name>"
10714 [(set (match_operand:V64QI 0 "register_operand" "=v")
10717 (match_operand:V64QI 1 "register_operand" "v")
10718 (match_operand:V64QI 2 "nonimmediate_operand" "vm"))
10719 (parallel [(const_int 0) (const_int 64)
10720 (const_int 1) (const_int 65)
10721 (const_int 2) (const_int 66)
10722 (const_int 3) (const_int 67)
10723 (const_int 4) (const_int 68)
10724 (const_int 5) (const_int 69)
10725 (const_int 6) (const_int 70)
10726 (const_int 7) (const_int 71)
10727 (const_int 16) (const_int 80)
10728 (const_int 17) (const_int 81)
10729 (const_int 18) (const_int 82)
10730 (const_int 19) (const_int 83)
10731 (const_int 20) (const_int 84)
10732 (const_int 21) (const_int 85)
10733 (const_int 22) (const_int 86)
10734 (const_int 23) (const_int 87)
10735 (const_int 32) (const_int 96)
10736 (const_int 33) (const_int 97)
10737 (const_int 34) (const_int 98)
10738 (const_int 35) (const_int 99)
10739 (const_int 36) (const_int 100)
10740 (const_int 37) (const_int 101)
10741 (const_int 38) (const_int 102)
10742 (const_int 39) (const_int 103)
10743 (const_int 48) (const_int 112)
10744 (const_int 49) (const_int 113)
10745 (const_int 50) (const_int 114)
10746 (const_int 51) (const_int 115)
10747 (const_int 52) (const_int 116)
10748 (const_int 53) (const_int 117)
10749 (const_int 54) (const_int 118)
10750 (const_int 55) (const_int 119)])))]
10752 "vpunpcklbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10753 [(set_attr "type" "sselog")
10754 (set_attr "prefix" "evex")
10755 (set_attr "mode" "XI")])
;; vpunpcklbw on V32QI, requiring TARGET_AVX2 and the mask substitution
;; condition.  The selection list pairs byte i with byte i+32 for i in
;; 0..7 and 16..23, i.e. the lower eight bytes of each 16-byte group.
;; Indices 32 and above presumably address operand 2 through a vec_concat
;; of the two inputs; the vec_select/vec_concat lines are not visible in
;; this excerpt.
;; NOTE(review): "prefix" is "maybe_vex" here while the high-half V32QI
;; pattern uses "<mask_prefix>"; confirm which is intended.
10757 (define_insn "avx2_interleave_lowv32qi<mask_name>"
10758 [(set (match_operand:V32QI 0 "register_operand" "=v")
10761 (match_operand:V32QI 1 "register_operand" "v")
10762 (match_operand:V32QI 2 "nonimmediate_operand" "vm"))
10763 (parallel [(const_int 0) (const_int 32)
10764 (const_int 1) (const_int 33)
10765 (const_int 2) (const_int 34)
10766 (const_int 3) (const_int 35)
10767 (const_int 4) (const_int 36)
10768 (const_int 5) (const_int 37)
10769 (const_int 6) (const_int 38)
10770 (const_int 7) (const_int 39)
10771 (const_int 16) (const_int 48)
10772 (const_int 17) (const_int 49)
10773 (const_int 18) (const_int 50)
10774 (const_int 19) (const_int 51)
10775 (const_int 20) (const_int 52)
10776 (const_int 21) (const_int 53)
10777 (const_int 22) (const_int 54)
10778 (const_int 23) (const_int 55)])))]
10779 "TARGET_AVX2 && <mask_mode512bit_condition>"
10780 "vpunpcklbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10781 [(set_attr "type" "sselog")
10782 (set_attr "prefix" "maybe_vex")
10783 (set_attr "mode" "OI")])
;; Byte interleave-low for V16QI.  The index list pairs element i with
;; element i+16 for i in 0..7.
;; Two alternatives, selected by the "isa" attribute:
;;   noavx: two-operand punpcklbw, operand 1 tied to the destination ("0");
;;   avx:   three-operand vpunpcklbw, optionally masked via <mask_operand3>.
;; NOTE(review): "prefix" is "orig,vex" here, whereas vec_interleave_lowv8hi
;; uses "orig,maybe_evex" and vec_interleave_highv8hi "orig,maybe_vex" --
;; confirm which value is intended for the masked / "v"-register form.
10785 (define_insn "vec_interleave_lowv16qi<mask_name>"
10786 [(set (match_operand:V16QI 0 "register_operand" "=x,v")
10789 (match_operand:V16QI 1 "register_operand" "0,v")
10790 (match_operand:V16QI 2 "nonimmediate_operand" "xm,vm"))
10791 (parallel [(const_int 0) (const_int 16)
10792 (const_int 1) (const_int 17)
10793 (const_int 2) (const_int 18)
10794 (const_int 3) (const_int 19)
10795 (const_int 4) (const_int 20)
10796 (const_int 5) (const_int 21)
10797 (const_int 6) (const_int 22)
10798 (const_int 7) (const_int 23)])))]
10799 "TARGET_SSE2 && <mask_mode512bit_condition>"
10801 punpcklbw\t{%2, %0|%0, %2}
10802 vpunpcklbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10803 [(set_attr "isa" "noavx,avx")
10804 (set_attr "type" "sselog")
10805 (set_attr "prefix_data16" "1,*")
10806 (set_attr "prefix" "orig,vex")
10807 (set_attr "mode" "TI")])
;; Word interleave-high for V32HI, emitted as vpunpckhwd with EVEX prefix.
;; The index list pairs element i with element i+32 for i in 4..7, 12..15,
;; 20..23 and 28..31, i.e. the upper four words of every group of eight.
;; The insn condition line is not visible in this chunk.
;; NOTE(review): unlike avx512bw_interleave_lowv32hi, this pattern name has
;; no <mask_codefor> prefix -- confirm whether that is deliberate.
10809 (define_insn "avx512bw_interleave_highv32hi<mask_name>"
10810 [(set (match_operand:V32HI 0 "register_operand" "=v")
10813 (match_operand:V32HI 1 "register_operand" "v")
10814 (match_operand:V32HI 2 "nonimmediate_operand" "vm"))
10815 (parallel [(const_int 4) (const_int 36)
10816 (const_int 5) (const_int 37)
10817 (const_int 6) (const_int 38)
10818 (const_int 7) (const_int 39)
10819 (const_int 12) (const_int 44)
10820 (const_int 13) (const_int 45)
10821 (const_int 14) (const_int 46)
10822 (const_int 15) (const_int 47)
10823 (const_int 20) (const_int 52)
10824 (const_int 21) (const_int 53)
10825 (const_int 22) (const_int 54)
10826 (const_int 23) (const_int 55)
10827 (const_int 28) (const_int 60)
10828 (const_int 29) (const_int 61)
10829 (const_int 30) (const_int 62)
10830 (const_int 31) (const_int 63)])))]
10832 "vpunpckhwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10833 [(set_attr "type" "sselog")
10834 (set_attr "prefix" "evex")
10835 (set_attr "mode" "XI")])
;; Word interleave-high for V16HI, emitted as three-operand vpunpckhwd.
;; The index list pairs element i with element i+16 for i in 4..7 and
;; 12..15, i.e. the upper four words of each group of eight.
;; Enabled under TARGET_AVX2 together with <mask_mode512bit_condition>.
10837 (define_insn "avx2_interleave_highv16hi<mask_name>"
10838 [(set (match_operand:V16HI 0 "register_operand" "=v")
10841 (match_operand:V16HI 1 "register_operand" "v")
10842 (match_operand:V16HI 2 "nonimmediate_operand" "vm"))
10843 (parallel [(const_int 4) (const_int 20)
10844 (const_int 5) (const_int 21)
10845 (const_int 6) (const_int 22)
10846 (const_int 7) (const_int 23)
10847 (const_int 12) (const_int 28)
10848 (const_int 13) (const_int 29)
10849 (const_int 14) (const_int 30)
10850 (const_int 15) (const_int 31)])))]
10851 "TARGET_AVX2 && <mask_mode512bit_condition>"
10852 "vpunpckhwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10853 [(set_attr "type" "sselog")
10854 (set_attr "prefix" "maybe_evex")
10855 (set_attr "mode" "OI")])
;; Word interleave-high for V8HI.  The index list pairs element i with
;; element i+8 for i in 4..7.
;; Two alternatives, selected by the "isa" attribute:
;;   noavx: two-operand punpckhwd, operand 1 tied to the destination ("0");
;;   avx:   three-operand vpunpckhwd, optionally masked via <mask_operand3>.
10857 (define_insn "vec_interleave_highv8hi<mask_name>"
10858 [(set (match_operand:V8HI 0 "register_operand" "=x,v")
10861 (match_operand:V8HI 1 "register_operand" "0,v")
10862 (match_operand:V8HI 2 "nonimmediate_operand" "xm,vm"))
10863 (parallel [(const_int 4) (const_int 12)
10864 (const_int 5) (const_int 13)
10865 (const_int 6) (const_int 14)
10866 (const_int 7) (const_int 15)])))]
10867 "TARGET_SSE2 && <mask_mode512bit_condition>"
10869 punpckhwd\t{%2, %0|%0, %2}
10870 vpunpckhwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10871 [(set_attr "isa" "noavx,avx")
10872 (set_attr "type" "sselog")
10873 (set_attr "prefix_data16" "1,*")
10874 (set_attr "prefix" "orig,maybe_vex")
10875 (set_attr "mode" "TI")])
;; Word interleave-low for V32HI, emitted as vpunpcklwd with EVEX prefix.
;; The index list pairs element i with element i+32 for i in 0..3, 8..11,
;; 16..19 and 24..27, i.e. the lower four words of every group of eight.
;; The insn condition line is not visible in this chunk.
10877 (define_insn "<mask_codefor>avx512bw_interleave_lowv32hi<mask_name>"
10878 [(set (match_operand:V32HI 0 "register_operand" "=v")
10881 (match_operand:V32HI 1 "register_operand" "v")
10882 (match_operand:V32HI 2 "nonimmediate_operand" "vm"))
10883 (parallel [(const_int 0) (const_int 32)
10884 (const_int 1) (const_int 33)
10885 (const_int 2) (const_int 34)
10886 (const_int 3) (const_int 35)
10887 (const_int 8) (const_int 40)
10888 (const_int 9) (const_int 41)
10889 (const_int 10) (const_int 42)
10890 (const_int 11) (const_int 43)
10891 (const_int 16) (const_int 48)
10892 (const_int 17) (const_int 49)
10893 (const_int 18) (const_int 50)
10894 (const_int 19) (const_int 51)
10895 (const_int 24) (const_int 56)
10896 (const_int 25) (const_int 57)
10897 (const_int 26) (const_int 58)
10898 (const_int 27) (const_int 59)])))]
10900 "vpunpcklwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10901 [(set_attr "type" "sselog")
10902 (set_attr "prefix" "evex")
10903 (set_attr "mode" "XI")])
;; Word interleave-low for V16HI, emitted as three-operand vpunpcklwd.
;; The index list pairs element i with element i+16 for i in 0..3 and
;; 8..11, i.e. the lower four words of each group of eight.
;; Enabled under TARGET_AVX2 together with <mask_mode512bit_condition>.
10905 (define_insn "avx2_interleave_lowv16hi<mask_name>"
10906 [(set (match_operand:V16HI 0 "register_operand" "=v")
10909 (match_operand:V16HI 1 "register_operand" "v")
10910 (match_operand:V16HI 2 "nonimmediate_operand" "vm"))
10911 (parallel [(const_int 0) (const_int 16)
10912 (const_int 1) (const_int 17)
10913 (const_int 2) (const_int 18)
10914 (const_int 3) (const_int 19)
10915 (const_int 8) (const_int 24)
10916 (const_int 9) (const_int 25)
10917 (const_int 10) (const_int 26)
10918 (const_int 11) (const_int 27)])))]
10919 "TARGET_AVX2 && <mask_mode512bit_condition>"
10920 "vpunpcklwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10921 [(set_attr "type" "sselog")
10922 (set_attr "prefix" "maybe_evex")
10923 (set_attr "mode" "OI")])
;; Word interleave-low for V8HI.  The index list pairs element i with
;; element i+8 for i in 0..3.
;; Two alternatives, selected by the "isa" attribute:
;;   noavx: two-operand punpcklwd, operand 1 tied to the destination ("0");
;;   avx:   three-operand vpunpcklwd, optionally masked via <mask_operand3>.
10925 (define_insn "vec_interleave_lowv8hi<mask_name>"
10926 [(set (match_operand:V8HI 0 "register_operand" "=x,v")
10929 (match_operand:V8HI 1 "register_operand" "0,v")
10930 (match_operand:V8HI 2 "nonimmediate_operand" "xm,vm"))
10931 (parallel [(const_int 0) (const_int 8)
10932 (const_int 1) (const_int 9)
10933 (const_int 2) (const_int 10)
10934 (const_int 3) (const_int 11)])))]
10935 "TARGET_SSE2 && <mask_mode512bit_condition>"
10937 punpcklwd\t{%2, %0|%0, %2}
10938 vpunpcklwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10939 [(set_attr "isa" "noavx,avx")
10940 (set_attr "type" "sselog")
10941 (set_attr "prefix_data16" "1,*")
10942 (set_attr "prefix" "orig,maybe_evex")
10943 (set_attr "mode" "TI")])
;; Doubleword interleave-high for V8SI, emitted as three-operand vpunpckhdq.
;; The index list pairs element i with element i+8 for i in 2..3 and 6..7,
;; i.e. the upper two doublewords of each group of four.
;; Enabled under TARGET_AVX2 together with <mask_mode512bit_condition>.
10945 (define_insn "avx2_interleave_highv8si<mask_name>"
10946 [(set (match_operand:V8SI 0 "register_operand" "=v")
10949 (match_operand:V8SI 1 "register_operand" "v")
10950 (match_operand:V8SI 2 "nonimmediate_operand" "vm"))
10951 (parallel [(const_int 2) (const_int 10)
10952 (const_int 3) (const_int 11)
10953 (const_int 6) (const_int 14)
10954 (const_int 7) (const_int 15)])))]
10955 "TARGET_AVX2 && <mask_mode512bit_condition>"
10956 "vpunpckhdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10957 [(set_attr "type" "sselog")
10958 (set_attr "prefix" "maybe_evex")
10959 (set_attr "mode" "OI")])
;; Doubleword interleave-high for V16SI, emitted as vpunpckhdq with EVEX
;; prefix.  The index list pairs element i with element i+16 for i in
;; 2..3, 6..7, 10..11 and 14..15 (upper two doublewords of each group of
;; four).  The insn condition line is not visible in this chunk.
10961 (define_insn "<mask_codefor>avx512f_interleave_highv16si<mask_name>"
10962 [(set (match_operand:V16SI 0 "register_operand" "=v")
10965 (match_operand:V16SI 1 "register_operand" "v")
10966 (match_operand:V16SI 2 "nonimmediate_operand" "vm"))
10967 (parallel [(const_int 2) (const_int 18)
10968 (const_int 3) (const_int 19)
10969 (const_int 6) (const_int 22)
10970 (const_int 7) (const_int 23)
10971 (const_int 10) (const_int 26)
10972 (const_int 11) (const_int 27)
10973 (const_int 14) (const_int 30)
10974 (const_int 15) (const_int 31)])))]
10976 "vpunpckhdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10977 [(set_attr "type" "sselog")
10978 (set_attr "prefix" "evex")
10979 (set_attr "mode" "XI")])
;; Doubleword interleave-high for V4SI.  The index list pairs element i
;; with element i+4 for i in 2..3.
;; Two alternatives, selected by the "isa" attribute:
;;   noavx: two-operand punpckhdq, operand 1 tied to the destination ("0");
;;   avx:   three-operand vpunpckhdq, optionally masked via <mask_operand3>.
10982 (define_insn "vec_interleave_highv4si<mask_name>"
10983 [(set (match_operand:V4SI 0 "register_operand" "=x,v")
10986 (match_operand:V4SI 1 "register_operand" "0,v")
10987 (match_operand:V4SI 2 "nonimmediate_operand" "xm,vm"))
10988 (parallel [(const_int 2) (const_int 6)
10989 (const_int 3) (const_int 7)])))]
10990 "TARGET_SSE2 && <mask_mode512bit_condition>"
10992 punpckhdq\t{%2, %0|%0, %2}
10993 vpunpckhdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10994 [(set_attr "isa" "noavx,avx")
10995 (set_attr "type" "sselog")
10996 (set_attr "prefix_data16" "1,*")
10997 (set_attr "prefix" "orig,maybe_vex")
10998 (set_attr "mode" "TI")])
;; Doubleword interleave-low for V8SI, emitted as three-operand vpunpckldq.
;; The index list pairs element i with element i+8 for i in 0..1 and 4..5,
;; i.e. the lower two doublewords of each group of four.
;; Enabled under TARGET_AVX2 together with <mask_mode512bit_condition>.
11000 (define_insn "avx2_interleave_lowv8si<mask_name>"
11001 [(set (match_operand:V8SI 0 "register_operand" "=v")
11004 (match_operand:V8SI 1 "register_operand" "v")
11005 (match_operand:V8SI 2 "nonimmediate_operand" "vm"))
11006 (parallel [(const_int 0) (const_int 8)
11007 (const_int 1) (const_int 9)
11008 (const_int 4) (const_int 12)
11009 (const_int 5) (const_int 13)])))]
11010 "TARGET_AVX2 && <mask_mode512bit_condition>"
11011 "vpunpckldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11012 [(set_attr "type" "sselog")
11013 (set_attr "prefix" "maybe_evex")
11014 (set_attr "mode" "OI")])
;; Doubleword interleave-low for V16SI, emitted as vpunpckldq with EVEX
;; prefix.  The index list pairs element i with element i+16 for i in
;; 0..1, 4..5, 8..9 and 12..13 (lower two doublewords of each group of
;; four).  The insn condition line is not visible in this chunk.
11016 (define_insn "<mask_codefor>avx512f_interleave_lowv16si<mask_name>"
11017 [(set (match_operand:V16SI 0 "register_operand" "=v")
11020 (match_operand:V16SI 1 "register_operand" "v")
11021 (match_operand:V16SI 2 "nonimmediate_operand" "vm"))
11022 (parallel [(const_int 0) (const_int 16)
11023 (const_int 1) (const_int 17)
11024 (const_int 4) (const_int 20)
11025 (const_int 5) (const_int 21)
11026 (const_int 8) (const_int 24)
11027 (const_int 9) (const_int 25)
11028 (const_int 12) (const_int 28)
11029 (const_int 13) (const_int 29)])))]
11031 "vpunpckldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11032 [(set_attr "type" "sselog")
11033 (set_attr "prefix" "evex")
11034 (set_attr "mode" "XI")])
;; Doubleword interleave-low for V4SI.  The index list pairs element i
;; with element i+4 for i in 0..1.
;; Two alternatives, selected by the "isa" attribute:
;;   noavx: two-operand punpckldq, operand 1 tied to the destination ("0");
;;   avx:   three-operand vpunpckldq, optionally masked via <mask_operand3>.
;; NOTE(review): "prefix" is "orig,vex" here but "orig,maybe_vex" in
;; vec_interleave_highv4si -- confirm which is intended.
11036 (define_insn "vec_interleave_lowv4si<mask_name>"
11037 [(set (match_operand:V4SI 0 "register_operand" "=x,v")
11040 (match_operand:V4SI 1 "register_operand" "0,v")
11041 (match_operand:V4SI 2 "nonimmediate_operand" "xm,vm"))
11042 (parallel [(const_int 0) (const_int 4)
11043 (const_int 1) (const_int 5)])))]
11044 "TARGET_SSE2 && <mask_mode512bit_condition>"
11046 punpckldq\t{%2, %0|%0, %2}
11047 vpunpckldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11048 [(set_attr "isa" "noavx,avx")
11049 (set_attr "type" "sselog")
11050 (set_attr "prefix_data16" "1,*")
11051 (set_attr "prefix" "orig,vex")
11052 (set_attr "mode" "TI")])
;; Expander for the generic vec_interleave_high operation on VI_256 modes.
;; It emits three insns and a move:
;;   t1 = avx2_interleave_low  (op1, op2)
;;   t2 = avx2_interleave_high (op1, op2)
;;   t3 = avx2_permv2ti (t1, t2, 1 + (3 << 4)), with t1/t2 viewed as V4DI
;;   op0 = t3 viewed back in <MODE>.
;; Presumably the immediate 0x31 selects the upper 128-bit half of t1 and of
;; t2 -- confirm against the vperm2i128 immediate encoding.
;; The expander's condition line is not visible in this chunk.
11054 (define_expand "vec_interleave_high<mode>"
11055 [(match_operand:VI_256 0 "register_operand" "=x")
11056 (match_operand:VI_256 1 "register_operand" "x")
11057 (match_operand:VI_256 2 "nonimmediate_operand" "xm")]
11060 rtx t1 = gen_reg_rtx (<MODE>mode);
11061 rtx t2 = gen_reg_rtx (<MODE>mode);
11062 rtx t3 = gen_reg_rtx (V4DImode);
11063 emit_insn (gen_avx2_interleave_low<mode> (t1, operands[1], operands[2]));
11064 emit_insn (gen_avx2_interleave_high<mode> (t2, operands[1], operands[2]));
11065 emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, t1),
11066 gen_lowpart (V4DImode, t2),
11067 GEN_INT (1 + (3 << 4))));
11068 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, t3));
;; Expander for the generic vec_interleave_low operation on VI_256 modes.
;; Same sequence as vec_interleave_high<mode>, differing only in the
;; permute immediate:
;;   t1 = avx2_interleave_low  (op1, op2)
;;   t2 = avx2_interleave_high (op1, op2)
;;   t3 = avx2_permv2ti (t1, t2, 0 + (2 << 4)), with t1/t2 viewed as V4DI
;;   op0 = t3 viewed back in <MODE>.
;; Presumably the immediate 0x20 selects the lower 128-bit half of t1 and of
;; t2 -- confirm against the vperm2i128 immediate encoding.
;; The expander's condition line is not visible in this chunk.
11072 (define_expand "vec_interleave_low<mode>"
11073 [(match_operand:VI_256 0 "register_operand" "=x")
11074 (match_operand:VI_256 1 "register_operand" "x")
11075 (match_operand:VI_256 2 "nonimmediate_operand" "xm")]
11078 rtx t1 = gen_reg_rtx (<MODE>mode);
11079 rtx t2 = gen_reg_rtx (<MODE>mode);
11080 rtx t3 = gen_reg_rtx (V4DImode);
11081 emit_insn (gen_avx2_interleave_low<mode> (t1, operands[1], operands[2]));
11082 emit_insn (gen_avx2_interleave_high<mode> (t2, operands[1], operands[2]));
11083 emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, t1),
11084 gen_lowpart (V4DImode, t2),
11085 GEN_INT (0 + (2 << 4))));
11086 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, t3));
11090 ;; Modes handled by pinsr patterns.
;; V8HI carries no extra condition; V16QI and V4SI require TARGET_SSE4_1,
;; and V2DI additionally requires TARGET_64BIT.
11091 (define_mode_iterator PINSR_MODE
11092 [(V16QI "TARGET_SSE4_1") V8HI
11093 (V4SI "TARGET_SSE4_1")
11094 (V2DI "TARGET_SSE4_1 && TARGET_64BIT")])
;; Pattern-name prefix for each PINSR_MODE mode: "sse2" for V8HI and
;; "sse4_1" for V16QI, V4SI and V2DI.  Used to build the pinsr insn name.
11096 (define_mode_attr sse2p4_1
11097 [(V16QI "sse4_1") (V8HI "sse2")
11098 (V4SI "sse4_1") (V2DI "sse4_1")])
11100 ;; sse4_1_pinsrd must come before sse2_loadld since it is preferred.
;; Insert scalar operand 2 into one element of vector operand 1.
;; Operand 3 is the vec_merge selector; the condition requires that
;; exact_log2 of it is a valid element index (< GET_MODE_NUNITS), i.e. that
;; exactly one in-range bit is set.  The output code rewrites operand 3 to
;; that element index before printing it as the immediate.
;; Alternatives: 0/1 are the non-AVX two-operand pinsr* forms (register /
;; memory source), 2/3 the AVX three-operand vpinsr* forms.  When the scalar
;; mode is narrower than SImode, the templates print operand 2 with the %k
;; modifier.
;; Several interior lines (part of the condition, braces, case labels and the
;; attribute "if_then_else" heads) are not visible in this chunk.
11101 (define_insn "<sse2p4_1>_pinsr<ssemodesuffix>"
11102 [(set (match_operand:PINSR_MODE 0 "register_operand" "=x,x,x,x")
11103 (vec_merge:PINSR_MODE
11104 (vec_duplicate:PINSR_MODE
11105 (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "r,m,r,m"))
11106 (match_operand:PINSR_MODE 1 "register_operand" "0,0,x,x")
11107 (match_operand:SI 3 "const_int_operand")))]
11109 && ((unsigned) exact_log2 (INTVAL (operands[3]))
11110 < GET_MODE_NUNITS (<MODE>mode))"
11112 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
11114 switch (which_alternative)
11117 if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
11118 return "pinsr<ssemodesuffix>\t{%3, %k2, %0|%0, %k2, %3}";
11121 return "pinsr<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}";
11123 if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
11124 return "vpinsr<ssemodesuffix>\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
11127 return "vpinsr<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
11129 gcc_unreachable ();
11132 [(set_attr "isa" "noavx,noavx,avx,avx")
11133 (set_attr "type" "sselog")
11134 (set (attr "prefix_rex")
11136 (and (not (match_test "TARGET_AVX"))
11137 (eq (const_string "<MODE>mode") (const_string "V2DImode")))
11139 (const_string "*")))
11140 (set (attr "prefix_data16")
11142 (and (not (match_test "TARGET_AVX"))
11143 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
11145 (const_string "*")))
11146 (set (attr "prefix_extra")
11148 (and (not (match_test "TARGET_AVX"))
11149 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
11151 (const_string "1")))
11152 (set_attr "length_immediate" "1")
11153 (set_attr "prefix" "orig,orig,vex,vex")
11154 (set_attr "mode" "TI")])
;; Masked quarter-vector insert expander for AVX512_VEC modes.
;; Operands: 0 = destination, 1 = source vector, 2 = quarter-size vector to
;; insert, 3 = position immediate (0..3), 4 = merge source, 5 = mask register.
;; The position is converted into a vec_merge selector constant for the
;; "_1_mask" insn:
;;   4-byte elements: 0xFFFF ^ (0xF000 >> position * 4)
;;   otherwise:       0xFF   ^ (0xC0   >> position * 2)
;; NOTE(review): position 0 yields 0x0FFF / 0x3F, i.e. the *high* selector
;; bits are cleared.  If vec_merge bit i corresponds to element i, that
;; describes replacing the top quarter rather than the bottom one.  The
;; matching "_1" insn decodes the same constants, so the emitted immediate is
;; self-consistent, but the RTL semantics should be confirmed.
;; Declarations of "mask"/"selector" and the condition are not visible here.
11156 (define_expand "<extract_type>_vinsert<shuffletype><extract_suf>_mask"
11157 [(match_operand:AVX512_VEC 0 "register_operand")
11158 (match_operand:AVX512_VEC 1 "register_operand")
11159 (match_operand:<ssequartermode> 2 "nonimmediate_operand")
11160 (match_operand:SI 3 "const_0_to_3_operand")
11161 (match_operand:AVX512_VEC 4 "register_operand")
11162 (match_operand:<avx512fmaskmode> 5 "register_operand")]
11166 mask = INTVAL (operands[3]);
11167 selector = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) == 4 ?
11168 0xFFFF ^ (0xF000 >> mask * 4)
11169 : 0xFF ^ (0xC0 >> mask * 2);
11170 emit_insn (gen_<extract_type>_vinsert<shuffletype><extract_suf>_1_mask
11171 (operands[0], operands[1], operands[2], GEN_INT (selector),
11172 operands[4], operands[5]));
;; Quarter-vector insert insn for AVX512_VEC modes, expressed as a vec_merge
;; of operand 1 with a vec_duplicate of operand 2 under selector operand 3.
;; The output code recognises eight selector constants (0xFFF, 0xF0FF,
;; 0xFF0F, 0xFFF0 and 0x3F, 0xCF, 0xF3, 0xFC), aborts via gcc_unreachable
;; for anything else, and replaces operand 3 with the computed "mask"
;; value, which is printed as the vinsert immediate.  The assignments to
;; "mask" in each branch are not visible in this chunk; presumably 0..3 in
;; the order listed.
;; NOTE(review): see the selector-orientation question on the "_mask"
;; expander that generates these constants; both must change together.
11176 (define_insn "<mask_codefor><extract_type>_vinsert<shuffletype><extract_suf>_1<mask_name>"
11177 [(set (match_operand:AVX512_VEC 0 "register_operand" "=v")
11178 (vec_merge:AVX512_VEC
11179 (match_operand:AVX512_VEC 1 "register_operand" "v")
11180 (vec_duplicate:AVX512_VEC
11181 (match_operand:<ssequartermode> 2 "nonimmediate_operand" "vm"))
11182 (match_operand:SI 3 "const_int_operand" "n")))]
11186 int selector = INTVAL (operands[3]);
11188 if (selector == 0xFFF || selector == 0x3F)
11190 else if ( selector == 0xF0FF || selector == 0xCF)
11192 else if ( selector == 0xFF0F || selector == 0xF3)
11194 else if ( selector == 0xFFF0 || selector == 0xFC)
11197 gcc_unreachable ();
11199 operands[3] = GEN_INT (mask);
11201 return "vinsert<shuffletype><extract_suf>\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}";
11203 [(set_attr "type" "sselog")
11204 (set_attr "length_immediate" "1")
11205 (set_attr "prefix" "evex")
11206 (set_attr "mode" "<sseinsnmode>")])
;; Masked half-vector insert expander for AVX512_VEC_2 modes.
;; Operands: 0 = destination, 1 = source vector, 2 = half-size vector to
;; insert, 3 = position immediate (0 or 1), 4 = merge source, 5 = mask.
;; Dispatches to vec_set_lo_<mode>_mask or vec_set_hi_<mode>_mask.  The test
;; on "mask" that chooses between them is not visible in this chunk;
;; presumably 0 selects the "lo" form -- confirm.
11208 (define_expand "<extract_type_2>_vinsert<shuffletype><extract_suf_2>_mask"
11209 [(match_operand:AVX512_VEC_2 0 "register_operand")
11210 (match_operand:AVX512_VEC_2 1 "register_operand")
11211 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand")
11212 (match_operand:SI 3 "const_0_to_1_operand")
11213 (match_operand:AVX512_VEC_2 4 "register_operand")
11214 (match_operand:<avx512fmaskmode> 5 "register_operand")]
11217 int mask = INTVAL (operands[3]);
11219 emit_insn (gen_vec_set_lo_<mode>_mask
11220 (operands[0], operands[1], operands[2],
11221 operands[4], operands[5]));
11223 emit_insn (gen_vec_set_hi_<mode>_mask
11224 (operands[0], operands[1], operands[2],
11225 operands[4], operands[5]));
;; Set the low half of a V16FI vector: operand 2 (half-size vector) is
;; combined with elements 8..15 selected from operand 1.  The combining RTL
;; line is not visible here; presumably a vec_concat with operand 2 first.
;; Emitted as vinsert{f,i}32x8 with immediate 0.
;; The insn condition line is not visible in this chunk.
11229 (define_insn "vec_set_lo_<mode><mask_name>"
11230 [(set (match_operand:V16FI 0 "register_operand" "=v")
11232 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
11233 (vec_select:<ssehalfvecmode>
11234 (match_operand:V16FI 1 "register_operand" "v")
11235 (parallel [(const_int 8) (const_int 9)
11236 (const_int 10) (const_int 11)
11237 (const_int 12) (const_int 13)
11238 (const_int 14) (const_int 15)]))))]
11240 "vinsert<shuffletype>32x8\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, $0x0}"
11241 [(set_attr "type" "sselog")
11242 (set_attr "length_immediate" "1")
11243 (set_attr "prefix" "evex")
11244 (set_attr "mode" "<sseinsnmode>")])
;; Set the high half of a V16FI vector: operand 2 (half-size vector) is
;; combined with elements 0..7 selected from operand 1.
;; Emitted as vinsert{f,i}32x8 with immediate 1.
;; NOTE(review): operand 2 is listed before the vec_select of operand 1, the
;; same order as in vec_set_lo.  For a "hi" insert one would expect the
;; retained low half to come first in the combining rtx; the enclosing line
;; is not visible in this chunk, so confirm the operand order there.
11246 (define_insn "vec_set_hi_<mode><mask_name>"
11247 [(set (match_operand:V16FI 0 "register_operand" "=v")
11249 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
11250 (vec_select:<ssehalfvecmode>
11251 (match_operand:V16FI 1 "register_operand" "v")
11252 (parallel [(const_int 0) (const_int 1)
11253 (const_int 2) (const_int 3)
11254 (const_int 4) (const_int 5)
11255 (const_int 6) (const_int 7)]))))]
11257 "vinsert<shuffletype>32x8\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, $0x1}"
11258 [(set_attr "type" "sselog")
11259 (set_attr "length_immediate" "1")
11260 (set_attr "prefix" "evex")
11261 (set_attr "mode" "<sseinsnmode>")])
;; Set the low half of a V8FI vector: operand 2 (half-size vector) is
;; combined with elements 4..7 selected from operand 1.  The combining RTL
;; line is not visible here; presumably a vec_concat with operand 2 first.
;; Emitted as vinsert{f,i}64x4 with immediate 0.
;; The insn condition line is not visible in this chunk.
11263 (define_insn "vec_set_lo_<mode><mask_name>"
11264 [(set (match_operand:V8FI 0 "register_operand" "=v")
11266 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
11267 (vec_select:<ssehalfvecmode>
11268 (match_operand:V8FI 1 "register_operand" "v")
11269 (parallel [(const_int 4) (const_int 5)
11270 (const_int 6) (const_int 7)]))))]
11272 "vinsert<shuffletype>64x4\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, $0x0}"
11273 [(set_attr "type" "sselog")
11274 (set_attr "length_immediate" "1")
11275 (set_attr "prefix" "evex")
11276 (set_attr "mode" "XI")])
;; Set the high half of a V8FI vector: operand 2 (half-size vector) is
;; combined with elements 0..3 selected from operand 1.
;; Emitted as vinsert{f,i}64x4 with immediate 1.
;; NOTE(review): operand 2 is listed before the vec_select of operand 1, the
;; same order as in vec_set_lo; the enclosing combining rtx is not visible
;; in this chunk, so confirm the operand order there.
11278 (define_insn "vec_set_hi_<mode><mask_name>"
11279 [(set (match_operand:V8FI 0 "register_operand" "=v")
11281 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
11282 (vec_select:<ssehalfvecmode>
11283 (match_operand:V8FI 1 "register_operand" "v")
11284 (parallel [(const_int 0) (const_int 1)
11285 (const_int 2) (const_int 3)]))))]
11287 "vinsert<shuffletype>64x4\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, $0x1}"
11288 [(set_attr "type" "sselog")
11289 (set_attr "length_immediate" "1")
11290 (set_attr "prefix" "evex")
11291 (set_attr "mode" "XI")])
;; Masked vshuf{f,i}64x2 expander for VI8F_256 modes.
;; Operands: 0 = destination, 1/2 = sources, 3 = immediate (0..3),
;; 4 = merge source, 5 = QImode mask.
;; Each of the two low immediate bits picks one pair of adjacent elements:
;;   bit 0 -> indices (b*2 + 0, b*2 + 1)
;;   bit 1 -> indices (b*2 + 4, b*2 + 5)
;; and the four indices are handed to the "_1_mask" insn.
;; NOTE(review): the generated indices are drawn from {0, 2} for the first
;; slot, {1, 3} for the second, {4, 6} and {5, 7} for the last two; the
;; insn's const_0_to_3 / const_4_to_7 predicates accept more than that,
;; and only its extra adjacency condition ties each pair together.
11293 (define_expand "avx512dq_shuf_<shuffletype>64x2_mask"
11294 [(match_operand:VI8F_256 0 "register_operand")
11295 (match_operand:VI8F_256 1 "register_operand")
11296 (match_operand:VI8F_256 2 "nonimmediate_operand")
11297 (match_operand:SI 3 "const_0_to_3_operand")
11298 (match_operand:VI8F_256 4 "register_operand")
11299 (match_operand:QI 5 "register_operand")]
11302 int mask = INTVAL (operands[3]);
11303 emit_insn (gen_avx512dq_shuf_<shuffletype>64x2_1_mask
11304 (operands[0], operands[1], operands[2],
11305 GEN_INT (((mask >> 0) & 1) * 2 + 0),
11306 GEN_INT (((mask >> 0) & 1) * 2 + 1),
11307 GEN_INT (((mask >> 1) & 1) * 2 + 4),
11308 GEN_INT (((mask >> 1) & 1) * 2 + 5),
11309 operands[4], operands[5]));
;; vshuf{f,i}64x2 insn for VI8F_256 modes: a vec_select of four elements
;; from the concatenation of operands 1 and 2.
;; The condition requires each index pair to be adjacent
;; (op3 + 1 == op4, op5 + 1 == op6).  The output code rebuilds the 2-bit
;; immediate: bit 0 = op3 / 2, bit 1 = (op5 - 4) / 2, and stores it back in
;; operand 3 for printing.
;; NOTE(review): "mode" is "XI" although the pattern iterates over
;; VI8F_256; the sibling avx512vl 32x4 pattern uses <sseinsnmode> --
;; confirm whether XI is intended.
11313 (define_insn "<mask_codefor>avx512dq_shuf_<shuffletype>64x2_1<mask_name>"
11314 [(set (match_operand:VI8F_256 0 "register_operand" "=v")
11315 (vec_select:VI8F_256
11316 (vec_concat:<ssedoublemode>
11317 (match_operand:VI8F_256 1 "register_operand" "v")
11318 (match_operand:VI8F_256 2 "nonimmediate_operand" "vm"))
11319 (parallel [(match_operand 3 "const_0_to_3_operand")
11320 (match_operand 4 "const_0_to_3_operand")
11321 (match_operand 5 "const_4_to_7_operand")
11322 (match_operand 6 "const_4_to_7_operand")])))]
11324 && (INTVAL (operands[3]) == (INTVAL (operands[4]) - 1)
11325 && INTVAL (operands[5]) == (INTVAL (operands[6]) - 1))"
11328 mask = INTVAL (operands[3]) / 2;
11329 mask |= (INTVAL (operands[5]) - 4) / 2 << 1;
11330 operands[3] = GEN_INT (mask);
11331 return "vshuf<shuffletype>64x2\t{%3, %2, %1, %0<mask_operand7>|%0<mask_operand7>, %1, %2, %3}";
11333 [(set_attr "type" "sselog")
11334 (set_attr "length_immediate" "1")
11335 (set_attr "prefix" "evex")
11336 (set_attr "mode" "XI")])
;; Masked vshuf{f,i}64x2 expander for V8FI modes.
;; Operands: 0 = destination, 1/2 = sources, 3 = 8-bit immediate,
;; 4 = merge source, 5 = QImode mask.
;; The immediate is split into four 2-bit fields f0..f3 (bits 1:0, 3:2,
;; 5:4, 7:6).  Each field yields a pair of adjacent element indices:
;;   f0, f1 -> (f*2,     f*2 + 1)
;;   f2, f3 -> (f*2 + 8, f*2 + 9)
;; The eight indices are passed to the "_1_mask" insn.
11338 (define_expand "avx512f_shuf_<shuffletype>64x2_mask"
11339 [(match_operand:V8FI 0 "register_operand")
11340 (match_operand:V8FI 1 "register_operand")
11341 (match_operand:V8FI 2 "nonimmediate_operand")
11342 (match_operand:SI 3 "const_0_to_255_operand")
11343 (match_operand:V8FI 4 "register_operand")
11344 (match_operand:QI 5 "register_operand")]
11347 int mask = INTVAL (operands[3]);
11348 emit_insn (gen_avx512f_shuf_<shuffletype>64x2_1_mask
11349 (operands[0], operands[1], operands[2],
11350 GEN_INT (((mask >> 0) & 3) * 2),
11351 GEN_INT (((mask >> 0) & 3) * 2 + 1),
11352 GEN_INT (((mask >> 2) & 3) * 2),
11353 GEN_INT (((mask >> 2) & 3) * 2 + 1),
11354 GEN_INT (((mask >> 4) & 3) * 2 + 8),
11355 GEN_INT (((mask >> 4) & 3) * 2 + 9),
11356 GEN_INT (((mask >> 6) & 3) * 2 + 8),
11357 GEN_INT (((mask >> 6) & 3) * 2 + 9),
11358 operands[4], operands[5]));
;; vshuf{f,i}64x2 insn for V8FI modes: eight indices into the concatenation
;; of operands 1 and 2 (the vec_select header line is not visible here).
;; The first four indices must be 0..7 and the last four 8..15; the
;; condition further requires each consecutive index pair to be adjacent
;; (op3+1 == op4, op5+1 == op6, op7+1 == op8, op9+1 == op10).
;; The output code rebuilds the 8-bit immediate from the first index of each
;; pair: op3/2, op5/2 << 2, (op7-8)/2 << 4, (op9-8)/2 << 6, and stores it in
;; operand 3 for printing.
;; NOTE(review): the condition checks adjacency but not that the first index
;; of each pair is even; the "/ 2" in the mask computation assumes it.
11362 (define_insn "avx512f_shuf_<shuffletype>64x2_1<mask_name>"
11363 [(set (match_operand:V8FI 0 "register_operand" "=v")
11365 (vec_concat:<ssedoublemode>
11366 (match_operand:V8FI 1 "register_operand" "v")
11367 (match_operand:V8FI 2 "nonimmediate_operand" "vm"))
11368 (parallel [(match_operand 3 "const_0_to_7_operand")
11369 (match_operand 4 "const_0_to_7_operand")
11370 (match_operand 5 "const_0_to_7_operand")
11371 (match_operand 6 "const_0_to_7_operand")
11372 (match_operand 7 "const_8_to_15_operand")
11373 (match_operand 8 "const_8_to_15_operand")
11374 (match_operand 9 "const_8_to_15_operand")
11375 (match_operand 10 "const_8_to_15_operand")])))]
11377 && (INTVAL (operands[3]) == (INTVAL (operands[4]) - 1)
11378 && INTVAL (operands[5]) == (INTVAL (operands[6]) - 1)
11379 && INTVAL (operands[7]) == (INTVAL (operands[8]) - 1)
11380 && INTVAL (operands[9]) == (INTVAL (operands[10]) - 1))"
11383 mask = INTVAL (operands[3]) / 2;
11384 mask |= INTVAL (operands[5]) / 2 << 2;
11385 mask |= (INTVAL (operands[7]) - 8) / 2 << 4;
11386 mask |= (INTVAL (operands[9]) - 8) / 2 << 6;
11387 operands[3] = GEN_INT (mask);
11389 return "vshuf<shuffletype>64x2\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
11391 [(set_attr "type" "sselog")
11392 (set_attr "length_immediate" "1")
11393 (set_attr "prefix" "evex")
11394 (set_attr "mode" "<sseinsnmode>")])
;; Masked vshuf{f,i}32x4 expander for VI4F_256 modes.
;; Operands: 0 = destination, 1/2 = sources, 3 = immediate (0..3),
;; 4 = merge source, 5 = QImode mask.
;; Each of the two low immediate bits picks a run of four adjacent elements:
;;   bit 0 -> indices b*4 + 0 .. b*4 + 3
;;   bit 1 -> indices b*4 + 8 .. b*4 + 11
;; The eight indices are passed to the "_1_mask" insn.
11396 (define_expand "avx512vl_shuf_<shuffletype>32x4_mask"
11397 [(match_operand:VI4F_256 0 "register_operand")
11398 (match_operand:VI4F_256 1 "register_operand")
11399 (match_operand:VI4F_256 2 "nonimmediate_operand")
11400 (match_operand:SI 3 "const_0_to_3_operand")
11401 (match_operand:VI4F_256 4 "register_operand")
11402 (match_operand:QI 5 "register_operand")]
11405 int mask = INTVAL (operands[3]);
11406 emit_insn (gen_avx512vl_shuf_<shuffletype>32x4_1_mask
11407 (operands[0], operands[1], operands[2],
11408 GEN_INT (((mask >> 0) & 1) * 4 + 0),
11409 GEN_INT (((mask >> 0) & 1) * 4 + 1),
11410 GEN_INT (((mask >> 0) & 1) * 4 + 2),
11411 GEN_INT (((mask >> 0) & 1) * 4 + 3),
11412 GEN_INT (((mask >> 1) & 1) * 4 + 8),
11413 GEN_INT (((mask >> 1) & 1) * 4 + 9),
11414 GEN_INT (((mask >> 1) & 1) * 4 + 10),
11415 GEN_INT (((mask >> 1) & 1) * 4 + 11),
11416 operands[4], operands[5]));
;; vshuf{f,i}32x4 insn for VI4F_256 modes: a vec_select of eight elements
;; from the concatenation of operands 1 and 2.
;; Indices 3..6 must be 0..7 and indices 7..10 must be 8..15; the condition
;; also requires each group of four to be consecutive (op3, op3+1, op3+2,
;; op3+3 and likewise from op7).
;; The output code rebuilds the 2-bit immediate: bit 0 = op3 / 4,
;; bit 1 = (op7 - 8) / 4, and stores it in operand 3 for printing.
11420 (define_insn "<mask_codefor>avx512vl_shuf_<shuffletype>32x4_1<mask_name>"
11421 [(set (match_operand:VI4F_256 0 "register_operand" "=v")
11422 (vec_select:VI4F_256
11423 (vec_concat:<ssedoublemode>
11424 (match_operand:VI4F_256 1 "register_operand" "v")
11425 (match_operand:VI4F_256 2 "nonimmediate_operand" "vm"))
11426 (parallel [(match_operand 3 "const_0_to_7_operand")
11427 (match_operand 4 "const_0_to_7_operand")
11428 (match_operand 5 "const_0_to_7_operand")
11429 (match_operand 6 "const_0_to_7_operand")
11430 (match_operand 7 "const_8_to_15_operand")
11431 (match_operand 8 "const_8_to_15_operand")
11432 (match_operand 9 "const_8_to_15_operand")
11433 (match_operand 10 "const_8_to_15_operand")])))]
11435 && (INTVAL (operands[3]) == (INTVAL (operands[4]) - 1)
11436 && INTVAL (operands[3]) == (INTVAL (operands[5]) - 2)
11437 && INTVAL (operands[3]) == (INTVAL (operands[6]) - 3)
11438 && INTVAL (operands[7]) == (INTVAL (operands[8]) - 1)
11439 && INTVAL (operands[7]) == (INTVAL (operands[9]) - 2)
11440 && INTVAL (operands[7]) == (INTVAL (operands[10]) - 3))"
11443 mask = INTVAL (operands[3]) / 4;
11444 mask |= (INTVAL (operands[7]) - 8) / 4 << 1;
11445 operands[3] = GEN_INT (mask);
11447 return "vshuf<shuffletype>32x4\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
11449 [(set_attr "type" "sselog")
11450 (set_attr "length_immediate" "1")
11451 (set_attr "prefix" "evex")
11452 (set_attr "mode" "<sseinsnmode>")])
;; Masked vshuf{f,i}32x4 expander for V16FI modes.
;; Operands: 0 = destination, 1/2 = sources, 3 = 8-bit immediate,
;; 4 = merge source, 5 = HImode mask.
;; The immediate is split into four 2-bit fields f0..f3 (bits 1:0, 3:2,
;; 5:4, 7:6).  Each field yields a run of four adjacent element indices:
;;   f0, f1 -> f*4      .. f*4 + 3
;;   f2, f3 -> f*4 + 16 .. f*4 + 19
;; The sixteen indices are passed to the "_1_mask" insn.
11454 (define_expand "avx512f_shuf_<shuffletype>32x4_mask"
11455 [(match_operand:V16FI 0 "register_operand")
11456 (match_operand:V16FI 1 "register_operand")
11457 (match_operand:V16FI 2 "nonimmediate_operand")
11458 (match_operand:SI 3 "const_0_to_255_operand")
11459 (match_operand:V16FI 4 "register_operand")
11460 (match_operand:HI 5 "register_operand")]
11463 int mask = INTVAL (operands[3]);
11464 emit_insn (gen_avx512f_shuf_<shuffletype>32x4_1_mask
11465 (operands[0], operands[1], operands[2],
11466 GEN_INT (((mask >> 0) & 3) * 4),
11467 GEN_INT (((mask >> 0) & 3) * 4 + 1),
11468 GEN_INT (((mask >> 0) & 3) * 4 + 2),
11469 GEN_INT (((mask >> 0) & 3) * 4 + 3),
11470 GEN_INT (((mask >> 2) & 3) * 4),
11471 GEN_INT (((mask >> 2) & 3) * 4 + 1),
11472 GEN_INT (((mask >> 2) & 3) * 4 + 2),
11473 GEN_INT (((mask >> 2) & 3) * 4 + 3),
11474 GEN_INT (((mask >> 4) & 3) * 4 + 16),
11475 GEN_INT (((mask >> 4) & 3) * 4 + 17),
11476 GEN_INT (((mask >> 4) & 3) * 4 + 18),
11477 GEN_INT (((mask >> 4) & 3) * 4 + 19),
11478 GEN_INT (((mask >> 6) & 3) * 4 + 16),
11479 GEN_INT (((mask >> 6) & 3) * 4 + 17),
11480 GEN_INT (((mask >> 6) & 3) * 4 + 18),
11481 GEN_INT (((mask >> 6) & 3) * 4 + 19),
11482 operands[4], operands[5]));
;; vshuf{f,i}32x4 insn for V16FI modes: sixteen indices into the
;; concatenation of operands 1 and 2 (the vec_select header line is not
;; visible here).
;; Operands 3..10 must be 0..15 and operands 11..18 must be 16..31; the
;; condition also requires each group of four (starting at operands 3, 7,
;; 11 and 15) to be consecutive.
;; The output code rebuilds the 8-bit immediate from the first index of each
;; group: op3/4, op7/4 << 2, (op11-16)/4 << 4, (op15-16)/4 << 6, and stores
;; it in operand 3 for printing.
11486 (define_insn "avx512f_shuf_<shuffletype>32x4_1<mask_name>"
11487 [(set (match_operand:V16FI 0 "register_operand" "=v")
11489 (vec_concat:<ssedoublemode>
11490 (match_operand:V16FI 1 "register_operand" "v")
11491 (match_operand:V16FI 2 "nonimmediate_operand" "vm"))
11492 (parallel [(match_operand 3 "const_0_to_15_operand")
11493 (match_operand 4 "const_0_to_15_operand")
11494 (match_operand 5 "const_0_to_15_operand")
11495 (match_operand 6 "const_0_to_15_operand")
11496 (match_operand 7 "const_0_to_15_operand")
11497 (match_operand 8 "const_0_to_15_operand")
11498 (match_operand 9 "const_0_to_15_operand")
11499 (match_operand 10 "const_0_to_15_operand")
11500 (match_operand 11 "const_16_to_31_operand")
11501 (match_operand 12 "const_16_to_31_operand")
11502 (match_operand 13 "const_16_to_31_operand")
11503 (match_operand 14 "const_16_to_31_operand")
11504 (match_operand 15 "const_16_to_31_operand")
11505 (match_operand 16 "const_16_to_31_operand")
11506 (match_operand 17 "const_16_to_31_operand")
11507 (match_operand 18 "const_16_to_31_operand")])))]
11509 && (INTVAL (operands[3]) == (INTVAL (operands[4]) - 1)
11510 && INTVAL (operands[3]) == (INTVAL (operands[5]) - 2)
11511 && INTVAL (operands[3]) == (INTVAL (operands[6]) - 3)
11512 && INTVAL (operands[7]) == (INTVAL (operands[8]) - 1)
11513 && INTVAL (operands[7]) == (INTVAL (operands[9]) - 2)
11514 && INTVAL (operands[7]) == (INTVAL (operands[10]) - 3)
11515 && INTVAL (operands[11]) == (INTVAL (operands[12]) - 1)
11516 && INTVAL (operands[11]) == (INTVAL (operands[13]) - 2)
11517 && INTVAL (operands[11]) == (INTVAL (operands[14]) - 3)
11518 && INTVAL (operands[15]) == (INTVAL (operands[16]) - 1)
11519 && INTVAL (operands[15]) == (INTVAL (operands[17]) - 2)
11520 && INTVAL (operands[15]) == (INTVAL (operands[18]) - 3))"
11523 mask = INTVAL (operands[3]) / 4;
11524 mask |= INTVAL (operands[7]) / 4 << 2;
11525 mask |= (INTVAL (operands[11]) - 16) / 4 << 4;
11526 mask |= (INTVAL (operands[15]) - 16) / 4 << 6;
11527 operands[3] = GEN_INT (mask);
11529 return "vshuf<shuffletype>32x4\t{%3, %2, %1, %0<mask_operand19>|%0<mask_operand19>, %1, %2, %3}";
11531 [(set_attr "type" "sselog")
11532 (set_attr "length_immediate" "1")
11533 (set_attr "prefix" "evex")
11534 (set_attr "mode" "<sseinsnmode>")])
;; Masked vpshufd expander for V16SI.
;; Operands: 0 = destination, 1 = source, 2 = 8-bit immediate,
;; 3 = merge source, 4 = HImode mask.
;; The immediate's four 2-bit fields give the element selectors for the
;; first group of four; the same selectors are repeated with offsets +4, +8
;; and +12 for the remaining three groups, producing the sixteen indices
;; expected by avx512f_pshufd_1_mask.
11536 (define_expand "avx512f_pshufdv3_mask"
11537 [(match_operand:V16SI 0 "register_operand")
11538 (match_operand:V16SI 1 "nonimmediate_operand")
11539 (match_operand:SI 2 "const_0_to_255_operand")
11540 (match_operand:V16SI 3 "register_operand")
11541 (match_operand:HI 4 "register_operand")]
11544 int mask = INTVAL (operands[2]);
11545 emit_insn (gen_avx512f_pshufd_1_mask (operands[0], operands[1],
11546 GEN_INT ((mask >> 0) & 3),
11547 GEN_INT ((mask >> 2) & 3),
11548 GEN_INT ((mask >> 4) & 3),
11549 GEN_INT ((mask >> 6) & 3),
11550 GEN_INT (((mask >> 0) & 3) + 4),
11551 GEN_INT (((mask >> 2) & 3) + 4),
11552 GEN_INT (((mask >> 4) & 3) + 4),
11553 GEN_INT (((mask >> 6) & 3) + 4),
11554 GEN_INT (((mask >> 0) & 3) + 8),
11555 GEN_INT (((mask >> 2) & 3) + 8),
11556 GEN_INT (((mask >> 4) & 3) + 8),
11557 GEN_INT (((mask >> 6) & 3) + 8),
11558 GEN_INT (((mask >> 0) & 3) + 12),
11559 GEN_INT (((mask >> 2) & 3) + 12),
11560 GEN_INT (((mask >> 4) & 3) + 12),
11561 GEN_INT (((mask >> 6) & 3) + 12),
11562 operands[3], operands[4]));
;; vpshufd insn for V16SI: sixteen element indices into operand 1 (the
;; vec_select header line is not visible in this chunk).
;; Operands 2..5 are 0..3; operands 6..9, 10..13 and 14..17 must equal
;; operands 2..5 plus 4, 8 and 12 respectively, i.e. the same shuffle is
;; applied within every group of four elements.
;; The output code packs operands 2..5 into the 8-bit immediate (two bits
;; each, operand 2 in the low bits) and stores it in operand 2 for printing.
;; The declaration/initialisation of "mask" is not visible in this chunk.
11566 (define_insn "avx512f_pshufd_1<mask_name>"
11567 [(set (match_operand:V16SI 0 "register_operand" "=v")
11569 (match_operand:V16SI 1 "nonimmediate_operand" "vm")
11570 (parallel [(match_operand 2 "const_0_to_3_operand")
11571 (match_operand 3 "const_0_to_3_operand")
11572 (match_operand 4 "const_0_to_3_operand")
11573 (match_operand 5 "const_0_to_3_operand")
11574 (match_operand 6 "const_4_to_7_operand")
11575 (match_operand 7 "const_4_to_7_operand")
11576 (match_operand 8 "const_4_to_7_operand")
11577 (match_operand 9 "const_4_to_7_operand")
11578 (match_operand 10 "const_8_to_11_operand")
11579 (match_operand 11 "const_8_to_11_operand")
11580 (match_operand 12 "const_8_to_11_operand")
11581 (match_operand 13 "const_8_to_11_operand")
11582 (match_operand 14 "const_12_to_15_operand")
11583 (match_operand 15 "const_12_to_15_operand")
11584 (match_operand 16 "const_12_to_15_operand")
11585 (match_operand 17 "const_12_to_15_operand")])))]
11587 && INTVAL (operands[2]) + 4 == INTVAL (operands[6])
11588 && INTVAL (operands[3]) + 4 == INTVAL (operands[7])
11589 && INTVAL (operands[4]) + 4 == INTVAL (operands[8])
11590 && INTVAL (operands[5]) + 4 == INTVAL (operands[9])
11591 && INTVAL (operands[2]) + 8 == INTVAL (operands[10])
11592 && INTVAL (operands[3]) + 8 == INTVAL (operands[11])
11593 && INTVAL (operands[4]) + 8 == INTVAL (operands[12])
11594 && INTVAL (operands[5]) + 8 == INTVAL (operands[13])
11595 && INTVAL (operands[2]) + 12 == INTVAL (operands[14])
11596 && INTVAL (operands[3]) + 12 == INTVAL (operands[15])
11597 && INTVAL (operands[4]) + 12 == INTVAL (operands[16])
11598 && INTVAL (operands[5]) + 12 == INTVAL (operands[17])"
11601 mask |= INTVAL (operands[2]) << 0;
11602 mask |= INTVAL (operands[3]) << 2;
11603 mask |= INTVAL (operands[4]) << 4;
11604 mask |= INTVAL (operands[5]) << 6;
11605 operands[2] = GEN_INT (mask);
11607 return "vpshufd\t{%2, %1, %0<mask_operand18>|%0<mask_operand18>, %1, %2}";
11609 [(set_attr "type" "sselog1")
11610 (set_attr "prefix" "evex")
11611 (set_attr "length_immediate" "1")
11612 (set_attr "mode" "XI")])
;; Masked vpshufd expander for V8SI.
;; Operands: 0 = destination, 1 = source, 2 = 8-bit immediate,
;; 3 = merge source, 4 = QImode mask.
;; The immediate's four 2-bit fields give the selectors for the first group
;; of four elements and are repeated with offset +4 for the second group;
;; the eight indices are passed to avx2_pshufd_1_mask.
11614 (define_expand "avx512vl_pshufdv3_mask"
11615 [(match_operand:V8SI 0 "register_operand")
11616 (match_operand:V8SI 1 "nonimmediate_operand")
11617 (match_operand:SI 2 "const_0_to_255_operand")
11618 (match_operand:V8SI 3 "register_operand")
11619 (match_operand:QI 4 "register_operand")]
11622 int mask = INTVAL (operands[2]);
11623 emit_insn (gen_avx2_pshufd_1_mask (operands[0], operands[1],
11624 GEN_INT ((mask >> 0) & 3),
11625 GEN_INT ((mask >> 2) & 3),
11626 GEN_INT ((mask >> 4) & 3),
11627 GEN_INT ((mask >> 6) & 3),
11628 GEN_INT (((mask >> 0) & 3) + 4),
11629 GEN_INT (((mask >> 2) & 3) + 4),
11630 GEN_INT (((mask >> 4) & 3) + 4),
11631 GEN_INT (((mask >> 6) & 3) + 4),
11632 operands[3], operands[4]));
;; V8SI shuffle expander.  The 8-bit immediate in operand 2 is decomposed
;; into four 2-bit selectors; gen_avx2_pshufd_1 receives each selector
;; twice, the second time with 4 added.
11636 (define_expand "avx2_pshufdv3"
11637 [(match_operand:V8SI 0 "register_operand")
11638 (match_operand:V8SI 1 "nonimmediate_operand")
11639 (match_operand:SI 2 "const_0_to_255_operand")]
11642 int mask = INTVAL (operands[2]);
11643 emit_insn (gen_avx2_pshufd_1 (operands[0], operands[1],
11644 GEN_INT ((mask >> 0) & 3),
11645 GEN_INT ((mask >> 2) & 3),
11646 GEN_INT ((mask >> 4) & 3),
11647 GEN_INT ((mask >> 6) & 3),
11648 GEN_INT (((mask >> 0) & 3) + 4),
11649 GEN_INT (((mask >> 2) & 3) + 4),
11650 GEN_INT (((mask >> 4) & 3) + 4),
11651 GEN_INT (((mask >> 6) & 3) + 4)));
;; V8SI element-selection insn.  Operands 2-5 are selectors in 0..3 and
;; operands 6-9 selectors in 4..7; the insn condition requires each of
;; operands 6-9 to equal the corresponding operand 2-5 plus 4.  The output
;; code packs operands 2-5 into one immediate (two bits per selector),
;; stores it back into operands[2] and prints vpshufd.
11655 (define_insn "avx2_pshufd_1<mask_name>"
11656 [(set (match_operand:V8SI 0 "register_operand" "=v")
11658 (match_operand:V8SI 1 "nonimmediate_operand" "vm")
11659 (parallel [(match_operand 2 "const_0_to_3_operand")
11660 (match_operand 3 "const_0_to_3_operand")
11661 (match_operand 4 "const_0_to_3_operand")
11662 (match_operand 5 "const_0_to_3_operand")
11663 (match_operand 6 "const_4_to_7_operand")
11664 (match_operand 7 "const_4_to_7_operand")
11665 (match_operand 8 "const_4_to_7_operand")
11666 (match_operand 9 "const_4_to_7_operand")])))]
11668 && INTVAL (operands[2]) + 4 == INTVAL (operands[6])
11669 && INTVAL (operands[3]) + 4 == INTVAL (operands[7])
11670 && INTVAL (operands[4]) + 4 == INTVAL (operands[8])
11671 && INTVAL (operands[5]) + 4 == INTVAL (operands[9])
11672 && <mask_mode512bit_condition>"
11675 mask |= INTVAL (operands[2]) << 0;
11676 mask |= INTVAL (operands[3]) << 2;
11677 mask |= INTVAL (operands[4]) << 4;
11678 mask |= INTVAL (operands[5]) << 6;
11679 operands[2] = GEN_INT (mask);
11681 return "vpshufd\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}";
11683 [(set_attr "type" "sselog1")
11684 (set_attr "prefix" "maybe_evex")
11685 (set_attr "length_immediate" "1")
11686 (set_attr "mode" "OI")])
;; Masked V4SI shuffle expander.  Splits the 8-bit immediate in operand 2
;; into four 2-bit selectors and forwards them, together with operands 3
;; and 4, to gen_sse2_pshufd_1_mask.
;; NOTE(review): operands 3 (V4SI) and 4 (QI) are presumably the merge
;; source and the write mask -- confirm against the mask substitution.
11688 (define_expand "avx512vl_pshufd_mask"
11689 [(match_operand:V4SI 0 "register_operand")
11690 (match_operand:V4SI 1 "nonimmediate_operand")
11691 (match_operand:SI 2 "const_0_to_255_operand")
11692 (match_operand:V4SI 3 "register_operand")
11693 (match_operand:QI 4 "register_operand")]
11696 int mask = INTVAL (operands[2]);
11697 emit_insn (gen_sse2_pshufd_1_mask (operands[0], operands[1],
11698 GEN_INT ((mask >> 0) & 3),
11699 GEN_INT ((mask >> 2) & 3),
11700 GEN_INT ((mask >> 4) & 3),
11701 GEN_INT ((mask >> 6) & 3),
11702 operands[3], operands[4]));
;; V4SI shuffle expander.  Operand 2 is a constant integer; its low four
;; 2-bit fields are extracted and passed as separate selector operands to
;; gen_sse2_pshufd_1.
11706 (define_expand "sse2_pshufd"
11707 [(match_operand:V4SI 0 "register_operand")
11708 (match_operand:V4SI 1 "nonimmediate_operand")
11709 (match_operand:SI 2 "const_int_operand")]
11712 int mask = INTVAL (operands[2]);
11713 emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
11714 GEN_INT ((mask >> 0) & 3),
11715 GEN_INT ((mask >> 2) & 3),
11716 GEN_INT ((mask >> 4) & 3),
11717 GEN_INT ((mask >> 6) & 3)));
;; V4SI element-selection insn with four selectors (operands 2-5), each in
;; 0..3.  The output code packs the selectors into a single immediate, two
;; bits each, stores it in operands[2] and prints %vpshufd.
11721 (define_insn "sse2_pshufd_1<mask_name>"
11722 [(set (match_operand:V4SI 0 "register_operand" "=v")
11724 (match_operand:V4SI 1 "nonimmediate_operand" "vm")
11725 (parallel [(match_operand 2 "const_0_to_3_operand")
11726 (match_operand 3 "const_0_to_3_operand")
11727 (match_operand 4 "const_0_to_3_operand")
11728 (match_operand 5 "const_0_to_3_operand")])))]
11729 "TARGET_SSE2 && <mask_mode512bit_condition>"
11732 mask |= INTVAL (operands[2]) << 0;
11733 mask |= INTVAL (operands[3]) << 2;
11734 mask |= INTVAL (operands[4]) << 4;
11735 mask |= INTVAL (operands[5]) << 6;
11736 operands[2] = GEN_INT (mask);
11738 return "%vpshufd\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
11740 [(set_attr "type" "sselog1")
11741 (set_attr "prefix_data16" "1")
11742 (set_attr "prefix" "<mask_prefix2>")
11743 (set_attr "length_immediate" "1")
11744 (set_attr "mode" "TI")])
;; V32HI vpshuflw insn.  Operand 1 is the source vector (register or
;; memory) and operand 2 an immediate in 0..255 that is printed directly
;; as the instruction's immediate.
11746 (define_insn "<mask_codefor>avx512bw_pshuflwv32hi<mask_name>"
11747 [(set (match_operand:V32HI 0 "register_operand" "=v")
11749 [(match_operand:V32HI 1 "nonimmediate_operand" "vm")
11750 (match_operand:SI 2 "const_0_to_255_operand" "n")]
11753 "vpshuflw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11754 [(set_attr "type" "sselog")
11755 (set_attr "prefix" "evex")
11756 (set_attr "mode" "XI")])
;; Masked V16HI low-word shuffle expander.  The 8-bit immediate in operand
;; 2 is split into four 2-bit selectors; each is passed to
;; gen_avx2_pshuflw_1_mask once unchanged and once with 8 added, followed
;; by operands 3 and 4 untouched.
;; NOTE(review): operands 3 (V16HI) and 4 (HI) are presumably the merge
;; source and the write mask -- confirm against the mask substitution.
11758 (define_expand "avx512vl_pshuflwv3_mask"
11759 [(match_operand:V16HI 0 "register_operand")
11760 (match_operand:V16HI 1 "nonimmediate_operand")
11761 (match_operand:SI 2 "const_0_to_255_operand")
11762 (match_operand:V16HI 3 "register_operand")
11763 (match_operand:HI 4 "register_operand")]
11766 int mask = INTVAL (operands[2]);
11767 emit_insn (gen_avx2_pshuflw_1_mask (operands[0], operands[1],
11768 GEN_INT ((mask >> 0) & 3),
11769 GEN_INT ((mask >> 2) & 3),
11770 GEN_INT ((mask >> 4) & 3),
11771 GEN_INT ((mask >> 6) & 3),
11772 GEN_INT (((mask >> 0) & 3) + 8),
11773 GEN_INT (((mask >> 2) & 3) + 8),
11774 GEN_INT (((mask >> 4) & 3) + 8),
11775 GEN_INT (((mask >> 6) & 3) + 8),
11776 operands[3], operands[4]));
;; V16HI low-word shuffle expander.  The 8-bit immediate in operand 2 is
;; decomposed into four 2-bit selectors; gen_avx2_pshuflw_1 receives each
;; selector twice, the second time with 8 added.
11780 (define_expand "avx2_pshuflwv3"
11781 [(match_operand:V16HI 0 "register_operand")
11782 (match_operand:V16HI 1 "nonimmediate_operand")
11783 (match_operand:SI 2 "const_0_to_255_operand")]
11786 int mask = INTVAL (operands[2]);
11787 emit_insn (gen_avx2_pshuflw_1 (operands[0], operands[1],
11788 GEN_INT ((mask >> 0) & 3),
11789 GEN_INT ((mask >> 2) & 3),
11790 GEN_INT ((mask >> 4) & 3),
11791 GEN_INT ((mask >> 6) & 3),
11792 GEN_INT (((mask >> 0) & 3) + 8),
11793 GEN_INT (((mask >> 2) & 3) + 8),
11794 GEN_INT (((mask >> 4) & 3) + 8),
11795 GEN_INT (((mask >> 6) & 3) + 8)));
;; V16HI low-word selection insn.  Operands 2-5 are selectors in 0..3 and
;; operands 6-9 selectors in 8..11; the insn condition requires each of
;; operands 6-9 to equal the corresponding operand 2-5 plus 8.  The output
;; code packs operands 2-5 into one immediate (two bits per selector),
;; stores it in operands[2] and prints vpshuflw.
11799 (define_insn "avx2_pshuflw_1<mask_name>"
11800 [(set (match_operand:V16HI 0 "register_operand" "=v")
11802 (match_operand:V16HI 1 "nonimmediate_operand" "vm")
11803 (parallel [(match_operand 2 "const_0_to_3_operand")
11804 (match_operand 3 "const_0_to_3_operand")
11805 (match_operand 4 "const_0_to_3_operand")
11806 (match_operand 5 "const_0_to_3_operand")
11811 (match_operand 6 "const_8_to_11_operand")
11812 (match_operand 7 "const_8_to_11_operand")
11813 (match_operand 8 "const_8_to_11_operand")
11814 (match_operand 9 "const_8_to_11_operand")
11818 (const_int 15)])))]
11820 && INTVAL (operands[2]) + 8 == INTVAL (operands[6])
11821 && INTVAL (operands[3]) + 8 == INTVAL (operands[7])
11822 && INTVAL (operands[4]) + 8 == INTVAL (operands[8])
11823 && INTVAL (operands[5]) + 8 == INTVAL (operands[9])
11824 && <mask_mode512bit_condition>"
11827 mask |= INTVAL (operands[2]) << 0;
11828 mask |= INTVAL (operands[3]) << 2;
11829 mask |= INTVAL (operands[4]) << 4;
11830 mask |= INTVAL (operands[5]) << 6;
11831 operands[2] = GEN_INT (mask);
11833 return "vpshuflw\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}";
11835 [(set_attr "type" "sselog")
11836 (set_attr "prefix" "maybe_evex")
11837 (set_attr "length_immediate" "1")
11838 (set_attr "mode" "OI")])
;; Masked V8HI low-word shuffle expander.  Splits the 8-bit immediate in
;; operand 2 into four 2-bit selectors and forwards them, together with
;; operands 3 and 4, to gen_sse2_pshuflw_1_mask.
;; NOTE(review): operands 3 (V8HI) and 4 (QI) are presumably the merge
;; source and the write mask -- confirm against the mask substitution.
11840 (define_expand "avx512vl_pshuflw_mask"
11841 [(match_operand:V8HI 0 "register_operand")
11842 (match_operand:V8HI 1 "nonimmediate_operand")
11843 (match_operand:SI 2 "const_0_to_255_operand")
11844 (match_operand:V8HI 3 "register_operand")
11845 (match_operand:QI 4 "register_operand")]
11848 int mask = INTVAL (operands[2]);
11849 emit_insn (gen_sse2_pshuflw_1_mask (operands[0], operands[1],
11850 GEN_INT ((mask >> 0) & 3),
11851 GEN_INT ((mask >> 2) & 3),
11852 GEN_INT ((mask >> 4) & 3),
11853 GEN_INT ((mask >> 6) & 3),
11854 operands[3], operands[4]));
;; V8HI low-word shuffle expander.  Operand 2 is a constant integer; its
;; low four 2-bit fields are extracted and passed as separate selector
;; operands to gen_sse2_pshuflw_1.
11858 (define_expand "sse2_pshuflw"
11859 [(match_operand:V8HI 0 "register_operand")
11860 (match_operand:V8HI 1 "nonimmediate_operand")
11861 (match_operand:SI 2 "const_int_operand")]
11864 int mask = INTVAL (operands[2]);
11865 emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
11866 GEN_INT ((mask >> 0) & 3),
11867 GEN_INT ((mask >> 2) & 3),
11868 GEN_INT ((mask >> 4) & 3),
11869 GEN_INT ((mask >> 6) & 3)));
;; V8HI low-word selection insn with four selectors (operands 2-5), each
;; in 0..3.  The output code packs the selectors into a single immediate,
;; two bits each, stores it in operands[2] and prints %vpshuflw.
11873 (define_insn "sse2_pshuflw_1<mask_name>"
11874 [(set (match_operand:V8HI 0 "register_operand" "=v")
11876 (match_operand:V8HI 1 "nonimmediate_operand" "vm")
11877 (parallel [(match_operand 2 "const_0_to_3_operand")
11878 (match_operand 3 "const_0_to_3_operand")
11879 (match_operand 4 "const_0_to_3_operand")
11880 (match_operand 5 "const_0_to_3_operand")
11885 "TARGET_SSE2 && <mask_mode512bit_condition>"
11888 mask |= INTVAL (operands[2]) << 0;
11889 mask |= INTVAL (operands[3]) << 2;
11890 mask |= INTVAL (operands[4]) << 4;
11891 mask |= INTVAL (operands[5]) << 6;
11892 operands[2] = GEN_INT (mask);
11894 return "%vpshuflw\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
11896 [(set_attr "type" "sselog")
11897 (set_attr "prefix_data16" "0")
11898 (set_attr "prefix_rep" "1")
11899 (set_attr "prefix" "maybe_vex")
11900 (set_attr "length_immediate" "1")
11901 (set_attr "mode" "TI")])
;; V16HI high-word shuffle expander.  The 8-bit immediate in operand 2 is
;; split into four 2-bit selectors; gen_avx2_pshufhw_1 receives each
;; selector biased by 4 and again biased by 12.
11903 (define_expand "avx2_pshufhwv3"
11904 [(match_operand:V16HI 0 "register_operand")
11905 (match_operand:V16HI 1 "nonimmediate_operand")
11906 (match_operand:SI 2 "const_0_to_255_operand")]
11909 int mask = INTVAL (operands[2]);
11910 emit_insn (gen_avx2_pshufhw_1 (operands[0], operands[1],
11911 GEN_INT (((mask >> 0) & 3) + 4),
11912 GEN_INT (((mask >> 2) & 3) + 4),
11913 GEN_INT (((mask >> 4) & 3) + 4),
11914 GEN_INT (((mask >> 6) & 3) + 4),
11915 GEN_INT (((mask >> 0) & 3) + 12),
11916 GEN_INT (((mask >> 2) & 3) + 12),
11917 GEN_INT (((mask >> 4) & 3) + 12),
11918 GEN_INT (((mask >> 6) & 3) + 12)));
;; V32HI vpshufhw insn.  Operand 1 is the source vector (register or
;; memory) and operand 2 an immediate in 0..255 that is printed directly
;; as the instruction's immediate.
11922 (define_insn "<mask_codefor>avx512bw_pshufhwv32hi<mask_name>"
11923 [(set (match_operand:V32HI 0 "register_operand" "=v")
11925 [(match_operand:V32HI 1 "nonimmediate_operand" "vm")
11926 (match_operand:SI 2 "const_0_to_255_operand" "n")]
11929 "vpshufhw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11930 [(set_attr "type" "sselog")
11931 (set_attr "prefix" "evex")
11932 (set_attr "mode" "XI")])
;; Masked V16HI high-word shuffle expander.  The 8-bit immediate in
;; operand 2 is split into four 2-bit selectors; each is passed to
;; gen_avx2_pshufhw_1_mask biased by 4 and again biased by 12, followed by
;; operands 3 and 4 untouched.
;; NOTE(review): operands 3 (V16HI) and 4 (HI) are presumably the merge
;; source and the write mask -- confirm against the mask substitution.
11934 (define_expand "avx512vl_pshufhwv3_mask"
11935 [(match_operand:V16HI 0 "register_operand")
11936 (match_operand:V16HI 1 "nonimmediate_operand")
11937 (match_operand:SI 2 "const_0_to_255_operand")
11938 (match_operand:V16HI 3 "register_operand")
11939 (match_operand:HI 4 "register_operand")]
11942 int mask = INTVAL (operands[2]);
11943 emit_insn (gen_avx2_pshufhw_1_mask (operands[0], operands[1],
11944 GEN_INT (((mask >> 0) & 3) + 4),
11945 GEN_INT (((mask >> 2) & 3) + 4),
11946 GEN_INT (((mask >> 4) & 3) + 4),
11947 GEN_INT (((mask >> 6) & 3) + 4),
11948 GEN_INT (((mask >> 0) & 3) + 12),
11949 GEN_INT (((mask >> 2) & 3) + 12),
11950 GEN_INT (((mask >> 4) & 3) + 12),
11951 GEN_INT (((mask >> 6) & 3) + 12),
11952 operands[3], operands[4]));
;; V16HI high-word selection insn.  Operands 2-5 are selectors in 4..7 and
;; operands 6-9 selectors in 12..15; the insn condition requires each of
;; operands 6-9 to equal the corresponding operand 2-5 plus 8.  The output
;; code subtracts 4 from operands 2-5 to recover the 2-bit fields, packs
;; them into one immediate, stores it in operands[2] and prints vpshufhw.
11956 (define_insn "avx2_pshufhw_1<mask_name>"
11957 [(set (match_operand:V16HI 0 "register_operand" "=v")
11959 (match_operand:V16HI 1 "nonimmediate_operand" "vm")
11960 (parallel [(const_int 0)
11964 (match_operand 2 "const_4_to_7_operand")
11965 (match_operand 3 "const_4_to_7_operand")
11966 (match_operand 4 "const_4_to_7_operand")
11967 (match_operand 5 "const_4_to_7_operand")
11972 (match_operand 6 "const_12_to_15_operand")
11973 (match_operand 7 "const_12_to_15_operand")
11974 (match_operand 8 "const_12_to_15_operand")
11975 (match_operand 9 "const_12_to_15_operand")])))]
11977 && INTVAL (operands[2]) + 8 == INTVAL (operands[6])
11978 && INTVAL (operands[3]) + 8 == INTVAL (operands[7])
11979 && INTVAL (operands[4]) + 8 == INTVAL (operands[8])
11980 && INTVAL (operands[5]) + 8 == INTVAL (operands[9])
11981 && <mask_mode512bit_condition>"
11984 mask |= (INTVAL (operands[2]) - 4) << 0;
11985 mask |= (INTVAL (operands[3]) - 4) << 2;
11986 mask |= (INTVAL (operands[4]) - 4) << 4;
11987 mask |= (INTVAL (operands[5]) - 4) << 6;
11988 operands[2] = GEN_INT (mask);
11990 return "vpshufhw\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}";
11992 [(set_attr "type" "sselog")
11993 (set_attr "prefix" "maybe_evex")
11994 (set_attr "length_immediate" "1")
11995 (set_attr "mode" "OI")])
;; Masked V8HI high-word shuffle expander.  Splits the 8-bit immediate in
;; operand 2 into four 2-bit selectors, biases each by 4 and forwards
;; them, together with operands 3 and 4, to gen_sse2_pshufhw_1_mask.
;; NOTE(review): operands 3 (V8HI) and 4 (QI) are presumably the merge
;; source and the write mask -- confirm against the mask substitution.
11997 (define_expand "avx512vl_pshufhw_mask"
11998 [(match_operand:V8HI 0 "register_operand")
11999 (match_operand:V8HI 1 "nonimmediate_operand")
12000 (match_operand:SI 2 "const_0_to_255_operand")
12001 (match_operand:V8HI 3 "register_operand")
12002 (match_operand:QI 4 "register_operand")]
12005 int mask = INTVAL (operands[2]);
12006 emit_insn (gen_sse2_pshufhw_1_mask (operands[0], operands[1],
12007 GEN_INT (((mask >> 0) & 3) + 4),
12008 GEN_INT (((mask >> 2) & 3) + 4),
12009 GEN_INT (((mask >> 4) & 3) + 4),
12010 GEN_INT (((mask >> 6) & 3) + 4),
12011 operands[3], operands[4]));
;; V8HI high-word shuffle expander.  Operand 2 is a constant integer; its
;; low four 2-bit fields are extracted, each biased by 4, and passed as
;; separate selector operands to gen_sse2_pshufhw_1.
12015 (define_expand "sse2_pshufhw"
12016 [(match_operand:V8HI 0 "register_operand")
12017 (match_operand:V8HI 1 "nonimmediate_operand")
12018 (match_operand:SI 2 "const_int_operand")]
12021 int mask = INTVAL (operands[2]);
12022 emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
12023 GEN_INT (((mask >> 0) & 3) + 4),
12024 GEN_INT (((mask >> 2) & 3) + 4),
12025 GEN_INT (((mask >> 4) & 3) + 4),
12026 GEN_INT (((mask >> 6) & 3) + 4)));
;; V8HI high-word selection insn with four selectors (operands 2-5), each
;; in 4..7.  The output code subtracts 4 from every selector, packs the
;; resulting 2-bit fields into a single immediate, stores it in
;; operands[2] and prints %vpshufhw.
12030 (define_insn "sse2_pshufhw_1<mask_name>"
12031 [(set (match_operand:V8HI 0 "register_operand" "=v")
12033 (match_operand:V8HI 1 "nonimmediate_operand" "vm")
12034 (parallel [(const_int 0)
12038 (match_operand 2 "const_4_to_7_operand")
12039 (match_operand 3 "const_4_to_7_operand")
12040 (match_operand 4 "const_4_to_7_operand")
12041 (match_operand 5 "const_4_to_7_operand")])))]
12042 "TARGET_SSE2 && <mask_mode512bit_condition>"
12045 mask |= (INTVAL (operands[2]) - 4) << 0;
12046 mask |= (INTVAL (operands[3]) - 4) << 2;
12047 mask |= (INTVAL (operands[4]) - 4) << 4;
12048 mask |= (INTVAL (operands[5]) - 4) << 6;
12049 operands[2] = GEN_INT (mask);
12051 return "%vpshufhw\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
12053 [(set_attr "type" "sselog")
12054 (set_attr "prefix_rep" "1")
12055 (set_attr "prefix_data16" "0")
12056 (set_attr "prefix" "maybe_vex")
12057 (set_attr "length_immediate" "1")
12058 (set_attr "mode" "TI")])
;; Expander that builds a V4SI value in operand 0 from the SI value in
;; operand 1 (wrapped in a vec_duplicate).  The preparation statement sets
;; operand 2 to the all-zero V4SI constant.
;; NOTE(review): operand 2 presumably supplies zeros for the elements not
;; taken from operand 1 -- confirm against the complete pattern.
12060 (define_expand "sse2_loadd"
12061 [(set (match_operand:V4SI 0 "register_operand")
12063 (vec_duplicate:V4SI
12064 (match_operand:SI 1 "nonimmediate_operand"))
12068 "operands[2] = CONST0_RTX (V4SImode);")
;; Insn combining an SI scalar (operand 2) with a V4SI operand 1 that is
;; either a register or zero.  Five alternatives: the first two print
;; %vmovd (source in memory or a general register), the next two print
;; movss and are restricted to non-AVX, and the last prints the
;; three-operand vmovss and is restricted to AVX (see the "isa" attribute).
12070 (define_insn "sse2_loadld"
12071 [(set (match_operand:V4SI 0 "register_operand" "=x,Yi,x,x,x")
12073 (vec_duplicate:V4SI
12074 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x,x"))
12075 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,C,0,x")
12079 %vmovd\t{%2, %0|%0, %2}
12080 %vmovd\t{%2, %0|%0, %2}
12081 movss\t{%2, %0|%0, %2}
12082 movss\t{%2, %0|%0, %2}
12083 vmovss\t{%2, %1, %0|%0, %1, %2}"
12084 [(set_attr "isa" "sse2,*,noavx,noavx,avx")
12085 (set_attr "type" "ssemov")
12086 (set_attr "prefix" "maybe_vex,maybe_vex,orig,orig,vex")
12087 (set_attr "mode" "TI,TI,V4SF,SF,SF")])
;; Extracts one element (index in operand 2) from a VI12_128 vector
;; register into a general register or memory using %vpextr<ssemodesuffix>.
;; The register alternative prints the destination with the "k" operand
;; modifier.  The prefix_data16 and prefix_extra attributes are conditional
;; on alternative 0 being used with V8HImode.
12089 (define_insn "*vec_extract<mode>"
12090 [(set (match_operand:<ssescalarmode> 0 "nonimmediate_operand" "=r,m")
12091 (vec_select:<ssescalarmode>
12092 (match_operand:VI12_128 1 "register_operand" "x,x")
12094 [(match_operand:SI 2 "const_0_to_<ssescalarnummask>_operand")])))]
12097 %vpextr<ssemodesuffix>\t{%2, %1, %k0|%k0, %1, %2}
12098 %vpextr<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
12099 [(set_attr "type" "sselog1")
12100 (set (attr "prefix_data16")
12102 (and (eq_attr "alternative" "0")
12103 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
12105 (const_string "*")))
12106 (set (attr "prefix_extra")
12108 (and (eq_attr "alternative" "0")
12109 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
12111 (const_string "1")))
12112 (set_attr "length_immediate" "1")
12113 (set_attr "prefix" "maybe_vex")
12114 (set_attr "mode" "TI")])
;; V8HI element extraction into a general register for targets with SSE2
;; but without SSE4.1 (see the insn condition); prints the legacy pextrw
;; form with the element index in operand 2.
12116 (define_insn "*vec_extractv8hi_sse2"
12117 [(set (match_operand:HI 0 "register_operand" "=r")
12119 (match_operand:V8HI 1 "register_operand" "x")
12121 [(match_operand:SI 2 "const_0_to_7_operand")])))]
12122 "TARGET_SSE2 && !TARGET_SSE4_1"
12123 "pextrw\t{%2, %1, %k0|%k0, %1, %2}"
12124 [(set_attr "type" "sselog1")
12125 (set_attr "prefix_data16" "1")
12126 (set_attr "length_immediate" "1")
12127 (set_attr "mode" "TI")])
;; Extracts the V16QI element selected by operand 2 (0..15) into an SWI48
;; general register with %vpextrb.
;; NOTE(review): per the pattern name this is the zero-extending variant;
;; the extension wrapper itself is not visible in this excerpt.
12129 (define_insn "*vec_extractv16qi_zext"
12130 [(set (match_operand:SWI48 0 "register_operand" "=r")
12133 (match_operand:V16QI 1 "register_operand" "x")
12135 [(match_operand:SI 2 "const_0_to_15_operand")]))))]
12137 "%vpextrb\t{%2, %1, %k0|%k0, %1, %2}"
12138 [(set_attr "type" "sselog1")
12139 (set_attr "prefix_extra" "1")
12140 (set_attr "length_immediate" "1")
12141 (set_attr "prefix" "maybe_vex")
12142 (set_attr "mode" "TI")])
;; Extracts the V8HI element selected by operand 2 (0..7) into an SWI48
;; general register with %vpextrw.
;; NOTE(review): per the pattern name this is the zero-extending variant;
;; the extension wrapper itself is not visible in this excerpt.
12144 (define_insn "*vec_extractv8hi_zext"
12145 [(set (match_operand:SWI48 0 "register_operand" "=r")
12148 (match_operand:V8HI 1 "register_operand" "x")
12150 [(match_operand:SI 2 "const_0_to_7_operand")]))))]
12152 "%vpextrw\t{%2, %1, %k0|%k0, %1, %2}"
12153 [(set_attr "type" "sselog1")
12154 (set_attr "prefix_data16" "1")
12155 (set_attr "length_immediate" "1")
12156 (set_attr "prefix" "maybe_vex")
12157 (set_attr "mode" "TI")])
;; Element extraction from a VI12_128 vector held in offsettable memory
;; (constraint "o") into a general register.
;; NOTE(review): the condition and output template are not visible here;
;; presumably this is split into a plain scalar load later -- confirm.
12159 (define_insn "*vec_extract<mode>_mem"
12160 [(set (match_operand:<ssescalarmode> 0 "register_operand" "=r")
12161 (vec_select:<ssescalarmode>
12162 (match_operand:VI12_128 1 "memory_operand" "o")
12164 [(match_operand 2 "const_0_to_<ssescalarnummask>_operand")])))]
;; Extraction of element 0 from a vector into an SWI48 destination.  Four
;; alternatives; the insn condition rejects the case where both operands
;; are in memory, and the second alternative is limited by the "sse4" isa.
12168 (define_insn "*vec_extract<ssevecmodelower>_0"
12169 [(set (match_operand:SWI48 0 "nonimmediate_operand" "=r ,r,x ,m")
12171 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "mYj,x,xm,x")
12172 (parallel [(const_int 0)])))]
12173 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
12175 [(set_attr "isa" "*,sse4,*,*")])
;; Element 0 of a V4SI register delivered into a DI general register.
;; After reload the insn is split into a zero_extend:DI of operand 1, where
;; operand 1 has been rewritten as an SImode REG with the same register
;; number.  Only enabled for 64-bit SSE2 targets with
;; TARGET_INTER_UNIT_MOVES_FROM_VEC.
12177 (define_insn_and_split "*vec_extractv4si_0_zext"
12178 [(set (match_operand:DI 0 "register_operand" "=r")
12181 (match_operand:V4SI 1 "register_operand" "x")
12182 (parallel [(const_int 0)]))))]
12183 "TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_FROM_VEC"
12185 "&& reload_completed"
12186 [(set (match_dup 0) (zero_extend:DI (match_dup 1)))]
12187 "operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));")
;; Extraction of element 0 of a V2DI vector into a DI SSE register or
;; memory on 32-bit SSE targets (condition requires !TARGET_64BIT); the
;; memory-to-memory combination is excluded.
12189 (define_insn "*vec_extractv2di_0_sse"
12190 [(set (match_operand:DI 0 "nonimmediate_operand" "=x,m")
12192 (match_operand:V2DI 1 "nonimmediate_operand" "xm,x")
12193 (parallel [(const_int 0)])))]
12194 "TARGET_SSE && !TARGET_64BIT
12195 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
;; After reload, extracting element 0 from a vector register becomes a
;; plain move: operand 1 is re-expressed as a <MODE>mode REG with the same
;; register number and copied to operand 0.
12199 [(set (match_operand:SWI48x 0 "nonimmediate_operand")
12201 (match_operand:<ssevecmode> 1 "register_operand")
12202 (parallel [(const_int 0)])))]
12203 "TARGET_SSE && reload_completed"
12204 [(set (match_dup 0) (match_dup 1))]
12205 "operands[1] = gen_rtx_REG (<MODE>mode, REGNO (operands[1]));")
;; Extraction of the V4SI element selected by operand 2 (0..3).  The
;; output code switches on the alternative: one path prints %vpextrd with
;; the element index; the others multiply the index by 4 and print psrldq
;; (two-operand) or vpsrldq (three-operand) with the scaled value.  The
;; "isa" attribute restricts the last two alternatives to non-AVX and AVX.
12207 (define_insn "*vec_extractv4si"
12208 [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,x,x")
12210 (match_operand:V4SI 1 "register_operand" "x,0,x")
12211 (parallel [(match_operand:SI 2 "const_0_to_3_operand")])))]
12214 switch (which_alternative)
12217 return "%vpextrd\t{%2, %1, %0|%0, %1, %2}";
12220 operands [2] = GEN_INT (INTVAL (operands[2]) * 4);
12221 return "psrldq\t{%2, %0|%0, %2}";
12224 operands [2] = GEN_INT (INTVAL (operands[2]) * 4);
12225 return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
12228 gcc_unreachable ();
12231 [(set_attr "isa" "*,noavx,avx")
12232 (set_attr "type" "sselog1,sseishft1,sseishft1")
12233 (set_attr "prefix_extra" "1,*,*")
12234 (set_attr "length_immediate" "1")
12235 (set_attr "prefix" "maybe_vex,orig,vex")
12236 (set_attr "mode" "TI")])
;; Extracts the V4SI element selected by operand 2 into a DI general
;; register with %vpextrd; enabled for 64-bit SSE4.1 targets.  The
;; destination is printed with the "k" operand modifier.
;; NOTE(review): per the pattern name this is the zero-extending variant;
;; the extension wrapper itself is not visible in this excerpt.
12238 (define_insn "*vec_extractv4si_zext"
12239 [(set (match_operand:DI 0 "register_operand" "=r")
12242 (match_operand:V4SI 1 "register_operand" "x")
12243 (parallel [(match_operand:SI 2 "const_0_to_3_operand")]))))]
12244 "TARGET_64BIT && TARGET_SSE4_1"
12245 "%vpextrd\t{%2, %1, %k0|%k0, %1, %2}"
12246 [(set_attr "type" "sselog1")
12247 (set_attr "prefix_extra" "1")
12248 (set_attr "length_immediate" "1")
12249 (set_attr "prefix" "maybe_vex")
12250 (set_attr "mode" "TI")])
;; Extraction of a V4SI element from offsettable memory (constraint "o")
;; into either an SSE register or a general register.
;; NOTE(review): the condition and output template are not visible here;
;; presumably this is split into a plain SImode load later -- confirm.
12252 (define_insn "*vec_extractv4si_mem"
12253 [(set (match_operand:SI 0 "register_operand" "=x,r")
12255 (match_operand:V4SI 1 "memory_operand" "o,o")
12256 (parallel [(match_operand 2 "const_0_to_3_operand")])))]
;; V4SI element taken from offsettable memory and delivered into a DI
;; register (64-bit SSE targets).  After reload the insn is split into a
;; zero_extend:DI of an SImode memory reference: operand 1 is rewritten
;; with adjust_address to point at byte offset (index * 4).
12260 (define_insn_and_split "*vec_extractv4si_zext_mem"
12261 [(set (match_operand:DI 0 "register_operand" "=x,r")
12264 (match_operand:V4SI 1 "memory_operand" "o,o")
12265 (parallel [(match_operand:SI 2 "const_0_to_3_operand")]))))]
12266 "TARGET_64BIT && TARGET_SSE"
12268 "&& reload_completed"
12269 [(set (match_dup 0) (zero_extend:DI (match_dup 1)))]
12271 operands[1] = adjust_address (operands[1], SImode, INTVAL (operands[2]) * 4);
;; Extraction of element 1 (the upper DI) of a V2DI vector.  Seven
;; alternatives selected by constraints and the "isa" attribute; the
;; templates visible here use pextrq $1, movhps, psrldq/vpsrldq by 8 and
;; movhlps.  The memory-to-memory combination is excluded by the
;; condition.
12274 (define_insn "*vec_extractv2di_1"
12275 [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,m,x,x,x,x,r")
12277 (match_operand:V2DI 1 "nonimmediate_operand" "x ,x,0,x,x,o,o")
12278 (parallel [(const_int 1)])))]
12279 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
12281 %vpextrq\t{$1, %1, %0|%0, %1, 1}
12282 %vmovhps\t{%1, %0|%0, %1}
12283 psrldq\t{$8, %0|%0, 8}
12284 vpsrldq\t{$8, %1, %0|%0, %1, 8}
12285 movhlps\t{%1, %0|%0, %1}
12288 [(set_attr "isa" "x64_sse4,*,sse2_noavx,avx,noavx,*,x64")
12289 (set_attr "type" "sselog1,ssemov,sseishft1,sseishft1,ssemov,ssemov,imov")
12290 (set_attr "length_immediate" "1,*,1,1,*,*,*")
12291 (set_attr "prefix_rex" "1,*,*,*,*,*,*")
12292 (set_attr "prefix_extra" "1,*,*,*,*,*,*")
12293 (set_attr "prefix" "maybe_vex,maybe_vex,orig,vex,orig,*,*")
12294 (set_attr "mode" "TI,V2SF,TI,TI,V4SF,DI,DI")])
;; After reload, extracting an element from a VI_128 vector in memory
;; becomes a scalar load: the byte offset is the element index times the
;; size of <ssescalarmode>, and operand 1 is rewritten with adjust_address
;; to reference that scalar.
12297 [(set (match_operand:<ssescalarmode> 0 "register_operand")
12298 (vec_select:<ssescalarmode>
12299 (match_operand:VI_128 1 "memory_operand")
12301 [(match_operand 2 "const_0_to_<ssescalarnummask>_operand")])))]
12302 "TARGET_SSE && reload_completed"
12303 [(set (match_dup 0) (match_dup 1))]
12305 int offs = INTVAL (operands[2]) * GET_MODE_SIZE (<ssescalarmode>mode);
12307 operands[1] = adjust_address (operands[1], <ssescalarmode>mode, offs);
;; Broadcast of an SI value into all elements of a V4SI register.  Three
;; alternatives: %vpshufd with immediate 0 from a register (isa sse2),
;; vbroadcastss from memory (isa avx), and shufps with immediate 0 on the
;; destination itself (isa noavx).
12310 (define_insn "*vec_dupv4si"
12311 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
12312 (vec_duplicate:V4SI
12313 (match_operand:SI 1 "nonimmediate_operand" " x,m,0")))]
12316 %vpshufd\t{$0, %1, %0|%0, %1, 0}
12317 vbroadcastss\t{%1, %0|%0, %1}
12318 shufps\t{$0, %0, %0|%0, %0, 0}"
12319 [(set_attr "isa" "sse2,avx,noavx")
12320 (set_attr "type" "sselog1,ssemov,sselog1")
12321 (set_attr "length_immediate" "1,0,1")
12322 (set_attr "prefix_extra" "0,1,*")
12323 (set_attr "prefix" "maybe_vex,vex,orig")
12324 (set_attr "mode" "TI,V4SF,V4SF")])
;; Broadcast of a DI value into both elements of a V2DI register.  Four
;; alternatives, gated by the "isa" attribute (sse2_noavx, avx, sse3,
;; noavx); the templates visible here are vpunpcklqdq and %vmovddup.
12326 (define_insn "*vec_dupv2di"
12327 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x,x")
12328 (vec_duplicate:V2DI
12329 (match_operand:DI 1 "nonimmediate_operand" " 0,x,m,0")))]
12333 vpunpcklqdq\t{%d1, %0|%0, %d1}
12334 %vmovddup\t{%1, %0|%0, %1}
12336 [(set_attr "isa" "sse2_noavx,avx,sse3,noavx")
12337 (set_attr "type" "sselog1,sselog1,sselog1,ssemov")
12338 (set_attr "prefix" "orig,vex,maybe_vex,orig")
12339 (set_attr "mode" "TI,TI,DF,V4SF")])
;; Builds a V2SI value from two SI operands.  Seven alternatives: pinsrd /
;; vpinsrd inserting operand 2 at index 1, punpckldq / vpunpckldq when
;; operand 2 is an SSE register, %vmovd when operand 2 matches constraint
;; "C", and two alternatives using the "*y" register class (punpckldq and
;; movd).
12341 (define_insn "*vec_concatv2si_sse4_1"
12342 [(set (match_operand:V2SI 0 "register_operand" "=x, x,x,x, x, *y,*y")
12344 (match_operand:SI 1 "nonimmediate_operand" " 0, x,0,x,rm, 0,rm")
12345 (match_operand:SI 2 "vector_move_operand" "rm,rm,x,x, C,*ym, C")))]
12348 pinsrd\t{$1, %2, %0|%0, %2, 1}
12349 vpinsrd\t{$1, %2, %1, %0|%0, %1, %2, 1}
12350 punpckldq\t{%2, %0|%0, %2}
12351 vpunpckldq\t{%2, %1, %0|%0, %1, %2}
12352 %vmovd\t{%1, %0|%0, %1}
12353 punpckldq\t{%2, %0|%0, %2}
12354 movd\t{%1, %0|%0, %1}"
12355 [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
12356 (set_attr "type" "sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov")
12357 (set_attr "prefix_extra" "1,1,*,*,*,*,*")
12358 (set_attr "length_immediate" "1,1,*,*,*,*,*")
12359 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig")
12360 (set_attr "mode" "TI,TI,TI,TI,TI,DI,DI")])
;; Builds a V2SI value from two SI operands on SSE targets without SSE4.1
;; (see the insn condition).  Operand 2 is a register or zero; the
;; alternatives print punpckldq, movd, unpcklps or movss, with the first
;; three alternatives additionally requiring the "sse2" isa.
12362 ;; ??? In theory we can match memory for the MMX alternative, but allowing
12363 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
12364 ;; alternatives pretty much forces the MMX alternative to be chosen.
12365 (define_insn "*vec_concatv2si"
12366 [(set (match_operand:V2SI 0 "register_operand" "=x,x ,*y,x,x,*y,*y")
12368 (match_operand:SI 1 "nonimmediate_operand" " 0,rm,rm,0,m, 0,*rm")
12369 (match_operand:SI 2 "reg_or_0_operand" " x,C ,C, x,C,*y,C")))]
12370 "TARGET_SSE && !TARGET_SSE4_1"
12372 punpckldq\t{%2, %0|%0, %2}
12373 movd\t{%1, %0|%0, %1}
12374 movd\t{%1, %0|%0, %1}
12375 unpcklps\t{%2, %0|%0, %2}
12376 movss\t{%1, %0|%0, %1}
12377 punpckldq\t{%2, %0|%0, %2}
12378 movd\t{%1, %0|%0, %1}"
12379 [(set_attr "isa" "sse2,sse2,sse2,*,*,*,*")
12380 (set_attr "type" "sselog,ssemov,mmxmov,sselog,ssemov,mmxcvt,mmxmov")
12381 (set_attr "mode" "TI,TI,DI,V4SF,SF,DI,DI")])
;; Builds a V4SI value from two V2SI halves.  Register sources use
;; punpcklqdq / vpunpcklqdq or movlhps; when operand 2 is in memory the
;; movhps / vmovhps forms are used, printing operand 2 with the "q"
;; modifier in the Intel-syntax half of the template.
12383 (define_insn "*vec_concatv4si"
12384 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x,x,x")
12386 (match_operand:V2SI 1 "register_operand" " 0,x,0,0,x")
12387 (match_operand:V2SI 2 "nonimmediate_operand" " x,x,x,m,m")))]
12390 punpcklqdq\t{%2, %0|%0, %2}
12391 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
12392 movlhps\t{%2, %0|%0, %2}
12393 movhps\t{%2, %0|%0, %q2}
12394 vmovhps\t{%2, %1, %0|%0, %1, %q2}"
12395 [(set_attr "isa" "sse2_noavx,avx,noavx,noavx,avx")
12396 (set_attr "type" "sselog,sselog,ssemov,ssemov,ssemov")
12397 (set_attr "prefix" "orig,vex,orig,orig,vex")
12398 (set_attr "mode" "TI,TI,V4SF,V2SF,V2SF")])
;; Builds a V2DI value from two DI operands; ten alternatives selected by
;; constraints and the "isa" attribute.  The third alternative picks its
;; mnemonic at output time: %vmovq when HAVE_AS_IX86_INTERUNIT_MOVQ is
;; nonzero, %vmovd otherwise.  The "type" attribute is sselog for
;; alternatives 0, 1, 5 and 6 and ssemov for the rest.
12400 ;; movd instead of movq is required to handle broken assemblers.
12401 (define_insn "vec_concatv2di"
12402 [(set (match_operand:V2DI 0 "register_operand"
12403 "=x,x ,Yi,x ,!x,x,x,x,x,x")
12405 (match_operand:DI 1 "nonimmediate_operand"
12406 " 0,x ,r ,xm,*y,0,x,0,0,x")
12407 (match_operand:DI 2 "vector_move_operand"
12408 "rm,rm,C ,C ,C ,x,x,x,m,m")))]
12411 pinsrq\t{$1, %2, %0|%0, %2, 1}
12412 vpinsrq\t{$1, %2, %1, %0|%0, %1, %2, 1}
12413 * return HAVE_AS_IX86_INTERUNIT_MOVQ ? \"%vmovq\t{%1, %0|%0, %1}\" : \"%vmovd\t{%1, %0|%0, %1}\";
12414 %vmovq\t{%1, %0|%0, %1}
12415 movq2dq\t{%1, %0|%0, %1}
12416 punpcklqdq\t{%2, %0|%0, %2}
12417 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
12418 movlhps\t{%2, %0|%0, %2}
12419 movhps\t{%2, %0|%0, %2}
12420 vmovhps\t{%2, %1, %0|%0, %1, %2}"
12421 [(set_attr "isa" "x64_sse4_noavx,x64_avx,x64,sse2,sse2,sse2_noavx,avx,noavx,noavx,avx")
12424 (eq_attr "alternative" "0,1,5,6")
12425 (const_string "sselog")
12426 (const_string "ssemov")))
12427 (set_attr "prefix_rex" "1,1,1,*,*,*,*,*,*,*")
12428 (set_attr "prefix_extra" "1,1,*,*,*,*,*,*,*,*")
12429 (set_attr "length_immediate" "1,1,*,*,*,*,*,*,*,*")
12430 (set_attr "prefix" "orig,vex,maybe_vex,maybe_vex,orig,orig,vex,orig,orig,vex")
12431 (set_attr "mode" "TI,TI,TI,TI,TI,TI,TI,V4SF,V2SF,V2SF")])
;; Expander that delegates entirely to ix86_expand_sse_unpack with both
;; flag arguments false, then finishes with DONE.
;; NOTE(review): judging by the sibling vec_unpack expanders, the first
;; flag selects unsigned and the second the high half -- confirm.
12433 (define_expand "vec_unpacks_lo_<mode>"
12434 [(match_operand:<sseunpackmode> 0 "register_operand")
12435 (match_operand:VI124_AVX512F 1 "register_operand")]
12437 "ix86_expand_sse_unpack (operands[0], operands[1], false, false); DONE;")
;; Expander that delegates entirely to ix86_expand_sse_unpack with flag
;; arguments (false, true), then finishes with DONE.
;; NOTE(review): judging by the sibling vec_unpack expanders, the first
;; flag selects unsigned and the second the high half -- confirm.
12439 (define_expand "vec_unpacks_hi_<mode>"
12440 [(match_operand:<sseunpackmode> 0 "register_operand")
12441 (match_operand:VI124_AVX512F 1 "register_operand")]
12443 "ix86_expand_sse_unpack (operands[0], operands[1], false, true); DONE;")
;; Expander that delegates entirely to ix86_expand_sse_unpack with flag
;; arguments (true, false), then finishes with DONE.
;; NOTE(review): judging by the sibling vec_unpack expanders, the first
;; flag selects unsigned and the second the high half -- confirm.
12445 (define_expand "vec_unpacku_lo_<mode>"
12446 [(match_operand:<sseunpackmode> 0 "register_operand")
12447 (match_operand:VI124_AVX512F 1 "register_operand")]
12449 "ix86_expand_sse_unpack (operands[0], operands[1], true, false); DONE;")
;; Expander that delegates entirely to ix86_expand_sse_unpack with both
;; flag arguments true, then finishes with DONE.
;; NOTE(review): judging by the sibling vec_unpack expanders, the first
;; flag selects unsigned and the second the high half -- confirm.
12451 (define_expand "vec_unpacku_hi_<mode>"
12452 [(match_operand:<sseunpackmode> 0 "register_operand")
12453 (match_operand:VI124_AVX512F 1 "register_operand")]
12455 "ix86_expand_sse_unpack (operands[0], operands[1], true, true); DONE;")
12457 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
12461 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander for the unsigned-average pattern on VI12_AVX2 modes.  The RTL
;; zero-extends operands 1 and 2 to <ssedoublemode>, adds them, adds a
;; further operand (match_dup <mask_expand_op3>), shifts right and
;; truncates back.  The preparation code sets operands[3] to the
;; all-ones-valued constant CONST1_RTX (<MODE>mode) and calls
;; ix86_fixup_binary_operands_no_copy for PLUS; additional operand
;; shuffling is done when <mask_applied> is true.
;; NOTE(review): the shift amount is not visible in this excerpt.
12463 (define_expand "<sse2_avx2>_uavg<mode>3<mask_name>"
12464 [(set (match_operand:VI12_AVX2 0 "register_operand")
12465 (truncate:VI12_AVX2
12466 (lshiftrt:<ssedoublemode>
12467 (plus:<ssedoublemode>
12468 (plus:<ssedoublemode>
12469 (zero_extend:<ssedoublemode>
12470 (match_operand:VI12_AVX2 1 "nonimmediate_operand"))
12471 (zero_extend:<ssedoublemode>
12472 (match_operand:VI12_AVX2 2 "nonimmediate_operand")))
12473 (match_dup <mask_expand_op3>))
12475 "TARGET_SSE2 && <mask_mode512bit_condition>"
12478 if (<mask_applied>)
12480 operands[3] = CONST1_RTX(<MODE>mode);
12481 ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);
12483 if (<mask_applied>)
12485 operands[5] = operands[3];
;; Insn matching the unsigned-average RTL: zero-extended operands 1 and 2
;; are summed in <ssedoublemode> together with an operand that must
;; satisfy const1_operand, then shifted right and truncated.  Prints
;; pavg<ssemodesuffix> (non-AVX, destination tied to operand 1) or the
;; three-operand vpavg<ssemodesuffix> (AVX).  Operand 1 is marked
;; commutative ("%") and the condition also requires
;; ix86_binary_operator_ok for PLUS.
12490 (define_insn "*<sse2_avx2>_uavg<mode>3<mask_name>"
12491 [(set (match_operand:VI12_AVX2 0 "register_operand" "=x,v")
12492 (truncate:VI12_AVX2
12493 (lshiftrt:<ssedoublemode>
12494 (plus:<ssedoublemode>
12495 (plus:<ssedoublemode>
12496 (zero_extend:<ssedoublemode>
12497 (match_operand:VI12_AVX2 1 "nonimmediate_operand" "%0,v"))
12498 (zero_extend:<ssedoublemode>
12499 (match_operand:VI12_AVX2 2 "nonimmediate_operand" "xm,vm")))
12500 (match_operand:VI12_AVX2 <mask_expand_op3> "const1_operand"))
12503 && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)
12504 && <mask_mode512bit_condition>"
12506 pavg<ssemodesuffix>\t{%2, %0|%0, %2}
12507 vpavg<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12508 [(set_attr "isa" "noavx,avx")
12509 (set_attr "type" "sseiadd")
12510 (set_attr "prefix_data16" "1,*")
12511 (set_attr "prefix" "orig,<mask_prefix>")
12512 (set_attr "mode" "<sseinsnmode>")])
;; psadbw / vpsadbw insn, modelled as an unspec over two <ssebytemode>
;; operands rather than as explicit RTL (see the note below).  The result
;; mode iterates over VI8_AVX2_AVX512BW; the non-AVX alternative ties the
;; destination to operand 1.
12514 ;; The correct representation for this is absolutely enormous, and
12515 ;; surely not generally useful.
12516 (define_insn "<sse2_avx2>_psadbw"
12517 [(set (match_operand:VI8_AVX2_AVX512BW 0 "register_operand" "=x,v")
12518 (unspec:VI8_AVX2_AVX512BW
12519 [(match_operand:<ssebytemode> 1 "register_operand" "0,v")
12520 (match_operand:<ssebytemode> 2 "nonimmediate_operand" "xm,vm")]
12524 psadbw\t{%2, %0|%0, %2}
12525 vpsadbw\t{%2, %1, %0|%0, %1, %2}"
12526 [(set_attr "isa" "noavx,avx")
12527 (set_attr "type" "sseiadd")
12528 (set_attr "atom_unit" "simul")
12529 (set_attr "prefix_data16" "1,*")
12530 (set_attr "prefix" "orig,maybe_evex")
12531 (set_attr "mode" "<sseinsnmode>")])
;; %vmovmsk<ssemodesuffix> insn: produces an SI result in a general
;; register from a VF_128_256 vector register (operand 1).
12533 (define_insn "<sse>_movmsk<ssemodesuffix><avxsizesuffix>"
12534 [(set (match_operand:SI 0 "register_operand" "=r")
12536 [(match_operand:VF_128_256 1 "register_operand" "x")]
12539 "%vmovmsk<ssemodesuffix>\t{%1, %0|%0, %1}"
12540 [(set_attr "type" "ssemov")
12541 (set_attr "prefix" "maybe_vex")
12542 (set_attr "mode" "<MODE>")])
;; vpmovmskb insn for a V32QI source register; the SI result goes to a
;; general register.  Always VEX-encoded (prefix attribute "vex").
12544 (define_insn "avx2_pmovmskb"
12545 [(set (match_operand:SI 0 "register_operand" "=r")
12546 (unspec:SI [(match_operand:V32QI 1 "register_operand" "x")]
12549 "vpmovmskb\t{%1, %0|%0, %1}"
12550 [(set_attr "type" "ssemov")
12551 (set_attr "prefix" "vex")
12552 (set_attr "mode" "DI")])
;; %vpmovmskb insn for a V16QI source register; the SI result goes to a
;; general register.
12554 (define_insn "sse2_pmovmskb"
12555 [(set (match_operand:SI 0 "register_operand" "=r")
12556 (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
12559 "%vpmovmskb\t{%1, %0|%0, %1}"
12560 [(set_attr "type" "ssemov")
12561 (set_attr "prefix_data16" "1")
12562 (set_attr "prefix" "maybe_vex")
12563 (set_attr "mode" "SI")])
;; Expander for a store to a V16QI memory operand 0 of an unspec built
;; from the V16QI registers in operands 1 and 2.
;; NOTE(review): the rest of the unspec and the condition are not visible
;; in this excerpt.
12565 (define_expand "sse2_maskmovdqu"
12566 [(set (match_operand:V16QI 0 "memory_operand")
12567 (unspec:V16QI [(match_operand:V16QI 1 "register_operand")
12568 (match_operand:V16QI 2 "register_operand")
;; %vmaskmovdqu insn.  The destination address lives in a register with
;; constraint "D" and does not appear in the printed operands; the unspec
;; also takes the previous contents of the destination memory as an input.
;; When Pmode differs from word_mode an "addr32" prefix is written to the
;; assembly output before the instruction, and length_address accounts for
;; it.  length_vex is computed explicitly for the reason given below.
12573 (define_insn "*sse2_maskmovdqu"
12574 [(set (mem:V16QI (match_operand:P 0 "register_operand" "D"))
12575 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
12576 (match_operand:V16QI 2 "register_operand" "x")
12577 (mem:V16QI (match_dup 0))]
12581 /* We can't use %^ here due to ASM_OUTPUT_OPCODE processing
12582 that requires %v to be at the beginning of the opcode name. */
12583 if (Pmode != word_mode)
12584 fputs ("\taddr32", asm_out_file);
12585 return "%vmaskmovdqu\t{%2, %1|%1, %2}";
12587 [(set_attr "type" "ssemov")
12588 (set_attr "prefix_data16" "1")
12589 (set (attr "length_address")
12590 (symbol_ref ("Pmode != word_mode")))
12591 ;; The implicit %rdi operand confuses default length_vex computation.
12592 (set (attr "length_vex")
12593 (symbol_ref ("3 + REX_SSE_REGNO_P (REGNO (operands[2]))")))
12594 (set_attr "prefix" "maybe_vex")
12595 (set_attr "mode" "TI")])
;; Volatile unspec taking an SI memory operand; the "memory" attribute
;; marks it as a load and atom_sse_attr classifies it as "mxcsr".
12597 (define_insn "sse_ldmxcsr"
12598 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
12602 [(set_attr "type" "sse")
12603 (set_attr "atom_sse_attr" "mxcsr")
12604 (set_attr "prefix" "maybe_vex")
12605 (set_attr "memory" "load")])
;; sse_stmxcsr: sets SImode memory operand 0 (constraint =m) from an
;; unspec_volatile tagged UNSPECV_STMXCSR with a dummy (const_int 0) input.
;; Attributes mark it as type sse, atom_sse_attr mxcsr, prefix maybe_vex,
;; and as a memory store.
12607 (define_insn "sse_stmxcsr"
12608 [(set (match_operand:SI 0 "memory_operand" "=m")
12609 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
12612 [(set_attr "type" "sse")
12613 (set_attr "atom_sse_attr" "mxcsr")
12614 (set_attr "prefix" "maybe_vex")
12615 (set_attr "memory" "store")])
;; sse2_clflush: an unspec_volatile over operand 0, which is matched as an
;; address (predicate address_operand, constraint p) rather than as a memory
;; reference.  atom_sse_attr is fence and the memory attribute is unknown.
12617 (define_insn "sse2_clflush"
12618 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
12622 [(set_attr "type" "sse")
12623 (set_attr "atom_sse_attr" "fence")
12624 (set_attr "memory" "unknown")])
;; sse3_mwait: an unspec_volatile over two SImode register operands pinned
;; by constraint to a (operand 0) and c (operand 1).  The length attribute
;; is fixed at 3.
12627 (define_insn "sse3_mwait"
12628 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
12629 (match_operand:SI 1 "register_operand" "c")]
12632 ;; 64bit version is "mwait %rax,%rcx". But only lower 32bits are used.
12633 ;; Since 32bit register operands are implicitly zero extended to 64bit,
12634 ;; we only need to set up 32bit registers.
12636 [(set_attr "length" "3")])
;; sse3_monitor_<mode>: an unspec_volatile over three register operands
;; pinned by constraint: operand 0 in pointer mode (iterator P) in a,
;; operand 1 (SImode) in c and operand 2 (SImode) in d.  The length is 3,
;; plus 1 when Pmode != word_mode.
12638 (define_insn "sse3_monitor_<mode>"
12639 [(unspec_volatile [(match_operand:P 0 "register_operand" "a")
12640 (match_operand:SI 1 "register_operand" "c")
12641 (match_operand:SI 2 "register_operand" "d")]
12644 ;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in
12645 ;; RCX and RDX are used. Since 32bit register operands are implicitly
12646 ;; zero extended to 64bit, we only need to set up 32bit registers.
12648 [(set (attr "length")
12649 (symbol_ref ("(Pmode != word_mode) + 3")))])
12651 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
12653 ;; SSSE3 instructions
12655 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Code iterator over the four RTL codes plus, ss_plus, minus and ss_minus;
;; the ph<plusminus_mnemonic>{w,d} patterns below are instantiated once per
;; code.
12657 (define_code_iterator ssse3_plusminus [plus ss_plus minus ss_minus])
;; avx2_ph<plusminus_mnemonic>wv16hi3: V16HI result built from
;; ssse3_plusminus:HI applied to adjacent element pairs (0,1), (2,3), ...,
;; (14,15) of register operand 1 and then the same pairs of operand 2
;; (register or memory).  Emits the three-operand vph<plusminus_mnemonic>w
;; form with a VEX prefix; mode attribute OI.
12659 (define_insn "avx2_ph<plusminus_mnemonic>wv16hi3"
12660 [(set (match_operand:V16HI 0 "register_operand" "=x")
12665 (ssse3_plusminus:HI
12667 (match_operand:V16HI 1 "register_operand" "x")
12668 (parallel [(const_int 0)]))
12669 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
12670 (ssse3_plusminus:HI
12671 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
12672 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
12674 (ssse3_plusminus:HI
12675 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
12676 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
12677 (ssse3_plusminus:HI
12678 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
12679 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
12682 (ssse3_plusminus:HI
12683 (vec_select:HI (match_dup 1) (parallel [(const_int 8)]))
12684 (vec_select:HI (match_dup 1) (parallel [(const_int 9)])))
12685 (ssse3_plusminus:HI
12686 (vec_select:HI (match_dup 1) (parallel [(const_int 10)]))
12687 (vec_select:HI (match_dup 1) (parallel [(const_int 11)]))))
12689 (ssse3_plusminus:HI
12690 (vec_select:HI (match_dup 1) (parallel [(const_int 12)]))
12691 (vec_select:HI (match_dup 1) (parallel [(const_int 13)])))
12692 (ssse3_plusminus:HI
12693 (vec_select:HI (match_dup 1) (parallel [(const_int 14)]))
12694 (vec_select:HI (match_dup 1) (parallel [(const_int 15)]))))))
12698 (ssse3_plusminus:HI
12700 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
12701 (parallel [(const_int 0)]))
12702 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
12703 (ssse3_plusminus:HI
12704 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
12705 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
12707 (ssse3_plusminus:HI
12708 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
12709 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
12710 (ssse3_plusminus:HI
12711 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
12712 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))
12715 (ssse3_plusminus:HI
12716 (vec_select:HI (match_dup 2) (parallel [(const_int 8)]))
12717 (vec_select:HI (match_dup 2) (parallel [(const_int 9)])))
12718 (ssse3_plusminus:HI
12719 (vec_select:HI (match_dup 2) (parallel [(const_int 10)]))
12720 (vec_select:HI (match_dup 2) (parallel [(const_int 11)]))))
12722 (ssse3_plusminus:HI
12723 (vec_select:HI (match_dup 2) (parallel [(const_int 12)]))
12724 (vec_select:HI (match_dup 2) (parallel [(const_int 13)])))
12725 (ssse3_plusminus:HI
12726 (vec_select:HI (match_dup 2) (parallel [(const_int 14)]))
12727 (vec_select:HI (match_dup 2) (parallel [(const_int 15)]))))))))]
12729 "vph<plusminus_mnemonic>w\t{%2, %1, %0|%0, %1, %2}"
12730 [(set_attr "type" "sseiadd")
12731 (set_attr "prefix_extra" "1")
12732 (set_attr "prefix" "vex")
12733 (set_attr "mode" "OI")])
;; ssse3_ph<plusminus_mnemonic>wv8hi3: V8HI result built from
;; ssse3_plusminus:HI applied to adjacent element pairs (0,1) ... (6,7) of
;; operand 1 and then of operand 2.  Two alternatives, selected by the isa
;; attribute: noavx (operand 1 tied to the destination, two-operand
;; ph...w) and avx (three-operand vph...w).
12735 (define_insn "ssse3_ph<plusminus_mnemonic>wv8hi3"
12736 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
12740 (ssse3_plusminus:HI
12742 (match_operand:V8HI 1 "register_operand" "0,x")
12743 (parallel [(const_int 0)]))
12744 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
12745 (ssse3_plusminus:HI
12746 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
12747 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
12749 (ssse3_plusminus:HI
12750 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
12751 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
12752 (ssse3_plusminus:HI
12753 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
12754 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
12757 (ssse3_plusminus:HI
12759 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
12760 (parallel [(const_int 0)]))
12761 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
12762 (ssse3_plusminus:HI
12763 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
12764 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
12766 (ssse3_plusminus:HI
12767 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
12768 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
12769 (ssse3_plusminus:HI
12770 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
12771 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
12774 ph<plusminus_mnemonic>w\t{%2, %0|%0, %2}
12775 vph<plusminus_mnemonic>w\t{%2, %1, %0|%0, %1, %2}"
12776 [(set_attr "isa" "noavx,avx")
12777 (set_attr "type" "sseiadd")
12778 (set_attr "atom_unit" "complex")
12779 (set_attr "prefix_data16" "1,*")
12780 (set_attr "prefix_extra" "1")
12781 (set_attr "prefix" "orig,vex")
12782 (set_attr "mode" "TI")])
;; ssse3_ph<plusminus_mnemonic>wv4hi3: V4HI variant on y-constraint
;; registers.  ssse3_plusminus:HI is applied to element pairs (0,1) and (2,3)
;; of operand 1 (tied to the destination) and then of operand 2.
;; prefix_rex is computed per insn via x86_extended_reg_mentioned_p.
12784 (define_insn "ssse3_ph<plusminus_mnemonic>wv4hi3"
12785 [(set (match_operand:V4HI 0 "register_operand" "=y")
12788 (ssse3_plusminus:HI
12790 (match_operand:V4HI 1 "register_operand" "0")
12791 (parallel [(const_int 0)]))
12792 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
12793 (ssse3_plusminus:HI
12794 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
12795 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
12797 (ssse3_plusminus:HI
12799 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
12800 (parallel [(const_int 0)]))
12801 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
12802 (ssse3_plusminus:HI
12803 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
12804 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
12806 "ph<plusminus_mnemonic>w\t{%2, %0|%0, %2}"
12807 [(set_attr "type" "sseiadd")
12808 (set_attr "atom_unit" "complex")
12809 (set_attr "prefix_extra" "1")
12810 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
12811 (set_attr "mode" "DI")])
;; avx2_ph<plusminus_mnemonic>dv8si3: V8SI pattern that selects SImode
;; element pairs (0,1), (2,3), (4,5), (6,7) of register operand 1 and then
;; of operand 2 (register or memory).  Emits the three-operand
;; vph<plusminus_mnemonic>d form with a VEX prefix; mode attribute OI.
12813 (define_insn "avx2_ph<plusminus_mnemonic>dv8si3"
12814 [(set (match_operand:V8SI 0 "register_operand" "=x")
12820 (match_operand:V8SI 1 "register_operand" "x")
12821 (parallel [(const_int 0)]))
12822 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
12824 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
12825 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
12828 (vec_select:SI (match_dup 1) (parallel [(const_int 4)]))
12829 (vec_select:SI (match_dup 1) (parallel [(const_int 5)])))
12831 (vec_select:SI (match_dup 1) (parallel [(const_int 6)]))
12832 (vec_select:SI (match_dup 1) (parallel [(const_int 7)])))))
12837 (match_operand:V8SI 2 "nonimmediate_operand" "xm")
12838 (parallel [(const_int 0)]))
12839 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
12841 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
12842 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))
12845 (vec_select:SI (match_dup 2) (parallel [(const_int 4)]))
12846 (vec_select:SI (match_dup 2) (parallel [(const_int 5)])))
12848 (vec_select:SI (match_dup 2) (parallel [(const_int 6)]))
12849 (vec_select:SI (match_dup 2) (parallel [(const_int 7)])))))))]
12851 "vph<plusminus_mnemonic>d\t{%2, %1, %0|%0, %1, %2}"
12852 [(set_attr "type" "sseiadd")
12853 (set_attr "prefix_extra" "1")
12854 (set_attr "prefix" "vex")
12855 (set_attr "mode" "OI")])
;; ssse3_ph<plusminus_mnemonic>dv4si3: V4SI pattern that selects SImode
;; element pairs (0,1) and (2,3) of operand 1 and then of operand 2.
;; Two alternatives, selected by the isa attribute: noavx (operand 1 tied to
;; the destination, two-operand ph...d) and avx (three-operand vph...d).
12857 (define_insn "ssse3_ph<plusminus_mnemonic>dv4si3"
12858 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
12863 (match_operand:V4SI 1 "register_operand" "0,x")
12864 (parallel [(const_int 0)]))
12865 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
12867 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
12868 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
12872 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
12873 (parallel [(const_int 0)]))
12874 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
12876 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
12877 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
12880 ph<plusminus_mnemonic>d\t{%2, %0|%0, %2}
12881 vph<plusminus_mnemonic>d\t{%2, %1, %0|%0, %1, %2}"
12882 [(set_attr "isa" "noavx,avx")
12883 (set_attr "type" "sseiadd")
12884 (set_attr "atom_unit" "complex")
12885 (set_attr "prefix_data16" "1,*")
12886 (set_attr "prefix_extra" "1")
12887 (set_attr "prefix" "orig,vex")
12888 (set_attr "mode" "TI")])
;; ssse3_ph<plusminus_mnemonic>dv2si3: V2SI variant on y-constraint
;; registers.  Selects SImode elements 0 and 1 of operand 1 (tied to the
;; destination) and of operand 2.  prefix_rex is computed per insn via
;; x86_extended_reg_mentioned_p.
12890 (define_insn "ssse3_ph<plusminus_mnemonic>dv2si3"
12891 [(set (match_operand:V2SI 0 "register_operand" "=y")
12895 (match_operand:V2SI 1 "register_operand" "0")
12896 (parallel [(const_int 0)]))
12897 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
12900 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
12901 (parallel [(const_int 0)]))
12902 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
12904 "ph<plusminus_mnemonic>d\t{%2, %0|%0, %2}"
12905 [(set_attr "type" "sseiadd")
12906 (set_attr "atom_unit" "complex")
12907 (set_attr "prefix_extra" "1")
12908 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
12909 (set_attr "mode" "DI")])
;; avx2_pmaddubsw256: V16HI result computed from V32QI operands 1 (register)
;; and 2 (register or memory).  The pattern first selects the even byte
;; indices 0,2,...,30 of each operand and then the odd byte indices
;; 1,3,...,31 of the same operands (match_dup 1 / match_dup 2).  Emits
;; three-operand vpmaddubsw with a VEX prefix; mode attribute OI.
12911 (define_insn "avx2_pmaddubsw256"
12912 [(set (match_operand:V16HI 0 "register_operand" "=x")
12917 (match_operand:V32QI 1 "register_operand" "x")
12918 (parallel [(const_int 0) (const_int 2)
12919 (const_int 4) (const_int 6)
12920 (const_int 8) (const_int 10)
12921 (const_int 12) (const_int 14)
12922 (const_int 16) (const_int 18)
12923 (const_int 20) (const_int 22)
12924 (const_int 24) (const_int 26)
12925 (const_int 28) (const_int 30)])))
12928 (match_operand:V32QI 2 "nonimmediate_operand" "xm")
12929 (parallel [(const_int 0) (const_int 2)
12930 (const_int 4) (const_int 6)
12931 (const_int 8) (const_int 10)
12932 (const_int 12) (const_int 14)
12933 (const_int 16) (const_int 18)
12934 (const_int 20) (const_int 22)
12935 (const_int 24) (const_int 26)
12936 (const_int 28) (const_int 30)]))))
12939 (vec_select:V16QI (match_dup 1)
12940 (parallel [(const_int 1) (const_int 3)
12941 (const_int 5) (const_int 7)
12942 (const_int 9) (const_int 11)
12943 (const_int 13) (const_int 15)
12944 (const_int 17) (const_int 19)
12945 (const_int 21) (const_int 23)
12946 (const_int 25) (const_int 27)
12947 (const_int 29) (const_int 31)])))
12949 (vec_select:V16QI (match_dup 2)
12950 (parallel [(const_int 1) (const_int 3)
12951 (const_int 5) (const_int 7)
12952 (const_int 9) (const_int 11)
12953 (const_int 13) (const_int 15)
12954 (const_int 17) (const_int 19)
12955 (const_int 21) (const_int 23)
12956 (const_int 25) (const_int 27)
12957 (const_int 29) (const_int 31)]))))))]
12959 "vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
12960 [(set_attr "type" "sseiadd")
12961 (set_attr "prefix_extra" "1")
12962 (set_attr "prefix" "vex")
12963 (set_attr "mode" "OI")])
12965 ;; Unspec version for intrinsics.
;; avx512bw_pmaddubsw512<mode><mask_name>: destination iterates over
;; VI2_AVX2; the two inputs are in the corresponding <dbpsadbwmode> and are
;; wrapped in UNSPEC_PMADDUBSW512.  Enabled under TARGET_AVX512BW together
;; with <mask_mode512bit_condition>.  The template appends <mask_operand3>
;; to the destination; prefix evex, mode attribute XI.
12966 (define_insn "avx512bw_pmaddubsw512<mode><mask_name>"
12967 [(set (match_operand:VI2_AVX2 0 "register_operand" "=v")
12969 [(match_operand:<dbpsadbwmode> 1 "register_operand" "v")
12970 (match_operand:<dbpsadbwmode> 2 "nonimmediate_operand" "vm")]
12971 UNSPEC_PMADDUBSW512))]
12972 "TARGET_AVX512BW && <mask_mode512bit_condition>"
12973 "vpmaddubsw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}";
12974 [(set_attr "type" "sseiadd")
12975 (set_attr "prefix" "evex")
12976 (set_attr "mode" "XI")])
;; avx512bw_umulhrswv32hi3<mask_name>: V32HI pattern with operands 1 and 2
;; marked commutative (% on operand 1) and a const_vector of thirty-two
;; ones as an explicit constant in the RTL.  Emits vpmulhrsw with
;; <mask_operand3> appended to the destination; type sseimul, prefix evex,
;; mode attribute XI.
12978 (define_insn "avx512bw_umulhrswv32hi3<mask_name>"
12979 [(set (match_operand:V32HI 0 "register_operand" "=v")
12986 (match_operand:V32HI 1 "nonimmediate_operand" "%v"))
12988 (match_operand:V32HI 2 "nonimmediate_operand" "vm")))
12990 (const_vector:V32HI [(const_int 1) (const_int 1)
12991 (const_int 1) (const_int 1)
12992 (const_int 1) (const_int 1)
12993 (const_int 1) (const_int 1)
12994 (const_int 1) (const_int 1)
12995 (const_int 1) (const_int 1)
12996 (const_int 1) (const_int 1)
12997 (const_int 1) (const_int 1)
12998 (const_int 1) (const_int 1)
12999 (const_int 1) (const_int 1)
13000 (const_int 1) (const_int 1)
13001 (const_int 1) (const_int 1)
13002 (const_int 1) (const_int 1)
13003 (const_int 1) (const_int 1)
13004 (const_int 1) (const_int 1)
13005 (const_int 1) (const_int 1)]))
13008 "vpmulhrsw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13009 [(set_attr "type" "sseimul")
13010 (set_attr "prefix" "evex")
13011 (set_attr "mode" "XI")])
;; ssse3_pmaddubsw128: V8HI result computed from V16QI operands 1 and 2.
;; The pattern selects the even byte indices 0,2,...,14 of each operand and
;; then the odd byte indices 1,3,...,15 of the same operands.  Two
;; alternatives, selected by the isa attribute: noavx (operand 1 tied to the
;; destination, two-operand pmaddubsw) and avx (three-operand vpmaddubsw).
13013 (define_insn "ssse3_pmaddubsw128"
13014 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
13019 (match_operand:V16QI 1 "register_operand" "0,x")
13020 (parallel [(const_int 0) (const_int 2)
13021 (const_int 4) (const_int 6)
13022 (const_int 8) (const_int 10)
13023 (const_int 12) (const_int 14)])))
13026 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")
13027 (parallel [(const_int 0) (const_int 2)
13028 (const_int 4) (const_int 6)
13029 (const_int 8) (const_int 10)
13030 (const_int 12) (const_int 14)]))))
13033 (vec_select:V8QI (match_dup 1)
13034 (parallel [(const_int 1) (const_int 3)
13035 (const_int 5) (const_int 7)
13036 (const_int 9) (const_int 11)
13037 (const_int 13) (const_int 15)])))
13039 (vec_select:V8QI (match_dup 2)
13040 (parallel [(const_int 1) (const_int 3)
13041 (const_int 5) (const_int 7)
13042 (const_int 9) (const_int 11)
13043 (const_int 13) (const_int 15)]))))))]
13046 pmaddubsw\t{%2, %0|%0, %2}
13047 vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
13048 [(set_attr "isa" "noavx,avx")
13049 (set_attr "type" "sseiadd")
13050 (set_attr "atom_unit" "simul")
13051 (set_attr "prefix_data16" "1,*")
13052 (set_attr "prefix_extra" "1")
13053 (set_attr "prefix" "orig,vex")
13054 (set_attr "mode" "TI")])
;; ssse3_pmaddubsw: V4HI result on y-constraint registers, computed from
;; V8QI operands 1 (tied to the destination) and 2.  The pattern selects the
;; even byte indices 0,2,4,6 of each operand and then the odd byte indices
;; 1,3,5,7 of the same operands.  prefix_rex is computed per insn via
;; x86_extended_reg_mentioned_p.
13056 (define_insn "ssse3_pmaddubsw"
13057 [(set (match_operand:V4HI 0 "register_operand" "=y")
13062 (match_operand:V8QI 1 "register_operand" "0")
13063 (parallel [(const_int 0) (const_int 2)
13064 (const_int 4) (const_int 6)])))
13067 (match_operand:V8QI 2 "nonimmediate_operand" "ym")
13068 (parallel [(const_int 0) (const_int 2)
13069 (const_int 4) (const_int 6)]))))
13072 (vec_select:V4QI (match_dup 1)
13073 (parallel [(const_int 1) (const_int 3)
13074 (const_int 5) (const_int 7)])))
13076 (vec_select:V4QI (match_dup 2)
13077 (parallel [(const_int 1) (const_int 3)
13078 (const_int 5) (const_int 7)]))))))]
13080 "pmaddubsw\t{%2, %0|%0, %2}"
13081 [(set_attr "type" "sseiadd")
13082 (set_attr "atom_unit" "simul")
13083 (set_attr "prefix_extra" "1")
13084 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
13085 (set_attr "mode" "DI")])
;; Mode iterator for the pmulhrsw expanders: V4HI and V8HI unconditionally,
;; V16HI only when TARGET_AVX2 holds.
13087 (define_mode_iterator PMULHRSW
13088 [V4HI V8HI (V16HI "TARGET_AVX2")])
;; Masked pmulhrsw expander over the PMULHRSW modes.  The RTL is a vec_merge
;; of a shifted, sign-extended product of operands 1 and 2 (computed in
;; <ssedoublemode>) with register operand 3, under mask register operand 4
;; in <avx512fmaskmode>.  Enabled under TARGET_AVX512BW && TARGET_AVX512VL.
;; The preparation code sets operands[5] to CONST1_RTX of the vector mode
;; and calls ix86_fixup_binary_operands_no_copy for MULT.
13090 (define_expand "<ssse3_avx2>_pmulhrsw<mode>3_mask"
13091 [(set (match_operand:PMULHRSW 0 "register_operand")
13092 (vec_merge:PMULHRSW
13094 (lshiftrt:<ssedoublemode>
13095 (plus:<ssedoublemode>
13096 (lshiftrt:<ssedoublemode>
13097 (mult:<ssedoublemode>
13098 (sign_extend:<ssedoublemode>
13099 (match_operand:PMULHRSW 1 "nonimmediate_operand"))
13100 (sign_extend:<ssedoublemode>
13101 (match_operand:PMULHRSW 2 "nonimmediate_operand")))
13105 (match_operand:PMULHRSW 3 "register_operand")
13106 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
13107 "TARGET_AVX512BW && TARGET_AVX512VL"
13109 operands[5] = CONST1_RTX(<MODE>mode);
13110 ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
;; Unmasked pmulhrsw expander over the PMULHRSW modes.  The RTL is a
;; shifted, sign-extended product of operands 1 and 2 computed in
;; <ssedoublemode>.  The preparation code sets operands[3] to CONST1_RTX of
;; the vector mode and calls ix86_fixup_binary_operands_no_copy for MULT.
13113 (define_expand "<ssse3_avx2>_pmulhrsw<mode>3"
13114 [(set (match_operand:PMULHRSW 0 "register_operand")
13116 (lshiftrt:<ssedoublemode>
13117 (plus:<ssedoublemode>
13118 (lshiftrt:<ssedoublemode>
13119 (mult:<ssedoublemode>
13120 (sign_extend:<ssedoublemode>
13121 (match_operand:PMULHRSW 1 "nonimmediate_operand"))
13122 (sign_extend:<ssedoublemode>
13123 (match_operand:PMULHRSW 2 "nonimmediate_operand")))
13129 operands[3] = CONST1_RTX(<MODE>mode);
13130 ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
;; Matching insn for pmulhrsw over VI2_AVX2, optionally masked via
;; <mask_name>.  Operands 1 and 2 are commutative (% on operand 1) and
;; operand 3 must satisfy const1_operand.  The condition requires
;; ix86_binary_operator_ok for MULT and <mask_mode512bit_condition>.
;; Two alternatives, selected by the isa attribute: noavx (two-operand
;; pmulhrsw, operand 1 tied to the destination) and avx (three-operand
;; vpmulhrsw with <mask_operand4> appended to the destination).
13133 (define_insn "*<ssse3_avx2>_pmulhrsw<mode>3<mask_name>"
13134 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,v")
13136 (lshiftrt:<ssedoublemode>
13137 (plus:<ssedoublemode>
13138 (lshiftrt:<ssedoublemode>
13139 (mult:<ssedoublemode>
13140 (sign_extend:<ssedoublemode>
13141 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,v"))
13142 (sign_extend:<ssedoublemode>
13143 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,vm")))
13145 (match_operand:VI2_AVX2 3 "const1_operand"))
13148 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)
13149 && <mask_mode512bit_condition>"
13151 pmulhrsw\t{%2, %0|%0, %2}
13152 vpmulhrsw\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}"
13153 [(set_attr "isa" "noavx,avx")
13154 (set_attr "type" "sseimul")
13155 (set_attr "prefix_data16" "1,*")
13156 (set_attr "prefix_extra" "1")
13157 (set_attr "prefix" "orig,maybe_evex")
13158 (set_attr "mode" "<sseinsnmode>")])
;; V4HI pmulhrsw on y-constraint registers.  Operands 1 and 2 are
;; commutative (% on operand 1, which is tied to the destination) and
;; operand 3 must satisfy const1_operand.  Enabled under TARGET_SSSE3 when
;; ix86_binary_operator_ok accepts the operands for MULT.  prefix_rex is
;; computed per insn via x86_extended_reg_mentioned_p.
13160 (define_insn "*ssse3_pmulhrswv4hi3"
13161 [(set (match_operand:V4HI 0 "register_operand" "=y")
13168 (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
13170 (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
13172 (match_operand:V4HI 3 "const1_operand"))
13174 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
13175 "pmulhrsw\t{%2, %0|%0, %2}"
13176 [(set_attr "type" "sseimul")
13177 (set_attr "prefix_extra" "1")
13178 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
13179 (set_attr "mode" "DI")])
;; pshufb over VI1_AVX2, optionally masked via <mask_name>.  Inputs are
;; register operand 1 and register-or-memory operand 2.  Enabled under
;; TARGET_SSSE3 && <mask_mode512bit_condition>.  Two alternatives, selected
;; by the isa attribute: noavx (two-operand pshufb, operand 1 tied to the
;; destination) and avx (three-operand vpshufb with <mask_operand3>
;; appended to the destination).
13181 (define_insn "<ssse3_avx2>_pshufb<mode>3<mask_name>"
13182 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,v")
13184 [(match_operand:VI1_AVX2 1 "register_operand" "0,v")
13185 (match_operand:VI1_AVX2 2 "nonimmediate_operand" "xm,vm")]
13187 "TARGET_SSSE3 && <mask_mode512bit_condition>"
13189 pshufb\t{%2, %0|%0, %2}
13190 vpshufb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13191 [(set_attr "isa" "noavx,avx")
13192 (set_attr "type" "sselog1")
13193 (set_attr "prefix_data16" "1,*")
13194 (set_attr "prefix_extra" "1")
13195 (set_attr "prefix" "orig,maybe_evex")
13196 (set_attr "btver2_decode" "vector,vector")
13197 (set_attr "mode" "<sseinsnmode>")])
;; V8QI pshufb on y-constraint registers: an unspec over operand 1 (tied to
;; the destination) and register-or-memory operand 2.  prefix_rex is
;; computed per insn via x86_extended_reg_mentioned_p.
13199 (define_insn "ssse3_pshufbv8qi3"
13200 [(set (match_operand:V8QI 0 "register_operand" "=y")
13201 (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0")
13202 (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
13205 "pshufb\t{%2, %0|%0, %2}";
13206 [(set_attr "type" "sselog1")
13207 (set_attr "prefix_extra" "1")
13208 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
13209 (set_attr "mode" "DI")])
;; psign over VI124_AVX2; the mnemonic suffix comes from <ssemodesuffix>.
;; Two alternatives, selected by the isa attribute: noavx (two-operand
;; psign, operand 1 tied to the destination) and avx (three-operand vpsign
;; with a VEX prefix).
13211 (define_insn "<ssse3_avx2>_psign<mode>3"
13212 [(set (match_operand:VI124_AVX2 0 "register_operand" "=x,x")
13214 [(match_operand:VI124_AVX2 1 "register_operand" "0,x")
13215 (match_operand:VI124_AVX2 2 "nonimmediate_operand" "xm,xm")]
13219 psign<ssemodesuffix>\t{%2, %0|%0, %2}
13220 vpsign<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
13221 [(set_attr "isa" "noavx,avx")
13222 (set_attr "type" "sselog1")
13223 (set_attr "prefix_data16" "1,*")
13224 (set_attr "prefix_extra" "1")
13225 (set_attr "prefix" "orig,vex")
13226 (set_attr "mode" "<sseinsnmode>")])
;; psign over the MMXMODEI modes on y-constraint registers; the mnemonic
;; suffix comes from <mmxvecsize>.  Operand 1 is tied to the destination.
;; prefix_rex is computed per insn via x86_extended_reg_mentioned_p.
13228 (define_insn "ssse3_psign<mode>3"
13229 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
13231 [(match_operand:MMXMODEI 1 "register_operand" "0")
13232 (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
13235 "psign<mmxvecsize>\t{%2, %0|%0, %2}";
13236 [(set_attr "type" "sselog1")
13237 (set_attr "prefix_extra" "1")
13238 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
13239 (set_attr "mode" "DI")])
;; Masked palignr over VI1_AVX2: a vec_merge of the palignr result with
;; operand 4 (vector_move_operand, constraint 0C) under mask register
;; operand 5 (constraint Yk).  Immediate operand 3 must satisfy
;; const_0_to_255_mul_8_operand; the output code divides it by 8 before
;; printing.  Emits vpalignr with the mask and %N4 appended to the
;; destination; prefix evex.
13241 (define_insn "<ssse3_avx2>_palignr<mode>_mask"
13242 [(set (match_operand:VI1_AVX2 0 "register_operand" "=v")
13243 (vec_merge:VI1_AVX2
13245 [(match_operand:VI1_AVX2 1 "register_operand" "v")
13246 (match_operand:VI1_AVX2 2 "nonimmediate_operand" "vm")
13247 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
13249 (match_operand:VI1_AVX2 4 "vector_move_operand" "0C")
13250 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
13253 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
13254 return "vpalignr\t{%3, %2, %1, %0%{%5%}%N4|%0%{%5%}%N4, %1, %2, %3}";
13256 [(set_attr "type" "sseishft")
13257 (set_attr "atom_unit" "sishuf")
13258 (set_attr "prefix_extra" "1")
13259 (set_attr "length_immediate" "1")
13260 (set_attr "prefix" "evex")
13261 (set_attr "mode" "<sseinsnmode>")])
;; Unmasked palignr over the SSESCALARMODE iterator.  Immediate operand 3
;; must satisfy const_0_to_255_mul_8_operand; the output code divides it by
;; 8 and then switches on which_alternative to return either the
;; two-operand palignr template or the three-operand vpalignr template.
;; The isa attribute ties the alternatives to noavx and avx respectively.
13263 (define_insn "<ssse3_avx2>_palignr<mode>"
13264 [(set (match_operand:SSESCALARMODE 0 "register_operand" "=x,v")
13265 (unspec:SSESCALARMODE
13266 [(match_operand:SSESCALARMODE 1 "register_operand" "0,v")
13267 (match_operand:SSESCALARMODE 2 "nonimmediate_operand" "xm,vm")
13268 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n,n")]
13272 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
13274 switch (which_alternative)
13277 return "palignr\t{%3, %2, %0|%0, %2, %3}";
13279 return "vpalignr\t{%3, %2, %1, %0|%0, %1, %2, %3}";
13281 gcc_unreachable ();
13284 [(set_attr "isa" "noavx,avx")
13285 (set_attr "type" "sseishft")
13286 (set_attr "atom_unit" "sishuf")
13287 (set_attr "prefix_data16" "1,*")
13288 (set_attr "prefix_extra" "1")
13289 (set_attr "length_immediate" "1")
13290 (set_attr "prefix" "orig,vex")
13291 (set_attr "mode" "<sseinsnmode>")])
;; DImode palignr on y-constraint registers: an unspec over operand 1 (tied
;; to the destination), register-or-memory operand 2 and immediate
;; operand 3 (const_0_to_255_mul_8_operand).  The output code divides
;; operand 3 by 8 before printing.  prefix_rex is computed per insn via
;; x86_extended_reg_mentioned_p.
13293 (define_insn "ssse3_palignrdi"
13294 [(set (match_operand:DI 0 "register_operand" "=y")
13295 (unspec:DI [(match_operand:DI 1 "register_operand" "0")
13296 (match_operand:DI 2 "nonimmediate_operand" "ym")
13297 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
13301 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
13302 return "palignr\t{%3, %2, %0|%0, %2, %3}";
13304 [(set_attr "type" "sseishft")
13305 (set_attr "atom_unit" "sishuf")
13306 (set_attr "prefix_extra" "1")
13307 (set_attr "length_immediate" "1")
13308 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
13309 (set_attr "mode" "DI")])
;; Vector pabs insn over VI_AVX2, optionally masked via <mask_name>.
;; Source operand 1 may be a register or memory.  Enabled under
;; TARGET_SSSE3 && <mask_mode512bit_condition>, and for V2DImode only when
;; TARGET_AVX holds.  The template uses %v, so one pattern serves both the
;; legacy and the VEX form, and appends <mask_operand2> to the destination.
13311 (define_insn "<mask_codefor>abs<mode>2<mask_name>"
13312 [(set (match_operand:VI_AVX2 0 "register_operand" "=v")
13314 (match_operand:VI_AVX2 1 "nonimmediate_operand" "vm")))]
13315 "TARGET_SSSE3 && <mask_mode512bit_condition> && (<MODE>mode != V2DImode || TARGET_AVX)"
13316 "%vpabs<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
13317 [(set_attr "type" "sselog1")
13318 (set_attr "prefix_data16" "1")
13319 (set_attr "prefix_extra" "1")
13320 (set_attr "prefix" "maybe_vex")
13321 (set_attr "mode" "<sseinsnmode>")])
13323 ;; TODO: this is not in the patch; check the iterator used below.
;; Vector abs expander over VI_AVX2.  Enabled under TARGET_SSE2, and for
;; V2DImode only when TARGET_AVX holds.  The visible preparation code calls
;; ix86_expand_sse2_abs on operands 0 and 1.
13324 (define_expand "abs<mode>2"
13325 [(set (match_operand:VI_AVX2 0 "register_operand")
13327 (match_operand:VI_AVX2 1 "nonimmediate_operand")))]
13328 "TARGET_SSE2 && (<MODE>mode != V2DImode || TARGET_AVX)"
13332 ix86_expand_sse2_abs (operands[0], operands[1]);
;; pabs insn over the MMXMODEI modes on y-constraint registers; the
;; mnemonic suffix comes from <mmxvecsize>.  Source operand 1 may be a
;; register or memory.  prefix_rep is forced to 0 and prefix_rex is computed
;; per insn via x86_extended_reg_mentioned_p.
13337 (define_insn "abs<mode>2"
13338 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
13340 (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
13342 "pabs<mmxvecsize>\t{%1, %0|%0, %1}";
13343 [(set_attr "type" "sselog1")
13344 (set_attr "prefix_rep" "0")
13345 (set_attr "prefix_extra" "1")
13346 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
13347 (set_attr "mode" "DI")])
13349 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
13351 ;; AMD SSE4A instructions
13353 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; sse4a_movnt<mode>: stores register operand 1 (constraint x) to memory
;; operand 0, both in a MODEF mode, through an unspec.  The mnemonic is
;; movnt followed by <ssemodesuffix>.
13355 (define_insn "sse4a_movnt<mode>"
13356 [(set (match_operand:MODEF 0 "memory_operand" "=m")
13358 [(match_operand:MODEF 1 "register_operand" "x")]
13361 "movnt<ssemodesuffix>\t{%1, %0|%0, %1}"
13362 [(set_attr "type" "ssemov")
13363 (set_attr "mode" "<MODE>")])
;; sse4a_vmmovnt<mode>: stores element 0 of VF_128 register operand 1
;; (selected with vec_select) to <ssescalarmode> memory operand 0, through
;; an unspec.  The mnemonic is movnt followed by <ssescalarmodesuffix>.
13365 (define_insn "sse4a_vmmovnt<mode>"
13366 [(set (match_operand:<ssescalarmode> 0 "memory_operand" "=m")
13367 (unspec:<ssescalarmode>
13368 [(vec_select:<ssescalarmode>
13369 (match_operand:VF_128 1 "register_operand" "x")
13370 (parallel [(const_int 0)]))]
13373 "movnt<ssescalarmodesuffix>\t{%1, %0|%0, %1}"
13374 [(set_attr "type" "ssemov")
13375 (set_attr "mode" "<ssescalarmode>")])
;; sse4a_extrqi: immediate form of extrq.  V2DI operand 1 is tied to the
;; destination; operands 2 and 3 are immediates satisfying
;; const_0_to_255_operand, which is why length_immediate is 2.
13377 (define_insn "sse4a_extrqi"
13378 [(set (match_operand:V2DI 0 "register_operand" "=x")
13379 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
13380 (match_operand 2 "const_0_to_255_operand")
13381 (match_operand 3 "const_0_to_255_operand")]
13384 "extrq\t{%3, %2, %0|%0, %2, %3}"
13385 [(set_attr "type" "sse")
13386 (set_attr "prefix_data16" "1")
13387 (set_attr "length_immediate" "2")
13388 (set_attr "mode" "TI")])
;; sse4a_extrq: register form of extrq.  V2DI operand 1 is tied to the
;; destination and the second input is V16QI register operand 2.
13390 (define_insn "sse4a_extrq"
13391 [(set (match_operand:V2DI 0 "register_operand" "=x")
13392 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
13393 (match_operand:V16QI 2 "register_operand" "x")]
13396 "extrq\t{%2, %0|%0, %2}"
13397 [(set_attr "type" "sse")
13398 (set_attr "prefix_data16" "1")
13399 (set_attr "mode" "TI")])
;; sse4a_insertqi: immediate form of insertq.  V2DI operand 1 is tied to the
;; destination, operand 2 is a V2DI register, and operands 3 and 4 are
;; immediates satisfying const_0_to_255_operand (length_immediate 2).
;; prefix_data16 is 0 and prefix_rep is 1.
13401 (define_insn "sse4a_insertqi"
13402 [(set (match_operand:V2DI 0 "register_operand" "=x")
13403 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
13404 (match_operand:V2DI 2 "register_operand" "x")
13405 (match_operand 3 "const_0_to_255_operand")
13406 (match_operand 4 "const_0_to_255_operand")]
13409 "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
13410 [(set_attr "type" "sseins")
13411 (set_attr "prefix_data16" "0")
13412 (set_attr "prefix_rep" "1")
13413 (set_attr "length_immediate" "2")
13414 (set_attr "mode" "TI")])
;; sse4a_insertq: register form of insertq.  V2DI operand 1 is tied to the
;; destination and operand 2 is a V2DI register.  prefix_data16 is 0 and
;; prefix_rep is 1.
13416 (define_insn "sse4a_insertq"
13417 [(set (match_operand:V2DI 0 "register_operand" "=x")
13418 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
13419 (match_operand:V2DI 2 "register_operand" "x")]
13422 "insertq\t{%2, %0|%0, %2}"
13423 [(set_attr "type" "sseins")
13424 (set_attr "prefix_data16" "0")
13425 (set_attr "prefix_rep" "1")
13426 (set_attr "mode" "TI")])
13428 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
13430 ;; Intel SSE4.1 instructions
13432 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Immediate blend over VF_128_256, expressed as a vec_merge of operand 2
;; (register or memory) and operand 1 (register) under immediate operand 3,
;; whose predicate is const_0_to_<blendbits>_operand.  Two alternatives,
;; selected by the isa attribute: noavx (two-operand blend, operand 1 tied
;; to the destination) and avx (three-operand vblend).
13434 (define_insn "<sse4_1>_blend<ssemodesuffix><avxsizesuffix>"
13435 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
13436 (vec_merge:VF_128_256
13437 (match_operand:VF_128_256 2 "nonimmediate_operand" "xm,xm")
13438 (match_operand:VF_128_256 1 "register_operand" "0,x")
13439 (match_operand:SI 3 "const_0_to_<blendbits>_operand")))]
13442 blend<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
13443 vblend<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
13444 [(set_attr "isa" "noavx,avx")
13445 (set_attr "type" "ssemov")
13446 (set_attr "length_immediate" "1")
13447 (set_attr "prefix_data16" "1,*")
13448 (set_attr "prefix_extra" "1")
13449 (set_attr "prefix" "orig,vex")
13450 (set_attr "mode" "<MODE>")])
;; Variable blend over VF_128_256 with a register selector, operand 3.
;; In the noavx alternative operand 1 is tied to the destination and
;; operand 3 uses constraint Yz; in the avx alternative all three inputs
;; are free x registers and the four-operand vblendv form is emitted.
;; NOTE(review): length_immediate is 1 for both alternatives here, while
;; the pblendvb pattern uses "*,1" -- confirm which is intended for the
;; noavx form.
13452 (define_insn "<sse4_1>_blendv<ssemodesuffix><avxsizesuffix>"
13453 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
13455 [(match_operand:VF_128_256 1 "register_operand" "0,x")
13456 (match_operand:VF_128_256 2 "nonimmediate_operand" "xm,xm")
13457 (match_operand:VF_128_256 3 "register_operand" "Yz,x")]
13461 blendv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
13462 vblendv<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
13463 [(set_attr "isa" "noavx,avx")
13464 (set_attr "type" "ssemov")
13465 (set_attr "length_immediate" "1")
13466 (set_attr "prefix_data16" "1,*")
13467 (set_attr "prefix_extra" "1")
13468 (set_attr "prefix" "orig,vex")
13469 (set_attr "btver2_decode" "vector,vector")
13470 (set_attr "mode" "<MODE>")])
;; dp<ssemodesuffix> over VF_128_256 with immediate operand 3
;; (const_0_to_255_operand).  Operands 1 and 2 are commutative (% on
;; operand 1).  Two alternatives, selected by the isa attribute: noavx
;; (two-operand dp, operand 1 tied to the destination) and avx
;; (three-operand vdp).  Type is ssemul.
13472 (define_insn "<sse4_1>_dp<ssemodesuffix><avxsizesuffix>"
13473 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
13475 [(match_operand:VF_128_256 1 "nonimmediate_operand" "%0,x")
13476 (match_operand:VF_128_256 2 "nonimmediate_operand" "xm,xm")
13477 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
13481 dp<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
13482 vdp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
13483 [(set_attr "isa" "noavx,avx")
13484 (set_attr "type" "ssemul")
13485 (set_attr "length_immediate" "1")
13486 (set_attr "prefix_data16" "1,*")
13487 (set_attr "prefix_extra" "1")
13488 (set_attr "prefix" "orig,vex")
13489 (set_attr "btver2_decode" "vector,vector")
13490 (set_attr "mode" "<MODE>")])
;; movntdqa over VI8_AVX2_AVX512F: loads memory operand 1 into register
;; operand 0 through an unspec.  There are two alternatives that differ
;; only in the destination register class (x or v); the per-alternative
;; attributes are prefix_extra 1 / * and prefix maybe_vex / evex.
13492 (define_insn "<sse4_1_avx2>_movntdqa"
13493 [(set (match_operand:VI8_AVX2_AVX512F 0 "register_operand" "=x, v")
13494 (unspec:VI8_AVX2_AVX512F [(match_operand:VI8_AVX2_AVX512F 1 "memory_operand" "m, m")]
13497 "%vmovntdqa\t{%1, %0|%0, %1}"
13498 [(set_attr "type" "ssemov")
13499 (set_attr "prefix_extra" "1, *")
13500 (set_attr "prefix" "maybe_vex, evex")
13501 (set_attr "mode" "<sseinsnmode>")])
;; mpsadbw over VI1_AVX2 with immediate operand 3 (const_0_to_255_operand).
;; Two alternatives, selected by the isa attribute: noavx (two-operand
;; mpsadbw, operand 1 tied to the destination) and avx (three-operand
;; vmpsadbw with a VEX prefix).
13503 (define_insn "<sse4_1_avx2>_mpsadbw"
13504 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
13506 [(match_operand:VI1_AVX2 1 "register_operand" "0,x")
13507 (match_operand:VI1_AVX2 2 "nonimmediate_operand" "xm,xm")
13508 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
13512 mpsadbw\t{%3, %2, %0|%0, %2, %3}
13513 vmpsadbw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
13514 [(set_attr "isa" "noavx,avx")
13515 (set_attr "type" "sselog1")
13516 (set_attr "length_immediate" "1")
13517 (set_attr "prefix_extra" "1")
13518 (set_attr "prefix" "orig,vex")
13519 (set_attr "btver2_decode" "vector,vector")
13520 (set_attr "mode" "<sseinsnmode>")])
;; packusdw over VI2_AVX2, optionally masked via <mask_name>.  The result
;; is the vec_concat of two us_truncate halves (<ssehalfvecmode>) taken
;; from operands 1 and 2, which are in <sseunpackmode>.  Enabled under
;; TARGET_SSE4_1 && <mask_mode512bit_condition>.  Two alternatives,
;; selected by the isa attribute: noavx (two-operand packusdw) and avx
;; (three-operand vpackusdw with <mask_operand3> appended to the
;; destination).
13522 (define_insn "<sse4_1_avx2>_packusdw<mask_name>"
13523 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,v")
13524 (vec_concat:VI2_AVX2
13525 (us_truncate:<ssehalfvecmode>
13526 (match_operand:<sseunpackmode> 1 "register_operand" "0,v"))
13527 (us_truncate:<ssehalfvecmode>
13528 (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,vm"))))]
13529 "TARGET_SSE4_1 && <mask_mode512bit_condition>"
13531 packusdw\t{%2, %0|%0, %2}
13532 vpackusdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13533 [(set_attr "isa" "noavx,avx")
13534 (set_attr "type" "sselog")
13535 (set_attr "prefix_extra" "1")
13536 (set_attr "prefix" "orig,maybe_evex")
13537 (set_attr "mode" "<sseinsnmode>")])
;; pblendvb over VI1_AVX2 with a register selector, operand 3.  In the
;; noavx alternative operand 1 is tied to the destination and operand 3
;; uses constraint Yz; in the avx alternative all inputs are free x
;; registers and the four-operand vpblendvb form is emitted.
;; length_immediate is set only for the avx alternative ("*,1").
13539 (define_insn "<sse4_1_avx2>_pblendvb"
13540 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
13542 [(match_operand:VI1_AVX2 1 "register_operand" "0,x")
13543 (match_operand:VI1_AVX2 2 "nonimmediate_operand" "xm,xm")
13544 (match_operand:VI1_AVX2 3 "register_operand" "Yz,x")]
13548 pblendvb\t{%3, %2, %0|%0, %2, %3}
13549 vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}"
13550 [(set_attr "isa" "noavx,avx")
13551 (set_attr "type" "ssemov")
13552 (set_attr "prefix_extra" "1")
13553 (set_attr "length_immediate" "*,1")
13554 (set_attr "prefix" "orig,vex")
13555 (set_attr "btver2_decode" "vector,vector")
13556 (set_attr "mode" "<sseinsnmode>")])
;; sse4_1_pblendw: V8HI blend of operand 2 (register or memory) and
;; operand 1 (register) under immediate operand 3 (const_0_to_255_operand).
;; Two alternatives, selected by the isa attribute: noavx (two-operand
;; pblendw, operand 1 tied to the destination) and avx (three-operand
;; vpblendw).
13558 (define_insn "sse4_1_pblendw"
13559 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
13561 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
13562 (match_operand:V8HI 1 "register_operand" "0,x")
13563 (match_operand:SI 3 "const_0_to_255_operand" "n,n")))]
13566 pblendw\t{%3, %2, %0|%0, %2, %3}
13567 vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
13568 [(set_attr "isa" "noavx,avx")
13569 (set_attr "type" "ssemov")
13570 (set_attr "prefix_extra" "1")
13571 (set_attr "length_immediate" "1")
13572 (set_attr "prefix" "orig,vex")
13573 (set_attr "mode" "TI")])
13575 ;; The builtin uses an 8-bit immediate. Expand that.
;; Expander for the 256-bit word blend on V16HI.  The incoming operand 3
;; is limited to 0..255; the preparation code masks it to its low eight
;; bits and copies those bits into bits 8-15 as well (val << 8 | val),
;; giving a 16-bit value, i.e. one bit per V16HI element.
;; NOTE(review): presumably this is the vec_merge mask consumed by the
;; matching insn; the rtx code line is not visible in this chunk.
13576 (define_expand "avx2_pblendw"
13577 [(set (match_operand:V16HI 0 "register_operand")
13579 (match_operand:V16HI 2 "nonimmediate_operand")
13580 (match_operand:V16HI 1 "register_operand")
13581 (match_operand:SI 3 "const_0_to_255_operand")))]
13584 HOST_WIDE_INT val = INTVAL (operands[3]) & 0xff;
13585 operands[3] = GEN_INT (val << 8 | val);
;; Matching insn for the 256-bit word blend.  Operand 3 must satisfy
;; avx2_pblendw_operand; the output code reduces it to its low eight
;; bits again before printing vpblendw, since the instruction encodes a
;; one-byte immediate (length_immediate is 1).
13588 (define_insn "*avx2_pblendw"
13589 [(set (match_operand:V16HI 0 "register_operand" "=x")
13591 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
13592 (match_operand:V16HI 1 "register_operand" "x")
13593 (match_operand:SI 3 "avx2_pblendw_operand" "n")))]
13596 operands[3] = GEN_INT (INTVAL (operands[3]) & 0xff);
13597 return "vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}";
13599 [(set_attr "type" "ssemov")
13600 (set_attr "prefix_extra" "1")
13601 (set_attr "length_immediate" "1")
13602 (set_attr "prefix" "vex")
13603 (set_attr "mode" "OI")])
;; Doubleword blend with an immediate (vpblendd) over the VI4_AVX2
;; modes: a vec_merge of operand 2 and operand 1 controlled by the
;; immediate in operand 3 (0..255).  VEX-encoded only.
13605 (define_insn "avx2_pblendd<mode>"
13606 [(set (match_operand:VI4_AVX2 0 "register_operand" "=x")
13607 (vec_merge:VI4_AVX2
13608 (match_operand:VI4_AVX2 2 "nonimmediate_operand" "xm")
13609 (match_operand:VI4_AVX2 1 "register_operand" "x")
13610 (match_operand:SI 3 "const_0_to_255_operand" "n")))]
13612 "vpblendd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
13613 [(set_attr "type" "ssemov")
13614 (set_attr "prefix_extra" "1")
13615 (set_attr "length_immediate" "1")
13616 (set_attr "prefix" "vex")
13617 (set_attr "mode" "<sseinsnmode>")])
;; phminposuw on V8HI.  The operation is opaque to the RTL optimizers:
;; operand 1 is wrapped in UNSPEC_PHMINPOSUW.  The %v prefix in the
;; template lets the same pattern print the VEX form (prefix attribute
;; is maybe_vex).
13619 (define_insn "sse4_1_phminposuw"
13620 [(set (match_operand:V8HI 0 "register_operand" "=x")
13621 (unspec:V8HI [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
13622 UNSPEC_PHMINPOSUW))]
13624 "%vphminposuw\t{%1, %0|%0, %1}"
13625 [(set_attr "type" "sselog1")
13626 (set_attr "prefix_extra" "1")
13627 (set_attr "prefix" "maybe_vex")
13628 (set_attr "mode" "TI")])
;; Widen all sixteen bytes of V16QI operand 1 to V16HI with
;; vpmov<extsuffix>bw.  The whole source operand is read, so no size
;; modifier is applied to %1.
;; NOTE(review): the extension rtx code line is not visible in this
;; chunk; <code> and <extsuffix> presumably select sign or zero
;; extension.
13630 (define_insn "avx2_<code>v16qiv16hi2<mask_name>"
13631 [(set (match_operand:V16HI 0 "register_operand" "=v")
13633 (match_operand:V16QI 1 "nonimmediate_operand" "vm")))]
13634 "TARGET_AVX2 && <mask_mode512bit_condition>"
13635 "vpmov<extsuffix>bw\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
13636 [(set_attr "type" "ssemov")
13637 (set_attr "prefix_extra" "1")
13638 (set_attr "prefix" "maybe_evex")
13639 (set_attr "mode" "OI")])
;; Widen all thirty-two bytes of V32QI operand 1 to V32HI with
;; vpmov<extsuffix>bw (EVEX encoding, XI mode).
;; NOTE(review): the extension rtx code line and the insn condition are
;; not visible in this chunk.
13641 (define_insn "avx512bw_<code>v32qiv32hi2<mask_name>"
13642 [(set (match_operand:V32HI 0 "register_operand" "=v")
13644 (match_operand:V32QI 1 "nonimmediate_operand" "vm")))]
13646 "vpmov<extsuffix>bw\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
13647 [(set_attr "type" "ssemov")
13648 (set_attr "prefix_extra" "1")
13649 (set_attr "prefix" "evex")
13650 (set_attr "mode" "XI")])
;; Widen the low eight bytes of V16QI operand 1 (elements 0-7 of the
;; parallel) to V8HI with pmov<extsuffix>bw.  Only 64 bits of the source
;; are read, hence the %q1 modifier in the Intel-syntax template and the
;; ssememalign value of 64.
;; NOTE(review): the extension and vec_select rtx code lines are not
;; visible in this chunk.
13652 (define_insn "sse4_1_<code>v8qiv8hi2<mask_name>"
13653 [(set (match_operand:V8HI 0 "register_operand" "=v")
13656 (match_operand:V16QI 1 "nonimmediate_operand" "vm")
13657 (parallel [(const_int 0) (const_int 1)
13658 (const_int 2) (const_int 3)
13659 (const_int 4) (const_int 5)
13660 (const_int 6) (const_int 7)]))))]
13661 "TARGET_SSE4_1 && <mask_mode512bit_condition>"
13662 "%vpmov<extsuffix>bw\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
13663 [(set_attr "type" "ssemov")
13664 (set_attr "ssememalign" "64")
13665 (set_attr "prefix_extra" "1")
13666 (set_attr "prefix" "maybe_vex")
13667 (set_attr "mode" "TI")])
;; Widen all sixteen bytes of V16QI operand 1 to V16SI with
;; vpmov<extsuffix>bd (EVEX encoding, XI mode).
;; The whole 128-bit source operand is read (there is no element
;; selection here), so the Intel-syntax template prints operand 1
;; without a size modifier; a q modifier would describe a 64-bit
;; memory operand, which is narrower than the 128-bit source.
;; NOTE(review): the extension rtx code line and the insn condition are
;; not visible in this chunk.
13669 (define_insn "<mask_codefor>avx512f_<code>v16qiv16si2<mask_name>"
13670 [(set (match_operand:V16SI 0 "register_operand" "=v")
13672 (match_operand:V16QI 1 "nonimmediate_operand" "vm")))]
13674 "vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
13675 [(set_attr "type" "ssemov")
13676 (set_attr "prefix" "evex")
13677 (set_attr "mode" "XI")])
;; Widen the low eight bytes of V16QI operand 1 (elements 0-7 of the
;; parallel) to V8SI with vpmov<extsuffix>bd.  Only 64 bits of the
;; source are read, hence the %q1 modifier in the Intel-syntax template.
;; NOTE(review): the extension and vec_select rtx code lines are not
;; visible in this chunk.
13679 (define_insn "avx2_<code>v8qiv8si2<mask_name>"
13680 [(set (match_operand:V8SI 0 "register_operand" "=v")
13683 (match_operand:V16QI 1 "nonimmediate_operand" "vm")
13684 (parallel [(const_int 0) (const_int 1)
13685 (const_int 2) (const_int 3)
13686 (const_int 4) (const_int 5)
13687 (const_int 6) (const_int 7)]))))]
13688 "TARGET_AVX2 && <mask_mode512bit_condition>"
13689 "vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
13690 [(set_attr "type" "ssemov")
13691 (set_attr "prefix_extra" "1")
13692 (set_attr "prefix" "maybe_evex")
13693 (set_attr "mode" "OI")])
;; Widen the low four bytes of V16QI operand 1 (elements 0-3 of the
;; parallel) to V4SI with pmov<extsuffix>bd.  Only 32 bits of the source
;; are read, hence the %k1 modifier in the Intel-syntax template and the
;; ssememalign value of 32.
;; NOTE(review): the extension and vec_select rtx code lines are not
;; visible in this chunk.
13695 (define_insn "sse4_1_<code>v4qiv4si2<mask_name>"
13696 [(set (match_operand:V4SI 0 "register_operand" "=v")
13699 (match_operand:V16QI 1 "nonimmediate_operand" "vm")
13700 (parallel [(const_int 0) (const_int 1)
13701 (const_int 2) (const_int 3)]))))]
13702 "TARGET_SSE4_1 && <mask_mode512bit_condition>"
13703 "%vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %k1}"
13704 [(set_attr "type" "ssemov")
13705 (set_attr "ssememalign" "32")
13706 (set_attr "prefix_extra" "1")
13707 (set_attr "prefix" "maybe_vex")
13708 (set_attr "mode" "TI")])
;; Widen all sixteen words of V16HI operand 1 to V16SI with
;; vpmov<extsuffix>wd (EVEX encoding, XI mode).  The whole source
;; operand is read, so %1 carries no size modifier.
;; NOTE(review): the extension rtx code line and the insn condition are
;; not visible in this chunk.
13710 (define_insn "avx512f_<code>v16hiv16si2<mask_name>"
13711 [(set (match_operand:V16SI 0 "register_operand" "=v")
13713 (match_operand:V16HI 1 "nonimmediate_operand" "vm")))]
13715 "vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
13716 [(set_attr "type" "ssemov")
13717 (set_attr "prefix" "evex")
13718 (set_attr "mode" "XI")])
;; Widen all eight words of V8HI operand 1 to V8SI with
;; vpmov<extsuffix>wd.  The whole source operand is read, so %1 carries
;; no size modifier.
;; NOTE(review): the extension rtx code line is not visible in this
;; chunk.
13720 (define_insn "avx2_<code>v8hiv8si2<mask_name>"
13721 [(set (match_operand:V8SI 0 "register_operand" "=v")
13723 (match_operand:V8HI 1 "nonimmediate_operand" "vm")))]
13724 "TARGET_AVX2 && <mask_mode512bit_condition>"
13725 "vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
13726 [(set_attr "type" "ssemov")
13727 (set_attr "prefix_extra" "1")
13728 (set_attr "prefix" "maybe_evex")
13729 (set_attr "mode" "OI")])
;; Widen the low four words of V8HI operand 1 (elements 0-3 of the
;; parallel) to V4SI with pmov<extsuffix>wd.  Only 64 bits of the source
;; are read, hence the %q1 modifier in the Intel-syntax template and the
;; ssememalign value of 64.
;; NOTE(review): the extension and vec_select rtx code lines are not
;; visible in this chunk.
13731 (define_insn "sse4_1_<code>v4hiv4si2<mask_name>"
13732 [(set (match_operand:V4SI 0 "register_operand" "=v")
13735 (match_operand:V8HI 1 "nonimmediate_operand" "vm")
13736 (parallel [(const_int 0) (const_int 1)
13737 (const_int 2) (const_int 3)]))))]
13738 "TARGET_SSE4_1 && <mask_mode512bit_condition>"
13739 "%vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
13740 [(set_attr "type" "ssemov")
13741 (set_attr "ssememalign" "64")
13742 (set_attr "prefix_extra" "1")
13743 (set_attr "prefix" "maybe_vex")
13744 (set_attr "mode" "TI")])
;; Widen the low eight bytes of V16QI operand 1 (elements 0-7 of the
;; parallel) to V8DI with vpmov<extsuffix>bq (EVEX encoding, XI mode).
;; Eight bytes, i.e. 64 bits, of the source are read, so the
;; Intel-syntax template prints operand 1 with the q (64-bit) modifier,
;; the same width the 256-bit byte-to-dword pattern uses for its eight
;; source bytes.  A k (32-bit) modifier would understate the access.
;; NOTE(review): the extension and vec_select rtx code lines and the
;; insn condition are not visible in this chunk.
13746 (define_insn "avx512f_<code>v8qiv8di2<mask_name>"
13747 [(set (match_operand:V8DI 0 "register_operand" "=v")
13750 (match_operand:V16QI 1 "nonimmediate_operand" "vm")
13751 (parallel [(const_int 0) (const_int 1)
13752 (const_int 2) (const_int 3)
13753 (const_int 4) (const_int 5)
13754 (const_int 6) (const_int 7)]))))]
13756 "vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
13757 [(set_attr "type" "ssemov")
13758 (set_attr "prefix" "evex")
13759 (set_attr "mode" "XI")])
;; Widen the low four bytes of V16QI operand 1 (elements 0-3 of the
;; parallel) to V4DI with vpmov<extsuffix>bq.  Only 32 bits of the
;; source are read, hence the %k1 modifier in the Intel-syntax template.
;; NOTE(review): the extension and vec_select rtx code lines are not
;; visible in this chunk.
13761 (define_insn "avx2_<code>v4qiv4di2<mask_name>"
13762 [(set (match_operand:V4DI 0 "register_operand" "=v")
13765 (match_operand:V16QI 1 "nonimmediate_operand" "vm")
13766 (parallel [(const_int 0) (const_int 1)
13767 (const_int 2) (const_int 3)]))))]
13768 "TARGET_AVX2 && <mask_mode512bit_condition>"
13769 "vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %k1}"
13770 [(set_attr "type" "ssemov")
13771 (set_attr "prefix_extra" "1")
13772 (set_attr "prefix" "maybe_evex")
13773 (set_attr "mode" "OI")])
;; Widen the low two bytes of V16QI operand 1 (elements 0-1 of the
;; parallel) to V2DI with pmov<extsuffix>bq.  Only 16 bits of the source
;; are read, hence the %w1 modifier in the Intel-syntax template and the
;; ssememalign value of 16.
;; NOTE(review): the extension and vec_select rtx code lines are not
;; visible in this chunk.
13775 (define_insn "sse4_1_<code>v2qiv2di2<mask_name>"
13776 [(set (match_operand:V2DI 0 "register_operand" "=v")
13779 (match_operand:V16QI 1 "nonimmediate_operand" "vm")
13780 (parallel [(const_int 0) (const_int 1)]))))]
13781 "TARGET_SSE4_1 && <mask_mode512bit_condition>"
13782 "%vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %w1}"
13783 [(set_attr "type" "ssemov")
13784 (set_attr "ssememalign" "16")
13785 (set_attr "prefix_extra" "1")
13786 (set_attr "prefix" "maybe_vex")
13787 (set_attr "mode" "TI")])
;; Widen all eight words of V8HI operand 1 to V8DI with
;; vpmov<extsuffix>wq (EVEX encoding, XI mode).
;; The whole 128-bit source operand is read (there is no element
;; selection here), so the Intel-syntax template prints operand 1
;; without a size modifier; a q modifier would describe a 64-bit
;; memory operand, which is narrower than the 128-bit source.
;; NOTE(review): the extension rtx code line and the insn condition are
;; not visible in this chunk.
13789 (define_insn "avx512f_<code>v8hiv8di2<mask_name>"
13790 [(set (match_operand:V8DI 0 "register_operand" "=v")
13792 (match_operand:V8HI 1 "nonimmediate_operand" "vm")))]
13794 "vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
13795 [(set_attr "type" "ssemov")
13796 (set_attr "prefix" "evex")
13797 (set_attr "mode" "XI")])
;; Widen the low four words of V8HI operand 1 (elements 0-3 of the
;; parallel) to V4DI with vpmov<extsuffix>wq.  Only 64 bits of the
;; source are read, hence the %q1 modifier in the Intel-syntax template.
;; NOTE(review): the extension and vec_select rtx code lines are not
;; visible in this chunk.
13799 (define_insn "avx2_<code>v4hiv4di2<mask_name>"
13800 [(set (match_operand:V4DI 0 "register_operand" "=v")
13803 (match_operand:V8HI 1 "nonimmediate_operand" "vm")
13804 (parallel [(const_int 0) (const_int 1)
13805 (const_int 2) (const_int 3)]))))]
13806 "TARGET_AVX2 && <mask_mode512bit_condition>"
13807 "vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
13808 [(set_attr "type" "ssemov")
13809 (set_attr "prefix_extra" "1")
13810 (set_attr "prefix" "maybe_evex")
13811 (set_attr "mode" "OI")])
;; Widen the low two words of V8HI operand 1 (elements 0-1 of the
;; parallel) to V2DI with pmov<extsuffix>wq.  Only 32 bits of the source
;; are read, hence the %k1 modifier in the Intel-syntax template and the
;; ssememalign value of 32.
;; NOTE(review): the extension and vec_select rtx code lines are not
;; visible in this chunk.
13813 (define_insn "sse4_1_<code>v2hiv2di2<mask_name>"
13814 [(set (match_operand:V2DI 0 "register_operand" "=v")
13817 (match_operand:V8HI 1 "nonimmediate_operand" "vm")
13818 (parallel [(const_int 0) (const_int 1)]))))]
13819 "TARGET_SSE4_1 && <mask_mode512bit_condition>"
13820 "%vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %k1}"
13821 [(set_attr "type" "ssemov")
13822 (set_attr "ssememalign" "32")
13823 (set_attr "prefix_extra" "1")
13824 (set_attr "prefix" "maybe_vex")
13825 (set_attr "mode" "TI")])
;; Widen all eight doublewords of V8SI operand 1 to V8DI with
;; vpmov<extsuffix>dq (EVEX encoding, XI mode).  The whole source
;; operand is read, so %1 carries no size modifier.
;; NOTE(review): the extension rtx code line and the insn condition are
;; not visible in this chunk.
13827 (define_insn "avx512f_<code>v8siv8di2<mask_name>"
13828 [(set (match_operand:V8DI 0 "register_operand" "=v")
13830 (match_operand:V8SI 1 "nonimmediate_operand" "vm")))]
13832 "vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
13833 [(set_attr "type" "ssemov")
13834 (set_attr "prefix" "evex")
13835 (set_attr "mode" "XI")])
;; Widen all four doublewords of V4SI operand 1 to V4DI with
;; vpmov<extsuffix>dq.  The whole source operand is read, so %1 carries
;; no size modifier.
;; NOTE(review): the extension rtx code line is not visible in this
;; chunk.
13837 (define_insn "avx2_<code>v4siv4di2<mask_name>"
13838 [(set (match_operand:V4DI 0 "register_operand" "=v")
13840 (match_operand:V4SI 1 "nonimmediate_operand" "vm")))]
13841 "TARGET_AVX2 && <mask_mode512bit_condition>"
13842 "vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
13843 [(set_attr "type" "ssemov")
13844 (set_attr "prefix" "maybe_evex")
13845 (set_attr "prefix_extra" "1")
13846 (set_attr "mode" "OI")])
;; Widen the low two doublewords of V4SI operand 1 (elements 0-1 of the
;; parallel) to V2DI with pmov<extsuffix>dq.  Only 64 bits of the source
;; are read, hence the %q1 modifier in the Intel-syntax template and the
;; ssememalign value of 64.
;; NOTE(review): the extension and vec_select rtx code lines are not
;; visible in this chunk.
13848 (define_insn "sse4_1_<code>v2siv2di2<mask_name>"
13849 [(set (match_operand:V2DI 0 "register_operand" "=v")
13852 (match_operand:V4SI 1 "nonimmediate_operand" "vm")
13853 (parallel [(const_int 0) (const_int 1)]))))]
13854 "TARGET_SSE4_1 && <mask_mode512bit_condition>"
13855 "%vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
13856 [(set_attr "type" "ssemov")
13857 (set_attr "ssememalign" "64")
13858 (set_attr "prefix_extra" "1")
13859 (set_attr "prefix" "maybe_vex")
13860 (set_attr "mode" "TI")])
13862 ;; vtestps/vtestpd are very similar to comiss and ucomiss when
13863 ;; setting FLAGS_REG, but they are not really compare instructions.
;; vtestps / vtestpd over the VF_128_256 modes.  The only result is
;; FLAGS_REG in CC mode, set from an unspec of operands 0 and 1; no
;; vector register is written.
;; NOTE(review): the unspec name and the insn condition are on lines
;; not visible in this chunk.
13864 (define_insn "avx_vtest<ssemodesuffix><avxsizesuffix>"
13865 [(set (reg:CC FLAGS_REG)
13866 (unspec:CC [(match_operand:VF_128_256 0 "register_operand" "x")
13867 (match_operand:VF_128_256 1 "nonimmediate_operand" "xm")]
13870 "vtest<ssemodesuffix>\t{%1, %0|%0, %1}"
13871 [(set_attr "type" "ssecomi")
13872 (set_attr "prefix_extra" "1")
13873 (set_attr "prefix" "vex")
13874 (set_attr "mode" "<MODE>")])
13876 ;; ptest is very similar to comiss and ucomiss when setting FLAGS_REG.
13877 ;; But it is not really a compare instruction.
;; 256-bit vptest on V4DI.  The only result is FLAGS_REG in CC mode,
;; set from an unspec of operands 0 and 1.  VEX-encoded only.
;; NOTE(review): the unspec name and the insn condition are on lines
;; not visible in this chunk.
13878 (define_insn "avx_ptest256"
13879 [(set (reg:CC FLAGS_REG)
13880 (unspec:CC [(match_operand:V4DI 0 "register_operand" "x")
13881 (match_operand:V4DI 1 "nonimmediate_operand" "xm")]
13884 "vptest\t{%1, %0|%0, %1}"
13885 [(set_attr "type" "ssecomi")
13886 (set_attr "prefix_extra" "1")
13887 (set_attr "prefix" "vex")
13888 (set_attr "btver2_decode" "vector")
13889 (set_attr "mode" "OI")])
;; 128-bit ptest on V2DI.  The only result is FLAGS_REG in CC mode, set
;; from an unspec of operands 0 and 1.  The %v prefix lets the same
;; pattern print the VEX form (prefix attribute is maybe_vex).
;; NOTE(review): the unspec name and the insn condition are on lines
;; not visible in this chunk.
13891 (define_insn "sse4_1_ptest"
13892 [(set (reg:CC FLAGS_REG)
13893 (unspec:CC [(match_operand:V2DI 0 "register_operand" "x")
13894 (match_operand:V2DI 1 "nonimmediate_operand" "xm")]
13897 "%vptest\t{%1, %0|%0, %1}"
13898 [(set_attr "type" "ssecomi")
13899 (set_attr "prefix_extra" "1")
13900 (set_attr "prefix" "maybe_vex")
13901 (set_attr "mode" "TI")])
;; Packed round (roundps / roundpd and their VEX forms) over the
;; VF_128_256 modes.  Operand 1 is the source and operand 2 is the
;; immediate restricted to 0..15.  The prefix_data16 attribute is
;; computed from a TARGET_AVX test rather than given as a constant.
;; NOTE(review): the unspec wrapper, its name, the insn condition and
;; part of the prefix_data16 expression are on lines not visible in
;; this chunk.
13903 (define_insn "<sse4_1>_round<ssemodesuffix><avxsizesuffix>"
13904 [(set (match_operand:VF_128_256 0 "register_operand" "=x")
13906 [(match_operand:VF_128_256 1 "nonimmediate_operand" "xm")
13907 (match_operand:SI 2 "const_0_to_15_operand" "n")]
13910 "%vround<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
13911 [(set_attr "type" "ssecvt")
13912 (set (attr "prefix_data16")
13914 (match_test "TARGET_AVX")
13916 (const_string "1")))
13917 (set_attr "prefix_extra" "1")
13918 (set_attr "length_immediate" "1")
13919 (set_attr "prefix" "maybe_vex")
13920 (set_attr "mode" "<MODE>")])
;; Round then convert to integer.  The expander rounds operand 1 into a
;; fresh temporary of the float vector mode using the packed round
;; pattern, then converts the temporary into integer-vector operand 0
;; with the fix_trunc pattern.  Operand 2 is the immediate restricted
;; to 0..15.
;; NOTE(review): the insn condition, the emit_insn wrappers and the
;; trailing argument of the round call are on lines not visible in
;; this chunk.
13922 (define_expand "<sse4_1>_round<ssemodesuffix>_sfix<avxsizesuffix>"
13923 [(match_operand:<sseintvecmode> 0 "register_operand")
13924 (match_operand:VF1_128_256 1 "nonimmediate_operand")
13925 (match_operand:SI 2 "const_0_to_15_operand")]
13928 rtx tmp = gen_reg_rtx (<MODE>mode);
13931 (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp, operands[1],
13934 (gen_fix_trunc<mode><sseintvecmodelower>2 (operands[0], tmp));
;; 512-bit packed double round.  This expander only forwards its three
;; operands unchanged to the avx512f_rndscalev8df generator.
;; NOTE(review): the insn condition is on a line not visible in this
;; chunk.
13938 (define_expand "avx512f_roundpd512"
13939 [(match_operand:V8DF 0 "register_operand")
13940 (match_operand:V8DF 1 "nonimmediate_operand")
13941 (match_operand:SI 2 "const_0_to_15_operand")]
13944 emit_insn (gen_avx512f_rndscalev8df (operands[0], operands[1], operands[2]));
;; Round two double vectors with the immediate in operand 3 and pack
;; the integer results into operand 0.  Two code paths are visible:
;;  - V2DF with TARGET_AVX and without TARGET_PREFER_AVX128: operand 1
;;    (forced into a register) and operand 2 are concatenated into one
;;    V4DF, rounded once with the 256-bit round, then converted with
;;    fix_truncv4dfv4si2.
;;  - otherwise: each input is rounded into its own temporary and the
;;    pair is handed to vec_pack_sfix_trunc.
;; NOTE(review): the insn condition, braces, the else keyword and the
;; trailing arguments of the round calls are on lines not visible in
;; this chunk.
13948 (define_expand "<sse4_1>_round<ssemodesuffix>_vec_pack_sfix<avxsizesuffix>"
13949 [(match_operand:<ssepackfltmode> 0 "register_operand")
13950 (match_operand:VF2 1 "nonimmediate_operand")
13951 (match_operand:VF2 2 "nonimmediate_operand")
13952 (match_operand:SI 3 "const_0_to_15_operand")]
13957 if (<MODE>mode == V2DFmode
13958 && TARGET_AVX && !TARGET_PREFER_AVX128)
13960 rtx tmp2 = gen_reg_rtx (V4DFmode);
13962 tmp0 = gen_reg_rtx (V4DFmode);
13963 tmp1 = force_reg (V2DFmode, operands[1]);
13965 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
13966 emit_insn (gen_avx_roundpd256 (tmp2, tmp0, operands[3]));
13967 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp2));
13971 tmp0 = gen_reg_rtx (<MODE>mode);
13972 tmp1 = gen_reg_rtx (<MODE>mode);
13975 (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp0, operands[1],
13978 (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp1, operands[2],
13981 (gen_vec_pack_sfix_trunc_<mode> (operands[0], tmp0, tmp1));
;; Scalar round (roundss / roundsd and their VEX forms) over the VF_128
;; modes.  Operand 2 is the value to round, operand 3 the immediate
;; restricted to 0..15, and operand 1 the vector that is tied to the
;; destination in the non-AVX alternative and passed explicitly in the
;; AVX alternative.
;; NOTE(review): the vec_merge and unspec wrapper lines and the insn
;; condition are not visible in this chunk.
13986 (define_insn "sse4_1_round<ssescalarmodesuffix>"
13987 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
13990 [(match_operand:VF_128 2 "register_operand" "x,x")
13991 (match_operand:SI 3 "const_0_to_15_operand" "n,n")]
13993 (match_operand:VF_128 1 "register_operand" "0,x")
13997 round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
13998 vround<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
13999 [(set_attr "isa" "noavx,avx")
14000 (set_attr "type" "ssecvt")
14001 (set_attr "length_immediate" "1")
14002 (set_attr "prefix_data16" "1,*")
14003 (set_attr "prefix_extra" "1")
14004 (set_attr "prefix" "orig,vex")
14005 (set_attr "mode" "<MODE>")])
;; Vector round expander over the VF modes, enabled only when
;; TARGET_ROUND holds and flag_trapping_math is off.
;; The preparation code builds the constant 0.5 - 2**(-p-1), where p is
;; the precision of the scalar format (the in-code comment calls this
;; nextafter (0.5, 0.0)), broadcasts it to a vector, and gives it the
;; sign of operand 1 via copysign; the result is operands[3].
;; operands[4] is a fresh register written by the first set of the
;; pattern, and operands[5] is the ROUND_TRUNC immediate used by the
;; second set.
;; NOTE(review): the rtx code lines of both sets are not visible in
;; this chunk; presumably the first set adds operands[3] to operand 1
;; and the second is the round unspec -- confirm against the full file.
14007 (define_expand "round<mode>2"
14008 [(set (match_dup 4)
14010 (match_operand:VF 1 "register_operand")
14012 (set (match_operand:VF 0 "register_operand")
14014 [(match_dup 4) (match_dup 5)]
14016 "TARGET_ROUND && !flag_trapping_math"
14018 enum machine_mode scalar_mode;
14019 const struct real_format *fmt;
14020 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
14021 rtx half, vec_half;
14023 scalar_mode = GET_MODE_INNER (<MODE>mode);
14025 /* load nextafter (0.5, 0.0) */
14026 fmt = REAL_MODE_FORMAT (scalar_mode);
14027 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, scalar_mode);
14028 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
14029 half = const_double_from_real_value (pred_half, scalar_mode);
14031 vec_half = ix86_build_const_vector (<MODE>mode, true, half);
14032 vec_half = force_reg (<MODE>mode, vec_half);
14034 operands[3] = gen_reg_rtx (<MODE>mode);
14035 emit_insn (gen_copysign<mode>3 (operands[3], vec_half, operands[1]));
14037 operands[4] = gen_reg_rtx (<MODE>mode);
14038 operands[5] = GEN_INT (ROUND_TRUNC);
;; Round then convert to integer, for the VF1_128_256 modes, enabled
;; only when TARGET_ROUND holds and flag_trapping_math is off.
;; Operand 1 is rounded into a fresh temporary with the round<mode>2
;; expander and the temporary is then converted into integer-vector
;; operand 0 with the fix_trunc pattern.
14041 (define_expand "round<mode>2_sfix"
14042 [(match_operand:<sseintvecmode> 0 "register_operand")
14043 (match_operand:VF1_128_256 1 "register_operand")]
14044 "TARGET_ROUND && !flag_trapping_math"
14046 rtx tmp = gen_reg_rtx (<MODE>mode);
14048 emit_insn (gen_round<mode>2 (tmp, operands[1]));
14051 (gen_fix_trunc<mode><sseintvecmodelower>2 (operands[0], tmp));
;; Round two double vectors and pack the integer results into operand
;; 0, enabled only when TARGET_ROUND holds and flag_trapping_math is
;; off.  Two code paths are visible:
;;  - V2DF with TARGET_AVX and without TARGET_PREFER_AVX128: operand 1
;;    (forced into a register) and operand 2 are concatenated into one
;;    V4DF, rounded once with roundv4df2, then converted with
;;    fix_truncv4dfv4si2.
;;  - otherwise: each input is rounded into its own temporary with
;;    round<mode>2 and the pair is handed to vec_pack_sfix_trunc.
;; NOTE(review): braces, the else keyword and the temporary
;; declarations are on lines not visible in this chunk.
14055 (define_expand "round<mode>2_vec_pack_sfix"
14056 [(match_operand:<ssepackfltmode> 0 "register_operand")
14057 (match_operand:VF2 1 "register_operand")
14058 (match_operand:VF2 2 "register_operand")]
14059 "TARGET_ROUND && !flag_trapping_math"
14063 if (<MODE>mode == V2DFmode
14064 && TARGET_AVX && !TARGET_PREFER_AVX128)
14066 rtx tmp2 = gen_reg_rtx (V4DFmode);
14068 tmp0 = gen_reg_rtx (V4DFmode);
14069 tmp1 = force_reg (V2DFmode, operands[1]);
14071 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
14072 emit_insn (gen_roundv4df2 (tmp2, tmp0));
14073 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp2));
14077 tmp0 = gen_reg_rtx (<MODE>mode);
14078 tmp1 = gen_reg_rtx (<MODE>mode);
14080 emit_insn (gen_round<mode>2 (tmp0, operands[1]));
14081 emit_insn (gen_round<mode>2 (tmp1, operands[2]));
14084 (gen_vec_pack_sfix_trunc_<mode> (operands[0], tmp0, tmp1));
14089 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
14091 ;; Intel SSE4.2 string/text processing instructions
14093 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Combined explicit-length string compare.  The pattern has three
;; results: SI operand 0 (constraint c), V16QI operand 1 (constraint
;; Yz) and FLAGS_REG.  Inputs are the two V16QI vectors (operands 2 and
;; 4), their lengths in SI operands 3 (constraint a) and 5 (constraint
;; d), and the immediate in operand 6.
;; The split runs only while pseudos can still be created.  It reads
;; the REG_UNUSED notes on the current insn to learn which of the three
;; results are live (ecx, xmm0, flags) and emits only the insns needed:
;; the pcmpestri form, the pcmpestrm form, the flags-only form when
;; neither register result is live, or a deleted-insn note when nothing
;; is live.
;; NOTE(review): the guards on the first two emits, the unspec names
;; and the leading part of the condition are on lines not visible in
;; this chunk; presumably the emits are guarded by ecx and xmm0.
14095 (define_insn_and_split "sse4_2_pcmpestr"
14096 [(set (match_operand:SI 0 "register_operand" "=c,c")
14098 [(match_operand:V16QI 2 "register_operand" "x,x")
14099 (match_operand:SI 3 "register_operand" "a,a")
14100 (match_operand:V16QI 4 "nonimmediate_operand" "x,m")
14101 (match_operand:SI 5 "register_operand" "d,d")
14102 (match_operand:SI 6 "const_0_to_255_operand" "n,n")]
14104 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
14112 (set (reg:CC FLAGS_REG)
14121 && can_create_pseudo_p ()"
14126 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
14127 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
14128 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
14131 emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
14132 operands[3], operands[4],
14133 operands[5], operands[6]));
14135 emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
14136 operands[3], operands[4],
14137 operands[5], operands[6]));
14138 if (flags && !(ecx || xmm0))
14139 emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
14140 operands[2], operands[3],
14141 operands[4], operands[5],
14143 if (!(flags || ecx || xmm0))
14144 emit_note (NOTE_INSN_DELETED);
14148 [(set_attr "type" "sselog")
14149 (set_attr "prefix_data16" "1")
14150 (set_attr "prefix_extra" "1")
14151 (set_attr "ssememalign" "8")
14152 (set_attr "length_immediate" "1")
14153 (set_attr "memory" "none,load")
14154 (set_attr "mode" "TI")])
;; Variant of the combined explicit-length string compare whose second
;; vector (operand 4) is a memory operand wrapped in UNSPEC_LOADU, so
;; the load is folded into the compare.  Results and the other inputs
;; mirror the register or memory variant: SI operand 0 (constraint c),
;; V16QI operand 1 (constraint Yz) and FLAGS_REG.
;; The split runs only while pseudos can still be created and uses the
;; REG_UNUSED notes on the current insn to emit just the pcmpestri,
;; pcmpestrm or flags-only insn that is needed, or a deleted-insn note
;; when no result is live.  The bare memory operand 4 is passed on to
;; the emitted insns.
;; NOTE(review): the guards on the first two emits and the leading part
;; of the condition are on lines not visible in this chunk.
14156 (define_insn_and_split "*sse4_2_pcmpestr_unaligned"
14157 [(set (match_operand:SI 0 "register_operand" "=c")
14159 [(match_operand:V16QI 2 "register_operand" "x")
14160 (match_operand:SI 3 "register_operand" "a")
14162 [(match_operand:V16QI 4 "memory_operand" "m")]
14164 (match_operand:SI 5 "register_operand" "d")
14165 (match_operand:SI 6 "const_0_to_255_operand" "n")]
14167 (set (match_operand:V16QI 1 "register_operand" "=Yz")
14171 (unspec:V16QI [(match_dup 4)] UNSPEC_LOADU)
14175 (set (reg:CC FLAGS_REG)
14179 (unspec:V16QI [(match_dup 4)] UNSPEC_LOADU)
14184 && can_create_pseudo_p ()"
14189 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
14190 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
14191 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
14194 emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
14195 operands[3], operands[4],
14196 operands[5], operands[6]));
14198 emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
14199 operands[3], operands[4],
14200 operands[5], operands[6]));
14201 if (flags && !(ecx || xmm0))
14202 emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
14203 operands[2], operands[3],
14204 operands[4], operands[5],
14206 if (!(flags || ecx || xmm0))
14207 emit_note (NOTE_INSN_DELETED);
14211 [(set_attr "type" "sselog")
14212 (set_attr "prefix_data16" "1")
14213 (set_attr "prefix_extra" "1")
14214 (set_attr "ssememalign" "8")
14215 (set_attr "length_immediate" "1")
14216 (set_attr "memory" "load")
14217 (set_attr "mode" "TI")])
;; pcmpestri: explicit-length string compare producing SI operand 0
;; (constraint c) and FLAGS_REG.  Operands 1 and 3 are the V16QI
;; vectors, operands 2 (constraint a) and 4 (constraint d) their
;; lengths, operand 5 the immediate.  Alternative 1 takes operand 3
;; from memory (memory attribute load).
;; NOTE(review): the unspec names and the insn condition are on lines
;; not visible in this chunk.
14219 (define_insn "sse4_2_pcmpestri"
14220 [(set (match_operand:SI 0 "register_operand" "=c,c")
14222 [(match_operand:V16QI 1 "register_operand" "x,x")
14223 (match_operand:SI 2 "register_operand" "a,a")
14224 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
14225 (match_operand:SI 4 "register_operand" "d,d")
14226 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
14228 (set (reg:CC FLAGS_REG)
14237 "%vpcmpestri\t{%5, %3, %1|%1, %3, %5}"
14238 [(set_attr "type" "sselog")
14239 (set_attr "prefix_data16" "1")
14240 (set_attr "prefix_extra" "1")
14241 (set_attr "prefix" "maybe_vex")
14242 (set_attr "ssememalign" "8")
14243 (set_attr "length_immediate" "1")
14244 (set_attr "btver2_decode" "vector")
14245 (set_attr "memory" "none,load")
14246 (set_attr "mode" "TI")])
;; pcmpestrm: explicit-length string compare producing V16QI operand 0
;; (constraint Yz) and FLAGS_REG.  Operands 1 and 3 are the V16QI
;; vectors, operands 2 (constraint a) and 4 (constraint d) their
;; lengths, operand 5 the immediate.  Alternative 1 takes operand 3
;; from memory (memory attribute load).
;; NOTE(review): the unspec names and the insn condition are on lines
;; not visible in this chunk.
14248 (define_insn "sse4_2_pcmpestrm"
14249 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
14251 [(match_operand:V16QI 1 "register_operand" "x,x")
14252 (match_operand:SI 2 "register_operand" "a,a")
14253 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
14254 (match_operand:SI 4 "register_operand" "d,d")
14255 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
14257 (set (reg:CC FLAGS_REG)
14266 "%vpcmpestrm\t{%5, %3, %1|%1, %3, %5}"
14267 [(set_attr "type" "sselog")
14268 (set_attr "prefix_data16" "1")
14269 (set_attr "prefix_extra" "1")
14270 (set_attr "ssememalign" "8")
14271 (set_attr "length_immediate" "1")
14272 (set_attr "prefix" "maybe_vex")
14273 (set_attr "btver2_decode" "vector")
14274 (set_attr "memory" "none,load")
14275 (set_attr "mode" "TI")])
;; Flags-only explicit-length string compare.  Only FLAGS_REG is a real
;; result; operands 0 and 1 are scratch clobbers.  Four alternatives:
;; the first two clobber the V16QI scratch (constraint Yz) and print
;; pcmpestrm, the last two clobber the SI scratch (constraint c) and
;; print pcmpestri.  Within each pair, operand 4 is a register first
;; and a memory operand second (memory attribute none,load,none,load).
;; NOTE(review): the unspec name and the insn condition are on lines
;; not visible in this chunk.
14277 (define_insn "sse4_2_pcmpestr_cconly"
14278 [(set (reg:CC FLAGS_REG)
14280 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
14281 (match_operand:SI 3 "register_operand" "a,a,a,a")
14282 (match_operand:V16QI 4 "nonimmediate_operand" "x,m,x,m")
14283 (match_operand:SI 5 "register_operand" "d,d,d,d")
14284 (match_operand:SI 6 "const_0_to_255_operand" "n,n,n,n")]
14286 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
14287 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
14290 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
14291 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
14292 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}
14293 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}"
14294 [(set_attr "type" "sselog")
14295 (set_attr "prefix_data16" "1")
14296 (set_attr "prefix_extra" "1")
14297 (set_attr "ssememalign" "8")
14298 (set_attr "length_immediate" "1")
14299 (set_attr "memory" "none,load,none,load")
14300 (set_attr "btver2_decode" "vector,vector,vector,vector")
14301 (set_attr "prefix" "maybe_vex")
14302 (set_attr "mode" "TI")])
;; Combined implicit-length string compare.  The pattern has three
;; results: SI operand 0 (constraint c), V16QI operand 1 (constraint
;; Yz) and FLAGS_REG.  Inputs are the two V16QI vectors (operands 2 and
;; 3) and the immediate in operand 4.
;; The split runs only while pseudos can still be created.  It reads
;; the REG_UNUSED notes on the current insn to learn which results are
;; live (ecx, xmm0, flags) and emits only the insns needed: the
;; pcmpistri form, the pcmpistrm form, the flags-only form when neither
;; register result is live, or a deleted-insn note when nothing is
;; live.
;; NOTE(review): the guards on the first two emits, the unspec names
;; and the leading part of the condition are on lines not visible in
;; this chunk; presumably the emits are guarded by ecx and xmm0.
14304 (define_insn_and_split "sse4_2_pcmpistr"
14305 [(set (match_operand:SI 0 "register_operand" "=c,c")
14307 [(match_operand:V16QI 2 "register_operand" "x,x")
14308 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
14309 (match_operand:SI 4 "const_0_to_255_operand" "n,n")]
14311 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
14317 (set (reg:CC FLAGS_REG)
14324 && can_create_pseudo_p ()"
14329 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
14330 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
14331 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
14334 emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
14335 operands[3], operands[4]));
14337 emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
14338 operands[3], operands[4]));
14339 if (flags && !(ecx || xmm0))
14340 emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
14341 operands[2], operands[3],
14343 if (!(flags || ecx || xmm0))
14344 emit_note (NOTE_INSN_DELETED);
14348 [(set_attr "type" "sselog")
14349 (set_attr "prefix_data16" "1")
14350 (set_attr "prefix_extra" "1")
14351 (set_attr "ssememalign" "8")
14352 (set_attr "length_immediate" "1")
14353 (set_attr "memory" "none,load")
14354 (set_attr "mode" "TI")])
;; Variant of the combined implicit-length string compare whose second
;; vector (operand 3) is a memory operand wrapped in UNSPEC_LOADU, so
;; the load is folded into the compare.  Results mirror the register or
;; memory variant: SI operand 0 (constraint c), V16QI operand 1
;; (constraint Yz) and FLAGS_REG.
;; The split runs only while pseudos can still be created and uses the
;; REG_UNUSED notes on the current insn to emit just the pcmpistri,
;; pcmpistrm or flags-only insn that is needed, or a deleted-insn note
;; when no result is live.  The bare memory operand 3 is passed on to
;; the emitted insns.
;; NOTE(review): the guards on the first two emits and the leading part
;; of the condition are on lines not visible in this chunk.
14356 (define_insn_and_split "*sse4_2_pcmpistr_unaligned"
14357 [(set (match_operand:SI 0 "register_operand" "=c")
14359 [(match_operand:V16QI 2 "register_operand" "x")
14361 [(match_operand:V16QI 3 "memory_operand" "m")]
14363 (match_operand:SI 4 "const_0_to_255_operand" "n")]
14365 (set (match_operand:V16QI 1 "register_operand" "=Yz")
14368 (unspec:V16QI [(match_dup 3)] UNSPEC_LOADU)
14371 (set (reg:CC FLAGS_REG)
14374 (unspec:V16QI [(match_dup 3)] UNSPEC_LOADU)
14378 && can_create_pseudo_p ()"
14383 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
14384 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
14385 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
14388 emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
14389 operands[3], operands[4]));
14391 emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
14392 operands[3], operands[4]));
14393 if (flags && !(ecx || xmm0))
14394 emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
14395 operands[2], operands[3],
14397 if (!(flags || ecx || xmm0))
14398 emit_note (NOTE_INSN_DELETED);
14402 [(set_attr "type" "sselog")
14403 (set_attr "prefix_data16" "1")
14404 (set_attr "prefix_extra" "1")
14405 (set_attr "ssememalign" "8")
14406 (set_attr "length_immediate" "1")
14407 (set_attr "memory" "load")
14408 (set_attr "mode" "TI")])
;; pcmpistri: implicit-length string compare producing SI operand 0
;; (constraint c) and FLAGS_REG.  Operands 1 and 2 are the V16QI
;; vectors and operand 3 is the immediate.  Alternative 1 takes operand
;; 2 from memory (memory attribute load).
;; NOTE(review): the unspec names and the insn condition are on lines
;; not visible in this chunk.
14410 (define_insn "sse4_2_pcmpistri"
14411 [(set (match_operand:SI 0 "register_operand" "=c,c")
14413 [(match_operand:V16QI 1 "register_operand" "x,x")
14414 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
14415 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
14417 (set (reg:CC FLAGS_REG)
14424 "%vpcmpistri\t{%3, %2, %1|%1, %2, %3}"
14425 [(set_attr "type" "sselog")
14426 (set_attr "prefix_data16" "1")
14427 (set_attr "prefix_extra" "1")
14428 (set_attr "ssememalign" "8")
14429 (set_attr "length_immediate" "1")
14430 (set_attr "prefix" "maybe_vex")
14431 (set_attr "memory" "none,load")
14432 (set_attr "btver2_decode" "vector")
14433 (set_attr "mode" "TI")])
;; pcmpistrm: implicit-length string compare producing V16QI operand 0
;; (constraint Yz) and FLAGS_REG.  Operands 1 and 2 are the V16QI
;; vectors and operand 3 is the immediate.  Alternative 1 takes operand
;; 2 from memory (memory attribute load).
;; NOTE(review): the unspec names and the insn condition are on lines
;; not visible in this chunk.
14435 (define_insn "sse4_2_pcmpistrm"
14436 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
14438 [(match_operand:V16QI 1 "register_operand" "x,x")
14439 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
14440 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
14442 (set (reg:CC FLAGS_REG)
14449 "%vpcmpistrm\t{%3, %2, %1|%1, %2, %3}"
14450 [(set_attr "type" "sselog")
14451 (set_attr "prefix_data16" "1")
14452 (set_attr "prefix_extra" "1")
14453 (set_attr "ssememalign" "8")
14454 (set_attr "length_immediate" "1")
14455 (set_attr "prefix" "maybe_vex")
14456 (set_attr "memory" "none,load")
14457 (set_attr "btver2_decode" "vector")
14458 (set_attr "mode" "TI")])
;; Flags-only implicit-length string compare.  Only FLAGS_REG is a real
;; result; operands 0 and 1 are scratch clobbers.  Four alternatives:
;; the first two clobber the V16QI scratch (constraint Yz) and print
;; pcmpistrm, the last two clobber the SI scratch (constraint c) and
;; print pcmpistri.  Within each pair, operand 3 is a register first
;; and a memory operand second (memory attribute none,load,none,load).
;; NOTE(review): the unspec name and the insn condition are on lines
;; not visible in this chunk.
14460 (define_insn "sse4_2_pcmpistr_cconly"
14461 [(set (reg:CC FLAGS_REG)
14463 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
14464 (match_operand:V16QI 3 "nonimmediate_operand" "x,m,x,m")
14465 (match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")]
14467 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
14468 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
14471 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
14472 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
14473 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}
14474 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}"
14475 [(set_attr "type" "sselog")
14476 (set_attr "prefix_data16" "1")
14477 (set_attr "prefix_extra" "1")
14478 (set_attr "ssememalign" "8")
14479 (set_attr "length_immediate" "1")
14480 (set_attr "memory" "none,load,none,load")
14481 (set_attr "prefix" "maybe_vex")
14482 (set_attr "btver2_decode" "vector,vector,vector,vector")
14483 (set_attr "mode" "TI")])
14485 ;; Packed float variants
;; Maps the index-vector mode of a single-precision gather or scatter
;; prefetch to the mode given to its memory reference: V8DI indices
;; use V8SF and V16SI indices use V16SF.
14486 (define_mode_attr GATHER_SCATTER_SF_MEM_MODE
14487 [(V8DI "V8SF") (V16SI "V16SF")])
;; Expander for the single-precision gather prefetch.  Operand 0 is the
;; mask (a register or constant minus one), operand 2 the base address,
;; operand 1 the VI48_512 index vector, operand 3 the scale (1, 2, 4 or
;; 8) and operand 4 the hint immediate (2 or 3).
;; The preparation code packs operands 2, 1 and 3 into a Pmode unspec
;; tagged UNSPEC_VSIBADDR.
;; NOTE(review): the line naming the operand that receives this unspec,
;; the match_dup inside the mem and the insn condition are not visible
;; in this chunk.
14489 (define_expand "avx512pf_gatherpf<mode>sf"
14491 [(match_operand:<avx512fmaskmode> 0 "register_or_constm1_operand")
14492 (mem:<GATHER_SCATTER_SF_MEM_MODE>
14494 [(match_operand 2 "vsib_address_operand")
14495 (match_operand:VI48_512 1 "register_operand")
14496 (match_operand:SI 3 "const1248_operand")]))
14497 (match_operand:SI 4 "const_2_to_3_operand")]
14498 UNSPEC_GATHER_PREFETCH)]
14502 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
14503 operands[3]), UNSPEC_VSIBADDR);
;; Masked single-precision gather prefetch.  Operand 0 is the mask
;; register (constraint Yk) and operand 5 the VSIB memory operator
;; built from base operand 2, index operand 1 and scale operand 3.
;; The output code switches on the hint in operand 4 and prints either
;; vgatherpf0...ps or vgatherpf1...ps with the mask in braces; any
;; other value reaches gcc_unreachable.
;; NOTE(review): the case labels mapping hint values to the two
;; mnemonics and the insn condition are on lines not visible in this
;; chunk.
14506 (define_insn "*avx512pf_gatherpf<mode>sf_mask"
14508 [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
14509 (match_operator:<GATHER_SCATTER_SF_MEM_MODE> 5 "vsib_mem_operator"
14511 [(match_operand:P 2 "vsib_address_operand" "Tv")
14512 (match_operand:VI48_512 1 "register_operand" "v")
14513 (match_operand:SI 3 "const1248_operand" "n")]
14515 (match_operand:SI 4 "const_2_to_3_operand" "n")]
14516 UNSPEC_GATHER_PREFETCH)]
14519 switch (INTVAL (operands[4]))
14522 return "vgatherpf0<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}";
14524 return "vgatherpf1<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}";
14526 gcc_unreachable ();
14529 [(set_attr "type" "sse")
14530 (set_attr "prefix" "evex")
14531 (set_attr "mode" "XI")])
;; Single-precision gather prefetch without a mask register operand.
;; Operand 4 is the VSIB memory operator built from base operand 1,
;; index operand 0 and scale operand 2.  The output code switches on
;; the hint in operand 3 and prints either vgatherpf0...ps or
;; vgatherpf1...ps; any other value reaches gcc_unreachable.
;; NOTE(review): the first unspec element, the case labels and the insn
;; condition are on lines not visible in this chunk.
14533 (define_insn "*avx512pf_gatherpf<mode>sf"
14536 (match_operator:<GATHER_SCATTER_SF_MEM_MODE> 4 "vsib_mem_operator"
14538 [(match_operand:P 1 "vsib_address_operand" "Tv")
14539 (match_operand:VI48_512 0 "register_operand" "v")
14540 (match_operand:SI 2 "const1248_operand" "n")]
14542 (match_operand:SI 3 "const_2_to_3_operand" "n")]
14543 UNSPEC_GATHER_PREFETCH)]
14546 switch (INTVAL (operands[3]))
14549 return "vgatherpf0<ssemodesuffix>ps\t{%4|%4}";
14551 return "vgatherpf1<ssemodesuffix>ps\t{%4|%4}";
14553 gcc_unreachable ();
14556 [(set_attr "type" "sse")
14557 (set_attr "prefix" "evex")
14558 (set_attr "mode" "XI")])
14560 ;; Packed double variants
;; Expander for the double-precision gather prefetch.  Operand 0 is the
;; mask (a register or constant minus one), operand 2 the base address,
;; operand 1 the VI4_256_8_512 index vector, operand 3 the scale (1, 2,
;; 4 or 8) and operand 4 the hint immediate (2 or 3).
;; The preparation code packs operands 2, 1 and 3 into a Pmode unspec
;; tagged UNSPEC_VSIBADDR.
;; NOTE(review): the mem line, the line naming the operand that
;; receives this unspec and the insn condition are not visible in this
;; chunk.
14561 (define_expand "avx512pf_gatherpf<mode>df"
14563 [(match_operand:<avx512fmaskmode> 0 "register_or_constm1_operand")
14566 [(match_operand 2 "vsib_address_operand")
14567 (match_operand:VI4_256_8_512 1 "register_operand")
14568 (match_operand:SI 3 "const1248_operand")]))
14569 (match_operand:SI 4 "const_2_to_3_operand")]
14570 UNSPEC_GATHER_PREFETCH)]
14574 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
14575 operands[3]), UNSPEC_VSIBADDR);
;; Masked double-precision gather prefetch.  Operand 0 is the mask
;; register (constraint Yk) and operand 5 the V8DF VSIB memory operator
;; built from base operand 2, index operand 1 and scale operand 3.
;; The output code switches on the hint in operand 4 and prints either
;; vgatherpf0...pd or vgatherpf1...pd with the mask in braces; any
;; other value reaches gcc_unreachable.
;; NOTE(review): the case labels mapping hint values to the two
;; mnemonics and the insn condition are on lines not visible in this
;; chunk.
14578 (define_insn "*avx512pf_gatherpf<mode>df_mask"
14580 [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
14581 (match_operator:V8DF 5 "vsib_mem_operator"
14583 [(match_operand:P 2 "vsib_address_operand" "Tv")
14584 (match_operand:VI4_256_8_512 1 "register_operand" "v")
14585 (match_operand:SI 3 "const1248_operand" "n")]
14587 (match_operand:SI 4 "const_2_to_3_operand" "n")]
14588 UNSPEC_GATHER_PREFETCH)]
14591 switch (INTVAL (operands[4]))
14594 return "vgatherpf0<ssemodesuffix>pd\t{%5%{%0%}|%5%{%0%}}";
14596 return "vgatherpf1<ssemodesuffix>pd\t{%5%{%0%}|%5%{%0%}}";
14598 gcc_unreachable ();
14601 [(set_attr "type" "sse")
14602 (set_attr "prefix" "evex")
14603 (set_attr "mode" "XI")])
;; Double-precision gather prefetch without a mask register operand.
;; Operand 4 is the V8DF VSIB memory operator built from base operand
;; 1, index operand 0 and scale operand 2.  The output code switches on
;; the hint in operand 3 and prints either vgatherpf0...pd or
;; vgatherpf1...pd; any other value reaches gcc_unreachable.
;; NOTE(review): the first unspec element, the case labels and the insn
;; condition are on lines not visible in this chunk.
14605 (define_insn "*avx512pf_gatherpf<mode>df"
14608 (match_operator:V8DF 4 "vsib_mem_operator"
14610 [(match_operand:P 1 "vsib_address_operand" "Tv")
14611 (match_operand:VI4_256_8_512 0 "register_operand" "v")
14612 (match_operand:SI 2 "const1248_operand" "n")]
14614 (match_operand:SI 3 "const_2_to_3_operand" "n")]
14615 UNSPEC_GATHER_PREFETCH)]
14618 switch (INTVAL (operands[3]))
14621 return "vgatherpf0<ssemodesuffix>pd\t{%4|%4}";
14623 return "vgatherpf1<ssemodesuffix>pd\t{%4|%4}";
14625 gcc_unreachable ();
14628 [(set_attr "type" "sse")
14629 (set_attr "prefix" "evex")
14630 (set_attr "mode" "XI")])
14632 ;; Packed float variants
;; Expander for the single-precision scatter prefetch.  Operand 0 is
;; the mask (a register or constant minus one), operand 2 the base
;; address, operand 1 the VI48_512 index vector, operand 3 the scale
;; (1, 2, 4 or 8) and operand 4 the hint immediate accepted by
;; const2367_operand.
;; The preparation code packs operands 2, 1 and 3 into a Pmode unspec
;; tagged UNSPEC_VSIBADDR.
;; NOTE(review): the line naming the operand that receives this unspec,
;; the match_dup inside the mem and the insn condition are not visible
;; in this chunk.
14633 (define_expand "avx512pf_scatterpf<mode>sf"
14635 [(match_operand:<avx512fmaskmode> 0 "register_or_constm1_operand")
14636 (mem:<GATHER_SCATTER_SF_MEM_MODE>
14638 [(match_operand 2 "vsib_address_operand")
14639 (match_operand:VI48_512 1 "register_operand")
14640 (match_operand:SI 3 "const1248_operand")]))
14641 (match_operand:SI 4 "const2367_operand")]
14642 UNSPEC_SCATTER_PREFETCH)]
14646 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
14647 operands[3]), UNSPEC_VSIBADDR);
;; Masked single-precision scatter prefetch.  The output code switches on
;; the value of operand 4 and emits either vscatterpf0...ps or
;; vscatterpf1...ps, printing the VSIB memory operand (%5) followed by
;; the mask register (%0) in braces.
14650 (define_insn "*avx512pf_scatterpf<mode>sf_mask"
14652 [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
14653 (match_operator:<GATHER_SCATTER_SF_MEM_MODE> 5 "vsib_mem_operator"
14655 [(match_operand:P 2 "vsib_address_operand" "Tv")
14656 (match_operand:VI48_512 1 "register_operand" "v")
14657 (match_operand:SI 3 "const1248_operand" "n")]
14659 (match_operand:SI 4 "const2367_operand" "n")]
14660 UNSPEC_SCATTER_PREFETCH)]
14663 switch (INTVAL (operands[4]))
14667 return "vscatterpf0<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}";
14670 return "vscatterpf1<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}";
14672 gcc_unreachable ();
14675 [(set_attr "type" "sse")
14676 (set_attr "prefix" "evex")
14677 (set_attr "mode" "XI")])
;; Single-precision scatter prefetch without a mask register operand.
;; The output code switches on operand 3 to choose between
;; vscatterpf0...ps and vscatterpf1...ps and prints only the VSIB memory
;; operand (%4).
14679 (define_insn "*avx512pf_scatterpf<mode>sf"
14682 (match_operator:<GATHER_SCATTER_SF_MEM_MODE> 4 "vsib_mem_operator"
14684 [(match_operand:P 1 "vsib_address_operand" "Tv")
14685 (match_operand:VI48_512 0 "register_operand" "v")
14686 (match_operand:SI 2 "const1248_operand" "n")]
14688 (match_operand:SI 3 "const2367_operand" "n")]
14689 UNSPEC_SCATTER_PREFETCH)]
14692 switch (INTVAL (operands[3]))
14696 return "vscatterpf0<ssemodesuffix>ps\t{%4|%4}";
14699 return "vscatterpf1<ssemodesuffix>ps\t{%4|%4}";
14701 gcc_unreachable ();
14704 [(set_attr "type" "sse")
14705 (set_attr "prefix" "evex")
14706 (set_attr "mode" "XI")])
14708 ;; Packed double variants
;; Expander for the double-precision scatter prefetch.  Operand 0 is the
;; mask, operand 2 the base address, operand 1 the index vector, operand 3
;; the scale and operand 4 the value the matching insns switch on to pick
;; the vscatterpf0/vscatterpf1 form.  The preparation code packs base,
;; index and scale into an UNSPEC_VSIBADDR rtx.
14709 (define_expand "avx512pf_scatterpf<mode>df"
14711 [(match_operand:<avx512fmaskmode> 0 "register_or_constm1_operand")
14714 [(match_operand 2 "vsib_address_operand")
14715 (match_operand:VI4_256_8_512 1 "register_operand")
14716 (match_operand:SI 3 "const1248_operand")]))
14717 (match_operand:SI 4 "const2367_operand")]
14718 UNSPEC_SCATTER_PREFETCH)]
14722 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
14723 operands[3]), UNSPEC_VSIBADDR);
;; Masked double-precision scatter prefetch.  The output code switches on
;; the value of operand 4 and emits either vscatterpf0...pd or
;; vscatterpf1...pd, printing the VSIB memory operand (%5) followed by
;; the mask register (%0) in braces.
14726 (define_insn "*avx512pf_scatterpf<mode>df_mask"
14728 [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
14729 (match_operator:V8DF 5 "vsib_mem_operator"
14731 [(match_operand:P 2 "vsib_address_operand" "Tv")
14732 (match_operand:VI4_256_8_512 1 "register_operand" "v")
14733 (match_operand:SI 3 "const1248_operand" "n")]
14735 (match_operand:SI 4 "const2367_operand" "n")]
14736 UNSPEC_SCATTER_PREFETCH)]
14739 switch (INTVAL (operands[4]))
14743 return "vscatterpf0<ssemodesuffix>pd\t{%5%{%0%}|%5%{%0%}}";
14746 return "vscatterpf1<ssemodesuffix>pd\t{%5%{%0%}|%5%{%0%}}";
14748 gcc_unreachable ();
14751 [(set_attr "type" "sse")
14752 (set_attr "prefix" "evex")
14753 (set_attr "mode" "XI")])
;; Double-precision scatter prefetch without a mask register operand.
;; The output code switches on operand 3 to choose between
;; vscatterpf0...pd and vscatterpf1...pd and prints only the VSIB memory
;; operand (%4).
14755 (define_insn "*avx512pf_scatterpf<mode>df"
14758 (match_operator:V8DF 4 "vsib_mem_operator"
14760 [(match_operand:P 1 "vsib_address_operand" "Tv")
14761 (match_operand:VI4_256_8_512 0 "register_operand" "v")
14762 (match_operand:SI 2 "const1248_operand" "n")]
14764 (match_operand:SI 3 "const2367_operand" "n")]
14765 UNSPEC_SCATTER_PREFETCH)]
14768 switch (INTVAL (operands[3]))
14772 return "vscatterpf0<ssemodesuffix>pd\t{%4|%4}";
14775 return "vscatterpf1<ssemodesuffix>pd\t{%4|%4}";
14777 gcc_unreachable ();
14780 [(set_attr "type" "sse")
14781 (set_attr "prefix" "evex")
14782 (set_attr "mode" "XI")])
;; vexp2<ssemodesuffix> on VF_512 vectors.  The name and template use the
;; <mask_name> and <round_saeonly_name> substitutions, which contribute
;; the mask operand text and the SAE text printed around operand 1.
14784 (define_insn "avx512er_exp2<mode><mask_name><round_saeonly_name>"
14785 [(set (match_operand:VF_512 0 "register_operand" "=v")
14787 [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
14790 "vexp2<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
14791 [(set_attr "prefix" "evex")
14792 (set_attr "type" "sse")
14793 (set_attr "mode" "<MODE>")])
;; vrcp28<ssemodesuffix> on VF_512 vectors, with the same <mask_name> and
;; <round_saeonly_name> substitutions as the vexp2 pattern; the insn name
;; is additionally prefixed with <mask_codefor>.
14795 (define_insn "<mask_codefor>avx512er_rcp28<mode><mask_name><round_saeonly_name>"
14796 [(set (match_operand:VF_512 0 "register_operand" "=v")
14798 [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
14801 "vrcp28<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
14802 [(set_attr "prefix" "evex")
14803 (set_attr "type" "sse")
14804 (set_attr "mode" "<MODE>")])
;; Scalar vrcp28 (template uses <ssescalarmodesuffix>) on VF_128 vectors.
;; Besides the source operand 1 it takes a second register operand 2,
;; printed between source and destination in the three-operand template.
;; NOTE(review): operand 2 presumably supplies the elements that are not
;; recomputed -- confirm against the full pattern.
14806 (define_insn "avx512er_vmrcp28<mode><round_saeonly_name>"
14807 [(set (match_operand:VF_128 0 "register_operand" "=v")
14810 [(match_operand:VF_128 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
14812 (match_operand:VF_128 2 "register_operand" "v")
14815 "vrcp28<ssescalarmodesuffix>\t{<round_saeonly_op3>%1, %2, %0|%0, %2, %1<round_saeonly_op3>}"
14816 [(set_attr "length_immediate" "1")
14817 (set_attr "prefix" "evex")
14818 (set_attr "type" "sse")
14819 (set_attr "mode" "<MODE>")])
;; vrsqrt28<ssemodesuffix> on VF_512 vectors; structured like the vrcp28
;; pattern, with <mask_codefor>, <mask_name> and <round_saeonly_name>
;; substitutions in the name and template.
14821 (define_insn "<mask_codefor>avx512er_rsqrt28<mode><mask_name><round_saeonly_name>"
14822 [(set (match_operand:VF_512 0 "register_operand" "=v")
14824 [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
14827 "vrsqrt28<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
14828 [(set_attr "prefix" "evex")
14829 (set_attr "type" "sse")
14830 (set_attr "mode" "<MODE>")])
;; Scalar vrsqrt28 (template uses <ssescalarmodesuffix>) on VF_128
;; vectors; operand layout mirrors the scalar vrcp28 pattern: source
;; operand 1 plus a second register operand 2.
14832 (define_insn "avx512er_vmrsqrt28<mode><round_saeonly_name>"
14833 [(set (match_operand:VF_128 0 "register_operand" "=v")
14836 [(match_operand:VF_128 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
14838 (match_operand:VF_128 2 "register_operand" "v")
14841 "vrsqrt28<ssescalarmodesuffix>\t{<round_saeonly_op3>%1, %2, %0|%0, %2, %1<round_saeonly_op3>}"
14842 [(set_attr "length_immediate" "1")
14843 (set_attr "type" "sse")
14844 (set_attr "prefix" "evex")
14845 (set_attr "mode" "<MODE>")])
14847 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
14849 ;; XOP instructions
14851 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; xop_plus iterates over plus and ss_plus.  The macs and madcs code
;; attributes map each code to the mnemonic stem used by the XOP
;; multiply/add patterns; the ss_plus entries carry an extra trailing "s".
14853 (define_code_iterator xop_plus [plus ss_plus])
14855 (define_code_attr macs [(plus "macs") (ss_plus "macss")])
14856 (define_code_attr madcs [(plus "madcs") (ss_plus "madcss")])
14858 ;; XOP parallel integer multiply/add instructions.
;; vp<macs> with matching source and destination element widths on
;; VI24_128 vectors.  Operands 1 and 2 are marked commutative ("%");
;; operand 2 may be memory, operand 3 must be a register.
14860 (define_insn "xop_p<macs><ssemodesuffix><ssemodesuffix>"
14861 [(set (match_operand:VI24_128 0 "register_operand" "=x")
14864 (match_operand:VI24_128 1 "nonimmediate_operand" "%x")
14865 (match_operand:VI24_128 2 "nonimmediate_operand" "xm"))
14866 (match_operand:VI24_128 3 "register_operand" "x")))]
14868 "vp<macs><ssemodesuffix><ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
14869 [(set_attr "type" "ssemuladd")
14870 (set_attr "mode" "TI")])
;; vp<macs>dql: selects elements 0 and 2 of each V4SI input (operands 1
;; and 2) and produces a V2DI result, with V2DI register operand 3 as
;; the remaining input.
14872 (define_insn "xop_p<macs>dql"
14873 [(set (match_operand:V2DI 0 "register_operand" "=x")
14878 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
14879 (parallel [(const_int 0) (const_int 2)])))
14882 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
14883 (parallel [(const_int 0) (const_int 2)]))))
14884 (match_operand:V2DI 3 "register_operand" "x")))]
14886 "vp<macs>dql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
14887 [(set_attr "type" "ssemuladd")
14888 (set_attr "mode" "TI")])
;; vp<macs>dqh: same shape as the dql pattern, but selects elements 1 and
;; 3 of each V4SI input.
14890 (define_insn "xop_p<macs>dqh"
14891 [(set (match_operand:V2DI 0 "register_operand" "=x")
14896 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
14897 (parallel [(const_int 1) (const_int 3)])))
14900 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
14901 (parallel [(const_int 1) (const_int 3)]))))
14902 (match_operand:V2DI 3 "register_operand" "x")))]
14904 "vp<macs>dqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
14905 [(set_attr "type" "ssemuladd")
14906 (set_attr "mode" "TI")])
14908 ;; XOP parallel integer multiply/add instructions for the intrinsics
;; vp<macs>wd: selects the odd elements (1, 3, 5, 7) of each V8HI input
;; (operands 1 and 2) and produces a V4SI result, with V4SI register
;; operand 3 as the remaining input.
14909 (define_insn "xop_p<macs>wd"
14910 [(set (match_operand:V4SI 0 "register_operand" "=x")
14915 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
14916 (parallel [(const_int 1) (const_int 3)
14917 (const_int 5) (const_int 7)])))
14920 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
14921 (parallel [(const_int 1) (const_int 3)
14922 (const_int 5) (const_int 7)]))))
14923 (match_operand:V4SI 3 "register_operand" "x")))]
14925 "vp<macs>wd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
14926 [(set_attr "type" "ssemuladd")
14927 (set_attr "mode" "TI")])
;; vp<madcs>wd: uses both the even (0, 2, 4, 6) and the odd (1, 3, 5, 7)
;; element selections of the V8HI inputs, plus V4SI register operand 3,
;; to produce a V4SI result.
14929 (define_insn "xop_p<madcs>wd"
14930 [(set (match_operand:V4SI 0 "register_operand" "=x")
14936 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
14937 (parallel [(const_int 0) (const_int 2)
14938 (const_int 4) (const_int 6)])))
14941 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
14942 (parallel [(const_int 0) (const_int 2)
14943 (const_int 4) (const_int 6)]))))
14948 (parallel [(const_int 1) (const_int 3)
14949 (const_int 5) (const_int 7)])))
14953 (parallel [(const_int 1) (const_int 3)
14954 (const_int 5) (const_int 7)])))))
14955 (match_operand:V4SI 3 "register_operand" "x")))]
14957 "vp<madcs>wd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
14958 [(set_attr "type" "ssemuladd")
14959 (set_attr "mode" "TI")])
14961 ;; XOP parallel XMM conditional moves
;; vpcmov for every mode in iterator V.  Two alternatives: either operand
;; 3 is a register and operand 2 may be memory, or operand 3 is memory
;; and operand 2 is a register, so at most one input comes from memory.
14962 (define_insn "xop_pcmov_<mode><avxsizesuffix>"
14963 [(set (match_operand:V 0 "register_operand" "=x,x")
14965 (match_operand:V 3 "nonimmediate_operand" "x,m")
14966 (match_operand:V 1 "register_operand" "x,x")
14967 (match_operand:V 2 "nonimmediate_operand" "xm,x")))]
14969 "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
14970 [(set_attr "type" "sse4arg")])
14972 ;; XOP horizontal add/subtract instructions
;; vphadd<u>bw: combines the even-indexed and odd-indexed bytes of the
;; V16QI source (operand 1) into a V8HI result.
14973 (define_insn "xop_phadd<u>bw"
14974 [(set (match_operand:V8HI 0 "register_operand" "=x")
14978 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
14979 (parallel [(const_int 0) (const_int 2)
14980 (const_int 4) (const_int 6)
14981 (const_int 8) (const_int 10)
14982 (const_int 12) (const_int 14)])))
14986 (parallel [(const_int 1) (const_int 3)
14987 (const_int 5) (const_int 7)
14988 (const_int 9) (const_int 11)
14989 (const_int 13) (const_int 15)])))))]
14991 "vphadd<u>bw\t{%1, %0|%0, %1}"
14992 [(set_attr "type" "sseiadd1")])
;; vphadd<u>bd: combines four byte selections of the V16QI source
;; (indices 4k, 4k+1, 4k+2 and 4k+3) into a V4SI result.
14994 (define_insn "xop_phadd<u>bd"
14995 [(set (match_operand:V4SI 0 "register_operand" "=x")
15000 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
15001 (parallel [(const_int 0) (const_int 4)
15002 (const_int 8) (const_int 12)])))
15006 (parallel [(const_int 1) (const_int 5)
15007 (const_int 9) (const_int 13)]))))
15012 (parallel [(const_int 2) (const_int 6)
15013 (const_int 10) (const_int 14)])))
15017 (parallel [(const_int 3) (const_int 7)
15018 (const_int 11) (const_int 15)]))))))]
15020 "vphadd<u>bd\t{%1, %0|%0, %1}"
15021 [(set_attr "type" "sseiadd1")])
;; vphadd<u>bq: combines eight byte selections of the V16QI source
;; (indices {k, k+8} for k = 0..7) into a V2DI result.
15023 (define_insn "xop_phadd<u>bq"
15024 [(set (match_operand:V2DI 0 "register_operand" "=x")
15030 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
15031 (parallel [(const_int 0) (const_int 8)])))
15035 (parallel [(const_int 1) (const_int 9)]))))
15040 (parallel [(const_int 2) (const_int 10)])))
15044 (parallel [(const_int 3) (const_int 11)])))))
15050 (parallel [(const_int 4) (const_int 12)])))
15054 (parallel [(const_int 5) (const_int 13)]))))
15059 (parallel [(const_int 6) (const_int 14)])))
15063 (parallel [(const_int 7) (const_int 15)])))))))]
15065 "vphadd<u>bq\t{%1, %0|%0, %1}"
15066 [(set_attr "type" "sseiadd1")])
;; vphadd<u>wd: combines the even-indexed and odd-indexed elements of the
;; V8HI source into a V4SI result.
15068 (define_insn "xop_phadd<u>wd"
15069 [(set (match_operand:V4SI 0 "register_operand" "=x")
15073 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
15074 (parallel [(const_int 0) (const_int 2)
15075 (const_int 4) (const_int 6)])))
15079 (parallel [(const_int 1) (const_int 3)
15080 (const_int 5) (const_int 7)])))))]
15082 "vphadd<u>wd\t{%1, %0|%0, %1}"
15083 [(set_attr "type" "sseiadd1")])
;; vphadd<u>wq: combines four element selections of the V8HI source
;; (indices {k, k+4} for k = 0..3) into a V2DI result.
15085 (define_insn "xop_phadd<u>wq"
15086 [(set (match_operand:V2DI 0 "register_operand" "=x")
15091 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
15092 (parallel [(const_int 0) (const_int 4)])))
15096 (parallel [(const_int 1) (const_int 5)]))))
15101 (parallel [(const_int 2) (const_int 6)])))
15105 (parallel [(const_int 3) (const_int 7)]))))))]
15107 "vphadd<u>wq\t{%1, %0|%0, %1}"
15108 [(set_attr "type" "sseiadd1")])
;; vphadd<u>dq: combines elements {0, 2} and {1, 3} of the V4SI source
;; into a V2DI result.
15110 (define_insn "xop_phadd<u>dq"
15111 [(set (match_operand:V2DI 0 "register_operand" "=x")
15115 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
15116 (parallel [(const_int 0) (const_int 2)])))
15120 (parallel [(const_int 1) (const_int 3)])))))]
15122 "vphadd<u>dq\t{%1, %0|%0, %1}"
15123 [(set_attr "type" "sseiadd1")])
;; vphsubbw: combines the even-indexed and odd-indexed bytes of the
;; V16QI source into a V8HI result.  Unlike the phadd patterns the name
;; has no <u> variant.
15125 (define_insn "xop_phsubbw"
15126 [(set (match_operand:V8HI 0 "register_operand" "=x")
15130 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
15131 (parallel [(const_int 0) (const_int 2)
15132 (const_int 4) (const_int 6)
15133 (const_int 8) (const_int 10)
15134 (const_int 12) (const_int 14)])))
15138 (parallel [(const_int 1) (const_int 3)
15139 (const_int 5) (const_int 7)
15140 (const_int 9) (const_int 11)
15141 (const_int 13) (const_int 15)])))))]
15143 "vphsubbw\t{%1, %0|%0, %1}"
15144 [(set_attr "type" "sseiadd1")])
;; vphsubwd: combines the even-indexed and odd-indexed elements of the
;; V8HI source into a V4SI result.
15146 (define_insn "xop_phsubwd"
15147 [(set (match_operand:V4SI 0 "register_operand" "=x")
15151 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
15152 (parallel [(const_int 0) (const_int 2)
15153 (const_int 4) (const_int 6)])))
15157 (parallel [(const_int 1) (const_int 3)
15158 (const_int 5) (const_int 7)])))))]
15160 "vphsubwd\t{%1, %0|%0, %1}"
15161 [(set_attr "type" "sseiadd1")])
;; vphsubdq: combines elements {0, 2} and {1, 3} of the V4SI source into
;; a V2DI result.
15163 (define_insn "xop_phsubdq"
15164 [(set (match_operand:V2DI 0 "register_operand" "=x")
15168 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
15169 (parallel [(const_int 0) (const_int 2)])))
15173 (parallel [(const_int 1) (const_int 3)])))))]
15175 "vphsubdq\t{%1, %0|%0, %1}"
15176 [(set_attr "type" "sse4arg")])
15178 ;; XOP permute instructions
;; vpperm on V16QI, modelled as UNSPEC_XOP_PERMUTE over operands 1-3.
;; Operand 1 is always a register; the insn condition rejects the case
;; where operands 2 and 3 are both memory.
15179 (define_insn "xop_pperm"
15180 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
15182 [(match_operand:V16QI 1 "register_operand" "x,x")
15183 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
15184 (match_operand:V16QI 3 "nonimmediate_operand" "xm,x")]
15185 UNSPEC_XOP_PERMUTE))]
15186 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
15187 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15188 [(set_attr "type" "sse4arg")
15189 (set_attr "mode" "TI")])
15191 ;; XOP pack instructions that combine two vectors into a smaller vector
;; vpperm used to pack two V2DI inputs (operands 1 and 2) into one V4SI
;; result.  The V16QI operand 3 is attached with (use ...); the insn
;; condition rejects operands 2 and 3 both being memory.
15192 (define_insn "xop_pperm_pack_v2di_v4si"
15193 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
15196 (match_operand:V2DI 1 "register_operand" "x,x"))
15198 (match_operand:V2DI 2 "nonimmediate_operand" "x,m"))))
15199 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
15200 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
15201 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15202 [(set_attr "type" "sse4arg")
15203 (set_attr "mode" "TI")])
;; vpperm used to pack two V4SI inputs (operands 1 and 2) into one V8HI
;; result.  The V16QI operand 3 is attached with (use ...); the insn
;; condition rejects operands 2 and 3 both being memory.
15205 (define_insn "xop_pperm_pack_v4si_v8hi"
15206 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
15209 (match_operand:V4SI 1 "register_operand" "x,x"))
15211 (match_operand:V4SI 2 "nonimmediate_operand" "x,m"))))
15212 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
15213 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
15214 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15215 [(set_attr "type" "sse4arg")
15216 (set_attr "mode" "TI")])
;; vpperm used to pack two V8HI inputs (operands 1 and 2) into one V16QI
;; result.  The V16QI operand 3 is attached with (use ...); the insn
;; condition rejects operands 2 and 3 both being memory.
15218 (define_insn "xop_pperm_pack_v8hi_v16qi"
15219 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
15222 (match_operand:V8HI 1 "register_operand" "x,x"))
15224 (match_operand:V8HI 2 "nonimmediate_operand" "x,m"))))
15225 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
15226 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
15227 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15228 [(set_attr "type" "sse4arg")
15229 (set_attr "mode" "TI")])
15231 ;; XOP packed rotate instructions
;; Rotate-left expander for VI_128 modes with an SImode count.  When the
;; count is not an in-range constant (const_0_to_<sserotatemax>), it is
;; converted to the element mode if necessary, replicated into every lane
;; of a fresh vector register via vec_init, and the rotate is emitted
;; through gen_xop_vrotl<mode>3.
15232 (define_expand "rotl<mode>3"
15233 [(set (match_operand:VI_128 0 "register_operand")
15235 (match_operand:VI_128 1 "nonimmediate_operand")
15236 (match_operand:SI 2 "general_operand")))]
15239 /* If we were given a scalar, convert it to parallel */
15240 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
15242 rtvec vs = rtvec_alloc (<ssescalarnum>);
15243 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
15244 rtx reg = gen_reg_rtx (<MODE>mode);
15245 rtx op2 = operands[2];
15248 if (GET_MODE (op2) != <ssescalarmode>mode)
15250 op2 = gen_reg_rtx (<ssescalarmode>mode);
15251 convert_move (op2, operands[2], false);
15254 for (i = 0; i < <ssescalarnum>; i++)
15255 RTVEC_ELT (vs, i) = op2;
15257 emit_insn (gen_vec_init<mode> (reg, par));
15258 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
;; Rotate-right expander for VI_128 modes with an SImode count.  Same
;; broadcast sequence as rotl<mode>3, but the broadcast count vector is
;; negated (gen_neg<mode>2) before being passed to gen_xop_vrotl<mode>3,
;; so the right rotate is emitted as a left rotate by the negated count.
15263 (define_expand "rotr<mode>3"
15264 [(set (match_operand:VI_128 0 "register_operand")
15266 (match_operand:VI_128 1 "nonimmediate_operand")
15267 (match_operand:SI 2 "general_operand")))]
15270 /* If we were given a scalar, convert it to parallel */
15271 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
15273 rtvec vs = rtvec_alloc (<ssescalarnum>);
15274 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
15275 rtx neg = gen_reg_rtx (<MODE>mode);
15276 rtx reg = gen_reg_rtx (<MODE>mode);
15277 rtx op2 = operands[2];
15280 if (GET_MODE (op2) != <ssescalarmode>mode)
15282 op2 = gen_reg_rtx (<ssescalarmode>mode);
15283 convert_move (op2, operands[2], false);
15286 for (i = 0; i < <ssescalarnum>; i++)
15287 RTVEC_ELT (vs, i) = op2;
15289 emit_insn (gen_vec_init<mode> (reg, par));
15290 emit_insn (gen_neg<mode>2 (neg, reg));
15291 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], neg));
;; vprot<ssemodesuffix> with an immediate count: operand 2 must satisfy
;; const_0_to_<sserotatemax>_operand and is printed directly.
15296 (define_insn "xop_rotl<mode>3"
15297 [(set (match_operand:VI_128 0 "register_operand" "=x")
15299 (match_operand:VI_128 1 "nonimmediate_operand" "xm")
15300 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
15302 "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
15303 [(set_attr "type" "sseishft")
15304 (set_attr "length_immediate" "1")
15305 (set_attr "mode" "TI")])
;; Immediate rotate-right, emitted as vprot<ssemodesuffix> with a
;; rewritten count: the output code computes (element bit size - operand
;; 2) and the template prints it as %3.
;; NOTE(review): the assignment target is presumably operands[3] --
;; confirm against the full pattern.
15307 (define_insn "xop_rotr<mode>3"
15308 [(set (match_operand:VI_128 0 "register_operand" "=x")
15310 (match_operand:VI_128 1 "nonimmediate_operand" "xm")
15311 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
15315 = GEN_INT (GET_MODE_BITSIZE (<ssescalarmode>mode) - INTVAL (operands[2]));
15316 return \"vprot<ssemodesuffix>\t{%3, %1, %0|%0, %1, %3}\";
15318 [(set_attr "type" "sseishft")
15319 (set_attr "length_immediate" "1")
15320 (set_attr "mode" "TI")])
;; Rotate right by a per-element vector count: negates operand 2 into a
;; fresh register and emits gen_xop_vrotl<mode>3 with the negated counts.
15322 (define_expand "vrotr<mode>3"
15323 [(match_operand:VI_128 0 "register_operand")
15324 (match_operand:VI_128 1 "register_operand")
15325 (match_operand:VI_128 2 "register_operand")]
15328 rtx reg = gen_reg_rtx (<MODE>mode);
15329 emit_insn (gen_neg<mode>2 (reg, operands[2]));
15330 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
;; Rotate left by a per-element vector count: forwards all three operands
;; unchanged to gen_xop_vrotl<mode>3.
15334 (define_expand "vrotl<mode>3"
15335 [(match_operand:VI_128 0 "register_operand")
15336 (match_operand:VI_128 1 "register_operand")
15337 (match_operand:VI_128 2 "register_operand")]
15340 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], operands[2]));
;; vprot<ssemodesuffix> with a vector count (operand 2).  The RTL is an
;; if_then_else whose visible arm uses the negated count.  The insn
;; condition rejects operands 1 and 2 both being memory.
15344 (define_insn "xop_vrotl<mode>3"
15345 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
15346 (if_then_else:VI_128
15348 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
15351 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
15355 (neg:VI_128 (match_dup 2)))))]
15356 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
15357 "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
15358 [(set_attr "type" "sseishft")
15359 (set_attr "prefix_data16" "0")
15360 (set_attr "prefix_extra" "2")
15361 (set_attr "mode" "TI")])
15363 ;; XOP packed shift instructions.
;; vlshr<mode>3 for VI12_128 modes: negates the count vector (operand 2)
;; and emits gen_xop_shl<mode>3 with the negated counts.
15364 (define_expand "vlshr<mode>3"
15365 [(set (match_operand:VI12_128 0 "register_operand")
15367 (match_operand:VI12_128 1 "register_operand")
15368 (match_operand:VI12_128 2 "nonimmediate_operand")))]
15371 rtx neg = gen_reg_rtx (<MODE>mode);
15372 emit_insn (gen_neg<mode>2 (neg, operands[2]));
15373 emit_insn (gen_xop_shl<mode>3 (operands[0], operands[1], neg));
;; vlshr<mode>3 for VI48_128 modes, enabled for AVX2 or XOP.  The visible
;; preparation code negates the count vector and emits
;; gen_xop_shl<mode>3.
;; NOTE(review): this sequence is presumably only taken when AVX2 is not
;; available -- confirm against the full pattern.
15377 (define_expand "vlshr<mode>3"
15378 [(set (match_operand:VI48_128 0 "register_operand")
15380 (match_operand:VI48_128 1 "register_operand")
15381 (match_operand:VI48_128 2 "nonimmediate_operand")))]
15382 "TARGET_AVX2 || TARGET_XOP"
15386 rtx neg = gen_reg_rtx (<MODE>mode);
15387 emit_insn (gen_neg<mode>2 (neg, operands[2]));
15388 emit_insn (gen_xop_shl<mode>3 (operands[0], operands[1], neg));
;; vlshr<mode>3 for VI48_512 modes: operand 1 is a register, operand 2
;; (the count vector) may be a register or memory.
15393 (define_expand "vlshr<mode>3"
15394 [(set (match_operand:VI48_512 0 "register_operand")
15396 (match_operand:VI48_512 1 "register_operand")
15397 (match_operand:VI48_512 2 "nonimmediate_operand")))]
;; vlshr<mode>3 for VI48_256 modes: operand 1 is a register, operand 2
;; (the count vector) may be a register or memory.
15400 (define_expand "vlshr<mode>3"
15401 [(set (match_operand:VI48_256 0 "register_operand")
15403 (match_operand:VI48_256 1 "register_operand")
15404 (match_operand:VI48_256 2 "nonimmediate_operand")))]
;; Arithmetic right shift by a vector count for VI128_128 modes.  Enabled
;; for XOP, or for AVX512VL combined with either AVX512BW or V2DImode.
;; When that AVX512 combination does not hold, the count vector is
;; negated and the shift is emitted through gen_xop_sha<mode>3.
15407 (define_expand "vashr<mode>3<mask_name>"
15408 [(set (match_operand:VI128_128 0 "register_operand")
15409 (ashiftrt:VI128_128
15410 (match_operand:VI128_128 1 "register_operand")
15411 (match_operand:VI128_128 2 "nonimmediate_operand")))]
15412 "TARGET_XOP || ((TARGET_AVX512BW || <MODE>mode == V2DImode) && TARGET_AVX512VL)"
15414 if (!((TARGET_AVX512BW || <MODE>mode == V2DImode) && TARGET_AVX512VL))
15416 rtx neg = gen_reg_rtx (<MODE>mode);
15417 emit_insn (gen_neg<mode>2 (neg, operands[2]));
15418 emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], neg));
;; Arithmetic right shift of V4SI by a vector count, enabled for AVX2 or
;; XOP.  The visible preparation code negates the count vector and emits
;; gen_xop_shav4si3.
;; NOTE(review): this sequence is presumably only taken when AVX2 is not
;; available -- confirm against the full pattern.
15423 (define_expand "vashrv4si3"
15424 [(set (match_operand:V4SI 0 "register_operand")
15425 (ashiftrt:V4SI (match_operand:V4SI 1 "register_operand")
15426 (match_operand:V4SI 2 "nonimmediate_operand")))]
15427 "TARGET_AVX2 || TARGET_XOP"
15431 rtx neg = gen_reg_rtx (V4SImode);
15432 emit_insn (gen_negv4si2 (neg, operands[2]));
15433 emit_insn (gen_xop_shav4si3 (operands[0], operands[1], neg));
;; Arithmetic right shift of V16SI by a vector count; only the RTL
;; template (ashiftrt of operand 1 by operand 2) is visible here.
15438 (define_expand "vashrv16si3"
15439 [(set (match_operand:V16SI 0 "register_operand")
15440 (ashiftrt:V16SI (match_operand:V16SI 1 "register_operand")
15441 (match_operand:V16SI 2 "nonimmediate_operand")))]
;; Arithmetic right shift of V8SI by a vector count; only the RTL
;; template (ashiftrt of operand 1 by operand 2) is visible here.
15444 (define_expand "vashrv8si3"
15445 [(set (match_operand:V8SI 0 "register_operand")
15446 (ashiftrt:V8SI (match_operand:V8SI 1 "register_operand")
15447 (match_operand:V8SI 2 "nonimmediate_operand")))]
;; vashl<mode>3 for VI12_128 modes: forwards the operands unchanged to
;; gen_xop_sha<mode>3 (no negation of the count vector).
15450 (define_expand "vashl<mode>3"
15451 [(set (match_operand:VI12_128 0 "register_operand")
15453 (match_operand:VI12_128 1 "register_operand")
15454 (match_operand:VI12_128 2 "nonimmediate_operand")))]
15457 emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], operands[2]));
;; vashl<mode>3 for VI48_128 modes, enabled for AVX2 or XOP.  The visible
;; preparation code forces the count vector into a register and emits
;; gen_xop_sha<mode>3.
;; NOTE(review): this sequence is presumably only taken when AVX2 is not
;; available -- confirm against the full pattern.
15461 (define_expand "vashl<mode>3"
15462 [(set (match_operand:VI48_128 0 "register_operand")
15464 (match_operand:VI48_128 1 "register_operand")
15465 (match_operand:VI48_128 2 "nonimmediate_operand")))]
15466 "TARGET_AVX2 || TARGET_XOP"
15470 operands[2] = force_reg (<MODE>mode, operands[2]);
15471 emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], operands[2]));
;; vashl<mode>3 for VI48_512 modes: operand 1 is a register, operand 2
;; (the count vector) may be a register or memory.
15476 (define_expand "vashl<mode>3"
15477 [(set (match_operand:VI48_512 0 "register_operand")
15479 (match_operand:VI48_512 1 "register_operand")
15480 (match_operand:VI48_512 2 "nonimmediate_operand")))]
;; vashl<mode>3 for VI48_256 modes: operand 1 is a register, operand 2
;; (the count vector) may be a register or memory.
15483 (define_expand "vashl<mode>3"
15484 [(set (match_operand:VI48_256 0 "register_operand")
15486 (match_operand:VI48_256 1 "register_operand")
15487 (match_operand:VI48_256 2 "nonimmediate_operand")))]
;; vpsha<ssemodesuffix> with a vector count (operand 2).  The RTL is an
;; if_then_else whose visible arm uses the negated count.  The insn
;; condition rejects operands 1 and 2 both being memory.
15490 (define_insn "xop_sha<mode>3"
15491 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
15492 (if_then_else:VI_128
15494 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
15497 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
15501 (neg:VI_128 (match_dup 2)))))]
15502 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
15503 "vpsha<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
15504 [(set_attr "type" "sseishft")
15505 (set_attr "prefix_data16" "0")
15506 (set_attr "prefix_extra" "2")
15507 (set_attr "mode" "TI")])
;; vpshl<ssemodesuffix> with a vector count (operand 2).  Same structure
;; as xop_sha<mode>3: an if_then_else whose visible arm uses the negated
;; count, and a condition rejecting two memory inputs.
15509 (define_insn "xop_shl<mode>3"
15510 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
15511 (if_then_else:VI_128
15513 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
15516 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
15520 (neg:VI_128 (match_dup 2)))))]
15521 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
15522 "vpshl<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
15523 [(set_attr "type" "sseishft")
15524 (set_attr "prefix_data16" "0")
15525 (set_attr "prefix_extra" "2")
15526 (set_attr "mode" "TI")])
;; Shift of byte vectors (VI1_AVX2) by a scalar SImode count, for every
;; code in any_shift.  With XOP and V16QImode the count is broadcast into
;; a V16QI register via vec_init and the shift is emitted with
;; gen_xop_shlv16qi3 for LSHIFTRT or gen_xop_shav16qi3 otherwise.  For
;; codes other than ASHIFT a constant count is negated directly with
;; GEN_INT.
;; NOTE(review): a non-constant count is presumably negated after the
;; broadcast by the gen_negv16qi2 call -- confirm against the full
;; pattern.
;; The other visible path calls ix86_expand_vecop_qihi.
15528 (define_expand "<shift_insn><mode>3"
15529 [(set (match_operand:VI1_AVX2 0 "register_operand")
15530 (any_shift:VI1_AVX2
15531 (match_operand:VI1_AVX2 1 "register_operand")
15532 (match_operand:SI 2 "nonmemory_operand")))]
15535 if (TARGET_XOP && <MODE>mode == V16QImode)
15537 bool negate = false;
15538 rtx (*gen) (rtx, rtx, rtx);
15542 if (<CODE> != ASHIFT)
15544 if (CONST_INT_P (operands[2]))
15545 operands[2] = GEN_INT (-INTVAL (operands[2]));
15549 par = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
15550 for (i = 0; i < 16; i++)
15551 XVECEXP (par, 0, i) = operands[2];
15553 tmp = gen_reg_rtx (V16QImode);
15554 emit_insn (gen_vec_initv16qi (tmp, par));
15557 emit_insn (gen_negv16qi2 (tmp, tmp));
15559 gen = (<CODE> == LSHIFTRT ? gen_xop_shlv16qi3 : gen_xop_shav16qi3);
15560 emit_insn (gen (operands[0], operands[1], tmp));
15563 ix86_expand_vecop_qihi (<CODE>, operands[0], operands[1], operands[2]);
;; Arithmetic right shift of V2DI by a scalar DImode count, enabled for
;; XOP or AVX512VL.  Without AVX512VL the count is broadcast into a V2DI
;; register via vec_init and the shift is emitted with gen_xop_shav2di3.
;; A constant count is negated directly with GEN_INT.
;; NOTE(review): a non-constant count is presumably negated after the
;; broadcast by the gen_negv2di2 call -- confirm against the full
;; pattern.
15567 (define_expand "ashrv2di3<mask_name>"
15568 [(set (match_operand:V2DI 0 "register_operand")
15570 (match_operand:V2DI 1 "register_operand")
15571 (match_operand:DI 2 "nonmemory_operand")))]
15572 "TARGET_XOP || TARGET_AVX512VL"
15574 rtx reg = gen_reg_rtx (V2DImode);
15576 bool negate = false;
15579 if (!TARGET_AVX512VL)
15581 if (CONST_INT_P (operands[2]))
15582 operands[2] = GEN_INT (-INTVAL (operands[2]));
15586 par = gen_rtx_PARALLEL (V2DImode, rtvec_alloc (2));
15587 for (i = 0; i < 2; i++)
15588 XVECEXP (par, 0, i) = operands[2];
15590 emit_insn (gen_vec_initv2di (reg, par));
15593 emit_insn (gen_negv2di2 (reg, reg));
15595 emit_insn (gen_xop_shav2di3 (operands[0], operands[1], reg));
15600 ;; XOP FRCZ support
;; vfrcz<ssemodesuffix> for every mode in FMAMODE: one source operand
;; (register or memory) and a register destination.
15601 (define_insn "xop_frcz<mode>2"
15602 [(set (match_operand:FMAMODE 0 "register_operand" "=x")
15604 [(match_operand:FMAMODE 1 "nonimmediate_operand" "xm")]
15607 "vfrcz<ssemodesuffix>\t{%1, %0|%0, %1}"
15608 [(set_attr "type" "ssecvt1")
15609 (set_attr "mode" "<MODE>")])
;; Expander for the scalar vfrcz form on VF_128 vectors.  The preparation
;; statement sets operands[2] to the all-zero constant of the vector
;; mode, matching the const0_operand in the *xop_vmfrcz<mode>2 insn.
15611 (define_expand "xop_vmfrcz<mode>2"
15612 [(set (match_operand:VF_128 0 "register_operand")
15615 [(match_operand:VF_128 1 "nonimmediate_operand")]
15620 "operands[2] = CONST0_RTX (<MODE>mode);")
;; Scalar vfrcz (template uses <ssescalarmodesuffix>).  Operand 2 must be
;; a zero constant and is not printed; the Intel-syntax alternative
;; prints the source through the %<iptr> modifier.
15622 (define_insn "*xop_vmfrcz<mode>2"
15623 [(set (match_operand:VF_128 0 "register_operand" "=x")
15626 [(match_operand:VF_128 1 "nonimmediate_operand" "xm")]
15628 (match_operand:VF_128 2 "const0_operand")
15631 "vfrcz<ssescalarmodesuffix>\t{%1, %0|%0, %<iptr>1}"
15632 [(set_attr "type" "ssecvt1")
15633 (set_attr "mode" "<MODE>")])
;; vpcom<cond><ssemodesuffix>: operator 1 must satisfy
;; ix86_comparison_int_operator and is printed through %Y1.
;; NOTE(review): the "type" here is "sse4arg" while the unsigned variants
;; use "ssecmp" -- confirm this difference is intended.
15635 (define_insn "xop_maskcmp<mode>3"
15636 [(set (match_operand:VI_128 0 "register_operand" "=x")
15637 (match_operator:VI_128 1 "ix86_comparison_int_operator"
15638 [(match_operand:VI_128 2 "register_operand" "x")
15639 (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
15641 "vpcom%Y1<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
15642 [(set_attr "type" "sse4arg")
15643 (set_attr "prefix_data16" "0")
15644 (set_attr "prefix_rep" "0")
15645 (set_attr "prefix_extra" "2")
15646 (set_attr "length_immediate" "1")
15647 (set_attr "mode" "TI")])
;; vpcom<cond>u<ssemodesuffix>: operator 1 must satisfy
;; ix86_comparison_uns_operator and is printed through %Y1, followed by
;; the "u" suffix.
15649 (define_insn "xop_maskcmp_uns<mode>3"
15650 [(set (match_operand:VI_128 0 "register_operand" "=x")
15651 (match_operator:VI_128 1 "ix86_comparison_uns_operator"
15652 [(match_operand:VI_128 2 "register_operand" "x")
15653 (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
15655 "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
15656 [(set_attr "type" "ssecmp")
15657 (set_attr "prefix_data16" "0")
15658 (set_attr "prefix_rep" "0")
15659 (set_attr "prefix_extra" "2")
15660 (set_attr "length_immediate" "1")
15661 (set_attr "mode" "TI")])
15663 ;; Version of pcom*u* that is called from the intrinsics that allows pcomequ*
15664 ;; and pcomneu* not to be converted to the signed ones in case somebody needs
15665 ;; the exact instruction generated for the intrinsic.
;; Same template as xop_maskcmp_uns<mode>3, but the comparison is wrapped
;; in UNSPEC_XOP_UNSIGNED_CMP.
;; NOTE(review): unlike xop_maskcmp_uns<mode>3 this pattern sets no
;; "prefix_rep" attribute -- confirm that is intended.
15666 (define_insn "xop_maskcmp_uns2<mode>3"
15667 [(set (match_operand:VI_128 0 "register_operand" "=x")
15669 [(match_operator:VI_128 1 "ix86_comparison_uns_operator"
15670 [(match_operand:VI_128 2 "register_operand" "x")
15671 (match_operand:VI_128 3 "nonimmediate_operand" "xm")])]
15672 UNSPEC_XOP_UNSIGNED_CMP))]
15674 "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
15675 [(set_attr "type" "ssecmp")
15676 (set_attr "prefix_data16" "0")
15677 (set_attr "prefix_extra" "2")
15678 (set_attr "length_immediate" "1")
15679 (set_attr "mode" "TI")])
15681 ;; Pcomtrue and pcomfalse support. These are useless instructions, but are
15682 ;; being added here to be complete.
;; Modelled as UNSPEC_XOP_TRUEFALSE.  The output code emits
;; vpcomtrue<ssemodesuffix> when the integer constant operand 3 is
;; nonzero and vpcomfalse<ssemodesuffix> when it is zero.
15683 (define_insn "xop_pcom_tf<mode>3"
15684 [(set (match_operand:VI_128 0 "register_operand" "=x")
15686 [(match_operand:VI_128 1 "register_operand" "x")
15687 (match_operand:VI_128 2 "nonimmediate_operand" "xm")
15688 (match_operand:SI 3 "const_int_operand" "n")]
15689 UNSPEC_XOP_TRUEFALSE))]
15692 return ((INTVAL (operands[3]) != 0)
15693 ? "vpcomtrue<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
15694 : "vpcomfalse<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}");
15696 [(set_attr "type" "ssecmp")
15697 (set_attr "prefix_data16" "0")
15698 (set_attr "prefix_extra" "2")
15699 (set_attr "length_immediate" "1")
15700 (set_attr "mode" "TI")])
;; vpermil2<ssemodesuffix> on VF_128_256 vectors: two vector inputs
;; (operands 1 and 2), an integer-vector operand 3 of mode
;; <sseintvecmode> that may be memory, and an immediate operand 4
;; restricted by const_0_to_3_operand.
15702 (define_insn "xop_vpermil2<mode>3"
15703 [(set (match_operand:VF_128_256 0 "register_operand" "=x")
15705 [(match_operand:VF_128_256 1 "register_operand" "x")
15706 (match_operand:VF_128_256 2 "nonimmediate_operand" "%x")
15707 (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "xm")
15708 (match_operand:SI 4 "const_0_to_3_operand" "n")]
15711 "vpermil2<ssemodesuffix>\t{%4, %3, %2, %1, %0|%0, %1, %2, %3, %4}"
15712 [(set_attr "type" "sse4arg")
15713 (set_attr "length_immediate" "1")
15714 (set_attr "mode" "<MODE>")])
15716 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; aesenc / vaesenc.  Alternative 0 (isa "noavx") ties operand 1 to the
;; destination and prints the two-operand form; alternative 1 (isa
;; "avx") prints the three-operand v-prefixed form.
15718 (define_insn "aesenc"
15719 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
15720 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
15721 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
15725 aesenc\t{%2, %0|%0, %2}
15726 vaesenc\t{%2, %1, %0|%0, %1, %2}"
15727 [(set_attr "isa" "noavx,avx")
15728 (set_attr "type" "sselog1")
15729 (set_attr "prefix_extra" "1")
15730 (set_attr "prefix" "orig,vex")
15731 (set_attr "btver2_decode" "double,double")
15732 (set_attr "mode" "TI")])
;; aesenclast / vaesenclast, modelled as UNSPEC_AESENCLAST.  Alternative
;; 0 (isa "noavx") ties operand 1 to the destination; alternative 1 (isa
;; "avx") prints the three-operand v-prefixed form.
15734 (define_insn "aesenclast"
15735 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
15736 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
15737 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
15738 UNSPEC_AESENCLAST))]
15741 aesenclast\t{%2, %0|%0, %2}
15742 vaesenclast\t{%2, %1, %0|%0, %1, %2}"
15743 [(set_attr "isa" "noavx,avx")
15744 (set_attr "type" "sselog1")
15745 (set_attr "prefix_extra" "1")
15746 (set_attr "prefix" "orig,vex")
15747 (set_attr "btver2_decode" "double,double")
15748 (set_attr "mode" "TI")])
;; aesdec / vaesdec.  Alternative 0 (isa "noavx") ties operand 1 to the
;; destination and prints the two-operand form; alternative 1 (isa
;; "avx") prints the three-operand v-prefixed form.
15750 (define_insn "aesdec"
15751 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
15752 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
15753 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
15757 aesdec\t{%2, %0|%0, %2}
15758 vaesdec\t{%2, %1, %0|%0, %1, %2}"
15759 [(set_attr "isa" "noavx,avx")
15760 (set_attr "type" "sselog1")
15761 (set_attr "prefix_extra" "1")
15762 (set_attr "prefix" "orig,vex")
15763 (set_attr "btver2_decode" "double,double")
15764 (set_attr "mode" "TI")])
;; aesdeclast / vaesdeclast, modelled as UNSPEC_AESDECLAST.  Alternative
;; 0 (isa "noavx") ties operand 1 to the destination; alternative 1 (isa
;; "avx") prints the three-operand v-prefixed form.
15766 (define_insn "aesdeclast"
15767 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
15768 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
15769 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
15770 UNSPEC_AESDECLAST))]
15773 aesdeclast\t{%2, %0|%0, %2}
15774 vaesdeclast\t{%2, %1, %0|%0, %1, %2}"
15775 [(set_attr "isa" "noavx,avx")
15776 (set_attr "type" "sselog1")
15777 (set_attr "prefix_extra" "1")
15778 (set_attr "prefix" "orig,vex")
15779 (set_attr "btver2_decode" "double,double")
15780 (set_attr "mode" "TI")])
;; Pattern for the aesimc instruction: a unary unspec on a V2DI source
;; that may be a register or memory.  There is a single alternative; the
;; template is written with the "%v" prefix and the "prefix" attribute
;; is "maybe_vex", so the same pattern serves the legacy and VEX
;; spellings.
15782 (define_insn "aesimc"
15783 [(set (match_operand:V2DI 0 "register_operand" "=x")
15784 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")]
15787 "%vaesimc\t{%1, %0|%0, %1}"
15788 [(set_attr "type" "sselog1")
15789 (set_attr "prefix_extra" "1")
15790 (set_attr "prefix" "maybe_vex")
15791 (set_attr "mode" "TI")])
;; Pattern for the aeskeygenassist instruction (UNSPEC_AESKEYGENASSIST).
;; Operand 1 is the V2DI source (register or memory); operand 2 is an
;; SImode immediate accepted by const_0_to_255_operand, which is why
;; "length_immediate" is 1.  Single alternative using the "%v" template
;; prefix with "prefix" set to "maybe_vex".
15793 (define_insn "aeskeygenassist"
15794 [(set (match_operand:V2DI 0 "register_operand" "=x")
15795 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")
15796 (match_operand:SI 2 "const_0_to_255_operand" "n")]
15797 UNSPEC_AESKEYGENASSIST))]
15799 "%vaeskeygenassist\t{%2, %1, %0|%0, %1, %2}"
15800 [(set_attr "type" "sselog1")
15801 (set_attr "prefix_extra" "1")
15802 (set_attr "length_immediate" "1")
15803 (set_attr "prefix" "maybe_vex")
15804 (set_attr "mode" "TI")])
;; Pattern for the pclmulqdq / vpclmulqdq instruction on V2DI operands,
;; expressed as an unspec.  Operand 3 is an SImode immediate accepted by
;; const_0_to_255_operand.  Alternative 0 (isa "noavx") ties operand 1
;; to the destination; alternative 1 (isa "avx") prints the VEX form
;; with a separate first source.
15806 (define_insn "pclmulqdq"
15807 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
15808 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
15809 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")
15810 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
15814 pclmulqdq\t{%3, %2, %0|%0, %2, %3}
15815 vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15816 [(set_attr "isa" "noavx,avx")
15817 (set_attr "type" "sselog1")
15818 (set_attr "prefix_extra" "1")
15819 (set_attr "length_immediate" "1")
15820 (set_attr "prefix" "orig,vex")
15821 (set_attr "mode" "TI")])
;; Expander that builds the PARALLEL describing vzeroall.  Operand 0 is
;; replaced by a PARALLEL of nregs + 1 elements, where nregs is 16 when
;; TARGET_64BIT and 8 otherwise: element 0 is an UNSPEC_VOLATILE wrapping
;; const0_rtx, and elements 1..nregs are SETs that assign
;; CONST0_RTX (V8SImode) to each SSE register viewed in V8SImode.
15823 (define_expand "avx_vzeroall"
15824 [(match_par_dup 0 [(const_int 0)])]
15827 int nregs = TARGET_64BIT ? 16 : 8;
15830 operands[0] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs + 1));
15832 XVECEXP (operands[0], 0, 0)
15833 = gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, const0_rtx),
15836 for (regno = 0; regno < nregs; regno++)
15837 XVECEXP (operands[0], 0, regno + 1)
15838 = gen_rtx_SET (VOIDmode,
15839 gen_rtx_REG (V8SImode, SSE_REGNO (regno)),
15840 CONST0_RTX (V8SImode));
;; Matcher for the PARALLEL built by the avx_vzeroall expander: the whole
;; parallel must satisfy vzeroall_operation and its first element must be
;; the UNSPECV_VZEROALL unspec_volatile.  The attributes record that the
;; instruction has no modrm byte, touches no memory and is VEX-encoded.
15843 (define_insn "*avx_vzeroall"
15844 [(match_parallel 0 "vzeroall_operation"
15845 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROALL)])]
15848 [(set_attr "type" "sse")
15849 (set_attr "modrm" "0")
15850 (set_attr "memory" "none")
15851 (set_attr "prefix" "vex")
15852 (set_attr "btver2_decode" "vector")
15853 (set_attr "mode" "OI")])
15855 ;; Clear the upper 128bits of AVX registers, equivalent to a NOP
15856 ;; if the upper 128bits are unused.
;; The pattern is a bare unspec_volatile (UNSPECV_VZEROUPPER) with no
;; operands; the attributes record no modrm byte, no memory access and
;; a VEX encoding.
15857 (define_insn "avx_vzeroupper"
15858 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)]
15861 [(set_attr "type" "sse")
15862 (set_attr "modrm" "0")
15863 (set_attr "memory" "none")
15864 (set_attr "prefix" "vex")
15865 (set_attr "btver2_decode" "vector")
15866 (set_attr "mode" "OI")])
;; vpbroadcast<ssemodesuffix> for every mode in the VI iterator.  The
;; scalar source is element 0 (vec_select with index 0) of operand 1,
;; which has mode <ssexmmmode> and may be a register or memory.  The
;; Intel-syntax operand is printed with the <iptr> modifier.
15868 (define_insn "avx2_pbroadcast<mode>"
15869 [(set (match_operand:VI 0 "register_operand" "=x")
15871 (vec_select:<ssescalarmode>
15872 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "xm")
15873 (parallel [(const_int 0)]))))]
15875 "vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}"
15876 [(set_attr "type" "ssemov")
15877 (set_attr "prefix_extra" "1")
15878 (set_attr "prefix" "vex")
15879 (set_attr "mode" "<sseinsnmode>")])
;; Variant of the vpbroadcast pattern whose source operand has the same
;; 256-bit mode (VI_256) as the destination: the result is a
;; vec_duplicate of element 0 of operand 1.  Alternative 0 takes the
;; source from memory; alternative 1 takes it from a register and prints
;; it with the %x modifier.
15881 (define_insn "avx2_pbroadcast<mode>_1"
15882 [(set (match_operand:VI_256 0 "register_operand" "=x,x")
15883 (vec_duplicate:VI_256
15884 (vec_select:<ssescalarmode>
15885 (match_operand:VI_256 1 "nonimmediate_operand" "m,x")
15886 (parallel [(const_int 0)]))))]
15889 vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}
15890 vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %x1}"
15891 [(set_attr "type" "ssemov")
15892 (set_attr "prefix_extra" "1")
15893 (set_attr "prefix" "vex")
15894 (set_attr "mode" "<sseinsnmode>")])
;; Variable permute (vperm<ssemodesuffix>) for the VI48F_256_512_2I
;; modes, expressed as an unspec.  Operand 1 is the data vector (register
;; or memory) and operand 2 is the index vector, a register of mode
;; <sseintvecmode>.  Note the AT&T template lists %1 before %2, i.e. the
;; data operand is the instruction's last source.  <mask_name> and
;; <mask_operand3> add the optional masked form.
15896 (define_insn "<avx2_avx512bw>_permvar<mode><mask_name>"
15897 [(set (match_operand:VI48F_256_512_2I 0 "register_operand" "=v")
15898 (unspec:VI48F_256_512_2I
15899 [(match_operand:VI48F_256_512_2I 1 "nonimmediate_operand" "vm")
15900 (match_operand:<sseintvecmode> 2 "register_operand" "v")]
15902 "TARGET_AVX2 && <mask_mode512bit_condition>"
15903 "vperm<ssemodesuffix>\t{%1, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1}"
15904 [(set_attr "type" "sselog")
15905 (set_attr "prefix" "<mask_prefix2>")
15906 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the immediate-controlled permute on VI8F_256_512 modes.
;; The 8-bit immediate in operand 2 is split into its four 2-bit fields
;; (bits 1:0, 3:2, 5:4 and 7:6), and each field is passed as a separate
;; constant operand to the matching "_1" insn pattern.
15908 (define_expand "<avx2_avx512bw>_perm<mode>"
15909 [(match_operand:VI8F_256_512 0 "register_operand")
15910 (match_operand:VI8F_256_512 1 "nonimmediate_operand")
15911 (match_operand:SI 2 "const_0_to_255_operand")]
15914 int mask = INTVAL (operands[2]);
15915 emit_insn (gen_<avx2_avx512bw>_perm<mode>_1 (operands[0], operands[1],
15916 GEN_INT ((mask >> 0) & 3),
15917 GEN_INT ((mask >> 2) & 3),
15918 GEN_INT ((mask >> 4) & 3),
15919 GEN_INT ((mask >> 6) & 3)));
;; Masked counterpart of the immediate permute expander.  As in the
;; unmasked expander, the 8-bit immediate in operand 2 is split into four
;; 2-bit selectors; operand 3 (a vector accepted by vector_move_operand)
;; and operand 4 (a register of mode <avx512fmaskmode>) are forwarded
;; unchanged as the trailing arguments of the "_1_mask" generator.
15923 (define_expand "<avx512>_perm<mode>_mask"
15924 [(match_operand:VI8F_256_512 0 "register_operand")
15925 (match_operand:VI8F_256_512 1 "nonimmediate_operand")
15926 (match_operand:SI 2 "const_0_to_255_operand")
15927 (match_operand:VI8F_256_512 3 "vector_move_operand")
15928 (match_operand:<avx512fmaskmode> 4 "register_operand")]
15931 int mask = INTVAL (operands[2]);
15932 emit_insn (gen_<avx2_avx512bw>_perm<mode>_1_mask (operands[0], operands[1],
15933 GEN_INT ((mask >> 0) & 3),
15934 GEN_INT ((mask >> 2) & 3),
15935 GEN_INT ((mask >> 4) & 3),
15936 GEN_INT ((mask >> 6) & 3),
15937 operands[3], operands[4]));
;; Insn behind the immediate permute expanders.  The RTL is a vec_select
;; of operand 1 whose selector lists four constants, each accepted by
;; const_0_to_3_operand (operands 2..5).  The output code packs those
;; four 2-bit values back into one immediate (shifts of 0, 2, 4 and 6),
;; stores it in operands[2] and prints vperm<ssemodesuffix> with it.
;; The mask operand, when present, is operand 6.
15941 (define_insn "<avx2_avx512bw>_perm<mode>_1<mask_name>"
15942 [(set (match_operand:VI8F_256_512 0 "register_operand" "=v")
15943 (vec_select:VI8F_256_512
15944 (match_operand:VI8F_256_512 1 "nonimmediate_operand" "vm")
15945 (parallel [(match_operand 2 "const_0_to_3_operand")
15946 (match_operand 3 "const_0_to_3_operand")
15947 (match_operand 4 "const_0_to_3_operand")
15948 (match_operand 5 "const_0_to_3_operand")])))]
15949 "TARGET_AVX2 && <mask_mode512bit_condition>"
15952 mask |= INTVAL (operands[2]) << 0;
15953 mask |= INTVAL (operands[3]) << 2;
15954 mask |= INTVAL (operands[4]) << 4;
15955 mask |= INTVAL (operands[5]) << 6;
15956 operands[2] = GEN_INT (mask);
15957 return "vperm<ssemodesuffix>\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
15959 [(set_attr "type" "sselog")
15960 (set_attr "prefix" "<mask_prefix2>")
15961 (set_attr "mode" "<sseinsnmode>")])
;; Pattern for vperm2i128 on V4DI operands.  Operand 1 must be a
;; register, operand 2 may be a register or memory, and operand 3 is an
;; SImode immediate accepted by const_0_to_255_operand.
15963 (define_insn "avx2_permv2ti"
15964 [(set (match_operand:V4DI 0 "register_operand" "=x")
15966 [(match_operand:V4DI 1 "register_operand" "x")
15967 (match_operand:V4DI 2 "nonimmediate_operand" "xm")
15968 (match_operand:SI 3 "const_0_to_255_operand" "n")]
15971 "vperm2i128\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15972 [(set_attr "type" "sselog")
15973 (set_attr "prefix" "vex")
15974 (set_attr "mode" "OI")])
;; vbroadcastsd with a register source: the V4DF destination is a
;; vec_duplicate of element 0 of the V2DF register in operand 1.
15976 (define_insn "avx2_vec_dupv4df"
15977 [(set (match_operand:V4DF 0 "register_operand" "=x")
15978 (vec_duplicate:V4DF
15980 (match_operand:V2DF 1 "register_operand" "x")
15981 (parallel [(const_int 0)]))))]
15983 "vbroadcastsd\t{%1, %0|%0, %1}"
15984 [(set_attr "type" "sselog1")
15985 (set_attr "prefix" "vex")
15986 (set_attr "mode" "V4DF")])
15988 ;; Modes handled by AVX vec_dup patterns.
;; These are the four 256-bit vector modes with 32-bit or 64-bit
;; elements, integer and floating point.
15989 (define_mode_iterator AVX_VEC_DUP_MODE
15990 [V8SI V8SF V4DI V4DF])
;; Scalar-to-vector duplicate for the AVX_VEC_DUP_MODE modes.  Three
;; alternatives, selected by the "isa" attribute:
;;   0 ("*")      - scalar in memory, printed as vbroadcast from %1;
;;   1 ("avx2")   - scalar in a register, printed with the %x modifier;
;;   2 ("noavx2") - scalar in an "x" register, discouraged with "?".
;; The "mode" attribute is V8SF for every iterator mode.
15992 (define_insn "vec_dup<mode>"
15993 [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand" "=x,v,x")
15994 (vec_duplicate:AVX_VEC_DUP_MODE
15995 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "m,v,?x")))]
15998 vbroadcast<ssescalarmodesuffix>\t{%1, %0|%0, %1}
15999 vbroadcast<ssescalarmodesuffix>\t{%x1, %0|%0, %x1}
16001 [(set_attr "type" "ssemov")
16002 (set_attr "prefix_extra" "1")
16003 (set_attr "prefix" "maybe_evex")
16004 (set_attr "isa" "*,avx2,noavx2")
16005 (set_attr "mode" "V8SF")])
;; EVEX broadcast for every mode in V_AVX512VL.  The duplicated scalar is
;; element 0 of operand 1, an <ssexmmmode> register or memory operand.
;; The mnemonic is assembled from <sseintprefix> and <bcstscalarsuff>;
;; <mask_name> / <mask_operand2> add the optional masked form.
16007 (define_insn "<avx512>_vec_dup<mode><mask_name>"
16008 [(set (match_operand:V_AVX512VL 0 "register_operand" "=v")
16009 (vec_duplicate:V_AVX512VL
16010 (vec_select:<ssescalarmode>
16011 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "vm")
16012 (parallel [(const_int 0)]))))]
16014 "v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
16015 [(set_attr "type" "ssemov")
16016 (set_attr "prefix" "evex")
16017 (set_attr "mode" "<sseinsnmode>")])
;; Broadcast of an <ssexmmmode> operand to a V16FI vector.
;; Alternative 0 (register source) prints vshuf<shuffletype>32x4 with
;; immediate 0 and the source, printed via %g1, used for both inputs;
;; alternative 1 (memory source) prints vbroadcast<shuffletype>32x4.
16019 (define_insn "<mask_codefor>avx512f_broadcast<mode><mask_name>"
16020 [(set (match_operand:V16FI 0 "register_operand" "=v,v")
16021 (vec_duplicate:V16FI
16022 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "v,m")))]
16025 vshuf<shuffletype>32x4\t{$0x0, %g1, %g1, %0<mask_operand2>|%0<mask_operand2>, %g1, %g1, 0x0}
16026 vbroadcast<shuffletype>32x4\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
16027 [(set_attr "type" "ssemov")
16028 (set_attr "prefix" "evex")
16029 (set_attr "mode" "<sseinsnmode>")])
;; Broadcast of an <ssehalfvecmode> operand to a V8FI vector.
;; Alternative 0 (register source) prints vshuf<shuffletype>64x2 with
;; immediate 0x44 and the source, printed via %g1, used for both inputs;
;; alternative 1 (memory source) prints vbroadcast<shuffletype>64x4.
16031 (define_insn "<mask_codefor>avx512f_broadcast<mode><mask_name>"
16032 [(set (match_operand:V8FI 0 "register_operand" "=v,v")
16033 (vec_duplicate:V8FI
16034 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "v,m")))]
16037 vshuf<shuffletype>64x2\t{$0x44, %g1, %g1, %0<mask_operand2>|%0<mask_operand2>, %g1, %g1, 0x44}
16038 vbroadcast<shuffletype>64x4\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
16039 [(set_attr "type" "ssemov")
16040 (set_attr "prefix" "evex")
16041 (set_attr "mode" "<sseinsnmode>")])
;; vpbroadcast from a general-purpose register ("r" constraint) into a
;; VI_AVX512VL vector.  The condition rejects a DImode scalar unless
;; TARGET_64BIT.  Before printing, a QImode or HImode source is replaced
;; by the SImode view of the same hard register so that the assembler
;; sees a 32-bit register name.
16043 (define_insn "<mask_codefor><avx512>_vec_dup_gpr<mode><mask_name>"
16044 [(set (match_operand:VI_AVX512VL 0 "register_operand" "=v")
16045 (vec_duplicate:VI_AVX512VL
16046 (match_operand:<ssescalarmode> 1 "register_operand" "r")))]
16047 "TARGET_AVX512F && (<ssescalarmode>mode != DImode || TARGET_64BIT)"
16049 /* To generate correct assembler. */
16050 if (GET_MODE (operands[1]) == QImode || GET_MODE (operands[1]) == HImode)
16051 operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));
16052 return "vpbroadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
16054 [(set_attr "type" "ssemov")
16055 (set_attr "prefix" "evex")
16056 (set_attr "mode" "<sseinsnmode>")])
;; Broadcast of a scalar of mode <ssescalarmode> into a V_AVX512VL
;; vector.  Note that, despite the "_mem" in the name, the "vm"
;; constraint on operand 1 accepts a vector register as well as memory.
16058 (define_insn "<mask_codefor><avx512>_vec_dup_mem<mode><mask_name>"
16059 [(set (match_operand:V_AVX512VL 0 "register_operand" "=v")
16060 (vec_duplicate:V_AVX512VL
16061 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "vm")))]
16063 "v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
16064 [(set_attr "type" "ssemov")
16065 (set_attr "prefix" "evex")
16066 (set_attr "mode" "<sseinsnmode>")])
;; vbroadcasti128 for the VI_256 modes.  The <ssehalfvecmode> source
;; must be in memory (predicate memory_operand, constraint "m"); there
;; is no register-source alternative in this pattern.
16068 (define_insn "avx2_vbroadcasti128_<mode>"
16069 [(set (match_operand:VI_256 0 "register_operand" "=x")
16071 (match_operand:<ssehalfvecmode> 1 "memory_operand" "m")
16074 "vbroadcasti128\t{%1, %0|%0, %1}"
16075 [(set_attr "type" "ssemov")
16076 (set_attr "prefix_extra" "1")
16077 (set_attr "prefix" "vex")
16078 (set_attr "mode" "OI")])
;; Post-reload rewrite of a register-source vec_duplicate for
;; AVX_VEC_DUP_MODE when AVX is available but AVX2 is not.  Operand 2 is
;; created as the <ssehalfvecmode> view of the destination hard register;
;; the scalar is first duplicated into that half-width register, and the
;; half is then concatenated with itself to form the full vector.
16081 [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand")
16082 (vec_duplicate:AVX_VEC_DUP_MODE
16083 (match_operand:<ssescalarmode> 1 "register_operand")))]
16084 "TARGET_AVX && !TARGET_AVX2 && reload_completed"
16085 [(set (match_dup 2)
16086 (vec_duplicate:<ssehalfvecmode> (match_dup 1)))
16088 (vec_concat:AVX_VEC_DUP_MODE (match_dup 2) (match_dup 2)))]
16089 "operands[2] = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (operands[0]));")
;; 128-bit-to-256-bit broadcast for the V_256 modes, three alternatives:
;;   0 - source in memory: vbroadcast<i128>;
;;   1 - source tied to the destination ("0"): vinsert<i128> with
;;       immediate 1, inserting operand 1 into the destination itself;
;;   2 - source in another register (discouraged, "?x"): vperm2<i128>
;;       with immediate 0, the source printed via %t1 for both inputs.
;; "length_immediate" is 0,1,1 because only the last two alternatives
;; carry an immediate.
16091 (define_insn "avx_vbroadcastf128_<mode>"
16092 [(set (match_operand:V_256 0 "register_operand" "=x,x,x")
16094 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "m,0,?x")
16098 vbroadcast<i128>\t{%1, %0|%0, %1}
16099 vinsert<i128>\t{$1, %1, %0, %0|%0, %0, %1, 1}
16100 vperm2<i128>\t{$0, %t1, %t1, %0|%0, %t1, %t1, 0}"
16101 [(set_attr "type" "ssemov,sselog1,sselog1")
16102 (set_attr "prefix_extra" "1")
16103 (set_attr "length_immediate" "0,1,1")
16104 (set_attr "prefix" "vex")
16105 (set_attr "mode" "<sseinsnmode>")])
16107 ;; For broadcast[i|f]32x2. Yes there is no v4sf version, only v4si.
;; The 512-bit modes are unconditional within this iterator; the 256-bit
;; and 128-bit modes are enabled only when TARGET_AVX512VL holds.
16108 (define_mode_iterator VI4F_BRCST32x2
16109 [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
16110 V16SF (V8SF "TARGET_AVX512VL")])
;; Maps a 256-bit or 512-bit vector mode with 64-bit elements to the
;; two-element vector mode of the same element type (V2DF or V2DI).
16112 (define_mode_attr 64x2_mode
16113 [(V8DF "V2DF") (V8DI "V2DI") (V4DI "V2DI") (V4DF "V2DF")])
;; Maps each VI4F_BRCST32x2 mode to the two-element vector mode of the
;; same element type (V2SF or V2SI).
16115 (define_mode_attr 32x2mode
16116 [(V16SF "V2SF") (V16SI "V2SI") (V8SI "V2SI")
16117 (V8SF "V2SF") (V4SI "V2SI")])
;; vbroadcast<shuffletype>32x2 for the VI4F_BRCST32x2 modes: the result
;; is a vec_duplicate of the two-element vector formed by elements 0 and
;; 1 of operand 1, an <ssexmmmode> register or memory operand.
16119 (define_insn "<mask_codefor>avx512dq_broadcast<mode><mask_name>"
16120 [(set (match_operand:VI4F_BRCST32x2 0 "register_operand" "=v")
16121 (vec_duplicate:VI4F_BRCST32x2
16122 (vec_select:<32x2mode>
16123 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "vm")
16124 (parallel [(const_int 0) (const_int 1)]))))]
16126 "vbroadcast<shuffletype>32x2\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
16127 [(set_attr "type" "ssemov")
16128 (set_attr "prefix_extra" "1")
16129 (set_attr "prefix" "evex")
16130 (set_attr "mode" "<sseinsnmode>")])
;; Broadcast of an <ssexmmmode> operand to a VI4F_256 vector.
;; Alternative 0 (register source) prints vshuf<shuffletype>32x4 with
;; immediate 0 and the source, printed via %t1, used for both inputs;
;; alternative 1 (memory source) prints vbroadcast<shuffletype>32x4.
16132 (define_insn "<mask_codefor>avx512vl_broadcast<mode><mask_name>_1"
16133 [(set (match_operand:VI4F_256 0 "register_operand" "=v,v")
16134 (vec_duplicate:VI4F_256
16135 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "v,m")))]
16138 vshuf<shuffletype>32x4\t{$0x0, %t1, %t1, %0<mask_operand2>|%0<mask_operand2>, %t1, %t1, 0x0}
16139 vbroadcast<shuffletype>32x4\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
16140 [(set_attr "type" "ssemov")
16141 (set_attr "prefix_extra" "1")
16142 (set_attr "prefix" "evex")
16143 (set_attr "mode" "<sseinsnmode>")])
;; Broadcast of an <ssehalfvecmode> operand to a V16FI vector.
;; Alternative 0 (register source) prints vshuf<shuffletype>32x4 with
;; immediate 0x44 and the source, printed via %g1, used for both inputs;
;; alternative 1 (memory source) prints vbroadcast<shuffletype>32x8.
16145 (define_insn "<mask_codefor>avx512dq_broadcast<mode><mask_name>_1"
16146 [(set (match_operand:V16FI 0 "register_operand" "=v,v")
16147 (vec_duplicate:V16FI
16148 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "v,m")))]
16151 vshuf<shuffletype>32x4\t{$0x44, %g1, %g1, %0<mask_operand2>|%0<mask_operand2>, %g1, %g1, 0x44}
16152 vbroadcast<shuffletype>32x8\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
16153 [(set_attr "type" "ssemov")
16154 (set_attr "prefix_extra" "1")
16155 (set_attr "prefix" "evex")
16156 (set_attr "mode" "<sseinsnmode>")])
;; Broadcast of a <64x2_mode> operand to a VI8F_256_512 vector.  The
;; output code switches on which_alternative and returns either a
;; vshuf<shuffletype>64x2 template with immediate 0 (the source is
;; printed with %g1 when the destination mode is 64 bytes wide and with
;; %t1 otherwise) or a vbroadcast<shuffletype>64x2 template.  Operand 1
;; has two alternatives, register ("v") and memory ("m").
16158 (define_insn "<mask_codefor>avx512dq_broadcast<mode><mask_name>_1"
16159 [(set (match_operand:VI8F_256_512 0 "register_operand" "=v,v")
16160 (vec_duplicate:VI8F_256_512
16161 (match_operand:<64x2_mode> 1 "nonimmediate_operand" "v,m")))]
16164 switch (which_alternative)
16167 if (GET_MODE_SIZE (<MODE>mode) == 64)
16168 return "vshuf<shuffletype>64x2\t{$0x0, %g1, %g1, %0<mask_operand2>|%0<mask_operand2>, %g1, %g1, 0x0}";
16170 return "vshuf<shuffletype>64x2\t{$0x0, %t1, %t1, %0<mask_operand2>|%0<mask_operand2>, %t1, %t1, 0x0}";
16172 return "vbroadcast<shuffletype>64x2\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
16174 gcc_unreachable ();
16177 [(set_attr "type" "ssemov")
16178 (set_attr "prefix_extra" "1")
16179 (set_attr "prefix" "evex")
16180 (set_attr "mode" "<sseinsnmode>")])
;; vpbroadcastmb2q for the VI8_AVX512VL modes.  The source, operand 1, is
;; a QImode register with the "Yk" constraint.  The "mode" attribute is
;; fixed at XI for every iterator mode.
16182 (define_insn "avx512cd_maskb_vec_dup<mode>"
16183 [(set (match_operand:VI8_AVX512VL 0 "register_operand" "=v")
16184 (vec_duplicate:VI8_AVX512VL
16186 (match_operand:QI 1 "register_operand" "Yk"))))]
16188 "vpbroadcastmb2q\t{%1, %0|%0, %1}"
16189 [(set_attr "type" "mskmov")
16190 (set_attr "prefix" "evex")
16191 (set_attr "mode" "XI")])
;; vpbroadcastmw2d for the VI4_AVX512VL modes.  The source, operand 1, is
;; an HImode register with the "Yk" constraint.  The "mode" attribute is
;; fixed at XI for every iterator mode.
16193 (define_insn "avx512cd_maskw_vec_dup<mode>"
16194 [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
16195 (vec_duplicate:VI4_AVX512VL
16197 (match_operand:HI 1 "register_operand" "Yk"))))]
16199 "vpbroadcastmw2d\t{%1, %0|%0, %1}"
16200 [(set_attr "type" "mskmov")
16201 (set_attr "prefix" "evex")
16202 (set_attr "mode" "XI")])
16204 ;; Recognize broadcast as a vec_select as produced by builtin_vec_perm.
16205 ;; If it so happens that the input is in memory, use vbroadcast.
16206 ;; Otherwise use vpermilp (and in the case of 256-bit modes, vperm2f128).
;; V4SF version.  Operand 3 is the index of the element being broadcast.
;; For a memory source the address is advanced by elt * 4 bytes and
;; narrowed to SFmode, then vbroadcastss loads that single element.  For
;; a register source the vpermilps immediate is elt * 0x55, which writes
;; the 2-bit element index into all four selector fields.
16207 (define_insn "*avx_vperm_broadcast_v4sf"
16208 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
16210 (match_operand:V4SF 1 "nonimmediate_operand" "m,o,x")
16211 (match_parallel 2 "avx_vbroadcast_operand"
16212 [(match_operand 3 "const_int_operand" "C,n,n")])))]
16215 int elt = INTVAL (operands[3]);
16216 switch (which_alternative)
16220 operands[1] = adjust_address_nv (operands[1], SFmode, elt * 4);
16221 return "vbroadcastss\t{%1, %0|%0, %k1}";
16223 operands[2] = GEN_INT (elt * 0x55);
16224 return "vpermilps\t{%2, %1, %0|%0, %1, %2}";
16226 gcc_unreachable ();
16229 [(set_attr "type" "ssemov,ssemov,sselog1")
16230 (set_attr "prefix_extra" "1")
16231 (set_attr "length_immediate" "0,0,1")
16232 (set_attr "prefix" "vex")
16233 (set_attr "mode" "SF,SF,V4SF")])
;; 256-bit (VF_256) broadcast expressed as a vec_select; operand 3 is the
;; index of the element being broadcast.  The pattern is split after
;; reload, except for V4DFmode when AVX2 is available.  The replacement
;; is a vec_duplicate of operand 1.  The preparation code contains:
;;   - with AVX2 and element 0, a direct call to gen_vec_dup<mode> on the
;;     scalar low part;
;;   - a vpermil step that replicates the wanted element within its
;;     128-bit lane (mask 15 or 0 for V4DF, (elt & 3) * 0x55 otherwise),
;;     followed by a vperm2f128 step with mask (lane index) * 0x11 that
;;     copies that lane to both halves of the destination;
;;   - an adjust_address of operand 1 to the selected scalar element, so
;;     that the vec_duplicate reads only that element.
16235 (define_insn_and_split "*avx_vperm_broadcast_<mode>"
16236 [(set (match_operand:VF_256 0 "register_operand" "=x,x,x")
16238 (match_operand:VF_256 1 "nonimmediate_operand" "m,o,?x")
16239 (match_parallel 2 "avx_vbroadcast_operand"
16240 [(match_operand 3 "const_int_operand" "C,n,n")])))]
16243 "&& reload_completed && (<MODE>mode != V4DFmode || !TARGET_AVX2)"
16244 [(set (match_dup 0) (vec_duplicate:VF_256 (match_dup 1)))]
16246 rtx op0 = operands[0], op1 = operands[1];
16247 int elt = INTVAL (operands[3]);
16253 if (TARGET_AVX2 && elt == 0)
16255 emit_insn (gen_vec_dup<mode> (op0, gen_lowpart (<ssescalarmode>mode,
16260 /* Shuffle element we care about into all elements of the 128-bit lane.
16261 The other lane gets shuffled too, but we don't care. */
16262 if (<MODE>mode == V4DFmode)
16263 mask = (elt & 1 ? 15 : 0);
16265 mask = (elt & 3) * 0x55;
16266 emit_insn (gen_avx_vpermil<mode> (op0, op1, GEN_INT (mask)));
16268 /* Shuffle the lane we care about into both lanes of the dest. */
16269 mask = (elt / (<ssescalarnum> / 2)) * 0x11;
16270 emit_insn (gen_avx_vperm2f128<mode>3 (op0, op0, op0, GEN_INT (mask)));
16274 operands[1] = adjust_address (op1, <ssescalarmode>mode,
16275 elt * GET_MODE_SIZE (<ssescalarmode>mode));
;; Expander for the immediate form of vpermil on double-precision (VF2)
;; modes.  It turns the immediate in operand 2 into an explicit selector
;; PARALLEL.  Elements are handled in pairs starting at index i: mask bit
;; i chooses between element i and i + 1 for the first result of the
;; pair, and mask bit i + 1 does the same for the second.
16278 (define_expand "<sse2_avx_avx512f>_vpermil<mode><mask_name>"
16279 [(set (match_operand:VF2 0 "register_operand")
16281 (match_operand:VF2 1 "nonimmediate_operand")
16282 (match_operand:SI 2 "const_0_to_255_operand")))]
16283 "TARGET_AVX && <mask_mode512bit_condition>"
16285 int mask = INTVAL (operands[2]);
16286 rtx perm[<ssescalarnum>];
16289 for (i = 0; i < <ssescalarnum>; i = i + 2)
16291 perm[i] = GEN_INT (((mask >> i) & 1) + i);
16292 perm[i + 1] = GEN_INT (((mask >> (i + 1)) & 1) + i);
16296 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
;; Expander for the immediate form of vpermil on single-precision (VF1)
;; modes.  It turns the immediate in operand 2 into an explicit selector
;; PARALLEL.  Elements are handled in groups of four starting at index i;
;; the same four 2-bit fields of the immediate (bits 1:0, 3:2, 5:4, 7:6)
;; are reused for every group, each offset by the group's base index i.
16299 (define_expand "<sse2_avx_avx512f>_vpermil<mode><mask_name>"
16300 [(set (match_operand:VF1 0 "register_operand")
16302 (match_operand:VF1 1 "nonimmediate_operand")
16303 (match_operand:SI 2 "const_0_to_255_operand")))]
16304 "TARGET_AVX && <mask_mode512bit_condition>"
16306 int mask = INTVAL (operands[2]);
16307 rtx perm[<ssescalarnum>];
16310 for (i = 0; i < <ssescalarnum>; i = i + 4)
16312 perm[i] = GEN_INT (((mask >> 0) & 3) + i);
16313 perm[i + 1] = GEN_INT (((mask >> 2) & 3) + i);
16314 perm[i + 2] = GEN_INT (((mask >> 4) & 3) + i);
16315 perm[i + 3] = GEN_INT (((mask >> 6) & 3) + i);
16319 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
;; Insn matching the selector PARALLEL built by the vpermil expanders.
;; The condition accepts the parallel only when avx_vpermilp_parallel
;; returns nonzero for it.  The output code calls the same helper again
;; and uses its return value minus one as the immediate, which replaces
;; operands[2] before the template is printed.
16322 (define_insn "*<sse2_avx_avx512f>_vpermilp<mode><mask_name>"
16323 [(set (match_operand:VF 0 "register_operand" "=v")
16325 (match_operand:VF 1 "nonimmediate_operand" "vm")
16326 (match_parallel 2 ""
16327 [(match_operand 3 "const_int_operand")])))]
16328 "TARGET_AVX && <mask_mode512bit_condition>
16329 && avx_vpermilp_parallel (operands[2], <MODE>mode)"
16331 int mask = avx_vpermilp_parallel (operands[2], <MODE>mode) - 1;
16332 operands[2] = GEN_INT (mask);
16333 return "vpermil<ssemodesuffix>\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}";
16335 [(set_attr "type" "sselog")
16336 (set_attr "prefix_extra" "1")
16337 (set_attr "length_immediate" "1")
16338 (set_attr "prefix" "<mask_prefix>")
16339 (set_attr "mode" "<sseinsnmode>")])
;; Variable-control form of vpermil for the VF modes.  Operand 1 is the
;; data vector and must be a register; operand 2 is the control vector
;; of mode <sseintvecmode> and may be a register or memory.
16341 (define_insn "<sse2_avx_avx512f>_vpermilvar<mode>3<mask_name>"
16342 [(set (match_operand:VF 0 "register_operand" "=v")
16344 [(match_operand:VF 1 "register_operand" "v")
16345 (match_operand:<sseintvecmode> 2 "nonimmediate_operand" "vm")]
16347 "TARGET_AVX && <mask_mode512bit_condition>"
16348 "vpermil<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
16349 [(set_attr "type" "sselog")
16350 (set_attr "prefix_extra" "1")
16351 (set_attr "btver2_decode" "vector")
16352 (set_attr "prefix" "<mask_prefix>")
16353 (set_attr "mode" "<sseinsnmode>")])
;; Zero-masking wrapper for vpermi2var.  It forwards operands 0..3 to
;; the "_maskz_1" generator, supplying CONST0_RTX (<MODE>mode) as the
;; extra vector argument ahead of the mask register in operand 4.
16355 (define_expand "<avx512>_vpermi2var<mode>3_maskz"
16356 [(match_operand:VI248F 0 "register_operand" "=v")
16357 (match_operand:VI248F 1 "register_operand" "v")
16358 (match_operand:<sseintvecmode> 2 "register_operand" "0")
16359 (match_operand:VI248F 3 "nonimmediate_operand" "vm")
16360 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")]
16363 emit_insn (gen_<avx512>_vpermi2var<mode>3_maskz_1 (
16364 operands[0], operands[1], operands[2], operands[3],
16365 CONST0_RTX (<MODE>mode), operands[4]));
;; vpermi2<ssemodesuffix> for the VI248F modes.  Operand 2, the
;; <sseintvecmode> vector, is tied to the destination ("0"), so it does
;; not appear in the template; only operand 1 (register) and operand 3
;; (register or memory) are printed as sources.
16369 (define_insn "<avx512>_vpermi2var<mode>3<sd_maskz_name>"
16370 [(set (match_operand:VI248F 0 "register_operand" "=v")
16372 [(match_operand:VI248F 1 "register_operand" "v")
16373 (match_operand:<sseintvecmode> 2 "register_operand" "0")
16374 (match_operand:VI248F 3 "nonimmediate_operand" "vm")]
16377 "vpermi2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}"
16378 [(set_attr "type" "sselog")
16379 (set_attr "prefix" "evex")
16380 (set_attr "mode" "<sseinsnmode>")])
;; Masked form of vpermi2<ssemodesuffix>, using UNSPEC_VPERMI2_MASK.
;; As in the unmasked pattern, operand 2 is tied to the destination.
;; The mask register, operand 4 ("Yk"), is printed inside braces after
;; the destination via the %{%4%} sequence.
16382 (define_insn "<avx512>_vpermi2var<mode>3_mask"
16383 [(set (match_operand:VI248F 0 "register_operand" "=v")
16386 [(match_operand:VI248F 1 "register_operand" "v")
16387 (match_operand:<sseintvecmode> 2 "register_operand" "0")
16388 (match_operand:VI248F 3 "nonimmediate_operand" "vm")]
16389 UNSPEC_VPERMI2_MASK)
16391 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
16393 "vpermi2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
16394 [(set_attr "type" "sselog")
16395 (set_attr "prefix" "evex")
16396 (set_attr "mode" "<sseinsnmode>")])
;; Zero-masking wrapper for vpermt2var.  It forwards operands 0..3 to
;; the "_maskz_1" generator, supplying CONST0_RTX (<MODE>mode) as the
;; extra vector argument ahead of the mask register in operand 4.
;; Here the <sseintvecmode> vector is operand 1, not operand 2 as in the
;; vpermi2var patterns.
16398 (define_expand "<avx512>_vpermt2var<mode>3_maskz"
16399 [(match_operand:VI248F 0 "register_operand" "=v")
16400 (match_operand:<sseintvecmode> 1 "register_operand" "v")
16401 (match_operand:VI248F 2 "register_operand" "0")
16402 (match_operand:VI248F 3 "nonimmediate_operand" "vm")
16403 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")]
16406 emit_insn (gen_<avx512>_vpermt2var<mode>3_maskz_1 (
16407 operands[0], operands[1], operands[2], operands[3],
16408 CONST0_RTX (<MODE>mode), operands[4]));
;; vpermt2<ssemodesuffix> for the VI248F modes.  Operand 1 is the
;; <sseintvecmode> vector (register); operand 2 is a data vector tied to
;; the destination ("0"), so it is not printed; operand 3 is the second
;; data vector (register or memory).
16412 (define_insn "<avx512>_vpermt2var<mode>3<sd_maskz_name>"
16413 [(set (match_operand:VI248F 0 "register_operand" "=v")
16415 [(match_operand:<sseintvecmode> 1 "register_operand" "v")
16416 (match_operand:VI248F 2 "register_operand" "0")
16417 (match_operand:VI248F 3 "nonimmediate_operand" "vm")]
16420 "vpermt2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}"
16421 [(set_attr "type" "sselog")
16422 (set_attr "prefix" "evex")
16423 (set_attr "mode" "<sseinsnmode>")])
;; Masked form of vpermt2<ssemodesuffix>.  Operand 2 is tied to the
;; destination, as in the unmasked pattern.  The mask register, operand
;; 4 ("Yk"), is printed inside braces after the destination via the
;; %{%4%} sequence.
16425 (define_insn "<avx512>_vpermt2var<mode>3_mask"
16426 [(set (match_operand:VI248F 0 "register_operand" "=v")
16429 [(match_operand:<sseintvecmode> 1 "register_operand" "v")
16430 (match_operand:VI248F 2 "register_operand" "0")
16431 (match_operand:VI248F 3 "nonimmediate_operand" "vm")]
16434 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
16436 "vpermt2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
16437 [(set_attr "type" "sselog")
16438 (set_attr "prefix" "evex")
16439 (set_attr "mode" "<sseinsnmode>")])
;; Expander for vperm2f128 on the AVX256MODE2P modes.  The default RTL
;; is the UNSPEC_VPERMIL2F128 form.  When neither zeroing bit of the
;; immediate is set ((mask & 0x88) == 0), the code instead builds an
;; explicit vec_select over the vec_concat of operands 1 and 2.  The low
;; half of the result takes nelt / 2 consecutive elements starting at
;; (mask & 3) * nelt / 2, and the high half takes them starting at
;; ((mask >> 4) & 3) * nelt / 2.
16441 (define_expand "avx_vperm2f128<mode>3"
16442 [(set (match_operand:AVX256MODE2P 0 "register_operand")
16443 (unspec:AVX256MODE2P
16444 [(match_operand:AVX256MODE2P 1 "register_operand")
16445 (match_operand:AVX256MODE2P 2 "nonimmediate_operand")
16446 (match_operand:SI 3 "const_0_to_255_operand")]
16447 UNSPEC_VPERMIL2F128))]
16450 int mask = INTVAL (operands[3]);
16451 if ((mask & 0x88) == 0)
16453 rtx perm[<ssescalarnum>], t1, t2;
16454 int i, base, nelt = <ssescalarnum>, nelt2 = nelt / 2;
16456 base = (mask & 3) * nelt2;
16457 for (i = 0; i < nelt2; ++i)
16458 perm[i] = GEN_INT (base + i);
16460 base = ((mask >> 4) & 3) * nelt2;
16461 for (i = 0; i < nelt2; ++i)
16462 perm[i + nelt2] = GEN_INT (base + i);
16464 t2 = gen_rtx_VEC_CONCAT (<ssedoublevecmode>mode,
16465 operands[1], operands[2]);
16466 t1 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, perm));
16467 t2 = gen_rtx_VEC_SELECT (<MODE>mode, t2, t1);
16468 t2 = gen_rtx_SET (VOIDmode, operands[0], t2);
16474 ;; Note that bits 7 and 3 of the imm8 allow lanes to be zeroed, which
16475 ;; means that in order to represent this properly in rtl we'd have to
16476 ;; nest *another* vec_concat with a zero operand and do the select from
16477 ;; a 4x wide vector. That doesn't seem very nice.
;; This pattern therefore keeps the opaque UNSPEC_VPERMIL2F128 form and
;; accepts any immediate matched by const_0_to_255_operand, printing it
;; unchanged as the vperm2<i128> immediate.
16478 (define_insn "*avx_vperm2f128<mode>_full"
16479 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
16480 (unspec:AVX256MODE2P
16481 [(match_operand:AVX256MODE2P 1 "register_operand" "x")
16482 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm")
16483 (match_operand:SI 3 "const_0_to_255_operand" "n")]
16484 UNSPEC_VPERMIL2F128))]
16486 "vperm2<i128>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
16487 [(set_attr "type" "sselog")
16488 (set_attr "prefix_extra" "1")
16489 (set_attr "length_immediate" "1")
16490 (set_attr "prefix" "vex")
16491 (set_attr "mode" "<sseinsnmode>")])
;; Matches the explicit vec_select-over-vec_concat form of vperm2f128.
;; The selector parallel is accepted only when avx_vperm2f128_parallel
;; returns nonzero for it; the output code uses that return value minus
;; one as the immediate.  The output code can return a vinsert<i128>
;; template (immediate 0 or 1, operand 2 printed with %x2) or the
;; general vperm2<i128> template with the recovered immediate.
16493 (define_insn "*avx_vperm2f128<mode>_nozero"
16494 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
16495 (vec_select:AVX256MODE2P
16496 (vec_concat:<ssedoublevecmode>
16497 (match_operand:AVX256MODE2P 1 "register_operand" "x")
16498 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm"))
16499 (match_parallel 3 ""
16500 [(match_operand 4 "const_int_operand")])))]
16502 && avx_vperm2f128_parallel (operands[3], <MODE>mode)"
16504 int mask = avx_vperm2f128_parallel (operands[3], <MODE>mode) - 1;
16506 return "vinsert<i128>\t{$0, %x2, %1, %0|%0, %1, %x2, 0}";
16508 return "vinsert<i128>\t{$1, %x2, %1, %0|%0, %1, %x2, 1}";
16509 operands[3] = GEN_INT (mask);
16510 return "vperm2<i128>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
16512 [(set_attr "type" "sselog")
16513 (set_attr "prefix_extra" "1")
16514 (set_attr "length_immediate" "1")
16515 (set_attr "prefix" "vex")
16516 (set_attr "mode" "<sseinsnmode>")])
;; Recognizes a single-input permutation of a V_128 register, accepted
;; by palignr_operand, as palignr.  Operand 3 is an element index; the
;; output code multiplies it by the element size in bytes to obtain the
;; byte-count immediate, stored in operands[2].  Alternative 0 (isa
;; "noavx") ties operand 1 to the destination; alternative 1 (isa "avx")
;; prints vpalignr with operand 1 as both sources.
16518 (define_insn "*ssse3_palignr<mode>_perm"
16519 [(set (match_operand:V_128 0 "register_operand" "=x,x")
16521 (match_operand:V_128 1 "register_operand" "0,x")
16522 (match_parallel 2 "palignr_operand"
16523 [(match_operand 3 "const_int_operand" "n, n")])))]
16526 enum machine_mode imode = GET_MODE_INNER (GET_MODE (operands[0]));
16527 operands[2] = GEN_INT (INTVAL (operands[3]) * GET_MODE_SIZE (imode));
16529 switch (which_alternative)
16532 return "palignr\t{%2, %1, %0|%0, %1, %2}";
16534 return "vpalignr\t{%2, %1, %1, %0|%0, %1, %1, %2}";
16536 gcc_unreachable ();
16539 [(set_attr "isa" "noavx,avx")
16540 (set_attr "type" "sseishft")
16541 (set_attr "atom_unit" "sishuf")
16542 (set_attr "prefix_data16" "1,*")
16543 (set_attr "prefix_extra" "1")
16544 (set_attr "length_immediate" "1")
16545 (set_attr "prefix" "orig,vex")])
;; Masked 128-bit insert into a VI48F_256 vector.  Operand 3 (0 or 1)
;; selects the generator: gen_vec_set_lo_<mode>_mask or
;; gen_vec_set_hi_<mode>_mask.  The chosen generator takes five rtx
;; arguments and is called with operands 0, 1, 2 and 4 followed by the
;; remaining argument; operand 3 itself is not passed on.
16547 (define_expand "avx512vl_vinsert<mode>"
16548 [(match_operand:VI48F_256 0 "register_operand")
16549 (match_operand:VI48F_256 1 "register_operand")
16550 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand")
16551 (match_operand:SI 3 "const_0_to_1_operand")
16552 (match_operand:VI48F_256 4 "register_operand")
16553 (match_operand:<avx512fmaskmode> 5 "register_operand")]
16556 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
16558 switch (INTVAL (operands[3]))
16561 insn = gen_vec_set_lo_<mode>_mask;
16564 insn = gen_vec_set_hi_<mode>_mask;
16567 gcc_unreachable ();
16570 emit_insn (insn (operands[0], operands[1], operands[2], operands[4],
;; 128-bit insert into a V_256 vector.  Operand 3 (0 or 1) selects the
;; generator: gen_vec_set_lo_<mode> or gen_vec_set_hi_<mode>.  The chosen
;; generator is called with operands 0, 1 and 2 only; operand 3 itself
;; is not passed on.
16575 (define_expand "avx_vinsertf128<mode>"
16576 [(match_operand:V_256 0 "register_operand")
16577 (match_operand:V_256 1 "register_operand")
16578 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand")
16579 (match_operand:SI 3 "const_0_to_1_operand")]
16582 rtx (*insn)(rtx, rtx, rtx);
16584 switch (INTVAL (operands[3]))
16587 insn = gen_vec_set_lo_<mode>;
16590 insn = gen_vec_set_hi_<mode>;
16593 gcc_unreachable ();
16596 emit_insn (insn (operands[0], operands[1], operands[2]));
;; Replace the low 128 bits of a VI8F_256 vector: the result is the
;; concatenation of operand 2 (new low half) with elements 2 and 3 of
;; operand 1.  With TARGET_AVX512VL the vinsert<shuffletype>64x2 form,
;; which carries the optional mask operand, is printed; otherwise the
;; vinsert<i128> form.  Both use immediate 0.
16600 (define_insn "vec_set_lo_<mode><mask_name>"
16601 [(set (match_operand:VI8F_256 0 "register_operand" "=v")
16602 (vec_concat:VI8F_256
16603 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
16604 (vec_select:<ssehalfvecmode>
16605 (match_operand:VI8F_256 1 "register_operand" "v")
16606 (parallel [(const_int 2) (const_int 3)]))))]
16609 if (TARGET_AVX512VL)
16610 return "vinsert<shuffletype>64x2\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}";
16612 return "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}";
16614 [(set_attr "type" "sselog")
16615 (set_attr "prefix_extra" "1")
16616 (set_attr "length_immediate" "1")
16617 (set_attr "prefix" "vex")
16618 (set_attr "mode" "<sseinsnmode>")])
;; Replace the high 128 bits of a VI8F_256 vector: the result is the
;; concatenation of elements 0 and 1 of operand 1 with operand 2 (new
;; high half).  With TARGET_AVX512VL the vinsert<shuffletype>64x2 form,
;; which carries the optional mask operand, is printed; otherwise the
;; vinsert<i128> form.  Both use immediate 1.
16620 (define_insn "vec_set_hi_<mode><mask_name>"
16621 [(set (match_operand:VI8F_256 0 "register_operand" "=v")
16622 (vec_concat:VI8F_256
16623 (vec_select:<ssehalfvecmode>
16624 (match_operand:VI8F_256 1 "register_operand" "v")
16625 (parallel [(const_int 0) (const_int 1)]))
16626 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))]
16629 if (TARGET_AVX512VL)
16630 return "vinsert<shuffletype>64x2\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}";
16632 return "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}";
16634 [(set_attr "type" "sselog")
16635 (set_attr "prefix_extra" "1")
16636 (set_attr "length_immediate" "1")
16637 (set_attr "prefix" "vex")
16638 (set_attr "mode" "<sseinsnmode>")])
;; Replace the low 128 bits of a VI4F_256 vector: the result is the
;; concatenation of operand 2 (new low half) with elements 4..7 of
;; operand 1.  With TARGET_AVX512VL the vinsert<shuffletype>32x4 form,
;; which carries the optional mask operand, is printed; otherwise the
;; vinsert<i128> form.  Both use immediate 0.
16640 (define_insn "vec_set_lo_<mode><mask_name>"
16641 [(set (match_operand:VI4F_256 0 "register_operand" "=v")
16642 (vec_concat:VI4F_256
16643 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
16644 (vec_select:<ssehalfvecmode>
16645 (match_operand:VI4F_256 1 "register_operand" "v")
16646 (parallel [(const_int 4) (const_int 5)
16647 (const_int 6) (const_int 7)]))))]
16650 if (TARGET_AVX512VL)
16651 return "vinsert<shuffletype>32x4\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}";
16653 return "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}";
16655 [(set_attr "type" "sselog")
16656 (set_attr "prefix_extra" "1")
16657 (set_attr "length_immediate" "1")
16658 (set_attr "prefix" "vex")
16659 (set_attr "mode" "<sseinsnmode>")])
;; Replace the high 128 bits of a VI4F_256 vector: the result is the
;; concatenation of elements 0..3 of operand 1 with operand 2 (new high
;; half).  With TARGET_AVX512VL the vinsert<shuffletype>32x4 form, which
;; carries the optional mask operand, is printed; otherwise the
;; vinsert<i128> form.  Both use immediate 1.
16661 (define_insn "vec_set_hi_<mode><mask_name>"
16662 [(set (match_operand:VI4F_256 0 "register_operand" "=v")
16663 (vec_concat:VI4F_256
16664 (vec_select:<ssehalfvecmode>
16665 (match_operand:VI4F_256 1 "register_operand" "v")
16666 (parallel [(const_int 0) (const_int 1)
16667 (const_int 2) (const_int 3)]))
16668 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))]
16671 if (TARGET_AVX512VL)
16672 return "vinsert<shuffletype>32x4\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}";
16674 return "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}";
16676 [(set_attr "type" "sselog")
16677 (set_attr "prefix_extra" "1")
16678 (set_attr "length_immediate" "1")
16679 (set_attr "prefix" "vex")
16680 (set_attr "mode" "<sseinsnmode>")])
;; Replace the low 128 bits of a V16HI vector with the V8HI operand 2,
;; keeping elements 8..15 of operand 1.  Printed as vinsert%~128 with
;; immediate 0.
16682 (define_insn "vec_set_lo_v16hi"
16683 [(set (match_operand:V16HI 0 "register_operand" "=x")
16685 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
16687 (match_operand:V16HI 1 "register_operand" "x")
16688 (parallel [(const_int 8) (const_int 9)
16689 (const_int 10) (const_int 11)
16690 (const_int 12) (const_int 13)
16691 (const_int 14) (const_int 15)]))))]
16693 "vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
16694 [(set_attr "type" "sselog")
16695 (set_attr "prefix_extra" "1")
16696 (set_attr "length_immediate" "1")
16697 (set_attr "prefix" "vex")
16698 (set_attr "mode" "OI")])
;; Replace the high 128 bits of a V16HI vector with the V8HI operand 2,
;; keeping elements 0..7 of operand 1.  Printed as vinsert%~128 with
;; immediate 1.
16700 (define_insn "vec_set_hi_v16hi"
16701 [(set (match_operand:V16HI 0 "register_operand" "=x")
16704 (match_operand:V16HI 1 "register_operand" "x")
16705 (parallel [(const_int 0) (const_int 1)
16706 (const_int 2) (const_int 3)
16707 (const_int 4) (const_int 5)
16708 (const_int 6) (const_int 7)]))
16709 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
16711 "vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
16712 [(set_attr "type" "sselog")
16713 (set_attr "prefix_extra" "1")
16714 (set_attr "length_immediate" "1")
16715 (set_attr "prefix" "vex")
16716 (set_attr "mode" "OI")])
;; V32QI insert using immediate 0x0: combines the V16QI operand 2 (register
;; or memory) with elements 16-31 selected from operand 1.
;; Only "x" class registers are accepted.  Emitted as vinsert%~128.
16718 (define_insn "vec_set_lo_v32qi"
16719 [(set (match_operand:V32QI 0 "register_operand" "=x")
16721 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
16723 (match_operand:V32QI 1 "register_operand" "x")
16724 (parallel [(const_int 16) (const_int 17)
16725 (const_int 18) (const_int 19)
16726 (const_int 20) (const_int 21)
16727 (const_int 22) (const_int 23)
16728 (const_int 24) (const_int 25)
16729 (const_int 26) (const_int 27)
16730 (const_int 28) (const_int 29)
16731 (const_int 30) (const_int 31)]))))]
16733 "vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
16734 [(set_attr "type" "sselog")
16735 (set_attr "prefix_extra" "1")
16736 (set_attr "length_immediate" "1")
16737 (set_attr "prefix" "vex")
16738 (set_attr "mode" "OI")])
;; V32QI insert using immediate 0x1: combines elements 0-15 selected from
;; operand 1 with the V16QI operand 2 (register or memory).
;; Only "x" class registers are accepted.  Emitted as vinsert%~128.
16740 (define_insn "vec_set_hi_v32qi"
16741 [(set (match_operand:V32QI 0 "register_operand" "=x")
16744 (match_operand:V32QI 1 "register_operand" "x")
16745 (parallel [(const_int 0) (const_int 1)
16746 (const_int 2) (const_int 3)
16747 (const_int 4) (const_int 5)
16748 (const_int 6) (const_int 7)
16749 (const_int 8) (const_int 9)
16750 (const_int 10) (const_int 11)
16751 (const_int 12) (const_int 13)
16752 (const_int 14) (const_int 15)]))
16753 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
16755 "vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
16756 [(set_attr "type" "sselog")
16757 (set_attr "prefix_extra" "1")
16758 (set_attr "length_immediate" "1")
16759 (set_attr "prefix" "vex")
16760 (set_attr "mode" "OI")])
;; Masked load for V48_AVX2 modes.
;;   operand 0: destination register
;;   operand 1: memory source
;;   operand 2: <sseintvecmode> register (printed as the middle operand of
;;              the maskmov instruction)
;; Emitted as v<sseintprefix>maskmov<ssemodesuffix>.
16762 (define_insn "<avx_avx2>_maskload<ssemodesuffix><avxsizesuffix>"
16763 [(set (match_operand:V48_AVX2 0 "register_operand" "=x")
16765 [(match_operand:<sseintvecmode> 2 "register_operand" "x")
16766 (match_operand:V48_AVX2 1 "memory_operand" "m")]
16769 "v<sseintprefix>maskmov<ssemodesuffix>\t{%1, %2, %0|%0, %2, %1}"
16770 [(set_attr "type" "sselog1")
16771 (set_attr "prefix_extra" "1")
16772 (set_attr "prefix" "vex")
16773 (set_attr "btver2_decode" "vector")
16774 (set_attr "mode" "<sseinsnmode>")])
;; Masked store for V48_AVX2 modes.
;;   operand 0: memory destination; the "+m" constraint marks it as both
;;              read and written
;;   operand 1: <sseintvecmode> register (middle operand of maskmov)
;;   operand 2: register holding the data to store
;; Emitted as v<sseintprefix>maskmov<ssemodesuffix>.
16776 (define_insn "<avx_avx2>_maskstore<ssemodesuffix><avxsizesuffix>"
16777 [(set (match_operand:V48_AVX2 0 "memory_operand" "+m")
16779 [(match_operand:<sseintvecmode> 1 "register_operand" "x")
16780 (match_operand:V48_AVX2 2 "register_operand" "x")
16784 "v<sseintprefix>maskmov<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
16785 [(set_attr "type" "sselog1")
16786 (set_attr "prefix_extra" "1")
16787 (set_attr "prefix" "vex")
16788 (set_attr "btver2_decode" "vector")
16789 (set_attr "mode" "<sseinsnmode>")])
;; Expander "maskload<mode>" for V48_AVX2 modes.
;;   operand 0: destination register
;;   operand 1: memory source
;;   operand 2: <sseintvecmode> register
16791 (define_expand "maskload<mode>"
16792 [(set (match_operand:V48_AVX2 0 "register_operand")
16794 [(match_operand:<sseintvecmode> 2 "register_operand")
16795 (match_operand:V48_AVX2 1 "memory_operand")]
;; Expander "maskstore<mode>" for V48_AVX2 modes.
;;   operand 0: memory destination
;;   operand 1: register holding the data
;;   operand 2: <sseintvecmode> register
;; Note the numbering: here the data is operand 1 and the <sseintvecmode>
;; register is operand 2, the reverse of the
;; <avx_avx2>_maskstore<ssemodesuffix><avxsizesuffix> insn.
16799 (define_expand "maskstore<mode>"
16800 [(set (match_operand:V48_AVX2 0 "memory_operand")
16802 [(match_operand:<sseintvecmode> 2 "register_operand")
16803 (match_operand:V48_AVX2 1 "register_operand")
;; Cast pattern: an AVX256MODE2P destination (register or memory) set from
;; an unspec of a <ssehalfvecmode> operand (alternatives: "=x" <- "xm" and
;; "=m" <- "x").
;; The split fires only after reload ("&& reload_completed").  The split
;; code re-creates operand 0 as a <ssehalfvecmode> REG or operand 1 as a
;; <MODE> REG using the same register number, then emits a single move.
16808 (define_insn_and_split "avx_<castmode><avxsizesuffix>_<castmode>"
16809 [(set (match_operand:AVX256MODE2P 0 "nonimmediate_operand" "=x,m")
16810 (unspec:AVX256MODE2P
16811 [(match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "xm,x")]
16815 "&& reload_completed"
16818 rtx op0 = operands[0];
16819 rtx op1 = operands[1];
16821 op0 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op0));
16823 op1 = gen_rtx_REG (<MODE>mode, REGNO (op1));
16824 emit_move_insn (op0, op1);
;; Expander "vec_init<mode>" for V_256 modes.  Delegates to
;; ix86_expand_vector_init with a first argument of false, passing
;; operands[0] (register destination) and operands[1].
16828 (define_expand "vec_init<mode>"
16829 [(match_operand:V_256 0 "register_operand")
16833 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Expander "vec_init<mode>" for VI48F_I12B_512 modes.  Delegates to
;; ix86_expand_vector_init with a first argument of false, passing
;; operands[0] (register destination) and operands[1].
16837 (define_expand "vec_init<mode>"
16838 [(match_operand:VI48F_I12B_512 0 "register_operand")
16842 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Arithmetic right shift (ashiftrt) of vector operand 1 by vector operand 2,
;; both in a VI248_AVX512 mode; operand 2 may be a register or memory.
;; Enabled under "TARGET_AVX2 && <mask_mode512bit_condition>".
;; Emitted as vpsrav<ssemodesuffix>, with <mask_operand3> appended to the
;; destination.
16846 (define_insn "<avx2_avx512bw>_ashrv<mode><mask_name>"
16847 [(set (match_operand:VI248_AVX512 0 "register_operand" "=v")
16848 (ashiftrt:VI248_AVX512
16849 (match_operand:VI248_AVX512 1 "register_operand" "v")
16850 (match_operand:VI248_AVX512 2 "nonimmediate_operand" "vm")))]
16851 "TARGET_AVX2 && <mask_mode512bit_condition>"
16852 "vpsrav<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
16853 [(set_attr "type" "sseishft")
16854 (set_attr "prefix" "maybe_evex")
16855 (set_attr "mode" "<sseinsnmode>")])
;; Vector shift of operand 1 by vector operand 2 for every code in the
;; any_lshift iterator, in VI248_AVX512BW modes; operand 2 may be a register
;; or memory.
;; Enabled under "TARGET_AVX2 && <mask_mode512bit_condition>".
;; Emitted as vp<vshift>v<ssemodesuffix>, with <mask_operand3> appended to
;; the destination.
16857 (define_insn "<avx2_avx512bw>_<shift_insn>v<mode><mask_name>"
16858 [(set (match_operand:VI248_AVX512BW 0 "register_operand" "=v")
16859 (any_lshift:VI248_AVX512BW
16860 (match_operand:VI248_AVX512BW 1 "register_operand" "v")
16861 (match_operand:VI248_AVX512BW 2 "nonimmediate_operand" "vm")))]
16862 "TARGET_AVX2 && <mask_mode512bit_condition>"
16863 "vp<vshift>v<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
16864 [(set_attr "type" "sseishft")
16865 (set_attr "prefix" "maybe_evex")
16866 (set_attr "mode" "<sseinsnmode>")])
16868 ;; For avx_vec_concat<mode> insn pattern
;; Maps the V32QI/V16HI/V8SI/V4DI/V8SF/V4DF modes to "t" and the
;; V64QI/V32HI/V16SI/V8DI/V16SF/V8DF modes to "g".  avx_vec_concat<mode>
;; uses the letter as the print modifier for operand 1
;; (%<concat_tg_mode>1) in its vinsert template.
16869 (define_mode_attr concat_tg_mode
16870 [(V32QI "t") (V16HI "t") (V8SI "t") (V4DI "t") (V8SF "t") (V4DF "t")
16871 (V64QI "g") (V32HI "g") (V16SI "g") (V8DI "g") (V16SF "g") (V8DF "g")])
;; Concatenate two <ssehalfvecmode> values into a V_256_512 register.
;;   operand 1: register
;;   operand 2: vector_move_operand; alternative 0 takes "xm",
;;              alternative 1 takes the "C" constraint
;; The output code switches on which_alternative.  One path emits
;; vinsert<i128> with immediate 0x1, printing operand 1 through the
;; <concat_tg_mode> modifier.  The other path switches on
;; get_attr_mode (insn) and emits vmovaps, vmovapd or vmovdqa from operand 1
;; into the %t0 or %x0 view of the destination.
;; The "type" attribute ("sselog,ssemov") lists the insert form first and
;; the move form second.
16873 (define_insn "avx_vec_concat<mode>"
16874 [(set (match_operand:V_256_512 0 "register_operand" "=x,x")
16875 (vec_concat:V_256_512
16876 (match_operand:<ssehalfvecmode> 1 "register_operand" "x,x")
16877 (match_operand:<ssehalfvecmode> 2 "vector_move_operand" "xm,C")))]
16880 switch (which_alternative)
16883 return "vinsert<i128>\t{$0x1, %2, %<concat_tg_mode>1, %0|%0, %<concat_tg_mode>1, %2, 0x1}";
16885 switch (get_attr_mode (insn))
16888 return "vmovaps\t{%1, %t0|%t0, %1}";
16890 return "vmovapd\t{%1, %t0|%t0, %1}";
16892 return "vmovaps\t{%1, %x0|%x0, %1}";
16894 return "vmovapd\t{%1, %x0|%x0, %1}";
16896 return "vmovdqa\t{%1, %t0|%t0, %1}";
16898 return "vmovdqa\t{%1, %x0|%x0, %1}";
16900 gcc_unreachable ();
16903 gcc_unreachable ();
16906 [(set_attr "type" "sselog,ssemov")
16907 (set_attr "prefix_extra" "1,*")
16908 (set_attr "length_immediate" "1,*")
16909 (set_attr "prefix" "maybe_evex")
16910 (set_attr "mode" "<sseinsnmode>")])
;; vcvtph2ps, register form with a V4SF result.  The RTL takes a V8SF unspec
;; of the V8HI register operand 1 and keeps elements 0-3 of it.
;; Enabled under "TARGET_F16C || TARGET_AVX512VL"; the destination is
;; printed with <mask_operand2>.
16912 (define_insn "vcvtph2ps<mask_name>"
16913 [(set (match_operand:V4SF 0 "register_operand" "=v")
16915 (unspec:V8SF [(match_operand:V8HI 1 "register_operand" "v")]
16917 (parallel [(const_int 0) (const_int 1)
16918 (const_int 2) (const_int 3)])))]
16919 "TARGET_F16C || TARGET_AVX512VL"
16920 "vcvtph2ps\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
16921 [(set_attr "type" "ssecvt")
16922 (set_attr "prefix" "maybe_evex")
16923 (set_attr "mode" "V4SF")])
;; vcvtph2ps, memory form: a V4SF register is set from UNSPEC_VCVTPH2PS of
;; a V4HI memory operand.
;; Enabled under "TARGET_F16C || TARGET_AVX512VL".
;; NOTE(review): this pattern sets prefix "vex" and mode "V8SF", whereas the
;; register form vcvtph2ps<mask_name> uses "maybe_evex" and "V4SF" --
;; confirm the difference is intended, given that this pattern also accepts
;; <mask_name> and a "v" destination.
16925 (define_insn "*vcvtph2ps_load<mask_name>"
16926 [(set (match_operand:V4SF 0 "register_operand" "=v")
16927 (unspec:V4SF [(match_operand:V4HI 1 "memory_operand" "m")]
16928 UNSPEC_VCVTPH2PS))]
16929 "TARGET_F16C || TARGET_AVX512VL"
16930 "vcvtph2ps\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
16931 [(set_attr "type" "ssecvt")
16932 (set_attr "prefix" "vex")
16933 (set_attr "mode" "V8SF")])
;; vcvtph2ps with a V8SF result: UNSPEC_VCVTPH2PS of a V8HI operand that may
;; be a register or memory.
;; Enabled under "TARGET_F16C || TARGET_AVX512VL"; the destination is
;; printed with <mask_operand2>.
16935 (define_insn "vcvtph2ps256<mask_name>"
16936 [(set (match_operand:V8SF 0 "register_operand" "=v")
16937 (unspec:V8SF [(match_operand:V8HI 1 "nonimmediate_operand" "vm")]
16938 UNSPEC_VCVTPH2PS))]
16939 "TARGET_F16C || TARGET_AVX512VL"
16940 "vcvtph2ps\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
16941 [(set_attr "type" "ssecvt")
16942 (set_attr "prefix" "vex")
16943 (set_attr "btver2_decode" "double")
16944 (set_attr "mode" "V8SF")])
;; vcvtph2ps with a V16SF result: UNSPEC_VCVTPH2PS of a V16HI operand.
;; Operand 1's predicate and constraint come from the round_saeonly
;; substitution attributes, and <round_saeonly_mask_op2> is printed next to
;; operand 1 in the template.  Always EVEX-prefixed.
16946 (define_insn "<mask_codefor>avx512f_vcvtph2ps512<mask_name><round_saeonly_name>"
16947 [(set (match_operand:V16SF 0 "register_operand" "=v")
16949 [(match_operand:V16HI 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
16950 UNSPEC_VCVTPH2PS))]
16952 "vcvtph2ps\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
16953 [(set_attr "type" "ssecvt")
16954 (set_attr "prefix" "evex")
16955 (set_attr "mode" "V16SF")])
;; Expander for the masked vcvtps2ph with a V8HI result.
;;   operand 1: V4SF register source
;;   operand 2: SI immediate in 0..255
;;   operand 3: V8HI vector_move_operand
;;   operand 4: QI register
;;   (operands 3 and 4 are presumably the merge source and the write mask --
;;    TODO confirm)
;; The preparation statement sets operands[5] to the V4HI zero constant.
16957 (define_expand "vcvtps2ph_mask"
16958 [(set (match_operand:V8HI 0 "register_operand")
16961 (unspec:V4HI [(match_operand:V4SF 1 "register_operand")
16962 (match_operand:SI 2 "const_0_to_255_operand")]
16965 (match_operand:V8HI 3 "vector_move_operand")
16966 (match_operand:QI 4 "register_operand")))]
16968 "operands[5] = CONST0_RTX (V4HImode);")
;; Expander for vcvtps2ph with a V8HI result.
;;   operand 1: V4SF register source
;;   operand 2: SI immediate in 0..255
;; The preparation statement sets operands[3] to the V4HI zero constant.
16970 (define_expand "vcvtps2ph"
16971 [(set (match_operand:V8HI 0 "register_operand")
16973 (unspec:V4HI [(match_operand:V4SF 1 "register_operand")
16974 (match_operand:SI 2 "const_0_to_255_operand")]
16978 "operands[3] = CONST0_RTX (V4HImode);")
;; vcvtps2ph, register form with a V8HI result.
;;   operand 1: V4SF register source
;;   operand 2: SI immediate in 0..255
;;   operand 3: must match const0_operand in V4HI mode
;; The condition allows the unmasked form with TARGET_F16C and any form with
;; TARGET_AVX512VL.  The destination is printed with <mask_operand4>.
16980 (define_insn "*vcvtps2ph<mask_name>"
16981 [(set (match_operand:V8HI 0 "register_operand" "=v")
16983 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "v")
16984 (match_operand:SI 2 "const_0_to_255_operand" "N")]
16986 (match_operand:V4HI 3 "const0_operand")))]
16987 "(TARGET_F16C && !<mask_applied>) || TARGET_AVX512VL"
16988 "vcvtps2ph\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}"
16989 [(set_attr "type" "ssecvt")
16990 (set_attr "prefix" "maybe_evex")
16991 (set_attr "mode" "V4SF")])
;; vcvtps2ph, store form: a V4HI memory destination is set from
;; UNSPEC_VCVTPS2PH of the V4SF register operand 1 and the SI immediate
;; operand 2 (0..255).
;; Enabled under "TARGET_F16C || TARGET_AVX512VL".  The source register uses
;; the "x" constraint.
16993 (define_insn "*vcvtps2ph_store<mask_name>"
16994 [(set (match_operand:V4HI 0 "memory_operand" "=m")
16995 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")
16996 (match_operand:SI 2 "const_0_to_255_operand" "N")]
16997 UNSPEC_VCVTPS2PH))]
16998 "TARGET_F16C || TARGET_AVX512VL"
16999 "vcvtps2ph\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
17000 [(set_attr "type" "ssecvt")
17001 (set_attr "prefix" "maybe_evex")
17002 (set_attr "mode" "V4SF")])
;; vcvtps2ph from a V8SF register: a V8HI destination (register or memory,
;; "=xm") is set from UNSPEC_VCVTPS2PH of operand 1 and the SI immediate
;; operand 2 (0..255).
;; Enabled under "TARGET_F16C || TARGET_AVX512VL".
17004 (define_insn "vcvtps2ph256<mask_name>"
17005 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=xm")
17006 (unspec:V8HI [(match_operand:V8SF 1 "register_operand" "x")
17007 (match_operand:SI 2 "const_0_to_255_operand" "N")]
17008 UNSPEC_VCVTPS2PH))]
17009 "TARGET_F16C || TARGET_AVX512VL"
17010 "vcvtps2ph\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
17011 [(set_attr "type" "ssecvt")
17012 (set_attr "prefix" "maybe_evex")
17013 (set_attr "btver2_decode" "vector")
17014 (set_attr "mode" "V8SF")])
;; vcvtps2ph from a V16SF register: a V16HI destination (register or memory,
;; "=vm") is set from UNSPEC_VCVTPS2PH of operand 1 and the SI immediate
;; operand 2 (0..255).  Always EVEX-prefixed.
17016 (define_insn "<mask_codefor>avx512f_vcvtps2ph512<mask_name>"
17017 [(set (match_operand:V16HI 0 "nonimmediate_operand" "=vm")
17019 [(match_operand:V16SF 1 "register_operand" "v")
17020 (match_operand:SI 2 "const_0_to_255_operand" "N")]
17021 UNSPEC_VCVTPS2PH))]
17023 "vcvtps2ph\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
17024 [(set_attr "type" "ssecvt")
17025 (set_attr "prefix" "evex")
17026 (set_attr "mode" "V16SF")])
17028 ;; For gather* insn patterns
;; Destination modes iterated over by the avx2_gather* patterns: the
;; DI/DF/SI/SF element vectors V2DI, V2DF, V4DI, V4DF, V4SI, V4SF, V8SI
;; and V8SF.
17029 (define_mode_iterator VEC_GATHER_MODE
17030 [V2DI V2DF V4DI V4DF V4SI V4SF V8SI V8SF])
;; For each gather/scatter data mode, the SI-element vector mode used for
;; the index operand of the *si patterns (gathersi, scattersi).
17031 (define_mode_attr VEC_GATHER_IDXSI
17032 [(V2DI "V4SI") (V4DI "V4SI") (V8DI "V8SI")
17033 (V2DF "V4SI") (V4DF "V4SI") (V8DF "V8SI")
17034 (V4SI "V4SI") (V8SI "V8SI") (V16SI "V16SI")
17035 (V4SF "V4SI") (V8SF "V8SI") (V16SF "V16SI")])
;; For each gather/scatter data mode, the DI-element vector mode used for
;; the index operand of the *di patterns (gatherdi, scatterdi).
17037 (define_mode_attr VEC_GATHER_IDXDI
17038 [(V2DI "V2DI") (V4DI "V4DI") (V8DI "V8DI")
17039 (V2DF "V2DI") (V4DF "V4DI") (V8DF "V8DI")
17040 (V4SI "V2DI") (V8SI "V4DI") (V16SI "V8DI")
17041 (V4SF "V2DI") (V8SF "V4DI") (V16SF "V8DI")])
;; For each gather/scatter data mode, the mode of the source/merge data
;; operand in the *di patterns.  It equals the data mode except for
;; V8SI, V16SI, V8SF and V16SF, which map to the vector mode with half as
;; many elements (V4SI, V8SI, V4SF, V8SF).
17043 (define_mode_attr VEC_GATHER_SRCDI
17044 [(V2DI "V2DI") (V4DI "V4DI") (V8DI "V8DI")
17045 (V2DF "V2DF") (V4DF "V4DF") (V8DF "V8DF")
17046 (V4SI "V4SI") (V8SI "V4SI") (V16SI "V8SI")
17047 (V4SF "V4SF") (V8SF "V4SF") (V16SF "V8SF")])
;; Expander for the AVX2 gather with SI-element indices.
;;   operand 0: destination register (VEC_GATHER_MODE)
;;   operand 1: register in the destination mode
;;   operand 2: base address (vsib_address_operand)
;;   operand 3: <VEC_GATHER_IDXSI> index register
;;   operand 4: register in the destination mode
;;   operand 5: SI scale, must satisfy const1248_operand
;;   operand 6: scratch in the destination mode, clobbered
;; The preparation code wraps operands 2, 3 and 5 into a Pmode
;; UNSPEC_VSIBADDR that becomes the address of the gathered memory.
17049 (define_expand "avx2_gathersi<mode>"
17050 [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand")
17051 (unspec:VEC_GATHER_MODE
17052 [(match_operand:VEC_GATHER_MODE 1 "register_operand")
17053 (mem:<ssescalarmode>
17055 [(match_operand 2 "vsib_address_operand")
17056 (match_operand:<VEC_GATHER_IDXSI>
17057 3 "register_operand")
17058 (match_operand:SI 5 "const1248_operand")]))
17059 (mem:BLK (scratch))
17060 (match_operand:VEC_GATHER_MODE 4 "register_operand")]
17062 (clobber (match_scratch:VEC_GATHER_MODE 6))])]
17066 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
17067 operands[5]), UNSPEC_VSIBADDR);
;; AVX2 gather with SI-element indices.
;;   operand 0: destination, early-clobber ("=&x")
;;   operand 2: tied to operand 0 ("0")
;;   operand 5: tied to scratch operand 1 ("1"), which is clobbered
;;   operand 7: the memory reference (vsib_mem_operator), whose address is
;;              built from base operand 3, index operand 4 and scale
;;              operand 6
;; The template prints the scratch register %1, the memory %7 and the
;; destination %0.
17070 (define_insn "*avx2_gathersi<mode>"
17071 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
17072 (unspec:VEC_GATHER_MODE
17073 [(match_operand:VEC_GATHER_MODE 2 "register_operand" "0")
17074 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
17076 [(match_operand:P 3 "vsib_address_operand" "Tv")
17077 (match_operand:<VEC_GATHER_IDXSI> 4 "register_operand" "x")
17078 (match_operand:SI 6 "const1248_operand" "n")]
17080 (mem:BLK (scratch))
17081 (match_operand:VEC_GATHER_MODE 5 "register_operand" "1")]
17083 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
17085 "v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %7, %0|%0, %7, %1}"
17086 [(set_attr "type" "ssemov")
17087 (set_attr "prefix" "vex")
17088 (set_attr "mode" "<sseinsnmode>")])
;; Variant of *avx2_gathersi<mode> with one operand fewer.
;;   operand 0: destination, early-clobber ("=&x")
;;   operand 4: tied to scratch operand 1 ("1"), which is clobbered
;;   operand 6: the memory reference (vsib_mem_operator), whose address is
;;              built from base operand 2, index operand 3 and scale
;;              operand 5
;; The template prints the scratch register %1, the memory %6 and the
;; destination %0.
17090 (define_insn "*avx2_gathersi<mode>_2"
17091 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
17092 (unspec:VEC_GATHER_MODE
17094 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
17096 [(match_operand:P 2 "vsib_address_operand" "Tv")
17097 (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand" "x")
17098 (match_operand:SI 5 "const1248_operand" "n")]
17100 (mem:BLK (scratch))
17101 (match_operand:VEC_GATHER_MODE 4 "register_operand" "1")]
17103 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
17105 "v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %6, %0|%0, %6, %1}"
17106 [(set_attr "type" "ssemov")
17107 (set_attr "prefix" "vex")
17108 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the AVX2 gather with DI-element indices.
;;   operand 0: destination register (VEC_GATHER_MODE)
;;   operand 1: <VEC_GATHER_SRCDI> register
;;   operand 2: base address (vsib_address_operand)
;;   operand 3: <VEC_GATHER_IDXDI> index register
;;   operand 4: <VEC_GATHER_SRCDI> register
;;   operand 5: SI scale, must satisfy const1248_operand
;;   operand 6: scratch in the destination mode, clobbered
;; The preparation code wraps operands 2, 3 and 5 into a Pmode
;; UNSPEC_VSIBADDR that becomes the address of the gathered memory.
17110 (define_expand "avx2_gatherdi<mode>"
17111 [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand")
17112 (unspec:VEC_GATHER_MODE
17113 [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand")
17114 (mem:<ssescalarmode>
17116 [(match_operand 2 "vsib_address_operand")
17117 (match_operand:<VEC_GATHER_IDXDI>
17118 3 "register_operand")
17119 (match_operand:SI 5 "const1248_operand")]))
17120 (mem:BLK (scratch))
17121 (match_operand:<VEC_GATHER_SRCDI>
17122 4 "register_operand")]
17124 (clobber (match_scratch:VEC_GATHER_MODE 6))])]
17128 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
17129 operands[5]), UNSPEC_VSIBADDR);
;; AVX2 gather with DI-element indices.
;;   operand 0: destination, early-clobber ("=&x")
;;   operand 2: <VEC_GATHER_SRCDI> register tied to operand 0 ("0")
;;   operand 5: <VEC_GATHER_SRCDI> register tied to scratch operand 1 ("1")
;;   operand 7: the memory reference, whose address is built from base
;;              operand 3, index operand 4 and scale operand 6
;; The template prints operands 5 and 2 instead of 1 and 0.  They occupy
;; the same registers through the matching constraints but carry the
;; <VEC_GATHER_SRCDI> mode.
17132 (define_insn "*avx2_gatherdi<mode>"
17133 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
17134 (unspec:VEC_GATHER_MODE
17135 [(match_operand:<VEC_GATHER_SRCDI> 2 "register_operand" "0")
17136 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
17138 [(match_operand:P 3 "vsib_address_operand" "Tv")
17139 (match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x")
17140 (match_operand:SI 6 "const1248_operand" "n")]
17142 (mem:BLK (scratch))
17143 (match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" "1")]
17145 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
17147 "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %2|%2, %7, %5}"
17148 [(set_attr "type" "ssemov")
17149 (set_attr "prefix" "vex")
17150 (set_attr "mode" "<sseinsnmode>")])
;; Variant of *avx2_gatherdi<mode> with one operand fewer.
;;   operand 0: destination, early-clobber ("=&x")
;;   operand 4: <VEC_GATHER_SRCDI> register tied to scratch operand 1 ("1")
;;   operand 6: the memory reference, whose address is built from base
;;              operand 2, index operand 3 and scale operand 5
;; When <MODE> differs from <VEC_GATHER_SRCDI>, the destination is printed
;; with the %x0 modifier in both syntaxes; otherwise plain %0 is used.
17152 (define_insn "*avx2_gatherdi<mode>_2"
17153 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
17154 (unspec:VEC_GATHER_MODE
17156 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
17158 [(match_operand:P 2 "vsib_address_operand" "Tv")
17159 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x")
17160 (match_operand:SI 5 "const1248_operand" "n")]
17162 (mem:BLK (scratch))
17163 (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")]
17165 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
17168 if (<MODE>mode != <VEC_GATHER_SRCDI>mode)
17169 return "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %x0|%x0, %6, %4}";
17170 return "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}";
17172 [(set_attr "type" "ssemov")
17173 (set_attr "prefix" "vex")
17174 (set_attr "mode" "<sseinsnmode>")])
;; DI-index gather whose result is a vec_select of elements 0-3, so the
;; destination (operand 0) is a <VEC_GATHER_SRCDI> register.  The pattern
;; iterates over VI4F_256, the mode of the clobbered scratch operand 1.
;;   operand 2: tied to operand 0 ("0")
;;   operand 5: tied to scratch operand 1 ("1")
;;   operand 7: the memory reference, whose address is built from base
;;              operand 3, index operand 4 and scale operand 6
;; The template prints %5, %7 and %0.
17176 (define_insn "*avx2_gatherdi<mode>_3"
17177 [(set (match_operand:<VEC_GATHER_SRCDI> 0 "register_operand" "=&x")
17178 (vec_select:<VEC_GATHER_SRCDI>
17180 [(match_operand:<VEC_GATHER_SRCDI> 2 "register_operand" "0")
17181 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
17183 [(match_operand:P 3 "vsib_address_operand" "Tv")
17184 (match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x")
17185 (match_operand:SI 6 "const1248_operand" "n")]
17187 (mem:BLK (scratch))
17188 (match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" "1")]
17190 (parallel [(const_int 0) (const_int 1)
17191 (const_int 2) (const_int 3)])))
17192 (clobber (match_scratch:VI4F_256 1 "=&x"))]
17194 "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %0|%0, %7, %5}"
17195 [(set_attr "type" "ssemov")
17196 (set_attr "prefix" "vex")
17197 (set_attr "mode" "<sseinsnmode>")])
;; Variant of *avx2_gatherdi<mode>_3 with one operand fewer: the result is
;; again a vec_select of elements 0-3 into a <VEC_GATHER_SRCDI> register,
;; with a clobbered VI4F_256 scratch (operand 1).
;;   operand 4: tied to scratch operand 1 ("1")
;;   operand 6: the memory reference, whose address is built from base
;;              operand 2, index operand 3 and scale operand 5
;; The template prints %4, %6 and %0.
17199 (define_insn "*avx2_gatherdi<mode>_4"
17200 [(set (match_operand:<VEC_GATHER_SRCDI> 0 "register_operand" "=&x")
17201 (vec_select:<VEC_GATHER_SRCDI>
17204 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
17206 [(match_operand:P 2 "vsib_address_operand" "Tv")
17207 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x")
17208 (match_operand:SI 5 "const1248_operand" "n")]
17210 (mem:BLK (scratch))
17211 (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")]
17213 (parallel [(const_int 0) (const_int 1)
17214 (const_int 2) (const_int 3)])))
17215 (clobber (match_scratch:VI4F_256 1 "=&x"))]
17217 "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}"
17218 [(set_attr "type" "ssemov")
17219 (set_attr "prefix" "vex")
17220 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the AVX-512 gather with SI-element indices (VI48F modes).
;;   operand 0: destination register
;;   operand 1: register in the destination mode
;;   operand 2: base address (vsib_address_operand)
;;   operand 3: <VEC_GATHER_IDXSI> index register
;;   operand 4: <avx512fmaskmode> register
;;   operand 5: SI scale (const1248_operand)
;;   operand 7: <avx512fmaskmode> scratch, clobbered
;; The preparation code wraps operands 2, 3 and 5 into a Pmode
;; UNSPEC_VSIBADDR.
17222 (define_expand "<avx512>_gathersi<mode>"
17223 [(parallel [(set (match_operand:VI48F 0 "register_operand")
17225 [(match_operand:VI48F 1 "register_operand")
17226 (match_operand:<avx512fmaskmode> 4 "register_operand")
17227 (mem:<ssescalarmode>
17229 [(match_operand 2 "vsib_address_operand")
17230 (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand")
17231 (match_operand:SI 5 "const1248_operand")]))]
17233 (clobber (match_scratch:<avx512fmaskmode> 7))])]
17237 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
17238 operands[5]), UNSPEC_VSIBADDR);
;; AVX-512 gather with SI-element indices.
;;   operand 0: destination, early-clobber ("=&v")
;;   operand 1: tied to operand 0 ("0")
;;   operand 7: <avx512fmaskmode> register tied to scratch operand 2 ("2"),
;;              a clobbered "Yk" register
;;   operand 6: the memory reference, whose address is a UNSPEC_VSIBADDR of
;;              base operand 4, index operand 3 and scale operand 5
;; The template prints the destination as %0{%2}; the Intel-syntax memory
;; operand uses the %g6 modifier.
17241 (define_insn "*avx512f_gathersi<mode>"
17242 [(set (match_operand:VI48F 0 "register_operand" "=&v")
17244 [(match_operand:VI48F 1 "register_operand" "0")
17245 (match_operand:<avx512fmaskmode> 7 "register_operand" "2")
17246 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
17248 [(match_operand:P 4 "vsib_address_operand" "Tv")
17249 (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand" "v")
17250 (match_operand:SI 5 "const1248_operand" "n")]
17251 UNSPEC_VSIBADDR)])]
17253 (clobber (match_scratch:<avx512fmaskmode> 2 "=&Yk"))]
17255 "v<sseintprefix>gatherd<ssemodesuffix>\t{%6, %0%{%2%}|%0%{%2%}, %g6}"
17256 [(set_attr "type" "ssemov")
17257 (set_attr "prefix" "evex")
17258 (set_attr "mode" "<sseinsnmode>")])
;; Variant of *avx512f_gathersi<mode> with one operand fewer.
;;   operand 0: destination, early-clobber ("=&v")
;;   operand 6: <avx512fmaskmode> register tied to scratch operand 1 ("1"),
;;              a clobbered "Yk" register
;;   operand 5: the memory reference, whose address is a UNSPEC_VSIBADDR of
;;              base operand 3, index operand 2 and scale operand 4
;; The template prints the destination as %0{%1}; the Intel-syntax memory
;; operand uses the %g5 modifier.
17260 (define_insn "*avx512f_gathersi<mode>_2"
17261 [(set (match_operand:VI48F 0 "register_operand" "=&v")
17264 (match_operand:<avx512fmaskmode> 6 "register_operand" "1")
17265 (match_operator:<ssescalarmode> 5 "vsib_mem_operator"
17267 [(match_operand:P 3 "vsib_address_operand" "Tv")
17268 (match_operand:<VEC_GATHER_IDXSI> 2 "register_operand" "v")
17269 (match_operand:SI 4 "const1248_operand" "n")]
17270 UNSPEC_VSIBADDR)])]
17272 (clobber (match_scratch:<avx512fmaskmode> 1 "=&Yk"))]
17274 "v<sseintprefix>gatherd<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %g5}"
17275 [(set_attr "type" "ssemov")
17276 (set_attr "prefix" "evex")
17277 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the AVX-512 gather with DI-element indices (VI48F modes).
;;   operand 0: destination register
;;   operand 1: <VEC_GATHER_SRCDI> register
;;   operand 2: base address (vsib_address_operand)
;;   operand 3: <VEC_GATHER_IDXDI> index register
;;   operand 4: QI register
;;   operand 5: SI scale (const1248_operand)
;;   operand 7: QI scratch, clobbered
;; The preparation code wraps operands 2, 3 and 5 into a Pmode
;; UNSPEC_VSIBADDR.
17280 (define_expand "<avx512>_gatherdi<mode>"
17281 [(parallel [(set (match_operand:VI48F 0 "register_operand")
17283 [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand")
17284 (match_operand:QI 4 "register_operand")
17285 (mem:<ssescalarmode>
17287 [(match_operand 2 "vsib_address_operand")
17288 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand")
17289 (match_operand:SI 5 "const1248_operand")]))]
17291 (clobber (match_scratch:QI 7))])]
17295 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
17296 operands[5]), UNSPEC_VSIBADDR);
;; AVX-512 gather with DI-element indices.
;;   operand 0: destination, early-clobber ("=&v")
;;   operand 1: <VEC_GATHER_SRCDI> register tied to operand 0 ("0")
;;   operand 7: QI register tied to scratch operand 2 ("2"), a clobbered
;;              "Yk" register
;;   operand 6: the memory reference, whose address is a UNSPEC_VSIBADDR of
;;              base operand 4, index operand 3 and scale operand 5
;; The template prints operand 1 rather than operand 0 as the destination,
;; so the register name follows the <VEC_GATHER_SRCDI> mode.
17299 (define_insn "*avx512f_gatherdi<mode>"
17300 [(set (match_operand:VI48F 0 "register_operand" "=&v")
17302 [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand" "0")
17303 (match_operand:QI 7 "register_operand" "2")
17304 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
17306 [(match_operand:P 4 "vsib_address_operand" "Tv")
17307 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "v")
17308 (match_operand:SI 5 "const1248_operand" "n")]
17309 UNSPEC_VSIBADDR)])]
17311 (clobber (match_scratch:QI 2 "=&Yk"))]
17313 "v<sseintprefix>gatherq<ssemodesuffix>\t{%6, %1%{%2%}|%1%{%2%}, %g6}"
17314 [(set_attr "type" "ssemov")
17315 (set_attr "prefix" "evex")
17316 (set_attr "mode" "<sseinsnmode>")])
;; Variant of *avx512f_gatherdi<mode> with one operand fewer.
;;   operand 0: destination, early-clobber ("=&v")
;;   operand 6: QI register tied to scratch operand 1 ("1"), a clobbered
;;              "Yk" register
;;   operand 5: the memory reference, whose address is a UNSPEC_VSIBADDR of
;;              base operand 3, index operand 2 and scale operand 4
;; When <MODE> differs from <VEC_GATHER_SRCDI>, the destination is printed
;; through a narrowing modifier: %x0 when <MODE> is not 64 bytes wide,
;; %t0 when it is.  Both assembler dialects must use the same modifier; the
;; Intel alternative of the non-64-byte case previously printed %t0 while
;; the AT&T alternative printed %x0.
17318 (define_insn "*avx512f_gatherdi<mode>_2"
17319 [(set (match_operand:VI48F 0 "register_operand" "=&v")
17322 (match_operand:QI 6 "register_operand" "1")
17323 (match_operator:<ssescalarmode> 5 "vsib_mem_operator"
17325 [(match_operand:P 3 "vsib_address_operand" "Tv")
17326 (match_operand:<VEC_GATHER_IDXDI> 2 "register_operand" "v")
17327 (match_operand:SI 4 "const1248_operand" "n")]
17328 UNSPEC_VSIBADDR)])]
17330 (clobber (match_scratch:QI 1 "=&Yk"))]
17333 if (<MODE>mode != <VEC_GATHER_SRCDI>mode)
17335 if (GET_MODE_SIZE (<MODE>mode) != 64)
17336 return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %x0%{%1%}|%x0%{%1%}, %g5}";
17338 return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %t0%{%1%}|%t0%{%1%}, %g5}";
17340 return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %g5}";
17342 [(set_attr "type" "ssemov")
17343 (set_attr "prefix" "evex")
17344 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the AVX-512 scatter with SI-element indices (VI48F modes).
;;   operand 0: base address (vsib_address_operand)
;;   operand 1: <avx512fmaskmode> register
;;   operand 2: <VEC_GATHER_IDXSI> index register
;;   operand 3: register holding the data to store
;;   operand 4: SI scale (const1248_operand)
;;   operand 6: <avx512fmaskmode> scratch, clobbered
;; The preparation code wraps operands 0, 2 and 4 into a Pmode
;; UNSPEC_VSIBADDR.
17346 (define_expand "<avx512>_scattersi<mode>"
17347 [(parallel [(set (mem:VI48F
17349 [(match_operand 0 "vsib_address_operand")
17350 (match_operand:<VEC_GATHER_IDXSI> 2 "register_operand")
17351 (match_operand:SI 4 "const1248_operand")]))
17353 [(match_operand:<avx512fmaskmode> 1 "register_operand")
17354 (match_operand:VI48F 3 "register_operand")]
17356 (clobber (match_scratch:<avx512fmaskmode> 6))])]
17360 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[0], operands[2],
17361 operands[4]), UNSPEC_VSIBADDR);
;; AVX-512 scatter with SI-element indices.
;;   operand 5: the memory destination (vsib_mem_operator), whose address is
;;              built from base operand 0, index operand 2 and scale
;;              operand 4
;;   operand 6: <avx512fmaskmode> register tied to scratch operand 1 ("1"),
;;              a clobbered "Yk" register
;;   operand 3: register holding the data to store
;; The template prints the memory operand as %5{%1}.
17364 (define_insn "*avx512f_scattersi<mode>"
17365 [(set (match_operator:VI48F 5 "vsib_mem_operator"
17367 [(match_operand:P 0 "vsib_address_operand" "Tv")
17368 (match_operand:<VEC_GATHER_IDXSI> 2 "register_operand" "v")
17369 (match_operand:SI 4 "const1248_operand" "n")]
17372 [(match_operand:<avx512fmaskmode> 6 "register_operand" "1")
17373 (match_operand:VI48F 3 "register_operand" "v")]
17375 (clobber (match_scratch:<avx512fmaskmode> 1 "=&Yk"))]
17377 "v<sseintprefix>scatterd<ssemodesuffix>\t{%3, %5%{%1%}|%5%{%1%}, %3}"
17378 [(set_attr "type" "ssemov")
17379 (set_attr "prefix" "evex")
17380 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the AVX-512 scatter with DI-element indices (VI48F modes).
;;   operand 0: base address (vsib_address_operand)
;;   operand 1: QI register
;;   operand 2: <VEC_GATHER_IDXDI> index register
;;   operand 3: <VEC_GATHER_SRCDI> register holding the data to store
;;   operand 4: SI scale (const1248_operand)
;;   operand 6: QI scratch, clobbered
;; The preparation code wraps operands 0, 2 and 4 into a Pmode
;; UNSPEC_VSIBADDR.
17382 (define_expand "<avx512>_scatterdi<mode>"
17383 [(parallel [(set (mem:VI48F
17385 [(match_operand 0 "vsib_address_operand")
17386 (match_operand:<VEC_GATHER_IDXDI> 2 "register_operand")
17387 (match_operand:SI 4 "const1248_operand")]))
17389 [(match_operand:QI 1 "register_operand")
17390 (match_operand:<VEC_GATHER_SRCDI> 3 "register_operand")]
17392 (clobber (match_scratch:QI 6))])]
17396 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[0], operands[2],
17397 operands[4]), UNSPEC_VSIBADDR);
;; AVX-512 scatter with DI-element indices.
;;   operand 5: the memory destination (vsib_mem_operator), whose address is
;;              built from base operand 0, index operand 2 and scale
;;              operand 4
;;   operand 6: QI register tied to scratch operand 1 ("1"), a clobbered
;;              "Yk" register
;;   operand 3: <VEC_GATHER_SRCDI> register holding the data to store
;; The template prints the memory operand as %5{%1}.
17400 (define_insn "*avx512f_scatterdi<mode>"
17401 [(set (match_operator:VI48F 5 "vsib_mem_operator"
17403 [(match_operand:P 0 "vsib_address_operand" "Tv")
17404 (match_operand:<VEC_GATHER_IDXDI> 2 "register_operand" "v")
17405 (match_operand:SI 4 "const1248_operand" "n")]
17408 [(match_operand:QI 6 "register_operand" "1")
17409 (match_operand:<VEC_GATHER_SRCDI> 3 "register_operand" "v")]
17411 (clobber (match_scratch:QI 1 "=&Yk"))]
17413 "v<sseintprefix>scatterq<ssemodesuffix>\t{%3, %5%{%1%}|%5%{%1%}, %3}"
17414 [(set_attr "type" "ssemov")
17415 (set_attr "prefix" "evex")
17416 (set_attr "mode" "<sseinsnmode>")])
;; Masked compress into a register (VI48F modes).
;;   operand 1: source register
;;   operand 2: vector_move_operand with constraint "0C", i.e. either tied
;;              to the destination or accepted by the "C" constraint
;;   operand 3: <avx512fmaskmode> "Yk" register, printed in braces after the
;;              destination and followed by the %N2 modifier
;; Emitted as v<sseintprefix>compress<ssemodesuffix>.
17418 (define_insn "<avx512>_compress<mode>_mask"
17419 [(set (match_operand:VI48F 0 "register_operand" "=v")
17421 [(match_operand:VI48F 1 "register_operand" "v")
17422 (match_operand:VI48F 2 "vector_move_operand" "0C")
17423 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")]
17426 "v<sseintprefix>compress<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
17427 [(set_attr "type" "ssemov")
17428 (set_attr "prefix" "evex")
17429 (set_attr "mode" "<sseinsnmode>")])
;; Masked compress directly to memory (UNSPEC_COMPRESS_STORE, VI48F modes).
;;   operand 0: memory destination
;;   operand 1: source register
;;   operand 2: <avx512fmaskmode> "Yk" register, printed in braces after the
;;              destination
;; NOTE(review): operand 1 uses the "x" constraint, whereas
;; <avx512>_compress<mode>_mask uses "v" for its source -- confirm whether
;; the narrower register class is intended for this EVEX-prefixed insn.
17431 (define_insn "<avx512>_compressstore<mode>_mask"
17432 [(set (match_operand:VI48F 0 "memory_operand" "=m")
17434 [(match_operand:VI48F 1 "register_operand" "x")
17436 (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")]
17437 UNSPEC_COMPRESS_STORE))]
17439 "v<sseintprefix>compress<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
17440 [(set_attr "type" "ssemov")
17441 (set_attr "prefix" "evex")
17442 (set_attr "memory" "store")
17443 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the "_maskz" form of the masked vector expand (VI48F modes).
;; The preparation statement overwrites operands[2] with the zero vector of
;; <MODE>.  Operand 1 may be a register or memory; operand 3 is the
;; <avx512fmaskmode> register.
17445 (define_expand "<avx512>_expand<mode>_maskz"
17446 [(set (match_operand:VI48F 0 "register_operand")
17448 [(match_operand:VI48F 1 "nonimmediate_operand")
17449 (match_operand:VI48F 2 "vector_move_operand")
17450 (match_operand:<avx512fmaskmode> 3 "register_operand")]
17453 "operands[2] = CONST0_RTX (<MODE>mode);")
;; Masked vector expand (VI48F modes), with two alternatives: operand 1 in
;; a register ("v") or in memory ("m").  The "memory" attribute is
;; "none,load" to match.
;;   operand 2: vector_move_operand with constraint "0C", i.e. either tied
;;              to the destination or accepted by the "C" constraint
;;   operand 3: <avx512fmaskmode> "Yk" register, printed in braces after the
;;              destination and followed by the %N2 modifier
;; Emitted as v<sseintprefix>expand<ssemodesuffix>.
17455 (define_insn "<avx512>_expand<mode>_mask"
17456 [(set (match_operand:VI48F 0 "register_operand" "=v,v")
17458 [(match_operand:VI48F 1 "nonimmediate_operand" "v,m")
17459 (match_operand:VI48F 2 "vector_move_operand" "0C,0C")
17460 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")]
17463 "v<sseintprefix>expand<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
17464 [(set_attr "type" "ssemov")
17465 (set_attr "prefix" "evex")
17466 (set_attr "memory" "none,load")
17467 (set_attr "mode" "<sseinsnmode>")])
;; Packed vrange for VF_AVX512VL modes.
;;   operand 1: register source
;;   operand 2: second source; predicate and constraint come from the
;;              round_saeonly substitution attributes
;;   operand 3: SI immediate in 0..15
;; Enabled under "TARGET_AVX512DQ && <round_saeonly_mode512bit_condition>".
17469 (define_insn "avx512dq_rangep<mode><mask_name><round_saeonly_name>"
17470 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
17471 (unspec:VF_AVX512VL
17472 [(match_operand:VF_AVX512VL 1 "register_operand" "v")
17473 (match_operand:VF_AVX512VL 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
17474 (match_operand:SI 3 "const_0_to_15_operand")]
17476 "TARGET_AVX512DQ && <round_saeonly_mode512bit_condition>"
17477 "vrange<ssemodesuffix>\t{<round_saeonly_mask_op4>%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3<round_saeonly_mask_op4>}"
17478 [(set_attr "type" "sse")
17479 (set_attr "prefix" "evex")
17480 (set_attr "mode" "<MODE>")])
;; Scalar vrange (UNSPEC_RANGE_SCALAR) for VF_128 modes.
;;   operand 1: register source
;;   operand 2: second source; predicate and constraint come from the
;;              round_saeonly substitution attributes
;;   operand 3: SI immediate in 0..15
;; Emitted as vrange<ssescalarmodesuffix>.
17482 (define_insn "avx512dq_ranges<mode><round_saeonly_name>"
17483 [(set (match_operand:VF_128 0 "register_operand" "=v")
17486 [(match_operand:VF_128 1 "register_operand" "v")
17487 (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
17488 (match_operand:SI 3 "const_0_to_15_operand")]
17489 UNSPEC_RANGE_SCALAR)
17493 "vrange<ssescalarmodesuffix>\t{<round_saeonly_op4>%3, %2, %1, %0|%0, %1, %2, %3<round_saeonly_op4>}"
17494 [(set_attr "type" "sse")
17495 (set_attr "prefix" "evex")
17496 (set_attr "mode" "<MODE>")])
;; Packed vfpclass: sets an <avx512fmaskmode> "Yk" register from the
;; VF_AVX512VL register operand 1 and the QI immediate operand 2 (0..255).
;; The destination is printed with <mask_scalar_merge_operand3>.
;; (A stray ';' after the output template was removed; in a .md file it
;; only started an empty comment.)
17498 (define_insn "avx512dq_fpclass<mode><mask_scalar_merge_name>"
17499 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
17500 (unspec:<avx512fmaskmode>
17501 [(match_operand:VF_AVX512VL 1 "register_operand" "v")
17502 (match_operand:QI 2 "const_0_to_255_operand" "n")]
17505 "vfpclass<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
17506 [(set_attr "type" "sse")
17507 (set_attr "length_immediate" "1")
17508 (set_attr "prefix" "evex")
17509 (set_attr "mode" "<MODE>")])
;; Scalar vfpclass: sets an <avx512fmaskmode> "Yk" register from an AND
;; whose first arm is UNSPEC_FPCLASS_SCALAR of the VF_128 register operand 1
;; and the QI immediate operand 2 (0..255).
;; (A stray ';' after the output template was removed; in a .md file it
;; only started an empty comment.)
17511 (define_insn "avx512dq_vmfpclass<mode>"
17512 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
17513 (and:<avx512fmaskmode>
17514 (unspec:<avx512fmaskmode>
17515 [(match_operand:VF_128 1 "register_operand" "v")
17516 (match_operand:QI 2 "const_0_to_255_operand" "n")]
17517 UNSPEC_FPCLASS_SCALAR)
17520 "vfpclass<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
17521 [(set_attr "type" "sse")
17522 (set_attr "length_immediate" "1")
17523 (set_attr "prefix" "evex")
17524 (set_attr "mode" "<MODE>")])
;; Packed vgetmant for VF_AVX512VL modes.
;;   operand 1: source (nonimmediate_operand; the constraint comes from the
;;              round_saeonly substitution)
;;   operand 2: SI immediate in 0..15
;; The destination is printed with <mask_operand3>.
;; (A stray ';' after the output template was removed; in a .md file it
;; only started an empty comment.)
17526 (define_insn "<avx512>_getmant<mode><mask_name><round_saeonly_name>"
17527 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
17528 (unspec:VF_AVX512VL
17529 [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "<round_saeonly_constraint>")
17530 (match_operand:SI 2 "const_0_to_15_operand")]
17533 "vgetmant<ssemodesuffix>\t{%2, <round_saeonly_mask_op3>%1, %0<mask_operand3>|%0<mask_operand3>, %1<round_saeonly_mask_op3>, %2}"
17534 [(set_attr "prefix" "evex")
17535 (set_attr "mode" "<MODE>")])
;; Scalar vgetmant for VF_128 modes.
;;   operand 1: register source
;;   operand 2: second source; predicate and constraint come from the
;;              round_saeonly substitution attributes
;;   operand 3: SI immediate in 0..15
;; Emitted as vgetmant<ssescalarmodesuffix>.
;; (A stray ';' after the output template was removed; in a .md file it
;; only started an empty comment.)
17537 (define_insn "avx512f_vgetmant<mode><round_saeonly_name>"
17538 [(set (match_operand:VF_128 0 "register_operand" "=v")
17541 [(match_operand:VF_128 1 "register_operand" "v")
17542 (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
17543 (match_operand:SI 3 "const_0_to_15_operand")]
17548 "vgetmant<ssescalarmodesuffix>\t{%3, <round_saeonly_op4>%2, %1, %0|%0, %1, %2<round_saeonly_op4>, %3}"
17549 [(set_attr "prefix" "evex")
17550 (set_attr "mode" "<ssescalarmode>")])
;; vdbpsadbw: a VI2_AVX512F result from an unspec of two <dbpsadbwmode>
;; operands (operand 1 a register, operand 2 a register or memory) and the
;; SI immediate operand 3 (0..255).
;; Enabled under "TARGET_AVX512BW && <mask_mode512bit_condition>".
;; NOTE(review): the "isa" attribute is "avx" although the insn condition
;; requires TARGET_AVX512BW -- confirm this is intended.
17552 (define_insn "<mask_codefor>avx512bw_dbpsadbw<mode><mask_name>"
17553 [(set (match_operand:VI2_AVX512F 0 "register_operand" "=v")
17554 (unspec:VI2_AVX512F
17555 [(match_operand:<dbpsadbwmode> 1 "register_operand" "v")
17556 (match_operand:<dbpsadbwmode> 2 "nonimmediate_operand" "vm")
17557 (match_operand:SI 3 "const_0_to_255_operand")]
17559 "TARGET_AVX512BW && <mask_mode512bit_condition>"
17560 "vdbpsadbw\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}"
17561 [(set_attr "isa" "avx")
17562 (set_attr "type" "sselog1")
17563 (set_attr "length_immediate" "1")
17564 (set_attr "prefix" "evex")
17565 (set_attr "mode" "<sseinsnmode>")])
;; "clz<mode>2" for VI48_AVX512VL modes: emits vplzcnt<ssemodesuffix> from
;; operand 1 (register or memory) into the destination register, which is
;; printed with <mask_operand2>.
17567 (define_insn "clz<mode>2<mask_name>"
17568 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
17570 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm")))]
17572 "vplzcnt<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17573 [(set_attr "type" "sse")
17574 (set_attr "prefix" "evex")
17575 (set_attr "mode" "<sseinsnmode>")])
;; vpconflict for VI48_AVX512VL modes: an unspec of operand 1 (register or
;; memory) is written to the destination register, which is printed with
;; <mask_operand2>.
17577 (define_insn "<mask_codefor>conflict<mode><mask_name>"
17578 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
17579 (unspec:VI48_AVX512VL
17580 [(match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm")]
17583 "vpconflict<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17584 [(set_attr "type" "sse")
17585 (set_attr "prefix" "evex")
17586 (set_attr "mode" "<sseinsnmode>")])
;; sha1msg1 on V4SI.  Operand 1 is tied to the destination ("0"), so the
;; template prints only operand 2 (register or memory) and operand 0.
17588 (define_insn "sha1msg1"
17589 [(set (match_operand:V4SI 0 "register_operand" "=x")
17591 [(match_operand:V4SI 1 "register_operand" "0")
17592 (match_operand:V4SI 2 "nonimmediate_operand" "xm")]
17595 "sha1msg1\t{%2, %0|%0, %2}"
17596 [(set_attr "type" "sselog1")
17597 (set_attr "mode" "TI")])
;; sha1msg2 on V4SI.  Operand 1 is tied to the destination ("0"), so the
;; template prints only operand 2 (register or memory) and operand 0.
17599 (define_insn "sha1msg2"
17600 [(set (match_operand:V4SI 0 "register_operand" "=x")
17602 [(match_operand:V4SI 1 "register_operand" "0")
17603 (match_operand:V4SI 2 "nonimmediate_operand" "xm")]
17606 "sha1msg2\t{%2, %0|%0, %2}"
17607 [(set_attr "type" "sselog1")
17608 (set_attr "mode" "TI")])
;; sha1nexte on V4SI (UNSPEC_SHA1NEXTE).  Operand 1 is tied to the
;; destination ("0"), so the template prints only operand 2 (register or
;; memory) and operand 0.
17610 (define_insn "sha1nexte"
17611 [(set (match_operand:V4SI 0 "register_operand" "=x")
17613 [(match_operand:V4SI 1 "register_operand" "0")
17614 (match_operand:V4SI 2 "nonimmediate_operand" "xm")]
17615 UNSPEC_SHA1NEXTE))]
17617 "sha1nexte\t{%2, %0|%0, %2}"
17618 [(set_attr "type" "sselog1")
17619 (set_attr "mode" "TI")])
;; sha1rnds4 on V4SI (UNSPEC_SHA1RNDS4).  Operand 1 is tied to the
;; destination ("0"); operand 2 may be a register or memory; operand 3 is an
;; SI immediate in 0..3 and is printed as the instruction's immediate.
17621 (define_insn "sha1rnds4"
17622 [(set (match_operand:V4SI 0 "register_operand" "=x")
17624 [(match_operand:V4SI 1 "register_operand" "0")
17625 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
17626 (match_operand:SI 3 "const_0_to_3_operand" "n")]
17627 UNSPEC_SHA1RNDS4))]
17629 "sha1rnds4\t{%3, %2, %0|%0, %2, %3}"
17630 [(set_attr "type" "sselog1")
17631 (set_attr "length_immediate" "1")
17632 (set_attr "mode" "TI")])
;; sha256msg1 on V4SI (UNSPEC_SHA256MSG1).  Operand 1 is tied to the
;; destination ("0"), so the template prints only operand 2 (register or
;; memory) and operand 0.
17634 (define_insn "sha256msg1"
17635 [(set (match_operand:V4SI 0 "register_operand" "=x")
17637 [(match_operand:V4SI 1 "register_operand" "0")
17638 (match_operand:V4SI 2 "nonimmediate_operand" "xm")]
17639 UNSPEC_SHA256MSG1))]
17641 "sha256msg1\t{%2, %0|%0, %2}"
17642 [(set_attr "type" "sselog1")
17643 (set_attr "mode" "TI")])
;; sha256msg2 on V4SI (UNSPEC_SHA256MSG2).  Operand 1 is tied to the
;; destination ("0"), so the template prints only operand 2 (register or
;; memory) and operand 0.
17645 (define_insn "sha256msg2"
17646 [(set (match_operand:V4SI 0 "register_operand" "=x")
17648 [(match_operand:V4SI 1 "register_operand" "0")
17649 (match_operand:V4SI 2 "nonimmediate_operand" "xm")]
17650 UNSPEC_SHA256MSG2))]
17652 "sha256msg2\t{%2, %0|%0, %2}"
17653 [(set_attr "type" "sselog1")
17654 (set_attr "mode" "TI")])
;; sha256rnds2 on V4SI (UNSPEC_SHA256RNDS2).  Operand 1 is tied to the
;; destination ("0"); operand 2 may be a register or memory; operand 3 is a
;; V4SI register restricted by the "Yz" constraint and is printed as the
;; first (AT&T) / last (Intel) operand.
;; NOTE(review): "length_immediate" is set to "1" although operand 3 is a
;; register, not an immediate -- confirm whether the attribute is intended.
17656 (define_insn "sha256rnds2"
17657 [(set (match_operand:V4SI 0 "register_operand" "=x")
17659 [(match_operand:V4SI 1 "register_operand" "0")
17660 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
17661 (match_operand:V4SI 3 "register_operand" "Yz")]
17662 UNSPEC_SHA256RNDS2))]
17664 "sha256rnds2\t{%3, %2, %0|%0, %2, %3}"
17665 [(set_attr "type" "sselog1")
17666 (set_attr "length_immediate" "1")
17667 (set_attr "mode" "TI")])