1 ;; GCC machine description for SSE instructions
2 ;; Copyright (C) 2005-2014 Free Software Foundation, Inc.
4 ;; This file is part of GCC.
6 ;; GCC is free software; you can redistribute it and/or modify
7 ;; it under the terms of the GNU General Public License as published by
8 ;; the Free Software Foundation; either version 3, or (at your option)
11 ;; GCC is distributed in the hope that it will be useful,
12 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
13 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 ;; GNU General Public License for more details.
16 ;; You should have received a copy of the GNU General Public License
17 ;; along with GCC; see the file COPYING3. If not see
18 ;; <http://www.gnu.org/licenses/>.
;; UNSPEC_* members of the "unspec" C enum.  The interleaved comments group
;; the codes by the ISA extension they belong to.
20 (define_c_enum "unspec" [
55 UNSPEC_XOP_UNSIGNED_CMP
66 UNSPEC_AESKEYGENASSIST
87 ;; For AVX512F support
91 UNSPEC_UNSIGNED_FIX_NOTRUNC
106 UNSPEC_COMPRESS_STORE
111 ;; For embed. rounding feature
112 UNSPEC_EMBEDDED_ROUNDING
114 ;; For AVX512PF support
115 UNSPEC_GATHER_PREFETCH
116 UNSPEC_SCATTER_PREFETCH
118 ;; For AVX512ER support
133 (define_c_enum "unspecv" [
143 ;; All vector modes including V?TImode, used in move patterns.
;; The 128-bit modes are listed unconditionally, the 256-bit modes need
;; TARGET_AVX and the 512-bit modes need TARGET_AVX512F, except V4TI which
;; needs TARGET_AVX512BW.  This iterator drives mov<mode>,
;; *mov<mode>_internal and movmisalign<mode>.
144 (define_mode_iterator VMOVE
145 [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
146 (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
147 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
148 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
149 (V4TI "TARGET_AVX512BW") (V2TI "TARGET_AVX") V1TI
150 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
151 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF])
153 ;; All AVX512VL vector modes
;; QI/HI element modes: the 512-bit mode needs TARGET_AVX512BW, the narrower
;; ones need TARGET_AVX512VL && TARGET_AVX512BW.
;; SI/DI/SF/DF element modes: the 512-bit mode needs TARGET_AVX512F, the
;; narrower ones need TARGET_AVX512VL.
;; Used by the <avx512>_load<mode>_mask and <avx512>_store<mode>_mask insns.
154 (define_mode_iterator V_AVX512VL
155 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX512VL && TARGET_AVX512BW")
156 (V16QI "TARGET_AVX512VL && TARGET_AVX512BW")
157 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX512VL && TARGET_AVX512BW")
158 (V8HI "TARGET_AVX512VL && TARGET_AVX512BW")
159 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
160 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")
161 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
162 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
;; Integer and float vector modes without the V?TI variants.  No 512-bit
;; QI/HI modes are listed.  256-bit modes need TARGET_AVX, 512-bit modes
;; need TARGET_AVX512F and V2DF needs TARGET_SSE2.
165 (define_mode_iterator V
166 [(V32QI "TARGET_AVX") V16QI
167 (V16HI "TARGET_AVX") V8HI
168 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
169 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
170 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
171 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
173 ;; All 128bit vector modes
;; V2DF is the only entry with a condition (TARGET_SSE2).
174 (define_mode_iterator V_128
175 [V16QI V8HI V4SI V2DI V4SF (V2DF "TARGET_SSE2")])
177 ;; All 256bit vector modes
;; None of the entries carries a per-mode condition.
178 (define_mode_iterator V_256
179 [V32QI V16HI V8SI V4DI V8SF V4DF])
181 ;; All 512bit vector modes
;; None of the entries carries a per-mode condition.
182 (define_mode_iterator V_512 [V64QI V32HI V16SI V8DI V16SF V8DF])
184 ;; All 256bit and 512bit vector modes
;; The 256-bit modes are unconditional; every 512-bit mode requires
;; TARGET_AVX512F.
185 (define_mode_iterator V_256_512
186 [V32QI V16HI V8SI V4DI V8SF V4DF
187 (V64QI "TARGET_AVX512F") (V32HI "TARGET_AVX512F") (V16SI "TARGET_AVX512F")
188 (V8DI "TARGET_AVX512F") (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")])
190 ;; All vector float modes
;; V4SF is unconditional; V2DF needs TARGET_SSE2, the 256-bit modes need
;; TARGET_AVX and the 512-bit modes need TARGET_AVX512F.
191 (define_mode_iterator VF
192 [(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
193 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
195 ;; 128- and 256-bit float vector modes
;; The 256-bit modes need TARGET_AVX, V2DF needs TARGET_SSE2 and V4SF is
;; unconditional.
196 (define_mode_iterator VF_128_256
197 [(V8SF "TARGET_AVX") V4SF
198 (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
200 ;; All SFmode vector float modes
;; V4SF is unconditional, V8SF needs TARGET_AVX, V16SF needs TARGET_AVX512F.
201 (define_mode_iterator VF1
202 [(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF])
204 ;; 128- and 256-bit SF vector modes
;; V8SF needs TARGET_AVX; V4SF is unconditional.
205 (define_mode_iterator VF1_128_256
206 [(V8SF "TARGET_AVX") V4SF])
208 ;; All DFmode vector float modes
;; Unlike in the VF iterator, V2DF carries no TARGET_SSE2 condition here.
209 (define_mode_iterator VF2
210 [(V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF])
212 ;; 128- and 256-bit DF vector modes
;; V4DF needs TARGET_AVX; V2DF is listed without a condition.
213 (define_mode_iterator VF2_128_256
214 [(V4DF "TARGET_AVX") V2DF])
;; 512- and 256-bit DF vector modes (V2DF is not included).
216 (define_mode_iterator VF2_512_256
217 [(V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX")])
219 ;; All 128bit vector float modes
;; V2DF needs TARGET_SSE2; V4SF is unconditional.
220 (define_mode_iterator VF_128
221 [V4SF (V2DF "TARGET_SSE2")])
223 ;; All 256bit vector float modes
224 (define_mode_iterator VF_256
227 ;; All 512bit vector float modes
228 (define_mode_iterator VF_512
231 ;; All vector integer modes
;; Of the 512-bit modes only V16SI and V8DI are included (TARGET_AVX512F).
;; The 256-bit modes need TARGET_AVX; the 128-bit modes are unconditional.
232 (define_mode_iterator VI
233 [(V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")
234 (V32QI "TARGET_AVX") V16QI
235 (V16HI "TARGET_AVX") V8HI
236 (V8SI "TARGET_AVX") V4SI
237 (V4DI "TARGET_AVX") V2DI])
;; Same set of modes as VI, but the 256-bit modes require TARGET_AVX2
;; instead of TARGET_AVX.
239 (define_mode_iterator VI_AVX2
240 [(V32QI "TARGET_AVX2") V16QI
241 (V16HI "TARGET_AVX2") V8HI
242 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI
243 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
245 ;; All QImode vector integer modes
;; V32QI needs TARGET_AVX; V16QI is unconditional.  V64QI is not included.
246 (define_mode_iterator VI1
247 [(V32QI "TARGET_AVX") V16QI])
;; Integer modes of the unaligned load/store patterns
;; (<sse2_avx_avx512f>_loaddqu<mode> and <sse2_avx_avx512f>_storedqu<mode>):
;; QI vectors for 128 and 256 bits, V16SI and V8DI for 512 bits.
249 (define_mode_iterator VI_UNALIGNED_LOADSTORE
250 [(V32QI "TARGET_AVX") V16QI
251 (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")])
253 ;; All DImode vector integer modes
;; V2DI is unconditional, V4DI needs TARGET_AVX, V8DI needs TARGET_AVX512F.
254 (define_mode_iterator VI8
255 [(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI])
;; Integer iterators named VI<n>_<isa>.  <n> lists the element sizes in
;; bytes (1 = QI, 2 = HI, 4 = SI, 8 = DI); <isa> names the target level(s)
;; that gate the wider modes.  Every 128-bit mode listed below is
;; unconditional.
257 (define_mode_iterator VI1_AVX2
258 [(V32QI "TARGET_AVX2") V16QI])
260 (define_mode_iterator VI2_AVX2
261 [(V16HI "TARGET_AVX2") V8HI])
;; V32HI is gated on TARGET_AVX512F, V16HI on TARGET_AVX2.
263 (define_mode_iterator VI2_AVX512F
264 [(V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX2") V8HI])
;; The only iterator of this group whose 256-bit mode needs just TARGET_AVX.
266 (define_mode_iterator VI4_AVX
267 [(V8SI "TARGET_AVX") V4SI])
269 (define_mode_iterator VI4_AVX2
270 [(V8SI "TARGET_AVX2") V4SI])
272 (define_mode_iterator VI4_AVX512F
273 [(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI])
;; All three SI widths, but of the DI modes only the 512-bit V8DI.
275 (define_mode_iterator VI48_AVX512F
276 [(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI
277 (V8DI "TARGET_AVX512F")])
279 (define_mode_iterator VI8_AVX2
280 [(V4DI "TARGET_AVX2") V2DI])
282 (define_mode_iterator VI8_AVX2_AVX512F
283 [(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
286 (define_mode_iterator V8FI
290 (define_mode_iterator V16FI
293 ;; ??? We should probably use TImode instead.
;; V1TI is unconditional; V2TI needs TARGET_AVX2.
294 (define_mode_iterator VIMAX_AVX2
295 [(V2TI "TARGET_AVX2") V1TI])
297 ;; ??? This should probably be dropped in favor of VIMAX_AVX2.
;; Differs from VIMAX_AVX2 only in using the scalar TI mode instead of V1TI
;; for the 128-bit entry.
298 (define_mode_iterator SSESCALARMODE
299 [(V2TI "TARGET_AVX2") TI])
;; Multi-size integer iterators.  The digits in each name list the element
;; sizes in bytes that are covered; 256-bit modes need TARGET_AVX2, 512-bit
;; modes need TARGET_AVX512F and 128-bit modes are unconditional.
301 (define_mode_iterator VI12_AVX2
302 [(V32QI "TARGET_AVX2") V16QI
303 (V16HI "TARGET_AVX2") V8HI])
305 (define_mode_iterator VI24_AVX2
306 [(V16HI "TARGET_AVX2") V8HI
307 (V8SI "TARGET_AVX2") V4SI])
;; QI/HI/SI modes up to 256 bits plus the 512-bit SI and DI modes; no
;; 128/256-bit DI modes.
309 (define_mode_iterator VI124_AVX2_48_AVX512F
310 [(V32QI "TARGET_AVX2") V16QI
311 (V16HI "TARGET_AVX2") V8HI
312 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI
313 (V8DI "TARGET_AVX512F")])
;; 512-bit HI and SI modes are included, but there is no V64QI entry.
315 (define_mode_iterator VI124_AVX512F
316 [(V32QI "TARGET_AVX2") V16QI
317 (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX2") V8HI
318 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI])
320 (define_mode_iterator VI124_AVX2
321 [(V32QI "TARGET_AVX2") V16QI
322 (V16HI "TARGET_AVX2") V8HI
323 (V8SI "TARGET_AVX2") V4SI])
325 (define_mode_iterator VI248_AVX2
326 [(V16HI "TARGET_AVX2") V8HI
327 (V8SI "TARGET_AVX2") V4SI
328 (V4DI "TARGET_AVX2") V2DI])
;; As VI248_AVX2 plus the 512-bit V8DI.
330 (define_mode_iterator VI248_AVX2_8_AVX512F
331 [(V16HI "TARGET_AVX2") V8HI
332 (V8SI "TARGET_AVX2") V4SI
333 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
335 (define_mode_iterator VI48_AVX2_48_AVX512F
336 [(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI
337 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
;; Note that even the 128-bit integer modes V4SI and V2DI require
;; TARGET_AVX2 in this iterator.
339 (define_mode_iterator V48_AVX2
342 (V4SI "TARGET_AVX2") (V2DI "TARGET_AVX2")
343 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")])
;; ISA prefix used in AVX-512 pattern names such as
;; <avx512>_load<mode>_mask: "avx512vl" for 128/256-bit modes, "avx512bw"
;; for the 512-bit QI/HI modes and "avx512f" for the other 512-bit modes.
345 (define_mode_attr avx512
346 [(V16QI "avx512vl") (V32QI "avx512vl") (V64QI "avx512bw")
347 (V8HI "avx512vl") (V16HI "avx512vl") (V32HI "avx512bw")
348 (V4SI "avx512vl") (V8SI "avx512vl") (V16SI "avx512f")
349 (V2DI "avx512vl") (V4DI "avx512vl") (V8DI "avx512f")
350 (V4SF "avx512vl") (V8SF "avx512vl") (V16SF "avx512f")
351 (V2DF "avx512vl") (V4DF "avx512vl") (V8DF "avx512f")])
;; Pattern-name prefix, used e.g. by <sse2_avx_avx512f>_loaddqu<mode>.
;; QI and SI integer modes map to sse2/avx/avx512f by width.  The HI modes
;; and the 128/256-bit DI modes map to avx512vl (avx512bw for V32HI).
;; Float modes map to avx, or avx512f for the 512-bit ones.
353 (define_mode_attr sse2_avx_avx512f
354 [(V16QI "sse2") (V32QI "avx") (V64QI "avx512f")
355 (V8HI "avx512vl") (V16HI "avx512vl") (V32HI "avx512bw")
356 (V4SI "sse2") (V8SI "avx") (V16SI "avx512f")
357 (V2DI "avx512vl") (V4DI "avx512vl") (V8DI "avx512f")
358 (V16SF "avx512f") (V8SF "avx") (V4SF "avx")
359 (V8DF "avx512f") (V4DF "avx") (V2DF "avx")])
;; ISA prefix for integer modes: 128-bit -> "sse2", 256-bit -> "avx2",
;; 512-bit -> "avx512bw" for QI/HI/TI vectors and "avx512f" for SI/DI.
361 (define_mode_attr sse2_avx2
362 [(V16QI "sse2") (V32QI "avx2") (V64QI "avx512bw")
363 (V8HI "sse2") (V16HI "avx2") (V32HI "avx512bw")
364 (V4SI "sse2") (V8SI "avx2") (V16SI "avx512f")
365 (V2DI "sse2") (V4DI "avx2") (V8DI "avx512f")
366 (V1TI "sse2") (V2TI "avx2") (V4TI "avx512bw")])
;; ISA prefix: "ssse3" for V4HI, the 128-bit modes and TI; "avx2" for the
;; 256-bit modes; "avx512bw" for V64QI, V32HI and V4TI.  The 128-bit TI
;; entry is keyed on the scalar TI mode, not V1TI.
368 (define_mode_attr ssse3_avx2
369 [(V16QI "ssse3") (V32QI "avx2") (V64QI "avx512bw")
370 (V4HI "ssse3") (V8HI "ssse3") (V16HI "avx2") (V32HI "avx512bw")
371 (V4SI "ssse3") (V8SI "avx2")
372 (V2DI "ssse3") (V4DI "avx2")
373 (TI "ssse3") (V2TI "avx2") (V4TI "avx512bw")])
;; ISA prefix: "sse4_1" for 128-bit and "avx2" for 256-bit integer modes.
;; The 512-bit modes map to avx512bw (QI/HI), avx512f (SI) and avx512dq (DI).
375 (define_mode_attr sse4_1_avx2
376 [(V16QI "sse4_1") (V32QI "avx2") (V64QI "avx512bw")
377 (V8HI "sse4_1") (V16HI "avx2") (V32HI "avx512bw")
378 (V4SI "sse4_1") (V8SI "avx2") (V16SI "avx512f")
379 (V2DI "sse4_1") (V4DI "avx2") (V8DI "avx512dq")])
;; ISA prefix: float modes map to "avx", SI/DI integer modes to "avx2",
;; for both the 128-bit and the 256-bit width.
381 (define_mode_attr avx_avx2
382 [(V4SF "avx") (V2DF "avx")
383 (V8SF "avx") (V4DF "avx")
384 (V4SI "avx2") (V2DI "avx2")
385 (V8SI "avx2") (V4DI "avx2")])
;; Prefix for integer modes: "vec" for the 128-bit modes, "avx2" for the
;; 256-bit modes.
387 (define_mode_attr vec_avx2
388 [(V16QI "vec") (V32QI "avx2")
389 (V8HI "vec") (V16HI "avx2")
390 (V4SI "vec") (V8SI "avx2")
391 (V2DI "vec") (V4DI "avx2")])
;; ISA prefix: "avx512f" for 512-bit modes, "avx2" otherwise.  There are no
;; entries for the 128-bit float modes V4SF and V2DF.
393 (define_mode_attr avx2_avx512f
394 [(V4SI "avx2") (V8SI "avx2") (V16SI "avx512f")
395 (V2DI "avx2") (V4DI "avx2") (V8DI "avx512f")
396 (V8SF "avx2") (V16SF "avx512f")
397 (V4DF "avx2") (V8DF "avx512f")])
;; ISA prefix: SI/DI/SF/DF modes map to "avx2" (128/256-bit) or "avx512f"
;; (512-bit); HI modes map to "avx512vl" (128/256-bit) or "avx512bw"
;; (512-bit).  There are no QI entries.
399 (define_mode_attr avx2_avx512bw
400 [(V4SI "avx2") (V8SI "avx2") (V16SI "avx512f")
401 (V2DI "avx2") (V4DI "avx2") (V8DI "avx512f")
402 (V4SF "avx2") (V8SF "avx2") (V16SF "avx512f")
403 (V2DF "avx2") (V4DF "avx2") (V8DF "avx512f")
404 (V8HI "avx512vl") (V16HI "avx512vl") (V32HI "avx512bw")])
;; Type letter spliced into shuffle/extract/broadcast mnemonics, e.g.
;; vextract<shuffletype>64x4 and vbroadcast<shuffletype>32x4 in
;; *mov<mode>_internal: "f" for float vector modes, "i" for every integer
;; vector mode.  V16HI must be "i" like the other integer modes; any other
;; letter would produce a mnemonic such as vextractu64x4, which does not
;; exist.
406 (define_mode_attr shuffletype
407 [(V16SF "f") (V16SI "i") (V8DF "f") (V8DI "i")
408 (V8SF "f") (V8SI "i") (V4DF "f") (V4DI "i")
409 (V4SF "f") (V4SI "i") (V2DF "f") (V2DI "i")
410 (V32QI "i") (V16HI "i") (V16QI "i") (V8HI "i")
411 (V64QI "i") (V1TI "i") (V2TI "i")])
;; Maps a 512-bit SI/DI/SF/DF vector mode to the mode with the same element
;; type and a quarter of the elements.
413 (define_mode_attr ssequartermode
414 [(V16SF "V4SF") (V8DF "V2DF") (V16SI "V4SI") (V8DI "V2DI")])
;; Lower-case name of the mode with the same number of elements and twice
;; the element width (QI -> HI, HI -> SI, SI -> DI).
416 (define_mode_attr ssedoublemodelower
417 [(V16QI "v16hi") (V32QI "v32hi") (V64QI "v64hi")
418 (V8HI "v8si") (V16HI "v16si") (V32HI "v32si")
419 (V4SI "v4di") (V8SI "v8di") (V16SI "v16di")])
;; Mode of twice the total size.  The entries double in two different ways:
;; the first two rows keep the element type and double the element count
;; (e.g. V8SF -> V16SF), the last two rows keep the element count and double
;; the element width (e.g. V16HI -> V16SI, V4SI -> V4DI).
421 (define_mode_attr ssedoublemode
422 [(V16SF "V32SF") (V16SI "V32SI") (V8DI "V16DI") (V8DF "V16DF")
423 (V8SF "V16SF") (V8SI "V16SI") (V4DI "V8DI") (V4DF "V8DF")
424 (V16HI "V16SI") (V8HI "V8SI") (V4HI "V4SI") (V4SI "V4DI")
425 (V32HI "V32SI") (V32QI "V32HI") (V16QI "V16HI") (V64QI "V64HI")])
;; Maps a DI vector mode to the QI vector mode of the same total size.
427 (define_mode_attr ssebytemode
428 [(V8DI "V64QI") (V4DI "V32QI") (V2DI "V16QI")])
430 ;; All 128bit vector integer modes
;; The three fixed-width integer iterators below list QI, HI, SI and DI
;; element modes of one width each; no entry carries a condition.
431 (define_mode_iterator VI_128 [V16QI V8HI V4SI V2DI])
433 ;; All 256bit vector integer modes
434 (define_mode_iterator VI_256 [V32QI V16HI V8SI V4DI])
436 ;; All 512bit vector integer modes
437 (define_mode_iterator VI_512 [V64QI V32HI V16SI V8DI])
439 ;; Various 128bit vector integer mode combinations
;; The digits between "VI" and "_128" list the element sizes in bytes that
;; each iterator contains (1 = V16QI, 2 = V8HI, 4 = V4SI, 8 = V2DI); e.g.
;; VI128_128 holds the 1-, 2- and 8-byte element modes.
440 (define_mode_iterator VI12_128 [V16QI V8HI])
441 (define_mode_iterator VI14_128 [V16QI V4SI])
442 (define_mode_iterator VI124_128 [V16QI V8HI V4SI])
443 (define_mode_iterator VI128_128 [V16QI V8HI V2DI])
444 (define_mode_iterator VI24_128 [V8HI V4SI])
445 (define_mode_iterator VI248_128 [V8HI V4SI V2DI])
446 (define_mode_iterator VI48_128 [V4SI V2DI])
448 ;; Various 256bit and 512 vector integer mode combinations
;; VI124_256_48_512: the 256-bit QI/HI/SI modes are unconditional, the
;; 512-bit DI and SI modes need TARGET_AVX512F.
;; VI4_256_8_512 pairs the 256-bit SI mode with the 512-bit DI mode; both
;; have eight elements.
449 (define_mode_iterator VI124_256_48_512
450 [V32QI V16HI V8SI (V8DI "TARGET_AVX512F") (V16SI "TARGET_AVX512F")])
451 (define_mode_iterator VI48_256 [V8SI V4DI])
452 (define_mode_iterator VI48_512 [V16SI V8DI])
453 (define_mode_iterator VI4_256_8_512 [V8SI V8DI])
455 ;; Int-float size matches
;; Each iterator pairs an integer vector mode with the float vector mode of
;; the same element size and element count (SI with SF, DI with DF).
;; In VI8F_256_512 only the 512-bit entries are conditional
;; (TARGET_AVX512F).
456 (define_mode_iterator VI4F_128 [V4SI V4SF])
457 (define_mode_iterator VI8F_128 [V2DI V2DF])
458 (define_mode_iterator VI4F_256 [V8SI V8SF])
459 (define_mode_iterator VI8F_256 [V4DI V4DF])
460 (define_mode_iterator VI8F_256_512
461 [V4DI V4DF (V8DI "TARGET_AVX512F") (V8DF "TARGET_AVX512F")])
;; The 512-bit entries of VI48F_256_512 require TARGET_AVX512F.
;; VI48F_512 lists the same four 512-bit modes without conditions and is
;; the iterator of the avx512f_blendm<mode> insn.
462 (define_mode_iterator VI48F_256_512
464 (V16SI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
465 (V8DI "TARGET_AVX512F") (V8DF "TARGET_AVX512F")])
466 (define_mode_iterator VI48F_512 [V16SI V16SF V8DI V8DF])
468 ;; Mapping from float mode to required SSE level
;; Also has entries for the scalar SF and DF modes.  SF-based 128-bit modes
;; map to "sse", DF-based ones to "sse2", 256-bit modes to "avx" and 512-bit
;; modes to "avx512f".  Used in pattern names such as
;; <sse>_loadu<ssemodesuffix><avxsizesuffix>.
469 (define_mode_attr sse
470 [(SF "sse") (DF "sse2")
471 (V4SF "sse") (V2DF "sse2")
472 (V16SF "avx512f") (V8SF "avx")
473 (V8DF "avx512f") (V4DF "avx")])
;; ISA prefix for QI and DI vector modes only: "sse2", "avx" or "avx512f"
;; for 128-, 256- and 512-bit modes respectively.
475 (define_mode_attr sse2
476 [(V16QI "sse2") (V32QI "avx") (V64QI "avx512f")
477 (V2DI "sse2") (V4DI "avx") (V8DI "avx512f")])
;; ISA prefix for the two QI vector modes: "sse3" for V16QI, "avx" for V32QI.
479 (define_mode_attr sse3
480 [(V16QI "sse3") (V32QI "avx")])
;; ISA prefix for float modes: "sse4_1" for the 128-bit modes, "avx" for the
;; 256-bit modes.
482 (define_mode_attr sse4_1
483 [(V4SF "sse4_1") (V2DF "sse4_1")
484 (V8SF "avx") (V4DF "avx")
;; Width suffix for pattern names: "512" and "256" for the 512- and 256-bit
;; modes, the empty string for the 128-bit modes.
487 (define_mode_attr avxsizesuffix
488 [(V64QI "512") (V32HI "512") (V16SI "512") (V8DI "512")
489 (V32QI "256") (V16HI "256") (V8SI "256") (V4DI "256")
490 (V16QI "") (V8HI "") (V4SI "") (V2DI "")
491 (V16SF "512") (V8DF "512")
492 (V8SF "256") (V4DF "256")
493 (V4SF "") (V2DF "")])
495 ;; SSE instruction mode
;; Integer and TI vector modes map to the scalar integer mode of the same
;; width (TI, OI or XI); float vector modes map to themselves.  The result
;; is used as the value of the "mode" insn attribute.
496 (define_mode_attr sseinsnmode
497 [(V64QI "XI") (V32HI "XI") (V16SI "XI") (V8DI "XI") (V4TI "XI")
498 (V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI") (V2TI "OI")
499 (V16QI "TI") (V8HI "TI") (V4SI "TI") (V2DI "TI") (V1TI "TI")
500 (V16SF "V16SF") (V8DF "V8DF")
501 (V8SF "V8SF") (V4DF "V4DF")
502 (V4SF "V4SF") (V2DF "V2DF")
505 ;; Mapping of vector modes to corresponding mask size
;; The mask mode grows with the element count: QI for up to 8 elements, HI
;; for 16, SI for 32 and DI for 64.  It is the mode of the mask register
;; operand (constraint Yk) in the masked patterns.
506 (define_mode_attr avx512fmaskmode
507 [(V64QI "DI") (V32QI "SI") (V16QI "HI")
508 (V32HI "SI") (V16HI "HI") (V8HI "QI") (V4HI "QI")
509 (V16SI "HI") (V8SI "QI") (V4SI "QI")
510 (V8DI "QI") (V4DI "QI") (V2DI "QI")
511 (V16SF "HI") (V8SF "QI") (V4SF "QI")
512 (V8DF "QI") (V4DF "QI") (V2DF "QI")])
514 ;; Mapping of vector float modes to an integer mode of the same size
;; SF vectors map to SI vectors and DF vectors to DI vectors with the same
;; element count.  Integer vector modes map to themselves.
515 (define_mode_attr sseintvecmode
516 [(V16SF "V16SI") (V8DF "V8DI")
517 (V8SF "V8SI") (V4DF "V4DI")
518 (V4SF "V4SI") (V2DF "V2DI")
519 (V16SI "V16SI") (V8DI "V8DI")
520 (V8SI "V8SI") (V4DI "V4DI")
521 (V4SI "V4SI") (V2DI "V2DI")
522 (V16HI "V16HI") (V8HI "V8HI")
523 (V32HI "V32HI") (V64QI "V64QI")
524 (V32QI "V32QI") (V16QI "V16QI")])
;; Lower-case counterpart of sseintvecmode.  Compared with that table it has
;; no entries for the 512-bit integer modes (V16SI, V8DI, V32HI, V64QI).
526 (define_mode_attr sseintvecmodelower
527 [(V16SF "v16si") (V8DF "v8di")
528 (V8SF "v8si") (V4DF "v4di")
529 (V4SF "v4si") (V2DF "v2di")
530 (V8SI "v8si") (V4DI "v4di")
531 (V4SI "v4si") (V2DI "v2di")
532 (V16HI "v16hi") (V8HI "v8hi")
533 (V32QI "v32qi") (V16QI "v16qi")])
535 ;; Mapping of vector modes to a vector mode of double size
;; Same element type, twice the element count.  Only 128- and 256-bit modes
;; are keys.
536 (define_mode_attr ssedoublevecmode
537 [(V32QI "V64QI") (V16HI "V32HI") (V8SI "V16SI") (V4DI "V8DI")
538 (V16QI "V32QI") (V8HI "V16HI") (V4SI "V8SI") (V2DI "V4DI")
539 (V8SF "V16SF") (V4DF "V8DF")
540 (V4SF "V8SF") (V2DF "V4DF")])
542 ;; Mapping of vector modes to a vector mode of half size
;; Same element type, half the element count.  The 128-bit QI/HI/SI modes
;; map to 64-bit vector modes (V8QI, V4HI, V2SI); there is no V2DI key.
543 (define_mode_attr ssehalfvecmode
544 [(V64QI "V32QI") (V32HI "V16HI") (V16SI "V8SI") (V8DI "V4DI")
545 (V32QI "V16QI") (V16HI "V8HI") (V8SI "V4SI") (V4DI "V2DI")
546 (V16QI "V8QI") (V8HI "V4HI") (V4SI "V2SI")
547 (V16SF "V8SF") (V8DF "V4DF")
548 (V8SF "V4SF") (V4DF "V2DF")
551 ;; Mapping of vector modes to packed single mode of the same size
;; Every 128-bit mode maps to V4SF, every 256-bit mode to V8SF and every
;; 512-bit mode to V16SF, regardless of element type.  Used for the "mode"
;; insn attribute when a packed-single encoding is preferred.
552 (define_mode_attr ssePSmode
553 [(V16SI "V16SF") (V8DF "V16SF")
554 (V16SF "V16SF") (V8DI "V16SF")
555 (V64QI "V16SF") (V32QI "V8SF") (V16QI "V4SF")
556 (V32HI "V16SF") (V16HI "V8SF") (V8HI "V4SF")
557 (V8SI "V8SF") (V4SI "V4SF")
558 (V4DI "V8SF") (V2DI "V4SF")
559 (V4TI "V16SF") (V2TI "V8SF") (V1TI "V4SF")
560 (V8SF "V8SF") (V4SF "V4SF")
561 (V4DF "V8SF") (V2DF "V4SF")])
;; Unlike ssePSmode this keeps the element count rather than the total
;; size: a DI vector maps to the SF vector with the same number of elements.
563 (define_mode_attr ssePSmode2
564 [(V8DI "V8SF") (V4DI "V4SF")])
566 ;; Mapping of vector modes back to the scalar modes
;; Element mode of each vector mode, one row per element type.
567 (define_mode_attr ssescalarmode
568 [(V64QI "QI") (V32QI "QI") (V16QI "QI")
569 (V32HI "HI") (V16HI "HI") (V8HI "HI")
570 (V16SI "SI") (V8SI "SI") (V4SI "SI")
571 (V8DI "DI") (V4DI "DI") (V2DI "DI")
572 (V16SF "SF") (V8SF "SF") (V4SF "SF")
573 (V8DF "DF") (V4DF "DF") (V2DF "DF")])
575 ;; Mapping of vector modes to the 128bit modes
;; Keeps the element type and yields the 128-bit vector mode of that type;
;; 128-bit modes map to themselves.
576 (define_mode_attr ssexmmmode
577 [(V64QI "V16QI") (V32QI "V16QI") (V16QI "V16QI")
578 (V32HI "V8HI") (V16HI "V8HI") (V8HI "V8HI")
579 (V16SI "V4SI") (V8SI "V4SI") (V4SI "V4SI")
580 (V8DI "V2DI") (V4DI "V2DI") (V2DI "V2DI")
581 (V16SF "V4SF") (V8SF "V4SF") (V4SF "V4SF")
582 (V8DF "V2DF") (V4DF "V2DF") (V2DF "V2DF")])
584 ;; Pointer size override for scalar modes (Intel asm dialect)
;; The letter follows the element size: "b" for QI, "w" for HI, "k" for
;; SI/SF and "q" for DI/DF elements.
585 (define_mode_attr iptr
586 [(V32QI "b") (V16HI "w") (V8SI "k") (V4DI "q")
587 (V16QI "b") (V8HI "w") (V4SI "k") (V2DI "q")
588 (V8SF "k") (V4DF "q")
589 (V4SF "k") (V2DF "q")
592 ;; Number of scalar elements in each vector type
;; The value is the element count as a decimal string.  There is no entry
;; for V32HI.
593 (define_mode_attr ssescalarnum
594 [(V64QI "64") (V16SI "16") (V8DI "8")
595 (V32QI "32") (V16HI "16") (V8SI "8") (V4DI "4")
596 (V16QI "16") (V8HI "8") (V4SI "4") (V2DI "2")
597 (V16SF "16") (V8DF "8")
598 (V8SF "8") (V4DF "4")
599 (V4SF "4") (V2DF "2")])
601 ;; Mask of scalar elements in each vector type
;; The value is the element count minus one, i.e. the largest valid element
;; index.  Only 128- and 256-bit modes have entries.
602 (define_mode_attr ssescalarnummask
603 [(V32QI "31") (V16HI "15") (V8SI "7") (V4DI "3")
604 (V16QI "15") (V8HI "7") (V4SI "3") (V2DI "1")
605 (V8SF "7") (V4DF "3")
606 (V4SF "3") (V2DF "1")])
;; Element size in bits.  It forms the suffix of vmovdqu<ssescalarsize> and
;; vmovdqa<ssescalarsize> in the masked load/store insns.  Of the float
;; modes only V16SF and V8DF have entries.
608 (define_mode_attr ssescalarsize
609 [(V8DI "64") (V4DI "64") (V2DI "64")
610 (V64QI "8") (V32QI "8") (V16QI "8")
611 (V32HI "16") (V16HI "16") (V8HI "16")
612 (V16SI "32") (V8SI "32") (V4SI "32")
613 (V16SF "32") (V8DF "64")])
615 ;; SSE prefix for integer vector modes
;; "p" for integer vector modes and the empty string for float vector
;; modes; spliced into mnemonics such as v<sseintprefix>blendm<ssemodesuffix>.
616 (define_mode_attr sseintprefix
617 [(V2DI "p") (V2DF "")
622 (V16SI "p") (V16SF "")
623 (V16QI "p") (V8HI "p")
624 (V32QI "p") (V16HI "p")
625 (V64QI "p") (V32HI "p")])
627 ;; SSE scalar suffix for vector modes
;; "ss" for vectors of 4-byte elements and "sd" for vectors of 8-byte
;; elements; the integer modes V8SI and V4DI use the same suffixes as the
;; float modes of equal element size.
628 (define_mode_attr ssescalarmodesuffix
630 (V8SF "ss") (V4DF "sd")
631 (V4SF "ss") (V2DF "sd")
632 (V8SI "ss") (V4DI "sd")
635 ;; Pack/unpack vector modes
;; Unpack result mode: same total size, half the element count, twice the
;; element width (QI -> HI, HI -> SI, SI -> DI).
636 (define_mode_attr sseunpackmode
637 [(V16QI "V8HI") (V8HI "V4SI") (V4SI "V2DI")
638 (V32QI "V16HI") (V16HI "V8SI") (V8SI "V4DI")
639 (V32HI "V16SI") (V64QI "V32HI") (V16SI "V8DI")])
;; Pack result mode, the inverse of sseunpackmode: same total size, twice
;; the element count, half the element width.
641 (define_mode_attr ssepackmode
642 [(V8HI "V16QI") (V4SI "V8HI") (V2DI "V4SI")
643 (V16HI "V32QI") (V8SI "V16HI") (V4DI "V8SI")
644 (V32HI "V64QI") (V16SI "V32HI") (V8DI "V16SI")])
646 ;; Mapping of the max integer size for xop rotate immediate constraint
;; The value is the element width in bits minus one, for the 128-bit
;; integer modes only.
647 (define_mode_attr sserotatemax
648 [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")])
650 ;; Mapping of mode to cast intrinsic name
;; "si" for SI vectors, "ps" for SF vectors, "pd" for DF vectors; entries
;; exist for the 256- and 512-bit modes only.
651 (define_mode_attr castmode
652 [(V8SI "si") (V8SF "ps") (V4DF "pd")
653 (V16SI "si") (V16SF "ps") (V8DF "pd")])
655 ;; Instruction suffix for sign and zero extensions.
;; A code attribute (keyed on RTL codes, not modes): "sx" for sign_extend,
;; "zx" for zero_extend.
656 (define_code_attr extsuffix [(sign_extend "sx") (zero_extend "zx")])
658 ;; i128 for integer vectors and TARGET_AVX2, f128 otherwise.
659 ;; i64x4 or f64x4 for 512bit modes.
;; 256-bit float modes map to "f128".  256-bit integer modes map to
;; "%~128", presumably so that the i/f choice is made when the template is
;; output (see the comment above; confirm against the %~ handling in the
;; back end).  512-bit modes map to "f64x4" or "i64x4".
660 (define_mode_attr i128
661 [(V16SF "f64x4") (V8SF "f128") (V8DF "f64x4") (V4DF "f128")
662 (V64QI "i64x4") (V32QI "%~128") (V32HI "i64x4") (V16HI "%~128")
663 (V16SI "i64x4") (V8SI "%~128") (V8DI "i64x4") (V4DI "%~128")])
;; The SI, SF and DF vector modes of one width each: 256 bits for
;; AVX256MODE2P, 512 bits for AVX512MODE2P.  These are the same modes that
;; the castmode attribute has entries for.
666 (define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF])
667 (define_mode_iterator AVX512MODE2P [V16SI V16SF V8DF])
669 ;; Mapping for dbpsadbw modes
;; Maps an HI vector mode to the QI vector mode of the same total size.
670 (define_mode_attr dbpsadbwmode
671 [(V32HI "V64QI") (V16HI "V32QI") (V8HI "V16QI")])
673 ;; Mapping suffixes for broadcast
;; The suffix depends only on the element type: b/w/d/q for QI/HI/SI/DI
;; elements, "ss" for SF and "sd" for DF.
674 (define_mode_attr bcstscalarsuff
675 [(V64QI "b") (V32QI "b") (V16QI "b")
676 (V32HI "w") (V16HI "w") (V8HI "w")
677 (V16SI "d") (V8SI "d") (V4SI "d")
678 (V8DI "q") (V4DI "q") (V2DI "q")
679 (V16SF "ss") (V8SF "ss") (V4SF "ss")
680 (V8DF "sd") (V4DF "sd") (V2DF "sd")])
682 ;; Include define_subst patterns for instructions with mask
685 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
687 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
691 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
693 ;; All of these patterns are enabled for SSE1 as well as SSE2.
694 ;; This is essential for maintaining stable calling conventions.
;; Move expander for every VMOVE mode.  Both operands may be registers or
;; memory; they are handed to ix86_expand_vector_move together with the
;; mode.
696 (define_expand "mov<mode>"
697 [(set (match_operand:VMOVE 0 "nonimmediate_operand")
698 (match_operand:VMOVE 1 "nonimmediate_operand"))]
701 ix86_expand_vector_move (<MODE>mode, operands);
;; Move insn for every VMOVE mode.  Alternatives:
;;   0: SSE constant (constraint C) into a register, emitted through
;;      standard_sse_constant_opcode;
;;   1: register or memory into a register;
;;   2: register into memory.
;; The insn condition requires at least one operand to be a register.
;; With TARGET_AVX512F but without TARGET_AVX512VL, a move of a mode
;; narrower than 64 bytes that touches an EXT_REX_SSE register is emitted
;; as vextract<shuffletype>NNxM (store), vbroadcast<shuffletype>NNxM (load)
;; or a wider reg-reg move using the %g operand modifier; see the comment
;; in the output code.
;; Otherwise the unaligned mnemonic is chosen when either operand is a
;; misaligned_operand.
;; The "mode" attribute is <ssePSmode> for 16-byte modes when
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL holds or when alternative 2 is
;; used with TARGET_SSE_TYPELESS_STORES, and V4SF without SSE2 or when
;; optimizing for size.
705 (define_insn "*mov<mode>_internal"
706 [(set (match_operand:VMOVE 0 "nonimmediate_operand" "=v,v ,m")
707 (match_operand:VMOVE 1 "nonimmediate_or_sse_const_operand" "C ,vm,v"))]
709 && (register_operand (operands[0], <MODE>mode)
710 || register_operand (operands[1], <MODE>mode))"
712 int mode = get_attr_mode (insn);
713 switch (which_alternative)
716 return standard_sse_constant_opcode (insn, operands[1]);
719 /* There is no evex-encoded vmov* for sizes smaller than 64-bytes
720 in avx512f, so we need to use workarounds, to access sse registers
721 16-31, which are evex-only. In avx512vl we don't need workarounds. */
722 if (TARGET_AVX512F && GET_MODE_SIZE (<MODE>mode) < 64 && !TARGET_AVX512VL
723 && ((REG_P (operands[0]) && EXT_REX_SSE_REGNO_P (REGNO (operands[0])))
724 || (REG_P (operands[1]) && EXT_REX_SSE_REGNO_P (REGNO (operands[1])))))
726 if (memory_operand (operands[0], <MODE>mode))
728 if (<MODE_SIZE> == 32)
729 return "vextract<shuffletype>64x4\t{$0x0, %g1, %0|%0, %g1, 0x0}";
730 else if (<MODE_SIZE> == 16)
731 return "vextract<shuffletype>32x4\t{$0x0, %g1, %0|%0, %g1, 0x0}";
735 else if (memory_operand (operands[1], <MODE>mode))
737 if (<MODE_SIZE> == 32)
738 return "vbroadcast<shuffletype>64x4\t{%1, %g0|%g0, %1}";
739 else if (<MODE_SIZE> == 16)
740 return "vbroadcast<shuffletype>32x4\t{%1, %g0|%g0, %1}";
745 /* Reg -> reg move is always aligned. Just use wider move. */
750 return "vmovaps\t{%g1, %g0|%g0, %g1}";
753 return "vmovapd\t{%g1, %g0|%g0, %g1}";
756 return "vmovdqa64\t{%g1, %g0|%g0, %g1}";
767 && (misaligned_operand (operands[0], <MODE>mode)
768 || misaligned_operand (operands[1], <MODE>mode)))
769 return "vmovups\t{%1, %0|%0, %1}";
771 return "%vmovaps\t{%1, %0|%0, %1}";
777 && (misaligned_operand (operands[0], <MODE>mode)
778 || misaligned_operand (operands[1], <MODE>mode)))
779 return "vmovupd\t{%1, %0|%0, %1}";
781 return "%vmovapd\t{%1, %0|%0, %1}";
786 && (misaligned_operand (operands[0], <MODE>mode)
787 || misaligned_operand (operands[1], <MODE>mode)))
788 return TARGET_AVX512VL ? "vmovdqu64\t{%1, %0|%0, %1}"
789 : "vmovdqu\t{%1, %0|%0, %1}";
791 return TARGET_AVX512VL ? "vmovdqa64\t{%1, %0|%0, %1}"
792 : "%vmovdqa\t{%1, %0|%0, %1}";
794 if (misaligned_operand (operands[0], <MODE>mode)
795 || misaligned_operand (operands[1], <MODE>mode))
796 return "vmovdqu64\t{%1, %0|%0, %1}";
798 return "vmovdqa64\t{%1, %0|%0, %1}";
807 [(set_attr "type" "sselog1,ssemov,ssemov")
808 (set_attr "prefix" "maybe_vex")
810 (cond [(and (match_test "<MODE_SIZE> == 16")
811 (ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
812 (and (eq_attr "alternative" "2")
813 (match_test "TARGET_SSE_TYPELESS_STORES"))))
814 (const_string "<ssePSmode>")
815 (match_test "TARGET_AVX")
816 (const_string "<sseinsnmode>")
817 (ior (not (match_test "TARGET_SSE2"))
818 (match_test "optimize_function_for_size_p (cfun)"))
819 (const_string "V4SF")
820 (and (eq_attr "alternative" "0")
821 (match_test "TARGET_SSE_LOAD0_BY_PXOR"))
824 (const_string "<sseinsnmode>")))])
;; Masked move into a register.  The vec_merge takes operand 1 (register
;; in alternative 0, memory in alternative 1) and operand 2, which is
;; either tied to the destination ("0") or matches constraint C, under
;; mask register operand 3 (Yk).  The mask and %N2 are attached to the
;; destination in the output template.
;; Float modes use vmovu/vmova<ssemodesuffix>, choosing the unaligned form
;; when operand 1 is a misaligned_operand.  Integer modes use
;; vmovdqu/vmovdqa<ssescalarsize>; QI and HI element modes always take the
;; unaligned form because vmovdqa8/16 do not exist.
826 (define_insn "<avx512>_load<mode>_mask"
827 [(set (match_operand:V_AVX512VL 0 "register_operand" "=v,v")
828 (vec_merge:V_AVX512VL
829 (match_operand:V_AVX512VL 1 "nonimmediate_operand" "v,m")
830 (match_operand:V_AVX512VL 2 "vector_move_operand" "0C,0C")
831 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
834 switch (MODE_<sseinsnmode>)
842 if (misaligned_operand (operands[1], <MODE>mode))
843 return "vmovu<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}";
844 return "vmova<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}";
846 /* There is no vmovdqa8/16 use vmovdqu8/16 instead. */
847 if (<MODE>mode == V64QImode
848 || <MODE>mode == V32QImode
849 || <MODE>mode == V16QImode
850 || <MODE>mode == V32HImode
851 || <MODE>mode == V16HImode
852 || <MODE>mode == V8HImode
853 || misaligned_operand (operands[1], <MODE>mode))
854 return "vmovdqu<ssescalarsize>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}";
856 return "vmovdqa<ssescalarsize>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}";
859 [(set_attr "type" "ssemov")
860 (set_attr "prefix" "evex")
861 (set_attr "memory" "none,load")
862 (set_attr "mode" "<sseinsnmode>")])
;; 512-bit blend (VI48F_512 modes) under mask register operand 3 (Yk).
;; Operand 2 may be a register or memory, operand 1 must be a register.
;; Emits v<sseintprefix>blendm<ssemodesuffix> with the mask attached to the
;; destination.
864 (define_insn "avx512f_blendm<mode>"
865 [(set (match_operand:VI48F_512 0 "register_operand" "=v")
867 (match_operand:VI48F_512 2 "nonimmediate_operand" "vm")
868 (match_operand:VI48F_512 1 "register_operand" "v")
869 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
871 "v<sseintprefix>blendm<ssemodesuffix>\t{%2, %1, %0%{%3%}|%0%{%3%}, %1, %2}"
872 [(set_attr "type" "ssemov")
873 (set_attr "prefix" "evex")
874 (set_attr "mode" "<sseinsnmode>")])
;; Masked store of register operand 1 to memory operand 0 under mask
;; register operand 2 (Yk).  Float modes use vmova<ssemodesuffix>.
;; Integer modes use vmovdqa<ssescalarsize>, except the QI and HI element
;; modes, which use vmovdqu<ssescalarsize> because vmovdqa8/16 do not
;; exist.
876 (define_insn "<avx512>_store<mode>_mask"
877 [(set (match_operand:V_AVX512VL 0 "memory_operand" "=m")
878 (vec_merge:V_AVX512VL
879 (match_operand:V_AVX512VL 1 "register_operand" "v")
881 (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
884 switch (MODE_<sseinsnmode>)
892 return "vmova<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
894 /* There is no vmovdqa8/16 use vmovdqu8/16 instead. */
895 if (<MODE>mode == V64QImode
896 || <MODE>mode == V32QImode
897 || <MODE>mode == V16QImode
898 || <MODE>mode == V32HImode
899 || <MODE>mode == V16HImode
900 || <MODE>mode == V8HImode)
901 return "vmovdqu<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
903 return "vmovdqa<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
906 [(set_attr "type" "ssemov")
907 (set_attr "prefix" "evex")
908 (set_attr "memory" "store")
909 (set_attr "mode" "<sseinsnmode>")])
;; Emits (v)movq from V2DI operand 1 (SSE register or memory) into an SSE
;; register.  The RTL refers to element 0 of operand 1 via
;; (parallel [(const_int 0)]).  In the Intel-syntax template the source is
;; printed as %q1.
911 (define_insn "sse2_movq128"
912 [(set (match_operand:V2DI 0 "register_operand" "=x")
915 (match_operand:V2DI 1 "nonimmediate_operand" "xm")
916 (parallel [(const_int 0)]))
919 "%vmovq\t{%1, %0|%0, %q1}"
920 [(set_attr "type" "ssemov")
921 (set_attr "prefix" "maybe_vex")
922 (set_attr "mode" "TI")])
924 ;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
925 ;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
926 ;; from memory, we'd prefer to load the memory directly into the %xmm
927 ;; register. To facilitate this happy circumstance, this pattern won't
928 ;; split until after register allocation. If the 64-bit value didn't
929 ;; come from memory, this is the best we can do. This is much better
930 ;; than storing %edx:%eax into a stack temporary and loading an %xmm
;; Only available for !TARGET_64BIT with SSE2 and inter-unit moves to
;; vector registers; it is split once reload has completed.
;; Register source: the two SImode halves of operand 1 (subreg offsets 0
;; and 4) are loaded with sse2_loadld into operand 0 and the scratch
;; operand 2, then combined with vec_interleave_lowv4si.
;; Memory source: the value is loaded with vec_concatv2di (high part
;; const0_rtx) and moved into operand 0 as V4SI.
;; NOTE(review): the memory branch allocates a fresh pseudo with
;; gen_reg_rtx although the split condition is "&& reload_completed" --
;; confirm this is safe, or build the V2DI value directly in operand 0.
933 (define_insn_and_split "movdi_to_sse"
935 [(set (match_operand:V4SI 0 "register_operand" "=?x,x")
936 (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
937 (clobber (match_scratch:V4SI 2 "=&x,X"))])]
938 "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_TO_VEC"
940 "&& reload_completed"
943 if (register_operand (operands[1], DImode))
945 /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
946 Assemble the 64-bit DImode value in an xmm register. */
947 emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
948 gen_rtx_SUBREG (SImode, operands[1], 0)));
949 emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
950 gen_rtx_SUBREG (SImode, operands[1], 4)));
951 emit_insn (gen_vec_interleave_lowv4si (operands[0], operands[0],
954 else if (memory_operand (operands[1], DImode))
956 rtx tmp = gen_reg_rtx (V2DImode);
957 emit_insn (gen_vec_concatv2di (tmp, operands[1], const0_rtx));
958 emit_move_insn (operands[0], gen_lowpart (V4SImode, tmp));
965 [(set (match_operand:V4SF 0 "register_operand")
966 (match_operand:V4SF 1 "zero_extended_scalar_load_operand"))]
967 "TARGET_SSE && reload_completed"
970 (vec_duplicate:V4SF (match_dup 1))
974 operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
975 operands[2] = CONST0_RTX (V4SFmode);
979 [(set (match_operand:V2DF 0 "register_operand")
980 (match_operand:V2DF 1 "zero_extended_scalar_load_operand"))]
981 "TARGET_SSE2 && reload_completed"
982 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
984 operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
985 operands[2] = CONST0_RTX (DFmode);
;; Misaligned-move expander for every VMOVE mode; the operands are handed
;; to ix86_expand_vector_move_misalign together with the mode.
988 (define_expand "movmisalign<mode>"
989 [(set (match_operand:VMOVE 0 "nonimmediate_operand")
990 (match_operand:VMOVE 1 "nonimmediate_operand"))]
993 ix86_expand_vector_move_misalign (<MODE>mode, operands);
;; Expander for unaligned float vector loads (VF modes), optionally masked
;; through the <mask_name> substitution.  When operand 1 is a
;; misaligned_operand, a plain SET of operand 0 is emitted instead of the
;; unspec form; for the masked variant the source is wrapped in a VEC_MERGE
;; with operands 2 and 3.  The "N * <mask_applied>" indices presumably
;; collapse to operands[0] in the unmasked variant, where that code is not
;; executed -- confirm against the mask subst definition.
997 (define_expand "<sse>_loadu<ssemodesuffix><avxsizesuffix><mask_name>"
998 [(set (match_operand:VF 0 "register_operand")
999 (unspec:VF [(match_operand:VF 1 "nonimmediate_operand")]
1001 "TARGET_SSE && <mask_mode512bit_condition>"
1003 /* For AVX, normal *mov<mode>_internal pattern will handle unaligned loads
1004 just fine if misaligned_operand is true, and without the UNSPEC it can
1005 be combined with arithmetic instructions. If misaligned_operand is
1006 false, still emit UNSPEC_LOADU insn to honor user's request for
1009 && misaligned_operand (operands[1], <MODE>mode))
1011 rtx src = operands[1];
1013 src = gen_rtx_VEC_MERGE (<MODE>mode, operands[1],
1014 operands[2 * <mask_applied>],
1015 operands[3 * <mask_applied>]);
1016 emit_insn (gen_rtx_SET (VOIDmode, operands[0], src));
;; Insn matching the unspec form of the unaligned float load.  Depending on
;; get_attr_mode it emits (v)movups or (v)movu<ssemodesuffix>, with
;; <mask_operand2> attached to the destination.
;; The "mode" attribute is <ssePSmode> for 16-byte modes under
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL, <MODE> under TARGET_AVX, and V4SF
;; when optimizing for size.
1021 (define_insn "*<sse>_loadu<ssemodesuffix><avxsizesuffix><mask_name>"
1022 [(set (match_operand:VF 0 "register_operand" "=v")
1024 [(match_operand:VF 1 "nonimmediate_operand" "vm")]
1026 "TARGET_SSE && <mask_mode512bit_condition>"
1028 switch (get_attr_mode (insn))
1033 return "%vmovups\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
1035 return "%vmovu<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
1038 [(set_attr "type" "ssemov")
1039 (set_attr "movu" "1")
1040 (set_attr "ssememalign" "8")
1041 (set_attr "prefix" "maybe_vex")
1043 (cond [(and (match_test "<MODE_SIZE> == 16")
1044 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
1045 (const_string "<ssePSmode>")
1046 (match_test "TARGET_AVX")
1047 (const_string "<MODE>")
1048 (match_test "optimize_function_for_size_p (cfun)")
1049 (const_string "V4SF")
1051 (const_string "<MODE>")))])
;; Unaligned float vector store of register operand 1 to memory operand 0.
;; Depending on get_attr_mode it emits (v)movups or
;; (v)movu<ssemodesuffix>.
;; Unlike the load insn, the "mode" attribute also selects <ssePSmode> for
;; 16-byte modes when TARGET_SSE_TYPELESS_STORES holds.
1053 (define_insn "<sse>_storeu<ssemodesuffix><avxsizesuffix>"
1054 [(set (match_operand:VF 0 "memory_operand" "=m")
1056 [(match_operand:VF 1 "register_operand" "v")]
1060 switch (get_attr_mode (insn))
1065 return "%vmovups\t{%1, %0|%0, %1}";
1067 return "%vmovu<ssemodesuffix>\t{%1, %0|%0, %1}";
1070 [(set_attr "type" "ssemov")
1071 (set_attr "movu" "1")
1072 (set_attr "ssememalign" "8")
1073 (set_attr "prefix" "maybe_vex")
1075 (cond [(and (match_test "<MODE_SIZE> == 16")
1076 (ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
1077 (match_test "TARGET_SSE_TYPELESS_STORES")))
1078 (const_string "<ssePSmode>")
1079 (match_test "TARGET_AVX")
1080 (const_string "<MODE>")
1081 (match_test "optimize_function_for_size_p (cfun)")
1082 (const_string "V4SF")
1084 (const_string "<MODE>")))])
;; Masked unaligned store of a 512-bit float vector (VF_512): register
;; operand 1 goes to memory operand 0 under mask register operand 2 (Yk).
;; Emits vmovups or vmovu<ssemodesuffix> depending on get_attr_mode, with
;; the mask attached to the memory destination.
1086 (define_insn "avx512f_storeu<ssemodesuffix>512_mask"
1087 [(set (match_operand:VF_512 0 "memory_operand" "=m")
1090 [(match_operand:VF_512 1 "register_operand" "v")]
1093 (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
1096 switch (get_attr_mode (insn))
1099 return "vmovups\t{%1, %0%{%2%}|%0%{%2%}, %1}";
1101 return "vmovu<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
1104 [(set_attr "type" "ssemov")
1105 (set_attr "movu" "1")
1106 (set_attr "memory" "store")
1107 (set_attr "prefix" "evex")
1108 (set_attr "mode" "<sseinsnmode>")])
;; Integer counterpart of the <sse>_loadu expander, for the
;; VI_UNALIGNED_LOADSTORE modes.  When operand 1 is a misaligned_operand, a
;; plain SET of operand 0 is emitted instead of the unspec form; for the
;; masked variant the source is wrapped in a VEC_MERGE with operands 2
;; and 3.
1110 (define_expand "<sse2_avx_avx512f>_loaddqu<mode><mask_name>"
1111 [(set (match_operand:VI_UNALIGNED_LOADSTORE 0 "register_operand")
1112 (unspec:VI_UNALIGNED_LOADSTORE
1113 [(match_operand:VI_UNALIGNED_LOADSTORE 1 "nonimmediate_operand")]
1115 "TARGET_SSE2 && <mask_mode512bit_condition>"
1117 /* For AVX, normal *mov<mode>_internal pattern will handle unaligned loads
1118 just fine if misaligned_operand is true, and without the UNSPEC it can
1119 be combined with arithmetic instructions. If misaligned_operand is
1120 false, still emit UNSPEC_LOADU insn to honor user's request for
1123 && misaligned_operand (operands[1], <MODE>mode))
1125 rtx src = operands[1];
1127 src = gen_rtx_VEC_MERGE (<MODE>mode, operands[1],
1128 operands[2 * <mask_applied>],
1129 operands[3 * <mask_applied>]);
1130 emit_insn (gen_rtx_SET (VOIDmode, operands[0], src));
;; Insn for unaligned loads of VI_UNALIGNED_LOADSTORE integer vectors into
;; a register (operand 0) from a register or memory (operand 1).  The
;; output code switches on get_attr_mode (insn) and returns one of: the
;; movups form, vmovdqu64 (when <MODE>mode is V8DImode), vmovdqu32, or the
;; movdqu form.  Only the vmovdqu64/vmovdqu32 templates carry
;; <mask_operand2>.
1135 (define_insn "*<sse2_avx_avx512f>_loaddqu<mode><mask_name>"
1136 [(set (match_operand:VI_UNALIGNED_LOADSTORE 0 "register_operand" "=v")
1137 (unspec:VI_UNALIGNED_LOADSTORE
1138 [(match_operand:VI_UNALIGNED_LOADSTORE 1 "nonimmediate_operand" "vm")]
1140 "TARGET_SSE2 && <mask_mode512bit_condition>"
1142 switch (get_attr_mode (insn))
1146 return "%vmovups\t{%1, %0|%0, %1}";
1148 if (<MODE>mode == V8DImode)
1149 return "vmovdqu64\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
1151 return "vmovdqu32\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
1153 return "%vmovdqu\t{%1, %0|%0, %1}";
1156 [(set_attr "type" "ssemov")
1157 (set_attr "movu" "1")
1158 (set_attr "ssememalign" "8")
1159 (set (attr "prefix_data16")
1161 (match_test "TARGET_AVX")
1163 (const_string "1")))
1164 (set_attr "prefix" "maybe_vex")
1166 (cond [(and (match_test "<MODE_SIZE> == 16")
1167 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
1168 (const_string "<ssePSmode>")
1169 (match_test "TARGET_AVX")
1170 (const_string "<sseinsnmode>")
1171 (match_test "optimize_function_for_size_p (cfun)")
1172 (const_string "V4SF")
1174 (const_string "<sseinsnmode>")))])
;; Unaligned store of a VI_UNALIGNED_LOADSTORE integer vector register
;; (operand 1) to memory (operand 0).  The output code switches on
;; get_attr_mode (insn) and returns one of: the movups form, vmovdqu64
;; (when <MODE>mode is V8DImode), vmovdqu32, or the movdqu form.  Unlike
;; the load insn, the 16-byte case of the trailing attribute cond also
;; accepts TARGET_SSE_TYPELESS_STORES.
1176 (define_insn "<sse2_avx_avx512f>_storedqu<mode>"
1177 [(set (match_operand:VI_UNALIGNED_LOADSTORE 0 "memory_operand" "=m")
1178 (unspec:VI_UNALIGNED_LOADSTORE
1179 [(match_operand:VI_UNALIGNED_LOADSTORE 1 "register_operand" "v")]
1183 switch (get_attr_mode (insn))
1188 return "%vmovups\t{%1, %0|%0, %1}";
1190 if (<MODE>mode == V8DImode)
1191 return "vmovdqu64\t{%1, %0|%0, %1}";
1193 return "vmovdqu32\t{%1, %0|%0, %1}";
1195 return "%vmovdqu\t{%1, %0|%0, %1}";
1198 [(set_attr "type" "ssemov")
1199 (set_attr "movu" "1")
1200 (set_attr "ssememalign" "8")
1201 (set (attr "prefix_data16")
1203 (match_test "TARGET_AVX")
1205 (const_string "1")))
1206 (set_attr "prefix" "maybe_vex")
1208 (cond [(and (match_test "<MODE_SIZE> == 16")
1209 (ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
1210 (match_test "TARGET_SSE_TYPELESS_STORES")))
1211 (const_string "<ssePSmode>")
1212 (match_test "TARGET_AVX")
1213 (const_string "<sseinsnmode>")
1214 (match_test "optimize_function_for_size_p (cfun)")
1215 (const_string "V4SF")
1217 (const_string "<sseinsnmode>")))])
;; Masked unaligned store of a VI48_512 integer vector register (operand 1)
;; to memory (operand 0) under the mask register in operand 2 (constraint
;; Yk).  Emits vmovdqu64 when <MODE>mode is V8DImode and vmovdqu32
;; otherwise; the mask is printed as {%2} on the destination.
1219 (define_insn "avx512f_storedqu<mode>_mask"
1220 [(set (match_operand:VI48_512 0 "memory_operand" "=m")
1223 [(match_operand:VI48_512 1 "register_operand" "v")]
1226 (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
1229 if (<MODE>mode == V8DImode)
1230 return "vmovdqu64\t{%1, %0%{%2%}|%0%{%2%}, %1}";
1232 return "vmovdqu32\t{%1, %0%{%2%}|%0%{%2%}, %1}";
1234 [(set_attr "type" "ssemov")
1235 (set_attr "movu" "1")
1236 (set_attr "memory" "store")
1237 (set_attr "prefix" "evex")
1238 (set_attr "mode" "<sseinsnmode>")])
;; lddqu load: reads a VI1 vector from memory (operand 1, memory only) into
;; an x-class register (operand 0).  The prefix_data16 and prefix_rep
;; attributes are both computed from a TARGET_AVX test.
1240 (define_insn "<sse3>_lddqu<avxsizesuffix>"
1241 [(set (match_operand:VI1 0 "register_operand" "=x")
1242 (unspec:VI1 [(match_operand:VI1 1 "memory_operand" "m")]
1245 "%vlddqu\t{%1, %0|%0, %1}"
1246 [(set_attr "type" "ssemov")
1247 (set_attr "movu" "1")
1248 (set_attr "ssememalign" "8")
1249 (set (attr "prefix_data16")
1251 (match_test "TARGET_AVX")
1253 (const_string "0")))
1254 (set (attr "prefix_rep")
1256 (match_test "TARGET_AVX")
1258 (const_string "1")))
1259 (set_attr "prefix" "maybe_vex")
1260 (set_attr "mode" "<sseinsnmode>")])
;; movnti store of a general-purpose register (operand 1, constraint r) of
;; an SWI48 mode to memory (operand 0).
1262 (define_insn "sse2_movnti<mode>"
1263 [(set (match_operand:SWI48 0 "memory_operand" "=m")
1264 (unspec:SWI48 [(match_operand:SWI48 1 "register_operand" "r")]
1267 "movnti\t{%1, %0|%0, %1}"
1268 [(set_attr "type" "ssemov")
1269 (set_attr "prefix_data16" "0")
1270 (set_attr "mode" "<MODE>")])
;; movnt<ssemodesuffix> store of a VF-mode vector register (operand 1) to
;; memory (operand 0).
1272 (define_insn "<sse>_movnt<mode>"
1273 [(set (match_operand:VF 0 "memory_operand" "=m")
1275 [(match_operand:VF 1 "register_operand" "v")]
1278 "%vmovnt<ssemodesuffix>\t{%1, %0|%0, %1}"
1279 [(set_attr "type" "ssemov")
1280 (set_attr "prefix" "maybe_vex")
1281 (set_attr "mode" "<MODE>")])
;; movntdq store of a VI8-mode integer vector register (operand 1) to
;; memory (operand 0).
;; NOTE(review): the type attribute here is ssecvt, whereas the neighbouring
;; movnt patterns use ssemov -- confirm this is intentional.
1283 (define_insn "<sse2>_movnt<mode>"
1284 [(set (match_operand:VI8 0 "memory_operand" "=m")
1285 (unspec:VI8 [(match_operand:VI8 1 "register_operand" "v")]
1288 "%vmovntdq\t{%1, %0|%0, %1}"
1289 [(set_attr "type" "ssecvt")
1290 (set (attr "prefix_data16")
1292 (match_test "TARGET_AVX")
1294 (const_string "1")))
1295 (set_attr "prefix" "maybe_vex")
1296 (set_attr "mode" "<sseinsnmode>")])
1298 ; Expand patterns for non-temporal stores. At the moment, only those
1299 ; that directly map to insns are defined; it would be possible to
1300 ; define patterns for other modes that would expand to several insns.
1302 ;; Modes handled by storent patterns.
;; Each entry pairs a mode with the target condition that enables it:
;; scalar DI/SI need TARGET_SSE2 (DI additionally TARGET_64BIT), scalar
;; SF/DF need TARGET_SSE4A, and the vector modes scale from SSE/SSE2
;; through AVX to AVX512F with their size.  V4SF has no extra condition.
1303 (define_mode_iterator STORENT_MODE
1304 [(DI "TARGET_SSE2 && TARGET_64BIT") (SI "TARGET_SSE2")
1305 (SF "TARGET_SSE4A") (DF "TARGET_SSE4A")
1306 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") (V2DI "TARGET_SSE2")
1307 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
1308 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
;; storent<mode> expander, instantiated for every mode in STORENT_MODE:
;; sets memory operand 0 from an unspec wrapping register operand 1.
1310 (define_expand "storent<mode>"
1311 [(set (match_operand:STORENT_MODE 0 "memory_operand")
1312 (unspec:STORENT_MODE
1313 [(match_operand:STORENT_MODE 1 "register_operand")]
1317 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1319 ;; Parallel floating point arithmetic
1321 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Unary expander on VF modes that delegates all work to
;; ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands) and then
;; finishes with DONE.
1323 (define_expand "<code><mode>2"
1324 [(set (match_operand:VF 0 "register_operand")
1326 (match_operand:VF 1 "register_operand")))]
1328 "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
;; Abs/neg on VF vectors, kept as a single insn and split once
;; reload_completed.  Operand 3 is the absneg operator, operand 1 its
;; input, and operand 2 a (use ...) of an extra vector operand.
;; The split replaces the operator with a bitwise one: XOR when operand 3
;; is NEG, AND otherwise, applied to op1/op2.  If operand 1 is a MEM the
;; two are taken in swapped order (op1 = operands[2], op2 = operands[1]).
;; A further case on rtx_equal_p (operands[0], operands[1]) is tested; its
;; body is not part of the lines visible here.
1330 (define_insn_and_split "*absneg<mode>2"
1331 [(set (match_operand:VF 0 "register_operand" "=x,x,v,v")
1332 (match_operator:VF 3 "absneg_operator"
1333 [(match_operand:VF 1 "nonimmediate_operand" "0, xm, v, m")]))
1334 (use (match_operand:VF 2 "nonimmediate_operand" "xm, 0, vm,v"))]
1337 "&& reload_completed"
1340 enum rtx_code absneg_op;
1346 if (MEM_P (operands[1]))
1347 op1 = operands[2], op2 = operands[1];
1349 op1 = operands[1], op2 = operands[2];
1354 if (rtx_equal_p (operands[0], operands[1]))
1360 absneg_op = GET_CODE (operands[3]) == NEG ? XOR : AND;
1361 t = gen_rtx_fmt_ee (absneg_op, <MODE>mode, op1, op2);
1362 t = gen_rtx_SET (VOIDmode, operands[0], t);
1366 [(set_attr "isa" "noavx,noavx,avx,avx")])
;; Expander for vector float add/sub on VF modes, with optional mask and
;; rounding variants.  Enabled under TARGET_SSE plus the mask/round 512-bit
;; conditions; the preparation statement calls
;; ix86_fixup_binary_operands_no_copy on the operands.
1368 (define_expand "<plusminus_insn><mode>3<mask_name><round_name>"
1369 [(set (match_operand:VF 0 "register_operand")
1371 (match_operand:VF 1 "<round_nimm_predicate>")
1372 (match_operand:VF 2 "<round_nimm_predicate>")))]
1373 "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1374 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Insn for vector float add/sub on VF modes.  Two alternatives:
;;   noavx - two-operand form, operand 1 tied to the destination;
;;   avx   - three-operand v-prefixed form that also prints the mask and
;;           rounding operands.
;; Matching additionally requires ix86_binary_operator_ok.
1376 (define_insn "*<plusminus_insn><mode>3<mask_name><round_name>"
1377 [(set (match_operand:VF 0 "register_operand" "=x,v")
1379 (match_operand:VF 1 "<round_nimm_predicate>" "<comm>0,v")
1380 (match_operand:VF 2 "<round_nimm_predicate>" "xm,<round_constraint>")))]
1381 "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands) && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1383 <plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
1384 v<plusminus_mnemonic><ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
1385 [(set_attr "isa" "noavx,avx")
1386 (set_attr "type" "sseadd")
1387 (set_attr "prefix" "<mask_prefix3>")
1388 (set_attr "mode" "<MODE>")])
;; Scalar-suffix (<ssescalarmodesuffix>) add/sub on VF_128 operands.
;; Alternative 0 is the two-operand noavx form with operand 1 tied to the
;; destination; alternative 1 is the three-operand avx form, which also
;; prints <round_op3>.  The mode attribute is <ssescalarmode>.
1390 (define_insn "<sse>_vm<plusminus_insn><mode>3<round_name>"
1391 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1394 (match_operand:VF_128 1 "register_operand" "0,v")
1395 (match_operand:VF_128 2 "nonimmediate_operand" "xm,<round_constraint>"))
1400 <plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
1401 v<plusminus_mnemonic><ssescalarmodesuffix>\t{<round_op3>%2, %1, %0|%0, %1, %<iptr>2<round_op3>}"
1402 [(set_attr "isa" "noavx,avx")
1403 (set_attr "type" "sseadd")
1404 (set_attr "prefix" "<round_prefix>")
1405 (set_attr "mode" "<ssescalarmode>")])
;; Expander for vector float multiply on VF modes, with optional mask and
;; rounding variants.  The preparation statement calls
;; ix86_fixup_binary_operands_no_copy with MULT.
1407 (define_expand "mul<mode>3<mask_name><round_name>"
1408 [(set (match_operand:VF 0 "register_operand")
1410 (match_operand:VF 1 "<round_nimm_predicate>")
1411 (match_operand:VF 2 "<round_nimm_predicate>")))]
1412 "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1413 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; Insn for vector float multiply on VF modes.  Operand 1 carries the %
;; (commutative) constraint modifier in the noavx alternative.
;; Alternative 0 emits two-operand mul<ssemodesuffix>; alternative 1 emits
;; three-operand vmul<ssemodesuffix> with mask and rounding operands.
1415 (define_insn "*mul<mode>3<mask_name><round_name>"
1416 [(set (match_operand:VF 0 "register_operand" "=x,v")
1418 (match_operand:VF 1 "<round_nimm_predicate>" "%0,v")
1419 (match_operand:VF 2 "<round_nimm_predicate>" "xm,<round_constraint>")))]
1420 "TARGET_SSE && ix86_binary_operator_ok (MULT, <MODE>mode, operands) && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1422 mul<ssemodesuffix>\t{%2, %0|%0, %2}
1423 vmul<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
1424 [(set_attr "isa" "noavx,avx")
1425 (set_attr "type" "ssemul")
1426 (set_attr "prefix" "<mask_prefix3>")
1427 (set_attr "btver2_decode" "direct,double")
1428 (set_attr "mode" "<MODE>")])
;; Scalar-suffix multiply/divide on VF_128 operands; the mnemonic comes
;; from <multdiv_mnemonic>, which is also used to build the type attribute
;; (sse<multdiv_mnemonic>).  Alternative 0 is the two-operand noavx form,
;; alternative 1 the three-operand avx form with <round_op3>.
1430 (define_insn "<sse>_vm<multdiv_mnemonic><mode>3<round_name>"
1431 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1434 (match_operand:VF_128 1 "register_operand" "0,v")
1435 (match_operand:VF_128 2 "nonimmediate_operand" "xm,<round_constraint>"))
1440 <multdiv_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
1441 v<multdiv_mnemonic><ssescalarmodesuffix>\t{<round_op3>%2, %1, %0|%0, %1, %<iptr>2<round_op3>}"
1442 [(set_attr "isa" "noavx,avx")
1443 (set_attr "type" "sse<multdiv_mnemonic>")
1444 (set_attr "prefix" "<round_prefix>")
1445 (set_attr "btver2_decode" "direct,double")
1446 (set_attr "mode" "<ssescalarmode>")])
;; Division expander for VF2 modes: operand 0 = operand 1 / operand 2.
;; The preparation statement only calls
;; ix86_fixup_binary_operands_no_copy with DIV.
1448 (define_expand "div<mode>3"
1449 [(set (match_operand:VF2 0 "register_operand")
1450 (div:VF2 (match_operand:VF2 1 "register_operand")
1451 (match_operand:VF2 2 "nonimmediate_operand")))]
1453 "ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);")
;; Division expander for VF1 modes.  After fixing up the operands it has a
;; guarded path that calls ix86_emit_swdivsf on the three operands; the
;; visible parts of the guard require TARGET_RECIP_VEC_DIV, not optimizing
;; the insn for size, flag_finite_math_only, !flag_trapping_math and
;; flag_unsafe_math_optimizations (the guard may contain further terms).
1455 (define_expand "div<mode>3"
1456 [(set (match_operand:VF1 0 "register_operand")
1457 (div:VF1 (match_operand:VF1 1 "register_operand")
1458 (match_operand:VF1 2 "nonimmediate_operand")))]
1461 ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);
1464 && TARGET_RECIP_VEC_DIV
1465 && !optimize_insn_for_size_p ()
1466 && flag_finite_math_only && !flag_trapping_math
1467 && flag_unsafe_math_optimizations)
1469 ix86_emit_swdivsf (operands[0], operands[1], operands[2], <MODE>mode);
;; Insn for vector float divide on VF modes.  Operand 1 must be a register
;; (tied to the destination in the noavx alternative).  Alternative 0 emits
;; two-operand div<ssemodesuffix>; alternative 1 emits three-operand
;; vdiv<ssemodesuffix> with mask and rounding operands.
1474 (define_insn "<sse>_div<mode>3<mask_name><round_name>"
1475 [(set (match_operand:VF 0 "register_operand" "=x,v")
1477 (match_operand:VF 1 "register_operand" "0,v")
1478 (match_operand:VF 2 "<round_nimm_predicate>" "xm,<round_constraint>")))]
1479 "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1481 div<ssemodesuffix>\t{%2, %0|%0, %2}
1482 vdiv<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
1483 [(set_attr "isa" "noavx,avx")
1484 (set_attr "type" "ssediv")
1485 (set_attr "prefix" "<mask_prefix3>")
1486 (set_attr "mode" "<MODE>")])
;; rcpps on VF1_128_256 vectors, represented as UNSPEC_RCP of operand 1
;; (register or memory) with the result in operand 0.
1488 (define_insn "<sse>_rcp<mode>2"
1489 [(set (match_operand:VF1_128_256 0 "register_operand" "=x")
1491 [(match_operand:VF1_128_256 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
1493 "%vrcpps\t{%1, %0|%0, %1}"
1494 [(set_attr "type" "sse")
1495 (set_attr "atom_sse_attr" "rcp")
1496 (set_attr "btver2_sse_attr" "rcp")
1497 (set_attr "prefix" "maybe_vex")
1498 (set_attr "mode" "<MODE>")])
;; rcpss on V4SF operands (mode attribute SF).  Operand 1 is the source of
;; the unspec; operand 2 is a second register input that is tied to the
;; destination in the noavx alternative and printed as a separate source
;; in the avx (vrcpss) alternative.
1500 (define_insn "sse_vmrcpv4sf2"
1501 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1503 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
1505 (match_operand:V4SF 2 "register_operand" "0,x")
1509 rcpss\t{%1, %0|%0, %k1}
1510 vrcpss\t{%1, %2, %0|%0, %2, %k1}"
1511 [(set_attr "isa" "noavx,avx")
1512 (set_attr "type" "sse")
1513 (set_attr "ssememalign" "32")
1514 (set_attr "atom_sse_attr" "rcp")
1515 (set_attr "btver2_sse_attr" "rcp")
1516 (set_attr "prefix" "orig,vex")
1517 (set_attr "mode" "SF")])
;; vrcp14<ssemodesuffix> on VF_512 vectors, with optional masking printed
;; through <mask_operand2>.  EVEX-prefixed.
1519 (define_insn "<mask_codefor>rcp14<mode><mask_name>"
1520 [(set (match_operand:VF_512 0 "register_operand" "=v")
1522 [(match_operand:VF_512 1 "nonimmediate_operand" "vm")]
1525 "vrcp14<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
1526 [(set_attr "type" "sse")
1527 (set_attr "prefix" "evex")
1528 (set_attr "mode" "<MODE>")])
;; vrcp14 with the scalar mode suffix on VF_128 operands: operand 1 is the
;; unspec source, operand 2 a second register input printed as the middle
;; operand.  EVEX-prefixed.
1530 (define_insn "srcp14<mode>"
1531 [(set (match_operand:VF_128 0 "register_operand" "=v")
1534 [(match_operand:VF_128 1 "nonimmediate_operand" "vm")]
1536 (match_operand:VF_128 2 "register_operand" "v")
1539 "vrcp14<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %1}"
1540 [(set_attr "type" "sse")
1541 (set_attr "prefix" "evex")
1542 (set_attr "mode" "<MODE>")])
;; Square-root expander for VF2 modes: operand 0 = sqrt (operand 1).
1544 (define_expand "sqrt<mode>2"
1545 [(set (match_operand:VF2 0 "register_operand")
1546 (sqrt:VF2 (match_operand:VF2 1 "nonimmediate_operand")))]
;; Square-root expander for VF1 modes.  It has a guarded path that calls
;; ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, false); the
;; visible parts of the guard require TARGET_RECIP_VEC_SQRT, not optimizing
;; the insn for size, flag_finite_math_only, !flag_trapping_math and
;; flag_unsafe_math_optimizations (the guard may contain further terms).
1549 (define_expand "sqrt<mode>2"
1550 [(set (match_operand:VF1 0 "register_operand")
1551 (sqrt:VF1 (match_operand:VF1 1 "nonimmediate_operand")))]
1555 && TARGET_RECIP_VEC_SQRT
1556 && !optimize_insn_for_size_p ()
1557 && flag_finite_math_only && !flag_trapping_math
1558 && flag_unsafe_math_optimizations)
1560 ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, false);
;; Insn for vector square root on VF modes, with optional mask and rounding
;; variants.  Single alternative; emits sqrt<ssemodesuffix> with the %v
;; prefix marker and prints the mask and rounding operands.
1565 (define_insn "<sse>_sqrt<mode>2<mask_name><round_name>"
1566 [(set (match_operand:VF 0 "register_operand" "=v")
1567 (sqrt:VF (match_operand:VF 1 "<round_nimm_predicate>" "<round_constraint>")))]
1568 "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1569 "%vsqrt<ssemodesuffix>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
1570 [(set_attr "type" "sse")
1571 (set_attr "atom_sse_attr" "sqrt")
1572 (set_attr "btver2_sse_attr" "sqrt")
1573 (set_attr "prefix" "maybe_vex")
1574 (set_attr "mode" "<MODE>")])
;; Scalar-suffix square root on VF_128 operands.  Operand 1 is the sqrt
;; source; operand 2 is a second register input, tied to the destination
;; in the noavx alternative and printed as a separate source in the avx
;; alternative, which also prints <round_op3>.
1576 (define_insn "<sse>_vmsqrt<mode>2<round_name>"
1577 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1580 (match_operand:VF_128 1 "nonimmediate_operand" "xm,<round_constraint>"))
1581 (match_operand:VF_128 2 "register_operand" "0,v")
1585 sqrt<ssescalarmodesuffix>\t{%1, %0|%0, %<iptr>1}
1586 vsqrt<ssescalarmodesuffix>\t{<round_op3>%1, %2, %0|%0, %2, %<iptr>1<round_op3>}"
1587 [(set_attr "isa" "noavx,avx")
1588 (set_attr "type" "sse")
1589 (set_attr "atom_sse_attr" "sqrt")
1590 (set_attr "prefix" "<round_prefix>")
1591 (set_attr "btver2_sse_attr" "sqrt")
1592 (set_attr "mode" "<ssescalarmode>")])
;; rsqrt expander for VF1_128_256 modes (UNSPEC_RSQRT of operand 1).  The
;; preparation code calls ix86_emit_swsqrtsf with its last argument true,
;; where the sqrt expander passes false.
1594 (define_expand "rsqrt<mode>2"
1595 [(set (match_operand:VF1_128_256 0 "register_operand")
1597 [(match_operand:VF1_128_256 1 "nonimmediate_operand")] UNSPEC_RSQRT))]
1600 ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, true);
;; rsqrtps on VF1_128_256 vectors, represented as UNSPEC_RSQRT of operand 1
;; (register or memory) with the result in operand 0.
1604 (define_insn "<sse>_rsqrt<mode>2"
1605 [(set (match_operand:VF1_128_256 0 "register_operand" "=x")
1607 [(match_operand:VF1_128_256 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
1609 "%vrsqrtps\t{%1, %0|%0, %1}"
1610 [(set_attr "type" "sse")
1611 (set_attr "prefix" "maybe_vex")
1612 (set_attr "mode" "<MODE>")])
;; vrsqrt14<ssemodesuffix> on VF_512 vectors, with optional masking printed
;; through <mask_operand2>.  EVEX-prefixed.
1614 (define_insn "<mask_codefor>rsqrt14<mode><mask_name>"
1615 [(set (match_operand:VF_512 0 "register_operand" "=v")
1617 [(match_operand:VF_512 1 "nonimmediate_operand" "vm")]
1620 "vrsqrt14<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
1621 [(set_attr "type" "sse")
1622 (set_attr "prefix" "evex")
1623 (set_attr "mode" "<MODE>")])
;; vrsqrt14 with the scalar mode suffix on VF_128 operands: operand 1 is
;; the unspec source, operand 2 a second register input printed as the
;; middle operand.  EVEX-prefixed.
1625 (define_insn "rsqrt14<mode>"
1626 [(set (match_operand:VF_128 0 "register_operand" "=v")
1629 [(match_operand:VF_128 1 "nonimmediate_operand" "vm")]
1631 (match_operand:VF_128 2 "register_operand" "v")
1634 "vrsqrt14<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %1}"
1635 [(set_attr "type" "sse")
1636 (set_attr "prefix" "evex")
1637 (set_attr "mode" "<MODE>")])
;; rsqrtss on V4SF operands (mode attribute SF).  Operand 1 is the source
;; of the unspec; operand 2 is a second register input that is tied to the
;; destination in the noavx alternative and printed as a separate source
;; in the avx (vrsqrtss) alternative.
1639 (define_insn "sse_vmrsqrtv4sf2"
1640 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1642 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
1644 (match_operand:V4SF 2 "register_operand" "0,x")
1648 rsqrtss\t{%1, %0|%0, %k1}
1649 vrsqrtss\t{%1, %2, %0|%0, %2, %k1}"
1650 [(set_attr "isa" "noavx,avx")
1651 (set_attr "type" "sse")
1652 (set_attr "ssememalign" "32")
1653 (set_attr "prefix" "orig,vex")
1654 (set_attr "mode" "SF")])
1656 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
1657 ;; isn't really correct, as those rtl operators aren't defined when
1658 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
;; Expander for vector float max/min on VF modes, with optional mask and
;; SAE-only variants.  When !flag_finite_math_only, operand 1 is first
;; forced into a register; then ix86_fixup_binary_operands_no_copy is
;; called on the operands.
1660 (define_expand "<code><mode>3<mask_name><round_saeonly_name>"
1661 [(set (match_operand:VF 0 "register_operand")
1663 (match_operand:VF 1 "<round_saeonly_nimm_predicate>")
1664 (match_operand:VF 2 "<round_saeonly_nimm_predicate>")))]
1665 "TARGET_SSE && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
1667 if (!flag_finite_math_only)
1668 operands[1] = force_reg (<MODE>mode, operands[1]);
1669 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
;; Max/min insn used only when flag_finite_math_only is set.  Operand 1
;; carries the % (commutative) modifier and matching also requires
;; ix86_binary_operator_ok.  Alternative 0 is the two-operand noavx form,
;; alternative 1 the three-operand avx form with mask and SAE operands.
1672 (define_insn "*<code><mode>3_finite<mask_name><round_saeonly_name>"
1673 [(set (match_operand:VF 0 "register_operand" "=x,v")
1675 (match_operand:VF 1 "<round_saeonly_nimm_predicate>" "%0,v")
1676 (match_operand:VF 2 "<round_saeonly_nimm_predicate>" "xm,<round_saeonly_constraint>")))]
1677 "TARGET_SSE && flag_finite_math_only
1678 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
1679 && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
1681 <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
1682 v<maxmin_float><ssemodesuffix>\t{<round_saeonly_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_saeonly_mask_op3>}"
1683 [(set_attr "isa" "noavx,avx")
1684 (set_attr "type" "sseadd")
1685 (set_attr "btver2_sse_attr" "maxmin")
1686 (set_attr "prefix" "<mask_prefix3>")
1687 (set_attr "mode" "<MODE>")])
;; Max/min insn used when flag_finite_math_only is NOT set.  Unlike the
;; _finite variant, operand 1 must be a register and has no commutative
;; modifier, so the operand order is kept as given.
1689 (define_insn "*<code><mode>3<mask_name><round_saeonly_name>"
1690 [(set (match_operand:VF 0 "register_operand" "=x,v")
1692 (match_operand:VF 1 "register_operand" "0,v")
1693 (match_operand:VF 2 "<round_saeonly_nimm_predicate>" "xm,<round_saeonly_constraint>")))]
1694 "TARGET_SSE && !flag_finite_math_only
1695 && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
1697 <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
1698 v<maxmin_float><ssemodesuffix>\t{<round_saeonly_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_saeonly_mask_op3>}"
1699 [(set_attr "isa" "noavx,avx")
1700 (set_attr "type" "sseadd")
1701 (set_attr "btver2_sse_attr" "maxmin")
1702 (set_attr "prefix" "<mask_prefix3>")
1703 (set_attr "mode" "<MODE>")])
;; Scalar-suffix max/min on VF_128 operands.  Alternative 0 is the
;; two-operand noavx form with operand 1 tied to the destination;
;; alternative 1 is the three-operand avx form, which also prints
;; <round_saeonly_op3>.  The mode attribute is <ssescalarmode>.
1705 (define_insn "<sse>_vm<code><mode>3<round_saeonly_name>"
1706 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1709 (match_operand:VF_128 1 "register_operand" "0,v")
1710 (match_operand:VF_128 2 "nonimmediate_operand" "xm,<round_saeonly_constraint>"))
1715 <maxmin_float><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
1716 v<maxmin_float><ssescalarmodesuffix>\t{<round_saeonly_op3>%2, %1, %0|%0, %1, %<iptr>2<round_saeonly_op3>}"
1717 [(set_attr "isa" "noavx,avx")
1718 (set_attr "type" "sse")
1719 (set_attr "btver2_sse_attr" "maxmin")
1720 (set_attr "prefix" "<round_saeonly_prefix>")
1721 (set_attr "mode" "<ssescalarmode>")])
1723 ;; These versions of the min/max patterns implement exactly the operations
1724 ;; min = (op1 < op2 ? op1 : op2)
1725 ;; max = (!(op1 < op2) ? op1 : op2)
1726 ;; Their operands are not commutative, and thus they may be used in the
1727 ;; presence of -0.0 and NaN.
;; Non-commutative min on VF vectors: operand 1 (register) and operand 2
;; (register or memory) are kept in order.  Alternative 0 emits two-operand
;; min<ssemodesuffix>, alternative 1 three-operand vmin<ssemodesuffix>.
1729 (define_insn "*ieee_smin<mode>3"
1730 [(set (match_operand:VF 0 "register_operand" "=v,v")
1732 [(match_operand:VF 1 "register_operand" "0,v")
1733 (match_operand:VF 2 "nonimmediate_operand" "vm,vm")]
1737 min<ssemodesuffix>\t{%2, %0|%0, %2}
1738 vmin<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1739 [(set_attr "isa" "noavx,avx")
1740 (set_attr "type" "sseadd")
1741 (set_attr "prefix" "orig,vex")
1742 (set_attr "mode" "<MODE>")])
;; Non-commutative max on VF vectors: operand 1 (register) and operand 2
;; (register or memory) are kept in order.  Alternative 0 emits two-operand
;; max<ssemodesuffix>, alternative 1 three-operand vmax<ssemodesuffix>.
1744 (define_insn "*ieee_smax<mode>3"
1745 [(set (match_operand:VF 0 "register_operand" "=v,v")
1747 [(match_operand:VF 1 "register_operand" "0,v")
1748 (match_operand:VF 2 "nonimmediate_operand" "vm,vm")]
1752 max<ssemodesuffix>\t{%2, %0|%0, %2}
1753 vmax<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1754 [(set_attr "isa" "noavx,avx")
1755 (set_attr "type" "sseadd")
1756 (set_attr "prefix" "orig,vex")
1757 (set_attr "mode" "<MODE>")])
;; vaddsubpd on V4DF: the RTL combines a first arithmetic result on
;; operands 1 and 2 with (minus op1 op2) of the same operands (match_dup).
;; VEX-only, single alternative.
1759 (define_insn "avx_addsubv4df3"
1760 [(set (match_operand:V4DF 0 "register_operand" "=x")
1763 (match_operand:V4DF 1 "register_operand" "x")
1764 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
1765 (minus:V4DF (match_dup 1) (match_dup 2))
1768 "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1769 [(set_attr "type" "sseadd")
1770 (set_attr "prefix" "vex")
1771 (set_attr "mode" "V4DF")])
;; addsubpd on V2DF, with the same RTL shape as the V4DF pattern.
;; Alternative 0 is the two-operand noavx form (operand 1 tied to the
;; destination); alternative 1 is the three-operand vaddsubpd form.
1773 (define_insn "sse3_addsubv2df3"
1774 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1777 (match_operand:V2DF 1 "register_operand" "0,x")
1778 (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm"))
1779 (minus:V2DF (match_dup 1) (match_dup 2))
1783 addsubpd\t{%2, %0|%0, %2}
1784 vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1785 [(set_attr "isa" "noavx,avx")
1786 (set_attr "type" "sseadd")
1787 (set_attr "atom_unit" "complex")
1788 (set_attr "prefix" "orig,vex")
1789 (set_attr "mode" "V2DF")])
;; vaddsubps on V8SF: single-precision counterpart of avx_addsubv4df3.
;; VEX-only, single alternative.
1791 (define_insn "avx_addsubv8sf3"
1792 [(set (match_operand:V8SF 0 "register_operand" "=x")
1795 (match_operand:V8SF 1 "register_operand" "x")
1796 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
1797 (minus:V8SF (match_dup 1) (match_dup 2))
1800 "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1801 [(set_attr "type" "sseadd")
1802 (set_attr "prefix" "vex")
1803 (set_attr "mode" "V8SF")])
;; addsubps on V4SF.  Alternative 0 is the two-operand noavx form (operand
;; 1 tied to the destination, prefix_rep 1); alternative 1 is the
;; three-operand vaddsubps form.
1805 (define_insn "sse3_addsubv4sf3"
1806 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1809 (match_operand:V4SF 1 "register_operand" "0,x")
1810 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
1811 (minus:V4SF (match_dup 1) (match_dup 2))
1815 addsubps\t{%2, %0|%0, %2}
1816 vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1817 [(set_attr "isa" "noavx,avx")
1818 (set_attr "type" "sseadd")
1819 (set_attr "prefix" "orig,vex")
1820 (set_attr "prefix_rep" "1,*")
1821 (set_attr "mode" "V4SF")])
;; Horizontal add/sub on V4DF (vhaddpd / vhsubpd via <plusminus_mnemonic>).
;; The RTL pairs adjacent DF elements selected with vec_select: elements
;; 0 and 1 of operand 1, 0 and 1 of operand 2, then 2 and 3 of operand 1,
;; and 2 and 3 of operand 2.
1823 (define_insn "avx_h<plusminus_insn>v4df3"
1824 [(set (match_operand:V4DF 0 "register_operand" "=x")
1829 (match_operand:V4DF 1 "register_operand" "x")
1830 (parallel [(const_int 0)]))
1831 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1834 (match_operand:V4DF 2 "nonimmediate_operand" "xm")
1835 (parallel [(const_int 0)]))
1836 (vec_select:DF (match_dup 2) (parallel [(const_int 1)]))))
1839 (vec_select:DF (match_dup 1) (parallel [(const_int 2)]))
1840 (vec_select:DF (match_dup 1) (parallel [(const_int 3)])))
1842 (vec_select:DF (match_dup 2) (parallel [(const_int 2)]))
1843 (vec_select:DF (match_dup 2) (parallel [(const_int 3)]))))))]
1845 "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
1846 [(set_attr "type" "sseadd")
1847 (set_attr "prefix" "vex")
1848 (set_attr "mode" "V4DF")])
;; Expander for horizontal add on V2DF.  The RTL pairs elements 0 and 1 of
;; operand 1 and elements 0 and 1 of operand 2, using fixed const_int
;; selectors.
1850 (define_expand "sse3_haddv2df3"
1851 [(set (match_operand:V2DF 0 "register_operand")
1855 (match_operand:V2DF 1 "register_operand")
1856 (parallel [(const_int 0)]))
1857 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1860 (match_operand:V2DF 2 "nonimmediate_operand")
1861 (parallel [(const_int 0)]))
1862 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
;; haddpd insn on V2DF.  The element selectors are operands 3-6 (each
;; const_0_to_1_operand) rather than fixed constants; the insn condition
;; requires operand 3 != operand 4 and operand 5 != operand 6, so each
;; pair selects both elements of its vector in either order.
1865 (define_insn "*sse3_haddv2df3"
1866 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1870 (match_operand:V2DF 1 "register_operand" "0,x")
1871 (parallel [(match_operand:SI 3 "const_0_to_1_operand")]))
1874 (parallel [(match_operand:SI 4 "const_0_to_1_operand")])))
1877 (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm")
1878 (parallel [(match_operand:SI 5 "const_0_to_1_operand")]))
1881 (parallel [(match_operand:SI 6 "const_0_to_1_operand")])))))]
1883 && INTVAL (operands[3]) != INTVAL (operands[4])
1884 && INTVAL (operands[5]) != INTVAL (operands[6])"
1886 haddpd\t{%2, %0|%0, %2}
1887 vhaddpd\t{%2, %1, %0|%0, %1, %2}"
1888 [(set_attr "isa" "noavx,avx")
1889 (set_attr "type" "sseadd")
1890 (set_attr "prefix" "orig,vex")
1891 (set_attr "mode" "V2DF")])
;; hsubpd insn on V2DF.  Unlike the hadd insn, the element selectors are
;; fixed: element 0 then element 1 of each source operand.
1893 (define_insn "sse3_hsubv2df3"
1894 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1898 (match_operand:V2DF 1 "register_operand" "0,x")
1899 (parallel [(const_int 0)]))
1900 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1903 (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm")
1904 (parallel [(const_int 0)]))
1905 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1908 hsubpd\t{%2, %0|%0, %2}
1909 vhsubpd\t{%2, %1, %0|%0, %1, %2}"
1910 [(set_attr "isa" "noavx,avx")
1911 (set_attr "type" "sseadd")
1912 (set_attr "prefix" "orig,vex")
1913 (set_attr "mode" "V2DF")])
;; Scalar (DF) result formed from the two elements of a single V2DF
;; register (operand 1).  Selector operands 2 and 3 must differ.  Emitted
;; as haddpd / vhaddpd with the same register used for both sources.
1915 (define_insn "*sse3_haddv2df3_low"
1916 [(set (match_operand:DF 0 "register_operand" "=x,x")
1919 (match_operand:V2DF 1 "register_operand" "0,x")
1920 (parallel [(match_operand:SI 2 "const_0_to_1_operand")]))
1923 (parallel [(match_operand:SI 3 "const_0_to_1_operand")]))))]
1925 && INTVAL (operands[2]) != INTVAL (operands[3])"
1927 haddpd\t{%0, %0|%0, %0}
1928 vhaddpd\t{%1, %1, %0|%0, %1, %1}"
1929 [(set_attr "isa" "noavx,avx")
1930 (set_attr "type" "sseadd1")
1931 (set_attr "prefix" "orig,vex")
1932 (set_attr "mode" "V2DF")])
;; Scalar (DF) result formed from elements 0 and 1 (fixed selectors) of a
;; single V2DF register (operand 1).  Emitted as hsubpd / vhsubpd with the
;; same register used for both sources.
1934 (define_insn "*sse3_hsubv2df3_low"
1935 [(set (match_operand:DF 0 "register_operand" "=x,x")
1938 (match_operand:V2DF 1 "register_operand" "0,x")
1939 (parallel [(const_int 0)]))
1942 (parallel [(const_int 1)]))))]
1945 hsubpd\t{%0, %0|%0, %0}
1946 vhsubpd\t{%1, %1, %0|%0, %1, %1}"
1947 [(set_attr "isa" "noavx,avx")
1948 (set_attr "type" "sseadd1")
1949 (set_attr "prefix" "orig,vex")
1950 (set_attr "mode" "V2DF")])
;; Horizontal add/sub on V8SF (vhaddps / vhsubps via <plusminus_mnemonic>).
;; The RTL pairs adjacent SF elements: elements 0-3 of operand 1, then 0-3
;; of operand 2, then 4-7 of operand 1, then 4-7 of operand 2.
1952 (define_insn "avx_h<plusminus_insn>v8sf3"
1953 [(set (match_operand:V8SF 0 "register_operand" "=x")
1959 (match_operand:V8SF 1 "register_operand" "x")
1960 (parallel [(const_int 0)]))
1961 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1963 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1964 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1968 (match_operand:V8SF 2 "nonimmediate_operand" "xm")
1969 (parallel [(const_int 0)]))
1970 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1972 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1973 (vec_select:SF (match_dup 2) (parallel [(const_int 3)])))))
1977 (vec_select:SF (match_dup 1) (parallel [(const_int 4)]))
1978 (vec_select:SF (match_dup 1) (parallel [(const_int 5)])))
1980 (vec_select:SF (match_dup 1) (parallel [(const_int 6)]))
1981 (vec_select:SF (match_dup 1) (parallel [(const_int 7)]))))
1984 (vec_select:SF (match_dup 2) (parallel [(const_int 4)]))
1985 (vec_select:SF (match_dup 2) (parallel [(const_int 5)])))
1987 (vec_select:SF (match_dup 2) (parallel [(const_int 6)]))
1988 (vec_select:SF (match_dup 2) (parallel [(const_int 7)])))))))]
1990 "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1991 [(set_attr "type" "sseadd")
1992 (set_attr "prefix" "vex")
1993 (set_attr "mode" "V8SF")])
;; Horizontal add/sub on V4SF (haddps / hsubps via <plusminus_mnemonic>).
;; The RTL pairs adjacent SF elements 0-3 of operand 1, then 0-3 of
;; operand 2.  Alternative 0 is the two-operand noavx form, alternative 1
;; the three-operand v-prefixed form.
1995 (define_insn "sse3_h<plusminus_insn>v4sf3"
1996 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2001 (match_operand:V4SF 1 "register_operand" "0,x")
2002 (parallel [(const_int 0)]))
2003 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
2005 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
2006 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
2010 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm")
2011 (parallel [(const_int 0)]))
2012 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
2014 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
2015 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
2018 h<plusminus_mnemonic>ps\t{%2, %0|%0, %2}
2019 vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
2020 [(set_attr "isa" "noavx,avx")
2021 (set_attr "type" "sseadd")
2022 (set_attr "atom_unit" "complex")
2023 (set_attr "prefix" "orig,vex")
2024 (set_attr "prefix_rep" "1,*")
2025 (set_attr "mode" "V4SF")])
;; Sum reduction expander for V8DF: delegates to ix86_expand_reduc with
;; gen_addv8df3 as the combining operation.
2027 (define_expand "reduc_splus_v8df"
2028 [(match_operand:V8DF 0 "register_operand")
2029 (match_operand:V8DF 1 "register_operand")]
2032 ix86_expand_reduc (gen_addv8df3, operands[0], operands[1]);
;; Sum reduction expander for V4DF, emitted as three insns on fresh
;; temporaries: a horizontal add of operand 1 with itself, a vperm2f128
;; of that result with immediate 1, and a vector add of the two into
;; operand 0.
2036 (define_expand "reduc_splus_v4df"
2037 [(match_operand:V4DF 0 "register_operand")
2038 (match_operand:V4DF 1 "register_operand")]
2041 rtx tmp = gen_reg_rtx (V4DFmode);
2042 rtx tmp2 = gen_reg_rtx (V4DFmode);
2043 emit_insn (gen_avx_haddv4df3 (tmp, operands[1], operands[1]));
2044 emit_insn (gen_avx_vperm2f128v4df3 (tmp2, tmp, tmp, GEN_INT (1)));
2045 emit_insn (gen_addv4df3 (operands[0], tmp, tmp2));
;; Sum reduction expander for V2DF: a single horizontal add of operand 1
;; with itself, written to operand 0.
2049 (define_expand "reduc_splus_v2df"
2050 [(match_operand:V2DF 0 "register_operand")
2051 (match_operand:V2DF 1 "register_operand")]
2054 emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
;; Sum reduction expander for V16SF: delegates to ix86_expand_reduc with
;; gen_addv16sf3 as the combining operation.
2058 (define_expand "reduc_splus_v16sf"
2059 [(match_operand:V16SF 0 "register_operand")
2060 (match_operand:V16SF 1 "register_operand")]
2063 ix86_expand_reduc (gen_addv16sf3, operands[0], operands[1]);
;; Sum reduction expander for V8SF, emitted as four insns: two successive
;; horizontal adds (operand 1 with itself, then the result with itself),
;; a vperm2f128 with immediate 1 that reuses tmp as its destination, and a
;; vector add of tmp and tmp2 into operand 0.
2067 (define_expand "reduc_splus_v8sf"
2068 [(match_operand:V8SF 0 "register_operand")
2069 (match_operand:V8SF 1 "register_operand")]
2072 rtx tmp = gen_reg_rtx (V8SFmode);
2073 rtx tmp2 = gen_reg_rtx (V8SFmode);
2074 emit_insn (gen_avx_haddv8sf3 (tmp, operands[1], operands[1]));
2075 emit_insn (gen_avx_haddv8sf3 (tmp2, tmp, tmp));
2076 emit_insn (gen_avx_vperm2f128v8sf3 (tmp, tmp2, tmp2, GEN_INT (1)));
2077 emit_insn (gen_addv8sf3 (operands[0], tmp, tmp2));
;; Sum reduction expander for V4SF.  Two code paths are present: one emits
;; two successive horizontal adds (gen_sse3_haddv4sf3) through a temporary,
;; the other calls ix86_expand_reduc with gen_addv4sf3.
;; NOTE(review): the test choosing between them is presumably SSE3
;; availability -- confirm against the full definition.
2081 (define_expand "reduc_splus_v4sf"
2082 [(match_operand:V4SF 0 "register_operand")
2083 (match_operand:V4SF 1 "register_operand")]
2088 rtx tmp = gen_reg_rtx (V4SFmode);
2089 emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
2090 emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
2093 ix86_expand_reduc (gen_addv4sf3, operands[0], operands[1]);
2097 ;; Modes handled by reduc_sm{in,ax}* patterns.
;; Each entry pairs a mode with the target condition that enables it:
;; 256-bit integer modes need TARGET_AVX2, 256-bit float modes TARGET_AVX,
;; V4SF TARGET_SSE, and the 512-bit modes TARGET_AVX512F.
2098 (define_mode_iterator REDUC_SMINMAX_MODE
2099 [(V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
2100 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
2101 (V8SF "TARGET_AVX") (V4DF "TARGET_AVX")
2102 (V4SF "TARGET_SSE") (V16SI "TARGET_AVX512F")
2103 (V8DI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
2104 (V8DF "TARGET_AVX512F")])
;; Signed max/min reduction expander (smaxmin code iterator) over
;; REDUC_SMINMAX_MODE: delegates to ix86_expand_reduc with the matching
;; gen_<code><mode>3 generator.
2106 (define_expand "reduc_<code>_<mode>"
2107 [(smaxmin:REDUC_SMINMAX_MODE
2108 (match_operand:REDUC_SMINMAX_MODE 0 "register_operand")
2109 (match_operand:REDUC_SMINMAX_MODE 1 "register_operand"))]
2112 ix86_expand_reduc (gen_<code><mode>3, operands[0], operands[1]);
;; Reduction expander over VI48_512 modes: delegates to ix86_expand_reduc
;; with the matching gen_<code><mode>3 generator.
;; NOTE(review): the code iterator of this pattern is presumably the
;; unsigned max/min one -- confirm against the full definition.
2116 (define_expand "reduc_<code>_<mode>"
2118 (match_operand:VI48_512 0 "register_operand")
2119 (match_operand:VI48_512 1 "register_operand"))]
2122 ix86_expand_reduc (gen_<code><mode>3, operands[0], operands[1]);
;; Reduction expander over VI_256 modes: delegates to ix86_expand_reduc
;; with the matching gen_<code><mode>3 generator.
;; NOTE(review): the code iterator of this pattern is presumably the
;; unsigned max/min one -- confirm against the full definition.
2126 (define_expand "reduc_<code>_<mode>"
2128 (match_operand:VI_256 0 "register_operand")
2129 (match_operand:VI_256 1 "register_operand"))]
2132 ix86_expand_reduc (gen_<code><mode>3, operands[0], operands[1]);
;; Unsigned-min reduction expander for V8HI: delegates to
;; ix86_expand_reduc with gen_uminv8hi3.
2136 (define_expand "reduc_umin_v8hi"
2138 (match_operand:V8HI 0 "register_operand")
2139 (match_operand:V8HI 1 "register_operand"))]
2142 ix86_expand_reduc (gen_uminv8hi3, operands[0], operands[1]);
2146 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2148 ;; Parallel floating point comparisons
2150 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; vcmp<ssemodesuffix> on VF_128_256 vectors.  Operands 1 and 2 are the
;; values compared; operand 3 is an immediate in the range 0..31
;; (const_0_to_31_operand) printed as the comparison selector.
2152 (define_insn "avx_cmp<mode>3"
2153 [(set (match_operand:VF_128_256 0 "register_operand" "=x")
2155 [(match_operand:VF_128_256 1 "register_operand" "x")
2156 (match_operand:VF_128_256 2 "nonimmediate_operand" "xm")
2157 (match_operand:SI 3 "const_0_to_31_operand" "n")]
2160 "vcmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2161 [(set_attr "type" "ssecmp")
2162 (set_attr "length_immediate" "1")
2163 (set_attr "prefix" "vex")
2164 (set_attr "mode" "<MODE>")])
;; Scalar-suffix vcmp on VF_128 operands, with the comparison selected by
;; the 0..31 immediate in operand 3.  The mode attribute is
;; <ssescalarmode>.
2166 (define_insn "avx_vmcmp<mode>3"
2167 [(set (match_operand:VF_128 0 "register_operand" "=x")
2170 [(match_operand:VF_128 1 "register_operand" "x")
2171 (match_operand:VF_128 2 "nonimmediate_operand" "xm")
2172 (match_operand:SI 3 "const_0_to_31_operand" "n")]
2177 "vcmp<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %3}"
2178 [(set_attr "type" "ssecmp")
2179 (set_attr "length_immediate" "1")
2180 (set_attr "prefix" "vex")
2181 (set_attr "mode" "<ssescalarmode>")])
;; Packed compare selected by comparison operator 3 (printed through %D3),
;; restricted by the insn condition to operators of class RTX_COMM_COMPARE.
;; Because such comparisons are commutative, operand 1 carries the '%'
;; marker so the two inputs may be swapped.
;; Alternative 0 is the two-operand legacy form (isa noavx, destination tied
;; to operand 1); alternative 1 is the three-operand VEX form (isa avx).
2183 (define_insn "*<sse>_maskcmp<mode>3_comm"
2184 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
2185 (match_operator:VF_128_256 3 "sse_comparison_operator"
2186 [(match_operand:VF_128_256 1 "register_operand" "%0,x")
2187 (match_operand:VF_128_256 2 "nonimmediate_operand" "xm,xm")]))]
2189 && GET_RTX_CLASS (GET_CODE (operands[3])) == RTX_COMM_COMPARE"
2191 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
2192 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2193 [(set_attr "isa" "noavx,avx")
2194 (set_attr "type" "ssecmp")
2195 (set_attr "length_immediate" "1")
2196 (set_attr "prefix" "orig,vex")
2197 (set_attr "mode" "<MODE>")])
;; Packed compare selected by comparison operator 3 (printed through %D3).
;; Unlike the _comm variant, operand 1 has no '%' marker, so the operand
;; order is fixed.  Alternative 0 is the two-operand legacy form with the
;; destination tied to operand 1; alternative 1 is the three-operand VEX
;; form.
2199 (define_insn "<sse>_maskcmp<mode>3"
2200 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
2201 (match_operator:VF_128_256 3 "sse_comparison_operator"
2202 [(match_operand:VF_128_256 1 "register_operand" "0,x")
2203 (match_operand:VF_128_256 2 "nonimmediate_operand" "xm,xm")]))]
2206 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
2207 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2208 [(set_attr "isa" "noavx,avx")
2209 (set_attr "type" "ssecmp")
2210 (set_attr "length_immediate" "1")
2211 (set_attr "prefix" "orig,vex")
2212 (set_attr "mode" "<MODE>")])
;; Scalar-suffix counterpart of <sse>_maskcmp<mode>3 for VF_128: the
;; comparison is chosen by operator 3 and the template uses
;; <ssescalarmodesuffix>, printing operand 2 with <iptr> in Intel syntax.
;; Alternative 0 is the legacy two-operand form, alternative 1 the VEX form.
;; NOTE(review): length_immediate is "1,*" here but "1" on the packed
;; patterns above -- confirm the difference is intentional.
2214 (define_insn "<sse>_vmmaskcmp<mode>3"
2215 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
2217 (match_operator:VF_128 3 "sse_comparison_operator"
2218 [(match_operand:VF_128 1 "register_operand" "0,x")
2219 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm")])
2224 cmp%D3<ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
2225 vcmp%D3<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %<iptr>2}"
2226 [(set_attr "isa" "noavx,avx")
2227 (set_attr "type" "ssecmp")
2228 (set_attr "length_immediate" "1,*")
2229 (set_attr "prefix" "orig,vex")
2230 (set_attr "mode" "<ssescalarmode>")])
;; Predicate used for the comparison immediate of avx512f_cmp<mode>3:
;; the float modes accept 0..31, the integer modes accept 0..7.
2232 (define_mode_attr cmp_imm_predicate
2233 [(V16SF "const_0_to_31_operand") (V8DF "const_0_to_31_operand")
2234 (V16SI "const_0_to_7_operand") (V8DI "const_0_to_7_operand")])
;; AVX512F compare of two VI48F_512 vectors; the result is written to a mask
;; register (constraint "Yk") of mode <avx512fmaskmode>.
;;   operand 1: first source, register only
;;   operand 2: second source; predicate and constraint come from the
;;              round_saeonly substitution
;;   operand 3: comparison immediate, range chosen per mode by
;;              <cmp_imm_predicate>
;; The name and template also carry the mask_scalar_merge and round_saeonly
;; substitution hooks.
2236 (define_insn "avx512f_cmp<mode>3<mask_scalar_merge_name><round_saeonly_name>"
2237 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2238 (unspec:<avx512fmaskmode>
2239 [(match_operand:VI48F_512 1 "register_operand" "v")
2240 (match_operand:VI48F_512 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
2241 (match_operand:SI 3 "<cmp_imm_predicate>" "n")]
2243 "TARGET_AVX512F && <round_saeonly_mode512bit_condition>"
2244 "v<sseintprefix>cmp<ssemodesuffix>\t{%3, <round_saeonly_mask_scalar_merge_op4>%2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2<round_saeonly_mask_scalar_merge_op4>, %3}"
2245 [(set_attr "type" "ssecmp")
2246 (set_attr "length_immediate" "1")
2247 (set_attr "prefix" "evex")
2248 (set_attr "mode" "<sseinsnmode>")])
;; Unsigned integer compare (UNSPEC_UNSIGNED_PCMP) of two VI48_512 vectors
;; into a mask register.  Operand 2 may be a register or memory; operand 3
;; is the comparison immediate, restricted to 0..7.  Emits
;; vpcmpu<ssemodesuffix>.
2250 (define_insn "avx512f_ucmp<mode>3<mask_scalar_merge_name>"
2251 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2252 (unspec:<avx512fmaskmode>
2253 [(match_operand:VI48_512 1 "register_operand" "v")
2254 (match_operand:VI48_512 2 "nonimmediate_operand" "vm")
2255 (match_operand:SI 3 "const_0_to_7_operand" "n")]
2256 UNSPEC_UNSIGNED_PCMP))]
2258 "vpcmpu<ssemodesuffix>\t{%3, %2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2, %3}"
2259 [(set_attr "type" "ssecmp")
2260 (set_attr "length_immediate" "1")
2261 (set_attr "prefix" "evex")
2262 (set_attr "mode" "<sseinsnmode>")])
;; EVEX scalar-suffix compare for VF_128 with the result in a mask register.
;; The unspec result is combined with an outer 'and' in the mask mode.
;; Operand 3 is the comparison immediate (0..31); operand 2's predicate and
;; constraint come from the round_saeonly substitution, whose extra operand
;; is printed through <round_saeonly_op4>.
2264 (define_insn "avx512f_vmcmp<mode>3<round_saeonly_name>"
2265 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2266 (and:<avx512fmaskmode>
2267 (unspec:<avx512fmaskmode>
2268 [(match_operand:VF_128 1 "register_operand" "v")
2269 (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
2270 (match_operand:SI 3 "const_0_to_31_operand" "n")]
2274 "vcmp<ssescalarmodesuffix>\t{%3, <round_saeonly_op4>%2, %1, %0|%0, %1, %2<round_saeonly_op4>, %3}"
2275 [(set_attr "type" "ssecmp")
2276 (set_attr "length_immediate" "1")
2277 (set_attr "prefix" "evex")
2278 (set_attr "mode" "<ssescalarmode>")])
;; Masked form of avx512f_vmcmp<mode>3: operand 4 is an additional mask
;; register that is and-ed into the result and printed as the {%4} write
;; mask on the destination.  The round_saeonly operand therefore moves to
;; position 5 (<round_saeonly_op5>).
2280 (define_insn "avx512f_vmcmp<mode>3_mask<round_saeonly_name>"
2281 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2282 (and:<avx512fmaskmode>
2283 (unspec:<avx512fmaskmode>
2284 [(match_operand:VF_128 1 "register_operand" "v")
2285 (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
2286 (match_operand:SI 3 "const_0_to_31_operand" "n")]
2288 (and:<avx512fmaskmode>
2289 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")
2292 "vcmp<ssescalarmodesuffix>\t{%3, <round_saeonly_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_saeonly_op5>, %3}"
2293 [(set_attr "type" "ssecmp")
2294 (set_attr "length_immediate" "1")
2295 (set_attr "prefix" "evex")
2296 (set_attr "mode" "<ssescalarmode>")])
;; Compare of two VF vectors by comparison operator 3 (printed through %D3)
;; with the result placed in a mask register of mode <avx512fmaskmode>.
;; Operand 2 may be a register or memory.  The insn is EVEX-prefixed.
2298 (define_insn "avx512f_maskcmp<mode>3"
2299 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2300 (match_operator:<avx512fmaskmode> 3 "sse_comparison_operator"
2301 [(match_operand:VF 1 "register_operand" "v")
2302 (match_operand:VF 2 "nonimmediate_operand" "vm")]))]
2304 "vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2305 [(set_attr "type" "ssecmp")
2306 (set_attr "length_immediate" "1")
2307 (set_attr "prefix" "evex")
2308 (set_attr "mode" "<sseinsnmode>")])
;; Sets FLAGS_REG, in CCFP mode, from element 0 of operand 0 and element 0
;; of operand 1 (both selected with a one-element parallel).  Emits
;; [v]comi<ssemodesuffix>; operand 1's predicate and constraint come from
;; the round_saeonly substitution.  prefix_data16 is chosen by testing
;; whether the insn mode attribute is DF.
2310 (define_insn "<sse>_comi<round_saeonly_name>"
2311 [(set (reg:CCFP FLAGS_REG)
2314 (match_operand:<ssevecmode> 0 "register_operand" "v")
2315 (parallel [(const_int 0)]))
2317 (match_operand:<ssevecmode> 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
2318 (parallel [(const_int 0)]))))]
2319 "SSE_FLOAT_MODE_P (<MODE>mode)"
2320 "%vcomi<ssemodesuffix>\t{<round_saeonly_op2>%1, %0|%0, %<iptr>1<round_saeonly_op2>}"
2321 [(set_attr "type" "ssecomi")
2322 (set_attr "prefix" "maybe_vex")
2323 (set_attr "prefix_rep" "0")
2324 (set (attr "prefix_data16")
2325 (if_then_else (eq_attr "mode" "DF")
2327 (const_string "0")))
2328 (set_attr "mode" "<MODE>")])
;; Same shape as <sse>_comi, but the flags register is set in CCFPU mode and
;; the emitted instruction is [v]ucomi<ssemodesuffix>.  Element 0 of each
;; operand is compared; operand 1's predicate and constraint come from the
;; round_saeonly substitution.
2330 (define_insn "<sse>_ucomi<round_saeonly_name>"
2331 [(set (reg:CCFPU FLAGS_REG)
2334 (match_operand:<ssevecmode> 0 "register_operand" "v")
2335 (parallel [(const_int 0)]))
2337 (match_operand:<ssevecmode> 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
2338 (parallel [(const_int 0)]))))]
2339 "SSE_FLOAT_MODE_P (<MODE>mode)"
2340 "%vucomi<ssemodesuffix>\t{<round_saeonly_op2>%1, %0|%0, %<iptr>1<round_saeonly_op2>}"
2341 [(set_attr "type" "ssecomi")
2342 (set_attr "prefix" "maybe_vex")
2343 (set_attr "prefix_rep" "0")
2344 (set (attr "prefix_data16")
2345 (if_then_else (eq_attr "mode" "DF")
2347 (const_string "0")))
2348 (set_attr "mode" "<MODE>")])
;; Vector conditional expander: data in a V_512 mode, comparison in a
;; VF_512 mode.
;;   operand 0:    destination
;;   operands 1,2: the two data inputs (same mode as the destination)
;;   operand 3:    comparison operator applied to operands 4 and 5
;; Only enabled when the data mode and the comparison mode have the same
;; number of elements.  The expansion itself is done by
;; ix86_expand_fp_vcond.
2350 (define_expand "vcond<V_512:mode><VF_512:mode>"
2351 [(set (match_operand:V_512 0 "register_operand")
2353 (match_operator 3 ""
2354 [(match_operand:VF_512 4 "nonimmediate_operand")
2355 (match_operand:VF_512 5 "nonimmediate_operand")])
2356 (match_operand:V_512 1 "general_operand")
2357 (match_operand:V_512 2 "general_operand")))]
2359 && (GET_MODE_NUNITS (<V_512:MODE>mode)
2360 == GET_MODE_NUNITS (<VF_512:MODE>mode))"
2362 bool ok = ix86_expand_fp_vcond (operands);
;; Vector conditional expander: data in a V_256 mode, comparison in a
;; VF_256 mode.  Operand 3 is the comparison operator applied to operands 4
;; and 5; operands 1 and 2 are the data inputs.  Only enabled when both
;; modes have the same number of elements; expanded by
;; ix86_expand_fp_vcond.
2367 (define_expand "vcond<V_256:mode><VF_256:mode>"
2368 [(set (match_operand:V_256 0 "register_operand")
2370 (match_operator 3 ""
2371 [(match_operand:VF_256 4 "nonimmediate_operand")
2372 (match_operand:VF_256 5 "nonimmediate_operand")])
2373 (match_operand:V_256 1 "general_operand")
2374 (match_operand:V_256 2 "general_operand")))]
2376 && (GET_MODE_NUNITS (<V_256:MODE>mode)
2377 == GET_MODE_NUNITS (<VF_256:MODE>mode))"
2379 bool ok = ix86_expand_fp_vcond (operands);
;; Vector conditional expander: data in a V_128 mode, comparison in a
;; VF_128 mode.  Operand 3 is the comparison operator applied to operands 4
;; and 5; operands 1 and 2 are the data inputs.  Only enabled when both
;; modes have the same number of elements; expanded by
;; ix86_expand_fp_vcond.
2384 (define_expand "vcond<V_128:mode><VF_128:mode>"
2385 [(set (match_operand:V_128 0 "register_operand")
2387 (match_operator 3 ""
2388 [(match_operand:VF_128 4 "nonimmediate_operand")
2389 (match_operand:VF_128 5 "nonimmediate_operand")])
2390 (match_operand:V_128 1 "general_operand")
2391 (match_operand:V_128 2 "general_operand")))]
2393 && (GET_MODE_NUNITS (<V_128:MODE>mode)
2394 == GET_MODE_NUNITS (<VF_128:MODE>mode))"
2396 bool ok = ix86_expand_fp_vcond (operands);
2401 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2403 ;; Parallel floating point logical operations
2405 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; And-not for all VF modes.  The assembler template is assembled at output
;; time into a static buffer:
;;   - the suffix is picked from the insn's "mode" attribute
;;     (<ssemodesuffix> in the default case);
;;   - the mnemonic form is picked from which_alternative: two-operand
;;     "andn" for the legacy alternative, three-operand "vandn" otherwise;
;;   - for 64-byte modes the integer "vpandn" form is used instead, per the
;;     comment below.
;; The "mode" attribute itself depends on the vector size, the
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL tuning, TARGET_AVX and whether the
;; function is optimized for size.
2407 (define_insn "<sse>_andnot<mode>3"
2408 [(set (match_operand:VF 0 "register_operand" "=x,v")
2411 (match_operand:VF 1 "register_operand" "0,v"))
2412 (match_operand:VF 2 "nonimmediate_operand" "xm,vm")))]
2415 static char buf[32];
2419 switch (get_attr_mode (insn))
2426 suffix = "<ssemodesuffix>";
2429 switch (which_alternative)
2432 ops = "andn%s\t{%%2, %%0|%%0, %%2}";
2435 ops = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
2441 /* There is no vandnp[sd]. Use vpandnq. */
2442 if (<MODE_SIZE> == 64)
2445 ops = "vpandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
2448 snprintf (buf, sizeof (buf), ops, suffix);
2451 [(set_attr "isa" "noavx,avx")
2452 (set_attr "type" "sselog")
2453 (set_attr "prefix" "orig,maybe_evex")
2455 (cond [(and (match_test "<MODE_SIZE> == 16")
2456 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
2457 (const_string "<ssePSmode>")
2458 (match_test "TARGET_AVX")
2459 (const_string "<MODE>")
2460 (match_test "optimize_function_for_size_p (cfun)")
2461 (const_string "V4SF")
2463 (const_string "<MODE>")))])
;; Expander for the any_logic codes on the VF_128_256 modes.  Both inputs
;; may be registers or memory; the preparation statement passes the operands
;; through ix86_fixup_binary_operands_no_copy before the RTL is generated.
2465 (define_expand "<code><mode>3"
2466 [(set (match_operand:VF_128_256 0 "register_operand")
2467 (any_logic:VF_128_256
2468 (match_operand:VF_128_256 1 "nonimmediate_operand")
2469 (match_operand:VF_128_256 2 "nonimmediate_operand")))]
2471 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Logic-operation expander for the VF_512 modes.  Both inputs may be
;; registers or memory; the operands are passed through
;; ix86_fixup_binary_operands_no_copy before the RTL is generated.
2473 (define_expand "<code><mode>3"
2474 [(set (match_operand:VF_512 0 "register_operand")
2476 (match_operand:VF_512 1 "nonimmediate_operand")
2477 (match_operand:VF_512 2 "nonimmediate_operand")))]
2479 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Logic-operation insn for all VF modes; operand 1 is marked commutative
;; ('%') and the condition requires ix86_binary_operator_ok.
;; The assembler template is assembled at output time into a static buffer:
;;   - the suffix is picked from the insn's "mode" attribute
;;     (<ssemodesuffix> in the default case);
;;   - which_alternative selects the two-operand "<logic>" form or the
;;     three-operand "v<logic>" form;
;;   - for 64-byte modes the integer "vp<logic>" form is used instead, per
;;     the comment below.
2481 (define_insn "*<code><mode>3"
2482 [(set (match_operand:VF 0 "register_operand" "=x,v")
2484 (match_operand:VF 1 "nonimmediate_operand" "%0,v")
2485 (match_operand:VF 2 "nonimmediate_operand" "xm,vm")))]
2486 "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
2488 static char buf[32];
2492 switch (get_attr_mode (insn))
2499 suffix = "<ssemodesuffix>";
2502 switch (which_alternative)
2505 ops = "<logic>%s\t{%%2, %%0|%%0, %%2}";
2508 ops = "v<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
2514 /* There is no v<logic>p[sd]. Use vp<logic>q. */
2515 if (<MODE_SIZE> == 64)
2518 ops = "vp<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
2521 snprintf (buf, sizeof (buf), ops, suffix);
2524 [(set_attr "isa" "noavx,avx")
2525 (set_attr "type" "sselog")
2526 (set_attr "prefix" "orig,maybe_evex")
2528 (cond [(and (match_test "<MODE_SIZE> == 16")
2529 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
2530 (const_string "<ssePSmode>")
2531 (match_test "TARGET_AVX")
2532 (const_string "<MODE>")
2533 (match_test "optimize_function_for_size_p (cfun)")
2534 (const_string "V4SF")
2536 (const_string "<MODE>")))])
;; copysign for all VF modes, built from logic operations:
;;   (not mask) combined with operand 1,
;;   mask and-ed with operand 2,
;;   and the two intermediate values (operands 4 and 5) ior-ed into
;;   operand 0.
;; Operand 3 is the mask produced by ix86_build_signbit_mask; operands 4 and
;; 5 are fresh pseudo registers created in the preparation statements.
2538 (define_expand "copysign<mode>3"
2541 (not:VF (match_dup 3))
2542 (match_operand:VF 1 "nonimmediate_operand")))
2544 (and:VF (match_dup 3)
2545 (match_operand:VF 2 "nonimmediate_operand")))
2546 (set (match_operand:VF 0 "register_operand")
2547 (ior:VF (match_dup 4) (match_dup 5)))]
2550 operands[3] = ix86_build_signbit_mask (<MODE>mode, 1, 0);
2552 operands[4] = gen_reg_rtx (<MODE>mode);
2553 operands[5] = gen_reg_rtx (<MODE>mode);
2556 ;; Also define scalar versions. These are used for abs, neg, and
2557 ;; conditional move. Using subregs into vector modes causes register
2558 ;; allocation lossage. These patterns do not allow memory operands
2559 ;; because the native instructions read the full 128 bits.
;; Scalar (MODEF) and-not; every operand is a register.
;; The template is assembled at output time: the suffix is "ps" when the
;; insn's "mode" attribute is V4SF and <ssevecmodesuffix> otherwise, and
;; which_alternative selects the two-operand "andn" form or the
;; three-operand "vandn" form.
2561 (define_insn "*andnot<mode>3"
2562 [(set (match_operand:MODEF 0 "register_operand" "=x,x")
2565 (match_operand:MODEF 1 "register_operand" "0,x"))
2566 (match_operand:MODEF 2 "register_operand" "x,x")))]
2567 "SSE_FLOAT_MODE_P (<MODE>mode)"
2569 static char buf[32];
2572 = (get_attr_mode (insn) == MODE_V4SF) ? "ps" : "<ssevecmodesuffix>";
2574 switch (which_alternative)
2577 ops = "andn%s\t{%%2, %%0|%%0, %%2}";
2580 ops = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
2586 snprintf (buf, sizeof (buf), ops, suffix);
2589 [(set_attr "isa" "noavx,avx")
2590 (set_attr "type" "sselog")
2591 (set_attr "prefix" "orig,vex")
2593 (cond [(and (match_test "<MODE_SIZE> == 16")
2594 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
2595 (const_string "V4SF")
2596 (match_test "TARGET_AVX")
2597 (const_string "<ssevecmode>")
2598 (match_test "optimize_function_for_size_p (cfun)")
2599 (const_string "V4SF")
2601 (const_string "<ssevecmode>")))])
;; TFmode and-not.  Operand 1 (the negated input) is a register; operand 2
;; may be a register or memory.
;; The mnemonic is "andnps" when the insn's "mode" attribute is V4SF and
;; "pandn" otherwise; alternative 1 prepends "v" and uses three operands.
;; The "mode" attribute is V4SF or TI depending on tuning, TARGET_AVX,
;; TARGET_SSE2 and optimize-for-size; prefix_data16 depends on the
;; alternative and on whether the mode is TI.
2603 (define_insn "*andnottf3"
2604 [(set (match_operand:TF 0 "register_operand" "=x,x")
2606 (not:TF (match_operand:TF 1 "register_operand" "0,x"))
2607 (match_operand:TF 2 "nonimmediate_operand" "xm,xm")))]
2610 static char buf[32];
2613 = (get_attr_mode (insn) == MODE_V4SF) ? "andnps" : "pandn";
2615 switch (which_alternative)
2618 ops = "%s\t{%%2, %%0|%%0, %%2}";
2621 ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
2627 snprintf (buf, sizeof (buf), ops, tmp);
2630 [(set_attr "isa" "noavx,avx")
2631 (set_attr "type" "sselog")
2632 (set (attr "prefix_data16")
2634 (and (eq_attr "alternative" "0")
2635 (eq_attr "mode" "TI"))
2637 (const_string "*")))
2638 (set_attr "prefix" "orig,vex")
2640 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
2641 (const_string "V4SF")
2642 (match_test "TARGET_AVX")
2644 (ior (not (match_test "TARGET_SSE2"))
2645 (match_test "optimize_function_for_size_p (cfun)"))
2646 (const_string "V4SF")
2648 (const_string "TI")))])
;; Scalar (MODEF) logic operation; every operand is a register and operand 1
;; is marked commutative ('%').
;; The suffix is "ps" when the insn's "mode" attribute is V4SF and
;; <ssevecmodesuffix> otherwise; which_alternative selects the two-operand
;; "<logic>" form or the three-operand "v<logic>" form.
2650 (define_insn "*<code><mode>3"
2651 [(set (match_operand:MODEF 0 "register_operand" "=x,x")
2653 (match_operand:MODEF 1 "register_operand" "%0,x")
2654 (match_operand:MODEF 2 "register_operand" "x,x")))]
2655 "SSE_FLOAT_MODE_P (<MODE>mode)"
2657 static char buf[32];
2660 = (get_attr_mode (insn) == MODE_V4SF) ? "ps" : "<ssevecmodesuffix>";
2662 switch (which_alternative)
2665 ops = "<logic>%s\t{%%2, %%0|%%0, %%2}";
2668 ops = "v<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
2674 snprintf (buf, sizeof (buf), ops, suffix);
2677 [(set_attr "isa" "noavx,avx")
2678 (set_attr "type" "sselog")
2679 (set_attr "prefix" "orig,vex")
2681 (cond [(and (match_test "<MODE_SIZE> == 16")
2682 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
2683 (const_string "V4SF")
2684 (match_test "TARGET_AVX")
2685 (const_string "<ssevecmode>")
2686 (match_test "optimize_function_for_size_p (cfun)")
2687 (const_string "V4SF")
2689 (const_string "<ssevecmode>")))])
;; TFmode logic-operation expander.  Both inputs may be registers or memory;
;; the operands are passed through ix86_fixup_binary_operands_no_copy before
;; the RTL is generated.
2691 (define_expand "<code>tf3"
2692 [(set (match_operand:TF 0 "register_operand")
2694 (match_operand:TF 1 "nonimmediate_operand")
2695 (match_operand:TF 2 "nonimmediate_operand")))]
2697 "ix86_fixup_binary_operands_no_copy (<CODE>, TFmode, operands);")
;; TFmode logic-operation insn; operand 1 is marked commutative ('%') and
;; the condition requires ix86_binary_operator_ok.
;; The mnemonic is "<logic>ps" when the insn's "mode" attribute is V4SF and
;; "p<logic>" otherwise; alternative 1 prepends "v" and uses three operands.
;; The "mode" attribute is V4SF or TI depending on tuning, TARGET_AVX,
;; TARGET_SSE2 and optimize-for-size.
2699 (define_insn "*<code>tf3"
2700 [(set (match_operand:TF 0 "register_operand" "=x,x")
2702 (match_operand:TF 1 "nonimmediate_operand" "%0,x")
2703 (match_operand:TF 2 "nonimmediate_operand" "xm,xm")))]
2705 && ix86_binary_operator_ok (<CODE>, TFmode, operands)"
2707 static char buf[32];
2710 = (get_attr_mode (insn) == MODE_V4SF) ? "<logic>ps" : "p<logic>";
2712 switch (which_alternative)
2715 ops = "%s\t{%%2, %%0|%%0, %%2}";
2718 ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
2724 snprintf (buf, sizeof (buf), ops, tmp);
2727 [(set_attr "isa" "noavx,avx")
2728 (set_attr "type" "sselog")
2729 (set (attr "prefix_data16")
2731 (and (eq_attr "alternative" "0")
2732 (eq_attr "mode" "TI"))
2734 (const_string "*")))
2735 (set_attr "prefix" "orig,vex")
2737 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
2738 (const_string "V4SF")
2739 (match_test "TARGET_AVX")
2741 (ior (not (match_test "TARGET_SSE2"))
2742 (match_test "optimize_function_for_size_p (cfun)"))
2743 (const_string "V4SF")
2745 (const_string "TI")))])
2747 ;; There is no floating-point xor for V16SF and V8DF in AVX512F,
2748 ;; but we need one for negation. Instead we use the integer versions
2749 ;; of xor. Maybe there could be a better way to do that.
;; Element-size suffix appended to vp<logic> for each 512-bit float mode:
;; "d" for V16SF and "q" for V8DF.
2751 (define_mode_attr avx512flogicsuff
2752 [(V16SF "d") (V8DF "q")])
;; Logic operation on VF_512 operands emitted as the integer instruction
;; vp<logic> with the <avx512flogicsuff> element suffix.  Operand 1 is a
;; register; operand 2 may be a register or memory.
;; NOTE(review): unlike the other sselog patterns in this file, no "mode"
;; attribute is set here -- confirm whether that is intentional.
2754 (define_insn "avx512f_<logic><mode>"
2755 [(set (match_operand:VF_512 0 "register_operand" "=v")
2757 (match_operand:VF_512 1 "register_operand" "v")
2758 (match_operand:VF_512 2 "nonimmediate_operand" "vm")))]
2760 "vp<logic><avx512flogicsuff>\t{%2, %1, %0|%0, %1, %2}"
2761 [(set_attr "type" "sselog")
2762 (set_attr "prefix" "evex")])
2764 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2766 ;; FMA floating point multiply/accumulate instructions. These include
2767 ;; scalar versions of the instructions as well as vector versions.
2769 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2771 ;; The standard names for scalar FMA are only available with SSE math enabled.
2772 ;; The AVX512F CPUID bit enables EVEX-encoded scalar and 512-bit fma. It does
2773 ;; not depend on the FMA bit, so we enable fma for TARGET_AVX512F even when
2774 ;; TARGET_FMA and TARGET_FMA4 are both false.
2775 ;; TODO: In theory AVX512F does not automatically imply FMA, and without FMA
2776 ;; one must force the EVEX encoding of the fma insns. Ideally we'd improve
2777 ;; GAS to allow proper prefix selection. However, for the moment all hardware
2778 ;; that supports AVX512F also supports FMA, so we can ignore this for now.
;; Modes for the standard-named fma/fms/fnma/fnms expanders.  The scalar
;; entries additionally require TARGET_SSE_MATH; the 128/256-bit vector
;; entries need FMA or FMA4; the 512-bit entries need AVX512F.
2779 (define_mode_iterator FMAMODEM
2780 [(SF "TARGET_SSE_MATH && (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F)")
2781 (DF "TARGET_SSE_MATH && (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F)")
2782 (V4SF "TARGET_FMA || TARGET_FMA4")
2783 (V2DF "TARGET_FMA || TARGET_FMA4")
2784 (V8SF "TARGET_FMA || TARGET_FMA4")
2785 (V4DF "TARGET_FMA || TARGET_FMA4")
2786 (V16SF "TARGET_AVX512F")
2787 (V8DF "TARGET_AVX512F")])
;; Standard-name fused multiply-add expander over FMAMODEM.  Operands 1..3
;; may be registers or memory and none of them is negated.  There is no
;; preparation code.
2789 (define_expand "fma<mode>4"
2790 [(set (match_operand:FMAMODEM 0 "register_operand")
2792 (match_operand:FMAMODEM 1 "nonimmediate_operand")
2793 (match_operand:FMAMODEM 2 "nonimmediate_operand")
2794 (match_operand:FMAMODEM 3 "nonimmediate_operand")))])
;; Standard-name fused multiply-subtract expander over FMAMODEM: same shape
;; as fma<mode>4 but operand 3 is wrapped in a neg.
2796 (define_expand "fms<mode>4"
2797 [(set (match_operand:FMAMODEM 0 "register_operand")
2799 (match_operand:FMAMODEM 1 "nonimmediate_operand")
2800 (match_operand:FMAMODEM 2 "nonimmediate_operand")
2801 (neg:FMAMODEM (match_operand:FMAMODEM 3 "nonimmediate_operand"))))])
;; Standard-name negated fused multiply-add expander over FMAMODEM: operand
;; 1 is wrapped in a neg, operands 2 and 3 are used as given.
2803 (define_expand "fnma<mode>4"
2804 [(set (match_operand:FMAMODEM 0 "register_operand")
2806 (neg:FMAMODEM (match_operand:FMAMODEM 1 "nonimmediate_operand"))
2807 (match_operand:FMAMODEM 2 "nonimmediate_operand")
2808 (match_operand:FMAMODEM 3 "nonimmediate_operand")))])
;; Standard-name negated fused multiply-subtract expander over FMAMODEM:
;; both operand 1 and operand 3 are wrapped in a neg.
2810 (define_expand "fnms<mode>4"
2811 [(set (match_operand:FMAMODEM 0 "register_operand")
2813 (neg:FMAMODEM (match_operand:FMAMODEM 1 "nonimmediate_operand"))
2814 (match_operand:FMAMODEM 2 "nonimmediate_operand")
2815 (neg:FMAMODEM (match_operand:FMAMODEM 3 "nonimmediate_operand"))))])
2817 ;; The builtins for intrinsics are not constrained by SSE math enabled.
;; Same mode list as FMAMODEM, except that the scalar SF/DF entries do not
;; test TARGET_SSE_MATH.
2818 (define_mode_iterator FMAMODE
2819 [(SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
2820 (DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
2821 (V4SF "TARGET_FMA || TARGET_FMA4")
2822 (V2DF "TARGET_FMA || TARGET_FMA4")
2823 (V8SF "TARGET_FMA || TARGET_FMA4")
2824 (V4DF "TARGET_FMA || TARGET_FMA4")
2825 (V16SF "TARGET_AVX512F")
2826 (V8DF "TARGET_AVX512F")])
;; Fused multiply-add expander over FMAMODE (the iterator that does not
;; require SSE math).  Operands 1..3 may be registers or memory; there is no
;; preparation code.
2828 (define_expand "fma4i_fmadd_<mode>"
2829 [(set (match_operand:FMAMODE 0 "register_operand")
2831 (match_operand:FMAMODE 1 "nonimmediate_operand")
2832 (match_operand:FMAMODE 2 "nonimmediate_operand")
2833 (match_operand:FMAMODE 3 "nonimmediate_operand")))])
;; Expander for the "maskz" form of the 512-bit fmadd.  It emits
;; fma_fmadd_<mode>_maskz_1, forwarding operands 0..3, then
;; CONST0_RTX (<MODE>mode), then the mask register (operand 4), followed by
;; whatever <round_expand_operand> contributes.
2835 (define_expand "avx512f_fmadd_<mode>_maskz<round_expand_name>"
2836 [(match_operand:VF_512 0 "register_operand")
2837 (match_operand:VF_512 1 "<round_expand_nimm_predicate>")
2838 (match_operand:VF_512 2 "<round_expand_nimm_predicate>")
2839 (match_operand:VF_512 3 "<round_expand_nimm_predicate>")
2840 (match_operand:<avx512fmaskmode> 4 "register_operand")]
2843 emit_insn (gen_fma_fmadd_<mode>_maskz_1<round_expand_name> (
2844 operands[0], operands[1], operands[2], operands[3],
2845 CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>));
;; FMAMODE without the 512-bit V16SF/V8DF entries; used by the FMA insn
;; patterns that also provide FMA4 alternatives.
2849 (define_mode_iterator FMAMODE_NOVF512
2850 [(SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
2851 (DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
2852 (V4SF "TARGET_FMA || TARGET_FMA4")
2853 (V2DF "TARGET_FMA || TARGET_FMA4")
2854 (V8SF "TARGET_FMA || TARGET_FMA4")
2855 (V4DF "TARGET_FMA || TARGET_FMA4")])
;; Fused multiply-add for the FMAMODE_NOVF512 modes.  Five alternatives:
;;   0: vfmadd132 -- destination tied to operand 1 (commutative with 2)
;;   1: vfmadd213 -- destination tied to operand 1
;;   2: vfmadd231 -- destination tied to operand 3
;;   3,4: four-operand FMA4 vfmadd (isa fma4), memory allowed in operand 3
;;        or operand 2 respectively
;; Alternatives 0..2 carry isa fma_avx512f and include the sd_mask / round
;; substitution operands in their templates.
2857 (define_insn "<sd_mask_codefor>fma_fmadd_<mode><sd_maskz_name><round_name>"
2858 [(set (match_operand:FMAMODE_NOVF512 0 "register_operand" "=v,v,v,x,x")
2859 (fma:FMAMODE_NOVF512
2860 (match_operand:FMAMODE_NOVF512 1 "<round_nimm_predicate>" "%0,0,v,x,x")
2861 (match_operand:FMAMODE_NOVF512 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>,x,m")
2862 (match_operand:FMAMODE_NOVF512 3 "<round_nimm_predicate>" "v,<round_constraint>,0,xm,x")))]
2863 "<sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
2865 vfmadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
2866 vfmadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
2867 vfmadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}
2868 vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
2869 vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2870 [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f,fma4,fma4")
2871 (set_attr "type" "ssemuladd")
2872 (set_attr "mode" "<MODE>")])
;; Fused multiply-add for the VF_512 modes.  Only the three
;; 132/213/231 alternatives exist (no FMA4 forms); the destination is tied
;; to operand 1 in the first two and to operand 3 in the last.
2874 (define_insn "<sd_mask_codefor>fma_fmadd_<mode><sd_maskz_name><round_name>"
2875 [(set (match_operand:VF_512 0 "register_operand" "=v,v,v")
2877 (match_operand:VF_512 1 "<round_nimm_predicate>" "%0,0,v")
2878 (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
2879 (match_operand:VF_512 3 "<round_nimm_predicate>" "v,<round_constraint>,0")))]
2880 "<sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
2882 vfmadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
2883 vfmadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
2884 vfmadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
2885 [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f")
2886 (set_attr "type" "ssemuladd")
2887 (set_attr "mode" "<MODE>")])
;; Masked 512-bit fmadd with the destination tied to operand 1 in both
;; alternatives (vfmadd132 / vfmadd213).  Operand 4 is the mask register,
;; printed as the {%4} write mask; the rounding operand, when present, is
;; printed through <round_op5>.
2889 (define_insn "avx512f_fmadd_<mode>_mask<round_name>"
2890 [(set (match_operand:VF_512 0 "register_operand" "=v,v")
2893 (match_operand:VF_512 1 "register_operand" "0,0")
2894 (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>,v")
2895 (match_operand:VF_512 3 "<round_nimm_predicate>" "v,<round_constraint>"))
2897 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
2900 vfmadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
2901 vfmadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
2902 [(set_attr "isa" "fma_avx512f,fma_avx512f")
2903 (set_attr "type" "ssemuladd")
2904 (set_attr "mode" "<MODE>")])
;; Masked 512-bit fmadd with the destination tied to operand 3
;; (vfmadd231).  Operand 4 is the mask register, printed as the {%4} write
;; mask; the rounding operand, when present, is printed through
;; <round_op5>.
;; The destination and operand 1 use the "v" constraint, like the sibling
;; fmsub/fnmadd _mask3 patterns.  This insn is EVEX-only, so the narrower
;; "x" class would needlessly keep the register allocator away from the
;; upper vector registers.
2906 (define_insn "avx512f_fmadd_<mode>_mask3<round_name>"
2907 [(set (match_operand:VF_512 0 "register_operand" "=v")
2910 (match_operand:VF_512 1 "register_operand" "v")
2911 (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>")
2912 (match_operand:VF_512 3 "register_operand" "0"))
2914 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
2916 "vfmadd231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
2917 [(set_attr "isa" "fma_avx512f")
2918 (set_attr "type" "ssemuladd")
2919 (set_attr "mode" "<MODE>")])
;; Fused multiply-subtract for the FMAMODE_NOVF512 modes: an fma whose third
;; operand is wrapped in a neg.  Five alternatives:
;;   0: vfmsub132 -- destination tied to operand 1 (commutative with 2)
;;   1: vfmsub213 -- destination tied to operand 1
;;   2: vfmsub231 -- destination tied to operand 3
;;   3,4: four-operand FMA4 vfmsub (isa fma4)
2921 (define_insn "<sd_mask_codefor>fma_fmsub_<mode><sd_maskz_name><round_name>"
2922 [(set (match_operand:FMAMODE_NOVF512 0 "register_operand" "=v,v,v,x,x")
2923 (fma:FMAMODE_NOVF512
2924 (match_operand:FMAMODE_NOVF512 1 "<round_nimm_predicate>" "%0,0,v,x,x")
2925 (match_operand:FMAMODE_NOVF512 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>,x,m")
2926 (neg:FMAMODE_NOVF512
2927 (match_operand:FMAMODE_NOVF512 3 "<round_nimm_predicate>" "v,<round_constraint>,0,xm,x"))))]
2928 "<sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
2930 vfmsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
2931 vfmsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
2932 vfmsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}
2933 vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
2934 vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2935 [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f,fma4,fma4")
2936 (set_attr "type" "ssemuladd")
2937 (set_attr "mode" "<MODE>")])
;; Fused multiply-subtract for the VF_512 modes.  Only the three
;; 132/213/231 alternatives exist; the destination is tied to operand 1 in
;; the first two and to operand 3 in the last.
2939 (define_insn "<sd_mask_codefor>fma_fmsub_<mode><sd_maskz_name><round_name>"
2940 [(set (match_operand:VF_512 0 "register_operand" "=v,v,v")
2942 (match_operand:VF_512 1 "<round_nimm_predicate>" "%0,0,v")
2943 (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
2945 (match_operand:VF_512 3 "<round_nimm_predicate>" "v,<round_constraint>,0"))))]
2946 "<sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
2948 vfmsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
2949 vfmsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
2950 vfmsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
2951 [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f")
2952 (set_attr "type" "ssemuladd")
2953 (set_attr "mode" "<MODE>")])
;; Masked 512-bit fmsub with the destination tied to operand 1 in both
;; alternatives (vfmsub132 / vfmsub213).  Operand 4 is the mask register,
;; printed as the {%4} write mask.
2955 (define_insn "avx512f_fmsub_<mode>_mask<round_name>"
2956 [(set (match_operand:VF_512 0 "register_operand" "=v,v")
2959 (match_operand:VF_512 1 "register_operand" "0,0")
2960 (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>,v")
2962 (match_operand:VF_512 3 "<round_nimm_predicate>" "v,<round_constraint>")))
2964 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
2967 vfmsub132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
2968 vfmsub213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
2969 [(set_attr "isa" "fma_avx512f,fma_avx512f")
2970 (set_attr "type" "ssemuladd")
2971 (set_attr "mode" "<MODE>")])
;; Masked 512-bit fmsub with the destination tied to operand 3
;; (vfmsub231).  Operand 4 is the mask register, printed as the {%4} write
;; mask.
2973 (define_insn "avx512f_fmsub_<mode>_mask3<round_name>"
2974 [(set (match_operand:VF_512 0 "register_operand" "=v")
2977 (match_operand:VF_512 1 "register_operand" "v")
2978 (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>")
2980 (match_operand:VF_512 3 "register_operand" "0")))
2982 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
2984 "vfmsub231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
2985 [(set_attr "isa" "fma_avx512f")
2986 (set_attr "type" "ssemuladd")
2987 (set_attr "mode" "<MODE>")])
;; Negated fused multiply-add for the FMAMODE_NOVF512 modes: an fma whose
;; first operand is wrapped in a neg.  Five alternatives:
;;   0: vfnmadd132 -- destination tied to operand 1 (commutative with 2)
;;   1: vfnmadd213 -- destination tied to operand 1
;;   2: vfnmadd231 -- destination tied to operand 3
;;   3,4: four-operand FMA4 vfnmadd (isa fma4)
2989 (define_insn "<sd_mask_codefor>fma_fnmadd_<mode><sd_maskz_name><round_name>"
2990 [(set (match_operand:FMAMODE_NOVF512 0 "register_operand" "=v,v,v,x,x")
2991 (fma:FMAMODE_NOVF512
2992 (neg:FMAMODE_NOVF512
2993 (match_operand:FMAMODE_NOVF512 1 "<round_nimm_predicate>" "%0,0,v,x,x"))
2994 (match_operand:FMAMODE_NOVF512 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>,x,m")
2995 (match_operand:FMAMODE_NOVF512 3 "<round_nimm_predicate>" "v,<round_constraint>,0,xm,x")))]
2996 "<sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
2998 vfnmadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
2999 vfnmadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
3000 vfnmadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}
3001 vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
3002 vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
3003 [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f,fma4,fma4")
3004 (set_attr "type" "ssemuladd")
3005 (set_attr "mode" "<MODE>")])
;; Negated fused multiply-add for the VF_512 modes.  Only the three
;; 132/213/231 alternatives exist; the destination is tied to operand 1 in
;; the first two and to operand 3 in the last.
3007 (define_insn "<sd_mask_codefor>fma_fnmadd_<mode><sd_maskz_name><round_name>"
3008 [(set (match_operand:VF_512 0 "register_operand" "=v,v,v")
3011 (match_operand:VF_512 1 "<round_nimm_predicate>" "%0,0,v"))
3012 (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
3013 (match_operand:VF_512 3 "<round_nimm_predicate>" "v,<round_constraint>,0")))]
3014 "<sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
3016 vfnmadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
3017 vfnmadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
3018 vfnmadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
3019 [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f")
3020 (set_attr "type" "ssemuladd")
3021 (set_attr "mode" "<MODE>")])
;; Masked 512-bit fnmadd with the destination tied to operand 1 in both
;; alternatives (vfnmadd132 / vfnmadd213).  Operand 4 is the mask register,
;; printed as the {%4} write mask.
3023 (define_insn "avx512f_fnmadd_<mode>_mask<round_name>"
3024 [(set (match_operand:VF_512 0 "register_operand" "=v,v")
3028 (match_operand:VF_512 1 "register_operand" "0,0"))
3029 (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>,v")
3030 (match_operand:VF_512 3 "<round_nimm_predicate>" "v,<round_constraint>"))
3032 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
3035 vfnmadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
3036 vfnmadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
3037 [(set_attr "isa" "fma_avx512f,fma_avx512f")
3038 (set_attr "type" "ssemuladd")
3039 (set_attr "mode" "<MODE>")])
;; Masked 512-bit fnmadd with the destination tied to operand 3
;; (vfnmadd231).  Operand 4 is the mask register, printed as the {%4} write
;; mask.
3041 (define_insn "avx512f_fnmadd_<mode>_mask3<round_name>"
3042 [(set (match_operand:VF_512 0 "register_operand" "=v")
3046 (match_operand:VF_512 1 "register_operand" "v"))
3047 (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>")
3048 (match_operand:VF_512 3 "register_operand" "0"))
3050 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
3052 "vfnmadd231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
3053 [(set_attr "isa" "fma_avx512f")
3054 (set_attr "type" "ssemuladd")
3055 (set_attr "mode" "<MODE>")])
;; Negated fused multiply-subtract for the FMAMODE_NOVF512 modes: an fma
;; whose first and third operands are both wrapped in a neg.  Five
;; alternatives:
;;   0: vfnmsub132 -- destination tied to operand 1 (commutative with 2)
;;   1: vfnmsub213 -- destination tied to operand 1
;;   2: vfnmsub231 -- destination tied to operand 3
;;   3,4: four-operand FMA4 vfnmsub (isa fma4)
3057 (define_insn "<sd_mask_codefor>fma_fnmsub_<mode><sd_maskz_name><round_name>"
3058 [(set (match_operand:FMAMODE_NOVF512 0 "register_operand" "=v,v,v,x,x")
3059 (fma:FMAMODE_NOVF512
3060 (neg:FMAMODE_NOVF512
3061 (match_operand:FMAMODE_NOVF512 1 "<round_nimm_predicate>" "%0,0,v,x,x"))
3062 (match_operand:FMAMODE_NOVF512 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>,x,m")
3063 (neg:FMAMODE_NOVF512
3064 (match_operand:FMAMODE_NOVF512 3 "<round_nimm_predicate>" "v,<round_constraint>,0,xm,x"))))]
3065 "<sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
3067 vfnmsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
3068 vfnmsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
3069 vfnmsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}
3070 vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
3071 vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
3072 [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f,fma4,fma4")
3073 (set_attr "type" "ssemuladd")
3074 (set_attr "mode" "<MODE>")])
;; VF_512 variant of the fnmsub pattern: only the three-operand
;; vfnmsub132/213/231 alternatives exist (all isa fma_avx512f); there are
;; no four-operand FMA4 alternatives for these modes.
3076 (define_insn "<sd_mask_codefor>fma_fnmsub_<mode><sd_maskz_name><round_name>"
3077 [(set (match_operand:VF_512 0 "register_operand" "=v,v,v")
3080 (match_operand:VF_512 1 "<round_nimm_predicate>" "%0,0,v"))
3081 (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
3083 (match_operand:VF_512 3 "<round_nimm_predicate>" "v,<round_constraint>,0"))))]
3084 "<sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
3086 vfnmsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
3087 vfnmsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
3088 vfnmsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
3089 [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f")
3090 (set_attr "type" "ssemuladd")
3091 (set_attr "mode" "<MODE>")])
;; Masked vfnmsub for VF_512 modes.  Operand 1 is tied to the destination
;; in both alternatives and operand 4 (Yk) is printed as the %{%4%}
;; write-mask.  The alternative picks the 132 or 213 form according to
;; whether operand 2 or operand 3 takes <round_constraint>.
3093 (define_insn "avx512f_fnmsub_<mode>_mask<round_name>"
3094 [(set (match_operand:VF_512 0 "register_operand" "=v,v")
3098 (match_operand:VF_512 1 "register_operand" "0,0"))
3099 (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>,v")
3101 (match_operand:VF_512 3 "<round_nimm_predicate>" "v,<round_constraint>")))
3103 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
3106 vfnmsub132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
3107 vfnmsub213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
3108 [(set_attr "isa" "fma_avx512f,fma_avx512f")
3109 (set_attr "type" "ssemuladd")
3110 (set_attr "mode" "<MODE>")])
;; Masked vfnmsub231 for VF_512 modes.  Operand 3 is tied to the
;; destination (constraint "0"); operand 4 (Yk) is printed as the %{%4%}
;; write-mask on %0.
3112 (define_insn "avx512f_fnmsub_<mode>_mask3<round_name>"
3113 [(set (match_operand:VF_512 0 "register_operand" "=v")
3117 (match_operand:VF_512 1 "register_operand" "v"))
3118 (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>")
3120 (match_operand:VF_512 3 "register_operand" "0")))
3122 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
3124 "vfnmsub231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
3125 [(set_attr "isa" "fma_avx512f")
3126 (set_attr "type" "ssemuladd")
3127 (set_attr "mode" "<MODE>")])
3129 ;; FMA parallel floating point multiply addsub and subadd operations.
3131 ;; It would be possible to represent these without the UNSPEC as
3134 ;; (fma op1 op2 op3)
3135 ;; (fma op1 op2 (neg op3))
3138 ;; But this doesn't seem useful in practice.
;; Named expander for fmaddsub on every VF mode.  All three inputs accept
;; nonimmediate operands; the expander is enabled when any one of FMA,
;; FMA4 or AVX512F is available.
3140 (define_expand "fmaddsub_<mode>"
3141 [(set (match_operand:VF 0 "register_operand")
3143 [(match_operand:VF 1 "nonimmediate_operand")
3144 (match_operand:VF 2 "nonimmediate_operand")
3145 (match_operand:VF 3 "nonimmediate_operand")]
3147 "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
;; maskz expander for VF_512 fmaddsub.  It forwards operands 0-3 to
;; gen_fma_fmaddsub_<mode>_maskz_1, inserting CONST0_RTX (<MODE>mode)
;; ahead of the mask (operand 4) and appending <round_expand_operand>.
3149 (define_expand "avx512f_fmaddsub_<mode>_maskz<round_expand_name>"
3150 [(match_operand:VF_512 0 "register_operand")
3151 (match_operand:VF_512 1 "<round_expand_nimm_predicate>")
3152 (match_operand:VF_512 2 "<round_expand_nimm_predicate>")
3153 (match_operand:VF_512 3 "<round_expand_nimm_predicate>")
3154 (match_operand:<avx512fmaskmode> 4 "register_operand")]
3157 emit_insn (gen_fma_fmaddsub_<mode>_maskz_1<round_expand_name> (
3158 operands[0], operands[1], operands[2], operands[3],
3159 CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>));
;; fmaddsub for VF_128_256 modes.  Alternatives 0-2 emit the three-operand
;; vfmaddsub132/213/231 forms (isa fma_avx512f); alternatives 3-4 emit the
;; four-operand vfmaddsub form (isa fma4).  Constraint alternatives,
;; template lines and the "isa" list are matched by position.
3163 (define_insn "<sd_mask_codefor>fma_fmaddsub_<mode><sd_maskz_name><round_name>"
3164 [(set (match_operand:VF_128_256 0 "register_operand" "=v,v,v,x,x")
3166 [(match_operand:VF_128_256 1 "<round_nimm_predicate>" "%0,0,v,x,x")
3167 (match_operand:VF_128_256 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>,x,m")
3168 (match_operand:VF_128_256 3 "<round_nimm_predicate>" "v,<round_constraint>,0,xm,x")]
3170 "(TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F) && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
3172 vfmaddsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
3173 vfmaddsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
3174 vfmaddsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}
3175 vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
3176 vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
3177 [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f,fma4,fma4")
3178 (set_attr "type" "ssemuladd")
3179 (set_attr "mode" "<MODE>")])
;; VF_512 variant of fmaddsub, enabled only with TARGET_AVX512F.  Only the
;; three-operand vfmaddsub132/213/231 alternatives exist for these modes.
3181 (define_insn "<sd_mask_codefor>fma_fmaddsub_<mode><sd_maskz_name><round_name>"
3182 [(set (match_operand:VF_512 0 "register_operand" "=v,v,v")
3184 [(match_operand:VF_512 1 "<round_nimm_predicate>" "%0,0,v")
3185 (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
3186 (match_operand:VF_512 3 "<round_nimm_predicate>" "v,<round_constraint>,0")]
3188 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
3190 vfmaddsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
3191 vfmaddsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
3192 vfmaddsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
3193 [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f")
3194 (set_attr "type" "ssemuladd")
3195 (set_attr "mode" "<MODE>")])
;; Masked vfmaddsub for VF_512 modes.  Operand 1 is tied to the
;; destination in both alternatives; operand 4 (Yk) is printed as the
;; %{%4%} write-mask.  Alternative 0 is the 132 form, alternative 1 the
;; 213 form.
3197 (define_insn "avx512f_fmaddsub_<mode>_mask<round_name>"
3198 [(set (match_operand:VF_512 0 "register_operand" "=v,v")
3201 [(match_operand:VF_512 1 "register_operand" "0,0")
3202 (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>,v")
3203 (match_operand:VF_512 3 "<round_nimm_predicate>" "v,<round_constraint>")]
3206 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
3209 vfmaddsub132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
3210 vfmaddsub213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
3211 [(set_attr "isa" "fma_avx512f,fma_avx512f")
3212 (set_attr "type" "ssemuladd")
3213 (set_attr "mode" "<MODE>")])
;; Masked vfmaddsub231 for VF_512 modes.  Operand 3 is tied to the
;; destination (constraint "0"); operand 4 (Yk) is printed as the %{%4%}
;; write-mask on %0.
3215 (define_insn "avx512f_fmaddsub_<mode>_mask3<round_name>"
3216 [(set (match_operand:VF_512 0 "register_operand" "=v")
3219 [(match_operand:VF_512 1 "register_operand" "v")
3220 (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>")
3221 (match_operand:VF_512 3 "register_operand" "0")]
3224 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
3226 "vfmaddsub231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
3227 [(set_attr "isa" "fma_avx512f")
3228 (set_attr "type" "ssemuladd")
3229 (set_attr "mode" "<MODE>")])
;; fmsubadd for VF_128_256 modes.  Alternatives 0-2 emit the three-operand
;; vfmsubadd132/213/231 forms (isa fma_avx512f); alternatives 3-4 emit the
;; four-operand vfmsubadd form (isa fma4).  Constraint alternatives,
;; template lines and the "isa" list are matched by position.
3231 (define_insn "<sd_mask_codefor>fma_fmsubadd_<mode><sd_maskz_name><round_name>"
3232 [(set (match_operand:VF_128_256 0 "register_operand" "=v,v,v,x,x")
3234 [(match_operand:VF_128_256 1 "<round_nimm_predicate>" "%0,0,v,x,x")
3235 (match_operand:VF_128_256 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>,x,m")
3237 (match_operand:VF_128_256 3 "<round_nimm_predicate>" "v,<round_constraint>,0,xm,x"))]
3239 "(TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F) && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
3241 vfmsubadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
3242 vfmsubadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
3243 vfmsubadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}
3244 vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
3245 vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
3246 [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f,fma4,fma4")
3247 (set_attr "type" "ssemuladd")
3248 (set_attr "mode" "<MODE>")])
;; VF_512 variant of fmsubadd, enabled only with TARGET_AVX512F.  Only the
;; three-operand vfmsubadd132/213/231 alternatives exist for these modes.
3250 (define_insn "<sd_mask_codefor>fma_fmsubadd_<mode><sd_maskz_name><round_name>"
3251 [(set (match_operand:VF_512 0 "register_operand" "=v,v,v")
3253 [(match_operand:VF_512 1 "<round_nimm_predicate>" "%0,0,v")
3254 (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
3256 (match_operand:VF_512 3 "<round_nimm_predicate>" "v,<round_constraint>,0"))]
3258 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
3260 vfmsubadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
3261 vfmsubadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
3262 vfmsubadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
3263 [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f")
3264 (set_attr "type" "ssemuladd")
3265 (set_attr "mode" "<MODE>")])
;; Masked vfmsubadd for VF_512 modes.  Operand 1 is tied to the
;; destination in both alternatives; operand 4 (Yk) is printed as the
;; %{%4%} write-mask.  Alternative 0 is the 132 form, alternative 1 the
;; 213 form.
3267 (define_insn "avx512f_fmsubadd_<mode>_mask<round_name>"
3268 [(set (match_operand:VF_512 0 "register_operand" "=v,v")
3271 [(match_operand:VF_512 1 "register_operand" "0,0")
3272 (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>,v")
3274 (match_operand:VF_512 3 "<round_nimm_predicate>" "v,<round_constraint>"))]
3277 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
3280 vfmsubadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
3281 vfmsubadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
3282 [(set_attr "isa" "fma_avx512f,fma_avx512f")
3283 (set_attr "type" "ssemuladd")
3284 (set_attr "mode" "<MODE>")])
;; Masked vfmsubadd231 for VF_512 modes.  Operand 3 is tied to the
;; destination (constraint "0"); operand 4 (Yk) is printed as the %{%4%}
;; write-mask on %0.
3286 (define_insn "avx512f_fmsubadd_<mode>_mask3<round_name>"
3287 [(set (match_operand:VF_512 0 "register_operand" "=v")
3290 [(match_operand:VF_512 1 "register_operand" "v")
3291 (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>")
3293 (match_operand:VF_512 3 "register_operand" "0"))]
3296 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
3298 "vfmsubadd231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
3299 [(set_attr "isa" "fma_avx512f")
3300 (set_attr "type" "ssemuladd")
3301 (set_attr "mode" "<MODE>")])
3303 ;; FMA3 floating point scalar intrinsics. These merge result with
3304 ;; high-order elements from the destination register.
;; Expander for the scalar fmadd intrinsic on VF_128 modes.  All three
;; inputs use <round_nimm_predicate>, so the accepted operand kinds depend
;; on whether the rounding substitution is active.
3306 (define_expand "fmai_vmfmadd_<mode><round_name>"
3307 [(set (match_operand:VF_128 0 "register_operand")
3310 (match_operand:VF_128 1 "<round_nimm_predicate>")
3311 (match_operand:VF_128 2 "<round_nimm_predicate>")
3312 (match_operand:VF_128 3 "<round_nimm_predicate>"))
;; Scalar vfmadd132/vfmadd213 on VF_128 modes; operand 1 is tied to the
;; destination in both alternatives.  Intel-syntax memory operands are
;; printed through %<iptr>.
;; NOTE(review): this pattern uses <round_nimm_predicate>, <round_constraint>
;; and <round_op4>, yet its name carries no <round_name>, unlike
;; *fmai_fnmadd_<mode><round_name> -- confirm that is intended.
3317 (define_insn "*fmai_fmadd_<mode>"
3318 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
3321 (match_operand:VF_128 1 "<round_nimm_predicate>" " 0, 0")
3322 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>, v")
3323 (match_operand:VF_128 3 "<round_nimm_predicate>" " v,<round_constraint>"))
3326 "TARGET_FMA || TARGET_AVX512F"
3328 vfmadd132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
3329 vfmadd213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
3330 [(set_attr "type" "ssemuladd")
3331 (set_attr "mode" "<MODE>")])
;; Scalar vfmsub132/vfmsub213 on VF_128 modes; operand 1 is tied to the
;; destination in both alternatives.
;; NOTE(review): as with *fmai_fmadd_<mode>, the round substitution
;; attributes are used but the name has no <round_name> -- confirm.
3333 (define_insn "*fmai_fmsub_<mode>"
3334 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
3337 (match_operand:VF_128 1 "<round_nimm_predicate>" "0,0")
3338 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>,v")
3340 (match_operand:VF_128 3 "<round_nimm_predicate>" " v,<round_constraint>")))
3343 "TARGET_FMA || TARGET_AVX512F"
3345 vfmsub132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
3346 vfmsub213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
3347 [(set_attr "type" "ssemuladd")
3348 (set_attr "mode" "<MODE>")])
;; Scalar vfnmadd132/vfnmadd213 on VF_128 modes.  Note that operand 2 is
;; listed ahead of operand 1 in the pattern; operand 1 is still the one
;; tied to the destination.
3350 (define_insn "*fmai_fnmadd_<mode><round_name>"
3351 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
3355 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>,v"))
3356 (match_operand:VF_128 1 "<round_nimm_predicate>" "0,0")
3357 (match_operand:VF_128 3 "<round_nimm_predicate>" "v,<round_constraint>"))
3360 "TARGET_FMA || TARGET_AVX512F"
3362 vfnmadd132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
3363 vfnmadd213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
3364 [(set_attr "type" "ssemuladd")
3365 (set_attr "mode" "<MODE>")])
;; Scalar vfnmsub132/vfnmsub213 on VF_128 modes.  Operand 2 is listed
;; ahead of operand 1 in the pattern; operand 1 is the one tied to the
;; destination.
3367 (define_insn "*fmai_fnmsub_<mode><round_name>"
3368 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
3372 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>, v"))
3373 (match_operand:VF_128 1 "<round_nimm_predicate>" " 0, 0")
3375 (match_operand:VF_128 3 "<round_nimm_predicate>" " v,<round_constraint>")))
3378 "TARGET_FMA || TARGET_AVX512F"
3380 vfnmsub132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
3381 vfnmsub213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
3382 [(set_attr "type" "ssemuladd")
3383 (set_attr "mode" "<MODE>")])
3385 ;; FMA4 floating point scalar intrinsics. These write the
3386 ;; entire destination register, with the high-order elements zeroed.
;; Expander for the FMA4 scalar fmadd intrinsic on VF_128 modes.  Its
;; preparation statement supplies operands[4] as the all-zero vector
;; CONST0_RTX (<MODE>mode).
3388 (define_expand "fma4i_vmfmadd_<mode>"
3389 [(set (match_operand:VF_128 0 "register_operand")
3392 (match_operand:VF_128 1 "nonimmediate_operand")
3393 (match_operand:VF_128 2 "nonimmediate_operand")
3394 (match_operand:VF_128 3 "nonimmediate_operand"))
3398 "operands[4] = CONST0_RTX (<MODE>mode);")
;; Four-operand scalar vfmadd (FMA4 form).  Operand 4 must satisfy
;; const0_operand.  At most one of operands 2 and 3 is memory: alternative
;; 0 allows it in operand 3 ("xm"), alternative 1 in operand 2 ("m").
3400 (define_insn "*fma4i_vmfmadd_<mode>"
3401 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
3404 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
3405 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
3406 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
3407 (match_operand:VF_128 4 "const0_operand")
3410 "vfmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
3411 [(set_attr "type" "ssemuladd")
3412 (set_attr "mode" "<MODE>")])
;; Four-operand scalar vfmsub (FMA4 form).  Operand 4 must satisfy
;; const0_operand; the memory operand may be operand 3 (alternative 0) or
;; operand 2 (alternative 1).
3414 (define_insn "*fma4i_vmfmsub_<mode>"
3415 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
3418 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
3419 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
3421 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
3422 (match_operand:VF_128 4 "const0_operand")
3425 "vfmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
3426 [(set_attr "type" "ssemuladd")
3427 (set_attr "mode" "<MODE>")])
;; Four-operand scalar vfnmadd (FMA4 form).  Operand 4 must satisfy
;; const0_operand; the memory operand may be operand 3 (alternative 0) or
;; operand 2 (alternative 1).
3429 (define_insn "*fma4i_vmfnmadd_<mode>"
3430 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
3434 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
3435 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
3436 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
3437 (match_operand:VF_128 4 "const0_operand")
3440 "vfnmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
3441 [(set_attr "type" "ssemuladd")
3442 (set_attr "mode" "<MODE>")])
;; Four-operand scalar vfnmsub (FMA4 form).  Operand 4 must satisfy
;; const0_operand; the memory operand may be operand 3 (alternative 0) or
;; operand 2 (alternative 1).
3444 (define_insn "*fma4i_vmfnmsub_<mode>"
3445 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
3449 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
3450 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
3452 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
3453 (match_operand:VF_128 4 "const0_operand")
3456 "vfnmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
3457 [(set_attr "type" "ssemuladd")
3458 (set_attr "mode" "<MODE>")])
3460 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3462 ;; Parallel single-precision floating point conversion operations
3464 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2ps: converts the V2SI operand 2 (MMX register or memory, "ym")
;; to V2SF.  Operand 1 is tied to the V4SF destination, so the insn is
;; printed with two operands only.
3466 (define_insn "sse_cvtpi2ps"
3467 [(set (match_operand:V4SF 0 "register_operand" "=x")
3470 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
3471 (match_operand:V4SF 1 "register_operand" "0")
3474 "cvtpi2ps\t{%2, %0|%0, %2}"
3475 [(set_attr "type" "ssecvt")
3476 (set_attr "mode" "V4SF")])
;; cvtps2pi: converts the V4SF operand 1 (SSE register or memory) through
;; a V4SI unspec and keeps elements 0 and 1 as the V2SI result, which
;; lives in an MMX register ("y"; unit "mmx").
3478 (define_insn "sse_cvtps2pi"
3479 [(set (match_operand:V2SI 0 "register_operand" "=y")
3481 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
3483 (parallel [(const_int 0) (const_int 1)])))]
3485 "cvtps2pi\t{%1, %0|%0, %q1}"
3486 [(set_attr "type" "ssecvt")
3487 (set_attr "unit" "mmx")
3488 (set_attr "mode" "DI")])
;; cvttps2pi: (fix:V4SI ...) conversion of the V4SF operand 1, keeping
;; elements 0 and 1 as the V2SI result in an MMX register ("y").
;; NOTE(review): "mode" is "SF" here while sse_cvtps2pi uses "DI" --
;; confirm the difference is intentional.
3490 (define_insn "sse_cvttps2pi"
3491 [(set (match_operand:V2SI 0 "register_operand" "=y")
3493 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
3494 (parallel [(const_int 0) (const_int 1)])))]
3496 "cvttps2pi\t{%1, %0|%0, %q1}"
3497 [(set_attr "type" "ssecvt")
3498 (set_attr "unit" "mmx")
3499 (set_attr "prefix_rep" "0")
3500 (set_attr "mode" "SF")])
;; SI -> SF conversion into a V4SF destination.  Alternatives 0 and 1 are
;; the two-operand cvtsi2ss (isa noavx, operand 1 tied to the destination)
;; taking the source from a register or from memory; alternative 2 is the
;; three-operand vcvtsi2ss (isa avx), which may carry <round_op3>.
;; The per-CPU decode attributes are listed per alternative in that order.
3502 (define_insn "sse_cvtsi2ss<round_name>"
3503 [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
3506 (float:SF (match_operand:SI 2 "<round_nimm_predicate>" "r,m,<round_constraint3>")))
3507 (match_operand:V4SF 1 "register_operand" "0,0,v")
3511 cvtsi2ss\t{%2, %0|%0, %2}
3512 cvtsi2ss\t{%2, %0|%0, %2}
3513 vcvtsi2ss\t{<round_op3>%2, %1, %0|%0, %1, %2<round_op3>}"
3514 [(set_attr "isa" "noavx,noavx,avx")
3515 (set_attr "type" "sseicvt")
3516 (set_attr "athlon_decode" "vector,double,*")
3517 (set_attr "amdfam10_decode" "vector,double,*")
3518 (set_attr "bdver1_decode" "double,direct,*")
3519 (set_attr "btver2_decode" "double,double,double")
3520 (set_attr "prefix" "orig,orig,maybe_evex")
3521 (set_attr "mode" "SF")])
;; DI -> SF conversion into a V4SF destination; requires TARGET_64BIT.
;; Alternatives 0 and 1 are the two-operand cvtsi2ssq (isa noavx, with
;; prefix_rex "1"); alternative 2 is the three-operand vcvtsi2ssq (isa avx,
;; length_vex "4"), which may carry <round_op3>.
3523 (define_insn "sse_cvtsi2ssq<round_name>"
3524 [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
3527 (float:SF (match_operand:DI 2 "<round_nimm_predicate>" "r,m,<round_constraint3>")))
3528 (match_operand:V4SF 1 "register_operand" "0,0,v")
3530 "TARGET_SSE && TARGET_64BIT"
3532 cvtsi2ssq\t{%2, %0|%0, %2}
3533 cvtsi2ssq\t{%2, %0|%0, %2}
3534 vcvtsi2ssq\t{<round_op3>%2, %1, %0|%0, %1, %2<round_op3>}"
3535 [(set_attr "isa" "noavx,noavx,avx")
3536 (set_attr "type" "sseicvt")
3537 (set_attr "athlon_decode" "vector,double,*")
3538 (set_attr "amdfam10_decode" "vector,double,*")
3539 (set_attr "bdver1_decode" "double,direct,*")
3540 (set_attr "btver2_decode" "double,double,double")
3541 (set_attr "length_vex" "*,*,4")
3542 (set_attr "prefix_rex" "1,1,*")
3543 (set_attr "prefix" "orig,orig,maybe_evex")
3544 (set_attr "mode" "SF")])
;; cvtss2si (optionally VEX-prefixed via %v): UNSPEC_FIX_NOTRUNC
;; conversion of element 0 of the V4SF operand 1 into an SI register.
;; NOTE(review): unlike sse_cvtss2si_2 this pattern sets no
;; "amdfam10_decode" attribute -- confirm the default is wanted.
3546 (define_insn "sse_cvtss2si<round_name>"
3547 [(set (match_operand:SI 0 "register_operand" "=r,r")
3550 (match_operand:V4SF 1 "<round_nimm_predicate>" "v,<round_constraint2>")
3551 (parallel [(const_int 0)]))]
3552 UNSPEC_FIX_NOTRUNC))]
3554 "%vcvtss2si\t{<round_op2>%1, %0|%0, %k1<round_op2>}"
3555 [(set_attr "type" "sseicvt")
3556 (set_attr "athlon_decode" "double,vector")
3557 (set_attr "bdver1_decode" "double,double")
3558 (set_attr "prefix_rep" "1")
3559 (set_attr "prefix" "maybe_vex")
3560 (set_attr "mode" "SI")])
;; cvtss2si taking a scalar SF source directly (register "v" or memory
;; "m") instead of selecting element 0 of a vector; the result is an SI
;; register produced through UNSPEC_FIX_NOTRUNC.
3562 (define_insn "sse_cvtss2si_2"
3563 [(set (match_operand:SI 0 "register_operand" "=r,r")
3564 (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "v,m")]
3565 UNSPEC_FIX_NOTRUNC))]
3567 "%vcvtss2si\t{%1, %0|%0, %k1}"
3568 [(set_attr "type" "sseicvt")
3569 (set_attr "athlon_decode" "double,vector")
3570 (set_attr "amdfam10_decode" "double,double")
3571 (set_attr "bdver1_decode" "double,double")
3572 (set_attr "prefix_rep" "1")
3573 (set_attr "prefix" "maybe_vex")
3574 (set_attr "mode" "SI")])
;; 64-bit cvtss2si: UNSPEC_FIX_NOTRUNC conversion of element 0 of the V4SF
;; operand 1 into a DI register; requires TARGET_64BIT.  The {q} in the
;; template is a dialect-specific mnemonic suffix.
3576 (define_insn "sse_cvtss2siq<round_name>"
3577 [(set (match_operand:DI 0 "register_operand" "=r,r")
3580 (match_operand:V4SF 1 "<round_nimm_predicate>" "v,<round_constraint2>")
3581 (parallel [(const_int 0)]))]
3582 UNSPEC_FIX_NOTRUNC))]
3583 "TARGET_SSE && TARGET_64BIT"
3584 "%vcvtss2si{q}\t{<round_op2>%1, %0|%0, %k1<round_op2>}"
3585 [(set_attr "type" "sseicvt")
3586 (set_attr "athlon_decode" "double,vector")
3587 (set_attr "bdver1_decode" "double,double")
3588 (set_attr "prefix_rep" "1")
3589 (set_attr "prefix" "maybe_vex")
3590 (set_attr "mode" "DI")])
;; 64-bit cvtss2si taking a scalar SF source directly (register or
;; memory); the DI result is produced through UNSPEC_FIX_NOTRUNC.
;; Requires TARGET_64BIT.
3592 (define_insn "sse_cvtss2siq_2"
3593 [(set (match_operand:DI 0 "register_operand" "=r,r")
3594 (unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "v,m")]
3595 UNSPEC_FIX_NOTRUNC))]
3596 "TARGET_SSE && TARGET_64BIT"
3597 "%vcvtss2si{q}\t{%1, %0|%0, %k1}"
3598 [(set_attr "type" "sseicvt")
3599 (set_attr "athlon_decode" "double,vector")
3600 (set_attr "amdfam10_decode" "double,double")
3601 (set_attr "bdver1_decode" "double,double")
3602 (set_attr "prefix_rep" "1")
3603 (set_attr "prefix" "maybe_vex")
3604 (set_attr "mode" "DI")])
;; cvttss2si: conversion of element 0 of the V4SF operand 1 into an SI
;; register.  Uses the round_saeonly substitution attributes, so the
;; template may carry <round_saeonly_op2>.
3606 (define_insn "sse_cvttss2si<round_saeonly_name>"
3607 [(set (match_operand:SI 0 "register_operand" "=r,r")
3610 (match_operand:V4SF 1 "<round_saeonly_nimm_predicate>" "v,<round_saeonly_constraint2>")
3611 (parallel [(const_int 0)]))))]
3613 "%vcvttss2si\t{<round_saeonly_op2>%1, %0|%0, %k1<round_saeonly_op2>}"
3614 [(set_attr "type" "sseicvt")
3615 (set_attr "athlon_decode" "double,vector")
3616 (set_attr "amdfam10_decode" "double,double")
3617 (set_attr "bdver1_decode" "double,double")
3618 (set_attr "prefix_rep" "1")
3619 (set_attr "prefix" "maybe_vex")
3620 (set_attr "mode" "SI")])
;; 64-bit cvttss2si: conversion of element 0 of the V4SF operand 1 into a
;; DI register; requires TARGET_64BIT.
;; NOTE(review): the second alternative uses <round_saeonly_constraint>
;; whereas sse_cvttss2si uses <round_saeonly_constraint2>; the
;; per-alternative decode attributes assume a register/memory split --
;; confirm which constraint attribute is intended here.
3622 (define_insn "sse_cvttss2siq<round_saeonly_name>"
3623 [(set (match_operand:DI 0 "register_operand" "=r,r")
3626 (match_operand:V4SF 1 "<round_saeonly_nimm_predicate>" "v,<round_saeonly_constraint>")
3627 (parallel [(const_int 0)]))))]
3628 "TARGET_SSE && TARGET_64BIT"
3629 "%vcvttss2si{q}\t{<round_saeonly_op2>%1, %0|%0, %k1<round_saeonly_op2>}"
3630 [(set_attr "type" "sseicvt")
3631 (set_attr "athlon_decode" "double,vector")
3632 (set_attr "amdfam10_decode" "double,double")
3633 (set_attr "bdver1_decode" "double,double")
3634 (set_attr "prefix_rep" "1")
3635 (set_attr "prefix" "maybe_vex")
3636 (set_attr "mode" "DI")])
;; vcvtusi2<suffix>: unsigned SI (operand 2) converted to the scalar mode
;; of VF_128 and duplicated, with operand 1 supplying the other vector
;; input.  EVEX-only (prefix "evex"); requires TARGET_AVX512F and
;; <round_modev4sf_condition>.
3638 (define_insn "cvtusi2<ssescalarmodesuffix>32<round_name>"
3639 [(set (match_operand:VF_128 0 "register_operand" "=v")
3641 (vec_duplicate:VF_128
3642 (unsigned_float:<ssescalarmode>
3643 (match_operand:SI 2 "<round_nimm_predicate>" "<round_constraint3>")))
3644 (match_operand:VF_128 1 "register_operand" "v")
3646 "TARGET_AVX512F && <round_modev4sf_condition>"
3647 "vcvtusi2<ssescalarmodesuffix>\t{<round_op3>%2, %1, %0|%0, %1, %2<round_op3>}"
3648 [(set_attr "type" "sseicvt")
3649 (set_attr "prefix" "evex")
3650 (set_attr "mode" "<ssescalarmode>")])
;; vcvtusi2<suffix> with a DI source: unsigned DI (operand 2) converted to
;; the scalar mode of VF_128 and duplicated, with operand 1 supplying the
;; other vector input.  Requires TARGET_AVX512F and TARGET_64BIT.
3652 (define_insn "cvtusi2<ssescalarmodesuffix>64<round_name>"
3653 [(set (match_operand:VF_128 0 "register_operand" "=v")
3655 (vec_duplicate:VF_128
3656 (unsigned_float:<ssescalarmode>
3657 (match_operand:DI 2 "<round_nimm_predicate>" "<round_constraint3>")))
3658 (match_operand:VF_128 1 "register_operand" "v")
3660 "TARGET_AVX512F && TARGET_64BIT"
3661 "vcvtusi2<ssescalarmodesuffix>\t{<round_op3>%2, %1, %0|%0, %1, %2<round_op3>}"
3662 [(set_attr "type" "sseicvt")
3663 (set_attr "prefix" "evex")
3664 (set_attr "mode" "<ssescalarmode>")])
;; cvtdq2ps (optionally VEX-prefixed via %v): converts the integer vector
;; operand 1 of mode <sseintvecmode> into a VF1 vector.  Supports both the
;; mask and the round substitutions.
3666 (define_insn "float<sseintvecmodelower><mode>2<mask_name><round_name>"
3667 [(set (match_operand:VF1 0 "register_operand" "=v")
3669 (match_operand:<sseintvecmode> 1 "<round_nimm_predicate>" "<round_constraint>")))]
3670 "TARGET_SSE2 && <mask_mode512bit_condition> && <round_mode512bit_condition>"
3671 "%vcvtdq2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
3672 [(set_attr "type" "ssecvt")
3673 (set_attr "prefix" "maybe_vex")
3674 (set_attr "mode" "<sseinsnmode>")])
;; vcvtudq2ps: unsigned V16SI -> V16SF conversion (unsigned_float).
;; EVEX-only; supports both the mask and the round substitutions.
3676 (define_insn "ufloatv16siv16sf2<mask_name><round_name>"
3677 [(set (match_operand:V16SF 0 "register_operand" "=v")
3678 (unsigned_float:V16SF
3679 (match_operand:V16SI 1 "<round_nimm_predicate>" "<round_constraint>")))]
3681 "vcvtudq2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
3682 [(set_attr "type" "ssecvt")
3683 (set_attr "prefix" "evex")
3684 (set_attr "mode" "V16SF")])
;; Expander for unsigned integer vector -> VF1 conversion.  V16SFmode is
;; handled by gen_ufloatv16siv16sf2.  The call to
;; ix86_expand_vector_convert_uns_vsivsf presumably covers the remaining
;; modes (the line between the two statements is not visible in this
;; listing -- confirm).
3686 (define_expand "floatuns<sseintvecmodelower><mode>2"
3687 [(match_operand:VF1 0 "register_operand")
3688 (match_operand:<sseintvecmode> 1 "register_operand")]
3689 "TARGET_SSE2 && (<MODE>mode == V4SFmode || TARGET_AVX2)"
3691 if (<MODE>mode == V16SFmode)
3692 emit_insn (gen_ufloatv16siv16sf2 (operands[0], operands[1]));
3694 ix86_expand_vector_convert_uns_vsivsf (operands[0], operands[1]);
3700 ;; For <sse2_avx_avx512f>_fix_notrunc<sf2simodelower><mode> insn pattern
;; Maps each SI vector mode to the lower-case name of the SF vector mode
;; with the same number of elements (V16SI -> v16sf, V8SI -> v8sf,
;; V4SI -> v4sf).
3701 (define_mode_attr sf2simodelower
3702 [(V16SI "v16sf") (V8SI "v8sf") (V4SI "v4sf")])
;; cvtps2dq (optionally VEX-prefixed via %v): UNSPEC_FIX_NOTRUNC conversion
;; of operand 1 (mode <ssePSmode>, register or memory) into a VI4_AVX
;; integer vector.  The prefix_data16 attribute is computed from a
;; TARGET_AVX test rather than given as a constant.
3704 (define_insn "<sse2_avx_avx512f>_fix_notrunc<sf2simodelower><mode>"
3705 [(set (match_operand:VI4_AVX 0 "register_operand" "=v")
3707 [(match_operand:<ssePSmode> 1 "nonimmediate_operand" "vm")]
3708 UNSPEC_FIX_NOTRUNC))]
3710 "%vcvtps2dq\t{%1, %0|%0, %1}"
3711 [(set_attr "type" "ssecvt")
3712 (set (attr "prefix_data16")
3714 (match_test "TARGET_AVX")
3716 (const_string "1")))
3717 (set_attr "prefix" "maybe_vex")
3718 (set_attr "mode" "<sseinsnmode>")])
;; vcvtps2dq on V16SF -> V16SI through UNSPEC_FIX_NOTRUNC.  EVEX-only;
;; supports both the mask and the round substitutions.
3720 (define_insn "<mask_codefor>avx512f_fix_notruncv16sfv16si<mask_name><round_name>"
3721 [(set (match_operand:V16SI 0 "register_operand" "=v")
3723 [(match_operand:V16SF 1 "<round_nimm_predicate>" "<round_constraint>")]
3724 UNSPEC_FIX_NOTRUNC))]
3726 "vcvtps2dq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
3727 [(set_attr "type" "ssecvt")
3728 (set_attr "prefix" "evex")
3729 (set_attr "mode" "XI")])
;; vcvtps2udq on V16SF -> V16SI through UNSPEC_UNSIGNED_FIX_NOTRUNC.
;; EVEX-only; supports both the mask and the round substitutions.
3731 (define_insn "<mask_codefor>avx512f_ufix_notruncv16sfv16si<mask_name><round_name>"
3732 [(set (match_operand:V16SI 0 "register_operand" "=v")
3734 [(match_operand:V16SF 1 "<round_nimm_predicate>" "<round_constraint>")]
3735 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
3737 "vcvtps2udq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
3738 [(set_attr "type" "ssecvt")
3739 (set_attr "prefix" "evex")
3740 (set_attr "mode" "XI")])
;; vcvttps2<fixsuffix>dq on V16SF -> V16SI; <fixsuffix> selects the
;; mnemonic variant and also prefixes the pattern name.  EVEX-only;
;; supports the mask and round_saeonly substitutions.
3742 (define_insn "<fixsuffix>fix_truncv16sfv16si2<mask_name><round_saeonly_name>"
3743 [(set (match_operand:V16SI 0 "register_operand" "=v")
3745 (match_operand:V16SF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
3747 "vcvttps2<fixsuffix>dq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
3748 [(set_attr "type" "ssecvt")
3749 (set_attr "prefix" "evex")
3750 (set_attr "mode" "XI")])
;; vcvttps2dq on V8SF -> V8SI (fix).  Always VEX-encoded (prefix "vex");
;; the source may be a register or memory.
3752 (define_insn "fix_truncv8sfv8si2"
3753 [(set (match_operand:V8SI 0 "register_operand" "=x")
3754 (fix:V8SI (match_operand:V8SF 1 "nonimmediate_operand" "xm")))]
3756 "vcvttps2dq\t{%1, %0|%0, %1}"
3757 [(set_attr "type" "ssecvt")
3758 (set_attr "prefix" "vex")
3759 (set_attr "mode" "OI")])
;; cvttps2dq (optionally VEX-prefixed via %v) on V4SF -> V4SI (fix).
;; prefix_rep and prefix_data16 are computed from a TARGET_AVX test.
;; NOTE(review): "prefix_data16" is assigned twice below -- once through
;; the conditional (set (attr ...)) form and once through an unconditional
;; (set_attr "prefix_data16" "0").  One of the two looks redundant;
;; confirm which assignment the attribute generator honours before
;; removing either.
3761 (define_insn "fix_truncv4sfv4si2"
3762 [(set (match_operand:V4SI 0 "register_operand" "=x")
3763 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
3765 "%vcvttps2dq\t{%1, %0|%0, %1}"
3766 [(set_attr "type" "ssecvt")
3767 (set (attr "prefix_rep")
3769 (match_test "TARGET_AVX")
3771 (const_string "1")))
3772 (set (attr "prefix_data16")
3774 (match_test "TARGET_AVX")
3776 (const_string "0")))
3777 (set_attr "prefix_data16" "0")
3778 (set_attr "prefix" "maybe_vex")
3779 (set_attr "mode" "TI")])
;; Expander for VF1 -> unsigned integer vector truncating conversion.
;; V16SFmode goes through gen_ufix_truncv16sfv16si2.  The other visible
;; sequence adjusts the input with ix86_expand_adjust_ufix_to_sfix_si
;; (which also fills tmp[2]), performs a signed fix_trunc into a fresh
;; register, and XORs that result with tmp[2] into operand 0.
;; NOTE(review): the branch structure between the two sequences is not
;; visible in this listing -- confirm against the full source.
3781 (define_expand "fixuns_trunc<mode><sseintvecmodelower>2"
3782 [(match_operand:<sseintvecmode> 0 "register_operand")
3783 (match_operand:VF1 1 "register_operand")]
3786 if (<MODE>mode == V16SFmode)
3787 emit_insn (gen_ufix_truncv16sfv16si2 (operands[0],
3792 tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]);
3793 tmp[1] = gen_reg_rtx (<sseintvecmode>mode);
3794 emit_insn (gen_fix_trunc<mode><sseintvecmodelower>2 (tmp[1], tmp[0]));
3795 emit_insn (gen_xor<sseintvecmodelower>3 (operands[0], tmp[1], tmp[2]));
3800 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3802 ;; Parallel double-precision floating point conversion operations
3804 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2pd: V2SI -> V2DF conversion (float).  Alternative 0 takes the
;; source from an MMX register ("y"; unit "mmx", prefix_data16 "1");
;; alternative 1 takes it from memory and leaves both attributes at "*".
3806 (define_insn "sse2_cvtpi2pd"
3807 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
3808 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
3810 "cvtpi2pd\t{%1, %0|%0, %1}"
3811 [(set_attr "type" "ssecvt")
3812 (set_attr "unit" "mmx,*")
3813 (set_attr "prefix_data16" "1,*")
3814 (set_attr "mode" "V2DF")])
;; cvtpd2pi: UNSPEC_FIX_NOTRUNC conversion of the V2DF operand 1 (SSE
;; register or memory) into a V2SI result held in an MMX register ("y").
3816 (define_insn "sse2_cvtpd2pi"
3817 [(set (match_operand:V2SI 0 "register_operand" "=y")
3818 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
3819 UNSPEC_FIX_NOTRUNC))]
3821 "cvtpd2pi\t{%1, %0|%0, %1}"
3822 [(set_attr "type" "ssecvt")
3823 (set_attr "unit" "mmx")
3824 (set_attr "bdver1_decode" "double")
3825 (set_attr "btver2_decode" "direct")
3826 (set_attr "prefix_data16" "1")
3827 (set_attr "mode" "DI")])
;; cvttpd2pi: (fix:V2SI ...) conversion of the V2DF operand 1 (SSE
;; register or memory) into a V2SI result held in an MMX register ("y").
;; NOTE(review): "mode" is "TI" here while sse2_cvtpd2pi uses "DI", and no
;; "btver2_decode" is given -- confirm both differences are intentional.
3829 (define_insn "sse2_cvttpd2pi"
3830 [(set (match_operand:V2SI 0 "register_operand" "=y")
3831 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
3833 "cvttpd2pi\t{%1, %0|%0, %1}"
3834 [(set_attr "type" "ssecvt")
3835 (set_attr "unit" "mmx")
3836 (set_attr "bdver1_decode" "double")
3837 (set_attr "prefix_data16" "1")
3838 (set_attr "mode" "TI")])
;; SI -> DF conversion into a V2DF destination.  Alternatives 0 and 1 are
;; the two-operand cvtsi2sd (isa noavx, operand 1 tied to the destination)
;; from a register or from memory; alternative 2 is the three-operand
;; vcvtsi2sd (isa avx, prefix "vex").  This pattern has no round variant.
3840 (define_insn "sse2_cvtsi2sd"
3841 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
3844 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m,rm")))
3845 (match_operand:V2DF 1 "register_operand" "0,0,x")
3849 cvtsi2sd\t{%2, %0|%0, %2}
3850 cvtsi2sd\t{%2, %0|%0, %2}
3851 vcvtsi2sd\t{%2, %1, %0|%0, %1, %2}"
3852 [(set_attr "isa" "noavx,noavx,avx")
3853 (set_attr "type" "sseicvt")
3854 (set_attr "athlon_decode" "double,direct,*")
3855 (set_attr "amdfam10_decode" "vector,double,*")
3856 (set_attr "bdver1_decode" "double,direct,*")
3857 (set_attr "btver2_decode" "double,double,double")
3858 (set_attr "prefix" "orig,orig,vex")
3859 (set_attr "mode" "DF")])
;; DI -> DF conversion into a V2DF destination; requires TARGET_64BIT.
;; Alternatives 0 and 1 are the two-operand cvtsi2sdq (isa noavx, with
;; prefix_rex "1"); alternative 2 is the three-operand vcvtsi2sdq (isa avx,
;; length_vex "4"), which may carry <round_op3>.
;; NOTE(review): unlike sse2_cvtsi2sd and sse_cvtsi2ssq, no "btver2_decode"
;; attribute is set here -- confirm the default is wanted.
3861 (define_insn "sse2_cvtsi2sdq<round_name>"
3862 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
3865 (float:DF (match_operand:DI 2 "<round_nimm_predicate>" "r,m,<round_constraint3>")))
3866 (match_operand:V2DF 1 "register_operand" "0,0,v")
3868 "TARGET_SSE2 && TARGET_64BIT"
3870 cvtsi2sdq\t{%2, %0|%0, %2}
3871 cvtsi2sdq\t{%2, %0|%0, %2}
3872 vcvtsi2sdq\t{<round_op3>%2, %1, %0|%0, %1, %2<round_op3>}"
3873 [(set_attr "isa" "noavx,noavx,avx")
3874 (set_attr "type" "sseicvt")
3875 (set_attr "athlon_decode" "double,direct,*")
3876 (set_attr "amdfam10_decode" "vector,double,*")
3877 (set_attr "bdver1_decode" "double,direct,*")
3878 (set_attr "length_vex" "*,*,4")
3879 (set_attr "prefix_rex" "1,1,*")
3880 (set_attr "prefix" "orig,orig,maybe_evex")
3881 (set_attr "mode" "DF")])
;; vcvtss2usi: UNSPEC_UNSIGNED_FIX_NOTRUNC conversion of element 0 of the
;; V4SF operand 1 into an SI register.  EVEX-only; may carry <round_op2>.
3883 (define_insn "avx512f_vcvtss2usi<round_name>"
3884 [(set (match_operand:SI 0 "register_operand" "=r")
3887 (match_operand:V4SF 1 "<round_nimm_predicate>" "<round_constraint>")
3888 (parallel [(const_int 0)]))]
3889 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
3891 "vcvtss2usi\t{<round_op2>%1, %0|%0, %1<round_op2>}"
3892 [(set_attr "type" "sseicvt")
3893 (set_attr "prefix" "evex")
3894 (set_attr "mode" "SI")])
;; 64-bit vcvtss2usi: UNSPEC_UNSIGNED_FIX_NOTRUNC conversion of element 0
;; of the V4SF operand 1 into a DI register.  Requires TARGET_AVX512F and
;; TARGET_64BIT; the mnemonic is the same as for the SI variant.
3896 (define_insn "avx512f_vcvtss2usiq<round_name>"
3897 [(set (match_operand:DI 0 "register_operand" "=r")
3900 (match_operand:V4SF 1 "<round_nimm_predicate>" "<round_constraint>")
3901 (parallel [(const_int 0)]))]
3902 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
3903 "TARGET_AVX512F && TARGET_64BIT"
3904 "vcvtss2usi\t{<round_op2>%1, %0|%0, %1<round_op2>}"
3905 [(set_attr "type" "sseicvt")
3906 (set_attr "prefix" "evex")
3907 (set_attr "mode" "DI")])
;; vcvttss2usi: conversion of element 0 of the V4SF operand 1 into an SI
;; register.  EVEX-only; uses the round_saeonly substitution, so the
;; template may carry <round_saeonly_op2>.
3909 (define_insn "avx512f_vcvttss2usi<round_saeonly_name>"
3910 [(set (match_operand:SI 0 "register_operand" "=r")
3913 (match_operand:V4SF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
3914 (parallel [(const_int 0)]))))]
3916 "vcvttss2usi\t{<round_saeonly_op2>%1, %0|%0, %1<round_saeonly_op2>}"
3917 [(set_attr "type" "sseicvt")
3918 (set_attr "prefix" "evex")
3919 (set_attr "mode" "SI")])
;; 64-bit vcvttss2usi: conversion of element 0 of the V4SF operand 1 into
;; a DI register.  Requires TARGET_AVX512F and TARGET_64BIT.
3921 (define_insn "avx512f_vcvttss2usiq<round_saeonly_name>"
3922 [(set (match_operand:DI 0 "register_operand" "=r")
3925 (match_operand:V4SF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
3926 (parallel [(const_int 0)]))))]
3927 "TARGET_AVX512F && TARGET_64BIT"
3928 "vcvttss2usi\t{<round_saeonly_op2>%1, %0|%0, %1<round_saeonly_op2>}"
3929 [(set_attr "type" "sseicvt")
3930 (set_attr "prefix" "evex")
3931 (set_attr "mode" "DI")])
;; Scalar double-precision -> unsigned 32-bit integer (vcvtsd2usi).
;; Element 0 of the V2DF operand is converted under
;; UNSPEC_UNSIGNED_FIX_NOTRUNC and written to a general register.
;; NOTE(review): presumably gated on TARGET_AVX512F like the DI sibling
;; avx512f_vcvtsd2usiq -- confirm.
3933 (define_insn "avx512f_vcvtsd2usi<round_name>"
3934 [(set (match_operand:SI 0 "register_operand" "=r")
3937 (match_operand:V2DF 1 "<round_nimm_predicate>" "<round_constraint>")
3938 (parallel [(const_int 0)]))]
3939 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
3941 "vcvtsd2usi\t{<round_op2>%1, %0|%0, %1<round_op2>}"
3942 [(set_attr "type" "sseicvt")
3943 (set_attr "prefix" "evex")
3944 (set_attr "mode" "SI")])
;; DImode variant of avx512f_vcvtsd2usi: element 0 of a V2DF operand is
;; converted (UNSPEC_UNSIGNED_FIX_NOTRUNC) into a 64-bit general register.
;; Requires AVX512F on a 64-bit target.
3946 (define_insn "avx512f_vcvtsd2usiq<round_name>"
3947 [(set (match_operand:DI 0 "register_operand" "=r")
3950 (match_operand:V2DF 1 "<round_nimm_predicate>" "<round_constraint>")
3951 (parallel [(const_int 0)]))]
3952 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
3953 "TARGET_AVX512F && TARGET_64BIT"
3954 "vcvtsd2usi\t{<round_op2>%1, %0|%0, %1<round_op2>}"
3955 [(set_attr "type" "sseicvt")
3956 (set_attr "prefix" "evex")
3957 (set_attr "mode" "DI")])
;; Scalar double-precision -> unsigned 32-bit integer using the vcvttsd2usi
;; mnemonic (the truncating form).  Source is element 0 of the V2DF operand;
;; destination is a general register.
;; NOTE(review): presumably gated on TARGET_AVX512F like the DI sibling
;; avx512f_vcvttsd2usiq -- confirm.
3959 (define_insn "avx512f_vcvttsd2usi<round_saeonly_name>"
3960 [(set (match_operand:SI 0 "register_operand" "=r")
3963 (match_operand:V2DF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
3964 (parallel [(const_int 0)]))))]
3966 "vcvttsd2usi\t{<round_saeonly_op2>%1, %0|%0, %1<round_saeonly_op2>}"
3967 [(set_attr "type" "sseicvt")
3968 (set_attr "prefix" "evex")
3969 (set_attr "mode" "SI")])
;; DImode variant of avx512f_vcvttsd2usi: element 0 of a V2DF operand is
;; converted with vcvttsd2usi into a 64-bit general register.
;; Requires AVX512F on a 64-bit target.
3971 (define_insn "avx512f_vcvttsd2usiq<round_saeonly_name>"
3972 [(set (match_operand:DI 0 "register_operand" "=r")
3975 (match_operand:V2DF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
3976 (parallel [(const_int 0)]))))]
3977 "TARGET_AVX512F && TARGET_64BIT"
3978 "vcvttsd2usi\t{<round_saeonly_op2>%1, %0|%0, %1<round_saeonly_op2>}"
3979 [(set_attr "type" "sseicvt")
3980 (set_attr "prefix" "evex")
3981 (set_attr "mode" "DI")])
;; Scalar double-precision -> signed 32-bit integer (cvtsd2si), expressed
;; as UNSPEC_FIX_NOTRUNC on element 0 of a V2DF operand.
;; Two alternatives: source in a vector register ("v") or in the location
;; described by <round_constraint2>.  The *_decode attributes give per-CPU
;; decoder classes for each alternative.
;; NOTE(review): presumably gated on TARGET_SSE2 like sse2_cvtsd2siq -- confirm.
3983 (define_insn "sse2_cvtsd2si<round_name>"
3984 [(set (match_operand:SI 0 "register_operand" "=r,r")
3987 (match_operand:V2DF 1 "<round_nimm_predicate>" "v,<round_constraint2>")
3988 (parallel [(const_int 0)]))]
3989 UNSPEC_FIX_NOTRUNC))]
3991 "%vcvtsd2si\t{<round_op2>%1, %0|%0, %q1<round_op2>}"
3992 [(set_attr "type" "sseicvt")
3993 (set_attr "athlon_decode" "double,vector")
3994 (set_attr "bdver1_decode" "double,double")
3995 (set_attr "btver2_decode" "double,double")
3996 (set_attr "prefix_rep" "1")
3997 (set_attr "prefix" "maybe_vex")
3998 (set_attr "mode" "SI")])
;; Same cvtsd2si conversion as sse2_cvtsd2si, but the source is a plain DF
;; scalar (register "v" or memory "m") rather than element 0 of a V2DF
;; vector.  Result is a signed SImode value in a general register.
;; NOTE(review): presumably gated on TARGET_SSE2 like sse2_cvtsd2siq_2 -- confirm.
4000 (define_insn "sse2_cvtsd2si_2"
4001 [(set (match_operand:SI 0 "register_operand" "=r,r")
4002 (unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "v,m")]
4003 UNSPEC_FIX_NOTRUNC))]
4005 "%vcvtsd2si\t{%1, %0|%0, %q1}"
4006 [(set_attr "type" "sseicvt")
4007 (set_attr "athlon_decode" "double,vector")
4008 (set_attr "amdfam10_decode" "double,double")
4009 (set_attr "bdver1_decode" "double,double")
4010 (set_attr "prefix_rep" "1")
4011 (set_attr "prefix" "maybe_vex")
4012 (set_attr "mode" "SI")])
;; 64-bit form of sse2_cvtsd2si: element 0 of a V2DF operand is converted
;; (UNSPEC_FIX_NOTRUNC) into a DImode general register.
;; Requires SSE2 on a 64-bit target.  The "{q}" in the template is the
;; size suffix emitted only in the AT&T dialect.
4014 (define_insn "sse2_cvtsd2siq<round_name>"
4015 [(set (match_operand:DI 0 "register_operand" "=r,r")
4018 (match_operand:V2DF 1 "<round_nimm_predicate>" "v,<round_constraint2>")
4019 (parallel [(const_int 0)]))]
4020 UNSPEC_FIX_NOTRUNC))]
4021 "TARGET_SSE2 && TARGET_64BIT"
4022 "%vcvtsd2si{q}\t{<round_op2>%1, %0|%0, %q1<round_op2>}"
4023 [(set_attr "type" "sseicvt")
4024 (set_attr "athlon_decode" "double,vector")
4025 (set_attr "bdver1_decode" "double,double")
4026 (set_attr "prefix_rep" "1")
4027 (set_attr "prefix" "maybe_vex")
4028 (set_attr "mode" "DI")])
;; 64-bit form of sse2_cvtsd2si_2: a plain DF scalar (register or memory)
;; is converted under UNSPEC_FIX_NOTRUNC into a DImode general register.
;; Requires SSE2 on a 64-bit target.
4030 (define_insn "sse2_cvtsd2siq_2"
4031 [(set (match_operand:DI 0 "register_operand" "=r,r")
4032 (unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "v,m")]
4033 UNSPEC_FIX_NOTRUNC))]
4034 "TARGET_SSE2 && TARGET_64BIT"
4035 "%vcvtsd2si{q}\t{%1, %0|%0, %q1}"
4036 [(set_attr "type" "sseicvt")
4037 (set_attr "athlon_decode" "double,vector")
4038 (set_attr "amdfam10_decode" "double,double")
4039 (set_attr "bdver1_decode" "double,double")
4040 (set_attr "prefix_rep" "1")
4041 (set_attr "prefix" "maybe_vex")
4042 (set_attr "mode" "DI")])
;; Scalar double-precision -> signed 32-bit integer using the cvttsd2si
;; mnemonic (the truncating form).  Source is element 0 of a V2DF operand,
;; either in a vector register or per <round_saeonly_constraint2>.
;; NOTE(review): presumably gated on TARGET_SSE2 like sse2_cvttsd2siq -- confirm.
4044 (define_insn "sse2_cvttsd2si<round_saeonly_name>"
4045 [(set (match_operand:SI 0 "register_operand" "=r,r")
4048 (match_operand:V2DF 1 "<round_saeonly_nimm_predicate>" "v,<round_saeonly_constraint2>")
4049 (parallel [(const_int 0)]))))]
4051 "%vcvttsd2si\t{<round_saeonly_op2>%1, %0|%0, %q1<round_saeonly_op2>}"
4052 [(set_attr "type" "sseicvt")
4053 (set_attr "athlon_decode" "double,vector")
4054 (set_attr "amdfam10_decode" "double,double")
4055 (set_attr "bdver1_decode" "double,double")
4056 (set_attr "btver2_decode" "double,double")
4057 (set_attr "prefix_rep" "1")
4058 (set_attr "prefix" "maybe_vex")
4059 (set_attr "mode" "SI")])
;; 64-bit form of sse2_cvttsd2si: element 0 of a V2DF operand is converted
;; with cvttsd2si into a DImode general register.
;; Requires SSE2 on a 64-bit target; "{q}" is the AT&T-only size suffix.
4061 (define_insn "sse2_cvttsd2siq<round_saeonly_name>"
4062 [(set (match_operand:DI 0 "register_operand" "=r,r")
4065 (match_operand:V2DF 1 "<round_saeonly_nimm_predicate>" "v,<round_saeonly_constraint2>")
4066 (parallel [(const_int 0)]))))]
4067 "TARGET_SSE2 && TARGET_64BIT"
4068 "%vcvttsd2si{q}\t{<round_saeonly_op2>%1, %0|%0, %q1<round_saeonly_op2>}"
4069 [(set_attr "type" "sseicvt")
4070 (set_attr "athlon_decode" "double,vector")
4071 (set_attr "amdfam10_decode" "double,double")
4072 (set_attr "bdver1_decode" "double,double")
4073 (set_attr "prefix_rep" "1")
4074 (set_attr "prefix" "maybe_vex")
4075 (set_attr "mode" "DI")])
;; Mode attributes mapping a double-precision vector mode to the SImode
;; vector with the same element count: si2dfmode yields the upper-case mode
;; name (used as an operand mode), si2dfmodelower the lower-case spelling
;; (used inside pattern names).
4077 ;; For float<si2dfmode><mode>2 insn pattern
4078 (define_mode_attr si2dfmode
4079 [(V8DF "V8SI") (V4DF "V4SI")])
4080 (define_mode_attr si2dfmodelower
4081 [(V8DF "v8si") (V4DF "v4si")])
;; Signed integer vector -> double-precision vector (vcvtdq2pd).
;; Iterates over VF2_512_256; the source mode is the matching SImode vector
;; given by <si2dfmode>.  Source may be a register or memory ("vm").
;; <mask_name>/<mask_operand2> add the optional masked variant, which
;; <mask_mode512bit_condition> restricts further.
4083 (define_insn "float<si2dfmodelower><mode>2<mask_name>"
4084 [(set (match_operand:VF2_512_256 0 "register_operand" "=v")
4085 (float:VF2_512_256 (match_operand:<si2dfmode> 1 "nonimmediate_operand" "vm")))]
4086 "TARGET_AVX && <mask_mode512bit_condition>"
4087 "vcvtdq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4088 [(set_attr "type" "ssecvt")
4089 (set_attr "prefix" "maybe_vex")
4090 (set_attr "mode" "<MODE>")])
;; Unsigned V8SI -> V8DF conversion (vcvtudq2pd), EVEX-encoded, with an
;; optional masked variant via <mask_name>.
;; NOTE(review): presumably gated on TARGET_AVX512F -- confirm.
4092 (define_insn "ufloatv8siv8df<mask_name>"
4093 [(set (match_operand:V8DF 0 "register_operand" "=v")
4094 (unsigned_float:V8DF
4095 (match_operand:V8SI 1 "nonimmediate_operand" "vm")))]
4097 "vcvtudq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4098 [(set_attr "type" "ssecvt")
4099 (set_attr "prefix" "evex")
4100 (set_attr "mode" "V8DF")])
;; Converts elements 0..7 (the low half) of a V16SI operand to V8DF with
;; vcvtdq2pd.  The %t1 modifier prints operand 1 in a narrower form to match
;; the instruction's source width -- NOTE(review): presumably the 256-bit
;; register name; confirm against the i386 print_operand codes.
4102 (define_insn "avx512f_cvtdq2pd512_2"
4103 [(set (match_operand:V8DF 0 "register_operand" "=v")
4106 (match_operand:V16SI 1 "nonimmediate_operand" "vm")
4107 (parallel [(const_int 0) (const_int 1)
4108 (const_int 2) (const_int 3)
4109 (const_int 4) (const_int 5)
4110 (const_int 6) (const_int 7)]))))]
4112 "vcvtdq2pd\t{%t1, %0|%0, %t1}"
4113 [(set_attr "type" "ssecvt")
4114 (set_attr "prefix" "evex")
4115 (set_attr "mode" "V8DF")])
;; Converts elements 0..3 (the low half) of a V8SI operand to V4DF with
;; vcvtdq2pd.  The %x1 modifier prints operand 1 in a narrower form --
;; NOTE(review): presumably the 128-bit register name; confirm.
4117 (define_insn "avx_cvtdq2pd256_2"
4118 [(set (match_operand:V4DF 0 "register_operand" "=x")
4121 (match_operand:V8SI 1 "nonimmediate_operand" "xm")
4122 (parallel [(const_int 0) (const_int 1)
4123 (const_int 2) (const_int 3)]))))]
4125 "vcvtdq2pd\t{%x1, %0|%0, %x1}"
4126 [(set_attr "type" "ssecvt")
4127 (set_attr "prefix" "vex")
4128 (set_attr "mode" "V4DF")])
;; Converts elements 0 and 1 of a V4SI operand to V2DF with cvtdq2pd.
;; Only the low 64 bits of the source are read, which is why a memory
;; operand is printed with %q1 in Intel syntax and ssememalign is 64.
4130 (define_insn "sse2_cvtdq2pd"
4131 [(set (match_operand:V2DF 0 "register_operand" "=x")
4134 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
4135 (parallel [(const_int 0) (const_int 1)]))))]
4137 "%vcvtdq2pd\t{%1, %0|%0, %q1}"
4138 [(set_attr "type" "ssecvt")
4139 (set_attr "prefix" "maybe_vex")
4140 (set_attr "ssememalign" "64")
4141 (set_attr "mode" "V2DF")])
;; V8DF -> V8SI conversion (vcvtpd2dq) expressed as UNSPEC_FIX_NOTRUNC,
;; with optional masking (<mask_name>) and rounding operand (<round_name>).
;; The "mode" attribute is OI because the result is 256 bits wide.
4143 (define_insn "<mask_codefor>avx512f_cvtpd2dq512<mask_name><round_name>"
4144 [(set (match_operand:V8SI 0 "register_operand" "=v")
4146 [(match_operand:V8DF 1 "<round_nimm_predicate>" "<round_constraint>")]
4147 UNSPEC_FIX_NOTRUNC))]
4149 "vcvtpd2dq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4150 [(set_attr "type" "ssecvt")
4151 (set_attr "prefix" "evex")
4152 (set_attr "mode" "OI")])
;; V4DF -> V4SI conversion (vcvtpd2dq) expressed as UNSPEC_FIX_NOTRUNC.
;; The "{y}" suffix is emitted in AT&T syntax only, to disambiguate the
;; 256-bit source when it is a memory operand.
4154 (define_insn "avx_cvtpd2dq256"
4155 [(set (match_operand:V4SI 0 "register_operand" "=x")
4156 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")]
4157 UNSPEC_FIX_NOTRUNC))]
4159 "vcvtpd2dq{y}\t{%1, %0|%0, %1}"
4160 [(set_attr "type" "ssecvt")
4161 (set_attr "prefix" "vex")
4162 (set_attr "mode" "OI")])
;; Expander producing a V8SI result from a V4DF -> V4SI conversion.
;; Operand 2 is set to a V4SI zero vector in the preparation statement;
;; NOTE(review): presumably it forms the upper half of the V8SI result, as
;; in the matching insn *avx_cvtpd2dq256_2 -- confirm.
4164 (define_expand "avx_cvtpd2dq256_2"
4165 [(set (match_operand:V8SI 0 "register_operand")
4167 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand")]
4171 "operands[2] = CONST0_RTX (V4SImode);")
;; Insn for a V4DF -> V4SI conversion combined with a constant-zero V4SI
;; operand into a V8SI destination.  The destination is printed with %x0,
;; so the instruction itself writes the narrower register form.
4173 (define_insn "*avx_cvtpd2dq256_2"
4174 [(set (match_operand:V8SI 0 "register_operand" "=x")
4176 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")]
4178 (match_operand:V4SI 2 "const0_operand")))]
4180 "vcvtpd2dq{y}\t{%1, %x0|%x0, %1}"
4181 [(set_attr "type" "ssecvt")
4182 (set_attr "prefix" "vex")
4183 (set_attr "btver2_decode" "vector")
4184 (set_attr "mode" "OI")])
;; Expander producing a V4SI result from a V2DF -> V2SI conversion.
;; Operand 2 is set to a V2SI zero vector in the preparation statement;
;; NOTE(review): presumably it forms the upper half of the result, as in
;; the matching insn *sse2_cvtpd2dq -- confirm.
4186 (define_expand "sse2_cvtpd2dq"
4187 [(set (match_operand:V4SI 0 "register_operand")
4189 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand")]
4193 "operands[2] = CONST0_RTX (V2SImode);")
;; Insn for a V2DF -> V2SI conversion combined with a constant-zero V2SI
;; operand into a V4SI destination.
;; Two output templates exist: a v-prefixed one with an AT&T-only "{x}"
;; suffix, and the legacy cvtpd2dq form.
;; NOTE(review): the selection presumably tests TARGET_AVX -- confirm.
4195 (define_insn "*sse2_cvtpd2dq"
4196 [(set (match_operand:V4SI 0 "register_operand" "=x")
4198 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
4200 (match_operand:V2SI 2 "const0_operand")))]
4204 return "vcvtpd2dq{x}\t{%1, %0|%0, %1}";
4206 return "cvtpd2dq\t{%1, %0|%0, %1}";
4208 [(set_attr "type" "ssecvt")
4209 (set_attr "prefix_rep" "1")
4210 (set_attr "prefix_data16" "0")
4211 (set_attr "prefix" "maybe_vex")
4212 (set_attr "mode" "TI")
4213 (set_attr "amdfam10_decode" "double")
4214 (set_attr "athlon_decode" "vector")
4215 (set_attr "bdver1_decode" "double")])
;; V8DF -> unsigned V8SI conversion (vcvtpd2udq) expressed as
;; UNSPEC_UNSIGNED_FIX_NOTRUNC, with optional masking and rounding operand.
;; NOTE(review): presumably gated on TARGET_AVX512F -- confirm.
4217 (define_insn "avx512f_ufix_notruncv8dfv8si<mask_name><round_name>"
4218 [(set (match_operand:V8SI 0 "register_operand" "=v")
4220 [(match_operand:V8DF 1 "<round_nimm_predicate>" "<round_constraint>")]
4221 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4223 "vcvtpd2udq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4224 [(set_attr "type" "ssecvt")
4225 (set_attr "prefix" "evex")
4226 (set_attr "mode" "OI")])
;; V8DF -> V8SI conversion using vcvttpd2dq / vcvttpd2udq (the truncating
;; forms).  <fixsuffix> selects the signed or unsigned spelling in both the
;; pattern name and the mnemonic; masking and an SAE operand are optional.
;; NOTE(review): <fixsuffix> presumably comes from a code iterator over
;; fix/unsigned_fix -- confirm.
4228 (define_insn "<fixsuffix>fix_truncv8dfv8si2<mask_name><round_saeonly_name>"
4229 [(set (match_operand:V8SI 0 "register_operand" "=v")
4231 (match_operand:V8DF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
4233 "vcvttpd2<fixsuffix>dq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
4234 [(set_attr "type" "ssecvt")
4235 (set_attr "prefix" "evex")
4236 (set_attr "mode" "OI")])
;; Truncating V4DF -> signed V4SI conversion (RTL code "fix"), emitted as
;; vcvttpd2dq with an AT&T-only "{y}" suffix marking the 256-bit source.
;; NOTE(review): presumably gated on TARGET_AVX -- confirm.
4238 (define_insn "fix_truncv4dfv4si2"
4239 [(set (match_operand:V4SI 0 "register_operand" "=x")
4240 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
4242 "vcvttpd2dq{y}\t{%1, %0|%0, %1}"
4243 [(set_attr "type" "ssecvt")
4244 (set_attr "prefix" "vex")
4245 (set_attr "mode" "OI")])
;; Expander producing a V8SI result from a truncating V4DF -> V4SI
;; conversion.  Operand 2 is set to a V4SI zero vector;
;; NOTE(review): presumably it fills the upper half of the result, as in
;; the matching insn *avx_cvttpd2dq256_2 -- confirm.
4247 (define_expand "avx_cvttpd2dq256_2"
4248 [(set (match_operand:V8SI 0 "register_operand")
4250 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand"))
4253 "operands[2] = CONST0_RTX (V4SImode);")
;; Insn for a truncating V4DF -> V4SI conversion combined with a
;; constant-zero V4SI operand into a V8SI destination.  The destination is
;; printed with %x0, i.e. in its narrower register form.
4255 (define_insn "*avx_cvttpd2dq256_2"
4256 [(set (match_operand:V8SI 0 "register_operand" "=x")
4258 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "xm"))
4259 (match_operand:V4SI 2 "const0_operand")))]
4261 "vcvttpd2dq{y}\t{%1, %x0|%x0, %1}"
4262 [(set_attr "type" "ssecvt")
4263 (set_attr "prefix" "vex")
4264 (set_attr "btver2_decode" "vector")
4265 (set_attr "mode" "OI")])
;; Expander producing a V4SI result from a truncating V2DF -> V2SI
;; conversion.  Operand 2 is set to a V2SI zero vector;
;; NOTE(review): presumably it fills the upper half of the result, as in
;; the matching insn *sse2_cvttpd2dq -- confirm.
4267 (define_expand "sse2_cvttpd2dq"
4268 [(set (match_operand:V4SI 0 "register_operand")
4270 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand"))
4273 "operands[2] = CONST0_RTX (V2SImode);")
;; Insn for a truncating V2DF -> V2SI conversion combined with a
;; constant-zero V2SI operand into a V4SI destination.
;; Two output templates exist: a v-prefixed one with an AT&T-only "{x}"
;; suffix, and the legacy cvttpd2dq form.
;; NOTE(review): the selection presumably tests TARGET_AVX -- confirm.
4275 (define_insn "*sse2_cvttpd2dq"
4276 [(set (match_operand:V4SI 0 "register_operand" "=x")
4278 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
4279 (match_operand:V2SI 2 "const0_operand")))]
4283 return "vcvttpd2dq{x}\t{%1, %0|%0, %1}";
4285 return "cvttpd2dq\t{%1, %0|%0, %1}";
4287 [(set_attr "type" "ssecvt")
4288 (set_attr "amdfam10_decode" "double")
4289 (set_attr "athlon_decode" "vector")
4290 (set_attr "bdver1_decode" "double")
4291 (set_attr "prefix" "maybe_vex")
4292 (set_attr "mode" "TI")])
;; Scalar double -> single conversion (cvtsd2ss).  V2DF operand 2 is
;; narrowed with float_truncate and combined with V4SF operand 1 to form the
;; V4SF result.
;; Alternatives 0/1 are the two-operand legacy SSE forms (operand 1 tied to
;; the destination, source in register or memory); alternative 2 is the
;; three-operand AVX form, which also accepts the optional rounding operand.
4294 (define_insn "sse2_cvtsd2ss<round_name>"
4295 [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
4298 (float_truncate:V2SF
4299 (match_operand:V2DF 2 "nonimmediate_operand" "x,m,<round_constraint>")))
4300 (match_operand:V4SF 1 "register_operand" "0,0,v")
4304 cvtsd2ss\t{%2, %0|%0, %2}
4305 cvtsd2ss\t{%2, %0|%0, %q2}
4306 vcvtsd2ss\t{<round_op3>%2, %1, %0|%0, %1, %q2<round_op3>}"
4307 [(set_attr "isa" "noavx,noavx,avx")
4308 (set_attr "type" "ssecvt")
4309 (set_attr "athlon_decode" "vector,double,*")
4310 (set_attr "amdfam10_decode" "vector,double,*")
4311 (set_attr "bdver1_decode" "direct,direct,*")
4312 (set_attr "btver2_decode" "double,double,double")
4313 (set_attr "prefix" "orig,orig,<round_prefix>")
4314 (set_attr "mode" "SF")])
;; Scalar single -> double conversion (cvtss2sd).  Elements 0 and 1 of V4SF
;; operand 2 are selected and the result is combined with V2DF operand 1.
;; Alternatives 0/1 are the two-operand legacy SSE forms (operand 1 tied to
;; the destination); alternative 2 is the three-operand AVX form, which also
;; accepts the optional SAE operand.
4316 (define_insn "sse2_cvtss2sd<round_saeonly_name>"
4317 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
4321 (match_operand:V4SF 2 "nonimmediate_operand" "x,m,<round_saeonly_constraint>")
4322 (parallel [(const_int 0) (const_int 1)])))
4323 (match_operand:V2DF 1 "register_operand" "0,0,v")
4327 cvtss2sd\t{%2, %0|%0, %2}
4328 cvtss2sd\t{%2, %0|%0, %k2}
4329 vcvtss2sd\t{<round_saeonly_op3>%2, %1, %0|%0, %1, %k2<round_saeonly_op3>}"
4330 [(set_attr "isa" "noavx,noavx,avx")
4331 (set_attr "type" "ssecvt")
4332 (set_attr "amdfam10_decode" "vector,double,*")
4333 (set_attr "athlon_decode" "direct,direct,*")
4334 (set_attr "bdver1_decode" "direct,direct,*")
4335 (set_attr "btver2_decode" "double,double,double")
4336 (set_attr "prefix" "orig,orig,<round_saeonly_prefix>")
4337 (set_attr "mode" "DF")])
;; V8DF -> V8SF narrowing (float_truncate) emitted as vcvtpd2ps, with
;; optional masking (<mask_name>) and rounding operand (<round_name>).
;; NOTE(review): presumably gated on TARGET_AVX512F -- confirm.
4339 (define_insn "<mask_codefor>avx512f_cvtpd2ps512<mask_name><round_name>"
4340 [(set (match_operand:V8SF 0 "register_operand" "=v")
4341 (float_truncate:V8SF
4342 (match_operand:V8DF 1 "<round_nimm_predicate>" "<round_constraint>")))]
4344 "vcvtpd2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4345 [(set_attr "type" "ssecvt")
4346 (set_attr "prefix" "evex")
4347 (set_attr "mode" "V8SF")])
;; V4DF -> V4SF narrowing (float_truncate) emitted as vcvtpd2ps; the
;; AT&T-only "{y}" suffix marks the 256-bit source.
;; NOTE(review): presumably gated on TARGET_AVX -- confirm.
4349 (define_insn "avx_cvtpd2ps256"
4350 [(set (match_operand:V4SF 0 "register_operand" "=x")
4351 (float_truncate:V4SF
4352 (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
4354 "vcvtpd2ps{y}\t{%1, %0|%0, %1}"
4355 [(set_attr "type" "ssecvt")
4356 (set_attr "prefix" "vex")
4357 (set_attr "btver2_decode" "vector")
4358 (set_attr "mode" "V4SF")])
;; Expander producing a V4SF result from a V2DF -> V2SF narrowing.
;; Operand 2 is set to a V2SF zero vector;
;; NOTE(review): presumably it fills the upper half of the result, as in
;; the matching insn *sse2_cvtpd2ps -- confirm.
4360 (define_expand "sse2_cvtpd2ps"
4361 [(set (match_operand:V4SF 0 "register_operand")
4363 (float_truncate:V2SF
4364 (match_operand:V2DF 1 "nonimmediate_operand"))
4367 "operands[2] = CONST0_RTX (V2SFmode);")
;; Insn for a V2DF -> V2SF narrowing combined with a constant-zero V2SF
;; operand into a V4SF destination.
;; Two output templates exist: a v-prefixed one with an AT&T-only "{x}"
;; suffix, and the legacy cvtpd2ps form.
;; NOTE(review): the selection presumably tests TARGET_AVX -- confirm.
4369 (define_insn "*sse2_cvtpd2ps"
4370 [(set (match_operand:V4SF 0 "register_operand" "=x")
4372 (float_truncate:V2SF
4373 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
4374 (match_operand:V2SF 2 "const0_operand")))]
4378 return "vcvtpd2ps{x}\t{%1, %0|%0, %1}";
4380 return "cvtpd2ps\t{%1, %0|%0, %1}";
4382 [(set_attr "type" "ssecvt")
4383 (set_attr "amdfam10_decode" "double")
4384 (set_attr "athlon_decode" "vector")
4385 (set_attr "bdver1_decode" "double")
4386 (set_attr "prefix_data16" "1")
4387 (set_attr "prefix" "maybe_vex")
4388 (set_attr "mode" "V4SF")])
;; Maps a double-precision vector mode to the single-precision vector mode
;; with the same element count (V8DF -> V8SF, V4DF -> V4SF).
4390 ;; For <sse2_avx_avx512f>_cvtps2pd<avxsizesuffix> insn pattern
4391 (define_mode_attr sf2dfmode
4392 [(V8DF "V8SF") (V4DF "V4SF")])
;; Single -> double widening (float_extend) emitted as vcvtps2pd.
;; Iterates over VF2_512_256; the source is the same-count single-precision
;; vector given by <sf2dfmode>.  Masking and an SAE operand are optional,
;; and the condition restricts both to the modes allowed by the
;; *_mode512bit_condition substitutions.
4394 (define_insn "<sse2_avx_avx512f>_cvtps2pd<avxsizesuffix><mask_name><round_saeonly_name>"
4395 [(set (match_operand:VF2_512_256 0 "register_operand" "=v")
4396 (float_extend:VF2_512_256
4397 (match_operand:<sf2dfmode> 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
4398 "TARGET_AVX && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
4399 "vcvtps2pd\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
4400 [(set_attr "type" "ssecvt")
4401 (set_attr "prefix" "maybe_vex")
4402 (set_attr "mode" "<MODE>")])
;; Widens elements 0..3 (the low half) of a V8SF operand to V4DF with
;; vcvtps2pd.  Operand 1 is printed with %x1, i.e. in its narrower form.
;; NOTE(review): presumably gated on TARGET_AVX -- confirm.
4404 (define_insn "*avx_cvtps2pd256_2"
4405 [(set (match_operand:V4DF 0 "register_operand" "=x")
4408 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
4409 (parallel [(const_int 0) (const_int 1)
4410 (const_int 2) (const_int 3)]))))]
4412 "vcvtps2pd\t{%x1, %0|%0, %x1}"
4413 [(set_attr "type" "ssecvt")
4414 (set_attr "prefix" "vex")
4415 (set_attr "mode" "V4DF")])
;; Standard-name pattern: widens elements 0..7 (the low half) of a V16SF
;; operand to V8DF with vcvtps2pd.  Operand 1 is printed with %t1, i.e. in
;; a narrower register form.
;; NOTE(review): presumably gated on TARGET_AVX512F -- confirm.
4417 (define_insn "vec_unpacks_lo_v16sf"
4418 [(set (match_operand:V8DF 0 "register_operand" "=v")
4421 (match_operand:V16SF 1 "nonimmediate_operand" "vm")
4422 (parallel [(const_int 0) (const_int 1)
4423 (const_int 2) (const_int 3)
4424 (const_int 4) (const_int 5)
4425 (const_int 6) (const_int 7)]))))]
4427 "vcvtps2pd\t{%t1, %0|%0, %t1}"
4428 [(set_attr "type" "ssecvt")
4429 (set_attr "prefix" "evex")
4430 (set_attr "mode" "V8DF")])
;; Widens elements 0 and 1 of a V4SF operand to V2DF with cvtps2pd.
;; A memory source is printed with %q1 in Intel syntax because only the low
;; 64 bits are read.
;; NOTE(review): presumably gated on TARGET_SSE2 -- confirm.
4432 (define_insn "sse2_cvtps2pd"
4433 [(set (match_operand:V2DF 0 "register_operand" "=x")
4436 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
4437 (parallel [(const_int 0) (const_int 1)]))))]
4439 "%vcvtps2pd\t{%1, %0|%0, %q1}"
4440 [(set_attr "type" "ssecvt")
4441 (set_attr "amdfam10_decode" "direct")
4442 (set_attr "athlon_decode" "double")
4443 (set_attr "bdver1_decode" "double")
4444 (set_attr "prefix_data16" "0")
4445 (set_attr "prefix" "maybe_vex")
4446 (set_attr "mode" "V2DF")])
;; Standard-name expander: widen the high two floats of a V4SF to V2DF.
;; Two steps: a shuffle with selector {6,7,2,3} feeding a fresh V4SF scratch
;; (operand 2), then elements 0 and 1 are converted into V2DF operand 0.
;; NOTE(review): the selector presumably indexes a concatenation of
;; operand 1 with itself, which moves the high pair into the low half --
;; confirm.
4448 (define_expand "vec_unpacks_hi_v4sf"
4453 (match_operand:V4SF 1 "nonimmediate_operand"))
4454 (parallel [(const_int 6) (const_int 7)
4455 (const_int 2) (const_int 3)])))
4456 (set (match_operand:V2DF 0 "register_operand")
4460 (parallel [(const_int 0) (const_int 1)]))))]
4462 "operands[2] = gen_reg_rtx (V4SFmode);")
;; Standard-name expander: widen the high four floats of a V8SF to V4DF.
;; Elements 4..7 of operand 1 are selected and operand 0 (V4DF) is then
;; set; operand 2 is a fresh V4SF scratch.
;; NOTE(review): operand 2 presumably receives the selected half and is
;; then float_extended into operand 0 -- confirm.
4464 (define_expand "vec_unpacks_hi_v8sf"
4467 (match_operand:V8SF 1 "nonimmediate_operand")
4468 (parallel [(const_int 4) (const_int 5)
4469 (const_int 6) (const_int 7)])))
4470 (set (match_operand:V4DF 0 "register_operand")
4474 "operands[2] = gen_reg_rtx (V4SFmode);")
;; Standard-name expander: widen the high eight floats of a V16SF to V8DF.
;; Elements 8..15 of operand 1 are selected and operand 0 (V8DF) is then
;; set; operand 2 is a fresh V8SF scratch.
;; NOTE(review): operand 2 presumably receives the selected half and is
;; then float_extended into operand 0 -- confirm.
4476 (define_expand "vec_unpacks_hi_v16sf"
4479 (match_operand:V16SF 1 "nonimmediate_operand")
4480 (parallel [(const_int 8) (const_int 9)
4481 (const_int 10) (const_int 11)
4482 (const_int 12) (const_int 13)
4483 (const_int 14) (const_int 15)])))
4484 (set (match_operand:V8DF 0 "register_operand")
4488 "operands[2] = gen_reg_rtx (V8SFmode);")
;; Standard-name expander: widen the low two floats (elements 0 and 1) of a
;; V4SF operand into a V2DF result.  No preparation code is needed because
;; the low half can be converted directly.
4490 (define_expand "vec_unpacks_lo_v4sf"
4491 [(set (match_operand:V2DF 0 "register_operand")
4494 (match_operand:V4SF 1 "nonimmediate_operand")
4495 (parallel [(const_int 0) (const_int 1)]))))]
;; Standard-name expander: widen the low four floats (elements 0..3) of a
;; V8SF operand into a V4DF result.
4498 (define_expand "vec_unpacks_lo_v8sf"
4499 [(set (match_operand:V4DF 0 "register_operand")
4502 (match_operand:V8SF 1 "nonimmediate_operand")
4503 (parallel [(const_int 0) (const_int 1)
4504 (const_int 2) (const_int 3)]))))]
;; Maps an integer vector mode to the floating-point vector mode produced
;; by unpacking one half of it and converting: half as many elements, each
;; twice as wide (HI -> SF, SI -> DF).
4507 (define_mode_attr sseunpackfltmode
4508 [(V8HI "V4SF") (V4SI "V2DF") (V16HI "V8SF")
4509 (V8SI "V4DF") (V32HI "V16SF") (V16SI "V8DF")])
;; Standard-name expander: signed-unpack the high half of integer vector
;; operand 1 and convert it to floating point.
;; Step 1 widens the high half into a temporary of <sseunpackmode> via
;; gen_vec_unpacks_hi_<mode>; step 2 emits a SET of operand 0 to a FLOAT of
;; that temporary in <sseunpackfltmode>.
4511 (define_expand "vec_unpacks_float_hi_<mode>"
4512 [(match_operand:<sseunpackfltmode> 0 "register_operand")
4513 (match_operand:VI2_AVX512F 1 "register_operand")]
4516 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
4518 emit_insn (gen_vec_unpacks_hi_<mode> (tmp, operands[1]));
4519 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
4520 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
;; Standard-name expander: signed-unpack the low half of integer vector
;; operand 1 and convert it to floating point.
;; Step 1 widens the low half into a temporary of <sseunpackmode> via
;; gen_vec_unpacks_lo_<mode>; step 2 emits a SET of operand 0 to a FLOAT of
;; that temporary in <sseunpackfltmode>.
4524 (define_expand "vec_unpacks_float_lo_<mode>"
4525 [(match_operand:<sseunpackfltmode> 0 "register_operand")
4526 (match_operand:VI2_AVX512F 1 "register_operand")]
4529 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
4531 emit_insn (gen_vec_unpacks_lo_<mode> (tmp, operands[1]));
4532 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
4533 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
;; Standard-name expander: unsigned-unpack the high half of integer vector
;; operand 1 and convert it to floating point.
;; Step 1 widens via gen_vec_unpacku_hi_<mode>; step 2 applies a signed
;; FLOAT to the widened temporary.
;; NOTE(review): the signed conversion is presumably safe because the
;; zero-extended elements can never be negative -- confirm.
4537 (define_expand "vec_unpacku_float_hi_<mode>"
4538 [(match_operand:<sseunpackfltmode> 0 "register_operand")
4539 (match_operand:VI2_AVX512F 1 "register_operand")]
4542 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
4544 emit_insn (gen_vec_unpacku_hi_<mode> (tmp, operands[1]));
4545 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
4546 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
;; Standard-name expander: unsigned-unpack the low half of integer vector
;; operand 1 and convert it to floating point.
;; Step 1 widens via gen_vec_unpacku_lo_<mode>; step 2 applies a signed
;; FLOAT to the widened temporary.
;; NOTE(review): the signed conversion is presumably safe because the
;; zero-extended elements can never be negative -- confirm.
4550 (define_expand "vec_unpacku_float_lo_<mode>"
4551 [(match_operand:<sseunpackfltmode> 0 "register_operand")
4552 (match_operand:VI2_AVX512F 1 "register_operand")]
4555 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
4557 emit_insn (gen_vec_unpacku_lo_<mode> (tmp, operands[1]));
4558 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
4559 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
;; Standard-name expander: convert the high two signed ints of a V4SI to
;; V2DF.  Elements {2,3,2,3} of operand 1 are selected first, with a fresh
;; V4SI scratch as operand 2; elements 0 and 1 are then converted into
;; operand 0.
;; NOTE(review): operand 2 presumably holds the shuffled vector whose low
;; pair is converted -- confirm.
4563 (define_expand "vec_unpacks_float_hi_v4si"
4566 (match_operand:V4SI 1 "nonimmediate_operand")
4567 (parallel [(const_int 2) (const_int 3)
4568 (const_int 2) (const_int 3)])))
4569 (set (match_operand:V2DF 0 "register_operand")
4573 (parallel [(const_int 0) (const_int 1)]))))]
4575 "operands[2] = gen_reg_rtx (V4SImode);")
;; Standard-name expander: convert the low two signed ints (elements 0 and
;; 1) of a V4SI operand into a V2DF result.
4577 (define_expand "vec_unpacks_float_lo_v4si"
4578 [(set (match_operand:V2DF 0 "register_operand")
4581 (match_operand:V4SI 1 "nonimmediate_operand")
4582 (parallel [(const_int 0) (const_int 1)]))))]
;; Standard-name expander: convert the high four signed ints of a V8SI to
;; V4DF.  Elements 4..7 of operand 1 are selected and operand 0 is then
;; set; operand 2 is a fresh V4SI scratch.
;; NOTE(review): operand 2 presumably receives the selected half before
;; the conversion -- confirm.
4585 (define_expand "vec_unpacks_float_hi_v8si"
4588 (match_operand:V8SI 1 "nonimmediate_operand")
4589 (parallel [(const_int 4) (const_int 5)
4590 (const_int 6) (const_int 7)])))
4591 (set (match_operand:V4DF 0 "register_operand")
4595 "operands[2] = gen_reg_rtx (V4SImode);")
;; Standard-name expander: convert the low four signed ints (elements 0..3)
;; of a V8SI operand into a V4DF result.
4597 (define_expand "vec_unpacks_float_lo_v8si"
4598 [(set (match_operand:V4DF 0 "register_operand")
4601 (match_operand:V8SI 1 "nonimmediate_operand")
4602 (parallel [(const_int 0) (const_int 1)
4603 (const_int 2) (const_int 3)]))))]
;; Standard-name expander: convert the high eight signed ints of a V16SI
;; to V8DF.  Elements 8..15 of operand 1 are selected and operand 0 is then
;; set; operand 2 is a fresh V8SI scratch.
;; NOTE(review): operand 2 presumably receives the selected half before
;; the conversion -- confirm.
4606 (define_expand "vec_unpacks_float_hi_v16si"
4609 (match_operand:V16SI 1 "nonimmediate_operand")
4610 (parallel [(const_int 8) (const_int 9)
4611 (const_int 10) (const_int 11)
4612 (const_int 12) (const_int 13)
4613 (const_int 14) (const_int 15)])))
4614 (set (match_operand:V8DF 0 "register_operand")
4618 "operands[2] = gen_reg_rtx (V8SImode);")
;; Standard-name expander: convert the low eight signed ints (elements
;; 0..7) of a V16SI operand into a V8DF result.
4620 (define_expand "vec_unpacks_float_lo_v16si"
4621 [(set (match_operand:V8DF 0 "register_operand")
4624 (match_operand:V16SI 1 "nonimmediate_operand")
4625 (parallel [(const_int 0) (const_int 1)
4626 (const_int 2) (const_int 3)
4627 (const_int 4) (const_int 5)
4628 (const_int 6) (const_int 7)]))))]
;; Standard-name expander: convert the high two UNSIGNED ints of a V4SI to
;; V2DF using only signed conversion plus a fix-up.
;; Sequence visible in the template:
;;   - select elements {2,3,2,3} of operand 1 (operand 5 is a V4SI scratch);
;;   - convert elements 0 and 1 to V2DF (operands 6..8 are V2DF scratches);
;;   - compare operand 6 < operand 3, where operand 3 is a zero vector;
;;   - AND operand 7 with operand 4, a vector built from the constant 2^32
;;     (real_ldexp of 1.0 by 32);
;;   - operand 0 = operand 6 + operand 8.
;; Net effect: lanes that came out negative get 2^32 added back.
;; NOTE(review): operand 7 is presumably the compare mask and operand 8 the
;; masked 2^32 addend; ix86_build_const_vector presumably replicates the
;; value in every lane -- confirm.
4631 (define_expand "vec_unpacku_float_hi_v4si"
4634 (match_operand:V4SI 1 "nonimmediate_operand")
4635 (parallel [(const_int 2) (const_int 3)
4636 (const_int 2) (const_int 3)])))
4641 (parallel [(const_int 0) (const_int 1)]))))
4643 (lt:V2DF (match_dup 6) (match_dup 3)))
4645 (and:V2DF (match_dup 7) (match_dup 4)))
4646 (set (match_operand:V2DF 0 "register_operand")
4647 (plus:V2DF (match_dup 6) (match_dup 8)))]
4650 REAL_VALUE_TYPE TWO32r;
4654 real_ldexp (&TWO32r, &dconst1, 32);
4655 x = const_double_from_real_value (TWO32r, DFmode);
4657 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
4658 operands[4] = force_reg (V2DFmode,
4659 ix86_build_const_vector (V2DFmode, 1, x));
4661 operands[5] = gen_reg_rtx (V4SImode);
4663 for (i = 6; i < 9; i++)
4664 operands[i] = gen_reg_rtx (V2DFmode);
;; Standard-name expander: convert the low two UNSIGNED ints of a V4SI to
;; V2DF using only signed conversion plus a fix-up.
;; Sequence visible in the template:
;;   - convert elements 0 and 1 of operand 1 (operands 5..7 are V2DF
;;     scratches);
;;   - compare operand 5 < operand 3, where operand 3 is a zero vector;
;;   - AND operand 6 with operand 4, a vector built from the constant 2^32;
;;   - operand 0 = operand 5 + operand 7.
;; Net effect: lanes that came out negative get 2^32 added back.
;; NOTE(review): operand 6 is presumably the compare mask and operand 7 the
;; masked 2^32 addend -- confirm.
4667 (define_expand "vec_unpacku_float_lo_v4si"
4671 (match_operand:V4SI 1 "nonimmediate_operand")
4672 (parallel [(const_int 0) (const_int 1)]))))
4674 (lt:V2DF (match_dup 5) (match_dup 3)))
4676 (and:V2DF (match_dup 6) (match_dup 4)))
4677 (set (match_operand:V2DF 0 "register_operand")
4678 (plus:V2DF (match_dup 5) (match_dup 7)))]
4681 REAL_VALUE_TYPE TWO32r;
4685 real_ldexp (&TWO32r, &dconst1, 32);
4686 x = const_double_from_real_value (TWO32r, DFmode);
4688 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
4689 operands[4] = force_reg (V2DFmode,
4690 ix86_build_const_vector (V2DFmode, 1, x));
4692 for (i = 5; i < 8; i++)
4693 operands[i] = gen_reg_rtx (V2DFmode);
;; Standard-name expander: convert the high four UNSIGNED ints of a V8SI
;; register to V4DF.
;; Steps performed by the preparation code:
;;   tmp[0] = zero vector, tmp[1] = vector built from 2^32;
;;   tmp[5] = high half of operand 1 (gen_vec_extract_hi_v8si);
;;   tmp[2] = signed conversion of tmp[5] (gen_floatv4siv4df2);
;;   tmp[3] = (tmp[2] < 0) compare result;
;;   tmp[4] = tmp[3] AND tmp[1];
;;   operand 0 = tmp[2] + tmp[4].
;; Lanes that converted to a negative value therefore get 2^32 added back.
4696 (define_expand "vec_unpacku_float_hi_v8si"
4697 [(match_operand:V4DF 0 "register_operand")
4698 (match_operand:V8SI 1 "register_operand")]
4701 REAL_VALUE_TYPE TWO32r;
4705 real_ldexp (&TWO32r, &dconst1, 32);
4706 x = const_double_from_real_value (TWO32r, DFmode);
4708 tmp[0] = force_reg (V4DFmode, CONST0_RTX (V4DFmode));
4709 tmp[1] = force_reg (V4DFmode, ix86_build_const_vector (V4DFmode, 1, x));
4710 tmp[5] = gen_reg_rtx (V4SImode);
4712 for (i = 2; i < 5; i++)
4713 tmp[i] = gen_reg_rtx (V4DFmode);
4714 emit_insn (gen_vec_extract_hi_v8si (tmp[5], operands[1]));
4715 emit_insn (gen_floatv4siv4df2 (tmp[2], tmp[5]));
4716 emit_insn (gen_rtx_SET (VOIDmode, tmp[3],
4717 gen_rtx_LT (V4DFmode, tmp[2], tmp[0])));
4718 emit_insn (gen_andv4df3 (tmp[4], tmp[3], tmp[1]));
4719 emit_insn (gen_addv4df3 (operands[0], tmp[2], tmp[4]));
;; Standard-name expander: convert the high eight UNSIGNED ints of a V16SI
;; register to V8DF.
;; Steps performed by the preparation code:
;;   tmp[0] = zero vector, tmp[1] = vector built from 2^32;
;;   tmp[3] = high half of operand 1 (gen_vec_extract_hi_v16si);
;;   tmp[2] = signed conversion of tmp[3] (gen_floatv8siv8df2);
;;   k      = QImode result of comparing tmp[2] < 0;
;;   tmp[2] = masked add of tmp[2] and tmp[1] under k (gen_addv8df3_mask),
;;            with tmp[2] also passed as the fourth argument;
;;   operand 0 = tmp[2].
;; NOTE(review): the fourth argument is presumably the merge source for
;; lanes whose mask bit is clear -- confirm against the addv8df3_mask
;; pattern.
4723 (define_expand "vec_unpacku_float_hi_v16si"
4724 [(match_operand:V8DF 0 "register_operand")
4725 (match_operand:V16SI 1 "register_operand")]
4728 REAL_VALUE_TYPE TWO32r;
4731 real_ldexp (&TWO32r, &dconst1, 32);
4732 x = const_double_from_real_value (TWO32r, DFmode);
4734 tmp[0] = force_reg (V8DFmode, CONST0_RTX (V8DFmode));
4735 tmp[1] = force_reg (V8DFmode, ix86_build_const_vector (V8DFmode, 1, x));
4736 tmp[2] = gen_reg_rtx (V8DFmode);
4737 tmp[3] = gen_reg_rtx (V8SImode);
4738 k = gen_reg_rtx (QImode);
4740 emit_insn (gen_vec_extract_hi_v16si (tmp[3], operands[1]));
4741 emit_insn (gen_floatv8siv8df2 (tmp[2], tmp[3]));
4742 emit_insn (gen_rtx_SET (VOIDmode, k,
4743 gen_rtx_LT (QImode, tmp[2], tmp[0])));
4744 emit_insn (gen_addv8df3_mask (tmp[2], tmp[2], tmp[1], tmp[2], k));
4745 emit_move_insn (operands[0], tmp[2]);
;; Standard-name expander: convert the low four UNSIGNED ints of a V8SI
;; operand to V4DF.
;; Steps performed by the preparation code:
;;   tmp[0] = zero vector, tmp[1] = vector built from 2^32;
;;   tmp[2] = signed conversion of the low half (gen_avx_cvtdq2pd256_2);
;;   tmp[3] = (tmp[2] < 0) compare result;
;;   tmp[4] = tmp[3] AND tmp[1];
;;   operand 0 = tmp[2] + tmp[4].
;; Lanes that converted to a negative value therefore get 2^32 added back.
4749 (define_expand "vec_unpacku_float_lo_v8si"
4750 [(match_operand:V4DF 0 "register_operand")
4751 (match_operand:V8SI 1 "nonimmediate_operand")]
4754 REAL_VALUE_TYPE TWO32r;
4758 real_ldexp (&TWO32r, &dconst1, 32);
4759 x = const_double_from_real_value (TWO32r, DFmode);
4761 tmp[0] = force_reg (V4DFmode, CONST0_RTX (V4DFmode));
4762 tmp[1] = force_reg (V4DFmode, ix86_build_const_vector (V4DFmode, 1, x));
4764 for (i = 2; i < 5; i++)
4765 tmp[i] = gen_reg_rtx (V4DFmode);
4766 emit_insn (gen_avx_cvtdq2pd256_2 (tmp[2], operands[1]));
4767 emit_insn (gen_rtx_SET (VOIDmode, tmp[3],
4768 gen_rtx_LT (V4DFmode, tmp[2], tmp[0])));
4769 emit_insn (gen_andv4df3 (tmp[4], tmp[3], tmp[1]));
4770 emit_insn (gen_addv4df3 (operands[0], tmp[2], tmp[4]));
;; Standard-name expander: convert the low eight UNSIGNED ints of a V16SI
;; operand to V8DF.
;; Steps performed by the preparation code:
;;   tmp[0] = zero vector, tmp[1] = vector built from 2^32;
;;   tmp[2] = signed conversion of the low half (gen_avx512f_cvtdq2pd512_2);
;;   k      = QImode result of comparing tmp[2] < 0;
;;   tmp[2] = masked add of tmp[2] and tmp[1] under k (gen_addv8df3_mask),
;;            with tmp[2] also passed as the fourth argument;
;;   operand 0 = tmp[2].
;; NOTE(review): the fourth argument is presumably the merge source for
;; lanes whose mask bit is clear -- confirm against the addv8df3_mask
;; pattern.
4774 (define_expand "vec_unpacku_float_lo_v16si"
4775 [(match_operand:V8DF 0 "register_operand")
4776 (match_operand:V16SI 1 "nonimmediate_operand")]
4779 REAL_VALUE_TYPE TWO32r;
4782 real_ldexp (&TWO32r, &dconst1, 32);
4783 x = const_double_from_real_value (TWO32r, DFmode);
4785 tmp[0] = force_reg (V8DFmode, CONST0_RTX (V8DFmode));
4786 tmp[1] = force_reg (V8DFmode, ix86_build_const_vector (V8DFmode, 1, x));
4787 tmp[2] = gen_reg_rtx (V8DFmode);
4788 k = gen_reg_rtx (QImode);
4790 emit_insn (gen_avx512f_cvtdq2pd512_2 (tmp[2], operands[1]));
4791 emit_insn (gen_rtx_SET (VOIDmode, k,
4792 gen_rtx_LT (QImode, tmp[2], tmp[0])));
4793 emit_insn (gen_addv8df3_mask (tmp[2], tmp[2], tmp[1], tmp[2], k));
4794 emit_move_insn (operands[0], tmp[2]);
;; Standard-name expander: narrow two double-precision vectors (operands 1
;; and 2, iterating over VF2_512_256) to single precision and concatenate
;; the halves into operand 0 of mode <ssePSmode>.
;; Operands 3 and 4 are fresh <sf2dfmode> scratch registers.
;; NOTE(review): they presumably receive the two float_truncate results
;; before the vec_concat -- confirm.
4798 (define_expand "vec_pack_trunc_<mode>"
4800 (float_truncate:<sf2dfmode>
4801 (match_operand:VF2_512_256 1 "nonimmediate_operand")))
4803 (float_truncate:<sf2dfmode>
4804 (match_operand:VF2_512_256 2 "nonimmediate_operand")))
4805 (set (match_operand:<ssePSmode> 0 "register_operand")
4806 (vec_concat:<ssePSmode>
4811 operands[3] = gen_reg_rtx (<sf2dfmode>mode);
4812 operands[4] = gen_reg_rtx (<sf2dfmode>mode);
;; Standard-name expander: narrow two V2DF operands to one V4SF result.
;; Two strategies are visible:
;;   - with AVX and no preference for 128-bit AVX: concatenate both inputs
;;     into one V4DF (operand 1 forced into a register first) and convert
;;     once with gen_avx_cvtpd2ps256;
;;   - the other sequence: convert each input separately with
;;     gen_sse2_cvtpd2ps and merge the two results with gen_sse_movlhps.
;; NOTE(review): the second sequence is presumably the else branch -- confirm.
4815 (define_expand "vec_pack_trunc_v2df"
4816 [(match_operand:V4SF 0 "register_operand")
4817 (match_operand:V2DF 1 "nonimmediate_operand")
4818 (match_operand:V2DF 2 "nonimmediate_operand")]
4823 if (TARGET_AVX && !TARGET_PREFER_AVX128)
4825 tmp0 = gen_reg_rtx (V4DFmode);
4826 tmp1 = force_reg (V2DFmode, operands[1]);
4828 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
4829 emit_insn (gen_avx_cvtpd2ps256 (operands[0], tmp0));
4833 tmp0 = gen_reg_rtx (V4SFmode);
4834 tmp1 = gen_reg_rtx (V4SFmode);
4836 emit_insn (gen_sse2_cvtpd2ps (tmp0, operands[1]));
4837 emit_insn (gen_sse2_cvtpd2ps (tmp1, operands[2]));
4838 emit_insn (gen_sse_movlhps (operands[0], tmp0, tmp1));
;; Standard-name expander: truncate two V8DF operands to signed ints and
;; pack them into one V16SI.  Each input is converted to a V8SI temporary
;; with gen_fix_truncv8dfv8si2, then the two halves are concatenated with
;; gen_avx_vec_concatv16si (operand 1's result first).
4843 (define_expand "vec_pack_sfix_trunc_v8df"
4844 [(match_operand:V16SI 0 "register_operand")
4845 (match_operand:V8DF 1 "nonimmediate_operand")
4846 (match_operand:V8DF 2 "nonimmediate_operand")]
4851 r1 = gen_reg_rtx (V8SImode);
4852 r2 = gen_reg_rtx (V8SImode);
4854 emit_insn (gen_fix_truncv8dfv8si2 (r1, operands[1]));
4855 emit_insn (gen_fix_truncv8dfv8si2 (r2, operands[2]));
4856 emit_insn (gen_avx_vec_concatv16si (operands[0], r1, r2));
;; Standard-name expander: truncate two V4DF operands to signed ints and
;; pack them into one V8SI.  Each input is converted to a V4SI temporary
;; with gen_fix_truncv4dfv4si2, then the two halves are concatenated with
;; gen_avx_vec_concatv8si (operand 1's result first).
4860 (define_expand "vec_pack_sfix_trunc_v4df"
4861 [(match_operand:V8SI 0 "register_operand")
4862 (match_operand:V4DF 1 "nonimmediate_operand")
4863 (match_operand:V4DF 2 "nonimmediate_operand")]
4868 r1 = gen_reg_rtx (V4SImode);
4869 r2 = gen_reg_rtx (V4SImode);
4871 emit_insn (gen_fix_truncv4dfv4si2 (r1, operands[1]));
4872 emit_insn (gen_fix_truncv4dfv4si2 (r2, operands[2]));
4873 emit_insn (gen_avx_vec_concatv8si (operands[0], r1, r2));
;; Standard-name expander: truncate two V2DF operands to signed ints and
;; pack them into one V4SI.
;;   - With AVX and no preference for 128-bit AVX: concatenate the inputs
;;     into a V4DF and convert once with gen_fix_truncv4dfv4si2.
;;   - The other sequence: convert each input with gen_sse2_cvttpd2dq, then
;;     combine the two results by interleaving their low 64-bit lanes
;;     (viewed as V2DI) and moving the V4SI view of that into operand 0.
;; NOTE(review): the second sequence is presumably the else branch -- confirm.
4877 (define_expand "vec_pack_sfix_trunc_v2df"
4878 [(match_operand:V4SI 0 "register_operand")
4879 (match_operand:V2DF 1 "nonimmediate_operand")
4880 (match_operand:V2DF 2 "nonimmediate_operand")]
4883 rtx tmp0, tmp1, tmp2;
4885 if (TARGET_AVX && !TARGET_PREFER_AVX128)
4887 tmp0 = gen_reg_rtx (V4DFmode);
4888 tmp1 = force_reg (V2DFmode, operands[1]);
4890 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
4891 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp0));
4895 tmp0 = gen_reg_rtx (V4SImode);
4896 tmp1 = gen_reg_rtx (V4SImode);
4897 tmp2 = gen_reg_rtx (V2DImode);
4899 emit_insn (gen_sse2_cvttpd2dq (tmp0, operands[1]));
4900 emit_insn (gen_sse2_cvttpd2dq (tmp1, operands[2]));
4901 emit_insn (gen_vec_interleave_lowv2di (tmp2,
4902 gen_lowpart (V2DImode, tmp0),
4903 gen_lowpart (V2DImode, tmp1)));
4904 emit_move_insn (operands[0], gen_lowpart (V4SImode, tmp2));
;; Maps a double-precision vector mode to the SImode vector obtained by
;; packing two such vectors after conversion (twice as many elements).
4909 (define_mode_attr ssepackfltmode
4910 [(V8DF "V16SI") (V4DF "V8SI") (V2DF "V4SI")])
;; Standard-name expander: truncate two double-precision vectors to
;; UNSIGNED ints and pack them into one <ssepackfltmode> result.
;; V8DF case: convert each input with gen_ufix_truncv8dfv8si2 and
;; concatenate the two V8SI halves.
;; Other modes:
;;   1. ix86_expand_adjust_ufix_to_sfix_si rewrites each input so a signed
;;      conversion can be used, also returning a per-input value through its
;;      second argument (tmp[2], tmp[3]);
;;   2. the adjusted inputs are packed with the signed
;;      vec_pack_sfix_trunc_<mode> expander into tmp[4];
;;   3. ix86_expand_vec_extract_even_odd combines tmp[2]/tmp[3] into tmp[5];
;;      when the result is not V4SI and AVX2 is unavailable this is done on
;;      V8SF views of the registers and viewed back as V8SI;
;;   4. operand 0 = tmp[4] XOR tmp[5].
;; NOTE(review): tmp[2]/tmp[3] are presumably XOR correction masks and the
;; final argument 0 presumably selects the even elements -- confirm in
;; i386.c.
4912 (define_expand "vec_pack_ufix_trunc_<mode>"
4913 [(match_operand:<ssepackfltmode> 0 "register_operand")
4914 (match_operand:VF2 1 "register_operand")
4915 (match_operand:VF2 2 "register_operand")]
4918 if (<MODE>mode == V8DFmode)
4922 r1 = gen_reg_rtx (V8SImode);
4923 r2 = gen_reg_rtx (V8SImode);
4925 emit_insn (gen_ufix_truncv8dfv8si2 (r1, operands[1]));
4926 emit_insn (gen_ufix_truncv8dfv8si2 (r2, operands[2]));
4927 emit_insn (gen_avx_vec_concatv16si (operands[0], r1, r2));
4932 tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]);
4933 tmp[1] = ix86_expand_adjust_ufix_to_sfix_si (operands[2], &tmp[3]);
4934 tmp[4] = gen_reg_rtx (<ssepackfltmode>mode);
4935 emit_insn (gen_vec_pack_sfix_trunc_<mode> (tmp[4], tmp[0], tmp[1]));
4936 if (<ssepackfltmode>mode == V4SImode || TARGET_AVX2)
4938 tmp[5] = gen_reg_rtx (<ssepackfltmode>mode);
4939 ix86_expand_vec_extract_even_odd (tmp[5], tmp[2], tmp[3], 0);
4943 tmp[5] = gen_reg_rtx (V8SFmode);
4944 ix86_expand_vec_extract_even_odd (tmp[5], gen_lowpart (V8SFmode, tmp[2]),
4945 gen_lowpart (V8SFmode, tmp[3]), 0);
4946 tmp[5] = gen_lowpart (V8SImode, tmp[5]);
4948 tmp[6] = expand_simple_binop (<ssepackfltmode>mode, XOR, tmp[4], tmp[5],
4949 operands[0], 0, OPTAB_DIRECT);
4950 if (tmp[6] != operands[0])
4951 emit_move_insn (operands[0], tmp[6]);
;; Standard-name expander: convert two V4DF operands to signed ints with
;; the non-truncating gen_avx_cvtpd2dq256 and pack them into one V8SI.
;; Each input is converted into a V4SI temporary, then the two halves are
;; concatenated with gen_avx_vec_concatv8si (operand 1's result first).
4957 (define_expand "vec_pack_sfix_v4df"
4958 [(match_operand:V8SI 0 "register_operand")
4959 (match_operand:V4DF 1 "nonimmediate_operand")
4960 (match_operand:V4DF 2 "nonimmediate_operand")]
4965 r1 = gen_reg_rtx (V4SImode);
4966 r2 = gen_reg_rtx (V4SImode);
4968 emit_insn (gen_avx_cvtpd2dq256 (r1, operands[1]));
4969 emit_insn (gen_avx_cvtpd2dq256 (r2, operands[2]));
4970 emit_insn (gen_avx_vec_concatv8si (operands[0], r1, r2));
;; Standard-name expander: convert two V2DF operands to signed ints with
;; the non-truncating cvtpd2dq family and pack them into one V4SI.
;;   - With AVX and no preference for 128-bit AVX: concatenate the inputs
;;     into a V4DF and convert once with gen_avx_cvtpd2dq256.
;;   - The other sequence: convert each input with gen_sse2_cvtpd2dq, then
;;     combine the two results by interleaving their low 64-bit lanes
;;     (viewed as V2DI) and moving the V4SI view of that into operand 0.
;; NOTE(review): the second sequence is presumably the else branch -- confirm.
4974 (define_expand "vec_pack_sfix_v2df"
4975 [(match_operand:V4SI 0 "register_operand")
4976 (match_operand:V2DF 1 "nonimmediate_operand")
4977 (match_operand:V2DF 2 "nonimmediate_operand")]
4980 rtx tmp0, tmp1, tmp2;
4982 if (TARGET_AVX && !TARGET_PREFER_AVX128)
4984 tmp0 = gen_reg_rtx (V4DFmode);
4985 tmp1 = force_reg (V2DFmode, operands[1]);
4987 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
4988 emit_insn (gen_avx_cvtpd2dq256 (operands[0], tmp0));
4992 tmp0 = gen_reg_rtx (V4SImode);
4993 tmp1 = gen_reg_rtx (V4SImode);
4994 tmp2 = gen_reg_rtx (V2DImode);
4996 emit_insn (gen_sse2_cvtpd2dq (tmp0, operands[1]));
4997 emit_insn (gen_sse2_cvtpd2dq (tmp1, operands[2]));
4998 emit_insn (gen_vec_interleave_lowv2di (tmp2,
4999 gen_lowpart (V2DImode, tmp0),
5000 gen_lowpart (V2DImode, tmp1)));
5001 emit_move_insn (operands[0], gen_lowpart (V4SImode, tmp2));
5006 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5008 ;; Parallel single-precision floating point element swizzling
5010 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander wrapper for sse_movhlps.  The operands are passed through
;; ix86_fixup_binary_operands, the insn is emitted on the destination that
;; helper returns, and the result is copied into operand 0 when the helper
;; returned a different destination.
5012 (define_expand "sse_movhlps_exp"
5013 [(set (match_operand:V4SF 0 "nonimmediate_operand")
5016 (match_operand:V4SF 1 "nonimmediate_operand")
5017 (match_operand:V4SF 2 "nonimmediate_operand"))
5018 (parallel [(const_int 6)
5024 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
5026 emit_insn (gen_sse_movhlps (dst, operands[1], operands[2]));
5028 /* Fix up the destination if needed. */
5029 if (dst != operands[0])
5030 emit_move_insn (operands[0], dst);
;; movhlps insn.  Alternatives, in order:
;;   0/1  movhlps / vmovhlps with register operand 2;
;;   2/3  movlps / vmovlps reading the high half (%H2) of an offsettable
;;        memory operand 2;
;;   4    %vmovhps storing operand 2 into a memory destination.
;; The condition rejects the case where operands 1 and 2 are both memory.
5035 (define_insn "sse_movhlps"
5036 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,m")
5039 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
5040 (match_operand:V4SF 2 "nonimmediate_operand" " x,x,o,o,x"))
5041 (parallel [(const_int 6)
5045 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
5047 movhlps\t{%2, %0|%0, %2}
5048 vmovhlps\t{%2, %1, %0|%0, %1, %2}
5049 movlps\t{%H2, %0|%0, %H2}
5050 vmovlps\t{%H2, %1, %0|%0, %1, %H2}
5051 %vmovhps\t{%2, %0|%q0, %2}"
5052 [(set_attr "isa" "noavx,avx,noavx,avx,*")
5053 (set_attr "type" "ssemov")
5054 (set_attr "ssememalign" "64")
5055 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
5056 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
;; Expander wrapper for sse_movlhps.  The operands are passed through
;; ix86_fixup_binary_operands, the insn is emitted on the destination that
;; helper returns, and the result is copied into operand 0 when the helper
;; returned a different destination.
5058 (define_expand "sse_movlhps_exp"
5059 [(set (match_operand:V4SF 0 "nonimmediate_operand")
5062 (match_operand:V4SF 1 "nonimmediate_operand")
5063 (match_operand:V4SF 2 "nonimmediate_operand"))
5064 (parallel [(const_int 0)
5070 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
5072 emit_insn (gen_sse_movlhps (dst, operands[1], operands[2]));
5074 /* Fix up the destination if needed. */
5075 if (dst != operands[0])
5076 emit_move_insn (operands[0], dst);
;; movlhps insn.  Alternatives, in order:
;;   0/1  movlhps / vmovlhps with register operand 2;
;;   2/3  movhps / vmovhps loading from memory operand 2;
;;   4    %vmovlps storing operand 2 into the high half (%H0) of an
;;        offsettable memory destination.
;; Operand validity is checked with ix86_binary_operator_ok.
5081 (define_insn "sse_movlhps"
5082 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,o")
5085 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
5086 (match_operand:V4SF 2 "nonimmediate_operand" " x,x,m,m,x"))
5087 (parallel [(const_int 0)
5091 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
5093 movlhps\t{%2, %0|%0, %2}
5094 vmovlhps\t{%2, %1, %0|%0, %1, %2}
5095 movhps\t{%2, %0|%0, %q2}
5096 vmovhps\t{%2, %1, %0|%0, %1, %q2}
5097 %vmovlps\t{%2, %H0|%H0, %2}"
5098 [(set_attr "isa" "noavx,avx,noavx,avx,*")
5099 (set_attr "type" "ssemov")
5100 (set_attr "ssememalign" "64")
5101 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
5102 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
;; 512-bit vunpckhps, with an optional mask variant generated through
;; <mask_name>.  The selection list pairs index i with index i + 16 for
;; i = 2, 3, 6, 7, 10, 11, 14, 15.
5104 (define_insn "<mask_codefor>avx512f_unpckhps512<mask_name>"
5105 [(set (match_operand:V16SF 0 "register_operand" "=v")
5108 (match_operand:V16SF 1 "register_operand" "v")
5109 (match_operand:V16SF 2 "nonimmediate_operand" "vm"))
5110 (parallel [(const_int 2) (const_int 18)
5111 (const_int 3) (const_int 19)
5112 (const_int 6) (const_int 22)
5113 (const_int 7) (const_int 23)
5114 (const_int 10) (const_int 26)
5115 (const_int 11) (const_int 27)
5116 (const_int 14) (const_int 30)
5117 (const_int 15) (const_int 31)])))]
5119 "vunpckhps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
5120 [(set_attr "type" "sselog")
5121 (set_attr "prefix" "evex")
5122 (set_attr "mode" "V16SF")])
5124 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; 256-bit vunpckhps.  The selection list pairs index i with index i + 8
;; for i = 2, 3, 6, 7.
5125 (define_insn "avx_unpckhps256"
5126 [(set (match_operand:V8SF 0 "register_operand" "=x")
5129 (match_operand:V8SF 1 "register_operand" "x")
5130 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
5131 (parallel [(const_int 2) (const_int 10)
5132 (const_int 3) (const_int 11)
5133 (const_int 6) (const_int 14)
5134 (const_int 7) (const_int 15)])))]
5136 "vunpckhps\t{%2, %1, %0|%0, %1, %2}"
5137 [(set_attr "type" "sselog")
5138 (set_attr "prefix" "vex")
5139 (set_attr "mode" "V8SF")])
;; Expander for a full-width high interleave of two V8SF vectors.
;; It is written as three selections: one with the index list
;; 0,8,1,9,4,12,5,13, one with the index list 2,10,3,11,6,14,7,15, and a
;; final set of operand 0 that selects indices 4-7 and 12-15.
;; operands[3] and operands[4] are fresh V8SF pseudo registers created in
;; the preparation code.
5141 (define_expand "vec_interleave_highv8sf"
5145 (match_operand:V8SF 1 "register_operand" "x")
5146 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
5147 (parallel [(const_int 0) (const_int 8)
5148 (const_int 1) (const_int 9)
5149 (const_int 4) (const_int 12)
5150 (const_int 5) (const_int 13)])))
5156 (parallel [(const_int 2) (const_int 10)
5157 (const_int 3) (const_int 11)
5158 (const_int 6) (const_int 14)
5159 (const_int 7) (const_int 15)])))
5160 (set (match_operand:V8SF 0 "register_operand")
5165 (parallel [(const_int 4) (const_int 5)
5166 (const_int 6) (const_int 7)
5167 (const_int 12) (const_int 13)
5168 (const_int 14) (const_int 15)])))]
5171 operands[3] = gen_reg_rtx (V8SFmode);
5172 operands[4] = gen_reg_rtx (V8SFmode);
;; 128-bit high interleave (selection list 2,6,3,7).
;; Alternative 0 is the two-operand unpckhps with operand 1 tied to the
;; destination (noavx); alternative 1 is the three-operand vunpckhps (avx).
5175 (define_insn "vec_interleave_highv4sf"
5176 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
5179 (match_operand:V4SF 1 "register_operand" "0,x")
5180 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
5181 (parallel [(const_int 2) (const_int 6)
5182 (const_int 3) (const_int 7)])))]
5185 unpckhps\t{%2, %0|%0, %2}
5186 vunpckhps\t{%2, %1, %0|%0, %1, %2}"
5187 [(set_attr "isa" "noavx,avx")
5188 (set_attr "type" "sselog")
5189 (set_attr "prefix" "orig,vex")
5190 (set_attr "mode" "V4SF")])
;; 512-bit vunpcklps, with an optional mask variant generated through
;; <mask_name>.  The selection list pairs index i with index i + 16 for
;; i = 0, 1, 4, 5, 8, 9, 12, 13.
5192 (define_insn "<mask_codefor>avx512f_unpcklps512<mask_name>"
5193 [(set (match_operand:V16SF 0 "register_operand" "=v")
5196 (match_operand:V16SF 1 "register_operand" "v")
5197 (match_operand:V16SF 2 "nonimmediate_operand" "vm"))
5198 (parallel [(const_int 0) (const_int 16)
5199 (const_int 1) (const_int 17)
5200 (const_int 4) (const_int 20)
5201 (const_int 5) (const_int 21)
5202 (const_int 8) (const_int 24)
5203 (const_int 9) (const_int 25)
5204 (const_int 12) (const_int 28)
5205 (const_int 13) (const_int 29)])))]
5207 "vunpcklps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
5208 [(set_attr "type" "sselog")
5209 (set_attr "prefix" "evex")
5210 (set_attr "mode" "V16SF")])
5212 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; 256-bit vunpcklps.  The selection list pairs index i with index i + 8
;; for i = 0, 1, 4, 5.
5213 (define_insn "avx_unpcklps256"
5214 [(set (match_operand:V8SF 0 "register_operand" "=x")
5217 (match_operand:V8SF 1 "register_operand" "x")
5218 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
5219 (parallel [(const_int 0) (const_int 8)
5220 (const_int 1) (const_int 9)
5221 (const_int 4) (const_int 12)
5222 (const_int 5) (const_int 13)])))]
5224 "vunpcklps\t{%2, %1, %0|%0, %1, %2}"
5225 [(set_attr "type" "sselog")
5226 (set_attr "prefix" "vex")
5227 (set_attr "mode" "V8SF")])
;; Expander for a full-width low interleave of two V8SF vectors.
;; It is written as three selections: one with the index list
;; 0,8,1,9,4,12,5,13, one with the index list 2,10,3,11,6,14,7,15, and a
;; final set of operand 0 that selects indices 0-3 and 8-11.
;; operands[3] and operands[4] are fresh V8SF pseudo registers created in
;; the preparation code.
5229 (define_expand "vec_interleave_lowv8sf"
5233 (match_operand:V8SF 1 "register_operand" "x")
5234 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
5235 (parallel [(const_int 0) (const_int 8)
5236 (const_int 1) (const_int 9)
5237 (const_int 4) (const_int 12)
5238 (const_int 5) (const_int 13)])))
5244 (parallel [(const_int 2) (const_int 10)
5245 (const_int 3) (const_int 11)
5246 (const_int 6) (const_int 14)
5247 (const_int 7) (const_int 15)])))
5248 (set (match_operand:V8SF 0 "register_operand")
5253 (parallel [(const_int 0) (const_int 1)
5254 (const_int 2) (const_int 3)
5255 (const_int 8) (const_int 9)
5256 (const_int 10) (const_int 11)])))]
5259 operands[3] = gen_reg_rtx (V8SFmode);
5260 operands[4] = gen_reg_rtx (V8SFmode);
;; 128-bit low interleave (selection list 0,4,1,5).
;; Alternative 0 is the two-operand unpcklps with operand 1 tied to the
;; destination (noavx); alternative 1 is the three-operand vunpcklps (avx).
5263 (define_insn "vec_interleave_lowv4sf"
5264 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
5267 (match_operand:V4SF 1 "register_operand" "0,x")
5268 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
5269 (parallel [(const_int 0) (const_int 4)
5270 (const_int 1) (const_int 5)])))]
5273 unpcklps\t{%2, %0|%0, %2}
5274 vunpcklps\t{%2, %1, %0|%0, %1, %2}"
5275 [(set_attr "isa" "noavx,avx")
5276 (set_attr "type" "sselog")
5277 (set_attr "prefix" "orig,vex")
5278 (set_attr "mode" "V4SF")])
5280 ;; These are modeled with the same vec_concat as the others so that we
5281 ;; capture users of shufps that can use the new instructions
;; 256-bit vmovshdup: the selection list is 1,1,3,3,5,5,7,7, i.e. each
;; odd-indexed element appears twice.  Operand 1 may be a register or memory.
5282 (define_insn "avx_movshdup256"
5283 [(set (match_operand:V8SF 0 "register_operand" "=x")
5286 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
5288 (parallel [(const_int 1) (const_int 1)
5289 (const_int 3) (const_int 3)
5290 (const_int 5) (const_int 5)
5291 (const_int 7) (const_int 7)])))]
5293 "vmovshdup\t{%1, %0|%0, %1}"
5294 [(set_attr "type" "sse")
5295 (set_attr "prefix" "vex")
5296 (set_attr "mode" "V8SF")])
;; 128-bit %vmovshdup on V4SF; operand 1 may be a register or memory.
;; The prefix_rep attribute is set to 1 and the prefix is maybe_vex, so the
;; same pattern serves both the legacy and the VEX encoding.
5298 (define_insn "sse3_movshdup"
5299 [(set (match_operand:V4SF 0 "register_operand" "=x")
5302 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
5304 (parallel [(const_int 1)
5309 "%vmovshdup\t{%1, %0|%0, %1}"
5310 [(set_attr "type" "sse")
5311 (set_attr "prefix_rep" "1")
5312 (set_attr "prefix" "maybe_vex")
5313 (set_attr "mode" "V4SF")])
;; 512-bit vmovshdup, with an optional mask variant generated through
;; <mask_name>.  The selection list is 1,1,3,3,...,15,15, i.e. each
;; odd-indexed element appears twice.
5315 (define_insn "<mask_codefor>avx512f_movshdup512<mask_name>"
5316 [(set (match_operand:V16SF 0 "register_operand" "=v")
5319 (match_operand:V16SF 1 "nonimmediate_operand" "vm")
5321 (parallel [(const_int 1) (const_int 1)
5322 (const_int 3) (const_int 3)
5323 (const_int 5) (const_int 5)
5324 (const_int 7) (const_int 7)
5325 (const_int 9) (const_int 9)
5326 (const_int 11) (const_int 11)
5327 (const_int 13) (const_int 13)
5328 (const_int 15) (const_int 15)])))]
5330 "vmovshdup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5331 [(set_attr "type" "sse")
5332 (set_attr "prefix" "evex")
5333 (set_attr "mode" "V16SF")])
;; 256-bit vmovsldup: the selection list is 0,0,2,2,4,4,6,6, i.e. each
;; even-indexed element appears twice.  Operand 1 may be a register or memory.
5335 (define_insn "avx_movsldup256"
5336 [(set (match_operand:V8SF 0 "register_operand" "=x")
5339 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
5341 (parallel [(const_int 0) (const_int 0)
5342 (const_int 2) (const_int 2)
5343 (const_int 4) (const_int 4)
5344 (const_int 6) (const_int 6)])))]
5346 "vmovsldup\t{%1, %0|%0, %1}"
5347 [(set_attr "type" "sse")
5348 (set_attr "prefix" "vex")
5349 (set_attr "mode" "V8SF")])
;; 128-bit %vmovsldup on V4SF; operand 1 may be a register or memory.
;; The prefix_rep attribute is set to 1 and the prefix is maybe_vex, so the
;; same pattern serves both the legacy and the VEX encoding.
5351 (define_insn "sse3_movsldup"
5352 [(set (match_operand:V4SF 0 "register_operand" "=x")
5355 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
5357 (parallel [(const_int 0)
5362 "%vmovsldup\t{%1, %0|%0, %1}"
5363 [(set_attr "type" "sse")
5364 (set_attr "prefix_rep" "1")
5365 (set_attr "prefix" "maybe_vex")
5366 (set_attr "mode" "V4SF")])
;; 512-bit vmovsldup, with an optional mask variant generated through
;; <mask_name>.  The selection list is 0,0,2,2,...,14,14, i.e. each
;; even-indexed element appears twice.
5368 (define_insn "<mask_codefor>avx512f_movsldup512<mask_name>"
5369 [(set (match_operand:V16SF 0 "register_operand" "=v")
5372 (match_operand:V16SF 1 "nonimmediate_operand" "vm")
5374 (parallel [(const_int 0) (const_int 0)
5375 (const_int 2) (const_int 2)
5376 (const_int 4) (const_int 4)
5377 (const_int 6) (const_int 6)
5378 (const_int 8) (const_int 8)
5379 (const_int 10) (const_int 10)
5380 (const_int 12) (const_int 12)
5381 (const_int 14) (const_int 14)])))]
5383 "vmovsldup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5384 [(set_attr "type" "sse")
5385 (set_attr "prefix" "evex")
5386 (set_attr "mode" "V16SF")])
;; Expander that decodes the immediate in operand 3 into the eight explicit
;; element indices expected by avx_shufps256_1.  Each 2-bit field of the
;; mask yields one index: fields 0 and 1 are used as-is, fields 2 and 3 get
;; +8, and the same four fields are repeated with +4 and +12 respectively
;; for the second group of four indices.
5388 (define_expand "avx_shufps256"
5389 [(match_operand:V8SF 0 "register_operand")
5390 (match_operand:V8SF 1 "register_operand")
5391 (match_operand:V8SF 2 "nonimmediate_operand")
5392 (match_operand:SI 3 "const_int_operand")]
5395 int mask = INTVAL (operands[3]);
5396 emit_insn (gen_avx_shufps256_1 (operands[0], operands[1], operands[2],
5397 GEN_INT ((mask >> 0) & 3),
5398 GEN_INT ((mask >> 2) & 3),
5399 GEN_INT (((mask >> 4) & 3) + 8),
5400 GEN_INT (((mask >> 6) & 3) + 8),
5401 GEN_INT (((mask >> 0) & 3) + 4),
5402 GEN_INT (((mask >> 2) & 3) + 4),
5403 GEN_INT (((mask >> 4) & 3) + 12),
5404 GEN_INT (((mask >> 6) & 3) + 12)));
5408 ;; One bit in mask selects 2 elements.
;; 256-bit vshufps with the selection spelled out as eight index operands
;; (operands 3-10).  The condition requires operands 7-10 to equal operands
;; 3-6 plus 4.  The output code re-encodes operands 3-6 into a single
;; immediate (2 bits per field, after subtracting 8 from operands 5 and 6)
;; and stores it back in operands[3] for printing.
5409 (define_insn "avx_shufps256_1"
5410 [(set (match_operand:V8SF 0 "register_operand" "=x")
5413 (match_operand:V8SF 1 "register_operand" "x")
5414 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
5415 (parallel [(match_operand 3 "const_0_to_3_operand" )
5416 (match_operand 4 "const_0_to_3_operand" )
5417 (match_operand 5 "const_8_to_11_operand" )
5418 (match_operand 6 "const_8_to_11_operand" )
5419 (match_operand 7 "const_4_to_7_operand" )
5420 (match_operand 8 "const_4_to_7_operand" )
5421 (match_operand 9 "const_12_to_15_operand")
5422 (match_operand 10 "const_12_to_15_operand")])))]
5424 && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
5425 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
5426 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
5427 && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4))"
5430 mask = INTVAL (operands[3]);
5431 mask |= INTVAL (operands[4]) << 2;
5432 mask |= (INTVAL (operands[5]) - 8) << 4;
5433 mask |= (INTVAL (operands[6]) - 8) << 6;
5434 operands[3] = GEN_INT (mask);
5436 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
5438 [(set_attr "type" "sseshuf")
5439 (set_attr "length_immediate" "1")
5440 (set_attr "prefix" "vex")
5441 (set_attr "mode" "V8SF")])
;; Expander that decodes the immediate in operand 3 into the four explicit
;; element indices expected by sse_shufps_v4sf: 2-bit fields 0 and 1 are
;; used as-is, fields 2 and 3 get +4.
5443 (define_expand "sse_shufps"
5444 [(match_operand:V4SF 0 "register_operand")
5445 (match_operand:V4SF 1 "register_operand")
5446 (match_operand:V4SF 2 "nonimmediate_operand")
5447 (match_operand:SI 3 "const_int_operand")]
5450 int mask = INTVAL (operands[3]);
5451 emit_insn (gen_sse_shufps_v4sf (operands[0], operands[1], operands[2],
5452 GEN_INT ((mask >> 0) & 3),
5453 GEN_INT ((mask >> 2) & 3),
5454 GEN_INT (((mask >> 4) & 3) + 4),
5455 GEN_INT (((mask >> 6) & 3) + 4)));
;; 128-bit shufps over the VI4F_128 modes.  The selection is a vec_select
;; from the concatenation of operands 1 and 2: operands 3 and 4 index
;; 0-3, operands 5 and 6 index 4-7.  The output code packs the four
;; selectors into one immediate (2 bits each, after subtracting 4 from
;; operands 5 and 6), stores it in operands[3], and returns the shufps or
;; vshufps template according to which_alternative.
5459 (define_insn "sse_shufps_<mode>"
5460 [(set (match_operand:VI4F_128 0 "register_operand" "=x,x")
5461 (vec_select:VI4F_128
5462 (vec_concat:<ssedoublevecmode>
5463 (match_operand:VI4F_128 1 "register_operand" "0,x")
5464 (match_operand:VI4F_128 2 "nonimmediate_operand" "xm,xm"))
5465 (parallel [(match_operand 3 "const_0_to_3_operand")
5466 (match_operand 4 "const_0_to_3_operand")
5467 (match_operand 5 "const_4_to_7_operand")
5468 (match_operand 6 "const_4_to_7_operand")])))]
5472 mask |= INTVAL (operands[3]) << 0;
5473 mask |= INTVAL (operands[4]) << 2;
5474 mask |= (INTVAL (operands[5]) - 4) << 4;
5475 mask |= (INTVAL (operands[6]) - 4) << 6;
5476 operands[3] = GEN_INT (mask);
5478 switch (which_alternative)
5481 return "shufps\t{%3, %2, %0|%0, %2, %3}";
5483 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
5488 [(set_attr "isa" "noavx,avx")
5489 (set_attr "type" "sseshuf")
5490 (set_attr "length_immediate" "1")
5491 (set_attr "prefix" "orig,vex")
5492 (set_attr "mode" "V4SF")])
;; Extracts elements 2 and 3 of V4SF operand 1 into V2SF operand 0.
;; Alternatives, in order: %vmovhps store to memory, %vmovhlps register to
;; register, and %vmovlps load from the high half (%H1) of an offsettable
;; memory source.
5494 (define_insn "sse_storehps"
5495 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
5497 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
5498 (parallel [(const_int 2) (const_int 3)])))]
5501 %vmovhps\t{%1, %0|%q0, %1}
5502 %vmovhlps\t{%1, %d0|%d0, %1}
5503 %vmovlps\t{%H1, %d0|%d0, %H1}"
5504 [(set_attr "type" "ssemov")
5505 (set_attr "ssememalign" "64")
5506 (set_attr "prefix" "maybe_vex")
5507 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander wrapper for sse_loadhps.  The operands are passed through
;; ix86_fixup_binary_operands, the insn is emitted on the destination that
;; helper returns, and the result is copied into operand 0 when the helper
;; returned a different destination.
5509 (define_expand "sse_loadhps_exp"
5510 [(set (match_operand:V4SF 0 "nonimmediate_operand")
5513 (match_operand:V4SF 1 "nonimmediate_operand")
5514 (parallel [(const_int 0) (const_int 1)]))
5515 (match_operand:V2SF 2 "nonimmediate_operand")))]
5518 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
5520 emit_insn (gen_sse_loadhps (dst, operands[1], operands[2]));
5522 /* Fix up the destination if needed. */
5523 if (dst != operands[0])
5524 emit_move_insn (operands[0], dst);
;; Combines elements 0 and 1 of V4SF operand 1 with V2SF operand 2.
;; Alternatives, in order:
;;   0/1  movhps / vmovhps loading operand 2 from memory;
;;   2/3  movlhps / vmovlhps with register operand 2;
;;   4    %vmovlps storing operand 2 into the high half (%H0) of an
;;        offsettable memory destination.
5529 (define_insn "sse_loadhps"
5530 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,o")
5533 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
5534 (parallel [(const_int 0) (const_int 1)]))
5535 (match_operand:V2SF 2 "nonimmediate_operand" " m,m,x,x,x")))]
5538 movhps\t{%2, %0|%0, %q2}
5539 vmovhps\t{%2, %1, %0|%0, %1, %q2}
5540 movlhps\t{%2, %0|%0, %2}
5541 vmovlhps\t{%2, %1, %0|%0, %1, %2}
5542 %vmovlps\t{%2, %H0|%H0, %2}"
5543 [(set_attr "isa" "noavx,avx,noavx,avx,*")
5544 (set_attr "type" "ssemov")
5545 (set_attr "ssememalign" "64")
5546 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
5547 (set_attr "mode" "V2SF,V2SF,V4SF,V4SF,V2SF")])
;; Extracts elements 0 and 1 of V4SF operand 1 into V2SF operand 0.
;; Alternatives, in order: %vmovlps store to memory, %vmovaps register copy,
;; and %vmovlps load from a memory source.
5549 (define_insn "sse_storelps"
5550 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
5552 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,m")
5553 (parallel [(const_int 0) (const_int 1)])))]
5556 %vmovlps\t{%1, %0|%q0, %1}
5557 %vmovaps\t{%1, %0|%0, %1}
5558 %vmovlps\t{%1, %d0|%d0, %q1}"
5559 [(set_attr "type" "ssemov")
5560 (set_attr "prefix" "maybe_vex")
5561 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander wrapper for sse_loadlps.  The operands are passed through
;; ix86_fixup_binary_operands, the insn is emitted on the destination that
;; helper returns, and the result is copied into operand 0 when the helper
;; returned a different destination.
5563 (define_expand "sse_loadlps_exp"
5564 [(set (match_operand:V4SF 0 "nonimmediate_operand")
5566 (match_operand:V2SF 2 "nonimmediate_operand")
5568 (match_operand:V4SF 1 "nonimmediate_operand")
5569 (parallel [(const_int 2) (const_int 3)]))))]
5572 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
5574 emit_insn (gen_sse_loadlps (dst, operands[1], operands[2]));
5576 /* Fix up the destination if needed. */
5577 if (dst != operands[0])
5578 emit_move_insn (operands[0], dst);
;; Combines V2SF operand 2 with elements 2 and 3 of V4SF operand 1.
;; Alternatives, in order:
;;   0/1  shufps / vshufps with immediate 0xe4 when operand 2 is a register;
;;   2/3  movlps / vmovlps loading operand 2 from memory;
;;   4    %vmovlps storing operand 2 into a memory destination.
5583 (define_insn "sse_loadlps"
5584 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,m")
5586 (match_operand:V2SF 2 "nonimmediate_operand" " 0,x,m,m,x")
5588 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,0,x,0")
5589 (parallel [(const_int 2) (const_int 3)]))))]
5592 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
5593 vshufps\t{$0xe4, %1, %2, %0|%0, %2, %1, 0xe4}
5594 movlps\t{%2, %0|%0, %q2}
5595 vmovlps\t{%2, %1, %0|%0, %1, %q2}
5596 %vmovlps\t{%2, %0|%q0, %2}"
5597 [(set_attr "isa" "noavx,avx,noavx,avx,*")
5598 (set_attr "type" "sseshuf,sseshuf,ssemov,ssemov,ssemov")
5599 (set_attr "ssememalign" "64")
5600 (set_attr "length_immediate" "1,1,*,*,*")
5601 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
5602 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
;; movss between V4SF registers only (all operands are register_operand).
;; Alternative 0 is the two-operand movss with operand 1 tied to the
;; destination (noavx); alternative 1 is the three-operand vmovss (avx).
5604 (define_insn "sse_movss"
5605 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
5607 (match_operand:V4SF 2 "register_operand" " x,x")
5608 (match_operand:V4SF 1 "register_operand" " 0,x")
5612 movss\t{%2, %0|%0, %2}
5613 vmovss\t{%2, %1, %0|%0, %1, %2}"
5614 [(set_attr "isa" "noavx,avx")
5615 (set_attr "type" "ssemov")
5616 (set_attr "prefix" "orig,vex")
5617 (set_attr "mode" "SF")])
;; vbroadcastss from a register: duplicates element 0 of V4SF register
;; operand 1 across the VF1_128_256 destination.
5619 (define_insn "avx2_vec_dup<mode>"
5620 [(set (match_operand:VF1_128_256 0 "register_operand" "=x")
5621 (vec_duplicate:VF1_128_256
5623 (match_operand:V4SF 1 "register_operand" "x")
5624 (parallel [(const_int 0)]))))]
5626 "vbroadcastss\t{%1, %0|%0, %1}"
5627 [(set_attr "type" "sselog1")
5628 (set_attr "prefix" "vex")
5629 (set_attr "mode" "<MODE>")])
;; vbroadcastss whose source is element 0 of a V8SF register.  Operand 1 is
;; printed with the %x modifier (presumably the xmm name of the register;
;; confirm against the operand printer).
5631 (define_insn "avx2_vec_dupv8sf_1"
5632 [(set (match_operand:V8SF 0 "register_operand" "=x")
5635 (match_operand:V8SF 1 "register_operand" "x")
5636 (parallel [(const_int 0)]))))]
5638 "vbroadcastss\t{%x1, %0|%0, %x1}"
5639 [(set_attr "type" "sselog1")
5640 (set_attr "prefix" "vex")
5641 (set_attr "mode" "V8SF")])
;; Broadcast of SF operand 1 into a V4SF register.  Alternatives, in order:
;; vshufps $0 from a register (avx), vbroadcastss from memory (avx), and
;; in-place shufps $0 with operand 1 tied to the destination (noavx).
5643 (define_insn "vec_dupv4sf"
5644 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
5646 (match_operand:SF 1 "nonimmediate_operand" "x,m,0")))]
5649 vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0}
5650 vbroadcastss\t{%1, %0|%0, %1}
5651 shufps\t{$0, %0, %0|%0, %0, 0}"
5652 [(set_attr "isa" "avx,avx,noavx")
5653 (set_attr "type" "sseshuf1,ssemov,sseshuf1")
5654 (set_attr "length_immediate" "1,0,1")
5655 (set_attr "prefix_extra" "0,1,*")
5656 (set_attr "prefix" "vex,vex,orig")
5657 (set_attr "mode" "V4SF")])
5659 ;; Although insertps takes register source, we prefer
5660 ;; unpcklps with register source since it is shorter.
;; Builds a V2SF from two SF operands.  Alternatives, in order:
;;   0/1  unpcklps / vunpcklps with register operand 2;
;;   2/3  insertps / vinsertps $0x10 with memory operand 2;
;;   4    %vmovss load of operand 1 when operand 2 matches constraint C;
;;   5    MMX punpckldq;
;;   6    MMX movd load of operand 1 when operand 2 matches constraint C.
5661 (define_insn "*vec_concatv2sf_sse4_1"
5662 [(set (match_operand:V2SF 0 "register_operand" "=x,x,x,x,x,*y ,*y")
5664 (match_operand:SF 1 "nonimmediate_operand" " 0,x,0,x,m, 0 , m")
5665 (match_operand:SF 2 "vector_move_operand" " x,x,m,m,C,*ym, C")))]
5668 unpcklps\t{%2, %0|%0, %2}
5669 vunpcklps\t{%2, %1, %0|%0, %1, %2}
5670 insertps\t{$0x10, %2, %0|%0, %2, 0x10}
5671 vinsertps\t{$0x10, %2, %1, %0|%0, %1, %2, 0x10}
5672 %vmovss\t{%1, %0|%0, %1}
5673 punpckldq\t{%2, %0|%0, %2}
5674 movd\t{%1, %0|%0, %1}"
5675 [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
5676 (set_attr "type" "sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov")
5677 (set_attr "prefix_data16" "*,*,1,*,*,*,*")
5678 (set_attr "prefix_extra" "*,*,1,1,*,*,*")
5679 (set_attr "length_immediate" "*,*,1,1,*,*,*")
5680 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig")
5681 (set_attr "mode" "V4SF,V4SF,V4SF,V4SF,SF,DI,DI")])
5683 ;; ??? In theory we can match memory for the MMX alternative, but allowing
5684 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
5685 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Builds a V2SF from two SF operands.  Alternatives, in order: unpcklps
;; with register operand 2, movss load of operand 1 when operand 2 matches
;; constraint C, MMX punpckldq, and MMX movd load of operand 1 when operand 2
;; matches constraint C.
5686 (define_insn "*vec_concatv2sf_sse"
5687 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
5689 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
5690 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
5693 unpcklps\t{%2, %0|%0, %2}
5694 movss\t{%1, %0|%0, %1}
5695 punpckldq\t{%2, %0|%0, %2}
5696 movd\t{%1, %0|%0, %1}"
5697 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
5698 (set_attr "mode" "V4SF,SF,DI,DI")])
;; Builds a V4SF from two V2SF operands.  Alternatives 0/1 use
;; movlhps / vmovlhps with register operand 2; alternatives 2/3 use
;; movhps / vmovhps with memory operand 2.
5700 (define_insn "*vec_concatv4sf"
5701 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x,x")
5703 (match_operand:V2SF 1 "register_operand" " 0,x,0,x")
5704 (match_operand:V2SF 2 "nonimmediate_operand" " x,x,m,m")))]
5707 movlhps\t{%2, %0|%0, %2}
5708 vmovlhps\t{%2, %1, %0|%0, %1, %2}
5709 movhps\t{%2, %0|%0, %q2}
5710 vmovhps\t{%2, %1, %0|%0, %1, %q2}"
5711 [(set_attr "isa" "noavx,avx,noavx,avx")
5712 (set_attr "type" "ssemov")
5713 (set_attr "prefix" "orig,vex,orig,vex")
5714 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF")])
;; Vector-initialisation expander for the V_128 modes; the work is delegated
;; to ix86_expand_vector_init (false, operands[0], operands[1]).
5716 (define_expand "vec_init<mode>"
5717 [(match_operand:V_128 0 "register_operand")
5721 ix86_expand_vector_init (false, operands[0], operands[1]);
5725 ;; Avoid combining registers from different units in a single alternative,
5726 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Eleven-alternative insn combining a vec_duplicate of scalar operand 2
;; with vector operand 1 (VI4F_128 modes).  Alternatives 0-3 take operand 1
;; matching constraint C; alternatives 4-7 take a register operand 1 and
;; use movss / vmovss or pinsrd / vpinsrd with index 0; alternatives 8-10
;; have a memory destination.  The "type" attribute is sselog for
;; alternatives 0, 6 and 7, imov for 9, fmov for 10 and ssemov otherwise.
5727 (define_insn "vec_set<mode>_0"
5728 [(set (match_operand:VI4F_128 0 "nonimmediate_operand"
5729 "=x,x,x ,x,x,x,x ,x ,m ,m ,m")
5731 (vec_duplicate:VI4F_128
5732 (match_operand:<ssescalarmode> 2 "general_operand"
5733 " x,m,*r,m,x,x,*rm,*rm,!x,!*re,!*fF"))
5734 (match_operand:VI4F_128 1 "vector_move_operand"
5735 " C,C,C ,C,0,x,0 ,x ,0 ,0 ,0")
5739 %vinsertps\t{$0xe, %d2, %0|%0, %d2, 0xe}
5740 %vmov<ssescalarmodesuffix>\t{%2, %0|%0, %2}
5741 %vmovd\t{%2, %0|%0, %2}
5742 movss\t{%2, %0|%0, %2}
5743 movss\t{%2, %0|%0, %2}
5744 vmovss\t{%2, %1, %0|%0, %1, %2}
5745 pinsrd\t{$0, %2, %0|%0, %2, 0}
5746 vpinsrd\t{$0, %2, %1, %0|%0, %1, %2, 0}
5750 [(set_attr "isa" "sse4,sse2,sse2,noavx,noavx,avx,sse4_noavx,avx,*,*,*")
5752 (cond [(eq_attr "alternative" "0,6,7")
5753 (const_string "sselog")
5754 (eq_attr "alternative" "9")
5755 (const_string "imov")
5756 (eq_attr "alternative" "10")
5757 (const_string "fmov")
5759 (const_string "ssemov")))
5760 (set_attr "prefix_extra" "*,*,*,*,*,*,1,1,*,*,*")
5761 (set_attr "length_immediate" "*,*,*,*,*,*,1,1,*,*,*")
5762 (set_attr "prefix" "maybe_vex,maybe_vex,maybe_vex,orig,orig,vex,orig,vex,*,*,*")
5763 (set_attr "mode" "SF,<ssescalarmode>,SI,SF,SF,SF,TI,TI,*,*,*")])
5765 ;; A subset is vec_setv4sf.
;; Inserts SF operand 2 into V4SF operand 1 with insertps / vinsertps.
;; Operand 3 must have an exact_log2 below the number of V4SF elements; the
;; output code replaces it with that log2 shifted left by 4, which is the
;; immediate printed in the instruction.
5766 (define_insn "*vec_setv4sf_sse4_1"
5767 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
5770 (match_operand:SF 2 "nonimmediate_operand" "xm,xm"))
5771 (match_operand:V4SF 1 "register_operand" "0,x")
5772 (match_operand:SI 3 "const_int_operand")))]
5774 && ((unsigned) exact_log2 (INTVAL (operands[3]))
5775 < GET_MODE_NUNITS (V4SFmode))"
5777 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
5778 switch (which_alternative)
5781 return "insertps\t{%3, %2, %0|%0, %2, %3}";
5783 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
5788 [(set_attr "isa" "noavx,avx")
5789 (set_attr "type" "sselog")
5790 (set_attr "prefix_data16" "1,*")
5791 (set_attr "prefix_extra" "1")
5792 (set_attr "length_immediate" "1")
5793 (set_attr "prefix" "orig,vex")
5794 (set_attr "mode" "V4SF")])
;; insertps / vinsertps expressed as an unspec with an 8-bit immediate
;; (operand 3).  When operand 2 is memory, the top two bits of the immediate
;; (count_s = imm >> 6) are folded into the address instead: the memory
;; reference is re-typed to SFmode at byte offset count_s * 4 and the
;; immediate is masked with 0x3f.
5796 (define_insn "sse4_1_insertps"
5797 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
5798 (unspec:V4SF [(match_operand:V4SF 2 "nonimmediate_operand" "xm,xm")
5799 (match_operand:V4SF 1 "register_operand" "0,x")
5800 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
5804 if (MEM_P (operands[2]))
5806 unsigned count_s = INTVAL (operands[3]) >> 6;
5808 operands[3] = GEN_INT (INTVAL (operands[3]) & 0x3f);
5809 operands[2] = adjust_address_nv (operands[2], SFmode, count_s * 4);
5811 switch (which_alternative)
5814 return "insertps\t{%3, %2, %0|%0, %2, %3}";
5816 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
5821 [(set_attr "isa" "noavx,avx")
5822 (set_attr "type" "sselog")
5823 (set_attr "prefix_data16" "1,*")
5824 (set_attr "prefix_extra" "1")
5825 (set_attr "length_immediate" "1")
5826 (set_attr "prefix" "orig,vex")
5827 (set_attr "mode" "V4SF")])
;; Split, valid only after reload (TARGET_SSE && reload_completed): a
;; vec_duplicate of non-memory scalar operand 1 written to memory operand 0
;; becomes a plain scalar store.  The destination is re-typed to the scalar
;; mode at offset 0 with adjust_address.
5830 [(set (match_operand:VI4F_128 0 "memory_operand")
5832 (vec_duplicate:VI4F_128
5833 (match_operand:<ssescalarmode> 1 "nonmemory_operand"))
5836 "TARGET_SSE && reload_completed"
5837 [(set (match_dup 0) (match_dup 1))]
5838 "operands[0] = adjust_address (operands[0], <ssescalarmode>mode, 0);")
;; Element-set expander for all V modes: stores scalar operand 1 into
;; element INTVAL (operands[2]) of vector operand 0 by delegating to
;; ix86_expand_vector_set.
5840 (define_expand "vec_set<mode>"
5841 [(match_operand:V 0 "register_operand")
5842 (match_operand:<ssescalarmode> 1 "register_operand")
5843 (match_operand 2 "const_int_operand")]
5846 ix86_expand_vector_set (false, operands[0], operands[1],
5847 INTVAL (operands[2]));
;; Extraction of element 0 of a V4SF.  After reload it is split into a plain
;; SF move: a register source is re-created as an SFmode register with the
;; same register number, a memory source is re-typed to SFmode at offset 0.
;; The condition rejects a memory-to-memory form.
5851 (define_insn_and_split "*vec_extractv4sf_0"
5852 [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,f,r")
5854 (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m,m")
5855 (parallel [(const_int 0)])))]
5856 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
5858 "&& reload_completed"
5859 [(set (match_dup 0) (match_dup 1))]
5861 if (REG_P (operands[1]))
5862 operands[1] = gen_rtx_REG (SFmode, REGNO (operands[1]));
5864 operands[1] = adjust_address (operands[1], SFmode, 0);
;; Extraction of element operand 2 (0-3) of a V4SF register.  Alternative 0
;; emits %vextractps to a general register or memory.  When the destination
;; is an SSE register the insn is split after reload: depending on the
;; element index it emits either a shufps that replicates the selected
;; element or a vec_interleave_highv4sf of operand 1 with itself, writing
;; the V4SF view of the destination register.
5867 (define_insn_and_split "*sse4_1_extractps"
5868 [(set (match_operand:SF 0 "nonimmediate_operand" "=rm,x,x")
5870 (match_operand:V4SF 1 "register_operand" "x,0,x")
5871 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n,n,n")])))]
5874 %vextractps\t{%2, %1, %0|%0, %1, %2}
5877 "&& reload_completed && SSE_REG_P (operands[0])"
5880 rtx dest = gen_rtx_REG (V4SFmode, REGNO (operands[0]));
5881 switch (INTVAL (operands[2]))
5885 emit_insn (gen_sse_shufps_v4sf (dest, operands[1], operands[1],
5886 operands[2], operands[2],
5887 GEN_INT (INTVAL (operands[2]) + 4),
5888 GEN_INT (INTVAL (operands[2]) + 4)));
5891 emit_insn (gen_vec_interleave_highv4sf (dest, operands[1], operands[1]));
5894 /* 0 should be handled by the *vec_extractv4sf_0 pattern above. */
5899 [(set_attr "isa" "*,noavx,avx")
5900 (set_attr "type" "sselog,*,*")
5901 (set_attr "prefix_data16" "1,*,*")
5902 (set_attr "prefix_extra" "1,*,*")
5903 (set_attr "length_immediate" "1,*,*")
5904 (set_attr "prefix" "maybe_vex,*,*")
5905 (set_attr "mode" "V4SF,*,*")])
;; Extraction of element operand 2 (0-3) from a V4SF in offsettable memory.
;; After reload it is split into a plain SF load from the same address
;; advanced by INTVAL (operands[2]) * 4 bytes.
5907 (define_insn_and_split "*vec_extractv4sf_mem"
5908 [(set (match_operand:SF 0 "register_operand" "=x,*r,f")
5910 (match_operand:V4SF 1 "memory_operand" "o,o,o")
5911 (parallel [(match_operand 2 "const_0_to_3_operand" "n,n,n")])))]
5914 "&& reload_completed"
5915 [(set (match_dup 0) (match_dup 1))]
5917 operands[1] = adjust_address (operands[1], SFmode, INTVAL (operands[2]) * 4);
;; Expander for the masked 128-bit extract from a 512-bit V16FI vector.
;; Operand 2 (0-3) selects the quarter; it is turned into four consecutive
;; element indices (0-3, 4-7, 8-11 or 12-15) for the _1_mask insn, with
;; operand 3 as the merge source and operand 4 as the QI mask.
;; If operand 0 is memory and operand 3 is a CONST_VECTOR, operand 0 is first
;; replaced by force_reg of itself.
;; NOTE(review): after that replacement no store back to the original memory
;; destination is visible here -- confirm this is intended.
5920 (define_expand "avx512f_vextract<shuffletype>32x4_mask"
5921 [(match_operand:<ssequartermode> 0 "nonimmediate_operand")
5922 (match_operand:V16FI 1 "register_operand")
5923 (match_operand:SI 2 "const_0_to_3_operand")
5924 (match_operand:<ssequartermode> 3 "nonimmediate_operand")
5925 (match_operand:QI 4 "register_operand")]
5928 if (MEM_P (operands[0]) && GET_CODE (operands[3]) == CONST_VECTOR)
5929 operands[0] = force_reg (<ssequartermode>mode, operands[0]);
5930 switch (INTVAL (operands[2]))
5933 emit_insn (gen_avx512f_vextract<shuffletype>32x4_1_mask (operands[0],
5934 operands[1], GEN_INT (0), GEN_INT (1), GEN_INT (2),
5935 GEN_INT (3), operands[3], operands[4]));
5938 emit_insn (gen_avx512f_vextract<shuffletype>32x4_1_mask (operands[0],
5939 operands[1], GEN_INT (4), GEN_INT (5), GEN_INT (6),
5940 GEN_INT (7), operands[3], operands[4]));
5943 emit_insn (gen_avx512f_vextract<shuffletype>32x4_1_mask (operands[0],
5944 operands[1], GEN_INT (8), GEN_INT (9), GEN_INT (10),
5945 GEN_INT (11), operands[3], operands[4]));
5948 emit_insn (gen_avx512f_vextract<shuffletype>32x4_1_mask (operands[0],
5949 operands[1], GEN_INT (12), GEN_INT (13), GEN_INT (14),
5950 GEN_INT (15), operands[3], operands[4]));
;; Masked-store form of the 128-bit extract: the destination is memory and
;; the merge source (operand 6) is tied to it with constraint "0"; operand 7
;; is the mask register.  The condition requires operands 2-5 to be four
;; consecutive indices.  The output code replaces operands[2] with
;; INTVAL (operands[2]) >> 2, which is the immediate printed.
5958 (define_insn "avx512f_vextract<shuffletype>32x4_1_maskm"
5959 [(set (match_operand:<ssequartermode> 0 "memory_operand" "=m")
5960 (vec_merge:<ssequartermode>
5961 (vec_select:<ssequartermode>
5962 (match_operand:V16FI 1 "register_operand" "v")
5963 (parallel [(match_operand 2 "const_0_to_15_operand")
5964 (match_operand 3 "const_0_to_15_operand")
5965 (match_operand 4 "const_0_to_15_operand")
5966 (match_operand 5 "const_0_to_15_operand")]))
5967 (match_operand:<ssequartermode> 6 "memory_operand" "0")
5968 (match_operand:QI 7 "register_operand" "Yk")))]
5970 && (INTVAL (operands[2]) == (INTVAL (operands[3]) - 1)
5971 && INTVAL (operands[3]) == (INTVAL (operands[4]) - 1)
5972 && INTVAL (operands[4]) == (INTVAL (operands[5]) - 1))"
5974 operands[2] = GEN_INT ((INTVAL (operands[2])) >> 2);
5975 return "vextract<shuffletype>32x4\t{%2, %1, %0%{%7%}|%0%{%7%}, %1, %2}";
5977 [(set_attr "type" "sselog")
5978 (set_attr "prefix_extra" "1")
5979 (set_attr "length_immediate" "1")
5980 (set_attr "memory" "store")
5981 (set_attr "prefix" "evex")
5982 (set_attr "mode" "<sseinsnmode>")])
;; 128-bit extract from a V16FI register, with an optional mask variant
;; generated through <mask_name>.  The condition requires operands 2-5 to be
;; four consecutive indices.  The output code replaces operands[2] with
;; INTVAL (operands[2]) >> 2, which is the immediate printed.  The "memory"
;; attribute is "store" when operand 0 is a MEM and "none" otherwise.
5984 (define_insn "<mask_codefor>avx512f_vextract<shuffletype>32x4_1<mask_name>"
5985 [(set (match_operand:<ssequartermode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
5986 (vec_select:<ssequartermode>
5987 (match_operand:V16FI 1 "register_operand" "v")
5988 (parallel [(match_operand 2 "const_0_to_15_operand")
5989 (match_operand 3 "const_0_to_15_operand")
5990 (match_operand 4 "const_0_to_15_operand")
5991 (match_operand 5 "const_0_to_15_operand")])))]
5993 && (INTVAL (operands[2]) == (INTVAL (operands[3]) - 1)
5994 && INTVAL (operands[3]) == (INTVAL (operands[4]) - 1)
5995 && INTVAL (operands[4]) == (INTVAL (operands[5]) - 1))"
5997 operands[2] = GEN_INT ((INTVAL (operands[2])) >> 2);
5998 return "vextract<shuffletype>32x4\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
6000 [(set_attr "type" "sselog")
6001 (set_attr "prefix_extra" "1")
6002 (set_attr "length_immediate" "1")
6003 (set (attr "memory")
6004 (if_then_else (match_test "MEM_P (operands[0])")
6005 (const_string "store")
6006 (const_string "none")))
6007 (set_attr "prefix" "evex")
6008 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the masked 256-bit extract from a 512-bit V8FI vector.
;; Operand 2 (0 or 1) picks gen_vec_extract_lo_<mode>_mask or
;; gen_vec_extract_hi_<mode>_mask; operand 3 is the merge source and
;; operand 4 the QI mask.
;; If operand 0 is memory and operand 3 is a CONST_VECTOR, operand 0 is first
;; forced into a register.  That register must have the mode operand 0 is
;; declared with, <ssehalfvecmode>; using <ssequartermode> here would ask
;; force_reg for a register of a different mode than the operand.
;; NOTE(review): after that replacement no store back to the original memory
;; destination is visible here -- confirm this is intended.
6010 (define_expand "avx512f_vextract<shuffletype>64x4_mask"
6011 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
6012 (match_operand:V8FI 1 "register_operand")
6013 (match_operand:SI 2 "const_0_to_1_operand")
6014 (match_operand:<ssehalfvecmode> 3 "nonimmediate_operand")
6015 (match_operand:QI 4 "register_operand")]
6018 rtx (*insn)(rtx, rtx, rtx, rtx);
6020 if (MEM_P (operands[0]) && GET_CODE (operands[3]) == CONST_VECTOR)
6021 operands[0] = force_reg (<ssehalfvecmode>mode, operands[0]);
6023 switch (INTVAL (operands[2]))
6026 insn = gen_vec_extract_lo_<mode>_mask;
6029 insn = gen_vec_extract_hi_<mode>_mask;
6035 emit_insn (insn (operands[0], operands[1], operands[3], operands[4]));
;; Split, valid only after reload and never memory-to-memory: selecting
;; elements 0-3 of a V8FI operand becomes a plain move.  The source is
;; narrowed to <ssehalfvecmode> either by re-creating the register in that
;; mode with the same register number or through gen_lowpart, and the result
;; is moved into operand 0.
6040 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
6041 (vec_select:<ssehalfvecmode>
6042 (match_operand:V8FI 1 "nonimmediate_operand")
6043 (parallel [(const_int 0) (const_int 1)
6044 (const_int 2) (const_int 3)])))]
6045 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))
6046 && reload_completed"
6049 rtx op1 = operands[1];
6051 op1 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op1));
6053 op1 = gen_lowpart (<ssehalfvecmode>mode, op1);
6054 emit_move_insn (operands[0], op1);
;; Masked-store form of the low-half (elements 0-3) extract from a V8FI
;; register: the destination is memory and the merge source (operand 2) is
;; tied to it with constraint "0"; operand 3 is the mask register.  The
;; instruction is printed with immediate 0x0.
6058 (define_insn "vec_extract_lo_<mode>_maskm"
6059 [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
6060 (vec_merge:<ssehalfvecmode>
6061 (vec_select:<ssehalfvecmode>
6062 (match_operand:V8FI 1 "register_operand" "v")
6063 (parallel [(const_int 0) (const_int 1)
6064 (const_int 2) (const_int 3)]))
6065 (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
6066 (match_operand:QI 3 "register_operand" "Yk")))]
6068 "vextract<shuffletype>64x4\t{$0x0, %1, %0%{%3%}|%0%{%3%}, %1, 0x0}"
6069 [(set_attr "type" "sselog")
6070 (set_attr "prefix_extra" "1")
6071 (set_attr "length_immediate" "1")
6072 (set_attr "prefix" "evex")
6073 (set_attr "mode" "<sseinsnmode>")])
;; Extract elements 0-3 of a V8FI vector, optionally masked via <mask_name>.
;; Alternative 0 takes a register source; alternative 1 takes a memory source
;; and a register destination.  Memory-to-memory is rejected by the condition.
;; The visible output statement emits vextract<shuffletype>64x4 with
;; immediate 0; the "memory" attribute is "store" when operand 0 is a MEM.
6075 (define_insn "vec_extract_lo_<mode><mask_name>"
6076 [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=<store_mask_constraint>,v")
6077 (vec_select:<ssehalfvecmode>
6078 (match_operand:V8FI 1 "nonimmediate_operand" "v,m")
6079 (parallel [(const_int 0) (const_int 1)
6080 (const_int 2) (const_int 3)])))]
6081 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6084 return "vextract<shuffletype>64x4\t{$0x0, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x0}";
6088 [(set_attr "type" "sselog")
6089 (set_attr "prefix_extra" "1")
6090 (set_attr "length_immediate" "1")
6091 (set (attr "memory")
6092 (if_then_else (match_test "MEM_P (operands[0])")
6093 (const_string "store")
6094 (const_string "none")))
6095 (set_attr "prefix" "evex")
6096 (set_attr "mode" "<sseinsnmode>")])
;; Masked extract of elements 4-7 of a V8FI register into memory.
;; Operand 2 is the merge source and is tied to the destination
;; (constraint "0"); operand 3 is the QImode mask in a Yk register.
;; Emits vextract<shuffletype>64x4 with immediate 1; always a store.
6098 (define_insn "vec_extract_hi_<mode>_maskm"
6099 [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
6100 (vec_merge:<ssehalfvecmode>
6101 (vec_select:<ssehalfvecmode>
6102 (match_operand:V8FI 1 "register_operand" "v")
6103 (parallel [(const_int 4) (const_int 5)
6104 (const_int 6) (const_int 7)]))
6105 (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
6106 (match_operand:QI 3 "register_operand" "Yk")))]
6108 "vextract<shuffletype>64x4\t{$0x1, %1, %0%{%3%}|%0%{%3%}, %1, 0x1}"
6109 [(set_attr "type" "sselog")
6110 (set_attr "prefix_extra" "1")
6111 (set_attr "length_immediate" "1")
6112 (set_attr "memory" "store")
6113 (set_attr "prefix" "evex")
6114 (set_attr "mode" "<sseinsnmode>")])
;; Extract elements 4-7 of a V8FI register, optionally masked via
;; <mask_name>.  Emits vextract<shuffletype>64x4 with immediate 1; the
;; "memory" attribute is "store" when operand 0 is a MEM, otherwise "none".
6116 (define_insn "vec_extract_hi_<mode><mask_name>"
6117 [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
6118 (vec_select:<ssehalfvecmode>
6119 (match_operand:V8FI 1 "register_operand" "v")
6120 (parallel [(const_int 4) (const_int 5)
6121 (const_int 6) (const_int 7)])))]
6123 "vextract<shuffletype>64x4\t{$0x1, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x1}"
6124 [(set_attr "type" "sselog")
6125 (set_attr "prefix_extra" "1")
6126 (set_attr "length_immediate" "1")
6127 (set (attr "memory")
6128 (if_then_else (match_test "MEM_P (operands[0])")
6129 (const_string "store")
6130 (const_string "none")))
6131 (set_attr "prefix" "evex")
6132 (set_attr "mode" "<sseinsnmode>")])
;; Expander for extracting one half of a V_256 register.
;; The immediate operand 2 (0 or 1) selects between
;; gen_vec_extract_lo_<mode> and gen_vec_extract_hi_<mode>; the chosen
;; generator is emitted with operands 0 (destination) and 1 (source).
6134 (define_expand "avx_vextractf128<mode>"
6135 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
6136 (match_operand:V_256 1 "register_operand")
6137 (match_operand:SI 2 "const_0_to_1_operand")]
6140 rtx (*insn)(rtx, rtx);
6142 switch (INTVAL (operands[2]))
6145 insn = gen_vec_extract_lo_<mode>;
6148 insn = gen_vec_extract_hi_<mode>;
6154 emit_insn (insn (operands[0], operands[1]));
;; Extract elements 0-7 of a V16FI vector (register or memory source, but
;; not memory-to-memory).  After reload the insn is split into a plain move:
;; operand 1 is re-expressed in <ssehalfvecmode> (gen_rtx_REG on the same
;; register number, or gen_lowpart) and moved into operand 0.
6158 (define_insn_and_split "vec_extract_lo_<mode>"
6159 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,m")
6160 (vec_select:<ssehalfvecmode>
6161 (match_operand:V16FI 1 "nonimmediate_operand" "vm,v")
6162 (parallel [(const_int 0) (const_int 1)
6163 (const_int 2) (const_int 3)
6164 (const_int 4) (const_int 5)
6165 (const_int 6) (const_int 7)])))]
6166 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6168 "&& reload_completed"
6171 rtx op1 = operands[1];
6173 op1 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op1));
6175 op1 = gen_lowpart (<ssehalfvecmode>mode, op1);
6176 emit_move_insn (operands[0], op1);
;; Extract elements 8-15 of a V16FI register into a register
;; (alternative 0) or memory (alternative 1).  Emits vextracti64x4 with
;; immediate 1.
;; Operand 1 uses register_operand: both alternatives constrain it to "v",
;; so a memory source could never satisfy the insn without a reload.
6180 (define_insn "vec_extract_hi_<mode>"
6181 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,m")
6182 (vec_select:<ssehalfvecmode>
6183 (match_operand:V16FI 1 "register_operand" "v,v")
6184 (parallel [(const_int 8) (const_int 9)
6185 (const_int 10) (const_int 11)
6186 (const_int 12) (const_int 13)
6187 (const_int 14) (const_int 15)])))]
6189 "vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
6190 [(set_attr "type" "sselog")
6191 (set_attr "prefix_extra" "1")
6192 (set_attr "length_immediate" "1")
6193 (set_attr "memory" "none,store")
6194 (set_attr "prefix" "evex")
6195 (set_attr "mode" "XI")])
;; Extract elements 0-1 of a VI8F_256 vector (not memory-to-memory).
;; After reload this splits into (set op0 op1) with operand 1 re-expressed
;; in <ssehalfvecmode>: a register keeps its register number, and the
;; adjust_address path re-addresses the operand at byte offset 0.
6197 (define_insn_and_split "vec_extract_lo_<mode>"
6198 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
6199 (vec_select:<ssehalfvecmode>
6200 (match_operand:VI8F_256 1 "nonimmediate_operand" "xm,x")
6201 (parallel [(const_int 0) (const_int 1)])))]
6202 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6204 "&& reload_completed"
6205 [(set (match_dup 0) (match_dup 1))]
6207 if (REG_P (operands[1]))
6208 operands[1] = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (operands[1]));
6210 operands[1] = adjust_address (operands[1], <ssehalfvecmode>mode, 0);
;; Extract elements 2-3 of a VI8F_256 register into a register
;; (alternative 0) or memory (alternative 1).  Emits vextract<i128> with
;; immediate 1, VEX-encoded.
6213 (define_insn "vec_extract_hi_<mode>"
6214 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
6215 (vec_select:<ssehalfvecmode>
6216 (match_operand:VI8F_256 1 "register_operand" "x,x")
6217 (parallel [(const_int 2) (const_int 3)])))]
6219 "vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}"
6220 [(set_attr "type" "sselog")
6221 (set_attr "prefix_extra" "1")
6222 (set_attr "length_immediate" "1")
6223 (set_attr "memory" "none,store")
6224 (set_attr "prefix" "vex")
6225 (set_attr "mode" "<sseinsnmode>")])
;; Extract elements 0-3 of a VI4F_256 vector (not memory-to-memory).
;; After reload this splits into (set op0 op1) with operand 1 re-expressed
;; in <ssehalfvecmode>: a register keeps its register number, and the
;; adjust_address path re-addresses the operand at byte offset 0.
6227 (define_insn_and_split "vec_extract_lo_<mode>"
6228 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
6229 (vec_select:<ssehalfvecmode>
6230 (match_operand:VI4F_256 1 "nonimmediate_operand" "xm,x")
6231 (parallel [(const_int 0) (const_int 1)
6232 (const_int 2) (const_int 3)])))]
6233 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6235 "&& reload_completed"
6236 [(set (match_dup 0) (match_dup 1))]
6238 if (REG_P (operands[1]))
6239 operands[1] = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (operands[1]));
6241 operands[1] = adjust_address (operands[1], <ssehalfvecmode>mode, 0);
;; Extract elements 4-7 of a VI4F_256 register into a register
;; (alternative 0) or memory (alternative 1).  Emits vextract<i128> with
;; immediate 1, VEX-encoded.
6244 (define_insn "vec_extract_hi_<mode>"
6245 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
6246 (vec_select:<ssehalfvecmode>
6247 (match_operand:VI4F_256 1 "register_operand" "x,x")
6248 (parallel [(const_int 4) (const_int 5)
6249 (const_int 6) (const_int 7)])))]
6251 "vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}"
6252 [(set_attr "type" "sselog")
6253 (set_attr "prefix_extra" "1")
6254 (set_attr "length_immediate" "1")
6255 (set_attr "memory" "none,store")
6256 (set_attr "prefix" "vex")
6257 (set_attr "mode" "<sseinsnmode>")])
;; Extract elements 0-15 of a V32HI vector as V16HI (not memory-to-memory).
;; After reload this splits into (set op0 op1) with operand 1 re-expressed
;; in V16HImode: a register keeps its register number, and the
;; adjust_address path re-addresses the operand at byte offset 0.
6259 (define_insn_and_split "vec_extract_lo_v32hi"
6260 [(set (match_operand:V16HI 0 "nonimmediate_operand" "=v,m")
6262 (match_operand:V32HI 1 "nonimmediate_operand" "vm,v")
6263 (parallel [(const_int 0) (const_int 1)
6264 (const_int 2) (const_int 3)
6265 (const_int 4) (const_int 5)
6266 (const_int 6) (const_int 7)
6267 (const_int 8) (const_int 9)
6268 (const_int 10) (const_int 11)
6269 (const_int 12) (const_int 13)
6270 (const_int 14) (const_int 15)])))]
6271 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6273 "&& reload_completed"
6274 [(set (match_dup 0) (match_dup 1))]
6276 if (REG_P (operands[1]))
6277 operands[1] = gen_rtx_REG (V16HImode, REGNO (operands[1]));
6279 operands[1] = adjust_address (operands[1], V16HImode, 0);
;; Extract elements 16-31 of a V32HI register as V16HI, into a register
;; (alternative 0) or memory (alternative 1).  Emits vextracti64x4 with
;; immediate 1.
;; Operand 1 uses register_operand: both alternatives constrain it to "v",
;; so a memory source could never satisfy the insn without a reload.
6282 (define_insn "vec_extract_hi_v32hi"
6283 [(set (match_operand:V16HI 0 "nonimmediate_operand" "=v,m")
6285 (match_operand:V32HI 1 "register_operand" "v,v")
6286 (parallel [(const_int 16) (const_int 17)
6287 (const_int 18) (const_int 19)
6288 (const_int 20) (const_int 21)
6289 (const_int 22) (const_int 23)
6290 (const_int 24) (const_int 25)
6291 (const_int 26) (const_int 27)
6292 (const_int 28) (const_int 29)
6293 (const_int 30) (const_int 31)])))]
6295 "vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
6296 [(set_attr "type" "sselog")
6297 (set_attr "prefix_extra" "1")
6298 (set_attr "length_immediate" "1")
6299 (set_attr "memory" "none,store")
6300 (set_attr "prefix" "evex")
6301 (set_attr "mode" "XI")])
;; Extract elements 0-7 of a V16HI vector as V8HI (not memory-to-memory).
;; After reload this splits into (set op0 op1) with operand 1 re-expressed
;; in V8HImode: a register keeps its register number, and the
;; adjust_address path re-addresses the operand at byte offset 0.
6303 (define_insn_and_split "vec_extract_lo_v16hi"
6304 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
6306 (match_operand:V16HI 1 "nonimmediate_operand" "xm,x")
6307 (parallel [(const_int 0) (const_int 1)
6308 (const_int 2) (const_int 3)
6309 (const_int 4) (const_int 5)
6310 (const_int 6) (const_int 7)])))]
6311 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6313 "&& reload_completed"
6314 [(set (match_dup 0) (match_dup 1))]
6316 if (REG_P (operands[1]))
6317 operands[1] = gen_rtx_REG (V8HImode, REGNO (operands[1]));
6319 operands[1] = adjust_address (operands[1], V8HImode, 0);
;; Extract elements 8-15 of a V16HI register as V8HI, into a register
;; (alternative 0) or memory (alternative 1).  Emits vextract%~128 with
;; immediate 1, VEX-encoded, insn mode OI.
6322 (define_insn "vec_extract_hi_v16hi"
6323 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
6325 (match_operand:V16HI 1 "register_operand" "x,x")
6326 (parallel [(const_int 8) (const_int 9)
6327 (const_int 10) (const_int 11)
6328 (const_int 12) (const_int 13)
6329 (const_int 14) (const_int 15)])))]
6331 "vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}"
6332 [(set_attr "type" "sselog")
6333 (set_attr "prefix_extra" "1")
6334 (set_attr "length_immediate" "1")
6335 (set_attr "memory" "none,store")
6336 (set_attr "prefix" "vex")
6337 (set_attr "mode" "OI")])
;; Extract elements 0-31 of a V64QI vector as V32QI (not memory-to-memory).
;; After reload this splits into (set op0 op1) with operand 1 re-expressed
;; in V32QImode: a register keeps its register number, and the
;; adjust_address path re-addresses the operand at byte offset 0.
6339 (define_insn_and_split "vec_extract_lo_v64qi"
6340 [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m")
6342 (match_operand:V64QI 1 "nonimmediate_operand" "vm,v")
6343 (parallel [(const_int 0) (const_int 1)
6344 (const_int 2) (const_int 3)
6345 (const_int 4) (const_int 5)
6346 (const_int 6) (const_int 7)
6347 (const_int 8) (const_int 9)
6348 (const_int 10) (const_int 11)
6349 (const_int 12) (const_int 13)
6350 (const_int 14) (const_int 15)
6351 (const_int 16) (const_int 17)
6352 (const_int 18) (const_int 19)
6353 (const_int 20) (const_int 21)
6354 (const_int 22) (const_int 23)
6355 (const_int 24) (const_int 25)
6356 (const_int 26) (const_int 27)
6357 (const_int 28) (const_int 29)
6358 (const_int 30) (const_int 31)])))]
6359 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6361 "&& reload_completed"
6362 [(set (match_dup 0) (match_dup 1))]
6364 if (REG_P (operands[1]))
6365 operands[1] = gen_rtx_REG (V32QImode, REGNO (operands[1]));
6367 operands[1] = adjust_address (operands[1], V32QImode, 0);
;; Extract elements 32-63 of a V64QI register as V32QI, into a register
;; (alternative 0) or memory (alternative 1).  Emits vextracti64x4 with
;; immediate 1.
;; Operand 1 uses register_operand: both alternatives constrain it to "v",
;; so a memory source could never satisfy the insn without a reload.
6370 (define_insn "vec_extract_hi_v64qi"
6371 [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m")
6373 (match_operand:V64QI 1 "register_operand" "v,v")
6374 (parallel [(const_int 32) (const_int 33)
6375 (const_int 34) (const_int 35)
6376 (const_int 36) (const_int 37)
6377 (const_int 38) (const_int 39)
6378 (const_int 40) (const_int 41)
6379 (const_int 42) (const_int 43)
6380 (const_int 44) (const_int 45)
6381 (const_int 46) (const_int 47)
6382 (const_int 48) (const_int 49)
6383 (const_int 50) (const_int 51)
6384 (const_int 52) (const_int 53)
6385 (const_int 54) (const_int 55)
6386 (const_int 56) (const_int 57)
6387 (const_int 58) (const_int 59)
6388 (const_int 60) (const_int 61)
6389 (const_int 62) (const_int 63)])))]
6391 "vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
6392 [(set_attr "type" "sselog")
6393 (set_attr "prefix_extra" "1")
6394 (set_attr "length_immediate" "1")
6395 (set_attr "memory" "none,store")
6396 (set_attr "prefix" "evex")
6397 (set_attr "mode" "XI")])
;; Extract elements 0-15 of a V32QI vector as V16QI (not memory-to-memory).
;; After reload this splits into (set op0 op1) with operand 1 re-expressed
;; in V16QImode: a register keeps its register number, and the
;; adjust_address path re-addresses the operand at byte offset 0.
6399 (define_insn_and_split "vec_extract_lo_v32qi"
6400 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
6402 (match_operand:V32QI 1 "nonimmediate_operand" "xm,x")
6403 (parallel [(const_int 0) (const_int 1)
6404 (const_int 2) (const_int 3)
6405 (const_int 4) (const_int 5)
6406 (const_int 6) (const_int 7)
6407 (const_int 8) (const_int 9)
6408 (const_int 10) (const_int 11)
6409 (const_int 12) (const_int 13)
6410 (const_int 14) (const_int 15)])))]
6411 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6413 "&& reload_completed"
6414 [(set (match_dup 0) (match_dup 1))]
6416 if (REG_P (operands[1]))
6417 operands[1] = gen_rtx_REG (V16QImode, REGNO (operands[1]));
6419 operands[1] = adjust_address (operands[1], V16QImode, 0);
;; Extract elements 16-31 of a V32QI register as V16QI, into a register
;; (alternative 0) or memory (alternative 1).  Emits vextract%~128 with
;; immediate 1, VEX-encoded, insn mode OI.
6422 (define_insn "vec_extract_hi_v32qi"
6423 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
6425 (match_operand:V32QI 1 "register_operand" "x,x")
6426 (parallel [(const_int 16) (const_int 17)
6427 (const_int 18) (const_int 19)
6428 (const_int 20) (const_int 21)
6429 (const_int 22) (const_int 23)
6430 (const_int 24) (const_int 25)
6431 (const_int 26) (const_int 27)
6432 (const_int 28) (const_int 29)
6433 (const_int 30) (const_int 31)])))]
6435 "vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}"
6436 [(set_attr "type" "sselog")
6437 (set_attr "prefix_extra" "1")
6438 (set_attr "length_immediate" "1")
6439 (set_attr "memory" "none,store")
6440 (set_attr "prefix" "vex")
6441 (set_attr "mode" "OI")])
6443 ;; Modes handled by vec_extract patterns.
;; 128-bit modes are listed unconditionally, 256-bit modes require
;; TARGET_AVX and the listed 512-bit modes require TARGET_AVX512F.
;; V64QI and V32HI are not part of this iterator.
6444 (define_mode_iterator VEC_EXTRACT_MODE
6445 [(V32QI "TARGET_AVX") V16QI
6446 (V16HI "TARGET_AVX") V8HI
6447 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
6448 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
6449 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
6450 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF])
;; Standard-name expander for extracting one scalar element from a vector.
;; Operand 0 is the scalar destination, operand 1 the vector register and
;; operand 2 the constant element index.  All work is delegated to
;; ix86_expand_vector_extract, called with `false' as its first argument.
6452 (define_expand "vec_extract<mode>"
6453 [(match_operand:<ssescalarmode> 0 "register_operand")
6454 (match_operand:VEC_EXTRACT_MODE 1 "register_operand")
6455 (match_operand 2 "const_int_operand")]
6458 ix86_expand_vector_extract (false, operands[0], operands[1],
6459 INTVAL (operands[2]));
6463 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6465 ;; Parallel double-precision floating point element swizzling
6467 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; 512-bit unpack-high of double-precision elements, optionally masked via
;; <mask_name>.  The selector picks indices 1, 9, 3, 11, 5, 13, 7, 15 from
;; the combination of operands 1 and 2, and the insn emits vunpckhpd.
;; Operand 1 uses register_operand: its only constraint is "v", so a memory
;; operand could never satisfy the insn without a reload; this also matches
;; the 256-bit avx_unpckhpd256 pattern.
6469 (define_insn "<mask_codefor>avx512f_unpckhpd512<mask_name>"
6470 [(set (match_operand:V8DF 0 "register_operand" "=v")
6473 (match_operand:V8DF 1 "register_operand" "v")
6474 (match_operand:V8DF 2 "nonimmediate_operand" "vm"))
6475 (parallel [(const_int 1) (const_int 9)
6476 (const_int 3) (const_int 11)
6477 (const_int 5) (const_int 13)
6478 (const_int 7) (const_int 15)])))]
6480 "vunpckhpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
6481 [(set_attr "type" "sselog")
6482 (set_attr "prefix" "evex")
6483 (set_attr "mode" "V8DF")])
6485 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; 256-bit unpack-high of double-precision elements.  The selector picks
;; indices 1, 5, 3, 7 from the combination of operands 1 and 2; operand 2
;; may be memory.  Emits the VEX-encoded three-operand vunpckhpd.
6486 (define_insn "avx_unpckhpd256"
6487 [(set (match_operand:V4DF 0 "register_operand" "=x")
6490 (match_operand:V4DF 1 "register_operand" "x")
6491 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
6492 (parallel [(const_int 1) (const_int 5)
6493 (const_int 3) (const_int 7)])))]
6495 "vunpckhpd\t{%2, %1, %0|%0, %1, %2}"
6496 [(set_attr "type" "sselog")
6497 (set_attr "prefix" "vex")
6498 (set_attr "mode" "V4DF")])
;; Expander for the V4DF interleave-high operation, built from three
;; selections.  The visible selectors are (0 4 2 6) and (1 5 3 7) for the
;; two intermediate steps and (2 3 6 7) for the final set of operand 0.
;; Operands 3 and 4 are fresh V4DF pseudos created by the preparation code.
6500 (define_expand "vec_interleave_highv4df"
6504 (match_operand:V4DF 1 "register_operand" "x")
6505 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
6506 (parallel [(const_int 0) (const_int 4)
6507 (const_int 2) (const_int 6)])))
6513 (parallel [(const_int 1) (const_int 5)
6514 (const_int 3) (const_int 7)])))
6515 (set (match_operand:V4DF 0 "register_operand")
6520 (parallel [(const_int 2) (const_int 3)
6521 (const_int 6) (const_int 7)])))]
6524 operands[3] = gen_reg_rtx (V4DFmode);
6525 operands[4] = gen_reg_rtx (V4DFmode);
;; Expander for the V2DF interleave-high operation.  If the operand
;; combination is rejected by ix86_vec_interleave_v2df_operator_ok
;; (operands, 1), operand 2 is forced into a V2DF register.
6529 (define_expand "vec_interleave_highv2df"
6530 [(set (match_operand:V2DF 0 "register_operand")
6533 (match_operand:V2DF 1 "nonimmediate_operand")
6534 (match_operand:V2DF 2 "nonimmediate_operand"))
6535 (parallel [(const_int 1)
6539 if (!ix86_vec_interleave_v2df_operator_ok (operands, 1))
6540 operands[2] = force_reg (V2DFmode, operands[2]);
;; Matcher for V2DF interleave-high, valid when
;; ix86_vec_interleave_v2df_operator_ok (operands, 1) holds.
;; Six alternatives, in order: unpckhpd (noavx), vunpckhpd (avx),
;; movddup from the high half of memory operand 1 (sse3), movlpd (noavx),
;; vmovlpd (avx), and a movhpd store to a memory destination.
;; %H1 refers to the high half of operand 1.
6543 (define_insn "*vec_interleave_highv2df"
6544 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m")
6547 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,o,o,o,x")
6548 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,1,0,x,0"))
6549 (parallel [(const_int 1)
6551 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
6553 unpckhpd\t{%2, %0|%0, %2}
6554 vunpckhpd\t{%2, %1, %0|%0, %1, %2}
6555 %vmovddup\t{%H1, %0|%0, %H1}
6556 movlpd\t{%H1, %0|%0, %H1}
6557 vmovlpd\t{%H1, %2, %0|%0, %2, %H1}
6558 %vmovhpd\t{%1, %0|%q0, %1}"
6559 [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
6560 (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
6561 (set_attr "ssememalign" "64")
6562 (set_attr "prefix_data16" "*,*,*,1,*,1")
6563 (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex")
6564 (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,V1DF")])
;; Expander for the 512-bit movddup, optionally masked via <mask_name>.
;; Only operand 1 is matched as a source; the selector uses indices
;; 0, 8, 2, 10, 4, 12, 6, 14.
6566 (define_expand "avx512f_movddup512<mask_name>"
6567 [(set (match_operand:V8DF 0 "register_operand")
6570 (match_operand:V8DF 1 "nonimmediate_operand")
6572 (parallel [(const_int 0) (const_int 8)
6573 (const_int 2) (const_int 10)
6574 (const_int 4) (const_int 12)
6575 (const_int 6) (const_int 14)])))]
;; Expander for the 512-bit unpack-low of double-precision elements,
;; optionally masked via <mask_name>.  The selector picks indices
;; 0, 8, 2, 10, 4, 12, 6, 14 from the combination of operands 1 and 2.
6578 (define_expand "avx512f_unpcklpd512<mask_name>"
6579 [(set (match_operand:V8DF 0 "register_operand")
6582 (match_operand:V8DF 1 "register_operand")
6583 (match_operand:V8DF 2 "nonimmediate_operand"))
6584 (parallel [(const_int 0) (const_int 8)
6585 (const_int 2) (const_int 10)
6586 (const_int 4) (const_int 12)
6587 (const_int 6) (const_int 14)])))]
;; Matcher shared by the 512-bit movddup and unpcklpd expanders.
;; Alternative 0 has operand 2 tied to operand 1 (constraint "1"), i.e. both
;; inputs are the same value, and emits vmovddup.  Alternative 1 takes two
;; distinct inputs and emits vunpcklpd.
6590 (define_insn "*avx512f_unpcklpd512<mask_name>"
6591 [(set (match_operand:V8DF 0 "register_operand" "=v,v")
6594 (match_operand:V8DF 1 "nonimmediate_operand" "vm, v")
6595 (match_operand:V8DF 2 "nonimmediate_operand" "1 ,vm"))
6596 (parallel [(const_int 0) (const_int 8)
6597 (const_int 2) (const_int 10)
6598 (const_int 4) (const_int 12)
6599 (const_int 6) (const_int 14)])))]
6602 vmovddup\t{%1, %0<mask_operand3>|%0<mask_operand3>, %1}
6603 vunpcklpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
6604 [(set_attr "type" "sselog")
6605 (set_attr "prefix" "evex")
6606 (set_attr "mode" "V8DF")])
6608 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; Expander for the 256-bit movddup.  Only operand 1 is matched as a
;; source; the selector uses indices 0, 4, 2, 6.
6609 (define_expand "avx_movddup256"
6610 [(set (match_operand:V4DF 0 "register_operand")
6613 (match_operand:V4DF 1 "nonimmediate_operand")
6615 (parallel [(const_int 0) (const_int 4)
6616 (const_int 2) (const_int 6)])))]
;; Expander for the 256-bit unpack-low of double-precision elements.
;; The selector picks indices 0, 4, 2, 6 from the combination of
;; operands 1 and 2.
6619 (define_expand "avx_unpcklpd256"
6620 [(set (match_operand:V4DF 0 "register_operand")
6623 (match_operand:V4DF 1 "register_operand")
6624 (match_operand:V4DF 2 "nonimmediate_operand"))
6625 (parallel [(const_int 0) (const_int 4)
6626 (const_int 2) (const_int 6)])))]
;; Matcher shared by the 256-bit movddup and unpcklpd expanders.
;; Alternative 0 takes two distinct inputs and emits vunpcklpd.
;; Alternative 1 has a memory operand 1 with operand 2 tied to it
;; (constraint "1") and emits vmovddup.
6629 (define_insn "*avx_unpcklpd256"
6630 [(set (match_operand:V4DF 0 "register_operand" "=x,x")
6633 (match_operand:V4DF 1 "nonimmediate_operand" " x,m")
6634 (match_operand:V4DF 2 "nonimmediate_operand" "xm,1"))
6635 (parallel [(const_int 0) (const_int 4)
6636 (const_int 2) (const_int 6)])))]
6639 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
6640 vmovddup\t{%1, %0|%0, %1}"
6641 [(set_attr "type" "sselog")
6642 (set_attr "prefix" "vex")
6643 (set_attr "mode" "V4DF")])
;; Expander for the V4DF interleave-low operation, built from three
;; selections.  The visible selectors are (0 4 2 6) and (1 5 3 7) for the
;; two intermediate steps and (0 1 4 5) for the final set of operand 0.
;; Operands 3 and 4 are fresh V4DF pseudos created by the preparation code.
6645 (define_expand "vec_interleave_lowv4df"
6649 (match_operand:V4DF 1 "register_operand" "x")
6650 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
6651 (parallel [(const_int 0) (const_int 4)
6652 (const_int 2) (const_int 6)])))
6658 (parallel [(const_int 1) (const_int 5)
6659 (const_int 3) (const_int 7)])))
6660 (set (match_operand:V4DF 0 "register_operand")
6665 (parallel [(const_int 0) (const_int 1)
6666 (const_int 4) (const_int 5)])))]
6669 operands[3] = gen_reg_rtx (V4DFmode);
6670 operands[4] = gen_reg_rtx (V4DFmode);
;; Expander for the V2DF interleave-low operation.  If the operand
;; combination is rejected by ix86_vec_interleave_v2df_operator_ok
;; (operands, 0), operand 1 is forced into a V2DF register.
6673 (define_expand "vec_interleave_lowv2df"
6674 [(set (match_operand:V2DF 0 "register_operand")
6677 (match_operand:V2DF 1 "nonimmediate_operand")
6678 (match_operand:V2DF 2 "nonimmediate_operand"))
6679 (parallel [(const_int 0)
6683 if (!ix86_vec_interleave_v2df_operator_ok (operands, 0))
6684 operands[1] = force_reg (V2DFmode, operands[1]);
;; Matcher for V2DF interleave-low, valid when
;; ix86_vec_interleave_v2df_operator_ok (operands, 0) holds.
;; Six alternatives, in order: unpcklpd (noavx), vunpcklpd (avx),
;; movddup of memory operand 1 (sse3), movhpd from memory operand 2 (noavx),
;; vmovhpd (avx), and a movlpd store into the high half (%H0) of an
;; offsettable memory destination.
6687 (define_insn "*vec_interleave_lowv2df"
6688 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,o")
6691 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,m,0,x,0")
6692 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,1,m,m,x"))
6693 (parallel [(const_int 0)
6695 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
6697 unpcklpd\t{%2, %0|%0, %2}
6698 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
6699 %vmovddup\t{%1, %0|%0, %q1}
6700 movhpd\t{%2, %0|%0, %q2}
6701 vmovhpd\t{%2, %1, %0|%0, %1, %q2}
6702 %vmovlpd\t{%2, %H0|%H0, %2}"
6703 [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
6704 (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
6705 (set_attr "ssememalign" "64")
6706 (set_attr "prefix_data16" "*,*,*,1,*,1")
6707 (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex")
6708 (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,V1DF")])
;; Post-reload split (TARGET_SSE3) for a V2DF selection from a register
;; source into a memory destination.  The DFmode view of operand 1's
;; register is stored twice, at byte offsets 0 and 8 of operand 0.
6711 [(set (match_operand:V2DF 0 "memory_operand")
6714 (match_operand:V2DF 1 "register_operand")
6716 (parallel [(const_int 0)
6718 "TARGET_SSE3 && reload_completed"
6721 rtx low = gen_rtx_REG (DFmode, REGNO (operands[1]));
6722 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
6723 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
;; Split (TARGET_SSE3) for a V2DF selection from a memory source whose two
;; selector indices satisfy operand 2 + 2 == operand 3.  It is rewritten as
;; a vec_duplicate of the DFmode element found at byte offset operand 2 * 8
;; within operand 1.
6728 [(set (match_operand:V2DF 0 "register_operand")
6731 (match_operand:V2DF 1 "memory_operand")
6733 (parallel [(match_operand:SI 2 "const_0_to_1_operand")
6734 (match_operand:SI 3 "const_int_operand")])))]
6735 "TARGET_SSE3 && INTVAL (operands[2]) + 2 == INTVAL (operands[3])"
6736 [(set (match_dup 0) (vec_duplicate:V2DF (match_dup 1)))]
6738 operands[1] = adjust_address (operands[1], DFmode, INTVAL (operands[2]) * 8);
;; Scalar vscalef on VF_128 operands, with optional embedded rounding via
;; <round_name>.  Operand 1 is a register; operand 2 uses the rounding-aware
;; predicate and constraint.  Emits vscalef<ssescalarmodesuffix>.
6741 (define_insn "avx512f_vmscalef<mode><round_name>"
6742 [(set (match_operand:VF_128 0 "register_operand" "=v")
6745 [(match_operand:VF_128 1 "register_operand" "v")
6746 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>")]
6751 "vscalef<ssescalarmodesuffix>\t{<round_op3>%2, %1, %0|%0, %1, %2<round_op3>}"
6752 [(set_attr "prefix" "evex")
6753 (set_attr "mode" "<ssescalarmode>")])
;; Packed vscalef on VF_512 operands, with optional masking (<mask_name>)
;; and embedded rounding (<round_name>).  Emits vscalef<ssemodesuffix>.
6755 (define_insn "avx512f_scalef<mode><mask_name><round_name>"
6756 [(set (match_operand:VF_512 0 "register_operand" "=v")
6758 [(match_operand:VF_512 1 "register_operand" "v")
6759 (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>")]
6762 "vscalef<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
6763 [(set_attr "prefix" "evex")
6764 (set_attr "mode" "<MODE>")])
;; Expander for the zero-masked vpternlog form.  Forwards operands 0-4 to
;; gen_avx512f_vternlog<mode>_maskz_1, inserting an all-zero <MODE> vector
;; (CONST0_RTX) before the mask register operand 5.
6766 (define_expand "avx512f_vternlog<mode>_maskz"
6767 [(match_operand:VI48_512 0 "register_operand")
6768 (match_operand:VI48_512 1 "register_operand")
6769 (match_operand:VI48_512 2 "register_operand")
6770 (match_operand:VI48_512 3 "nonimmediate_operand")
6771 (match_operand:SI 4 "const_0_to_255_operand")
6772 (match_operand:<avx512fmaskmode> 5 "register_operand")]
6775 emit_insn (gen_avx512f_vternlog<mode>_maskz_1 (
6776 operands[0], operands[1], operands[2], operands[3],
6777 operands[4], CONST0_RTX (<MODE>mode), operands[5]));
;; vpternlog on VI48_512 operands, unmasked or zero-masked via
;; <sd_maskz_name>.  Operand 1 is tied to the destination (constraint "0"),
;; operand 3 may be memory, and operand 4 is the 8-bit immediate.
6781 (define_insn "avx512f_vternlog<mode><sd_maskz_name>"
6782 [(set (match_operand:VI48_512 0 "register_operand" "=v")
6784 [(match_operand:VI48_512 1 "register_operand" "0")
6785 (match_operand:VI48_512 2 "register_operand" "v")
6786 (match_operand:VI48_512 3 "nonimmediate_operand" "vm")
6787 (match_operand:SI 4 "const_0_to_255_operand")]
6790 "vpternlog<ssemodesuffix>\t{%4, %3, %2, %0<sd_mask_op5>|%0<sd_mask_op5>, %2, %3, %4}"
6791 [(set_attr "type" "sselog")
6792 (set_attr "prefix" "evex")
6793 (set_attr "mode" "<sseinsnmode>")])
;; Masked vpternlog on VI48_512 operands.  Operand 1 is tied to the
;; destination (constraint "0"); operand 5 is the mask register (Yk),
;; printed as the {%5} suffix on the destination.
6795 (define_insn "avx512f_vternlog<mode>_mask"
6796 [(set (match_operand:VI48_512 0 "register_operand" "=v")
6799 [(match_operand:VI48_512 1 "register_operand" "0")
6800 (match_operand:VI48_512 2 "register_operand" "v")
6801 (match_operand:VI48_512 3 "nonimmediate_operand" "vm")
6802 (match_operand:SI 4 "const_0_to_255_operand")]
6805 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
6807 "vpternlog<ssemodesuffix>\t{%4, %3, %2, %0%{%5%}|%0%{%5%}, %2, %3, %4}"
6808 [(set_attr "type" "sselog")
6809 (set_attr "prefix" "evex")
6810 (set_attr "mode" "<sseinsnmode>")])
;; Packed vgetexp on a VF_512 operand, with optional masking (<mask_name>)
;; and the SAE-only rounding variant (<round_saeonly_name>).
;; Emits vgetexp<ssemodesuffix>.
6812 (define_insn "avx512f_getexp<mode><mask_name><round_saeonly_name>"
6813 [(set (match_operand:VF_512 0 "register_operand" "=v")
6814 (unspec:VF_512 [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
6817 "vgetexp<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}";
6818 [(set_attr "prefix" "evex")
6819 (set_attr "mode" "<MODE>")])
;; Scalar vgetexp on VF_128 operands, with the SAE-only rounding variant
;; (<round_saeonly_name>).  Operand 1 is a register; operand 2 uses the
;; SAE-aware predicate and constraint.  Emits vgetexp<ssescalarmodesuffix>.
6821 (define_insn "avx512f_sgetexp<mode><round_saeonly_name>"
6822 [(set (match_operand:VF_128 0 "register_operand" "=v")
6825 [(match_operand:VF_128 1 "register_operand" "v")
6826 (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
6831 "vgetexp<ssescalarmodesuffix>\t{<round_saeonly_op3>%2, %1, %0|%0, %1, %2<round_saeonly_op3>}";
6832 [(set_attr "prefix" "evex")
6833 (set_attr "mode" "<ssescalarmode>")])
;; valign on VI48_512 operands, optionally masked via <mask_name>.
;; Operand 1 is a register, operand 2 may be memory and operand 3 is the
;; 8-bit immediate.  Emits valign<ssemodesuffix>.
6835 (define_insn "<mask_codefor>avx512f_align<mode><mask_name>"
6836 [(set (match_operand:VI48_512 0 "register_operand" "=v")
6837 (unspec:VI48_512 [(match_operand:VI48_512 1 "register_operand" "v")
6838 (match_operand:VI48_512 2 "nonimmediate_operand" "vm")
6839 (match_operand:SI 3 "const_0_to_255_operand")]
6842 "valign<ssemodesuffix>\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}";
6843 [(set_attr "prefix" "evex")
6844 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the masked 512-bit shufps.  The 8-bit immediate (operand 3)
;; is decoded into the 16 explicit element indices expected by
;; gen_avx512f_shufps512_1_mask.  For each group base of 0, 4, 8 and 12 the
;; indices are: bits 0-1 + base, bits 2-3 + base, bits 4-5 + 16 + base and
;; bits 6-7 + 16 + base.  Operands 4 and 5 are passed through as the last
;; two arguments.
6846 (define_expand "avx512f_shufps512_mask"
6847 [(match_operand:V16SF 0 "register_operand")
6848 (match_operand:V16SF 1 "register_operand")
6849 (match_operand:V16SF 2 "nonimmediate_operand")
6850 (match_operand:SI 3 "const_0_to_255_operand")
6851 (match_operand:V16SF 4 "register_operand")
6852 (match_operand:HI 5 "register_operand")]
6855 int mask = INTVAL (operands[3]);
6856 emit_insn (gen_avx512f_shufps512_1_mask (operands[0], operands[1], operands[2],
6857 GEN_INT ((mask >> 0) & 3),
6858 GEN_INT ((mask >> 2) & 3),
6859 GEN_INT (((mask >> 4) & 3) + 16),
6860 GEN_INT (((mask >> 6) & 3) + 16),
6861 GEN_INT (((mask >> 0) & 3) + 4),
6862 GEN_INT (((mask >> 2) & 3) + 4),
6863 GEN_INT (((mask >> 4) & 3) + 20),
6864 GEN_INT (((mask >> 6) & 3) + 20),
6865 GEN_INT (((mask >> 0) & 3) + 8),
6866 GEN_INT (((mask >> 2) & 3) + 8),
6867 GEN_INT (((mask >> 4) & 3) + 24),
6868 GEN_INT (((mask >> 6) & 3) + 24),
6869 GEN_INT (((mask >> 0) & 3) + 12),
6870 GEN_INT (((mask >> 2) & 3) + 12),
6871 GEN_INT (((mask >> 4) & 3) + 28),
6872 GEN_INT (((mask >> 6) & 3) + 28),
6873 operands[4], operands[5]));
;; Expander for the zero-masked packed vfixupimm form.  Forwards operands
;; 0-4 to gen_avx512f_fixupimm<mode>_maskz_1<round_saeonly_expand_name>,
;; inserting an all-zero <MODE> vector (CONST0_RTX) before the mask operand
;; 5, followed by whatever <round_saeonly_expand_operand6> expands to.
6878 (define_expand "avx512f_fixupimm<mode>_maskz<round_saeonly_expand_name>"
6879 [(match_operand:VF_512 0 "register_operand")
6880 (match_operand:VF_512 1 "register_operand")
6881 (match_operand:VF_512 2 "register_operand")
6882 (match_operand:<sseintvecmode> 3 "<round_saeonly_expand_nimm_predicate>")
6883 (match_operand:SI 4 "const_0_to_255_operand")
6884 (match_operand:<avx512fmaskmode> 5 "register_operand")]
6887 emit_insn (gen_avx512f_fixupimm<mode>_maskz_1<round_saeonly_expand_name> (
6888 operands[0], operands[1], operands[2], operands[3],
6889 operands[4], CONST0_RTX (<MODE>mode), operands[5]
6890 <round_saeonly_expand_operand6>));
;; Packed vfixupimm on VF_512 operands, unmasked or zero-masked via
;; <sd_maskz_name>, with the SAE-only rounding variant.  Operand 1 is tied
;; to the destination (constraint "0"), operand 3 is the integer-vector
;; operand and operand 4 the 8-bit immediate.
6894 (define_insn "avx512f_fixupimm<mode><sd_maskz_name><round_saeonly_name>"
6895 [(set (match_operand:VF_512 0 "register_operand" "=v")
6897 [(match_operand:VF_512 1 "register_operand" "0")
6898 (match_operand:VF_512 2 "register_operand" "v")
6899 (match_operand:<sseintvecmode> 3 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
6900 (match_operand:SI 4 "const_0_to_255_operand")]
6903 "vfixupimm<ssemodesuffix>\t{%4, <round_saeonly_sd_mask_op5>%3, %2, %0<sd_mask_op5>|%0<sd_mask_op5>, %2, %3<round_saeonly_sd_mask_op5>, %4}";
6904 [(set_attr "prefix" "evex")
6905 (set_attr "mode" "<MODE>")])
;; Masked packed vfixupimm on VF_512 operands, with the SAE-only rounding
;; variant.  Operand 1 is tied to the destination (constraint "0");
;; operand 5 is the mask register (Yk), printed as the {%5} suffix.
6907 (define_insn "avx512f_fixupimm<mode>_mask<round_saeonly_name>"
6908 [(set (match_operand:VF_512 0 "register_operand" "=v")
6911 [(match_operand:VF_512 1 "register_operand" "0")
6912 (match_operand:VF_512 2 "register_operand" "v")
6913 (match_operand:<sseintvecmode> 3 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
6914 (match_operand:SI 4 "const_0_to_255_operand")]
6917 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
6919 "vfixupimm<ssemodesuffix>\t{%4, <round_saeonly_op6>%3, %2, %0%{%5%}|%0%{%5%}, %2, %3<round_saeonly_op6>, %4}";
6920 [(set_attr "prefix" "evex")
6921 (set_attr "mode" "<MODE>")])
;; Expander for the zero-masked scalar vfixupimm form.  Forwards operands
;; 0-4 to gen_avx512f_sfixupimm<mode>_maskz_1<round_saeonly_expand_name>,
;; inserting an all-zero <MODE> vector (CONST0_RTX) before the mask operand
;; 5, followed by whatever <round_saeonly_expand_operand6> expands to.
6923 (define_expand "avx512f_sfixupimm<mode>_maskz<round_saeonly_expand_name>"
6924 [(match_operand:VF_128 0 "register_operand")
6925 (match_operand:VF_128 1 "register_operand")
6926 (match_operand:VF_128 2 "register_operand")
6927 (match_operand:<sseintvecmode> 3 "<round_saeonly_expand_nimm_predicate>")
6928 (match_operand:SI 4 "const_0_to_255_operand")
6929 (match_operand:<avx512fmaskmode> 5 "register_operand")]
6932 emit_insn (gen_avx512f_sfixupimm<mode>_maskz_1<round_saeonly_expand_name> (
6933 operands[0], operands[1], operands[2], operands[3],
6934 operands[4], CONST0_RTX (<MODE>mode), operands[5]
6935 <round_saeonly_expand_operand6>));
;; Scalar vfixupimm on VF_128 operands, unmasked or zero-masked via
;; <sd_maskz_name>, with the SAE-only rounding variant.  Operand 1 is tied
;; to the destination (constraint "0"), operand 3 is the integer-vector
;; operand and operand 4 the 8-bit immediate.
6939 (define_insn "avx512f_sfixupimm<mode><sd_maskz_name><round_saeonly_name>"
6940 [(set (match_operand:VF_128 0 "register_operand" "=v")
6943 [(match_operand:VF_128 1 "register_operand" "0")
6944 (match_operand:VF_128 2 "register_operand" "v")
6945 (match_operand:<sseintvecmode> 3 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
6946 (match_operand:SI 4 "const_0_to_255_operand")]
6951 "vfixupimm<ssescalarmodesuffix>\t{%4, <round_saeonly_sd_mask_op5>%3, %2, %0<sd_mask_op5>|%0<sd_mask_op5>, %2, %3<round_saeonly_sd_mask_op5>, %4}";
6952 [(set_attr "prefix" "evex")
6953 (set_attr "mode" "<ssescalarmode>")])
;; Masked scalar vfixupimm on VF_128 operands, with the SAE-only rounding
;; variant.  Operand 1 is tied to the destination (constraint "0");
;; operand 5 is the mask register (Yk), printed as the {%5} suffix.
6955 (define_insn "avx512f_sfixupimm<mode>_mask<round_saeonly_name>"
6956 [(set (match_operand:VF_128 0 "register_operand" "=v")
6960 [(match_operand:VF_128 1 "register_operand" "0")
6961 (match_operand:VF_128 2 "register_operand" "v")
6962 (match_operand:<sseintvecmode> 3 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
6963 (match_operand:SI 4 "const_0_to_255_operand")]
6968 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
6970 "vfixupimm<ssescalarmodesuffix>\t{%4, <round_saeonly_op6>%3, %2, %0%{%5%}|%0%{%5%}, %2, %3<round_saeonly_op6>, %4}";
6971 [(set_attr "prefix" "evex")
6972 (set_attr "mode" "<ssescalarmode>")])
;; Packed vrndscale on a VF_512 operand, with optional masking
;; (<mask_name>) and the SAE-only rounding variant.  Operand 2 is the 8-bit
;; immediate.  Emits vrndscale<ssemodesuffix>.
6974 (define_insn "avx512f_rndscale<mode><mask_name><round_saeonly_name>"
6975 [(set (match_operand:VF_512 0 "register_operand" "=v")
6977 [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
6978 (match_operand:SI 2 "const_0_to_255_operand")]
6981 "vrndscale<ssemodesuffix>\t{%2, <round_saeonly_mask_op3>%1, %0<mask_operand3>|%0<mask_operand3>, %1<round_saeonly_mask_op3>, %2}"
6982 [(set_attr "length_immediate" "1")
6983 (set_attr "prefix" "evex")
6984 (set_attr "mode" "<MODE>")])
;; Scalar vrndscale on VF_128 operands, with the SAE-only rounding variant.
;; Operand 1 is a register, operand 2 uses the SAE-aware predicate and
;; constraint, and operand 3 is the 8-bit immediate.
;; Emits vrndscale<ssescalarmodesuffix>.
;; NOTE(review): the "mode" attribute is <MODE> here while the other scalar
;; patterns in this section use <ssescalarmode> -- confirm this is intended.
6986 (define_insn "avx512f_rndscale<mode><round_saeonly_name>"
6987 [(set (match_operand:VF_128 0 "register_operand" "=v")
6990 [(match_operand:VF_128 1 "register_operand" "v")
6991 (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
6992 (match_operand:SI 3 "const_0_to_255_operand")]
6997 "vrndscale<ssescalarmodesuffix>\t{%3, <round_saeonly_op4>%2, %1, %0|%0, %1, %2<round_saeonly_op4>, %3}"
6998 [(set_attr "length_immediate" "1")
6999 (set_attr "prefix" "evex")
7000 (set_attr "mode" "<MODE>")])
7002 ;; Each 2-bit field of the immediate selects one element; the same selection is repeated for every group of four elements.
;; 512-bit vshufps with the shuffle written as 16 explicit element indices
;; (operands 3-18), optionally masked via <mask_name>.
;; The condition requires operands 7-10, 11-14 and 15-18 to equal operands
;; 3-6 plus 4, 8 and 12 respectively, i.e. the same selection in every group
;; of four.  The output code rebuilds the 8-bit immediate from operands 3-6
;; (two bits each, with 16 subtracted from operands 5 and 6) and stores it
;; back into operand 3 before printing.
7003 (define_insn "avx512f_shufps512_1<mask_name>"
7004 [(set (match_operand:V16SF 0 "register_operand" "=v")
7007 (match_operand:V16SF 1 "register_operand" "v")
7008 (match_operand:V16SF 2 "nonimmediate_operand" "vm"))
7009 (parallel [(match_operand 3 "const_0_to_3_operand")
7010 (match_operand 4 "const_0_to_3_operand")
7011 (match_operand 5 "const_16_to_19_operand")
7012 (match_operand 6 "const_16_to_19_operand")
7013 (match_operand 7 "const_4_to_7_operand")
7014 (match_operand 8 "const_4_to_7_operand")
7015 (match_operand 9 "const_20_to_23_operand")
7016 (match_operand 10 "const_20_to_23_operand")
7017 (match_operand 11 "const_8_to_11_operand")
7018 (match_operand 12 "const_8_to_11_operand")
7019 (match_operand 13 "const_24_to_27_operand")
7020 (match_operand 14 "const_24_to_27_operand")
7021 (match_operand 15 "const_12_to_15_operand")
7022 (match_operand 16 "const_12_to_15_operand")
7023 (match_operand 17 "const_28_to_31_operand")
7024 (match_operand 18 "const_28_to_31_operand")])))]
7026 && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
7027 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
7028 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
7029 && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4)
7030 && INTVAL (operands[3]) == (INTVAL (operands[11]) - 8)
7031 && INTVAL (operands[4]) == (INTVAL (operands[12]) - 8)
7032 && INTVAL (operands[5]) == (INTVAL (operands[13]) - 8)
7033 && INTVAL (operands[6]) == (INTVAL (operands[14]) - 8)
7034 && INTVAL (operands[3]) == (INTVAL (operands[15]) - 12)
7035 && INTVAL (operands[4]) == (INTVAL (operands[16]) - 12)
7036 && INTVAL (operands[5]) == (INTVAL (operands[17]) - 12)
7037 && INTVAL (operands[6]) == (INTVAL (operands[18]) - 12))"
7040 mask = INTVAL (operands[3]);
7041 mask |= INTVAL (operands[4]) << 2;
7042 mask |= (INTVAL (operands[5]) - 16) << 4;
7043 mask |= (INTVAL (operands[6]) - 16) << 6;
7044 operands[3] = GEN_INT (mask);
7046 return "vshufps\t{%3, %2, %1, %0<mask_operand19>|%0<mask_operand19>, %1, %2, %3}";
7048 [(set_attr "type" "sselog")
7049 (set_attr "length_immediate" "1")
7050 (set_attr "prefix" "evex")
7051 (set_attr "mode" "V16SF")])
;; Expander for the masked V8DF shufpd.  It decodes the immediate in
;; operand 3 bit by bit into explicit element indices and passes them, with
;; operands 4 and 5, to gen_avx512f_shufpd512_1_mask.
7053 (define_expand "avx512f_shufpd512_mask"
7054 [(match_operand:V8DF 0 "register_operand")
7055 (match_operand:V8DF 1 "register_operand")
7056 (match_operand:V8DF 2 "nonimmediate_operand")
7057 (match_operand:SI 3 "const_0_to_255_operand")
7058 (match_operand:V8DF 4 "register_operand")
7059 (match_operand:QI 5 "register_operand")]
7062 int mask = INTVAL (operands[3]);
7063 emit_insn (gen_avx512f_shufpd512_1_mask (operands[0], operands[1], operands[2],
7065 GEN_INT (mask & 2 ? 9 : 8),
7066 GEN_INT (mask & 4 ? 3 : 2),
7067 GEN_INT (mask & 8 ? 11 : 10),
7068 GEN_INT (mask & 16 ? 5 : 4),
7069 GEN_INT (mask & 32 ? 13 : 12),
7070 GEN_INT (mask & 64 ? 7 : 6),
7071 GEN_INT (mask & 128 ? 15 : 14),
7072 operands[4], operands[5]));
;; vshufpd on V8DF.  Operands 3..10 are the eight selector indices, each
;; restricted to a two-value range by its predicate.  The output code turns
;; each index back into one bit of the immediate (bit i comes from operand
;; 3 + i minus the low end of its range) and stores it in operands[3].
7076 (define_insn "avx512f_shufpd512_1<mask_name>"
7077 [(set (match_operand:V8DF 0 "register_operand" "=v")
7080 (match_operand:V8DF 1 "register_operand" "v")
7081 (match_operand:V8DF 2 "nonimmediate_operand" "vm"))
7082 (parallel [(match_operand 3 "const_0_to_1_operand")
7083 (match_operand 4 "const_8_to_9_operand")
7084 (match_operand 5 "const_2_to_3_operand")
7085 (match_operand 6 "const_10_to_11_operand")
7086 (match_operand 7 "const_4_to_5_operand")
7087 (match_operand 8 "const_12_to_13_operand")
7088 (match_operand 9 "const_6_to_7_operand")
7089 (match_operand 10 "const_14_to_15_operand")])))]
7093 mask = INTVAL (operands[3]);
7094 mask |= (INTVAL (operands[4]) - 8) << 1;
7095 mask |= (INTVAL (operands[5]) - 2) << 2;
7096 mask |= (INTVAL (operands[6]) - 10) << 3;
7097 mask |= (INTVAL (operands[7]) - 4) << 4;
7098 mask |= (INTVAL (operands[8]) - 12) << 5;
7099 mask |= (INTVAL (operands[9]) - 6) << 6;
7100 mask |= (INTVAL (operands[10]) - 14) << 7;
7101 operands[3] = GEN_INT (mask);
7103 return "vshufpd\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
7105 [(set_attr "type" "sselog")
7106 (set_attr "length_immediate" "1")
7107 (set_attr "prefix" "evex")
7108 (set_attr "mode" "V8DF")])
;; Expander for the V4DF shufpd.  It decodes the immediate in operand 3 into
;; explicit element indices and hands them to gen_avx_shufpd256_1.
7110 (define_expand "avx_shufpd256"
7111 [(match_operand:V4DF 0 "register_operand")
7112 (match_operand:V4DF 1 "register_operand")
7113 (match_operand:V4DF 2 "nonimmediate_operand")
7114 (match_operand:SI 3 "const_int_operand")]
7117 int mask = INTVAL (operands[3]);
7118 emit_insn (gen_avx_shufpd256_1 (operands[0], operands[1], operands[2],
7120 GEN_INT (mask & 2 ? 5 : 4),
7121 GEN_INT (mask & 4 ? 3 : 2),
7122 GEN_INT (mask & 8 ? 7 : 6)));
;; vshufpd on V4DF.  Operands 3..6 are the four selector indices; the output
;; code folds them into a 4-bit immediate (one bit per index) stored in
;; operands[3].  VEX-prefixed.
7126 (define_insn "avx_shufpd256_1"
7127 [(set (match_operand:V4DF 0 "register_operand" "=x")
7130 (match_operand:V4DF 1 "register_operand" "x")
7131 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
7132 (parallel [(match_operand 3 "const_0_to_1_operand")
7133 (match_operand 4 "const_4_to_5_operand")
7134 (match_operand 5 "const_2_to_3_operand")
7135 (match_operand 6 "const_6_to_7_operand")])))]
7139 mask = INTVAL (operands[3]);
7140 mask |= (INTVAL (operands[4]) - 4) << 1;
7141 mask |= (INTVAL (operands[5]) - 2) << 2;
7142 mask |= (INTVAL (operands[6]) - 6) << 3;
7143 operands[3] = GEN_INT (mask);
7145 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
7147 [(set_attr "type" "sseshuf")
7148 (set_attr "length_immediate" "1")
7149 (set_attr "prefix" "vex")
7150 (set_attr "mode" "V4DF")])
;; Expander for the V2DF shufpd.  It decodes the immediate in operand 3 into
;; element indices and hands them to gen_sse2_shufpd_v2df.
7152 (define_expand "sse2_shufpd"
7153 [(match_operand:V2DF 0 "register_operand")
7154 (match_operand:V2DF 1 "register_operand")
7155 (match_operand:V2DF 2 "nonimmediate_operand")
7156 (match_operand:SI 3 "const_int_operand")]
7159 int mask = INTVAL (operands[3]);
7160 emit_insn (gen_sse2_shufpd_v2df (operands[0], operands[1], operands[2],
7162 GEN_INT (mask & 2 ? 3 : 2)));
7166 ;; punpcklqdq and punpckhqdq are shorter than shufpd.
;; Emits vpunpckhqdq on V4DI operands (operand 2 may be memory);
;; VEX-prefixed, insn mode OI.
7167 (define_insn "avx2_interleave_highv4di"
7168 [(set (match_operand:V4DI 0 "register_operand" "=x")
7171 (match_operand:V4DI 1 "register_operand" "x")
7172 (match_operand:V4DI 2 "nonimmediate_operand" "xm"))
7173 (parallel [(const_int 1)
7178 "vpunpckhqdq\t{%2, %1, %0|%0, %1, %2}"
7179 [(set_attr "type" "sselog")
7180 (set_attr "prefix" "vex")
7181 (set_attr "mode" "OI")])
;; Emits vpunpckhqdq on V8DI operands.  The selector picks indices
;; 1, 9, 3, 11, 5, 13, 7, 15.  EVEX-prefixed, insn mode XI; the mask
;; operand, when present, is printed via <mask_operand3>.
7183 (define_insn "<mask_codefor>avx512f_interleave_highv8di<mask_name>"
7184 [(set (match_operand:V8DI 0 "register_operand" "=v")
7187 (match_operand:V8DI 1 "register_operand" "v")
7188 (match_operand:V8DI 2 "nonimmediate_operand" "vm"))
7189 (parallel [(const_int 1) (const_int 9)
7190 (const_int 3) (const_int 11)
7191 (const_int 5) (const_int 13)
7192 (const_int 7) (const_int 15)])))]
7194 "vpunpckhqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7195 [(set_attr "type" "sselog")
7196 (set_attr "prefix" "evex")
7197 (set_attr "mode" "XI")])
;; punpckhqdq on V2DI operands.  Alternative 0 is the two-operand non-AVX
;; form (operand 1 tied to the destination); alternative 1 is the
;; three-operand VEX form.
7199 (define_insn "vec_interleave_highv2di"
7200 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
7203 (match_operand:V2DI 1 "register_operand" "0,x")
7204 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm"))
7205 (parallel [(const_int 1)
7209 punpckhqdq\t{%2, %0|%0, %2}
7210 vpunpckhqdq\t{%2, %1, %0|%0, %1, %2}"
7211 [(set_attr "isa" "noavx,avx")
7212 (set_attr "type" "sselog")
7213 (set_attr "prefix_data16" "1,*")
7214 (set_attr "prefix" "orig,vex")
7215 (set_attr "mode" "TI")])
;; Emits vpunpcklqdq on V4DI operands (operand 2 may be memory);
;; VEX-prefixed, insn mode OI.
7217 (define_insn "avx2_interleave_lowv4di"
7218 [(set (match_operand:V4DI 0 "register_operand" "=x")
7221 (match_operand:V4DI 1 "register_operand" "x")
7222 (match_operand:V4DI 2 "nonimmediate_operand" "xm"))
7223 (parallel [(const_int 0)
7228 "vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}"
7229 [(set_attr "type" "sselog")
7230 (set_attr "prefix" "vex")
7231 (set_attr "mode" "OI")])
;; Emits vpunpcklqdq on V8DI operands.  The selector picks indices
;; 0, 8, 2, 10, 4, 12, 6, 14.  EVEX-prefixed, insn mode XI; the mask
;; operand, when present, is printed via <mask_operand3>.
7233 (define_insn "<mask_codefor>avx512f_interleave_lowv8di<mask_name>"
7234 [(set (match_operand:V8DI 0 "register_operand" "=v")
7237 (match_operand:V8DI 1 "register_operand" "v")
7238 (match_operand:V8DI 2 "nonimmediate_operand" "vm"))
7239 (parallel [(const_int 0) (const_int 8)
7240 (const_int 2) (const_int 10)
7241 (const_int 4) (const_int 12)
7242 (const_int 6) (const_int 14)])))]
7244 "vpunpcklqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7245 [(set_attr "type" "sselog")
7246 (set_attr "prefix" "evex")
7247 (set_attr "mode" "XI")])
;; punpcklqdq on V2DI operands.  Alternative 0 is the two-operand non-AVX
;; form (operand 1 tied to the destination); alternative 1 is the
;; three-operand VEX form.
7249 (define_insn "vec_interleave_lowv2di"
7250 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
7253 (match_operand:V2DI 1 "register_operand" "0,x")
7254 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm"))
7255 (parallel [(const_int 0)
7259 punpcklqdq\t{%2, %0|%0, %2}
7260 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}"
7261 [(set_attr "isa" "noavx,avx")
7262 (set_attr "type" "sselog")
7263 (set_attr "prefix_data16" "1,*")
7264 (set_attr "prefix" "orig,vex")
7265 (set_attr "mode" "TI")])
;; shufpd / vshufpd on VI8F_128 modes: a vec_select from the vec_concat of
;; operands 1 and 2.  Operand 3 (index 0..1) and operand 4 (index 2..3) are
;; folded into a 2-bit immediate stored in operands[3]; the template
;; returned depends on which_alternative (non-AVX vs. AVX form).
7267 (define_insn "sse2_shufpd_<mode>"
7268 [(set (match_operand:VI8F_128 0 "register_operand" "=x,x")
7269 (vec_select:VI8F_128
7270 (vec_concat:<ssedoublevecmode>
7271 (match_operand:VI8F_128 1 "register_operand" "0,x")
7272 (match_operand:VI8F_128 2 "nonimmediate_operand" "xm,xm"))
7273 (parallel [(match_operand 3 "const_0_to_1_operand")
7274 (match_operand 4 "const_2_to_3_operand")])))]
7278 mask = INTVAL (operands[3]);
7279 mask |= (INTVAL (operands[4]) - 2) << 1;
7280 operands[3] = GEN_INT (mask);
7282 switch (which_alternative)
7285 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
7287 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
7292 [(set_attr "isa" "noavx,avx")
7293 (set_attr "type" "sseshuf")
7294 (set_attr "length_immediate" "1")
7295 (set_attr "prefix" "orig,vex")
7296 (set_attr "mode" "V2DF")])
7298 ;; Avoid combining registers from different units in a single alternative,
7299 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Extracts element 1 of the V2DF operand 1 into the DF operand 0.
;; Requires TARGET_SSE2 and rejects the memory-to-memory case.  Six
;; alternatives cover a memory destination, SSE register destinations, and
;; x87 ("*f") and integer ("r") register destinations loaded from
;; offsettable memory.
7300 (define_insn "sse2_storehpd"
7301 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,x,*f,r")
7303 (match_operand:V2DF 1 "nonimmediate_operand" " x,0,x,o,o,o")
7304 (parallel [(const_int 1)])))]
7305 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7307 %vmovhpd\t{%1, %0|%0, %1}
7309 vunpckhpd\t{%d1, %0|%0, %d1}
7313 [(set_attr "isa" "*,noavx,avx,*,*,*")
7314 (set_attr "type" "ssemov,sselog1,sselog1,ssemov,fmov,imov")
7315 (set (attr "prefix_data16")
7317 (and (eq_attr "alternative" "0")
7318 (not (match_test "TARGET_AVX")))
7320 (const_string "*")))
7321 (set_attr "prefix" "maybe_vex,orig,vex,*,*,*")
7322 (set_attr "mode" "V1DF,V1DF,V2DF,DF,DF,DF")])
;; NOTE(review): the header line of this definition is not visible here; the
;; shape (pattern, condition, replacement, preparation statement) looks like
;; a define_split -- confirm.  After reload, extracting element 1 of a V2DF
;; memory operand becomes a plain DF move from that memory at byte offset 8.
7325 [(set (match_operand:DF 0 "register_operand")
7327 (match_operand:V2DF 1 "memory_operand")
7328 (parallel [(const_int 1)])))]
7329 "TARGET_SSE2 && reload_completed"
7330 [(set (match_dup 0) (match_dup 1))]
7331 "operands[1] = adjust_address (operands[1], DFmode, 8);")
;; Element-1 extraction from V2DF for targets with SSE but without SSE2.
;; Uses movhps (store to memory), movhlps (register to register) or movlps
;; from the %H1 address (load), and rejects the memory-to-memory case.
7333 (define_insn "*vec_extractv2df_1_sse"
7334 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
7336 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
7337 (parallel [(const_int 1)])))]
7338 "!TARGET_SSE2 && TARGET_SSE
7339 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7341 movhps\t{%1, %0|%q0, %1}
7342 movhlps\t{%1, %0|%0, %1}
7343 movlps\t{%H1, %0|%0, %H1}"
7344 [(set_attr "type" "ssemov")
7345 (set_attr "ssememalign" "64")
7346 (set_attr "mode" "V2SF,V4SF,V2SF")])
7348 ;; Avoid combining registers from different units in a single alternative,
7349 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Extracts element 0 of the V2DF operand 1 into the DF operand 0.
;; Requires TARGET_SSE2 and rejects the memory-to-memory case.  The only
;; template visible here is %vmovlpd, for the store-to-memory alternative.
7350 (define_insn "sse2_storelpd"
7351 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
7353 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m,m,m")
7354 (parallel [(const_int 0)])))]
7355 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7357 %vmovlpd\t{%1, %0|%0, %1}
7362 [(set_attr "type" "ssemov,ssemov,ssemov,fmov,imov")
7363 (set_attr "prefix_data16" "1,*,*,*,*")
7364 (set_attr "prefix" "maybe_vex")
7365 (set_attr "mode" "V1DF,DF,DF,DF,DF")])
;; NOTE(review): the header line of this definition is not visible here;
;; presumably a define_split -- confirm.  After reload, extracting element 0
;; of a V2DF operand becomes a plain DF move.  The preparation code rewrites
;; a register source as a DFmode REG with the same register number, and
;; otherwise uses adjust_address at byte offset 0.
7368 [(set (match_operand:DF 0 "register_operand")
7370 (match_operand:V2DF 1 "nonimmediate_operand")
7371 (parallel [(const_int 0)])))]
7372 "TARGET_SSE2 && reload_completed"
7373 [(set (match_dup 0) (match_dup 1))]
7375 if (REG_P (operands[1]))
7376 operands[1] = gen_rtx_REG (DFmode, REGNO (operands[1]));
7378 operands[1] = adjust_address (operands[1], DFmode, 0);
;; Element-0 extraction from V2DF for targets with SSE but without SSE2.
;; Uses movlps for the memory alternatives and movaps for register to
;; register, and rejects the memory-to-memory case.
7381 (define_insn "*vec_extractv2df_0_sse"
7382 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
7384 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
7385 (parallel [(const_int 0)])))]
7386 "!TARGET_SSE2 && TARGET_SSE
7387 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7389 movlps\t{%1, %0|%0, %1}
7390 movaps\t{%1, %0|%0, %1}
7391 movlps\t{%1, %0|%0, %q1}"
7392 [(set_attr "type" "ssemov")
7393 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander wrapper around sse2_loadhpd.  It calls
;; ix86_fixup_binary_operands to obtain a destination, emits
;; gen_sse2_loadhpd into it, and copies the result to operands[0] when the
;; returned destination differs.
7395 (define_expand "sse2_loadhpd_exp"
7396 [(set (match_operand:V2DF 0 "nonimmediate_operand")
7399 (match_operand:V2DF 1 "nonimmediate_operand")
7400 (parallel [(const_int 0)]))
7401 (match_operand:DF 2 "nonimmediate_operand")))]
7404 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
7406 emit_insn (gen_sse2_loadhpd (dst, operands[1], operands[2]));
7408 /* Fix up the destination if needed. */
7409 if (dst != operands[0])
7410 emit_move_insn (operands[0], dst);
7415 ;; Avoid combining registers from different units in a single alternative,
7416 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Combines element 0 of the V2DF operand 1 with the DF operand 2 to form
;; the V2DF operand 0.  Requires TARGET_SSE2 and rejects operands 1 and 2
;; both being in memory.  The visible templates are movhpd/vmovhpd for a
;; memory operand 2 and unpcklpd/vunpcklpd for a register operand 2.
7417 (define_insn "sse2_loadhpd"
7418 [(set (match_operand:V2DF 0 "nonimmediate_operand"
7422 (match_operand:V2DF 1 "nonimmediate_operand"
7424 (parallel [(const_int 0)]))
7425 (match_operand:DF 2 "nonimmediate_operand"
7426 " m,m,x,x,x,*f,r")))]
7427 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
7429 movhpd\t{%2, %0|%0, %2}
7430 vmovhpd\t{%2, %1, %0|%0, %1, %2}
7431 unpcklpd\t{%2, %0|%0, %2}
7432 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
7436 [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
7437 (set_attr "type" "ssemov,ssemov,sselog,sselog,ssemov,fmov,imov")
7438 (set_attr "ssememalign" "64")
7439 (set_attr "prefix_data16" "1,*,*,*,*,*,*")
7440 (set_attr "prefix" "orig,vex,orig,vex,*,*,*")
7441 (set_attr "mode" "V1DF,V1DF,V2DF,V2DF,DF,DF,DF")])
;; NOTE(review): the header line of this definition is not visible here;
;; presumably a define_split -- confirm.  After reload, when the V2DF
;; destination is memory and its element 0 is kept, the operation becomes a
;; plain DF store of operand 1 at byte offset 8 of that memory.
7444 [(set (match_operand:V2DF 0 "memory_operand")
7446 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
7447 (match_operand:DF 1 "register_operand")))]
7448 "TARGET_SSE2 && reload_completed"
7449 [(set (match_dup 0) (match_dup 1))]
7450 "operands[0] = adjust_address (operands[0], DFmode, 8);")
;; Expander wrapper around sse2_loadlpd.  It calls
;; ix86_fixup_binary_operands to obtain a destination, emits
;; gen_sse2_loadlpd into it, and copies the result to operands[0] when the
;; returned destination differs.
7452 (define_expand "sse2_loadlpd_exp"
7453 [(set (match_operand:V2DF 0 "nonimmediate_operand")
7455 (match_operand:DF 2 "nonimmediate_operand")
7457 (match_operand:V2DF 1 "nonimmediate_operand")
7458 (parallel [(const_int 1)]))))]
7461 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
7463 emit_insn (gen_sse2_loadlpd (dst, operands[1], operands[2]));
7465 /* Fix up the destination if needed. */
7466 if (dst != operands[0])
7467 emit_move_insn (operands[0], dst);
7472 ;; Avoid combining registers from different units in a single alternative,
7473 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Combines the DF operand 2 with element 1 of the V2DF operand 1 to form
;; the V2DF operand 0.  Requires TARGET_SSE2 and rejects operands 1 and 2
;; both being in memory.  There are eleven alternatives: alternative 5 uses
;; shufpd (type sselog, one immediate byte), and alternatives 9 and 10 are
;; typed fmov and imov for the "*f" and "r" sources.
7474 (define_insn "sse2_loadlpd"
7475 [(set (match_operand:V2DF 0 "nonimmediate_operand"
7476 "=x,x,x,x,x,x,x,x,m,m ,m")
7478 (match_operand:DF 2 "nonimmediate_operand"
7479 " m,m,m,x,x,0,0,x,x,*f,r")
7481 (match_operand:V2DF 1 "vector_move_operand"
7482 " C,0,x,0,x,x,o,o,0,0 ,0")
7483 (parallel [(const_int 1)]))))]
7484 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
7486 %vmovsd\t{%2, %0|%0, %2}
7487 movlpd\t{%2, %0|%0, %2}
7488 vmovlpd\t{%2, %1, %0|%0, %1, %2}
7489 movsd\t{%2, %0|%0, %2}
7490 vmovsd\t{%2, %1, %0|%0, %1, %2}
7491 shufpd\t{$2, %1, %0|%0, %1, 2}
7492 movhpd\t{%H1, %0|%0, %H1}
7493 vmovhpd\t{%H1, %2, %0|%0, %2, %H1}
7497 [(set_attr "isa" "*,noavx,avx,noavx,avx,noavx,noavx,avx,*,*,*")
7499 (cond [(eq_attr "alternative" "5")
7500 (const_string "sselog")
7501 (eq_attr "alternative" "9")
7502 (const_string "fmov")
7503 (eq_attr "alternative" "10")
7504 (const_string "imov")
7506 (const_string "ssemov")))
7507 (set_attr "ssememalign" "64")
7508 (set_attr "prefix_data16" "*,1,*,*,*,*,1,*,*,*,*")
7509 (set_attr "length_immediate" "*,*,*,*,*,1,*,*,*,*,*")
7510 (set_attr "prefix" "maybe_vex,orig,vex,orig,vex,orig,orig,vex,*,*,*")
7511 (set_attr "mode" "DF,V1DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,DF,DF,DF")])
;; NOTE(review): the header line of this definition is not visible here;
;; presumably a define_split -- confirm.  After reload, when the V2DF
;; destination is memory and its element 1 is kept, the operation becomes a
;; plain DF store of operand 1 at byte offset 0 of that memory.
7514 [(set (match_operand:V2DF 0 "memory_operand")
7516 (match_operand:DF 1 "register_operand")
7517 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
7518 "TARGET_SSE2 && reload_completed"
7519 [(set (match_dup 0) (match_dup 1))]
7520 "operands[0] = adjust_address (operands[0], DFmode, 0);")
;; Nine-alternative pattern on V2DF operands.  The templates are
;; movsd/vmovsd (register forms), movlpd/vmovlpd (memory forms), shufpd
;; (alternative 5, type sselog, one immediate byte) and movhps/vmovhps on
;; the %H half of a memory operand.  The data16 prefix is set for
;; alternatives 2 and 4 when not targeting AVX.
7522 (define_insn "sse2_movsd"
7523 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,m,x,x,x,o")
7525 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,m,m,x,0,0,x,0")
7526 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,0,x,0,x,o,o,x")
7530 movsd\t{%2, %0|%0, %2}
7531 vmovsd\t{%2, %1, %0|%0, %1, %2}
7532 movlpd\t{%2, %0|%0, %q2}
7533 vmovlpd\t{%2, %1, %0|%0, %1, %q2}
7534 %vmovlpd\t{%2, %0|%q0, %2}
7535 shufpd\t{$2, %1, %0|%0, %1, 2}
7536 movhps\t{%H1, %0|%0, %H1}
7537 vmovhps\t{%H1, %2, %0|%0, %2, %H1}
7538 %vmovhps\t{%1, %H0|%H0, %1}"
7539 [(set_attr "isa" "noavx,avx,noavx,avx,*,noavx,noavx,avx,*")
7542 (eq_attr "alternative" "5")
7543 (const_string "sselog")
7544 (const_string "ssemov")))
7545 (set (attr "prefix_data16")
7547 (and (eq_attr "alternative" "2,4")
7548 (not (match_test "TARGET_AVX")))
7550 (const_string "*")))
7551 (set_attr "length_immediate" "*,*,*,*,*,1,*,*,*")
7552 (set_attr "ssememalign" "64")
7553 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig,vex,maybe_vex")
7554 (set_attr "mode" "DF,DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,V1DF")])
;; Builds a V2DF value from the DF operand 1.  Alternative 0 (non-AVX) ties
;; operand 1 to the destination; alternative 1 (isa sse3) accepts a register
;; or memory source and emits %vmovddup.
7556 (define_insn "vec_dupv2df"
7557 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
7559 (match_operand:DF 1 "nonimmediate_operand" " 0,xm")))]
7563 %vmovddup\t{%1, %0|%0, %1}"
7564 [(set_attr "isa" "noavx,sse3")
7565 (set_attr "type" "sselog1")
7566 (set_attr "prefix" "orig,maybe_vex")
7567 (set_attr "mode" "V2DF,DF")])
;; Forms a V2DF from the two DF operands 1 and 2, with eight alternatives.
;; The templates are unpcklpd/vunpcklpd (both in registers), %vmovddup
;; (operand 2 matches operand 1, from memory), movhpd/vmovhpd (operand 2 in
;; memory), %vmovsd (operand 2 is the "C" constant), and movlhps/movhps for
;; the non-AVX fallbacks.
7569 (define_insn "*vec_concatv2df"
7570 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x,x,x,x,x,x")
7572 (match_operand:DF 1 "nonimmediate_operand" " 0,x,m,0,x,m,0,0")
7573 (match_operand:DF 2 "vector_move_operand" " x,x,1,m,m,C,x,m")))]
7576 unpcklpd\t{%2, %0|%0, %2}
7577 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
7578 %vmovddup\t{%1, %0|%0, %1}
7579 movhpd\t{%2, %0|%0, %2}
7580 vmovhpd\t{%2, %1, %0|%0, %1, %2}
7581 %vmovsd\t{%1, %0|%0, %1}
7582 movlhps\t{%2, %0|%0, %2}
7583 movhps\t{%2, %0|%0, %2}"
7584 [(set_attr "isa" "sse2_noavx,avx,sse3,sse2_noavx,avx,sse2,noavx,noavx")
7587 (eq_attr "alternative" "0,1,2")
7588 (const_string "sselog")
7589 (const_string "ssemov")))
7590 (set_attr "prefix_data16" "*,*,*,1,*,*,*,*")
7591 (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex,orig,orig")
7592 (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,DF,V4SF,V2SF")])
7594 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7596 ;; Parallel integer down-conversion operations
7598 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Destination modes of the down-conversion (vpmov*) patterns below, with
;; per-mode attributes: the source mode in upper and lower case, and the
;; two-letter mnemonic suffix (db, dw, qd, qw).
7600 (define_mode_iterator PMOV_DST_MODE [V16QI V16HI V8SI V8HI])
7601 (define_mode_attr pmov_src_mode
7602 [(V16QI "V16SI") (V16HI "V16SI") (V8SI "V8DI") (V8HI "V8DI")])
7603 (define_mode_attr pmov_src_lower
7604 [(V16QI "v16si") (V16HI "v16si") (V8SI "v8di") (V8HI "v8di")])
7605 (define_mode_attr pmov_suff
7606 [(V16QI "db") (V16HI "dw") (V8SI "qd") (V8HI "qw")])
;; Unmasked down-conversion: applies any_truncate to a <pmov_src_mode>
;; register and writes a PMOV_DST_MODE register (alternative 0) or memory
;; (alternative 1) using vpmov<trunsuffix><pmov_suff>.
7608 (define_insn "*avx512f_<code><pmov_src_lower><mode>2"
7609 [(set (match_operand:PMOV_DST_MODE 0 "nonimmediate_operand" "=v,m")
7610 (any_truncate:PMOV_DST_MODE
7611 (match_operand:<pmov_src_mode> 1 "register_operand" "v,v")))]
7613 "vpmov<trunsuffix><pmov_suff>\t{%1, %0|%0, %1}"
7614 [(set_attr "type" "ssemov")
7615 (set_attr "memory" "none,store")
7616 (set_attr "prefix" "evex")
7617 (set_attr "mode" "<sseinsnmode>")])
;; Masked down-conversion: a vec_merge of the truncated operand 1 with
;; operand 2 under mask register operand 3.  Operand 2 is either tied to the
;; destination or, for the register alternative, the "C" constant; %N2 in
;; the template prints the corresponding modifier.
7619 (define_insn "avx512f_<code><pmov_src_lower><mode>2_mask"
7620 [(set (match_operand:PMOV_DST_MODE 0 "nonimmediate_operand" "=v,m")
7621 (vec_merge:PMOV_DST_MODE
7622 (any_truncate:PMOV_DST_MODE
7623 (match_operand:<pmov_src_mode> 1 "register_operand" "v,v"))
7624 (match_operand:PMOV_DST_MODE 2 "vector_move_operand" "0C,0")
7625 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
7627 "vpmov<trunsuffix><pmov_suff>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
7628 [(set_attr "type" "ssemov")
7629 (set_attr "memory" "none,store")
7630 (set_attr "prefix" "evex")
7631 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the masked down-converting store: the destination is a
;; memory operand, operand 1 the source register and operand 2 the mask
;; register.
7633 (define_expand "avx512f_<code><pmov_src_lower><mode>2_mask_store"
7634 [(set (match_operand:PMOV_DST_MODE 0 "memory_operand")
7635 (vec_merge:PMOV_DST_MODE
7636 (any_truncate:PMOV_DST_MODE
7637 (match_operand:<pmov_src_mode> 1 "register_operand"))
7639 (match_operand:<avx512fmaskmode> 2 "register_operand")))]
;; V8DI to V16QI register form of vpmov<trunsuffix>qb.  The pattern pairs
;; the converted operand 1 with an all-zero V8QI constant vector.
7642 (define_insn "*avx512f_<code>v8div16qi2"
7643 [(set (match_operand:V16QI 0 "register_operand" "=v")
7646 (match_operand:V8DI 1 "register_operand" "v"))
7647 (const_vector:V8QI [(const_int 0) (const_int 0)
7648 (const_int 0) (const_int 0)
7649 (const_int 0) (const_int 0)
7650 (const_int 0) (const_int 0)])))]
7652 "vpmov<trunsuffix>qb\t{%1, %0|%0, %1}"
7653 [(set_attr "type" "ssemov")
7654 (set_attr "prefix" "evex")
7655 (set_attr "mode" "TI")])
;; Store form of vpmov<trunsuffix>qb from V8DI into V16QI memory.  The
;; selector with indices 8..15 appears in the part of the pattern that is
;; combined with the converted operand 1.
7657 (define_insn "*avx512f_<code>v8div16qi2_store"
7658 [(set (match_operand:V16QI 0 "memory_operand" "=m")
7661 (match_operand:V8DI 1 "register_operand" "v"))
7664 (parallel [(const_int 8) (const_int 9)
7665 (const_int 10) (const_int 11)
7666 (const_int 12) (const_int 13)
7667 (const_int 14) (const_int 15)]))))]
7669 "vpmov<trunsuffix>qb\t{%1, %0|%0, %1}"
7670 [(set_attr "type" "ssemov")
7671 (set_attr "memory" "store")
7672 (set_attr "prefix" "evex")
7673 (set_attr "mode" "TI")])
7675 (define_insn "avx512f_<code>v8div16qi2_mask"
7676 [(set (match_operand:V16QI 0 "register_operand" "=v")
7680 (match_operand:V8DI 1 "register_operand" "v"))
7682 (match_operand:V16QI 2 "vector_move_operand" "0C")
7683 (parallel [(const_int 0) (const_int 1)
7684 (const_int 2) (const_int 3)
7685 (const_int 4) (const_int 5)
7686 (const_int 6) (const_int 7)]))
7687 (match_operand:QI 3 "register_operand" "Yk"))
7688 (const_vector:V8QI [(const_int 0) (const_int 0)
7689 (const_int 0) (const_int 0)
7690 (const_int 0) (const_int 0)
7691 (const_int 0) (const_int 0)])))]
7693 "vpmov<trunsuffix>qb\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
7694 [(set_attr "type" "ssemov")
7695 (set_attr "prefix" "evex")
7696 (set_attr "mode" "TI")])
;; Masked store form of vpmov<trunsuffix>qb, V8DI into V16QI memory, under
;; the QI mask operand 2.  The pattern contains two selectors, one with
;; indices 0..7 and one with indices 8..15.
7698 (define_insn "avx512f_<code>v8div16qi2_mask_store"
7699 [(set (match_operand:V16QI 0 "memory_operand" "=m")
7703 (match_operand:V8DI 1 "register_operand" "v"))
7706 (parallel [(const_int 0) (const_int 1)
7707 (const_int 2) (const_int 3)
7708 (const_int 4) (const_int 5)
7709 (const_int 6) (const_int 7)]))
7710 (match_operand:QI 2 "register_operand" "Yk"))
7713 (parallel [(const_int 8) (const_int 9)
7714 (const_int 10) (const_int 11)
7715 (const_int 12) (const_int 13)
7716 (const_int 14) (const_int 15)]))))]
7718 "vpmov<trunsuffix>qb\t{%1, %0%{%2%}|%0%{%2%}, %1}"
7719 [(set_attr "type" "ssemov")
7720 (set_attr "memory" "store")
7721 (set_attr "prefix" "evex")
7722 (set_attr "mode" "TI")])
7724 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7726 ;; Parallel integral arithmetic
7728 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Vector negate expander for VI_AVX2 modes.  The preparation statement
;; forces the all-zero constant of <MODE>mode into a register and makes it
;; operand 2.
7730 (define_expand "neg<mode>2"
7731 [(set (match_operand:VI_AVX2 0 "register_operand")
7734 (match_operand:VI_AVX2 1 "nonimmediate_operand")))]
7736 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
;; Vector add/subtract expander for VI_AVX2 modes.  Requires TARGET_SSE2 and
;; <mask_mode512bit_condition>; the operands are legitimized by
;; ix86_fixup_binary_operands_no_copy.
7738 (define_expand "<plusminus_insn><mode>3<mask_name>"
7739 [(set (match_operand:VI_AVX2 0 "register_operand")
7741 (match_operand:VI_AVX2 1 "nonimmediate_operand")
7742 (match_operand:VI_AVX2 2 "nonimmediate_operand")))]
7743 "TARGET_SSE2 && <mask_mode512bit_condition>"
7744 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Vector add/subtract insn for VI_AVX2 modes.  Alternative 0 is the
;; two-operand non-AVX form (operand 1 tied to the destination, commutative
;; when <comm> expands to '%'); alternative 1 is the three-operand AVX form
;; with the optional mask operand.
7746 (define_insn "*<plusminus_insn><mode>3<mask_name>"
7747 [(set (match_operand:VI_AVX2 0 "register_operand" "=x,v")
7749 (match_operand:VI_AVX2 1 "nonimmediate_operand" "<comm>0,v")
7750 (match_operand:VI_AVX2 2 "nonimmediate_operand" "xm,vm")))]
7751 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands) && <mask_mode512bit_condition>"
7753 p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
7754 vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7755 [(set_attr "isa" "noavx,avx")
7756 (set_attr "type" "sseiadd")
7757 (set_attr "prefix_data16" "1,*")
7758 (set_attr "prefix" "<mask_prefix3>")
7759 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the sat_plusminus operations on VI12_AVX2 modes; the
;; operands are legitimized by ix86_fixup_binary_operands_no_copy.
7761 (define_expand "<sse2_avx2>_<plusminus_insn><mode>3"
7762 [(set (match_operand:VI12_AVX2 0 "register_operand")
7763 (sat_plusminus:VI12_AVX2
7764 (match_operand:VI12_AVX2 1 "nonimmediate_operand")
7765 (match_operand:VI12_AVX2 2 "nonimmediate_operand")))]
7767 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Insn for the sat_plusminus operations on VI12_AVX2 modes.  Alternative 0
;; is the two-operand non-AVX form; alternative 1 is the three-operand VEX
;; form.
7769 (define_insn "*<sse2_avx2>_<plusminus_insn><mode>3"
7770 [(set (match_operand:VI12_AVX2 0 "register_operand" "=x,v")
7771 (sat_plusminus:VI12_AVX2
7772 (match_operand:VI12_AVX2 1 "nonimmediate_operand" "<comm>0,v")
7773 (match_operand:VI12_AVX2 2 "nonimmediate_operand" "xm,vm")))]
7774 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
7776 p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
7777 vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
7778 [(set_attr "isa" "noavx,avx")
7779 (set_attr "type" "sseiadd")
7780 (set_attr "prefix_data16" "1,*")
7781 (set_attr "prefix" "orig,vex")
7782 (set_attr "mode" "TI")])
;; Multiply expander for VI1_AVX2 modes; the expansion is delegated to
;; ix86_expand_vecop_qihi with code MULT.
7784 (define_expand "mul<mode>3"
7785 [(set (match_operand:VI1_AVX2 0 "register_operand")
7786 (mult:VI1_AVX2 (match_operand:VI1_AVX2 1 "register_operand")
7787 (match_operand:VI1_AVX2 2 "register_operand")))]
7790 ix86_expand_vecop_qihi (MULT, operands[0], operands[1], operands[2]);
;; Multiply expander for VI2_AVX2 modes; the operands are legitimized by
;; ix86_fixup_binary_operands_no_copy.
7794 (define_expand "mul<mode>3"
7795 [(set (match_operand:VI2_AVX2 0 "register_operand")
7796 (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "nonimmediate_operand")
7797 (match_operand:VI2_AVX2 2 "nonimmediate_operand")))]
7799 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; pmullw / vpmullw for VI2_AVX2 modes.  Operand 1 is marked commutative
;; ('%'); alternative 0 ties it to the destination (non-AVX) and
;; alternative 1 is the three-operand VEX form.
7801 (define_insn "*mul<mode>3"
7802 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
7803 (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,x")
7804 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,xm")))]
7805 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
7807 pmullw\t{%2, %0|%0, %2}
7808 vpmullw\t{%2, %1, %0|%0, %1, %2}"
7809 [(set_attr "isa" "noavx,avx")
7810 (set_attr "type" "sseimul")
7811 (set_attr "prefix_data16" "1,*")
7812 (set_attr "prefix" "orig,vex")
7813 (set_attr "mode" "<sseinsnmode>")])
;; High-part multiply expander for VI2_AVX2 modes: both operands are
;; extended (any_extend) to <ssedoublemode>, multiplied, and the product is
;; shifted right logically.  The operands are legitimized by
;; ix86_fixup_binary_operands_no_copy.
7815 (define_expand "<s>mul<mode>3_highpart"
7816 [(set (match_operand:VI2_AVX2 0 "register_operand")
7818 (lshiftrt:<ssedoublemode>
7819 (mult:<ssedoublemode>
7820 (any_extend:<ssedoublemode>
7821 (match_operand:VI2_AVX2 1 "nonimmediate_operand"))
7822 (any_extend:<ssedoublemode>
7823 (match_operand:VI2_AVX2 2 "nonimmediate_operand")))
7826 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; pmulh<u>w / vpmulh<u>w for VI2_AVX2 modes, matching the extended
;; multiply followed by a logical right shift.  Operand 1 is marked
;; commutative ('%').
7828 (define_insn "*<s>mul<mode>3_highpart"
7829 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
7831 (lshiftrt:<ssedoublemode>
7832 (mult:<ssedoublemode>
7833 (any_extend:<ssedoublemode>
7834 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,x"))
7835 (any_extend:<ssedoublemode>
7836 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,xm")))
7838 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
7840 pmulh<u>w\t{%2, %0|%0, %2}
7841 vpmulh<u>w\t{%2, %1, %0|%0, %1, %2}"
7842 [(set_attr "isa" "noavx,avx")
7843 (set_attr "type" "sseimul")
7844 (set_attr "prefix_data16" "1,*")
7845 (set_attr "prefix" "orig,vex")
7846 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the widening multiply of the even-indexed (0, 2, ..., 14)
;; elements of two V16SI operands into V8DI.  The operands are legitimized
;; by ix86_fixup_binary_operands_no_copy.
7848 (define_expand "vec_widen_umult_even_v16si<mask_name>"
7849 [(set (match_operand:V8DI 0 "register_operand")
7853 (match_operand:V16SI 1 "nonimmediate_operand")
7854 (parallel [(const_int 0) (const_int 2)
7855 (const_int 4) (const_int 6)
7856 (const_int 8) (const_int 10)
7857 (const_int 12) (const_int 14)])))
7860 (match_operand:V16SI 2 "nonimmediate_operand")
7861 (parallel [(const_int 0) (const_int 2)
7862 (const_int 4) (const_int 6)
7863 (const_int 8) (const_int 10)
7864 (const_int 12) (const_int 14)])))))]
7866 "ix86_fixup_binary_operands_no_copy (MULT, V16SImode, operands);")
;; vpmuludq on the even-indexed elements of two V16SI operands, producing
;; V8DI.  Requires TARGET_AVX512F; operand 1 is marked commutative ('%');
;; EVEX-prefixed with the optional mask operand.
7868 (define_insn "*vec_widen_umult_even_v16si<mask_name>"
7869 [(set (match_operand:V8DI 0 "register_operand" "=v")
7873 (match_operand:V16SI 1 "nonimmediate_operand" "%v")
7874 (parallel [(const_int 0) (const_int 2)
7875 (const_int 4) (const_int 6)
7876 (const_int 8) (const_int 10)
7877 (const_int 12) (const_int 14)])))
7880 (match_operand:V16SI 2 "nonimmediate_operand" "vm")
7881 (parallel [(const_int 0) (const_int 2)
7882 (const_int 4) (const_int 6)
7883 (const_int 8) (const_int 10)
7884 (const_int 12) (const_int 14)])))))]
7885 "TARGET_AVX512F && ix86_binary_operator_ok (MULT, V16SImode, operands)"
7886 "vpmuludq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7887 [(set_attr "isa" "avx512f")
7888 (set_attr "type" "sseimul")
7889 (set_attr "prefix_extra" "1")
7890 (set_attr "prefix" "evex")
7891 (set_attr "mode" "XI")])
;; Expander for the widening multiply of the even-indexed (0, 2, 4, 6)
;; elements of two V8SI operands into V4DI.  The operands are legitimized by
;; ix86_fixup_binary_operands_no_copy.
7893 (define_expand "vec_widen_umult_even_v8si"
7894 [(set (match_operand:V4DI 0 "register_operand")
7898 (match_operand:V8SI 1 "nonimmediate_operand")
7899 (parallel [(const_int 0) (const_int 2)
7900 (const_int 4) (const_int 6)])))
7903 (match_operand:V8SI 2 "nonimmediate_operand")
7904 (parallel [(const_int 0) (const_int 2)
7905 (const_int 4) (const_int 6)])))))]
7907 "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")
;; vpmuludq on the even-indexed elements of two V8SI operands, producing
;; V4DI.  Requires TARGET_AVX2; operand 1 is marked commutative ('%');
;; VEX-prefixed.
7909 (define_insn "*vec_widen_umult_even_v8si"
7910 [(set (match_operand:V4DI 0 "register_operand" "=x")
7914 (match_operand:V8SI 1 "nonimmediate_operand" "%x")
7915 (parallel [(const_int 0) (const_int 2)
7916 (const_int 4) (const_int 6)])))
7919 (match_operand:V8SI 2 "nonimmediate_operand" "xm")
7920 (parallel [(const_int 0) (const_int 2)
7921 (const_int 4) (const_int 6)])))))]
7922 "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V8SImode, operands)"
7923 "vpmuludq\t{%2, %1, %0|%0, %1, %2}"
7924 [(set_attr "type" "sseimul")
7925 (set_attr "prefix" "vex")
7926 (set_attr "mode" "OI")])
;; Expander for the widening multiply of elements 0 and 2 of two V4SI
;; operands into V2DI.  The operands are legitimized by
;; ix86_fixup_binary_operands_no_copy.
7928 (define_expand "vec_widen_umult_even_v4si"
7929 [(set (match_operand:V2DI 0 "register_operand")
7933 (match_operand:V4SI 1 "nonimmediate_operand")
7934 (parallel [(const_int 0) (const_int 2)])))
7937 (match_operand:V4SI 2 "nonimmediate_operand")
7938 (parallel [(const_int 0) (const_int 2)])))))]
7940 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; pmuludq / vpmuludq on elements 0 and 2 of two V4SI operands, producing
;; V2DI.  Requires TARGET_SSE2; operand 1 is marked commutative ('%').
;; Alternative 0 is the two-operand non-AVX form and alternative 1 the VEX
;; form.
7942 (define_insn "*vec_widen_umult_even_v4si"
7943 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
7947 (match_operand:V4SI 1 "nonimmediate_operand" "%0,x")
7948 (parallel [(const_int 0) (const_int 2)])))
7951 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
7952 (parallel [(const_int 0) (const_int 2)])))))]
7953 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
7955 pmuludq\t{%2, %0|%0, %2}
7956 vpmuludq\t{%2, %1, %0|%0, %1, %2}"
7957 [(set_attr "isa" "noavx,avx")
7958 (set_attr "type" "sseimul")
7959 (set_attr "prefix_data16" "1,*")
7960 (set_attr "prefix" "orig,vex")
7961 (set_attr "mode" "TI")])
;; Expander for the signed-named ("smult") widening multiply of the
;; even-indexed (0, 2, ..., 14) elements of two V16SI operands into V8DI.
;; The operands are legitimized by ix86_fixup_binary_operands_no_copy.
7963 (define_expand "vec_widen_smult_even_v16si<mask_name>"
7964 [(set (match_operand:V8DI 0 "register_operand")
7968 (match_operand:V16SI 1 "nonimmediate_operand")
7969 (parallel [(const_int 0) (const_int 2)
7970 (const_int 4) (const_int 6)
7971 (const_int 8) (const_int 10)
7972 (const_int 12) (const_int 14)])))
7975 (match_operand:V16SI 2 "nonimmediate_operand")
7976 (parallel [(const_int 0) (const_int 2)
7977 (const_int 4) (const_int 6)
7978 (const_int 8) (const_int 10)
7979 (const_int 12) (const_int 14)])))))]
7981 "ix86_fixup_binary_operands_no_copy (MULT, V16SImode, operands);")
;; vpmuldq on the even-indexed elements of two V16SI operands, producing
;; V8DI.  Requires TARGET_AVX512F; operand 1 is marked commutative ('%');
;; EVEX-prefixed with the optional mask operand.
7983 (define_insn "*vec_widen_smult_even_v16si<mask_name>"
7984 [(set (match_operand:V8DI 0 "register_operand" "=v")
7988 (match_operand:V16SI 1 "nonimmediate_operand" "%v")
7989 (parallel [(const_int 0) (const_int 2)
7990 (const_int 4) (const_int 6)
7991 (const_int 8) (const_int 10)
7992 (const_int 12) (const_int 14)])))
7995 (match_operand:V16SI 2 "nonimmediate_operand" "vm")
7996 (parallel [(const_int 0) (const_int 2)
7997 (const_int 4) (const_int 6)
7998 (const_int 8) (const_int 10)
7999 (const_int 12) (const_int 14)])))))]
8000 "TARGET_AVX512F && ix86_binary_operator_ok (MULT, V16SImode, operands)"
8001 "vpmuldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8002 [(set_attr "isa" "avx512f")
8003 (set_attr "type" "sseimul")
8004 (set_attr "prefix_extra" "1")
8005 (set_attr "prefix" "evex")
8006 (set_attr "mode" "XI")])
;; Expander for the V8SI -> V4DI even-element widening multiply (elements
;; 0, 2, 4, 6 of each source).  The preparation statement calls
;; ix86_fixup_binary_operands_no_copy for MULT on the operands.
8008 (define_expand "vec_widen_smult_even_v8si"
8009 [(set (match_operand:V4DI 0 "register_operand")
8013 (match_operand:V8SI 1 "nonimmediate_operand")
8014 (parallel [(const_int 0) (const_int 2)
8015 (const_int 4) (const_int 6)])))
8018 (match_operand:V8SI 2 "nonimmediate_operand")
8019 (parallel [(const_int 0) (const_int 2)
8020 (const_int 4) (const_int 6)])))))]
8022 "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")
;; Insn matching the V8SI even-element widening multiply into V4DI
;; (elements 0, 2, 4, 6 of each source); emits three-operand vpmuldq.
;; Enabled for TARGET_AVX2 when ix86_binary_operator_ok accepts the operands.
;; Operand 1 is marked commutative ("%"), like the sibling widening-multiply
;; insns: both sources go through the same element selection, and the insn
;; condition accepts a memory operand 1 as long as operand 2 is a register,
;; so the register allocator may swap the operands instead of reloading
;; operand 1 into a register.
8024 (define_insn "*vec_widen_smult_even_v8si"
8025 [(set (match_operand:V4DI 0 "register_operand" "=x")
8029 (match_operand:V8SI 1 "nonimmediate_operand" "%x")
8030 (parallel [(const_int 0) (const_int 2)
8031 (const_int 4) (const_int 6)])))
8034 (match_operand:V8SI 2 "nonimmediate_operand" "xm")
8035 (parallel [(const_int 0) (const_int 2)
8036 (const_int 4) (const_int 6)])))))]
8037 "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V8SImode, operands)"
8038 "vpmuldq\t{%2, %1, %0|%0, %1, %2}"
8039 [(set_attr "type" "sseimul")
8040 (set_attr "prefix_extra" "1")
8041 (set_attr "prefix" "vex")
8042 (set_attr "mode" "OI")])
;; Expander for the V4SI -> V2DI multiply of elements 0 and 2 of each
;; source.  The preparation statement calls
;; ix86_fixup_binary_operands_no_copy for MULT on the operands.
8044 (define_expand "sse4_1_mulv2siv2di3"
8045 [(set (match_operand:V2DI 0 "register_operand")
8049 (match_operand:V4SI 1 "nonimmediate_operand")
8050 (parallel [(const_int 0) (const_int 2)])))
8053 (match_operand:V4SI 2 "nonimmediate_operand")
8054 (parallel [(const_int 0) (const_int 2)])))))]
8056 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; Insn for the SSE4.1 V4SI -> V2DI multiply of elements 0 and 2.
;; Alternative 0 (non-AVX) ties the output to operand 1 and emits pmuldq;
;; alternative 1 (AVX) emits three-operand vpmuldq.  Operands 1 and 2 are
;; commutative ("%").
8058 (define_insn "*sse4_1_mulv2siv2di3"
8059 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
8063 (match_operand:V4SI 1 "nonimmediate_operand" "%0,x")
8064 (parallel [(const_int 0) (const_int 2)])))
8067 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
8068 (parallel [(const_int 0) (const_int 2)])))))]
8069 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
8071 pmuldq\t{%2, %0|%0, %2}
8072 vpmuldq\t{%2, %1, %0|%0, %1, %2}"
8073 [(set_attr "isa" "noavx,avx")
8074 (set_attr "type" "sseimul")
8075 (set_attr "prefix_data16" "1,*")
8076 (set_attr "prefix_extra" "1")
8077 (set_attr "prefix" "orig,vex")
8078 (set_attr "mode" "TI")])
;; Expander for avx2_pmaddwd (V16HI sources, V8SI result).  The pattern
;; uses the even-indexed elements (0, 2, ..., 14) and the odd-indexed
;; elements (1, 3, ..., 15) of both sources, the odd half via match_dup of
;; operands 1 and 2.  The preparation statement calls
;; ix86_fixup_binary_operands_no_copy for MULT.
8080 (define_expand "avx2_pmaddwd"
8081 [(set (match_operand:V8SI 0 "register_operand")
8086 (match_operand:V16HI 1 "nonimmediate_operand")
8087 (parallel [(const_int 0) (const_int 2)
8088 (const_int 4) (const_int 6)
8089 (const_int 8) (const_int 10)
8090 (const_int 12) (const_int 14)])))
8093 (match_operand:V16HI 2 "nonimmediate_operand")
8094 (parallel [(const_int 0) (const_int 2)
8095 (const_int 4) (const_int 6)
8096 (const_int 8) (const_int 10)
8097 (const_int 12) (const_int 14)]))))
8100 (vec_select:V8HI (match_dup 1)
8101 (parallel [(const_int 1) (const_int 3)
8102 (const_int 5) (const_int 7)
8103 (const_int 9) (const_int 11)
8104 (const_int 13) (const_int 15)])))
8106 (vec_select:V8HI (match_dup 2)
8107 (parallel [(const_int 1) (const_int 3)
8108 (const_int 5) (const_int 7)
8109 (const_int 9) (const_int 11)
8110 (const_int 13) (const_int 15)]))))))]
8112 "ix86_fixup_binary_operands_no_copy (MULT, V16HImode, operands);")
;; Insn matching the avx2_pmaddwd pattern; emits three-operand vpmaddwd.
;; Enabled for TARGET_AVX2 when ix86_binary_operator_ok accepts the
;; operands.  Operand 1 is commutative with operand 2 ("%"); the
;; odd-element selections reuse operands 1 and 2 through match_dup.
8114 (define_insn "*avx2_pmaddwd"
8115 [(set (match_operand:V8SI 0 "register_operand" "=x")
8120 (match_operand:V16HI 1 "nonimmediate_operand" "%x")
8121 (parallel [(const_int 0) (const_int 2)
8122 (const_int 4) (const_int 6)
8123 (const_int 8) (const_int 10)
8124 (const_int 12) (const_int 14)])))
8127 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
8128 (parallel [(const_int 0) (const_int 2)
8129 (const_int 4) (const_int 6)
8130 (const_int 8) (const_int 10)
8131 (const_int 12) (const_int 14)]))))
8134 (vec_select:V8HI (match_dup 1)
8135 (parallel [(const_int 1) (const_int 3)
8136 (const_int 5) (const_int 7)
8137 (const_int 9) (const_int 11)
8138 (const_int 13) (const_int 15)])))
8140 (vec_select:V8HI (match_dup 2)
8141 (parallel [(const_int 1) (const_int 3)
8142 (const_int 5) (const_int 7)
8143 (const_int 9) (const_int 11)
8144 (const_int 13) (const_int 15)]))))))]
8145 "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V16HImode, operands)"
8146 "vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
8147 [(set_attr "type" "sseiadd")
8148 (set_attr "prefix" "vex")
8149 (set_attr "mode" "OI")])
;; Expander for sse2_pmaddwd (V8HI sources, V4SI result).  The pattern uses
;; the even-indexed elements (0, 2, 4, 6) and the odd-indexed elements
;; (1, 3, 5, 7) of both sources, the odd half via match_dup of operands 1
;; and 2.  The preparation statement calls
;; ix86_fixup_binary_operands_no_copy for MULT.
8151 (define_expand "sse2_pmaddwd"
8152 [(set (match_operand:V4SI 0 "register_operand")
8157 (match_operand:V8HI 1 "nonimmediate_operand")
8158 (parallel [(const_int 0) (const_int 2)
8159 (const_int 4) (const_int 6)])))
8162 (match_operand:V8HI 2 "nonimmediate_operand")
8163 (parallel [(const_int 0) (const_int 2)
8164 (const_int 4) (const_int 6)]))))
8167 (vec_select:V4HI (match_dup 1)
8168 (parallel [(const_int 1) (const_int 3)
8169 (const_int 5) (const_int 7)])))
8171 (vec_select:V4HI (match_dup 2)
8172 (parallel [(const_int 1) (const_int 3)
8173 (const_int 5) (const_int 7)]))))))]
8175 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; Insn matching the sse2_pmaddwd pattern.  Alternative 0 (non-AVX) ties the
;; output to operand 1 and emits pmaddwd; alternative 1 (AVX) emits
;; three-operand vpmaddwd.  Enabled for TARGET_SSE2 when
;; ix86_binary_operator_ok accepts the operands.
8177 (define_insn "*sse2_pmaddwd"
8178 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
8183 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x")
8184 (parallel [(const_int 0) (const_int 2)
8185 (const_int 4) (const_int 6)])))
8188 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
8189 (parallel [(const_int 0) (const_int 2)
8190 (const_int 4) (const_int 6)]))))
8193 (vec_select:V4HI (match_dup 1)
8194 (parallel [(const_int 1) (const_int 3)
8195 (const_int 5) (const_int 7)])))
8197 (vec_select:V4HI (match_dup 2)
8198 (parallel [(const_int 1) (const_int 3)
8199 (const_int 5) (const_int 7)]))))))]
8200 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
8202 pmaddwd\t{%2, %0|%0, %2}
8203 vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
8204 [(set_attr "isa" "noavx,avx")
8205 (set_attr "type" "sseiadd")
8206 (set_attr "atom_unit" "simul")
8207 (set_attr "prefix_data16" "1,*")
8208 (set_attr "prefix" "orig,vex")
8209 (set_attr "mode" "TI")])
;; Expander for the multiply over the VI4_AVX512F modes.  Operands are
;; accepted as general_vector_operand.  One path forces any operand that is
;; not a nonimmediate_operand into a register and then calls
;; ix86_fixup_binary_operands_no_copy; the other path delegates to
;; ix86_expand_sse2_mulv4si3.
8211 (define_expand "mul<mode>3<mask_name>"
8212 [(set (match_operand:VI4_AVX512F 0 "register_operand")
8214 (match_operand:VI4_AVX512F 1 "general_vector_operand")
8215 (match_operand:VI4_AVX512F 2 "general_vector_operand")))]
8216 "TARGET_SSE2 && <mask_mode512bit_condition>"
8220 if (!nonimmediate_operand (operands[1], <MODE>mode))
8221 operands[1] = force_reg (<MODE>mode, operands[1]);
8222 if (!nonimmediate_operand (operands[2], <MODE>mode))
8223 operands[2] = force_reg (<MODE>mode, operands[2]);
8224 ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
8228 ix86_expand_sse2_mulv4si3 (operands[0], operands[1], operands[2]);
;; Insn for the VI4_AVX512F multiply.  Alternative 0 (non-AVX) ties the
;; output to operand 1 and emits pmulld; alternative 1 (AVX) emits
;; three-operand vpmulld with the optional <mask_operand3> suffix.  Enabled
;; for TARGET_SSE4_1 when ix86_binary_operator_ok accepts the operands and
;; <mask_mode512bit_condition> holds.
8233 (define_insn "*<sse4_1_avx2>_mul<mode>3<mask_name>"
8234 [(set (match_operand:VI4_AVX512F 0 "register_operand" "=x,v")
8236 (match_operand:VI4_AVX512F 1 "nonimmediate_operand" "%0,v")
8237 (match_operand:VI4_AVX512F 2 "nonimmediate_operand" "xm,vm")))]
8238 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, <MODE>mode, operands) && <mask_mode512bit_condition>"
8240 pmulld\t{%2, %0|%0, %2}
8241 vpmulld\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8242 [(set_attr "isa" "noavx,avx")
8243 (set_attr "type" "sseimul")
8244 (set_attr "prefix_extra" "1")
8245 (set_attr "prefix" "<mask_prefix3>")
8246 (set_attr "btver2_decode" "vector,vector")
8247 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the multiply over the VI8_AVX2_AVX512F modes; all operands
;; must be registers.  The work is delegated to ix86_expand_sse2_mulvxdi3.
8249 (define_expand "mul<mode>3"
8250 [(set (match_operand:VI8_AVX2_AVX512F 0 "register_operand")
8251 (mult:VI8_AVX2_AVX512F
8252 (match_operand:VI8_AVX2_AVX512F 1 "register_operand")
8253 (match_operand:VI8_AVX2_AVX512F 2 "register_operand")))]
8256 ix86_expand_sse2_mulvxdi3 (operands[0], operands[1], operands[2]);
;; Expander for the "hi" widening multiply over the VI124_AVX2 modes,
;; instantiated for each any_extend code; the result has mode
;; <sseunpackmode>.  Delegates to ix86_expand_mul_widen_hilo.
8260 (define_expand "vec_widen_<s>mult_hi_<mode>"
8261 [(match_operand:<sseunpackmode> 0 "register_operand")
8262 (any_extend:<sseunpackmode>
8263 (match_operand:VI124_AVX2 1 "register_operand"))
8264 (match_operand:VI124_AVX2 2 "register_operand")]
8267 ix86_expand_mul_widen_hilo (operands[0], operands[1], operands[2],
;; Expander for the "lo" widening multiply over the VI124_AVX2 modes,
;; instantiated for each any_extend code; the result has mode
;; <sseunpackmode>.  Delegates to ix86_expand_mul_widen_hilo.
8272 (define_expand "vec_widen_<s>mult_lo_<mode>"
8273 [(match_operand:<sseunpackmode> 0 "register_operand")
8274 (any_extend:<sseunpackmode>
8275 (match_operand:VI124_AVX2 1 "register_operand"))
8276 (match_operand:VI124_AVX2 2 "register_operand")]
8279 ix86_expand_mul_widen_hilo (operands[0], operands[1], operands[2],
8284 ;; Most widen_<s>mult_even_<mode> can be handled directly from other
8285 ;; named patterns, but signed V4SI needs special help for plain SSE2.
;; Expander with V4SI sources and a V2DI result; the operands are passed to
;; ix86_expand_mul_widen_evenodd, which generates the sequence.
8286 (define_expand "vec_widen_smult_even_v4si"
8287 [(match_operand:V2DI 0 "register_operand")
8288 (match_operand:V4SI 1 "nonimmediate_operand")
8289 (match_operand:V4SI 2 "nonimmediate_operand")]
8292 ix86_expand_mul_widen_evenodd (operands[0], operands[1], operands[2],
;; Expander for the odd-element widening multiply over the VI4_AVX512F
;; modes, instantiated for each any_extend code; the result has mode
;; <sseunpackmode>.  Delegates to ix86_expand_mul_widen_evenodd.
8297 (define_expand "vec_widen_<s>mult_odd_<mode>"
8298 [(match_operand:<sseunpackmode> 0 "register_operand")
8299 (any_extend:<sseunpackmode>
8300 (match_operand:VI4_AVX512F 1 "general_vector_operand"))
8301 (match_operand:VI4_AVX512F 2 "general_vector_operand")]
8304 ix86_expand_mul_widen_evenodd (operands[0], operands[1], operands[2],
;; Dot-product expander over the VI2_AVX2 modes.  Emits
;; <sse2_avx2>_pmaddwd of operands 1 and 2 into a fresh <sseunpackmode>
;; temporary, then sets operand 0 from a PLUS in <sseunpackmode>; operand 3
;; is the accumulator input.
8309 (define_expand "sdot_prod<mode>"
8310 [(match_operand:<sseunpackmode> 0 "register_operand")
8311 (match_operand:VI2_AVX2 1 "register_operand")
8312 (match_operand:VI2_AVX2 2 "register_operand")
8313 (match_operand:<sseunpackmode> 3 "register_operand")]
8316 rtx t = gen_reg_rtx (<sseunpackmode>mode);
8317 emit_insn (gen_<sse2_avx2>_pmaddwd (t, operands[1], operands[2]));
8318 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
8319 gen_rtx_PLUS (<sseunpackmode>mode,
8324 ;; Normally we use widen_mul_even/odd, but combine can't quite get it all
8325 ;; back together when madd is available.
;; V4SI dot-product expander with a V2DI accumulator (operand 3).  Chains
;; two XOP multiply-accumulate insns: xop_pmacsdqh accumulates onto
;; operand 3 into temporary t, then xop_pmacsdql accumulates onto t into
;; operand 0.
8326 (define_expand "sdot_prodv4si"
8327 [(match_operand:V2DI 0 "register_operand")
8328 (match_operand:V4SI 1 "register_operand")
8329 (match_operand:V4SI 2 "register_operand")
8330 (match_operand:V2DI 3 "register_operand")]
8333 rtx t = gen_reg_rtx (V2DImode);
8334 emit_insn (gen_xop_pmacsdqh (t, operands[1], operands[2], operands[3]));
8335 emit_insn (gen_xop_pmacsdql (operands[0], operands[1], operands[2], t));
;; V16QI expander built on sse2_psadbw: the psadbw result goes into a V2DI
;; temporary (t1), is moved into a V4SI temporary (t2) with convert_move,
;; and is then added to operand 3 with addv4si3 to form operand 0.
8339 (define_expand "usadv16qi"
8340 [(match_operand:V4SI 0 "register_operand")
8341 (match_operand:V16QI 1 "register_operand")
8342 (match_operand:V16QI 2 "nonimmediate_operand")
8343 (match_operand:V4SI 3 "nonimmediate_operand")]
8346 rtx t1 = gen_reg_rtx (V2DImode);
8347 rtx t2 = gen_reg_rtx (V4SImode);
8348 emit_insn (gen_sse2_psadbw (t1, operands[1], operands[2]));
8349 convert_move (t2, t1, 0);
8350 emit_insn (gen_addv4si3 (operands[0], t2, operands[3]));
;; V32QI expander built on avx2_psadbw: the psadbw result goes into a V4DI
;; temporary (t1), is moved into a V8SI temporary (t2) with convert_move,
;; and is then added to operand 3 with addv8si3 to form operand 0.
8354 (define_expand "usadv32qi"
8355 [(match_operand:V8SI 0 "register_operand")
8356 (match_operand:V32QI 1 "register_operand")
8357 (match_operand:V32QI 2 "nonimmediate_operand")
8358 (match_operand:V8SI 3 "nonimmediate_operand")]
8361 rtx t1 = gen_reg_rtx (V4DImode);
8362 rtx t2 = gen_reg_rtx (V8SImode);
8363 emit_insn (gen_avx2_psadbw (t1, operands[1], operands[2]));
8364 convert_move (t2, t1, 0);
8365 emit_insn (gen_addv8si3 (operands[0], t2, operands[3]));
;; Shift insn over the VI24_AVX2 modes, emitting psra<ssemodesuffix>
;; (non-AVX, output tied to operand 1) or vpsra<ssemodesuffix> (AVX).
;; Operand 2 is an SImode count that may be a register or an "N" constant;
;; length_immediate depends on whether it is a const_int.
8369 (define_insn "ashr<mode>3"
8370 [(set (match_operand:VI24_AVX2 0 "register_operand" "=x,x")
8372 (match_operand:VI24_AVX2 1 "register_operand" "0,x")
8373 (match_operand:SI 2 "nonmemory_operand" "xN,xN")))]
8376 psra<ssemodesuffix>\t{%2, %0|%0, %2}
8377 vpsra<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
8378 [(set_attr "isa" "noavx,avx")
8379 (set_attr "type" "sseishft")
8380 (set (attr "length_immediate")
8381 (if_then_else (match_operand 2 "const_int_operand")
8383 (const_string "0")))
8384 (set_attr "prefix_data16" "1,*")
8385 (set_attr "prefix" "orig,vex")
8386 (set_attr "mode" "<sseinsnmode>")])
;; AVX512F shift insn over the VI48_512 modes, emitting vpsra<ssemodesuffix>
;; with the optional <mask_operand3> suffix.  Alternative 0 takes a register
;; source and a register count; alternative 1 allows a memory source with
;; an "N" constant count.
8388 (define_insn "ashr<mode>3<mask_name>"
8389 [(set (match_operand:VI48_512 0 "register_operand" "=v,v")
8391 (match_operand:VI48_512 1 "nonimmediate_operand" "v,vm")
8392 (match_operand:SI 2 "nonmemory_operand" "v,N")))]
8393 "TARGET_AVX512F && <mask_mode512bit_condition>"
8394 "vpsra<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8395 [(set_attr "type" "sseishft")
8396 (set (attr "length_immediate")
8397 (if_then_else (match_operand 2 "const_int_operand")
8399 (const_string "0")))
8400 (set_attr "mode" "<sseinsnmode>")])
;; Shift insn over the VI248_AVX2 modes, instantiated for each any_lshift
;; code.  Emits p<vshift><ssemodesuffix> (non-AVX, output tied to operand 1)
;; or vp<vshift><ssemodesuffix> (AVX).  Operand 2 is an SImode count,
;; register or "N" constant.
8402 (define_insn "<shift_insn><mode>3"
8403 [(set (match_operand:VI248_AVX2 0 "register_operand" "=x,x")
8404 (any_lshift:VI248_AVX2
8405 (match_operand:VI248_AVX2 1 "register_operand" "0,x")
8406 (match_operand:SI 2 "nonmemory_operand" "xN,xN")))]
8409 p<vshift><ssemodesuffix>\t{%2, %0|%0, %2}
8410 vp<vshift><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
8411 [(set_attr "isa" "noavx,avx")
8412 (set_attr "type" "sseishft")
8413 (set (attr "length_immediate")
8414 (if_then_else (match_operand 2 "const_int_operand")
8416 (const_string "0")))
8417 (set_attr "prefix_data16" "1,*")
8418 (set_attr "prefix" "orig,vex")
8419 (set_attr "mode" "<sseinsnmode>")])
;; AVX512F shift insn over the VI48_512 modes, instantiated for each
;; any_lshift code; emits vp<vshift><ssemodesuffix> with the optional
;; <mask_operand3> suffix.  Alternative 0 takes a register source with a
;; register or "N" constant count; alternative 1 takes a memory source with
;; an "N" constant count only.
8421 (define_insn "<shift_insn><mode>3<mask_name>"
8422 [(set (match_operand:VI48_512 0 "register_operand" "=v,v")
8423 (any_lshift:VI48_512
8424 (match_operand:VI48_512 1 "nonimmediate_operand" "v,m")
8425 (match_operand:SI 2 "nonmemory_operand" "vN,N")))]
8426 "TARGET_AVX512F && <mask_mode512bit_condition>"
8427 "vp<vshift><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8428 [(set_attr "isa" "avx512f")
8429 (set_attr "type" "sseishft")
8430 (set (attr "length_immediate")
8431 (if_then_else (match_operand 2 "const_int_operand")
8433 (const_string "0")))
8434 (set_attr "prefix" "evex")
8435 (set_attr "mode" "<sseinsnmode>")])
;; Whole-vector shift expander over the VI_128 modes.  Operand 1 is
;; reinterpreted as V1TImode with gen_lowpart, the shift result goes into a
;; fresh V1TImode register (operands[3]), and operand 0 is then set from
;; the <MODE>mode lowpart of that register (operands[4]).  The shift count
;; must satisfy const_0_to_255_mul_8_operand.
8438 (define_expand "vec_shl_<mode>"
8441 (match_operand:VI_128 1 "register_operand")
8442 (match_operand:SI 2 "const_0_to_255_mul_8_operand")))
8443 (set (match_operand:VI_128 0 "register_operand") (match_dup 4))]
8446 operands[1] = gen_lowpart (V1TImode, operands[1]);
8447 operands[3] = gen_reg_rtx (V1TImode);
8448 operands[4] = gen_lowpart (<MODE>mode, operands[3]);
;; Shift insn over the VIMAX_AVX2 modes.  Operand 2 must satisfy
;; const_0_to_255_mul_8_operand; the output code divides it by 8 before it
;; is printed as the immediate of pslldq (non-AVX alternative) or vpslldq
;; (AVX alternative).
8451 (define_insn "<sse2_avx2>_ashl<mode>3"
8452 [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,x")
8454 (match_operand:VIMAX_AVX2 1 "register_operand" "0,x")
8455 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
8458 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
8460 switch (which_alternative)
8463 return "pslldq\t{%2, %0|%0, %2}";
8465 return "vpslldq\t{%2, %1, %0|%0, %1, %2}";
8470 [(set_attr "isa" "noavx,avx")
8471 (set_attr "type" "sseishft")
8472 (set_attr "length_immediate" "1")
8473 (set_attr "prefix_data16" "1,*")
8474 (set_attr "prefix" "orig,vex")
8475 (set_attr "mode" "<sseinsnmode>")])
;; Whole-vector shift expander over the VI_128 modes, mirroring the
;; vec_shl_<mode> expander's operand handling: operand 1 is reinterpreted
;; as V1TImode, the result goes into a fresh V1TImode register
;; (operands[3]), and operand 0 is set from its <MODE>mode lowpart
;; (operands[4]).
8477 (define_expand "vec_shr_<mode>"
8480 (match_operand:VI_128 1 "register_operand")
8481 (match_operand:SI 2 "const_0_to_255_mul_8_operand")))
8482 (set (match_operand:VI_128 0 "register_operand") (match_dup 4))]
8485 operands[1] = gen_lowpart (V1TImode, operands[1]);
8486 operands[3] = gen_reg_rtx (V1TImode);
8487 operands[4] = gen_lowpart (<MODE>mode, operands[3]);
;; Logical right shift insn (lshiftrt) over the VIMAX_AVX2 modes.  Operand 2
;; must satisfy const_0_to_255_mul_8_operand; the output code divides it by
;; 8 before it is printed as the immediate of psrldq (non-AVX alternative)
;; or vpsrldq (AVX alternative).
8490 (define_insn "<sse2_avx2>_lshr<mode>3"
8491 [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,x")
8492 (lshiftrt:VIMAX_AVX2
8493 (match_operand:VIMAX_AVX2 1 "register_operand" "0,x")
8494 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
8497 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
8499 switch (which_alternative)
8502 return "psrldq\t{%2, %0|%0, %2}";
8504 return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
8509 [(set_attr "isa" "noavx,avx")
8510 (set_attr "type" "sseishft")
8511 (set_attr "length_immediate" "1")
8512 (set_attr "atom_unit" "sishuf")
8513 (set_attr "prefix_data16" "1,*")
8514 (set_attr "prefix" "orig,vex")
8515 (set_attr "mode" "<sseinsnmode>")])
;; Rotate insn over the VI48_512 modes with a vector rotate count
;; (operand 2 has the same vector mode as the data), instantiated for each
;; any_rotate code.  Emits vp<rotate>v<ssemodesuffix> with the optional
;; <mask_operand3> suffix.
8517 (define_insn "avx512f_<rotate>v<mode><mask_name>"
8518 [(set (match_operand:VI48_512 0 "register_operand" "=v")
8519 (any_rotate:VI48_512
8520 (match_operand:VI48_512 1 "register_operand" "v")
8521 (match_operand:VI48_512 2 "nonimmediate_operand" "vm")))]
8523 "vp<rotate>v<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8524 [(set_attr "prefix" "evex")
8525 (set_attr "mode" "<sseinsnmode>")])
;; Rotate insn over the VI48_512 modes with a constant SImode rotate count
;; (const_0_to_255_operand), instantiated for each any_rotate code.  Emits
;; vp<rotate><ssemodesuffix> with the optional <mask_operand3> suffix; the
;; source may be a register or memory.
8527 (define_insn "avx512f_<rotate><mode><mask_name>"
8528 [(set (match_operand:VI48_512 0 "register_operand" "=v")
8529 (any_rotate:VI48_512
8530 (match_operand:VI48_512 1 "nonimmediate_operand" "vm")
8531 (match_operand:SI 2 "const_0_to_255_operand")))]
8533 "vp<rotate><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8534 [(set_attr "prefix" "evex")
8535 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the maxmin codes over the VI124_256_48_512 modes.  Requires
;; TARGET_AVX2 plus the mask and rounding 512-bit conditions; the
;; preparation statement calls ix86_fixup_binary_operands_no_copy for
;; <CODE>.
8537 (define_expand "<code><mode>3<mask_name><round_name>"
8538 [(set (match_operand:VI124_256_48_512 0 "register_operand")
8539 (maxmin:VI124_256_48_512
8540 (match_operand:VI124_256_48_512 1 "<round_nimm_predicate>")
8541 (match_operand:VI124_256_48_512 2 "<round_nimm_predicate>")))]
8542 "TARGET_AVX2 && <mask_mode512bit_condition> && <round_mode512bit_condition>"
8543 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Insn for the maxmin codes over the VI124_256_48_512 modes; emits
;; vp<maxmin_int><ssemodesuffix> with optional mask and rounding operands.
;; Operand 1 is commutative with operand 2 ("%").
;; NOTE(review): the "mode" attribute is the fixed string "OI" although the
;; iterator name suggests 512-bit modes are included -- confirm whether
;; <sseinsnmode> was intended.
8545 (define_insn "*avx2_<code><mode>3<mask_name><round_name>"
8546 [(set (match_operand:VI124_256_48_512 0 "register_operand" "=v")
8547 (maxmin:VI124_256_48_512
8548 (match_operand:VI124_256_48_512 1 "<round_nimm_predicate>" "%v")
8549 (match_operand:VI124_256_48_512 2 "<round_nimm_predicate>" "<round_constraint>")))]
8550 "TARGET_AVX2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
8551 && <mask_mode512bit_condition> && <round_mode512bit_condition>"
8552 "vp<maxmin_int><ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
8553 [(set_attr "type" "sseiadd")
8554 (set_attr "prefix_extra" "1")
8555 (set_attr "prefix" "maybe_evex")
8556 (set_attr "mode" "OI")])
;; Max/min expander over the VI8_AVX2 modes, implemented as a vector
;; conditional select.  xops[1]/xops[2] (the select arms) are operands 1/2
;; for SMAX/UMAX and swapped otherwise; the comparison xops[3] is GTU for
;; the unsigned codes and GT for the signed ones, always comparing
;; operand 1 with operand 2.  The array is passed to ix86_expand_int_vcond.
8558 (define_expand "<code><mode>3"
8559 [(set (match_operand:VI8_AVX2 0 "register_operand")
8561 (match_operand:VI8_AVX2 1 "register_operand")
8562 (match_operand:VI8_AVX2 2 "register_operand")))]
8569 xops[0] = operands[0];
8571 if (<CODE> == SMAX || <CODE> == UMAX)
8573 xops[1] = operands[1];
8574 xops[2] = operands[2];
8578 xops[1] = operands[2];
8579 xops[2] = operands[1];
8582 code = (<CODE> == UMAX || <CODE> == UMIN) ? GTU : GT;
8584 xops[3] = gen_rtx_fmt_ee (code, VOIDmode, operands[1], operands[2]);
8585 xops[4] = operands[1];
8586 xops[5] = operands[2];
8588 ok = ix86_expand_int_vcond (xops);
;; Max/min expander over the VI124_128 modes that uses a GT comparison.
;; With TARGET_SSE4_1, or for V8HImode, the operands are only fixed up with
;; ix86_fixup_binary_operands_no_copy.  Otherwise both inputs are forced
;; into registers and the operation is expanded as a conditional select via
;; ix86_expand_int_vcond, with the select arms taken in either order.
8593 (define_expand "<code><mode>3"
8594 [(set (match_operand:VI124_128 0 "register_operand")
8596 (match_operand:VI124_128 1 "nonimmediate_operand")
8597 (match_operand:VI124_128 2 "nonimmediate_operand")))]
8600 if (TARGET_SSE4_1 || <MODE>mode == V8HImode)
8601 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
8607 xops[0] = operands[0];
8608 operands[1] = force_reg (<MODE>mode, operands[1]);
8609 operands[2] = force_reg (<MODE>mode, operands[2]);
8613 xops[1] = operands[1];
8614 xops[2] = operands[2];
8618 xops[1] = operands[2];
8619 xops[2] = operands[1];
8622 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
8623 xops[4] = operands[1];
8624 xops[5] = operands[2];
8626 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 max/min insn over the VI14_128 modes.  Alternative 0 (non-AVX)
;; ties the output to operand 1 and emits p<maxmin_int><ssemodesuffix>;
;; alternative 1 (AVX) emits the three-operand vp... form.  Operands 1 and
;; 2 are commutative ("%").
8632 (define_insn "*sse4_1_<code><mode>3"
8633 [(set (match_operand:VI14_128 0 "register_operand" "=x,x")
8635 (match_operand:VI14_128 1 "nonimmediate_operand" "%0,x")
8636 (match_operand:VI14_128 2 "nonimmediate_operand" "xm,xm")))]
8637 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
8639 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
8640 vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
8641 [(set_attr "isa" "noavx,avx")
8642 (set_attr "type" "sseiadd")
8643 (set_attr "prefix_extra" "1,*")
8644 (set_attr "prefix" "orig,vex")
8645 (set_attr "mode" "TI")])
;; SSE2 V8HI max/min insn: emits p<maxmin_int>w (non-AVX, output tied to
;; operand 1) or vp<maxmin_int>w (AVX).  Enabled for TARGET_SSE2 when
;; ix86_binary_operator_ok accepts the operands.
8647 (define_insn "*<code>v8hi3"
8648 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
8650 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x")
8651 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")))]
8652 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V8HImode, operands)"
8654 p<maxmin_int>w\t{%2, %0|%0, %2}
8655 vp<maxmin_int>w\t{%2, %1, %0|%0, %1, %2}"
8656 [(set_attr "isa" "noavx,avx")
8657 (set_attr "type" "sseiadd")
8658 (set_attr "prefix_data16" "1,*")
8659 (set_attr "prefix_extra" "*,1")
8660 (set_attr "prefix" "orig,vex")
8661 (set_attr "mode" "TI")])
;; Max/min expander over the VI124_128 modes that uses a GTU comparison.
;; With TARGET_SSE4_1, or for V16QImode, the operands are only fixed up.
;; UMAX on V8HImode is synthesized as sse2_ussubv8hi3 (op1, op2) followed by
;; addv8hi3 with op2; a fresh temporary is used when operand 0 is
;; rtx_equal_p to operand 2, so the first insn does not overwrite op2.
;; Everything else is expanded as a conditional select via
;; ix86_expand_int_vcond after forcing both inputs into registers.
8663 (define_expand "<code><mode>3"
8664 [(set (match_operand:VI124_128 0 "register_operand")
8666 (match_operand:VI124_128 1 "nonimmediate_operand")
8667 (match_operand:VI124_128 2 "nonimmediate_operand")))]
8670 if (TARGET_SSE4_1 || <MODE>mode == V16QImode)
8671 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
8672 else if (<CODE> == UMAX && <MODE>mode == V8HImode)
8674 rtx op0 = operands[0], op2 = operands[2], op3 = op0;
8675 operands[1] = force_reg (<MODE>mode, operands[1]);
8676 if (rtx_equal_p (op3, op2))
8677 op3 = gen_reg_rtx (V8HImode);
8678 emit_insn (gen_sse2_ussubv8hi3 (op3, operands[1], op2));
8679 emit_insn (gen_addv8hi3 (op0, op3, op2));
8687 operands[1] = force_reg (<MODE>mode, operands[1]);
8688 operands[2] = force_reg (<MODE>mode, operands[2]);
8690 xops[0] = operands[0];
8694 xops[1] = operands[1];
8695 xops[2] = operands[2];
8699 xops[1] = operands[2];
8700 xops[2] = operands[1];
8703 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
8704 xops[4] = operands[1];
8705 xops[5] = operands[2];
8707 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 max/min insn over the VI24_128 modes.  Alternative 0 (non-AVX)
;; ties the output to operand 1 and emits p<maxmin_int><ssemodesuffix>;
;; alternative 1 (AVX) emits the three-operand vp... form.  Operands 1 and
;; 2 are commutative ("%").
8713 (define_insn "*sse4_1_<code><mode>3"
8714 [(set (match_operand:VI24_128 0 "register_operand" "=x,x")
8716 (match_operand:VI24_128 1 "nonimmediate_operand" "%0,x")
8717 (match_operand:VI24_128 2 "nonimmediate_operand" "xm,xm")))]
8718 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
8720 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
8721 vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
8722 [(set_attr "isa" "noavx,avx")
8723 (set_attr "type" "sseiadd")
8724 (set_attr "prefix_extra" "1,*")
8725 (set_attr "prefix" "orig,vex")
8726 (set_attr "mode" "TI")])
;; SSE2 V16QI max/min insn: emits p<maxmin_int>b (non-AVX, output tied to
;; operand 1) or vp<maxmin_int>b (AVX).  Enabled for TARGET_SSE2 when
;; ix86_binary_operator_ok accepts the operands.
8728 (define_insn "*<code>v16qi3"
8729 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
8731 (match_operand:V16QI 1 "nonimmediate_operand" "%0,x")
8732 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")))]
8733 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V16QImode, operands)"
8735 p<maxmin_int>b\t{%2, %0|%0, %2}
8736 vp<maxmin_int>b\t{%2, %1, %0|%0, %1, %2}"
8737 [(set_attr "isa" "noavx,avx")
8738 (set_attr "type" "sseiadd")
8739 (set_attr "prefix_data16" "1,*")
8740 (set_attr "prefix_extra" "*,1")
8741 (set_attr "prefix" "orig,vex")
8742 (set_attr "mode" "TI")])
8744 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8746 ;; Parallel integral comparisons
8748 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander for the equality comparison over the VI_256 modes; the
;; preparation statement calls ix86_fixup_binary_operands_no_copy for EQ.
8750 (define_expand "avx2_eq<mode>3"
8751 [(set (match_operand:VI_256 0 "register_operand")
8753 (match_operand:VI_256 1 "nonimmediate_operand")
8754 (match_operand:VI_256 2 "nonimmediate_operand")))]
8756 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
;; AVX2 equality-comparison insn over the VI_256 modes; emits three-operand
;; vpcmpeq<ssemodesuffix>.  Operand 1 is commutative with operand 2 ("%").
8758 (define_insn "*avx2_eq<mode>3"
8759 [(set (match_operand:VI_256 0 "register_operand" "=x")
8761 (match_operand:VI_256 1 "nonimmediate_operand" "%x")
8762 (match_operand:VI_256 2 "nonimmediate_operand" "xm")))]
8763 "TARGET_AVX2 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
8764 "vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
8765 [(set_attr "type" "ssecmp")
8766 (set_attr "prefix_extra" "1")
8767 (set_attr "prefix" "vex")
8768 (set_attr "mode" "OI")])
;; Expander for the AVX512F equality comparison over the VI48_512 modes.
;; The result is an unspec in <avx512fmaskmode>; the preparation statement
;; calls ix86_fixup_binary_operands_no_copy for EQ.
8770 (define_expand "avx512f_eq<mode>3<mask_scalar_merge_name>"
8771 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
8772 (unspec:<avx512fmaskmode>
8773 [(match_operand:VI48_512 1 "register_operand")
8774 (match_operand:VI48_512 2 "nonimmediate_operand")]
8777 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
;; AVX512F equality-comparison insn over the VI48_512 modes.  The result is
;; written to a register with constraint "Yk" in <avx512fmaskmode>; emits
;; vpcmpeq<ssemodesuffix> with the optional <mask_scalar_merge_operand3>
;; suffix.
8779 (define_insn "avx512f_eq<mode>3<mask_scalar_merge_name>_1"
8780 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
8781 (unspec:<avx512fmaskmode>
8782 [(match_operand:VI48_512 1 "register_operand" "%v")
8783 (match_operand:VI48_512 2 "nonimmediate_operand" "vm")]
8785 "TARGET_AVX512F && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
8786 "vpcmpeq<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
8787 [(set_attr "type" "ssecmp")
8788 (set_attr "prefix_extra" "1")
8789 (set_attr "prefix" "evex")
8790 (set_attr "mode" "<sseinsnmode>")])
;; SSE4.1 V2DI equality-comparison insn: emits pcmpeqq (non-AVX, output
;; tied to operand 1) or vpcmpeqq (AVX).  Operands 1 and 2 are commutative
;; ("%").
8792 (define_insn "*sse4_1_eqv2di3"
8793 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
8795 (match_operand:V2DI 1 "nonimmediate_operand" "%0,x")
8796 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")))]
8797 "TARGET_SSE4_1 && ix86_binary_operator_ok (EQ, V2DImode, operands)"
8799 pcmpeqq\t{%2, %0|%0, %2}
8800 vpcmpeqq\t{%2, %1, %0|%0, %1, %2}"
8801 [(set_attr "isa" "noavx,avx")
8802 (set_attr "type" "ssecmp")
8803 (set_attr "prefix_extra" "1")
8804 (set_attr "prefix" "orig,vex")
8805 (set_attr "mode" "TI")])
;; SSE2 equality-comparison insn over the VI124_128 modes, disabled when
;; TARGET_XOP is set.  Emits pcmpeq<ssemodesuffix> (non-AVX, output tied to
;; operand 1) or vpcmpeq<ssemodesuffix> (AVX).
8807 (define_insn "*sse2_eq<mode>3"
8808 [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
8810 (match_operand:VI124_128 1 "nonimmediate_operand" "%0,x")
8811 (match_operand:VI124_128 2 "nonimmediate_operand" "xm,xm")))]
8812 "TARGET_SSE2 && !TARGET_XOP
8813 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
8815 pcmpeq<ssemodesuffix>\t{%2, %0|%0, %2}
8816 vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
8817 [(set_attr "isa" "noavx,avx")
8818 (set_attr "type" "ssecmp")
8819 (set_attr "prefix_data16" "1,*")
8820 (set_attr "prefix" "orig,vex")
8821 (set_attr "mode" "TI")])
;; Expander for the SSE2 equality comparison over the VI124_128 modes,
;; disabled when TARGET_XOP is set; the preparation statement calls
;; ix86_fixup_binary_operands_no_copy for EQ.
8823 (define_expand "sse2_eq<mode>3"
8824 [(set (match_operand:VI124_128 0 "register_operand")
8826 (match_operand:VI124_128 1 "nonimmediate_operand")
8827 (match_operand:VI124_128 2 "nonimmediate_operand")))]
8828 "TARGET_SSE2 && !TARGET_XOP "
8829 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
;; Expander for the V2DI equality comparison; the preparation statement
;; calls ix86_fixup_binary_operands_no_copy for EQ.
8831 (define_expand "sse4_1_eqv2di3"
8832 [(set (match_operand:V2DI 0 "register_operand")
8834 (match_operand:V2DI 1 "nonimmediate_operand")
8835 (match_operand:V2DI 2 "nonimmediate_operand")))]
8837 "ix86_fixup_binary_operands_no_copy (EQ, V2DImode, operands);")
;; V2DI greater-than comparison insn: emits pcmpgtq (non-AVX, output tied
;; to operand 1) or vpcmpgtq (AVX).  Operand 1 must be a register and
;; carries no "%" modifier, since the comparison is not commutative.
8839 (define_insn "sse4_2_gtv2di3"
8840 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
8842 (match_operand:V2DI 1 "register_operand" "0,x")
8843 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")))]
8846 pcmpgtq\t{%2, %0|%0, %2}
8847 vpcmpgtq\t{%2, %1, %0|%0, %1, %2}"
8848 [(set_attr "isa" "noavx,avx")
8849 (set_attr "type" "ssecmp")
8850 (set_attr "prefix_extra" "1")
8851 (set_attr "prefix" "orig,vex")
8852 (set_attr "mode" "TI")])
;; Greater-than comparison insn over the VI_256 modes; emits three-operand
;; vpcmpgt<ssemodesuffix>.  Operand 1 must be a register; operand 2 may be
;; a register or memory.
8854 (define_insn "avx2_gt<mode>3"
8855 [(set (match_operand:VI_256 0 "register_operand" "=x")
8857 (match_operand:VI_256 1 "register_operand" "x")
8858 (match_operand:VI_256 2 "nonimmediate_operand" "xm")))]
8860 "vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
8861 [(set_attr "type" "ssecmp")
8862 (set_attr "prefix_extra" "1")
8863 (set_attr "prefix" "vex")
8864 (set_attr "mode" "OI")])
;; Greater-than comparison insn over the VI48_512 modes, expressed as
;; UNSPEC_MASKED_GT.  The result is written to a register with constraint
;; "Yk" in <avx512fmaskmode>; emits vpcmpgt<ssemodesuffix> with the
;; optional <mask_scalar_merge_operand3> suffix.
8866 (define_insn "avx512f_gt<mode>3<mask_scalar_merge_name>"
8867 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
8868 (unspec:<avx512fmaskmode>
8869 [(match_operand:VI48_512 1 "register_operand" "v")
8870 (match_operand:VI48_512 2 "nonimmediate_operand" "vm")] UNSPEC_MASKED_GT))]
8872 "vpcmpgt<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
8873 [(set_attr "type" "ssecmp")
8874 (set_attr "prefix_extra" "1")
8875 (set_attr "prefix" "evex")
8876 (set_attr "mode" "<sseinsnmode>")])
;; SSE2 greater-than comparison insn over the VI124_128 modes, disabled
;; when TARGET_XOP is set.  Emits pcmpgt<ssemodesuffix> (non-AVX, output
;; tied to operand 1) or vpcmpgt<ssemodesuffix> (AVX).
8878 (define_insn "sse2_gt<mode>3"
8879 [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
8881 (match_operand:VI124_128 1 "register_operand" "0,x")
8882 (match_operand:VI124_128 2 "nonimmediate_operand" "xm,xm")))]
8883 "TARGET_SSE2 && !TARGET_XOP"
8885 pcmpgt<ssemodesuffix>\t{%2, %0|%0, %2}
8886 vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
8887 [(set_attr "isa" "noavx,avx")
8888 (set_attr "type" "ssecmp")
8889 (set_attr "prefix_data16" "1,*")
8890 (set_attr "prefix" "orig,vex")
8891 (set_attr "mode" "TI")])
;; Vector conditional-select expander: data mode from V_512, comparison
;; operands (4 and 5) from VI_512, comparison operator in operand 3.  The
;; condition requires both modes to have the same number of elements.
;; Expansion is done by ix86_expand_int_vcond.
8893 (define_expand "vcond<V_512:mode><VI_512:mode>"
8894 [(set (match_operand:V_512 0 "register_operand")
8896 (match_operator 3 ""
8897 [(match_operand:VI_512 4 "nonimmediate_operand")
8898 (match_operand:VI_512 5 "general_operand")])
8899 (match_operand:V_512 1)
8900 (match_operand:V_512 2)))]
8902 && (GET_MODE_NUNITS (<V_512:MODE>mode)
8903 == GET_MODE_NUNITS (<VI_512:MODE>mode))"
8905 bool ok = ix86_expand_int_vcond (operands);
;; Vector conditional-select expander: data mode from V_256, comparison
;; operands (4 and 5) from VI_256, comparison operator in operand 3.  The
;; condition requires both modes to have the same number of elements.
;; Expansion is done by ix86_expand_int_vcond.
8910 (define_expand "vcond<V_256:mode><VI_256:mode>"
8911 [(set (match_operand:V_256 0 "register_operand")
8913 (match_operator 3 ""
8914 [(match_operand:VI_256 4 "nonimmediate_operand")
8915 (match_operand:VI_256 5 "general_operand")])
8916 (match_operand:V_256 1)
8917 (match_operand:V_256 2)))]
8919 && (GET_MODE_NUNITS (<V_256:MODE>mode)
8920 == GET_MODE_NUNITS (<VI_256:MODE>mode))"
8922 bool ok = ix86_expand_int_vcond (operands);
;; Vector conditional-select expander: data mode from V_128, comparison
;; operands (4 and 5) from VI124_128, comparison operator in operand 3.
;; The condition requires both modes to have the same number of elements.
;; Expansion is done by ix86_expand_int_vcond.
8927 (define_expand "vcond<V_128:mode><VI124_128:mode>"
8928 [(set (match_operand:V_128 0 "register_operand")
8930 (match_operator 3 ""
8931 [(match_operand:VI124_128 4 "nonimmediate_operand")
8932 (match_operand:VI124_128 5 "general_operand")])
8933 (match_operand:V_128 1)
8934 (match_operand:V_128 2)))]
8936 && (GET_MODE_NUNITS (<V_128:MODE>mode)
8937 == GET_MODE_NUNITS (<VI124_128:MODE>mode))"
8939 bool ok = ix86_expand_int_vcond (operands);
;; Vector conditional-select expander for VI8F_128 data with a V2DI
;; comparison (operands 4 and 5, operator in operand 3).  Expansion is done
;; by ix86_expand_int_vcond.
8944 (define_expand "vcond<VI8F_128:mode>v2di"
8945 [(set (match_operand:VI8F_128 0 "register_operand")
8946 (if_then_else:VI8F_128
8947 (match_operator 3 ""
8948 [(match_operand:V2DI 4 "nonimmediate_operand")
8949 (match_operand:V2DI 5 "general_operand")])
8950 (match_operand:VI8F_128 1)
8951 (match_operand:VI8F_128 2)))]
8954 bool ok = ix86_expand_int_vcond (operands);
;; "vcondu" conditional-select expander: data mode from V_512, comparison
;; operands (4 and 5) from VI_512, comparison operator in operand 3.  Both
;; comparison operands must be nonimmediate; the select arms are
;; general_operand.  The condition requires equal element counts, and
;; expansion is done by ix86_expand_int_vcond.
8959 (define_expand "vcondu<V_512:mode><VI_512:mode>"
8960 [(set (match_operand:V_512 0 "register_operand")
8962 (match_operator 3 ""
8963 [(match_operand:VI_512 4 "nonimmediate_operand")
8964 (match_operand:VI_512 5 "nonimmediate_operand")])
8965 (match_operand:V_512 1 "general_operand")
8966 (match_operand:V_512 2 "general_operand")))]
8968 && (GET_MODE_NUNITS (<V_512:MODE>mode)
8969 == GET_MODE_NUNITS (<VI_512:MODE>mode))"
8971 bool ok = ix86_expand_int_vcond (operands);
;; "vcondu" conditional-select expander: data mode from V_256, comparison
;; operands (4 and 5) from VI_256, comparison operator in operand 3.  Both
;; comparison operands must be nonimmediate; the select arms are
;; general_operand.  The condition requires equal element counts, and
;; expansion is done by ix86_expand_int_vcond.
8976 (define_expand "vcondu<V_256:mode><VI_256:mode>"
8977 [(set (match_operand:V_256 0 "register_operand")
8979 (match_operator 3 ""
8980 [(match_operand:VI_256 4 "nonimmediate_operand")
8981 (match_operand:VI_256 5 "nonimmediate_operand")])
8982 (match_operand:V_256 1 "general_operand")
8983 (match_operand:V_256 2 "general_operand")))]
8985 && (GET_MODE_NUNITS (<V_256:MODE>mode)
8986 == GET_MODE_NUNITS (<VI_256:MODE>mode))"
8988 bool ok = ix86_expand_int_vcond (operands);
;; "vcondu" conditional-select expander: data mode from V_128, comparison
;; operands (4 and 5) from VI124_128, comparison operator in operand 3.
;; Both comparison operands must be nonimmediate; the select arms are
;; general_operand.  The condition requires equal element counts, and
;; expansion is done by ix86_expand_int_vcond.
8993 (define_expand "vcondu<V_128:mode><VI124_128:mode>"
8994 [(set (match_operand:V_128 0 "register_operand")
8996 (match_operator 3 ""
8997 [(match_operand:VI124_128 4 "nonimmediate_operand")
8998 (match_operand:VI124_128 5 "nonimmediate_operand")])
8999 (match_operand:V_128 1 "general_operand")
9000 (match_operand:V_128 2 "general_operand")))]
9002 && (GET_MODE_NUNITS (<V_128:MODE>mode)
9003 == GET_MODE_NUNITS (<VI124_128:MODE>mode))"
9005 bool ok = ix86_expand_int_vcond (operands);
;; "vcondu" conditional-select expander for VI8F_128 data with a V2DI
;; comparison (operands 4 and 5, operator in operand 3).  Expansion is done
;; by ix86_expand_int_vcond.
9010 (define_expand "vcondu<VI8F_128:mode>v2di"
9011 [(set (match_operand:VI8F_128 0 "register_operand")
9012 (if_then_else:VI8F_128
9013 (match_operator 3 ""
9014 [(match_operand:V2DI 4 "nonimmediate_operand")
9015 (match_operand:V2DI 5 "nonimmediate_operand")])
9016 (match_operand:VI8F_128 1 "general_operand")
9017 (match_operand:VI8F_128 2 "general_operand")))]
9020 bool ok = ix86_expand_int_vcond (operands);
;; Modes handled by the vec_perm<mode> expander.  The 128-bit modes are
;; listed unconditionally, the 256-bit modes require TARGET_AVX2, and the
;; 512-bit modes require TARGET_AVX512F.
9025 (define_mode_iterator VEC_PERM_AVX2
9026 [V16QI V8HI V4SI V2DI V4SF V2DF
9027 (V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
9028 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
9029 (V8SF "TARGET_AVX2") (V4DF "TARGET_AVX2")
9030 (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")
9031 (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")])
;; Permute expander with a register selector (operand 3, in
;; <sseintvecmode>) over the VEC_PERM_AVX2 modes.  Available with
;; TARGET_SSSE3, TARGET_AVX or TARGET_XOP; the work is done by
;; ix86_expand_vec_perm.
9033 (define_expand "vec_perm<mode>"
9034 [(match_operand:VEC_PERM_AVX2 0 "register_operand")
9035 (match_operand:VEC_PERM_AVX2 1 "register_operand")
9036 (match_operand:VEC_PERM_AVX2 2 "register_operand")
9037 (match_operand:<sseintvecmode> 3 "register_operand")]
9038 "TARGET_SSSE3 || TARGET_AVX || TARGET_XOP"
9040 ix86_expand_vec_perm (operands);
;; Modes handled by the vec_perm_const<mode> expander, each gated on the
;; ISA flag listed beside it (TARGET_SSE, TARGET_SSE2, TARGET_AVX,
;; TARGET_AVX2 or TARGET_AVX512F).
9044 (define_mode_iterator VEC_PERM_CONST
9045 [(V4SF "TARGET_SSE") (V4SI "TARGET_SSE")
9046 (V2DF "TARGET_SSE") (V2DI "TARGET_SSE")
9047 (V16QI "TARGET_SSE2") (V8HI "TARGET_SSE2")
9048 (V8SF "TARGET_AVX") (V4DF "TARGET_AVX")
9049 (V8SI "TARGET_AVX") (V4DI "TARGET_AVX")
9050 (V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
9051 (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")
9052 (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")])
;; vec_perm_const expander for the VEC_PERM_CONST modes.  Operands 0, 1
;; and 2 are registers in the vector mode; operand 3 is in
;; <sseintvecmode> and is given without a predicate.  Expansion is
;; attempted through ix86_expand_vec_perm_const, whose return value is
;; tested.
9054 (define_expand "vec_perm_const<mode>"
9055 [(match_operand:VEC_PERM_CONST 0 "register_operand")
9056 (match_operand:VEC_PERM_CONST 1 "register_operand")
9057 (match_operand:VEC_PERM_CONST 2 "register_operand")
9058 (match_operand:<sseintvecmode> 3)]
9061 if (ix86_expand_vec_perm_const (operands))
9067 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9069 ;; Parallel bitwise logical operations
9071 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; one_cmpl expander for the VI modes, expressed with an xor involving
;; operand 1.  The preparation code builds a CONST_VECTOR whose
;; GET_MODE_NUNITS (<MODE>mode) elements are all constm1_rtx and forces
;; it into a register, which becomes operands[2].
9073 (define_expand "one_cmpl<mode>2"
9074 [(set (match_operand:VI 0 "register_operand")
9075 (xor:VI (match_operand:VI 1 "nonimmediate_operand")
9079 int i, n = GET_MODE_NUNITS (<MODE>mode);
9080 rtvec v = rtvec_alloc (n);
9082 for (i = 0; i < n; ++i)
9083 RTVEC_ELT (v, i) = constm1_rtx;
9085 operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v));
;; Named and-not expander for the VI_AVX2 modes: the complement of
;; register operand 1 is combined with operand 2, which may be in
;; memory.  Enabled for TARGET_SSE2 when <mask_mode512bit_condition>
;; holds.  The expander has no preparation code.
9088 (define_expand "<sse2_avx2>_andnot<mode>3<mask_name>"
9089 [(set (match_operand:VI_AVX2 0 "register_operand")
9091 (not:VI_AVX2 (match_operand:VI_AVX2 1 "register_operand"))
9092 (match_operand:VI_AVX2 2 "nonimmediate_operand")))]
9093 "TARGET_SSE2 && <mask_mode512bit_condition>")
;; And-not insn for all VI modes.  It has two alternatives whose "isa"
;; is noavx and avx respectively; the first ties operand 1 to the
;; destination ("0").
;;
;; The output code switches on get_attr_mode (insn) to choose the
;; mnemonic stored in "tmp" ("pandn<ssemodesuffix>" is one of the
;; choices), asserting along the way that the ISA needed for that mode
;; (AVX512F, AVX2, SSE2, AVX or SSE) is enabled.  It then switches on
;; which_alternative to choose between a two-operand operand template
;; and a "v"-prefixed three-operand template that carries
;; <mask_operand3_1>, and formats mnemonic and operands into a 64-byte
;; function-local static buffer with snprintf.
;;
;; The "mode" attribute is computed as follows:
;;   - 16-byte mode with TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL:
;;     <ssePSmode>;
;;   - TARGET_AVX2: <sseinsnmode>;
;;   - TARGET_AVX: V8SF when the mode is wider than 16 bytes, otherwise
;;     <sseinsnmode>;
;;   - no TARGET_SSE2, or optimizing the function for size: V4SF;
;;   - otherwise <sseinsnmode>.
9095 (define_insn "*andnot<mode>3<mask_name>"
9096 [(set (match_operand:VI 0 "register_operand" "=x,v")
9098 (not:VI (match_operand:VI 1 "register_operand" "0,v"))
9099 (match_operand:VI 2 "nonimmediate_operand" "xm,vm")))]
9100 "TARGET_SSE && <mask_mode512bit_condition>"
9102 static char buf[64];
9106 switch (get_attr_mode (insn))
9109 gcc_assert (TARGET_AVX512F);
9111 tmp = "pandn<ssemodesuffix>";
9115 gcc_assert (TARGET_AVX2);
9117 gcc_assert (TARGET_SSE2);
9123 gcc_assert (TARGET_AVX512F);
9125 gcc_assert (TARGET_AVX);
9127 gcc_assert (TARGET_SSE);
9136 switch (which_alternative)
9139 ops = "%s\t{%%2, %%0|%%0, %%2}";
9142 ops = "v%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
9148 snprintf (buf, sizeof (buf), ops, tmp);
9151 [(set_attr "isa" "noavx,avx")
9152 (set_attr "type" "sselog")
9153 (set (attr "prefix_data16")
9155 (and (eq_attr "alternative" "0")
9156 (eq_attr "mode" "TI"))
9158 (const_string "*")))
9159 (set_attr "prefix" "<mask_prefix3>")
9161 (cond [(and (match_test "<MODE_SIZE> == 16")
9162 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
9163 (const_string "<ssePSmode>")
9164 (match_test "TARGET_AVX2")
9165 (const_string "<sseinsnmode>")
9166 (match_test "TARGET_AVX")
9168 (match_test "<MODE_SIZE> > 16")
9169 (const_string "V8SF")
9170 (const_string "<sseinsnmode>"))
9171 (ior (not (match_test "TARGET_SSE2"))
9172 (match_test "optimize_function_for_size_p (cfun)"))
9173 (const_string "V4SF")
9175 (const_string "<sseinsnmode>")))])
;; Expander for the vector logical operations selected by <code> on the
;; VI modes.  Operands 1 and 2 may each be a register, a memory
;; reference or a constant vector (nonimmediate_or_const_vector_operand).
;; The work is done by ix86_expand_vector_logical_operator, which
;; receives the rtx code, the mode and the operand array.
9177 (define_expand "<code><mode>3"
9178 [(set (match_operand:VI 0 "register_operand")
9180 (match_operand:VI 1 "nonimmediate_or_const_vector_operand")
9181 (match_operand:VI 2 "nonimmediate_or_const_vector_operand")))]
9184 ix86_expand_vector_logical_operator (<CODE>, <MODE>mode, operands);
;; Insn for the vector logical operations selected by <code> on the VI
;; modes.  Operand 1 is marked commutative ("%") and the pattern is only
;; valid when ix86_binary_operator_ok accepts the operands.  It has two
;; alternatives whose "isa" is noavx and avx respectively.
;;
;; The output code switches on get_attr_mode (insn) to choose the
;; mnemonic stored in "tmp" ("p<logic><ssemodesuffix>" is one of the
;; choices), asserting that the ISA needed for that mode (AVX512F, AVX2,
;; SSE2, AVX or SSE) is enabled.  It then switches on which_alternative
;; to choose between a two-operand operand template and a "v"-prefixed
;; three-operand template that carries <mask_operand3_1>, and formats
;; mnemonic and operands into a 64-byte function-local static buffer
;; with snprintf.
;;
;; The "mode" attribute is computed as follows:
;;   - 16-byte mode with TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL:
;;     <ssePSmode>;
;;   - TARGET_AVX2: <sseinsnmode>;
;;   - TARGET_AVX: V8SF when the mode is wider than 16 bytes, otherwise
;;     <sseinsnmode>;
;;   - no TARGET_SSE2, or optimizing the function for size: V4SF;
;;   - otherwise <sseinsnmode>.
9188 (define_insn "<mask_codefor><code><mode>3<mask_name>"
9189 [(set (match_operand:VI 0 "register_operand" "=x,v")
9191 (match_operand:VI 1 "nonimmediate_operand" "%0,v")
9192 (match_operand:VI 2 "nonimmediate_operand" "xm,vm")))]
9193 "TARGET_SSE && <mask_mode512bit_condition>
9194 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
9196 static char buf[64];
9200 switch (get_attr_mode (insn))
9203 gcc_assert (TARGET_AVX512F);
9205 tmp = "p<logic><ssemodesuffix>";
9209 gcc_assert (TARGET_AVX2);
9211 gcc_assert (TARGET_SSE2);
9217 gcc_assert (TARGET_AVX512F);
9219 gcc_assert (TARGET_AVX);
9221 gcc_assert (TARGET_SSE);
9230 switch (which_alternative)
9233 ops = "%s\t{%%2, %%0|%%0, %%2}";
9236 ops = "v%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
9242 snprintf (buf, sizeof (buf), ops, tmp);
9245 [(set_attr "isa" "noavx,avx")
9246 (set_attr "type" "sselog")
9247 (set (attr "prefix_data16")
9249 (and (eq_attr "alternative" "0")
9250 (eq_attr "mode" "TI"))
9252 (const_string "*")))
9253 (set_attr "prefix" "<mask_prefix3>")
9255 (cond [(and (match_test "<MODE_SIZE> == 16")
9256 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
9257 (const_string "<ssePSmode>")
9258 (match_test "TARGET_AVX2")
9259 (const_string "<sseinsnmode>")
9260 (match_test "TARGET_AVX")
9262 (match_test "<MODE_SIZE> > 16")
9263 (const_string "V8SF")
9264 (const_string "<sseinsnmode>"))
9265 (ior (not (match_test "TARGET_SSE2"))
9266 (match_test "optimize_function_for_size_p (cfun)"))
9267 (const_string "V4SF")
9269 (const_string "<sseinsnmode>")))])
;; vptestm on the VI48_512 modes.  The result, in <avx512fmaskmode>, is
;; an unspec of register operand 1 and register-or-memory operand 2 and
;; is written to operand 0 (constraint "Yk").  The template emits
;; vptestm<ssemodesuffix> with <mask_scalar_merge_operand3> attached to
;; the destination.
9271 (define_insn "avx512f_testm<mode>3<mask_scalar_merge_name>"
9272 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
9273 (unspec:<avx512fmaskmode>
9274 [(match_operand:VI48_512 1 "register_operand" "v")
9275 (match_operand:VI48_512 2 "nonimmediate_operand" "vm")]
9278 "vptestm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
9279 [(set_attr "prefix" "evex")
9280 (set_attr "mode" "<sseinsnmode>")])
;; vptestnm on the VI48_512 modes.  Same operand layout as the testm
;; pattern: an <avx512fmaskmode> unspec of register operand 1 and
;; register-or-memory operand 2 is written to operand 0 (constraint
;; "Yk").  The template emits vptestnm<ssemodesuffix>.
9282 (define_insn "avx512f_testnm<mode>3<mask_scalar_merge_name>"
9283 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
9284 (unspec:<avx512fmaskmode>
9285 [(match_operand:VI48_512 1 "register_operand" "v")
9286 (match_operand:VI48_512 2 "nonimmediate_operand" "vm")]
9289 "vptestnm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
9290 [(set_attr "prefix" "evex")
9291 (set_attr "mode" "<sseinsnmode>")])
9293 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9295 ;; Parallel integral element swizzling
9297 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; vec_pack_trunc expander.  Operands 1 and 2 are reinterpreted in the
;; result mode <ssepackmode> with gen_lowpart and handed to
;; ix86_expand_vec_extract_even_odd together with operand 0 and a final
;; argument of 0.
9299 (define_expand "vec_pack_trunc_<mode>"
9300 [(match_operand:<ssepackmode> 0 "register_operand")
9301 (match_operand:VI248_AVX2_8_AVX512F 1 "register_operand")
9302 (match_operand:VI248_AVX2_8_AVX512F 2 "register_operand")]
9305 rtx op1 = gen_lowpart (<ssepackmode>mode, operands[1]);
9306 rtx op2 = gen_lowpart (<ssepackmode>mode, operands[2]);
9307 ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
;; packsswb / vpacksswb for the VI1_AVX2 modes.  The result is the
;; vec_concat of the signed-saturating truncations (ss_truncate) of
;; operands 1 and 2, both given in <sseunpackmode>.  The two
;; alternatives are the two-operand form (isa noavx, operand 1 tied to
;; the destination) and the three-operand "v" form (isa avx).
9311 (define_insn "<sse2_avx2>_packsswb"
9312 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
9313 (vec_concat:VI1_AVX2
9314 (ss_truncate:<ssehalfvecmode>
9315 (match_operand:<sseunpackmode> 1 "register_operand" "0,x"))
9316 (ss_truncate:<ssehalfvecmode>
9317 (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,xm"))))]
9320 packsswb\t{%2, %0|%0, %2}
9321 vpacksswb\t{%2, %1, %0|%0, %1, %2}"
9322 [(set_attr "isa" "noavx,avx")
9323 (set_attr "type" "sselog")
9324 (set_attr "prefix_data16" "1,*")
9325 (set_attr "prefix" "orig,vex")
9326 (set_attr "mode" "<sseinsnmode>")])
;; packssdw / vpackssdw for the VI2_AVX2 modes.  The result is the
;; vec_concat of the signed-saturating truncations (ss_truncate) of
;; operands 1 and 2, both given in <sseunpackmode>.  The two
;; alternatives are the two-operand form (isa noavx, operand 1 tied to
;; the destination) and the three-operand "v" form (isa avx).
9328 (define_insn "<sse2_avx2>_packssdw"
9329 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
9330 (vec_concat:VI2_AVX2
9331 (ss_truncate:<ssehalfvecmode>
9332 (match_operand:<sseunpackmode> 1 "register_operand" "0,x"))
9333 (ss_truncate:<ssehalfvecmode>
9334 (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,xm"))))]
9337 packssdw\t{%2, %0|%0, %2}
9338 vpackssdw\t{%2, %1, %0|%0, %1, %2}"
9339 [(set_attr "isa" "noavx,avx")
9340 (set_attr "type" "sselog")
9341 (set_attr "prefix_data16" "1,*")
9342 (set_attr "prefix" "orig,vex")
9343 (set_attr "mode" "<sseinsnmode>")])
;; packuswb / vpackuswb for the VI1_AVX2 modes.  The result is the
;; vec_concat of the unsigned-saturating truncations (us_truncate) of
;; operands 1 and 2, both given in <sseunpackmode>.  The two
;; alternatives are the two-operand form (isa noavx, operand 1 tied to
;; the destination) and the three-operand "v" form (isa avx).
9345 (define_insn "<sse2_avx2>_packuswb"
9346 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
9347 (vec_concat:VI1_AVX2
9348 (us_truncate:<ssehalfvecmode>
9349 (match_operand:<sseunpackmode> 1 "register_operand" "0,x"))
9350 (us_truncate:<ssehalfvecmode>
9351 (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,xm"))))]
9354 packuswb\t{%2, %0|%0, %2}
9355 vpackuswb\t{%2, %1, %0|%0, %1, %2}"
9356 [(set_attr "isa" "noavx,avx")
9357 (set_attr "type" "sselog")
9358 (set_attr "prefix_data16" "1,*")
9359 (set_attr "prefix" "orig,vex")
9360 (set_attr "mode" "<sseinsnmode>")])
;; vpunpckhbw on V32QI.  The selection list pairs index i with index
;; i + 32 for i = 8..15 and for i = 24..31.  Single alternative with a
;; VEX prefix and mode OI.
9362 (define_insn "avx2_interleave_highv32qi"
9363 [(set (match_operand:V32QI 0 "register_operand" "=x")
9366 (match_operand:V32QI 1 "register_operand" "x")
9367 (match_operand:V32QI 2 "nonimmediate_operand" "xm"))
9368 (parallel [(const_int 8) (const_int 40)
9369 (const_int 9) (const_int 41)
9370 (const_int 10) (const_int 42)
9371 (const_int 11) (const_int 43)
9372 (const_int 12) (const_int 44)
9373 (const_int 13) (const_int 45)
9374 (const_int 14) (const_int 46)
9375 (const_int 15) (const_int 47)
9376 (const_int 24) (const_int 56)
9377 (const_int 25) (const_int 57)
9378 (const_int 26) (const_int 58)
9379 (const_int 27) (const_int 59)
9380 (const_int 28) (const_int 60)
9381 (const_int 29) (const_int 61)
9382 (const_int 30) (const_int 62)
9383 (const_int 31) (const_int 63)])))]
9385 "vpunpckhbw\t{%2, %1, %0|%0, %1, %2}"
9386 [(set_attr "type" "sselog")
9387 (set_attr "prefix" "vex")
9388 (set_attr "mode" "OI")])
;; punpckhbw / vpunpckhbw on V16QI.  The selection list pairs index i
;; with index i + 16 for i = 8..15.  The two alternatives are the
;; two-operand form (isa noavx, operand 1 tied to the destination) and
;; the three-operand "v" form (isa avx).
9390 (define_insn "vec_interleave_highv16qi"
9391 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
9394 (match_operand:V16QI 1 "register_operand" "0,x")
9395 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm"))
9396 (parallel [(const_int 8) (const_int 24)
9397 (const_int 9) (const_int 25)
9398 (const_int 10) (const_int 26)
9399 (const_int 11) (const_int 27)
9400 (const_int 12) (const_int 28)
9401 (const_int 13) (const_int 29)
9402 (const_int 14) (const_int 30)
9403 (const_int 15) (const_int 31)])))]
9406 punpckhbw\t{%2, %0|%0, %2}
9407 vpunpckhbw\t{%2, %1, %0|%0, %1, %2}"
9408 [(set_attr "isa" "noavx,avx")
9409 (set_attr "type" "sselog")
9410 (set_attr "prefix_data16" "1,*")
9411 (set_attr "prefix" "orig,vex")
9412 (set_attr "mode" "TI")])
;; vpunpcklbw on V32QI.  The selection list pairs index i with index
;; i + 32 for i = 0..7 and for i = 16..23.  Single alternative with a
;; VEX prefix and mode OI.
9414 (define_insn "avx2_interleave_lowv32qi"
9415 [(set (match_operand:V32QI 0 "register_operand" "=x")
9418 (match_operand:V32QI 1 "register_operand" "x")
9419 (match_operand:V32QI 2 "nonimmediate_operand" "xm"))
9420 (parallel [(const_int 0) (const_int 32)
9421 (const_int 1) (const_int 33)
9422 (const_int 2) (const_int 34)
9423 (const_int 3) (const_int 35)
9424 (const_int 4) (const_int 36)
9425 (const_int 5) (const_int 37)
9426 (const_int 6) (const_int 38)
9427 (const_int 7) (const_int 39)
9428 (const_int 16) (const_int 48)
9429 (const_int 17) (const_int 49)
9430 (const_int 18) (const_int 50)
9431 (const_int 19) (const_int 51)
9432 (const_int 20) (const_int 52)
9433 (const_int 21) (const_int 53)
9434 (const_int 22) (const_int 54)
9435 (const_int 23) (const_int 55)])))]
9437 "vpunpcklbw\t{%2, %1, %0|%0, %1, %2}"
9438 [(set_attr "type" "sselog")
9439 (set_attr "prefix" "vex")
9440 (set_attr "mode" "OI")])
;; punpcklbw / vpunpcklbw on V16QI.  The selection list pairs index i
;; with index i + 16 for i = 0..7.  The two alternatives are the
;; two-operand form (isa noavx, operand 1 tied to the destination) and
;; the three-operand "v" form (isa avx).
9442 (define_insn "vec_interleave_lowv16qi"
9443 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
9446 (match_operand:V16QI 1 "register_operand" "0,x")
9447 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm"))
9448 (parallel [(const_int 0) (const_int 16)
9449 (const_int 1) (const_int 17)
9450 (const_int 2) (const_int 18)
9451 (const_int 3) (const_int 19)
9452 (const_int 4) (const_int 20)
9453 (const_int 5) (const_int 21)
9454 (const_int 6) (const_int 22)
9455 (const_int 7) (const_int 23)])))]
9458 punpcklbw\t{%2, %0|%0, %2}
9459 vpunpcklbw\t{%2, %1, %0|%0, %1, %2}"
9460 [(set_attr "isa" "noavx,avx")
9461 (set_attr "type" "sselog")
9462 (set_attr "prefix_data16" "1,*")
9463 (set_attr "prefix" "orig,vex")
9464 (set_attr "mode" "TI")])
;; vpunpckhwd on V16HI.  The selection list pairs index i with index
;; i + 16 for i = 4..7 and for i = 12..15.  Single alternative with a
;; VEX prefix and mode OI.
9466 (define_insn "avx2_interleave_highv16hi"
9467 [(set (match_operand:V16HI 0 "register_operand" "=x")
9470 (match_operand:V16HI 1 "register_operand" "x")
9471 (match_operand:V16HI 2 "nonimmediate_operand" "xm"))
9472 (parallel [(const_int 4) (const_int 20)
9473 (const_int 5) (const_int 21)
9474 (const_int 6) (const_int 22)
9475 (const_int 7) (const_int 23)
9476 (const_int 12) (const_int 28)
9477 (const_int 13) (const_int 29)
9478 (const_int 14) (const_int 30)
9479 (const_int 15) (const_int 31)])))]
9481 "vpunpckhwd\t{%2, %1, %0|%0, %1, %2}"
9482 [(set_attr "type" "sselog")
9483 (set_attr "prefix" "vex")
9484 (set_attr "mode" "OI")])
;; punpckhwd / vpunpckhwd on V8HI.  The selection list pairs index i
;; with index i + 8 for i = 4..7.  The two alternatives are the
;; two-operand form (isa noavx, operand 1 tied to the destination) and
;; the three-operand "v" form (isa avx).
9486 (define_insn "vec_interleave_highv8hi"
9487 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
9490 (match_operand:V8HI 1 "register_operand" "0,x")
9491 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm"))
9492 (parallel [(const_int 4) (const_int 12)
9493 (const_int 5) (const_int 13)
9494 (const_int 6) (const_int 14)
9495 (const_int 7) (const_int 15)])))]
9498 punpckhwd\t{%2, %0|%0, %2}
9499 vpunpckhwd\t{%2, %1, %0|%0, %1, %2}"
9500 [(set_attr "isa" "noavx,avx")
9501 (set_attr "type" "sselog")
9502 (set_attr "prefix_data16" "1,*")
9503 (set_attr "prefix" "orig,vex")
9504 (set_attr "mode" "TI")])
;; vpunpcklwd on V16HI.  The selection list pairs index i with index
;; i + 16 for i = 0..3 and for i = 8..11.  Single alternative with a
;; VEX prefix and mode OI.
9506 (define_insn "avx2_interleave_lowv16hi"
9507 [(set (match_operand:V16HI 0 "register_operand" "=x")
9510 (match_operand:V16HI 1 "register_operand" "x")
9511 (match_operand:V16HI 2 "nonimmediate_operand" "xm"))
9512 (parallel [(const_int 0) (const_int 16)
9513 (const_int 1) (const_int 17)
9514 (const_int 2) (const_int 18)
9515 (const_int 3) (const_int 19)
9516 (const_int 8) (const_int 24)
9517 (const_int 9) (const_int 25)
9518 (const_int 10) (const_int 26)
9519 (const_int 11) (const_int 27)])))]
9521 "vpunpcklwd\t{%2, %1, %0|%0, %1, %2}"
9522 [(set_attr "type" "sselog")
9523 (set_attr "prefix" "vex")
9524 (set_attr "mode" "OI")])
;; punpcklwd / vpunpcklwd on V8HI.  The selection list pairs index i
;; with index i + 8 for i = 0..3.  The two alternatives are the
;; two-operand form (isa noavx, operand 1 tied to the destination) and
;; the three-operand "v" form (isa avx).
9526 (define_insn "vec_interleave_lowv8hi"
9527 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
9530 (match_operand:V8HI 1 "register_operand" "0,x")
9531 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm"))
9532 (parallel [(const_int 0) (const_int 8)
9533 (const_int 1) (const_int 9)
9534 (const_int 2) (const_int 10)
9535 (const_int 3) (const_int 11)])))]
9538 punpcklwd\t{%2, %0|%0, %2}
9539 vpunpcklwd\t{%2, %1, %0|%0, %1, %2}"
9540 [(set_attr "isa" "noavx,avx")
9541 (set_attr "type" "sselog")
9542 (set_attr "prefix_data16" "1,*")
9543 (set_attr "prefix" "orig,vex")
9544 (set_attr "mode" "TI")])
;; vpunpckhdq on V8SI.  The selection list pairs index i with index
;; i + 8 for i = 2, 3, 6 and 7.  Single alternative with a VEX prefix
;; and mode OI.
9546 (define_insn "avx2_interleave_highv8si"
9547 [(set (match_operand:V8SI 0 "register_operand" "=x")
9550 (match_operand:V8SI 1 "register_operand" "x")
9551 (match_operand:V8SI 2 "nonimmediate_operand" "xm"))
9552 (parallel [(const_int 2) (const_int 10)
9553 (const_int 3) (const_int 11)
9554 (const_int 6) (const_int 14)
9555 (const_int 7) (const_int 15)])))]
9557 "vpunpckhdq\t{%2, %1, %0|%0, %1, %2}"
9558 [(set_attr "type" "sselog")
9559 (set_attr "prefix" "vex")
9560 (set_attr "mode" "OI")])
;; vpunpckhdq on V16SI, with <mask_operand3> attached to the
;; destination in the template.  The selection list pairs index i with
;; index i + 16 for i = 2, 3, 6, 7, 10, 11, 14 and 15.  Single
;; alternative with an EVEX prefix and mode XI.
9562 (define_insn "<mask_codefor>avx512f_interleave_highv16si<mask_name>"
9563 [(set (match_operand:V16SI 0 "register_operand" "=v")
9566 (match_operand:V16SI 1 "register_operand" "v")
9567 (match_operand:V16SI 2 "nonimmediate_operand" "vm"))
9568 (parallel [(const_int 2) (const_int 18)
9569 (const_int 3) (const_int 19)
9570 (const_int 6) (const_int 22)
9571 (const_int 7) (const_int 23)
9572 (const_int 10) (const_int 26)
9573 (const_int 11) (const_int 27)
9574 (const_int 14) (const_int 30)
9575 (const_int 15) (const_int 31)])))]
9577 "vpunpckhdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9578 [(set_attr "type" "sselog")
9579 (set_attr "prefix" "evex")
9580 (set_attr "mode" "XI")])
;; punpckhdq / vpunpckhdq on V4SI.  The selection list pairs index i
;; with index i + 4 for i = 2 and 3.  The two alternatives are the
;; two-operand form (isa noavx, operand 1 tied to the destination) and
;; the three-operand "v" form (isa avx).
9583 (define_insn "vec_interleave_highv4si"
9584 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
9587 (match_operand:V4SI 1 "register_operand" "0,x")
9588 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm"))
9589 (parallel [(const_int 2) (const_int 6)
9590 (const_int 3) (const_int 7)])))]
9593 punpckhdq\t{%2, %0|%0, %2}
9594 vpunpckhdq\t{%2, %1, %0|%0, %1, %2}"
9595 [(set_attr "isa" "noavx,avx")
9596 (set_attr "type" "sselog")
9597 (set_attr "prefix_data16" "1,*")
9598 (set_attr "prefix" "orig,vex")
9599 (set_attr "mode" "TI")])
;; vpunpckldq on V8SI.  The selection list pairs index i with index
;; i + 8 for i = 0, 1, 4 and 5.  Single alternative with a VEX prefix
;; and mode OI.
9601 (define_insn "avx2_interleave_lowv8si"
9602 [(set (match_operand:V8SI 0 "register_operand" "=x")
9605 (match_operand:V8SI 1 "register_operand" "x")
9606 (match_operand:V8SI 2 "nonimmediate_operand" "xm"))
9607 (parallel [(const_int 0) (const_int 8)
9608 (const_int 1) (const_int 9)
9609 (const_int 4) (const_int 12)
9610 (const_int 5) (const_int 13)])))]
9612 "vpunpckldq\t{%2, %1, %0|%0, %1, %2}"
9613 [(set_attr "type" "sselog")
9614 (set_attr "prefix" "vex")
9615 (set_attr "mode" "OI")])
;; vpunpckldq on V16SI, with <mask_operand3> attached to the
;; destination in the template.  The selection list pairs index i with
;; index i + 16 for i = 0, 1, 4, 5, 8, 9, 12 and 13.  Single
;; alternative with an EVEX prefix and mode XI.
9617 (define_insn "<mask_codefor>avx512f_interleave_lowv16si<mask_name>"
9618 [(set (match_operand:V16SI 0 "register_operand" "=v")
9621 (match_operand:V16SI 1 "register_operand" "v")
9622 (match_operand:V16SI 2 "nonimmediate_operand" "vm"))
9623 (parallel [(const_int 0) (const_int 16)
9624 (const_int 1) (const_int 17)
9625 (const_int 4) (const_int 20)
9626 (const_int 5) (const_int 21)
9627 (const_int 8) (const_int 24)
9628 (const_int 9) (const_int 25)
9629 (const_int 12) (const_int 28)
9630 (const_int 13) (const_int 29)])))]
9632 "vpunpckldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9633 [(set_attr "type" "sselog")
9634 (set_attr "prefix" "evex")
9635 (set_attr "mode" "XI")])
;; punpckldq / vpunpckldq on V4SI.  The selection list pairs index i
;; with index i + 4 for i = 0 and 1.  The two alternatives are the
;; two-operand form (isa noavx, operand 1 tied to the destination) and
;; the three-operand "v" form (isa avx).
9637 (define_insn "vec_interleave_lowv4si"
9638 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
9641 (match_operand:V4SI 1 "register_operand" "0,x")
9642 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm"))
9643 (parallel [(const_int 0) (const_int 4)
9644 (const_int 1) (const_int 5)])))]
9647 punpckldq\t{%2, %0|%0, %2}
9648 vpunpckldq\t{%2, %1, %0|%0, %1, %2}"
9649 [(set_attr "isa" "noavx,avx")
9650 (set_attr "type" "sselog")
9651 (set_attr "prefix_data16" "1,*")
9652 (set_attr "prefix" "orig,vex")
9653 (set_attr "mode" "TI")])
;; vec_interleave_high expander for the VI_256 modes, built from three
;; insns: avx2_interleave_low<mode> into t1, avx2_interleave_high<mode>
;; into t2, then avx2_permv2ti on the V4DI views of t1 and t2 with
;; immediate 1 + (3 << 4), i.e. 0x31.  The V4DI result t3 is copied to
;; operand 0 through gen_lowpart.
9655 (define_expand "vec_interleave_high<mode>"
9656 [(match_operand:VI_256 0 "register_operand" "=x")
9657 (match_operand:VI_256 1 "register_operand" "x")
9658 (match_operand:VI_256 2 "nonimmediate_operand" "xm")]
9661 rtx t1 = gen_reg_rtx (<MODE>mode);
9662 rtx t2 = gen_reg_rtx (<MODE>mode);
9663 rtx t3 = gen_reg_rtx (V4DImode);
9664 emit_insn (gen_avx2_interleave_low<mode> (t1, operands[1], operands[2]));
9665 emit_insn (gen_avx2_interleave_high<mode> (t2, operands[1], operands[2]));
9666 emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, t1),
9667 gen_lowpart (V4DImode, t2),
9668 GEN_INT (1 + (3 << 4))));
9669 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, t3));
;; vec_interleave_low expander for the VI_256 modes, built from three
;; insns: avx2_interleave_low<mode> into t1, avx2_interleave_high<mode>
;; into t2, then avx2_permv2ti on the V4DI views of t1 and t2 with
;; immediate 0 + (2 << 4), i.e. 0x20.  The V4DI result t3 is copied to
;; operand 0 through gen_lowpart.
9673 (define_expand "vec_interleave_low<mode>"
9674 [(match_operand:VI_256 0 "register_operand" "=x")
9675 (match_operand:VI_256 1 "register_operand" "x")
9676 (match_operand:VI_256 2 "nonimmediate_operand" "xm")]
9679 rtx t1 = gen_reg_rtx (<MODE>mode);
9680 rtx t2 = gen_reg_rtx (<MODE>mode);
9681 rtx t3 = gen_reg_rtx (V4DImode);
9682 emit_insn (gen_avx2_interleave_low<mode> (t1, operands[1], operands[2]));
9683 emit_insn (gen_avx2_interleave_high<mode> (t2, operands[1], operands[2]));
9684 emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, t1),
9685 gen_lowpart (V4DImode, t2),
9686 GEN_INT (0 + (2 << 4))));
9687 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, t3));
9691 ;; Modes handled by pinsr patterns.
;; V8HI is unconditional; V16QI and V4SI require TARGET_SSE4_1; V2DI
;; additionally requires TARGET_64BIT.
9692 (define_mode_iterator PINSR_MODE
9693 [(V16QI "TARGET_SSE4_1") V8HI
9694 (V4SI "TARGET_SSE4_1")
9695 (V2DI "TARGET_SSE4_1 && TARGET_64BIT")])
;; Name prefix of the pinsr pattern for each mode: "sse2" for V8HI and
;; "sse4_1" for V16QI, V4SI and V2DI.
9697 (define_mode_attr sse2p4_1
9698 [(V16QI "sse4_1") (V8HI "sse2")
9699 (V4SI "sse4_1") (V2DI "sse4_1")])
9701 ;; sse4_1_pinsrd must come before sse2_loadld since it is preferred.
;; pinsr / vpinsr for the PINSR_MODE modes: a vec_merge of scalar
;; operand 2 (duplicated across the vector) into vector operand 1,
;; controlled by the constant operand 3.  The condition requires
;; exact_log2 (operand 3) to be a valid element number, i.e. operand 3
;; must have exactly one bit set, below GET_MODE_NUNITS.  The output
;; code replaces operand 3 with that element number before printing.
;;
;; There are four alternatives: scalar in a register or in memory, each
;; in a two-operand form (isa noavx) and a three-operand "v" form (isa
;; avx).  Where the output code tests the scalar size, scalars narrower
;; than SImode are printed with the %k2 modifier.  The prefix_rex,
;; prefix_data16 and prefix_extra attributes depend on TARGET_AVX and
;; on whether the mode is V2DI or V8HI.
9702 (define_insn "<sse2p4_1>_pinsr<ssemodesuffix>"
9703 [(set (match_operand:PINSR_MODE 0 "register_operand" "=x,x,x,x")
9704 (vec_merge:PINSR_MODE
9705 (vec_duplicate:PINSR_MODE
9706 (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "r,m,r,m"))
9707 (match_operand:PINSR_MODE 1 "register_operand" "0,0,x,x")
9708 (match_operand:SI 3 "const_int_operand")))]
9710 && ((unsigned) exact_log2 (INTVAL (operands[3]))
9711 < GET_MODE_NUNITS (<MODE>mode))"
9713 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
9715 switch (which_alternative)
9718 if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
9719 return "pinsr<ssemodesuffix>\t{%3, %k2, %0|%0, %k2, %3}";
9722 return "pinsr<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}";
9724 if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
9725 return "vpinsr<ssemodesuffix>\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
9728 return "vpinsr<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
9733 [(set_attr "isa" "noavx,noavx,avx,avx")
9734 (set_attr "type" "sselog")
9735 (set (attr "prefix_rex")
9737 (and (not (match_test "TARGET_AVX"))
9738 (eq (const_string "<MODE>mode") (const_string "V2DImode")))
9740 (const_string "*")))
9741 (set (attr "prefix_data16")
9743 (and (not (match_test "TARGET_AVX"))
9744 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
9746 (const_string "*")))
9747 (set (attr "prefix_extra")
9749 (and (not (match_test "TARGET_AVX"))
9750 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
9752 (const_string "1")))
9753 (set_attr "length_immediate" "1")
9754 (set_attr "prefix" "orig,orig,vex,vex")
9755 (set_attr "mode" "TI")])
;; Masked vinsert 32x4 expander for the V16FI modes.  Operand 2 is the
;; quarter-width vector to insert into operand 1, operand 3 is a
;; position in 0..3, and operands 4 and 5 are passed through unchanged
;; as the extra operands of the masked insn.  The code switches on
;; operand 3 and emits avx512f_vinsert<shuffletype>32x4_1_mask with a
;; selector constant; in source order the four emit calls pass 0xFFF,
;; 0xF0FF, 0xFF0F and 0xFFF0.
9757 (define_expand "avx512f_vinsert<shuffletype>32x4_mask"
9758 [(match_operand:V16FI 0 "register_operand")
9759 (match_operand:V16FI 1 "register_operand")
9760 (match_operand:<ssequartermode> 2 "nonimmediate_operand")
9761 (match_operand:SI 3 "const_0_to_3_operand")
9762 (match_operand:V16FI 4 "register_operand")
9763 (match_operand:<avx512fmaskmode> 5 "register_operand")]
9766 switch (INTVAL (operands[3]))
9769 emit_insn (gen_avx512f_vinsert<shuffletype>32x4_1_mask (operands[0],
9770 operands[1], operands[2], GEN_INT (0xFFF), operands[4],
9774 emit_insn (gen_avx512f_vinsert<shuffletype>32x4_1_mask (operands[0],
9775 operands[1], operands[2], GEN_INT (0xF0FF), operands[4],
9779 emit_insn (gen_avx512f_vinsert<shuffletype>32x4_1_mask (operands[0],
9780 operands[1], operands[2], GEN_INT (0xFF0F), operands[4],
9784 emit_insn (gen_avx512f_vinsert<shuffletype>32x4_1_mask (operands[0],
9785 operands[1], operands[2], GEN_INT (0xFFF0), operands[4],
;; vinsert 32x4 insn for the V16FI modes.  Operand 3 is a constant
;; selector that combines register operand 1 with the duplicated
;; quarter-width operand 2.  The output code recognizes the four
;; selector values 0xFFF, 0xF0FF, 0xFF0F and 0xFFF0, replaces operand 3
;; with the value of "mask" and prints vinsert<shuffletype>32x4 with
;; <mask_operand4> attached to the destination.
9795 (define_insn "<mask_codefor>avx512f_vinsert<shuffletype>32x4_1<mask_name>"
9796 [(set (match_operand:V16FI 0 "register_operand" "=v")
9798 (match_operand:V16FI 1 "register_operand" "v")
9799 (vec_duplicate:V16FI
9800 (match_operand:<ssequartermode> 2 "nonimmediate_operand" "vm"))
9801 (match_operand:SI 3 "const_int_operand" "n")))]
9805 if (INTVAL (operands[3]) == 0xFFF)
9807 else if ( INTVAL (operands[3]) == 0xF0FF)
9809 else if ( INTVAL (operands[3]) == 0xFF0F)
9811 else if ( INTVAL (operands[3]) == 0xFFF0)
9816 operands[3] = GEN_INT (mask);
9818 return "vinsert<shuffletype>32x4\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}";
9820 [(set_attr "type" "sselog")
9821 (set_attr "length_immediate" "1")
9822 (set_attr "prefix" "evex")
9823 (set_attr "mode" "<sseinsnmode>")])
;; Masked vinsert 64x4 expander for the V8FI modes.  Operand 2 is the
;; half-width vector, operand 3 is 0 or 1, and operands 4 and 5 are
;; passed through unchanged.  The code reads operand 3 into "mask" and
;; emits either vec_set_lo_<mode>_mask or vec_set_hi_<mode>_mask
;; (presumably selected by that value).
9825 (define_expand "avx512f_vinsert<shuffletype>64x4_mask"
9826 [(match_operand:V8FI 0 "register_operand")
9827 (match_operand:V8FI 1 "register_operand")
9828 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand")
9829 (match_operand:SI 3 "const_0_to_1_operand")
9830 (match_operand:V8FI 4 "register_operand")
9831 (match_operand:<avx512fmaskmode> 5 "register_operand")]
9834 int mask = INTVAL (operands[3]);
9836 emit_insn (gen_vec_set_lo_<mode>_mask
9837 (operands[0], operands[1], operands[2],
9838 operands[4], operands[5]));
9840 emit_insn (gen_vec_set_hi_<mode>_mask
9841 (operands[0], operands[1], operands[2],
9842 operands[4], operands[5]));
;; vinsert 64x4 with immediate 0x0 for the V8FI modes.  The pattern
;; combines half-width operand 2 with elements 4..7 selected from
;; operand 1.  <mask_operand3> is attached to the destination in the
;; template.
9846 (define_insn "vec_set_lo_<mode><mask_name>"
9847 [(set (match_operand:V8FI 0 "register_operand" "=v")
9849 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
9850 (vec_select:<ssehalfvecmode>
9851 (match_operand:V8FI 1 "register_operand" "v")
9852 (parallel [(const_int 4) (const_int 5)
9853 (const_int 6) (const_int 7)]))))]
9855 "vinsert<shuffletype>64x4\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, $0x0}"
9856 [(set_attr "type" "sselog")
9857 (set_attr "length_immediate" "1")
9858 (set_attr "prefix" "evex")
9859 (set_attr "mode" "XI")])
;; vinsert 64x4 with immediate 0x1 for the V8FI modes.  The pattern
;; combines half-width operand 2 with elements 0..3 selected from
;; operand 1.  <mask_operand3> is attached to the destination in the
;; template.
9861 (define_insn "vec_set_hi_<mode><mask_name>"
9862 [(set (match_operand:V8FI 0 "register_operand" "=v")
9864 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
9865 (vec_select:<ssehalfvecmode>
9866 (match_operand:V8FI 1 "register_operand" "v")
9867 (parallel [(const_int 0) (const_int 1)
9868 (const_int 2) (const_int 3)]))))]
9870 "vinsert<shuffletype>64x4\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, $0x1}"
9871 [(set_attr "type" "sselog")
9872 (set_attr "length_immediate" "1")
9873 (set_attr "prefix" "evex")
9874 (set_attr "mode" "XI")])
;; Masked shuf 64x2 expander for the V8FI modes.  The 8-bit immediate
;; in operand 3 is split into four 2-bit fields.  Each field f becomes
;; the index pair (2f, 2f + 1); the pairs derived from bits 4-5 and
;; bits 6-7 are additionally offset by 8.  The eight indices, followed
;; by operands 4 and 5, are passed to
;; avx512f_shuf_<shuffletype>64x2_1_mask.
9876 (define_expand "avx512f_shuf_<shuffletype>64x2_mask"
9877 [(match_operand:V8FI 0 "register_operand")
9878 (match_operand:V8FI 1 "register_operand")
9879 (match_operand:V8FI 2 "nonimmediate_operand")
9880 (match_operand:SI 3 "const_0_to_255_operand")
9881 (match_operand:V8FI 4 "register_operand")
9882 (match_operand:QI 5 "register_operand")]
9885 int mask = INTVAL (operands[3]);
9886 emit_insn (gen_avx512f_shuf_<shuffletype>64x2_1_mask
9887 (operands[0], operands[1], operands[2],
9888 GEN_INT (((mask >> 0) & 3) * 2),
9889 GEN_INT (((mask >> 0) & 3) * 2 + 1),
9890 GEN_INT (((mask >> 2) & 3) * 2),
9891 GEN_INT (((mask >> 2) & 3) * 2 + 1),
9892 GEN_INT (((mask >> 4) & 3) * 2 + 8),
9893 GEN_INT (((mask >> 4) & 3) * 2 + 9),
9894 GEN_INT (((mask >> 6) & 3) * 2 + 8),
9895 GEN_INT (((mask >> 6) & 3) * 2 + 9),
9896 operands[4], operands[5]));
;; vshuf 64x2 insn for the V8FI modes.  Operands 3..10 are the eight
;; selection indices into the concatenation of operands 1 and 2: the
;; first four must be in 0..7 and the last four in 8..15.  The
;; condition also requires each pair (3,4), (5,6), (7,8), (9,10) to be
;; consecutive.  The output code rebuilds the 8-bit immediate from the
;; first index of each pair (halved, after subtracting 8 for the last
;; two pairs), stores it in operand 3 and prints
;; vshuf<shuffletype>64x2 with <mask_operand11> on the destination.
9900 (define_insn "avx512f_shuf_<shuffletype>64x2_1<mask_name>"
9901 [(set (match_operand:V8FI 0 "register_operand" "=v")
9903 (vec_concat:<ssedoublemode>
9904 (match_operand:V8FI 1 "register_operand" "v")
9905 (match_operand:V8FI 2 "nonimmediate_operand" "vm"))
9906 (parallel [(match_operand 3 "const_0_to_7_operand")
9907 (match_operand 4 "const_0_to_7_operand")
9908 (match_operand 5 "const_0_to_7_operand")
9909 (match_operand 6 "const_0_to_7_operand")
9910 (match_operand 7 "const_8_to_15_operand")
9911 (match_operand 8 "const_8_to_15_operand")
9912 (match_operand 9 "const_8_to_15_operand")
9913 (match_operand 10 "const_8_to_15_operand")])))]
9915 && (INTVAL (operands[3]) == (INTVAL (operands[4]) - 1)
9916 && INTVAL (operands[5]) == (INTVAL (operands[6]) - 1)
9917 && INTVAL (operands[7]) == (INTVAL (operands[8]) - 1)
9918 && INTVAL (operands[9]) == (INTVAL (operands[10]) - 1))"
9921 mask = INTVAL (operands[3]) / 2;
9922 mask |= INTVAL (operands[5]) / 2 << 2;
9923 mask |= (INTVAL (operands[7]) - 8) / 2 << 4;
9924 mask |= (INTVAL (operands[9]) - 8) / 2 << 6;
9925 operands[3] = GEN_INT (mask);
9927 return "vshuf<shuffletype>64x2\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
9929 [(set_attr "type" "sselog")
9930 (set_attr "length_immediate" "1")
9931 (set_attr "prefix" "evex")
9932 (set_attr "mode" "<sseinsnmode>")])
;; Masked shuf 32x4 expander for the V16FI modes.  The 8-bit immediate
;; in operand 3 is split into four 2-bit fields.  Each field f becomes
;; the four consecutive indices 4f .. 4f + 3; the groups derived from
;; bits 4-5 and bits 6-7 are additionally offset by 16.  The sixteen
;; indices, followed by operands 4 and 5, are passed to
;; avx512f_shuf_<shuffletype>32x4_1_mask.
9934 (define_expand "avx512f_shuf_<shuffletype>32x4_mask"
9935 [(match_operand:V16FI 0 "register_operand")
9936 (match_operand:V16FI 1 "register_operand")
9937 (match_operand:V16FI 2 "nonimmediate_operand")
9938 (match_operand:SI 3 "const_0_to_255_operand")
9939 (match_operand:V16FI 4 "register_operand")
9940 (match_operand:HI 5 "register_operand")]
9943 int mask = INTVAL (operands[3]);
9944 emit_insn (gen_avx512f_shuf_<shuffletype>32x4_1_mask
9945 (operands[0], operands[1], operands[2],
9946 GEN_INT (((mask >> 0) & 3) * 4),
9947 GEN_INT (((mask >> 0) & 3) * 4 + 1),
9948 GEN_INT (((mask >> 0) & 3) * 4 + 2),
9949 GEN_INT (((mask >> 0) & 3) * 4 + 3),
9950 GEN_INT (((mask >> 2) & 3) * 4),
9951 GEN_INT (((mask >> 2) & 3) * 4 + 1),
9952 GEN_INT (((mask >> 2) & 3) * 4 + 2),
9953 GEN_INT (((mask >> 2) & 3) * 4 + 3),
9954 GEN_INT (((mask >> 4) & 3) * 4 + 16),
9955 GEN_INT (((mask >> 4) & 3) * 4 + 17),
9956 GEN_INT (((mask >> 4) & 3) * 4 + 18),
9957 GEN_INT (((mask >> 4) & 3) * 4 + 19),
9958 GEN_INT (((mask >> 6) & 3) * 4 + 16),
9959 GEN_INT (((mask >> 6) & 3) * 4 + 17),
9960 GEN_INT (((mask >> 6) & 3) * 4 + 18),
9961 GEN_INT (((mask >> 6) & 3) * 4 + 19),
9962 operands[4], operands[5]));
;; vshuf 32x4 insn for the V16FI modes.  Operands 3..18 are the sixteen
;; selection indices into the concatenation of operands 1 and 2: the
;; first eight must be in 0..15 and the last eight in 16..31.  The
;; condition also requires each group of four (3-6, 7-10, 11-14, 15-18)
;; to be consecutive, starting at the group's first index.  The output
;; code rebuilds the 8-bit immediate from the first index of each group
;; (divided by 4, after subtracting 16 for the last two groups), stores
;; it in operand 3 and prints vshuf<shuffletype>32x4 with
;; <mask_operand19> on the destination.
9966 (define_insn "avx512f_shuf_<shuffletype>32x4_1<mask_name>"
9967 [(set (match_operand:V16FI 0 "register_operand" "=v")
9969 (vec_concat:<ssedoublemode>
9970 (match_operand:V16FI 1 "register_operand" "v")
9971 (match_operand:V16FI 2 "nonimmediate_operand" "vm"))
9972 (parallel [(match_operand 3 "const_0_to_15_operand")
9973 (match_operand 4 "const_0_to_15_operand")
9974 (match_operand 5 "const_0_to_15_operand")
9975 (match_operand 6 "const_0_to_15_operand")
9976 (match_operand 7 "const_0_to_15_operand")
9977 (match_operand 8 "const_0_to_15_operand")
9978 (match_operand 9 "const_0_to_15_operand")
9979 (match_operand 10 "const_0_to_15_operand")
9980 (match_operand 11 "const_16_to_31_operand")
9981 (match_operand 12 "const_16_to_31_operand")
9982 (match_operand 13 "const_16_to_31_operand")
9983 (match_operand 14 "const_16_to_31_operand")
9984 (match_operand 15 "const_16_to_31_operand")
9985 (match_operand 16 "const_16_to_31_operand")
9986 (match_operand 17 "const_16_to_31_operand")
9987 (match_operand 18 "const_16_to_31_operand")])))]
9989 && (INTVAL (operands[3]) == (INTVAL (operands[4]) - 1)
9990 && INTVAL (operands[3]) == (INTVAL (operands[5]) - 2)
9991 && INTVAL (operands[3]) == (INTVAL (operands[6]) - 3)
9992 && INTVAL (operands[7]) == (INTVAL (operands[8]) - 1)
9993 && INTVAL (operands[7]) == (INTVAL (operands[9]) - 2)
9994 && INTVAL (operands[7]) == (INTVAL (operands[10]) - 3)
9995 && INTVAL (operands[11]) == (INTVAL (operands[12]) - 1)
9996 && INTVAL (operands[11]) == (INTVAL (operands[13]) - 2)
9997 && INTVAL (operands[11]) == (INTVAL (operands[14]) - 3)
9998 && INTVAL (operands[15]) == (INTVAL (operands[16]) - 1)
9999 && INTVAL (operands[15]) == (INTVAL (operands[17]) - 2)
10000 && INTVAL (operands[15]) == (INTVAL (operands[18]) - 3))"
10003 mask = INTVAL (operands[3]) / 4;
10004 mask |= INTVAL (operands[7]) / 4 << 2;
10005 mask |= (INTVAL (operands[11]) - 16) / 4 << 4;
10006 mask |= (INTVAL (operands[15]) - 16) / 4 << 6;
10007 operands[3] = GEN_INT (mask);
10009 return "vshuf<shuffletype>32x4\t{%3, %2, %1, %0<mask_operand19>|%0<mask_operand19>, %1, %2, %3}";
10011 [(set_attr "type" "sselog")
10012 (set_attr "length_immediate" "1")
10013 (set_attr "prefix" "evex")
10014 (set_attr "mode" "<sseinsnmode>")])
;; Masked pshufd expander for V16SI.  The 8-bit immediate in operand 2
;; is split into four 2-bit fields, and the same four values are
;; emitted four times with offsets 0, 4, 8 and 12.  The resulting
;; sixteen indices, followed by operands 3 and 4, are passed to
;; avx512f_pshufd_1_mask.
10016 (define_expand "avx512f_pshufdv3_mask"
10017 [(match_operand:V16SI 0 "register_operand")
10018 (match_operand:V16SI 1 "nonimmediate_operand")
10019 (match_operand:SI 2 "const_0_to_255_operand")
10020 (match_operand:V16SI 3 "register_operand")
10021 (match_operand:HI 4 "register_operand")]
10024 int mask = INTVAL (operands[2]);
10025 emit_insn (gen_avx512f_pshufd_1_mask (operands[0], operands[1],
10026 GEN_INT ((mask >> 0) & 3),
10027 GEN_INT ((mask >> 2) & 3),
10028 GEN_INT ((mask >> 4) & 3),
10029 GEN_INT ((mask >> 6) & 3),
10030 GEN_INT (((mask >> 0) & 3) + 4),
10031 GEN_INT (((mask >> 2) & 3) + 4),
10032 GEN_INT (((mask >> 4) & 3) + 4),
10033 GEN_INT (((mask >> 6) & 3) + 4),
10034 GEN_INT (((mask >> 0) & 3) + 8),
10035 GEN_INT (((mask >> 2) & 3) + 8),
10036 GEN_INT (((mask >> 4) & 3) + 8),
10037 GEN_INT (((mask >> 6) & 3) + 8),
10038 GEN_INT (((mask >> 0) & 3) + 12),
10039 GEN_INT (((mask >> 2) & 3) + 12),
10040 GEN_INT (((mask >> 4) & 3) + 12),
10041 GEN_INT (((mask >> 6) & 3) + 12),
10042 operands[3], operands[4]));
;; vpshufd insn for V16SI.  Operands 2..17 are sixteen selection
;; indices in four groups of four, restricted to 0..3, 4..7, 8..11 and
;; 12..15 respectively.  The condition requires the second, third and
;; fourth groups to equal the first group plus 4, 8 and 12.  The output
;; code packs operands 2..5 into an 8-bit immediate (two bits each,
;; operand 2 in the low bits), stores it in operand 2 and prints
;; vpshufd with <mask_operand18> on the destination.
10046 (define_insn "avx512f_pshufd_1<mask_name>"
10047 [(set (match_operand:V16SI 0 "register_operand" "=v")
10049 (match_operand:V16SI 1 "nonimmediate_operand" "vm")
10050 (parallel [(match_operand 2 "const_0_to_3_operand")
10051 (match_operand 3 "const_0_to_3_operand")
10052 (match_operand 4 "const_0_to_3_operand")
10053 (match_operand 5 "const_0_to_3_operand")
10054 (match_operand 6 "const_4_to_7_operand")
10055 (match_operand 7 "const_4_to_7_operand")
10056 (match_operand 8 "const_4_to_7_operand")
10057 (match_operand 9 "const_4_to_7_operand")
10058 (match_operand 10 "const_8_to_11_operand")
10059 (match_operand 11 "const_8_to_11_operand")
10060 (match_operand 12 "const_8_to_11_operand")
10061 (match_operand 13 "const_8_to_11_operand")
10062 (match_operand 14 "const_12_to_15_operand")
10063 (match_operand 15 "const_12_to_15_operand")
10064 (match_operand 16 "const_12_to_15_operand")
10065 (match_operand 17 "const_12_to_15_operand")])))]
10067 && INTVAL (operands[2]) + 4 == INTVAL (operands[6])
10068 && INTVAL (operands[3]) + 4 == INTVAL (operands[7])
10069 && INTVAL (operands[4]) + 4 == INTVAL (operands[8])
10070 && INTVAL (operands[5]) + 4 == INTVAL (operands[9])
10071 && INTVAL (operands[2]) + 8 == INTVAL (operands[10])
10072 && INTVAL (operands[3]) + 8 == INTVAL (operands[11])
10073 && INTVAL (operands[4]) + 8 == INTVAL (operands[12])
10074 && INTVAL (operands[5]) + 8 == INTVAL (operands[13])
10075 && INTVAL (operands[2]) + 12 == INTVAL (operands[14])
10076 && INTVAL (operands[3]) + 12 == INTVAL (operands[15])
10077 && INTVAL (operands[4]) + 12 == INTVAL (operands[16])
10078 && INTVAL (operands[5]) + 12 == INTVAL (operands[17])"
10081 mask |= INTVAL (operands[2]) << 0;
10082 mask |= INTVAL (operands[3]) << 2;
10083 mask |= INTVAL (operands[4]) << 4;
10084 mask |= INTVAL (operands[5]) << 6;
10085 operands[2] = GEN_INT (mask);
10087 return "vpshufd\t{%2, %1, %0<mask_operand18>|%0<mask_operand18>, %1, %2}";
10089 [(set_attr "type" "sselog1")
10090 (set_attr "prefix" "evex")
10091 (set_attr "length_immediate" "1")
10092 (set_attr "mode" "XI")])
;; Expander for the V8SI dword shuffle with an 8-bit immediate (operand 2,
;; range 0..255).  Splits the immediate into four 2-bit selectors and
;; passes them to gen_avx2_pshufd_1 twice: unchanged for elements 0..3 and
;; with +4 added for elements 4..7.
;; NOTE(review): the condition string and the end of the C body are on
;; lines not visible in this excerpt.
10094 (define_expand "avx2_pshufdv3"
10095 [(match_operand:V8SI 0 "register_operand")
10096 (match_operand:V8SI 1 "nonimmediate_operand")
10097 (match_operand:SI 2 "const_0_to_255_operand")]
10100 int mask = INTVAL (operands[2]);
10101 emit_insn (gen_avx2_pshufd_1 (operands[0], operands[1],
10102 GEN_INT ((mask >> 0) & 3),
10103 GEN_INT ((mask >> 2) & 3),
10104 GEN_INT ((mask >> 4) & 3),
10105 GEN_INT ((mask >> 6) & 3),
10106 GEN_INT (((mask >> 0) & 3) + 4),
10107 GEN_INT (((mask >> 2) & 3) + 4),
10108 GEN_INT (((mask >> 4) & 3) + 4),
10109 GEN_INT (((mask >> 6) & 3) + 4)));
;; vpshufd on V8SI with the permutation written out as eight element
;; indices (operands 2..9).  The visible part of the condition requires
;; operands 6..9 to equal operands 2..5 plus 4, so both groups of four
;; elements use the same pattern.  The output code packs operands 2..5
;; (2 bits each) into one immediate and stores it in operands[2].
;; NOTE(review): the declaration of `mask' and the first clause of the
;; condition are on lines not visible in this excerpt.
10113 (define_insn "avx2_pshufd_1"
10114 [(set (match_operand:V8SI 0 "register_operand" "=x")
10116 (match_operand:V8SI 1 "nonimmediate_operand" "xm")
10117 (parallel [(match_operand 2 "const_0_to_3_operand")
10118 (match_operand 3 "const_0_to_3_operand")
10119 (match_operand 4 "const_0_to_3_operand")
10120 (match_operand 5 "const_0_to_3_operand")
10121 (match_operand 6 "const_4_to_7_operand")
10122 (match_operand 7 "const_4_to_7_operand")
10123 (match_operand 8 "const_4_to_7_operand")
10124 (match_operand 9 "const_4_to_7_operand")])))]
10126 && INTVAL (operands[2]) + 4 == INTVAL (operands[6])
10127 && INTVAL (operands[3]) + 4 == INTVAL (operands[7])
10128 && INTVAL (operands[4]) + 4 == INTVAL (operands[8])
10129 && INTVAL (operands[5]) + 4 == INTVAL (operands[9])"
10132 mask |= INTVAL (operands[2]) << 0;
10133 mask |= INTVAL (operands[3]) << 2;
10134 mask |= INTVAL (operands[4]) << 4;
10135 mask |= INTVAL (operands[5]) << 6;
10136 operands[2] = GEN_INT (mask);
10138 return "vpshufd\t{%2, %1, %0|%0, %1, %2}";
10140 [(set_attr "type" "sselog1")
10141 (set_attr "prefix" "vex")
10142 (set_attr "length_immediate" "1")
10143 (set_attr "mode" "OI")])
;; Expander for the V4SI dword shuffle with an immediate (operand 2).
;; Unlike the AVX2/AVX512F expanders above, the predicate here is plain
;; const_int_operand; only bits 0..7 of the value are used, as four 2-bit
;; selectors passed to gen_sse2_pshufd_1.
;; NOTE(review): the condition string and the end of the C body are on
;; lines not visible in this excerpt.
10145 (define_expand "sse2_pshufd"
10146 [(match_operand:V4SI 0 "register_operand")
10147 (match_operand:V4SI 1 "nonimmediate_operand")
10148 (match_operand:SI 2 "const_int_operand")]
10151 int mask = INTVAL (operands[2]);
10152 emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
10153 GEN_INT ((mask >> 0) & 3),
10154 GEN_INT ((mask >> 2) & 3),
10155 GEN_INT ((mask >> 4) & 3),
10156 GEN_INT ((mask >> 6) & 3)));
;; pshufd / vpshufd on V4SI with the permutation given as four element
;; indices (operands 2..5, each 0..3).  The output code packs the four
;; indices, 2 bits each, into one immediate stored in operands[2]; the
;; "%v" prefix in the template goes with the "maybe_vex" prefix attribute.
;; NOTE(review): the condition string and the declaration of `mask' are on
;; lines not visible in this excerpt.
10160 (define_insn "sse2_pshufd_1"
10161 [(set (match_operand:V4SI 0 "register_operand" "=x")
10163 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
10164 (parallel [(match_operand 2 "const_0_to_3_operand")
10165 (match_operand 3 "const_0_to_3_operand")
10166 (match_operand 4 "const_0_to_3_operand")
10167 (match_operand 5 "const_0_to_3_operand")])))]
10171 mask |= INTVAL (operands[2]) << 0;
10172 mask |= INTVAL (operands[3]) << 2;
10173 mask |= INTVAL (operands[4]) << 4;
10174 mask |= INTVAL (operands[5]) << 6;
10175 operands[2] = GEN_INT (mask);
10177 return "%vpshufd\t{%2, %1, %0|%0, %1, %2}";
10179 [(set_attr "type" "sselog1")
10180 (set_attr "prefix_data16" "1")
10181 (set_attr "prefix" "maybe_vex")
10182 (set_attr "length_immediate" "1")
10183 (set_attr "mode" "TI")])
;; Expander for the V16HI low-word shuffle with an 8-bit immediate
;; (operand 2, range 0..255).  The four 2-bit selectors are passed to
;; gen_avx2_pshuflw_1 unchanged (indices 0..3) and again with +8 added
;; (indices 8..11).
;; NOTE(review): the condition string and the end of the C body are on
;; lines not visible in this excerpt.
10185 (define_expand "avx2_pshuflwv3"
10186 [(match_operand:V16HI 0 "register_operand")
10187 (match_operand:V16HI 1 "nonimmediate_operand")
10188 (match_operand:SI 2 "const_0_to_255_operand")]
10191 int mask = INTVAL (operands[2]);
10192 emit_insn (gen_avx2_pshuflw_1 (operands[0], operands[1],
10193 GEN_INT ((mask >> 0) & 3),
10194 GEN_INT ((mask >> 2) & 3),
10195 GEN_INT ((mask >> 4) & 3),
10196 GEN_INT ((mask >> 6) & 3),
10197 GEN_INT (((mask >> 0) & 3) + 8),
10198 GEN_INT (((mask >> 2) & 3) + 8),
10199 GEN_INT (((mask >> 4) & 3) + 8),
10200 GEN_INT (((mask >> 6) & 3) + 8)));
;; vpshuflw on V16HI.  Operands 2..5 (each 0..3) and operands 6..9 (each
;; 8..11) are the variable element indices; the visible part of the
;; condition requires operands 6..9 to equal operands 2..5 plus 8.  The
;; output code packs operands 2..5, 2 bits each, into the immediate.
;; NOTE(review): the fixed (const_int) entries of the selection other than
;; the final 15, the first clause of the condition and the declaration of
;; `mask' are on lines not visible in this excerpt.
10204 (define_insn "avx2_pshuflw_1"
10205 [(set (match_operand:V16HI 0 "register_operand" "=x")
10207 (match_operand:V16HI 1 "nonimmediate_operand" "xm")
10208 (parallel [(match_operand 2 "const_0_to_3_operand")
10209 (match_operand 3 "const_0_to_3_operand")
10210 (match_operand 4 "const_0_to_3_operand")
10211 (match_operand 5 "const_0_to_3_operand")
10216 (match_operand 6 "const_8_to_11_operand")
10217 (match_operand 7 "const_8_to_11_operand")
10218 (match_operand 8 "const_8_to_11_operand")
10219 (match_operand 9 "const_8_to_11_operand")
10223 (const_int 15)])))]
10225 && INTVAL (operands[2]) + 8 == INTVAL (operands[6])
10226 && INTVAL (operands[3]) + 8 == INTVAL (operands[7])
10227 && INTVAL (operands[4]) + 8 == INTVAL (operands[8])
10228 && INTVAL (operands[5]) + 8 == INTVAL (operands[9])"
10231 mask |= INTVAL (operands[2]) << 0;
10232 mask |= INTVAL (operands[3]) << 2;
10233 mask |= INTVAL (operands[4]) << 4;
10234 mask |= INTVAL (operands[5]) << 6;
10235 operands[2] = GEN_INT (mask);
10237 return "vpshuflw\t{%2, %1, %0|%0, %1, %2}";
10239 [(set_attr "type" "sselog")
10240 (set_attr "prefix" "vex")
10241 (set_attr "length_immediate" "1")
10242 (set_attr "mode" "OI")])
;; Expander for the V8HI low-word shuffle with an immediate (operand 2,
;; any const_int).  Bits 0..7 are split into four 2-bit selectors and
;; handed to gen_sse2_pshuflw_1.
;; NOTE(review): the condition string and the end of the C body are on
;; lines not visible in this excerpt.
10244 (define_expand "sse2_pshuflw"
10245 [(match_operand:V8HI 0 "register_operand")
10246 (match_operand:V8HI 1 "nonimmediate_operand")
10247 (match_operand:SI 2 "const_int_operand")]
10250 int mask = INTVAL (operands[2]);
10251 emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
10252 GEN_INT ((mask >> 0) & 3),
10253 GEN_INT ((mask >> 2) & 3),
10254 GEN_INT ((mask >> 4) & 3),
10255 GEN_INT ((mask >> 6) & 3)));
;; pshuflw / vpshuflw on V8HI.  Operands 2..5 (each 0..3) are the variable
;; element indices; the output code packs them, 2 bits each, into the
;; immediate stored in operands[2].  The attributes record a REP prefix
;; and no data16 prefix for the encoding.
;; NOTE(review): the remaining entries of the selection, the condition
;; string and the declaration of `mask' are on lines not visible in this
;; excerpt.
10259 (define_insn "sse2_pshuflw_1"
10260 [(set (match_operand:V8HI 0 "register_operand" "=x")
10262 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10263 (parallel [(match_operand 2 "const_0_to_3_operand")
10264 (match_operand 3 "const_0_to_3_operand")
10265 (match_operand 4 "const_0_to_3_operand")
10266 (match_operand 5 "const_0_to_3_operand")
10274 mask |= INTVAL (operands[2]) << 0;
10275 mask |= INTVAL (operands[3]) << 2;
10276 mask |= INTVAL (operands[4]) << 4;
10277 mask |= INTVAL (operands[5]) << 6;
10278 operands[2] = GEN_INT (mask);
10280 return "%vpshuflw\t{%2, %1, %0|%0, %1, %2}";
10282 [(set_attr "type" "sselog")
10283 (set_attr "prefix_data16" "0")
10284 (set_attr "prefix_rep" "1")
10285 (set_attr "prefix" "maybe_vex")
10286 (set_attr "length_immediate" "1")
10287 (set_attr "mode" "TI")])
;; Expander for the V16HI high-word shuffle with an 8-bit immediate
;; (operand 2, range 0..255).  Each 2-bit selector is biased by +4
;; (indices 4..7) and by +12 (indices 12..15) before being passed to
;; gen_avx2_pshufhw_1.
;; NOTE(review): the condition string and the end of the C body are on
;; lines not visible in this excerpt.
10289 (define_expand "avx2_pshufhwv3"
10290 [(match_operand:V16HI 0 "register_operand")
10291 (match_operand:V16HI 1 "nonimmediate_operand")
10292 (match_operand:SI 2 "const_0_to_255_operand")]
10295 int mask = INTVAL (operands[2]);
10296 emit_insn (gen_avx2_pshufhw_1 (operands[0], operands[1],
10297 GEN_INT (((mask >> 0) & 3) + 4),
10298 GEN_INT (((mask >> 2) & 3) + 4),
10299 GEN_INT (((mask >> 4) & 3) + 4),
10300 GEN_INT (((mask >> 6) & 3) + 4),
10301 GEN_INT (((mask >> 0) & 3) + 12),
10302 GEN_INT (((mask >> 2) & 3) + 12),
10303 GEN_INT (((mask >> 4) & 3) + 12),
10304 GEN_INT (((mask >> 6) & 3) + 12)));
;; vpshufhw on V16HI.  Operands 2..5 (each 4..7) and operands 6..9 (each
;; 12..15) are the variable element indices; the visible part of the
;; condition requires operands 6..9 to equal operands 2..5 plus 8.  The
;; output code subtracts the bias of 4 from operands 2..5 and packs the
;; results, 2 bits each, into the immediate stored in operands[2].
;; NOTE(review): the fixed (const_int) entries after the leading 0, the
;; first clause of the condition and the declaration of `mask' are on
;; lines not visible in this excerpt.
10308 (define_insn "avx2_pshufhw_1"
10309 [(set (match_operand:V16HI 0 "register_operand" "=x")
10311 (match_operand:V16HI 1 "nonimmediate_operand" "xm")
10312 (parallel [(const_int 0)
10316 (match_operand 2 "const_4_to_7_operand")
10317 (match_operand 3 "const_4_to_7_operand")
10318 (match_operand 4 "const_4_to_7_operand")
10319 (match_operand 5 "const_4_to_7_operand")
10324 (match_operand 6 "const_12_to_15_operand")
10325 (match_operand 7 "const_12_to_15_operand")
10326 (match_operand 8 "const_12_to_15_operand")
10327 (match_operand 9 "const_12_to_15_operand")])))]
10329 && INTVAL (operands[2]) + 8 == INTVAL (operands[6])
10330 && INTVAL (operands[3]) + 8 == INTVAL (operands[7])
10331 && INTVAL (operands[4]) + 8 == INTVAL (operands[8])
10332 && INTVAL (operands[5]) + 8 == INTVAL (operands[9])"
10335 mask |= (INTVAL (operands[2]) - 4) << 0;
10336 mask |= (INTVAL (operands[3]) - 4) << 2;
10337 mask |= (INTVAL (operands[4]) - 4) << 4;
10338 mask |= (INTVAL (operands[5]) - 4) << 6;
10339 operands[2] = GEN_INT (mask);
10341 return "vpshufhw\t{%2, %1, %0|%0, %1, %2}";
10343 [(set_attr "type" "sselog")
10344 (set_attr "prefix" "vex")
10345 (set_attr "length_immediate" "1")
10346 (set_attr "mode" "OI")])
;; Expander for the V8HI high-word shuffle with an immediate (operand 2,
;; any const_int).  Bits 0..7 are split into four 2-bit selectors, each
;; biased by +4 (indices 4..7), and handed to gen_sse2_pshufhw_1.
;; NOTE(review): the condition string and the end of the C body are on
;; lines not visible in this excerpt.
10348 (define_expand "sse2_pshufhw"
10349 [(match_operand:V8HI 0 "register_operand")
10350 (match_operand:V8HI 1 "nonimmediate_operand")
10351 (match_operand:SI 2 "const_int_operand")]
10354 int mask = INTVAL (operands[2]);
10355 emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
10356 GEN_INT (((mask >> 0) & 3) + 4),
10357 GEN_INT (((mask >> 2) & 3) + 4),
10358 GEN_INT (((mask >> 4) & 3) + 4),
10359 GEN_INT (((mask >> 6) & 3) + 4)));
;; pshufhw / vpshufhw on V8HI.  Operands 2..5 (each 4..7) are the variable
;; element indices.  The output code subtracts the bias of 4 from each and
;; packs the results, 2 bits each, into the immediate stored in
;; operands[2].  The attributes record a REP prefix and no data16 prefix.
;; NOTE(review): the fixed (const_int) entries after the leading 0, the
;; condition string and the declaration of `mask' are on lines not visible
;; in this excerpt.
10363 (define_insn "sse2_pshufhw_1"
10364 [(set (match_operand:V8HI 0 "register_operand" "=x")
10366 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10367 (parallel [(const_int 0)
10371 (match_operand 2 "const_4_to_7_operand")
10372 (match_operand 3 "const_4_to_7_operand")
10373 (match_operand 4 "const_4_to_7_operand")
10374 (match_operand 5 "const_4_to_7_operand")])))]
10378 mask |= (INTVAL (operands[2]) - 4) << 0;
10379 mask |= (INTVAL (operands[3]) - 4) << 2;
10380 mask |= (INTVAL (operands[4]) - 4) << 4;
10381 mask |= (INTVAL (operands[5]) - 4) << 6;
10382 operands[2] = GEN_INT (mask);
10384 return "%vpshufhw\t{%2, %1, %0|%0, %1, %2}";
10386 [(set_attr "type" "sselog")
10387 (set_attr "prefix_rep" "1")
10388 (set_attr "prefix_data16" "0")
10389 (set_attr "prefix" "maybe_vex")
10390 (set_attr "length_immediate" "1")
10391 (set_attr "mode" "TI")])
;; Expander that builds a V4SI value from an SI operand (operand 1,
;; wrapped in vec_duplicate).  The preparation statement sets operands[2]
;; to the V4SI zero constant.
;; NOTE(review): the part of the pattern that uses operand 2 and the
;; condition string are on lines not visible in this excerpt.
10393 (define_expand "sse2_loadd"
10394 [(set (match_operand:V4SI 0 "register_operand")
10396 (vec_duplicate:V4SI
10397 (match_operand:SI 1 "nonimmediate_operand"))
10401 "operands[2] = CONST0_RTX (V4SImode);")
;; Insert an SI value (operand 2) into a V4SI destination, combined with
;; operand 1, which is either zero ("C") or a vector register.  Five
;; alternatives: movd from memory, movd from a general register, movss
;; from memory (non-AVX), movss register-to-register with operand 1 tied
;; to the destination (non-AVX), and three-operand vmovss (AVX).
;; NOTE(review): the combining RTL code around vec_duplicate, the
;; condition string and the template opener are on lines not visible in
;; this excerpt.
10403 (define_insn "sse2_loadld"
10404 [(set (match_operand:V4SI 0 "register_operand" "=x,Yi,x,x,x")
10406 (vec_duplicate:V4SI
10407 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x,x"))
10408 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,C,0,x")
10412 %vmovd\t{%2, %0|%0, %2}
10413 %vmovd\t{%2, %0|%0, %2}
10414 movss\t{%2, %0|%0, %2}
10415 movss\t{%2, %0|%0, %2}
10416 vmovss\t{%2, %1, %0|%0, %1, %2}"
10417 [(set_attr "isa" "sse2,*,noavx,noavx,avx")
10418 (set_attr "type" "ssemov")
10419 (set_attr "prefix" "maybe_vex,maybe_vex,orig,orig,vex")
10420 (set_attr "mode" "TI,TI,V4SF,SF,SF")])
;; Extract one element of a VI12_128 vector register (index in operand 2)
;; with pextr<suffix>.  Alternative 0 writes a general register, printed
;; with the %k modifier; alternative 1 stores directly to memory.  The
;; prefix_data16 / prefix_extra attributes are computed per alternative
;; and special-case alternative 0 in V8HImode.
;; NOTE(review): the condition string and the head of each attribute
;; conditional are on lines not visible in this excerpt.
10422 (define_insn "*vec_extract<mode>"
10423 [(set (match_operand:<ssescalarmode> 0 "nonimmediate_operand" "=r,m")
10424 (vec_select:<ssescalarmode>
10425 (match_operand:VI12_128 1 "register_operand" "x,x")
10427 [(match_operand:SI 2 "const_0_to_<ssescalarnummask>_operand")])))]
10430 %vpextr<ssemodesuffix>\t{%2, %1, %k0|%k0, %1, %2}
10431 %vpextr<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10432 [(set_attr "type" "sselog1")
10433 (set (attr "prefix_data16")
10435 (and (eq_attr "alternative" "0")
10436 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
10438 (const_string "*")))
10439 (set (attr "prefix_extra")
10441 (and (eq_attr "alternative" "0")
10442 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
10444 (const_string "1")))
10445 (set_attr "length_immediate" "1")
10446 (set_attr "prefix" "maybe_vex")
10447 (set_attr "mode" "TI")])
;; Extract one HI element (index 0..7 in operand 2) of a V8HI register
;; into a general register with pextrw.  Enabled only for SSE2 targets
;; without SSE4.1.
10449 (define_insn "*vec_extractv8hi_sse2"
10450 [(set (match_operand:HI 0 "register_operand" "=r")
10452 (match_operand:V8HI 1 "register_operand" "x")
10454 [(match_operand:SI 2 "const_0_to_7_operand")])))]
10455 "TARGET_SSE2 && !TARGET_SSE4_1"
10456 "pextrw\t{%2, %1, %k0|%k0, %1, %2}"
10457 [(set_attr "type" "sselog1")
10458 (set_attr "prefix_data16" "1")
10459 (set_attr "length_immediate" "1")
10460 (set_attr "mode" "TI")])
;; Extract one byte (index 0..15 in operand 2) of a V16QI register into an
;; SWI48 general register with pextrb; the destination is printed with
;; the %k modifier.
;; NOTE(review): the extension wrapper implied by the pattern name and the
;; condition string are on lines not visible in this excerpt.
10462 (define_insn "*vec_extractv16qi_zext"
10463 [(set (match_operand:SWI48 0 "register_operand" "=r")
10466 (match_operand:V16QI 1 "register_operand" "x")
10468 [(match_operand:SI 2 "const_0_to_15_operand")]))))]
10470 "%vpextrb\t{%2, %1, %k0|%k0, %1, %2}"
10471 [(set_attr "type" "sselog1")
10472 (set_attr "prefix_extra" "1")
10473 (set_attr "length_immediate" "1")
10474 (set_attr "prefix" "maybe_vex")
10475 (set_attr "mode" "TI")])
;; Extract one HI element (index 0..7 in operand 2) of a V8HI register
;; into an SWI48 general register with pextrw; the destination is printed
;; with the %k modifier.
;; NOTE(review): the extension wrapper implied by the pattern name and the
;; condition string are on lines not visible in this excerpt.
10477 (define_insn "*vec_extractv8hi_zext"
10478 [(set (match_operand:SWI48 0 "register_operand" "=r")
10481 (match_operand:V8HI 1 "register_operand" "x")
10483 [(match_operand:SI 2 "const_0_to_7_operand")]))))]
10485 "%vpextrw\t{%2, %1, %k0|%k0, %1, %2}"
10486 [(set_attr "type" "sselog1")
10487 (set_attr "prefix_data16" "1")
10488 (set_attr "length_immediate" "1")
10489 (set_attr "prefix" "maybe_vex")
10490 (set_attr "mode" "TI")])
;; Extract one element (index in operand 2) of a VI12_128 vector that
;; lives in offsettable memory (constraint "o") into a general register.
;; NOTE(review): the condition and output template are on lines not
;; visible in this excerpt.
10492 (define_insn "*vec_extract<mode>_mem"
10493 [(set (match_operand:<ssescalarmode> 0 "register_operand" "=r")
10494 (vec_select:<ssescalarmode>
10495 (match_operand:VI12_128 1 "memory_operand" "o")
10497 [(match_operand 2 "const_0_to_<ssescalarnummask>_operand")])))]
;; Extract element 0 of a vector into an SWI48 destination.  Four
;; alternatives (general register from memory/Yj, general register from a
;; vector register, vector register, memory); the second is restricted to
;; the "sse4" isa.  The condition rejects memory-to-memory forms.
;; NOTE(review): the output template is on a line not visible in this
;; excerpt.
10501 (define_insn "*vec_extract<ssevecmodelower>_0"
10502 [(set (match_operand:SWI48 0 "nonimmediate_operand" "=r ,r,x ,m")
10504 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "mYj,x,xm,x")
10505 (parallel [(const_int 0)])))]
10506 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
10508 [(set_attr "isa" "*,sse4,*,*")])
;; Element 0 of a V4SI register extended into a DI general register.
;; Available on 64-bit SSE2 targets that allow inter-unit moves from
;; vector registers.  After reload it is split into a plain
;; zero_extend:DI of operand 1, which the preparation statement rewrites
;; as the SImode register with the same register number.
10510 (define_insn_and_split "*vec_extractv4si_0_zext"
10511 [(set (match_operand:DI 0 "register_operand" "=r")
10514 (match_operand:V4SI 1 "register_operand" "x")
10515 (parallel [(const_int 0)]))))]
10516 "TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_FROM_VEC"
10518 "&& reload_completed"
10519 [(set (match_dup 0) (zero_extend:DI (match_dup 1)))]
10520 "operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));")
;; Element 0 of a V2DI operand into a DI destination held in a vector
;; register or memory.  Only for 32-bit SSE targets (!TARGET_64BIT); the
;; condition rejects memory-to-memory forms.
;; NOTE(review): the output template is on a line not visible in this
;; excerpt.
10522 (define_insn "*vec_extractv2di_0_sse"
10523 [(set (match_operand:DI 0 "nonimmediate_operand" "=x,m")
10525 (match_operand:V2DI 1 "nonimmediate_operand" "xm,x")
10526 (parallel [(const_int 0)])))]
10527 "TARGET_SSE && !TARGET_64BIT
10528 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
10532 [(set (match_operand:SWI48x 0 "nonimmediate_operand")
10534 (match_operand:<ssevecmode> 1 "register_operand")
10535 (parallel [(const_int 0)])))]
10536 "TARGET_SSE && reload_completed"
10537 [(set (match_dup 0) (match_dup 1))]
10538 "operands[1] = gen_rtx_REG (<MODE>mode, REGNO (operands[1]));")
;; Extract one SI element (index 0..3 in operand 2) of a V4SI register.
;; The output code switches on the alternative and emits either pextrd
;; with the element index, or a whole-register byte shift (psrldq, or
;; three-operand vpsrldq) by index * 4 bytes.  Judging by the "isa"
;; attribute the order is pextrd, psrldq (non-AVX), vpsrldq (AVX).
;; NOTE(review): the case labels and the condition string are on lines
;; not visible in this excerpt -- confirm the alternative mapping there.
10540 (define_insn "*vec_extractv4si"
10541 [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,x,x")
10543 (match_operand:V4SI 1 "register_operand" "x,0,x")
10544 (parallel [(match_operand:SI 2 "const_0_to_3_operand")])))]
10547 switch (which_alternative)
10550 return "%vpextrd\t{%2, %1, %0|%0, %1, %2}";
10553 operands [2] = GEN_INT (INTVAL (operands[2]) * 4);
10554 return "psrldq\t{%2, %0|%0, %2}";
10557 operands [2] = GEN_INT (INTVAL (operands[2]) * 4);
10558 return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
10561 gcc_unreachable ();
10564 [(set_attr "isa" "*,noavx,avx")
10565 (set_attr "type" "sselog1,sseishft1,sseishft1")
10566 (set_attr "prefix_extra" "1,*,*")
10567 (set_attr "length_immediate" "1")
10568 (set_attr "prefix" "maybe_vex,orig,vex")
10569 (set_attr "mode" "TI")])
;; Extract one SI element (index 0..3 in operand 2) of a V4SI register
;; into a DI general register with pextrd on 64-bit SSE4.1 targets.  The
;; destination is printed with the %k modifier.
;; NOTE(review): the extension wrapper implied by the pattern name is on
;; lines not visible in this excerpt.
10571 (define_insn "*vec_extractv4si_zext"
10572 [(set (match_operand:DI 0 "register_operand" "=r")
10575 (match_operand:V4SI 1 "register_operand" "x")
10576 (parallel [(match_operand:SI 2 "const_0_to_3_operand")]))))]
10577 "TARGET_64BIT && TARGET_SSE4_1"
10578 "%vpextrd\t{%2, %1, %k0|%k0, %1, %2}"
10579 [(set_attr "type" "sselog1")
10580 (set_attr "prefix_extra" "1")
10581 (set_attr "length_immediate" "1")
10582 (set_attr "prefix" "maybe_vex")
10583 (set_attr "mode" "TI")])
;; Extract one SI element (index 0..3 in operand 2) of a V4SI value in
;; offsettable memory into a vector register or a general register.
;; NOTE(review): the condition and output template are on lines not
;; visible in this excerpt.
10585 (define_insn "*vec_extractv4si_mem"
10586 [(set (match_operand:SI 0 "register_operand" "=x,r")
10588 (match_operand:V4SI 1 "memory_operand" "o,o")
10589 (parallel [(match_operand 2 "const_0_to_3_operand")])))]
;; Extract one SI element of a V4SI value in offsettable memory, extended
;; into a DI register, on 64-bit SSE targets.  After reload it is split
;; into a zero_extend:DI load: the preparation code narrows operand 1 to
;; an SImode memory reference at byte offset index * 4.
10593 (define_insn_and_split "*vec_extractv4si_zext_mem"
10594 [(set (match_operand:DI 0 "register_operand" "=x,r")
10597 (match_operand:V4SI 1 "memory_operand" "o,o")
10598 (parallel [(match_operand:SI 2 "const_0_to_3_operand")]))))]
10599 "TARGET_64BIT && TARGET_SSE"
10601 "&& reload_completed"
10602 [(set (match_dup 0) (zero_extend:DI (match_dup 1)))]
10604 operands[1] = adjust_address (operands[1], SImode, INTVAL (operands[2]) * 4);
;; Extract element 1 (the upper DI) of a V2DI operand.  Seven
;; alternatives, selected by the "isa" attribute: pextrq (64-bit SSE4),
;; movhps store to memory, psrldq by 8 in place (SSE2 non-AVX), vpsrldq by
;; 8 (AVX), movhlps (non-AVX), and two alternatives that read the source
;; from offsettable memory.  The condition rejects memory-to-memory forms.
;; NOTE(review): the templates for the last two alternatives are on lines
;; not visible in this excerpt.
10607 (define_insn "*vec_extractv2di_1"
10608 [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,m,x,x,x,x,r")
10610 (match_operand:V2DI 1 "nonimmediate_operand" "x ,x,0,x,x,o,o")
10611 (parallel [(const_int 1)])))]
10612 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
10614 %vpextrq\t{$1, %1, %0|%0, %1, 1}
10615 %vmovhps\t{%1, %0|%0, %1}
10616 psrldq\t{$8, %0|%0, 8}
10617 vpsrldq\t{$8, %1, %0|%0, %1, 8}
10618 movhlps\t{%1, %0|%0, %1}
10621 [(set_attr "isa" "x64_sse4,*,sse2_noavx,avx,noavx,*,x64")
10622 (set_attr "type" "sselog1,ssemov,sseishft1,sseishft1,ssemov,ssemov,imov")
10623 (set_attr "length_immediate" "1,*,1,1,*,*,*")
10624 (set_attr "prefix_rex" "1,*,*,*,*,*,*")
10625 (set_attr "prefix_extra" "1,*,*,*,*,*,*")
10626 (set_attr "prefix" "maybe_vex,maybe_vex,orig,vex,orig,*,*")
10627 (set_attr "mode" "TI,V2SF,TI,TI,V4SF,DI,DI")])
10630 [(set (match_operand:<ssescalarmode> 0 "register_operand")
10631 (vec_select:<ssescalarmode>
10632 (match_operand:VI_128 1 "memory_operand")
10634 [(match_operand 2 "const_0_to_<ssescalarnummask>_operand")])))]
10635 "TARGET_SSE && reload_completed"
10636 [(set (match_dup 0) (match_dup 1))]
10638 int offs = INTVAL (operands[2]) * GET_MODE_SIZE (<ssescalarmode>mode);
10640 operands[1] = adjust_address (operands[1], <ssescalarmode>mode, offs);
;; Broadcast an SI value to all four elements of a V4SI register.  Three
;; alternatives: pshufd with immediate 0 from a vector register (SSE2),
;; vbroadcastss from memory (AVX), and shufps with immediate 0 in place on
;; the destination (non-AVX).
;; NOTE(review): the condition string is on a line not visible in this
;; excerpt.
10643 (define_insn "*vec_dupv4si"
10644 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
10645 (vec_duplicate:V4SI
10646 (match_operand:SI 1 "nonimmediate_operand" " x,m,0")))]
10649 %vpshufd\t{$0, %1, %0|%0, %1, 0}
10650 vbroadcastss\t{%1, %0|%0, %1}
10651 shufps\t{$0, %0, %0|%0, %0, 0}"
10652 [(set_attr "isa" "sse2,avx,noavx")
10653 (set_attr "type" "sselog1,ssemov,sselog1")
10654 (set_attr "length_immediate" "1,0,1")
10655 (set_attr "prefix_extra" "0,1,*")
10656 (set_attr "prefix" "maybe_vex,vex,orig")
10657 (set_attr "mode" "TI,V4SF,V4SF")])
;; Broadcast a DI value to both elements of a V2DI register.  Four
;; alternatives, selected by the "isa" attribute (sse2_noavx, avx, sse3,
;; noavx); the two visible templates are vpunpcklqdq for the AVX
;; alternative and movddup from memory for the SSE3 alternative.
;; NOTE(review): the condition string and the templates for the first and
;; last alternatives are on lines not visible in this excerpt.
10659 (define_insn "*vec_dupv2di"
10660 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x,x")
10661 (vec_duplicate:V2DI
10662 (match_operand:DI 1 "nonimmediate_operand" " 0,x,m,0")))]
10666 vpunpcklqdq\t{%d1, %0|%0, %d1}
10667 %vmovddup\t{%1, %0|%0, %1}
10669 [(set_attr "isa" "sse2_noavx,avx,sse3,noavx")
10670 (set_attr "type" "sselog1,sselog1,sselog1,ssemov")
10671 (set_attr "prefix" "orig,vex,maybe_vex,orig")
10672 (set_attr "mode" "TI,TI,DF,V4SF")])
;; Build a V2SI value from two SI operands.  Seven alternatives: pinsrd /
;; vpinsrd inserting operand 2 at index 1, punpckldq / vpunpckldq of two
;; vector registers, movd when operand 2 is zero ("C"), and two MMX
;; register alternatives (punpckldq, movd).
;; NOTE(review): the combining RTL code and the condition string are on
;; lines not visible in this excerpt; the name suggests an SSE4.1-only
;; pattern -- confirm.
10674 (define_insn "*vec_concatv2si_sse4_1"
10675 [(set (match_operand:V2SI 0 "register_operand" "=x, x,x,x, x, *y,*y")
10677 (match_operand:SI 1 "nonimmediate_operand" " 0, x,0,x,rm, 0,rm")
10678 (match_operand:SI 2 "vector_move_operand" "rm,rm,x,x, C,*ym, C")))]
10681 pinsrd\t{$1, %2, %0|%0, %2, 1}
10682 vpinsrd\t{$1, %2, %1, %0|%0, %1, %2, 1}
10683 punpckldq\t{%2, %0|%0, %2}
10684 vpunpckldq\t{%2, %1, %0|%0, %1, %2}
10685 %vmovd\t{%1, %0|%0, %1}
10686 punpckldq\t{%2, %0|%0, %2}
10687 movd\t{%1, %0|%0, %1}"
10688 [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
10689 (set_attr "type" "sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov")
10690 (set_attr "prefix_extra" "1,1,*,*,*,*,*")
10691 (set_attr "length_immediate" "1,1,*,*,*,*,*")
10692 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig")
10693 (set_attr "mode" "TI,TI,TI,TI,TI,DI,DI")])
10695 ;; ??? In theory we can match memory for the MMX alternative, but allowing
10696 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
10697 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Build a V2SI value from two SI operands on SSE targets without SSE4.1.
;; Operand 2 is a register or zero.  Seven alternatives: punpckldq and
;; two movd forms (SSE2 isa), unpcklps, movss from memory, and two MMX
;; register alternatives (punpckldq, movd).
;; NOTE(review): the combining RTL code is on a line not visible in this
;; excerpt.
10698 (define_insn "*vec_concatv2si"
10699 [(set (match_operand:V2SI 0 "register_operand" "=x,x ,*y,x,x,*y,*y")
10701 (match_operand:SI 1 "nonimmediate_operand" " 0,rm,rm,0,m, 0,*rm")
10702 (match_operand:SI 2 "reg_or_0_operand" " x,C ,C, x,C,*y,C")))]
10703 "TARGET_SSE && !TARGET_SSE4_1"
10705 punpckldq\t{%2, %0|%0, %2}
10706 movd\t{%1, %0|%0, %1}
10707 movd\t{%1, %0|%0, %1}
10708 unpcklps\t{%2, %0|%0, %2}
10709 movss\t{%1, %0|%0, %1}
10710 punpckldq\t{%2, %0|%0, %2}
10711 movd\t{%1, %0|%0, %1}"
10712 [(set_attr "isa" "sse2,sse2,sse2,*,*,*,*")
10713 (set_attr "type" "sselog,ssemov,mmxmov,sselog,ssemov,mmxcvt,mmxmov")
10714 (set_attr "mode" "TI,TI,DI,V4SF,SF,DI,DI")])
;; Build a V4SI value from two V2SI operands.  Five alternatives:
;; punpcklqdq (SSE2 non-AVX), vpunpcklqdq (AVX), movlhps (non-AVX), and
;; movhps / vmovhps when operand 2 is in memory (printed with %q2 in the
;; Intel-syntax half of the template).
;; NOTE(review): the combining RTL code and the condition string are on
;; lines not visible in this excerpt.
10716 (define_insn "*vec_concatv4si"
10717 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x,x,x")
10719 (match_operand:V2SI 1 "register_operand" " 0,x,0,0,x")
10720 (match_operand:V2SI 2 "nonimmediate_operand" " x,x,x,m,m")))]
10723 punpcklqdq\t{%2, %0|%0, %2}
10724 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
10725 movlhps\t{%2, %0|%0, %2}
10726 movhps\t{%2, %0|%0, %q2}
10727 vmovhps\t{%2, %1, %0|%0, %1, %q2}"
10728 [(set_attr "isa" "sse2_noavx,avx,noavx,noavx,avx")
10729 (set_attr "type" "sselog,sselog,ssemov,ssemov,ssemov")
10730 (set_attr "prefix" "orig,vex,orig,orig,vex")
10731 (set_attr "mode" "TI,TI,V4SF,V2SF,V2SF")])
10733 ;; movd instead of movq is required to handle broken assemblers.
;; Build a V2DI value from two DI operands.  Ten alternatives, in
;; template order: pinsrq / vpinsrq inserting operand 2 at index 1; a move
;; from a general register that prints movq or movd depending on
;; HAVE_AS_IX86_INTERUNIT_MOVQ; movq; movq2dq from an MMX register;
;; punpcklqdq / vpunpcklqdq; movlhps; movhps / vmovhps from memory.
;; Alternatives 0, 1, 5 and 6 have type "sselog", the rest "ssemov".
;; NOTE(review): the combining RTL code, the condition string and the head
;; of the "type" attribute are on lines not visible in this excerpt.
10734 (define_insn "vec_concatv2di"
10735 [(set (match_operand:V2DI 0 "register_operand"
10736 "=x,x ,Yi,x ,!x,x,x,x,x,x")
10738 (match_operand:DI 1 "nonimmediate_operand"
10739 " 0,x ,r ,xm,*y,0,x,0,0,x")
10740 (match_operand:DI 2 "vector_move_operand"
10741 "rm,rm,C ,C ,C ,x,x,x,m,m")))]
10744 pinsrq\t{$1, %2, %0|%0, %2, 1}
10745 vpinsrq\t{$1, %2, %1, %0|%0, %1, %2, 1}
10746 * return HAVE_AS_IX86_INTERUNIT_MOVQ ? \"%vmovq\t{%1, %0|%0, %1}\" : \"%vmovd\t{%1, %0|%0, %1}\";
10747 %vmovq\t{%1, %0|%0, %1}
10748 movq2dq\t{%1, %0|%0, %1}
10749 punpcklqdq\t{%2, %0|%0, %2}
10750 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
10751 movlhps\t{%2, %0|%0, %2}
10752 movhps\t{%2, %0|%0, %2}
10753 vmovhps\t{%2, %1, %0|%0, %1, %2}"
10754 [(set_attr "isa" "x64_sse4_noavx,x64_avx,x64,sse2,sse2,sse2_noavx,avx,noavx,noavx,avx")
10757 (eq_attr "alternative" "0,1,5,6")
10758 (const_string "sselog")
10759 (const_string "ssemov")))
10760 (set_attr "prefix_rex" "1,1,1,*,*,*,*,*,*,*")
10761 (set_attr "prefix_extra" "1,1,*,*,*,*,*,*,*,*")
10762 (set_attr "length_immediate" "1,1,*,*,*,*,*,*,*,*")
10763 (set_attr "prefix" "orig,vex,maybe_vex,maybe_vex,orig,orig,vex,orig,orig,vex")
10764 (set_attr "mode" "TI,TI,TI,TI,TI,TI,TI,V4SF,V2SF,V2SF")])
;; Widening unpack expander for VI124_AVX512F modes; the result has mode
;; <sseunpackmode>.  All work is delegated to ix86_expand_sse_unpack,
;; called with (false, false) as its last two arguments (by the pattern
;; name: signed, low half -- confirm against that function).
;; NOTE(review): the condition string is on a line not visible here.
10766 (define_expand "vec_unpacks_lo_<mode>"
10767 [(match_operand:<sseunpackmode> 0 "register_operand")
10768 (match_operand:VI124_AVX512F 1 "register_operand")]
10770 "ix86_expand_sse_unpack (operands[0], operands[1], false, false); DONE;")
;; Widening unpack expander for VI124_AVX512F modes; the result has mode
;; <sseunpackmode>.  All work is delegated to ix86_expand_sse_unpack,
;; called with (false, true) as its last two arguments (by the pattern
;; name: signed, high half -- confirm against that function).
;; NOTE(review): the condition string is on a line not visible here.
10772 (define_expand "vec_unpacks_hi_<mode>"
10773 [(match_operand:<sseunpackmode> 0 "register_operand")
10774 (match_operand:VI124_AVX512F 1 "register_operand")]
10776 "ix86_expand_sse_unpack (operands[0], operands[1], false, true); DONE;")
;; Widening unpack expander for VI124_AVX512F modes; the result has mode
;; <sseunpackmode>.  All work is delegated to ix86_expand_sse_unpack,
;; called with (true, false) as its last two arguments (by the pattern
;; name: unsigned, low half -- confirm against that function).
;; NOTE(review): the condition string is on a line not visible here.
10778 (define_expand "vec_unpacku_lo_<mode>"
10779 [(match_operand:<sseunpackmode> 0 "register_operand")
10780 (match_operand:VI124_AVX512F 1 "register_operand")]
10782 "ix86_expand_sse_unpack (operands[0], operands[1], true, false); DONE;")
;; Widening unpack expander for VI124_AVX512F modes; the result has mode
;; <sseunpackmode>.  All work is delegated to ix86_expand_sse_unpack,
;; called with (true, true) as its last two arguments (by the pattern
;; name: unsigned, high half -- confirm against that function).
;; NOTE(review): the condition string is on a line not visible here.
10784 (define_expand "vec_unpacku_hi_<mode>"
10785 [(match_operand:<sseunpackmode> 0 "register_operand")
10786 (match_operand:VI124_AVX512F 1 "register_operand")]
10788 "ix86_expand_sse_unpack (operands[0], operands[1], true, true); DONE;")
10790 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10794 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander for the unsigned average of two VI12_AVX2 vectors.  Both
;; inputs are zero-extended to <ssedoublemode>, added, shifted right
;; logically and truncated back to the element mode.  The preparation
;; code sets operands[3] to the all-ones-elements constant
;; CONST1_RTX (<MODE>mode) and calls ix86_fixup_binary_operands_no_copy
;; for PLUS on the operands.
;; NOTE(review): the use of operand 3, the shift count and the condition
;; string are on lines not visible in this excerpt.
10796 (define_expand "<sse2_avx2>_uavg<mode>3"
10797 [(set (match_operand:VI12_AVX2 0 "register_operand")
10798 (truncate:VI12_AVX2
10799 (lshiftrt:<ssedoublemode>
10800 (plus:<ssedoublemode>
10801 (plus:<ssedoublemode>
10802 (zero_extend:<ssedoublemode>
10803 (match_operand:VI12_AVX2 1 "nonimmediate_operand"))
10804 (zero_extend:<ssedoublemode>
10805 (match_operand:VI12_AVX2 2 "nonimmediate_operand")))
10810 operands[3] = CONST1_RTX(<MODE>mode);
10811 ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);
;; pavg<suffix> / vpavg<suffix>: unsigned average of two VI12_AVX2
;; vectors, expressed as truncate (lshiftrt (a + b + operand 3)) computed
;; in <ssedoublemode>, where operand 3 must satisfy const1_operand.
;; Operand 1 is marked commutative ("%").  Two alternatives: two-operand
;; non-AVX form and three-operand AVX form.
;; NOTE(review): the shift count is on a line not visible in this excerpt.
10814 (define_insn "*<sse2_avx2>_uavg<mode>3"
10815 [(set (match_operand:VI12_AVX2 0 "register_operand" "=x,x")
10816 (truncate:VI12_AVX2
10817 (lshiftrt:<ssedoublemode>
10818 (plus:<ssedoublemode>
10819 (plus:<ssedoublemode>
10820 (zero_extend:<ssedoublemode>
10821 (match_operand:VI12_AVX2 1 "nonimmediate_operand" "%0,x"))
10822 (zero_extend:<ssedoublemode>
10823 (match_operand:VI12_AVX2 2 "nonimmediate_operand" "xm,xm")))
10824 (match_operand:VI12_AVX2 3 "const1_operand"))
10826 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
10828 pavg<ssemodesuffix>\t{%2, %0|%0, %2}
10829 vpavg<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10830 [(set_attr "isa" "noavx,avx")
10831 (set_attr "type" "sseiadd")
10832 (set_attr "prefix_data16" "1,*")
10833 (set_attr "prefix" "orig,vex")
10834 (set_attr "mode" "<sseinsnmode>")])
10836 ;; The correct representation for this is absolutely enormous, and
10837 ;; surely not generally useful.
;; psadbw / vpsadbw.  The result has a VI8_AVX2 mode while both inputs are
;; in the corresponding byte-vector mode <ssebytemode>.  Two alternatives:
;; two-operand non-AVX form (operand 1 tied to the destination) and
;; three-operand AVX form.
;; NOTE(review): the unspec wrapper and the condition string are on lines
;; not visible in this excerpt.
10838 (define_insn "<sse2_avx2>_psadbw"
10839 [(set (match_operand:VI8_AVX2 0 "register_operand" "=x,x")
10841 [(match_operand:<ssebytemode> 1 "register_operand" "0,x")
10842 (match_operand:<ssebytemode> 2 "nonimmediate_operand" "xm,xm")]
10846 psadbw\t{%2, %0|%0, %2}
10847 vpsadbw\t{%2, %1, %0|%0, %1, %2}"
10848 [(set_attr "isa" "noavx,avx")
10849 (set_attr "type" "sseiadd")
10850 (set_attr "atom_unit" "simul")
10851 (set_attr "prefix_data16" "1,*")
10852 (set_attr "prefix" "orig,vex")
10853 (set_attr "mode" "<sseinsnmode>")])
;; movmskps / movmskpd (and VEX forms): produce an SI value in a general
;; register from a VF_128_256 vector register.
;; NOTE(review): the unspec wrapper and the condition string are on lines
;; not visible in this excerpt.
10855 (define_insn "<sse>_movmsk<ssemodesuffix><avxsizesuffix>"
10856 [(set (match_operand:SI 0 "register_operand" "=r")
10858 [(match_operand:VF_128_256 1 "register_operand" "x")]
10861 "%vmovmsk<ssemodesuffix>\t{%1, %0|%0, %1}"
10862 [(set_attr "type" "ssemov")
10863 (set_attr "prefix" "maybe_vex")
10864 (set_attr "mode" "<MODE>")])
;; vpmovmskb: produce an SI value in a general register from a V32QI
;; vector register (modelled as an unspec).
;; NOTE(review): the unspec code and the condition string are on lines not
;; visible in this excerpt.
10866 (define_insn "avx2_pmovmskb"
10867 [(set (match_operand:SI 0 "register_operand" "=r")
10868 (unspec:SI [(match_operand:V32QI 1 "register_operand" "x")]
10871 "vpmovmskb\t{%1, %0|%0, %1}"
10872 [(set_attr "type" "ssemov")
10873 (set_attr "prefix" "vex")
10874 (set_attr "mode" "DI")])
;; pmovmskb / vpmovmskb: produce an SI value in a general register from a
;; V16QI vector register (modelled as an unspec).
;; NOTE(review): the unspec code and the condition string are on lines not
;; visible in this excerpt.
10876 (define_insn "sse2_pmovmskb"
10877 [(set (match_operand:SI 0 "register_operand" "=r")
10878 (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
10881 "%vpmovmskb\t{%1, %0|%0, %1}"
10882 [(set_attr "type" "ssemov")
10883 (set_attr "prefix_data16" "1")
10884 (set_attr "prefix" "maybe_vex")
10885 (set_attr "mode" "SI")])
;; Named expander for the byte-masked store: a V16QI memory destination
;; set from an unspec of two V16QI register operands.
;; NOTE(review): the rest of the unspec vector, the unspec code and the
;; condition string are on lines not visible in this excerpt.
10887 (define_expand "sse2_maskmovdqu"
10888 [(set (match_operand:V16QI 0 "memory_operand")
10889 (unspec:V16QI [(match_operand:V16QI 1 "register_operand")
10890 (match_operand:V16QI 2 "register_operand")
;; maskmovdqu / vmaskmovdqu.  The store address is the pointer register
;; in operand 0 (constraint "D"); the old memory contents appear as an
;; input of the unspec.  When Pmode differs from word_mode the output code
;; writes an "addr32" prefix to the assembly file before returning the
;; template, and length_address accounts for it.  length_vex is computed
;; by hand from operand 2 because of the implicit address operand.
;; NOTE(review): the unspec code and the condition string are on lines not
;; visible in this excerpt.
10895 (define_insn "*sse2_maskmovdqu"
10896 [(set (mem:V16QI (match_operand:P 0 "register_operand" "D"))
10897 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
10898 (match_operand:V16QI 2 "register_operand" "x")
10899 (mem:V16QI (match_dup 0))]
10903 /* We can't use %^ here due to ASM_OUTPUT_OPCODE processing
10904 that requires %v to be at the beginning of the opcode name. */
10905 if (Pmode != word_mode)
10906 fputs ("\taddr32", asm_out_file);
10907 return "%vmaskmovdqu\t{%2, %1|%1, %2}";
10909 [(set_attr "type" "ssemov")
10910 (set_attr "prefix_data16" "1")
10911 (set (attr "length_address")
10912 (symbol_ref ("Pmode != word_mode")))
10913 ;; The implicit %rdi operand confuses default length_vex computation.
10914 (set (attr "length_vex")
10915 (symbol_ref ("3 + REX_SSE_REGNO_P (REGNO (operands[2]))")))
10916 (set_attr "prefix" "maybe_vex")
10917 (set_attr "mode" "TI")])
;; MXCSR load, modelled as an unspec_volatile that reads an SI memory
;; operand; the "memory" attribute is "load".
;; NOTE(review): the unspec code, condition string and output template are
;; on lines not visible in this excerpt.
10919 (define_insn "sse_ldmxcsr"
10920 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
10924 [(set_attr "type" "sse")
10925 (set_attr "atom_sse_attr" "mxcsr")
10926 (set_attr "prefix" "maybe_vex")
10927 (set_attr "memory" "load")])
;; MXCSR store, modelled as an SI memory operand set from the
;; UNSPECV_STMXCSR unspec_volatile; the "memory" attribute is "store".
;; NOTE(review): the condition string and output template are on lines not
;; visible in this excerpt.
10929 (define_insn "sse_stmxcsr"
10930 [(set (match_operand:SI 0 "memory_operand" "=m")
10931 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
10934 [(set_attr "type" "sse")
10935 (set_attr "atom_sse_attr" "mxcsr")
10936 (set_attr "prefix" "maybe_vex")
10937 (set_attr "memory" "store")])
;; Cache-line flush on an arbitrary address operand (constraint "p"),
;; modelled as an unspec_volatile with the "memory" attribute "unknown".
;; NOTE(review): the unspec code, condition string and output template are
;; on lines not visible in this excerpt.
10939 (define_insn "sse2_clflush"
10940 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
10944 [(set_attr "type" "sse")
10945 (set_attr "atom_sse_attr" "fence")
10946 (set_attr "memory" "unknown")])
;; mwait, modelled as an unspec_volatile of two SI registers pinned by
;; the "a" and "c" constraints.  The instruction length is fixed at 3.
;; NOTE(review): the unspec code, condition string and output template are
;; on lines not visible in this excerpt.
10949 (define_insn "sse3_mwait"
10950 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
10951 (match_operand:SI 1 "register_operand" "c")]
10954 ;; 64bit version is "mwait %rax,%rcx". But only lower 32bits are used.
10955 ;; Since 32bit register operands are implicitly zero extended to 64bit,
10956 ;; we only need to set up 32bit registers.
10958 [(set_attr "length" "3")])
;; monitor, modelled as an unspec_volatile of a pointer-mode register
;; ("a") and two SI registers ("c" and "d").  The length is 3, plus 1 when
;; Pmode differs from word_mode.
;; NOTE(review): the unspec code, condition string and output template are
;; on lines not visible in this excerpt.
10960 (define_insn "sse3_monitor_<mode>"
10961 [(unspec_volatile [(match_operand:P 0 "register_operand" "a")
10962 (match_operand:SI 1 "register_operand" "c")
10963 (match_operand:SI 2 "register_operand" "d")]
10966 ;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in
10967 ;; RCX and RDX are used. Since 32bit register operands are implicitly
10968 ;; zero extended to 64bit, we only need to set up 32bit registers.
10970 [(set (attr "length")
10971 (symbol_ref ("(Pmode != word_mode) + 3")))])
10973 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10975 ;; SSSE3 instructions
10977 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Code iterator over the four RTL operations used by the horizontal
;; add/subtract patterns that follow: plain and signed-saturating
;; addition (plus, ss_plus) and subtraction (minus, ss_minus).
10979 (define_code_iterator ssse3_plusminus [plus ss_plus minus ss_minus])
;; vph<op>w on V16HI: horizontal operation over adjacent element pairs.
;; Each ssse3_plusminus:HI node combines elements (0,1), (2,3), ...,
;; (14,15) of operand 1, and then the same pairs of operand 2; the
;; operation itself comes from the ssse3_plusminus code iterator.
;; NOTE(review): the vec_concat nesting that fixes the order of the sixteen
;; results, and the condition string, are on lines not visible in this
;; excerpt.
10981 (define_insn "avx2_ph<plusminus_mnemonic>wv16hi3"
10982 [(set (match_operand:V16HI 0 "register_operand" "=x")
10987 (ssse3_plusminus:HI
10989 (match_operand:V16HI 1 "register_operand" "x")
10990 (parallel [(const_int 0)]))
10991 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
10992 (ssse3_plusminus:HI
10993 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
10994 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
10996 (ssse3_plusminus:HI
10997 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
10998 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
10999 (ssse3_plusminus:HI
11000 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
11001 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
11004 (ssse3_plusminus:HI
11005 (vec_select:HI (match_dup 1) (parallel [(const_int 8)]))
11006 (vec_select:HI (match_dup 1) (parallel [(const_int 9)])))
11007 (ssse3_plusminus:HI
11008 (vec_select:HI (match_dup 1) (parallel [(const_int 10)]))
11009 (vec_select:HI (match_dup 1) (parallel [(const_int 11)]))))
11011 (ssse3_plusminus:HI
11012 (vec_select:HI (match_dup 1) (parallel [(const_int 12)]))
11013 (vec_select:HI (match_dup 1) (parallel [(const_int 13)])))
11014 (ssse3_plusminus:HI
11015 (vec_select:HI (match_dup 1) (parallel [(const_int 14)]))
11016 (vec_select:HI (match_dup 1) (parallel [(const_int 15)]))))))
11020 (ssse3_plusminus:HI
11022 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
11023 (parallel [(const_int 0)]))
11024 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
11025 (ssse3_plusminus:HI
11026 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
11027 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
11029 (ssse3_plusminus:HI
11030 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
11031 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
11032 (ssse3_plusminus:HI
11033 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
11034 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))
11037 (ssse3_plusminus:HI
11038 (vec_select:HI (match_dup 2) (parallel [(const_int 8)]))
11039 (vec_select:HI (match_dup 2) (parallel [(const_int 9)])))
11040 (ssse3_plusminus:HI
11041 (vec_select:HI (match_dup 2) (parallel [(const_int 10)]))
11042 (vec_select:HI (match_dup 2) (parallel [(const_int 11)]))))
11044 (ssse3_plusminus:HI
11045 (vec_select:HI (match_dup 2) (parallel [(const_int 12)]))
11046 (vec_select:HI (match_dup 2) (parallel [(const_int 13)])))
11047 (ssse3_plusminus:HI
11048 (vec_select:HI (match_dup 2) (parallel [(const_int 14)]))
11049 (vec_select:HI (match_dup 2) (parallel [(const_int 15)]))))))))]
11051 "vph<plusminus_mnemonic>w\t{%2, %1, %0|%0, %1, %2}"
11052 [(set_attr "type" "sseiadd")
11053 (set_attr "prefix_extra" "1")
11054 (set_attr "prefix" "vex")
11055 (set_attr "mode" "OI")])
;; V8HI horizontal word operation, one insn per code in ssse3_plusminus.
;; Terms pair adjacent HImode elements (0,1) .. (6,7) of operand 1, then
;; of operand 2.  Two alternatives: alternative 0 ties operand 1 to the
;; destination and prints the two-operand ph<plusminus_mnemonic>w (isa
;; noavx); alternative 1 prints the three-operand
;; vph<plusminus_mnemonic>w (isa avx).
11057 (define_insn "ssse3_ph<plusminus_mnemonic>wv8hi3"
11058 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
11062 (ssse3_plusminus:HI
11064 (match_operand:V8HI 1 "register_operand" "0,x")
11065 (parallel [(const_int 0)]))
11066 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
11067 (ssse3_plusminus:HI
11068 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
11069 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
11071 (ssse3_plusminus:HI
11072 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
11073 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
11074 (ssse3_plusminus:HI
11075 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
11076 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
;; Same adjacent-pair terms, now built from operand 2.
11079 (ssse3_plusminus:HI
11081 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
11082 (parallel [(const_int 0)]))
11083 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
11084 (ssse3_plusminus:HI
11085 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
11086 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
11088 (ssse3_plusminus:HI
11089 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
11090 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
11091 (ssse3_plusminus:HI
11092 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
11093 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
11096 ph<plusminus_mnemonic>w\t{%2, %0|%0, %2}
11097 vph<plusminus_mnemonic>w\t{%2, %1, %0|%0, %1, %2}"
11098 [(set_attr "isa" "noavx,avx")
11099 (set_attr "type" "sseiadd")
11100 (set_attr "atom_unit" "complex")
11101 (set_attr "prefix_data16" "1,*")
11102 (set_attr "prefix_extra" "1")
11103 (set_attr "prefix" "orig,vex")
11104 (set_attr "mode" "TI")])
;; V4HI horizontal word operation on "y"-constraint registers (presumably
;; the MMX register class; the constraint is defined outside this chunk).
;; Terms pair HImode elements (0,1) and (2,3) of operand 1, then of
;; operand 2.  Operand 1 is tied to the destination, so only the
;; two-operand ph<plusminus_mnemonic>w form is printed.
11106 (define_insn "ssse3_ph<plusminus_mnemonic>wv4hi3"
11107 [(set (match_operand:V4HI 0 "register_operand" "=y")
11110 (ssse3_plusminus:HI
11112 (match_operand:V4HI 1 "register_operand" "0")
11113 (parallel [(const_int 0)]))
11114 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
11115 (ssse3_plusminus:HI
11116 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
11117 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
11119 (ssse3_plusminus:HI
11121 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
11122 (parallel [(const_int 0)]))
11123 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
11124 (ssse3_plusminus:HI
11125 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
11126 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
11128 "ph<plusminus_mnemonic>w\t{%2, %0|%0, %2}"
11129 [(set_attr "type" "sseiadd")
11130 (set_attr "atom_unit" "complex")
11131 (set_attr "prefix_extra" "1")
;; prefix_rex is computed per insn by x86_extended_reg_mentioned_p.
11132 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
11133 (set_attr "mode" "DI")])
;; V8SI horizontal doubleword pattern.  The visible selects pair adjacent
;; SImode elements (0,1) .. (6,7) of operand 1, then the same pairs of
;; operand 2.  Operand 1 must be a register; operand 2 may be memory.
;; Emitted as the three-operand vph<plusminus_mnemonic>d (VEX).
11135 (define_insn "avx2_ph<plusminus_mnemonic>dv8si3"
11136 [(set (match_operand:V8SI 0 "register_operand" "=x")
11142 (match_operand:V8SI 1 "register_operand" "x")
11143 (parallel [(const_int 0)]))
11144 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
11146 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
11147 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
11150 (vec_select:SI (match_dup 1) (parallel [(const_int 4)]))
11151 (vec_select:SI (match_dup 1) (parallel [(const_int 5)])))
11153 (vec_select:SI (match_dup 1) (parallel [(const_int 6)]))
11154 (vec_select:SI (match_dup 1) (parallel [(const_int 7)])))))
11159 (match_operand:V8SI 2 "nonimmediate_operand" "xm")
11160 (parallel [(const_int 0)]))
11161 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
11163 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
11164 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))
11167 (vec_select:SI (match_dup 2) (parallel [(const_int 4)]))
11168 (vec_select:SI (match_dup 2) (parallel [(const_int 5)])))
11170 (vec_select:SI (match_dup 2) (parallel [(const_int 6)]))
11171 (vec_select:SI (match_dup 2) (parallel [(const_int 7)])))))))]
11173 "vph<plusminus_mnemonic>d\t{%2, %1, %0|%0, %1, %2}"
11174 [(set_attr "type" "sseiadd")
11175 (set_attr "prefix_extra" "1")
11176 (set_attr "prefix" "vex")
11177 (set_attr "mode" "OI")])
;; V4SI horizontal doubleword pattern.  The visible selects pair SImode
;; elements (0,1) and (2,3) of operand 1, then of operand 2.
;; Alternative 0 ties operand 1 to the destination and prints
;; ph<plusminus_mnemonic>d (isa noavx); alternative 1 prints the
;; three-operand vph<plusminus_mnemonic>d (isa avx).
11179 (define_insn "ssse3_ph<plusminus_mnemonic>dv4si3"
11180 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
11185 (match_operand:V4SI 1 "register_operand" "0,x")
11186 (parallel [(const_int 0)]))
11187 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
11189 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
11190 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
11194 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
11195 (parallel [(const_int 0)]))
11196 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
11198 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
11199 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
11202 ph<plusminus_mnemonic>d\t{%2, %0|%0, %2}
11203 vph<plusminus_mnemonic>d\t{%2, %1, %0|%0, %1, %2}"
11204 [(set_attr "isa" "noavx,avx")
11205 (set_attr "type" "sseiadd")
11206 (set_attr "atom_unit" "complex")
11207 (set_attr "prefix_data16" "1,*")
11208 (set_attr "prefix_extra" "1")
11209 (set_attr "prefix" "orig,vex")
11210 (set_attr "mode" "TI")])
;; V2SI horizontal doubleword pattern on "y"-constraint registers.  The
;; visible selects take elements 0 and 1 of operand 1, then elements 0
;; and 1 of operand 2.  Operand 1 is tied to the destination, so the
;; two-operand ph<plusminus_mnemonic>d form is printed.
11212 (define_insn "ssse3_ph<plusminus_mnemonic>dv2si3"
11213 [(set (match_operand:V2SI 0 "register_operand" "=y")
11217 (match_operand:V2SI 1 "register_operand" "0")
11218 (parallel [(const_int 0)]))
11219 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
11222 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
11223 (parallel [(const_int 0)]))
11224 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
11226 "ph<plusminus_mnemonic>d\t{%2, %0|%0, %2}"
11227 [(set_attr "type" "sseiadd")
11228 (set_attr "atom_unit" "complex")
11229 (set_attr "prefix_extra" "1")
11230 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
11231 (set_attr "mode" "DI")])
;; Pattern for vpmaddubsw on V32QI inputs with a V16HI result.  The
;; pattern first selects the even-numbered QImode elements (0, 2, .. 30)
;; of operand 1 and of operand 2, then the odd-numbered elements
;; (1, 3, .. 31) of the same two operands.  Operand 1 must be a register;
;; operand 2 may be memory.
11233 (define_insn "avx2_pmaddubsw256"
11234 [(set (match_operand:V16HI 0 "register_operand" "=x")
11239 (match_operand:V32QI 1 "register_operand" "x")
11240 (parallel [(const_int 0) (const_int 2)
11241 (const_int 4) (const_int 6)
11242 (const_int 8) (const_int 10)
11243 (const_int 12) (const_int 14)
11244 (const_int 16) (const_int 18)
11245 (const_int 20) (const_int 22)
11246 (const_int 24) (const_int 26)
11247 (const_int 28) (const_int 30)])))
11250 (match_operand:V32QI 2 "nonimmediate_operand" "xm")
11251 (parallel [(const_int 0) (const_int 2)
11252 (const_int 4) (const_int 6)
11253 (const_int 8) (const_int 10)
11254 (const_int 12) (const_int 14)
11255 (const_int 16) (const_int 18)
11256 (const_int 20) (const_int 22)
11257 (const_int 24) (const_int 26)
11258 (const_int 28) (const_int 30)]))))
;; Odd-numbered elements of the same two operands.
11261 (vec_select:V16QI (match_dup 1)
11262 (parallel [(const_int 1) (const_int 3)
11263 (const_int 5) (const_int 7)
11264 (const_int 9) (const_int 11)
11265 (const_int 13) (const_int 15)
11266 (const_int 17) (const_int 19)
11267 (const_int 21) (const_int 23)
11268 (const_int 25) (const_int 27)
11269 (const_int 29) (const_int 31)])))
11271 (vec_select:V16QI (match_dup 2)
11272 (parallel [(const_int 1) (const_int 3)
11273 (const_int 5) (const_int 7)
11274 (const_int 9) (const_int 11)
11275 (const_int 13) (const_int 15)
11276 (const_int 17) (const_int 19)
11277 (const_int 21) (const_int 23)
11278 (const_int 25) (const_int 27)
11279 (const_int 29) (const_int 31)]))))))]
11281 "vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
11282 [(set_attr "type" "sseiadd")
11283 (set_attr "prefix_extra" "1")
11284 (set_attr "prefix" "vex")
11285 (set_attr "mode" "OI")])
;; Pattern for pmaddubsw / vpmaddubsw on V16QI inputs with a V8HI result.
;; Even-numbered QImode elements (0, 2, .. 14) of operands 1 and 2 are
;; selected first, then the odd-numbered elements (1, 3, .. 15).
;; Alternative 0 ties operand 1 to the destination (isa noavx);
;; alternative 1 is the three-operand VEX form (isa avx).
11287 (define_insn "ssse3_pmaddubsw128"
11288 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
11293 (match_operand:V16QI 1 "register_operand" "0,x")
11294 (parallel [(const_int 0) (const_int 2)
11295 (const_int 4) (const_int 6)
11296 (const_int 8) (const_int 10)
11297 (const_int 12) (const_int 14)])))
11300 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")
11301 (parallel [(const_int 0) (const_int 2)
11302 (const_int 4) (const_int 6)
11303 (const_int 8) (const_int 10)
11304 (const_int 12) (const_int 14)]))))
11307 (vec_select:V8QI (match_dup 1)
11308 (parallel [(const_int 1) (const_int 3)
11309 (const_int 5) (const_int 7)
11310 (const_int 9) (const_int 11)
11311 (const_int 13) (const_int 15)])))
11313 (vec_select:V8QI (match_dup 2)
11314 (parallel [(const_int 1) (const_int 3)
11315 (const_int 5) (const_int 7)
11316 (const_int 9) (const_int 11)
11317 (const_int 13) (const_int 15)]))))))]
11320 pmaddubsw\t{%2, %0|%0, %2}
11321 vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
11322 [(set_attr "isa" "noavx,avx")
11323 (set_attr "type" "sseiadd")
11324 (set_attr "atom_unit" "simul")
11325 (set_attr "prefix_data16" "1,*")
11326 (set_attr "prefix_extra" "1")
11327 (set_attr "prefix" "orig,vex")
11328 (set_attr "mode" "TI")])
;; Pattern for pmaddubsw on V8QI inputs with a V4HI result, using
;; "y"-constraint registers.  Even-numbered QImode elements (0, 2, 4, 6)
;; of operands 1 and 2 are selected first, then the odd-numbered ones
;; (1, 3, 5, 7).  Operand 1 is tied to the destination.
11330 (define_insn "ssse3_pmaddubsw"
11331 [(set (match_operand:V4HI 0 "register_operand" "=y")
11336 (match_operand:V8QI 1 "register_operand" "0")
11337 (parallel [(const_int 0) (const_int 2)
11338 (const_int 4) (const_int 6)])))
11341 (match_operand:V8QI 2 "nonimmediate_operand" "ym")
11342 (parallel [(const_int 0) (const_int 2)
11343 (const_int 4) (const_int 6)]))))
11346 (vec_select:V4QI (match_dup 1)
11347 (parallel [(const_int 1) (const_int 3)
11348 (const_int 5) (const_int 7)])))
11350 (vec_select:V4QI (match_dup 2)
11351 (parallel [(const_int 1) (const_int 3)
11352 (const_int 5) (const_int 7)]))))))]
11354 "pmaddubsw\t{%2, %0|%0, %2}"
11355 [(set_attr "type" "sseiadd")
11356 (set_attr "atom_unit" "simul")
11357 (set_attr "prefix_extra" "1")
11358 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
11359 (set_attr "mode" "DI")])
;; Modes accepted by the pmulhrsw expander: V4HI and V8HI unconditionally,
;; V16HI only when TARGET_AVX2 holds.
11361 (define_mode_iterator PMULHRSW
11362 [V4HI V8HI (V16HI "TARGET_AVX2")])
;; Expander for pmulhrsw over the PMULHRSW modes.  The pattern
;; sign-extends operands 1 and 2 to <ssedoublemode>, multiplies them, and
;; wraps the product in lshiftrt, plus and a second lshiftrt.  The
;; preparation code sets operands[3] to CONST1_RTX (<MODE>mode) and then
;; calls ix86_fixup_binary_operands_no_copy for MULT on the operands.
11364 (define_expand "<ssse3_avx2>_pmulhrsw<mode>3"
11365 [(set (match_operand:PMULHRSW 0 "register_operand")
11367 (lshiftrt:<ssedoublemode>
11368 (plus:<ssedoublemode>
11369 (lshiftrt:<ssedoublemode>
11370 (mult:<ssedoublemode>
11371 (sign_extend:<ssedoublemode>
11372 (match_operand:PMULHRSW 1 "nonimmediate_operand"))
11373 (sign_extend:<ssedoublemode>
11374 (match_operand:PMULHRSW 2 "nonimmediate_operand")))
11380 operands[3] = CONST1_RTX(<MODE>mode);
11381 ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
;; Matching insn for pmulhrsw over the VI2_AVX2 modes.  Same RTL shape as
;; the expander: sign-extended operands 1 and 2 multiplied in
;; <ssedoublemode>, then lshiftrt / plus / lshiftrt, with operand 3
;; required to satisfy const1_operand.  Enabled when TARGET_SSSE3 holds
;; and ix86_binary_operator_ok accepts the operands for MULT.
;; Alternative 0 prints pmulhrsw (isa noavx), alternative 1 vpmulhrsw
;; (isa avx).
11384 (define_insn "*<ssse3_avx2>_pmulhrsw<mode>3"
11385 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
11387 (lshiftrt:<ssedoublemode>
11388 (plus:<ssedoublemode>
11389 (lshiftrt:<ssedoublemode>
11390 (mult:<ssedoublemode>
11391 (sign_extend:<ssedoublemode>
11392 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,x"))
11393 (sign_extend:<ssedoublemode>
11394 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,xm")))
11396 (match_operand:VI2_AVX2 3 "const1_operand"))
11398 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
11400 pmulhrsw\t{%2, %0|%0, %2}
11401 vpmulhrsw\t{%2, %1, %0|%0, %1, %2}"
11402 [(set_attr "isa" "noavx,avx")
11403 (set_attr "type" "sseimul")
11404 (set_attr "prefix_data16" "1,*")
11405 (set_attr "prefix_extra" "1")
11406 (set_attr "prefix" "orig,vex")
11407 (set_attr "mode" "<sseinsnmode>")])
;; V4HI pmulhrsw insn on "y"-constraint registers.  Operand 1 is tied to
;; the destination ("%0"), operand 2 may be a register or memory, and
;; operand 3 must satisfy const1_operand.  Enabled when TARGET_SSSE3
;; holds and ix86_binary_operator_ok accepts the operands for MULT.
11409 (define_insn "*ssse3_pmulhrswv4hi3"
11410 [(set (match_operand:V4HI 0 "register_operand" "=y")
11417 (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
11419 (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
11421 (match_operand:V4HI 3 "const1_operand"))
11423 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
11424 "pmulhrsw\t{%2, %0|%0, %2}"
11425 [(set_attr "type" "sseimul")
11426 (set_attr "prefix_extra" "1")
11427 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
11428 (set_attr "mode" "DI")])
;; pshufb / vpshufb over the VI1_AVX2 modes.  Operands 1 (register) and
;; 2 (register or memory) form the operand vector of the pattern.
;; Alternative 0 ties operand 1 to the destination and prints pshufb
;; (isa noavx); alternative 1 prints the three-operand vpshufb (isa avx).
11430 (define_insn "<ssse3_avx2>_pshufb<mode>3"
11431 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
11433 [(match_operand:VI1_AVX2 1 "register_operand" "0,x")
11434 (match_operand:VI1_AVX2 2 "nonimmediate_operand" "xm,xm")]
11438 pshufb\t{%2, %0|%0, %2}
11439 vpshufb\t{%2, %1, %0|%0, %1, %2}"
11440 [(set_attr "isa" "noavx,avx")
11441 (set_attr "type" "sselog1")
11442 (set_attr "prefix_data16" "1,*")
11443 (set_attr "prefix_extra" "1")
11444 (set_attr "prefix" "orig,vex")
11445 (set_attr "btver2_decode" "vector,vector")
11446 (set_attr "mode" "<sseinsnmode>")])
;; V8QI pshufb on "y"-constraint registers, expressed as an unspec of
;; operand 1 (tied to the destination) and operand 2 (register or memory).
;; The ';' that follows the output template begins an empty comment, so
;; it does not affect the definition.
11448 (define_insn "ssse3_pshufbv8qi3"
11449 [(set (match_operand:V8QI 0 "register_operand" "=y")
11450 (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0")
11451 (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
11454 "pshufb\t{%2, %0|%0, %2}";
11455 [(set_attr "type" "sselog1")
11456 (set_attr "prefix_extra" "1")
11457 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
11458 (set_attr "mode" "DI")])
;; psign / vpsign over the VI124_AVX2 modes; the mnemonic suffix comes
;; from <ssemodesuffix>.  Alternative 0 ties operand 1 to the destination
;; (isa noavx); alternative 1 is the three-operand VEX form (isa avx).
11460 (define_insn "<ssse3_avx2>_psign<mode>3"
11461 [(set (match_operand:VI124_AVX2 0 "register_operand" "=x,x")
11463 [(match_operand:VI124_AVX2 1 "register_operand" "0,x")
11464 (match_operand:VI124_AVX2 2 "nonimmediate_operand" "xm,xm")]
11468 psign<ssemodesuffix>\t{%2, %0|%0, %2}
11469 vpsign<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11470 [(set_attr "isa" "noavx,avx")
11471 (set_attr "type" "sselog1")
11472 (set_attr "prefix_data16" "1,*")
11473 (set_attr "prefix_extra" "1")
11474 (set_attr "prefix" "orig,vex")
11475 (set_attr "mode" "<sseinsnmode>")])
;; psign over the MMXMODEI modes on "y"-constraint registers; the
;; mnemonic suffix comes from <mmxvecsize>.  Operand 1 is tied to the
;; destination.  The ';' after the output template begins an empty
;; comment and has no effect.
11477 (define_insn "ssse3_psign<mode>3"
11478 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
11480 [(match_operand:MMXMODEI 1 "register_operand" "0")
11481 (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
11484 "psign<mmxvecsize>\t{%2, %0|%0, %2}";
11485 [(set_attr "type" "sselog1")
11486 (set_attr "prefix_extra" "1")
11487 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
11488 (set_attr "mode" "DI")])
;; palignr / vpalignr over the SSESCALARMODE modes, expressed as an
;; unspec of operands 1, 2 and an immediate.  Operand 3 must satisfy
;; const_0_to_255_mul_8_operand; the output code divides it by 8 before
;; printing, so the assembler immediate is one eighth of the RTL value.
;; The output code then switches on which_alternative to choose the
;; two-operand palignr or the three-operand vpalignr template.
11490 (define_insn "<ssse3_avx2>_palignr<mode>"
11491 [(set (match_operand:SSESCALARMODE 0 "register_operand" "=x,x")
11492 (unspec:SSESCALARMODE
11493 [(match_operand:SSESCALARMODE 1 "register_operand" "0,x")
11494 (match_operand:SSESCALARMODE 2 "nonimmediate_operand" "xm,xm")
11495 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n,n")]
11499 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
11501 switch (which_alternative)
11504 return "palignr\t{%3, %2, %0|%0, %2, %3}";
11506 return "vpalignr\t{%3, %2, %1, %0|%0, %1, %2, %3}";
11508 gcc_unreachable ();
11511 [(set_attr "isa" "noavx,avx")
11512 (set_attr "type" "sseishft")
11513 (set_attr "atom_unit" "sishuf")
11514 (set_attr "prefix_data16" "1,*")
11515 (set_attr "prefix_extra" "1")
11516 (set_attr "length_immediate" "1")
11517 (set_attr "prefix" "orig,vex")
11518 (set_attr "mode" "<sseinsnmode>")])
;; DImode palignr on "y"-constraint registers.  Operand 1 is tied to the
;; destination; operand 3 must satisfy const_0_to_255_mul_8_operand and is
;; divided by 8 in the output code before the immediate is printed.
11520 (define_insn "ssse3_palignrdi"
11521 [(set (match_operand:DI 0 "register_operand" "=y")
11522 (unspec:DI [(match_operand:DI 1 "register_operand" "0")
11523 (match_operand:DI 2 "nonimmediate_operand" "ym")
11524 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
11528 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
11529 return "palignr\t{%3, %2, %0|%0, %2, %3}";
11531 [(set_attr "type" "sseishft")
11532 (set_attr "atom_unit" "sishuf")
11533 (set_attr "prefix_extra" "1")
11534 (set_attr "length_immediate" "1")
11535 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
11536 (set_attr "mode" "DI")])
;; Vector abs over the VI124_AVX2_48_AVX512F modes, printed as
;; %vpabs<ssemodesuffix>.  Operand 1 may be a register or memory.
;; Enabled when TARGET_SSSE3 and <mask_mode512bit_condition> both hold;
;; <mask_operand2> is appended to the destination in the template.
11538 (define_insn "<mask_codefor>abs<mode>2<mask_name>"
11539 [(set (match_operand:VI124_AVX2_48_AVX512F 0 "register_operand" "=v")
11540 (abs:VI124_AVX2_48_AVX512F
11541 (match_operand:VI124_AVX2_48_AVX512F 1 "nonimmediate_operand" "vm")))]
11542 "TARGET_SSSE3 && <mask_mode512bit_condition>"
11543 "%vpabs<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
11544 [(set_attr "type" "sselog1")
11545 (set_attr "prefix_data16" "1")
11546 (set_attr "prefix_extra" "1")
11547 (set_attr "prefix" "maybe_vex")
11548 (set_attr "mode" "<sseinsnmode>")])
;; Expander for vector abs over the VI124_AVX2_48_AVX512F modes.  Its
;; preparation code calls ix86_expand_sse2_abs (operands[0], operands[1]);
;; the condition guarding that call is not shown on these lines.
11550 (define_expand "abs<mode>2"
11551 [(set (match_operand:VI124_AVX2_48_AVX512F 0 "register_operand")
11552 (abs:VI124_AVX2_48_AVX512F
11553 (match_operand:VI124_AVX2_48_AVX512F 1 "nonimmediate_operand")))]
11558 ix86_expand_sse2_abs (operands[0], operands[1]);
;; abs over the MMXMODEI modes on "y"-constraint registers, printed as
;; pabs<mmxvecsize>.  Operand 1 may be a register or memory.  The ';'
;; after the output template begins an empty comment and has no effect.
11563 (define_insn "abs<mode>2"
11564 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
11566 (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
11568 "pabs<mmxvecsize>\t{%1, %0|%0, %1}";
11569 [(set_attr "type" "sselog1")
11570 (set_attr "prefix_rep" "0")
11571 (set_attr "prefix_extra" "1")
11572 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
11573 (set_attr "mode" "DI")])
11575 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
11577 ;; AMD SSE4A instructions
11579 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; movnt<ssemodesuffix> store: writes register operand 1 (a MODEF mode)
;; to memory operand 0.
11581 (define_insn "sse4a_movnt<mode>"
11582 [(set (match_operand:MODEF 0 "memory_operand" "=m")
11584 [(match_operand:MODEF 1 "register_operand" "x")]
11587 "movnt<ssemodesuffix>\t{%1, %0|%0, %1}"
11588 [(set_attr "type" "ssemov")
11589 (set_attr "mode" "<MODE>")])
;; movnt<ssescalarmodesuffix> store of a vector's first element: element 0
;; of VF_128 register operand 1 is selected and written to the
;; <ssescalarmode> memory operand 0.
11591 (define_insn "sse4a_vmmovnt<mode>"
11592 [(set (match_operand:<ssescalarmode> 0 "memory_operand" "=m")
11593 (unspec:<ssescalarmode>
11594 [(vec_select:<ssescalarmode>
11595 (match_operand:VF_128 1 "register_operand" "x")
11596 (parallel [(const_int 0)]))]
11599 "movnt<ssescalarmodesuffix>\t{%1, %0|%0, %1}"
11600 [(set_attr "type" "ssemov")
11601 (set_attr "mode" "<ssescalarmode>")])
;; Immediate form of extrq on V2DI.  Operand 1 is tied to the
;; destination; operands 2 and 3 are constants accepted by
;; const_0_to_255_operand, printed as two immediates (length_immediate 2).
11603 (define_insn "sse4a_extrqi"
11604 [(set (match_operand:V2DI 0 "register_operand" "=x")
11605 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11606 (match_operand 2 "const_0_to_255_operand")
11607 (match_operand 3 "const_0_to_255_operand")]
11610 "extrq\t{%3, %2, %0|%0, %2, %3}"
11611 [(set_attr "type" "sse")
11612 (set_attr "prefix_data16" "1")
11613 (set_attr "length_immediate" "2")
11614 (set_attr "mode" "TI")])
;; Register form of extrq on V2DI.  Operand 1 is tied to the destination;
;; operand 2 is a V16QI register.
11616 (define_insn "sse4a_extrq"
11617 [(set (match_operand:V2DI 0 "register_operand" "=x")
11618 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11619 (match_operand:V16QI 2 "register_operand" "x")]
11622 "extrq\t{%2, %0|%0, %2}"
11623 [(set_attr "type" "sse")
11624 (set_attr "prefix_data16" "1")
11625 (set_attr "mode" "TI")])
;; Immediate form of insertq on V2DI.  Operand 1 is tied to the
;; destination, operand 2 is a V2DI register, and operands 3 and 4 are
;; constants accepted by const_0_to_255_operand, printed as two
;; immediates (length_immediate 2).
11627 (define_insn "sse4a_insertqi"
11628 [(set (match_operand:V2DI 0 "register_operand" "=x")
11629 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11630 (match_operand:V2DI 2 "register_operand" "x")
11631 (match_operand 3 "const_0_to_255_operand")
11632 (match_operand 4 "const_0_to_255_operand")]
11635 "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
11636 [(set_attr "type" "sseins")
11637 (set_attr "prefix_data16" "0")
11638 (set_attr "prefix_rep" "1")
11639 (set_attr "length_immediate" "2")
11640 (set_attr "mode" "TI")])
;; Register form of insertq on V2DI.  Operand 1 is tied to the
;; destination; operand 2 is a V2DI register.
11642 (define_insn "sse4a_insertq"
11643 [(set (match_operand:V2DI 0 "register_operand" "=x")
11644 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11645 (match_operand:V2DI 2 "register_operand" "x")]
11648 "insertq\t{%2, %0|%0, %2}"
11649 [(set_attr "type" "sseins")
11650 (set_attr "prefix_data16" "0")
11651 (set_attr "prefix_rep" "1")
11652 (set_attr "mode" "TI")])
11654 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
11656 ;; Intel SSE4.1 instructions
11658 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
11660 ;; Mapping of immediate bits for blend instructions
;; Per-mode upper bound of the blend immediate; the value is 2^N - 1 for
;; an N-element vector (V8SF 255, V4SF/V4DF 15, V2DF 3).  It is spliced
;; into the predicate name const_0_to_<blendbits>_operand.
11661 (define_mode_attr blendbits
11662 [(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")])
;; Immediate blend over the VF_128_256 modes, expressed as a vec_merge
;; whose first arm is operand 2 (register or memory), whose second arm is
;; operand 1 (register), and whose mask is the constant operand 3, bounded
;; per mode by const_0_to_<blendbits>_operand.  Alternative 0 ties
;; operand 1 to the destination (isa noavx); alternative 1 is the VEX
;; form (isa avx).
11664 (define_insn "<sse4_1>_blend<ssemodesuffix><avxsizesuffix>"
11665 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
11666 (vec_merge:VF_128_256
11667 (match_operand:VF_128_256 2 "nonimmediate_operand" "xm,xm")
11668 (match_operand:VF_128_256 1 "register_operand" "0,x")
11669 (match_operand:SI 3 "const_0_to_<blendbits>_operand")))]
11672 blend<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
11673 vblend<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11674 [(set_attr "isa" "noavx,avx")
11675 (set_attr "type" "ssemov")
11676 (set_attr "length_immediate" "1")
11677 (set_attr "prefix_data16" "1,*")
11678 (set_attr "prefix_extra" "1")
11679 (set_attr "prefix" "orig,vex")
11680 (set_attr "mode" "<MODE>")])
;; Variable blend over the VF_128_256 modes.  Operand 3 is a register
;; selector: alternative 0 (isa noavx) requires the "Yz" constraint for it
;; and ties operand 1 to the destination; alternative 1 (isa avx) accepts
;; any "x" register and prints the four-operand vblendv form.
;; NOTE(review): length_immediate is "1" for both alternatives here,
;; while the pblendvb pattern in this file uses "*,1" -- confirm whether
;; the noavx alternative should really count an immediate byte.
11682 (define_insn "<sse4_1>_blendv<ssemodesuffix><avxsizesuffix>"
11683 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
11685 [(match_operand:VF_128_256 1 "register_operand" "0,x")
11686 (match_operand:VF_128_256 2 "nonimmediate_operand" "xm,xm")
11687 (match_operand:VF_128_256 3 "register_operand" "Yz,x")]
11691 blendv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
11692 vblendv<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11693 [(set_attr "isa" "noavx,avx")
11694 (set_attr "type" "ssemov")
11695 (set_attr "length_immediate" "1")
11696 (set_attr "prefix_data16" "1,*")
11697 (set_attr "prefix_extra" "1")
11698 (set_attr "prefix" "orig,vex")
11699 (set_attr "btver2_decode" "vector,vector")
11700 (set_attr "mode" "<MODE>")])
;; dp<ssemodesuffix> / vdp<ssemodesuffix> over the VF_128_256 modes.
;; Operands 1 and 2 are the vector inputs (operand 1 carries the "%"
;; constraint modifier) and operand 3 is an immediate accepted by
;; const_0_to_255_operand.  Alternative 0 is the two-operand legacy form,
;; alternative 1 the VEX form.
11702 (define_insn "<sse4_1>_dp<ssemodesuffix><avxsizesuffix>"
11703 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
11705 [(match_operand:VF_128_256 1 "nonimmediate_operand" "%0,x")
11706 (match_operand:VF_128_256 2 "nonimmediate_operand" "xm,xm")
11707 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
11711 dp<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
11712 vdp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11713 [(set_attr "isa" "noavx,avx")
11714 (set_attr "type" "ssemul")
11715 (set_attr "length_immediate" "1")
11716 (set_attr "prefix_data16" "1,*")
11717 (set_attr "prefix_extra" "1")
11718 (set_attr "prefix" "orig,vex")
11719 (set_attr "btver2_decode" "vector,vector")
11720 (set_attr "mode" "<MODE>")])
11722 ;; Mode attribute used by `vmovntdqa' pattern
;; Maps each DImode-element vector mode to the prefix used in the
;; movntdqa pattern name: V2DI -> sse4_1, V4DI -> avx2, V8DI -> avx512f.
11723 (define_mode_attr vi8_sse4_1_avx2_avx512
11724 [(V2DI "sse4_1") (V4DI "avx2") (V8DI "avx512f")])
;; movntdqa load over the VI8_AVX2_AVX512F modes: memory operand 1 is
;; read into register operand 0 via an unspec.  Alternative 0 uses an "x"
;; destination with prefix maybe_vex; alternative 1 uses a "v" destination
;; with prefix evex.
11726 (define_insn "<vi8_sse4_1_avx2_avx512>_movntdqa"
11727 [(set (match_operand:VI8_AVX2_AVX512F 0 "register_operand" "=x, v")
11728 (unspec:VI8_AVX2_AVX512F [(match_operand:VI8_AVX2_AVX512F 1 "memory_operand" "m, m")]
11731 "%vmovntdqa\t{%1, %0|%0, %1}"
11732 [(set_attr "type" "ssemov")
11733 (set_attr "prefix_extra" "1, *")
11734 (set_attr "prefix" "maybe_vex, evex")
11735 (set_attr "mode" "<sseinsnmode>")])
;; mpsadbw / vmpsadbw over the VI1_AVX2 modes.  Operands 1 and 2 are the
;; vector inputs and operand 3 is an immediate accepted by
;; const_0_to_255_operand.  Alternative 0 ties operand 1 to the
;; destination (isa noavx); alternative 1 is the VEX form (isa avx).
11737 (define_insn "<sse4_1_avx2>_mpsadbw"
11738 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
11740 [(match_operand:VI1_AVX2 1 "register_operand" "0,x")
11741 (match_operand:VI1_AVX2 2 "nonimmediate_operand" "xm,xm")
11742 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
11746 mpsadbw\t{%3, %2, %0|%0, %2, %3}
11747 vmpsadbw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11748 [(set_attr "isa" "noavx,avx")
11749 (set_attr "type" "sselog1")
11750 (set_attr "length_immediate" "1")
11751 (set_attr "prefix_extra" "1")
11752 (set_attr "prefix" "orig,vex")
11753 (set_attr "btver2_decode" "vector,vector")
11754 (set_attr "mode" "<sseinsnmode>")])
;; vpackusdw: builds a V16HI result from two V8SI inputs, operand 1
;; (register) and operand 2 (register or memory).
11756 (define_insn "avx2_packusdw"
11757 [(set (match_operand:V16HI 0 "register_operand" "=x")
11760 (match_operand:V8SI 1 "register_operand" "x"))
11762 (match_operand:V8SI 2 "nonimmediate_operand" "xm"))))]
11764 "vpackusdw\t{%2, %1, %0|%0, %1, %2}"
11765 [(set_attr "type" "sselog")
11766 (set_attr "prefix_extra" "1")
11767 (set_attr "prefix" "vex")
11768 (set_attr "mode" "OI")])
;; packusdw / vpackusdw: builds a V8HI result from two V4SI inputs.
;; Alternative 0 ties operand 1 to the destination (isa noavx);
;; alternative 1 is the three-operand VEX form (isa avx).
11770 (define_insn "sse4_1_packusdw"
11771 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
11774 (match_operand:V4SI 1 "register_operand" "0,x"))
11776 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm"))))]
11779 packusdw\t{%2, %0|%0, %2}
11780 vpackusdw\t{%2, %1, %0|%0, %1, %2}"
11781 [(set_attr "isa" "noavx,avx")
11782 (set_attr "type" "sselog")
11783 (set_attr "prefix_extra" "1")
11784 (set_attr "prefix" "orig,vex")
11785 (set_attr "mode" "TI")])
;; pblendvb / vpblendvb over the VI1_AVX2 modes.  Operand 3 is a register
;; selector: alternative 0 (isa noavx) requires the "Yz" constraint for it
;; and ties operand 1 to the destination; alternative 1 (isa avx) accepts
;; any "x" register.  length_immediate is set only for the avx
;; alternative ("*,1").
11787 (define_insn "<sse4_1_avx2>_pblendvb"
11788 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
11790 [(match_operand:VI1_AVX2 1 "register_operand" "0,x")
11791 (match_operand:VI1_AVX2 2 "nonimmediate_operand" "xm,xm")
11792 (match_operand:VI1_AVX2 3 "register_operand" "Yz,x")]
11796 pblendvb\t{%3, %2, %0|%0, %2, %3}
11797 vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11798 [(set_attr "isa" "noavx,avx")
11799 (set_attr "type" "ssemov")
11800 (set_attr "prefix_extra" "1")
11801 (set_attr "length_immediate" "*,1")
11802 (set_attr "prefix" "orig,vex")
11803 (set_attr "btver2_decode" "vector,vector")
11804 (set_attr "mode" "<sseinsnmode>")])
;; pblendw / vpblendw on V8HI.  The pattern lists operand 2 (register or
;; memory) before operand 1 (register), followed by the immediate
;; operand 3 accepted by const_0_to_255_operand.  Alternative 0 ties
;; operand 1 to the destination (isa noavx); alternative 1 is the VEX
;; form (isa avx).
11806 (define_insn "sse4_1_pblendw"
11807 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
11809 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
11810 (match_operand:V8HI 1 "register_operand" "0,x")
11811 (match_operand:SI 3 "const_0_to_255_operand" "n,n")))]
11814 pblendw\t{%3, %2, %0|%0, %2, %3}
11815 vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11816 [(set_attr "isa" "noavx,avx")
11817 (set_attr "type" "ssemov")
11818 (set_attr "prefix_extra" "1")
11819 (set_attr "length_immediate" "1")
11820 (set_attr "prefix" "orig,vex")
11821 (set_attr "mode" "TI")])
11823 ;; The builtin uses an 8-bit immediate. Expand that.
;; Operand 3 arrives as a constant in 0..255.  The preparation code keeps
;; its low 8 bits and copies them into bits 8..15 as well
;; (val << 8 | val), so the V16HI pattern receives a 16-bit value.
11824 (define_expand "avx2_pblendw"
11825 [(set (match_operand:V16HI 0 "register_operand")
11827 (match_operand:V16HI 2 "nonimmediate_operand")
11828 (match_operand:V16HI 1 "register_operand")
11829 (match_operand:SI 3 "const_0_to_255_operand")))]
11832 HOST_WIDE_INT val = INTVAL (operands[3]) & 0xff;
11833 operands[3] = GEN_INT (val << 8 | val);
;; Matching insn for V16HI vpblendw.  Operand 3 must satisfy
;; avx2_pblendw_operand; the output code reduces it to its low 8 bits
;; (& 0xff) before printing it as the instruction immediate.
11836 (define_insn "*avx2_pblendw"
11837 [(set (match_operand:V16HI 0 "register_operand" "=x")
11839 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
11840 (match_operand:V16HI 1 "register_operand" "x")
11841 (match_operand:SI 3 "avx2_pblendw_operand" "n")))]
11844 operands[3] = GEN_INT (INTVAL (operands[3]) & 0xff);
11845 return "vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}";
11847 [(set_attr "type" "ssemov")
11848 (set_attr "prefix_extra" "1")
11849 (set_attr "length_immediate" "1")
11850 (set_attr "prefix" "vex")
11851 (set_attr "mode" "OI")])
;; vpblendd over the VI4_AVX2 modes, expressed as a vec_merge whose first
;; arm is operand 2 (register or memory), whose second arm is operand 1
;; (register), and whose mask is the immediate operand 3 accepted by
;; const_0_to_255_operand.
11853 (define_insn "avx2_pblendd<mode>"
11854 [(set (match_operand:VI4_AVX2 0 "register_operand" "=x")
11855 (vec_merge:VI4_AVX2
11856 (match_operand:VI4_AVX2 2 "nonimmediate_operand" "xm")
11857 (match_operand:VI4_AVX2 1 "register_operand" "x")
11858 (match_operand:SI 3 "const_0_to_255_operand" "n")))]
11860 "vpblendd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11861 [(set_attr "type" "ssemov")
11862 (set_attr "prefix_extra" "1")
11863 (set_attr "length_immediate" "1")
11864 (set_attr "prefix" "vex")
11865 (set_attr "mode" "<sseinsnmode>")])
;; phminposuw on V8HI, modelled as UNSPEC_PHMINPOSUW of operand 1
;; (register or memory).  Printed through %v, with prefix maybe_vex.
11867 (define_insn "sse4_1_phminposuw"
11868 [(set (match_operand:V8HI 0 "register_operand" "=x")
11869 (unspec:V8HI [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
11870 UNSPEC_PHMINPOSUW))]
11872 "%vphminposuw\t{%1, %0|%0, %1}"
11873 [(set_attr "type" "sselog1")
11874 (set_attr "prefix_extra" "1")
11875 (set_attr "prefix" "maybe_vex")
11876 (set_attr "mode" "TI")])
;; vpmov<extsuffix>bw: V16QI operand 1 (register or memory) to a V16HI
;; destination.  <code> and <extsuffix> are filled in by an iterator
;; defined outside this pattern.
11878 (define_insn "avx2_<code>v16qiv16hi2"
11879 [(set (match_operand:V16HI 0 "register_operand" "=x")
11881 (match_operand:V16QI 1 "nonimmediate_operand" "xm")))]
11883 "vpmov<extsuffix>bw\t{%1, %0|%0, %1}"
11884 [(set_attr "type" "ssemov")
11885 (set_attr "prefix_extra" "1")
11886 (set_attr "prefix" "vex")
11887 (set_attr "mode" "OI")])
;; %vpmov<extsuffix>bw: elements 0..7 of V16QI operand 1 to a V8HI
;; destination.  The Intel-syntax template prints the source as %q1
;; (presumably a 64-bit memory reference, matching the 8 bytes selected
;; -- confirm against the operand-modifier table); ssememalign is 64.
11889 (define_insn "sse4_1_<code>v8qiv8hi2"
11890 [(set (match_operand:V8HI 0 "register_operand" "=x")
11893 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
11894 (parallel [(const_int 0) (const_int 1)
11895 (const_int 2) (const_int 3)
11896 (const_int 4) (const_int 5)
11897 (const_int 6) (const_int 7)]))))]
11899 "%vpmov<extsuffix>bw\t{%1, %0|%0, %q1}"
11900 [(set_attr "type" "ssemov")
11901 (set_attr "ssememalign" "64")
11902 (set_attr "prefix_extra" "1")
11903 (set_attr "prefix" "maybe_vex")
11904 (set_attr "mode" "TI")])
;; vpmov<extsuffix>bd: V16QI operand 1 (register or memory) to a V16SI
;; destination, with <mask_operand2> appended to the destination in the
;; template.  EVEX-prefixed; the Intel-syntax source is printed as %q1.
11906 (define_insn "<mask_codefor>avx512f_<code>v16qiv16si2<mask_name>"
11907 [(set (match_operand:V16SI 0 "register_operand" "=v")
11909 (match_operand:V16QI 1 "nonimmediate_operand" "vm")))]
11911 "vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
11912 [(set_attr "type" "ssemov")
11913 (set_attr "prefix" "evex")
11914 (set_attr "mode" "XI")])
;; vpmov<extsuffix>bd: elements 0..7 of V16QI operand 1 to a V8SI
;; destination.  VEX-prefixed; the Intel-syntax source is printed as %q1.
11916 (define_insn "avx2_<code>v8qiv8si2"
11917 [(set (match_operand:V8SI 0 "register_operand" "=x")
11920 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
11921 (parallel [(const_int 0) (const_int 1)
11922 (const_int 2) (const_int 3)
11923 (const_int 4) (const_int 5)
11924 (const_int 6) (const_int 7)]))))]
11926 "vpmov<extsuffix>bd\t{%1, %0|%0, %q1}"
11927 [(set_attr "type" "ssemov")
11928 (set_attr "prefix_extra" "1")
11929 (set_attr "prefix" "vex")
11930 (set_attr "mode" "OI")])
;; %vpmov<extsuffix>bd: elements 0..3 of V16QI operand 1 to a V4SI
;; destination.  The Intel-syntax source is printed as %k1 and
;; ssememalign is 32.
11932 (define_insn "sse4_1_<code>v4qiv4si2"
11933 [(set (match_operand:V4SI 0 "register_operand" "=x")
11936 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
11937 (parallel [(const_int 0) (const_int 1)
11938 (const_int 2) (const_int 3)]))))]
11940 "%vpmov<extsuffix>bd\t{%1, %0|%0, %k1}"
11941 [(set_attr "type" "ssemov")
11942 (set_attr "ssememalign" "32")
11943 (set_attr "prefix_extra" "1")
11944 (set_attr "prefix" "maybe_vex")
11945 (set_attr "mode" "TI")])
;; vpmov<extsuffix>wd: V16HI operand 1 (register or memory) to a V16SI
;; destination, with <mask_operand2> appended to the destination in the
;; template.  EVEX-prefixed.
11947 (define_insn "avx512f_<code>v16hiv16si2<mask_name>"
11948 [(set (match_operand:V16SI 0 "register_operand" "=v")
11950 (match_operand:V16HI 1 "nonimmediate_operand" "vm")))]
11952 "vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
11953 [(set_attr "type" "ssemov")
11954 (set_attr "prefix" "evex")
11955 (set_attr "mode" "XI")])
;; vpmov<extsuffix>wd: V8HI operand 1 (register or memory) to a V8SI
;; destination.  VEX-prefixed.
11957 (define_insn "avx2_<code>v8hiv8si2"
11958 [(set (match_operand:V8SI 0 "register_operand" "=x")
11960 (match_operand:V8HI 1 "nonimmediate_operand" "xm")))]
11962 "vpmov<extsuffix>wd\t{%1, %0|%0, %1}"
11963 [(set_attr "type" "ssemov")
11964 (set_attr "prefix_extra" "1")
11965 (set_attr "prefix" "vex")
11966 (set_attr "mode" "OI")])
;; %vpmov<extsuffix>wd: elements 0..3 of V8HI operand 1 to a V4SI
;; destination.  The Intel-syntax source is printed as %q1 and
;; ssememalign is 64.
11968 (define_insn "sse4_1_<code>v4hiv4si2"
11969 [(set (match_operand:V4SI 0 "register_operand" "=x")
11972 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
11973 (parallel [(const_int 0) (const_int 1)
11974 (const_int 2) (const_int 3)]))))]
11976 "%vpmov<extsuffix>wd\t{%1, %0|%0, %q1}"
11977 [(set_attr "type" "ssemov")
11978 (set_attr "ssememalign" "64")
11979 (set_attr "prefix_extra" "1")
11980 (set_attr "prefix" "maybe_vex")
11981 (set_attr "mode" "TI")])
;; vpmov<extsuffix>bq: elements 0..7 of V16QI operand 1 to a V8DI
;; destination, with <mask_operand2> appended to the destination.
;; EVEX-prefixed.
;; NOTE(review): eight QImode elements are selected, but the Intel-syntax
;; source is printed with %k1, the same modifier the four-element
;; avx2_<code>v4qiv4di2 pattern uses -- confirm this is the intended
;; operand size.
11983 (define_insn "avx512f_<code>v8qiv8di2<mask_name>"
11984 [(set (match_operand:V8DI 0 "register_operand" "=v")
11987 (match_operand:V16QI 1 "nonimmediate_operand" "vm")
11988 (parallel [(const_int 0) (const_int 1)
11989 (const_int 2) (const_int 3)
11990 (const_int 4) (const_int 5)
11991 (const_int 6) (const_int 7)]))))]
11993 "vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %k1}"
11994 [(set_attr "type" "ssemov")
11995 (set_attr "prefix" "evex")
11996 (set_attr "mode" "XI")])
;; vpmov<extsuffix>bq: elements 0..3 of V16QI operand 1 to a V4DI
;; destination.  VEX-prefixed; the Intel-syntax source is printed as %k1.
11998 (define_insn "avx2_<code>v4qiv4di2"
11999 [(set (match_operand:V4DI 0 "register_operand" "=x")
12002 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
12003 (parallel [(const_int 0) (const_int 1)
12004 (const_int 2) (const_int 3)]))))]
12006 "vpmov<extsuffix>bq\t{%1, %0|%0, %k1}"
12007 [(set_attr "type" "ssemov")
12008 (set_attr "prefix_extra" "1")
12009 (set_attr "prefix" "vex")
12010 (set_attr "mode" "OI")])
;; %vpmov<extsuffix>bq: elements 0 and 1 of V16QI operand 1 to a V2DI
;; destination.  The Intel-syntax source is printed as %w1 and
;; ssememalign is 16.
12012 (define_insn "sse4_1_<code>v2qiv2di2"
12013 [(set (match_operand:V2DI 0 "register_operand" "=x")
12016 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
12017 (parallel [(const_int 0) (const_int 1)]))))]
12019 "%vpmov<extsuffix>bq\t{%1, %0|%0, %w1}"
12020 [(set_attr "type" "ssemov")
12021 (set_attr "ssememalign" "16")
12022 (set_attr "prefix_extra" "1")
12023 (set_attr "prefix" "maybe_vex")
12024 (set_attr "mode" "TI")])
;; Convert the whole V8HI operand 1 (register or memory) into the V8DI
;; register operand 0 with vpmov<extsuffix>wq (EVEX, optionally masked
;; through <mask_name>/<mask_operand2>).
;; The source is the full 128-bit V8HI operand with no element selection,
;; so the Intel-syntax operand is printed as plain %1 (as in
;; avx2_<code>v8hiv8si2) rather than with a 64-bit %q size override.
12026 (define_insn "avx512f_<code>v8hiv8di2<mask_name>"
12027 [(set (match_operand:V8DI 0 "register_operand" "=v")
12029 (match_operand:V8HI 1 "nonimmediate_operand" "vm")))]
12031 "vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
12032 [(set_attr "type" "ssemov")
12033 (set_attr "prefix" "evex")
12034 (set_attr "mode" "XI")])
;; Convert elements 0-3 of the V8HI operand 1 into the V4DI register
;; operand 0 with vpmov<extsuffix>wq (256-bit VEX form).  The Intel-syntax
;; source is printed with %q1, matching the 64 bits of selected data.
12036 (define_insn "avx2_<code>v4hiv4di2"
12037 [(set (match_operand:V4DI 0 "register_operand" "=x")
12040 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
12041 (parallel [(const_int 0) (const_int 1)
12042 (const_int 2) (const_int 3)]))))]
12044 "vpmov<extsuffix>wq\t{%1, %0|%0, %q1}"
12045 [(set_attr "type" "ssemov")
12046 (set_attr "prefix_extra" "1")
12047 (set_attr "prefix" "vex")
12048 (set_attr "mode" "OI")])
;; Convert elements 0-1 of the V8HI operand 1 into the V2DI register
;; operand 0 with %vpmov<extsuffix>wq.  The Intel-syntax source is printed
;; with %k1 and ssememalign is 32, both matching the 32 bits of selected data.
12050 (define_insn "sse4_1_<code>v2hiv2di2"
12051 [(set (match_operand:V2DI 0 "register_operand" "=x")
12054 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
12055 (parallel [(const_int 0) (const_int 1)]))))]
12057 "%vpmov<extsuffix>wq\t{%1, %0|%0, %k1}"
12058 [(set_attr "type" "ssemov")
12059 (set_attr "ssememalign" "32")
12060 (set_attr "prefix_extra" "1")
12061 (set_attr "prefix" "maybe_vex")
12062 (set_attr "mode" "TI")])
;; Convert the whole V8SI operand 1 (register or memory) into the V8DI
;; register operand 0 with vpmov<extsuffix>dq (EVEX, optionally masked
;; through <mask_name>/<mask_operand2>).
12064 (define_insn "avx512f_<code>v8siv8di2<mask_name>"
12065 [(set (match_operand:V8DI 0 "register_operand" "=v")
12067 (match_operand:V8SI 1 "nonimmediate_operand" "vm")))]
12069 "vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
12070 [(set_attr "type" "ssemov")
12071 (set_attr "prefix" "evex")
12072 (set_attr "mode" "XI")])
;; Convert the whole V4SI operand 1 (register or memory) into the V4DI
;; register operand 0 with vpmov<extsuffix>dq (256-bit form, mode OI).
;; The "prefix" attribute is stated explicitly as "vex", consistent with the
;; other avx2_<code>* conversion patterns in this file.
12074 (define_insn "avx2_<code>v4siv4di2"
12075 [(set (match_operand:V4DI 0 "register_operand" "=x")
12077 (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
12079 "vpmov<extsuffix>dq\t{%1, %0|%0, %1}"
12080 [(set_attr "type" "ssemov")
12081 (set_attr "prefix_extra" "1")
(set_attr "prefix" "vex")
12082 (set_attr "mode" "OI")])
;; Convert elements 0-1 of the V4SI operand 1 into the V2DI register
;; operand 0 with %vpmov<extsuffix>dq.  The Intel-syntax source is printed
;; with %q1 and ssememalign is 64, both matching the 64 bits of selected data.
12084 (define_insn "sse4_1_<code>v2siv2di2"
12085 [(set (match_operand:V2DI 0 "register_operand" "=x")
12088 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
12089 (parallel [(const_int 0) (const_int 1)]))))]
12091 "%vpmov<extsuffix>dq\t{%1, %0|%0, %q1}"
12092 [(set_attr "type" "ssemov")
12093 (set_attr "ssememalign" "64")
12094 (set_attr "prefix_extra" "1")
12095 (set_attr "prefix" "maybe_vex")
12096 (set_attr "mode" "TI")])
12098 ;; vtestps/vtestpd are very similar to comiss and ucomiss when
12099 ;; setting FLAGS_REG.  But they are not really compare instructions.
;; Sets FLAGS_REG (CCmode) from an unspec of two VF_128_256 operands:
;; operand 0 must be a register, operand 1 may be a register or memory.
;; Emitted as vtest<ssemodesuffix> with a VEX prefix.
12100 (define_insn "avx_vtest<ssemodesuffix><avxsizesuffix>"
12101 [(set (reg:CC FLAGS_REG)
12102 (unspec:CC [(match_operand:VF_128_256 0 "register_operand" "x")
12103 (match_operand:VF_128_256 1 "nonimmediate_operand" "xm")]
12106 "vtest<ssemodesuffix>\t{%1, %0|%0, %1}"
12107 [(set_attr "type" "ssecomi")
12108 (set_attr "prefix_extra" "1")
12109 (set_attr "prefix" "vex")
12110 (set_attr "mode" "<MODE>")])
12112 ;; ptest is very similar to comiss and ucomiss when setting FLAGS_REG.
12113 ;; But it is not really a compare instruction.
;; 256-bit vptest: sets FLAGS_REG (CCmode) from an unspec of two V4DI
;; operands (operand 0 register, operand 1 register or memory).
12114 (define_insn "avx_ptest256"
12115 [(set (reg:CC FLAGS_REG)
12116 (unspec:CC [(match_operand:V4DI 0 "register_operand" "x")
12117 (match_operand:V4DI 1 "nonimmediate_operand" "xm")]
12120 "vptest\t{%1, %0|%0, %1}"
12121 [(set_attr "type" "ssecomi")
12122 (set_attr "prefix_extra" "1")
12123 (set_attr "prefix" "vex")
12124 (set_attr "btver2_decode" "vector")
12125 (set_attr "mode" "OI")])
;; 128-bit ptest: sets FLAGS_REG (CCmode) from an unspec of two V2DI
;; operands (operand 0 register, operand 1 register or memory).  The "%v"
;; in the template together with prefix "maybe_vex" covers both the legacy
;; and the VEX-encoded spelling.
12127 (define_insn "sse4_1_ptest"
12128 [(set (reg:CC FLAGS_REG)
12129 (unspec:CC [(match_operand:V2DI 0 "register_operand" "x")
12130 (match_operand:V2DI 1 "nonimmediate_operand" "xm")]
12133 "%vptest\t{%1, %0|%0, %1}"
12134 [(set_attr "type" "ssecomi")
12135 (set_attr "prefix_extra" "1")
12136 (set_attr "prefix" "maybe_vex")
12137 (set_attr "mode" "TI")])
;; Packed round: operand 0 (register) is computed from operand 1 (register
;; or memory) and the immediate operand 2, which const_0_to_15_operand
;; restricts to 0..15.  Emitted as %vround<ssemodesuffix>.
;; The prefix_data16 attribute is computed from a TARGET_AVX test rather
;; than given as a constant.
12139 (define_insn "<sse4_1>_round<ssemodesuffix><avxsizesuffix>"
12140 [(set (match_operand:VF_128_256 0 "register_operand" "=x")
12142 [(match_operand:VF_128_256 1 "nonimmediate_operand" "xm")
12143 (match_operand:SI 2 "const_0_to_15_operand" "n")]
12146 "%vround<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
12147 [(set_attr "type" "ssecvt")
12148 (set (attr "prefix_data16")
12150 (match_test "TARGET_AVX")
12152 (const_string "1")))
12153 (set_attr "prefix_extra" "1")
12154 (set_attr "length_immediate" "1")
12155 (set_attr "prefix" "maybe_vex")
12156 (set_attr "mode" "<MODE>")])
;; Round-then-convert expander: operand 1 is rounded (using the immediate
;; operand 2) into a fresh <MODE> temporary via the packed round pattern,
;; and the temporary is then converted into the integer vector operand 0
;; with fix_trunc<mode><sseintvecmodelower>2.
12158 (define_expand "<sse4_1>_round<ssemodesuffix>_sfix<avxsizesuffix>"
12159 [(match_operand:<sseintvecmode> 0 "register_operand")
12160 (match_operand:VF1_128_256 1 "nonimmediate_operand")
12161 (match_operand:SI 2 "const_0_to_15_operand")]
12164 rtx tmp = gen_reg_rtx (<MODE>mode);
12167 (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp, operands[1],
12170 (gen_fix_trunc<mode><sseintvecmodelower>2 (operands[0], tmp));
;; 512-bit V8DF round expander: forwards its three operands unchanged to
;; the avx512f_rndscalev8df pattern.
12174 (define_expand "avx512f_roundpd512"
12175 [(match_operand:V8DF 0 "register_operand")
12176 (match_operand:V8DF 1 "nonimmediate_operand")
12177 (match_operand:SI 2 "const_0_to_15_operand")]
12180 emit_insn (gen_avx512f_rndscalev8df (operands[0], operands[1], operands[2]));
;; Round two VF2 inputs (operands 1 and 2) with the immediate operand 3 and
;; pack the converted integers into operand 0.
;; For V2DFmode with TARGET_AVX and !TARGET_PREFER_AVX128 the two inputs are
;; concatenated into a single V4DF, rounded once with avx_roundpd256 and
;; converted with fix_truncv4dfv4si2.  The remaining path rounds each input
;; into its own temporary and combines them with vec_pack_sfix_trunc_<mode>.
12184 (define_expand "<sse4_1>_round<ssemodesuffix>_vec_pack_sfix<avxsizesuffix>"
12185 [(match_operand:<ssepackfltmode> 0 "register_operand")
12186 (match_operand:VF2 1 "nonimmediate_operand")
12187 (match_operand:VF2 2 "nonimmediate_operand")
12188 (match_operand:SI 3 "const_0_to_15_operand")]
12193 if (<MODE>mode == V2DFmode
12194 && TARGET_AVX && !TARGET_PREFER_AVX128)
12196 rtx tmp2 = gen_reg_rtx (V4DFmode);
12198 tmp0 = gen_reg_rtx (V4DFmode);
12199 tmp1 = force_reg (V2DFmode, operands[1]);
12201 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
12202 emit_insn (gen_avx_roundpd256 (tmp2, tmp0, operands[3]));
12203 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp2));
12207 tmp0 = gen_reg_rtx (<MODE>mode);
12208 tmp1 = gen_reg_rtx (<MODE>mode);
12211 (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp0, operands[1],
12214 (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp1, operands[2],
12217 (gen_vec_pack_sfix_trunc_<mode> (operands[0], tmp0, tmp1));
;; Scalar round with two alternatives selected by the "isa" attribute:
;;   noavx: two-operand round<ssescalarmodesuffix>; operand 1 is tied to the
;;          destination (constraint "0").
;;   avx:   three-operand vround<ssescalarmodesuffix>; operand 1 is a
;;          separate register.
;; Operand 2 is the register being rounded and operand 3 the 0..15 immediate.
12222 (define_insn "sse4_1_round<ssescalarmodesuffix>"
12223 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
12226 [(match_operand:VF_128 2 "register_operand" "x,x")
12227 (match_operand:SI 3 "const_0_to_15_operand" "n,n")]
12229 (match_operand:VF_128 1 "register_operand" "0,x")
12233 round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
12234 vround<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
12235 [(set_attr "isa" "noavx,avx")
12236 (set_attr "type" "ssecvt")
12237 (set_attr "length_immediate" "1")
12238 (set_attr "prefix_data16" "1,*")
12239 (set_attr "prefix_extra" "1")
12240 (set_attr "prefix" "orig,vex")
12241 (set_attr "mode" "<MODE>")])
;; Vector round expander, enabled only when TARGET_ROUND && !flag_trapping_math.
;; The preparation code builds pred_half = 0.5 - 2**(-p - 1) for the element
;; format (the "nextafter (0.5, 0.0)" value), broadcasts it to a vector,
;; and sets operand 3 to copysign (vec_half, operand 1).  Operand 4 is a
;; fresh temporary set by the first template insn from operand 1, and
;; operand 5 is the ROUND_TRUNC immediate used by the second insn together
;; with operand 4 to produce operand 0.
12243 (define_expand "round<mode>2"
12244 [(set (match_dup 4)
12246 (match_operand:VF 1 "register_operand")
12248 (set (match_operand:VF 0 "register_operand")
12250 [(match_dup 4) (match_dup 5)]
12252 "TARGET_ROUND && !flag_trapping_math"
12254 enum machine_mode scalar_mode;
12255 const struct real_format *fmt;
12256 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
12257 rtx half, vec_half;
12259 scalar_mode = GET_MODE_INNER (<MODE>mode);
12261 /* load nextafter (0.5, 0.0) */
12262 fmt = REAL_MODE_FORMAT (scalar_mode);
12263 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, scalar_mode);
12264 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
12265 half = const_double_from_real_value (pred_half, scalar_mode);
12267 vec_half = ix86_build_const_vector (<MODE>mode, true, half);
12268 vec_half = force_reg (<MODE>mode, vec_half);
12270 operands[3] = gen_reg_rtx (<MODE>mode);
12271 emit_insn (gen_copysign<mode>3 (operands[3], vec_half, operands[1]));
12273 operands[4] = gen_reg_rtx (<MODE>mode);
12274 operands[5] = GEN_INT (ROUND_TRUNC);
;; Round operand 1 into a fresh temporary with round<mode>2, then convert
;; the temporary into the integer vector operand 0 with
;; fix_trunc<mode><sseintvecmodelower>2.  Same enabling condition as
;; round<mode>2.
12277 (define_expand "round<mode>2_sfix"
12278 [(match_operand:<sseintvecmode> 0 "register_operand")
12279 (match_operand:VF1_128_256 1 "register_operand")]
12280 "TARGET_ROUND && !flag_trapping_math"
12282 rtx tmp = gen_reg_rtx (<MODE>mode);
12284 emit_insn (gen_round<mode>2 (tmp, operands[1]));
12287 (gen_fix_trunc<mode><sseintvecmodelower>2 (operands[0], tmp));
;; Round two VF2 inputs (operands 1 and 2) and pack the converted integers
;; into operand 0.
;; For V2DFmode with TARGET_AVX and !TARGET_PREFER_AVX128 the inputs are
;; concatenated into one V4DF, rounded once with roundv4df2 and converted
;; with fix_truncv4dfv4si2.  The remaining path rounds each input with
;; round<mode>2 and combines the results with vec_pack_sfix_trunc_<mode>.
12291 (define_expand "round<mode>2_vec_pack_sfix"
12292 [(match_operand:<ssepackfltmode> 0 "register_operand")
12293 (match_operand:VF2 1 "register_operand")
12294 (match_operand:VF2 2 "register_operand")]
12295 "TARGET_ROUND && !flag_trapping_math"
12299 if (<MODE>mode == V2DFmode
12300 && TARGET_AVX && !TARGET_PREFER_AVX128)
12302 rtx tmp2 = gen_reg_rtx (V4DFmode);
12304 tmp0 = gen_reg_rtx (V4DFmode);
12305 tmp1 = force_reg (V2DFmode, operands[1]);
12307 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
12308 emit_insn (gen_roundv4df2 (tmp2, tmp0));
12309 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp2));
12313 tmp0 = gen_reg_rtx (<MODE>mode);
12314 tmp1 = gen_reg_rtx (<MODE>mode);
12316 emit_insn (gen_round<mode>2 (tmp0, operands[1]));
12317 emit_insn (gen_round<mode>2 (tmp1, operands[2]));
12320 (gen_vec_pack_sfix_trunc_<mode> (operands[0], tmp0, tmp1));
12325 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
12327 ;; Intel SSE4.2 string/text processing instructions
12329 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Combined pcmpestr pattern with three results: the SI operand 0
;; (constraint "c"), the V16QI operand 1 (constraint "Yz") and FLAGS_REG.
;; The split code looks for REG_UNUSED notes on curr_insn to decide which
;; results are still needed (ecx, xmm0, flags) and emits
;; sse4_2_pcmpestri, sse4_2_pcmpestrm and/or the flags-only
;; sse4_2_pcmpestr_cconly; when none is needed it emits a NOTE_INSN_DELETED.
;; The split condition requires can_create_pseudo_p ().
12331 (define_insn_and_split "sse4_2_pcmpestr"
12332 [(set (match_operand:SI 0 "register_operand" "=c,c")
12334 [(match_operand:V16QI 2 "register_operand" "x,x")
12335 (match_operand:SI 3 "register_operand" "a,a")
12336 (match_operand:V16QI 4 "nonimmediate_operand" "x,m")
12337 (match_operand:SI 5 "register_operand" "d,d")
12338 (match_operand:SI 6 "const_0_to_255_operand" "n,n")]
12340 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
12348 (set (reg:CC FLAGS_REG)
12357 && can_create_pseudo_p ()"
12362 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
12363 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
12364 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
12367 emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
12368 operands[3], operands[4],
12369 operands[5], operands[6]));
12371 emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
12372 operands[3], operands[4],
12373 operands[5], operands[6]));
12374 if (flags && !(ecx || xmm0))
12375 emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
12376 operands[2], operands[3],
12377 operands[4], operands[5],
12379 if (!(flags || ecx || xmm0))
12380 emit_note (NOTE_INSN_DELETED);
12384 [(set_attr "type" "sselog")
12385 (set_attr "prefix_data16" "1")
12386 (set_attr "prefix_extra" "1")
12387 (set_attr "ssememalign" "8")
12388 (set_attr "length_immediate" "1")
12389 (set_attr "memory" "none,load")
12390 (set_attr "mode" "TI")])
;; Variant of the combined pcmpestr pattern whose V16QI operand 4 is a
;; memory_operand wrapped in UNSPEC_LOADU (single alternative, memory
;; attribute "load").  The split code is the same as for sse4_2_pcmpestr:
;; it uses REG_UNUSED notes on curr_insn to choose between
;; sse4_2_pcmpestri, sse4_2_pcmpestrm, sse4_2_pcmpestr_cconly, or a
;; NOTE_INSN_DELETED, passing operand 4 (the memory) directly.
12392 (define_insn_and_split "*sse4_2_pcmpestr_unaligned"
12393 [(set (match_operand:SI 0 "register_operand" "=c")
12395 [(match_operand:V16QI 2 "register_operand" "x")
12396 (match_operand:SI 3 "register_operand" "a")
12398 [(match_operand:V16QI 4 "memory_operand" "m")]
12400 (match_operand:SI 5 "register_operand" "d")
12401 (match_operand:SI 6 "const_0_to_255_operand" "n")]
12403 (set (match_operand:V16QI 1 "register_operand" "=Yz")
12407 (unspec:V16QI [(match_dup 4)] UNSPEC_LOADU)
12411 (set (reg:CC FLAGS_REG)
12415 (unspec:V16QI [(match_dup 4)] UNSPEC_LOADU)
12420 && can_create_pseudo_p ()"
12425 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
12426 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
12427 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
12430 emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
12431 operands[3], operands[4],
12432 operands[5], operands[6]));
12434 emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
12435 operands[3], operands[4],
12436 operands[5], operands[6]));
12437 if (flags && !(ecx || xmm0))
12438 emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
12439 operands[2], operands[3],
12440 operands[4], operands[5],
12442 if (!(flags || ecx || xmm0))
12443 emit_note (NOTE_INSN_DELETED);
12447 [(set_attr "type" "sselog")
12448 (set_attr "prefix_data16" "1")
12449 (set_attr "prefix_extra" "1")
12450 (set_attr "ssememalign" "8")
12451 (set_attr "length_immediate" "1")
12452 (set_attr "memory" "load")
12453 (set_attr "mode" "TI")])
;; pcmpestri: produces the SI result in operand 0 (constraint "c") and sets
;; FLAGS_REG.  Inputs are the V16QI operands 1 and 3 (3 may be memory), the
;; SI operands 2 and 4 (constraints "a" and "d") and the 0..255 immediate
;; operand 5.  Only operands 1, 3 and 5 appear in the assembler template.
12455 (define_insn "sse4_2_pcmpestri"
12456 [(set (match_operand:SI 0 "register_operand" "=c,c")
12458 [(match_operand:V16QI 1 "register_operand" "x,x")
12459 (match_operand:SI 2 "register_operand" "a,a")
12460 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
12461 (match_operand:SI 4 "register_operand" "d,d")
12462 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
12464 (set (reg:CC FLAGS_REG)
12473 "%vpcmpestri\t{%5, %3, %1|%1, %3, %5}"
12474 [(set_attr "type" "sselog")
12475 (set_attr "prefix_data16" "1")
12476 (set_attr "prefix_extra" "1")
12477 (set_attr "prefix" "maybe_vex")
12478 (set_attr "ssememalign" "8")
12479 (set_attr "length_immediate" "1")
12480 (set_attr "btver2_decode" "vector")
12481 (set_attr "memory" "none,load")
12482 (set_attr "mode" "TI")])
;; pcmpestrm: produces the V16QI result in operand 0 (constraint "Yz") and
;; sets FLAGS_REG.  Inputs mirror sse4_2_pcmpestri: V16QI operands 1 and 3,
;; SI operands 2 ("a") and 4 ("d"), and the 0..255 immediate operand 5.
12484 (define_insn "sse4_2_pcmpestrm"
12485 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
12487 [(match_operand:V16QI 1 "register_operand" "x,x")
12488 (match_operand:SI 2 "register_operand" "a,a")
12489 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
12490 (match_operand:SI 4 "register_operand" "d,d")
12491 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
12493 (set (reg:CC FLAGS_REG)
12502 "%vpcmpestrm\t{%5, %3, %1|%1, %3, %5}"
12503 [(set_attr "type" "sselog")
12504 (set_attr "prefix_data16" "1")
12505 (set_attr "prefix_extra" "1")
12506 (set_attr "ssememalign" "8")
12507 (set_attr "length_immediate" "1")
12508 (set_attr "prefix" "maybe_vex")
12509 (set_attr "btver2_decode" "vector")
12510 (set_attr "memory" "none,load")
12511 (set_attr "mode" "TI")])
;; Flags-only pcmpestr: only FLAGS_REG is set; the other outputs are
;; clobbered scratches.  Four alternatives:
;;   0-1: scratch 0 is "Yz", scratch 1 is unconstrained ("X"); emits
;;        pcmpestrm.
;;   2-3: scratch 1 is "c", scratch 0 is unconstrained ("X"); emits
;;        pcmpestri.
;; Within each pair operand 4 is a register, then memory.
12513 (define_insn "sse4_2_pcmpestr_cconly"
12514 [(set (reg:CC FLAGS_REG)
12516 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
12517 (match_operand:SI 3 "register_operand" "a,a,a,a")
12518 (match_operand:V16QI 4 "nonimmediate_operand" "x,m,x,m")
12519 (match_operand:SI 5 "register_operand" "d,d,d,d")
12520 (match_operand:SI 6 "const_0_to_255_operand" "n,n,n,n")]
12522 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
12523 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
12526 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
12527 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
12528 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}
12529 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}"
12530 [(set_attr "type" "sselog")
12531 (set_attr "prefix_data16" "1")
12532 (set_attr "prefix_extra" "1")
12533 (set_attr "ssememalign" "8")
12534 (set_attr "length_immediate" "1")
12535 (set_attr "memory" "none,load,none,load")
12536 (set_attr "btver2_decode" "vector,vector,vector,vector")
12537 (set_attr "prefix" "maybe_vex")
12538 (set_attr "mode" "TI")])
;; Combined pcmpistr pattern with three results: the SI operand 0
;; (constraint "c"), the V16QI operand 1 (constraint "Yz") and FLAGS_REG.
;; Inputs are the V16QI operands 2 and 3 and the 0..255 immediate operand 4.
;; The split code uses REG_UNUSED notes on curr_insn to decide which results
;; are still needed and emits sse4_2_pcmpistri, sse4_2_pcmpistrm and/or the
;; flags-only sse4_2_pcmpistr_cconly; when none is needed it emits a
;; NOTE_INSN_DELETED.
12540 (define_insn_and_split "sse4_2_pcmpistr"
12541 [(set (match_operand:SI 0 "register_operand" "=c,c")
12543 [(match_operand:V16QI 2 "register_operand" "x,x")
12544 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
12545 (match_operand:SI 4 "const_0_to_255_operand" "n,n")]
12547 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
12553 (set (reg:CC FLAGS_REG)
12560 && can_create_pseudo_p ()"
12565 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
12566 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
12567 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
12570 emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
12571 operands[3], operands[4]));
12573 emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
12574 operands[3], operands[4]));
12575 if (flags && !(ecx || xmm0))
12576 emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
12577 operands[2], operands[3],
12579 if (!(flags || ecx || xmm0))
12580 emit_note (NOTE_INSN_DELETED);
12584 [(set_attr "type" "sselog")
12585 (set_attr "prefix_data16" "1")
12586 (set_attr "prefix_extra" "1")
12587 (set_attr "ssememalign" "8")
12588 (set_attr "length_immediate" "1")
12589 (set_attr "memory" "none,load")
12590 (set_attr "mode" "TI")])
;; Variant of the combined pcmpistr pattern whose V16QI operand 3 is a
;; memory_operand wrapped in UNSPEC_LOADU (single alternative, memory
;; attribute "load").  The split code is the same as for sse4_2_pcmpistr
;; and passes operand 3 (the memory) directly to the emitted insns.
12592 (define_insn_and_split "*sse4_2_pcmpistr_unaligned"
12593 [(set (match_operand:SI 0 "register_operand" "=c")
12595 [(match_operand:V16QI 2 "register_operand" "x")
12597 [(match_operand:V16QI 3 "memory_operand" "m")]
12599 (match_operand:SI 4 "const_0_to_255_operand" "n")]
12601 (set (match_operand:V16QI 1 "register_operand" "=Yz")
12604 (unspec:V16QI [(match_dup 3)] UNSPEC_LOADU)
12607 (set (reg:CC FLAGS_REG)
12610 (unspec:V16QI [(match_dup 3)] UNSPEC_LOADU)
12614 && can_create_pseudo_p ()"
12619 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
12620 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
12621 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
12624 emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
12625 operands[3], operands[4]));
12627 emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
12628 operands[3], operands[4]));
12629 if (flags && !(ecx || xmm0))
12630 emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
12631 operands[2], operands[3],
12633 if (!(flags || ecx || xmm0))
12634 emit_note (NOTE_INSN_DELETED);
12638 [(set_attr "type" "sselog")
12639 (set_attr "prefix_data16" "1")
12640 (set_attr "prefix_extra" "1")
12641 (set_attr "ssememalign" "8")
12642 (set_attr "length_immediate" "1")
12643 (set_attr "memory" "load")
12644 (set_attr "mode" "TI")])
;; pcmpistri: produces the SI result in operand 0 (constraint "c") and sets
;; FLAGS_REG from the V16QI operands 1 and 2 (2 may be memory) and the
;; 0..255 immediate operand 3.
12646 (define_insn "sse4_2_pcmpistri"
12647 [(set (match_operand:SI 0 "register_operand" "=c,c")
12649 [(match_operand:V16QI 1 "register_operand" "x,x")
12650 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
12651 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
12653 (set (reg:CC FLAGS_REG)
12660 "%vpcmpistri\t{%3, %2, %1|%1, %2, %3}"
12661 [(set_attr "type" "sselog")
12662 (set_attr "prefix_data16" "1")
12663 (set_attr "prefix_extra" "1")
12664 (set_attr "ssememalign" "8")
12665 (set_attr "length_immediate" "1")
12666 (set_attr "prefix" "maybe_vex")
12667 (set_attr "memory" "none,load")
12668 (set_attr "btver2_decode" "vector")
12669 (set_attr "mode" "TI")])
;; pcmpistrm: produces the V16QI result in operand 0 (constraint "Yz") and
;; sets FLAGS_REG from the V16QI operands 1 and 2 (2 may be memory) and the
;; 0..255 immediate operand 3.
12671 (define_insn "sse4_2_pcmpistrm"
12672 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
12674 [(match_operand:V16QI 1 "register_operand" "x,x")
12675 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
12676 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
12678 (set (reg:CC FLAGS_REG)
12685 "%vpcmpistrm\t{%3, %2, %1|%1, %2, %3}"
12686 [(set_attr "type" "sselog")
12687 (set_attr "prefix_data16" "1")
12688 (set_attr "prefix_extra" "1")
12689 (set_attr "ssememalign" "8")
12690 (set_attr "length_immediate" "1")
12691 (set_attr "prefix" "maybe_vex")
12692 (set_attr "memory" "none,load")
12693 (set_attr "btver2_decode" "vector")
12694 (set_attr "mode" "TI")])
;; Flags-only pcmpistr: only FLAGS_REG is set; the other outputs are
;; clobbered scratches.  Four alternatives:
;;   0-1: scratch 0 is "Yz", scratch 1 is unconstrained ("X"); emits
;;        pcmpistrm.
;;   2-3: scratch 1 is "c", scratch 0 is unconstrained ("X"); emits
;;        pcmpistri.
;; Within each pair operand 3 is a register, then memory.
12696 (define_insn "sse4_2_pcmpistr_cconly"
12697 [(set (reg:CC FLAGS_REG)
12699 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
12700 (match_operand:V16QI 3 "nonimmediate_operand" "x,m,x,m")
12701 (match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")]
12703 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
12704 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
12707 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
12708 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
12709 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}
12710 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}"
12711 [(set_attr "type" "sselog")
12712 (set_attr "prefix_data16" "1")
12713 (set_attr "prefix_extra" "1")
12714 (set_attr "ssememalign" "8")
12715 (set_attr "length_immediate" "1")
12716 (set_attr "memory" "none,load,none,load")
12717 (set_attr "prefix" "maybe_vex")
12718 (set_attr "btver2_decode" "vector,vector,vector,vector")
12719 (set_attr "mode" "TI")])
12721 ;; Packed float variants
;; Maps the index-vector mode of the *pf<mode>sf patterns to the float
;; vector mode used for their mem operand: V8DI -> V8SF, V16SI -> V16SF.
12722 (define_mode_attr GATHER_SCATTER_SF_MEM_MODE
12723 [(V8DI "V8SF") (V16SI "V16SF")])
;; Expander for the packed-float gather prefetch.  Operand 0 is the mask
;; (register or constant -1), operand 1 the VI48_512 index vector, operand 2
;; the base address, operand 3 the scale (const1248_operand) and operand 4
;; the hint immediate (const_2_to_3_operand).  The preparation code builds
;; an UNSPEC_VSIBADDR in Pmode from (operands[2], operands[1], operands[3]).
12725 (define_expand "avx512pf_gatherpf<mode>sf"
12727 [(match_operand:<avx512fmaskmode> 0 "register_or_constm1_operand")
12728 (mem:<GATHER_SCATTER_SF_MEM_MODE>
12730 [(match_operand 2 "vsib_address_operand")
12731 (match_operand:VI48_512 1 "register_operand")
12732 (match_operand:SI 3 "const1248_operand")]))
12733 (match_operand:SI 4 "const_2_to_3_operand")]
12734 UNSPEC_GATHER_PREFETCH)]
12738 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
12739 operands[3]), UNSPEC_VSIBADDR);
;; Masked packed-float gather prefetch.  Operand 0 is the mask register
;; ("Yk"), operand 5 the VSIB memory operator built from operands 2, 1 and 3.
;; The hint immediate operand 4 (2..3) selects between the vgatherpf0 and
;; vgatherpf1 mnemonics; any other value hits gcc_unreachable ().
12742 (define_insn "*avx512pf_gatherpf<mode>sf_mask"
12744 [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
12745 (match_operator:<GATHER_SCATTER_SF_MEM_MODE> 5 "vsib_mem_operator"
12747 [(match_operand:P 2 "vsib_address_operand" "Tv")
12748 (match_operand:VI48_512 1 "register_operand" "v")
12749 (match_operand:SI 3 "const1248_operand" "n")]
12751 (match_operand:SI 4 "const_2_to_3_operand" "n")]
12752 UNSPEC_GATHER_PREFETCH)]
12755 switch (INTVAL (operands[4]))
12758 return "vgatherpf0<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}";
12760 return "vgatherpf1<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}";
12762 gcc_unreachable ();
12765 [(set_attr "type" "sse")
12766 (set_attr "prefix" "evex")
12767 (set_attr "mode" "XI")])
;; Packed-float gather prefetch without a mask-register operand in the
;; template.  Operand 4 is the VSIB memory operator built from operands 1,
;; 0 and 2; the hint immediate operand 3 (2..3) selects between the
;; vgatherpf0 and vgatherpf1 mnemonics.
12769 (define_insn "*avx512pf_gatherpf<mode>sf"
12772 (match_operator:<GATHER_SCATTER_SF_MEM_MODE> 4 "vsib_mem_operator"
12774 [(match_operand:P 1 "vsib_address_operand" "Tv")
12775 (match_operand:VI48_512 0 "register_operand" "v")
12776 (match_operand:SI 2 "const1248_operand" "n")]
12778 (match_operand:SI 3 "const_2_to_3_operand" "n")]
12779 UNSPEC_GATHER_PREFETCH)]
12782 switch (INTVAL (operands[3]))
12785 return "vgatherpf0<ssemodesuffix>ps\t{%4|%4}";
12787 return "vgatherpf1<ssemodesuffix>ps\t{%4|%4}";
12789 gcc_unreachable ();
12792 [(set_attr "type" "sse")
12793 (set_attr "prefix" "evex")
12794 (set_attr "mode" "XI")])
12796 ;; Packed double variants
;; Expander for the packed-double gather prefetch.  Operand 0 is the mask
;; (register or constant -1), operand 1 the VI4_256_8_512 index vector,
;; operand 2 the base address, operand 3 the scale and operand 4 the hint
;; immediate (2..3).  The preparation code builds an UNSPEC_VSIBADDR in
;; Pmode from (operands[2], operands[1], operands[3]).
12797 (define_expand "avx512pf_gatherpf<mode>df"
12799 [(match_operand:<avx512fmaskmode> 0 "register_or_constm1_operand")
12802 [(match_operand 2 "vsib_address_operand")
12803 (match_operand:VI4_256_8_512 1 "register_operand")
12804 (match_operand:SI 3 "const1248_operand")]))
12805 (match_operand:SI 4 "const_2_to_3_operand")]
12806 UNSPEC_GATHER_PREFETCH)]
12810 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
12811 operands[3]), UNSPEC_VSIBADDR);
;; Masked packed-double gather prefetch.  Operand 0 is the mask register
;; ("Yk"), operand 5 the V8DF VSIB memory operator built from operands 2, 1
;; and 3.  The hint immediate operand 4 (2..3) selects between the
;; vgatherpf0 and vgatherpf1 mnemonics.
12814 (define_insn "*avx512pf_gatherpf<mode>df_mask"
12816 [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
12817 (match_operator:V8DF 5 "vsib_mem_operator"
12819 [(match_operand:P 2 "vsib_address_operand" "Tv")
12820 (match_operand:VI4_256_8_512 1 "register_operand" "v")
12821 (match_operand:SI 3 "const1248_operand" "n")]
12823 (match_operand:SI 4 "const_2_to_3_operand" "n")]
12824 UNSPEC_GATHER_PREFETCH)]
12827 switch (INTVAL (operands[4]))
12830 return "vgatherpf0<ssemodesuffix>pd\t{%5%{%0%}|%5%{%0%}}";
12832 return "vgatherpf1<ssemodesuffix>pd\t{%5%{%0%}|%5%{%0%}}";
12834 gcc_unreachable ();
12837 [(set_attr "type" "sse")
12838 (set_attr "prefix" "evex")
12839 (set_attr "mode" "XI")])
;; Packed-double gather prefetch without a mask-register operand in the
;; template.  Operand 4 is the V8DF VSIB memory operator built from
;; operands 1, 0 and 2; the hint immediate operand 3 (2..3) selects between
;; the vgatherpf0 and vgatherpf1 mnemonics.
12841 (define_insn "*avx512pf_gatherpf<mode>df"
12844 (match_operator:V8DF 4 "vsib_mem_operator"
12846 [(match_operand:P 1 "vsib_address_operand" "Tv")
12847 (match_operand:VI4_256_8_512 0 "register_operand" "v")
12848 (match_operand:SI 2 "const1248_operand" "n")]
12850 (match_operand:SI 3 "const_2_to_3_operand" "n")]
12851 UNSPEC_GATHER_PREFETCH)]
12854 switch (INTVAL (operands[3]))
12857 return "vgatherpf0<ssemodesuffix>pd\t{%4|%4}";
12859 return "vgatherpf1<ssemodesuffix>pd\t{%4|%4}";
12861 gcc_unreachable ();
12864 [(set_attr "type" "sse")
12865 (set_attr "prefix" "evex")
12866 (set_attr "mode" "XI")])
12868 ;; Packed float variants
;; Expander for the packed-float scatter prefetch.  Operand 0 is the mask
;; (register or constant -1), operand 1 the VI48_512 index vector, operand 2
;; the base address, operand 3 the scale and operand 4 the hint immediate
;; (const2367_operand).  The preparation code builds an UNSPEC_VSIBADDR in
;; Pmode from (operands[2], operands[1], operands[3]).
12869 (define_expand "avx512pf_scatterpf<mode>sf"
12871 [(match_operand:<avx512fmaskmode> 0 "register_or_constm1_operand")
12872 (mem:<GATHER_SCATTER_SF_MEM_MODE>
12874 [(match_operand 2 "vsib_address_operand")
12875 (match_operand:VI48_512 1 "register_operand")
12876 (match_operand:SI 3 "const1248_operand")]))
12877 (match_operand:SI 4 "const2367_operand")]
12878 UNSPEC_SCATTER_PREFETCH)]
12882 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
12883 operands[3]), UNSPEC_VSIBADDR);
;; Masked packed-float scatter prefetch.  Operand 0 is the mask register
;; ("Yk"), operand 5 the VSIB memory operator built from operands 2, 1 and 3.
;; The hint immediate operand 4 (const2367_operand) selects between the
;; vscatterpf0 and vscatterpf1 mnemonics.
12886 (define_insn "*avx512pf_scatterpf<mode>sf_mask"
12888 [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
12889 (match_operator:<GATHER_SCATTER_SF_MEM_MODE> 5 "vsib_mem_operator"
12891 [(match_operand:P 2 "vsib_address_operand" "Tv")
12892 (match_operand:VI48_512 1 "register_operand" "v")
12893 (match_operand:SI 3 "const1248_operand" "n")]
12895 (match_operand:SI 4 "const2367_operand" "n")]
12896 UNSPEC_SCATTER_PREFETCH)]
12899 switch (INTVAL (operands[4]))
12903 return "vscatterpf0<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}";
12906 return "vscatterpf1<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}";
12908 gcc_unreachable ();
12911 [(set_attr "type" "sse")
12912 (set_attr "prefix" "evex")
12913 (set_attr "mode" "XI")])
;; Packed-float scatter prefetch without a mask-register operand in the
;; template.  Operand 4 is the VSIB memory operator built from operands 1,
;; 0 and 2; the hint immediate operand 3 (const2367_operand) selects
;; between the vscatterpf0 and vscatterpf1 mnemonics.
12915 (define_insn "*avx512pf_scatterpf<mode>sf"
12918 (match_operator:<GATHER_SCATTER_SF_MEM_MODE> 4 "vsib_mem_operator"
12920 [(match_operand:P 1 "vsib_address_operand" "Tv")
12921 (match_operand:VI48_512 0 "register_operand" "v")
12922 (match_operand:SI 2 "const1248_operand" "n")]
12924 (match_operand:SI 3 "const2367_operand" "n")]
12925 UNSPEC_SCATTER_PREFETCH)]
12928 switch (INTVAL (operands[3]))
12932 return "vscatterpf0<ssemodesuffix>ps\t{%4|%4}";
12935 return "vscatterpf1<ssemodesuffix>ps\t{%4|%4}";
12937 gcc_unreachable ();
12940 [(set_attr "type" "sse")
12941 (set_attr "prefix" "evex")
12942 (set_attr "mode" "XI")])
12944 ;; Packed double variants
;; Expander for the packed-double scatter prefetch.  Operand 0 is the mask
;; (register or constant -1), operand 1 the VI4_256_8_512 index vector,
;; operand 2 the base address, operand 3 the scale and operand 4 the hint
;; immediate (const2367_operand).  The preparation code builds an
;; UNSPEC_VSIBADDR in Pmode from (operands[2], operands[1], operands[3]).
12945 (define_expand "avx512pf_scatterpf<mode>df"
12947 [(match_operand:<avx512fmaskmode> 0 "register_or_constm1_operand")
12950 [(match_operand 2 "vsib_address_operand")
12951 (match_operand:VI4_256_8_512 1 "register_operand")
12952 (match_operand:SI 3 "const1248_operand")]))
12953 (match_operand:SI 4 "const2367_operand")]
12954 UNSPEC_SCATTER_PREFETCH)]
12958 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
12959 operands[3]), UNSPEC_VSIBADDR);
;; Masked packed-double scatter prefetch.  Operand 0 is the mask register
;; ("Yk"), operand 5 the V8DF VSIB memory operator built from operands 2, 1
;; and 3.  The hint immediate operand 4 (const2367_operand) selects between
;; the vscatterpf0 and vscatterpf1 mnemonics.
12962 (define_insn "*avx512pf_scatterpf<mode>df_mask"
12964 [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
12965 (match_operator:V8DF 5 "vsib_mem_operator"
12967 [(match_operand:P 2 "vsib_address_operand" "Tv")
12968 (match_operand:VI4_256_8_512 1 "register_operand" "v")
12969 (match_operand:SI 3 "const1248_operand" "n")]
12971 (match_operand:SI 4 "const2367_operand" "n")]
12972 UNSPEC_SCATTER_PREFETCH)]
12975 switch (INTVAL (operands[4]))
12979 return "vscatterpf0<ssemodesuffix>pd\t{%5%{%0%}|%5%{%0%}}";
12982 return "vscatterpf1<ssemodesuffix>pd\t{%5%{%0%}|%5%{%0%}}";
12984 gcc_unreachable ();
12987 [(set_attr "type" "sse")
12988 (set_attr "prefix" "evex")
12989 (set_attr "mode" "XI")])
;; Packed-double scatter prefetch without a mask-register operand in the
;; template.  Operand 4 is the V8DF VSIB memory operator built from
;; operands 1, 0 and 2; the hint immediate operand 3 (const2367_operand)
;; selects between the vscatterpf0 and vscatterpf1 mnemonics.
12991 (define_insn "*avx512pf_scatterpf<mode>df"
12994 (match_operator:V8DF 4 "vsib_mem_operator"
12996 [(match_operand:P 1 "vsib_address_operand" "Tv")
12997 (match_operand:VI4_256_8_512 0 "register_operand" "v")
12998 (match_operand:SI 2 "const1248_operand" "n")]
13000 (match_operand:SI 3 "const2367_operand" "n")]
13001 UNSPEC_SCATTER_PREFETCH)]
13004 switch (INTVAL (operands[3]))
13008 return "vscatterpf0<ssemodesuffix>pd\t{%4|%4}";
13011 return "vscatterpf1<ssemodesuffix>pd\t{%4|%4}";
13013 gcc_unreachable ();
13016 [(set_attr "type" "sse")
13017 (set_attr "prefix" "evex")
13018 (set_attr "mode" "XI")])
;; 512-bit vexp2<ssemodesuffix>: operand 0 (register) from the single
;; VF_512 operand 1.  The <mask_name> and <round_saeonly_name> substitutions
;; add the optional masking and SAE pieces to the name, the operand
;; predicate/constraint and the template.
13020 (define_insn "avx512er_exp2<mode><mask_name><round_saeonly_name>"
13021 [(set (match_operand:VF_512 0 "register_operand" "=v")
13023 [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
13026 "vexp2<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
13027 [(set_attr "prefix" "evex")
13028 (set_attr "type" "sse")
13029 (set_attr "mode" "<MODE>")])
;; 512-bit vrcp28<ssemodesuffix>: operand 0 (register) from the single
;; VF_512 operand 1, with optional masking and SAE supplied through the
;; <mask_name> / <round_saeonly_name> substitutions.
13031 (define_insn "<mask_codefor>avx512er_rcp28<mode><mask_name><round_saeonly_name>"
13032 [(set (match_operand:VF_512 0 "register_operand" "=v")
13034 [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
13037 "vrcp28<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
13038 [(set_attr "prefix" "evex")
13039 (set_attr "type" "sse")
13040 (set_attr "mode" "<MODE>")])
;; Scalar vrcp28<ssescalarmodesuffix> on VF_128 vectors: operand 1 is the
;; input to the operation and operand 2 is a second register source that is
;; also printed in the template.
;; NOTE(review): length_immediate is "1" although the assembler template
;; prints no immediate operand -- confirm whether this is intended.
13042 (define_insn "avx512er_vmrcp28<mode><round_saeonly_name>"
13043 [(set (match_operand:VF_128 0 "register_operand" "=v")
13046 [(match_operand:VF_128 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
13048 (match_operand:VF_128 2 "register_operand" "v")
13051 "vrcp28<ssescalarmodesuffix>\t{<round_saeonly_op3>%1, %2, %0|%0, %2, %1<round_saeonly_op3>}"
13052 [(set_attr "length_immediate" "1")
13053 (set_attr "prefix" "evex")
13054 (set_attr "type" "sse")
13055 (set_attr "mode" "<MODE>")])
;; 512-bit vrsqrt28<ssemodesuffix>: operand 0 (register) from the single
;; VF_512 operand 1, with optional masking and SAE supplied through the
;; <mask_name> / <round_saeonly_name> substitutions.
13057 (define_insn "<mask_codefor>avx512er_rsqrt28<mode><mask_name><round_saeonly_name>"
13058 [(set (match_operand:VF_512 0 "register_operand" "=v")
13060 [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
13063 "vrsqrt28<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
13064 [(set_attr "prefix" "evex")
13065 (set_attr "type" "sse")
13066 (set_attr "mode" "<MODE>")])
;; Scalar vrsqrt28<ssescalarmodesuffix> on VF_128 vectors: operand 1 is the
;; input to the operation and operand 2 is a second register source that is
;; also printed in the template.
;; NOTE(review): length_immediate is "1" although the assembler template
;; prints no immediate operand -- confirm whether this is intended.
13068 (define_insn "avx512er_vmrsqrt28<mode><round_saeonly_name>"
13069 [(set (match_operand:VF_128 0 "register_operand" "=v")
13072 [(match_operand:VF_128 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
13074 (match_operand:VF_128 2 "register_operand" "v")
13077 "vrsqrt28<ssescalarmodesuffix>\t{<round_saeonly_op3>%1, %2, %0|%0, %2, %1<round_saeonly_op3>}"
13078 [(set_attr "length_immediate" "1")
13079 (set_attr "type" "sse")
13080 (set_attr "prefix" "evex")
13081 (set_attr "mode" "<MODE>")])
13083 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
13085 ;; XOP instructions
13087 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; xop_plus iterates over plain and signed-saturating addition (plus,
;; ss_plus).  The macs/madcs code attributes give the matching mnemonic
;; stems: the saturating variant appends an extra "s".
13089 (define_code_iterator xop_plus [plus ss_plus])
13091 (define_code_attr macs [(plus "macs") (ss_plus "macss")])
13092 (define_code_attr madcs [(plus "madcs") (ss_plus "madcss")])
13094 ;; XOP parallel integer multiply/add instructions.
;; Four-operand XOP vp<macs> instruction on VI24_128 vectors: destination
;; operand 0 and sources 1-3.  Operand 1 is marked commutative with
;; operand 2 ("%"); only operand 2 may be memory.
13096 (define_insn "xop_p<macs><ssemodesuffix><ssemodesuffix>"
13097 [(set (match_operand:VI24_128 0 "register_operand" "=x")
13100 (match_operand:VI24_128 1 "nonimmediate_operand" "%x")
13101 (match_operand:VI24_128 2 "nonimmediate_operand" "xm"))
13102 (match_operand:VI24_128 3 "register_operand" "x")))]
13104 "vp<macs><ssemodesuffix><ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
13105 [(set_attr "type" "ssemuladd")
13106 (set_attr "mode" "TI")])
;; vp<macs>dql: uses elements 0 and 2 of the V4SI operands 1 and 2, with the
;; V2DI operand 3 as the third source, producing a V2DI result.
13108 (define_insn "xop_p<macs>dql"
13109 [(set (match_operand:V2DI 0 "register_operand" "=x")
13114 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
13115 (parallel [(const_int 0) (const_int 2)])))
13118 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
13119 (parallel [(const_int 0) (const_int 2)]))))
13120 (match_operand:V2DI 3 "register_operand" "x")))]
13122 "vp<macs>dql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
13123 [(set_attr "type" "ssemuladd")
13124 (set_attr "mode" "TI")])
;; vp<macs>dqh: uses elements 1 and 3 of the V4SI operands 1 and 2, with the
;; V2DI operand 3 as the third source, producing a V2DI result.
13126 (define_insn "xop_p<macs>dqh"
13127 [(set (match_operand:V2DI 0 "register_operand" "=x")
13132 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
13133 (parallel [(const_int 1) (const_int 3)])))
13136 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
13137 (parallel [(const_int 1) (const_int 3)]))))
13138 (match_operand:V2DI 3 "register_operand" "x")))]
13140 "vp<macs>dqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
13141 [(set_attr "type" "ssemuladd")
13142 (set_attr "mode" "TI")])
13144 ;; XOP parallel integer multiply/add instructions for the intrinsics
;; vp<macs>wd: uses the odd elements (1, 3, 5, 7) of the V8HI operands 1 and
;; 2, with the V4SI operand 3 as the third source, producing a V4SI result.
13145 (define_insn "xop_p<macs>wd"
13146 [(set (match_operand:V4SI 0 "register_operand" "=x")
13151 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
13152 (parallel [(const_int 1) (const_int 3)
13153 (const_int 5) (const_int 7)])))
13156 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
13157 (parallel [(const_int 1) (const_int 3)
13158 (const_int 5) (const_int 7)]))))
13159 (match_operand:V4SI 3 "register_operand" "x")))]
13161 "vp<macs>wd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
13162 [(set_attr "type" "ssemuladd")
13163 (set_attr "mode" "TI")])
;; vp<madcs>wd: V8HI inputs, V4SI accumulator (operand 3) and V4SI result.
;; The pattern uses two groups of selections: the even elements
;; (0, 2, 4, 6) and the odd elements (1, 3, 5, 7).
;; NOTE(review): the operators combining the two groups, the match_dup lines
;; for the odd selections and the insn condition are not visible in this
;; excerpt.
13165 (define_insn "xop_p<madcs>wd"
13166 [(set (match_operand:V4SI 0 "register_operand" "=x")
13172 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
13173 (parallel [(const_int 0) (const_int 2)
13174 (const_int 4) (const_int 6)])))
13177 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
13178 (parallel [(const_int 0) (const_int 2)
13179 (const_int 4) (const_int 6)]))))
13184 (parallel [(const_int 1) (const_int 3)
13185 (const_int 5) (const_int 7)])))
13189 (parallel [(const_int 1) (const_int 3)
13190 (const_int 5) (const_int 7)])))))
13191 (match_operand:V4SI 3 "register_operand" "x")))]
13193 "vp<madcs>wd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
13194 [(set_attr "type" "ssemuladd")
13195 (set_attr "mode" "TI")])
13197 ;; XOP parallel XMM conditional moves
;; vpcmov for every mode of iterator V.  Two alternatives: in the first,
;; operand 2 may be memory and operand 3 is a register; in the second,
;; operand 3 is memory and operand 2 is a register.  Operand 1 is always a
;; register, so at most one source is a memory reference.
;; NOTE(review): the RTL operator line and the insn condition are not
;; visible in this excerpt; no mode attribute is set here.
13198 (define_insn "xop_pcmov_<mode><avxsizesuffix>"
13199 [(set (match_operand:V 0 "register_operand" "=x,x")
13201 (match_operand:V 3 "nonimmediate_operand" "x,m")
13202 (match_operand:V 1 "register_operand" "x,x")
13203 (match_operand:V 2 "nonimmediate_operand" "xm,x")))]
13205 "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
13206 [(set_attr "type" "sseiadd1")])
13208 ;; XOP horizontal add/subtract instructions
;; vphadd<u>bw: single V16QI source (register or memory), V8HI result.
;; The even bytes (0, 2, ..., 14) and the odd bytes (1, 3, ..., 15) are
;; selected as two groups, i.e. adjacent byte pairs are combined.
;; NOTE(review): <u> is defined elsewhere; the extension/plus operators, the
;; match_dup for the second selection and the insn condition are not visible
;; in this excerpt.
13209 (define_insn "xop_phadd<u>bw"
13210 [(set (match_operand:V8HI 0 "register_operand" "=x")
13214 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
13215 (parallel [(const_int 0) (const_int 2)
13216 (const_int 4) (const_int 6)
13217 (const_int 8) (const_int 10)
13218 (const_int 12) (const_int 14)])))
13222 (parallel [(const_int 1) (const_int 3)
13223 (const_int 5) (const_int 7)
13224 (const_int 9) (const_int 11)
13225 (const_int 13) (const_int 15)])))))]
13227 "vphadd<u>bw\t{%1, %0|%0, %1}"
13228 [(set_attr "type" "sseiadd1")])
;; vphadd<u>bd: single V16QI source, V4SI result.  Four selections with
;; stride 4 (starting at bytes 0, 1, 2 and 3) are combined, so each result
;; element draws on four consecutive source bytes.
;; NOTE(review): the combining operators, the match_dup lines and the insn
;; condition are not visible in this excerpt.
13230 (define_insn "xop_phadd<u>bd"
13231 [(set (match_operand:V4SI 0 "register_operand" "=x")
13236 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
13237 (parallel [(const_int 0) (const_int 4)
13238 (const_int 8) (const_int 12)])))
13242 (parallel [(const_int 1) (const_int 5)
13243 (const_int 9) (const_int 13)]))))
13248 (parallel [(const_int 2) (const_int 6)
13249 (const_int 10) (const_int 14)])))
13253 (parallel [(const_int 3) (const_int 7)
13254 (const_int 11) (const_int 15)]))))))]
13256 "vphadd<u>bd\t{%1, %0|%0, %1}"
13257 [(set_attr "type" "sseiadd1")])
;; vphadd<u>bq: single V16QI source, V2DI result.  Eight two-element
;; selections {k, k+8} for k = 0..7 are combined, so each result element
;; draws on eight consecutive source bytes.
;; NOTE(review): the combining operators, the match_dup lines and the insn
;; condition are not visible in this excerpt.
13259 (define_insn "xop_phadd<u>bq"
13260 [(set (match_operand:V2DI 0 "register_operand" "=x")
13266 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
13267 (parallel [(const_int 0) (const_int 8)])))
13271 (parallel [(const_int 1) (const_int 9)]))))
13276 (parallel [(const_int 2) (const_int 10)])))
13280 (parallel [(const_int 3) (const_int 11)])))))
13286 (parallel [(const_int 4) (const_int 12)])))
13290 (parallel [(const_int 5) (const_int 13)]))))
13295 (parallel [(const_int 6) (const_int 14)])))
13299 (parallel [(const_int 7) (const_int 15)])))))))]
13301 "vphadd<u>bq\t{%1, %0|%0, %1}"
13302 [(set_attr "type" "sseiadd1")])
;; vphadd<u>wd: single V8HI source, V4SI result.  Even elements (0, 2, 4, 6)
;; and odd elements (1, 3, 5, 7) are selected as two groups, i.e. adjacent
;; word pairs are combined.
;; NOTE(review): the combining operators and the insn condition are not
;; visible in this excerpt.
13304 (define_insn "xop_phadd<u>wd"
13305 [(set (match_operand:V4SI 0 "register_operand" "=x")
13309 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
13310 (parallel [(const_int 0) (const_int 2)
13311 (const_int 4) (const_int 6)])))
13315 (parallel [(const_int 1) (const_int 3)
13316 (const_int 5) (const_int 7)])))))]
13318 "vphadd<u>wd\t{%1, %0|%0, %1}"
13319 [(set_attr "type" "sseiadd1")])
;; vphadd<u>wq: single V8HI source, V2DI result.  Four two-element
;; selections {k, k+4} for k = 0..3 are combined, so each result element
;; draws on four consecutive source words.
;; NOTE(review): the combining operators and the insn condition are not
;; visible in this excerpt.
13321 (define_insn "xop_phadd<u>wq"
13322 [(set (match_operand:V2DI 0 "register_operand" "=x")
13327 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
13328 (parallel [(const_int 0) (const_int 4)])))
13332 (parallel [(const_int 1) (const_int 5)]))))
13337 (parallel [(const_int 2) (const_int 6)])))
13341 (parallel [(const_int 3) (const_int 7)]))))))]
13343 "vphadd<u>wq\t{%1, %0|%0, %1}"
13344 [(set_attr "type" "sseiadd1")])
;; vphadd<u>dq: single V4SI source, V2DI result.  Elements {0, 2} and {1, 3}
;; are selected as two groups, i.e. adjacent doubleword pairs are combined.
;; NOTE(review): the combining operators and the insn condition are not
;; visible in this excerpt.
13346 (define_insn "xop_phadd<u>dq"
13347 [(set (match_operand:V2DI 0 "register_operand" "=x")
13351 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
13352 (parallel [(const_int 0) (const_int 2)])))
13356 (parallel [(const_int 1) (const_int 3)])))))]
13358 "vphadd<u>dq\t{%1, %0|%0, %1}"
13359 [(set_attr "type" "sseiadd1")])
;; vphsubbw: single V16QI source, V8HI result.  Uses the same even/odd byte
;; selections as the phadd bw pattern; there is no <u> variant of this name.
;; NOTE(review): the extension/minus operators and the insn condition are
;; not visible in this excerpt.
13361 (define_insn "xop_phsubbw"
13362 [(set (match_operand:V8HI 0 "register_operand" "=x")
13366 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
13367 (parallel [(const_int 0) (const_int 2)
13368 (const_int 4) (const_int 6)
13369 (const_int 8) (const_int 10)
13370 (const_int 12) (const_int 14)])))
13374 (parallel [(const_int 1) (const_int 3)
13375 (const_int 5) (const_int 7)
13376 (const_int 9) (const_int 11)
13377 (const_int 13) (const_int 15)])))))]
13379 "vphsubbw\t{%1, %0|%0, %1}"
13380 [(set_attr "type" "sseiadd1")])
;; vphsubwd: single V8HI source, V4SI result, built from the even
;; (0, 2, 4, 6) and odd (1, 3, 5, 7) element selections.
;; NOTE(review): the combining operators and the insn condition are not
;; visible in this excerpt.
13382 (define_insn "xop_phsubwd"
13383 [(set (match_operand:V4SI 0 "register_operand" "=x")
13387 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
13388 (parallel [(const_int 0) (const_int 2)
13389 (const_int 4) (const_int 6)])))
13393 (parallel [(const_int 1) (const_int 3)
13394 (const_int 5) (const_int 7)])))))]
13396 "vphsubwd\t{%1, %0|%0, %1}"
13397 [(set_attr "type" "sseiadd1")])
;; vphsubdq: single V4SI source, V2DI result, built from the {0, 2} and
;; {1, 3} element selections.
;; NOTE(review): the combining operators and the insn condition are not
;; visible in this excerpt.
13399 (define_insn "xop_phsubdq"
13400 [(set (match_operand:V2DI 0 "register_operand" "=x")
13404 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
13405 (parallel [(const_int 0) (const_int 2)])))
13409 (parallel [(const_int 1) (const_int 3)])))))]
13411 "vphsubdq\t{%1, %0|%0, %1}"
13412 [(set_attr "type" "sseiadd1")])
13414 ;; XOP permute instructions
;; vpperm on V16QI, modelled as an unspec (UNSPEC_XOP_PERMUTE) of three
;; V16QI inputs.  Operand 1 is always a register; the two alternatives let
;; either operand 3 or operand 2 be memory, and the insn condition rejects
;; the case where both are memory.
13415 (define_insn "xop_pperm"
13416 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
13418 [(match_operand:V16QI 1 "register_operand" "x,x")
13419 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
13420 (match_operand:V16QI 3 "nonimmediate_operand" "xm,x")]
13421 UNSPEC_XOP_PERMUTE))]
13422 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
13423 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
13424 [(set_attr "type" "sse4arg")
13425 (set_attr "mode" "TI")])
13427 ;; XOP pack instructions that combine two vectors into a smaller vector
;; Pack two V2DI inputs (operands 1 and 2) into one V4SI result using
;; vpperm.  The V16QI selector (operand 3) appears only in a (use ...), so
;; it is not part of the value expression.  At most one of operands 2 and 3
;; may be memory.
;; NOTE(review): the narrowing/concatenation operators around operands 1 and
;; 2 are not visible in this excerpt.
13428 (define_insn "xop_pperm_pack_v2di_v4si"
13429 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
13432 (match_operand:V2DI 1 "register_operand" "x,x"))
13434 (match_operand:V2DI 2 "nonimmediate_operand" "x,m"))))
13435 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
13436 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
13437 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
13438 [(set_attr "type" "sse4arg")
13439 (set_attr "mode" "TI")])
;; Pack two V4SI inputs (operands 1 and 2) into one V8HI result using
;; vpperm, with the V16QI selector supplied through a (use ...) as operand 3.
;; At most one of operands 2 and 3 may be memory.
;; NOTE(review): the narrowing/concatenation operators around operands 1 and
;; 2 are not visible in this excerpt.
13441 (define_insn "xop_pperm_pack_v4si_v8hi"
13442 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
13445 (match_operand:V4SI 1 "register_operand" "x,x"))
13447 (match_operand:V4SI 2 "nonimmediate_operand" "x,m"))))
13448 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
13449 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
13450 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
13451 [(set_attr "type" "sse4arg")
13452 (set_attr "mode" "TI")])
;; Pack two V8HI inputs (operands 1 and 2) into one V16QI result using
;; vpperm, with the V16QI selector supplied through a (use ...) as operand 3.
;; At most one of operands 2 and 3 may be memory.
;; NOTE(review): the narrowing/concatenation operators around operands 1 and
;; 2 are not visible in this excerpt.
13454 (define_insn "xop_pperm_pack_v8hi_v16qi"
13455 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
13458 (match_operand:V8HI 1 "register_operand" "x,x"))
13460 (match_operand:V8HI 2 "nonimmediate_operand" "x,m"))))
13461 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
13462 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
13463 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
13464 [(set_attr "type" "sse4arg")
13465 (set_attr "mode" "TI")])
13467 ;; XOP packed rotate instructions
;; Expander for rotl<mode>3 on VI_128 vectors with an SImode count.
;; When the count is not accepted by const_0_to_<sserotatemax>_operand, the
;; preparation code converts it to the element mode if its mode differs,
;; puts the same rtx in every lane of a PARALLEL, materialises that with
;; vec_init<mode> and emits xop_vrotl<mode>3 with the resulting vector.
;; NOTE(review): the insn condition, braces, the declaration of i and the
;; DONE statement are not visible in this excerpt.
13468 (define_expand "rotl<mode>3"
13469 [(set (match_operand:VI_128 0 "register_operand")
13471 (match_operand:VI_128 1 "nonimmediate_operand")
13472 (match_operand:SI 2 "general_operand")))]
13475 /* If we were given a scalar, convert it to parallel */
13476 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
13478 rtvec vs = rtvec_alloc (<ssescalarnum>);
13479 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
13480 rtx reg = gen_reg_rtx (<MODE>mode);
13481 rtx op2 = operands[2];
13484 if (GET_MODE (op2) != <ssescalarmode>mode)
13486 op2 = gen_reg_rtx (<ssescalarmode>mode);
13487 convert_move (op2, operands[2], false);
13490 for (i = 0; i < <ssescalarnum>; i++)
13491 RTVEC_ELT (vs, i) = op2;
13493 emit_insn (gen_vec_init<mode> (reg, par));
13494 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
;; Expander for rotr<mode>3 on VI_128 vectors with an SImode count.
;; Mirrors the rotl expander for non-immediate counts: the count is
;; replicated into a vector with vec_init<mode>, that vector is negated with
;; neg<mode>2, and xop_vrotl<mode>3 is emitted with the negated vector, so a
;; right rotate is expressed as a left rotate by the negative amount.
;; NOTE(review): the insn condition, braces, the declaration of i and the
;; DONE statement are not visible in this excerpt.
13499 (define_expand "rotr<mode>3"
13500 [(set (match_operand:VI_128 0 "register_operand")
13502 (match_operand:VI_128 1 "nonimmediate_operand")
13503 (match_operand:SI 2 "general_operand")))]
13506 /* If we were given a scalar, convert it to parallel */
13507 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
13509 rtvec vs = rtvec_alloc (<ssescalarnum>);
13510 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
13511 rtx neg = gen_reg_rtx (<MODE>mode);
13512 rtx reg = gen_reg_rtx (<MODE>mode);
13513 rtx op2 = operands[2];
13516 if (GET_MODE (op2) != <ssescalarmode>mode)
13518 op2 = gen_reg_rtx (<ssescalarmode>mode);
13519 convert_move (op2, operands[2], false);
13522 for (i = 0; i < <ssescalarnum>; i++)
13523 RTVEC_ELT (vs, i) = op2;
13525 emit_insn (gen_vec_init<mode> (reg, par));
13526 emit_insn (gen_neg<mode>2 (neg, reg));
13527 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], neg));
;; Immediate-count form: vprot<ssemodesuffix> with operand 2 restricted to a
;; constant accepted by const_0_to_<sserotatemax>_operand (one immediate
;; byte, per length_immediate).  The source operand may be memory.
;; NOTE(review): the RTL operator line and the insn condition are not
;; visible in this excerpt.
13532 (define_insn "xop_rotl<mode>3"
13533 [(set (match_operand:VI_128 0 "register_operand" "=x")
13535 (match_operand:VI_128 1 "nonimmediate_operand" "xm")
13536 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
13538 "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
13539 [(set_attr "type" "sseishft")
13540 (set_attr "length_immediate" "1")
13541 (set_attr "mode" "TI")])
;; Immediate-count right rotate, emitted with the same vprot mnemonic as the
;; left rotate.  The output code computes (element bit size - operand 2) as
;; a new constant and the template prints it as %3 in place of the original
;; count.
;; NOTE(review): the RTL operator line, the insn condition and the left-hand
;; side of the assignment (presumably operands[3]) are not visible in this
;; excerpt.
13543 (define_insn "xop_rotr<mode>3"
13544 [(set (match_operand:VI_128 0 "register_operand" "=x")
13546 (match_operand:VI_128 1 "nonimmediate_operand" "xm")
13547 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
13551 = GEN_INT (GET_MODE_BITSIZE (<ssescalarmode>mode) - INTVAL (operands[2]));
13552 return \"vprot<ssemodesuffix>\t{%3, %1, %0|%0, %1, %3}\";
13554 [(set_attr "type" "sseishft")
13555 (set_attr "length_immediate" "1")
13556 (set_attr "mode" "TI")])
;; Expander for a right rotate with a per-element vector count (operand 2).
;; It negates the count vector into a fresh register with neg<mode>2 and
;; emits xop_vrotl<mode>3 on the negated counts.
;; NOTE(review): the insn condition, braces and the DONE statement are not
;; visible in this excerpt.
13558 (define_expand "vrotr<mode>3"
13559 [(match_operand:VI_128 0 "register_operand")
13560 (match_operand:VI_128 1 "register_operand")
13561 (match_operand:VI_128 2 "register_operand")]
13564 rtx reg = gen_reg_rtx (<MODE>mode);
13565 emit_insn (gen_neg<mode>2 (reg, operands[2]));
13566 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
;; Expander for a left rotate with a per-element vector count: forwards the
;; three operands unchanged to xop_vrotl<mode>3.
;; NOTE(review): the insn condition, braces and the DONE statement are not
;; visible in this excerpt.
13570 (define_expand "vrotl<mode>3"
13571 [(match_operand:VI_128 0 "register_operand")
13572 (match_operand:VI_128 1 "register_operand")
13573 (match_operand:VI_128 2 "register_operand")]
13576 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], operands[2]));
;; Variable-count vprot<ssemodesuffix>.  The pattern is an if_then_else
;; keyed on the count vector (operand 2), and one arm uses the negated count
;; (neg (match_dup 2)).  Either operand 1 or operand 2 may be memory, but the
;; condition rejects both being memory.
;; NOTE(review): the comparison and the rotate operators inside the
;; if_then_else are not visible in this excerpt.
13580 (define_insn "xop_vrotl<mode>3"
13581 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
13582 (if_then_else:VI_128
13584 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
13587 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
13591 (neg:VI_128 (match_dup 2)))))]
13592 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
13593 "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
13594 [(set_attr "type" "sseishft")
13595 (set_attr "prefix_data16" "0")
13596 (set_attr "prefix_extra" "2")
13597 (set_attr "mode" "TI")])
13599 ;; XOP packed shift instructions.
;; vlshr<mode>3 for VI12_128 modes: negates the count vector (operand 2)
;; with neg<mode>2 and emits xop_shl<mode>3 on the negated counts.
;; NOTE(review): the RTL operator line, the insn condition, braces and the
;; DONE statement are not visible in this excerpt.
13600 (define_expand "vlshr<mode>3"
13601 [(set (match_operand:VI12_128 0 "register_operand")
13603 (match_operand:VI12_128 1 "register_operand")
13604 (match_operand:VI12_128 2 "nonimmediate_operand")))]
13607 rtx neg = gen_reg_rtx (<MODE>mode);
13608 emit_insn (gen_neg<mode>2 (neg, operands[2]));
13609 emit_insn (gen_xop_shl<mode>3 (operands[0], operands[1], neg));
;; vlshr<mode>3 for VI48_128 modes, enabled for TARGET_AVX2 or TARGET_XOP.
;; The visible preparation code is the XOP sequence: negate the count vector
;; and emit xop_shl<mode>3.
;; NOTE(review): the guard that chooses between the AVX2 and XOP paths, the
;; RTL operator line and the DONE statement are not visible in this excerpt.
13613 (define_expand "vlshr<mode>3"
13614 [(set (match_operand:VI48_128 0 "register_operand")
13616 (match_operand:VI48_128 1 "register_operand")
13617 (match_operand:VI48_128 2 "nonimmediate_operand")))]
13618 "TARGET_AVX2 || TARGET_XOP"
13622 rtx neg = gen_reg_rtx (<MODE>mode);
13623 emit_insn (gen_neg<mode>2 (neg, operands[2]));
13624 emit_insn (gen_xop_shl<mode>3 (operands[0], operands[1], neg));
;; vlshr<mode>3 for VI48_512 modes.  No preparation code is visible, so the
;; expander appears to emit its RTL template as written.
;; NOTE(review): the RTL operator line and the insn condition are not
;; visible in this excerpt.
13629 (define_expand "vlshr<mode>3"
13630 [(set (match_operand:VI48_512 0 "register_operand")
13632 (match_operand:VI48_512 1 "register_operand")
13633 (match_operand:VI48_512 2 "nonimmediate_operand")))]
;; vlshr<mode>3 for VI48_256 modes.  No preparation code is visible, so the
;; expander appears to emit its RTL template as written.
;; NOTE(review): the RTL operator line and the insn condition are not
;; visible in this excerpt.
13636 (define_expand "vlshr<mode>3"
13637 [(set (match_operand:VI48_256 0 "register_operand")
13639 (match_operand:VI48_256 1 "register_operand")
13640 (match_operand:VI48_256 2 "nonimmediate_operand")))]
;; vashr<mode>3 (ashiftrt) for VI128_128 modes: negates the count vector
;; with neg<mode>2 and emits xop_sha<mode>3 on the negated counts.
;; NOTE(review): the insn condition, braces and the DONE statement are not
;; visible in this excerpt.
13643 (define_expand "vashr<mode>3"
13644 [(set (match_operand:VI128_128 0 "register_operand")
13645 (ashiftrt:VI128_128
13646 (match_operand:VI128_128 1 "register_operand")
13647 (match_operand:VI128_128 2 "nonimmediate_operand")))]
13650 rtx neg = gen_reg_rtx (<MODE>mode);
13651 emit_insn (gen_neg<mode>2 (neg, operands[2]));
13652 emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], neg));
;; vashrv4si3 (ashiftrt on V4SI), enabled for TARGET_AVX2 or TARGET_XOP.
;; The visible preparation code is the XOP sequence: negate the count vector
;; with negv4si2 and emit xop_shav4si3.
;; NOTE(review): the guard that chooses between the AVX2 and XOP paths and
;; the DONE statement are not visible in this excerpt.
13656 (define_expand "vashrv4si3"
13657 [(set (match_operand:V4SI 0 "register_operand")
13658 (ashiftrt:V4SI (match_operand:V4SI 1 "register_operand")
13659 (match_operand:V4SI 2 "nonimmediate_operand")))]
13660 "TARGET_AVX2 || TARGET_XOP"
13664 rtx neg = gen_reg_rtx (V4SImode);
13665 emit_insn (gen_negv4si2 (neg, operands[2]));
13666 emit_insn (gen_xop_shav4si3 (operands[0], operands[1], neg));
;; vashrv16si3: ashiftrt on V16SI with a per-element count vector.  No
;; preparation code is visible.
;; NOTE(review): the insn condition is not visible in this excerpt.
13671 (define_expand "vashrv16si3"
13672 [(set (match_operand:V16SI 0 "register_operand")
13673 (ashiftrt:V16SI (match_operand:V16SI 1 "register_operand")
13674 (match_operand:V16SI 2 "nonimmediate_operand")))]
;; vashrv8si3: ashiftrt on V8SI with a per-element count vector.  No
;; preparation code is visible.
;; NOTE(review): the insn condition is not visible in this excerpt.
13677 (define_expand "vashrv8si3"
13678 [(set (match_operand:V8SI 0 "register_operand")
13679 (ashiftrt:V8SI (match_operand:V8SI 1 "register_operand")
13680 (match_operand:V8SI 2 "nonimmediate_operand")))]
;; vashl<mode>3 for VI12_128 modes: forwards the operands unchanged to
;; xop_sha<mode>3 (no negation of the count, unlike the right shifts).
;; NOTE(review): the RTL operator line, the insn condition, braces and the
;; DONE statement are not visible in this excerpt.
13683 (define_expand "vashl<mode>3"
13684 [(set (match_operand:VI12_128 0 "register_operand")
13686 (match_operand:VI12_128 1 "register_operand")
13687 (match_operand:VI12_128 2 "nonimmediate_operand")))]
13690 emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], operands[2]));
;; vashl<mode>3 for VI48_128 modes, enabled for TARGET_AVX2 or TARGET_XOP.
;; The visible preparation code forces the count vector into a register and
;; emits xop_sha<mode>3.
;; NOTE(review): the guard that chooses between the AVX2 and XOP paths, the
;; RTL operator line and the DONE statement are not visible in this excerpt.
13694 (define_expand "vashl<mode>3"
13695 [(set (match_operand:VI48_128 0 "register_operand")
13697 (match_operand:VI48_128 1 "register_operand")
13698 (match_operand:VI48_128 2 "nonimmediate_operand")))]
13699 "TARGET_AVX2 || TARGET_XOP"
13703 operands[2] = force_reg (<MODE>mode, operands[2]);
13704 emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], operands[2]));
;; vashl<mode>3 for VI48_512 modes.  No preparation code is visible.
;; NOTE(review): the RTL operator line and the insn condition are not
;; visible in this excerpt.
13709 (define_expand "vashl<mode>3"
13710 [(set (match_operand:VI48_512 0 "register_operand")
13712 (match_operand:VI48_512 1 "register_operand")
13713 (match_operand:VI48_512 2 "nonimmediate_operand")))]
;; vashl<mode>3 for VI48_256 modes.  No preparation code is visible.
;; NOTE(review): the RTL operator line and the insn condition are not
;; visible in this excerpt.
13716 (define_expand "vashl<mode>3"
13717 [(set (match_operand:VI48_256 0 "register_operand")
13719 (match_operand:VI48_256 1 "register_operand")
13720 (match_operand:VI48_256 2 "nonimmediate_operand")))]
;; Variable-count vpsha<ssemodesuffix>.  Same shape as the vrotl pattern: an
;; if_then_else keyed on the count vector (operand 2), with one arm using
;; the negated count.  Either source may be memory, but not both.
;; NOTE(review): the comparison and the shift operators inside the
;; if_then_else are not visible in this excerpt.
13723 (define_insn "xop_sha<mode>3"
13724 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
13725 (if_then_else:VI_128
13727 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
13730 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
13734 (neg:VI_128 (match_dup 2)))))]
13735 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
13736 "vpsha<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
13737 [(set_attr "type" "sseishft")
13738 (set_attr "prefix_data16" "0")
13739 (set_attr "prefix_extra" "2")
13740 (set_attr "mode" "TI")])
;; Variable-count vpshl<ssemodesuffix>.  Same shape as the vpsha pattern: an
;; if_then_else keyed on the count vector (operand 2), with one arm using
;; the negated count.  Either source may be memory, but not both.
;; NOTE(review): the comparison and the shift operators inside the
;; if_then_else are not visible in this excerpt.
13742 (define_insn "xop_shl<mode>3"
13743 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
13744 (if_then_else:VI_128
13746 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
13749 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
13753 (neg:VI_128 (match_dup 2)))))]
13754 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
13755 "vpshl<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
13756 [(set_attr "type" "sseishft")
13757 (set_attr "prefix_data16" "0")
13758 (set_attr "prefix_extra" "2")
13759 (set_attr "mode" "TI")])
;; Byte-element shifts by a scalar SImode count, for every any_shift code.
;; With TARGET_XOP and V16QImode the count is broadcast into a V16QI vector
;; through a 16-entry PARALLEL and vec_initv16qi, then xop_shlv16qi3 is used
;; for LSHIFTRT and xop_shav16qi3 for the other codes.  For non-ASHIFT codes
;; a constant count is negated up front; the negv16qi2 call handles the
;; remaining case.  Otherwise the work is delegated to
;; ix86_expand_vecop_qihi.
;; NOTE(review): the insn condition, several declarations, the else branches
;; and the guard on the negv16qi2 call (presumably the negate flag) are not
;; visible in this excerpt.
13761 (define_expand "<shift_insn><mode>3"
13762 [(set (match_operand:VI1_AVX2 0 "register_operand")
13763 (any_shift:VI1_AVX2
13764 (match_operand:VI1_AVX2 1 "register_operand")
13765 (match_operand:SI 2 "nonmemory_operand")))]
13768 if (TARGET_XOP && <MODE>mode == V16QImode)
13770 bool negate = false;
13771 rtx (*gen) (rtx, rtx, rtx);
13775 if (<CODE> != ASHIFT)
13777 if (CONST_INT_P (operands[2]))
13778 operands[2] = GEN_INT (-INTVAL (operands[2]));
13782 par = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
13783 for (i = 0; i < 16; i++)
13784 XVECEXP (par, 0, i) = operands[2];
13786 tmp = gen_reg_rtx (V16QImode);
13787 emit_insn (gen_vec_initv16qi (tmp, par));
13790 emit_insn (gen_negv16qi2 (tmp, tmp));
13792 gen = (<CODE> == LSHIFTRT ? gen_xop_shlv16qi3 : gen_xop_shav16qi3);
13793 emit_insn (gen (operands[0], operands[1], tmp));
13796 ix86_expand_vecop_qihi (<CODE>, operands[0], operands[1], operands[2]);
;; ashrv2di3: V2DI shift by a scalar DImode count, implemented through
;; xop_shav2di3.  A constant count is negated directly; the count is then
;; broadcast into a V2DI register via a two-entry PARALLEL and vec_initv2di.
;; The negv2di2 call handles the non-constant case.
;; NOTE(review): the RTL operator line, the insn condition, the else branch
;; and the guard on the negv2di2 call (presumably the negate flag) are not
;; visible in this excerpt.
13800 (define_expand "ashrv2di3"
13801 [(set (match_operand:V2DI 0 "register_operand")
13803 (match_operand:V2DI 1 "register_operand")
13804 (match_operand:DI 2 "nonmemory_operand")))]
13807 rtx reg = gen_reg_rtx (V2DImode);
13809 bool negate = false;
13812 if (CONST_INT_P (operands[2]))
13813 operands[2] = GEN_INT (-INTVAL (operands[2]));
13817 par = gen_rtx_PARALLEL (V2DImode, rtvec_alloc (2));
13818 for (i = 0; i < 2; i++)
13819 XVECEXP (par, 0, i) = operands[2];
13821 emit_insn (gen_vec_initv2di (reg, par));
13824 emit_insn (gen_negv2di2 (reg, reg));
13826 emit_insn (gen_xop_shav2di3 (operands[0], operands[1], reg));
13830 ;; XOP FRCZ support
;; vfrcz<ssemodesuffix> for every FMAMODE mode: one register-or-memory
;; source, register destination, with the mode attribute following the
;; iterator mode.
;; NOTE(review): the unspec wrapper line, its UNSPEC name and the insn
;; condition are not visible in this excerpt.
13831 (define_insn "xop_frcz<mode>2"
13832 [(set (match_operand:FMAMODE 0 "register_operand" "=x")
13834 [(match_operand:FMAMODE 1 "nonimmediate_operand" "xm")]
13837 "vfrcz<ssemodesuffix>\t{%1, %0|%0, %1}"
13838 [(set_attr "type" "ssecvt1")
13839 (set_attr "mode" "<MODE>")])
;; Expander for the vmfrcz form on VF_128 modes.  Its preparation statement
;; supplies operand 2 as the all-zero vector of the same mode, which the
;; matching *xop_vmfrcz<mode>2 insn requires via const0_operand.
;; NOTE(review): the merge/unspec operator lines and the insn condition are
;; not visible in this excerpt.
13841 (define_expand "xop_vmfrcz<mode>2"
13842 [(set (match_operand:VF_128 0 "register_operand")
13845 [(match_operand:VF_128 1 "nonimmediate_operand")]
13850 "operands[2] = CONST0_RTX (<MODE>mode);")
;; Insn behind the xop_vmfrcz<mode>2 expander: emits
;; vfrcz<ssescalarmodesuffix>.  Operand 2 must be the zero constant
;; (const0_operand) and does not appear in the output template.  The Intel
;; syntax side prints operand 1 with the <iptr> size override.
;; NOTE(review): the merge/unspec operator lines and the insn condition are
;; not visible in this excerpt.
13852 (define_insn "*xop_vmfrcz<mode>2"
13853 [(set (match_operand:VF_128 0 "register_operand" "=x")
13856 [(match_operand:VF_128 1 "nonimmediate_operand" "xm")]
13858 (match_operand:VF_128 2 "const0_operand")
13861 "vfrcz<ssescalarmodesuffix>\t{%1, %0|%0, %<iptr>1}"
13862 [(set_attr "type" "ssecvt1")
13863 (set_attr "mode" "<MODE>")])
;; vpcom with a signed-integer comparison operator.  Operand 1 is the
;; comparison itself (matched by ix86_comparison_int_operator) and is
;; printed through %Y1 to form the mnemonic; operands 2 and 3 are the
;; compared vectors, of which only operand 3 may be memory.
;; NOTE(review): this pattern uses type sse4arg while the unsigned variants
;; use ssecmp -- confirm this is intended.  The insn condition is not
;; visible in this excerpt.
13865 (define_insn "xop_maskcmp<mode>3"
13866 [(set (match_operand:VI_128 0 "register_operand" "=x")
13867 (match_operator:VI_128 1 "ix86_comparison_int_operator"
13868 [(match_operand:VI_128 2 "register_operand" "x")
13869 (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
13871 "vpcom%Y1<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
13872 [(set_attr "type" "sse4arg")
13873 (set_attr "prefix_data16" "0")
13874 (set_attr "prefix_rep" "0")
13875 (set_attr "prefix_extra" "2")
13876 (set_attr "length_immediate" "1")
13877 (set_attr "mode" "TI")])
;; Unsigned counterpart of xop_maskcmp<mode>3: the comparison (operand 1)
;; must satisfy ix86_comparison_uns_operator and the mnemonic gains a "u"
;; between the condition and the mode suffix.
;; NOTE(review): the insn condition is not visible in this excerpt.
13879 (define_insn "xop_maskcmp_uns<mode>3"
13880 [(set (match_operand:VI_128 0 "register_operand" "=x")
13881 (match_operator:VI_128 1 "ix86_comparison_uns_operator"
13882 [(match_operand:VI_128 2 "register_operand" "x")
13883 (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
13885 "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
13886 [(set_attr "type" "ssecmp")
13887 (set_attr "prefix_data16" "0")
13888 (set_attr "prefix_rep" "0")
13889 (set_attr "prefix_extra" "2")
13890 (set_attr "length_immediate" "1")
13891 (set_attr "mode" "TI")])
13893 ;; Version of pcom*u* that is called from the intrinsics that allows pcomequ*
13894 ;; and pcomneu* not to be converted to the signed ones in case somebody needs
13895 ;; the exact instruction generated for the intrinsic.
;; Same output template as xop_maskcmp_uns<mode>3, but the comparison is
;; wrapped in an unspec (UNSPEC_XOP_UNSIGNED_CMP) so it stays opaque to the
;; RTL optimizers.  Unlike the other two maskcmp patterns, no prefix_rep
;; attribute is set here.
;; NOTE(review): the unspec header line and the insn condition are not
;; visible in this excerpt.
13896 (define_insn "xop_maskcmp_uns2<mode>3"
13897 [(set (match_operand:VI_128 0 "register_operand" "=x")
13899 [(match_operator:VI_128 1 "ix86_comparison_uns_operator"
13900 [(match_operand:VI_128 2 "register_operand" "x")
13901 (match_operand:VI_128 3 "nonimmediate_operand" "xm")])]
13902 UNSPEC_XOP_UNSIGNED_CMP))]
13904 "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
13905 [(set_attr "type" "ssecmp")
13906 (set_attr "prefix_data16" "0")
13907 (set_attr "prefix_extra" "2")
13908 (set_attr "length_immediate" "1")
13909 (set_attr "mode" "TI")])
13911 ;; Pcomtrue and pcomfalse support. These are useless instructions, but are
13912 ;; being added here to be complete.
;; Unspec (UNSPEC_XOP_TRUEFALSE) of two vectors plus a constant selector.
;; The output code emits vpcomtrue when operand 3 is nonzero and vpcomfalse
;; when it is zero; operand 3 itself is not printed.
;; NOTE(review): the unspec header line and the insn condition are not
;; visible in this excerpt.
13913 (define_insn "xop_pcom_tf<mode>3"
13914 [(set (match_operand:VI_128 0 "register_operand" "=x")
13916 [(match_operand:VI_128 1 "register_operand" "x")
13917 (match_operand:VI_128 2 "nonimmediate_operand" "xm")
13918 (match_operand:SI 3 "const_int_operand" "n")]
13919 UNSPEC_XOP_TRUEFALSE))]
13922 return ((INTVAL (operands[3]) != 0)
13923 ? "vpcomtrue<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
13924 : "vpcomfalse<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}");
13926 [(set_attr "type" "ssecmp")
13927 (set_attr "prefix_data16" "0")
13928 (set_attr "prefix_extra" "2")
13929 (set_attr "length_immediate" "1")
13930 (set_attr "mode" "TI")])
;; vpermil2<ssemodesuffix> for VF_128_256 modes.  Operands 1 and 2 are the
;; two floating-point sources, operand 3 is the selector in the matching
;; integer vector mode (register or memory), and operand 4 is an immediate
;; in the range 0..3.
;; NOTE(review): the unspec header, its UNSPEC name and the insn condition
;; are not visible in this excerpt.
13932 (define_insn "xop_vpermil2<mode>3"
13933 [(set (match_operand:VF_128_256 0 "register_operand" "=x")
13935 [(match_operand:VF_128_256 1 "register_operand" "x")
13936 (match_operand:VF_128_256 2 "nonimmediate_operand" "%x")
13937 (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "xm")
13938 (match_operand:SI 4 "const_0_to_3_operand" "n")]
13941 "vpermil2<ssemodesuffix>\t{%4, %3, %2, %1, %0|%0, %1, %2, %3, %4}"
13942 [(set_attr "type" "sse4arg")
13943 (set_attr "length_immediate" "1")
13944 (set_attr "mode" "<MODE>")])
13946 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; aesenc / vaesenc on V2DI, modelled as an unspec of two inputs.
;; Alternative 0 (isa noavx) is the two-operand legacy form with operand 1
;; tied to the destination; alternative 1 (isa avx) is the three-operand VEX
;; form.
;; NOTE(review): the UNSPEC name, the insn condition and the "@" line that
;; opens the multi-alternative template are not visible in this excerpt.
13948 (define_insn "aesenc"
13949 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
13950 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
13951 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
13955 aesenc\t{%2, %0|%0, %2}
13956 vaesenc\t{%2, %1, %0|%0, %1, %2}"
13957 [(set_attr "isa" "noavx,avx")
13958 (set_attr "type" "sselog1")
13959 (set_attr "prefix_extra" "1")
13960 (set_attr "prefix" "orig,vex")
13961 (set_attr "btver2_decode" "double,double")
13962 (set_attr "mode" "TI")])
;; aesenclast / vaesenclast on V2DI (UNSPEC_AESENCLAST).  Alternative 0
;; (isa noavx) is the two-operand legacy form with operand 1 tied to the
;; destination; alternative 1 (isa avx) is the three-operand VEX form.
;; NOTE(review): the insn condition and the "@" line that opens the
;; multi-alternative template are not visible in this excerpt.
13964 (define_insn "aesenclast"
13965 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
13966 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
13967 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
13968 UNSPEC_AESENCLAST))]
13971 aesenclast\t{%2, %0|%0, %2}
13972 vaesenclast\t{%2, %1, %0|%0, %1, %2}"
13973 [(set_attr "isa" "noavx,avx")
13974 (set_attr "type" "sselog1")
13975 (set_attr "prefix_extra" "1")
13976 (set_attr "prefix" "orig,vex")
13977 (set_attr "btver2_decode" "double,double")
13978 (set_attr "mode" "TI")])
;; aesdec / vaesdec on V2DI, modelled as an unspec of two inputs.
;; Alternative 0 (isa noavx) is the two-operand legacy form with operand 1
;; tied to the destination; alternative 1 (isa avx) is the three-operand VEX
;; form.
;; NOTE(review): the UNSPEC name, the insn condition and the "@" line that
;; opens the multi-alternative template are not visible in this excerpt.
13980 (define_insn "aesdec"
13981 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
13982 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
13983 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
13987 aesdec\t{%2, %0|%0, %2}
13988 vaesdec\t{%2, %1, %0|%0, %1, %2}"
13989 [(set_attr "isa" "noavx,avx")
13990 (set_attr "type" "sselog1")
13991 (set_attr "prefix_extra" "1")
13992 (set_attr "prefix" "orig,vex")
13993 (set_attr "btver2_decode" "double,double")
13994 (set_attr "mode" "TI")])
;; aesdeclast / vaesdeclast on V2DI (UNSPEC_AESDECLAST).  Alternative 0
;; (isa noavx) is the two-operand legacy form with operand 1 tied to the
;; destination; alternative 1 (isa avx) is the three-operand VEX form.
;; NOTE(review): the insn condition and the "@" line that opens the
;; multi-alternative template are not visible in this excerpt.
13996 (define_insn "aesdeclast"
13997 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
13998 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
13999 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
14000 UNSPEC_AESDECLAST))]
14003 aesdeclast\t{%2, %0|%0, %2}
14004 vaesdeclast\t{%2, %1, %0|%0, %1, %2}"
14005 [(set_attr "isa" "noavx,avx")
14006 (set_attr "type" "sselog1")
14007 (set_attr "prefix_extra" "1")
14008 (set_attr "prefix" "orig,vex")
14009 (set_attr "btver2_decode" "double,double")
14010 (set_attr "mode" "TI")])
;; aesimc on V2DI with a single register-or-memory source.  One alternative
;; serves both encodings: the template starts with %v and the prefix
;; attribute is maybe_vex.
;; NOTE(review): the UNSPEC name and the insn condition are not visible in
;; this excerpt.
14012 (define_insn "aesimc"
14013 [(set (match_operand:V2DI 0 "register_operand" "=x")
14014 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")]
14017 "%vaesimc\t{%1, %0|%0, %1}"
14018 [(set_attr "type" "sselog1")
14019 (set_attr "prefix_extra" "1")
14020 (set_attr "prefix" "maybe_vex")
14021 (set_attr "mode" "TI")])
;; aeskeygenassist on V2DI (UNSPEC_AESKEYGENASSIST): one register-or-memory
;; source plus an 8-bit immediate (operand 2, range 0..255).  Uses the
;; %v / maybe_vex mechanism, so one alternative serves both encodings.
;; NOTE(review): the insn condition is not visible in this excerpt.
14023 (define_insn "aeskeygenassist"
14024 [(set (match_operand:V2DI 0 "register_operand" "=x")
14025 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")
14026 (match_operand:SI 2 "const_0_to_255_operand" "n")]
14027 UNSPEC_AESKEYGENASSIST))]
14029 "%vaeskeygenassist\t{%2, %1, %0|%0, %1, %2}"
14030 [(set_attr "type" "sselog1")
14031 (set_attr "prefix_extra" "1")
14032 (set_attr "length_immediate" "1")
14033 (set_attr "prefix" "maybe_vex")
14034 (set_attr "mode" "TI")])
;; pclmulqdq / vpclmulqdq on V2DI: two vector inputs plus an 8-bit immediate
;; (operand 3, range 0..255).  Alternative 0 (isa noavx) is the legacy form
;; with operand 1 tied to the destination; alternative 1 (isa avx) is the
;; VEX form with a separate destination.
;; NOTE(review): the UNSPEC name, the insn condition and the "@" line that
;; opens the multi-alternative template are not visible in this excerpt.
14036 (define_insn "pclmulqdq"
14037 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
14038 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
14039 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")
14040 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
14044 pclmulqdq\t{%3, %2, %0|%0, %2, %3}
14045 vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}"
14046 [(set_attr "isa" "noavx,avx")
14047 (set_attr "type" "sselog1")
14048 (set_attr "prefix_extra" "1")
14049 (set_attr "length_immediate" "1")
14050 (set_attr "prefix" "orig,vex")
14051 (set_attr "mode" "TI")])
;; Expander that builds the PARALLEL for vzeroall by hand.  nregs is 16 when
;; TARGET_64BIT and 8 otherwise.  Element 0 of the PARALLEL is an
;; unspec_volatile marker; elements 1..nregs each set one SSE register, in
;; V8SImode, to the zero vector.
;; NOTE(review): the insn condition, the declaration of regno and the UNSPECV
;; argument of the unspec_volatile are not visible in this excerpt.
14053 (define_expand "avx_vzeroall"
14054 [(match_par_dup 0 [(const_int 0)])]
14057 int nregs = TARGET_64BIT ? 16 : 8;
14060 operands[0] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs + 1));
14062 XVECEXP (operands[0], 0, 0)
14063 = gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, const0_rtx),
14066 for (regno = 0; regno < nregs; regno++)
14067 XVECEXP (operands[0], 0, regno + 1)
14068 = gen_rtx_SET (VOIDmode,
14069 gen_rtx_REG (V8SImode, SSE_REGNO (regno)),
14070 CONST0_RTX (V8SImode));
;; Insn that matches the PARALLEL built by the avx_vzeroall expander: the
;; whole PARALLEL is checked by the vzeroall_operation predicate and its
;; first element must be the UNSPECV_VZEROALL unspec_volatile.  No operands
;; are encoded (modrm 0, memory none).
;; NOTE(review): the insn condition and the output template are not visible
;; in this excerpt.
14073 (define_insn "*avx_vzeroall"
14074 [(match_parallel 0 "vzeroall_operation"
14075 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROALL)])]
14078 [(set_attr "type" "sse")
14079 (set_attr "modrm" "0")
14080 (set_attr "memory" "none")
14081 (set_attr "prefix" "vex")
14082 (set_attr "btver2_decode" "vector")
14083 (set_attr "mode" "OI")])
14085 ;; Clear the upper 128bits of AVX registers, equivalent to a NOP
14086 ;; if the upper 128bits are unused.
;; The pattern is a lone unspec_volatile (UNSPECV_VZEROUPPER) with no
;; operands, so the RTL itself records no register sets.
;; NOTE(review): the insn condition and the output template are not visible
;; in this excerpt.
14087 (define_insn "avx_vzeroupper"
14088 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)]
14091 [(set_attr "type" "sse")
14092 (set_attr "modrm" "0")
14093 (set_attr "memory" "none")
14094 (set_attr "prefix" "vex")
14095 (set_attr "btver2_decode" "vector")
14096 (set_attr "mode" "OI")])
;; vpbroadcast<ssemodesuffix> for every VI mode: element 0 of an
;; <ssexmmmode> source (register or memory) is selected and used to fill
;; the destination.
;; NOTE(review): the vec_duplicate header line and the insn condition are
;; not visible in this excerpt.
14098 (define_insn "avx2_pbroadcast<mode>"
14099 [(set (match_operand:VI 0 "register_operand" "=x")
14101 (vec_select:<ssescalarmode>
14102 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "xm")
14103 (parallel [(const_int 0)]))))]
14105 "vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}"
14106 [(set_attr "type" "ssemov")
14107 (set_attr "prefix_extra" "1")
14108 (set_attr "prefix" "vex")
14109 (set_attr "mode" "<sseinsnmode>")])
;; Variant of avx2_pbroadcast<mode> whose source is a full VI_256 vector
;; rather than an xmm-sized one; element 0 is duplicated.  Alternative 0
;; takes the source from memory; alternative 1 takes it from a register and
;; prints it with the %x1 modifier.
;; NOTE(review): the insn condition and the "@" line that opens the
;; multi-alternative template are not visible in this excerpt.
14111 (define_insn "avx2_pbroadcast<mode>_1"
14112 [(set (match_operand:VI_256 0 "register_operand" "=x,x")
14113 (vec_duplicate:VI_256
14114 (vec_select:<ssescalarmode>
14115 (match_operand:VI_256 1 "nonimmediate_operand" "m,x")
14116 (parallel [(const_int 0)]))))]
14119 vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}
14120 vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %x1}"
14121 [(set_attr "type" "ssemov")
14122 (set_attr "prefix_extra" "1")
14123 (set_attr "prefix" "vex")
14124 (set_attr "mode" "<sseinsnmode>")])
;; Variable permute vperm<ssemodesuffix> for VI48F_256_512 modes, with an
;; optional mask variant generated through <mask_name>.  Operand 1 is the
;; data (register or memory) and operand 2 is the index vector in the
;; matching integer mode (register only).  In the template the index is the
;; first source in Intel order.
;; NOTE(review): the UNSPEC name closing the unspec is not visible in this
;; excerpt.
14126 (define_insn "<avx2_avx512f>_permvar<mode><mask_name>"
14127 [(set (match_operand:VI48F_256_512 0 "register_operand" "=v")
14128 (unspec:VI48F_256_512
14129 [(match_operand:VI48F_256_512 1 "nonimmediate_operand" "vm")
14130 (match_operand:<sseintvecmode> 2 "register_operand" "v")]
14132 "TARGET_AVX2 && <mask_mode512bit_condition>"
14133 "vperm<ssemodesuffix>\t{%1, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1}"
14134 [(set_attr "type" "sselog")
14135 (set_attr "prefix" "<mask_prefix2>")
14136 (set_attr "mode" "<sseinsnmode>")])
;; Expander taking the permute control as one 8-bit immediate (operand 2).
;; It splits the immediate into four 2-bit selectors (bits 1:0, 3:2, 5:4
;; and 7:6) and passes them as separate constants to the _1 pattern.
;; NOTE(review): the insn condition, braces and the DONE statement are not
;; visible in this excerpt.
14138 (define_expand "<avx2_avx512f>_perm<mode>"
14139 [(match_operand:VI8F_256_512 0 "register_operand")
14140 (match_operand:VI8F_256_512 1 "nonimmediate_operand")
14141 (match_operand:SI 2 "const_0_to_255_operand")]
14144 int mask = INTVAL (operands[2]);
14145 emit_insn (gen_<avx2_avx512f>_perm<mode>_1 (operands[0], operands[1],
14146 GEN_INT ((mask >> 0) & 3),
14147 GEN_INT ((mask >> 2) & 3),
14148 GEN_INT ((mask >> 4) & 3),
14149 GEN_INT ((mask >> 6) & 3)));
;; Masked counterpart of the perm expander for V8FI modes.  The 8-bit
;; immediate (operand 2) is split into four 2-bit selectors as in the
;; unmasked form; operand 3 (accepted by vector_move_operand) and the mask
;; register (operand 4) are forwarded unchanged to the _1_mask pattern.
;; NOTE(review): the insn condition, braces and the DONE statement are not
;; visible in this excerpt.
14153 (define_expand "avx512f_perm<mode>_mask"
14154 [(match_operand:V8FI 0 "register_operand")
14155 (match_operand:V8FI 1 "nonimmediate_operand")
14156 (match_operand:SI 2 "const_0_to_255_operand")
14157 (match_operand:V8FI 3 "vector_move_operand")
14158 (match_operand:<avx512fmaskmode> 4 "register_operand")]
14161 int mask = INTVAL (operands[2]);
14162 emit_insn (gen_<avx2_avx512f>_perm<mode>_1_mask (operands[0], operands[1],
14163 GEN_INT ((mask >> 0) & 3),
14164 GEN_INT ((mask >> 2) & 3),
14165 GEN_INT ((mask >> 4) & 3),
14166 GEN_INT ((mask >> 6) & 3),
14167 operands[3], operands[4]));
;; Insn form of the immediate permute: a vec_select whose PARALLEL holds four
;; selectors (operands 2..5), each constrained to 0..3.  The output code
;; packs them back into one 8-bit immediate (2 bits each, operand 2 in the
;; low bits), overwrites operands[2] with it and prints
;; vperm<ssemodesuffix>.
;; NOTE(review): the opening brace and the declaration/initialisation of
;; mask are not visible in this excerpt.
14171 (define_insn "<avx2_avx512f>_perm<mode>_1<mask_name>"
14172 [(set (match_operand:VI8F_256_512 0 "register_operand" "=v")
14173 (vec_select:VI8F_256_512
14174 (match_operand:VI8F_256_512 1 "nonimmediate_operand" "vm")
14175 (parallel [(match_operand 2 "const_0_to_3_operand")
14176 (match_operand 3 "const_0_to_3_operand")
14177 (match_operand 4 "const_0_to_3_operand")
14178 (match_operand 5 "const_0_to_3_operand")])))]
14179 "TARGET_AVX2 && <mask_mode512bit_condition>"
14182 mask |= INTVAL (operands[2]) << 0;
14183 mask |= INTVAL (operands[3]) << 2;
14184 mask |= INTVAL (operands[4]) << 4;
14185 mask |= INTVAL (operands[5]) << 6;
14186 operands[2] = GEN_INT (mask);
14187 return "vperm<ssemodesuffix>\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
14189 [(set_attr "type" "sselog")
14190 (set_attr "prefix" "<mask_prefix2>")
14191 (set_attr "mode" "<sseinsnmode>")])
;; vperm2i128 on V4DI: two vector sources (operand 2 may be memory) and an
;; 8-bit immediate control (operand 3, range 0..255).
;; NOTE(review): the unspec header, its UNSPEC name and the insn condition
;; are not visible in this excerpt.
14193 (define_insn "avx2_permv2ti"
14194 [(set (match_operand:V4DI 0 "register_operand" "=x")
14196 [(match_operand:V4DI 1 "register_operand" "x")
14197 (match_operand:V4DI 2 "nonimmediate_operand" "xm")
14198 (match_operand:SI 3 "const_0_to_255_operand" "n")]
14201 "vperm2i128\t{%3, %2, %1, %0|%0, %1, %2, %3}"
14202 [(set_attr "type" "sselog")
14203 (set_attr "prefix" "vex")
14204 (set_attr "mode" "OI")])
;; vbroadcastsd with a register source: element 0 of the V2DF register
;; (operand 1) is duplicated into all lanes of the V4DF destination.
;; NOTE(review): the vec_select header line and the insn condition are not
;; visible in this excerpt.
14206 (define_insn "avx2_vec_dupv4df"
14207 [(set (match_operand:V4DF 0 "register_operand" "=x")
14208 (vec_duplicate:V4DF
14210 (match_operand:V2DF 1 "register_operand" "x")
14211 (parallel [(const_int 0)]))))]
14213 "vbroadcastsd\t{%1, %0|%0, %1}"
14214 [(set_attr "type" "sselog1")
14215 (set_attr "prefix" "vex")
14216 (set_attr "mode" "V4DF")])
14218 ;; Modes handled by AVX vec_dup patterns.
;; The four 256-bit modes with 32-bit or 64-bit elements, integer and
;; floating point; used by the vec_dup<mode> pattern.
14219 (define_mode_iterator AVX_VEC_DUP_MODE
14220 [V8SI V8SF V4DI V4DF])
;; Scalar-to-vector duplicate for AVX_VEC_DUP_MODE.  Three alternatives:
;; memory source (any ISA), register source with isa avx2 (printed as %x1),
;; and a discouraged (?x) register source with isa noavx2.  The mode
;; attribute is fixed at V8SF for every iterated mode.
;; NOTE(review): only two of the three output templates, and neither the "@"
;; line nor the insn condition, are visible in this excerpt.
14222 (define_insn "vec_dup<mode>"
14223 [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand" "=x,x,x")
14224 (vec_duplicate:AVX_VEC_DUP_MODE
14225 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "m,x,?x")))]
14228 vbroadcast<ssescalarmodesuffix>\t{%1, %0|%0, %1}
14229 vbroadcast<ssescalarmodesuffix>\t{%x1, %0|%0, %x1}
14231 [(set_attr "type" "ssemov")
14232 (set_attr "prefix_extra" "1")
14233 (set_attr "prefix" "vex")
14234 (set_attr "isa" "*,avx2,noavx2")
14235 (set_attr "mode" "V8SF")])
;; 512-bit broadcast of element 0 of an <ssexmmmode> source (register or
;; memory) for VI48F_512 modes, with an optional mask variant.  The mnemonic
;; is assembled from <sseintprefix> and <bcstscalarsuff>.
;; NOTE(review): the insn condition is not visible in this excerpt.
14237 (define_insn "<mask_codefor>avx512f_vec_dup<mode><mask_name>"
14238 [(set (match_operand:VI48F_512 0 "register_operand" "=v")
14239 (vec_duplicate:VI48F_512
14240 (vec_select:<ssescalarmode>
14241 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "vm")
14242 (parallel [(const_int 0)]))))]
14244 "v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
14245 [(set_attr "type" "ssemov")
14246 (set_attr "prefix" "evex")
14247 (set_attr "mode" "<sseinsnmode>")])
;; Duplicate an <ssexmmmode> source across a V16FI destination.  With a
;; register source the insn is vshuf<shuffletype>32x4 with immediate 0 and
;; the source printed twice via %g1; with a memory source it is
;; vbroadcast<shuffletype>32x4.
;; NOTE(review): the insn condition and the "@" line that opens the
;; multi-alternative template are not visible in this excerpt.
14249 (define_insn "<mask_codefor>avx512f_broadcast<mode><mask_name>"
14250 [(set (match_operand:V16FI 0 "register_operand" "=v,v")
14251 (vec_duplicate:V16FI
14252 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "v,m")))]
14255 vshuf<shuffletype>32x4\t{$0x0, %g1, %g1, %0<mask_operand2>|%0<mask_operand2>, %g1, %g1, 0x0}
14256 vbroadcast<shuffletype>32x4\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
14257 [(set_attr "type" "ssemov")
14258 (set_attr "prefix" "evex")
14259 (set_attr "mode" "<sseinsnmode>")])
;; Duplicate a <ssehalfvecmode> vector across a V8FI destination.
;; Register source: vshuf<shuffletype>64x2 with immediate 0x44 on the %g1
;; form of the source.  Memory source: vbroadcast<shuffletype>64x4.
14261 (define_insn "<mask_codefor>avx512f_broadcast<mode><mask_name>"
14262 [(set (match_operand:V8FI 0 "register_operand" "=v,v")
14263 (vec_duplicate:V8FI
14264 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "v,m")))]
14267 vshuf<shuffletype>64x2\t{$0x44, %g1, %g1, %0<mask_operand2>|%0<mask_operand2>, %g1, %g1, 0x44}
14268 vbroadcast<shuffletype>64x4\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
14269 [(set_attr "type" "ssemov")
14270 (set_attr "prefix" "evex")
14271 (set_attr "mode" "<sseinsnmode>")])
;; Broadcast a scalar held in a general register (constraint "r") into every
;; element of a VI48_512 destination with vpbroadcast<bcstscalarsuff>.
;; The condition rejects V8DImode unless TARGET_64BIT is set.
14273 (define_insn "<mask_codefor>avx512f_vec_dup_gpr<mode><mask_name>"
14274 [(set (match_operand:VI48_512 0 "register_operand" "=v")
14275 (vec_duplicate:VI48_512
14276 (match_operand:<ssescalarmode> 1 "register_operand" "r")))]
14277 "TARGET_AVX512F && (<MODE>mode != V8DImode || TARGET_64BIT)"
14278 "vpbroadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
14279 [(set_attr "type" "ssemov")
14280 (set_attr "prefix" "evex")
14281 (set_attr "mode" "<sseinsnmode>")])
;; Broadcast a scalar <ssescalarmode> operand into a VI48F_512 destination.
;; Despite the "_mem" in the name, the predicate/constraint pair
;; (nonimmediate_operand, "vm") accepts a vector register as well as memory.
14283 (define_insn "<mask_codefor>avx512f_vec_dup_mem<mode><mask_name>"
14284 [(set (match_operand:VI48F_512 0 "register_operand" "=v")
14285 (vec_duplicate:VI48F_512
14286 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "vm")))]
14288 "v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
14289 [(set_attr "type" "ssemov")
14290 (set_attr "prefix" "evex")
14291 (set_attr "mode" "<sseinsnmode>")])
;; Emits vbroadcasti128 from a memory-only <ssehalfvecmode> source into a
;; VI_256 register.
14293 (define_insn "avx2_vbroadcasti128_<mode>"
14294 [(set (match_operand:VI_256 0 "register_operand" "=x")
14296 (match_operand:<ssehalfvecmode> 1 "memory_operand" "m")
14299 "vbroadcasti128\t{%1, %0|%0, %1}"
14300 [(set_attr "type" "ssemov")
14301 (set_attr "prefix_extra" "1")
14302 (set_attr "prefix" "vex")
14303 (set_attr "mode" "OI")])
;; NOTE(review): the line introducing this definition is not visible in this
;; view; the shape (pattern, condition, replacement, preparation) looks like
;; a split -- confirm against the full file.
;; Under TARGET_AVX without AVX2, after reload, a register-to-register
;; vec_duplicate is replaced by two sets: duplicate the scalar into
;; operand 2, then concatenate operand 2 with itself.  Operand 2 is the
;; <ssehalfvecmode> register with the same REGNO as operand 0.
14306 [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand")
14307 (vec_duplicate:AVX_VEC_DUP_MODE
14308 (match_operand:<ssescalarmode> 1 "register_operand")))]
14309 "TARGET_AVX && !TARGET_AVX2 && reload_completed"
14310 [(set (match_dup 2)
14311 (vec_duplicate:<ssehalfvecmode> (match_dup 1)))
14313 (vec_concat:AVX_VEC_DUP_MODE (match_dup 2) (match_dup 2)))]
14314 "operands[2] = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (operands[0]));")
;; Replicate a <ssehalfvecmode> operand into a V_256 register.  Alternatives:
;;   0: source in memory           -> vbroadcast<i128>
;;   1: source tied to the output  -> vinsert<i128> with immediate 1
;;   2: other register (disparaged)-> vperm2<i128> with immediate 0 on %t1
;; length_immediate is 0 for the first form and 1 for the other two.
14316 (define_insn "avx_vbroadcastf128_<mode>"
14317 [(set (match_operand:V_256 0 "register_operand" "=x,x,x")
14319 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "m,0,?x")
14323 vbroadcast<i128>\t{%1, %0|%0, %1}
14324 vinsert<i128>\t{$1, %1, %0, %0|%0, %0, %1, 1}
14325 vperm2<i128>\t{$0, %t1, %t1, %0|%0, %t1, %t1, 0}"
14326 [(set_attr "type" "ssemov,sselog1,sselog1")
14327 (set_attr "prefix_extra" "1")
14328 (set_attr "length_immediate" "0,1,1")
14329 (set_attr "prefix" "vex")
14330 (set_attr "mode" "<sseinsnmode>")])
;; Emits vpbroadcastmb2q: a QImode register operand with the "Yk" constraint
;; is duplicated into a V8DI destination.
;; NOTE(review): the wrapper applied to operand 1 inside vec_duplicate is on
;; a line not visible here.
14332 (define_insn "avx512cd_maskb_vec_dupv8di"
14333 [(set (match_operand:V8DI 0 "register_operand" "=v")
14334 (vec_duplicate:V8DI
14336 (match_operand:QI 1 "register_operand" "Yk"))))]
14338 "vpbroadcastmb2q\t{%1, %0|%0, %1}"
14339 [(set_attr "type" "mskmov")
14340 (set_attr "prefix" "evex")
14341 (set_attr "mode" "XI")])
;; Emits vpbroadcastmw2d: an HImode register operand with the "Yk" constraint
;; is duplicated into a V16SI destination.
;; NOTE(review): the wrapper applied to operand 1 inside vec_duplicate is on
;; a line not visible here.
14343 (define_insn "avx512cd_maskw_vec_dupv16si"
14344 [(set (match_operand:V16SI 0 "register_operand" "=v")
14345 (vec_duplicate:V16SI
14347 (match_operand:HI 1 "register_operand" "Yk"))))]
14349 "vpbroadcastmw2d\t{%1, %0|%0, %1}"
14350 [(set_attr "type" "mskmov")
14351 (set_attr "prefix" "evex")
14352 (set_attr "mode" "XI")])
14354 ;; Recognize broadcast as a vec_select as produced by builtin_vec_perm.
14355 ;; If it so happens that the input is in memory, use vbroadcast.
14356 ;; Otherwise use vpermilp (and in the case of 256-bit modes, vperm2f128).
;; V4SF broadcast of the element whose index is operand 3.
;; Memory path: the address is advanced by elt * 4 bytes, retyped to SFmode,
;; and vbroadcastss is emitted.  Register path: the immediate elt * 0x55 is
;; built (the 2-bit index repeated in all four fields) and vpermilps is used.
;; NOTE(review): the case labels tying each path to an alternative are not
;; visible here; the "type" attribute (ssemov,ssemov,sselog1) suggests
;; alternatives 0/1 are the memory path and 2 the register path.
14357 (define_insn "*avx_vperm_broadcast_v4sf"
14358 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
14360 (match_operand:V4SF 1 "nonimmediate_operand" "m,o,x")
14361 (match_parallel 2 "avx_vbroadcast_operand"
14362 [(match_operand 3 "const_int_operand" "C,n,n")])))]
14365 int elt = INTVAL (operands[3]);
14366 switch (which_alternative)
14370 operands[1] = adjust_address_nv (operands[1], SFmode, elt * 4);
14371 return "vbroadcastss\t{%1, %0|%0, %k1}";
14373 operands[2] = GEN_INT (elt * 0x55);
14374 return "vpermilps\t{%2, %1, %0|%0, %1, %2}";
14376 gcc_unreachable ();
14379 [(set_attr "type" "ssemov,ssemov,sselog1")
14380 (set_attr "prefix_extra" "1")
14381 (set_attr "length_immediate" "0,0,1")
14382 (set_attr "prefix" "vex")
14383 (set_attr "mode" "SF,SF,V4SF")])
;; 256-bit (VF_256) broadcast of the element whose index is operand 3.
;; Split after reload unless the mode is V4DFmode with TARGET_AVX2; the
;; replacement template is a plain vec_duplicate of operand 1.
;; Visible preparation code:
;;   - with TARGET_AVX2 and elt == 0, gen_vec_dup<mode> on the low part;
;;   - otherwise a vpermil within each 128-bit lane (mask 15/0 for V4DF,
;;     (elt & 3) * 0x55 for the other mode) followed by vperm2f128 with
;;     (lane index) * 0x11 to copy the wanted lane into both halves;
;;   - adjust_address on op1 to point at element elt, typed as the scalar mode.
;; NOTE(review): the conditions separating these paths (register vs memory
;; source) are on lines not visible here.
14385 (define_insn_and_split "*avx_vperm_broadcast_<mode>"
14386 [(set (match_operand:VF_256 0 "register_operand" "=x,x,x")
14388 (match_operand:VF_256 1 "nonimmediate_operand" "m,o,?x")
14389 (match_parallel 2 "avx_vbroadcast_operand"
14390 [(match_operand 3 "const_int_operand" "C,n,n")])))]
14393 "&& reload_completed && (<MODE>mode != V4DFmode || !TARGET_AVX2)"
14394 [(set (match_dup 0) (vec_duplicate:VF_256 (match_dup 1)))]
14396 rtx op0 = operands[0], op1 = operands[1];
14397 int elt = INTVAL (operands[3]);
14403 if (TARGET_AVX2 && elt == 0)
14405 emit_insn (gen_vec_dup<mode> (op0, gen_lowpart (<ssescalarmode>mode,
14410 /* Shuffle element we care about into all elements of the 128-bit lane.
14411 The other lane gets shuffled too, but we don't care. */
14412 if (<MODE>mode == V4DFmode)
14413 mask = (elt & 1 ? 15 : 0);
14415 mask = (elt & 3) * 0x55;
14416 emit_insn (gen_avx_vpermil<mode> (op0, op1, GEN_INT (mask)));
14418 /* Shuffle the lane we care about into both lanes of the dest. */
14419 mask = (elt / (<ssescalarnum> / 2)) * 0x11;
14420 emit_insn (gen_avx_vperm2f128<mode>3 (op0, op0, op0, GEN_INT (mask)));
14424 operands[1] = adjust_address (op1, <ssescalarmode>mode,
14425 elt * GET_MODE_SIZE (<ssescalarmode>mode));
;; Expander for immediate vpermil on VF2 modes.  Converts the 8-bit immediate
;; (operand 2) into an explicit selector PARALLEL: elements are handled in
;; pairs, and for the pair starting at i, bit i and bit i+1 of the mask each
;; pick index i or i+1.
14428 (define_expand "<sse2_avx_avx512f>_vpermil<mode><mask_name>"
14429 [(set (match_operand:VF2 0 "register_operand")
14431 (match_operand:VF2 1 "nonimmediate_operand")
14432 (match_operand:SI 2 "const_0_to_255_operand")))]
14433 "TARGET_AVX && <mask_mode512bit_condition>"
14435 int mask = INTVAL (operands[2]);
14436 rtx perm[<ssescalarnum>];
14439 for (i = 0; i < <ssescalarnum>; i = i + 2)
14441 perm[i] = GEN_INT (((mask >> i) & 1) + i);
14442 perm[i + 1] = GEN_INT (((mask >> (i + 1)) & 1) + i);
14446 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
;; Expander for immediate vpermil on VF1 modes.  Converts the 8-bit immediate
;; (operand 2) into an explicit selector PARALLEL: elements are handled in
;; groups of four, and the same four 2-bit fields of the mask (bits 0-1, 2-3,
;; 4-5, 6-7) are reused for every group, offset by the group base i.
14449 (define_expand "<sse2_avx_avx512f>_vpermil<mode><mask_name>"
14450 [(set (match_operand:VF1 0 "register_operand")
14452 (match_operand:VF1 1 "nonimmediate_operand")
14453 (match_operand:SI 2 "const_0_to_255_operand")))]
14454 "TARGET_AVX && <mask_mode512bit_condition>"
14456 int mask = INTVAL (operands[2]);
14457 rtx perm[<ssescalarnum>];
14460 for (i = 0; i < <ssescalarnum>; i = i + 4)
14462 perm[i] = GEN_INT (((mask >> 0) & 3) + i);
14463 perm[i + 1] = GEN_INT (((mask >> 2) & 3) + i);
14464 perm[i + 2] = GEN_INT (((mask >> 4) & 3) + i);
14465 perm[i + 3] = GEN_INT (((mask >> 6) & 3) + i);
14469 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
;; Matcher for immediate vpermil<ssemodesuffix>.  The insn condition only
;; holds when avx_vpermilp_parallel accepts the selector PARALLEL (nonzero
;; return); the output code calls it again and uses its result minus one as
;; the immediate that replaces operand 2.
14472 (define_insn "*<sse2_avx_avx512f>_vpermilp<mode><mask_name>"
14473 [(set (match_operand:VF 0 "register_operand" "=v")
14475 (match_operand:VF 1 "nonimmediate_operand" "vm")
14476 (match_parallel 2 ""
14477 [(match_operand 3 "const_int_operand")])))]
14478 "TARGET_AVX && <mask_mode512bit_condition>
14479 && avx_vpermilp_parallel (operands[2], <MODE>mode)"
14481 int mask = avx_vpermilp_parallel (operands[2], <MODE>mode) - 1;
14482 operands[2] = GEN_INT (mask);
14483 return "vpermil<ssemodesuffix>\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}";
14485 [(set_attr "type" "sselog")
14486 (set_attr "prefix_extra" "1")
14487 (set_attr "length_immediate" "1")
14488 (set_attr "prefix" "<mask_prefix>")
14489 (set_attr "mode" "<sseinsnmode>")])
;; Variable-control vpermil<ssemodesuffix>: operand 1 is the VF data register
;; and operand 2 is a <sseintvecmode> vector (register or memory) passed as
;; the second source of the instruction.
14491 (define_insn "<sse2_avx_avx512f>_vpermilvar<mode>3<mask_name>"
14492 [(set (match_operand:VF 0 "register_operand" "=v")
14494 [(match_operand:VF 1 "register_operand" "v")
14495 (match_operand:<sseintvecmode> 2 "nonimmediate_operand" "vm")]
14497 "TARGET_AVX && <mask_mode512bit_condition>"
14498 "vpermil<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14499 [(set_attr "type" "sselog")
14500 (set_attr "prefix_extra" "1")
14501 (set_attr "btver2_decode" "vector")
14502 (set_attr "prefix" "<mask_prefix>")
14503 (set_attr "mode" "<sseinsnmode>")])
;; Convenience expander: forwards operands 0-3 to the generated
;; avx512f_vpermi2var<mode>3_maskz_1 pattern, inserting an all-zero vector
;; (CONST0_RTX) ahead of the mask register (operand 4).
14505 (define_expand "avx512f_vpermi2var<mode>3_maskz"
14506 [(match_operand:VI48F_512 0 "register_operand" "=v")
14507 (match_operand:VI48F_512 1 "register_operand" "v")
14508 (match_operand:<sseintvecmode> 2 "register_operand" "0")
14509 (match_operand:VI48F_512 3 "nonimmediate_operand" "vm")
14510 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")]
14513 emit_insn (gen_avx512f_vpermi2var<mode>3_maskz_1 (
14514 operands[0], operands[1], operands[2], operands[3],
14515 CONST0_RTX (<MODE>mode), operands[4]));
;; Emits vpermi2<ssemodesuffix>.  Operand 2 (the <sseintvecmode> register) is
;; tied to the destination by the "0" constraint, so only operands 1 and 3
;; appear as explicit sources in the template.
14519 (define_insn "avx512f_vpermi2var<mode>3<sd_maskz_name>"
14520 [(set (match_operand:VI48F_512 0 "register_operand" "=v")
14522 [(match_operand:VI48F_512 1 "register_operand" "v")
14523 (match_operand:<sseintvecmode> 2 "register_operand" "0")
14524 (match_operand:VI48F_512 3 "nonimmediate_operand" "vm")]
14527 "vpermi2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}"
14528 [(set_attr "type" "sselog")
14529 (set_attr "prefix" "evex")
14530 (set_attr "mode" "<sseinsnmode>")])
;; Merge-masked vpermi2<ssemodesuffix>: the UNSPEC_VPERMI2_MASK result is
;; combined through vec_merge under the mask in operand 4, which the template
;; prints as {%4} after the destination.  Operand 2 is tied to the output.
14532 (define_insn "avx512f_vpermi2var<mode>3_mask"
14533 [(set (match_operand:VI48F_512 0 "register_operand" "=v")
14534 (vec_merge:VI48F_512
14536 [(match_operand:VI48F_512 1 "register_operand" "v")
14537 (match_operand:<sseintvecmode> 2 "register_operand" "0")
14538 (match_operand:VI48F_512 3 "nonimmediate_operand" "vm")]
14539 UNSPEC_VPERMI2_MASK)
14541 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
14543 "vpermi2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
14544 [(set_attr "type" "sselog")
14545 (set_attr "prefix" "evex")
14546 (set_attr "mode" "<sseinsnmode>")])
;; Convenience expander: forwards operands 0-3 to the generated
;; avx512f_vpermt2var<mode>3_maskz_1 pattern, inserting an all-zero vector
;; (CONST0_RTX) ahead of the mask register (operand 4).
14548 (define_expand "avx512f_vpermt2var<mode>3_maskz"
14549 [(match_operand:VI48F_512 0 "register_operand" "=v")
14550 (match_operand:<sseintvecmode> 1 "register_operand" "v")
14551 (match_operand:VI48F_512 2 "register_operand" "0")
14552 (match_operand:VI48F_512 3 "nonimmediate_operand" "vm")
14553 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")]
14556 emit_insn (gen_avx512f_vpermt2var<mode>3_maskz_1 (
14557 operands[0], operands[1], operands[2], operands[3],
14558 CONST0_RTX (<MODE>mode), operands[4]));
;; Emits vpermt2<ssemodesuffix>.  Here operand 1 is the <sseintvecmode>
;; register and operand 2 (a data vector) is the one tied to the destination
;; by the "0" constraint -- the reverse of the vpermi2var operand roles.
14562 (define_insn "avx512f_vpermt2var<mode>3<sd_maskz_name>"
14563 [(set (match_operand:VI48F_512 0 "register_operand" "=v")
14565 [(match_operand:<sseintvecmode> 1 "register_operand" "v")
14566 (match_operand:VI48F_512 2 "register_operand" "0")
14567 (match_operand:VI48F_512 3 "nonimmediate_operand" "vm")]
14570 "vpermt2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}"
14571 [(set_attr "type" "sselog")
14572 (set_attr "prefix" "evex")
14573 (set_attr "mode" "<sseinsnmode>")])
;; Merge-masked vpermt2<ssemodesuffix>: result is combined through vec_merge
;; under the mask in operand 4, printed as {%4} after the destination.
;; Operand 2 is tied to the output.
14575 (define_insn "avx512f_vpermt2var<mode>3_mask"
14576 [(set (match_operand:VI48F_512 0 "register_operand" "=v")
14577 (vec_merge:VI48F_512
14579 [(match_operand:<sseintvecmode> 1 "register_operand" "v")
14580 (match_operand:VI48F_512 2 "register_operand" "0")
14581 (match_operand:VI48F_512 3 "nonimmediate_operand" "vm")]
14584 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
14586 "vpermt2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
14587 [(set_attr "type" "sselog")
14588 (set_attr "prefix" "evex")
14589 (set_attr "mode" "<sseinsnmode>")])
;; Expander for vperm2f128 with an 8-bit immediate (operand 3).
;; When neither bit 3 nor bit 7 of the immediate is set ((mask & 0x88) == 0)
;; the operation is rewritten as a vec_select from the vec_concat of
;; operands 1 and 2: the low half of the result takes nelt/2 consecutive
;; elements starting at (mask & 3) * nelt/2, the high half starting at
;; ((mask >> 4) & 3) * nelt/2.  Otherwise the unspec form in the pattern is
;; presumably kept as-is (the tail of the C block is not visible here).
14591 (define_expand "avx_vperm2f128<mode>3"
14592 [(set (match_operand:AVX256MODE2P 0 "register_operand")
14593 (unspec:AVX256MODE2P
14594 [(match_operand:AVX256MODE2P 1 "register_operand")
14595 (match_operand:AVX256MODE2P 2 "nonimmediate_operand")
14596 (match_operand:SI 3 "const_0_to_255_operand")]
14597 UNSPEC_VPERMIL2F128))]
14600 int mask = INTVAL (operands[3]);
14601 if ((mask & 0x88) == 0)
14603 rtx perm[<ssescalarnum>], t1, t2;
14604 int i, base, nelt = <ssescalarnum>, nelt2 = nelt / 2;
14606 base = (mask & 3) * nelt2;
14607 for (i = 0; i < nelt2; ++i)
14608 perm[i] = GEN_INT (base + i);
14610 base = ((mask >> 4) & 3) * nelt2;
14611 for (i = 0; i < nelt2; ++i)
14612 perm[i + nelt2] = GEN_INT (base + i);
14614 t2 = gen_rtx_VEC_CONCAT (<ssedoublevecmode>mode,
14615 operands[1], operands[2]);
14616 t1 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, perm));
14617 t2 = gen_rtx_VEC_SELECT (<MODE>mode, t2, t1);
14618 t2 = gen_rtx_SET (VOIDmode, operands[0], t2);
14624 ;; Note that bits 7 and 3 of the imm8 allow lanes to be zeroed, which
14625 ;; means that in order to represent this properly in rtl we'd have to
14626 ;; nest *another* vec_concat with a zero operand and do the select from
14627 ;; a 4x wide vector. That doesn't seem very nice.
;; Unspec form of vperm2<i128>: accepts any immediate in 0..255 and passes it
;; straight through to the instruction.
14628 (define_insn "*avx_vperm2f128<mode>_full"
14629 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
14630 (unspec:AVX256MODE2P
14631 [(match_operand:AVX256MODE2P 1 "register_operand" "x")
14632 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm")
14633 (match_operand:SI 3 "const_0_to_255_operand" "n")]
14634 UNSPEC_VPERMIL2F128))]
14636 "vperm2<i128>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
14637 [(set_attr "type" "sselog")
14638 (set_attr "prefix_extra" "1")
14639 (set_attr "length_immediate" "1")
14640 (set_attr "prefix" "vex")
14641 (set_attr "mode" "<sseinsnmode>")])
;; vec_select form of the two-source 128-bit lane permute.  The condition
;; requires avx_vperm2f128_parallel to accept the selector; the output code
;; recovers the immediate as that function's result minus one.  Two special
;; cases emit vinsert<i128> (immediate 0 or 1, using the %x2 form of
;; operand 2); the general case emits vperm2<i128> with the recovered mask.
;; NOTE(review): the tests selecting the vinsert cases are on lines not
;; visible here.
14643 (define_insn "*avx_vperm2f128<mode>_nozero"
14644 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
14645 (vec_select:AVX256MODE2P
14646 (vec_concat:<ssedoublevecmode>
14647 (match_operand:AVX256MODE2P 1 "register_operand" "x")
14648 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm"))
14649 (match_parallel 3 ""
14650 [(match_operand 4 "const_int_operand")])))]
14652 && avx_vperm2f128_parallel (operands[3], <MODE>mode)"
14654 int mask = avx_vperm2f128_parallel (operands[3], <MODE>mode) - 1;
14656 return "vinsert<i128>\t{$0, %x2, %1, %0|%0, %1, %x2, 0}";
14658 return "vinsert<i128>\t{$1, %x2, %1, %0|%0, %1, %x2, 1}";
14659 operands[3] = GEN_INT (mask);
14660 return "vperm2<i128>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
14662 [(set_attr "type" "sselog")
14663 (set_attr "prefix_extra" "1")
14664 (set_attr "length_immediate" "1")
14665 (set_attr "prefix" "vex")
14666 (set_attr "mode" "<sseinsnmode>")])
;; Single-input permutation on a V_128 register recognised as palignr.
;; The element index in operand 3 is scaled by the element size to get the
;; byte count, which replaces operand 2 before printing.  Alternative 0
;; (isa noavx, source tied to output) emits palignr; alternative 1 (isa avx)
;; emits vpalignr with operand 1 used for both sources.
14668 (define_insn "*ssse3_palignr<mode>_perm"
14669 [(set (match_operand:V_128 0 "register_operand" "=x,x")
14671 (match_operand:V_128 1 "register_operand" "0,x")
14672 (match_parallel 2 "palignr_operand"
14673 [(match_operand 3 "const_int_operand" "n, n")])))]
14676 enum machine_mode imode = GET_MODE_INNER (GET_MODE (operands[0]));
14677 operands[2] = GEN_INT (INTVAL (operands[3]) * GET_MODE_SIZE (imode));
14679 switch (which_alternative)
14682 return "palignr\t{%2, %1, %0|%0, %1, %2}";
14684 return "vpalignr\t{%2, %1, %1, %0|%0, %1, %1, %2}";
14686 gcc_unreachable ();
14689 [(set_attr "isa" "noavx,avx")
14690 (set_attr "type" "sseishft")
14691 (set_attr "atom_unit" "sishuf")
14692 (set_attr "prefix_data16" "1,*")
14693 (set_attr "prefix_extra" "1")
14694 (set_attr "length_immediate" "1")
14695 (set_attr "prefix" "orig,vex")])
;; Expander: insert a <ssehalfvecmode> value (operand 2) into a V_256 vector
;; (operand 1), giving operand 0.  Operand 3 (0 or 1) selects between the
;; vec_set_lo_<mode> and vec_set_hi_<mode> generators; any other value is
;; unreachable.
14697 (define_expand "avx_vinsertf128<mode>"
14698 [(match_operand:V_256 0 "register_operand")
14699 (match_operand:V_256 1 "register_operand")
14700 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand")
14701 (match_operand:SI 3 "const_0_to_1_operand")]
14704 rtx (*insn)(rtx, rtx, rtx);
14706 switch (INTVAL (operands[3]))
14709 insn = gen_vec_set_lo_<mode>;
14712 insn = gen_vec_set_hi_<mode>;
14715 gcc_unreachable ();
14718 emit_insn (insn (operands[0], operands[1], operands[2]));
;; Replace the low half of a V4DI: result is operand 2 (V2DI) followed by
;; elements 2 and 3 of operand 1.  Emits vinserti128 with immediate 0.
14722 (define_insn "avx2_vec_set_lo_v4di"
14723 [(set (match_operand:V4DI 0 "register_operand" "=x")
14725 (match_operand:V2DI 2 "nonimmediate_operand" "xm")
14727 (match_operand:V4DI 1 "register_operand" "x")
14728 (parallel [(const_int 2) (const_int 3)]))))]
14730 "vinserti128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
14731 [(set_attr "type" "sselog")
14732 (set_attr "prefix_extra" "1")
14733 (set_attr "length_immediate" "1")
14734 (set_attr "prefix" "vex")
14735 (set_attr "mode" "OI")])
;; Replace the high half of a V4DI: result is elements 0 and 1 of operand 1
;; followed by operand 2 (V2DI).  Emits vinserti128 with immediate 1.
14737 (define_insn "avx2_vec_set_hi_v4di"
14738 [(set (match_operand:V4DI 0 "register_operand" "=x")
14741 (match_operand:V4DI 1 "register_operand" "x")
14742 (parallel [(const_int 0) (const_int 1)]))
14743 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
14745 "vinserti128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
14746 [(set_attr "type" "sselog")
14747 (set_attr "prefix_extra" "1")
14748 (set_attr "length_immediate" "1")
14749 (set_attr "prefix" "vex")
14750 (set_attr "mode" "OI")])
;; VI8F_256 variant: result is operand 2 (half vector) concatenated with
;; elements 2 and 3 of operand 1.  Emits vinsert<i128> with immediate 0.
14752 (define_insn "vec_set_lo_<mode>"
14753 [(set (match_operand:VI8F_256 0 "register_operand" "=x")
14754 (vec_concat:VI8F_256
14755 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")
14756 (vec_select:<ssehalfvecmode>
14757 (match_operand:VI8F_256 1 "register_operand" "x")
14758 (parallel [(const_int 2) (const_int 3)]))))]
14760 "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
14761 [(set_attr "type" "sselog")
14762 (set_attr "prefix_extra" "1")
14763 (set_attr "length_immediate" "1")
14764 (set_attr "prefix" "vex")
14765 (set_attr "mode" "<sseinsnmode>")])
;; VI8F_256 variant: result is elements 0 and 1 of operand 1 concatenated
;; with operand 2 (half vector).  Emits vinsert<i128> with immediate 1.
14767 (define_insn "vec_set_hi_<mode>"
14768 [(set (match_operand:VI8F_256 0 "register_operand" "=x")
14769 (vec_concat:VI8F_256
14770 (vec_select:<ssehalfvecmode>
14771 (match_operand:VI8F_256 1 "register_operand" "x")
14772 (parallel [(const_int 0) (const_int 1)]))
14773 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")))]
14775 "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
14776 [(set_attr "type" "sselog")
14777 (set_attr "prefix_extra" "1")
14778 (set_attr "length_immediate" "1")
14779 (set_attr "prefix" "vex")
14780 (set_attr "mode" "<sseinsnmode>")])
;; VI4F_256 variant: result is operand 2 (half vector) concatenated with
;; elements 4..7 of operand 1.  Emits vinsert<i128> with immediate 0.
14782 (define_insn "vec_set_lo_<mode>"
14783 [(set (match_operand:VI4F_256 0 "register_operand" "=x")
14784 (vec_concat:VI4F_256
14785 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")
14786 (vec_select:<ssehalfvecmode>
14787 (match_operand:VI4F_256 1 "register_operand" "x")
14788 (parallel [(const_int 4) (const_int 5)
14789 (const_int 6) (const_int 7)]))))]
14791 "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
14792 [(set_attr "type" "sselog")
14793 (set_attr "prefix_extra" "1")
14794 (set_attr "length_immediate" "1")
14795 (set_attr "prefix" "vex")
14796 (set_attr "mode" "<sseinsnmode>")])
;; VI4F_256 variant: result is elements 0..3 of operand 1 concatenated with
;; operand 2 (half vector).  Emits vinsert<i128> with immediate 1.
14798 (define_insn "vec_set_hi_<mode>"
14799 [(set (match_operand:VI4F_256 0 "register_operand" "=x")
14800 (vec_concat:VI4F_256
14801 (vec_select:<ssehalfvecmode>
14802 (match_operand:VI4F_256 1 "register_operand" "x")
14803 (parallel [(const_int 0) (const_int 1)
14804 (const_int 2) (const_int 3)]))
14805 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")))]
14807 "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
14808 [(set_attr "type" "sselog")
14809 (set_attr "prefix_extra" "1")
14810 (set_attr "length_immediate" "1")
14811 (set_attr "prefix" "vex")
14812 (set_attr "mode" "<sseinsnmode>")])
;; V16HI: result is operand 2 (V8HI) together with elements 8..15 of
;; operand 1.  Emits vinsert%~128 with immediate 0 (the %~ modifier fills in
;; the mnemonic letter at output time).
14814 (define_insn "vec_set_lo_v16hi"
14815 [(set (match_operand:V16HI 0 "register_operand" "=x")
14817 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
14819 (match_operand:V16HI 1 "register_operand" "x")
14820 (parallel [(const_int 8) (const_int 9)
14821 (const_int 10) (const_int 11)
14822 (const_int 12) (const_int 13)
14823 (const_int 14) (const_int 15)]))))]
14825 "vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
14826 [(set_attr "type" "sselog")
14827 (set_attr "prefix_extra" "1")
14828 (set_attr "length_immediate" "1")
14829 (set_attr "prefix" "vex")
14830 (set_attr "mode" "OI")])
;; V16HI: result is elements 0..7 of operand 1 together with operand 2
;; (V8HI).  Emits vinsert%~128 with immediate 1.
14832 (define_insn "vec_set_hi_v16hi"
14833 [(set (match_operand:V16HI 0 "register_operand" "=x")
14836 (match_operand:V16HI 1 "register_operand" "x")
14837 (parallel [(const_int 0) (const_int 1)
14838 (const_int 2) (const_int 3)
14839 (const_int 4) (const_int 5)
14840 (const_int 6) (const_int 7)]))
14841 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
14843 "vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
14844 [(set_attr "type" "sselog")
14845 (set_attr "prefix_extra" "1")
14846 (set_attr "length_immediate" "1")
14847 (set_attr "prefix" "vex")
14848 (set_attr "mode" "OI")])
;; V32QI: result is operand 2 (V16QI) together with elements 16..31 of
;; operand 1.  Emits vinsert%~128 with immediate 0.
14850 (define_insn "vec_set_lo_v32qi"
14851 [(set (match_operand:V32QI 0 "register_operand" "=x")
14853 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
14855 (match_operand:V32QI 1 "register_operand" "x")
14856 (parallel [(const_int 16) (const_int 17)
14857 (const_int 18) (const_int 19)
14858 (const_int 20) (const_int 21)
14859 (const_int 22) (const_int 23)
14860 (const_int 24) (const_int 25)
14861 (const_int 26) (const_int 27)
14862 (const_int 28) (const_int 29)
14863 (const_int 30) (const_int 31)]))))]
14865 "vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
14866 [(set_attr "type" "sselog")
14867 (set_attr "prefix_extra" "1")
14868 (set_attr "length_immediate" "1")
14869 (set_attr "prefix" "vex")
14870 (set_attr "mode" "OI")])
;; V32QI: result is elements 0..15 of operand 1 together with operand 2
;; (V16QI).  Emits vinsert%~128 with immediate 1.
14872 (define_insn "vec_set_hi_v32qi"
14873 [(set (match_operand:V32QI 0 "register_operand" "=x")
14876 (match_operand:V32QI 1 "register_operand" "x")
14877 (parallel [(const_int 0) (const_int 1)
14878 (const_int 2) (const_int 3)
14879 (const_int 4) (const_int 5)
14880 (const_int 6) (const_int 7)
14881 (const_int 8) (const_int 9)
14882 (const_int 10) (const_int 11)
14883 (const_int 12) (const_int 13)
14884 (const_int 14) (const_int 15)]))
14885 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
14887 "vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
14888 [(set_attr "type" "sselog")
14889 (set_attr "prefix_extra" "1")
14890 (set_attr "length_immediate" "1")
14891 (set_attr "prefix" "vex")
14892 (set_attr "mode" "<sseinsnmode>")])
;; Load form of v<sseintprefix>maskmov<ssemodesuffix>: operand 1 is the
;; memory source, operand 2 a <sseintvecmode> register printed as the middle
;; operand of the instruction, operand 0 the destination register.
14894 (define_insn "<avx_avx2>_maskload<ssemodesuffix><avxsizesuffix>"
14895 [(set (match_operand:V48_AVX2 0 "register_operand" "=x")
14897 [(match_operand:<sseintvecmode> 2 "register_operand" "x")
14898 (match_operand:V48_AVX2 1 "memory_operand" "m")]
14901 "v<sseintprefix>maskmov<ssemodesuffix>\t{%1, %2, %0|%0, %2, %1}"
14902 [(set_attr "type" "sselog1")
14903 (set_attr "prefix_extra" "1")
14904 (set_attr "prefix" "vex")
14905 (set_attr "btver2_decode" "vector")
14906 (set_attr "mode" "<sseinsnmode>")])
;; Store form of v<sseintprefix>maskmov<ssemodesuffix>: operand 0 is the
;; memory destination (constraint "+m", i.e. read and written), operand 1 a
;; <sseintvecmode> register, operand 2 the data register.
14908 (define_insn "<avx_avx2>_maskstore<ssemodesuffix><avxsizesuffix>"
14909 [(set (match_operand:V48_AVX2 0 "memory_operand" "+m")
14911 [(match_operand:<sseintvecmode> 1 "register_operand" "x")
14912 (match_operand:V48_AVX2 2 "register_operand" "x")
14916 "v<sseintprefix>maskmov<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
14917 [(set_attr "type" "sselog1")
14918 (set_attr "prefix_extra" "1")
14919 (set_attr "prefix" "vex")
14920 (set_attr "btver2_decode" "vector")
14921 (set_attr "mode" "<sseinsnmode>")])
;; Expander named maskload<mode>: operand 0 is the register destination,
;; operand 1 the memory source, operand 2 a <sseintvecmode> register
;; (presumably the mask -- the enclosing unspec is not visible here).
14923 (define_expand "maskload<mode>"
14924 [(set (match_operand:V48_AVX2 0 "register_operand")
14926 [(match_operand:<sseintvecmode> 2 "register_operand")
14927 (match_operand:V48_AVX2 1 "memory_operand")]
;; Expander named maskstore<mode>: operand 0 is the memory destination,
;; operand 1 the data register, operand 2 a <sseintvecmode> register.
;; Note the numbering differs from the <avx_avx2>_maskstore insn, where
;; operand 1 is the <sseintvecmode> register and operand 2 the data.
14931 (define_expand "maskstore<mode>"
14932 [(set (match_operand:V48_AVX2 0 "memory_operand")
14934 [(match_operand:<sseintvecmode> 2 "register_operand")
14935 (match_operand:V48_AVX2 1 "register_operand")
;; Cast of a <ssehalfvecmode> value to the 256-bit AVX256MODE2P mode, kept as
;; an unspec until reload completes and then split.  The visible split code
;; re-creates op0 as a <ssehalfvecmode> REG, or op1 as a <MODE> REG, with the
;; same register number, and emits an ordinary move between them.
;; NOTE(review): the tests choosing which operand is rewritten are on lines
;; not visible here.
14940 (define_insn_and_split "avx_<castmode><avxsizesuffix>_<castmode>"
14941 [(set (match_operand:AVX256MODE2P 0 "nonimmediate_operand" "=x,m")
14942 (unspec:AVX256MODE2P
14943 [(match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "xm,x")]
14947 "&& reload_completed"
14950 rtx op0 = operands[0];
14951 rtx op1 = operands[1];
14953 op0 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op0));
14955 op1 = gen_rtx_REG (<MODE>mode, REGNO (op1));
14956 emit_move_insn (op0, op1);
;; vec_init expander for V_256 modes: delegates to ix86_expand_vector_init
;; with its first argument false, passing operands 0 and 1.
14960 (define_expand "vec_init<mode>"
14961 [(match_operand:V_256 0 "register_operand")
14965 ix86_expand_vector_init (false, operands[0], operands[1]);
;; vec_init expander for VI48F_512 modes: delegates to
;; ix86_expand_vector_init with its first argument false, passing
;; operands 0 and 1.
14969 (define_expand "vec_init<mode>"
14970 [(match_operand:VI48F_512 0 "register_operand")
14974 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Expander: extract a V2DI half (operand 0) from a V4DI register
;; (operand 1).  Operand 2 (0 or 1) selects gen_vec_extract_lo_v4di or
;; gen_vec_extract_hi_v4di; any other value is unreachable.
14978 (define_expand "avx2_extracti128"
14979 [(match_operand:V2DI 0 "nonimmediate_operand")
14980 (match_operand:V4DI 1 "register_operand")
14981 (match_operand:SI 2 "const_0_to_1_operand")]
14984 rtx (*insn)(rtx, rtx);
14986 switch (INTVAL (operands[2]))
14989 insn = gen_vec_extract_lo_v4di;
14992 insn = gen_vec_extract_hi_v4di;
14995 gcc_unreachable ();
14998 emit_insn (insn (operands[0], operands[1]));
;; Expander: insert a V2DI value (operand 2) into a V4DI vector (operand 1),
;; giving operand 0.  Operand 3 (0 or 1) selects gen_avx2_vec_set_lo_v4di or
;; gen_avx2_vec_set_hi_v4di; any other value is unreachable.
15002 (define_expand "avx2_inserti128"
15003 [(match_operand:V4DI 0 "register_operand")
15004 (match_operand:V4DI 1 "register_operand")
15005 (match_operand:V2DI 2 "nonimmediate_operand")
15006 (match_operand:SI 3 "const_0_to_1_operand")]
15009 rtx (*insn)(rtx, rtx, rtx);
15011 switch (INTVAL (operands[3]))
15014 insn = gen_avx2_vec_set_lo_v4di;
15017 insn = gen_avx2_vec_set_hi_v4di;
15020 gcc_unreachable ();
15023 emit_insn (insn (operands[0], operands[1], operands[2]));
;; Arithmetic right shift with a vector shift count (operand 2, register or
;; memory, same mode as the data).  Emits vpsrav<ssemodesuffix>.
15027 (define_insn "<avx2_avx512f>_ashrv<mode><mask_name>"
15028 [(set (match_operand:VI48_AVX512F 0 "register_operand" "=v")
15029 (ashiftrt:VI48_AVX512F
15030 (match_operand:VI48_AVX512F 1 "register_operand" "v")
15031 (match_operand:VI48_AVX512F 2 "nonimmediate_operand" "vm")))]
15032 "TARGET_AVX2 && <mask_mode512bit_condition>"
15033 "vpsrav<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
15034 [(set_attr "type" "sseishft")
15035 (set_attr "prefix" "maybe_evex")
15036 (set_attr "mode" "<sseinsnmode>")])
;; Shifts from the any_lshift code iterator with a vector shift count
;; (operand 2, register or memory).  Emits vp<vshift>v<ssemodesuffix>.
15038 (define_insn "<avx2_avx512f>_<shift_insn>v<mode><mask_name>"
15039 [(set (match_operand:VI48_AVX2_48_AVX512F 0 "register_operand" "=v")
15040 (any_lshift:VI48_AVX2_48_AVX512F
15041 (match_operand:VI48_AVX2_48_AVX512F 1 "register_operand" "v")
15042 (match_operand:VI48_AVX2_48_AVX512F 2 "nonimmediate_operand" "vm")))]
15043 "TARGET_AVX2 && <mask_mode512bit_condition>"
15044 "vp<vshift>v<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
15045 [(set_attr "type" "sseishft")
15046 (set_attr "prefix" "maybe_evex")
15047 (set_attr "mode" "<sseinsnmode>")])
15049 ;; For avx_vec_concat<mode> insn pattern
;; Maps each 32-byte vector mode to "t" and each 64-byte vector mode to "g".
;; The letter is spliced into the template as an operand modifier
;; (%<concat_tg_mode>1) in avx_vec_concat<mode>.
15050 (define_mode_attr concat_tg_mode
15051 [(V32QI "t") (V16HI "t") (V8SI "t") (V4DI "t") (V8SF "t") (V4DF "t")
15052 (V64QI "g") (V32HI "g") (V16SI "g") (V8DI "g") (V16SF "g") (V8DF "g")])
;; Concatenate two <ssehalfvecmode> values into a V_256_512 register.
;; Alternative 0 (operand 2 in register/memory) emits vinsert<i128> with
;; immediate 1, printing operand 1 widened via the concat_tg_mode modifier.
;; Alternative 1 (operand 2 matches constraint "C") emits a plain
;; vmovaps/vmovapd/vmovdqa of operand 1 into the %t0 or %x0 view of the
;; destination, chosen by the insn's "mode" attribute.
;; NOTE(review): the case labels mapping each mode to a move are on lines
;; not visible here.
15054 (define_insn "avx_vec_concat<mode>"
15055 [(set (match_operand:V_256_512 0 "register_operand" "=x,x")
15056 (vec_concat:V_256_512
15057 (match_operand:<ssehalfvecmode> 1 "register_operand" "x,x")
15058 (match_operand:<ssehalfvecmode> 2 "vector_move_operand" "xm,C")))]
15061 switch (which_alternative)
15064 return "vinsert<i128>\t{$0x1, %2, %<concat_tg_mode>1, %0|%0, %<concat_tg_mode>1, %2, 0x1}";
15066 switch (get_attr_mode (insn))
15069 return "vmovaps\t{%1, %t0|%t0, %1}";
15071 return "vmovapd\t{%1, %t0|%t0, %1}";
15073 return "vmovaps\t{%1, %x0|%x0, %1}";
15075 return "vmovapd\t{%1, %x0|%x0, %1}";
15077 return "vmovdqa\t{%1, %t0|%t0, %1}";
15079 return "vmovdqa\t{%1, %x0|%x0, %1}";
15081 gcc_unreachable ();
15084 gcc_unreachable ();
15087 [(set_attr "type" "sselog,ssemov")
15088 (set_attr "prefix_extra" "1,*")
15089 (set_attr "length_immediate" "1,*")
15090 (set_attr "prefix" "maybe_evex")
15091 (set_attr "mode" "<sseinsnmode>")])
;; Register form of vcvtph2ps producing V4SF: the conversion is modelled as a
;; V8SF unspec of the V8HI source from which elements 0..3 are selected.
15093 (define_insn "vcvtph2ps"
15094 [(set (match_operand:V4SF 0 "register_operand" "=x")
15096 (unspec:V8SF [(match_operand:V8HI 1 "register_operand" "x")]
15098 (parallel [(const_int 0) (const_int 1)
15099 (const_int 2) (const_int 3)])))]
15101 "vcvtph2ps\t{%1, %0|%0, %1}"
15102 [(set_attr "type" "ssecvt")
15103 (set_attr "prefix" "vex")
15104 (set_attr "mode" "V4SF")])
;; Memory form of vcvtph2ps producing V4SF from a V4HI memory operand.
;; NOTE(review): the "mode" attribute here is V8SF although the result is
;; V4SF (the register form above uses V4SF) -- confirm this is intended.
15106 (define_insn "*vcvtph2ps_load"
15107 [(set (match_operand:V4SF 0 "register_operand" "=x")
15108 (unspec:V4SF [(match_operand:V4HI 1 "memory_operand" "m")]
15109 UNSPEC_VCVTPH2PS))]
15111 "vcvtph2ps\t{%1, %0|%0, %1}"
15112 [(set_attr "type" "ssecvt")
15113 (set_attr "prefix" "vex")
15114 (set_attr "mode" "V8SF")])
;; 256-bit vcvtph2ps: V8HI source (register or memory) to a V8SF register.
15116 (define_insn "vcvtph2ps256"
15117 [(set (match_operand:V8SF 0 "register_operand" "=x")
15118 (unspec:V8SF [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
15119 UNSPEC_VCVTPH2PS))]
15121 "vcvtph2ps\t{%1, %0|%0, %1}"
15122 [(set_attr "type" "ssecvt")
15123 (set_attr "prefix" "vex")
15124 (set_attr "btver2_decode" "double")
15125 (set_attr "mode" "V8SF")])
;; 512-bit vcvtph2ps: V16HI source to a V16SF register.  The source predicate
;; and constraint, and the extra template text, come from the
;; <round_saeonly_*> substitution attributes; <mask_*> adds the masked form.
15127 (define_insn "<mask_codefor>avx512f_vcvtph2ps512<mask_name><round_saeonly_name>"
15128 [(set (match_operand:V16SF 0 "register_operand" "=v")
15130 [(match_operand:V16HI 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
15131 UNSPEC_VCVTPH2PS))]
15133 "vcvtph2ps\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
15134 [(set_attr "type" "ssecvt")
15135 (set_attr "prefix" "evex")
15136 (set_attr "mode" "V16SF")])
;; Expander for the 128-bit vcvtps2ph: V4SF source (operand 1) and an 8-bit
;; immediate (operand 2) produce a V4HI unspec placed in a V8HI destination.
;; The preparation statement sets operand 3 to an all-zero V4HI vector.
15138 (define_expand "vcvtps2ph"
15139 [(set (match_operand:V8HI 0 "register_operand")
15141 (unspec:V4HI [(match_operand:V4SF 1 "register_operand")
15142 (match_operand:SI 2 "const_0_to_255_operand")]
15146 "operands[3] = CONST0_RTX (V4HImode);")
;; Register form of the 128-bit vcvtps2ph: the V4HI conversion result is
;; paired with operand 3, which must be a zero V4HI constant, to form the
;; V8HI destination.
15148 (define_insn "*vcvtps2ph"
15149 [(set (match_operand:V8HI 0 "register_operand" "=x")
15151 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")
15152 (match_operand:SI 2 "const_0_to_255_operand" "N")]
15154 (match_operand:V4HI 3 "const0_operand")))]
15156 "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
15157 [(set_attr "type" "ssecvt")
15158 (set_attr "prefix" "vex")
15159 (set_attr "mode" "V4SF")])
;; Store form of the 128-bit vcvtps2ph: writes the V4HI conversion result
;; directly to a memory destination.
15161 (define_insn "*vcvtps2ph_store"
15162 [(set (match_operand:V4HI 0 "memory_operand" "=m")
15163 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")
15164 (match_operand:SI 2 "const_0_to_255_operand" "N")]
15165 UNSPEC_VCVTPS2PH))]
15167 "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
15168 [(set_attr "type" "ssecvt")
15169 (set_attr "prefix" "vex")
15170 (set_attr "mode" "V4SF")])
;; 256-bit vcvtps2ph: V8SF register source to a V8HI destination that may be
;; a register or memory ("=xm"), with an 8-bit immediate in operand 2.
15172 (define_insn "vcvtps2ph256"
15173 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=xm")
15174 (unspec:V8HI [(match_operand:V8SF 1 "register_operand" "x")
15175 (match_operand:SI 2 "const_0_to_255_operand" "N")]
15176 UNSPEC_VCVTPS2PH))]
15178 "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
15179 [(set_attr "type" "ssecvt")
15180 (set_attr "prefix" "vex")
15181 (set_attr "btver2_decode" "vector")
15182 (set_attr "mode" "V8SF")])
;; 512-bit vcvtps2ph: V16SF register source to a V16HI destination that may
;; be a register or memory ("=vm"), with an 8-bit immediate in operand 2.
;; <mask_name>/<mask_operand3> add the masked form.
15184 (define_insn "<mask_codefor>avx512f_vcvtps2ph512<mask_name>"
15185 [(set (match_operand:V16HI 0 "nonimmediate_operand" "=vm")
15187 [(match_operand:V16SF 1 "register_operand" "v")
15188 (match_operand:SI 2 "const_0_to_255_operand" "N")]
15189 UNSPEC_VCVTPS2PH))]
15191 "vcvtps2ph\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
15192 [(set_attr "type" "ssecvt")
15193 (set_attr "prefix" "evex")
15194 (set_attr "mode" "V16SF")])
15196 ;; For gather* insn patterns
;; Result modes of the AVX2 gather patterns: the 2- and 4-element DI/DF modes
;; and the 4- and 8-element SI/SF modes.
15197 (define_mode_iterator VEC_GATHER_MODE
15198 [V2DI V2DF V4DI V4DF V4SI V4SF V8SI V8SF])
;; Mode of the SImode-element index vector used with each data mode.
;; Also covers 512-bit data modes that are not in VEC_GATHER_MODE.
15199 (define_mode_attr VEC_GATHER_IDXSI
15200 [(V2DI "V4SI") (V4DI "V4SI") (V8DI "V8SI")
15201 (V2DF "V4SI") (V4DF "V4SI") (V8DF "V8SI")
15202 (V4SI "V4SI") (V8SI "V8SI") (V16SI "V16SI")
15203 (V4SF "V4SI") (V8SF "V8SI") (V16SF "V16SI")])
;; Mode of the DImode-element index vector used with each data mode.
15205 (define_mode_attr VEC_GATHER_IDXDI
15206 [(V2DI "V2DI") (V4DI "V4DI") (V8DI "V8DI")
15207 (V2DF "V2DI") (V4DF "V4DI") (V8DF "V8DI")
15208 (V4SI "V2DI") (V8SI "V4DI") (V16SI "V8DI")
15209 (V4SF "V2DI") (V8SF "V4DI") (V16SF "V8DI")])
;; Per-data-mode companion mode for the DI-indexed gathers: identical to the
;; data mode for DI/DF vectors and for V4SI/V4SF, and the half-width vector
;; for V8SI/V8SF/V16SI/V16SF.
;; NOTE(review): its users are outside this view; the name suggests it is the
;; source/mask operand mode -- confirm at the use sites.
15211 (define_mode_attr VEC_GATHER_SRCDI
15212 [(V2DI "V2DI") (V4DI "V4DI") (V8DI "V8DI")
15213 (V2DF "V2DF") (V4DF "V4DF") (V8DF "V8DF")
15214 (V4SI "V4SI") (V8SI "V4SI") (V16SI "V8SI")
15215 (V4SF "V4SF") (V8SF "V4SF") (V16SF "V8SF")])
;; Expander for the AVX2 gather with an SImode-element index vector.
;; Operands as they appear in the pattern:
;;   0  destination register (VEC_GATHER_MODE)
;;   1  register input in the same mode, first element of the unspec
;;   2  base address (vsib_address_operand)
;;   3  index vector register in <VEC_GATHER_IDXSI>
;;   4  second register input in VEC_GATHER_MODE
;;   5  scale, restricted by const1248_operand
;;   6  scratch register clobbered by the pattern
;; The preparation code packs operands 2, 3 and 5 into a Pmode
;; UNSPEC_VSIBADDR rtx.  The predicate name for operand 5 is written without
;; the stray trailing blank so that it matches the spelling used by the
;; *avx2_gathersi<mode> insns.
15217 (define_expand "avx2_gathersi<mode>"
15218 [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand")
15219 (unspec:VEC_GATHER_MODE
15220 [(match_operand:VEC_GATHER_MODE 1 "register_operand")
15221 (mem:<ssescalarmode>
15223 [(match_operand 2 "vsib_address_operand")
15224 (match_operand:<VEC_GATHER_IDXSI>
15225 3 "register_operand")
15226 (match_operand:SI 5 "const1248_operand")]))
15227 (mem:BLK (scratch))
15228 (match_operand:VEC_GATHER_MODE 4 "register_operand")]
15230 (clobber (match_scratch:VEC_GATHER_MODE 6))])]
15234 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
15235 operands[5]), UNSPEC_VSIBADDR);
;; Matcher for the AVX2 gather with SImode indices.
;;   operand 0 - destination, early-clobbered ("=&x")
;;   operand 2 - source vector, tied to operand 0
;;   operand 7 - the gathered memory reference (vsib_mem_operator) built
;;               from address 3, index vector 4 and constant 6
;;   operand 5 - mask vector, tied to scratch operand 1
;;   operand 1 - early-clobbered scratch; it is printed as the mask
;;               register in the output template
15238 (define_insn "*avx2_gathersi<mode>"
15239 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
15240 (unspec:VEC_GATHER_MODE
15241 [(match_operand:VEC_GATHER_MODE 2 "register_operand" "0")
15242 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
15244 [(match_operand:P 3 "vsib_address_operand" "Tv")
15245 (match_operand:<VEC_GATHER_IDXSI> 4 "register_operand" "x")
15246 (match_operand:SI 6 "const1248_operand" "n")]
15248 (mem:BLK (scratch))
15249 (match_operand:VEC_GATHER_MODE 5 "register_operand" "1")]
15251 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
15253 "v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %7, %0|%0, %7, %1}"
15254 [(set_attr "type" "ssemov")
15255 (set_attr "prefix" "vex")
15256 (set_attr "mode" "<sseinsnmode>")])
;; Second matcher for the SImode-index AVX2 gather.  No source operand
;; tied to the destination is visible here, so the operands are numbered
;; one lower than in *avx2_gathersi<mode>:
;;   operand 0 - destination, early-clobbered
;;   operand 6 - gathered memory reference (address 2, index 3, constant 5)
;;   operand 4 - mask vector, tied to scratch operand 1
;;   operand 1 - early-clobbered scratch, printed as the mask register
15258 (define_insn "*avx2_gathersi<mode>_2"
15259 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
15260 (unspec:VEC_GATHER_MODE
15262 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
15264 [(match_operand:P 2 "vsib_address_operand" "Tv")
15265 (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand" "x")
15266 (match_operand:SI 5 "const1248_operand" "n")]
15268 (mem:BLK (scratch))
15269 (match_operand:VEC_GATHER_MODE 4 "register_operand" "1")]
15271 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
15273 "v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %6, %0|%0, %6, %1}"
15274 [(set_attr "type" "ssemov")
15275 (set_attr "prefix" "vex")
15276 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the AVX2 gather with a DImode-element index vector.
;;   operand 0 - destination vector (VEC_GATHER_MODE)
;;   operand 1 - source vector, mode <VEC_GATHER_SRCDI>
;;   operand 2 - address accepted by vsib_address_operand
;;   operand 3 - index vector, mode <VEC_GATHER_IDXDI>
;;   operand 4 - mask vector, mode <VEC_GATHER_SRCDI>
;;   operand 5 - constant accepted by const1248_operand (presumably the
;;               VSIB scale factor)
;;   operand 6 - scratch register that the insn clobbers
;; The preparation code packs operands 2, 3 and 5 into a Pmode
;; UNSPEC_VSIBADDR that serves as the address of the gathered memory.
;; The predicate name of operand 5 is written without the trailing blank
;; it used to carry, so that it names the real const1248_operand predicate.
15278 (define_expand "avx2_gatherdi<mode>"
15279 [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand")
15280 (unspec:VEC_GATHER_MODE
15281 [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand")
15282 (mem:<ssescalarmode>
15284 [(match_operand 2 "vsib_address_operand")
15285 (match_operand:<VEC_GATHER_IDXDI>
15286 3 "register_operand")
15287 (match_operand:SI 5 "const1248_operand")]))
15288 (mem:BLK (scratch))
15289 (match_operand:<VEC_GATHER_SRCDI>
15290 4 "register_operand")]
15292 (clobber (match_scratch:VEC_GATHER_MODE 6))])]
15296 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
15297 operands[5]), UNSPEC_VSIBADDR);
;; Matcher for the AVX2 gather with DImode indices.
;;   operand 0 - destination (VEC_GATHER_MODE), early-clobbered
;;   operand 2 - source vector in <VEC_GATHER_SRCDI> mode, tied to operand 0
;;   operand 7 - gathered memory reference (address 3, index 4, constant 6)
;;   operand 5 - mask vector in <VEC_GATHER_SRCDI> mode, tied to scratch 1
;;   operand 1 - early-clobbered scratch
;; The template prints operand 2 (not operand 0) as the destination and
;; operand 5 as the mask, i.e. the operands that carry <VEC_GATHER_SRCDI>
;; mode; both are tied to the registers of operands 0 and 1 respectively.
15300 (define_insn "*avx2_gatherdi<mode>"
15301 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
15302 (unspec:VEC_GATHER_MODE
15303 [(match_operand:<VEC_GATHER_SRCDI> 2 "register_operand" "0")
15304 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
15306 [(match_operand:P 3 "vsib_address_operand" "Tv")
15307 (match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x")
15308 (match_operand:SI 6 "const1248_operand" "n")]
15310 (mem:BLK (scratch))
15311 (match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" "1")]
15313 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
15315 "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %2|%2, %7, %5}"
15316 [(set_attr "type" "ssemov")
15317 (set_attr "prefix" "vex")
15318 (set_attr "mode" "<sseinsnmode>")])
;; Second matcher for the DImode-index AVX2 gather; no source operand tied
;; to the destination is visible here.
;;   operand 0 - destination (VEC_GATHER_MODE), early-clobbered
;;   operand 6 - gathered memory reference (address 2, index 3, constant 5)
;;   operand 4 - mask vector in <VEC_GATHER_SRCDI> mode, tied to scratch 1
;; The output code selects between two templates: when the data mode
;; differs from <VEC_GATHER_SRCDI> the destination is printed with the
;; %x modifier (presumably the XMM-sized register name -- confirm against
;; the i386 operand printer), otherwise it is printed as plain %0.
15320 (define_insn "*avx2_gatherdi<mode>_2"
15321 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
15322 (unspec:VEC_GATHER_MODE
15324 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
15326 [(match_operand:P 2 "vsib_address_operand" "Tv")
15327 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x")
15328 (match_operand:SI 5 "const1248_operand" "n")]
15330 (mem:BLK (scratch))
15331 (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")]
15333 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
15336 if (<MODE>mode != <VEC_GATHER_SRCDI>mode)
15337 return "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %x0|%x0, %6, %4}";
15338 return "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}";
15340 [(set_attr "type" "ssemov")
15341 (set_attr "prefix" "vex")
15342 (set_attr "mode" "<sseinsnmode>")])
;; DImode-index gather whose result is only the low four elements
;; (vec_select of elements 0..3) of the gathered vector, delivered directly
;; in <VEC_GATHER_SRCDI> mode.
;;   operand 0 - destination, early-clobbered
;;   operand 2 - source vector, tied to operand 0
;;   operand 7 - gathered memory reference (address 3, index 4, constant 6)
;;   operand 5 - mask vector, tied to scratch operand 1
;;   operand 1 - early-clobbered scratch in the VI4F_256 iterator mode
15344 (define_insn "*avx2_gatherdi<mode>_3"
15345 [(set (match_operand:<VEC_GATHER_SRCDI> 0 "register_operand" "=&x")
15346 (vec_select:<VEC_GATHER_SRCDI>
15348 [(match_operand:<VEC_GATHER_SRCDI> 2 "register_operand" "0")
15349 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
15351 [(match_operand:P 3 "vsib_address_operand" "Tv")
15352 (match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x")
15353 (match_operand:SI 6 "const1248_operand" "n")]
15355 (mem:BLK (scratch))
15356 (match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" "1")]
15358 (parallel [(const_int 0) (const_int 1)
15359 (const_int 2) (const_int 3)])))
15360 (clobber (match_scratch:VI4F_256 1 "=&x"))]
15362 "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %0|%0, %7, %5}"
15363 [(set_attr "type" "ssemov")
15364 (set_attr "prefix" "vex")
15365 (set_attr "mode" "<sseinsnmode>")])
;; Like *avx2_gatherdi<mode>_3 (result is the vec_select of elements 0..3
;; in <VEC_GATHER_SRCDI> mode), but with no tied source operand visible;
;; operands are therefore numbered one lower:
;;   operand 0 - destination, early-clobbered
;;   operand 6 - gathered memory reference (address 2, index 3, constant 5)
;;   operand 4 - mask vector, tied to scratch operand 1
;;   operand 1 - early-clobbered scratch in the VI4F_256 iterator mode
15367 (define_insn "*avx2_gatherdi<mode>_4"
15368 [(set (match_operand:<VEC_GATHER_SRCDI> 0 "register_operand" "=&x")
15369 (vec_select:<VEC_GATHER_SRCDI>
15372 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
15374 [(match_operand:P 2 "vsib_address_operand" "Tv")
15375 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x")
15376 (match_operand:SI 5 "const1248_operand" "n")]
15378 (mem:BLK (scratch))
15379 (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")]
15381 (parallel [(const_int 0) (const_int 1)
15382 (const_int 2) (const_int 3)])))
15383 (clobber (match_scratch:VI4F_256 1 "=&x"))]
15385 "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}"
15386 [(set_attr "type" "ssemov")
15387 (set_attr "prefix" "vex")
15388 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the 512-bit gather with SImode-element indices.
;;   operand 0 - destination vector (VI48F_512)
;;   operand 1 - source vector, same mode
;;   operand 4 - mask register, mode <avx512fmaskmode>
;;   operand 2 - address accepted by vsib_address_operand
;;   operand 3 - index vector, mode <VEC_GATHER_IDXSI>
;;   operand 5 - constant accepted by const1248_operand
;;   operand 7 - mask-mode scratch that the insn clobbers
;; The preparation code packs operands 2, 3 and 5 into a Pmode
;; UNSPEC_VSIBADDR.
15390 (define_expand "avx512f_gathersi<mode>"
15391 [(parallel [(set (match_operand:VI48F_512 0 "register_operand")
15393 [(match_operand:VI48F_512 1 "register_operand")
15394 (match_operand:<avx512fmaskmode> 4 "register_operand")
15395 (mem:<ssescalarmode>
15397 [(match_operand 2 "vsib_address_operand")
15398 (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand")
15399 (match_operand:SI 5 "const1248_operand")]))]
15401 (clobber (match_scratch:<avx512fmaskmode> 7))])]
15405 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
15406 operands[5]), UNSPEC_VSIBADDR);
;; Matcher for the 512-bit gather with SImode indices.
;;   operand 0 - destination, early-clobbered ("=&v")
;;   operand 1 - source vector, tied to operand 0
;;   operand 7 - incoming mask, tied to scratch operand 2
;;   operand 6 - gathered memory reference built from address 4, index
;;               vector 3 and constant 5 inside UNSPEC_VSIBADDR
;;   operand 2 - early-clobbered mask scratch ("=&Yk"); printed inside
;;               braces after the destination in the template
15409 (define_insn "*avx512f_gathersi<mode>"
15410 [(set (match_operand:VI48F_512 0 "register_operand" "=&v")
15412 [(match_operand:VI48F_512 1 "register_operand" "0")
15413 (match_operand:<avx512fmaskmode> 7 "register_operand" "2")
15414 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
15416 [(match_operand:P 4 "vsib_address_operand" "Tv")
15417 (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand" "v")
15418 (match_operand:SI 5 "const1248_operand" "n")]
15419 UNSPEC_VSIBADDR)])]
15421 (clobber (match_scratch:<avx512fmaskmode> 2 "=&Yk"))]
15423 "v<sseintprefix>gatherd<ssemodesuffix>\t{%6, %0%{%2%}|%0%{%2%}, %g6}"
15424 [(set_attr "type" "ssemov")
15425 (set_attr "prefix" "evex")
15426 (set_attr "mode" "<sseinsnmode>")])
;; Second matcher for the 512-bit SImode-index gather; no source operand
;; tied to the destination is visible here.
;;   operand 0 - destination, early-clobbered
;;   operand 6 - incoming mask, tied to scratch operand 1
;;   operand 5 - gathered memory reference (address 3, index 2, constant 4)
;;   operand 1 - early-clobbered mask scratch, printed in braces after the
;;               destination
15428 (define_insn "*avx512f_gathersi<mode>_2"
15429 [(set (match_operand:VI48F_512 0 "register_operand" "=&v")
15432 (match_operand:<avx512fmaskmode> 6 "register_operand" "1")
15433 (match_operator:<ssescalarmode> 5 "vsib_mem_operator"
15435 [(match_operand:P 3 "vsib_address_operand" "Tv")
15436 (match_operand:<VEC_GATHER_IDXSI> 2 "register_operand" "v")
15437 (match_operand:SI 4 "const1248_operand" "n")]
15438 UNSPEC_VSIBADDR)])]
15440 (clobber (match_scratch:<avx512fmaskmode> 1 "=&Yk"))]
15442 "v<sseintprefix>gatherd<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %g5}"
15443 [(set_attr "type" "ssemov")
15444 (set_attr "prefix" "evex")
15445 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the 512-bit gather with DImode-element indices.
;;   operand 0 - destination vector (VI48F_512)
;;   operand 1 - source vector, mode <VEC_GATHER_SRCDI>
;;   operand 4 - mask register, always QImode here
;;   operand 2 - address accepted by vsib_address_operand
;;   operand 3 - index vector, mode <VEC_GATHER_IDXDI>
;;   operand 5 - constant accepted by const1248_operand
;;   operand 7 - QImode scratch that the insn clobbers
;; The preparation code packs operands 2, 3 and 5 into a Pmode
;; UNSPEC_VSIBADDR.
15448 (define_expand "avx512f_gatherdi<mode>"
15449 [(parallel [(set (match_operand:VI48F_512 0 "register_operand")
15451 [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand")
15452 (match_operand:QI 4 "register_operand")
15453 (mem:<ssescalarmode>
15455 [(match_operand 2 "vsib_address_operand")
15456 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand")
15457 (match_operand:SI 5 "const1248_operand")]))]
15459 (clobber (match_scratch:QI 7))])]
15463 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
15464 operands[5]), UNSPEC_VSIBADDR);
;; Matcher for the 512-bit gather with DImode indices.
;;   operand 0 - destination (VI48F_512), early-clobbered
;;   operand 1 - source vector in <VEC_GATHER_SRCDI> mode, tied to operand 0
;;   operand 7 - incoming QImode mask, tied to scratch operand 2
;;   operand 6 - gathered memory reference (address 4, index 3, constant 5)
;;   operand 2 - early-clobbered QImode mask scratch
;; The template prints operand 1 rather than operand 0 as the destination,
;; i.e. the register is named according to <VEC_GATHER_SRCDI> mode.
15467 (define_insn "*avx512f_gatherdi<mode>"
15468 [(set (match_operand:VI48F_512 0 "register_operand" "=&v")
15470 [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand" "0")
15471 (match_operand:QI 7 "register_operand" "2")
15472 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
15474 [(match_operand:P 4 "vsib_address_operand" "Tv")
15475 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "v")
15476 (match_operand:SI 5 "const1248_operand" "n")]
15477 UNSPEC_VSIBADDR)])]
15479 (clobber (match_scratch:QI 2 "=&Yk"))]
15481 "v<sseintprefix>gatherq<ssemodesuffix>\t{%6, %1%{%2%}|%1%{%2%}, %g6}"
15482 [(set_attr "type" "ssemov")
15483 (set_attr "prefix" "evex")
15484 (set_attr "mode" "<sseinsnmode>")])
;; Second matcher for the 512-bit DImode-index gather; no source operand
;; tied to the destination is visible here.
;;   operand 0 - destination (VI48F_512), early-clobbered
;;   operand 6 - incoming QImode mask, tied to scratch operand 1
;;   operand 5 - gathered memory reference (address 3, index 2, constant 4)
;;   operand 1 - early-clobbered QImode mask scratch
;; The output code selects between two templates: when the data mode
;; differs from <VEC_GATHER_SRCDI> the destination is printed with the
;; %t modifier (presumably the YMM-sized register name -- confirm against
;; the i386 operand printer), otherwise it is printed as plain %0.
15486 (define_insn "*avx512f_gatherdi<mode>_2"
15487 [(set (match_operand:VI48F_512 0 "register_operand" "=&v")
15490 (match_operand:QI 6 "register_operand" "1")
15491 (match_operator:<ssescalarmode> 5 "vsib_mem_operator"
15493 [(match_operand:P 3 "vsib_address_operand" "Tv")
15494 (match_operand:<VEC_GATHER_IDXDI> 2 "register_operand" "v")
15495 (match_operand:SI 4 "const1248_operand" "n")]
15496 UNSPEC_VSIBADDR)])]
15498 (clobber (match_scratch:QI 1 "=&Yk"))]
15501 if (<MODE>mode != <VEC_GATHER_SRCDI>mode)
15502 return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %t0%{%1%}|%t0%{%1%}, %g5}";
15503 return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %g5}";
15505 [(set_attr "type" "ssemov")
15506 (set_attr "prefix" "evex")
15507 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the 512-bit scatter with SImode-element indices.
;;   operand 0 - address accepted by vsib_address_operand
;;   operand 1 - mask register, mode <avx512fmaskmode>
;;   operand 2 - index vector, mode <VEC_GATHER_IDXSI>
;;   operand 3 - data vector to store (VI48F_512)
;;   operand 4 - constant accepted by const1248_operand
;;   operand 6 - mask-mode scratch that the insn clobbers
;; The preparation code packs operands 0, 2 and 4 into a Pmode
;; UNSPEC_VSIBADDR that addresses the destination memory.
15509 (define_expand "avx512f_scattersi<mode>"
15510 [(parallel [(set (mem:VI48F_512
15512 [(match_operand 0 "vsib_address_operand")
15513 (match_operand:<VEC_GATHER_IDXSI> 2 "register_operand")
15514 (match_operand:SI 4 "const1248_operand")]))
15516 [(match_operand:<avx512fmaskmode> 1 "register_operand")
15517 (match_operand:VI48F_512 3 "register_operand")]
15519 (clobber (match_scratch:<avx512fmaskmode> 6))])]
15523 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[0], operands[2],
15524 operands[4]), UNSPEC_VSIBADDR);
;; Matcher for the 512-bit scatter with SImode indices.
;;   operand 5 - destination memory reference (vsib_mem_operator) built
;;               from address 0, index vector 2 and constant 4
;;   operand 6 - incoming mask, tied to scratch operand 1
;;   operand 3 - data vector being stored
;;   operand 1 - early-clobbered mask scratch ("=&Yk"), printed in braces
;;               after the memory operand
15527 (define_insn "*avx512f_scattersi<mode>"
15528 [(set (match_operator:VI48F_512 5 "vsib_mem_operator"
15530 [(match_operand:P 0 "vsib_address_operand" "Tv")
15531 (match_operand:<VEC_GATHER_IDXSI> 2 "register_operand" "v")
15532 (match_operand:SI 4 "const1248_operand" "n")]
15535 [(match_operand:<avx512fmaskmode> 6 "register_operand" "1")
15536 (match_operand:VI48F_512 3 "register_operand" "v")]
15538 (clobber (match_scratch:<avx512fmaskmode> 1 "=&Yk"))]
15540 "v<sseintprefix>scatterd<ssemodesuffix>\t{%3, %5%{%1%}|%5%{%1%}, %3}"
15541 [(set_attr "type" "ssemov")
15542 (set_attr "prefix" "evex")
15543 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the 512-bit scatter with DImode-element indices.
;;   operand 0 - address accepted by vsib_address_operand
;;   operand 1 - mask register, always QImode here
;;   operand 2 - index vector, always V8DI
;;   operand 3 - data vector to store, mode <VEC_GATHER_SRCDI>
;;   operand 4 - constant accepted by const1248_operand
;;   operand 6 - QImode scratch that the insn clobbers
;; The preparation code packs operands 0, 2 and 4 into a Pmode
;; UNSPEC_VSIBADDR that addresses the destination memory.
15545 (define_expand "avx512f_scatterdi<mode>"
15546 [(parallel [(set (mem:VI48F_512
15548 [(match_operand 0 "vsib_address_operand")
15549 (match_operand:V8DI 2 "register_operand")
15550 (match_operand:SI 4 "const1248_operand")]))
15552 [(match_operand:QI 1 "register_operand")
15553 (match_operand:<VEC_GATHER_SRCDI> 3 "register_operand")]
15555 (clobber (match_scratch:QI 6))])]
15559 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[0], operands[2],
15560 operands[4]), UNSPEC_VSIBADDR);
;; Matcher for the 512-bit scatter with DImode (V8DI) indices.
;;   operand 5 - destination memory reference (vsib_mem_operator) built
;;               from address 0, index vector 2 and constant 4
;;   operand 6 - incoming QImode mask, tied to scratch operand 1
;;   operand 3 - data vector being stored, mode <VEC_GATHER_SRCDI>
;;   operand 1 - early-clobbered QImode mask scratch, printed in braces
;;               after the memory operand
15563 (define_insn "*avx512f_scatterdi<mode>"
15564 [(set (match_operator:VI48F_512 5 "vsib_mem_operator"
15566 [(match_operand:P 0 "vsib_address_operand" "Tv")
15567 (match_operand:V8DI 2 "register_operand" "v")
15568 (match_operand:SI 4 "const1248_operand" "n")]
15571 [(match_operand:QI 6 "register_operand" "1")
15572 (match_operand:<VEC_GATHER_SRCDI> 3 "register_operand" "v")]
15574 (clobber (match_scratch:QI 1 "=&Yk"))]
15576 "v<sseintprefix>scatterq<ssemodesuffix>\t{%3, %5%{%1%}|%5%{%1%}, %3}"
15577 [(set_attr "type" "ssemov")
15578 (set_attr "prefix" "evex")
15579 (set_attr "mode" "<sseinsnmode>")])
;; Masked register-to-register compress.
;;   operand 0 - destination vector register
;;   operand 1 - source vector register
;;   operand 2 - vector_move_operand that is either tied to the destination
;;               (constraint "0") or satisfies constraint "C"; it is
;;               referenced in the template only through the %N2 modifier
;;   operand 3 - mask register ("Yk"), printed in braces after operand 0
15581 (define_insn "avx512f_compress<mode>_mask"
15582 [(set (match_operand:VI48F_512 0 "register_operand" "=v")
15584 [(match_operand:VI48F_512 1 "register_operand" "v")
15585 (match_operand:VI48F_512 2 "vector_move_operand" "0C")
15586 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")]
15589 "v<sseintprefix>compress<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
15590 [(set_attr "type" "ssemov")
15591 (set_attr "prefix" "evex")
15592 (set_attr "mode" "<sseinsnmode>")])
;; Masked compress with a memory destination (UNSPEC_COMPRESS_STORE).
;;   operand 0 - destination memory
;;   operand 1 - source vector register
;;   operand 2 - mask register ("Yk"), printed in braces after operand 0
;; The source uses the "v" constraint, as the register form
;; avx512f_compress<mode>_mask does: the insn is EVEX-encoded (see the
;; "prefix" attribute), so there is no reason to confine the register
;; allocator to the registers admitted by "x".
15594 (define_insn "avx512f_compressstore<mode>_mask"
15595 [(set (match_operand:VI48F_512 0 "memory_operand" "=m")
15597 [(match_operand:VI48F_512 1 "register_operand" "v")
15599 (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")]
15600 UNSPEC_COMPRESS_STORE))]
15602 "v<sseintprefix>compress<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
15603 [(set_attr "type" "ssemov")
15604 (set_attr "prefix" "evex")
15605 (set_attr "memory" "store")
15606 (set_attr "mode" "<sseinsnmode>")])
;; Zero-masking expander for the vector expand operation.  Takes the same
;; operands as avx512f_expand<mode>_mask (source 1, merge value 2, mask 3)
;; but its preparation statement overwrites operand 2 with the all-zero
;; constant of the vector mode before the pattern is generated.
15608 (define_expand "avx512f_expand<mode>_maskz"
15609 [(set (match_operand:VI48F_512 0 "register_operand")
15611 [(match_operand:VI48F_512 1 "nonimmediate_operand")
15612 (match_operand:VI48F_512 2 "vector_move_operand")
15613 (match_operand:<avx512fmaskmode> 3 "register_operand")]
15616 "operands[2] = CONST0_RTX (<MODE>mode);")
;; Masked vector expand.  Two alternatives: source operand 1 in a register
;; or in memory (the "memory" attribute is "none" and "load" respectively).
;;   operand 0 - destination vector register
;;   operand 2 - vector_move_operand that is either tied to the destination
;;               (constraint "0") or satisfies constraint "C"; it is
;;               referenced in the template only through the %N2 modifier
;;   operand 3 - mask register ("Yk"), printed in braces after operand 0
15618 (define_insn "avx512f_expand<mode>_mask"
15619 [(set (match_operand:VI48F_512 0 "register_operand" "=v,v")
15621 [(match_operand:VI48F_512 1 "nonimmediate_operand" "v,m")
15622 (match_operand:VI48F_512 2 "vector_move_operand" "0C,0C")
15623 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")]
15626 "v<sseintprefix>expand<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
15627 [(set_attr "type" "ssemov")
15628 (set_attr "prefix" "evex")
15629 (set_attr "memory" "none,load")
15630 (set_attr "mode" "<sseinsnmode>")])
;; Packed vgetmant on 512-bit floating-point vectors (VF_512).
;;   operand 0 - destination vector register
;;   operand 1 - source; predicate and constraint come from the
;;               <round_saeonly_name> substitution
;;   operand 2 - immediate accepted by const_0_to_15_operand
;; <mask_name> supplies the optional masked form; <mask_operand3> is
;; appended to the destination in the template.
;; The output template is written without a trailing ';': in a machine
;; description ';' starts a comment, so the old one was only noise.
15632 (define_insn "avx512f_getmant<mode><mask_name><round_saeonly_name>"
15633 [(set (match_operand:VF_512 0 "register_operand" "=v")
15635 [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
15636 (match_operand:SI 2 "const_0_to_15_operand")]
15639 "vgetmant<ssemodesuffix>\t{%2, <round_saeonly_mask_op3>%1, %0<mask_operand3>|%0<mask_operand3>, %1<round_saeonly_mask_op3>, %2}"
15640 [(set_attr "prefix" "evex")
15641 (set_attr "mode" "<MODE>")])
;; Scalar vgetmant on 128-bit floating-point vectors (VF_128).
;;   operand 0 - destination vector register
;;   operand 1 - first source vector register
;;   operand 2 - second source; predicate and constraint come from the
;;               <round_saeonly_name> substitution
;;   operand 3 - immediate accepted by const_0_to_15_operand
;; The output template is written without a trailing ';': in a machine
;; description ';' starts a comment, so the old one was only noise.
15643 (define_insn "avx512f_getmant<mode><round_saeonly_name>"
15644 [(set (match_operand:VF_128 0 "register_operand" "=v")
15647 [(match_operand:VF_128 1 "register_operand" "v")
15648 (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
15649 (match_operand:SI 3 "const_0_to_15_operand")]
15654 "vgetmant<ssescalarmodesuffix>\t{%3, <round_saeonly_op4>%2, %1, %0|%0, %1, %2<round_saeonly_op4>, %3}"
15655 [(set_attr "prefix" "evex")
15656 (set_attr "mode" "<ssescalarmode>")])
;; Standard-named clz pattern for the 512-bit integer vector modes in
;; VI48_512, emitted as vplzcnt.  Operand 1 (register or memory) is the
;; source, operand 0 the destination register; <mask_name> supplies the
;; optional masked form.
15658 (define_insn "clz<mode>2<mask_name>"
15659 [(set (match_operand:VI48_512 0 "register_operand" "=v")
15661 (match_operand:VI48_512 1 "nonimmediate_operand" "vm")))]
15663 "vplzcnt<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
15664 [(set_attr "type" "sse")
15665 (set_attr "prefix" "evex")
15666 (set_attr "mode" "<sseinsnmode>")])
;; vpconflict on the 512-bit integer vector modes in VI48_512.  Operand 1
;; (register or memory) is the source, operand 0 the destination register;
;; <mask_name> supplies the optional masked form.
15668 (define_insn "<mask_codefor>conflict<mode><mask_name>"
15669 [(set (match_operand:VI48_512 0 "register_operand" "=v")
15671 [(match_operand:VI48_512 1 "nonimmediate_operand" "vm")]
15674 "vpconflict<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
15675 [(set_attr "type" "sse")
15676 (set_attr "prefix" "evex")
15677 (set_attr "mode" "<sseinsnmode>")])
;; sha1msg1: two-operand V4SI instruction.  Operand 1 is tied to the
;; destination (constraint "0"); operand 2 may be a register or memory.
15679 (define_insn "sha1msg1"
15680 [(set (match_operand:V4SI 0 "register_operand" "=x")
15682 [(match_operand:V4SI 1 "register_operand" "0")
15683 (match_operand:V4SI 2 "nonimmediate_operand" "xm")]
15686 "sha1msg1\t{%2, %0|%0, %2}"
15687 [(set_attr "type" "sselog1")
15688 (set_attr "mode" "TI")])
;; sha1msg2: two-operand V4SI instruction.  Operand 1 is tied to the
;; destination (constraint "0"); operand 2 may be a register or memory.
15690 (define_insn "sha1msg2"
15691 [(set (match_operand:V4SI 0 "register_operand" "=x")
15693 [(match_operand:V4SI 1 "register_operand" "0")
15694 (match_operand:V4SI 2 "nonimmediate_operand" "xm")]
15697 "sha1msg2\t{%2, %0|%0, %2}"
15698 [(set_attr "type" "sselog1")
15699 (set_attr "mode" "TI")])
;; sha1nexte (UNSPEC_SHA1NEXTE): two-operand V4SI instruction.  Operand 1
;; is tied to the destination; operand 2 may be a register or memory.
15701 (define_insn "sha1nexte"
15702 [(set (match_operand:V4SI 0 "register_operand" "=x")
15704 [(match_operand:V4SI 1 "register_operand" "0")
15705 (match_operand:V4SI 2 "nonimmediate_operand" "xm")]
15706 UNSPEC_SHA1NEXTE))]
15708 "sha1nexte\t{%2, %0|%0, %2}"
15709 [(set_attr "type" "sselog1")
15710 (set_attr "mode" "TI")])
;; sha1rnds4 (UNSPEC_SHA1RNDS4).  Operand 1 is tied to the destination,
;; operand 2 may be a register or memory, and operand 3 is an immediate
;; accepted by const_0_to_3_operand; the immediate accounts for the
;; "length_immediate" attribute of 1.
15712 (define_insn "sha1rnds4"
15713 [(set (match_operand:V4SI 0 "register_operand" "=x")
15715 [(match_operand:V4SI 1 "register_operand" "0")
15716 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
15717 (match_operand:SI 3 "const_0_to_3_operand" "n")]
15718 UNSPEC_SHA1RNDS4))]
15720 "sha1rnds4\t{%3, %2, %0|%0, %2, %3}"
15721 [(set_attr "type" "sselog1")
15722 (set_attr "length_immediate" "1")
15723 (set_attr "mode" "TI")])
;; sha256msg1 (UNSPEC_SHA256MSG1): two-operand V4SI instruction.  Operand 1
;; is tied to the destination; operand 2 may be a register or memory.
15725 (define_insn "sha256msg1"
15726 [(set (match_operand:V4SI 0 "register_operand" "=x")
15728 [(match_operand:V4SI 1 "register_operand" "0")
15729 (match_operand:V4SI 2 "nonimmediate_operand" "xm")]
15730 UNSPEC_SHA256MSG1))]
15732 "sha256msg1\t{%2, %0|%0, %2}"
15733 [(set_attr "type" "sselog1")
15734 (set_attr "mode" "TI")])
;; sha256msg2 (UNSPEC_SHA256MSG2): two-operand V4SI instruction.  Operand 1
;; is tied to the destination; operand 2 may be a register or memory.
15736 (define_insn "sha256msg2"
15737 [(set (match_operand:V4SI 0 "register_operand" "=x")
15739 [(match_operand:V4SI 1 "register_operand" "0")
15740 (match_operand:V4SI 2 "nonimmediate_operand" "xm")]
15741 UNSPEC_SHA256MSG2))]
15743 "sha256msg2\t{%2, %0|%0, %2}"
15744 [(set_attr "type" "sselog1")
15745 (set_attr "mode" "TI")])
;; sha256rnds2 (UNSPEC_SHA256RNDS2).  Operand 1 is tied to the destination
;; and operand 2 may be a register or memory.  Operand 3 is a V4SI
;; register restricted by constraint "Yz" (presumably the fixed register
;; the instruction reads implicitly -- confirm against the i386
;; constraint definitions).
;; No "length_immediate" attribute is set: unlike sha1rnds4, this pattern
;; has no immediate operand, so forcing the attribute to 1 would only make
;; the computed insn length one byte too large.
15747 (define_insn "sha256rnds2"
15748 [(set (match_operand:V4SI 0 "register_operand" "=x")
15750 [(match_operand:V4SI 1 "register_operand" "0")
15751 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
15752 (match_operand:V4SI 3 "register_operand" "Yz")]
15753 UNSPEC_SHA256RNDS2))]
15755 "sha256rnds2\t{%3, %2, %0|%0, %2, %3}"
15756 [(set_attr "type" "sselog1")
15758 (set_attr "mode" "TI")])
;; Cast from the quarter-width vector mode (<ssequartermode>) to the full
;; AVX512MODE2P mode.  Two alternatives: register destination with a
;; register-or-memory source, or memory destination with a register source.
;; The pattern is split once reload has completed: the split code re-creates
;; operand 0 as a <ssequartermode> hard register and/or operand 1 as a
;; <MODE>mode hard register with the same register number, so that both
;; sides have the same mode, and then emits an ordinary move.
;; NOTE(review): the condition choosing which side is rewritten is not
;; visible in this listing -- confirm against the full pattern.
15760 (define_insn_and_split "avx512f_<castmode><avxsizesuffix>_<castmode>"
15761 [(set (match_operand:AVX512MODE2P 0 "nonimmediate_operand" "=x,m")
15762 (unspec:AVX512MODE2P
15763 [(match_operand:<ssequartermode> 1 "nonimmediate_operand" "xm,x")]
15767 "&& reload_completed"
15770 rtx op0 = operands[0];
15771 rtx op1 = operands[1];
15773 op0 = gen_rtx_REG (<ssequartermode>mode, REGNO (op0));
15775 op1 = gen_rtx_REG (<MODE>mode, REGNO (op1));
15776 emit_move_insn (op0, op1);
15780 (define_insn_and_split "avx512f_<castmode><avxsizesuffix>_256<castmode>"
15781 [(set (match_operand:AVX512MODE2P 0 "nonimmediate_operand" "=x,m")
15782 (unspec:AVX512MODE2P
15783 [(match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "xm,x")]
15787 "&& reload_completed"
15790 rtx op0 = operands[0];
15791 rtx op1 = operands[1];
15793 op0 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op0));
15795 op1 = gen_rtx_REG (<MODE>mode, REGNO (op1));
15796 emit_move_insn (op0, op1);