1 ;; GCC machine description for SSE instructions
2 ;; Copyright (C) 2005-2013 Free Software Foundation, Inc.
4 ;; This file is part of GCC.
6 ;; GCC is free software; you can redistribute it and/or modify
7 ;; it under the terms of the GNU General Public License as published by
8 ;; the Free Software Foundation; either version 3, or (at your option)
11 ;; GCC is distributed in the hope that it will be useful,
12 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
13 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 ;; GNU General Public License for more details.
16 ;; You should have received a copy of the GNU General Public License
17 ;; along with GCC; see the file COPYING3. If not see
18 ;; <http://www.gnu.org/licenses/>.
20 (define_c_enum "unspec" [
55 UNSPEC_XOP_UNSIGNED_CMP
66 UNSPEC_AESKEYGENASSIST
87 ;; For AVX512F support
91 UNSPEC_UNSIGNED_FIX_NOTRUNC
106 UNSPEC_COMPRESS_STORE
111 ;; For embedded rounding feature
112 UNSPEC_EMBEDDED_ROUNDING
114 ;; For AVX512PF support
115 UNSPEC_GATHER_PREFETCH
116 UNSPEC_SCATTER_PREFETCH
118 ;; For AVX512ER support
124 (define_c_enum "unspecv" [
134 ;; All vector modes including V?TImode, used in move patterns.
;; 128-bit modes are listed unconditionally; each 256-bit mode requires
;; TARGET_AVX and each 512-bit mode requires TARGET_AVX512F.
135 (define_mode_iterator VMOVE
136 [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
137 (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
138 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
139 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
140 (V2TI "TARGET_AVX") V1TI
141 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
142 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF])
;; General vector mode iterator: the 128-bit integer modes and V4SF are
;; unconditional, V2DF requires TARGET_SSE2, the 256-bit modes require
;; TARGET_AVX, and the 512-bit SI/DI/SF/DF element modes require
;; TARGET_AVX512F.  Unlike VMOVE it lists no V64QI, V32HI or V?TI modes.
145 (define_mode_iterator V
146 [(V32QI "TARGET_AVX") V16QI
147 (V16HI "TARGET_AVX") V8HI
148 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
149 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
150 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
151 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
153 ;; All 128bit vector modes
154 (define_mode_iterator V_128
155 [V16QI V8HI V4SI V2DI V4SF (V2DF "TARGET_SSE2")])
157 ;; All 256bit vector modes
158 (define_mode_iterator V_256
159 [V32QI V16HI V8SI V4DI V8SF V4DF])
161 ;; All 512bit vector modes
162 (define_mode_iterator V_512 [V64QI V32HI V16SI V8DI V16SF V8DF])
164 ;; All 256bit and 512bit vector modes
165 (define_mode_iterator V_256_512
166 [V32QI V16HI V8SI V4DI V8SF V4DF
167 (V64QI "TARGET_AVX512F") (V32HI "TARGET_AVX512F") (V16SI "TARGET_AVX512F")
168 (V8DI "TARGET_AVX512F") (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")])
170 ;; All vector float modes
171 (define_mode_iterator VF
172 [(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
173 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
175 ;; 128- and 256-bit float vector modes
176 (define_mode_iterator VF_128_256
177 [(V8SF "TARGET_AVX") V4SF
178 (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
180 ;; All SFmode vector float modes
181 (define_mode_iterator VF1
182 [(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF])
184 ;; 128- and 256-bit SF vector modes
185 (define_mode_iterator VF1_128_256
186 [(V8SF "TARGET_AVX") V4SF])
188 ;; All DFmode vector float modes
189 (define_mode_iterator VF2
190 [(V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF])
192 ;; 128- and 256-bit DF vector modes
193 (define_mode_iterator VF2_128_256
194 [(V4DF "TARGET_AVX") V2DF])
;; 256- and 512-bit DF vector modes (V4DF under TARGET_AVX, V8DF under
;; TARGET_AVX512F); no 128-bit mode is listed.
196 (define_mode_iterator VF2_512_256
197 [(V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX")])
199 ;; All 128bit vector float modes
200 (define_mode_iterator VF_128
201 [V4SF (V2DF "TARGET_SSE2")])
203 ;; All 256bit vector float modes
204 (define_mode_iterator VF_256
207 ;; All 512bit vector float modes
208 (define_mode_iterator VF_512
211 ;; All vector integer modes
212 (define_mode_iterator VI
213 [(V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")
214 (V32QI "TARGET_AVX") V16QI
215 (V16HI "TARGET_AVX") V8HI
216 (V8SI "TARGET_AVX") V4SI
217 (V4DI "TARGET_AVX") V2DI])
;; Vector integer modes whose 256-bit forms require TARGET_AVX2 (VI asks
;; only for TARGET_AVX).  The 128-bit modes are unconditional; V16SI and
;; V8DI are included under TARGET_AVX512F.
219 (define_mode_iterator VI_AVX2
220 [(V32QI "TARGET_AVX2") V16QI
221 (V16HI "TARGET_AVX2") V8HI
222 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI
223 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
225 ;; All QImode vector integer modes
226 (define_mode_iterator VI1
227 [(V32QI "TARGET_AVX") V16QI])
;; Integer modes handled by the unaligned load/store (loaddqu/storedqu)
;; patterns: the QImode vectors (V32QI under TARGET_AVX) plus the 512-bit
;; SImode and DImode vectors under TARGET_AVX512F.
229 (define_mode_iterator VI_UNALIGNED_LOADSTORE
230 [(V32QI "TARGET_AVX") V16QI
231 (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")])
233 ;; All DImode vector integer modes
234 (define_mode_iterator VI8
235 [(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI])
;; QImode vector integer modes; the 256-bit mode requires TARGET_AVX2.
237 (define_mode_iterator VI1_AVX2
238 [(V32QI "TARGET_AVX2") V16QI])
;; HImode vector integer modes; the 256-bit mode requires TARGET_AVX2.
240 (define_mode_iterator VI2_AVX2
241 [(V16HI "TARGET_AVX2") V8HI])
;; HImode vector integer modes including V32HI under TARGET_AVX512F.
243 (define_mode_iterator VI2_AVX512F
244 [(V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX2") V8HI])
;; SImode vector integer modes; the 256-bit mode requires only TARGET_AVX.
246 (define_mode_iterator VI4_AVX
247 [(V8SI "TARGET_AVX") V4SI])
;; SImode vector integer modes; the 256-bit mode requires TARGET_AVX2.
249 (define_mode_iterator VI4_AVX2
250 [(V8SI "TARGET_AVX2") V4SI])
;; SImode vector integer modes including V16SI under TARGET_AVX512F.
252 (define_mode_iterator VI4_AVX512F
253 [(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI])
;; All SImode vector integer modes plus the 512-bit DImode vector only
;; (no V4DI or V2DI).
255 (define_mode_iterator VI48_AVX512F
256 [(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI
257 (V8DI "TARGET_AVX512F")])
;; DImode vector integer modes; the 256-bit mode requires TARGET_AVX2.
259 (define_mode_iterator VI8_AVX2
260 [(V4DI "TARGET_AVX2") V2DI])
;; DImode vector integer modes including V8DI under TARGET_AVX512F.
262 (define_mode_iterator VI8_AVX2_AVX512F
263 [(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
266 (define_mode_iterator V8FI
270 (define_mode_iterator V16FI
273 ;; ??? We should probably use TImode instead.
274 (define_mode_iterator VIMAX_AVX2
275 [(V2TI "TARGET_AVX2") V1TI])
277 ;; ??? This should probably be dropped in favor of VIMAX_AVX2.
278 (define_mode_iterator SSESCALARMODE
279 [(V2TI "TARGET_AVX2") TI])
;; QImode and HImode vector integer modes; 256-bit modes require
;; TARGET_AVX2.
281 (define_mode_iterator VI12_AVX2
282 [(V32QI "TARGET_AVX2") V16QI
283 (V16HI "TARGET_AVX2") V8HI])
;; HImode and SImode vector integer modes; 256-bit modes require
;; TARGET_AVX2.
285 (define_mode_iterator VI24_AVX2
286 [(V16HI "TARGET_AVX2") V8HI
287 (V8SI "TARGET_AVX2") V4SI])
;; QImode, HImode and SImode vector integer modes (256-bit under
;; TARGET_AVX2), plus the 512-bit SImode and DImode vectors under
;; TARGET_AVX512F.
289 (define_mode_iterator VI124_AVX2_48_AVX512F
290 [(V32QI "TARGET_AVX2") V16QI
291 (V16HI "TARGET_AVX2") V8HI
292 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI
293 (V8DI "TARGET_AVX512F")])
;; QImode, HImode and SImode vector integer modes (256-bit under
;; TARGET_AVX2), plus V32HI and V16SI under TARGET_AVX512F.  No 512-bit
;; QImode vector is listed.
295 (define_mode_iterator VI124_AVX512F
296 [(V32QI "TARGET_AVX2") V16QI
297 (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX2") V8HI
298 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI])
;; QImode, HImode and SImode vector integer modes; 256-bit modes require
;; TARGET_AVX2.
300 (define_mode_iterator VI124_AVX2
301 [(V32QI "TARGET_AVX2") V16QI
302 (V16HI "TARGET_AVX2") V8HI
303 (V8SI "TARGET_AVX2") V4SI])
;; HImode, SImode and DImode vector integer modes; 256-bit modes require
;; TARGET_AVX2.
305 (define_mode_iterator VI248_AVX2
306 [(V16HI "TARGET_AVX2") V8HI
307 (V8SI "TARGET_AVX2") V4SI
308 (V4DI "TARGET_AVX2") V2DI])
;; As VI248_AVX2, plus the 512-bit DImode vector under TARGET_AVX512F.
310 (define_mode_iterator VI248_AVX2_8_AVX512F
311 [(V16HI "TARGET_AVX2") V8HI
312 (V8SI "TARGET_AVX2") V4SI
313 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
;; SImode and DImode vector integer modes at all three widths: 128-bit
;; unconditional, 256-bit under TARGET_AVX2, 512-bit under TARGET_AVX512F.
315 (define_mode_iterator VI48_AVX2_48_AVX512F
316 [(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI
317 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
319 (define_mode_iterator V48_AVX2
322 (V4SI "TARGET_AVX2") (V2DI "TARGET_AVX2")
323 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")])
325 (define_mode_attr sse2_avx_avx512f
326 [(V16QI "sse2") (V32QI "avx") (V64QI "avx512f")
327 (V4SI "sse2") (V8SI "avx") (V16SI "avx512f")
329 (V16SF "avx512f") (V8SF "avx") (V4SF "avx")
330 (V8DF "avx512f") (V4DF "avx") (V2DF "avx")])
;; ISA name string per integer vector mode: "sse2" for 128-bit modes,
;; "avx2" for 256-bit modes and "avx512f" for V16SI/V8DI.
;; NOTE(review): presumably spliced into pattern names as <sse2_avx2>;
;; confirm at the use sites.
332 (define_mode_attr sse2_avx2
333 [(V16QI "sse2") (V32QI "avx2")
334 (V8HI "sse2") (V16HI "avx2")
335 (V4SI "sse2") (V8SI "avx2") (V16SI "avx512f")
336 (V2DI "sse2") (V4DI "avx2") (V8DI "avx512f")
337 (V1TI "sse2") (V2TI "avx2")])
;; ISA name string: "ssse3" for V4HI, the 128-bit integer modes and TI;
;; "avx2" for the 256-bit modes.
339 (define_mode_attr ssse3_avx2
340 [(V16QI "ssse3") (V32QI "avx2")
341 (V4HI "ssse3") (V8HI "ssse3") (V16HI "avx2")
342 (V4SI "ssse3") (V8SI "avx2")
343 (V2DI "ssse3") (V4DI "avx2")
344 (TI "ssse3") (V2TI "avx2")])
;; ISA name string: "sse4_1" for 128-bit integer modes, "avx2" for
;; 256-bit modes and "avx512f" for V16SI.
346 (define_mode_attr sse4_1_avx2
347 [(V16QI "sse4_1") (V32QI "avx2")
348 (V8HI "sse4_1") (V16HI "avx2")
349 (V4SI "sse4_1") (V8SI "avx2") (V16SI "avx512f")
350 (V2DI "sse4_1") (V4DI "avx2")])
;; ISA name string: "avx" for the SF/DF vector modes, "avx2" for the
;; SI/DI vector modes, at both 128 and 256 bits.
352 (define_mode_attr avx_avx2
353 [(V4SF "avx") (V2DF "avx")
354 (V8SF "avx") (V4DF "avx")
355 (V4SI "avx2") (V2DI "avx2")
356 (V8SI "avx2") (V4DI "avx2")])
;; Name string: "vec" for 128-bit integer modes, "avx2" for 256-bit
;; integer modes.
358 (define_mode_attr vec_avx2
359 [(V16QI "vec") (V32QI "avx2")
360 (V8HI "vec") (V16HI "avx2")
361 (V4SI "vec") (V8SI "avx2")
362 (V2DI "vec") (V4DI "avx2")])
;; ISA name string: "avx2" for the listed 128- and 256-bit modes,
;; "avx512f" for the 512-bit modes.
364 (define_mode_attr avx2_avx512f
365 [(V4SI "avx2") (V8SI "avx2") (V16SI "avx512f")
366 (V2DI "avx2") (V4DI "avx2") (V8DI "avx512f")
367 (V8SF "avx2") (V16SF "avx512f")
368 (V4DF "avx2") (V8DF "avx512f")])
;; Letter selecting the float ("f") or integer ("i") form of an extract,
;; broadcast or shuffle mnemonic, e.g. vextract<shuffletype>64x4 and
;; vbroadcast<shuffletype>32x4 in the vector move pattern.  Only "f" and
;; "i" spell real instructions, so every integer mode must map to "i".
370 (define_mode_attr shuffletype
371 [(V16SF "f") (V16SI "i") (V8DF "f") (V8DI "i")
372 (V8SF "f") (V8SI "i") (V4DF "f") (V4DI "i")
373 (V4SF "f") (V4SI "i") (V2DF "f") (V2DI "i")
374 (V32QI "i") (V16HI "i") (V16QI "i") (V8HI "i")
375 (V64QI "i") (V1TI "i") (V2TI "i")])
;; Mapping of 512-bit vector modes to the 128-bit mode with the same
;; element type (one quarter of the elements).
377 (define_mode_attr ssequartermode
378 [(V16SF "V4SF") (V8DF "V2DF") (V16SI "V4SI") (V8DI "V2DI")])
;; Mapping to a mode of twice the total size.  For the 512-bit
;; SI/DI/SF/DF modes the element count is doubled; for the HI and QI
;; modes the element count is kept and the element width is doubled.
380 (define_mode_attr ssedoublemode
381 [(V16SF "V32SF") (V16SI "V32SI") (V8DI "V16DI") (V8DF "V16DF")
382 (V16HI "V16SI") (V8HI "V8SI") (V4HI "V4SI")
383 (V32QI "V32HI") (V16QI "V16HI")])
;; Mapping of DImode vector modes to the QImode vector mode of the same
;; total size.
385 (define_mode_attr ssebytemode
386 [(V4DI "V32QI") (V2DI "V16QI")])
388 ;; All 128bit vector integer modes
389 (define_mode_iterator VI_128 [V16QI V8HI V4SI V2DI])
391 ;; All 256bit vector integer modes
392 (define_mode_iterator VI_256 [V32QI V16HI V8SI V4DI])
394 ;; All 512bit vector integer modes
395 (define_mode_iterator VI_512 [V64QI V32HI V16SI V8DI])
397 ;; Various 128bit vector integer mode combinations
398 (define_mode_iterator VI12_128 [V16QI V8HI])
399 (define_mode_iterator VI14_128 [V16QI V4SI])
400 (define_mode_iterator VI124_128 [V16QI V8HI V4SI])
401 (define_mode_iterator VI128_128 [V16QI V8HI V2DI])
402 (define_mode_iterator VI24_128 [V8HI V4SI])
403 (define_mode_iterator VI248_128 [V8HI V4SI V2DI])
404 (define_mode_iterator VI48_128 [V4SI V2DI])
406 ;; Various 256bit and 512bit vector integer mode combinations
407 (define_mode_iterator VI124_256_48_512
408 [V32QI V16HI V8SI (V8DI "TARGET_AVX512F") (V16SI "TARGET_AVX512F")])
409 (define_mode_iterator VI48_256 [V8SI V4DI])
410 (define_mode_iterator VI48_512 [V16SI V8DI])
412 ;; Int-float size matches
413 (define_mode_iterator VI4F_128 [V4SI V4SF])
414 (define_mode_iterator VI8F_128 [V2DI V2DF])
415 (define_mode_iterator VI4F_256 [V8SI V8SF])
416 (define_mode_iterator VI8F_256 [V4DI V4DF])
417 (define_mode_iterator VI8F_256_512
418 [V4DI V4DF (V8DI "TARGET_AVX512F") (V8DF "TARGET_AVX512F")])
419 (define_mode_iterator VI48F_256_512
421 (V16SI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
422 (V8DI "TARGET_AVX512F") (V8DF "TARGET_AVX512F")])
423 (define_mode_iterator VI48F_512 [V16SI V16SF V8DI V8DF])
425 ;; Mapping from float mode to required SSE level
426 (define_mode_attr sse
427 [(SF "sse") (DF "sse2")
428 (V4SF "sse") (V2DF "sse2")
429 (V16SF "avx512f") (V8SF "avx")
430 (V8DF "avx512f") (V4DF "avx")])
432 (define_mode_attr sse2
433 [(V16QI "sse2") (V32QI "avx") (V64QI "avx512f")
434 (V2DI "sse2") (V4DI "avx") (V8DI "avx512f")])
436 (define_mode_attr sse3
437 [(V16QI "sse3") (V32QI "avx")])
439 (define_mode_attr sse4_1
440 [(V4SF "sse4_1") (V2DF "sse4_1")
441 (V8SF "avx") (V4DF "avx")
444 (define_mode_attr avxsizesuffix
445 [(V64QI "512") (V32HI "512") (V16SI "512") (V8DI "512")
446 (V32QI "256") (V16HI "256") (V8SI "256") (V4DI "256")
447 (V16QI "") (V8HI "") (V4SI "") (V2DI "")
448 (V16SF "512") (V8DF "512")
449 (V8SF "256") (V4DF "256")
450 (V4SF "") (V2DF "")])
452 ;; SSE instruction mode
453 (define_mode_attr sseinsnmode
454 [(V64QI "XI") (V32HI "XI") (V16SI "XI") (V8DI "XI")
455 (V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI") (V2TI "OI")
456 (V16QI "TI") (V8HI "TI") (V4SI "TI") (V2DI "TI") (V1TI "TI")
457 (V16SF "V16SF") (V8DF "V8DF")
458 (V8SF "V8SF") (V4DF "V4DF")
459 (V4SF "V4SF") (V2DF "V2DF")
462 ;; Mapping of vector modes to corresponding mask size
463 (define_mode_attr avx512fmaskmode
465 (V16HI "HI") (V8HI "QI")
466 (V16SI "HI") (V8SI "QI") (V4SI "QI")
467 (V8DI "QI") (V4DI "QI") (V2DI "QI")
468 (V16SF "HI") (V8SF "QI") (V4SF "QI")
469 (V8DF "QI") (V4DF "QI") (V2DF "QI")])
471 ;; Mapping of vector float modes to an integer mode of the same size
472 (define_mode_attr sseintvecmode
473 [(V16SF "V16SI") (V8DF "V8DI")
474 (V8SF "V8SI") (V4DF "V4DI")
475 (V4SF "V4SI") (V2DF "V2DI")
476 (V16SI "V16SI") (V8DI "V8DI")
477 (V8SI "V8SI") (V4DI "V4DI")
478 (V4SI "V4SI") (V2DI "V2DI")
479 (V16HI "V16HI") (V8HI "V8HI")
480 (V32QI "V32QI") (V16QI "V16QI")])
482 (define_mode_attr sseintvecmodelower
484 (V8SF "v8si") (V4DF "v4di")
485 (V4SF "v4si") (V2DF "v2di")
486 (V8SI "v8si") (V4DI "v4di")
487 (V4SI "v4si") (V2DI "v2di")
488 (V16HI "v16hi") (V8HI "v8hi")
489 (V32QI "v32qi") (V16QI "v16qi")])
491 ;; Mapping of vector modes to a vector mode of double size
492 (define_mode_attr ssedoublevecmode
493 [(V32QI "V64QI") (V16HI "V32HI") (V8SI "V16SI") (V4DI "V8DI")
494 (V16QI "V32QI") (V8HI "V16HI") (V4SI "V8SI") (V2DI "V4DI")
495 (V8SF "V16SF") (V4DF "V8DF")
496 (V4SF "V8SF") (V2DF "V4DF")])
498 ;; Mapping of vector modes to a vector mode of half size
499 (define_mode_attr ssehalfvecmode
500 [(V64QI "V32QI") (V32HI "V16HI") (V16SI "V8SI") (V8DI "V4DI")
501 (V32QI "V16QI") (V16HI "V8HI") (V8SI "V4SI") (V4DI "V2DI")
502 (V16QI "V8QI") (V8HI "V4HI") (V4SI "V2SI")
503 (V16SF "V8SF") (V8DF "V4DF")
504 (V8SF "V4SF") (V4DF "V2DF")
507 ;; Mapping of vector modes to packed single mode of the same size
508 (define_mode_attr ssePSmode
509 [(V16SI "V16SF") (V8DF "V16SF")
510 (V16SF "V16SF") (V8DI "V16SF")
511 (V64QI "V16SF") (V32QI "V8SF") (V16QI "V4SF")
512 (V32HI "V16SF") (V16HI "V8SF") (V8HI "V4SF")
513 (V8SI "V8SF") (V4SI "V4SF")
514 (V4DI "V8SF") (V2DI "V4SF")
515 (V2TI "V8SF") (V1TI "V4SF")
516 (V8SF "V8SF") (V4SF "V4SF")
517 (V4DF "V8SF") (V2DF "V4SF")])
519 ;; Mapping of vector modes back to the scalar modes
520 (define_mode_attr ssescalarmode
521 [(V64QI "QI") (V32QI "QI") (V16QI "QI")
522 (V32HI "HI") (V16HI "HI") (V8HI "HI")
523 (V16SI "SI") (V8SI "SI") (V4SI "SI")
524 (V8DI "DI") (V4DI "DI") (V2DI "DI")
525 (V16SF "SF") (V8SF "SF") (V4SF "SF")
526 (V8DF "DF") (V4DF "DF") (V2DF "DF")])
528 ;; Mapping of vector modes to the 128bit modes
529 (define_mode_attr ssexmmmode
530 [(V64QI "V16QI") (V32QI "V16QI") (V16QI "V16QI")
531 (V32HI "V8HI") (V16HI "V8HI") (V8HI "V8HI")
532 (V16SI "V4SI") (V8SI "V4SI") (V4SI "V4SI")
533 (V8DI "V2DI") (V4DI "V2DI") (V2DI "V2DI")
534 (V16SF "V4SF") (V8SF "V4SF") (V4SF "V4SF")
535 (V8DF "V2DF") (V4DF "V2DF") (V2DF "V2DF")])
537 ;; Pointer size override for scalar modes (Intel asm dialect)
538 (define_mode_attr iptr
539 [(V32QI "b") (V16HI "w") (V8SI "k") (V4DI "q")
540 (V16QI "b") (V8HI "w") (V4SI "k") (V2DI "q")
541 (V8SF "k") (V4DF "q")
542 (V4SF "k") (V2DF "q")
545 ;; Number of scalar elements in each vector type
546 (define_mode_attr ssescalarnum
547 [(V64QI "64") (V16SI "16") (V8DI "8")
548 (V32QI "32") (V16HI "16") (V8SI "8") (V4DI "4")
549 (V16QI "16") (V8HI "8") (V4SI "4") (V2DI "2")
550 (V16SF "16") (V8DF "8")
551 (V8SF "8") (V4DF "4")
552 (V4SF "4") (V2DF "2")])
554 ;; Mask of scalar elements in each vector type
555 (define_mode_attr ssescalarnummask
556 [(V32QI "31") (V16HI "15") (V8SI "7") (V4DI "3")
557 (V16QI "15") (V8HI "7") (V4SI "3") (V2DI "1")
558 (V8SF "7") (V4DF "3")
559 (V4SF "3") (V2DF "1")])
;; Size in bits of one scalar element, as a string, for the listed modes.
;; Used to build mnemonic suffixes such as vmovdqa<ssescalarsize>
;; (vmovdqa32 / vmovdqa64).  Only the modes listed here are mapped.
561 (define_mode_attr ssescalarsize
562 [(V8DI "64") (V4DI "64") (V2DI "64")
563 (V32HI "16") (V16HI "16") (V8HI "16")
564 (V16SI "32") (V8SI "32") (V4SI "32")
565 (V16SF "32") (V8DF "64")])
567 ;; SSE prefix for integer vector modes
568 (define_mode_attr sseintprefix
569 [(V2DI "p") (V2DF "")
574 (V16SI "p") (V16SF "")])
576 ;; SSE scalar suffix for vector modes
577 (define_mode_attr ssescalarmodesuffix
579 (V8SF "ss") (V4DF "sd")
580 (V4SF "ss") (V2DF "sd")
581 (V8SI "ss") (V4DI "sd")
584 ;; Pack/unpack vector modes
;; Unpack direction: maps an integer vector mode to the mode of the same
;; total size whose elements are twice as wide (half as many elements).
585 (define_mode_attr sseunpackmode
586 [(V16QI "V8HI") (V8HI "V4SI") (V4SI "V2DI")
587 (V32QI "V16HI") (V16HI "V8SI") (V8SI "V4DI")
588 (V32HI "V16SI") (V64QI "V32HI") (V16SI "V8DI")])
;; Pack direction (inverse of sseunpackmode): maps an integer vector mode
;; to the mode of the same total size whose elements are half as wide.
590 (define_mode_attr ssepackmode
591 [(V8HI "V16QI") (V4SI "V8HI") (V2DI "V4SI")
592 (V16HI "V32QI") (V8SI "V16HI") (V4DI "V8SI")
593 (V32HI "V64QI") (V16SI "V32HI") (V8DI "V16SI")])
595 ;; Mapping of the max integer size for xop rotate immediate constraint
596 (define_mode_attr sserotatemax
597 [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")])
599 ;; Mapping of mode to cast intrinsic name
600 (define_mode_attr castmode [(V8SI "si") (V8SF "ps") (V4DF "pd")])
602 ;; Instruction suffix for sign and zero extensions.
603 (define_code_attr extsuffix [(sign_extend "sx") (zero_extend "zx")])
605 ;; i128 for integer vectors and TARGET_AVX2, f128 otherwise.
606 ;; i64x4 or f64x4 for 512bit modes.
607 (define_mode_attr i128
608 [(V16SF "f64x4") (V8SF "f128") (V8DF "f64x4") (V4DF "f128")
609 (V64QI "i64x4") (V32QI "%~128") (V32HI "i64x4") (V16HI "%~128")
610 (V16SI "i64x4") (V8SI "%~128") (V8DI "i64x4") (V4DI "%~128")])
;; The three 256-bit modes V8SI, V8SF and V4DF; these are the same modes
;; the castmode attribute maps to "si", "ps" and "pd".
;; NOTE(review): presumably iterated by the 256-bit cast patterns --
;; confirm at the use sites.
613 (define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF])
615 ;; Mapping of immediate bits for blend instructions
616 (define_mode_attr blendbits
617 [(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")])
619 ;; Mapping suffixes for broadcast
620 (define_mode_attr bcstscalarsuff
621 [(V16SI "d") (V16SF "ss") (V8DI "q") (V8DF "sd")])
623 ;; Include define_subst patterns for instructions with mask
626 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
628 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
632 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
634 ;; All of these patterns are enabled for SSE1 as well as SSE2.
635 ;; This is essential for maintaining stable calling conventions.
637 (define_expand "mov<mode>"
638 [(set (match_operand:VMOVE 0 "nonimmediate_operand")
639 (match_operand:VMOVE 1 "nonimmediate_operand"))]
642 ix86_expand_vector_move (<MODE>mode, operands);
646 (define_insn "*mov<mode>_internal"
647 [(set (match_operand:VMOVE 0 "nonimmediate_operand" "=v,v ,m")
648 (match_operand:VMOVE 1 "nonimmediate_or_sse_const_operand" "C ,vm,v"))]
650 && (register_operand (operands[0], <MODE>mode)
651 || register_operand (operands[1], <MODE>mode))"
653 int mode = get_attr_mode (insn);
654 switch (which_alternative)
657 return standard_sse_constant_opcode (insn, operands[1]);
660 /* There is no evex-encoded vmov* for sizes smaller than 64-bytes
661 in avx512f, so we need to use workarounds, to access sse registers
662 16-31, which are evex-only. */
663 if (TARGET_AVX512F && GET_MODE_SIZE (<MODE>mode) < 64
664 && (EXT_REX_SSE_REGNO_P (REGNO (operands[0]))
665 || EXT_REX_SSE_REGNO_P (REGNO (operands[1]))))
667 if (memory_operand (operands[0], <MODE>mode))
669 if (GET_MODE_SIZE (<MODE>mode) == 32)
670 return "vextract<shuffletype>64x4\t{$0x0, %g1, %0|%0, %g1, 0x0}";
671 else if (GET_MODE_SIZE (<MODE>mode) == 16)
672 return "vextract<shuffletype>32x4\t{$0x0, %g1, %0|%0, %g1, 0x0}";
676 else if (memory_operand (operands[1], <MODE>mode))
678 if (GET_MODE_SIZE (<MODE>mode) == 32)
679 return "vbroadcast<shuffletype>64x4\t{%1, %g0|%g0, %1}";
680 else if (GET_MODE_SIZE (<MODE>mode) == 16)
681 return "vbroadcast<shuffletype>32x4\t{%1, %g0|%g0, %1}";
686 /* Reg -> reg move is always aligned. Just use wider move. */
691 return "vmovaps\t{%g1, %g0|%g0, %g1}";
694 return "vmovapd\t{%g1, %g0|%g0, %g1}";
697 return "vmovdqa64\t{%g1, %g0|%g0, %g1}";
708 && (misaligned_operand (operands[0], <MODE>mode)
709 || misaligned_operand (operands[1], <MODE>mode)))
710 return "vmovups\t{%1, %0|%0, %1}";
712 return "%vmovaps\t{%1, %0|%0, %1}";
718 && (misaligned_operand (operands[0], <MODE>mode)
719 || misaligned_operand (operands[1], <MODE>mode)))
720 return "vmovupd\t{%1, %0|%0, %1}";
722 return "%vmovapd\t{%1, %0|%0, %1}";
727 && (misaligned_operand (operands[0], <MODE>mode)
728 || misaligned_operand (operands[1], <MODE>mode)))
729 return "vmovdqu\t{%1, %0|%0, %1}";
731 return "%vmovdqa\t{%1, %0|%0, %1}";
733 if (misaligned_operand (operands[0], <MODE>mode)
734 || misaligned_operand (operands[1], <MODE>mode))
735 return "vmovdqu64\t{%1, %0|%0, %1}";
737 return "vmovdqa64\t{%1, %0|%0, %1}";
746 [(set_attr "type" "sselog1,ssemov,ssemov")
747 (set_attr "prefix" "maybe_vex")
749 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
750 (const_string "<ssePSmode>")
751 (and (eq_attr "alternative" "2")
752 (match_test "TARGET_SSE_TYPELESS_STORES"))
753 (const_string "<ssePSmode>")
754 (match_test "TARGET_AVX")
755 (const_string "<sseinsnmode>")
756 (ior (not (match_test "TARGET_SSE2"))
757 (match_test "optimize_function_for_size_p (cfun)"))
758 (const_string "V4SF")
759 (and (eq_attr "alternative" "0")
760 (match_test "TARGET_SSE_LOAD0_BY_PXOR"))
763 (const_string "<sseinsnmode>")))])
765 (define_insn "avx512f_load<mode>_mask"
766 [(set (match_operand:VI48F_512 0 "register_operand" "=v,v")
768 (match_operand:VI48F_512 1 "nonimmediate_operand" "v,m")
769 (match_operand:VI48F_512 2 "vector_move_operand" "0C,0C")
770 (match_operand:<avx512fmaskmode> 3 "register_operand" "k,k")))]
773 switch (MODE_<sseinsnmode>)
777 return "vmova<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}";
779 return "vmovdqa<ssescalarsize>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}";
782 [(set_attr "type" "ssemov")
783 (set_attr "prefix" "evex")
784 (set_attr "memory" "none,load")
785 (set_attr "mode" "<sseinsnmode>")])
787 (define_insn "avx512f_blendm<mode>"
788 [(set (match_operand:VI48F_512 0 "register_operand" "=v")
790 (match_operand:VI48F_512 2 "nonimmediate_operand" "vm")
791 (match_operand:VI48F_512 1 "register_operand" "v")
792 (match_operand:<avx512fmaskmode> 3 "register_operand" "k")))]
794 "v<sseintprefix>blendm<ssemodesuffix>\t{%2, %1, %0%{%3%}|%0%{%3%}, %1, %2}"
795 [(set_attr "type" "ssemov")
796 (set_attr "prefix" "evex")
797 (set_attr "mode" "<sseinsnmode>")])
799 (define_insn "avx512f_store<mode>_mask"
800 [(set (match_operand:VI48F_512 0 "memory_operand" "=m")
802 (match_operand:VI48F_512 1 "register_operand" "v")
804 (match_operand:<avx512fmaskmode> 2 "register_operand" "k")))]
807 switch (MODE_<sseinsnmode>)
811 return "vmova<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
813 return "vmovdqa<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
816 [(set_attr "type" "ssemov")
817 (set_attr "prefix" "evex")
818 (set_attr "memory" "store")
819 (set_attr "mode" "<sseinsnmode>")])
821 (define_insn "sse2_movq128"
822 [(set (match_operand:V2DI 0 "register_operand" "=x")
825 (match_operand:V2DI 1 "nonimmediate_operand" "xm")
826 (parallel [(const_int 0)]))
829 "%vmovq\t{%1, %0|%0, %q1}"
830 [(set_attr "type" "ssemov")
831 (set_attr "prefix" "maybe_vex")
832 (set_attr "mode" "TI")])
834 ;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
835 ;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
836 ;; from memory, we'd prefer to load the memory directly into the %xmm
837 ;; register. To facilitate this happy circumstance, this pattern won't
838 ;; split until after register allocation. If the 64-bit value didn't
839 ;; come from memory, this is the best we can do. This is much better
840 ;; than storing %edx:%eax into a stack temporary and loading an %xmm
843 (define_insn_and_split "movdi_to_sse"
845 [(set (match_operand:V4SI 0 "register_operand" "=?x,x")
846 (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
847 (clobber (match_scratch:V4SI 2 "=&x,X"))])]
848 "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_TO_VEC"
850 "&& reload_completed"
853 if (register_operand (operands[1], DImode))
855 /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
856 Assemble the 64-bit DImode value in an xmm register. */
857 emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
858 gen_rtx_SUBREG (SImode, operands[1], 0)));
859 emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
860 gen_rtx_SUBREG (SImode, operands[1], 4)));
861 emit_insn (gen_vec_interleave_lowv4si (operands[0], operands[0],
864 else if (memory_operand (operands[1], DImode))
866 rtx tmp = gen_reg_rtx (V2DImode);
867 emit_insn (gen_vec_concatv2di (tmp, operands[1], const0_rtx));
868 emit_move_insn (operands[0], gen_lowpart (V4SImode, tmp));
875 [(set (match_operand:V4SF 0 "register_operand")
876 (match_operand:V4SF 1 "zero_extended_scalar_load_operand"))]
877 "TARGET_SSE && reload_completed"
880 (vec_duplicate:V4SF (match_dup 1))
884 operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
885 operands[2] = CONST0_RTX (V4SFmode);
889 [(set (match_operand:V2DF 0 "register_operand")
890 (match_operand:V2DF 1 "zero_extended_scalar_load_operand"))]
891 "TARGET_SSE2 && reload_completed"
892 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
894 operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
895 operands[2] = CONST0_RTX (DFmode);
898 (define_expand "push<mode>1"
899 [(match_operand:VMOVE 0 "register_operand")]
902 ix86_expand_push (<MODE>mode, operands[0]);
906 (define_expand "movmisalign<mode>"
907 [(set (match_operand:VMOVE 0 "nonimmediate_operand")
908 (match_operand:VMOVE 1 "nonimmediate_operand"))]
911 ix86_expand_vector_move_misalign (<MODE>mode, operands);
915 (define_insn "<sse>_loadu<ssemodesuffix><avxsizesuffix><mask_name>"
916 [(set (match_operand:VF 0 "register_operand" "=v")
918 [(match_operand:VF 1 "nonimmediate_operand" "vm")]
920 "TARGET_SSE && <mask_mode512bit_condition>"
922 switch (get_attr_mode (insn))
927 return "%vmovups\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
929 return "%vmovu<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
932 [(set_attr "type" "ssemov")
933 (set_attr "movu" "1")
934 (set_attr "prefix" "maybe_vex")
936 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
937 (const_string "<ssePSmode>")
938 (match_test "TARGET_AVX")
939 (const_string "<MODE>")
940 (match_test "optimize_function_for_size_p (cfun)")
941 (const_string "V4SF")
943 (const_string "<MODE>")))])
945 (define_insn "<sse>_storeu<ssemodesuffix><avxsizesuffix>"
946 [(set (match_operand:VF 0 "memory_operand" "=m")
948 [(match_operand:VF 1 "register_operand" "v")]
952 switch (get_attr_mode (insn))
957 return "%vmovups\t{%1, %0|%0, %1}";
959 return "%vmovu<ssemodesuffix>\t{%1, %0|%0, %1}";
962 [(set_attr "type" "ssemov")
963 (set_attr "movu" "1")
964 (set_attr "prefix" "maybe_vex")
966 (cond [(ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
967 (match_test "TARGET_SSE_TYPELESS_STORES"))
968 (const_string "<ssePSmode>")
969 (match_test "TARGET_AVX")
970 (const_string "<MODE>")
971 (match_test "optimize_function_for_size_p (cfun)")
972 (const_string "V4SF")
974 (const_string "<MODE>")))])
976 (define_insn "avx512f_storeu<ssemodesuffix>512_mask"
977 [(set (match_operand:VF_512 0 "memory_operand" "=m")
980 [(match_operand:VF_512 1 "register_operand" "v")]
983 (match_operand:<avx512fmaskmode> 2 "register_operand" "k")))]
986 switch (get_attr_mode (insn))
989 return "vmovups\t{%1, %0%{%2%}|%0%{%2%}, %1}";
991 return "vmovu<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
994 [(set_attr "type" "ssemov")
995 (set_attr "movu" "1")
996 (set_attr "memory" "store")
997 (set_attr "prefix" "evex")
998 (set_attr "mode" "<sseinsnmode>")])
1000 (define_insn "<sse2_avx_avx512f>_loaddqu<mode><mask_name>"
1001 [(set (match_operand:VI_UNALIGNED_LOADSTORE 0 "register_operand" "=v")
1002 (unspec:VI_UNALIGNED_LOADSTORE
1003 [(match_operand:VI_UNALIGNED_LOADSTORE 1 "nonimmediate_operand" "vm")]
1005 "TARGET_SSE2 && <mask_mode512bit_condition>"
1007 switch (get_attr_mode (insn))
1011 return "%vmovups\t{%1, %0|%0, %1}";
1013 if (<MODE>mode == V8DImode)
1014 return "vmovdqu64\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
1016 return "vmovdqu32\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
1018 return "%vmovdqu\t{%1, %0|%0, %1}";
1021 [(set_attr "type" "ssemov")
1022 (set_attr "movu" "1")
1023 (set (attr "prefix_data16")
1025 (match_test "TARGET_AVX")
1027 (const_string "1")))
1028 (set_attr "prefix" "maybe_vex")
1030 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
1031 (const_string "<ssePSmode>")
1032 (match_test "TARGET_AVX")
1033 (const_string "<sseinsnmode>")
1034 (match_test "optimize_function_for_size_p (cfun)")
1035 (const_string "V4SF")
1037 (const_string "<sseinsnmode>")))])
1039 (define_insn "<sse2_avx_avx512f>_storedqu<mode>"
1040 [(set (match_operand:VI_UNALIGNED_LOADSTORE 0 "memory_operand" "=m")
1041 (unspec:VI_UNALIGNED_LOADSTORE
1042 [(match_operand:VI_UNALIGNED_LOADSTORE 1 "register_operand" "v")]
1046 switch (get_attr_mode (insn))
1050 return "%vmovups\t{%1, %0|%0, %1}";
1052 if (<MODE>mode == V8DImode)
1053 return "vmovdqu64\t{%1, %0|%0, %1}";
1055 return "vmovdqu32\t{%1, %0|%0, %1}";
1057 return "%vmovdqu\t{%1, %0|%0, %1}";
1060 [(set_attr "type" "ssemov")
1061 (set_attr "movu" "1")
1062 (set (attr "prefix_data16")
1064 (match_test "TARGET_AVX")
1066 (const_string "1")))
1067 (set_attr "prefix" "maybe_vex")
1069 (cond [(ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
1070 (match_test "TARGET_SSE_TYPELESS_STORES"))
1071 (const_string "<ssePSmode>")
1072 (match_test "TARGET_AVX")
1073 (const_string "<sseinsnmode>")
1074 (match_test "optimize_function_for_size_p (cfun)")
1075 (const_string "V4SF")
1077 (const_string "<sseinsnmode>")))])
1079 (define_insn "avx512f_storedqu<mode>_mask"
1080 [(set (match_operand:VI48_512 0 "memory_operand" "=m")
1083 [(match_operand:VI48_512 1 "register_operand" "v")]
1086 (match_operand:<avx512fmaskmode> 2 "register_operand" "k")))]
1089 if (<MODE>mode == V8DImode)
1090 return "vmovdqu64\t{%1, %0%{%2%}|%0%{%2%}, %1}";
1092 return "vmovdqu32\t{%1, %0%{%2%}|%0%{%2%}, %1}";
1094 [(set_attr "type" "ssemov")
1095 (set_attr "movu" "1")
1096 (set_attr "memory" "store")
1097 (set_attr "prefix" "evex")
1098 (set_attr "mode" "<sseinsnmode>")])
1100 (define_insn "<sse3>_lddqu<avxsizesuffix>"
1101 [(set (match_operand:VI1 0 "register_operand" "=x")
1102 (unspec:VI1 [(match_operand:VI1 1 "memory_operand" "m")]
1105 "%vlddqu\t{%1, %0|%0, %1}"
1106 [(set_attr "type" "ssemov")
1107 (set_attr "movu" "1")
1108 (set (attr "prefix_data16")
1110 (match_test "TARGET_AVX")
1112 (const_string "0")))
1113 (set (attr "prefix_rep")
1115 (match_test "TARGET_AVX")
1117 (const_string "1")))
1118 (set_attr "prefix" "maybe_vex")
1119 (set_attr "mode" "<sseinsnmode>")])
1121 (define_insn "sse2_movnti<mode>"
1122 [(set (match_operand:SWI48 0 "memory_operand" "=m")
1123 (unspec:SWI48 [(match_operand:SWI48 1 "register_operand" "r")]
1126 "movnti\t{%1, %0|%0, %1}"
1127 [(set_attr "type" "ssemov")
1128 (set_attr "prefix_data16" "0")
1129 (set_attr "mode" "<MODE>")])
1131 (define_insn "<sse>_movnt<mode>"
1132 [(set (match_operand:VF 0 "memory_operand" "=m")
1134 [(match_operand:VF 1 "register_operand" "v")]
1137 "%vmovnt<ssemodesuffix>\t{%1, %0|%0, %1}"
1138 [(set_attr "type" "ssemov")
1139 (set_attr "prefix" "maybe_vex")
1140 (set_attr "mode" "<MODE>")])
1142 (define_insn "<sse2>_movnt<mode>"
1143 [(set (match_operand:VI8 0 "memory_operand" "=m")
1144 (unspec:VI8 [(match_operand:VI8 1 "register_operand" "v")]
1147 "%vmovntdq\t{%1, %0|%0, %1}"
1148 [(set_attr "type" "ssecvt")
1149 (set (attr "prefix_data16")
1151 (match_test "TARGET_AVX")
1153 (const_string "1")))
1154 (set_attr "prefix" "maybe_vex")
1155 (set_attr "mode" "<sseinsnmode>")])
1157 ; Expand patterns for non-temporal stores. At the moment, only those
1158 ; that directly map to insns are defined; it would be possible to
1159 ; define patterns for other modes that would expand to several insns.
1161 ;; Modes handled by storent patterns.
1162 (define_mode_iterator STORENT_MODE
1163 [(DI "TARGET_SSE2 && TARGET_64BIT") (SI "TARGET_SSE2")
1164 (SF "TARGET_SSE4A") (DF "TARGET_SSE4A")
1165 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") (V2DI "TARGET_SSE2")
1166 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
1167 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
1169 (define_expand "storent<mode>"
1170 [(set (match_operand:STORENT_MODE 0 "memory_operand")
1171 (unspec:STORENT_MODE
1172 [(match_operand:STORENT_MODE 1 "register_operand")]
1176 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1178 ;; Parallel floating point arithmetic
1180 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander for a unary <code> operation on VF-mode vectors.  All of the
;; work is delegated to ix86_expand_fp_absneg_operator (the helper name
;; indicates the codes are abs/neg), after which expansion is DONE.
1182 (define_expand "<code><mode>2"
1183 [(set (match_operand:VF 0 "register_operand")
1185 (match_operand:VF 1 "register_operand")))]
1187 "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
;; Matches an absneg_operator applied to operand 1, together with a
;; (use ...) of operand 2, and is split once reload has completed.  The
;; split rewrites the operation as a two-input logical op on the operands:
;; XOR when the matched code is NEG, AND otherwise.  If operand 1 is a MEM
;; the two inputs are swapped so that the memory operand comes second.
;; Alternatives 0-1 are the non-AVX forms, alternatives 2-3 the AVX forms.
;; NOTE(review): operand 2 is presumably the sign-bit mask supplied by the
;; expander -- confirm against ix86_expand_fp_absneg_operator.
1189 (define_insn_and_split "*absneg<mode>2"
1190 [(set (match_operand:VF 0 "register_operand" "=x,x,v,v")
1191 (match_operator:VF 3 "absneg_operator"
1192 [(match_operand:VF 1 "nonimmediate_operand" "0, xm, v, m")]))
1193 (use (match_operand:VF 2 "nonimmediate_operand" "xm, 0, vm,v"))]
1196 "&& reload_completed"
1199 enum rtx_code absneg_op;
1205 if (MEM_P (operands[1]))
1206 op1 = operands[2], op2 = operands[1];
1208 op1 = operands[1], op2 = operands[2];
1213 if (rtx_equal_p (operands[0], operands[1]))
1219 absneg_op = GET_CODE (operands[3]) == NEG ? XOR : AND;
1220 t = gen_rtx_fmt_ee (absneg_op, <MODE>mode, op1, op2);
1221 t = gen_rtx_SET (VOIDmode, operands[0], t);
1225 [(set_attr "isa" "noavx,noavx,avx,avx")])
;; Expander for the <plusminus_insn> operations on VF-mode vectors,
;; including the <mask_name> variants.  Enabled when TARGET_SSE and
;; <mask_mode512bit_condition> hold.  Both inputs may be memory here;
;; ix86_fixup_binary_operands_no_copy adjusts them before the insn is
;; generated.
1227 (define_expand "<plusminus_insn><mode>3<mask_name>"
1228 [(set (match_operand:VF 0 "register_operand")
1230 (match_operand:VF 1 "nonimmediate_operand")
1231 (match_operand:VF 2 "nonimmediate_operand")))]
1232 "TARGET_SSE && <mask_mode512bit_condition>"
1233 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Insn for the <plusminus_insn> operations on VF-mode vectors.
;; Alternative 0 (isa "noavx") is the two-operand form whose destination
;; is tied to operand 1; alternative 1 (isa "avx") is the three-operand
;; "v"-prefixed form, which also prints <mask_operand3>.  The condition
;; additionally requires ix86_binary_operator_ok for the operands.
1235 (define_insn "*<plusminus_insn><mode>3<mask_name>"
1236 [(set (match_operand:VF 0 "register_operand" "=x,v")
1238 (match_operand:VF 1 "nonimmediate_operand" "<comm>0,v")
1239 (match_operand:VF 2 "nonimmediate_operand" "xm,vm")))]
1240 "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands) && <mask_mode512bit_condition>"
1242 <plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
1243 v<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
1244 [(set_attr "isa" "noavx,avx")
1245 (set_attr "type" "sseadd")
1246 (set_attr "prefix" "<mask_prefix3>")
1247 (set_attr "mode" "<MODE>")])
;; <plusminus_insn> on VF_128 operands, emitted with the scalar mnemonic
;; suffix (<ssescalarmodesuffix>); the insn "mode" attribute is the scalar
;; mode.  Operand 1 must be a register; operand 2 may be memory and is
;; printed with the %<iptr> modifier in the second syntax variant.
;; Alternative 0 is the non-AVX form, alternative 1 the VEX form.
1249 (define_insn "<sse>_vm<plusminus_insn><mode>3"
1250 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1253 (match_operand:VF_128 1 "register_operand" "0,v")
1254 (match_operand:VF_128 2 "nonimmediate_operand" "xm,vm"))
1259 <plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
1260 v<plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %<iptr>2}"
1261 [(set_attr "isa" "noavx,avx")
1262 (set_attr "type" "sseadd")
1263 (set_attr "prefix" "orig,vex")
1264 (set_attr "mode" "<ssescalarmode>")])
;; Expander for multiplication of VF-mode vectors, including the
;; <mask_name> variants.  Enabled when TARGET_SSE and
;; <mask_mode512bit_condition> hold; operands are adjusted by
;; ix86_fixup_binary_operands_no_copy with rtx code MULT.
1266 (define_expand "mul<mode>3<mask_name>"
1267 [(set (match_operand:VF 0 "register_operand")
1269 (match_operand:VF 1 "nonimmediate_operand")
1270 (match_operand:VF 2 "nonimmediate_operand")))]
1271 "TARGET_SSE && <mask_mode512bit_condition>"
1272 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; Insn for multiplication of VF-mode vectors.  Operand 1 carries the "%"
;; constraint marker, so operands 1 and 2 may be exchanged.  Alternative 0
;; emits mul<ssemodesuffix> (non-AVX, destination tied to operand 1);
;; alternative 1 emits the three-operand vmul form with <mask_operand3>.
1274 (define_insn "*mul<mode>3<mask_name>"
1275 [(set (match_operand:VF 0 "register_operand" "=x,v")
1277 (match_operand:VF 1 "nonimmediate_operand" "%0,v")
1278 (match_operand:VF 2 "nonimmediate_operand" "xm,vm")))]
1279 "TARGET_SSE && ix86_binary_operator_ok (MULT, <MODE>mode, operands) && <mask_mode512bit_condition>"
1281 mul<ssemodesuffix>\t{%2, %0|%0, %2}
1282 vmul<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
1283 [(set_attr "isa" "noavx,avx")
1284 (set_attr "type" "ssemul")
1285 (set_attr "prefix" "<mask_prefix3>")
1286 (set_attr "btver2_decode" "direct,double")
1287 (set_attr "mode" "<MODE>")])
;; <multdiv_mnemonic> on VF_128 operands, emitted with the scalar mnemonic
;; suffix; the insn "mode" attribute is the scalar mode and the "type"
;; attribute is derived from the mnemonic ("sse<multdiv_mnemonic>").
;; Operand 1 must be a register; operand 2 may be memory.
1289 (define_insn "<sse>_vm<multdiv_mnemonic><mode>3"
1290 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1293 (match_operand:VF_128 1 "register_operand" "0,v")
1294 (match_operand:VF_128 2 "nonimmediate_operand" "xm,vm"))
1299 <multdiv_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
1300 v<multdiv_mnemonic><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %<iptr>2}"
1301 [(set_attr "isa" "noavx,avx")
1302 (set_attr "type" "sse<multdiv_mnemonic>")
1303 (set_attr "prefix" "orig,vex")
1304 (set_attr "btver2_decode" "direct,double")
1305 (set_attr "mode" "<ssescalarmode>")])
;; Expander for division of VF2-mode vectors.  The dividend (operand 1)
;; must already be a register; the divisor (operand 2) may be memory.
;; Operands are adjusted by ix86_fixup_binary_operands_no_copy.
1307 (define_expand "div<mode>3"
1308 [(set (match_operand:VF2 0 "register_operand")
1309 (div:VF2 (match_operand:VF2 1 "register_operand")
1310 (match_operand:VF2 2 "nonimmediate_operand")))]
1312 "ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);")
;; Expander for division of VF1-mode vectors.  After the usual operand
;; fix-up, a guarded path calls ix86_emit_swdivsf instead of using the
;; divide insn.  The visible guards are: TARGET_RECIP_VEC_DIV, not
;; optimizing the insn for size, finite-math-only, no trapping math, and
;; unsafe math optimizations enabled.
;; NOTE(review): ix86_emit_swdivsf is presumably a reciprocal-based
;; software division sequence -- confirm in i386.c.
1314 (define_expand "div<mode>3"
1315 [(set (match_operand:VF1 0 "register_operand")
1316 (div:VF1 (match_operand:VF1 1 "register_operand")
1317 (match_operand:VF1 2 "nonimmediate_operand")))]
1320 ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);
1323 && TARGET_RECIP_VEC_DIV
1324 && !optimize_insn_for_size_p ()
1325 && flag_finite_math_only && !flag_trapping_math
1326 && flag_unsafe_math_optimizations)
1328 ix86_emit_swdivsf (operands[0], operands[1], operands[2], <MODE>mode);
;; Insn for division of VF-mode vectors.  Operand 1 must be a register
;; and has no "%" marker, so the operands are never exchanged.
;; Alternative 0 emits div<ssemodesuffix> with the destination tied to
;; operand 1; alternative 1 emits the three-operand vdiv form, which also
;; prints <mask_operand3>.
1333 (define_insn "<sse>_div<mode>3<mask_name>"
1334 [(set (match_operand:VF 0 "register_operand" "=x,v")
1336 (match_operand:VF 1 "register_operand" "0,v")
1337 (match_operand:VF 2 "nonimmediate_operand" "xm,vm")))]
1338 "TARGET_SSE && <mask_mode512bit_condition>"
1340 div<ssemodesuffix>\t{%2, %0|%0, %2}
1341 vdiv<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
1342 [(set_attr "isa" "noavx,avx")
1343 (set_attr "type" "ssediv")
1344 (set_attr "prefix" "<mask_prefix3>")
1345 (set_attr "mode" "<MODE>")])
;; UNSPEC_RCP on a VF1_128_256 operand, emitted as rcpps (with the "%v"
;; modifier and "maybe_vex" prefix).  The input may be a register or
;; memory; the result goes to an "x" register.
1347 (define_insn "<sse>_rcp<mode>2"
1348 [(set (match_operand:VF1_128_256 0 "register_operand" "=x")
1350 [(match_operand:VF1_128_256 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
1352 "%vrcpps\t{%1, %0|%0, %1}"
1353 [(set_attr "type" "sse")
1354 (set_attr "atom_sse_attr" "rcp")
1355 (set_attr "btver2_sse_attr" "rcp")
1356 (set_attr "prefix" "maybe_vex")
1357 (set_attr "mode" "<MODE>")])
;; V4SF pattern emitted as rcpss / vrcpss; the insn "mode" attribute is SF.
;; Operand 1 (register or memory) is the unspec input and is printed with
;; %k1 in the second syntax variant.  Operand 2 is a register: it is tied
;; to the destination in the non-AVX alternative and is the extra source
;; of the three-operand VEX alternative.
1359 (define_insn "sse_vmrcpv4sf2"
1360 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1362 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
1364 (match_operand:V4SF 2 "register_operand" "0,x")
1368 rcpss\t{%1, %0|%0, %k1}
1369 vrcpss\t{%1, %2, %0|%0, %2, %k1}"
1370 [(set_attr "isa" "noavx,avx")
1371 (set_attr "type" "sse")
1372 (set_attr "atom_sse_attr" "rcp")
1373 (set_attr "btver2_sse_attr" "rcp")
1374 (set_attr "prefix" "orig,vex")
1375 (set_attr "mode" "SF")])
;; VF_512 pattern emitted as vrcp14<ssemodesuffix> with the EVEX prefix.
;; The input may be a register or memory; the destination is printed
;; together with <mask_operand2>.
1377 (define_insn "<mask_codefor>rcp14<mode><mask_name>"
1378 [(set (match_operand:VF_512 0 "register_operand" "=v")
1380 [(match_operand:VF_512 1 "nonimmediate_operand" "vm")]
1383 "vrcp14<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
1384 [(set_attr "type" "sse")
1385 (set_attr "prefix" "evex")
1386 (set_attr "mode" "<MODE>")])
;; VF_128 pattern emitted as vrcp14 with the scalar mnemonic suffix and
;; the EVEX prefix.  It takes two inputs: operand 1 must be a register,
;; operand 2 may be a register or memory.
;; NOTE(review): the "mode" attribute is the vector <MODE>, whereas other
;; scalar-suffix patterns in this file use <ssescalarmode> -- confirm.
1388 (define_insn "*srcp14<mode>"
1389 [(set (match_operand:VF_128 0 "register_operand" "=v")
1392 [(match_operand:VF_128 1 "register_operand" "v")
1393 (match_operand:VF_128 2 "nonimmediate_operand" "vm")]
1398 "vrcp14<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1399 [(set_attr "type" "sse")
1400 (set_attr "prefix" "evex")
1401 (set_attr "mode" "<MODE>")])
;; Expander for square root of VF2-mode vectors.  The input may be a
;; register or memory; no preparation code is visible for this variant.
1403 (define_expand "sqrt<mode>2"
1404 [(set (match_operand:VF2 0 "register_operand")
1405 (sqrt:VF2 (match_operand:VF2 1 "nonimmediate_operand")))]
;; Expander for square root of VF1-mode vectors.  A guarded path calls
;; ix86_emit_swsqrtsf with its last argument false instead of using the
;; sqrt insn.  The visible guards are: TARGET_RECIP_VEC_SQRT, not
;; optimizing the insn for size, finite-math-only, no trapping math, and
;; unsafe math optimizations enabled.
1408 (define_expand "sqrt<mode>2"
1409 [(set (match_operand:VF1 0 "register_operand")
1410 (sqrt:VF1 (match_operand:VF1 1 "nonimmediate_operand")))]
1414 && TARGET_RECIP_VEC_SQRT
1415 && !optimize_insn_for_size_p ()
1416 && flag_finite_math_only && !flag_trapping_math
1417 && flag_unsafe_math_optimizations)
1419 ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, false);
;; Insn for square root of VF-mode vectors, emitted as
;; sqrt<ssemodesuffix> (with the "%v" modifier) and printing
;; <mask_operand2> after the destination.  Enabled when TARGET_SSE and
;; <mask_mode512bit_condition> hold.
1424 (define_insn "<sse>_sqrt<mode>2<mask_name>"
1425 [(set (match_operand:VF 0 "register_operand" "=v")
1426 (sqrt:VF (match_operand:VF 1 "nonimmediate_operand" "vm")))]
1427 "TARGET_SSE && <mask_mode512bit_condition>"
1428 "%vsqrt<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
1429 [(set_attr "type" "sse")
1430 (set_attr "atom_sse_attr" "sqrt")
1431 (set_attr "btver2_sse_attr" "sqrt")
1432 (set_attr "prefix" "maybe_vex")
1433 (set_attr "mode" "<MODE>")])
;; VF_128 square-root pattern emitted with the scalar mnemonic suffix;
;; the insn "mode" attribute is the scalar mode.  Operand 1 (register or
;; memory) is the value operated on.  Operand 2 is a register: tied to
;; the destination in the non-AVX alternative, and the extra source of
;; the three-operand VEX alternative.
1435 (define_insn "<sse>_vmsqrt<mode>2"
1436 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1439 (match_operand:VF_128 1 "nonimmediate_operand" "xm,vm"))
1440 (match_operand:VF_128 2 "register_operand" "0,v")
1444 sqrt<ssescalarmodesuffix>\t{%1, %0|%0, %<iptr>1}
1445 vsqrt<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %<iptr>1}"
1446 [(set_attr "isa" "noavx,avx")
1447 (set_attr "type" "sse")
1448 (set_attr "atom_sse_attr" "sqrt")
1449 (set_attr "prefix" "orig,vex")
1450 (set_attr "btver2_sse_attr" "sqrt")
1451 (set_attr "mode" "<ssescalarmode>")])
;; Expander for UNSPEC_RSQRT on VF1_128_256 vectors.  The preparation
;; code calls ix86_emit_swsqrtsf with its last argument true (the sqrt
;; expander in this file passes false to the same helper).
1453 (define_expand "rsqrt<mode>2"
1454 [(set (match_operand:VF1_128_256 0 "register_operand")
1456 [(match_operand:VF1_128_256 1 "nonimmediate_operand")] UNSPEC_RSQRT))]
1459 ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, true);
;; UNSPEC_RSQRT on a VF1_128_256 operand, emitted as rsqrtps (with the
;; "%v" modifier and "maybe_vex" prefix).  The input may be a register or
;; memory; the result goes to an "x" register.
1463 (define_insn "<sse>_rsqrt<mode>2"
1464 [(set (match_operand:VF1_128_256 0 "register_operand" "=x")
1466 [(match_operand:VF1_128_256 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
1468 "%vrsqrtps\t{%1, %0|%0, %1}"
1469 [(set_attr "type" "sse")
1470 (set_attr "prefix" "maybe_vex")
1471 (set_attr "mode" "<MODE>")])
;; VF_512 pattern emitted as vrsqrt14<ssemodesuffix> with the EVEX
;; prefix.  The input may be a register or memory; the destination is
;; printed together with <mask_operand2>.
1473 (define_insn "<mask_codefor>rsqrt14<mode><mask_name>"
1474 [(set (match_operand:VF_512 0 "register_operand" "=v")
1476 [(match_operand:VF_512 1 "nonimmediate_operand" "vm")]
1479 "vrsqrt14<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
1480 [(set_attr "type" "sse")
1481 (set_attr "prefix" "evex")
1482 (set_attr "mode" "<MODE>")])
;; VF_128 pattern emitted as vrsqrt14 with the scalar mnemonic suffix and
;; the EVEX prefix.  It takes two inputs: operand 1 must be a register,
;; operand 2 may be a register or memory.
1484 (define_insn "*rsqrt14<mode>"
1485 [(set (match_operand:VF_128 0 "register_operand" "=v")
1488 [(match_operand:VF_128 1 "register_operand" "v")
1489 (match_operand:VF_128 2 "nonimmediate_operand" "vm")]
1494 "vrsqrt14<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1495 [(set_attr "type" "sse")
1496 (set_attr "prefix" "evex")
1497 (set_attr "mode" "<MODE>")])
;; V4SF pattern emitted as rsqrtss / vrsqrtss; the insn "mode" attribute
;; is SF.  Operand 1 (register or memory) is the unspec input and is
;; printed with %k1 in the second syntax variant.  Operand 2 is a
;; register: tied to the destination in the non-AVX alternative, and the
;; extra source of the three-operand VEX alternative.
1499 (define_insn "sse_vmrsqrtv4sf2"
1500 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1502 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
1504 (match_operand:V4SF 2 "register_operand" "0,x")
1508 rsqrtss\t{%1, %0|%0, %k1}
1509 vrsqrtss\t{%1, %2, %0|%0, %2, %k1}"
1510 [(set_attr "isa" "noavx,avx")
1511 (set_attr "type" "sse")
1512 (set_attr "prefix" "orig,vex")
1513 (set_attr "mode" "SF")])
1515 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
1516 ;; isn't really correct, as those rtl operators aren't defined when
1517 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
;; Expander for the <code> operations on VF-mode vectors (the <code>
;; values are presumably smin/smax, per the note preceding this pattern
;; -- confirm the iterator).  When
;; flag_finite_math_only is off, operand 1 is first forced into a
;; register, which matches the register_operand predicate of the
;; non-finite insn; then the usual binary-operand fix-up is applied.
1519 (define_expand "<code><mode>3<mask_name>"
1520 [(set (match_operand:VF 0 "register_operand")
1522 (match_operand:VF 1 "nonimmediate_operand")
1523 (match_operand:VF 2 "nonimmediate_operand")))]
1524 "TARGET_SSE && <mask_mode512bit_condition>"
1526 if (!flag_finite_math_only)
1527 operands[1] = force_reg (<MODE>mode, operands[1]);
1528 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
;; Variant of the <code> insn used only when flag_finite_math_only is
;; set.  Operand 1 carries the "%" constraint marker, so here operands 1
;; and 2 may be exchanged; the condition also requires
;; ix86_binary_operator_ok.  Alternative 0 is the non-AVX two-operand
;; form, alternative 1 the "v"-prefixed three-operand form.
1531 (define_insn "*<code><mode>3_finite<mask_name>"
1532 [(set (match_operand:VF 0 "register_operand" "=x,v")
1534 (match_operand:VF 1 "nonimmediate_operand" "%0,v")
1535 (match_operand:VF 2 "nonimmediate_operand" "xm,vm")))]
1536 "TARGET_SSE && flag_finite_math_only
1537 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
1538 && <mask_mode512bit_condition>"
1540 <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
1541 v<maxmin_float><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
1542 [(set_attr "isa" "noavx,avx")
1543 (set_attr "type" "sseadd")
1544 (set_attr "btver2_sse_attr" "maxmin")
1545 (set_attr "prefix" "<mask_prefix3>")
1546 (set_attr "mode" "<MODE>")])
;; Variant of the <code> insn used when flag_finite_math_only is NOT set.
;; Unlike the _finite variant, operand 1 must be a register and has no
;; "%" marker, so the operand order is preserved.  Alternative 0 is the
;; non-AVX two-operand form, alternative 1 the "v"-prefixed three-operand
;; form.
1548 (define_insn "*<code><mode>3<mask_name>"
1549 [(set (match_operand:VF 0 "register_operand" "=x,v")
1551 (match_operand:VF 1 "register_operand" "0,v")
1552 (match_operand:VF 2 "nonimmediate_operand" "xm,vm")))]
1553 "TARGET_SSE && !flag_finite_math_only
1554 && <mask_mode512bit_condition>"
1556 <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
1557 v<maxmin_float><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
1558 [(set_attr "isa" "noavx,avx")
1559 (set_attr "type" "sseadd")
1560 (set_attr "btver2_sse_attr" "maxmin")
1561 (set_attr "prefix" "<mask_prefix3>")
1562 (set_attr "mode" "<MODE>")])
;; <maxmin_float> on VF_128 operands, emitted with the scalar mnemonic
;; suffix; the insn "mode" attribute is the scalar mode.  Operand 1 must
;; be a register; operand 2 may be memory.
;; NOTE(review): "type" is "sse" here while the full-vector min/max
;; patterns use "sseadd" -- confirm this is intended.
1564 (define_insn "<sse>_vm<code><mode>3"
1565 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1568 (match_operand:VF_128 1 "register_operand" "0,v")
1569 (match_operand:VF_128 2 "nonimmediate_operand" "xm,vm"))
1574 <maxmin_float><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
1575 v<maxmin_float><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %<iptr>2}"
1576 [(set_attr "isa" "noavx,avx")
1577 (set_attr "type" "sse")
1578 (set_attr "btver2_sse_attr" "maxmin")
1579 (set_attr "prefix" "orig,vex")
1580 (set_attr "mode" "<ssescalarmode>")])
1582 ;; These versions of the min/max patterns implement exactly the operations
1583 ;; min = (op1 < op2 ? op1 : op2)
1584 ;; max = (!(op1 < op2) ? op1 : op2)
1585 ;; Their operands are not commutative, and thus they may be used in the
1586 ;; presence of -0.0 and NaN.
;; Order-sensitive minimum on VF-mode vectors, emitted as min / vmin.
;; Operand 1 must be a register and is never exchanged with operand 2
;; (no "%" marker).  Alternative 0 is the non-AVX form with the
;; destination tied to operand 1; alternative 1 is the VEX form.
1588 (define_insn "*ieee_smin<mode>3"
1589 [(set (match_operand:VF 0 "register_operand" "=v,v")
1591 [(match_operand:VF 1 "register_operand" "0,v")
1592 (match_operand:VF 2 "nonimmediate_operand" "vm,vm")]
1596 min<ssemodesuffix>\t{%2, %0|%0, %2}
1597 vmin<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1598 [(set_attr "isa" "noavx,avx")
1599 (set_attr "type" "sseadd")
1600 (set_attr "prefix" "orig,vex")
1601 (set_attr "mode" "<MODE>")])
;; Order-sensitive maximum on VF-mode vectors, emitted as max / vmax.
;; Operand 1 must be a register and is never exchanged with operand 2
;; (no "%" marker).  Alternative 0 is the non-AVX form with the
;; destination tied to operand 1; alternative 1 is the VEX form.
1603 (define_insn "*ieee_smax<mode>3"
1604 [(set (match_operand:VF 0 "register_operand" "=v,v")
1606 [(match_operand:VF 1 "register_operand" "0,v")
1607 (match_operand:VF 2 "nonimmediate_operand" "vm,vm")]
1611 max<ssemodesuffix>\t{%2, %0|%0, %2}
1612 vmax<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1613 [(set_attr "isa" "noavx,avx")
1614 (set_attr "type" "sseadd")
1615 (set_attr "prefix" "orig,vex")
1616 (set_attr "mode" "<MODE>")])
;; V4DF add/subtract pattern emitted as vaddsubpd (VEX only).  The RTL
;; reuses operands 1 and 2 through match_dup in a (minus ...) expression,
;; so both arithmetic halves read the same two inputs.
1618 (define_insn "avx_addsubv4df3"
1619 [(set (match_operand:V4DF 0 "register_operand" "=x")
1622 (match_operand:V4DF 1 "register_operand" "x")
1623 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
1624 (minus:V4DF (match_dup 1) (match_dup 2))
1627 "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1628 [(set_attr "type" "sseadd")
1629 (set_attr "prefix" "vex")
1630 (set_attr "mode" "V4DF")])
;; V2DF add/subtract pattern emitted as addsubpd (non-AVX, destination
;; tied to operand 1) or vaddsubpd (VEX, three operands).  Operands 1 and
;; 2 are reused through match_dup in the (minus ...) expression.
1632 (define_insn "sse3_addsubv2df3"
1633 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1636 (match_operand:V2DF 1 "register_operand" "0,x")
1637 (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm"))
1638 (minus:V2DF (match_dup 1) (match_dup 2))
1642 addsubpd\t{%2, %0|%0, %2}
1643 vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1644 [(set_attr "isa" "noavx,avx")
1645 (set_attr "type" "sseadd")
1646 (set_attr "atom_unit" "complex")
1647 (set_attr "prefix" "orig,vex")
1648 (set_attr "mode" "V2DF")])
;; V8SF add/subtract pattern emitted as vaddsubps (VEX only).  Operands 1
;; and 2 are reused through match_dup in the (minus ...) expression.
1650 (define_insn "avx_addsubv8sf3"
1651 [(set (match_operand:V8SF 0 "register_operand" "=x")
1654 (match_operand:V8SF 1 "register_operand" "x")
1655 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
1656 (minus:V8SF (match_dup 1) (match_dup 2))
1659 "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1660 [(set_attr "type" "sseadd")
1661 (set_attr "prefix" "vex")
1662 (set_attr "mode" "V8SF")])
;; V4SF add/subtract pattern emitted as addsubps (non-AVX, destination
;; tied to operand 1) or vaddsubps (VEX, three operands).  prefix_rep is
;; "1" for the non-AVX alternative and left at its default ("*") for the
;; VEX one.
1664 (define_insn "sse3_addsubv4sf3"
1665 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1668 (match_operand:V4SF 1 "register_operand" "0,x")
1669 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
1670 (minus:V4SF (match_dup 1) (match_dup 2))
1674 addsubps\t{%2, %0|%0, %2}
1675 vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1676 [(set_attr "isa" "noavx,avx")
1677 (set_attr "type" "sseadd")
1678 (set_attr "prefix_rep" "1,*")
1679 (set_attr "prefix" "orig,vex")
1680 (set_attr "mode" "V4SF")])
;; V4DF horizontal <plusminus_insn>, emitted as vh<plusminus_mnemonic>pd.
;; The RTL combines adjacent element pairs in this order: elements 0,1 of
;; operand 1; elements 0,1 of operand 2; elements 2,3 of operand 1;
;; elements 2,3 of operand 2.
1682 (define_insn "avx_h<plusminus_insn>v4df3"
1683 [(set (match_operand:V4DF 0 "register_operand" "=x")
1688 (match_operand:V4DF 1 "register_operand" "x")
1689 (parallel [(const_int 0)]))
1690 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1693 (match_operand:V4DF 2 "nonimmediate_operand" "xm")
1694 (parallel [(const_int 0)]))
1695 (vec_select:DF (match_dup 2) (parallel [(const_int 1)]))))
1698 (vec_select:DF (match_dup 1) (parallel [(const_int 2)]))
1699 (vec_select:DF (match_dup 1) (parallel [(const_int 3)])))
1701 (vec_select:DF (match_dup 2) (parallel [(const_int 2)]))
1702 (vec_select:DF (match_dup 2) (parallel [(const_int 3)]))))))]
1704 "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
1705 [(set_attr "type" "sseadd")
1706 (set_attr "prefix" "vex")
1707 (set_attr "mode" "V4DF")])
;; Expander that builds the canonical V2DF horizontal-add RTL: elements 0
;; and 1 of operand 1 are paired, then elements 0 and 1 of operand 2.
;; The element indices are fixed constants here; the matching insn
;; (*sse3_haddv2df3) accepts them as operands.
1709 (define_expand "sse3_haddv2df3"
1710 [(set (match_operand:V2DF 0 "register_operand")
1714 (match_operand:V2DF 1 "register_operand")
1715 (parallel [(const_int 0)]))
1716 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1719 (match_operand:V2DF 2 "nonimmediate_operand")
1720 (parallel [(const_int 0)]))
1721 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
;; V2DF horizontal add, emitted as haddpd / vhaddpd.  The selected
;; element indices are matched as operands 3-6 (each 0 or 1) rather than
;; as fixed constants; the condition only requires that the two indices
;; used for each input differ, so either ordering of a pair is accepted.
1724 (define_insn "*sse3_haddv2df3"
1725 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1729 (match_operand:V2DF 1 "register_operand" "0,x")
1730 (parallel [(match_operand:SI 3 "const_0_to_1_operand")]))
1733 (parallel [(match_operand:SI 4 "const_0_to_1_operand")])))
1736 (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm")
1737 (parallel [(match_operand:SI 5 "const_0_to_1_operand")]))
1740 (parallel [(match_operand:SI 6 "const_0_to_1_operand")])))))]
1742 && INTVAL (operands[3]) != INTVAL (operands[4])
1743 && INTVAL (operands[5]) != INTVAL (operands[6])"
1745 haddpd\t{%2, %0|%0, %2}
1746 vhaddpd\t{%2, %1, %0|%0, %1, %2}"
1747 [(set_attr "isa" "noavx,avx")
1748 (set_attr "type" "sseadd")
1749 (set_attr "prefix" "orig,vex")
1750 (set_attr "mode" "V2DF")])
;; V2DF horizontal subtract, emitted as hsubpd / vhsubpd.  Unlike the
;; hadd insn, the element indices are fixed (0 then 1 for each input), so
;; the element order within a pair is not interchangeable.
1752 (define_insn "sse3_hsubv2df3"
1753 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1757 (match_operand:V2DF 1 "register_operand" "0,x")
1758 (parallel [(const_int 0)]))
1759 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1762 (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm")
1763 (parallel [(const_int 0)]))
1764 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1767 hsubpd\t{%2, %0|%0, %2}
1768 vhsubpd\t{%2, %1, %0|%0, %1, %2}"
1769 [(set_attr "isa" "noavx,avx")
1770 (set_attr "type" "sseadd")
1771 (set_attr "prefix" "orig,vex")
1772 (set_attr "mode" "V2DF")])
;; Scalar (DF) result built from two elements of a single V2DF register,
;; emitted as haddpd / vhaddpd with the same register used for both
;; source operands.  The two element indices are operands 2 and 3 (each 0
;; or 1) and must differ.
1774 (define_insn "*sse3_haddv2df3_low"
1775 [(set (match_operand:DF 0 "register_operand" "=x,x")
1778 (match_operand:V2DF 1 "register_operand" "0,x")
1779 (parallel [(match_operand:SI 2 "const_0_to_1_operand")]))
1782 (parallel [(match_operand:SI 3 "const_0_to_1_operand")]))))]
1784 && INTVAL (operands[2]) != INTVAL (operands[3])"
1786 haddpd\t{%0, %0|%0, %0}
1787 vhaddpd\t{%1, %1, %0|%0, %1, %1}"
1788 [(set_attr "isa" "noavx,avx")
1789 (set_attr "type" "sseadd1")
1790 (set_attr "prefix" "orig,vex")
1791 (set_attr "mode" "V2DF")])
;; Scalar (DF) result built from elements 0 and 1 of a single V2DF
;; register, emitted as hsubpd / vhsubpd with the same register used for
;; both source operands.  The element indices are fixed constants.
1793 (define_insn "*sse3_hsubv2df3_low"
1794 [(set (match_operand:DF 0 "register_operand" "=x,x")
1797 (match_operand:V2DF 1 "register_operand" "0,x")
1798 (parallel [(const_int 0)]))
1801 (parallel [(const_int 1)]))))]
1804 hsubpd\t{%0, %0|%0, %0}
1805 vhsubpd\t{%1, %1, %0|%0, %1, %1}"
1806 [(set_attr "isa" "noavx,avx")
1807 (set_attr "type" "sseadd1")
1808 (set_attr "prefix" "orig,vex")
1809 (set_attr "mode" "V2DF")])
;; V8SF horizontal <plusminus_insn>, emitted as vh<plusminus_mnemonic>ps.
;; Adjacent element pairs are combined in this order: (0,1) and (2,3) of
;; operand 1; (0,1) and (2,3) of operand 2; (4,5) and (6,7) of operand 1;
;; (4,5) and (6,7) of operand 2.
1811 (define_insn "avx_h<plusminus_insn>v8sf3"
1812 [(set (match_operand:V8SF 0 "register_operand" "=x")
1818 (match_operand:V8SF 1 "register_operand" "x")
1819 (parallel [(const_int 0)]))
1820 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1822 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1823 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1827 (match_operand:V8SF 2 "nonimmediate_operand" "xm")
1828 (parallel [(const_int 0)]))
1829 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1831 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1832 (vec_select:SF (match_dup 2) (parallel [(const_int 3)])))))
1836 (vec_select:SF (match_dup 1) (parallel [(const_int 4)]))
1837 (vec_select:SF (match_dup 1) (parallel [(const_int 5)])))
1839 (vec_select:SF (match_dup 1) (parallel [(const_int 6)]))
1840 (vec_select:SF (match_dup 1) (parallel [(const_int 7)]))))
1843 (vec_select:SF (match_dup 2) (parallel [(const_int 4)]))
1844 (vec_select:SF (match_dup 2) (parallel [(const_int 5)])))
1846 (vec_select:SF (match_dup 2) (parallel [(const_int 6)]))
1847 (vec_select:SF (match_dup 2) (parallel [(const_int 7)])))))))]
1849 "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1850 [(set_attr "type" "sseadd")
1851 (set_attr "prefix" "vex")
1852 (set_attr "mode" "V8SF")])
;; V4SF horizontal <plusminus_insn>, emitted as h<plusminus_mnemonic>ps
;; (non-AVX, destination tied to operand 1) or the VEX three-operand
;; form.  Adjacent element pairs are combined in this order: (0,1) and
;; (2,3) of operand 1, then (0,1) and (2,3) of operand 2.
1854 (define_insn "sse3_h<plusminus_insn>v4sf3"
1855 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1860 (match_operand:V4SF 1 "register_operand" "0,x")
1861 (parallel [(const_int 0)]))
1862 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1864 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1865 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1869 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm")
1870 (parallel [(const_int 0)]))
1871 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1873 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1874 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
1877 h<plusminus_mnemonic>ps\t{%2, %0|%0, %2}
1878 vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1879 [(set_attr "isa" "noavx,avx")
1880 (set_attr "type" "sseadd")
1881 (set_attr "atom_unit" "complex")
1882 (set_attr "prefix" "orig,vex")
1883 (set_attr "prefix_rep" "1,*")
1884 (set_attr "mode" "V4SF")])
;; Sum-reduction expander for V8DF: delegates to ix86_expand_reduc,
;; passing gen_addv8df3 as the combining operation.
1886 (define_expand "reduc_splus_v8df"
1887 [(match_operand:V8DF 0 "register_operand")
1888 (match_operand:V8DF 1 "register_operand")]
1891 ix86_expand_reduc (gen_addv8df3, operands[0], operands[1]);
;; Sum-reduction expander for V4DF, done in three emitted insns:
;;   1. horizontal add of the input with itself into tmp;
;;   2. vperm2f128 of tmp with itself, immediate 1, into tmp2
;;      (presumably swapping the two 128-bit halves -- confirm);
;;   3. vector add of tmp and tmp2 into the result.
1895 (define_expand "reduc_splus_v4df"
1896 [(match_operand:V4DF 0 "register_operand")
1897 (match_operand:V4DF 1 "register_operand")]
1900 rtx tmp = gen_reg_rtx (V4DFmode);
1901 rtx tmp2 = gen_reg_rtx (V4DFmode);
1902 emit_insn (gen_avx_haddv4df3 (tmp, operands[1], operands[1]));
1903 emit_insn (gen_avx_vperm2f128v4df3 (tmp2, tmp, tmp, GEN_INT (1)));
1904 emit_insn (gen_addv4df3 (operands[0], tmp, tmp2));
;; Sum-reduction expander for V2DF: a single horizontal add of the input
;; with itself, written directly to the result.
1908 (define_expand "reduc_splus_v2df"
1909 [(match_operand:V2DF 0 "register_operand")
1910 (match_operand:V2DF 1 "register_operand")]
1913 emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
;; Sum-reduction expander for V16SF: delegates to ix86_expand_reduc,
;; passing gen_addv16sf3 as the combining operation.
1917 (define_expand "reduc_splus_v16sf"
1918 [(match_operand:V16SF 0 "register_operand")
1919 (match_operand:V16SF 1 "register_operand")]
1922 ix86_expand_reduc (gen_addv16sf3, operands[0], operands[1]);
;; Sum-reduction expander for V8SF, done in four emitted insns:
;;   1. horizontal add of the input with itself into tmp;
;;   2. horizontal add of tmp with itself into tmp2;
;;   3. vperm2f128 of tmp2 with itself, immediate 1, back into tmp
;;      (presumably swapping the two 128-bit halves -- confirm);
;;   4. vector add of tmp and tmp2 into the result.
1926 (define_expand "reduc_splus_v8sf"
1927 [(match_operand:V8SF 0 "register_operand")
1928 (match_operand:V8SF 1 "register_operand")]
1931 rtx tmp = gen_reg_rtx (V8SFmode);
1932 rtx tmp2 = gen_reg_rtx (V8SFmode);
1933 emit_insn (gen_avx_haddv8sf3 (tmp, operands[1], operands[1]));
1934 emit_insn (gen_avx_haddv8sf3 (tmp2, tmp, tmp));
1935 emit_insn (gen_avx_vperm2f128v8sf3 (tmp, tmp2, tmp2, GEN_INT (1)));
1936 emit_insn (gen_addv8sf3 (operands[0], tmp, tmp2));
;; Sum-reduction expander for V4SF with two code paths: either two
;; successive horizontal adds (input with itself, then the temporary with
;; itself), or the generic ix86_expand_reduc with gen_addv4sf3.
;; NOTE(review): the selecting condition is presumably SSE3 availability,
;; since the first path uses gen_sse3_haddv4sf3 -- TODO confirm.
1940 (define_expand "reduc_splus_v4sf"
1941 [(match_operand:V4SF 0 "register_operand")
1942 (match_operand:V4SF 1 "register_operand")]
1947 rtx tmp = gen_reg_rtx (V4SFmode);
1948 emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
1949 emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
1952 ix86_expand_reduc (gen_addv4sf3, operands[0], operands[1]);
1956 ;; Modes handled by reduc_sm{in,ax}* patterns.
;; Every entry is conditional: the 256-bit integer modes need AVX2, the
;; 256-bit float modes need AVX, V4SF needs SSE, and all 512-bit modes
;; need AVX512F.
1957 (define_mode_iterator REDUC_SMINMAX_MODE
1958 [(V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
1959 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
1960 (V8SF "TARGET_AVX") (V4DF "TARGET_AVX")
1961 (V4SF "TARGET_SSE") (V16SI "TARGET_AVX512F")
1962 (V8DI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
1963 (V8DF "TARGET_AVX512F")])
;; Reduction expander for the smaxmin codes over every mode in
;; REDUC_SMINMAX_MODE: delegates to ix86_expand_reduc, passing the
;; matching gen_<code><mode>3 as the combining operation.
1965 (define_expand "reduc_<code>_<mode>"
1966 [(smaxmin:REDUC_SMINMAX_MODE
1967 (match_operand:REDUC_SMINMAX_MODE 0 "register_operand")
1968 (match_operand:REDUC_SMINMAX_MODE 1 "register_operand"))]
1971 ix86_expand_reduc (gen_<code><mode>3, operands[0], operands[1]);
;; Reduction expander over the VI48_512 modes: delegates to
;; ix86_expand_reduc, passing gen_<code><mode>3 as the combining
;; operation.
;; NOTE(review): presumably the unsigned min/max counterpart of the
;; smaxmin expander -- confirm the code iterator used.
1975 (define_expand "reduc_<code>_<mode>"
1977 (match_operand:VI48_512 0 "register_operand")
1978 (match_operand:VI48_512 1 "register_operand"))]
1981 ix86_expand_reduc (gen_<code><mode>3, operands[0], operands[1]);
;; Reduction expander over the VI_256 modes: delegates to
;; ix86_expand_reduc, passing gen_<code><mode>3 as the combining
;; operation.
;; NOTE(review): presumably the unsigned min/max counterpart of the
;; smaxmin expander -- confirm the code iterator used.
1985 (define_expand "reduc_<code>_<mode>"
1987 (match_operand:VI_256 0 "register_operand")
1988 (match_operand:VI_256 1 "register_operand"))]
1991 ix86_expand_reduc (gen_<code><mode>3, operands[0], operands[1]);
;; Unsigned-minimum reduction expander for V8HI only: delegates to
;; ix86_expand_reduc, passing gen_uminv8hi3 as the combining operation.
1995 (define_expand "reduc_umin_v8hi"
1997 (match_operand:V8HI 0 "register_operand")
1998 (match_operand:V8HI 1 "register_operand"))]
2001 ix86_expand_reduc (gen_uminv8hi3, operands[0], operands[1]);
2005 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2007 ;; Parallel floating point comparisons
2009 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; VEX compare of two VF_128_256 vectors, emitted as vcmp<ssemodesuffix>
;; with an explicit immediate.  Operand 3 is that immediate and must
;; satisfy const_0_to_31_operand; the result is a vector in an "x"
;; register.
2011 (define_insn "avx_cmp<mode>3"
2012 [(set (match_operand:VF_128_256 0 "register_operand" "=x")
2014 [(match_operand:VF_128_256 1 "register_operand" "x")
2015 (match_operand:VF_128_256 2 "nonimmediate_operand" "xm")
2016 (match_operand:SI 3 "const_0_to_31_operand" "n")]
2019 "vcmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2020 [(set_attr "type" "ssecmp")
2021 (set_attr "length_immediate" "1")
2022 (set_attr "prefix" "vex")
2023 (set_attr "mode" "<MODE>")])
;; VEX compare on VF_128 operands, emitted as vcmp with the scalar
;; mnemonic suffix and an explicit immediate (operand 3, range 0..31).
;; The insn "mode" attribute is the scalar mode; operand 2 is printed
;; with %<iptr> in the second syntax variant.
2025 (define_insn "avx_vmcmp<mode>3"
2026 [(set (match_operand:VF_128 0 "register_operand" "=x")
2029 [(match_operand:VF_128 1 "register_operand" "x")
2030 (match_operand:VF_128 2 "nonimmediate_operand" "xm")
2031 (match_operand:SI 3 "const_0_to_31_operand" "n")]
2036 "vcmp<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %3}"
2037 [(set_attr "type" "ssecmp")
2038 (set_attr "length_immediate" "1")
2039 (set_attr "prefix" "vex")
2040 (set_attr "mode" "<ssescalarmode>")])
;; Vector compare whose comparison code (operand 3, an
;; sse_comparison_operator) is printed into the mnemonic with %D3.  This
;; variant matches only when the code is in class RTX_COMM_COMPARE, which
;; is why operand 1 may carry the "%" marker and be exchanged with
;; operand 2.
2042 (define_insn "*<sse>_maskcmp<mode>3_comm"
2043 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
2044 (match_operator:VF_128_256 3 "sse_comparison_operator"
2045 [(match_operand:VF_128_256 1 "register_operand" "%0,x")
2046 (match_operand:VF_128_256 2 "nonimmediate_operand" "xm,xm")]))]
2048 && GET_RTX_CLASS (GET_CODE (operands[3])) == RTX_COMM_COMPARE"
2050 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
2051 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2052 [(set_attr "isa" "noavx,avx")
2053 (set_attr "type" "ssecmp")
2054 (set_attr "length_immediate" "1")
2055 (set_attr "prefix" "orig,vex")
2056 (set_attr "mode" "<MODE>")])
;; General vector compare for any sse_comparison_operator (operand 3,
;; printed into the mnemonic with %D3).  Operand 1 has no "%" marker, so
;; the operand order is preserved.  Alternative 0 is the non-AVX form,
;; alternative 1 the VEX three-operand form.
2058 (define_insn "<sse>_maskcmp<mode>3"
2059 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
2060 (match_operator:VF_128_256 3 "sse_comparison_operator"
2061 [(match_operand:VF_128_256 1 "register_operand" "0,x")
2062 (match_operand:VF_128_256 2 "nonimmediate_operand" "xm,xm")]))]
2065 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
2066 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2067 [(set_attr "isa" "noavx,avx")
2068 (set_attr "type" "ssecmp")
2069 (set_attr "length_immediate" "1")
2070 (set_attr "prefix" "orig,vex")
2071 (set_attr "mode" "<MODE>")])
;; Compare on VF_128 operands for any sse_comparison_operator, emitted
;; with the scalar mnemonic suffix; the insn "mode" attribute is the
;; scalar mode.
;; NOTE(review): length_immediate is "1,*" here but a plain "1" in the
;; full-vector maskcmp patterns -- confirm the difference is intended.
2073 (define_insn "<sse>_vmmaskcmp<mode>3"
2074 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
2076 (match_operator:VF_128 3 "sse_comparison_operator"
2077 [(match_operand:VF_128 1 "register_operand" "0,x")
2078 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm")])
2083 cmp%D3<ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
2084 vcmp%D3<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %<iptr>2}"
2085 [(set_attr "isa" "noavx,avx")
2086 (set_attr "type" "ssecmp")
2087 (set_attr "length_immediate" "1,*")
2088 (set_attr "prefix" "orig,vex")
2089 (set_attr "mode" "<ssescalarmode>")])
;; Maps a 512-bit mode to the predicate used for the compare immediate:
;; the float modes (V16SF, V8DF) accept 0..31, the integer modes (V16SI,
;; V8DI) accept 0..7.
2091 (define_mode_attr cmp_imm_predicate
2092 [(V16SF "const_0_to_31_operand") (V8DF "const_0_to_31_operand")
2093 (V16SI "const_0_to_7_operand") (V8DI "const_0_to_7_operand")])
;; EVEX compare of two VI48F_512 vectors.  The result has the mask mode
;; <avx512fmaskmode> and lives in a "k" register.  Operand 3 is the
;; immediate; its predicate is chosen per mode through
;; <cmp_imm_predicate>.
2095 (define_insn "avx512f_cmp<mode>3"
2096 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
2097 (unspec:<avx512fmaskmode>
2098 [(match_operand:VI48F_512 1 "register_operand" "v")
2099 (match_operand:VI48F_512 2 "nonimmediate_operand" "vm")
2100 (match_operand:SI 3 "<cmp_imm_predicate>" "n")]
2103 "v<sseintprefix>cmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2104 [(set_attr "type" "ssecmp")
2105 (set_attr "length_immediate" "1")
2106 (set_attr "prefix" "evex")
2107 (set_attr "mode" "<sseinsnmode>")])
;; EVEX unsigned compare (UNSPEC_UNSIGNED_PCMP) of two VI48_512 vectors,
;; emitted as vpcmpu<ssemodesuffix>.  The result is a mask in a "k"
;; register; operand 3 is an immediate in the range 0..7.
2109 (define_insn "avx512f_ucmp<mode>3"
2110 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
2111 (unspec:<avx512fmaskmode>
2112 [(match_operand:VI48_512 1 "register_operand" "v")
2113 (match_operand:VI48_512 2 "nonimmediate_operand" "vm")
2114 (match_operand:SI 3 "const_0_to_7_operand" "n")]
2115 UNSPEC_UNSIGNED_PCMP))]
2117 "vpcmpu<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2118 [(set_attr "type" "ssecmp")
2119 (set_attr "length_immediate" "1")
2120 (set_attr "prefix" "evex")
2121 (set_attr "mode" "<sseinsnmode>")])
;; EVEX compare on VF_128 operands, emitted as vcmp with the scalar
;; mnemonic suffix and an immediate (operand 3, range 0..31).  The result
;; is a mask in a "k" register; in the RTL the compare unspec is wrapped
;; in an (and ...) of the mask mode.
2123 (define_insn "avx512f_vmcmp<mode>3"
2124 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
2125 (and:<avx512fmaskmode>
2126 (unspec:<avx512fmaskmode>
2127 [(match_operand:VF_128 1 "register_operand" "v")
2128 (match_operand:VF_128 2 "nonimmediate_operand" "vm")
2129 (match_operand:SI 3 "const_0_to_31_operand" "n")]
2133 "vcmp<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2134 [(set_attr "type" "ssecmp")
2135 (set_attr "length_immediate" "1")
2136 (set_attr "prefix" "evex")
2137 (set_attr "mode" "<ssescalarmode>")])
;; Masked form of avx512f_vmcmp<mode>3.  Operand 4 is an additional mask
;; register ("k") that is and-ed into the result in the RTL and printed
;; after the destination as {%4} in the assembler template.
2139 (define_insn "avx512f_vmcmp<mode>3_mask"
2140 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
2141 (and:<avx512fmaskmode>
2142 (unspec:<avx512fmaskmode>
2143 [(match_operand:VF_128 1 "register_operand" "v")
2144 (match_operand:VF_128 2 "nonimmediate_operand" "vm")
2145 (match_operand:SI 3 "const_0_to_31_operand" "n")]
2147 (and:<avx512fmaskmode>
2148 (match_operand:<avx512fmaskmode> 4 "register_operand" "k")
2151 "vcmp<ssescalarmodesuffix>\t{%3, %2, %1, %0%{%4%}|%0%{%4%}, %1, %2, %3}"
2152 [(set_attr "type" "ssecmp")
2153 (set_attr "length_immediate" "1")
2154 (set_attr "prefix" "evex")
2155 (set_attr "mode" "<ssescalarmode>")])
;; EVEX compare of two VF-mode vectors for any sse_comparison_operator
;; (operand 3, printed into the mnemonic with %D3).  The result has the
;; mask mode <avx512fmaskmode> and lives in a "k" register.
2157 (define_insn "avx512f_maskcmp<mode>3"
2158 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
2159 (match_operator:<avx512fmaskmode> 3 "sse_comparison_operator"
2160 [(match_operand:VF 1 "register_operand" "v")
2161 (match_operand:VF 2 "nonimmediate_operand" "vm")]))]
2163 "vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2164 [(set_attr "type" "ssecmp")
2165 (set_attr "length_immediate" "1")
2166 (set_attr "prefix" "evex")
2167 (set_attr "mode" "<sseinsnmode>")])
;; Sets the flags register in CCFP mode from element 0 of operand 0 (a
;; register) and element 0 of operand 1 (register or memory), emitted as
;; comi<ssemodesuffix>.  Enabled when SSE_FLOAT_MODE_P holds for the
;; scalar mode.  prefix_data16 is selected by whether the insn mode is
;; DF.
2169 (define_insn "<sse>_comi"
2170 [(set (reg:CCFP FLAGS_REG)
2173 (match_operand:<ssevecmode> 0 "register_operand" "v")
2174 (parallel [(const_int 0)]))
2176 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "vm")
2177 (parallel [(const_int 0)]))))]
2178 "SSE_FLOAT_MODE_P (<MODE>mode)"
2179 "%vcomi<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}"
2180 [(set_attr "type" "ssecomi")
2181 (set_attr "prefix" "maybe_vex")
2182 (set_attr "prefix_rep" "0")
2183 (set (attr "prefix_data16")
2184 (if_then_else (eq_attr "mode" "DF")
2186 (const_string "0")))
2187 (set_attr "mode" "<MODE>")])
;; Same shape as <sse>_comi, but the flags register is set in CCFPU mode
;; and the instruction emitted is ucomi<ssemodesuffix>.  Element 0 of
;; each operand is compared; operand 1 may be memory.
2189 (define_insn "<sse>_ucomi"
2190 [(set (reg:CCFPU FLAGS_REG)
2193 (match_operand:<ssevecmode> 0 "register_operand" "v")
2194 (parallel [(const_int 0)]))
2196 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "vm")
2197 (parallel [(const_int 0)]))))]
2198 "SSE_FLOAT_MODE_P (<MODE>mode)"
2199 "%vucomi<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}"
2200 [(set_attr "type" "ssecomi")
2201 (set_attr "prefix" "maybe_vex")
2202 (set_attr "prefix_rep" "0")
2203 (set (attr "prefix_data16")
2204 (if_then_else (eq_attr "mode" "DF")
2206 (const_string "0")))
2207 (set_attr "mode" "<MODE>")])
;; Vector conditional expander: the data operands (0, 1, 2) are in a
;; V_512 mode, while the comparison (operator 3 on operands 4 and 5) is
;; in a VF_512 mode.  The two modes may differ but must have the same
;; number of elements.  Expansion is delegated to ix86_expand_fp_vcond,
;; whose boolean result is captured in "ok".
2209 (define_expand "vcond<V_512:mode><VF_512:mode>"
2210 [(set (match_operand:V_512 0 "register_operand")
2212 (match_operator 3 ""
2213 [(match_operand:VF_512 4 "nonimmediate_operand")
2214 (match_operand:VF_512 5 "nonimmediate_operand")])
2215 (match_operand:V_512 1 "general_operand")
2216 (match_operand:V_512 2 "general_operand")))]
2218 && (GET_MODE_NUNITS (<V_512:MODE>mode)
2219 == GET_MODE_NUNITS (<VF_512:MODE>mode))"
2221 bool ok = ix86_expand_fp_vcond (operands);
;; vcond expander for 256-bit modes.  Operator 3 compares the VF_256
;; operands 4 and 5; operands 1 and 2 are the two V_256 value inputs and
;; operand 0 the V_256 result.  Data and comparison modes must have the
;; same element count.  Expansion is delegated to ix86_expand_fp_vcond.
2226 (define_expand "vcond<V_256:mode><VF_256:mode>"
2227 [(set (match_operand:V_256 0 "register_operand")
2229 (match_operator 3 ""
2230 [(match_operand:VF_256 4 "nonimmediate_operand")
2231 (match_operand:VF_256 5 "nonimmediate_operand")])
2232 (match_operand:V_256 1 "general_operand")
2233 (match_operand:V_256 2 "general_operand")))]
2235 && (GET_MODE_NUNITS (<V_256:MODE>mode)
2236 == GET_MODE_NUNITS (<VF_256:MODE>mode))"
2238 bool ok = ix86_expand_fp_vcond (operands);
;; vcond expander for 128-bit modes.  Operator 3 compares the VF_128
;; operands 4 and 5; operands 1 and 2 are the two V_128 value inputs and
;; operand 0 the V_128 result.  Data and comparison modes must have the
;; same element count.  Expansion is delegated to ix86_expand_fp_vcond.
2243 (define_expand "vcond<V_128:mode><VF_128:mode>"
2244 [(set (match_operand:V_128 0 "register_operand")
2246 (match_operator 3 ""
2247 [(match_operand:VF_128 4 "nonimmediate_operand")
2248 (match_operand:VF_128 5 "nonimmediate_operand")])
2249 (match_operand:V_128 1 "general_operand")
2250 (match_operand:V_128 2 "general_operand")))]
2252 && (GET_MODE_NUNITS (<V_128:MODE>mode)
2253 == GET_MODE_NUNITS (<VF_128:MODE>mode))"
2255 bool ok = ix86_expand_fp_vcond (operands);
2260 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2262 ;; Parallel floating point logical operations
2264 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Vector floating-point and-not over the VF modes.  Alternative 0 is the
;; two-operand form with operand 1 tied to the destination (isa noavx);
;; alternative 1 is the three-operand "v"-prefixed form (isa avx).
;; The mnemonic suffix is picked from the insn's "mode" attribute and the
;; final template is formatted into a static buffer with snprintf.  For
;; 64-byte modes the integer vpandn mnemonic is used, as the comment in the
;; body explains.  The "mode" attribute itself depends on tuning flags,
;; TARGET_AVX and optimize-for-size.
2266 (define_insn "<sse>_andnot<mode>3"
2267 [(set (match_operand:VF 0 "register_operand" "=x,v")
2270 (match_operand:VF 1 "register_operand" "0,v"))
2271 (match_operand:VF 2 "nonimmediate_operand" "xm,vm")))]
2274 static char buf[32];
2278 switch (get_attr_mode (insn))
2285 suffix = "<ssemodesuffix>";
2288 switch (which_alternative)
2291 ops = "andn%s\t{%%2, %%0|%%0, %%2}";
2294 ops = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
2300 /* There is no vandnp[sd]. Use vpandnq. */
2301 if (GET_MODE_SIZE (<MODE>mode) == 64)
2304 ops = "vpandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
2307 snprintf (buf, sizeof (buf), ops, suffix);
2310 [(set_attr "isa" "noavx,avx")
2311 (set_attr "type" "sselog")
2312 (set_attr "prefix" "orig,maybe_evex")
2314 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
2315 (const_string "<ssePSmode>")
2316 (match_test "TARGET_AVX")
2317 (const_string "<MODE>")
2318 (match_test "optimize_function_for_size_p (cfun)")
2319 (const_string "V4SF")
2321 (const_string "<MODE>")))])
;; Expander for the any_logic codes on the 128/256-bit float vector modes
;; (VF_128_256).  Both inputs may be in memory on entry; the preparation
;; statement only calls ix86_fixup_binary_operands_no_copy on them.
2323 (define_expand "<code><mode>3"
2324 [(set (match_operand:VF_128_256 0 "register_operand")
2325 (any_logic:VF_128_256
2326 (match_operand:VF_128_256 1 "nonimmediate_operand")
2327 (match_operand:VF_128_256 2 "nonimmediate_operand")))]
2329 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Expander with the <code><mode>3 name for the 512-bit float vector modes
;; (VF_512).  Both inputs may be in memory on entry; the preparation
;; statement only calls ix86_fixup_binary_operands_no_copy on them.
2331 (define_expand "<code><mode>3"
2332 [(set (match_operand:VF_512 0 "register_operand")
2334 (match_operand:VF_512 1 "nonimmediate_operand")
2335 (match_operand:VF_512 2 "nonimmediate_operand")))]
2337 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Insn for the <logic> operations on the VF modes.  Operands 1 and 2 are
;; marked commutative ("%").  Alternative 0 is the two-operand form with
;; operand 1 tied to the destination (isa noavx); alternative 1 is the
;; three-operand "v"-prefixed form (isa avx).  The suffix comes from the
;; insn's "mode" attribute and the template is built with snprintf into a
;; static buffer.  For 64-byte modes the integer vp<logic> mnemonic is
;; used, as the comment in the body explains.
2339 (define_insn "*<code><mode>3"
2340 [(set (match_operand:VF 0 "register_operand" "=x,v")
2342 (match_operand:VF 1 "nonimmediate_operand" "%0,v")
2343 (match_operand:VF 2 "nonimmediate_operand" "xm,vm")))]
2344 "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
2346 static char buf[32];
2350 switch (get_attr_mode (insn))
2357 suffix = "<ssemodesuffix>";
2360 switch (which_alternative)
2363 ops = "<logic>%s\t{%%2, %%0|%%0, %%2}";
2366 ops = "v<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
2372 /* There is no v<logic>p[sd]. Use vp<logic>q. */
2373 if (GET_MODE_SIZE (<MODE>mode) == 64)
2376 ops = "vp<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
2379 snprintf (buf, sizeof (buf), ops, suffix);
2382 [(set_attr "isa" "noavx,avx")
2383 (set_attr "type" "sselog")
2384 (set_attr "prefix" "orig,maybe_evex")
2386 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
2387 (const_string "<ssePSmode>")
2388 (match_test "TARGET_AVX")
2389 (const_string "<MODE>")
2390 (match_test "optimize_function_for_size_p (cfun)")
2391 (const_string "V4SF")
2393 (const_string "<MODE>")))])
;; copysign expander for the VF modes, composed of logical operations:
;; one sub-pattern combines (not mask) with operand 1, another combines
;; mask with operand 2 via and, and operand 0 is the ior of temporaries 4
;; and 5.  Operand 3 (the mask) comes from ix86_build_signbit_mask and
;; operands 4 and 5 are freshly allocated pseudo registers.
2395 (define_expand "copysign<mode>3"
2398 (not:VF (match_dup 3))
2399 (match_operand:VF 1 "nonimmediate_operand")))
2401 (and:VF (match_dup 3)
2402 (match_operand:VF 2 "nonimmediate_operand")))
2403 (set (match_operand:VF 0 "register_operand")
2404 (ior:VF (match_dup 4) (match_dup 5)))]
2407 operands[3] = ix86_build_signbit_mask (<MODE>mode, 1, 0);
2409 operands[4] = gen_reg_rtx (<MODE>mode);
2410 operands[5] = gen_reg_rtx (<MODE>mode);
2413 ;; Also define scalar versions. These are used for abs, neg, and
2414 ;; conditional move. Using subregs into vector modes causes register
2415 ;; allocation lossage. These patterns do not allow memory operands
2416 ;; because the native instructions read the full 128-bits.
;; Scalar (MODEF) and-not implemented with the packed instruction.  All
;; three operands must be registers.  The suffix is "ps" when the insn's
;; mode attribute is V4SF and <ssevecmodesuffix> otherwise.  Alternative 0
;; is the two-operand form (isa noavx), alternative 1 the three-operand
;; "v"-prefixed form (isa avx).
2418 (define_insn "*andnot<mode>3"
2419 [(set (match_operand:MODEF 0 "register_operand" "=x,x")
2422 (match_operand:MODEF 1 "register_operand" "0,x"))
2423 (match_operand:MODEF 2 "register_operand" "x,x")))]
2424 "SSE_FLOAT_MODE_P (<MODE>mode)"
2426 static char buf[32];
2429 = (get_attr_mode (insn) == MODE_V4SF) ? "ps" : "<ssevecmodesuffix>";
2431 switch (which_alternative)
2434 ops = "andn%s\t{%%2, %%0|%%0, %%2}";
2437 ops = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
2443 snprintf (buf, sizeof (buf), ops, suffix);
2446 [(set_attr "isa" "noavx,avx")
2447 (set_attr "type" "sselog")
2448 (set_attr "prefix" "orig,vex")
2450 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
2451 (const_string "V4SF")
2452 (match_test "TARGET_AVX")
2453 (const_string "<ssevecmode>")
2454 (match_test "optimize_function_for_size_p (cfun)")
2455 (const_string "V4SF")
2457 (const_string "<ssevecmode>")))])
;; TFmode and-not.  Operand 1 is the complemented input (not:TF) and must
;; be a register; operand 2 may be in memory.  The mnemonic is "andnps"
;; when the insn's mode attribute is V4SF and "pandn" otherwise, with a
;; "v" prefix and three operands in alternative 1 (isa avx).
;; prefix_data16 is keyed on alternative 0 with mode TI.
2459 (define_insn "*andnottf3"
2460 [(set (match_operand:TF 0 "register_operand" "=x,x")
2462 (not:TF (match_operand:TF 1 "register_operand" "0,x"))
2463 (match_operand:TF 2 "nonimmediate_operand" "xm,xm")))]
2466 static char buf[32];
2469 = (get_attr_mode (insn) == MODE_V4SF) ? "andnps" : "pandn";
2471 switch (which_alternative)
2474 ops = "%s\t{%%2, %%0|%%0, %%2}";
2477 ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
2483 snprintf (buf, sizeof (buf), ops, tmp);
2486 [(set_attr "isa" "noavx,avx")
2487 (set_attr "type" "sselog")
2488 (set (attr "prefix_data16")
2490 (and (eq_attr "alternative" "0")
2491 (eq_attr "mode" "TI"))
2493 (const_string "*")))
2494 (set_attr "prefix" "orig,vex")
2496 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
2497 (const_string "V4SF")
2498 (match_test "TARGET_AVX")
2500 (ior (not (match_test "TARGET_SSE2"))
2501 (match_test "optimize_function_for_size_p (cfun)"))
2502 (const_string "V4SF")
2504 (const_string "TI")))])
;; Scalar (MODEF) <logic> operation implemented with the packed
;; instruction.  All operands must be registers; operands 1 and 2 are
;; commutative ("%").  The suffix is "ps" when the insn's mode attribute is
;; V4SF and <ssevecmodesuffix> otherwise.  Alternative 0 is the two-operand
;; form (isa noavx), alternative 1 the three-operand "v"-prefixed form.
2506 (define_insn "*<code><mode>3"
2507 [(set (match_operand:MODEF 0 "register_operand" "=x,x")
2509 (match_operand:MODEF 1 "register_operand" "%0,x")
2510 (match_operand:MODEF 2 "register_operand" "x,x")))]
2511 "SSE_FLOAT_MODE_P (<MODE>mode)"
2513 static char buf[32];
2516 = (get_attr_mode (insn) == MODE_V4SF) ? "ps" : "<ssevecmodesuffix>";
2518 switch (which_alternative)
2521 ops = "<logic>%s\t{%%2, %%0|%%0, %%2}";
2524 ops = "v<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
2530 snprintf (buf, sizeof (buf), ops, suffix);
2533 [(set_attr "isa" "noavx,avx")
2534 (set_attr "type" "sselog")
2535 (set_attr "prefix" "orig,vex")
2537 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
2538 (const_string "V4SF")
2539 (match_test "TARGET_AVX")
2540 (const_string "<ssevecmode>")
2541 (match_test "optimize_function_for_size_p (cfun)")
2542 (const_string "V4SF")
2544 (const_string "<ssevecmode>")))])
;; Expander for the <code> operation on TFmode.  Both inputs may be in
;; memory on entry; the preparation statement only calls
;; ix86_fixup_binary_operands_no_copy with TFmode.
2546 (define_expand "<code>tf3"
2547 [(set (match_operand:TF 0 "register_operand")
2549 (match_operand:TF 1 "nonimmediate_operand")
2550 (match_operand:TF 2 "nonimmediate_operand")))]
2552 "ix86_fixup_binary_operands_no_copy (<CODE>, TFmode, operands);")
;; TFmode <logic> insn.  Operands 1 and 2 are commutative ("%"); operand 2
;; may be in memory.  The mnemonic is "<logic>ps" when the insn's mode
;; attribute is V4SF and "p<logic>" otherwise, with a "v" prefix and three
;; operands in alternative 1 (isa avx).  The visible part of the condition
;; requires ix86_binary_operator_ok.
2554 (define_insn "*<code>tf3"
2555 [(set (match_operand:TF 0 "register_operand" "=x,x")
2557 (match_operand:TF 1 "nonimmediate_operand" "%0,x")
2558 (match_operand:TF 2 "nonimmediate_operand" "xm,xm")))]
2560 && ix86_binary_operator_ok (<CODE>, TFmode, operands)"
2562 static char buf[32];
2565 = (get_attr_mode (insn) == MODE_V4SF) ? "<logic>ps" : "p<logic>";
2567 switch (which_alternative)
2570 ops = "%s\t{%%2, %%0|%%0, %%2}";
2573 ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
2579 snprintf (buf, sizeof (buf), ops, tmp);
2582 [(set_attr "isa" "noavx,avx")
2583 (set_attr "type" "sselog")
2584 (set (attr "prefix_data16")
2586 (and (eq_attr "alternative" "0")
2587 (eq_attr "mode" "TI"))
2589 (const_string "*")))
2590 (set_attr "prefix" "orig,vex")
2592 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
2593 (const_string "V4SF")
2594 (match_test "TARGET_AVX")
2596 (ior (not (match_test "TARGET_SSE2"))
2597 (match_test "optimize_function_for_size_p (cfun)"))
2598 (const_string "V4SF")
2600 (const_string "TI")))])
2602 ;; There is no floating-point xor for V16SF and V8DF in AVX512F,
2603 ;; but we need one for negation.  Instead we use the integer versions of
2604 ;; xor.  Maybe there could be a better way to do that.
;; Element-size suffix appended to the integer logic mnemonic for the
;; 512-bit float modes: "d" for V16SF and "q" for V8DF.
2606 (define_mode_attr avx512flogicsuff
2607 [(V16SF "d") (V8DF "q")])
;; Logic operation on the 512-bit float modes (VF_512), emitted as the
;; integer instruction vp<logic> with the <avx512flogicsuff> suffix.
;; Operand 1 must be a register; operand 2 may be in memory.  Always
;; EVEX-prefixed.
2609 (define_insn "avx512f_<logic><mode>"
2610 [(set (match_operand:VF_512 0 "register_operand" "=v")
2612 (match_operand:VF_512 1 "register_operand" "v")
2613 (match_operand:VF_512 2 "nonimmediate_operand" "vm")))]
2615 "vp<logic><avx512flogicsuff>\t{%2, %1, %0|%0, %1, %2}"
2616 [(set_attr "type" "sselog")
2617 (set_attr "prefix" "evex")])
2619 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2621 ;; FMA floating point multiply/accumulate instructions. These include
2622 ;; scalar versions of the instructions as well as vector versions.
2624 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2626 ;; The standard names for scalar FMA are only available with SSE math enabled.
2627 ;; CPUID bit AVX512F enables evex encoded scalar and 512-bit fma. It doesn't
2628 ;; care about FMA bit, so we enable fma for TARGET_AVX512F even when TARGET_FMA
2629 ;; and TARGET_FMA4 are both false.
2630 ;; TODO: In theory AVX512F does not automatically imply FMA, and without FMA
2631 ;; one must force the EVEX encoding of the fma insns. Ideally we'd improve
2632 ;; GAS to allow proper prefix selection. However, for the moment all hardware
2633 ;; that supports AVX512F also supports FMA so we can ignore this for now.
;; Modes for the standard-name fma/fms/fnma/fnms expanders.  Scalar SF/DF
;; require TARGET_SSE_MATH plus one of FMA, FMA4 or AVX512F; the 128- and
;; 256-bit vector modes require FMA or FMA4; the 512-bit vector modes
;; require AVX512F.
2634 (define_mode_iterator FMAMODEM
2635 [(SF "TARGET_SSE_MATH && (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F)")
2636 (DF "TARGET_SSE_MATH && (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F)")
2637 (V4SF "TARGET_FMA || TARGET_FMA4")
2638 (V2DF "TARGET_FMA || TARGET_FMA4")
2639 (V8SF "TARGET_FMA || TARGET_FMA4")
2640 (V4DF "TARGET_FMA || TARGET_FMA4")
2641 (V16SF "TARGET_AVX512F")
2642 (V8DF "TARGET_AVX512F")])
;; Standard-name fma expander over FMAMODEM: three inputs (operands 1-3),
;; none of them negated, any of which may be in memory on entry.
2644 (define_expand "fma<mode>4"
2645 [(set (match_operand:FMAMODEM 0 "register_operand")
2647 (match_operand:FMAMODEM 1 "nonimmediate_operand")
2648 (match_operand:FMAMODEM 2 "nonimmediate_operand")
2649 (match_operand:FMAMODEM 3 "nonimmediate_operand")))]
;; Standard-name fms expander over FMAMODEM: like fma, but operand 3 is
;; wrapped in neg.
2652 (define_expand "fms<mode>4"
2653 [(set (match_operand:FMAMODEM 0 "register_operand")
2655 (match_operand:FMAMODEM 1 "nonimmediate_operand")
2656 (match_operand:FMAMODEM 2 "nonimmediate_operand")
2657 (neg:FMAMODEM (match_operand:FMAMODEM 3 "nonimmediate_operand"))))]
;; Standard-name fnma expander over FMAMODEM: like fma, but operand 1 is
;; wrapped in neg.
2660 (define_expand "fnma<mode>4"
2661 [(set (match_operand:FMAMODEM 0 "register_operand")
2663 (neg:FMAMODEM (match_operand:FMAMODEM 1 "nonimmediate_operand"))
2664 (match_operand:FMAMODEM 2 "nonimmediate_operand")
2665 (match_operand:FMAMODEM 3 "nonimmediate_operand")))]
;; Standard-name fnms expander over FMAMODEM: both operand 1 and operand 3
;; are wrapped in neg.
2668 (define_expand "fnms<mode>4"
2669 [(set (match_operand:FMAMODEM 0 "register_operand")
2671 (neg:FMAMODEM (match_operand:FMAMODEM 1 "nonimmediate_operand"))
2672 (match_operand:FMAMODEM 2 "nonimmediate_operand")
2673 (neg:FMAMODEM (match_operand:FMAMODEM 3 "nonimmediate_operand"))))]
2676 ;; The builtins for intrinsics are not constrained by SSE math enabled.
;; Same mode list as FMAMODEM, but the scalar SF/DF entries do not require
;; TARGET_SSE_MATH: FMA, FMA4 or AVX512F alone is sufficient.
2677 (define_mode_iterator FMAMODE [(SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
2678 (DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
2679 (V4SF "TARGET_FMA || TARGET_FMA4")
2680 (V2DF "TARGET_FMA || TARGET_FMA4")
2681 (V8SF "TARGET_FMA || TARGET_FMA4")
2682 (V4DF "TARGET_FMA || TARGET_FMA4")
2683 (V16SF "TARGET_AVX512F")
2684 (V8DF "TARGET_AVX512F")])
;; Named expander for fused multiply-add over FMAMODE (the iterator that
;; is not gated on SSE math).  Three plain inputs, operands 1-3.
2686 (define_expand "fma4i_fmadd_<mode>"
2687 [(set (match_operand:FMAMODE 0 "register_operand")
2689 (match_operand:FMAMODE 1 "nonimmediate_operand")
2690 (match_operand:FMAMODE 2 "nonimmediate_operand")
2691 (match_operand:FMAMODE 3 "nonimmediate_operand")))]
;; Fused multiply-add over FMAMODE with five alternatives.
;; Alternatives 0-2 (isa fma_avx512f) are the three-operand 132, 213 and
;; 231 forms: operand 1 is tied to the destination in the first two,
;; operand 3 in the third.  Alternatives 3-4 (isa fma4) are the
;; four-operand form, differing in which input may be in memory.
2694 (define_insn "*fma_fmadd_<mode>"
2695 [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
2697 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0, v, x,x")
2698 (match_operand:FMAMODE 2 "nonimmediate_operand" "vm, v,vm, x,m")
2699 (match_operand:FMAMODE 3 "nonimmediate_operand" " v,vm, 0,xm,x")))]
2702 vfmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2703 vfmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2704 vfmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
2705 vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
2706 vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2707 [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f,fma4,fma4")
2708 (set_attr "type" "ssemuladd")
2709 (set_attr "mode" "<MODE>")])
;; Masked 512-bit fused multiply-add (132 and 213 forms).  Operand 1 is
;; tied to the destination in both alternatives; operand 4 is the mask
;; register ("k"), attached to the destination through the %{%4%}
;; decoration in the template.
2711 (define_insn "avx512f_fmadd_<mode>_mask"
2712 [(set (match_operand:VF_512 0 "register_operand" "=v,v")
2715 (match_operand:VF_512 1 "register_operand" "0,0")
2716 (match_operand:VF_512 2 "nonimmediate_operand" "vm,v")
2717 (match_operand:VF_512 3 "nonimmediate_operand" "v,vm"))
2719 (match_operand:<avx512fmaskmode> 4 "register_operand" "k,k")))]
2722 vfmadd132<ssemodesuffix>\t{%2, %3, %0%{%4%}|%0%{%4%}, %3, %2}
2723 vfmadd213<ssemodesuffix>\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}"
2724 [(set_attr "isa" "fma_avx512f,fma_avx512f")
2725 (set_attr "type" "ssemuladd")
2726 (set_attr "mode" "<MODE>")])
;; Masked 512-bit fused multiply-add, 231 form.  Operand 3 (the addend) is
;; tied to the destination; operand 4 is the mask register ("k"), attached
;; to the destination through the %{%4%} decoration in the template.
;; Operands 0 and 1 use the "v" constraint, like the other *_mask3
;; patterns: this insn is only available as isa fma_avx512f and is always
;; emitted with a mask, so there is no reason to restrict it to the
;; register class selected by "x".
2728 (define_insn "avx512f_fmadd_<mode>_mask3"
2729 [(set (match_operand:VF_512 0 "register_operand" "=v")
2732 (match_operand:VF_512 1 "register_operand" "v")
2733 (match_operand:VF_512 2 "nonimmediate_operand" "vm")
2734 (match_operand:VF_512 3 "register_operand" "0"))
2736 (match_operand:<avx512fmaskmode> 4 "register_operand" "k")))]
2738 "vfmadd231<ssemodesuffix>\t{%2, %1, %0%{%4%}|%0%{%4%}, %1, %2}"
2739 [(set_attr "isa" "fma_avx512f")
2740 (set_attr "type" "ssemuladd")
2741 (set_attr "mode" "<MODE>")])
;; Fused multiply-subtract over FMAMODE with five alternatives.
;; Alternatives 0-2 (isa fma_avx512f) are the three-operand 132, 213 and
;; 231 forms; alternatives 3-4 (isa fma4) are the four-operand form.
;; Operand 3 sits inside one more level of nesting than in the fmadd
;; pattern (presumably the neg of the addend).
2743 (define_insn "*fma_fmsub_<mode>"
2744 [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
2746 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0, v, x,x")
2747 (match_operand:FMAMODE 2 "nonimmediate_operand" "vm, v,vm, x,m")
2749 (match_operand:FMAMODE 3 "nonimmediate_operand" " v,vm, 0,xm,x"))))]
2752 vfmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2753 vfmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2754 vfmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
2755 vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
2756 vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2757 [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f,fma4,fma4")
2758 (set_attr "type" "ssemuladd")
2759 (set_attr "mode" "<MODE>")])
;; Masked 512-bit fused multiply-subtract (132 and 213 forms).  Operand 1
;; is tied to the destination; operand 4 is the mask register ("k"),
;; attached to the destination through the %{%4%} decoration.
2761 (define_insn "avx512f_fmsub_<mode>_mask"
2762 [(set (match_operand:VF_512 0 "register_operand" "=v,v")
2765 (match_operand:VF_512 1 "register_operand" "0,0")
2766 (match_operand:VF_512 2 "nonimmediate_operand" "vm,v")
2768 (match_operand:VF_512 3 "nonimmediate_operand" "v,vm")))
2770 (match_operand:<avx512fmaskmode> 4 "register_operand" "k,k")))]
2773 vfmsub132<ssemodesuffix>\t{%2, %3, %0%{%4%}|%0%{%4%}, %3, %2}
2774 vfmsub213<ssemodesuffix>\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}"
2775 [(set_attr "isa" "fma_avx512f,fma_avx512f")
2776 (set_attr "type" "ssemuladd")
2777 (set_attr "mode" "<MODE>")])
;; Masked 512-bit fused multiply-subtract, 231 form.  Operand 3 is tied to
;; the destination; operand 4 is the mask register ("k"), attached to the
;; destination through the %{%4%} decoration.
2779 (define_insn "avx512f_fmsub_<mode>_mask3"
2780 [(set (match_operand:VF_512 0 "register_operand" "=v")
2783 (match_operand:VF_512 1 "register_operand" "v")
2784 (match_operand:VF_512 2 "nonimmediate_operand" "vm")
2786 (match_operand:VF_512 3 "register_operand" "0")))
2788 (match_operand:<avx512fmaskmode> 4 "register_operand" "k")))]
2790 "vfmsub231<ssemodesuffix>\t{%2, %1, %0%{%4%}|%0%{%4%}, %1, %2}"
2791 [(set_attr "isa" "fma_avx512f")
2792 (set_attr "type" "ssemuladd")
2793 (set_attr "mode" "<MODE>")])
;; Negated fused multiply-add over FMAMODE with five alternatives.
;; Alternatives 0-2 (isa fma_avx512f) are the three-operand 132, 213 and
;; 231 forms; alternatives 3-4 (isa fma4) are the four-operand form.
;; Operand 1 sits inside one more level of nesting than in the fmadd
;; pattern (presumably the neg of the first multiplicand).
2795 (define_insn "*fma_fnmadd_<mode>"
2796 [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
2799 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0, v, x,x"))
2800 (match_operand:FMAMODE 2 "nonimmediate_operand" "vm, v,vm, x,m")
2801 (match_operand:FMAMODE 3 "nonimmediate_operand" " v,vm, 0,xm,x")))]
2804 vfnmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2805 vfnmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2806 vfnmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
2807 vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
2808 vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2809 [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f,fma4,fma4")
2810 (set_attr "type" "ssemuladd")
2811 (set_attr "mode" "<MODE>")])
;; Masked 512-bit negated fused multiply-add (132 and 213 forms).
;; Operand 1 is tied to the destination; operand 4 is the mask register
;; ("k"), attached to the destination through the %{%4%} decoration.
2813 (define_insn "avx512f_fnmadd_<mode>_mask"
2814 [(set (match_operand:VF_512 0 "register_operand" "=v,v")
2818 (match_operand:VF_512 1 "register_operand" "0,0"))
2819 (match_operand:VF_512 2 "nonimmediate_operand" "vm,v")
2820 (match_operand:VF_512 3 "nonimmediate_operand" "v,vm"))
2822 (match_operand:<avx512fmaskmode> 4 "register_operand" "k,k")))]
2825 vfnmadd132<ssemodesuffix>\t{%2, %3, %0%{%4%}|%0%{%4%}, %3, %2}
2826 vfnmadd213<ssemodesuffix>\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}"
2827 [(set_attr "isa" "fma_avx512f,fma_avx512f")
2828 (set_attr "type" "ssemuladd")
2829 (set_attr "mode" "<MODE>")])
;; Masked 512-bit negated fused multiply-add, 231 form.  Operand 3 is tied
;; to the destination; operand 4 is the mask register ("k"), attached to
;; the destination through the %{%4%} decoration.
2831 (define_insn "avx512f_fnmadd_<mode>_mask3"
2832 [(set (match_operand:VF_512 0 "register_operand" "=v")
2836 (match_operand:VF_512 1 "register_operand" "v"))
2837 (match_operand:VF_512 2 "nonimmediate_operand" "vm")
2838 (match_operand:VF_512 3 "register_operand" "0"))
2840 (match_operand:<avx512fmaskmode> 4 "register_operand" "k")))]
2842 "vfnmadd231<ssemodesuffix>\t{%2, %1, %0%{%4%}|%0%{%4%}, %1, %2}"
2843 [(set_attr "isa" "fma_avx512f")
2844 (set_attr "type" "ssemuladd")
2845 (set_attr "mode" "<MODE>")])
;; Negated fused multiply-subtract over FMAMODE with five alternatives.
;; Alternatives 0-2 (isa fma_avx512f) are the three-operand 132, 213 and
;; 231 forms; alternatives 3-4 (isa fma4) are the four-operand form.
;; Both operand 1 and operand 3 sit inside one more level of nesting than
;; in the fmadd pattern (presumably neg on each).
2847 (define_insn "*fma_fnmsub_<mode>"
2848 [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
2851 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0, v, x,x"))
2852 (match_operand:FMAMODE 2 "nonimmediate_operand" "vm, v,vm, x,m")
2854 (match_operand:FMAMODE 3 "nonimmediate_operand" " v,vm, 0,xm,x"))))]
2857 vfnmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2858 vfnmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2859 vfnmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
2860 vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
2861 vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2862 [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f,fma4,fma4")
2863 (set_attr "type" "ssemuladd")
2864 (set_attr "mode" "<MODE>")])
;; Masked 512-bit negated fused multiply-subtract (132 and 213 forms).
;; Operand 1 is tied to the destination; operand 4 is the mask register
;; ("k"), attached to the destination through the %{%4%} decoration.
2866 (define_insn "avx512f_fnmsub_<mode>_mask"
2867 [(set (match_operand:VF_512 0 "register_operand" "=v,v")
2871 (match_operand:VF_512 1 "register_operand" "0,0"))
2872 (match_operand:VF_512 2 "nonimmediate_operand" "vm,v")
2874 (match_operand:VF_512 3 "nonimmediate_operand" "v,vm")))
2876 (match_operand:<avx512fmaskmode> 4 "register_operand" "k,k")))]
2879 vfnmsub132<ssemodesuffix>\t{%2, %3, %0%{%4%}|%0%{%4%}, %3, %2}
2880 vfnmsub213<ssemodesuffix>\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}"
2881 [(set_attr "isa" "fma_avx512f,fma_avx512f")
2882 (set_attr "type" "ssemuladd")
2883 (set_attr "mode" "<MODE>")])
;; Masked 512-bit negated fused multiply-subtract, 231 form.  Operand 3 is
;; tied to the destination; operand 4 is the mask register ("k"), attached
;; to the destination through the %{%4%} decoration.
2885 (define_insn "avx512f_fnmsub_<mode>_mask3"
2886 [(set (match_operand:VF_512 0 "register_operand" "=v")
2890 (match_operand:VF_512 1 "register_operand" "v"))
2891 (match_operand:VF_512 2 "nonimmediate_operand" "vm")
2893 (match_operand:VF_512 3 "register_operand" "0")))
2895 (match_operand:<avx512fmaskmode> 4 "register_operand" "k")))]
2897 "vfnmsub231<ssemodesuffix>\t{%2, %1, %0%{%4%}|%0%{%4%}, %1, %2}"
2898 [(set_attr "isa" "fma_avx512f")
2899 (set_attr "type" "ssemuladd")
2900 (set_attr "mode" "<MODE>")])
2902 ;; FMA parallel floating point multiply addsub and subadd operations.
2904 ;; It would be possible to represent these without the UNSPEC as
2907 ;; (fma op1 op2 op3)
2908 ;; (fma op1 op2 (neg op3))
2911 ;; But this doesn't seem useful in practice.
;; Expander for the fmaddsub operation on the VF modes.  The three inputs
;; are grouped in a single bracketed operand vector; enabled when any of
;; FMA, FMA4 or AVX512F is available.
2913 (define_expand "fmaddsub_<mode>"
2914 [(set (match_operand:VF 0 "register_operand")
2916 [(match_operand:VF 1 "nonimmediate_operand")
2917 (match_operand:VF 2 "nonimmediate_operand")
2918 (match_operand:VF 3 "nonimmediate_operand")]
2920 "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
;; fmaddsub insn over the VF modes with five alternatives.
;; Alternatives 0-2 (isa fma_avx512f) are the three-operand 132, 213 and
;; 231 forms; alternatives 3-4 (isa fma4) are the four-operand form.
;; Enabled when any of FMA, FMA4 or AVX512F is available.
2922 (define_insn "*fma_fmaddsub_<mode>"
2923 [(set (match_operand:VF 0 "register_operand" "=v,v,v,x,x")
2925 [(match_operand:VF 1 "nonimmediate_operand" "%0, 0, v, x,x")
2926 (match_operand:VF 2 "nonimmediate_operand" "vm, v,vm, x,m")
2927 (match_operand:VF 3 "nonimmediate_operand" " v,vm, 0,xm,x")]
2929 "(TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F)"
2931 vfmaddsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2932 vfmaddsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2933 vfmaddsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
2934 vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
2935 vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2936 [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f,fma4,fma4")
2937 (set_attr "type" "ssemuladd")
2938 (set_attr "mode" "<MODE>")])
;; Masked 512-bit fmaddsub (132 and 213 forms).  Operand 1 is tied to the
;; destination; operand 4 is the mask register ("k"), attached to the
;; destination through the %{%4%} decoration.
2940 (define_insn "avx512f_fmaddsub_<mode>_mask"
2941 [(set (match_operand:VF_512 0 "register_operand" "=v,v")
2944 [(match_operand:VF_512 1 "register_operand" "0,0")
2945 (match_operand:VF_512 2 "nonimmediate_operand" "vm,v")
2946 (match_operand:VF_512 3 "nonimmediate_operand" "v,vm")]
2949 (match_operand:<avx512fmaskmode> 4 "register_operand" "k,k")))]
2952 vfmaddsub132<ssemodesuffix>\t{%2, %3, %0%{%4%}|%0%{%4%}, %3, %2}
2953 vfmaddsub213<ssemodesuffix>\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}"
2954 [(set_attr "isa" "fma_avx512f,fma_avx512f")
2955 (set_attr "type" "ssemuladd")
2956 (set_attr "mode" "<MODE>")])
;; Masked 512-bit fmaddsub, 231 form.  Operand 3 is tied to the
;; destination; operand 4 is the mask register ("k"), attached to the
;; destination through the %{%4%} decoration.
2958 (define_insn "avx512f_fmaddsub_<mode>_mask3"
2959 [(set (match_operand:VF_512 0 "register_operand" "=v")
2962 [(match_operand:VF_512 1 "register_operand" "v")
2963 (match_operand:VF_512 2 "nonimmediate_operand" "vm")
2964 (match_operand:VF_512 3 "register_operand" "0")]
2967 (match_operand:<avx512fmaskmode> 4 "register_operand" "k")))]
2969 "vfmaddsub231<ssemodesuffix>\t{%2, %1, %0%{%4%}|%0%{%4%}, %1, %2}"
2970 [(set_attr "isa" "fma_avx512f")
2971 (set_attr "type" "ssemuladd")
2972 (set_attr "mode" "<MODE>")])
;; fmsubadd insn over the VF modes with five alternatives.
;; Alternatives 0-2 (isa fma_avx512f) are the three-operand 132, 213 and
;; 231 forms; alternatives 3-4 (isa fma4) are the four-operand form.
;; Compared with the fmaddsub pattern, operand 3 sits inside one more
;; level of nesting (presumably a neg).
2974 (define_insn "*fma_fmsubadd_<mode>"
2975 [(set (match_operand:VF 0 "register_operand" "=v,v,v,x,x")
2977 [(match_operand:VF 1 "nonimmediate_operand" "%0, 0, v, x,x")
2978 (match_operand:VF 2 "nonimmediate_operand" "vm, v,vm, x,m")
2980 (match_operand:VF 3 "nonimmediate_operand" " v,vm, 0,xm,x"))]
2982 "(TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F)"
2984 vfmsubadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2985 vfmsubadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2986 vfmsubadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
2987 vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
2988 vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2989 [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f,fma4,fma4")
2990 (set_attr "type" "ssemuladd")
2991 (set_attr "mode" "<MODE>")])
;; Masked 512-bit fmsubadd (132 and 213 forms).  Operand 1 is tied to the
;; destination; operand 4 is the mask register ("k"), attached to the
;; destination through the %{%4%} decoration.
2993 (define_insn "avx512f_fmsubadd_<mode>_mask"
2994 [(set (match_operand:VF_512 0 "register_operand" "=v,v")
2997 [(match_operand:VF_512 1 "register_operand" "0,0")
2998 (match_operand:VF_512 2 "nonimmediate_operand" "vm,v")
3000 (match_operand:VF_512 3 "nonimmediate_operand" "v,vm"))]
3003 (match_operand:<avx512fmaskmode> 4 "register_operand" "k,k")))]
3006 vfmsubadd132<ssemodesuffix>\t{%2, %3, %0%{%4%}|%0%{%4%}, %3, %2}
3007 vfmsubadd213<ssemodesuffix>\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}"
3008 [(set_attr "isa" "fma_avx512f,fma_avx512f")
3009 (set_attr "type" "ssemuladd")
3010 (set_attr "mode" "<MODE>")])
;; Masked 512-bit fmsubadd, 231 form.  Operand 3 is tied to the
;; destination; operand 4 is the mask register ("k"), attached to the
;; destination through the %{%4%} decoration.
3012 (define_insn "avx512f_fmsubadd_<mode>_mask3"
3013 [(set (match_operand:VF_512 0 "register_operand" "=v")
3016 [(match_operand:VF_512 1 "register_operand" "v")
3017 (match_operand:VF_512 2 "nonimmediate_operand" "vm")
3019 (match_operand:VF_512 3 "register_operand" "0"))]
3022 (match_operand:<avx512fmaskmode> 4 "register_operand" "k")))]
3024 "vfmsubadd231<ssemodesuffix>\t{%2, %1, %0%{%4%}|%0%{%4%}, %1, %2}"
3025 [(set_attr "isa" "fma_avx512f")
3026 (set_attr "type" "ssemuladd")
3027 (set_attr "mode" "<MODE>")])
3029 ;; FMA3 floating point scalar intrinsics. These merge result with
3030 ;; high-order elements from the destination register.
;; Expander for the scalar fused multiply-add intrinsic on the 128-bit
;; float vector modes (VF_128).  Three plain inputs, operands 1-3.
3032 (define_expand "fmai_vmfmadd_<mode>"
3033 [(set (match_operand:VF_128 0 "register_operand")
3036 (match_operand:VF_128 1 "nonimmediate_operand")
3037 (match_operand:VF_128 2 "nonimmediate_operand")
3038 (match_operand:VF_128 3 "nonimmediate_operand"))
;; Scalar fused multiply-add on VF_128 (132 and 213 forms), emitted with
;; the scalar suffix <ssescalarmodesuffix>.  Operand 1 is tied to the
;; destination in both alternatives.  Requires FMA or AVX512F.
3043 (define_insn "*fmai_fmadd_<mode>"
3044 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
3047 (match_operand:VF_128 1 "nonimmediate_operand" " 0, 0")
3048 (match_operand:VF_128 2 "nonimmediate_operand" "vm, v")
3049 (match_operand:VF_128 3 "nonimmediate_operand" " v,vm"))
3052 "TARGET_FMA || TARGET_AVX512F"
3054 vfmadd132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %<iptr>3, %<iptr>2}
3055 vfmadd213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %<iptr>2, %<iptr>3}"
3056 [(set_attr "type" "ssemuladd")
3057 (set_attr "mode" "<MODE>")])
;; Scalar fused multiply-subtract on VF_128 (132 and 213 forms), emitted
;; with the scalar suffix <ssescalarmodesuffix>.  Operand 1 is tied to the
;; destination in both alternatives.  Requires FMA or AVX512F.
3059 (define_insn "*fmai_fmsub_<mode>"
3060 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
3063 (match_operand:VF_128 1 "nonimmediate_operand" " 0, 0")
3064 (match_operand:VF_128 2 "nonimmediate_operand" "vm, v")
3066 (match_operand:VF_128 3 "nonimmediate_operand" " v,vm")))
3069 "TARGET_FMA || TARGET_AVX512F"
3071 vfmsub132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %<iptr>3, %<iptr>2}
3072 vfmsub213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %<iptr>2, %<iptr>3}"
3073 [(set_attr "type" "ssemuladd")
3074 (set_attr "mode" "<MODE>")])
;; Scalar negated fused multiply-add on VF_128 (132 and 213 forms).
;; Unlike the fmadd pattern, operand 2 is listed first and closes an
;; enclosing sub-expression (presumably the neg), followed by operand 1,
;; which is tied to the destination.  Requires FMA or AVX512F.
3076 (define_insn "*fmai_fnmadd_<mode>"
3077 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
3081 (match_operand:VF_128 2 "nonimmediate_operand" "vm, v"))
3082 (match_operand:VF_128 1 "nonimmediate_operand" " 0, 0")
3083 (match_operand:VF_128 3 "nonimmediate_operand" " v,vm"))
3086 "TARGET_FMA || TARGET_AVX512F"
3088 vfnmadd132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %<iptr>3, %<iptr>2}
3089 vfnmadd213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %<iptr>2, %<iptr>3}"
3090 [(set_attr "type" "ssemuladd")
3091 (set_attr "mode" "<MODE>")])
;; Scalar negated fused multiply-subtract on VF_128 (132 and 213 forms).
;; Operand 2 is listed first and closes an enclosing sub-expression
;; (presumably the neg), followed by operand 1, which is tied to the
;; destination; operand 3 is also nested one level deeper than in the
;; fmadd pattern.  Requires FMA or AVX512F.
3093 (define_insn "*fmai_fnmsub_<mode>"
3094 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
3098 (match_operand:VF_128 2 "nonimmediate_operand" "vm, v"))
3099 (match_operand:VF_128 1 "nonimmediate_operand" " 0, 0")
3101 (match_operand:VF_128 3 "nonimmediate_operand" " v,vm")))
3104 "TARGET_FMA || TARGET_AVX512F"
3106 vfnmsub132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %<iptr>3, %<iptr>2}
3107 vfnmsub213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %<iptr>2, %<iptr>3}"
3108 [(set_attr "type" "ssemuladd")
3109 (set_attr "mode" "<MODE>")])
3111 ;; FMA4 floating point scalar intrinsics. These write the
3112 ;; entire destination register, with the high-order elements zeroed.
;; Expander for the FMA4 scalar multiply-add intrinsic on VF_128.  The
;; preparation statement sets operand 4 to the all-zero constant of the
;; vector mode (CONST0_RTX).
3114 (define_expand "fma4i_vmfmadd_<mode>"
3115 [(set (match_operand:VF_128 0 "register_operand")
3118 (match_operand:VF_128 1 "nonimmediate_operand")
3119 (match_operand:VF_128 2 "nonimmediate_operand")
3120 (match_operand:VF_128 3 "nonimmediate_operand"))
3124 "operands[4] = CONST0_RTX (<MODE>mode);")
;; FMA4 scalar multiply-add on VF_128, four-operand form.  Operand 4 must
;; be a zero constant (const0_operand).  The two alternatives differ in
;; whether operand 3 or operand 2 may be in memory.
3126 (define_insn "*fma4i_vmfmadd_<mode>"
3127 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
3130 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
3131 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
3132 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
3133 (match_operand:VF_128 4 "const0_operand")
3136 "vfmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
3137 [(set_attr "type" "ssemuladd")
3138 (set_attr "mode" "<MODE>")])
;; FMA4 scalar multiply-subtract on VF_128, four-operand form.  Operand 4
;; must be a zero constant (const0_operand).  The two alternatives differ
;; in whether operand 3 or operand 2 may be in memory.
3140 (define_insn "*fma4i_vmfmsub_<mode>"
3141 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
3144 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
3145 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
3147 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
3148 (match_operand:VF_128 4 "const0_operand")
3151 "vfmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
3152 [(set_attr "type" "ssemuladd")
3153 (set_attr "mode" "<MODE>")])
;; FMA4 scalar negated multiply-add on VF_128, four-operand form.
;; Operand 4 must be a zero constant (const0_operand).  The two
;; alternatives differ in whether operand 3 or operand 2 may be in memory.
3155 (define_insn "*fma4i_vmfnmadd_<mode>"
3156 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
3160 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
3161 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
3162 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
3163 (match_operand:VF_128 4 "const0_operand")
3166 "vfnmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
3167 [(set_attr "type" "ssemuladd")
3168 (set_attr "mode" "<MODE>")])
;; FMA4 scalar negated multiply-subtract on VF_128, four-operand form.
;; Operand 4 must be a zero constant (const0_operand).  The two
;; alternatives differ in whether operand 3 or operand 2 may be in memory.
3170 (define_insn "*fma4i_vmfnmsub_<mode>"
3171 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
3175 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
3176 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
3178 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
3179 (match_operand:VF_128 4 "const0_operand")
3182 "vfnmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
3183 [(set_attr "type" "ssemuladd")
3184 (set_attr "mode" "<MODE>")])
3186 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3188 ;; Parallel single-precision floating point conversion operations
3190 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2ps: converts the V2SI operand 2 (constraint "ym") to V2SF with
;; float and combines it with the V4SF operand 1, which is tied to the
;; destination.
3192 (define_insn "sse_cvtpi2ps"
3193 [(set (match_operand:V4SF 0 "register_operand" "=x")
3196 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
3197 (match_operand:V4SF 1 "register_operand" "0")
3200 "cvtpi2ps\t{%2, %0|%0, %2}"
3201 [(set_attr "type" "ssecvt")
3202 (set_attr "mode" "V4SF")])
;; cvtps2pi: applies a V4SI unspec to the V4SF operand 1 and keeps
;; elements 0 and 1 as the V2SI result in a "y"-class register.
;; Marked as using the mmx unit.
3204 (define_insn "sse_cvtps2pi"
3205 [(set (match_operand:V2SI 0 "register_operand" "=y")
3207 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
3209 (parallel [(const_int 0) (const_int 1)])))]
3211 "cvtps2pi\t{%1, %0|%0, %q1}"
3212 [(set_attr "type" "ssecvt")
3213 (set_attr "unit" "mmx")
3214 (set_attr "mode" "DI")])
;; cvttps2pi: converts the V4SF operand 1 with fix:V4SI and keeps elements
;; 0 and 1 as the V2SI result in a "y"-class register.  Marked as using the
;; mmx unit.
3216 (define_insn "sse_cvttps2pi"
3217 [(set (match_operand:V2SI 0 "register_operand" "=y")
3219 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
3220 (parallel [(const_int 0) (const_int 1)])))]
3222 "cvttps2pi\t{%1, %0|%0, %q1}"
3223 [(set_attr "type" "ssecvt")
3224 (set_attr "unit" "mmx")
3225 (set_attr "prefix_rep" "0")
3226 (set_attr "mode" "SF")])
;; cvtsi2ss: converts the SImode operand 2 to SF and combines it with the
;; V4SF operand 1.  Alternatives 0 and 1 (isa noavx) take the source from
;; a register or from memory with operand 1 tied to the destination;
;; alternative 2 (isa avx) is the three-operand "v"-prefixed form.  The
;; per-CPU decode attributes are given separately for each alternative.
3228 (define_insn "sse_cvtsi2ss"
3229 [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
3232 (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m,rm")))
3233 (match_operand:V4SF 1 "register_operand" "0,0,v")
3237 cvtsi2ss\t{%2, %0|%0, %2}
3238 cvtsi2ss\t{%2, %0|%0, %2}
3239 vcvtsi2ss\t{%2, %1, %0|%0, %1, %2}"
3240 [(set_attr "isa" "noavx,noavx,avx")
3241 (set_attr "type" "sseicvt")
3242 (set_attr "athlon_decode" "vector,double,*")
3243 (set_attr "amdfam10_decode" "vector,double,*")
3244 (set_attr "bdver1_decode" "double,direct,*")
3245 (set_attr "btver2_decode" "double,double,double")
3246 (set_attr "prefix" "orig,orig,maybe_evex")
3247 (set_attr "mode" "SF")])
;; 64-bit variant of cvtsi2ss: converts the DImode operand 2 to SF and
;; combines it with the V4SF operand 1.  Only available with
;; TARGET_SSE && TARGET_64BIT.  Alternatives 0 and 1 (isa noavx) tie
;; operand 1 to the destination and set prefix_rex; alternative 2 (isa avx)
;; is the three-operand form with length_vex 4.
3249 (define_insn "sse_cvtsi2ssq"
3250 [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
3253 (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,m,rm")))
3254 (match_operand:V4SF 1 "register_operand" "0,0,v")
3256 "TARGET_SSE && TARGET_64BIT"
3258 cvtsi2ssq\t{%2, %0|%0, %2}
3259 cvtsi2ssq\t{%2, %0|%0, %2}
3260 vcvtsi2ssq\t{%2, %1, %0|%0, %1, %2}"
3261 [(set_attr "isa" "noavx,noavx,avx")
3262 (set_attr "type" "sseicvt")
3263 (set_attr "athlon_decode" "vector,double,*")
3264 (set_attr "amdfam10_decode" "vector,double,*")
3265 (set_attr "bdver1_decode" "double,direct,*")
3266 (set_attr "btver2_decode" "double,double,double")
3267 (set_attr "length_vex" "*,*,4")
3268 (set_attr "prefix_rex" "1,1,*")
3269 (set_attr "prefix" "orig,orig,maybe_evex")
3270 (set_attr "mode" "SF")])
;; cvtss2si on a vector source: lane 0 of the V4SF operand 1 (register or
;; memory) is converted to SImode through UNSPEC_FIX_NOTRUNC.
3272 (define_insn "sse_cvtss2si"
3273 [(set (match_operand:SI 0 "register_operand" "=r,r")
3276 (match_operand:V4SF 1 "nonimmediate_operand" "v,m")
3277 (parallel [(const_int 0)]))]
3278 UNSPEC_FIX_NOTRUNC))]
3280 "%vcvtss2si\t{%1, %0|%0, %k1}"
3281 [(set_attr "type" "sseicvt")
3282 (set_attr "athlon_decode" "double,vector")
3283 (set_attr "bdver1_decode" "double,double")
3284 (set_attr "prefix_rep" "1")
3285 (set_attr "prefix" "maybe_vex")
3286 (set_attr "mode" "SI")])
;; Scalar-operand form of cvtss2si: same UNSPEC_FIX_NOTRUNC conversion to
;; SImode, but operand 1 is a plain SF value (vector register or memory)
;; instead of an element taken from a V4SF vector.
3288 (define_insn "sse_cvtss2si_2"
3289 [(set (match_operand:SI 0 "register_operand" "=r,r")
3290 (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "v,m")]
3291 UNSPEC_FIX_NOTRUNC))]
3293 "%vcvtss2si\t{%1, %0|%0, %k1}"
3294 [(set_attr "type" "sseicvt")
3295 (set_attr "athlon_decode" "double,vector")
3296 (set_attr "amdfam10_decode" "double,double")
3297 (set_attr "bdver1_decode" "double,double")
3298 (set_attr "prefix_rep" "1")
3299 (set_attr "prefix" "maybe_vex")
3300 (set_attr "mode" "SI")])
;; 64-bit-result cvtss2si: UNSPEC_FIX_NOTRUNC conversion of element index 0
;; of V4SF operand 1 into DImode register operand 0.  Requires
;; TARGET_64BIT; the AT&T mnemonic gets a "q" suffix via the {q} marker.
3302 (define_insn "sse_cvtss2siq"
3303 [(set (match_operand:DI 0 "register_operand" "=r,r")
3306 (match_operand:V4SF 1 "nonimmediate_operand" "v,m")
3307 (parallel [(const_int 0)]))]
3308 UNSPEC_FIX_NOTRUNC))]
3309 "TARGET_SSE && TARGET_64BIT"
3310 "%vcvtss2si{q}\t{%1, %0|%0, %k1}"
3311 [(set_attr "type" "sseicvt")
3312 (set_attr "athlon_decode" "double,vector")
3313 (set_attr "bdver1_decode" "double,double")
3314 (set_attr "prefix_rep" "1")
3315 (set_attr "prefix" "maybe_vex")
3316 (set_attr "mode" "DI")])
;; Scalar-operand form of the 64-bit cvtss2si: UNSPEC_FIX_NOTRUNC of an SF
;; operand 1 (vector register or memory) into DImode register operand 0.
;; Requires TARGET_64BIT.
3318 (define_insn "sse_cvtss2siq_2"
3319 [(set (match_operand:DI 0 "register_operand" "=r,r")
3320 (unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "v,m")]
3321 UNSPEC_FIX_NOTRUNC))]
3322 "TARGET_SSE && TARGET_64BIT"
3323 "%vcvtss2si{q}\t{%1, %0|%0, %k1}"
3324 [(set_attr "type" "sseicvt")
3325 (set_attr "athlon_decode" "double,vector")
3326 (set_attr "amdfam10_decode" "double,double")
3327 (set_attr "bdver1_decode" "double,double")
3328 (set_attr "prefix_rep" "1")
3329 (set_attr "prefix" "maybe_vex")
3330 (set_attr "mode" "DI")])
;; cvttss2si (optionally VEX-encoded): conversion of element index 0 of
;; V4SF operand 1 into SImode register operand 0.  Source is a vector
;; register (alternative 0) or memory (alternative 1).
3332 (define_insn "sse_cvttss2si"
3333 [(set (match_operand:SI 0 "register_operand" "=r,r")
3336 (match_operand:V4SF 1 "nonimmediate_operand" "v,m")
3337 (parallel [(const_int 0)]))))]
3339 "%vcvttss2si\t{%1, %0|%0, %k1}"
3340 [(set_attr "type" "sseicvt")
3341 (set_attr "athlon_decode" "double,vector")
3342 (set_attr "amdfam10_decode" "double,double")
3343 (set_attr "bdver1_decode" "double,double")
3344 (set_attr "prefix_rep" "1")
3345 (set_attr "prefix" "maybe_vex")
3346 (set_attr "mode" "SI")])
;; 64-bit-result cvttss2si: conversion of element index 0 of V4SF operand 1
;; into DImode register operand 0.  Requires TARGET_64BIT.
;; NOTE(review): the second alternative of operand 1 is "vm" here, while
;; sse_cvttss2si uses "m" for the same slot -- confirm this is intentional.
3348 (define_insn "sse_cvttss2siq"
3349 [(set (match_operand:DI 0 "register_operand" "=r,r")
3352 (match_operand:V4SF 1 "nonimmediate_operand" "v,vm")
3353 (parallel [(const_int 0)]))))]
3354 "TARGET_SSE && TARGET_64BIT"
3355 "%vcvttss2si{q}\t{%1, %0|%0, %k1}"
3356 [(set_attr "type" "sseicvt")
3357 (set_attr "athlon_decode" "double,vector")
3358 (set_attr "amdfam10_decode" "double,double")
3359 (set_attr "bdver1_decode" "double,double")
3360 (set_attr "prefix_rep" "1")
3361 (set_attr "prefix" "maybe_vex")
3362 (set_attr "mode" "DI")])
;; vcvtusi2ss / vcvtusi2sd (suffix chosen by <ssescalarmodesuffix>):
;; unsigned_float conversion of SImode operand 2 to the scalar mode of the
;; VF_128 vector, duplicated across the vector, with VF_128 operand 1 as
;; the other vector input.  EVEX-encoded, three-operand form only.
3364 (define_insn "cvtusi2<ssescalarmodesuffix>32"
3365 [(set (match_operand:VF_128 0 "register_operand" "=v")
3367 (vec_duplicate:VF_128
3368 (unsigned_float:<ssescalarmode>
3369 (match_operand:SI 2 "nonimmediate_operand" "rm")))
3370 (match_operand:VF_128 1 "register_operand" "v")
3373 "vcvtusi2<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
3374 [(set_attr "type" "sseicvt")
3375 (set_attr "prefix" "evex")
3376 (set_attr "mode" "<ssescalarmode>")])
;; 64-bit-source variant of vcvtusi2ss / vcvtusi2sd: unsigned_float
;; conversion of DImode operand 2, duplicated across the VF_128 vector,
;; with VF_128 operand 1 as the other vector input.  Requires
;; TARGET_AVX512F and TARGET_64BIT.
3378 (define_insn "cvtusi2<ssescalarmodesuffix>64"
3379 [(set (match_operand:VF_128 0 "register_operand" "=v")
3381 (vec_duplicate:VF_128
3382 (unsigned_float:<ssescalarmode>
3383 (match_operand:DI 2 "nonimmediate_operand" "rm")))
3384 (match_operand:VF_128 1 "register_operand" "v")
3386 "TARGET_AVX512F && TARGET_64BIT"
3387 "vcvtusi2<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
3388 [(set_attr "type" "sseicvt")
3389 (set_attr "prefix" "evex")
3390 (set_attr "mode" "<ssescalarmode>")])
;; cvtdq2ps (optionally VEX-encoded): conversion of the integer vector in
;; operand 1 (mode <sseintvecmode>, register or memory) to the VF1 vector
;; in operand 0.  <mask_operand2> is appended to the destination in the
;; output template for the masked variant.
3392 (define_insn "float<sseintvecmodelower><mode>2<mask_name>"
3393 [(set (match_operand:VF1 0 "register_operand" "=v")
3395 (match_operand:<sseintvecmode> 1 "nonimmediate_operand" "vm")))]
3396 "TARGET_SSE2 && <mask_mode512bit_condition>"
3397 "%vcvtdq2ps\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
3398 [(set_attr "type" "ssecvt")
3399 (set_attr "prefix" "maybe_vex")
3400 (set_attr "mode" "<sseinsnmode>")])
;; vcvtudq2ps: unsigned_float conversion of V16SI operand 1 (register or
;; memory) to V16SF operand 0.  EVEX-encoded; <mask_operand2> is appended
;; to the destination for the masked variant.
3402 (define_insn "ufloatv16siv16sf2<mask_name>"
3403 [(set (match_operand:V16SF 0 "register_operand" "=v")
3404 (unsigned_float:V16SF
3405 (match_operand:V16SI 1 "nonimmediate_operand" "vm")))]
3407 "vcvtudq2ps\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
3408 [(set_attr "type" "ssecvt")
3409 (set_attr "prefix" "evex")
3410 (set_attr "mode" "V16SF")])
;; Expander for the unsigned integer-vector to VF1 conversion.  Enabled
;; for TARGET_SSE2 when the mode is V4SF, or for any VF1 mode with
;; TARGET_AVX2.  The expansion is delegated to
;; ix86_expand_vector_convert_uns_vsivsf (destination, source).
3412 (define_expand "floatuns<sseintvecmodelower><mode>2"
3413 [(match_operand:VF1 0 "register_operand")
3414 (match_operand:<sseintvecmode> 1 "register_operand")]
3415 "TARGET_SSE2 && (<MODE>mode == V4SFmode || TARGET_AVX2)"
3417 ix86_expand_vector_convert_uns_vsivsf (operands[0], operands[1]);
3422 ;; For <sse2_avx_avx512f>_fix_notrunc<sf2simodelower><mode> insn pattern:
;; maps each SI vector mode to the lower-case name of the SF vector mode
;; with the same element count.
3423 (define_mode_attr sf2simodelower
3424 [(V16SI "v16sf") (V8SI "v8sf") (V4SI "v4sf")])
;; cvtps2dq (optionally VEX-encoded): UNSPEC_FIX_NOTRUNC conversion of the
;; <ssePSmode> vector in operand 1 (register or memory) to the VI4_AVX
;; integer vector in operand 0.  The prefix_data16 attribute is computed
;; from a TARGET_AVX test, with "1" as one of the two outcomes.
3426 (define_insn "<sse2_avx_avx512f>_fix_notrunc<sf2simodelower><mode>"
3427 [(set (match_operand:VI4_AVX 0 "register_operand" "=v")
3429 [(match_operand:<ssePSmode> 1 "nonimmediate_operand" "vm")]
3430 UNSPEC_FIX_NOTRUNC))]
3432 "%vcvtps2dq\t{%1, %0|%0, %1}"
3433 [(set_attr "type" "ssecvt")
3434 (set (attr "prefix_data16")
3436 (match_test "TARGET_AVX")
3438 (const_string "1")))
3439 (set_attr "prefix" "maybe_vex")
3440 (set_attr "mode" "<sseinsnmode>")])
;; vcvtps2dq, 512-bit: UNSPEC_FIX_NOTRUNC conversion of V16SF operand 1
;; (register or memory) to V16SI operand 0.  EVEX-encoded;
;; <mask_operand2> is appended to the destination for the masked variant.
3442 (define_insn "<mask_codefor>avx512f_fix_notruncv16sfv16si<mask_name>"
3443 [(set (match_operand:V16SI 0 "register_operand" "=v")
3445 [(match_operand:V16SF 1 "nonimmediate_operand" "vm")]
3446 UNSPEC_FIX_NOTRUNC))]
3448 "vcvtps2dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
3449 [(set_attr "type" "ssecvt")
3450 (set_attr "prefix" "evex")
3451 (set_attr "mode" "XI")])
;; vcvtps2udq, 512-bit: UNSPEC_UNSIGNED_FIX_NOTRUNC conversion of V16SF
;; operand 1 (register or memory) to V16SI operand 0.  EVEX-encoded;
;; <mask_operand2> is appended to the destination for the masked variant.
3453 (define_insn "<mask_codefor>avx512f_ufix_notruncv16sfv16si<mask_name>"
3454 [(set (match_operand:V16SI 0 "register_operand" "=v")
3456 [(match_operand:V16SF 1 "nonimmediate_operand" "vm")]
3457 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
3459 "vcvtps2udq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
3460 [(set_attr "type" "ssecvt")
3461 (set_attr "prefix" "evex")
3462 (set_attr "mode" "XI")])
;; vcvttps2dq / vcvttps2<fixsuffix>dq, 512-bit: conversion of V16SF
;; operand 1 (register or memory) to V16SI operand 0; <fixsuffix> is
;; spliced into both the pattern name and the mnemonic.  EVEX-encoded.
3464 (define_insn "<fixsuffix>fix_truncv16sfv16si2<mask_name>"
3465 [(set (match_operand:V16SI 0 "register_operand" "=v")
3467 (match_operand:V16SF 1 "nonimmediate_operand" "vm")))]
3469 "vcvttps2<fixsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
3470 [(set_attr "type" "ssecvt")
3471 (set_attr "prefix" "evex")
3472 (set_attr "mode" "XI")])
;; vcvttps2dq, 256-bit: fix conversion of V8SF operand 1 (register or
;; memory) to V8SI operand 0.  Always VEX-encoded.
3474 (define_insn "fix_truncv8sfv8si2"
3475 [(set (match_operand:V8SI 0 "register_operand" "=x")
3476 (fix:V8SI (match_operand:V8SF 1 "nonimmediate_operand" "xm")))]
3478 "vcvttps2dq\t{%1, %0|%0, %1}"
3479 [(set_attr "type" "ssecvt")
3480 (set_attr "prefix" "vex")
3481 (set_attr "mode" "OI")])
;; cvttps2dq (optionally VEX-encoded), 128-bit: fix conversion of V4SF
;; operand 1 (register or memory) to V4SI operand 0.  prefix_rep and
;; prefix_data16 are both computed from a TARGET_AVX test.
;; NOTE(review): prefix_data16 is set twice below -- once by the
;; conditional (set (attr ...)) form and once unconditionally to "0".
;; One of the two looks redundant; confirm which is intended.
3483 (define_insn "fix_truncv4sfv4si2"
3484 [(set (match_operand:V4SI 0 "register_operand" "=x")
3485 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
3487 "%vcvttps2dq\t{%1, %0|%0, %1}"
3488 [(set_attr "type" "ssecvt")
3489 (set (attr "prefix_rep")
3491 (match_test "TARGET_AVX")
3493 (const_string "1")))
3494 (set (attr "prefix_data16")
3496 (match_test "TARGET_AVX")
3498 (const_string "0")))
3499 (set_attr "prefix_data16" "0")
3500 (set_attr "prefix" "maybe_vex")
3501 (set_attr "mode" "TI")])
;; Expander for the VF1 to unsigned integer-vector truncating conversion,
;; built on the signed conversion:
;;   1. ix86_expand_adjust_ufix_to_sfix_si adjusts operand 1 and also
;;      fills tmp[2] through its second argument;
;;   2. the signed fix_trunc pattern converts the adjusted value into a
;;      fresh integer-vector register tmp[1];
;;   3. tmp[1] XOR tmp[2] is written to operand 0.
3503 (define_expand "fixuns_trunc<mode><sseintvecmodelower>2"
3504 [(match_operand:<sseintvecmode> 0 "register_operand")
3505 (match_operand:VF1 1 "register_operand")]
3509 tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]);
3510 tmp[1] = gen_reg_rtx (<sseintvecmode>mode);
3511 emit_insn (gen_fix_trunc<mode><sseintvecmodelower>2 (tmp[1], tmp[0]));
3512 emit_insn (gen_xor<sseintvecmodelower>3 (operands[0], tmp[1], tmp[2]));
3516 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3518 ;; Parallel double-precision floating point conversion operations
3520 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2pd: float conversion of V2SI operand 1 to V2DF operand 0.
;; Alternative 0 takes the source from a "y" register and is tagged with
;; unit "mmx" and prefix_data16 1; alternative 1 takes it from memory.
3522 (define_insn "sse2_cvtpi2pd"
3523 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
3524 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
3526 "cvtpi2pd\t{%1, %0|%0, %1}"
3527 [(set_attr "type" "ssecvt")
3528 (set_attr "unit" "mmx,*")
3529 (set_attr "prefix_data16" "1,*")
3530 (set_attr "mode" "V2DF")])
;; cvtpd2pi: UNSPEC_FIX_NOTRUNC conversion of V2DF operand 1 (register or
;; memory) to V2SI operand 0 in a "y" register.  Tagged as an MMX-unit
;; conversion with a data16 prefix.
3532 (define_insn "sse2_cvtpd2pi"
3533 [(set (match_operand:V2SI 0 "register_operand" "=y")
3534 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
3535 UNSPEC_FIX_NOTRUNC))]
3537 "cvtpd2pi\t{%1, %0|%0, %1}"
3538 [(set_attr "type" "ssecvt")
3539 (set_attr "unit" "mmx")
3540 (set_attr "bdver1_decode" "double")
3541 (set_attr "btver2_decode" "direct")
3542 (set_attr "prefix_data16" "1")
3543 (set_attr "mode" "DI")])
;; cvttpd2pi: fix conversion of V2DF operand 1 (register or memory) to
;; V2SI operand 0 in a "y" register.  Tagged as an MMX-unit conversion
;; with a data16 prefix.
3545 (define_insn "sse2_cvttpd2pi"
3546 [(set (match_operand:V2SI 0 "register_operand" "=y")
3547 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
3549 "cvttpd2pi\t{%1, %0|%0, %1}"
3550 [(set_attr "type" "ssecvt")
3551 (set_attr "unit" "mmx")
3552 (set_attr "bdver1_decode" "double")
3553 (set_attr "prefix_data16" "1")
3554 (set_attr "mode" "TI")])
;; cvtsi2sd / vcvtsi2sd: float conversion of SImode operand 2 to DF, with
;; V2DF operand 1 as the other vector input and V2DF operand 0 as result.
;; Alternatives 0 and 1 are the non-AVX forms (operand 1 tied to the
;; destination); alternative 2 is the three-operand AVX form.
;; NOTE(review): the AVX alternative uses "x" constraints and prefix
;; "vex", whereas sse2_cvtsi2sdq uses "v" and "maybe_evex" -- confirm the
;; difference is intentional.
3556 (define_insn "sse2_cvtsi2sd"
3557 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
3560 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m,rm")))
3561 (match_operand:V2DF 1 "register_operand" "0,0,x")
3565 cvtsi2sd\t{%2, %0|%0, %2}
3566 cvtsi2sd\t{%2, %0|%0, %2}
3567 vcvtsi2sd\t{%2, %1, %0|%0, %1, %2}"
3568 [(set_attr "isa" "noavx,noavx,avx")
3569 (set_attr "type" "sseicvt")
3570 (set_attr "athlon_decode" "double,direct,*")
3571 (set_attr "amdfam10_decode" "vector,double,*")
3572 (set_attr "bdver1_decode" "double,direct,*")
3573 (set_attr "btver2_decode" "double,double,double")
3574 (set_attr "prefix" "orig,orig,vex")
3575 (set_attr "mode" "DF")])
;; cvtsi2sdq / vcvtsi2sdq: 64-bit-source variant of the integer-to-DF
;; conversion; operand 2 is DImode and the pattern requires TARGET_64BIT.
;; The two non-AVX alternatives carry prefix_rex 1; the AVX alternative
;; sets length_vex to 4 instead.
3577 (define_insn "sse2_cvtsi2sdq"
3578 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
3581 (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m,rm")))
3582 (match_operand:V2DF 1 "register_operand" "0,0,v")
3584 "TARGET_SSE2 && TARGET_64BIT"
3586 cvtsi2sdq\t{%2, %0|%0, %2}
3587 cvtsi2sdq\t{%2, %0|%0, %2}
3588 vcvtsi2sdq\t{%2, %1, %0|%0, %1, %2}"
3589 [(set_attr "isa" "noavx,noavx,avx")
3590 (set_attr "type" "sseicvt")
3591 (set_attr "athlon_decode" "double,direct,*")
3592 (set_attr "amdfam10_decode" "vector,double,*")
3593 (set_attr "bdver1_decode" "double,direct,*")
3594 (set_attr "length_vex" "*,*,4")
3595 (set_attr "prefix_rex" "1,1,*")
3596 (set_attr "prefix" "orig,orig,maybe_evex")
3597 (set_attr "mode" "DF")])
;; vcvtss2usi: UNSPEC_UNSIGNED_FIX_NOTRUNC conversion of element index 0
;; of V4SF operand 1 (register or memory) into SImode register operand 0.
;; EVEX-encoded.
3599 (define_insn "avx512f_vcvtss2usi"
3600 [(set (match_operand:SI 0 "register_operand" "=r")
3603 (match_operand:V4SF 1 "nonimmediate_operand" "vm")
3604 (parallel [(const_int 0)]))]
3605 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
3607 "vcvtss2usi\t{%1, %0|%0, %1}"
3608 [(set_attr "type" "sseicvt")
3609 (set_attr "prefix" "evex")
3610 (set_attr "mode" "SI")])
;; 64-bit-result vcvtss2usi: UNSPEC_UNSIGNED_FIX_NOTRUNC conversion of
;; element index 0 of V4SF operand 1 into DImode register operand 0.
;; Requires TARGET_AVX512F and TARGET_64BIT.
3612 (define_insn "avx512f_vcvtss2usiq"
3613 [(set (match_operand:DI 0 "register_operand" "=r")
3616 (match_operand:V4SF 1 "nonimmediate_operand" "vm")
3617 (parallel [(const_int 0)]))]
3618 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
3619 "TARGET_AVX512F && TARGET_64BIT"
3620 "vcvtss2usi\t{%1, %0|%0, %1}"
3621 [(set_attr "type" "sseicvt")
3622 (set_attr "prefix" "evex")
3623 (set_attr "mode" "DI")])
;; vcvttss2usi: conversion of element index 0 of V4SF operand 1 (register
;; or memory) into SImode register operand 0.  EVEX-encoded.
3625 (define_insn "avx512f_vcvttss2usi"
3626 [(set (match_operand:SI 0 "register_operand" "=r")
3629 (match_operand:V4SF 1 "nonimmediate_operand" "vm")
3630 (parallel [(const_int 0)]))))]
3632 "vcvttss2usi\t{%1, %0|%0, %1}"
3633 [(set_attr "type" "sseicvt")
3634 (set_attr "prefix" "evex")
3635 (set_attr "mode" "SI")])
;; 64-bit-result vcvttss2usi: conversion of element index 0 of V4SF
;; operand 1 into DImode register operand 0.  Requires TARGET_AVX512F and
;; TARGET_64BIT.
3637 (define_insn "avx512f_vcvttss2usiq"
3638 [(set (match_operand:DI 0 "register_operand" "=r")
3641 (match_operand:V4SF 1 "nonimmediate_operand" "vm")
3642 (parallel [(const_int 0)]))))]
3643 "TARGET_AVX512F && TARGET_64BIT"
3644 "vcvttss2usi\t{%1, %0|%0, %1}"
3645 [(set_attr "type" "sseicvt")
3646 (set_attr "prefix" "evex")
3647 (set_attr "mode" "DI")])
;; vcvtsd2usi: UNSPEC_UNSIGNED_FIX_NOTRUNC conversion of element index 0
;; of V2DF operand 1 (register or memory) into SImode register operand 0.
;; EVEX-encoded.
3649 (define_insn "avx512f_vcvtsd2usi"
3650 [(set (match_operand:SI 0 "register_operand" "=r")
3653 (match_operand:V2DF 1 "nonimmediate_operand" "vm")
3654 (parallel [(const_int 0)]))]
3655 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
3657 "vcvtsd2usi\t{%1, %0|%0, %1}"
3658 [(set_attr "type" "sseicvt")
3659 (set_attr "prefix" "evex")
3660 (set_attr "mode" "SI")])
;; 64-bit-result vcvtsd2usi: UNSPEC_UNSIGNED_FIX_NOTRUNC conversion of
;; element index 0 of V2DF operand 1 into DImode register operand 0.
;; Requires TARGET_AVX512F and TARGET_64BIT.
3662 (define_insn "avx512f_vcvtsd2usiq"
3663 [(set (match_operand:DI 0 "register_operand" "=r")
3666 (match_operand:V2DF 1 "nonimmediate_operand" "vm")
3667 (parallel [(const_int 0)]))]
3668 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
3669 "TARGET_AVX512F && TARGET_64BIT"
3670 "vcvtsd2usi\t{%1, %0|%0, %1}"
3671 [(set_attr "type" "sseicvt")
3672 (set_attr "prefix" "evex")
3673 (set_attr "mode" "DI")])
;; vcvttsd2usi: conversion of element index 0 of V2DF operand 1 (register
;; or memory) into SImode register operand 0.  EVEX-encoded.
3675 (define_insn "avx512f_vcvttsd2usi"
3676 [(set (match_operand:SI 0 "register_operand" "=r")
3679 (match_operand:V2DF 1 "nonimmediate_operand" "vm")
3680 (parallel [(const_int 0)]))))]
3682 "vcvttsd2usi\t{%1, %0|%0, %1}"
3683 [(set_attr "type" "sseicvt")
3684 (set_attr "prefix" "evex")
3685 (set_attr "mode" "SI")])
;; 64-bit-result vcvttsd2usi: conversion of element index 0 of V2DF
;; operand 1 into DImode register operand 0.  Requires TARGET_AVX512F and
;; TARGET_64BIT.
3687 (define_insn "avx512f_vcvttsd2usiq"
3688 [(set (match_operand:DI 0 "register_operand" "=r")
3691 (match_operand:V2DF 1 "nonimmediate_operand" "vm")
3692 (parallel [(const_int 0)]))))]
3693 "TARGET_AVX512F && TARGET_64BIT"
3694 "vcvttsd2usi\t{%1, %0|%0, %1}"
3695 [(set_attr "type" "sseicvt")
3696 (set_attr "prefix" "evex")
3697 (set_attr "mode" "DI")])
;; cvtsd2si (optionally VEX-encoded): UNSPEC_FIX_NOTRUNC conversion of
;; element index 0 of V2DF operand 1 into SImode register operand 0.
;; Alternative 0 takes the source from a vector register, alternative 1
;; from memory.
3699 (define_insn "sse2_cvtsd2si"
3700 [(set (match_operand:SI 0 "register_operand" "=r,r")
3703 (match_operand:V2DF 1 "nonimmediate_operand" "v,m")
3704 (parallel [(const_int 0)]))]
3705 UNSPEC_FIX_NOTRUNC))]
3707 "%vcvtsd2si\t{%1, %0|%0, %q1}"
3708 [(set_attr "type" "sseicvt")
3709 (set_attr "athlon_decode" "double,vector")
3710 (set_attr "bdver1_decode" "double,double")
3711 (set_attr "btver2_decode" "double,double")
3712 (set_attr "prefix_rep" "1")
3713 (set_attr "prefix" "maybe_vex")
3714 (set_attr "mode" "SI")])
;; Scalar-operand form of cvtsd2si: same UNSPEC_FIX_NOTRUNC conversion to
;; SImode, but operand 1 is a plain DF value (vector register or memory)
;; instead of an element taken from a V2DF vector.
3716 (define_insn "sse2_cvtsd2si_2"
3717 [(set (match_operand:SI 0 "register_operand" "=r,r")
3718 (unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "v,m")]
3719 UNSPEC_FIX_NOTRUNC))]
3721 "%vcvtsd2si\t{%1, %0|%0, %q1}"
3722 [(set_attr "type" "sseicvt")
3723 (set_attr "athlon_decode" "double,vector")
3724 (set_attr "amdfam10_decode" "double,double")
3725 (set_attr "bdver1_decode" "double,double")
3726 (set_attr "prefix_rep" "1")
3727 (set_attr "prefix" "maybe_vex")
3728 (set_attr "mode" "SI")])
;; 64-bit-result cvtsd2si: UNSPEC_FIX_NOTRUNC conversion of element index 0
;; of V2DF operand 1 into DImode register operand 0.  Requires
;; TARGET_64BIT; the AT&T mnemonic gets a "q" suffix via the {q} marker.
3730 (define_insn "sse2_cvtsd2siq"
3731 [(set (match_operand:DI 0 "register_operand" "=r,r")
3734 (match_operand:V2DF 1 "nonimmediate_operand" "v,m")
3735 (parallel [(const_int 0)]))]
3736 UNSPEC_FIX_NOTRUNC))]
3737 "TARGET_SSE2 && TARGET_64BIT"
3738 "%vcvtsd2si{q}\t{%1, %0|%0, %q1}"
3739 [(set_attr "type" "sseicvt")
3740 (set_attr "athlon_decode" "double,vector")
3741 (set_attr "bdver1_decode" "double,double")
3742 (set_attr "prefix_rep" "1")
3743 (set_attr "prefix" "maybe_vex")
3744 (set_attr "mode" "DI")])
;; Scalar-operand form of the 64-bit cvtsd2si: UNSPEC_FIX_NOTRUNC of a DF
;; operand 1 (vector register or memory) into DImode register operand 0.
;; Requires TARGET_64BIT.
3746 (define_insn "sse2_cvtsd2siq_2"
3747 [(set (match_operand:DI 0 "register_operand" "=r,r")
3748 (unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "v,m")]
3749 UNSPEC_FIX_NOTRUNC))]
3750 "TARGET_SSE2 && TARGET_64BIT"
3751 "%vcvtsd2si{q}\t{%1, %0|%0, %q1}"
3752 [(set_attr "type" "sseicvt")
3753 (set_attr "athlon_decode" "double,vector")
3754 (set_attr "amdfam10_decode" "double,double")
3755 (set_attr "bdver1_decode" "double,double")
3756 (set_attr "prefix_rep" "1")
3757 (set_attr "prefix" "maybe_vex")
3758 (set_attr "mode" "DI")])
;; cvttsd2si (optionally VEX-encoded): conversion of element index 0 of
;; V2DF operand 1 into SImode register operand 0.  Source is a vector
;; register (alternative 0) or memory (alternative 1).
3760 (define_insn "sse2_cvttsd2si"
3761 [(set (match_operand:SI 0 "register_operand" "=r,r")
3764 (match_operand:V2DF 1 "nonimmediate_operand" "v,m")
3765 (parallel [(const_int 0)]))))]
3767 "%vcvttsd2si\t{%1, %0|%0, %q1}"
3768 [(set_attr "type" "sseicvt")
3769 (set_attr "athlon_decode" "double,vector")
3770 (set_attr "amdfam10_decode" "double,double")
3771 (set_attr "bdver1_decode" "double,double")
3772 (set_attr "btver2_decode" "double,double")
3773 (set_attr "prefix_rep" "1")
3774 (set_attr "prefix" "maybe_vex")
3775 (set_attr "mode" "SI")])
;; 64-bit-result cvttsd2si: conversion of element index 0 of V2DF
;; operand 1 into DImode register operand 0.  Requires TARGET_64BIT.
3777 (define_insn "sse2_cvttsd2siq"
3778 [(set (match_operand:DI 0 "register_operand" "=r,r")
3781 (match_operand:V2DF 1 "nonimmediate_operand" "v,m")
3782 (parallel [(const_int 0)]))))]
3783 "TARGET_SSE2 && TARGET_64BIT"
3784 "%vcvttsd2si{q}\t{%1, %0|%0, %q1}"
3785 [(set_attr "type" "sseicvt")
3786 (set_attr "athlon_decode" "double,vector")
3787 (set_attr "amdfam10_decode" "double,double")
3788 (set_attr "bdver1_decode" "double,double")
3789 (set_attr "prefix_rep" "1")
3790 (set_attr "prefix" "maybe_vex")
3791 (set_attr "mode" "DI")])
3793 ;; For the float<si2dfmodelower><mode>2 insn pattern: maps each DF vector
;; mode to the SI vector mode with the same element count, used there as
;; the mode of the source operand.
3794 (define_mode_attr si2dfmode
3795 [(V8DF "V8SI") (V4DF "V4SI")])
;; Lower-case spelling of the same DF-vector to SI-vector mapping, used to
;; build the name of the float<si2dfmodelower><mode>2 insn pattern.
3796 (define_mode_attr si2dfmodelower
3797 [(V8DF "v8si") (V4DF "v4si")])
;; vcvtdq2pd: float conversion of the <si2dfmode> integer vector in
;; operand 1 (register or memory) to the VF2_512_256 vector in operand 0.
;; Requires TARGET_AVX; <mask_operand2> is appended to the destination for
;; the masked variant.
3799 (define_insn "float<si2dfmodelower><mode>2<mask_name>"
3800 [(set (match_operand:VF2_512_256 0 "register_operand" "=v")
3801 (float:VF2_512_256 (match_operand:<si2dfmode> 1 "nonimmediate_operand" "vm")))]
3802 "TARGET_AVX && <mask_mode512bit_condition>"
3803 "vcvtdq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
3804 [(set_attr "type" "ssecvt")
3805 (set_attr "prefix" "maybe_vex")
3806 (set_attr "mode" "<MODE>")])
;; vcvtudq2pd: unsigned_float conversion of V8SI operand 1 (register or
;; memory) to V8DF operand 0.  EVEX-encoded; <mask_operand2> is appended
;; to the destination for the masked variant.
3808 (define_insn "ufloatv8siv8df<mask_name>"
3809 [(set (match_operand:V8DF 0 "register_operand" "=v")
3810 (unsigned_float:V8DF
3811 (match_operand:V8SI 1 "nonimmediate_operand" "vm")))]
3813 "vcvtudq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
3814 [(set_attr "type" "ssecvt")
3815 (set_attr "prefix" "evex")
3816 (set_attr "mode" "V8DF")])
;; vcvtdq2pd on the low half of a 512-bit integer vector: element indices
;; 0..7 of V16SI operand 1 feed the V8DF result in operand 0.  The source
;; is printed with the %t operand modifier.  EVEX-encoded.
3818 (define_insn "avx512f_cvtdq2pd512_2"
3819 [(set (match_operand:V8DF 0 "register_operand" "=v")
3822 (match_operand:V16SI 1 "nonimmediate_operand" "vm")
3823 (parallel [(const_int 0) (const_int 1)
3824 (const_int 2) (const_int 3)
3825 (const_int 4) (const_int 5)
3826 (const_int 6) (const_int 7)]))))]
3828 "vcvtdq2pd\t{%t1, %0|%0, %t1}"
3829 [(set_attr "type" "ssecvt")
3830 (set_attr "prefix" "evex")
3831 (set_attr "mode" "V8DF")])
;; vcvtdq2pd on the low half of a 256-bit integer vector: element indices
;; 0..3 of V8SI operand 1 feed the V4DF result in operand 0.  The source
;; is printed with the %x operand modifier.  VEX-encoded.
3833 (define_insn "avx_cvtdq2pd256_2"
3834 [(set (match_operand:V4DF 0 "register_operand" "=x")
3837 (match_operand:V8SI 1 "nonimmediate_operand" "xm")
3838 (parallel [(const_int 0) (const_int 1)
3839 (const_int 2) (const_int 3)]))))]
3841 "vcvtdq2pd\t{%x1, %0|%0, %x1}"
3842 [(set_attr "type" "ssecvt")
3843 (set_attr "prefix" "vex")
3844 (set_attr "mode" "V4DF")])
;; cvtdq2pd (optionally VEX-encoded): element indices 0 and 1 of V4SI
;; operand 1 (register or memory) feed the V2DF result in operand 0.
3846 (define_insn "sse2_cvtdq2pd"
3847 [(set (match_operand:V2DF 0 "register_operand" "=x")
3850 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
3851 (parallel [(const_int 0) (const_int 1)]))))]
3853 "%vcvtdq2pd\t{%1, %0|%0, %q1}"
3854 [(set_attr "type" "ssecvt")
3855 (set_attr "prefix" "maybe_vex")
3856 (set_attr "mode" "V2DF")])
;; vcvtpd2dq, 512-bit source: UNSPEC_FIX_NOTRUNC conversion of V8DF
;; operand 1 (register or memory) to V8SI operand 0.  EVEX-encoded;
;; <mask_operand2> is appended to the destination for the masked variant.
3858 (define_insn "<mask_codefor>avx512f_cvtpd2dq512<mask_name>"
3859 [(set (match_operand:V8SI 0 "register_operand" "=v")
3861 [(match_operand:V8DF 1 "nonimmediate_operand" "vm")]
3862 UNSPEC_FIX_NOTRUNC))]
3864 "vcvtpd2dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
3865 [(set_attr "type" "ssecvt")
3866 (set_attr "prefix" "evex")
3867 (set_attr "mode" "OI")])
;; vcvtpd2dq, 256-bit source: UNSPEC_FIX_NOTRUNC conversion of V4DF
;; operand 1 (register or memory) to V4SI operand 0.  The AT&T mnemonic
;; gets a "y" suffix via the {y} marker.  VEX-encoded.
3869 (define_insn "avx_cvtpd2dq256"
3870 [(set (match_operand:V4SI 0 "register_operand" "=x")
3871 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")]
3872 UNSPEC_FIX_NOTRUNC))]
3874 "vcvtpd2dq{y}\t{%1, %0|%0, %1}"
3875 [(set_attr "type" "ssecvt")
3876 (set_attr "prefix" "vex")
3877 (set_attr "mode" "OI")])
;; Expander producing a V8SI result in operand 0 from an unspec V4SI
;; conversion of V4DF operand 1.  The preparation statement sets
;; operands[2] to the V4SI zero constant.
3879 (define_expand "avx_cvtpd2dq256_2"
3880 [(set (match_operand:V8SI 0 "register_operand")
3882 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand")]
3886 "operands[2] = CONST0_RTX (V4SImode);")
;; Insn form of the V8SI-result vcvtpd2dq: an unspec V4SI conversion of
;; V4DF operand 1 combined with operand 2, which must be a zero constant
;; (const0_operand).  The destination is printed with the %x operand
;; modifier.  VEX-encoded.
3888 (define_insn "*avx_cvtpd2dq256_2"
3889 [(set (match_operand:V8SI 0 "register_operand" "=x")
3891 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")]
3893 (match_operand:V4SI 2 "const0_operand")))]
3895 "vcvtpd2dq{y}\t{%1, %x0|%x0, %1}"
3896 [(set_attr "type" "ssecvt")
3897 (set_attr "prefix" "vex")
3898 (set_attr "btver2_decode" "vector")
3899 (set_attr "mode" "OI")])
;; Expander producing a V4SI result in operand 0 from an unspec V2SI
;; conversion of V2DF operand 1.  The preparation statement sets
;; operands[2] to the V2SI zero constant.
3901 (define_expand "sse2_cvtpd2dq"
3902 [(set (match_operand:V4SI 0 "register_operand")
3904 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand")]
3908 "operands[2] = CONST0_RTX (V2SImode);")
;; Insn form of the V4SI-result cvtpd2dq: an unspec V2SI conversion of
;; V2DF operand 1 combined with operand 2, which must be a zero constant
;; (const0_operand).  The output code returns either the VEX mnemonic
;; with an {x} suffix marker or the legacy mnemonic.
3910 (define_insn "*sse2_cvtpd2dq"
3911 [(set (match_operand:V4SI 0 "register_operand" "=x")
3913 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
3915 (match_operand:V2SI 2 "const0_operand")))]
3919 return "vcvtpd2dq{x}\t{%1, %0|%0, %1}";
3921 return "cvtpd2dq\t{%1, %0|%0, %1}";
3923 [(set_attr "type" "ssecvt")
3924 (set_attr "prefix_rep" "1")
3925 (set_attr "prefix_data16" "0")
3926 (set_attr "prefix" "maybe_vex")
3927 (set_attr "mode" "TI")
3928 (set_attr "amdfam10_decode" "double")
3929 (set_attr "athlon_decode" "vector")
3930 (set_attr "bdver1_decode" "double")])
;; vcvtpd2udq: UNSPEC_UNSIGNED_FIX_NOTRUNC conversion of V8DF operand 1
;; (register or memory) to V8SI operand 0.  EVEX-encoded;
;; <mask_operand2> is appended to the destination for the masked variant.
3932 (define_insn "avx512f_ufix_notruncv8dfv8si<mask_name>"
3933 [(set (match_operand:V8SI 0 "register_operand" "=v")
3935 [(match_operand:V8DF 1 "nonimmediate_operand" "vm")]
3936 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
3938 "vcvtpd2udq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
3939 [(set_attr "type" "ssecvt")
3940 (set_attr "prefix" "evex")
3941 (set_attr "mode" "OI")])
;; vcvttpd2dq / vcvttpd2<fixsuffix>dq: conversion of V8DF operand 1
;; (register or memory) to V8SI operand 0; <fixsuffix> is spliced into
;; both the pattern name and the mnemonic.  EVEX-encoded.
3943 (define_insn "<fixsuffix>fix_truncv8dfv8si2<mask_name>"
3944 [(set (match_operand:V8SI 0 "register_operand" "=v")
3946 (match_operand:V8DF 1 "nonimmediate_operand" "vm")))]
3948 "vcvttpd2<fixsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
3949 [(set_attr "type" "ssecvt")
3950 (set_attr "prefix" "evex")
3951 (set_attr "mode" "OI")])
;; vcvttpd2dq, 256-bit source: fix conversion of V4DF operand 1 (register
;; or memory) to V4SI operand 0.  The AT&T mnemonic gets a "y" suffix via
;; the {y} marker.  VEX-encoded.
3953 (define_insn "fix_truncv4dfv4si2"
3954 [(set (match_operand:V4SI 0 "register_operand" "=x")
3955 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
3957 "vcvttpd2dq{y}\t{%1, %0|%0, %1}"
3958 [(set_attr "type" "ssecvt")
3959 (set_attr "prefix" "vex")
3960 (set_attr "mode" "OI")])
;; Expander producing a V8SI result in operand 0 from a fix conversion of
;; V4DF operand 1 to V4SI.  The preparation statement sets operands[2] to
;; the V4SI zero constant.
3962 (define_expand "avx_cvttpd2dq256_2"
3963 [(set (match_operand:V8SI 0 "register_operand")
3965 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand"))
3968 "operands[2] = CONST0_RTX (V4SImode);")
;; Insn form of the V8SI-result vcvttpd2dq: a fix conversion of V4DF
;; operand 1 to V4SI combined with operand 2, which must be a zero
;; constant (const0_operand).  The destination is printed with the %x
;; operand modifier.  VEX-encoded.
3970 (define_insn "*avx_cvttpd2dq256_2"
3971 [(set (match_operand:V8SI 0 "register_operand" "=x")
3973 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "xm"))
3974 (match_operand:V4SI 2 "const0_operand")))]
3976 "vcvttpd2dq{y}\t{%1, %x0|%x0, %1}"
3977 [(set_attr "type" "ssecvt")
3978 (set_attr "prefix" "vex")
3979 (set_attr "btver2_decode" "vector")
3980 (set_attr "mode" "OI")])
;; Expander producing a V4SI result in operand 0 from a fix conversion of
;; V2DF operand 1 to V2SI.  The preparation statement sets operands[2] to
;; the V2SI zero constant.
3982 (define_expand "sse2_cvttpd2dq"
3983 [(set (match_operand:V4SI 0 "register_operand")
3985 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand"))
3988 "operands[2] = CONST0_RTX (V2SImode);")
;; Insn form of the V4SI-result cvttpd2dq: a fix conversion of V2DF
;; operand 1 to V2SI combined with operand 2, which must be a zero
;; constant (const0_operand).  The output code returns either the VEX
;; mnemonic with an {x} suffix marker or the legacy mnemonic.
3990 (define_insn "*sse2_cvttpd2dq"
3991 [(set (match_operand:V4SI 0 "register_operand" "=x")
3993 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
3994 (match_operand:V2SI 2 "const0_operand")))]
3998 return "vcvttpd2dq{x}\t{%1, %0|%0, %1}";
4000 return "cvttpd2dq\t{%1, %0|%0, %1}";
4002 [(set_attr "type" "ssecvt")
4003 (set_attr "amdfam10_decode" "double")
4004 (set_attr "athlon_decode" "vector")
4005 (set_attr "bdver1_decode" "double")
4006 (set_attr "prefix" "maybe_vex")
4007 (set_attr "mode" "TI")])
;; cvtsd2ss / vcvtsd2ss: float_truncate of V2DF operand 2 to V2SF, with
;; V4SF operand 1 as the other vector input and V4SF operand 0 as result.
;; Alternatives 0 and 1 are the non-AVX forms (operand 1 tied to the
;; destination; source in a register or in memory); alternative 2 is the
;; three-operand AVX form.
;; NOTE(review): alternative 2 uses "v" constraints but the prefix
;; attribute is "vex" rather than "maybe_evex" -- confirm.
4009 (define_insn "sse2_cvtsd2ss"
4010 [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
4013 (float_truncate:V2SF
4014 (match_operand:V2DF 2 "nonimmediate_operand" "x,m,vm")))
4015 (match_operand:V4SF 1 "register_operand" "0,0,v")
4019 cvtsd2ss\t{%2, %0|%0, %2}
4020 cvtsd2ss\t{%2, %0|%0, %q2}
4021 vcvtsd2ss\t{%2, %1, %0|%0, %1, %q2}"
4022 [(set_attr "isa" "noavx,noavx,avx")
4023 (set_attr "type" "ssecvt")
4024 (set_attr "athlon_decode" "vector,double,*")
4025 (set_attr "amdfam10_decode" "vector,double,*")
4026 (set_attr "bdver1_decode" "direct,direct,*")
4027 (set_attr "btver2_decode" "double,double,double")
4028 (set_attr "prefix" "orig,orig,vex")
4029 (set_attr "mode" "SF")])
;; cvtss2sd / vcvtss2sd: element indices 0 and 1 of V4SF operand 2 are the
;; conversion source, with V2DF operand 1 as the other vector input and
;; V2DF operand 0 as result.  Alternatives 0 and 1 are the non-AVX forms
;; (operand 1 tied to the destination); alternative 2 is the
;; three-operand AVX form.
;; NOTE(review): alternative 2 uses "v" constraints but the prefix
;; attribute is "vex" rather than "maybe_evex" -- confirm.
4031 (define_insn "sse2_cvtss2sd"
4032 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
4036 (match_operand:V4SF 2 "nonimmediate_operand" "x,m,vm")
4037 (parallel [(const_int 0) (const_int 1)])))
4038 (match_operand:V2DF 1 "register_operand" "0,0,v")
4042 cvtss2sd\t{%2, %0|%0, %2}
4043 cvtss2sd\t{%2, %0|%0, %k2}
4044 vcvtss2sd\t{%2, %1, %0|%0, %1, %k2}"
4045 [(set_attr "isa" "noavx,noavx,avx")
4046 (set_attr "type" "ssecvt")
4047 (set_attr "amdfam10_decode" "vector,double,*")
4048 (set_attr "athlon_decode" "direct,direct,*")
4049 (set_attr "bdver1_decode" "direct,direct,*")
4050 (set_attr "btver2_decode" "double,double,double")
4051 (set_attr "prefix" "orig,orig,vex")
4052 (set_attr "mode" "DF")])
;; vcvtpd2ps, 512-bit source: float_truncate of V8DF operand 1 (register
;; or memory) to V8SF operand 0.  EVEX-encoded; <mask_operand2> is
;; appended to the destination for the masked variant.
4054 (define_insn "<mask_codefor>avx512f_cvtpd2ps512<mask_name>"
4055 [(set (match_operand:V8SF 0 "register_operand" "=v")
4056 (float_truncate:V8SF
4057 (match_operand:V8DF 1 "nonimmediate_operand" "vm")))]
4059 "vcvtpd2ps\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4060 [(set_attr "type" "ssecvt")
4061 (set_attr "prefix" "evex")
4062 (set_attr "mode" "V8SF")])
;; vcvtpd2ps, 256-bit source: float_truncate of V4DF operand 1 (register
;; or memory) to V4SF operand 0.  The AT&T mnemonic gets a "y" suffix via
;; the {y} marker.  VEX-encoded.
4064 (define_insn "avx_cvtpd2ps256"
4065 [(set (match_operand:V4SF 0 "register_operand" "=x")
4066 (float_truncate:V4SF
4067 (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
4069 "vcvtpd2ps{y}\t{%1, %0|%0, %1}"
4070 [(set_attr "type" "ssecvt")
4071 (set_attr "prefix" "vex")
4072 (set_attr "btver2_decode" "vector")
4073 (set_attr "mode" "V4SF")])
;; Expander producing a V4SF result in operand 0 from a float_truncate of
;; V2DF operand 1 to V2SF.  The preparation statement sets operands[2] to
;; the V2SF zero constant.
4075 (define_expand "sse2_cvtpd2ps"
4076 [(set (match_operand:V4SF 0 "register_operand")
4078 (float_truncate:V2SF
4079 (match_operand:V2DF 1 "nonimmediate_operand"))
4082 "operands[2] = CONST0_RTX (V2SFmode);")
;; Insn form of the V4SF-result cvtpd2ps: a float_truncate of V2DF
;; operand 1 to V2SF combined with operand 2, which must be a zero
;; constant (const0_operand).  The output code returns either the VEX
;; mnemonic with an {x} suffix marker or the legacy mnemonic.
4084 (define_insn "*sse2_cvtpd2ps"
4085 [(set (match_operand:V4SF 0 "register_operand" "=x")
4087 (float_truncate:V2SF
4088 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
4089 (match_operand:V2SF 2 "const0_operand")))]
4093 return "vcvtpd2ps{x}\t{%1, %0|%0, %1}";
4095 return "cvtpd2ps\t{%1, %0|%0, %1}";
4097 [(set_attr "type" "ssecvt")
4098 (set_attr "amdfam10_decode" "double")
4099 (set_attr "athlon_decode" "vector")
4100 (set_attr "bdver1_decode" "double")
4101 (set_attr "prefix_data16" "1")
4102 (set_attr "prefix" "maybe_vex")
4103 (set_attr "mode" "V4SF")])
4105 ;; For <sse2_avx_avx512f>_cvtps2pd<avxsizesuffix> insn pattern:
;; maps each DF vector mode to the SF vector mode with the same element
;; count, used there as the mode of the source operand.
4106 (define_mode_attr sf2dfmode
4107 [(V8DF "V8SF") (V4DF "V4SF")])
;; vcvtps2pd: float_extend of the <sf2dfmode> vector in operand 1
;; (register or memory) to the VF2_512_256 vector in operand 0.  Requires
;; TARGET_AVX; <mask_operand2> is appended to the destination for the
;; masked variant.
4109 (define_insn "<sse2_avx_avx512f>_cvtps2pd<avxsizesuffix><mask_name>"
4110 [(set (match_operand:VF2_512_256 0 "register_operand" "=v")
4111 (float_extend:VF2_512_256
4112 (match_operand:<sf2dfmode> 1 "nonimmediate_operand" "vm")))]
4113 "TARGET_AVX && <mask_mode512bit_condition>"
4114 "vcvtps2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4115 [(set_attr "type" "ssecvt")
4116 (set_attr "prefix" "maybe_vex")
4117 (set_attr "mode" "<MODE>")])
;; vcvtps2pd on the low half of a 256-bit vector: element indices 0..3 of
;; V8SF operand 1 feed the V4DF result in operand 0.  The source is
;; printed with the %x operand modifier.  VEX-encoded.
4119 (define_insn "*avx_cvtps2pd256_2"
4120 [(set (match_operand:V4DF 0 "register_operand" "=x")
4123 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
4124 (parallel [(const_int 0) (const_int 1)
4125 (const_int 2) (const_int 3)]))))]
4127 "vcvtps2pd\t{%x1, %0|%0, %x1}"
4128 [(set_attr "type" "ssecvt")
4129 (set_attr "prefix" "vex")
4130 (set_attr "mode" "V4DF")])
;; Low-half unpack of a V16SF vector: element indices 0..7 of operand 1
;; feed the V8DF result in operand 0, emitted as vcvtps2pd with the source
;; printed through the %t operand modifier.  EVEX-encoded.
4132 (define_insn "vec_unpacks_lo_v16sf"
4133 [(set (match_operand:V8DF 0 "register_operand" "=v")
4136 (match_operand:V16SF 1 "nonimmediate_operand" "vm")
4137 (parallel [(const_int 0) (const_int 1)
4138 (const_int 2) (const_int 3)
4139 (const_int 4) (const_int 5)
4140 (const_int 6) (const_int 7)]))))]
4142 "vcvtps2pd\t{%t1, %0|%0, %t1}"
4143 [(set_attr "type" "ssecvt")
4144 (set_attr "prefix" "evex")
4145 (set_attr "mode" "V8DF")])
;; cvtps2pd (optionally VEX-encoded): element indices 0 and 1 of V4SF
;; operand 1 (register or memory) feed the V2DF result in operand 0.
;; The prefix_data16 attribute is 0.
4147 (define_insn "sse2_cvtps2pd"
4148 [(set (match_operand:V2DF 0 "register_operand" "=x")
4151 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
4152 (parallel [(const_int 0) (const_int 1)]))))]
4154 "%vcvtps2pd\t{%1, %0|%0, %q1}"
4155 [(set_attr "type" "ssecvt")
4156 (set_attr "amdfam10_decode" "direct")
4157 (set_attr "athlon_decode" "double")
4158 (set_attr "bdver1_decode" "double")
4159 (set_attr "prefix_data16" "0")
4160 (set_attr "prefix" "maybe_vex")
4161 (set_attr "mode" "V2DF")])
;; High-half unpack of a V4SF vector, in two steps.  The first step is a
;; selection with indices 6, 7, 2, 3 that involves V4SF operand 1; the
;; second sets V2DF operand 0 from element indices 0 and 1.  operands[2]
;; is a fresh V4SF register allocated by the preparation statement
;; (presumably the intermediate between the two steps).
4163 (define_expand "vec_unpacks_hi_v4sf"
4168 (match_operand:V4SF 1 "nonimmediate_operand"))
4169 (parallel [(const_int 6) (const_int 7)
4170 (const_int 2) (const_int 3)])))
4171 (set (match_operand:V2DF 0 "register_operand")
4175 (parallel [(const_int 0) (const_int 1)]))))]
4177 "operands[2] = gen_reg_rtx (V4SFmode);")
;; High-half unpack of a V8SF vector, in two steps.  The first step picks
;; element indices 4..7 of operand 1; the second sets V4DF operand 0.
;; operands[2] is a fresh V4SF register allocated by the preparation
;; statement (presumably the intermediate between the two steps).
4179 (define_expand "vec_unpacks_hi_v8sf"
4182 (match_operand:V8SF 1 "nonimmediate_operand")
4183 (parallel [(const_int 4) (const_int 5)
4184 (const_int 6) (const_int 7)])))
4185 (set (match_operand:V4DF 0 "register_operand")
4189 "operands[2] = gen_reg_rtx (V4SFmode);")
;; High-half unpack of a V16SF vector, in two steps.  The first step
;; picks element indices 8..15 of operand 1; the second sets V8DF
;; operand 0.  operands[2] is a fresh V8SF register allocated by the
;; preparation statement (presumably the intermediate between the steps).
4191 (define_expand "vec_unpacks_hi_v16sf"
4194 (match_operand:V16SF 1 "nonimmediate_operand")
4195 (parallel [(const_int 8) (const_int 9)
4196 (const_int 10) (const_int 11)
4197 (const_int 12) (const_int 13)
4198 (const_int 14) (const_int 15)])))
4199 (set (match_operand:V8DF 0 "register_operand")
4203 "operands[2] = gen_reg_rtx (V8SFmode);")
;; Low-half unpack of a V4SF vector: element indices 0 and 1 of operand 1
;; feed the V2DF result in operand 0.
4205 (define_expand "vec_unpacks_lo_v4sf"
4206 [(set (match_operand:V2DF 0 "register_operand")
4209 (match_operand:V4SF 1 "nonimmediate_operand")
4210 (parallel [(const_int 0) (const_int 1)]))))]
;; Low-half unpack of a V8SF vector: element indices 0..3 of operand 1
;; feed the V4DF result in operand 0.
4213 (define_expand "vec_unpacks_lo_v8sf"
4214 [(set (match_operand:V4DF 0 "register_operand")
4217 (match_operand:V8SF 1 "nonimmediate_operand")
4218 (parallel [(const_int 0) (const_int 1)
4219 (const_int 2) (const_int 3)]))))]
;; Maps an integer vector mode to the floating-point vector mode produced
;; by the vec_unpack{s,u}_float_{hi,lo}_<mode> expanders: half as many
;; elements, each twice as wide (HI -> SF, SI -> DF).
4222 (define_mode_attr sseunpackfltmode
4223 [(V8HI "V4SF") (V4SI "V2DF") (V16HI "V8SF")
4224 (V8SI "V4DF") (V32HI "V16SF") (V16SI "V8DF")])
;; Signed unpack-and-convert of the high half of a VI2_AVX512F vector:
;; gen_vec_unpacks_hi_<mode> writes a <sseunpackmode> temporary, then a
;; SET of operand 0 to the FLOAT of that temporary (in
;; <sseunpackfltmode>) is emitted.
4226 (define_expand "vec_unpacks_float_hi_<mode>"
4227 [(match_operand:<sseunpackfltmode> 0 "register_operand")
4228 (match_operand:VI2_AVX512F 1 "register_operand")]
4231 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
4233 emit_insn (gen_vec_unpacks_hi_<mode> (tmp, operands[1]));
4234 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
4235 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
;; Signed unpack-and-convert of the low half of a VI2_AVX512F vector:
;; gen_vec_unpacks_lo_<mode> writes a <sseunpackmode> temporary, then a
;; SET of operand 0 to the FLOAT of that temporary (in
;; <sseunpackfltmode>) is emitted.
4239 (define_expand "vec_unpacks_float_lo_<mode>"
4240 [(match_operand:<sseunpackfltmode> 0 "register_operand")
4241 (match_operand:VI2_AVX512F 1 "register_operand")]
4244 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
4246 emit_insn (gen_vec_unpacks_lo_<mode> (tmp, operands[1]));
4247 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
4248 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
;; Unsigned unpack-and-convert of the high half of a VI2_AVX512F vector:
;; gen_vec_unpacku_hi_<mode> writes a <sseunpackmode> temporary, then a
;; SET of operand 0 to the FLOAT of that temporary is emitted.  Note the
;; conversion itself uses the signed FLOAT code on the widened value.
4252 (define_expand "vec_unpacku_float_hi_<mode>"
4253 [(match_operand:<sseunpackfltmode> 0 "register_operand")
4254 (match_operand:VI2_AVX512F 1 "register_operand")]
4257 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
4259 emit_insn (gen_vec_unpacku_hi_<mode> (tmp, operands[1]));
4260 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
4261 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
;; Unsigned unpack-and-convert of the low half of a VI2_AVX512F vector:
;; gen_vec_unpacku_lo_<mode> writes a <sseunpackmode> temporary, then a
;; SET of operand 0 to the FLOAT of that temporary is emitted.  Note the
;; conversion itself uses the signed FLOAT code on the widened value.
4265 (define_expand "vec_unpacku_float_lo_<mode>"
4266 [(match_operand:<sseunpackfltmode> 0 "register_operand")
4267 (match_operand:VI2_AVX512F 1 "register_operand")]
4270 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
4272 emit_insn (gen_vec_unpacku_lo_<mode> (tmp, operands[1]));
4273 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
4274 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
;; Signed convert of the high half of a V4SI vector to V2DF, in two
;; steps.  The first step picks element indices 2, 3, 2, 3 of operand 1;
;; the second sets V2DF operand 0 from element indices 0 and 1.
;; operands[2] is a fresh V4SI register allocated by the preparation
;; statement (presumably the intermediate between the two steps).
4278 (define_expand "vec_unpacks_float_hi_v4si"
4281 (match_operand:V4SI 1 "nonimmediate_operand")
4282 (parallel [(const_int 2) (const_int 3)
4283 (const_int 2) (const_int 3)])))
4284 (set (match_operand:V2DF 0 "register_operand")
4288 (parallel [(const_int 0) (const_int 1)]))))]
4290 "operands[2] = gen_reg_rtx (V4SImode);")
;; Signed convert of the low half of a V4SI vector: element indices 0 and
;; 1 of operand 1 feed the V2DF result in operand 0.
4292 (define_expand "vec_unpacks_float_lo_v4si"
4293 [(set (match_operand:V2DF 0 "register_operand")
4296 (match_operand:V4SI 1 "nonimmediate_operand")
4297 (parallel [(const_int 0) (const_int 1)]))))]
;; Signed convert of the high half of a V8SI vector to V4DF, in two
;; steps.  The first step picks element indices 4..7 of operand 1; the
;; second sets V4DF operand 0.  operands[2] is a fresh V4SI register
;; allocated by the preparation statement (presumably the intermediate).
4300 (define_expand "vec_unpacks_float_hi_v8si"
4303 (match_operand:V8SI 1 "nonimmediate_operand")
4304 (parallel [(const_int 4) (const_int 5)
4305 (const_int 6) (const_int 7)])))
4306 (set (match_operand:V4DF 0 "register_operand")
4310 "operands[2] = gen_reg_rtx (V4SImode);")
;; Signed convert of the low half of a V8SI vector: element indices 0..3
;; of operand 1 feed the V4DF result in operand 0.
4312 (define_expand "vec_unpacks_float_lo_v8si"
4313 [(set (match_operand:V4DF 0 "register_operand")
4316 (match_operand:V8SI 1 "nonimmediate_operand")
4317 (parallel [(const_int 0) (const_int 1)
4318 (const_int 2) (const_int 3)]))))]
;; Two-step expander from a V16SI source (operand 1) to a V8DF register
;; (operand 0).  The first step selects elements 8..15 of operand 1; the
;; preparation statement allocates operand 2 as a fresh V8SI pseudo.
;; NOTE(review): presumably operand 2 holds the selected half that the second
;; set converts -- confirm against the full template.
4321 (define_expand "vec_unpacks_float_hi_v16si"
4324 (match_operand:V16SI 1 "nonimmediate_operand")
4325 (parallel [(const_int 8) (const_int 9)
4326 (const_int 10) (const_int 11)
4327 (const_int 12) (const_int 13)
4328 (const_int 14) (const_int 15)])))
4329 (set (match_operand:V8DF 0 "register_operand")
4333 "operands[2] = gen_reg_rtx (V8SImode);")
;; Single-set expander: the V8DF register destination (operand 0) is computed
;; from elements 0..7 of the V16SI source (operand 1).
4335 (define_expand "vec_unpacks_float_lo_v16si"
4336 [(set (match_operand:V8DF 0 "register_operand")
4339 (match_operand:V16SI 1 "nonimmediate_operand")
4340 (parallel [(const_int 0) (const_int 1)
4341 (const_int 2) (const_int 3)
4342 (const_int 4) (const_int 5)
4343 (const_int 6) (const_int 7)]))))]
;; Multi-step expander from a V4SI source (operand 1) to a V2DF register
;; (operand 0).  Visible steps: select elements 2,3,2,3 of operand 1; compare
;; operand 6 LT operand 3; AND operand 7 with operand 4; set operand 0 to
;; operand 6 plus operand 8.
;; Preparation: TWO32r is built as 1.0 scaled by 2^32 (real_ldexp with 32);
;; operand 3 is a zero V2DF vector in a register, operand 4 a V2DF constant
;; vector built from 2^32, operand 5 a fresh V4SI pseudo, and operands 6..8
;; fresh V2DF pseudos.
;; NOTE(review): presumably this is signed conversion followed by adding 2^32
;; to the lanes that came out negative -- confirm against the full template.
4346 (define_expand "vec_unpacku_float_hi_v4si"
4349 (match_operand:V4SI 1 "nonimmediate_operand")
4350 (parallel [(const_int 2) (const_int 3)
4351 (const_int 2) (const_int 3)])))
4356 (parallel [(const_int 0) (const_int 1)]))))
4358 (lt:V2DF (match_dup 6) (match_dup 3)))
4360 (and:V2DF (match_dup 7) (match_dup 4)))
4361 (set (match_operand:V2DF 0 "register_operand")
4362 (plus:V2DF (match_dup 6) (match_dup 8)))]
4365 REAL_VALUE_TYPE TWO32r;
4369 real_ldexp (&TWO32r, &dconst1, 32);
4370 x = const_double_from_real_value (TWO32r, DFmode);
4372 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
4373 operands[4] = force_reg (V2DFmode,
4374 ix86_build_const_vector (V2DFmode, 1, x));
4376 operands[5] = gen_reg_rtx (V4SImode);
4378 for (i = 6; i < 9; i++)
4379 operands[i] = gen_reg_rtx (V2DFmode);
;; Multi-step expander from a V4SI source (operand 1) to a V2DF register
;; (operand 0).  Visible steps: use elements 0,1 of operand 1; compare
;; operand 5 LT operand 3; AND operand 6 with operand 4; set operand 0 to
;; operand 5 plus operand 7.
;; Preparation: TWO32r is built as 1.0 scaled by 2^32 (real_ldexp with 32);
;; operand 3 is a zero V2DF vector in a register, operand 4 a V2DF constant
;; vector built from 2^32, and operands 5..7 fresh V2DF pseudos.
;; NOTE(review): presumably this is signed conversion followed by adding 2^32
;; to the lanes that came out negative -- confirm against the full template.
4382 (define_expand "vec_unpacku_float_lo_v4si"
4386 (match_operand:V4SI 1 "nonimmediate_operand")
4387 (parallel [(const_int 0) (const_int 1)]))))
4389 (lt:V2DF (match_dup 5) (match_dup 3)))
4391 (and:V2DF (match_dup 6) (match_dup 4)))
4392 (set (match_operand:V2DF 0 "register_operand")
4393 (plus:V2DF (match_dup 5) (match_dup 7)))]
4396 REAL_VALUE_TYPE TWO32r;
4400 real_ldexp (&TWO32r, &dconst1, 32);
4401 x = const_double_from_real_value (TWO32r, DFmode);
4403 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
4404 operands[4] = force_reg (V2DFmode,
4405 ix86_build_const_vector (V2DFmode, 1, x));
4407 for (i = 5; i < 8; i++)
4408 operands[i] = gen_reg_rtx (V2DFmode);
;; C-coded expander from a V8SI register (operand 1) to a V4DF register
;; (operand 0).  Sequence emitted:
;;   tmp[5] = gen_vec_extract_hi_v8si of operand 1 (V4SI);
;;   tmp[2] = gen_floatv4siv4df2 (tmp[5]);
;;   tmp[3] = tmp[2] LT tmp[0], where tmp[0] is a zero V4DF vector;
;;   tmp[4] = tmp[3] AND tmp[1], where tmp[1] is a V4DF vector built from 2^32;
;;   operand 0 = tmp[2] + tmp[4].
;; NOTE(review): presumably the compare yields an all-ones lane mask so the
;; AND keeps 2^32 only in negative lanes -- confirm.
4411 (define_expand "vec_unpacku_float_hi_v8si"
4412 [(match_operand:V4DF 0 "register_operand")
4413 (match_operand:V8SI 1 "register_operand")]
4416 REAL_VALUE_TYPE TWO32r;
4420 real_ldexp (&TWO32r, &dconst1, 32);
4421 x = const_double_from_real_value (TWO32r, DFmode);
4423 tmp[0] = force_reg (V4DFmode, CONST0_RTX (V4DFmode));
4424 tmp[1] = force_reg (V4DFmode, ix86_build_const_vector (V4DFmode, 1, x));
4425 tmp[5] = gen_reg_rtx (V4SImode);
4427 for (i = 2; i < 5; i++)
4428 tmp[i] = gen_reg_rtx (V4DFmode);
4429 emit_insn (gen_vec_extract_hi_v8si (tmp[5], operands[1]));
4430 emit_insn (gen_floatv4siv4df2 (tmp[2], tmp[5]));
4431 emit_insn (gen_rtx_SET (VOIDmode, tmp[3],
4432 gen_rtx_LT (V4DFmode, tmp[2], tmp[0])));
4433 emit_insn (gen_andv4df3 (tmp[4], tmp[3], tmp[1]));
4434 emit_insn (gen_addv4df3 (operands[0], tmp[2], tmp[4]));
;; C-coded expander from a V8SI source (operand 1) to a V4DF register
;; (operand 0).  Sequence emitted:
;;   tmp[2] = gen_avx_cvtdq2pd256_2 (operand 1);
;;   tmp[3] = tmp[2] LT tmp[0], where tmp[0] is a zero V4DF vector;
;;   tmp[4] = tmp[3] AND tmp[1], where tmp[1] is a V4DF vector built from 2^32;
;;   operand 0 = tmp[2] + tmp[4].
;; NOTE(review): presumably the compare yields an all-ones lane mask so the
;; AND keeps 2^32 only in negative lanes -- confirm.
4438 (define_expand "vec_unpacku_float_lo_v8si"
4439 [(match_operand:V4DF 0 "register_operand")
4440 (match_operand:V8SI 1 "nonimmediate_operand")]
4443 REAL_VALUE_TYPE TWO32r;
4447 real_ldexp (&TWO32r, &dconst1, 32);
4448 x = const_double_from_real_value (TWO32r, DFmode);
4450 tmp[0] = force_reg (V4DFmode, CONST0_RTX (V4DFmode));
4451 tmp[1] = force_reg (V4DFmode, ix86_build_const_vector (V4DFmode, 1, x));
4453 for (i = 2; i < 5; i++)
4454 tmp[i] = gen_reg_rtx (V4DFmode);
4455 emit_insn (gen_avx_cvtdq2pd256_2 (tmp[2], operands[1]));
4456 emit_insn (gen_rtx_SET (VOIDmode, tmp[3],
4457 gen_rtx_LT (V4DFmode, tmp[2], tmp[0])));
4458 emit_insn (gen_andv4df3 (tmp[4], tmp[3], tmp[1]));
4459 emit_insn (gen_addv4df3 (operands[0], tmp[2], tmp[4]));
;; C-coded expander from a V16SI source (operand 1) to a V8DF register
;; (operand 0).  Sequence emitted:
;;   tmp[2] = gen_avx512f_cvtdq2pd512_2 (operand 1);
;;   k      = tmp[2] LT tmp[0], a QImode value (tmp[0] is a zero V8DF vector);
;;   tmp[2] = gen_addv8df3_mask (tmp[2], tmp[1], tmp[2], k), where tmp[1] is a
;;            V8DF vector built from 2^32;
;;   operand 0 = tmp[2].
;; NOTE(review): presumably the masked add adds 2^32 only in lanes selected by
;; k and keeps tmp[2] elsewhere -- confirm against addv8df3_mask.
4463 (define_expand "vec_unpacku_float_lo_v16si"
4464 [(match_operand:V8DF 0 "register_operand")
4465 (match_operand:V16SI 1 "nonimmediate_operand")]
4468 REAL_VALUE_TYPE TWO32r;
4471 real_ldexp (&TWO32r, &dconst1, 32);
4472 x = const_double_from_real_value (TWO32r, DFmode);
4474 tmp[0] = force_reg (V8DFmode, CONST0_RTX (V8DFmode));
4475 tmp[1] = force_reg (V8DFmode, ix86_build_const_vector (V8DFmode, 1, x));
4476 tmp[2] = gen_reg_rtx (V8DFmode);
4477 k = gen_reg_rtx (QImode);
4479 emit_insn (gen_avx512f_cvtdq2pd512_2 (tmp[2], operands[1]));
4480 emit_insn (gen_rtx_SET (VOIDmode, k,
4481 gen_rtx_LT (QImode, tmp[2], tmp[0])));
4482 emit_insn (gen_addv8df3_mask (tmp[2], tmp[2], tmp[1], tmp[2], k));
4483 emit_move_insn (operands[0], tmp[2]);
;; Expander over VF2_512_256: operands 1 and 2 are each float_truncate'd to
;; <sf2dfmode>, and operand 0 (<ssePSmode>) is set to a vec_concat.  The
;; preparation statements allocate operands 3 and 4 as fresh <sf2dfmode>
;; pseudos.
;; NOTE(review): presumably operands 3 and 4 hold the two truncated halves
;; that are concatenated -- confirm against the full template.
4487 (define_expand "vec_pack_trunc_<mode>"
4489 (float_truncate:<sf2dfmode>
4490 (match_operand:VF2_512_256 1 "nonimmediate_operand")))
4492 (float_truncate:<sf2dfmode>
4493 (match_operand:VF2_512_256 2 "nonimmediate_operand")))
4494 (set (match_operand:<ssePSmode> 0 "register_operand")
4495 (vec_concat:<ssePSmode>
4500 operands[3] = gen_reg_rtx (<sf2dfmode>mode);
4501 operands[4] = gen_reg_rtx (<sf2dfmode>mode);
;; C-coded expander packing two V2DF inputs (operands 1 and 2) into one V4SF
;; register (operand 0).
;; When TARGET_AVX && !TARGET_PREFER_AVX128: operand 1 is forced into a
;; register, the pair is concatenated into a V4DF temporary with
;; gen_avx_vec_concatv4df, and gen_avx_cvtpd2ps256 writes operand 0.
;; Otherwise: each input goes through gen_sse2_cvtpd2ps into its own V4SF
;; temporary and gen_sse_movlhps combines the two into operand 0.
4504 (define_expand "vec_pack_trunc_v2df"
4505 [(match_operand:V4SF 0 "register_operand")
4506 (match_operand:V2DF 1 "nonimmediate_operand")
4507 (match_operand:V2DF 2 "nonimmediate_operand")]
4512 if (TARGET_AVX && !TARGET_PREFER_AVX128)
4514 tmp0 = gen_reg_rtx (V4DFmode);
4515 tmp1 = force_reg (V2DFmode, operands[1]);
4517 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
4518 emit_insn (gen_avx_cvtpd2ps256 (operands[0], tmp0));
4522 tmp0 = gen_reg_rtx (V4SFmode);
4523 tmp1 = gen_reg_rtx (V4SFmode);
4525 emit_insn (gen_sse2_cvtpd2ps (tmp0, operands[1]));
4526 emit_insn (gen_sse2_cvtpd2ps (tmp1, operands[2]));
4527 emit_insn (gen_sse_movlhps (operands[0], tmp0, tmp1));
;; C-coded expander packing two V8DF inputs (operands 1 and 2) into one V16SI
;; register (operand 0): each input is converted with gen_fix_truncv8dfv8si2
;; into a V8SI temporary, and gen_avx_vec_concatv16si joins r1 and r2.
4532 (define_expand "vec_pack_sfix_trunc_v8df"
4533 [(match_operand:V16SI 0 "register_operand")
4534 (match_operand:V8DF 1 "nonimmediate_operand")
4535 (match_operand:V8DF 2 "nonimmediate_operand")]
4540 r1 = gen_reg_rtx (V8SImode);
4541 r2 = gen_reg_rtx (V8SImode);
4543 emit_insn (gen_fix_truncv8dfv8si2 (r1, operands[1]));
4544 emit_insn (gen_fix_truncv8dfv8si2 (r2, operands[2]));
4545 emit_insn (gen_avx_vec_concatv16si (operands[0], r1, r2));
;; C-coded expander packing two V4DF inputs (operands 1 and 2) into one V8SI
;; register (operand 0): each input is converted with gen_fix_truncv4dfv4si2
;; into a V4SI temporary, and gen_avx_vec_concatv8si joins r1 and r2.
4549 (define_expand "vec_pack_sfix_trunc_v4df"
4550 [(match_operand:V8SI 0 "register_operand")
4551 (match_operand:V4DF 1 "nonimmediate_operand")
4552 (match_operand:V4DF 2 "nonimmediate_operand")]
4557 r1 = gen_reg_rtx (V4SImode);
4558 r2 = gen_reg_rtx (V4SImode);
4560 emit_insn (gen_fix_truncv4dfv4si2 (r1, operands[1]));
4561 emit_insn (gen_fix_truncv4dfv4si2 (r2, operands[2]));
4562 emit_insn (gen_avx_vec_concatv8si (operands[0], r1, r2));
;; C-coded expander packing two V2DF inputs (operands 1 and 2) into one V4SI
;; register (operand 0).
;; When TARGET_AVX && !TARGET_PREFER_AVX128: operand 1 is forced into a
;; register, the pair is concatenated into a V4DF temporary, and
;; gen_fix_truncv4dfv4si2 writes operand 0.
;; Otherwise: each input goes through gen_sse2_cvttpd2dq into a V4SI
;; temporary; the two temporaries are viewed as V2DI (gen_lowpart) and merged
;; with gen_vec_interleave_lowv2di; the V4SI view of that result is moved to
;; operand 0.
4566 (define_expand "vec_pack_sfix_trunc_v2df"
4567 [(match_operand:V4SI 0 "register_operand")
4568 (match_operand:V2DF 1 "nonimmediate_operand")
4569 (match_operand:V2DF 2 "nonimmediate_operand")]
4572 rtx tmp0, tmp1, tmp2;
4574 if (TARGET_AVX && !TARGET_PREFER_AVX128)
4576 tmp0 = gen_reg_rtx (V4DFmode);
4577 tmp1 = force_reg (V2DFmode, operands[1]);
4579 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
4580 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp0));
4584 tmp0 = gen_reg_rtx (V4SImode);
4585 tmp1 = gen_reg_rtx (V4SImode);
4586 tmp2 = gen_reg_rtx (V2DImode);
4588 emit_insn (gen_sse2_cvttpd2dq (tmp0, operands[1]));
4589 emit_insn (gen_sse2_cvttpd2dq (tmp1, operands[2]));
4590 emit_insn (gen_vec_interleave_lowv2di (tmp2,
4591 gen_lowpart (V2DImode, tmp0),
4592 gen_lowpart (V2DImode, tmp1)));
4593 emit_move_insn (operands[0], gen_lowpart (V4SImode, tmp2));
;; Mode attribute mapping a double-precision vector mode to an SImode vector
;; mode with twice as many elements: V8DF to V16SI, V4DF to V8SI, V2DF to V4SI.
4598 (define_mode_attr ssepackfltmode
4599 [(V8DF "V16SI") (V4DF "V8SI") (V2DF "V4SI")])
;; C-coded expander over VF2_128_256 packing two register inputs (operands 1
;; and 2) into one <ssepackfltmode> register (operand 0).  Steps:
;;   1. ix86_expand_adjust_ufix_to_sfix_si is called on each input; its return
;;      values land in tmp[0]/tmp[1] and its second outputs in tmp[2]/tmp[3].
;;   2. gen_vec_pack_sfix_trunc_<mode> packs tmp[0] and tmp[1] into tmp[4].
;;   3. ix86_expand_vec_extract_even_odd combines tmp[2] and tmp[3] into
;;      tmp[5].  For V4SI results or with TARGET_AVX2 this is done directly in
;;      <ssepackfltmode>; otherwise it is done on V8SF views (gen_lowpart) and
;;      the result is viewed back as V8SI.
;;   4. tmp[4] XOR tmp[5] is computed with operand 0 as the suggested target,
;;      and moved into operand 0 if it landed elsewhere.
;; NOTE(review): the trailing 0 passed in step 3 presumably selects the even
;; elements, and tmp[2]/tmp[3] presumably hold the correction bits to XOR back
;; in -- confirm against the helpers in i386.c.
4601 (define_expand "vec_pack_ufix_trunc_<mode>"
4602 [(match_operand:<ssepackfltmode> 0 "register_operand")
4603 (match_operand:VF2_128_256 1 "register_operand")
4604 (match_operand:VF2_128_256 2 "register_operand")]
4608 tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]);
4609 tmp[1] = ix86_expand_adjust_ufix_to_sfix_si (operands[2], &tmp[3]);
4610 tmp[4] = gen_reg_rtx (<ssepackfltmode>mode);
4611 emit_insn (gen_vec_pack_sfix_trunc_<mode> (tmp[4], tmp[0], tmp[1]));
4612 if (<ssepackfltmode>mode == V4SImode || TARGET_AVX2)
4614 tmp[5] = gen_reg_rtx (<ssepackfltmode>mode);
4615 ix86_expand_vec_extract_even_odd (tmp[5], tmp[2], tmp[3], 0);
4619 tmp[5] = gen_reg_rtx (V8SFmode);
4620 ix86_expand_vec_extract_even_odd (tmp[5], gen_lowpart (V8SFmode, tmp[2]),
4621 gen_lowpart (V8SFmode, tmp[3]), 0);
4622 tmp[5] = gen_lowpart (V8SImode, tmp[5]);
4624 tmp[6] = expand_simple_binop (<ssepackfltmode>mode, XOR, tmp[4], tmp[5],
4625 operands[0], 0, OPTAB_DIRECT);
4626 if (tmp[6] != operands[0])
4627 emit_move_insn (operands[0], tmp[6]);
;; C-coded expander packing two V4DF inputs (operands 1 and 2) into one V8SI
;; register (operand 0): each input is converted with gen_avx_cvtpd2dq256
;; into a V4SI temporary, and gen_avx_vec_concatv8si joins r1 and r2.
4631 (define_expand "vec_pack_sfix_v4df"
4632 [(match_operand:V8SI 0 "register_operand")
4633 (match_operand:V4DF 1 "nonimmediate_operand")
4634 (match_operand:V4DF 2 "nonimmediate_operand")]
4639 r1 = gen_reg_rtx (V4SImode);
4640 r2 = gen_reg_rtx (V4SImode);
4642 emit_insn (gen_avx_cvtpd2dq256 (r1, operands[1]));
4643 emit_insn (gen_avx_cvtpd2dq256 (r2, operands[2]));
4644 emit_insn (gen_avx_vec_concatv8si (operands[0], r1, r2));
;; C-coded expander packing two V2DF inputs (operands 1 and 2) into one V4SI
;; register (operand 0).
;; When TARGET_AVX && !TARGET_PREFER_AVX128: operand 1 is forced into a
;; register, the pair is concatenated into a V4DF temporary, and
;; gen_avx_cvtpd2dq256 writes operand 0.
;; Otherwise: each input goes through gen_sse2_cvtpd2dq into a V4SI
;; temporary; the two temporaries are viewed as V2DI (gen_lowpart) and merged
;; with gen_vec_interleave_lowv2di; the V4SI view of that result is moved to
;; operand 0.
4648 (define_expand "vec_pack_sfix_v2df"
4649 [(match_operand:V4SI 0 "register_operand")
4650 (match_operand:V2DF 1 "nonimmediate_operand")
4651 (match_operand:V2DF 2 "nonimmediate_operand")]
4654 rtx tmp0, tmp1, tmp2;
4656 if (TARGET_AVX && !TARGET_PREFER_AVX128)
4658 tmp0 = gen_reg_rtx (V4DFmode);
4659 tmp1 = force_reg (V2DFmode, operands[1]);
4661 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
4662 emit_insn (gen_avx_cvtpd2dq256 (operands[0], tmp0));
4666 tmp0 = gen_reg_rtx (V4SImode);
4667 tmp1 = gen_reg_rtx (V4SImode);
4668 tmp2 = gen_reg_rtx (V2DImode);
4670 emit_insn (gen_sse2_cvtpd2dq (tmp0, operands[1]));
4671 emit_insn (gen_sse2_cvtpd2dq (tmp1, operands[2]));
4672 emit_insn (gen_vec_interleave_lowv2di (tmp2,
4673 gen_lowpart (V2DImode, tmp0),
4674 gen_lowpart (V2DImode, tmp1)));
4675 emit_move_insn (operands[0], gen_lowpart (V4SImode, tmp2));
4680 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4682 ;; Parallel single-precision floating point element swizzling
4684 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander wrapper around sse_movhlps that accepts nonimmediate operands.
;; It calls ix86_fixup_binary_operands to obtain a destination dst, emits
;; gen_sse_movhlps into dst, and copies dst to operand 0 when they differ.
4686 (define_expand "sse_movhlps_exp"
4687 [(set (match_operand:V4SF 0 "nonimmediate_operand")
4690 (match_operand:V4SF 1 "nonimmediate_operand")
4691 (match_operand:V4SF 2 "nonimmediate_operand"))
4692 (parallel [(const_int 6)
4698 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
4700 emit_insn (gen_sse_movhlps (dst, operands[1], operands[2]));
4702 /* Fix up the destination if needed. */
4703 if (dst != operands[0])
4704 emit_move_insn (operands[0], dst);
;; V4SF insn with five alternatives, valid when TARGET_SSE and operands 1 and
;; 2 are not both in memory.  By constraint column:
;;   0/1: register operands, movhlps (operand 1 tied to 0) / vmovhlps;
;;   2/3: operand 2 in offsettable memory, movlps / vmovlps using %H2;
;;   4:   destination in memory with operand 1 tied to it, %vmovhps of the
;;        register operand 2.
;; The isa attribute restricts alternatives 0 and 2 to noavx and 1 and 3 to
;; avx.
;; NOTE(review): %H2 presumably addresses the upper 8 bytes of the memory
;; operand -- confirm against the operand-modifier table in i386.c.
4709 (define_insn "sse_movhlps"
4710 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,m")
4713 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
4714 (match_operand:V4SF 2 "nonimmediate_operand" " x,x,o,o,x"))
4715 (parallel [(const_int 6)
4719 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4721 movhlps\t{%2, %0|%0, %2}
4722 vmovhlps\t{%2, %1, %0|%0, %1, %2}
4723 movlps\t{%H2, %0|%0, %H2}
4724 vmovlps\t{%H2, %1, %0|%0, %1, %H2}
4725 %vmovhps\t{%2, %0|%q0, %2}"
4726 [(set_attr "isa" "noavx,avx,noavx,avx,*")
4727 (set_attr "type" "ssemov")
4728 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
4729 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
;; Expander wrapper around sse_movlhps that accepts nonimmediate operands.
;; It calls ix86_fixup_binary_operands to obtain a destination dst, emits
;; gen_sse_movlhps into dst, and copies dst to operand 0 when they differ.
4731 (define_expand "sse_movlhps_exp"
4732 [(set (match_operand:V4SF 0 "nonimmediate_operand")
4735 (match_operand:V4SF 1 "nonimmediate_operand")
4736 (match_operand:V4SF 2 "nonimmediate_operand"))
4737 (parallel [(const_int 0)
4743 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
4745 emit_insn (gen_sse_movlhps (dst, operands[1], operands[2]));
4747 /* Fix up the destination if needed. */
4748 if (dst != operands[0])
4749 emit_move_insn (operands[0], dst);
;; V4SF insn with five alternatives, valid when TARGET_SSE and
;; ix86_binary_operator_ok accepts the operands.  By constraint column:
;;   0/1: register operands, movlhps (operand 1 tied to 0) / vmovlhps;
;;   2/3: operand 2 in memory, movhps / vmovhps;
;;   4:   destination in offsettable memory with operand 1 tied to it,
;;        %vmovlps of register operand 2 to %H0.
;; The isa attribute restricts alternatives 0 and 2 to noavx and 1 and 3 to
;; avx.
;; NOTE(review): %H0 presumably addresses the upper 8 bytes of the memory
;; destination -- confirm against the operand-modifier table in i386.c.
4754 (define_insn "sse_movlhps"
4755 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,o")
4758 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
4759 (match_operand:V4SF 2 "nonimmediate_operand" " x,x,m,m,x"))
4760 (parallel [(const_int 0)
4764 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
4766 movlhps\t{%2, %0|%0, %2}
4767 vmovlhps\t{%2, %1, %0|%0, %1, %2}
4768 movhps\t{%2, %0|%0, %q2}
4769 vmovhps\t{%2, %1, %0|%0, %1, %q2}
4770 %vmovlps\t{%2, %H0|%H0, %2}"
4771 [(set_attr "isa" "noavx,avx,noavx,avx,*")
4772 (set_attr "type" "ssemov")
4773 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
4774 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
;; V16SF insn emitting vunpckhps, with an optional mask suffix supplied by
;; the <mask_name> / <mask_operand3> substitutions.  The selector list picks
;; indices 2,18,3,19, 6,22,7,23, 10,26,11,27, 14,30,15,31, i.e. the same
;; 2,3 pattern repeated every four elements.
;; NOTE(review): indices 16 and above presumably refer to operand 2 in a
;; concatenation of operands 1 and 2 -- confirm against the full template.
4776 (define_insn "<mask_codefor>avx512f_unpckhps512<mask_name>"
4777 [(set (match_operand:V16SF 0 "register_operand" "=v")
4780 (match_operand:V16SF 1 "register_operand" "v")
4781 (match_operand:V16SF 2 "nonimmediate_operand" "vm"))
4782 (parallel [(const_int 2) (const_int 18)
4783 (const_int 3) (const_int 19)
4784 (const_int 6) (const_int 22)
4785 (const_int 7) (const_int 23)
4786 (const_int 10) (const_int 26)
4787 (const_int 11) (const_int 27)
4788 (const_int 14) (const_int 30)
4789 (const_int 15) (const_int 31)])))]
4791 "vunpckhps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
4792 [(set_attr "type" "sselog")
4793 (set_attr "prefix" "evex")
4794 (set_attr "mode" "V16SF")])
4796 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; V8SF insn emitting vunpckhps.  The selector list picks indices
;; 2,10,3,11 and 6,14,7,15.
;; NOTE(review): indices 8 and above presumably refer to operand 2 in a
;; concatenation of operands 1 and 2 -- confirm against the full template.
4797 (define_insn "avx_unpckhps256"
4798 [(set (match_operand:V8SF 0 "register_operand" "=x")
4801 (match_operand:V8SF 1 "register_operand" "x")
4802 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
4803 (parallel [(const_int 2) (const_int 10)
4804 (const_int 3) (const_int 11)
4805 (const_int 6) (const_int 14)
4806 (const_int 7) (const_int 15)])))]
4808 "vunpckhps\t{%2, %1, %0|%0, %1, %2}"
4809 [(set_attr "type" "sselog")
4810 (set_attr "prefix" "vex")
4811 (set_attr "mode" "V8SF")])
;; Three-step V8SF expander.  The first selector list (0,8,1,9,4,12,5,13)
;; and the second (2,10,3,11,6,14,7,15) are the ones used by
;; avx_unpcklps256 and avx_unpckhps256; the final step sets operand 0 using
;; indices 4..7 and 12..15.  Operands 3 and 4 are fresh V8SF pseudos.
;; NOTE(review): presumably operands 3 and 4 receive the two in-lane unpack
;; results and the last step gathers their upper 128-bit halves -- confirm
;; against the full template.
;; NOTE(review): operands 1 and 2 carry constraint strings; these are
;; presumably not consulted for a define_expand -- confirm.
4813 (define_expand "vec_interleave_highv8sf"
4817 (match_operand:V8SF 1 "register_operand" "x")
4818 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
4819 (parallel [(const_int 0) (const_int 8)
4820 (const_int 1) (const_int 9)
4821 (const_int 4) (const_int 12)
4822 (const_int 5) (const_int 13)])))
4828 (parallel [(const_int 2) (const_int 10)
4829 (const_int 3) (const_int 11)
4830 (const_int 6) (const_int 14)
4831 (const_int 7) (const_int 15)])))
4832 (set (match_operand:V8SF 0 "register_operand")
4837 (parallel [(const_int 4) (const_int 5)
4838 (const_int 6) (const_int 7)
4839 (const_int 12) (const_int 13)
4840 (const_int 14) (const_int 15)])))]
4843 operands[3] = gen_reg_rtx (V8SFmode);
4844 operands[4] = gen_reg_rtx (V8SFmode);
;; V4SF insn with two alternatives: unpckhps with operand 1 tied to the
;; destination (noavx) and the three-operand vunpckhps (avx).  The selector
;; list picks indices 2,6,3,7.
;; NOTE(review): indices 4 and above presumably refer to operand 2 in a
;; concatenation of operands 1 and 2 -- confirm against the full template.
4847 (define_insn "vec_interleave_highv4sf"
4848 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
4851 (match_operand:V4SF 1 "register_operand" "0,x")
4852 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
4853 (parallel [(const_int 2) (const_int 6)
4854 (const_int 3) (const_int 7)])))]
4857 unpckhps\t{%2, %0|%0, %2}
4858 vunpckhps\t{%2, %1, %0|%0, %1, %2}"
4859 [(set_attr "isa" "noavx,avx")
4860 (set_attr "type" "sselog")
4861 (set_attr "prefix" "orig,vex")
4862 (set_attr "mode" "V4SF")])
;; V16SF insn emitting vunpcklps, with an optional mask suffix supplied by
;; the <mask_name> / <mask_operand3> substitutions.  The selector list picks
;; indices 0,16,1,17, 4,20,5,21, 8,24,9,25, 12,28,13,29, i.e. the same
;; 0,1 pattern repeated every four elements.
;; NOTE(review): indices 16 and above presumably refer to operand 2 in a
;; concatenation of operands 1 and 2 -- confirm against the full template.
4864 (define_insn "<mask_codefor>avx512f_unpcklps512<mask_name>"
4865 [(set (match_operand:V16SF 0 "register_operand" "=v")
4868 (match_operand:V16SF 1 "register_operand" "v")
4869 (match_operand:V16SF 2 "nonimmediate_operand" "vm"))
4870 (parallel [(const_int 0) (const_int 16)
4871 (const_int 1) (const_int 17)
4872 (const_int 4) (const_int 20)
4873 (const_int 5) (const_int 21)
4874 (const_int 8) (const_int 24)
4875 (const_int 9) (const_int 25)
4876 (const_int 12) (const_int 28)
4877 (const_int 13) (const_int 29)])))]
4879 "vunpcklps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
4880 [(set_attr "type" "sselog")
4881 (set_attr "prefix" "evex")
4882 (set_attr "mode" "V16SF")])
4884 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; V8SF insn emitting vunpcklps.  The selector list picks indices
;; 0,8,1,9 and 4,12,5,13.
;; NOTE(review): indices 8 and above presumably refer to operand 2 in a
;; concatenation of operands 1 and 2 -- confirm against the full template.
4885 (define_insn "avx_unpcklps256"
4886 [(set (match_operand:V8SF 0 "register_operand" "=x")
4889 (match_operand:V8SF 1 "register_operand" "x")
4890 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
4891 (parallel [(const_int 0) (const_int 8)
4892 (const_int 1) (const_int 9)
4893 (const_int 4) (const_int 12)
4894 (const_int 5) (const_int 13)])))]
4896 "vunpcklps\t{%2, %1, %0|%0, %1, %2}"
4897 [(set_attr "type" "sselog")
4898 (set_attr "prefix" "vex")
4899 (set_attr "mode" "V8SF")])
;; Three-step V8SF expander.  The first selector list (0,8,1,9,4,12,5,13)
;; and the second (2,10,3,11,6,14,7,15) are the ones used by
;; avx_unpcklps256 and avx_unpckhps256; the final step sets operand 0 using
;; indices 0..3 and 8..11.  Operands 3 and 4 are fresh V8SF pseudos.
;; NOTE(review): presumably operands 3 and 4 receive the two in-lane unpack
;; results and the last step gathers their lower 128-bit halves -- confirm
;; against the full template.
;; NOTE(review): operands 1 and 2 carry constraint strings; these are
;; presumably not consulted for a define_expand -- confirm.
4901 (define_expand "vec_interleave_lowv8sf"
4905 (match_operand:V8SF 1 "register_operand" "x")
4906 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
4907 (parallel [(const_int 0) (const_int 8)
4908 (const_int 1) (const_int 9)
4909 (const_int 4) (const_int 12)
4910 (const_int 5) (const_int 13)])))
4916 (parallel [(const_int 2) (const_int 10)
4917 (const_int 3) (const_int 11)
4918 (const_int 6) (const_int 14)
4919 (const_int 7) (const_int 15)])))
4920 (set (match_operand:V8SF 0 "register_operand")
4925 (parallel [(const_int 0) (const_int 1)
4926 (const_int 2) (const_int 3)
4927 (const_int 8) (const_int 9)
4928 (const_int 10) (const_int 11)])))]
4931 operands[3] = gen_reg_rtx (V8SFmode);
4932 operands[4] = gen_reg_rtx (V8SFmode);
;; V4SF insn with two alternatives: unpcklps with operand 1 tied to the
;; destination (noavx) and the three-operand vunpcklps (avx).  The selector
;; list picks indices 0,4,1,5.
;; NOTE(review): indices 4 and above presumably refer to operand 2 in a
;; concatenation of operands 1 and 2 -- confirm against the full template.
4935 (define_insn "vec_interleave_lowv4sf"
4936 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
4939 (match_operand:V4SF 1 "register_operand" "0,x")
4940 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
4941 (parallel [(const_int 0) (const_int 4)
4942 (const_int 1) (const_int 5)])))]
4945 unpcklps\t{%2, %0|%0, %2}
4946 vunpcklps\t{%2, %1, %0|%0, %1, %2}"
4947 [(set_attr "isa" "noavx,avx")
4948 (set_attr "type" "sselog")
4949 (set_attr "prefix" "orig,vex")
4950 (set_attr "mode" "V4SF")])
4952 ;; These are modeled with the same vec_concat as the others so that we
4953 ;; capture users of shufps that can use the new instructions
;; V8SF insn emitting vmovshdup from a register or memory source
;; (operand 1).  The selector list is 1,1,3,3,5,5,7,7: each odd-indexed
;; element appears twice in the result.
4954 (define_insn "avx_movshdup256"
4955 [(set (match_operand:V8SF 0 "register_operand" "=x")
4958 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
4960 (parallel [(const_int 1) (const_int 1)
4961 (const_int 3) (const_int 3)
4962 (const_int 5) (const_int 5)
4963 (const_int 7) (const_int 7)])))]
4965 "vmovshdup\t{%1, %0|%0, %1}"
4966 [(set_attr "type" "sse")
4967 (set_attr "prefix" "vex")
4968 (set_attr "mode" "V8SF")])
;; V4SF insn emitting %vmovshdup from a register or memory source
;; (operand 1).  The visible selector list starts at index 1.  The attributes
;; record a rep prefix (prefix_rep 1) and a maybe_vex encoding.
4970 (define_insn "sse3_movshdup"
4971 [(set (match_operand:V4SF 0 "register_operand" "=x")
4974 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
4976 (parallel [(const_int 1)
4981 "%vmovshdup\t{%1, %0|%0, %1}"
4982 [(set_attr "type" "sse")
4983 (set_attr "prefix_rep" "1")
4984 (set_attr "prefix" "maybe_vex")
4985 (set_attr "mode" "V4SF")])
;; V16SF insn emitting vmovshdup, with an optional mask suffix supplied by
;; the <mask_name> / <mask_operand2> substitutions.  The selector list is
;; 1,1,3,3,...,15,15: each odd-indexed element appears twice in the result.
4987 (define_insn "<mask_codefor>avx512f_movshdup512<mask_name>"
4988 [(set (match_operand:V16SF 0 "register_operand" "=v")
4991 (match_operand:V16SF 1 "nonimmediate_operand" "vm")
4993 (parallel [(const_int 1) (const_int 1)
4994 (const_int 3) (const_int 3)
4995 (const_int 5) (const_int 5)
4996 (const_int 7) (const_int 7)
4997 (const_int 9) (const_int 9)
4998 (const_int 11) (const_int 11)
4999 (const_int 13) (const_int 13)
5000 (const_int 15) (const_int 15)])))]
5002 "vmovshdup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5003 [(set_attr "type" "sse")
5004 (set_attr "prefix" "evex")
5005 (set_attr "mode" "V16SF")])
;; V8SF insn emitting vmovsldup from a register or memory source
;; (operand 1).  The selector list is 0,0,2,2,4,4,6,6: each even-indexed
;; element appears twice in the result.
5007 (define_insn "avx_movsldup256"
5008 [(set (match_operand:V8SF 0 "register_operand" "=x")
5011 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
5013 (parallel [(const_int 0) (const_int 0)
5014 (const_int 2) (const_int 2)
5015 (const_int 4) (const_int 4)
5016 (const_int 6) (const_int 6)])))]
5018 "vmovsldup\t{%1, %0|%0, %1}"
5019 [(set_attr "type" "sse")
5020 (set_attr "prefix" "vex")
5021 (set_attr "mode" "V8SF")])
;; V4SF insn emitting %vmovsldup from a register or memory source
;; (operand 1).  The visible selector list starts at index 0.  The attributes
;; record a rep prefix (prefix_rep 1) and a maybe_vex encoding.
5023 (define_insn "sse3_movsldup"
5024 [(set (match_operand:V4SF 0 "register_operand" "=x")
5027 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
5029 (parallel [(const_int 0)
5034 "%vmovsldup\t{%1, %0|%0, %1}"
5035 [(set_attr "type" "sse")
5036 (set_attr "prefix_rep" "1")
5037 (set_attr "prefix" "maybe_vex")
5038 (set_attr "mode" "V4SF")])
;; V16SF insn emitting vmovsldup, with an optional mask suffix supplied by
;; the <mask_name> / <mask_operand2> substitutions.  The selector list is
;; 0,0,2,2,...,14,14: each even-indexed element appears twice in the result.
5040 (define_insn "<mask_codefor>avx512f_movsldup512<mask_name>"
5041 [(set (match_operand:V16SF 0 "register_operand" "=v")
5044 (match_operand:V16SF 1 "nonimmediate_operand" "vm")
5046 (parallel [(const_int 0) (const_int 0)
5047 (const_int 2) (const_int 2)
5048 (const_int 4) (const_int 4)
5049 (const_int 6) (const_int 6)
5050 (const_int 8) (const_int 8)
5051 (const_int 10) (const_int 10)
5052 (const_int 12) (const_int 12)
5053 (const_int 14) (const_int 14)])))]
5055 "vmovsldup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5056 [(set_attr "type" "sse")
5057 (set_attr "prefix" "evex")
5058 (set_attr "mode" "V16SF")])
;; Expander taking the 8-bit shuffle immediate as operand 3 and splitting it
;; into the eight element selectors expected by avx_shufps256_1.  Each 2-bit
;; field of the mask is used twice:
;;   bits 1:0 and 3:2 give selectors as-is and again with +4;
;;   bits 5:4 and 7:6 give selectors with +8 and again with +12.
5060 (define_expand "avx_shufps256"
5061 [(match_operand:V8SF 0 "register_operand")
5062 (match_operand:V8SF 1 "register_operand")
5063 (match_operand:V8SF 2 "nonimmediate_operand")
5064 (match_operand:SI 3 "const_int_operand")]
5067 int mask = INTVAL (operands[3]);
5068 emit_insn (gen_avx_shufps256_1 (operands[0], operands[1], operands[2],
5069 GEN_INT ((mask >> 0) & 3),
5070 GEN_INT ((mask >> 2) & 3),
5071 GEN_INT (((mask >> 4) & 3) + 8),
5072 GEN_INT (((mask >> 6) & 3) + 8),
5073 GEN_INT (((mask >> 0) & 3) + 4),
5074 GEN_INT (((mask >> 2) & 3) + 4),
5075 GEN_INT (((mask >> 4) & 3) + 12),
5076 GEN_INT (((mask >> 6) & 3) + 12)));
5080 ;; Each 2-bit field of the mask selects 2 elements, one in each 128-bit lane.
;; V8SF insn emitting vshufps.  Operands 3..10 are the eight element
;; selectors, range-checked by their predicates (0..3, 0..3, 8..11, 8..11,
;; 4..7, 4..7, 12..15, 12..15).  The condition additionally requires
;; selectors 7..10 to equal selectors 3..6 plus 4, so both halves use the
;; same pattern.  The output code rebuilds the 8-bit immediate from
;; operands 3..6 (removing the +8 bias from operands 5 and 6) and stores it
;; back in operand 3 for the template.
5081 (define_insn "avx_shufps256_1"
5082 [(set (match_operand:V8SF 0 "register_operand" "=x")
5085 (match_operand:V8SF 1 "register_operand" "x")
5086 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
5087 (parallel [(match_operand 3 "const_0_to_3_operand" )
5088 (match_operand 4 "const_0_to_3_operand" )
5089 (match_operand 5 "const_8_to_11_operand" )
5090 (match_operand 6 "const_8_to_11_operand" )
5091 (match_operand 7 "const_4_to_7_operand" )
5092 (match_operand 8 "const_4_to_7_operand" )
5093 (match_operand 9 "const_12_to_15_operand")
5094 (match_operand 10 "const_12_to_15_operand")])))]
5096 && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
5097 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
5098 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
5099 && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4))"
5102 mask = INTVAL (operands[3]);
5103 mask |= INTVAL (operands[4]) << 2;
5104 mask |= (INTVAL (operands[5]) - 8) << 4;
5105 mask |= (INTVAL (operands[6]) - 8) << 6;
5106 operands[3] = GEN_INT (mask);
5108 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
5110 [(set_attr "type" "sseshuf")
5111 (set_attr "length_immediate" "1")
5112 (set_attr "prefix" "vex")
5113 (set_attr "mode" "V8SF")])
;; Expander taking the 8-bit shuffle immediate as operand 3 and splitting it
;; into the four element selectors expected by sse_shufps_v4sf: bits 1:0 and
;; 3:2 are passed as-is, bits 5:4 and 7:6 are passed with +4 added.
5115 (define_expand "sse_shufps"
5116 [(match_operand:V4SF 0 "register_operand")
5117 (match_operand:V4SF 1 "register_operand")
5118 (match_operand:V4SF 2 "nonimmediate_operand")
5119 (match_operand:SI 3 "const_int_operand")]
5122 int mask = INTVAL (operands[3]);
5123 emit_insn (gen_sse_shufps_v4sf (operands[0], operands[1], operands[2],
5124 GEN_INT ((mask >> 0) & 3),
5125 GEN_INT ((mask >> 2) & 3),
5126 GEN_INT (((mask >> 4) & 3) + 4),
5127 GEN_INT (((mask >> 6) & 3) + 4)));
;; Insn over VI4F_128 selecting four elements from the vec_concat of
;; operands 1 and 2.  Operands 3 and 4 must be in 0..3 (they index operand 1)
;; and operands 5 and 6 in 4..7 (they index operand 2).  The output code
;; rebuilds the 8-bit immediate from the four selectors (removing the +4 bias
;; from operands 5 and 6), stores it in operand 3, and returns the shufps
;; template (operand 1 tied to the destination, noavx) or the three-operand
;; vshufps template (avx) according to which_alternative.
5131 (define_insn "sse_shufps_<mode>"
5132 [(set (match_operand:VI4F_128 0 "register_operand" "=x,x")
5133 (vec_select:VI4F_128
5134 (vec_concat:<ssedoublevecmode>
5135 (match_operand:VI4F_128 1 "register_operand" "0,x")
5136 (match_operand:VI4F_128 2 "nonimmediate_operand" "xm,xm"))
5137 (parallel [(match_operand 3 "const_0_to_3_operand")
5138 (match_operand 4 "const_0_to_3_operand")
5139 (match_operand 5 "const_4_to_7_operand")
5140 (match_operand 6 "const_4_to_7_operand")])))]
5144 mask |= INTVAL (operands[3]) << 0;
5145 mask |= INTVAL (operands[4]) << 2;
5146 mask |= (INTVAL (operands[5]) - 4) << 4;
5147 mask |= (INTVAL (operands[6]) - 4) << 6;
5148 operands[3] = GEN_INT (mask);
5150 switch (which_alternative)
5153 return "shufps\t{%3, %2, %0|%0, %2, %3}";
5155 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
5160 [(set_attr "isa" "noavx,avx")
5161 (set_attr "type" "sseshuf")
5162 (set_attr "length_immediate" "1")
5163 (set_attr "prefix" "orig,vex")
5164 (set_attr "mode" "V4SF")])
;; Insn setting a V2SF destination (operand 0) from elements 2 and 3 of a
;; V4SF source (operand 1).  Three alternatives:
;;   0: memory destination, register source: %vmovhps;
;;   1: register to register: %vmovhlps;
;;   2: register destination, offsettable memory source: %vmovlps from %H1.
;; NOTE(review): %H1 presumably addresses the upper 8 bytes of the memory
;; source -- confirm against the operand-modifier table in i386.c.
5166 (define_insn "sse_storehps"
5167 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
5169 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
5170 (parallel [(const_int 2) (const_int 3)])))]
5173 %vmovhps\t{%1, %0|%q0, %1}
5174 %vmovhlps\t{%1, %d0|%d0, %1}
5175 %vmovlps\t{%H1, %d0|%d0, %H1}"
5176 [(set_attr "type" "ssemov")
5177 (set_attr "prefix" "maybe_vex")
5178 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander wrapper around sse_loadhps that accepts nonimmediate operands.
;; The template combines elements 0,1 of V4SF operand 1 with V2SF operand 2.
;; It calls ix86_fixup_binary_operands to obtain a destination dst, emits
;; gen_sse_loadhps into dst, and copies dst to operand 0 when they differ.
5180 (define_expand "sse_loadhps_exp"
5181 [(set (match_operand:V4SF 0 "nonimmediate_operand")
5184 (match_operand:V4SF 1 "nonimmediate_operand")
5185 (parallel [(const_int 0) (const_int 1)]))
5186 (match_operand:V2SF 2 "nonimmediate_operand")))]
5189 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
5191 emit_insn (gen_sse_loadhps (dst, operands[1], operands[2]));
5193 /* Fix up the destination if needed. */
5194 if (dst != operands[0])
5195 emit_move_insn (operands[0], dst);
;; Insn combining elements 0,1 of V4SF operand 1 with V2SF operand 2 into
;; V4SF operand 0.  Five alternatives by constraint column:
;;   0/1: operand 2 in memory: movhps (operand 1 tied to 0) / vmovhps;
;;   2/3: operand 2 in a register: movlhps / vmovlhps;
;;   4:   destination in offsettable memory with operand 1 tied to it:
;;        %vmovlps of register operand 2 to %H0.
;; NOTE(review): %H0 presumably addresses the upper 8 bytes of the memory
;; destination -- confirm against the operand-modifier table in i386.c.
5200 (define_insn "sse_loadhps"
5201 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,o")
5204 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
5205 (parallel [(const_int 0) (const_int 1)]))
5206 (match_operand:V2SF 2 "nonimmediate_operand" " m,m,x,x,x")))]
5209 movhps\t{%2, %0|%0, %q2}
5210 vmovhps\t{%2, %1, %0|%0, %1, %q2}
5211 movlhps\t{%2, %0|%0, %2}
5212 vmovlhps\t{%2, %1, %0|%0, %1, %2}
5213 %vmovlps\t{%2, %H0|%H0, %2}"
5214 [(set_attr "isa" "noavx,avx,noavx,avx,*")
5215 (set_attr "type" "ssemov")
5216 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
5217 (set_attr "mode" "V2SF,V2SF,V4SF,V4SF,V2SF")])
;; Insn setting a V2SF destination (operand 0) from elements 0 and 1 of a
;; V4SF source (operand 1).  Three alternatives:
;;   0: memory destination, register source: %vmovlps;
;;   1: register to register: %vmovaps (a full-register move);
;;   2: register destination, memory source: %vmovlps.
5219 (define_insn "sse_storelps"
5220 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
5222 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,m")
5223 (parallel [(const_int 0) (const_int 1)])))]
5226 %vmovlps\t{%1, %0|%q0, %1}
5227 %vmovaps\t{%1, %0|%0, %1}
5228 %vmovlps\t{%1, %d0|%d0, %q1}"
5229 [(set_attr "type" "ssemov")
5230 (set_attr "prefix" "maybe_vex")
5231 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander wrapper around sse_loadlps that accepts nonimmediate operands.
;; The template combines V2SF operand 2 with elements 2,3 of V4SF operand 1.
;; It calls ix86_fixup_binary_operands to obtain a destination dst, emits
;; gen_sse_loadlps into dst, and copies dst to operand 0 when they differ.
5233 (define_expand "sse_loadlps_exp"
5234 [(set (match_operand:V4SF 0 "nonimmediate_operand")
5236 (match_operand:V2SF 2 "nonimmediate_operand")
5238 (match_operand:V4SF 1 "nonimmediate_operand")
5239 (parallel [(const_int 2) (const_int 3)]))))]
5242 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
5244 emit_insn (gen_sse_loadlps (dst, operands[1], operands[2]));
5246 /* Fix up the destination if needed. */
5247 if (dst != operands[0])
5248 emit_move_insn (operands[0], dst);
;; Insn combining V2SF operand 2 with elements 2,3 of V4SF operand 1 into
;; V4SF operand 0.  Five alternatives by constraint column:
;;   0/1: operand 2 in a register: shufps / vshufps with immediate 0xe4
;;        (operand 2 is tied to the destination in the noavx form);
;;   2/3: operand 2 in memory: movlps (operand 1 tied to 0) / vmovlps;
;;   4:   destination in memory with operand 1 tied to it: %vmovlps of
;;        register operand 2.
;; The type and length_immediate attributes mark alternatives 0 and 1 as
;; shuffles carrying a one-byte immediate.
5253 (define_insn "sse_loadlps"
5254 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,m")
5256 (match_operand:V2SF 2 "nonimmediate_operand" " 0,x,m,m,x")
5258 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,0,x,0")
5259 (parallel [(const_int 2) (const_int 3)]))))]
5262 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
5263 vshufps\t{$0xe4, %1, %2, %0|%0, %2, %1, 0xe4}
5264 movlps\t{%2, %0|%0, %q2}
5265 vmovlps\t{%2, %1, %0|%0, %1, %q2}
5266 %vmovlps\t{%2, %0|%q0, %2}"
5267 [(set_attr "isa" "noavx,avx,noavx,avx,*")
5268 (set_attr "type" "sseshuf,sseshuf,ssemov,ssemov,ssemov")
5269 (set_attr "length_immediate" "1,1,*,*,*")
5270 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
5271 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
;; Register-only V4SF insn with two alternatives: movss with operand 1 tied
;; to the destination (noavx) and the three-operand vmovss (avx).  The mode
;; attribute is SF.
;; NOTE(review): presumably a merge taking the lowest element from operand 2
;; and the rest from operand 1 -- confirm against the full template.
5273 (define_insn "sse_movss"
5274 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
5276 (match_operand:V4SF 2 "register_operand" " x,x")
5277 (match_operand:V4SF 1 "register_operand" " 0,x")
5281 movss\t{%2, %0|%0, %2}
5282 vmovss\t{%2, %1, %0|%0, %1, %2}"
5283 [(set_attr "isa" "noavx,avx")
5284 (set_attr "type" "ssemov")
5285 (set_attr "prefix" "orig,vex")
5286 (set_attr "mode" "SF")])
;; Insn over VF1_128_256: the destination is a vec_duplicate of element 0
;; of the V4SF register operand 1, emitted as vbroadcastss.
5288 (define_insn "avx2_vec_dup<mode>"
5289 [(set (match_operand:VF1_128_256 0 "register_operand" "=x")
5290 (vec_duplicate:VF1_128_256
5292 (match_operand:V4SF 1 "register_operand" "x")
5293 (parallel [(const_int 0)]))))]
5295 "vbroadcastss\t{%1, %0|%0, %1}"
5296 [(set_attr "type" "sselog1")
5297 (set_attr "prefix" "vex")
5298 (set_attr "mode" "<MODE>")])
;; V8SF insn using element 0 of the V8SF register operand 1, emitted as
;; vbroadcastss with the source printed through the %x modifier.
;; NOTE(review): %x1 presumably prints the 128-bit (xmm) name of the source
;; register -- confirm against the operand-modifier table in i386.c.
5300 (define_insn "avx2_vec_dupv8sf_1"
5301 [(set (match_operand:V8SF 0 "register_operand" "=x")
5304 (match_operand:V8SF 1 "register_operand" "x")
5305 (parallel [(const_int 0)]))))]
5307 "vbroadcastss\t{%x1, %0|%0, %x1}"
5308 [(set_attr "type" "sselog1")
5309 (set_attr "prefix" "vex")
5310 (set_attr "mode" "V8SF")])
;; V4SF insn built from an SF operand 1, with three alternatives:
;;   0 (avx):   register source, vshufps with immediate 0 on operand 1 twice;
;;   1 (avx):   memory source, vbroadcastss;
;;   2 (noavx): source tied to the destination, shufps with immediate 0.
5312 (define_insn "vec_dupv4sf"
5313 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
5315 (match_operand:SF 1 "nonimmediate_operand" "x,m,0")))]
5318 vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0}
5319 vbroadcastss\t{%1, %0|%0, %1}
5320 shufps\t{$0, %0, %0|%0, %0, 0}"
5321 [(set_attr "isa" "avx,avx,noavx")
5322 (set_attr "type" "sseshuf1,ssemov,sseshuf1")
5323 (set_attr "length_immediate" "1,0,1")
5324 (set_attr "prefix_extra" "0,1,*")
5325 (set_attr "prefix" "vex,vex,orig")
5326 (set_attr "mode" "V4SF")])
5328 ;; Although insertps takes register source, we prefer
5329 ;; unpcklps with register source since it is shorter.
;; V2SF insn built from two SF operands, with seven alternatives:
;;   0/1: both in SSE registers: unpcklps / vunpcklps;
;;   2/3: operand 2 in memory: insertps / vinsertps with immediate 0x10;
;;   4:   operand 1 in memory, operand 2 the constant C: %vmovss load;
;;   5:   MMX registers: punpckldq;
;;   6:   MMX destination, operand 1 in memory, operand 2 the constant C: movd.
;; NOTE(review): the C constraint presumably means an all-zero constant, so
;; alternatives 4 and 6 load operand 1 and zero the other element -- confirm.
5330 (define_insn "*vec_concatv2sf_sse4_1"
5331 [(set (match_operand:V2SF 0 "register_operand" "=x,x,x,x,x,*y ,*y")
5333 (match_operand:SF 1 "nonimmediate_operand" " 0,x,0,x,m, 0 , m")
5334 (match_operand:SF 2 "vector_move_operand" " x,x,m,m,C,*ym, C")))]
5337 unpcklps\t{%2, %0|%0, %2}
5338 vunpcklps\t{%2, %1, %0|%0, %1, %2}
5339 insertps\t{$0x10, %2, %0|%0, %2, 0x10}
5340 vinsertps\t{$0x10, %2, %1, %0|%0, %1, %2, 0x10}
5341 %vmovss\t{%1, %0|%0, %1}
5342 punpckldq\t{%2, %0|%0, %2}
5343 movd\t{%1, %0|%0, %1}"
5344 [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
5345 (set_attr "type" "sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov")
5346 (set_attr "prefix_data16" "*,*,1,*,*,*,*")
5347 (set_attr "prefix_extra" "*,*,1,1,*,*,*")
5348 (set_attr "length_immediate" "*,*,1,1,*,*,*")
5349 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig")
5350 (set_attr "mode" "V4SF,V4SF,V4SF,V4SF,SF,DI,DI")])
5352 ;; ??? In theory we can match memory for the MMX alternative, but allowing
5353 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
5354 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; V2SF insn built from two SF operands, with four alternatives:
;;   0: both in SSE registers, operand 1 tied to the destination: unpcklps;
;;   1: operand 1 in memory, operand 2 the constant C: movss load;
;;   2: MMX registers, operand 1 tied to the destination: punpckldq;
;;   3: MMX destination, operand 1 in memory, operand 2 the constant C: movd.
;; NOTE(review): the C constraint presumably means an all-zero constant
;; (operand 2 uses reg_or_0_operand) -- confirm.
5355 (define_insn "*vec_concatv2sf_sse"
5356 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
5358 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
5359 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
5362 unpcklps\t{%2, %0|%0, %2}
5363 movss\t{%1, %0|%0, %1}
5364 punpckldq\t{%2, %0|%0, %2}
5365 movd\t{%1, %0|%0, %1}"
5366 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
5367 (set_attr "mode" "V4SF,SF,DI,DI")])
;; V4SF insn built from two V2SF operands, with four alternatives:
;;   0/1: operand 2 in a register: movlhps (operand 1 tied to 0) / vmovlhps;
;;   2/3: operand 2 in memory: movhps (operand 1 tied to 0) / vmovhps.
5369 (define_insn "*vec_concatv4sf"
5370 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x,x")
5372 (match_operand:V2SF 1 "register_operand" " 0,x,0,x")
5373 (match_operand:V2SF 2 "nonimmediate_operand" " x,x,m,m")))]
5376 movlhps\t{%2, %0|%0, %2}
5377 vmovlhps\t{%2, %1, %0|%0, %1, %2}
5378 movhps\t{%2, %0|%0, %q2}
5379 vmovhps\t{%2, %1, %0|%0, %1, %q2}"
5380 [(set_attr "isa" "noavx,avx,noavx,avx")
5381 (set_attr "type" "ssemov")
5382 (set_attr "prefix" "orig,vex,orig,vex")
5383 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF")])
;; Expander over V_128 that hands the whole job to ix86_expand_vector_init,
;; passing false as its first argument, operand 0 as the target and operand 1
;; as the initializer.
5385 (define_expand "vec_init<mode>"
5386 [(match_operand:V_128 0 "register_operand")
5390 ix86_expand_vector_init (false, operands[0], operands[1]);
5394 ;; Avoid combining registers from different units in a single alternative,
5395 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Insn over VI4F_128 combining a vec_duplicate of scalar operand 2 with
;; vector operand 1, in eleven alternatives.  By constraint column:
;;   0-3:  operand 1 is the constant C; scalar from an SSE register, memory
;;         or a general register (%vinsertps, %vmov<ssescalarmodesuffix>,
;;         %vmovd, movss);
;;   4-5:  scalar in an SSE register, operand 1 tied (movss) or separate
;;         (vmovss);
;;   6-7:  scalar in a general register or memory: pinsrd / vpinsrd with
;;         immediate 0;
;;   8-10: memory destination with operand 1 tied to it; typed ssemov, imov
;;         and fmov respectively.
;; NOTE(review): the C constraint presumably means an all-zero vector, so
;; alternatives 0-3 zero the remaining elements -- confirm.
5396 (define_insn "vec_set<mode>_0"
5397 [(set (match_operand:VI4F_128 0 "nonimmediate_operand"
5398 "=x,x,x ,x,x,x,x ,x ,m ,m ,m")
5400 (vec_duplicate:VI4F_128
5401 (match_operand:<ssescalarmode> 2 "general_operand"
5402 " x,m,*r,m,x,x,*rm,*rm,!x,!*re,!*fF"))
5403 (match_operand:VI4F_128 1 "vector_move_operand"
5404 " C,C,C ,C,0,x,0 ,x ,0 ,0 ,0")
5408 %vinsertps\t{$0xe, %d2, %0|%0, %d2, 0xe}
5409 %vmov<ssescalarmodesuffix>\t{%2, %0|%0, %2}
5410 %vmovd\t{%2, %0|%0, %2}
5411 movss\t{%2, %0|%0, %2}
5412 movss\t{%2, %0|%0, %2}
5413 vmovss\t{%2, %1, %0|%0, %1, %2}
5414 pinsrd\t{$0, %2, %0|%0, %2, 0}
5415 vpinsrd\t{$0, %2, %1, %0|%0, %1, %2, 0}
5419 [(set_attr "isa" "sse4,sse2,sse2,noavx,noavx,avx,sse4_noavx,avx,*,*,*")
5421 (cond [(eq_attr "alternative" "0,6,7")
5422 (const_string "sselog")
5423 (eq_attr "alternative" "9")
5424 (const_string "imov")
5425 (eq_attr "alternative" "10")
5426 (const_string "fmov")
5428 (const_string "ssemov")))
5429 (set_attr "prefix_extra" "*,*,*,*,*,*,1,1,*,*,*")
5430 (set_attr "length_immediate" "*,*,*,*,*,*,1,1,*,*,*")
5431 (set_attr "prefix" "maybe_vex,maybe_vex,maybe_vex,orig,orig,vex,orig,vex,*,*,*")
5432 (set_attr "mode" "SF,<ssescalarmode>,SI,SF,SF,SF,TI,TI,*,*,*")])
5434 ;; A subset is vec_setv4sf.
;; Inserts SF operand 2 into V4SF operand 1 using (v)insertps.
;; Operand 3 is a constant whose exact_log2 must be below the number of
;; V4SF elements, i.e. a single-bit value naming one of the four elements.
;; Before printing, operand 3 is rewritten to exact_log2 (operand 3) << 4,
;; which is the value passed as the insertps immediate.
;; Alternative 0 is the non-AVX form (operand 1 tied to the output),
;; alternative 1 the AVX three-operand form.
5435 (define_insn "*vec_setv4sf_sse4_1"
5436 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
5439 (match_operand:SF 2 "nonimmediate_operand" "xm,xm"))
5440 (match_operand:V4SF 1 "register_operand" "0,x")
5441 (match_operand:SI 3 "const_int_operand")))]
5443 && ((unsigned) exact_log2 (INTVAL (operands[3]))
5444 < GET_MODE_NUNITS (V4SFmode))"
5446 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
5447 switch (which_alternative)
5450 return "insertps\t{%3, %2, %0|%0, %2, %3}";
5452 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
5457 [(set_attr "isa" "noavx,avx")
5458 (set_attr "type" "sselog")
5459 (set_attr "prefix_data16" "1,*")
5460 (set_attr "prefix_extra" "1")
5461 (set_attr "length_immediate" "1")
5462 (set_attr "prefix" "orig,vex")
5463 (set_attr "mode" "V4SF")])
;; (v)insertps with a full 8-bit immediate (operand 3), modelled as an unspec
;; over V4SF operands 2 and 1.
;; When operand 2 is in memory, the output code takes the top two immediate
;; bits (operand 3 >> 6) as a source element index, clears them from the
;; immediate (& 0x3f) and re-addresses the memory operand to the SFmode
;; element at byte offset index * 4.
5465 (define_insn "sse4_1_insertps"
5466 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
5467 (unspec:V4SF [(match_operand:V4SF 2 "nonimmediate_operand" "xm,xm")
5468 (match_operand:V4SF 1 "register_operand" "0,x")
5469 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
5473 if (MEM_P (operands[2]))
5475 unsigned count_s = INTVAL (operands[3]) >> 6;
5477 operands[3] = GEN_INT (INTVAL (operands[3]) & 0x3f);
5478 operands[2] = adjust_address_nv (operands[2], SFmode, count_s * 4);
5480 switch (which_alternative)
5483 return "insertps\t{%3, %2, %0|%0, %2, %3}";
5485 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
5490 [(set_attr "isa" "noavx,avx")
5491 (set_attr "type" "sselog")
5492 (set_attr "prefix_data16" "1,*")
5493 (set_attr "prefix_extra" "1")
5494 (set_attr "length_immediate" "1")
5495 (set_attr "prefix" "orig,vex")
5496 (set_attr "mode" "V4SF")])
;; NOTE(review): the header line of this form is not visible in this excerpt;
;; the trailing condition/replacement/preparation layout looks like a
;; define_split -- confirm.
;; After reload (and with TARGET_SSE), a VI4F_128 memory destination fed by a
;; vec_duplicate of a non-memory scalar (operand 1) is replaced by a plain
;; scalar store: operand 0 is re-addressed in the scalar mode at offset 0 and
;; operand 1 is moved into it.
5499 [(set (match_operand:VI4F_128 0 "memory_operand")
5501 (vec_duplicate:VI4F_128
5502 (match_operand:<ssescalarmode> 1 "nonmemory_operand"))
5505 "TARGET_SSE && reload_completed"
5506 [(set (match_dup 0) (match_dup 1))]
5507 "operands[0] = adjust_address (operands[0], <ssescalarmode>mode, 0);")
;; Expander for the modes in the V iterator.  Operand 0 is the vector
;; register, operand 1 the scalar register, operand 2 a constant integer.
;; All work is delegated to ix86_expand_vector_set, which receives the
;; integer value of operand 2 as its last argument.
5509 (define_expand "vec_set<mode>"
5510 [(match_operand:V 0 "register_operand")
5511 (match_operand:<ssescalarmode> 1 "register_operand")
5512 (match_operand 2 "const_int_operand")]
5515 ix86_expand_vector_set (false, operands[0], operands[1],
5516 INTVAL (operands[2]));
;; Extracts element 0 of a V4SF operand into an SF destination (SSE reg,
;; memory, x87 reg or GPR alternatives).  Memory-to-memory is rejected by the
;; insn condition.
;; After reload the pattern is split into a plain SF move: a register source
;; is re-expressed as an SFmode register with the same register number; the
;; adjust_address line re-addresses the source as SFmode at offset 0
;; (presumably the non-register branch; the separating line is not visible
;; in this excerpt).
5520 (define_insn_and_split "*vec_extractv4sf_0"
5521 [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,f,r")
5523 (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m,m")
5524 (parallel [(const_int 0)])))]
5525 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
5527 "&& reload_completed"
5528 [(set (match_dup 0) (match_dup 1))]
5530 if (REG_P (operands[1]))
5531 operands[1] = gen_rtx_REG (SFmode, REGNO (operands[1]));
5533 operands[1] = adjust_address (operands[1], SFmode, 0);
;; Extracts the V4SF element selected by operand 2 (0..3) into an SF
;; destination.  The first alternative (GPR or memory destination) emits
;; (v)extractps; its attributes are the only non-"*" entries below.
;; When the destination is an SSE register, the pattern is split after
;; reload: the destination is viewed as a V4SF register and filled either by
;; shufps (index replicated; the last two selectors are index + 4) or by
;; vec_interleave_highv4sf.  The case labels choosing between the two are
;; not visible in this excerpt.
5536 (define_insn_and_split "*sse4_1_extractps"
5537 [(set (match_operand:SF 0 "nonimmediate_operand" "=rm,x,x")
5539 (match_operand:V4SF 1 "register_operand" "x,0,x")
5540 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n,n,n")])))]
5543 %vextractps\t{%2, %1, %0|%0, %1, %2}
5546 "&& reload_completed && SSE_REG_P (operands[0])"
5549 rtx dest = gen_rtx_REG (V4SFmode, REGNO (operands[0]));
5550 switch (INTVAL (operands[2]))
5554 emit_insn (gen_sse_shufps_v4sf (dest, operands[1], operands[1],
5555 operands[2], operands[2],
5556 GEN_INT (INTVAL (operands[2]) + 4),
5557 GEN_INT (INTVAL (operands[2]) + 4)));
5560 emit_insn (gen_vec_interleave_highv4sf (dest, operands[1], operands[1]));
5563 /* 0 should be handled by the *vec_extractv4sf_0 pattern above. */
5568 [(set_attr "isa" "*,noavx,avx")
5569 (set_attr "type" "sselog,*,*")
5570 (set_attr "prefix_data16" "1,*,*")
5571 (set_attr "prefix_extra" "1,*,*")
5572 (set_attr "length_immediate" "1,*,*")
5573 (set_attr "prefix" "maybe_vex,*,*")
5574 (set_attr "mode" "V4SF,*,*")])
;; Extracts element operand 2 (0..3) from a V4SF value in offsettable memory
;; ("o").  After reload it is split into a plain SF load from the original
;; address plus operand 2 * 4 bytes.
5576 (define_insn_and_split "*vec_extractv4sf_mem"
5577 [(set (match_operand:SF 0 "register_operand" "=x,*r,f")
5579 (match_operand:V4SF 1 "memory_operand" "o,o,o")
5580 (parallel [(match_operand 2 "const_0_to_3_operand" "n,n,n")])))]
5583 "&& reload_completed"
5584 [(set (match_dup 0) (match_dup 1))]
5586 operands[1] = adjust_address (operands[1], SFmode, INTVAL (operands[2]) * 4);
;; Expander for the masked extraction of one quarter of a V16FI vector.
;; Operands: 0 = destination (quarter-width mode), 1 = source register,
;; 2 = constant 0..3, 3 and 4 = a quarter-width operand and a QI register
;; that are forwarded unchanged as the last two arguments of the generated
;; insn.
;; If the destination is memory while operand 3 is a CONST_VECTOR, operand 0
;; is first replaced by force_reg of itself.
;; The switch on operand 2 emits gen_..._32x4_1_mask with four explicit
;; element indices; the visible calls use 0-3, 4-7, 8-11 and 12-15 (the case
;; labels themselves are not shown in this excerpt).
5589 (define_expand "avx512f_vextract<shuffletype>32x4_mask"
5590 [(match_operand:<ssequartermode> 0 "nonimmediate_operand")
5591 (match_operand:V16FI 1 "register_operand")
5592 (match_operand:SI 2 "const_0_to_3_operand")
5593 (match_operand:<ssequartermode> 3 "nonimmediate_operand")
5594 (match_operand:QI 4 "register_operand")]
5597 if (MEM_P (operands[0]) && GET_CODE (operands[3]) == CONST_VECTOR)
5598 operands[0] = force_reg (<ssequartermode>mode, operands[0]);
5599 switch (INTVAL (operands[2]))
5602 emit_insn (gen_avx512f_vextract<shuffletype>32x4_1_mask (operands[0],
5603 operands[1], GEN_INT (0), GEN_INT (1), GEN_INT (2),
5604 GEN_INT (3), operands[3], operands[4]));
5607 emit_insn (gen_avx512f_vextract<shuffletype>32x4_1_mask (operands[0],
5608 operands[1], GEN_INT (4), GEN_INT (5), GEN_INT (6),
5609 GEN_INT (7), operands[3], operands[4]));
5612 emit_insn (gen_avx512f_vextract<shuffletype>32x4_1_mask (operands[0],
5613 operands[1], GEN_INT (8), GEN_INT (9), GEN_INT (10),
5614 GEN_INT (11), operands[3], operands[4]));
5617 emit_insn (gen_avx512f_vextract<shuffletype>32x4_1_mask (operands[0],
5618 operands[1], GEN_INT (12), GEN_INT (13), GEN_INT (14),
5619 GEN_INT (15), operands[3], operands[4]));
;; Masked store form of vextract{f,i}32x4: four elements selected from the
;; V16FI register operand 1 are merged into memory operand 0 under mask
;; operand 7 (operand 6 is the same memory location, tied with "0").
;; The insn condition requires operands 2..5 to be four consecutive element
;; indices starting on a multiple of four.  This matters because the output
;; code derives the instruction immediate as operands[2] >> 2, which is only
;; meaningful for a lane-aligned run of indices.
;; The comparisons must be "==": INTVAL expands to an lvalue, so a single
;; "=" would silently overwrite the constant instead of testing it.
5627 (define_insn "avx512f_vextract<shuffletype>32x4_1_maskm"
5628 [(set (match_operand:<ssequartermode> 0 "memory_operand" "=m")
5629 (vec_merge:<ssequartermode>
5630 (vec_select:<ssequartermode>
5631 (match_operand:V16FI 1 "register_operand" "v")
5632 (parallel [(match_operand 2 "const_0_to_15_operand")
5633 (match_operand 3 "const_0_to_15_operand")
5634 (match_operand 4 "const_0_to_15_operand")
5635 (match_operand 5 "const_0_to_15_operand")]))
5636 (match_operand:<ssequartermode> 6 "memory_operand" "0")
5637 (match_operand:QI 7 "register_operand" "k")))]
5638 "TARGET_AVX512F && (INTVAL (operands[2]) % 4 == 0) && (INTVAL (operands[2]) == INTVAL (operands[3]) - 1)
5639 && (INTVAL (operands[3]) == INTVAL (operands[4]) - 1)
5640 && (INTVAL (operands[4]) == INTVAL (operands[5]) - 1)"
5642 operands[2] = GEN_INT ((INTVAL (operands[2])) >> 2);
5643 return "vextract<shuffletype>32x4\t{%2, %1, %0%{%7%}|%0%{%7%}, %1, %2}";
5645 [(set_attr "type" "sselog")
5646 (set_attr "prefix_extra" "1")
5647 (set_attr "length_immediate" "1")
5648 (set_attr "memory" "store")
5649 (set_attr "prefix" "evex")
5650 (set_attr "mode" "<sseinsnmode>")])
;; vextract{f,i}32x4: selects four elements of the V16FI register operand 1
;; into a quarter-width destination.  The <mask_name>/<mask_operand6>
;; placeholders are filled in by a substitution defined elsewhere in the
;; file (not visible here).
;; The insn condition requires operands 2..5 to be four consecutive element
;; indices starting on a multiple of four.  This matters because the output
;; code derives the instruction immediate as operands[2] >> 2, which is only
;; meaningful for a lane-aligned run of indices.
;; The comparisons must be "==": INTVAL expands to an lvalue, so a single
;; "=" would silently overwrite the constant instead of testing it.
;; The "memory" attribute is "store" when operand 0 is a MEM, else "none".
5652 (define_insn "<mask_codefor>avx512f_vextract<shuffletype>32x4_1<mask_name>"
5653 [(set (match_operand:<ssequartermode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
5654 (vec_select:<ssequartermode>
5655 (match_operand:V16FI 1 "register_operand" "v")
5656 (parallel [(match_operand 2 "const_0_to_15_operand")
5657 (match_operand 3 "const_0_to_15_operand")
5658 (match_operand 4 "const_0_to_15_operand")
5659 (match_operand 5 "const_0_to_15_operand")])))]
5660 "TARGET_AVX512F && (INTVAL (operands[2]) % 4 == 0) && (INTVAL (operands[2]) == INTVAL (operands[3]) - 1)
5661 && (INTVAL (operands[3]) == INTVAL (operands[4]) - 1)
5662 && (INTVAL (operands[4]) == INTVAL (operands[5]) - 1)"
5664 operands[2] = GEN_INT ((INTVAL (operands[2])) >> 2);
5665 return "vextract<shuffletype>32x4\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
5667 [(set_attr "type" "sselog")
5668 (set_attr "prefix_extra" "1")
5669 (set_attr "length_immediate" "1")
5670 (set (attr "memory")
5671 (if_then_else (match_test "MEM_P (operands[0])")
5672 (const_string "store")
5673 (const_string "none")))
5674 (set_attr "prefix" "evex")
5675 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the masked extraction of one half of a V8FI vector.
;; Operands: 0 = destination (half-width mode), 1 = source register,
;; 2 = constant 0..1, 3 and 4 = a half-width operand and a QI register that
;; are forwarded unchanged as the last two arguments of the generated insn.
;; If the destination is memory while operand 3 is a CONST_VECTOR, operand 0
;; is first replaced by force_reg of itself.  The mode passed to force_reg
;; must be <ssehalfvecmode>, the mode operand 0 is declared with above.
;; The switch on operand 2 picks gen_vec_extract_lo_<mode>_mask or
;; gen_vec_extract_hi_<mode>_mask (case labels not shown in this excerpt).
5677 (define_expand "avx512f_vextract<shuffletype>64x4_mask"
5678 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
5679 (match_operand:V8FI 1 "register_operand")
5680 (match_operand:SI 2 "const_0_to_1_operand")
5681 (match_operand:<ssehalfvecmode> 3 "nonimmediate_operand")
5682 (match_operand:QI 4 "register_operand")]
5685 rtx (*insn)(rtx, rtx, rtx, rtx);
5687 if (MEM_P (operands[0]) && GET_CODE (operands[3]) == CONST_VECTOR)
5688 operands[0] = force_reg (<ssehalfvecmode>mode, operands[0]);
5690 switch (INTVAL (operands[2]))
5693 insn = gen_vec_extract_lo_<mode>_mask;
5696 insn = gen_vec_extract_hi_<mode>_mask;
5702 emit_insn (insn (operands[0], operands[1], operands[3], operands[4]));
;; NOTE(review): the header line of this form is not visible in this excerpt;
;; it looks like a define_split -- confirm.
;; After reload (TARGET_AVX512F, not memory-to-memory), selecting elements
;; 0..3 of a V8FI operand is replaced by a plain move of the low half:
;; operand 1 is re-expressed in the half-width mode, either as a register
;; with the same register number or through gen_lowpart (the if/else lines
;; choosing between the two are not visible here), and moved to operand 0.
5707 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
5708 (vec_select:<ssehalfvecmode>
5709 (match_operand:V8FI 1 "nonimmediate_operand")
5710 (parallel [(const_int 0) (const_int 1)
5711 (const_int 2) (const_int 3)])))]
5712 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))
5713 && reload_completed"
5716 rtx op1 = operands[1];
5718 op1 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op1));
5720 op1 = gen_lowpart (<ssehalfvecmode>mode, op1);
5721 emit_move_insn (operands[0], op1);
;; Masked store of the low half (elements 0..3) of V8FI register operand 1
;; into memory operand 0, merged with operand 2 (the same memory, tied with
;; "0") under mask operand 3.  Emits vextract{f,i}64x4 with immediate 0x0.
;; Unlike the matching _hi_ pattern, no explicit "memory" attribute is set.
5725 (define_insn "vec_extract_lo_<mode>_maskm"
5726 [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
5727 (vec_merge:<ssehalfvecmode>
5728 (vec_select:<ssehalfvecmode>
5729 (match_operand:V8FI 1 "register_operand" "v")
5730 (parallel [(const_int 0) (const_int 1)
5731 (const_int 2) (const_int 3)]))
5732 (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
5733 (match_operand:QI 3 "register_operand" "k")))]
5735 "vextract<shuffletype>64x4\t{$0x0, %1, %0%{%3%}|%0%{%3%}, %1, 0x0}"
5736 [(set_attr "type" "sselog")
5737 (set_attr "prefix_extra" "1")
5738 (set_attr "length_immediate" "1")
5739 (set_attr "prefix" "evex")
5740 (set_attr "mode" "<sseinsnmode>")])
;; Extracts the low half (elements 0..3) of a V8FI operand, which may be a
;; register or memory; memory-to-memory is rejected by the insn condition.
;; Only one return statement of the output code is visible in this excerpt:
;; it emits vextract{f,i}64x4 with immediate 0x0.  <mask_name> and
;; <mask_operand2> are substitution placeholders defined elsewhere.
;; The "memory" attribute is "store" when operand 0 is a MEM, else "none".
5742 (define_insn "vec_extract_lo_<mode><mask_name>"
5743 [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
5744 (vec_select:<ssehalfvecmode>
5745 (match_operand:V8FI 1 "nonimmediate_operand" "vm")
5746 (parallel [(const_int 0) (const_int 1)
5747 (const_int 2) (const_int 3)])))]
5748 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
5751 return "vextract<shuffletype>64x4\t{$0x0, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x0}";
5755 [(set_attr "type" "sselog")
5756 (set_attr "prefix_extra" "1")
5757 (set_attr "length_immediate" "1")
5758 (set (attr "memory")
5759 (if_then_else (match_test "MEM_P (operands[0])")
5760 (const_string "store")
5761 (const_string "none")))
5762 (set_attr "prefix" "evex")
5763 (set_attr "mode" "<sseinsnmode>")])
;; Masked store of the high half (elements 4..7) of V8FI register operand 1
;; into memory operand 0, merged with operand 2 (the same memory, tied with
;; "0") under mask operand 3.  Emits vextract{f,i}64x4 with immediate 0x1.
5765 (define_insn "vec_extract_hi_<mode>_maskm"
5766 [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
5767 (vec_merge:<ssehalfvecmode>
5768 (vec_select:<ssehalfvecmode>
5769 (match_operand:V8FI 1 "register_operand" "v")
5770 (parallel [(const_int 4) (const_int 5)
5771 (const_int 6) (const_int 7)]))
5772 (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
5773 (match_operand:QI 3 "register_operand" "k")))]
5775 "vextract<shuffletype>64x4\t{$0x1, %1, %0%{%3%}|%0%{%3%}, %1, 0x1}"
5776 [(set_attr "type" "sselog")
5777 (set_attr "prefix_extra" "1")
5778 (set_attr "length_immediate" "1")
5779 (set_attr "memory" "store")
5780 (set_attr "prefix" "evex")
5781 (set_attr "mode" "<sseinsnmode>")])
;; Extracts the high half (elements 4..7) of V8FI register operand 1 with
;; vextract{f,i}64x4 and immediate 0x1.  <mask_name> and <mask_operand2> are
;; substitution placeholders defined elsewhere.
;; The "memory" attribute is "store" when operand 0 is a MEM, else "none".
5783 (define_insn "vec_extract_hi_<mode><mask_name>"
5784 [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
5785 (vec_select:<ssehalfvecmode>
5786 (match_operand:V8FI 1 "register_operand" "v")
5787 (parallel [(const_int 4) (const_int 5)
5788 (const_int 6) (const_int 7)])))]
5790 "vextract<shuffletype>64x4\t{$0x1, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x1}"
5791 [(set_attr "type" "sselog")
5792 (set_attr "prefix_extra" "1")
5793 (set_attr "length_immediate" "1")
5794 (set (attr "memory")
5795 (if_then_else (match_test "MEM_P (operands[0])")
5796 (const_string "store")
5797 (const_string "none")))
5798 (set_attr "prefix" "evex")
5799 (set_attr "mode" "<sseinsnmode>")])
;; Expander extracting one half of a V_256 register into a half-width
;; destination.  Operand 2 (constant 0..1) is switched on to pick
;; gen_vec_extract_lo_<mode> or gen_vec_extract_hi_<mode> (the case labels
;; are not visible in this excerpt), which is then emitted with operands 0
;; and 1.
5801 (define_expand "avx_vextractf128<mode>"
5802 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
5803 (match_operand:V_256 1 "register_operand")
5804 (match_operand:SI 2 "const_0_to_1_operand")]
5807 rtx (*insn)(rtx, rtx);
5809 switch (INTVAL (operands[2]))
5812 insn = gen_vec_extract_lo_<mode>;
5815 insn = gen_vec_extract_hi_<mode>;
5821 emit_insn (insn (operands[0], operands[1]));
;; Low half (elements 0..7) of a V16FI operand.  Memory-to-memory is
;; rejected.  After reload the pattern is split into a plain move: operand 1
;; is re-expressed in the half-width mode, either as a register with the
;; same register number or through gen_lowpart (the if/else lines choosing
;; between the two are not visible in this excerpt).
5825 (define_insn_and_split "vec_extract_lo_<mode>"
5826 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,m")
5827 (vec_select:<ssehalfvecmode>
5828 (match_operand:V16FI 1 "nonimmediate_operand" "vm,v")
5829 (parallel [(const_int 0) (const_int 1)
5830 (const_int 2) (const_int 3)
5831 (const_int 4) (const_int 5)
5832 (const_int 6) (const_int 7)])))]
5833 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
5835 "&& reload_completed"
5838 rtx op1 = operands[1];
5840 op1 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op1));
5842 op1 = gen_lowpart (<ssehalfvecmode>mode, op1);
5843 emit_move_insn (operands[0], op1);
;; High half (elements 8..15) of a V16FI operand, to a register or memory.
;; The template always uses the integer mnemonic vextracti64x4 (immediate
;; 0x1) and mode XI, for every mode of the V16FI iterator.
5847 (define_insn "vec_extract_hi_<mode>"
5848 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,m")
5849 (vec_select:<ssehalfvecmode>
5850 (match_operand:V16FI 1 "nonimmediate_operand" "v,v")
5851 (parallel [(const_int 8) (const_int 9)
5852 (const_int 10) (const_int 11)
5853 (const_int 12) (const_int 13)
5854 (const_int 14) (const_int 15)])))]
5856 "vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
5857 [(set_attr "type" "sselog")
5858 (set_attr "prefix_extra" "1")
5859 (set_attr "length_immediate" "1")
5860 (set_attr "memory" "none,store")
5861 (set_attr "prefix" "evex")
5862 (set_attr "mode" "XI")])
;; Low half (elements 0..1) of a VI8F_256 operand.  Memory-to-memory is
;; rejected.  After reload the pattern becomes a plain half-width move: a
;; register source is re-expressed as a half-width register with the same
;; register number; the adjust_address line re-addresses the source in the
;; half-width mode at offset 0 (presumably the memory branch; the separating
;; line is not visible in this excerpt).
5864 (define_insn_and_split "vec_extract_lo_<mode>"
5865 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
5866 (vec_select:<ssehalfvecmode>
5867 (match_operand:VI8F_256 1 "nonimmediate_operand" "xm,x")
5868 (parallel [(const_int 0) (const_int 1)])))]
5869 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
5871 "&& reload_completed"
5872 [(set (match_dup 0) (match_dup 1))]
5874 if (REG_P (operands[1]))
5875 operands[1] = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (operands[1]));
5877 operands[1] = adjust_address (operands[1], <ssehalfvecmode>mode, 0);
;; High half (elements 2..3) of a VI8F_256 register, to a register or memory,
;; using vextract<i128> with immediate 0x1 (VEX encoded).
5880 (define_insn "vec_extract_hi_<mode>"
5881 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
5882 (vec_select:<ssehalfvecmode>
5883 (match_operand:VI8F_256 1 "register_operand" "x,x")
5884 (parallel [(const_int 2) (const_int 3)])))]
5886 "vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}"
5887 [(set_attr "type" "sselog")
5888 (set_attr "prefix_extra" "1")
5889 (set_attr "length_immediate" "1")
5890 (set_attr "memory" "none,store")
5891 (set_attr "prefix" "vex")
5892 (set_attr "mode" "<sseinsnmode>")])
;; Low half (elements 0..3) of a VI4F_256 operand.  Memory-to-memory is
;; rejected.  After reload the pattern becomes a plain half-width move: a
;; register source is re-expressed as a half-width register with the same
;; register number; the adjust_address line re-addresses the source in the
;; half-width mode at offset 0 (presumably the memory branch; the separating
;; line is not visible in this excerpt).
5894 (define_insn_and_split "vec_extract_lo_<mode>"
5895 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
5896 (vec_select:<ssehalfvecmode>
5897 (match_operand:VI4F_256 1 "nonimmediate_operand" "xm,x")
5898 (parallel [(const_int 0) (const_int 1)
5899 (const_int 2) (const_int 3)])))]
5900 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
5902 "&& reload_completed"
5903 [(set (match_dup 0) (match_dup 1))]
5905 if (REG_P (operands[1]))
5906 operands[1] = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (operands[1]));
5908 operands[1] = adjust_address (operands[1], <ssehalfvecmode>mode, 0);
;; High half (elements 4..7) of a VI4F_256 register, to a register or memory,
;; using vextract<i128> with immediate 0x1 (VEX encoded).
5911 (define_insn "vec_extract_hi_<mode>"
5912 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
5913 (vec_select:<ssehalfvecmode>
5914 (match_operand:VI4F_256 1 "register_operand" "x,x")
5915 (parallel [(const_int 4) (const_int 5)
5916 (const_int 6) (const_int 7)])))]
5918 "vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}"
5919 [(set_attr "type" "sselog")
5920 (set_attr "prefix_extra" "1")
5921 (set_attr "length_immediate" "1")
5922 (set_attr "memory" "none,store")
5923 (set_attr "prefix" "vex")
5924 (set_attr "mode" "<sseinsnmode>")])
;; Low half (elements 0..15) of a V32HI operand as V16HI.  Memory-to-memory
;; is rejected.  After reload the pattern becomes a plain V16HI move: a
;; register source is re-expressed as a V16HImode register with the same
;; register number; the adjust_address line re-addresses the source as
;; V16HImode at offset 0 (presumably the memory branch; the separating line
;; is not visible in this excerpt).
5926 (define_insn_and_split "vec_extract_lo_v32hi"
5927 [(set (match_operand:V16HI 0 "nonimmediate_operand" "=v,m")
5929 (match_operand:V32HI 1 "nonimmediate_operand" "vm,v")
5930 (parallel [(const_int 0) (const_int 1)
5931 (const_int 2) (const_int 3)
5932 (const_int 4) (const_int 5)
5933 (const_int 6) (const_int 7)
5934 (const_int 8) (const_int 9)
5935 (const_int 10) (const_int 11)
5936 (const_int 12) (const_int 13)
5937 (const_int 14) (const_int 15)])))]
5938 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
5940 "&& reload_completed"
5941 [(set (match_dup 0) (match_dup 1))]
5943 if (REG_P (operands[1]))
5944 operands[1] = gen_rtx_REG (V16HImode, REGNO (operands[1]));
5946 operands[1] = adjust_address (operands[1], V16HImode, 0);
;; High half (elements 16..31) of a V32HI register as V16HI, to a register or
;; memory, using vextracti64x4 with immediate 0x1 (EVEX encoded, mode XI).
5949 (define_insn "vec_extract_hi_v32hi"
5950 [(set (match_operand:V16HI 0 "nonimmediate_operand" "=v,m")
5952 (match_operand:V32HI 1 "nonimmediate_operand" "v,v")
5953 (parallel [(const_int 16) (const_int 17)
5954 (const_int 18) (const_int 19)
5955 (const_int 20) (const_int 21)
5956 (const_int 22) (const_int 23)
5957 (const_int 24) (const_int 25)
5958 (const_int 26) (const_int 27)
5959 (const_int 28) (const_int 29)
5960 (const_int 30) (const_int 31)])))]
5962 "vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
5963 [(set_attr "type" "sselog")
5964 (set_attr "prefix_extra" "1")
5965 (set_attr "length_immediate" "1")
5966 (set_attr "memory" "none,store")
5967 (set_attr "prefix" "evex")
5968 (set_attr "mode" "XI")])
;; Low half (elements 0..7) of a V16HI operand as V8HI.  Memory-to-memory is
;; rejected.  After reload the pattern becomes a plain V8HI move: a register
;; source is re-expressed as a V8HImode register with the same register
;; number; the adjust_address line re-addresses the source as V8HImode at
;; offset 0 (presumably the memory branch; the separating line is not
;; visible in this excerpt).
5970 (define_insn_and_split "vec_extract_lo_v16hi"
5971 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
5973 (match_operand:V16HI 1 "nonimmediate_operand" "xm,x")
5974 (parallel [(const_int 0) (const_int 1)
5975 (const_int 2) (const_int 3)
5976 (const_int 4) (const_int 5)
5977 (const_int 6) (const_int 7)])))]
5978 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
5980 "&& reload_completed"
5981 [(set (match_dup 0) (match_dup 1))]
5983 if (REG_P (operands[1]))
5984 operands[1] = gen_rtx_REG (V8HImode, REGNO (operands[1]));
5986 operands[1] = adjust_address (operands[1], V8HImode, 0);
;; High half (elements 8..15) of a V16HI register as V8HI, to a register or
;; memory, using vextract%~128 with immediate 0x1 (VEX encoded, mode OI).
5989 (define_insn "vec_extract_hi_v16hi"
5990 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
5992 (match_operand:V16HI 1 "register_operand" "x,x")
5993 (parallel [(const_int 8) (const_int 9)
5994 (const_int 10) (const_int 11)
5995 (const_int 12) (const_int 13)
5996 (const_int 14) (const_int 15)])))]
5998 "vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}"
5999 [(set_attr "type" "sselog")
6000 (set_attr "prefix_extra" "1")
6001 (set_attr "length_immediate" "1")
6002 (set_attr "memory" "none,store")
6003 (set_attr "prefix" "vex")
6004 (set_attr "mode" "OI")])
;; Low half (elements 0..31) of a V64QI operand as V32QI.  Memory-to-memory
;; is rejected.  After reload the pattern becomes a plain V32QI move: a
;; register source is re-expressed as a V32QImode register with the same
;; register number; the adjust_address line re-addresses the source as
;; V32QImode at offset 0 (presumably the memory branch; the separating line
;; is not visible in this excerpt).
6006 (define_insn_and_split "vec_extract_lo_v64qi"
6007 [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m")
6009 (match_operand:V64QI 1 "nonimmediate_operand" "vm,v")
6010 (parallel [(const_int 0) (const_int 1)
6011 (const_int 2) (const_int 3)
6012 (const_int 4) (const_int 5)
6013 (const_int 6) (const_int 7)
6014 (const_int 8) (const_int 9)
6015 (const_int 10) (const_int 11)
6016 (const_int 12) (const_int 13)
6017 (const_int 14) (const_int 15)
6018 (const_int 16) (const_int 17)
6019 (const_int 18) (const_int 19)
6020 (const_int 20) (const_int 21)
6021 (const_int 22) (const_int 23)
6022 (const_int 24) (const_int 25)
6023 (const_int 26) (const_int 27)
6024 (const_int 28) (const_int 29)
6025 (const_int 30) (const_int 31)])))]
6026 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6028 "&& reload_completed"
6029 [(set (match_dup 0) (match_dup 1))]
6031 if (REG_P (operands[1]))
6032 operands[1] = gen_rtx_REG (V32QImode, REGNO (operands[1]));
6034 operands[1] = adjust_address (operands[1], V32QImode, 0);
;; High half (elements 32..63) of a V64QI register as V32QI, to a register or
;; memory, using vextracti64x4 with immediate 0x1 (EVEX encoded, mode XI).
6037 (define_insn "vec_extract_hi_v64qi"
6038 [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m")
6040 (match_operand:V64QI 1 "nonimmediate_operand" "v,v")
6041 (parallel [(const_int 32) (const_int 33)
6042 (const_int 34) (const_int 35)
6043 (const_int 36) (const_int 37)
6044 (const_int 38) (const_int 39)
6045 (const_int 40) (const_int 41)
6046 (const_int 42) (const_int 43)
6047 (const_int 44) (const_int 45)
6048 (const_int 46) (const_int 47)
6049 (const_int 48) (const_int 49)
6050 (const_int 50) (const_int 51)
6051 (const_int 52) (const_int 53)
6052 (const_int 54) (const_int 55)
6053 (const_int 56) (const_int 57)
6054 (const_int 58) (const_int 59)
6055 (const_int 60) (const_int 61)
6056 (const_int 62) (const_int 63)])))]
6058 "vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
6059 [(set_attr "type" "sselog")
6060 (set_attr "prefix_extra" "1")
6061 (set_attr "length_immediate" "1")
6062 (set_attr "memory" "none,store")
6063 (set_attr "prefix" "evex")
6064 (set_attr "mode" "XI")])
;; Low half (elements 0..15) of a V32QI operand as V16QI.  Memory-to-memory
;; is rejected.  After reload the pattern becomes a plain V16QI move: a
;; register source is re-expressed as a V16QImode register with the same
;; register number; the adjust_address line re-addresses the source as
;; V16QImode at offset 0 (presumably the memory branch; the separating line
;; is not visible in this excerpt).
6066 (define_insn_and_split "vec_extract_lo_v32qi"
6067 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
6069 (match_operand:V32QI 1 "nonimmediate_operand" "xm,x")
6070 (parallel [(const_int 0) (const_int 1)
6071 (const_int 2) (const_int 3)
6072 (const_int 4) (const_int 5)
6073 (const_int 6) (const_int 7)
6074 (const_int 8) (const_int 9)
6075 (const_int 10) (const_int 11)
6076 (const_int 12) (const_int 13)
6077 (const_int 14) (const_int 15)])))]
6078 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6080 "&& reload_completed"
6081 [(set (match_dup 0) (match_dup 1))]
6083 if (REG_P (operands[1]))
6084 operands[1] = gen_rtx_REG (V16QImode, REGNO (operands[1]));
6086 operands[1] = adjust_address (operands[1], V16QImode, 0);
;; High half (elements 16..31) of a V32QI register as V16QI, to a register or
;; memory, using vextract%~128 with immediate 0x1 (VEX encoded, mode OI).
6089 (define_insn "vec_extract_hi_v32qi"
6090 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
6092 (match_operand:V32QI 1 "register_operand" "x,x")
6093 (parallel [(const_int 16) (const_int 17)
6094 (const_int 18) (const_int 19)
6095 (const_int 20) (const_int 21)
6096 (const_int 22) (const_int 23)
6097 (const_int 24) (const_int 25)
6098 (const_int 26) (const_int 27)
6099 (const_int 28) (const_int 29)
6100 (const_int 30) (const_int 31)])))]
6102 "vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}"
6103 [(set_attr "type" "sselog")
6104 (set_attr "prefix_extra" "1")
6105 (set_attr "length_immediate" "1")
6106 (set_attr "memory" "none,store")
6107 (set_attr "prefix" "vex")
6108 (set_attr "mode" "OI")])
6110 ;; Modes handled by vec_extract patterns.
;; 128-bit modes are listed unconditionally, 256-bit modes require
;; TARGET_AVX and 512-bit modes require TARGET_AVX512F.  The 512-bit byte and
;; halfword modes (V64QI, V32HI) are not part of this iterator.
6111 (define_mode_iterator VEC_EXTRACT_MODE
6112 [(V32QI "TARGET_AVX") V16QI
6113 (V16HI "TARGET_AVX") V8HI
6114 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
6115 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
6116 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
6117 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF])
;; Expander for the modes in VEC_EXTRACT_MODE.  Operand 0 is the scalar
;; destination register, operand 1 the vector register, operand 2 a constant
;; integer.  All work is delegated to ix86_expand_vector_extract, which
;; receives the integer value of operand 2 as its last argument.
6119 (define_expand "vec_extract<mode>"
6120 [(match_operand:<ssescalarmode> 0 "register_operand")
6121 (match_operand:VEC_EXTRACT_MODE 1 "register_operand")
6122 (match_operand 2 "const_int_operand")]
6125 ix86_expand_vector_extract (false, operands[0], operands[1],
6126 INTVAL (operands[2]));
6130 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6132 ;; Parallel double-precision floating point element swizzling
6134 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; 512-bit vunpckhpd on V8DF operands 1 and 2.  The selector picks indices
;; 1,9, 3,11, 5,13, 7,15 (presumably out of the concatenation of the two
;; operands; the enclosing RTL lines are not visible in this excerpt).
;; <mask_name> and <mask_operand3> are substitution placeholders defined
;; elsewhere.
6136 (define_insn "<mask_codefor>avx512f_unpckhpd512<mask_name>"
6137 [(set (match_operand:V8DF 0 "register_operand" "=v")
6140 (match_operand:V8DF 1 "nonimmediate_operand" "v")
6141 (match_operand:V8DF 2 "nonimmediate_operand" "vm"))
6142 (parallel [(const_int 1) (const_int 9)
6143 (const_int 3) (const_int 11)
6144 (const_int 5) (const_int 13)
6145 (const_int 7) (const_int 15)])))]
6147 "vunpckhpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
6148 [(set_attr "type" "sselog")
6149 (set_attr "prefix" "evex")
6150 (set_attr "mode" "V8DF")])
6152 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; 256-bit vunpckhpd on V4DF operands 1 (register) and 2 (register or
;; memory).  The selector picks indices 1,5, 3,7 (presumably out of the
;; concatenation of the two operands; the enclosing RTL lines are not
;; visible in this excerpt).
6153 (define_insn "avx_unpckhpd256"
6154 [(set (match_operand:V4DF 0 "register_operand" "=x")
6157 (match_operand:V4DF 1 "register_operand" "x")
6158 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
6159 (parallel [(const_int 1) (const_int 5)
6160 (const_int 3) (const_int 7)])))]
6162 "vunpckhpd\t{%2, %1, %0|%0, %1, %2}"
6163 [(set_attr "type" "sselog")
6164 (set_attr "prefix" "vex")
6165 (set_attr "mode" "V4DF")])
;; Expander built from three selections: one with indices 0,4,2,6 over
;; operands 1 and 2, one with indices 1,5,3,7, and a final one with indices
;; 2,3,6,7 that sets operand 0.  Operands 3 and 4 are fresh V4DF pseudo
;; registers created in the preparation code (presumably holding the first
;; two results; the lines naming their use are not visible in this excerpt).
6167 (define_expand "vec_interleave_highv4df"
6171 (match_operand:V4DF 1 "register_operand" "x")
6172 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
6173 (parallel [(const_int 0) (const_int 4)
6174 (const_int 2) (const_int 6)])))
6180 (parallel [(const_int 1) (const_int 5)
6181 (const_int 3) (const_int 7)])))
6182 (set (match_operand:V4DF 0 "register_operand")
6187 (parallel [(const_int 2) (const_int 3)
6188 (const_int 6) (const_int 7)])))]
6191 operands[3] = gen_reg_rtx (V4DFmode);
6192 operands[4] = gen_reg_rtx (V4DFmode);
;; Expander for the V2DF high interleave.  If
;; ix86_vec_interleave_v2df_operator_ok (operands, 1) rejects the operand
;; combination, operand 2 is forced into a register before the pattern is
;; emitted.
6196 (define_expand "vec_interleave_highv2df"
6197 [(set (match_operand:V2DF 0 "register_operand")
6200 (match_operand:V2DF 1 "nonimmediate_operand")
6201 (match_operand:V2DF 2 "nonimmediate_operand"))
6202 (parallel [(const_int 1)
6206 if (!ix86_vec_interleave_v2df_operator_ok (operands, 1))
6207 operands[2] = force_reg (V2DFmode, operands[2]);
;; V2DF high interleave, valid when TARGET_SSE2 and
;; ix86_vec_interleave_v2df_operator_ok (operands, 1) hold.
;; Six alternatives, in template order:
;;   0  unpckhpd   (non-AVX, operand 1 tied to the output)
;;   1  vunpckhpd  (AVX)
;;   2  %vmovddup from the high part of memory operand 1 (SSE3; operand 2
;;      tied to operand 1)
;;   3  movlpd from the high part of memory operand 1 (non-AVX)
;;   4  vmovlpd, same with a separate register operand 2 (AVX)
;;   5  %vmovhpd store of register operand 1 to a memory destination
6210 (define_insn "*vec_interleave_highv2df"
6211 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m")
6214 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,o,o,o,x")
6215 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,1,0,x,0"))
6216 (parallel [(const_int 1)
6218 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
6220 unpckhpd\t{%2, %0|%0, %2}
6221 vunpckhpd\t{%2, %1, %0|%0, %1, %2}
6222 %vmovddup\t{%H1, %0|%0, %H1}
6223 movlpd\t{%H1, %0|%0, %H1}
6224 vmovlpd\t{%H1, %2, %0|%0, %2, %H1}
6225 %vmovhpd\t{%1, %0|%q0, %1}"
6226 [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
6227 (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
6228 (set_attr "prefix_data16" "*,*,*,1,*,1")
6229 (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex")
6230 (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,V1DF")])
;; Expander for a V8DF pattern with selector 0,8, 2,10, 4,12, 6,14.  Only
;; operand 1 is visible as a source in this excerpt (presumably used for
;; both inputs of the selection -- confirm against the full pattern).
6232 (define_expand "avx512f_movddup512<mask_name>"
6233 [(set (match_operand:V8DF 0 "register_operand")
6236 (match_operand:V8DF 1 "nonimmediate_operand")
6238 (parallel [(const_int 0) (const_int 8)
6239 (const_int 2) (const_int 10)
6240 (const_int 4) (const_int 12)
6241 (const_int 6) (const_int 14)])))]
;; Expander for the V8DF low unpack: operands 1 (register) and 2 (register
;; or memory) with selector 0,8, 2,10, 4,12, 6,14.
6244 (define_expand "avx512f_unpcklpd512<mask_name>"
6245 [(set (match_operand:V8DF 0 "register_operand")
6248 (match_operand:V8DF 1 "register_operand")
6249 (match_operand:V8DF 2 "nonimmediate_operand"))
6250 (parallel [(const_int 0) (const_int 8)
6251 (const_int 2) (const_int 10)
6252 (const_int 4) (const_int 12)
6253 (const_int 6) (const_int 14)])))]
;; Insn for the V8DF selector 0,8, 2,10, 4,12, 6,14 over operands 1 and 2.
;; Alternative 0 has operand 2 tied to operand 1 ("1") and emits vmovddup,
;; which reads only operand 1; alternative 1 emits vunpcklpd on both
;; operands.  <mask_name> and <mask_operand3> are substitution placeholders
;; defined elsewhere.
6256 (define_insn "*avx512f_unpcklpd512<mask_name>"
6257 [(set (match_operand:V8DF 0 "register_operand" "=v,v")
6260 (match_operand:V8DF 1 "nonimmediate_operand" "vm, v")
6261 (match_operand:V8DF 2 "nonimmediate_operand" "1 ,vm"))
6262 (parallel [(const_int 0) (const_int 8)
6263 (const_int 2) (const_int 10)
6264 (const_int 4) (const_int 12)
6265 (const_int 6) (const_int 14)])))]
6268 vmovddup\t{%1, %0<mask_operand3>|%0<mask_operand3>, %1}
6269 vunpcklpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
6270 [(set_attr "type" "sselog")
6271 (set_attr "prefix" "evex")
6272 (set_attr "mode" "V8DF")])
6274 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; Expander for a V4DF pattern with selector 0,4, 2,6.  Only operand 1 is
;; visible as a source in this excerpt (presumably used for both inputs of
;; the selection -- confirm against the full pattern).
6275 (define_expand "avx_movddup256"
6276 [(set (match_operand:V4DF 0 "register_operand")
6279 (match_operand:V4DF 1 "nonimmediate_operand")
6281 (parallel [(const_int 0) (const_int 4)
6282 (const_int 2) (const_int 6)])))]
;; Expander for the V4DF low unpack: operands 1 (register) and 2 (register
;; or memory) with selector 0,4, 2,6.
6285 (define_expand "avx_unpcklpd256"
6286 [(set (match_operand:V4DF 0 "register_operand")
6289 (match_operand:V4DF 1 "register_operand")
6290 (match_operand:V4DF 2 "nonimmediate_operand"))
6291 (parallel [(const_int 0) (const_int 4)
6292 (const_int 2) (const_int 6)])))]
;; Insn for the V4DF selector 0,4, 2,6 over operands 1 and 2.
;; Alternative 0 emits vunpcklpd on both operands; alternative 1 has a
;; memory operand 1 with operand 2 tied to it ("1") and emits vmovddup,
;; which reads only operand 1.
6295 (define_insn "*avx_unpcklpd256"
6296 [(set (match_operand:V4DF 0 "register_operand" "=x,x")
6299 (match_operand:V4DF 1 "nonimmediate_operand" " x,m")
6300 (match_operand:V4DF 2 "nonimmediate_operand" "xm,1"))
6301 (parallel [(const_int 0) (const_int 4)
6302 (const_int 2) (const_int 6)])))]
6305 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
6306 vmovddup\t{%1, %0|%0, %1}"
6307 [(set_attr "type" "sselog")
6308 (set_attr "prefix" "vex")
6309 (set_attr "mode" "V4DF")])
;; Expander built from three selections: one with indices 0,4,2,6 over
;; operands 1 and 2, one with indices 1,5,3,7, and a final one with indices
;; 0,1,4,5 that sets operand 0.  Operands 3 and 4 are fresh V4DF pseudo
;; registers created in the preparation code (presumably holding the first
;; two results; the lines naming their use are not visible in this excerpt).
6311 (define_expand "vec_interleave_lowv4df"
6315 (match_operand:V4DF 1 "register_operand" "x")
6316 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
6317 (parallel [(const_int 0) (const_int 4)
6318 (const_int 2) (const_int 6)])))
6324 (parallel [(const_int 1) (const_int 5)
6325 (const_int 3) (const_int 7)])))
6326 (set (match_operand:V4DF 0 "register_operand")
6331 (parallel [(const_int 0) (const_int 1)
6332 (const_int 4) (const_int 5)])))]
6335 operands[3] = gen_reg_rtx (V4DFmode);
6336 operands[4] = gen_reg_rtx (V4DFmode);
;; Expander for the V2DF low interleave.  If
;; ix86_vec_interleave_v2df_operator_ok (operands, 0) rejects the operand
;; combination, operand 1 is forced into a register before the pattern is
;; emitted.
6339 (define_expand "vec_interleave_lowv2df"
6340 [(set (match_operand:V2DF 0 "register_operand")
6343 (match_operand:V2DF 1 "nonimmediate_operand")
6344 (match_operand:V2DF 2 "nonimmediate_operand"))
6345 (parallel [(const_int 0)
6349 if (!ix86_vec_interleave_v2df_operator_ok (operands, 0))
6350 operands[1] = force_reg (V2DFmode, operands[1]);
;; V2DF low interleave, valid when TARGET_SSE2 and
;; ix86_vec_interleave_v2df_operator_ok (operands, 0) hold.
;; Six alternatives, in template order:
;;   0  unpcklpd   (non-AVX, operand 1 tied to the output)
;;   1  vunpcklpd  (AVX)
;;   2  %vmovddup from memory operand 1 (SSE3; operand 2 tied to operand 1)
;;   3  movhpd from memory operand 2 (non-AVX, operand 1 tied to the output)
;;   4  vmovhpd, same with a separate register operand 1 (AVX)
;;   5  %vmovlpd store of register operand 2 into the high part (%H0) of an
;;      offsettable memory destination
6353 (define_insn "*vec_interleave_lowv2df"
6354 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,o")
6357 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,m,0,x,0")
6358 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,1,m,m,x"))
6359 (parallel [(const_int 0)
6361 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
6363 unpcklpd\t{%2, %0|%0, %2}
6364 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
6365 %vmovddup\t{%1, %0|%0, %q1}
6366 movhpd\t{%2, %0|%0, %q2}
6367 vmovhpd\t{%2, %1, %0|%0, %1, %q2}
6368 %vmovlpd\t{%2, %H0|%H0, %2}"
6369 [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
6370 (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
6371 (set_attr "prefix_data16" "*,*,*,1,*,1")
6372 (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex")
6373 (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,V1DF")])
;; NOTE(review): the header line of this form is not visible in this excerpt;
;; it looks like a define_split -- confirm.
;; With TARGET_SSE3 after reload, a V2DF memory destination fed from
;; register operand 1 (selector starting at index 0) is replaced by two DF
;; stores of the same value: operand 1 is viewed as a DFmode register with
;; the same register number and stored at byte offsets 0 and 8.
6376 [(set (match_operand:V2DF 0 "memory_operand")
6379 (match_operand:V2DF 1 "register_operand")
6381 (parallel [(const_int 0)
6383 "TARGET_SSE3 && reload_completed"
6386 rtx low = gen_rtx_REG (DFmode, REGNO (operands[1]));
6387 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
6388 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
;; NOTE(review): the header line of this form is not visible in this excerpt;
;; it looks like a define_split -- confirm.
;; With TARGET_SSE3, a V2DF register set from memory operand 1 through a
;; two-index selector (operand 2 in 0..1, operand 3 == operand 2 + 2) is
;; replaced by a vec_duplicate of a single DF element: the memory operand is
;; re-addressed as DFmode at byte offset operand 2 * 8.
6393 [(set (match_operand:V2DF 0 "register_operand")
6396 (match_operand:V2DF 1 "memory_operand")
6398 (parallel [(match_operand:SI 2 "const_0_to_1_operand")
6399 (match_operand:SI 3 "const_int_operand")])))]
6400 "TARGET_SSE3 && INTVAL (operands[2]) + 2 == INTVAL (operands[3])"
6401 [(set (match_dup 0) (vec_duplicate:V2DF (match_dup 1)))]
6403 operands[1] = adjust_address (operands[1], DFmode, INTVAL (operands[2]) * 8);
;; Scalar scalef for the VF_128 modes: operand 1 is a register, operand 2 a
;; register or memory.  The mnemonic is printed as
;; %vscalef<ssescalarmodesuffix>, the encoding prefix is EVEX and the "mode"
;; attribute is the scalar mode.
6406 (define_insn "*avx512f_vmscalef<mode>"
6407 [(set (match_operand:VF_128 0 "register_operand" "=v")
6410 [(match_operand:VF_128 1 "register_operand" "v")
6411 (match_operand:VF_128 2 "nonimmediate_operand" "vm")]
6416 "%vscalef<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
6417 [(set_attr "prefix" "evex")
6418 (set_attr "mode" "<ssescalarmode>")])
;; Packed scalef for the VF_512 modes: operand 1 is a register, operand 2 a
;; register or memory.  <mask_name> and <mask_operand3> are substitution
;; placeholders defined elsewhere.
6420 (define_insn "avx512f_scalef<mode><mask_name>"
6421 [(set (match_operand:VF_512 0 "register_operand" "=v")
6423 [(match_operand:VF_512 1 "register_operand" "v")
6424 (match_operand:VF_512 2 "nonimmediate_operand" "vm")]
6427 "%vscalef<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
6428 [(set_attr "prefix" "evex")
6429 (set_attr "mode" "<MODE>")])
;; vpternlog for the VI48_512 modes.  Operand 1 is tied to the output ("0")
;; and therefore does not appear in the template; operands 2 and 3 are the
;; other two sources and operand 4 is the 8-bit immediate.
6431 (define_insn "avx512f_vternlog<mode>"
6432 [(set (match_operand:VI48_512 0 "register_operand" "=v")
6434 [(match_operand:VI48_512 1 "register_operand" "0")
6435 (match_operand:VI48_512 2 "register_operand" "v")
6436 (match_operand:VI48_512 3 "nonimmediate_operand" "vm")
6437 (match_operand:SI 4 "const_0_to_255_operand")]
6440 "vpternlog<ssemodesuffix>\t{%4, %3, %2, %0|%0, %2, %3, %4}"
6441 [(set_attr "type" "sselog")
6442 (set_attr "prefix" "evex")
6443 (set_attr "mode" "<sseinsnmode>")])
;; vpternlog with an additional operand 5 of mode <avx512fmaskmode>
;; (constraint "k").  The template prints operand 5 in braces directly after
;; the destination.  Operand 1 is tied to the destination (constraint "0").
6445 (define_insn "avx512f_vternlog<mode>_mask"
6446 [(set (match_operand:VI48_512 0 "register_operand" "=v")
6449 [(match_operand:VI48_512 1 "register_operand" "0")
6450 (match_operand:VI48_512 2 "register_operand" "v")
6451 (match_operand:VI48_512 3 "nonimmediate_operand" "vm")
6452 (match_operand:SI 4 "const_0_to_255_operand")]
6455 (match_operand:<avx512fmaskmode> 5 "register_operand" "k")))]
6457 "vpternlog<ssemodesuffix>\t{%4, %3, %2, %0%{%5%}|%0%{%5%}, %2, %3, %4}"
6458 [(set_attr "type" "sselog")
6459 (set_attr "prefix" "evex")
6460 (set_attr "mode" "<sseinsnmode>")])
;; Packed vgetexp on the VF_512 modes: an unspec with a single source operand
;; that may be a register or memory.  The destination carries <mask_operand2>
;; in the template.
6462 (define_insn "avx512f_getexp<mode><mask_name>"
6463 [(set (match_operand:VF_512 0 "register_operand" "=v")
6464 (unspec:VF_512 [(match_operand:VF_512 1 "nonimmediate_operand" "vm")]
6467 "vgetexp<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
6468 [(set_attr "prefix" "evex")
6469 (set_attr "mode" "<MODE>")])
;; Scalar-suffix vgetexp on the VF_128 modes.  Takes two sources: operand 1
;; (register) and operand 2 (register or memory), printed in the
;; three-operand form.
6471 (define_insn "avx512f_sgetexp<mode>"
6472 [(set (match_operand:VF_128 0 "register_operand" "=v")
6475 [(match_operand:VF_128 1 "register_operand" "v")
6476 (match_operand:VF_128 2 "nonimmediate_operand" "vm")]
6481 "vgetexp<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}";
6482 [(set_attr "prefix" "evex")
6483 (set_attr "mode" "<ssescalarmode>")])
;; valign on the VI48_512 modes: an unspec over operand 1 (register),
;; operand 2 (register or memory) and an immediate operand 3 that must satisfy
;; const_0_to_255_operand.  The destination carries <mask_operand4>.
6485 (define_insn "<mask_codefor>avx512f_align<mode><mask_name>"
6486 [(set (match_operand:VI48_512 0 "register_operand" "=v")
6487 (unspec:VI48_512 [(match_operand:VI48_512 1 "register_operand" "v")
6488 (match_operand:VI48_512 2 "nonimmediate_operand" "vm")
6489 (match_operand:SI 3 "const_0_to_255_operand")]
6492 "valign<ssemodesuffix>\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}";
6493 [(set_attr "prefix" "evex")
6494 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the masked 512-bit shufps.  The immediate (operand 3) is read
;; as four 2-bit fields.  For each group base B in {0, 4, 8, 12} it passes
;; four selectors to gen_avx512f_shufps512_1_mask: B + field0, B + field1,
;; B + 16 + field2 and B + 16 + field3.  Operands 4 and 5 (a V16SF register
;; and an HImode register) are forwarded unchanged as the trailing arguments.
6496 (define_expand "avx512f_shufps512_mask"
6497 [(match_operand:V16SF 0 "register_operand")
6498 (match_operand:V16SF 1 "register_operand")
6499 (match_operand:V16SF 2 "nonimmediate_operand")
6500 (match_operand:SI 3 "const_0_to_255_operand")
6501 (match_operand:V16SF 4 "register_operand")
6502 (match_operand:HI 5 "register_operand")]
6505 int mask = INTVAL (operands[3]);
6506 emit_insn (gen_avx512f_shufps512_1_mask (operands[0], operands[1], operands[2],
6507 GEN_INT ((mask >> 0) & 3),
6508 GEN_INT ((mask >> 2) & 3),
6509 GEN_INT (((mask >> 4) & 3) + 16),
6510 GEN_INT (((mask >> 6) & 3) + 16),
6511 GEN_INT (((mask >> 0) & 3) + 4),
6512 GEN_INT (((mask >> 2) & 3) + 4),
6513 GEN_INT (((mask >> 4) & 3) + 20),
6514 GEN_INT (((mask >> 6) & 3) + 20),
6515 GEN_INT (((mask >> 0) & 3) + 8),
6516 GEN_INT (((mask >> 2) & 3) + 8),
6517 GEN_INT (((mask >> 4) & 3) + 24),
6518 GEN_INT (((mask >> 6) & 3) + 24),
6519 GEN_INT (((mask >> 0) & 3) + 12),
6520 GEN_INT (((mask >> 2) & 3) + 12),
6521 GEN_INT (((mask >> 4) & 3) + 28),
6522 GEN_INT (((mask >> 6) & 3) + 28),
6523 operands[4], operands[5]));
;; Packed vfixupimm on the VF_512 modes.  Operand 1 is tied to the destination
;; (constraint "0") and is not printed.  Operand 3 is in the integer vector
;; mode <sseintvecmode> and may be in memory; operand 4 is an immediate
;; accepted by const_0_to_255_operand.
6527 (define_insn "avx512f_fixupimm<mode>"
6528 [(set (match_operand:VF_512 0 "register_operand" "=v")
6530 [(match_operand:VF_512 1 "register_operand" "0")
6531 (match_operand:VF_512 2 "register_operand" "v")
6532 (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "vm")
6533 (match_operand:SI 4 "const_0_to_255_operand")]
6536 "vfixupimm<ssemodesuffix>\t{%4, %3, %2, %0|%0, %2, %3, %4}";
6537 [(set_attr "prefix" "evex")
6538 (set_attr "mode" "<MODE>")])
;; Packed vfixupimm with an additional operand 5 of mode <avx512fmaskmode>
;; (constraint "k"), printed in braces after the destination.  Operand 1 is
;; tied to the destination (constraint "0").
6540 (define_insn "avx512f_fixupimm<mode>_mask"
6541 [(set (match_operand:VF_512 0 "register_operand" "=v")
6544 [(match_operand:VF_512 1 "register_operand" "0")
6545 (match_operand:VF_512 2 "register_operand" "v")
6546 (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "vm")
6547 (match_operand:SI 4 "const_0_to_255_operand")]
6550 (match_operand:<avx512fmaskmode> 5 "register_operand" "k")))]
6552 "vfixupimm<ssemodesuffix>\t{%4, %3, %2, %0%{%5%}|%0%{%5%}, %2, %3, %4}";
6553 [(set_attr "prefix" "evex")
6554 (set_attr "mode" "<MODE>")])
;; Scalar-suffix vfixupimm on the VF_128 modes.  Same operand layout as the
;; packed form: operand 1 tied to the destination, operand 3 in
;; <sseintvecmode>, operand 4 an immediate accepted by const_0_to_255_operand.
6556 (define_insn "avx512f_sfixupimm<mode>"
6557 [(set (match_operand:VF_128 0 "register_operand" "=v")
6560 [(match_operand:VF_128 1 "register_operand" "0")
6561 (match_operand:VF_128 2 "register_operand" "v")
6562 (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "vm")
6563 (match_operand:SI 4 "const_0_to_255_operand")]
6568 "vfixupimm<ssescalarmodesuffix>\t{%4, %3, %2, %0|%0, %2, %3, %4}";
6569 [(set_attr "prefix" "evex")
6570 (set_attr "mode" "<ssescalarmode>")])
;; Scalar-suffix vfixupimm with an additional operand 5 of mode
;; <avx512fmaskmode> (constraint "k"), printed in braces after the
;; destination.
6572 (define_insn "avx512f_sfixupimm<mode>_mask"
6573 [(set (match_operand:VF_128 0 "register_operand" "=v")
6577 [(match_operand:VF_128 1 "register_operand" "0")
6578 (match_operand:VF_128 2 "register_operand" "v")
6579 (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "vm")
6580 (match_operand:SI 4 "const_0_to_255_operand")]
6585 (match_operand:<avx512fmaskmode> 5 "register_operand" "k")))]
6587 "vfixupimm<ssescalarmodesuffix>\t{%4, %3, %2, %0%{%5%}|%0%{%5%}, %2, %3, %4}";
6588 [(set_attr "prefix" "evex")
6589 (set_attr "mode" "<ssescalarmode>")])
;; Packed vrndscale on the VF_512 modes: one source (register or memory) plus
;; an immediate operand 2 accepted by const_0_to_255_operand.  The
;; "length_immediate" attribute is set to 1 for that immediate.
6591 (define_insn "avx512f_rndscale<mode><mask_name>"
6592 [(set (match_operand:VF_512 0 "register_operand" "=v")
6594 [(match_operand:VF_512 1 "nonimmediate_operand" "vm")
6595 (match_operand:SI 2 "const_0_to_255_operand")]
6598 "vrndscale<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
6599 [(set_attr "length_immediate" "1")
6600 (set_attr "prefix" "evex")
6601 (set_attr "mode" "<MODE>")])
;; Scalar-suffix vrndscale on the VF_128 modes: two vector sources (operand 1
;; register, operand 2 register or memory) plus an immediate operand 3.
;; NOTE(review): "mode" is set to <MODE> here while the other scalar-suffix
;; patterns in this area use <ssescalarmode> -- confirm this is intended.
6603 (define_insn "*avx512f_rndscale<mode>"
6604 [(set (match_operand:VF_128 0 "register_operand" "=v")
6607 [(match_operand:VF_128 1 "register_operand" "v")
6608 (match_operand:VF_128 2 "nonimmediate_operand" "vm")
6609 (match_operand:SI 3 "const_0_to_255_operand")]
6614 "vrndscale<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
6615 [(set_attr "length_immediate" "1")
6616 (set_attr "prefix" "evex")
6617 (set_attr "mode" "<MODE>")])
6619 ;; One bit in mask selects 2 elements.
;; 512-bit shufps with explicit element selectors.  Operands 1 and 2 are the
;; V16SF inputs; operands 3..18 are the sixteen constant selectors.  The insn
;; condition requires selectors 7..18 to equal selectors 3..6 plus 4, 8 and 12
;; respectively, so only operands 3..6 carry information.  The output code
;; rebuilds the immediate as
;;   op3 | op4 << 2 | (op5 - 16) << 4 | (op6 - 16) << 6
;; and stores it back into operands[3] before returning the vshufps template.
6620 (define_insn "avx512f_shufps512_1<mask_name>"
6621 [(set (match_operand:V16SF 0 "register_operand" "=v")
6624 (match_operand:V16SF 1 "register_operand" "v")
6625 (match_operand:V16SF 2 "nonimmediate_operand" "vm"))
6626 (parallel [(match_operand 3 "const_0_to_3_operand")
6627 (match_operand 4 "const_0_to_3_operand")
6628 (match_operand 5 "const_16_to_19_operand")
6629 (match_operand 6 "const_16_to_19_operand")
6630 (match_operand 7 "const_4_to_7_operand")
6631 (match_operand 8 "const_4_to_7_operand")
6632 (match_operand 9 "const_20_to_23_operand")
6633 (match_operand 10 "const_20_to_23_operand")
6634 (match_operand 11 "const_8_to_11_operand")
6635 (match_operand 12 "const_8_to_11_operand")
6636 (match_operand 13 "const_24_to_27_operand")
6637 (match_operand 14 "const_24_to_27_operand")
6638 (match_operand 15 "const_12_to_15_operand")
6639 (match_operand 16 "const_12_to_15_operand")
6640 (match_operand 17 "const_28_to_31_operand")
6641 (match_operand 18 "const_28_to_31_operand")])))]
6643 && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
6644 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
6645 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
6646 && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4)
6647 && INTVAL (operands[3]) == (INTVAL (operands[11]) - 8)
6648 && INTVAL (operands[4]) == (INTVAL (operands[12]) - 8)
6649 && INTVAL (operands[5]) == (INTVAL (operands[13]) - 8)
6650 && INTVAL (operands[6]) == (INTVAL (operands[14]) - 8)
6651 && INTVAL (operands[3]) == (INTVAL (operands[15]) - 12)
6652 && INTVAL (operands[4]) == (INTVAL (operands[16]) - 12)
6653 && INTVAL (operands[5]) == (INTVAL (operands[17]) - 12)
6654 && INTVAL (operands[6]) == (INTVAL (operands[18]) - 12))"
6657 mask = INTVAL (operands[3]);
6658 mask |= INTVAL (operands[4]) << 2;
6659 mask |= (INTVAL (operands[5]) - 16) << 4;
6660 mask |= (INTVAL (operands[6]) - 16) << 6;
6661 operands[3] = GEN_INT (mask);
6663 return "vshufps\t{%3, %2, %1, %0<mask_operand19>|%0<mask_operand19>, %1, %2, %3}";
6665 [(set_attr "type" "sselog")
6666 (set_attr "length_immediate" "1")
6667 (set_attr "prefix" "evex")
6668 (set_attr "mode" "V16SF")])
;; Expander for the masked 512-bit shufpd.  Each tested bit of the immediate
;; (operand 3) chooses between two adjacent selector values that are passed
;; to gen_avx512f_shufpd512_1_mask (bit 1 -> 9 or 8, bit 2 -> 3 or 2, ...,
;; bit 7 -> 15 or 14).  Operands 4 and 5 (a V8DF register and a QImode
;; register) are forwarded unchanged as the trailing arguments.
6670 (define_expand "avx512f_shufpd512_mask"
6671 [(match_operand:V8DF 0 "register_operand")
6672 (match_operand:V8DF 1 "register_operand")
6673 (match_operand:V8DF 2 "nonimmediate_operand")
6674 (match_operand:SI 3 "const_0_to_255_operand")
6675 (match_operand:V8DF 4 "register_operand")
6676 (match_operand:QI 5 "register_operand")]
6679 int mask = INTVAL (operands[3]);
6680 emit_insn (gen_avx512f_shufpd512_1_mask (operands[0], operands[1], operands[2],
6682 GEN_INT (mask & 2 ? 9 : 8),
6683 GEN_INT (mask & 4 ? 3 : 2),
6684 GEN_INT (mask & 8 ? 11 : 10),
6685 GEN_INT (mask & 16 ? 5 : 4),
6686 GEN_INT (mask & 32 ? 13 : 12),
6687 GEN_INT (mask & 64 ? 7 : 6),
6688 GEN_INT (mask & 128 ? 15 : 14),
6689 operands[4], operands[5]));
;; 512-bit shufpd with explicit element selectors (operands 3..10), each
;; restricted by its predicate to one of two adjacent values.  The output code
;; converts every selector back to a single bit (selector minus the low end
;; of its range), packs bit i from operand 3 + i, and stores the resulting
;; immediate into operands[3] before returning the vshufpd template.
6693 (define_insn "avx512f_shufpd512_1<mask_name>"
6694 [(set (match_operand:V8DF 0 "register_operand" "=v")
6697 (match_operand:V8DF 1 "register_operand" "v")
6698 (match_operand:V8DF 2 "nonimmediate_operand" "vm"))
6699 (parallel [(match_operand 3 "const_0_to_1_operand")
6700 (match_operand 4 "const_8_to_9_operand")
6701 (match_operand 5 "const_2_to_3_operand")
6702 (match_operand 6 "const_10_to_11_operand")
6703 (match_operand 7 "const_4_to_5_operand")
6704 (match_operand 8 "const_12_to_13_operand")
6705 (match_operand 9 "const_6_to_7_operand")
6706 (match_operand 10 "const_14_to_15_operand")])))]
6710 mask = INTVAL (operands[3]);
6711 mask |= (INTVAL (operands[4]) - 8) << 1;
6712 mask |= (INTVAL (operands[5]) - 2) << 2;
6713 mask |= (INTVAL (operands[6]) - 10) << 3;
6714 mask |= (INTVAL (operands[7]) - 4) << 4;
6715 mask |= (INTVAL (operands[8]) - 12) << 5;
6716 mask |= (INTVAL (operands[9]) - 6) << 6;
6717 mask |= (INTVAL (operands[10]) - 14) << 7;
6718 operands[3] = GEN_INT (mask);
6720 return "vshufpd\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
6722 [(set_attr "type" "sselog")
6723 (set_attr "length_immediate" "1")
6724 (set_attr "prefix" "evex")
6725 (set_attr "mode" "V8DF")])
;; Expander for the 256-bit shufpd.  Bits of the immediate (operand 3) are
;; turned into explicit selector constants for gen_avx_shufpd256_1
;; (bit 1 -> 5 or 4, bit 2 -> 3 or 2, bit 3 -> 7 or 6).
6727 (define_expand "avx_shufpd256"
6728 [(match_operand:V4DF 0 "register_operand")
6729 (match_operand:V4DF 1 "register_operand")
6730 (match_operand:V4DF 2 "nonimmediate_operand")
6731 (match_operand:SI 3 "const_int_operand")]
6734 int mask = INTVAL (operands[3]);
6735 emit_insn (gen_avx_shufpd256_1 (operands[0], operands[1], operands[2],
6737 GEN_INT (mask & 2 ? 5 : 4),
6738 GEN_INT (mask & 4 ? 3 : 2),
6739 GEN_INT (mask & 8 ? 7 : 6)));
;; 256-bit shufpd with explicit selectors (operands 3..6).  The output code
;; packs them into a 4-bit immediate,
;;   op3 | (op4 - 4) << 1 | (op5 - 2) << 2 | (op6 - 6) << 3,
;; stores it in operands[3] and returns the three-operand vshufpd template.
6743 (define_insn "avx_shufpd256_1"
6744 [(set (match_operand:V4DF 0 "register_operand" "=x")
6747 (match_operand:V4DF 1 "register_operand" "x")
6748 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
6749 (parallel [(match_operand 3 "const_0_to_1_operand")
6750 (match_operand 4 "const_4_to_5_operand")
6751 (match_operand 5 "const_2_to_3_operand")
6752 (match_operand 6 "const_6_to_7_operand")])))]
6756 mask = INTVAL (operands[3]);
6757 mask |= (INTVAL (operands[4]) - 4) << 1;
6758 mask |= (INTVAL (operands[5]) - 2) << 2;
6759 mask |= (INTVAL (operands[6]) - 6) << 3;
6760 operands[3] = GEN_INT (mask);
6762 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
6764 [(set_attr "type" "sseshuf")
6765 (set_attr "length_immediate" "1")
6766 (set_attr "prefix" "vex")
6767 (set_attr "mode" "V4DF")])
;; Expander for the 128-bit shufpd.  The immediate (operand 3) is converted
;; into explicit selector constants for gen_sse2_shufpd_v2df; the visible
;; argument maps bit 1 to selector 3 or 2.
6769 (define_expand "sse2_shufpd"
6770 [(match_operand:V2DF 0 "register_operand")
6771 (match_operand:V2DF 1 "register_operand")
6772 (match_operand:V2DF 2 "nonimmediate_operand")
6773 (match_operand:SI 3 "const_int_operand")]
6776 int mask = INTVAL (operands[3]);
6777 emit_insn (gen_sse2_shufpd_v2df (operands[0], operands[1], operands[2],
6779 GEN_INT (mask & 2 ? 3 : 2)));
6783 ;; punpcklqdq and punpckhqdq are shorter than shufpd.
;; V4DI pattern emitted as vpunpckhqdq (three-operand VEX form); operand 2 may
;; be a register or memory.  The selector list begins with (const_int 1).
6784 (define_insn "avx2_interleave_highv4di"
6785 [(set (match_operand:V4DI 0 "register_operand" "=x")
6788 (match_operand:V4DI 1 "register_operand" "x")
6789 (match_operand:V4DI 2 "nonimmediate_operand" "xm"))
6790 (parallel [(const_int 1)
6795 "vpunpckhqdq\t{%2, %1, %0|%0, %1, %2}"
6796 [(set_attr "type" "sselog")
6797 (set_attr "prefix" "vex")
6798 (set_attr "mode" "OI")])
;; V8DI pattern emitted as vpunpckhqdq.  The selector list is
;; 1, 9, 3, 11, 5, 13, 7, 15, i.e. the odd-numbered positions paired with the
;; same position plus 8.  The destination carries <mask_operand3>.
6800 (define_insn "<mask_codefor>avx512f_interleave_highv8di<mask_name>"
6801 [(set (match_operand:V8DI 0 "register_operand" "=v")
6804 (match_operand:V8DI 1 "register_operand" "v")
6805 (match_operand:V8DI 2 "nonimmediate_operand" "vm"))
6806 (parallel [(const_int 1) (const_int 9)
6807 (const_int 3) (const_int 11)
6808 (const_int 5) (const_int 13)
6809 (const_int 7) (const_int 15)])))]
6811 "vpunpckhqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
6812 [(set_attr "type" "sselog")
6813 (set_attr "prefix" "evex")
6814 (set_attr "mode" "XI")])
;; V2DI pattern with two alternatives selected by the "isa" attribute:
;; alternative 0 (noavx) is the two-operand punpckhqdq with operand 1 tied to
;; the destination; alternative 1 (avx) is the three-operand vpunpckhqdq.
6816 (define_insn "vec_interleave_highv2di"
6817 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
6820 (match_operand:V2DI 1 "register_operand" "0,x")
6821 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm"))
6822 (parallel [(const_int 1)
6826 punpckhqdq\t{%2, %0|%0, %2}
6827 vpunpckhqdq\t{%2, %1, %0|%0, %1, %2}"
6828 [(set_attr "isa" "noavx,avx")
6829 (set_attr "type" "sselog")
6830 (set_attr "prefix_data16" "1,*")
6831 (set_attr "prefix" "orig,vex")
6832 (set_attr "mode" "TI")])
;; V4DI pattern emitted as vpunpcklqdq (three-operand VEX form); operand 2 may
;; be a register or memory.  The selector list begins with (const_int 0).
6834 (define_insn "avx2_interleave_lowv4di"
6835 [(set (match_operand:V4DI 0 "register_operand" "=x")
6838 (match_operand:V4DI 1 "register_operand" "x")
6839 (match_operand:V4DI 2 "nonimmediate_operand" "xm"))
6840 (parallel [(const_int 0)
6845 "vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}"
6846 [(set_attr "type" "sselog")
6847 (set_attr "prefix" "vex")
6848 (set_attr "mode" "OI")])
;; V8DI pattern emitted as vpunpcklqdq.  The selector list is
;; 0, 8, 2, 10, 4, 12, 6, 14, i.e. the even-numbered positions paired with the
;; same position plus 8.  The destination carries <mask_operand3>.
6850 (define_insn "<mask_codefor>avx512f_interleave_lowv8di<mask_name>"
6851 [(set (match_operand:V8DI 0 "register_operand" "=v")
6854 (match_operand:V8DI 1 "register_operand" "v")
6855 (match_operand:V8DI 2 "nonimmediate_operand" "vm"))
6856 (parallel [(const_int 0) (const_int 8)
6857 (const_int 2) (const_int 10)
6858 (const_int 4) (const_int 12)
6859 (const_int 6) (const_int 14)])))]
6861 "vpunpcklqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
6862 [(set_attr "type" "sselog")
6863 (set_attr "prefix" "evex")
6864 (set_attr "mode" "XI")])
;; V2DI pattern with two alternatives selected by the "isa" attribute:
;; alternative 0 (noavx) is the two-operand punpcklqdq with operand 1 tied to
;; the destination; alternative 1 (avx) is the three-operand vpunpcklqdq.
6866 (define_insn "vec_interleave_lowv2di"
6867 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
6870 (match_operand:V2DI 1 "register_operand" "0,x")
6871 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm"))
6872 (parallel [(const_int 0)
6876 punpcklqdq\t{%2, %0|%0, %2}
6877 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}"
6878 [(set_attr "isa" "noavx,avx")
6879 (set_attr "type" "sselog")
6880 (set_attr "prefix_data16" "1,*")
6881 (set_attr "prefix" "orig,vex")
6882 (set_attr "mode" "TI")])
;; shufpd on the VI8F_128 modes, expressed as a vec_select from the
;; concatenation of operands 1 and 2.  Operand 3 selects from positions 0..1
;; and operand 4 from positions 2..3.  The output code folds both selectors
;; into the immediate, op3 | (op4 - 2) << 1, stores it in operands[3], and
;; returns the two-operand shufpd or three-operand vshufpd template depending
;; on which_alternative (the alternatives are "noavx" and "avx").
6884 (define_insn "sse2_shufpd_<mode>"
6885 [(set (match_operand:VI8F_128 0 "register_operand" "=x,x")
6886 (vec_select:VI8F_128
6887 (vec_concat:<ssedoublevecmode>
6888 (match_operand:VI8F_128 1 "register_operand" "0,x")
6889 (match_operand:VI8F_128 2 "nonimmediate_operand" "xm,xm"))
6890 (parallel [(match_operand 3 "const_0_to_1_operand")
6891 (match_operand 4 "const_2_to_3_operand")])))]
6895 mask = INTVAL (operands[3]);
6896 mask |= (INTVAL (operands[4]) - 2) << 1;
6897 operands[3] = GEN_INT (mask);
6899 switch (which_alternative)
6902 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
6904 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
6909 [(set_attr "isa" "noavx,avx")
6910 (set_attr "type" "sseshuf")
6911 (set_attr "length_immediate" "1")
6912 (set_attr "prefix" "orig,vex")
6913 (set_attr "mode" "V2DF")])
6915 ;; Avoid combining registers from different units in a single alternative,
6916 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Moves element 1 of a V2DF (selector (const_int 1)) into a DF destination.
;; Six alternatives cover memory, SSE register, x87 ("*f") and general ("r")
;; destinations.  The insn condition rejects the case where both operands are
;; in memory.
6917 (define_insn "sse2_storehpd"
6918 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,x,*f,r")
6920 (match_operand:V2DF 1 "nonimmediate_operand" " x,0,x,o,o,o")
6921 (parallel [(const_int 1)])))]
6922 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6924 %vmovhpd\t{%1, %0|%0, %1}
6926 vunpckhpd\t{%d1, %0|%0, %d1}
6930 [(set_attr "isa" "*,noavx,avx,*,*,*")
6931 (set_attr "type" "ssemov,sselog1,sselog1,ssemov,fmov,imov")
6932 (set (attr "prefix_data16")
6934 (and (eq_attr "alternative" "0")
6935 (not (match_test "TARGET_AVX")))
6937 (const_string "*")))
6938 (set_attr "prefix" "maybe_vex,orig,vex,*,*,*")
6939 (set_attr "mode" "V1DF,V1DF,V2DF,DF,DF,DF")])
;; NOTE(review): the defining keyword line of this pattern is not visible in
;; this listing; presumably a define_split -- confirm.
;; After reload, extracting element 1 of a V2DF memory operand into a DF
;; register becomes a plain DFmode move from byte offset 8 of that memory.
6942 [(set (match_operand:DF 0 "register_operand")
6944 (match_operand:V2DF 1 "memory_operand")
6945 (parallel [(const_int 1)])))]
6946 "TARGET_SSE2 && reload_completed"
6947 [(set (match_dup 0) (match_dup 1))]
6948 "operands[1] = adjust_address (operands[1], DFmode, 8);")
;; Extraction of element 1 of a V2DF when SSE is available but SSE2 is not
;; ("!TARGET_SSE2 && TARGET_SSE").  Uses the single-precision move
;; instructions movhps / movhlps / movlps; the last alternative reads the
;; source memory through the %H operand modifier.  Memory-to-memory is
;; rejected by the condition.
6950 (define_insn "*vec_extractv2df_1_sse"
6951 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
6953 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
6954 (parallel [(const_int 1)])))]
6955 "!TARGET_SSE2 && TARGET_SSE
6956 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6958 movhps\t{%1, %0|%q0, %1}
6959 movhlps\t{%1, %0|%0, %1}
6960 movlps\t{%H1, %0|%0, %H1}"
6961 [(set_attr "type" "ssemov")
6962 (set_attr "mode" "V2SF,V4SF,V2SF")])
6964 ;; Avoid combining registers from different units in a single alternative,
6965 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Moves element 0 of a V2DF (selector (const_int 0)) into a DF destination.
;; Five alternatives cover memory, SSE register, x87 ("*f") and general ("r")
;; destinations.  The insn condition rejects the case where both operands are
;; in memory.
6966 (define_insn "sse2_storelpd"
6967 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
6969 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m,m,m")
6970 (parallel [(const_int 0)])))]
6971 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6973 %vmovlpd\t{%1, %0|%0, %1}
6978 [(set_attr "type" "ssemov,ssemov,ssemov,fmov,imov")
6979 (set_attr "prefix_data16" "1,*,*,*,*")
6980 (set_attr "prefix" "maybe_vex")
6981 (set_attr "mode" "V1DF,DF,DF,DF,DF")])
;; NOTE(review): the defining keyword line of this pattern is not visible in
;; this listing; presumably a define_split -- confirm.
;; After reload, extracting element 0 of a V2DF into a DF register becomes a
;; plain DFmode move.  A register source is rewritten as the DFmode register
;; with the same register number; the adjust_address call narrows a memory
;; source to DFmode at byte offset 0 (presumably the else branch -- the line
;; between the two assignments is not visible here).
6984 [(set (match_operand:DF 0 "register_operand")
6986 (match_operand:V2DF 1 "nonimmediate_operand")
6987 (parallel [(const_int 0)])))]
6988 "TARGET_SSE2 && reload_completed"
6989 [(set (match_dup 0) (match_dup 1))]
6991 if (REG_P (operands[1]))
6992 operands[1] = gen_rtx_REG (DFmode, REGNO (operands[1]));
6994 operands[1] = adjust_address (operands[1], DFmode, 0);
;; Extraction of element 0 of a V2DF when SSE is available but SSE2 is not
;; ("!TARGET_SSE2 && TARGET_SSE").  Uses movlps for the memory alternatives
;; and movaps for register-to-register.  Memory-to-memory is rejected by the
;; condition.
6997 (define_insn "*vec_extractv2df_0_sse"
6998 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
7000 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
7001 (parallel [(const_int 0)])))]
7002 "!TARGET_SSE2 && TARGET_SSE
7003 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7005 movlps\t{%1, %0|%0, %1}
7006 movaps\t{%1, %0|%0, %1}
7007 movlps\t{%1, %0|%0, %q1}"
7008 [(set_attr "type" "ssemov")
7009 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander wrapper around sse2_loadhpd.  It runs the operands through
;; ix86_fixup_binary_operands, emits gen_sse2_loadhpd into the returned
;; destination, and copies that destination into operands[0] when the fixup
;; returned a different rtx.
7011 (define_expand "sse2_loadhpd_exp"
7012 [(set (match_operand:V2DF 0 "nonimmediate_operand")
7015 (match_operand:V2DF 1 "nonimmediate_operand")
7016 (parallel [(const_int 0)]))
7017 (match_operand:DF 2 "nonimmediate_operand")))]
7020 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
7022 emit_insn (gen_sse2_loadhpd (dst, operands[1], operands[2]));
7024 /* Fix up the destination if needed. */
7025 if (dst != operands[0])
7026 emit_move_insn (operands[0], dst);
7031 ;; Avoid combining registers from different units in a single alternative,
7032 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Builds a V2DF from element 0 of operand 1 (selector (const_int 0)) and the
;; DF operand 2.  Seven alternatives: movhpd / vmovhpd when operand 2 is in
;; memory, unpcklpd / vunpcklpd when it is an SSE register, plus three
;; alternatives whose templates are not visible in this listing.  The
;; condition rejects operands 1 and 2 both being in memory.
7033 (define_insn "sse2_loadhpd"
7034 [(set (match_operand:V2DF 0 "nonimmediate_operand"
7038 (match_operand:V2DF 1 "nonimmediate_operand"
7040 (parallel [(const_int 0)]))
7041 (match_operand:DF 2 "nonimmediate_operand"
7042 " m,m,x,x,x,*f,r")))]
7043 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
7045 movhpd\t{%2, %0|%0, %2}
7046 vmovhpd\t{%2, %1, %0|%0, %1, %2}
7047 unpcklpd\t{%2, %0|%0, %2}
7048 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
7052 [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
7053 (set_attr "type" "ssemov,ssemov,sselog,sselog,ssemov,fmov,imov")
7054 (set_attr "prefix_data16" "1,*,*,*,*,*,*")
7055 (set_attr "prefix" "orig,vex,orig,vex,*,*,*")
7056 (set_attr "mode" "V1DF,V1DF,V2DF,V2DF,DF,DF,DF")])
;; NOTE(review): the defining keyword line of this pattern is not visible in
;; this listing; presumably a define_split -- confirm.
;; After reload, when the V2DF destination is memory and the kept element is
;; element 0 of that same memory, the operation becomes a plain DFmode store
;; of operand 1 to byte offset 8 of the destination.
7059 [(set (match_operand:V2DF 0 "memory_operand")
7061 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
7062 (match_operand:DF 1 "register_operand")))]
7063 "TARGET_SSE2 && reload_completed"
7064 [(set (match_dup 0) (match_dup 1))]
7065 "operands[0] = adjust_address (operands[0], DFmode, 8);")
;; Expander wrapper around sse2_loadlpd.  It runs the operands through
;; ix86_fixup_binary_operands, emits gen_sse2_loadlpd into the returned
;; destination, and copies that destination into operands[0] when the fixup
;; returned a different rtx.
7067 (define_expand "sse2_loadlpd_exp"
7068 [(set (match_operand:V2DF 0 "nonimmediate_operand")
7070 (match_operand:DF 2 "nonimmediate_operand")
7072 (match_operand:V2DF 1 "nonimmediate_operand")
7073 (parallel [(const_int 1)]))))]
7076 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
7078 emit_insn (gen_sse2_loadlpd (dst, operands[1], operands[2]));
7080 /* Fix up the destination if needed. */
7081 if (dst != operands[0])
7082 emit_move_insn (operands[0], dst);
7087 ;; Avoid combining registers from different units in a single alternative,
7088 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Builds a V2DF from the DF operand 2 and element 1 of operand 1 (selector
;; (const_int 1)).  Operand 1 uses vector_move_operand, and its first
;; alternative has constraint "C".  Eleven alternatives; the visible templates
;; are the movsd / movlpd / shufpd / movhpd families, the last three
;; alternatives (memory destination) have templates not visible in this
;; listing.  The condition rejects operands 1 and 2 both being in memory.
7089 (define_insn "sse2_loadlpd"
7090 [(set (match_operand:V2DF 0 "nonimmediate_operand"
7091 "=x,x,x,x,x,x,x,x,m,m ,m")
7093 (match_operand:DF 2 "nonimmediate_operand"
7094 " m,m,m,x,x,0,0,x,x,*f,r")
7096 (match_operand:V2DF 1 "vector_move_operand"
7097 " C,0,x,0,x,x,o,o,0,0 ,0")
7098 (parallel [(const_int 1)]))))]
7099 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
7101 %vmovsd\t{%2, %0|%0, %2}
7102 movlpd\t{%2, %0|%0, %2}
7103 vmovlpd\t{%2, %1, %0|%0, %1, %2}
7104 movsd\t{%2, %0|%0, %2}
7105 vmovsd\t{%2, %1, %0|%0, %1, %2}
7106 shufpd\t{$2, %1, %0|%0, %1, 2}
7107 movhpd\t{%H1, %0|%0, %H1}
7108 vmovhpd\t{%H1, %2, %0|%0, %2, %H1}
7112 [(set_attr "isa" "*,noavx,avx,noavx,avx,noavx,noavx,avx,*,*,*")
7114 (cond [(eq_attr "alternative" "5")
7115 (const_string "sselog")
7116 (eq_attr "alternative" "9")
7117 (const_string "fmov")
7118 (eq_attr "alternative" "10")
7119 (const_string "imov")
7121 (const_string "ssemov")))
7122 (set_attr "prefix_data16" "*,1,*,*,*,*,1,*,*,*,*")
7123 (set_attr "length_immediate" "*,*,*,*,*,1,*,*,*,*,*")
7124 (set_attr "prefix" "maybe_vex,orig,vex,orig,vex,orig,orig,vex,*,*,*")
7125 (set_attr "mode" "DF,V1DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,DF,DF,DF")])
;; NOTE(review): the defining keyword line of this pattern is not visible in
;; this listing; presumably a define_split -- confirm.
;; After reload, when the V2DF destination is memory and the kept element is
;; element 1 of that same memory, the operation becomes a plain DFmode store
;; of operand 1 to byte offset 0 of the destination.
7128 [(set (match_operand:V2DF 0 "memory_operand")
7130 (match_operand:DF 1 "register_operand")
7131 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
7132 "TARGET_SSE2 && reload_completed"
7133 [(set (match_dup 0) (match_dup 1))]
7134 "operands[0] = adjust_address (operands[0], DFmode, 0);")
;; V2DF pattern over operands 2 and 1 (in that order in the RTL) with nine
;; alternatives.  Depending on where the operands live it emits movsd/vmovsd
;; (both in registers), movlpd/vmovlpd (operand 2 in memory, or memory
;; destination), shufpd with immediate 2, or movhps/vmovhps using the %H
;; modifier on a memory operand.  Alternative 5 (the shufpd one) is typed
;; "sselog"; all others are "ssemov".
7136 (define_insn "sse2_movsd"
7137 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,m,x,x,x,o")
7139 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,m,m,x,0,0,x,0")
7140 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,0,x,0,x,o,o,x")
7144 movsd\t{%2, %0|%0, %2}
7145 vmovsd\t{%2, %1, %0|%0, %1, %2}
7146 movlpd\t{%2, %0|%0, %q2}
7147 vmovlpd\t{%2, %1, %0|%0, %1, %q2}
7148 %vmovlpd\t{%2, %0|%q0, %2}
7149 shufpd\t{$2, %1, %0|%0, %1, 2}
7150 movhps\t{%H1, %0|%0, %H1}
7151 vmovhps\t{%H1, %2, %0|%0, %2, %H1}
7152 %vmovhps\t{%1, %H0|%H0, %1}"
7153 [(set_attr "isa" "noavx,avx,noavx,avx,*,noavx,noavx,avx,*")
7156 (eq_attr "alternative" "5")
7157 (const_string "sselog")
7158 (const_string "ssemov")))
7159 (set (attr "prefix_data16")
7161 (and (eq_attr "alternative" "2,4")
7162 (not (match_test "TARGET_AVX")))
7164 (const_string "*")))
7165 (set_attr "length_immediate" "*,*,*,*,*,1,*,*,*")
7166 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig,vex,maybe_vex")
7167 (set_attr "mode" "DF,DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,V1DF")])
;; V2DF pattern taking a single DF operand.  Two alternatives: the first
;; (isa "noavx") has operand 1 tied to the destination and its template is
;; not visible in this listing; the second (isa "sse3") emits movddup (with
;; the optional "v" prefix) from a register or memory source.
7169 (define_insn "vec_dupv2df"
7170 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
7172 (match_operand:DF 1 "nonimmediate_operand" " 0,xm")))]
7176 %vmovddup\t{%1, %0|%0, %1}"
7177 [(set_attr "isa" "noavx,sse3")
7178 (set_attr "type" "sselog1")
7179 (set_attr "prefix" "orig,maybe_vex")
7180 (set_attr "mode" "V2DF,DF")])
;; V2DF pattern built from two DF operands, with eight alternatives:
;;   0/1  unpcklpd / vunpcklpd   both operands in SSE registers
;;   2    movddup                operand 2 is constraint "1" (same as operand 1)
;;   3/4  movhpd / vmovhpd       operand 2 in memory
;;   5    movsd                  operand 1 in memory, operand 2 constraint "C"
;;   6/7  movlhps / movhps       isa "noavx" alternatives typed V4SF / V2SF
;; Alternatives 0..2 are typed "sselog", the rest "ssemov".
7182 (define_insn "*vec_concatv2df"
7183 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x,x,x,x,x,x")
7185 (match_operand:DF 1 "nonimmediate_operand" " 0,x,m,0,x,m,0,0")
7186 (match_operand:DF 2 "vector_move_operand" " x,x,1,m,m,C,x,m")))]
7189 unpcklpd\t{%2, %0|%0, %2}
7190 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
7191 %vmovddup\t{%1, %0|%0, %1}
7192 movhpd\t{%2, %0|%0, %2}
7193 vmovhpd\t{%2, %1, %0|%0, %1, %2}
7194 %vmovsd\t{%1, %0|%0, %1}
7195 movlhps\t{%2, %0|%0, %2}
7196 movhps\t{%2, %0|%0, %2}"
7197 [(set_attr "isa" "sse2_noavx,avx,sse3,sse2_noavx,avx,sse2,noavx,noavx")
7200 (eq_attr "alternative" "0,1,2")
7201 (const_string "sselog")
7202 (const_string "ssemov")))
7203 (set_attr "prefix_data16" "*,*,*,1,*,*,*,*")
7204 (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex,orig,orig")
7205 (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,DF,V4SF,V2SF")])
7207 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7209 ;; Parallel integer down-conversion operations
7211 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Destination modes of the down-converting moves, with three per-mode
;; attributes keyed on the destination mode:
;;   pmov_src_mode   source vector mode (V16SI or V8DI)
;;   pmov_src_lower  the same source mode in lower case, for pattern names
;;   pmov_suff       two-letter mnemonic suffix appended after "vpmov<trunsuffix>"
7213 (define_mode_iterator PMOV_DST_MODE [V16QI V16HI V8SI V8HI])
7214 (define_mode_attr pmov_src_mode
7215 [(V16QI "V16SI") (V16HI "V16SI") (V8SI "V8DI") (V8HI "V8DI")])
7216 (define_mode_attr pmov_src_lower
7217 [(V16QI "v16si") (V16HI "v16si") (V8SI "v8di") (V8HI "v8di")])
7218 (define_mode_attr pmov_suff
7219 [(V16QI "db") (V16HI "dw") (V8SI "qd") (V8HI "qw")])
;; Truncating move (any_truncate) from <pmov_src_mode> to a PMOV_DST_MODE
;; destination.  Two alternatives: register destination and memory
;; destination; the "memory" attribute is "none" and "store" respectively.
7221 (define_insn "*avx512f_<code><pmov_src_lower><mode>2"
7222 [(set (match_operand:PMOV_DST_MODE 0 "nonimmediate_operand" "=v,m")
7223 (any_truncate:PMOV_DST_MODE
7224 (match_operand:<pmov_src_mode> 1 "register_operand" "v,v")))]
7226 "vpmov<trunsuffix><pmov_suff>\t{%1, %0|%0, %1}"
7227 [(set_attr "type" "ssemov")
7228 (set_attr "memory" "none,store")
7229 (set_attr "prefix" "evex")
7230 (set_attr "mode" "<sseinsnmode>")])
;; Truncating move under a mask: a vec_merge of the truncated source with
;; operand 2, controlled by operand 3 (mode <avx512fmaskmode>, constraint
;; "k").  For a register destination operand 2 may be the destination itself
;; or constraint "C"; for a memory destination it must be the destination.
;; The template prints operand 3 in braces after the destination, followed by
;; the %N2 modifier.
7232 (define_insn "avx512f_<code><pmov_src_lower><mode>2_mask"
7233 [(set (match_operand:PMOV_DST_MODE 0 "nonimmediate_operand" "=v,m")
7234 (vec_merge:PMOV_DST_MODE
7235 (any_truncate:PMOV_DST_MODE
7236 (match_operand:<pmov_src_mode> 1 "register_operand" "v,v"))
7237 (match_operand:PMOV_DST_MODE 2 "vector_move_operand" "0C,0")
7238 (match_operand:<avx512fmaskmode> 3 "register_operand" "k,k")))]
7240 "vpmov<trunsuffix><pmov_suff>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
7241 [(set_attr "type" "ssemov")
7242 (set_attr "memory" "none,store")
7243 (set_attr "prefix" "evex")
7244 (set_attr "mode" "<sseinsnmode>")])
;; vpmov<trunsuffix>qb from a V8DI register into a V16QI register.  The
;; result expression ends with an all-zero const_vector:V8QI (presumably the
;; eight result bytes not produced from the source -- confirm against the
;; RTL lines not visible in this listing).
7246 (define_insn "*avx512f_<code>v8div16qi2"
7247 [(set (match_operand:V16QI 0 "register_operand" "=v")
7250 (match_operand:V8DI 1 "register_operand" "v"))
7251 (const_vector:V8QI [(const_int 0) (const_int 0)
7252 (const_int 0) (const_int 0)
7253 (const_int 0) (const_int 0)
7254 (const_int 0) (const_int 0)])))]
7256 "vpmov<trunsuffix>qb\t{%1, %0|%0, %1}"
7257 [(set_attr "type" "ssemov")
7258 (set_attr "prefix" "evex")
7259 (set_attr "mode" "TI")])
;; Store form of vpmov<trunsuffix>qb: V8DI register source, V16QI memory
;; destination.  The RTL ends with a selection of positions 8..15
;; (presumably the destination bytes left unchanged by the store -- confirm
;; against the RTL lines not visible in this listing).
7261 (define_insn "*avx512f_<code>v8div16qi2_store"
7262 [(set (match_operand:V16QI 0 "memory_operand" "=m")
7265 (match_operand:V8DI 1 "register_operand" "v"))
7268 (parallel [(const_int 8) (const_int 9)
7269 (const_int 10) (const_int 11)
7270 (const_int 12) (const_int 13)
7271 (const_int 14) (const_int 15)]))))]
7273 "vpmov<trunsuffix>qb\t{%1, %0|%0, %1}"
7274 [(set_attr "type" "ssemov")
7275 (set_attr "memory" "store")
7276 (set_attr "prefix" "evex")
7277 (set_attr "mode" "TI")])
;; Masked register form of vpmov<trunsuffix>qb.  Operand 2 (V16QI, constraint
;; "0C") contributes positions 0..7 through the visible selector list, and
;; operand 3 is a QImode register with constraint "k".  The result expression
;; ends with an all-zero const_vector:V8QI.  The template prints operand 3 in
;; braces after the destination, followed by the %N2 modifier.
7279 (define_insn "avx512f_<code>v8div16qi2_mask"
7280 [(set (match_operand:V16QI 0 "register_operand" "=v")
7284 (match_operand:V8DI 1 "register_operand" "v"))
7286 (match_operand:V16QI 2 "vector_move_operand" "0C")
7287 (parallel [(const_int 0) (const_int 1)
7288 (const_int 2) (const_int 3)
7289 (const_int 4) (const_int 5)
7290 (const_int 6) (const_int 7)]))
7291 (match_operand:QI 3 "register_operand" "k"))
7292 (const_vector:V8QI [(const_int 0) (const_int 0)
7293 (const_int 0) (const_int 0)
7294 (const_int 0) (const_int 0)
7295 (const_int 0) (const_int 0)])))]
7297 "vpmov<trunsuffix>qb\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
7298 [(set_attr "type" "ssemov")
7299 (set_attr "prefix" "evex")
7300 (set_attr "mode" "TI")])
;; Masked store form of vpmov<trunsuffix>qb: V8DI register source, V16QI
;; memory destination, QImode operand 2 with constraint "k".  The RTL
;; contains two selector lists, positions 0..7 and positions 8..15.  The
;; template prints operand 2 in braces after the destination.
7302 (define_insn "*avx512f_<code>v8div16qi2_store_mask"
7303 [(set (match_operand:V16QI 0 "memory_operand" "=m")
7307 (match_operand:V8DI 1 "register_operand" "v"))
7310 (parallel [(const_int 0) (const_int 1)
7311 (const_int 2) (const_int 3)
7312 (const_int 4) (const_int 5)
7313 (const_int 6) (const_int 7)]))
7314 (match_operand:QI 2 "register_operand" "k"))
7317 (parallel [(const_int 8) (const_int 9)
7318 (const_int 10) (const_int 11)
7319 (const_int 12) (const_int 13)
7320 (const_int 14) (const_int 15)]))))]
7322 "vpmov<trunsuffix>qb\t{%1, %0%{%2%}|%0%{%2%}, %1}"
7323 [(set_attr "type" "ssemov")
7324 (set_attr "memory" "store")
7325 (set_attr "prefix" "evex")
7326 (set_attr "mode" "TI")])
7328 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7330 ;; Parallel integral arithmetic
7332 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Vector negation expander for the VI_AVX2 modes.  The preparation statement
;; forces an all-zero vector of the same mode into a register as operand 2
;; (presumably so the negation can be expressed as zero minus operand 1 --
;; the RTL line using operand 2 is not visible in this listing).
7334 (define_expand "neg<mode>2"
7335 [(set (match_operand:VI_AVX2 0 "register_operand")
7338 (match_operand:VI_AVX2 1 "nonimmediate_operand")))]
7340 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
;; Expander for vector add/subtract on the VI_AVX2 modes.  Requires
;; TARGET_SSE2 together with <mask_mode512bit_condition>, and legitimises the
;; operands in place with ix86_fixup_binary_operands_no_copy.
7342 (define_expand "<plusminus_insn><mode>3<mask_name>"
7343 [(set (match_operand:VI_AVX2 0 "register_operand")
7345 (match_operand:VI_AVX2 1 "nonimmediate_operand")
7346 (match_operand:VI_AVX2 2 "nonimmediate_operand")))]
7347 "TARGET_SSE2 && <mask_mode512bit_condition>"
7348 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Vector add/subtract on the VI_AVX2 modes.  Alternative 0 (isa "noavx") is
;; the two-operand legacy form with operand 1 tied to the destination;
;; alternative 1 (isa "avx") is the three-operand form, whose destination
;; carries <mask_operand3>.  The condition also requires
;; ix86_binary_operator_ok and <mask_mode512bit_condition>.
7350 (define_insn "*<plusminus_insn><mode>3<mask_name>"
7351 [(set (match_operand:VI_AVX2 0 "register_operand" "=x,v")
7353 (match_operand:VI_AVX2 1 "nonimmediate_operand" "<comm>0,v")
7354 (match_operand:VI_AVX2 2 "nonimmediate_operand" "xm,vm")))]
7355 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands) && <mask_mode512bit_condition>"
7357 p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
7358 vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7359 [(set_attr "isa" "noavx,avx")
7360 (set_attr "type" "sseiadd")
7361 (set_attr "prefix_data16" "1,*")
7362 (set_attr "prefix" "<mask_prefix3>")
7363 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the sat_plusminus codes on the VI12_AVX2 modes.  Legitimises
;; the operands in place with ix86_fixup_binary_operands_no_copy.
7365 (define_expand "<sse2_avx2>_<plusminus_insn><mode>3"
7366 [(set (match_operand:VI12_AVX2 0 "register_operand")
7367 (sat_plusminus:VI12_AVX2
7368 (match_operand:VI12_AVX2 1 "nonimmediate_operand")
7369 (match_operand:VI12_AVX2 2 "nonimmediate_operand")))]
7371 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; sat_plusminus codes on the VI12_AVX2 modes.  Alternative 0 (isa "noavx") is
;; the two-operand legacy form with operand 1 tied to the destination;
;; alternative 1 (isa "avx") is the three-operand VEX form.
;; NOTE(review): "mode" is fixed to "TI" although VI12_AVX2 is an iterator
;; and sibling patterns use <sseinsnmode> -- confirm this is intended.
7373 (define_insn "*<sse2_avx2>_<plusminus_insn><mode>3"
7374 [(set (match_operand:VI12_AVX2 0 "register_operand" "=x,v")
7375 (sat_plusminus:VI12_AVX2
7376 (match_operand:VI12_AVX2 1 "nonimmediate_operand" "<comm>0,v")
7377 (match_operand:VI12_AVX2 2 "nonimmediate_operand" "xm,vm")))]
7378 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
7380 p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
7381 vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
7382 [(set_attr "isa" "noavx,avx")
7383 (set_attr "type" "sseiadd")
7384 (set_attr "prefix_data16" "1,*")
7385 (set_attr "prefix" "orig,vex")
7386 (set_attr "mode" "TI")])
;; Multiplication expander for the VI1_AVX2 modes.  Both inputs must be
;; registers; the work is delegated to ix86_expand_vecop_qihi with code MULT.
7388 (define_expand "mul<mode>3"
7389 [(set (match_operand:VI1_AVX2 0 "register_operand")
7390 (mult:VI1_AVX2 (match_operand:VI1_AVX2 1 "register_operand")
7391 (match_operand:VI1_AVX2 2 "register_operand")))]
7394 ix86_expand_vecop_qihi (MULT, operands[0], operands[1], operands[2]);
;; Multiplication expander for the VI2_AVX2 modes.  Legitimises the operands
;; in place with ix86_fixup_binary_operands_no_copy.
7398 (define_expand "mul<mode>3"
7399 [(set (match_operand:VI2_AVX2 0 "register_operand")
7400 (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "nonimmediate_operand")
7401 (match_operand:VI2_AVX2 2 "nonimmediate_operand")))]
7403 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; Multiplication on the VI2_AVX2 modes, emitted as pmullw (two-operand,
;; isa "noavx") or vpmullw (three-operand, isa "avx").  The "%" in operand
;; 1's constraint marks operands 1 and 2 as commutative.
7405 (define_insn "*mul<mode>3"
7406 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
7407 (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,x")
7408 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,xm")))]
7409 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
7411 pmullw\t{%2, %0|%0, %2}
7412 vpmullw\t{%2, %1, %0|%0, %1, %2}"
7413 [(set_attr "isa" "noavx,avx")
7414 (set_attr "type" "sseimul")
7415 (set_attr "prefix_data16" "1,*")
7416 (set_attr "prefix" "orig,vex")
7417 (set_attr "mode" "<sseinsnmode>")])
;; High-part multiply expander for the VI2_AVX2 modes, signed or unsigned
;; according to any_extend.  The RTL extends both operands to
;; <ssedoublemode>, multiplies them and shifts the product right.
;; ix86_fixup_binary_operands_no_copy is run on the operands first.
7419 (define_expand "<s>mul<mode>3_highpart"
7420 [(set (match_operand:VI2_AVX2 0 "register_operand")
7422 (lshiftrt:<ssedoublemode>
7423 (mult:<ssedoublemode>
7424 (any_extend:<ssedoublemode>
7425 (match_operand:VI2_AVX2 1 "nonimmediate_operand"))
7426 (any_extend:<ssedoublemode>
7427 (match_operand:VI2_AVX2 2 "nonimmediate_operand")))
7430 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; High-part multiply insn for the VI2_AVX2 modes, output as pmulh<u>w
;; (non-AVX, destination tied to operand 1) or vpmulh<u>w (AVX).  The <u>
;; attribute distinguishes the unsigned mnemonic; operand 1 is commutative.
7432 (define_insn "*<s>mul<mode>3_highpart"
7433 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
7435 (lshiftrt:<ssedoublemode>
7436 (mult:<ssedoublemode>
7437 (any_extend:<ssedoublemode>
7438 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,x"))
7439 (any_extend:<ssedoublemode>
7440 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,xm")))
7442 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
7444 pmulh<u>w\t{%2, %0|%0, %2}
7445 vpmulh<u>w\t{%2, %1, %0|%0, %1, %2}"
7446 [(set_attr "isa" "noavx,avx")
7447 (set_attr "type" "sseimul")
7448 (set_attr "prefix_data16" "1,*")
7449 (set_attr "prefix" "orig,vex")
7450 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the unsigned widening multiply of the even-indexed
;; elements (0, 2, ..., 14) of two V16SI operands into a V8DI result,
;; optionally masked (<mask_name>).  The operands are passed through
;; ix86_fixup_binary_operands_no_copy before the pattern is emitted.
7452 (define_expand "vec_widen_umult_even_v16si<mask_name>"
7453 [(set (match_operand:V8DI 0 "register_operand")
7457 (match_operand:V16SI 1 "nonimmediate_operand")
7458 (parallel [(const_int 0) (const_int 2)
7459 (const_int 4) (const_int 6)
7460 (const_int 8) (const_int 10)
7461 (const_int 12) (const_int 14)])))
7464 (match_operand:V16SI 2 "nonimmediate_operand")
7465 (parallel [(const_int 0) (const_int 2)
7466 (const_int 4) (const_int 6)
7467 (const_int 8) (const_int 10)
7468 (const_int 12) (const_int 14)])))))]
7470 "ix86_fixup_binary_operands_no_copy (MULT, V16SImode, operands);")
;; AVX512F insn behind vec_widen_umult_even_v16si: emits vpmuludq on the
;; even-indexed V16SI elements, with the optional mask operand appended to
;; the destination (<mask_operand3>).  Operand 1 is commutative ("%v").
7472 (define_insn "*vec_widen_umult_even_v16si<mask_name>"
7473 [(set (match_operand:V8DI 0 "register_operand" "=v")
7477 (match_operand:V16SI 1 "nonimmediate_operand" "%v")
7478 (parallel [(const_int 0) (const_int 2)
7479 (const_int 4) (const_int 6)
7480 (const_int 8) (const_int 10)
7481 (const_int 12) (const_int 14)])))
7484 (match_operand:V16SI 2 "nonimmediate_operand" "vm")
7485 (parallel [(const_int 0) (const_int 2)
7486 (const_int 4) (const_int 6)
7487 (const_int 8) (const_int 10)
7488 (const_int 12) (const_int 14)])))))]
7489 "TARGET_AVX512F && ix86_binary_operator_ok (MULT, V16SImode, operands)"
7490 "vpmuludq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7491 [(set_attr "isa" "avx512f")
7492 (set_attr "type" "sseimul")
7493 (set_attr "prefix_extra" "1")
7494 (set_attr "prefix" "evex")
7495 (set_attr "mode" "XI")])
;; Expander for the unsigned widening multiply of the even-indexed
;; elements (0, 2, 4, 6) of two V8SI operands into a V4DI result.  The
;; operands are passed through ix86_fixup_binary_operands_no_copy first.
7497 (define_expand "vec_widen_umult_even_v8si"
7498 [(set (match_operand:V4DI 0 "register_operand")
7502 (match_operand:V8SI 1 "nonimmediate_operand")
7503 (parallel [(const_int 0) (const_int 2)
7504 (const_int 4) (const_int 6)])))
7507 (match_operand:V8SI 2 "nonimmediate_operand")
7508 (parallel [(const_int 0) (const_int 2)
7509 (const_int 4) (const_int 6)])))))]
7511 "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")
;; AVX2 insn behind vec_widen_umult_even_v8si: emits the three-operand
;; vpmuludq on the even-indexed V8SI elements.  Operand 1 is commutative.
7513 (define_insn "*vec_widen_umult_even_v8si"
7514 [(set (match_operand:V4DI 0 "register_operand" "=x")
7518 (match_operand:V8SI 1 "nonimmediate_operand" "%x")
7519 (parallel [(const_int 0) (const_int 2)
7520 (const_int 4) (const_int 6)])))
7523 (match_operand:V8SI 2 "nonimmediate_operand" "xm")
7524 (parallel [(const_int 0) (const_int 2)
7525 (const_int 4) (const_int 6)])))))]
7526 "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V8SImode, operands)"
7527 "vpmuludq\t{%2, %1, %0|%0, %1, %2}"
7528 [(set_attr "type" "sseimul")
7529 (set_attr "prefix" "vex")
7530 (set_attr "mode" "OI")])
;; Expander for the unsigned widening multiply of elements 0 and 2 of two
;; V4SI operands into a V2DI result.  The operands are passed through
;; ix86_fixup_binary_operands_no_copy first.
7532 (define_expand "vec_widen_umult_even_v4si"
7533 [(set (match_operand:V2DI 0 "register_operand")
7537 (match_operand:V4SI 1 "nonimmediate_operand")
7538 (parallel [(const_int 0) (const_int 2)])))
7541 (match_operand:V4SI 2 "nonimmediate_operand")
7542 (parallel [(const_int 0) (const_int 2)])))))]
7544 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; SSE2 insn behind vec_widen_umult_even_v4si: pmuludq (non-AVX,
;; destination tied to operand 1) or vpmuludq (AVX) on elements 0 and 2.
;; Operand 1 is commutative.
7546 (define_insn "*vec_widen_umult_even_v4si"
7547 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
7551 (match_operand:V4SI 1 "nonimmediate_operand" "%0,x")
7552 (parallel [(const_int 0) (const_int 2)])))
7555 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
7556 (parallel [(const_int 0) (const_int 2)])))))]
7557 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
7559 pmuludq\t{%2, %0|%0, %2}
7560 vpmuludq\t{%2, %1, %0|%0, %1, %2}"
7561 [(set_attr "isa" "noavx,avx")
7562 (set_attr "type" "sseimul")
7563 (set_attr "prefix_data16" "1,*")
7564 (set_attr "prefix" "orig,vex")
7565 (set_attr "mode" "TI")])
;; Expander for the signed widening multiply of the even-indexed elements
;; (0, 2, ..., 14) of two V16SI operands into a V8DI result, optionally
;; masked (<mask_name>).  The operands are passed through
;; ix86_fixup_binary_operands_no_copy first.
7567 (define_expand "vec_widen_smult_even_v16si<mask_name>"
7568 [(set (match_operand:V8DI 0 "register_operand")
7572 (match_operand:V16SI 1 "nonimmediate_operand")
7573 (parallel [(const_int 0) (const_int 2)
7574 (const_int 4) (const_int 6)
7575 (const_int 8) (const_int 10)
7576 (const_int 12) (const_int 14)])))
7579 (match_operand:V16SI 2 "nonimmediate_operand")
7580 (parallel [(const_int 0) (const_int 2)
7581 (const_int 4) (const_int 6)
7582 (const_int 8) (const_int 10)
7583 (const_int 12) (const_int 14)])))))]
7585 "ix86_fixup_binary_operands_no_copy (MULT, V16SImode, operands);")
;; AVX512F insn behind vec_widen_smult_even_v16si: emits vpmuldq on the
;; even-indexed V16SI elements, with the optional mask operand appended to
;; the destination (<mask_operand3>).  Operand 1 is commutative ("%v").
7587 (define_insn "*vec_widen_smult_even_v16si<mask_name>"
7588 [(set (match_operand:V8DI 0 "register_operand" "=v")
7592 (match_operand:V16SI 1 "nonimmediate_operand" "%v")
7593 (parallel [(const_int 0) (const_int 2)
7594 (const_int 4) (const_int 6)
7595 (const_int 8) (const_int 10)
7596 (const_int 12) (const_int 14)])))
7599 (match_operand:V16SI 2 "nonimmediate_operand" "vm")
7600 (parallel [(const_int 0) (const_int 2)
7601 (const_int 4) (const_int 6)
7602 (const_int 8) (const_int 10)
7603 (const_int 12) (const_int 14)])))))]
7604 "TARGET_AVX512F && ix86_binary_operator_ok (MULT, V16SImode, operands)"
7605 "vpmuldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7606 [(set_attr "isa" "avx512f")
7607 (set_attr "type" "sseimul")
7608 (set_attr "prefix_extra" "1")
7609 (set_attr "prefix" "evex")
7610 (set_attr "mode" "XI")])
;; Expander for the signed widening multiply of the even-indexed elements
;; (0, 2, 4, 6) of two V8SI operands into a V4DI result.  The operands are
;; passed through ix86_fixup_binary_operands_no_copy first.
7612 (define_expand "vec_widen_smult_even_v8si"
7613 [(set (match_operand:V4DI 0 "register_operand")
7617 (match_operand:V8SI 1 "nonimmediate_operand")
7618 (parallel [(const_int 0) (const_int 2)
7619 (const_int 4) (const_int 6)])))
7622 (match_operand:V8SI 2 "nonimmediate_operand")
7623 (parallel [(const_int 0) (const_int 2)
7624 (const_int 4) (const_int 6)])))))]
7626 "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")
;; AVX2 insn behind vec_widen_smult_even_v8si: emits the three-operand
;; vpmuldq on the even-indexed V8SI elements (0, 2, 4, 6).
;; Operand 1 carries the "%" commutativity marker so the register
;; allocator may swap the two sources (e.g. to fold a memory operand into
;; operand 2).  This matches the unsigned V8SI insn and the signed
;; V16SI/V4SI insns, and agrees with the MULT-based condition below.
7628 (define_insn "*vec_widen_smult_even_v8si"
7629 [(set (match_operand:V4DI 0 "register_operand" "=x")
7633 (match_operand:V8SI 1 "nonimmediate_operand" "%x")
7634 (parallel [(const_int 0) (const_int 2)
7635 (const_int 4) (const_int 6)])))
7638 (match_operand:V8SI 2 "nonimmediate_operand" "xm")
7639 (parallel [(const_int 0) (const_int 2)
7640 (const_int 4) (const_int 6)])))))]
7641 "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V8SImode, operands)"
7642 "vpmuldq\t{%2, %1, %0|%0, %1, %2}"
7643 [(set_attr "type" "sseimul")
7644 (set_attr "prefix_extra" "1")
7645 (set_attr "prefix" "vex")
7646 (set_attr "mode" "OI")])
;; Expander for a widening multiply of elements 0 and 2 of two V4SI
;; operands into a V2DI result (the matching insn below emits pmuldq).
;; The operands are passed through ix86_fixup_binary_operands_no_copy.
7648 (define_expand "sse4_1_mulv2siv2di3"
7649 [(set (match_operand:V2DI 0 "register_operand")
7653 (match_operand:V4SI 1 "nonimmediate_operand")
7654 (parallel [(const_int 0) (const_int 2)])))
7657 (match_operand:V4SI 2 "nonimmediate_operand")
7658 (parallel [(const_int 0) (const_int 2)])))))]
7660 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; SSE4.1 insn behind sse4_1_mulv2siv2di3: pmuldq (non-AVX, destination
;; tied to operand 1) or vpmuldq (AVX) on elements 0 and 2 of the V4SI
;; sources.  Operand 1 is commutative.
7662 (define_insn "*sse4_1_mulv2siv2di3"
7663 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
7667 (match_operand:V4SI 1 "nonimmediate_operand" "%0,x")
7668 (parallel [(const_int 0) (const_int 2)])))
7671 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
7672 (parallel [(const_int 0) (const_int 2)])))))]
7673 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
7675 pmuldq\t{%2, %0|%0, %2}
7676 vpmuldq\t{%2, %1, %0|%0, %1, %2}"
7677 [(set_attr "isa" "noavx,avx")
7678 (set_attr "type" "sseimul")
7679 (set_attr "prefix_data16" "1,*")
7680 (set_attr "prefix_extra" "1")
7681 (set_attr "prefix" "orig,vex")
7682 (set_attr "mode" "TI")])
;; Expander for the 256-bit pmaddwd operation: V16HI sources, V8SI result.
;; The RTL combines the even-indexed elements (0, 2, ..., 14) of both
;; operands with their odd-indexed elements (1, 3, ..., 15); the odd halves
;; refer back to the same operands via match_dup.  The operands are passed
;; through ix86_fixup_binary_operands_no_copy first.
7684 (define_expand "avx2_pmaddwd"
7685 [(set (match_operand:V8SI 0 "register_operand")
7690 (match_operand:V16HI 1 "nonimmediate_operand")
7691 (parallel [(const_int 0) (const_int 2)
7692 (const_int 4) (const_int 6)
7693 (const_int 8) (const_int 10)
7694 (const_int 12) (const_int 14)])))
7697 (match_operand:V16HI 2 "nonimmediate_operand")
7698 (parallel [(const_int 0) (const_int 2)
7699 (const_int 4) (const_int 6)
7700 (const_int 8) (const_int 10)
7701 (const_int 12) (const_int 14)]))))
7704 (vec_select:V8HI (match_dup 1)
7705 (parallel [(const_int 1) (const_int 3)
7706 (const_int 5) (const_int 7)
7707 (const_int 9) (const_int 11)
7708 (const_int 13) (const_int 15)])))
7710 (vec_select:V8HI (match_dup 2)
7711 (parallel [(const_int 1) (const_int 3)
7712 (const_int 5) (const_int 7)
7713 (const_int 9) (const_int 11)
7714 (const_int 13) (const_int 15)]))))))]
7716 "ix86_fixup_binary_operands_no_copy (MULT, V16HImode, operands);")
;; AVX2 insn behind avx2_pmaddwd: emits the three-operand vpmaddwd.  The
;; pattern pairs the even-indexed and odd-indexed V16HI elements of the two
;; sources; operand 1 is commutative ("%x").
7718 (define_insn "*avx2_pmaddwd"
7719 [(set (match_operand:V8SI 0 "register_operand" "=x")
7724 (match_operand:V16HI 1 "nonimmediate_operand" "%x")
7725 (parallel [(const_int 0) (const_int 2)
7726 (const_int 4) (const_int 6)
7727 (const_int 8) (const_int 10)
7728 (const_int 12) (const_int 14)])))
7731 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
7732 (parallel [(const_int 0) (const_int 2)
7733 (const_int 4) (const_int 6)
7734 (const_int 8) (const_int 10)
7735 (const_int 12) (const_int 14)]))))
7738 (vec_select:V8HI (match_dup 1)
7739 (parallel [(const_int 1) (const_int 3)
7740 (const_int 5) (const_int 7)
7741 (const_int 9) (const_int 11)
7742 (const_int 13) (const_int 15)])))
7744 (vec_select:V8HI (match_dup 2)
7745 (parallel [(const_int 1) (const_int 3)
7746 (const_int 5) (const_int 7)
7747 (const_int 9) (const_int 11)
7748 (const_int 13) (const_int 15)]))))))]
7749 "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V16HImode, operands)"
7750 "vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
7751 [(set_attr "type" "sseiadd")
7752 (set_attr "prefix" "vex")
7753 (set_attr "mode" "OI")])
;; Expander for the 128-bit pmaddwd operation: V8HI sources, V4SI result.
;; The RTL combines the even-indexed elements (0, 2, 4, 6) of both operands
;; with their odd-indexed elements (1, 3, 5, 7), the latter via match_dup.
;; The operands are passed through ix86_fixup_binary_operands_no_copy.
7755 (define_expand "sse2_pmaddwd"
7756 [(set (match_operand:V4SI 0 "register_operand")
7761 (match_operand:V8HI 1 "nonimmediate_operand")
7762 (parallel [(const_int 0) (const_int 2)
7763 (const_int 4) (const_int 6)])))
7766 (match_operand:V8HI 2 "nonimmediate_operand")
7767 (parallel [(const_int 0) (const_int 2)
7768 (const_int 4) (const_int 6)]))))
7771 (vec_select:V4HI (match_dup 1)
7772 (parallel [(const_int 1) (const_int 3)
7773 (const_int 5) (const_int 7)])))
7775 (vec_select:V4HI (match_dup 2)
7776 (parallel [(const_int 1) (const_int 3)
7777 (const_int 5) (const_int 7)]))))))]
7779 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; SSE2 insn behind sse2_pmaddwd: pmaddwd (non-AVX, destination tied to
;; operand 1) or vpmaddwd (AVX).  Operand 1 is commutative.
7781 (define_insn "*sse2_pmaddwd"
7782 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
7787 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x")
7788 (parallel [(const_int 0) (const_int 2)
7789 (const_int 4) (const_int 6)])))
7792 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
7793 (parallel [(const_int 0) (const_int 2)
7794 (const_int 4) (const_int 6)]))))
7797 (vec_select:V4HI (match_dup 1)
7798 (parallel [(const_int 1) (const_int 3)
7799 (const_int 5) (const_int 7)])))
7801 (vec_select:V4HI (match_dup 2)
7802 (parallel [(const_int 1) (const_int 3)
7803 (const_int 5) (const_int 7)]))))))]
7804 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
7806 pmaddwd\t{%2, %0|%0, %2}
7807 vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
7808 [(set_attr "isa" "noavx,avx")
7809 (set_attr "type" "sseiadd")
7810 (set_attr "atom_unit" "simul")
7811 (set_attr "prefix_data16" "1,*")
7812 (set_attr "prefix" "orig,vex")
7813 (set_attr "mode" "TI")])
;; Multiply expander for the VI4_AVX512F modes, optionally masked.  The
;; sources accept general_vector_operand.  On one path, any source that is
;; not a nonimmediate_operand is forced into a register and the operands
;; are then run through ix86_fixup_binary_operands_no_copy; on the other
;; path the whole operation is handed to ix86_expand_sse2_mulv4si3.
7815 (define_expand "mul<mode>3<mask_name>"
7816 [(set (match_operand:VI4_AVX512F 0 "register_operand")
7818 (match_operand:VI4_AVX512F 1 "general_vector_operand")
7819 (match_operand:VI4_AVX512F 2 "general_vector_operand")))]
7820 "TARGET_SSE2 && <mask_mode512bit_condition>"
7824 if (!nonimmediate_operand (operands[1], <MODE>mode))
7825 operands[1] = force_reg (<MODE>mode, operands[1]);
7826 if (!nonimmediate_operand (operands[2], <MODE>mode))
7827 operands[2] = force_reg (<MODE>mode, operands[2]);
7828 ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
7832 ix86_expand_sse2_mulv4si3 (operands[0], operands[1], operands[2]);
;; SSE4.1 multiply insn for the VI4_AVX512F modes: pmulld (non-AVX,
;; destination tied to operand 1) or vpmulld (AVX), the latter with the
;; optional mask operand appended to the destination.  Operand 1 is
;; commutative.
7837 (define_insn "*<sse4_1_avx2>_mul<mode>3<mask_name>"
7838 [(set (match_operand:VI4_AVX512F 0 "register_operand" "=x,v")
7840 (match_operand:VI4_AVX512F 1 "nonimmediate_operand" "%0,v")
7841 (match_operand:VI4_AVX512F 2 "nonimmediate_operand" "xm,vm")))]
7842 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, <MODE>mode, operands) && <mask_mode512bit_condition>"
7844 pmulld\t{%2, %0|%0, %2}
7845 vpmulld\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7846 [(set_attr "isa" "noavx,avx")
7847 (set_attr "type" "sseimul")
7848 (set_attr "prefix_extra" "1")
7849 (set_attr "prefix" "<mask_prefix3>")
7850 (set_attr "btver2_decode" "vector,vector")
7851 (set_attr "mode" "<sseinsnmode>")])
;; Multiply expander for the VI8_AVX2_AVX512F modes.  All operands must be
;; registers; the expansion is delegated to ix86_expand_sse2_mulvxdi3.
7853 (define_expand "mul<mode>3"
7854 [(set (match_operand:VI8_AVX2_AVX512F 0 "register_operand")
7855 (mult:VI8_AVX2_AVX512F
7856 (match_operand:VI8_AVX2_AVX512F 1 "register_operand")
7857 (match_operand:VI8_AVX2_AVX512F 2 "register_operand")))]
7860 ix86_expand_sse2_mulvxdi3 (operands[0], operands[1], operands[2]);
;; Widening multiply (high-part variant, per the pattern name) for the
;; VI124_AVX2 modes, signed or unsigned according to any_extend.  The
;; result has mode <sseunpackmode>; the expansion is delegated to
;; ix86_expand_mul_widen_hilo.
7864 (define_expand "vec_widen_<s>mult_hi_<mode>"
7865 [(match_operand:<sseunpackmode> 0 "register_operand")
7866 (any_extend:<sseunpackmode>
7867 (match_operand:VI124_AVX2 1 "register_operand"))
7868 (match_operand:VI124_AVX2 2 "register_operand")]
7871 ix86_expand_mul_widen_hilo (operands[0], operands[1], operands[2],
;; Widening multiply (low-part variant, per the pattern name) for the
;; VI124_AVX2 modes, signed or unsigned according to any_extend.  The
;; result has mode <sseunpackmode>; the expansion is delegated to
;; ix86_expand_mul_widen_hilo.
7876 (define_expand "vec_widen_<s>mult_lo_<mode>"
7877 [(match_operand:<sseunpackmode> 0 "register_operand")
7878 (any_extend:<sseunpackmode>
7879 (match_operand:VI124_AVX2 1 "register_operand"))
7880 (match_operand:VI124_AVX2 2 "register_operand")]
7883 ix86_expand_mul_widen_hilo (operands[0], operands[1], operands[2],
7888 ;; Most widen_<s>mult_even_<mode> can be handled directly from other
7889 ;; named patterns, but signed V4SI needs special help for plain SSE2.
;; This expander takes two V4SI sources and a V2DI destination and
;; delegates the work to ix86_expand_mul_widen_evenodd.
7890 (define_expand "vec_widen_smult_even_v4si"
7891 [(match_operand:V2DI 0 "register_operand")
7892 (match_operand:V4SI 1 "nonimmediate_operand")
7893 (match_operand:V4SI 2 "nonimmediate_operand")]
7896 ix86_expand_mul_widen_evenodd (operands[0], operands[1], operands[2],
;; Widening multiply of the odd-indexed elements (per the pattern name)
;; for the VI4_AVX512F modes, signed or unsigned according to any_extend.
;; The sources accept general_vector_operand and the result has mode
;; <sseunpackmode>; the work is delegated to ix86_expand_mul_widen_evenodd.
7901 (define_expand "vec_widen_<s>mult_odd_<mode>"
7902 [(match_operand:<sseunpackmode> 0 "register_operand")
7903 (any_extend:<sseunpackmode>
7904 (match_operand:VI4_AVX512F 1 "general_vector_operand"))
7905 (match_operand:VI4_AVX512F 2 "general_vector_operand")]
7908 ix86_expand_mul_widen_evenodd (operands[0], operands[1], operands[2],
;; Signed dot-product expander for the VI2_AVX2 modes.  It emits pmaddwd of
;; operands 1 and 2 into a fresh <sseunpackmode> temporary, then sets
;; operand 0 to a PLUS in <sseunpackmode>.  Operand 3 is the
;; <sseunpackmode> input (presumably the accumulator added to the
;; temporary -- the PLUS operands are not shown on these lines).
7913 (define_expand "sdot_prod<mode>"
7914 [(match_operand:<sseunpackmode> 0 "register_operand")
7915 (match_operand:VI2_AVX2 1 "register_operand")
7916 (match_operand:VI2_AVX2 2 "register_operand")
7917 (match_operand:<sseunpackmode> 3 "register_operand")]
7920 rtx t = gen_reg_rtx (<sseunpackmode>mode);
7921 emit_insn (gen_<sse2_avx2>_pmaddwd (t, operands[1], operands[2]));
7922 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
7923 gen_rtx_PLUS (<sseunpackmode>mode,
7928 ;; Normally we use widen_mul_even/odd, but combine can't quite get it all
7929 ;; back together when madd is available.
;; This expander chains two XOP multiply-accumulate insns: pmacsdqh
;; accumulates operand 3 into a V2DI temporary, and pmacsdql accumulates
;; that temporary into operand 0.
7930 (define_expand "sdot_prodv4si"
7931 [(match_operand:V2DI 0 "register_operand")
7932 (match_operand:V4SI 1 "register_operand")
7933 (match_operand:V4SI 2 "register_operand")
7934 (match_operand:V2DI 3 "register_operand")]
7937 rtx t = gen_reg_rtx (V2DImode);
7938 emit_insn (gen_xop_pmacsdqh (t, operands[1], operands[2], operands[3]));
7939 emit_insn (gen_xop_pmacsdql (operands[0], operands[1], operands[2], t));
;; Arithmetic right shift for the VI24_AVX2 modes, output as
;; psra<suffix> (non-AVX, destination tied to operand 1) or vpsra<suffix>
;; (AVX).  The count (operand 2, SImode) is either a register or a
;; constant accepted by "N"; length_immediate is computed from whether it
;; is a const_int.
7943 (define_insn "ashr<mode>3"
7944 [(set (match_operand:VI24_AVX2 0 "register_operand" "=x,x")
7946 (match_operand:VI24_AVX2 1 "register_operand" "0,x")
7947 (match_operand:SI 2 "nonmemory_operand" "xN,xN")))]
7950 psra<ssemodesuffix>\t{%2, %0|%0, %2}
7951 vpsra<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
7952 [(set_attr "isa" "noavx,avx")
7953 (set_attr "type" "sseishft")
7954 (set (attr "length_immediate")
7955 (if_then_else (match_operand 2 "const_int_operand")
7957 (const_string "0")))
7958 (set_attr "prefix_data16" "1,*")
7959 (set_attr "prefix" "orig,vex")
7960 (set_attr "mode" "<sseinsnmode>")])
;; AVX512F arithmetic right shift for the VI48_512 modes, optionally
;; masked, output as vpsra<suffix>.  Alternative 0 shifts a register by a
;; register count; alternative 1 allows a register-or-memory source with a
;; constant count accepted by "N".
7962 (define_insn "ashr<mode>3<mask_name>"
7963 [(set (match_operand:VI48_512 0 "register_operand" "=v,v")
7965 (match_operand:VI48_512 1 "nonimmediate_operand" "v,vm")
7966 (match_operand:SI 2 "nonmemory_operand" "v,N")))]
7967 "TARGET_AVX512F && <mask_mode512bit_condition>"
7968 "vpsra<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7969 [(set_attr "type" "sseishft")
7970 (set (attr "length_immediate")
7971 (if_then_else (match_operand 2 "const_int_operand")
7973 (const_string "0")))
7974 (set_attr "mode" "<sseinsnmode>")])
;; Logical shifts (any_lshift) for the VI248_AVX2 modes, output as
;; p<vshift><suffix> (non-AVX, destination tied to operand 1) or
;; vp<vshift><suffix> (AVX).  The count (operand 2, SImode) is either a
;; register or a constant accepted by "N"; length_immediate is computed
;; from whether it is a const_int.
7976 (define_insn "<shift_insn><mode>3"
7977 [(set (match_operand:VI248_AVX2 0 "register_operand" "=x,x")
7978 (any_lshift:VI248_AVX2
7979 (match_operand:VI248_AVX2 1 "register_operand" "0,x")
7980 (match_operand:SI 2 "nonmemory_operand" "xN,xN")))]
7983 p<vshift><ssemodesuffix>\t{%2, %0|%0, %2}
7984 vp<vshift><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
7985 [(set_attr "isa" "noavx,avx")
7986 (set_attr "type" "sseishft")
7987 (set (attr "length_immediate")
7988 (if_then_else (match_operand 2 "const_int_operand")
7990 (const_string "0")))
7991 (set_attr "prefix_data16" "1,*")
7992 (set_attr "prefix" "orig,vex")
7993 (set_attr "mode" "<sseinsnmode>")])
;; AVX512F logical shifts (any_lshift) for the VI48_512 modes, optionally
;; masked, output as vp<vshift><suffix>.  Alternative 0 shifts a register
;; by a register or "N" constant count; alternative 1 pairs a memory source
;; with an "N" constant count.
;; NOTE(review): operand 1 uses the register_operand predicate yet offers
;; an "m" alternative, whereas the masked ashr pattern uses
;; nonimmediate_operand -- confirm whether the predicate is intentional.
7995 (define_insn "<shift_insn><mode>3<mask_name>"
7996 [(set (match_operand:VI48_512 0 "register_operand" "=v,v")
7997 (any_lshift:VI48_512
7998 (match_operand:VI48_512 1 "register_operand" "v,m")
7999 (match_operand:SI 2 "nonmemory_operand" "vN,N")))]
8000 "TARGET_AVX512F && <mask_mode512bit_condition>"
8001 "vp<vshift><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8002 [(set_attr "isa" "avx512f")
8003 (set_attr "type" "sseishft")
8004 (set (attr "length_immediate")
8005 (if_then_else (match_operand 2 "const_int_operand")
8007 (const_string "0")))
8008 (set_attr "prefix" "evex")
8009 (set_attr "mode" "<sseinsnmode>")])
;; Whole-vector left shift expander for the VI_128 modes.  Operand 2 must
;; be a bit count that is a multiple of 8 in the range 0..255.  Operand 1
;; is reinterpreted as V1TImode, operand 3 is a fresh V1TImode temporary,
;; and operand 4 is that temporary viewed in <MODE>mode, which the second
;; set copies into operand 0.
8012 (define_expand "vec_shl_<mode>"
8015 (match_operand:VI_128 1 "register_operand")
8016 (match_operand:SI 2 "const_0_to_255_mul_8_operand")))
8017 (set (match_operand:VI_128 0 "register_operand") (match_dup 4))]
8020 operands[1] = gen_lowpart (V1TImode, operands[1]);
8021 operands[3] = gen_reg_rtx (V1TImode);
8022 operands[4] = gen_lowpart (<MODE>mode, operands[3]);
;; Full-width left shift for the VIMAX_AVX2 modes, output as pslldq
;; (non-AVX, destination tied to operand 1) or vpslldq (AVX).  Operand 2 is
;; a bit count that is a multiple of 8; it is divided by 8 before output,
;; so the instruction receives a byte count.
8025 (define_insn "<sse2_avx2>_ashl<mode>3"
8026 [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,x")
8028 (match_operand:VIMAX_AVX2 1 "register_operand" "0,x")
8029 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
8032 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
8034 switch (which_alternative)
8037 return "pslldq\t{%2, %0|%0, %2}";
8039 return "vpslldq\t{%2, %1, %0|%0, %1, %2}";
8044 [(set_attr "isa" "noavx,avx")
8045 (set_attr "type" "sseishft")
8046 (set_attr "length_immediate" "1")
8047 (set_attr "prefix_data16" "1,*")
8048 (set_attr "prefix" "orig,vex")
8049 (set_attr "mode" "<sseinsnmode>")])
;; Whole-vector right shift expander for the VI_128 modes.  Operand 2 must
;; be a bit count that is a multiple of 8 in the range 0..255.  Operand 1
;; is reinterpreted as V1TImode, operand 3 is a fresh V1TImode temporary,
;; and operand 4 is that temporary viewed in <MODE>mode, which the second
;; set copies into operand 0.
8051 (define_expand "vec_shr_<mode>"
8054 (match_operand:VI_128 1 "register_operand")
8055 (match_operand:SI 2 "const_0_to_255_mul_8_operand")))
8056 (set (match_operand:VI_128 0 "register_operand") (match_dup 4))]
8059 operands[1] = gen_lowpart (V1TImode, operands[1]);
8060 operands[3] = gen_reg_rtx (V1TImode);
8061 operands[4] = gen_lowpart (<MODE>mode, operands[3]);
;; Full-width logical right shift for the VIMAX_AVX2 modes, output as
;; psrldq (non-AVX, destination tied to operand 1) or vpsrldq (AVX).
;; Operand 2 is a bit count that is a multiple of 8; it is divided by 8
;; before output, so the instruction receives a byte count.
8064 (define_insn "<sse2_avx2>_lshr<mode>3"
8065 [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,x")
8066 (lshiftrt:VIMAX_AVX2
8067 (match_operand:VIMAX_AVX2 1 "register_operand" "0,x")
8068 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
8071 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
8073 switch (which_alternative)
8076 return "psrldq\t{%2, %0|%0, %2}";
8078 return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
8083 [(set_attr "isa" "noavx,avx")
8084 (set_attr "type" "sseishft")
8085 (set_attr "length_immediate" "1")
8086 (set_attr "atom_unit" "sishuf")
8087 (set_attr "prefix_data16" "1,*")
8088 (set_attr "prefix" "orig,vex")
8089 (set_attr "mode" "<sseinsnmode>")])
;; Variable rotate (any_rotate) for the VI48_512 modes, optionally masked:
;; the rotate counts come from vector operand 2, which may be in a register
;; or in memory.  Output as vp<rotate>v<suffix>.
8091 (define_insn "avx512f_<rotate>v<mode><mask_name>"
8092 [(set (match_operand:VI48_512 0 "register_operand" "=v")
8093 (any_rotate:VI48_512
8094 (match_operand:VI48_512 1 "register_operand" "v")
8095 (match_operand:VI48_512 2 "nonimmediate_operand" "vm")))]
8097 "vp<rotate>v<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8098 [(set_attr "prefix" "evex")
8099 (set_attr "mode" "<sseinsnmode>")])
;; Rotate by a constant (any_rotate) for the VI48_512 modes, optionally
;; masked: operand 2 is an SImode constant in the range 0..255 and the
;; source may be in a register or in memory.  Output as vp<rotate><suffix>.
8101 (define_insn "avx512f_<rotate><mode><mask_name>"
8102 [(set (match_operand:VI48_512 0 "register_operand" "=v")
8103 (any_rotate:VI48_512
8104 (match_operand:VI48_512 1 "nonimmediate_operand" "vm")
8105 (match_operand:SI 2 "const_0_to_255_operand")))]
8107 "vp<rotate><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8108 [(set_attr "prefix" "evex")
8109 (set_attr "mode" "<sseinsnmode>")])
;; Integer max/min expander (maxmin) for the VI124_256_48_512 modes,
;; optionally masked.  The operands are passed through
;; ix86_fixup_binary_operands_no_copy before the pattern is emitted.
8111 (define_expand "<code><mode>3<mask_name>"
8112 [(set (match_operand:VI124_256_48_512 0 "register_operand")
8113 (maxmin:VI124_256_48_512
8114 (match_operand:VI124_256_48_512 1 "nonimmediate_operand")
8115 (match_operand:VI124_256_48_512 2 "nonimmediate_operand")))]
8116 "TARGET_AVX2 && <mask_mode512bit_condition>"
8117 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Integer max/min insn (maxmin) for the VI124_256_48_512 modes, output as
;; vp<maxmin_int><suffix> with the optional mask operand appended to the
;; destination.  Operand 1 is commutative ("%v").
;; The "mode" attribute is derived from the vector mode via <sseinsnmode>
;; instead of being fixed at OI: the iterator and mask support indicate
;; that 512-bit modes are matched here as well, and the other masked
;; patterns nearby use <sseinsnmode> for the same reason.
8119 (define_insn "*avx2_<code><mode>3<mask_name>"
8120 [(set (match_operand:VI124_256_48_512 0 "register_operand" "=v")
8121 (maxmin:VI124_256_48_512
8122 (match_operand:VI124_256_48_512 1 "nonimmediate_operand" "%v")
8123 (match_operand:VI124_256_48_512 2 "nonimmediate_operand" "vm")))]
8124 "TARGET_AVX2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
8125 && <mask_mode512bit_condition>"
8126 "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8127 [(set_attr "type" "sseiadd")
8128 (set_attr "prefix_extra" "1")
8129 (set_attr "prefix" "maybe_evex")
8130 (set_attr "mode" "<sseinsnmode>")])
;; Integer max/min expander for the VI8_AVX2 modes, synthesized as a
;; vector conditional through ix86_expand_int_vcond.  xops[0] is the
;; destination; xops[1]/xops[2] are operands 1/2 for SMAX and UMAX and are
;; swapped for the min codes.  The comparison xops[3] is
;; "operand 1 > operand 2", using GTU for UMAX/UMIN and GT otherwise.
8132 (define_expand "<code><mode>3"
8133 [(set (match_operand:VI8_AVX2 0 "register_operand")
8135 (match_operand:VI8_AVX2 1 "register_operand")
8136 (match_operand:VI8_AVX2 2 "register_operand")))]
8143 xops[0] = operands[0];
8145 if (<CODE> == SMAX || <CODE> == UMAX)
8147 xops[1] = operands[1];
8148 xops[2] = operands[2];
8152 xops[1] = operands[2];
8153 xops[2] = operands[1];
8156 code = (<CODE> == UMAX || <CODE> == UMIN) ? GTU : GT;
8158 xops[3] = gen_rtx_fmt_ee (code, VOIDmode, operands[1], operands[2]);
8159 xops[4] = operands[1];
8160 xops[5] = operands[2];
8162 ok = ix86_expand_int_vcond (xops);
;; Integer max/min expander for the VI124_128 modes that falls back on a
;; signed GT comparison.  With SSE4.1, or for V8HImode, the operands are
;; only passed through ix86_fixup_binary_operands_no_copy.  Otherwise both
;; sources are forced into registers and the operation is synthesized
;; through ix86_expand_int_vcond, with xops[1]/xops[2] taken in either
;; direct or swapped order and xops[3] = "operand 1 > operand 2".
8167 (define_expand "<code><mode>3"
8168 [(set (match_operand:VI124_128 0 "register_operand")
8170 (match_operand:VI124_128 1 "nonimmediate_operand")
8171 (match_operand:VI124_128 2 "nonimmediate_operand")))]
8174 if (TARGET_SSE4_1 || <MODE>mode == V8HImode)
8175 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
8181 xops[0] = operands[0];
8182 operands[1] = force_reg (<MODE>mode, operands[1]);
8183 operands[2] = force_reg (<MODE>mode, operands[2]);
8187 xops[1] = operands[1];
8188 xops[2] = operands[2];
8192 xops[1] = operands[2];
8193 xops[2] = operands[1];
8196 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
8197 xops[4] = operands[1];
8198 xops[5] = operands[2];
8200 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 integer max/min insn for the VI14_128 modes, output as
;; p<maxmin_int><suffix> (non-AVX, destination tied to operand 1) or
;; vp<maxmin_int><suffix> (AVX).  Operand 1 is commutative.
8206 (define_insn "*sse4_1_<code><mode>3"
8207 [(set (match_operand:VI14_128 0 "register_operand" "=x,x")
8209 (match_operand:VI14_128 1 "nonimmediate_operand" "%0,x")
8210 (match_operand:VI14_128 2 "nonimmediate_operand" "xm,xm")))]
8211 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
8213 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
8214 vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
8215 [(set_attr "isa" "noavx,avx")
8216 (set_attr "type" "sseiadd")
8217 (set_attr "prefix_extra" "1,*")
8218 (set_attr "prefix" "orig,vex")
8219 (set_attr "mode" "TI")])
;; SSE2 integer max/min insn for V8HImode, output as p<maxmin_int>w
;; (non-AVX, destination tied to operand 1) or vp<maxmin_int>w (AVX).
;; Operand 1 is commutative.
8221 (define_insn "*<code>v8hi3"
8222 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
8224 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x")
8225 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")))]
8226 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V8HImode, operands)"
8228 p<maxmin_int>w\t{%2, %0|%0, %2}
8229 vp<maxmin_int>w\t{%2, %1, %0|%0, %1, %2}"
8230 [(set_attr "isa" "noavx,avx")
8231 (set_attr "type" "sseiadd")
8232 (set_attr "prefix_data16" "1,*")
8233 (set_attr "prefix_extra" "*,1")
8234 (set_attr "prefix" "orig,vex")
8235 (set_attr "mode" "TI")])
;; Integer max/min expander for the VI124_128 modes that falls back on an
;; unsigned GTU comparison.  Three strategies are visible:
;;  - with SSE4.1, or for V16QImode, the operands are only passed through
;;    ix86_fixup_binary_operands_no_copy;
;;  - UMAX in V8HImode is built from an unsigned saturating subtract
;;    (op1 -us op2) followed by an add of op2, using a fresh temporary
;;    when the destination is the same rtx as op2;
;;  - otherwise both sources are forced into registers and the operation
;;    is synthesized through ix86_expand_int_vcond with
;;    xops[3] = "operand 1 >u operand 2".
8237 (define_expand "<code><mode>3"
8238 [(set (match_operand:VI124_128 0 "register_operand")
8240 (match_operand:VI124_128 1 "nonimmediate_operand")
8241 (match_operand:VI124_128 2 "nonimmediate_operand")))]
8244 if (TARGET_SSE4_1 || <MODE>mode == V16QImode)
8245 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
8246 else if (<CODE> == UMAX && <MODE>mode == V8HImode)
8248 rtx op0 = operands[0], op2 = operands[2], op3 = op0;
8249 operands[1] = force_reg (<MODE>mode, operands[1]);
8250 if (rtx_equal_p (op3, op2))
8251 op3 = gen_reg_rtx (V8HImode);
8252 emit_insn (gen_sse2_ussubv8hi3 (op3, operands[1], op2));
8253 emit_insn (gen_addv8hi3 (op0, op3, op2));
8261 operands[1] = force_reg (<MODE>mode, operands[1]);
8262 operands[2] = force_reg (<MODE>mode, operands[2]);
8264 xops[0] = operands[0];
8268 xops[1] = operands[1];
8269 xops[2] = operands[2];
8273 xops[1] = operands[2];
8274 xops[2] = operands[1];
8277 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
8278 xops[4] = operands[1];
8279 xops[5] = operands[2];
8281 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 integer max/min insn for the VI24_128 modes, output as
;; p<maxmin_int><suffix> (non-AVX, destination tied to operand 1) or
;; vp<maxmin_int><suffix> (AVX).  Operand 1 is commutative.
8287 (define_insn "*sse4_1_<code><mode>3"
8288 [(set (match_operand:VI24_128 0 "register_operand" "=x,x")
8290 (match_operand:VI24_128 1 "nonimmediate_operand" "%0,x")
8291 (match_operand:VI24_128 2 "nonimmediate_operand" "xm,xm")))]
8292 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
8294 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
8295 vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
8296 [(set_attr "isa" "noavx,avx")
8297 (set_attr "type" "sseiadd")
8298 (set_attr "prefix_extra" "1,*")
8299 (set_attr "prefix" "orig,vex")
8300 (set_attr "mode" "TI")])
;; SSE2 integer max/min insn for V16QImode, output as p<maxmin_int>b
;; (non-AVX, destination tied to operand 1) or vp<maxmin_int>b (AVX).
;; Operand 1 is commutative.
8302 (define_insn "*<code>v16qi3"
8303 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
8305 (match_operand:V16QI 1 "nonimmediate_operand" "%0,x")
8306 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")))]
8307 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V16QImode, operands)"
8309 p<maxmin_int>b\t{%2, %0|%0, %2}
8310 vp<maxmin_int>b\t{%2, %1, %0|%0, %1, %2}"
8311 [(set_attr "isa" "noavx,avx")
8312 (set_attr "type" "sseiadd")
8313 (set_attr "prefix_data16" "1,*")
8314 (set_attr "prefix_extra" "*,1")
8315 (set_attr "prefix" "orig,vex")
8316 (set_attr "mode" "TI")])
8318 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8320 ;; Parallel integral comparisons
8322 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Element-wise equality comparison expander for the VI_256 modes.  The
;; operands are passed through ix86_fixup_binary_operands_no_copy with
;; code EQ before the pattern is emitted.
8324 (define_expand "avx2_eq<mode>3"
8325 [(set (match_operand:VI_256 0 "register_operand")
8327 (match_operand:VI_256 1 "nonimmediate_operand")
8328 (match_operand:VI_256 2 "nonimmediate_operand")))]
8330 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
;; AVX2 equality comparison insn for the VI_256 modes, output as the
;; three-operand vpcmpeq<suffix>.  Operand 1 is commutative ("%x").
8332 (define_insn "*avx2_eq<mode>3"
8333 [(set (match_operand:VI_256 0 "register_operand" "=x")
8335 (match_operand:VI_256 1 "nonimmediate_operand" "%x")
8336 (match_operand:VI_256 2 "nonimmediate_operand" "xm")))]
8337 "TARGET_AVX2 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
8338 "vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
8339 [(set_attr "type" "ssecmp")
8340 (set_attr "prefix_extra" "1")
8341 (set_attr "prefix" "vex")
8342 (set_attr "mode" "OI")])
;; Equality comparison expander for the VI48_512 modes.  The result is a
;; value of mode <avx512fmaskmode> and the comparison is represented as an
;; unspec.  The operands are passed through
;; ix86_fixup_binary_operands_no_copy with code EQ.
8344 (define_expand "avx512f_eq<mode>3"
8345 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
8346 (unspec:<avx512fmaskmode>
8347 [(match_operand:VI48_512 1 "register_operand")
8348 (match_operand:VI48_512 2 "nonimmediate_operand")]
8351 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
;; AVX512F equality comparison insn for the VI48_512 modes: writes a
;; <avx512fmaskmode> result to a "k" (mask) register, output as
;; vpcmpeq<suffix>.  Operand 1 is commutative ("%v").
8353 (define_insn "avx512f_eq<mode>3_1"
8354 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
8355 (unspec:<avx512fmaskmode>
8356 [(match_operand:VI48_512 1 "register_operand" "%v")
8357 (match_operand:VI48_512 2 "nonimmediate_operand" "vm")]
8359 "TARGET_AVX512F && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
8360 "vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
8361 [(set_attr "type" "ssecmp")
8362 (set_attr "prefix_extra" "1")
8363 (set_attr "prefix" "evex")
8364 (set_attr "mode" "<sseinsnmode>")])
;; SSE4.1 equality comparison insn for V2DImode, output as pcmpeqq
;; (non-AVX, destination tied to operand 1) or vpcmpeqq (AVX).  Operand 1
;; is commutative.
8366 (define_insn "*sse4_1_eqv2di3"
8367 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
8369 (match_operand:V2DI 1 "nonimmediate_operand" "%0,x")
8370 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")))]
8371 "TARGET_SSE4_1 && ix86_binary_operator_ok (EQ, V2DImode, operands)"
8373 pcmpeqq\t{%2, %0|%0, %2}
8374 vpcmpeqq\t{%2, %1, %0|%0, %1, %2}"
8375 [(set_attr "isa" "noavx,avx")
8376 (set_attr "type" "ssecmp")
8377 (set_attr "prefix_extra" "1")
8378 (set_attr "prefix" "orig,vex")
8379 (set_attr "mode" "TI")])
;; SSE2 equality comparison insn for the VI124_128 modes, disabled when
;; XOP is enabled.  Output as pcmpeq<suffix> (non-AVX, destination tied to
;; operand 1) or vpcmpeq<suffix> (AVX).  Operand 1 is commutative.
8381 (define_insn "*sse2_eq<mode>3"
8382 [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
8384 (match_operand:VI124_128 1 "nonimmediate_operand" "%0,x")
8385 (match_operand:VI124_128 2 "nonimmediate_operand" "xm,xm")))]
8386 "TARGET_SSE2 && !TARGET_XOP
8387 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
8389 pcmpeq<ssemodesuffix>\t{%2, %0|%0, %2}
8390 vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
8391 [(set_attr "isa" "noavx,avx")
8392 (set_attr "type" "ssecmp")
8393 (set_attr "prefix_data16" "1,*")
8394 (set_attr "prefix" "orig,vex")
8395 (set_attr "mode" "TI")])
;; Equality comparison expander for the VI124_128 modes, available with
;; SSE2 when XOP is not enabled.  The operands are passed through
;; ix86_fixup_binary_operands_no_copy with code EQ.
8397 (define_expand "sse2_eq<mode>3"
8398 [(set (match_operand:VI124_128 0 "register_operand")
8400 (match_operand:VI124_128 1 "nonimmediate_operand")
8401 (match_operand:VI124_128 2 "nonimmediate_operand")))]
8402 "TARGET_SSE2 && !TARGET_XOP "
8403 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
;; Equality comparison expander for V2DImode.  The operands are passed
;; through ix86_fixup_binary_operands_no_copy with code EQ.
8405 (define_expand "sse4_1_eqv2di3"
8406 [(set (match_operand:V2DI 0 "register_operand")
8408 (match_operand:V2DI 1 "nonimmediate_operand")
8409 (match_operand:V2DI 2 "nonimmediate_operand")))]
8411 "ix86_fixup_binary_operands_no_copy (EQ, V2DImode, operands);")
;; Greater-than comparison insn for V2DImode, output as pcmpgtq (non-AVX,
;; destination tied to operand 1) or vpcmpgtq (AVX).  Operand 1 must be a
;; register and carries no commutativity marker.
8413 (define_insn "sse4_2_gtv2di3"
8414 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
8416 (match_operand:V2DI 1 "register_operand" "0,x")
8417 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")))]
8420 pcmpgtq\t{%2, %0|%0, %2}
8421 vpcmpgtq\t{%2, %1, %0|%0, %1, %2}"
8422 [(set_attr "isa" "noavx,avx")
8423 (set_attr "type" "ssecmp")
8424 (set_attr "prefix_extra" "1")
8425 (set_attr "prefix" "orig,vex")
8426 (set_attr "mode" "TI")])
;; Greater-than comparison insn for the VI_256 modes, output as the
;; three-operand vpcmpgt<suffix>.  Operand 1 must be a register and
;; carries no commutativity marker.
8428 (define_insn "avx2_gt<mode>3"
8429 [(set (match_operand:VI_256 0 "register_operand" "=x")
8431 (match_operand:VI_256 1 "register_operand" "x")
8432 (match_operand:VI_256 2 "nonimmediate_operand" "xm")))]
8434 "vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
8435 [(set_attr "type" "ssecmp")
8436 (set_attr "prefix_extra" "1")
8437 (set_attr "prefix" "vex")
8438 (set_attr "mode" "OI")])
;; Greater-than comparison insn for the VI48_512 modes, represented as
;; UNSPEC_MASKED_GT.  It writes a <avx512fmaskmode> result to a "k" (mask)
;; register and is output as vpcmpgt<suffix>.
8440 (define_insn "avx512f_gt<mode>3"
8441 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
8442 (unspec:<avx512fmaskmode>
8443 [(match_operand:VI48_512 1 "register_operand" "v")
8444 (match_operand:VI48_512 2 "nonimmediate_operand" "vm")] UNSPEC_MASKED_GT))]
8446 "vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
8447 [(set_attr "type" "ssecmp")
8448 (set_attr "prefix_extra" "1")
8449 (set_attr "prefix" "evex")
8450 (set_attr "mode" "<sseinsnmode>")])
;; SSE2 greater-than comparison insn for the VI124_128 modes, disabled
;; when XOP is enabled.  Output as pcmpgt<suffix> (non-AVX, destination
;; tied to operand 1) or vpcmpgt<suffix> (AVX).  Operand 1 must be a
;; register and carries no commutativity marker.
8452 (define_insn "sse2_gt<mode>3"
8453 [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
8455 (match_operand:VI124_128 1 "register_operand" "0,x")
8456 (match_operand:VI124_128 2 "nonimmediate_operand" "xm,xm")))]
8457 "TARGET_SSE2 && !TARGET_XOP"
8459 pcmpgt<ssemodesuffix>\t{%2, %0|%0, %2}
8460 vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
8461 [(set_attr "isa" "noavx,avx")
8462 (set_attr "type" "ssecmp")
8463 (set_attr "prefix_data16" "1,*")
8464 (set_attr "prefix" "orig,vex")
8465 (set_attr "mode" "TI")])
;; Expander "vcond<V_512:mode><VI_512:mode>".
;; Operator 3 compares the integer vectors in operands 4 and 5; operands
;; 1 and 2 are the V_512 data values and operand 0 the V_512 result.
;; The condition requires the data mode and the comparison mode to have
;; the same number of elements.  Expansion is delegated to
;; ix86_expand_int_vcond, whose boolean result is captured in `ok'.
8467 (define_expand "vcond<V_512:mode><VI_512:mode>"
8468 [(set (match_operand:V_512 0 "register_operand")
8470 (match_operator 3 ""
8471 [(match_operand:VI_512 4 "nonimmediate_operand")
8472 (match_operand:VI_512 5 "general_operand")])
8473 (match_operand:V_512 1)
8474 (match_operand:V_512 2)))]
8476 && (GET_MODE_NUNITS (<V_512:MODE>mode)
8477 == GET_MODE_NUNITS (<VI_512:MODE>mode))"
8479 bool ok = ix86_expand_int_vcond (operands);
;; Expander "vcond<V_256:mode><VI_256:mode>": 256-bit variant.
;; Operator 3 compares operands 4 and 5 (VI_256); operands 1 and 2 are
;; the V_256 data values.  Data and comparison modes must have the same
;; element count.  Expansion is delegated to ix86_expand_int_vcond.
8484 (define_expand "vcond<V_256:mode><VI_256:mode>"
8485 [(set (match_operand:V_256 0 "register_operand")
8487 (match_operator 3 ""
8488 [(match_operand:VI_256 4 "nonimmediate_operand")
8489 (match_operand:VI_256 5 "general_operand")])
8490 (match_operand:V_256 1)
8491 (match_operand:V_256 2)))]
8493 && (GET_MODE_NUNITS (<V_256:MODE>mode)
8494 == GET_MODE_NUNITS (<VI_256:MODE>mode))"
8496 bool ok = ix86_expand_int_vcond (operands);
;; Expander "vcond<V_128:mode><VI124_128:mode>": 128-bit variant.
;; Operator 3 compares operands 4 and 5 (VI124_128); operands 1 and 2
;; are the V_128 data values.  Data and comparison modes must have the
;; same element count.  Expansion is delegated to ix86_expand_int_vcond.
8501 (define_expand "vcond<V_128:mode><VI124_128:mode>"
8502 [(set (match_operand:V_128 0 "register_operand")
8504 (match_operator 3 ""
8505 [(match_operand:VI124_128 4 "nonimmediate_operand")
8506 (match_operand:VI124_128 5 "general_operand")])
8507 (match_operand:V_128 1)
8508 (match_operand:V_128 2)))]
8510 && (GET_MODE_NUNITS (<V_128:MODE>mode)
8511 == GET_MODE_NUNITS (<VI124_128:MODE>mode))"
8513 bool ok = ix86_expand_int_vcond (operands);
;; Expander "vcond<VI8F_128:mode>v2di": if_then_else over a VI8F_128
;; data mode where the comparison (operator 3 on operands 4 and 5) is
;; done in V2DI.  Expansion is delegated to ix86_expand_int_vcond.
8518 (define_expand "vcond<VI8F_128:mode>v2di"
8519 [(set (match_operand:VI8F_128 0 "register_operand")
8520 (if_then_else:VI8F_128
8521 (match_operator 3 ""
8522 [(match_operand:V2DI 4 "nonimmediate_operand")
8523 (match_operand:V2DI 5 "general_operand")])
8524 (match_operand:VI8F_128 1)
8525 (match_operand:VI8F_128 2)))]
8528 bool ok = ix86_expand_int_vcond (operands);
;; Expander "vcondu<V_512:mode><VI_512:mode>".
;; Same operand layout as the vcond expanders, but here both compared
;; operands (4 and 5) must be nonimmediate_operand and the data operands
;; 1 and 2 carry an explicit general_operand predicate.
;; NOTE(review): "vcondu" is presumably the unsigned-comparison standard
;; pattern name -- confirm against the GCC standard-names documentation.
;; Expansion is delegated to ix86_expand_int_vcond.
8533 (define_expand "vcondu<V_512:mode><VI_512:mode>"
8534 [(set (match_operand:V_512 0 "register_operand")
8536 (match_operator 3 ""
8537 [(match_operand:VI_512 4 "nonimmediate_operand")
8538 (match_operand:VI_512 5 "nonimmediate_operand")])
8539 (match_operand:V_512 1 "general_operand")
8540 (match_operand:V_512 2 "general_operand")))]
8542 && (GET_MODE_NUNITS (<V_512:MODE>mode)
8543 == GET_MODE_NUNITS (<VI_512:MODE>mode))"
8545 bool ok = ix86_expand_int_vcond (operands);
;; Expander "vcondu<V_256:mode><VI_256:mode>": 256-bit variant.
;; Operator 3 compares operands 4 and 5 (both nonimmediate_operand);
;; operands 1 and 2 are general_operand data values.  Data and
;; comparison modes must have the same element count.  Expansion is
;; delegated to ix86_expand_int_vcond.
8550 (define_expand "vcondu<V_256:mode><VI_256:mode>"
8551 [(set (match_operand:V_256 0 "register_operand")
8553 (match_operator 3 ""
8554 [(match_operand:VI_256 4 "nonimmediate_operand")
8555 (match_operand:VI_256 5 "nonimmediate_operand")])
8556 (match_operand:V_256 1 "general_operand")
8557 (match_operand:V_256 2 "general_operand")))]
8559 && (GET_MODE_NUNITS (<V_256:MODE>mode)
8560 == GET_MODE_NUNITS (<VI_256:MODE>mode))"
8562 bool ok = ix86_expand_int_vcond (operands);
;; Expander "vcondu<V_128:mode><VI124_128:mode>": 128-bit variant.
;; Operator 3 compares operands 4 and 5 (both nonimmediate_operand);
;; operands 1 and 2 are general_operand data values.  Data and
;; comparison modes must have the same element count.  Expansion is
;; delegated to ix86_expand_int_vcond.
8567 (define_expand "vcondu<V_128:mode><VI124_128:mode>"
8568 [(set (match_operand:V_128 0 "register_operand")
8570 (match_operator 3 ""
8571 [(match_operand:VI124_128 4 "nonimmediate_operand")
8572 (match_operand:VI124_128 5 "nonimmediate_operand")])
8573 (match_operand:V_128 1 "general_operand")
8574 (match_operand:V_128 2 "general_operand")))]
8576 && (GET_MODE_NUNITS (<V_128:MODE>mode)
8577 == GET_MODE_NUNITS (<VI124_128:MODE>mode))"
8579 bool ok = ix86_expand_int_vcond (operands);
;; Expander "vcondu<VI8F_128:mode>v2di": if_then_else over a VI8F_128
;; data mode with the comparison (operator 3 on operands 4 and 5) done
;; in V2DI.  Expansion is delegated to ix86_expand_int_vcond.
8584 (define_expand "vcondu<VI8F_128:mode>v2di"
8585 [(set (match_operand:VI8F_128 0 "register_operand")
8586 (if_then_else:VI8F_128
8587 (match_operator 3 ""
8588 [(match_operand:V2DI 4 "nonimmediate_operand")
8589 (match_operand:V2DI 5 "nonimmediate_operand")])
8590 (match_operand:VI8F_128 1 "general_operand")
8591 (match_operand:VI8F_128 2 "general_operand")))]
8594 bool ok = ix86_expand_int_vcond (operands);
;; Mode iterator for the variable vec_perm expander.
;; The six 128-bit modes are unconditional, the 256-bit modes are gated
;; on TARGET_AVX2 and the 512-bit modes on TARGET_AVX512F.
8599 (define_mode_iterator VEC_PERM_AVX2
8600 [V16QI V8HI V4SI V2DI V4SF V2DF
8601 (V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
8602 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
8603 (V8SF "TARGET_AVX2") (V4DF "TARGET_AVX2")
8604 (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")
8605 (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")])
;; Expander "vec_perm<mode>": operands 1 and 2 are the input vectors,
;; operand 3 is a register in the matching integer vector mode
;; (<sseintvecmode>), operand 0 the result.  Available when any of
;; SSSE3, AVX or XOP is enabled; the work is done by
;; ix86_expand_vec_perm.
8607 (define_expand "vec_perm<mode>"
8608 [(match_operand:VEC_PERM_AVX2 0 "register_operand")
8609 (match_operand:VEC_PERM_AVX2 1 "register_operand")
8610 (match_operand:VEC_PERM_AVX2 2 "register_operand")
8611 (match_operand:<sseintvecmode> 3 "register_operand")]
8612 "TARGET_SSSE3 || TARGET_AVX || TARGET_XOP"
8614 ix86_expand_vec_perm (operands);
;; Mode iterator for the vec_perm_const expander.  Every entry carries
;; its own enabling condition: SSE for the 4x32 / 2x64 128-bit modes,
;; SSE2 for V16QI / V8HI, AVX for the 32/64-bit-element 256-bit modes,
;; AVX2 for V32QI / V16HI, and AVX512F for the 512-bit modes.
8618 (define_mode_iterator VEC_PERM_CONST
8619 [(V4SF "TARGET_SSE") (V4SI "TARGET_SSE")
8620 (V2DF "TARGET_SSE") (V2DI "TARGET_SSE")
8621 (V16QI "TARGET_SSE2") (V8HI "TARGET_SSE2")
8622 (V8SF "TARGET_AVX") (V4DF "TARGET_AVX")
8623 (V8SI "TARGET_AVX") (V4DI "TARGET_AVX")
8624 (V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
8625 (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")
8626 (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")])
;; Expander "vec_perm_const<mode>": like vec_perm, but operand 3 (the
;; selector, in <sseintvecmode>) has no predicate.  The body branches on
;; the result of ix86_expand_vec_perm_const (operands).
8628 (define_expand "vec_perm_const<mode>"
8629 [(match_operand:VEC_PERM_CONST 0 "register_operand")
8630 (match_operand:VEC_PERM_CONST 1 "register_operand")
8631 (match_operand:VEC_PERM_CONST 2 "register_operand")
8632 (match_operand:<sseintvecmode> 3)]
8635 if (ix86_expand_vec_perm_const (operands))
8641 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8643 ;; Parallel bitwise logical operations
8645 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander "one_cmpl<mode>2": bitwise NOT for the VI modes, expressed
;; as an XOR of operand 1 with an all-ones vector.
;; The preparation code builds a CONST_VECTOR with constm1_rtx in each
;; of the mode's GET_MODE_NUNITS elements and forces it into a register
;; as operands[2].
8647 (define_expand "one_cmpl<mode>2"
8648 [(set (match_operand:VI 0 "register_operand")
8649 (xor:VI (match_operand:VI 1 "nonimmediate_operand")
8653 int i, n = GET_MODE_NUNITS (<MODE>mode);
8654 rtvec v = rtvec_alloc (n);
8656 for (i = 0; i < n; ++i)
8657 RTVEC_ELT (v, i) = constm1_rtx;
8659 operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v));
;; Expander "<sse2_avx2>_andnot<mode>3<mask_name>" for the VI_AVX2
;; modes: operand 1 (a register) appears under NOT, operand 2 may be a
;; register or memory.  Requires TARGET_SSE2 plus the mask/512-bit
;; condition substituted for <mask_mode512bit_condition>.
8662 (define_expand "<sse2_avx2>_andnot<mode>3<mask_name>"
8663 [(set (match_operand:VI_AVX2 0 "register_operand")
8665 (not:VI_AVX2 (match_operand:VI_AVX2 1 "register_operand"))
8666 (match_operand:VI_AVX2 2 "nonimmediate_operand")))]
8667 "TARGET_SSE2 && <mask_mode512bit_condition>")
;; Insn "*andnot<mode>3<mask_name>" for all VI modes.
;; The output code assembles the instruction text at run time: the
;; mnemonic (tmp) is selected by switching on get_attr_mode (insn), with
;; gcc_asserts checking that the required ISA level is enabled for each
;; case; the operand layout (ops) is selected by which_alternative --
;; a two-operand form, or a "v"-prefixed three-operand form carrying the
;; optional mask operand.  Both are combined into the static buffer
;; `buf' with snprintf.
;; The "mode" attribute is computed by a cond over tuning and ISA flags
;; (packed-single preference, AVX2, AVX, SSE2 absence / size
;; optimization), falling back to <sseinsnmode>.
8669 (define_insn "*andnot<mode>3<mask_name>"
8670 [(set (match_operand:VI 0 "register_operand" "=x,v")
8672 (not:VI (match_operand:VI 1 "register_operand" "0,v"))
8673 (match_operand:VI 2 "nonimmediate_operand" "xm,vm")))]
8674 "TARGET_SSE && <mask_mode512bit_condition>"
8676 static char buf[64];
8680 switch (get_attr_mode (insn))
8683 gcc_assert (TARGET_AVX512F);
8685 tmp = "pandn<ssemodesuffix>";
8689 gcc_assert (TARGET_AVX2);
8691 gcc_assert (TARGET_SSE2);
8697 gcc_assert (TARGET_AVX);
8699 gcc_assert (TARGET_SSE);
8708 switch (which_alternative)
8711 ops = "%s\t{%%2, %%0|%%0, %%2}";
8714 ops = "v%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
8720 snprintf (buf, sizeof (buf), ops, tmp);
8723 [(set_attr "isa" "noavx,avx")
8724 (set_attr "type" "sselog")
8725 (set (attr "prefix_data16")
8727 (and (eq_attr "alternative" "0")
8728 (eq_attr "mode" "TI"))
8730 (const_string "*")))
8731 (set_attr "prefix" "<mask_prefix3>")
8733 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
8734 (const_string "<ssePSmode>")
8735 (match_test "TARGET_AVX2")
8736 (const_string "<sseinsnmode>")
8737 (match_test "TARGET_AVX")
8739 (match_test "GET_MODE_SIZE (<MODE>mode) > 16")
8740 (const_string "V8SF")
8741 (const_string "<sseinsnmode>"))
8742 (ior (not (match_test "TARGET_SSE2"))
8743 (match_test "optimize_function_for_size_p (cfun)"))
8744 (const_string "V4SF")
8746 (const_string "<sseinsnmode>")))])
;; Expander "<code><mode>3": binary operation over the VI modes,
;; iterated over <code>.  Both inputs may be registers, memory or
;; constant vectors (nonimmediate_or_const_vector_operand); the
;; expansion is delegated to ix86_expand_vector_logical_operator.
8748 (define_expand "<code><mode>3"
8749 [(set (match_operand:VI 0 "register_operand")
8751 (match_operand:VI 1 "nonimmediate_or_const_vector_operand")
8752 (match_operand:VI 2 "nonimmediate_or_const_vector_operand")))]
8755 ix86_expand_vector_logical_operator (<CODE>, <MODE>mode, operands);
;; Insn "<mask_codefor><code><mode>3<mask_name>" for all VI modes.
;; Operand 1 is marked commutative ("%"); the insn condition also
;; requires ix86_binary_operator_ok.
;; As in the andnot insn, the output code picks the mnemonic by
;; switching on get_attr_mode (insn) (with gcc_asserts on the required
;; ISA level), picks the two-operand or "v"-prefixed three-operand
;; layout by which_alternative, and formats both into the static buffer
;; `buf' with snprintf.
;; The "mode" attribute uses the same tuning/ISA-driven cond as the
;; andnot insn.
8759 (define_insn "<mask_codefor><code><mode>3<mask_name>"
8760 [(set (match_operand:VI 0 "register_operand" "=x,v")
8762 (match_operand:VI 1 "nonimmediate_operand" "%0,v")
8763 (match_operand:VI 2 "nonimmediate_operand" "xm,vm")))]
8764 "TARGET_SSE && <mask_mode512bit_condition>
8765 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
8767 static char buf[64];
8771 switch (get_attr_mode (insn))
8774 gcc_assert (TARGET_AVX512F);
8775 tmp = "p<logic><ssemodesuffix>";
8779 gcc_assert (TARGET_AVX2);
8781 gcc_assert (TARGET_SSE2);
8787 gcc_assert (TARGET_AVX512F);
8789 gcc_assert (TARGET_AVX);
8791 gcc_assert (TARGET_SSE);
8800 switch (which_alternative)
8803 ops = "%s\t{%%2, %%0|%%0, %%2}";
8806 ops = "v%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
8812 snprintf (buf, sizeof (buf), ops, tmp);
8815 [(set_attr "isa" "noavx,avx")
8816 (set_attr "type" "sselog")
8817 (set (attr "prefix_data16")
8819 (and (eq_attr "alternative" "0")
8820 (eq_attr "mode" "TI"))
8822 (const_string "*")))
8823 (set_attr "prefix" "<mask_prefix3>")
8825 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
8826 (const_string "<ssePSmode>")
8827 (match_test "TARGET_AVX2")
8828 (const_string "<sseinsnmode>")
8829 (match_test "TARGET_AVX")
8831 (match_test "GET_MODE_SIZE (<MODE>mode) > 16")
8832 (const_string "V8SF")
8833 (const_string "<sseinsnmode>"))
8834 (ior (not (match_test "TARGET_SSE2"))
8835 (match_test "optimize_function_for_size_p (cfun)"))
8836 (const_string "V4SF")
8838 (const_string "<sseinsnmode>")))])
;; Insn "avx512f_testm<mode>3": emits vptestm<ssemodesuffix> on two
;; VI48_512 operands (operand 2 may be memory) and writes the result to
;; a "k" register in <avx512fmaskmode>.  Modelled as an unspec;
;; EVEX-encoded.
8840 (define_insn "avx512f_testm<mode>3"
8841 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
8842 (unspec:<avx512fmaskmode>
8843 [(match_operand:VI48_512 1 "register_operand" "v")
8844 (match_operand:VI48_512 2 "nonimmediate_operand" "vm")]
8847 "vptestm<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
8848 [(set_attr "prefix" "evex")
8849 (set_attr "mode" "<sseinsnmode>")])
;; Insn "avx512f_testnm<mode>3": emits vptestnm<ssemodesuffix> on two
;; VI48_512 operands (operand 2 may be memory) and writes the result to
;; a "k" register in <avx512fmaskmode>.  Modelled as an unspec;
;; EVEX-encoded.
;; The mnemonic is spelled with a literal "v", matching
;; avx512f_testm<mode>3: this insn has only an EVEX form (512-bit
;; operands, mask-register result), so the conditional "%v" prefix
;; escape used for dual legacy/VEX templates does not apply here.
8851 (define_insn "avx512f_testnm<mode>3"
8852 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
8853 (unspec:<avx512fmaskmode>
8854 [(match_operand:VI48_512 1 "register_operand" "v")
8855 (match_operand:VI48_512 2 "nonimmediate_operand" "vm")]
8858 "vptestnm<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
8859 [(set_attr "prefix" "evex")
8860 (set_attr "mode" "<sseinsnmode>")])
8862 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8864 ;; Parallel integral element swizzling
8866 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander "vec_pack_trunc_<mode>": operands 1 and 2 are the wide input
;; vectors, operand 0 the result in <ssepackmode>.
;; Both inputs are re-viewed in <ssepackmode> with gen_lowpart, then
;; ix86_expand_vec_extract_even_odd is called with last argument 0.
8868 (define_expand "vec_pack_trunc_<mode>"
8869 [(match_operand:<ssepackmode> 0 "register_operand")
8870 (match_operand:VI248_AVX2_8_AVX512F 1 "register_operand")
8871 (match_operand:VI248_AVX2_8_AVX512F 2 "register_operand")]
8874 rtx op1 = gen_lowpart (<ssepackmode>mode, operands[1]);
8875 rtx op2 = gen_lowpart (<ssepackmode>mode, operands[2]);
8876 ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
;; Insn "<sse2_avx2>_packsswb": the result is the concatenation of the
;; signed-saturating truncations (ss_truncate) of operands 1 and 2,
;; each given in <sseunpackmode>.
;; Alternative 0 is the legacy two-operand packsswb, alternative 1 the
;; VEX three-operand vpacksswb.
8880 (define_insn "<sse2_avx2>_packsswb"
8881 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
8882 (vec_concat:VI1_AVX2
8883 (ss_truncate:<ssehalfvecmode>
8884 (match_operand:<sseunpackmode> 1 "register_operand" "0,x"))
8885 (ss_truncate:<ssehalfvecmode>
8886 (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,xm"))))]
8889 packsswb\t{%2, %0|%0, %2}
8890 vpacksswb\t{%2, %1, %0|%0, %1, %2}"
8891 [(set_attr "isa" "noavx,avx")
8892 (set_attr "type" "sselog")
8893 (set_attr "prefix_data16" "1,*")
8894 (set_attr "prefix" "orig,vex")
8895 (set_attr "mode" "<sseinsnmode>")])
;; Insn "<sse2_avx2>_packssdw": concatenation of the signed-saturating
;; truncations (ss_truncate) of operands 1 and 2 into a VI2_AVX2 result.
;; Alternative 0 is the legacy two-operand packssdw, alternative 1 the
;; VEX three-operand vpackssdw.
8897 (define_insn "<sse2_avx2>_packssdw"
8898 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
8899 (vec_concat:VI2_AVX2
8900 (ss_truncate:<ssehalfvecmode>
8901 (match_operand:<sseunpackmode> 1 "register_operand" "0,x"))
8902 (ss_truncate:<ssehalfvecmode>
8903 (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,xm"))))]
8906 packssdw\t{%2, %0|%0, %2}
8907 vpackssdw\t{%2, %1, %0|%0, %1, %2}"
8908 [(set_attr "isa" "noavx,avx")
8909 (set_attr "type" "sselog")
8910 (set_attr "prefix_data16" "1,*")
8911 (set_attr "prefix" "orig,vex")
8912 (set_attr "mode" "<sseinsnmode>")])
;; Insn "<sse2_avx2>_packuswb": concatenation of the unsigned-saturating
;; truncations (us_truncate) of operands 1 and 2 into a VI1_AVX2 result.
;; Alternative 0 is the legacy two-operand packuswb, alternative 1 the
;; VEX three-operand vpackuswb.
8914 (define_insn "<sse2_avx2>_packuswb"
8915 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
8916 (vec_concat:VI1_AVX2
8917 (us_truncate:<ssehalfvecmode>
8918 (match_operand:<sseunpackmode> 1 "register_operand" "0,x"))
8919 (us_truncate:<ssehalfvecmode>
8920 (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,xm"))))]
8923 packuswb\t{%2, %0|%0, %2}
8924 vpackuswb\t{%2, %1, %0|%0, %1, %2}"
8925 [(set_attr "isa" "noavx,avx")
8926 (set_attr "type" "sselog")
8927 (set_attr "prefix_data16" "1,*")
8928 (set_attr "prefix" "orig,vex")
8929 (set_attr "mode" "<sseinsnmode>")])
;; Insn "avx2_interleave_highv32qi": emits vpunpckhbw on V32QI operands.
;; The selection list pairs index i with index i + 32 for
;; i = 8..15 and then i = 24..31.
;; NOTE(review): indices >= 32 presumably address operand 2 within a
;; concatenation of operands 1 and 2 -- the enclosing rtx lines are not
;; visible in this excerpt.
8931 (define_insn "avx2_interleave_highv32qi"
8932 [(set (match_operand:V32QI 0 "register_operand" "=x")
8935 (match_operand:V32QI 1 "register_operand" "x")
8936 (match_operand:V32QI 2 "nonimmediate_operand" "xm"))
8937 (parallel [(const_int 8) (const_int 40)
8938 (const_int 9) (const_int 41)
8939 (const_int 10) (const_int 42)
8940 (const_int 11) (const_int 43)
8941 (const_int 12) (const_int 44)
8942 (const_int 13) (const_int 45)
8943 (const_int 14) (const_int 46)
8944 (const_int 15) (const_int 47)
8945 (const_int 24) (const_int 56)
8946 (const_int 25) (const_int 57)
8947 (const_int 26) (const_int 58)
8948 (const_int 27) (const_int 59)
8949 (const_int 28) (const_int 60)
8950 (const_int 29) (const_int 61)
8951 (const_int 30) (const_int 62)
8952 (const_int 31) (const_int 63)])))]
8954 "vpunpckhbw\t{%2, %1, %0|%0, %1, %2}"
8955 [(set_attr "type" "sselog")
8956 (set_attr "prefix" "vex")
8957 (set_attr "mode" "OI")])
;; Insn "vec_interleave_highv16qi": punpckhbw / vpunpckhbw on V16QI.
;; The selection list pairs index i with index i + 16 for i = 8..15.
;; Alternative 0 is the legacy two-operand form (operand 1 tied to the
;; destination), alternative 1 the VEX three-operand form.
8959 (define_insn "vec_interleave_highv16qi"
8960 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
8963 (match_operand:V16QI 1 "register_operand" "0,x")
8964 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm"))
8965 (parallel [(const_int 8) (const_int 24)
8966 (const_int 9) (const_int 25)
8967 (const_int 10) (const_int 26)
8968 (const_int 11) (const_int 27)
8969 (const_int 12) (const_int 28)
8970 (const_int 13) (const_int 29)
8971 (const_int 14) (const_int 30)
8972 (const_int 15) (const_int 31)])))]
8975 punpckhbw\t{%2, %0|%0, %2}
8976 vpunpckhbw\t{%2, %1, %0|%0, %1, %2}"
8977 [(set_attr "isa" "noavx,avx")
8978 (set_attr "type" "sselog")
8979 (set_attr "prefix_data16" "1,*")
8980 (set_attr "prefix" "orig,vex")
8981 (set_attr "mode" "TI")])
;; Insn "avx2_interleave_lowv32qi": emits vpunpcklbw on V32QI operands.
;; The selection list pairs index i with index i + 32 for
;; i = 0..7 and then i = 16..23.
8983 (define_insn "avx2_interleave_lowv32qi"
8984 [(set (match_operand:V32QI 0 "register_operand" "=x")
8987 (match_operand:V32QI 1 "register_operand" "x")
8988 (match_operand:V32QI 2 "nonimmediate_operand" "xm"))
8989 (parallel [(const_int 0) (const_int 32)
8990 (const_int 1) (const_int 33)
8991 (const_int 2) (const_int 34)
8992 (const_int 3) (const_int 35)
8993 (const_int 4) (const_int 36)
8994 (const_int 5) (const_int 37)
8995 (const_int 6) (const_int 38)
8996 (const_int 7) (const_int 39)
8997 (const_int 16) (const_int 48)
8998 (const_int 17) (const_int 49)
8999 (const_int 18) (const_int 50)
9000 (const_int 19) (const_int 51)
9001 (const_int 20) (const_int 52)
9002 (const_int 21) (const_int 53)
9003 (const_int 22) (const_int 54)
9004 (const_int 23) (const_int 55)])))]
9006 "vpunpcklbw\t{%2, %1, %0|%0, %1, %2}"
9007 [(set_attr "type" "sselog")
9008 (set_attr "prefix" "vex")
9009 (set_attr "mode" "OI")])
;; Insn "vec_interleave_lowv16qi": punpcklbw / vpunpcklbw on V16QI.
;; The selection list pairs index i with index i + 16 for i = 0..7.
;; Alternative 0 is the legacy two-operand form, alternative 1 the VEX
;; three-operand form.
9011 (define_insn "vec_interleave_lowv16qi"
9012 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
9015 (match_operand:V16QI 1 "register_operand" "0,x")
9016 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm"))
9017 (parallel [(const_int 0) (const_int 16)
9018 (const_int 1) (const_int 17)
9019 (const_int 2) (const_int 18)
9020 (const_int 3) (const_int 19)
9021 (const_int 4) (const_int 20)
9022 (const_int 5) (const_int 21)
9023 (const_int 6) (const_int 22)
9024 (const_int 7) (const_int 23)])))]
9027 punpcklbw\t{%2, %0|%0, %2}
9028 vpunpcklbw\t{%2, %1, %0|%0, %1, %2}"
9029 [(set_attr "isa" "noavx,avx")
9030 (set_attr "type" "sselog")
9031 (set_attr "prefix_data16" "1,*")
9032 (set_attr "prefix" "orig,vex")
9033 (set_attr "mode" "TI")])
;; Insn "avx2_interleave_highv16hi": emits vpunpckhwd on V16HI operands.
;; The selection list pairs index i with index i + 16 for
;; i = 4..7 and then i = 12..15.
9035 (define_insn "avx2_interleave_highv16hi"
9036 [(set (match_operand:V16HI 0 "register_operand" "=x")
9039 (match_operand:V16HI 1 "register_operand" "x")
9040 (match_operand:V16HI 2 "nonimmediate_operand" "xm"))
9041 (parallel [(const_int 4) (const_int 20)
9042 (const_int 5) (const_int 21)
9043 (const_int 6) (const_int 22)
9044 (const_int 7) (const_int 23)
9045 (const_int 12) (const_int 28)
9046 (const_int 13) (const_int 29)
9047 (const_int 14) (const_int 30)
9048 (const_int 15) (const_int 31)])))]
9050 "vpunpckhwd\t{%2, %1, %0|%0, %1, %2}"
9051 [(set_attr "type" "sselog")
9052 (set_attr "prefix" "vex")
9053 (set_attr "mode" "OI")])
;; Insn "vec_interleave_highv8hi": punpckhwd / vpunpckhwd on V8HI.
;; The selection list pairs index i with index i + 8 for i = 4..7.
;; Alternative 0 is the legacy two-operand form, alternative 1 the VEX
;; three-operand form.
9055 (define_insn "vec_interleave_highv8hi"
9056 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
9059 (match_operand:V8HI 1 "register_operand" "0,x")
9060 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm"))
9061 (parallel [(const_int 4) (const_int 12)
9062 (const_int 5) (const_int 13)
9063 (const_int 6) (const_int 14)
9064 (const_int 7) (const_int 15)])))]
9067 punpckhwd\t{%2, %0|%0, %2}
9068 vpunpckhwd\t{%2, %1, %0|%0, %1, %2}"
9069 [(set_attr "isa" "noavx,avx")
9070 (set_attr "type" "sselog")
9071 (set_attr "prefix_data16" "1,*")
9072 (set_attr "prefix" "orig,vex")
9073 (set_attr "mode" "TI")])
;; Insn "avx2_interleave_lowv16hi": emits vpunpcklwd on V16HI operands.
;; The selection list pairs index i with index i + 16 for
;; i = 0..3 and then i = 8..11.
9075 (define_insn "avx2_interleave_lowv16hi"
9076 [(set (match_operand:V16HI 0 "register_operand" "=x")
9079 (match_operand:V16HI 1 "register_operand" "x")
9080 (match_operand:V16HI 2 "nonimmediate_operand" "xm"))
9081 (parallel [(const_int 0) (const_int 16)
9082 (const_int 1) (const_int 17)
9083 (const_int 2) (const_int 18)
9084 (const_int 3) (const_int 19)
9085 (const_int 8) (const_int 24)
9086 (const_int 9) (const_int 25)
9087 (const_int 10) (const_int 26)
9088 (const_int 11) (const_int 27)])))]
9090 "vpunpcklwd\t{%2, %1, %0|%0, %1, %2}"
9091 [(set_attr "type" "sselog")
9092 (set_attr "prefix" "vex")
9093 (set_attr "mode" "OI")])
;; Insn "vec_interleave_lowv8hi": punpcklwd / vpunpcklwd on V8HI.
;; The selection list pairs index i with index i + 8 for i = 0..3.
;; Alternative 0 is the legacy two-operand form, alternative 1 the VEX
;; three-operand form.
9095 (define_insn "vec_interleave_lowv8hi"
9096 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
9099 (match_operand:V8HI 1 "register_operand" "0,x")
9100 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm"))
9101 (parallel [(const_int 0) (const_int 8)
9102 (const_int 1) (const_int 9)
9103 (const_int 2) (const_int 10)
9104 (const_int 3) (const_int 11)])))]
9107 punpcklwd\t{%2, %0|%0, %2}
9108 vpunpcklwd\t{%2, %1, %0|%0, %1, %2}"
9109 [(set_attr "isa" "noavx,avx")
9110 (set_attr "type" "sselog")
9111 (set_attr "prefix_data16" "1,*")
9112 (set_attr "prefix" "orig,vex")
9113 (set_attr "mode" "TI")])
;; Insn "avx2_interleave_highv8si": emits vpunpckhdq on V8SI operands.
;; The selection list pairs index i with index i + 8 for
;; i = 2, 3 and then i = 6, 7.
9115 (define_insn "avx2_interleave_highv8si"
9116 [(set (match_operand:V8SI 0 "register_operand" "=x")
9119 (match_operand:V8SI 1 "register_operand" "x")
9120 (match_operand:V8SI 2 "nonimmediate_operand" "xm"))
9121 (parallel [(const_int 2) (const_int 10)
9122 (const_int 3) (const_int 11)
9123 (const_int 6) (const_int 14)
9124 (const_int 7) (const_int 15)])))]
9126 "vpunpckhdq\t{%2, %1, %0|%0, %1, %2}"
9127 [(set_attr "type" "sselog")
9128 (set_attr "prefix" "vex")
9129 (set_attr "mode" "OI")])
;; Insn "<mask_codefor>avx512f_interleave_highv16si<mask_name>": emits
;; vpunpckhdq on V16SI operands, with the optional mask operand
;; substituted for <mask_operand3>.
;; The selection list pairs index i with index i + 16 for
;; i = 2, 3, 6, 7, 10, 11, 14, 15.
9131 (define_insn "<mask_codefor>avx512f_interleave_highv16si<mask_name>"
9132 [(set (match_operand:V16SI 0 "register_operand" "=v")
9135 (match_operand:V16SI 1 "register_operand" "v")
9136 (match_operand:V16SI 2 "nonimmediate_operand" "vm"))
9137 (parallel [(const_int 2) (const_int 18)
9138 (const_int 3) (const_int 19)
9139 (const_int 6) (const_int 22)
9140 (const_int 7) (const_int 23)
9141 (const_int 10) (const_int 26)
9142 (const_int 11) (const_int 27)
9143 (const_int 14) (const_int 30)
9144 (const_int 15) (const_int 31)])))]
9146 "vpunpckhdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9147 [(set_attr "type" "sselog")
9148 (set_attr "prefix" "evex")
9149 (set_attr "mode" "XI")])
;; Insn "vec_interleave_highv4si": punpckhdq / vpunpckhdq on V4SI.
;; The selection list pairs index i with index i + 4 for i = 2, 3.
;; Alternative 0 is the legacy two-operand form, alternative 1 the VEX
;; three-operand form.
9152 (define_insn "vec_interleave_highv4si"
9153 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
9156 (match_operand:V4SI 1 "register_operand" "0,x")
9157 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm"))
9158 (parallel [(const_int 2) (const_int 6)
9159 (const_int 3) (const_int 7)])))]
9162 punpckhdq\t{%2, %0|%0, %2}
9163 vpunpckhdq\t{%2, %1, %0|%0, %1, %2}"
9164 [(set_attr "isa" "noavx,avx")
9165 (set_attr "type" "sselog")
9166 (set_attr "prefix_data16" "1,*")
9167 (set_attr "prefix" "orig,vex")
9168 (set_attr "mode" "TI")])
;; Insn "avx2_interleave_lowv8si": emits vpunpckldq on V8SI operands.
;; The selection list pairs index i with index i + 8 for
;; i = 0, 1 and then i = 4, 5.
9170 (define_insn "avx2_interleave_lowv8si"
9171 [(set (match_operand:V8SI 0 "register_operand" "=x")
9174 (match_operand:V8SI 1 "register_operand" "x")
9175 (match_operand:V8SI 2 "nonimmediate_operand" "xm"))
9176 (parallel [(const_int 0) (const_int 8)
9177 (const_int 1) (const_int 9)
9178 (const_int 4) (const_int 12)
9179 (const_int 5) (const_int 13)])))]
9181 "vpunpckldq\t{%2, %1, %0|%0, %1, %2}"
9182 [(set_attr "type" "sselog")
9183 (set_attr "prefix" "vex")
9184 (set_attr "mode" "OI")])
;; Insn "<mask_codefor>avx512f_interleave_lowv16si<mask_name>": emits
;; vpunpckldq on V16SI operands, with the optional mask operand
;; substituted for <mask_operand3>.
;; The selection list pairs index i with index i + 16 for
;; i = 0, 1, 4, 5, 8, 9, 12, 13.
9186 (define_insn "<mask_codefor>avx512f_interleave_lowv16si<mask_name>"
9187 [(set (match_operand:V16SI 0 "register_operand" "=v")
9190 (match_operand:V16SI 1 "register_operand" "v")
9191 (match_operand:V16SI 2 "nonimmediate_operand" "vm"))
9192 (parallel [(const_int 0) (const_int 16)
9193 (const_int 1) (const_int 17)
9194 (const_int 4) (const_int 20)
9195 (const_int 5) (const_int 21)
9196 (const_int 8) (const_int 24)
9197 (const_int 9) (const_int 25)
9198 (const_int 12) (const_int 28)
9199 (const_int 13) (const_int 29)])))]
9201 "vpunpckldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9202 [(set_attr "type" "sselog")
9203 (set_attr "prefix" "evex")
9204 (set_attr "mode" "XI")])
;; Insn "vec_interleave_lowv4si": punpckldq / vpunpckldq on V4SI.
;; The selection list pairs index i with index i + 4 for i = 0, 1.
;; Alternative 0 is the legacy two-operand form, alternative 1 the VEX
;; three-operand form.
9206 (define_insn "vec_interleave_lowv4si"
9207 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
9210 (match_operand:V4SI 1 "register_operand" "0,x")
9211 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm"))
9212 (parallel [(const_int 0) (const_int 4)
9213 (const_int 1) (const_int 5)])))]
9216 punpckldq\t{%2, %0|%0, %2}
9217 vpunpckldq\t{%2, %1, %0|%0, %1, %2}"
9218 [(set_attr "isa" "noavx,avx")
9219 (set_attr "type" "sselog")
9220 (set_attr "prefix_data16" "1,*")
9221 (set_attr "prefix" "orig,vex")
9222 (set_attr "mode" "TI")])
;; Expander "vec_interleave_high<mode>" for the VI_256 modes.
;; Both avx2_interleave_low<mode> (into t1) and avx2_interleave_high<mode>
;; (into t2) are emitted on the same inputs; the two results are then
;; viewed as V4DI and combined by avx2_permv2ti with immediate
;; 1 + (3 << 4) (0x31), and the V4DI result t3 is moved into operand 0
;; in <MODE>mode.
;; Operands carry predicates only, as in the other expanders of this
;; file: register allocation constraints belong on define_insn patterns.
9224 (define_expand "vec_interleave_high<mode>"
9225 [(match_operand:VI_256 0 "register_operand")
9226 (match_operand:VI_256 1 "register_operand")
9227 (match_operand:VI_256 2 "nonimmediate_operand")]
9230 rtx t1 = gen_reg_rtx (<MODE>mode);
9231 rtx t2 = gen_reg_rtx (<MODE>mode);
9232 rtx t3 = gen_reg_rtx (V4DImode);
9233 emit_insn (gen_avx2_interleave_low<mode> (t1, operands[1], operands[2]));
9234 emit_insn (gen_avx2_interleave_high<mode> (t2, operands[1], operands[2]));
9235 emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, t1),
9236 gen_lowpart (V4DImode, t2),
9237 GEN_INT (1 + (3 << 4))));
9238 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, t3));
;; Expander "vec_interleave_low<mode>" for the VI_256 modes.
;; Both avx2_interleave_low<mode> (into t1) and avx2_interleave_high<mode>
;; (into t2) are emitted on the same inputs; the two results are then
;; viewed as V4DI and combined by avx2_permv2ti with immediate
;; 0 + (2 << 4) (0x20), and the V4DI result t3 is moved into operand 0
;; in <MODE>mode.
;; Operands carry predicates only, as in the other expanders of this
;; file: register allocation constraints belong on define_insn patterns.
9242 (define_expand "vec_interleave_low<mode>"
9243 [(match_operand:VI_256 0 "register_operand")
9244 (match_operand:VI_256 1 "register_operand")
9245 (match_operand:VI_256 2 "nonimmediate_operand")]
9248 rtx t1 = gen_reg_rtx (<MODE>mode);
9249 rtx t2 = gen_reg_rtx (<MODE>mode);
9250 rtx t3 = gen_reg_rtx (V4DImode);
9251 emit_insn (gen_avx2_interleave_low<mode> (t1, operands[1], operands[2]));
9252 emit_insn (gen_avx2_interleave_high<mode> (t2, operands[1], operands[2]));
9253 emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, t1),
9254 gen_lowpart (V4DImode, t2),
9255 GEN_INT (0 + (2 << 4))));
9256 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, t3));
9260 ;; Modes handled by pinsr patterns.
;; V8HI is unconditional; V16QI and V4SI require TARGET_SSE4_1, and V2DI
;; additionally requires TARGET_64BIT.
9261 (define_mode_iterator PINSR_MODE
9262 [(V16QI "TARGET_SSE4_1") V8HI
9263 (V4SI "TARGET_SSE4_1")
9264 (V2DI "TARGET_SSE4_1 && TARGET_64BIT")])
;; Name prefix for the pinsr pattern per mode: "sse2" for V8HI,
;; "sse4_1" for V16QI, V4SI and V2DI.
9266 (define_mode_attr sse2p4_1
9267 [(V16QI "sse4_1") (V8HI "sse2")
9268 (V4SI "sse4_1") (V2DI "sse4_1")])
9270 ;; sse4_1_pinsrd must come before sse2_loadld since it is preferred.
;; Insn "<sse2p4_1>_pinsr<ssemodesuffix>": vec_merge of a duplicated
;; scalar (operand 2, from a general register or memory) into vector
;; operand 1, selected by operand 3.
;; Operand 3 arrives as a constant whose exact_log2 must be a valid
;; element number (checked unsigned against GET_MODE_NUNITS in the insn
;; condition); the output code replaces it with that log2 value before
;; printing, so the emitted immediate is the element index.
;; When the scalar mode is narrower than SImode the register operand is
;; printed with the %k modifier.  Alternatives 0/1 are the legacy
;; pinsr forms, 2/3 the VEX vpinsr forms.
9271 (define_insn "<sse2p4_1>_pinsr<ssemodesuffix>"
9272 [(set (match_operand:PINSR_MODE 0 "register_operand" "=x,x,x,x")
9273 (vec_merge:PINSR_MODE
9274 (vec_duplicate:PINSR_MODE
9275 (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "r,m,r,m"))
9276 (match_operand:PINSR_MODE 1 "register_operand" "0,0,x,x")
9277 (match_operand:SI 3 "const_int_operand")))]
9279 && ((unsigned) exact_log2 (INTVAL (operands[3]))
9280 < GET_MODE_NUNITS (<MODE>mode))"
9282 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
9284 switch (which_alternative)
9287 if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
9288 return "pinsr<ssemodesuffix>\t{%3, %k2, %0|%0, %k2, %3}";
9291 return "pinsr<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}";
9293 if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
9294 return "vpinsr<ssemodesuffix>\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
9297 return "vpinsr<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
9302 [(set_attr "isa" "noavx,noavx,avx,avx")
9303 (set_attr "type" "sselog")
9304 (set (attr "prefix_rex")
9306 (and (not (match_test "TARGET_AVX"))
9307 (eq (const_string "<MODE>mode") (const_string "V2DImode")))
9309 (const_string "*")))
9310 (set (attr "prefix_data16")
9312 (and (not (match_test "TARGET_AVX"))
9313 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
9315 (const_string "*")))
9316 (set (attr "prefix_extra")
9318 (and (not (match_test "TARGET_AVX"))
9319 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
9321 (const_string "1")))
9322 (set_attr "length_immediate" "1")
9323 (set_attr "prefix" "orig,orig,vex,vex")
9324 (set_attr "mode" "TI")])
;; Expander "avx512f_vinsert<shuffletype>32x4_mask".
;; Operand 3 is a position in 0..3.  The body switches on it and emits
;; the ..._1_mask insn with one of the selector constants 0xFFF, 0xF0FF,
;; 0xFF0F or 0xFFF0 in place of the position; operands 4 and 5 (a
;; V16FI register and an <avx512fmaskmode> register) are passed through.
;; NOTE(review): the case labels are not visible in this excerpt, so the
;; position -> constant mapping is given in source order only; it must
;; stay in step with the decoding in the ..._1 insn.
9326 (define_expand "avx512f_vinsert<shuffletype>32x4_mask"
9327 [(match_operand:V16FI 0 "register_operand")
9328 (match_operand:V16FI 1 "register_operand")
9329 (match_operand:<ssequartermode> 2 "nonimmediate_operand")
9330 (match_operand:SI 3 "const_0_to_3_operand")
9331 (match_operand:V16FI 4 "register_operand")
9332 (match_operand:<avx512fmaskmode> 5 "register_operand")]
9335 switch (INTVAL (operands[3]))
9338 emit_insn (gen_avx512f_vinsert<shuffletype>32x4_1_mask (operands[0],
9339 operands[1], operands[2], GEN_INT (0xFFF), operands[4],
9343 emit_insn (gen_avx512f_vinsert<shuffletype>32x4_1_mask (operands[0],
9344 operands[1], operands[2], GEN_INT (0xF0FF), operands[4],
9348 emit_insn (gen_avx512f_vinsert<shuffletype>32x4_1_mask (operands[0],
9349 operands[1], operands[2], GEN_INT (0xFF0F), operands[4],
9353 emit_insn (gen_avx512f_vinsert<shuffletype>32x4_1_mask (operands[0],
9354 operands[1], operands[2], GEN_INT (0xFFF0), operands[4],
;; Insn "<mask_codefor>avx512f_vinsert<shuffletype>32x4_1<mask_name>".
;; Operand 2 (a quarter-width vector, register or memory) is duplicated
;; across V16FI and combined with operand 1 under the constant selector
;; in operand 3.  The output code recognizes the four selector values
;; 0xFFF, 0xF0FF, 0xFF0F and 0xFFF0, derives `mask' from whichever one
;; matched, and rewrites operands[3] to that value so it is printed as
;; the instruction's immediate.
9364 (define_insn "<mask_codefor>avx512f_vinsert<shuffletype>32x4_1<mask_name>"
9365 [(set (match_operand:V16FI 0 "register_operand" "=v")
9367 (match_operand:V16FI 1 "register_operand" "v")
9368 (vec_duplicate:V16FI
9369 (match_operand:<ssequartermode> 2 "nonimmediate_operand" "vm"))
9370 (match_operand:SI 3 "const_int_operand" "n")))]
9374 if (INTVAL (operands[3]) == 0xFFF)
9376 else if ( INTVAL (operands[3]) == 0xF0FF)
9378 else if ( INTVAL (operands[3]) == 0xFF0F)
9380 else if ( INTVAL (operands[3]) == 0xFFF0)
9385 operands[3] = GEN_INT (mask);
9387 return "vinsert<shuffletype>32x4\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}";
9389 [(set_attr "type" "sselog")
9390 (set_attr "length_immediate" "1")
9391 (set_attr "prefix" "evex")
9392 (set_attr "mode" "<sseinsnmode>")])
;; Expander "avx512f_vinsert<shuffletype>64x4_mask".
;; Operand 3 is a position in 0..1, read into `mask'; the body emits
;; either vec_set_lo_<mode>_mask or vec_set_hi_<mode>_mask, passing
;; operands 0, 1, 2, 4 and 5 through unchanged.
;; NOTE(review): the test that chooses between the two calls is not
;; visible in this excerpt.
9394 (define_expand "avx512f_vinsert<shuffletype>64x4_mask"
9395 [(match_operand:V8FI 0 "register_operand")
9396 (match_operand:V8FI 1 "register_operand")
9397 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand")
9398 (match_operand:SI 3 "const_0_to_1_operand")
9399 (match_operand:V8FI 4 "register_operand")
9400 (match_operand:<avx512fmaskmode> 5 "register_operand")]
9403 int mask = INTVAL (operands[3]);
9405 emit_insn (gen_vec_set_lo_<mode>_mask
9406 (operands[0], operands[1], operands[2],
9407 operands[4], operands[5]));
9409 emit_insn (gen_vec_set_hi_<mode>_mask
9410 (operands[0], operands[1], operands[2],
9411 operands[4], operands[5]));
;; Insn "vec_set_lo_<mode><mask_name>": combines the half-width operand
;; 2 with elements 4..7 selected from operand 1, and emits
;; vinsert<shuffletype>64x4 with the fixed immediate $0x0.
9415 (define_insn "vec_set_lo_<mode><mask_name>"
9416 [(set (match_operand:V8FI 0 "register_operand" "=v")
9418 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
9419 (vec_select:<ssehalfvecmode>
9420 (match_operand:V8FI 1 "register_operand" "v")
9421 (parallel [(const_int 4) (const_int 5)
9422 (const_int 6) (const_int 7)]))))]
9424 "vinsert<shuffletype>64x4\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, $0x0}"
9425 [(set_attr "type" "sselog")
9426 (set_attr "length_immediate" "1")
9427 (set_attr "prefix" "evex")
9428 (set_attr "mode" "XI")])
;; Insn "vec_set_hi_<mode><mask_name>": combines the half-width operand
;; 2 with elements 0..3 selected from operand 1, and emits
;; vinsert<shuffletype>64x4 with the fixed immediate $0x1.
9430 (define_insn "vec_set_hi_<mode><mask_name>"
9431 [(set (match_operand:V8FI 0 "register_operand" "=v")
9433 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
9434 (vec_select:<ssehalfvecmode>
9435 (match_operand:V8FI 1 "register_operand" "v")
9436 (parallel [(const_int 0) (const_int 1)
9437 (const_int 2) (const_int 3)]))))]
9439 "vinsert<shuffletype>64x4\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, $0x1}"
9440 [(set_attr "type" "sselog")
9441 (set_attr "length_immediate" "1")
9442 (set_attr "prefix" "evex")
9443 (set_attr "mode" "XI")])
;; Expander "avx512f_shuf_<shuffletype>64x2_mask".
;; Operand 3 is an 8-bit immediate.  Each of its four 2-bit fields is
;; expanded into a pair of consecutive element indices (field * 2 and
;; field * 2 + 1); the pairs derived from bits 4-5 and 6-7 are further
;; offset by 8.  The eight indices, together with operands 4 and 5
;; (a V8FI register and a QI register), are passed to the ..._1_mask
;; insn.
9445 (define_expand "avx512f_shuf_<shuffletype>64x2_mask"
9446 [(match_operand:V8FI 0 "register_operand")
9447 (match_operand:V8FI 1 "register_operand")
9448 (match_operand:V8FI 2 "nonimmediate_operand")
9449 (match_operand:SI 3 "const_0_to_255_operand")
9450 (match_operand:V8FI 4 "register_operand")
9451 (match_operand:QI 5 "register_operand")]
9454 int mask = INTVAL (operands[3]);
9455 emit_insn (gen_avx512f_shuf_<shuffletype>64x2_1_mask
9456 (operands[0], operands[1], operands[2],
9457 GEN_INT (((mask >> 0) & 3) * 2),
9458 GEN_INT (((mask >> 0) & 3) * 2 + 1),
9459 GEN_INT (((mask >> 2) & 3) * 2),
9460 GEN_INT (((mask >> 2) & 3) * 2 + 1),
9461 GEN_INT (((mask >> 4) & 3) * 2 + 8),
9462 GEN_INT (((mask >> 4) & 3) * 2 + 9),
9463 GEN_INT (((mask >> 6) & 3) * 2 + 8),
9464 GEN_INT (((mask >> 6) & 3) * 2 + 9),
9465 operands[4], operands[5]));
;; Insn "avx512f_shuf_<shuffletype>64x2_1<mask_name>".
;; Selects eight elements from the concatenation of operands 1 and 2:
;; operands 3-6 index 0..7, operands 7-10 index 8..15.
;; The insn condition requires the indices to form consecutive pairs
;; (3,4), (5,6), (7,8), (9,10).  The output code rebuilds the 8-bit
;; immediate from the first index of each pair (halved, after removing
;; the offset of 8 from operands 7 and 9), stores it in operands[3] and
;; prints vshuf<shuffletype>64x2.
9469 (define_insn "avx512f_shuf_<shuffletype>64x2_1<mask_name>"
9470 [(set (match_operand:V8FI 0 "register_operand" "=v")
9472 (vec_concat:<ssedoublemode>
9473 (match_operand:V8FI 1 "register_operand" "v")
9474 (match_operand:V8FI 2 "nonimmediate_operand" "vm"))
9475 (parallel [(match_operand 3 "const_0_to_7_operand")
9476 (match_operand 4 "const_0_to_7_operand")
9477 (match_operand 5 "const_0_to_7_operand")
9478 (match_operand 6 "const_0_to_7_operand")
9479 (match_operand 7 "const_8_to_15_operand")
9480 (match_operand 8 "const_8_to_15_operand")
9481 (match_operand 9 "const_8_to_15_operand")
9482 (match_operand 10 "const_8_to_15_operand")])))]
9484 && (INTVAL (operands[3]) == (INTVAL (operands[4]) - 1)
9485 && INTVAL (operands[5]) == (INTVAL (operands[6]) - 1)
9486 && INTVAL (operands[7]) == (INTVAL (operands[8]) - 1)
9487 && INTVAL (operands[9]) == (INTVAL (operands[10]) - 1))"
9490 mask = INTVAL (operands[3]) / 2;
9491 mask |= INTVAL (operands[5]) / 2 << 2;
9492 mask |= (INTVAL (operands[7]) - 8) / 2 << 4;
9493 mask |= (INTVAL (operands[9]) - 8) / 2 << 6;
9494 operands[3] = GEN_INT (mask);
9496 return "vshuf<shuffletype>64x2\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
9498 [(set_attr "type" "sselog")
9499 (set_attr "length_immediate" "1")
9500 (set_attr "prefix" "evex")
9501 (set_attr "mode" "<sseinsnmode>")])
;; Expander "avx512f_shuf_<shuffletype>32x4_mask".
;; Operand 3 is an 8-bit immediate.  Each of its four 2-bit fields is
;; expanded into four consecutive element indices (field * 4 .. field *
;; 4 + 3); the groups derived from bits 4-5 and 6-7 are further offset
;; by 16.  The sixteen indices, together with operands 4 and 5 (a V16FI
;; register and an HI register), are passed to the ..._1_mask insn.
9503 (define_expand "avx512f_shuf_<shuffletype>32x4_mask"
9504 [(match_operand:V16FI 0 "register_operand")
9505 (match_operand:V16FI 1 "register_operand")
9506 (match_operand:V16FI 2 "nonimmediate_operand")
9507 (match_operand:SI 3 "const_0_to_255_operand")
9508 (match_operand:V16FI 4 "register_operand")
9509 (match_operand:HI 5 "register_operand")]
9512 int mask = INTVAL (operands[3]);
9513 emit_insn (gen_avx512f_shuf_<shuffletype>32x4_1_mask
9514 (operands[0], operands[1], operands[2],
9515 GEN_INT (((mask >> 0) & 3) * 4),
9516 GEN_INT (((mask >> 0) & 3) * 4 + 1),
9517 GEN_INT (((mask >> 0) & 3) * 4 + 2),
9518 GEN_INT (((mask >> 0) & 3) * 4 + 3),
9519 GEN_INT (((mask >> 2) & 3) * 4),
9520 GEN_INT (((mask >> 2) & 3) * 4 + 1),
9521 GEN_INT (((mask >> 2) & 3) * 4 + 2),
9522 GEN_INT (((mask >> 2) & 3) * 4 + 3),
9523 GEN_INT (((mask >> 4) & 3) * 4 + 16),
9524 GEN_INT (((mask >> 4) & 3) * 4 + 17),
9525 GEN_INT (((mask >> 4) & 3) * 4 + 18),
9526 GEN_INT (((mask >> 4) & 3) * 4 + 19),
9527 GEN_INT (((mask >> 6) & 3) * 4 + 16),
9528 GEN_INT (((mask >> 6) & 3) * 4 + 17),
9529 GEN_INT (((mask >> 6) & 3) * 4 + 18),
9530 GEN_INT (((mask >> 6) & 3) * 4 + 19),
9531 operands[4], operands[5]));
;; Matches a 16-index selection from the concatenation of operands 1 and 2.
;; Operands 3-10 must lie in 0..15 and operands 11-18 in 16..31, and the
;; condition requires every group of four indices to be consecutive.  The
;; output code folds the four group starts (operands 3, 7, 11 and 15) into
;; an 8-bit immediate, stores it in operands[3] and prints
;; vshuf<shuffletype>32x4.
9535 (define_insn "avx512f_shuf_<shuffletype>32x4_1<mask_name>"
9536 [(set (match_operand:V16FI 0 "register_operand" "=v")
9538 (vec_concat:<ssedoublemode>
9539 (match_operand:V16FI 1 "register_operand" "v")
9540 (match_operand:V16FI 2 "nonimmediate_operand" "vm"))
9541 (parallel [(match_operand 3 "const_0_to_15_operand")
9542 (match_operand 4 "const_0_to_15_operand")
9543 (match_operand 5 "const_0_to_15_operand")
9544 (match_operand 6 "const_0_to_15_operand")
9545 (match_operand 7 "const_0_to_15_operand")
9546 (match_operand 8 "const_0_to_15_operand")
9547 (match_operand 9 "const_0_to_15_operand")
9548 (match_operand 10 "const_0_to_15_operand")
9549 (match_operand 11 "const_16_to_31_operand")
9550 (match_operand 12 "const_16_to_31_operand")
9551 (match_operand 13 "const_16_to_31_operand")
9552 (match_operand 14 "const_16_to_31_operand")
9553 (match_operand 15 "const_16_to_31_operand")
9554 (match_operand 16 "const_16_to_31_operand")
9555 (match_operand 17 "const_16_to_31_operand")
9556 (match_operand 18 "const_16_to_31_operand")])))]
9558 && (INTVAL (operands[3]) == (INTVAL (operands[4]) - 1)
9559 && INTVAL (operands[3]) == (INTVAL (operands[5]) - 2)
9560 && INTVAL (operands[3]) == (INTVAL (operands[6]) - 3)
9561 && INTVAL (operands[7]) == (INTVAL (operands[8]) - 1)
9562 && INTVAL (operands[7]) == (INTVAL (operands[9]) - 2)
9563 && INTVAL (operands[7]) == (INTVAL (operands[10]) - 3)
9564 && INTVAL (operands[11]) == (INTVAL (operands[12]) - 1)
9565 && INTVAL (operands[11]) == (INTVAL (operands[13]) - 2)
9566 && INTVAL (operands[11]) == (INTVAL (operands[14]) - 3)
9567 && INTVAL (operands[15]) == (INTVAL (operands[16]) - 1)
9568 && INTVAL (operands[15]) == (INTVAL (operands[17]) - 2)
9569 && INTVAL (operands[15]) == (INTVAL (operands[18]) - 3))"
9572 mask = INTVAL (operands[3]) / 4;
9573 mask |= INTVAL (operands[7]) / 4 << 2;
9574 mask |= (INTVAL (operands[11]) - 16) / 4 << 4;
9575 mask |= (INTVAL (operands[15]) - 16) / 4 << 6;
9576 operands[3] = GEN_INT (mask);
9578 return "vshuf<shuffletype>32x4\t{%3, %2, %1, %0<mask_operand19>|%0<mask_operand19>, %1, %2, %3}";
9580 [(set_attr "type" "sselog")
9581 (set_attr "length_immediate" "1")
9582 (set_attr "prefix" "evex")
9583 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the masked V16SI pshufd.  The four 2-bit fields of the
;; immediate in operand 2 are emitted four times, with offsets 0, 4, 8 and
;; 12, giving the sixteen indices expected by gen_avx512f_pshufd_1_mask.
;; Operands 3 and 4 are passed through unchanged.
9585 (define_expand "avx512f_pshufdv3_mask"
9586 [(match_operand:V16SI 0 "register_operand")
9587 (match_operand:V16SI 1 "nonimmediate_operand")
9588 (match_operand:SI 2 "const_0_to_255_operand")
9589 (match_operand:V16SI 3 "register_operand")
9590 (match_operand:HI 4 "register_operand")]
9593 int mask = INTVAL (operands[2]);
9594 emit_insn (gen_avx512f_pshufd_1_mask (operands[0], operands[1],
9595 GEN_INT ((mask >> 0) & 3),
9596 GEN_INT ((mask >> 2) & 3),
9597 GEN_INT ((mask >> 4) & 3),
9598 GEN_INT ((mask >> 6) & 3),
9599 GEN_INT (((mask >> 0) & 3) + 4),
9600 GEN_INT (((mask >> 2) & 3) + 4),
9601 GEN_INT (((mask >> 4) & 3) + 4),
9602 GEN_INT (((mask >> 6) & 3) + 4),
9603 GEN_INT (((mask >> 0) & 3) + 8),
9604 GEN_INT (((mask >> 2) & 3) + 8),
9605 GEN_INT (((mask >> 4) & 3) + 8),
9606 GEN_INT (((mask >> 6) & 3) + 8),
9607 GEN_INT (((mask >> 0) & 3) + 12),
9608 GEN_INT (((mask >> 2) & 3) + 12),
9609 GEN_INT (((mask >> 4) & 3) + 12),
9610 GEN_INT (((mask >> 6) & 3) + 12),
9611 operands[3], operands[4]));
;; Matches a V16SI selection in which operands 6-17 repeat the selectors in
;; operands 2-5 with offsets 4, 8 and 12 (enforced by the condition).  The
;; output code packs operands 2-5, two bits each, into an immediate that
;; replaces operands[2] and prints vpshufd.
9615 (define_insn "avx512f_pshufd_1<mask_name>"
9616 [(set (match_operand:V16SI 0 "register_operand" "=v")
9618 (match_operand:V16SI 1 "nonimmediate_operand" "vm")
9619 (parallel [(match_operand 2 "const_0_to_3_operand")
9620 (match_operand 3 "const_0_to_3_operand")
9621 (match_operand 4 "const_0_to_3_operand")
9622 (match_operand 5 "const_0_to_3_operand")
9623 (match_operand 6 "const_4_to_7_operand")
9624 (match_operand 7 "const_4_to_7_operand")
9625 (match_operand 8 "const_4_to_7_operand")
9626 (match_operand 9 "const_4_to_7_operand")
9627 (match_operand 10 "const_8_to_11_operand")
9628 (match_operand 11 "const_8_to_11_operand")
9629 (match_operand 12 "const_8_to_11_operand")
9630 (match_operand 13 "const_8_to_11_operand")
9631 (match_operand 14 "const_12_to_15_operand")
9632 (match_operand 15 "const_12_to_15_operand")
9633 (match_operand 16 "const_12_to_15_operand")
9634 (match_operand 17 "const_12_to_15_operand")])))]
9636 && INTVAL (operands[2]) + 4 == INTVAL (operands[6])
9637 && INTVAL (operands[3]) + 4 == INTVAL (operands[7])
9638 && INTVAL (operands[4]) + 4 == INTVAL (operands[8])
9639 && INTVAL (operands[5]) + 4 == INTVAL (operands[9])
9640 && INTVAL (operands[2]) + 8 == INTVAL (operands[10])
9641 && INTVAL (operands[3]) + 8 == INTVAL (operands[11])
9642 && INTVAL (operands[4]) + 8 == INTVAL (operands[12])
9643 && INTVAL (operands[5]) + 8 == INTVAL (operands[13])
9644 && INTVAL (operands[2]) + 12 == INTVAL (operands[14])
9645 && INTVAL (operands[3]) + 12 == INTVAL (operands[15])
9646 && INTVAL (operands[4]) + 12 == INTVAL (operands[16])
9647 && INTVAL (operands[5]) + 12 == INTVAL (operands[17])"
9650 mask |= INTVAL (operands[2]) << 0;
9651 mask |= INTVAL (operands[3]) << 2;
9652 mask |= INTVAL (operands[4]) << 4;
9653 mask |= INTVAL (operands[5]) << 6;
9654 operands[2] = GEN_INT (mask);
9656 return "vpshufd\t{%2, %1, %0<mask_operand18>|%0<mask_operand18>, %1, %2}";
9658 [(set_attr "type" "sselog1")
9659 (set_attr "prefix" "evex")
9660 (set_attr "length_immediate" "1")
9661 (set_attr "mode" "XI")])
;; Expander for the V8SI pshufd.  The four 2-bit fields of the immediate in
;; operand 2 are emitted twice, with offsets 0 and 4, giving the eight
;; indices expected by gen_avx2_pshufd_1.
9663 (define_expand "avx2_pshufdv3"
9664 [(match_operand:V8SI 0 "register_operand")
9665 (match_operand:V8SI 1 "nonimmediate_operand")
9666 (match_operand:SI 2 "const_0_to_255_operand")]
9669 int mask = INTVAL (operands[2]);
9670 emit_insn (gen_avx2_pshufd_1 (operands[0], operands[1],
9671 GEN_INT ((mask >> 0) & 3),
9672 GEN_INT ((mask >> 2) & 3),
9673 GEN_INT ((mask >> 4) & 3),
9674 GEN_INT ((mask >> 6) & 3),
9675 GEN_INT (((mask >> 0) & 3) + 4),
9676 GEN_INT (((mask >> 2) & 3) + 4),
9677 GEN_INT (((mask >> 4) & 3) + 4),
9678 GEN_INT (((mask >> 6) & 3) + 4)));
;; Matches a V8SI selection in which operands 6-9 equal operands 2-5 plus 4
;; (enforced by the condition).  The output code packs operands 2-5, two
;; bits each, into an immediate that replaces operands[2] and prints
;; vpshufd.
9682 (define_insn "avx2_pshufd_1"
9683 [(set (match_operand:V8SI 0 "register_operand" "=x")
9685 (match_operand:V8SI 1 "nonimmediate_operand" "xm")
9686 (parallel [(match_operand 2 "const_0_to_3_operand")
9687 (match_operand 3 "const_0_to_3_operand")
9688 (match_operand 4 "const_0_to_3_operand")
9689 (match_operand 5 "const_0_to_3_operand")
9690 (match_operand 6 "const_4_to_7_operand")
9691 (match_operand 7 "const_4_to_7_operand")
9692 (match_operand 8 "const_4_to_7_operand")
9693 (match_operand 9 "const_4_to_7_operand")])))]
9695 && INTVAL (operands[2]) + 4 == INTVAL (operands[6])
9696 && INTVAL (operands[3]) + 4 == INTVAL (operands[7])
9697 && INTVAL (operands[4]) + 4 == INTVAL (operands[8])
9698 && INTVAL (operands[5]) + 4 == INTVAL (operands[9])"
9701 mask |= INTVAL (operands[2]) << 0;
9702 mask |= INTVAL (operands[3]) << 2;
9703 mask |= INTVAL (operands[4]) << 4;
9704 mask |= INTVAL (operands[5]) << 6;
9705 operands[2] = GEN_INT (mask);
9707 return "vpshufd\t{%2, %1, %0|%0, %1, %2}";
9709 [(set_attr "type" "sselog1")
9710 (set_attr "prefix" "vex")
9711 (set_attr "length_immediate" "1")
9712 (set_attr "mode" "OI")])
;; Expander for the V4SI pshufd.  Splits the integer constant in operand 2
;; into four 2-bit selectors and passes them to gen_sse2_pshufd_1.  Only the
;; low eight bits of the constant are used by the decoding below.
9714 (define_expand "sse2_pshufd"
9715 [(match_operand:V4SI 0 "register_operand")
9716 (match_operand:V4SI 1 "nonimmediate_operand")
9717 (match_operand:SI 2 "const_int_operand")]
9720 int mask = INTVAL (operands[2]);
9721 emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
9722 GEN_INT ((mask >> 0) & 3),
9723 GEN_INT ((mask >> 2) & 3),
9724 GEN_INT ((mask >> 4) & 3),
9725 GEN_INT ((mask >> 6) & 3)));
;; Matches a V4SI selection with four indices in 0..3 (operands 2-5).  The
;; output code packs them, two bits each, into an immediate that replaces
;; operands[2] and prints the %vpshufd template.
9729 (define_insn "sse2_pshufd_1"
9730 [(set (match_operand:V4SI 0 "register_operand" "=x")
9732 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
9733 (parallel [(match_operand 2 "const_0_to_3_operand")
9734 (match_operand 3 "const_0_to_3_operand")
9735 (match_operand 4 "const_0_to_3_operand")
9736 (match_operand 5 "const_0_to_3_operand")])))]
9740 mask |= INTVAL (operands[2]) << 0;
9741 mask |= INTVAL (operands[3]) << 2;
9742 mask |= INTVAL (operands[4]) << 4;
9743 mask |= INTVAL (operands[5]) << 6;
9744 operands[2] = GEN_INT (mask);
9746 return "%vpshufd\t{%2, %1, %0|%0, %1, %2}";
9748 [(set_attr "type" "sselog1")
9749 (set_attr "prefix_data16" "1")
9750 (set_attr "prefix" "maybe_vex")
9751 (set_attr "length_immediate" "1")
9752 (set_attr "mode" "TI")])
;; Expander for the V16HI pshuflw.  The four 2-bit fields of the immediate
;; in operand 2 are emitted twice, with offsets 0 and 8, giving the eight
;; variable indices expected by gen_avx2_pshuflw_1.
9754 (define_expand "avx2_pshuflwv3"
9755 [(match_operand:V16HI 0 "register_operand")
9756 (match_operand:V16HI 1 "nonimmediate_operand")
9757 (match_operand:SI 2 "const_0_to_255_operand")]
9760 int mask = INTVAL (operands[2]);
9761 emit_insn (gen_avx2_pshuflw_1 (operands[0], operands[1],
9762 GEN_INT ((mask >> 0) & 3),
9763 GEN_INT ((mask >> 2) & 3),
9764 GEN_INT ((mask >> 4) & 3),
9765 GEN_INT ((mask >> 6) & 3),
9766 GEN_INT (((mask >> 0) & 3) + 8),
9767 GEN_INT (((mask >> 2) & 3) + 8),
9768 GEN_INT (((mask >> 4) & 3) + 8),
9769 GEN_INT (((mask >> 6) & 3) + 8)));
;; Matches a V16HI selection whose variable indices are operands 2-5 (range
;; 0..3) and operands 6-9 (range 8..11), where the condition requires
;; operands 6-9 to equal operands 2-5 plus 8.  The output code packs
;; operands 2-5, two bits each, into an immediate that replaces operands[2]
;; and prints vpshuflw.
9773 (define_insn "avx2_pshuflw_1"
9774 [(set (match_operand:V16HI 0 "register_operand" "=x")
9776 (match_operand:V16HI 1 "nonimmediate_operand" "xm")
9777 (parallel [(match_operand 2 "const_0_to_3_operand")
9778 (match_operand 3 "const_0_to_3_operand")
9779 (match_operand 4 "const_0_to_3_operand")
9780 (match_operand 5 "const_0_to_3_operand")
9785 (match_operand 6 "const_8_to_11_operand")
9786 (match_operand 7 "const_8_to_11_operand")
9787 (match_operand 8 "const_8_to_11_operand")
9788 (match_operand 9 "const_8_to_11_operand")
9794 && INTVAL (operands[2]) + 8 == INTVAL (operands[6])
9795 && INTVAL (operands[3]) + 8 == INTVAL (operands[7])
9796 && INTVAL (operands[4]) + 8 == INTVAL (operands[8])
9797 && INTVAL (operands[5]) + 8 == INTVAL (operands[9])"
9800 mask |= INTVAL (operands[2]) << 0;
9801 mask |= INTVAL (operands[3]) << 2;
9802 mask |= INTVAL (operands[4]) << 4;
9803 mask |= INTVAL (operands[5]) << 6;
9804 operands[2] = GEN_INT (mask);
9806 return "vpshuflw\t{%2, %1, %0|%0, %1, %2}";
9808 [(set_attr "type" "sselog")
9809 (set_attr "prefix" "vex")
9810 (set_attr "length_immediate" "1")
9811 (set_attr "mode" "OI")])
;; Expander for the V8HI pshuflw.  Splits the integer constant in operand 2
;; into four 2-bit selectors and passes them to gen_sse2_pshuflw_1.
9813 (define_expand "sse2_pshuflw"
9814 [(match_operand:V8HI 0 "register_operand")
9815 (match_operand:V8HI 1 "nonimmediate_operand")
9816 (match_operand:SI 2 "const_int_operand")]
9819 int mask = INTVAL (operands[2]);
9820 emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
9821 GEN_INT ((mask >> 0) & 3),
9822 GEN_INT ((mask >> 2) & 3),
9823 GEN_INT ((mask >> 4) & 3),
9824 GEN_INT ((mask >> 6) & 3)));
;; Matches a V8HI selection whose variable indices are operands 2-5, each in
;; 0..3.  The output code packs them, two bits each, into an immediate that
;; replaces operands[2] and prints the %vpshuflw template.
9828 (define_insn "sse2_pshuflw_1"
9829 [(set (match_operand:V8HI 0 "register_operand" "=x")
9831 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
9832 (parallel [(match_operand 2 "const_0_to_3_operand")
9833 (match_operand 3 "const_0_to_3_operand")
9834 (match_operand 4 "const_0_to_3_operand")
9835 (match_operand 5 "const_0_to_3_operand")
9843 mask |= INTVAL (operands[2]) << 0;
9844 mask |= INTVAL (operands[3]) << 2;
9845 mask |= INTVAL (operands[4]) << 4;
9846 mask |= INTVAL (operands[5]) << 6;
9847 operands[2] = GEN_INT (mask);
9849 return "%vpshuflw\t{%2, %1, %0|%0, %1, %2}";
9851 [(set_attr "type" "sselog")
9852 (set_attr "prefix_data16" "0")
9853 (set_attr "prefix_rep" "1")
9854 (set_attr "prefix" "maybe_vex")
9855 (set_attr "length_immediate" "1")
9856 (set_attr "mode" "TI")])
;; Expander for the V16HI pshufhw.  The four 2-bit fields of the immediate
;; in operand 2 are emitted twice, with offsets 4 and 12, giving the eight
;; variable indices expected by gen_avx2_pshufhw_1.
9858 (define_expand "avx2_pshufhwv3"
9859 [(match_operand:V16HI 0 "register_operand")
9860 (match_operand:V16HI 1 "nonimmediate_operand")
9861 (match_operand:SI 2 "const_0_to_255_operand")]
9864 int mask = INTVAL (operands[2]);
9865 emit_insn (gen_avx2_pshufhw_1 (operands[0], operands[1],
9866 GEN_INT (((mask >> 0) & 3) + 4),
9867 GEN_INT (((mask >> 2) & 3) + 4),
9868 GEN_INT (((mask >> 4) & 3) + 4),
9869 GEN_INT (((mask >> 6) & 3) + 4),
9870 GEN_INT (((mask >> 0) & 3) + 12),
9871 GEN_INT (((mask >> 2) & 3) + 12),
9872 GEN_INT (((mask >> 4) & 3) + 12),
9873 GEN_INT (((mask >> 6) & 3) + 12)));
;; Matches a V16HI selection whose variable indices are operands 2-5 (range
;; 4..7) and operands 6-9 (range 12..15), where the condition requires
;; operands 6-9 to equal operands 2-5 plus 8.  The output code subtracts 4
;; from operands 2-5 and packs the results, two bits each, into an immediate
;; that replaces operands[2], then prints vpshufhw.
9877 (define_insn "avx2_pshufhw_1"
9878 [(set (match_operand:V16HI 0 "register_operand" "=x")
9880 (match_operand:V16HI 1 "nonimmediate_operand" "xm")
9881 (parallel [(const_int 0)
9885 (match_operand 2 "const_4_to_7_operand")
9886 (match_operand 3 "const_4_to_7_operand")
9887 (match_operand 4 "const_4_to_7_operand")
9888 (match_operand 5 "const_4_to_7_operand")
9893 (match_operand 6 "const_12_to_15_operand")
9894 (match_operand 7 "const_12_to_15_operand")
9895 (match_operand 8 "const_12_to_15_operand")
9896 (match_operand 9 "const_12_to_15_operand")])))]
9898 && INTVAL (operands[2]) + 8 == INTVAL (operands[6])
9899 && INTVAL (operands[3]) + 8 == INTVAL (operands[7])
9900 && INTVAL (operands[4]) + 8 == INTVAL (operands[8])
9901 && INTVAL (operands[5]) + 8 == INTVAL (operands[9])"
9904 mask |= (INTVAL (operands[2]) - 4) << 0;
9905 mask |= (INTVAL (operands[3]) - 4) << 2;
9906 mask |= (INTVAL (operands[4]) - 4) << 4;
9907 mask |= (INTVAL (operands[5]) - 4) << 6;
9908 operands[2] = GEN_INT (mask);
9910 return "vpshufhw\t{%2, %1, %0|%0, %1, %2}";
9912 [(set_attr "type" "sselog")
9913 (set_attr "prefix" "vex")
9914 (set_attr "length_immediate" "1")
9915 (set_attr "mode" "OI")])
;; Expander for the V8HI pshufhw.  Splits the integer constant in operand 2
;; into four 2-bit selectors, adds 4 to each, and passes them to
;; gen_sse2_pshufhw_1.
9917 (define_expand "sse2_pshufhw"
9918 [(match_operand:V8HI 0 "register_operand")
9919 (match_operand:V8HI 1 "nonimmediate_operand")
9920 (match_operand:SI 2 "const_int_operand")]
9923 int mask = INTVAL (operands[2]);
9924 emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
9925 GEN_INT (((mask >> 0) & 3) + 4),
9926 GEN_INT (((mask >> 2) & 3) + 4),
9927 GEN_INT (((mask >> 4) & 3) + 4),
9928 GEN_INT (((mask >> 6) & 3) + 4)));
;; Matches a V8HI selection whose variable indices are operands 2-5, each in
;; 4..7.  The output code subtracts 4 from each, packs the results two bits
;; apiece into an immediate that replaces operands[2], and prints the
;; %vpshufhw template.
9932 (define_insn "sse2_pshufhw_1"
9933 [(set (match_operand:V8HI 0 "register_operand" "=x")
9935 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
9936 (parallel [(const_int 0)
9940 (match_operand 2 "const_4_to_7_operand")
9941 (match_operand 3 "const_4_to_7_operand")
9942 (match_operand 4 "const_4_to_7_operand")
9943 (match_operand 5 "const_4_to_7_operand")])))]
9947 mask |= (INTVAL (operands[2]) - 4) << 0;
9948 mask |= (INTVAL (operands[3]) - 4) << 2;
9949 mask |= (INTVAL (operands[4]) - 4) << 4;
9950 mask |= (INTVAL (operands[5]) - 4) << 6;
9951 operands[2] = GEN_INT (mask);
9953 return "%vpshufhw\t{%2, %1, %0|%0, %1, %2}";
9955 [(set_attr "type" "sselog")
9956 (set_attr "prefix_rep" "1")
9957 (set_attr "prefix_data16" "0")
9958 (set_attr "prefix" "maybe_vex")
9959 (set_attr "length_immediate" "1")
9960 (set_attr "mode" "TI")])
;; Expander that builds a V4SI result from the SImode operand 1.  The
;; preparation statement supplies operands[2] as the V4SI zero vector.
9962 (define_expand "sse2_loadd"
9963 [(set (match_operand:V4SI 0 "register_operand")
9966 (match_operand:SI 1 "nonimmediate_operand"))
9970 "operands[2] = CONST0_RTX (V4SImode);")
;; Loads the SImode operand 2 into a V4SI register, with operand 1 being
;; either zero (constraint C) or a register.  Five alternatives: movd from
;; memory, movd from a general register, movss from memory, movss from an
;; SSE register (tied to operand 0), and the three-operand vmovss form.
;; The isa attribute limits the movss forms to non-AVX and vmovss to AVX.
9972 (define_insn "sse2_loadld"
9973 [(set (match_operand:V4SI 0 "register_operand" "=x,Yi,x,x,x")
9976 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x,x"))
9977 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,C,0,x")
9981 %vmovd\t{%2, %0|%0, %2}
9982 %vmovd\t{%2, %0|%0, %2}
9983 movss\t{%2, %0|%0, %2}
9984 movss\t{%2, %0|%0, %2}
9985 vmovss\t{%2, %1, %0|%0, %1, %2}"
9986 [(set_attr "isa" "sse2,*,noavx,noavx,avx")
9987 (set_attr "type" "ssemov")
9988 (set_attr "prefix" "maybe_vex,maybe_vex,orig,orig,vex")
9989 (set_attr "mode" "TI,TI,V4SF,SF,SF")])
;; Extracts the element selected by operand 2 from a VI12_128 register into
;; a general register (alternative 0, printed with %k0) or into memory
;; (alternative 1) using pextr<ssemodesuffix>.  The prefix_data16 and
;; prefix_extra attributes are special-cased for alternative 0 in V8HImode.
9991 (define_insn "*vec_extract<mode>"
9992 [(set (match_operand:<ssescalarmode> 0 "nonimmediate_operand" "=r,m")
9993 (vec_select:<ssescalarmode>
9994 (match_operand:VI12_128 1 "register_operand" "x,x")
9996 [(match_operand:SI 2 "const_0_to_<ssescalarnummask>_operand")])))]
9999 %vpextr<ssemodesuffix>\t{%2, %1, %k0|%k0, %1, %2}
10000 %vpextr<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10001 [(set_attr "type" "sselog1")
10002 (set (attr "prefix_data16")
10004 (and (eq_attr "alternative" "0")
10005 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
10007 (const_string "*")))
10008 (set (attr "prefix_extra")
10010 (and (eq_attr "alternative" "0")
10011 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
10013 (const_string "1")))
10014 (set_attr "length_immediate" "1")
10015 (set_attr "prefix" "maybe_vex")
10016 (set_attr "mode" "TI")])
;; V8HI element extract into a general register with pextrw, used only when
;; SSE2 is available and SSE4.1 is not (see the condition).
10018 (define_insn "*vec_extractv8hi_sse2"
10019 [(set (match_operand:HI 0 "register_operand" "=r")
10021 (match_operand:V8HI 1 "register_operand" "x")
10023 [(match_operand:SI 2 "const_0_to_7_operand")])))]
10024 "TARGET_SSE2 && !TARGET_SSE4_1"
10025 "pextrw\t{%2, %1, %k0|%k0, %1, %2}"
10026 [(set_attr "type" "sselog1")
10027 (set_attr "prefix_data16" "1")
10028 (set_attr "length_immediate" "1")
10029 (set_attr "mode" "TI")])
;; V16QI element extract (index 0..15) into an SWI48 general register with
;; pextrb, the destination being printed with %k0.  Going by the pattern
;; name this is the zero-extending form.
10031 (define_insn "*vec_extractv16qi_zext"
10032 [(set (match_operand:SWI48 0 "register_operand" "=r")
10035 (match_operand:V16QI 1 "register_operand" "x")
10037 [(match_operand:SI 2 "const_0_to_15_operand")]))))]
10039 "%vpextrb\t{%2, %1, %k0|%k0, %1, %2}"
10040 [(set_attr "type" "sselog1")
10041 (set_attr "prefix_extra" "1")
10042 (set_attr "length_immediate" "1")
10043 (set_attr "prefix" "maybe_vex")
10044 (set_attr "mode" "TI")])
;; V8HI element extract (index 0..7) into an SWI48 general register with
;; pextrw, the destination being printed with %k0.  Going by the pattern
;; name this is the zero-extending form.
10046 (define_insn "*vec_extractv8hi_zext"
10047 [(set (match_operand:SWI48 0 "register_operand" "=r")
10050 (match_operand:V8HI 1 "register_operand" "x")
10052 [(match_operand:SI 2 "const_0_to_7_operand")]))))]
10054 "%vpextrw\t{%2, %1, %k0|%k0, %1, %2}"
10055 [(set_attr "type" "sselog1")
10056 (set_attr "prefix_data16" "1")
10057 (set_attr "length_immediate" "1")
10058 (set_attr "prefix" "maybe_vex")
10059 (set_attr "mode" "TI")])
;; Element extract from a VI12_128 vector held in offsettable memory
;; (constraint o) into a general register.
10061 (define_insn "*vec_extract<mode>_mem"
10062 [(set (match_operand:<ssescalarmode> 0 "register_operand" "=r")
10063 (vec_select:<ssescalarmode>
10064 (match_operand:VI12_128 1 "memory_operand" "o")
10066 [(match_operand 2 "const_0_to_<ssescalarnummask>_operand")])))]
;; Extract of element 0 into an SWI48 destination.  Four alternatives cover
;; general-register, SSE-register and memory destinations.  The condition
;; rejects the memory-to-memory case, and the second alternative is limited
;; to the sse4 isa.
10070 (define_insn "*vec_extract<ssevecmodelower>_0"
10071 [(set (match_operand:SWI48 0 "nonimmediate_operand" "=r ,r,x ,m")
10073 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "mYj,x,xm,x")
10074 (parallel [(const_int 0)])))]
10075 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
10077 [(set_attr "isa" "*,sse4,*,*")])
;; Element 0 of a V4SI register into a DImode general register, for 64-bit
;; SSE2 targets with TARGET_INTER_UNIT_MOVES_FROM_VEC.  After reload it is
;; split into a zero_extend:DI of operand 1, which is rewritten as an SImode
;; register with the same register number.
10079 (define_insn_and_split "*vec_extractv4si_0_zext"
10080 [(set (match_operand:DI 0 "register_operand" "=r")
10083 (match_operand:V4SI 1 "register_operand" "x")
10084 (parallel [(const_int 0)]))))]
10085 "TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_FROM_VEC"
10087 "&& reload_completed"
10088 [(set (match_dup 0) (zero_extend:DI (match_dup 1)))]
10089 "operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));")
;; Element 0 of a V2DI operand into an SSE register or memory on 32-bit SSE
;; targets (TARGET_SSE && !TARGET_64BIT).  The memory-to-memory case is
;; rejected by the condition.
10091 (define_insn "*vec_extractv2di_0_sse"
10092 [(set (match_operand:DI 0 "nonimmediate_operand" "=x,m")
10094 (match_operand:V2DI 1 "nonimmediate_operand" "xm,x")
10095 (parallel [(const_int 0)])))]
10096 "TARGET_SSE && !TARGET_64BIT
10097 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
;; Post-reload split: an extract of element 0 from a vector register becomes
;; a plain move, with operand 1 rewritten as a <MODE>mode register that has
;; the same register number.
10101 [(set (match_operand:SWI48x 0 "nonimmediate_operand")
10103 (match_operand:<ssevecmode> 1 "register_operand")
10104 (parallel [(const_int 0)])))]
10105 "TARGET_SSE && reload_completed"
10106 [(set (match_dup 0) (match_dup 1))]
10107 "operands[1] = gen_rtx_REG (<MODE>mode, REGNO (operands[1]));")
;; V4SI element extract with the index in operand 2.  Alternative 0 uses
;; pextrd into a general register or memory.  Alternatives 1 and 2 keep the
;; value in an SSE register and shift the vector right by index * 4 bytes,
;; with psrldq (non-AVX, operand 1 tied to operand 0) or vpsrldq (AVX).
10109 (define_insn "*vec_extractv4si"
10110 [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,x,x")
10112 (match_operand:V4SI 1 "register_operand" "x,0,x")
10113 (parallel [(match_operand:SI 2 "const_0_to_3_operand")])))]
10116 switch (which_alternative)
10119 return "%vpextrd\t{%2, %1, %0|%0, %1, %2}";
10122 operands [2] = GEN_INT (INTVAL (operands[2]) * 4);
10123 return "psrldq\t{%2, %0|%0, %2}";
10126 operands [2] = GEN_INT (INTVAL (operands[2]) * 4);
10127 return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
10130 gcc_unreachable ();
10133 [(set_attr "isa" "*,noavx,avx")
10134 (set_attr "type" "sselog1,sseishft1,sseishft1")
10135 (set_attr "prefix_extra" "1,*,*")
10136 (set_attr "length_immediate" "1")
10137 (set_attr "prefix" "maybe_vex,orig,vex")
10138 (set_attr "mode" "TI")])
;; V4SI element extract into a DImode general register with pextrd, the
;; destination being printed with %k0.  Enabled for 64-bit SSE4.1 targets.
10140 (define_insn "*vec_extractv4si_zext"
10141 [(set (match_operand:DI 0 "register_operand" "=r")
10144 (match_operand:V4SI 1 "register_operand" "x")
10145 (parallel [(match_operand:SI 2 "const_0_to_3_operand")]))))]
10146 "TARGET_64BIT && TARGET_SSE4_1"
10147 "%vpextrd\t{%2, %1, %k0|%k0, %1, %2}"
10148 [(set_attr "type" "sselog1")
10149 (set_attr "prefix_extra" "1")
10150 (set_attr "length_immediate" "1")
10151 (set_attr "prefix" "maybe_vex")
10152 (set_attr "mode" "TI")])
;; V4SI element extract from offsettable memory (constraint o) into either
;; an SSE register or a general register.
10154 (define_insn "*vec_extractv4si_mem"
10155 [(set (match_operand:SI 0 "register_operand" "=x,r")
10157 (match_operand:V4SI 1 "memory_operand" "o,o")
10158 (parallel [(match_operand 2 "const_0_to_3_operand")])))]
;; V4SI element extract from offsettable memory into a DImode register on
;; 64-bit SSE targets.  After reload it is split into a zero_extend:DI of an
;; SImode memory reference at byte offset index * 4 from operand 1.
10162 (define_insn_and_split "*vec_extractv4si_zext_mem"
10163 [(set (match_operand:DI 0 "register_operand" "=x,r")
10166 (match_operand:V4SI 1 "memory_operand" "o,o")
10167 (parallel [(match_operand:SI 2 "const_0_to_3_operand")]))))]
10168 "TARGET_64BIT && TARGET_SSE"
10170 "&& reload_completed"
10171 [(set (match_dup 0) (zero_extend:DI (match_dup 1)))]
10173 operands[1] = adjust_address (operands[1], SImode, INTVAL (operands[2]) * 4);
;; Extract of element 1 (the upper half) of a V2DI operand.  The seven
;; alternatives cover pextrq, a movhps store, the psrldq and vpsrldq byte
;; shifts, movhlps, and two forms that read from offsettable memory.  The
;; isa attribute selects which alternatives apply to a given target, and
;; the memory-to-memory case is rejected by the condition.
10176 (define_insn "*vec_extractv2di_1"
10177 [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,m,x,x,x,x,r")
10179 (match_operand:V2DI 1 "nonimmediate_operand" "x ,x,0,x,x,o,o")
10180 (parallel [(const_int 1)])))]
10181 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
10183 %vpextrq\t{$1, %1, %0|%0, %1, 1}
10184 %vmovhps\t{%1, %0|%0, %1}
10185 psrldq\t{$8, %0|%0, 8}
10186 vpsrldq\t{$8, %1, %0|%0, %1, 8}
10187 movhlps\t{%1, %0|%0, %1}
10190 [(set_attr "isa" "x64_sse4,*,sse2_noavx,avx,noavx,*,x64")
10191 (set_attr "type" "sselog1,ssemov,sseishft1,sseishft1,ssemov,ssemov,imov")
10192 (set_attr "length_immediate" "1,*,1,1,*,*,*")
10193 (set_attr "prefix_rex" "1,*,*,*,*,*,*")
10194 (set_attr "prefix_extra" "1,*,*,*,*,*,*")
10195 (set_attr "prefix" "maybe_vex,maybe_vex,orig,vex,orig,*,*")
10196 (set_attr "mode" "TI,V2SF,TI,TI,V4SF,DI,DI")])
;; Post-reload split: an element extract from a VI_128 vector in memory
;; becomes a scalar load from operand 1 at byte offset
;; index * GET_MODE_SIZE (<ssescalarmode>mode).
10199 [(set (match_operand:<ssescalarmode> 0 "register_operand")
10200 (vec_select:<ssescalarmode>
10201 (match_operand:VI_128 1 "memory_operand")
10203 [(match_operand 2 "const_0_to_<ssescalarnummask>_operand")])))]
10204 "TARGET_SSE && reload_completed"
10205 [(set (match_dup 0) (match_dup 1))]
10207 int offs = INTVAL (operands[2]) * GET_MODE_SIZE (<ssescalarmode>mode);
10209 operands[1] = adjust_address (operands[1], <ssescalarmode>mode, offs);
;; Broadcasts an SImode value to all four V4SI elements.  Alternatives:
;; pshufd with immediate 0 from an SSE register (sse2), vbroadcastss from
;; memory (avx), and shufps with immediate 0 on the destination itself
;; (noavx, operand 1 tied to operand 0).
10212 (define_insn "*vec_dupv4si"
10213 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
10214 (vec_duplicate:V4SI
10215 (match_operand:SI 1 "nonimmediate_operand" " x,m,0")))]
10218 %vpshufd\t{$0, %1, %0|%0, %1, 0}
10219 vbroadcastss\t{%1, %0|%0, %1}
10220 shufps\t{$0, %0, %0|%0, %0, 0}"
10221 [(set_attr "isa" "sse2,avx,noavx")
10222 (set_attr "type" "sselog1,ssemov,sselog1")
10223 (set_attr "length_immediate" "1,0,1")
10224 (set_attr "prefix_extra" "0,1,*")
10225 (set_attr "prefix" "maybe_vex,vex,orig")
10226 (set_attr "mode" "TI,V4SF,V4SF")])
;; Broadcasts a DImode value to both V2DI elements.  Four alternatives are
;; declared, selected by the isa attribute (sse2_noavx, avx, sse3, noavx).
;; The templates visible here are vpunpcklqdq for the AVX register form and
;; %vmovddup for the memory form.
10228 (define_insn "*vec_dupv2di"
10229 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x,x")
10230 (vec_duplicate:V2DI
10231 (match_operand:DI 1 "nonimmediate_operand" " 0,x,m,0")))]
10235 vpunpcklqdq\t{%d1, %0|%0, %d1}
10236 %vmovddup\t{%1, %0|%0, %1}
10238 [(set_attr "isa" "sse2_noavx,avx,sse3,noavx")
10239 (set_attr "type" "sselog1,sselog1,sselog1,ssemov")
10240 (set_attr "prefix" "orig,vex,maybe_vex,orig")
10241 (set_attr "mode" "TI,TI,DF,V4SF")])
;; Builds a V2SI value from two SImode operands.  The SSE alternatives use
;; pinsrd / vpinsrd with immediate 1, punpckldq / vpunpckldq, or movd when
;; operand 2 is zero (constraint C).  The last two alternatives are the MMX
;; register forms (constraint y).
10243 (define_insn "*vec_concatv2si_sse4_1"
10244 [(set (match_operand:V2SI 0 "register_operand" "=x, x,x,x, x, *y,*y")
10246 (match_operand:SI 1 "nonimmediate_operand" " 0, x,0,x,rm, 0,rm")
10247 (match_operand:SI 2 "vector_move_operand" "rm,rm,x,x, C,*ym, C")))]
10250 pinsrd\t{$1, %2, %0|%0, %2, 1}
10251 vpinsrd\t{$1, %2, %1, %0|%0, %1, %2, 1}
10252 punpckldq\t{%2, %0|%0, %2}
10253 vpunpckldq\t{%2, %1, %0|%0, %1, %2}
10254 %vmovd\t{%1, %0|%0, %1}
10255 punpckldq\t{%2, %0|%0, %2}
10256 movd\t{%1, %0|%0, %1}"
10257 [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
10258 (set_attr "type" "sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov")
10259 (set_attr "prefix_extra" "1,1,*,*,*,*,*")
10260 (set_attr "length_immediate" "1,1,*,*,*,*,*")
10261 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig")
10262 (set_attr "mode" "TI,TI,TI,TI,TI,DI,DI")])
10264 ;; ??? In theory we can match memory for the MMX alternative, but allowing
10265 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
10266 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; V2SI concat for SSE targets without SSE4.1 (see the condition).  Operand
;; 2 is a register or zero.  The alternatives use punpckldq, movd, unpcklps
;; or movss, and the first three are restricted to the sse2 isa.
10267 (define_insn "*vec_concatv2si"
10268 [(set (match_operand:V2SI 0 "register_operand" "=x,x ,*y,x,x,*y,*y")
10270 (match_operand:SI 1 "nonimmediate_operand" " 0,rm,rm,0,m, 0,*rm")
10271 (match_operand:SI 2 "reg_or_0_operand" " x,C ,C, x,C,*y,C")))]
10272 "TARGET_SSE && !TARGET_SSE4_1"
10274 punpckldq\t{%2, %0|%0, %2}
10275 movd\t{%1, %0|%0, %1}
10276 movd\t{%1, %0|%0, %1}
10277 unpcklps\t{%2, %0|%0, %2}
10278 movss\t{%1, %0|%0, %1}
10279 punpckldq\t{%2, %0|%0, %2}
10280 movd\t{%1, %0|%0, %1}"
10281 [(set_attr "isa" "sse2,sse2,sse2,*,*,*,*")
10282 (set_attr "type" "sselog,ssemov,mmxmov,sselog,ssemov,mmxcvt,mmxmov")
10283 (set_attr "mode" "TI,TI,DI,V4SF,SF,DI,DI")])
;; Builds a V4SI value from two V2SI halves.  Register-register forms use
;; punpcklqdq, vpunpcklqdq or movlhps.  When operand 2 is in memory the
;; movhps / vmovhps forms are used.
10285 (define_insn "*vec_concatv4si"
10286 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x,x,x")
10288 (match_operand:V2SI 1 "register_operand" " 0,x,0,0,x")
10289 (match_operand:V2SI 2 "nonimmediate_operand" " x,x,x,m,m")))]
10292 punpcklqdq\t{%2, %0|%0, %2}
10293 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
10294 movlhps\t{%2, %0|%0, %2}
10295 movhps\t{%2, %0|%0, %q2}
10296 vmovhps\t{%2, %1, %0|%0, %1, %q2}"
10297 [(set_attr "isa" "sse2_noavx,avx,noavx,noavx,avx")
10298 (set_attr "type" "sselog,sselog,ssemov,ssemov,ssemov")
10299 (set_attr "prefix" "orig,vex,orig,orig,vex")
10300 (set_attr "mode" "TI,TI,V4SF,V2SF,V2SF")])
10302 ;; movd instead of movq is required to handle broken assemblers.
;; Builds a V2DI value from two DImode operands, with ten alternatives.
;; Alternatives 0-1 insert operand 2 with pinsrq / vpinsrq.  Alternatives
;; 2-4 apply when operand 2 is zero (constraint C) and move operand 1 from
;; a general register, an SSE register or memory, or an MMX register.
;; Alternatives 5-9 combine two halves with punpcklqdq, vpunpcklqdq,
;; movlhps, movhps or vmovhps.  The general-register move prints movq only
;; when HAVE_AS_IX86_INTERUNIT_MOVQ is set, otherwise movd.
10303 (define_insn "vec_concatv2di"
10304 [(set (match_operand:V2DI 0 "register_operand"
10305 "=x,x ,Yi,x ,!x,x,x,x,x,x")
10307 (match_operand:DI 1 "nonimmediate_operand"
10308 " 0,x ,r ,xm,*y,0,x,0,0,x")
10309 (match_operand:DI 2 "vector_move_operand"
10310 "rm,rm,C ,C ,C ,x,x,x,m,m")))]
10313 pinsrq\t{$1, %2, %0|%0, %2, 1}
10314 vpinsrq\t{$1, %2, %1, %0|%0, %1, %2, 1}
10315 * return HAVE_AS_IX86_INTERUNIT_MOVQ ? \"%vmovq\t{%1, %0|%0, %1}\" : \"%vmovd\t{%1, %0|%0, %1}\";
10316 %vmovq\t{%1, %0|%0, %1}
10317 movq2dq\t{%1, %0|%0, %1}
10318 punpcklqdq\t{%2, %0|%0, %2}
10319 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
10320 movlhps\t{%2, %0|%0, %2}
10321 movhps\t{%2, %0|%0, %2}
10322 vmovhps\t{%2, %1, %0|%0, %1, %2}"
10323 [(set_attr "isa" "x64_sse4_noavx,x64_avx,x64,sse2,sse2,sse2_noavx,avx,noavx,noavx,avx")
10326 (eq_attr "alternative" "0,1,5,6")
10327 (const_string "sselog")
10328 (const_string "ssemov")))
10329 (set_attr "prefix_rex" "1,1,1,*,*,*,*,*,*,*")
10330 (set_attr "prefix_extra" "1,1,*,*,*,*,*,*,*,*")
10331 (set_attr "length_immediate" "1,1,*,*,*,*,*,*,*,*")
10332 (set_attr "prefix" "orig,vex,maybe_vex,maybe_vex,orig,orig,vex,orig,orig,vex")
10333 (set_attr "mode" "TI,TI,TI,TI,TI,TI,TI,V4SF,V2SF,V2SF")])
;; Expander that defers entirely to ix86_expand_sse_unpack, called here with
;; the flag pair (false, false).
;; NOTE(review): going by the pattern names the flags presumably mean
;; unsigned and high-half; confirm against ix86_expand_sse_unpack.
10335 (define_expand "vec_unpacks_lo_<mode>"
10336 [(match_operand:<sseunpackmode> 0 "register_operand")
10337 (match_operand:VI124_AVX512F 1 "register_operand")]
10339 "ix86_expand_sse_unpack (operands[0], operands[1], false, false); DONE;")
;; Expander that defers entirely to ix86_expand_sse_unpack, called here with
;; the flag pair (false, true).
;; NOTE(review): going by the pattern names the flags presumably mean
;; unsigned and high-half; confirm against ix86_expand_sse_unpack.
10341 (define_expand "vec_unpacks_hi_<mode>"
10342 [(match_operand:<sseunpackmode> 0 "register_operand")
10343 (match_operand:VI124_AVX512F 1 "register_operand")]
10345 "ix86_expand_sse_unpack (operands[0], operands[1], false, true); DONE;")
;; Expander that defers entirely to ix86_expand_sse_unpack, called here with
;; the flag pair (true, false).
;; NOTE(review): going by the pattern names the flags presumably mean
;; unsigned and high-half; confirm against ix86_expand_sse_unpack.
10347 (define_expand "vec_unpacku_lo_<mode>"
10348 [(match_operand:<sseunpackmode> 0 "register_operand")
10349 (match_operand:VI124_AVX512F 1 "register_operand")]
10351 "ix86_expand_sse_unpack (operands[0], operands[1], true, false); DONE;")
;; Expander that defers entirely to ix86_expand_sse_unpack, called here with
;; the flag pair (true, true).
;; NOTE(review): going by the pattern names the flags presumably mean
;; unsigned and high-half; confirm against ix86_expand_sse_unpack.
10353 (define_expand "vec_unpacku_hi_<mode>"
10354 [(match_operand:<sseunpackmode> 0 "register_operand")
10355 (match_operand:VI124_AVX512F 1 "register_operand")]
10357 "ix86_expand_sse_unpack (operands[0], operands[1], true, true); DONE;")
10359 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10363 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander for the unsigned average.  The RTL zero-extends both inputs to
;; the double-width mode, adds them, shifts right and truncates back.  The
;; preparation code supplies operands[3] as the all-ones-valued constant
;; CONST1_RTX of <MODE>mode and then lets
;; ix86_fixup_binary_operands_no_copy adjust the operands for PLUS.
10365 (define_expand "<sse2_avx2>_uavg<mode>3"
10366 [(set (match_operand:VI12_AVX2 0 "register_operand")
10367 (truncate:VI12_AVX2
10368 (lshiftrt:<ssedoublemode>
10369 (plus:<ssedoublemode>
10370 (plus:<ssedoublemode>
10371 (zero_extend:<ssedoublemode>
10372 (match_operand:VI12_AVX2 1 "nonimmediate_operand"))
10373 (zero_extend:<ssedoublemode>
10374 (match_operand:VI12_AVX2 2 "nonimmediate_operand")))
10379 operands[3] = CONST1_RTX(<MODE>mode);
10380 ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);
;; Matching insn for the unsigned average: the sum of the two zero-extended
;; inputs plus the constant-one operand 3, shifted right in the double-width
;; mode and truncated.  Operand 1 is marked commutative (%).  Emits pavg
;; with the destination tied to operand 1 (noavx) or three-operand vpavg
;; (avx).
10383 (define_insn "*<sse2_avx2>_uavg<mode>3"
10384 [(set (match_operand:VI12_AVX2 0 "register_operand" "=x,x")
10385 (truncate:VI12_AVX2
10386 (lshiftrt:<ssedoublemode>
10387 (plus:<ssedoublemode>
10388 (plus:<ssedoublemode>
10389 (zero_extend:<ssedoublemode>
10390 (match_operand:VI12_AVX2 1 "nonimmediate_operand" "%0,x"))
10391 (zero_extend:<ssedoublemode>
10392 (match_operand:VI12_AVX2 2 "nonimmediate_operand" "xm,xm")))
10393 (match_operand:VI12_AVX2 3 "const1_operand"))
10395 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
10397 pavg<ssemodesuffix>\t{%2, %0|%0, %2}
10398 vpavg<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10399 [(set_attr "isa" "noavx,avx")
10400 (set_attr "type" "sseiadd")
10401 (set_attr "prefix_data16" "1,*")
10402 (set_attr "prefix" "orig,vex")
10403 (set_attr "mode" "<sseinsnmode>")])
10405 ;; The correct representation for this is absolutely enormous, and
10406 ;; surely not generally useful.
;; Takes two <ssebytemode> inputs and produces a VI8_AVX2 result.  Emits
;; psadbw with the destination tied to operand 1 (noavx) or three-operand
;; vpsadbw (avx).
10407 (define_insn "<sse2_avx2>_psadbw"
10408 [(set (match_operand:VI8_AVX2 0 "register_operand" "=x,x")
10410 [(match_operand:<ssebytemode> 1 "register_operand" "0,x")
10411 (match_operand:<ssebytemode> 2 "nonimmediate_operand" "xm,xm")]
10415 psadbw\t{%2, %0|%0, %2}
10416 vpsadbw\t{%2, %1, %0|%0, %1, %2}"
10417 [(set_attr "isa" "noavx,avx")
10418 (set_attr "type" "sseiadd")
10419 (set_attr "atom_unit" "simul")
10420 (set_attr "prefix_data16" "1,*")
10421 (set_attr "prefix" "orig,vex")
10422 (set_attr "mode" "<sseinsnmode>")])
;; movmsk for the VF_128_256 float vector modes: reads an SSE register and
;; writes an SImode general register.
10424 (define_insn "<sse>_movmsk<ssemodesuffix><avxsizesuffix>"
10425 [(set (match_operand:SI 0 "register_operand" "=r")
10427 [(match_operand:VF_128_256 1 "register_operand" "x")]
10430 "%vmovmsk<ssemodesuffix>\t{%1, %0|%0, %1}"
10431 [(set_attr "type" "ssemov")
10432 (set_attr "prefix" "maybe_vex")
10433 (set_attr "mode" "<MODE>")])
;; vpmovmskb on a V32QI register, producing an SImode result in a general
;; register.
;; NOTE(review): the mode attribute here is DI while sse2_pmovmskb uses SI;
;; confirm whether the difference is intentional.
10435 (define_insn "avx2_pmovmskb"
10436 [(set (match_operand:SI 0 "register_operand" "=r")
10437 (unspec:SI [(match_operand:V32QI 1 "register_operand" "x")]
10440 "vpmovmskb\t{%1, %0|%0, %1}"
10441 [(set_attr "type" "ssemov")
10442 (set_attr "prefix" "vex")
10443 (set_attr "mode" "DI")])
;; pmovmskb on a V16QI register, producing an SImode result in a general
;; register.
10445 (define_insn "sse2_pmovmskb"
10446 [(set (match_operand:SI 0 "register_operand" "=r")
10447 (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
10450 "%vpmovmskb\t{%1, %0|%0, %1}"
10451 [(set_attr "type" "ssemov")
10452 (set_attr "prefix_data16" "1")
10453 (set_attr "prefix" "maybe_vex")
10454 (set_attr "mode" "SI")])
;; Expander for maskmovdqu: a V16QI memory destination set from an unspec
;; whose visible inputs are two V16QI registers (operands 1 and 2).
10456 (define_expand "sse2_maskmovdqu"
10457 [(set (match_operand:V16QI 0 "memory_operand")
10458 (unspec:V16QI [(match_operand:V16QI 1 "register_operand")
10459 (match_operand:V16QI 2 "register_operand")
;; maskmovdqu insn.  The destination is the V16QI memory addressed by
;; operand 0 (constraint D), and the old contents of that memory are also an
;; input of the unspec.  The output code writes an addr32 prefix by hand
;; when Pmode differs from word_mode.  length_address and length_vex are
;; computed explicitly because the address register is implicit in the
;; instruction.
10464 (define_insn "*sse2_maskmovdqu"
10465 [(set (mem:V16QI (match_operand:P 0 "register_operand" "D"))
10466 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
10467 (match_operand:V16QI 2 "register_operand" "x")
10468 (mem:V16QI (match_dup 0))]
10472 /* We can't use %^ here due to ASM_OUTPUT_OPCODE processing
10473 that requires %v to be at the beginning of the opcode name. */
10474 if (Pmode != word_mode)
10475 fputs ("\taddr32", asm_out_file);
10476 return "%vmaskmovdqu\t{%2, %1|%1, %2}";
10478 [(set_attr "type" "ssemov")
10479 (set_attr "prefix_data16" "1")
10480 (set (attr "length_address")
10481 (symbol_ref ("Pmode != word_mode")))
10482 ;; The implicit %rdi operand confuses default length_vex computation.
10483 (set (attr "length_vex")
10484 (symbol_ref ("3 + REX_SSE_REGNO_P (REGNO (operands[2]))")))
10485 (set_attr "prefix" "maybe_vex")
10486 (set_attr "mode" "TI")])
;; MXCSR load pattern: operand 0 is an SImode memory operand inside an
;; unspec_volatile.  Tagged memory "load" and atom_sse_attr "mxcsr".
10488 (define_insn "sse_ldmxcsr"
10489 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
10493 [(set_attr "type" "sse")
10494 (set_attr "atom_sse_attr" "mxcsr")
10495 (set_attr "prefix" "maybe_vex")
10496 (set_attr "memory" "load")])
;; MXCSR store pattern: SImode memory operand 0 is set from the
;; unspec_volatile UNSPECV_STMXCSR (which has no real input, only
;; const_int 0).  Tagged memory "store" and atom_sse_attr "mxcsr".
10498 (define_insn "sse_stmxcsr"
10499 [(set (match_operand:SI 0 "memory_operand" "=m")
10500 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
10503 [(set_attr "type" "sse")
10504 (set_attr "atom_sse_attr" "mxcsr")
10505 (set_attr "prefix" "maybe_vex")
10506 (set_attr "memory" "store")])
;; clflush pattern: operand 0 is an address (address_operand, constraint
;; "p") inside an unspec_volatile.  The memory attribute is "unknown" and
;; atom_sse_attr is "fence".
10508 (define_insn "sse2_clflush"
10509 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
10513 [(set_attr "type" "sse")
10514 (set_attr "atom_sse_attr" "fence")
10515 (set_attr "memory" "unknown")])
;; mwait pattern: two SImode register inputs pinned by constraints "a"
;; (operand 0) and "c" (operand 1), wrapped in an unspec_volatile.
;; The encoded length is fixed at 3 bytes.
10518 (define_insn "sse3_mwait"
10519 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
10520 (match_operand:SI 1 "register_operand" "c")]
10523 ;; 64bit version is "mwait %rax,%rcx". But only lower 32bits are used.
10524 ;; Since 32bit register operands are implicitly zero extended to 64bit,
10525 ;; we only need to set up 32bit registers.
10527 [(set_attr "length" "3")])
;; monitor pattern: operand 0 uses the P mode iterator and is pinned by
;; constraint "a"; operands 1 and 2 are SImode registers pinned by "c" and
;; "d".  All three sit in an unspec_volatile.  The length is 3 bytes plus
;; one more when Pmode != word_mode.
10529 (define_insn "sse3_monitor_<mode>"
10530 [(unspec_volatile [(match_operand:P 0 "register_operand" "a")
10531 (match_operand:SI 1 "register_operand" "c")
10532 (match_operand:SI 2 "register_operand" "d")]
10535 ;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in
10536 ;; RCX and RDX are used. Since 32bit register operands are implicitly
10537 ;; zero extended to 64bit, we only need to set up 32bit registers.
10539 [(set (attr "length")
10540 (symbol_ref ("(Pmode != word_mode) + 3")))])
10542 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10544 ;; SSSE3 instructions
10546 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Code iterator over the four RTL codes plus, ss_plus, minus and ss_minus.
;; It is used as the element operation in the ph<plusminus_mnemonic>w
;; patterns that follow in this file.
10548 (define_code_iterator ssse3_plusminus [plus ss_plus minus ss_minus])
;; AVX2 horizontal add/sub on 16-bit elements (vph<plusminus_mnemonic>w).
;; Every result element applies the ssse3_plusminus code to one adjacent
;; pair (0,1), (2,3), ... (14,15) picked out with vec_select:HI.  The eight
;; pairs of operand 1 (register) are written first, followed by the same
;; eight pairs of operand 2 (register or memory).
;; NOTE(review): the vec_concat lines that nest these pairs are not visible
;; in this excerpt; check the result lane order against the full pattern.
10550 (define_insn "avx2_ph<plusminus_mnemonic>wv16hi3"
10551 [(set (match_operand:V16HI 0 "register_operand" "=x")
10556 (ssse3_plusminus:HI
10558 (match_operand:V16HI 1 "register_operand" "x")
10559 (parallel [(const_int 0)]))
10560 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
10561 (ssse3_plusminus:HI
10562 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
10563 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
10565 (ssse3_plusminus:HI
10566 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
10567 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
10568 (ssse3_plusminus:HI
10569 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
10570 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
10573 (ssse3_plusminus:HI
10574 (vec_select:HI (match_dup 1) (parallel [(const_int 8)]))
10575 (vec_select:HI (match_dup 1) (parallel [(const_int 9)])))
10576 (ssse3_plusminus:HI
10577 (vec_select:HI (match_dup 1) (parallel [(const_int 10)]))
10578 (vec_select:HI (match_dup 1) (parallel [(const_int 11)]))))
10580 (ssse3_plusminus:HI
10581 (vec_select:HI (match_dup 1) (parallel [(const_int 12)]))
10582 (vec_select:HI (match_dup 1) (parallel [(const_int 13)])))
10583 (ssse3_plusminus:HI
10584 (vec_select:HI (match_dup 1) (parallel [(const_int 14)]))
10585 (vec_select:HI (match_dup 1) (parallel [(const_int 15)]))))))
10589 (ssse3_plusminus:HI
10591 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
10592 (parallel [(const_int 0)]))
10593 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
10594 (ssse3_plusminus:HI
10595 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
10596 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
10598 (ssse3_plusminus:HI
10599 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
10600 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
10601 (ssse3_plusminus:HI
10602 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
10603 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))
10606 (ssse3_plusminus:HI
10607 (vec_select:HI (match_dup 2) (parallel [(const_int 8)]))
10608 (vec_select:HI (match_dup 2) (parallel [(const_int 9)])))
10609 (ssse3_plusminus:HI
10610 (vec_select:HI (match_dup 2) (parallel [(const_int 10)]))
10611 (vec_select:HI (match_dup 2) (parallel [(const_int 11)]))))
10613 (ssse3_plusminus:HI
10614 (vec_select:HI (match_dup 2) (parallel [(const_int 12)]))
10615 (vec_select:HI (match_dup 2) (parallel [(const_int 13)])))
10616 (ssse3_plusminus:HI
10617 (vec_select:HI (match_dup 2) (parallel [(const_int 14)]))
10618 (vec_select:HI (match_dup 2) (parallel [(const_int 15)]))))))))]
10620 "vph<plusminus_mnemonic>w\t{%2, %1, %0|%0, %1, %2}"
10621 [(set_attr "type" "sseiadd")
10622 (set_attr "prefix_extra" "1")
10623 (set_attr "prefix" "vex")
10624 (set_attr "mode" "OI")])
;; 128-bit horizontal add/sub on 16-bit elements.  Adjacent pairs (0,1),
;; (2,3), (4,5), (6,7) of operand 1 are combined first, then the same
;; pairs of operand 2.  There are two alternatives.  The noavx one ties
;; the destination to operand 1 (constraint "0") and prints the
;; two-operand ph...w form.  The avx one prints the three-operand vph...w
;; form.
;; NOTE(review): the vec_concat wrapper lines are not visible in this
;; excerpt.
10626 (define_insn "ssse3_ph<plusminus_mnemonic>wv8hi3"
10627 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
10631 (ssse3_plusminus:HI
10633 (match_operand:V8HI 1 "register_operand" "0,x")
10634 (parallel [(const_int 0)]))
10635 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
10636 (ssse3_plusminus:HI
10637 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
10638 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
10640 (ssse3_plusminus:HI
10641 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
10642 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
10643 (ssse3_plusminus:HI
10644 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
10645 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
10648 (ssse3_plusminus:HI
10650 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
10651 (parallel [(const_int 0)]))
10652 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
10653 (ssse3_plusminus:HI
10654 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
10655 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
10657 (ssse3_plusminus:HI
10658 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
10659 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
10660 (ssse3_plusminus:HI
10661 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
10662 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
10665 ph<plusminus_mnemonic>w\t{%2, %0|%0, %2}
10666 vph<plusminus_mnemonic>w\t{%2, %1, %0|%0, %1, %2}"
10667 [(set_attr "isa" "noavx,avx")
10668 (set_attr "type" "sseiadd")
10669 (set_attr "atom_unit" "complex")
10670 (set_attr "prefix_data16" "1,*")
10671 (set_attr "prefix_extra" "1")
10672 (set_attr "prefix" "orig,vex")
10673 (set_attr "mode" "TI")])
;; 64-bit (V4HI) horizontal add/sub on 16-bit elements, using "y"
;; registers.  Pairs (0,1) and (2,3) of operand 1 are combined first, then
;; the same pairs of operand 2.  The destination is tied to operand 1
;; (constraint "0"), so the template is the two-operand form.
;; NOTE(review): the vec_concat wrapper lines are not visible in this
;; excerpt.
10675 (define_insn "ssse3_ph<plusminus_mnemonic>wv4hi3"
10676 [(set (match_operand:V4HI 0 "register_operand" "=y")
10679 (ssse3_plusminus:HI
10681 (match_operand:V4HI 1 "register_operand" "0")
10682 (parallel [(const_int 0)]))
10683 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
10684 (ssse3_plusminus:HI
10685 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
10686 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
10688 (ssse3_plusminus:HI
10690 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
10691 (parallel [(const_int 0)]))
10692 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
10693 (ssse3_plusminus:HI
10694 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
10695 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
10697 "ph<plusminus_mnemonic>w\t{%2, %0|%0, %2}"
10698 [(set_attr "type" "sseiadd")
10699 (set_attr "atom_unit" "complex")
10700 (set_attr "prefix_extra" "1")
10701 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
10702 (set_attr "mode" "DI")])
;; AVX2 horizontal add/sub on 32-bit elements (vph<plusminus_mnemonic>d).
;; Adjacent pairs (0,1) ... (6,7) of operand 1 (register) are selected
;; with vec_select:SI, followed by the same pairs of operand 2 (register or
;; memory).  Always VEX-encoded, three-operand template.
;; NOTE(review): the lines carrying the arithmetic code and the vec_concat
;; nesting are not visible in this excerpt.
10704 (define_insn "avx2_ph<plusminus_mnemonic>dv8si3"
10705 [(set (match_operand:V8SI 0 "register_operand" "=x")
10711 (match_operand:V8SI 1 "register_operand" "x")
10712 (parallel [(const_int 0)]))
10713 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
10715 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
10716 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
10719 (vec_select:SI (match_dup 1) (parallel [(const_int 4)]))
10720 (vec_select:SI (match_dup 1) (parallel [(const_int 5)])))
10722 (vec_select:SI (match_dup 1) (parallel [(const_int 6)]))
10723 (vec_select:SI (match_dup 1) (parallel [(const_int 7)])))))
10728 (match_operand:V8SI 2 "nonimmediate_operand" "xm")
10729 (parallel [(const_int 0)]))
10730 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
10732 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
10733 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))
10736 (vec_select:SI (match_dup 2) (parallel [(const_int 4)]))
10737 (vec_select:SI (match_dup 2) (parallel [(const_int 5)])))
10739 (vec_select:SI (match_dup 2) (parallel [(const_int 6)]))
10740 (vec_select:SI (match_dup 2) (parallel [(const_int 7)])))))))]
10742 "vph<plusminus_mnemonic>d\t{%2, %1, %0|%0, %1, %2}"
10743 [(set_attr "type" "sseiadd")
10744 (set_attr "prefix_extra" "1")
10745 (set_attr "prefix" "vex")
10746 (set_attr "mode" "OI")])
;; 128-bit horizontal add/sub on 32-bit elements.  Pairs (0,1) and (2,3)
;; of operand 1 come first, then the same pairs of operand 2.  There are
;; two alternatives: noavx (destination tied to operand 1, two-operand
;; ph...d) and avx (three-operand vph...d).
;; NOTE(review): the lines carrying the arithmetic code and the vec_concat
;; nesting are not visible in this excerpt.
10748 (define_insn "ssse3_ph<plusminus_mnemonic>dv4si3"
10749 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
10754 (match_operand:V4SI 1 "register_operand" "0,x")
10755 (parallel [(const_int 0)]))
10756 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
10758 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
10759 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
10763 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
10764 (parallel [(const_int 0)]))
10765 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
10767 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
10768 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
10771 ph<plusminus_mnemonic>d\t{%2, %0|%0, %2}
10772 vph<plusminus_mnemonic>d\t{%2, %1, %0|%0, %1, %2}"
10773 [(set_attr "isa" "noavx,avx")
10774 (set_attr "type" "sseiadd")
10775 (set_attr "atom_unit" "complex")
10776 (set_attr "prefix_data16" "1,*")
10777 (set_attr "prefix_extra" "1")
10778 (set_attr "prefix" "orig,vex")
10779 (set_attr "mode" "TI")])
;; 64-bit (V2SI) horizontal add/sub on 32-bit elements, using "y"
;; registers.  Elements 0 and 1 of operand 1 form the first pair and
;; elements 0 and 1 of operand 2 the second.  The destination is tied to
;; operand 1.
;; NOTE(review): the lines carrying the arithmetic code and the vec_concat
;; wrapper are not visible in this excerpt.
10781 (define_insn "ssse3_ph<plusminus_mnemonic>dv2si3"
10782 [(set (match_operand:V2SI 0 "register_operand" "=y")
10786 (match_operand:V2SI 1 "register_operand" "0")
10787 (parallel [(const_int 0)]))
10788 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
10791 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
10792 (parallel [(const_int 0)]))
10793 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
10795 "ph<plusminus_mnemonic>d\t{%2, %0|%0, %2}"
10796 [(set_attr "type" "sseiadd")
10797 (set_attr "atom_unit" "complex")
10798 (set_attr "prefix_extra" "1")
10799 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
10800 (set_attr "mode" "DI")])
;; AVX2 vpmaddubsw: V32QI inputs, V16HI result.  The pattern first selects
;; the even-indexed bytes (0, 2, ..., 30) of operand 1 and of operand 2,
;; then the odd-indexed bytes (1, 3, ..., 31) of each.
;; NOTE(review): the lines that wrap these selections (extensions,
;; multiply and the combining add) are not visible in this excerpt.
10802 (define_insn "avx2_pmaddubsw256"
10803 [(set (match_operand:V16HI 0 "register_operand" "=x")
10808 (match_operand:V32QI 1 "register_operand" "x")
10809 (parallel [(const_int 0) (const_int 2)
10810 (const_int 4) (const_int 6)
10811 (const_int 8) (const_int 10)
10812 (const_int 12) (const_int 14)
10813 (const_int 16) (const_int 18)
10814 (const_int 20) (const_int 22)
10815 (const_int 24) (const_int 26)
10816 (const_int 28) (const_int 30)])))
10819 (match_operand:V32QI 2 "nonimmediate_operand" "xm")
10820 (parallel [(const_int 0) (const_int 2)
10821 (const_int 4) (const_int 6)
10822 (const_int 8) (const_int 10)
10823 (const_int 12) (const_int 14)
10824 (const_int 16) (const_int 18)
10825 (const_int 20) (const_int 22)
10826 (const_int 24) (const_int 26)
10827 (const_int 28) (const_int 30)]))))
10830 (vec_select:V16QI (match_dup 1)
10831 (parallel [(const_int 1) (const_int 3)
10832 (const_int 5) (const_int 7)
10833 (const_int 9) (const_int 11)
10834 (const_int 13) (const_int 15)
10835 (const_int 17) (const_int 19)
10836 (const_int 21) (const_int 23)
10837 (const_int 25) (const_int 27)
10838 (const_int 29) (const_int 31)])))
10840 (vec_select:V16QI (match_dup 2)
10841 (parallel [(const_int 1) (const_int 3)
10842 (const_int 5) (const_int 7)
10843 (const_int 9) (const_int 11)
10844 (const_int 13) (const_int 15)
10845 (const_int 17) (const_int 19)
10846 (const_int 21) (const_int 23)
10847 (const_int 25) (const_int 27)
10848 (const_int 29) (const_int 31)]))))))]
10850 "vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
10851 [(set_attr "type" "sseiadd")
10852 (set_attr "prefix_extra" "1")
10853 (set_attr "prefix" "vex")
10854 (set_attr "mode" "OI")])
;; 128-bit pmaddubsw: V16QI inputs, V8HI result.  Even-indexed bytes
;; (0, 2, ..., 14) of operands 1 and 2 are selected first, then the
;; odd-indexed bytes (1, 3, ..., 15).  There are two alternatives: noavx
;; (destination tied to operand 1) and avx (three-operand vpmaddubsw).
;; NOTE(review): the wrapping arithmetic lines are not visible in this
;; excerpt.
10856 (define_insn "ssse3_pmaddubsw128"
10857 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
10862 (match_operand:V16QI 1 "register_operand" "0,x")
10863 (parallel [(const_int 0) (const_int 2)
10864 (const_int 4) (const_int 6)
10865 (const_int 8) (const_int 10)
10866 (const_int 12) (const_int 14)])))
10869 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")
10870 (parallel [(const_int 0) (const_int 2)
10871 (const_int 4) (const_int 6)
10872 (const_int 8) (const_int 10)
10873 (const_int 12) (const_int 14)]))))
10876 (vec_select:V8QI (match_dup 1)
10877 (parallel [(const_int 1) (const_int 3)
10878 (const_int 5) (const_int 7)
10879 (const_int 9) (const_int 11)
10880 (const_int 13) (const_int 15)])))
10882 (vec_select:V8QI (match_dup 2)
10883 (parallel [(const_int 1) (const_int 3)
10884 (const_int 5) (const_int 7)
10885 (const_int 9) (const_int 11)
10886 (const_int 13) (const_int 15)]))))))]
10889 pmaddubsw\t{%2, %0|%0, %2}
10890 vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
10891 [(set_attr "isa" "noavx,avx")
10892 (set_attr "type" "sseiadd")
10893 (set_attr "atom_unit" "simul")
10894 (set_attr "prefix_data16" "1,*")
10895 (set_attr "prefix_extra" "1")
10896 (set_attr "prefix" "orig,vex")
10897 (set_attr "mode" "TI")])
;; 64-bit pmaddubsw: V8QI inputs in "y" registers, V4HI result.
;; Even-indexed bytes (0, 2, 4, 6) of operands 1 and 2 are selected first,
;; then the odd-indexed bytes (1, 3, 5, 7).  The destination is tied to
;; operand 1.
;; NOTE(review): the wrapping arithmetic lines are not visible in this
;; excerpt.
10899 (define_insn "ssse3_pmaddubsw"
10900 [(set (match_operand:V4HI 0 "register_operand" "=y")
10905 (match_operand:V8QI 1 "register_operand" "0")
10906 (parallel [(const_int 0) (const_int 2)
10907 (const_int 4) (const_int 6)])))
10910 (match_operand:V8QI 2 "nonimmediate_operand" "ym")
10911 (parallel [(const_int 0) (const_int 2)
10912 (const_int 4) (const_int 6)]))))
10915 (vec_select:V4QI (match_dup 1)
10916 (parallel [(const_int 1) (const_int 3)
10917 (const_int 5) (const_int 7)])))
10919 (vec_select:V4QI (match_dup 2)
10920 (parallel [(const_int 1) (const_int 3)
10921 (const_int 5) (const_int 7)]))))))]
10923 "pmaddubsw\t{%2, %0|%0, %2}"
10924 [(set_attr "type" "sseiadd")
10925 (set_attr "atom_unit" "simul")
10926 (set_attr "prefix_extra" "1")
10927 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
10928 (set_attr "mode" "DI")])
;; Modes accepted by the pmulhrsw expander: V4HI and V8HI unconditionally,
;; V16HI only when TARGET_AVX2 holds.
10930 (define_mode_iterator PMULHRSW
10931 [V4HI V8HI (V16HI "TARGET_AVX2")])
;; Expander for pmulhrsw over the PMULHRSW modes.  The visible RTL
;; sign-extends operands 1 and 2 to <ssedoublemode>, multiplies them, and
;; then applies lshiftrt, plus and a second lshiftrt in that mode.
;; The preparation code sets operands[3] to CONST1_RTX (<MODE>mode) and
;; calls ix86_fixup_binary_operands_no_copy for MULT.
;; NOTE(review): the shift counts, the outer truncation and the condition
;; are not visible in this excerpt.
10933 (define_expand "<ssse3_avx2>_pmulhrsw<mode>3"
10934 [(set (match_operand:PMULHRSW 0 "register_operand")
10936 (lshiftrt:<ssedoublemode>
10937 (plus:<ssedoublemode>
10938 (lshiftrt:<ssedoublemode>
10939 (mult:<ssedoublemode>
10940 (sign_extend:<ssedoublemode>
10941 (match_operand:PMULHRSW 1 "nonimmediate_operand"))
10942 (sign_extend:<ssedoublemode>
10943 (match_operand:PMULHRSW 2 "nonimmediate_operand")))
10949 operands[3] = CONST1_RTX(<MODE>mode);
10950 ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
;; Matching insn for (v)pmulhrsw over the VI2_AVX2 modes.  Operand 1 is
;; marked commutative with operand 2 ("%"), and operand 3 must satisfy
;; const1_operand.  The insn is enabled when TARGET_SSSE3 holds and
;; ix86_binary_operator_ok accepts the operands for MULT.
;; There are two alternatives: noavx (destination tied to operand 1,
;; two-operand form) and avx (three-operand vpmulhrsw).
10953 (define_insn "*<ssse3_avx2>_pmulhrsw<mode>3"
10954 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
10956 (lshiftrt:<ssedoublemode>
10957 (plus:<ssedoublemode>
10958 (lshiftrt:<ssedoublemode>
10959 (mult:<ssedoublemode>
10960 (sign_extend:<ssedoublemode>
10961 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,x"))
10962 (sign_extend:<ssedoublemode>
10963 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,xm")))
10965 (match_operand:VI2_AVX2 3 "const1_operand"))
10967 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
10969 pmulhrsw\t{%2, %0|%0, %2}
10970 vpmulhrsw\t{%2, %1, %0|%0, %1, %2}"
10971 [(set_attr "isa" "noavx,avx")
10972 (set_attr "type" "sseimul")
10973 (set_attr "prefix_data16" "1,*")
10974 (set_attr "prefix_extra" "1")
10975 (set_attr "prefix" "orig,vex")
10976 (set_attr "mode" "<sseinsnmode>")])
;; V4HI pmulhrsw on "y" registers.  Operand 1 is commutative ("%") and
;; tied to the destination; operand 2 may be a register or memory; operand
;; 3 must satisfy const1_operand.  The insn is enabled when TARGET_SSSE3
;; holds and ix86_binary_operator_ok accepts the operands for MULT.
10978 (define_insn "*ssse3_pmulhrswv4hi3"
10979 [(set (match_operand:V4HI 0 "register_operand" "=y")
10986 (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
10988 (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
10990 (match_operand:V4HI 3 "const1_operand"))
10992 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
10993 "pmulhrsw\t{%2, %0|%0, %2}"
10994 [(set_attr "type" "sseimul")
10995 (set_attr "prefix_extra" "1")
10996 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
10997 (set_attr "mode" "DI")])
;; (v)pshufb over the VI1_AVX2 modes.  Operand 1 is a register and operand
;; 2 a register or memory.  There are two alternatives: noavx (destination
;; tied to operand 1, two-operand pshufb) and avx (three-operand vpshufb).
10999 (define_insn "<ssse3_avx2>_pshufb<mode>3"
11000 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
11002 [(match_operand:VI1_AVX2 1 "register_operand" "0,x")
11003 (match_operand:VI1_AVX2 2 "nonimmediate_operand" "xm,xm")]
11007 pshufb\t{%2, %0|%0, %2}
11008 vpshufb\t{%2, %1, %0|%0, %1, %2}"
11009 [(set_attr "isa" "noavx,avx")
11010 (set_attr "type" "sselog1")
11011 (set_attr "prefix_data16" "1,*")
11012 (set_attr "prefix_extra" "1")
11013 (set_attr "prefix" "orig,vex")
11014 (set_attr "btver2_decode" "vector,vector")
11015 (set_attr "mode" "<sseinsnmode>")])
;; V8QI pshufb on "y" registers.  The destination is tied to operand 1;
;; operand 2 may be a register or memory.
;; The ';' after the template string only starts an (empty) comment in
;; this file format, so it has no effect on the pattern.
11017 (define_insn "ssse3_pshufbv8qi3"
11018 [(set (match_operand:V8QI 0 "register_operand" "=y")
11019 (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0")
11020 (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
11023 "pshufb\t{%2, %0|%0, %2}";
11024 [(set_attr "type" "sselog1")
11025 (set_attr "prefix_extra" "1")
11026 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
11027 (set_attr "mode" "DI")])
;; (v)psign<ssemodesuffix> over the VI124_AVX2 modes.  Operand 1 is a
;; register and operand 2 a register or memory.  There are two
;; alternatives: noavx (destination tied to operand 1, two-operand form)
;; and avx (three-operand form).
11029 (define_insn "<ssse3_avx2>_psign<mode>3"
11030 [(set (match_operand:VI124_AVX2 0 "register_operand" "=x,x")
11032 [(match_operand:VI124_AVX2 1 "register_operand" "0,x")
11033 (match_operand:VI124_AVX2 2 "nonimmediate_operand" "xm,xm")]
11037 psign<ssemodesuffix>\t{%2, %0|%0, %2}
11038 vpsign<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11039 [(set_attr "isa" "noavx,avx")
11040 (set_attr "type" "sselog1")
11041 (set_attr "prefix_data16" "1,*")
11042 (set_attr "prefix_extra" "1")
11043 (set_attr "prefix" "orig,vex")
11044 (set_attr "mode" "<sseinsnmode>")])
;; psign<mmxvecsize> over the MMXMODEI modes on "y" registers.  The
;; destination is tied to operand 1; operand 2 may be a register or memory.
;; The ';' after the template string only starts an (empty) comment in
;; this file format, so it has no effect on the pattern.
11046 (define_insn "ssse3_psign<mode>3"
11047 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
11049 [(match_operand:MMXMODEI 1 "register_operand" "0")
11050 (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
11053 "psign<mmxvecsize>\t{%2, %0|%0, %2}";
11054 [(set_attr "type" "sselog1")
11055 (set_attr "prefix_extra" "1")
11056 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
11057 (set_attr "mode" "DI")])
;; (v)palignr over the SSESCALARMODE modes.  Operand 3 must satisfy
;; const_0_to_255_mul_8_operand; the output code divides it by 8 before
;; printing, so the RTL carries eight times the value that is emitted.
;; which_alternative then picks the two-operand palignr (noavx, destination
;; tied to operand 1) or the three-operand vpalignr (avx).
11059 (define_insn "<ssse3_avx2>_palignr<mode>"
11060 [(set (match_operand:SSESCALARMODE 0 "register_operand" "=x,x")
11061 (unspec:SSESCALARMODE
11062 [(match_operand:SSESCALARMODE 1 "register_operand" "0,x")
11063 (match_operand:SSESCALARMODE 2 "nonimmediate_operand" "xm,xm")
11064 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n,n")]
11068 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
11070 switch (which_alternative)
11073 return "palignr\t{%3, %2, %0|%0, %2, %3}";
11075 return "vpalignr\t{%3, %2, %1, %0|%0, %1, %2, %3}";
11077 gcc_unreachable ();
11080 [(set_attr "isa" "noavx,avx")
11081 (set_attr "type" "sseishft")
11082 (set_attr "atom_unit" "sishuf")
11083 (set_attr "prefix_data16" "1,*")
11084 (set_attr "prefix_extra" "1")
11085 (set_attr "length_immediate" "1")
11086 (set_attr "prefix" "orig,vex")
11087 (set_attr "mode" "<sseinsnmode>")])
;; DImode palignr on "y" registers.  The destination is tied to operand 1;
;; operand 2 may be a register or memory.  Operand 3 must satisfy
;; const_0_to_255_mul_8_operand, and the output code divides it by 8 before
;; printing the immediate.
11089 (define_insn "ssse3_palignrdi"
11090 [(set (match_operand:DI 0 "register_operand" "=y")
11091 (unspec:DI [(match_operand:DI 1 "register_operand" "0")
11092 (match_operand:DI 2 "nonimmediate_operand" "ym")
11093 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
11097 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
11098 return "palignr\t{%3, %2, %0|%0, %2, %3}";
11100 [(set_attr "type" "sseishft")
11101 (set_attr "atom_unit" "sishuf")
11102 (set_attr "prefix_extra" "1")
11103 (set_attr "length_immediate" "1")
11104 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
11105 (set_attr "mode" "DI")])
;; Vector integer absolute value: the abs RTL code over the
;; VI124_AVX2_48_AVX512F modes, emitted as (v)pabs<ssemodesuffix>.
;; The source may be a register or memory ("vm"); the destination is a "v"
;; register.  Enabled when TARGET_SSSE3 && <mask_mode512bit_condition>.
;; The <mask_*> placeholders in the name, condition and template are
;; expanded from definitions outside this excerpt.
11107 (define_insn "<mask_codefor>abs<mode>2<mask_name>"
11108 [(set (match_operand:VI124_AVX2_48_AVX512F 0 "register_operand" "=v")
11109 (abs:VI124_AVX2_48_AVX512F
11110 (match_operand:VI124_AVX2_48_AVX512F 1 "nonimmediate_operand" "vm")))]
11111 "TARGET_SSSE3 && <mask_mode512bit_condition>"
11112 "%vpabs<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
11113 [(set_attr "type" "sselog1")
11114 (set_attr "prefix_data16" "1")
11115 (set_attr "prefix_extra" "1")
11116 (set_attr "prefix" "maybe_vex")
11117 (set_attr "mode" "<sseinsnmode>")])
;; Standard-name expander for vector integer abs over the
;; VI124_AVX2_48_AVX512F modes.  The preparation code calls
;; ix86_expand_sse2_abs (operands[0], operands[1]).
;; NOTE(review): the condition and whatever guards that call are not
;; visible in this excerpt.
11119 (define_expand "abs<mode>2"
11120 [(set (match_operand:VI124_AVX2_48_AVX512F 0 "register_operand")
11121 (abs:VI124_AVX2_48_AVX512F
11122 (match_operand:VI124_AVX2_48_AVX512F 1 "nonimmediate_operand")))]
11127 ix86_expand_sse2_abs (operands[0], operands[1]);
;; pabs<mmxvecsize> over the MMXMODEI modes: "y" register destination,
;; register-or-memory source.  prefix_rep is forced to "0".
;; The ';' after the template string only starts an (empty) comment in
;; this file format, so it has no effect on the pattern.
11132 (define_insn "abs<mode>2"
11133 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
11135 (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
11137 "pabs<mmxvecsize>\t{%1, %0|%0, %1}";
11138 [(set_attr "type" "sselog1")
11139 (set_attr "prefix_rep" "0")
11140 (set_attr "prefix_extra" "1")
11141 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
11142 (set_attr "mode" "DI")])
11144 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
11146 ;; AMD SSE4A instructions
11148 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; SSE4A movnt<ssemodesuffix>: stores a MODEF value held in an SSE register
;; (operand 1) to memory (operand 0).  The store is wrapped in an unspec.
11150 (define_insn "sse4a_movnt<mode>"
11151 [(set (match_operand:MODEF 0 "memory_operand" "=m")
11153 [(match_operand:MODEF 1 "register_operand" "x")]
11156 "movnt<ssemodesuffix>\t{%1, %0|%0, %1}"
11157 [(set_attr "type" "ssemov")
11158 (set_attr "mode" "<MODE>")])
;; Vector-register form of the SSE4A movnt store: element 0 of a VF_128
;; register (operand 1) is selected with vec_select and stored to a
;; <ssescalarmode> memory operand (operand 0).
11160 (define_insn "sse4a_vmmovnt<mode>"
11161 [(set (match_operand:<ssescalarmode> 0 "memory_operand" "=m")
11162 (unspec:<ssescalarmode>
11163 [(vec_select:<ssescalarmode>
11164 (match_operand:VF_128 1 "register_operand" "x")
11165 (parallel [(const_int 0)]))]
11168 "movnt<ssescalarmodesuffix>\t{%1, %0|%0, %1}"
11169 [(set_attr "type" "ssemov")
11170 (set_attr "mode" "<ssescalarmode>")])
;; Immediate form of SSE4A extrq on V2DI.  The destination is tied to
;; operand 1; operands 2 and 3 must each satisfy const_0_to_255_operand
;; and are printed as two immediates (length_immediate is 2).
11172 (define_insn "sse4a_extrqi"
11173 [(set (match_operand:V2DI 0 "register_operand" "=x")
11174 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11175 (match_operand 2 "const_0_to_255_operand")
11176 (match_operand 3 "const_0_to_255_operand")]
11179 "extrq\t{%3, %2, %0|%0, %2, %3}"
11180 [(set_attr "type" "sse")
11181 (set_attr "prefix_data16" "1")
11182 (set_attr "length_immediate" "2")
11183 (set_attr "mode" "TI")])
;; Register form of SSE4A extrq on V2DI.  The destination is tied to
;; operand 1; operand 2 is a V16QI SSE register.
11185 (define_insn "sse4a_extrq"
11186 [(set (match_operand:V2DI 0 "register_operand" "=x")
11187 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11188 (match_operand:V16QI 2 "register_operand" "x")]
11191 "extrq\t{%2, %0|%0, %2}"
11192 [(set_attr "type" "sse")
11193 (set_attr "prefix_data16" "1")
11194 (set_attr "mode" "TI")])
;; Immediate form of SSE4A insertq on V2DI.  The destination is tied to
;; operand 1 and operand 2 is a V2DI register.  Operands 3 and 4 must each
;; satisfy const_0_to_255_operand and are printed as two immediates
;; (length_immediate is 2).  prefix_data16 is "0" and prefix_rep is "1".
11196 (define_insn "sse4a_insertqi"
11197 [(set (match_operand:V2DI 0 "register_operand" "=x")
11198 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11199 (match_operand:V2DI 2 "register_operand" "x")
11200 (match_operand 3 "const_0_to_255_operand")
11201 (match_operand 4 "const_0_to_255_operand")]
11204 "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
11205 [(set_attr "type" "sseins")
11206 (set_attr "prefix_data16" "0")
11207 (set_attr "prefix_rep" "1")
11208 (set_attr "length_immediate" "2")
11209 (set_attr "mode" "TI")])
;; Register form of SSE4A insertq on V2DI.  The destination is tied to
;; operand 1 and operand 2 is a V2DI register.  prefix_data16 is "0" and
;; prefix_rep is "1".
11211 (define_insn "sse4a_insertq"
11212 [(set (match_operand:V2DI 0 "register_operand" "=x")
11213 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11214 (match_operand:V2DI 2 "register_operand" "x")]
11217 "insertq\t{%2, %0|%0, %2}"
11218 [(set_attr "type" "sseins")
11219 (set_attr "prefix_data16" "0")
11220 (set_attr "prefix_rep" "1")
11221 (set_attr "mode" "TI")])
11223 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
11225 ;; Intel SSE4.1 instructions
11227 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Immediate float blend over the VF_128_256 modes, modelled as vec_merge.
;; Note the operand order inside the vec_merge: operand 2 comes first, then
;; operand 1, with operand 3 as the constant mask.  There are two
;; alternatives: noavx (destination tied to operand 1) and avx
;; (three-source vblend form).
11229 (define_insn "<sse4_1>_blend<ssemodesuffix><avxsizesuffix>"
11230 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
11231 (vec_merge:VF_128_256
11232 (match_operand:VF_128_256 2 "nonimmediate_operand" "xm,xm")
11233 (match_operand:VF_128_256 1 "register_operand" "0,x")
11234 (match_operand:SI 3 "const_0_to_<blendbits>_operand")))]
11237 blend<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
11238 vblend<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11239 [(set_attr "isa" "noavx,avx")
11240 (set_attr "type" "ssemov")
11241 (set_attr "length_immediate" "1")
11242 (set_attr "prefix_data16" "1,*")
11243 (set_attr "prefix_extra" "1")
11244 (set_attr "prefix" "orig,vex")
11245 (set_attr "mode" "<MODE>")])
;; Variable float blend over the VF_128_256 modes.  Operand 3 is the
;; selector register: the noavx alternative restricts it with constraint
;; "Yz", the avx alternative accepts any "x" register.  The noavx
;; alternative also ties the destination to operand 1.
;; NOTE(review): length_immediate is "1" for both alternatives here,
;; whereas the pblendvb pattern in this file uses "*,1"; confirm whether
;; the non-AVX form is really meant to count an immediate byte.
11247 (define_insn "<sse4_1>_blendv<ssemodesuffix><avxsizesuffix>"
11248 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
11250 [(match_operand:VF_128_256 1 "register_operand" "0,x")
11251 (match_operand:VF_128_256 2 "nonimmediate_operand" "xm,xm")
11252 (match_operand:VF_128_256 3 "register_operand" "Yz,x")]
11256 blendv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
11257 vblendv<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11258 [(set_attr "isa" "noavx,avx")
11259 (set_attr "type" "ssemov")
11260 (set_attr "length_immediate" "1")
11261 (set_attr "prefix_data16" "1,*")
11262 (set_attr "prefix_extra" "1")
11263 (set_attr "prefix" "orig,vex")
11264 (set_attr "btver2_decode" "vector,vector")
11265 (set_attr "mode" "<MODE>")])
;; (v)dp<ssemodesuffix> over the VF_128_256 modes.  Operand 1 is marked
;; commutative with operand 2 ("%"); operand 3 is an immediate satisfying
;; const_0_to_255_operand.  There are two alternatives: noavx (destination
;; tied to operand 1) and avx (three-source form).
11267 (define_insn "<sse4_1>_dp<ssemodesuffix><avxsizesuffix>"
11268 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
11270 [(match_operand:VF_128_256 1 "nonimmediate_operand" "%0,x")
11271 (match_operand:VF_128_256 2 "nonimmediate_operand" "xm,xm")
11272 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
11276 dp<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
11277 vdp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11278 [(set_attr "isa" "noavx,avx")
11279 (set_attr "type" "ssemul")
11280 (set_attr "length_immediate" "1")
11281 (set_attr "prefix_data16" "1,*")
11282 (set_attr "prefix_extra" "1")
11283 (set_attr "prefix" "orig,vex")
11284 (set_attr "btver2_decode" "vector,vector")
11285 (set_attr "mode" "<MODE>")])
;; (v)movntdqa over the VI8_AVX2 modes: loads from a memory operand
;; (operand 1) into an SSE register (operand 0).  The load is wrapped in an
;; unspec.
11287 (define_insn "<sse4_1_avx2>_movntdqa"
11288 [(set (match_operand:VI8_AVX2 0 "register_operand" "=x")
11289 (unspec:VI8_AVX2 [(match_operand:VI8_AVX2 1 "memory_operand" "m")]
11292 "%vmovntdqa\t{%1, %0|%0, %1}"
11293 [(set_attr "type" "ssemov")
11294 (set_attr "prefix_extra" "1")
11295 (set_attr "prefix" "maybe_vex")
11296 (set_attr "mode" "<sseinsnmode>")])
;; (v)mpsadbw over the VI1_AVX2 modes.  Operand 1 is a register, operand 2
;; a register or memory, and operand 3 an immediate satisfying
;; const_0_to_255_operand.  There are two alternatives: noavx (destination
;; tied to operand 1) and avx (three-source form).
11298 (define_insn "<sse4_1_avx2>_mpsadbw"
11299 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
11301 [(match_operand:VI1_AVX2 1 "register_operand" "0,x")
11302 (match_operand:VI1_AVX2 2 "nonimmediate_operand" "xm,xm")
11303 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
11307 mpsadbw\t{%3, %2, %0|%0, %2, %3}
11308 vmpsadbw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11309 [(set_attr "isa" "noavx,avx")
11310 (set_attr "type" "sselog1")
11311 (set_attr "length_immediate" "1")
11312 (set_attr "prefix_extra" "1")
11313 (set_attr "prefix" "orig,vex")
11314 (set_attr "btver2_decode" "vector,vector")
11315 (set_attr "mode" "<sseinsnmode>")])
;; AVX2 vpackusdw: two V8SI inputs (operand 1 a register, operand 2 a
;; register or memory) produce one V16HI result.  Always VEX-encoded.
;; NOTE(review): the RTL codes wrapping the two inputs are not visible in
;; this excerpt.
11317 (define_insn "avx2_packusdw"
11318 [(set (match_operand:V16HI 0 "register_operand" "=x")
11321 (match_operand:V8SI 1 "register_operand" "x"))
11323 (match_operand:V8SI 2 "nonimmediate_operand" "xm"))))]
11325 "vpackusdw\t{%2, %1, %0|%0, %1, %2}"
11326 [(set_attr "type" "sselog")
11327 (set_attr "prefix_extra" "1")
11328 (set_attr "prefix" "vex")
11329 (set_attr "mode" "OI")])
;; SSE4.1 (v)packusdw: two V4SI inputs produce one V8HI result.  There are
;; two alternatives: noavx (destination tied to operand 1, two-operand
;; form) and avx (three-operand vpackusdw).
;; NOTE(review): the RTL codes wrapping the two inputs are not visible in
;; this excerpt.
11331 (define_insn "sse4_1_packusdw"
11332 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
11335 (match_operand:V4SI 1 "register_operand" "0,x"))
11337 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm"))))]
11340 packusdw\t{%2, %0|%0, %2}
11341 vpackusdw\t{%2, %1, %0|%0, %1, %2}"
11342 [(set_attr "isa" "noavx,avx")
11343 (set_attr "type" "sselog")
11344 (set_attr "prefix_extra" "1")
11345 (set_attr "prefix" "orig,vex")
11346 (set_attr "mode" "TI")])
;; Variable byte blend over the VI1_AVX2 modes.  Operand 3 is the selector
;; register: the noavx alternative restricts it with constraint "Yz", the
;; avx alternative accepts any "x" register.  The noavx alternative also
;; ties the destination to operand 1.  Only the avx alternative counts an
;; immediate byte (length_immediate is "*,1").
11348 (define_insn "<sse4_1_avx2>_pblendvb"
11349 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
11351 [(match_operand:VI1_AVX2 1 "register_operand" "0,x")
11352 (match_operand:VI1_AVX2 2 "nonimmediate_operand" "xm,xm")
11353 (match_operand:VI1_AVX2 3 "register_operand" "Yz,x")]
11357 pblendvb\t{%3, %2, %0|%0, %2, %3}
11358 vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11359 [(set_attr "isa" "noavx,avx")
11360 (set_attr "type" "ssemov")
11361 (set_attr "prefix_extra" "1")
11362 (set_attr "length_immediate" "*,1")
11363 (set_attr "prefix" "orig,vex")
11364 (set_attr "btver2_decode" "vector,vector")
11365 (set_attr "mode" "<sseinsnmode>")])
;; SSE4.1 (v)pblendw on V8HI.  Operand 2 is listed before operand 1 in the
;; RTL; operand 3 is an immediate mask satisfying const_0_to_255_operand.
;; There are two alternatives: noavx (destination tied to operand 1) and
;; avx (three-source form).
11367 (define_insn "sse4_1_pblendw"
11368 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
11370 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
11371 (match_operand:V8HI 1 "register_operand" "0,x")
11372 (match_operand:SI 3 "const_0_to_255_operand" "n,n")))]
11375 pblendw\t{%3, %2, %0|%0, %2, %3}
11376 vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11377 [(set_attr "isa" "noavx,avx")
11378 (set_attr "type" "ssemov")
11379 (set_attr "prefix_extra" "1")
11380 (set_attr "length_immediate" "1")
11381 (set_attr "prefix" "orig,vex")
11382 (set_attr "mode" "TI")])
11384 ;; The builtin uses an 8-bit immediate. Expand that.
;; Expander for the V16HI pblendw.  Operand 3 arrives as a value accepted
;; by const_0_to_255_operand.  The preparation code keeps its low 8 bits
;; and copies them into bits 8..15 as well (val << 8 | val), so the
;; rewritten operand 3 is a 16-bit mask.
11385 (define_expand "avx2_pblendw"
11386 [(set (match_operand:V16HI 0 "register_operand")
11388 (match_operand:V16HI 2 "nonimmediate_operand")
11389 (match_operand:V16HI 1 "register_operand")
11390 (match_operand:SI 3 "const_0_to_255_operand")))]
11393 HOST_WIDE_INT val = INTVAL (operands[3]) & 0xff;
11394 operands[3] = GEN_INT (val << 8 | val);
;; Matching insn for the V16HI vpblendw.  Operand 3 must satisfy
;; avx2_pblendw_operand; the output code masks it to its low 8 bits before
;; printing, so the emitted immediate is a single byte.
11397 (define_insn "*avx2_pblendw"
11398 [(set (match_operand:V16HI 0 "register_operand" "=x")
11400 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
11401 (match_operand:V16HI 1 "register_operand" "x")
11402 (match_operand:SI 3 "avx2_pblendw_operand" "n")))]
11405 operands[3] = GEN_INT (INTVAL (operands[3]) & 0xff);
11406 return "vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}";
11408 [(set_attr "type" "ssemov")
11409 (set_attr "prefix_extra" "1")
11410 (set_attr "length_immediate" "1")
11411 (set_attr "prefix" "vex")
11412 (set_attr "mode" "OI")])
;; AVX2 vpblendd over the VI4_AVX2 modes, modelled as vec_merge.  Operand 2
;; comes first in the vec_merge, then operand 1, with operand 3 as the
;; immediate mask (const_0_to_255_operand).  Always VEX-encoded.
11414 (define_insn "avx2_pblendd<mode>"
11415 [(set (match_operand:VI4_AVX2 0 "register_operand" "=x")
11416 (vec_merge:VI4_AVX2
11417 (match_operand:VI4_AVX2 2 "nonimmediate_operand" "xm")
11418 (match_operand:VI4_AVX2 1 "register_operand" "x")
11419 (match_operand:SI 3 "const_0_to_255_operand" "n")))]
11421 "vpblendd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11422 [(set_attr "type" "ssemov")
11423 (set_attr "prefix_extra" "1")
11424 (set_attr "length_immediate" "1")
11425 (set_attr "prefix" "vex")
11426 (set_attr "mode" "<sseinsnmode>")])
;; (v)phminposuw on V8HI: unspec UNSPEC_PHMINPOSUW with a single
;; register-or-memory input (operand 1) and an SSE register result.
11428 (define_insn "sse4_1_phminposuw"
11429 [(set (match_operand:V8HI 0 "register_operand" "=x")
11430 (unspec:V8HI [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
11431 UNSPEC_PHMINPOSUW))]
11433 "%vphminposuw\t{%1, %0|%0, %1}"
11434 [(set_attr "type" "sselog1")
11435 (set_attr "prefix_extra" "1")
11436 (set_attr "prefix" "maybe_vex")
11437 (set_attr "mode" "TI")])
;; AVX2 byte-to-word widening: vpmov<extsuffix>bw takes a whole V16QI
;; register-or-memory source and produces V16HI.  <code> and <extsuffix>
;; are filled in by an iterator outside this excerpt.
11439 (define_insn "avx2_<code>v16qiv16hi2"
11440 [(set (match_operand:V16HI 0 "register_operand" "=x")
11442 (match_operand:V16QI 1 "nonimmediate_operand" "xm")))]
11444 "vpmov<extsuffix>bw\t{%1, %0|%0, %1}"
11445 [(set_attr "type" "ssemov")
11446 (set_attr "prefix_extra" "1")
11447 (set_attr "prefix" "vex")
11448 (set_attr "mode" "OI")])
;; SSE4.1 byte-to-word widening: (v)pmov<extsuffix>bw.  Only bytes 0..7 of
;; the V16QI source are selected, and the result is V8HI.  The Intel-syntax
;; side of the template prints the source with the "q" operand modifier.
11450 (define_insn "sse4_1_<code>v8qiv8hi2"
11451 [(set (match_operand:V8HI 0 "register_operand" "=x")
11454 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
11455 (parallel [(const_int 0) (const_int 1)
11456 (const_int 2) (const_int 3)
11457 (const_int 4) (const_int 5)
11458 (const_int 6) (const_int 7)]))))]
11460 "%vpmov<extsuffix>bw\t{%1, %0|%0, %q1}"
11461 [(set_attr "type" "ssemov")
11462 (set_attr "prefix_extra" "1")
11463 (set_attr "prefix" "maybe_vex")
11464 (set_attr "mode" "TI")])
;; AVX-512F byte-to-dword widening move: emits vpmov<extsuffix>bd from a
;; whole V16QI source into a V16SI register, with the <mask_operand2>
;; suffix appended to the destination.
;; NOTE(review): the source is the full V16QI vector, yet the Intel-syntax
;; template prints it as %q1, whereas avx512f_<code>v16hiv16si2 prints its
;; full-vector source as plain %1 -- confirm the intended modifier.
11466 (define_insn "<mask_codefor>avx512f_<code>v16qiv16si2<mask_name>"
11467 [(set (match_operand:V16SI 0 "register_operand" "=v")
11469 (match_operand:V16QI 1 "nonimmediate_operand" "vm")))]
11471 "vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
11472 [(set_attr "type" "ssemov")
11473 (set_attr "prefix" "evex")
11474 (set_attr "mode" "XI")])
;; AVX2 byte-to-dword widening move: emits vpmov<extsuffix>bd into a V8SI
;; register.  Only elements 0..7 of the V16QI source are selected; the
;; Intel-syntax template prints the source with the q modifier.
11476 (define_insn "avx2_<code>v8qiv8si2"
11477 [(set (match_operand:V8SI 0 "register_operand" "=x")
11480 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
11481 (parallel [(const_int 0) (const_int 1)
11482 (const_int 2) (const_int 3)
11483 (const_int 4) (const_int 5)
11484 (const_int 6) (const_int 7)]))))]
11486 "vpmov<extsuffix>bd\t{%1, %0|%0, %q1}"
11487 [(set_attr "type" "ssemov")
11488 (set_attr "prefix_extra" "1")
11489 (set_attr "prefix" "vex")
11490 (set_attr "mode" "OI")])
;; SSE4.1 byte-to-dword widening move: emits [v]pmov<extsuffix>bd into a
;; V4SI register.  Only elements 0..3 of the V16QI source are selected;
;; the Intel-syntax template prints the source with the k modifier.
11492 (define_insn "sse4_1_<code>v4qiv4si2"
11493 [(set (match_operand:V4SI 0 "register_operand" "=x")
11496 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
11497 (parallel [(const_int 0) (const_int 1)
11498 (const_int 2) (const_int 3)]))))]
11500 "%vpmov<extsuffix>bd\t{%1, %0|%0, %k1}"
11501 [(set_attr "type" "ssemov")
11502 (set_attr "prefix_extra" "1")
11503 (set_attr "prefix" "maybe_vex")
11504 (set_attr "mode" "TI")])
;; AVX-512F word-to-dword widening move: emits vpmov<extsuffix>wd from a
;; whole V16HI source (register or memory) into a V16SI register, with the
;; <mask_operand2> suffix appended to the destination.
11506 (define_insn "avx512f_<code>v16hiv16si2<mask_name>"
11507 [(set (match_operand:V16SI 0 "register_operand" "=v")
11509 (match_operand:V16HI 1 "nonimmediate_operand" "vm")))]
11511 "vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
11512 [(set_attr "type" "ssemov")
11513 (set_attr "prefix" "evex")
11514 (set_attr "mode" "XI")])
;; AVX2 word-to-dword widening move: emits vpmov<extsuffix>wd from a whole
;; V8HI source (register or memory) into a V8SI register.
11516 (define_insn "avx2_<code>v8hiv8si2"
11517 [(set (match_operand:V8SI 0 "register_operand" "=x")
11519 (match_operand:V8HI 1 "nonimmediate_operand" "xm")))]
11521 "vpmov<extsuffix>wd\t{%1, %0|%0, %1}"
11522 [(set_attr "type" "ssemov")
11523 (set_attr "prefix_extra" "1")
11524 (set_attr "prefix" "vex")
11525 (set_attr "mode" "OI")])
;; SSE4.1 word-to-dword widening move: emits [v]pmov<extsuffix>wd into a
;; V4SI register.  Only elements 0..3 of the V8HI source are selected; the
;; Intel-syntax template prints the source with the q modifier.
11527 (define_insn "sse4_1_<code>v4hiv4si2"
11528 [(set (match_operand:V4SI 0 "register_operand" "=x")
11531 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
11532 (parallel [(const_int 0) (const_int 1)
11533 (const_int 2) (const_int 3)]))))]
11535 "%vpmov<extsuffix>wd\t{%1, %0|%0, %q1}"
11536 [(set_attr "type" "ssemov")
11537 (set_attr "prefix_extra" "1")
11538 (set_attr "prefix" "maybe_vex")
11539 (set_attr "mode" "TI")])
;; AVX-512F byte-to-qword widening move: emits vpmov<extsuffix>bq into a
;; V8DI register, with the <mask_operand2> suffix appended to the
;; destination.  Only elements 0..7 of the V16QI source are selected.
;; NOTE(review): avx2_<code>v8qiv8si2 selects the same eight bytes and
;; prints its Intel-syntax source as %q1, while this pattern uses %k1 --
;; confirm the intended operand-size modifier.
11541 (define_insn "avx512f_<code>v8qiv8di2<mask_name>"
11542 [(set (match_operand:V8DI 0 "register_operand" "=v")
11545 (match_operand:V16QI 1 "nonimmediate_operand" "vm")
11546 (parallel [(const_int 0) (const_int 1)
11547 (const_int 2) (const_int 3)
11548 (const_int 4) (const_int 5)
11549 (const_int 6) (const_int 7)]))))]
11551 "vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %k1}"
11552 [(set_attr "type" "ssemov")
11553 (set_attr "prefix" "evex")
11554 (set_attr "mode" "XI")])
;; AVX2 byte-to-qword widening move: emits vpmov<extsuffix>bq into a V4DI
;; register.  Only elements 0..3 of the V16QI source are selected; the
;; Intel-syntax template prints the source with the k modifier.
11556 (define_insn "avx2_<code>v4qiv4di2"
11557 [(set (match_operand:V4DI 0 "register_operand" "=x")
11560 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
11561 (parallel [(const_int 0) (const_int 1)
11562 (const_int 2) (const_int 3)]))))]
11564 "vpmov<extsuffix>bq\t{%1, %0|%0, %k1}"
11565 [(set_attr "type" "ssemov")
11566 (set_attr "prefix_extra" "1")
11567 (set_attr "prefix" "vex")
11568 (set_attr "mode" "OI")])
;; SSE4.1 byte-to-qword widening move: emits [v]pmov<extsuffix>bq into a
;; V2DI register.  Only elements 0..1 of the V16QI source are selected;
;; the Intel-syntax template prints the source with the w modifier.
11570 (define_insn "sse4_1_<code>v2qiv2di2"
11571 [(set (match_operand:V2DI 0 "register_operand" "=x")
11574 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
11575 (parallel [(const_int 0) (const_int 1)]))))]
11577 "%vpmov<extsuffix>bq\t{%1, %0|%0, %w1}"
11578 [(set_attr "type" "ssemov")
11579 (set_attr "prefix_extra" "1")
11580 (set_attr "prefix" "maybe_vex")
11581 (set_attr "mode" "TI")])
;; AVX-512F word-to-qword widening move: emits vpmov<extsuffix>wq from a
;; whole V8HI source into a V8DI register, with the <mask_operand2> suffix
;; appended to the destination.
;; NOTE(review): the source is the full V8HI vector, yet the Intel-syntax
;; template prints it as %q1, whereas avx512f_<code>v8siv8di2 prints its
;; full-vector source as plain %1 -- confirm the intended modifier.
11583 (define_insn "avx512f_<code>v8hiv8di2<mask_name>"
11584 [(set (match_operand:V8DI 0 "register_operand" "=v")
11586 (match_operand:V8HI 1 "nonimmediate_operand" "vm")))]
11588 "vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
11589 [(set_attr "type" "ssemov")
11590 (set_attr "prefix" "evex")
11591 (set_attr "mode" "XI")])
;; AVX2 word-to-qword widening move: emits vpmov<extsuffix>wq into a V4DI
;; register.  Only elements 0..3 of the V8HI source are selected; the
;; Intel-syntax template prints the source with the q modifier.
11593 (define_insn "avx2_<code>v4hiv4di2"
11594 [(set (match_operand:V4DI 0 "register_operand" "=x")
11597 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
11598 (parallel [(const_int 0) (const_int 1)
11599 (const_int 2) (const_int 3)]))))]
11601 "vpmov<extsuffix>wq\t{%1, %0|%0, %q1}"
11602 [(set_attr "type" "ssemov")
11603 (set_attr "prefix_extra" "1")
11604 (set_attr "prefix" "vex")
11605 (set_attr "mode" "OI")])
;; SSE4.1 word-to-qword widening move: emits [v]pmov<extsuffix>wq into a
;; V2DI register.  Only elements 0..1 of the V8HI source are selected; the
;; Intel-syntax template prints the source with the k modifier.
11607 (define_insn "sse4_1_<code>v2hiv2di2"
11608 [(set (match_operand:V2DI 0 "register_operand" "=x")
11611 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
11612 (parallel [(const_int 0) (const_int 1)]))))]
11614 "%vpmov<extsuffix>wq\t{%1, %0|%0, %k1}"
11615 [(set_attr "type" "ssemov")
11616 (set_attr "prefix_extra" "1")
11617 (set_attr "prefix" "maybe_vex")
11618 (set_attr "mode" "TI")])
;; AVX-512F dword-to-qword widening move: emits vpmov<extsuffix>dq from a
;; whole V8SI source (register or memory) into a V8DI register, with the
;; <mask_operand2> suffix appended to the destination.
11620 (define_insn "avx512f_<code>v8siv8di2<mask_name>"
11621 [(set (match_operand:V8DI 0 "register_operand" "=v")
11623 (match_operand:V8SI 1 "nonimmediate_operand" "vm")))]
11625 "vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
11626 [(set_attr "type" "ssemov")
11627 (set_attr "prefix" "evex")
11628 (set_attr "mode" "XI")])
;; AVX2 dword-to-qword widening move: emits vpmov<extsuffix>dq from a whole
;; V4SI source (register or memory) into a V4DI register.
;; The "prefix" attribute is stated explicitly as "vex", matching every
;; other avx2_<code>* widening pattern in this group, so the encoding
;; prefix does not depend on the attribute's default value.
11630 (define_insn "avx2_<code>v4siv4di2"
11631 [(set (match_operand:V4DI 0 "register_operand" "=x")
11633 (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
11635 "vpmov<extsuffix>dq\t{%1, %0|%0, %1}"
11636 [(set_attr "type" "ssemov")
11637 (set_attr "prefix_extra" "1")
(set_attr "prefix" "vex")
11638 (set_attr "mode" "OI")])
;; SSE4.1 dword-to-qword widening move: emits [v]pmov<extsuffix>dq into a
;; V2DI register.  Only elements 0..1 of the V4SI source are selected; the
;; Intel-syntax template prints the source with the q modifier.
11640 (define_insn "sse4_1_<code>v2siv2di2"
11641 [(set (match_operand:V2DI 0 "register_operand" "=x")
11644 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
11645 (parallel [(const_int 0) (const_int 1)]))))]
11647 "%vpmov<extsuffix>dq\t{%1, %0|%0, %q1}"
11648 [(set_attr "type" "ssemov")
11649 (set_attr "prefix_extra" "1")
11650 (set_attr "prefix" "maybe_vex")
11651 (set_attr "mode" "TI")])
11653 ;; vtestps/vtestpd are very similar to comiss and ucomiss when
11654 ;; setting FLAGS_REG. But they are not really compare instructions.
;; Emits vtest<ssemodesuffix> on two VF_128_256 operands (operand 0 in a
;; register, operand 1 in a register or memory).  The only result is
;; FLAGS_REG in CC mode, computed by an unspec over both operands.
11655 (define_insn "avx_vtest<ssemodesuffix><avxsizesuffix>"
11656 [(set (reg:CC FLAGS_REG)
11657 (unspec:CC [(match_operand:VF_128_256 0 "register_operand" "x")
11658 (match_operand:VF_128_256 1 "nonimmediate_operand" "xm")]
11661 "vtest<ssemodesuffix>\t{%1, %0|%0, %1}"
11662 [(set_attr "type" "ssecomi")
11663 (set_attr "prefix_extra" "1")
11664 (set_attr "prefix" "vex")
11665 (set_attr "mode" "<MODE>")])
11667 ;; ptest is very similar to comiss and ucomiss when setting FLAGS_REG.
11668 ;; But it is not really a compare instruction.
;; 256-bit vptest on two V4DI operands (operand 0 in a register, operand 1
;; in a register or memory).  The only result is FLAGS_REG in CC mode,
;; computed by an unspec over both operands.
11669 (define_insn "avx_ptest256"
11670 [(set (reg:CC FLAGS_REG)
11671 (unspec:CC [(match_operand:V4DI 0 "register_operand" "x")
11672 (match_operand:V4DI 1 "nonimmediate_operand" "xm")]
11675 "vptest\t{%1, %0|%0, %1}"
11676 [(set_attr "type" "ssecomi")
11677 (set_attr "prefix_extra" "1")
11678 (set_attr "prefix" "vex")
11679 (set_attr "btver2_decode" "vector")
11680 (set_attr "mode" "OI")])
;; 128-bit [v]ptest on two V2DI operands (operand 0 in a register,
;; operand 1 in a register or memory).  The only result is FLAGS_REG in CC
;; mode, computed by an unspec over both operands.
11682 (define_insn "sse4_1_ptest"
11683 [(set (reg:CC FLAGS_REG)
11684 (unspec:CC [(match_operand:V2DI 0 "register_operand" "x")
11685 (match_operand:V2DI 1 "nonimmediate_operand" "xm")]
11688 "%vptest\t{%1, %0|%0, %1}"
11689 [(set_attr "type" "ssecomi")
11690 (set_attr "prefix_extra" "1")
11691 (set_attr "prefix" "maybe_vex")
11692 (set_attr "mode" "TI")])
;; Packed [v]round<ssemodesuffix> for the VF_128_256 modes.  Operand 1 is
;; the source (register or memory) and operand 2 is the immediate accepted
;; by const_0_to_15_operand.  The prefix_data16 attribute is computed from
;; a TARGET_AVX test rather than being a fixed string.
11694 (define_insn "<sse4_1>_round<ssemodesuffix><avxsizesuffix>"
11695 [(set (match_operand:VF_128_256 0 "register_operand" "=x")
11697 [(match_operand:VF_128_256 1 "nonimmediate_operand" "xm")
11698 (match_operand:SI 2 "const_0_to_15_operand" "n")]
11701 "%vround<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11702 [(set_attr "type" "ssecvt")
11703 (set (attr "prefix_data16")
11705 (match_test "TARGET_AVX")
11707 (const_string "1")))
11708 (set_attr "prefix_extra" "1")
11709 (set_attr "length_immediate" "1")
11710 (set_attr "prefix" "maybe_vex")
11711 (set_attr "mode" "<MODE>")])
;; Expander: round operand 1 (VF1_128_256) with the immediate in operand 2
;; into a fresh temporary of the same mode, then convert that temporary to
;; the integer vector operand 0 with the fix_trunc pattern.
11713 (define_expand "<sse4_1>_round<ssemodesuffix>_sfix<avxsizesuffix>"
11714 [(match_operand:<sseintvecmode> 0 "register_operand")
11715 (match_operand:VF1_128_256 1 "nonimmediate_operand")
11716 (match_operand:SI 2 "const_0_to_15_operand")]
11719 rtx tmp = gen_reg_rtx (<MODE>mode);
11722 (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp, operands[1],
11725 (gen_fix_trunc<mode><sseintvecmodelower>2 (operands[0], tmp));
;; Expander for a 512-bit V8DF round: forwards all three operands
;; unchanged to the avx512f_rndscalev8df pattern.
11729 (define_expand "avx512f_roundpd512"
11730 [(match_operand:V8DF 0 "register_operand")
11731 (match_operand:V8DF 1 "nonimmediate_operand")
11732 (match_operand:SI 2 "const_0_to_15_operand")]
11735 emit_insn (gen_avx512f_rndscalev8df (operands[0], operands[1], operands[2]));
;; Expander: round two VF2 inputs (operands 1 and 2) with the immediate in
;; operand 3 and pack the truncated integer results into operand 0.
;; For V2DF with TARGET_AVX and not TARGET_PREFER_AVX128, the two inputs
;; are concatenated into one V4DF, rounded with avx_roundpd256 and
;; converted with fix_truncv4dfv4si2.  Otherwise each input is rounded
;; into its own temporary and the pair is combined by
;; vec_pack_sfix_trunc_<mode>.
11739 (define_expand "<sse4_1>_round<ssemodesuffix>_vec_pack_sfix<avxsizesuffix>"
11740 [(match_operand:<ssepackfltmode> 0 "register_operand")
11741 (match_operand:VF2 1 "nonimmediate_operand")
11742 (match_operand:VF2 2 "nonimmediate_operand")
11743 (match_operand:SI 3 "const_0_to_15_operand")]
11748 if (<MODE>mode == V2DFmode
11749 && TARGET_AVX && !TARGET_PREFER_AVX128)
11751 rtx tmp2 = gen_reg_rtx (V4DFmode);
11753 tmp0 = gen_reg_rtx (V4DFmode);
11754 tmp1 = force_reg (V2DFmode, operands[1]);
11756 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
11757 emit_insn (gen_avx_roundpd256 (tmp2, tmp0, operands[3]));
11758 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp2));
11762 tmp0 = gen_reg_rtx (<MODE>mode);
11763 tmp1 = gen_reg_rtx (<MODE>mode);
11766 (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp0, operands[1],
11769 (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp1, operands[2],
11772 (gen_vec_pack_sfix_trunc_<mode> (operands[0], tmp0, tmp1));
;; Scalar round for the VF_128 modes, with two alternatives selected by
;; the "isa" attribute.  The noavx alternative ties operand 1 to the
;; destination (constraint "0") and emits the two-source-operand
;; round<ssescalarmodesuffix>; the avx alternative emits
;; vround<ssescalarmodesuffix> with operand 1 as a separate register.
;; Operand 2 is the value being rounded, operand 3 the immediate.
11777 (define_insn "sse4_1_round<ssescalarmodesuffix>"
11778 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
11781 [(match_operand:VF_128 2 "register_operand" "x,x")
11782 (match_operand:SI 3 "const_0_to_15_operand" "n,n")]
11784 (match_operand:VF_128 1 "register_operand" "0,x")
11788 round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
11789 vround<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11790 [(set_attr "isa" "noavx,avx")
11791 (set_attr "type" "ssecvt")
11792 (set_attr "length_immediate" "1")
11793 (set_attr "prefix_data16" "1,*")
11794 (set_attr "prefix_extra" "1")
11795 (set_attr "prefix" "orig,vex")
11796 (set_attr "mode" "<MODE>")])
;; Vector round expander for the VF modes, enabled when TARGET_ROUND is
;; set and flag_trapping_math is clear.  The preparation code builds the
;; constant nextafter (0.5, 0.0) in the element mode, broadcasts it into a
;; vector register, and stores in operands[3] that vector with the sign of
;; operand 1 copied onto it (copysign).  operands[4] is a fresh
;; intermediate register and operands[5] is the ROUND_TRUNC immediate used
;; by the second set of the pattern.
11798 (define_expand "round<mode>2"
11799 [(set (match_dup 4)
11801 (match_operand:VF 1 "register_operand")
11803 (set (match_operand:VF 0 "register_operand")
11805 [(match_dup 4) (match_dup 5)]
11807 "TARGET_ROUND && !flag_trapping_math"
11809 enum machine_mode scalar_mode;
11810 const struct real_format *fmt;
11811 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
11812 rtx half, vec_half;
11814 scalar_mode = GET_MODE_INNER (<MODE>mode);
11816 /* load nextafter (0.5, 0.0) */
11817 fmt = REAL_MODE_FORMAT (scalar_mode);
11818 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, scalar_mode);
11819 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
11820 half = const_double_from_real_value (pred_half, scalar_mode);
11822 vec_half = ix86_build_const_vector (<MODE>mode, true, half);
11823 vec_half = force_reg (<MODE>mode, vec_half);
11825 operands[3] = gen_reg_rtx (<MODE>mode);
11826 emit_insn (gen_copysign<mode>3 (operands[3], vec_half, operands[1]));
11828 operands[4] = gen_reg_rtx (<MODE>mode);
11829 operands[5] = GEN_INT (ROUND_TRUNC);
;; Expander: apply round<mode>2 to operand 1 (VF1_128_256) into a fresh
;; temporary, then convert the temporary to the integer vector operand 0
;; with the fix_trunc pattern.  Enabled when TARGET_ROUND is set and
;; flag_trapping_math is clear.
11832 (define_expand "round<mode>2_sfix"
11833 [(match_operand:<sseintvecmode> 0 "register_operand")
11834 (match_operand:VF1_128_256 1 "register_operand")]
11835 "TARGET_ROUND && !flag_trapping_math"
11837 rtx tmp = gen_reg_rtx (<MODE>mode);
11839 emit_insn (gen_round<mode>2 (tmp, operands[1]));
11842 (gen_fix_trunc<mode><sseintvecmodelower>2 (operands[0], tmp));
;; Expander: round two VF2 inputs (operands 1 and 2) with round<mode>2 and
;; pack the truncated integer results into operand 0.  Enabled when
;; TARGET_ROUND is set and flag_trapping_math is clear.
;; For V2DF with TARGET_AVX and not TARGET_PREFER_AVX128, the two inputs
;; are concatenated into one V4DF, rounded with roundv4df2 and converted
;; with fix_truncv4dfv4si2.  Otherwise each input is rounded into its own
;; temporary and the pair is combined by vec_pack_sfix_trunc_<mode>.
11846 (define_expand "round<mode>2_vec_pack_sfix"
11847 [(match_operand:<ssepackfltmode> 0 "register_operand")
11848 (match_operand:VF2 1 "register_operand")
11849 (match_operand:VF2 2 "register_operand")]
11850 "TARGET_ROUND && !flag_trapping_math"
11854 if (<MODE>mode == V2DFmode
11855 && TARGET_AVX && !TARGET_PREFER_AVX128)
11857 rtx tmp2 = gen_reg_rtx (V4DFmode);
11859 tmp0 = gen_reg_rtx (V4DFmode);
11860 tmp1 = force_reg (V2DFmode, operands[1]);
11862 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
11863 emit_insn (gen_roundv4df2 (tmp2, tmp0));
11864 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp2));
11868 tmp0 = gen_reg_rtx (<MODE>mode);
11869 tmp1 = gen_reg_rtx (<MODE>mode);
11871 emit_insn (gen_round<mode>2 (tmp0, operands[1]));
11872 emit_insn (gen_round<mode>2 (tmp1, operands[2]));
11875 (gen_vec_pack_sfix_trunc_<mode> (operands[0], tmp0, tmp1));
11880 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
11882 ;; Intel SSE4.2 string/text processing instructions
11884 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Combined pcmpestr pattern with three results: an SI index in operand 0
;; (constraint "c"), a V16QI mask in operand 1 (constraint "Yz") and
;; FLAGS_REG.  Inputs are the two V16QI strings (operands 2 and 4), their
;; SI lengths (operands 3 and 5, constraints "a" and "d") and the
;; immediate in operand 6.
;; The splitter runs while pseudos can still be created.  It looks for
;; REG_UNUSED notes on operand 0, operand 1 and FLAGS_REG and emits
;; sse4_2_pcmpestri, sse4_2_pcmpestrm and/or sse4_2_pcmpestr_cconly for the
;; results that are live; the cconly form is used only when the flags are
;; live and neither register result is.  When nothing is live a
;; NOTE_INSN_DELETED is emitted instead.
11886 (define_insn_and_split "sse4_2_pcmpestr"
11887 [(set (match_operand:SI 0 "register_operand" "=c,c")
11889 [(match_operand:V16QI 2 "register_operand" "x,x")
11890 (match_operand:SI 3 "register_operand" "a,a")
11891 (match_operand:V16QI 4 "nonimmediate_operand" "x,m")
11892 (match_operand:SI 5 "register_operand" "d,d")
11893 (match_operand:SI 6 "const_0_to_255_operand" "n,n")]
11895 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
11903 (set (reg:CC FLAGS_REG)
11912 && can_create_pseudo_p ()"
11917 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
11918 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
11919 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
11922 emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
11923 operands[3], operands[4],
11924 operands[5], operands[6]));
11926 emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
11927 operands[3], operands[4],
11928 operands[5], operands[6]));
11929 if (flags && !(ecx || xmm0))
11930 emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
11931 operands[2], operands[3],
11932 operands[4], operands[5],
11934 if (!(flags || ecx || xmm0))
11935 emit_note (NOTE_INSN_DELETED);
11939 [(set_attr "type" "sselog")
11940 (set_attr "prefix_data16" "1")
11941 (set_attr "prefix_extra" "1")
11942 (set_attr "length_immediate" "1")
11943 (set_attr "memory" "none,load")
11944 (set_attr "mode" "TI")])
;; Variant of the combined pcmpestr pattern whose second string (operand 4)
;; is a memory operand wrapped in UNSPEC_LOADU.  The splitter is the same
;; as for sse4_2_pcmpestr: it checks REG_UNUSED notes on operand 0,
;; operand 1 and FLAGS_REG and emits sse4_2_pcmpestri, sse4_2_pcmpestrm
;; and/or sse4_2_pcmpestr_cconly, passing the memory operand directly, or
;; a NOTE_INSN_DELETED when no result is live.
11946 (define_insn_and_split "*sse4_2_pcmpestr_unaligned"
11947 [(set (match_operand:SI 0 "register_operand" "=c")
11949 [(match_operand:V16QI 2 "register_operand" "x")
11950 (match_operand:SI 3 "register_operand" "a")
11952 [(match_operand:V16QI 4 "memory_operand" "m")]
11954 (match_operand:SI 5 "register_operand" "d")
11955 (match_operand:SI 6 "const_0_to_255_operand" "n")]
11957 (set (match_operand:V16QI 1 "register_operand" "=Yz")
11961 (unspec:V16QI [(match_dup 4)] UNSPEC_LOADU)
11965 (set (reg:CC FLAGS_REG)
11969 (unspec:V16QI [(match_dup 4)] UNSPEC_LOADU)
11974 && can_create_pseudo_p ()"
11979 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
11980 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
11981 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
11984 emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
11985 operands[3], operands[4],
11986 operands[5], operands[6]));
11988 emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
11989 operands[3], operands[4],
11990 operands[5], operands[6]));
11991 if (flags && !(ecx || xmm0))
11992 emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
11993 operands[2], operands[3],
11994 operands[4], operands[5],
11996 if (!(flags || ecx || xmm0))
11997 emit_note (NOTE_INSN_DELETED);
12001 [(set_attr "type" "sselog")
12002 (set_attr "prefix_data16" "1")
12003 (set_attr "prefix_extra" "1")
12004 (set_attr "length_immediate" "1")
12005 (set_attr "memory" "load")
12006 (set_attr "mode" "TI")])
;; [v]pcmpestri: produces the SI result in operand 0 (constraint "c") and
;; sets FLAGS_REG.  Operands 1 and 3 are the V16QI strings (operand 3 may
;; be memory in the second alternative), operands 2 and 4 their lengths
;; (constraints "a" and "d"), operand 5 the immediate.  Only operands 1, 3
;; and 5 appear in the assembler template.
12008 (define_insn "sse4_2_pcmpestri"
12009 [(set (match_operand:SI 0 "register_operand" "=c,c")
12011 [(match_operand:V16QI 1 "register_operand" "x,x")
12012 (match_operand:SI 2 "register_operand" "a,a")
12013 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
12014 (match_operand:SI 4 "register_operand" "d,d")
12015 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
12017 (set (reg:CC FLAGS_REG)
12026 "%vpcmpestri\t{%5, %3, %1|%1, %3, %5}"
12027 [(set_attr "type" "sselog")
12028 (set_attr "prefix_data16" "1")
12029 (set_attr "prefix_extra" "1")
12030 (set_attr "prefix" "maybe_vex")
12031 (set_attr "length_immediate" "1")
12032 (set_attr "btver2_decode" "vector")
12033 (set_attr "memory" "none,load")
12034 (set_attr "mode" "TI")])
;; [v]pcmpestrm: produces the V16QI result in operand 0 (constraint "Yz")
;; and sets FLAGS_REG.  Operands 1 and 3 are the V16QI strings (operand 3
;; may be memory in the second alternative), operands 2 and 4 their
;; lengths (constraints "a" and "d"), operand 5 the immediate.  Only
;; operands 1, 3 and 5 appear in the assembler template.
12036 (define_insn "sse4_2_pcmpestrm"
12037 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
12039 [(match_operand:V16QI 1 "register_operand" "x,x")
12040 (match_operand:SI 2 "register_operand" "a,a")
12041 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
12042 (match_operand:SI 4 "register_operand" "d,d")
12043 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
12045 (set (reg:CC FLAGS_REG)
12054 "%vpcmpestrm\t{%5, %3, %1|%1, %3, %5}"
12055 [(set_attr "type" "sselog")
12056 (set_attr "prefix_data16" "1")
12057 (set_attr "prefix_extra" "1")
12058 (set_attr "length_immediate" "1")
12059 (set_attr "prefix" "maybe_vex")
12060 (set_attr "btver2_decode" "vector")
12061 (set_attr "memory" "none,load")
12062 (set_attr "mode" "TI")])
;; Flags-only form of pcmpestr: the sole set is FLAGS_REG; the register
;; results are clobbered scratches.  There are four alternatives.  The
;; first two clobber a "Yz" V16QI scratch and emit [v]pcmpestrm; the last
;; two clobber a "c" SI scratch and emit [v]pcmpestri.  Within each pair
;; the second alternative takes operand 4 from memory.
12064 (define_insn "sse4_2_pcmpestr_cconly"
12065 [(set (reg:CC FLAGS_REG)
12067 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
12068 (match_operand:SI 3 "register_operand" "a,a,a,a")
12069 (match_operand:V16QI 4 "nonimmediate_operand" "x,m,x,m")
12070 (match_operand:SI 5 "register_operand" "d,d,d,d")
12071 (match_operand:SI 6 "const_0_to_255_operand" "n,n,n,n")]
12073 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
12074 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
12077 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
12078 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
12079 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}
12080 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}"
12081 [(set_attr "type" "sselog")
12082 (set_attr "prefix_data16" "1")
12083 (set_attr "prefix_extra" "1")
12084 (set_attr "length_immediate" "1")
12085 (set_attr "memory" "none,load,none,load")
12086 (set_attr "btver2_decode" "vector,vector,vector,vector")
12087 (set_attr "prefix" "maybe_vex")
12088 (set_attr "mode" "TI")])
;; Combined pcmpistr pattern with three results: an SI index in operand 0
;; (constraint "c"), a V16QI mask in operand 1 (constraint "Yz") and
;; FLAGS_REG.  Inputs are the two V16QI strings (operands 2 and 3) and the
;; immediate in operand 4.
;; The splitter runs while pseudos can still be created.  It looks for
;; REG_UNUSED notes on operand 0, operand 1 and FLAGS_REG and emits
;; sse4_2_pcmpistri, sse4_2_pcmpistrm and/or sse4_2_pcmpistr_cconly for the
;; results that are live; the cconly form is used only when the flags are
;; live and neither register result is.  When nothing is live a
;; NOTE_INSN_DELETED is emitted instead.
12090 (define_insn_and_split "sse4_2_pcmpistr"
12091 [(set (match_operand:SI 0 "register_operand" "=c,c")
12093 [(match_operand:V16QI 2 "register_operand" "x,x")
12094 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
12095 (match_operand:SI 4 "const_0_to_255_operand" "n,n")]
12097 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
12103 (set (reg:CC FLAGS_REG)
12110 && can_create_pseudo_p ()"
12115 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
12116 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
12117 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
12120 emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
12121 operands[3], operands[4]));
12123 emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
12124 operands[3], operands[4]));
12125 if (flags && !(ecx || xmm0))
12126 emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
12127 operands[2], operands[3],
12129 if (!(flags || ecx || xmm0))
12130 emit_note (NOTE_INSN_DELETED);
12134 [(set_attr "type" "sselog")
12135 (set_attr "prefix_data16" "1")
12136 (set_attr "prefix_extra" "1")
12137 (set_attr "length_immediate" "1")
12138 (set_attr "memory" "none,load")
12139 (set_attr "mode" "TI")])
;; Variant of the combined pcmpistr pattern whose second string (operand 3)
;; is a memory operand wrapped in UNSPEC_LOADU.  The splitter is the same
;; as for sse4_2_pcmpistr: it checks REG_UNUSED notes on operand 0,
;; operand 1 and FLAGS_REG and emits sse4_2_pcmpistri, sse4_2_pcmpistrm
;; and/or sse4_2_pcmpistr_cconly, passing the memory operand directly, or
;; a NOTE_INSN_DELETED when no result is live.
12141 (define_insn_and_split "*sse4_2_pcmpistr_unaligned"
12142 [(set (match_operand:SI 0 "register_operand" "=c")
12144 [(match_operand:V16QI 2 "register_operand" "x")
12146 [(match_operand:V16QI 3 "memory_operand" "m")]
12148 (match_operand:SI 4 "const_0_to_255_operand" "n")]
12150 (set (match_operand:V16QI 1 "register_operand" "=Yz")
12153 (unspec:V16QI [(match_dup 3)] UNSPEC_LOADU)
12156 (set (reg:CC FLAGS_REG)
12159 (unspec:V16QI [(match_dup 3)] UNSPEC_LOADU)
12163 && can_create_pseudo_p ()"
12168 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
12169 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
12170 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
12173 emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
12174 operands[3], operands[4]));
12176 emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
12177 operands[3], operands[4]));
12178 if (flags && !(ecx || xmm0))
12179 emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
12180 operands[2], operands[3],
12182 if (!(flags || ecx || xmm0))
12183 emit_note (NOTE_INSN_DELETED);
12187 [(set_attr "type" "sselog")
12188 (set_attr "prefix_data16" "1")
12189 (set_attr "prefix_extra" "1")
12190 (set_attr "length_immediate" "1")
12191 (set_attr "memory" "load")
12192 (set_attr "mode" "TI")])
;; [v]pcmpistri: produces the SI result in operand 0 (constraint "c") and
;; sets FLAGS_REG.  Operands 1 and 2 are the V16QI strings (operand 2 may
;; be memory in the second alternative) and operand 3 is the immediate.
12194 (define_insn "sse4_2_pcmpistri"
12195 [(set (match_operand:SI 0 "register_operand" "=c,c")
12197 [(match_operand:V16QI 1 "register_operand" "x,x")
12198 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
12199 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
12201 (set (reg:CC FLAGS_REG)
12208 "%vpcmpistri\t{%3, %2, %1|%1, %2, %3}"
12209 [(set_attr "type" "sselog")
12210 (set_attr "prefix_data16" "1")
12211 (set_attr "prefix_extra" "1")
12212 (set_attr "length_immediate" "1")
12213 (set_attr "prefix" "maybe_vex")
12214 (set_attr "memory" "none,load")
12215 (set_attr "btver2_decode" "vector")
12216 (set_attr "mode" "TI")])
;; [v]pcmpistrm: produces the V16QI result in operand 0 (constraint "Yz")
;; and sets FLAGS_REG.  Operands 1 and 2 are the V16QI strings (operand 2
;; may be memory in the second alternative) and operand 3 is the
;; immediate.
12218 (define_insn "sse4_2_pcmpistrm"
12219 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
12221 [(match_operand:V16QI 1 "register_operand" "x,x")
12222 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
12223 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
12225 (set (reg:CC FLAGS_REG)
12232 "%vpcmpistrm\t{%3, %2, %1|%1, %2, %3}"
12233 [(set_attr "type" "sselog")
12234 (set_attr "prefix_data16" "1")
12235 (set_attr "prefix_extra" "1")
12236 (set_attr "length_immediate" "1")
12237 (set_attr "prefix" "maybe_vex")
12238 (set_attr "memory" "none,load")
12239 (set_attr "btver2_decode" "vector")
12240 (set_attr "mode" "TI")])
;; Flags-only form of pcmpistr: the sole set is FLAGS_REG; the register
;; results are clobbered scratches.  There are four alternatives.  The
;; first two clobber a "Yz" V16QI scratch and emit [v]pcmpistrm; the last
;; two clobber a "c" SI scratch and emit [v]pcmpistri.  Within each pair
;; the second alternative takes operand 3 from memory.
12242 (define_insn "sse4_2_pcmpistr_cconly"
12243 [(set (reg:CC FLAGS_REG)
12245 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
12246 (match_operand:V16QI 3 "nonimmediate_operand" "x,m,x,m")
12247 (match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")]
12249 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
12250 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
12253 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
12254 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
12255 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}
12256 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}"
12257 [(set_attr "type" "sselog")
12258 (set_attr "prefix_data16" "1")
12259 (set_attr "prefix_extra" "1")
12260 (set_attr "length_immediate" "1")
12261 (set_attr "memory" "none,load,none,load")
12262 (set_attr "prefix" "maybe_vex")
12263 (set_attr "btver2_decode" "vector,vector,vector,vector")
12264 (set_attr "mode" "TI")])
;; Expander for the AVX-512PF gather prefetch (UNSPEC_GATHER_PREFETCH).
;; Operand 0 is the mask (a mask register or constant -1), operand 1 the
;; VI48_512 index vector, operand 2 the base address, operand 3 the scale
;; (const1248_operand) and operand 4 a 0/1 selector.  The preparation code
;; builds a Pmode UNSPEC_VSIBADDR from operands 2, 1 and 3.
12266 (define_expand "avx512pf_gatherpf<mode>"
12268 [(match_operand:<avx512fmaskmode> 0 "register_or_constm1_operand")
12269 (mem:<ssescalarmode>
12271 [(match_operand 2 "vsib_address_operand")
12272 (match_operand:VI48_512 1 "register_operand")
12273 (match_operand:SI 3 "const1248_operand")]))
12274 (match_operand:SI 4 "const_0_to_1_operand")]
12275 UNSPEC_GATHER_PREFETCH)]
12279 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
12280 operands[3]), UNSPEC_VSIBADDR);
;; Masked gather prefetch.  Operand 0 is the mask register ("k"), operand 5
;; the VSIB memory reference built from base (operand 2), index vector
;; (operand 1) and scale (operand 3).  The value of operand 4 selects
;; between the vgatherpf0 and vgatherpf1 mnemonics; any other value is
;; unreachable.  The mask is printed in braces after the memory operand.
12283 (define_insn "*avx512pf_gatherpf<mode>_mask"
12285 [(match_operand:<avx512fmaskmode> 0 "register_operand" "k")
12286 (match_operator:<ssescalarmode> 5 "vsib_mem_operator"
12288 [(match_operand:P 2 "vsib_address_operand" "Tv")
12289 (match_operand:VI48_512 1 "register_operand" "v")
12290 (match_operand:SI 3 "const1248_operand" "n")]
12292 (match_operand:SI 4 "const_0_to_1_operand" "n")]
12293 UNSPEC_GATHER_PREFETCH)]
12296 switch (INTVAL (operands[4]))
12299 return "vgatherpf0<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}";
12301 return "vgatherpf1<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}";
12303 gcc_unreachable ();
12306 [(set_attr "type" "sse")
12307 (set_attr "prefix" "evex")
12308 (set_attr "mode" "XI")])
;; Gather prefetch without a mask-register operand.  Operand 4 is the VSIB
;; memory reference built from base (operand 1), index vector (operand 0)
;; and scale (operand 2).  The value of operand 3 selects between the
;; vgatherpf0 and vgatherpf1 mnemonics; any other value is unreachable.
12310 (define_insn "*avx512pf_gatherpf<mode>"
12313 (match_operator:<ssescalarmode> 4 "vsib_mem_operator"
12315 [(match_operand:P 1 "vsib_address_operand" "Tv")
12316 (match_operand:VI48_512 0 "register_operand" "v")
12317 (match_operand:SI 2 "const1248_operand" "n")]
12319 (match_operand:SI 3 "const_0_to_1_operand" "n")]
12320 UNSPEC_GATHER_PREFETCH)]
12323 switch (INTVAL (operands[3]))
12326 return "vgatherpf0<ssemodesuffix>ps\t{%4|%4}";
12328 return "vgatherpf1<ssemodesuffix>ps\t{%4|%4}";
12330 gcc_unreachable ();
12333 [(set_attr "type" "sse")
12334 (set_attr "prefix" "evex")
12335 (set_attr "mode" "XI")])
;; Expander for the AVX-512PF scatter prefetch (UNSPEC_SCATTER_PREFETCH).
;; Operand 0 is the mask (a mask register or constant -1), operand 1 the
;; VI48_512 index vector, operand 2 the base address, operand 3 the scale
;; (const1248_operand) and operand 4 a 0/1 selector.  The preparation code
;; builds a Pmode UNSPEC_VSIBADDR from operands 2, 1 and 3.
12337 (define_expand "avx512pf_scatterpf<mode>"
12339 [(match_operand:<avx512fmaskmode> 0 "register_or_constm1_operand")
12340 (mem:<ssescalarmode>
12342 [(match_operand 2 "vsib_address_operand")
12343 (match_operand:VI48_512 1 "register_operand")
12344 (match_operand:SI 3 "const1248_operand")]))
12345 (match_operand:SI 4 "const_0_to_1_operand")]
12346 UNSPEC_SCATTER_PREFETCH)]
12350 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
12351 operands[3]), UNSPEC_VSIBADDR);
;; Masked scatter prefetch.  Operand 0 is the mask register ("k"), operand
;; 5 the VSIB memory reference built from base (operand 2), index vector
;; (operand 1) and scale (operand 3).  The value of operand 4 selects
;; between the vscatterpf0 and vscatterpf1 mnemonics; any other value is
;; unreachable.  The mask is printed in braces after the memory operand.
12354 (define_insn "*avx512pf_scatterpf<mode>_mask"
12356 [(match_operand:<avx512fmaskmode> 0 "register_operand" "k")
12357 (match_operator:<ssescalarmode> 5 "vsib_mem_operator"
12359 [(match_operand:P 2 "vsib_address_operand" "Tv")
12360 (match_operand:VI48_512 1 "register_operand" "v")
12361 (match_operand:SI 3 "const1248_operand" "n")]
12363 (match_operand:SI 4 "const_0_to_1_operand" "n")]
12364 UNSPEC_SCATTER_PREFETCH)]
12367 switch (INTVAL (operands[4]))
12370 return "vscatterpf0<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}";
12372 return "vscatterpf1<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}";
12374 gcc_unreachable ();
12377 [(set_attr "type" "sse")
12378 (set_attr "prefix" "evex")
12379 (set_attr "mode" "XI")])
;; Scatter prefetch without a mask-register operand.  Operand 4 is the
;; VSIB memory reference built from base (operand 1), index vector
;; (operand 0) and scale (operand 2).  The value of operand 3 selects
;; between the vscatterpf0 and vscatterpf1 mnemonics; any other value is
;; unreachable.
12381 (define_insn "*avx512pf_scatterpf<mode>"
12384 (match_operator:<ssescalarmode> 4 "vsib_mem_operator"
12386 [(match_operand:P 1 "vsib_address_operand" "Tv")
12387 (match_operand:VI48_512 0 "register_operand" "v")
12388 (match_operand:SI 2 "const1248_operand" "n")]
12390 (match_operand:SI 3 "const_0_to_1_operand" "n")]
12391 UNSPEC_SCATTER_PREFETCH)]
12394 switch (INTVAL (operands[3]))
12397 return "vscatterpf0<ssemodesuffix>ps\t{%4|%4}";
12399 return "vscatterpf1<ssemodesuffix>ps\t{%4|%4}";
12401 gcc_unreachable ();
12404 [(set_attr "type" "sse")
12405 (set_attr "prefix" "evex")
12406 (set_attr "mode" "XI")])
;; AVX-512ER vexp2<ssemodesuffix> for the VF_512 modes: one source
;; (register or memory) and a register destination, with the
;; <mask_operand2> suffix appended to the destination.
12408 (define_insn "avx512er_exp2<mode><mask_name>"
12409 [(set (match_operand:VF_512 0 "register_operand" "=v")
12411 [(match_operand:VF_512 1 "nonimmediate_operand" "vm")]
12414 "vexp2<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
12415 [(set_attr "prefix" "evex")
12416 (set_attr "mode" "<MODE>")])
;; AVX-512ER vrcp28<ssemodesuffix> for the VF_512 modes: one source
;; (register or memory) and a register destination, with the
;; <mask_operand2> suffix appended to the destination.
12418 (define_insn "<mask_codefor>avx512er_rcp28<mode><mask_name>"
12419 [(set (match_operand:VF_512 0 "register_operand" "=v")
12421 [(match_operand:VF_512 1 "nonimmediate_operand" "vm")]
12424 "vrcp28<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
12425 [(set_attr "prefix" "evex")
12426 (set_attr "mode" "<MODE>")])
;; AVX-512ER vrsqrt28<ssemodesuffix> for the VF_512 modes: one source
;; (register or memory) and a register destination, with the
;; <mask_operand2> suffix appended to the destination.
12428 (define_insn "<mask_codefor>avx512er_rsqrt28<mode><mask_name>"
12429 [(set (match_operand:VF_512 0 "register_operand" "=v")
12431 [(match_operand:VF_512 1 "nonimmediate_operand" "vm")]
12434 "vrsqrt28<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
12435 [(set_attr "prefix" "evex")
12436 (set_attr "mode" "<MODE>")])
12438 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
12440 ;; XOP instructions
12442 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; xop_plus iterates over plain and signed-saturating addition.  The two
;; code attributes map each code to the mnemonic fragment used in the XOP
;; pattern names and templates: "macs"/"macss" and "madcs"/"madcss".
12444 (define_code_iterator xop_plus [plus ss_plus])
12446 (define_code_attr macs [(plus "macs") (ss_plus "macss")])
12447 (define_code_attr madcs [(plus "madcs") (ss_plus "madcss")])
12449 ;; XOP parallel integer multiply/add instructions.
;; XOP vp<macs> on the VI24_128 modes with same-width source and
;; destination elements.  Operands 1 and 2 are the paired sources (operand
;; 1 is marked commutative with "%"; operand 2 may be memory) and operand 3
;; is a register operand of the same mode.
12451 (define_insn "xop_p<macs><ssemodesuffix><ssemodesuffix>"
12452 [(set (match_operand:VI24_128 0 "register_operand" "=x")
12455 (match_operand:VI24_128 1 "nonimmediate_operand" "%x")
12456 (match_operand:VI24_128 2 "nonimmediate_operand" "xm"))
12457 (match_operand:VI24_128 3 "register_operand" "x")))]
12459 "vp<macs><ssemodesuffix><ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
12460 [(set_attr "type" "ssemuladd")
12461 (set_attr "mode" "TI")])
;; XOP vp<macs>dql: elements 0 and 2 of each V4SI source (operands 1 and 2)
;; are selected, and operand 3 is a V2DI register operand; the result is
;; V2DI.
12463 (define_insn "xop_p<macs>dql"
12464 [(set (match_operand:V2DI 0 "register_operand" "=x")
12469 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
12470 (parallel [(const_int 0) (const_int 2)])))
12473 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
12474 (parallel [(const_int 0) (const_int 2)]))))
12475 (match_operand:V2DI 3 "register_operand" "x")))]
12477 "vp<macs>dql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
12478 [(set_attr "type" "ssemuladd")
12479 (set_attr "mode" "TI")])
;; XOP vp<macs>dqh: elements 1 and 3 of each V4SI source (operands 1 and 2)
;; are selected, and operand 3 is a V2DI register operand; the result is
;; V2DI.
12481 (define_insn "xop_p<macs>dqh"
12482 [(set (match_operand:V2DI 0 "register_operand" "=x")
12487 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
12488 (parallel [(const_int 1) (const_int 3)])))
12491 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
12492 (parallel [(const_int 1) (const_int 3)]))))
12493 (match_operand:V2DI 3 "register_operand" "x")))]
12495 "vp<macs>dqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
12496 [(set_attr "type" "ssemuladd")
12497 (set_attr "mode" "TI")])
12499 ;; XOP parallel integer multiply/add instructions for the intrinsics
;; XOP vp<macs>wd: elements 1, 3, 5 and 7 of each V8HI source (operands 1
;; and 2) are selected, and operand 3 is a V4SI register operand; the
;; result is V4SI.
12500 (define_insn "xop_p<macs>wd"
12501 [(set (match_operand:V4SI 0 "register_operand" "=x")
12506 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
12507 (parallel [(const_int 1) (const_int 3)
12508 (const_int 5) (const_int 7)])))
12511 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
12512 (parallel [(const_int 1) (const_int 3)
12513 (const_int 5) (const_int 7)]))))
12514 (match_operand:V4SI 3 "register_operand" "x")))]
12516 "vp<macs>wd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
12517 [(set_attr "type" "ssemuladd")
12518 (set_attr "mode" "TI")])
;; XOP vp<madcs>wd: the pattern selects both the even elements (0, 2, 4, 6)
;; and the odd elements (1, 3, 5, 7) of the V8HI sources (operands 1 and
;; 2), and operand 3 is a V4SI register operand; the result is V4SI.
12520 (define_insn "xop_p<madcs>wd"
12521 [(set (match_operand:V4SI 0 "register_operand" "=x")
12527 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
12528 (parallel [(const_int 0) (const_int 2)
12529 (const_int 4) (const_int 6)])))
12532 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
12533 (parallel [(const_int 0) (const_int 2)
12534 (const_int 4) (const_int 6)]))))
12539 (parallel [(const_int 1) (const_int 3)
12540 (const_int 5) (const_int 7)])))
12544 (parallel [(const_int 1) (const_int 3)
12545 (const_int 5) (const_int 7)])))))
12546 (match_operand:V4SI 3 "register_operand" "x")))]
12548 "vp<madcs>wd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
12549 [(set_attr "type" "ssemuladd")
12550 (set_attr "mode" "TI")])
12552 ;; XOP parallel XMM conditional moves
;; vpcmov for every mode in the V iterator.  Two alternatives allow the
;; single memory operand to be either operand 3 (second alternative) or
;; operand 2 (first alternative); operand 1 is always a register.
12553 (define_insn "xop_pcmov_<mode><avxsizesuffix>"
12554 [(set (match_operand:V 0 "register_operand" "=x,x")
12556 (match_operand:V 3 "nonimmediate_operand" "x,m")
12557 (match_operand:V 1 "register_operand" "x,x")
12558 (match_operand:V 2 "nonimmediate_operand" "xm,x")))]
12560 "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
12561 [(set_attr "type" "sse4arg")])
12563 ;; XOP horizontal add/subtract instructions
;; vphadd<u>bw: single V16QI source (register or memory), V8HI result.
;; The pattern selects the even-numbered bytes (0, 2, ..., 14) and the
;; odd-numbered bytes (1, 3, ..., 15) of operand 1 as its two inputs.
12564 (define_insn "xop_phadd<u>bw"
12565 [(set (match_operand:V8HI 0 "register_operand" "=x")
12569 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
12570 (parallel [(const_int 0) (const_int 2)
12571 (const_int 4) (const_int 6)
12572 (const_int 8) (const_int 10)
12573 (const_int 12) (const_int 14)])))
12577 (parallel [(const_int 1) (const_int 3)
12578 (const_int 5) (const_int 7)
12579 (const_int 9) (const_int 11)
12580 (const_int 13) (const_int 15)])))))]
12582 "vphadd<u>bw\t{%1, %0|%0, %1}"
12583 [(set_attr "type" "sseiadd1")])
;; Emits vphadd<u>bd: V16QI source to V4SI destination.  The four index
;; lists select bytes 4i, 4i+1, 4i+2 and 4i+3 for result element i.
12585 (define_insn "xop_phadd<u>bd"
12586 [(set (match_operand:V4SI 0 "register_operand" "=x")
12591 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
12592 (parallel [(const_int 0) (const_int 4)
12593 (const_int 8) (const_int 12)])))
12597 (parallel [(const_int 1) (const_int 5)
12598 (const_int 9) (const_int 13)]))))
12603 (parallel [(const_int 2) (const_int 6)
12604 (const_int 10) (const_int 14)])))
12608 (parallel [(const_int 3) (const_int 7)
12609 (const_int 11) (const_int 15)]))))))]
12611 "vphadd<u>bd\t{%1, %0|%0, %1}"
12612 [(set_attr "type" "sseiadd1")])
;; Emits vphadd<u>bq: V16QI source to V2DI destination.  The eight index
;; lists select bytes 8i .. 8i+7 for result element i (i = 0, 1).
12614 (define_insn "xop_phadd<u>bq"
12615 [(set (match_operand:V2DI 0 "register_operand" "=x")
12621 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
12622 (parallel [(const_int 0) (const_int 8)])))
12626 (parallel [(const_int 1) (const_int 9)]))))
12631 (parallel [(const_int 2) (const_int 10)])))
12635 (parallel [(const_int 3) (const_int 11)])))))
12641 (parallel [(const_int 4) (const_int 12)])))
12645 (parallel [(const_int 5) (const_int 13)]))))
12650 (parallel [(const_int 6) (const_int 14)])))
12654 (parallel [(const_int 7) (const_int 15)])))))))]
12656 "vphadd<u>bq\t{%1, %0|%0, %1}"
12657 [(set_attr "type" "sseiadd1")])
;; Emits vphadd<u>wd: V8HI source to V4SI destination.  The index lists
;; select the even halfwords (0,2,4,6) and the odd halfwords (1,3,5,7), so
;; result element i is formed from source halfwords 2i and 2i+1.
12659 (define_insn "xop_phadd<u>wd"
12660 [(set (match_operand:V4SI 0 "register_operand" "=x")
12664 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
12665 (parallel [(const_int 0) (const_int 2)
12666 (const_int 4) (const_int 6)])))
12670 (parallel [(const_int 1) (const_int 3)
12671 (const_int 5) (const_int 7)])))))]
12673 "vphadd<u>wd\t{%1, %0|%0, %1}"
12674 [(set_attr "type" "sseiadd1")])
;; Emits vphadd<u>wq: V8HI source to V2DI destination.  The four index
;; lists select halfwords 4i .. 4i+3 for result element i (i = 0, 1).
12676 (define_insn "xop_phadd<u>wq"
12677 [(set (match_operand:V2DI 0 "register_operand" "=x")
12682 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
12683 (parallel [(const_int 0) (const_int 4)])))
12687 (parallel [(const_int 1) (const_int 5)]))))
12692 (parallel [(const_int 2) (const_int 6)])))
12696 (parallel [(const_int 3) (const_int 7)]))))))]
12698 "vphadd<u>wq\t{%1, %0|%0, %1}"
12699 [(set_attr "type" "sseiadd1")])
;; Emits vphadd<u>dq: V4SI source to V2DI destination.  The index lists
;; select elements (0,2) and (1,3), i.e. doublewords 2i and 2i+1 for
;; result element i.
12701 (define_insn "xop_phadd<u>dq"
12702 [(set (match_operand:V2DI 0 "register_operand" "=x")
12706 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
12707 (parallel [(const_int 0) (const_int 2)])))
12711 (parallel [(const_int 1) (const_int 3)])))))]
12713 "vphadd<u>dq\t{%1, %0|%0, %1}"
12714 [(set_attr "type" "sseiadd1")])
;; Emits vphsubbw: V16QI source to V8HI destination.  The first index list
;; selects the even bytes (0,2,...,14), the second the odd bytes
;; (1,3,...,15); result element i is formed from source bytes 2i and 2i+1.
12716 (define_insn "xop_phsubbw"
12717 [(set (match_operand:V8HI 0 "register_operand" "=x")
12721 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
12722 (parallel [(const_int 0) (const_int 2)
12723 (const_int 4) (const_int 6)
12724 (const_int 8) (const_int 10)
12725 (const_int 12) (const_int 14)])))
12729 (parallel [(const_int 1) (const_int 3)
12730 (const_int 5) (const_int 7)
12731 (const_int 9) (const_int 11)
12732 (const_int 13) (const_int 15)])))))]
12734 "vphsubbw\t{%1, %0|%0, %1}"
12735 [(set_attr "type" "sseiadd1")])
;; Emits vphsubwd: V8HI source to V4SI destination.  The first index list
;; selects the even halfwords (0,2,4,6), the second the odd halfwords
;; (1,3,5,7).
12737 (define_insn "xop_phsubwd"
12738 [(set (match_operand:V4SI 0 "register_operand" "=x")
12742 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
12743 (parallel [(const_int 0) (const_int 2)
12744 (const_int 4) (const_int 6)])))
12748 (parallel [(const_int 1) (const_int 3)
12749 (const_int 5) (const_int 7)])))))]
12751 "vphsubwd\t{%1, %0|%0, %1}"
12752 [(set_attr "type" "sseiadd1")])
;; Emits vphsubdq: V4SI source to V2DI destination.  The first index list
;; selects elements (0,2), the second elements (1,3).
12754 (define_insn "xop_phsubdq"
12755 [(set (match_operand:V2DI 0 "register_operand" "=x")
12759 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
12760 (parallel [(const_int 0) (const_int 2)])))
12764 (parallel [(const_int 1) (const_int 3)])))))]
12766 "vphsubdq\t{%1, %0|%0, %1}"
12767 [(set_attr "type" "sseiadd1")])
12769 ;; XOP permute instructions
;; Emits vpperm on three V16QI inputs, modelled as UNSPEC_XOP_PERMUTE.
;; Operand 1 must be a register; the insn condition rejects the case where
;; operands 2 and 3 are both memory references.
12770 (define_insn "xop_pperm"
12771 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
12773 [(match_operand:V16QI 1 "register_operand" "x,x")
12774 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
12775 (match_operand:V16QI 3 "nonimmediate_operand" "xm,x")]
12776 UNSPEC_XOP_PERMUTE))]
12777 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
12778 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
12779 [(set_attr "type" "sse4arg")
12780 (set_attr "mode" "TI")])
12782 ;; XOP pack instructions that combine two vectors into a smaller vector
;; V2DI x V2DI -> V4SI variant, emitted as vpperm.  Operand 3 (V16QI) is
;; referenced through a (use ...) next to the set rather than inside its
;; source expression.  Operands 2 and 3 may not both be memory references.
12783 (define_insn "xop_pperm_pack_v2di_v4si"
12784 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
12787 (match_operand:V2DI 1 "register_operand" "x,x"))
12789 (match_operand:V2DI 2 "nonimmediate_operand" "x,m"))))
12790 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
12791 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
12792 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
12793 [(set_attr "type" "sse4arg")
12794 (set_attr "mode" "TI")])
;; V4SI x V4SI -> V8HI pack, emitted as vpperm.  Operand 3 (V16QI) is
;; referenced through a (use ...) next to the set.  Operands 2 and 3 may
;; not both be memory references.
12796 (define_insn "xop_pperm_pack_v4si_v8hi"
12797 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
12800 (match_operand:V4SI 1 "register_operand" "x,x"))
12802 (match_operand:V4SI 2 "nonimmediate_operand" "x,m"))))
12803 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
12804 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
12805 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
12806 [(set_attr "type" "sse4arg")
12807 (set_attr "mode" "TI")])
;; V8HI x V8HI -> V16QI pack, emitted as vpperm.  Operand 3 (V16QI) is
;; referenced through a (use ...) next to the set.  Operands 2 and 3 may
;; not both be memory references.
12809 (define_insn "xop_pperm_pack_v8hi_v16qi"
12810 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
12813 (match_operand:V8HI 1 "register_operand" "x,x"))
12815 (match_operand:V8HI 2 "nonimmediate_operand" "x,m"))))
12816 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
12817 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
12818 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
12819 [(set_attr "type" "sse4arg")
12820 (set_attr "mode" "TI")])
12822 ;; XOP packed rotate instructions
;; Expander for a rotate-left of a VI_128 vector by an SImode count.
;; When operand 2 is not accepted by const_0_to_<sserotatemax>_operand the
;; count is converted to <ssescalarmode> (if it has a different mode),
;; replicated into all <ssescalarnum> slots of a PARALLEL, loaded into a
;; fresh vector register with gen_vec_init<mode>, and handed to
;; gen_xop_vrotl<mode>3 as a per-element count.
12823 (define_expand "rotl<mode>3"
12824 [(set (match_operand:VI_128 0 "register_operand")
12826 (match_operand:VI_128 1 "nonimmediate_operand")
12827 (match_operand:SI 2 "general_operand")))]
12830 /* If we were given a scalar, convert it to parallel */
12831 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
12833 rtvec vs = rtvec_alloc (<ssescalarnum>);
12834 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
12835 rtx reg = gen_reg_rtx (<MODE>mode);
12836 rtx op2 = operands[2];
12839 if (GET_MODE (op2) != <ssescalarmode>mode)
12841 op2 = gen_reg_rtx (<ssescalarmode>mode);
12842 convert_move (op2, operands[2], false);
12845 for (i = 0; i < <ssescalarnum>; i++)
12846 RTVEC_ELT (vs, i) = op2;
12848 emit_insn (gen_vec_init<mode> (reg, par));
12849 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
;; Expander for a rotate-right of a VI_128 vector by an SImode count.
;; For counts not accepted by const_0_to_<sserotatemax>_operand the count
;; is broadcast into a vector register exactly as in rotl<mode>3, then
;; negated with gen_neg<mode>2; the rotate itself is emitted through
;; gen_xop_vrotl<mode>3 with the negated per-element counts.
12854 (define_expand "rotr<mode>3"
12855 [(set (match_operand:VI_128 0 "register_operand")
12857 (match_operand:VI_128 1 "nonimmediate_operand")
12858 (match_operand:SI 2 "general_operand")))]
12861 /* If we were given a scalar, convert it to parallel */
12862 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
12864 rtvec vs = rtvec_alloc (<ssescalarnum>);
12865 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
12866 rtx neg = gen_reg_rtx (<MODE>mode);
12867 rtx reg = gen_reg_rtx (<MODE>mode);
12868 rtx op2 = operands[2];
12871 if (GET_MODE (op2) != <ssescalarmode>mode)
12873 op2 = gen_reg_rtx (<ssescalarmode>mode);
12874 convert_move (op2, operands[2], false);
12877 for (i = 0; i < <ssescalarnum>; i++)
12878 RTVEC_ELT (vs, i) = op2;
12880 emit_insn (gen_vec_init<mode> (reg, par));
12881 emit_insn (gen_neg<mode>2 (neg, reg));
12882 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], neg));
;; Rotate-left by an immediate: emits vprot<ssemodesuffix> with operand 2
;; as a constant accepted by const_0_to_<sserotatemax>_operand.  The source
;; vector (operand 1) may be a register or memory.
12887 (define_insn "xop_rotl<mode>3"
12888 [(set (match_operand:VI_128 0 "register_operand" "=x")
12890 (match_operand:VI_128 1 "nonimmediate_operand" "xm")
12891 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
12893 "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
12894 [(set_attr "type" "sseishft")
12895 (set_attr "length_immediate" "1")
12896 (set_attr "mode" "TI")])
;; Rotate-right by an immediate.  The output code computes the element bit
;; size of <ssescalarmode> minus the constant in operand 2 and prints
;; vprot<ssemodesuffix> with %3 as the immediate (presumably that computed
;; value), so the instruction printed is the left-rotate form.
12898 (define_insn "xop_rotr<mode>3"
12899 [(set (match_operand:VI_128 0 "register_operand" "=x")
12901 (match_operand:VI_128 1 "nonimmediate_operand" "xm")
12902 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
12906 = GEN_INT (GET_MODE_BITSIZE (<ssescalarmode>mode) - INTVAL (operands[2]));
12907 return \"vprot<ssemodesuffix>\t{%3, %1, %0|%0, %1, %3}\";
12909 [(set_attr "type" "sseishft")
12910 (set_attr "length_immediate" "1")
12911 (set_attr "mode" "TI")])
;; Expander for rotate-right with a per-element vector count (operand 2).
;; Negates the count vector into a fresh register with gen_neg<mode>2 and
;; emits gen_xop_vrotl<mode>3 with the negated counts.
12913 (define_expand "vrotr<mode>3"
12914 [(match_operand:VI_128 0 "register_operand")
12915 (match_operand:VI_128 1 "register_operand")
12916 (match_operand:VI_128 2 "register_operand")]
12919 rtx reg = gen_reg_rtx (<MODE>mode);
12920 emit_insn (gen_neg<mode>2 (reg, operands[2]));
12921 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
;; Expander for rotate-left with a per-element vector count (operand 2);
;; forwards the three operands unchanged to gen_xop_vrotl<mode>3.
12925 (define_expand "vrotl<mode>3"
12926 [(match_operand:VI_128 0 "register_operand")
12927 (match_operand:VI_128 1 "register_operand")
12928 (match_operand:VI_128 2 "register_operand")]
12931 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], operands[2]));
;; Variable rotate: emits vprot<ssemodesuffix> with a vector count in
;; operand 2.  The pattern is an if_then_else over operand 2 in which one
;; arm uses (neg (match_dup 2)).  Either operand 1 or operand 2 may be a
;; memory reference, but the insn condition rejects both being memory.
12935 (define_insn "xop_vrotl<mode>3"
12936 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
12937 (if_then_else:VI_128
12939 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
12942 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
12946 (neg:VI_128 (match_dup 2)))))]
12947 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
12948 "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
12949 [(set_attr "type" "sseishft")
12950 (set_attr "prefix_data16" "0")
12951 (set_attr "prefix_extra" "2")
12952 (set_attr "mode" "TI")])
12954 ;; XOP packed shift instructions.
;; Variable logical right shift for VI12_128 modes.  The count vector
;; (operand 2) is negated into a fresh register with gen_neg<mode>2 and the
;; shift is emitted through gen_xop_shl<mode>3 with the negated counts.
12955 (define_expand "vlshr<mode>3"
12956 [(set (match_operand:VI12_128 0 "register_operand")
12958 (match_operand:VI12_128 1 "register_operand")
12959 (match_operand:VI12_128 2 "nonimmediate_operand")))]
12962 rtx neg = gen_reg_rtx (<MODE>mode);
12963 emit_insn (gen_neg<mode>2 (neg, operands[2]));
12964 emit_insn (gen_xop_shl<mode>3 (operands[0], operands[1], neg));
;; Variable logical right shift for VI48_128 modes, enabled for
;; TARGET_AVX2 || TARGET_XOP.  The preparation code negates the count
;; vector with gen_neg<mode>2 and emits gen_xop_shl<mode>3 with it.
12968 (define_expand "vlshr<mode>3"
12969 [(set (match_operand:VI48_128 0 "register_operand")
12971 (match_operand:VI48_128 1 "register_operand")
12972 (match_operand:VI48_128 2 "nonimmediate_operand")))]
12973 "TARGET_AVX2 || TARGET_XOP"
12977 rtx neg = gen_reg_rtx (<MODE>mode);
12978 emit_insn (gen_neg<mode>2 (neg, operands[2]));
12979 emit_insn (gen_xop_shl<mode>3 (operands[0], operands[1], neg));
;; vlshr<mode>3 for the VI48_512 modes: register destination and source,
;; count vector in a register or memory.
12984 (define_expand "vlshr<mode>3"
12985 [(set (match_operand:VI48_512 0 "register_operand")
12987 (match_operand:VI48_512 1 "register_operand")
12988 (match_operand:VI48_512 2 "nonimmediate_operand")))]
;; vlshr<mode>3 for the VI48_256 modes: register destination and source,
;; count vector in a register or memory.
12991 (define_expand "vlshr<mode>3"
12992 [(set (match_operand:VI48_256 0 "register_operand")
12994 (match_operand:VI48_256 1 "register_operand")
12995 (match_operand:VI48_256 2 "nonimmediate_operand")))]
;; Variable arithmetic right shift (ashiftrt) for VI128_128 modes.  The
;; count vector is negated with gen_neg<mode>2 and the shift is emitted
;; through gen_xop_sha<mode>3 with the negated counts.
12998 (define_expand "vashr<mode>3"
12999 [(set (match_operand:VI128_128 0 "register_operand")
13000 (ashiftrt:VI128_128
13001 (match_operand:VI128_128 1 "register_operand")
13002 (match_operand:VI128_128 2 "nonimmediate_operand")))]
13005 rtx neg = gen_reg_rtx (<MODE>mode);
13006 emit_insn (gen_neg<mode>2 (neg, operands[2]));
13007 emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], neg));
;; Variable arithmetic right shift for V4SI, enabled for
;; TARGET_AVX2 || TARGET_XOP.  The preparation code negates the count
;; vector with gen_negv4si2 and emits gen_xop_shav4si3 with it.
13011 (define_expand "vashrv4si3"
13012 [(set (match_operand:V4SI 0 "register_operand")
13013 (ashiftrt:V4SI (match_operand:V4SI 1 "register_operand")
13014 (match_operand:V4SI 2 "nonimmediate_operand")))]
13015 "TARGET_AVX2 || TARGET_XOP"
13019 rtx neg = gen_reg_rtx (V4SImode);
13020 emit_insn (gen_negv4si2 (neg, operands[2]));
13021 emit_insn (gen_xop_shav4si3 (operands[0], operands[1], neg));
;; Variable arithmetic right shift (ashiftrt) for V16SI: register
;; destination and source, count vector in a register or memory.
13026 (define_expand "vashrv16si3"
13027 [(set (match_operand:V16SI 0 "register_operand")
13028 (ashiftrt:V16SI (match_operand:V16SI 1 "register_operand")
13029 (match_operand:V16SI 2 "nonimmediate_operand")))]
;; Variable arithmetic right shift (ashiftrt) for V8SI: register
;; destination and source, count vector in a register or memory.
13032 (define_expand "vashrv8si3"
13033 [(set (match_operand:V8SI 0 "register_operand")
13034 (ashiftrt:V8SI (match_operand:V8SI 1 "register_operand")
13035 (match_operand:V8SI 2 "nonimmediate_operand")))]
;; Variable left shift for VI12_128 modes; the operands are passed
;; unchanged (no negation of the counts) to gen_xop_sha<mode>3.
13038 (define_expand "vashl<mode>3"
13039 [(set (match_operand:VI12_128 0 "register_operand")
13041 (match_operand:VI12_128 1 "register_operand")
13042 (match_operand:VI12_128 2 "nonimmediate_operand")))]
13045 emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], operands[2]));
;; Variable left shift for VI48_128 modes, enabled for
;; TARGET_AVX2 || TARGET_XOP.  The preparation code forces the count
;; vector into a register and emits gen_xop_sha<mode>3.
13049 (define_expand "vashl<mode>3"
13050 [(set (match_operand:VI48_128 0 "register_operand")
13052 (match_operand:VI48_128 1 "register_operand")
13053 (match_operand:VI48_128 2 "nonimmediate_operand")))]
13054 "TARGET_AVX2 || TARGET_XOP"
13058 operands[2] = force_reg (<MODE>mode, operands[2]);
13059 emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], operands[2]));
;; vashl<mode>3 for the VI48_512 modes: register destination and source,
;; count vector in a register or memory.
13064 (define_expand "vashl<mode>3"
13065 [(set (match_operand:VI48_512 0 "register_operand")
13067 (match_operand:VI48_512 1 "register_operand")
13068 (match_operand:VI48_512 2 "nonimmediate_operand")))]
;; vashl<mode>3 for the VI48_256 modes: register destination and source,
;; count vector in a register or memory.
13071 (define_expand "vashl<mode>3"
13072 [(set (match_operand:VI48_256 0 "register_operand")
13074 (match_operand:VI48_256 1 "register_operand")
13075 (match_operand:VI48_256 2 "nonimmediate_operand")))]
;; Emits vpsha<ssemodesuffix> with a vector count in operand 2.  The
;; pattern is an if_then_else over operand 2 in which one arm uses
;; (neg (match_dup 2)).  Either operand 1 or operand 2 may be a memory
;; reference, but the insn condition rejects both being memory.
13078 (define_insn "xop_sha<mode>3"
13079 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
13080 (if_then_else:VI_128
13082 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
13085 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
13089 (neg:VI_128 (match_dup 2)))))]
13090 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
13091 "vpsha<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
13092 [(set_attr "type" "sseishft")
13093 (set_attr "prefix_data16" "0")
13094 (set_attr "prefix_extra" "2")
13095 (set_attr "mode" "TI")])
;; Emits vpshl<ssemodesuffix> with a vector count in operand 2.  The
;; pattern is an if_then_else over operand 2 in which one arm uses
;; (neg (match_dup 2)).  Either operand 1 or operand 2 may be a memory
;; reference, but the insn condition rejects both being memory.
13097 (define_insn "xop_shl<mode>3"
13098 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
13099 (if_then_else:VI_128
13101 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
13104 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
13108 (neg:VI_128 (match_dup 2)))))]
13109 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
13110 "vpshl<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
13111 [(set_attr "type" "sseishft")
13112 (set_attr "prefix_data16" "0")
13113 (set_attr "prefix_extra" "2")
13114 (set_attr "mode" "TI")])
;; Shift of byte-element vectors (VI1_AVX2) by a scalar SImode count, for
;; every code in any_shift.
;; With TARGET_XOP and V16QImode: for codes other than ASHIFT a constant
;; count is replaced by its negation at expand time; the count is then
;; replicated into a 16-element PARALLEL and loaded with gen_vec_initv16qi.
;; gen_negv16qi2 negates the vector in place (presumably guarded by the
;; `negate' flag for non-constant counts).  LSHIFTRT is emitted through
;; gen_xop_shlv16qi3, every other code through gen_xop_shav16qi3.
;; The remaining cases are handled by ix86_expand_vecop_qihi.
13116 (define_expand "<shift_insn><mode>3"
13117 [(set (match_operand:VI1_AVX2 0 "register_operand")
13118 (any_shift:VI1_AVX2
13119 (match_operand:VI1_AVX2 1 "register_operand")
13120 (match_operand:SI 2 "nonmemory_operand")))]
13123 if (TARGET_XOP && <MODE>mode == V16QImode)
13125 bool negate = false;
13126 rtx (*gen) (rtx, rtx, rtx);
13130 if (<CODE> != ASHIFT)
13132 if (CONST_INT_P (operands[2]))
13133 operands[2] = GEN_INT (-INTVAL (operands[2]));
13137 par = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
13138 for (i = 0; i < 16; i++)
13139 XVECEXP (par, 0, i) = operands[2];
13141 tmp = gen_reg_rtx (V16QImode);
13142 emit_insn (gen_vec_initv16qi (tmp, par));
13145 emit_insn (gen_negv16qi2 (tmp, tmp));
13147 gen = (<CODE> == LSHIFTRT ? gen_xop_shlv16qi3 : gen_xop_shav16qi3);
13148 emit_insn (gen (operands[0], operands[1], tmp));
13151 ix86_expand_vecop_qihi (<CODE>, operands[0], operands[1], operands[2]);
;; Shift of a V2DI vector by a scalar DImode count, emitted through
;; gen_xop_shav2di3.  A constant count is replaced by its negation at
;; expand time; the count is replicated into a 2-element PARALLEL and
;; loaded with gen_vec_initv2di.  gen_negv2di2 negates the vector in place
;; (presumably guarded by the `negate' flag for non-constant counts).
13155 (define_expand "ashrv2di3"
13156 [(set (match_operand:V2DI 0 "register_operand")
13158 (match_operand:V2DI 1 "register_operand")
13159 (match_operand:DI 2 "nonmemory_operand")))]
13162 rtx reg = gen_reg_rtx (V2DImode);
13164 bool negate = false;
13167 if (CONST_INT_P (operands[2]))
13168 operands[2] = GEN_INT (-INTVAL (operands[2]));
13172 par = gen_rtx_PARALLEL (V2DImode, rtvec_alloc (2));
13173 for (i = 0; i < 2; i++)
13174 XVECEXP (par, 0, i) = operands[2];
13176 emit_insn (gen_vec_initv2di (reg, par));
13179 emit_insn (gen_negv2di2 (reg, reg));
13181 emit_insn (gen_xop_shav2di3 (operands[0], operands[1], reg));
13185 ;; XOP FRCZ support
;; Emits vfrcz<ssemodesuffix> for every FMAMODE mode: one source operand
;; (register or memory) and a register destination.
13186 (define_insn "xop_frcz<mode>2"
13187 [(set (match_operand:FMAMODE 0 "register_operand" "=x")
13189 [(match_operand:FMAMODE 1 "nonimmediate_operand" "xm")]
13192 "vfrcz<ssemodesuffix>\t{%1, %0|%0, %1}"
13193 [(set_attr "type" "ssecvt1")
13194 (set_attr "mode" "<MODE>")])
;; Expander for the VF_128 vfrcz variant.  Its preparation code sets
;; operands[3] to the all-zero constant of <MODE>mode.
13197 (define_expand "xop_vmfrcz<mode>2"
13198 [(set (match_operand:VF_128 0 "register_operand")
13201 [(match_operand:VF_128 1 "nonimmediate_operand")]
13207 operands[3] = CONST0_RTX (<MODE>mode);
;; Emits vfrcz<ssescalarmodesuffix> on a VF_128 source.  Operand 2 has no
;; constraint and must satisfy const0_operand; it does not appear in the
;; assembler template.
13210 (define_insn "*xop_vmfrcz_<mode>"
13211 [(set (match_operand:VF_128 0 "register_operand" "=x")
13214 [(match_operand:VF_128 1 "nonimmediate_operand" "xm")]
13216 (match_operand:VF_128 2 "const0_operand")
13219 "vfrcz<ssescalarmodesuffix>\t{%1, %0|%0, %<iptr>1}"
13220 [(set_attr "type" "ssecvt1")
13221 (set_attr "mode" "<MODE>")])
;; Vector compare for VI_128 modes using any operator accepted by
;; ix86_comparison_int_operator.  Emits vpcom<cond><ssemodesuffix>, the
;; condition text coming from the %Y1 output modifier applied to the
;; comparison operator.  Operand 2 is a register, operand 3 may be memory.
13223 (define_insn "xop_maskcmp<mode>3"
13224 [(set (match_operand:VI_128 0 "register_operand" "=x")
13225 (match_operator:VI_128 1 "ix86_comparison_int_operator"
13226 [(match_operand:VI_128 2 "register_operand" "x")
13227 (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
13229 "vpcom%Y1<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
13230 [(set_attr "type" "sse4arg")
13231 (set_attr "prefix_data16" "0")
13232 (set_attr "prefix_rep" "0")
13233 (set_attr "prefix_extra" "2")
13234 (set_attr "length_immediate" "1")
13235 (set_attr "mode" "TI")])
;; Vector compare for VI_128 modes using any operator accepted by
;; ix86_comparison_uns_operator.  Emits vpcom<cond>u<ssemodesuffix> (note
;; the extra "u" in the mnemonic).  Operand 2 is a register, operand 3 may
;; be memory.
13237 (define_insn "xop_maskcmp_uns<mode>3"
13238 [(set (match_operand:VI_128 0 "register_operand" "=x")
13239 (match_operator:VI_128 1 "ix86_comparison_uns_operator"
13240 [(match_operand:VI_128 2 "register_operand" "x")
13241 (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
13243 "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
13244 [(set_attr "type" "ssecmp")
13245 (set_attr "prefix_data16" "0")
13246 (set_attr "prefix_rep" "0")
13247 (set_attr "prefix_extra" "2")
13248 (set_attr "length_immediate" "1")
13249 (set_attr "mode" "TI")])
13251 ;; Version of pcom*u* that is called from the intrinsics that allows pcomequ*
13252 ;; and pcomneu* not to be converted to the signed ones in case somebody needs
13253 ;; the exact instruction generated for the intrinsic.
;; The comparison is wrapped in UNSPEC_XOP_UNSIGNED_CMP; the assembler
;; template is the same vpcom<cond>u<ssemodesuffix> form as in
;; xop_maskcmp_uns<mode>3.
13254 (define_insn "xop_maskcmp_uns2<mode>3"
13255 [(set (match_operand:VI_128 0 "register_operand" "=x")
13257 [(match_operator:VI_128 1 "ix86_comparison_uns_operator"
13258 [(match_operand:VI_128 2 "register_operand" "x")
13259 (match_operand:VI_128 3 "nonimmediate_operand" "xm")])]
13260 UNSPEC_XOP_UNSIGNED_CMP))]
13262 "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
13263 [(set_attr "type" "ssecmp")
13264 (set_attr "prefix_data16" "0")
13265 (set_attr "prefix_extra" "2")
13266 (set_attr "length_immediate" "1")
13267 (set_attr "mode" "TI")])
13269 ;; Pcomtrue and pcomfalse support. These are useless instructions, but are
13270 ;; being added here to be complete.
;; Modelled as UNSPEC_XOP_TRUEFALSE.  Operand 3 is a compile-time integer
;; that picks the mnemonic: non-zero prints vpcomtrue<ssemodesuffix>, zero
;; prints vpcomfalse<ssemodesuffix>.
13271 (define_insn "xop_pcom_tf<mode>3"
13272 [(set (match_operand:VI_128 0 "register_operand" "=x")
13274 [(match_operand:VI_128 1 "register_operand" "x")
13275 (match_operand:VI_128 2 "nonimmediate_operand" "xm")
13276 (match_operand:SI 3 "const_int_operand" "n")]
13277 UNSPEC_XOP_TRUEFALSE))]
13280 return ((INTVAL (operands[3]) != 0)
13281 ? "vpcomtrue<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
13282 : "vpcomfalse<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}");
13284 [(set_attr "type" "ssecmp")
13285 (set_attr "prefix_data16" "0")
13286 (set_attr "prefix_extra" "2")
13287 (set_attr "length_immediate" "1")
13288 (set_attr "mode" "TI")])
;; Emits vpermil2<ssemodesuffix> for VF_128_256 modes.  Operands 1 and 2
;; are the floating-point vector inputs (both registers), operand 3 is an
;; integer vector of mode <sseintvecmode> (register or memory) and
;; operand 4 is an immediate in the range 0..3.
13290 (define_insn "xop_vpermil2<mode>3"
13291 [(set (match_operand:VF_128_256 0 "register_operand" "=x")
13293 [(match_operand:VF_128_256 1 "register_operand" "x")
13294 (match_operand:VF_128_256 2 "nonimmediate_operand" "%x")
13295 (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "xm")
13296 (match_operand:SI 4 "const_0_to_3_operand" "n")]
13299 "vpermil2<ssemodesuffix>\t{%4, %3, %2, %1, %0|%0, %1, %2, %3, %4}"
13300 [(set_attr "type" "sse4arg")
13301 (set_attr "length_immediate" "1")
13302 (set_attr "mode" "<MODE>")])
13304 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; AES encryption round on V2DI operands.  Two alternatives selected by
;; the "isa" attribute: the noavx form is the two-operand aesenc with
;; operand 1 tied to the destination ("0"); the avx form is the
;; three-operand vaesenc.  Operand 2 may be a register or memory.
13306 (define_insn "aesenc"
13307 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
13308 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
13309 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
13313 aesenc\t{%2, %0|%0, %2}
13314 vaesenc\t{%2, %1, %0|%0, %1, %2}"
13315 [(set_attr "isa" "noavx,avx")
13316 (set_attr "type" "sselog1")
13317 (set_attr "prefix_extra" "1")
13318 (set_attr "prefix" "orig,vex")
13319 (set_attr "btver2_decode" "double,double")
13320 (set_attr "mode" "TI")])
;; Last AES encryption round (UNSPEC_AESENCLAST) on V2DI operands.  Two
;; alternatives selected by the "isa" attribute: two-operand aesenclast
;; with operand 1 tied to the destination, or three-operand vaesenclast.
13322 (define_insn "aesenclast"
13323 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
13324 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
13325 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
13326 UNSPEC_AESENCLAST))]
13329 aesenclast\t{%2, %0|%0, %2}
13330 vaesenclast\t{%2, %1, %0|%0, %1, %2}"
13331 [(set_attr "isa" "noavx,avx")
13332 (set_attr "type" "sselog1")
13333 (set_attr "prefix_extra" "1")
13334 (set_attr "prefix" "orig,vex")
13335 (set_attr "btver2_decode" "double,double")
13336 (set_attr "mode" "TI")])
;; AES decryption round on V2DI operands.  Two alternatives selected by
;; the "isa" attribute: two-operand aesdec with operand 1 tied to the
;; destination, or three-operand vaesdec.
13338 (define_insn "aesdec"
13339 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
13340 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
13341 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
13345 aesdec\t{%2, %0|%0, %2}
13346 vaesdec\t{%2, %1, %0|%0, %1, %2}"
13347 [(set_attr "isa" "noavx,avx")
13348 (set_attr "type" "sselog1")
13349 (set_attr "prefix_extra" "1")
13350 (set_attr "prefix" "orig,vex")
13351 (set_attr "btver2_decode" "double,double")
13352 (set_attr "mode" "TI")])
;; Last AES decryption round (UNSPEC_AESDECLAST) on V2DI operands.  Two
;; alternatives selected by the "isa" attribute: two-operand aesdeclast
;; with operand 1 tied to the destination, or three-operand vaesdeclast.
13354 (define_insn "aesdeclast"
13355 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
13356 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
13357 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
13358 UNSPEC_AESDECLAST))]
13361 aesdeclast\t{%2, %0|%0, %2}
13362 vaesdeclast\t{%2, %1, %0|%0, %1, %2}"
13363 [(set_attr "isa" "noavx,avx")
13364 (set_attr "type" "sselog1")
13365 (set_attr "prefix_extra" "1")
13366 (set_attr "prefix" "orig,vex")
13367 (set_attr "btver2_decode" "double,double")
13368 (set_attr "mode" "TI")])
;; Emits aesimc with a single V2DI source (register or memory).  The
;; template uses the %v prefix together with the "maybe_vex" prefix
;; attribute, so one alternative covers both encodings.
13370 (define_insn "aesimc"
13371 [(set (match_operand:V2DI 0 "register_operand" "=x")
13372 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")]
13375 "%vaesimc\t{%1, %0|%0, %1}"
13376 [(set_attr "type" "sselog1")
13377 (set_attr "prefix_extra" "1")
13378 (set_attr "prefix" "maybe_vex")
13379 (set_attr "mode" "TI")])
;; Emits aeskeygenassist (UNSPEC_AESKEYGENASSIST): V2DI source in a
;; register or memory plus an 8-bit immediate (0..255) in operand 2.
;; Uses the %v prefix with the "maybe_vex" prefix attribute.
13381 (define_insn "aeskeygenassist"
13382 [(set (match_operand:V2DI 0 "register_operand" "=x")
13383 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")
13384 (match_operand:SI 2 "const_0_to_255_operand" "n")]
13385 UNSPEC_AESKEYGENASSIST))]
13387 "%vaeskeygenassist\t{%2, %1, %0|%0, %1, %2}"
13388 [(set_attr "type" "sselog1")
13389 (set_attr "prefix_extra" "1")
13390 (set_attr "length_immediate" "1")
13391 (set_attr "prefix" "maybe_vex")
13392 (set_attr "mode" "TI")])
;; Emits pclmulqdq / vpclmulqdq on V2DI operands with an 8-bit immediate
;; (0..255) in operand 3.  The noavx alternative ties operand 1 to the
;; destination; the avx alternative uses the three-operand VEX form.
13394 (define_insn "pclmulqdq"
13395 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
13396 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
13397 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")
13398 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
13402 pclmulqdq\t{%3, %2, %0|%0, %2, %3}
13403 vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}"
13404 [(set_attr "isa" "noavx,avx")
13405 (set_attr "type" "sselog1")
13406 (set_attr "prefix_extra" "1")
13407 (set_attr "length_immediate" "1")
13408 (set_attr "prefix" "orig,vex")
13409 (set_attr "mode" "TI")])
;; Expander that builds the PARALLEL for vzeroall by hand.  The vector has
;; nregs + 1 elements, where nregs is 16 for TARGET_64BIT and 8 otherwise:
;; element 0 is an UNSPEC_VOLATILE wrapping const0_rtx, and element
;; regno + 1 is a SET of SSE register `regno' (as V8SImode) to the
;; V8SImode zero constant.
13411 (define_expand "avx_vzeroall"
13412 [(match_par_dup 0 [(const_int 0)])]
13415 int nregs = TARGET_64BIT ? 16 : 8;
13418 operands[0] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs + 1));
13420 XVECEXP (operands[0], 0, 0)
13421 = gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, const0_rtx),
13424 for (regno = 0; regno < nregs; regno++)
13425 XVECEXP (operands[0], 0, regno + 1)
13426 = gen_rtx_SET (VOIDmode,
13427 gen_rtx_REG (V8SImode, SSE_REGNO (regno)),
13428 CONST0_RTX (V8SImode));
;; Matches a PARALLEL accepted by the vzeroall_operation predicate whose
;; first element is the UNSPECV_VZEROALL unspec_volatile.  The attributes
;; describe a VEX-encoded instruction with no modrm byte and no memory
;; access.
13431 (define_insn "*avx_vzeroall"
13432 [(match_parallel 0 "vzeroall_operation"
13433 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROALL)])]
13436 [(set_attr "type" "sse")
13437 (set_attr "modrm" "0")
13438 (set_attr "memory" "none")
13439 (set_attr "prefix" "vex")
13440 (set_attr "btver2_decode" "vector")
13441 (set_attr "mode" "OI")])
13443 ;; Clear the upper 128bits of AVX registers, equivalent to a NOP
13444 ;; if the upper 128bits are unused.
;; Modelled as a bare unspec_volatile (UNSPECV_VZEROUPPER) without
;; operands.  The attributes describe a VEX-encoded instruction with no
;; modrm byte and no memory access.
13445 (define_insn "avx_vzeroupper"
13446 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)]
13449 [(set_attr "type" "sse")
13450 (set_attr "modrm" "0")
13451 (set_attr "memory" "none")
13452 (set_attr "prefix" "vex")
13453 (set_attr "btver2_decode" "vector")
13454 (set_attr "mode" "OI")])
;; Emits vpbroadcast<ssemodesuffix> for every VI mode.  The value that is
;; broadcast is element 0 of operand 1, a <ssexmmmode> vector held in a
;; register or in memory.
13456 (define_insn "avx2_pbroadcast<mode>"
13457 [(set (match_operand:VI 0 "register_operand" "=x")
13459 (vec_select:<ssescalarmode>
13460 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "xm")
13461 (parallel [(const_int 0)]))))]
13463 "vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}"
13464 [(set_attr "type" "ssemov")
13465 (set_attr "prefix_extra" "1")
13466 (set_attr "prefix" "vex")
13467 (set_attr "mode" "<sseinsnmode>")])
;; Variant of the broadcast whose source (operand 1) is itself a VI_256
;; vector; element 0 of it is duplicated into every element of the result.
;; The memory alternative prints the operand directly, the register
;; alternative prints it with the %x modifier.
13469 (define_insn "avx2_pbroadcast<mode>_1"
13470 [(set (match_operand:VI_256 0 "register_operand" "=x,x")
13471 (vec_duplicate:VI_256
13472 (vec_select:<ssescalarmode>
13473 (match_operand:VI_256 1 "nonimmediate_operand" "m,x")
13474 (parallel [(const_int 0)]))))]
13477 vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}
13478 vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %x1}"
13479 [(set_attr "type" "ssemov")
13480 (set_attr "prefix_extra" "1")
13481 (set_attr "prefix" "vex")
13482 (set_attr "mode" "<sseinsnmode>")])
;; Emits vperm<ssemodesuffix> for VI48F_256_512 modes, modelled as an
;; unspec.  Operand 1 is the data vector (register or memory) and
;; operand 2 is a register of the matching integer vector mode
;; <sseintvecmode>.  Note the assembler operand order: operand 1 is printed
;; before operand 2 in the AT&T form.  <mask_operand3> is appended to the
;; destination by the <mask_name> substitution.
13484 (define_insn "<avx2_avx512f>_permvar<mode><mask_name>"
13485 [(set (match_operand:VI48F_256_512 0 "register_operand" "=v")
13486 (unspec:VI48F_256_512
13487 [(match_operand:VI48F_256_512 1 "nonimmediate_operand" "vm")
13488 (match_operand:<sseintvecmode> 2 "register_operand" "v")]
13490 "TARGET_AVX2 && <mask_mode512bit_condition>"
13491 "vperm<ssemodesuffix>\t{%1, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1}"
13492 [(set_attr "type" "sselog")
13493 (set_attr "prefix" "<mask_prefix2>")
13494 (set_attr "mode" "<sseinsnmode>")])
;; Expander taking an 8-bit immediate permute control (operand 2).  The
;; immediate is split into four 2-bit fields (bits 1:0, 3:2, 5:4 and 7:6),
;; each passed as a separate constant to gen_<avx2_avx512f>_perm<mode>_1.
13496 (define_expand "<avx2_avx512f>_perm<mode>"
13497 [(match_operand:VI8F_256_512 0 "register_operand")
13498 (match_operand:VI8F_256_512 1 "nonimmediate_operand")
13499 (match_operand:SI 2 "const_0_to_255_operand")]
13502 int mask = INTVAL (operands[2]);
13503 emit_insn (gen_<avx2_avx512f>_perm<mode>_1 (operands[0], operands[1],
13504 GEN_INT ((mask >> 0) & 3),
13505 GEN_INT ((mask >> 2) & 3),
13506 GEN_INT ((mask >> 4) & 3),
13507 GEN_INT ((mask >> 6) & 3)));
;; Masked form of the immediate permute expander for V8FI modes.  The
;; 8-bit immediate (operand 2) is split into four 2-bit fields as in the
;; unmasked expander; operand 3 (a vector_move_operand) and operand 4 (a
;; register of mode <avx512fmaskmode>) are passed through unchanged to
;; gen_<avx2_avx512f>_perm<mode>_1_mask.
13511 (define_expand "avx512f_perm<mode>_mask"
13512 [(match_operand:V8FI 0 "register_operand")
13513 (match_operand:V8FI 1 "nonimmediate_operand")
13514 (match_operand:SI 2 "const_0_to_255_operand")
13515 (match_operand:V8FI 3 "vector_move_operand")
13516 (match_operand:<avx512fmaskmode> 4 "register_operand")]
13519 int mask = INTVAL (operands[2]);
13520 emit_insn (gen_<avx2_avx512f>_perm<mode>_1_mask (operands[0], operands[1],
13521 GEN_INT ((mask >> 0) & 3),
13522 GEN_INT ((mask >> 2) & 3),
13523 GEN_INT ((mask >> 4) & 3),
13524 GEN_INT ((mask >> 6) & 3),
13525 operands[3], operands[4]));
;; Immediate permute expressed as a vec_select with four selector
;; operands (2..5), each in the range 0..3.  The output code packs them
;; back into one immediate (operand 2 in bits 1:0, operand 3 in bits 3:2,
;; operand 4 in bits 5:4, operand 5 in bits 7:6), overwrites operands[2]
;; with it and prints vperm<ssemodesuffix> with %2 as the immediate.
13529 (define_insn "<avx2_avx512f>_perm<mode>_1<mask_name>"
13530 [(set (match_operand:VI8F_256_512 0 "register_operand" "=v")
13531 (vec_select:VI8F_256_512
13532 (match_operand:VI8F_256_512 1 "nonimmediate_operand" "vm")
13533 (parallel [(match_operand 2 "const_0_to_3_operand")
13534 (match_operand 3 "const_0_to_3_operand")
13535 (match_operand 4 "const_0_to_3_operand")
13536 (match_operand 5 "const_0_to_3_operand")])))]
13537 "TARGET_AVX2 && <mask_mode512bit_condition>"
13540 mask |= INTVAL (operands[2]) << 0;
13541 mask |= INTVAL (operands[3]) << 2;
13542 mask |= INTVAL (operands[4]) << 4;
13543 mask |= INTVAL (operands[5]) << 6;
13544 operands[2] = GEN_INT (mask);
13545 return "vperm<ssemodesuffix>\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
13547 [(set_attr "type" "sselog")
13548 (set_attr "prefix" "<mask_prefix2>")
13549 (set_attr "mode" "<sseinsnmode>")])
;; Emits vperm2i128 on two V4DI inputs with an 8-bit immediate (0..255) in
;; operand 3.  Operand 1 is a register; operand 2 may be memory.
13551 (define_insn "avx2_permv2ti"
13552 [(set (match_operand:V4DI 0 "register_operand" "=x")
13554 [(match_operand:V4DI 1 "register_operand" "x")
13555 (match_operand:V4DI 2 "nonimmediate_operand" "xm")
13556 (match_operand:SI 3 "const_0_to_255_operand" "n")]
13559 "vperm2i128\t{%3, %2, %1, %0|%0, %1, %2, %3}"
13560 [(set_attr "type" "sselog")
13561 (set_attr "prefix" "vex")
13562 (set_attr "mode" "OI")])
;; Emits vbroadcastsd: element 0 of a V2DF register (operand 1) is
;; duplicated into all four elements of the V4DF destination.  Only a
;; register source is accepted by this pattern.
13564 (define_insn "avx2_vec_dupv4df"
13565 [(set (match_operand:V4DF 0 "register_operand" "=x")
13566 (vec_duplicate:V4DF
13568 (match_operand:V2DF 1 "register_operand" "x")
13569 (parallel [(const_int 0)]))))]
13571 "vbroadcastsd\t{%1, %0|%0, %1}"
13572 [(set_attr "type" "sselog1")
13573 (set_attr "prefix" "vex")
13574 (set_attr "mode" "V4DF")])
13576 ;; Modes handled by AVX vec_dup patterns.
;; These are the 256-bit vector modes with 32-bit or 64-bit elements,
;; integer and floating-point.
13577 (define_mode_iterator AVX_VEC_DUP_MODE
13578 [V8SI V8SF V4DI V4DF])
;; Duplicate a scalar of mode <ssescalarmode> into every element of an
;; AVX_VEC_DUP_MODE vector.  Three alternatives: a memory source (any
;; ISA), a register source printed with the %x modifier (avx2 only), and
;; a disparaged register source (?x) reserved for noavx2.
13580 (define_insn "vec_dup<mode>"
13581 [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand" "=x,x,x")
13582 (vec_duplicate:AVX_VEC_DUP_MODE
13583 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "m,x,?x")))]
13586 vbroadcast<ssescalarmodesuffix>\t{%1, %0|%0, %1}
13587 vbroadcast<ssescalarmodesuffix>\t{%x1, %0|%0, %x1}
13589 [(set_attr "type" "ssemov")
13590 (set_attr "prefix_extra" "1")
13591 (set_attr "prefix" "vex")
13592 (set_attr "isa" "*,avx2,noavx2")
13593 (set_attr "mode" "V8SF")])
;; Broadcast of element 0 of a <ssexmmmode> vector (register or memory)
;; into every element of a VI48F_512 destination.  Emits
;; v<sseintprefix>broadcast<bcstscalarsuff>; <mask_operand2> is appended to
;; the destination by the <mask_name> substitution.
13595 (define_insn "<mask_codefor>avx512f_vec_dup<mode><mask_name>"
13596 [(set (match_operand:VI48F_512 0 "register_operand" "=v")
13597 (vec_duplicate:VI48F_512
13598 (vec_select:<ssescalarmode>
13599 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "vm")
13600 (parallel [(const_int 0)]))))]
13602 "v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
13603 [(set_attr "type" "ssemov")
13604 (set_attr "prefix" "evex")
13605 (set_attr "mode" "<sseinsnmode>")])
;; Duplicate a whole <ssexmmmode> vector across a V16FI destination.
;; With a register source the insn is printed as vshuf<shuffletype>32x4
;; with immediate 0 and the source (via %g1) used for both inputs; with a
;; memory source it is printed as vbroadcast<shuffletype>32x4.
13607 (define_insn "<mask_codefor>avx512f_broadcast<mode><mask_name>"
13608 [(set (match_operand:V16FI 0 "register_operand" "=v,v")
13609 (vec_duplicate:V16FI
13610 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "v,m")))]
13613 vshuf<shuffletype>32x4\t{$0x0, %g1, %g1, %0<mask_operand2>|%0<mask_operand2>, %g1, %g1, 0x0}
13614 vbroadcast<shuffletype>32x4\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
13615 [(set_attr "type" "ssemov")
13616 (set_attr "prefix" "evex")
13617 (set_attr "mode" "<sseinsnmode>")])
;; Duplicate a whole <ssehalfvecmode> vector across a V8FI destination.
;; With a register source the insn is printed as vshuf<shuffletype>64x2
;; with immediate 0x44 and the source (via %g1) used for both inputs; with
;; a memory source it is printed as vbroadcast<shuffletype>64x4.
13619 (define_insn "<mask_codefor>avx512f_broadcast<mode><mask_name>"
13620 [(set (match_operand:V8FI 0 "register_operand" "=v,v")
13621 (vec_duplicate:V8FI
13622 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "v,m")))]
13625 vshuf<shuffletype>64x2\t{$0x44, %g1, %g1, %0<mask_operand2>|%0<mask_operand2>, %g1, %g1, 0x44}
13626 vbroadcast<shuffletype>64x4\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
13627 [(set_attr "type" "ssemov")
13628 (set_attr "prefix" "evex")
13629 (set_attr "mode" "<sseinsnmode>")])
;; Broadcast of a scalar held in a general-purpose register ("r") into a
;; VI48_512 vector, printed as vpbroadcast<bcstscalarsuff>.  The condition
;; excludes V8DImode unless TARGET_64BIT is set.
13631 (define_insn "<mask_codefor>avx512f_vec_dup_gpr<mode><mask_name>"
13632 [(set (match_operand:VI48_512 0 "register_operand" "=v")
13633 (vec_duplicate:VI48_512
13634 (match_operand:<ssescalarmode> 1 "register_operand" "r")))]
13635 "TARGET_AVX512F && (<MODE>mode != V8DImode || TARGET_64BIT)"
13636 "vpbroadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
13637 [(set_attr "type" "ssemov")
13638 (set_attr "prefix" "evex")
13639 (set_attr "mode" "<sseinsnmode>")])
;; Broadcast of a scalar of mode <ssescalarmode> into a VI48F_512 vector.
;; The source is matched directly as a scalar (no vec_select) and may be a
;; vector register or memory ("vm").
13641 (define_insn "<mask_codefor>avx512f_vec_dup_mem<mode><mask_name>"
13642 [(set (match_operand:VI48F_512 0 "register_operand" "=v")
13643 (vec_duplicate:VI48F_512
13644 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "vm")))]
13646 "v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
13647 [(set_attr "type" "ssemov")
13648 (set_attr "prefix" "evex")
13649 (set_attr "mode" "<sseinsnmode>")])
;; Emits vbroadcasti128 for VI_256 modes.  The source is a
;; <ssehalfvecmode> value and must be a memory operand; there is no
;; register alternative.
13651 (define_insn "avx2_vbroadcasti128_<mode>"
13652 [(set (match_operand:VI_256 0 "register_operand" "=x")
13654 (match_operand:<ssehalfvecmode> 1 "memory_operand" "m")
13657 "vbroadcasti128\t{%1, %0|%0, %1}"
13658 [(set_attr "type" "ssemov")
13659 (set_attr "prefix_extra" "1")
13660 (set_attr "prefix" "vex")
13661 (set_attr "mode" "OI")])
;; Post-reload rewrite (condition: TARGET_AVX, not TARGET_AVX2, and
;; reload_completed) of a scalar-register vec_duplicate into an
;; AVX_VEC_DUP_MODE vector.  The replacement first duplicates the scalar
;; into operand 2 in <ssehalfvecmode>, then concatenates operand 2 with
;; itself.  Operand 2 is the destination hard register re-created in
;; <ssehalfvecmode>.
;; NOTE(review): the opening line of this definition is not visible in this
;; chunk; the layout matches a define_split -- confirm.
13664 [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand")
13665 (vec_duplicate:AVX_VEC_DUP_MODE
13666 (match_operand:<ssescalarmode> 1 "register_operand")))]
13667 "TARGET_AVX && !TARGET_AVX2 && reload_completed"
13668 [(set (match_dup 2)
13669 (vec_duplicate:<ssehalfvecmode> (match_dup 1)))
13671 (vec_concat:AVX_VEC_DUP_MODE (match_dup 2) (match_dup 2)))]
13672 "operands[2] = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (operands[0]));")
;; 128-bit to 256-bit broadcast for V_256 modes, with three alternatives:
;;   0: memory source             -> vbroadcast<i128>
;;   1: source tied to operand 0  -> vinsert<i128> with immediate 1
;;   2: other register (?x)       -> vperm2<i128> with immediate 0 on the
;;                                   %t-printed source
;; Only alternatives 1 and 2 carry an immediate byte (length_immediate).
13674 (define_insn "avx_vbroadcastf128_<mode>"
13675 [(set (match_operand:V_256 0 "register_operand" "=x,x,x")
13677 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "m,0,?x")
13681 vbroadcast<i128>\t{%1, %0|%0, %1}
13682 vinsert<i128>\t{$1, %1, %0, %0|%0, %0, %1, 1}
13683 vperm2<i128>\t{$0, %t1, %t1, %0|%0, %t1, %t1, 0}"
13684 [(set_attr "type" "ssemov,sselog1,sselog1")
13685 (set_attr "prefix_extra" "1")
13686 (set_attr "length_immediate" "0,1,1")
13687 (set_attr "prefix" "vex")
13688 (set_attr "mode" "<sseinsnmode>")])
;; Emit vpbroadcastmb2q: a V8DI vec_duplicate whose source is a QImode
;; value held in a mask register (constraint k).
;; NOTE(review): the extension wrapper around operand 1 and the enabling
;; condition are not visible in this chunk.
13690 (define_insn "avx512cd_maskb_vec_dupv8di"
13691 [(set (match_operand:V8DI 0 "register_operand" "=v")
13692 (vec_duplicate:V8DI
13694 (match_operand:QI 1 "register_operand" "k"))))]
13696 "vpbroadcastmb2q\t{%1, %0|%0, %1}"
13697 [(set_attr "type" "mskmov")
13698 (set_attr "prefix" "evex")
13699 (set_attr "mode" "XI")])
;; Emit vpbroadcastmw2d: a V16SI vec_duplicate whose source is an HImode
;; value held in a mask register (constraint k).
;; NOTE(review): the extension wrapper around operand 1 and the enabling
;; condition are not visible in this chunk.
13701 (define_insn "avx512cd_maskw_vec_dupv16si"
13702 [(set (match_operand:V16SI 0 "register_operand" "=v")
13703 (vec_duplicate:V16SI
13705 (match_operand:HI 1 "register_operand" "k"))))]
13707 "vpbroadcastmw2d\t{%1, %0|%0, %1}"
13708 [(set_attr "type" "mskmov")
13709 (set_attr "prefix" "evex")
13710 (set_attr "mode" "XI")])
13712 ;; Recognize broadcast as a vec_select as produced by builtin_vec_perm.
13713 ;; If it so happens that the input is in memory, use vbroadcast.
13714 ;; Otherwise use vpermilp (and in the case of 256-bit modes, vperm2f128).
;; V4SF case.  Operand 3 is the index of the element being broadcast.
;; One output path re-addresses operand 1 to the selected SFmode element
;; (byte offset elt * 4) and emits vbroadcastss; the other replaces
;; operand 2 with the immediate elt * 0x55 and emits vpermilps.
;; NOTE(review): the case labels are not visible in this chunk; from the
;; constraints (m, o, x) and type attributes the first path presumably
;; serves the two memory alternatives -- confirm.
13715 (define_insn "*avx_vperm_broadcast_v4sf"
13716 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
13718 (match_operand:V4SF 1 "nonimmediate_operand" "m,o,x")
13719 (match_parallel 2 "avx_vbroadcast_operand"
13720 [(match_operand 3 "const_int_operand" "C,n,n")])))]
13723 int elt = INTVAL (operands[3]);
13724 switch (which_alternative)
13728 operands[1] = adjust_address_nv (operands[1], SFmode, elt * 4);
13729 return "vbroadcastss\t{%1, %0|%0, %k1}";
13731 operands[2] = GEN_INT (elt * 0x55);
13732 return "vpermilps\t{%2, %1, %0|%0, %1, %2}";
13734 gcc_unreachable ();
13737 [(set_attr "type" "ssemov,ssemov,sselog1")
13738 (set_attr "prefix_extra" "1")
13739 (set_attr "length_immediate" "0,0,1")
13740 (set_attr "prefix" "vex")
13741 (set_attr "mode" "SF,SF,V4SF")])
;; 256-bit float (VF_256) counterpart of the V4SF broadcast pattern,
;; written as an insn-and-split.  The split fires after reload and, for
;; V4DFmode, only when TARGET_AVX2 is off; its default replacement is a
;; vec_duplicate of operand 1.
;; The preparation code handles two shapes of operand 1:
;;  - a path that, under TARGET_AVX2 with elt == 0, calls gen_vec_dup<mode>
;;    on the low part, and otherwise emits vpermil (mask 15 or 0 for
;;    V4DFmode depending on the low bit of elt, else (elt & 3) * 0x55)
;;    followed by vperm2f128 with mask (elt / (<ssescalarnum> / 2)) * 0x11;
;;  - a path that re-addresses operand 1 to the selected scalar element.
;; NOTE(review): the branch conditions separating these paths are not
;; visible in this chunk; presumably register versus memory source.
13743 (define_insn_and_split "*avx_vperm_broadcast_<mode>"
13744 [(set (match_operand:VF_256 0 "register_operand" "=x,x,x")
13746 (match_operand:VF_256 1 "nonimmediate_operand" "m,o,?x")
13747 (match_parallel 2 "avx_vbroadcast_operand"
13748 [(match_operand 3 "const_int_operand" "C,n,n")])))]
13751 "&& reload_completed && (<MODE>mode != V4DFmode || !TARGET_AVX2)"
13752 [(set (match_dup 0) (vec_duplicate:VF_256 (match_dup 1)))]
13754 rtx op0 = operands[0], op1 = operands[1];
13755 int elt = INTVAL (operands[3]);
13761 if (TARGET_AVX2 && elt == 0)
13763 emit_insn (gen_vec_dup<mode> (op0, gen_lowpart (<ssescalarmode>mode,
13768 /* Shuffle element we care about into all elements of the 128-bit lane.
13769 The other lane gets shuffled too, but we don't care. */
13770 if (<MODE>mode == V4DFmode)
13771 mask = (elt & 1 ? 15 : 0);
13773 mask = (elt & 3) * 0x55;
13774 emit_insn (gen_avx_vpermil<mode> (op0, op1, GEN_INT (mask)));
13776 /* Shuffle the lane we care about into both lanes of the dest. */
13777 mask = (elt / (<ssescalarnum> / 2)) * 0x11;
13778 emit_insn (gen_avx_vperm2f128<mode>3 (op0, op0, op0, GEN_INT (mask)));
13782 operands[1] = adjust_address (op1, <ssescalarmode>mode,
13783 elt * GET_MODE_SIZE (<ssescalarmode>mode));
;; Expander for immediate-controlled vpermil on VF2 modes.  It converts
;; the 8-bit immediate in operand 2 into a PARALLEL of <ssescalarnum>
;; element selectors: elements are processed in pairs starting at i, where
;; element i selects ((mask >> i) & 1) + i and element i + 1 selects
;; ((mask >> (i + 1)) & 1) + i.
13786 (define_expand "<sse2_avx_avx512f>_vpermil<mode><mask_name>"
13787 [(set (match_operand:VF2 0 "register_operand")
13789 (match_operand:VF2 1 "nonimmediate_operand")
13790 (match_operand:SI 2 "const_0_to_255_operand")))]
13791 "TARGET_AVX && <mask_mode512bit_condition>"
13793 int mask = INTVAL (operands[2]);
13794 rtx perm[<ssescalarnum>];
13797 for (i = 0; i < <ssescalarnum>; i = i + 2)
13799 perm[i] = GEN_INT (((mask >> i) & 1) + i);
13800 perm[i + 1] = GEN_INT (((mask >> (i + 1)) & 1) + i);
13804 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
;; Expander for immediate-controlled vpermil on VF1 modes.  It converts
;; the 8-bit immediate in operand 2 into a PARALLEL of <ssescalarnum>
;; element selectors: elements are processed in groups of four starting
;; at i, and the four 2-bit fields of the immediate (bits 0-1, 2-3, 4-5,
;; 6-7) select within the group, each offset by i.  The same four fields
;; are reused for every group.
13807 (define_expand "<sse2_avx_avx512f>_vpermil<mode><mask_name>"
13808 [(set (match_operand:VF1 0 "register_operand")
13810 (match_operand:VF1 1 "nonimmediate_operand")
13811 (match_operand:SI 2 "const_0_to_255_operand")))]
13812 "TARGET_AVX && <mask_mode512bit_condition>"
13814 int mask = INTVAL (operands[2]);
13815 rtx perm[<ssescalarnum>];
13818 for (i = 0; i < <ssescalarnum>; i = i + 4)
13820 perm[i] = GEN_INT (((mask >> 0) & 3) + i);
13821 perm[i + 1] = GEN_INT (((mask >> 2) & 3) + i);
13822 perm[i + 2] = GEN_INT (((mask >> 4) & 3) + i);
13823 perm[i + 3] = GEN_INT (((mask >> 6) & 3) + i);
13827 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
;; Matcher for the selector-PARALLEL form of vpermil on VF modes.  The insn
;; condition requires avx_vpermilp_parallel to accept operand 2 (non-zero
;; result).  At output time the same function is called again and its
;; result minus 1 becomes the immediate printed in the
;; vpermil<ssemodesuffix> template.
13830 (define_insn "*<sse2_avx_avx512f>_vpermilp<mode><mask_name>"
13831 [(set (match_operand:VF 0 "register_operand" "=v")
13833 (match_operand:VF 1 "nonimmediate_operand" "vm")
13834 (match_parallel 2 ""
13835 [(match_operand 3 "const_int_operand")])))]
13836 "TARGET_AVX && <mask_mode512bit_condition>
13837 && avx_vpermilp_parallel (operands[2], <MODE>mode)"
13839 int mask = avx_vpermilp_parallel (operands[2], <MODE>mode) - 1;
13840 operands[2] = GEN_INT (mask);
13841 return "vpermil<ssemodesuffix>\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}";
13843 [(set_attr "type" "sselog")
13844 (set_attr "prefix_extra" "1")
13845 (set_attr "length_immediate" "1")
13846 (set_attr "prefix" "<mask_prefix>")
13847 (set_attr "mode" "<sseinsnmode>")])
;; Variable-control vpermil on VF modes: operand 1 is the data register
;; and operand 2 is an integer vector (<sseintvecmode>) control that may be
;; in a register or in memory.  Emitted as vpermil<ssemodesuffix> with no
;; immediate.
;; NOTE(review): the RTL wrapper naming the operation is not visible in
;; this chunk.
13849 (define_insn "<sse2_avx_avx512f>_vpermilvar<mode>3<mask_name>"
13850 [(set (match_operand:VF 0 "register_operand" "=v")
13852 [(match_operand:VF 1 "register_operand" "v")
13853 (match_operand:<sseintvecmode> 2 "nonimmediate_operand" "vm")]
13855 "TARGET_AVX && <mask_mode512bit_condition>"
13856 "vpermil<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13857 [(set_attr "type" "sselog")
13858 (set_attr "prefix_extra" "1")
13859 (set_attr "btver2_decode" "vector")
13860 (set_attr "prefix" "<mask_prefix>")
13861 (set_attr "mode" "<sseinsnmode>")])
;; vpermi2 on VI48F_512 modes.  Operand 2, the <sseintvecmode> vector, is
;; tied to the destination register (constraint 0), so only operands 1 and
;; 3 appear as sources in the assembler template; operand 3 may be memory.
13863 (define_insn "avx512f_vpermi2var<mode>3"
13864 [(set (match_operand:VI48F_512 0 "register_operand" "=v")
13866 [(match_operand:VI48F_512 1 "register_operand" "v")
13867 (match_operand:<sseintvecmode> 2 "register_operand" "0")
13868 (match_operand:VI48F_512 3 "nonimmediate_operand" "vm")]
13871 "vpermi2<ssemodesuffix>\t{%3, %1, %0|%0, %1, %3}"
13872 [(set_attr "type" "sselog")
13873 (set_attr "prefix" "evex")
13874 (set_attr "mode" "<sseinsnmode>")])
;; Masked form of vpermi2: the UNSPEC_VPERMI2_MASK result is combined by
;; vec_merge under the mask register in operand 4 (constraint k), which is
;; printed in braces after the destination.  Operand 2 is tied to the
;; destination as in the unmasked pattern.
;; NOTE(review): the second vec_merge arm is not visible in this chunk.
13876 (define_insn "avx512f_vpermi2var<mode>3_mask"
13877 [(set (match_operand:VI48F_512 0 "register_operand" "=v")
13878 (vec_merge:VI48F_512
13880 [(match_operand:VI48F_512 1 "register_operand" "v")
13881 (match_operand:<sseintvecmode> 2 "register_operand" "0")
13882 (match_operand:VI48F_512 3 "nonimmediate_operand" "vm")]
13883 UNSPEC_VPERMI2_MASK)
13885 (match_operand:<avx512fmaskmode> 4 "register_operand" "k")))]
13887 "vpermi2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
13888 [(set_attr "type" "sselog")
13889 (set_attr "prefix" "evex")
13890 (set_attr "mode" "<sseinsnmode>")])
;; vpermt2 on VI48F_512 modes.  Here operand 1 is the <sseintvecmode>
;; vector and operand 2, a data vector, is the one tied to the destination
;; register (constraint 0); operand 3 may be memory.
13892 (define_insn "avx512f_vpermt2var<mode>3"
13893 [(set (match_operand:VI48F_512 0 "register_operand" "=v")
13895 [(match_operand:<sseintvecmode> 1 "register_operand" "v")
13896 (match_operand:VI48F_512 2 "register_operand" "0")
13897 (match_operand:VI48F_512 3 "nonimmediate_operand" "vm")]
13900 "vpermt2<ssemodesuffix>\t{%3, %1, %0|%0, %1, %3}"
13901 [(set_attr "type" "sselog")
13902 (set_attr "prefix" "evex")
13903 (set_attr "mode" "<sseinsnmode>")])
;; Masked form of vpermt2: the permutation result is combined by vec_merge
;; under the mask register in operand 4 (constraint k), which is printed in
;; braces after the destination.  Operand 2 is tied to the destination.
;; NOTE(review): the unspec name and the second vec_merge arm are not
;; visible in this chunk.
13905 (define_insn "avx512f_vpermt2var<mode>3_mask"
13906 [(set (match_operand:VI48F_512 0 "register_operand" "=v")
13907 (vec_merge:VI48F_512
13909 [(match_operand:<sseintvecmode> 1 "register_operand" "v")
13910 (match_operand:VI48F_512 2 "register_operand" "0")
13911 (match_operand:VI48F_512 3 "nonimmediate_operand" "vm")]
13914 (match_operand:<avx512fmaskmode> 4 "register_operand" "k")))]
13916 "vpermt2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
13917 [(set_attr "type" "sselog")
13918 (set_attr "prefix" "evex")
13919 (set_attr "mode" "<sseinsnmode>")])
;; Expander for vperm2f128 with an 8-bit immediate in operand 3.  The
;; default pattern is the UNSPEC_VPERMIL2F128 form.  When neither bit 3 nor
;; bit 7 of the immediate is set ((mask & 0x88) == 0) the preparation code
;; instead builds a vec_select over vec_concat (operand 1, operand 2):
;; the low half of the result takes nelt2 consecutive elements starting at
;; (mask & 3) * nelt2 and the high half takes nelt2 consecutive elements
;; starting at ((mask >> 4) & 3) * nelt2.
;; NOTE(review): the lines that emit the constructed SET are not visible in
;; this chunk.
13921 (define_expand "avx_vperm2f128<mode>3"
13922 [(set (match_operand:AVX256MODE2P 0 "register_operand")
13923 (unspec:AVX256MODE2P
13924 [(match_operand:AVX256MODE2P 1 "register_operand")
13925 (match_operand:AVX256MODE2P 2 "nonimmediate_operand")
13926 (match_operand:SI 3 "const_0_to_255_operand")]
13927 UNSPEC_VPERMIL2F128))]
13930 int mask = INTVAL (operands[3]);
13931 if ((mask & 0x88) == 0)
13933 rtx perm[<ssescalarnum>], t1, t2;
13934 int i, base, nelt = <ssescalarnum>, nelt2 = nelt / 2;
13936 base = (mask & 3) * nelt2;
13937 for (i = 0; i < nelt2; ++i)
13938 perm[i] = GEN_INT (base + i);
13940 base = ((mask >> 4) & 3) * nelt2;
13941 for (i = 0; i < nelt2; ++i)
13942 perm[i + nelt2] = GEN_INT (base + i);
13944 t2 = gen_rtx_VEC_CONCAT (<ssedoublevecmode>mode,
13945 operands[1], operands[2]);
13946 t1 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, perm));
13947 t2 = gen_rtx_VEC_SELECT (<MODE>mode, t2, t1);
13948 t2 = gen_rtx_SET (VOIDmode, operands[0], t2);
13954 ;; Note that bits 7 and 3 of the imm8 allow lanes to be zeroed, which
13955 ;; means that in order to represent this properly in rtl we'd have to
13956 ;; nest *another* vec_concat with a zero operand and do the select from
13957 ;; a 4x wide vector. That doesn't seem very nice.
;; Hence this pattern keeps the operation opaque as UNSPEC_VPERMIL2F128 and
;; passes the whole 8-bit immediate (operand 3) straight through to the
;; vperm2<i128> template.
13958 (define_insn "*avx_vperm2f128<mode>_full"
13959 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
13960 (unspec:AVX256MODE2P
13961 [(match_operand:AVX256MODE2P 1 "register_operand" "x")
13962 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm")
13963 (match_operand:SI 3 "const_0_to_255_operand" "n")]
13964 UNSPEC_VPERMIL2F128))]
13966 "vperm2<i128>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
13967 [(set_attr "type" "sselog")
13968 (set_attr "prefix_extra" "1")
13969 (set_attr "length_immediate" "1")
13970 (set_attr "prefix" "vex")
13971 (set_attr "mode" "<sseinsnmode>")])
;; vec_select form of vperm2f128 over vec_concat (operand 1, operand 2).
;; The insn condition requires avx_vperm2f128_parallel to accept the
;; selector PARALLEL in operand 3; at output time the same function is
;; called again and its result minus 1 is the instruction mask.
;; Two of the output paths emit vinsert<i128> with immediate 0 or 1 on the
;; %x-printed operand 2; the remaining path emits vperm2<i128> with the
;; mask as immediate.
;; NOTE(review): the mask tests guarding the two vinsert returns are not
;; visible in this chunk.
13973 (define_insn "*avx_vperm2f128<mode>_nozero"
13974 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
13975 (vec_select:AVX256MODE2P
13976 (vec_concat:<ssedoublevecmode>
13977 (match_operand:AVX256MODE2P 1 "register_operand" "x")
13978 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm"))
13979 (match_parallel 3 ""
13980 [(match_operand 4 "const_int_operand")])))]
13982 && avx_vperm2f128_parallel (operands[3], <MODE>mode)"
13984 int mask = avx_vperm2f128_parallel (operands[3], <MODE>mode) - 1;
13986 return "vinsert<i128>\t{$0, %x2, %1, %0|%0, %1, %x2, 0}";
13988 return "vinsert<i128>\t{$1, %x2, %1, %0|%0, %1, %x2, 1}";
13989 operands[3] = GEN_INT (mask);
13990 return "vperm2<i128>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
13992 [(set_attr "type" "sselog")
13993 (set_attr "prefix_extra" "1")
13994 (set_attr "length_immediate" "1")
13995 (set_attr "prefix" "vex")
13996 (set_attr "mode" "<sseinsnmode>")])
;; Expander for inserting a <ssehalfvecmode> value (operand 2) into a V_256
;; vector (operand 1), producing operand 0.  Operand 3 is a constant 0 or 1;
;; a switch on it picks gen_vec_set_lo_<mode> or gen_vec_set_hi_<mode>,
;; which is then called on operands 0 to 2.  Any other value reaches
;; gcc_unreachable.
;; NOTE(review): the case labels are not visible in this chunk; presumably
;; 0 selects the lo generator and 1 the hi generator.
13998 (define_expand "avx_vinsertf128<mode>"
13999 [(match_operand:V_256 0 "register_operand")
14000 (match_operand:V_256 1 "register_operand")
14001 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand")
14002 (match_operand:SI 3 "const_0_to_1_operand")]
14005 rtx (*insn)(rtx, rtx, rtx);
14007 switch (INTVAL (operands[3]))
14010 insn = gen_vec_set_lo_<mode>;
14013 insn = gen_vec_set_hi_<mode>;
14016 gcc_unreachable ();
14019 emit_insn (insn (operands[0], operands[1], operands[2]));
;; V4DI low-half replacement emitted as vinserti128 with immediate 0x0.
;; The pattern pairs operand 2 (V2DI, register or memory) with elements 2
;; and 3 selected from operand 1.
;; NOTE(review): the vec_concat and vec_select opener lines are not visible
;; in this chunk.
14023 (define_insn "avx2_vec_set_lo_v4di"
14024 [(set (match_operand:V4DI 0 "register_operand" "=x")
14026 (match_operand:V2DI 2 "nonimmediate_operand" "xm")
14028 (match_operand:V4DI 1 "register_operand" "x")
14029 (parallel [(const_int 2) (const_int 3)]))))]
14031 "vinserti128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
14032 [(set_attr "type" "sselog")
14033 (set_attr "prefix_extra" "1")
14034 (set_attr "length_immediate" "1")
14035 (set_attr "prefix" "vex")
14036 (set_attr "mode" "OI")])
;; V4DI high-half replacement emitted as vinserti128 with immediate 0x1.
;; The pattern pairs elements 0 and 1 selected from operand 1 with
;; operand 2 (V2DI, register or memory).
;; NOTE(review): the vec_concat and vec_select opener lines are not visible
;; in this chunk.
14038 (define_insn "avx2_vec_set_hi_v4di"
14039 [(set (match_operand:V4DI 0 "register_operand" "=x")
14042 (match_operand:V4DI 1 "register_operand" "x")
14043 (parallel [(const_int 0) (const_int 1)]))
14044 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
14046 "vinserti128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
14047 [(set_attr "type" "sselog")
14048 (set_attr "prefix_extra" "1")
14049 (set_attr "length_immediate" "1")
14050 (set_attr "prefix" "vex")
14051 (set_attr "mode" "OI")])
;; Replace the low half of a VI8F_256 vector: the result is the
;; concatenation of operand 2 (new low half, register or memory) and
;; elements 2 and 3 of operand 1 (the preserved high half).  Emitted as
;; vinsert<i128> with immediate 0x0.
14053 (define_insn "vec_set_lo_<mode>"
14054 [(set (match_operand:VI8F_256 0 "register_operand" "=x")
14055 (vec_concat:VI8F_256
14056 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")
14057 (vec_select:<ssehalfvecmode>
14058 (match_operand:VI8F_256 1 "register_operand" "x")
14059 (parallel [(const_int 2) (const_int 3)]))))]
14061 "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
14062 [(set_attr "type" "sselog")
14063 (set_attr "prefix_extra" "1")
14064 (set_attr "length_immediate" "1")
14065 (set_attr "prefix" "vex")
14066 (set_attr "mode" "<sseinsnmode>")])
;; Replace the high half of a VI8F_256 vector: the result is the
;; concatenation of elements 0 and 1 of operand 1 (the preserved low half)
;; and operand 2 (new high half, register or memory).  Emitted as
;; vinsert<i128> with immediate 0x1.
14068 (define_insn "vec_set_hi_<mode>"
14069 [(set (match_operand:VI8F_256 0 "register_operand" "=x")
14070 (vec_concat:VI8F_256
14071 (vec_select:<ssehalfvecmode>
14072 (match_operand:VI8F_256 1 "register_operand" "x")
14073 (parallel [(const_int 0) (const_int 1)]))
14074 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")))]
14076 "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
14077 [(set_attr "type" "sselog")
14078 (set_attr "prefix_extra" "1")
14079 (set_attr "length_immediate" "1")
14080 (set_attr "prefix" "vex")
14081 (set_attr "mode" "<sseinsnmode>")])
;; Replace the low half of a VI4F_256 vector: the result is the
;; concatenation of operand 2 (new low half, register or memory) and
;; elements 4 to 7 of operand 1 (the preserved high half).  Emitted as
;; vinsert<i128> with immediate 0x0.
14083 (define_insn "vec_set_lo_<mode>"
14084 [(set (match_operand:VI4F_256 0 "register_operand" "=x")
14085 (vec_concat:VI4F_256
14086 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")
14087 (vec_select:<ssehalfvecmode>
14088 (match_operand:VI4F_256 1 "register_operand" "x")
14089 (parallel [(const_int 4) (const_int 5)
14090 (const_int 6) (const_int 7)]))))]
14092 "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
14093 [(set_attr "type" "sselog")
14094 (set_attr "prefix_extra" "1")
14095 (set_attr "length_immediate" "1")
14096 (set_attr "prefix" "vex")
14097 (set_attr "mode" "<sseinsnmode>")])
;; Replace the high half of a VI4F_256 vector: the result is the
;; concatenation of elements 0 to 3 of operand 1 (the preserved low half)
;; and operand 2 (new high half, register or memory).  Emitted as
;; vinsert<i128> with immediate 0x1.
14099 (define_insn "vec_set_hi_<mode>"
14100 [(set (match_operand:VI4F_256 0 "register_operand" "=x")
14101 (vec_concat:VI4F_256
14102 (vec_select:<ssehalfvecmode>
14103 (match_operand:VI4F_256 1 "register_operand" "x")
14104 (parallel [(const_int 0) (const_int 1)
14105 (const_int 2) (const_int 3)]))
14106 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")))]
14108 "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
14109 [(set_attr "type" "sselog")
14110 (set_attr "prefix_extra" "1")
14111 (set_attr "length_immediate" "1")
14112 (set_attr "prefix" "vex")
14113 (set_attr "mode" "<sseinsnmode>")])
;; V16HI low-half replacement: operand 2 (V8HI, register or memory) is
;; paired with elements 8 to 15 of operand 1.  Emitted as vinsert%~128 with
;; immediate 0x0.
;; NOTE(review): %~ is expanded by the operand printer, presumably to the
;; integer or float mnemonic letter depending on the target -- confirm.
;; The vec_concat and vec_select opener lines are not visible here.
14115 (define_insn "vec_set_lo_v16hi"
14116 [(set (match_operand:V16HI 0 "register_operand" "=x")
14118 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
14120 (match_operand:V16HI 1 "register_operand" "x")
14121 (parallel [(const_int 8) (const_int 9)
14122 (const_int 10) (const_int 11)
14123 (const_int 12) (const_int 13)
14124 (const_int 14) (const_int 15)]))))]
14126 "vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
14127 [(set_attr "type" "sselog")
14128 (set_attr "prefix_extra" "1")
14129 (set_attr "length_immediate" "1")
14130 (set_attr "prefix" "vex")
14131 (set_attr "mode" "OI")])
;; V16HI high-half replacement: elements 0 to 7 of operand 1 are paired
;; with operand 2 (V8HI, register or memory).  Emitted as vinsert%~128 with
;; immediate 0x1.
;; NOTE(review): the vec_concat and vec_select opener lines are not visible
;; in this chunk.
14133 (define_insn "vec_set_hi_v16hi"
14134 [(set (match_operand:V16HI 0 "register_operand" "=x")
14137 (match_operand:V16HI 1 "register_operand" "x")
14138 (parallel [(const_int 0) (const_int 1)
14139 (const_int 2) (const_int 3)
14140 (const_int 4) (const_int 5)
14141 (const_int 6) (const_int 7)]))
14142 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
14144 "vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
14145 [(set_attr "type" "sselog")
14146 (set_attr "prefix_extra" "1")
14147 (set_attr "length_immediate" "1")
14148 (set_attr "prefix" "vex")
14149 (set_attr "mode" "OI")])
;; V32QI low-half replacement: operand 2 (V16QI, register or memory) is
;; paired with elements 16 to 31 of operand 1.  Emitted as vinsert%~128
;; with immediate 0x0.
;; NOTE(review): the vec_concat and vec_select opener lines are not visible
;; in this chunk.
14151 (define_insn "vec_set_lo_v32qi"
14152 [(set (match_operand:V32QI 0 "register_operand" "=x")
14154 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
14156 (match_operand:V32QI 1 "register_operand" "x")
14157 (parallel [(const_int 16) (const_int 17)
14158 (const_int 18) (const_int 19)
14159 (const_int 20) (const_int 21)
14160 (const_int 22) (const_int 23)
14161 (const_int 24) (const_int 25)
14162 (const_int 26) (const_int 27)
14163 (const_int 28) (const_int 29)
14164 (const_int 30) (const_int 31)]))))]
14166 "vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
14167 [(set_attr "type" "sselog")
14168 (set_attr "prefix_extra" "1")
14169 (set_attr "length_immediate" "1")
14170 (set_attr "prefix" "vex")
14171 (set_attr "mode" "OI")])
;; V32QI high-half replacement: elements 0 to 15 of operand 1 are paired
;; with operand 2 (V16QI, register or memory).  Emitted as vinsert%~128
;; with immediate 0x1.
;; NOTE(review): the vec_concat and vec_select opener lines are not visible
;; in this chunk.
14173 (define_insn "vec_set_hi_v32qi"
14174 [(set (match_operand:V32QI 0 "register_operand" "=x")
14177 (match_operand:V32QI 1 "register_operand" "x")
14178 (parallel [(const_int 0) (const_int 1)
14179 (const_int 2) (const_int 3)
14180 (const_int 4) (const_int 5)
14181 (const_int 6) (const_int 7)
14182 (const_int 8) (const_int 9)
14183 (const_int 10) (const_int 11)
14184 (const_int 12) (const_int 13)
14185 (const_int 14) (const_int 15)]))
14186 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
14188 "vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
14189 [(set_attr "type" "sselog")
14190 (set_attr "prefix_extra" "1")
14191 (set_attr "length_immediate" "1")
14192 (set_attr "prefix" "vex")
14193 (set_attr "mode" "OI")])
;; Masked vector load for V48_AVX2 modes.  Operand 2 is the mask, an
;; integer vector (<sseintvecmode>) in a register; operand 1 is the memory
;; source.  Emitted as v<sseintprefix>maskmov<ssemodesuffix>.
;; NOTE(review): the unspec name and enabling condition are not visible in
;; this chunk.
14195 (define_insn "<avx_avx2>_maskload<ssemodesuffix><avxsizesuffix>"
14196 [(set (match_operand:V48_AVX2 0 "register_operand" "=x")
14198 [(match_operand:<sseintvecmode> 2 "register_operand" "x")
14199 (match_operand:V48_AVX2 1 "memory_operand" "m")]
14202 "v<sseintprefix>maskmov<ssemodesuffix>\t{%1, %2, %0|%0, %2, %1}"
14203 [(set_attr "type" "sselog1")
14204 (set_attr "prefix_extra" "1")
14205 (set_attr "prefix" "vex")
14206 (set_attr "btver2_decode" "vector")
14207 (set_attr "mode" "<sseinsnmode>")])
;; Masked vector store for V48_AVX2 modes.  Operand 0 is the memory
;; destination and is marked read-write (constraint +m); operand 1 is the
;; mask, an integer vector in a register, and operand 2 is the data
;; register.  Emitted as v<sseintprefix>maskmov<ssemodesuffix>.
;; NOTE(review): the rest of the unspec operand list is not visible in
;; this chunk.
14209 (define_insn "<avx_avx2>_maskstore<ssemodesuffix><avxsizesuffix>"
14210 [(set (match_operand:V48_AVX2 0 "memory_operand" "+m")
14212 [(match_operand:<sseintvecmode> 1 "register_operand" "x")
14213 (match_operand:V48_AVX2 2 "register_operand" "x")
14217 "v<sseintprefix>maskmov<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
14218 [(set_attr "type" "sselog1")
14219 (set_attr "prefix_extra" "1")
14220 (set_attr "prefix" "vex")
14221 (set_attr "btver2_decode" "vector")
14222 (set_attr "mode" "<sseinsnmode>")])
;; Cast of a <ssehalfvecmode> value to the 256-bit AVX256MODE2P mode,
;; modelled as an unspec and split after reload into an ordinary move.
;; Before the move is emitted one side is re-created with gen_rtx_REG so
;; that both sides have the same mode: either the destination register in
;; <ssehalfvecmode> or the source register in <MODE>mode.
;; NOTE(review): the tests choosing between those two adjustments are not
;; visible in this chunk.
14224 (define_insn_and_split "avx_<castmode><avxsizesuffix>_<castmode>"
14225 [(set (match_operand:AVX256MODE2P 0 "nonimmediate_operand" "=x,m")
14226 (unspec:AVX256MODE2P
14227 [(match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "xm,x")]
14231 "&& reload_completed"
14234 rtx op0 = operands[0];
14235 rtx op1 = operands[1];
14237 op0 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op0));
14239 op1 = gen_rtx_REG (<MODE>mode, REGNO (op1));
14240 emit_move_insn (op0, op1);
;; vec_init expander for V_256 modes: delegates to ix86_expand_vector_init
;; with a first argument of false, the destination in operand 0 and the
;; initializer in operand 1.
14244 (define_expand "vec_init<mode>"
14245 [(match_operand:V_256 0 "register_operand")
14249 ix86_expand_vector_init (false, operands[0], operands[1]);
;; vec_init expander for VI48F_512 modes: delegates to
;; ix86_expand_vector_init with a first argument of false, the destination
;; in operand 0 and the initializer in operand 1.
14253 (define_expand "vec_init<mode>"
14254 [(match_operand:VI48F_512 0 "register_operand")
14258 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Expander that extracts a V2DI half from a V4DI register.  Operand 2 is a
;; constant 0 or 1; a switch on it picks gen_vec_extract_lo_v4di or
;; gen_vec_extract_hi_v4di, which is then called on operands 0 and 1.
;; NOTE(review): the case labels are not visible in this chunk; presumably
;; 0 selects the lo generator and 1 the hi generator.
14262 (define_expand "avx2_extracti128"
14263 [(match_operand:V2DI 0 "nonimmediate_operand")
14264 (match_operand:V4DI 1 "register_operand")
14265 (match_operand:SI 2 "const_0_to_1_operand")]
14268 rtx (*insn)(rtx, rtx);
14270 switch (INTVAL (operands[2]))
14273 insn = gen_vec_extract_lo_v4di;
14276 insn = gen_vec_extract_hi_v4di;
14279 gcc_unreachable ();
14282 emit_insn (insn (operands[0], operands[1]));
;; Expander that inserts a V2DI value (operand 2) into a V4DI vector
;; (operand 1), producing operand 0.  Operand 3 is a constant 0 or 1; a
;; switch on it picks gen_avx2_vec_set_lo_v4di or gen_avx2_vec_set_hi_v4di.
;; NOTE(review): the case labels are not visible in this chunk; presumably
;; 0 selects the lo generator and 1 the hi generator.
14286 (define_expand "avx2_inserti128"
14287 [(match_operand:V4DI 0 "register_operand")
14288 (match_operand:V4DI 1 "register_operand")
14289 (match_operand:V2DI 2 "nonimmediate_operand")
14290 (match_operand:SI 3 "const_0_to_1_operand")]
14293 rtx (*insn)(rtx, rtx, rtx);
14295 switch (INTVAL (operands[3]))
14298 insn = gen_avx2_vec_set_lo_v4di;
14301 insn = gen_avx2_vec_set_hi_v4di;
14304 gcc_unreachable ();
14307 emit_insn (insn (operands[0], operands[1], operands[2]));
;; Arithmetic right shift (ashiftrt) of a VI48_AVX512F vector where the
;; shift counts come from a second vector of the same mode in operand 2
;; (register or memory).  Emitted as vpsrav<ssemodesuffix>; enabled when
;; TARGET_AVX2 and <mask_mode512bit_condition> hold.
14311 (define_insn "<avx2_avx512f>_ashrv<mode><mask_name>"
14312 [(set (match_operand:VI48_AVX512F 0 "register_operand" "=v")
14313 (ashiftrt:VI48_AVX512F
14314 (match_operand:VI48_AVX512F 1 "register_operand" "v")
14315 (match_operand:VI48_AVX512F 2 "nonimmediate_operand" "vm")))]
14316 "TARGET_AVX2 && <mask_mode512bit_condition>"
14317 "vpsrav<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14318 [(set_attr "type" "sseishft")
14319 (set_attr "prefix" "maybe_evex")
14320 (set_attr "mode" "<sseinsnmode>")])
;; Shifts covered by the any_lshift code iterator on
;; VI48_AVX2_48_AVX512F vectors, with the shift counts taken from a second
;; vector of the same mode in operand 2 (register or memory).  Emitted as
;; vp<vshift>v<ssemodesuffix>; enabled when TARGET_AVX2 and
;; <mask_mode512bit_condition> hold.
14322 (define_insn "<avx2_avx512f>_<shift_insn>v<mode><mask_name>"
14323 [(set (match_operand:VI48_AVX2_48_AVX512F 0 "register_operand" "=v")
14324 (any_lshift:VI48_AVX2_48_AVX512F
14325 (match_operand:VI48_AVX2_48_AVX512F 1 "register_operand" "v")
14326 (match_operand:VI48_AVX2_48_AVX512F 2 "nonimmediate_operand" "vm")))]
14327 "TARGET_AVX2 && <mask_mode512bit_condition>"
14328 "vp<vshift>v<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14329 [(set_attr "type" "sseishft")
14330 (set_attr "prefix" "maybe_evex")
14331 (set_attr "mode" "<sseinsnmode>")])
14333 ;; For avx_vec_concat<mode> insn pattern
;; Maps each 256-bit vector mode to the letter t and each 512-bit vector
;; mode to the letter g.  The avx_vec_concat<mode> template pastes the
;; letter after a percent sign to form the operand modifier used when
;; printing operand 1.
14334 (define_mode_attr concat_tg_mode
14335 [(V32QI "t") (V16HI "t") (V8SI "t") (V4DI "t") (V8SF "t") (V4DF "t")
14336 (V64QI "g") (V32HI "g") (V16SI "g") (V8DI "g") (V16SF "g") (V8DF "g")])
;; Concatenate two <ssehalfvecmode> values into a V_256_512 vector.
;; Alternative 0 (second half in a register or memory) is emitted as
;; vinsert<i128> with immediate 0x1, printing operand 1 with the
;; <concat_tg_mode> modifier.  Alternative 1 (second half matching
;; constraint C) is emitted as a single vmovaps, vmovapd or vmovdqa of
;; operand 1 into the %t0 or %x0 view of the destination, chosen by a
;; switch on the insn mode attribute.
;; NOTE(review): the case labels of both switches are not visible in this
;; chunk, so which mode selects which move is not shown here.
14338 (define_insn "avx_vec_concat<mode>"
14339 [(set (match_operand:V_256_512 0 "register_operand" "=x,x")
14340 (vec_concat:V_256_512
14341 (match_operand:<ssehalfvecmode> 1 "register_operand" "x,x")
14342 (match_operand:<ssehalfvecmode> 2 "vector_move_operand" "xm,C")))]
14345 switch (which_alternative)
14348 return "vinsert<i128>\t{$0x1, %2, %<concat_tg_mode>1, %0|%0, %<concat_tg_mode>1, %2, 0x1}";
14350 switch (get_attr_mode (insn))
14353 return "vmovaps\t{%1, %t0|%t0, %1}";
14355 return "vmovapd\t{%1, %t0|%t0, %1}";
14357 return "vmovaps\t{%1, %x0|%x0, %1}";
14359 return "vmovapd\t{%1, %x0|%x0, %1}";
14361 return "vmovdqa\t{%1, %t0|%t0, %1}";
14363 return "vmovdqa\t{%1, %x0|%x0, %1}";
14365 gcc_unreachable ();
14368 gcc_unreachable ();
14371 [(set_attr "type" "sselog,ssemov")
14372 (set_attr "prefix_extra" "1,*")
14373 (set_attr "length_immediate" "1,*")
14374 (set_attr "prefix" "maybe_evex")
14375 (set_attr "mode" "<sseinsnmode>")])
;; Register form of vcvtph2ps producing V4SF: the pattern takes elements
;; 0 to 3 of a V8SF unspec applied to a V8HI register operand.
;; NOTE(review): the vec_select opener, the unspec name and the enabling
;; condition are not visible in this chunk.
14377 (define_insn "vcvtph2ps"
14378 [(set (match_operand:V4SF 0 "register_operand" "=x")
14380 (unspec:V8SF [(match_operand:V8HI 1 "register_operand" "x")]
14382 (parallel [(const_int 0) (const_int 1)
14383 (const_int 2) (const_int 3)])))]
14385 "vcvtph2ps\t{%1, %0|%0, %1}"
14386 [(set_attr "type" "ssecvt")
14387 (set_attr "prefix" "vex")
14388 (set_attr "mode" "V4SF")])
;; Memory form of vcvtph2ps producing V4SF: UNSPEC_VCVTPH2PS applied
;; directly to a V4HI memory operand.
;; NOTE(review): the mode attribute below is V8SF although the result is
;; V4SF and the register form of this insn uses V4SF -- confirm whether
;; this is intended.
14390 (define_insn "*vcvtph2ps_load"
14391 [(set (match_operand:V4SF 0 "register_operand" "=x")
14392 (unspec:V4SF [(match_operand:V4HI 1 "memory_operand" "m")]
14393 UNSPEC_VCVTPH2PS))]
14395 "vcvtph2ps\t{%1, %0|%0, %1}"
14396 [(set_attr "type" "ssecvt")
14397 (set_attr "prefix" "vex")
14398 (set_attr "mode" "V8SF")])
;; 256-bit vcvtph2ps: UNSPEC_VCVTPH2PS from a V8HI register or memory
;; operand to a V8SF register.
14400 (define_insn "vcvtph2ps256"
14401 [(set (match_operand:V8SF 0 "register_operand" "=x")
14402 (unspec:V8SF [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
14403 UNSPEC_VCVTPH2PS))]
14405 "vcvtph2ps\t{%1, %0|%0, %1}"
14406 [(set_attr "type" "ssecvt")
14407 (set_attr "prefix" "vex")
14408 (set_attr "btver2_decode" "double")
14409 (set_attr "mode" "V8SF")])
;; 512-bit vcvtph2ps: UNSPEC_VCVTPH2PS from a V16HI register or memory
;; operand to a V16SF register, EVEX encoded, with optional masking
;; supplied through <mask_name> and <mask_operand2>.
14411 (define_insn "<mask_codefor>avx512f_vcvtph2ps512<mask_name>"
14412 [(set (match_operand:V16SF 0 "register_operand" "=v")
14414 [(match_operand:V16HI 1 "nonimmediate_operand" "vm")]
14415 UNSPEC_VCVTPH2PS))]
14417 "vcvtph2ps\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
14418 [(set_attr "type" "ssecvt")
14419 (set_attr "prefix" "evex")
14420 (set_attr "mode" "V16SF")])
;; Expander for the 128-bit vcvtps2ph: a V4HI unspec of a V4SF register and
;; an 8-bit immediate is placed into a V8HI destination.  The preparation
;; statement sets operand 3 to the V4HImode zero constant.
;; NOTE(review): the RTL combining the unspec with operand 3 is not visible
;; in this chunk.
14422 (define_expand "vcvtps2ph"
14423 [(set (match_operand:V8HI 0 "register_operand")
14425 (unspec:V4HI [(match_operand:V4SF 1 "register_operand")
14426 (match_operand:SI 2 "const_0_to_255_operand")]
14430 "operands[3] = CONST0_RTX (V4HImode);")
;; Register-destination insn for the 128-bit vcvtps2ph.  Operand 3 must
;; satisfy const0_operand in V4HI mode; operand 2 is the 8-bit immediate
;; printed first in the AT&T template.
;; NOTE(review): the RTL wrapper and unspec name are not visible in this
;; chunk.
14432 (define_insn "*vcvtps2ph"
14433 [(set (match_operand:V8HI 0 "register_operand" "=x")
14435 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")
14436 (match_operand:SI 2 "const_0_to_255_operand" "N")]
14438 (match_operand:V4HI 3 "const0_operand")))]
14440 "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
14441 [(set_attr "type" "ssecvt")
14442 (set_attr "prefix" "vex")
14443 (set_attr "mode" "V4SF")])
;; Memory-destination form of the 128-bit vcvtps2ph: UNSPEC_VCVTPS2PH of a
;; V4SF register and an 8-bit immediate is stored directly to a V4HI
;; memory operand.
14445 (define_insn "*vcvtps2ph_store"
14446 [(set (match_operand:V4HI 0 "memory_operand" "=m")
14447 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")
14448 (match_operand:SI 2 "const_0_to_255_operand" "N")]
14449 UNSPEC_VCVTPS2PH))]
14451 "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
14452 [(set_attr "type" "ssecvt")
14453 (set_attr "prefix" "vex")
14454 (set_attr "mode" "V4SF")])
;; 256-bit vcvtps2ph: UNSPEC_VCVTPS2PH of a V8SF register and an 8-bit
;; immediate, written to a V8HI register or memory destination.
14456 (define_insn "vcvtps2ph256"
14457 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=xm")
14458 (unspec:V8HI [(match_operand:V8SF 1 "register_operand" "x")
14459 (match_operand:SI 2 "const_0_to_255_operand" "N")]
14460 UNSPEC_VCVTPS2PH))]
14462 "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
14463 [(set_attr "type" "ssecvt")
14464 (set_attr "prefix" "vex")
14465 (set_attr "btver2_decode" "vector")
14466 (set_attr "mode" "V8SF")])
;; 512-bit vcvtps2ph: UNSPEC_VCVTPS2PH of a V16SF register and an 8-bit
;; immediate, written to a V16HI register or memory destination, EVEX
;; encoded, with optional masking supplied through <mask_name> and
;; <mask_operand3>.
14468 (define_insn "<mask_codefor>avx512f_vcvtps2ph512<mask_name>"
14469 [(set (match_operand:V16HI 0 "nonimmediate_operand" "=vm")
14471 [(match_operand:V16SF 1 "register_operand" "v")
14472 (match_operand:SI 2 "const_0_to_255_operand" "N")]
14473 UNSPEC_VCVTPS2PH))]
14475 "vcvtps2ph\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14476 [(set_attr "type" "ssecvt")
14477 (set_attr "prefix" "evex")
14478 (set_attr "mode" "V16SF")])
14480 ;; For gather* insn patterns
;; Data modes iterated over by the avx2_gather* patterns: the 128-bit and
;; 256-bit vectors with DI, DF, SI or SF elements.
14481 (define_mode_iterator VEC_GATHER_MODE
14482 [V2DI V2DF V4DI V4DF V4SI V4SF V8SI V8SF])
;; For each gather data mode, the mode of the index vector used by the
;; gathersi patterns.  The index elements are always SImode; note that the
;; two-element and four-element DI/DF data modes both map to V4SI.
14483 (define_mode_attr VEC_GATHER_IDXSI
14484 [(V2DI "V4SI") (V4DI "V4SI") (V8DI "V8SI")
14485 (V2DF "V4SI") (V4DF "V4SI") (V8DF "V8SI")
14486 (V4SI "V4SI") (V8SI "V8SI") (V16SI "V16SI")
14487 (V4SF "V4SI") (V8SF "V8SI") (V16SF "V16SI")])
;; For each gather data mode, the mode of the index vector used by the
;; gatherdi patterns.  The index elements are always DImode; for SI/SF data
;; modes the index vector has half as many elements as the data vector
;; (V4SI and V4SF map to V2DI, V8SI and V8SF to V4DI, V16SI and V16SF to
;; V8DI).
14489 (define_mode_attr VEC_GATHER_IDXDI
14490 [(V2DI "V2DI") (V4DI "V4DI") (V8DI "V8DI")
14491 (V2DF "V2DI") (V4DF "V4DI") (V8DF "V8DI")
14492 (V4SI "V2DI") (V8SI "V4DI") (V16SI "V8DI")
14493 (V4SF "V2DI") (V8SF "V4DI") (V16SF "V8DI")])
;; For each gather data mode, the mode given to the source and mask
;; register operands of the gatherdi patterns.  It equals the data mode
;; except for the wider SI/SF modes, which map to the half-width vector
;; (V8SI to V4SI, V8SF to V4SF, V16SI to V8SI, V16SF to V8SF).
14495 (define_mode_attr VEC_GATHER_SRCDI
14496 [(V2DI "V2DI") (V4DI "V4DI") (V8DI "V8DI")
14497 (V2DF "V2DF") (V4DF "V4DF") (V8DF "V8DF")
14498 (V4SI "V4SI") (V8SI "V4SI") (V16SI "V8SI")
14499 (V4SF "V4SF") (V8SF "V4SF") (V16SF "V8SF")])
;; Expander for AVX2 gathers with SImode indices.  Operand 1 is the
;; previous destination value, operand 2 the base address, operand 3 the
;; index vector (<VEC_GATHER_IDXSI>), operand 4 the mask vector and
;; operand 5 the scale.  A scratch of the data mode is clobbered.  The
;; preparation code wraps base, index and scale into an UNSPEC_VSIBADDR
;; address in Pmode.
;; The scale predicate is spelled const1248_operand with no trailing
;; space, matching the *avx2_gathersi<mode> insns and the avx512f expander.
14501 (define_expand "avx2_gathersi<mode>"
14502 [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand")
14503 (unspec:VEC_GATHER_MODE
14504 [(match_operand:VEC_GATHER_MODE 1 "register_operand")
14505 (mem:<ssescalarmode>
14507 [(match_operand 2 "vsib_address_operand")
14508 (match_operand:<VEC_GATHER_IDXSI>
14509 3 "register_operand")
14510 (match_operand:SI 5 "const1248_operand")]))
14511 (mem:BLK (scratch))
14512 (match_operand:VEC_GATHER_MODE 4 "register_operand")]
14514 (clobber (match_scratch:VEC_GATHER_MODE 6))])]
14518 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
14519 operands[5]), UNSPEC_VSIBADDR);
;; Insn for AVX2 gathers with SImode indices.  Operand 2 (previous
;; destination value) is tied to output 0 and operand 5 (mask) is tied to
;; the scratch output 1; both outputs are earlyclobber.  Operand 7 is the
;; VSIB memory reference built from base (3), index (4) and scale (6).
;; The template prints the mask as %1, the memory as %7 and the
;; destination as %0.
14522 (define_insn "*avx2_gathersi<mode>"
14523 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
14524 (unspec:VEC_GATHER_MODE
14525 [(match_operand:VEC_GATHER_MODE 2 "register_operand" "0")
14526 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
14528 [(match_operand:P 3 "vsib_address_operand" "Tv")
14529 (match_operand:<VEC_GATHER_IDXSI> 4 "register_operand" "x")
14530 (match_operand:SI 6 "const1248_operand" "n")]
14532 (mem:BLK (scratch))
14533 (match_operand:VEC_GATHER_MODE 5 "register_operand" "1")]
14535 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
14537 "v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %7, %0|%0, %7, %1}"
14538 [(set_attr "type" "ssemov")
14539 (set_attr "prefix" "vex")
14540 (set_attr "mode" "<sseinsnmode>")])
;; Variant of the SImode-index gather insn with no matched operand for the
;; previous destination value, so operand numbers shift down by one: the
;; VSIB memory reference is operand 6 and the mask is operand 4, tied to
;; the scratch output 1.
;; NOTE(review): the first element of the unspec vector is on a line not
;; visible in this chunk.
14542 (define_insn "*avx2_gathersi<mode>_2"
14543 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
14544 (unspec:VEC_GATHER_MODE
14546 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
14548 [(match_operand:P 2 "vsib_address_operand" "Tv")
14549 (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand" "x")
14550 (match_operand:SI 5 "const1248_operand" "n")]
14552 (mem:BLK (scratch))
14553 (match_operand:VEC_GATHER_MODE 4 "register_operand" "1")]
14555 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
14557 "v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %6, %0|%0, %6, %1}"
14558 [(set_attr "type" "ssemov")
14559 (set_attr "prefix" "vex")
14560 (set_attr "mode" "<sseinsnmode>")])
;; Expander for AVX2 gathers with DImode indices.  Operand 1 (previous
;; destination value) and operand 4 (mask) use <VEC_GATHER_SRCDI>, operand
;; 2 is the base address, operand 3 the index vector (<VEC_GATHER_IDXDI>)
;; and operand 5 the scale.  A scratch of the data mode is clobbered.  The
;; preparation code wraps base, index and scale into an UNSPEC_VSIBADDR
;; address in Pmode.
;; The scale predicate is spelled const1248_operand with no trailing
;; space, matching the *avx2_gatherdi<mode> insns and the avx512f expander.
14562 (define_expand "avx2_gatherdi<mode>"
14563 [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand")
14564 (unspec:VEC_GATHER_MODE
14565 [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand")
14566 (mem:<ssescalarmode>
14568 [(match_operand 2 "vsib_address_operand")
14569 (match_operand:<VEC_GATHER_IDXDI>
14570 3 "register_operand")
14571 (match_operand:SI 5 "const1248_operand")]))
14572 (mem:BLK (scratch))
14573 (match_operand:<VEC_GATHER_SRCDI>
14574 4 "register_operand")]
14576 (clobber (match_scratch:VEC_GATHER_MODE 6))])]
14580 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
14581 operands[5]), UNSPEC_VSIBADDR);
;; Insn for AVX2 gathers with DImode indices.  Operand 2 (previous
;; destination value, <VEC_GATHER_SRCDI>) is tied to output 0 and operand 5
;; (mask) is tied to the scratch output 1; both outputs are earlyclobber.
;; The template prints the mask as %5, the VSIB memory as %7, and the
;; destination through operand 2 rather than operand 0, so the register is
;; printed in the <VEC_GATHER_SRCDI> mode.
14584 (define_insn "*avx2_gatherdi<mode>"
14585 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
14586 (unspec:VEC_GATHER_MODE
14587 [(match_operand:<VEC_GATHER_SRCDI> 2 "register_operand" "0")
14588 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
14590 [(match_operand:P 3 "vsib_address_operand" "Tv")
14591 (match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x")
14592 (match_operand:SI 6 "const1248_operand" "n")]
14594 (mem:BLK (scratch))
14595 (match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" "1")]
14597 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
14599 "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %2|%2, %7, %5}"
14600 [(set_attr "type" "ssemov")
14601 (set_attr "prefix" "vex")
14602 (set_attr "mode" "<sseinsnmode>")])
;; Variant of the DImode-index gather insn with no matched operand for the
;; previous destination value.  Because no operand in the source mode is
;; tied to the destination, the output code checks whether <MODE>mode
;; differs from <VEC_GATHER_SRCDI>mode and, if so, prints the destination
;; with the x modifier (%x0); otherwise it prints %0.
;; NOTE(review): the first element of the unspec vector is on a line not
;; visible in this chunk.
14604 (define_insn "*avx2_gatherdi<mode>_2"
14605 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
14606 (unspec:VEC_GATHER_MODE
14608 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
14610 [(match_operand:P 2 "vsib_address_operand" "Tv")
14611 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x")
14612 (match_operand:SI 5 "const1248_operand" "n")]
14614 (mem:BLK (scratch))
14615 (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")]
14617 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
14620 if (<MODE>mode != <VEC_GATHER_SRCDI>mode)
14621 return "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %x0|%x0, %6, %4}";
14622 return "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}";
14624 [(set_attr "type" "ssemov")
14625 (set_attr "prefix" "vex")
14626 (set_attr "mode" "<sseinsnmode>")])
;; DImode-index gather whose result is consumed through a vec_select of
;; elements 0 to 3, so the destination itself has <VEC_GATHER_SRCDI> mode.
;; Operand 2 is tied to output 0 and the mask (operand 5) to the VI4F_256
;; scratch output 1.  The template prints the destination as %0.
;; NOTE(review): the unspec opener between the vec_select and the operand
;; vector is not visible in this chunk.
14628 (define_insn "*avx2_gatherdi<mode>_3"
14629 [(set (match_operand:<VEC_GATHER_SRCDI> 0 "register_operand" "=&x")
14630 (vec_select:<VEC_GATHER_SRCDI>
14632 [(match_operand:<VEC_GATHER_SRCDI> 2 "register_operand" "0")
14633 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
14635 [(match_operand:P 3 "vsib_address_operand" "Tv")
14636 (match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x")
14637 (match_operand:SI 6 "const1248_operand" "n")]
14639 (mem:BLK (scratch))
14640 (match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" "1")]
14642 (parallel [(const_int 0) (const_int 1)
14643 (const_int 2) (const_int 3)])))
14644 (clobber (match_scratch:VI4F_256 1 "=&x"))]
14646 "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %0|%0, %7, %5}"
14647 [(set_attr "type" "ssemov")
14648 (set_attr "prefix" "vex")
14649 (set_attr "mode" "<sseinsnmode>")])
;; Like the vec_select form of the DI-index AVX2 gather (result in mode
;; <VEC_GATHER_SRCDI>, elements 0..3 selected, VI4F_256 scratch), but with
;; no register input tied to the result among the visible operands.
;;   operands 2/3/5: address, index register and const1248_operand scale
;;     nested under the vsib_mem_operator (operand 6).
;;   operand 4: <VEC_GATHER_SRCDI> input tied to scratch operand 1 by "1".
14651 (define_insn "*avx2_gatherdi<mode>_4"
14652 [(set (match_operand:<VEC_GATHER_SRCDI> 0 "register_operand" "=&x")
14653 (vec_select:<VEC_GATHER_SRCDI>
14656 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
14658 [(match_operand:P 2 "vsib_address_operand" "Tv")
14659 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x")
14660 (match_operand:SI 5 "const1248_operand" "n")]
14662 (mem:BLK (scratch))
14663 (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")]
14665 (parallel [(const_int 0) (const_int 1)
14666 (const_int 2) (const_int 3)])))
14667 (clobber (match_scratch:VI4F_256 1 "=&x"))]
14669 "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}"
14670 [(set_attr "type" "ssemov")
14671 (set_attr "prefix" "vex")
14672 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the AVX-512F gather with a <VEC_GATHER_IDXSI> index vector,
;; instantiated over the VI48F_512 iterator.
;;   operand 0: destination register.
;;   operand 1: register input of the same vector mode.
;;   operand 2: address (vsib_address_operand).
;;   operand 3: <VEC_GATHER_IDXSI> index register.
;;   operand 4: <avx512fmaskmode> register input.
;;   operand 5: scale (const1248_operand).
;;   operand 7: <avx512fmaskmode> scratch, clobbered.
;; The preparation statement builds a Pmode UNSPEC_VSIBADDR from operands
;; 2, 3 and 5.
14674 (define_expand "avx512f_gathersi<mode>"
14675 [(parallel [(set (match_operand:VI48F_512 0 "register_operand")
14677 [(match_operand:VI48F_512 1 "register_operand")
14678 (match_operand:<avx512fmaskmode> 4 "register_operand")
14679 (mem:<ssescalarmode>
14681 [(match_operand 2 "vsib_address_operand")
14682 (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand")
14683 (match_operand:SI 5 "const1248_operand")]))]
14685 (clobber (match_scratch:<avx512fmaskmode> 7))])]
14689 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
14690 operands[5]), UNSPEC_VSIBADDR);
;; Matcher for the SI-index AVX-512F gather; emitted as
;; v<sseintprefix>gatherd<ssemodesuffix>.
;;   operand 0: result register (early-clobber "=&v").
;;   operand 1: register input tied to the result by "0".
;;   operand 2: <avx512fmaskmode> scratch in class "k", early-clobbered.
;;   operand 7: <avx512fmaskmode> input tied to that scratch by "2".
;;   operands 4/3/5: address, index register and const1248_operand scale,
;;     wrapped in UNSPEC_VSIBADDR under the vsib_mem_operator (operand 6).
;; The template prints the scratch (%2) inside the braces that follow the
;; destination.  The Intel-syntax alternative prints the memory operand
;; with the %g modifier.
14693 (define_insn "*avx512f_gathersi<mode>"
14694 [(set (match_operand:VI48F_512 0 "register_operand" "=&v")
14696 [(match_operand:VI48F_512 1 "register_operand" "0")
14697 (match_operand:<avx512fmaskmode> 7 "register_operand" "2")
14698 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
14700 [(match_operand:P 4 "vsib_address_operand" "Tv")
14701 (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand" "v")
14702 (match_operand:SI 5 "const1248_operand" "n")]
14703 UNSPEC_VSIBADDR)])]
14705 (clobber (match_scratch:<avx512fmaskmode> 2 "=&k"))]
14707 "v<sseintprefix>gatherd<ssemodesuffix>\t{%6, %0%{%2%}|%0%{%2%}, %g6}"
14708 [(set_attr "type" "ssemov")
14709 (set_attr "prefix" "evex")
14710 (set_attr "mode" "<sseinsnmode>")])
;; Variant of the SI-index AVX-512F gather with no register input tied to
;; the result among the visible operands.
;;   operand 0: result register (early-clobber "=&v").
;;   operand 1: <avx512fmaskmode> scratch in class "k", early-clobbered.
;;   operand 6: <avx512fmaskmode> input tied to that scratch by "1".
;;   operands 3/2/4: address, index register and const1248_operand scale,
;;     wrapped in UNSPEC_VSIBADDR under the vsib_mem_operator (operand 5).
;; The template prints the scratch (%1) inside the braces that follow the
;; destination.
14712 (define_insn "*avx512f_gathersi<mode>_2"
14713 [(set (match_operand:VI48F_512 0 "register_operand" "=&v")
14716 (match_operand:<avx512fmaskmode> 6 "register_operand" "1")
14717 (match_operator:<ssescalarmode> 5 "vsib_mem_operator"
14719 [(match_operand:P 3 "vsib_address_operand" "Tv")
14720 (match_operand:<VEC_GATHER_IDXSI> 2 "register_operand" "v")
14721 (match_operand:SI 4 "const1248_operand" "n")]
14722 UNSPEC_VSIBADDR)])]
14724 (clobber (match_scratch:<avx512fmaskmode> 1 "=&k"))]
14726 "v<sseintprefix>gatherd<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %g5}"
14727 [(set_attr "type" "ssemov")
14728 (set_attr "prefix" "evex")
14729 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the AVX-512F gather with a <VEC_GATHER_IDXDI> index vector,
;; instantiated over the VI48F_512 iterator.
;;   operand 0: destination register.
;;   operand 1: <VEC_GATHER_SRCDI> register input.
;;   operand 2: address (vsib_address_operand).
;;   operand 3: <VEC_GATHER_IDXDI> index register.
;;   operand 4: QImode register input.
;;   operand 5: scale (const1248_operand).
;;   operand 7: QImode scratch, clobbered.
;; The preparation statement builds a Pmode UNSPEC_VSIBADDR from operands
;; 2, 3 and 5.
14732 (define_expand "avx512f_gatherdi<mode>"
14733 [(parallel [(set (match_operand:VI48F_512 0 "register_operand")
14735 [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand")
14736 (match_operand:QI 4 "register_operand")
14737 (mem:<ssescalarmode>
14739 [(match_operand 2 "vsib_address_operand")
14740 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand")
14741 (match_operand:SI 5 "const1248_operand")]))]
14743 (clobber (match_scratch:QI 7))])]
14747 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
14748 operands[5]), UNSPEC_VSIBADDR);
;; Matcher for the DI-index AVX-512F gather; emitted as
;; v<sseintprefix>gatherq<ssemodesuffix>.
;;   operand 0: result register (early-clobber "=&v").
;;   operand 1: <VEC_GATHER_SRCDI> input tied to the result by "0".
;;   operand 2: QImode scratch in class "k", early-clobbered.
;;   operand 7: QImode input tied to that scratch by "2".
;;   operands 4/3/5: address, index register and const1248_operand scale,
;;     wrapped in UNSPEC_VSIBADDR under the vsib_mem_operator (operand 6).
;; NOTE: the template names %1, not %0, as the destination.  The "0" tie
;; puts both in the same hard register, while operand 1 carries the
;; <VEC_GATHER_SRCDI> mode (presumably so the register is printed at that
;; mode's width -- TODO confirm).
14751 (define_insn "*avx512f_gatherdi<mode>"
14752 [(set (match_operand:VI48F_512 0 "register_operand" "=&v")
14754 [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand" "0")
14755 (match_operand:QI 7 "register_operand" "2")
14756 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
14758 [(match_operand:P 4 "vsib_address_operand" "Tv")
14759 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "v")
14760 (match_operand:SI 5 "const1248_operand" "n")]
14761 UNSPEC_VSIBADDR)])]
14763 (clobber (match_scratch:QI 2 "=&k"))]
14765 "v<sseintprefix>gatherq<ssemodesuffix>\t{%6, %1%{%2%}|%1%{%2%}, %g6}"
14766 [(set_attr "type" "ssemov")
14767 (set_attr "prefix" "evex")
14768 (set_attr "mode" "<sseinsnmode>")])
;; Variant of the DI-index AVX-512F gather with no register input tied to
;; the result among the visible operands.
;;   operand 0: result register (early-clobber "=&v").
;;   operand 1: QImode scratch in class "k", early-clobbered.
;;   operand 6: QImode input tied to that scratch by "1".
;;   operands 3/2/4: address, index register and const1248_operand scale,
;;     wrapped in UNSPEC_VSIBADDR under the vsib_mem_operator (operand 5).
;; When <MODE>mode differs from <VEC_GATHER_SRCDI>mode the destination is
;; printed with the %t modifier instead of plain %0 (presumably to force
;; the YMM register name -- TODO confirm in the operand printer).
14770 (define_insn "*avx512f_gatherdi<mode>_2"
14771 [(set (match_operand:VI48F_512 0 "register_operand" "=&v")
14774 (match_operand:QI 6 "register_operand" "1")
14775 (match_operator:<ssescalarmode> 5 "vsib_mem_operator"
14777 [(match_operand:P 3 "vsib_address_operand" "Tv")
14778 (match_operand:<VEC_GATHER_IDXDI> 2 "register_operand" "v")
14779 (match_operand:SI 4 "const1248_operand" "n")]
14780 UNSPEC_VSIBADDR)])]
14782 (clobber (match_scratch:QI 1 "=&k"))]
14785 if (<MODE>mode != <VEC_GATHER_SRCDI>mode)
14786 return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %t0%{%1%}|%t0%{%1%}, %g5}";
14787 return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %g5}";
14789 [(set_attr "type" "ssemov")
14790 (set_attr "prefix" "evex")
14791 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the AVX-512F scatter with a <VEC_GATHER_IDXSI> index
;; vector, instantiated over the VI48F_512 iterator.  The destination is a
;; (mem:VI48F_512 ...) built from the address operands.
;;   operand 0: address (vsib_address_operand).
;;   operand 1: <avx512fmaskmode> register input.
;;   operand 2: <VEC_GATHER_IDXSI> index register.
;;   operand 3: vector register holding the data to store.
;;   operand 4: scale (const1248_operand).
;;   operand 6: <avx512fmaskmode> scratch, clobbered.
;; The preparation statement builds a Pmode UNSPEC_VSIBADDR from operands
;; 0, 2 and 4.
14793 (define_expand "avx512f_scattersi<mode>"
14794 [(parallel [(set (mem:VI48F_512
14796 [(match_operand 0 "vsib_address_operand")
14797 (match_operand:<VEC_GATHER_IDXSI> 2 "register_operand")
14798 (match_operand:SI 4 "const1248_operand")]))
14800 [(match_operand:<avx512fmaskmode> 1 "register_operand")
14801 (match_operand:VI48F_512 3 "register_operand")]
14803 (clobber (match_scratch:<avx512fmaskmode> 6))])]
14807 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[0], operands[2],
14808 operands[4]), UNSPEC_VSIBADDR);
;; Matcher for the SI-index AVX-512F scatter; emitted as
;; v<sseintprefix>scatterd<ssemodesuffix>.
;;   operand 5: destination memory (vsib_mem_operator) wrapping address 0,
;;     index register 2 and const1248_operand scale 4.
;;   operand 3: vector register holding the data to store.
;;   operand 1: <avx512fmaskmode> scratch in class "k", early-clobbered.
;;   operand 6: <avx512fmaskmode> input tied to that scratch by "1".
;; The template prints the scratch (%1) inside the braces that follow the
;; memory operand.
14811 (define_insn "*avx512f_scattersi<mode>"
14812 [(set (match_operator:VI48F_512 5 "vsib_mem_operator"
14814 [(match_operand:P 0 "vsib_address_operand" "Tv")
14815 (match_operand:<VEC_GATHER_IDXSI> 2 "register_operand" "v")
14816 (match_operand:SI 4 "const1248_operand" "n")]
14819 [(match_operand:<avx512fmaskmode> 6 "register_operand" "1")
14820 (match_operand:VI48F_512 3 "register_operand" "v")]
14822 (clobber (match_scratch:<avx512fmaskmode> 1 "=&k"))]
14824 "v<sseintprefix>scatterd<ssemodesuffix>\t{%3, %5%{%1%}|%5%{%1%}, %3}"
14825 [(set_attr "type" "ssemov")
14826 (set_attr "prefix" "evex")
14827 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the AVX-512F scatter with a V8DI index vector, instantiated
;; over the VI48F_512 iterator.  The destination is a (mem:VI48F_512 ...)
;; built from the address operands.
;;   operand 0: address (vsib_address_operand).
;;   operand 1: QImode register input.
;;   operand 2: V8DI index register.
;;   operand 3: <VEC_GATHER_SRCDI> register holding the data to store.
;;   operand 4: scale (const1248_operand).
;;   operand 6: QImode scratch, clobbered.
;; The preparation statement builds a Pmode UNSPEC_VSIBADDR from operands
;; 0, 2 and 4.
14829 (define_expand "avx512f_scatterdi<mode>"
14830 [(parallel [(set (mem:VI48F_512
14832 [(match_operand 0 "vsib_address_operand")
14833 (match_operand:V8DI 2 "register_operand")
14834 (match_operand:SI 4 "const1248_operand")]))
14836 [(match_operand:QI 1 "register_operand")
14837 (match_operand:<VEC_GATHER_SRCDI> 3 "register_operand")]
14839 (clobber (match_scratch:QI 6))])]
14843 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[0], operands[2],
14844 operands[4]), UNSPEC_VSIBADDR);
;; Matcher for the DI-index AVX-512F scatter; emitted as
;; v<sseintprefix>scatterq<ssemodesuffix>.
;;   operand 5: destination memory (vsib_mem_operator) wrapping address 0,
;;     V8DI index register 2 and const1248_operand scale 4.
;;   operand 3: <VEC_GATHER_SRCDI> register holding the data to store.
;;   operand 1: QImode scratch in class "k", early-clobbered.
;;   operand 6: QImode input tied to that scratch by "1".
;; The template prints the scratch (%1) inside the braces that follow the
;; memory operand.
14847 (define_insn "*avx512f_scatterdi<mode>"
14848 [(set (match_operator:VI48F_512 5 "vsib_mem_operator"
14850 [(match_operand:P 0 "vsib_address_operand" "Tv")
14851 (match_operand:V8DI 2 "register_operand" "v")
14852 (match_operand:SI 4 "const1248_operand" "n")]
14855 [(match_operand:QI 6 "register_operand" "1")
14856 (match_operand:<VEC_GATHER_SRCDI> 3 "register_operand" "v")]
14858 (clobber (match_scratch:QI 1 "=&k"))]
14860 "v<sseintprefix>scatterq<ssemodesuffix>\t{%3, %5%{%1%}|%5%{%1%}, %3}"
14861 [(set_attr "type" "ssemov")
14862 (set_attr "prefix" "evex")
14863 (set_attr "mode" "<sseinsnmode>")])
;; Masked register-to-register compress over the VI48F_512 iterator;
;; emitted as v<sseintprefix>compress<ssemodesuffix>.
;;   operand 0: destination register.
;;   operand 1: source register.
;;   operand 2: vector_move_operand with constraint "0C", i.e. either tied
;;     to the destination or satisfying the "C" constant constraint.
;;   operand 3: <avx512fmaskmode> register in class "k", printed in braces
;;     after the destination.
;; %N2 is appended after the mask (presumably it emits the zeroing marker
;; when operand 2 is the constant alternative -- TODO confirm in the
;; operand printer).
14865 (define_insn "avx512f_compress<mode>_mask"
14866 [(set (match_operand:VI48F_512 0 "register_operand" "=v")
14868 [(match_operand:VI48F_512 1 "register_operand" "v")
14869 (match_operand:VI48F_512 2 "vector_move_operand" "0C")
14870 (match_operand:<avx512fmaskmode> 3 "register_operand" "k")]
14873 "v<sseintprefix>compress<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
14874 [(set_attr "type" "ssemov")
14875 (set_attr "prefix" "evex")
14876 (set_attr "mode" "<sseinsnmode>")])
;; Masked compress-store over the VI48F_512 iterator; emitted as
;; v<sseintprefix>compress<ssemodesuffix> with a memory destination.
;;   operand 0: destination memory ("=m").
;;   operand 1: source register.
;;   operand 2: <avx512fmaskmode> register in class "k", printed in braces
;;     after the destination.
;; The source uses the "v" constraint, matching the register form
;; avx512f_compress<mode>_mask: the insn is marked with the "evex" prefix,
;; so there is no need to restrict the register allocator to the "x"
;; class for it.
14878 (define_insn "avx512f_compressstore<mode>_mask"
14879 [(set (match_operand:VI48F_512 0 "memory_operand" "=m")
14881 [(match_operand:VI48F_512 1 "register_operand" "v")
14883 (match_operand:<avx512fmaskmode> 2 "register_operand" "k")]
14884 UNSPEC_COMPRESS_STORE))]
14886 "v<sseintprefix>compress<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
14887 [(set_attr "type" "ssemov")
14888 (set_attr "prefix" "evex")
14889 (set_attr "memory" "store")
14890 (set_attr "mode" "<sseinsnmode>")])
;; Masked expand over the VI48F_512 iterator; emitted as
;; v<sseintprefix>expand<ssemodesuffix>.  Two alternatives: the source
;; (operand 1) is a register ("v") or memory ("m"), and the "memory"
;; attribute is "none" or "load" accordingly.
;;   operand 0: destination register.
;;   operand 2: vector_move_operand with constraint "0C", i.e. either tied
;;     to the destination or satisfying the "C" constant constraint.
;;   operand 3: <avx512fmaskmode> register in class "k", printed in braces
;;     after the destination.
;; %N2 is appended after the mask (presumably it emits the zeroing marker
;; when operand 2 is the constant alternative -- TODO confirm in the
;; operand printer).
14892 (define_insn "avx512f_expand<mode>_mask"
14893 [(set (match_operand:VI48F_512 0 "register_operand" "=v,v")
14895 [(match_operand:VI48F_512 1 "nonimmediate_operand" "v,m")
14896 (match_operand:VI48F_512 2 "vector_move_operand" "0C,0C")
14897 (match_operand:<avx512fmaskmode> 3 "register_operand" "k,k")]
14900 "v<sseintprefix>expand<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
14901 [(set_attr "type" "ssemov")
14902 (set_attr "prefix" "evex")
14903 (set_attr "memory" "none,load")
14904 (set_attr "mode" "<sseinsnmode>")])
;; Packed vgetmant<ssemodesuffix> over the VF_512 iterator, with the
;; <mask_name> suffix in the pattern name and <mask_operand3> appended to
;; the destination in the template.
;;   operand 0: destination register.
;;   operand 1: source, register or memory ("vm").
;;   operand 2: SImode immediate accepted by const_0_to_15_operand.
;; The ';' after the template string starts a machine-description comment
;; and has no effect.  Unlike the neighbouring patterns, no "type"
;; attribute is set here.
14906 (define_insn "avx512f_getmant<mode><mask_name>"
14907 [(set (match_operand:VF_512 0 "register_operand" "=v")
14909 [(match_operand:VF_512 1 "nonimmediate_operand" "vm")
14910 (match_operand:SI 2 "const_0_to_15_operand")]
14913 "vgetmant<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}";
14914 [(set_attr "prefix" "evex")
14915 (set_attr "mode" "<MODE>")])
;; vgetmant<ssescalarmodesuffix> over the VF_128 iterator; the "mode"
;; attribute is the scalar mode <ssescalarmode>.
;;   operand 0: destination register.
;;   operand 1: register source.
;;   operand 2: source, register or memory ("vm").
;;   operand 3: SImode immediate accepted by const_0_to_15_operand.
;; The ';' after the template string starts a machine-description comment
;; and has no effect.  No "type" attribute is set here.
14917 (define_insn "avx512f_getmant<mode>"
14918 [(set (match_operand:VF_128 0 "register_operand" "=v")
14921 [(match_operand:VF_128 1 "register_operand" "v")
14922 (match_operand:VF_128 2 "nonimmediate_operand" "vm")
14923 (match_operand:SI 3 "const_0_to_15_operand")]
14928 "vgetmant<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
14929 [(set_attr "prefix" "evex")
14930 (set_attr "mode" "<ssescalarmode>")])
;; Named pattern clz<mode>2 (with the <mask_name> suffix) over the
;; VI48_512 iterator; emitted as vplzcnt<ssemodesuffix>, with
;; <mask_operand2> appended to the destination in the template.
;;   operand 0: destination register.
;;   operand 1: source, register or memory ("vm").
14932 (define_insn "clz<mode>2<mask_name>"
14933 [(set (match_operand:VI48_512 0 "register_operand" "=v")
14935 (match_operand:VI48_512 1 "nonimmediate_operand" "vm")))]
14937 "vplzcnt<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
14938 [(set_attr "type" "sse")
14939 (set_attr "prefix" "evex")
14940 (set_attr "mode" "<sseinsnmode>")])
;; Conflict pattern over the VI48_512 iterator; emitted as
;; vpconflict<ssemodesuffix>, with <mask_operand2> appended to the
;; destination in the template.  The pattern name is assembled from the
;; <mask_codefor> prefix and the <mask_name> suffix.
;;   operand 0: destination register.
;;   operand 1: source, register or memory ("vm").
14942 (define_insn "<mask_codefor>conflict<mode><mask_name>"
14943 [(set (match_operand:VI48_512 0 "register_operand" "=v")
14945 [(match_operand:VI48_512 1 "nonimmediate_operand" "vm")]
14948 "vpconflict<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
14949 [(set_attr "type" "sse")
14950 (set_attr "prefix" "evex")
14951 (set_attr "mode" "<sseinsnmode>")])