1 ;; GCC machine description for SSE instructions
2 ;; Copyright (C) 2005-2014 Free Software Foundation, Inc.
4 ;; This file is part of GCC.
6 ;; GCC is free software; you can redistribute it and/or modify
7 ;; it under the terms of the GNU General Public License as published by
8 ;; the Free Software Foundation; either version 3, or (at your option)
11 ;; GCC is distributed in the hope that it will be useful,
12 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
13 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 ;; GNU General Public License for more details.
16 ;; You should have received a copy of the GNU General Public License
17 ;; along with GCC; see the file COPYING3. If not see
18 ;; <http://www.gnu.org/licenses/>.
20 (define_c_enum "unspec" [
55 UNSPEC_XOP_UNSIGNED_CMP
66 UNSPEC_AESKEYGENASSIST
87 ;; For AVX512F support
91 UNSPEC_UNSIGNED_FIX_NOTRUNC
106 UNSPEC_COMPRESS_STORE
111 ;; For the embedded rounding feature
112 UNSPEC_EMBEDDED_ROUNDING
114 ;; For AVX512PF support
115 UNSPEC_GATHER_PREFETCH
116 UNSPEC_SCATTER_PREFETCH
118 ;; For AVX512ER support
133 (define_c_enum "unspecv" [
143 ;; All vector modes including V?TImode, used in move patterns.
;; 512-bit modes are gated on TARGET_AVX512F (V4TI on TARGET_AVX512BW),
;; 256-bit modes on TARGET_AVX; the 128-bit modes carry no condition.
144 (define_mode_iterator VMOVE
145 [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
146 (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
147 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
148 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
149 (V4TI "TARGET_AVX512BW") (V2TI "TARGET_AVX") V1TI
150 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
151 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF])
153 ;; All AVX-512{F,VL} vector modes with SI/DI/SF/DF elements.
;; TARGET_AVX512F is assumed as the baseline, so the 512-bit modes carry
;; no condition; the 128- and 256-bit modes also need TARGET_AVX512VL.
154 (define_mode_iterator V48_AVX512VL
155 [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
156 V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")
157 V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
158 V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
160 ;; 1- and 2-byte element AVX-512{BW,VL} vector modes.
;; TARGET_AVX512BW is assumed as the baseline, so the 512-bit modes carry
;; no condition; the 128- and 256-bit modes also need TARGET_AVX512VL.
161 (define_mode_iterator VI12_AVX512VL
162 [V64QI (V16QI "TARGET_AVX512VL") (V32QI "TARGET_AVX512VL")
163 V32HI (V16HI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL")])
;; Vector modes without the V?TI modes.  QI/HI element modes are listed up
;; to 256 bits, SI/DI/SF/DF element modes up to 512 bits (TARGET_AVX512F).
;; 256-bit modes need TARGET_AVX and V2DF needs TARGET_SSE2.
166 (define_mode_iterator V
167 [(V32QI "TARGET_AVX") V16QI
168 (V16HI "TARGET_AVX") V8HI
169 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
170 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
171 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
172 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
174 ;; All 128bit vector modes
;; (V2DF is enabled only with TARGET_SSE2).
175 (define_mode_iterator V_128
176 [V16QI V8HI V4SI V2DI V4SF (V2DF "TARGET_SSE2")])
178 ;; All 256bit vector modes
;; (no per-mode condition; patterns using it must test the target themselves).
179 (define_mode_iterator V_256
180 [V32QI V16HI V8SI V4DI V8SF V4DF])
182 ;; All 512bit vector modes
;; (no per-mode condition).
183 (define_mode_iterator V_512 [V64QI V32HI V16SI V8DI V16SF V8DF])
185 ;; All 256bit and 512bit vector modes
;; (the 512-bit entries are enabled only with TARGET_AVX512F).
186 (define_mode_iterator V_256_512
187 [V32QI V16HI V8SI V4DI V8SF V4DF
188 (V64QI "TARGET_AVX512F") (V32HI "TARGET_AVX512F") (V16SI "TARGET_AVX512F")
189 (V8DI "TARGET_AVX512F") (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")])
191 ;; All vector float modes
;; (512-bit: TARGET_AVX512F, 256-bit: TARGET_AVX, V2DF: TARGET_SSE2;
;; V4SF carries no condition).
192 (define_mode_iterator VF
193 [(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
194 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
196 ;; 128- and 256-bit float vector modes
;; (same conditions as VF, without the 512-bit entries).
197 (define_mode_iterator VF_128_256
198 [(V8SF "TARGET_AVX") V4SF
199 (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
201 ;; All SFmode vector float modes
;; (V16SF needs TARGET_AVX512F, V8SF needs TARGET_AVX).
202 (define_mode_iterator VF1
203 [(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF])
205 ;; 128- and 256-bit SF vector modes
206 (define_mode_iterator VF1_128_256
207 [(V8SF "TARGET_AVX") V4SF])
;; 128- and 256-bit SF vector modes where V8SF carries no condition and
;; V4SF is enabled only with TARGET_AVX512VL.
209 (define_mode_iterator VF1_128_256VL
210 [V8SF (V4SF "TARGET_AVX512VL")])
212 ;; All DFmode vector float modes
;; (V8DF needs TARGET_AVX512F, V4DF needs TARGET_AVX; V2DF is unconditional
;; here, unlike in VF).
213 (define_mode_iterator VF2
214 [(V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF])
216 ;; 128- and 256-bit DF vector modes
217 (define_mode_iterator VF2_128_256
218 [(V4DF "TARGET_AVX") V2DF])
;; 256- and 512-bit DF vector modes; only V8DF is conditional (TARGET_AVX512F).
220 (define_mode_iterator VF2_512_256
221 [(V8DF "TARGET_AVX512F") V4DF])
;; 256- and 512-bit DF vector modes; V8DF carries no condition and V4DF is
;; enabled only with TARGET_AVX512VL.
223 (define_mode_iterator VF2_512_256VL
224 [V8DF (V4DF "TARGET_AVX512VL")])
226 ;; All 128bit vector float modes
;; (V2DF is enabled only with TARGET_SSE2).
227 (define_mode_iterator VF_128
228 [V4SF (V2DF "TARGET_SSE2")])
230 ;; All 256bit vector float modes
231 (define_mode_iterator VF_256
234 ;; All 512bit vector float modes
235 (define_mode_iterator VF_512
;; SI and DI element integer vector modes.  The 512-bit modes carry no
;; condition; the 128- and 256-bit modes need TARGET_AVX512VL.
238 (define_mode_iterator VI48_AVX512VL
239 [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
240 V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
;; DF vector modes; V8DF is unconditional, narrower modes need TARGET_AVX512VL.
242 (define_mode_iterator VF2_AVX512VL
243 [V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
;; SF vector modes; V16SF is unconditional, narrower modes need TARGET_AVX512VL.
245 (define_mode_iterator VF1_AVX512VL
246 [V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")])
248 ;; All vector integer modes
;; (512-bit modes are listed only for SI and DI elements and need
;; TARGET_AVX512F; 256-bit modes need TARGET_AVX).
249 (define_mode_iterator VI
250 [(V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")
251 (V32QI "TARGET_AVX") V16QI
252 (V16HI "TARGET_AVX") V8HI
253 (V8SI "TARGET_AVX") V4SI
254 (V4DI "TARGET_AVX") V2DI])
;; Vector integer modes whose 256-bit forms need TARGET_AVX2 (instead of
;; TARGET_AVX as in VI); the 512-bit SI/DI forms need TARGET_AVX512F.
256 (define_mode_iterator VI_AVX2
257 [(V32QI "TARGET_AVX2") V16QI
258 (V16HI "TARGET_AVX2") V8HI
259 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI
260 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
262 ;; All QImode vector integer modes
;; (128- and 256-bit only; V32QI needs TARGET_AVX).
263 (define_mode_iterator VI1
264 [(V32QI "TARGET_AVX") V16QI])
266 (define_mode_iterator VI_ULOADSTORE_BW_AVX512VL
268 V32HI (V8HI "TARGET_AVX512VL") (V16HI "TARGET_AVX512VL")])
;; SI and DI element integer modes; 512-bit modes carry no condition and
;; narrower modes need TARGET_AVX512VL.
;; NOTE(review): going by the name this is meant for the unaligned
;; load/store patterns -- confirm against its users.
270 (define_mode_iterator VI_ULOADSTORE_F_AVX512VL
271 [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
272 V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
274 ;; All DImode vector integer modes
;; (V8DI needs TARGET_AVX512F, V4DI needs TARGET_AVX).
275 (define_mode_iterator VI8
276 [(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI])
;; DI vector modes; V8DI is unconditional, narrower modes need TARGET_AVX512VL.
278 (define_mode_iterator VI8_AVX512VL
279 [V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
;; 256- and 512-bit DI vector modes; V4DI needs TARGET_AVX512VL.
281 (define_mode_iterator VI8_256_512
282 [V8DI (V4DI "TARGET_AVX512VL")])
;; Single-element-size integer iterators: VI1 = QI, VI2 = HI, VI4 = SI
;; elements.  Each entry's condition string, where present, is the target
;; test that enables that mode; bare entries are always enabled.
284 (define_mode_iterator VI1_AVX2
285 [(V32QI "TARGET_AVX2") V16QI])
287 (define_mode_iterator VI2_AVX2
288 [(V16HI "TARGET_AVX2") V8HI])
290 (define_mode_iterator VI2_AVX512F
291 [(V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX2") V8HI])
293 (define_mode_iterator VI4_AVX
294 [(V8SI "TARGET_AVX") V4SI])
296 (define_mode_iterator VI4_AVX2
297 [(V8SI "TARGET_AVX2") V4SI])
299 (define_mode_iterator VI4_AVX512F
300 [(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI])
;; Here the 512-bit mode is the unconditional one.
302 (define_mode_iterator VI4_AVX512VL
303 [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")])
;; SI and DI element integer modes with mixed gating: V4SI and V8SI are
;; unconditional, the 512-bit modes need TARGET_AVX512F, and the 128/256-bit
;; DI modes need TARGET_AVX512VL.
305 (define_mode_iterator VI48_AVX512F_AVX512VL
306 [V4SI V8SI (V16SI "TARGET_AVX512F")
307 (V2DI "TARGET_AVX512VL") (V4DI "TARGET_AVX512VL") (V8DI "TARGET_AVX512F")])
;; HI vector modes; V32HI is unconditional, narrower modes need TARGET_AVX512VL.
309 (define_mode_iterator VI2_AVX512VL
310 [(V8HI "TARGET_AVX512VL") (V16HI "TARGET_AVX512VL") V32HI])
;; DI vector modes; V8DI needs TARGET_AVX512BW, V4DI needs TARGET_AVX2.
312 (define_mode_iterator VI8_AVX2_AVX512BW
313 [(V8DI "TARGET_AVX512BW") (V4DI "TARGET_AVX2") V2DI])
;; 128- and 256-bit DI vector modes; V4DI needs TARGET_AVX2.
315 (define_mode_iterator VI8_AVX2
316 [(V4DI "TARGET_AVX2") V2DI])
;; DI vector modes; V8DI needs TARGET_AVX512F, V4DI needs TARGET_AVX2.
318 (define_mode_iterator VI8_AVX2_AVX512F
319 [(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
322 (define_mode_iterator V8FI
326 (define_mode_iterator V16FI
;; TI-element vector modes: V1TI always, V2TI only with TARGET_AVX2.
329 ;; ??? We should probably use TImode instead.
330 (define_mode_iterator VIMAX_AVX2
331 [(V2TI "TARGET_AVX2") V1TI])
;; Like VIMAX_AVX2, but the 128-bit entry is scalar TImode instead of V1TI.
333 ;; ??? This should probably be dropped in favor of VIMAX_AVX2.
334 (define_mode_iterator SSESCALARMODE
335 [(V2TI "TARGET_AVX2") TI])
;; Integer iterators covering several element sizes.  In these lists the
;; digits after "VI" match the element sizes in bytes of the listed modes
;; (1 = QI, 2 = HI, 4 = SI, 8 = DI); 256-bit modes need TARGET_AVX2.
337 (define_mode_iterator VI12_AVX2
338 [(V32QI "TARGET_AVX2") V16QI
339 (V16HI "TARGET_AVX2") V8HI])
341 (define_mode_iterator VI24_AVX2
342 [(V16HI "TARGET_AVX2") V8HI
343 (V8SI "TARGET_AVX2") V4SI])
;; QI/HI/SI modes up to 256 bits, plus the 512-bit SI and DI modes under
;; TARGET_AVX512F (no 128- or 256-bit DI mode is listed).
345 (define_mode_iterator VI124_AVX2_48_AVX512F
346 [(V32QI "TARGET_AVX2") V16QI
347 (V16HI "TARGET_AVX2") V8HI
348 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI
349 (V8DI "TARGET_AVX512F")])
;; QI/HI/SI modes; 512-bit forms exist for HI and SI only (TARGET_AVX512F).
351 (define_mode_iterator VI124_AVX512F
352 [(V32QI "TARGET_AVX2") V16QI
353 (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX2") V8HI
354 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI])
356 (define_mode_iterator VI124_AVX2
357 [(V32QI "TARGET_AVX2") V16QI
358 (V16HI "TARGET_AVX2") V8HI
359 (V8SI "TARGET_AVX2") V4SI])
;; HI vector modes; V32HI needs TARGET_AVX512BW, V16HI needs TARGET_AVX2.
361 (define_mode_iterator VI2_AVX2_AVX512BW
362 [(V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI])
;; 128- and 256-bit SI/DI modes; 256-bit forms need TARGET_AVX2.
364 (define_mode_iterator VI48_AVX2
365 [(V8SI "TARGET_AVX2") V4SI
366 (V4DI "TARGET_AVX2") V2DI])
;; HI/SI/DI modes up to 256 bits (TARGET_AVX2), plus V8DI under
;; TARGET_AVX512F.
368 (define_mode_iterator VI248_AVX2_8_AVX512F
369 [(V16HI "TARGET_AVX2") V8HI
370 (V8SI "TARGET_AVX2") V4SI
371 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
;; SI/DI modes where only the 512-bit forms are conditional (TARGET_AVX512F).
373 (define_mode_iterator VI48_AVX512F
374 [(V16SI "TARGET_AVX512F") V8SI V4SI
375 (V8DI "TARGET_AVX512F") V4DI V2DI])
377 (define_mode_iterator V48_AVX2
380 (V4SI "TARGET_AVX2") (V2DI "TARGET_AVX2")
381 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")])
;; ISA-name string per vector mode, substituted for <avx512> (for example
;; in pattern names such as "<avx512>_load<mode>_mask"): "avx512vl" for
;; 128/256-bit modes, "avx512bw" for 512-bit QI/HI modes, "avx512f" for the
;; other 512-bit modes.
383 (define_mode_attr avx512
384 [(V16QI "avx512vl") (V32QI "avx512vl") (V64QI "avx512bw")
385 (V8HI "avx512vl") (V16HI "avx512vl") (V32HI "avx512bw")
386 (V4SI "avx512vl") (V8SI "avx512vl") (V16SI "avx512f")
387 (V2DI "avx512vl") (V4DI "avx512vl") (V8DI "avx512f")
388 (V4SF "avx512vl") (V8SF "avx512vl") (V16SF "avx512f")
389 (V2DF "avx512vl") (V4DF "avx512vl") (V8DF "avx512f")])
;; ISA-name string per vector mode, substituted for <sse2_avx_avx512f>.
;; NOTE(review): the 128/256-bit HI and DI modes map to "avx512vl" and the
;; 128-bit float modes to "avx", not to "sse2"/"avx" as the attribute name
;; suggests -- confirm this is intended before relying on the name.
391 (define_mode_attr sse2_avx_avx512f
392 [(V16QI "sse2") (V32QI "avx") (V64QI "avx512f")
393 (V8HI "avx512vl") (V16HI "avx512vl") (V32HI "avx512bw")
394 (V4SI "sse2") (V8SI "avx") (V16SI "avx512f")
395 (V2DI "avx512vl") (V4DI "avx512vl") (V8DI "avx512f")
396 (V16SF "avx512f") (V8SF "avx") (V4SF "avx")
397 (V8DF "avx512f") (V4DF "avx") (V2DF "avx")])
;; ISA-name string for integer vector modes: "sse2" for 128-bit, "avx2" for
;; 256-bit, and "avx512bw" (QI/HI/TI elements) or "avx512f" (SI/DI elements)
;; for 512-bit modes.
399 (define_mode_attr sse2_avx2
400 [(V16QI "sse2") (V32QI "avx2") (V64QI "avx512bw")
401 (V8HI "sse2") (V16HI "avx2") (V32HI "avx512bw")
402 (V4SI "sse2") (V8SI "avx2") (V16SI "avx512f")
403 (V2DI "sse2") (V4DI "avx2") (V8DI "avx512f")
404 (V1TI "sse2") (V2TI "avx2") (V4TI "avx512bw")])
;; ISA-name string: "ssse3" for 64/128-bit modes (and scalar TI), "avx2" for
;; 256-bit modes, "avx512bw" for the listed 512-bit modes.  No 512-bit SI or
;; DI entry is provided.
406 (define_mode_attr ssse3_avx2
407 [(V16QI "ssse3") (V32QI "avx2") (V64QI "avx512bw")
408 (V4HI "ssse3") (V8HI "ssse3") (V16HI "avx2") (V32HI "avx512bw")
409 (V4SI "ssse3") (V8SI "avx2")
410 (V2DI "ssse3") (V4DI "avx2")
411 (TI "ssse3") (V2TI "avx2") (V4TI "avx512bw")])
;; ISA-name string for integer vector modes: "sse4_1" for 128-bit, "avx2"
;; for 256-bit; the 512-bit modes map to "avx512bw" (QI/HI), "avx512f" (SI)
;; and "avx512dq" (DI).
413 (define_mode_attr sse4_1_avx2
414 [(V16QI "sse4_1") (V32QI "avx2") (V64QI "avx512bw")
415 (V8HI "sse4_1") (V16HI "avx2") (V32HI "avx512bw")
416 (V4SI "sse4_1") (V8SI "avx2") (V16SI "avx512f")
417 (V2DI "sse4_1") (V4DI "avx2") (V8DI "avx512dq")])
;; ISA-name string for 128/256-bit SI/DI/SF/DF modes: "avx" for the float
;; modes, "avx2" for the integer modes.
419 (define_mode_attr avx_avx2
420 [(V4SF "avx") (V2DF "avx")
421 (V8SF "avx") (V4DF "avx")
422 (V4SI "avx2") (V2DI "avx2")
423 (V8SI "avx2") (V4DI "avx2")])
;; Name prefix for integer vector modes: "vec" for the 128-bit modes and
;; "avx2" for the 256-bit modes.
425 (define_mode_attr vec_avx2
426 [(V16QI "vec") (V32QI "avx2")
427 (V8HI "vec") (V16HI "avx2")
428 (V4SI "vec") (V8SI "avx2")
429 (V2DI "vec") (V4DI "avx2")])
;; ISA-name string: "avx512f" for 512-bit SI/DI/SF/DF modes, "avx2" for the
;; listed narrower modes.  No V4SF or V2DF entry is provided.
431 (define_mode_attr avx2_avx512f
432 [(V4SI "avx2") (V8SI "avx2") (V16SI "avx512f")
433 (V2DI "avx2") (V4DI "avx2") (V8DI "avx512f")
434 (V8SF "avx2") (V16SF "avx512f")
435 (V4DF "avx2") (V8DF "avx512f")])
;; ISA-name string: as avx2_avx512f for SI/DI/SF/DF modes (with V4SF and
;; V2DF added), plus HI modes mapping to "avx512vl" (128/256-bit) and
;; "avx512bw" (512-bit).
437 (define_mode_attr avx2_avx512bw
438 [(V4SI "avx2") (V8SI "avx2") (V16SI "avx512f")
439 (V2DI "avx2") (V4DI "avx2") (V8DI "avx512f")
440 (V4SF "avx2") (V8SF "avx2") (V16SF "avx512f")
441 (V2DF "avx2") (V4DF "avx2") (V8DF "avx512f")
442 (V8HI "avx512vl") (V16HI "avx512vl") (V32HI "avx512bw")])
;; Type letter spliced into shuffle/extract/broadcast mnemonics such as
;; "vextract<shuffletype>64x4" and "vbroadcast<shuffletype>32x4":
;; "f" for float vector modes, "i" for every integer vector mode.
;; V16HI must be "i" like the other integer modes; any other letter
;; produces a mnemonic that does not exist (e.g. "vextractu64x4").
444 (define_mode_attr shuffletype
445 [(V16SF "f") (V16SI "i") (V8DF "f") (V8DI "i")
446 (V8SF "f") (V8SI "i") (V4DF "f") (V4DI "i")
447 (V4SF "f") (V4SI "i") (V2DF "f") (V2DI "i")
448 (V32QI "i") (V16HI "i") (V16QI "i") (V8HI "i")
449 (V64QI "i") (V1TI "i") (V2TI "i")])
;; Map each 512-bit SI/DI/SF/DF vector mode to the 128-bit vector mode with
;; the same element type, i.e. one quarter of the original width.
451 (define_mode_attr ssequartermode
452 [(V16SI "V4SI") (V8DI "V2DI") (V16SF "V4SF") (V8DF "V2DF")])
;; Lower-case name of the vector mode with the same number of elements and
;; integer elements of twice the width (QI -> HI, HI -> SI, SI -> DI).
454 (define_mode_attr ssedoublemodelower
455 [(V16QI "v16hi") (V32QI "v32hi") (V64QI "v64hi")
456 (V8HI "v8si") (V16HI "v16si") (V32HI "v32si")
457 (V4SI "v4di") (V8SI "v8di") (V16SI "v16di")])
;; Vector mode of twice the total size.  The QI and HI element modes and
;; V4SI keep the element count and double the element width; every other
;; listed mode keeps the element type and doubles the element count.
459 (define_mode_attr ssedoublemode
460 [(V16SF "V32SF") (V16SI "V32SI") (V8DI "V16DI") (V8DF "V16DF")
461 (V8SF "V16SF") (V8SI "V16SI") (V4DI "V8DI") (V4DF "V8DF")
462 (V16HI "V16SI") (V8HI "V8SI") (V4HI "V4SI") (V4SI "V4DI")
463 (V32HI "V32SI") (V32QI "V32HI") (V16QI "V16HI") (V64QI "V64HI")])
;; QI-element vector mode with the same total size as the given DI-element
;; vector mode.
465 (define_mode_attr ssebytemode
466 [(V2DI "V16QI") (V4DI "V32QI") (V8DI "V64QI")])
468 ;; All 128bit vector integer modes
;; (QI, HI, SI and DI elements; no per-mode condition).
469 (define_mode_iterator VI_128 [V16QI V8HI V4SI V2DI])
471 ;; All 256bit vector integer modes
;; (no per-mode condition).
472 (define_mode_iterator VI_256 [V32QI V16HI V8SI V4DI])
474 ;; All 512bit vector integer modes
;; (no per-mode condition).
475 (define_mode_iterator VI_512 [V64QI V32HI V16SI V8DI])
477 ;; Various 128bit vector integer mode combinations
;; The digits after "VI" give the element sizes in bytes of the listed
;; modes (1 = V16QI, 2 = V8HI, 4 = V4SI, 8 = V2DI).
478 (define_mode_iterator VI12_128 [V16QI V8HI])
479 (define_mode_iterator VI14_128 [V16QI V4SI])
480 (define_mode_iterator VI124_128 [V16QI V8HI V4SI])
481 (define_mode_iterator VI128_128 [V16QI V8HI V2DI])
482 (define_mode_iterator VI24_128 [V8HI V4SI])
483 (define_mode_iterator VI248_128 [V8HI V4SI V2DI])
484 (define_mode_iterator VI48_128 [V4SI V2DI])
486 ;; Various 256bit and 512bit vector integer mode combinations
;; VI124_256_48_512: 256-bit QI/HI/SI modes plus the 512-bit SI and DI
;; modes, the latter enabled only with TARGET_AVX512F.
487 (define_mode_iterator VI124_256_48_512
488 [V32QI V16HI V8SI (V8DI "TARGET_AVX512F") (V16SI "TARGET_AVX512F")])
489 (define_mode_iterator VI48_256 [V8SI V4DI])
490 (define_mode_iterator VI48_512 [V16SI V8DI])
;; Eight-element modes: 256-bit SI and 512-bit DI.
491 (define_mode_iterator VI4_256_8_512 [V8SI V8DI])
493 ;; Int-float size matches
;; Each iterator pairs the integer and float vector modes that have the
;; same element size and the same total width.
494 (define_mode_iterator VI4F_128 [V4SI V4SF])
495 (define_mode_iterator VI8F_128 [V2DI V2DF])
496 (define_mode_iterator VI4F_256 [V8SI V8SF])
497 (define_mode_iterator VI8F_256 [V4DI V4DF])
;; 8-byte element pairs at 256 and 512 bits; the 512-bit modes need
;; TARGET_AVX512F.
498 (define_mode_iterator VI8F_256_512
499 [V4DI V4DF (V8DI "TARGET_AVX512F") (V8DF "TARGET_AVX512F")])
500 (define_mode_iterator VI48F_256_512
502 (V16SI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
503 (V8DI "TARGET_AVX512F") (V8DF "TARGET_AVX512F")])
;; 512-bit vector modes with 4- or 8-byte integer or float elements
;; (no per-mode condition).
504 (define_mode_iterator VI48F_512 [V16SI V16SF V8DI V8DF])
506 ;; Mapping from float mode to required SSE level
;; ("sse" for SF/V4SF, "sse2" for DF/V2DF, "avx" for 256-bit and "avx512f"
;; for 512-bit float vector modes).
507 (define_mode_attr sse
508 [(SF "sse") (DF "sse2")
509 (V4SF "sse") (V2DF "sse2")
510 (V16SF "avx512f") (V8SF "avx")
511 (V8DF "avx512f") (V4DF "avx")])
;; ISA-name string for QI and DI element vector modes: "sse2" (128-bit),
;; "avx" (256-bit), "avx512f" (512-bit).
513 (define_mode_attr sse2
514 [(V16QI "sse2") (V32QI "avx") (V64QI "avx512f")
515 (V2DI "sse2") (V4DI "avx") (V8DI "avx512f")])
;; ISA-name string for QI element vector modes: "sse3" (128-bit) or "avx"
;; (256-bit).
517 (define_mode_attr sse3
518 [(V16QI "sse3") (V32QI "avx")])
520 (define_mode_attr sse4_1
521 [(V4SF "sse4_1") (V2DF "sse4_1")
522 (V8SF "avx") (V4DF "avx")
;; Vector-width suffix used in names: "512" for 512-bit modes, "256" for
;; 256-bit modes and the empty string for 128-bit modes.
525 (define_mode_attr avxsizesuffix
526 [(V64QI "512") (V32HI "512") (V16SI "512") (V8DI "512")
527 (V32QI "256") (V16HI "256") (V8SI "256") (V4DI "256")
528 (V16QI "") (V8HI "") (V4SI "") (V2DI "")
529 (V16SF "512") (V8DF "512")
530 (V8SF "256") (V4DF "256")
531 (V4SF "") (V2DF "")])
533 ;; SSE instruction mode
534 (define_mode_attr sseinsnmode
535 [(V64QI "XI") (V32HI "XI") (V16SI "XI") (V8DI "XI") (V4TI "XI")
536 (V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI") (V2TI "OI")
537 (V16QI "TI") (V8HI "TI") (V4SI "TI") (V2DI "TI") (V1TI "TI")
538 (V16SF "V16SF") (V8DF "V8DF")
539 (V8SF "V8SF") (V4DF "V4DF")
540 (V4SF "V4SF") (V2DF "V2DF")
543 ;; Mapping of vector modes to corresponding mask size
;; The mask mode is chosen by element count: DI for 64 elements, SI for
;; 32, HI for 16, and QI for 8 or fewer.
544 (define_mode_attr avx512fmaskmode
545 [(V64QI "DI") (V32QI "SI") (V16QI "HI")
546 (V32HI "SI") (V16HI "HI") (V8HI "QI") (V4HI "QI")
547 (V16SI "HI") (V8SI "QI") (V4SI "QI")
548 (V8DI "QI") (V4DI "QI") (V2DI "QI")
549 (V16SF "HI") (V8SF "QI") (V4SF "QI")
550 (V8DF "QI") (V4DF "QI") (V2DF "QI")])
552 ;; Mapping of vector float modes to an integer mode of the same size
;; (same element count and element width).  Integer vector modes map to
;; themselves, so the attribute can be used with mixed iterators.
553 (define_mode_attr sseintvecmode
554 [(V16SF "V16SI") (V8DF "V8DI")
555 (V8SF "V8SI") (V4DF "V4DI")
556 (V4SF "V4SI") (V2DF "V2DI")
557 (V16SI "V16SI") (V8DI "V8DI")
558 (V8SI "V8SI") (V4DI "V4DI")
559 (V4SI "V4SI") (V2DI "V2DI")
560 (V16HI "V16HI") (V8HI "V8HI")
561 (V32HI "V32HI") (V64QI "V64QI")
562 (V32QI "V32QI") (V16QI "V16QI")])
;; Single wide integer mode with the same total size as the float vector
;; mode: XI for 512-bit, OI for 256-bit, TI for 128-bit.  V16SF has no entry.
564 (define_mode_attr sseintvecmode2
565 [(V8DF "XI") (V4DF "OI") (V2DF "TI")
566 (V8SF "OI") (V4SF "TI")])
;; Lower-case name of the integer vector mode with the same element count
;; and element width.  Integer modes map to their own lower-case name.
;; Unlike sseintvecmode, no 512-bit integer mode is listed here.
568 (define_mode_attr sseintvecmodelower
569 [(V16SF "v16si") (V8DF "v8di")
570 (V8SF "v8si") (V4DF "v4di")
571 (V4SF "v4si") (V2DF "v2di")
572 (V8SI "v8si") (V4DI "v4di")
573 (V4SI "v4si") (V2DI "v2di")
574 (V16HI "v16hi") (V8HI "v8hi")
575 (V32QI "v32qi") (V16QI "v16qi")])
577 ;; Mapping of vector modes to a vector mode of double size
;; (same element type, twice the element count; defined for 128- and
;; 256-bit source modes only).
578 (define_mode_attr ssedoublevecmode
579 [(V32QI "V64QI") (V16HI "V32HI") (V8SI "V16SI") (V4DI "V8DI")
580 (V16QI "V32QI") (V8HI "V16HI") (V4SI "V8SI") (V2DI "V4DI")
581 (V8SF "V16SF") (V4DF "V8DF")
582 (V4SF "V8SF") (V2DF "V4DF")])
584 ;; Mapping of vector modes to a vector mode of half size
585 (define_mode_attr ssehalfvecmode
586 [(V64QI "V32QI") (V32HI "V16HI") (V16SI "V8SI") (V8DI "V4DI")
587 (V32QI "V16QI") (V16HI "V8HI") (V8SI "V4SI") (V4DI "V2DI")
588 (V16QI "V8QI") (V8HI "V4HI") (V4SI "V2SI")
589 (V16SF "V8SF") (V8DF "V4DF")
590 (V8SF "V4SF") (V4DF "V2DF")
593 ;; Mapping of vector modes to packed single mode of the same size
;; (V16SF for 512-bit, V8SF for 256-bit and V4SF for 128-bit modes,
;; whatever the element type of the source mode).
594 (define_mode_attr ssePSmode
595 [(V16SI "V16SF") (V8DF "V16SF")
596 (V16SF "V16SF") (V8DI "V16SF")
597 (V64QI "V16SF") (V32QI "V8SF") (V16QI "V4SF")
598 (V32HI "V16SF") (V16HI "V8SF") (V8HI "V4SF")
599 (V8SI "V8SF") (V4SI "V4SF")
600 (V4DI "V8SF") (V2DI "V4SF")
601 (V4TI "V16SF") (V2TI "V8SF") (V1TI "V4SF")
602 (V8SF "V8SF") (V4SF "V4SF")
603 (V4DF "V8SF") (V2DF "V4SF")])
;; SF vector mode with the same number of elements as the given DI vector
;; mode (so half its total size).
605 (define_mode_attr ssePSmode2
606 [(V4DI "V4SF") (V8DI "V8SF")])
608 ;; Mapping of vector modes back to the scalar modes
;; (the mode of a single element of the vector).
609 (define_mode_attr ssescalarmode
610 [(V64QI "QI") (V32QI "QI") (V16QI "QI")
611 (V32HI "HI") (V16HI "HI") (V8HI "HI")
612 (V16SI "SI") (V8SI "SI") (V4SI "SI")
613 (V8DI "DI") (V4DI "DI") (V2DI "DI")
614 (V16SF "SF") (V8SF "SF") (V4SF "SF")
615 (V8DF "DF") (V4DF "DF") (V2DF "DF")])
617 ;; Mapping of vector modes to the 128bit modes
;; (same element type, narrowed to 128 bits; 128-bit modes map to
;; themselves).
618 (define_mode_attr ssexmmmode
619 [(V64QI "V16QI") (V32QI "V16QI") (V16QI "V16QI")
620 (V32HI "V8HI") (V16HI "V8HI") (V8HI "V8HI")
621 (V16SI "V4SI") (V8SI "V4SI") (V4SI "V4SI")
622 (V8DI "V2DI") (V4DI "V2DI") (V2DI "V2DI")
623 (V16SF "V4SF") (V8SF "V4SF") (V4SF "V4SF")
624 (V8DF "V2DF") (V4DF "V2DF") (V2DF "V2DF")])
626 ;; Pointer size override for scalar modes (Intel asm dialect)
627 (define_mode_attr iptr
628 [(V32QI "b") (V16HI "w") (V8SI "k") (V4DI "q")
629 (V16QI "b") (V8HI "w") (V4SI "k") (V2DI "q")
630 (V8SF "k") (V4DF "q")
631 (V4SF "k") (V2DF "q")
634 ;; Number of scalar elements in each vector type
;; (as a decimal string).
;; NOTE(review): V32HI has no entry in this table.
635 (define_mode_attr ssescalarnum
636 [(V64QI "64") (V16SI "16") (V8DI "8")
637 (V32QI "32") (V16HI "16") (V8SI "8") (V4DI "4")
638 (V16QI "16") (V8HI "8") (V4SI "4") (V2DI "2")
639 (V16SF "16") (V8DF "8")
640 (V8SF "8") (V4DF "4")
641 (V4SF "4") (V2DF "2")])
643 ;; Mask of scalar elements in each vector type
;; (element count minus one, as a decimal string; defined for 128- and
;; 256-bit modes only).
644 (define_mode_attr ssescalarnummask
645 [(V32QI "31") (V16HI "15") (V8SI "7") (V4DI "3")
646 (V16QI "15") (V8HI "7") (V4SI "3") (V2DI "1")
647 (V8SF "7") (V4DF "3")
648 (V4SF "3") (V2DF "1")])
;; Width in bits of one element of the vector mode, as a decimal string.
;; Integer modes are listed by increasing element width; of the float
;; modes only V16SF and V8DF are covered.
650 (define_mode_attr ssescalarsize
651 [(V64QI "8") (V32QI "8") (V16QI "8")
652 (V32HI "16") (V16HI "16") (V8HI "16")
653 (V16SI "32") (V8SI "32") (V4SI "32")
654 (V8DI "64") (V4DI "64") (V2DI "64")
655 (V16SF "32") (V8DF "64")])
657 ;; SSE prefix for integer vector modes
658 (define_mode_attr sseintprefix
659 [(V2DI "p") (V2DF "")
664 (V16SI "p") (V16SF "")
665 (V16QI "p") (V8HI "p")
666 (V32QI "p") (V16HI "p")
667 (V64QI "p") (V32HI "p")])
669 ;; SSE scalar suffix for vector modes
670 (define_mode_attr ssescalarmodesuffix
672 (V8SF "ss") (V4DF "sd")
673 (V4SF "ss") (V2DF "sd")
674 (V8SI "ss") (V4DI "sd")
677 ;; Pack/unpack vector modes
;; sseunpackmode: integer vector mode of the same total size with elements
;; twice as wide (and half as many of them).
678 (define_mode_attr sseunpackmode
679 [(V16QI "V8HI") (V8HI "V4SI") (V4SI "V2DI")
680 (V32QI "V16HI") (V16HI "V8SI") (V8SI "V4DI")
681 (V32HI "V16SI") (V64QI "V32HI") (V16SI "V8DI")])
;; Inverse of the unpack mapping: integer vector mode of the same total
;; size with elements half as wide (and twice as many of them).
683 (define_mode_attr ssepackmode
684 [(V8HI "V16QI") (V4SI "V8HI") (V2DI "V4SI")
685 (V16HI "V32QI") (V8SI "V16HI") (V4DI "V8SI")
686 (V32HI "V64QI") (V16SI "V32HI") (V8DI "V16SI")])
688 ;; Largest value accepted by the xop rotate immediate constraint, per mode
;; (the element width in bits, minus one).
689 (define_mode_attr sserotatemax
690 [(V2DI "63") (V4SI "31") (V8HI "15") (V16QI "7")])
692 ;; Suffix of the cast intrinsic name for each 256- and 512-bit mode:
;; "si" for SI integer vectors, "ps" for SF vectors, "pd" for DF vectors.
693 (define_mode_attr castmode
694 [(V8SI "si") (V16SI "si") (V8SF "ps")
695 (V16SF "ps") (V4DF "pd") (V8DF "pd")])
697 ;; Mnemonic suffix selected by the extension code: "zx" for zero_extend,
;; "sx" for sign_extend.
698 (define_code_attr extsuffix [(zero_extend "zx") (sign_extend "sx")])
700 ;; i128 for integer vectors and TARGET_AVX2, f128 otherwise.
701 ;; i64x4 or f64x4 for 512bit modes.
;; For the 256-bit integer modes the value is the "%~128" output escape
;; rather than a fixed string, so the i/f choice is made when the insn is
;; printed.
702 (define_mode_attr i128
703 [(V16SF "f64x4") (V8SF "f128") (V8DF "f64x4") (V4DF "f128")
704 (V64QI "i64x4") (V32QI "%~128") (V32HI "i64x4") (V16HI "%~128")
705 (V16SI "i64x4") (V8SI "%~128") (V8DI "i64x4") (V4DI "%~128")])
;; SI, SF and DF element vector modes at 256 bits (AVX256MODE2P) and at
;; 512 bits (AVX512MODE2P); no per-mode condition.
708 (define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF])
709 (define_mode_iterator AVX512MODE2P [V16SI V16SF V8DF])
711 ;; Mapping for dbpsadbw modes
;; (HI vector mode -> QI vector mode of the same total size).
712 (define_mode_attr dbpsadbwmode
713 [(V32HI "V64QI") (V16HI "V32QI") (V8HI "V16QI")])
715 ;; Mapping suffixes for broadcast
;; Chosen by element type only: "b"/"w"/"d"/"q" for QI/HI/SI/DI elements,
;; "ss" for SF and "sd" for DF elements.
716 (define_mode_attr bcstscalarsuff
717 [(V64QI "b") (V32QI "b") (V16QI "b")
718 (V32HI "w") (V16HI "w") (V8HI "w")
719 (V16SI "d") (V8SI "d") (V4SI "d")
720 (V8DI "q") (V4DI "q") (V2DI "q")
721 (V16SF "ss") (V8SF "ss") (V4SF "ss")
722 (V8DF "sd") (V4DF "sd") (V2DF "sd")])
724 ;; Tie mode of assembler operand to mode iterator
;; ("t" for every 256-bit mode, "g" for every 512-bit mode).
725 (define_mode_attr concat_tg_mode
726 [(V32QI "t") (V16HI "t") (V8SI "t") (V4DI "t") (V8SF "t") (V4DF "t")
727 (V64QI "g") (V32HI "g") (V16SI "g") (V8DI "g") (V16SF "g") (V8DF "g")])
730 ;; Include define_subst patterns for instructions with mask
733 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
735 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
739 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
741 ;; All of these patterns are enabled for SSE1 as well as SSE2.
742 ;; This is essential for maintaining stable calling conventions.
;; Move expander for every VMOVE mode.  Both operands are
;; nonimmediate_operand; the expansion is delegated to
;; ix86_expand_vector_move.
744 (define_expand "mov<mode>"
745 [(set (match_operand:VMOVE 0 "nonimmediate_operand")
746 (match_operand:VMOVE 1 "nonimmediate_operand"))]
749 ix86_expand_vector_move (<MODE>mode, operands);
;; Vector move insn for every VMOVE mode.  Alternatives: (0) SSE constant
;; into a register, (1) register or memory into a register, (2) register
;; into memory; at least one operand must be a register.  The output code
;; reads the insn's "mode" attribute and checks misaligned_operand to pick
;; between aligned and unaligned mnemonics.  With AVX512F but without
;; AVX512VL, moves narrower than 64 bytes that involve an EVEX-only
;; register are emitted as vextract/vbroadcast or as a wider register move
;; on the %g form of the operands.
753 (define_insn "*mov<mode>_internal"
754 [(set (match_operand:VMOVE 0 "nonimmediate_operand" "=v,v ,m")
755 (match_operand:VMOVE 1 "nonimmediate_or_sse_const_operand" "C ,vm,v"))]
757 && (register_operand (operands[0], <MODE>mode)
758 || register_operand (operands[1], <MODE>mode))"
760 int mode = get_attr_mode (insn);
761 switch (which_alternative)
764 return standard_sse_constant_opcode (insn, operands[1]);
767 /* There is no evex-encoded vmov* for sizes smaller than 64-bytes
768 in avx512f, so we need to use workarounds, to access sse registers
769 16-31, which are evex-only. In avx512vl we don't need workarounds. */
770 if (TARGET_AVX512F && GET_MODE_SIZE (<MODE>mode) < 64 && !TARGET_AVX512VL
771 && ((REG_P (operands[0]) && EXT_REX_SSE_REGNO_P (REGNO (operands[0])))
772 || (REG_P (operands[1]) && EXT_REX_SSE_REGNO_P (REGNO (operands[1])))))
774 if (memory_operand (operands[0], <MODE>mode))
776 if (<MODE_SIZE> == 32)
777 return "vextract<shuffletype>64x4\t{$0x0, %g1, %0|%0, %g1, 0x0}";
778 else if (<MODE_SIZE> == 16)
779 return "vextract<shuffletype>32x4\t{$0x0, %g1, %0|%0, %g1, 0x0}";
783 else if (memory_operand (operands[1], <MODE>mode))
785 if (<MODE_SIZE> == 32)
786 return "vbroadcast<shuffletype>64x4\t{%1, %g0|%g0, %1}";
787 else if (<MODE_SIZE> == 16)
788 return "vbroadcast<shuffletype>32x4\t{%1, %g0|%g0, %1}";
793 /* Reg -> reg move is always aligned. Just use wider move. */
798 return "vmovaps\t{%g1, %g0|%g0, %g1}";
801 return "vmovapd\t{%g1, %g0|%g0, %g1}";
804 return "vmovdqa64\t{%g1, %g0|%g0, %g1}";
815 && (misaligned_operand (operands[0], <MODE>mode)
816 || misaligned_operand (operands[1], <MODE>mode)))
817 return "vmovups\t{%1, %0|%0, %1}";
819 return "%vmovaps\t{%1, %0|%0, %1}";
825 && (misaligned_operand (operands[0], <MODE>mode)
826 || misaligned_operand (operands[1], <MODE>mode)))
827 return "vmovupd\t{%1, %0|%0, %1}";
829 return "%vmovapd\t{%1, %0|%0, %1}";
834 && (misaligned_operand (operands[0], <MODE>mode)
835 || misaligned_operand (operands[1], <MODE>mode)))
836 return TARGET_AVX512VL ? "vmovdqu64\t{%1, %0|%0, %1}"
837 : "vmovdqu\t{%1, %0|%0, %1}";
839 return TARGET_AVX512VL ? "vmovdqa64\t{%1, %0|%0, %1}"
840 : "%vmovdqa\t{%1, %0|%0, %1}";
842 if (misaligned_operand (operands[0], <MODE>mode)
843 || misaligned_operand (operands[1], <MODE>mode))
844 return "vmovdqu64\t{%1, %0|%0, %1}";
846 return "vmovdqa64\t{%1, %0|%0, %1}";
855 [(set_attr "type" "sselog1,ssemov,ssemov")
856 (set_attr "prefix" "maybe_vex")
858 (cond [(and (match_test "<MODE_SIZE> == 16")
859 (ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
860 (and (eq_attr "alternative" "2")
861 (match_test "TARGET_SSE_TYPELESS_STORES"))))
862 (const_string "<ssePSmode>")
863 (match_test "TARGET_AVX")
864 (const_string "<sseinsnmode>")
865 (ior (not (match_test "TARGET_SSE2"))
866 (match_test "optimize_function_for_size_p (cfun)"))
867 (const_string "V4SF")
868 (and (eq_attr "alternative" "0")
869 (match_test "TARGET_SSE_LOAD0_BY_PXOR"))
872 (const_string "<sseinsnmode>")))])
;; Masked load/move for SI/DI/SF/DF element vectors: operand 1 (register
;; or memory) is merged into register operand 0 under mask operand 3, with
;; operand 2 (tied to operand 0, or a constant; constraint "0C") supplying
;; the other elements.  The mnemonic is assembled with snprintf into a
;; static buffer from three pieces (insn_op, align, sse_suffix);
;; misaligned_operand on operand 1, the possible memory source, is what the
;; alignment piece is based on.
874 (define_insn "<avx512>_load<mode>_mask"
875 [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v,v")
876 (vec_merge:V48_AVX512VL
877 (match_operand:V48_AVX512VL 1 "nonimmediate_operand" "v,m")
878 (match_operand:V48_AVX512VL 2 "vector_move_operand" "0C,0C")
879 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
882 static char buf [64];
885 const char *sse_suffix;
887 if (FLOAT_MODE_P (GET_MODE_INNER (<MODE>mode)))
890 sse_suffix = "<ssemodesuffix>";
895 sse_suffix = "<ssescalarsize>";
898 if (misaligned_operand (operands[1], <MODE>mode))
903 snprintf (buf, sizeof (buf), "%s%s%s\t{%%1, %%0%%{%%3%%}%%N2|%%0%%{%%3%%}%%N2, %%1}",
904 insn_op, align, sse_suffix);
907 [(set_attr "type" "ssemov")
908 (set_attr "prefix" "evex")
909 (set_attr "memory" "none,load")
910 (set_attr "mode" "<sseinsnmode>")])
;; Masked load/move for QI and HI element vectors.  Same operand layout as
;; the SI/DI/SF/DF variant, but the unaligned form
;; vmovdqu<ssescalarsize> is always emitted.
912 (define_insn "<avx512>_load<mode>_mask"
913 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v,v")
914 (vec_merge:VI12_AVX512VL
915 (match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "v,m")
916 (match_operand:VI12_AVX512VL 2 "vector_move_operand" "0C,0C")
917 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
919 "vmovdqu<ssescalarsize>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
920 [(set_attr "type" "ssemov")
921 (set_attr "prefix" "evex")
922 (set_attr "memory" "none,load")
923 (set_attr "mode" "<sseinsnmode>")])
;; Masked blend of 512-bit SI/DI/SF/DF vectors: emits
;; v<sseintprefix>blendm<ssemodesuffix> on operand 2 (register or memory)
;; and register operand 1, with operand 3 as the write mask on the
;; destination.
925 (define_insn "avx512f_blendm<mode>"
926 [(set (match_operand:VI48F_512 0 "register_operand" "=v")
928 (match_operand:VI48F_512 2 "nonimmediate_operand" "vm")
929 (match_operand:VI48F_512 1 "register_operand" "v")
930 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
932 "v<sseintprefix>blendm<ssemodesuffix>\t{%2, %1, %0%{%3%}|%0%{%3%}, %1, %2}"
933 [(set_attr "type" "ssemov")
934 (set_attr "prefix" "evex")
935 (set_attr "mode" "<sseinsnmode>")])
;; Masked store for SI/DI/SF/DF element vectors: register operand 1 is
;; written to memory operand 0 under mask operand 2.  The mnemonic is
;; assembled with snprintf into a static buffer from three pieces
;; (insn_op, align, sse_suffix).
937 (define_insn "<avx512>_store<mode>_mask"
938 [(set (match_operand:V48_AVX512VL 0 "memory_operand" "=m")
939 (vec_merge:V48_AVX512VL
940 (match_operand:V48_AVX512VL 1 "register_operand" "v")
942 (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
945 static char buf [64];
948 const char *sse_suffix;
950 if (FLOAT_MODE_P (GET_MODE_INNER (<MODE>mode)))
953 sse_suffix = "<ssemodesuffix>";
958 sse_suffix = "<ssescalarsize>";
/* The alignment that matters is that of the memory destination, operand 0.
   Operand 1 is always a register, so testing it would never report a
   misaligned access and the aligned mnemonic would always be chosen.  */
961 if (misaligned_operand (operands[0], <MODE>mode))
966 snprintf (buf, sizeof (buf), "%s%s%s\t{%%1, %%0%%{%%2%%}|%%0%%{%%2%%}, %%1}",
967 insn_op, align, sse_suffix);
970 [(set_attr "type" "ssemov")
971 (set_attr "prefix" "evex")
972 (set_attr "memory" "store")
973 (set_attr "mode" "<sseinsnmode>")])
;; Masked store for QI and HI element vectors: register operand 1 is
;; written to memory operand 0 under mask operand 2.  The unaligned form
;; vmovdqu<ssescalarsize> is always emitted.
975 (define_insn "<avx512>_store<mode>_mask"
976 [(set (match_operand:VI12_AVX512VL 0 "memory_operand" "=m")
977 (vec_merge:VI12_AVX512VL
978 (match_operand:VI12_AVX512VL 1 "register_operand" "v")
980 (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
982 "vmovdqu<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
983 [(set_attr "type" "ssemov")
984 (set_attr "prefix" "evex")
985 (set_attr "memory" "store")
986 (set_attr "mode" "<sseinsnmode>")])
;; Emits (v)movq from V2DI operand 1 (register or memory) into V2DI
;; register operand 0; the pattern selects element 0 of operand 1.  In the
;; Intel-syntax template the source is printed as %q1.
988 (define_insn "sse2_movq128"
989 [(set (match_operand:V2DI 0 "register_operand" "=x")
992 (match_operand:V2DI 1 "nonimmediate_operand" "xm")
993 (parallel [(const_int 0)]))
996 "%vmovq\t{%1, %0|%0, %q1}"
997 [(set_attr "type" "ssemov")
998 (set_attr "prefix" "maybe_vex")
999 (set_attr "mode" "TI")])
1001 ;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
1002 ;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
1003 ;; from memory, we'd prefer to load the memory directly into the %xmm
1004 ;; register. To facilitate this happy circumstance, this pattern won't
1005 ;; split until after register allocation. If the 64-bit value didn't
1006 ;; come from memory, this is the best we can do. This is much better
1007 ;; than storing %edx:%eax into a stack temporary and loading an %xmm
;; Moves a DImode value (integer register pair or memory) into a V4SI
;; register on 32-bit targets.  The insn is split only after reload.  For
;; a register source, each 32-bit half is loaded with sse2_loadld (the
;; second one into scratch operand 2) and the two are combined with
;; vec_interleave_lowv4si.  For a memory source, a single vec_concatv2di
;; with zero builds the value directly in operand 0.
1010 (define_insn_and_split "movdi_to_sse"
1012 [(set (match_operand:V4SI 0 "register_operand" "=?x,x")
1013 (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
1014 (clobber (match_scratch:V4SI 2 "=&x,X"))])]
1015 "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_TO_VEC"
1017 "&& reload_completed"
1020 if (register_operand (operands[1], DImode))
1022 /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
1023 Assemble the 64-bit DImode value in an xmm register. */
1024 emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
1025 gen_rtx_SUBREG (SImode, operands[1], 0)));
1026 emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
1027 gen_rtx_SUBREG (SImode, operands[1], 4)));
1028 emit_insn (gen_vec_interleave_lowv4si (operands[0], operands[0],
1031 else if (memory_operand (operands[1], DImode))
/* This split runs only after reload, when no new pseudo register may be
   created, so write the V2DI result straight into operand 0 viewed in
   V2DImode instead of going through a fresh temporary.  */
1034 emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]),
1035 operands[1], const0_rtx));
1042 [(set (match_operand:V4SF 0 "register_operand")
1043 (match_operand:V4SF 1 "zero_extended_scalar_load_operand"))]
1044 "TARGET_SSE && reload_completed"
1047 (vec_duplicate:V4SF (match_dup 1))
1051 operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
1052 operands[2] = CONST0_RTX (V4SFmode);
1056 [(set (match_operand:V2DF 0 "register_operand")
1057 (match_operand:V2DF 1 "zero_extended_scalar_load_operand"))]
1058 "TARGET_SSE2 && reload_completed"
1059 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
1061 operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
1062 operands[2] = CONST0_RTX (DFmode);
;; Misaligned-move expander for every VMOVE mode; the expansion is
;; delegated to ix86_expand_vector_move_misalign.
1065 (define_expand "movmisalign<mode>"
1066 [(set (match_operand:VMOVE 0 "nonimmediate_operand")
1067 (match_operand:VMOVE 1 "nonimmediate_operand"))]
1070 ix86_expand_vector_move_misalign (<MODE>mode, operands);
;; Expander for unaligned float vector loads (all VF modes), optionally
;; masked.  When operand 1 is known to be misaligned, a plain SET is
;; emitted (wrapped in a VEC_MERGE with the mask operands when masking is
;; applied) instead of the unspec form; the in-body comment below gives
;; the rationale.
1074 (define_expand "<sse>_loadu<ssemodesuffix><avxsizesuffix><mask_name>"
1075 [(set (match_operand:VF 0 "register_operand")
1076 (unspec:VF [(match_operand:VF 1 "nonimmediate_operand")]
1078 "TARGET_SSE && <mask_mode512bit_condition>"
1080 /* For AVX, normal *mov<mode>_internal pattern will handle unaligned loads
1081 just fine if misaligned_operand is true, and without the UNSPEC it can
1082 be combined with arithmetic instructions. If misaligned_operand is
1083 false, still emit UNSPEC_LOADU insn to honor user's request for
1086 && misaligned_operand (operands[1], <MODE>mode))
1088 rtx src = operands[1];
1090 src = gen_rtx_VEC_MERGE (<MODE>mode, operands[1],
1091 operands[2 * <mask_applied>],
1092 operands[3 * <mask_applied>]);
1093 emit_insn (gen_rtx_SET (VOIDmode, operands[0], src));
;; Unaligned float vector load insn (all VF modes), optionally masked.
;; The output code switches on the insn's "mode" attribute and emits
;; either %vmovups or %vmovu<ssemodesuffix>.  The "mode" attribute is
;; <ssePSmode> for 16-byte vectors when
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL, else <MODE> with AVX, else V4SF
;; when optimizing for size, else <MODE>.
1098 (define_insn "*<sse>_loadu<ssemodesuffix><avxsizesuffix><mask_name>"
1099 [(set (match_operand:VF 0 "register_operand" "=v")
1101 [(match_operand:VF 1 "nonimmediate_operand" "vm")]
1103 "TARGET_SSE && <mask_mode512bit_condition>"
1105 switch (get_attr_mode (insn))
1110 return "%vmovups\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
1112 return "%vmovu<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
1115 [(set_attr "type" "ssemov")
1116 (set_attr "movu" "1")
1117 (set_attr "ssememalign" "8")
1118 (set_attr "prefix" "maybe_vex")
1120 (cond [(and (match_test "<MODE_SIZE> == 16")
1121 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
1122 (const_string "<ssePSmode>")
1123 (match_test "TARGET_AVX")
1124 (const_string "<MODE>")
1125 (match_test "optimize_function_for_size_p (cfun)")
1126 (const_string "V4SF")
1128 (const_string "<MODE>")))])
;; Unaligned store of a floating-point vector: memory destination ("=m"),
;; register source ("v").  The output code switches on the insn's mode
;; attribute and returns %vmovups or %vmovu<ssemodesuffix>.  The trailing cond
;; selects <ssePSmode> for 16-byte vectors when either
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL or TARGET_SSE_TYPELESS_STORES holds,
;; <MODE> under TARGET_AVX, V4SF when optimizing the function for size, and
;; <MODE> otherwise.
1130 (define_insn "<sse>_storeu<ssemodesuffix><avxsizesuffix>"
1131 [(set (match_operand:VF 0 "memory_operand" "=m")
1133 [(match_operand:VF 1 "register_operand" "v")]
1137 switch (get_attr_mode (insn))
1142 return "%vmovups\t{%1, %0|%0, %1}";
1144 return "%vmovu<ssemodesuffix>\t{%1, %0|%0, %1}";
1147 [(set_attr "type" "ssemov")
1148 (set_attr "movu" "1")
1149 (set_attr "ssememalign" "8")
1150 (set_attr "prefix" "maybe_vex")
1152 (cond [(and (match_test "<MODE_SIZE> == 16")
1153 (ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
1154 (match_test "TARGET_SSE_TYPELESS_STORES")))
1155 (const_string "<ssePSmode>")
1156 (match_test "TARGET_AVX")
1157 (const_string "<MODE>")
1158 (match_test "optimize_function_for_size_p (cfun)")
1159 (const_string "V4SF")
1161 (const_string "<MODE>")))])
;; Masked unaligned store of a 512-bit floating-point vector (VF_512).
;; Operand 1 is the source register and operand 2 the mask register
;; (constraint "Yk"), printed as {%2} after the memory destination.  The
;; mnemonic is vmovups or vmovu<ssemodesuffix> depending on the mode attribute.
1163 (define_insn "avx512f_storeu<ssemodesuffix>512_mask"
1164 [(set (match_operand:VF_512 0 "memory_operand" "=m")
1167 [(match_operand:VF_512 1 "register_operand" "v")]
1170 (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
1173 switch (get_attr_mode (insn))
1176 return "vmovups\t{%1, %0%{%2%}|%0%{%2%}, %1}";
1178 return "vmovu<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
1181 [(set_attr "type" "ssemov")
1182 (set_attr "movu" "1")
1183 (set_attr "memory" "store")
1184 (set_attr "prefix" "evex")
1185 (set_attr "mode" "<sseinsnmode>")])
1187 /* For AVX, normal *mov<mode>_internal pattern will handle unaligned loads
1188 just fine if misaligned_operand is true, and without the UNSPEC it can
1189 be combined with arithmetic instructions. If misaligned_operand is
1190 false, still emit UNSPEC_LOADU insn to honor user's request for a misaligned load.  */
;; Expander for unaligned loads of VI1 integer vectors, with an optional
;; masked variant.  Enabled for TARGET_SSE2 together with the AVX512VL and
;; AVX512BW mask conditions.  When operand 1 satisfies misaligned_operand the
;; preparation code emits a plain SET from the source; the source is replaced
;; by a VEC_MERGE built from operands[2 * <mask_applied>] and
;; operands[3 * <mask_applied>] (presumably only when a mask is applied --
;; the guarding line is not visible in this view, TODO confirm).
1192 (define_expand "<sse2_avx_avx512f>_loaddqu<mode><mask_name>"
1193 [(set (match_operand:VI1 0 "register_operand")
1195 [(match_operand:VI1 1 "nonimmediate_operand")]
1197 "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
1200 && misaligned_operand (operands[1], <MODE>mode))
1202 rtx src = operands[1];
1204 src = gen_rtx_VEC_MERGE (<MODE>mode, operands[1],
1205 operands[2 * <mask_applied>],
1206 operands[3 * <mask_applied>]);
1207 emit_insn (gen_rtx_SET (VOIDmode, operands[0], src));
;; Expander for unaligned loads in the VI_ULOADSTORE_BW_AVX512VL modes, with
;; an optional masked variant.  If operand 1 satisfies misaligned_operand, a
;; plain SET from the source is emitted; the source is replaced by a
;; VEC_MERGE built from operands[2 * <mask_applied>] and
;; operands[3 * <mask_applied>] (presumably only when a mask is applied --
;; the guarding line is not visible in this view, TODO confirm).
1212 (define_expand "<sse2_avx_avx512f>_loaddqu<mode><mask_name>"
1213 [(set (match_operand:VI_ULOADSTORE_BW_AVX512VL 0 "register_operand")
1214 (unspec:VI_ULOADSTORE_BW_AVX512VL
1215 [(match_operand:VI_ULOADSTORE_BW_AVX512VL 1 "nonimmediate_operand")]
1219 if (misaligned_operand (operands[1], <MODE>mode))
1221 rtx src = operands[1];
1223 src = gen_rtx_VEC_MERGE (<MODE>mode, operands[1],
1224 operands[2 * <mask_applied>],
1225 operands[3 * <mask_applied>]);
1226 emit_insn (gen_rtx_SET (VOIDmode, operands[0], src));
;; Expander for unaligned loads in the VI_ULOADSTORE_F_AVX512VL modes, with
;; an optional masked variant.  If operand 1 satisfies misaligned_operand, a
;; plain SET from the source is emitted; the source is replaced by a
;; VEC_MERGE built from operands[2 * <mask_applied>] and
;; operands[3 * <mask_applied>] (presumably only when a mask is applied --
;; the guarding line is not visible in this view, TODO confirm).
1231 (define_expand "<sse2_avx_avx512f>_loaddqu<mode><mask_name>"
1232 [(set (match_operand:VI_ULOADSTORE_F_AVX512VL 0 "register_operand")
1233 (unspec:VI_ULOADSTORE_F_AVX512VL
1234 [(match_operand:VI_ULOADSTORE_F_AVX512VL 1 "nonimmediate_operand")]
1238 if (misaligned_operand (operands[1], <MODE>mode))
1240 rtx src = operands[1];
1242 src = gen_rtx_VEC_MERGE (<MODE>mode, operands[1],
1243 operands[2 * <mask_applied>],
1244 operands[3 * <mask_applied>]);
1245 emit_insn (gen_rtx_SET (VOIDmode, operands[0], src));
;; Insn for unaligned loads of VI1 integer vectors, optionally masked.  The
;; output code switches on the insn's mode attribute and returns %vmovups,
;; %vmovdqu when TARGET_AVX512VL and TARGET_AVX512BW are not both enabled,
;; or vmovdqu<ssescalarsize> with <mask_operand2> on the destination.  The
;; trailing cond selects <ssePSmode> for 16-byte vectors when
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL, <sseinsnmode> under TARGET_AVX,
;; V4SF when optimizing the function for size, and <sseinsnmode> otherwise.
1250 (define_insn "*<sse2_avx_avx512f>_loaddqu<mode><mask_name>"
1251 [(set (match_operand:VI1 0 "register_operand" "=v")
1253 [(match_operand:VI1 1 "nonimmediate_operand" "vm")]
1255 "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
1257 switch (get_attr_mode (insn))
1261 return "%vmovups\t{%1, %0|%0, %1}";
1263 if (!(TARGET_AVX512VL && TARGET_AVX512BW))
1264 return "%vmovdqu\t{%1, %0|%0, %1}";
1266 return "vmovdqu<ssescalarsize>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
1269 [(set_attr "type" "ssemov")
1270 (set_attr "movu" "1")
1271 (set_attr "ssememalign" "8")
1272 (set (attr "prefix_data16")
1274 (match_test "TARGET_AVX")
1276 (const_string "1")))
1277 (set_attr "prefix" "maybe_vex")
1279 (cond [(and (match_test "<MODE_SIZE> == 16")
1280 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
1281 (const_string "<ssePSmode>")
1282 (match_test "TARGET_AVX")
1283 (const_string "<sseinsnmode>")
1284 (match_test "optimize_function_for_size_p (cfun)")
1285 (const_string "V4SF")
1287 (const_string "<sseinsnmode>")))])
;; Insn for unaligned loads in the VI_ULOADSTORE_BW_AVX512VL modes, optionally
;; masked: register destination ("=v"), register-or-memory source ("vm").
;; Always emits vmovdqu<ssescalarsize>, with <mask_operand2> attached to the
;; destination.  The output template is a plain string, so it takes no
;; trailing ';' (the md reader would read one as an empty comment).
1289 (define_insn "*<sse2_avx_avx512f>_loaddqu<mode><mask_name>"
1290 [(set (match_operand:VI_ULOADSTORE_BW_AVX512VL 0 "register_operand" "=v")
1291 (unspec:VI_ULOADSTORE_BW_AVX512VL
1292 [(match_operand:VI_ULOADSTORE_BW_AVX512VL 1 "nonimmediate_operand" "vm")]
1295 "vmovdqu<ssescalarsize>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
1296 [(set_attr "type" "ssemov")
1297 (set_attr "movu" "1")
1298 (set_attr "ssememalign" "8")
1299 (set_attr "prefix" "maybe_evex")])
;; Insn for unaligned loads in the VI_ULOADSTORE_F_AVX512VL modes, optionally
;; masked: register destination ("=v"), register-or-memory source ("vm").
;; Always emits vmovdqu<ssescalarsize>, with <mask_operand2> attached to the
;; destination.  The output template is a plain string, so it takes no
;; trailing ';' (the md reader would read one as an empty comment).
1301 (define_insn "*<sse2_avx_avx512f>_loaddqu<mode><mask_name>"
1302 [(set (match_operand:VI_ULOADSTORE_F_AVX512VL 0 "register_operand" "=v")
1303 (unspec:VI_ULOADSTORE_F_AVX512VL
1304 [(match_operand:VI_ULOADSTORE_F_AVX512VL 1 "nonimmediate_operand" "vm")]
1307 "vmovdqu<ssescalarsize>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
1308 [(set_attr "type" "ssemov")
1309 (set_attr "movu" "1")
1310 (set_attr "ssememalign" "8")
1311 (set_attr "prefix" "maybe_evex")])
;; Unaligned store of a VI1 integer vector: memory destination ("=m"),
;; register source ("v").  The output code switches on the insn's mode
;; attribute and returns %vmovups, %vmovdqu when TARGET_AVX512VL and
;; TARGET_AVX512BW are not both enabled, or vmovdqu<ssescalarsize>.  The
;; trailing cond selects <ssePSmode> for 16-byte vectors when either
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL or TARGET_SSE_TYPELESS_STORES holds,
;; <sseinsnmode> under TARGET_AVX, V4SF when optimizing the function for
;; size, and <sseinsnmode> otherwise.
1313 (define_insn "<sse2_avx_avx512f>_storedqu<mode>"
1314 [(set (match_operand:VI1 0 "memory_operand" "=m")
1316 [(match_operand:VI1 1 "register_operand" "v")]
1320 switch (get_attr_mode (insn))
1325 return "%vmovups\t{%1, %0|%0, %1}";
1331 if (!(TARGET_AVX512VL && TARGET_AVX512BW))
1332 return "%vmovdqu\t{%1, %0|%0, %1}";
1334 return "vmovdqu<ssescalarsize>\t{%1, %0|%0, %1}";
1338 [(set_attr "type" "ssemov")
1339 (set_attr "movu" "1")
1340 (set_attr "ssememalign" "8")
1341 (set (attr "prefix_data16")
1343 (match_test "TARGET_AVX")
1345 (const_string "1")))
1346 (set_attr "prefix" "maybe_vex")
1348 (cond [(and (match_test "<MODE_SIZE> == 16")
1349 (ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
1350 (match_test "TARGET_SSE_TYPELESS_STORES")))
1351 (const_string "<ssePSmode>")
1352 (match_test "TARGET_AVX")
1353 (const_string "<sseinsnmode>")
1354 (match_test "optimize_function_for_size_p (cfun)")
1355 (const_string "V4SF")
1357 (const_string "<sseinsnmode>")))])
;; Unaligned store in the VI_ULOADSTORE_BW_AVX512VL modes: memory destination
;; ("=m"), register source ("v").  Always emits vmovdqu<ssescalarsize>.
1359 (define_insn "<sse2_avx_avx512f>_storedqu<mode>"
1360 [(set (match_operand:VI_ULOADSTORE_BW_AVX512VL 0 "memory_operand" "=m")
1361 (unspec:VI_ULOADSTORE_BW_AVX512VL
1362 [(match_operand:VI_ULOADSTORE_BW_AVX512VL 1 "register_operand" "v")]
1365 "vmovdqu<ssescalarsize>\t{%1, %0|%0, %1}"
1366 [(set_attr "type" "ssemov")
1367 (set_attr "movu" "1")
1368 (set_attr "ssememalign" "8")
1369 (set_attr "prefix" "maybe_evex")])
;; Unaligned store in the VI_ULOADSTORE_F_AVX512VL modes: memory destination
;; ("=m"), register source ("v").  Always emits vmovdqu<ssescalarsize>.
;; The prefix attribute is "maybe_evex", matching the BW-mode store and the
;; corresponding load pattern for these modes, which emit the same
;; vmovdqu<ssescalarsize> mnemonic.
1371 (define_insn "<sse2_avx_avx512f>_storedqu<mode>"
1372 [(set (match_operand:VI_ULOADSTORE_F_AVX512VL 0 "memory_operand" "=m")
1373 (unspec:VI_ULOADSTORE_F_AVX512VL
1374 [(match_operand:VI_ULOADSTORE_F_AVX512VL 1 "register_operand" "v")]
1377 "vmovdqu<ssescalarsize>\t{%1, %0|%0, %1}"
1378 [(set_attr "type" "ssemov")
1379 (set_attr "movu" "1")
1380 (set_attr "ssememalign" "8")
1381 (set_attr "prefix" "maybe_evex")])
;; Masked unaligned store for the VI48_AVX512VL modes, expressed as a
;; vec_merge.  Operand 1 is the source register and operand 2 the mask
;; register (constraint "Yk"), printed as {%2} after the memory destination.
1383 (define_insn "<avx512>_storedqu<mode>_mask"
1384 [(set (match_operand:VI48_AVX512VL 0 "memory_operand" "=m")
1385 (vec_merge:VI48_AVX512VL
1386 (unspec:VI48_AVX512VL
1387 [(match_operand:VI48_AVX512VL 1 "register_operand" "v")]
1390 (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
1392 "vmovdqu<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
1393 [(set_attr "type" "ssemov")
1394 (set_attr "movu" "1")
1395 (set_attr "memory" "store")
1396 (set_attr "prefix" "evex")
1397 (set_attr "mode" "<sseinsnmode>")])
;; Masked unaligned store for the VI12_AVX512VL modes, expressed as a
;; vec_merge.  Operand 1 is the source register and operand 2 the mask
;; register (constraint "Yk"), printed as {%2} after the memory destination.
1399 (define_insn "<avx512>_storedqu<mode>_mask"
1400 [(set (match_operand:VI12_AVX512VL 0 "memory_operand" "=m")
1401 (vec_merge:VI12_AVX512VL
1402 (unspec:VI12_AVX512VL
1403 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")]
1406 (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
1408 "vmovdqu<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
1409 [(set_attr "type" "ssemov")
1410 (set_attr "movu" "1")
1411 (set_attr "memory" "store")
1412 (set_attr "prefix" "evex")
1413 (set_attr "mode" "<sseinsnmode>")])
;; lddqu load of a VI1 vector: the source must be memory ("m") and the
;; destination an "x" register.  Emits %vlddqu; the prefix_data16 and
;; prefix_rep attributes are both conditioned on TARGET_AVX.
1415 (define_insn "<sse3>_lddqu<avxsizesuffix>"
1416 [(set (match_operand:VI1 0 "register_operand" "=x")
1417 (unspec:VI1 [(match_operand:VI1 1 "memory_operand" "m")]
1420 "%vlddqu\t{%1, %0|%0, %1}"
1421 [(set_attr "type" "ssemov")
1422 (set_attr "movu" "1")
1423 (set_attr "ssememalign" "8")
1424 (set (attr "prefix_data16")
1426 (match_test "TARGET_AVX")
1428 (const_string "0")))
1429 (set (attr "prefix_rep")
1431 (match_test "TARGET_AVX")
1433 (const_string "1")))
1434 (set_attr "prefix" "maybe_vex")
1435 (set_attr "mode" "<sseinsnmode>")])
;; movnti store of a general-purpose register ("r") to memory, for the SWI48
;; integer modes.
1437 (define_insn "sse2_movnti<mode>"
1438 [(set (match_operand:SWI48 0 "memory_operand" "=m")
1439 (unspec:SWI48 [(match_operand:SWI48 1 "register_operand" "r")]
1442 "movnti\t{%1, %0|%0, %1}"
1443 [(set_attr "type" "ssemov")
1444 (set_attr "prefix_data16" "0")
1445 (set_attr "mode" "<MODE>")])
;; movnt store of a floating-point vector register ("v") to memory; emits
;; %vmovnt<ssemodesuffix>.
1447 (define_insn "<sse>_movnt<mode>"
1448 [(set (match_operand:VF 0 "memory_operand" "=m")
1450 [(match_operand:VF 1 "register_operand" "v")]
1453 "%vmovnt<ssemodesuffix>\t{%1, %0|%0, %1}"
1454 [(set_attr "type" "ssemov")
1455 (set_attr "prefix" "maybe_vex")
1456 (set_attr "mode" "<MODE>")])
;; movntdq store of a VI8 integer vector register ("v") to memory; emits
;; %vmovntdq.  The prefix_data16 attribute is conditioned on TARGET_AVX.
;; NOTE(review): the type attribute here is "ssecvt" while the other movnt
;; patterns use "ssemov" -- confirm this is intentional.
1458 (define_insn "<sse2>_movnt<mode>"
1459 [(set (match_operand:VI8 0 "memory_operand" "=m")
1460 (unspec:VI8 [(match_operand:VI8 1 "register_operand" "v")]
1463 "%vmovntdq\t{%1, %0|%0, %1}"
1464 [(set_attr "type" "ssecvt")
1465 (set (attr "prefix_data16")
1467 (match_test "TARGET_AVX")
1469 (const_string "1")))
1470 (set_attr "prefix" "maybe_vex")
1471 (set_attr "mode" "<sseinsnmode>")])
1473 ; Expand patterns for non-temporal stores. At the moment, only those
1474 ; that directly map to insns are defined; it would be possible to
1475 ; define patterns for other modes that would expand to several insns.
1477 ;; Modes handled by storent patterns.
;; Scalar DI needs TARGET_SSE2 in 64-bit mode and SI needs TARGET_SSE2; scalar
;; SF/DF need TARGET_SSE4A.  Vector modes need TARGET_AVX512F (512-bit),
;; TARGET_AVX (256-bit) or TARGET_SSE2 (V2DI, V2DF); V4SF is unconditional.
1478 (define_mode_iterator STORENT_MODE
1479 [(DI "TARGET_SSE2 && TARGET_64BIT") (SI "TARGET_SSE2")
1480 (SF "TARGET_SSE4A") (DF "TARGET_SSE4A")
1481 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") (V2DI "TARGET_SSE2")
1482 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
1483 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
;; Standard-name expander for a non-temporal store in each STORENT_MODE:
;; memory destination, register source wrapped in an unspec.
1485 (define_expand "storent<mode>"
1486 [(set (match_operand:STORENT_MODE 0 "memory_operand")
1487 (unspec:STORENT_MODE
1488 [(match_operand:STORENT_MODE 1 "register_operand")]
1492 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1494 ;; Parallel floating point arithmetic
1496 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander for the unary <code> operation on VF vectors (presumably abs and
;; neg, given the *absneg<mode>2 pattern in this file -- TODO confirm the
;; code iterator).  All work is done by ix86_expand_fp_absneg_operator, after
;; which the expansion is DONE.
1498 (define_expand "<code><mode>2"
1499 [(set (match_operand:VF 0 "register_operand")
1501 (match_operand:VF 1 "register_operand")))]
1503 "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
;; abs/neg on VF vectors, represented as an absneg_operator plus a USE of
;; operand 2, and split once reload has completed.  The split picks op1/op2
;; so that a memory operand 1 ends up second, then sets operand 0 to
;; XOR (op1, op2) when operator 3 is NEG and AND (op1, op2) otherwise.
;; NOTE(review): operand 2 is presumably the sign-bit mask supplied by the
;; expander -- confirm against ix86_expand_fp_absneg_operator.
1505 (define_insn_and_split "*absneg<mode>2"
1506 [(set (match_operand:VF 0 "register_operand" "=x,x,v,v")
1507 (match_operator:VF 3 "absneg_operator"
1508 [(match_operand:VF 1 "nonimmediate_operand" "0, xm, v, m")]))
1509 (use (match_operand:VF 2 "nonimmediate_operand" "xm, 0, vm,v"))]
1512 "&& reload_completed"
1515 enum rtx_code absneg_op;
1521 if (MEM_P (operands[1]))
1522 op1 = operands[2], op2 = operands[1];
1524 op1 = operands[1], op2 = operands[2];
1529 if (rtx_equal_p (operands[0], operands[1]))
1535 absneg_op = GET_CODE (operands[3]) == NEG ? XOR : AND;
1536 t = gen_rtx_fmt_ee (absneg_op, <MODE>mode, op1, op2);
1537 t = gen_rtx_SET (VOIDmode, operands[0], t);
1541 [(set_attr "isa" "noavx,noavx,avx,avx")])
;; Expander for vector float add/subtract (VF modes), with optional mask and
;; embedded-rounding variants.  The preparation statement calls
;; ix86_fixup_binary_operands_no_copy on the operands.
1543 (define_expand "<plusminus_insn><mode>3<mask_name><round_name>"
1544 [(set (match_operand:VF 0 "register_operand")
1546 (match_operand:VF 1 "<round_nimm_predicate>")
1547 (match_operand:VF 2 "<round_nimm_predicate>")))]
1548 "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1549 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Insn for vector float add/subtract.  Alternative 0 (isa noavx) is the
;; two-operand form with operand 1 tied to the destination; alternative 1
;; (isa avx) is the three-operand v-prefixed form, which also carries the
;; mask operand and rounding operand.  Requires ix86_binary_operator_ok.
1551 (define_insn "*<plusminus_insn><mode>3<mask_name><round_name>"
1552 [(set (match_operand:VF 0 "register_operand" "=x,v")
1554 (match_operand:VF 1 "<round_nimm_predicate>" "<comm>0,v")
1555 (match_operand:VF 2 "<round_nimm_predicate>" "xm,<round_constraint>")))]
1556 "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands) && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1558 <plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
1559 v<plusminus_mnemonic><ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
1560 [(set_attr "isa" "noavx,avx")
1561 (set_attr "type" "sseadd")
1562 (set_attr "prefix" "<mask_prefix3>")
1563 (set_attr "mode" "<MODE>")])
;; Scalar ("vm") add/subtract on 128-bit float vectors (VF_128), with an
;; optional embedded-rounding variant.  Alternative 0 is the two-operand
;; non-AVX form, alternative 1 the three-operand v-prefixed form; the mode
;; attribute is the scalar mode.
1565 (define_insn "<sse>_vm<plusminus_insn><mode>3<round_name>"
1566 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1569 (match_operand:VF_128 1 "register_operand" "0,v")
1570 (match_operand:VF_128 2 "nonimmediate_operand" "xm,<round_constraint>"))
1575 <plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
1576 v<plusminus_mnemonic><ssescalarmodesuffix>\t{<round_op3>%2, %1, %0|%0, %1, %<iptr>2<round_op3>}"
1577 [(set_attr "isa" "noavx,avx")
1578 (set_attr "type" "sseadd")
1579 (set_attr "prefix" "<round_prefix>")
1580 (set_attr "mode" "<ssescalarmode>")])
;; Expander for vector float multiply (VF modes), with optional mask and
;; embedded-rounding variants.  The preparation statement calls
;; ix86_fixup_binary_operands_no_copy with MULT.
1582 (define_expand "mul<mode>3<mask_name><round_name>"
1583 [(set (match_operand:VF 0 "register_operand")
1585 (match_operand:VF 1 "<round_nimm_predicate>")
1586 (match_operand:VF 2 "<round_nimm_predicate>")))]
1587 "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1588 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; Insn for vector float multiply.  Operand 1 is marked commutative ("%").
;; Alternative 0 (isa noavx) is the two-operand mul form; alternative 1
;; (isa avx) is the three-operand vmul form with mask and rounding operands.
;; Requires ix86_binary_operator_ok.
1590 (define_insn "*mul<mode>3<mask_name><round_name>"
1591 [(set (match_operand:VF 0 "register_operand" "=x,v")
1593 (match_operand:VF 1 "<round_nimm_predicate>" "%0,v")
1594 (match_operand:VF 2 "<round_nimm_predicate>" "xm,<round_constraint>")))]
1595 "TARGET_SSE && ix86_binary_operator_ok (MULT, <MODE>mode, operands) && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1597 mul<ssemodesuffix>\t{%2, %0|%0, %2}
1598 vmul<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
1599 [(set_attr "isa" "noavx,avx")
1600 (set_attr "type" "ssemul")
1601 (set_attr "prefix" "<mask_prefix3>")
1602 (set_attr "btver2_decode" "direct,double")
1603 (set_attr "mode" "<MODE>")])
;; Scalar ("vm") multiply/divide on 128-bit float vectors (VF_128), with an
;; optional embedded-rounding variant.  Alternative 0 is the two-operand
;; non-AVX form, alternative 1 the three-operand v-prefixed form.  The type
;; attribute is derived from the mnemonic ("sse<multdiv_mnemonic>").
1605 (define_insn "<sse>_vm<multdiv_mnemonic><mode>3<round_name>"
1606 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1609 (match_operand:VF_128 1 "register_operand" "0,v")
1610 (match_operand:VF_128 2 "nonimmediate_operand" "xm,<round_constraint>"))
1615 <multdiv_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
1616 v<multdiv_mnemonic><ssescalarmodesuffix>\t{<round_op3>%2, %1, %0|%0, %1, %<iptr>2<round_op3>}"
1617 [(set_attr "isa" "noavx,avx")
1618 (set_attr "type" "sse<multdiv_mnemonic>")
1619 (set_attr "prefix" "<round_prefix>")
1620 (set_attr "btver2_decode" "direct,double")
1621 (set_attr "mode" "<ssescalarmode>")])
;; Expander for vector divide in the VF2 modes.  The preparation statement
;; calls ix86_fixup_binary_operands_no_copy with DIV.
1623 (define_expand "div<mode>3"
1624 [(set (match_operand:VF2 0 "register_operand")
1625 (div:VF2 (match_operand:VF2 1 "register_operand")
1626 (match_operand:VF2 2 "nonimmediate_operand")))]
1628 "ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);")
;; Expander for vector divide in the VF1 modes.  After fixing up the operands
;; it may emit a software division through ix86_emit_swdivsf; the visible
;; part of the guard requires TARGET_RECIP_VEC_DIV, not optimizing the insn
;; for size, finite-math-only, no trapping math and unsafe math optimizations
;; (the first line of the condition is not visible in this view).
1630 (define_expand "div<mode>3"
1631 [(set (match_operand:VF1 0 "register_operand")
1632 (div:VF1 (match_operand:VF1 1 "register_operand")
1633 (match_operand:VF1 2 "nonimmediate_operand")))]
1636 ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);
1639 && TARGET_RECIP_VEC_DIV
1640 && !optimize_insn_for_size_p ()
1641 && flag_finite_math_only && !flag_trapping_math
1642 && flag_unsafe_math_optimizations)
1644 ix86_emit_swdivsf (operands[0], operands[1], operands[2], <MODE>mode);
;; Insn for vector float divide, with optional mask and embedded-rounding
;; variants.  Operand 1 must be a register.  Alternative 0 (isa noavx) is the
;; two-operand div form; alternative 1 (isa avx) is the three-operand vdiv
;; form with mask and rounding operands.
1649 (define_insn "<sse>_div<mode>3<mask_name><round_name>"
1650 [(set (match_operand:VF 0 "register_operand" "=x,v")
1652 (match_operand:VF 1 "register_operand" "0,v")
1653 (match_operand:VF 2 "<round_nimm_predicate>" "xm,<round_constraint>")))]
1654 "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1656 div<ssemodesuffix>\t{%2, %0|%0, %2}
1657 vdiv<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
1658 [(set_attr "isa" "noavx,avx")
1659 (set_attr "type" "ssediv")
1660 (set_attr "prefix" "<mask_prefix3>")
1661 (set_attr "mode" "<MODE>")])
;; Packed reciprocal (UNSPEC_RCP) for the VF1_128_256 modes; emits %vrcpps
;; with a register-or-memory source.
1663 (define_insn "<sse>_rcp<mode>2"
1664 [(set (match_operand:VF1_128_256 0 "register_operand" "=x")
1666 [(match_operand:VF1_128_256 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
1668 "%vrcpps\t{%1, %0|%0, %1}"
1669 [(set_attr "type" "sse")
1670 (set_attr "atom_sse_attr" "rcp")
1671 (set_attr "btver2_sse_attr" "rcp")
1672 (set_attr "prefix" "maybe_vex")
1673 (set_attr "mode" "<MODE>")])
;; Scalar reciprocal on V4SF.  Operand 1 is the source and operand 2 a second
;; register operand, tied to the destination in the non-AVX alternative.
;; Emits rcpss (noavx) or the three-operand vrcpss (avx).
1675 (define_insn "sse_vmrcpv4sf2"
1676 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1678 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
1680 (match_operand:V4SF 2 "register_operand" "0,x")
1684 rcpss\t{%1, %0|%0, %k1}
1685 vrcpss\t{%1, %2, %0|%0, %2, %k1}"
1686 [(set_attr "isa" "noavx,avx")
1687 (set_attr "type" "sse")
1688 (set_attr "ssememalign" "32")
1689 (set_attr "atom_sse_attr" "rcp")
1690 (set_attr "btver2_sse_attr" "rcp")
1691 (set_attr "prefix" "orig,vex")
1692 (set_attr "mode" "SF")])
;; vrcp14 on 512-bit float vectors (VF_512), optionally masked; the mask
;; operand is printed after the destination via <mask_operand2>.
1694 (define_insn "<mask_codefor>rcp14<mode><mask_name>"
1695 [(set (match_operand:VF_512 0 "register_operand" "=v")
1697 [(match_operand:VF_512 1 "nonimmediate_operand" "vm")]
1700 "vrcp14<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
1701 [(set_attr "type" "sse")
1702 (set_attr "prefix" "evex")
1703 (set_attr "mode" "<MODE>")])
;; Scalar vrcp14 on 128-bit float vectors (VF_128).  Operand 1 is the source
;; and operand 2 a second register operand; the three-operand form is emitted
;; with the scalar mode suffix.
1705 (define_insn "srcp14<mode>"
1706 [(set (match_operand:VF_128 0 "register_operand" "=v")
1709 [(match_operand:VF_128 1 "nonimmediate_operand" "vm")]
1711 (match_operand:VF_128 2 "register_operand" "v")
1714 "vrcp14<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %1}"
1715 [(set_attr "type" "sse")
1716 (set_attr "prefix" "evex")
1717 (set_attr "mode" "<MODE>")])
;; Expander for vector square root in the VF2 modes: register destination,
;; nonimmediate source.
1719 (define_expand "sqrt<mode>2"
1720 [(set (match_operand:VF2 0 "register_operand")
1721 (sqrt:VF2 (match_operand:VF2 1 "nonimmediate_operand")))]
;; Expander for vector square root in the VF1 modes.  It may emit a software
;; sequence through ix86_emit_swsqrtsf (last argument false); the visible
;; part of the guard requires TARGET_RECIP_VEC_SQRT, not optimizing the insn
;; for size, finite-math-only, no trapping math and unsafe math optimizations
;; (the first line of the condition is not visible in this view).
1724 (define_expand "sqrt<mode>2"
1725 [(set (match_operand:VF1 0 "register_operand")
1726 (sqrt:VF1 (match_operand:VF1 1 "nonimmediate_operand")))]
1730 && TARGET_RECIP_VEC_SQRT
1731 && !optimize_insn_for_size_p ()
1732 && flag_finite_math_only && !flag_trapping_math
1733 && flag_unsafe_math_optimizations)
1735 ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, false);
;; Insn for vector float square root (VF modes), with optional mask and
;; embedded-rounding variants.  Emits %vsqrt<ssemodesuffix> with the mask
;; operand on the destination and the rounding operand on the source.
1740 (define_insn "<sse>_sqrt<mode>2<mask_name><round_name>"
1741 [(set (match_operand:VF 0 "register_operand" "=v")
1742 (sqrt:VF (match_operand:VF 1 "<round_nimm_predicate>" "<round_constraint>")))]
1743 "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1744 "%vsqrt<ssemodesuffix>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
1745 [(set_attr "type" "sse")
1746 (set_attr "atom_sse_attr" "sqrt")
1747 (set_attr "btver2_sse_attr" "sqrt")
1748 (set_attr "prefix" "maybe_vex")
1749 (set_attr "mode" "<MODE>")])
;; Scalar ("vm") square root on 128-bit float vectors (VF_128), with an
;; optional embedded-rounding variant.  Operand 1 is the source and operand 2
;; a second register operand, tied to the destination in the non-AVX
;; alternative.
1751 (define_insn "<sse>_vmsqrt<mode>2<round_name>"
1752 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1755 (match_operand:VF_128 1 "nonimmediate_operand" "xm,<round_constraint>"))
1756 (match_operand:VF_128 2 "register_operand" "0,v")
1760 sqrt<ssescalarmodesuffix>\t{%1, %0|%0, %<iptr>1}
1761 vsqrt<ssescalarmodesuffix>\t{<round_op3>%1, %2, %0|%0, %2, %<iptr>1<round_op3>}"
1762 [(set_attr "isa" "noavx,avx")
1763 (set_attr "type" "sse")
1764 (set_attr "atom_sse_attr" "sqrt")
1765 (set_attr "prefix" "<round_prefix>")
1766 (set_attr "btver2_sse_attr" "sqrt")
1767 (set_attr "mode" "<ssescalarmode>")])
;; Expander for reciprocal square root (UNSPEC_RSQRT) in the VF1_128_256
;; modes; calls ix86_emit_swsqrtsf with its last argument true.
1769 (define_expand "rsqrt<mode>2"
1770 [(set (match_operand:VF1_128_256 0 "register_operand")
1772 [(match_operand:VF1_128_256 1 "nonimmediate_operand")] UNSPEC_RSQRT))]
1775 ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, true);
;; Packed reciprocal square root (UNSPEC_RSQRT) for the VF1_128_256 modes;
;; emits %vrsqrtps with a register-or-memory source.
1779 (define_insn "<sse>_rsqrt<mode>2"
1780 [(set (match_operand:VF1_128_256 0 "register_operand" "=x")
1782 [(match_operand:VF1_128_256 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
1784 "%vrsqrtps\t{%1, %0|%0, %1}"
1785 [(set_attr "type" "sse")
1786 (set_attr "prefix" "maybe_vex")
1787 (set_attr "mode" "<MODE>")])
;; vrsqrt14 on 512-bit float vectors (VF_512), optionally masked; the mask
;; operand is printed after the destination via <mask_operand2>.
1789 (define_insn "<mask_codefor>rsqrt14<mode><mask_name>"
1790 [(set (match_operand:VF_512 0 "register_operand" "=v")
1792 [(match_operand:VF_512 1 "nonimmediate_operand" "vm")]
1795 "vrsqrt14<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
1796 [(set_attr "type" "sse")
1797 (set_attr "prefix" "evex")
1798 (set_attr "mode" "<MODE>")])
;; Scalar vrsqrt14 on 128-bit float vectors (VF_128).  Operand 1 is the source
;; and operand 2 a second register operand; the three-operand form is emitted
;; with the scalar mode suffix.
1800 (define_insn "rsqrt14<mode>"
1801 [(set (match_operand:VF_128 0 "register_operand" "=v")
1804 [(match_operand:VF_128 1 "nonimmediate_operand" "vm")]
1806 (match_operand:VF_128 2 "register_operand" "v")
1809 "vrsqrt14<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %1}"
1810 [(set_attr "type" "sse")
1811 (set_attr "prefix" "evex")
1812 (set_attr "mode" "<MODE>")])
;; Scalar reciprocal square root on V4SF.  Operand 1 is the source and
;; operand 2 a second register operand, tied to the destination in the
;; non-AVX alternative.  Emits rsqrtss (noavx) or three-operand vrsqrtss (avx).
1814 (define_insn "sse_vmrsqrtv4sf2"
1815 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1817 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
1819 (match_operand:V4SF 2 "register_operand" "0,x")
1823 rsqrtss\t{%1, %0|%0, %k1}
1824 vrsqrtss\t{%1, %2, %0|%0, %2, %k1}"
1825 [(set_attr "isa" "noavx,avx")
1826 (set_attr "type" "sse")
1827 (set_attr "ssememalign" "32")
1828 (set_attr "prefix" "orig,vex")
1829 (set_attr "mode" "SF")])
1831 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
1832 ;; isn't really correct, as those rtl operators aren't defined when
1833 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
;; Expander for the binary <code> operation on VF vectors (the SMIN/SMAX
;; operations discussed in the comment above), with optional mask and
;; SAE-only rounding variants.  Unless flag_finite_math_only is set, operand 1
;; is forced into a register; then ix86_fixup_binary_operands_no_copy runs.
1835 (define_expand "<code><mode>3<mask_name><round_saeonly_name>"
1836 [(set (match_operand:VF 0 "register_operand")
1838 (match_operand:VF 1 "<round_saeonly_nimm_predicate>")
1839 (match_operand:VF 2 "<round_saeonly_nimm_predicate>")))]
1840 "TARGET_SSE && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
1842 if (!flag_finite_math_only)
1843 operands[1] = force_reg (<MODE>mode, operands[1]);
1844 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
;; Min/max insn used only when flag_finite_math_only is set.  Operand 1 is
;; marked commutative ("%") and may be any operand accepted by the SAE-only
;; predicate; ix86_binary_operator_ok must hold.  Alternative 0 is the
;; two-operand non-AVX form, alternative 1 the three-operand v-prefixed form.
1847 (define_insn "*<code><mode>3_finite<mask_name><round_saeonly_name>"
1848 [(set (match_operand:VF 0 "register_operand" "=x,v")
1850 (match_operand:VF 1 "<round_saeonly_nimm_predicate>" "%0,v")
1851 (match_operand:VF 2 "<round_saeonly_nimm_predicate>" "xm,<round_saeonly_constraint>")))]
1852 "TARGET_SSE && flag_finite_math_only
1853 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
1854 && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
1856 <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
1857 v<maxmin_float><ssemodesuffix>\t{<round_saeonly_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_saeonly_mask_op3>}"
1858 [(set_attr "isa" "noavx,avx")
1859 (set_attr "type" "sseadd")
1860 (set_attr "btver2_sse_attr" "maxmin")
1861 (set_attr "prefix" "<mask_prefix3>")
1862 (set_attr "mode" "<MODE>")])
;; Min/max insn used when flag_finite_math_only is NOT set.  Unlike the
;; _finite variant, operand 1 must be a register and carries no commutative
;; marker.  Alternative 0 is the two-operand non-AVX form, alternative 1 the
;; three-operand v-prefixed form.
1864 (define_insn "*<code><mode>3<mask_name><round_saeonly_name>"
1865 [(set (match_operand:VF 0 "register_operand" "=x,v")
1867 (match_operand:VF 1 "register_operand" "0,v")
1868 (match_operand:VF 2 "<round_saeonly_nimm_predicate>" "xm,<round_saeonly_constraint>")))]
1869 "TARGET_SSE && !flag_finite_math_only
1870 && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
1872 <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
1873 v<maxmin_float><ssemodesuffix>\t{<round_saeonly_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_saeonly_mask_op3>}"
1874 [(set_attr "isa" "noavx,avx")
1875 (set_attr "type" "sseadd")
1876 (set_attr "btver2_sse_attr" "maxmin")
1877 (set_attr "prefix" "<mask_prefix3>")
1878 (set_attr "mode" "<MODE>")])
;; Scalar ("vm") min/max on 128-bit float vectors (VF_128), with an optional
;; SAE-only rounding variant.  Alternative 0 is the two-operand non-AVX form,
;; alternative 1 the three-operand v-prefixed form; the mode attribute is the
;; scalar mode.
1880 (define_insn "<sse>_vm<code><mode>3<round_saeonly_name>"
1881 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1884 (match_operand:VF_128 1 "register_operand" "0,v")
1885 (match_operand:VF_128 2 "nonimmediate_operand" "xm,<round_saeonly_constraint>"))
1890 <maxmin_float><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
1891 v<maxmin_float><ssescalarmodesuffix>\t{<round_saeonly_op3>%2, %1, %0|%0, %1, %<iptr>2<round_saeonly_op3>}"
1892 [(set_attr "isa" "noavx,avx")
1893 (set_attr "type" "sse")
1894 (set_attr "btver2_sse_attr" "maxmin")
1895 (set_attr "prefix" "<round_saeonly_prefix>")
1896 (set_attr "mode" "<ssescalarmode>")])
1898 ;; These versions of the min/max patterns implement exactly the operations
1899 ;; min = (op1 < op2 ? op1 : op2)
1900 ;; max = (!(op1 < op2) ? op1 : op2)
1901 ;; Their operands are not commutative, and thus they may be used in the
1902 ;; presence of -0.0 and NaN.
;; Non-commutative vector minimum on VF modes (see the comment above for the
;; exact operation).  Operand 1 must be a register; alternative 0 emits the
;; two-operand min form (noavx), alternative 1 the three-operand vmin (avx).
1904 (define_insn "*ieee_smin<mode>3"
1905 [(set (match_operand:VF 0 "register_operand" "=v,v")
1907 [(match_operand:VF 1 "register_operand" "0,v")
1908 (match_operand:VF 2 "nonimmediate_operand" "vm,vm")]
1912 min<ssemodesuffix>\t{%2, %0|%0, %2}
1913 vmin<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1914 [(set_attr "isa" "noavx,avx")
1915 (set_attr "type" "sseadd")
1916 (set_attr "prefix" "orig,vex")
1917 (set_attr "mode" "<MODE>")])
;; Non-commutative vector maximum on VF modes (see the comment above the
;; ieee_smin pattern for the exact operation).  Operand 1 must be a register;
;; alternative 0 emits the two-operand max form (noavx), alternative 1 the
;; three-operand vmax (avx).
1919 (define_insn "*ieee_smax<mode>3"
1920 [(set (match_operand:VF 0 "register_operand" "=v,v")
1922 [(match_operand:VF 1 "register_operand" "0,v")
1923 (match_operand:VF 2 "nonimmediate_operand" "vm,vm")]
1927 max<ssemodesuffix>\t{%2, %0|%0, %2}
1928 vmax<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1929 [(set_attr "isa" "noavx,avx")
1930 (set_attr "type" "sseadd")
1931 (set_attr "prefix" "orig,vex")
1932 (set_attr "mode" "<MODE>")])
;; AVX vaddsubpd on V4DF.  The RTL combines a MINUS of operands 1 and 2 with
;; another arithmetic result on the same operands (the combining lines are
;; not visible in this view).
1934 (define_insn "avx_addsubv4df3"
1935 [(set (match_operand:V4DF 0 "register_operand" "=x")
1938 (match_operand:V4DF 1 "register_operand" "x")
1939 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
1940 (minus:V4DF (match_dup 1) (match_dup 2))
1943 "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1944 [(set_attr "type" "sseadd")
1945 (set_attr "prefix" "vex")
1946 (set_attr "mode" "V4DF")])
;; addsubpd on V2DF.  Alternative 0 is the two-operand SSE3 form with
;; operand 1 tied to the destination; alternative 1 is the three-operand
;; vaddsubpd form.
1948 (define_insn "sse3_addsubv2df3"
1949 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1952 (match_operand:V2DF 1 "register_operand" "0,x")
1953 (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm"))
1954 (minus:V2DF (match_dup 1) (match_dup 2))
1958 addsubpd\t{%2, %0|%0, %2}
1959 vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1960 [(set_attr "isa" "noavx,avx")
1961 (set_attr "type" "sseadd")
1962 (set_attr "atom_unit" "complex")
1963 (set_attr "prefix" "orig,vex")
1964 (set_attr "mode" "V2DF")])
;; AVX vaddsubps on V8SF.  The RTL combines a MINUS of operands 1 and 2 with
;; another arithmetic result on the same operands (the combining lines are
;; not visible in this view).
1966 (define_insn "avx_addsubv8sf3"
1967 [(set (match_operand:V8SF 0 "register_operand" "=x")
1970 (match_operand:V8SF 1 "register_operand" "x")
1971 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
1972 (minus:V8SF (match_dup 1) (match_dup 2))
1975 "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1976 [(set_attr "type" "sseadd")
1977 (set_attr "prefix" "vex")
1978 (set_attr "mode" "V8SF")])
;; addsubps on V4SF.  Alternative 0 is the two-operand SSE3 form with
;; operand 1 tied to the destination (prefix_rep 1); alternative 1 is the
;; three-operand vaddsubps form.
1980 (define_insn "sse3_addsubv4sf3"
1981 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1984 (match_operand:V4SF 1 "register_operand" "0,x")
1985 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
1986 (minus:V4SF (match_dup 1) (match_dup 2))
1990 addsubps\t{%2, %0|%0, %2}
1991 vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1992 [(set_attr "isa" "noavx,avx")
1993 (set_attr "type" "sseadd")
1994 (set_attr "prefix" "orig,vex")
1995 (set_attr "prefix_rep" "1,*")
1996 (set_attr "mode" "V4SF")])
;; AVX horizontal add/subtract on V4DF.  The RTL pairs elements 0/1 of
;; operand 1, then 0/1 of operand 2, then 2/3 of operand 1, then 2/3 of
;; operand 2.  Emits vh<plusminus_mnemonic>pd.
1998 (define_insn "avx_h<plusminus_insn>v4df3"
1999 [(set (match_operand:V4DF 0 "register_operand" "=x")
2004 (match_operand:V4DF 1 "register_operand" "x")
2005 (parallel [(const_int 0)]))
2006 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
2009 (match_operand:V4DF 2 "nonimmediate_operand" "xm")
2010 (parallel [(const_int 0)]))
2011 (vec_select:DF (match_dup 2) (parallel [(const_int 1)]))))
2014 (vec_select:DF (match_dup 1) (parallel [(const_int 2)]))
2015 (vec_select:DF (match_dup 1) (parallel [(const_int 3)])))
2017 (vec_select:DF (match_dup 2) (parallel [(const_int 2)]))
2018 (vec_select:DF (match_dup 2) (parallel [(const_int 3)]))))))]
2020 "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
2021 [(set_attr "type" "sseadd")
2022 (set_attr "prefix" "vex")
2023 (set_attr "mode" "V4DF")])
;; Named expander for horizontal add on V2DF: pairs elements 0 and 1 of
;; operand 1, then elements 0 and 1 of operand 2, using fixed element
;; indices.
2025 (define_expand "sse3_haddv2df3"
2026 [(set (match_operand:V2DF 0 "register_operand")
2030 (match_operand:V2DF 1 "register_operand")
2031 (parallel [(const_int 0)]))
2032 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
2035 (match_operand:V2DF 2 "nonimmediate_operand")
2036 (parallel [(const_int 0)]))
2037 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
;; Matcher for horizontal add on V2DF.  The element indices are operands 3-6
;; (each 0 or 1) rather than constants; the insn condition requires the two
;; indices within each pair to differ, so either element order is accepted.
;; Emits haddpd (noavx) or three-operand vhaddpd (avx).
2040 (define_insn "*sse3_haddv2df3"
2041 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2045 (match_operand:V2DF 1 "register_operand" "0,x")
2046 (parallel [(match_operand:SI 3 "const_0_to_1_operand")]))
2049 (parallel [(match_operand:SI 4 "const_0_to_1_operand")])))
2052 (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm")
2053 (parallel [(match_operand:SI 5 "const_0_to_1_operand")]))
2056 (parallel [(match_operand:SI 6 "const_0_to_1_operand")])))))]
2058 && INTVAL (operands[3]) != INTVAL (operands[4])
2059 && INTVAL (operands[5]) != INTVAL (operands[6])"
2061 haddpd\t{%2, %0|%0, %2}
2062 vhaddpd\t{%2, %1, %0|%0, %1, %2}"
2063 [(set_attr "isa" "noavx,avx")
2064 (set_attr "type" "sseadd")
2065 (set_attr "prefix" "orig,vex")
2066 (set_attr "mode" "V2DF")])
;; Horizontal subtract on V2DF with fixed element indices: elements 0 and 1
;; of operand 1, then elements 0 and 1 of operand 2.  Emits hsubpd (noavx) or
;; three-operand vhsubpd (avx).
2068 (define_insn "sse3_hsubv2df3"
2069 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2073 (match_operand:V2DF 1 "register_operand" "0,x")
2074 (parallel [(const_int 0)]))
2075 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
2078 (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm")
2079 (parallel [(const_int 0)]))
2080 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
2083 hsubpd\t{%2, %0|%0, %2}
2084 vhsubpd\t{%2, %1, %0|%0, %1, %2}"
2085 [(set_attr "isa" "noavx,avx")
2086 (set_attr "type" "sseadd")
2087 (set_attr "prefix" "orig,vex")
2088 (set_attr "mode" "V2DF")])
;; Horizontal add producing only a scalar DF result from the two elements of
;; one V2DF register.  The element indices are operands 2 and 3 (each 0 or 1)
;; and must differ.  The same register is used for both instruction sources.
2090 (define_insn "*sse3_haddv2df3_low"
2091 [(set (match_operand:DF 0 "register_operand" "=x,x")
2094 (match_operand:V2DF 1 "register_operand" "0,x")
2095 (parallel [(match_operand:SI 2 "const_0_to_1_operand")]))
2098 (parallel [(match_operand:SI 3 "const_0_to_1_operand")]))))]
2100 && INTVAL (operands[2]) != INTVAL (operands[3])"
2102 haddpd\t{%0, %0|%0, %0}
2103 vhaddpd\t{%1, %1, %0|%0, %1, %1}"
2104 [(set_attr "isa" "noavx,avx")
2105 (set_attr "type" "sseadd1")
2106 (set_attr "prefix" "orig,vex")
2107 (set_attr "mode" "V2DF")])
;; Horizontal subtract producing only a scalar DF result from elements 0 and
;; 1 of one V2DF register (fixed indices).  The same register is used for
;; both instruction sources.
2109 (define_insn "*sse3_hsubv2df3_low"
2110 [(set (match_operand:DF 0 "register_operand" "=x,x")
2113 (match_operand:V2DF 1 "register_operand" "0,x")
2114 (parallel [(const_int 0)]))
2117 (parallel [(const_int 1)]))))]
2120 hsubpd\t{%0, %0|%0, %0}
2121 vhsubpd\t{%1, %1, %0|%0, %1, %1}"
2122 [(set_attr "isa" "noavx,avx")
2123 (set_attr "type" "sseadd1")
2124 (set_attr "prefix" "orig,vex")
2125 (set_attr "mode" "V2DF")])
;; AVX horizontal add/subtract on V8SF.  The RTL pairs elements 0/1 and 2/3
;; of operand 1, then 0/1 and 2/3 of operand 2, then 4/5 and 6/7 of operand
;; 1, then 4/5 and 6/7 of operand 2.  Emits vh<plusminus_mnemonic>ps.
2127 (define_insn "avx_h<plusminus_insn>v8sf3"
2128 [(set (match_operand:V8SF 0 "register_operand" "=x")
2134 (match_operand:V8SF 1 "register_operand" "x")
2135 (parallel [(const_int 0)]))
2136 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
2138 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
2139 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
2143 (match_operand:V8SF 2 "nonimmediate_operand" "xm")
2144 (parallel [(const_int 0)]))
2145 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
2147 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
2148 (vec_select:SF (match_dup 2) (parallel [(const_int 3)])))))
2152 (vec_select:SF (match_dup 1) (parallel [(const_int 4)]))
2153 (vec_select:SF (match_dup 1) (parallel [(const_int 5)])))
2155 (vec_select:SF (match_dup 1) (parallel [(const_int 6)]))
2156 (vec_select:SF (match_dup 1) (parallel [(const_int 7)]))))
2159 (vec_select:SF (match_dup 2) (parallel [(const_int 4)]))
2160 (vec_select:SF (match_dup 2) (parallel [(const_int 5)])))
2162 (vec_select:SF (match_dup 2) (parallel [(const_int 6)]))
2163 (vec_select:SF (match_dup 2) (parallel [(const_int 7)])))))))]
2165 "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
2166 [(set_attr "type" "sseadd")
2167 (set_attr "prefix" "vex")
2168 (set_attr "mode" "V8SF")])
;; 128-bit horizontal add/sub of V4SF.  Adjacent element pairs (0,1) and
;; (2,3) are taken first from operand 1, then from operand 2 (which may
;; be memory).  Alternative 0 is the two-operand SSE3 form with the
;; destination tied to operand 1; alternative 1 is the three-operand
;; VEX form.  prefix_rep is set to 1 only for the non-AVX alternative.
2170 (define_insn "sse3_h<plusminus_insn>v4sf3"
2171 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2176 (match_operand:V4SF 1 "register_operand" "0,x")
2177 (parallel [(const_int 0)]))
2178 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
2180 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
2181 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
2185 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm")
2186 (parallel [(const_int 0)]))
2187 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
2189 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
2190 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
2193 h<plusminus_mnemonic>ps\t{%2, %0|%0, %2}
2194 vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
2195 [(set_attr "isa" "noavx,avx")
2196 (set_attr "type" "sseadd")
2197 (set_attr "atom_unit" "complex")
2198 (set_attr "prefix" "orig,vex")
2199 (set_attr "prefix_rep" "1,*")
2200 (set_attr "mode" "V4SF")])
;; Sum reduction of a V8DF vector (operand 1) into operand 0, delegated
;; to ix86_expand_reduc with gen_addv8df3 as the combining generator.
;; NOTE(review): the enabling condition is not visible in this excerpt.
2202 (define_expand "reduc_splus_v8df"
2203 [(match_operand:V8DF 0 "register_operand")
2204 (match_operand:V8DF 1 "register_operand")]
2207 ix86_expand_reduc (gen_addv8df3, operands[0], operands[1]);
;; Sum reduction of a V4DF vector.  Sequence: horizontal-add operand 1
;; with itself into tmp, permute tmp with itself (vperm2f128, immediate 1)
;; into tmp2, then add tmp and tmp2 into operand 0.
2211 (define_expand "reduc_splus_v4df"
2212 [(match_operand:V4DF 0 "register_operand")
2213 (match_operand:V4DF 1 "register_operand")]
2216 rtx tmp = gen_reg_rtx (V4DFmode);
2217 rtx tmp2 = gen_reg_rtx (V4DFmode);
2218 emit_insn (gen_avx_haddv4df3 (tmp, operands[1], operands[1]));
2219 emit_insn (gen_avx_vperm2f128v4df3 (tmp2, tmp, tmp, GEN_INT (1)));
2220 emit_insn (gen_addv4df3 (operands[0], tmp, tmp2));
;; Sum reduction of a V2DF vector: a single horizontal add of operand 1
;; with itself, written directly to operand 0.
2224 (define_expand "reduc_splus_v2df"
2225 [(match_operand:V2DF 0 "register_operand")
2226 (match_operand:V2DF 1 "register_operand")]
2229 emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
;; Sum reduction of a V16SF vector (operand 1) into operand 0, delegated
;; to ix86_expand_reduc with gen_addv16sf3 as the combining generator.
2233 (define_expand "reduc_splus_v16sf"
2234 [(match_operand:V16SF 0 "register_operand")
2235 (match_operand:V16SF 1 "register_operand")]
2238 ix86_expand_reduc (gen_addv16sf3, operands[0], operands[1]);
;; Sum reduction of a V8SF vector.  Sequence: two successive horizontal
;; adds (operand 1 with itself, then the result with itself), a
;; vperm2f128 with immediate 1 of that result (reusing tmp as the
;; destination), and a final vector add into operand 0.
2242 (define_expand "reduc_splus_v8sf"
2243 [(match_operand:V8SF 0 "register_operand")
2244 (match_operand:V8SF 1 "register_operand")]
2247 rtx tmp = gen_reg_rtx (V8SFmode);
2248 rtx tmp2 = gen_reg_rtx (V8SFmode);
2249 emit_insn (gen_avx_haddv8sf3 (tmp, operands[1], operands[1]));
2250 emit_insn (gen_avx_haddv8sf3 (tmp2, tmp, tmp));
2251 emit_insn (gen_avx_vperm2f128v8sf3 (tmp, tmp2, tmp2, GEN_INT (1)));
2252 emit_insn (gen_addv8sf3 (operands[0], tmp, tmp2));
;; Sum reduction of a V4SF vector.  Two strategies are visible: two
;; chained horizontal adds through a temporary, or the generic
;; ix86_expand_reduc helper with gen_addv4sf3.
;; NOTE(review): the test that chooses between them is not visible in
;; this excerpt -- presumably SSE3 availability; confirm against the file.
2256 (define_expand "reduc_splus_v4sf"
2257 [(match_operand:V4SF 0 "register_operand")
2258 (match_operand:V4SF 1 "register_operand")]
2263 rtx tmp = gen_reg_rtx (V4SFmode);
2264 emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
2265 emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
2268 ix86_expand_reduc (gen_addv4sf3, operands[0], operands[1]);
;; Each mode below is enabled only under the target condition listed
;; next to it: AVX2 for the 256-bit integer modes, AVX for the 256-bit
;; float modes, SSE for V4SF, and AVX512F for the 512-bit modes.
2272 ;; Modes handled by reduc_sm{in,ax}* patterns.
2273 (define_mode_iterator REDUC_SMINMAX_MODE
2274 [(V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
2275 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
2276 (V8SF "TARGET_AVX") (V4DF "TARGET_AVX")
2277 (V4SF "TARGET_SSE") (V16SI "TARGET_AVX512F")
2278 (V8DI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
2279 (V8DF "TARGET_AVX512F")])
;; Signed min/max reduction (smaxmin code iterator) for every mode in
;; REDUC_SMINMAX_MODE.  Delegates to ix86_expand_reduc, passing the
;; matching element-wise gen_<code><mode>3 generator.
2281 (define_expand "reduc_<code>_<mode>"
2282 [(smaxmin:REDUC_SMINMAX_MODE
2283 (match_operand:REDUC_SMINMAX_MODE 0 "register_operand")
2284 (match_operand:REDUC_SMINMAX_MODE 1 "register_operand"))]
2287 ix86_expand_reduc (gen_<code><mode>3, operands[0], operands[1]);
;; Reduction expander for the VI48_512 integer modes, delegating to
;; ix86_expand_reduc with gen_<code><mode>3.
;; NOTE(review): the code iterator line is not visible in this excerpt;
;; presumably this is the unsigned min/max counterpart -- confirm.
2291 (define_expand "reduc_<code>_<mode>"
2293 (match_operand:VI48_512 0 "register_operand")
2294 (match_operand:VI48_512 1 "register_operand"))]
2297 ix86_expand_reduc (gen_<code><mode>3, operands[0], operands[1]);
;; Reduction expander for the VI_256 integer modes, delegating to
;; ix86_expand_reduc with gen_<code><mode>3.
;; NOTE(review): the code iterator line is not visible in this excerpt;
;; presumably this is the unsigned min/max counterpart -- confirm.
2301 (define_expand "reduc_<code>_<mode>"
2303 (match_operand:VI_256 0 "register_operand")
2304 (match_operand:VI_256 1 "register_operand"))]
2307 ix86_expand_reduc (gen_<code><mode>3, operands[0], operands[1]);
;; Unsigned-minimum reduction of a V8HI vector, delegating to
;; ix86_expand_reduc with gen_uminv8hi3.
;; NOTE(review): the enabling condition is not visible in this excerpt.
2311 (define_expand "reduc_umin_v8hi"
2313 (match_operand:V8HI 0 "register_operand")
2314 (match_operand:V8HI 1 "register_operand"))]
2317 ix86_expand_reduc (gen_uminv8hi3, operands[0], operands[1]);
2321 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2323 ;; Parallel floating point comparisons
2325 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; VEX-encoded packed compare over the VF_128_256 modes.  Operand 2 may
;; be memory; operand 3 is an immediate in the range 0..31 that is
;; printed as the immediate of vcmp<ssemodesuffix>.
2327 (define_insn "avx_cmp<mode>3"
2328 [(set (match_operand:VF_128_256 0 "register_operand" "=x")
2330 [(match_operand:VF_128_256 1 "register_operand" "x")
2331 (match_operand:VF_128_256 2 "nonimmediate_operand" "xm")
2332 (match_operand:SI 3 "const_0_to_31_operand" "n")]
2335 "vcmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2336 [(set_attr "type" "ssecmp")
2337 (set_attr "length_immediate" "1")
2338 (set_attr "prefix" "vex")
2339 (set_attr "mode" "<MODE>")])
;; VEX-encoded compare on VF_128 operands using the scalar mnemonic
;; suffix (<ssescalarmodesuffix>) and scalar "mode" attribute.  Operand 3
;; is an immediate in 0..31; in Intel syntax a memory operand 2 is
;; printed with the <iptr> size override.
2341 (define_insn "avx_vmcmp<mode>3"
2342 [(set (match_operand:VF_128 0 "register_operand" "=x")
2345 [(match_operand:VF_128 1 "register_operand" "x")
2346 (match_operand:VF_128 2 "nonimmediate_operand" "xm")
2347 (match_operand:SI 3 "const_0_to_31_operand" "n")]
2352 "vcmp<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %3}"
2353 [(set_attr "type" "ssecmp")
2354 (set_attr "length_immediate" "1")
2355 (set_attr "prefix" "vex")
2356 (set_attr "mode" "<ssescalarmode>")])
;; Packed compare whose operator (operand 3) is a commutative comparison:
;; the condition requires RTX_COMM_COMPARE, and operand 1 carries the '%'
;; commutative marker so the operands may be swapped.  The comparison
;; name is printed via %D3.  Alternative 0 is the two-operand SSE form,
;; alternative 1 the three-operand VEX form.
2358 (define_insn "*<sse>_maskcmp<mode>3_comm"
2359 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
2360 (match_operator:VF_128_256 3 "sse_comparison_operator"
2361 [(match_operand:VF_128_256 1 "register_operand" "%0,x")
2362 (match_operand:VF_128_256 2 "nonimmediate_operand" "xm,xm")]))]
2364 && GET_RTX_CLASS (GET_CODE (operands[3])) == RTX_COMM_COMPARE"
2366 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
2367 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2368 [(set_attr "isa" "noavx,avx")
2369 (set_attr "type" "ssecmp")
2370 (set_attr "length_immediate" "1")
2371 (set_attr "prefix" "orig,vex")
2372 (set_attr "mode" "<MODE>")])
;; Packed compare for any operator accepted by sse_comparison_operator
;; (operand 3), printed via %D3.  Unlike the _comm variant, operand 1 is
;; not marked commutative.  Alternative 0 is the two-operand SSE form
;; (destination tied to operand 1); alternative 1 is the VEX form.
2374 (define_insn "<sse>_maskcmp<mode>3"
2375 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
2376 (match_operator:VF_128_256 3 "sse_comparison_operator"
2377 [(match_operand:VF_128_256 1 "register_operand" "0,x")
2378 (match_operand:VF_128_256 2 "nonimmediate_operand" "xm,xm")]))]
2381 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
2382 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2383 [(set_attr "isa" "noavx,avx")
2384 (set_attr "type" "ssecmp")
2385 (set_attr "length_immediate" "1")
2386 (set_attr "prefix" "orig,vex")
2387 (set_attr "mode" "<MODE>")])
;; Compare on VF_128 operands emitted with the scalar mnemonic suffix
;; (<ssescalarmodesuffix>); the comparison name comes from operand 3 via
;; %D3.  length_immediate is 1 for the SSE alternative and left at its
;; default ("*") for the VEX alternative.
;; NOTE(review): the RTL wrapping the comparison is only partly visible.
2389 (define_insn "<sse>_vmmaskcmp<mode>3"
2390 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
2392 (match_operator:VF_128 3 "sse_comparison_operator"
2393 [(match_operand:VF_128 1 "register_operand" "0,x")
2394 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm")])
2399 cmp%D3<ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
2400 vcmp%D3<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %<iptr>2}"
2401 [(set_attr "isa" "noavx,avx")
2402 (set_attr "type" "ssecmp")
2403 (set_attr "length_immediate" "1,*")
2404 (set_attr "prefix" "orig,vex")
2405 (set_attr "mode" "<ssescalarmode>")])
;; Predicate for the compare immediate, chosen per 512-bit mode: the
;; float modes accept 0..31, the integer modes accept 0..7.
2407 (define_mode_attr cmp_imm_predicate
2408 [(V16SF "const_0_to_31_operand") (V8DF "const_0_to_31_operand")
2409 (V16SI "const_0_to_7_operand") (V8DI "const_0_to_7_operand")])
;; AVX512F compare of two VI48F_512 vectors producing a mask register
;; ("Yk") of mode <avx512fmaskmode>.  The immediate's valid range comes
;; from <cmp_imm_predicate>.  The pattern is instantiated through the
;; mask_scalar_merge and round_saeonly substitutions named in the title,
;; which supply the extra pieces of the output template.
;; NOTE(review): the unspec identifier is not visible in this excerpt.
2411 (define_insn "avx512f_cmp<mode>3<mask_scalar_merge_name><round_saeonly_name>"
2412 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2413 (unspec:<avx512fmaskmode>
2414 [(match_operand:VI48F_512 1 "register_operand" "v")
2415 (match_operand:VI48F_512 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
2416 (match_operand:SI 3 "<cmp_imm_predicate>" "n")]
2418 "TARGET_AVX512F && <round_saeonly_mode512bit_condition>"
2419 "v<sseintprefix>cmp<ssemodesuffix>\t{%3, <round_saeonly_mask_scalar_merge_op4>%2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2<round_saeonly_mask_scalar_merge_op4>, %3}"
2420 [(set_attr "type" "ssecmp")
2421 (set_attr "length_immediate" "1")
2422 (set_attr "prefix" "evex")
2423 (set_attr "mode" "<sseinsnmode>")])
;; AVX512F unsigned integer compare (UNSPEC_UNSIGNED_PCMP) of two
;; VI48_512 vectors into a mask register.  Operand 3 is an immediate in
;; 0..7.  Emitted as vpcmpu<ssemodesuffix>.
2425 (define_insn "avx512f_ucmp<mode>3<mask_scalar_merge_name>"
2426 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2427 (unspec:<avx512fmaskmode>
2428 [(match_operand:VI48_512 1 "register_operand" "v")
2429 (match_operand:VI48_512 2 "nonimmediate_operand" "vm")
2430 (match_operand:SI 3 "const_0_to_7_operand" "n")]
2431 UNSPEC_UNSIGNED_PCMP))]
2433 "vpcmpu<ssemodesuffix>\t{%3, %2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2, %3}"
2434 [(set_attr "type" "ssecmp")
2435 (set_attr "length_immediate" "1")
2436 (set_attr "prefix" "evex")
2437 (set_attr "mode" "<sseinsnmode>")])
;; EVEX-encoded compare on VF_128 operands with the scalar mnemonic
;; suffix, writing a mask register.  The unspec result is wrapped in an
;; (and ...) whose second operand is not visible in this excerpt.
;; Operand 3 is an immediate in 0..31.
2439 (define_insn "avx512f_vmcmp<mode>3<round_saeonly_name>"
2440 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2441 (and:<avx512fmaskmode>
2442 (unspec:<avx512fmaskmode>
2443 [(match_operand:VF_128 1 "register_operand" "v")
2444 (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
2445 (match_operand:SI 3 "const_0_to_31_operand" "n")]
2449 "vcmp<ssescalarmodesuffix>\t{%3, <round_saeonly_op4>%2, %1, %0|%0, %1, %2<round_saeonly_op4>, %3}"
2450 [(set_attr "type" "ssecmp")
2451 (set_attr "length_immediate" "1")
2452 (set_attr "prefix" "evex")
2453 (set_attr "mode" "<ssescalarmode>")])
;; Masked form of the VF_128 scalar-suffix compare: operand 4 is an
;; additional mask register that is and-ed into the result and printed
;; as the {%4} write-mask on the destination.
;; NOTE(review): parts of the nested (and ...) RTL are not visible here.
2455 (define_insn "avx512f_vmcmp<mode>3_mask<round_saeonly_name>"
2456 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2457 (and:<avx512fmaskmode>
2458 (unspec:<avx512fmaskmode>
2459 [(match_operand:VF_128 1 "register_operand" "v")
2460 (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
2461 (match_operand:SI 3 "const_0_to_31_operand" "n")]
2463 (and:<avx512fmaskmode>
2464 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")
2467 "vcmp<ssescalarmodesuffix>\t{%3, <round_saeonly_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_saeonly_op5>, %3}"
2468 [(set_attr "type" "ssecmp")
2469 (set_attr "length_immediate" "1")
2470 (set_attr "prefix" "evex")
2471 (set_attr "mode" "<ssescalarmode>")])
;; Compare of two VF vectors through an sse_comparison_operator
;; (operand 3), producing a mask register.  The comparison name is
;; printed via %D3; encoding is EVEX.
;; NOTE(review): the enabling condition is not visible in this excerpt.
2473 (define_insn "avx512f_maskcmp<mode>3"
2474 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2475 (match_operator:<avx512fmaskmode> 3 "sse_comparison_operator"
2476 [(match_operand:VF 1 "register_operand" "v")
2477 (match_operand:VF 2 "nonimmediate_operand" "vm")]))]
2479 "vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2480 [(set_attr "type" "ssecmp")
2481 (set_attr "length_immediate" "1")
2482 (set_attr "prefix" "evex")
2483 (set_attr "mode" "<sseinsnmode>")])
;; Sets the flags register in CCFP mode from element 0 of vector
;; operand 0 and element 0 of vector operand 1.  Enabled when
;; SSE_FLOAT_MODE_P holds for the scalar mode.  Emitted as [v]comi with
;; the mode suffix; prefix_data16 depends on whether the mode is DF.
2485 (define_insn "<sse>_comi<round_saeonly_name>"
2486 [(set (reg:CCFP FLAGS_REG)
2489 (match_operand:<ssevecmode> 0 "register_operand" "v")
2490 (parallel [(const_int 0)]))
2492 (match_operand:<ssevecmode> 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
2493 (parallel [(const_int 0)]))))]
2494 "SSE_FLOAT_MODE_P (<MODE>mode)"
2495 "%vcomi<ssemodesuffix>\t{<round_saeonly_op2>%1, %0|%0, %<iptr>1<round_saeonly_op2>}"
2496 [(set_attr "type" "ssecomi")
2497 (set_attr "prefix" "maybe_vex")
2498 (set_attr "prefix_rep" "0")
2499 (set (attr "prefix_data16")
2500 (if_then_else (eq_attr "mode" "DF")
2502 (const_string "0")))
2503 (set_attr "mode" "<MODE>")])
;; Same shape as the comi pattern, but sets the flags register in CCFPU
;; mode and emits [v]ucomi.  Compares element 0 of operand 0 with
;; element 0 of operand 1.
2505 (define_insn "<sse>_ucomi<round_saeonly_name>"
2506 [(set (reg:CCFPU FLAGS_REG)
2509 (match_operand:<ssevecmode> 0 "register_operand" "v")
2510 (parallel [(const_int 0)]))
2512 (match_operand:<ssevecmode> 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
2513 (parallel [(const_int 0)]))))]
2514 "SSE_FLOAT_MODE_P (<MODE>mode)"
2515 "%vucomi<ssemodesuffix>\t{<round_saeonly_op2>%1, %0|%0, %<iptr>1<round_saeonly_op2>}"
2516 [(set_attr "type" "ssecomi")
2517 (set_attr "prefix" "maybe_vex")
2518 (set_attr "prefix_rep" "0")
2519 (set (attr "prefix_data16")
2520 (if_then_else (eq_attr "mode" "DF")
2522 (const_string "0")))
2523 (set_attr "mode" "<MODE>")])
;; 512-bit vector conditional select: operands 1 and 2 are the values
;; (mode V_512), operator 3 compares operands 4 and 5 (mode VF_512).
;; Only valid when the data mode and the comparison mode have the same
;; number of elements.  Expansion is delegated to ix86_expand_fp_vcond.
2525 (define_expand "vcond<V_512:mode><VF_512:mode>"
2526 [(set (match_operand:V_512 0 "register_operand")
2528 (match_operator 3 ""
2529 [(match_operand:VF_512 4 "nonimmediate_operand")
2530 (match_operand:VF_512 5 "nonimmediate_operand")])
2531 (match_operand:V_512 1 "general_operand")
2532 (match_operand:V_512 2 "general_operand")))]
2534 && (GET_MODE_NUNITS (<V_512:MODE>mode)
2535 == GET_MODE_NUNITS (<VF_512:MODE>mode))"
2537 bool ok = ix86_expand_fp_vcond (operands);
;; 256-bit vector conditional select: operands 1 and 2 are the values
;; (mode V_256), operator 3 compares operands 4 and 5 (mode VF_256).
;; Only valid when the data mode and the comparison mode have the same
;; number of elements.  Expansion is delegated to ix86_expand_fp_vcond.
2542 (define_expand "vcond<V_256:mode><VF_256:mode>"
2543 [(set (match_operand:V_256 0 "register_operand")
2545 (match_operator 3 ""
2546 [(match_operand:VF_256 4 "nonimmediate_operand")
2547 (match_operand:VF_256 5 "nonimmediate_operand")])
2548 (match_operand:V_256 1 "general_operand")
2549 (match_operand:V_256 2 "general_operand")))]
2551 && (GET_MODE_NUNITS (<V_256:MODE>mode)
2552 == GET_MODE_NUNITS (<VF_256:MODE>mode))"
2554 bool ok = ix86_expand_fp_vcond (operands);
;; 128-bit vector conditional select: operands 1 and 2 are the values
;; (mode V_128), operator 3 compares operands 4 and 5 (mode VF_128).
;; Only valid when the data mode and the comparison mode have the same
;; number of elements.  Expansion is delegated to ix86_expand_fp_vcond.
2559 (define_expand "vcond<V_128:mode><VF_128:mode>"
2560 [(set (match_operand:V_128 0 "register_operand")
2562 (match_operator 3 ""
2563 [(match_operand:VF_128 4 "nonimmediate_operand")
2564 (match_operand:VF_128 5 "nonimmediate_operand")])
2565 (match_operand:V_128 1 "general_operand")
2566 (match_operand:V_128 2 "general_operand")))]
2568 && (GET_MODE_NUNITS (<V_128:MODE>mode)
2569 == GET_MODE_NUNITS (<VF_128:MODE>mode))"
2571 bool ok = ix86_expand_fp_vcond (operands);
2576 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2578 ;; Parallel floating point logical operations
2580 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Vector and-not over all VF modes.  The output template is built at
;; emit time into a static buffer: the mnemonic suffix is picked from the
;; insn's "mode" attribute, alternative 0 uses the two-operand "andn"
;; form and alternative 1 the three-operand "vandn" form.  For 64-byte
;; modes the integer "vpandn" mnemonic is used instead, as the in-line
;; comment explains.  The "mode" attribute itself is computed from the
;; tuning and size conditions at the end of the pattern.
2582 (define_insn "<sse>_andnot<mode>3"
2583 [(set (match_operand:VF 0 "register_operand" "=x,v")
2586 (match_operand:VF 1 "register_operand" "0,v"))
2587 (match_operand:VF 2 "nonimmediate_operand" "xm,vm")))]
2590 static char buf[32];
2594 switch (get_attr_mode (insn))
2601 suffix = "<ssemodesuffix>";
2604 switch (which_alternative)
2607 ops = "andn%s\t{%%2, %%0|%%0, %%2}";
2610 ops = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
2616 /* There is no vandnp[sd]. Use vpandnq. */
2617 if (<MODE_SIZE> == 64)
2620 ops = "vpandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
2623 snprintf (buf, sizeof (buf), ops, suffix);
2626 [(set_attr "isa" "noavx,avx")
2627 (set_attr "type" "sselog")
2628 (set_attr "prefix" "orig,maybe_evex")
2630 (cond [(and (match_test "<MODE_SIZE> == 16")
2631 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
2632 (const_string "<ssePSmode>")
2633 (match_test "TARGET_AVX")
2634 (const_string "<MODE>")
2635 (match_test "optimize_function_for_size_p (cfun)")
2636 (const_string "V4SF")
2638 (const_string "<MODE>")))])
;; Expander for the any_logic operations on 128/256-bit float vectors.
;; Both inputs may be memory; ix86_fixup_binary_operands_no_copy is
;; called to legitimize the operand combination before matching.
2640 (define_expand "<code><mode>3"
2641 [(set (match_operand:VF_128_256 0 "register_operand")
2642 (any_logic:VF_128_256
2643 (match_operand:VF_128_256 1 "nonimmediate_operand")
2644 (match_operand:VF_128_256 2 "nonimmediate_operand")))]
2646 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; 512-bit counterpart of the logical-operation expander: operands are
;; fixed up through ix86_fixup_binary_operands_no_copy.
;; NOTE(review): the code iterator and enabling condition lines are not
;; visible in this excerpt.
2648 (define_expand "<code><mode>3"
2649 [(set (match_operand:VF_512 0 "register_operand")
2651 (match_operand:VF_512 1 "nonimmediate_operand")
2652 (match_operand:VF_512 2 "nonimmediate_operand")))]
2654 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Matcher for commutative logical operations on all VF modes (operand 1
;; carries the '%' marker).  Requires TARGET_SSE and
;; ix86_binary_operator_ok.  The template is assembled at emit time: the
;; suffix follows the insn "mode" attribute, alternative 0 is the
;; two-operand form, alternative 1 the "v"-prefixed three-operand form,
;; and 64-byte modes switch to the integer "vp<logic>" mnemonic.
2656 (define_insn "*<code><mode>3"
2657 [(set (match_operand:VF 0 "register_operand" "=x,v")
2659 (match_operand:VF 1 "nonimmediate_operand" "%0,v")
2660 (match_operand:VF 2 "nonimmediate_operand" "xm,vm")))]
2661 "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
2663 static char buf[32];
2667 switch (get_attr_mode (insn))
2674 suffix = "<ssemodesuffix>";
2677 switch (which_alternative)
2680 ops = "<logic>%s\t{%%2, %%0|%%0, %%2}";
2683 ops = "v<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
2689 /* There is no v<logic>p[sd]. Use vp<logic>q. */
2690 if (<MODE_SIZE> == 64)
2693 ops = "vp<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
2696 snprintf (buf, sizeof (buf), ops, suffix);
2699 [(set_attr "isa" "noavx,avx")
2700 (set_attr "type" "sselog")
2701 (set_attr "prefix" "orig,maybe_evex")
2703 (cond [(and (match_test "<MODE_SIZE> == 16")
2704 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
2705 (const_string "<ssePSmode>")
2706 (match_test "TARGET_AVX")
2707 (const_string "<MODE>")
2708 (match_test "optimize_function_for_size_p (cfun)")
2709 (const_string "V4SF")
2711 (const_string "<MODE>")))])
;; Vector copysign built from logical operations.  operands[3] is the
;; mask returned by ix86_build_signbit_mask; operands[4] and operands[5]
;; are fresh temporaries.  The visible RTL combines (not mask) with
;; operand 1 and mask with operand 2, then ior-s the two temporaries
;; into operand 0.
;; NOTE(review): the set destinations of the first two steps are not
;; visible here; presumably they are match_dup 4 and match_dup 5.
2713 (define_expand "copysign<mode>3"
2716 (not:VF (match_dup 3))
2717 (match_operand:VF 1 "nonimmediate_operand")))
2719 (and:VF (match_dup 3)
2720 (match_operand:VF 2 "nonimmediate_operand")))
2721 (set (match_operand:VF 0 "register_operand")
2722 (ior:VF (match_dup 4) (match_dup 5)))]
2725 operands[3] = ix86_build_signbit_mask (<MODE>mode, 1, 0);
2727 operands[4] = gen_reg_rtx (<MODE>mode);
2728 operands[5] = gen_reg_rtx (<MODE>mode);
2731 ;; Also define scalar versions. These are used for abs, neg, and
2732 ;; conditional move. Using subregs into vector modes causes register
2733 ;; allocation lossage. These patterns do not allow memory operands
2734 ;; because the native instructions read the full 128-bits.
;; Scalar (MODEF) and-not with register-only operands.  The mnemonic
;; suffix is "ps" when the insn "mode" attribute is V4SF, otherwise
;; <ssevecmodesuffix>.  Alternative 0 emits the two-operand "andn" form,
;; alternative 1 the three-operand "vandn" form.
2736 (define_insn "*andnot<mode>3"
2737 [(set (match_operand:MODEF 0 "register_operand" "=x,x")
2740 (match_operand:MODEF 1 "register_operand" "0,x"))
2741 (match_operand:MODEF 2 "register_operand" "x,x")))]
2742 "SSE_FLOAT_MODE_P (<MODE>mode)"
2744 static char buf[32];
2747 = (get_attr_mode (insn) == MODE_V4SF) ? "ps" : "<ssevecmodesuffix>";
2749 switch (which_alternative)
2752 ops = "andn%s\t{%%2, %%0|%%0, %%2}";
2755 ops = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
2761 snprintf (buf, sizeof (buf), ops, suffix);
2764 [(set_attr "isa" "noavx,avx")
2765 (set_attr "type" "sselog")
2766 (set_attr "prefix" "orig,vex")
2768 (cond [(and (match_test "<MODE_SIZE> == 16")
2769 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
2770 (const_string "V4SF")
2771 (match_test "TARGET_AVX")
2772 (const_string "<ssevecmode>")
2773 (match_test "optimize_function_for_size_p (cfun)")
2774 (const_string "V4SF")
2776 (const_string "<ssevecmode>")))])
;; TFmode and-not.  The mnemonic is "andnps" when the insn "mode"
;; attribute is V4SF and "pandn" otherwise; alternative 1 prepends "v"
;; and uses the three-operand form.  Operand 2 may be memory.
;; NOTE(review): the enabling condition is not visible in this excerpt.
2778 (define_insn "*andnottf3"
2779 [(set (match_operand:TF 0 "register_operand" "=x,x")
2781 (not:TF (match_operand:TF 1 "register_operand" "0,x"))
2782 (match_operand:TF 2 "nonimmediate_operand" "xm,xm")))]
2785 static char buf[32];
2788 = (get_attr_mode (insn) == MODE_V4SF) ? "andnps" : "pandn";
2790 switch (which_alternative)
2793 ops = "%s\t{%%2, %%0|%%0, %%2}";
2796 ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
2802 snprintf (buf, sizeof (buf), ops, tmp);
2805 [(set_attr "isa" "noavx,avx")
2806 (set_attr "type" "sselog")
2807 (set (attr "prefix_data16")
2809 (and (eq_attr "alternative" "0")
2810 (eq_attr "mode" "TI"))
2812 (const_string "*")))
2813 (set_attr "prefix" "orig,vex")
2815 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
2816 (const_string "V4SF")
2817 (match_test "TARGET_AVX")
2819 (ior (not (match_test "TARGET_SSE2"))
2820 (match_test "optimize_function_for_size_p (cfun)"))
2821 (const_string "V4SF")
2823 (const_string "TI")))])
;; Scalar (MODEF) commutative logical operation with register-only
;; operands.  The suffix is "ps" when the insn "mode" attribute is V4SF,
;; otherwise <ssevecmodesuffix>; alternative 0 is the two-operand form
;; and alternative 1 the "v"-prefixed three-operand form.
2825 (define_insn "*<code><mode>3"
2826 [(set (match_operand:MODEF 0 "register_operand" "=x,x")
2828 (match_operand:MODEF 1 "register_operand" "%0,x")
2829 (match_operand:MODEF 2 "register_operand" "x,x")))]
2830 "SSE_FLOAT_MODE_P (<MODE>mode)"
2832 static char buf[32];
2835 = (get_attr_mode (insn) == MODE_V4SF) ? "ps" : "<ssevecmodesuffix>";
2837 switch (which_alternative)
2840 ops = "<logic>%s\t{%%2, %%0|%%0, %%2}";
2843 ops = "v<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
2849 snprintf (buf, sizeof (buf), ops, suffix);
2852 [(set_attr "isa" "noavx,avx")
2853 (set_attr "type" "sselog")
2854 (set_attr "prefix" "orig,vex")
2856 (cond [(and (match_test "<MODE_SIZE> == 16")
2857 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
2858 (const_string "V4SF")
2859 (match_test "TARGET_AVX")
2860 (const_string "<ssevecmode>")
2861 (match_test "optimize_function_for_size_p (cfun)")
2862 (const_string "V4SF")
2864 (const_string "<ssevecmode>")))])
;; Expander for TFmode logical operations; operands are legitimized by
;; ix86_fixup_binary_operands_no_copy.
;; NOTE(review): the code iterator and enabling condition lines are not
;; visible in this excerpt.
2866 (define_expand "<code>tf3"
2867 [(set (match_operand:TF 0 "register_operand")
2869 (match_operand:TF 1 "nonimmediate_operand")
2870 (match_operand:TF 2 "nonimmediate_operand")))]
2872 "ix86_fixup_binary_operands_no_copy (<CODE>, TFmode, operands);")
;; Matcher for commutative TFmode logical operations; requires
;; ix86_binary_operator_ok.  The mnemonic is "<logic>ps" when the insn
;; "mode" attribute is V4SF and "p<logic>" otherwise; alternative 1
;; prepends "v" and uses the three-operand form.
2874 (define_insn "*<code>tf3"
2875 [(set (match_operand:TF 0 "register_operand" "=x,x")
2877 (match_operand:TF 1 "nonimmediate_operand" "%0,x")
2878 (match_operand:TF 2 "nonimmediate_operand" "xm,xm")))]
2880 && ix86_binary_operator_ok (<CODE>, TFmode, operands)"
2882 static char buf[32];
2885 = (get_attr_mode (insn) == MODE_V4SF) ? "<logic>ps" : "p<logic>";
2887 switch (which_alternative)
2890 ops = "%s\t{%%2, %%0|%%0, %%2}";
2893 ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
2899 snprintf (buf, sizeof (buf), ops, tmp);
2902 [(set_attr "isa" "noavx,avx")
2903 (set_attr "type" "sselog")
2904 (set (attr "prefix_data16")
2906 (and (eq_attr "alternative" "0")
2907 (eq_attr "mode" "TI"))
2909 (const_string "*")))
2910 (set_attr "prefix" "orig,vex")
2912 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
2913 (const_string "V4SF")
2914 (match_test "TARGET_AVX")
2916 (ior (not (match_test "TARGET_SSE2"))
2917 (match_test "optimize_function_for_size_p (cfun)"))
2918 (const_string "V4SF")
2920 (const_string "TI")))])
2922 ;; There is no floating-point xor for V16SF and V8DF in AVX512F,
2923 ;; but we need one for negation. Instead we use the integer versions
2924 ;; of xor. Maybe there is a better way to do that.
;; Element-size suffix appended to the integer logic mnemonic for each
;; 512-bit float mode: "d" for V16SF, "q" for V8DF.
2926 (define_mode_attr avx512flogicsuff
2927 [(V16SF "d") (V8DF "q")])
;; Logical operation on 512-bit float vectors emitted with the integer
;; mnemonic vp<logic> plus the <avx512flogicsuff> element suffix.
;; Operand 2 may be memory; encoding is EVEX.
;; NOTE(review): the code iterator and enabling condition lines are not
;; visible in this excerpt.
2929 (define_insn "avx512f_<logic><mode>"
2930 [(set (match_operand:VF_512 0 "register_operand" "=v")
2932 (match_operand:VF_512 1 "register_operand" "v")
2933 (match_operand:VF_512 2 "nonimmediate_operand" "vm")))]
2935 "vp<logic><avx512flogicsuff>\t{%2, %1, %0|%0, %1, %2}"
2936 [(set_attr "type" "sselog")
2937 (set_attr "prefix" "evex")])
2939 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2941 ;; FMA floating point multiply/accumulate instructions. These include
2942 ;; scalar versions of the instructions as well as vector versions.
2944 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2946 ;; The standard names for scalar FMA are only available with SSE math enabled.
2947 ;; CPUID bit AVX512F enables evex encoded scalar and 512-bit fma. It doesn't
2948 ;; care about FMA bit, so we enable fma for TARGET_AVX512F even when TARGET_FMA
2949 ;; and TARGET_FMA4 are both false.
2950 ;; TODO: In theory AVX512F does not automatically imply FMA, and without FMA
2951 ;; one must force the EVEX encoding of the fma insns. Ideally we'd improve
2952 ;; GAS to allow proper prefix selection. However, for the moment all hardware
2953 ;; that supports AVX512F also supports FMA so we can ignore this for now.
;; Modes for the standard-named FMA expanders.  The scalar modes also
;; require TARGET_SSE_MATH; the 128/256-bit vector modes need FMA or
;; FMA4; the 512-bit modes need AVX512F.
2954 (define_mode_iterator FMAMODEM
2955 [(SF "TARGET_SSE_MATH && (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F)")
2956 (DF "TARGET_SSE_MATH && (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F)")
2957 (V4SF "TARGET_FMA || TARGET_FMA4")
2958 (V2DF "TARGET_FMA || TARGET_FMA4")
2959 (V8SF "TARGET_FMA || TARGET_FMA4")
2960 (V4DF "TARGET_FMA || TARGET_FMA4")
2961 (V16SF "TARGET_AVX512F")
2962 (V8DF "TARGET_AVX512F")])
;; Standard-named fused multiply-add expander over FMAMODEM; none of the
;; three inputs is negated.  It has no preparation code: the RTL template
;; is emitted as-is.
2964 (define_expand "fma<mode>4"
2965 [(set (match_operand:FMAMODEM 0 "register_operand")
2967 (match_operand:FMAMODEM 1 "nonimmediate_operand")
2968 (match_operand:FMAMODEM 2 "nonimmediate_operand")
2969 (match_operand:FMAMODEM 3 "nonimmediate_operand")))])
;; Standard-named fused multiply-subtract expander over FMAMODEM:
;; operand 3 (the addend) is wrapped in neg.
2971 (define_expand "fms<mode>4"
2972 [(set (match_operand:FMAMODEM 0 "register_operand")
2974 (match_operand:FMAMODEM 1 "nonimmediate_operand")
2975 (match_operand:FMAMODEM 2 "nonimmediate_operand")
2976 (neg:FMAMODEM (match_operand:FMAMODEM 3 "nonimmediate_operand"))))])
;; Standard-named negated fused multiply-add expander over FMAMODEM:
;; operand 1 (the first multiplicand) is wrapped in neg.
2978 (define_expand "fnma<mode>4"
2979 [(set (match_operand:FMAMODEM 0 "register_operand")
2981 (neg:FMAMODEM (match_operand:FMAMODEM 1 "nonimmediate_operand"))
2982 (match_operand:FMAMODEM 2 "nonimmediate_operand")
2983 (match_operand:FMAMODEM 3 "nonimmediate_operand")))])
;; Standard-named negated fused multiply-subtract expander over FMAMODEM:
;; both operand 1 and operand 3 are wrapped in neg.
2985 (define_expand "fnms<mode>4"
2986 [(set (match_operand:FMAMODEM 0 "register_operand")
2988 (neg:FMAMODEM (match_operand:FMAMODEM 1 "nonimmediate_operand"))
2989 (match_operand:FMAMODEM 2 "nonimmediate_operand")
2990 (neg:FMAMODEM (match_operand:FMAMODEM 3 "nonimmediate_operand"))))])
;; Same mode list as FMAMODEM, but the scalar entries do not require
;; TARGET_SSE_MATH.
2992 ;; The builtins for intrinsics are not constrained by SSE math enabled.
2993 (define_mode_iterator FMAMODE
2994 [(SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
2995 (DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
2996 (V4SF "TARGET_FMA || TARGET_FMA4")
2997 (V2DF "TARGET_FMA || TARGET_FMA4")
2998 (V8SF "TARGET_FMA || TARGET_FMA4")
2999 (V4DF "TARGET_FMA || TARGET_FMA4")
3000 (V16SF "TARGET_AVX512F")
3001 (V8DF "TARGET_AVX512F")])
;; Builtin-facing fused multiply-add expander over FMAMODE (no
;; TARGET_SSE_MATH requirement on the scalar modes).  It has no
;; preparation code: the RTL template is emitted as-is.
3003 (define_expand "fma4i_fmadd_<mode>"
3004 [(set (match_operand:FMAMODE 0 "register_operand")
3006 (match_operand:FMAMODE 1 "nonimmediate_operand")
3007 (match_operand:FMAMODE 2 "nonimmediate_operand")
3008 (match_operand:FMAMODE 3 "nonimmediate_operand")))])
;; Zero-masking 512-bit fmadd expander.  Forwards operands 0-3 to the
;; fma_fmadd_<mode>_maskz_1 pattern, inserting CONST0_RTX of the vector
;; mode ahead of the mask (operand 4); any rounding operand from the
;; round_expand substitution is appended last.
3010 (define_expand "avx512f_fmadd_<mode>_maskz<round_expand_name>"
3011 [(match_operand:VF_512 0 "register_operand")
3012 (match_operand:VF_512 1 "<round_expand_nimm_predicate>")
3013 (match_operand:VF_512 2 "<round_expand_nimm_predicate>")
3014 (match_operand:VF_512 3 "<round_expand_nimm_predicate>")
3015 (match_operand:<avx512fmaskmode> 4 "register_operand")]
3018 emit_insn (gen_fma_fmadd_<mode>_maskz_1<round_expand_name> (
3019 operands[0], operands[1], operands[2], operands[3],
3020 CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>));
;; FMAMODE without the 512-bit vector modes: scalar SF/DF plus the
;; 128-bit and 256-bit float vector modes.
3024 (define_mode_iterator FMAMODE_NOVF512
3025 [(SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
3026 (DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
3027 (V4SF "TARGET_FMA || TARGET_FMA4")
3028 (V2DF "TARGET_FMA || TARGET_FMA4")
3029 (V8SF "TARGET_FMA || TARGET_FMA4")
3030 (V4DF "TARGET_FMA || TARGET_FMA4")])
;; Fused multiply-add for the non-512-bit FMA modes.  Five alternatives:
;; the first three are the 132, 213 and 231 forms (isa fma_avx512f), in
;; which the destination is tied to operand 1, operand 1, and operand 3
;; respectively; the last two are the four-operand FMA4 forms.
3032 (define_insn "<sd_mask_codefor>fma_fmadd_<mode><sd_maskz_name><round_name>"
3033 [(set (match_operand:FMAMODE_NOVF512 0 "register_operand" "=v,v,v,x,x")
3034 (fma:FMAMODE_NOVF512
3035 (match_operand:FMAMODE_NOVF512 1 "<round_nimm_predicate>" "%0,0,v,x,x")
3036 (match_operand:FMAMODE_NOVF512 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>,x,m")
3037 (match_operand:FMAMODE_NOVF512 3 "<round_nimm_predicate>" "v,<round_constraint>,0,xm,x")))]
3038 "<sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
3040 vfmadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
3041 vfmadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
3042 vfmadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}
3043 vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
3044 vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
3045 [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f,fma4,fma4")
3046 (set_attr "type" "ssemuladd")
3047 (set_attr "mode" "<MODE>")])
;; Fused multiply-add for the 512-bit float modes.  Three alternatives,
;; the 132, 213 and 231 forms, with the destination tied to operand 1,
;; operand 1, and operand 3 respectively.  There are no FMA4
;; alternatives for these modes.
3049 (define_insn "<sd_mask_codefor>fma_fmadd_<mode><sd_maskz_name><round_name>"
3050 [(set (match_operand:VF_512 0 "register_operand" "=v,v,v")
3052 (match_operand:VF_512 1 "<round_nimm_predicate>" "%0,0,v")
3053 (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
3054 (match_operand:VF_512 3 "<round_nimm_predicate>" "v,<round_constraint>,0")))]
3055 "<sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
3057 vfmadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
3058 vfmadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
3059 vfmadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
3060 [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f")
3061 (set_attr "type" "ssemuladd")
3062 (set_attr "mode" "<MODE>")])
;; Merge-masked 512-bit fmadd with the destination tied to operand 1 in
;; both alternatives (132 and 213 forms).  Operand 4 is the mask
;; register, printed as {%4} on the destination.
;; NOTE(review): the vec_merge/fma RTL lines and the enabling condition
;; are not visible in this excerpt.
3064 (define_insn "avx512f_fmadd_<mode>_mask<round_name>"
3065 [(set (match_operand:VF_512 0 "register_operand" "=v,v")
3068 (match_operand:VF_512 1 "register_operand" "0,0")
3069 (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>,v")
3070 (match_operand:VF_512 3 "<round_nimm_predicate>" "v,<round_constraint>"))
3072 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
3075 vfmadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
3076 vfmadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
3077 [(set_attr "isa" "fma_avx512f,fma_avx512f")
3078 (set_attr "type" "ssemuladd")
3079 (set_attr "mode" "<MODE>")])
;; Merge-masked 512-bit fmadd with the destination tied to operand 3
;; (the addend), emitted as the 231 form.  Operand 4 is the mask
;; register, printed as {%4} on the destination.
;; Operands 0 and 1 use the "v" constraint, matching the fmsub _mask3
;; sibling and the other 512-bit FMA patterns: this insn is EVEX-only,
;; so restricting it to the "x" register class needlessly limits
;; register allocation.
;; NOTE(review): the vec_merge/fma RTL lines and the enabling condition
;; are not visible in this excerpt.
3081 (define_insn "avx512f_fmadd_<mode>_mask3<round_name>"
3082 [(set (match_operand:VF_512 0 "register_operand" "=v")
3085 (match_operand:VF_512 1 "register_operand" "v")
3086 (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>")
3087 (match_operand:VF_512 3 "register_operand" "0"))
3089 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
3091 "vfmadd231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
3092 [(set_attr "isa" "fma_avx512f")
3093 (set_attr "type" "ssemuladd")
3094 (set_attr "mode" "<MODE>")])
;; Fused multiply-subtract for the non-512-bit FMA modes: same shape as
;; the fmadd pattern, with operand 3 wrapped in neg.  Five alternatives:
;; the 132, 213 and 231 forms (isa fma_avx512f) followed by two
;; four-operand FMA4 forms.
3096 (define_insn "<sd_mask_codefor>fma_fmsub_<mode><sd_maskz_name><round_name>"
3097 [(set (match_operand:FMAMODE_NOVF512 0 "register_operand" "=v,v,v,x,x")
3098 (fma:FMAMODE_NOVF512
3099 (match_operand:FMAMODE_NOVF512 1 "<round_nimm_predicate>" "%0,0,v,x,x")
3100 (match_operand:FMAMODE_NOVF512 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>,x,m")
3101 (neg:FMAMODE_NOVF512
3102 (match_operand:FMAMODE_NOVF512 3 "<round_nimm_predicate>" "v,<round_constraint>,0,xm,x"))))]
3103 "<sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
3105 vfmsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
3106 vfmsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
3107 vfmsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}
3108 vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
3109 vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
3110 [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f,fma4,fma4")
3111 (set_attr "type" "ssemuladd")
3112 (set_attr "mode" "<MODE>")])
;; Fused multiply-subtract for the 512-bit float modes.  Three
;; alternatives, the 132, 213 and 231 forms, with the destination tied
;; to operand 1, operand 1, and operand 3 respectively.
;; NOTE(review): the fma and neg RTL lines are not visible here.
3114 (define_insn "<sd_mask_codefor>fma_fmsub_<mode><sd_maskz_name><round_name>"
3115 [(set (match_operand:VF_512 0 "register_operand" "=v,v,v")
3117 (match_operand:VF_512 1 "<round_nimm_predicate>" "%0,0,v")
3118 (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
3120 (match_operand:VF_512 3 "<round_nimm_predicate>" "v,<round_constraint>,0"))))]
3121 "<sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
3123 vfmsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
3124 vfmsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
3125 vfmsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
3126 [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f")
3127 (set_attr "type" "ssemuladd")
3128 (set_attr "mode" "<MODE>")])
;; Merge-masked 512-bit fmsub with the destination tied to operand 1 in
;; both alternatives (132 and 213 forms).  Operand 4 is the mask
;; register, printed as {%4} on the destination.
;; NOTE(review): the vec_merge/fma/neg RTL lines and the enabling
;; condition are not visible in this excerpt.
3130 (define_insn "avx512f_fmsub_<mode>_mask<round_name>"
3131 [(set (match_operand:VF_512 0 "register_operand" "=v,v")
3134 (match_operand:VF_512 1 "register_operand" "0,0")
3135 (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>,v")
3137 (match_operand:VF_512 3 "<round_nimm_predicate>" "v,<round_constraint>")))
3139 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
3142 vfmsub132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
3143 vfmsub213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
3144 [(set_attr "isa" "fma_avx512f,fma_avx512f")
3145 (set_attr "type" "ssemuladd")
3146 (set_attr "mode" "<MODE>")])
;; Masked fused multiply-subtract for the VF_512 modes in which
;; operand 3 (rather than operand 1) is tied to the destination.  Only
;; the vfmsub231 form is emitted; operand 4 is the "Yk" mask operand.
;; NOTE(review): the insn condition falls on a line elided from this
;; listing.
3148 (define_insn "avx512f_fmsub_<mode>_mask3<round_name>"
3149 [(set (match_operand:VF_512 0 "register_operand" "=v")
3152 (match_operand:VF_512 1 "register_operand" "v")
3153 (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>")
3155 (match_operand:VF_512 3 "register_operand" "0")))
3157 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
3159 "vfmsub231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
3160 [(set_attr "isa" "fma_avx512f")
3161 (set_attr "type" "ssemuladd")
3162 (set_attr "mode" "<MODE>")])
;; Fused negate-multiply-add, (fma (neg op1) op2 op3), for the
;; FMAMODE_NOVF512 modes.  Alternatives 0-2 (isa "fma_avx512f") emit the
;; three-operand vfnmadd132/213/231 forms, where one input is tied to
;; the destination; alternatives 3-4 (isa "fma4") emit the four-operand
;; vfnmadd form with "x" registers and at most one memory input.
3164 (define_insn "<sd_mask_codefor>fma_fnmadd_<mode><sd_maskz_name><round_name>"
3165 [(set (match_operand:FMAMODE_NOVF512 0 "register_operand" "=v,v,v,x,x")
3166 (fma:FMAMODE_NOVF512
3167 (neg:FMAMODE_NOVF512
3168 (match_operand:FMAMODE_NOVF512 1 "<round_nimm_predicate>" "%0,0,v,x,x"))
3169 (match_operand:FMAMODE_NOVF512 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>,x,m")
3170 (match_operand:FMAMODE_NOVF512 3 "<round_nimm_predicate>" "v,<round_constraint>,0,xm,x")))]
3171 "<sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
3173 vfnmadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
3174 vfnmadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
3175 vfnmadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}
3176 vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
3177 vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
3178 [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f,fma4,fma4")
3179 (set_attr "type" "ssemuladd")
3180 (set_attr "mode" "<MODE>")])
;; VF_512 counterpart of the fnmadd insn: three alternatives emitting
;; vfnmadd132/213/231, with operand 1 or operand 3 tied to the
;; destination.  There are no FMA4 alternatives for these modes.
;; NOTE(review): the embedded listing numbers skip, so some lines of the
;; RTL template are not shown here.
3182 (define_insn "<sd_mask_codefor>fma_fnmadd_<mode><sd_maskz_name><round_name>"
3183 [(set (match_operand:VF_512 0 "register_operand" "=v,v,v")
3186 (match_operand:VF_512 1 "<round_nimm_predicate>" "%0,0,v"))
3187 (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
3188 (match_operand:VF_512 3 "<round_nimm_predicate>" "v,<round_constraint>,0")))]
3189 "<sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
3191 vfnmadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
3192 vfnmadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
3193 vfnmadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
3194 [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f")
3195 (set_attr "type" "ssemuladd")
3196 (set_attr "mode" "<MODE>")])
;; Masked fnmadd for the VF_512 modes.  Operand 1 is tied to the
;; destination in both alternatives; operand 4 is the "Yk" mask operand,
;; printed as %{%4%} after the destination.  Emits vfnmadd132 or
;; vfnmadd213.
;; NOTE(review): the insn condition and parts of the RTL template fall
;; on lines elided from this listing.
3198 (define_insn "avx512f_fnmadd_<mode>_mask<round_name>"
3199 [(set (match_operand:VF_512 0 "register_operand" "=v,v")
3203 (match_operand:VF_512 1 "register_operand" "0,0"))
3204 (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>,v")
3205 (match_operand:VF_512 3 "<round_nimm_predicate>" "v,<round_constraint>"))
3207 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
3210 vfnmadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
3211 vfnmadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
3212 [(set_attr "isa" "fma_avx512f,fma_avx512f")
3213 (set_attr "type" "ssemuladd")
3214 (set_attr "mode" "<MODE>")])
;; Masked fnmadd for the VF_512 modes in which operand 3 is tied to the
;; destination.  Only the vfnmadd231 form is emitted; operand 4 is the
;; "Yk" mask operand.
;; NOTE(review): the insn condition falls on a line elided from this
;; listing.
3216 (define_insn "avx512f_fnmadd_<mode>_mask3<round_name>"
3217 [(set (match_operand:VF_512 0 "register_operand" "=v")
3221 (match_operand:VF_512 1 "register_operand" "v"))
3222 (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>")
3223 (match_operand:VF_512 3 "register_operand" "0"))
3225 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
3227 "vfnmadd231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
3228 [(set_attr "isa" "fma_avx512f")
3229 (set_attr "type" "ssemuladd")
3230 (set_attr "mode" "<MODE>")])
;; Fused negate-multiply-subtract, (fma (neg op1) op2 (neg op3)), for
;; the FMAMODE_NOVF512 modes.  Alternatives 0-2 (isa "fma_avx512f") emit
;; the three-operand vfnmsub132/213/231 forms, where one input is tied
;; to the destination; alternatives 3-4 (isa "fma4") emit the
;; four-operand vfnmsub form.
3232 (define_insn "<sd_mask_codefor>fma_fnmsub_<mode><sd_maskz_name><round_name>"
3233 [(set (match_operand:FMAMODE_NOVF512 0 "register_operand" "=v,v,v,x,x")
3234 (fma:FMAMODE_NOVF512
3235 (neg:FMAMODE_NOVF512
3236 (match_operand:FMAMODE_NOVF512 1 "<round_nimm_predicate>" "%0,0,v,x,x"))
3237 (match_operand:FMAMODE_NOVF512 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>,x,m")
3238 (neg:FMAMODE_NOVF512
3239 (match_operand:FMAMODE_NOVF512 3 "<round_nimm_predicate>" "v,<round_constraint>,0,xm,x"))))]
3240 "<sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
3242 vfnmsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
3243 vfnmsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
3244 vfnmsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}
3245 vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
3246 vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
3247 [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f,fma4,fma4")
3248 (set_attr "type" "ssemuladd")
3249 (set_attr "mode" "<MODE>")])
;; VF_512 counterpart of the fnmsub insn: three alternatives emitting
;; vfnmsub132/213/231, with operand 1 or operand 3 tied to the
;; destination.  There are no FMA4 alternatives for these modes.
;; NOTE(review): the embedded listing numbers skip, so some lines of the
;; RTL template are not shown here.
3251 (define_insn "<sd_mask_codefor>fma_fnmsub_<mode><sd_maskz_name><round_name>"
3252 [(set (match_operand:VF_512 0 "register_operand" "=v,v,v")
3255 (match_operand:VF_512 1 "<round_nimm_predicate>" "%0,0,v"))
3256 (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
3258 (match_operand:VF_512 3 "<round_nimm_predicate>" "v,<round_constraint>,0"))))]
3259 "<sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
3261 vfnmsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
3262 vfnmsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
3263 vfnmsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
3264 [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f")
3265 (set_attr "type" "ssemuladd")
3266 (set_attr "mode" "<MODE>")])
;; Masked fnmsub for the VF_512 modes.  Operand 1 is tied to the
;; destination in both alternatives; operand 4 is the "Yk" mask operand,
;; printed as %{%4%} after the destination.  Emits vfnmsub132 or
;; vfnmsub213.
;; NOTE(review): the insn condition and parts of the RTL template fall
;; on lines elided from this listing.
3268 (define_insn "avx512f_fnmsub_<mode>_mask<round_name>"
3269 [(set (match_operand:VF_512 0 "register_operand" "=v,v")
3273 (match_operand:VF_512 1 "register_operand" "0,0"))
3274 (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>,v")
3276 (match_operand:VF_512 3 "<round_nimm_predicate>" "v,<round_constraint>")))
3278 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
3281 vfnmsub132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
3282 vfnmsub213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
3283 [(set_attr "isa" "fma_avx512f,fma_avx512f")
3284 (set_attr "type" "ssemuladd")
3285 (set_attr "mode" "<MODE>")])
;; Masked fnmsub for the VF_512 modes in which operand 3 is tied to the
;; destination.  Only the vfnmsub231 form is emitted; operand 4 is the
;; "Yk" mask operand.
;; NOTE(review): the insn condition falls on a line elided from this
;; listing.
3287 (define_insn "avx512f_fnmsub_<mode>_mask3<round_name>"
3288 [(set (match_operand:VF_512 0 "register_operand" "=v")
3292 (match_operand:VF_512 1 "register_operand" "v"))
3293 (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>")
3295 (match_operand:VF_512 3 "register_operand" "0")))
3297 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
3299 "vfnmsub231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
3300 [(set_attr "isa" "fma_avx512f")
3301 (set_attr "type" "ssemuladd")
3302 (set_attr "mode" "<MODE>")])
3304 ;; FMA parallel floating point multiply addsub and subadd operations.
3306 ;; It would be possible to represent these without the UNSPEC as
3307 ;;
3308 ;; (vec_merge
3309 ;; (fma op1 op2 op3)
3310 ;; (fma op1 op2 (neg op3))
3311 ;; (merge-const))
3312 ;;
3313 ;; But this doesn't seem useful in practice.
;; Named expander for fmaddsub on every VF mode.  It takes three
;; nonimmediate inputs and is enabled when any of FMA, FMA4 or AVX512F
;; is available; no preparation statements follow the condition.
;; NOTE(review): the line naming the wrapping RTL code is elided from
;; this listing.
3315 (define_expand "fmaddsub_<mode>"
3316 [(set (match_operand:VF 0 "register_operand")
3318 [(match_operand:VF 1 "nonimmediate_operand")
3319 (match_operand:VF 2 "nonimmediate_operand")
3320 (match_operand:VF 3 "nonimmediate_operand")]
3322 "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
;; Expander for the "maskz" form of fmaddsub on the VF_512 modes.  It
;; forwards operands 0-3, an all-zero vector (CONST0_RTX of the vector
;; mode) and the mask operand 4 to
;; gen_fma_fmaddsub_<mode>_maskz_1<round_expand_name>.
;; NOTE(review): the condition and the braces/DONE of the preparation
;; block fall on lines elided from this listing.
3324 (define_expand "avx512f_fmaddsub_<mode>_maskz<round_expand_name>"
3325 [(match_operand:VF_512 0 "register_operand")
3326 (match_operand:VF_512 1 "<round_expand_nimm_predicate>")
3327 (match_operand:VF_512 2 "<round_expand_nimm_predicate>")
3328 (match_operand:VF_512 3 "<round_expand_nimm_predicate>")
3329 (match_operand:<avx512fmaskmode> 4 "register_operand")]
3332 emit_insn (gen_fma_fmaddsub_<mode>_maskz_1<round_expand_name> (
3333 operands[0], operands[1], operands[2], operands[3],
3334 CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>));
;; fmaddsub insn for the VF_128_256 modes, enabled for FMA, FMA4 or
;; AVX512F.  Alternatives 0-2 (isa "fma_avx512f") emit the three-operand
;; vfmaddsub132/213/231 forms; alternatives 3-4 (isa "fma4") emit the
;; four-operand vfmaddsub form.
;; NOTE(review): the line naming the wrapping unspec is elided from this
;; listing.
3338 (define_insn "<sd_mask_codefor>fma_fmaddsub_<mode><sd_maskz_name><round_name>"
3339 [(set (match_operand:VF_128_256 0 "register_operand" "=v,v,v,x,x")
3341 [(match_operand:VF_128_256 1 "<round_nimm_predicate>" "%0,0,v,x,x")
3342 (match_operand:VF_128_256 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>,x,m")
3343 (match_operand:VF_128_256 3 "<round_nimm_predicate>" "v,<round_constraint>,0,xm,x")]
3345 "(TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F) && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
3347 vfmaddsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
3348 vfmaddsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
3349 vfmaddsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}
3350 vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
3351 vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
3352 [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f,fma4,fma4")
3353 (set_attr "type" "ssemuladd")
3354 (set_attr "mode" "<MODE>")])
;; fmaddsub insn for the VF_512 modes, requiring TARGET_AVX512F.  The
;; three alternatives emit vfmaddsub132/213/231, with operand 1 or
;; operand 3 tied to the destination; there are no FMA4 alternatives.
;; NOTE(review): the line naming the wrapping unspec is elided from this
;; listing.
3356 (define_insn "<sd_mask_codefor>fma_fmaddsub_<mode><sd_maskz_name><round_name>"
3357 [(set (match_operand:VF_512 0 "register_operand" "=v,v,v")
3359 [(match_operand:VF_512 1 "<round_nimm_predicate>" "%0,0,v")
3360 (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
3361 (match_operand:VF_512 3 "<round_nimm_predicate>" "v,<round_constraint>,0")]
3363 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
3365 vfmaddsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
3366 vfmaddsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
3367 vfmaddsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
3368 [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f")
3369 (set_attr "type" "ssemuladd")
3370 (set_attr "mode" "<MODE>")])
;; Masked fmaddsub for the VF_512 modes.  Operand 1 is tied to the
;; destination in both alternatives; operand 4 is the "Yk" mask operand,
;; printed as %{%4%} after the destination.  Emits vfmaddsub132 or
;; vfmaddsub213.
;; NOTE(review): the insn condition and parts of the RTL template fall
;; on lines elided from this listing.
3372 (define_insn "avx512f_fmaddsub_<mode>_mask<round_name>"
3373 [(set (match_operand:VF_512 0 "register_operand" "=v,v")
3376 [(match_operand:VF_512 1 "register_operand" "0,0")
3377 (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>,v")
3378 (match_operand:VF_512 3 "<round_nimm_predicate>" "v,<round_constraint>")]
3381 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
3384 vfmaddsub132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
3385 vfmaddsub213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
3386 [(set_attr "isa" "fma_avx512f,fma_avx512f")
3387 (set_attr "type" "ssemuladd")
3388 (set_attr "mode" "<MODE>")])
;; Masked fmaddsub for the VF_512 modes in which operand 3 is tied to
;; the destination.  Only the vfmaddsub231 form is emitted; operand 4 is
;; the "Yk" mask operand.
;; NOTE(review): the insn condition falls on a line elided from this
;; listing.
3390 (define_insn "avx512f_fmaddsub_<mode>_mask3<round_name>"
3391 [(set (match_operand:VF_512 0 "register_operand" "=v")
3394 [(match_operand:VF_512 1 "register_operand" "v")
3395 (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>")
3396 (match_operand:VF_512 3 "register_operand" "0")]
3399 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
3401 "vfmaddsub231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
3402 [(set_attr "isa" "fma_avx512f")
3403 (set_attr "type" "ssemuladd")
3404 (set_attr "mode" "<MODE>")])
;; fmsubadd insn for the VF_128_256 modes, enabled for FMA, FMA4 or
;; AVX512F.  Alternatives 0-2 (isa "fma_avx512f") emit the three-operand
;; vfmsubadd132/213/231 forms; alternatives 3-4 (isa "fma4") emit the
;; four-operand vfmsubadd form.
;; NOTE(review): the lines wrapping operand 3 and naming the unspec are
;; elided from this listing.
3406 (define_insn "<sd_mask_codefor>fma_fmsubadd_<mode><sd_maskz_name><round_name>"
3407 [(set (match_operand:VF_128_256 0 "register_operand" "=v,v,v,x,x")
3409 [(match_operand:VF_128_256 1 "<round_nimm_predicate>" "%0,0,v,x,x")
3410 (match_operand:VF_128_256 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>,x,m")
3412 (match_operand:VF_128_256 3 "<round_nimm_predicate>" "v,<round_constraint>,0,xm,x"))]
3414 "(TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F) && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
3416 vfmsubadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
3417 vfmsubadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
3418 vfmsubadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}
3419 vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
3420 vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
3421 [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f,fma4,fma4")
3422 (set_attr "type" "ssemuladd")
3423 (set_attr "mode" "<MODE>")])
;; fmsubadd insn for the VF_512 modes, requiring TARGET_AVX512F.  The
;; three alternatives emit vfmsubadd132/213/231, with operand 1 or
;; operand 3 tied to the destination; there are no FMA4 alternatives.
;; NOTE(review): the lines wrapping operand 3 and naming the unspec are
;; elided from this listing.
3425 (define_insn "<sd_mask_codefor>fma_fmsubadd_<mode><sd_maskz_name><round_name>"
3426 [(set (match_operand:VF_512 0 "register_operand" "=v,v,v")
3428 [(match_operand:VF_512 1 "<round_nimm_predicate>" "%0,0,v")
3429 (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
3431 (match_operand:VF_512 3 "<round_nimm_predicate>" "v,<round_constraint>,0"))]
3433 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
3435 vfmsubadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
3436 vfmsubadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
3437 vfmsubadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
3438 [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f")
3439 (set_attr "type" "ssemuladd")
3440 (set_attr "mode" "<MODE>")])
;; Masked fmsubadd for the VF_512 modes.  Operand 1 is tied to the
;; destination in both alternatives; operand 4 is the "Yk" mask operand,
;; printed as %{%4%} after the destination.  Emits vfmsubadd132 or
;; vfmsubadd213.
;; NOTE(review): the insn condition and parts of the RTL template fall
;; on lines elided from this listing.
3442 (define_insn "avx512f_fmsubadd_<mode>_mask<round_name>"
3443 [(set (match_operand:VF_512 0 "register_operand" "=v,v")
3446 [(match_operand:VF_512 1 "register_operand" "0,0")
3447 (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>,v")
3449 (match_operand:VF_512 3 "<round_nimm_predicate>" "v,<round_constraint>"))]
3452 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
3455 vfmsubadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
3456 vfmsubadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
3457 [(set_attr "isa" "fma_avx512f,fma_avx512f")
3458 (set_attr "type" "ssemuladd")
3459 (set_attr "mode" "<MODE>")])
;; Masked fmsubadd for the VF_512 modes in which operand 3 is tied to
;; the destination.  Only the vfmsubadd231 form is emitted; operand 4 is
;; the "Yk" mask operand.
;; NOTE(review): the insn condition falls on a line elided from this
;; listing.
3461 (define_insn "avx512f_fmsubadd_<mode>_mask3<round_name>"
3462 [(set (match_operand:VF_512 0 "register_operand" "=v")
3465 [(match_operand:VF_512 1 "register_operand" "v")
3466 (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>")
3468 (match_operand:VF_512 3 "register_operand" "0"))]
3471 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
3473 "vfmsubadd231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
3474 [(set_attr "isa" "fma_avx512f")
3475 (set_attr "type" "ssemuladd")
3476 (set_attr "mode" "<MODE>")])
3478 ;; FMA3 floating point scalar intrinsics. These merge result with
3479 ;; high-order elements from the destination register.
;; Named expander for the FMA3 scalar fmadd intrinsic on the VF_128
;; modes; all three inputs use <round_nimm_predicate>.
;; NOTE(review): the wrapping RTL, the remaining template lines and the
;; condition are elided from this listing; confirm against the full file.
3481 (define_expand "fmai_vmfmadd_<mode><round_name>"
3482 [(set (match_operand:VF_128 0 "register_operand")
3485 (match_operand:VF_128 1 "<round_nimm_predicate>")
3486 (match_operand:VF_128 2 "<round_nimm_predicate>")
3487 (match_operand:VF_128 3 "<round_nimm_predicate>"))
;; FMA3 scalar fmadd for the VF_128 modes, enabled for FMA or AVX512F.
;; Operand 1 is tied to the destination in both alternatives, which emit
;; vfmadd132 and vfmadd213 with the scalar suffix
;; <ssescalarmodesuffix>.  The Intel-syntax side prints the inputs with
;; the <iptr> modifier.
3492 (define_insn "*fmai_fmadd_<mode>"
3493 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
3496 (match_operand:VF_128 1 "<round_nimm_predicate>" " 0, 0")
3497 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>, v")
3498 (match_operand:VF_128 3 "<round_nimm_predicate>" " v,<round_constraint>"))
3501 "TARGET_FMA || TARGET_AVX512F"
3503 vfmadd132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
3504 vfmadd213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
3505 [(set_attr "type" "ssemuladd")
3506 (set_attr "mode" "<MODE>")])
;; FMA3 scalar fmsub for the VF_128 modes, enabled for FMA or AVX512F.
;; Operand 1 is tied to the destination in both alternatives, which emit
;; vfmsub132 and vfmsub213 with the scalar suffix
;; <ssescalarmodesuffix>.
3508 (define_insn "*fmai_fmsub_<mode>"
3509 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
3512 (match_operand:VF_128 1 "<round_nimm_predicate>" "0,0")
3513 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>,v")
3515 (match_operand:VF_128 3 "<round_nimm_predicate>" " v,<round_constraint>")))
3518 "TARGET_FMA || TARGET_AVX512F"
3520 vfmsub132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
3521 vfmsub213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
3522 [(set_attr "type" "ssemuladd")
3523 (set_attr "mode" "<MODE>")])
;; FMA3 scalar fnmadd for the VF_128 modes, enabled for FMA or AVX512F.
;; In this template operand 2 is listed before operand 1; operand 1 is
;; the one tied to the destination.  Emits vfnmadd132 or vfnmadd213
;; with the scalar suffix <ssescalarmodesuffix>.
3525 (define_insn "*fmai_fnmadd_<mode><round_name>"
3526 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
3530 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>,v"))
3531 (match_operand:VF_128 1 "<round_nimm_predicate>" "0,0")
3532 (match_operand:VF_128 3 "<round_nimm_predicate>" "v,<round_constraint>"))
3535 "TARGET_FMA || TARGET_AVX512F"
3537 vfnmadd132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
3538 vfnmadd213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
3539 [(set_attr "type" "ssemuladd")
3540 (set_attr "mode" "<MODE>")])
;; FMA3 scalar fnmsub for the VF_128 modes, enabled for FMA or AVX512F.
;; In this template operand 2 is listed before operand 1; operand 1 is
;; the one tied to the destination.  Emits vfnmsub132 or vfnmsub213
;; with the scalar suffix <ssescalarmodesuffix>.
3542 (define_insn "*fmai_fnmsub_<mode><round_name>"
3543 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
3547 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>, v"))
3548 (match_operand:VF_128 1 "<round_nimm_predicate>" " 0, 0")
3550 (match_operand:VF_128 3 "<round_nimm_predicate>" " v,<round_constraint>")))
3553 "TARGET_FMA || TARGET_AVX512F"
3555 vfnmsub132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
3556 vfnmsub213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
3557 [(set_attr "type" "ssemuladd")
3558 (set_attr "mode" "<MODE>")])
3560 ;; FMA4 floating point scalar intrinsics. These write the
3561 ;; entire destination register, with the high-order elements zeroed.
;; Named expander for the FMA4 scalar fmadd intrinsic on the VF_128
;; modes.  Its preparation statement sets operands[4] to the all-zero
;; vector of the mode (CONST0_RTX).
;; NOTE(review): the wrapping RTL and the condition fall on lines elided
;; from this listing.
3563 (define_expand "fma4i_vmfmadd_<mode>"
3564 [(set (match_operand:VF_128 0 "register_operand")
3567 (match_operand:VF_128 1 "nonimmediate_operand")
3568 (match_operand:VF_128 2 "nonimmediate_operand")
3569 (match_operand:VF_128 3 "nonimmediate_operand"))
3573 "operands[4] = CONST0_RTX (<MODE>mode);")
;; FMA4 scalar fmadd for the VF_128 modes: emits the four-operand
;; vfmadd<ssescalarmodesuffix> with a separate destination.  Operand 4
;; must satisfy const0_operand, and at most one of operands 2 and 3 may
;; be in memory (constraints " x,m" / "xm,x").
;; NOTE(review): the insn condition falls on a line elided from this
;; listing.
3575 (define_insn "*fma4i_vmfmadd_<mode>"
3576 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
3579 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
3580 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
3581 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
3582 (match_operand:VF_128 4 "const0_operand")
3585 "vfmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
3586 [(set_attr "type" "ssemuladd")
3587 (set_attr "mode" "<MODE>")])
;; FMA4 scalar fmsub for the VF_128 modes: emits the four-operand
;; vfmsub<ssescalarmodesuffix>.  Operand 4 must satisfy const0_operand.
;; NOTE(review): the insn condition falls on a line elided from this
;; listing.
3589 (define_insn "*fma4i_vmfmsub_<mode>"
3590 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
3593 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
3594 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
3596 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
3597 (match_operand:VF_128 4 "const0_operand")
3600 "vfmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
3601 [(set_attr "type" "ssemuladd")
3602 (set_attr "mode" "<MODE>")])
;; FMA4 scalar fnmadd for the VF_128 modes: emits the four-operand
;; vfnmadd<ssescalarmodesuffix>.  Operand 4 must satisfy const0_operand.
;; NOTE(review): the insn condition falls on a line elided from this
;; listing.
3604 (define_insn "*fma4i_vmfnmadd_<mode>"
3605 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
3609 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
3610 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
3611 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
3612 (match_operand:VF_128 4 "const0_operand")
3615 "vfnmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
3616 [(set_attr "type" "ssemuladd")
3617 (set_attr "mode" "<MODE>")])
;; FMA4 scalar fnmsub for the VF_128 modes: emits the four-operand
;; vfnmsub<ssescalarmodesuffix>.  Operand 4 must satisfy const0_operand.
;; NOTE(review): the insn condition falls on a line elided from this
;; listing.
3619 (define_insn "*fma4i_vmfnmsub_<mode>"
3620 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
3624 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
3625 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
3627 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
3628 (match_operand:VF_128 4 "const0_operand")
3631 "vfnmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
3632 [(set_attr "type" "ssemuladd")
3633 (set_attr "mode" "<MODE>")])
3635 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3637 ;; Parallel single-precision floating point conversion operations
3639 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2ps: converts the V2SI operand 2 (constraint "ym") to V2SF with
;; `float'.  Operand 1 is a V4SF register tied to the V4SF destination.
;; NOTE(review): the RTL that combines the converted value with
;; operand 1, and the insn condition, are elided from this listing.
3641 (define_insn "sse_cvtpi2ps"
3642 [(set (match_operand:V4SF 0 "register_operand" "=x")
3645 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
3646 (match_operand:V4SF 1 "register_operand" "0")
3649 "cvtpi2ps\t{%2, %0|%0, %2}"
3650 [(set_attr "type" "ssecvt")
3651 (set_attr "mode" "V4SF")])
;; cvtps2pi: the V2SI destination (constraint "y") receives elements 0
;; and 1, per the `parallel' selector, of a V4SI unspec applied to the
;; V4SF operand 1.  The Intel-syntax side prints operand 1 with the `q'
;; modifier.
;; NOTE(review): the unspec name and the insn condition fall on lines
;; elided from this listing.
3653 (define_insn "sse_cvtps2pi"
3654 [(set (match_operand:V2SI 0 "register_operand" "=y")
3656 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
3658 (parallel [(const_int 0) (const_int 1)])))]
3660 "cvtps2pi\t{%1, %0|%0, %q1}"
3661 [(set_attr "type" "ssecvt")
3662 (set_attr "unit" "mmx")
3663 (set_attr "mode" "DI")])
;; cvttps2pi: the V2SI destination (constraint "y") receives elements 0
;; and 1 of (fix:V4SI op1), where operand 1 is V4SF.  The prefix_rep
;; attribute is explicitly set to "0".
;; NOTE(review): the insn condition falls on a line elided from this
;; listing.
3665 (define_insn "sse_cvttps2pi"
3666 [(set (match_operand:V2SI 0 "register_operand" "=y")
3668 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
3669 (parallel [(const_int 0) (const_int 1)])))]
3671 "cvttps2pi\t{%1, %0|%0, %q1}"
3672 [(set_attr "type" "ssecvt")
3673 (set_attr "unit" "mmx")
3674 (set_attr "prefix_rep" "0")
3675 (set_attr "mode" "SF")])
;; Converts the SI operand 2 to SF with `float'; operand 1 is a V4SF
;; register.  Alternatives 0-1 (isa "noavx") emit two-operand cvtsi2ss
;; with operand 1 tied to the destination, taking operand 2 from a
;; general register or memory.  Alternative 2 (isa "avx") emits
;; three-operand vcvtsi2ss.  The *_decode attributes give per-alternative
;; decode classes for the named CPU families.
;; NOTE(review): the RTL that combines the result with operand 1, and
;; the insn condition, are elided from this listing.
3677 (define_insn "sse_cvtsi2ss<round_name>"
3678 [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
3681 (float:SF (match_operand:SI 2 "<round_nimm_predicate>" "r,m,<round_constraint3>")))
3682 (match_operand:V4SF 1 "register_operand" "0,0,v")
3686 cvtsi2ss\t{%2, %0|%0, %2}
3687 cvtsi2ss\t{%2, %0|%0, %2}
3688 vcvtsi2ss\t{<round_op3>%2, %1, %0|%0, %1, %2<round_op3>}"
3689 [(set_attr "isa" "noavx,noavx,avx")
3690 (set_attr "type" "sseicvt")
3691 (set_attr "athlon_decode" "vector,double,*")
3692 (set_attr "amdfam10_decode" "vector,double,*")
3693 (set_attr "bdver1_decode" "double,direct,*")
3694 (set_attr "btver2_decode" "double,double,double")
3695 (set_attr "prefix" "orig,orig,maybe_evex")
3696 (set_attr "mode" "SF")])
;; 64-bit variant of cvtsi2ss: converts the DI operand 2 to SF with
;; `float'.  Requires TARGET_SSE && TARGET_64BIT.  Alternatives 0-1
;; (isa "noavx") emit cvtsi2ssq with operand 1 tied to the destination
;; and prefix_rex "1"; alternative 2 (isa "avx") emits three-operand
;; vcvtsi2ssq with length_vex "4".
;; NOTE(review): the RTL that combines the result with operand 1 is
;; elided from this listing.
3698 (define_insn "sse_cvtsi2ssq<round_name>"
3699 [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
3702 (float:SF (match_operand:DI 2 "<round_nimm_predicate>" "r,m,<round_constraint3>")))
3703 (match_operand:V4SF 1 "register_operand" "0,0,v")
3705 "TARGET_SSE && TARGET_64BIT"
3707 cvtsi2ssq\t{%2, %0|%0, %2}
3708 cvtsi2ssq\t{%2, %0|%0, %2}
3709 vcvtsi2ssq\t{<round_op3>%2, %1, %0|%0, %1, %2<round_op3>}"
3710 [(set_attr "isa" "noavx,noavx,avx")
3711 (set_attr "type" "sseicvt")
3712 (set_attr "athlon_decode" "vector,double,*")
3713 (set_attr "amdfam10_decode" "vector,double,*")
3714 (set_attr "bdver1_decode" "double,direct,*")
3715 (set_attr "btver2_decode" "double,double,double")
3716 (set_attr "length_vex" "*,*,4")
3717 (set_attr "prefix_rex" "1,1,*")
3718 (set_attr "prefix" "orig,orig,maybe_evex")
3719 (set_attr "mode" "SF")])
;; %vcvtss2si: converts element 0 (the `parallel' selector) of the V4SF
;; operand 1 to an SI general register under UNSPEC_FIX_NOTRUNC.  The
;; Intel-syntax side prints operand 1 with the `k' modifier.
;; NOTE(review): unlike sse_cvtss2si_2, no amdfam10_decode attribute is
;; set here -- confirm that is intentional.
3721 (define_insn "sse_cvtss2si<round_name>"
3722 [(set (match_operand:SI 0 "register_operand" "=r,r")
3725 (match_operand:V4SF 1 "<round_nimm_predicate>" "v,<round_constraint2>")
3726 (parallel [(const_int 0)]))]
3727 UNSPEC_FIX_NOTRUNC))]
3729 "%vcvtss2si\t{<round_op2>%1, %0|%0, %k1<round_op2>}"
3730 [(set_attr "type" "sseicvt")
3731 (set_attr "athlon_decode" "double,vector")
3732 (set_attr "bdver1_decode" "double,double")
3733 (set_attr "prefix_rep" "1")
3734 (set_attr "prefix" "maybe_vex")
3735 (set_attr "mode" "SI")])
;; Variant of cvtss2si that takes a scalar SF operand (register "v" or
;; memory "m") directly, rather than element 0 of a V4SF vector:
;; (unspec:SI [op1] UNSPEC_FIX_NOTRUNC).  Emits %vcvtss2si.
;; NOTE(review): the insn condition falls on a line elided from this
;; listing.
3737 (define_insn "sse_cvtss2si_2"
3738 [(set (match_operand:SI 0 "register_operand" "=r,r")
3739 (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "v,m")]
3740 UNSPEC_FIX_NOTRUNC))]
3742 "%vcvtss2si\t{%1, %0|%0, %k1}"
3743 [(set_attr "type" "sseicvt")
3744 (set_attr "athlon_decode" "double,vector")
3745 (set_attr "amdfam10_decode" "double,double")
3746 (set_attr "bdver1_decode" "double,double")
3747 (set_attr "prefix_rep" "1")
3748 (set_attr "prefix" "maybe_vex")
3749 (set_attr "mode" "DI")])
;; 64-bit variant of cvtss2si: converts element 0 of the V4SF operand 1
;; to a DI general register under UNSPEC_FIX_NOTRUNC.  Requires
;; TARGET_SSE && TARGET_64BIT.  The mnemonic is written
;; %vcvtss2si{q}, so the `q' suffix appears only in the AT&T dialect.
3751 (define_insn "sse_cvtss2siq<round_name>"
3752 [(set (match_operand:DI 0 "register_operand" "=r,r")
3755 (match_operand:V4SF 1 "<round_nimm_predicate>" "v,<round_constraint2>")
3756 (parallel [(const_int 0)]))]
3757 UNSPEC_FIX_NOTRUNC))]
3758 "TARGET_SSE && TARGET_64BIT"
3759 "%vcvtss2si{q}\t{<round_op2>%1, %0|%0, %k1<round_op2>}"
3760 [(set_attr "type" "sseicvt")
3761 (set_attr "athlon_decode" "double,vector")
3762 (set_attr "bdver1_decode" "double,double")
3763 (set_attr "prefix_rep" "1")
3764 (set_attr "prefix" "maybe_vex")
3765 (set_attr "mode" "DI")])
;; 64-bit variant of sse_cvtss2si_2: (unspec:DI [op1]
;; UNSPEC_FIX_NOTRUNC) on a scalar SF operand in a register or memory.
;; Requires TARGET_SSE && TARGET_64BIT; emits %vcvtss2si{q}.
3767 (define_insn "sse_cvtss2siq_2"
3768 [(set (match_operand:DI 0 "register_operand" "=r,r")
3769 (unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "v,m")]
3770 UNSPEC_FIX_NOTRUNC))]
3771 "TARGET_SSE && TARGET_64BIT"
3772 "%vcvtss2si{q}\t{%1, %0|%0, %k1}"
3773 [(set_attr "type" "sseicvt")
3774 (set_attr "athlon_decode" "double,vector")
3775 (set_attr "amdfam10_decode" "double,double")
3776 (set_attr "bdver1_decode" "double,double")
3777 (set_attr "prefix_rep" "1")
3778 (set_attr "prefix" "maybe_vex")
3779 (set_attr "mode" "DI")])
;; %vcvttss2si: operates on element 0 of the V4SF operand 1 and writes
;; an SI general register.  It uses the round_saeonly substitution
;; attributes rather than the round ones.
;; NOTE(review): the lines naming the conversion RTL code and the insn
;; condition are elided from this listing.
3781 (define_insn "sse_cvttss2si<round_saeonly_name>"
3782 [(set (match_operand:SI 0 "register_operand" "=r,r")
3785 (match_operand:V4SF 1 "<round_saeonly_nimm_predicate>" "v,<round_saeonly_constraint2>")
3786 (parallel [(const_int 0)]))))]
3788 "%vcvttss2si\t{<round_saeonly_op2>%1, %0|%0, %k1<round_saeonly_op2>}"
3789 [(set_attr "type" "sseicvt")
3790 (set_attr "athlon_decode" "double,vector")
3791 (set_attr "amdfam10_decode" "double,double")
3792 (set_attr "bdver1_decode" "double,double")
3793 (set_attr "prefix_rep" "1")
3794 (set_attr "prefix" "maybe_vex")
3795 (set_attr "mode" "SI")])
;; 64-bit variant of cvttss2si: operates on element 0 of the V4SF
;; operand 1 and writes a DI general register.  Requires
;; TARGET_SSE && TARGET_64BIT; emits %vcvttss2si{q}.
;; NOTE(review): the second alternative uses <round_saeonly_constraint>
;; where the SI variant uses <round_saeonly_constraint2> -- confirm the
;; difference is intended.
3797 (define_insn "sse_cvttss2siq<round_saeonly_name>"
3798 [(set (match_operand:DI 0 "register_operand" "=r,r")
3801 (match_operand:V4SF 1 "<round_saeonly_nimm_predicate>" "v,<round_saeonly_constraint>")
3802 (parallel [(const_int 0)]))))]
3803 "TARGET_SSE && TARGET_64BIT"
3804 "%vcvttss2si{q}\t{<round_saeonly_op2>%1, %0|%0, %k1<round_saeonly_op2>}"
3805 [(set_attr "type" "sseicvt")
3806 (set_attr "athlon_decode" "double,vector")
3807 (set_attr "amdfam10_decode" "double,double")
3808 (set_attr "bdver1_decode" "double,double")
3809 (set_attr "prefix_rep" "1")
3810 (set_attr "prefix" "maybe_vex")
3811 (set_attr "mode" "DI")])
;; vcvtusi2<suffix>: converts the SI operand 2 with `unsigned_float' to
;; the scalar mode of the VF_128 vector and wraps it in vec_duplicate;
;; operand 1 is a VF_128 register source.  Requires TARGET_AVX512F and
;; <round_modev4sf_condition>; EVEX-encoded.
;; NOTE(review): the RTL that combines the result with operand 1 is
;; elided from this listing.
3813 (define_insn "cvtusi2<ssescalarmodesuffix>32<round_name>"
3814 [(set (match_operand:VF_128 0 "register_operand" "=v")
3816 (vec_duplicate:VF_128
3817 (unsigned_float:<ssescalarmode>
3818 (match_operand:SI 2 "<round_nimm_predicate>" "<round_constraint3>")))
3819 (match_operand:VF_128 1 "register_operand" "v")
3821 "TARGET_AVX512F && <round_modev4sf_condition>"
3822 "vcvtusi2<ssescalarmodesuffix>\t{<round_op3>%2, %1, %0|%0, %1, %2<round_op3>}"
3823 [(set_attr "type" "sseicvt")
3824 (set_attr "prefix" "evex")
3825 (set_attr "mode" "<ssescalarmode>")])
;; 64-bit variant of vcvtusi2<suffix>: converts the DI operand 2 with
;; `unsigned_float' to the scalar mode of the VF_128 vector.  Requires
;; TARGET_AVX512F && TARGET_64BIT; EVEX-encoded.
;; NOTE(review): the RTL that combines the result with operand 1 is
;; elided from this listing.
3827 (define_insn "cvtusi2<ssescalarmodesuffix>64<round_name>"
3828 [(set (match_operand:VF_128 0 "register_operand" "=v")
3830 (vec_duplicate:VF_128
3831 (unsigned_float:<ssescalarmode>
3832 (match_operand:DI 2 "<round_nimm_predicate>" "<round_constraint3>")))
3833 (match_operand:VF_128 1 "register_operand" "v")
3835 "TARGET_AVX512F && TARGET_64BIT"
3836 "vcvtusi2<ssescalarmodesuffix>\t{<round_op3>%2, %1, %0|%0, %1, %2<round_op3>}"
3837 [(set_attr "type" "sseicvt")
3838 (set_attr "prefix" "evex")
3839 (set_attr "mode" "<ssescalarmode>")])
;; %vcvtdq2ps: converts the integer vector operand 1 (mode
;; <sseintvecmode>) to the VF1 destination.  Requires TARGET_SSE2 plus
;; the mask and rounding 512-bit conditions.
;; NOTE(review): the line naming the conversion RTL code is elided from
;; this listing.
3841 (define_insn "float<sseintvecmodelower><mode>2<mask_name><round_name>"
3842 [(set (match_operand:VF1 0 "register_operand" "=v")
3844 (match_operand:<sseintvecmode> 1 "<round_nimm_predicate>" "<round_constraint>")))]
3845 "TARGET_SSE2 && <mask_mode512bit_condition> && <round_mode512bit_condition>"
3846 "%vcvtdq2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
3847 [(set_attr "type" "ssecvt")
3848 (set_attr "prefix" "maybe_vex")
3849 (set_attr "mode" "<sseinsnmode>")])
;; vcvtudq2ps: (unsigned_float op1) from the integer vector mode
;; <sseintvecmode> to the VF1_AVX512VL modes; EVEX-encoded.
;; NOTE(review): the insn condition falls on a line elided from this
;; listing.
3851 (define_insn "ufloat<sseintvecmodelower><mode>2<mask_name><round_name>"
3852 [(set (match_operand:VF1_AVX512VL 0 "register_operand" "=v")
3853 (unsigned_float:VF1_AVX512VL
3854 (match_operand:<sseintvecmode> 1 "nonimmediate_operand" "<round_constraint>")))]
3856 "vcvtudq2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
3857 [(set_attr "type" "ssecvt")
3858 (set_attr "prefix" "evex")
3859 (set_attr "mode" "<MODE>")])
;; Expander for unsigned integer-vector to float-vector conversion on
;; the VF1 modes.  Enabled for TARGET_SSE2 when the mode is V4SF or
;; TARGET_AVX2 is set.  For V16SFmode it emits gen_ufloatv16siv16sf2;
;; the other visible call is ix86_expand_vector_convert_uns_vsivsf.
;; NOTE(review): the braces, the branch keyword between the two calls
;; and DONE fall on lines elided from this listing.
3861 (define_expand "floatuns<sseintvecmodelower><mode>2"
3862 [(match_operand:VF1 0 "register_operand")
3863 (match_operand:<sseintvecmode> 1 "register_operand")]
3864 "TARGET_SSE2 && (<MODE>mode == V4SFmode || TARGET_AVX2)"
3866 if (<MODE>mode == V16SFmode)
3867 emit_insn (gen_ufloatv16siv16sf2 (operands[0], operands[1]));
3869 ix86_expand_vector_convert_uns_vsivsf (operands[0], operands[1]);
3875 ;; For <sse2_avx_avx512f>_fix_notrunc<sf2simodelower><mode> insn pattern
;; Maps each SImode vector mode to the lower-case name of the SFmode
;; vector mode with the same element count (V16SI -> "v16sf", etc.).
3876 (define_mode_attr sf2simodelower
3877 [(V16SI "v16sf") (V8SI "v8sf") (V4SI "v4sf")])
;; %vcvtps2dq: converts the float vector operand 1 (mode <ssePSmode>)
;; to the VI4_AVX destination under UNSPEC_FIX_NOTRUNC.  The
;; prefix_data16 attribute is computed from a TARGET_AVX test.
;; NOTE(review): the insn condition and part of the prefix_data16
;; expression fall on lines elided from this listing.
3879 (define_insn "<sse2_avx_avx512f>_fix_notrunc<sf2simodelower><mode>"
3880 [(set (match_operand:VI4_AVX 0 "register_operand" "=v")
3882 [(match_operand:<ssePSmode> 1 "nonimmediate_operand" "vm")]
3883 UNSPEC_FIX_NOTRUNC))]
3885 "%vcvtps2dq\t{%1, %0|%0, %1}"
3886 [(set_attr "type" "ssecvt")
3887 (set (attr "prefix_data16")
3889 (match_test "TARGET_AVX")
3891 (const_string "1")))
3892 (set_attr "prefix" "maybe_vex")
3893 (set_attr "mode" "<sseinsnmode>")])
;; 512-bit vcvtps2dq: converts the V16SF operand 1 to V16SI under
;; UNSPEC_FIX_NOTRUNC, with mask and rounding substitutions; EVEX-encoded
;; with insn mode XI.
;; NOTE(review): the insn condition falls on a line elided from this
;; listing.
3895 (define_insn "<mask_codefor>avx512f_fix_notruncv16sfv16si<mask_name><round_name>"
3896 [(set (match_operand:V16SI 0 "register_operand" "=v")
3898 [(match_operand:V16SF 1 "<round_nimm_predicate>" "<round_constraint>")]
3899 UNSPEC_FIX_NOTRUNC))]
3901 "vcvtps2dq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
3902 [(set_attr "type" "ssecvt")
3903 (set_attr "prefix" "evex")
3904 (set_attr "mode" "XI")])
;; vcvtps2udq: converts the float vector operand 1 (mode <ssePSmode>)
;; to the VI4_AVX512VL destination under UNSPEC_UNSIGNED_FIX_NOTRUNC;
;; EVEX-encoded.
;; NOTE(review): the insn condition falls on a line elided from this
;; listing.
3906 (define_insn "<mask_codefor><avx512>_ufix_notrunc<sf2simodelower><mode><mask_name><round_name>"
3907 [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
3908 (unspec:VI4_AVX512VL
3909 [(match_operand:<ssePSmode> 1 "nonimmediate_operand" "<round_constraint>")]
3910 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
3912 "vcvtps2udq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
3913 [(set_attr "type" "ssecvt")
3914 (set_attr "prefix" "evex")
3915 (set_attr "mode" "<sseinsnmode>")])
;; vcvtps2qq: converts the float vector operand 1 (mode <ssePSmode2>)
;; to the VI8_256_512 destination under UNSPEC_FIX_NOTRUNC.  Requires
;; TARGET_AVX512DQ and <round_mode512bit_condition>; EVEX-encoded.
3917 (define_insn "<mask_codefor>avx512dq_cvtps2qq<mode><mask_name><round_name>"
3918 [(set (match_operand:VI8_256_512 0 "register_operand" "=v")
3919 (unspec:VI8_256_512 [(match_operand:<ssePSmode2> 1 "nonimmediate_operand" "<round_constraint>")]
3920 UNSPEC_FIX_NOTRUNC))]
3921 "TARGET_AVX512DQ && <round_mode512bit_condition>"
3922 "vcvtps2qq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
3923 [(set_attr "type" "ssecvt")
3924 (set_attr "prefix" "evex")
3925 (set_attr "mode" "<sseinsnmode>")])
;; 128-bit vcvtps2qq: the V2DI destination is produced from elements 0
;; and 1 (the `parallel' selector) of the V4SF operand 1 under
;; UNSPEC_FIX_NOTRUNC.  Requires TARGET_AVX512DQ && TARGET_AVX512VL;
;; there is no rounding substitution on this pattern.
3927 (define_insn "<mask_codefor>avx512dq_cvtps2qqv2di<mask_name>"
3928 [(set (match_operand:V2DI 0 "register_operand" "=v")
3931 (match_operand:V4SF 1 "nonimmediate_operand" "vm")
3932 (parallel [(const_int 0) (const_int 1)]))]
3933 UNSPEC_FIX_NOTRUNC))]
3934 "TARGET_AVX512DQ && TARGET_AVX512VL"
3935 "vcvtps2qq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
3936 [(set_attr "type" "ssecvt")
3937 (set_attr "prefix" "evex")
3938 (set_attr "mode" "TI")])
;; Emits vcvtps2uqq for the VI8_256_512 iterator: an <ssePSmode2> source is
;; converted (UNSPEC_UNSIGNED_FIX_NOTRUNC) into a 64-bit-element integer vector.
;; Enabled under TARGET_AVX512DQ together with <round_mode512bit_condition>.
;; NOTE(review): operand 1 uses nonimmediate_operand with <round_constraint>
;; rather than <round_nimm_predicate> -- confirm.
3940 (define_insn "<mask_codefor>avx512dq_cvtps2uqq<mode><mask_name><round_name>"
3941 [(set (match_operand:VI8_256_512 0 "register_operand" "=v")
3942 (unspec:VI8_256_512 [(match_operand:<ssePSmode2> 1 "nonimmediate_operand" "<round_constraint>")]
3943 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
3944 "TARGET_AVX512DQ && <round_mode512bit_condition>"
3945 "vcvtps2uqq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
3946 [(set_attr "type" "ssecvt")
3947 (set_attr "prefix" "evex")
3948 (set_attr "mode" "<sseinsnmode>")])
;; V2DI form of vcvtps2uqq: the source is a V4SF operand of which the visible
;; (parallel [0 1]) picks elements 0 and 1; the result is wrapped in
;; UNSPEC_UNSIGNED_FIX_NOTRUNC.  Enabled under TARGET_AVX512DQ && TARGET_AVX512VL.
3950 (define_insn "<mask_codefor>avx512dq_cvtps2uqqv2di<mask_name>"
3951 [(set (match_operand:V2DI 0 "register_operand" "=v")
3954 (match_operand:V4SF 1 "nonimmediate_operand" "vm")
3955 (parallel [(const_int 0) (const_int 1)]))]
3956 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
3957 "TARGET_AVX512DQ && TARGET_AVX512VL"
3958 "vcvtps2uqq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
3959 [(set_attr "type" "ssecvt")
3960 (set_attr "prefix" "evex")
3961 (set_attr "mode" "TI")])
;; Truncating V16SF -> V16SI conversion.  The mnemonic is built as
;; vcvttps2<fixsuffix>dq, so <fixsuffix> selects the variant; the name carries
;; the <mask_name> and <round_saeonly_name> substitution attributes.
3963 (define_insn "<fixsuffix>fix_truncv16sfv16si2<mask_name><round_saeonly_name>"
3964 [(set (match_operand:V16SI 0 "register_operand" "=v")
3966 (match_operand:V16SF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
3968 "vcvttps2<fixsuffix>dq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
3969 [(set_attr "type" "ssecvt")
3970 (set_attr "prefix" "evex")
3971 (set_attr "mode" "XI")])
;; Truncating V8SF -> V8SI conversion (RTL `fix'), emitted as the VEX-encoded
;; vcvttps2dq.  Operand 1 may be a register or memory ("xm").
3973 (define_insn "fix_truncv8sfv8si2"
3974 [(set (match_operand:V8SI 0 "register_operand" "=x")
3975 (fix:V8SI (match_operand:V8SF 1 "nonimmediate_operand" "xm")))]
3977 "vcvttps2dq\t{%1, %0|%0, %1}"
3978 [(set_attr "type" "ssecvt")
3979 (set_attr "prefix" "vex")
3980 (set_attr "mode" "OI")])
;; Truncating V4SF -> V4SI conversion (RTL `fix'), emitted as %vcvttps2dq with
;; a "maybe_vex" prefix.  prefix_rep and prefix_data16 are given as expressions
;; that test TARGET_AVX.
;; NOTE(review): "prefix_data16" is specified twice in the attribute list below
;; (once as a TARGET_AVX-dependent expression, once as the constant "0") --
;; confirm which setting is intended and drop the other.
3982 (define_insn "fix_truncv4sfv4si2"
3983 [(set (match_operand:V4SI 0 "register_operand" "=x")
3984 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
3986 "%vcvttps2dq\t{%1, %0|%0, %1}"
3987 [(set_attr "type" "ssecvt")
3988 (set (attr "prefix_rep")
3990 (match_test "TARGET_AVX")
3992 (const_string "1")))
3993 (set (attr "prefix_data16")
3995 (match_test "TARGET_AVX")
3997 (const_string "0")))
3998 (set_attr "prefix_data16" "0")
3999 (set_attr "prefix" "maybe_vex")
4000 (set_attr "mode" "TI")])
;; Expander for unsigned truncating float -> int vector conversion over the VF1
;; iterator.  For V16SFmode it calls gen_ufix_truncv16sfv16si2.  The remaining
;; visible statements adjust the input with ix86_expand_adjust_ufix_to_sfix_si
;; (which also fills tmp[2]), perform a signed fix_trunc into a fresh register,
;; and XOR that result with tmp[2] into operand 0 -- presumably the non-V16SF
;; path; the branch structure is not fully visible here.
4002 (define_expand "fixuns_trunc<mode><sseintvecmodelower>2"
4003 [(match_operand:<sseintvecmode> 0 "register_operand")
4004 (match_operand:VF1 1 "register_operand")]
4007 if (<MODE>mode == V16SFmode)
4008 emit_insn (gen_ufix_truncv16sfv16si2 (operands[0],
4013 tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]);
4014 tmp[1] = gen_reg_rtx (<sseintvecmode>mode);
4015 emit_insn (gen_fix_trunc<mode><sseintvecmodelower>2 (tmp[1], tmp[0]));
4016 emit_insn (gen_xor<sseintvecmodelower>3 (operands[0], tmp[1], tmp[2]));
4021 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4023 ;; Parallel double-precision floating point conversion operations
4025 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2pd: signed V2SI -> V2DF conversion (RTL `float').  Two alternatives:
;; source in an MMX register ("y", unit "mmx") or in memory ("m").
4027 (define_insn "sse2_cvtpi2pd"
4028 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
4029 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
4031 "cvtpi2pd\t{%1, %0|%0, %1}"
4032 [(set_attr "type" "ssecvt")
4033 (set_attr "unit" "mmx,*")
4034 (set_attr "prefix_data16" "1,*")
4035 (set_attr "mode" "V2DF")])
;; cvtpd2pi: V2DF -> V2SI conversion expressed through UNSPEC_FIX_NOTRUNC; the
;; destination is an MMX register ("y"), the source an SSE register or memory.
4037 (define_insn "sse2_cvtpd2pi"
4038 [(set (match_operand:V2SI 0 "register_operand" "=y")
4039 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
4040 UNSPEC_FIX_NOTRUNC))]
4042 "cvtpd2pi\t{%1, %0|%0, %1}"
4043 [(set_attr "type" "ssecvt")
4044 (set_attr "unit" "mmx")
4045 (set_attr "bdver1_decode" "double")
4046 (set_attr "btver2_decode" "direct")
4047 (set_attr "prefix_data16" "1")
4048 (set_attr "mode" "DI")])
;; cvttpd2pi: truncating V2DF -> V2SI conversion (RTL `fix'); the destination
;; is an MMX register ("y"), the source an SSE register or memory.
;; NOTE(review): the mode attribute is "TI" here but "DI" on sse2_cvtpd2pi,
;; which has the same operand shapes -- confirm which is intended.
4050 (define_insn "sse2_cvttpd2pi"
4051 [(set (match_operand:V2SI 0 "register_operand" "=y")
4052 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
4054 "cvttpd2pi\t{%1, %0|%0, %1}"
4055 [(set_attr "type" "ssecvt")
4056 (set_attr "unit" "mmx")
4057 (set_attr "bdver1_decode" "double")
4058 (set_attr "prefix_data16" "1")
4059 (set_attr "mode" "TI")])
;; cvtsi2sd: converts the SI operand 2 to DF and combines it with the V2DF
;; operand 1 to form the V2DF result.  Three alternatives: two legacy forms
;; (isa "noavx", operand 1 tied to the destination via "0") taking operand 2
;; from a register or from memory, and the three-operand vcvtsi2sd (isa "avx").
4061 (define_insn "sse2_cvtsi2sd"
4062 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
4065 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m,rm")))
4066 (match_operand:V2DF 1 "register_operand" "0,0,x")
4070 cvtsi2sd\t{%2, %0|%0, %2}
4071 cvtsi2sd\t{%2, %0|%0, %2}
4072 vcvtsi2sd\t{%2, %1, %0|%0, %1, %2}"
4073 [(set_attr "isa" "noavx,noavx,avx")
4074 (set_attr "type" "sseicvt")
4075 (set_attr "athlon_decode" "double,direct,*")
4076 (set_attr "amdfam10_decode" "vector,double,*")
4077 (set_attr "bdver1_decode" "double,direct,*")
4078 (set_attr "btver2_decode" "double,double,double")
4079 (set_attr "prefix" "orig,orig,vex")
4080 (set_attr "mode" "DF")])
;; 64-bit-integer variant of cvtsi2sd: converts the DI operand 2 to DF and
;; combines it with the V2DF operand 1.  Requires TARGET_SSE2 && TARGET_64BIT.
;; The third (isa "avx") alternative emits vcvtsi2sdq and is the only one whose
;; template includes the <round_op3> rounding operand.
4082 (define_insn "sse2_cvtsi2sdq<round_name>"
4083 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
4086 (float:DF (match_operand:DI 2 "<round_nimm_predicate>" "r,m,<round_constraint3>")))
4087 (match_operand:V2DF 1 "register_operand" "0,0,v")
4089 "TARGET_SSE2 && TARGET_64BIT"
4091 cvtsi2sdq\t{%2, %0|%0, %2}
4092 cvtsi2sdq\t{%2, %0|%0, %2}
4093 vcvtsi2sdq\t{<round_op3>%2, %1, %0|%0, %1, %2<round_op3>}"
4094 [(set_attr "isa" "noavx,noavx,avx")
4095 (set_attr "type" "sseicvt")
4096 (set_attr "athlon_decode" "double,direct,*")
4097 (set_attr "amdfam10_decode" "vector,double,*")
4098 (set_attr "bdver1_decode" "double,direct,*")
4099 (set_attr "length_vex" "*,*,4")
4100 (set_attr "prefix_rex" "1,1,*")
4101 (set_attr "prefix" "orig,orig,maybe_evex")
4102 (set_attr "mode" "DF")])
;; vcvtss2usi: converts element 0 of the V4SF operand 1 into an SI general
;; register, expressed through UNSPEC_UNSIGNED_FIX_NOTRUNC.
4104 (define_insn "avx512f_vcvtss2usi<round_name>"
4105 [(set (match_operand:SI 0 "register_operand" "=r")
4108 (match_operand:V4SF 1 "<round_nimm_predicate>" "<round_constraint>")
4109 (parallel [(const_int 0)]))]
4110 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4112 "vcvtss2usi\t{<round_op2>%1, %0|%0, %1<round_op2>}"
4113 [(set_attr "type" "sseicvt")
4114 (set_attr "prefix" "evex")
4115 (set_attr "mode" "SI")])
;; DI-destination variant of vcvtss2usi: converts element 0 of the V4SF
;; operand 1 into a DI general register (UNSPEC_UNSIGNED_FIX_NOTRUNC).
;; Requires TARGET_AVX512F && TARGET_64BIT.
4117 (define_insn "avx512f_vcvtss2usiq<round_name>"
4118 [(set (match_operand:DI 0 "register_operand" "=r")
4121 (match_operand:V4SF 1 "<round_nimm_predicate>" "<round_constraint>")
4122 (parallel [(const_int 0)]))]
4123 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4124 "TARGET_AVX512F && TARGET_64BIT"
4125 "vcvtss2usi\t{<round_op2>%1, %0|%0, %1<round_op2>}"
4126 [(set_attr "type" "sseicvt")
4127 (set_attr "prefix" "evex")
4128 (set_attr "mode" "DI")])
;; vcvttss2usi: converts element 0 of the V4SF operand 1 into an SI general
;; register.  Uses the <round_saeonly_*> substitution attributes.
4130 (define_insn "avx512f_vcvttss2usi<round_saeonly_name>"
4131 [(set (match_operand:SI 0 "register_operand" "=r")
4134 (match_operand:V4SF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
4135 (parallel [(const_int 0)]))))]
4137 "vcvttss2usi\t{<round_saeonly_op2>%1, %0|%0, %1<round_saeonly_op2>}"
4138 [(set_attr "type" "sseicvt")
4139 (set_attr "prefix" "evex")
4140 (set_attr "mode" "SI")])
;; DI-destination variant of vcvttss2usi: converts element 0 of the V4SF
;; operand 1 into a DI general register.  Requires TARGET_AVX512F &&
;; TARGET_64BIT.
4142 (define_insn "avx512f_vcvttss2usiq<round_saeonly_name>"
4143 [(set (match_operand:DI 0 "register_operand" "=r")
4146 (match_operand:V4SF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
4147 (parallel [(const_int 0)]))))]
4148 "TARGET_AVX512F && TARGET_64BIT"
4149 "vcvttss2usi\t{<round_saeonly_op2>%1, %0|%0, %1<round_saeonly_op2>}"
4150 [(set_attr "type" "sseicvt")
4151 (set_attr "prefix" "evex")
4152 (set_attr "mode" "DI")])
;; vcvtsd2usi: converts element 0 of the V2DF operand 1 into an SI general
;; register, expressed through UNSPEC_UNSIGNED_FIX_NOTRUNC.
4154 (define_insn "avx512f_vcvtsd2usi<round_name>"
4155 [(set (match_operand:SI 0 "register_operand" "=r")
4158 (match_operand:V2DF 1 "<round_nimm_predicate>" "<round_constraint>")
4159 (parallel [(const_int 0)]))]
4160 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4162 "vcvtsd2usi\t{<round_op2>%1, %0|%0, %1<round_op2>}"
4163 [(set_attr "type" "sseicvt")
4164 (set_attr "prefix" "evex")
4165 (set_attr "mode" "SI")])
;; DI-destination variant of vcvtsd2usi: converts element 0 of the V2DF
;; operand 1 into a DI general register (UNSPEC_UNSIGNED_FIX_NOTRUNC).
;; Requires TARGET_AVX512F && TARGET_64BIT.
4167 (define_insn "avx512f_vcvtsd2usiq<round_name>"
4168 [(set (match_operand:DI 0 "register_operand" "=r")
4171 (match_operand:V2DF 1 "<round_nimm_predicate>" "<round_constraint>")
4172 (parallel [(const_int 0)]))]
4173 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4174 "TARGET_AVX512F && TARGET_64BIT"
4175 "vcvtsd2usi\t{<round_op2>%1, %0|%0, %1<round_op2>}"
4176 [(set_attr "type" "sseicvt")
4177 (set_attr "prefix" "evex")
4178 (set_attr "mode" "DI")])
;; vcvttsd2usi: converts element 0 of the V2DF operand 1 into an SI general
;; register.  Uses the <round_saeonly_*> substitution attributes.
4180 (define_insn "avx512f_vcvttsd2usi<round_saeonly_name>"
4181 [(set (match_operand:SI 0 "register_operand" "=r")
4184 (match_operand:V2DF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
4185 (parallel [(const_int 0)]))))]
4187 "vcvttsd2usi\t{<round_saeonly_op2>%1, %0|%0, %1<round_saeonly_op2>}"
4188 [(set_attr "type" "sseicvt")
4189 (set_attr "prefix" "evex")
4190 (set_attr "mode" "SI")])
;; DI-destination variant of vcvttsd2usi: converts element 0 of the V2DF
;; operand 1 into a DI general register.  Requires TARGET_AVX512F &&
;; TARGET_64BIT.
4192 (define_insn "avx512f_vcvttsd2usiq<round_saeonly_name>"
4193 [(set (match_operand:DI 0 "register_operand" "=r")
4196 (match_operand:V2DF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
4197 (parallel [(const_int 0)]))))]
4198 "TARGET_AVX512F && TARGET_64BIT"
4199 "vcvttsd2usi\t{<round_saeonly_op2>%1, %0|%0, %1<round_saeonly_op2>}"
4200 [(set_attr "type" "sseicvt")
4201 (set_attr "prefix" "evex")
4202 (set_attr "mode" "DI")])
;; cvtsd2si: converts element 0 of the V2DF operand 1 into an SI general
;; register, expressed through UNSPEC_FIX_NOTRUNC.  Two alternatives; the
;; Intel-syntax operand is printed with the %q modifier.
4204 (define_insn "sse2_cvtsd2si<round_name>"
4205 [(set (match_operand:SI 0 "register_operand" "=r,r")
4208 (match_operand:V2DF 1 "<round_nimm_predicate>" "v,<round_constraint2>")
4209 (parallel [(const_int 0)]))]
4210 UNSPEC_FIX_NOTRUNC))]
4212 "%vcvtsd2si\t{<round_op2>%1, %0|%0, %q1<round_op2>}"
4213 [(set_attr "type" "sseicvt")
4214 (set_attr "athlon_decode" "double,vector")
4215 (set_attr "bdver1_decode" "double,double")
4216 (set_attr "btver2_decode" "double,double")
4217 (set_attr "prefix_rep" "1")
4218 (set_attr "prefix" "maybe_vex")
4219 (set_attr "mode" "SI")])
;; Variant of cvtsd2si whose source is a scalar DF operand (register "v" or
;; memory "m") rather than a vector element; result is an SI general register
;; wrapped in UNSPEC_FIX_NOTRUNC.
4221 (define_insn "sse2_cvtsd2si_2"
4222 [(set (match_operand:SI 0 "register_operand" "=r,r")
4223 (unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "v,m")]
4224 UNSPEC_FIX_NOTRUNC))]
4226 "%vcvtsd2si\t{%1, %0|%0, %q1}"
4227 [(set_attr "type" "sseicvt")
4228 (set_attr "athlon_decode" "double,vector")
4229 (set_attr "amdfam10_decode" "double,double")
4230 (set_attr "bdver1_decode" "double,double")
4231 (set_attr "prefix_rep" "1")
4232 (set_attr "prefix" "maybe_vex")
4233 (set_attr "mode" "SI")])
;; DI-destination variant of cvtsd2si: converts element 0 of the V2DF operand 1
;; into a DI general register (UNSPEC_FIX_NOTRUNC).  Requires TARGET_SSE2 &&
;; TARGET_64BIT; the mnemonic carries a {q} suffix.
4235 (define_insn "sse2_cvtsd2siq<round_name>"
4236 [(set (match_operand:DI 0 "register_operand" "=r,r")
4239 (match_operand:V2DF 1 "<round_nimm_predicate>" "v,<round_constraint2>")
4240 (parallel [(const_int 0)]))]
4241 UNSPEC_FIX_NOTRUNC))]
4242 "TARGET_SSE2 && TARGET_64BIT"
4243 "%vcvtsd2si{q}\t{<round_op2>%1, %0|%0, %q1<round_op2>}"
4244 [(set_attr "type" "sseicvt")
4245 (set_attr "athlon_decode" "double,vector")
4246 (set_attr "bdver1_decode" "double,double")
4247 (set_attr "prefix_rep" "1")
4248 (set_attr "prefix" "maybe_vex")
4249 (set_attr "mode" "DI")])
;; DI-destination variant of cvtsd2si with a scalar DF source (register "v" or
;; memory "m"), wrapped in UNSPEC_FIX_NOTRUNC.  Requires TARGET_SSE2 &&
;; TARGET_64BIT.
4251 (define_insn "sse2_cvtsd2siq_2"
4252 [(set (match_operand:DI 0 "register_operand" "=r,r")
4253 (unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "v,m")]
4254 UNSPEC_FIX_NOTRUNC))]
4255 "TARGET_SSE2 && TARGET_64BIT"
4256 "%vcvtsd2si{q}\t{%1, %0|%0, %q1}"
4257 [(set_attr "type" "sseicvt")
4258 (set_attr "athlon_decode" "double,vector")
4259 (set_attr "amdfam10_decode" "double,double")
4260 (set_attr "bdver1_decode" "double,double")
4261 (set_attr "prefix_rep" "1")
4262 (set_attr "prefix" "maybe_vex")
4263 (set_attr "mode" "DI")])
;; cvttsd2si: converts element 0 of the V2DF operand 1 into an SI general
;; register.  Two alternatives; uses the <round_saeonly_*> substitution
;; attributes.
4265 (define_insn "sse2_cvttsd2si<round_saeonly_name>"
4266 [(set (match_operand:SI 0 "register_operand" "=r,r")
4269 (match_operand:V2DF 1 "<round_saeonly_nimm_predicate>" "v,<round_saeonly_constraint2>")
4270 (parallel [(const_int 0)]))))]
4272 "%vcvttsd2si\t{<round_saeonly_op2>%1, %0|%0, %q1<round_saeonly_op2>}"
4273 [(set_attr "type" "sseicvt")
4274 (set_attr "athlon_decode" "double,vector")
4275 (set_attr "amdfam10_decode" "double,double")
4276 (set_attr "bdver1_decode" "double,double")
4277 (set_attr "btver2_decode" "double,double")
4278 (set_attr "prefix_rep" "1")
4279 (set_attr "prefix" "maybe_vex")
4280 (set_attr "mode" "SI")])
;; DI-destination variant of cvttsd2si: converts element 0 of the V2DF
;; operand 1 into a DI general register.  Requires TARGET_SSE2 && TARGET_64BIT;
;; the mnemonic carries a {q} suffix.
4282 (define_insn "sse2_cvttsd2siq<round_saeonly_name>"
4283 [(set (match_operand:DI 0 "register_operand" "=r,r")
4286 (match_operand:V2DF 1 "<round_saeonly_nimm_predicate>" "v,<round_saeonly_constraint2>")
4287 (parallel [(const_int 0)]))))]
4288 "TARGET_SSE2 && TARGET_64BIT"
4289 "%vcvttsd2si{q}\t{<round_saeonly_op2>%1, %0|%0, %q1<round_saeonly_op2>}"
4290 [(set_attr "type" "sseicvt")
4291 (set_attr "athlon_decode" "double,vector")
4292 (set_attr "amdfam10_decode" "double,double")
4293 (set_attr "bdver1_decode" "double,double")
4294 (set_attr "prefix_rep" "1")
4295 (set_attr "prefix" "maybe_vex")
4296 (set_attr "mode" "DI")])
4298 ;; For float<si2dfmodelower><mode>2 and ufloat<si2dfmodelower><mode>2 insn patterns
;; Maps a DF vector mode to the SI vector mode with the same element count
;; (V8DF -> V8SI, V4DF -> V4SI); used as the source operand mode.
4299 (define_mode_attr si2dfmode
4300 [(V8DF "V8SI") (V4DF "V4SI")])
;; Lower-case spelling of the same mapping, used when building pattern names.
4301 (define_mode_attr si2dfmodelower
4302 [(V8DF "v8si") (V4DF "v4si")])
;; vcvtdq2pd: signed <si2dfmode> -> VF2_512_256 conversion (RTL `float').
;; Enabled under TARGET_AVX together with <mask_mode512bit_condition>.
4304 (define_insn "float<si2dfmodelower><mode>2<mask_name>"
4305 [(set (match_operand:VF2_512_256 0 "register_operand" "=v")
4306 (float:VF2_512_256 (match_operand:<si2dfmode> 1 "nonimmediate_operand" "vm")))]
4307 "TARGET_AVX && <mask_mode512bit_condition>"
4308 "vcvtdq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4309 [(set_attr "type" "ssecvt")
4310 (set_attr "prefix" "maybe_vex")
4311 (set_attr "mode" "<MODE>")])
;; vcvt<floatsuffix>qq2pd: <sseintvecmode> -> VF2_AVX512VL conversion via the
;; any_float code iterator.  The pattern name carries <mask_name> and
;; <round_name>, and the template prints <round_mask_op2>.
;; Operand 1 therefore uses <round_nimm_predicate>/<round_constraint>, like the
;; other rounding-capable patterns in this file, so that the embedded-rounding
;; variant only accepts a register source.  The previous fixed
;; "nonimmediate_operand"/"vm" pair also admitted a memory operand there.
4313 (define_insn "<floatsuffix>float<sseintvecmodelower><mode>2<mask_name><round_name>"
4314 [(set (match_operand:VF2_AVX512VL 0 "register_operand" "=v")
4315 (any_float:VF2_AVX512VL
4316 (match_operand:<sseintvecmode> 1 "<round_nimm_predicate>" "<round_constraint>")))]
4318 "vcvt<floatsuffix>qq2pd\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4319 [(set_attr "type" "ssecvt")
4320 (set_attr "prefix" "evex")
4321 (set_attr "mode" "<MODE>")])
4323 ;; For <floatsuffix>float<sselongvecmodelower><mode>2 insn patterns
;; Mnemonic suffix appended to vcvt<floatsuffix>qq2ps: empty for V8SF, "{y}"
;; for V4SF.
4324 (define_mode_attr qq2pssuff
4325 [(V8SF "") (V4SF "{y}")])
;; Maps an SF vector mode to the DI vector mode used as the conversion source
;; (V8SF -> V8DI, V4SF -> V4DI).
4327 (define_mode_attr sselongvecmode
4328 [(V8SF "V8DI") (V4SF "V4DI")])
;; Lower-case spelling of sselongvecmode, used when building pattern names.
4330 (define_mode_attr sselongvecmodelower
4331 [(V8SF "v8di") (V4SF "v4di")])
;; Maps a float vector mode to an integer mode name:
;; V8SF -> XI, V4SF -> OI, V8DF -> OI, V4DF -> TI.
4333 (define_mode_attr sseintvecmode3
4334 [(V8SF "XI") (V4SF "OI")
4335 (V8DF "OI") (V4DF "TI")])
;; vcvt<floatsuffix>qq2ps<qq2pssuff>: <sselongvecmode> -> VF1_128_256VL
;; conversion via the any_float code iterator.  Enabled under TARGET_AVX512DQ
;; together with <round_modev8sf_condition>.
;; NOTE(review): operand 1 uses nonimmediate_operand with <round_constraint>
;; rather than <round_nimm_predicate> -- confirm.
4337 (define_insn "<floatsuffix>float<sselongvecmodelower><mode>2<mask_name><round_name>"
4338 [(set (match_operand:VF1_128_256VL 0 "register_operand" "=v")
4339 (any_float:VF1_128_256VL
4340 (match_operand:<sselongvecmode> 1 "nonimmediate_operand" "<round_constraint>")))]
4341 "TARGET_AVX512DQ && <round_modev8sf_condition>"
4342 "vcvt<floatsuffix>qq2ps<qq2pssuff>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4343 [(set_attr "type" "ssecvt")
4344 (set_attr "prefix" "evex")
4345 (set_attr "mode" "<MODE>")])
;; vcvt<floatsuffix>qq2ps{x}: converts a V2DI operand to V2SF and pairs it with
;; a zero V2SF constant to form the V4SF destination.  Enabled under
;; TARGET_AVX512DQ && TARGET_AVX512VL.
4347 (define_insn "*<floatsuffix>floatv2div2sf2"
4348 [(set (match_operand:V4SF 0 "register_operand" "=v")
4350 (any_float:V2SF (match_operand:V2DI 1 "nonimmediate_operand" "vm"))
4351 (const_vector:V2SF [(const_int 0) (const_int 0)])))]
4352 "TARGET_AVX512DQ && TARGET_AVX512VL"
4353 "vcvt<floatsuffix>qq2ps{x}\t{%1, %0|%0, %1}"
4354 [(set_attr "type" "ssecvt")
4355 (set_attr "prefix" "evex")
4356 (set_attr "mode" "V4SF")])
;; Masked form of the V2DI -> V2SF conversion.  Operand 2 (V4SF, constraint
;; "0C") supplies elements 0 and 1 as the alternative value, operand 3 is the
;; QI mask register ("Yk"), and a zero V2SF constant fills the rest of the V4SF
;; destination.  The template prints the mask as %{%3%} followed by %N2.
4358 (define_insn "<floatsuffix>floatv2div2sf2_mask"
4359 [(set (match_operand:V4SF 0 "register_operand" "=v")
4362 (any_float:V2SF (match_operand:V2DI 1 "nonimmediate_operand" "vm"))
4364 (match_operand:V4SF 2 "vector_move_operand" "0C")
4365 (parallel [(const_int 0) (const_int 1)]))
4366 (match_operand:QI 3 "register_operand" "Yk"))
4367 (const_vector:V2SF [(const_int 0) (const_int 0)])))]
4368 "TARGET_AVX512DQ && TARGET_AVX512VL"
4369 "vcvt<floatsuffix>qq2ps{x}\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
4370 [(set_attr "type" "ssecvt")
4371 (set_attr "prefix" "evex")
4372 (set_attr "mode" "V4SF")])
;; vcvtudq2pd: unsigned <si2dfmode> -> VF2_512_256VL conversion (RTL
;; `unsigned_float'), EVEX-encoded.
4374 (define_insn "ufloat<si2dfmodelower><mode>2<mask_name>"
4375 [(set (match_operand:VF2_512_256VL 0 "register_operand" "=v")
4376 (unsigned_float:VF2_512_256VL
4377 (match_operand:<si2dfmode> 1 "nonimmediate_operand" "vm")))]
4379 "vcvtudq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4380 [(set_attr "type" "ssecvt")
4381 (set_attr "prefix" "evex")
4382 (set_attr "mode" "<MODE>")])
;; V2DF form of vcvtudq2pd: unsigned conversion of elements 0 and 1 of a V4SI
;; operand into V2DF.
4384 (define_insn "ufloatv2siv2df2<mask_name>"
4385 [(set (match_operand:V2DF 0 "register_operand" "=v")
4386 (unsigned_float:V2DF
4388 (match_operand:V4SI 1 "nonimmediate_operand" "vm")
4389 (parallel [(const_int 0) (const_int 1)]))))]
4391 "vcvtudq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4392 [(set_attr "type" "ssecvt")
4393 (set_attr "prefix" "evex")
4394 (set_attr "mode" "V2DF")])
;; vcvtdq2pd producing V8DF from elements 0..7 of a V16SI operand.  The source
;; is printed with the %t operand modifier.
4396 (define_insn "avx512f_cvtdq2pd512_2"
4397 [(set (match_operand:V8DF 0 "register_operand" "=v")
4400 (match_operand:V16SI 1 "nonimmediate_operand" "vm")
4401 (parallel [(const_int 0) (const_int 1)
4402 (const_int 2) (const_int 3)
4403 (const_int 4) (const_int 5)
4404 (const_int 6) (const_int 7)]))))]
4406 "vcvtdq2pd\t{%t1, %0|%0, %t1}"
4407 [(set_attr "type" "ssecvt")
4408 (set_attr "prefix" "evex")
4409 (set_attr "mode" "V8DF")])
;; vcvtdq2pd producing V4DF from elements 0..3 of a V8SI operand.  The source
;; is printed with the %x operand modifier.
4411 (define_insn "avx_cvtdq2pd256_2"
4412 [(set (match_operand:V4DF 0 "register_operand" "=x")
4415 (match_operand:V8SI 1 "nonimmediate_operand" "xm")
4416 (parallel [(const_int 0) (const_int 1)
4417 (const_int 2) (const_int 3)]))))]
4419 "vcvtdq2pd\t{%x1, %0|%0, %x1}"
4420 [(set_attr "type" "ssecvt")
4421 (set_attr "prefix" "vex")
4422 (set_attr "mode" "V4DF")])
;; %vcvtdq2pd producing V2DF from elements 0 and 1 of a V4SI operand.  The
;; Intel-syntax source is printed with %q, and ssememalign is set to 64.
4424 (define_insn "sse2_cvtdq2pd"
4425 [(set (match_operand:V2DF 0 "register_operand" "=x")
4428 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
4429 (parallel [(const_int 0) (const_int 1)]))))]
4431 "%vcvtdq2pd\t{%1, %0|%0, %q1}"
4432 [(set_attr "type" "ssecvt")
4433 (set_attr "prefix" "maybe_vex")
4434 (set_attr "ssememalign" "64")
4435 (set_attr "mode" "V2DF")])
;; vcvtpd2dq: V8DF -> V8SI conversion expressed through UNSPEC_FIX_NOTRUNC,
;; with <mask_name> and <round_name> substitution attributes.
4437 (define_insn "<mask_codefor>avx512f_cvtpd2dq512<mask_name><round_name>"
4438 [(set (match_operand:V8SI 0 "register_operand" "=v")
4440 [(match_operand:V8DF 1 "<round_nimm_predicate>" "<round_constraint>")]
4441 UNSPEC_FIX_NOTRUNC))]
4443 "vcvtpd2dq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4444 [(set_attr "type" "ssecvt")
4445 (set_attr "prefix" "evex")
4446 (set_attr "mode" "OI")])
;; vcvtpd2dq{y}: V4DF -> V4SI conversion expressed through UNSPEC_FIX_NOTRUNC,
;; VEX-encoded.
4448 (define_insn "avx_cvtpd2dq256"
4449 [(set (match_operand:V4SI 0 "register_operand" "=x")
4450 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")]
4451 UNSPEC_FIX_NOTRUNC))]
4453 "vcvtpd2dq{y}\t{%1, %0|%0, %1}"
4454 [(set_attr "type" "ssecvt")
4455 (set_attr "prefix" "vex")
4456 (set_attr "mode" "OI")])
;; Expander producing a V8SI result from a V4DF -> V4SI conversion; the
;; preparation statement sets operands[2] to the V4SImode zero constant.
4458 (define_expand "avx_cvtpd2dq256_2"
4459 [(set (match_operand:V8SI 0 "register_operand")
4461 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand")]
4465 "operands[2] = CONST0_RTX (V4SImode);")
;; Matching insn for the expander of the same name: V4DF -> V4SI conversion
;; combined with a zero V4SI operand 2 into a V8SI destination.  The
;; destination is printed with the %x modifier.
4467 (define_insn "*avx_cvtpd2dq256_2"
4468 [(set (match_operand:V8SI 0 "register_operand" "=x")
4470 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")]
4472 (match_operand:V4SI 2 "const0_operand")))]
4474 "vcvtpd2dq{y}\t{%1, %x0|%x0, %1}"
4475 [(set_attr "type" "ssecvt")
4476 (set_attr "prefix" "vex")
4477 (set_attr "btver2_decode" "vector")
4478 (set_attr "mode" "OI")])
;; Expander producing a V4SI result from a V2DF -> V2SI conversion; the
;; preparation statement sets operands[2] to the V2SImode zero constant.
4480 (define_expand "sse2_cvtpd2dq"
4481 [(set (match_operand:V4SI 0 "register_operand")
4483 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand")]
4487 "operands[2] = CONST0_RTX (V2SImode);")
;; Matching insn for the expander of the same name: V2DF -> V2SI conversion
;; combined with a zero V2SI operand 2 into a V4SI destination.  The output
;; code returns either the vcvtpd2dq{x} or the cvtpd2dq template (the selecting
;; condition is not visible in this listing).
4489 (define_insn "*sse2_cvtpd2dq"
4490 [(set (match_operand:V4SI 0 "register_operand" "=x")
4492 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
4494 (match_operand:V2SI 2 "const0_operand")))]
4498 return "vcvtpd2dq{x}\t{%1, %0|%0, %1}";
4500 return "cvtpd2dq\t{%1, %0|%0, %1}";
4502 [(set_attr "type" "ssecvt")
4503 (set_attr "prefix_rep" "1")
4504 (set_attr "prefix_data16" "0")
4505 (set_attr "prefix" "maybe_vex")
4506 (set_attr "mode" "TI")
4507 (set_attr "amdfam10_decode" "double")
4508 (set_attr "athlon_decode" "vector")
4509 (set_attr "bdver1_decode" "double")])
;; vcvtpd2udq: V8DF -> V8SI conversion expressed through
;; UNSPEC_UNSIGNED_FIX_NOTRUNC, with <mask_name> and <round_name> attributes.
4511 (define_insn "avx512f_ufix_notruncv8dfv8si<mask_name><round_name>"
4512 [(set (match_operand:V8SI 0 "register_operand" "=v")
4514 [(match_operand:V8DF 1 "<round_nimm_predicate>" "<round_constraint>")]
4515 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4517 "vcvtpd2udq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4518 [(set_attr "type" "ssecvt")
4519 (set_attr "prefix" "evex")
4520 (set_attr "mode" "OI")])
;; Truncating V8DF -> V8SI conversion.  The mnemonic is built as
;; vcvttpd2<fixsuffix>dq; uses the <mask_name> and <round_saeonly_*> attributes.
4522 (define_insn "<fixsuffix>fix_truncv8dfv8si2<mask_name><round_saeonly_name>"
4523 [(set (match_operand:V8SI 0 "register_operand" "=v")
4525 (match_operand:V8DF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
4527 "vcvttpd2<fixsuffix>dq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
4528 [(set_attr "type" "ssecvt")
4529 (set_attr "prefix" "evex")
4530 (set_attr "mode" "OI")])
;; vcvttpd2udq{x}: unsigned truncating V2DF -> V2SI conversion (RTL
;; `unsigned_fix') paired with a zero V2SI constant to form the V4SI
;; destination.
4532 (define_insn "ufix_truncv2dfv2si2<mask_name>"
4533 [(set (match_operand:V4SI 0 "register_operand" "=v")
4535 (unsigned_fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "vm"))
4536 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
4538 "vcvttpd2udq{x}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4539 [(set_attr "type" "ssecvt")
4540 (set_attr "prefix" "evex")
4541 (set_attr "mode" "TI")])
;; vcvttpd2dq{y}: truncating V4DF -> V4SI conversion (RTL `fix').  Enabled under
;; TARGET_AVX, or under TARGET_AVX512VL && TARGET_AVX512F.
4543 (define_insn "fix_truncv4dfv4si2<mask_name>"
4544 [(set (match_operand:V4SI 0 "register_operand" "=v")
4545 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "vm")))]
4546 "TARGET_AVX || (TARGET_AVX512VL && TARGET_AVX512F)"
4547 "vcvttpd2dq{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4548 [(set_attr "type" "ssecvt")
4549 (set_attr "prefix" "maybe_evex")
4550 (set_attr "mode" "OI")])
;; vcvttpd2udq{y}: unsigned truncating V4DF -> V4SI conversion (RTL
;; `unsigned_fix').  Enabled under TARGET_AVX512VL && TARGET_AVX512F.
4552 (define_insn "ufix_truncv4dfv4si2<mask_name>"
4553 [(set (match_operand:V4SI 0 "register_operand" "=v")
4554 (unsigned_fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "vm")))]
4555 "TARGET_AVX512VL && TARGET_AVX512F"
4556 "vcvttpd2udq{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4557 [(set_attr "type" "ssecvt")
4558 (set_attr "prefix" "maybe_evex")
4559 (set_attr "mode" "OI")])
;; vcvttpd2<fixsuffix>qq: truncating VF2_AVX512VL -> <sseintvecmode> conversion
;; via the any_fix code iterator.  Enabled under TARGET_AVX512DQ together with
;; <round_saeonly_mode512bit_condition>.
4561 (define_insn "<fixsuffix>fix_trunc<mode><sseintvecmodelower>2<mask_name><round_saeonly_name>"
4562 [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
4563 (any_fix:<sseintvecmode>
4564 (match_operand:VF2_AVX512VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
4565 "TARGET_AVX512DQ && <round_saeonly_mode512bit_condition>"
4566 "vcvttpd2<fixsuffix>qq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
4567 [(set_attr "type" "ssecvt")
4568 (set_attr "prefix" "evex")
4569 (set_attr "mode" "<sseintvecmode2>")])
;; vcvtpd2qq: VF2_AVX512VL -> <sseintvecmode> conversion expressed through
;; UNSPEC_FIX_NOTRUNC.  Enabled under TARGET_AVX512DQ together with
;; <round_mode512bit_condition>.
4571 (define_insn "fix_notrunc<mode><sseintvecmodelower>2<mask_name><round_name>"
4572 [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
4573 (unspec:<sseintvecmode>
4574 [(match_operand:VF2_AVX512VL 1 "<round_nimm_predicate>" "<round_constraint>")]
4575 UNSPEC_FIX_NOTRUNC))]
4576 "TARGET_AVX512DQ && <round_mode512bit_condition>"
4577 "vcvtpd2qq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4578 [(set_attr "type" "ssecvt")
4579 (set_attr "prefix" "evex")
4580 (set_attr "mode" "<sseintvecmode2>")])
;; vcvtpd2uqq: VF2_AVX512VL -> <sseintvecmode> conversion expressed through
;; UNSPEC_UNSIGNED_FIX_NOTRUNC.  Enabled under TARGET_AVX512DQ together with
;; <round_mode512bit_condition>.
;; Operand 1 uses <round_nimm_predicate> so that it agrees with the signed
;; fix_notrunc<mode><sseintvecmodelower>2 pattern, which has the same shape:
;; the predicate then follows <round_constraint> instead of always admitting
;; memory.
4582 (define_insn "ufix_notrunc<mode><sseintvecmodelower>2<mask_name><round_name>"
4583 [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
4584 (unspec:<sseintvecmode>
4585 [(match_operand:VF2_AVX512VL 1 "<round_nimm_predicate>" "<round_constraint>")]
4586 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4587 "TARGET_AVX512DQ && <round_mode512bit_condition>"
4588 "vcvtpd2uqq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4589 [(set_attr "type" "ssecvt")
4590 (set_attr "prefix" "evex")
4591 (set_attr "mode" "<sseintvecmode2>")])
;; Expander producing a V8SI result from a truncating V4DF -> V4SI conversion;
;; the preparation statement sets operands[2] to the V4SImode zero constant.
4593 (define_expand "avx_cvttpd2dq256_2"
4594 [(set (match_operand:V8SI 0 "register_operand")
4596 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand"))
4599 "operands[2] = CONST0_RTX (V4SImode);")
;; Matching insn for the expander of the same name: truncating V4DF -> V4SI
;; conversion combined with a zero V4SI operand 2 into a V8SI destination,
;; emitted as vcvttpd2dq{y} with the destination printed via %x.
4601 (define_insn "*avx_cvttpd2dq256_2"
4602 [(set (match_operand:V8SI 0 "register_operand" "=x")
4604 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "xm"))
4605 (match_operand:V4SI 2 "const0_operand")))]
4607 "vcvttpd2dq{y}\t{%1, %x0|%x0, %1}"
4608 [(set_attr "type" "ssecvt")
4609 (set_attr "prefix" "vex")
4610 (set_attr "btver2_decode" "vector")
4611 (set_attr "mode" "OI")])
;; Expander producing a V4SI result from a truncating V2DF -> V2SI conversion;
;; the preparation statement sets operands[2] to the V2SImode zero constant.
4613 (define_expand "sse2_cvttpd2dq"
4614 [(set (match_operand:V4SI 0 "register_operand")
4616 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand"))
4619 "operands[2] = CONST0_RTX (V2SImode);")
;; Matching insn for the expander of the same name: truncating V2DF -> V2SI
;; conversion combined with a zero V2SI operand 2 into a V4SI destination.  The
;; output code returns either the vcvttpd2dq{x} or the cvttpd2dq template (the
;; selecting condition is not visible in this listing).
4621 (define_insn "*sse2_cvttpd2dq"
4622 [(set (match_operand:V4SI 0 "register_operand" "=x")
4624 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
4625 (match_operand:V2SI 2 "const0_operand")))]
4629 return "vcvttpd2dq{x}\t{%1, %0|%0, %1}";
4631 return "cvttpd2dq\t{%1, %0|%0, %1}";
4633 [(set_attr "type" "ssecvt")
4634 (set_attr "amdfam10_decode" "double")
4635 (set_attr "athlon_decode" "vector")
4636 (set_attr "bdver1_decode" "double")
4637 (set_attr "prefix" "maybe_vex")
4638 (set_attr "mode" "TI")])
;; cvtsd2ss: float_truncate of the V2DF operand 2, combined with the V4SF
;; operand 1 into the V4SF destination.  Three alternatives: two legacy forms
;; (isa "noavx", operand 1 tied to the destination) and the three-operand
;; vcvtsd2ss (isa "avx"), the only one that prints <round_op3>.
4640 (define_insn "sse2_cvtsd2ss<round_name>"
4641 [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
4644 (float_truncate:V2SF
4645 (match_operand:V2DF 2 "nonimmediate_operand" "x,m,<round_constraint>")))
4646 (match_operand:V4SF 1 "register_operand" "0,0,v")
4650 cvtsd2ss\t{%2, %0|%0, %2}
4651 cvtsd2ss\t{%2, %0|%0, %q2}
4652 vcvtsd2ss\t{<round_op3>%2, %1, %0|%0, %1, %q2<round_op3>}"
4653 [(set_attr "isa" "noavx,noavx,avx")
4654 (set_attr "type" "ssecvt")
4655 (set_attr "athlon_decode" "vector,double,*")
4656 (set_attr "amdfam10_decode" "vector,double,*")
4657 (set_attr "bdver1_decode" "direct,direct,*")
4658 (set_attr "btver2_decode" "double,double,double")
4659 (set_attr "prefix" "orig,orig,<round_prefix>")
4660 (set_attr "mode" "SF")])
;; cvtss2sd: conversion based on elements 0 and 1 of the V4SF operand 2,
;; combined with the V2DF operand 1 into the V2DF destination.  Three
;; alternatives: two legacy forms (isa "noavx", operand 1 tied to the
;; destination) and the three-operand vcvtss2sd (isa "avx"), the only one that
;; prints <round_saeonly_op3>.
4662 (define_insn "sse2_cvtss2sd<round_saeonly_name>"
4663 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
4667 (match_operand:V4SF 2 "nonimmediate_operand" "x,m,<round_saeonly_constraint>")
4668 (parallel [(const_int 0) (const_int 1)])))
4669 (match_operand:V2DF 1 "register_operand" "0,0,v")
4673 cvtss2sd\t{%2, %0|%0, %2}
4674 cvtss2sd\t{%2, %0|%0, %k2}
4675 vcvtss2sd\t{<round_saeonly_op3>%2, %1, %0|%0, %1, %k2<round_saeonly_op3>}"
4676 [(set_attr "isa" "noavx,noavx,avx")
4677 (set_attr "type" "ssecvt")
4678 (set_attr "amdfam10_decode" "vector,double,*")
4679 (set_attr "athlon_decode" "direct,direct,*")
4680 (set_attr "bdver1_decode" "direct,direct,*")
4681 (set_attr "btver2_decode" "double,double,double")
4682 (set_attr "prefix" "orig,orig,<round_saeonly_prefix>")
4683 (set_attr "mode" "DF")])
;; vcvtpd2ps: V8DF -> V8SF narrowing (RTL `float_truncate'), EVEX-encoded, with
;; <mask_name> and <round_name> substitution attributes.
4685 (define_insn "<mask_codefor>avx512f_cvtpd2ps512<mask_name><round_name>"
4686 [(set (match_operand:V8SF 0 "register_operand" "=v")
4687 (float_truncate:V8SF
4688 (match_operand:V8DF 1 "<round_nimm_predicate>" "<round_constraint>")))]
4690 "vcvtpd2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4691 [(set_attr "type" "ssecvt")
4692 (set_attr "prefix" "evex")
4693 (set_attr "mode" "V8SF")])
;; vcvtpd2ps{y}: V4DF -> V4SF narrowing (RTL `float_truncate'), VEX-encoded.
4695 (define_insn "avx_cvtpd2ps256"
4696 [(set (match_operand:V4SF 0 "register_operand" "=x")
4697 (float_truncate:V4SF
4698 (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
4700 "vcvtpd2ps{y}\t{%1, %0|%0, %1}"
4701 [(set_attr "type" "ssecvt")
4702 (set_attr "prefix" "vex")
4703 (set_attr "btver2_decode" "vector")
4704 (set_attr "mode" "V4SF")])
;; Expander producing a V4SF result from a V2DF -> V2SF float_truncate; the
;; preparation statement sets operands[2] to the V2SFmode zero constant.
4706 (define_expand "sse2_cvtpd2ps"
4707 [(set (match_operand:V4SF 0 "register_operand")
4709 (float_truncate:V2SF
4710 (match_operand:V2DF 1 "nonimmediate_operand"))
4713 "operands[2] = CONST0_RTX (V2SFmode);")
;; Matching insn for the expander of the same name: V2DF -> V2SF float_truncate
;; combined with a zero V2SF operand 2 into a V4SF destination.  The output
;; code returns either the vcvtpd2ps{x} or the cvtpd2ps template (the selecting
;; condition is not visible in this listing).
4715 (define_insn "*sse2_cvtpd2ps"
4716 [(set (match_operand:V4SF 0 "register_operand" "=x")
4718 (float_truncate:V2SF
4719 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
4720 (match_operand:V2SF 2 "const0_operand")))]
4724 return "vcvtpd2ps{x}\t{%1, %0|%0, %1}";
4726 return "cvtpd2ps\t{%1, %0|%0, %1}";
4728 [(set_attr "type" "ssecvt")
4729 (set_attr "amdfam10_decode" "double")
4730 (set_attr "athlon_decode" "vector")
4731 (set_attr "bdver1_decode" "double")
4732 (set_attr "prefix_data16" "1")
4733 (set_attr "prefix" "maybe_vex")
4734 (set_attr "mode" "V4SF")])
4736 ;; For <sse2_avx_avx512f>_cvtps2pd<avxsizesuffix> insn pattern
;; Maps a DF vector mode to the SF vector mode with the same element count
;; (V8DF -> V8SF, V4DF -> V4SF); used as the source operand mode.
4737 (define_mode_attr sf2dfmode
4738 [(V8DF "V8SF") (V4DF "V4SF")])
;; vcvtps2pd: <sf2dfmode> -> VF2_512_256 widening (RTL `float_extend').
;; Enabled under TARGET_AVX together with <mask_mode512bit_condition> and
;; <round_saeonly_mode512bit_condition>.
4740 (define_insn "<sse2_avx_avx512f>_cvtps2pd<avxsizesuffix><mask_name><round_saeonly_name>"
4741 [(set (match_operand:VF2_512_256 0 "register_operand" "=v")
4742 (float_extend:VF2_512_256
4743 (match_operand:<sf2dfmode> 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
4744 "TARGET_AVX && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
4745 "vcvtps2pd\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
4746 [(set_attr "type" "ssecvt")
4747 (set_attr "prefix" "maybe_vex")
4748 (set_attr "mode" "<MODE>")])
;; vcvtps2pd producing V4DF from elements 0..3 of a V8SF operand.  The source
;; is printed with the %x operand modifier.
4750 (define_insn "*avx_cvtps2pd256_2"
4751 [(set (match_operand:V4DF 0 "register_operand" "=x")
4754 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
4755 (parallel [(const_int 0) (const_int 1)
4756 (const_int 2) (const_int 3)]))))]
4758 "vcvtps2pd\t{%x1, %0|%0, %x1}"
4759 [(set_attr "type" "ssecvt")
4760 (set_attr "prefix" "vex")
4761 (set_attr "mode" "V4DF")])
;; vcvtps2pd producing V8DF from elements 0..7 (the low half) of a V16SF
;; operand.  The source is printed with the %t operand modifier.
4763 (define_insn "vec_unpacks_lo_v16sf"
4764 [(set (match_operand:V8DF 0 "register_operand" "=v")
4767 (match_operand:V16SF 1 "nonimmediate_operand" "vm")
4768 (parallel [(const_int 0) (const_int 1)
4769 (const_int 2) (const_int 3)
4770 (const_int 4) (const_int 5)
4771 (const_int 6) (const_int 7)]))))]
4773 "vcvtps2pd\t{%t1, %0|%0, %t1}"
4774 [(set_attr "type" "ssecvt")
4775 (set_attr "prefix" "evex")
4776 (set_attr "mode" "V8DF")])
;; %vcvtps2pd producing V2DF from elements 0 and 1 of a V4SF operand.  The
;; Intel-syntax source is printed with the %q modifier.
4778 (define_insn "sse2_cvtps2pd"
4779 [(set (match_operand:V2DF 0 "register_operand" "=x")
4782 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
4783 (parallel [(const_int 0) (const_int 1)]))))]
4785 "%vcvtps2pd\t{%1, %0|%0, %q1}"
4786 [(set_attr "type" "ssecvt")
4787 (set_attr "amdfam10_decode" "direct")
4788 (set_attr "athlon_decode" "double")
4789 (set_attr "bdver1_decode" "double")
4790 (set_attr "prefix_data16" "0")
4791 (set_attr "prefix" "maybe_vex")
4792 (set_attr "mode" "V2DF")])
;; Two-step expander for the high half of a V4SF operand: a first set selects
;; indices 6, 7, 2, 3 into the V4SFmode scratch created in operands[2], and a
;; second set produces the V2DF result from elements 0 and 1.
4794 (define_expand "vec_unpacks_hi_v4sf"
4799 (match_operand:V4SF 1 "nonimmediate_operand"))
4800 (parallel [(const_int 6) (const_int 7)
4801 (const_int 2) (const_int 3)])))
4802 (set (match_operand:V2DF 0 "register_operand")
4806 (parallel [(const_int 0) (const_int 1)]))))]
4808 "operands[2] = gen_reg_rtx (V4SFmode);")
;; Two-step expander for the high half of a V8SF operand: elements 4..7 are
;; selected into the V4SFmode scratch created in operands[2], from which the
;; V4DF result is then set.
4810 (define_expand "vec_unpacks_hi_v8sf"
4813 (match_operand:V8SF 1 "nonimmediate_operand")
4814 (parallel [(const_int 4) (const_int 5)
4815 (const_int 6) (const_int 7)])))
4816 (set (match_operand:V4DF 0 "register_operand")
4820 "operands[2] = gen_reg_rtx (V4SFmode);")
;; Two-step expander for the high half of a V16SF operand: elements 8..15 are
;; selected into the V8SFmode scratch created in operands[2], from which the
;; V8DF result is then set.
4822 (define_expand "vec_unpacks_hi_v16sf"
4825 (match_operand:V16SF 1 "nonimmediate_operand")
4826 (parallel [(const_int 8) (const_int 9)
4827 (const_int 10) (const_int 11)
4828 (const_int 12) (const_int 13)
4829 (const_int 14) (const_int 15)])))
4830 (set (match_operand:V8DF 0 "register_operand")
4834 "operands[2] = gen_reg_rtx (V8SFmode);")
;; Expander producing V2DF from elements 0 and 1 (the low half) of a V4SF
;; operand.
4836 (define_expand "vec_unpacks_lo_v4sf"
4837 [(set (match_operand:V2DF 0 "register_operand")
4840 (match_operand:V4SF 1 "nonimmediate_operand")
4841 (parallel [(const_int 0) (const_int 1)]))))]
;; Expander producing V4DF from elements 0..3 (the low half) of a V8SF operand.
4844 (define_expand "vec_unpacks_lo_v8sf"
4845 [(set (match_operand:V4DF 0 "register_operand")
4848 (match_operand:V8SF 1 "nonimmediate_operand")
4849 (parallel [(const_int 0) (const_int 1)
4850 (const_int 2) (const_int 3)]))))]
;; Maps an integer vector mode to the float vector mode with half as many
;; elements, each twice as wide (e.g. V8HI -> V4SF, V4SI -> V2DF); used as the
;; result mode of the vec_unpack*_float_* expanders.
4853 (define_mode_attr sseunpackfltmode
4854 [(V8HI "V4SF") (V4SI "V2DF") (V16HI "V8SF")
4855 (V8SI "V4DF") (V32HI "V16SF") (V16SI "V8DF")])
;; Expander over VI2_AVX512F: unpacks the high half of operand 1 into a fresh
;; <sseunpackmode> register with gen_vec_unpacks_hi_<mode>, then sets operand 0
;; to the FLOAT of that temporary in <sseunpackfltmode>.
4857 (define_expand "vec_unpacks_float_hi_<mode>"
4858 [(match_operand:<sseunpackfltmode> 0 "register_operand")
4859 (match_operand:VI2_AVX512F 1 "register_operand")]
4862 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
4864 emit_insn (gen_vec_unpacks_hi_<mode> (tmp, operands[1]));
4865 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
4866 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
;; Expander vec_unpacks_float_lo_<mode> (iterated over VI2_AVX512F).
;; Same two-step shape as the _hi variant, but the temporary is filled by
;; gen_vec_unpacks_lo_<mode>; operand 0 is then set to
;; (float:<sseunpackfltmode> tmp).
;; NOTE(review): the enabling condition is not visible in this excerpt.
4870 (define_expand "vec_unpacks_float_lo_<mode>"
4871 [(match_operand:<sseunpackfltmode> 0 "register_operand")
4872 (match_operand:VI2_AVX512F 1 "register_operand")]
4875 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
4877 emit_insn (gen_vec_unpacks_lo_<mode> (tmp, operands[1]));
4878 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
4879 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
;; Expander vec_unpacku_float_hi_<mode> (iterated over VI2_AVX512F).
;; The temporary (<sseunpackmode>) is filled by gen_vec_unpacku_hi_<mode>
;; and operand 0 is then set to (float:<sseunpackfltmode> tmp).
;; NOTE(review): a plain FLOAT rtx is used after the "unpacku" step;
;; presumably that is sufficient because the widened values cannot be
;; negative -- confirm.
4883 (define_expand "vec_unpacku_float_hi_<mode>"
4884 [(match_operand:<sseunpackfltmode> 0 "register_operand")
4885 (match_operand:VI2_AVX512F 1 "register_operand")]
4888 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
4890 emit_insn (gen_vec_unpacku_hi_<mode> (tmp, operands[1]));
4891 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
4892 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
;; Expander vec_unpacku_float_lo_<mode> (iterated over VI2_AVX512F).
;; The temporary (<sseunpackmode>) is filled by gen_vec_unpacku_lo_<mode>
;; and operand 0 is then set to (float:<sseunpackfltmode> tmp).
;; NOTE(review): a plain FLOAT rtx is used after the "unpacku" step;
;; presumably that is sufficient because the widened values cannot be
;; negative -- confirm.
4896 (define_expand "vec_unpacku_float_lo_<mode>"
4897 [(match_operand:<sseunpackfltmode> 0 "register_operand")
4898 (match_operand:VI2_AVX512F 1 "register_operand")]
4901 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
4903 emit_insn (gen_vec_unpacku_lo_<mode> (tmp, operands[1]));
4904 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
4905 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
;; Expander vec_unpacks_float_hi_v4si: V4SI input (operand 1), V2DF
;; output (operand 0).  Visible selectors: {2,3,2,3} applied to operand 1
;; and {0,1} in the part that sets operand 0.  A V4SI scratch is
;; allocated as operands[2].
;; NOTE(review): presumably the scratch receives the {2,3,2,3} shuffle
;; and its low two elements are then converted; the set destinations and
;; the conversion rtx are not visible here -- confirm.
4909 (define_expand "vec_unpacks_float_hi_v4si"
4912 (match_operand:V4SI 1 "nonimmediate_operand")
4913 (parallel [(const_int 2) (const_int 3)
4914 (const_int 2) (const_int 3)])))
4915 (set (match_operand:V2DF 0 "register_operand")
4919 (parallel [(const_int 0) (const_int 1)]))))]
4921 "operands[2] = gen_reg_rtx (V4SImode);")
;; Expander vec_unpacks_float_lo_v4si: sets a V2DF register (operand 0)
;; from elements {0,1} of the V4SI operand 1.
;; NOTE(review): the conversion rtx and the enabling condition are not
;; visible in this excerpt.
4923 (define_expand "vec_unpacks_float_lo_v4si"
4924 [(set (match_operand:V2DF 0 "register_operand")
4927 (match_operand:V4SI 1 "nonimmediate_operand")
4928 (parallel [(const_int 0) (const_int 1)]))))]
;; Expander vec_unpacks_float_hi_v8si: V8SI input (operand 1), V4DF
;; output (operand 0).  The visible selector {4,5,6,7} names the upper
;; four elements of operand 1; a V4SI scratch is allocated as operands[2].
;; NOTE(review): the rtx converting the scratch to V4DF is not visible in
;; this excerpt.
4931 (define_expand "vec_unpacks_float_hi_v8si"
4934 (match_operand:V8SI 1 "nonimmediate_operand")
4935 (parallel [(const_int 4) (const_int 5)
4936 (const_int 6) (const_int 7)])))
4937 (set (match_operand:V4DF 0 "register_operand")
4941 "operands[2] = gen_reg_rtx (V4SImode);")
;; Expander vec_unpacks_float_lo_v8si: sets a V4DF register (operand 0)
;; from elements {0,1,2,3} of the V8SI operand 1.
;; NOTE(review): the conversion rtx and the enabling condition are not
;; visible in this excerpt.
4943 (define_expand "vec_unpacks_float_lo_v8si"
4944 [(set (match_operand:V4DF 0 "register_operand")
4947 (match_operand:V8SI 1 "nonimmediate_operand")
4948 (parallel [(const_int 0) (const_int 1)
4949 (const_int 2) (const_int 3)]))))]
;; Expander vec_unpacks_float_hi_v16si: V16SI input (operand 1), V8DF
;; output (operand 0).  The visible selector {8..15} names the upper
;; eight elements of operand 1; a V8SI scratch is allocated as
;; operands[2].
;; NOTE(review): the rtx converting the scratch to V8DF is not visible in
;; this excerpt.
4952 (define_expand "vec_unpacks_float_hi_v16si"
4955 (match_operand:V16SI 1 "nonimmediate_operand")
4956 (parallel [(const_int 8) (const_int 9)
4957 (const_int 10) (const_int 11)
4958 (const_int 12) (const_int 13)
4959 (const_int 14) (const_int 15)])))
4960 (set (match_operand:V8DF 0 "register_operand")
4964 "operands[2] = gen_reg_rtx (V8SImode);")
;; Expander vec_unpacks_float_lo_v16si: sets a V8DF register (operand 0)
;; from elements {0..7} of the V16SI operand 1.
;; NOTE(review): the conversion rtx and the enabling condition are not
;; visible in this excerpt.
4966 (define_expand "vec_unpacks_float_lo_v16si"
4967 [(set (match_operand:V8DF 0 "register_operand")
4970 (match_operand:V16SI 1 "nonimmediate_operand")
4971 (parallel [(const_int 0) (const_int 1)
4972 (const_int 2) (const_int 3)
4973 (const_int 4) (const_int 5)
4974 (const_int 6) (const_int 7)]))))]
;; Expander vec_unpacku_float_hi_v4si: V4SI input (operand 1), V2DF
;; output (operand 0).
;; Operands set up by the preparation code:
;;   operands[3] = zero V2DF vector forced into a register;
;;   operands[4] = V2DF constant built from x, the DFmode value of TWO32r
;;                 (dconst1 passed through real_ldexp with exponent 32);
;;   operands[5] = V4SI scratch;
;;   operands[6..8] = V2DF scratches.
;; Visible template pieces: a {2,3,2,3} selection of operand 1, a {0,1}
;; selection, (lt op6 op3), (and op7 op4), and finally
;; operand 0 = (plus op6 op8).
;; NOTE(review): the set destinations are not visible; presumably op6 is
;; the signed conversion, op7 the "negative" lane mask and op8 the masked
;; 2**32 correction that is added back -- confirm.
4977 (define_expand "vec_unpacku_float_hi_v4si"
4980 (match_operand:V4SI 1 "nonimmediate_operand")
4981 (parallel [(const_int 2) (const_int 3)
4982 (const_int 2) (const_int 3)])))
4987 (parallel [(const_int 0) (const_int 1)]))))
4989 (lt:V2DF (match_dup 6) (match_dup 3)))
4991 (and:V2DF (match_dup 7) (match_dup 4)))
4992 (set (match_operand:V2DF 0 "register_operand")
4993 (plus:V2DF (match_dup 6) (match_dup 8)))]
4996 REAL_VALUE_TYPE TWO32r;
5000 real_ldexp (&TWO32r, &dconst1, 32);
5001 x = const_double_from_real_value (TWO32r, DFmode);
5003 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
5004 operands[4] = force_reg (V2DFmode,
5005 ix86_build_const_vector (V2DFmode, 1, x));
5007 operands[5] = gen_reg_rtx (V4SImode);
5009 for (i = 6; i < 9; i++)
5010 operands[i] = gen_reg_rtx (V2DFmode);
;; Expander vec_unpacku_float_lo_v4si: V4SI input (operand 1), V2DF
;; output (operand 0).
;; Operands set up by the preparation code:
;;   operands[3] = zero V2DF vector forced into a register;
;;   operands[4] = V2DF constant built from x, the DFmode value of TWO32r
;;                 (dconst1 passed through real_ldexp with exponent 32);
;;   operands[5..7] = V2DF scratches.
;; Visible template pieces: a {0,1} selection of operand 1, (lt op5 op3),
;; (and op6 op4), and finally operand 0 = (plus op5 op7).
;; NOTE(review): the set destinations are not visible; presumably op5 is
;; the signed conversion, op6 the "negative" lane mask and op7 the masked
;; 2**32 correction that is added back -- confirm.
5013 (define_expand "vec_unpacku_float_lo_v4si"
5017 (match_operand:V4SI 1 "nonimmediate_operand")
5018 (parallel [(const_int 0) (const_int 1)]))))
5020 (lt:V2DF (match_dup 5) (match_dup 3)))
5022 (and:V2DF (match_dup 6) (match_dup 4)))
5023 (set (match_operand:V2DF 0 "register_operand")
5024 (plus:V2DF (match_dup 5) (match_dup 7)))]
5027 REAL_VALUE_TYPE TWO32r;
5031 real_ldexp (&TWO32r, &dconst1, 32);
5032 x = const_double_from_real_value (TWO32r, DFmode);
5034 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
5035 operands[4] = force_reg (V2DFmode,
5036 ix86_build_const_vector (V2DFmode, 1, x));
5038 for (i = 5; i < 8; i++)
5039 operands[i] = gen_reg_rtx (V2DFmode);
;; Expander vec_unpacku_float_hi_v8si: V8SI register input (operand 1),
;; V4DF result (operand 0), emitted from the C body:
;;   tmp[0] = zero V4DF vector in a register;
;;   tmp[1] = V4DF constant built from x, the DFmode value of TWO32r
;;            (dconst1 passed through real_ldexp with exponent 32);
;;   tmp[5] = high half of operand 1 (gen_vec_extract_hi_v8si);
;;   tmp[2] = tmp[5] converted by gen_floatv4siv4df2;
;;   tmp[3] = (lt tmp[2] tmp[0]);
;;   tmp[4] = tmp[3] AND tmp[1];
;;   operand 0 = tmp[2] + tmp[4].
;; NOTE(review): this reads as "convert as signed, then add 2**32 to the
;; lanes that came out negative", which relies on the LT result being an
;; all-ones lane mask -- confirm.
5042 (define_expand "vec_unpacku_float_hi_v8si"
5043 [(match_operand:V4DF 0 "register_operand")
5044 (match_operand:V8SI 1 "register_operand")]
5047 REAL_VALUE_TYPE TWO32r;
5051 real_ldexp (&TWO32r, &dconst1, 32);
5052 x = const_double_from_real_value (TWO32r, DFmode);
5054 tmp[0] = force_reg (V4DFmode, CONST0_RTX (V4DFmode));
5055 tmp[1] = force_reg (V4DFmode, ix86_build_const_vector (V4DFmode, 1, x));
5056 tmp[5] = gen_reg_rtx (V4SImode);
5058 for (i = 2; i < 5; i++)
5059 tmp[i] = gen_reg_rtx (V4DFmode);
5060 emit_insn (gen_vec_extract_hi_v8si (tmp[5], operands[1]));
5061 emit_insn (gen_floatv4siv4df2 (tmp[2], tmp[5]));
5062 emit_insn (gen_rtx_SET (VOIDmode, tmp[3],
5063 gen_rtx_LT (V4DFmode, tmp[2], tmp[0])));
5064 emit_insn (gen_andv4df3 (tmp[4], tmp[3], tmp[1]));
5065 emit_insn (gen_addv4df3 (operands[0], tmp[2], tmp[4]));
;; Expander vec_unpacku_float_hi_v16si: V16SI register input (operand 1),
;; V8DF result (operand 0), emitted from the C body:
;;   tmp[0] = zero V8DF vector in a register;
;;   tmp[1] = V8DF constant built from x, the DFmode value of TWO32r
;;            (dconst1 passed through real_ldexp with exponent 32);
;;   tmp[3] = high half of operand 1 (gen_vec_extract_hi_v16si);
;;   tmp[2] = tmp[3] converted by gen_floatv8siv8df2;
;;   k      = QImode register set to (lt tmp[2] tmp[0]);
;;   gen_addv8df3_mask (tmp[2], tmp[2], tmp[1], tmp[2], k);
;;   operand 0 = tmp[2].
;; NOTE(review): presumably the masked add applies tmp[1] only in the
;; lanes selected by k and keeps tmp[2] elsewhere -- confirm the operand
;; order of the _mask generator.
5069 (define_expand "vec_unpacku_float_hi_v16si"
5070 [(match_operand:V8DF 0 "register_operand")
5071 (match_operand:V16SI 1 "register_operand")]
5074 REAL_VALUE_TYPE TWO32r;
5077 real_ldexp (&TWO32r, &dconst1, 32);
5078 x = const_double_from_real_value (TWO32r, DFmode);
5080 tmp[0] = force_reg (V8DFmode, CONST0_RTX (V8DFmode));
5081 tmp[1] = force_reg (V8DFmode, ix86_build_const_vector (V8DFmode, 1, x));
5082 tmp[2] = gen_reg_rtx (V8DFmode);
5083 tmp[3] = gen_reg_rtx (V8SImode);
5084 k = gen_reg_rtx (QImode);
5086 emit_insn (gen_vec_extract_hi_v16si (tmp[3], operands[1]));
5087 emit_insn (gen_floatv8siv8df2 (tmp[2], tmp[3]));
5088 emit_insn (gen_rtx_SET (VOIDmode, k,
5089 gen_rtx_LT (QImode, tmp[2], tmp[0])));
5090 emit_insn (gen_addv8df3_mask (tmp[2], tmp[2], tmp[1], tmp[2], k));
5091 emit_move_insn (operands[0], tmp[2]);
;; Expander vec_unpacku_float_lo_v8si: V8SI input (operand 1), V4DF
;; result (operand 0), emitted from the C body:
;;   tmp[0] = zero V4DF vector in a register;
;;   tmp[1] = V4DF constant built from x, the DFmode value of TWO32r
;;            (dconst1 passed through real_ldexp with exponent 32);
;;   tmp[2] = result of gen_avx_cvtdq2pd256_2 applied to operand 1;
;;   tmp[3] = (lt tmp[2] tmp[0]);
;;   tmp[4] = tmp[3] AND tmp[1];
;;   operand 0 = tmp[2] + tmp[4].
;; NOTE(review): presumably gen_avx_cvtdq2pd256_2 converts the low four
;; elements of operand 1; its definition is outside this excerpt.
5095 (define_expand "vec_unpacku_float_lo_v8si"
5096 [(match_operand:V4DF 0 "register_operand")
5097 (match_operand:V8SI 1 "nonimmediate_operand")]
5100 REAL_VALUE_TYPE TWO32r;
5104 real_ldexp (&TWO32r, &dconst1, 32);
5105 x = const_double_from_real_value (TWO32r, DFmode);
5107 tmp[0] = force_reg (V4DFmode, CONST0_RTX (V4DFmode));
5108 tmp[1] = force_reg (V4DFmode, ix86_build_const_vector (V4DFmode, 1, x));
5110 for (i = 2; i < 5; i++)
5111 tmp[i] = gen_reg_rtx (V4DFmode);
5112 emit_insn (gen_avx_cvtdq2pd256_2 (tmp[2], operands[1]));
5113 emit_insn (gen_rtx_SET (VOIDmode, tmp[3],
5114 gen_rtx_LT (V4DFmode, tmp[2], tmp[0])));
5115 emit_insn (gen_andv4df3 (tmp[4], tmp[3], tmp[1]));
5116 emit_insn (gen_addv4df3 (operands[0], tmp[2], tmp[4]));
;; Expander vec_unpacku_float_lo_v16si: V16SI input (operand 1), V8DF
;; result (operand 0), emitted from the C body:
;;   tmp[0] = zero V8DF vector in a register;
;;   tmp[1] = V8DF constant built from x, the DFmode value of TWO32r
;;            (dconst1 passed through real_ldexp with exponent 32);
;;   tmp[2] = result of gen_avx512f_cvtdq2pd512_2 applied to operand 1;
;;   k      = QImode register set to (lt tmp[2] tmp[0]);
;;   gen_addv8df3_mask (tmp[2], tmp[2], tmp[1], tmp[2], k);
;;   operand 0 = tmp[2].
;; NOTE(review): presumably the masked add applies tmp[1] only in the
;; lanes selected by k -- confirm the operand order of the _mask
;; generator.
5120 (define_expand "vec_unpacku_float_lo_v16si"
5121 [(match_operand:V8DF 0 "register_operand")
5122 (match_operand:V16SI 1 "nonimmediate_operand")]
5125 REAL_VALUE_TYPE TWO32r;
5128 real_ldexp (&TWO32r, &dconst1, 32);
5129 x = const_double_from_real_value (TWO32r, DFmode);
5131 tmp[0] = force_reg (V8DFmode, CONST0_RTX (V8DFmode));
5132 tmp[1] = force_reg (V8DFmode, ix86_build_const_vector (V8DFmode, 1, x));
5133 tmp[2] = gen_reg_rtx (V8DFmode);
5134 k = gen_reg_rtx (QImode);
5136 emit_insn (gen_avx512f_cvtdq2pd512_2 (tmp[2], operands[1]));
5137 emit_insn (gen_rtx_SET (VOIDmode, k,
5138 gen_rtx_LT (QImode, tmp[2], tmp[0])));
5139 emit_insn (gen_addv8df3_mask (tmp[2], tmp[2], tmp[1], tmp[2], k));
5140 emit_move_insn (operands[0], tmp[2]);
;; Expander vec_pack_trunc_<mode> (iterated over VF2_512_256).
;; Operands 1 and 2 are each float_truncate'd to <sf2dfmode>, and
;; operand 0 (<ssePSmode>) is set from a vec_concat.  The preparation
;; code allocates two <sf2dfmode> scratches as operands[3] and
;; operands[4].
;; NOTE(review): presumably the scratches hold the two truncated halves
;; that feed the vec_concat; the set destinations and concat operands are
;; not visible in this excerpt.
5144 (define_expand "vec_pack_trunc_<mode>"
5146 (float_truncate:<sf2dfmode>
5147 (match_operand:VF2_512_256 1 "nonimmediate_operand")))
5149 (float_truncate:<sf2dfmode>
5150 (match_operand:VF2_512_256 2 "nonimmediate_operand")))
5151 (set (match_operand:<ssePSmode> 0 "register_operand")
5152 (vec_concat:<ssePSmode>
5157 operands[3] = gen_reg_rtx (<sf2dfmode>mode);
5158 operands[4] = gen_reg_rtx (<sf2dfmode>mode);
;; Expander vec_pack_trunc_v2df: two V2DF inputs (operands 1, 2), one
;; V4SF result (operand 0).  Two code paths are visible:
;;   * TARGET_AVX && !TARGET_PREFER_AVX128: operand 1 is forced into a
;;     register, concatenated with operand 2 into a V4DF temporary
;;     (gen_avx_vec_concatv4df), and converted in one step with
;;     gen_avx_cvtpd2ps256.
;;   * remaining statements: each input is converted separately with
;;     gen_sse2_cvtpd2ps into its own V4SF temporary, and the two
;;     temporaries are combined into operand 0 with gen_sse_movlhps.
5161 (define_expand "vec_pack_trunc_v2df"
5162 [(match_operand:V4SF 0 "register_operand")
5163 (match_operand:V2DF 1 "nonimmediate_operand")
5164 (match_operand:V2DF 2 "nonimmediate_operand")]
5169 if (TARGET_AVX && !TARGET_PREFER_AVX128)
5171 tmp0 = gen_reg_rtx (V4DFmode);
5172 tmp1 = force_reg (V2DFmode, operands[1]);
5174 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
5175 emit_insn (gen_avx_cvtpd2ps256 (operands[0], tmp0));
5179 tmp0 = gen_reg_rtx (V4SFmode);
5180 tmp1 = gen_reg_rtx (V4SFmode);
5182 emit_insn (gen_sse2_cvtpd2ps (tmp0, operands[1]));
5183 emit_insn (gen_sse2_cvtpd2ps (tmp1, operands[2]));
5184 emit_insn (gen_sse_movlhps (operands[0], tmp0, tmp1));
;; Expander vec_pack_sfix_trunc_v8df: two V8DF inputs (operands 1, 2),
;; one V16SI result (operand 0).  Each input is converted to a V8SI
;; temporary with gen_fix_truncv8dfv8si2, and the two temporaries are
;; joined into operand 0 with gen_avx_vec_concatv16si (r1 first, r2
;; second).
5189 (define_expand "vec_pack_sfix_trunc_v8df"
5190 [(match_operand:V16SI 0 "register_operand")
5191 (match_operand:V8DF 1 "nonimmediate_operand")
5192 (match_operand:V8DF 2 "nonimmediate_operand")]
5197 r1 = gen_reg_rtx (V8SImode);
5198 r2 = gen_reg_rtx (V8SImode);
5200 emit_insn (gen_fix_truncv8dfv8si2 (r1, operands[1]));
5201 emit_insn (gen_fix_truncv8dfv8si2 (r2, operands[2]));
5202 emit_insn (gen_avx_vec_concatv16si (operands[0], r1, r2));
;; Expander vec_pack_sfix_trunc_v4df: two V4DF inputs (operands 1, 2),
;; one V8SI result (operand 0).  Each input is converted to a V4SI
;; temporary with gen_fix_truncv4dfv4si2, and the two temporaries are
;; joined into operand 0 with gen_avx_vec_concatv8si (r1 first, r2
;; second).
5206 (define_expand "vec_pack_sfix_trunc_v4df"
5207 [(match_operand:V8SI 0 "register_operand")
5208 (match_operand:V4DF 1 "nonimmediate_operand")
5209 (match_operand:V4DF 2 "nonimmediate_operand")]
5214 r1 = gen_reg_rtx (V4SImode);
5215 r2 = gen_reg_rtx (V4SImode);
5217 emit_insn (gen_fix_truncv4dfv4si2 (r1, operands[1]));
5218 emit_insn (gen_fix_truncv4dfv4si2 (r2, operands[2]));
5219 emit_insn (gen_avx_vec_concatv8si (operands[0], r1, r2));
;; Expander vec_pack_sfix_trunc_v2df: two V2DF inputs (operands 1, 2),
;; one V4SI result (operand 0).  Two code paths are visible:
;;   * TARGET_AVX && !TARGET_PREFER_AVX128: operand 1 is forced into a
;;     register, concatenated with operand 2 into a V4DF temporary, and
;;     converted in one step with gen_fix_truncv4dfv4si2.
;;   * remaining statements: each input is converted with
;;     gen_sse2_cvttpd2dq into its own V4SI temporary; the two are then
;;     viewed as V2DI (gen_lowpart) and merged with
;;     gen_vec_interleave_lowv2di into tmp2, whose V4SI view is moved to
;;     operand 0.
5223 (define_expand "vec_pack_sfix_trunc_v2df"
5224 [(match_operand:V4SI 0 "register_operand")
5225 (match_operand:V2DF 1 "nonimmediate_operand")
5226 (match_operand:V2DF 2 "nonimmediate_operand")]
5229 rtx tmp0, tmp1, tmp2;
5231 if (TARGET_AVX && !TARGET_PREFER_AVX128)
5233 tmp0 = gen_reg_rtx (V4DFmode);
5234 tmp1 = force_reg (V2DFmode, operands[1]);
5236 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
5237 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp0));
5241 tmp0 = gen_reg_rtx (V4SImode);
5242 tmp1 = gen_reg_rtx (V4SImode);
5243 tmp2 = gen_reg_rtx (V2DImode);
5245 emit_insn (gen_sse2_cvttpd2dq (tmp0, operands[1]));
5246 emit_insn (gen_sse2_cvttpd2dq (tmp1, operands[2]));
5247 emit_insn (gen_vec_interleave_lowv2di (tmp2,
5248 gen_lowpart (V2DImode, tmp0),
5249 gen_lowpart (V2DImode, tmp1)));
5250 emit_move_insn (operands[0], gen_lowpart (V4SImode, tmp2));
;; Mode attribute ssepackfltmode: maps a double-precision vector mode to
;; the SImode vector mode with twice as many elements (V8DF->V16SI,
;; V4DF->V8SI, V2DF->V4SI).  It is the result mode of
;; vec_pack_ufix_trunc_<mode>.
5255 (define_mode_attr ssepackfltmode
5256 [(V8DF "V16SI") (V4DF "V8SI") (V2DF "V4SI")])
;; Expander vec_pack_ufix_trunc_<mode> (iterated over VF2): two register
;; inputs (operands 1, 2) and one <ssepackfltmode> result (operand 0).
;;   * V8DFmode: each input goes through gen_ufix_truncv8dfv8si2 into a
;;     V8SI temporary and the pair is joined with gen_avx_vec_concatv16si.
;;   * other modes:
;;       - ix86_expand_adjust_ufix_to_sfix_si is called on each input; it
;;         returns an adjusted value (tmp[0], tmp[1]) and writes a second
;;         value through its pointer argument (tmp[2], tmp[3]);
;;       - the adjusted values are packed with
;;         gen_vec_pack_sfix_trunc_<mode> into tmp[4];
;;       - ix86_expand_vec_extract_even_odd (..., 0) combines tmp[2] and
;;         tmp[3] into tmp[5]; this is done directly in the integer mode
;;         when the result is V4SI or TARGET_AVX2 holds, otherwise
;;         through V8SF views of the operands;
;;       - operand 0 = tmp[4] XOR tmp[5] (a move is emitted only if
;;         expand_simple_binop returned a different register).
;; NOTE(review): presumably the helper biases out-of-signed-range inputs
;; and the XOR restores the top bit afterwards; the helper is defined
;; outside this file -- confirm.
5258 (define_expand "vec_pack_ufix_trunc_<mode>"
5259 [(match_operand:<ssepackfltmode> 0 "register_operand")
5260 (match_operand:VF2 1 "register_operand")
5261 (match_operand:VF2 2 "register_operand")]
5264 if (<MODE>mode == V8DFmode)
5268 r1 = gen_reg_rtx (V8SImode);
5269 r2 = gen_reg_rtx (V8SImode);
5271 emit_insn (gen_ufix_truncv8dfv8si2 (r1, operands[1]));
5272 emit_insn (gen_ufix_truncv8dfv8si2 (r2, operands[2]));
5273 emit_insn (gen_avx_vec_concatv16si (operands[0], r1, r2));
5278 tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]);
5279 tmp[1] = ix86_expand_adjust_ufix_to_sfix_si (operands[2], &tmp[3]);
5280 tmp[4] = gen_reg_rtx (<ssepackfltmode>mode);
5281 emit_insn (gen_vec_pack_sfix_trunc_<mode> (tmp[4], tmp[0], tmp[1]));
5282 if (<ssepackfltmode>mode == V4SImode || TARGET_AVX2)
5284 tmp[5] = gen_reg_rtx (<ssepackfltmode>mode);
5285 ix86_expand_vec_extract_even_odd (tmp[5], tmp[2], tmp[3], 0);
5289 tmp[5] = gen_reg_rtx (V8SFmode);
5290 ix86_expand_vec_extract_even_odd (tmp[5], gen_lowpart (V8SFmode, tmp[2]),
5291 gen_lowpart (V8SFmode, tmp[3]), 0);
5292 tmp[5] = gen_lowpart (V8SImode, tmp[5]);
5294 tmp[6] = expand_simple_binop (<ssepackfltmode>mode, XOR, tmp[4], tmp[5],
5295 operands[0], 0, OPTAB_DIRECT);
5296 if (tmp[6] != operands[0])
5297 emit_move_insn (operands[0], tmp[6]);
;; Expander vec_pack_sfix_v4df: two V4DF inputs (operands 1, 2), one V8SI
;; result (operand 0).  Each input is converted to a V4SI temporary with
;; gen_avx_cvtpd2dq256, and the two temporaries are joined into operand 0
;; with gen_avx_vec_concatv8si (r1 first, r2 second).
5303 (define_expand "vec_pack_sfix_v4df"
5304 [(match_operand:V8SI 0 "register_operand")
5305 (match_operand:V4DF 1 "nonimmediate_operand")
5306 (match_operand:V4DF 2 "nonimmediate_operand")]
5311 r1 = gen_reg_rtx (V4SImode);
5312 r2 = gen_reg_rtx (V4SImode);
5314 emit_insn (gen_avx_cvtpd2dq256 (r1, operands[1]));
5315 emit_insn (gen_avx_cvtpd2dq256 (r2, operands[2]));
5316 emit_insn (gen_avx_vec_concatv8si (operands[0], r1, r2));
;; Expander vec_pack_sfix_v2df: two V2DF inputs (operands 1, 2), one V4SI
;; result (operand 0).  Two code paths are visible:
;;   * TARGET_AVX && !TARGET_PREFER_AVX128: operand 1 is forced into a
;;     register, concatenated with operand 2 into a V4DF temporary, and
;;     converted in one step with gen_avx_cvtpd2dq256.
;;   * remaining statements: each input is converted with
;;     gen_sse2_cvtpd2dq into its own V4SI temporary; the two are then
;;     viewed as V2DI (gen_lowpart) and merged with
;;     gen_vec_interleave_lowv2di into tmp2, whose V4SI view is moved to
;;     operand 0.
5320 (define_expand "vec_pack_sfix_v2df"
5321 [(match_operand:V4SI 0 "register_operand")
5322 (match_operand:V2DF 1 "nonimmediate_operand")
5323 (match_operand:V2DF 2 "nonimmediate_operand")]
5326 rtx tmp0, tmp1, tmp2;
5328 if (TARGET_AVX && !TARGET_PREFER_AVX128)
5330 tmp0 = gen_reg_rtx (V4DFmode);
5331 tmp1 = force_reg (V2DFmode, operands[1]);
5333 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
5334 emit_insn (gen_avx_cvtpd2dq256 (operands[0], tmp0));
5338 tmp0 = gen_reg_rtx (V4SImode);
5339 tmp1 = gen_reg_rtx (V4SImode);
5340 tmp2 = gen_reg_rtx (V2DImode);
5342 emit_insn (gen_sse2_cvtpd2dq (tmp0, operands[1]));
5343 emit_insn (gen_sse2_cvtpd2dq (tmp1, operands[2]));
5344 emit_insn (gen_vec_interleave_lowv2di (tmp2,
5345 gen_lowpart (V2DImode, tmp0),
5346 gen_lowpart (V2DImode, tmp1)));
5347 emit_move_insn (operands[0], gen_lowpart (V4SImode, tmp2));
5352 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5354 ;; Parallel single-precision floating point element swizzling
5356 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander sse_movhlps_exp: front end for the sse_movhlps insn with
;; three V4SF nonimmediate operands.  The C body lets
;; ix86_fixup_binary_operands choose a destination (dst), emits
;; gen_sse_movhlps into it, and copies dst to operand 0 when the helper
;; returned something other than operand 0.
;; NOTE(review): only the first selector index (6) of the RTL template is
;; visible in this excerpt.
5358 (define_expand "sse_movhlps_exp"
5359 [(set (match_operand:V4SF 0 "nonimmediate_operand")
5362 (match_operand:V4SF 1 "nonimmediate_operand")
5363 (match_operand:V4SF 2 "nonimmediate_operand"))
5364 (parallel [(const_int 6)
5370 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
5372 emit_insn (gen_sse_movhlps (dst, operands[1], operands[2]));
5374 /* Fix up the destination if needed. */
5375 if (dst != operands[0])
5376 emit_move_insn (operands[0], dst);
;; Insn sse_movhlps: V4SF operands 0-2 with five constraint alternatives.
;; Enabled for TARGET_SSE when operands 1 and 2 are not both in memory.
;; Alternatives, in order (see the "isa" attribute):
;;   0  movhlps   non-AVX, operand 1 tied to operand 0;
;;   1  vmovhlps  AVX three-operand form;
;;   2  movlps    non-AVX, reads the %H2 form of offsettable memory op 2;
;;   3  vmovlps   AVX form of alternative 2;
;;   4  %vmovhps  stores register operand 2 into memory operand 0.
;; NOTE(review): only the first selector index (6) of the RTL template is
;; visible in this excerpt.
5381 (define_insn "sse_movhlps"
5382 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,m")
5385 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
5386 (match_operand:V4SF 2 "nonimmediate_operand" " x,x,o,o,x"))
5387 (parallel [(const_int 6)
5391 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
5393 movhlps\t{%2, %0|%0, %2}
5394 vmovhlps\t{%2, %1, %0|%0, %1, %2}
5395 movlps\t{%H2, %0|%0, %H2}
5396 vmovlps\t{%H2, %1, %0|%0, %1, %H2}
5397 %vmovhps\t{%2, %0|%q0, %2}"
5398 [(set_attr "isa" "noavx,avx,noavx,avx,*")
5399 (set_attr "type" "ssemov")
5400 (set_attr "ssememalign" "64")
5401 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
5402 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
;; Expander sse_movlhps_exp: front end for the sse_movlhps insn with
;; three V4SF nonimmediate operands.  The C body lets
;; ix86_fixup_binary_operands choose a destination (dst), emits
;; gen_sse_movlhps into it, and copies dst to operand 0 when the helper
;; returned something other than operand 0.
;; NOTE(review): only the first selector index (0) of the RTL template is
;; visible in this excerpt.
5404 (define_expand "sse_movlhps_exp"
5405 [(set (match_operand:V4SF 0 "nonimmediate_operand")
5408 (match_operand:V4SF 1 "nonimmediate_operand")
5409 (match_operand:V4SF 2 "nonimmediate_operand"))
5410 (parallel [(const_int 0)
5416 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
5418 emit_insn (gen_sse_movlhps (dst, operands[1], operands[2]));
5420 /* Fix up the destination if needed. */
5421 if (dst != operands[0])
5422 emit_move_insn (operands[0], dst);
;; Insn sse_movlhps: V4SF operands 0-2 with five constraint alternatives.
;; Enabled for TARGET_SSE when ix86_binary_operator_ok accepts the
;; operands.  Alternatives, in order (see the "isa" attribute):
;;   0  movlhps   non-AVX, operand 1 tied to operand 0;
;;   1  vmovlhps  AVX three-operand form;
;;   2  movhps    non-AVX, loads from memory operand 2;
;;   3  vmovhps   AVX form of alternative 2;
;;   4  %vmovlps  stores register operand 2 to the %H0 form of the
;;                offsettable memory operand 0.
;; NOTE(review): only the first selector index (0) of the RTL template is
;; visible in this excerpt.
5427 (define_insn "sse_movlhps"
5428 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,o")
5431 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
5432 (match_operand:V4SF 2 "nonimmediate_operand" " x,x,m,m,x"))
5433 (parallel [(const_int 0)
5437 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
5439 movlhps\t{%2, %0|%0, %2}
5440 vmovlhps\t{%2, %1, %0|%0, %1, %2}
5441 movhps\t{%2, %0|%0, %q2}
5442 vmovhps\t{%2, %1, %0|%0, %1, %q2}
5443 %vmovlps\t{%2, %H0|%H0, %2}"
5444 [(set_attr "isa" "noavx,avx,noavx,avx,*")
5445 (set_attr "type" "ssemov")
5446 (set_attr "ssememalign" "64")
5447 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
5448 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
;; Insn avx512f_unpckhps512 (with <mask_name> variants): V16SF result
;; from register operand 1 and register-or-memory operand 2.  The
;; selector pairs index i with index i+16 for i in {2,3,6,7,10,11,14,15},
;; i.e. the upper two elements of every group of four, alternating
;; between the two inputs.  Emits vunpckhps with the optional
;; <mask_operand3> decoration.
;; NOTE(review): the enabling condition is not visible in this excerpt.
5450 (define_insn "<mask_codefor>avx512f_unpckhps512<mask_name>"
5451 [(set (match_operand:V16SF 0 "register_operand" "=v")
5454 (match_operand:V16SF 1 "register_operand" "v")
5455 (match_operand:V16SF 2 "nonimmediate_operand" "vm"))
5456 (parallel [(const_int 2) (const_int 18)
5457 (const_int 3) (const_int 19)
5458 (const_int 6) (const_int 22)
5459 (const_int 7) (const_int 23)
5460 (const_int 10) (const_int 26)
5461 (const_int 11) (const_int 27)
5462 (const_int 14) (const_int 30)
5463 (const_int 15) (const_int 31)])))]
5465 "vunpckhps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
5466 [(set_attr "type" "sselog")
5467 (set_attr "prefix" "evex")
5468 (set_attr "mode" "V16SF")])
5470 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; Insn avx_unpckhps256: V8SF result from register operand 1 and
;; register-or-memory operand 2.  The selector pairs index i with index
;; i+8 for i in {2,3,6,7}, i.e. the upper two elements of each group of
;; four, alternating between the two inputs.  Emits vunpckhps.
;; NOTE(review): the enabling condition is not visible in this excerpt.
5471 (define_insn "avx_unpckhps256"
5472 [(set (match_operand:V8SF 0 "register_operand" "=x")
5475 (match_operand:V8SF 1 "register_operand" "x")
5476 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
5477 (parallel [(const_int 2) (const_int 10)
5478 (const_int 3) (const_int 11)
5479 (const_int 6) (const_int 14)
5480 (const_int 7) (const_int 15)])))]
5482 "vunpckhps\t{%2, %1, %0|%0, %1, %2}"
5483 [(set_attr "type" "sselog")
5484 (set_attr "prefix" "vex")
5485 (set_attr "mode" "V8SF")])
;; Expander vec_interleave_highv8sf: V8SF operands 1 and 2, V8SF result
;; (operand 0).  Three selectors are visible, in order:
;;   {0,8,1,9,4,12,5,13}     - same index list as avx_unpcklps256;
;;   {2,10,3,11,6,14,7,15}   - same index list as avx_unpckhps256;
;;   {4,5,6,7,12,13,14,15}   - in the part that sets operand 0.
;; Two V8SF scratches are allocated as operands[3] and operands[4].
;; NOTE(review): presumably the scratches receive the two in-lane
;; unpacks and the final selector takes the upper half of each; the set
;; destinations are not visible in this excerpt -- confirm.
5487 (define_expand "vec_interleave_highv8sf"
5491 (match_operand:V8SF 1 "register_operand" "x")
5492 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
5493 (parallel [(const_int 0) (const_int 8)
5494 (const_int 1) (const_int 9)
5495 (const_int 4) (const_int 12)
5496 (const_int 5) (const_int 13)])))
5502 (parallel [(const_int 2) (const_int 10)
5503 (const_int 3) (const_int 11)
5504 (const_int 6) (const_int 14)
5505 (const_int 7) (const_int 15)])))
5506 (set (match_operand:V8SF 0 "register_operand")
5511 (parallel [(const_int 4) (const_int 5)
5512 (const_int 6) (const_int 7)
5513 (const_int 12) (const_int 13)
5514 (const_int 14) (const_int 15)])))]
5517 operands[3] = gen_reg_rtx (V8SFmode);
5518 operands[4] = gen_reg_rtx (V8SFmode);
;; Insn vec_interleave_highv4sf: V4SF result built with selector
;; {2,6,3,7}, i.e. elements 2 and 3 of operand 1 alternated with elements
;; 2 and 3 of operand 2 (indices 6 and 7 in the selector).
;; Alternative 0 is the non-AVX unpckhps (operand 1 tied to operand 0),
;; alternative 1 the AVX vunpckhps.
;; NOTE(review): the enabling condition is not visible in this excerpt.
5521 (define_insn "vec_interleave_highv4sf"
5522 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
5525 (match_operand:V4SF 1 "register_operand" "0,x")
5526 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
5527 (parallel [(const_int 2) (const_int 6)
5528 (const_int 3) (const_int 7)])))]
5531 unpckhps\t{%2, %0|%0, %2}
5532 vunpckhps\t{%2, %1, %0|%0, %1, %2}"
5533 [(set_attr "isa" "noavx,avx")
5534 (set_attr "type" "sselog")
5535 (set_attr "prefix" "orig,vex")
5536 (set_attr "mode" "V4SF")])
;; Insn avx512f_unpcklps512 (with <mask_name> variants): V16SF result
;; from register operand 1 and register-or-memory operand 2.  The
;; selector pairs index i with index i+16 for i in {0,1,4,5,8,9,12,13},
;; i.e. the lower two elements of every group of four, alternating
;; between the two inputs.  Emits vunpcklps with the optional
;; <mask_operand3> decoration.
;; NOTE(review): the enabling condition is not visible in this excerpt.
5538 (define_insn "<mask_codefor>avx512f_unpcklps512<mask_name>"
5539 [(set (match_operand:V16SF 0 "register_operand" "=v")
5542 (match_operand:V16SF 1 "register_operand" "v")
5543 (match_operand:V16SF 2 "nonimmediate_operand" "vm"))
5544 (parallel [(const_int 0) (const_int 16)
5545 (const_int 1) (const_int 17)
5546 (const_int 4) (const_int 20)
5547 (const_int 5) (const_int 21)
5548 (const_int 8) (const_int 24)
5549 (const_int 9) (const_int 25)
5550 (const_int 12) (const_int 28)
5551 (const_int 13) (const_int 29)])))]
5553 "vunpcklps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
5554 [(set_attr "type" "sselog")
5555 (set_attr "prefix" "evex")
5556 (set_attr "mode" "V16SF")])
5558 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; Insn avx_unpcklps256: V8SF result from register operand 1 and
;; register-or-memory operand 2.  The selector pairs index i with index
;; i+8 for i in {0,1,4,5}, i.e. the lower two elements of each group of
;; four, alternating between the two inputs.  Emits vunpcklps.
;; NOTE(review): the enabling condition is not visible in this excerpt.
5559 (define_insn "avx_unpcklps256"
5560 [(set (match_operand:V8SF 0 "register_operand" "=x")
5563 (match_operand:V8SF 1 "register_operand" "x")
5564 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
5565 (parallel [(const_int 0) (const_int 8)
5566 (const_int 1) (const_int 9)
5567 (const_int 4) (const_int 12)
5568 (const_int 5) (const_int 13)])))]
5570 "vunpcklps\t{%2, %1, %0|%0, %1, %2}"
5571 [(set_attr "type" "sselog")
5572 (set_attr "prefix" "vex")
5573 (set_attr "mode" "V8SF")])
;; Expander vec_interleave_lowv8sf: V8SF operands 1 and 2, V8SF result
;; (operand 0).  Three selectors are visible, in order:
;;   {0,8,1,9,4,12,5,13}     - same index list as avx_unpcklps256;
;;   {2,10,3,11,6,14,7,15}   - same index list as avx_unpckhps256;
;;   {0,1,2,3,8,9,10,11}     - in the part that sets operand 0.
;; Two V8SF scratches are allocated as operands[3] and operands[4].
;; NOTE(review): presumably the scratches receive the two in-lane
;; unpacks and the final selector takes the lower half of each; the set
;; destinations are not visible in this excerpt -- confirm.
5575 (define_expand "vec_interleave_lowv8sf"
5579 (match_operand:V8SF 1 "register_operand" "x")
5580 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
5581 (parallel [(const_int 0) (const_int 8)
5582 (const_int 1) (const_int 9)
5583 (const_int 4) (const_int 12)
5584 (const_int 5) (const_int 13)])))
5590 (parallel [(const_int 2) (const_int 10)
5591 (const_int 3) (const_int 11)
5592 (const_int 6) (const_int 14)
5593 (const_int 7) (const_int 15)])))
5594 (set (match_operand:V8SF 0 "register_operand")
5599 (parallel [(const_int 0) (const_int 1)
5600 (const_int 2) (const_int 3)
5601 (const_int 8) (const_int 9)
5602 (const_int 10) (const_int 11)])))]
5605 operands[3] = gen_reg_rtx (V8SFmode);
5606 operands[4] = gen_reg_rtx (V8SFmode);
;; Insn vec_interleave_lowv4sf: V4SF result built with selector
;; {0,4,1,5}, i.e. elements 0 and 1 of operand 1 alternated with elements
;; 0 and 1 of operand 2 (indices 4 and 5 in the selector).
;; Alternative 0 is the non-AVX unpcklps (operand 1 tied to operand 0),
;; alternative 1 the AVX vunpcklps.
;; NOTE(review): the enabling condition is not visible in this excerpt.
5609 (define_insn "vec_interleave_lowv4sf"
5610 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
5613 (match_operand:V4SF 1 "register_operand" "0,x")
5614 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
5615 (parallel [(const_int 0) (const_int 4)
5616 (const_int 1) (const_int 5)])))]
5619 unpcklps\t{%2, %0|%0, %2}
5620 vunpcklps\t{%2, %1, %0|%0, %1, %2}"
5621 [(set_attr "isa" "noavx,avx")
5622 (set_attr "type" "sselog")
5623 (set_attr "prefix" "orig,vex")
5624 (set_attr "mode" "V4SF")])
5626 ;; These are modeled with the same vec_concat as the others so that we
5627 ;; capture users of shufps that can use the new instructions
;; Insn avx_movshdup256: V8SF result from register-or-memory operand 1.
;; The visible selector {1,1,3,3,5,5,7,7} repeats each odd-indexed
;; element twice.  Emits vmovshdup.
;; NOTE(review): the rtx between the operand and the selector, and the
;; enabling condition, are not visible in this excerpt.
5628 (define_insn "avx_movshdup256"
5629 [(set (match_operand:V8SF 0 "register_operand" "=x")
5632 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
5634 (parallel [(const_int 1) (const_int 1)
5635 (const_int 3) (const_int 3)
5636 (const_int 5) (const_int 5)
5637 (const_int 7) (const_int 7)])))]
5639 "vmovshdup\t{%1, %0|%0, %1}"
5640 [(set_attr "type" "sse")
5641 (set_attr "prefix" "vex")
5642 (set_attr "mode" "V8SF")])
;; Insn sse3_movshdup: V4SF result from register-or-memory operand 1.
;; Emits %vmovshdup; the attributes mark a rep prefix (prefix_rep = 1)
;; and a "maybe_vex" encoding.
;; NOTE(review): only the first selector index (1) and no enabling
;; condition are visible in this excerpt.
5644 (define_insn "sse3_movshdup"
5645 [(set (match_operand:V4SF 0 "register_operand" "=x")
5648 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
5650 (parallel [(const_int 1)
5655 "%vmovshdup\t{%1, %0|%0, %1}"
5656 [(set_attr "type" "sse")
5657 (set_attr "prefix_rep" "1")
5658 (set_attr "prefix" "maybe_vex")
5659 (set_attr "mode" "V4SF")])
;; Insn avx512f_movshdup512 (with <mask_name> variants): V16SF result
;; from register-or-memory operand 1.  The visible selector
;; {1,1,3,3,...,15,15} repeats each odd-indexed element twice.  Emits
;; vmovshdup with the optional <mask_operand2> decoration.
;; NOTE(review): the rtx between the operand and the selector, and the
;; enabling condition, are not visible in this excerpt.
5661 (define_insn "<mask_codefor>avx512f_movshdup512<mask_name>"
5662 [(set (match_operand:V16SF 0 "register_operand" "=v")
5665 (match_operand:V16SF 1 "nonimmediate_operand" "vm")
5667 (parallel [(const_int 1) (const_int 1)
5668 (const_int 3) (const_int 3)
5669 (const_int 5) (const_int 5)
5670 (const_int 7) (const_int 7)
5671 (const_int 9) (const_int 9)
5672 (const_int 11) (const_int 11)
5673 (const_int 13) (const_int 13)
5674 (const_int 15) (const_int 15)])))]
5676 "vmovshdup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5677 [(set_attr "type" "sse")
5678 (set_attr "prefix" "evex")
5679 (set_attr "mode" "V16SF")])
;; Insn avx_movsldup256: V8SF result from register-or-memory operand 1.
;; The visible selector {0,0,2,2,4,4,6,6} repeats each even-indexed
;; element twice.  Emits vmovsldup.
;; NOTE(review): the rtx between the operand and the selector, and the
;; enabling condition, are not visible in this excerpt.
5681 (define_insn "avx_movsldup256"
5682 [(set (match_operand:V8SF 0 "register_operand" "=x")
5685 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
5687 (parallel [(const_int 0) (const_int 0)
5688 (const_int 2) (const_int 2)
5689 (const_int 4) (const_int 4)
5690 (const_int 6) (const_int 6)])))]
5692 "vmovsldup\t{%1, %0|%0, %1}"
5693 [(set_attr "type" "sse")
5694 (set_attr "prefix" "vex")
5695 (set_attr "mode" "V8SF")])
;; Insn sse3_movsldup: V4SF result from register-or-memory operand 1.
;; Emits %vmovsldup; the attributes mark a rep prefix (prefix_rep = 1)
;; and a "maybe_vex" encoding.
;; NOTE(review): only the first selector index (0) and no enabling
;; condition are visible in this excerpt.
5697 (define_insn "sse3_movsldup"
5698 [(set (match_operand:V4SF 0 "register_operand" "=x")
5701 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
5703 (parallel [(const_int 0)
5708 "%vmovsldup\t{%1, %0|%0, %1}"
5709 [(set_attr "type" "sse")
5710 (set_attr "prefix_rep" "1")
5711 (set_attr "prefix" "maybe_vex")
5712 (set_attr "mode" "V4SF")])
;; Insn avx512f_movsldup512 (with <mask_name> variants): V16SF result
;; from register-or-memory operand 1.  The visible selector
;; {0,0,2,2,...,14,14} repeats each even-indexed element twice.  Emits
;; vmovsldup with the optional <mask_operand2> decoration.
;; NOTE(review): the rtx between the operand and the selector, and the
;; enabling condition, are not visible in this excerpt.
5714 (define_insn "<mask_codefor>avx512f_movsldup512<mask_name>"
5715 [(set (match_operand:V16SF 0 "register_operand" "=v")
5718 (match_operand:V16SF 1 "nonimmediate_operand" "vm")
5720 (parallel [(const_int 0) (const_int 0)
5721 (const_int 2) (const_int 2)
5722 (const_int 4) (const_int 4)
5723 (const_int 6) (const_int 6)
5724 (const_int 8) (const_int 8)
5725 (const_int 10) (const_int 10)
5726 (const_int 12) (const_int 12)
5727 (const_int 14) (const_int 14)])))]
5729 "vmovsldup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5730 [(set_attr "type" "sse")
5731 (set_attr "prefix" "evex")
5732 (set_attr "mode" "V16SF")])
;; Expander avx_shufps256: V8SF operands 0-2 plus an SImode constant
;; (operand 3).  The constant is split into four 2-bit fields and handed
;; to gen_avx_shufps256_1 as eight explicit element indices:
;;   fields 0 and 1 (bits 0-1, 2-3) are passed unchanged and again
;;     with 4 added;
;;   fields 2 and 3 (bits 4-5, 6-7) are passed with 8 added and again
;;     with 12 added.
;; Each field therefore yields two of the eight indices.
5734 (define_expand "avx_shufps256"
5735 [(match_operand:V8SF 0 "register_operand")
5736 (match_operand:V8SF 1 "register_operand")
5737 (match_operand:V8SF 2 "nonimmediate_operand")
5738 (match_operand:SI 3 "const_int_operand")]
5741 int mask = INTVAL (operands[3]);
5742 emit_insn (gen_avx_shufps256_1 (operands[0], operands[1], operands[2],
5743 GEN_INT ((mask >> 0) & 3),
5744 GEN_INT ((mask >> 2) & 3),
5745 GEN_INT (((mask >> 4) & 3) + 8),
5746 GEN_INT (((mask >> 6) & 3) + 8),
5747 GEN_INT (((mask >> 0) & 3) + 4),
5748 GEN_INT (((mask >> 2) & 3) + 4),
5749 GEN_INT (((mask >> 4) & 3) + 12),
5750 GEN_INT (((mask >> 6) & 3) + 12)));
5754 ;; Each 2-bit field of the immediate selects two elements, one per 128-bit lane.
;; Insn avx_shufps256_1: V8SF shuffle of register operand 1 and
;; register-or-memory operand 2, with eight element indices as operands
;; 3-10 (predicate ranges 0-3, 0-3, 8-11, 8-11, 4-7, 4-7, 12-15, 12-15).
;; The condition additionally requires operands 7-10 to equal operands
;; 3-6 plus 4, so only four indices are independent.  The output code
;; rebuilds the 8-bit immediate from operands 3-6 (two bits each, after
;; removing the +8 bias from operands 5 and 6), stores it back into
;; operands[3], and emits vshufps.
;; NOTE(review): the first clause of the condition is not visible in
;; this excerpt.
5755 (define_insn "avx_shufps256_1"
5756 [(set (match_operand:V8SF 0 "register_operand" "=x")
5759 (match_operand:V8SF 1 "register_operand" "x")
5760 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
5761 (parallel [(match_operand 3 "const_0_to_3_operand" )
5762 (match_operand 4 "const_0_to_3_operand" )
5763 (match_operand 5 "const_8_to_11_operand" )
5764 (match_operand 6 "const_8_to_11_operand" )
5765 (match_operand 7 "const_4_to_7_operand" )
5766 (match_operand 8 "const_4_to_7_operand" )
5767 (match_operand 9 "const_12_to_15_operand")
5768 (match_operand 10 "const_12_to_15_operand")])))]
5770 && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
5771 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
5772 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
5773 && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4))"
5776 mask = INTVAL (operands[3]);
5777 mask |= INTVAL (operands[4]) << 2;
5778 mask |= (INTVAL (operands[5]) - 8) << 4;
5779 mask |= (INTVAL (operands[6]) - 8) << 6;
5780 operands[3] = GEN_INT (mask);
5782 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
5784 [(set_attr "type" "sseshuf")
5785 (set_attr "length_immediate" "1")
5786 (set_attr "prefix" "vex")
5787 (set_attr "mode" "V8SF")])
;; Expander sse_shufps: V4SF operands 0-2 plus an SImode constant
;; (operand 3).  The constant is split into four 2-bit fields and handed
;; to gen_sse_shufps_v4sf as explicit element indices: fields 0 and 1
;; unchanged, fields 2 and 3 with 4 added.
5789 (define_expand "sse_shufps"
5790 [(match_operand:V4SF 0 "register_operand")
5791 (match_operand:V4SF 1 "register_operand")
5792 (match_operand:V4SF 2 "nonimmediate_operand")
5793 (match_operand:SI 3 "const_int_operand")]
5796 int mask = INTVAL (operands[3]);
5797 emit_insn (gen_sse_shufps_v4sf (operands[0], operands[1], operands[2],
5798 GEN_INT ((mask >> 0) & 3),
5799 GEN_INT ((mask >> 2) & 3),
5800 GEN_INT (((mask >> 4) & 3) + 4),
5801 GEN_INT (((mask >> 6) & 3) + 4)));
;; Insn sse_shufps_<mode> (iterated over VI4F_128): selects four elements
;; from the concatenation of operands 1 and 2.  Operands 3 and 4 must be
;; in 0-3 and operands 5 and 6 in 4-7.  The output code packs them into
;; an 8-bit immediate (two bits each, after removing the +4 bias from
;; operands 5 and 6), stores it back into operands[3], and then picks the
;; template by which_alternative: the two-operand shufps (non-AVX,
;; operand 1 tied to operand 0) or the three-operand vshufps (AVX).
;; NOTE(review): the enabling condition, the initialisation of mask and
;; the case labels are not visible in this excerpt.
5805 (define_insn "sse_shufps_<mode>"
5806 [(set (match_operand:VI4F_128 0 "register_operand" "=x,x")
5807 (vec_select:VI4F_128
5808 (vec_concat:<ssedoublevecmode>
5809 (match_operand:VI4F_128 1 "register_operand" "0,x")
5810 (match_operand:VI4F_128 2 "nonimmediate_operand" "xm,xm"))
5811 (parallel [(match_operand 3 "const_0_to_3_operand")
5812 (match_operand 4 "const_0_to_3_operand")
5813 (match_operand 5 "const_4_to_7_operand")
5814 (match_operand 6 "const_4_to_7_operand")])))]
5818 mask |= INTVAL (operands[3]) << 0;
5819 mask |= INTVAL (operands[4]) << 2;
5820 mask |= (INTVAL (operands[5]) - 4) << 4;
5821 mask |= (INTVAL (operands[6]) - 4) << 6;
5822 operands[3] = GEN_INT (mask);
5824 switch (which_alternative)
5827 return "shufps\t{%3, %2, %0|%0, %2, %3}";
5829 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
5834 [(set_attr "isa" "noavx,avx")
5835 (set_attr "type" "sseshuf")
5836 (set_attr "length_immediate" "1")
5837 (set_attr "prefix" "orig,vex")
5838 (set_attr "mode" "V4SF")])
;; Insn sse_storehps: sets a V2SF operand 0 from elements {2,3} of the
;; V4SF operand 1.  Alternatives, in order:
;;   0  %vmovhps   register operand 1 stored to memory operand 0;
;;   1  %vmovhlps  register to register;
;;   2  %vmovlps   loads the %H1 form of offsettable memory operand 1
;;                 into register operand 0.
;; NOTE(review): the enabling condition is not visible in this excerpt.
5840 (define_insn "sse_storehps"
5841 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
5843 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
5844 (parallel [(const_int 2) (const_int 3)])))]
5847 %vmovhps\t{%1, %0|%q0, %1}
5848 %vmovhlps\t{%1, %d0|%d0, %1}
5849 %vmovlps\t{%H1, %d0|%d0, %H1}"
5850 [(set_attr "type" "ssemov")
5851 (set_attr "ssememalign" "64")
5852 (set_attr "prefix" "maybe_vex")
5853 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander sse_loadhps_exp: front end for the sse_loadhps insn.  The
;; template combines elements {0,1} of the V4SF operand 1 with the V2SF
;; operand 2 into the V4SF operand 0.  The C body lets
;; ix86_fixup_binary_operands choose a destination (dst), emits
;; gen_sse_loadhps into it, and copies dst to operand 0 when the helper
;; returned something other than operand 0.
5855 (define_expand "sse_loadhps_exp"
5856 [(set (match_operand:V4SF 0 "nonimmediate_operand")
5859 (match_operand:V4SF 1 "nonimmediate_operand")
5860 (parallel [(const_int 0) (const_int 1)]))
5861 (match_operand:V2SF 2 "nonimmediate_operand")))]
5864 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
5866 emit_insn (gen_sse_loadhps (dst, operands[1], operands[2]));
5868 /* Fix up the destination if needed. */
5869 if (dst != operands[0])
5870 emit_move_insn (operands[0], dst);
;; Insn sse_loadhps: combines elements {0,1} of the V4SF operand 1 with
;; the V2SF operand 2 into the V4SF operand 0.  Alternatives, in order
;; (see the "isa" attribute):
;;   0  movhps    non-AVX, memory operand 2, operand 1 tied to operand 0;
;;   1  vmovhps   AVX form of alternative 0;
;;   2  movlhps   non-AVX, register operand 2;
;;   3  vmovlhps  AVX form of alternative 2;
;;   4  %vmovlps  stores register operand 2 to the %H0 form of the
;;                offsettable memory operand 0.
;; NOTE(review): the enabling condition is not visible in this excerpt.
5875 (define_insn "sse_loadhps"
5876 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,o")
5879 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
5880 (parallel [(const_int 0) (const_int 1)]))
5881 (match_operand:V2SF 2 "nonimmediate_operand" " m,m,x,x,x")))]
5884 movhps\t{%2, %0|%0, %q2}
5885 vmovhps\t{%2, %1, %0|%0, %1, %q2}
5886 movlhps\t{%2, %0|%0, %2}
5887 vmovlhps\t{%2, %1, %0|%0, %1, %2}
5888 %vmovlps\t{%2, %H0|%H0, %2}"
5889 [(set_attr "isa" "noavx,avx,noavx,avx,*")
5890 (set_attr "type" "ssemov")
5891 (set_attr "ssememalign" "64")
5892 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
5893 (set_attr "mode" "V2SF,V2SF,V4SF,V4SF,V2SF")])
;; Insn sse_storelps: sets a V2SF operand 0 from elements {0,1} of the
;; V4SF operand 1.  Alternatives, in order:
;;   0  %vmovlps  register operand 1 stored to memory operand 0;
;;   1  %vmovaps  register to register (mode attribute V4SF);
;;   2  %vmovlps  memory operand 1 loaded into register operand 0.
;; NOTE(review): the enabling condition is not visible in this excerpt.
5895 (define_insn "sse_storelps"
5896 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
5898 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,m")
5899 (parallel [(const_int 0) (const_int 1)])))]
5902 %vmovlps\t{%1, %0|%q0, %1}
5903 %vmovaps\t{%1, %0|%0, %1}
5904 %vmovlps\t{%1, %d0|%d0, %q1}"
5905 [(set_attr "type" "ssemov")
5906 (set_attr "prefix" "maybe_vex")
5907 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander sse_loadlps_exp: front end for the sse_loadlps insn.  The
;; template combines the V2SF operand 2 with elements {2,3} of the V4SF
;; operand 1 into the V4SF operand 0.  The C body lets
;; ix86_fixup_binary_operands choose a destination (dst), emits
;; gen_sse_loadlps into it, and copies dst to operand 0 when the helper
;; returned something other than operand 0.
5909 (define_expand "sse_loadlps_exp"
5910 [(set (match_operand:V4SF 0 "nonimmediate_operand")
5912 (match_operand:V2SF 2 "nonimmediate_operand")
5914 (match_operand:V4SF 1 "nonimmediate_operand")
5915 (parallel [(const_int 2) (const_int 3)]))))]
5918 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
5920 emit_insn (gen_sse_loadlps (dst, operands[1], operands[2]));
5922 /* Fix up the destination if needed. */
5923 if (dst != operands[0])
5924 emit_move_insn (operands[0], dst);
;; Insn sse_loadlps: combines the V2SF operand 2 with elements {2,3} of
;; the V4SF operand 1 into the V4SF operand 0.  Alternatives, in order
;; (see the "isa" attribute):
;;   0  shufps $0xe4   non-AVX, operand 2 tied to operand 0;
;;   1  vshufps $0xe4  AVX, all three operands in registers;
;;   2  movlps         non-AVX, memory operand 2, operand 1 tied to op 0;
;;   3  vmovlps        AVX form of alternative 2;
;;   4  %vmovlps       stores register operand 2 into memory operand 0.
;; Only the two shufps alternatives carry an immediate byte
;; (length_immediate "1,1,*,*,*").
;; NOTE(review): the enabling condition is not visible in this excerpt.
5929 (define_insn "sse_loadlps"
5930 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,m")
5932 (match_operand:V2SF 2 "nonimmediate_operand" " 0,x,m,m,x")
5934 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,0,x,0")
5935 (parallel [(const_int 2) (const_int 3)]))))]
5938 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
5939 vshufps\t{$0xe4, %1, %2, %0|%0, %2, %1, 0xe4}
5940 movlps\t{%2, %0|%0, %q2}
5941 vmovlps\t{%2, %1, %0|%0, %1, %q2}
5942 %vmovlps\t{%2, %0|%q0, %2}"
5943 [(set_attr "isa" "noavx,avx,noavx,avx,*")
5944 (set_attr "type" "sseshuf,sseshuf,ssemov,ssemov,ssemov")
5945 (set_attr "ssememalign" "64")
5946 (set_attr "length_immediate" "1,1,*,*,*")
5947 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
5948 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
;; Insn sse_movss: V4SF register operands 0-2.  Alternative 0 is the
;; non-AVX movss (operand 1 tied to operand 0), alternative 1 the AVX
;; three-operand vmovss.  The "mode" attribute is SF.
;; NOTE(review): the rtx combining operands 2 and 1 and the enabling
;; condition are not visible in this excerpt; presumably a vec_merge
;; that takes the lowest element from operand 2 -- confirm.
5950 (define_insn "sse_movss"
5951 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
5953 (match_operand:V4SF 2 "register_operand" " x,x")
5954 (match_operand:V4SF 1 "register_operand" " 0,x")
5958 movss\t{%2, %0|%0, %2}
5959 vmovss\t{%2, %1, %0|%0, %1, %2}"
5960 [(set_attr "isa" "noavx,avx")
5961 (set_attr "type" "ssemov")
5962 (set_attr "prefix" "orig,vex")
5963 (set_attr "mode" "SF")])
;; Insn avx2_vec_dup<mode> (iterated over VF1_128_256): the result is a
;; vec_duplicate built from element 0 of the V4SF register operand 1.
;; Emits vbroadcastss.
;; NOTE(review): the enabling condition is not visible in this excerpt.
5965 (define_insn "avx2_vec_dup<mode>"
5966 [(set (match_operand:VF1_128_256 0 "register_operand" "=x")
5967 (vec_duplicate:VF1_128_256
5969 (match_operand:V4SF 1 "register_operand" "x")
5970 (parallel [(const_int 0)]))))]
5972 "vbroadcastss\t{%1, %0|%0, %1}"
5973 [(set_attr "type" "sselog1")
5974 (set_attr "prefix" "vex")
5975 (set_attr "mode" "<MODE>")])
;; Insn avx2_vec_dupv8sf_1: V8SF result built from element 0 of the V8SF
;; register operand 1.  Emits vbroadcastss, printing the source with the
;; %x operand modifier.
;; NOTE(review): presumably %x prints the 128-bit (xmm) name of the
;; source register; the wrapping rtx and enabling condition are not
;; visible in this excerpt.
5977 (define_insn "avx2_vec_dupv8sf_1"
5978 [(set (match_operand:V8SF 0 "register_operand" "=x")
5981 (match_operand:V8SF 1 "register_operand" "x")
5982 (parallel [(const_int 0)]))))]
5984 "vbroadcastss\t{%x1, %0|%0, %x1}"
5985 [(set_attr "type" "sselog1")
5986 (set_attr "prefix" "vex")
5987 (set_attr "mode" "V8SF")])
;; Insn vec_dupv4sf: V4SF result from the SFmode operand 1.
;; Alternatives, in order (see the "isa" attribute):
;;   0  vshufps $0    AVX, register source used for both inputs;
;;   1  vbroadcastss  AVX, memory source;
;;   2  shufps $0     non-AVX, source tied to the destination.
;; NOTE(review): the wrapping rtx and the enabling condition are not
;; visible in this excerpt.
5989 (define_insn "vec_dupv4sf"
5990 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
5992 (match_operand:SF 1 "nonimmediate_operand" "x,m,0")))]
5995 vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0}
5996 vbroadcastss\t{%1, %0|%0, %1}
5997 shufps\t{$0, %0, %0|%0, %0, 0}"
5998 [(set_attr "isa" "avx,avx,noavx")
5999 (set_attr "type" "sseshuf1,ssemov,sseshuf1")
6000 (set_attr "length_immediate" "1,0,1")
6001 (set_attr "prefix_extra" "0,1,*")
6002 (set_attr "prefix" "vex,vex,orig")
6003 (set_attr "mode" "V4SF")])
;; *vec_concatv2sf_sse4_1: form a V2SF from two SF operands.
;; Seven alternatives, in template order:
;;   0/1  register operand 2        -> unpcklps / vunpcklps
;;   2/3  memory operand 2          -> insertps / vinsertps with imm 0x10
;;   4    operand 2 matches "C"     -> %vmovss load of operand 1
;;   5    MMX registers (*y)        -> punpckldq
;;   6    MMX dest, op 2 is "C"     -> movd load of operand 1
6005 ;; Although insertps takes register source, we prefer
6006 ;; unpcklps with register source since it is shorter.
6007 (define_insn "*vec_concatv2sf_sse4_1"
6008 [(set (match_operand:V2SF 0 "register_operand" "=x,x,x,x,x,*y ,*y")
6010 (match_operand:SF 1 "nonimmediate_operand" " 0,x,0,x,m, 0 , m")
6011 (match_operand:SF 2 "vector_move_operand" " x,x,m,m,C,*ym, C")))]
6014 unpcklps\t{%2, %0|%0, %2}
6015 vunpcklps\t{%2, %1, %0|%0, %1, %2}
6016 insertps\t{$0x10, %2, %0|%0, %2, 0x10}
6017 vinsertps\t{$0x10, %2, %1, %0|%0, %1, %2, 0x10}
6018 %vmovss\t{%1, %0|%0, %1}
6019 punpckldq\t{%2, %0|%0, %2}
6020 movd\t{%1, %0|%0, %1}"
6021 [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
6022 (set_attr "type" "sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov")
6023 (set_attr "prefix_data16" "*,*,1,*,*,*,*")
6024 (set_attr "prefix_extra" "*,*,1,1,*,*,*")
6025 (set_attr "length_immediate" "*,*,1,1,*,*,*")
6026 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig")
6027 (set_attr "mode" "V4SF,V4SF,V4SF,V4SF,SF,DI,DI")])
;; *vec_concatv2sf_sse: form a V2SF from two SF operands.
;; Operand 2 is a register or zero (reg_or_0_operand). Four alternatives:
;;   0  SSE regs, operand 1 tied to dest  -> unpcklps
;;   1  memory operand 1, operand 2 "C"   -> movss
;;   2  MMX regs, operand 1 tied to dest  -> punpckldq
;;   3  MMX dest, memory op 1, op 2 "C"   -> movd
6029 ;; ??? In theory we can match memory for the MMX alternative, but allowing
6030 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
6031 ;; alternatives pretty much forces the MMX alternative to be chosen.
6032 (define_insn "*vec_concatv2sf_sse"
6033 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
6035 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
6036 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
6039 unpcklps\t{%2, %0|%0, %2}
6040 movss\t{%1, %0|%0, %1}
6041 punpckldq\t{%2, %0|%0, %2}
6042 movd\t{%1, %0|%0, %1}"
6043 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
6044 (set_attr "mode" "V4SF,SF,DI,DI")])
;; *vec_concatv4sf: form a V4SF from two V2SF halves (operands 1 and 2).
;; Register operand 2 uses movlhps / vmovlhps; memory operand 2 uses
;; movhps / vmovhps. The non-AVX alternatives tie operand 1 to the destination.
6046 (define_insn "*vec_concatv4sf"
6047 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x,x")
6049 (match_operand:V2SF 1 "register_operand" " 0,x,0,x")
6050 (match_operand:V2SF 2 "nonimmediate_operand" " x,x,m,m")))]
6053 movlhps\t{%2, %0|%0, %2}
6054 vmovlhps\t{%2, %1, %0|%0, %1, %2}
6055 movhps\t{%2, %0|%0, %q2}
6056 vmovhps\t{%2, %1, %0|%0, %1, %q2}"
6057 [(set_attr "isa" "noavx,avx,noavx,avx")
6058 (set_attr "type" "ssemov")
6059 (set_attr "prefix" "orig,vex,orig,vex")
6060 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF")])
;; vec_init<mode>: expander over the V_128 modes. The preparation code
;; hands the destination (operand 0) and operand 1 to
;; ix86_expand_vector_init, passing false as the first argument.
6062 (define_expand "vec_init<mode>"
6063 [(match_operand:V_128 0 "register_operand")
6067 ix86_expand_vector_init (false, operands[0], operands[1]);
;; vec_set<mode>_0: merge a duplicated scalar (operand 2) with a VI4F_128
;; vector (operand 1). Eleven alternatives:
;;   0-3   operand 1 matches "C"; scalar comes from an SSE reg, memory or
;;         general reg (insertps $0xe, scalar move, movd, movss)
;;   4-7   operand 1 is a register (tied to dest for the non-AVX forms);
;;         movss/vmovss or pinsrd/vpinsrd with immediate 0
;;   8-10  memory destination with operand 1 tied to it; the scalar comes
;;         from an SSE, general or x87 register/constant (types ssemov,
;;         imov, fmov). Their output templates are not visible in this
;;         excerpt.
6071 ;; Avoid combining registers from different units in a single alternative,
6072 ;; see comment above inline_secondary_memory_needed function in i386.c
6073 (define_insn "vec_set<mode>_0"
6074 [(set (match_operand:VI4F_128 0 "nonimmediate_operand"
6075 "=x,x,x ,x,x,x,x ,x ,m ,m ,m")
6077 (vec_duplicate:VI4F_128
6078 (match_operand:<ssescalarmode> 2 "general_operand"
6079 " x,m,*r,m,x,x,*rm,*rm,!x,!*re,!*fF"))
6080 (match_operand:VI4F_128 1 "vector_move_operand"
6081 " C,C,C ,C,0,x,0 ,x ,0 ,0 ,0")
6085 %vinsertps\t{$0xe, %d2, %0|%0, %d2, 0xe}
6086 %vmov<ssescalarmodesuffix>\t{%2, %0|%0, %2}
6087 %vmovd\t{%2, %0|%0, %2}
6088 movss\t{%2, %0|%0, %2}
6089 movss\t{%2, %0|%0, %2}
6090 vmovss\t{%2, %1, %0|%0, %1, %2}
6091 pinsrd\t{$0, %2, %0|%0, %2, 0}
6092 vpinsrd\t{$0, %2, %1, %0|%0, %1, %2, 0}
6096 [(set_attr "isa" "sse4,sse2,sse2,noavx,noavx,avx,sse4_noavx,avx,*,*,*")
6098 (cond [(eq_attr "alternative" "0,6,7")
6099 (const_string "sselog")
6100 (eq_attr "alternative" "9")
6101 (const_string "imov")
6102 (eq_attr "alternative" "10")
6103 (const_string "fmov")
6105 (const_string "ssemov")))
6106 (set_attr "prefix_extra" "*,*,*,*,*,*,1,1,*,*,*")
6107 (set_attr "length_immediate" "*,*,*,*,*,*,1,1,*,*,*")
6108 (set_attr "prefix" "maybe_vex,maybe_vex,maybe_vex,orig,orig,vex,orig,vex,*,*,*")
6109 (set_attr "mode" "SF,<ssescalarmode>,SI,SF,SF,SF,TI,TI,*,*,*")])
;; *vec_setv4sf_sse4_1: insert an SF scalar (operand 2) into one element of
;; a V4SF (operand 1) with insertps / vinsertps.
;; Operand 3 is a one-hot element mask: the insn condition requires
;; exact_log2 (operands[3]) to be a valid V4SF element index. The output
;; code rewrites operand 3 as the instruction immediate, i.e. that element
;; index shifted left by 4.
6111 ;; A subset is vec_setv4sf.
6112 (define_insn "*vec_setv4sf_sse4_1"
6113 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
6116 (match_operand:SF 2 "nonimmediate_operand" "xm,xm"))
6117 (match_operand:V4SF 1 "register_operand" "0,x")
6118 (match_operand:SI 3 "const_int_operand")))]
6120 && ((unsigned) exact_log2 (INTVAL (operands[3]))
6121 < GET_MODE_NUNITS (V4SFmode))"
6123 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
6124 switch (which_alternative)
6127 return "insertps\t{%3, %2, %0|%0, %2, %3}";
6129 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
6134 [(set_attr "isa" "noavx,avx")
6135 (set_attr "type" "sselog")
6136 (set_attr "prefix_data16" "1,*")
6137 (set_attr "prefix_extra" "1")
6138 (set_attr "length_immediate" "1")
6139 (set_attr "prefix" "orig,vex")
6140 (set_attr "mode" "V4SF")])
;; sse4_1_insertps: unspec form of insertps / vinsertps with an explicit
;; 8-bit immediate (operand 3).
;; When operand 2 is in memory, the output code takes the top two bits of
;; the immediate (INTVAL >> 6) as count_s, moves the memory reference
;; forward by count_s * 4 bytes as an SFmode access, and clears those bits
;; from the immediate (& 0x3f).
6142 (define_insn "sse4_1_insertps"
6143 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
6144 (unspec:V4SF [(match_operand:V4SF 2 "nonimmediate_operand" "xm,xm")
6145 (match_operand:V4SF 1 "register_operand" "0,x")
6146 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
6150 if (MEM_P (operands[2]))
6152 unsigned count_s = INTVAL (operands[3]) >> 6;
6154 operands[3] = GEN_INT (INTVAL (operands[3]) & 0x3f);
6155 operands[2] = adjust_address_nv (operands[2], SFmode, count_s * 4);
6157 switch (which_alternative)
6160 return "insertps\t{%3, %2, %0|%0, %2, %3}";
6162 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
6167 [(set_attr "isa" "noavx,avx")
6168 (set_attr "type" "sselog")
6169 (set_attr "prefix_data16" "1,*")
6170 (set_attr "prefix_extra" "1")
6171 (set_attr "length_immediate" "1")
6172 (set_attr "prefix" "orig,vex")
6173 (set_attr "mode" "V4SF")])
;; Post-reload split (TARGET_SSE && reload_completed): a duplicated
;; non-memory scalar (operand 1) written to a VI4F_128 memory destination
;; becomes a plain scalar store. The preparation statement re-addresses
;; operand 0 as <ssescalarmode> at offset 0.
6176 [(set (match_operand:VI4F_128 0 "memory_operand")
6178 (vec_duplicate:VI4F_128
6179 (match_operand:<ssescalarmode> 1 "nonmemory_operand"))
6182 "TARGET_SSE && reload_completed"
6183 [(set (match_dup 0) (match_dup 1))]
6184 "operands[0] = adjust_address (operands[0], <ssescalarmode>mode, 0);")
;; vec_set<mode>: expander over the V modes. Operand 0 is the vector
;; register, operand 1 the scalar register and operand 2 a constant
;; integer. The preparation code calls ix86_expand_vector_set with the
;; integer value of operand 2, passing false as the first argument.
6186 (define_expand "vec_set<mode>"
6187 [(match_operand:V 0 "register_operand")
6188 (match_operand:<ssescalarmode> 1 "register_operand")
6189 (match_operand 2 "const_int_operand")]
6192 ix86_expand_vector_set (false, operands[0], operands[1],
6193 INTVAL (operands[2]));
;; *vec_extractv4sf_0: extract element 0 of a V4SF as an SF value.
;; Valid under TARGET_SSE unless both operands are memory. After reload it
;; is split into a plain SF move: a register source is re-created as an
;; SFmode register with the same REGNO, and a memory source is
;; re-addressed as SFmode at offset 0.
6197 (define_insn_and_split "*vec_extractv4sf_0"
6198 [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,f,r")
6200 (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m,m")
6201 (parallel [(const_int 0)])))]
6202 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6204 "&& reload_completed"
6205 [(set (match_dup 0) (match_dup 1))]
6207 if (REG_P (operands[1]))
6208 operands[1] = gen_rtx_REG (SFmode, REGNO (operands[1]));
6210 operands[1] = adjust_address (operands[1], SFmode, 0);
;; *sse4_1_extractps: extract element operand 2 (0..3) of a V4SF register
;; as an SF value.
;; Alternative 0 (general register or memory destination) emits
;; %vextractps. When the destination is an SSE register, the insn is split
;; after reload: the destination is viewed as V4SF and, depending on the
;; index, filled by sse_shufps_v4sf or vec_interleave_highv4sf applied to
;; operand 1 with itself. Index 0 is left to *vec_extractv4sf_0.
6213 (define_insn_and_split "*sse4_1_extractps"
6214 [(set (match_operand:SF 0 "nonimmediate_operand" "=rm,x,x")
6216 (match_operand:V4SF 1 "register_operand" "x,0,x")
6217 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n,n,n")])))]
6220 %vextractps\t{%2, %1, %0|%0, %1, %2}
6223 "&& reload_completed && SSE_REG_P (operands[0])"
6226 rtx dest = gen_rtx_REG (V4SFmode, REGNO (operands[0]));
6227 switch (INTVAL (operands[2]))
6231 emit_insn (gen_sse_shufps_v4sf (dest, operands[1], operands[1],
6232 operands[2], operands[2],
6233 GEN_INT (INTVAL (operands[2]) + 4),
6234 GEN_INT (INTVAL (operands[2]) + 4)));
6237 emit_insn (gen_vec_interleave_highv4sf (dest, operands[1], operands[1]));
6240 /* 0 should be handled by the *vec_extractv4sf_0 pattern above. */
6245 [(set_attr "isa" "*,noavx,avx")
6246 (set_attr "type" "sselog,*,*")
6247 (set_attr "prefix_data16" "1,*,*")
6248 (set_attr "prefix_extra" "1,*,*")
6249 (set_attr "length_immediate" "1,*,*")
6250 (set_attr "prefix" "maybe_vex,*,*")
6251 (set_attr "mode" "V4SF,*,*")])
;; *vec_extractv4sf_mem: extract element operand 2 (0..3) from a V4SF held
;; in offsettable memory ("o") into an SSE, general or x87 register.
;; After reload it becomes a plain SF load from the original address plus
;; index * 4 bytes.
6253 (define_insn_and_split "*vec_extractv4sf_mem"
6254 [(set (match_operand:SF 0 "register_operand" "=x,*r,f")
6256 (match_operand:V4SF 1 "memory_operand" "o,o,o")
6257 (parallel [(match_operand 2 "const_0_to_3_operand" "n,n,n")])))]
6260 "&& reload_completed"
6261 [(set (match_dup 0) (match_dup 1))]
6263 operands[1] = adjust_address (operands[1], SFmode, INTVAL (operands[2]) * 4);
;; avx512f_vextract<shuffletype>32x4_mask: expander for the masked 128-bit
;; extract from a V16FI vector.
;; Operands: 0 = destination (<ssequartermode>), 1 = source register,
;; 2 = quarter selector (0..3), 3 = merge source, 4 = QImode mask register.
;; The selector is turned into four consecutive element indices (0-3, 4-7,
;; 8-11 or 12-15) and passed to the ..._32x4_1_mask insn. If the
;; destination is memory and operand 3 is a CONST_VECTOR, operand 0 is
;; first passed through force_reg.
6266 (define_expand "avx512f_vextract<shuffletype>32x4_mask"
6267 [(match_operand:<ssequartermode> 0 "nonimmediate_operand")
6268 (match_operand:V16FI 1 "register_operand")
6269 (match_operand:SI 2 "const_0_to_3_operand")
6270 (match_operand:<ssequartermode> 3 "nonimmediate_operand")
6271 (match_operand:QI 4 "register_operand")]
6274 if (MEM_P (operands[0]) && GET_CODE (operands[3]) == CONST_VECTOR)
6275 operands[0] = force_reg (<ssequartermode>mode, operands[0]);
6276 switch (INTVAL (operands[2]))
6279 emit_insn (gen_avx512f_vextract<shuffletype>32x4_1_mask (operands[0],
6280 operands[1], GEN_INT (0), GEN_INT (1), GEN_INT (2),
6281 GEN_INT (3), operands[3], operands[4]));
6284 emit_insn (gen_avx512f_vextract<shuffletype>32x4_1_mask (operands[0],
6285 operands[1], GEN_INT (4), GEN_INT (5), GEN_INT (6),
6286 GEN_INT (7), operands[3], operands[4]));
6289 emit_insn (gen_avx512f_vextract<shuffletype>32x4_1_mask (operands[0],
6290 operands[1], GEN_INT (8), GEN_INT (9), GEN_INT (10),
6291 GEN_INT (11), operands[3], operands[4]));
6294 emit_insn (gen_avx512f_vextract<shuffletype>32x4_1_mask (operands[0],
6295 operands[1], GEN_INT (12), GEN_INT (13), GEN_INT (14),
6296 GEN_INT (15), operands[3], operands[4]));
;; avx512f_vextract<shuffletype>32x4_1_maskm: masked extract of four
;; elements of a V16FI register straight to memory.
;; The merge source (operand 6) is tied to the destination memory ("0")
;; and the mask is operand 7 ("Yk"). The condition requires operands 2..5
;; to be four consecutive indices; the output code turns the first index
;; into the instruction immediate by shifting it right by 2.
6304 (define_insn "avx512f_vextract<shuffletype>32x4_1_maskm"
6305 [(set (match_operand:<ssequartermode> 0 "memory_operand" "=m")
6306 (vec_merge:<ssequartermode>
6307 (vec_select:<ssequartermode>
6308 (match_operand:V16FI 1 "register_operand" "v")
6309 (parallel [(match_operand 2 "const_0_to_15_operand")
6310 (match_operand 3 "const_0_to_15_operand")
6311 (match_operand 4 "const_0_to_15_operand")
6312 (match_operand 5 "const_0_to_15_operand")]))
6313 (match_operand:<ssequartermode> 6 "memory_operand" "0")
6314 (match_operand:QI 7 "register_operand" "Yk")))]
6316 && (INTVAL (operands[2]) == (INTVAL (operands[3]) - 1)
6317 && INTVAL (operands[3]) == (INTVAL (operands[4]) - 1)
6318 && INTVAL (operands[4]) == (INTVAL (operands[5]) - 1))"
6320 operands[2] = GEN_INT ((INTVAL (operands[2])) >> 2);
6321 return "vextract<shuffletype>32x4\t{%2, %1, %0%{%7%}|%0%{%7%}, %1, %2}";
6323 [(set_attr "type" "sselog")
6324 (set_attr "prefix_extra" "1")
6325 (set_attr "length_immediate" "1")
6326 (set_attr "memory" "store")
6327 (set_attr "prefix" "evex")
6328 (set_attr "mode" "<sseinsnmode>")])
;; avx512f_vextract<shuffletype>32x4_1<mask_name>: extract four consecutive
;; elements of a V16FI register; the <mask_name> substitution also
;; provides the masked variant.
;; The condition requires operands 2..5 to be consecutive indices; the
;; output code turns the first index into the immediate (index >> 2).
;; The "memory" attribute is "store" when operand 0 is a MEM, else "none".
6330 (define_insn "<mask_codefor>avx512f_vextract<shuffletype>32x4_1<mask_name>"
6331 [(set (match_operand:<ssequartermode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
6332 (vec_select:<ssequartermode>
6333 (match_operand:V16FI 1 "register_operand" "v")
6334 (parallel [(match_operand 2 "const_0_to_15_operand")
6335 (match_operand 3 "const_0_to_15_operand")
6336 (match_operand 4 "const_0_to_15_operand")
6337 (match_operand 5 "const_0_to_15_operand")])))]
6339 && (INTVAL (operands[2]) == (INTVAL (operands[3]) - 1)
6340 && INTVAL (operands[3]) == (INTVAL (operands[4]) - 1)
6341 && INTVAL (operands[4]) == (INTVAL (operands[5]) - 1))"
6343 operands[2] = GEN_INT ((INTVAL (operands[2])) >> 2);
6344 return "vextract<shuffletype>32x4\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
6346 [(set_attr "type" "sselog")
6347 (set_attr "prefix_extra" "1")
6348 (set_attr "length_immediate" "1")
6349 (set (attr "memory")
6350 (if_then_else (match_test "MEM_P (operands[0])")
6351 (const_string "store")
6352 (const_string "none")))
6353 (set_attr "prefix" "evex")
6354 (set_attr "mode" "<sseinsnmode>")])
;; avx512f_vextract<shuffletype>64x4_mask: expander for the masked 256-bit
;; extract from a V8FI vector.
;; Operands: 0 = destination (<ssehalfvecmode>), 1 = source register,
;; 2 = half selector (0 or 1), 3 = merge source, 4 = QImode mask register.
;; The selector picks gen_vec_extract_lo_<mode>_mask or
;; gen_vec_extract_hi_<mode>_mask, which is emitted with operands 0, 1, 3
;; and 4.
;; If the destination is memory and operand 3 is a CONST_VECTOR, operand 0
;; is passed through force_reg. The mode given to force_reg has to be the
;; mode of operand 0, which is <ssehalfvecmode> here; <ssequartermode>
;; belongs to the 32x4 expander.
6356 (define_expand "avx512f_vextract<shuffletype>64x4_mask"
6357 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
6358 (match_operand:V8FI 1 "register_operand")
6359 (match_operand:SI 2 "const_0_to_1_operand")
6360 (match_operand:<ssehalfvecmode> 3 "nonimmediate_operand")
6361 (match_operand:QI 4 "register_operand")]
6364 rtx (*insn)(rtx, rtx, rtx, rtx);
6366 if (MEM_P (operands[0]) && GET_CODE (operands[3]) == CONST_VECTOR)
6367 operands[0] = force_reg (<ssehalfvecmode>mode, operands[0]);
6369 switch (INTVAL (operands[2]))
6372 insn = gen_vec_extract_lo_<mode>_mask;
6375 insn = gen_vec_extract_hi_<mode>_mask;
6381 emit_insn (insn (operands[0], operands[1], operands[3], operands[4]));
;; Post-reload split (TARGET_AVX512F, not both operands in memory):
;; selecting elements 0..3 of a V8FI operand is replaced by a plain move
;; of its low half. The visible preparation code obtains the half-mode
;; source either as a register with the same REGNO or through gen_lowpart,
;; then emits the move with emit_move_insn. The test that chooses between
;; the two is not visible in this excerpt.
6386 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
6387 (vec_select:<ssehalfvecmode>
6388 (match_operand:V8FI 1 "nonimmediate_operand")
6389 (parallel [(const_int 0) (const_int 1)
6390 (const_int 2) (const_int 3)])))]
6391 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))
6392 && reload_completed"
6395 rtx op1 = operands[1];
6397 op1 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op1));
6399 op1 = gen_lowpart (<ssehalfvecmode>mode, op1);
6400 emit_move_insn (operands[0], op1);
;; vec_extract_lo_<mode>_maskm: masked store of elements 0..3 of a V8FI
;; register to memory. The merge source (operand 2) is tied to the
;; destination memory ("0") and the mask is operand 3 ("Yk").
;; Emitted as vextract<shuffletype>64x4 with immediate 0.
6404 (define_insn "vec_extract_lo_<mode>_maskm"
6405 [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
6406 (vec_merge:<ssehalfvecmode>
6407 (vec_select:<ssehalfvecmode>
6408 (match_operand:V8FI 1 "register_operand" "v")
6409 (parallel [(const_int 0) (const_int 1)
6410 (const_int 2) (const_int 3)]))
6411 (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
6412 (match_operand:QI 3 "register_operand" "Yk")))]
6414 "vextract<shuffletype>64x4\t{$0x0, %1, %0%{%3%}|%0%{%3%}, %1, 0x0}"
6415 [(set_attr "type" "sselog")
6416 (set_attr "prefix_extra" "1")
6417 (set_attr "length_immediate" "1")
6418 (set_attr "prefix" "evex")
6419 (set_attr "mode" "<sseinsnmode>")])
;; vec_extract_lo_<mode><mask_name> (V8FI): select elements 0..3 of a V8FI
;; operand. The source is a register (alternative 0) or memory
;; (alternative 1); the case where both operands are memory is rejected.
;; The visible output statement emits vextract<shuffletype>64x4 with
;; immediate 0. The "memory" attribute is "store" when operand 0 is a MEM.
6421 (define_insn "vec_extract_lo_<mode><mask_name>"
6422 [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=<store_mask_constraint>,v")
6423 (vec_select:<ssehalfvecmode>
6424 (match_operand:V8FI 1 "nonimmediate_operand" "v,m")
6425 (parallel [(const_int 0) (const_int 1)
6426 (const_int 2) (const_int 3)])))]
6427 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6430 return "vextract<shuffletype>64x4\t{$0x0, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x0}";
6434 [(set_attr "type" "sselog")
6435 (set_attr "prefix_extra" "1")
6436 (set_attr "length_immediate" "1")
6437 (set (attr "memory")
6438 (if_then_else (match_test "MEM_P (operands[0])")
6439 (const_string "store")
6440 (const_string "none")))
6441 (set_attr "prefix" "evex")
6442 (set_attr "mode" "<sseinsnmode>")])
;; vec_extract_hi_<mode>_maskm: masked store of elements 4..7 of a V8FI
;; register to memory. The merge source (operand 2) is tied to the
;; destination memory ("0") and the mask is operand 3 ("Yk").
;; Emitted as vextract<shuffletype>64x4 with immediate 1.
6444 (define_insn "vec_extract_hi_<mode>_maskm"
6445 [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
6446 (vec_merge:<ssehalfvecmode>
6447 (vec_select:<ssehalfvecmode>
6448 (match_operand:V8FI 1 "register_operand" "v")
6449 (parallel [(const_int 4) (const_int 5)
6450 (const_int 6) (const_int 7)]))
6451 (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
6452 (match_operand:QI 3 "register_operand" "Yk")))]
6454 "vextract<shuffletype>64x4\t{$0x1, %1, %0%{%3%}|%0%{%3%}, %1, 0x1}"
6455 [(set_attr "type" "sselog")
6456 (set_attr "prefix_extra" "1")
6457 (set_attr "length_immediate" "1")
6458 (set_attr "memory" "store")
6459 (set_attr "prefix" "evex")
6460 (set_attr "mode" "<sseinsnmode>")])
;; vec_extract_hi_<mode><mask_name> (V8FI): select elements 4..7 of a V8FI
;; register, emitted as vextract<shuffletype>64x4 with immediate 1.
;; The "memory" attribute is "store" when operand 0 is a MEM, else "none".
6462 (define_insn "vec_extract_hi_<mode><mask_name>"
6463 [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
6464 (vec_select:<ssehalfvecmode>
6465 (match_operand:V8FI 1 "register_operand" "v")
6466 (parallel [(const_int 4) (const_int 5)
6467 (const_int 6) (const_int 7)])))]
6469 "vextract<shuffletype>64x4\t{$0x1, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x1}"
6470 [(set_attr "type" "sselog")
6471 (set_attr "prefix_extra" "1")
6472 (set_attr "length_immediate" "1")
6473 (set (attr "memory")
6474 (if_then_else (match_test "MEM_P (operands[0])")
6475 (const_string "store")
6476 (const_string "none")))
6477 (set_attr "prefix" "evex")
6478 (set_attr "mode" "<sseinsnmode>")])
;; vec_extract_hi_<mode><mask_name> (V16FI): select elements 8..15 of a
;; V16FI register. The masked variant is only enabled with
;; TARGET_AVX512DQ. Two alternatives:
;;   0 (isa avx512dq)   vextract<shuffletype>32x8, may carry the mask
;;   1 (isa noavx512dq) vextracti64x4, unmasked
;; NOTE(review): alternative 1 uses the integer mnemonic for every mode of
;; the iterator -- presumably intentional since the bits moved are the
;; same; confirm.
6480 (define_insn "vec_extract_hi_<mode><mask_name>"
6481 [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=<store_mask_constraint>,vm")
6482 (vec_select:<ssehalfvecmode>
6483 (match_operand:V16FI 1 "register_operand" "v,v")
6484 (parallel [(const_int 8) (const_int 9)
6485 (const_int 10) (const_int 11)
6486 (const_int 12) (const_int 13)
6487 (const_int 14) (const_int 15)])))]
6488 "TARGET_AVX512F && (!<mask_applied> || TARGET_AVX512DQ)"
6490 vextract<shuffletype>32x8\t{$0x1, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x1}
6491 vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
6492 [(set_attr "type" "sselog1")
6493 (set_attr "prefix_extra" "1")
6494 (set_attr "isa" "avx512dq,noavx512dq")
6495 (set_attr "length_immediate" "1")
6496 (set_attr "prefix" "evex")
6497 (set_attr "mode" "<sseinsnmode>")])
;; avx_vextractf128<mode>: expander that extracts one 128-bit half of a
;; V_256 register. Operand 2 (0 or 1) selects the generator,
;; gen_vec_extract_lo_<mode> or gen_vec_extract_hi_<mode>, which is then
;; emitted with operands 0 and 1.
6499 (define_expand "avx_vextractf128<mode>"
6500 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
6501 (match_operand:V_256 1 "register_operand")
6502 (match_operand:SI 2 "const_0_to_1_operand")]
6505 rtx (*insn)(rtx, rtx);
6507 switch (INTVAL (operands[2]))
6510 insn = gen_vec_extract_lo_<mode>;
6513 insn = gen_vec_extract_hi_<mode>;
6519 emit_insn (insn (operands[0], operands[1]));
;; vec_extract_lo_<mode> (V16FI): select elements 0..7 of a V16FI operand.
;; Valid under TARGET_AVX512F unless both operands are memory. After
;; reload it is split into a plain half-mode move: the visible code
;; obtains the source either as a register with the same REGNO or through
;; gen_lowpart, then calls emit_move_insn.
6523 (define_insn_and_split "vec_extract_lo_<mode>"
6524 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,m")
6525 (vec_select:<ssehalfvecmode>
6526 (match_operand:V16FI 1 "nonimmediate_operand" "vm,v")
6527 (parallel [(const_int 0) (const_int 1)
6528 (const_int 2) (const_int 3)
6529 (const_int 4) (const_int 5)
6530 (const_int 6) (const_int 7)])))]
6531 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6533 "&& reload_completed"
6536 rtx op1 = operands[1];
6538 op1 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op1));
6540 op1 = gen_lowpart (<ssehalfvecmode>mode, op1);
6541 emit_move_insn (operands[0], op1);
;; vec_extract_lo_<mode> (VI8F_256): select elements 0..1 of a VI8F_256
;; operand. Valid under TARGET_AVX unless both operands are memory.
;; After reload it is split into a plain half-mode move: a register source
;; is re-created in <ssehalfvecmode> with the same REGNO, a memory source
;; is re-addressed in <ssehalfvecmode> at offset 0.
6545 (define_insn_and_split "vec_extract_lo_<mode>"
6546 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
6547 (vec_select:<ssehalfvecmode>
6548 (match_operand:VI8F_256 1 "nonimmediate_operand" "xm,x")
6549 (parallel [(const_int 0) (const_int 1)])))]
6550 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6552 "&& reload_completed"
6553 [(set (match_dup 0) (match_dup 1))]
6555 if (REG_P (operands[1]))
6556 operands[1] = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (operands[1]));
6558 operands[1] = adjust_address (operands[1], <ssehalfvecmode>mode, 0);
;; vec_extract_hi_<mode> (VI8F_256): select elements 2..3 of a VI8F_256
;; register into a register or memory destination.
;; Emitted as vextract<i128> with immediate 1, VEX-encoded.
6561 (define_insn "vec_extract_hi_<mode>"
6562 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
6563 (vec_select:<ssehalfvecmode>
6564 (match_operand:VI8F_256 1 "register_operand" "x,x")
6565 (parallel [(const_int 2) (const_int 3)])))]
6567 "vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}"
6568 [(set_attr "type" "sselog")
6569 (set_attr "prefix_extra" "1")
6570 (set_attr "length_immediate" "1")
6571 (set_attr "memory" "none,store")
6572 (set_attr "prefix" "vex")
6573 (set_attr "mode" "<sseinsnmode>")])
;; vec_extract_lo_<mode> (VI4F_256): select elements 0..3 of a VI4F_256
;; operand. Valid under TARGET_AVX unless both operands are memory.
;; After reload it is split into a plain half-mode move: a register source
;; is re-created in <ssehalfvecmode> with the same REGNO, a memory source
;; is re-addressed in <ssehalfvecmode> at offset 0.
6575 (define_insn_and_split "vec_extract_lo_<mode>"
6576 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
6577 (vec_select:<ssehalfvecmode>
6578 (match_operand:VI4F_256 1 "nonimmediate_operand" "xm,x")
6579 (parallel [(const_int 0) (const_int 1)
6580 (const_int 2) (const_int 3)])))]
6581 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6583 "&& reload_completed"
6584 [(set (match_dup 0) (match_dup 1))]
6586 if (REG_P (operands[1]))
6587 operands[1] = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (operands[1]));
6589 operands[1] = adjust_address (operands[1], <ssehalfvecmode>mode, 0);
;; vec_extract_hi_<mode> (VI4F_256): select elements 4..7 of a VI4F_256
;; register into a register or memory destination.
;; Emitted as vextract<i128> with immediate 1, VEX-encoded.
6592 (define_insn "vec_extract_hi_<mode>"
6593 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
6594 (vec_select:<ssehalfvecmode>
6595 (match_operand:VI4F_256 1 "register_operand" "x,x")
6596 (parallel [(const_int 4) (const_int 5)
6597 (const_int 6) (const_int 7)])))]
6599 "vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}"
6600 [(set_attr "type" "sselog")
6601 (set_attr "prefix_extra" "1")
6602 (set_attr "length_immediate" "1")
6603 (set_attr "memory" "none,store")
6604 (set_attr "prefix" "vex")
6605 (set_attr "mode" "<sseinsnmode>")])
;; vec_extract_lo_v32hi: select elements 0..15 of a V32HI operand as V16HI.
;; Valid under TARGET_AVX512F unless both operands are memory. After
;; reload it is split into a plain V16HI move: a register source is
;; re-created as a V16HImode register with the same REGNO, a memory source
;; is re-addressed as V16HImode at offset 0.
6607 (define_insn_and_split "vec_extract_lo_v32hi"
6608 [(set (match_operand:V16HI 0 "nonimmediate_operand" "=v,m")
6610 (match_operand:V32HI 1 "nonimmediate_operand" "vm,v")
6611 (parallel [(const_int 0) (const_int 1)
6612 (const_int 2) (const_int 3)
6613 (const_int 4) (const_int 5)
6614 (const_int 6) (const_int 7)
6615 (const_int 8) (const_int 9)
6616 (const_int 10) (const_int 11)
6617 (const_int 12) (const_int 13)
6618 (const_int 14) (const_int 15)])))]
6619 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6621 "&& reload_completed"
6622 [(set (match_dup 0) (match_dup 1))]
6624 if (REG_P (operands[1]))
6625 operands[1] = gen_rtx_REG (V16HImode, REGNO (operands[1]));
6627 operands[1] = adjust_address (operands[1], V16HImode, 0);
;; vec_extract_hi_v32hi: select elements 16..31 of a V32HI operand as
;; V16HI, emitted as vextracti64x4 with immediate 1 (EVEX, mode XI).
;; NOTE(review): operand 1 uses predicate nonimmediate_operand although
;; both constraint alternatives are registers ("v,v"); the 256-bit
;; vec_extract_hi_v16hi pattern uses register_operand -- confirm whether
;; the looser predicate is intended.
6630 (define_insn "vec_extract_hi_v32hi"
6631 [(set (match_operand:V16HI 0 "nonimmediate_operand" "=v,m")
6633 (match_operand:V32HI 1 "nonimmediate_operand" "v,v")
6634 (parallel [(const_int 16) (const_int 17)
6635 (const_int 18) (const_int 19)
6636 (const_int 20) (const_int 21)
6637 (const_int 22) (const_int 23)
6638 (const_int 24) (const_int 25)
6639 (const_int 26) (const_int 27)
6640 (const_int 28) (const_int 29)
6641 (const_int 30) (const_int 31)])))]
6643 "vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
6644 [(set_attr "type" "sselog")
6645 (set_attr "prefix_extra" "1")
6646 (set_attr "length_immediate" "1")
6647 (set_attr "memory" "none,store")
6648 (set_attr "prefix" "evex")
6649 (set_attr "mode" "XI")])
;; vec_extract_lo_v16hi: select elements 0..7 of a V16HI operand as V8HI.
;; Valid under TARGET_AVX unless both operands are memory. After reload it
;; is split into a plain V8HI move: a register source is re-created as a
;; V8HImode register with the same REGNO, a memory source is re-addressed
;; as V8HImode at offset 0.
6651 (define_insn_and_split "vec_extract_lo_v16hi"
6652 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
6654 (match_operand:V16HI 1 "nonimmediate_operand" "xm,x")
6655 (parallel [(const_int 0) (const_int 1)
6656 (const_int 2) (const_int 3)
6657 (const_int 4) (const_int 5)
6658 (const_int 6) (const_int 7)])))]
6659 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6661 "&& reload_completed"
6662 [(set (match_dup 0) (match_dup 1))]
6664 if (REG_P (operands[1]))
6665 operands[1] = gen_rtx_REG (V8HImode, REGNO (operands[1]));
6667 operands[1] = adjust_address (operands[1], V8HImode, 0);
;; vec_extract_hi_v16hi: select elements 8..15 of a V16HI register as
;; V8HI, into a register or memory destination (VEX, mode OI).
;; The mnemonic is written "vextract%~128" with immediate 1.
;; NOTE(review): %~ is expanded when the operand is printed, presumably
;; to "i" or "f" depending on the target -- confirm in print_operand.
6670 (define_insn "vec_extract_hi_v16hi"
6671 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
6673 (match_operand:V16HI 1 "register_operand" "x,x")
6674 (parallel [(const_int 8) (const_int 9)
6675 (const_int 10) (const_int 11)
6676 (const_int 12) (const_int 13)
6677 (const_int 14) (const_int 15)])))]
6679 "vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}"
6680 [(set_attr "type" "sselog")
6681 (set_attr "prefix_extra" "1")
6682 (set_attr "length_immediate" "1")
6683 (set_attr "memory" "none,store")
6684 (set_attr "prefix" "vex")
6685 (set_attr "mode" "OI")])
;; vec_extract_lo_v64qi: select elements 0..31 of a V64QI operand as V32QI.
;; Valid under TARGET_AVX512F unless both operands are memory. After
;; reload it is split into a plain V32QI move: a register source is
;; re-created as a V32QImode register with the same REGNO, a memory source
;; is re-addressed as V32QImode at offset 0.
6687 (define_insn_and_split "vec_extract_lo_v64qi"
6688 [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m")
6690 (match_operand:V64QI 1 "nonimmediate_operand" "vm,v")
6691 (parallel [(const_int 0) (const_int 1)
6692 (const_int 2) (const_int 3)
6693 (const_int 4) (const_int 5)
6694 (const_int 6) (const_int 7)
6695 (const_int 8) (const_int 9)
6696 (const_int 10) (const_int 11)
6697 (const_int 12) (const_int 13)
6698 (const_int 14) (const_int 15)
6699 (const_int 16) (const_int 17)
6700 (const_int 18) (const_int 19)
6701 (const_int 20) (const_int 21)
6702 (const_int 22) (const_int 23)
6703 (const_int 24) (const_int 25)
6704 (const_int 26) (const_int 27)
6705 (const_int 28) (const_int 29)
6706 (const_int 30) (const_int 31)])))]
6707 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6709 "&& reload_completed"
6710 [(set (match_dup 0) (match_dup 1))]
6712 if (REG_P (operands[1]))
6713 operands[1] = gen_rtx_REG (V32QImode, REGNO (operands[1]));
6715 operands[1] = adjust_address (operands[1], V32QImode, 0);
;; vec_extract_hi_v64qi: select elements 32..63 of a V64QI operand as
;; V32QI, emitted as vextracti64x4 with immediate 1 (EVEX, mode XI).
;; NOTE(review): operand 1 uses predicate nonimmediate_operand although
;; both constraint alternatives are registers ("v,v"); the 256-bit
;; vec_extract_hi_v32qi pattern uses register_operand -- confirm whether
;; the looser predicate is intended.
6718 (define_insn "vec_extract_hi_v64qi"
6719 [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m")
6721 (match_operand:V64QI 1 "nonimmediate_operand" "v,v")
6722 (parallel [(const_int 32) (const_int 33)
6723 (const_int 34) (const_int 35)
6724 (const_int 36) (const_int 37)
6725 (const_int 38) (const_int 39)
6726 (const_int 40) (const_int 41)
6727 (const_int 42) (const_int 43)
6728 (const_int 44) (const_int 45)
6729 (const_int 46) (const_int 47)
6730 (const_int 48) (const_int 49)
6731 (const_int 50) (const_int 51)
6732 (const_int 52) (const_int 53)
6733 (const_int 54) (const_int 55)
6734 (const_int 56) (const_int 57)
6735 (const_int 58) (const_int 59)
6736 (const_int 60) (const_int 61)
6737 (const_int 62) (const_int 63)])))]
6739 "vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
6740 [(set_attr "type" "sselog")
6741 (set_attr "prefix_extra" "1")
6742 (set_attr "length_immediate" "1")
6743 (set_attr "memory" "none,store")
6744 (set_attr "prefix" "evex")
6745 (set_attr "mode" "XI")])
;; vec_extract_lo_v32qi: select elements 0..15 of a V32QI operand as V16QI.
;; Valid under TARGET_AVX unless both operands are memory. After reload it
;; is split into a plain V16QI move: a register source is re-created as a
;; V16QImode register with the same REGNO, a memory source is re-addressed
;; as V16QImode at offset 0.
6747 (define_insn_and_split "vec_extract_lo_v32qi"
6748 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
6750 (match_operand:V32QI 1 "nonimmediate_operand" "xm,x")
6751 (parallel [(const_int 0) (const_int 1)
6752 (const_int 2) (const_int 3)
6753 (const_int 4) (const_int 5)
6754 (const_int 6) (const_int 7)
6755 (const_int 8) (const_int 9)
6756 (const_int 10) (const_int 11)
6757 (const_int 12) (const_int 13)
6758 (const_int 14) (const_int 15)])))]
6759 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6761 "&& reload_completed"
6762 [(set (match_dup 0) (match_dup 1))]
6764 if (REG_P (operands[1]))
6765 operands[1] = gen_rtx_REG (V16QImode, REGNO (operands[1]));
6767 operands[1] = adjust_address (operands[1], V16QImode, 0);
;; vec_extract_hi_v32qi: select elements 16..31 of a V32QI register as
;; V16QI, into a register or memory destination (VEX, mode OI).
;; The mnemonic is written "vextract%~128" with immediate 1.
;; NOTE(review): %~ is expanded when the operand is printed, presumably
;; to "i" or "f" depending on the target -- confirm in print_operand.
6770 (define_insn "vec_extract_hi_v32qi"
6771 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
6773 (match_operand:V32QI 1 "register_operand" "x,x")
6774 (parallel [(const_int 16) (const_int 17)
6775 (const_int 18) (const_int 19)
6776 (const_int 20) (const_int 21)
6777 (const_int 22) (const_int 23)
6778 (const_int 24) (const_int 25)
6779 (const_int 26) (const_int 27)
6780 (const_int 28) (const_int 29)
6781 (const_int 30) (const_int 31)])))]
6783 "vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}"
6784 [(set_attr "type" "sselog")
6785 (set_attr "prefix_extra" "1")
6786 (set_attr "length_immediate" "1")
6787 (set_attr "memory" "none,store")
6788 (set_attr "prefix" "vex")
6789 (set_attr "mode" "OI")])
;; Each 256-bit entry is conditional on TARGET_AVX and each 512-bit entry
;; on TARGET_AVX512F; the 128-bit entries are unconditional. The list has
;; no 512-bit byte or halfword modes (V64QI, V32HI).
6791 ;; Modes handled by vec_extract patterns.
6792 (define_mode_iterator VEC_EXTRACT_MODE
6793 [(V32QI "TARGET_AVX") V16QI
6794 (V16HI "TARGET_AVX") V8HI
6795 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
6796 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
6797 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
6798 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF])
;; vec_extract<mode>: expander over VEC_EXTRACT_MODE. Operand 0 is the
;; scalar destination register, operand 1 the vector register and operand
;; 2 a constant integer. The preparation code calls
;; ix86_expand_vector_extract with the integer value of operand 2,
;; passing false as the first argument.
6800 (define_expand "vec_extract<mode>"
6801 [(match_operand:<ssescalarmode> 0 "register_operand")
6802 (match_operand:VEC_EXTRACT_MODE 1 "register_operand")
6803 (match_operand 2 "const_int_operand")]
6806 ix86_expand_vector_extract (false, operands[0], operands[1],
6807 INTVAL (operands[2]));
6811 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6813 ;; Parallel double-precision floating point element swizzling
6815 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; avx512f_unpckhpd512<mask_name>: 512-bit vunpckhpd on V8DF, with an
;; optional mask supplied through the <mask_name> substitution.
;; The selection list is 1,9, 3,11, 5,13, 7,15: the odd element of each
;; pair, alternating between indices below 8 and indices 8 and above.
;; NOTE(review): operand 1 uses predicate nonimmediate_operand but its
;; only constraint is "v" (register) -- confirm whether register_operand
;; was intended.
6817 (define_insn "<mask_codefor>avx512f_unpckhpd512<mask_name>"
6818 [(set (match_operand:V8DF 0 "register_operand" "=v")
6821 (match_operand:V8DF 1 "nonimmediate_operand" "v")
6822 (match_operand:V8DF 2 "nonimmediate_operand" "vm"))
6823 (parallel [(const_int 1) (const_int 9)
6824 (const_int 3) (const_int 11)
6825 (const_int 5) (const_int 13)
6826 (const_int 7) (const_int 15)])))]
6828 "vunpckhpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
6829 [(set_attr "type" "sselog")
6830 (set_attr "prefix" "evex")
6831 (set_attr "mode" "V8DF")])
;; avx_unpckhpd256: 256-bit vunpckhpd on V4DF. The selection list is
;; 1,5, 3,7. Operand 1 is a register; operand 2 may be a register or
;; memory.
6833 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
6834 (define_insn "avx_unpckhpd256"
6835 [(set (match_operand:V4DF 0 "register_operand" "=x")
6838 (match_operand:V4DF 1 "register_operand" "x")
6839 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
6840 (parallel [(const_int 1) (const_int 5)
6841 (const_int 3) (const_int 7)])))]
6843 "vunpckhpd\t{%2, %1, %0|%0, %1, %2}"
6844 [(set_attr "type" "sselog")
6845 (set_attr "prefix" "vex")
6846 (set_attr "mode" "V4DF")])
;; vec_interleave_highv4df: expander made of three selection steps with
;; index lists 0,4,2,6 then 1,5,3,7 and finally 2,3,6,7, the last one
;; writing operand 0. operands[3] and operands[4] are fresh V4DF pseudo
;; registers created in the preparation code.
;; NOTE(review): presumably these two pseudos hold the results of the
;; first two steps and feed the third -- the lines that would show this
;; are not visible in this excerpt.
6848 (define_expand "vec_interleave_highv4df"
6852 (match_operand:V4DF 1 "register_operand" "x")
6853 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
6854 (parallel [(const_int 0) (const_int 4)
6855 (const_int 2) (const_int 6)])))
6861 (parallel [(const_int 1) (const_int 5)
6862 (const_int 3) (const_int 7)])))
6863 (set (match_operand:V4DF 0 "register_operand")
6868 (parallel [(const_int 2) (const_int 3)
6869 (const_int 6) (const_int 7)])))]
6872 operands[3] = gen_reg_rtx (V4DFmode);
6873 operands[4] = gen_reg_rtx (V4DFmode);
;; vec_interleave_highv2df: expander for the V2DF high interleave.
;; Both inputs may be registers or memory. If
;; ix86_vec_interleave_v2df_operator_ok (operands, 1) rejects the operand
;; combination, operand 2 is forced into a register.
6877 (define_expand "vec_interleave_highv2df"
6878 [(set (match_operand:V2DF 0 "register_operand")
6881 (match_operand:V2DF 1 "nonimmediate_operand")
6882 (match_operand:V2DF 2 "nonimmediate_operand"))
6883 (parallel [(const_int 1)
6887 if (!ix86_vec_interleave_v2df_operator_ok (operands, 1))
6888 operands[2] = force_reg (V2DFmode, operands[2]);
;; *vec_interleave_highv2df: insn for the V2DF high interleave, valid
;; under TARGET_SSE2 when ix86_vec_interleave_v2df_operator_ok accepts the
;; operands. Six alternatives, in template order:
;;   0/1  register operands                     -> unpckhpd / vunpckhpd
;;   2    operand 1 in memory, operand 2 tied   -> %vmovddup of %H1 (sse3)
;;        to operand 1
;;   3/4  operand 1 in offsettable memory       -> movlpd / vmovlpd of %H1
;;   5    memory destination, operand 2 tied    -> %vmovhpd store
;;        to it
6891 (define_insn "*vec_interleave_highv2df"
6892 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m")
6895 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,o,o,o,x")
6896 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,1,0,x,0"))
6897 (parallel [(const_int 1)
6899 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
6901 unpckhpd\t{%2, %0|%0, %2}
6902 vunpckhpd\t{%2, %1, %0|%0, %1, %2}
6903 %vmovddup\t{%H1, %0|%0, %H1}
6904 movlpd\t{%H1, %0|%0, %H1}
6905 vmovlpd\t{%H1, %2, %0|%0, %2, %H1}
6906 %vmovhpd\t{%1, %0|%q0, %1}"
6907 [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
6908 (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
6909 (set_attr "ssememalign" "64")
6910 (set_attr "prefix_data16" "*,*,*,1,*,1")
6911 (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex")
6912 (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,V1DF")])
;; avx512f_movddup512<mask_name>: expander for the 512-bit movddup on
;; V8DF, optionally masked. Only one input operand is visible (register
;; or memory); the selection list is 0,8, 2,10, 4,12, 6,14, the same list
;; used by the unpcklpd512 patterns.
6914 (define_expand "avx512f_movddup512<mask_name>"
6915 [(set (match_operand:V8DF 0 "register_operand")
6918 (match_operand:V8DF 1 "nonimmediate_operand")
6920 (parallel [(const_int 0) (const_int 8)
6921 (const_int 2) (const_int 10)
6922 (const_int 4) (const_int 12)
6923 (const_int 6) (const_int 14)])))]
;; avx512f_unpcklpd512<mask_name>: expander for the 512-bit unpcklpd on
;; V8DF, optionally masked. Operand 1 must be a register, operand 2 may be
;; a register or memory; the selection list is 0,8, 2,10, 4,12, 6,14.
6926 (define_expand "avx512f_unpcklpd512<mask_name>"
6927 [(set (match_operand:V8DF 0 "register_operand")
6930 (match_operand:V8DF 1 "register_operand")
6931 (match_operand:V8DF 2 "nonimmediate_operand"))
6932 (parallel [(const_int 0) (const_int 8)
6933 (const_int 2) (const_int 10)
6934 (const_int 4) (const_int 12)
6935 (const_int 6) (const_int 14)])))]
;; *avx512f_unpcklpd512<mask_name>: insn shared by the movddup512 and
;; unpcklpd512 selection list (0,8, 2,10, 4,12, 6,14). Two alternatives:
;;   0  operand 2 tied to operand 1 ("1")  -> vmovddup of operand 1
;;   1  distinct operands                  -> vunpcklpd
6938 (define_insn "*avx512f_unpcklpd512<mask_name>"
6939 [(set (match_operand:V8DF 0 "register_operand" "=v,v")
6942 (match_operand:V8DF 1 "nonimmediate_operand" "vm, v")
6943 (match_operand:V8DF 2 "nonimmediate_operand" "1 ,vm"))
6944 (parallel [(const_int 0) (const_int 8)
6945 (const_int 2) (const_int 10)
6946 (const_int 4) (const_int 12)
6947 (const_int 6) (const_int 14)])))]
6950 vmovddup\t{%1, %0<mask_operand3>|%0<mask_operand3>, %1}
6951 vunpcklpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
6952 [(set_attr "type" "sselog")
6953 (set_attr "prefix" "evex")
6954 (set_attr "mode" "V8DF")])
;; avx_movddup256: expander for the 256-bit movddup on V4DF. Only one
;; input operand is visible (register or memory); the selection list is
;; 0,4, 2,6, the same list used by the unpcklpd256 patterns.
6956 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
6957 (define_expand "avx_movddup256"
6958 [(set (match_operand:V4DF 0 "register_operand")
6961 (match_operand:V4DF 1 "nonimmediate_operand")
6963 (parallel [(const_int 0) (const_int 4)
6964 (const_int 2) (const_int 6)])))]
;; avx_unpcklpd256: expander for the 256-bit unpcklpd on V4DF. Operand 1
;; must be a register, operand 2 may be a register or memory; the
;; selection list is 0,4, 2,6.
6967 (define_expand "avx_unpcklpd256"
6968 [(set (match_operand:V4DF 0 "register_operand")
6971 (match_operand:V4DF 1 "register_operand")
6972 (match_operand:V4DF 2 "nonimmediate_operand"))
6973 (parallel [(const_int 0) (const_int 4)
6974 (const_int 2) (const_int 6)])))]
;; *avx_unpcklpd256: insn shared by the movddup256 and unpcklpd256
;; selection list (0,4, 2,6). Two alternatives:
;;   0  register operand 1, reg/mem operand 2        -> vunpcklpd
;;   1  memory operand 1 with operand 2 tied to it   -> vmovddup
6977 (define_insn "*avx_unpcklpd256"
6978 [(set (match_operand:V4DF 0 "register_operand" "=x,x")
6981 (match_operand:V4DF 1 "nonimmediate_operand" " x,m")
6982 (match_operand:V4DF 2 "nonimmediate_operand" "xm,1"))
6983 (parallel [(const_int 0) (const_int 4)
6984 (const_int 2) (const_int 6)])))]
6987 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
6988 vmovddup\t{%1, %0|%0, %1}"
6989 [(set_attr "type" "sselog")
6990 (set_attr "prefix" "vex")
6991 (set_attr "mode" "V4DF")])
;; vec_interleave_lowv4df: expander made of three selection steps with
;; index lists 0,4,2,6 then 1,5,3,7 and finally 0,1,4,5, the last one
;; writing operand 0. operands[3] and operands[4] are fresh V4DF pseudo
;; registers created in the preparation code.
;; NOTE(review): presumably these two pseudos hold the results of the
;; first two steps and feed the third -- the lines that would show this
;; are not visible in this excerpt.
6993 (define_expand "vec_interleave_lowv4df"
6997 (match_operand:V4DF 1 "register_operand" "x")
6998 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
6999 (parallel [(const_int 0) (const_int 4)
7000 (const_int 2) (const_int 6)])))
7006 (parallel [(const_int 1) (const_int 5)
7007 (const_int 3) (const_int 7)])))
7008 (set (match_operand:V4DF 0 "register_operand")
7013 (parallel [(const_int 0) (const_int 1)
7014 (const_int 4) (const_int 5)])))]
7017 operands[3] = gen_reg_rtx (V4DFmode);
7018 operands[4] = gen_reg_rtx (V4DFmode);
;; Expander for the low interleave of two V2DF values.  Both inputs may be
;; registers or memory; if ix86_vec_interleave_v2df_operator_ok rejects
;; the operand combination, operand 1 is forced into a register.
7021 (define_expand "vec_interleave_lowv2df"
7022 [(set (match_operand:V2DF 0 "register_operand")
7025 (match_operand:V2DF 1 "nonimmediate_operand")
7026 (match_operand:V2DF 2 "nonimmediate_operand"))
7027 (parallel [(const_int 0)
7031 if (!ix86_vec_interleave_v2df_operator_ok (operands, 0))
7032 operands[1] = force_reg (V2DFmode, operands[1]);
;; Low interleave of two V2DF values.  Requires TARGET_SSE2 and an operand
;; combination accepted by ix86_vec_interleave_v2df_operator_ok.
;; Alternatives, in order:
;;   0  noavx: operand 1 tied to the destination, unpcklpd
;;   1  avx:   three-operand vunpcklpd
;;   2  sse3:  operand 1 in memory with operand 2 tied to it, %vmovddup
;;   3  noavx: operand 2 in memory, movhpd
;;   4  avx:   operand 2 in memory, vmovhpd
;;   5  any:   destination in offsettable memory tied to operand 1,
;;             %vmovlpd writes operand 2 to %H0
7035 (define_insn "*vec_interleave_lowv2df"
7036 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,o")
7039 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,m,0,x,0")
7040 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,1,m,m,x"))
7041 (parallel [(const_int 0)
7043 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
7045 unpcklpd\t{%2, %0|%0, %2}
7046 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
7047 %vmovddup\t{%1, %0|%0, %q1}
7048 movhpd\t{%2, %0|%0, %q2}
7049 vmovhpd\t{%2, %1, %0|%0, %1, %q2}
7050 %vmovlpd\t{%2, %H0|%H0, %2}"
7051 [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
7052 (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
7053 (set_attr "ssememalign" "64")
7054 (set_attr "prefix_data16" "*,*,*,1,*,1")
7055 (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex")
7056 (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,V1DF")])
;; Post-reload rewrite (TARGET_SSE3) of a V2DF store whose source is
;; register operand 1: the DFmode view of that register is stored twice,
;; at byte offsets 0 and 8 of memory operand 0.
;; NOTE(review): the opening define_split line is not present in this
;; copy; confirm against the upstream file.
7059 [(set (match_operand:V2DF 0 "memory_operand")
7062 (match_operand:V2DF 1 "register_operand")
7064 (parallel [(const_int 0)
7066 "TARGET_SSE3 && reload_completed"
7069 rtx low = gen_rtx_REG (DFmode, REGNO (operands[1]));
7070 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
7071 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
;; Rewrite (TARGET_SSE3) of a two-element selection from memory operand 1
;; whose second index equals the first index plus 2.  The result is a
;; vec_duplicate of the single DF element at byte offset operands[2] * 8
;; of the memory operand.
;; NOTE(review): the opening define_split line is not present in this
;; copy; confirm against the upstream file.
7076 [(set (match_operand:V2DF 0 "register_operand")
7079 (match_operand:V2DF 1 "memory_operand")
7081 (parallel [(match_operand:SI 2 "const_0_to_1_operand")
7082 (match_operand:SI 3 "const_int_operand")])))]
7083 "TARGET_SSE3 && INTVAL (operands[2]) + 2 == INTVAL (operands[3])"
7084 [(set (match_dup 0) (vec_duplicate:V2DF (match_dup 1)))]
7086 operands[1] = adjust_address (operands[1], DFmode, INTVAL (operands[2]) * 8);
;; Scalar vscalef on the 128-bit float modes (VF_128).  Operand 1 is a
;; register; operand 2 uses the <round_name> substitution's predicate and
;; constraint.  The mnemonic suffix is <ssescalarmodesuffix> and the
;; rounding operand <round_op3> is printed next to operand 2.
7089 (define_insn "avx512f_vmscalef<mode><round_name>"
7090 [(set (match_operand:VF_128 0 "register_operand" "=v")
7093 [(match_operand:VF_128 1 "register_operand" "v")
7094 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>")]
7099 "vscalef<ssescalarmodesuffix>\t{<round_op3>%2, %1, %0|%0, %1, %2<round_op3>}"
7100 [(set_attr "prefix" "evex")
7101 (set_attr "mode" "<ssescalarmode>")])
;; Packed vscalef on the 512-bit float modes (VF_512), with optional
;; masking (<mask_name>) and embedded rounding (<round_name>).  The
;; destination carries <mask_operand3>; the rounding operand
;; <round_mask_op3> is printed next to operand 2.
7103 (define_insn "avx512f_scalef<mode><mask_name><round_name>"
7104 [(set (match_operand:VF_512 0 "register_operand" "=v")
7106 [(match_operand:VF_512 1 "register_operand" "v")
7107 (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>")]
7110 "vscalef<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
7111 [(set_attr "prefix" "evex")
7112 (set_attr "mode" "<MODE>")])
;; Zero-masking entry point for vpternlog on VI48_512.  It forwards its
;; six operands to gen_avx512f_vternlog<mode>_maskz_1, inserting an
;; all-zero vector (CONST0_RTX) ahead of the mask register operand 5.
7114 (define_expand "avx512f_vternlog<mode>_maskz"
7115 [(match_operand:VI48_512 0 "register_operand")
7116 (match_operand:VI48_512 1 "register_operand")
7117 (match_operand:VI48_512 2 "register_operand")
7118 (match_operand:VI48_512 3 "nonimmediate_operand")
7119 (match_operand:SI 4 "const_0_to_255_operand")
7120 (match_operand:<avx512fmaskmode> 5 "register_operand")]
7123 emit_insn (gen_avx512f_vternlog<mode>_maskz_1 (
7124 operands[0], operands[1], operands[2], operands[3],
7125 operands[4], CONST0_RTX (<MODE>mode), operands[5]));
;; vpternlog on VI48_512 with an 8-bit immediate (operand 4, 0..255).
;; Operand 1 is tied to the destination (constraint "0"), so the template
;; prints only %0, %2, %3 and the immediate.  <sd_mask_op5> is appended to
;; the destination by the <sd_maskz_name> substitution.
7129 (define_insn "avx512f_vternlog<mode><sd_maskz_name>"
7130 [(set (match_operand:VI48_512 0 "register_operand" "=v")
7132 [(match_operand:VI48_512 1 "register_operand" "0")
7133 (match_operand:VI48_512 2 "register_operand" "v")
7134 (match_operand:VI48_512 3 "nonimmediate_operand" "vm")
7135 (match_operand:SI 4 "const_0_to_255_operand")]
7138 "vpternlog<ssemodesuffix>\t{%4, %3, %2, %0<sd_mask_op5>|%0<sd_mask_op5>, %2, %3, %4}"
7139 [(set_attr "type" "sselog")
7140 (set_attr "prefix" "evex")
7141 (set_attr "mode" "<sseinsnmode>")])
;; Masked form of vpternlog on VI48_512.  Operand 1 is tied to the
;; destination; operand 5 is the mask register (constraint "Yk") and is
;; printed in braces after the destination as %{%5%}.
7143 (define_insn "avx512f_vternlog<mode>_mask"
7144 [(set (match_operand:VI48_512 0 "register_operand" "=v")
7147 [(match_operand:VI48_512 1 "register_operand" "0")
7148 (match_operand:VI48_512 2 "register_operand" "v")
7149 (match_operand:VI48_512 3 "nonimmediate_operand" "vm")
7150 (match_operand:SI 4 "const_0_to_255_operand")]
7153 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
7155 "vpternlog<ssemodesuffix>\t{%4, %3, %2, %0%{%5%}|%0%{%5%}, %2, %3, %4}"
7156 [(set_attr "type" "sselog")
7157 (set_attr "prefix" "evex")
7158 (set_attr "mode" "<sseinsnmode>")])
;; Packed vgetexp on VF_512, a single-input unspec with optional masking
;; (<mask_name>) and the <round_saeonly_name> substitution.
;; The ';' after the output template starts an (empty) md comment.
7160 (define_insn "avx512f_getexp<mode><mask_name><round_saeonly_name>"
7161 [(set (match_operand:VF_512 0 "register_operand" "=v")
7162 (unspec:VF_512 [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
7165 "vgetexp<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}";
7166 [(set_attr "prefix" "evex")
7167 (set_attr "mode" "<MODE>")])
;; Scalar vgetexp on VF_128: register operand 1 plus operand 2, which
;; takes the <round_saeonly_name> predicate and constraint.  The mnemonic
;; suffix is <ssescalarmodesuffix>.
;; The ';' after the output template starts an (empty) md comment.
7169 (define_insn "avx512f_sgetexp<mode><round_saeonly_name>"
7170 [(set (match_operand:VF_128 0 "register_operand" "=v")
7173 [(match_operand:VF_128 1 "register_operand" "v")
7174 (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
7179 "vgetexp<ssescalarmodesuffix>\t{<round_saeonly_op3>%2, %1, %0|%0, %1, %2<round_saeonly_op3>}";
7180 [(set_attr "prefix" "evex")
7181 (set_attr "mode" "<ssescalarmode>")])
;; valign on VI48_512: an unspec of register operand 1, register/memory
;; operand 2 and an 8-bit immediate operand 3 (0..255), with optional
;; masking through <mask_name> / <mask_operand4>.
;; The ';' after the output template starts an (empty) md comment.
7183 (define_insn "<mask_codefor>avx512f_align<mode><mask_name>"
7184 [(set (match_operand:VI48_512 0 "register_operand" "=v")
7185 (unspec:VI48_512 [(match_operand:VI48_512 1 "register_operand" "v")
7186 (match_operand:VI48_512 2 "nonimmediate_operand" "vm")
7187 (match_operand:SI 3 "const_0_to_255_operand")]
7190 "valign<ssemodesuffix>\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}";
7191 [(set_attr "prefix" "evex")
7192 (set_attr "mode" "<sseinsnmode>")])
;; Masked 512-bit shufps entry point.  The 8-bit immediate (operand 3) is
;; split into four 2-bit fields and expanded into the sixteen explicit
;; element selectors that gen_avx512f_shufps512_1_mask expects.
;; For each group base b in {0, 4, 8, 12}:
;;   field 0 and field 1 (bits 0-1, 2-3) give indices b + field
;;   field 2 and field 3 (bits 4-5, 6-7) give indices b + 16 + field
;; Operand 4 (V16SF) and operand 5 (HI mask register) are passed through
;; unchanged as the last two arguments.
7194 (define_expand "avx512f_shufps512_mask"
7195 [(match_operand:V16SF 0 "register_operand")
7196 (match_operand:V16SF 1 "register_operand")
7197 (match_operand:V16SF 2 "nonimmediate_operand")
7198 (match_operand:SI 3 "const_0_to_255_operand")
7199 (match_operand:V16SF 4 "register_operand")
7200 (match_operand:HI 5 "register_operand")]
7203 int mask = INTVAL (operands[3]);
7204 emit_insn (gen_avx512f_shufps512_1_mask (operands[0], operands[1], operands[2],
7205 GEN_INT ((mask >> 0) & 3),
7206 GEN_INT ((mask >> 2) & 3),
7207 GEN_INT (((mask >> 4) & 3) + 16),
7208 GEN_INT (((mask >> 6) & 3) + 16),
7209 GEN_INT (((mask >> 0) & 3) + 4),
7210 GEN_INT (((mask >> 2) & 3) + 4),
7211 GEN_INT (((mask >> 4) & 3) + 20),
7212 GEN_INT (((mask >> 6) & 3) + 20),
7213 GEN_INT (((mask >> 0) & 3) + 8),
7214 GEN_INT (((mask >> 2) & 3) + 8),
7215 GEN_INT (((mask >> 4) & 3) + 24),
7216 GEN_INT (((mask >> 6) & 3) + 24),
7217 GEN_INT (((mask >> 0) & 3) + 12),
7218 GEN_INT (((mask >> 2) & 3) + 12),
7219 GEN_INT (((mask >> 4) & 3) + 28),
7220 GEN_INT (((mask >> 6) & 3) + 28),
7221 operands[4], operands[5]));
;; Zero-masking entry point for packed vfixupimm on VF_512.  It forwards
;; its operands to gen_avx512f_fixupimm<mode>_maskz_1..., inserting an
;; all-zero vector (CONST0_RTX) ahead of the mask register operand 5 and
;; appending <round_saeonly_expand_operand6>.  Operand 3 is in the
;; integer vector mode <sseintvecmode>.
7226 (define_expand "avx512f_fixupimm<mode>_maskz<round_saeonly_expand_name>"
7227 [(match_operand:VF_512 0 "register_operand")
7228 (match_operand:VF_512 1 "register_operand")
7229 (match_operand:VF_512 2 "register_operand")
7230 (match_operand:<sseintvecmode> 3 "<round_saeonly_expand_nimm_predicate>")
7231 (match_operand:SI 4 "const_0_to_255_operand")
7232 (match_operand:<avx512fmaskmode> 5 "register_operand")]
7235 emit_insn (gen_avx512f_fixupimm<mode>_maskz_1<round_saeonly_expand_name> (
7236 operands[0], operands[1], operands[2], operands[3],
7237 operands[4], CONST0_RTX (<MODE>mode), operands[5]
7238 <round_saeonly_expand_operand6>));
;; Packed vfixupimm on VF_512 with an 8-bit immediate (operand 4).
;; Operand 1 is tied to the destination, so the template prints %0, %2,
;; %3 and the immediate; operand 3 is in <sseintvecmode>.
;; The ';' after the output template starts an (empty) md comment.
7242 (define_insn "avx512f_fixupimm<mode><sd_maskz_name><round_saeonly_name>"
7243 [(set (match_operand:VF_512 0 "register_operand" "=v")
7245 [(match_operand:VF_512 1 "register_operand" "0")
7246 (match_operand:VF_512 2 "register_operand" "v")
7247 (match_operand:<sseintvecmode> 3 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
7248 (match_operand:SI 4 "const_0_to_255_operand")]
7251 "vfixupimm<ssemodesuffix>\t{%4, <round_saeonly_sd_mask_op5>%3, %2, %0<sd_mask_op5>|%0<sd_mask_op5>, %2, %3<round_saeonly_sd_mask_op5>, %4}";
7252 [(set_attr "prefix" "evex")
7253 (set_attr "mode" "<MODE>")])
;; Masked form of packed vfixupimm on VF_512.  Operand 1 is tied to the
;; destination; operand 5 is the mask register (constraint "Yk"), printed
;; as %{%5%} after the destination.  <round_saeonly_op6> is printed next
;; to operand 3.
;; The ';' after the output template starts an (empty) md comment.
7255 (define_insn "avx512f_fixupimm<mode>_mask<round_saeonly_name>"
7256 [(set (match_operand:VF_512 0 "register_operand" "=v")
7259 [(match_operand:VF_512 1 "register_operand" "0")
7260 (match_operand:VF_512 2 "register_operand" "v")
7261 (match_operand:<sseintvecmode> 3 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
7262 (match_operand:SI 4 "const_0_to_255_operand")]
7265 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
7267 "vfixupimm<ssemodesuffix>\t{%4, <round_saeonly_op6>%3, %2, %0%{%5%}|%0%{%5%}, %2, %3<round_saeonly_op6>, %4}";
7268 [(set_attr "prefix" "evex")
7269 (set_attr "mode" "<MODE>")])
;; Zero-masking entry point for scalar vfixupimm on VF_128.  It forwards
;; its operands to gen_avx512f_sfixupimm<mode>_maskz_1..., inserting an
;; all-zero vector (CONST0_RTX) ahead of the mask register operand 5 and
;; appending <round_saeonly_expand_operand6>.
7271 (define_expand "avx512f_sfixupimm<mode>_maskz<round_saeonly_expand_name>"
7272 [(match_operand:VF_128 0 "register_operand")
7273 (match_operand:VF_128 1 "register_operand")
7274 (match_operand:VF_128 2 "register_operand")
7275 (match_operand:<sseintvecmode> 3 "<round_saeonly_expand_nimm_predicate>")
7276 (match_operand:SI 4 "const_0_to_255_operand")
7277 (match_operand:<avx512fmaskmode> 5 "register_operand")]
7280 emit_insn (gen_avx512f_sfixupimm<mode>_maskz_1<round_saeonly_expand_name> (
7281 operands[0], operands[1], operands[2], operands[3],
7282 operands[4], CONST0_RTX (<MODE>mode), operands[5]
7283 <round_saeonly_expand_operand6>));
;; Scalar vfixupimm on VF_128 with an 8-bit immediate (operand 4).
;; Operand 1 is tied to the destination; the mnemonic suffix is
;; <ssescalarmodesuffix> and the "mode" attribute is <ssescalarmode>.
;; The ';' after the output template starts an (empty) md comment.
7287 (define_insn "avx512f_sfixupimm<mode><sd_maskz_name><round_saeonly_name>"
7288 [(set (match_operand:VF_128 0 "register_operand" "=v")
7291 [(match_operand:VF_128 1 "register_operand" "0")
7292 (match_operand:VF_128 2 "register_operand" "v")
7293 (match_operand:<sseintvecmode> 3 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
7294 (match_operand:SI 4 "const_0_to_255_operand")]
7299 "vfixupimm<ssescalarmodesuffix>\t{%4, <round_saeonly_sd_mask_op5>%3, %2, %0<sd_mask_op5>|%0<sd_mask_op5>, %2, %3<round_saeonly_sd_mask_op5>, %4}";
7300 [(set_attr "prefix" "evex")
7301 (set_attr "mode" "<ssescalarmode>")])
;; Masked form of scalar vfixupimm on VF_128.  Operand 1 is tied to the
;; destination; operand 5 is the mask register (constraint "Yk"), printed
;; as %{%5%} after the destination.
;; The ';' after the output template starts an (empty) md comment.
7303 (define_insn "avx512f_sfixupimm<mode>_mask<round_saeonly_name>"
7304 [(set (match_operand:VF_128 0 "register_operand" "=v")
7308 [(match_operand:VF_128 1 "register_operand" "0")
7309 (match_operand:VF_128 2 "register_operand" "v")
7310 (match_operand:<sseintvecmode> 3 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
7311 (match_operand:SI 4 "const_0_to_255_operand")]
7316 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
7318 "vfixupimm<ssescalarmodesuffix>\t{%4, <round_saeonly_op6>%3, %2, %0%{%5%}|%0%{%5%}, %2, %3<round_saeonly_op6>, %4}";
7319 [(set_attr "prefix" "evex")
7320 (set_attr "mode" "<ssescalarmode>")])
;; Packed vrndscale on VF_512: one vector input (operand 1) and an 8-bit
;; immediate (operand 2), with optional masking (<mask_name>) and the
;; <round_saeonly_name> substitution.  length_immediate is 1 for the
;; immediate byte.
7322 (define_insn "avx512f_rndscale<mode><mask_name><round_saeonly_name>"
7323 [(set (match_operand:VF_512 0 "register_operand" "=v")
7325 [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
7326 (match_operand:SI 2 "const_0_to_255_operand")]
7329 "vrndscale<ssemodesuffix>\t{%2, <round_saeonly_mask_op3>%1, %0<mask_operand3>|%0<mask_operand3>, %1<round_saeonly_mask_op3>, %2}"
7330 [(set_attr "length_immediate" "1")
7331 (set_attr "prefix" "evex")
7332 (set_attr "mode" "<MODE>")])
;; Scalar vrndscale on VF_128: register operand 1, operand 2 with the
;; <round_saeonly_name> predicate/constraint, and an 8-bit immediate
;; (operand 3).  The mnemonic suffix is <ssescalarmodesuffix>.
;; NOTE(review): the "mode" attribute here is <MODE>, whereas the other
;; scalar VF_128 insns nearby (vmscalef, sgetexp, sfixupimm) use
;; <ssescalarmode> -- confirm whether this difference is intended.
7334 (define_insn "avx512f_rndscale<mode><round_saeonly_name>"
7335 [(set (match_operand:VF_128 0 "register_operand" "=v")
7338 [(match_operand:VF_128 1 "register_operand" "v")
7339 (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
7340 (match_operand:SI 3 "const_0_to_255_operand")]
7345 "vrndscale<ssescalarmodesuffix>\t{%3, <round_saeonly_op4>%2, %1, %0|%0, %1, %2<round_saeonly_op4>, %3}"
7346 [(set_attr "length_immediate" "1")
7347 (set_attr "prefix" "evex")
7348 (set_attr "mode" "<MODE>")])
7350 ;; One bit in mask selects 2 elements.
;; 512-bit vshufps with the shuffle written out as sixteen explicit
;; element selectors (operands 3..18).  The predicates restrict each
;; selector to a window of four indices, and the insn condition further
;; requires that the selectors of the second, third and fourth groups
;; (operands 7-10, 11-14, 15-18) equal those of the first group
;; (operands 3-6) plus 4, 8 and 12 respectively -- i.e. every group
;; applies the same shuffle.
;; The output code rebuilds the 8-bit immediate from the first group only:
;; operands 3 and 4 supply bits 0-1 and 2-3, operands 5 and 6 (after
;; subtracting their base of 16) supply bits 4-5 and 6-7.  The result
;; replaces operands[3] before the template is returned.
7351 (define_insn "avx512f_shufps512_1<mask_name>"
7352 [(set (match_operand:V16SF 0 "register_operand" "=v")
7355 (match_operand:V16SF 1 "register_operand" "v")
7356 (match_operand:V16SF 2 "nonimmediate_operand" "vm"))
7357 (parallel [(match_operand 3 "const_0_to_3_operand")
7358 (match_operand 4 "const_0_to_3_operand")
7359 (match_operand 5 "const_16_to_19_operand")
7360 (match_operand 6 "const_16_to_19_operand")
7361 (match_operand 7 "const_4_to_7_operand")
7362 (match_operand 8 "const_4_to_7_operand")
7363 (match_operand 9 "const_20_to_23_operand")
7364 (match_operand 10 "const_20_to_23_operand")
7365 (match_operand 11 "const_8_to_11_operand")
7366 (match_operand 12 "const_8_to_11_operand")
7367 (match_operand 13 "const_24_to_27_operand")
7368 (match_operand 14 "const_24_to_27_operand")
7369 (match_operand 15 "const_12_to_15_operand")
7370 (match_operand 16 "const_12_to_15_operand")
7371 (match_operand 17 "const_28_to_31_operand")
7372 (match_operand 18 "const_28_to_31_operand")])))]
7374 && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
7375 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
7376 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
7377 && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4)
7378 && INTVAL (operands[3]) == (INTVAL (operands[11]) - 8)
7379 && INTVAL (operands[4]) == (INTVAL (operands[12]) - 8)
7380 && INTVAL (operands[5]) == (INTVAL (operands[13]) - 8)
7381 && INTVAL (operands[6]) == (INTVAL (operands[14]) - 8)
7382 && INTVAL (operands[3]) == (INTVAL (operands[15]) - 12)
7383 && INTVAL (operands[4]) == (INTVAL (operands[16]) - 12)
7384 && INTVAL (operands[5]) == (INTVAL (operands[17]) - 12)
7385 && INTVAL (operands[6]) == (INTVAL (operands[18]) - 12))"
7388 mask = INTVAL (operands[3]);
7389 mask |= INTVAL (operands[4]) << 2;
7390 mask |= (INTVAL (operands[5]) - 16) << 4;
7391 mask |= (INTVAL (operands[6]) - 16) << 6;
7392 operands[3] = GEN_INT (mask);
7394 return "vshufps\t{%3, %2, %1, %0<mask_operand19>|%0<mask_operand19>, %1, %2, %3}";
7396 [(set_attr "type" "sselog")
7397 (set_attr "length_immediate" "1")
7398 (set_attr "prefix" "evex")
7399 (set_attr "mode" "V16SF")])
;; Masked 512-bit shufpd entry point.  Each bit of the 8-bit immediate
;; (operand 3) is turned into an explicit element selector for
;; gen_avx512f_shufpd512_1_mask: a set bit picks the odd index of its
;; pair, a clear bit the even one (e.g. bit 1 -> 9 or 8, bit 2 -> 3 or 2).
;; Operand 4 (V8DF) and operand 5 (QI mask register) are passed through
;; as the last two arguments.
;; NOTE(review): only the selectors for bits 1..7 appear in this copy;
;; confirm the bit-0 argument against the upstream file.
7401 (define_expand "avx512f_shufpd512_mask"
7402 [(match_operand:V8DF 0 "register_operand")
7403 (match_operand:V8DF 1 "register_operand")
7404 (match_operand:V8DF 2 "nonimmediate_operand")
7405 (match_operand:SI 3 "const_0_to_255_operand")
7406 (match_operand:V8DF 4 "register_operand")
7407 (match_operand:QI 5 "register_operand")]
7410 int mask = INTVAL (operands[3]);
7411 emit_insn (gen_avx512f_shufpd512_1_mask (operands[0], operands[1], operands[2],
7413 GEN_INT (mask & 2 ? 9 : 8),
7414 GEN_INT (mask & 4 ? 3 : 2),
7415 GEN_INT (mask & 8 ? 11 : 10),
7416 GEN_INT (mask & 16 ? 5 : 4),
7417 GEN_INT (mask & 32 ? 13 : 12),
7418 GEN_INT (mask & 64 ? 7 : 6),
7419 GEN_INT (mask & 128 ? 15 : 14),
7420 operands[4], operands[5]));
;; 512-bit vshufpd with the shuffle written out as eight explicit element
;; selectors (operands 3..10).  Each predicate allows exactly two adjacent
;; indices, so each selector carries one bit of information.
;; The output code rebuilds the 8-bit immediate: selector k (operand 3+k)
;; minus the lower bound of its predicate range gives bit k.  The result
;; replaces operands[3] before the template is returned.
7424 (define_insn "avx512f_shufpd512_1<mask_name>"
7425 [(set (match_operand:V8DF 0 "register_operand" "=v")
7428 (match_operand:V8DF 1 "register_operand" "v")
7429 (match_operand:V8DF 2 "nonimmediate_operand" "vm"))
7430 (parallel [(match_operand 3 "const_0_to_1_operand")
7431 (match_operand 4 "const_8_to_9_operand")
7432 (match_operand 5 "const_2_to_3_operand")
7433 (match_operand 6 "const_10_to_11_operand")
7434 (match_operand 7 "const_4_to_5_operand")
7435 (match_operand 8 "const_12_to_13_operand")
7436 (match_operand 9 "const_6_to_7_operand")
7437 (match_operand 10 "const_14_to_15_operand")])))]
7441 mask = INTVAL (operands[3]);
7442 mask |= (INTVAL (operands[4]) - 8) << 1;
7443 mask |= (INTVAL (operands[5]) - 2) << 2;
7444 mask |= (INTVAL (operands[6]) - 10) << 3;
7445 mask |= (INTVAL (operands[7]) - 4) << 4;
7446 mask |= (INTVAL (operands[8]) - 12) << 5;
7447 mask |= (INTVAL (operands[9]) - 6) << 6;
7448 mask |= (INTVAL (operands[10]) - 14) << 7;
7449 operands[3] = GEN_INT (mask);
7451 return "vshufpd\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
7453 [(set_attr "type" "sselog")
7454 (set_attr "length_immediate" "1")
7455 (set_attr "prefix" "evex")
7456 (set_attr "mode" "V8DF")])
;; 256-bit shufpd entry point.  Bits of the integer immediate (operand 3)
;; are turned into explicit element selectors for gen_avx_shufpd256_1:
;; bit 1 -> 5 or 4, bit 2 -> 3 or 2, bit 3 -> 7 or 6.
;; NOTE(review): only the selectors for bits 1..3 appear in this copy;
;; confirm the bit-0 argument against the upstream file.
7458 (define_expand "avx_shufpd256"
7459 [(match_operand:V4DF 0 "register_operand")
7460 (match_operand:V4DF 1 "register_operand")
7461 (match_operand:V4DF 2 "nonimmediate_operand")
7462 (match_operand:SI 3 "const_int_operand")]
7465 int mask = INTVAL (operands[3]);
7466 emit_insn (gen_avx_shufpd256_1 (operands[0], operands[1], operands[2],
7468 GEN_INT (mask & 2 ? 5 : 4),
7469 GEN_INT (mask & 4 ? 3 : 2),
7470 GEN_INT (mask & 8 ? 7 : 6)));
;; 256-bit vshufpd with four explicit element selectors (operands 3..6),
;; each restricted by its predicate to two adjacent indices.
;; The output code rebuilds the 4-bit immediate: operand 3 is bit 0, and
;; operands 4, 5, 6 minus their bases (4, 2, 6) are bits 1, 2, 3.  The
;; result replaces operands[3] before the template is returned.
7474 (define_insn "avx_shufpd256_1"
7475 [(set (match_operand:V4DF 0 "register_operand" "=x")
7478 (match_operand:V4DF 1 "register_operand" "x")
7479 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
7480 (parallel [(match_operand 3 "const_0_to_1_operand")
7481 (match_operand 4 "const_4_to_5_operand")
7482 (match_operand 5 "const_2_to_3_operand")
7483 (match_operand 6 "const_6_to_7_operand")])))]
7487 mask = INTVAL (operands[3]);
7488 mask |= (INTVAL (operands[4]) - 4) << 1;
7489 mask |= (INTVAL (operands[5]) - 2) << 2;
7490 mask |= (INTVAL (operands[6]) - 6) << 3;
7491 operands[3] = GEN_INT (mask);
7493 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
7495 [(set_attr "type" "sseshuf")
7496 (set_attr "length_immediate" "1")
7497 (set_attr "prefix" "vex")
7498 (set_attr "mode" "V4DF")])
;; 128-bit shufpd entry point on V2DF.  Bit 1 of the integer immediate
;; (operand 3) becomes the second element selector passed to
;; gen_sse2_shufpd_v2df (3 if set, 2 if clear).
;; NOTE(review): only the bit-1 selector appears in this copy; confirm
;; the bit-0 argument against the upstream file.
7500 (define_expand "sse2_shufpd"
7501 [(match_operand:V2DF 0 "register_operand")
7502 (match_operand:V2DF 1 "register_operand")
7503 (match_operand:V2DF 2 "nonimmediate_operand")
7504 (match_operand:SI 3 "const_int_operand")]
7507 int mask = INTVAL (operands[3]);
7508 emit_insn (gen_sse2_shufpd_v2df (operands[0], operands[1], operands[2],
7510 GEN_INT (mask & 2 ? 3 : 2)));
7514 ;; punpcklqdq and punpckhqdq are shorter than shufpd.
;; 256-bit high interleave of two V4DI values, emitted as vpunpckhqdq
;; with register operand 1 and register/memory operand 2.  The selector
;; shown here starts at index 1 (TODO confirm the remaining indices).
7515 (define_insn "avx2_interleave_highv4di"
7516 [(set (match_operand:V4DI 0 "register_operand" "=x")
7519 (match_operand:V4DI 1 "register_operand" "x")
7520 (match_operand:V4DI 2 "nonimmediate_operand" "xm"))
7521 (parallel [(const_int 1)
7526 "vpunpckhqdq\t{%2, %1, %0|%0, %1, %2}"
7527 [(set_attr "type" "sselog")
7528 (set_attr "prefix" "vex")
7529 (set_attr "mode" "OI")])
;; 512-bit high interleave of two V8DI values, emitted as vpunpckhqdq with
;; optional masking (<mask_name> / <mask_operand3>).  The selector picks
;; the odd-numbered indices 1,9,3,11,5,13,7,15.
7531 (define_insn "<mask_codefor>avx512f_interleave_highv8di<mask_name>"
7532 [(set (match_operand:V8DI 0 "register_operand" "=v")
7535 (match_operand:V8DI 1 "register_operand" "v")
7536 (match_operand:V8DI 2 "nonimmediate_operand" "vm"))
7537 (parallel [(const_int 1) (const_int 9)
7538 (const_int 3) (const_int 11)
7539 (const_int 5) (const_int 13)
7540 (const_int 7) (const_int 15)])))]
7542 "vpunpckhqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7543 [(set_attr "type" "sselog")
7544 (set_attr "prefix" "evex")
7545 (set_attr "mode" "XI")])
;; 128-bit high interleave of two V2DI values.
;; Alternative 0 (noavx): operand 1 tied to the destination, two-operand
;; punpckhqdq.  Alternative 1 (avx): three-operand vpunpckhqdq.
7547 (define_insn "vec_interleave_highv2di"
7548 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
7551 (match_operand:V2DI 1 "register_operand" "0,x")
7552 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm"))
7553 (parallel [(const_int 1)
7557 punpckhqdq\t{%2, %0|%0, %2}
7558 vpunpckhqdq\t{%2, %1, %0|%0, %1, %2}"
7559 [(set_attr "isa" "noavx,avx")
7560 (set_attr "type" "sselog")
7561 (set_attr "prefix_data16" "1,*")
7562 (set_attr "prefix" "orig,vex")
7563 (set_attr "mode" "TI")])
;; 256-bit low interleave of two V4DI values, emitted as vpunpcklqdq with
;; register operand 1 and register/memory operand 2.  The selector shown
;; here starts at index 0 (TODO confirm the remaining indices).
7565 (define_insn "avx2_interleave_lowv4di"
7566 [(set (match_operand:V4DI 0 "register_operand" "=x")
7569 (match_operand:V4DI 1 "register_operand" "x")
7570 (match_operand:V4DI 2 "nonimmediate_operand" "xm"))
7571 (parallel [(const_int 0)
7576 "vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}"
7577 [(set_attr "type" "sselog")
7578 (set_attr "prefix" "vex")
7579 (set_attr "mode" "OI")])
;; 512-bit low interleave of two V8DI values, emitted as vpunpcklqdq with
;; optional masking (<mask_name> / <mask_operand3>).  The selector picks
;; the even-numbered indices 0,8,2,10,4,12,6,14.
7581 (define_insn "<mask_codefor>avx512f_interleave_lowv8di<mask_name>"
7582 [(set (match_operand:V8DI 0 "register_operand" "=v")
7585 (match_operand:V8DI 1 "register_operand" "v")
7586 (match_operand:V8DI 2 "nonimmediate_operand" "vm"))
7587 (parallel [(const_int 0) (const_int 8)
7588 (const_int 2) (const_int 10)
7589 (const_int 4) (const_int 12)
7590 (const_int 6) (const_int 14)])))]
7592 "vpunpcklqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7593 [(set_attr "type" "sselog")
7594 (set_attr "prefix" "evex")
7595 (set_attr "mode" "XI")])
;; 128-bit low interleave of two V2DI values.
;; Alternative 0 (noavx): operand 1 tied to the destination, two-operand
;; punpcklqdq.  Alternative 1 (avx): three-operand vpunpcklqdq.
7597 (define_insn "vec_interleave_lowv2di"
7598 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
7601 (match_operand:V2DI 1 "register_operand" "0,x")
7602 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm"))
7603 (parallel [(const_int 0)
7607 punpcklqdq\t{%2, %0|%0, %2}
7608 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}"
7609 [(set_attr "isa" "noavx,avx")
7610 (set_attr "type" "sselog")
7611 (set_attr "prefix_data16" "1,*")
7612 (set_attr "prefix" "orig,vex")
7613 (set_attr "mode" "TI")])
;; 128-bit shufpd for the two-element 64-bit modes (VI8F_128): a
;; vec_select of two elements from the concatenation of operands 1 and 2.
;; Operand 3 selects index 0 or 1 and operand 4 selects index 2 or 3.
;; The output code folds them into a 2-bit immediate (operand 3 is bit 0,
;; operand 4 minus 2 is bit 1), stores it in operands[3], and then picks
;; the template by alternative: two-operand shufpd (noavx, operand 1 tied
;; to the destination) or three-operand vshufpd (avx).
7615 (define_insn "sse2_shufpd_<mode>"
7616 [(set (match_operand:VI8F_128 0 "register_operand" "=x,x")
7617 (vec_select:VI8F_128
7618 (vec_concat:<ssedoublevecmode>
7619 (match_operand:VI8F_128 1 "register_operand" "0,x")
7620 (match_operand:VI8F_128 2 "nonimmediate_operand" "xm,xm"))
7621 (parallel [(match_operand 3 "const_0_to_1_operand")
7622 (match_operand 4 "const_2_to_3_operand")])))]
7626 mask = INTVAL (operands[3]);
7627 mask |= (INTVAL (operands[4]) - 2) << 1;
7628 operands[3] = GEN_INT (mask);
7630 switch (which_alternative)
7633 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
7635 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
7640 [(set_attr "isa" "noavx,avx")
7641 (set_attr "type" "sseshuf")
7642 (set_attr "length_immediate" "1")
7643 (set_attr "prefix" "orig,vex")
7644 (set_attr "mode" "V2DF")])
7646 ;; Avoid combining registers from different units in a single alternative,
7647 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Extracts element 1 of a V2DF operand into a DF destination (register,
;; memory, x87 or integer register alternatives).  Requires TARGET_SSE2
;; and forbids both operands being in memory.
;; prefix_data16 is computed per alternative; the visible test involves
;; alternative 0 when TARGET_AVX is off.
;; NOTE(review): this copy shows templates for only two of the six
;; alternatives (%vmovhpd and vunpckhpd); confirm the rest upstream.
7648 (define_insn "sse2_storehpd"
7649 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,x,*f,r")
7651 (match_operand:V2DF 1 "nonimmediate_operand" " x,0,x,o,o,o")
7652 (parallel [(const_int 1)])))]
7653 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7655 %vmovhpd\t{%1, %0|%0, %1}
7657 vunpckhpd\t{%d1, %0|%0, %d1}
7661 [(set_attr "isa" "*,noavx,avx,*,*,*")
7662 (set_attr "type" "ssemov,sselog1,sselog1,ssemov,fmov,imov")
7663 (set (attr "prefix_data16")
7665 (and (eq_attr "alternative" "0")
7666 (not (match_test "TARGET_AVX")))
7668 (const_string "*")))
7669 (set_attr "prefix" "maybe_vex,orig,vex,*,*,*")
7670 (set_attr "mode" "V1DF,V1DF,V2DF,DF,DF,DF")])
;; Post-reload rewrite (TARGET_SSE2) of extracting element 1 from a V2DF
;; memory operand: it becomes a plain DF move from byte offset 8 of that
;; memory operand.
;; NOTE(review): the opening define_split line is not present in this
;; copy; confirm against the upstream file.
7673 [(set (match_operand:DF 0 "register_operand")
7675 (match_operand:V2DF 1 "memory_operand")
7676 (parallel [(const_int 1)])))]
7677 "TARGET_SSE2 && reload_completed"
7678 [(set (match_dup 0) (match_dup 1))]
7679 "operands[1] = adjust_address (operands[1], DFmode, 8);")
;; Element-1 extraction from V2DF for targets with SSE but without SSE2
;; (condition: !TARGET_SSE2 && TARGET_SSE, and not both operands in
;; memory).  Alternatives, in order: store to memory with movhps,
;; register-to-register with movhlps, and load from offsettable memory
;; with movlps using the %H1 form of the source.
7681 (define_insn "*vec_extractv2df_1_sse"
7682 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
7684 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
7685 (parallel [(const_int 1)])))]
7686 "!TARGET_SSE2 && TARGET_SSE
7687 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7689 movhps\t{%1, %0|%q0, %1}
7690 movhlps\t{%1, %0|%0, %1}
7691 movlps\t{%H1, %0|%0, %H1}"
7692 [(set_attr "type" "ssemov")
7693 (set_attr "ssememalign" "64")
7694 (set_attr "mode" "V2SF,V4SF,V2SF")])
7696 ;; Avoid combining registers from different units in a single alternative,
7697 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Extracts element 0 of a V2DF operand into a DF destination (memory,
;; SSE register, x87 or integer register alternatives).  Requires
;; TARGET_SSE2 and forbids both operands being in memory.
;; NOTE(review): this copy shows the template for only one of the five
;; alternatives (%vmovlpd); confirm the rest upstream.
7698 (define_insn "sse2_storelpd"
7699 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
7701 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m,m,m")
7702 (parallel [(const_int 0)])))]
7703 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7705 %vmovlpd\t{%1, %0|%0, %1}
7710 [(set_attr "type" "ssemov,ssemov,ssemov,fmov,imov")
7711 (set_attr "prefix_data16" "1,*,*,*,*")
7712 (set_attr "prefix" "maybe_vex")
7713 (set_attr "mode" "V1DF,DF,DF,DF,DF")])
;; Post-reload rewrite (TARGET_SSE2) of extracting element 0 from a V2DF
;; operand: it becomes a plain DF move.  A register source is re-viewed
;; as the same hard register in DFmode; otherwise the memory source is
;; re-addressed in DFmode at byte offset 0.
;; NOTE(review): the opening define_split line is not present in this
;; copy; confirm against the upstream file.
7716 [(set (match_operand:DF 0 "register_operand")
7718 (match_operand:V2DF 1 "nonimmediate_operand")
7719 (parallel [(const_int 0)])))]
7720 "TARGET_SSE2 && reload_completed"
7721 [(set (match_dup 0) (match_dup 1))]
7723 if (REG_P (operands[1]))
7724 operands[1] = gen_rtx_REG (DFmode, REGNO (operands[1]));
7726 operands[1] = adjust_address (operands[1], DFmode, 0);
;; Element-0 extraction from V2DF for targets with SSE but without SSE2
;; (condition: !TARGET_SSE2 && TARGET_SSE, and not both operands in
;; memory).  Alternatives, in order: store to memory with movlps,
;; register-to-register with movaps, and load from memory with movlps.
7729 (define_insn "*vec_extractv2df_0_sse"
7730 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
7732 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
7733 (parallel [(const_int 0)])))]
7734 "!TARGET_SSE2 && TARGET_SSE
7735 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7737 movlps\t{%1, %0|%0, %1}
7738 movaps\t{%1, %0|%0, %1}
7739 movlps\t{%1, %0|%0, %q1}"
7740 [(set_attr "type" "ssemov")
7741 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander wrapper around sse2_loadhpd.  The pattern combines element 0
;; of V2DF operand 1 with DF operand 2.  The preparation code lets
;; ix86_fixup_binary_operands choose a destination, emits sse2_loadhpd
;; into it, and copies the result to operands[0] if a different
;; destination was chosen.
7743 (define_expand "sse2_loadhpd_exp"
7744 [(set (match_operand:V2DF 0 "nonimmediate_operand")
7747 (match_operand:V2DF 1 "nonimmediate_operand")
7748 (parallel [(const_int 0)]))
7749 (match_operand:DF 2 "nonimmediate_operand")))]
7752 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
7754 emit_insn (gen_sse2_loadhpd (dst, operands[1], operands[2]));
7756 /* Fix up the destination if needed. */
7757 if (dst != operands[0])
7758 emit_move_insn (operands[0], dst);
7763 ;; Avoid combining registers from different units in a single alternative,
7764 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Combines element 0 of V2DF operand 1 with DF operand 2, giving a V2DF
;; result.  Requires TARGET_SSE2 and forbids operands 1 and 2 both being
;; in memory.  Seven alternatives: operand 2 from memory (movhpd /
;; vmovhpd), from an SSE register (unpcklpd / vunpcklpd), and three more
;; typed ssemov, fmov and imov.
;; NOTE(review): the constraint strings for operands 0 and 1 and the
;; templates of the last three alternatives are not present in this copy;
;; confirm against the upstream file.
7765 (define_insn "sse2_loadhpd"
7766 [(set (match_operand:V2DF 0 "nonimmediate_operand"
7770 (match_operand:V2DF 1 "nonimmediate_operand"
7772 (parallel [(const_int 0)]))
7773 (match_operand:DF 2 "nonimmediate_operand"
7774 " m,m,x,x,x,*f,r")))]
7775 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
7777 movhpd\t{%2, %0|%0, %2}
7778 vmovhpd\t{%2, %1, %0|%0, %1, %2}
7779 unpcklpd\t{%2, %0|%0, %2}
7780 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
7784 [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
7785 (set_attr "type" "ssemov,ssemov,sselog,sselog,ssemov,fmov,imov")
7786 (set_attr "ssememalign" "64")
7787 (set_attr "prefix_data16" "1,*,*,*,*,*,*")
7788 (set_attr "prefix" "orig,vex,orig,vex,*,*,*")
7789 (set_attr "mode" "V1DF,V1DF,V2DF,V2DF,DF,DF,DF")])
;; Post-reload rewrite (TARGET_SSE2) for a V2DF memory destination whose
;; element 0 is kept (it is selected from the destination itself) while
;; DF register operand 1 supplies the other element: it becomes a plain
;; DF store at byte offset 8 of the memory operand.
;; NOTE(review): the opening define_split line is not present in this
;; copy; confirm against the upstream file.
7792 [(set (match_operand:V2DF 0 "memory_operand")
7794 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
7795 (match_operand:DF 1 "register_operand")))]
7796 "TARGET_SSE2 && reload_completed"
7797 [(set (match_dup 0) (match_dup 1))]
7798 "operands[0] = adjust_address (operands[0], DFmode, 8);")
;; Expander wrapper around sse2_loadlpd.  The pattern combines DF operand
;; 2 with element 1 of V2DF operand 1.  The preparation code lets
;; ix86_fixup_binary_operands choose a destination, emits sse2_loadlpd
;; into it, and copies the result to operands[0] if a different
;; destination was chosen.
7800 (define_expand "sse2_loadlpd_exp"
7801 [(set (match_operand:V2DF 0 "nonimmediate_operand")
7803 (match_operand:DF 2 "nonimmediate_operand")
7805 (match_operand:V2DF 1 "nonimmediate_operand")
7806 (parallel [(const_int 1)]))))]
7809 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
7811 emit_insn (gen_sse2_loadlpd (dst, operands[1], operands[2]));
7813 /* Fix up the destination if needed. */
7814 if (dst != operands[0])
7815 emit_move_insn (operands[0], dst);
7820 ;; Avoid combining registers from different units in a single alternative,
7821 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Combines DF operand 2 with element 1 of V2DF operand 1, giving a V2DF
;; result.  Requires TARGET_SSE2 and forbids operands 1 and 2 both being
;; in memory.  Eleven alternatives; the visible templates cover the first
;; eight, in order:
;;   0    operand 1 is constraint "C": %vmovsd load of operand 2
;;   1/2  operand 2 in memory: movlpd (noavx) / vmovlpd (avx)
;;   3/4  operand 2 in a register: movsd (noavx) / vmovsd (avx)
;;   5    operand 2 tied to the destination: shufpd with immediate 2
;;   6/7  operand 1 in offsettable memory: movhpd / vmovhpd of %H1
;; Alternatives 8-10 have a memory destination with operand 1 tied to it.
;; The "type" attribute is sselog for alternative 5, fmov for 9, imov for
;; 10 and ssemov otherwise.
;; NOTE(review): the templates for alternatives 8-10 are not present in
;; this copy; confirm against the upstream file.
7822 (define_insn "sse2_loadlpd"
7823 [(set (match_operand:V2DF 0 "nonimmediate_operand"
7824 "=x,x,x,x,x,x,x,x,m,m ,m")
7826 (match_operand:DF 2 "nonimmediate_operand"
7827 " m,m,m,x,x,0,0,x,x,*f,r")
7829 (match_operand:V2DF 1 "vector_move_operand"
7830 " C,0,x,0,x,x,o,o,0,0 ,0")
7831 (parallel [(const_int 1)]))))]
7832 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
7834 %vmovsd\t{%2, %0|%0, %2}
7835 movlpd\t{%2, %0|%0, %2}
7836 vmovlpd\t{%2, %1, %0|%0, %1, %2}
7837 movsd\t{%2, %0|%0, %2}
7838 vmovsd\t{%2, %1, %0|%0, %1, %2}
7839 shufpd\t{$2, %1, %0|%0, %1, 2}
7840 movhpd\t{%H1, %0|%0, %H1}
7841 vmovhpd\t{%H1, %2, %0|%0, %2, %H1}
7845 [(set_attr "isa" "*,noavx,avx,noavx,avx,noavx,noavx,avx,*,*,*")
7847 (cond [(eq_attr "alternative" "5")
7848 (const_string "sselog")
7849 (eq_attr "alternative" "9")
7850 (const_string "fmov")
7851 (eq_attr "alternative" "10")
7852 (const_string "imov")
7854 (const_string "ssemov")))
7855 (set_attr "ssememalign" "64")
7856 (set_attr "prefix_data16" "*,1,*,*,*,*,1,*,*,*,*")
7857 (set_attr "length_immediate" "*,*,*,*,*,1,*,*,*,*,*")
7858 (set_attr "prefix" "maybe_vex,orig,vex,orig,vex,orig,orig,vex,*,*,*")
7859 (set_attr "mode" "DF,V1DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,DF,DF,DF")])
;; Post-reload rewrite (TARGET_SSE2) for a V2DF memory destination whose
;; element 1 is kept (it is selected from the destination itself) while
;; DF register operand 1 supplies the other element: it becomes a plain
;; DF store at byte offset 0 of the memory operand.
;; NOTE(review): the opening define_split line is not present in this
;; copy; confirm against the upstream file.
7862 [(set (match_operand:V2DF 0 "memory_operand")
7864 (match_operand:DF 1 "register_operand")
7865 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
7866 "TARGET_SSE2 && reload_completed"
7867 [(set (match_dup 0) (match_dup 1))]
7868 "operands[0] = adjust_address (operands[0], DFmode, 0);")
;; movsd-style merge of two V2DF operands; note that operand 2 is listed
;; before operand 1 in the pattern.  Nine alternatives, in order:
;;   0/1  register forms: movsd (noavx) / vmovsd (avx)
;;   2/3  operand 2 in memory: movlpd (noavx) / vmovlpd (avx)
;;   4    memory destination tied to operand 1: %vmovlpd store
;;   5    operand 2 tied to the destination: shufpd with immediate 2
;;   6/7  operand 1 in offsettable memory: movhps / vmovhps of %H1
;;   8    offsettable memory destination tied to operand 2: %vmovhps
;;        store of operand 1 to %H0
;; "type" is sselog for alternative 5 and ssemov otherwise; the
;; prefix_data16 test involves alternatives 2 and 4 when TARGET_AVX is
;; off.
7870 (define_insn "sse2_movsd"
7871 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,m,x,x,x,o")
7873 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,m,m,x,0,0,x,0")
7874 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,0,x,0,x,o,o,x")
7878 movsd\t{%2, %0|%0, %2}
7879 vmovsd\t{%2, %1, %0|%0, %1, %2}
7880 movlpd\t{%2, %0|%0, %q2}
7881 vmovlpd\t{%2, %1, %0|%0, %1, %q2}
7882 %vmovlpd\t{%2, %0|%q0, %2}
7883 shufpd\t{$2, %1, %0|%0, %1, 2}
7884 movhps\t{%H1, %0|%0, %H1}
7885 vmovhps\t{%H1, %2, %0|%0, %2, %H1}
7886 %vmovhps\t{%1, %H0|%H0, %1}"
7887 [(set_attr "isa" "noavx,avx,noavx,avx,*,noavx,noavx,avx,*")
7890 (eq_attr "alternative" "5")
7891 (const_string "sselog")
7892 (const_string "ssemov")))
7893 (set (attr "prefix_data16")
7895 (and (eq_attr "alternative" "2,4")
7896 (not (match_test "TARGET_AVX")))
7898 (const_string "*")))
7899 (set_attr "length_immediate" "*,*,*,*,*,1,*,*,*")
7900 (set_attr "ssememalign" "64")
7901 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig,vex,maybe_vex")
7902 (set_attr "mode" "DF,DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,V1DF")])
;; Builds a V2DF from DF operand 1 (presumably a vec_duplicate, going by
;; the pattern name -- TODO confirm).  Alternative 0 (noavx) has operand 1
;; tied to the destination; alternative 1 (sse3) takes a register or
;; memory source and emits %vmovddup.
;; NOTE(review): the template for alternative 0 is not present in this
;; copy; confirm against the upstream file.
7904 (define_insn "vec_dupv2df"
7905 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
7907 (match_operand:DF 1 "nonimmediate_operand" " 0,xm")))]
7911 %vmovddup\t{%1, %0|%0, %1}"
7912 [(set_attr "isa" "noavx,sse3")
7913 (set_attr "type" "sselog1")
7914 (set_attr "prefix" "orig,maybe_vex")
7915 (set_attr "mode" "V2DF,DF")])
;; Builds a V2DF from two DF operands.  Eight alternatives, in order:
;;   0/1  register forms: unpcklpd (sse2_noavx) / vunpcklpd (avx)
;;   2    operand 1 in memory with operand 2 tied to it: %vmovddup (sse3)
;;   3/4  operand 2 in memory: movhpd (sse2_noavx) / vmovhpd (avx)
;;   5    operand 2 is constraint "C": %vmovsd load of operand 1 (sse2)
;;   6/7  noavx forms using movlhps (register) / movhps (memory)
;; "type" is sselog for alternatives 0-2 and ssemov otherwise.
7917 (define_insn "*vec_concatv2df"
7918 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x,x,x,x,x,x")
7920 (match_operand:DF 1 "nonimmediate_operand" " 0,x,m,0,x,m,0,0")
7921 (match_operand:DF 2 "vector_move_operand" " x,x,1,m,m,C,x,m")))]
7924 unpcklpd\t{%2, %0|%0, %2}
7925 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
7926 %vmovddup\t{%1, %0|%0, %1}
7927 movhpd\t{%2, %0|%0, %2}
7928 vmovhpd\t{%2, %1, %0|%0, %1, %2}
7929 %vmovsd\t{%1, %0|%0, %1}
7930 movlhps\t{%2, %0|%0, %2}
7931 movhps\t{%2, %0|%0, %2}"
7932 [(set_attr "isa" "sse2_noavx,avx,sse3,sse2_noavx,avx,sse2,noavx,noavx")
7935 (eq_attr "alternative" "0,1,2")
7936 (const_string "sselog")
7937 (const_string "ssemov")))
7938 (set_attr "prefix_data16" "*,*,*,1,*,*,*,*")
7939 (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex,orig,orig")
7940 (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,DF,V4SF,V2SF")])
7942 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7944 ;; Parallel integer down-conversion operations
7946 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Destination modes of the down-conversion (vpmov*) patterns below, with
;; three attributes keyed on the destination mode:
;;   pmov_src_mode   the source vector mode (V16SI or V8DI)
;;   pmov_src_lower  the same mode in lower case, for pattern names
;;   pmov_suff       the mnemonic suffix (db, dw, qd, qw)
7948 (define_mode_iterator PMOV_DST_MODE [V16QI V16HI V8SI V8HI])
7949 (define_mode_attr pmov_src_mode
7950 [(V16QI "V16SI") (V16HI "V16SI") (V8SI "V8DI") (V8HI "V8DI")])
7951 (define_mode_attr pmov_src_lower
7952 [(V16QI "v16si") (V16HI "v16si") (V8SI "v8di") (V8HI "v8di")])
7953 (define_mode_attr pmov_suff
7954 [(V16QI "db") (V16HI "dw") (V8SI "qd") (V8HI "qw")])
;; Unmasked down-conversion: any_truncate of a <pmov_src_mode> register
;; into a PMOV_DST_MODE destination.  Alternative 0 writes a register;
;; alternative 1 writes memory (the "memory" attribute is "none,store").
;; The mnemonic is vpmov<trunsuffix><pmov_suff>.
7956 (define_insn "*avx512f_<code><pmov_src_lower><mode>2"
7957 [(set (match_operand:PMOV_DST_MODE 0 "nonimmediate_operand" "=v,m")
7958 (any_truncate:PMOV_DST_MODE
7959 (match_operand:<pmov_src_mode> 1 "register_operand" "v,v")))]
7961 "vpmov<trunsuffix><pmov_suff>\t{%1, %0|%0, %1}"
7962 [(set_attr "type" "ssemov")
7963 (set_attr "memory" "none,store")
7964 (set_attr "prefix" "evex")
7965 (set_attr "mode" "<sseinsnmode>")])
;; Masked variant of the down-converting move: the truncated operand 1 is
;; merged (vec_merge) with operand 2 under the mask register operand 3
;; (constraint Yk).  Operand 2 is tied to the destination, or - for the
;; register alternative only - may be a constant matching C.
;; NOTE(review): %N2 presumably prints the zero-masking suffix when operand 2
;; is that constant -- confirm against the operand printer in i386.c.
7967 (define_insn "avx512f_<code><pmov_src_lower><mode>2_mask"
7968 [(set (match_operand:PMOV_DST_MODE 0 "nonimmediate_operand" "=v,m")
7969 (vec_merge:PMOV_DST_MODE
7970 (any_truncate:PMOV_DST_MODE
7971 (match_operand:<pmov_src_mode> 1 "register_operand" "v,v"))
7972 (match_operand:PMOV_DST_MODE 2 "vector_move_operand" "0C,0")
7973 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
7975 "vpmov<trunsuffix><pmov_suff>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
7976 [(set_attr "type" "ssemov")
7977 (set_attr "memory" "none,store")
7978 (set_attr "prefix" "evex")
7979 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the masked down-converting store: operand 0 must be memory,
;; operand 1 is the wide source register and operand 2 the mask register.
;; The truncated source is combined with another value by vec_merge.
7981 (define_expand "avx512f_<code><pmov_src_lower><mode>2_mask_store"
7982 [(set (match_operand:PMOV_DST_MODE 0 "memory_operand")
7983 (vec_merge:PMOV_DST_MODE
7984 (any_truncate:PMOV_DST_MODE
7985 (match_operand:<pmov_src_mode> 1 "register_operand"))
7987 (match_operand:<avx512fmaskmode> 2 "register_operand")))]
;; V8DI to byte down-conversion (vpmov<trunsuffix>qb) into a V16QI register.
;; The pattern pairs the converted operand 1 with an all-zero V8QI constant
;; vector, which accounts for the other eight bytes of the destination.
7990 (define_insn "*avx512f_<code>v8div16qi2"
7991 [(set (match_operand:V16QI 0 "register_operand" "=v")
7994 (match_operand:V8DI 1 "register_operand" "v"))
7995 (const_vector:V8QI [(const_int 0) (const_int 0)
7996 (const_int 0) (const_int 0)
7997 (const_int 0) (const_int 0)
7998 (const_int 0) (const_int 0)])))]
8000 "vpmov<trunsuffix>qb\t{%1, %0|%0, %1}"
8001 [(set_attr "type" "ssemov")
8002 (set_attr "prefix" "evex")
8003 (set_attr "mode" "TI")])
;; Memory-destination form of the V8DI to byte down-conversion.  Besides the
;; converted operand 1, the pattern selects elements 8..15 of a vector via
;; the parallel below.
;; NOTE(review): presumably those are the destination bytes that the store
;; leaves unchanged -- confirm against the full pattern.
8005 (define_insn "*avx512f_<code>v8div16qi2_store"
8006 [(set (match_operand:V16QI 0 "memory_operand" "=m")
8009 (match_operand:V8DI 1 "register_operand" "v"))
8012 (parallel [(const_int 8) (const_int 9)
8013 (const_int 10) (const_int 11)
8014 (const_int 12) (const_int 13)
8015 (const_int 14) (const_int 15)]))))]
8017 "vpmov<trunsuffix>qb\t{%1, %0|%0, %1}"
8018 [(set_attr "type" "ssemov")
8019 (set_attr "memory" "store")
8020 (set_attr "prefix" "evex")
8021 (set_attr "mode" "TI")])
;; Masked register form of the V8DI to byte down-conversion.  The parallel
;; selects elements 0..7 of operand 2 (tied to the destination, or a constant
;; matching C), QImode operand 3 (constraint Yk) is the mask register, and an
;; all-zero V8QI constant vector supplies the remaining bytes of the V16QI
;; result.
;; NOTE(review): %N2 presumably prints the zero-masking suffix when operand 2
;; is the constant -- confirm against the operand printer in i386.c.
8023 (define_insn "avx512f_<code>v8div16qi2_mask"
8024 [(set (match_operand:V16QI 0 "register_operand" "=v")
8028 (match_operand:V8DI 1 "register_operand" "v"))
8030 (match_operand:V16QI 2 "vector_move_operand" "0C")
8031 (parallel [(const_int 0) (const_int 1)
8032 (const_int 2) (const_int 3)
8033 (const_int 4) (const_int 5)
8034 (const_int 6) (const_int 7)]))
8035 (match_operand:QI 3 "register_operand" "Yk"))
8036 (const_vector:V8QI [(const_int 0) (const_int 0)
8037 (const_int 0) (const_int 0)
8038 (const_int 0) (const_int 0)
8039 (const_int 0) (const_int 0)])))]
8041 "vpmov<trunsuffix>qb\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
8042 [(set_attr "type" "ssemov")
8043 (set_attr "prefix" "evex")
8044 (set_attr "mode" "TI")])
;; Masked memory-destination form of the V8DI to byte down-conversion.
;; QImode operand 2 (constraint Yk) is the mask register; the two parallels
;; select elements 0..7 and elements 8..15 respectively.
;; NOTE(review): both selections presumably read the destination memory
;; (masked-off low bytes and untouched high bytes) -- confirm against the
;; full pattern.
8046 (define_insn "avx512f_<code>v8div16qi2_mask_store"
8047 [(set (match_operand:V16QI 0 "memory_operand" "=m")
8051 (match_operand:V8DI 1 "register_operand" "v"))
8054 (parallel [(const_int 0) (const_int 1)
8055 (const_int 2) (const_int 3)
8056 (const_int 4) (const_int 5)
8057 (const_int 6) (const_int 7)]))
8058 (match_operand:QI 2 "register_operand" "Yk"))
8061 (parallel [(const_int 8) (const_int 9)
8062 (const_int 10) (const_int 11)
8063 (const_int 12) (const_int 13)
8064 (const_int 14) (const_int 15)]))))]
8066 "vpmov<trunsuffix>qb\t{%1, %0%{%2%}|%0%{%2%}, %1}"
8067 [(set_attr "type" "ssemov")
8068 (set_attr "memory" "store")
8069 (set_attr "prefix" "evex")
8070 (set_attr "mode" "TI")])
8072 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8074 ;; Parallel integral arithmetic
8076 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander for integer vector negation over the VI_AVX2 modes.  The
;; preparation statement forces an all-zero vector into a register and
;; stores it as operands[2].
;; NOTE(review): the result is presumably computed as operands[2] minus
;; operand 1 -- confirm against the full RTL template.
8078 (define_expand "neg<mode>2"
8079 [(set (match_operand:VI_AVX2 0 "register_operand")
8082 (match_operand:VI_AVX2 1 "nonimmediate_operand")))]
8084 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
;; Expander for integer vector add/subtract over the VI_AVX2 modes, with an
;; optional masked form (<mask_name>).  Enabled for TARGET_SSE2 subject to
;; <mask_mode512bit_condition>; the operands are canonicalized by
;; ix86_fixup_binary_operands_no_copy before the insn below is matched.
8086 (define_expand "<plusminus_insn><mode>3<mask_name>"
8087 [(set (match_operand:VI_AVX2 0 "register_operand")
8089 (match_operand:VI_AVX2 1 "nonimmediate_operand")
8090 (match_operand:VI_AVX2 2 "nonimmediate_operand")))]
8091 "TARGET_SSE2 && <mask_mode512bit_condition>"
8092 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Integer vector add/subtract instruction.  Alternative 0 is the two-operand
;; non-AVX form with operand 1 tied to the destination; alternative 1 is the
;; three-operand v-prefixed form, which also carries the optional mask
;; operand (<mask_operand3>).  <comm> supplies the constraint modifier for
;; operand 1 according to the operation.
8094 (define_insn "*<plusminus_insn><mode>3<mask_name>"
8095 [(set (match_operand:VI_AVX2 0 "register_operand" "=x,v")
8097 (match_operand:VI_AVX2 1 "nonimmediate_operand" "<comm>0,v")
8098 (match_operand:VI_AVX2 2 "nonimmediate_operand" "xm,vm")))]
8099 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands) && <mask_mode512bit_condition>"
8101 p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
8102 vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8103 [(set_attr "isa" "noavx,avx")
8104 (set_attr "type" "sseiadd")
8105 (set_attr "prefix_data16" "1,*")
8106 (set_attr "prefix" "<mask_prefix3>")
8107 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the sat_plusminus (saturating add/subtract) operations on
;; the VI12_AVX2 modes.  Operands are canonicalized by
;; ix86_fixup_binary_operands_no_copy.
8109 (define_expand "<sse2_avx2>_<plusminus_insn><mode>3"
8110 [(set (match_operand:VI12_AVX2 0 "register_operand")
8111 (sat_plusminus:VI12_AVX2
8112 (match_operand:VI12_AVX2 1 "nonimmediate_operand")
8113 (match_operand:VI12_AVX2 2 "nonimmediate_operand")))]
8115 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Saturating add/subtract instruction for the VI12_AVX2 modes.
;; Alternative 0 is the two-operand non-AVX form (operand 1 tied to the
;; destination); alternative 1 is the three-operand v-prefixed form.
8117 (define_insn "*<sse2_avx2>_<plusminus_insn><mode>3"
8118 [(set (match_operand:VI12_AVX2 0 "register_operand" "=x,v")
8119 (sat_plusminus:VI12_AVX2
8120 (match_operand:VI12_AVX2 1 "nonimmediate_operand" "<comm>0,v")
8121 (match_operand:VI12_AVX2 2 "nonimmediate_operand" "xm,vm")))]
8122 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
8124 p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
8125 vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
8126 [(set_attr "isa" "noavx,avx")
8127 (set_attr "type" "sseiadd")
8128 (set_attr "prefix_data16" "1,*")
8129 (set_attr "prefix" "orig,vex")
8130 (set_attr "mode" "TI")])
;; Expander for multiplication in the VI1_AVX2 modes.  Both inputs must be
;; registers; the expansion is produced by calling ix86_expand_vecop_qihi
;; with code MULT.
8132 (define_expand "mul<mode>3"
8133 [(set (match_operand:VI1_AVX2 0 "register_operand")
8134 (mult:VI1_AVX2 (match_operand:VI1_AVX2 1 "register_operand")
8135 (match_operand:VI1_AVX2 2 "register_operand")))]
8138 ix86_expand_vecop_qihi (MULT, operands[0], operands[1], operands[2]);
;; Expander for multiplication in the VI2_AVX2 modes.  It only canonicalizes
;; the operands (ix86_fixup_binary_operands_no_copy); the instruction itself
;; comes from the *mul<mode>3 insn.
8142 (define_expand "mul<mode>3"
8143 [(set (match_operand:VI2_AVX2 0 "register_operand")
8144 (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "nonimmediate_operand")
8145 (match_operand:VI2_AVX2 2 "nonimmediate_operand")))]
8147 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; Multiply in the VI2_AVX2 modes, emitted as pmullw (two-operand, non-AVX)
;; or vpmullw (three-operand, AVX).  Operand 1 carries the % modifier, so the
;; two inputs may be exchanged to satisfy the constraints.
8149 (define_insn "*mul<mode>3"
8150 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
8151 (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,x")
8152 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,xm")))]
8153 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
8155 pmullw\t{%2, %0|%0, %2}
8156 vpmullw\t{%2, %1, %0|%0, %1, %2}"
8157 [(set_attr "isa" "noavx,avx")
8158 (set_attr "type" "sseimul")
8159 (set_attr "prefix_data16" "1,*")
8160 (set_attr "prefix" "orig,vex")
8161 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the high part of a widening multiply in the VI2_AVX2 modes:
;; both inputs are extended (any_extend) to <ssedoublemode>, multiplied, and
;; the product is shifted right (lshiftrt).  Operands are canonicalized by
;; ix86_fixup_binary_operands_no_copy.
8163 (define_expand "<s>mul<mode>3_highpart"
8164 [(set (match_operand:VI2_AVX2 0 "register_operand")
8166 (lshiftrt:<ssedoublemode>
8167 (mult:<ssedoublemode>
8168 (any_extend:<ssedoublemode>
8169 (match_operand:VI2_AVX2 1 "nonimmediate_operand"))
8170 (any_extend:<ssedoublemode>
8171 (match_operand:VI2_AVX2 2 "nonimmediate_operand")))
8174 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; High-part multiply instruction for the VI2_AVX2 modes, emitted as
;; pmulh<u>w (two-operand, non-AVX) or vpmulh<u>w (three-operand, AVX).  The
;; RTL mirrors the expander: extend both inputs, multiply, shift right.
8176 (define_insn "*<s>mul<mode>3_highpart"
8177 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
8179 (lshiftrt:<ssedoublemode>
8180 (mult:<ssedoublemode>
8181 (any_extend:<ssedoublemode>
8182 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,x"))
8183 (any_extend:<ssedoublemode>
8184 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,xm")))
8186 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
8188 pmulh<u>w\t{%2, %0|%0, %2}
8189 vpmulh<u>w\t{%2, %1, %0|%0, %1, %2}"
8190 [(set_attr "isa" "noavx,avx")
8191 (set_attr "type" "sseimul")
8192 (set_attr "prefix_data16" "1,*")
8193 (set_attr "prefix" "orig,vex")
8194 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the widening multiply of the even-numbered elements
;; (0, 2, ..., 14) of two V16SI operands into a V8DI result, with an optional
;; masked form.  Operands are canonicalized by
;; ix86_fixup_binary_operands_no_copy.
8196 (define_expand "vec_widen_umult_even_v16si<mask_name>"
8197 [(set (match_operand:V8DI 0 "register_operand")
8201 (match_operand:V16SI 1 "nonimmediate_operand")
8202 (parallel [(const_int 0) (const_int 2)
8203 (const_int 4) (const_int 6)
8204 (const_int 8) (const_int 10)
8205 (const_int 12) (const_int 14)])))
8208 (match_operand:V16SI 2 "nonimmediate_operand")
8209 (parallel [(const_int 0) (const_int 2)
8210 (const_int 4) (const_int 6)
8211 (const_int 8) (const_int 10)
8212 (const_int 12) (const_int 14)])))))]
8214 "ix86_fixup_binary_operands_no_copy (MULT, V16SImode, operands);")
;; AVX-512F vpmuludq: multiplies the even-numbered elements of V16SI
;; operands 1 and 2 into a V8DI register, with an optional mask operand
;; (<mask_operand3>).  Operand 1 is marked commutative (%), so the inputs may
;; be exchanged to place a memory operand second.
8216 (define_insn "*vec_widen_umult_even_v16si<mask_name>"
8217 [(set (match_operand:V8DI 0 "register_operand" "=v")
8221 (match_operand:V16SI 1 "nonimmediate_operand" "%v")
8222 (parallel [(const_int 0) (const_int 2)
8223 (const_int 4) (const_int 6)
8224 (const_int 8) (const_int 10)
8225 (const_int 12) (const_int 14)])))
8228 (match_operand:V16SI 2 "nonimmediate_operand" "vm")
8229 (parallel [(const_int 0) (const_int 2)
8230 (const_int 4) (const_int 6)
8231 (const_int 8) (const_int 10)
8232 (const_int 12) (const_int 14)])))))]
8233 "TARGET_AVX512F && ix86_binary_operator_ok (MULT, V16SImode, operands)"
8234 "vpmuludq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8235 [(set_attr "isa" "avx512f")
8236 (set_attr "type" "sseimul")
8237 (set_attr "prefix_extra" "1")
8238 (set_attr "prefix" "evex")
8239 (set_attr "mode" "XI")])
;; Expander for the widening multiply of the even-numbered elements
;; (0, 2, 4, 6) of two V8SI operands into a V4DI result.  Operands are
;; canonicalized by ix86_fixup_binary_operands_no_copy.
8241 (define_expand "vec_widen_umult_even_v8si"
8242 [(set (match_operand:V4DI 0 "register_operand")
8246 (match_operand:V8SI 1 "nonimmediate_operand")
8247 (parallel [(const_int 0) (const_int 2)
8248 (const_int 4) (const_int 6)])))
8251 (match_operand:V8SI 2 "nonimmediate_operand")
8252 (parallel [(const_int 0) (const_int 2)
8253 (const_int 4) (const_int 6)])))))]
8255 "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")
;; AVX2 vpmuludq: multiplies the even-numbered elements of V8SI operands 1
;; and 2 into a V4DI register.  Operand 1 is marked commutative (%).
8257 (define_insn "*vec_widen_umult_even_v8si"
8258 [(set (match_operand:V4DI 0 "register_operand" "=x")
8262 (match_operand:V8SI 1 "nonimmediate_operand" "%x")
8263 (parallel [(const_int 0) (const_int 2)
8264 (const_int 4) (const_int 6)])))
8267 (match_operand:V8SI 2 "nonimmediate_operand" "xm")
8268 (parallel [(const_int 0) (const_int 2)
8269 (const_int 4) (const_int 6)])))))]
8270 "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V8SImode, operands)"
8271 "vpmuludq\t{%2, %1, %0|%0, %1, %2}"
8272 [(set_attr "type" "sseimul")
8273 (set_attr "prefix" "vex")
8274 (set_attr "mode" "OI")])
;; Expander for the widening multiply of elements 0 and 2 of two V4SI
;; operands into a V2DI result.  Operands are canonicalized by
;; ix86_fixup_binary_operands_no_copy.
8276 (define_expand "vec_widen_umult_even_v4si"
8277 [(set (match_operand:V2DI 0 "register_operand")
8281 (match_operand:V4SI 1 "nonimmediate_operand")
8282 (parallel [(const_int 0) (const_int 2)])))
8285 (match_operand:V4SI 2 "nonimmediate_operand")
8286 (parallel [(const_int 0) (const_int 2)])))))]
8288 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; SSE2 pmuludq / AVX vpmuludq: multiplies elements 0 and 2 of V4SI operands
;; 1 and 2 into a V2DI register.  Alternative 0 ties operand 1 to the
;; destination (non-AVX); alternative 1 is the three-operand AVX form.
8290 (define_insn "*vec_widen_umult_even_v4si"
8291 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
8295 (match_operand:V4SI 1 "nonimmediate_operand" "%0,x")
8296 (parallel [(const_int 0) (const_int 2)])))
8299 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
8300 (parallel [(const_int 0) (const_int 2)])))))]
8301 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
8303 pmuludq\t{%2, %0|%0, %2}
8304 vpmuludq\t{%2, %1, %0|%0, %1, %2}"
8305 [(set_attr "isa" "noavx,avx")
8306 (set_attr "type" "sseimul")
8307 (set_attr "prefix_data16" "1,*")
8308 (set_attr "prefix" "orig,vex")
8309 (set_attr "mode" "TI")])
;; Expander for the signed counterpart of the V16SI even-element widening
;; multiply (elements 0, 2, ..., 14 into V8DI), with an optional masked form.
;; Operands are canonicalized by ix86_fixup_binary_operands_no_copy.
8311 (define_expand "vec_widen_smult_even_v16si<mask_name>"
8312 [(set (match_operand:V8DI 0 "register_operand")
8316 (match_operand:V16SI 1 "nonimmediate_operand")
8317 (parallel [(const_int 0) (const_int 2)
8318 (const_int 4) (const_int 6)
8319 (const_int 8) (const_int 10)
8320 (const_int 12) (const_int 14)])))
8323 (match_operand:V16SI 2 "nonimmediate_operand")
8324 (parallel [(const_int 0) (const_int 2)
8325 (const_int 4) (const_int 6)
8326 (const_int 8) (const_int 10)
8327 (const_int 12) (const_int 14)])))))]
8329 "ix86_fixup_binary_operands_no_copy (MULT, V16SImode, operands);")
;; AVX-512F vpmuldq: multiplies the even-numbered elements of V16SI operands
;; 1 and 2 into a V8DI register, with an optional mask operand
;; (<mask_operand3>).  Operand 1 is marked commutative (%).
8331 (define_insn "*vec_widen_smult_even_v16si<mask_name>"
8332 [(set (match_operand:V8DI 0 "register_operand" "=v")
8336 (match_operand:V16SI 1 "nonimmediate_operand" "%v")
8337 (parallel [(const_int 0) (const_int 2)
8338 (const_int 4) (const_int 6)
8339 (const_int 8) (const_int 10)
8340 (const_int 12) (const_int 14)])))
8343 (match_operand:V16SI 2 "nonimmediate_operand" "vm")
8344 (parallel [(const_int 0) (const_int 2)
8345 (const_int 4) (const_int 6)
8346 (const_int 8) (const_int 10)
8347 (const_int 12) (const_int 14)])))))]
8348 "TARGET_AVX512F && ix86_binary_operator_ok (MULT, V16SImode, operands)"
8349 "vpmuldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8350 [(set_attr "isa" "avx512f")
8351 (set_attr "type" "sseimul")
8352 (set_attr "prefix_extra" "1")
8353 (set_attr "prefix" "evex")
8354 (set_attr "mode" "XI")])
;; Expander for the signed counterpart of the V8SI even-element widening
;; multiply (elements 0, 2, 4, 6 into V4DI).  Operands are canonicalized by
;; ix86_fixup_binary_operands_no_copy.
8356 (define_expand "vec_widen_smult_even_v8si"
8357 [(set (match_operand:V4DI 0 "register_operand")
8361 (match_operand:V8SI 1 "nonimmediate_operand")
8362 (parallel [(const_int 0) (const_int 2)
8363 (const_int 4) (const_int 6)])))
8366 (match_operand:V8SI 2 "nonimmediate_operand")
8367 (parallel [(const_int 0) (const_int 2)
8368 (const_int 4) (const_int 6)])))))]
8370 "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")
;; AVX2 vpmuldq: multiplies the even-numbered elements (0, 2, 4, 6) of V8SI
;; operands 1 and 2 into a V4DI register.
;; Operand 1 carries the % (commutative) modifier, like the other
;; widening-multiply insns in this file.  The insn condition uses
;; ix86_binary_operator_ok with MULT, and operand 1 may be memory
;; (nonimmediate_operand); with % the register allocator can exchange the
;; two inputs instead of reloading a memory operand 1 into a register.
8372 (define_insn "*vec_widen_smult_even_v8si"
8373 [(set (match_operand:V4DI 0 "register_operand" "=x")
8377 (match_operand:V8SI 1 "nonimmediate_operand" "%x")
8378 (parallel [(const_int 0) (const_int 2)
8379 (const_int 4) (const_int 6)])))
8382 (match_operand:V8SI 2 "nonimmediate_operand" "xm")
8383 (parallel [(const_int 0) (const_int 2)
8384 (const_int 4) (const_int 6)])))))]
8385 "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V8SImode, operands)"
8386 "vpmuldq\t{%2, %1, %0|%0, %1, %2}"
8387 [(set_attr "type" "sseimul")
8388 (set_attr "prefix_extra" "1")
8389 (set_attr "prefix" "vex")
8390 (set_attr "mode" "OI")])
;; Expander for the widening multiply of elements 0 and 2 of two V4SI
;; operands into a V2DI result (matched by the pmuldq insn that follows).
;; Operands are canonicalized by ix86_fixup_binary_operands_no_copy.
8392 (define_expand "sse4_1_mulv2siv2di3"
8393 [(set (match_operand:V2DI 0 "register_operand")
8397 (match_operand:V4SI 1 "nonimmediate_operand")
8398 (parallel [(const_int 0) (const_int 2)])))
8401 (match_operand:V4SI 2 "nonimmediate_operand")
8402 (parallel [(const_int 0) (const_int 2)])))))]
8404 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; SSE4.1 pmuldq / AVX vpmuldq: multiplies elements 0 and 2 of V4SI operands
;; 1 and 2 into a V2DI register.  Alternative 0 ties operand 1 to the
;; destination (non-AVX); alternative 1 is the three-operand AVX form.
8406 (define_insn "*sse4_1_mulv2siv2di3"
8407 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
8411 (match_operand:V4SI 1 "nonimmediate_operand" "%0,x")
8412 (parallel [(const_int 0) (const_int 2)])))
8415 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
8416 (parallel [(const_int 0) (const_int 2)])))))]
8417 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
8419 pmuldq\t{%2, %0|%0, %2}
8420 vpmuldq\t{%2, %1, %0|%0, %1, %2}"
8421 [(set_attr "isa" "noavx,avx")
8422 (set_attr "type" "sseimul")
8423 (set_attr "prefix_data16" "1,*")
8424 (set_attr "prefix_extra" "1")
8425 (set_attr "prefix" "orig,vex")
8426 (set_attr "mode" "TI")])
;; Expander for the 256-bit multiply-add of V16HI operands into V8SI.  The
;; template selects the even-numbered elements (0, 2, ..., 14) of operands 1
;; and 2, and then the odd-numbered elements (1, 3, ..., 15) of the same two
;; operands (match_dup).  Operands are canonicalized by
;; ix86_fixup_binary_operands_no_copy.
8428 (define_expand "avx2_pmaddwd"
8429 [(set (match_operand:V8SI 0 "register_operand")
8434 (match_operand:V16HI 1 "nonimmediate_operand")
8435 (parallel [(const_int 0) (const_int 2)
8436 (const_int 4) (const_int 6)
8437 (const_int 8) (const_int 10)
8438 (const_int 12) (const_int 14)])))
8441 (match_operand:V16HI 2 "nonimmediate_operand")
8442 (parallel [(const_int 0) (const_int 2)
8443 (const_int 4) (const_int 6)
8444 (const_int 8) (const_int 10)
8445 (const_int 12) (const_int 14)]))))
8448 (vec_select:V8HI (match_dup 1)
8449 (parallel [(const_int 1) (const_int 3)
8450 (const_int 5) (const_int 7)
8451 (const_int 9) (const_int 11)
8452 (const_int 13) (const_int 15)])))
8454 (vec_select:V8HI (match_dup 2)
8455 (parallel [(const_int 1) (const_int 3)
8456 (const_int 5) (const_int 7)
8457 (const_int 9) (const_int 11)
8458 (const_int 13) (const_int 15)]))))))]
8460 "ix86_fixup_binary_operands_no_copy (MULT, V16HImode, operands);")
;; AVX2 vpmaddwd on V16HI operands producing V8SI.  The RTL combines the
;; even-numbered and the odd-numbered elements of operands 1 and 2, exactly
;; as in the avx2_pmaddwd expander.  Operand 1 is marked commutative (%).
8462 (define_insn "*avx2_pmaddwd"
8463 [(set (match_operand:V8SI 0 "register_operand" "=x")
8468 (match_operand:V16HI 1 "nonimmediate_operand" "%x")
8469 (parallel [(const_int 0) (const_int 2)
8470 (const_int 4) (const_int 6)
8471 (const_int 8) (const_int 10)
8472 (const_int 12) (const_int 14)])))
8475 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
8476 (parallel [(const_int 0) (const_int 2)
8477 (const_int 4) (const_int 6)
8478 (const_int 8) (const_int 10)
8479 (const_int 12) (const_int 14)]))))
8482 (vec_select:V8HI (match_dup 1)
8483 (parallel [(const_int 1) (const_int 3)
8484 (const_int 5) (const_int 7)
8485 (const_int 9) (const_int 11)
8486 (const_int 13) (const_int 15)])))
8488 (vec_select:V8HI (match_dup 2)
8489 (parallel [(const_int 1) (const_int 3)
8490 (const_int 5) (const_int 7)
8491 (const_int 9) (const_int 11)
8492 (const_int 13) (const_int 15)]))))))]
8493 "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V16HImode, operands)"
8494 "vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
8495 [(set_attr "type" "sseiadd")
8496 (set_attr "prefix" "vex")
8497 (set_attr "mode" "OI")])
;; Expander for the 128-bit multiply-add of V8HI operands into V4SI.  The
;; template selects elements 0, 2, 4, 6 of operands 1 and 2, and then
;; elements 1, 3, 5, 7 of the same two operands (match_dup).  Operands are
;; canonicalized by ix86_fixup_binary_operands_no_copy.
8499 (define_expand "sse2_pmaddwd"
8500 [(set (match_operand:V4SI 0 "register_operand")
8505 (match_operand:V8HI 1 "nonimmediate_operand")
8506 (parallel [(const_int 0) (const_int 2)
8507 (const_int 4) (const_int 6)])))
8510 (match_operand:V8HI 2 "nonimmediate_operand")
8511 (parallel [(const_int 0) (const_int 2)
8512 (const_int 4) (const_int 6)]))))
8515 (vec_select:V4HI (match_dup 1)
8516 (parallel [(const_int 1) (const_int 3)
8517 (const_int 5) (const_int 7)])))
8519 (vec_select:V4HI (match_dup 2)
8520 (parallel [(const_int 1) (const_int 3)
8521 (const_int 5) (const_int 7)]))))))]
8523 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; SSE2 pmaddwd / AVX vpmaddwd on V8HI operands producing V4SI.  Alternative
;; 0 ties operand 1 to the destination (non-AVX); alternative 1 is the
;; three-operand AVX form.  The RTL mirrors the sse2_pmaddwd expander.
8525 (define_insn "*sse2_pmaddwd"
8526 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
8531 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x")
8532 (parallel [(const_int 0) (const_int 2)
8533 (const_int 4) (const_int 6)])))
8536 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
8537 (parallel [(const_int 0) (const_int 2)
8538 (const_int 4) (const_int 6)]))))
8541 (vec_select:V4HI (match_dup 1)
8542 (parallel [(const_int 1) (const_int 3)
8543 (const_int 5) (const_int 7)])))
8545 (vec_select:V4HI (match_dup 2)
8546 (parallel [(const_int 1) (const_int 3)
8547 (const_int 5) (const_int 7)]))))))]
8548 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
8550 pmaddwd\t{%2, %0|%0, %2}
8551 vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
8552 [(set_attr "isa" "noavx,avx")
8553 (set_attr "type" "sseiadd")
8554 (set_attr "atom_unit" "simul")
8555 (set_attr "prefix_data16" "1,*")
8556 (set_attr "prefix" "orig,vex")
8557 (set_attr "mode" "TI")])
;; Expander for multiplication in the VI4_AVX512F modes, with an optional
;; masked form.  Inputs are accepted as general_vector_operand.  One path
;; forces any input that is not a nonimmediate_operand into a register and
;; then canonicalizes the operands; another path calls
;; ix86_expand_sse2_mulv4si3.
;; NOTE(review): the two paths are presumably chosen on SSE4.1 availability,
;; matching the TARGET_SSE4_1 condition of the pmulld insn -- confirm.
8559 (define_expand "mul<mode>3<mask_name>"
8560 [(set (match_operand:VI4_AVX512F 0 "register_operand")
8562 (match_operand:VI4_AVX512F 1 "general_vector_operand")
8563 (match_operand:VI4_AVX512F 2 "general_vector_operand")))]
8564 "TARGET_SSE2 && <mask_mode512bit_condition>"
8568 if (!nonimmediate_operand (operands[1], <MODE>mode))
8569 operands[1] = force_reg (<MODE>mode, operands[1]);
8570 if (!nonimmediate_operand (operands[2], <MODE>mode))
8571 operands[2] = force_reg (<MODE>mode, operands[2]);
8572 ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
8576 ix86_expand_sse2_mulv4si3 (operands[0], operands[1], operands[2]);
;; Multiply in the VI4_AVX512F modes, emitted as pmulld (two-operand,
;; non-AVX) or vpmulld (three-operand, with the optional mask operand
;; <mask_operand3>).  Requires TARGET_SSE4_1.
8581 (define_insn "*<sse4_1_avx2>_mul<mode>3<mask_name>"
8582 [(set (match_operand:VI4_AVX512F 0 "register_operand" "=x,v")
8584 (match_operand:VI4_AVX512F 1 "nonimmediate_operand" "%0,v")
8585 (match_operand:VI4_AVX512F 2 "nonimmediate_operand" "xm,vm")))]
8586 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, <MODE>mode, operands) && <mask_mode512bit_condition>"
8588 pmulld\t{%2, %0|%0, %2}
8589 vpmulld\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8590 [(set_attr "isa" "noavx,avx")
8591 (set_attr "type" "sseimul")
8592 (set_attr "prefix_extra" "1")
8593 (set_attr "prefix" "<mask_prefix3>")
8594 (set_attr "btver2_decode" "vector,vector")
8595 (set_attr "mode" "<sseinsnmode>")])
;; Expander for multiplication in the VI8_AVX2_AVX512F modes.  Both inputs
;; must be registers; the expansion is produced by calling
;; ix86_expand_sse2_mulvxdi3.
8597 (define_expand "mul<mode>3"
8598 [(set (match_operand:VI8_AVX2_AVX512F 0 "register_operand")
8599 (mult:VI8_AVX2_AVX512F
8600 (match_operand:VI8_AVX2_AVX512F 1 "register_operand")
8601 (match_operand:VI8_AVX2_AVX512F 2 "register_operand")))]
8604 ix86_expand_sse2_mulvxdi3 (operands[0], operands[1], operands[2]);
;; Named expander vec_widen_<s>mult_hi for the VI124_AVX2 modes: register
;; inputs, result in <sseunpackmode>.  The expansion is produced by calling
;; ix86_expand_mul_widen_hilo.
8608 (define_expand "vec_widen_<s>mult_hi_<mode>"
8609 [(match_operand:<sseunpackmode> 0 "register_operand")
8610 (any_extend:<sseunpackmode>
8611 (match_operand:VI124_AVX2 1 "register_operand"))
8612 (match_operand:VI124_AVX2 2 "register_operand")]
8615 ix86_expand_mul_widen_hilo (operands[0], operands[1], operands[2],
;; Named expander vec_widen_<s>mult_lo for the VI124_AVX2 modes: register
;; inputs, result in <sseunpackmode>.  The expansion is produced by calling
;; ix86_expand_mul_widen_hilo.
8620 (define_expand "vec_widen_<s>mult_lo_<mode>"
8621 [(match_operand:<sseunpackmode> 0 "register_operand")
8622 (any_extend:<sseunpackmode>
8623 (match_operand:VI124_AVX2 1 "register_operand"))
8624 (match_operand:VI124_AVX2 2 "register_operand")]
8627 ix86_expand_mul_widen_hilo (operands[0], operands[1], operands[2],
8632 ;; Most widen_<s>mult_even_<mode> can be handled directly from other
8633 ;; named patterns, but signed V4SI needs special help for plain SSE2.
;; V4SI inputs (register or memory), V2DI result; the expansion is produced
;; by calling ix86_expand_mul_widen_evenodd.
8634 (define_expand "vec_widen_smult_even_v4si"
8635 [(match_operand:V2DI 0 "register_operand")
8636 (match_operand:V4SI 1 "nonimmediate_operand")
8637 (match_operand:V4SI 2 "nonimmediate_operand")]
8640 ix86_expand_mul_widen_evenodd (operands[0], operands[1], operands[2],
;; Named expander vec_widen_<s>mult_odd for the VI4_AVX512F modes.  Inputs
;; are accepted as general_vector_operand and the result has
;; <sseunpackmode>; the expansion is produced by calling
;; ix86_expand_mul_widen_evenodd.
8645 (define_expand "vec_widen_<s>mult_odd_<mode>"
8646 [(match_operand:<sseunpackmode> 0 "register_operand")
8647 (any_extend:<sseunpackmode>
8648 (match_operand:VI4_AVX512F 1 "general_vector_operand"))
8649 (match_operand:VI4_AVX512F 2 "general_vector_operand")]
8652 ix86_expand_mul_widen_evenodd (operands[0], operands[1], operands[2],
;; Dot-product expander for the VI2_AVX2 modes.  It emits pmaddwd of
;; operands 1 and 2 into a fresh <sseunpackmode> temporary t, then sets
;; operand 0 to a PLUS expression in <sseunpackmode>.
;; NOTE(review): the PLUS presumably adds operands[3] and t -- confirm
;; against the full preparation code.
8657 (define_expand "sdot_prod<mode>"
8658 [(match_operand:<sseunpackmode> 0 "register_operand")
8659 (match_operand:VI2_AVX2 1 "register_operand")
8660 (match_operand:VI2_AVX2 2 "register_operand")
8661 (match_operand:<sseunpackmode> 3 "register_operand")]
8664 rtx t = gen_reg_rtx (<sseunpackmode>mode);
8665 emit_insn (gen_<sse2_avx2>_pmaddwd (t, operands[1], operands[2]));
8666 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
8667 gen_rtx_PLUS (<sseunpackmode>mode,
8672 ;; Normally we use widen_mul_even/odd, but combine can't quite get it all
8673 ;; back together when madd is available.
;; V4SI dot product accumulating into V2DI, built from two XOP
;; multiply-accumulate insns: pmacsdqh of operands 1 and 2 with operand 3
;; goes into the temporary t, then pmacsdql of operands 1 and 2 with t goes
;; into operand 0.
8674 (define_expand "sdot_prodv4si"
8675 [(match_operand:V2DI 0 "register_operand")
8676 (match_operand:V4SI 1 "register_operand")
8677 (match_operand:V4SI 2 "register_operand")
8678 (match_operand:V2DI 3 "register_operand")]
8681 rtx t = gen_reg_rtx (V2DImode);
8682 emit_insn (gen_xop_pmacsdqh (t, operands[1], operands[2], operands[3]));
8683 emit_insn (gen_xop_pmacsdql (operands[0], operands[1], operands[2], t));
;; Expander named usadv16qi: V16QI inputs 1 and 2, V4SI input 3 and V4SI
;; result.  Steps: psadbw of operands 1 and 2 into the V2DI temporary t1;
;; convert_move of t1 into the V4SI temporary t2; V4SI addition of t2 and
;; operand 3 into operand 0.
8687 (define_expand "usadv16qi"
8688 [(match_operand:V4SI 0 "register_operand")
8689 (match_operand:V16QI 1 "register_operand")
8690 (match_operand:V16QI 2 "nonimmediate_operand")
8691 (match_operand:V4SI 3 "nonimmediate_operand")]
8694 rtx t1 = gen_reg_rtx (V2DImode);
8695 rtx t2 = gen_reg_rtx (V4SImode);
8696 emit_insn (gen_sse2_psadbw (t1, operands[1], operands[2]));
8697 convert_move (t2, t1, 0);
8698 emit_insn (gen_addv4si3 (operands[0], t2, operands[3]));
;; 256-bit counterpart of usadv16qi: V32QI inputs 1 and 2, V8SI input 3 and
;; V8SI result.  Steps: AVX2 psadbw of operands 1 and 2 into the V4DI
;; temporary t1; convert_move of t1 into the V8SI temporary t2; V8SI
;; addition of t2 and operand 3 into operand 0.
8702 (define_expand "usadv32qi"
8703 [(match_operand:V8SI 0 "register_operand")
8704 (match_operand:V32QI 1 "register_operand")
8705 (match_operand:V32QI 2 "nonimmediate_operand")
8706 (match_operand:V8SI 3 "nonimmediate_operand")]
8709 rtx t1 = gen_reg_rtx (V4DImode);
8710 rtx t2 = gen_reg_rtx (V8SImode);
8711 emit_insn (gen_avx2_psadbw (t1, operands[1], operands[2]));
8712 convert_move (t2, t1, 0);
8713 emit_insn (gen_addv8si3 (operands[0], t2, operands[3]));
;; Shift of VI24_AVX2 vectors emitted as psra / vpsra.  The SImode count
;; (operand 2) is either a register or a constant matching N.  The
;; length_immediate attribute depends on whether operand 2 is a
;; const_int_operand.
8717 (define_insn "ashr<mode>3"
8718 [(set (match_operand:VI24_AVX2 0 "register_operand" "=x,x")
8720 (match_operand:VI24_AVX2 1 "register_operand" "0,x")
8721 (match_operand:SI 2 "nonmemory_operand" "xN,xN")))]
8724 psra<ssemodesuffix>\t{%2, %0|%0, %2}
8725 vpsra<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
8726 [(set_attr "isa" "noavx,avx")
8727 (set_attr "type" "sseishft")
8728 (set (attr "length_immediate")
8729 (if_then_else (match_operand 2 "const_int_operand")
8731 (const_string "0")))
8732 (set_attr "prefix_data16" "1,*")
8733 (set_attr "prefix" "orig,vex")
8734 (set_attr "mode" "<sseinsnmode>")])
;; AVX-512F vpsra for the VI48_512 modes, with an optional mask operand.
;; Alternative 0 takes the count in a vector register; alternative 1 takes
;; a constant count matching N and then also allows operand 1 in memory.
8736 (define_insn "ashr<mode>3<mask_name>"
8737 [(set (match_operand:VI48_512 0 "register_operand" "=v,v")
8739 (match_operand:VI48_512 1 "nonimmediate_operand" "v,vm")
8740 (match_operand:SI 2 "nonmemory_operand" "v,N")))]
8741 "TARGET_AVX512F && <mask_mode512bit_condition>"
8742 "vpsra<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8743 [(set_attr "type" "sseishft")
8744 (set (attr "length_immediate")
8745 (if_then_else (match_operand 2 "const_int_operand")
8747 (const_string "0")))
8748 (set_attr "mode" "<sseinsnmode>")])
;; Shifts covered by the any_lshift iterator for the VI2_AVX2_AVX512BW
;; modes, with an optional mask operand.  Alternative 0 is the two-operand
;; non-AVX form; alternative 1 is the three-operand v-prefixed form.  The
;; SImode count is a register or a constant matching N.
8750 (define_insn "<shift_insn><mode>3<mask_name>"
8751 [(set (match_operand:VI2_AVX2_AVX512BW 0 "register_operand" "=x,v")
8752 (any_lshift:VI2_AVX2_AVX512BW
8753 (match_operand:VI2_AVX2_AVX512BW 1 "register_operand" "0,v")
8754 (match_operand:SI 2 "nonmemory_operand" "xN,vN")))]
8755 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
8757 p<vshift><ssemodesuffix>\t{%2, %0|%0, %2}
8758 vp<vshift><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8759 [(set_attr "isa" "noavx,avx")
8760 (set_attr "type" "sseishft")
8761 (set (attr "length_immediate")
8762 (if_then_else (match_operand 2 "const_int_operand")
8764 (const_string "0")))
8765 (set_attr "prefix_data16" "1,*")
8766 (set_attr "prefix" "orig,vex")
8767 (set_attr "mode" "<sseinsnmode>")])
;; Shifts covered by the any_lshift iterator for the VI48_AVX2 modes, with
;; an optional mask operand.  Alternative 0 is the two-operand non-AVX form;
;; alternative 1 is the three-operand v-prefixed form.  The SImode count is
;; a register or a constant matching N.
8769 (define_insn "<shift_insn><mode>3<mask_name>"
8770 [(set (match_operand:VI48_AVX2 0 "register_operand" "=x,v")
8771 (any_lshift:VI48_AVX2
8772 (match_operand:VI48_AVX2 1 "register_operand" "0,v")
8773 (match_operand:SI 2 "nonmemory_operand" "xN,vN")))]
8774 "TARGET_SSE2 && <mask_mode512bit_condition>"
8776 p<vshift><ssemodesuffix>\t{%2, %0|%0, %2}
8777 vp<vshift><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8778 [(set_attr "isa" "noavx,avx")
8779 (set_attr "type" "sseishft")
8780 (set (attr "length_immediate")
8781 (if_then_else (match_operand 2 "const_int_operand")
8783 (const_string "0")))
8784 (set_attr "prefix_data16" "1,*")
8785 (set_attr "prefix" "orig,vex")
8786 (set_attr "mode" "<sseinsnmode>")])
;; AVX-512F shifts covered by the any_lshift iterator for the VI48_512
;; modes, with an optional mask operand.  Alternative 0 has operand 1 in a
;; register and the count in a register or a constant matching N;
;; alternative 1 has operand 1 in memory and requires the constant count.
8788 (define_insn "<shift_insn><mode>3<mask_name>"
8789 [(set (match_operand:VI48_512 0 "register_operand" "=v,v")
8790 (any_lshift:VI48_512
8791 (match_operand:VI48_512 1 "nonimmediate_operand" "v,m")
8792 (match_operand:SI 2 "nonmemory_operand" "vN,N")))]
8793 "TARGET_AVX512F && <mask_mode512bit_condition>"
8794 "vp<vshift><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8795 [(set_attr "isa" "avx512f")
8796 (set_attr "type" "sseishft")
8797 (set (attr "length_immediate")
8798 (if_then_else (match_operand 2 "const_int_operand")
8800 (const_string "0")))
8801 (set_attr "prefix" "evex")
8802 (set_attr "mode" "<sseinsnmode>")])
;; Whole-vector shift expander for the VI_128 modes, carried out in V1TImode.
;; Operand 1 is reinterpreted as V1TI (gen_lowpart), operands[3] is a fresh
;; V1TI register, and operands[4] is the <MODE> view of that register, which
;; the second set copies into operand 0.  The count (operand 2) must satisfy
;; const_0_to_255_mul_8_operand.
8805 (define_expand "vec_shl_<mode>"
8808 (match_operand:VI_128 1 "register_operand")
8809 (match_operand:SI 2 "const_0_to_255_mul_8_operand")))
8810 (set (match_operand:VI_128 0 "register_operand") (match_dup 4))]
8813 operands[1] = gen_lowpart (V1TImode, operands[1]);
8814 operands[3] = gen_reg_rtx (V1TImode);
8815 operands[4] = gen_lowpart (<MODE>mode, operands[3]);
;; Full-width left shift for the VIMAX_AVX2 modes, emitted as pslldq
;; (two-operand, non-AVX) or vpslldq (three-operand, AVX) according to
;; which_alternative.  Operand 2 is divided by 8 before it is printed as the
;; immediate.
8818 (define_insn "<sse2_avx2>_ashl<mode>3"
8819 [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,x")
8821 (match_operand:VIMAX_AVX2 1 "register_operand" "0,x")
8822 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
8825 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
8827 switch (which_alternative)
8830 return "pslldq\t{%2, %0|%0, %2}";
8832 return "vpslldq\t{%2, %1, %0|%0, %1, %2}";
8837 [(set_attr "isa" "noavx,avx")
8838 (set_attr "type" "sseishft")
8839 (set_attr "length_immediate" "1")
8840 (set_attr "prefix_data16" "1,*")
8841 (set_attr "prefix" "orig,vex")
8842 (set_attr "mode" "<sseinsnmode>")])
;; Whole-vector shift expander for the VI_128 modes, carried out in V1TImode
;; in the same way as vec_shl_<mode>: operand 1 is reinterpreted as V1TI,
;; operands[3] is a fresh V1TI register, and operands[4] is the <MODE> view
;; of that register, which the second set copies into operand 0.
8844 (define_expand "vec_shr_<mode>"
8847 (match_operand:VI_128 1 "register_operand")
8848 (match_operand:SI 2 "const_0_to_255_mul_8_operand")))
8849 (set (match_operand:VI_128 0 "register_operand") (match_dup 4))]
8852 operands[1] = gen_lowpart (V1TImode, operands[1]);
8853 operands[3] = gen_reg_rtx (V1TImode);
8854 operands[4] = gen_lowpart (<MODE>mode, operands[3]);
;; Full-width logical right shift for the VIMAX_AVX2 modes, emitted as
;; psrldq (two-operand, non-AVX) or vpsrldq (three-operand, AVX) according
;; to which_alternative.  Operand 2 is divided by 8 before it is printed as
;; the immediate.
8857 (define_insn "<sse2_avx2>_lshr<mode>3"
8858 [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,x")
8859 (lshiftrt:VIMAX_AVX2
8860 (match_operand:VIMAX_AVX2 1 "register_operand" "0,x")
8861 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
8864 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
8866 switch (which_alternative)
8869 return "psrldq\t{%2, %0|%0, %2}";
8871 return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
8876 [(set_attr "isa" "noavx,avx")
8877 (set_attr "type" "sseishft")
8878 (set_attr "length_immediate" "1")
8879 (set_attr "atom_unit" "sishuf")
8880 (set_attr "prefix_data16" "1,*")
8881 (set_attr "prefix" "orig,vex")
8882 (set_attr "mode" "<sseinsnmode>")])
;; Rotate of VI48_512 vectors with a vector count (operand 2 has the same
;; vector mode as the data and may be in memory), with an optional mask
;; operand.  The any_rotate iterator provides <rotate> for the mnemonic.
8884 (define_insn "avx512f_<rotate>v<mode><mask_name>"
8885 [(set (match_operand:VI48_512 0 "register_operand" "=v")
8886 (any_rotate:VI48_512
8887 (match_operand:VI48_512 1 "register_operand" "v")
8888 (match_operand:VI48_512 2 "nonimmediate_operand" "vm")))]
8890 "vp<rotate>v<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8891 [(set_attr "prefix" "evex")
8892 (set_attr "mode" "<sseinsnmode>")])
;; Rotate of VI48_512 vectors by a constant count: operand 2 must satisfy
;; const_0_to_255_operand, and operand 1 may be a register or memory.  An
;; optional mask operand is supported.
8894 (define_insn "avx512f_<rotate><mode><mask_name>"
8895 [(set (match_operand:VI48_512 0 "register_operand" "=v")
8896 (any_rotate:VI48_512
8897 (match_operand:VI48_512 1 "nonimmediate_operand" "vm")
8898 (match_operand:SI 2 "const_0_to_255_operand")))]
8900 "vp<rotate><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8901 [(set_attr "prefix" "evex")
8902 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the maxmin operations on the VI124_256_48_512 modes, with
;; optional mask and rounding variants.  Requires TARGET_AVX2 plus the mask
;; and rounding mode conditions; operands are canonicalized by
;; ix86_fixup_binary_operands_no_copy.
8904 (define_expand "<code><mode>3<mask_name><round_name>"
8905 [(set (match_operand:VI124_256_48_512 0 "register_operand")
8906 (maxmin:VI124_256_48_512
8907 (match_operand:VI124_256_48_512 1 "<round_nimm_predicate>")
8908 (match_operand:VI124_256_48_512 2 "<round_nimm_predicate>")))]
8909 "TARGET_AVX2 && <mask_mode512bit_condition> && <round_mode512bit_condition>"
8910 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Integer max/min instruction for the VI124_256_48_512 modes, emitted as
;; vp<maxmin_int> with the element-size suffix and the optional mask and
;; rounding operands.  Operand 1 is marked commutative (%).
;; NOTE(review): the mode attribute is fixed to OI although the iterator
;; name suggests 512-bit modes are included -- confirm this is intended.
8912 (define_insn "*avx2_<code><mode>3<mask_name><round_name>"
8913 [(set (match_operand:VI124_256_48_512 0 "register_operand" "=v")
8914 (maxmin:VI124_256_48_512
8915 (match_operand:VI124_256_48_512 1 "<round_nimm_predicate>" "%v")
8916 (match_operand:VI124_256_48_512 2 "<round_nimm_predicate>" "<round_constraint>")))]
8917 "TARGET_AVX2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
8918 && <mask_mode512bit_condition> && <round_mode512bit_condition>"
8919 "vp<maxmin_int><ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
8920 [(set_attr "type" "sseiadd")
8921 (set_attr "prefix_extra" "1")
8922 (set_attr "prefix" "maybe_evex")
8923 (set_attr "mode" "OI")])
;; Max/min expander for the VI8_AVX2 modes, implemented through a vector
;; conditional (ix86_expand_int_vcond) on the xops array:
;;   xops[0]    destination
;;   xops[1..2] operands 1 and 2 in that order for SMAX/UMAX, swapped
;;              otherwise
;;   xops[3]    comparison of operand 1 with operand 2, using GTU for
;;              UMAX/UMIN and GT for the remaining codes
;;   xops[4..5] the comparison operands
8925 (define_expand "<code><mode>3"
8926 [(set (match_operand:VI8_AVX2 0 "register_operand")
8928 (match_operand:VI8_AVX2 1 "register_operand")
8929 (match_operand:VI8_AVX2 2 "register_operand")))]
8936 xops[0] = operands[0];
8938 if (<CODE> == SMAX || <CODE> == UMAX)
8940 xops[1] = operands[1];
8941 xops[2] = operands[2];
8945 xops[1] = operands[2];
8946 xops[2] = operands[1];
8949 code = (<CODE> == UMAX || <CODE> == UMIN) ? GTU : GT;
8951 xops[3] = gen_rtx_fmt_ee (code, VOIDmode, operands[1], operands[2]);
8952 xops[4] = operands[1];
8953 xops[5] = operands[2];
8955 ok = ix86_expand_int_vcond (xops);
;; Max/min expander for the VI124_128 modes.  When TARGET_SSE4_1 holds or the
;; mode is V8HImode, the operands are only canonicalized.  The remaining
;; code forces both inputs into registers and builds a vector conditional
;; (ix86_expand_int_vcond) around a GT comparison of operand 1 with
;; operand 2, with xops[1..2] holding the operands in one of the two orders.
;; NOTE(review): the order is presumably chosen by whether <CODE> is the
;; max or the min operation -- confirm against the full preparation code.
8960 (define_expand "<code><mode>3"
8961 [(set (match_operand:VI124_128 0 "register_operand")
8963 (match_operand:VI124_128 1 "nonimmediate_operand")
8964 (match_operand:VI124_128 2 "nonimmediate_operand")))]
8967 if (TARGET_SSE4_1 || <MODE>mode == V8HImode)
8968 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
8974 xops[0] = operands[0];
8975 operands[1] = force_reg (<MODE>mode, operands[1]);
8976 operands[2] = force_reg (<MODE>mode, operands[2]);
8980 xops[1] = operands[1];
8981 xops[2] = operands[2];
8985 xops[1] = operands[2];
8986 xops[2] = operands[1];
8989 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
8990 xops[4] = operands[1];
8991 xops[5] = operands[2];
8993 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 max/min insn for the VI14_128 modes.
;; Alternative 0 (isa noavx): two-operand p<maxmin_int><suffix>, with the
;; destination tied to operand 1.  Alternative 1 (isa avx): three-operand
;; v-prefixed form.  Operand 1 is marked commutative ("%").
8999 (define_insn "*sse4_1_<code><mode>3"
9000 [(set (match_operand:VI14_128 0 "register_operand" "=x,x")
9002 (match_operand:VI14_128 1 "nonimmediate_operand" "%0,x")
9003 (match_operand:VI14_128 2 "nonimmediate_operand" "xm,xm")))]
9004 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
9006 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
9007 vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
9008 [(set_attr "isa" "noavx,avx")
9009 (set_attr "type" "sseiadd")
9010 (set_attr "prefix_extra" "1,*")
9011 (set_attr "prefix" "orig,vex")
9012 (set_attr "mode" "TI")])
;; SSE2 max/min insn on V8HI, emitting p<maxmin_int>w.
;; Alternative 0 is the legacy two-operand form (destination tied to
;; operand 1); alternative 1 is the AVX three-operand form.
9014 (define_insn "*<code>v8hi3"
9015 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
9017 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x")
9018 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")))]
9019 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V8HImode, operands)"
9021 p<maxmin_int>w\t{%2, %0|%0, %2}
9022 vp<maxmin_int>w\t{%2, %1, %0|%0, %1, %2}"
9023 [(set_attr "isa" "noavx,avx")
9024 (set_attr "type" "sseiadd")
9025 (set_attr "prefix_data16" "1,*")
9026 (set_attr "prefix_extra" "*,1")
9027 (set_attr "prefix" "orig,vex")
9028 (set_attr "mode" "TI")])
;; Max/min expander for the 128-bit VI124_128 modes that compares with GTU.
;; Three strategies are visible:
;;  - SSE4.1, or V16QImode: only canonicalize the operands.
;;  - UMAX on V8HImode: emit gen_sse2_ussubv8hi3 (op1, op2) followed by
;;    gen_addv8hi3 of that result and op2.  A fresh temporary is used for
;;    the intermediate when the destination is rtx_equal_p to operand 2,
;;    so operand 2 is still intact for the add.
;;  - otherwise: force both inputs into registers and expand through
;;    ix86_expand_int_vcond with a GTU comparison in xops[3].
9030 (define_expand "<code><mode>3"
9031 [(set (match_operand:VI124_128 0 "register_operand")
9033 (match_operand:VI124_128 1 "nonimmediate_operand")
9034 (match_operand:VI124_128 2 "nonimmediate_operand")))]
9037 if (TARGET_SSE4_1 || <MODE>mode == V16QImode)
9038 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
9039 else if (<CODE> == UMAX && <MODE>mode == V8HImode)
9041 rtx op0 = operands[0], op2 = operands[2], op3 = op0;
9042 operands[1] = force_reg (<MODE>mode, operands[1]);
9043 if (rtx_equal_p (op3, op2))
9044 op3 = gen_reg_rtx (V8HImode);
9045 emit_insn (gen_sse2_ussubv8hi3 (op3, operands[1], op2));
9046 emit_insn (gen_addv8hi3 (op0, op3, op2));
9054 operands[1] = force_reg (<MODE>mode, operands[1]);
9055 operands[2] = force_reg (<MODE>mode, operands[2]);
9057 xops[0] = operands[0];
9061 xops[1] = operands[1];
9062 xops[2] = operands[2];
9066 xops[1] = operands[2];
9067 xops[2] = operands[1];
9070 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
9071 xops[4] = operands[1];
9072 xops[5] = operands[2];
9074 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 max/min insn for the VI24_128 modes.
;; Alternative 0 (noavx) is the two-operand legacy encoding with the
;; destination tied to operand 1; alternative 1 (avx) is the VEX
;; three-operand encoding.
9080 (define_insn "*sse4_1_<code><mode>3"
9081 [(set (match_operand:VI24_128 0 "register_operand" "=x,x")
9083 (match_operand:VI24_128 1 "nonimmediate_operand" "%0,x")
9084 (match_operand:VI24_128 2 "nonimmediate_operand" "xm,xm")))]
9085 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
9087 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
9088 vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
9089 [(set_attr "isa" "noavx,avx")
9090 (set_attr "type" "sseiadd")
9091 (set_attr "prefix_extra" "1,*")
9092 (set_attr "prefix" "orig,vex")
9093 (set_attr "mode" "TI")])
;; SSE2 max/min insn on V16QI, emitting p<maxmin_int>b.
;; Alternative 0 is the legacy two-operand form (destination tied to
;; operand 1); alternative 1 is the AVX three-operand form.
9095 (define_insn "*<code>v16qi3"
9096 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
9098 (match_operand:V16QI 1 "nonimmediate_operand" "%0,x")
9099 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")))]
9100 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V16QImode, operands)"
9102 p<maxmin_int>b\t{%2, %0|%0, %2}
9103 vp<maxmin_int>b\t{%2, %1, %0|%0, %1, %2}"
9104 [(set_attr "isa" "noavx,avx")
9105 (set_attr "type" "sseiadd")
9106 (set_attr "prefix_data16" "1,*")
9107 (set_attr "prefix_extra" "*,1")
9108 (set_attr "prefix" "orig,vex")
9109 (set_attr "mode" "TI")])
9111 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9113 ;; Parallel integral comparisons
9115 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Named expander for 256-bit integer vector equality (VI_256 modes).
;; Its only preparation step is canonicalizing the operands for EQ via
;; ix86_fixup_binary_operands_no_copy.
9117 (define_expand "avx2_eq<mode>3"
9118 [(set (match_operand:VI_256 0 "register_operand")
9120 (match_operand:VI_256 1 "nonimmediate_operand")
9121 (match_operand:VI_256 2 "nonimmediate_operand")))]
9123 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
;; AVX2 equality-compare insn for the VI_256 modes; emits the three-operand
;; vpcmpeq<ssemodesuffix>.  Operand 1 is commutative, operand 2 may be
;; memory.
9125 (define_insn "*avx2_eq<mode>3"
9126 [(set (match_operand:VI_256 0 "register_operand" "=x")
9128 (match_operand:VI_256 1 "nonimmediate_operand" "%x")
9129 (match_operand:VI_256 2 "nonimmediate_operand" "xm")))]
9130 "TARGET_AVX2 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
9131 "vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
9132 [(set_attr "type" "ssecmp")
9133 (set_attr "prefix_extra" "1")
9134 (set_attr "prefix" "vex")
9135 (set_attr "mode" "OI")])
;; Named expander for 512-bit integer equality (VI48_512 modes).  The
;; result has the <avx512fmaskmode> mask mode and the comparison is
;; expressed as an unspec.  Operands are canonicalized for EQ via
;; ix86_fixup_binary_operands_no_copy.
9137 (define_expand "avx512f_eq<mode>3<mask_scalar_merge_name>"
9138 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
9139 (unspec:<avx512fmaskmode>
9140 [(match_operand:VI48_512 1 "register_operand")
9141 (match_operand:VI48_512 2 "nonimmediate_operand")]
9144 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
;; AVX-512F equality-compare insn for the VI48_512 modes.  The result is
;; written to a mask register (constraint "Yk"); the template emits
;; vpcmpeq<ssemodesuffix> with the optional merge-mask operand text.
9146 (define_insn "avx512f_eq<mode>3<mask_scalar_merge_name>_1"
9147 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
9148 (unspec:<avx512fmaskmode>
9149 [(match_operand:VI48_512 1 "register_operand" "%v")
9150 (match_operand:VI48_512 2 "nonimmediate_operand" "vm")]
9152 "TARGET_AVX512F && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
9153 "vpcmpeq<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
9154 [(set_attr "type" "ssecmp")
9155 (set_attr "prefix_extra" "1")
9156 (set_attr "prefix" "evex")
9157 (set_attr "mode" "<sseinsnmode>")])
;; SSE4.1 equality-compare insn on V2DI.  Alternative 0 emits the legacy
;; two-operand pcmpeqq (destination tied to operand 1); alternative 1 emits
;; the AVX three-operand vpcmpeqq.
9159 (define_insn "*sse4_1_eqv2di3"
9160 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
9162 (match_operand:V2DI 1 "nonimmediate_operand" "%0,x")
9163 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")))]
9164 "TARGET_SSE4_1 && ix86_binary_operator_ok (EQ, V2DImode, operands)"
9166 pcmpeqq\t{%2, %0|%0, %2}
9167 vpcmpeqq\t{%2, %1, %0|%0, %1, %2}"
9168 [(set_attr "isa" "noavx,avx")
9169 (set_attr "type" "ssecmp")
9170 (set_attr "prefix_extra" "1")
9171 (set_attr "prefix" "orig,vex")
9172 (set_attr "mode" "TI")])
;; SSE2 equality-compare insn for the VI124_128 modes; disabled when
;; TARGET_XOP is set.  Alternative 0 is the legacy two-operand
;; pcmpeq<suffix>, alternative 1 the AVX three-operand form.
9174 (define_insn "*sse2_eq<mode>3"
9175 [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
9177 (match_operand:VI124_128 1 "nonimmediate_operand" "%0,x")
9178 (match_operand:VI124_128 2 "nonimmediate_operand" "xm,xm")))]
9179 "TARGET_SSE2 && !TARGET_XOP
9180 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
9182 pcmpeq<ssemodesuffix>\t{%2, %0|%0, %2}
9183 vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
9184 [(set_attr "isa" "noavx,avx")
9185 (set_attr "type" "ssecmp")
9186 (set_attr "prefix_data16" "1,*")
9187 (set_attr "prefix" "orig,vex")
9188 (set_attr "mode" "TI")])
;; Named expander for 128-bit integer equality on the VI124_128 modes,
;; available with SSE2 when XOP is not enabled.  It canonicalizes the
;; operands for EQ via ix86_fixup_binary_operands_no_copy.
9190 (define_expand "sse2_eq<mode>3"
9191 [(set (match_operand:VI124_128 0 "register_operand")
9193 (match_operand:VI124_128 1 "nonimmediate_operand")
9194 (match_operand:VI124_128 2 "nonimmediate_operand")))]
9195 "TARGET_SSE2 && !TARGET_XOP "
9196 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
;; Named expander for V2DI equality; it canonicalizes the operands for EQ
;; via ix86_fixup_binary_operands_no_copy.
9198 (define_expand "sse4_1_eqv2di3"
9199 [(set (match_operand:V2DI 0 "register_operand")
9201 (match_operand:V2DI 1 "nonimmediate_operand")
9202 (match_operand:V2DI 2 "nonimmediate_operand")))]
9204 "ix86_fixup_binary_operands_no_copy (EQ, V2DImode, operands);")
;; Greater-than compare insn on V2DI, emitting pcmpgtq / vpcmpgtq.
;; Operand 1 must be a register and carries no commutative marker.
;; Alternative 0 ties the destination to operand 1 (legacy encoding);
;; alternative 1 is the AVX three-operand encoding.
9206 (define_insn "sse4_2_gtv2di3"
9207 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
9209 (match_operand:V2DI 1 "register_operand" "0,x")
9210 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")))]
9213 pcmpgtq\t{%2, %0|%0, %2}
9214 vpcmpgtq\t{%2, %1, %0|%0, %1, %2}"
9215 [(set_attr "isa" "noavx,avx")
9216 (set_attr "type" "ssecmp")
9217 (set_attr "prefix_extra" "1")
9218 (set_attr "prefix" "orig,vex")
9219 (set_attr "mode" "TI")])
;; Greater-than compare insn for the VI_256 modes; emits the three-operand
;; vpcmpgt<ssemodesuffix>.  Operand 1 is a register, operand 2 may be
;; memory.
9221 (define_insn "avx2_gt<mode>3"
9222 [(set (match_operand:VI_256 0 "register_operand" "=x")
9224 (match_operand:VI_256 1 "register_operand" "x")
9225 (match_operand:VI_256 2 "nonimmediate_operand" "xm")))]
9227 "vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
9228 [(set_attr "type" "ssecmp")
9229 (set_attr "prefix_extra" "1")
9230 (set_attr "prefix" "vex")
9231 (set_attr "mode" "OI")])
;; Greater-than compare insn for the VI48_512 modes, modelled as
;; UNSPEC_MASKED_GT.  The result goes to a mask register ("Yk"); the
;; template emits vpcmpgt<ssemodesuffix> with the optional merge-mask
;; operand text.
9233 (define_insn "avx512f_gt<mode>3<mask_scalar_merge_name>"
9234 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
9235 (unspec:<avx512fmaskmode>
9236 [(match_operand:VI48_512 1 "register_operand" "v")
9237 (match_operand:VI48_512 2 "nonimmediate_operand" "vm")] UNSPEC_MASKED_GT))]
9239 "vpcmpgt<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
9240 [(set_attr "type" "ssecmp")
9241 (set_attr "prefix_extra" "1")
9242 (set_attr "prefix" "evex")
9243 (set_attr "mode" "<sseinsnmode>")])
;; SSE2 greater-than compare insn for the VI124_128 modes; disabled when
;; TARGET_XOP is set.  Alternative 0 is the legacy two-operand
;; pcmpgt<suffix> (destination tied to operand 1), alternative 1 the AVX
;; three-operand form.
9245 (define_insn "sse2_gt<mode>3"
9246 [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
9248 (match_operand:VI124_128 1 "register_operand" "0,x")
9249 (match_operand:VI124_128 2 "nonimmediate_operand" "xm,xm")))]
9250 "TARGET_SSE2 && !TARGET_XOP"
9252 pcmpgt<ssemodesuffix>\t{%2, %0|%0, %2}
9253 vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
9254 [(set_attr "isa" "noavx,avx")
9255 (set_attr "type" "ssecmp")
9256 (set_attr "prefix_data16" "1,*")
9257 (set_attr "prefix" "orig,vex")
9258 (set_attr "mode" "TI")])
;; vcond expander pairing a 512-bit data mode (V_512) with a 512-bit
;; integer comparison mode (VI_512).  Operand 3 is the comparison operator
;; applied to operands 4 and 5; operands 1 and 2 are the values selected
;; between.  Only valid when both modes have the same number of elements.
;; The whole expansion is delegated to ix86_expand_int_vcond.
9260 (define_expand "vcond<V_512:mode><VI_512:mode>"
9261 [(set (match_operand:V_512 0 "register_operand")
9263 (match_operator 3 ""
9264 [(match_operand:VI_512 4 "nonimmediate_operand")
9265 (match_operand:VI_512 5 "general_operand")])
9266 (match_operand:V_512 1)
9267 (match_operand:V_512 2)))]
9269 && (GET_MODE_NUNITS (<V_512:MODE>mode)
9270 == GET_MODE_NUNITS (<VI_512:MODE>mode))"
9272 bool ok = ix86_expand_int_vcond (operands);
;; vcond expander pairing a 256-bit data mode (V_256) with a 256-bit
;; integer comparison mode (VI_256).  Requires equal element counts in the
;; two modes and delegates to ix86_expand_int_vcond.
9277 (define_expand "vcond<V_256:mode><VI_256:mode>"
9278 [(set (match_operand:V_256 0 "register_operand")
9280 (match_operator 3 ""
9281 [(match_operand:VI_256 4 "nonimmediate_operand")
9282 (match_operand:VI_256 5 "general_operand")])
9283 (match_operand:V_256 1)
9284 (match_operand:V_256 2)))]
9286 && (GET_MODE_NUNITS (<V_256:MODE>mode)
9287 == GET_MODE_NUNITS (<VI_256:MODE>mode))"
9289 bool ok = ix86_expand_int_vcond (operands);
;; vcond expander pairing a 128-bit data mode (V_128) with a VI124_128
;; integer comparison mode.  Requires equal element counts in the two
;; modes and delegates to ix86_expand_int_vcond.
9294 (define_expand "vcond<V_128:mode><VI124_128:mode>"
9295 [(set (match_operand:V_128 0 "register_operand")
9297 (match_operator 3 ""
9298 [(match_operand:VI124_128 4 "nonimmediate_operand")
9299 (match_operand:VI124_128 5 "general_operand")])
9300 (match_operand:V_128 1)
9301 (match_operand:V_128 2)))]
9303 && (GET_MODE_NUNITS (<V_128:MODE>mode)
9304 == GET_MODE_NUNITS (<VI124_128:MODE>mode))"
9306 bool ok = ix86_expand_int_vcond (operands);
;; vcond expander for VI8F_128 data modes with a V2DI comparison:
;; operand 0 = (operand 3 applied to operands 4, 5) ? operand 1 : operand 2.
;; Delegates to ix86_expand_int_vcond.
9311 (define_expand "vcond<VI8F_128:mode>v2di"
9312 [(set (match_operand:VI8F_128 0 "register_operand")
9313 (if_then_else:VI8F_128
9314 (match_operator 3 ""
9315 [(match_operand:V2DI 4 "nonimmediate_operand")
9316 (match_operand:V2DI 5 "general_operand")])
9317 (match_operand:VI8F_128 1)
9318 (match_operand:VI8F_128 2)))]
9321 bool ok = ix86_expand_int_vcond (operands);
;; vcondu expander pairing a 512-bit data mode (V_512) with a 512-bit
;; integer comparison mode (VI_512).  Unlike the vcond variant, both
;; compared operands are nonimmediate_operand and the selected values are
;; general_operand.  Requires equal element counts and delegates to
;; ix86_expand_int_vcond.
9326 (define_expand "vcondu<V_512:mode><VI_512:mode>"
9327 [(set (match_operand:V_512 0 "register_operand")
9329 (match_operator 3 ""
9330 [(match_operand:VI_512 4 "nonimmediate_operand")
9331 (match_operand:VI_512 5 "nonimmediate_operand")])
9332 (match_operand:V_512 1 "general_operand")
9333 (match_operand:V_512 2 "general_operand")))]
9335 && (GET_MODE_NUNITS (<V_512:MODE>mode)
9336 == GET_MODE_NUNITS (<VI_512:MODE>mode))"
9338 bool ok = ix86_expand_int_vcond (operands);
;; vcondu expander pairing a 256-bit data mode (V_256) with a 256-bit
;; integer comparison mode (VI_256).  Requires equal element counts and
;; delegates to ix86_expand_int_vcond.
9343 (define_expand "vcondu<V_256:mode><VI_256:mode>"
9344 [(set (match_operand:V_256 0 "register_operand")
9346 (match_operator 3 ""
9347 [(match_operand:VI_256 4 "nonimmediate_operand")
9348 (match_operand:VI_256 5 "nonimmediate_operand")])
9349 (match_operand:V_256 1 "general_operand")
9350 (match_operand:V_256 2 "general_operand")))]
9352 && (GET_MODE_NUNITS (<V_256:MODE>mode)
9353 == GET_MODE_NUNITS (<VI_256:MODE>mode))"
9355 bool ok = ix86_expand_int_vcond (operands);
;; vcondu expander pairing a 128-bit data mode (V_128) with a VI124_128
;; integer comparison mode.  Requires equal element counts and delegates
;; to ix86_expand_int_vcond.
9360 (define_expand "vcondu<V_128:mode><VI124_128:mode>"
9361 [(set (match_operand:V_128 0 "register_operand")
9363 (match_operator 3 ""
9364 [(match_operand:VI124_128 4 "nonimmediate_operand")
9365 (match_operand:VI124_128 5 "nonimmediate_operand")])
9366 (match_operand:V_128 1 "general_operand")
9367 (match_operand:V_128 2 "general_operand")))]
9369 && (GET_MODE_NUNITS (<V_128:MODE>mode)
9370 == GET_MODE_NUNITS (<VI124_128:MODE>mode))"
9372 bool ok = ix86_expand_int_vcond (operands);
;; vcondu expander for VI8F_128 data modes with a V2DI comparison:
;; operand 0 = (operand 3 applied to operands 4, 5) ? operand 1 : operand 2.
;; Delegates to ix86_expand_int_vcond.
9377 (define_expand "vcondu<VI8F_128:mode>v2di"
9378 [(set (match_operand:VI8F_128 0 "register_operand")
9379 (if_then_else:VI8F_128
9380 (match_operator 3 ""
9381 [(match_operand:V2DI 4 "nonimmediate_operand")
9382 (match_operand:V2DI 5 "nonimmediate_operand")])
9383 (match_operand:VI8F_128 1 "general_operand")
9384 (match_operand:VI8F_128 2 "general_operand")))]
9387 bool ok = ix86_expand_int_vcond (operands);
;; Modes accepted by the variable-selector vec_perm expander.  The 128-bit
;; modes are unconditional here; 256-bit modes require TARGET_AVX2 and
;; 512-bit modes require TARGET_AVX512F.
9392 (define_mode_iterator VEC_PERM_AVX2
9393 [V16QI V8HI V4SI V2DI V4SF V2DF
9394 (V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
9395 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
9396 (V8SF "TARGET_AVX2") (V4DF "TARGET_AVX2")
9397 (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")
9398 (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")])
;; vec_perm expander with a register selector.  Operand 0 is the result,
;; operands 1 and 2 the input vectors, and operand 3 the selector in the
;; matching integer vector mode (<sseintvecmode>).  Available with SSSE3,
;; AVX or XOP; the expansion is done by ix86_expand_vec_perm.
9400 (define_expand "vec_perm<mode>"
9401 [(match_operand:VEC_PERM_AVX2 0 "register_operand")
9402 (match_operand:VEC_PERM_AVX2 1 "register_operand")
9403 (match_operand:VEC_PERM_AVX2 2 "register_operand")
9404 (match_operand:<sseintvecmode> 3 "register_operand")]
9405 "TARGET_SSSE3 || TARGET_AVX || TARGET_XOP"
9407 ix86_expand_vec_perm (operands);
;; Modes accepted by the vec_perm_const expander, each guarded by the
;; ISA level named in its condition string (SSE, SSE2, AVX, AVX2 or
;; AVX512F).
9411 (define_mode_iterator VEC_PERM_CONST
9412 [(V4SF "TARGET_SSE") (V4SI "TARGET_SSE")
9413 (V2DF "TARGET_SSE") (V2DI "TARGET_SSE")
9414 (V16QI "TARGET_SSE2") (V8HI "TARGET_SSE2")
9415 (V8SF "TARGET_AVX") (V4DF "TARGET_AVX")
9416 (V8SI "TARGET_AVX") (V4DI "TARGET_AVX")
9417 (V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
9418 (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")
9419 (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")])
;; vec_perm_const expander.  Operand 3 is the selector in <sseintvecmode>
;; and has no predicate.  The body branches on the return value of
;; ix86_expand_vec_perm_const (operands).
9421 (define_expand "vec_perm_const<mode>"
9422 [(match_operand:VEC_PERM_CONST 0 "register_operand")
9423 (match_operand:VEC_PERM_CONST 1 "register_operand")
9424 (match_operand:VEC_PERM_CONST 2 "register_operand")
9425 (match_operand:<sseintvecmode> 3)]
9428 if (ix86_expand_vec_perm_const (operands))
9434 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9436 ;; Parallel bitwise logical operations
9438 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; One's-complement expander for the VI modes, expressed as an XOR of
;; operand 1 with a constant.  The preparation code builds a CONST_VECTOR
;; whose GET_MODE_NUNITS elements are all constm1_rtx, forces it into a
;; register, and stores it as operand 2.
9440 (define_expand "one_cmpl<mode>2"
9441 [(set (match_operand:VI 0 "register_operand")
9442 (xor:VI (match_operand:VI 1 "nonimmediate_operand")
9446 int i, n = GET_MODE_NUNITS (<MODE>mode);
9447 rtvec v = rtvec_alloc (n);
9449 for (i = 0; i < n; ++i)
9450 RTVEC_ELT (v, i) = constm1_rtx;
9452 operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v));
;; Named expander for the and-not operation on the VI_AVX2 modes.
;; Operand 1 is the complemented input (wrapped in "not") and must be a
;; register; operand 2 may be memory.  There is no preparation code.
9455 (define_expand "<sse2_avx2>_andnot<mode>3<mask_name>"
9456 [(set (match_operand:VI_AVX2 0 "register_operand")
9458 (not:VI_AVX2 (match_operand:VI_AVX2 1 "register_operand"))
9459 (match_operand:VI_AVX2 2 "nonimmediate_operand")))]
9460 "TARGET_SSE2 && <mask_mode512bit_condition>")
;; And-not insn for all VI modes (operand 1 is the complemented input).
;; The output code assembles the instruction text at run time:
;;  - a switch on get_attr_mode (insn) chooses the mnemonic stem in tmp,
;;    asserting that the ISA needed for that mode is enabled;
;;  - a switch on which_alternative chooses the operand template: the
;;    two-operand legacy form, or the "v"-prefixed three-operand form that
;;    also carries the mask operand text;
;;  - snprintf formats the two into the static 64-byte buffer buf.
;; The "mode" attribute is computed by the cond at the end from the mode
;; size and the target flags tested there.
9462 (define_insn "*andnot<mode>3<mask_name>"
9463 [(set (match_operand:VI 0 "register_operand" "=x,v")
9465 (not:VI (match_operand:VI 1 "register_operand" "0,v"))
9466 (match_operand:VI 2 "nonimmediate_operand" "xm,vm")))]
9467 "TARGET_SSE && <mask_mode512bit_condition>"
9469 static char buf[64];
9473 switch (get_attr_mode (insn))
9476 gcc_assert (TARGET_AVX512F);
9478 tmp = "pandn<ssemodesuffix>";
9482 gcc_assert (TARGET_AVX2);
9484 gcc_assert (TARGET_SSE2);
9490 gcc_assert (TARGET_AVX512F);
9492 gcc_assert (TARGET_AVX);
9494 gcc_assert (TARGET_SSE);
9503 switch (which_alternative)
9506 ops = "%s\t{%%2, %%0|%%0, %%2}";
9509 ops = "v%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
9515 snprintf (buf, sizeof (buf), ops, tmp);
9518 [(set_attr "isa" "noavx,avx")
9519 (set_attr "type" "sselog")
9520 (set (attr "prefix_data16")
9522 (and (eq_attr "alternative" "0")
9523 (eq_attr "mode" "TI"))
9525 (const_string "*")))
9526 (set_attr "prefix" "<mask_prefix3>")
9528 (cond [(and (match_test "<MODE_SIZE> == 16")
9529 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
9530 (const_string "<ssePSmode>")
9531 (match_test "TARGET_AVX2")
9532 (const_string "<sseinsnmode>")
9533 (match_test "TARGET_AVX")
9535 (match_test "<MODE_SIZE> > 16")
9536 (const_string "V8SF")
9537 (const_string "<sseinsnmode>"))
9538 (ior (not (match_test "TARGET_SSE2"))
9539 (match_test "optimize_function_for_size_p (cfun)"))
9540 (const_string "V4SF")
9542 (const_string "<sseinsnmode>")))])
;; Expander for the bitwise logical operations on VI modes.  Either input
;; may be a register, memory or a constant vector
;; (nonimmediate_or_const_vector_operand); the expansion is done by
;; ix86_expand_vector_logical_operator.
9544 (define_expand "<code><mode>3"
9545 [(set (match_operand:VI 0 "register_operand")
9547 (match_operand:VI 1 "nonimmediate_or_const_vector_operand")
9548 (match_operand:VI 2 "nonimmediate_or_const_vector_operand")))]
9551 ix86_expand_vector_logical_operator (<CODE>, <MODE>mode, operands);
;; Insn for the bitwise logical operations on all VI modes; operand 1 is
;; commutative ("%").  As in the and-not insn, the output code builds the
;; instruction text at run time:
;;  - a switch on get_attr_mode (insn) chooses the mnemonic stem in tmp,
;;    asserting that the ISA needed for that mode is enabled;
;;  - a switch on which_alternative chooses the two-operand legacy
;;    template or the "v"-prefixed three-operand template with mask text;
;;  - snprintf formats the two into the static 64-byte buffer buf.
;; The "mode" attribute is computed by the cond at the end.
9555 (define_insn "<mask_codefor><code><mode>3<mask_name>"
9556 [(set (match_operand:VI 0 "register_operand" "=x,v")
9558 (match_operand:VI 1 "nonimmediate_operand" "%0,v")
9559 (match_operand:VI 2 "nonimmediate_operand" "xm,vm")))]
9560 "TARGET_SSE && <mask_mode512bit_condition>
9561 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
9563 static char buf[64];
9567 switch (get_attr_mode (insn))
9570 gcc_assert (TARGET_AVX512F);
9572 tmp = "p<logic><ssemodesuffix>";
9576 gcc_assert (TARGET_AVX2);
9578 gcc_assert (TARGET_SSE2);
9584 gcc_assert (TARGET_AVX512F);
9586 gcc_assert (TARGET_AVX);
9588 gcc_assert (TARGET_SSE);
9597 switch (which_alternative)
9600 ops = "%s\t{%%2, %%0|%%0, %%2}";
9603 ops = "v%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
9609 snprintf (buf, sizeof (buf), ops, tmp);
9612 [(set_attr "isa" "noavx,avx")
9613 (set_attr "type" "sselog")
9614 (set (attr "prefix_data16")
9616 (and (eq_attr "alternative" "0")
9617 (eq_attr "mode" "TI"))
9619 (const_string "*")))
9620 (set_attr "prefix" "<mask_prefix3>")
9622 (cond [(and (match_test "<MODE_SIZE> == 16")
9623 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
9624 (const_string "<ssePSmode>")
9625 (match_test "TARGET_AVX2")
9626 (const_string "<sseinsnmode>")
9627 (match_test "TARGET_AVX")
9629 (match_test "<MODE_SIZE> > 16")
9630 (const_string "V8SF")
9631 (const_string "<sseinsnmode>"))
9632 (ior (not (match_test "TARGET_SSE2"))
9633 (match_test "optimize_function_for_size_p (cfun)"))
9634 (const_string "V4SF")
9636 (const_string "<sseinsnmode>")))])
;; vptestm insn for the VI48_512 modes, modelled as an unspec whose result
;; is written to a mask register ("Yk").  The template carries the
;; optional merge-mask operand text.
9638 (define_insn "avx512f_testm<mode>3<mask_scalar_merge_name>"
9639 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
9640 (unspec:<avx512fmaskmode>
9641 [(match_operand:VI48_512 1 "register_operand" "v")
9642 (match_operand:VI48_512 2 "nonimmediate_operand" "vm")]
9645 "vptestm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
9646 [(set_attr "prefix" "evex")
9647 (set_attr "mode" "<sseinsnmode>")])
;; vptestnm insn for the VI48_512 modes, modelled as an unspec whose
;; result is written to a mask register ("Yk").  The template carries the
;; optional merge-mask operand text.
9649 (define_insn "avx512f_testnm<mode>3<mask_scalar_merge_name>"
9650 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
9651 (unspec:<avx512fmaskmode>
9652 [(match_operand:VI48_512 1 "register_operand" "v")
9653 (match_operand:VI48_512 2 "nonimmediate_operand" "vm")]
9656 "vptestnm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
9657 [(set_attr "prefix" "evex")
9658 (set_attr "mode" "<sseinsnmode>")])
9660 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9662 ;; Parallel integral element swizzling
9664 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; vec_pack_trunc expander.  Both inputs are reinterpreted in the narrower
;; element mode <ssepackmode> with gen_lowpart, and the result is produced
;; by ix86_expand_vec_extract_even_odd with a final argument of 0
;; (presumably selecting the even elements -- confirm against that helper).
9666 (define_expand "vec_pack_trunc_<mode>"
9667 [(match_operand:<ssepackmode> 0 "register_operand")
9668 (match_operand:VI248_AVX2_8_AVX512F 1 "register_operand")
9669 (match_operand:VI248_AVX2_8_AVX512F 2 "register_operand")]
9672 rtx op1 = gen_lowpart (<ssepackmode>mode, operands[1]);
9673 rtx op2 = gen_lowpart (<ssepackmode>mode, operands[2]);
9674 ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
;; packsswb / vpacksswb: the result is the concatenation of the
;; signed-saturating truncations (ss_truncate) of operands 1 and 2.
;; Alternative 0 is the legacy two-operand form (destination tied to
;; operand 1); alternative 1 is the AVX three-operand form.
9678 (define_insn "<sse2_avx2>_packsswb"
9679 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
9680 (vec_concat:VI1_AVX2
9681 (ss_truncate:<ssehalfvecmode>
9682 (match_operand:<sseunpackmode> 1 "register_operand" "0,x"))
9683 (ss_truncate:<ssehalfvecmode>
9684 (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,xm"))))]
9687 packsswb\t{%2, %0|%0, %2}
9688 vpacksswb\t{%2, %1, %0|%0, %1, %2}"
9689 [(set_attr "isa" "noavx,avx")
9690 (set_attr "type" "sselog")
9691 (set_attr "prefix_data16" "1,*")
9692 (set_attr "prefix" "orig,vex")
9693 (set_attr "mode" "<sseinsnmode>")])
;; packssdw / vpackssdw: the result is the concatenation of the
;; signed-saturating truncations (ss_truncate) of operands 1 and 2.
;; Alternative 0 is the legacy two-operand form (destination tied to
;; operand 1); alternative 1 is the AVX three-operand form.
9695 (define_insn "<sse2_avx2>_packssdw"
9696 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
9697 (vec_concat:VI2_AVX2
9698 (ss_truncate:<ssehalfvecmode>
9699 (match_operand:<sseunpackmode> 1 "register_operand" "0,x"))
9700 (ss_truncate:<ssehalfvecmode>
9701 (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,xm"))))]
9704 packssdw\t{%2, %0|%0, %2}
9705 vpackssdw\t{%2, %1, %0|%0, %1, %2}"
9706 [(set_attr "isa" "noavx,avx")
9707 (set_attr "type" "sselog")
9708 (set_attr "prefix_data16" "1,*")
9709 (set_attr "prefix" "orig,vex")
9710 (set_attr "mode" "<sseinsnmode>")])
;; packuswb / vpackuswb: the result is the concatenation of the
;; unsigned-saturating truncations (us_truncate) of operands 1 and 2.
;; Alternative 0 is the legacy two-operand form (destination tied to
;; operand 1); alternative 1 is the AVX three-operand form.
9712 (define_insn "<sse2_avx2>_packuswb"
9713 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
9714 (vec_concat:VI1_AVX2
9715 (us_truncate:<ssehalfvecmode>
9716 (match_operand:<sseunpackmode> 1 "register_operand" "0,x"))
9717 (us_truncate:<ssehalfvecmode>
9718 (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,xm"))))]
9721 packuswb\t{%2, %0|%0, %2}
9722 vpackuswb\t{%2, %1, %0|%0, %1, %2}"
9723 [(set_attr "isa" "noavx,avx")
9724 (set_attr "type" "sselog")
9725 (set_attr "prefix_data16" "1,*")
9726 (set_attr "prefix" "orig,vex")
9727 (set_attr "mode" "<sseinsnmode>")])
;; vpunpckhbw on V32QI.  The parallel lists the selected element indices
;; in result order: index pairs (8..15 with 40..47), then (24..31 with
;; 56..63).  Indices of 32 and above presumably address operand 2 in the
;; concatenation of the two inputs.
9729 (define_insn "avx2_interleave_highv32qi"
9730 [(set (match_operand:V32QI 0 "register_operand" "=x")
9733 (match_operand:V32QI 1 "register_operand" "x")
9734 (match_operand:V32QI 2 "nonimmediate_operand" "xm"))
9735 (parallel [(const_int 8) (const_int 40)
9736 (const_int 9) (const_int 41)
9737 (const_int 10) (const_int 42)
9738 (const_int 11) (const_int 43)
9739 (const_int 12) (const_int 44)
9740 (const_int 13) (const_int 45)
9741 (const_int 14) (const_int 46)
9742 (const_int 15) (const_int 47)
9743 (const_int 24) (const_int 56)
9744 (const_int 25) (const_int 57)
9745 (const_int 26) (const_int 58)
9746 (const_int 27) (const_int 59)
9747 (const_int 28) (const_int 60)
9748 (const_int 29) (const_int 61)
9749 (const_int 30) (const_int 62)
9750 (const_int 31) (const_int 63)])))]
9752 "vpunpckhbw\t{%2, %1, %0|%0, %1, %2}"
9753 [(set_attr "type" "sselog")
9754 (set_attr "prefix" "vex")
9755 (set_attr "mode" "OI")])
;; punpckhbw / vpunpckhbw on V16QI.  The parallel selects indices 8..15
;; paired with 24..31.  Alternative 0 is the legacy two-operand form
;; (destination tied to operand 1); alternative 1 is the AVX form.
9757 (define_insn "vec_interleave_highv16qi"
9758 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
9761 (match_operand:V16QI 1 "register_operand" "0,x")
9762 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm"))
9763 (parallel [(const_int 8) (const_int 24)
9764 (const_int 9) (const_int 25)
9765 (const_int 10) (const_int 26)
9766 (const_int 11) (const_int 27)
9767 (const_int 12) (const_int 28)
9768 (const_int 13) (const_int 29)
9769 (const_int 14) (const_int 30)
9770 (const_int 15) (const_int 31)])))]
9773 punpckhbw\t{%2, %0|%0, %2}
9774 vpunpckhbw\t{%2, %1, %0|%0, %1, %2}"
9775 [(set_attr "isa" "noavx,avx")
9776 (set_attr "type" "sselog")
9777 (set_attr "prefix_data16" "1,*")
9778 (set_attr "prefix" "orig,vex")
9779 (set_attr "mode" "TI")])
;; vpunpcklbw on V32QI.  The parallel lists the selected element indices
;; in result order: index pairs (0..7 with 32..39), then (16..23 with
;; 48..55).
9781 (define_insn "avx2_interleave_lowv32qi"
9782 [(set (match_operand:V32QI 0 "register_operand" "=x")
9785 (match_operand:V32QI 1 "register_operand" "x")
9786 (match_operand:V32QI 2 "nonimmediate_operand" "xm"))
9787 (parallel [(const_int 0) (const_int 32)
9788 (const_int 1) (const_int 33)
9789 (const_int 2) (const_int 34)
9790 (const_int 3) (const_int 35)
9791 (const_int 4) (const_int 36)
9792 (const_int 5) (const_int 37)
9793 (const_int 6) (const_int 38)
9794 (const_int 7) (const_int 39)
9795 (const_int 16) (const_int 48)
9796 (const_int 17) (const_int 49)
9797 (const_int 18) (const_int 50)
9798 (const_int 19) (const_int 51)
9799 (const_int 20) (const_int 52)
9800 (const_int 21) (const_int 53)
9801 (const_int 22) (const_int 54)
9802 (const_int 23) (const_int 55)])))]
9804 "vpunpcklbw\t{%2, %1, %0|%0, %1, %2}"
9805 [(set_attr "type" "sselog")
9806 (set_attr "prefix" "vex")
9807 (set_attr "mode" "OI")])
;; punpcklbw / vpunpcklbw on V16QI.  The parallel selects indices 0..7
;; paired with 16..23.  Alternative 0 is the legacy two-operand form
;; (destination tied to operand 1); alternative 1 is the AVX form.
9809 (define_insn "vec_interleave_lowv16qi"
9810 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
9813 (match_operand:V16QI 1 "register_operand" "0,x")
9814 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm"))
9815 (parallel [(const_int 0) (const_int 16)
9816 (const_int 1) (const_int 17)
9817 (const_int 2) (const_int 18)
9818 (const_int 3) (const_int 19)
9819 (const_int 4) (const_int 20)
9820 (const_int 5) (const_int 21)
9821 (const_int 6) (const_int 22)
9822 (const_int 7) (const_int 23)])))]
9825 punpcklbw\t{%2, %0|%0, %2}
9826 vpunpcklbw\t{%2, %1, %0|%0, %1, %2}"
9827 [(set_attr "isa" "noavx,avx")
9828 (set_attr "type" "sselog")
9829 (set_attr "prefix_data16" "1,*")
9830 (set_attr "prefix" "orig,vex")
9831 (set_attr "mode" "TI")])
;; vpunpckhwd on V16HI.  The parallel selects index pairs (4..7 with
;; 20..23), then (12..15 with 28..31).
9833 (define_insn "avx2_interleave_highv16hi"
9834 [(set (match_operand:V16HI 0 "register_operand" "=x")
9837 (match_operand:V16HI 1 "register_operand" "x")
9838 (match_operand:V16HI 2 "nonimmediate_operand" "xm"))
9839 (parallel [(const_int 4) (const_int 20)
9840 (const_int 5) (const_int 21)
9841 (const_int 6) (const_int 22)
9842 (const_int 7) (const_int 23)
9843 (const_int 12) (const_int 28)
9844 (const_int 13) (const_int 29)
9845 (const_int 14) (const_int 30)
9846 (const_int 15) (const_int 31)])))]
9848 "vpunpckhwd\t{%2, %1, %0|%0, %1, %2}"
9849 [(set_attr "type" "sselog")
9850 (set_attr "prefix" "vex")
9851 (set_attr "mode" "OI")])
;; punpckhwd / vpunpckhwd on V8HI.  The parallel selects indices 4..7
;; paired with 12..15.  Alternative 0 is the legacy two-operand form
;; (destination tied to operand 1); alternative 1 is the AVX form.
9853 (define_insn "vec_interleave_highv8hi"
9854 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
9857 (match_operand:V8HI 1 "register_operand" "0,x")
9858 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm"))
9859 (parallel [(const_int 4) (const_int 12)
9860 (const_int 5) (const_int 13)
9861 (const_int 6) (const_int 14)
9862 (const_int 7) (const_int 15)])))]
9865 punpckhwd\t{%2, %0|%0, %2}
9866 vpunpckhwd\t{%2, %1, %0|%0, %1, %2}"
9867 [(set_attr "isa" "noavx,avx")
9868 (set_attr "type" "sselog")
9869 (set_attr "prefix_data16" "1,*")
9870 (set_attr "prefix" "orig,vex")
9871 (set_attr "mode" "TI")])
;; vpunpcklwd on V16HI.  The parallel selects index pairs (0..3 with
;; 16..19), then (8..11 with 24..27).
9873 (define_insn "avx2_interleave_lowv16hi"
9874 [(set (match_operand:V16HI 0 "register_operand" "=x")
9877 (match_operand:V16HI 1 "register_operand" "x")
9878 (match_operand:V16HI 2 "nonimmediate_operand" "xm"))
9879 (parallel [(const_int 0) (const_int 16)
9880 (const_int 1) (const_int 17)
9881 (const_int 2) (const_int 18)
9882 (const_int 3) (const_int 19)
9883 (const_int 8) (const_int 24)
9884 (const_int 9) (const_int 25)
9885 (const_int 10) (const_int 26)
9886 (const_int 11) (const_int 27)])))]
9888 "vpunpcklwd\t{%2, %1, %0|%0, %1, %2}"
9889 [(set_attr "type" "sselog")
9890 (set_attr "prefix" "vex")
9891 (set_attr "mode" "OI")])
;; punpcklwd / vpunpcklwd on V8HI.  The parallel selects indices 0..3
;; paired with 8..11.  Alternative 0 is the legacy two-operand form
;; (destination tied to operand 1); alternative 1 is the AVX form.
9893 (define_insn "vec_interleave_lowv8hi"
9894 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
9897 (match_operand:V8HI 1 "register_operand" "0,x")
9898 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm"))
9899 (parallel [(const_int 0) (const_int 8)
9900 (const_int 1) (const_int 9)
9901 (const_int 2) (const_int 10)
9902 (const_int 3) (const_int 11)])))]
9905 punpcklwd\t{%2, %0|%0, %2}
9906 vpunpcklwd\t{%2, %1, %0|%0, %1, %2}"
9907 [(set_attr "isa" "noavx,avx")
9908 (set_attr "type" "sselog")
9909 (set_attr "prefix_data16" "1,*")
9910 (set_attr "prefix" "orig,vex")
9911 (set_attr "mode" "TI")])
;; vpunpckhdq on V8SI.  The parallel selects index pairs (2,10), (3,11),
;; (6,14), (7,15).
9913 (define_insn "avx2_interleave_highv8si"
9914 [(set (match_operand:V8SI 0 "register_operand" "=x")
9917 (match_operand:V8SI 1 "register_operand" "x")
9918 (match_operand:V8SI 2 "nonimmediate_operand" "xm"))
9919 (parallel [(const_int 2) (const_int 10)
9920 (const_int 3) (const_int 11)
9921 (const_int 6) (const_int 14)
9922 (const_int 7) (const_int 15)])))]
9924 "vpunpckhdq\t{%2, %1, %0|%0, %1, %2}"
9925 [(set_attr "type" "sselog")
9926 (set_attr "prefix" "vex")
9927 (set_attr "mode" "OI")])
;; vpunpckhdq on V16SI with optional masking (<mask_name>).  For each
;; group of four elements the parallel picks indices 2 and 3 of that
;; group, each paired with the index 16 higher.
9929 (define_insn "<mask_codefor>avx512f_interleave_highv16si<mask_name>"
9930 [(set (match_operand:V16SI 0 "register_operand" "=v")
9933 (match_operand:V16SI 1 "register_operand" "v")
9934 (match_operand:V16SI 2 "nonimmediate_operand" "vm"))
9935 (parallel [(const_int 2) (const_int 18)
9936 (const_int 3) (const_int 19)
9937 (const_int 6) (const_int 22)
9938 (const_int 7) (const_int 23)
9939 (const_int 10) (const_int 26)
9940 (const_int 11) (const_int 27)
9941 (const_int 14) (const_int 30)
9942 (const_int 15) (const_int 31)])))]
9944 "vpunpckhdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9945 [(set_attr "type" "sselog")
9946 (set_attr "prefix" "evex")
9947 (set_attr "mode" "XI")])
;; punpckhdq / vpunpckhdq on V4SI.  The parallel selects index pairs (2,6)
;; and (3,7).  Alternative 0 is the legacy two-operand form (destination
;; tied to operand 1); alternative 1 is the AVX form.
9950 (define_insn "vec_interleave_highv4si"
9951 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
9954 (match_operand:V4SI 1 "register_operand" "0,x")
9955 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm"))
9956 (parallel [(const_int 2) (const_int 6)
9957 (const_int 3) (const_int 7)])))]
9960 punpckhdq\t{%2, %0|%0, %2}
9961 vpunpckhdq\t{%2, %1, %0|%0, %1, %2}"
9962 [(set_attr "isa" "noavx,avx")
9963 (set_attr "type" "sselog")
9964 (set_attr "prefix_data16" "1,*")
9965 (set_attr "prefix" "orig,vex")
9966 (set_attr "mode" "TI")])
;; vpunpckldq on V8SI.  The parallel selects index pairs (0,8), (1,9),
;; (4,12), (5,13).
9968 (define_insn "avx2_interleave_lowv8si"
9969 [(set (match_operand:V8SI 0 "register_operand" "=x")
9972 (match_operand:V8SI 1 "register_operand" "x")
9973 (match_operand:V8SI 2 "nonimmediate_operand" "xm"))
9974 (parallel [(const_int 0) (const_int 8)
9975 (const_int 1) (const_int 9)
9976 (const_int 4) (const_int 12)
9977 (const_int 5) (const_int 13)])))]
9979 "vpunpckldq\t{%2, %1, %0|%0, %1, %2}"
9980 [(set_attr "type" "sselog")
9981 (set_attr "prefix" "vex")
9982 (set_attr "mode" "OI")])
;; vpunpckldq on V16SI with optional masking (<mask_name>).  For each
;; group of four elements the parallel picks indices 0 and 1 of that
;; group, each paired with the index 16 higher.
9984 (define_insn "<mask_codefor>avx512f_interleave_lowv16si<mask_name>"
9985 [(set (match_operand:V16SI 0 "register_operand" "=v")
9988 (match_operand:V16SI 1 "register_operand" "v")
9989 (match_operand:V16SI 2 "nonimmediate_operand" "vm"))
9990 (parallel [(const_int 0) (const_int 16)
9991 (const_int 1) (const_int 17)
9992 (const_int 4) (const_int 20)
9993 (const_int 5) (const_int 21)
9994 (const_int 8) (const_int 24)
9995 (const_int 9) (const_int 25)
9996 (const_int 12) (const_int 28)
9997 (const_int 13) (const_int 29)])))]
9999 "vpunpckldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10000 [(set_attr "type" "sselog")
10001 (set_attr "prefix" "evex")
10002 (set_attr "mode" "XI")])
;; punpckldq / vpunpckldq on V4SI.  The parallel selects index pairs (0,4)
;; and (1,5).  Alternative 0 is the legacy two-operand form (destination
;; tied to operand 1); alternative 1 is the AVX form.
10004 (define_insn "vec_interleave_lowv4si"
10005 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
10008 (match_operand:V4SI 1 "register_operand" "0,x")
10009 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm"))
10010 (parallel [(const_int 0) (const_int 4)
10011 (const_int 1) (const_int 5)])))]
10014 punpckldq\t{%2, %0|%0, %2}
10015 vpunpckldq\t{%2, %1, %0|%0, %1, %2}"
10016 [(set_attr "isa" "noavx,avx")
10017 (set_attr "type" "sselog")
10018 (set_attr "prefix_data16" "1,*")
10019 (set_attr "prefix" "orig,vex")
10020 (set_attr "mode" "TI")])
;; vec_interleave_high expander for the 256-bit integer modes (VI_256).
;; It emits the avx2 low and high interleave insns into temporaries t1 and
;; t2, then combines them with avx2_permv2ti using the selector
;; 1 + (3 << 4), and moves the V4DI result back into operand 0 in <MODE>.
;; NOTE(review): that selector presumably picks the upper 128-bit half of
;; t1 and of t2 -- confirm against the avx2_permv2ti pattern.
;; The operands carry no constraint strings, matching every other
;; define_expand in this file.
10022 (define_expand "vec_interleave_high<mode>"
10023 [(match_operand:VI_256 0 "register_operand")
10024 (match_operand:VI_256 1 "register_operand")
10025 (match_operand:VI_256 2 "nonimmediate_operand")]
10028 rtx t1 = gen_reg_rtx (<MODE>mode);
10029 rtx t2 = gen_reg_rtx (<MODE>mode);
10030 rtx t3 = gen_reg_rtx (V4DImode);
10031 emit_insn (gen_avx2_interleave_low<mode> (t1, operands[1], operands[2]));
10032 emit_insn (gen_avx2_interleave_high<mode> (t2, operands[1], operands[2]));
10033 emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, t1),
10034 gen_lowpart (V4DImode, t2),
10035 GEN_INT (1 + (3 << 4))));
10036 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, t3));
;; Expander for the low interleave of 256-bit integer vectors (VI_256).
;; Same sequence as the high variant: avx2 low/high interleaves into t1/t2,
;; then avx2_permv2ti on the V4DI views, here with selector
;; 0 + (2 << 4) = 0x20, and a final move into operand 0.
;; NOTE(review): 0x20 presumably picks the lower 128-bit half of each
;; temporary -- confirm against avx2_permv2ti.
10040 (define_expand "vec_interleave_low<mode>"
10041 [(match_operand:VI_256 0 "register_operand" "=x")
10042 (match_operand:VI_256 1 "register_operand" "x")
10043 (match_operand:VI_256 2 "nonimmediate_operand" "xm")]
10046 rtx t1 = gen_reg_rtx (<MODE>mode);
10047 rtx t2 = gen_reg_rtx (<MODE>mode);
10048 rtx t3 = gen_reg_rtx (V4DImode);
10049 emit_insn (gen_avx2_interleave_low<mode> (t1, operands[1], operands[2]));
10050 emit_insn (gen_avx2_interleave_high<mode> (t2, operands[1], operands[2]));
10051 emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, t1),
10052 gen_lowpart (V4DImode, t2),
10053 GEN_INT (0 + (2 << 4))));
10054 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, t3));
10058 ;; Modes handled by pinsr patterns.
;; V8HI is unconditional; V16QI and V4SI require TARGET_SSE4_1, and V2DI
;; additionally requires TARGET_64BIT.
10059 (define_mode_iterator PINSR_MODE
10060 [(V16QI "TARGET_SSE4_1") V8HI
10061 (V4SI "TARGET_SSE4_1")
10062 (V2DI "TARGET_SSE4_1 && TARGET_64BIT")])
;; Pattern-name prefix for each PINSR_MODE mode: "sse2" for V8HI and
;; "sse4_1" for V16QI, V4SI and V2DI.
10064 (define_mode_attr sse2p4_1
10065 [(V16QI "sse4_1") (V8HI "sse2")
10066 (V4SI "sse4_1") (V2DI "sse4_1")])
10068 ;; sse4_1_pinsrd must come before sse2_loadld since it is preferred.
;; Insert scalar operand 2 into one element of vector operand 1.  Operand 3
;; arrives as a one-hot constant: the insn condition requires exact_log2 of it
;; to be below the number of elements, and the output code rewrites it to that
;; log2 value before printing.  Alternatives 0/1 are the SSE forms (operand 1
;; tied to the destination), alternatives 2/3 the AVX three-operand forms.
;; When the element mode is narrower than SImode, one template per ISA prints
;; operand 2 with the %k modifier.
10069 (define_insn "<sse2p4_1>_pinsr<ssemodesuffix>"
10070 [(set (match_operand:PINSR_MODE 0 "register_operand" "=x,x,x,x")
10071 (vec_merge:PINSR_MODE
10072 (vec_duplicate:PINSR_MODE
10073 (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "r,m,r,m"))
10074 (match_operand:PINSR_MODE 1 "register_operand" "0,0,x,x")
10075 (match_operand:SI 3 "const_int_operand")))]
10077 && ((unsigned) exact_log2 (INTVAL (operands[3]))
10078 < GET_MODE_NUNITS (<MODE>mode))"
10080 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
10082 switch (which_alternative)
10085 if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
10086 return "pinsr<ssemodesuffix>\t{%3, %k2, %0|%0, %k2, %3}";
10089 return "pinsr<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}";
10091 if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
10092 return "vpinsr<ssemodesuffix>\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
10095 return "vpinsr<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
10097 gcc_unreachable ();
10100 [(set_attr "isa" "noavx,noavx,avx,avx")
10101 (set_attr "type" "sselog")
10102 (set (attr "prefix_rex")
10104 (and (not (match_test "TARGET_AVX"))
10105 (eq (const_string "<MODE>mode") (const_string "V2DImode")))
10107 (const_string "*")))
10108 (set (attr "prefix_data16")
10110 (and (not (match_test "TARGET_AVX"))
10111 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
10113 (const_string "*")))
10114 (set (attr "prefix_extra")
10116 (and (not (match_test "TARGET_AVX"))
10117 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
10119 (const_string "1")))
10120 (set_attr "length_immediate" "1")
10121 (set_attr "prefix" "orig,orig,vex,vex")
10122 (set_attr "mode" "TI")])
;; Masked insert of a quarter-width vector (operand 2) into a V16FI vector
;; (operand 1).  Operand 3 is the insert position 0..3; the switch turns it
;; into one of four selector constants (0xFFF, 0xF0FF, 0xFF0F, 0xFFF0, in
;; switch order) that the ..._32x4_1_mask insn recognises.  Operand 4 is
;; passed through to that insn.
;; NOTE(review): if the selector is read as a vec_merge bit mask, 0xFFF keeps
;; elements 0..11 of operand 1, which does not obviously correspond to insert
;; position 0.  The constants may only be consistent as tags shared with the
;; insn -- confirm before relying on the RTL semantics.
10124 (define_expand "avx512f_vinsert<shuffletype>32x4_mask"
10125 [(match_operand:V16FI 0 "register_operand")
10126 (match_operand:V16FI 1 "register_operand")
10127 (match_operand:<ssequartermode> 2 "nonimmediate_operand")
10128 (match_operand:SI 3 "const_0_to_3_operand")
10129 (match_operand:V16FI 4 "register_operand")
10130 (match_operand:<avx512fmaskmode> 5 "register_operand")]
10133 switch (INTVAL (operands[3]))
10136 emit_insn (gen_avx512f_vinsert<shuffletype>32x4_1_mask (operands[0],
10137 operands[1], operands[2], GEN_INT (0xFFF), operands[4],
10141 emit_insn (gen_avx512f_vinsert<shuffletype>32x4_1_mask (operands[0],
10142 operands[1], operands[2], GEN_INT (0xF0FF), operands[4],
10146 emit_insn (gen_avx512f_vinsert<shuffletype>32x4_1_mask (operands[0],
10147 operands[1], operands[2], GEN_INT (0xFF0F), operands[4],
10151 emit_insn (gen_avx512f_vinsert<shuffletype>32x4_1_mask (operands[0],
10152 operands[1], operands[2], GEN_INT (0xFFF0), operands[4],
10156 gcc_unreachable ();
;; Insn form of the 32x4 insert.  Operand 3 must be one of the selector
;; constants 0xFFF, 0xF0FF, 0xFF0F or 0xFFF0; any other value reaches
;; gcc_unreachable.  The output code replaces operand 3 with the local `mask'
;; (presumably the insert position 0..3 in the order tested -- confirm) and
;; prints vinsert<shuffletype>32x4 with optional masking via <mask_operand4>.
10162 (define_insn "<mask_codefor>avx512f_vinsert<shuffletype>32x4_1<mask_name>"
10163 [(set (match_operand:V16FI 0 "register_operand" "=v")
10165 (match_operand:V16FI 1 "register_operand" "v")
10166 (vec_duplicate:V16FI
10167 (match_operand:<ssequartermode> 2 "nonimmediate_operand" "vm"))
10168 (match_operand:SI 3 "const_int_operand" "n")))]
10172 if (INTVAL (operands[3]) == 0xFFF)
10174 else if ( INTVAL (operands[3]) == 0xF0FF)
10176 else if ( INTVAL (operands[3]) == 0xFF0F)
10178 else if ( INTVAL (operands[3]) == 0xFFF0)
10181 gcc_unreachable ();
10183 operands[3] = GEN_INT (mask);
10185 return "vinsert<shuffletype>32x4\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}";
10187 [(set_attr "type" "sselog")
10188 (set_attr "length_immediate" "1")
10189 (set_attr "prefix" "evex")
10190 (set_attr "mode" "<sseinsnmode>")])
;; Masked insert of a half-width vector (operand 2) into a V8FI vector
;; (operand 1).  Operand 3 (0 or 1) is read into `mask' and the work is
;; delegated to vec_set_lo_<mode>_mask or vec_set_hi_<mode>_mask, each called
;; with operands 0, 1, 2, 4 and 5 (presumably lo for 0 and hi for 1 --
;; confirm).
10192 (define_expand "avx512f_vinsert<shuffletype>64x4_mask"
10193 [(match_operand:V8FI 0 "register_operand")
10194 (match_operand:V8FI 1 "register_operand")
10195 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand")
10196 (match_operand:SI 3 "const_0_to_1_operand")
10197 (match_operand:V8FI 4 "register_operand")
10198 (match_operand:<avx512fmaskmode> 5 "register_operand")]
10201 int mask = INTVAL (operands[3]);
10203 emit_insn (gen_vec_set_lo_<mode>_mask
10204 (operands[0], operands[1], operands[2],
10205 operands[4], operands[5]));
10207 emit_insn (gen_vec_set_hi_<mode>_mask
10208 (operands[0], operands[1], operands[2],
10209 operands[4], operands[5]));
;; Set the low half of a V8FI vector.  The source expression lists operand 2
;; first, followed by elements 4..7 of operand 1, so operand 2 supplies the
;; low half and the high half of operand 1 is kept.  Printed as
;; vinsert<shuffletype>64x4 with immediate 0 and optional masking via
;; <mask_operand3>.
10213 (define_insn "vec_set_lo_<mode><mask_name>"
10214 [(set (match_operand:V8FI 0 "register_operand" "=v")
10216 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
10217 (vec_select:<ssehalfvecmode>
10218 (match_operand:V8FI 1 "register_operand" "v")
10219 (parallel [(const_int 4) (const_int 5)
10220 (const_int 6) (const_int 7)]))))]
10222 "vinsert<shuffletype>64x4\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, $0x0}"
10223 [(set_attr "type" "sselog")
10224 (set_attr "length_immediate" "1")
10225 (set_attr "prefix" "evex")
10226 (set_attr "mode" "XI")])
;; Set the high half of a V8FI vector: keep elements 0..3 of operand 1 and
;; place the half-width operand 2 above them.  vec_concat puts its first
;; operand in the low-numbered elements, so the preserved low half of
;; operand 1 must be listed first and the inserted operand 2 second; the
;; opposite order would describe an insert into the low half, contradicting
;; the $0x1 immediate in the template.  Printed as vinsert<shuffletype>64x4
;; with immediate 1 and optional masking via <mask_operand3>.
(define_insn "vec_set_hi_<mode><mask_name>"
  [(set (match_operand:V8FI 0 "register_operand" "=v")
	(vec_concat:V8FI
	  (vec_select:<ssehalfvecmode>
	    (match_operand:V8FI 1 "register_operand" "v")
	    (parallel [(const_int 0) (const_int 1)
	      (const_int 2) (const_int 3)]))
	  (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))]
  "TARGET_AVX512F"
  "vinsert<shuffletype>64x4\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, $0x1}"
  [(set_attr "type" "sselog")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "XI")])
;; Masked shuffle of V8FI in two-element groups.  The 8-bit immediate
;; (operand 3) is split into four 2-bit fields; each field f is expanded to
;; the consecutive index pair (2f, 2f+1).  The pairs built from bits 4-5 and
;; 6-7 are offset by 8, so they fall in the range 8..15.  The eight indices
;; plus operands 4 and 5 are handed to the ..._64x2_1_mask insn.
10243 (define_expand "avx512f_shuf_<shuffletype>64x2_mask"
10244 [(match_operand:V8FI 0 "register_operand")
10245 (match_operand:V8FI 1 "register_operand")
10246 (match_operand:V8FI 2 "nonimmediate_operand")
10247 (match_operand:SI 3 "const_0_to_255_operand")
10248 (match_operand:V8FI 4 "register_operand")
10249 (match_operand:QI 5 "register_operand")]
10252 int mask = INTVAL (operands[3]);
10253 emit_insn (gen_avx512f_shuf_<shuffletype>64x2_1_mask
10254 (operands[0], operands[1], operands[2],
10255 GEN_INT (((mask >> 0) & 3) * 2),
10256 GEN_INT (((mask >> 0) & 3) * 2 + 1),
10257 GEN_INT (((mask >> 2) & 3) * 2),
10258 GEN_INT (((mask >> 2) & 3) * 2 + 1),
10259 GEN_INT (((mask >> 4) & 3) * 2 + 8),
10260 GEN_INT (((mask >> 4) & 3) * 2 + 9),
10261 GEN_INT (((mask >> 6) & 3) * 2 + 8),
10262 GEN_INT (((mask >> 6) & 3) * 2 + 9),
10263 operands[4], operands[5]));
;; Insn form of the 64x2 shuffle over the concatenation of operands 1 and 2.
;; Operands 3..6 are indices in 0..7 and operands 7..10 indices in 8..15; the
;; condition requires each pair (3,4), (5,6), (7,8), (9,10) to be consecutive.
;; The output code rebuilds the 8-bit immediate as
;;   op3/2 | op5/2 << 2 | (op7-8)/2 << 4 | (op9-8)/2 << 6
;; stores it in operand 3 and prints vshuf<shuffletype>64x2.
10267 (define_insn "avx512f_shuf_<shuffletype>64x2_1<mask_name>"
10268 [(set (match_operand:V8FI 0 "register_operand" "=v")
10270 (vec_concat:<ssedoublemode>
10271 (match_operand:V8FI 1 "register_operand" "v")
10272 (match_operand:V8FI 2 "nonimmediate_operand" "vm"))
10273 (parallel [(match_operand 3 "const_0_to_7_operand")
10274 (match_operand 4 "const_0_to_7_operand")
10275 (match_operand 5 "const_0_to_7_operand")
10276 (match_operand 6 "const_0_to_7_operand")
10277 (match_operand 7 "const_8_to_15_operand")
10278 (match_operand 8 "const_8_to_15_operand")
10279 (match_operand 9 "const_8_to_15_operand")
10280 (match_operand 10 "const_8_to_15_operand")])))]
10282 && (INTVAL (operands[3]) == (INTVAL (operands[4]) - 1)
10283 && INTVAL (operands[5]) == (INTVAL (operands[6]) - 1)
10284 && INTVAL (operands[7]) == (INTVAL (operands[8]) - 1)
10285 && INTVAL (operands[9]) == (INTVAL (operands[10]) - 1))"
10288 mask = INTVAL (operands[3]) / 2;
10289 mask |= INTVAL (operands[5]) / 2 << 2;
10290 mask |= (INTVAL (operands[7]) - 8) / 2 << 4;
10291 mask |= (INTVAL (operands[9]) - 8) / 2 << 6;
10292 operands[3] = GEN_INT (mask);
10294 return "vshuf<shuffletype>64x2\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
10296 [(set_attr "type" "sselog")
10297 (set_attr "length_immediate" "1")
10298 (set_attr "prefix" "evex")
10299 (set_attr "mode" "<sseinsnmode>")])
;; Masked shuffle of V16FI in four-element groups.  The 8-bit immediate
;; (operand 3) is split into four 2-bit fields; each field f is expanded to
;; the four consecutive indices 4f .. 4f+3.  The groups built from bits 4-5
;; and 6-7 are offset by 16, so they fall in the range 16..31.  The sixteen
;; indices plus operands 4 and 5 are handed to the ..._32x4_1_mask insn.
10301 (define_expand "avx512f_shuf_<shuffletype>32x4_mask"
10302 [(match_operand:V16FI 0 "register_operand")
10303 (match_operand:V16FI 1 "register_operand")
10304 (match_operand:V16FI 2 "nonimmediate_operand")
10305 (match_operand:SI 3 "const_0_to_255_operand")
10306 (match_operand:V16FI 4 "register_operand")
10307 (match_operand:HI 5 "register_operand")]
10310 int mask = INTVAL (operands[3]);
10311 emit_insn (gen_avx512f_shuf_<shuffletype>32x4_1_mask
10312 (operands[0], operands[1], operands[2],
10313 GEN_INT (((mask >> 0) & 3) * 4),
10314 GEN_INT (((mask >> 0) & 3) * 4 + 1),
10315 GEN_INT (((mask >> 0) & 3) * 4 + 2),
10316 GEN_INT (((mask >> 0) & 3) * 4 + 3),
10317 GEN_INT (((mask >> 2) & 3) * 4),
10318 GEN_INT (((mask >> 2) & 3) * 4 + 1),
10319 GEN_INT (((mask >> 2) & 3) * 4 + 2),
10320 GEN_INT (((mask >> 2) & 3) * 4 + 3),
10321 GEN_INT (((mask >> 4) & 3) * 4 + 16),
10322 GEN_INT (((mask >> 4) & 3) * 4 + 17),
10323 GEN_INT (((mask >> 4) & 3) * 4 + 18),
10324 GEN_INT (((mask >> 4) & 3) * 4 + 19),
10325 GEN_INT (((mask >> 6) & 3) * 4 + 16),
10326 GEN_INT (((mask >> 6) & 3) * 4 + 17),
10327 GEN_INT (((mask >> 6) & 3) * 4 + 18),
10328 GEN_INT (((mask >> 6) & 3) * 4 + 19),
10329 operands[4], operands[5]));
;; Insn form of the 32x4 shuffle over the concatenation of operands 1 and 2.
;; Operands 3..10 are indices in 0..15 and operands 11..18 indices in 16..31.
;; The condition requires each group of four (3-6, 7-10, 11-14, 15-18) to be
;; consecutive, checked relative to the first index of the group.  The output
;; code rebuilds the 8-bit immediate as
;;   op3/4 | op7/4 << 2 | (op11-16)/4 << 4 | (op15-16)/4 << 6
;; stores it in operand 3 and prints vshuf<shuffletype>32x4.
10333 (define_insn "avx512f_shuf_<shuffletype>32x4_1<mask_name>"
10334 [(set (match_operand:V16FI 0 "register_operand" "=v")
10336 (vec_concat:<ssedoublemode>
10337 (match_operand:V16FI 1 "register_operand" "v")
10338 (match_operand:V16FI 2 "nonimmediate_operand" "vm"))
10339 (parallel [(match_operand 3 "const_0_to_15_operand")
10340 (match_operand 4 "const_0_to_15_operand")
10341 (match_operand 5 "const_0_to_15_operand")
10342 (match_operand 6 "const_0_to_15_operand")
10343 (match_operand 7 "const_0_to_15_operand")
10344 (match_operand 8 "const_0_to_15_operand")
10345 (match_operand 9 "const_0_to_15_operand")
10346 (match_operand 10 "const_0_to_15_operand")
10347 (match_operand 11 "const_16_to_31_operand")
10348 (match_operand 12 "const_16_to_31_operand")
10349 (match_operand 13 "const_16_to_31_operand")
10350 (match_operand 14 "const_16_to_31_operand")
10351 (match_operand 15 "const_16_to_31_operand")
10352 (match_operand 16 "const_16_to_31_operand")
10353 (match_operand 17 "const_16_to_31_operand")
10354 (match_operand 18 "const_16_to_31_operand")])))]
10356 && (INTVAL (operands[3]) == (INTVAL (operands[4]) - 1)
10357 && INTVAL (operands[3]) == (INTVAL (operands[5]) - 2)
10358 && INTVAL (operands[3]) == (INTVAL (operands[6]) - 3)
10359 && INTVAL (operands[7]) == (INTVAL (operands[8]) - 1)
10360 && INTVAL (operands[7]) == (INTVAL (operands[9]) - 2)
10361 && INTVAL (operands[7]) == (INTVAL (operands[10]) - 3)
10362 && INTVAL (operands[11]) == (INTVAL (operands[12]) - 1)
10363 && INTVAL (operands[11]) == (INTVAL (operands[13]) - 2)
10364 && INTVAL (operands[11]) == (INTVAL (operands[14]) - 3)
10365 && INTVAL (operands[15]) == (INTVAL (operands[16]) - 1)
10366 && INTVAL (operands[15]) == (INTVAL (operands[17]) - 2)
10367 && INTVAL (operands[15]) == (INTVAL (operands[18]) - 3))"
10370 mask = INTVAL (operands[3]) / 4;
10371 mask |= INTVAL (operands[7]) / 4 << 2;
10372 mask |= (INTVAL (operands[11]) - 16) / 4 << 4;
10373 mask |= (INTVAL (operands[15]) - 16) / 4 << 6;
10374 operands[3] = GEN_INT (mask);
10376 return "vshuf<shuffletype>32x4\t{%3, %2, %1, %0<mask_operand19>|%0<mask_operand19>, %1, %2, %3}";
10378 [(set_attr "type" "sselog")
10379 (set_attr "length_immediate" "1")
10380 (set_attr "prefix" "evex")
10381 (set_attr "mode" "<sseinsnmode>")])
;; Masked V16SI dword shuffle.  The 8-bit immediate (operand 2) is split into
;; four 2-bit fields; the same four fields are emitted four times with
;; offsets 0, 4, 8 and 12, giving the sixteen element indices expected by
;; avx512f_pshufd_1_mask.  Operands 3 and 4 are passed through.
10383 (define_expand "avx512f_pshufdv3_mask"
10384 [(match_operand:V16SI 0 "register_operand")
10385 (match_operand:V16SI 1 "nonimmediate_operand")
10386 (match_operand:SI 2 "const_0_to_255_operand")
10387 (match_operand:V16SI 3 "register_operand")
10388 (match_operand:HI 4 "register_operand")]
10391 int mask = INTVAL (operands[2]);
10392 emit_insn (gen_avx512f_pshufd_1_mask (operands[0], operands[1],
10393 GEN_INT ((mask >> 0) & 3),
10394 GEN_INT ((mask >> 2) & 3),
10395 GEN_INT ((mask >> 4) & 3),
10396 GEN_INT ((mask >> 6) & 3),
10397 GEN_INT (((mask >> 0) & 3) + 4),
10398 GEN_INT (((mask >> 2) & 3) + 4),
10399 GEN_INT (((mask >> 4) & 3) + 4),
10400 GEN_INT (((mask >> 6) & 3) + 4),
10401 GEN_INT (((mask >> 0) & 3) + 8),
10402 GEN_INT (((mask >> 2) & 3) + 8),
10403 GEN_INT (((mask >> 4) & 3) + 8),
10404 GEN_INT (((mask >> 6) & 3) + 8),
10405 GEN_INT (((mask >> 0) & 3) + 12),
10406 GEN_INT (((mask >> 2) & 3) + 12),
10407 GEN_INT (((mask >> 4) & 3) + 12),
10408 GEN_INT (((mask >> 6) & 3) + 12),
10409 operands[3], operands[4]));
;; Insn form of the V16SI dword shuffle.  Operands 2..5 are indices in 0..3;
;; the condition requires operands 6..9, 10..13 and 14..17 to equal operands
;; 2..5 plus 4, 8 and 12 respectively, i.e. the same pattern in every group of
;; four.  The output code packs operands 2..5 into an 8-bit immediate (two
;; bits each, operand 2 lowest), stores it in operand 2 and prints vpshufd.
10413 (define_insn "avx512f_pshufd_1<mask_name>"
10414 [(set (match_operand:V16SI 0 "register_operand" "=v")
10416 (match_operand:V16SI 1 "nonimmediate_operand" "vm")
10417 (parallel [(match_operand 2 "const_0_to_3_operand")
10418 (match_operand 3 "const_0_to_3_operand")
10419 (match_operand 4 "const_0_to_3_operand")
10420 (match_operand 5 "const_0_to_3_operand")
10421 (match_operand 6 "const_4_to_7_operand")
10422 (match_operand 7 "const_4_to_7_operand")
10423 (match_operand 8 "const_4_to_7_operand")
10424 (match_operand 9 "const_4_to_7_operand")
10425 (match_operand 10 "const_8_to_11_operand")
10426 (match_operand 11 "const_8_to_11_operand")
10427 (match_operand 12 "const_8_to_11_operand")
10428 (match_operand 13 "const_8_to_11_operand")
10429 (match_operand 14 "const_12_to_15_operand")
10430 (match_operand 15 "const_12_to_15_operand")
10431 (match_operand 16 "const_12_to_15_operand")
10432 (match_operand 17 "const_12_to_15_operand")])))]
10434 && INTVAL (operands[2]) + 4 == INTVAL (operands[6])
10435 && INTVAL (operands[3]) + 4 == INTVAL (operands[7])
10436 && INTVAL (operands[4]) + 4 == INTVAL (operands[8])
10437 && INTVAL (operands[5]) + 4 == INTVAL (operands[9])
10438 && INTVAL (operands[2]) + 8 == INTVAL (operands[10])
10439 && INTVAL (operands[3]) + 8 == INTVAL (operands[11])
10440 && INTVAL (operands[4]) + 8 == INTVAL (operands[12])
10441 && INTVAL (operands[5]) + 8 == INTVAL (operands[13])
10442 && INTVAL (operands[2]) + 12 == INTVAL (operands[14])
10443 && INTVAL (operands[3]) + 12 == INTVAL (operands[15])
10444 && INTVAL (operands[4]) + 12 == INTVAL (operands[16])
10445 && INTVAL (operands[5]) + 12 == INTVAL (operands[17])"
10448 mask |= INTVAL (operands[2]) << 0;
10449 mask |= INTVAL (operands[3]) << 2;
10450 mask |= INTVAL (operands[4]) << 4;
10451 mask |= INTVAL (operands[5]) << 6;
10452 operands[2] = GEN_INT (mask);
10454 return "vpshufd\t{%2, %1, %0<mask_operand18>|%0<mask_operand18>, %1, %2}";
10456 [(set_attr "type" "sselog1")
10457 (set_attr "prefix" "evex")
10458 (set_attr "length_immediate" "1")
10459 (set_attr "mode" "XI")])
;; V8SI dword shuffle expander.  The 8-bit immediate (operand 2) is split
;; into four 2-bit fields, emitted once as-is and once with 4 added, giving
;; the eight element indices expected by avx2_pshufd_1.
10461 (define_expand "avx2_pshufdv3"
10462 [(match_operand:V8SI 0 "register_operand")
10463 (match_operand:V8SI 1 "nonimmediate_operand")
10464 (match_operand:SI 2 "const_0_to_255_operand")]
10467 int mask = INTVAL (operands[2]);
10468 emit_insn (gen_avx2_pshufd_1 (operands[0], operands[1],
10469 GEN_INT ((mask >> 0) & 3),
10470 GEN_INT ((mask >> 2) & 3),
10471 GEN_INT ((mask >> 4) & 3),
10472 GEN_INT ((mask >> 6) & 3),
10473 GEN_INT (((mask >> 0) & 3) + 4),
10474 GEN_INT (((mask >> 2) & 3) + 4),
10475 GEN_INT (((mask >> 4) & 3) + 4),
10476 GEN_INT (((mask >> 6) & 3) + 4)));
;; Insn form of the V8SI dword shuffle.  Operands 2..5 are indices in 0..3
;; and the condition requires operands 6..9 to equal them plus 4.  The output
;; code packs operands 2..5 into an 8-bit immediate (two bits each, operand 2
;; lowest), stores it in operand 2 and prints vpshufd.
10480 (define_insn "avx2_pshufd_1"
10481 [(set (match_operand:V8SI 0 "register_operand" "=x")
10483 (match_operand:V8SI 1 "nonimmediate_operand" "xm")
10484 (parallel [(match_operand 2 "const_0_to_3_operand")
10485 (match_operand 3 "const_0_to_3_operand")
10486 (match_operand 4 "const_0_to_3_operand")
10487 (match_operand 5 "const_0_to_3_operand")
10488 (match_operand 6 "const_4_to_7_operand")
10489 (match_operand 7 "const_4_to_7_operand")
10490 (match_operand 8 "const_4_to_7_operand")
10491 (match_operand 9 "const_4_to_7_operand")])))]
10493 && INTVAL (operands[2]) + 4 == INTVAL (operands[6])
10494 && INTVAL (operands[3]) + 4 == INTVAL (operands[7])
10495 && INTVAL (operands[4]) + 4 == INTVAL (operands[8])
10496 && INTVAL (operands[5]) + 4 == INTVAL (operands[9])"
10499 mask |= INTVAL (operands[2]) << 0;
10500 mask |= INTVAL (operands[3]) << 2;
10501 mask |= INTVAL (operands[4]) << 4;
10502 mask |= INTVAL (operands[5]) << 6;
10503 operands[2] = GEN_INT (mask);
10505 return "vpshufd\t{%2, %1, %0|%0, %1, %2}";
10507 [(set_attr "type" "sselog1")
10508 (set_attr "prefix" "vex")
10509 (set_attr "length_immediate" "1")
10510 (set_attr "mode" "OI")])
;; V4SI dword shuffle expander.  Operand 2 is any const_int; only its low
;; eight bits are used, split into four 2-bit element indices that are passed
;; to sse2_pshufd_1.
10512 (define_expand "sse2_pshufd"
10513 [(match_operand:V4SI 0 "register_operand")
10514 (match_operand:V4SI 1 "nonimmediate_operand")
10515 (match_operand:SI 2 "const_int_operand")]
10518 int mask = INTVAL (operands[2]);
10519 emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
10520 GEN_INT ((mask >> 0) & 3),
10521 GEN_INT ((mask >> 2) & 3),
10522 GEN_INT ((mask >> 4) & 3),
10523 GEN_INT ((mask >> 6) & 3)));
;; Insn form of the V4SI dword shuffle.  Operands 2..5 are element indices in
;; 0..3; the output code packs them into an 8-bit immediate (two bits each,
;; operand 2 lowest), stores it in operand 2 and prints %vpshufd.
10527 (define_insn "sse2_pshufd_1"
10528 [(set (match_operand:V4SI 0 "register_operand" "=x")
10530 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
10531 (parallel [(match_operand 2 "const_0_to_3_operand")
10532 (match_operand 3 "const_0_to_3_operand")
10533 (match_operand 4 "const_0_to_3_operand")
10534 (match_operand 5 "const_0_to_3_operand")])))]
10538 mask |= INTVAL (operands[2]) << 0;
10539 mask |= INTVAL (operands[3]) << 2;
10540 mask |= INTVAL (operands[4]) << 4;
10541 mask |= INTVAL (operands[5]) << 6;
10542 operands[2] = GEN_INT (mask);
10544 return "%vpshufd\t{%2, %1, %0|%0, %1, %2}";
10546 [(set_attr "type" "sselog1")
10547 (set_attr "prefix_data16" "1")
10548 (set_attr "prefix" "maybe_vex")
10549 (set_attr "length_immediate" "1")
10550 (set_attr "mode" "TI")])
;; V16HI low-word shuffle expander.  The 8-bit immediate (operand 2) is split
;; into four 2-bit fields, emitted once as-is (indices 0..3) and once with 8
;; added (indices 8..11), as expected by avx2_pshuflw_1.
10552 (define_expand "avx2_pshuflwv3"
10553 [(match_operand:V16HI 0 "register_operand")
10554 (match_operand:V16HI 1 "nonimmediate_operand")
10555 (match_operand:SI 2 "const_0_to_255_operand")]
10558 int mask = INTVAL (operands[2]);
10559 emit_insn (gen_avx2_pshuflw_1 (operands[0], operands[1],
10560 GEN_INT ((mask >> 0) & 3),
10561 GEN_INT ((mask >> 2) & 3),
10562 GEN_INT ((mask >> 4) & 3),
10563 GEN_INT ((mask >> 6) & 3),
10564 GEN_INT (((mask >> 0) & 3) + 8),
10565 GEN_INT (((mask >> 2) & 3) + 8),
10566 GEN_INT (((mask >> 4) & 3) + 8),
10567 GEN_INT (((mask >> 6) & 3) + 8)));
;; Insn form of the V16HI low-word shuffle.  Operands 2..5 are indices in
;; 0..3 and the condition requires operands 6..9 (range 8..11) to equal them
;; plus 8.  The output code packs operands 2..5 into an 8-bit immediate (two
;; bits each, operand 2 lowest), stores it in operand 2 and prints vpshuflw.
10571 (define_insn "avx2_pshuflw_1"
10572 [(set (match_operand:V16HI 0 "register_operand" "=x")
10574 (match_operand:V16HI 1 "nonimmediate_operand" "xm")
10575 (parallel [(match_operand 2 "const_0_to_3_operand")
10576 (match_operand 3 "const_0_to_3_operand")
10577 (match_operand 4 "const_0_to_3_operand")
10578 (match_operand 5 "const_0_to_3_operand")
10583 (match_operand 6 "const_8_to_11_operand")
10584 (match_operand 7 "const_8_to_11_operand")
10585 (match_operand 8 "const_8_to_11_operand")
10586 (match_operand 9 "const_8_to_11_operand")
10590 (const_int 15)])))]
10592 && INTVAL (operands[2]) + 8 == INTVAL (operands[6])
10593 && INTVAL (operands[3]) + 8 == INTVAL (operands[7])
10594 && INTVAL (operands[4]) + 8 == INTVAL (operands[8])
10595 && INTVAL (operands[5]) + 8 == INTVAL (operands[9])"
10598 mask |= INTVAL (operands[2]) << 0;
10599 mask |= INTVAL (operands[3]) << 2;
10600 mask |= INTVAL (operands[4]) << 4;
10601 mask |= INTVAL (operands[5]) << 6;
10602 operands[2] = GEN_INT (mask);
10604 return "vpshuflw\t{%2, %1, %0|%0, %1, %2}";
10606 [(set_attr "type" "sselog")
10607 (set_attr "prefix" "vex")
10608 (set_attr "length_immediate" "1")
10609 (set_attr "mode" "OI")])
;; V8HI low-word shuffle expander.  Operand 2 is any const_int; only its low
;; eight bits are used, split into four 2-bit indices (0..3) that are passed
;; to sse2_pshuflw_1.
10611 (define_expand "sse2_pshuflw"
10612 [(match_operand:V8HI 0 "register_operand")
10613 (match_operand:V8HI 1 "nonimmediate_operand")
10614 (match_operand:SI 2 "const_int_operand")]
10617 int mask = INTVAL (operands[2]);
10618 emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
10619 GEN_INT ((mask >> 0) & 3),
10620 GEN_INT ((mask >> 2) & 3),
10621 GEN_INT ((mask >> 4) & 3),
10622 GEN_INT ((mask >> 6) & 3)));
;; Insn form of the V8HI low-word shuffle.  Operands 2..5 are indices in
;; 0..3; the output code packs them into an 8-bit immediate (two bits each,
;; operand 2 lowest), stores it in operand 2 and prints %vpshuflw.
10626 (define_insn "sse2_pshuflw_1"
10627 [(set (match_operand:V8HI 0 "register_operand" "=x")
10629 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10630 (parallel [(match_operand 2 "const_0_to_3_operand")
10631 (match_operand 3 "const_0_to_3_operand")
10632 (match_operand 4 "const_0_to_3_operand")
10633 (match_operand 5 "const_0_to_3_operand")
10641 mask |= INTVAL (operands[2]) << 0;
10642 mask |= INTVAL (operands[3]) << 2;
10643 mask |= INTVAL (operands[4]) << 4;
10644 mask |= INTVAL (operands[5]) << 6;
10645 operands[2] = GEN_INT (mask);
10647 return "%vpshuflw\t{%2, %1, %0|%0, %1, %2}";
10649 [(set_attr "type" "sselog")
10650 (set_attr "prefix_data16" "0")
10651 (set_attr "prefix_rep" "1")
10652 (set_attr "prefix" "maybe_vex")
10653 (set_attr "length_immediate" "1")
10654 (set_attr "mode" "TI")])
;; V16HI high-word shuffle expander.  The 8-bit immediate (operand 2) is
;; split into four 2-bit fields, emitted once with 4 added (indices 4..7) and
;; once with 12 added (indices 12..15), as expected by avx2_pshufhw_1.
10656 (define_expand "avx2_pshufhwv3"
10657 [(match_operand:V16HI 0 "register_operand")
10658 (match_operand:V16HI 1 "nonimmediate_operand")
10659 (match_operand:SI 2 "const_0_to_255_operand")]
10662 int mask = INTVAL (operands[2]);
10663 emit_insn (gen_avx2_pshufhw_1 (operands[0], operands[1],
10664 GEN_INT (((mask >> 0) & 3) + 4),
10665 GEN_INT (((mask >> 2) & 3) + 4),
10666 GEN_INT (((mask >> 4) & 3) + 4),
10667 GEN_INT (((mask >> 6) & 3) + 4),
10668 GEN_INT (((mask >> 0) & 3) + 12),
10669 GEN_INT (((mask >> 2) & 3) + 12),
10670 GEN_INT (((mask >> 4) & 3) + 12),
10671 GEN_INT (((mask >> 6) & 3) + 12)));
;; Insn form of the V16HI high-word shuffle.  Operands 2..5 are indices in
;; 4..7 and the condition requires operands 6..9 (range 12..15) to equal them
;; plus 8.  The output code subtracts 4 from operands 2..5, packs the results
;; into an 8-bit immediate (two bits each, operand 2 lowest), stores it in
;; operand 2 and prints vpshufhw.
10675 (define_insn "avx2_pshufhw_1"
10676 [(set (match_operand:V16HI 0 "register_operand" "=x")
10678 (match_operand:V16HI 1 "nonimmediate_operand" "xm")
10679 (parallel [(const_int 0)
10683 (match_operand 2 "const_4_to_7_operand")
10684 (match_operand 3 "const_4_to_7_operand")
10685 (match_operand 4 "const_4_to_7_operand")
10686 (match_operand 5 "const_4_to_7_operand")
10691 (match_operand 6 "const_12_to_15_operand")
10692 (match_operand 7 "const_12_to_15_operand")
10693 (match_operand 8 "const_12_to_15_operand")
10694 (match_operand 9 "const_12_to_15_operand")])))]
10696 && INTVAL (operands[2]) + 8 == INTVAL (operands[6])
10697 && INTVAL (operands[3]) + 8 == INTVAL (operands[7])
10698 && INTVAL (operands[4]) + 8 == INTVAL (operands[8])
10699 && INTVAL (operands[5]) + 8 == INTVAL (operands[9])"
10702 mask |= (INTVAL (operands[2]) - 4) << 0;
10703 mask |= (INTVAL (operands[3]) - 4) << 2;
10704 mask |= (INTVAL (operands[4]) - 4) << 4;
10705 mask |= (INTVAL (operands[5]) - 4) << 6;
10706 operands[2] = GEN_INT (mask);
10708 return "vpshufhw\t{%2, %1, %0|%0, %1, %2}";
10710 [(set_attr "type" "sselog")
10711 (set_attr "prefix" "vex")
10712 (set_attr "length_immediate" "1")
10713 (set_attr "mode" "OI")])
;; V8HI high-word shuffle expander.  Operand 2 is any const_int; only its low
;; eight bits are used, split into four 2-bit fields that are each offset by
;; 4 (indices 4..7) and passed to sse2_pshufhw_1.
10715 (define_expand "sse2_pshufhw"
10716 [(match_operand:V8HI 0 "register_operand")
10717 (match_operand:V8HI 1 "nonimmediate_operand")
10718 (match_operand:SI 2 "const_int_operand")]
10721 int mask = INTVAL (operands[2]);
10722 emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
10723 GEN_INT (((mask >> 0) & 3) + 4),
10724 GEN_INT (((mask >> 2) & 3) + 4),
10725 GEN_INT (((mask >> 4) & 3) + 4),
10726 GEN_INT (((mask >> 6) & 3) + 4)));
;; Insn form of the V8HI high-word shuffle.  Operands 2..5 are indices in
;; 4..7; the output code subtracts 4 from each, packs the results into an
;; 8-bit immediate (two bits each, operand 2 lowest), stores it in operand 2
;; and prints %vpshufhw.
10730 (define_insn "sse2_pshufhw_1"
10731 [(set (match_operand:V8HI 0 "register_operand" "=x")
10733 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10734 (parallel [(const_int 0)
10738 (match_operand 2 "const_4_to_7_operand")
10739 (match_operand 3 "const_4_to_7_operand")
10740 (match_operand 4 "const_4_to_7_operand")
10741 (match_operand 5 "const_4_to_7_operand")])))]
10745 mask |= (INTVAL (operands[2]) - 4) << 0;
10746 mask |= (INTVAL (operands[3]) - 4) << 2;
10747 mask |= (INTVAL (operands[4]) - 4) << 4;
10748 mask |= (INTVAL (operands[5]) - 4) << 6;
10749 operands[2] = GEN_INT (mask);
10751 return "%vpshufhw\t{%2, %1, %0|%0, %1, %2}";
10753 [(set_attr "type" "sselog")
10754 (set_attr "prefix_rep" "1")
10755 (set_attr "prefix_data16" "0")
10756 (set_attr "prefix" "maybe_vex")
10757 (set_attr "length_immediate" "1")
10758 (set_attr "mode" "TI")])
;; Expander that loads the SImode operand 1 into a V4SI register.  The
;; preparation statement sets operand 2 to the V4SI zero vector (presumably
;; the value of the remaining elements -- confirm).
10760 (define_expand "sse2_loadd"
10761 [(set (match_operand:V4SI 0 "register_operand")
10763 (vec_duplicate:V4SI
10764 (match_operand:SI 1 "nonimmediate_operand"))
10768 "operands[2] = CONST0_RTX (V4SImode);")
;; Combine the SImode operand 2 with the V4SI operand 1 (a register or zero).
;; Five alternatives: 0 and 1 print %vmovd (memory or general-register source,
;; constraint C on operand 1), 2 and 3 print movss (isa noavx), and 4 prints
;; the three-operand vmovss (isa avx).
10770 (define_insn "sse2_loadld"
10771 [(set (match_operand:V4SI 0 "register_operand" "=x,Yi,x,x,x")
10773 (vec_duplicate:V4SI
10774 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x,x"))
10775 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,C,0,x")
10779 %vmovd\t{%2, %0|%0, %2}
10780 %vmovd\t{%2, %0|%0, %2}
10781 movss\t{%2, %0|%0, %2}
10782 movss\t{%2, %0|%0, %2}
10783 vmovss\t{%2, %1, %0|%0, %1, %2}"
10784 [(set_attr "isa" "sse2,*,noavx,noavx,avx")
10785 (set_attr "type" "ssemov")
10786 (set_attr "prefix" "maybe_vex,maybe_vex,orig,orig,vex")
10787 (set_attr "mode" "TI,TI,V4SF,SF,SF")])
;; Extract one element (index operand 2) of a VI12_128 register into a
;; general register (alternative 0, destination printed with %k0) or into
;; memory (alternative 1), using %vpextr<ssemodesuffix>.  The prefix_data16
;; and prefix_extra attributes single out alternative 0 in V8HImode.
10789 (define_insn "*vec_extract<mode>"
10790 [(set (match_operand:<ssescalarmode> 0 "nonimmediate_operand" "=r,m")
10791 (vec_select:<ssescalarmode>
10792 (match_operand:VI12_128 1 "register_operand" "x,x")
10794 [(match_operand:SI 2 "const_0_to_<ssescalarnummask>_operand")])))]
10797 %vpextr<ssemodesuffix>\t{%2, %1, %k0|%k0, %1, %2}
10798 %vpextr<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10799 [(set_attr "type" "sselog1")
10800 (set (attr "prefix_data16")
10802 (and (eq_attr "alternative" "0")
10803 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
10805 (const_string "*")))
10806 (set (attr "prefix_extra")
10808 (and (eq_attr "alternative" "0")
10809 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
10811 (const_string "1")))
10812 (set_attr "length_immediate" "1")
10813 (set_attr "prefix" "maybe_vex")
10814 (set_attr "mode" "TI")])
;; Extract element operand 2 (0..7) of a V8HI register into a general
;; register with pextrw.  Enabled only for TARGET_SSE2 without TARGET_SSE4_1.
10816 (define_insn "*vec_extractv8hi_sse2"
10817 [(set (match_operand:HI 0 "register_operand" "=r")
10819 (match_operand:V8HI 1 "register_operand" "x")
10821 [(match_operand:SI 2 "const_0_to_7_operand")])))]
10822 "TARGET_SSE2 && !TARGET_SSE4_1"
10823 "pextrw\t{%2, %1, %k0|%k0, %1, %2}"
10824 [(set_attr "type" "sselog1")
10825 (set_attr "prefix_data16" "1")
10826 (set_attr "length_immediate" "1")
10827 (set_attr "mode" "TI")])
;; Extract element operand 2 (0..15) of a V16QI register into an SWI48
;; general register (per the pattern name, as an extended value) with
;; %vpextrb; the destination is printed with %k0.
10829 (define_insn "*vec_extractv16qi_zext"
10830 [(set (match_operand:SWI48 0 "register_operand" "=r")
10833 (match_operand:V16QI 1 "register_operand" "x")
10835 [(match_operand:SI 2 "const_0_to_15_operand")]))))]
10837 "%vpextrb\t{%2, %1, %k0|%k0, %1, %2}"
10838 [(set_attr "type" "sselog1")
10839 (set_attr "prefix_extra" "1")
10840 (set_attr "length_immediate" "1")
10841 (set_attr "prefix" "maybe_vex")
10842 (set_attr "mode" "TI")])
;; Extract element operand 2 (0..7) of a V8HI register into an SWI48 general
;; register (per the pattern name, as an extended value) with %vpextrw; the
;; destination is printed with %k0.
10844 (define_insn "*vec_extractv8hi_zext"
10845 [(set (match_operand:SWI48 0 "register_operand" "=r")
10848 (match_operand:V8HI 1 "register_operand" "x")
10850 [(match_operand:SI 2 "const_0_to_7_operand")]))))]
10852 "%vpextrw\t{%2, %1, %k0|%k0, %1, %2}"
10853 [(set_attr "type" "sselog1")
10854 (set_attr "prefix_data16" "1")
10855 (set_attr "length_immediate" "1")
10856 (set_attr "prefix" "maybe_vex")
10857 (set_attr "mode" "TI")])
;; Extract element operand 2 of a VI12_128 vector that lives in offsettable
;; memory (constraint "o") into a general register.
10859 (define_insn "*vec_extract<mode>_mem"
10860 [(set (match_operand:<ssescalarmode> 0 "register_operand" "=r")
10861 (vec_select:<ssescalarmode>
10862 (match_operand:VI12_128 1 "memory_operand" "o")
10864 [(match_operand 2 "const_0_to_<ssescalarnummask>_operand")])))]
;; Extract element 0 of an <ssevecmode> vector into an SWI48 destination.
;; Four alternatives cover register and memory combinations; the condition
;; rejects the case where both operands are memory.  Alternative 1 is
;; restricted to isa sse4.
10868 (define_insn "*vec_extract<ssevecmodelower>_0"
10869 [(set (match_operand:SWI48 0 "nonimmediate_operand" "=r ,r,x ,m")
10871 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "mYj,x,xm,x")
10872 (parallel [(const_int 0)])))]
10873 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
10875 [(set_attr "isa" "*,sse4,*,*")])
;; Element 0 of a V4SI register extended into a DImode general register.
;; Requires TARGET_64BIT, TARGET_SSE2 and TARGET_INTER_UNIT_MOVES_FROM_VEC.
;; After reload it is split into a zero_extend:DI of operand 1, which the
;; preparation statement re-expresses as an SImode register with the same
;; register number.
10877 (define_insn_and_split "*vec_extractv4si_0_zext"
10878 [(set (match_operand:DI 0 "register_operand" "=r")
10881 (match_operand:V4SI 1 "register_operand" "x")
10882 (parallel [(const_int 0)]))))]
10883 "TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_FROM_VEC"
10885 "&& reload_completed"
10886 [(set (match_dup 0) (zero_extend:DI (match_dup 1)))]
10887 "operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));")
;; Extract element 0 of a V2DI vector into an SSE register or memory on
;; 32-bit targets (TARGET_SSE && !TARGET_64BIT); the condition rejects the
;; case where both operands are memory.
10889 (define_insn "*vec_extractv2di_0_sse"
10890 [(set (match_operand:DI 0 "nonimmediate_operand" "=x,m")
10892 (match_operand:V2DI 1 "nonimmediate_operand" "xm,x")
10893 (parallel [(const_int 0)])))]
10894 "TARGET_SSE && !TARGET_64BIT
10895 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
;; After reload, extracting element 0 from a vector register becomes a plain
;; move: operand 1 is re-expressed as a <MODE>mode register with the same
;; register number.
10899 [(set (match_operand:SWI48x 0 "nonimmediate_operand")
10901 (match_operand:<ssevecmode> 1 "register_operand")
10902 (parallel [(const_int 0)])))]
10903 "TARGET_SSE && reload_completed"
10904 [(set (match_dup 0) (match_dup 1))]
10905 "operands[1] = gen_rtx_REG (<MODE>mode, REGNO (operands[1]));")
;; Extract element operand 2 (0..3) of a V4SI register.  Alternative 0 writes
;; a general register or memory with %vpextrd.  Alternatives 1 (noavx) and 2
;; (avx) keep the result in an SSE register: operand 2 is rewritten to the
;; byte offset index * 4 and a psrldq / vpsrldq byte shift is printed.
10907 (define_insn "*vec_extractv4si"
10908 [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,x,x")
10910 (match_operand:V4SI 1 "register_operand" "x,0,x")
10911 (parallel [(match_operand:SI 2 "const_0_to_3_operand")])))]
10914 switch (which_alternative)
10917 return "%vpextrd\t{%2, %1, %0|%0, %1, %2}";
10920 operands [2] = GEN_INT (INTVAL (operands[2]) * 4);
10921 return "psrldq\t{%2, %0|%0, %2}";
10924 operands [2] = GEN_INT (INTVAL (operands[2]) * 4);
10925 return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
10928 gcc_unreachable ();
10931 [(set_attr "isa" "*,noavx,avx")
10932 (set_attr "type" "sselog1,sseishft1,sseishft1")
10933 (set_attr "prefix_extra" "1,*,*")
10934 (set_attr "length_immediate" "1")
10935 (set_attr "prefix" "maybe_vex,orig,vex")
10936 (set_attr "mode" "TI")])
;; Extract element operand 2 (0..3) of a V4SI register into a DImode general
;; register with %vpextrd, printing the destination with %k0.  Requires
;; TARGET_64BIT and TARGET_SSE4_1.
10938 (define_insn "*vec_extractv4si_zext"
10939 [(set (match_operand:DI 0 "register_operand" "=r")
10942 (match_operand:V4SI 1 "register_operand" "x")
10943 (parallel [(match_operand:SI 2 "const_0_to_3_operand")]))))]
10944 "TARGET_64BIT && TARGET_SSE4_1"
10945 "%vpextrd\t{%2, %1, %k0|%k0, %1, %2}"
10946 [(set_attr "type" "sselog1")
10947 (set_attr "prefix_extra" "1")
10948 (set_attr "length_immediate" "1")
10949 (set_attr "prefix" "maybe_vex")
10950 (set_attr "mode" "TI")])
;; Extract element operand 2 (0..3) of a V4SI vector in offsettable memory
;; into an SSE register (alternative 0) or a general register (alternative 1).
10952 (define_insn "*vec_extractv4si_mem"
10953 [(set (match_operand:SI 0 "register_operand" "=x,r")
10955 (match_operand:V4SI 1 "memory_operand" "o,o")
10956 (parallel [(match_operand 2 "const_0_to_3_operand")])))]
;; Element operand 2 (0..3) of a V4SI vector in offsettable memory, extended
;; into a DImode register (TARGET_64BIT && TARGET_SSE).  After reload it is
;; split into a zero_extend:DI of an SImode memory reference at byte offset
;; index * 4 from the original address.
10960 (define_insn_and_split "*vec_extractv4si_zext_mem"
10961 [(set (match_operand:DI 0 "register_operand" "=x,r")
10964 (match_operand:V4SI 1 "memory_operand" "o,o")
10965 (parallel [(match_operand:SI 2 "const_0_to_3_operand")]))))]
10966 "TARGET_64BIT && TARGET_SSE"
10968 "&& reload_completed"
10969 [(set (match_dup 0) (zero_extend:DI (match_dup 1)))]
10971 operands[1] = adjust_address (operands[1], SImode, INTVAL (operands[2]) * 4);
;; Extract element 1 of a V2DI vector.  Seven alternatives, selected by isa:
;; %vpextrq (x64_sse4), %vmovhps to memory, psrldq/vpsrldq by 8 bytes
;; (sse2_noavx / avx), movhlps (noavx), and two alternatives that read the
;; vector from offsettable memory.  The condition rejects the case where both
;; operands are memory.
10974 (define_insn "*vec_extractv2di_1"
10975 [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,m,x,x,x,x,r")
10977 (match_operand:V2DI 1 "nonimmediate_operand" "x ,x,0,x,x,o,o")
10978 (parallel [(const_int 1)])))]
10979 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
10981 %vpextrq\t{$1, %1, %0|%0, %1, 1}
10982 %vmovhps\t{%1, %0|%0, %1}
10983 psrldq\t{$8, %0|%0, 8}
10984 vpsrldq\t{$8, %1, %0|%0, %1, 8}
10985 movhlps\t{%1, %0|%0, %1}
10988 [(set_attr "isa" "x64_sse4,*,sse2_noavx,avx,noavx,*,x64")
10989 (set_attr "type" "sselog1,ssemov,sseishft1,sseishft1,ssemov,ssemov,imov")
10990 (set_attr "length_immediate" "1,*,1,1,*,*,*")
10991 (set_attr "prefix_rex" "1,*,*,*,*,*,*")
10992 (set_attr "prefix_extra" "1,*,*,*,*,*,*")
10993 (set_attr "prefix" "maybe_vex,maybe_vex,orig,vex,orig,*,*")
10994 (set_attr "mode" "TI,V2SF,TI,TI,V4SF,DI,DI")])
;; NOTE(review): the defining keyword line of this form is not visible in
;; this excerpt; the shape (pattern, condition, replacement, preparation
;; code) looks like a post-reload split -- confirm against the full file.
;; It rewrites a scalar element extract from a 128-bit integer vector in
;; memory into a plain scalar load: operand 1 is re-addressed in the element
;; mode at byte offset (element index * element size).
10997 [(set (match_operand:<ssescalarmode> 0 "register_operand")
10998 (vec_select:<ssescalarmode>
10999 (match_operand:VI_128 1 "memory_operand")
11001 [(match_operand 2 "const_0_to_<ssescalarnummask>_operand")])))]
11002 "TARGET_SSE && reload_completed"
11003 [(set (match_dup 0) (match_dup 1))]
11005 int offs = INTVAL (operands[2]) * GET_MODE_SIZE (<ssescalarmode>mode);
11007 operands[1] = adjust_address (operands[1], <ssescalarmode>mode, offs);
;; Broadcast an SI value to all four elements of a V4SI register.
;; Alternatives (see the "isa" attribute): (v)pshufd $0 from an SSE register
;; (SSE2), vbroadcastss from memory (AVX), and shufps $0 in place when the
;; source is already in the destination register (non-AVX).
;; NOTE(review): the insn condition is not visible in this excerpt.
11010 (define_insn "*vec_dupv4si"
11011 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
11012 (vec_duplicate:V4SI
11013 (match_operand:SI 1 "nonimmediate_operand" " x,m,0")))]
11016 %vpshufd\t{$0, %1, %0|%0, %1, 0}
11017 vbroadcastss\t{%1, %0|%0, %1}
11018 shufps\t{$0, %0, %0|%0, %0, 0}"
11019 [(set_attr "isa" "sse2,avx,noavx")
11020 (set_attr "type" "sselog1,ssemov,sselog1")
11021 (set_attr "length_immediate" "1,0,1")
11022 (set_attr "prefix_extra" "0,1,*")
11023 (set_attr "prefix" "maybe_vex,vex,orig")
11024 (set_attr "mode" "TI,V4SF,V4SF")])
;; Broadcast a DI value to both elements of a V2DI register.  Four
;; alternatives selected by the "isa" attribute (sse2_noavx, avx, sse3,
;; noavx).  Visible templates: vpunpcklqdq of operand 1 with itself (%d1
;; duplicates the operand) and (v)movddup from memory.
;; NOTE(review): the insn condition and the templates for the first and last
;; alternatives are not visible in this excerpt.
11026 (define_insn "*vec_dupv2di"
11027 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x,x")
11028 (vec_duplicate:V2DI
11029 (match_operand:DI 1 "nonimmediate_operand" " 0,x,m,0")))]
11033 vpunpcklqdq\t{%d1, %0|%0, %d1}
11034 %vmovddup\t{%1, %0|%0, %1}
11036 [(set_attr "isa" "sse2_noavx,avx,sse3,noavx")
11037 (set_attr "type" "sselog1,sselog1,sselog1,ssemov")
11038 (set_attr "prefix" "orig,vex,maybe_vex,orig")
11039 (set_attr "mode" "TI,TI,DF,V4SF")])
;; Build a V2SI from two SI operands (operand 1 low, operand 2 high).
;; Alternatives, in order: pinsrd / vpinsrd inserting operand 2 at index 1,
;; punpckldq / vpunpckldq of two SSE registers, (v)movd of operand 1 when
;; operand 2 matches "C", and two MMX-register ("y") alternatives using
;; punpckldq and movd.
;; NOTE(review): the insn condition is not visible in this excerpt; the name
;; suggests it is the SSE4.1 counterpart of *vec_concatv2si.
11041 (define_insn "*vec_concatv2si_sse4_1"
11042 [(set (match_operand:V2SI 0 "register_operand" "=x, x,x,x, x, *y,*y")
11044 (match_operand:SI 1 "nonimmediate_operand" " 0, x,0,x,rm, 0,rm")
11045 (match_operand:SI 2 "vector_move_operand" "rm,rm,x,x, C,*ym, C")))]
11048 pinsrd\t{$1, %2, %0|%0, %2, 1}
11049 vpinsrd\t{$1, %2, %1, %0|%0, %1, %2, 1}
11050 punpckldq\t{%2, %0|%0, %2}
11051 vpunpckldq\t{%2, %1, %0|%0, %1, %2}
11052 %vmovd\t{%1, %0|%0, %1}
11053 punpckldq\t{%2, %0|%0, %2}
11054 movd\t{%1, %0|%0, %1}"
11055 [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
11056 (set_attr "type" "sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov")
11057 (set_attr "prefix_extra" "1,1,*,*,*,*,*")
11058 (set_attr "length_immediate" "1,1,*,*,*,*,*")
11059 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig")
11060 (set_attr "mode" "TI,TI,TI,TI,TI,DI,DI")])
11062 ;; ??? In theory we can match memory for the MMX alternative, but allowing
11063 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
11064 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; V2SI concatenation for SSE targets without SSE4.1 (no pinsrd available
;; here).  The first three alternatives need SSE2 (punpckldq / movd); the
;; next two use the SSE1 float forms unpcklps / movss; the last two operate
;; on MMX ("y") registers.  Alternatives whose operand 2 matches "C" emit
;; only a move of operand 1.
11065 (define_insn "*vec_concatv2si"
11066 [(set (match_operand:V2SI 0 "register_operand" "=x,x ,*y,x,x,*y,*y")
11068 (match_operand:SI 1 "nonimmediate_operand" " 0,rm,rm,0,m, 0,*rm")
11069 (match_operand:SI 2 "reg_or_0_operand" " x,C ,C, x,C,*y,C")))]
11070 "TARGET_SSE && !TARGET_SSE4_1"
11072 punpckldq\t{%2, %0|%0, %2}
11073 movd\t{%1, %0|%0, %1}
11074 movd\t{%1, %0|%0, %1}
11075 unpcklps\t{%2, %0|%0, %2}
11076 movss\t{%1, %0|%0, %1}
11077 punpckldq\t{%2, %0|%0, %2}
11078 movd\t{%1, %0|%0, %1}"
11079 [(set_attr "isa" "sse2,sse2,sse2,*,*,*,*")
11080 (set_attr "type" "sselog,ssemov,mmxmov,sselog,ssemov,mmxcvt,mmxmov")
11081 (set_attr "mode" "TI,TI,DI,V4SF,SF,DI,DI")])
;; Build a V4SI from two V2SI halves (operand 1 low, operand 2 high).
;; Register sources use punpcklqdq / vpunpcklqdq or movlhps; a memory high
;; half is loaded with movhps / vmovhps (%q2 prints operand 2 as a 64-bit
;; memory reference in Intel syntax).
;; NOTE(review): the insn condition is not visible in this excerpt.
11083 (define_insn "*vec_concatv4si"
11084 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x,x,x")
11086 (match_operand:V2SI 1 "register_operand" " 0,x,0,0,x")
11087 (match_operand:V2SI 2 "nonimmediate_operand" " x,x,x,m,m")))]
11090 punpcklqdq\t{%2, %0|%0, %2}
11091 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
11092 movlhps\t{%2, %0|%0, %2}
11093 movhps\t{%2, %0|%0, %q2}
11094 vmovhps\t{%2, %1, %0|%0, %1, %q2}"
11095 [(set_attr "isa" "sse2_noavx,avx,noavx,noavx,avx")
11096 (set_attr "type" "sselog,sselog,ssemov,ssemov,ssemov")
11097 (set_attr "prefix" "orig,vex,orig,orig,vex")
11098 (set_attr "mode" "TI,TI,V4SF,V2SF,V2SF")])
11100 ;; movd instead of movq is required to handle broken assemblers.
;; Build a V2DI from two DI operands (operand 1 low, operand 2 high).
;; Ten alternatives: pinsrq / vpinsrq (64-bit SSE4.1 / AVX), a move from a
;; general register that prints %vmovq or %vmovd depending on
;; HAVE_AS_IX86_INTERUNIT_MOVQ, %vmovq from an SSE register or memory,
;; movq2dq from an MMX register, punpcklqdq / vpunpcklqdq, movlhps, and
;; movhps / vmovhps for a memory high half.
;; NOTE(review): the insn condition and the head of the "type" attribute
;; expression are not visible in this excerpt.
11101 (define_insn "vec_concatv2di"
11102 [(set (match_operand:V2DI 0 "register_operand"
11103 "=x,x ,Yi,x ,!x,x,x,x,x,x")
11105 (match_operand:DI 1 "nonimmediate_operand"
11106 " 0,x ,r ,xm,*y,0,x,0,0,x")
11107 (match_operand:DI 2 "vector_move_operand"
11108 "rm,rm,C ,C ,C ,x,x,x,m,m")))]
11111 pinsrq\t{$1, %2, %0|%0, %2, 1}
11112 vpinsrq\t{$1, %2, %1, %0|%0, %1, %2, 1}
11113 * return HAVE_AS_IX86_INTERUNIT_MOVQ ? \"%vmovq\t{%1, %0|%0, %1}\" : \"%vmovd\t{%1, %0|%0, %1}\";
11114 %vmovq\t{%1, %0|%0, %1}
11115 movq2dq\t{%1, %0|%0, %1}
11116 punpcklqdq\t{%2, %0|%0, %2}
11117 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
11118 movlhps\t{%2, %0|%0, %2}
11119 movhps\t{%2, %0|%0, %2}
11120 vmovhps\t{%2, %1, %0|%0, %1, %2}"
11121 [(set_attr "isa" "x64_sse4_noavx,x64_avx,x64,sse2,sse2,sse2_noavx,avx,noavx,noavx,avx")
11124 (eq_attr "alternative" "0,1,5,6")
11125 (const_string "sselog")
11126 (const_string "ssemov")))
11127 (set_attr "prefix_rex" "1,1,1,*,*,*,*,*,*,*")
11128 (set_attr "prefix_extra" "1,1,*,*,*,*,*,*,*,*")
11129 (set_attr "length_immediate" "1,1,*,*,*,*,*,*,*,*")
11130 (set_attr "prefix" "orig,vex,maybe_vex,maybe_vex,orig,orig,vex,orig,orig,vex")
11131 (set_attr "mode" "TI,TI,TI,TI,TI,TI,TI,V4SF,V2SF,V2SF")])
;; Standard-named expander vec_unpacks_lo: widen part of operand 1 into the
;; <sseunpackmode> destination.  All work is delegated to
;; ix86_expand_sse_unpack, called here with (false, false) as its last two
;; arguments.  NOTE(review): the expander condition is not visible here.
11133 (define_expand "vec_unpacks_lo_<mode>"
11134 [(match_operand:<sseunpackmode> 0 "register_operand")
11135 (match_operand:VI124_AVX512F 1 "register_operand")]
11137 "ix86_expand_sse_unpack (operands[0], operands[1], false, false); DONE;")
;; Standard-named expander vec_unpacks_hi: widen part of operand 1 into the
;; <sseunpackmode> destination.  All work is delegated to
;; ix86_expand_sse_unpack, called here with (false, true) as its last two
;; arguments.  NOTE(review): the expander condition is not visible here.
11139 (define_expand "vec_unpacks_hi_<mode>"
11140 [(match_operand:<sseunpackmode> 0 "register_operand")
11141 (match_operand:VI124_AVX512F 1 "register_operand")]
11143 "ix86_expand_sse_unpack (operands[0], operands[1], false, true); DONE;")
;; Standard-named expander vec_unpacku_lo: widen part of operand 1 into the
;; <sseunpackmode> destination.  All work is delegated to
;; ix86_expand_sse_unpack, called here with (true, false) as its last two
;; arguments.  NOTE(review): the expander condition is not visible here.
11145 (define_expand "vec_unpacku_lo_<mode>"
11146 [(match_operand:<sseunpackmode> 0 "register_operand")
11147 (match_operand:VI124_AVX512F 1 "register_operand")]
11149 "ix86_expand_sse_unpack (operands[0], operands[1], true, false); DONE;")
;; Standard-named expander vec_unpacku_hi: widen part of operand 1 into the
;; <sseunpackmode> destination.  All work is delegated to
;; ix86_expand_sse_unpack, called here with (true, true) as its last two
;; arguments.  NOTE(review): the expander condition is not visible here.
11151 (define_expand "vec_unpacku_hi_<mode>"
11152 [(match_operand:<sseunpackmode> 0 "register_operand")
11153 (match_operand:VI124_AVX512F 1 "register_operand")]
11155 "ix86_expand_sse_unpack (operands[0], operands[1], true, true); DONE;")
11157 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
11161 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander for the unsigned average of two byte/word vectors.  The RTL
;; widens both inputs with zero_extend to <ssedoublemode>, sums them, shifts
;; the sum right and truncates back to the element mode.  The preparation
;; code sets operands[3] to a vector of ones (CONST1_RTX) and then lets
;; ix86_fixup_binary_operands_no_copy legitimize the operands as for a PLUS.
;; NOTE(review): the use of operand 3, the shift count and the expander
;; condition are on lines not visible in this excerpt.
11163 (define_expand "<sse2_avx2>_uavg<mode>3"
11164 [(set (match_operand:VI12_AVX2 0 "register_operand")
11165 (truncate:VI12_AVX2
11166 (lshiftrt:<ssedoublemode>
11167 (plus:<ssedoublemode>
11168 (plus:<ssedoublemode>
11169 (zero_extend:<ssedoublemode>
11170 (match_operand:VI12_AVX2 1 "nonimmediate_operand"))
11171 (zero_extend:<ssedoublemode>
11172 (match_operand:VI12_AVX2 2 "nonimmediate_operand")))
11177 operands[3] = CONST1_RTX(<MODE>mode);
11178 ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);
;; Matching insn for the unsigned-average expander: emits pavg<suffix>
;; (two-operand, destination tied to operand 1) or vpavg<suffix>
;; (three-operand AVX form).  Operand 1 is marked commutative ("%") and
;; operand 3 must be the constant-one vector.  Enabled for SSE2 when
;; ix86_binary_operator_ok accepts the operand combination.
11181 (define_insn "*<sse2_avx2>_uavg<mode>3"
11182 [(set (match_operand:VI12_AVX2 0 "register_operand" "=x,x")
11183 (truncate:VI12_AVX2
11184 (lshiftrt:<ssedoublemode>
11185 (plus:<ssedoublemode>
11186 (plus:<ssedoublemode>
11187 (zero_extend:<ssedoublemode>
11188 (match_operand:VI12_AVX2 1 "nonimmediate_operand" "%0,x"))
11189 (zero_extend:<ssedoublemode>
11190 (match_operand:VI12_AVX2 2 "nonimmediate_operand" "xm,xm")))
11191 (match_operand:VI12_AVX2 3 "const1_operand"))
11193 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
11195 pavg<ssemodesuffix>\t{%2, %0|%0, %2}
11196 vpavg<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11197 [(set_attr "isa" "noavx,avx")
11198 (set_attr "type" "sseiadd")
11199 (set_attr "prefix_data16" "1,*")
11200 (set_attr "prefix" "orig,vex")
11201 (set_attr "mode" "<sseinsnmode>")])
11203 ;; The correct representation for this is absolutely enormous, and
11204 ;; surely not generally useful.
;; psadbw / vpsadbw, modelled as an unspec over two byte-vector operands
;; (<ssebytemode>) producing a 64-bit-element vector.  The second alternative
;; uses the "v" register class and the maybe_evex prefix.
;; NOTE(review): the unspec code and the insn condition are not visible in
;; this excerpt.
11205 (define_insn "<sse2_avx2>_psadbw"
11206 [(set (match_operand:VI8_AVX2_AVX512BW 0 "register_operand" "=x,v")
11207 (unspec:VI8_AVX2_AVX512BW
11208 [(match_operand:<ssebytemode> 1 "register_operand" "0,v")
11209 (match_operand:<ssebytemode> 2 "nonimmediate_operand" "xm,vm")]
11213 psadbw\t{%2, %0|%0, %2}
11214 vpsadbw\t{%2, %1, %0|%0, %1, %2}"
11215 [(set_attr "isa" "noavx,avx")
11216 (set_attr "type" "sseiadd")
11217 (set_attr "atom_unit" "simul")
11218 (set_attr "prefix_data16" "1,*")
11219 (set_attr "prefix" "orig,maybe_evex")
11220 (set_attr "mode" "<sseinsnmode>")])
;; (v)movmskps / (v)movmskpd: produce an SImode result in a general register
;; from a 128- or 256-bit floating-point vector register.
;; NOTE(review): the unspec code and the insn condition are not visible in
;; this excerpt.
11222 (define_insn "<sse>_movmsk<ssemodesuffix><avxsizesuffix>"
11223 [(set (match_operand:SI 0 "register_operand" "=r")
11225 [(match_operand:VF_128_256 1 "register_operand" "x")]
11228 "%vmovmsk<ssemodesuffix>\t{%1, %0|%0, %1}"
11229 [(set_attr "type" "ssemov")
11230 (set_attr "prefix" "maybe_vex")
11231 (set_attr "mode" "<MODE>")])
;; 256-bit vpmovmskb: SImode result in a general register computed from a
;; V32QI register; always VEX-encoded.
;; NOTE(review): the unspec code and the insn condition are not visible in
;; this excerpt.
11233 (define_insn "avx2_pmovmskb"
11234 [(set (match_operand:SI 0 "register_operand" "=r")
11235 (unspec:SI [(match_operand:V32QI 1 "register_operand" "x")]
11238 "vpmovmskb\t{%1, %0|%0, %1}"
11239 [(set_attr "type" "ssemov")
11240 (set_attr "prefix" "vex")
11241 (set_attr "mode" "DI")])
;; 128-bit (v)pmovmskb: SImode result in a general register computed from a
;; V16QI register; VEX-encoded when AVX is enabled (%v / maybe_vex).
;; NOTE(review): the unspec code and the insn condition are not visible in
;; this excerpt.
11243 (define_insn "sse2_pmovmskb"
11244 [(set (match_operand:SI 0 "register_operand" "=r")
11245 (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
11248 "%vpmovmskb\t{%1, %0|%0, %1}"
11249 [(set_attr "type" "ssemov")
11250 (set_attr "prefix_data16" "1")
11251 (set_attr "prefix" "maybe_vex")
11252 (set_attr "mode" "SI")])
;; Named expander for maskmovdqu: a V16QI memory destination computed by an
;; unspec of two V16QI register operands.
;; NOTE(review): the remaining unspec operands, the unspec code and the
;; condition are not visible in this excerpt.
11254 (define_expand "sse2_maskmovdqu"
11255 [(set (match_operand:V16QI 0 "memory_operand")
11256 (unspec:V16QI [(match_operand:V16QI 1 "register_operand")
11257 (match_operand:V16QI 2 "register_operand")
;; Insn for (v)maskmovdqu.  The store address is operand 0, a Pmode register
;; constrained to "D"; the old memory contents appear as an unspec input via
;; (mem (match_dup 0)).  When Pmode != word_mode the output code writes an
;; "addr32" prefix by hand before the mnemonic, and length_address is set to
;; match.  length_vex is computed explicitly because the address register is
;; implicit in the assembly output.
11262 (define_insn "*sse2_maskmovdqu"
11263 [(set (mem:V16QI (match_operand:P 0 "register_operand" "D"))
11264 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
11265 (match_operand:V16QI 2 "register_operand" "x")
11266 (mem:V16QI (match_dup 0))]
11270 /* We can't use %^ here due to ASM_OUTPUT_OPCODE processing
11271 that requires %v to be at the beginning of the opcode name. */
11272 if (Pmode != word_mode)
11273 fputs ("\taddr32", asm_out_file);
11274 return "%vmaskmovdqu\t{%2, %1|%1, %2}";
11276 [(set_attr "type" "ssemov")
11277 (set_attr "prefix_data16" "1")
11278 (set (attr "length_address")
11279 (symbol_ref ("Pmode != word_mode")))
11280 ;; The implicit %rdi operand confuses default length_vex computation.
11281 (set (attr "length_vex")
11282 (symbol_ref ("3 + REX_SSE_REGNO_P (REGNO (operands[2]))")))
11283 (set_attr "prefix" "maybe_vex")
11284 (set_attr "mode" "TI")])
;; Load MXCSR from an SImode memory operand.  Modelled as unspec_volatile so
;; it is never deleted or reordered as dead code; "memory" is "load".
;; NOTE(review): the unspec code, condition and template are not visible in
;; this excerpt.
11286 (define_insn "sse_ldmxcsr"
11287 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
11291 [(set_attr "type" "sse")
11292 (set_attr "atom_sse_attr" "mxcsr")
11293 (set_attr "prefix" "maybe_vex")
11294 (set_attr "memory" "load")])
;; Store MXCSR into an SImode memory operand.  The source is an
;; unspec_volatile (UNSPECV_STMXCSR) with a dummy const 0 input; "memory" is
;; "store".
;; NOTE(review): the condition and template are not visible in this excerpt.
11296 (define_insn "sse_stmxcsr"
11297 [(set (match_operand:SI 0 "memory_operand" "=m")
11298 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
11301 [(set_attr "type" "sse")
11302 (set_attr "atom_sse_attr" "mxcsr")
11303 (set_attr "prefix" "maybe_vex")
11304 (set_attr "memory" "store")])
;; clflush on an arbitrary address operand ("p").  Volatile unspec; its
;; memory effect is declared "unknown" and it is scheduled as a fence on
;; Atom (atom_sse_attr).
;; NOTE(review): the unspec code, condition and template are not visible in
;; this excerpt.
11306 (define_insn "sse2_clflush"
11307 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
11311 [(set_attr "type" "sse")
11312 (set_attr "atom_sse_attr" "fence")
11313 (set_attr "memory" "unknown")])
;; mwait with its two inputs pinned to the "a" and "c" register classes as
;; SImode values.  Fixed encoded length of 3 bytes.
;; NOTE(review): the unspec code, condition and template are not visible in
;; this excerpt.
11316 (define_insn "sse3_mwait"
11317 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
11318 (match_operand:SI 1 "register_operand" "c")]
11321 ;; 64bit version is "mwait %rax,%rcx". But only lower 32bits are used.
11322 ;; Since 32bit register operands are implicitly zero extended to 64bit,
11323 ;; we only need to set up 32bit registers.
11325 [(set_attr "length" "3")])
;; monitor: the address is a Pmode value in the "a" register class; the two
;; further SImode inputs are pinned to the "c" and "d" classes.  The length
;; is 3 bytes, plus 1 when Pmode != word_mode.
;; NOTE(review): the unspec code, condition and template are not visible in
;; this excerpt.
11327 (define_insn "sse3_monitor_<mode>"
11328 [(unspec_volatile [(match_operand:P 0 "register_operand" "a")
11329 (match_operand:SI 1 "register_operand" "c")
11330 (match_operand:SI 2 "register_operand" "d")]
11333 ;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in
11334 ;; RCX and RDX are used. Since 32bit register operands are implicitly
11335 ;; zero extended to 64bit, we only need to set up 32bit registers.
11337 [(set (attr "length")
11338 (symbol_ref ("(Pmode != word_mode) + 3")))])
11340 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
11342 ;; SSSE3 instructions
11344 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Code iterator over the four arithmetic codes used by the horizontal
;; add/subtract patterns: wrapping and signed-saturating plus and minus.
11346 (define_code_iterator ssse3_plusminus [plus ss_plus minus ss_minus])
;; 256-bit horizontal add/subtract of words (vph<op>w), one insn per code in
;; ssse3_plusminus.  Each result element combines an adjacent pair of HI
;; elements (0/1, 2/3, ... 14/15), first from operand 1 and then from
;; operand 2.
;; NOTE(review): the vec_concat structure that fixes the order of the pairs
;; in the result, and the insn condition, are on lines not visible in this
;; excerpt.
11348 (define_insn "avx2_ph<plusminus_mnemonic>wv16hi3"
11349 [(set (match_operand:V16HI 0 "register_operand" "=x")
11354 (ssse3_plusminus:HI
11356 (match_operand:V16HI 1 "register_operand" "x")
11357 (parallel [(const_int 0)]))
11358 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
11359 (ssse3_plusminus:HI
11360 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
11361 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
11363 (ssse3_plusminus:HI
11364 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
11365 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
11366 (ssse3_plusminus:HI
11367 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
11368 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
11371 (ssse3_plusminus:HI
11372 (vec_select:HI (match_dup 1) (parallel [(const_int 8)]))
11373 (vec_select:HI (match_dup 1) (parallel [(const_int 9)])))
11374 (ssse3_plusminus:HI
11375 (vec_select:HI (match_dup 1) (parallel [(const_int 10)]))
11376 (vec_select:HI (match_dup 1) (parallel [(const_int 11)]))))
11378 (ssse3_plusminus:HI
11379 (vec_select:HI (match_dup 1) (parallel [(const_int 12)]))
11380 (vec_select:HI (match_dup 1) (parallel [(const_int 13)])))
11381 (ssse3_plusminus:HI
11382 (vec_select:HI (match_dup 1) (parallel [(const_int 14)]))
11383 (vec_select:HI (match_dup 1) (parallel [(const_int 15)]))))))
11387 (ssse3_plusminus:HI
11389 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
11390 (parallel [(const_int 0)]))
11391 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
11392 (ssse3_plusminus:HI
11393 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
11394 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
11396 (ssse3_plusminus:HI
11397 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
11398 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
11399 (ssse3_plusminus:HI
11400 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
11401 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))
11404 (ssse3_plusminus:HI
11405 (vec_select:HI (match_dup 2) (parallel [(const_int 8)]))
11406 (vec_select:HI (match_dup 2) (parallel [(const_int 9)])))
11407 (ssse3_plusminus:HI
11408 (vec_select:HI (match_dup 2) (parallel [(const_int 10)]))
11409 (vec_select:HI (match_dup 2) (parallel [(const_int 11)]))))
11411 (ssse3_plusminus:HI
11412 (vec_select:HI (match_dup 2) (parallel [(const_int 12)]))
11413 (vec_select:HI (match_dup 2) (parallel [(const_int 13)])))
11414 (ssse3_plusminus:HI
11415 (vec_select:HI (match_dup 2) (parallel [(const_int 14)]))
11416 (vec_select:HI (match_dup 2) (parallel [(const_int 15)]))))))))]
11418 "vph<plusminus_mnemonic>w\t{%2, %1, %0|%0, %1, %2}"
11419 [(set_attr "type" "sseiadd")
11420 (set_attr "prefix_extra" "1")
11421 (set_attr "prefix" "vex")
11422 (set_attr "mode" "OI")])
;; 128-bit horizontal add/subtract of words.  Each result element combines an
;; adjacent pair of HI elements (0/1 ... 6/7), first from operand 1 and then
;; from operand 2.  Emits ph<op>w (destination tied to operand 1) or the
;; three-operand AVX form vph<op>w.
;; NOTE(review): the vec_concat wrappers and the insn condition are on lines
;; not visible in this excerpt.
11424 (define_insn "ssse3_ph<plusminus_mnemonic>wv8hi3"
11425 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
11429 (ssse3_plusminus:HI
11431 (match_operand:V8HI 1 "register_operand" "0,x")
11432 (parallel [(const_int 0)]))
11433 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
11434 (ssse3_plusminus:HI
11435 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
11436 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
11438 (ssse3_plusminus:HI
11439 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
11440 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
11441 (ssse3_plusminus:HI
11442 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
11443 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
11446 (ssse3_plusminus:HI
11448 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
11449 (parallel [(const_int 0)]))
11450 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
11451 (ssse3_plusminus:HI
11452 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
11453 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
11455 (ssse3_plusminus:HI
11456 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
11457 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
11458 (ssse3_plusminus:HI
11459 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
11460 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
11463 ph<plusminus_mnemonic>w\t{%2, %0|%0, %2}
11464 vph<plusminus_mnemonic>w\t{%2, %1, %0|%0, %1, %2}"
11465 [(set_attr "isa" "noavx,avx")
11466 (set_attr "type" "sseiadd")
11467 (set_attr "atom_unit" "complex")
11468 (set_attr "prefix_data16" "1,*")
11469 (set_attr "prefix_extra" "1")
11470 (set_attr "prefix" "orig,vex")
11471 (set_attr "mode" "TI")])
;; 64-bit (MMX register, "y") horizontal add/subtract of words: pairs 0/1 and
;; 2/3 of operand 1, then of operand 2.  Destination is tied to operand 1.
;; prefix_rex is computed at run time from whether the insn mentions an
;; extended register.
;; NOTE(review): the vec_concat wrappers and the insn condition are on lines
;; not visible in this excerpt.
11473 (define_insn "ssse3_ph<plusminus_mnemonic>wv4hi3"
11474 [(set (match_operand:V4HI 0 "register_operand" "=y")
11477 (ssse3_plusminus:HI
11479 (match_operand:V4HI 1 "register_operand" "0")
11480 (parallel [(const_int 0)]))
11481 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
11482 (ssse3_plusminus:HI
11483 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
11484 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
11486 (ssse3_plusminus:HI
11488 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
11489 (parallel [(const_int 0)]))
11490 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
11491 (ssse3_plusminus:HI
11492 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
11493 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
11495 "ph<plusminus_mnemonic>w\t{%2, %0|%0, %2}"
11496 [(set_attr "type" "sseiadd")
11497 (set_attr "atom_unit" "complex")
11498 (set_attr "prefix_extra" "1")
11499 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
11500 (set_attr "mode" "DI")])
;; 256-bit horizontal add/subtract of doublewords (vph<op>d).  The visible
;; selects pair adjacent SI elements (0/1 ... 6/7) of operand 1 and then of
;; operand 2.
;; NOTE(review): the arithmetic code wrapping each pair, the vec_concat
;; structure and the insn condition are on lines not visible in this excerpt.
11502 (define_insn "avx2_ph<plusminus_mnemonic>dv8si3"
11503 [(set (match_operand:V8SI 0 "register_operand" "=x")
11509 (match_operand:V8SI 1 "register_operand" "x")
11510 (parallel [(const_int 0)]))
11511 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
11513 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
11514 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
11517 (vec_select:SI (match_dup 1) (parallel [(const_int 4)]))
11518 (vec_select:SI (match_dup 1) (parallel [(const_int 5)])))
11520 (vec_select:SI (match_dup 1) (parallel [(const_int 6)]))
11521 (vec_select:SI (match_dup 1) (parallel [(const_int 7)])))))
11526 (match_operand:V8SI 2 "nonimmediate_operand" "xm")
11527 (parallel [(const_int 0)]))
11528 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
11530 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
11531 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))
11534 (vec_select:SI (match_dup 2) (parallel [(const_int 4)]))
11535 (vec_select:SI (match_dup 2) (parallel [(const_int 5)])))
11537 (vec_select:SI (match_dup 2) (parallel [(const_int 6)]))
11538 (vec_select:SI (match_dup 2) (parallel [(const_int 7)])))))))]
11540 "vph<plusminus_mnemonic>d\t{%2, %1, %0|%0, %1, %2}"
11541 [(set_attr "type" "sseiadd")
11542 (set_attr "prefix_extra" "1")
11543 (set_attr "prefix" "vex")
11544 (set_attr "mode" "OI")])
;; 128-bit horizontal add/subtract of doublewords: pairs 0/1 and 2/3 of
;; operand 1, then of operand 2.  Emits ph<op>d (destination tied to
;; operand 1) or the three-operand AVX form vph<op>d.
;; NOTE(review): the arithmetic code wrapping each pair, the vec_concat
;; structure and the insn condition are on lines not visible in this excerpt.
11546 (define_insn "ssse3_ph<plusminus_mnemonic>dv4si3"
11547 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
11552 (match_operand:V4SI 1 "register_operand" "0,x")
11553 (parallel [(const_int 0)]))
11554 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
11556 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
11557 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
11561 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
11562 (parallel [(const_int 0)]))
11563 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
11565 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
11566 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
11569 ph<plusminus_mnemonic>d\t{%2, %0|%0, %2}
11570 vph<plusminus_mnemonic>d\t{%2, %1, %0|%0, %1, %2}"
11571 [(set_attr "isa" "noavx,avx")
11572 (set_attr "type" "sseiadd")
11573 (set_attr "atom_unit" "complex")
11574 (set_attr "prefix_data16" "1,*")
11575 (set_attr "prefix_extra" "1")
11576 (set_attr "prefix" "orig,vex")
11577 (set_attr "mode" "TI")])
;; 64-bit (MMX register) horizontal add/subtract of doublewords: the pair 0/1
;; of operand 1 and the pair 0/1 of operand 2.  Destination is tied to
;; operand 1.
;; NOTE(review): the arithmetic code wrapping each pair, the vec_concat
;; wrapper and the insn condition are on lines not visible in this excerpt.
11579 (define_insn "ssse3_ph<plusminus_mnemonic>dv2si3"
11580 [(set (match_operand:V2SI 0 "register_operand" "=y")
11584 (match_operand:V2SI 1 "register_operand" "0")
11585 (parallel [(const_int 0)]))
11586 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
11589 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
11590 (parallel [(const_int 0)]))
11591 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
11593 "ph<plusminus_mnemonic>d\t{%2, %0|%0, %2}"
11594 [(set_attr "type" "sseiadd")
11595 (set_attr "atom_unit" "complex")
11596 (set_attr "prefix_extra" "1")
11597 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
11598 (set_attr "mode" "DI")])
;; 256-bit vpmaddubsw.  The RTL selects the even-indexed bytes (0, 2, ... 30)
;; of operands 1 and 2, and separately the odd-indexed bytes (1, 3, ... 31)
;; of both, producing a V16HI result.
;; NOTE(review): the extension, multiply and add codes that combine these
;; selections, and the insn condition, are on lines not visible in this
;; excerpt.
11600 (define_insn "avx2_pmaddubsw256"
11601 [(set (match_operand:V16HI 0 "register_operand" "=x")
11606 (match_operand:V32QI 1 "register_operand" "x")
11607 (parallel [(const_int 0) (const_int 2)
11608 (const_int 4) (const_int 6)
11609 (const_int 8) (const_int 10)
11610 (const_int 12) (const_int 14)
11611 (const_int 16) (const_int 18)
11612 (const_int 20) (const_int 22)
11613 (const_int 24) (const_int 26)
11614 (const_int 28) (const_int 30)])))
11617 (match_operand:V32QI 2 "nonimmediate_operand" "xm")
11618 (parallel [(const_int 0) (const_int 2)
11619 (const_int 4) (const_int 6)
11620 (const_int 8) (const_int 10)
11621 (const_int 12) (const_int 14)
11622 (const_int 16) (const_int 18)
11623 (const_int 20) (const_int 22)
11624 (const_int 24) (const_int 26)
11625 (const_int 28) (const_int 30)]))))
11628 (vec_select:V16QI (match_dup 1)
11629 (parallel [(const_int 1) (const_int 3)
11630 (const_int 5) (const_int 7)
11631 (const_int 9) (const_int 11)
11632 (const_int 13) (const_int 15)
11633 (const_int 17) (const_int 19)
11634 (const_int 21) (const_int 23)
11635 (const_int 25) (const_int 27)
11636 (const_int 29) (const_int 31)])))
11638 (vec_select:V16QI (match_dup 2)
11639 (parallel [(const_int 1) (const_int 3)
11640 (const_int 5) (const_int 7)
11641 (const_int 9) (const_int 11)
11642 (const_int 13) (const_int 15)
11643 (const_int 17) (const_int 19)
11644 (const_int 21) (const_int 23)
11645 (const_int 25) (const_int 27)
11646 (const_int 29) (const_int 31)]))))))]
11648 "vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
11649 [(set_attr "type" "sseiadd")
11650 (set_attr "prefix_extra" "1")
11651 (set_attr "prefix" "vex")
11652 (set_attr "mode" "OI")])
;; 128-bit pmaddubsw / vpmaddubsw.  The RTL selects the even-indexed bytes
;; (0, 2, ... 14) of operands 1 and 2, and separately the odd-indexed bytes
;; (1, 3, ... 15) of both, producing a V8HI result.
;; NOTE(review): the extension, multiply and add codes that combine these
;; selections, and the insn condition, are on lines not visible in this
;; excerpt.
11654 (define_insn "ssse3_pmaddubsw128"
11655 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
11660 (match_operand:V16QI 1 "register_operand" "0,x")
11661 (parallel [(const_int 0) (const_int 2)
11662 (const_int 4) (const_int 6)
11663 (const_int 8) (const_int 10)
11664 (const_int 12) (const_int 14)])))
11667 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")
11668 (parallel [(const_int 0) (const_int 2)
11669 (const_int 4) (const_int 6)
11670 (const_int 8) (const_int 10)
11671 (const_int 12) (const_int 14)]))))
11674 (vec_select:V8QI (match_dup 1)
11675 (parallel [(const_int 1) (const_int 3)
11676 (const_int 5) (const_int 7)
11677 (const_int 9) (const_int 11)
11678 (const_int 13) (const_int 15)])))
11680 (vec_select:V8QI (match_dup 2)
11681 (parallel [(const_int 1) (const_int 3)
11682 (const_int 5) (const_int 7)
11683 (const_int 9) (const_int 11)
11684 (const_int 13) (const_int 15)]))))))]
11687 pmaddubsw\t{%2, %0|%0, %2}
11688 vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
11689 [(set_attr "isa" "noavx,avx")
11690 (set_attr "type" "sseiadd")
11691 (set_attr "atom_unit" "simul")
11692 (set_attr "prefix_data16" "1,*")
11693 (set_attr "prefix_extra" "1")
11694 (set_attr "prefix" "orig,vex")
11695 (set_attr "mode" "TI")])
;; 64-bit (MMX register) pmaddubsw.  The RTL selects the even-indexed bytes
;; (0, 2, 4, 6) of operands 1 and 2, and separately the odd-indexed bytes
;; (1, 3, 5, 7) of both, producing a V4HI result tied to operand 1.
;; NOTE(review): the extension, multiply and add codes that combine these
;; selections, and the insn condition, are on lines not visible in this
;; excerpt.
11697 (define_insn "ssse3_pmaddubsw"
11698 [(set (match_operand:V4HI 0 "register_operand" "=y")
11703 (match_operand:V8QI 1 "register_operand" "0")
11704 (parallel [(const_int 0) (const_int 2)
11705 (const_int 4) (const_int 6)])))
11708 (match_operand:V8QI 2 "nonimmediate_operand" "ym")
11709 (parallel [(const_int 0) (const_int 2)
11710 (const_int 4) (const_int 6)]))))
11713 (vec_select:V4QI (match_dup 1)
11714 (parallel [(const_int 1) (const_int 3)
11715 (const_int 5) (const_int 7)])))
11717 (vec_select:V4QI (match_dup 2)
11718 (parallel [(const_int 1) (const_int 3)
11719 (const_int 5) (const_int 7)]))))))]
11721 "pmaddubsw\t{%2, %0|%0, %2}"
11722 [(set_attr "type" "sseiadd")
11723 (set_attr "atom_unit" "simul")
11724 (set_attr "prefix_extra" "1")
11725 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
11726 (set_attr "mode" "DI")])
;; Modes handled by the pmulhrsw expander: V4HI and V8HI unconditionally,
;; V16HI only when TARGET_AVX2 holds.
11728 (define_mode_iterator PMULHRSW
11729 [V4HI V8HI (V16HI "TARGET_AVX2")])
;; Expander for pmulhrsw over the PMULHRSW modes.  The RTL sign-extends both
;; inputs to <ssedoublemode>, multiplies, shifts right, adds, and shifts
;; right again.  The preparation code sets operands[3] to a vector of ones
;; (CONST1_RTX) and lets ix86_fixup_binary_operands_no_copy legitimize the
;; operands as for a MULT.
;; NOTE(review): the shift counts, the use of operand 3, the outer truncate
;; and the expander condition are on lines not visible in this excerpt.
11731 (define_expand "<ssse3_avx2>_pmulhrsw<mode>3"
11732 [(set (match_operand:PMULHRSW 0 "register_operand")
11734 (lshiftrt:<ssedoublemode>
11735 (plus:<ssedoublemode>
11736 (lshiftrt:<ssedoublemode>
11737 (mult:<ssedoublemode>
11738 (sign_extend:<ssedoublemode>
11739 (match_operand:PMULHRSW 1 "nonimmediate_operand"))
11740 (sign_extend:<ssedoublemode>
11741 (match_operand:PMULHRSW 2 "nonimmediate_operand")))
11747 operands[3] = CONST1_RTX(<MODE>mode);
11748 ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
;; SSE/AVX insn for pmulhrsw on VI2_AVX2 modes: pmulhrsw with the destination
;; tied to operand 1, or three-operand vpmulhrsw.  Operand 1 is commutative
;; ("%"); operand 3 must be the constant-one vector.  Enabled for SSSE3 when
;; ix86_binary_operator_ok accepts the operands.
;; NOTE(review): the shift counts and outer truncate are on lines not visible
;; in this excerpt.
11751 (define_insn "*<ssse3_avx2>_pmulhrsw<mode>3"
11752 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
11754 (lshiftrt:<ssedoublemode>
11755 (plus:<ssedoublemode>
11756 (lshiftrt:<ssedoublemode>
11757 (mult:<ssedoublemode>
11758 (sign_extend:<ssedoublemode>
11759 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,x"))
11760 (sign_extend:<ssedoublemode>
11761 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,xm")))
11763 (match_operand:VI2_AVX2 3 "const1_operand"))
11765 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
11767 pmulhrsw\t{%2, %0|%0, %2}
11768 vpmulhrsw\t{%2, %1, %0|%0, %1, %2}"
11769 [(set_attr "isa" "noavx,avx")
11770 (set_attr "type" "sseimul")
11771 (set_attr "prefix_data16" "1,*")
11772 (set_attr "prefix_extra" "1")
11773 (set_attr "prefix" "orig,vex")
11774 (set_attr "mode" "<sseinsnmode>")])
;; MMX-register (V4HI) form of pmulhrsw; destination tied to the commutative
;; operand 1, operand 3 must be the constant-one vector.
;; NOTE(review): most of the RTL body (extends, multiply, shifts) is on lines
;; not visible in this excerpt.
11776 (define_insn "*ssse3_pmulhrswv4hi3"
11777 [(set (match_operand:V4HI 0 "register_operand" "=y")
11784 (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
11786 (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
11788 (match_operand:V4HI 3 "const1_operand"))
11790 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
11791 "pmulhrsw\t{%2, %0|%0, %2}"
11792 [(set_attr "type" "sseimul")
11793 (set_attr "prefix_extra" "1")
11794 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
11795 (set_attr "mode" "DI")])
;; pshufb / vpshufb on byte vectors (VI1_AVX2), modelled as an unspec of the
;; data operand 1 and the control operand 2.
;; NOTE(review): the unspec code and the insn condition are not visible in
;; this excerpt.
11797 (define_insn "<ssse3_avx2>_pshufb<mode>3"
11798 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
11800 [(match_operand:VI1_AVX2 1 "register_operand" "0,x")
11801 (match_operand:VI1_AVX2 2 "nonimmediate_operand" "xm,xm")]
11805 pshufb\t{%2, %0|%0, %2}
11806 vpshufb\t{%2, %1, %0|%0, %1, %2}"
11807 [(set_attr "isa" "noavx,avx")
11808 (set_attr "type" "sselog1")
11809 (set_attr "prefix_data16" "1,*")
11810 (set_attr "prefix_extra" "1")
11811 (set_attr "prefix" "orig,vex")
11812 (set_attr "btver2_decode" "vector,vector")
11813 (set_attr "mode" "<sseinsnmode>")])
;; MMX-register (V8QI) form of pshufb; destination tied to operand 1.
;; NOTE(review): the ';' that follows the output template starts a
;; machine-description comment, so it is harmless, but it is stray and could
;; be dropped.  The unspec code and condition are not visible in this excerpt.
11815 (define_insn "ssse3_pshufbv8qi3"
11816 [(set (match_operand:V8QI 0 "register_operand" "=y")
11817 (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0")
11818 (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
11821 "pshufb\t{%2, %0|%0, %2}";
11822 [(set_attr "type" "sselog1")
11823 (set_attr "prefix_extra" "1")
11824 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
11825 (set_attr "mode" "DI")])
;; psignb/w/d and their AVX forms on VI124_AVX2 modes, modelled as an unspec
;; of operands 1 and 2; the mnemonic suffix comes from <ssemodesuffix>.
;; NOTE(review): the unspec code and the insn condition are not visible in
;; this excerpt.
11827 (define_insn "<ssse3_avx2>_psign<mode>3"
11828 [(set (match_operand:VI124_AVX2 0 "register_operand" "=x,x")
11830 [(match_operand:VI124_AVX2 1 "register_operand" "0,x")
11831 (match_operand:VI124_AVX2 2 "nonimmediate_operand" "xm,xm")]
11835 psign<ssemodesuffix>\t{%2, %0|%0, %2}
11836 vpsign<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11837 [(set_attr "isa" "noavx,avx")
11838 (set_attr "type" "sselog1")
11839 (set_attr "prefix_data16" "1,*")
11840 (set_attr "prefix_extra" "1")
11841 (set_attr "prefix" "orig,vex")
11842 (set_attr "mode" "<sseinsnmode>")])
;; MMX-register form of psign over the MMXMODEI modes; the mnemonic suffix
;; comes from <mmxvecsize>.  Destination tied to operand 1.
;; NOTE(review): the ';' that follows the output template starts a
;; machine-description comment, so it is harmless, but it is stray and could
;; be dropped.  The unspec code and condition are not visible in this excerpt.
11844 (define_insn "ssse3_psign<mode>3"
11845 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
11847 [(match_operand:MMXMODEI 1 "register_operand" "0")
11848 (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
11851 "psign<mmxvecsize>\t{%2, %0|%0, %2}";
11852 [(set_attr "type" "sselog1")
11853 (set_attr "prefix_extra" "1")
11854 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
11855 (set_attr "mode" "DI")])
;; palignr / vpalignr.  Operand 3 is accepted as a multiple of 8 in 0..255
;; (see its predicate) and is divided by 8 in the output code before being
;; printed as the instruction immediate.  The switch on which_alternative
;; picks the two-operand SSE form or the three-operand AVX form.
;; NOTE(review): the unspec code and the insn condition are not visible in
;; this excerpt.
11857 (define_insn "<ssse3_avx2>_palignr<mode>"
11858 [(set (match_operand:SSESCALARMODE 0 "register_operand" "=x,x")
11859 (unspec:SSESCALARMODE
11860 [(match_operand:SSESCALARMODE 1 "register_operand" "0,x")
11861 (match_operand:SSESCALARMODE 2 "nonimmediate_operand" "xm,xm")
11862 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n,n")]
11866 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
11868 switch (which_alternative)
11871 return "palignr\t{%3, %2, %0|%0, %2, %3}";
11873 return "vpalignr\t{%3, %2, %1, %0|%0, %1, %2, %3}";
11875 gcc_unreachable ();
11878 [(set_attr "isa" "noavx,avx")
11879 (set_attr "type" "sseishft")
11880 (set_attr "atom_unit" "sishuf")
11881 (set_attr "prefix_data16" "1,*")
11882 (set_attr "prefix_extra" "1")
11883 (set_attr "length_immediate" "1")
11884 (set_attr "prefix" "orig,vex")
11885 (set_attr "mode" "<sseinsnmode>")])
;; MMX-register (DImode) form of palignr.  As in the vector form, operand 3
;; is a multiple of 8 that the output code divides by 8 to form the
;; instruction immediate.
;; NOTE(review): the unspec code and the insn condition are not visible in
;; this excerpt.
11887 (define_insn "ssse3_palignrdi"
11888 [(set (match_operand:DI 0 "register_operand" "=y")
11889 (unspec:DI [(match_operand:DI 1 "register_operand" "0")
11890 (match_operand:DI 2 "nonimmediate_operand" "ym")
11891 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
11895 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
11896 return "palignr\t{%3, %2, %0|%0, %2, %3}";
11898 [(set_attr "type" "sseishft")
11899 (set_attr "atom_unit" "sishuf")
11900 (set_attr "prefix_extra" "1")
11901 (set_attr "length_immediate" "1")
11902 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
11903 (set_attr "mode" "DI")])
;; Vector integer absolute value, (v)pabs<suffix>, with an optional masked
;; variant generated through the <mask_name> substitution (the mask operand
;; is printed via <mask_operand2>).  Requires SSSE3 and the substitution's
;; <mask_mode512bit_condition>.
11905 (define_insn "<mask_codefor>abs<mode>2<mask_name>"
11906 [(set (match_operand:VI124_AVX2_48_AVX512F 0 "register_operand" "=v")
11907 (abs:VI124_AVX2_48_AVX512F
11908 (match_operand:VI124_AVX2_48_AVX512F 1 "nonimmediate_operand" "vm")))]
11909 "TARGET_SSSE3 && <mask_mode512bit_condition>"
11910 "%vpabs<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
11911 [(set_attr "type" "sselog1")
11912 (set_attr "prefix_data16" "1")
11913 (set_attr "prefix_extra" "1")
11914 (set_attr "prefix" "maybe_vex")
11915 (set_attr "mode" "<sseinsnmode>")])
;; Standard-named abs expander for the vector integer modes.  The visible
;; preparation code calls ix86_expand_sse2_abs (operands[0], operands[1]).
;; NOTE(review): the expander condition and whatever guards that call are on
;; lines not visible in this excerpt.
11917 (define_expand "abs<mode>2"
11918 [(set (match_operand:VI124_AVX2_48_AVX512F 0 "register_operand")
11919 (abs:VI124_AVX2_48_AVX512F
11920 (match_operand:VI124_AVX2_48_AVX512F 1 "nonimmediate_operand")))]
11925 ix86_expand_sse2_abs (operands[0], operands[1]);
;; MMX-register absolute value (pabs<mmxvecsize>) over the MMXMODEI modes.
;; NOTE(review): the ';' that follows the output template starts a
;; machine-description comment, so it is harmless, but it is stray and could
;; be dropped.  The abs rtx line and the condition are not visible in this
;; excerpt.
11930 (define_insn "abs<mode>2"
11931 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
11933 (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
11935 "pabs<mmxvecsize>\t{%1, %0|%0, %1}";
11936 [(set_attr "type" "sselog1")
11937 (set_attr "prefix_rep" "0")
11938 (set_attr "prefix_extra" "1")
11939 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
11940 (set_attr "mode" "DI")])
11942 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
11944 ;; AMD SSE4A instructions
11946 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; SSE4A scalar store movntss / movntsd: writes a scalar float (MODEF) from
;; an SSE register to memory, modelled as an unspec.
;; NOTE(review): the unspec code and the insn condition are not visible in
;; this excerpt.
11948 (define_insn "sse4a_movnt<mode>"
11949 [(set (match_operand:MODEF 0 "memory_operand" "=m")
11951 [(match_operand:MODEF 1 "register_operand" "x")]
11954 "movnt<ssemodesuffix>\t{%1, %0|%0, %1}"
11955 [(set_attr "type" "ssemov")
11956 (set_attr "mode" "<MODE>")])
;; Vector-source form of the movnt store: element 0 of the VF_128
;; register operand 1 (vec_select with index 0) is stored to the scalar
;; memory operand 0, emitted as movnt<ssescalarmodesuffix>.
11958 (define_insn "sse4a_vmmovnt<mode>"
11959 [(set (match_operand:<ssescalarmode> 0 "memory_operand" "=m")
11960 (unspec:<ssescalarmode>
11961 [(vec_select:<ssescalarmode>
11962 (match_operand:VF_128 1 "register_operand" "x")
11963 (parallel [(const_int 0)]))]
11966 "movnt<ssescalarmodesuffix>\t{%1, %0|%0, %1}"
11967 [(set_attr "type" "ssemov")
11968 (set_attr "mode" "<ssescalarmode>")])
;; extrq, immediate form.  Operand 1 is tied to the destination
;; (constraint "0"); operands 2 and 3 are immediates accepted by
;; const_0_to_255_operand.  The template prints both immediates, and
;; length_immediate is set to 2 accordingly.
11970 (define_insn "sse4a_extrqi"
11971 [(set (match_operand:V2DI 0 "register_operand" "=x")
11972 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11973 (match_operand 2 "const_0_to_255_operand")
11974 (match_operand 3 "const_0_to_255_operand")]
11977 "extrq\t{%3, %2, %0|%0, %2, %3}"
11978 [(set_attr "type" "sse")
11979 (set_attr "prefix_data16" "1")
11980 (set_attr "length_immediate" "2")
11981 (set_attr "mode" "TI")])
;; extrq, register form.  Operand 1 is tied to the destination; the
;; control operand 2 is a V16QI SSE register rather than immediates.
11983 (define_insn "sse4a_extrq"
11984 [(set (match_operand:V2DI 0 "register_operand" "=x")
11985 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11986 (match_operand:V16QI 2 "register_operand" "x")]
11989 "extrq\t{%2, %0|%0, %2}"
11990 [(set_attr "type" "sse")
11991 (set_attr "prefix_data16" "1")
11992 (set_attr "mode" "TI")])
;; insertq, immediate form.  Operand 1 is tied to the destination,
;; operand 2 is a V2DI SSE register, and operands 3 and 4 are
;; immediates accepted by const_0_to_255_operand.  Both immediates are
;; printed, and length_immediate is 2.
11994 (define_insn "sse4a_insertqi"
11995 [(set (match_operand:V2DI 0 "register_operand" "=x")
11996 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11997 (match_operand:V2DI 2 "register_operand" "x")
11998 (match_operand 3 "const_0_to_255_operand")
11999 (match_operand 4 "const_0_to_255_operand")]
12002 "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
12003 [(set_attr "type" "sseins")
12004 (set_attr "prefix_data16" "0")
12005 (set_attr "prefix_rep" "1")
12006 (set_attr "length_immediate" "2")
12007 (set_attr "mode" "TI")])
;; insertq, register form.  Operand 1 is tied to the destination and
;; operand 2 is a V2DI SSE register; no immediates are used.
12009 (define_insn "sse4a_insertq"
12010 [(set (match_operand:V2DI 0 "register_operand" "=x")
12011 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
12012 (match_operand:V2DI 2 "register_operand" "x")]
12015 "insertq\t{%2, %0|%0, %2}"
12016 [(set_attr "type" "sseins")
12017 (set_attr "prefix_data16" "0")
12018 (set_attr "prefix_rep" "1")
12019 (set_attr "mode" "TI")])
12021 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
12023 ;; Intel SSE4.1 instructions
12025 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
12027 ;; Mapping of immediate bits for blend instructions
;; The per-mode value is substituted into the predicate name
;; const_0_to_<blendbits>_operand used for the blend immediate.
12028 (define_mode_attr blendbits
12029 [(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")])
;; Immediate-controlled floating-point blend, expressed as a vec_merge
;; of operand 2 and operand 1 under the constant mask operand 3.
;; Alternative 0 (isa noavx) ties operand 1 to the destination and
;; emits the two-operand blend<ssemodesuffix>.  Alternative 1 (isa avx)
;; emits the three-operand vblend<ssemodesuffix>.
12031 (define_insn "<sse4_1>_blend<ssemodesuffix><avxsizesuffix>"
12032 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
12033 (vec_merge:VF_128_256
12034 (match_operand:VF_128_256 2 "nonimmediate_operand" "xm,xm")
12035 (match_operand:VF_128_256 1 "register_operand" "0,x")
12036 (match_operand:SI 3 "const_0_to_<blendbits>_operand")))]
12039 blend<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
12040 vblend<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
12041 [(set_attr "isa" "noavx,avx")
12042 (set_attr "type" "ssemov")
12043 (set_attr "length_immediate" "1")
12044 (set_attr "prefix_data16" "1,*")
12045 (set_attr "prefix_extra" "1")
12046 (set_attr "prefix" "orig,vex")
12047 (set_attr "mode" "<MODE>")])
;; Variable floating-point blend: the selector is register operand 3
;; instead of an immediate.  In the noavx alternative operand 1 is tied
;; to the destination and operand 3 is restricted to constraint "Yz";
;; in the avx alternative all three inputs may be any "x" register
;; (operand 2 also memory).
12049 (define_insn "<sse4_1>_blendv<ssemodesuffix><avxsizesuffix>"
12050 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
12052 [(match_operand:VF_128_256 1 "register_operand" "0,x")
12053 (match_operand:VF_128_256 2 "nonimmediate_operand" "xm,xm")
12054 (match_operand:VF_128_256 3 "register_operand" "Yz,x")]
12058 blendv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
12059 vblendv<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
12060 [(set_attr "isa" "noavx,avx")
12061 (set_attr "type" "ssemov")
12062 (set_attr "length_immediate" "1")
12063 (set_attr "prefix_data16" "1,*")
12064 (set_attr "prefix_extra" "1")
12065 (set_attr "prefix" "orig,vex")
12066 (set_attr "btver2_decode" "vector,vector")
12067 (set_attr "mode" "<MODE>")])
;; dp<ssemodesuffix> / vdp<ssemodesuffix> with an 8-bit immediate
;; (operand 3).  Operand 1 carries the "%" commutative marker, so it
;; may be swapped with operand 2 during constraint matching.
12069 (define_insn "<sse4_1>_dp<ssemodesuffix><avxsizesuffix>"
12070 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
12072 [(match_operand:VF_128_256 1 "nonimmediate_operand" "%0,x")
12073 (match_operand:VF_128_256 2 "nonimmediate_operand" "xm,xm")
12074 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
12078 dp<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
12079 vdp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
12080 [(set_attr "isa" "noavx,avx")
12081 (set_attr "type" "ssemul")
12082 (set_attr "length_immediate" "1")
12083 (set_attr "prefix_data16" "1,*")
12084 (set_attr "prefix_extra" "1")
12085 (set_attr "prefix" "orig,vex")
12086 (set_attr "btver2_decode" "vector,vector")
12087 (set_attr "mode" "<MODE>")])
12089 ;; Mode attribute used by `vmovntdqa' pattern
;; It maps each DImode-element vector mode to the prefix that forms the
;; insn name <vi8_sse4_1_avx2_avx512>_movntdqa.
12090 (define_mode_attr vi8_sse4_1_avx2_avx512
12091 [(V2DI "sse4_1") (V4DI "avx2") (V8DI "avx512f")])
;; Load of memory operand 1 into register operand 0, emitted as
;; %vmovntdqa.  Alternative 0 uses an "x" destination with prefix
;; maybe_vex; alternative 1 uses a "v" destination with prefix evex.
12093 (define_insn "<vi8_sse4_1_avx2_avx512>_movntdqa"
12094 [(set (match_operand:VI8_AVX2_AVX512F 0 "register_operand" "=x, v")
12095 (unspec:VI8_AVX2_AVX512F [(match_operand:VI8_AVX2_AVX512F 1 "memory_operand" "m, m")]
12098 "%vmovntdqa\t{%1, %0|%0, %1}"
12099 [(set_attr "type" "ssemov")
12100 (set_attr "prefix_extra" "1, *")
12101 (set_attr "prefix" "maybe_vex, evex")
12102 (set_attr "mode" "<sseinsnmode>")])
;; mpsadbw / vmpsadbw on byte vectors with an 8-bit immediate
;; (operand 3).  The noavx alternative ties operand 1 to the
;; destination; the avx alternative is three-operand.
12104 (define_insn "<sse4_1_avx2>_mpsadbw"
12105 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
12107 [(match_operand:VI1_AVX2 1 "register_operand" "0,x")
12108 (match_operand:VI1_AVX2 2 "nonimmediate_operand" "xm,xm")
12109 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
12113 mpsadbw\t{%3, %2, %0|%0, %2, %3}
12114 vmpsadbw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
12115 [(set_attr "isa" "noavx,avx")
12116 (set_attr "type" "sselog1")
12117 (set_attr "length_immediate" "1")
12118 (set_attr "prefix_extra" "1")
12119 (set_attr "prefix" "orig,vex")
12120 (set_attr "btver2_decode" "vector,vector")
12121 (set_attr "mode" "<sseinsnmode>")])
;; 256-bit vpackusdw: combines the two V8SI inputs (operand 1 register,
;; operand 2 register or memory) into the V16HI destination operand 0.
12123 (define_insn "avx2_packusdw"
12124 [(set (match_operand:V16HI 0 "register_operand" "=x")
12127 (match_operand:V8SI 1 "register_operand" "x"))
12129 (match_operand:V8SI 2 "nonimmediate_operand" "xm"))))]
12131 "vpackusdw\t{%2, %1, %0|%0, %1, %2}"
12132 [(set_attr "type" "sselog")
12133 (set_attr "prefix_extra" "1")
12134 (set_attr "prefix" "vex")
12135 (set_attr "mode" "OI")])
;; 128-bit packusdw: combines two V4SI inputs into the V8HI
;; destination.  Alternative 0 (noavx) ties operand 1 to the
;; destination; alternative 1 (avx) emits three-operand vpackusdw.
12137 (define_insn "sse4_1_packusdw"
12138 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
12141 (match_operand:V4SI 1 "register_operand" "0,x"))
12143 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm"))))]
12146 packusdw\t{%2, %0|%0, %2}
12147 vpackusdw\t{%2, %1, %0|%0, %1, %2}"
12148 [(set_attr "isa" "noavx,avx")
12149 (set_attr "type" "sselog")
12150 (set_attr "prefix_extra" "1")
12151 (set_attr "prefix" "orig,vex")
12152 (set_attr "mode" "TI")])
;; Byte-wise variable blend with a register selector (operand 3).  In
;; the noavx alternative operand 1 is tied to the destination and the
;; selector is restricted to constraint "Yz".  Only the avx alternative
;; sets length_immediate to 1; the noavx one leaves it at its default
;; ("*").
12154 (define_insn "<sse4_1_avx2>_pblendvb"
12155 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
12157 [(match_operand:VI1_AVX2 1 "register_operand" "0,x")
12158 (match_operand:VI1_AVX2 2 "nonimmediate_operand" "xm,xm")
12159 (match_operand:VI1_AVX2 3 "register_operand" "Yz,x")]
12163 pblendvb\t{%3, %2, %0|%0, %2, %3}
12164 vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}"
12165 [(set_attr "isa" "noavx,avx")
12166 (set_attr "type" "ssemov")
12167 (set_attr "prefix_extra" "1")
12168 (set_attr "length_immediate" "*,1")
12169 (set_attr "prefix" "orig,vex")
12170 (set_attr "btver2_decode" "vector,vector")
12171 (set_attr "mode" "<sseinsnmode>")])
;; 128-bit word blend of operand 2 and operand 1 under the 8-bit
;; immediate operand 3.  Emits pblendw (noavx, operand 1 tied to the
;; destination) or three-operand vpblendw (avx).
12173 (define_insn "sse4_1_pblendw"
12174 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
12176 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
12177 (match_operand:V8HI 1 "register_operand" "0,x")
12178 (match_operand:SI 3 "const_0_to_255_operand" "n,n")))]
12181 pblendw\t{%3, %2, %0|%0, %2, %3}
12182 vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
12183 [(set_attr "isa" "noavx,avx")
12184 (set_attr "type" "ssemov")
12185 (set_attr "prefix_extra" "1")
12186 (set_attr "length_immediate" "1")
12187 (set_attr "prefix" "orig,vex")
12188 (set_attr "mode" "TI")])
12190 ;; The builtin uses an 8-bit immediate. Expand that.
;; The preparation code keeps the low 8 bits of operand 3 and copies
;; them into bits 8-15 as well (val << 8 | val), so the 16-element
;; V16HI pattern receives one mask bit per element.
12191 (define_expand "avx2_pblendw"
12192 [(set (match_operand:V16HI 0 "register_operand")
12194 (match_operand:V16HI 2 "nonimmediate_operand")
12195 (match_operand:V16HI 1 "register_operand")
12196 (match_operand:SI 3 "const_0_to_255_operand")))]
12199 HOST_WIDE_INT val = INTVAL (operands[3]) & 0xff;
12200 operands[3] = GEN_INT (val << 8 | val);
;; Matching insn for the 256-bit word blend.  Operand 3 must satisfy
;; avx2_pblendw_operand.  The output code reduces it to its low 8 bits
;; again before printing, because vpblendw takes a one-byte immediate
;; (length_immediate is 1).
12203 (define_insn "*avx2_pblendw"
12204 [(set (match_operand:V16HI 0 "register_operand" "=x")
12206 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
12207 (match_operand:V16HI 1 "register_operand" "x")
12208 (match_operand:SI 3 "avx2_pblendw_operand" "n")))]
12211 operands[3] = GEN_INT (INTVAL (operands[3]) & 0xff);
12212 return "vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}";
12214 [(set_attr "type" "ssemov")
12215 (set_attr "prefix_extra" "1")
12216 (set_attr "length_immediate" "1")
12217 (set_attr "prefix" "vex")
12218 (set_attr "mode" "OI")])
;; Dword blend over the VI4_AVX2 modes: vec_merge of operand 2 and
;; operand 1 under the 8-bit immediate operand 3, emitted as
;; three-operand vpblendd.
12220 (define_insn "avx2_pblendd<mode>"
12221 [(set (match_operand:VI4_AVX2 0 "register_operand" "=x")
12222 (vec_merge:VI4_AVX2
12223 (match_operand:VI4_AVX2 2 "nonimmediate_operand" "xm")
12224 (match_operand:VI4_AVX2 1 "register_operand" "x")
12225 (match_operand:SI 3 "const_0_to_255_operand" "n")))]
12227 "vpblendd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
12228 [(set_attr "type" "ssemov")
12229 (set_attr "prefix_extra" "1")
12230 (set_attr "length_immediate" "1")
12231 (set_attr "prefix" "vex")
12232 (set_attr "mode" "<sseinsnmode>")])
;; %vphminposuw: V8HI register-or-memory operand 1 to V8HI register
;; operand 0, modelled as UNSPEC_PHMINPOSUW.
12234 (define_insn "sse4_1_phminposuw"
12235 [(set (match_operand:V8HI 0 "register_operand" "=x")
12236 (unspec:V8HI [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
12237 UNSPEC_PHMINPOSUW))]
12239 "%vphminposuw\t{%1, %0|%0, %1}"
12240 [(set_attr "type" "sselog1")
12241 (set_attr "prefix_extra" "1")
12242 (set_attr "prefix" "maybe_vex")
12243 (set_attr "mode" "TI")])
;; Byte-to-word widening, V16QI operand 1 to V16HI operand 0, emitted
;; as vpmov<extsuffix>bw.  <code> and <extsuffix> are iterator
;; placeholders defined outside this hunk.
12245 (define_insn "avx2_<code>v16qiv16hi2"
12246 [(set (match_operand:V16HI 0 "register_operand" "=x")
12248 (match_operand:V16QI 1 "nonimmediate_operand" "xm")))]
12250 "vpmov<extsuffix>bw\t{%1, %0|%0, %1}"
12251 [(set_attr "type" "ssemov")
12252 (set_attr "prefix_extra" "1")
12253 (set_attr "prefix" "vex")
12254 (set_attr "mode" "OI")])
;; Byte-to-word widening into V8HI.  Only elements 0-7 of the V16QI
;; operand 1 are selected, so the Intel-syntax template prints
;; operand 1 with the %q modifier and ssememalign is 64.
12256 (define_insn "sse4_1_<code>v8qiv8hi2"
12257 [(set (match_operand:V8HI 0 "register_operand" "=x")
12260 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
12261 (parallel [(const_int 0) (const_int 1)
12262 (const_int 2) (const_int 3)
12263 (const_int 4) (const_int 5)
12264 (const_int 6) (const_int 7)]))))]
12266 "%vpmov<extsuffix>bw\t{%1, %0|%0, %q1}"
12267 [(set_attr "type" "ssemov")
12268 (set_attr "ssememalign" "64")
12269 (set_attr "prefix_extra" "1")
12270 (set_attr "prefix" "maybe_vex")
12271 (set_attr "mode" "TI")])
;; Byte-to-dword widening, V16QI operand 1 to V16SI operand 0, emitted
;; as vpmov<extsuffix>bd with an optional <mask_operand2> suffix on
;; the destination.  EVEX-encoded ("v" registers, prefix evex).
12273 (define_insn "<mask_codefor>avx512f_<code>v16qiv16si2<mask_name>"
12274 [(set (match_operand:V16SI 0 "register_operand" "=v")
12276 (match_operand:V16QI 1 "nonimmediate_operand" "vm")))]
12278 "vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
12279 [(set_attr "type" "ssemov")
12280 (set_attr "prefix" "evex")
12281 (set_attr "mode" "XI")])
;; Byte-to-dword widening into V8SI.  Elements 0-7 of the V16QI
;; operand 1 are selected; emitted as vpmov<extsuffix>bd.
12283 (define_insn "avx2_<code>v8qiv8si2"
12284 [(set (match_operand:V8SI 0 "register_operand" "=x")
12287 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
12288 (parallel [(const_int 0) (const_int 1)
12289 (const_int 2) (const_int 3)
12290 (const_int 4) (const_int 5)
12291 (const_int 6) (const_int 7)]))))]
12293 "vpmov<extsuffix>bd\t{%1, %0|%0, %q1}"
12294 [(set_attr "type" "ssemov")
12295 (set_attr "prefix_extra" "1")
12296 (set_attr "prefix" "vex")
12297 (set_attr "mode" "OI")])
;; Byte-to-dword widening into V4SI.  Elements 0-3 of the V16QI
;; operand 1 are selected; the Intel-syntax template uses %k1 and
;; ssememalign is 32.
12299 (define_insn "sse4_1_<code>v4qiv4si2"
12300 [(set (match_operand:V4SI 0 "register_operand" "=x")
12303 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
12304 (parallel [(const_int 0) (const_int 1)
12305 (const_int 2) (const_int 3)]))))]
12307 "%vpmov<extsuffix>bd\t{%1, %0|%0, %k1}"
12308 [(set_attr "type" "ssemov")
12309 (set_attr "ssememalign" "32")
12310 (set_attr "prefix_extra" "1")
12311 (set_attr "prefix" "maybe_vex")
12312 (set_attr "mode" "TI")])
;; Word-to-dword widening, V16HI operand 1 to V16SI operand 0, emitted
;; as vpmov<extsuffix>wd with an optional <mask_operand2> suffix.
;; EVEX-encoded.
12314 (define_insn "avx512f_<code>v16hiv16si2<mask_name>"
12315 [(set (match_operand:V16SI 0 "register_operand" "=v")
12317 (match_operand:V16HI 1 "nonimmediate_operand" "vm")))]
12319 "vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
12320 [(set_attr "type" "ssemov")
12321 (set_attr "prefix" "evex")
12322 (set_attr "mode" "XI")])
;; Word-to-dword widening, V8HI operand 1 to V8SI operand 0, emitted
;; as vpmov<extsuffix>wd.
12324 (define_insn "avx2_<code>v8hiv8si2"
12325 [(set (match_operand:V8SI 0 "register_operand" "=x")
12327 (match_operand:V8HI 1 "nonimmediate_operand" "xm")))]
12329 "vpmov<extsuffix>wd\t{%1, %0|%0, %1}"
12330 [(set_attr "type" "ssemov")
12331 (set_attr "prefix_extra" "1")
12332 (set_attr "prefix" "vex")
12333 (set_attr "mode" "OI")])
;; Word-to-dword widening into V4SI.  Elements 0-3 of the V8HI
;; operand 1 are selected; the Intel-syntax template uses %q1 and
;; ssememalign is 64.
12335 (define_insn "sse4_1_<code>v4hiv4si2"
12336 [(set (match_operand:V4SI 0 "register_operand" "=x")
12339 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
12340 (parallel [(const_int 0) (const_int 1)
12341 (const_int 2) (const_int 3)]))))]
12343 "%vpmov<extsuffix>wd\t{%1, %0|%0, %q1}"
12344 [(set_attr "type" "ssemov")
12345 (set_attr "ssememalign" "64")
12346 (set_attr "prefix_extra" "1")
12347 (set_attr "prefix" "maybe_vex")
12348 (set_attr "mode" "TI")])
;; Byte-to-qword widening into V8DI.  Elements 0-7 of the V16QI
;; operand 1 are selected; emitted as vpmov<extsuffix>bq with an
;; optional <mask_operand2> suffix.  EVEX-encoded.
12350 (define_insn "avx512f_<code>v8qiv8di2<mask_name>"
12351 [(set (match_operand:V8DI 0 "register_operand" "=v")
12354 (match_operand:V16QI 1 "nonimmediate_operand" "vm")
12355 (parallel [(const_int 0) (const_int 1)
12356 (const_int 2) (const_int 3)
12357 (const_int 4) (const_int 5)
12358 (const_int 6) (const_int 7)]))))]
12360 "vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %k1}"
12361 [(set_attr "type" "ssemov")
12362 (set_attr "prefix" "evex")
12363 (set_attr "mode" "XI")])
;; Byte-to-qword widening into V4DI.  Elements 0-3 of the V16QI
;; operand 1 are selected; emitted as vpmov<extsuffix>bq.
12365 (define_insn "avx2_<code>v4qiv4di2"
12366 [(set (match_operand:V4DI 0 "register_operand" "=x")
12369 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
12370 (parallel [(const_int 0) (const_int 1)
12371 (const_int 2) (const_int 3)]))))]
12373 "vpmov<extsuffix>bq\t{%1, %0|%0, %k1}"
12374 [(set_attr "type" "ssemov")
12375 (set_attr "prefix_extra" "1")
12376 (set_attr "prefix" "vex")
12377 (set_attr "mode" "OI")])
;; Byte-to-qword widening into V2DI.  Elements 0-1 of the V16QI
;; operand 1 are selected; the Intel-syntax template uses %w1 and
;; ssememalign is 16.
12379 (define_insn "sse4_1_<code>v2qiv2di2"
12380 [(set (match_operand:V2DI 0 "register_operand" "=x")
12383 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
12384 (parallel [(const_int 0) (const_int 1)]))))]
12386 "%vpmov<extsuffix>bq\t{%1, %0|%0, %w1}"
12387 [(set_attr "type" "ssemov")
12388 (set_attr "ssememalign" "16")
12389 (set_attr "prefix_extra" "1")
12390 (set_attr "prefix" "maybe_vex")
12391 (set_attr "mode" "TI")])
;; Word-to-qword widening, V8HI operand 1 to V8DI operand 0, emitted
;; as vpmov<extsuffix>wq with an optional <mask_operand2> suffix.
;; EVEX-encoded.
12393 (define_insn "avx512f_<code>v8hiv8di2<mask_name>"
12394 [(set (match_operand:V8DI 0 "register_operand" "=v")
12396 (match_operand:V8HI 1 "nonimmediate_operand" "vm")))]
12398 "vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
12399 [(set_attr "type" "ssemov")
12400 (set_attr "prefix" "evex")
12401 (set_attr "mode" "XI")])
;; Word-to-qword widening into V4DI.  Elements 0-3 of the V8HI
;; operand 1 are selected; emitted as vpmov<extsuffix>wq.
12403 (define_insn "avx2_<code>v4hiv4di2"
12404 [(set (match_operand:V4DI 0 "register_operand" "=x")
12407 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
12408 (parallel [(const_int 0) (const_int 1)
12409 (const_int 2) (const_int 3)]))))]
12411 "vpmov<extsuffix>wq\t{%1, %0|%0, %q1}"
12412 [(set_attr "type" "ssemov")
12413 (set_attr "prefix_extra" "1")
12414 (set_attr "prefix" "vex")
12415 (set_attr "mode" "OI")])
;; Word-to-qword widening into V2DI.  Elements 0-1 of the V8HI
;; operand 1 are selected; the Intel-syntax template uses %k1 and
;; ssememalign is 32.
12417 (define_insn "sse4_1_<code>v2hiv2di2"
12418 [(set (match_operand:V2DI 0 "register_operand" "=x")
12421 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
12422 (parallel [(const_int 0) (const_int 1)]))))]
12424 "%vpmov<extsuffix>wq\t{%1, %0|%0, %k1}"
12425 [(set_attr "type" "ssemov")
12426 (set_attr "ssememalign" "32")
12427 (set_attr "prefix_extra" "1")
12428 (set_attr "prefix" "maybe_vex")
12429 (set_attr "mode" "TI")])
;; Dword-to-qword widening, V8SI operand 1 to V8DI operand 0, emitted
;; as vpmov<extsuffix>dq with an optional <mask_operand2> suffix.
;; EVEX-encoded.
12431 (define_insn "avx512f_<code>v8siv8di2<mask_name>"
12432 [(set (match_operand:V8DI 0 "register_operand" "=v")
12434 (match_operand:V8SI 1 "nonimmediate_operand" "vm")))]
12436 "vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
12437 [(set_attr "type" "ssemov")
12438 (set_attr "prefix" "evex")
12439 (set_attr "mode" "XI")])
;; Dword-to-qword widening, V4SI operand 1 to V4DI operand 0, emitted
;; as vpmov<extsuffix>dq.
;; The "prefix" attribute is now stated explicitly as "vex", matching
;; every other avx2_<code>* widening pattern in this group; this one
;; used to rely on the attribute's default.
;; NOTE(review): the default for mode OI is expected to be "vex"
;; already, which would make this a no-op -- confirm against the
;; define_attr "prefix" in i386.md.
12441 (define_insn "avx2_<code>v4siv4di2"
12442 [(set (match_operand:V4DI 0 "register_operand" "=x")
12444 (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
12446 "vpmov<extsuffix>dq\t{%1, %0|%0, %1}"
12447 [(set_attr "type" "ssemov")
12448 (set_attr "prefix_extra" "1")
(set_attr "prefix" "vex")
12449 (set_attr "mode" "OI")])
;; Dword-to-qword widening into V2DI.  Elements 0-1 of the V4SI
;; operand 1 are selected; the Intel-syntax template uses %q1 and
;; ssememalign is 64.
12451 (define_insn "sse4_1_<code>v2siv2di2"
12452 [(set (match_operand:V2DI 0 "register_operand" "=x")
12455 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
12456 (parallel [(const_int 0) (const_int 1)]))))]
12458 "%vpmov<extsuffix>dq\t{%1, %0|%0, %q1}"
12459 [(set_attr "type" "ssemov")
12460 (set_attr "ssememalign" "64")
12461 (set_attr "prefix_extra" "1")
12462 (set_attr "prefix" "maybe_vex")
12463 (set_attr "mode" "TI")])
12465 ;; vtestps/vtestpd are very similar to comiss and ucomiss when
12466 ;; setting FLAGS_REG.  But they are not really compare instructions.
;; Sets FLAGS_REG (CCmode) from register operand 0 and
;; register-or-memory operand 1; emitted as vtest<ssemodesuffix>.
;; There is no register output.
12467 (define_insn "avx_vtest<ssemodesuffix><avxsizesuffix>"
12468 [(set (reg:CC FLAGS_REG)
12469 (unspec:CC [(match_operand:VF_128_256 0 "register_operand" "x")
12470 (match_operand:VF_128_256 1 "nonimmediate_operand" "xm")]
12473 "vtest<ssemodesuffix>\t{%1, %0|%0, %1}"
12474 [(set_attr "type" "ssecomi")
12475 (set_attr "prefix_extra" "1")
12476 (set_attr "prefix" "vex")
12477 (set_attr "mode" "<MODE>")])
12479 ;; ptest is very similar to comiss and ucomiss when setting FLAGS_REG.
12480 ;; But it is not really a compare instruction.
;; 256-bit vptest: sets FLAGS_REG (CCmode) from V4DI register
;; operand 0 and V4DI register-or-memory operand 1.
12481 (define_insn "avx_ptest256"
12482 [(set (reg:CC FLAGS_REG)
12483 (unspec:CC [(match_operand:V4DI 0 "register_operand" "x")
12484 (match_operand:V4DI 1 "nonimmediate_operand" "xm")]
12487 "vptest\t{%1, %0|%0, %1}"
12488 [(set_attr "type" "ssecomi")
12489 (set_attr "prefix_extra" "1")
12490 (set_attr "prefix" "vex")
12491 (set_attr "btver2_decode" "vector")
12492 (set_attr "mode" "OI")])
;; 128-bit %vptest: sets FLAGS_REG (CCmode) from V2DI register
;; operand 0 and V2DI register-or-memory operand 1.
12494 (define_insn "sse4_1_ptest"
12495 [(set (reg:CC FLAGS_REG)
12496 (unspec:CC [(match_operand:V2DI 0 "register_operand" "x")
12497 (match_operand:V2DI 1 "nonimmediate_operand" "xm")]
12500 "%vptest\t{%1, %0|%0, %1}"
12501 [(set_attr "type" "ssecomi")
12502 (set_attr "prefix_extra" "1")
12503 (set_attr "prefix" "maybe_vex")
12504 (set_attr "mode" "TI")])
;; Packed floating-point round: operand 1 (register or memory) is
;; rounded according to the immediate operand 2 (accepted by
;; const_0_to_15_operand) into register operand 0, emitted as
;; %vround<ssemodesuffix>.  The prefix_data16 attribute is computed
;; from a TARGET_AVX test rather than given as a constant.
12506 (define_insn "<sse4_1>_round<ssemodesuffix><avxsizesuffix>"
12507 [(set (match_operand:VF_128_256 0 "register_operand" "=x")
12509 [(match_operand:VF_128_256 1 "nonimmediate_operand" "xm")
12510 (match_operand:SI 2 "const_0_to_15_operand" "n")]
12513 "%vround<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
12514 [(set_attr "type" "ssecvt")
12515 (set (attr "prefix_data16")
12517 (match_test "TARGET_AVX")
12519 (const_string "1")))
12520 (set_attr "prefix_extra" "1")
12521 (set_attr "length_immediate" "1")
12522 (set_attr "prefix" "maybe_vex")
12523 (set_attr "mode" "<MODE>")])
;; Round-then-convert expander for the VF1_128_256 modes.  Operand 1
;; is rounded into a fresh temporary with the packed round pattern
;; (using the rounding immediate operand 2), and the temporary is then
;; converted into the integer vector operand 0 with
;; fix_trunc<mode><sseintvecmodelower>2.
12525 (define_expand "<sse4_1>_round<ssemodesuffix>_sfix<avxsizesuffix>"
12526 [(match_operand:<sseintvecmode> 0 "register_operand")
12527 (match_operand:VF1_128_256 1 "nonimmediate_operand")
12528 (match_operand:SI 2 "const_0_to_15_operand")]
12531 rtx tmp = gen_reg_rtx (<MODE>mode);
12534 (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp, operands[1],
12537 (gen_fix_trunc<mode><sseintvecmodelower>2 (operands[0], tmp));
;; 512-bit V8DF round expander: forwards its three operands unchanged
;; to gen_avx512f_rndscalev8df.
12541 (define_expand "avx512f_roundpd512"
12542 [(match_operand:V8DF 0 "register_operand")
12543 (match_operand:V8DF 1 "nonimmediate_operand")
12544 (match_operand:SI 2 "const_0_to_15_operand")]
12547 emit_insn (gen_avx512f_rndscalev8df (operands[0], operands[1], operands[2]));
;; Rounds two VF2 inputs (operands 1 and 2) with the rounding
;; immediate operand 3 and packs the converted results into operand 0.
;; For V2DF with TARGET_AVX && !TARGET_PREFER_AVX128, the two inputs
;; are concatenated into one V4DF, rounded once with avx_roundpd256 and
;; converted with fix_truncv4dfv4si2.  The other visible sequence
;; rounds each input into its own temporary and combines them with
;; vec_pack_sfix_trunc_<mode>.
12551 (define_expand "<sse4_1>_round<ssemodesuffix>_vec_pack_sfix<avxsizesuffix>"
12552 [(match_operand:<ssepackfltmode> 0 "register_operand")
12553 (match_operand:VF2 1 "nonimmediate_operand")
12554 (match_operand:VF2 2 "nonimmediate_operand")
12555 (match_operand:SI 3 "const_0_to_15_operand")]
12560 if (<MODE>mode == V2DFmode
12561 && TARGET_AVX && !TARGET_PREFER_AVX128)
12563 rtx tmp2 = gen_reg_rtx (V4DFmode);
12565 tmp0 = gen_reg_rtx (V4DFmode);
12566 tmp1 = force_reg (V2DFmode, operands[1]);
12568 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
12569 emit_insn (gen_avx_roundpd256 (tmp2, tmp0, operands[3]));
12570 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp2));
12574 tmp0 = gen_reg_rtx (<MODE>mode);
12575 tmp1 = gen_reg_rtx (<MODE>mode);
12578 (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp0, operands[1],
12581 (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp1, operands[2],
12584 (gen_vec_pack_sfix_trunc_<mode> (operands[0], tmp0, tmp1));
;; Scalar round on VF_128 registers: round<ssescalarmodesuffix> of
;; operand 2 with immediate operand 3, with operand 1 as the other
;; vector input.  In the noavx alternative operand 1 is tied to the
;; destination; the avx alternative prints it as a separate source.
12589 (define_insn "sse4_1_round<ssescalarmodesuffix>"
12590 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
12593 [(match_operand:VF_128 2 "register_operand" "x,x")
12594 (match_operand:SI 3 "const_0_to_15_operand" "n,n")]
12596 (match_operand:VF_128 1 "register_operand" "0,x")
12600 round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
12601 vround<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
12602 [(set_attr "isa" "noavx,avx")
12603 (set_attr "type" "ssecvt")
12604 (set_attr "length_immediate" "1")
12605 (set_attr "prefix_data16" "1,*")
12606 (set_attr "prefix_extra" "1")
12607 (set_attr "prefix" "orig,vex")
12608 (set_attr "mode" "<MODE>")])
;; Vector round expander, enabled for TARGET_ROUND && !flag_trapping_math.
;; The preparation code:
;;   - computes pred_half = 0.5 - 2^(-p-1), where p is the precision
;;     of the scalar format (the value the inline comment describes as
;;     nextafter (0.5, 0.0));
;;   - builds a vector constant from it and gives it the sign of
;;     operand 1 via copysign<mode>3, storing the result in operands[3];
;;   - allocates operands[4] as a fresh register; the template sets it
;;     first and then feeds it, with operands[5], to the second set;
;;   - sets operands[5] to the ROUND_TRUNC immediate.
12610 (define_expand "round<mode>2"
12611 [(set (match_dup 4)
12613 (match_operand:VF 1 "register_operand")
12615 (set (match_operand:VF 0 "register_operand")
12617 [(match_dup 4) (match_dup 5)]
12619 "TARGET_ROUND && !flag_trapping_math"
12621 enum machine_mode scalar_mode;
12622 const struct real_format *fmt;
12623 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
12624 rtx half, vec_half;
12626 scalar_mode = GET_MODE_INNER (<MODE>mode);
12628 /* load nextafter (0.5, 0.0) */
12629 fmt = REAL_MODE_FORMAT (scalar_mode);
12630 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, scalar_mode);
12631 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
12632 half = const_double_from_real_value (pred_half, scalar_mode);
12634 vec_half = ix86_build_const_vector (<MODE>mode, true, half);
12635 vec_half = force_reg (<MODE>mode, vec_half);
12637 operands[3] = gen_reg_rtx (<MODE>mode);
12638 emit_insn (gen_copysign<mode>3 (operands[3], vec_half, operands[1]));
12640 operands[4] = gen_reg_rtx (<MODE>mode);
12641 operands[5] = GEN_INT (ROUND_TRUNC);
;; Round-then-convert expander for the VF1_128_256 modes, enabled for
;; TARGET_ROUND && !flag_trapping_math.  Operand 1 is rounded into a
;; temporary with round<mode>2, and the temporary is converted into
;; the integer vector operand 0 with
;; fix_trunc<mode><sseintvecmodelower>2.
12644 (define_expand "round<mode>2_sfix"
12645 [(match_operand:<sseintvecmode> 0 "register_operand")
12646 (match_operand:VF1_128_256 1 "register_operand")]
12647 "TARGET_ROUND && !flag_trapping_math"
12649 rtx tmp = gen_reg_rtx (<MODE>mode);
12651 emit_insn (gen_round<mode>2 (tmp, operands[1]));
12654 (gen_fix_trunc<mode><sseintvecmodelower>2 (operands[0], tmp));
;; Rounds two VF2 inputs with round<mode>2 and packs the converted
;; results into operand 0.  Enabled for TARGET_ROUND &&
;; !flag_trapping_math.  For V2DF with TARGET_AVX &&
;; !TARGET_PREFER_AVX128, the inputs are concatenated into one V4DF,
;; rounded once with roundv4df2 and converted with fix_truncv4dfv4si2.
;; The other visible sequence rounds each input separately and
;; combines them with vec_pack_sfix_trunc_<mode>.
12658 (define_expand "round<mode>2_vec_pack_sfix"
12659 [(match_operand:<ssepackfltmode> 0 "register_operand")
12660 (match_operand:VF2 1 "register_operand")
12661 (match_operand:VF2 2 "register_operand")]
12662 "TARGET_ROUND && !flag_trapping_math"
12666 if (<MODE>mode == V2DFmode
12667 && TARGET_AVX && !TARGET_PREFER_AVX128)
12669 rtx tmp2 = gen_reg_rtx (V4DFmode);
12671 tmp0 = gen_reg_rtx (V4DFmode);
12672 tmp1 = force_reg (V2DFmode, operands[1]);
12674 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
12675 emit_insn (gen_roundv4df2 (tmp2, tmp0));
12676 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp2));
12680 tmp0 = gen_reg_rtx (<MODE>mode);
12681 tmp1 = gen_reg_rtx (<MODE>mode);
12683 emit_insn (gen_round<mode>2 (tmp0, operands[1]));
12684 emit_insn (gen_round<mode>2 (tmp1, operands[2]));
12687 (gen_vec_pack_sfix_trunc_<mode> (operands[0], tmp0, tmp1));
12692 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
12694 ;; Intel SSE4.2 string/text processing instructions
12696 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Combined explicit-length string compare with three results: an SI
;; result in operand 0 (constraint "c"), a V16QI result in operand 1
;; (constraint "Yz"), and FLAGS_REG.  It is split while pseudos can
;; still be created.  The split code checks REG_UNUSED notes to see
;; which results are live, and emits pcmpestri for operand 0,
;; pcmpestrm for operand 1, or the flags-only variant when only the
;; flags are live.  If nothing is live it emits NOTE_INSN_DELETED.
;; NOTE(review): the guards around the pcmpestri/pcmpestrm emits are
;; not visible here; presumably `if (ecx)' and `if (xmm0)'.
12698 (define_insn_and_split "sse4_2_pcmpestr"
12699 [(set (match_operand:SI 0 "register_operand" "=c,c")
12701 [(match_operand:V16QI 2 "register_operand" "x,x")
12702 (match_operand:SI 3 "register_operand" "a,a")
12703 (match_operand:V16QI 4 "nonimmediate_operand" "x,m")
12704 (match_operand:SI 5 "register_operand" "d,d")
12705 (match_operand:SI 6 "const_0_to_255_operand" "n,n")]
12707 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
12715 (set (reg:CC FLAGS_REG)
12724 && can_create_pseudo_p ()"
12729 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
12730 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
12731 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
12734 emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
12735 operands[3], operands[4],
12736 operands[5], operands[6]));
12738 emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
12739 operands[3], operands[4],
12740 operands[5], operands[6]));
12741 if (flags && !(ecx || xmm0))
12742 emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
12743 operands[2], operands[3],
12744 operands[4], operands[5],
12746 if (!(flags || ecx || xmm0))
12747 emit_note (NOTE_INSN_DELETED);
12751 [(set_attr "type" "sselog")
12752 (set_attr "prefix_data16" "1")
12753 (set_attr "prefix_extra" "1")
12754 (set_attr "ssememalign" "8")
12755 (set_attr "length_immediate" "1")
12756 (set_attr "memory" "none,load")
12757 (set_attr "mode" "TI")])
;; Variant of the combined pcmpestr pattern whose second string
;; (operand 4) is a memory operand wrapped in UNSPEC_LOADU.  The split
;; code has the same structure: it reads the REG_UNUSED notes and
;; emits pcmpestri, pcmpestrm and/or the flags-only variant, passing
;; the memory operand directly, or NOTE_INSN_DELETED if no result is
;; live.
12759 (define_insn_and_split "*sse4_2_pcmpestr_unaligned"
12760 [(set (match_operand:SI 0 "register_operand" "=c")
12762 [(match_operand:V16QI 2 "register_operand" "x")
12763 (match_operand:SI 3 "register_operand" "a")
12765 [(match_operand:V16QI 4 "memory_operand" "m")]
12767 (match_operand:SI 5 "register_operand" "d")
12768 (match_operand:SI 6 "const_0_to_255_operand" "n")]
12770 (set (match_operand:V16QI 1 "register_operand" "=Yz")
12774 (unspec:V16QI [(match_dup 4)] UNSPEC_LOADU)
12778 (set (reg:CC FLAGS_REG)
12782 (unspec:V16QI [(match_dup 4)] UNSPEC_LOADU)
12787 && can_create_pseudo_p ()"
12792 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
12793 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
12794 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
12797 emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
12798 operands[3], operands[4],
12799 operands[5], operands[6]));
12801 emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
12802 operands[3], operands[4],
12803 operands[5], operands[6]));
12804 if (flags && !(ecx || xmm0))
12805 emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
12806 operands[2], operands[3],
12807 operands[4], operands[5],
12809 if (!(flags || ecx || xmm0))
12810 emit_note (NOTE_INSN_DELETED);
12814 [(set_attr "type" "sselog")
12815 (set_attr "prefix_data16" "1")
12816 (set_attr "prefix_extra" "1")
12817 (set_attr "ssememalign" "8")
12818 (set_attr "length_immediate" "1")
12819 (set_attr "memory" "load")
12820 (set_attr "mode" "TI")])
;; %vpcmpestri: produces the SI result in operand 0 (constraint "c")
;; and also sets FLAGS_REG.  Operands 2 and 4 are SI registers with
;; constraints "a" and "d"; they are not printed in the template.
;; Alternative 1 takes the second string from memory (memory attribute
;; "load").
12822 (define_insn "sse4_2_pcmpestri"
12823 [(set (match_operand:SI 0 "register_operand" "=c,c")
12825 [(match_operand:V16QI 1 "register_operand" "x,x")
12826 (match_operand:SI 2 "register_operand" "a,a")
12827 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
12828 (match_operand:SI 4 "register_operand" "d,d")
12829 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
12831 (set (reg:CC FLAGS_REG)
12840 "%vpcmpestri\t{%5, %3, %1|%1, %3, %5}"
12841 [(set_attr "type" "sselog")
12842 (set_attr "prefix_data16" "1")
12843 (set_attr "prefix_extra" "1")
12844 (set_attr "prefix" "maybe_vex")
12845 (set_attr "ssememalign" "8")
12846 (set_attr "length_immediate" "1")
12847 (set_attr "btver2_decode" "vector")
12848 (set_attr "memory" "none,load")
12849 (set_attr "mode" "TI")])
;; %vpcmpestrm: produces the V16QI result in operand 0 (constraint
;; "Yz") and also sets FLAGS_REG.  The input operands have the same
;; layout as in sse4_2_pcmpestri.
12851 (define_insn "sse4_2_pcmpestrm"
12852 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
12854 [(match_operand:V16QI 1 "register_operand" "x,x")
12855 (match_operand:SI 2 "register_operand" "a,a")
12856 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
12857 (match_operand:SI 4 "register_operand" "d,d")
12858 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
12860 (set (reg:CC FLAGS_REG)
12869 "%vpcmpestrm\t{%5, %3, %1|%1, %3, %5}"
12870 [(set_attr "type" "sselog")
12871 (set_attr "prefix_data16" "1")
12872 (set_attr "prefix_extra" "1")
12873 (set_attr "ssememalign" "8")
12874 (set_attr "length_immediate" "1")
12875 (set_attr "prefix" "maybe_vex")
12876 (set_attr "btver2_decode" "vector")
12877 (set_attr "memory" "none,load")
12878 (set_attr "mode" "TI")])
;; Flags-only explicit-length string compare.  Only FLAGS_REG is set;
;; the two data results are declared as clobbered scratches.
;; Alternatives 0-1 clobber a "Yz" vector scratch and emit
;; %vpcmpestrm; alternatives 2-3 clobber a "c" SI scratch and emit
;; %vpcmpestri.  Within each pair the second alternative takes
;; operand 4 from memory.
12880 (define_insn "sse4_2_pcmpestr_cconly"
12881 [(set (reg:CC FLAGS_REG)
12883 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
12884 (match_operand:SI 3 "register_operand" "a,a,a,a")
12885 (match_operand:V16QI 4 "nonimmediate_operand" "x,m,x,m")
12886 (match_operand:SI 5 "register_operand" "d,d,d,d")
12887 (match_operand:SI 6 "const_0_to_255_operand" "n,n,n,n")]
12889 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
12890 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
12893 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
12894 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
12895 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}
12896 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}"
12897 [(set_attr "type" "sselog")
12898 (set_attr "prefix_data16" "1")
12899 (set_attr "prefix_extra" "1")
12900 (set_attr "ssememalign" "8")
12901 (set_attr "length_immediate" "1")
12902 (set_attr "memory" "none,load,none,load")
12903 (set_attr "btver2_decode" "vector,vector,vector,vector")
12904 (set_attr "prefix" "maybe_vex")
12905 (set_attr "mode" "TI")])
;; Combined implicit-length string compare with three results: an SI
;; result in operand 0 (constraint "c"), a V16QI result in operand 1
;; (constraint "Yz"), and FLAGS_REG.  It is split while pseudos can
;; still be created.  The split code checks REG_UNUSED notes to see
;; which results are live, and emits pcmpistri, pcmpistrm, or the
;; flags-only variant.  If nothing is live it emits NOTE_INSN_DELETED.
;; NOTE(review): the guards around the pcmpistri/pcmpistrm emits are
;; not visible here; presumably `if (ecx)' and `if (xmm0)'.
12907 (define_insn_and_split "sse4_2_pcmpistr"
12908 [(set (match_operand:SI 0 "register_operand" "=c,c")
12910 [(match_operand:V16QI 2 "register_operand" "x,x")
12911 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
12912 (match_operand:SI 4 "const_0_to_255_operand" "n,n")]
12914 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
12920 (set (reg:CC FLAGS_REG)
12927 && can_create_pseudo_p ()"
12932 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
12933 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
12934 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
12937 emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
12938 operands[3], operands[4]));
12940 emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
12941 operands[3], operands[4]));
12942 if (flags && !(ecx || xmm0))
12943 emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
12944 operands[2], operands[3],
12946 if (!(flags || ecx || xmm0))
12947 emit_note (NOTE_INSN_DELETED);
12951 [(set_attr "type" "sselog")
12952 (set_attr "prefix_data16" "1")
12953 (set_attr "prefix_extra" "1")
12954 (set_attr "ssememalign" "8")
12955 (set_attr "length_immediate" "1")
12956 (set_attr "memory" "none,load")
12957 (set_attr "mode" "TI")])
;; Variant of the combined pcmpistr pattern whose second string
;; (operand 3) is a memory operand wrapped in UNSPEC_LOADU.  The split
;; code has the same structure as sse4_2_pcmpistr and passes the
;; memory operand directly to pcmpistri / pcmpistrm / the flags-only
;; variant.
12959 (define_insn_and_split "*sse4_2_pcmpistr_unaligned"
12960 [(set (match_operand:SI 0 "register_operand" "=c")
12962 [(match_operand:V16QI 2 "register_operand" "x")
12964 [(match_operand:V16QI 3 "memory_operand" "m")]
12966 (match_operand:SI 4 "const_0_to_255_operand" "n")]
12968 (set (match_operand:V16QI 1 "register_operand" "=Yz")
12971 (unspec:V16QI [(match_dup 3)] UNSPEC_LOADU)
12974 (set (reg:CC FLAGS_REG)
12977 (unspec:V16QI [(match_dup 3)] UNSPEC_LOADU)
12981 && can_create_pseudo_p ()"
12986 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
12987 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
12988 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
12991 emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
12992 operands[3], operands[4]));
12994 emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
12995 operands[3], operands[4]));
12996 if (flags && !(ecx || xmm0))
12997 emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
12998 operands[2], operands[3],
13000 if (!(flags || ecx || xmm0))
13001 emit_note (NOTE_INSN_DELETED);
13005 [(set_attr "type" "sselog")
13006 (set_attr "prefix_data16" "1")
13007 (set_attr "prefix_extra" "1")
13008 (set_attr "ssememalign" "8")
13009 (set_attr "length_immediate" "1")
13010 (set_attr "memory" "load")
13011 (set_attr "mode" "TI")])
;; PCMPISTRI.  Produces an SImode result in operand 0 (constraint "c") and
;; also sets FLAGS_REG.  Operand 1 must be a register, operand 2 may be a
;; register or memory (two alternatives), and operand 3 is the immediate
;; control byte.  The "%v" in the template goes with the "maybe_vex" prefix
;; attribute.
13013 (define_insn "sse4_2_pcmpistri"
13014 [(set (match_operand:SI 0 "register_operand" "=c,c")
13016 [(match_operand:V16QI 1 "register_operand" "x,x")
13017 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
13018 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
13020 (set (reg:CC FLAGS_REG)
13027 "%vpcmpistri\t{%3, %2, %1|%1, %2, %3}"
13028 [(set_attr "type" "sselog")
13029 (set_attr "prefix_data16" "1")
13030 (set_attr "prefix_extra" "1")
13031 (set_attr "ssememalign" "8")
13032 (set_attr "length_immediate" "1")
13033 (set_attr "prefix" "maybe_vex")
13034 (set_attr "memory" "none,load")
13035 (set_attr "btver2_decode" "vector")
13036 (set_attr "mode" "TI")])
;; PCMPISTRM.  Produces a V16QI result in operand 0 (constraint "Yz") and
;; also sets FLAGS_REG.  Operand 1 must be a register, operand 2 may be a
;; register or memory (two alternatives), and operand 3 is the immediate
;; control byte.
13038 (define_insn "sse4_2_pcmpistrm"
13039 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
13041 [(match_operand:V16QI 1 "register_operand" "x,x")
13042 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
13043 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
13045 (set (reg:CC FLAGS_REG)
13052 "%vpcmpistrm\t{%3, %2, %1|%1, %2, %3}"
13053 [(set_attr "type" "sselog")
13054 (set_attr "prefix_data16" "1")
13055 (set_attr "prefix_extra" "1")
13056 (set_attr "ssememalign" "8")
13057 (set_attr "length_immediate" "1")
13058 (set_attr "prefix" "maybe_vex")
13059 (set_attr "memory" "none,load")
13060 (set_attr "btver2_decode" "vector")
13061 (set_attr "mode" "TI")])
;; Flags-only form of PCMPISTR: the only value set is FLAGS_REG, while
;; operands 0 and 1 are scratch clobbers.  Alternatives 0 and 1 emit
;; pcmpistrm and clobber a "Yz" scratch; alternatives 2 and 3 emit pcmpistri
;; and clobber a "c" scratch.  Within each pair the second alternative takes
;; operand 3 from memory.
13063 (define_insn "sse4_2_pcmpistr_cconly"
13064 [(set (reg:CC FLAGS_REG)
13066 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
13067 (match_operand:V16QI 3 "nonimmediate_operand" "x,m,x,m")
13068 (match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")]
13070 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
13071 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
13074 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
13075 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
13076 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}
13077 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}"
13078 [(set_attr "type" "sselog")
13079 (set_attr "prefix_data16" "1")
13080 (set_attr "prefix_extra" "1")
13081 (set_attr "ssememalign" "8")
13082 (set_attr "length_immediate" "1")
13083 (set_attr "memory" "none,load,none,load")
13084 (set_attr "prefix" "maybe_vex")
13085 (set_attr "btver2_decode" "vector,vector,vector,vector")
13086 (set_attr "mode" "TI")])
13088 ;; Packed float variants
;; Maps the index-vector mode of a single-precision gather/scatter prefetch
;; to the mode used for its memory reference: V8DI -> V8SF, V16SI -> V16SF.
13089 (define_mode_attr GATHER_SCATTER_SF_MEM_MODE
13090 [(V8DI "V8SF") (V16SI "V16SF")])
;; Expander for the single-precision gather prefetch.
;;   operand 0: mask (a register or the constant -1)
;;   operand 1: index vector (VI48_512)
;;   operand 2: VSIB base address
;;   operand 3: scale (const1248_operand)
;;   operand 4: hint selector (const_2_to_3_operand)
;; The preparation code wraps operands 2, 1 and 3 in an UNSPEC_VSIBADDR of
;; Pmode; the target of that assignment is not visible in this view.
13092 (define_expand "avx512pf_gatherpf<mode>sf"
13094 [(match_operand:<avx512fmaskmode> 0 "register_or_constm1_operand")
13095 (mem:<GATHER_SCATTER_SF_MEM_MODE>
13097 [(match_operand 2 "vsib_address_operand")
13098 (match_operand:VI48_512 1 "register_operand")
13099 (match_operand:SI 3 "const1248_operand")]))
13100 (match_operand:SI 4 "const_2_to_3_operand")]
13101 UNSPEC_GATHER_PREFETCH)]
13105 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
13106 operands[3]), UNSPEC_VSIBADDR);
;; Masked single-precision gather prefetch.  Operand 0 is the mask register
;; ("Yk"), printed in braces after the VSIB memory operand 5.  The output
;; code switches on the value of operand 4 to pick vgatherpf0...ps or
;; vgatherpf1...ps; any other value reaches gcc_unreachable.
13109 (define_insn "*avx512pf_gatherpf<mode>sf_mask"
13111 [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
13112 (match_operator:<GATHER_SCATTER_SF_MEM_MODE> 5 "vsib_mem_operator"
13114 [(match_operand:P 2 "vsib_address_operand" "Tv")
13115 (match_operand:VI48_512 1 "register_operand" "v")
13116 (match_operand:SI 3 "const1248_operand" "n")]
13118 (match_operand:SI 4 "const_2_to_3_operand" "n")]
13119 UNSPEC_GATHER_PREFETCH)]
13122 switch (INTVAL (operands[4]))
13125 return "vgatherpf0<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}";
13127 return "vgatherpf1<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}";
13129 gcc_unreachable ();
13132 [(set_attr "type" "sse")
13133 (set_attr "prefix" "evex")
13134 (set_attr "mode" "XI")])
;; Single-precision gather prefetch without a mask register operand; only
;; the VSIB memory operand 4 is printed.  The output code switches on the
;; value of operand 3 to pick vgatherpf0...ps or vgatherpf1...ps; any other
;; value reaches gcc_unreachable.
13136 (define_insn "*avx512pf_gatherpf<mode>sf"
13139 (match_operator:<GATHER_SCATTER_SF_MEM_MODE> 4 "vsib_mem_operator"
13141 [(match_operand:P 1 "vsib_address_operand" "Tv")
13142 (match_operand:VI48_512 0 "register_operand" "v")
13143 (match_operand:SI 2 "const1248_operand" "n")]
13145 (match_operand:SI 3 "const_2_to_3_operand" "n")]
13146 UNSPEC_GATHER_PREFETCH)]
13149 switch (INTVAL (operands[3]))
13152 return "vgatherpf0<ssemodesuffix>ps\t{%4|%4}";
13154 return "vgatherpf1<ssemodesuffix>ps\t{%4|%4}";
13156 gcc_unreachable ();
13159 [(set_attr "type" "sse")
13160 (set_attr "prefix" "evex")
13161 (set_attr "mode" "XI")])
13163 ;; Packed double variants
;; Expander for the double-precision gather prefetch.
;;   operand 0: mask (a register or the constant -1)
;;   operand 1: index vector (VI4_256_8_512)
;;   operand 2: VSIB base address
;;   operand 3: scale (const1248_operand)
;;   operand 4: hint selector (const_2_to_3_operand)
;; The preparation code wraps operands 2, 1 and 3 in an UNSPEC_VSIBADDR of
;; Pmode; the target of that assignment is not visible in this view.
13164 (define_expand "avx512pf_gatherpf<mode>df"
13166 [(match_operand:<avx512fmaskmode> 0 "register_or_constm1_operand")
13169 [(match_operand 2 "vsib_address_operand")
13170 (match_operand:VI4_256_8_512 1 "register_operand")
13171 (match_operand:SI 3 "const1248_operand")]))
13172 (match_operand:SI 4 "const_2_to_3_operand")]
13173 UNSPEC_GATHER_PREFETCH)]
13177 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
13178 operands[3]), UNSPEC_VSIBADDR);
;; Masked double-precision gather prefetch.  The memory operator (operand 5)
;; has mode V8DF and operand 0 is the mask register ("Yk"), printed in
;; braces after it.  The output code switches on the value of operand 4 to
;; pick vgatherpf0...pd or vgatherpf1...pd; any other value reaches
;; gcc_unreachable.
13181 (define_insn "*avx512pf_gatherpf<mode>df_mask"
13183 [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
13184 (match_operator:V8DF 5 "vsib_mem_operator"
13186 [(match_operand:P 2 "vsib_address_operand" "Tv")
13187 (match_operand:VI4_256_8_512 1 "register_operand" "v")
13188 (match_operand:SI 3 "const1248_operand" "n")]
13190 (match_operand:SI 4 "const_2_to_3_operand" "n")]
13191 UNSPEC_GATHER_PREFETCH)]
13194 switch (INTVAL (operands[4]))
13197 return "vgatherpf0<ssemodesuffix>pd\t{%5%{%0%}|%5%{%0%}}";
13199 return "vgatherpf1<ssemodesuffix>pd\t{%5%{%0%}|%5%{%0%}}";
13201 gcc_unreachable ();
13204 [(set_attr "type" "sse")
13205 (set_attr "prefix" "evex")
13206 (set_attr "mode" "XI")])
;; Double-precision gather prefetch without a mask register operand; only
;; the V8DF VSIB memory operand 4 is printed.  The output code switches on
;; the value of operand 3 to pick vgatherpf0...pd or vgatherpf1...pd; any
;; other value reaches gcc_unreachable.
13208 (define_insn "*avx512pf_gatherpf<mode>df"
13211 (match_operator:V8DF 4 "vsib_mem_operator"
13213 [(match_operand:P 1 "vsib_address_operand" "Tv")
13214 (match_operand:VI4_256_8_512 0 "register_operand" "v")
13215 (match_operand:SI 2 "const1248_operand" "n")]
13217 (match_operand:SI 3 "const_2_to_3_operand" "n")]
13218 UNSPEC_GATHER_PREFETCH)]
13221 switch (INTVAL (operands[3]))
13224 return "vgatherpf0<ssemodesuffix>pd\t{%4|%4}";
13226 return "vgatherpf1<ssemodesuffix>pd\t{%4|%4}";
13228 gcc_unreachable ();
13231 [(set_attr "type" "sse")
13232 (set_attr "prefix" "evex")
13233 (set_attr "mode" "XI")])
13235 ;; Packed float variants
;; Expander for the single-precision scatter prefetch.
;;   operand 0: mask (a register or the constant -1)
;;   operand 1: index vector (VI48_512)
;;   operand 2: VSIB base address
;;   operand 3: scale (const1248_operand)
;;   operand 4: hint selector (const2367_operand)
;; The preparation code wraps operands 2, 1 and 3 in an UNSPEC_VSIBADDR of
;; Pmode; the target of that assignment is not visible in this view.
13236 (define_expand "avx512pf_scatterpf<mode>sf"
13238 [(match_operand:<avx512fmaskmode> 0 "register_or_constm1_operand")
13239 (mem:<GATHER_SCATTER_SF_MEM_MODE>
13241 [(match_operand 2 "vsib_address_operand")
13242 (match_operand:VI48_512 1 "register_operand")
13243 (match_operand:SI 3 "const1248_operand")]))
13244 (match_operand:SI 4 "const2367_operand")]
13245 UNSPEC_SCATTER_PREFETCH)]
13249 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
13250 operands[3]), UNSPEC_VSIBADDR);
;; Masked single-precision scatter prefetch.  Operand 0 is the mask register
;; ("Yk"), printed in braces after the VSIB memory operand 5.  The output
;; code switches on the value of operand 4 to pick vscatterpf0...ps or
;; vscatterpf1...ps; any other value reaches gcc_unreachable.
13253 (define_insn "*avx512pf_scatterpf<mode>sf_mask"
13255 [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
13256 (match_operator:<GATHER_SCATTER_SF_MEM_MODE> 5 "vsib_mem_operator"
13258 [(match_operand:P 2 "vsib_address_operand" "Tv")
13259 (match_operand:VI48_512 1 "register_operand" "v")
13260 (match_operand:SI 3 "const1248_operand" "n")]
13262 (match_operand:SI 4 "const2367_operand" "n")]
13263 UNSPEC_SCATTER_PREFETCH)]
13266 switch (INTVAL (operands[4]))
13270 return "vscatterpf0<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}";
13273 return "vscatterpf1<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}";
13275 gcc_unreachable ();
13278 [(set_attr "type" "sse")
13279 (set_attr "prefix" "evex")
13280 (set_attr "mode" "XI")])
;; Single-precision scatter prefetch without a mask register operand; only
;; the VSIB memory operand 4 is printed.  The output code switches on the
;; value of operand 3 to pick vscatterpf0...ps or vscatterpf1...ps; any
;; other value reaches gcc_unreachable.
13282 (define_insn "*avx512pf_scatterpf<mode>sf"
13285 (match_operator:<GATHER_SCATTER_SF_MEM_MODE> 4 "vsib_mem_operator"
13287 [(match_operand:P 1 "vsib_address_operand" "Tv")
13288 (match_operand:VI48_512 0 "register_operand" "v")
13289 (match_operand:SI 2 "const1248_operand" "n")]
13291 (match_operand:SI 3 "const2367_operand" "n")]
13292 UNSPEC_SCATTER_PREFETCH)]
13295 switch (INTVAL (operands[3]))
13299 return "vscatterpf0<ssemodesuffix>ps\t{%4|%4}";
13302 return "vscatterpf1<ssemodesuffix>ps\t{%4|%4}";
13304 gcc_unreachable ();
13307 [(set_attr "type" "sse")
13308 (set_attr "prefix" "evex")
13309 (set_attr "mode" "XI")])
13311 ;; Packed double variants
;; Expander for the double-precision scatter prefetch.
;;   operand 0: mask (a register or the constant -1)
;;   operand 1: index vector (VI4_256_8_512)
;;   operand 2: VSIB base address
;;   operand 3: scale (const1248_operand)
;;   operand 4: hint selector (const2367_operand)
;; The preparation code wraps operands 2, 1 and 3 in an UNSPEC_VSIBADDR of
;; Pmode; the target of that assignment is not visible in this view.
13312 (define_expand "avx512pf_scatterpf<mode>df"
13314 [(match_operand:<avx512fmaskmode> 0 "register_or_constm1_operand")
13317 [(match_operand 2 "vsib_address_operand")
13318 (match_operand:VI4_256_8_512 1 "register_operand")
13319 (match_operand:SI 3 "const1248_operand")]))
13320 (match_operand:SI 4 "const2367_operand")]
13321 UNSPEC_SCATTER_PREFETCH)]
13325 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
13326 operands[3]), UNSPEC_VSIBADDR);
;; Masked double-precision scatter prefetch.  The memory operator (operand
;; 5) has mode V8DF and operand 0 is the mask register ("Yk"), printed in
;; braces after it.  The output code switches on the value of operand 4 to
;; pick vscatterpf0...pd or vscatterpf1...pd; any other value reaches
;; gcc_unreachable.
13329 (define_insn "*avx512pf_scatterpf<mode>df_mask"
13331 [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
13332 (match_operator:V8DF 5 "vsib_mem_operator"
13334 [(match_operand:P 2 "vsib_address_operand" "Tv")
13335 (match_operand:VI4_256_8_512 1 "register_operand" "v")
13336 (match_operand:SI 3 "const1248_operand" "n")]
13338 (match_operand:SI 4 "const2367_operand" "n")]
13339 UNSPEC_SCATTER_PREFETCH)]
13342 switch (INTVAL (operands[4]))
13346 return "vscatterpf0<ssemodesuffix>pd\t{%5%{%0%}|%5%{%0%}}";
13349 return "vscatterpf1<ssemodesuffix>pd\t{%5%{%0%}|%5%{%0%}}";
13351 gcc_unreachable ();
13354 [(set_attr "type" "sse")
13355 (set_attr "prefix" "evex")
13356 (set_attr "mode" "XI")])
;; Double-precision scatter prefetch without a mask register operand; only
;; the V8DF VSIB memory operand 4 is printed.  The output code switches on
;; the value of operand 3 to pick vscatterpf0...pd or vscatterpf1...pd; any
;; other value reaches gcc_unreachable.
13358 (define_insn "*avx512pf_scatterpf<mode>df"
13361 (match_operator:V8DF 4 "vsib_mem_operator"
13363 [(match_operand:P 1 "vsib_address_operand" "Tv")
13364 (match_operand:VI4_256_8_512 0 "register_operand" "v")
13365 (match_operand:SI 2 "const1248_operand" "n")]
13367 (match_operand:SI 3 "const2367_operand" "n")]
13368 UNSPEC_SCATTER_PREFETCH)]
13371 switch (INTVAL (operands[3]))
13375 return "vscatterpf0<ssemodesuffix>pd\t{%4|%4}";
13378 return "vscatterpf1<ssemodesuffix>pd\t{%4|%4}";
13380 gcc_unreachable ();
13383 [(set_attr "type" "sse")
13384 (set_attr "prefix" "evex")
13385 (set_attr "mode" "XI")])
;; vexp2 on the 512-bit float vector modes (VF_512), with a single source
;; operand.  The <mask_name> and <round_saeonly_name> parts of the pattern
;; name, and the matching <mask_operand2> / <round_saeonly_mask_op2>
;; fragments in the template, supply the masked and SAE-only variants.
13387 (define_insn "avx512er_exp2<mode><mask_name><round_saeonly_name>"
13388 [(set (match_operand:VF_512 0 "register_operand" "=v")
13390 [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
13393 "vexp2<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
13394 [(set_attr "prefix" "evex")
13395 (set_attr "type" "sse")
13396 (set_attr "mode" "<MODE>")])
;; vrcp28 on the 512-bit float vector modes (VF_512), with a single source
;; operand.  Masked and SAE-only variants come from the <mask_name> and
;; <round_saeonly_name> parts of the pattern name.
13398 (define_insn "<mask_codefor>avx512er_rcp28<mode><mask_name><round_saeonly_name>"
13399 [(set (match_operand:VF_512 0 "register_operand" "=v")
13401 [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
13404 "vrcp28<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
13405 [(set_attr "prefix" "evex")
13406 (set_attr "type" "sse")
13407 (set_attr "mode" "<MODE>")])
;; Scalar vrcp28 on the 128-bit float vector modes (VF_128); the mnemonic
;; uses <ssescalarmodesuffix>.  Operand 1 is the source of the operation and
;; operand 2 is a second register source printed between it and the
;; destination.  An SAE-only variant comes from <round_saeonly_name>.
13409 (define_insn "avx512er_vmrcp28<mode><round_saeonly_name>"
13410 [(set (match_operand:VF_128 0 "register_operand" "=v")
13413 [(match_operand:VF_128 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
13415 (match_operand:VF_128 2 "register_operand" "v")
13418 "vrcp28<ssescalarmodesuffix>\t{<round_saeonly_op3>%1, %2, %0|%0, %2, %1<round_saeonly_op3>}"
13419 [(set_attr "length_immediate" "1")
13420 (set_attr "prefix" "evex")
13421 (set_attr "type" "sse")
13422 (set_attr "mode" "<MODE>")])
;; vrsqrt28 on the 512-bit float vector modes (VF_512), with a single source
;; operand.  Masked and SAE-only variants come from the <mask_name> and
;; <round_saeonly_name> parts of the pattern name.
13424 (define_insn "<mask_codefor>avx512er_rsqrt28<mode><mask_name><round_saeonly_name>"
13425 [(set (match_operand:VF_512 0 "register_operand" "=v")
13427 [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
13430 "vrsqrt28<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
13431 [(set_attr "prefix" "evex")
13432 (set_attr "type" "sse")
13433 (set_attr "mode" "<MODE>")])
;; Scalar vrsqrt28 on the 128-bit float vector modes (VF_128); the mnemonic
;; uses <ssescalarmodesuffix>.  Operand 1 is the source of the operation and
;; operand 2 is a second register source printed between it and the
;; destination.  An SAE-only variant comes from <round_saeonly_name>.
13435 (define_insn "avx512er_vmrsqrt28<mode><round_saeonly_name>"
13436 [(set (match_operand:VF_128 0 "register_operand" "=v")
13439 [(match_operand:VF_128 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
13441 (match_operand:VF_128 2 "register_operand" "v")
13444 "vrsqrt28<ssescalarmodesuffix>\t{<round_saeonly_op3>%1, %2, %0|%0, %2, %1<round_saeonly_op3>}"
13445 [(set_attr "length_immediate" "1")
13446 (set_attr "type" "sse")
13447 (set_attr "prefix" "evex")
13448 (set_attr "mode" "<MODE>")])
13450 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
13452 ;; XOP instructions
13454 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; xop_plus iterates over the plain and signed-saturating additions (plus,
;; ss_plus).  The macs and madcs code attributes give the mnemonic fragment
;; for each: "macs"/"madcs" for plus, "macss"/"madcss" for ss_plus.
13456 (define_code_iterator xop_plus [plus ss_plus])
13458 (define_code_attr macs [(plus "macs") (ss_plus "macss")])
13459 (define_code_attr madcs [(plus "madcs") (ss_plus "madcss")])
13461 ;; XOP parallel integer multiply/add instructions.
;; vpmacs* / vpmacss* on VI24_128 modes.  Operands 1 and 2 form a
;; commutative pair ("%" on operand 1) of which only operand 2 may be in
;; memory; operand 3 is a register input of the same mode as the result.
;; The insn is classed as "ssemuladd".
13463 (define_insn "xop_p<macs><ssemodesuffix><ssemodesuffix>"
13464 [(set (match_operand:VI24_128 0 "register_operand" "=x")
13467 (match_operand:VI24_128 1 "nonimmediate_operand" "%x")
13468 (match_operand:VI24_128 2 "nonimmediate_operand" "xm"))
13469 (match_operand:VI24_128 3 "register_operand" "x")))]
13471 "vp<macs><ssemodesuffix><ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
13472 [(set_attr "type" "ssemuladd")
13473 (set_attr "mode" "TI")])
;; vpmacsdql / vpmacssdql.  Uses elements 0 and 2 of the V4SI operands 1 and
;; 2 (see the two element-selection parallels); operand 3 and the result
;; are V2DI.  The insn is classed as "ssemuladd".
13475 (define_insn "xop_p<macs>dql"
13476 [(set (match_operand:V2DI 0 "register_operand" "=x")
13481 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
13482 (parallel [(const_int 0) (const_int 2)])))
13485 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
13486 (parallel [(const_int 0) (const_int 2)]))))
13487 (match_operand:V2DI 3 "register_operand" "x")))]
13489 "vp<macs>dql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
13490 [(set_attr "type" "ssemuladd")
13491 (set_attr "mode" "TI")])
;; vpmacsdqh / vpmacssdqh.  Uses elements 1 and 3 of the V4SI operands 1 and
;; 2 (see the two element-selection parallels); operand 3 and the result
;; are V2DI.  The insn is classed as "ssemuladd".
13493 (define_insn "xop_p<macs>dqh"
13494 [(set (match_operand:V2DI 0 "register_operand" "=x")
13499 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
13500 (parallel [(const_int 1) (const_int 3)])))
13503 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
13504 (parallel [(const_int 1) (const_int 3)]))))
13505 (match_operand:V2DI 3 "register_operand" "x")))]
13507 "vp<macs>dqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
13508 [(set_attr "type" "ssemuladd")
13509 (set_attr "mode" "TI")])
13511 ;; XOP parallel integer multiply/add instructions for the intrinsics
;; vpmacswd / vpmacsswd.  Uses the odd-numbered elements (1, 3, 5, 7) of the
;; V8HI operands 1 and 2; operand 3 and the result are V4SI.  The insn is
;; classed as "ssemuladd".
13512 (define_insn "xop_p<macs>wd"
13513 [(set (match_operand:V4SI 0 "register_operand" "=x")
13518 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
13519 (parallel [(const_int 1) (const_int 3)
13520 (const_int 5) (const_int 7)])))
13523 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
13524 (parallel [(const_int 1) (const_int 3)
13525 (const_int 5) (const_int 7)]))))
13526 (match_operand:V4SI 3 "register_operand" "x")))]
13528 "vp<macs>wd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
13529 [(set_attr "type" "ssemuladd")
13530 (set_attr "mode" "TI")])
;; vpmadcswd / vpmadcsswd.  The RTL selects both the even-numbered
;; (0, 2, 4, 6) and the odd-numbered (1, 3, 5, 7) elements of the V8HI
;; inputs; operand 3 and the result are V4SI.  The insn is classed as
;; "ssemuladd".
13532 (define_insn "xop_p<madcs>wd"
13533 [(set (match_operand:V4SI 0 "register_operand" "=x")
13539 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
13540 (parallel [(const_int 0) (const_int 2)
13541 (const_int 4) (const_int 6)])))
13544 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
13545 (parallel [(const_int 0) (const_int 2)
13546 (const_int 4) (const_int 6)]))))
13551 (parallel [(const_int 1) (const_int 3)
13552 (const_int 5) (const_int 7)])))
13556 (parallel [(const_int 1) (const_int 3)
13557 (const_int 5) (const_int 7)])))))
13558 (match_operand:V4SI 3 "register_operand" "x")))]
13560 "vp<madcs>wd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
13561 [(set_attr "type" "ssemuladd")
13562 (set_attr "mode" "TI")])
13564 ;; XOP parallel XMM conditional moves
;; vpcmov over every mode in the V iterator.  Operand 3 comes first in the
;; RTL, followed by operands 1 and 2.  The two alternatives allow at most
;; one memory input: either operand 2 (first alternative) or operand 3
;; (second alternative).
13565 (define_insn "xop_pcmov_<mode><avxsizesuffix>"
13566 [(set (match_operand:V 0 "register_operand" "=x,x")
13568 (match_operand:V 3 "nonimmediate_operand" "x,m")
13569 (match_operand:V 1 "register_operand" "x,x")
13570 (match_operand:V 2 "nonimmediate_operand" "xm,x")))]
13572 "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
13573 [(set_attr "type" "sse4arg")])
13575 ;; XOP horizontal add/subtract instructions
;; vphaddbw / vphaddubw: horizontal add of bytes into words.  The RTL
;; selects the even-numbered bytes and the odd-numbered bytes of the V16QI
;; operand 1 separately; the result is V8HI.
13576 (define_insn "xop_phadd<u>bw"
13577 [(set (match_operand:V8HI 0 "register_operand" "=x")
13581 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
13582 (parallel [(const_int 0) (const_int 2)
13583 (const_int 4) (const_int 6)
13584 (const_int 8) (const_int 10)
13585 (const_int 12) (const_int 14)])))
13589 (parallel [(const_int 1) (const_int 3)
13590 (const_int 5) (const_int 7)
13591 (const_int 9) (const_int 11)
13592 (const_int 13) (const_int 15)])))))]
13594 "vphadd<u>bw\t{%1, %0|%0, %1}"
13595 [(set_attr "type" "sseiadd1")])
;; vphaddbd / vphaddubd: horizontal add of bytes into doublewords.  Four
;; selections of the V16QI operand 1 are combined, taking bytes n, n+4, n+8
;; and n+12 for n = 0..3; the result is V4SI.
13597 (define_insn "xop_phadd<u>bd"
13598 [(set (match_operand:V4SI 0 "register_operand" "=x")
13603 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
13604 (parallel [(const_int 0) (const_int 4)
13605 (const_int 8) (const_int 12)])))
13609 (parallel [(const_int 1) (const_int 5)
13610 (const_int 9) (const_int 13)]))))
13615 (parallel [(const_int 2) (const_int 6)
13616 (const_int 10) (const_int 14)])))
13620 (parallel [(const_int 3) (const_int 7)
13621 (const_int 11) (const_int 15)]))))))]
13623 "vphadd<u>bd\t{%1, %0|%0, %1}"
13624 [(set_attr "type" "sseiadd1")])
;; vphaddbq / vphaddubq: horizontal add of bytes into quadwords.  Eight
;; two-element selections of the V16QI operand 1 are combined, taking bytes
;; n and n+8 for n = 0..7; the result is V2DI.
13626 (define_insn "xop_phadd<u>bq"
13627 [(set (match_operand:V2DI 0 "register_operand" "=x")
13633 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
13634 (parallel [(const_int 0) (const_int 8)])))
13638 (parallel [(const_int 1) (const_int 9)]))))
13643 (parallel [(const_int 2) (const_int 10)])))
13647 (parallel [(const_int 3) (const_int 11)])))))
13653 (parallel [(const_int 4) (const_int 12)])))
13657 (parallel [(const_int 5) (const_int 13)]))))
13662 (parallel [(const_int 6) (const_int 14)])))
13666 (parallel [(const_int 7) (const_int 15)])))))))]
13668 "vphadd<u>bq\t{%1, %0|%0, %1}"
13669 [(set_attr "type" "sseiadd1")])
;; vphaddwd / vphadduwd: horizontal add of words into doublewords.  The RTL
;; selects the even-numbered and the odd-numbered elements of the V8HI
;; operand 1 separately; the result is V4SI.
13671 (define_insn "xop_phadd<u>wd"
13672 [(set (match_operand:V4SI 0 "register_operand" "=x")
13676 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
13677 (parallel [(const_int 0) (const_int 2)
13678 (const_int 4) (const_int 6)])))
13682 (parallel [(const_int 1) (const_int 3)
13683 (const_int 5) (const_int 7)])))))]
13685 "vphadd<u>wd\t{%1, %0|%0, %1}"
13686 [(set_attr "type" "sseiadd1")])
;; vphaddwq / vphadduwq: horizontal add of words into quadwords.  Four
;; two-element selections of the V8HI operand 1 are combined, taking
;; elements n and n+4 for n = 0..3; the result is V2DI.
13688 (define_insn "xop_phadd<u>wq"
13689 [(set (match_operand:V2DI 0 "register_operand" "=x")
13694 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
13695 (parallel [(const_int 0) (const_int 4)])))
13699 (parallel [(const_int 1) (const_int 5)]))))
13704 (parallel [(const_int 2) (const_int 6)])))
13708 (parallel [(const_int 3) (const_int 7)]))))))]
13710 "vphadd<u>wq\t{%1, %0|%0, %1}"
13711 [(set_attr "type" "sseiadd1")])
;; vphadddq / vphaddudq: horizontal add of doublewords into quadwords.  The
;; RTL selects elements (0, 2) and (1, 3) of the V4SI operand 1; the result
;; is V2DI.
13713 (define_insn "xop_phadd<u>dq"
13714 [(set (match_operand:V2DI 0 "register_operand" "=x")
13718 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
13719 (parallel [(const_int 0) (const_int 2)])))
13723 (parallel [(const_int 1) (const_int 3)])))))]
13725 "vphadd<u>dq\t{%1, %0|%0, %1}"
13726 [(set_attr "type" "sseiadd1")])
;; vphsubbw: horizontal subtract of bytes into words.  The RTL selects the
;; even-numbered bytes and the odd-numbered bytes of the V16QI operand 1
;; separately; the result is V8HI.
13728 (define_insn "xop_phsubbw"
13729 [(set (match_operand:V8HI 0 "register_operand" "=x")
13733 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
13734 (parallel [(const_int 0) (const_int 2)
13735 (const_int 4) (const_int 6)
13736 (const_int 8) (const_int 10)
13737 (const_int 12) (const_int 14)])))
13741 (parallel [(const_int 1) (const_int 3)
13742 (const_int 5) (const_int 7)
13743 (const_int 9) (const_int 11)
13744 (const_int 13) (const_int 15)])))))]
13746 "vphsubbw\t{%1, %0|%0, %1}"
13747 [(set_attr "type" "sseiadd1")])
;; vphsubwd: horizontal subtract of words into doublewords.  The RTL selects
;; the even-numbered and the odd-numbered elements of the V8HI operand 1
;; separately; the result is V4SI.
13749 (define_insn "xop_phsubwd"
13750 [(set (match_operand:V4SI 0 "register_operand" "=x")
13754 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
13755 (parallel [(const_int 0) (const_int 2)
13756 (const_int 4) (const_int 6)])))
13760 (parallel [(const_int 1) (const_int 3)
13761 (const_int 5) (const_int 7)])))))]
13763 "vphsubwd\t{%1, %0|%0, %1}"
13764 [(set_attr "type" "sseiadd1")])
;; vphsubdq: horizontal subtract of doublewords into quadwords.  The RTL
;; selects elements (0, 2) and (1, 3) of the V4SI operand 1; the result is
;; V2DI.
13766 (define_insn "xop_phsubdq"
13767 [(set (match_operand:V2DI 0 "register_operand" "=x")
13771 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
13772 (parallel [(const_int 0) (const_int 2)])))
13776 (parallel [(const_int 1) (const_int 3)])))))]
13778 "vphsubdq\t{%1, %0|%0, %1}"
13779 [(set_attr "type" "sseiadd1")])
13781 ;; XOP permute instructions
;; vpperm, modelled as UNSPEC_XOP_PERMUTE over three V16QI inputs.  Requires
;; TARGET_XOP, and the condition rejects the case where operands 2 and 3
;; are both in memory; the two alternatives allow a memory operand in
;; either position 2 or position 3.
13782 (define_insn "xop_pperm"
13783 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
13785 [(match_operand:V16QI 1 "register_operand" "x,x")
13786 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
13787 (match_operand:V16QI 3 "nonimmediate_operand" "xm,x")]
13788 UNSPEC_XOP_PERMUTE))]
13789 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
13790 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
13791 [(set_attr "type" "sse4arg")
13792 (set_attr "mode" "TI")])
13794 ;; XOP pack instructions that combine two vectors into a smaller vector
;; Packs two V2DI inputs (operands 1 and 2) into one V4SI result using
;; vpperm.  The V16QI operand 3 appears only in a `use' and is passed to the
;; instruction as its selector operand.  Operands 2 and 3 may not both be in
;; memory.
13795 (define_insn "xop_pperm_pack_v2di_v4si"
13796 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
13799 (match_operand:V2DI 1 "register_operand" "x,x"))
13801 (match_operand:V2DI 2 "nonimmediate_operand" "x,m"))))
13802 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
13803 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
13804 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
13805 [(set_attr "type" "sse4arg")
13806 (set_attr "mode" "TI")])
;; Packs two V4SI inputs (operands 1 and 2) into one V8HI result using
;; vpperm.  The V16QI operand 3 appears only in a `use' and is passed to the
;; instruction as its selector operand.  Operands 2 and 3 may not both be in
;; memory.
13808 (define_insn "xop_pperm_pack_v4si_v8hi"
13809 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
13812 (match_operand:V4SI 1 "register_operand" "x,x"))
13814 (match_operand:V4SI 2 "nonimmediate_operand" "x,m"))))
13815 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
13816 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
13817 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
13818 [(set_attr "type" "sse4arg")
13819 (set_attr "mode" "TI")])
;; Packs two V8HI inputs (operands 1 and 2) into one V16QI result using
;; vpperm.  The V16QI operand 3 appears only in a `use' and is passed to the
;; instruction as its selector operand.  Operands 2 and 3 may not both be in
;; memory.
13821 (define_insn "xop_pperm_pack_v8hi_v16qi"
13822 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
13825 (match_operand:V8HI 1 "register_operand" "x,x"))
13827 (match_operand:V8HI 2 "nonimmediate_operand" "x,m"))))
13828 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
13829 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
13830 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
13831 [(set_attr "type" "sse4arg")
13832 (set_attr "mode" "TI")])
13834 ;; XOP packed rotate instructions
;; Rotate-left expander for 128-bit integer vectors with a scalar SImode
;; count.  When operand 2 is not a constant accepted by
;; const_0_to_<sserotatemax>_operand, the count is converted to the vector's
;; scalar element mode if its mode differs, replicated into every element
;; of a new vector register through vec_init, and the per-element rotate
;; xop_vrotl<mode>3 is emitted with that vector as the count.
13835 (define_expand "rotl<mode>3"
13836 [(set (match_operand:VI_128 0 "register_operand")
13838 (match_operand:VI_128 1 "nonimmediate_operand")
13839 (match_operand:SI 2 "general_operand")))]
13842 /* If we were given a scalar, convert it to parallel */
13843 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
13845 rtvec vs = rtvec_alloc (<ssescalarnum>);
13846 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
13847 rtx reg = gen_reg_rtx (<MODE>mode);
13848 rtx op2 = operands[2];
13851 if (GET_MODE (op2) != <ssescalarmode>mode)
13853 op2 = gen_reg_rtx (<ssescalarmode>mode);
13854 convert_move (op2, operands[2], false);
13857 for (i = 0; i < <ssescalarnum>; i++)
13858 RTVEC_ELT (vs, i) = op2;
13860 emit_insn (gen_vec_init<mode> (reg, par));
13861 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
;; Rotate-right expander for 128-bit integer vectors with a scalar SImode
;; count.  When operand 2 is not a constant accepted by
;; const_0_to_<sserotatemax>_operand, the count is converted to the vector's
;; scalar element mode if its mode differs and replicated into a vector
;; through vec_init.  That vector is negated, and xop_vrotl<mode>3 is
;; emitted with the negated counts.
13866 (define_expand "rotr<mode>3"
13867 [(set (match_operand:VI_128 0 "register_operand")
13869 (match_operand:VI_128 1 "nonimmediate_operand")
13870 (match_operand:SI 2 "general_operand")))]
13873 /* If we were given a scalar, convert it to parallel */
13874 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
13876 rtvec vs = rtvec_alloc (<ssescalarnum>);
13877 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
13878 rtx neg = gen_reg_rtx (<MODE>mode);
13879 rtx reg = gen_reg_rtx (<MODE>mode);
13880 rtx op2 = operands[2];
13883 if (GET_MODE (op2) != <ssescalarmode>mode)
13885 op2 = gen_reg_rtx (<ssescalarmode>mode);
13886 convert_move (op2, operands[2], false);
13889 for (i = 0; i < <ssescalarnum>; i++)
13890 RTVEC_ELT (vs, i) = op2;
13892 emit_insn (gen_vec_init<mode> (reg, par));
13893 emit_insn (gen_neg<mode>2 (neg, reg));
13894 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], neg));
;; Rotate left by an immediate count: vprot with operand 2 restricted by
;; const_0_to_<sserotatemax>_operand.  Operand 1 may be a register or
;; memory.
13899 (define_insn "xop_rotl<mode>3"
13900 [(set (match_operand:VI_128 0 "register_operand" "=x")
13902 (match_operand:VI_128 1 "nonimmediate_operand" "xm")
13903 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
13905 "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
13906 [(set_attr "type" "sseishft")
13907 (set_attr "length_immediate" "1")
13908 (set_attr "mode" "TI")])
;; Rotate right by an immediate count, emitted as vprot.  The output code
;; computes the element bit size minus operand 2 and the template prints
;; that value as %3 in place of the original count.  (The left-hand side of
;; the assignment is not visible in this view.)
13910 (define_insn "xop_rotr<mode>3"
13911 [(set (match_operand:VI_128 0 "register_operand" "=x")
13913 (match_operand:VI_128 1 "nonimmediate_operand" "xm")
13914 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
13918 = GEN_INT (GET_MODE_BITSIZE (<ssescalarmode>mode) - INTVAL (operands[2]));
13919 return \"vprot<ssemodesuffix>\t{%3, %1, %0|%0, %1, %3}\";
13921 [(set_attr "type" "sseishft")
13922 (set_attr "length_immediate" "1")
13923 (set_attr "mode" "TI")])
;; Per-element rotate right with a vector of counts: negates operand 2 into
;; a fresh register and emits xop_vrotl<mode>3 with the negated counts.
13925 (define_expand "vrotr<mode>3"
13926 [(match_operand:VI_128 0 "register_operand")
13927 (match_operand:VI_128 1 "register_operand")
13928 (match_operand:VI_128 2 "register_operand")]
13931 rtx reg = gen_reg_rtx (<MODE>mode);
13932 emit_insn (gen_neg<mode>2 (reg, operands[2]));
13933 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
;; Per-element rotate left with a vector of counts: forwards the three
;; operands unchanged to xop_vrotl<mode>3.
13937 (define_expand "vrotl<mode>3"
13938 [(match_operand:VI_128 0 "register_operand")
13939 (match_operand:VI_128 1 "register_operand")
13940 (match_operand:VI_128 2 "register_operand")]
13943 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], operands[2]));
;; vprot with a per-element count vector (operand 2).  The RTL is an
;; if_then_else whose final arm uses the negation of operand 2.  Requires
;; TARGET_XOP and rejects operands 1 and 2 both being in memory; the two
;; alternatives allow a memory operand in either position.
13947 (define_insn "xop_vrotl<mode>3"
13948 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
13949 (if_then_else:VI_128
13951 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
13954 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
13958 (neg:VI_128 (match_dup 2)))))]
13959 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
13960 "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
13961 [(set_attr "type" "sseishft")
13962 (set_attr "prefix_data16" "0")
13963 (set_attr "prefix_extra" "2")
13964 (set_attr "mode" "TI")])
13966 ;; XOP packed shift instructions.
;; Per-element shift-right expander (vlshr) for VI12_128 modes: negates the
;; count vector (operand 2) and emits xop_shl<mode>3 with the negated
;; counts.
13967 (define_expand "vlshr<mode>3"
13968 [(set (match_operand:VI12_128 0 "register_operand")
13970 (match_operand:VI12_128 1 "register_operand")
13971 (match_operand:VI12_128 2 "nonimmediate_operand")))]
13974 rtx neg = gen_reg_rtx (<MODE>mode);
13975 emit_insn (gen_neg<mode>2 (neg, operands[2]));
13976 emit_insn (gen_xop_shl<mode>3 (operands[0], operands[1], neg));
;; Per-element shift-right expander (vlshr) for VI48_128 modes, enabled for
;; TARGET_AVX2 or TARGET_XOP.  The visible preparation code negates the
;; count vector and emits xop_shl<mode>3.
;; NOTE(review): the guard around that code is not visible here; presumably
;; it runs only when AVX2 is unavailable -- confirm against the full file.
13980 (define_expand "vlshr<mode>3"
13981 [(set (match_operand:VI48_128 0 "register_operand")
13983 (match_operand:VI48_128 1 "register_operand")
13984 (match_operand:VI48_128 2 "nonimmediate_operand")))]
13985 "TARGET_AVX2 || TARGET_XOP"
13989 rtx neg = gen_reg_rtx (<MODE>mode);
13990 emit_insn (gen_neg<mode>2 (neg, operands[2]));
13991 emit_insn (gen_xop_shl<mode>3 (operands[0], operands[1], neg));
;; Per-element shift-right expander (vlshr) for the 512-bit VI48_512 modes.
;; Only the RTL template is shown here; no preparation code is visible.
13996 (define_expand "vlshr<mode>3"
13997 [(set (match_operand:VI48_512 0 "register_operand")
13999 (match_operand:VI48_512 1 "register_operand")
14000 (match_operand:VI48_512 2 "nonimmediate_operand")))]
;; Per-element shift-right expander (vlshr) for the 256-bit VI48_256 modes.
;; Only the RTL template is shown here; no preparation code is visible.
14003 (define_expand "vlshr<mode>3"
14004 [(set (match_operand:VI48_256 0 "register_operand")
14006 (match_operand:VI48_256 1 "register_operand")
14007 (match_operand:VI48_256 2 "nonimmediate_operand")))]
;; Per-element arithmetic shift-right expander for VI128_128 modes: negates
;; the count vector (operand 2) and emits xop_sha<mode>3 with the negated
;; counts.
14010 (define_expand "vashr<mode>3"
14011 [(set (match_operand:VI128_128 0 "register_operand")
14012 (ashiftrt:VI128_128
14013 (match_operand:VI128_128 1 "register_operand")
14014 (match_operand:VI128_128 2 "nonimmediate_operand")))]
14017 rtx neg = gen_reg_rtx (<MODE>mode);
14018 emit_insn (gen_neg<mode>2 (neg, operands[2]));
14019 emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], neg));
;; Per-element arithmetic shift right for V4SI, enabled for TARGET_AVX2 or
;; TARGET_XOP.  The visible preparation code negates the count vector and
;; emits xop_shav4si3.
;; NOTE(review): the guard around that code is not visible here; presumably
;; it runs only when AVX2 is unavailable -- confirm against the full file.
14023 (define_expand "vashrv4si3"
14024 [(set (match_operand:V4SI 0 "register_operand")
14025 (ashiftrt:V4SI (match_operand:V4SI 1 "register_operand")
14026 (match_operand:V4SI 2 "nonimmediate_operand")))]
14027 "TARGET_AVX2 || TARGET_XOP"
14031 rtx neg = gen_reg_rtx (V4SImode);
14032 emit_insn (gen_negv4si2 (neg, operands[2]));
14033 emit_insn (gen_xop_shav4si3 (operands[0], operands[1], neg));
;; Per-element arithmetic shift right for V16SI.  Only the RTL template is
;; shown here; no preparation code is visible.
14038 (define_expand "vashrv16si3"
14039 [(set (match_operand:V16SI 0 "register_operand")
14040 (ashiftrt:V16SI (match_operand:V16SI 1 "register_operand")
14041 (match_operand:V16SI 2 "nonimmediate_operand")))]
;; Per-element arithmetic shift right for V8SI.  Only the RTL template is
;; shown here; no preparation code is visible.
14044 (define_expand "vashrv8si3"
14045 [(set (match_operand:V8SI 0 "register_operand")
14046 (ashiftrt:V8SI (match_operand:V8SI 1 "register_operand")
14047 (match_operand:V8SI 2 "nonimmediate_operand")))]
;; Per-element shift-left expander (vashl) for VI12_128 modes: emits
;; xop_sha<mode>3 with the count vector passed through unchanged.
14050 (define_expand "vashl<mode>3"
14051 [(set (match_operand:VI12_128 0 "register_operand")
14053 (match_operand:VI12_128 1 "register_operand")
14054 (match_operand:VI12_128 2 "nonimmediate_operand")))]
14057 emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], operands[2]));
;; Per-element shift-left expander (vashl) for VI48_128 modes, enabled for
;; TARGET_AVX2 or TARGET_XOP.  The visible preparation code forces the
;; count vector into a register and emits xop_sha<mode>3.
;; NOTE(review): the guard around that code is not visible here; presumably
;; it runs only when AVX2 is unavailable -- confirm against the full file.
14061 (define_expand "vashl<mode>3"
14062 [(set (match_operand:VI48_128 0 "register_operand")
14064 (match_operand:VI48_128 1 "register_operand")
14065 (match_operand:VI48_128 2 "nonimmediate_operand")))]
14066 "TARGET_AVX2 || TARGET_XOP"
14070 operands[2] = force_reg (<MODE>mode, operands[2]);
14071 emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], operands[2]));
;; Per-element shift-left expander (vashl) for the 512-bit VI48_512 modes.
;; Only the RTL template is shown here; no preparation code is visible.
14076 (define_expand "vashl<mode>3"
14077 [(set (match_operand:VI48_512 0 "register_operand")
14079 (match_operand:VI48_512 1 "register_operand")
14080 (match_operand:VI48_512 2 "nonimmediate_operand")))]
;; Per-element shift-left expander (vashl) for the 256-bit VI48_256 modes.
;; Only the RTL template is shown here; no preparation code is visible.
14083 (define_expand "vashl<mode>3"
14084 [(set (match_operand:VI48_256 0 "register_operand")
14086 (match_operand:VI48_256 1 "register_operand")
14087 (match_operand:VI48_256 2 "nonimmediate_operand")))]
;; vpsha with a per-element count vector (operand 2).  The RTL is an
;; if_then_else whose final arm uses the negation of operand 2.  Requires
;; TARGET_XOP and rejects operands 1 and 2 both being in memory; the two
;; alternatives allow a memory operand in either position.
14090 (define_insn "xop_sha<mode>3"
14091 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
14092 (if_then_else:VI_128
14094 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
14097 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
14101 (neg:VI_128 (match_dup 2)))))]
14102 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
14103 "vpsha<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
14104 [(set_attr "type" "sseishft")
14105 (set_attr "prefix_data16" "0")
14106 (set_attr "prefix_extra" "2")
14107 (set_attr "mode" "TI")])
;; vpshl with a per-element count vector (operand 2).  The RTL is an
;; if_then_else whose final arm uses the negation of operand 2.  Requires
;; TARGET_XOP and rejects operands 1 and 2 both being in memory; the two
;; alternatives allow a memory operand in either position.
14109 (define_insn "xop_shl<mode>3"
14110 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
14111 (if_then_else:VI_128
14113 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
14116 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
14120 (neg:VI_128 (match_dup 2)))))]
14121 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
14122 "vpshl<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
14123 [(set_attr "type" "sseishft")
14124 (set_attr "prefix_data16" "0")
14125 (set_attr "prefix_extra" "2")
14126 (set_attr "mode" "TI")])
;; Shift expander for byte vectors (VI1_AVX2) with a scalar SImode count.
;; With TARGET_XOP and V16QImode: for codes other than ASHIFT a constant
;; count is negated at expand time; the count is replicated into all 16
;; elements of a V16QI register through vec_init; a runtime negation of
;; that vector is available for non-constant counts (its guard is not
;; visible here -- presumably the `negate' flag); finally xop_shlv16qi3 is
;; used for LSHIFTRT and xop_shav16qi3 for the other codes.  The remaining
;; visible path calls ix86_expand_vecop_qihi.
14128 (define_expand "<shift_insn><mode>3"
14129 [(set (match_operand:VI1_AVX2 0 "register_operand")
14130 (any_shift:VI1_AVX2
14131 (match_operand:VI1_AVX2 1 "register_operand")
14132 (match_operand:SI 2 "nonmemory_operand")))]
14135 if (TARGET_XOP && <MODE>mode == V16QImode)
14137 bool negate = false;
14138 rtx (*gen) (rtx, rtx, rtx);
14142 if (<CODE> != ASHIFT)
14144 if (CONST_INT_P (operands[2]))
14145 operands[2] = GEN_INT (-INTVAL (operands[2]));
14149 par = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
14150 for (i = 0; i < 16; i++)
14151 XVECEXP (par, 0, i) = operands[2];
14153 tmp = gen_reg_rtx (V16QImode);
14154 emit_insn (gen_vec_initv16qi (tmp, par));
14157 emit_insn (gen_negv16qi2 (tmp, tmp));
14159 gen = (<CODE> == LSHIFTRT ? gen_xop_shlv16qi3 : gen_xop_shav16qi3);
14160 emit_insn (gen (operands[0], operands[1], tmp));
14163 ix86_expand_vecop_qihi (<CODE>, operands[0], operands[1], operands[2]);
;; Arithmetic shift right of V2DI by a scalar DImode count.  A constant
;; count is negated at expand time; the count is replicated into both
;; elements of a V2DI register through vec_init; a runtime negation of that
;; vector is available for non-constant counts (its guard is not visible
;; here -- presumably the `negate' flag); the shift itself is emitted as
;; xop_shav2di3.
14167 (define_expand "ashrv2di3"
14168 [(set (match_operand:V2DI 0 "register_operand")
14170 (match_operand:V2DI 1 "register_operand")
14171 (match_operand:DI 2 "nonmemory_operand")))]
14174 rtx reg = gen_reg_rtx (V2DImode);
14176 bool negate = false;
14179 if (CONST_INT_P (operands[2]))
14180 operands[2] = GEN_INT (-INTVAL (operands[2]));
14184 par = gen_rtx_PARALLEL (V2DImode, rtvec_alloc (2));
14185 for (i = 0; i < 2; i++)
14186 XVECEXP (par, 0, i) = operands[2];
14188 emit_insn (gen_vec_initv2di (reg, par));
14191 emit_insn (gen_negv2di2 (reg, reg));
14193 emit_insn (gen_xop_shav2di3 (operands[0], operands[1], reg));
14197 ;; XOP FRCZ support
;; Full-width vfrcz over the FMAMODE modes.  The single source operand may
;; be a register or a memory reference.
14198 (define_insn "xop_frcz<mode>2"
14199 [(set (match_operand:FMAMODE 0 "register_operand" "=x")
14201 [(match_operand:FMAMODE 1 "nonimmediate_operand" "xm")]
14204 "vfrcz<ssemodesuffix>\t{%1, %0|%0, %1}"
14205 [(set_attr "type" "ssecvt1")
14206 (set_attr "mode" "<MODE>")])
;; Expander for the xop_vmfrcz form on 128-bit float vectors.  Callers
;; pass operands 0 and 1 only; the preparation statement fills in
;; operand 2 with the all-zero constant of the vector mode.
14208 (define_expand "xop_vmfrcz<mode>2"
14209 [(set (match_operand:VF_128 0 "register_operand")
14212 [(match_operand:VF_128 1 "nonimmediate_operand")]
14217 "operands[2] = CONST0_RTX (<MODE>mode);")
;; Matcher for the scalar-suffixed vfrcz (vfrcz<ssescalarmodesuffix>).
;; Operand 2 must satisfy const0_operand, i.e. be the zero constant.
;; In the Intel-syntax template the source is printed with the <iptr>
;; size override.
14219 (define_insn "*xop_vmfrcz<mode>2"
14220 [(set (match_operand:VF_128 0 "register_operand" "=x")
14223 [(match_operand:VF_128 1 "nonimmediate_operand" "xm")]
14225 (match_operand:VF_128 2 "const0_operand")
14228 "vfrcz<ssescalarmodesuffix>\t{%1, %0|%0, %<iptr>1}"
14229 [(set_attr "type" "ssecvt1")
14230 (set_attr "mode" "<MODE>")])
;; XOP vpcom comparison of 128-bit integer vectors.  Operand 1 is the
;; comparison operator itself (it must satisfy
;; ix86_comparison_int_operator) and is printed into the mnemonic via %Y1.
;; Operand 2 must be a register; operand 3 may be a register or memory.
14232 (define_insn "xop_maskcmp<mode>3"
14233 [(set (match_operand:VI_128 0 "register_operand" "=x")
14234 (match_operator:VI_128 1 "ix86_comparison_int_operator"
14235 [(match_operand:VI_128 2 "register_operand" "x")
14236 (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
14238 "vpcom%Y1<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
14239 [(set_attr "type" "sse4arg")
14240 (set_attr "prefix_data16" "0")
14241 (set_attr "prefix_rep" "0")
14242 (set_attr "prefix_extra" "2")
14243 (set_attr "length_immediate" "1")
14244 (set_attr "mode" "TI")])
;; Unsigned counterpart of the XOP vpcom comparison: the operator must
;; satisfy ix86_comparison_uns_operator and the mnemonic carries a `u'
;; after the condition printed by %Y1.
14246 (define_insn "xop_maskcmp_uns<mode>3"
14247 [(set (match_operand:VI_128 0 "register_operand" "=x")
14248 (match_operator:VI_128 1 "ix86_comparison_uns_operator"
14249 [(match_operand:VI_128 2 "register_operand" "x")
14250 (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
14252 "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
14253 [(set_attr "type" "ssecmp")
14254 (set_attr "prefix_data16" "0")
14255 (set_attr "prefix_rep" "0")
14256 (set_attr "prefix_extra" "2")
14257 (set_attr "length_immediate" "1")
14258 (set_attr "mode" "TI")])
14260 ;; Version of pcom*u* that is called from the intrinsics that allows pcomequ*
14261 ;; and pcomneu* not to be converted to the signed ones in case somebody needs
14262 ;; the exact instruction generated for the intrinsic.
;; The unsigned comparison operator is wrapped in
;; UNSPEC_XOP_UNSIGNED_CMP; the emitted template is the same
;; vpcom...u form as the plain unsigned pattern.
14263 (define_insn "xop_maskcmp_uns2<mode>3"
14264 [(set (match_operand:VI_128 0 "register_operand" "=x")
14266 [(match_operator:VI_128 1 "ix86_comparison_uns_operator"
14267 [(match_operand:VI_128 2 "register_operand" "x")
14268 (match_operand:VI_128 3 "nonimmediate_operand" "xm")])]
14269 UNSPEC_XOP_UNSIGNED_CMP))]
14271 "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
14272 [(set_attr "type" "ssecmp")
14273 (set_attr "prefix_data16" "0")
14274 (set_attr "prefix_extra" "2")
14275 (set_attr "length_immediate" "1")
14276 (set_attr "mode" "TI")])
14278 ;; Pcomtrue and pcomfalse support. These are useless instructions, but are
14279 ;; being added here to be complete.
;; Operand 3 is a constant integer selector: a nonzero value emits
;; vpcomtrue, zero emits vpcomfalse.  Operands 1 and 2 are the two vector
;; inputs named in the template.
14280 (define_insn "xop_pcom_tf<mode>3"
14281 [(set (match_operand:VI_128 0 "register_operand" "=x")
14283 [(match_operand:VI_128 1 "register_operand" "x")
14284 (match_operand:VI_128 2 "nonimmediate_operand" "xm")
14285 (match_operand:SI 3 "const_int_operand" "n")]
14286 UNSPEC_XOP_TRUEFALSE))]
14289 return ((INTVAL (operands[3]) != 0)
14290 ? "vpcomtrue<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
14291 : "vpcomfalse<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}");
14293 [(set_attr "type" "ssecmp")
14294 (set_attr "prefix_data16" "0")
14295 (set_attr "prefix_extra" "2")
14296 (set_attr "length_immediate" "1")
14297 (set_attr "mode" "TI")])
;; XOP vpermil2 on 128- and 256-bit float vectors.  Operands 1 and 2 are
;; float vectors of the destination mode, operand 3 is a vector of the
;; matching integer mode (<sseintvecmode>, register or memory), and
;; operand 4 is an immediate restricted to 0..3.
14299 (define_insn "xop_vpermil2<mode>3"
14300 [(set (match_operand:VF_128_256 0 "register_operand" "=x")
14302 [(match_operand:VF_128_256 1 "register_operand" "x")
14303 (match_operand:VF_128_256 2 "nonimmediate_operand" "%x")
14304 (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "xm")
14305 (match_operand:SI 4 "const_0_to_3_operand" "n")]
14308 "vpermil2<ssemodesuffix>\t{%4, %3, %2, %1, %0|%0, %1, %2, %3, %4}"
14309 [(set_attr "type" "sse4arg")
14310 (set_attr "length_immediate" "1")
14311 (set_attr "mode" "<MODE>")])
14313 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; AES encryption round instruction on V2DI.  Alternative 0 (isa noavx) is
;; the two-operand aesenc with the destination tied to operand 1;
;; alternative 1 (isa avx) is the three-operand vaesenc.  Operand 2 may be
;; a register or memory in both alternatives.
14315 (define_insn "aesenc"
14316 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
14317 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
14318 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
14322 aesenc\t{%2, %0|%0, %2}
14323 vaesenc\t{%2, %1, %0|%0, %1, %2}"
14324 [(set_attr "isa" "noavx,avx")
14325 (set_attr "type" "sselog1")
14326 (set_attr "prefix_extra" "1")
14327 (set_attr "prefix" "orig,vex")
14328 (set_attr "btver2_decode" "double,double")
14329 (set_attr "mode" "TI")])
;; AES last encryption round (UNSPEC_AESENCLAST) on V2DI.  Alternative 0
;; (isa noavx) is the two-operand aesenclast with the destination tied to
;; operand 1; alternative 1 (isa avx) is the three-operand vaesenclast.
14331 (define_insn "aesenclast"
14332 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
14333 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
14334 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
14335 UNSPEC_AESENCLAST))]
14338 aesenclast\t{%2, %0|%0, %2}
14339 vaesenclast\t{%2, %1, %0|%0, %1, %2}"
14340 [(set_attr "isa" "noavx,avx")
14341 (set_attr "type" "sselog1")
14342 (set_attr "prefix_extra" "1")
14343 (set_attr "prefix" "orig,vex")
14344 (set_attr "btver2_decode" "double,double")
14345 (set_attr "mode" "TI")])
;; AES decryption round instruction on V2DI.  Alternative 0 (isa noavx) is
;; the two-operand aesdec with the destination tied to operand 1;
;; alternative 1 (isa avx) is the three-operand vaesdec.
14347 (define_insn "aesdec"
14348 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
14349 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
14350 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
14354 aesdec\t{%2, %0|%0, %2}
14355 vaesdec\t{%2, %1, %0|%0, %1, %2}"
14356 [(set_attr "isa" "noavx,avx")
14357 (set_attr "type" "sselog1")
14358 (set_attr "prefix_extra" "1")
14359 (set_attr "prefix" "orig,vex")
14360 (set_attr "btver2_decode" "double,double")
14361 (set_attr "mode" "TI")])
;; AES last decryption round (UNSPEC_AESDECLAST) on V2DI.  Alternative 0
;; (isa noavx) is the two-operand aesdeclast with the destination tied to
;; operand 1; alternative 1 (isa avx) is the three-operand vaesdeclast.
14363 (define_insn "aesdeclast"
14364 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
14365 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
14366 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
14367 UNSPEC_AESDECLAST))]
14370 aesdeclast\t{%2, %0|%0, %2}
14371 vaesdeclast\t{%2, %1, %0|%0, %1, %2}"
14372 [(set_attr "isa" "noavx,avx")
14373 (set_attr "type" "sselog1")
14374 (set_attr "prefix_extra" "1")
14375 (set_attr "prefix" "orig,vex")
14376 (set_attr "btver2_decode" "double,double")
14377 (set_attr "mode" "TI")])
;; aesimc on V2DI with a single register-or-memory source.  There is one
;; alternative only: the template uses the %v prefix and the prefix
;; attribute is maybe_vex, so the same pattern serves both encodings.
14379 (define_insn "aesimc"
14380 [(set (match_operand:V2DI 0 "register_operand" "=x")
14381 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")]
14384 "%vaesimc\t{%1, %0|%0, %1}"
14385 [(set_attr "type" "sselog1")
14386 (set_attr "prefix_extra" "1")
14387 (set_attr "prefix" "maybe_vex")
14388 (set_attr "mode" "TI")])
;; aeskeygenassist (UNSPEC_AESKEYGENASSIST) on V2DI.  Operand 1 is the
;; register-or-memory source and operand 2 an immediate restricted to
;; 0..255.  A single alternative with the %v prefix / maybe_vex covers
;; both encodings.
14390 (define_insn "aeskeygenassist"
14391 [(set (match_operand:V2DI 0 "register_operand" "=x")
14392 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")
14393 (match_operand:SI 2 "const_0_to_255_operand" "n")]
14394 UNSPEC_AESKEYGENASSIST))]
14396 "%vaeskeygenassist\t{%2, %1, %0|%0, %1, %2}"
14397 [(set_attr "type" "sselog1")
14398 (set_attr "prefix_extra" "1")
14399 (set_attr "length_immediate" "1")
14400 (set_attr "prefix" "maybe_vex")
14401 (set_attr "mode" "TI")])
;; pclmulqdq on V2DI with an immediate (operand 3, restricted to 0..255).
;; Alternative 0 (isa noavx) is the legacy form with the destination tied
;; to operand 1; alternative 1 (isa avx) is the VEX form vpclmulqdq with a
;; separate first source.
14403 (define_insn "pclmulqdq"
14404 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
14405 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
14406 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")
14407 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
14411 pclmulqdq\t{%3, %2, %0|%0, %2, %3}
14412 vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}"
14413 [(set_attr "isa" "noavx,avx")
14414 (set_attr "type" "sselog1")
14415 (set_attr "prefix_extra" "1")
14416 (set_attr "length_immediate" "1")
14417 (set_attr "prefix" "orig,vex")
14418 (set_attr "mode" "TI")])
;; Expander that builds the PARALLEL describing vzeroall.  The vector has
;; nregs + 1 elements, where nregs is 16 for TARGET_64BIT and 8 otherwise:
;; element 0 is an UNSPEC_VOLATILE wrapping const0_rtx, and elements
;; 1..nregs are SETs that store the V8SImode zero constant into each SSE
;; register (SSE_REGNO (regno)).
14420 (define_expand "avx_vzeroall"
14421 [(match_par_dup 0 [(const_int 0)])]
14424 int nregs = TARGET_64BIT ? 16 : 8;
14427 operands[0] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs + 1));
14429 XVECEXP (operands[0], 0, 0)
14430 = gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, const0_rtx),
14433 for (regno = 0; regno < nregs; regno++)
14434 XVECEXP (operands[0], 0, regno + 1)
14435 = gen_rtx_SET (VOIDmode,
14436 gen_rtx_REG (V8SImode, SSE_REGNO (regno)),
14437 CONST0_RTX (V8SImode));
;; Matcher for the vzeroall PARALLEL: the whole vector must be accepted by
;; the vzeroall_operation predicate and its first element must be the
;; UNSPECV_VZEROALL unspec_volatile.  The insn has no ModRM byte and no
;; memory access according to its attributes.
14440 (define_insn "*avx_vzeroall"
14441 [(match_parallel 0 "vzeroall_operation"
14442 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROALL)])]
14445 [(set_attr "type" "sse")
14446 (set_attr "modrm" "0")
14447 (set_attr "memory" "none")
14448 (set_attr "prefix" "vex")
14449 (set_attr "btver2_decode" "vector")
14450 (set_attr "mode" "OI")])
14452 ;; Clear the upper 128 bits of AVX registers, equivalent to a NOP
14453 ;; if the upper 128 bits are unused.
;; Modelled purely as an UNSPECV_VZEROUPPER unspec_volatile with no
;; operands; attributes mark it as having no ModRM byte and no memory
;; access.
14454 (define_insn "avx_vzeroupper"
14455 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)]
14458 [(set_attr "type" "sse")
14459 (set_attr "modrm" "0")
14460 (set_attr "memory" "none")
14461 (set_attr "prefix" "vex")
14462 (set_attr "btver2_decode" "vector")
14463 (set_attr "mode" "OI")])
;; vpbroadcast for the VI integer vector modes: element 0 of an
;; XMM-sized source (<ssexmmmode>, register or memory) is selected and
;; broadcast into the destination.  In Intel syntax the source is printed
;; with the <iptr> size override.
14465 (define_insn "avx2_pbroadcast<mode>"
14466 [(set (match_operand:VI 0 "register_operand" "=x")
14468 (vec_select:<ssescalarmode>
14469 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "xm")
14470 (parallel [(const_int 0)]))))]
14472 "vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}"
14473 [(set_attr "type" "ssemov")
14474 (set_attr "prefix_extra" "1")
14475 (set_attr "prefix" "vex")
14476 (set_attr "mode" "<sseinsnmode>")])
;; vpbroadcast variant whose source is a full 256-bit integer vector:
;; element 0 of operand 1 is duplicated across the destination.
;; Alternative 0 takes the source from memory; alternative 1 takes it
;; from a register, which the template prints through the %x1 modifier.
14478 (define_insn "avx2_pbroadcast<mode>_1"
14479 [(set (match_operand:VI_256 0 "register_operand" "=x,x")
14480 (vec_duplicate:VI_256
14481 (vec_select:<ssescalarmode>
14482 (match_operand:VI_256 1 "nonimmediate_operand" "m,x")
14483 (parallel [(const_int 0)]))))]
14486 vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}
14487 vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %x1}"
14488 [(set_attr "type" "ssemov")
14489 (set_attr "prefix_extra" "1")
14490 (set_attr "prefix" "vex")
14491 (set_attr "mode" "<sseinsnmode>")])
;; Variable-index vperm for 256/512-bit vectors with 4- or 8-byte
;; elements, optionally masked (<mask_name>).  Operand 1 is a vector of
;; the destination mode (register or memory) and operand 2 a register of
;; the matching integer vector mode.  Note that the AT&T template lists
;; %1 before %2.
14493 (define_insn "<avx2_avx512f>_permvar<mode><mask_name>"
14494 [(set (match_operand:VI48F_256_512 0 "register_operand" "=v")
14495 (unspec:VI48F_256_512
14496 [(match_operand:VI48F_256_512 1 "nonimmediate_operand" "vm")
14497 (match_operand:<sseintvecmode> 2 "register_operand" "v")]
14499 "TARGET_AVX2 && <mask_mode512bit_condition>"
14500 "vperm<ssemodesuffix>\t{%1, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1}"
14501 [(set_attr "type" "sselog")
14502 (set_attr "prefix" "<mask_prefix2>")
14503 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the immediate-controlled permute.  The 8-bit immediate in
;; operand 2 is split into four 2-bit fields (bits 1:0, 3:2, 5:4, 7:6),
;; which are passed as separate constants to the corresponding _1 pattern.
14505 (define_expand "<avx2_avx512f>_perm<mode>"
14506 [(match_operand:VI8F_256_512 0 "register_operand")
14507 (match_operand:VI8F_256_512 1 "nonimmediate_operand")
14508 (match_operand:SI 2 "const_0_to_255_operand")]
14511 int mask = INTVAL (operands[2]);
14512 emit_insn (gen_<avx2_avx512f>_perm<mode>_1 (operands[0], operands[1],
14513 GEN_INT ((mask >> 0) & 3),
14514 GEN_INT ((mask >> 2) & 3),
14515 GEN_INT ((mask >> 4) & 3),
14516 GEN_INT ((mask >> 6) & 3)));
;; Masked form of the immediate-controlled permute expander for the V8FI
;; modes.  The 8-bit immediate in operand 2 is split into four 2-bit
;; fields as in the unmasked expander; operand 3 (a vector_move_operand)
;; and operand 4 (a register of the mask mode) are forwarded unchanged to
;; the _1_mask pattern.
14520 (define_expand "avx512f_perm<mode>_mask"
14521 [(match_operand:V8FI 0 "register_operand")
14522 (match_operand:V8FI 1 "nonimmediate_operand")
14523 (match_operand:SI 2 "const_0_to_255_operand")
14524 (match_operand:V8FI 3 "vector_move_operand")
14525 (match_operand:<avx512fmaskmode> 4 "register_operand")]
14528 int mask = INTVAL (operands[2]);
14529 emit_insn (gen_<avx2_avx512f>_perm<mode>_1_mask (operands[0], operands[1],
14530 GEN_INT ((mask >> 0) & 3),
14531 GEN_INT ((mask >> 2) & 3),
14532 GEN_INT ((mask >> 4) & 3),
14533 GEN_INT ((mask >> 6) & 3),
14534 operands[3], operands[4]));
;; vec_select form of the immediate-controlled permute.  Operands 2..5
;; are four element selectors, each restricted to 0..3.  The output code
;; packs them back into one immediate (2 bits each, operand 2 in the low
;; bits), overwrites operands[2] with it and prints that as the
;; instruction immediate.
14538 (define_insn "<avx2_avx512f>_perm<mode>_1<mask_name>"
14539 [(set (match_operand:VI8F_256_512 0 "register_operand" "=v")
14540 (vec_select:VI8F_256_512
14541 (match_operand:VI8F_256_512 1 "nonimmediate_operand" "vm")
14542 (parallel [(match_operand 2 "const_0_to_3_operand")
14543 (match_operand 3 "const_0_to_3_operand")
14544 (match_operand 4 "const_0_to_3_operand")
14545 (match_operand 5 "const_0_to_3_operand")])))]
14546 "TARGET_AVX2 && <mask_mode512bit_condition>"
14549 mask |= INTVAL (operands[2]) << 0;
14550 mask |= INTVAL (operands[3]) << 2;
14551 mask |= INTVAL (operands[4]) << 4;
14552 mask |= INTVAL (operands[5]) << 6;
14553 operands[2] = GEN_INT (mask);
14554 return "vperm<ssemodesuffix>\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
14556 [(set_attr "type" "sselog")
14557 (set_attr "prefix" "<mask_prefix2>")
14558 (set_attr "mode" "<sseinsnmode>")])
;; vperm2i128 on V4DI.  Operand 1 is a register, operand 2 a register or
;; memory, and operand 3 an immediate restricted to 0..255.
14560 (define_insn "avx2_permv2ti"
14561 [(set (match_operand:V4DI 0 "register_operand" "=x")
14563 [(match_operand:V4DI 1 "register_operand" "x")
14564 (match_operand:V4DI 2 "nonimmediate_operand" "xm")
14565 (match_operand:SI 3 "const_0_to_255_operand" "n")]
14568 "vperm2i128\t{%3, %2, %1, %0|%0, %1, %2, %3}"
14569 [(set_attr "type" "sselog")
14570 (set_attr "prefix" "vex")
14571 (set_attr "mode" "OI")])
;; vbroadcastsd from a register: element 0 of the V2DF register in
;; operand 1 is duplicated into every element of the V4DF destination.
14573 (define_insn "avx2_vec_dupv4df"
14574 [(set (match_operand:V4DF 0 "register_operand" "=x")
14575 (vec_duplicate:V4DF
14577 (match_operand:V2DF 1 "register_operand" "x")
14578 (parallel [(const_int 0)]))))]
14580 "vbroadcastsd\t{%1, %0|%0, %1}"
14581 [(set_attr "type" "sselog1")
14582 (set_attr "prefix" "vex")
14583 (set_attr "mode" "V4DF")])
14585 ;; Modes handled by AVX vec_dup patterns.
;; These are the four-element (V4DI, V4DF) and eight-element (V8SI, V8SF)
;; integer and float vector modes; no per-mode target condition is
;; attached.
14586 (define_mode_iterator AVX_VEC_DUP_MODE
14587 [V8SI V8SF V4DI V4DF])
;; vec_duplicate of a scalar into the AVX_VEC_DUP_MODE modes.  There are
;; three alternatives: a memory source (any ISA, "*"), a register source
;; enabled for avx2 (printed via %x1), and a discouraged register source
;; ("?x") enabled for noavx2.
14589 (define_insn "vec_dup<mode>"
14590 [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand" "=x,x,x")
14591 (vec_duplicate:AVX_VEC_DUP_MODE
14592 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "m,x,?x")))]
14595 vbroadcast<ssescalarmodesuffix>\t{%1, %0|%0, %1}
14596 vbroadcast<ssescalarmodesuffix>\t{%x1, %0|%0, %x1}
14598 [(set_attr "type" "ssemov")
14599 (set_attr "prefix_extra" "1")
14600 (set_attr "prefix" "vex")
14601 (set_attr "isa" "*,avx2,noavx2")
14602 (set_attr "mode" "V8SF")])
;; 512-bit broadcast (optionally masked) of element 0 of an XMM-sized
;; source, which may be a register or memory.  The mnemonic is assembled
;; from <sseintprefix> and <bcstscalarsuff>.
14604 (define_insn "<mask_codefor>avx512f_vec_dup<mode><mask_name>"
14605 [(set (match_operand:VI48F_512 0 "register_operand" "=v")
14606 (vec_duplicate:VI48F_512
14607 (vec_select:<ssescalarmode>
14608 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "vm")
14609 (parallel [(const_int 0)]))))]
14611 "v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
14612 [(set_attr "type" "ssemov")
14613 (set_attr "prefix" "evex")
14614 (set_attr "mode" "<sseinsnmode>")])
;; Broadcast of an XMM-sized vector (<ssexmmmode>) into a V16FI mode,
;; optionally masked.  A register source uses vshuf...32x4 with
;; immediate 0 and the source printed twice through %g1; a memory source
;; uses vbroadcast...32x4.
14616 (define_insn "<mask_codefor>avx512f_broadcast<mode><mask_name>"
14617 [(set (match_operand:V16FI 0 "register_operand" "=v,v")
14618 (vec_duplicate:V16FI
14619 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "v,m")))]
14622 vshuf<shuffletype>32x4\t{$0x0, %g1, %g1, %0<mask_operand2>|%0<mask_operand2>, %g1, %g1, 0x0}
14623 vbroadcast<shuffletype>32x4\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
14624 [(set_attr "type" "ssemov")
14625 (set_attr "prefix" "evex")
14626 (set_attr "mode" "<sseinsnmode>")])
;; Broadcast of a half-width vector (<ssehalfvecmode>) into a V8FI mode,
;; optionally masked.  A register source uses vshuf...64x2 with
;; immediate 0x44 and the source printed twice through %g1; a memory
;; source uses vbroadcast...64x4.
14628 (define_insn "<mask_codefor>avx512f_broadcast<mode><mask_name>"
14629 [(set (match_operand:V8FI 0 "register_operand" "=v,v")
14630 (vec_duplicate:V8FI
14631 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "v,m")))]
14634 vshuf<shuffletype>64x2\t{$0x44, %g1, %g1, %0<mask_operand2>|%0<mask_operand2>, %g1, %g1, 0x44}
14635 vbroadcast<shuffletype>64x4\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
14636 [(set_attr "type" "ssemov")
14637 (set_attr "prefix" "evex")
14638 (set_attr "mode" "<sseinsnmode>")])
;; vpbroadcast of a scalar held in a general-purpose register
;; (constraint "r") into a 512-bit integer vector, optionally masked.
;; The V8DImode instance is only enabled when TARGET_64BIT holds.
14640 (define_insn "<mask_codefor>avx512f_vec_dup_gpr<mode><mask_name>"
14641 [(set (match_operand:VI48_512 0 "register_operand" "=v")
14642 (vec_duplicate:VI48_512
14643 (match_operand:<ssescalarmode> 1 "register_operand" "r")))]
14644 "TARGET_AVX512F && (<MODE>mode != V8DImode || TARGET_64BIT)"
14645 "vpbroadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
14646 [(set_attr "type" "ssemov")
14647 (set_attr "prefix" "evex")
14648 (set_attr "mode" "<sseinsnmode>")])
;; 512-bit broadcast (optionally masked) whose source is a scalar-mode
;; operand rather than a vec_select from a vector; the constraint "vm"
;; accepts either a vector register or memory.
14650 (define_insn "<mask_codefor>avx512f_vec_dup_mem<mode><mask_name>"
14651 [(set (match_operand:VI48F_512 0 "register_operand" "=v")
14652 (vec_duplicate:VI48F_512
14653 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "vm")))]
14655 "v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
14656 [(set_attr "type" "ssemov")
14657 (set_attr "prefix" "evex")
14658 (set_attr "mode" "<sseinsnmode>")])
;; vbroadcasti128 into a 256-bit integer vector.  The source is a
;; half-width vector and must be in memory (memory_operand, "m").
14660 (define_insn "avx2_vbroadcasti128_<mode>"
14661 [(set (match_operand:VI_256 0 "register_operand" "=x")
14663 (match_operand:<ssehalfvecmode> 1 "memory_operand" "m")
14666 "vbroadcasti128\t{%1, %0|%0, %1}"
14667 [(set_attr "type" "ssemov")
14668 (set_attr "prefix_extra" "1")
14669 (set_attr "prefix" "vex")
14670 (set_attr "mode" "OI")])
;; Rewrites a vec_duplicate from a register for AVX without AVX2, once
;; reload has completed.  Operand 2 is created as the destination's hard
;; register viewed in the half-vector mode; the scalar is first
;; duplicated into that half, and the two halves are then formed by a
;; vec_concat of operand 2 with itself.
14673 [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand")
14674 (vec_duplicate:AVX_VEC_DUP_MODE
14675 (match_operand:<ssescalarmode> 1 "register_operand")))]
14676 "TARGET_AVX && !TARGET_AVX2 && reload_completed"
14677 [(set (match_dup 2)
14678 (vec_duplicate:<ssehalfvecmode> (match_dup 1)))
14680 (vec_concat:AVX_VEC_DUP_MODE (match_dup 2) (match_dup 2)))]
14681 "operands[2] = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (operands[0]));")
;; 128-bit-to-256-bit broadcast for all 256-bit modes.  There are three
;; alternatives: a memory source uses vbroadcast<i128>; a source already
;; in the destination register ("0") uses vinsert<i128> with immediate 1;
;; any other register source (discouraged, "?x") uses vperm2<i128> with
;; immediate 0 and the source printed twice through %t1.
14683 (define_insn "avx_vbroadcastf128_<mode>"
14684 [(set (match_operand:V_256 0 "register_operand" "=x,x,x")
14686 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "m,0,?x")
14690 vbroadcast<i128>\t{%1, %0|%0, %1}
14691 vinsert<i128>\t{$1, %1, %0, %0|%0, %0, %1, 1}
14692 vperm2<i128>\t{$0, %t1, %t1, %0|%0, %t1, %t1, 0}"
14693 [(set_attr "type" "ssemov,sselog1,sselog1")
14694 (set_attr "prefix_extra" "1")
14695 (set_attr "length_immediate" "0,1,1")
14696 (set_attr "prefix" "vex")
14697 (set_attr "mode" "<sseinsnmode>")])
14699 ;; For broadcast[i|f]32x2. Yes there is no v4sf version, only v4si.
;; The 512-bit modes (V16SI, V16SF) are unconditional; the 256-bit modes
;; and V4SI additionally require TARGET_AVX512VL.
14700 (define_mode_iterator VI4F_BRCST32x2
14701 [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
14702 V16SF (V8SF "TARGET_AVX512VL")])
;; Maps each 256/512-bit vector mode with 64-bit elements to the
;; two-element vector mode of the same element type (V2DF or V2DI).
14704 (define_mode_attr 64x2mode
14705 [(V8DF "V2DF") (V8DI "V2DI") (V4DI "V2DI") (V4DF "V2DF")])
;; Maps each vector mode with 32-bit elements listed here to the
;; two-element vector mode of the same element type (V2SF or V2SI).
14707 (define_mode_attr 32x2mode
14708 [(V16SF "V2SF") (V16SI "V2SI") (V8SI "V2SI")
14709 (V8SF "V2SF") (V4SI "V2SI")])
;; vbroadcast...32x2, optionally masked: elements 0 and 1 of an XMM-sized
;; source (register or memory) are selected as a <32x2mode> pair and
;; duplicated across the destination.
14711 (define_insn "<mask_codefor>avx512dq_broadcast<mode><mask_name>"
14712 [(set (match_operand:VI4F_BRCST32x2 0 "register_operand" "=v")
14713 (vec_duplicate:VI4F_BRCST32x2
14714 (vec_select:<32x2mode>
14715 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "vm")
14716 (parallel [(const_int 0) (const_int 1)]))))]
14718 "vbroadcast<shuffletype>32x2\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
14719 [(set_attr "type" "ssemov")
14720 (set_attr "prefix_extra" "1")
14721 (set_attr "prefix" "evex")
14722 (set_attr "mode" "<sseinsnmode>")])
;; Broadcast of an XMM-sized vector into a 256-bit mode with 32-bit
;; elements (VI4F_256), optionally masked.  A register source uses
;; vshuf...32x4 with immediate 0 and the source printed twice through
;; %t1; a memory source uses vbroadcast...32x4.
14724 (define_insn "<mask_codefor>avx512vl_broadcast<mode><mask_name>_1"
14725 [(set (match_operand:VI4F_256 0 "register_operand" "=v,v")
14726 (vec_duplicate:VI4F_256
14727 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "v,m")))]
14730 vshuf<shuffletype>32x4\t{$0x0, %t1, %t1, %0<mask_operand2>|%0<mask_operand2>, %t1, %t1, 0x0}
14731 vbroadcast<shuffletype>32x4\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
14732 [(set_attr "type" "ssemov")
14733 (set_attr "prefix_extra" "1")
14734 (set_attr "prefix" "evex")
14735 (set_attr "mode" "<sseinsnmode>")])
;; Broadcast of a half-width vector into a V16FI mode, optionally masked.
;; A register source uses vshuf...32x4 with immediate 0x44 and the source
;; printed twice through %g1; a memory source uses vbroadcast...32x8.
14737 (define_insn "<mask_codefor>avx512dq_broadcast<mode><mask_name>_1"
14738 [(set (match_operand:V16FI 0 "register_operand" "=v,v")
14739 (vec_duplicate:V16FI
14740 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "v,m")))]
14743 vshuf<shuffletype>32x4\t{$0x44, %g1, %g1, %0<mask_operand2>|%0<mask_operand2>, %g1, %g1, 0x44}
14744 vbroadcast<shuffletype>32x8\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
14745 [(set_attr "type" "ssemov")
14746 (set_attr "prefix_extra" "1")
14747 (set_attr "prefix" "evex")
14748 (set_attr "mode" "<sseinsnmode>")])
14750 ;; For broadcast[i|f]64x2
;; The 512-bit modes (V8DI, V8DF) are unconditional; the 256-bit modes
;; (V4DI, V4DF) additionally require TARGET_AVX512VL.
14751 (define_mode_iterator VI8F_BRCST64x2
14752 [V8DI V8DF (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")])
;; Broadcast of a two-element 64-bit vector (<64x2mode>) into the
;; VI8F_BRCST64x2 modes, optionally masked.  A register source uses
;; vshuf...64x2 with immediate 0 and the source printed twice through the
;; <concat_tg_mode> modifier; a memory source uses vbroadcast...64x2.
14754 (define_insn "<mask_codefor>avx512dq_broadcast<mode><mask_name>_1"
14755 [(set (match_operand:VI8F_BRCST64x2 0 "register_operand" "=v,v")
14756 (vec_duplicate:VI8F_BRCST64x2
14757 (match_operand:<64x2mode> 1 "nonimmediate_operand" "v,m")))]
14760 vshuf<shuffletype>64x2\t{$0x0, %<concat_tg_mode>1, %<concat_tg_mode>1, %0<mask_operand2>|%0<mask_operand2>, %<concat_tg_mode>1, %<concat_tg_mode>1, 0x0}
14761 vbroadcast<shuffletype>64x2\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
14762 [(set_attr "type" "ssemov")
14763 (set_attr "prefix_extra" "1")
14764 (set_attr "prefix" "evex")
14765 (set_attr "mode" "<sseinsnmode>")])
;; vpbroadcastmb2q: duplicates a value derived from a QImode mask
;; register (constraint "Yk") across a vector with 64-bit elements.
14767 (define_insn "avx512cd_maskb_vec_dup<mode>"
14768 [(set (match_operand:VI8_AVX512VL 0 "register_operand" "=v")
14769 (vec_duplicate:VI8_AVX512VL
14771 (match_operand:QI 1 "register_operand" "Yk"))))]
14773 "vpbroadcastmb2q\t{%1, %0|%0, %1}"
14774 [(set_attr "type" "mskmov")
14775 (set_attr "prefix" "evex")
14776 (set_attr "mode" "XI")])
;; vpbroadcastmw2d: duplicates a value derived from an HImode mask
;; register (constraint "Yk") across a vector with 32-bit elements.
14778 (define_insn "avx512cd_maskw_vec_dup<mode>"
14779 [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
14780 (vec_duplicate:VI4_AVX512VL
14782 (match_operand:HI 1 "register_operand" "Yk"))))]
14784 "vpbroadcastmw2d\t{%1, %0|%0, %1}"
14785 [(set_attr "type" "mskmov")
14786 (set_attr "prefix" "evex")
14787 (set_attr "mode" "XI")])
14789 ;; Recognize broadcast as a vec_select as produced by builtin_vec_perm.
14790 ;; If it so happens that the input is in memory, use vbroadcast.
14791 ;; Otherwise use vpermilp (and in the case of 256-bit modes, vperm2f128).
;; V4SF instance.  Operand 3 is the index of the element being broadcast.
;; For a memory source, operand 1 is re-addressed to the selected SFmode
;; element (byte offset elt * 4) and vbroadcastss is emitted; for a
;; register source, vpermilps is emitted with immediate elt * 0x55, i.e.
;; the 2-bit index repeated in all four selector fields.
14792 (define_insn "*avx_vperm_broadcast_v4sf"
14793 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
14795 (match_operand:V4SF 1 "nonimmediate_operand" "m,o,x")
14796 (match_parallel 2 "avx_vbroadcast_operand"
14797 [(match_operand 3 "const_int_operand" "C,n,n")])))]
14800 int elt = INTVAL (operands[3]);
14801 switch (which_alternative)
14805 operands[1] = adjust_address_nv (operands[1], SFmode, elt * 4);
14806 return "vbroadcastss\t{%1, %0|%0, %k1}";
14808 operands[2] = GEN_INT (elt * 0x55);
14809 return "vpermilps\t{%2, %1, %0|%0, %1, %2}";
14811 gcc_unreachable ();
14814 [(set_attr "type" "ssemov,ssemov,sselog1")
14815 (set_attr "prefix_extra" "1")
14816 (set_attr "length_immediate" "0,0,1")
14817 (set_attr "prefix" "vex")
14818 (set_attr "mode" "SF,SF,V4SF")])
;; 256-bit float instance of the broadcast-as-vec_select recognizer.  It
;; is split after reload (unless the mode is V4DF and AVX2 is available)
;; into a vec_duplicate of operand 1.  The preparation code handles the
;; source as follows:
;;  - with AVX2 and element 0, vec_dup<mode> is emitted on the scalar
;;    lowpart;
;;  - otherwise, for the in-register sequence, vpermil first replicates
;;    the wanted element within its 128-bit lane, then vperm2f128 copies
;;    that lane (elt / (<ssescalarnum> / 2)) into both halves;
;;  - operand 1 may instead be re-addressed to the selected scalar
;;    element with adjust_address (NOTE(review): presumably the
;;    memory-source case -- confirm).
14820 (define_insn_and_split "*avx_vperm_broadcast_<mode>"
14821 [(set (match_operand:VF_256 0 "register_operand" "=x,x,x")
14823 (match_operand:VF_256 1 "nonimmediate_operand" "m,o,?x")
14824 (match_parallel 2 "avx_vbroadcast_operand"
14825 [(match_operand 3 "const_int_operand" "C,n,n")])))]
14828 "&& reload_completed && (<MODE>mode != V4DFmode || !TARGET_AVX2)"
14829 [(set (match_dup 0) (vec_duplicate:VF_256 (match_dup 1)))]
14831 rtx op0 = operands[0], op1 = operands[1];
14832 int elt = INTVAL (operands[3]);
14838 if (TARGET_AVX2 && elt == 0)
14840 emit_insn (gen_vec_dup<mode> (op0, gen_lowpart (<ssescalarmode>mode,
14845 /* Shuffle element we care about into all elements of the 128-bit lane.
14846 The other lane gets shuffled too, but we don't care. */
14847 if (<MODE>mode == V4DFmode)
14848 mask = (elt & 1 ? 15 : 0);
14850 mask = (elt & 3) * 0x55;
14851 emit_insn (gen_avx_vpermil<mode> (op0, op1, GEN_INT (mask)));
14853 /* Shuffle the lane we care about into both lanes of the dest. */
14854 mask = (elt / (<ssescalarnum> / 2)) * 0x11;
14855 emit_insn (gen_avx_vperm2f128<mode>3 (op0, op0, op0, GEN_INT (mask)));
14859 operands[1] = adjust_address (op1, <ssescalarmode>mode,
14860 elt * GET_MODE_SIZE (<ssescalarmode>mode));
;; vpermil expander for the VF2 modes with an 8-bit immediate.  The
;; immediate is converted into an explicit PARALLEL of element indices:
;; elements are processed in pairs (i, i + 1), and bits i and i + 1 of
;; the immediate each pick index i or i + 1 for the corresponding output
;; element.
14863 (define_expand "<sse2_avx_avx512f>_vpermil<mode><mask_name>"
14864 [(set (match_operand:VF2 0 "register_operand")
14866 (match_operand:VF2 1 "nonimmediate_operand")
14867 (match_operand:SI 2 "const_0_to_255_operand")))]
14868 "TARGET_AVX && <mask_mode512bit_condition>"
14870 int mask = INTVAL (operands[2]);
14871 rtx perm[<ssescalarnum>];
14874 for (i = 0; i < <ssescalarnum>; i = i + 2)
14876 perm[i] = GEN_INT (((mask >> i) & 1) + i);
14877 perm[i + 1] = GEN_INT (((mask >> (i + 1)) & 1) + i);
14881 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
;; vpermil expander for the VF1 modes with an 8-bit immediate.  The
;; immediate is converted into an explicit PARALLEL of element indices:
;; elements are processed in groups of four, and the same four 2-bit
;; fields of the immediate (bits 1:0, 3:2, 5:4, 7:6) are reused for every
;; group, each offset by the group's base index i.
14884 (define_expand "<sse2_avx_avx512f>_vpermil<mode><mask_name>"
14885 [(set (match_operand:VF1 0 "register_operand")
14887 (match_operand:VF1 1 "nonimmediate_operand")
14888 (match_operand:SI 2 "const_0_to_255_operand")))]
14889 "TARGET_AVX && <mask_mode512bit_condition>"
14891 int mask = INTVAL (operands[2]);
14892 rtx perm[<ssescalarnum>];
14895 for (i = 0; i < <ssescalarnum>; i = i + 4)
14897 perm[i] = GEN_INT (((mask >> 0) & 3) + i);
14898 perm[i + 1] = GEN_INT (((mask >> 2) & 3) + i);
14899 perm[i + 2] = GEN_INT (((mask >> 4) & 3) + i);
14900 perm[i + 3] = GEN_INT (((mask >> 6) & 3) + i);
14904 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
;; Matcher for vpermil with a constant selection expressed as a PARALLEL.
;; The parallel is validated in the insn condition by
;; avx_vpermilp_parallel, whose nonzero result is the instruction
;; immediate plus one; the output code subtracts one, stores the
;; immediate in operands[2] and prints it.
14907 (define_insn "*<sse2_avx_avx512f>_vpermilp<mode><mask_name>"
14908 [(set (match_operand:VF 0 "register_operand" "=v")
14910 (match_operand:VF 1 "nonimmediate_operand" "vm")
14911 (match_parallel 2 ""
14912 [(match_operand 3 "const_int_operand")])))]
14913 "TARGET_AVX && <mask_mode512bit_condition>
14914 && avx_vpermilp_parallel (operands[2], <MODE>mode)"
14916 int mask = avx_vpermilp_parallel (operands[2], <MODE>mode) - 1;
14917 operands[2] = GEN_INT (mask);
14918 return "vpermil<ssemodesuffix>\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}";
14920 [(set_attr "type" "sselog")
14921 (set_attr "prefix_extra" "1")
14922 (set_attr "length_immediate" "1")
14923 (set_attr "prefix" "<mask_prefix>")
14924 (set_attr "mode" "<sseinsnmode>")])
;; Variable-control vpermil, optionally masked.  Operand 1 is the float
;; vector (register only) and operand 2 a vector of the matching integer
;; mode, which may be a register or memory.
14926 (define_insn "<sse2_avx_avx512f>_vpermilvar<mode>3<mask_name>"
14927 [(set (match_operand:VF 0 "register_operand" "=v")
14929 [(match_operand:VF 1 "register_operand" "v")
14930 (match_operand:<sseintvecmode> 2 "nonimmediate_operand" "vm")]
14932 "TARGET_AVX && <mask_mode512bit_condition>"
14933 "vpermil<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14934 [(set_attr "type" "sselog")
14935 (set_attr "prefix_extra" "1")
14936 (set_attr "btver2_decode" "vector")
14937 (set_attr "prefix" "<mask_prefix>")
14938 (set_attr "mode" "<sseinsnmode>")])
;; Zero-masking entry point for vpermi2var.  It forwards operands 0..3 to
;; the _maskz_1 pattern, inserting the all-zero vector of the mode ahead
;; of the mask register (operand 4).
14940 (define_expand "avx512f_vpermi2var<mode>3_maskz"
14941 [(match_operand:VI48F_512 0 "register_operand" "=v")
14942 (match_operand:VI48F_512 1 "register_operand" "v")
14943 (match_operand:<sseintvecmode> 2 "register_operand" "0")
14944 (match_operand:VI48F_512 3 "nonimmediate_operand" "vm")
14945 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")]
14948 emit_insn (gen_avx512f_vpermi2var<mode>3_maskz_1 (
14949 operands[0], operands[1], operands[2], operands[3],
14950 CONST0_RTX (<MODE>mode), operands[4]));
;; vpermi2 for 512-bit vectors.  Operand 2, a vector of the matching
;; integer mode, is tied to the destination register (constraint "0"), so
;; the template prints only %3, %1 and %0.  Operand 3 may be a register
;; or memory.
14954 (define_insn "avx512f_vpermi2var<mode>3<sd_maskz_name>"
14955 [(set (match_operand:VI48F_512 0 "register_operand" "=v")
14957 [(match_operand:VI48F_512 1 "register_operand" "v")
14958 (match_operand:<sseintvecmode> 2 "register_operand" "0")
14959 (match_operand:VI48F_512 3 "nonimmediate_operand" "vm")]
14962 "vpermi2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}"
14963 [(set_attr "type" "sselog")
14964 (set_attr "prefix" "evex")
14965 (set_attr "mode" "<sseinsnmode>")])
;; Merge-masking form of vpermi2: the UNSPEC_VPERMI2_MASK result is
;; combined through vec_merge under the mask register in operand 4, which
;; the template prints as {%4} after the destination.  Operand 2 is tied
;; to the destination register.
14967 (define_insn "avx512f_vpermi2var<mode>3_mask"
14968 [(set (match_operand:VI48F_512 0 "register_operand" "=v")
14969 (vec_merge:VI48F_512
14971 [(match_operand:VI48F_512 1 "register_operand" "v")
14972 (match_operand:<sseintvecmode> 2 "register_operand" "0")
14973 (match_operand:VI48F_512 3 "nonimmediate_operand" "vm")]
14974 UNSPEC_VPERMI2_MASK)
14976 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
14978 "vpermi2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
14979 [(set_attr "type" "sselog")
14980 (set_attr "prefix" "evex")
14981 (set_attr "mode" "<sseinsnmode>")])
;; Zero-masking entry point for vpermt2var.  It forwards operands 0..3 to
;; the _maskz_1 pattern, inserting the all-zero vector of the mode ahead
;; of the mask register (operand 4).  Here operand 1 is the
;; integer-mode vector and operand 2 a vector of the destination mode.
14983 (define_expand "avx512f_vpermt2var<mode>3_maskz"
14984 [(match_operand:VI48F_512 0 "register_operand" "=v")
14985 (match_operand:<sseintvecmode> 1 "register_operand" "v")
14986 (match_operand:VI48F_512 2 "register_operand" "0")
14987 (match_operand:VI48F_512 3 "nonimmediate_operand" "vm")
14988 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")]
14991 emit_insn (gen_avx512f_vpermt2var<mode>3_maskz_1 (
14992 operands[0], operands[1], operands[2], operands[3],
14993 CONST0_RTX (<MODE>mode), operands[4]));
;; vpermt2 for 512-bit vectors.  Operand 1 is the integer-mode vector;
;; operand 2, a vector of the destination mode, is tied to the
;; destination register (constraint "0"), so the template prints only %3,
;; %1 and %0.  Operand 3 may be a register or memory.
14997 (define_insn "avx512f_vpermt2var<mode>3<sd_maskz_name>"
14998 [(set (match_operand:VI48F_512 0 "register_operand" "=v")
15000 [(match_operand:<sseintvecmode> 1 "register_operand" "v")
15001 (match_operand:VI48F_512 2 "register_operand" "0")
15002 (match_operand:VI48F_512 3 "nonimmediate_operand" "vm")]
15005 "vpermt2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}"
15006 [(set_attr "type" "sselog")
15007 (set_attr "prefix" "evex")
15008 (set_attr "mode" "<sseinsnmode>")])
;; Merge-masking form of vpermt2: the permute result is combined through
;; vec_merge under the mask register in operand 4, which the template
;; prints as {%4} after the destination.  Operand 2 is tied to the
;; destination register.
15010 (define_insn "avx512f_vpermt2var<mode>3_mask"
15011 [(set (match_operand:VI48F_512 0 "register_operand" "=v")
15012 (vec_merge:VI48F_512
15014 [(match_operand:<sseintvecmode> 1 "register_operand" "v")
15015 (match_operand:VI48F_512 2 "register_operand" "0")
15016 (match_operand:VI48F_512 3 "nonimmediate_operand" "vm")]
15019 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
15021 "vpermt2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
15022 [(set_attr "type" "sselog")
15023 (set_attr "prefix" "evex")
15024 (set_attr "mode" "<sseinsnmode>")])
;; Expander for vperm2f128 with an 8-bit immediate (operand 3).  The
;; default RTL is the UNSPEC_VPERMIL2F128 form.  When neither bit 3 nor
;; bit 7 of the immediate is set ((mask & 0x88) == 0), the preparation
;; code instead builds a SET of operand 0 from a vec_select over
;; vec_concat (operand 1, operand 2): the low half of the result takes
;; the 128-bit lane numbered (mask & 3) and the high half the lane
;; numbered ((mask >> 4) & 3), expressed as nelt / 2 consecutive element
;; indices each.
15026 (define_expand "avx_vperm2f128<mode>3"
15027 [(set (match_operand:AVX256MODE2P 0 "register_operand")
15028 (unspec:AVX256MODE2P
15029 [(match_operand:AVX256MODE2P 1 "register_operand")
15030 (match_operand:AVX256MODE2P 2 "nonimmediate_operand")
15031 (match_operand:SI 3 "const_0_to_255_operand")]
15032 UNSPEC_VPERMIL2F128))]
15035 int mask = INTVAL (operands[3]);
15036 if ((mask & 0x88) == 0)
15038 rtx perm[<ssescalarnum>], t1, t2;
15039 int i, base, nelt = <ssescalarnum>, nelt2 = nelt / 2;
15041 base = (mask & 3) * nelt2;
15042 for (i = 0; i < nelt2; ++i)
15043 perm[i] = GEN_INT (base + i);
15045 base = ((mask >> 4) & 3) * nelt2;
15046 for (i = 0; i < nelt2; ++i)
15047 perm[i + nelt2] = GEN_INT (base + i);
15049 t2 = gen_rtx_VEC_CONCAT (<ssedoublevecmode>mode,
15050 operands[1], operands[2]);
15051 t1 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, perm));
15052 t2 = gen_rtx_VEC_SELECT (<MODE>mode, t2, t1);
15053 t2 = gen_rtx_SET (VOIDmode, operands[0], t2);
15059 ;; Note that bits 7 and 3 of the imm8 allow lanes to be zeroed, which
15060 ;; means that in order to represent this properly in rtl we'd have to
15061 ;; nest *another* vec_concat with a zero operand and do the select from
15062 ;; a 4x wide vector. That doesn't seem very nice.
;; Hence this pattern keeps the operation opaque as UNSPEC_VPERMIL2F128
;; and simply prints the 8-bit immediate (operand 3) as given.
15063 (define_insn "*avx_vperm2f128<mode>_full"
15064 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
15065 (unspec:AVX256MODE2P
15066 [(match_operand:AVX256MODE2P 1 "register_operand" "x")
15067 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm")
15068 (match_operand:SI 3 "const_0_to_255_operand" "n")]
15069 UNSPEC_VPERMIL2F128))]
15071 "vperm2<i128>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15072 [(set_attr "type" "sselog")
15073 (set_attr "prefix_extra" "1")
15074 (set_attr "length_immediate" "1")
15075 (set_attr "prefix" "vex")
15076 (set_attr "mode" "<sseinsnmode>")])
;; vec_select form of vperm2f128: a selection from
;; vec_concat (operand 1, operand 2) described by a PARALLEL.  The
;; parallel is validated by avx_vperm2f128_parallel, whose nonzero result
;; is the instruction immediate plus one.  The output code recovers the
;; immediate and, depending on its value, emits one of the two vinsert
;; forms (immediate 0 or 1, with operand 2 printed via %x2) or the
;; general vperm2 form with the immediate stored in operands[3].
15078 (define_insn "*avx_vperm2f128<mode>_nozero"
15079 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
15080 (vec_select:AVX256MODE2P
15081 (vec_concat:<ssedoublevecmode>
15082 (match_operand:AVX256MODE2P 1 "register_operand" "x")
15083 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm"))
15084 (match_parallel 3 ""
15085 [(match_operand 4 "const_int_operand")])))]
15087 && avx_vperm2f128_parallel (operands[3], <MODE>mode)"
15089 int mask = avx_vperm2f128_parallel (operands[3], <MODE>mode) - 1;
15091 return "vinsert<i128>\t{$0, %x2, %1, %0|%0, %1, %x2, 0}";
15093 return "vinsert<i128>\t{$1, %x2, %1, %0|%0, %1, %x2, 1}";
15094 operands[3] = GEN_INT (mask);
15095 return "vperm2<i128>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
15097 [(set_attr "type" "sselog")
15098 (set_attr "prefix_extra" "1")
15099 (set_attr "length_immediate" "1")
15100 (set_attr "prefix" "vex")
15101 (set_attr "mode" "<sseinsnmode>")])
;; palignr matched from a permutation parallel (operand 2, accepted by
;; palignr_operand) applied to the single 128-bit input operand 1.
;; Operand 3 is the first element index of the parallel; the output code
;; scales it by the element size to obtain the byte count that the
;; instruction takes as its immediate, and stores that back in operand 2.
;; Alternative 0 is the two-operand SSE form (input tied to the output),
;; alternative 1 the VEX form with operand 1 used as both sources.
15103 (define_insn "*ssse3_palignr<mode>_perm"
15104 [(set (match_operand:V_128 0 "register_operand" "=x,x")
15106 (match_operand:V_128 1 "register_operand" "0,x")
15107 (match_parallel 2 "palignr_operand"
15108 [(match_operand 3 "const_int_operand" "n, n")])))]
15111 enum machine_mode imode = GET_MODE_INNER (GET_MODE (operands[0]));
15112 operands[2] = GEN_INT (INTVAL (operands[3]) * GET_MODE_SIZE (imode));
15114 switch (which_alternative)
15117 return "palignr\t{%2, %1, %0|%0, %1, %2}";
15119 return "vpalignr\t{%2, %1, %1, %0|%0, %1, %1, %2}";
15121 gcc_unreachable ();
15124 [(set_attr "isa" "noavx,avx")
15125 (set_attr "type" "sseishft")
15126 (set_attr "atom_unit" "sishuf")
15127 (set_attr "prefix_data16" "1,*")
15128 (set_attr "prefix_extra" "1")
15129 (set_attr "length_immediate" "1")
15130 (set_attr "prefix" "orig,vex")])
;; Expander for inserting a half-width vector (operand 2) into a 256-bit
;; vector (operand 1).  The 0/1 immediate in operand 3 chooses between the
;; vec_set_lo_<mode> and vec_set_hi_<mode> generators; any other value is
;; unreachable because of the const_0_to_1_operand predicate.
15132 (define_expand "avx_vinsertf128<mode>"
15133 [(match_operand:V_256 0 "register_operand")
15134 (match_operand:V_256 1 "register_operand")
15135 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand")
15136 (match_operand:SI 3 "const_0_to_1_operand")]
15139 rtx (*insn)(rtx, rtx, rtx);
15141 switch (INTVAL (operands[3]))
15144 insn = gen_vec_set_lo_<mode>;
15147 insn = gen_vec_set_hi_<mode>;
15150 gcc_unreachable ();
15153 emit_insn (insn (operands[0], operands[1], operands[2]));
;; V4DI low-half replacement: the new V2DI value (operand 2) is paired with
;; elements 2 and 3 of operand 1, emitted as vinserti128 with immediate 0.
15157 (define_insn "avx2_vec_set_lo_v4di"
15158 [(set (match_operand:V4DI 0 "register_operand" "=x")
15160 (match_operand:V2DI 2 "nonimmediate_operand" "xm")
15162 (match_operand:V4DI 1 "register_operand" "x")
15163 (parallel [(const_int 2) (const_int 3)]))))]
15165 "vinserti128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
15166 [(set_attr "type" "sselog")
15167 (set_attr "prefix_extra" "1")
15168 (set_attr "length_immediate" "1")
15169 (set_attr "prefix" "vex")
15170 (set_attr "mode" "OI")])
;; V4DI high-half replacement: elements 0 and 1 of operand 1 are paired with
;; the new V2DI value (operand 2), emitted as vinserti128 with immediate 1.
15172 (define_insn "avx2_vec_set_hi_v4di"
15173 [(set (match_operand:V4DI 0 "register_operand" "=x")
15176 (match_operand:V4DI 1 "register_operand" "x")
15177 (parallel [(const_int 0) (const_int 1)]))
15178 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
15180 "vinserti128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
15181 [(set_attr "type" "sselog")
15182 (set_attr "prefix_extra" "1")
15183 (set_attr "length_immediate" "1")
15184 (set_attr "prefix" "vex")
15185 (set_attr "mode" "OI")])
;; Replace the low half of a four-element 256-bit vector: the result is
;; operand 2 concatenated with elements 2 and 3 of operand 1.
15187 (define_insn "vec_set_lo_<mode>"
15188 [(set (match_operand:VI8F_256 0 "register_operand" "=x")
15189 (vec_concat:VI8F_256
15190 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")
15191 (vec_select:<ssehalfvecmode>
15192 (match_operand:VI8F_256 1 "register_operand" "x")
15193 (parallel [(const_int 2) (const_int 3)]))))]
15195 "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
15196 [(set_attr "type" "sselog")
15197 (set_attr "prefix_extra" "1")
15198 (set_attr "length_immediate" "1")
15199 (set_attr "prefix" "vex")
15200 (set_attr "mode" "<sseinsnmode>")])
;; Replace the high half of a four-element 256-bit vector: the result is
;; elements 0 and 1 of operand 1 concatenated with operand 2.
15202 (define_insn "vec_set_hi_<mode>"
15203 [(set (match_operand:VI8F_256 0 "register_operand" "=x")
15204 (vec_concat:VI8F_256
15205 (vec_select:<ssehalfvecmode>
15206 (match_operand:VI8F_256 1 "register_operand" "x")
15207 (parallel [(const_int 0) (const_int 1)]))
15208 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")))]
15210 "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
15211 [(set_attr "type" "sselog")
15212 (set_attr "prefix_extra" "1")
15213 (set_attr "length_immediate" "1")
15214 (set_attr "prefix" "vex")
15215 (set_attr "mode" "<sseinsnmode>")])
;; Replace the low half of an eight-element 256-bit vector: the result is
;; operand 2 concatenated with elements 4..7 of operand 1.
15217 (define_insn "vec_set_lo_<mode>"
15218 [(set (match_operand:VI4F_256 0 "register_operand" "=x")
15219 (vec_concat:VI4F_256
15220 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")
15221 (vec_select:<ssehalfvecmode>
15222 (match_operand:VI4F_256 1 "register_operand" "x")
15223 (parallel [(const_int 4) (const_int 5)
15224 (const_int 6) (const_int 7)]))))]
15226 "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
15227 [(set_attr "type" "sselog")
15228 (set_attr "prefix_extra" "1")
15229 (set_attr "length_immediate" "1")
15230 (set_attr "prefix" "vex")
15231 (set_attr "mode" "<sseinsnmode>")])
;; Replace the high half of an eight-element 256-bit vector: the result is
;; elements 0..3 of operand 1 concatenated with operand 2.
15233 (define_insn "vec_set_hi_<mode>"
15234 [(set (match_operand:VI4F_256 0 "register_operand" "=x")
15235 (vec_concat:VI4F_256
15236 (vec_select:<ssehalfvecmode>
15237 (match_operand:VI4F_256 1 "register_operand" "x")
15238 (parallel [(const_int 0) (const_int 1)
15239 (const_int 2) (const_int 3)]))
15240 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")))]
15242 "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
15243 [(set_attr "type" "sselog")
15244 (set_attr "prefix_extra" "1")
15245 (set_attr "length_immediate" "1")
15246 (set_attr "prefix" "vex")
15247 (set_attr "mode" "<sseinsnmode>")])
;; V16HI low-half replacement: the new V8HI value (operand 2) is paired with
;; elements 8..15 of operand 1.  The "%~" in the template lets the output
;; machinery pick the mnemonic letter, so no explicit i/f variant is named.
15249 (define_insn "vec_set_lo_v16hi"
15250 [(set (match_operand:V16HI 0 "register_operand" "=x")
15252 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
15254 (match_operand:V16HI 1 "register_operand" "x")
15255 (parallel [(const_int 8) (const_int 9)
15256 (const_int 10) (const_int 11)
15257 (const_int 12) (const_int 13)
15258 (const_int 14) (const_int 15)]))))]
15260 "vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
15261 [(set_attr "type" "sselog")
15262 (set_attr "prefix_extra" "1")
15263 (set_attr "length_immediate" "1")
15264 (set_attr "prefix" "vex")
15265 (set_attr "mode" "OI")])
;; V16HI high-half replacement: elements 0..7 of operand 1 are paired with
;; the new V8HI value (operand 2); emitted with immediate 1.
15267 (define_insn "vec_set_hi_v16hi"
15268 [(set (match_operand:V16HI 0 "register_operand" "=x")
15271 (match_operand:V16HI 1 "register_operand" "x")
15272 (parallel [(const_int 0) (const_int 1)
15273 (const_int 2) (const_int 3)
15274 (const_int 4) (const_int 5)
15275 (const_int 6) (const_int 7)]))
15276 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
15278 "vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
15279 [(set_attr "type" "sselog")
15280 (set_attr "prefix_extra" "1")
15281 (set_attr "length_immediate" "1")
15282 (set_attr "prefix" "vex")
15283 (set_attr "mode" "OI")])
;; V32QI low-half replacement: the new V16QI value (operand 2) is paired
;; with elements 16..31 of operand 1; emitted with immediate 0.
15285 (define_insn "vec_set_lo_v32qi"
15286 [(set (match_operand:V32QI 0 "register_operand" "=x")
15288 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
15290 (match_operand:V32QI 1 "register_operand" "x")
15291 (parallel [(const_int 16) (const_int 17)
15292 (const_int 18) (const_int 19)
15293 (const_int 20) (const_int 21)
15294 (const_int 22) (const_int 23)
15295 (const_int 24) (const_int 25)
15296 (const_int 26) (const_int 27)
15297 (const_int 28) (const_int 29)
15298 (const_int 30) (const_int 31)]))))]
15300 "vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
15301 [(set_attr "type" "sselog")
15302 (set_attr "prefix_extra" "1")
15303 (set_attr "length_immediate" "1")
15304 (set_attr "prefix" "vex")
15305 (set_attr "mode" "OI")])
;; V32QI high-half replacement: elements 0..15 of operand 1 are paired with
;; the new V16QI value (operand 2); emitted with immediate 1.
15307 (define_insn "vec_set_hi_v32qi"
15308 [(set (match_operand:V32QI 0 "register_operand" "=x")
15311 (match_operand:V32QI 1 "register_operand" "x")
15312 (parallel [(const_int 0) (const_int 1)
15313 (const_int 2) (const_int 3)
15314 (const_int 4) (const_int 5)
15315 (const_int 6) (const_int 7)
15316 (const_int 8) (const_int 9)
15317 (const_int 10) (const_int 11)
15318 (const_int 12) (const_int 13)
15319 (const_int 14) (const_int 15)]))
15320 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
15322 "vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
15323 [(set_attr "type" "sselog")
15324 (set_attr "prefix_extra" "1")
15325 (set_attr "length_immediate" "1")
15326 (set_attr "prefix" "vex")
15327 (set_attr "mode" "OI")])
;; Masked vector load.  Operand 2 is the mask, held in a register of the
;; same-sized integer vector mode; operand 1 is the memory source.
15329 (define_insn "<avx_avx2>_maskload<ssemodesuffix><avxsizesuffix>"
15330 [(set (match_operand:V48_AVX2 0 "register_operand" "=x")
15332 [(match_operand:<sseintvecmode> 2 "register_operand" "x")
15333 (match_operand:V48_AVX2 1 "memory_operand" "m")]
15336 "v<sseintprefix>maskmov<ssemodesuffix>\t{%1, %2, %0|%0, %2, %1}"
15337 [(set_attr "type" "sselog1")
15338 (set_attr "prefix_extra" "1")
15339 (set_attr "prefix" "vex")
15340 (set_attr "btver2_decode" "vector")
15341 (set_attr "mode" "<sseinsnmode>")])
;; Masked vector store.  Operand 1 is the mask register and operand 2 the
;; data register.  The memory destination uses "+m" (read-write), so its
;; previous contents count as an input to the insn.
15343 (define_insn "<avx_avx2>_maskstore<ssemodesuffix><avxsizesuffix>"
15344 [(set (match_operand:V48_AVX2 0 "memory_operand" "+m")
15346 [(match_operand:<sseintvecmode> 1 "register_operand" "x")
15347 (match_operand:V48_AVX2 2 "register_operand" "x")
15351 "v<sseintprefix>maskmov<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
15352 [(set_attr "type" "sselog1")
15353 (set_attr "prefix_extra" "1")
15354 (set_attr "prefix" "vex")
15355 (set_attr "btver2_decode" "vector")
15356 (set_attr "mode" "<sseinsnmode>")])
;; Expander named maskload<mode>: operand 0 is the destination register,
;; operand 1 the memory source and operand 2 the integer-vector mask.
15358 (define_expand "maskload<mode>"
15359 [(set (match_operand:V48_AVX2 0 "register_operand")
15361 [(match_operand:<sseintvecmode> 2 "register_operand")
15362 (match_operand:V48_AVX2 1 "memory_operand")]
;; Expander named maskstore<mode>: operand 0 is the memory destination,
;; operand 1 the data register and operand 2 the integer-vector mask.
;; Note the numbering differs from the maskstore insn, which takes the mask
;; as operand 1 and the data as operand 2.
15366 (define_expand "maskstore<mode>"
15367 [(set (match_operand:V48_AVX2 0 "memory_operand")
15369 [(match_operand:<sseintvecmode> 2 "register_operand")
15370 (match_operand:V48_AVX2 1 "register_operand")
;; Cast of a half-width vector (operand 1) to the 256-bit mode of operand 0,
;; kept as an unspec until register allocation is done.  After reload it is
;; split into an ordinary move: one side is re-created as a hard register in
;; the other side's mode (same REGNO) so that both sides of the move agree.
15375 (define_insn_and_split "avx_<castmode><avxsizesuffix>_<castmode>"
15376 [(set (match_operand:AVX256MODE2P 0 "nonimmediate_operand" "=x,m")
15377 (unspec:AVX256MODE2P
15378 [(match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "xm,x")]
15382 "&& reload_completed"
15385 rtx op0 = operands[0];
15386 rtx op1 = operands[1];
15388 op0 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op0));
15390 op1 = gen_rtx_REG (<MODE>mode, REGNO (op1));
15391 emit_move_insn (op0, op1);
;; vec_init for 256-bit vectors: all the work is delegated to
;; ix86_expand_vector_init, called with its first argument false.
15395 (define_expand "vec_init<mode>"
15396 [(match_operand:V_256 0 "register_operand")
15400 ix86_expand_vector_init (false, operands[0], operands[1]);
;; vec_init for the 512-bit VI48F_512 modes: same delegation to
;; ix86_expand_vector_init as the 256-bit expander.
15404 (define_expand "vec_init<mode>"
15405 [(match_operand:VI48F_512 0 "register_operand")
15409 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Expander for extracting one V2DI half of a V4DI register.  The 0/1
;; immediate in operand 2 chooses between gen_vec_extract_lo_v4di and
;; gen_vec_extract_hi_v4di.
15413 (define_expand "avx2_extracti128"
15414 [(match_operand:V2DI 0 "nonimmediate_operand")
15415 (match_operand:V4DI 1 "register_operand")
15416 (match_operand:SI 2 "const_0_to_1_operand")]
15419 rtx (*insn)(rtx, rtx);
15421 switch (INTVAL (operands[2]))
15424 insn = gen_vec_extract_lo_v4di;
15427 insn = gen_vec_extract_hi_v4di;
15430 gcc_unreachable ();
15433 emit_insn (insn (operands[0], operands[1]));
;; Expander for inserting a V2DI value (operand 2) into one half of a V4DI
;; register (operand 1).  The 0/1 immediate in operand 3 chooses between
;; gen_avx2_vec_set_lo_v4di and gen_avx2_vec_set_hi_v4di.
15437 (define_expand "avx2_inserti128"
15438 [(match_operand:V4DI 0 "register_operand")
15439 (match_operand:V4DI 1 "register_operand")
15440 (match_operand:V2DI 2 "nonimmediate_operand")
15441 (match_operand:SI 3 "const_0_to_1_operand")]
15444 rtx (*insn)(rtx, rtx, rtx);
15446 switch (INTVAL (operands[3]))
15449 insn = gen_avx2_vec_set_lo_v4di;
15452 insn = gen_avx2_vec_set_hi_v4di;
15455 gcc_unreachable ();
15458 emit_insn (insn (operands[0], operands[1], operands[2]));
;; Variable arithmetic right shift for 4- and 8-byte element vectors:
;; operand 1 is shifted by the per-element counts in vector operand 2.
;; <mask_name> and <mask_operand3> supply the optional masked variant.
15462 (define_insn "<avx2_avx512bw>_ashrv<mode><mask_name>"
15463 [(set (match_operand:VI48_AVX512F_AVX512VL 0 "register_operand" "=v")
15464 (ashiftrt:VI48_AVX512F_AVX512VL
15465 (match_operand:VI48_AVX512F_AVX512VL 1 "register_operand" "v")
15466 (match_operand:VI48_AVX512F_AVX512VL 2 "nonimmediate_operand" "vm")))]
15467 "TARGET_AVX2 && <mask_mode512bit_condition>"
15468 "vpsrav<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
15469 [(set_attr "type" "sseishft")
15470 (set_attr "prefix" "maybe_evex")
15471 (set_attr "mode" "<sseinsnmode>")])
;; Variable arithmetic right shift for the VI2_AVX512VL (16-bit element)
;; modes; always emitted as vpsravw.
15473 (define_insn "<avx2_avx512bw>_ashrv<mode><mask_name>"
15474 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
15475 (ashiftrt:VI2_AVX512VL
15476 (match_operand:VI2_AVX512VL 1 "register_operand" "v")
15477 (match_operand:VI2_AVX512VL 2 "nonimmediate_operand" "vm")))]
15479 "vpsravw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
15480 [(set_attr "type" "sseishft")
15481 (set_attr "prefix" "maybe_evex")
15482 (set_attr "mode" "<sseinsnmode>")])
;; Variable shifts covered by the any_lshift code iterator, for the
;; VI48_AVX512F modes: operand 1 is shifted by the per-element counts in
;; vector operand 2.  <vshift> selects the mnemonic for each shift code.
15484 (define_insn "<avx2_avx512bw>_<shift_insn>v<mode><mask_name>"
15485 [(set (match_operand:VI48_AVX512F 0 "register_operand" "=v")
15486 (any_lshift:VI48_AVX512F
15487 (match_operand:VI48_AVX512F 1 "register_operand" "v")
15488 (match_operand:VI48_AVX512F 2 "nonimmediate_operand" "vm")))]
15489 "TARGET_AVX2 && <mask_mode512bit_condition>"
15490 "vp<vshift>v<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
15491 [(set_attr "type" "sseishft")
15492 (set_attr "prefix" "maybe_evex")
15493 (set_attr "mode" "<sseinsnmode>")])
;; Variable shifts covered by the any_lshift code iterator, for the
;; VI2_AVX512VL (16-bit element) modes.
15495 (define_insn "<avx2_avx512bw>_<shift_insn>v<mode><mask_name>"
15496 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
15497 (any_lshift:VI2_AVX512VL
15498 (match_operand:VI2_AVX512VL 1 "register_operand" "v")
15499 (match_operand:VI2_AVX512VL 2 "nonimmediate_operand" "vm")))]
15501 "vp<vshift>v<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
15502 [(set_attr "type" "sseishft")
15503 (set_attr "prefix" "maybe_evex")
15504 (set_attr "mode" "<sseinsnmode>")])
;; Build a 256/512-bit vector from two half-width vectors.
;; Alternative 0 (operand 2 in a register or memory) inserts operand 2 as
;; the upper half with vinsert<i128> immediate 0x1.
;; Alternative 1 (operand 2 matches constraint "C") emits only a move of
;; operand 1 into the narrower %t0 / %x0 view of the destination; the move
;; mnemonic is picked from the insn's "mode" attribute.
;; NOTE(review): presumably "C" is the all-zeros constant and the narrow
;; move clears the upper half -- confirm against constraints.md.
15506 (define_insn "avx_vec_concat<mode>"
15507 [(set (match_operand:V_256_512 0 "register_operand" "=x,x")
15508 (vec_concat:V_256_512
15509 (match_operand:<ssehalfvecmode> 1 "register_operand" "x,x")
15510 (match_operand:<ssehalfvecmode> 2 "vector_move_operand" "xm,C")))]
15513 switch (which_alternative)
15516 return "vinsert<i128>\t{$0x1, %2, %<concat_tg_mode>1, %0|%0, %<concat_tg_mode>1, %2, 0x1}";
15518 switch (get_attr_mode (insn))
15521 return "vmovaps\t{%1, %t0|%t0, %1}";
15523 return "vmovapd\t{%1, %t0|%t0, %1}";
15525 return "vmovaps\t{%1, %x0|%x0, %1}";
15527 return "vmovapd\t{%1, %x0|%x0, %1}";
15529 return "vmovdqa\t{%1, %t0|%t0, %1}";
15531 return "vmovdqa\t{%1, %x0|%x0, %1}";
15533 gcc_unreachable ();
15536 gcc_unreachable ();
15539 [(set_attr "type" "sselog,ssemov")
15540 (set_attr "prefix_extra" "1,*")
15541 (set_attr "length_immediate" "1,*")
15542 (set_attr "prefix" "maybe_evex")
15543 (set_attr "mode" "<sseinsnmode>")])
;; Half-precision to single-precision conversion, 128-bit register form.
;; The conversion of the V8HI register is modelled as a V8SF unspec from
;; which elements 0..3 are selected to give the V4SF result.
15545 (define_insn "vcvtph2ps"
15546 [(set (match_operand:V4SF 0 "register_operand" "=x")
15548 (unspec:V8SF [(match_operand:V8HI 1 "register_operand" "x")]
15550 (parallel [(const_int 0) (const_int 1)
15551 (const_int 2) (const_int 3)])))]
15553 "vcvtph2ps\t{%1, %0|%0, %1}"
15554 [(set_attr "type" "ssecvt")
15555 (set_attr "prefix" "vex")
15556 (set_attr "mode" "V4SF")])
;; Memory-source form of the 128-bit conversion: four half-precision values
;; are read from a V4HI memory operand, producing V4SF directly.
;; NOTE(review): the "mode" attribute is V8SF although the result is V4SF
;; (the register form above uses V4SF) -- confirm whether this is intended.
15558 (define_insn "*vcvtph2ps_load"
15559 [(set (match_operand:V4SF 0 "register_operand" "=x")
15560 (unspec:V4SF [(match_operand:V4HI 1 "memory_operand" "m")]
15561 UNSPEC_VCVTPH2PS))]
15563 "vcvtph2ps\t{%1, %0|%0, %1}"
15564 [(set_attr "type" "ssecvt")
15565 (set_attr "prefix" "vex")
15566 (set_attr "mode" "V8SF")])
;; 256-bit half-to-single conversion: eight V8HI elements (register or
;; memory) become a V8SF result.
15568 (define_insn "vcvtph2ps256"
15569 [(set (match_operand:V8SF 0 "register_operand" "=x")
15570 (unspec:V8SF [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
15571 UNSPEC_VCVTPH2PS))]
15573 "vcvtph2ps\t{%1, %0|%0, %1}"
15574 [(set_attr "type" "ssecvt")
15575 (set_attr "prefix" "vex")
15576 (set_attr "btver2_decode" "double")
15577 (set_attr "mode" "V8SF")])
;; 512-bit half-to-single conversion (V16HI -> V16SF).  The <mask_*> and
;; <round_saeonly_*> substitutions add the optional masked and SAE variants.
15579 (define_insn "<mask_codefor>avx512f_vcvtph2ps512<mask_name><round_saeonly_name>"
15580 [(set (match_operand:V16SF 0 "register_operand" "=v")
15582 [(match_operand:V16HI 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
15583 UNSPEC_VCVTPH2PS))]
15585 "vcvtph2ps\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
15586 [(set_attr "type" "ssecvt")
15587 (set_attr "prefix" "evex")
15588 (set_attr "mode" "V16SF")])
;; Expander for the 128-bit single-to-half conversion.  The converted V4HI
;; value only fills part of the V8HI destination, so the preparation code
;; supplies a V4HI zero vector as operand 3 for the insn pattern to use.
15590 (define_expand "vcvtps2ph"
15591 [(set (match_operand:V8HI 0 "register_operand")
15593 (unspec:V4HI [(match_operand:V4SF 1 "register_operand")
15594 (match_operand:SI 2 "const_0_to_255_operand")]
15598 "operands[3] = CONST0_RTX (V4HImode);")
;; Register form of the 128-bit single-to-half conversion: the V4HI unspec
;; result is combined with a V4HI zero (operand 3) to form the V8HI
;; destination.  Operand 2 is the 8-bit immediate passed to the instruction.
15600 (define_insn "*vcvtps2ph"
15601 [(set (match_operand:V8HI 0 "register_operand" "=x")
15603 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")
15604 (match_operand:SI 2 "const_0_to_255_operand" "N")]
15606 (match_operand:V4HI 3 "const0_operand")))]
15608 "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
15609 [(set_attr "type" "ssecvt")
15610 (set_attr "prefix" "vex")
15611 (set_attr "mode" "V4SF")])
;; Store form of the 128-bit single-to-half conversion: the four converted
;; values are written directly to a V4HI memory destination.
15613 (define_insn "*vcvtps2ph_store"
15614 [(set (match_operand:V4HI 0 "memory_operand" "=m")
15615 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")
15616 (match_operand:SI 2 "const_0_to_255_operand" "N")]
15617 UNSPEC_VCVTPS2PH))]
15619 "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
15620 [(set_attr "type" "ssecvt")
15621 (set_attr "prefix" "vex")
15622 (set_attr "mode" "V4SF")])
;; 256-bit single-to-half conversion: V8SF register source, V8HI result in
;; either a register or memory.
15624 (define_insn "vcvtps2ph256"
15625 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=xm")
15626 (unspec:V8HI [(match_operand:V8SF 1 "register_operand" "x")
15627 (match_operand:SI 2 "const_0_to_255_operand" "N")]
15628 UNSPEC_VCVTPS2PH))]
15630 "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
15631 [(set_attr "type" "ssecvt")
15632 (set_attr "prefix" "vex")
15633 (set_attr "btver2_decode" "vector")
15634 (set_attr "mode" "V8SF")])
;; 512-bit single-to-half conversion (V16SF -> V16HI), with the optional
;; masked variant supplied by <mask_name> / <mask_operand3>.
15636 (define_insn "<mask_codefor>avx512f_vcvtps2ph512<mask_name>"
15637 [(set (match_operand:V16HI 0 "nonimmediate_operand" "=vm")
15639 [(match_operand:V16SF 1 "register_operand" "v")
15640 (match_operand:SI 2 "const_0_to_255_operand" "N")]
15641 UNSPEC_VCVTPS2PH))]
15643 "vcvtps2ph\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
15644 [(set_attr "type" "ssecvt")
15645 (set_attr "prefix" "evex")
15646 (set_attr "mode" "V16SF")])
15648 ;; For gather* insn patterns
;; Result modes handled by the AVX2 gather patterns: 128- and 256-bit
;; vectors with 4- or 8-byte elements.
15649 (define_mode_iterator VEC_GATHER_MODE
15650 [V2DI V2DF V4DI V4DF V4SI V4SF V8SI V8SF])
;; Index-vector mode used with 32-bit indices, keyed by the data mode.
;; Two- and four-element data modes both map to V4SI.
15651 (define_mode_attr VEC_GATHER_IDXSI
15652 [(V2DI "V4SI") (V4DI "V4SI") (V8DI "V8SI")
15653 (V2DF "V4SI") (V4DF "V4SI") (V8DF "V8SI")
15654 (V4SI "V4SI") (V8SI "V8SI") (V16SI "V16SI")
15655 (V4SF "V4SI") (V8SF "V8SI") (V16SF "V16SI")])
;; Index-vector mode used with 64-bit indices, keyed by the data mode.
;; For 4-byte element data modes the index vector has half as many elements
;; as the data vector (e.g. V8SI data uses a V4DI index).
15657 (define_mode_attr VEC_GATHER_IDXDI
15658 [(V2DI "V2DI") (V4DI "V4DI") (V8DI "V8DI")
15659 (V2DF "V2DI") (V4DF "V4DI") (V8DF "V8DI")
15660 (V4SI "V2DI") (V8SI "V4DI") (V16SI "V8DI")
15661 (V4SF "V2DI") (V8SF "V4DI") (V16SF "V8DI")])
;; Mode of the source/mask data operands when 64-bit indices are used.
;; Identical to the data mode except for the wider 4-byte element modes,
;; which are halved (V8SI -> V4SI, V16SI -> V8SI, and likewise for SF).
15663 (define_mode_attr VEC_GATHER_SRCDI
15664 [(V2DI "V2DI") (V4DI "V4DI") (V8DI "V8DI")
15665 (V2DF "V2DF") (V4DF "V4DF") (V8DF "V8DF")
15666 (V4SI "V4SI") (V8SI "V4SI") (V16SI "V8SI")
15667 (V4SF "V4SF") (V8SF "V4SF") (V16SF "V8SF")])
;; Expander for AVX2 gathers with 32-bit indices.
;;   operand 0 - destination vector
;;   operand 1 - first vector input of the gather unspec
;;   operand 2 - base address (vsib_address_operand)
;;   operand 3 - index vector, mode <VEC_GATHER_IDXSI>
;;   operand 4 - last vector input of the gather unspec
;;   operand 5 - scale, one of 1/2/4/8 (const1248_operand)
;; A scratch register in the data mode is clobbered.  The preparation code
;; packs base, index and scale into a single UNSPEC_VSIBADDR address in
;; Pmode, which is what the insn patterns match.
15669 (define_expand "avx2_gathersi<mode>"
15670 [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand")
15671 (unspec:VEC_GATHER_MODE
15672 [(match_operand:VEC_GATHER_MODE 1 "register_operand")
15673 (mem:<ssescalarmode>
15675 [(match_operand 2 "vsib_address_operand")
15676 (match_operand:<VEC_GATHER_IDXSI>
15677 3 "register_operand")
15678 (match_operand:SI 5 "const1248_operand")]))
15679 (mem:BLK (scratch))
15680 (match_operand:VEC_GATHER_MODE 4 "register_operand")]
15682 (clobber (match_scratch:VEC_GATHER_MODE 6))])]
15686 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
15687 operands[5]), UNSPEC_VSIBADDR);
;; AVX2 gather with 32-bit indices.  Operand 2 is tied to the destination
;; ("0") and operand 5 is tied to the clobbered scratch register ("1"), so
;; the instruction is printed with the scratch (%1), the VSIB memory
;; operand (%7) and the destination (%0).  Both outputs are earlyclobber.
15690 (define_insn "*avx2_gathersi<mode>"
15691 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
15692 (unspec:VEC_GATHER_MODE
15693 [(match_operand:VEC_GATHER_MODE 2 "register_operand" "0")
15694 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
15696 [(match_operand:P 3 "vsib_address_operand" "Tv")
15697 (match_operand:<VEC_GATHER_IDXSI> 4 "register_operand" "x")
15698 (match_operand:SI 6 "const1248_operand" "n")]
15700 (mem:BLK (scratch))
15701 (match_operand:VEC_GATHER_MODE 5 "register_operand" "1")]
15703 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
15705 "v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %7, %0|%0, %7, %1}"
15706 [(set_attr "type" "ssemov")
15707 (set_attr "prefix" "vex")
15708 (set_attr "mode" "<sseinsnmode>")])
;; Variant of the 32-bit-index AVX2 gather in which no register input is
;; tied to the destination; only operand 4 is tied, to the clobbered
;; scratch register ("1").  Operand numbers are therefore one lower than in
;; the pattern above (VSIB memory is %6).
15710 (define_insn "*avx2_gathersi<mode>_2"
15711 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
15712 (unspec:VEC_GATHER_MODE
15714 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
15716 [(match_operand:P 2 "vsib_address_operand" "Tv")
15717 (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand" "x")
15718 (match_operand:SI 5 "const1248_operand" "n")]
15720 (mem:BLK (scratch))
15721 (match_operand:VEC_GATHER_MODE 4 "register_operand" "1")]
15723 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
15725 "v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %6, %0|%0, %6, %1}"
15726 [(set_attr "type" "ssemov")
15727 (set_attr "prefix" "vex")
15728 (set_attr "mode" "<sseinsnmode>")])
;; Expander for AVX2 gathers with 64-bit indices.
;;   operand 0 - destination vector
;;   operand 1 - first vector input, mode <VEC_GATHER_SRCDI>
;;   operand 2 - base address (vsib_address_operand)
;;   operand 3 - index vector, mode <VEC_GATHER_IDXDI>
;;   operand 4 - last vector input, mode <VEC_GATHER_SRCDI>
;;   operand 5 - scale, one of 1/2/4/8 (const1248_operand)
;; A scratch register in the data mode is clobbered.  The preparation code
;; packs base, index and scale into a single UNSPEC_VSIBADDR address in
;; Pmode, which is what the insn patterns match.
15730 (define_expand "avx2_gatherdi<mode>"
15731 [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand")
15732 (unspec:VEC_GATHER_MODE
15733 [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand")
15734 (mem:<ssescalarmode>
15736 [(match_operand 2 "vsib_address_operand")
15737 (match_operand:<VEC_GATHER_IDXDI>
15738 3 "register_operand")
15739 (match_operand:SI 5 "const1248_operand")]))
15740 (mem:BLK (scratch))
15741 (match_operand:<VEC_GATHER_SRCDI>
15742 4 "register_operand")]
15744 (clobber (match_scratch:VEC_GATHER_MODE 6))])]
15748 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
15749 operands[5]), UNSPEC_VSIBADDR);
;; AVX2 gather with 64-bit indices.  Operand 2 is tied to the destination
;; and operand 5 to the clobbered scratch, but both are in
;; <VEC_GATHER_SRCDI> mode, which can be narrower than the destination
;; mode.  The template therefore prints the tied inputs (%2, %5) rather
;; than %0/%1, so register names appear at the source width.
15752 (define_insn "*avx2_gatherdi<mode>"
15753 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
15754 (unspec:VEC_GATHER_MODE
15755 [(match_operand:<VEC_GATHER_SRCDI> 2 "register_operand" "0")
15756 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
15758 [(match_operand:P 3 "vsib_address_operand" "Tv")
15759 (match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x")
15760 (match_operand:SI 6 "const1248_operand" "n")]
15762 (mem:BLK (scratch))
15763 (match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" "1")]
15765 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
15767 "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %2|%2, %7, %5}"
15768 [(set_attr "type" "ssemov")
15769 (set_attr "prefix" "vex")
15770 (set_attr "mode" "<sseinsnmode>")])
;; Variant of the 64-bit-index AVX2 gather with no register input tied to
;; the destination.  When the destination mode differs from
;; <VEC_GATHER_SRCDI>, the destination is printed through its 128-bit view
;; (%x0); otherwise it is printed at full width.
15772 (define_insn "*avx2_gatherdi<mode>_2"
15773 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
15774 (unspec:VEC_GATHER_MODE
15776 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
15778 [(match_operand:P 2 "vsib_address_operand" "Tv")
15779 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x")
15780 (match_operand:SI 5 "const1248_operand" "n")]
15782 (mem:BLK (scratch))
15783 (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")]
15785 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
15788 if (<MODE>mode != <VEC_GATHER_SRCDI>mode)
15789 return "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %x0|%x0, %6, %4}";
15790 return "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}";
15792 [(set_attr "type" "ssemov")
15793 (set_attr "prefix" "vex")
15794 (set_attr "mode" "<sseinsnmode>")])
;; 64-bit-index gather whose result is consumed as a vec_select of elements
;; 0..3, so the destination itself is in <VEC_GATHER_SRCDI> mode.  The
;; clobbered scratch is in a VI4F_256 mode.  Operand 2 is tied to the
;; destination and operand 5 to the scratch.
15796 (define_insn "*avx2_gatherdi<mode>_3"
15797 [(set (match_operand:<VEC_GATHER_SRCDI> 0 "register_operand" "=&x")
15798 (vec_select:<VEC_GATHER_SRCDI>
15800 [(match_operand:<VEC_GATHER_SRCDI> 2 "register_operand" "0")
15801 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
15803 [(match_operand:P 3 "vsib_address_operand" "Tv")
15804 (match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x")
15805 (match_operand:SI 6 "const1248_operand" "n")]
15807 (mem:BLK (scratch))
15808 (match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" "1")]
15810 (parallel [(const_int 0) (const_int 1)
15811 (const_int 2) (const_int 3)])))
15812 (clobber (match_scratch:VI4F_256 1 "=&x"))]
15814 "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %0|%0, %7, %5}"
15815 [(set_attr "type" "ssemov")
15816 (set_attr "prefix" "vex")
15817 (set_attr "mode" "<sseinsnmode>")])
;; Like the _3 pattern (result taken as a vec_select of elements 0..3 into
;; a <VEC_GATHER_SRCDI> destination) but with no register input tied to the
;; destination; only operand 4 is tied, to the VI4F_256 scratch.
15819 (define_insn "*avx2_gatherdi<mode>_4"
15820 [(set (match_operand:<VEC_GATHER_SRCDI> 0 "register_operand" "=&x")
15821 (vec_select:<VEC_GATHER_SRCDI>
15824 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
15826 [(match_operand:P 2 "vsib_address_operand" "Tv")
15827 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x")
15828 (match_operand:SI 5 "const1248_operand" "n")]
15830 (mem:BLK (scratch))
15831 (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")]
15833 (parallel [(const_int 0) (const_int 1)
15834 (const_int 2) (const_int 3)])))
15835 (clobber (match_scratch:VI4F_256 1 "=&x"))]
15837 "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}"
15838 [(set_attr "type" "ssemov")
15839 (set_attr "prefix" "vex")
15840 (set_attr "mode" "<sseinsnmode>")])
;; Expander for AVX-512 gathers with 32-bit indices.
;;   operand 0 - destination vector
;;   operand 1 - vector input in the destination mode
;;   operand 2 - base address (vsib_address_operand)
;;   operand 3 - index vector, mode <VEC_GATHER_IDXSI>
;;   operand 4 - mask, mode <avx512fmaskmode>
;;   operand 5 - scale, one of 1/2/4/8
;; A scratch in the mask mode is clobbered.  The preparation code packs
;; base, index and scale into a single UNSPEC_VSIBADDR address in Pmode.
15842 (define_expand "avx512f_gathersi<mode>"
15843 [(parallel [(set (match_operand:VI48F_512 0 "register_operand")
15845 [(match_operand:VI48F_512 1 "register_operand")
15846 (match_operand:<avx512fmaskmode> 4 "register_operand")
15847 (mem:<ssescalarmode>
15849 [(match_operand 2 "vsib_address_operand")
15850 (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand")
15851 (match_operand:SI 5 "const1248_operand")]))]
15853 (clobber (match_scratch:<avx512fmaskmode> 7))])]
15857 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
15858 operands[5]), UNSPEC_VSIBADDR);
;; AVX-512 gather with 32-bit indices.  Operand 1 is tied to the
;; destination; the incoming mask (operand 7) is tied to the clobbered mask
;; scratch (operand 2, a "Yk" register), which is the mask printed in the
;; {%2} position of the template.
15861 (define_insn "*avx512f_gathersi<mode>"
15862 [(set (match_operand:VI48F_512 0 "register_operand" "=&v")
15864 [(match_operand:VI48F_512 1 "register_operand" "0")
15865 (match_operand:<avx512fmaskmode> 7 "register_operand" "2")
15866 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
15868 [(match_operand:P 4 "vsib_address_operand" "Tv")
15869 (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand" "v")
15870 (match_operand:SI 5 "const1248_operand" "n")]
15871 UNSPEC_VSIBADDR)])]
15873 (clobber (match_scratch:<avx512fmaskmode> 2 "=&Yk"))]
15875 "v<sseintprefix>gatherd<ssemodesuffix>\t{%6, %0%{%2%}|%0%{%2%}, %g6}"
15876 [(set_attr "type" "ssemov")
15877 (set_attr "prefix" "evex")
15878 (set_attr "mode" "<sseinsnmode>")])
;; Variant of the 32-bit-index AVX-512 gather with no vector input tied to
;; the destination.  The incoming mask (operand 6) is tied to the clobbered
;; mask scratch (operand 1), which is printed in the {%1} position.
15880 (define_insn "*avx512f_gathersi<mode>_2"
15881 [(set (match_operand:VI48F_512 0 "register_operand" "=&v")
15884 (match_operand:<avx512fmaskmode> 6 "register_operand" "1")
15885 (match_operator:<ssescalarmode> 5 "vsib_mem_operator"
15887 [(match_operand:P 3 "vsib_address_operand" "Tv")
15888 (match_operand:<VEC_GATHER_IDXSI> 2 "register_operand" "v")
15889 (match_operand:SI 4 "const1248_operand" "n")]
15890 UNSPEC_VSIBADDR)])]
15892 (clobber (match_scratch:<avx512fmaskmode> 1 "=&Yk"))]
15894 "v<sseintprefix>gatherd<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %g5}"
15895 [(set_attr "type" "ssemov")
15896 (set_attr "prefix" "evex")
15897 (set_attr "mode" "<sseinsnmode>")])
;; Expander for AVX-512 gathers with 64-bit indices.
;;   operand 0 - destination vector
;;   operand 1 - vector input, mode <VEC_GATHER_SRCDI>
;;   operand 2 - base address (vsib_address_operand)
;;   operand 3 - index vector, mode <VEC_GATHER_IDXDI>
;;   operand 4 - mask, always QImode here
;;   operand 5 - scale, one of 1/2/4/8
;; A QImode scratch is clobbered.  The preparation code packs base, index
;; and scale into a single UNSPEC_VSIBADDR address in Pmode.
15900 (define_expand "avx512f_gatherdi<mode>"
15901 [(parallel [(set (match_operand:VI48F_512 0 "register_operand")
15903 [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand")
15904 (match_operand:QI 4 "register_operand")
15905 (mem:<ssescalarmode>
15907 [(match_operand 2 "vsib_address_operand")
15908 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand")
15909 (match_operand:SI 5 "const1248_operand")]))]
15911 (clobber (match_scratch:QI 7))])]
15915 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
15916 operands[5]), UNSPEC_VSIBADDR);
;; AVX-512 gather with 64-bit indices.  Operand 1 (mode
;; <VEC_GATHER_SRCDI>) is tied to the destination and is what the template
;; prints as the destination register, so the register name appears at the
;; source width.  The QImode mask (operand 7) is tied to the clobbered
;; mask scratch (operand 2).
15919 (define_insn "*avx512f_gatherdi<mode>"
15920 [(set (match_operand:VI48F_512 0 "register_operand" "=&v")
15922 [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand" "0")
15923 (match_operand:QI 7 "register_operand" "2")
15924 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
15926 [(match_operand:P 4 "vsib_address_operand" "Tv")
15927 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "v")
15928 (match_operand:SI 5 "const1248_operand" "n")]
15929 UNSPEC_VSIBADDR)])]
15931 (clobber (match_scratch:QI 2 "=&Yk"))]
15933 "v<sseintprefix>gatherq<ssemodesuffix>\t{%6, %1%{%2%}|%1%{%2%}, %g6}"
15934 [(set_attr "type" "ssemov")
15935 (set_attr "prefix" "evex")
15936 (set_attr "mode" "<sseinsnmode>")])
;; Variant of the 64-bit-index AVX-512 gather with no vector input tied to
;; the destination.  When the destination mode differs from
;; <VEC_GATHER_SRCDI>, the destination is printed through its %t0 view;
;; otherwise at full width.  The mask (operand 6) is tied to the clobbered
;; mask scratch (operand 1).
15938 (define_insn "*avx512f_gatherdi<mode>_2"
15939 [(set (match_operand:VI48F_512 0 "register_operand" "=&v")
15942 (match_operand:QI 6 "register_operand" "1")
15943 (match_operator:<ssescalarmode> 5 "vsib_mem_operator"
15945 [(match_operand:P 3 "vsib_address_operand" "Tv")
15946 (match_operand:<VEC_GATHER_IDXDI> 2 "register_operand" "v")
15947 (match_operand:SI 4 "const1248_operand" "n")]
15948 UNSPEC_VSIBADDR)])]
15950 (clobber (match_scratch:QI 1 "=&Yk"))]
15953 if (<MODE>mode != <VEC_GATHER_SRCDI>mode)
15954 return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %t0%{%1%}|%t0%{%1%}, %g5}";
15955 return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %g5}";
15957 [(set_attr "type" "ssemov")
15958 (set_attr "prefix" "evex")
15959 (set_attr "mode" "<sseinsnmode>")])
;; Expander for AVX-512 scatters with 32-bit indices.
;;   operand 0 - base address (vsib_address_operand)
;;   operand 1 - mask, mode <avx512fmaskmode>
;;   operand 2 - index vector, mode <VEC_GATHER_IDXSI>
;;   operand 3 - data vector to store
;;   operand 4 - scale, one of 1/2/4/8
;; A scratch in the mask mode is clobbered.  The preparation code packs
;; base, index and scale into a single UNSPEC_VSIBADDR address in Pmode.
15961 (define_expand "avx512f_scattersi<mode>"
15962 [(parallel [(set (mem:VI48F_512
15964 [(match_operand 0 "vsib_address_operand")
15965 (match_operand:<VEC_GATHER_IDXSI> 2 "register_operand")
15966 (match_operand:SI 4 "const1248_operand")]))
15968 [(match_operand:<avx512fmaskmode> 1 "register_operand")
15969 (match_operand:VI48F_512 3 "register_operand")]
15971 (clobber (match_scratch:<avx512fmaskmode> 6))])]
15975 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[0], operands[2],
15976 operands[4]), UNSPEC_VSIBADDR);
;; AVX-512 scatter with 32-bit indices: stores data operand 3 to the VSIB
;; memory described by operator 5.  The incoming mask (operand 6) is tied
;; to the clobbered mask scratch (operand 1), printed in the {%1} position.
15979 (define_insn "*avx512f_scattersi<mode>"
15980 [(set (match_operator:VI48F_512 5 "vsib_mem_operator"
15982 [(match_operand:P 0 "vsib_address_operand" "Tv")
15983 (match_operand:<VEC_GATHER_IDXSI> 2 "register_operand" "v")
15984 (match_operand:SI 4 "const1248_operand" "n")]
15987 [(match_operand:<avx512fmaskmode> 6 "register_operand" "1")
15988 (match_operand:VI48F_512 3 "register_operand" "v")]
15990 (clobber (match_scratch:<avx512fmaskmode> 1 "=&Yk"))]
15992 "v<sseintprefix>scatterd<ssemodesuffix>\t{%3, %5%{%1%}|%5%{%1%}, %3}"
15993 [(set_attr "type" "ssemov")
15994 (set_attr "prefix" "evex")
15995 (set_attr "mode" "<sseinsnmode>")])
;; Expander for AVX-512 scatters with 64-bit indices.
;;   operand 0 - base address (vsib_address_operand)
;;   operand 1 - mask, always QImode here
;;   operand 2 - index vector, always V8DI
;;   operand 3 - data vector to store, mode <VEC_GATHER_SRCDI>
;;   operand 4 - scale, one of 1/2/4/8
;; A QImode scratch is clobbered.  The preparation code packs base, index
;; and scale into a single UNSPEC_VSIBADDR address in Pmode.
15997 (define_expand "avx512f_scatterdi<mode>"
15998 [(parallel [(set (mem:VI48F_512
16000 [(match_operand 0 "vsib_address_operand")
16001 (match_operand:V8DI 2 "register_operand")
16002 (match_operand:SI 4 "const1248_operand")]))
16004 [(match_operand:QI 1 "register_operand")
16005 (match_operand:<VEC_GATHER_SRCDI> 3 "register_operand")]
16007 (clobber (match_scratch:QI 6))])]
16011 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[0], operands[2],
16012 operands[4]), UNSPEC_VSIBADDR);
;; AVX-512 scatter with 64-bit (V8DI) indices: stores data operand 3 (mode
;; <VEC_GATHER_SRCDI>) to the VSIB memory described by operator 5.  The
;; QImode mask (operand 6) is tied to the clobbered mask scratch
;; (operand 1).
16015 (define_insn "*avx512f_scatterdi<mode>"
16016 [(set (match_operator:VI48F_512 5 "vsib_mem_operator"
16018 [(match_operand:P 0 "vsib_address_operand" "Tv")
16019 (match_operand:V8DI 2 "register_operand" "v")
16020 (match_operand:SI 4 "const1248_operand" "n")]
16023 [(match_operand:QI 6 "register_operand" "1")
16024 (match_operand:<VEC_GATHER_SRCDI> 3 "register_operand" "v")]
16026 (clobber (match_scratch:QI 1 "=&Yk"))]
16028 "v<sseintprefix>scatterq<ssemodesuffix>\t{%3, %5%{%1%}|%5%{%1%}, %3}"
16029 [(set_attr "type" "ssemov")
16030 (set_attr "prefix" "evex")
16031 (set_attr "mode" "<sseinsnmode>")])
;; Masked compress into a register.  Operand 1 is the source, operand 3 the
;; mask register.  Operand 2 is either tied to the destination ("0") or
;; matches the "C" constant constraint; %N2 in the template emits the
;; matching suffix after the mask.
16033 (define_insn "avx512f_compress<mode>_mask"
16034 [(set (match_operand:VI48F_512 0 "register_operand" "=v")
16036 [(match_operand:VI48F_512 1 "register_operand" "v")
16037 (match_operand:VI48F_512 2 "vector_move_operand" "0C")
16038 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")]
16041 "v<sseintprefix>compress<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
16042 [(set_attr "type" "ssemov")
16043 (set_attr "prefix" "evex")
16044 (set_attr "mode" "<sseinsnmode>")])
;; Masked compress directly to memory (UNSPEC_COMPRESS_STORE).  Operand 1
;; is the source register and operand 2 the mask register.
;; NOTE(review): operand 1 uses constraint "x" while the register form of
;; compress uses "v" -- confirm whether the narrower class is intentional.
16046 (define_insn "avx512f_compressstore<mode>_mask"
16047 [(set (match_operand:VI48F_512 0 "memory_operand" "=m")
16049 [(match_operand:VI48F_512 1 "register_operand" "x")
16051 (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")]
16052 UNSPEC_COMPRESS_STORE))]
16054 "v<sseintprefix>compress<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
16055 [(set_attr "type" "ssemov")
16056 (set_attr "prefix" "evex")
16057 (set_attr "memory" "store")
16058 (set_attr "mode" "<sseinsnmode>")])
;; Zero-masking expander for the expand operation: the preparation code
;; overwrites operand 2 with a zero vector of the destination mode, so
;; whatever the caller passed as operand 2 is ignored.
16060 (define_expand "avx512f_expand<mode>_maskz"
16061 [(set (match_operand:VI48F_512 0 "register_operand")
16063 [(match_operand:VI48F_512 1 "nonimmediate_operand")
16064 (match_operand:VI48F_512 2 "vector_move_operand")
16065 (match_operand:<avx512fmaskmode> 3 "register_operand")]
16068 "operands[2] = CONST0_RTX (<MODE>mode);")
;; Masked expand.  Operand 1 is the source, either a register
;; (alternative 0) or memory (alternative 1, "memory" attribute "load").
;; Operand 2 is either tied to the destination or matches "C"; operand 3 is
;; the mask register.
16070 (define_insn "avx512f_expand<mode>_mask"
16071 [(set (match_operand:VI48F_512 0 "register_operand" "=v,v")
16073 [(match_operand:VI48F_512 1 "nonimmediate_operand" "v,m")
16074 (match_operand:VI48F_512 2 "vector_move_operand" "0C,0C")
16075 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")]
16078 "v<sseintprefix>expand<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
16079 [(set_attr "type" "ssemov")
16080 (set_attr "prefix" "evex")
16081 (set_attr "memory" "none,load")
16082 (set_attr "mode" "<sseinsnmode>")])
;; Packed vgetmant for the VF_512 modes.  The <mask_name> and
;; <round_saeonly_name> parts of the name, together with the
;; <round_saeonly_*> and <mask_operand3> substitutions in the operands
;; and template, mean this single definition is expanded into several
;; variants.
;;   operand 1: source; predicate and constraint are supplied by the
;;              round_saeonly substitution attributes.
;;   operand 2: immediate control value in the range 0..15.
;; NOTE(review): the ';' after the closing quote of the output template
;; starts an (empty) md comment; it is not a statement terminator.
16084 (define_insn "avx512f_getmant<mode><mask_name><round_saeonly_name>"
16085 [(set (match_operand:VF_512 0 "register_operand" "=v")
16087 [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
16088 (match_operand:SI 2 "const_0_to_15_operand")]
16091 "vgetmant<ssemodesuffix>\t{%2, <round_saeonly_mask_op3>%1, %0<mask_operand3>|%0<mask_operand3>, %1<round_saeonly_mask_op3>, %2}";
16092 [(set_attr "prefix" "evex")
16093 (set_attr "mode" "<MODE>")])
;; Scalar-suffixed vgetmant (<ssescalarmodesuffix>) on the VF_128 modes;
;; the "mode" attribute is the scalar mode <ssescalarmode>.
;;   operand 1: first source, always a vector register.
;;   operand 2: second source; predicate and constraint are supplied by
;;              the round_saeonly substitution attributes.
;;   operand 3: immediate control value in the range 0..15.
;; NOTE(review): the ';' after the closing quote of the output template
;; starts an (empty) md comment; it is not a statement terminator.
16095 (define_insn "avx512f_getmant<mode><round_saeonly_name>"
16096 [(set (match_operand:VF_128 0 "register_operand" "=v")
16099 [(match_operand:VF_128 1 "register_operand" "v")
16100 (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
16101 (match_operand:SI 3 "const_0_to_15_operand")]
16106 "vgetmant<ssescalarmodesuffix>\t{%3, <round_saeonly_op4>%2, %1, %0|%0, %1, %2<round_saeonly_op4>, %3}";
16107 [(set_attr "prefix" "evex")
16108 (set_attr "mode" "<ssescalarmode>")])
;; Per-element leading-zero count on the VI48_512 modes, emitted as
;; vplzcnt<ssemodesuffix>.  Operand 1 may be a vector register or memory
;; ("vm"); <mask_name>/<mask_operand2> add the masked variant of the
;; pattern.
16110 (define_insn "clz<mode>2<mask_name>"
16111 [(set (match_operand:VI48_512 0 "register_operand" "=v")
16113 (match_operand:VI48_512 1 "nonimmediate_operand" "vm")))]
16115 "vplzcnt<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
16116 [(set_attr "type" "sse")
16117 (set_attr "prefix" "evex")
16118 (set_attr "mode" "<sseinsnmode>")])
;; vpconflict<ssemodesuffix> on the VI48_512 modes.  Operand 1 may be a
;; vector register or memory ("vm"); <mask_name>/<mask_operand2> add the
;; masked variant of the pattern, and <mask_codefor> selects the name
;; prefix for each variant.
16120 (define_insn "<mask_codefor>conflict<mode><mask_name>"
16121 [(set (match_operand:VI48_512 0 "register_operand" "=v")
16123 [(match_operand:VI48_512 1 "nonimmediate_operand" "vm")]
16126 "vpconflict<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
16127 [(set_attr "type" "sse")
16128 (set_attr "prefix" "evex")
16129 (set_attr "mode" "<sseinsnmode>")])
;; sha1msg1 on V4SI.  Operand 1 is tied to the destination ("0"), so the
;; template prints only operand 2 (register or memory, "xm") and
;; operand 0: the instruction is destructive on its first source.
16131 (define_insn "sha1msg1"
16132 [(set (match_operand:V4SI 0 "register_operand" "=x")
16134 [(match_operand:V4SI 1 "register_operand" "0")
16135 (match_operand:V4SI 2 "nonimmediate_operand" "xm")]
16138 "sha1msg1\t{%2, %0|%0, %2}"
16139 [(set_attr "type" "sselog1")
16140 (set_attr "mode" "TI")])
;; sha1msg2 on V4SI.  Operand 1 is tied to the destination ("0"), so the
;; template prints only operand 2 (register or memory, "xm") and
;; operand 0: the instruction is destructive on its first source.
16142 (define_insn "sha1msg2"
16143 [(set (match_operand:V4SI 0 "register_operand" "=x")
16145 [(match_operand:V4SI 1 "register_operand" "0")
16146 (match_operand:V4SI 2 "nonimmediate_operand" "xm")]
16149 "sha1msg2\t{%2, %0|%0, %2}"
16150 [(set_attr "type" "sselog1")
16151 (set_attr "mode" "TI")])
;; sha1nexte on V4SI, modelled as UNSPEC_SHA1NEXTE of operands 1 and 2.
;; Operand 1 is tied to the destination ("0"), so the template prints
;; only operand 2 (register or memory, "xm") and operand 0.
16153 (define_insn "sha1nexte"
16154 [(set (match_operand:V4SI 0 "register_operand" "=x")
16156 [(match_operand:V4SI 1 "register_operand" "0")
16157 (match_operand:V4SI 2 "nonimmediate_operand" "xm")]
16158 UNSPEC_SHA1NEXTE))]
16160 "sha1nexte\t{%2, %0|%0, %2}"
16161 [(set_attr "type" "sselog1")
16162 (set_attr "mode" "TI")])
;; sha1rnds4 on V4SI, modelled as UNSPEC_SHA1RNDS4 of operands 1..3.
;;   operand 1: tied to the destination ("0").
;;   operand 2: second source, register or memory ("xm").
;;   operand 3: immediate in the range 0..3 (const_0_to_3_operand); it is
;;              printed as the instruction's immediate, which is why
;;              "length_immediate" is set to 1.
16164 (define_insn "sha1rnds4"
16165 [(set (match_operand:V4SI 0 "register_operand" "=x")
16167 [(match_operand:V4SI 1 "register_operand" "0")
16168 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
16169 (match_operand:SI 3 "const_0_to_3_operand" "n")]
16170 UNSPEC_SHA1RNDS4))]
16172 "sha1rnds4\t{%3, %2, %0|%0, %2, %3}"
16173 [(set_attr "type" "sselog1")
16174 (set_attr "length_immediate" "1")
16175 (set_attr "mode" "TI")])
;; sha256msg1 on V4SI, modelled as UNSPEC_SHA256MSG1 of operands 1 and 2.
;; Operand 1 is tied to the destination ("0"), so the template prints
;; only operand 2 (register or memory, "xm") and operand 0.
16177 (define_insn "sha256msg1"
16178 [(set (match_operand:V4SI 0 "register_operand" "=x")
16180 [(match_operand:V4SI 1 "register_operand" "0")
16181 (match_operand:V4SI 2 "nonimmediate_operand" "xm")]
16182 UNSPEC_SHA256MSG1))]
16184 "sha256msg1\t{%2, %0|%0, %2}"
16185 [(set_attr "type" "sselog1")
16186 (set_attr "mode" "TI")])
;; sha256msg2 on V4SI, modelled as UNSPEC_SHA256MSG2 of operands 1 and 2.
;; Operand 1 is tied to the destination ("0"), so the template prints
;; only operand 2 (register or memory, "xm") and operand 0.
16188 (define_insn "sha256msg2"
16189 [(set (match_operand:V4SI 0 "register_operand" "=x")
16191 [(match_operand:V4SI 1 "register_operand" "0")
16192 (match_operand:V4SI 2 "nonimmediate_operand" "xm")]
16193 UNSPEC_SHA256MSG2))]
16195 "sha256msg2\t{%2, %0|%0, %2}"
16196 [(set_attr "type" "sselog1")
16197 (set_attr "mode" "TI")])
;; sha256rnds2 on V4SI, modelled as UNSPEC_SHA256RNDS2 of operands 1..3.
;;   operand 1: tied to the destination ("0").
;;   operand 2: second source, register or memory ("xm").
;;   operand 3: a V4SI register constrained by "Yz" (the i386 constraint
;;              for the first SSE register, which the instruction reads
;;              implicitly); it is printed explicitly in the template.
;; Unlike sha1rnds4, this instruction has no immediate byte -- its third
;; operand is a register -- so no "length_immediate" attribute is set and
;; the default (no immediate) is used for length computation.
16199 (define_insn "sha256rnds2"
16200 [(set (match_operand:V4SI 0 "register_operand" "=x")
16202 [(match_operand:V4SI 1 "register_operand" "0")
16203 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
16204 (match_operand:V4SI 3 "register_operand" "Yz")]
16205 UNSPEC_SHA256RNDS2))]
16207 "sha256rnds2\t{%3, %2, %0|%0, %2, %3}"
16208 [(set_attr "type" "sselog1")
16210 (set_attr "mode" "TI")])
;; Cast from a quarter-width vector (<ssequartermode>) to the full
;; AVX512MODE2P mode.  Two alternatives: register destination with a
;; register-or-memory source ("=x" / "xm"), or memory destination with a
;; register source ("m" / "x").
;; The pattern is split once reload has completed ("&& reload_completed").
;; The split code takes the hard register number of an operand (REGNO),
;; re-creates that register in the other operand's mode with gen_rtx_REG
;; (op0 in <ssequartermode>mode, op1 in <MODE>mode), and then emits an
;; ordinary move between op0 and op1.
16212 (define_insn_and_split "avx512f_<castmode><avxsizesuffix>_<castmode>"
16213 [(set (match_operand:AVX512MODE2P 0 "nonimmediate_operand" "=x,m")
16214 (unspec:AVX512MODE2P
16215 [(match_operand:<ssequartermode> 1 "nonimmediate_operand" "xm,x")]
16219 "&& reload_completed"
16222 rtx op0 = operands[0];
16223 rtx op1 = operands[1];
16225 op0 = gen_rtx_REG (<ssequartermode>mode, REGNO (op0));
16227 op1 = gen_rtx_REG (<MODE>mode, REGNO (op1));
16228 emit_move_insn (op0, op1);
16232 (define_insn_and_split "avx512f_<castmode><avxsizesuffix>_256<castmode>"
16233 [(set (match_operand:AVX512MODE2P 0 "nonimmediate_operand" "=x,m")
16234 (unspec:AVX512MODE2P
16235 [(match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "xm,x")]
16239 "&& reload_completed"
16242 rtx op0 = operands[0];
16243 rtx op1 = operands[1];
16245 op0 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op0));
16247 op1 = gen_rtx_REG (<MODE>mode, REGNO (op1));
16248 emit_move_insn (op0, op1);