1 ;; GCC machine description for SSE instructions
2 ;; Copyright (C) 2005-2013 Free Software Foundation, Inc.
4 ;; This file is part of GCC.
6 ;; GCC is free software; you can redistribute it and/or modify
7 ;; it under the terms of the GNU General Public License as published by
8 ;; the Free Software Foundation; either version 3, or (at your option)
11 ;; GCC is distributed in the hope that it will be useful,
12 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
13 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 ;; GNU General Public License for more details.
16 ;; You should have received a copy of the GNU General Public License
17 ;; along with GCC; see the file COPYING3. If not see
18 ;; <http://www.gnu.org/licenses/>.
20 (define_c_enum "unspec" [
55 UNSPEC_XOP_UNSIGNED_CMP
66 UNSPEC_AESKEYGENASSIST
88 (define_c_enum "unspecv" [
98 ;; All vector modes including V?TImode, used in move patterns.
;; An entry written (MODE "COND") is only iterated over when COND holds;
;; the modes listed bare are always included.
99 (define_mode_iterator VMOVE
100 [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
101 (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
102 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
103 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
104 (V2TI "TARGET_AVX") V1TI
105 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
106 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF])
;; Vector modes of every element type.  Compared with VMOVE this list has
;; no V?TImode and no V64QI/V32HI entries, and V2DF is gated on TARGET_SSE2.
109 (define_mode_iterator V
110 [(V32QI "TARGET_AVX") V16QI
111 (V16HI "TARGET_AVX") V8HI
112 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
113 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
114 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
115 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
117 ;; All 128bit vector modes
;; Only V2DF carries a condition (TARGET_SSE2); the others are unconditional.
118 (define_mode_iterator V_128
119 [V16QI V8HI V4SI V2DI V4SF (V2DF "TARGET_SSE2")])
121 ;; All 256bit vector modes
;; No entry carries a per-mode condition.
122 (define_mode_iterator V_256
123 [V32QI V16HI V8SI V4DI V8SF V4DF])
125 ;; All 256bit and 512bit vector modes
;; The 256-bit modes are listed without a condition; every 512-bit mode
;; requires TARGET_AVX512F.
126 (define_mode_iterator V_256_512
127 [V32QI V16HI V8SI V4DI V8SF V4DF
128 (V64QI "TARGET_AVX512F") (V32HI "TARGET_AVX512F") (V16SI "TARGET_AVX512F")
129 (V8DI "TARGET_AVX512F") (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")])
131 ;; All vector float modes
;; SF and DF element vectors in 128-, 256- and 512-bit sizes.  V4SF is the
;; only unconditional entry; V2DF needs TARGET_SSE2.
132 (define_mode_iterator VF
133 [(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
134 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
136 ;; 128- and 256-bit float vector modes
;; Same as VF without the two 512-bit entries.
137 (define_mode_iterator VF_128_256
138 [(V8SF "TARGET_AVX") V4SF
139 (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
141 ;; All SFmode vector float modes
;; Used by the SFmode div<mode>3 and sqrt<mode>2 expanders in this file.
142 (define_mode_iterator VF1
143 [(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF])
145 ;; 128- and 256-bit SF vector modes
;; Used by the <sse>_rcp<mode>2 and rsqrt<mode>2 patterns, which therefore
;; have no V16SF variant.
146 (define_mode_iterator VF1_128_256
147 [(V8SF "TARGET_AVX") V4SF])
149 ;; All DFmode vector float modes
;; Unlike in VF, V2DF has no per-mode condition here.  Used by the DFmode
;; div<mode>3 and sqrt<mode>2 expanders in this file.
150 (define_mode_iterator VF2
151 [(V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF])
153 ;; 128- and 256-bit DF vector modes
;; VF2 without the 512-bit V8DF entry.
154 (define_mode_iterator VF2_128_256
155 [(V4DF "TARGET_AVX") V2DF])
;; 512- and 256-bit DF vector modes (VF2 without V2DF).
157 (define_mode_iterator VF2_512_256
158 [(V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX")])
160 ;; All 128bit vector float modes
;; Used by the scalar "_vm" arithmetic and sqrt patterns in this file.
161 (define_mode_iterator VF_128
162 [V4SF (V2DF "TARGET_SSE2")])
164 ;; All 256bit vector float modes
165 (define_mode_iterator VF_256
168 ;; All 512bit vector float modes
169 (define_mode_iterator VF_512
172 ;; All vector integer modes
;; The only 512-bit entries are V16SI and V8DI; there is no V64QI or V32HI.
;; 256-bit entries need TARGET_AVX, 128-bit entries are unconditional.
173 (define_mode_iterator VI
174 [(V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")
175 (V32QI "TARGET_AVX") V16QI
176 (V16HI "TARGET_AVX") V8HI
177 (V8SI "TARGET_AVX") V4SI
178 (V4DI "TARGET_AVX") V2DI])
;; Vector integer modes of all four element sizes.  Unlike VI, the 256-bit
;; entries require TARGET_AVX2 rather than TARGET_AVX; the 512-bit SI/DI
;; entries require TARGET_AVX512F.
180 (define_mode_iterator VI_AVX2
181 [(V32QI "TARGET_AVX2") V16QI
182 (V16HI "TARGET_AVX2") V8HI
183 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI
184 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
186 ;; All QImode vector integer modes
;; 128- and 256-bit only; used by the <sse3>_lddqu<avxsizesuffix> pattern.
187 (define_mode_iterator VI1
188 [(V32QI "TARGET_AVX") V16QI])
;; Integer modes handled by the <sse2_avx_avx512f>_loaddqu<mode> and
;; <sse2_avx_avx512f>_storedqu<mode> patterns: QImode vectors for the 128-
;; and 256-bit sizes, SI/DI vectors for the 512-bit size.
190 (define_mode_iterator VI_UNALIGNED_LOADSTORE
191 [(V32QI "TARGET_AVX") V16QI
192 (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")])
194 ;; All DImode vector integer modes
;; Used by the <sse2>_movnt<mode> non-temporal store pattern.
195 (define_mode_iterator VI8
196 [(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI])
;; QImode vector integer modes; the 256-bit V32QI requires TARGET_AVX2
;; (VI1 only requires TARGET_AVX for it).
198 (define_mode_iterator VI1_AVX2
199 [(V32QI "TARGET_AVX2") V16QI])
;; HImode vector integer modes; the 256-bit V16HI requires TARGET_AVX2.
201 (define_mode_iterator VI2_AVX2
202 [(V16HI "TARGET_AVX2") V8HI])
;; HImode vector integer modes including the 512-bit V32HI, which requires
;; TARGET_AVX512F; V16HI requires TARGET_AVX2.
204 (define_mode_iterator VI2_AVX512F
205 [(V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX2") V8HI])
;; SImode vector integer modes; the 256-bit V8SI requires TARGET_AVX2.
207 (define_mode_iterator VI4_AVX2
208 [(V8SI "TARGET_AVX2") V4SI])
;; SImode vector integer modes including the 512-bit V16SI, which requires
;; TARGET_AVX512F; V8SI requires TARGET_AVX2.
210 (define_mode_iterator VI4_AVX512F
211 [(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI])
;; All three SImode vector sizes plus the 512-bit V8DI.  Note that the
;; 128- and 256-bit DImode vectors (V2DI, V4DI) are not part of this list.
213 (define_mode_iterator VI48_AVX512F
214 [(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI
215 (V8DI "TARGET_AVX512F")])
;; DImode vector integer modes; the 256-bit V4DI requires TARGET_AVX2.
217 (define_mode_iterator VI8_AVX2
218 [(V4DI "TARGET_AVX2") V2DI])
;; DImode vector integer modes in all three sizes: V8DI requires
;; TARGET_AVX512F, V4DI requires TARGET_AVX2, V2DI is unconditional.
220 (define_mode_iterator VI8_AVX2_AVX512F
221 [(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
;; TImode-element vector modes: V1TI always, V2TI only with TARGET_AVX2.
223 ;; ??? We should probably use TImode instead.
224 (define_mode_iterator VIMAX_AVX2
225 [(V2TI "TARGET_AVX2") V1TI])
;; Like VIMAX_AVX2, but the 128-bit entry is the scalar TImode rather than
;; the vector mode V1TI.
227 ;; ??? This should probably be dropped in favor of VIMAX_AVX2.
228 (define_mode_iterator SSESCALARMODE
229 [(V2TI "TARGET_AVX2") TI])
;; QImode and HImode vector integer modes; the 256-bit entries require
;; TARGET_AVX2.
231 (define_mode_iterator VI12_AVX2
232 [(V32QI "TARGET_AVX2") V16QI
233 (V16HI "TARGET_AVX2") V8HI])
;; HImode and SImode vector integer modes; the 256-bit entries require
;; TARGET_AVX2.
235 (define_mode_iterator VI24_AVX2
236 [(V16HI "TARGET_AVX2") V8HI
237 (V8SI "TARGET_AVX2") V4SI])
;; QImode, HImode and SImode vector integer modes.  512-bit entries exist
;; for HI and SI (TARGET_AVX512F) but not for QI; 256-bit entries require
;; TARGET_AVX2.
239 (define_mode_iterator VI124_AVX512F
240 [(V32QI "TARGET_AVX2") V16QI
241 (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX2") V8HI
242 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI])
;; QImode, HImode and SImode vector integer modes, 128- and 256-bit only;
;; the 256-bit entries require TARGET_AVX2.
244 (define_mode_iterator VI124_AVX2
245 [(V32QI "TARGET_AVX2") V16QI
246 (V16HI "TARGET_AVX2") V8HI
247 (V8SI "TARGET_AVX2") V4SI])
;; HImode, SImode and DImode vector integer modes, 128- and 256-bit only;
;; the 256-bit entries require TARGET_AVX2.
249 (define_mode_iterator VI248_AVX2
250 [(V16HI "TARGET_AVX2") V8HI
251 (V8SI "TARGET_AVX2") V4SI
252 (V4DI "TARGET_AVX2") V2DI])
;; SImode and DImode vector integer modes in all three sizes: 512-bit
;; entries require TARGET_AVX512F, 256-bit entries TARGET_AVX2.
254 (define_mode_iterator VI48_AVX2_48_AVX512F
255 [(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI
256 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
258 (define_mode_iterator V48_AVX2
261 (V4SI "TARGET_AVX2") (V2DI "TARGET_AVX2")
262 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")])
264 (define_mode_attr sse2_avx_avx512f
265 [(V16QI "sse2") (V32QI "avx") (V64QI "avx512f")
266 (V4SI "sse2") (V8SI "avx") (V16SI "avx512f")
268 (V16SF "avx512f") (V8SF "avx") (V4SF "avx")
269 (V8DF "avx512f") (V4DF "avx") (V2DF "avx")])
;; Maps an integer vector mode to an ISA name string: "sse2" for the 128-bit
;; modes, "avx2" for the 256-bit modes, and "avx512f" for V16SI and V8DI.
271 (define_mode_attr sse2_avx2
272 [(V16QI "sse2") (V32QI "avx2")
273 (V8HI "sse2") (V16HI "avx2")
274 (V4SI "sse2") (V8SI "avx2") (V16SI "avx512f")
275 (V2DI "sse2") (V4DI "avx2") (V8DI "avx512f")
276 (V1TI "sse2") (V2TI "avx2")])
;; Maps an integer mode to "ssse3" or "avx2": the 256-bit modes give "avx2",
;; everything else listed (including V4HI and scalar TI) gives "ssse3".
278 (define_mode_attr ssse3_avx2
279 [(V16QI "ssse3") (V32QI "avx2")
280 (V4HI "ssse3") (V8HI "ssse3") (V16HI "avx2")
281 (V4SI "ssse3") (V8SI "avx2")
282 (V2DI "ssse3") (V4DI "avx2")
283 (TI "ssse3") (V2TI "avx2")])
;; Maps an integer vector mode to "sse4_1" (128-bit), "avx2" (256-bit) or,
;; for V16SI only, "avx512f".
285 (define_mode_attr sse4_1_avx2
286 [(V16QI "sse4_1") (V32QI "avx2")
287 (V8HI "sse4_1") (V16HI "avx2")
288 (V4SI "sse4_1") (V8SI "avx2") (V16SI "avx512f")
289 (V2DI "sse4_1") (V4DI "avx2")])
;; Maps float vector modes to "avx" and SI/DI integer vector modes to
;; "avx2"; the choice depends on element type only, not on vector size.
291 (define_mode_attr avx_avx2
292 [(V4SF "avx") (V2DF "avx")
293 (V8SF "avx") (V4DF "avx")
294 (V4SI "avx2") (V2DI "avx2")
295 (V8SI "avx2") (V4DI "avx2")])
;; Maps 128-bit integer vector modes to "vec" and 256-bit ones to "avx2".
297 (define_mode_attr vec_avx2
298 [(V16QI "vec") (V32QI "avx2")
299 (V8HI "vec") (V16HI "avx2")
300 (V4SI "vec") (V8SI "avx2")
301 (V2DI "vec") (V4DI "avx2")])
;; Maps 512-bit SI/DI/SF/DF vector modes to "avx512f" and the smaller listed
;; modes to "avx2".  There are no entries for V4SF or V2DF.
303 (define_mode_attr avx2_avx512f
304 [(V4SI "avx2") (V8SI "avx2") (V16SI "avx512f")
305 (V2DI "avx2") (V4DI "avx2") (V8DI "avx512f")
306 (V8SF "avx2") (V16SF "avx512f")
307 (V4DF "avx2") (V8DF "avx512f")])
;; Letter that selects the float ("f") or integer ("i") spelling of the
;; AVX-512 extract/broadcast mnemonics built in *mov<mode>_internal, e.g.
;; vextract<shuffletype>64x4 and vbroadcast<shuffletype>32x4.  Only the
;; "f" and "i" spellings of those instructions exist, so every integer
;; mode must map to "i" (V16HI previously mapped to "u", which would have
;; produced the non-existent mnemonic vextractu64x4).
309 (define_mode_attr shuffletype
310 [(V16SF "f") (V16SI "i") (V8DF "f") (V8DI "i")
311 (V8SF "f") (V8SI "i") (V4DF "f") (V4DI "i")
312 (V4SF "f") (V4SI "i") (V2DF "f") (V2DI "i")
313 (V32QI "i") (V16HI "i") (V16QI "i") (V8HI "i")
314 (V64QI "i") (V1TI "i") (V2TI "i")])
;; Maps a QI/HI vector mode to the vector mode with the same number of
;; elements but elements twice as wide (e.g. V8HI -> V8SI).
316 (define_mode_attr ssedoublemode
317 [(V16HI "V16SI") (V8HI "V8SI") (V4HI "V4SI")
318 (V32QI "V32HI") (V16QI "V16HI")])
;; Maps a DImode vector mode to the QImode vector mode of the same total
;; size.
320 (define_mode_attr ssebytemode
321 [(V4DI "V32QI") (V2DI "V16QI")])
323 ;; All 128bit vector integer modes
;; (QI, HI, SI and DI elements; no entry carries a condition.)
324 (define_mode_iterator VI_128 [V16QI V8HI V4SI V2DI])
326 ;; All 256bit vector integer modes
;; (QI, HI, SI and DI elements; no entry carries a condition.)
327 (define_mode_iterator VI_256 [V32QI V16HI V8SI V4DI])
329 ;; Random 128bit vector integer mode combinations
;; The digits in each name are the element sizes in bytes of the modes it
;; contains (1 = QI, 2 = HI, 4 = SI, 8 = DI).  No entry carries a condition.
330 (define_mode_iterator VI12_128 [V16QI V8HI])
331 (define_mode_iterator VI14_128 [V16QI V4SI])
332 (define_mode_iterator VI124_128 [V16QI V8HI V4SI])
333 (define_mode_iterator VI128_128 [V16QI V8HI V2DI])
334 (define_mode_iterator VI24_128 [V8HI V4SI])
335 (define_mode_iterator VI248_128 [V8HI V4SI V2DI])
336 (define_mode_iterator VI48_128 [V4SI V2DI])
338 ;; Random 256bit vector integer mode combinations
;; The digits in each name are the element sizes in bytes of the modes it
;; contains.  No entry carries a condition.
339 (define_mode_iterator VI124_256 [V32QI V16HI V8SI])
340 (define_mode_iterator VI48_256 [V8SI V4DI])
342 ;; Int-float size matches
;; Each iterator pairs the integer and float vector modes that share both
;; element size (4 or 8 bytes) and total vector size (128 or 256 bits).
343 (define_mode_iterator VI4F_128 [V4SI V4SF])
344 (define_mode_iterator VI8F_128 [V2DI V2DF])
345 (define_mode_iterator VI4F_256 [V8SI V8SF])
346 (define_mode_iterator VI8F_256 [V4DI V4DF])
348 ;; Mapping from float mode to required SSE level
;; Covers scalar SF/DF as well as the vector modes.  Used as the insn-name
;; prefix in patterns such as <sse>_loadu... and <sse>_div<mode>3.
349 (define_mode_attr sse
350 [(SF "sse") (DF "sse2")
351 (V4SF "sse") (V2DF "sse2")
352 (V16SF "avx512f") (V8SF "avx")
353 (V8DF "avx512f") (V4DF "avx")])
;; ISA name string for QImode and DImode integer vectors: "sse2" for 128-bit,
;; "avx" for 256-bit, "avx512f" for 512-bit.  Used as the insn-name prefix
;; of <sse2>_movnt<mode>.
355 (define_mode_attr sse2
356 [(V16QI "sse2") (V32QI "avx") (V64QI "avx512f")
357 (V2DI "sse2") (V4DI "avx") (V8DI "avx512f")])
;; ISA name string for QImode vectors: "sse3" for V16QI, "avx" for V32QI.
;; Used as the insn-name prefix of <sse3>_lddqu<avxsizesuffix>.
359 (define_mode_attr sse3
360 [(V16QI "sse3") (V32QI "avx")])
362 (define_mode_attr sse4_1
363 [(V4SF "sse4_1") (V2DF "sse4_1")
364 (V8SF "avx") (V4DF "avx")
;; Vector-size suffix used when building insn names such as
;; <sse>_loadu<ssemodesuffix><avxsizesuffix>: "512" and "256" for the wide
;; modes, and the empty string for the 128-bit modes.
367 (define_mode_attr avxsizesuffix
368 [(V64QI "512") (V32HI "512") (V16SI "512") (V8DI "512")
369 (V32QI "256") (V16HI "256") (V8SI "256") (V4DI "256")
370 (V16QI "") (V8HI "") (V4SI "") (V2DI "")
371 (V16SF "512") (V8DF "512")
372 (V8SF "256") (V4DF "256")
373 (V4SF "") (V2DF "")])
375 ;; SSE instruction mode
376 (define_mode_attr sseinsnmode
377 [(V64QI "XI") (V32HI "XI") (V16SI "XI") (V8DI "XI")
378 (V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI") (V2TI "OI")
379 (V16QI "TI") (V8HI "TI") (V4SI "TI") (V2DI "TI") (V1TI "TI")
380 (V16SF "V16SF") (V8DF "V8DF")
381 (V8SF "V8SF") (V4DF "V4DF")
382 (V4SF "V4SF") (V2DF "V2DF")
385 ;; Mapping of vector float modes to an integer mode of the same size
;; Float modes map to the integer vector mode with the same element count
;; and element width; the integer vector modes listed map to themselves.
386 (define_mode_attr sseintvecmode
387 [(V16SF "V16SI") (V8DF "V8DI")
388 (V8SF "V8SI") (V4DF "V4DI")
389 (V4SF "V4SI") (V2DF "V2DI")
390 (V16SI "V16SI") (V8DI "V8DI")
391 (V8SI "V8SI") (V4DI "V4DI")
392 (V4SI "V4SI") (V2DI "V2DI")
393 (V16HI "V16HI") (V8HI "V8HI")
394 (V32QI "V32QI") (V16QI "V16QI")])
396 (define_mode_attr sseintvecmodelower
398 (V8SF "v8si") (V4DF "v4di")
399 (V4SF "v4si") (V2DF "v2di")
400 (V8SI "v8si") (V4DI "v4di")
401 (V4SI "v4si") (V2DI "v2di")
402 (V16HI "v16hi") (V8HI "v8hi")
403 (V32QI "v32qi") (V16QI "v16qi")])
405 ;; Mapping of vector modes to a vector mode of double size
;; The element type is kept and the element count doubles (128-bit modes
;; map to 256-bit modes, 256-bit modes to 512-bit modes).
406 (define_mode_attr ssedoublevecmode
407 [(V32QI "V64QI") (V16HI "V32HI") (V8SI "V16SI") (V4DI "V8DI")
408 (V16QI "V32QI") (V8HI "V16HI") (V4SI "V8SI") (V2DI "V4DI")
409 (V8SF "V16SF") (V4DF "V8DF")
410 (V4SF "V8SF") (V2DF "V4DF")])
412 ;; Mapping of vector modes to a vector mode of half size
413 (define_mode_attr ssehalfvecmode
414 [(V64QI "V32QI") (V32HI "V16HI") (V16SI "V8SI") (V8DI "V4DI")
415 (V32QI "V16QI") (V16HI "V8HI") (V8SI "V4SI") (V4DI "V2DI")
416 (V16QI "V8QI") (V8HI "V4HI") (V4SI "V2SI")
417 (V16SF "V8SF") (V8DF "V4DF")
418 (V8SF "V4SF") (V4DF "V2DF")
421 ;; Mapping of vector modes to packed single mode of the same size
;; Every 512-bit mode maps to V16SF, every 256-bit mode to V8SF and every
;; 128-bit mode to V4SF.  Used for the "mode" attribute of the move,
;; loadu/storeu and loaddqu/storedqu patterns when
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL or TARGET_SSE_TYPELESS_STORES
;; applies.
422 (define_mode_attr ssePSmode
423 [(V16SI "V16SF") (V8DF "V16SF")
424 (V16SF "V16SF") (V8DI "V16SF")
425 (V64QI "V16SF") (V32QI "V8SF") (V16QI "V4SF")
426 (V32HI "V16SF") (V16HI "V8SF") (V8HI "V4SF")
427 (V8SI "V8SF") (V4SI "V4SF")
428 (V4DI "V8SF") (V2DI "V4SF")
429 (V2TI "V8SF") (V1TI "V4SF")
430 (V8SF "V8SF") (V4SF "V4SF")
431 (V4DF "V8SF") (V2DF "V4SF")])
433 ;; Mapping of vector modes back to the scalar modes
;; i.e. the mode of a single element.  Used for the "mode" attribute of the
;; scalar "_vm" patterns in this file.
434 (define_mode_attr ssescalarmode
435 [(V64QI "QI") (V32QI "QI") (V16QI "QI")
436 (V32HI "HI") (V16HI "HI") (V8HI "HI")
437 (V16SI "SI") (V8SI "SI") (V4SI "SI")
438 (V8DI "DI") (V4DI "DI") (V2DI "DI")
439 (V16SF "SF") (V8SF "SF") (V4SF "SF")
440 (V8DF "DF") (V4DF "DF") (V2DF "DF")])
442 ;; Mapping of vector modes to the 128bit modes
;; The element type is kept; 128-bit modes map to themselves.
443 (define_mode_attr ssexmmmode
444 [(V64QI "V16QI") (V32QI "V16QI") (V16QI "V16QI")
445 (V32HI "V8HI") (V16HI "V8HI") (V8HI "V8HI")
446 (V16SI "V4SI") (V8SI "V4SI") (V4SI "V4SI")
447 (V8DI "V2DI") (V4DI "V2DI") (V2DI "V2DI")
448 (V16SF "V4SF") (V8SF "V4SF") (V4SF "V4SF")
449 (V8DF "V2DF") (V4DF "V2DF") (V2DF "V2DF")])
451 ;; Pointer size override for scalar modes (Intel asm dialect)
452 (define_mode_attr iptr
453 [(V32QI "b") (V16HI "w") (V8SI "k") (V4DI "q")
454 (V16QI "b") (V8HI "w") (V4SI "k") (V2DI "q")
455 (V8SF "k") (V4DF "q")
456 (V4SF "k") (V2DF "q")
459 ;; Number of scalar elements in each vector type
;; The value is a decimal string.  Note there is no entry for V32HI.
460 (define_mode_attr ssescalarnum
461 [(V64QI "64") (V16SI "16") (V8DI "8")
462 (V32QI "32") (V16HI "16") (V8SI "8") (V4DI "4")
463 (V16QI "16") (V8HI "8") (V4SI "4") (V2DI "2")
464 (V16SF "16") (V8DF "8")
465 (V8SF "8") (V4DF "4")
466 (V4SF "4") (V2DF "2")])
468 ;; Mask of scalar elements in each vector type
;; Each value is the element count minus one, as a decimal string.  Only
;; 128- and 256-bit modes are listed.
469 (define_mode_attr ssescalarnummask
470 [(V32QI "31") (V16HI "15") (V8SI "7") (V4DI "3")
471 (V16QI "15") (V8HI "7") (V4SI "3") (V2DI "1")
472 (V8SF "7") (V4DF "3")
473 (V4SF "3") (V2DF "1")])
475 ;; SSE prefix for integer vector modes
476 (define_mode_attr sseintprefix
477 [(V2DI "p") (V2DF "")
480 (V8SI "p") (V8SF "")])
482 ;; SSE scalar suffix for vector modes
483 (define_mode_attr ssescalarmodesuffix
485 (V8SF "ss") (V4DF "sd")
486 (V4SF "ss") (V2DF "sd")
487 (V8SI "ss") (V4DI "sd")
490 ;; Pack/unpack vector modes
;; sseunpackmode: integer vector mode of the same total size with elements
;; twice as wide and half as many of them (e.g. V16QI -> V8HI).
491 (define_mode_attr sseunpackmode
492 [(V16QI "V8HI") (V8HI "V4SI") (V4SI "V2DI")
493 (V32QI "V16HI") (V16HI "V8SI") (V8SI "V4DI")
494 (V32HI "V16SI") (V64QI "V32HI") (V16SI "V8DI")])
;; Inverse direction of sseunpackmode for 128- and 256-bit modes: integer
;; vector mode of the same total size with elements half as wide and twice
;; as many of them (e.g. V8HI -> V16QI).
496 (define_mode_attr ssepackmode
497 [(V8HI "V16QI") (V4SI "V8HI") (V2DI "V4SI")
498 (V16HI "V32QI") (V8SI "V16HI") (V4DI "V8SI")])
500 ;; Mapping of the max integer size for xop rotate immediate constraint
;; Each value is the element width in bits minus one, for the 128-bit
;; integer vector modes only.
501 (define_mode_attr sserotatemax
502 [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")])
504 ;; Mapping of mode to cast intrinsic name
;; Defined only for the 256-bit modes V8SI ("si"), V8SF ("ps") and
;; V4DF ("pd").
505 (define_mode_attr castmode [(V8SI "si") (V8SF "ps") (V4DF "pd")])
507 ;; Instruction suffix for sign and zero extensions.
;; This is a code attribute (keyed on the rtx code, not on a mode):
;; "sx" for sign_extend, "zx" for zero_extend.
508 (define_code_attr extsuffix [(sign_extend "sx") (zero_extend "zx")])
510 ;; i128 for integer vectors and TARGET_AVX2, f128 otherwise.
511 ;; i64x4 or f64x4 for 512bit modes.
;; The 256-bit integer entries are written "%~128"; presumably the %~ output
;; escape is what makes the i/f choice described above -- confirm against
;; the i386 operand-printing code.  Float and 512-bit entries are fixed
;; strings.
512 (define_mode_attr i128
513 [(V16SF "f64x4") (V8SF "f128") (V8DF "f64x4") (V4DF "f128")
514 (V64QI "i64x4") (V32QI "%~128") (V32HI "i64x4") (V16HI "%~128")
515 (V16SI "i64x4") (V8SI "%~128") (V8DI "i64x4") (V4DI "%~128")])
;; The three 256-bit modes V8SI, V8SF and V4DF -- the same set of modes for
;; which the castmode attribute is defined.
518 (define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF])
520 ;; Mapping of immediate bits for blend instructions
;; Each value is the all-ones mask with one bit per vector element
;; (2^N - 1 for N elements), as a decimal string.
521 (define_mode_attr blendbits
522 [(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")])
524 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
526 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
530 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
532 ;; All of these patterns are enabled for SSE1 as well as SSE2.
533 ;; This is essential for maintaining stable calling conventions.
535 (define_expand "mov<mode>"
536 [(set (match_operand:VMOVE 0 "nonimmediate_operand")
537 (match_operand:VMOVE 1 "nonimmediate_operand"))]
540 ix86_expand_vector_move (<MODE>mode, operands);
544 (define_insn "*mov<mode>_internal"
545 [(set (match_operand:VMOVE 0 "nonimmediate_operand" "=v,v ,m")
546 (match_operand:VMOVE 1 "nonimmediate_or_sse_const_operand" "C ,vm,v"))]
548 && (register_operand (operands[0], <MODE>mode)
549 || register_operand (operands[1], <MODE>mode))"
551 int mode = get_attr_mode (insn);
552 switch (which_alternative)
555 return standard_sse_constant_opcode (insn, operands[1]);
558 /* There is no evex-encoded vmov* for sizes smaller than 64-bytes
559 in avx512f, so we need to use workarounds, to access sse registers
560 16-31, which are evex-only. */
561 if (TARGET_AVX512F && GET_MODE_SIZE (<MODE>mode) < 64
562 && (EXT_REX_SSE_REGNO_P (REGNO (operands[0]))
563 || EXT_REX_SSE_REGNO_P (REGNO (operands[1]))))
565 if (memory_operand (operands[0], <MODE>mode))
567 if (GET_MODE_SIZE (<MODE>mode) == 32)
568 return "vextract<shuffletype>64x4\t{$0x0, %g1, %0|%0, %g1, 0x0}";
569 else if (GET_MODE_SIZE (<MODE>mode) == 16)
570 return "vextract<shuffletype>32x4\t{$0x0, %g1, %0|%0, %g1, 0x0}";
574 else if (memory_operand (operands[1], <MODE>mode))
576 if (GET_MODE_SIZE (<MODE>mode) == 32)
577 return "vbroadcast<shuffletype>64x4\t{%1, %g0|%g0, %1}";
578 else if (GET_MODE_SIZE (<MODE>mode) == 16)
579 return "vbroadcast<shuffletype>32x4\t{%1, %g0|%g0, %1}";
584 /* Reg -> reg move is always aligned. Just use wider move. */
589 return "vmovaps\t{%g1, %g0|%g0, %g1}";
592 return "vmovapd\t{%g1, %g0|%g0, %g1}";
595 return "vmovdqa64\t{%g1, %g0|%g0, %g1}";
606 && (misaligned_operand (operands[0], <MODE>mode)
607 || misaligned_operand (operands[1], <MODE>mode)))
608 return "vmovups\t{%1, %0|%0, %1}";
610 return "%vmovaps\t{%1, %0|%0, %1}";
616 && (misaligned_operand (operands[0], <MODE>mode)
617 || misaligned_operand (operands[1], <MODE>mode)))
618 return "vmovupd\t{%1, %0|%0, %1}";
620 return "%vmovapd\t{%1, %0|%0, %1}";
625 && (misaligned_operand (operands[0], <MODE>mode)
626 || misaligned_operand (operands[1], <MODE>mode)))
627 return "vmovdqu\t{%1, %0|%0, %1}";
629 return "%vmovdqa\t{%1, %0|%0, %1}";
631 if (misaligned_operand (operands[0], <MODE>mode)
632 || misaligned_operand (operands[1], <MODE>mode))
633 return "vmovdqu64\t{%1, %0|%0, %1}";
635 return "vmovdqa64\t{%1, %0|%0, %1}";
644 [(set_attr "type" "sselog1,ssemov,ssemov")
645 (set_attr "prefix" "maybe_vex")
647 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
648 (const_string "<ssePSmode>")
649 (and (eq_attr "alternative" "2")
650 (match_test "TARGET_SSE_TYPELESS_STORES"))
651 (const_string "<ssePSmode>")
652 (match_test "TARGET_AVX")
653 (const_string "<sseinsnmode>")
654 (ior (not (match_test "TARGET_SSE2"))
655 (match_test "optimize_function_for_size_p (cfun)"))
656 (const_string "V4SF")
657 (and (eq_attr "alternative" "0")
658 (match_test "TARGET_SSE_LOAD0_BY_PXOR"))
661 (const_string "<sseinsnmode>")))])
663 (define_insn "sse2_movq128"
664 [(set (match_operand:V2DI 0 "register_operand" "=x")
667 (match_operand:V2DI 1 "nonimmediate_operand" "xm")
668 (parallel [(const_int 0)]))
671 "%vmovq\t{%1, %0|%0, %q1}"
672 [(set_attr "type" "ssemov")
673 (set_attr "prefix" "maybe_vex")
674 (set_attr "mode" "TI")])
676 ;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
677 ;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
678 ;; from memory, we'd prefer to load the memory directly into the %xmm
679 ;; register. To facilitate this happy circumstance, this pattern won't
680 ;; split until after register allocation. If the 64-bit value didn't
681 ;; come from memory, this is the best we can do. This is much better
682 ;; than storing %edx:%eax into a stack temporary and loading an %xmm
685 (define_insn_and_split "movdi_to_sse"
687 [(set (match_operand:V4SI 0 "register_operand" "=?x,x")
688 (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
689 (clobber (match_scratch:V4SI 2 "=&x,X"))])]
690 "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_TO_VEC"
692 "&& reload_completed"
695 if (register_operand (operands[1], DImode))
697 /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
698 Assemble the 64-bit DImode value in an xmm register. */
699 emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
700 gen_rtx_SUBREG (SImode, operands[1], 0)));
701 emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
702 gen_rtx_SUBREG (SImode, operands[1], 4)));
703 emit_insn (gen_vec_interleave_lowv4si (operands[0], operands[0],
706 else if (memory_operand (operands[1], DImode))
707 emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]),
708 operands[1], const0_rtx));
714 [(set (match_operand:V4SF 0 "register_operand")
715 (match_operand:V4SF 1 "zero_extended_scalar_load_operand"))]
716 "TARGET_SSE && reload_completed"
719 (vec_duplicate:V4SF (match_dup 1))
723 operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
724 operands[2] = CONST0_RTX (V4SFmode);
728 [(set (match_operand:V2DF 0 "register_operand")
729 (match_operand:V2DF 1 "zero_extended_scalar_load_operand"))]
730 "TARGET_SSE2 && reload_completed"
731 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
733 operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
734 operands[2] = CONST0_RTX (DFmode);
737 (define_expand "push<mode>1"
738 [(match_operand:VMOVE 0 "register_operand")]
741 ix86_expand_push (<MODE>mode, operands[0]);
745 (define_expand "movmisalign<mode>"
746 [(set (match_operand:VMOVE 0 "nonimmediate_operand")
747 (match_operand:VMOVE 1 "nonimmediate_operand"))]
750 ix86_expand_vector_move_misalign (<MODE>mode, operands);
754 (define_insn "<sse>_loadu<ssemodesuffix><avxsizesuffix>"
755 [(set (match_operand:VF 0 "register_operand" "=v")
757 [(match_operand:VF 1 "nonimmediate_operand" "vm")]
761 switch (get_attr_mode (insn))
766 return "%vmovups\t{%1, %0|%0, %1}";
768 return "%vmovu<ssemodesuffix>\t{%1, %0|%0, %1}";
771 [(set_attr "type" "ssemov")
772 (set_attr "movu" "1")
773 (set_attr "prefix" "maybe_vex")
775 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
776 (const_string "<ssePSmode>")
777 (match_test "TARGET_AVX")
778 (const_string "<MODE>")
779 (match_test "optimize_function_for_size_p (cfun)")
780 (const_string "V4SF")
782 (const_string "<MODE>")))])
784 (define_insn "<sse>_storeu<ssemodesuffix><avxsizesuffix>"
785 [(set (match_operand:VF 0 "memory_operand" "=m")
787 [(match_operand:VF 1 "register_operand" "v")]
791 switch (get_attr_mode (insn))
796 return "%vmovups\t{%1, %0|%0, %1}";
798 return "%vmovu<ssemodesuffix>\t{%1, %0|%0, %1}";
801 [(set_attr "type" "ssemov")
802 (set_attr "movu" "1")
803 (set_attr "prefix" "maybe_vex")
805 (cond [(ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
806 (match_test "TARGET_SSE_TYPELESS_STORES"))
807 (const_string "<ssePSmode>")
808 (match_test "TARGET_AVX")
809 (const_string "<MODE>")
810 (match_test "optimize_function_for_size_p (cfun)")
811 (const_string "V4SF")
813 (const_string "<MODE>")))])
815 (define_insn "<sse2_avx_avx512f>_loaddqu<mode>"
816 [(set (match_operand:VI_UNALIGNED_LOADSTORE 0 "register_operand" "=v")
817 (unspec:VI_UNALIGNED_LOADSTORE
818 [(match_operand:VI_UNALIGNED_LOADSTORE 1 "nonimmediate_operand" "vm")]
822 switch (get_attr_mode (insn))
826 return "%vmovups\t{%1, %0|%0, %1}";
828 if (<MODE>mode == V8DImode)
829 return "vmovdqu64\t{%1, %0|%0, %1}";
831 return "vmovdqu32\t{%1, %0|%0, %1}";
833 return "%vmovdqu\t{%1, %0|%0, %1}";
836 [(set_attr "type" "ssemov")
837 (set_attr "movu" "1")
838 (set (attr "prefix_data16")
840 (match_test "TARGET_AVX")
843 (set_attr "prefix" "maybe_vex")
845 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
846 (const_string "<ssePSmode>")
847 (match_test "TARGET_AVX")
848 (const_string "<sseinsnmode>")
849 (match_test "optimize_function_for_size_p (cfun)")
850 (const_string "V4SF")
852 (const_string "<sseinsnmode>")))])
854 (define_insn "<sse2_avx_avx512f>_storedqu<mode>"
855 [(set (match_operand:VI_UNALIGNED_LOADSTORE 0 "memory_operand" "=m")
856 (unspec:VI_UNALIGNED_LOADSTORE
857 [(match_operand:VI_UNALIGNED_LOADSTORE 1 "register_operand" "v")]
861 switch (get_attr_mode (insn))
865 return "%vmovups\t{%1, %0|%0, %1}";
867 if (<MODE>mode == V8DImode)
868 return "vmovdqu64\t{%1, %0|%0, %1}";
870 return "vmovdqu32\t{%1, %0|%0, %1}";
872 return "%vmovdqu\t{%1, %0|%0, %1}";
875 [(set_attr "type" "ssemov")
876 (set_attr "movu" "1")
877 (set (attr "prefix_data16")
879 (match_test "TARGET_AVX")
882 (set_attr "prefix" "maybe_vex")
884 (cond [(ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
885 (match_test "TARGET_SSE_TYPELESS_STORES"))
886 (const_string "<ssePSmode>")
887 (match_test "TARGET_AVX")
888 (const_string "<sseinsnmode>")
889 (match_test "optimize_function_for_size_p (cfun)")
890 (const_string "V4SF")
892 (const_string "<sseinsnmode>")))])
894 (define_insn "<sse3>_lddqu<avxsizesuffix>"
895 [(set (match_operand:VI1 0 "register_operand" "=x")
896 (unspec:VI1 [(match_operand:VI1 1 "memory_operand" "m")]
899 "%vlddqu\t{%1, %0|%0, %1}"
900 [(set_attr "type" "ssemov")
901 (set_attr "movu" "1")
902 (set (attr "prefix_data16")
904 (match_test "TARGET_AVX")
907 (set (attr "prefix_rep")
909 (match_test "TARGET_AVX")
912 (set_attr "prefix" "maybe_vex")
913 (set_attr "mode" "<sseinsnmode>")])
915 (define_insn "sse2_movnti<mode>"
916 [(set (match_operand:SWI48 0 "memory_operand" "=m")
917 (unspec:SWI48 [(match_operand:SWI48 1 "register_operand" "r")]
920 "movnti\t{%1, %0|%0, %1}"
921 [(set_attr "type" "ssemov")
922 (set_attr "prefix_data16" "0")
923 (set_attr "mode" "<MODE>")])
925 (define_insn "<sse>_movnt<mode>"
926 [(set (match_operand:VF 0 "memory_operand" "=m")
928 [(match_operand:VF 1 "register_operand" "v")]
931 "%vmovnt<ssemodesuffix>\t{%1, %0|%0, %1}"
932 [(set_attr "type" "ssemov")
933 (set_attr "prefix" "maybe_vex")
934 (set_attr "mode" "<MODE>")])
936 (define_insn "<sse2>_movnt<mode>"
937 [(set (match_operand:VI8 0 "memory_operand" "=m")
938 (unspec:VI8 [(match_operand:VI8 1 "register_operand" "v")]
941 "%vmovntdq\t{%1, %0|%0, %1}"
942 [(set_attr "type" "ssecvt")
943 (set (attr "prefix_data16")
945 (match_test "TARGET_AVX")
948 (set_attr "prefix" "maybe_vex")
949 (set_attr "mode" "<sseinsnmode>")])
951 ; Expand patterns for non-temporal stores. At the moment, only those
952 ; that directly map to insns are defined; it would be possible to
953 ; define patterns for other modes that would expand to several insns.
955 ;; Modes handled by storent patterns.
;; Besides the DImode and float vector modes, this includes the scalar
;; integer modes SI and DI (DI only for 64-bit targets) under TARGET_SSE2
;; and the scalar float modes SF and DF under TARGET_SSE4A.
956 (define_mode_iterator STORENT_MODE
957 [(DI "TARGET_SSE2 && TARGET_64BIT") (SI "TARGET_SSE2")
958 (SF "TARGET_SSE4A") (DF "TARGET_SSE4A")
959 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") (V2DI "TARGET_SSE2")
960 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
961 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
963 (define_expand "storent<mode>"
964 [(set (match_operand:STORENT_MODE 0 "memory_operand")
966 [(match_operand:STORENT_MODE 1 "register_operand")]
970 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
972 ;; Parallel floating point arithmetic
974 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
976 (define_expand "<code><mode>2"
977 [(set (match_operand:VF 0 "register_operand")
979 (match_operand:VF 1 "register_operand")))]
981 "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
983 (define_insn_and_split "*absneg<mode>2"
984 [(set (match_operand:VF 0 "register_operand" "=x,x,v,v")
985 (match_operator:VF 3 "absneg_operator"
986 [(match_operand:VF 1 "nonimmediate_operand" "0, xm, v, m")]))
987 (use (match_operand:VF 2 "nonimmediate_operand" "xm, 0, vm,v"))]
990 "&& reload_completed"
993 enum rtx_code absneg_op;
999 if (MEM_P (operands[1]))
1000 op1 = operands[2], op2 = operands[1];
1002 op1 = operands[1], op2 = operands[2];
1007 if (rtx_equal_p (operands[0], operands[1]))
1013 absneg_op = GET_CODE (operands[3]) == NEG ? XOR : AND;
1014 t = gen_rtx_fmt_ee (absneg_op, <MODE>mode, op1, op2);
1015 t = gen_rtx_SET (VOIDmode, operands[0], t);
1019 [(set_attr "isa" "noavx,noavx,avx,avx")])
1021 (define_expand "<plusminus_insn><mode>3"
1022 [(set (match_operand:VF 0 "register_operand")
1024 (match_operand:VF 1 "nonimmediate_operand")
1025 (match_operand:VF 2 "nonimmediate_operand")))]
1027 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
1029 (define_insn "*<plusminus_insn><mode>3"
1030 [(set (match_operand:VF 0 "register_operand" "=x,v")
1032 (match_operand:VF 1 "nonimmediate_operand" "<comm>0,v")
1033 (match_operand:VF 2 "nonimmediate_operand" "xm,vm")))]
1034 "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1036 <plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
1037 v<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1038 [(set_attr "isa" "noavx,avx")
1039 (set_attr "type" "sseadd")
1040 (set_attr "prefix" "orig,vex")
1041 (set_attr "mode" "<MODE>")])
1043 (define_insn "<sse>_vm<plusminus_insn><mode>3"
1044 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1047 (match_operand:VF_128 1 "register_operand" "0,v")
1048 (match_operand:VF_128 2 "nonimmediate_operand" "xm,vm"))
1053 <plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
1054 v<plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %<iptr>2}"
1055 [(set_attr "isa" "noavx,avx")
1056 (set_attr "type" "sseadd")
1057 (set_attr "prefix" "orig,vex")
1058 (set_attr "mode" "<ssescalarmode>")])
1060 (define_expand "mul<mode>3"
1061 [(set (match_operand:VF 0 "register_operand")
1063 (match_operand:VF 1 "nonimmediate_operand")
1064 (match_operand:VF 2 "nonimmediate_operand")))]
1066 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
1068 (define_insn "*mul<mode>3"
1069 [(set (match_operand:VF 0 "register_operand" "=x,v")
1071 (match_operand:VF 1 "nonimmediate_operand" "%0,v")
1072 (match_operand:VF 2 "nonimmediate_operand" "xm,vm")))]
1073 "TARGET_SSE && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
1075 mul<ssemodesuffix>\t{%2, %0|%0, %2}
1076 vmul<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1077 [(set_attr "isa" "noavx,avx")
1078 (set_attr "type" "ssemul")
1079 (set_attr "prefix" "orig,vex")
1080 (set_attr "btver2_decode" "direct,double")
1081 (set_attr "mode" "<MODE>")])
1083 (define_insn "<sse>_vm<multdiv_mnemonic><mode>3"
1084 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1087 (match_operand:VF_128 1 "register_operand" "0,v")
1088 (match_operand:VF_128 2 "nonimmediate_operand" "xm,vm"))
1093 <multdiv_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
1094 v<multdiv_mnemonic><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %<iptr>2}"
1095 [(set_attr "isa" "noavx,avx")
1096 (set_attr "type" "sse<multdiv_mnemonic>")
1097 (set_attr "prefix" "orig,maybe_evex")
1098 (set_attr "btver2_decode" "direct,double")
1099 (set_attr "mode" "<ssescalarmode>")])
1101 (define_expand "div<mode>3"
1102 [(set (match_operand:VF2 0 "register_operand")
1103 (div:VF2 (match_operand:VF2 1 "register_operand")
1104 (match_operand:VF2 2 "nonimmediate_operand")))]
1106 "ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);")
1108 (define_expand "div<mode>3"
1109 [(set (match_operand:VF1 0 "register_operand")
1110 (div:VF1 (match_operand:VF1 1 "register_operand")
1111 (match_operand:VF1 2 "nonimmediate_operand")))]
1114 ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);
1117 && TARGET_RECIP_VEC_DIV
1118 && !optimize_insn_for_size_p ()
1119 && flag_finite_math_only && !flag_trapping_math
1120 && flag_unsafe_math_optimizations)
1122 ix86_emit_swdivsf (operands[0], operands[1], operands[2], <MODE>mode);
1127 (define_insn "<sse>_div<mode>3"
1128 [(set (match_operand:VF 0 "register_operand" "=x,v")
1130 (match_operand:VF 1 "register_operand" "0,v")
1131 (match_operand:VF 2 "nonimmediate_operand" "xm,vm")))]
1134 div<ssemodesuffix>\t{%2, %0|%0, %2}
1135 vdiv<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1136 [(set_attr "isa" "noavx,avx")
1137 (set_attr "type" "ssediv")
1138 (set_attr "prefix" "orig,vex")
1139 (set_attr "mode" "<MODE>")])
1141 (define_insn "<sse>_rcp<mode>2"
1142 [(set (match_operand:VF1_128_256 0 "register_operand" "=x")
1144 [(match_operand:VF1_128_256 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
1146 "%vrcpps\t{%1, %0|%0, %1}"
1147 [(set_attr "type" "sse")
1148 (set_attr "atom_sse_attr" "rcp")
1149 (set_attr "btver2_sse_attr" "rcp")
1150 (set_attr "prefix" "maybe_vex")
1151 (set_attr "mode" "<MODE>")])
;; Scalar reciprocal (rcpss) on a V4SF register.  Operand 1 is the unspec
;; input; operand 2 is a register that is tied to the destination in the
;; non-AVX alternative and is the extra source of three-operand vrcpss in
;; the AVX alternative.  The Intel-syntax half of each template prints
;; operand 1 with the %k modifier.
;; NOTE(review): presumably operand 2 supplies the upper elements of the
;; result through a vec_merge -- confirm against the full pattern.
1153 (define_insn "sse_vmrcpv4sf2"
1154 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1156 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
1158 (match_operand:V4SF 2 "register_operand" "0,x")
1162 rcpss\t{%1, %0|%0, %k1}
1163 vrcpss\t{%1, %2, %0|%0, %2, %k1}"
1164 [(set_attr "isa" "noavx,avx")
1165 (set_attr "type" "sse")
1166 (set_attr "atom_sse_attr" "rcp")
1167 (set_attr "btver2_sse_attr" "rcp")
1168 (set_attr "prefix" "orig,vex")
1169 (set_attr "mode" "SF")])
;; Expander for vector square root over the VF2 mode iterator.  The
;; destination must be a register; the source may be a register or memory.
1171 (define_expand "sqrt<mode>2"
1172 [(set (match_operand:VF2 0 "register_operand")
1173 (sqrt:VF2 (match_operand:VF2 1 "nonimmediate_operand")))]
;; Expander for vector square root over the VF1 mode iterator.  When the
;; tests below hold (TARGET_RECIP_VEC_SQRT, not optimizing this insn for
;; size, finite-math-only, no trapping math, unsafe math optimizations),
;; ix86_emit_swsqrtsf is called with its final argument false.
;; NOTE(review): presumably that helper emits an rsqrt-based software
;; sequence -- confirm against its definition.
1176 (define_expand "sqrt<mode>2"
1177 [(set (match_operand:VF1 0 "register_operand")
1178 (sqrt:VF1 (match_operand:VF1 1 "nonimmediate_operand")))]
1182 && TARGET_RECIP_VEC_SQRT
1183 && !optimize_insn_for_size_p ()
1184 && flag_finite_math_only && !flag_trapping_math
1185 && flag_unsafe_math_optimizations)
1187 ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, false);
;; Vector square root insn for all VF modes.  Single alternative with a
;; "v" register destination and a "vm" source; the output template is
;; %vsqrt<ssemodesuffix> and the prefix attribute is maybe_vex.
1192 (define_insn "<sse>_sqrt<mode>2"
1193 [(set (match_operand:VF 0 "register_operand" "=v")
1194 (sqrt:VF (match_operand:VF 1 "nonimmediate_operand" "vm")))]
1196 "%vsqrt<ssemodesuffix>\t{%1, %0|%0, %1}"
1197 [(set_attr "type" "sse")
1198 (set_attr "atom_sse_attr" "sqrt")
1199 (set_attr "btver2_sse_attr" "sqrt")
1200 (set_attr "prefix" "maybe_vex")
1201 (set_attr "mode" "<MODE>")])
;; Scalar square root on a VF_128 register.  Operand 1 is the instruction
;; source; operand 2 is a register tied to the destination in the non-AVX
;; alternative and passed as the extra source in the three-operand AVX
;; alternative.  The Intel-syntax operand 1 is printed with <iptr>, and
;; the insn "mode" attribute is the scalar mode <ssescalarmode>.
1203 (define_insn "<sse>_vmsqrt<mode>2"
1204 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1207 (match_operand:VF_128 1 "nonimmediate_operand" "xm,vm"))
1208 (match_operand:VF_128 2 "register_operand" "0,v")
1212 sqrt<ssescalarmodesuffix>\t{%1, %0|%0, %<iptr>1}
1213 vsqrt<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %<iptr>1}"
1214 [(set_attr "isa" "noavx,avx")
1215 (set_attr "type" "sse")
1216 (set_attr "atom_sse_attr" "sqrt")
1217 (set_attr "btver2_sse_attr" "sqrt")
1218 (set_attr "prefix" "orig,vex")
1219 (set_attr "mode" "<ssescalarmode>")])
;; Expander for UNSPEC_RSQRT on the VF1_128_256 modes.  The preparation
;; code calls ix86_emit_swsqrtsf with its final argument true, whereas the
;; VF1 sqrt expander passes false.
1221 (define_expand "rsqrt<mode>2"
1222 [(set (match_operand:VF1_128_256 0 "register_operand")
1224 [(match_operand:VF1_128_256 1 "nonimmediate_operand")] UNSPEC_RSQRT))]
1227 ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, true);
;; Packed reciprocal square root insn (UNSPEC_RSQRT of operand 1) for the
;; VF1_128_256 modes.  Single alternative; the output template is
;; %vrsqrtps and the prefix attribute is maybe_vex.
1231 (define_insn "<sse>_rsqrt<mode>2"
1232 [(set (match_operand:VF1_128_256 0 "register_operand" "=x")
1234 [(match_operand:VF1_128_256 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
1236 "%vrsqrtps\t{%1, %0|%0, %1}"
1237 [(set_attr "type" "sse")
1238 (set_attr "prefix" "maybe_vex")
1239 (set_attr "mode" "<MODE>")])
;; Scalar reciprocal square root (rsqrtss) on a V4SF register.  Operand 1
;; is the unspec input; operand 2 is a register tied to the destination in
;; the non-AVX alternative and the extra source of three-operand vrsqrtss
;; in the AVX alternative.  The Intel-syntax operand 1 uses the %k
;; modifier.
1241 (define_insn "sse_vmrsqrtv4sf2"
1242 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1244 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
1246 (match_operand:V4SF 2 "register_operand" "0,x")
1250 rsqrtss\t{%1, %0|%0, %k1}
1251 vrsqrtss\t{%1, %2, %0|%0, %2, %k1}"
1252 [(set_attr "isa" "noavx,avx")
1253 (set_attr "type" "sse")
1254 (set_attr "prefix" "orig,vex")
1255 (set_attr "mode" "SF")])
1257 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
1258 ;; isn't really correct, as those rtl operators aren't defined when
1259 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
;; Expander for vector min/max (<code>) on all VF modes.  Unless
;; flag_finite_math_only is set, operand 1 is forced into a register
;; first; this matches the non-finite insn, whose operand 1 is a
;; register_operand.  ix86_fixup_binary_operands_no_copy then runs on the
;; operands.
1261 (define_expand "<code><mode>3"
1262 [(set (match_operand:VF 0 "register_operand")
1264 (match_operand:VF 1 "nonimmediate_operand")
1265 (match_operand:VF 2 "nonimmediate_operand")))]
1268 if (!flag_finite_math_only)
1269 operands[1] = force_reg (<MODE>mode, operands[1]);
1270 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
;; Vector min/max insn used when flag_finite_math_only is set.  Operand 1
;; carries the "%" commutative marker and its predicate also accepts
;; memory; ix86_binary_operator_ok validates the operand combination.
;; Alternative 0 is the non-AVX two-operand form, alternative 1 the AVX
;; three-operand form.
1273 (define_insn "*<code><mode>3_finite"
1274 [(set (match_operand:VF 0 "register_operand" "=x,v")
1276 (match_operand:VF 1 "nonimmediate_operand" "%0,v")
1277 (match_operand:VF 2 "nonimmediate_operand" "xm,vm")))]
1278 "TARGET_SSE && flag_finite_math_only
1279 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1281 <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
1282 v<maxmin_float><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1283 [(set_attr "isa" "noavx,avx")
1284 (set_attr "type" "sseadd")
1285 (set_attr "btver2_sse_attr" "maxmin")
1286 (set_attr "prefix" "orig,vex")
1287 (set_attr "mode" "<MODE>")])
;; Vector min/max insn used when flag_finite_math_only is not set.
;; Operand 1 must be a register and has no "%" marker, so the operands
;; are not offered to the allocator as commutative.
1289 (define_insn "*<code><mode>3"
1290 [(set (match_operand:VF 0 "register_operand" "=x,v")
1292 (match_operand:VF 1 "register_operand" "0,v")
1293 (match_operand:VF 2 "nonimmediate_operand" "xm,vm")))]
1294 "TARGET_SSE && !flag_finite_math_only"
1296 <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
1297 v<maxmin_float><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1298 [(set_attr "isa" "noavx,avx")
1299 (set_attr "type" "sseadd")
1300 (set_attr "btver2_sse_attr" "maxmin")
1301 (set_attr "prefix" "orig,vex")
1302 (set_attr "mode" "<MODE>")])
;; Scalar min/max on a VF_128 register.  Alternative 0 is the non-AVX
;; two-operand form (operand 1 tied to the destination); alternative 1 is
;; the AVX three-operand form.  Operand 2 is printed with <iptr> in Intel
;; syntax and the insn "mode" attribute is the scalar mode.
1304 (define_insn "<sse>_vm<code><mode>3"
1305 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1308 (match_operand:VF_128 1 "register_operand" "0,v")
1309 (match_operand:VF_128 2 "nonimmediate_operand" "xm,vm"))
1314 <maxmin_float><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
1315 v<maxmin_float><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %<iptr>2}"
1316 [(set_attr "isa" "noavx,avx")
1317 (set_attr "type" "sse")
1318 (set_attr "btver2_sse_attr" "maxmin")
1319 (set_attr "prefix" "orig,vex")
1320 (set_attr "mode" "<ssescalarmode>")])
1322 ;; These versions of the min/max patterns implement exactly the operations
1323 ;; min = (op1 < op2 ? op1 : op2)
1324 ;; max = (!(op1 < op2) ? op1 : op2)
1325 ;; Their operands are not commutative, and thus they may be used in the
1326 ;; presence of -0.0 and NaN.
;; Non-commutative min insn for all VF modes.  Operand 1 must be a
;; register and has no "%" marker.  Alternative 0 is the non-AVX
;; two-operand min, alternative 1 the three-operand vmin.
1328 (define_insn "*ieee_smin<mode>3"
1329 [(set (match_operand:VF 0 "register_operand" "=v,v")
1331 [(match_operand:VF 1 "register_operand" "0,v")
1332 (match_operand:VF 2 "nonimmediate_operand" "vm,vm")]
1336 min<ssemodesuffix>\t{%2, %0|%0, %2}
1337 vmin<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1338 [(set_attr "isa" "noavx,avx")
1339 (set_attr "type" "sseadd")
1340 (set_attr "prefix" "orig,vex")
1341 (set_attr "mode" "<MODE>")])
;; Non-commutative max insn for all VF modes; the counterpart of
;; *ieee_smin<mode>3.  Alternative 0 is the non-AVX two-operand max,
;; alternative 1 the three-operand vmax.
1343 (define_insn "*ieee_smax<mode>3"
1344 [(set (match_operand:VF 0 "register_operand" "=v,v")
1346 [(match_operand:VF 1 "register_operand" "0,v")
1347 (match_operand:VF 2 "nonimmediate_operand" "vm,vm")]
1351 max<ssemodesuffix>\t{%2, %0|%0, %2}
1352 vmax<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1353 [(set_attr "isa" "noavx,avx")
1354 (set_attr "type" "sseadd")
1355 (set_attr "prefix" "orig,vex")
1356 (set_attr "mode" "<MODE>")])
;; vaddsubpd on V4DF; VEX-only, single alternative.  The pattern pairs an
;; operation on operands 1 and 2 with (minus:V4DF op1 op2) on the same
;; operands via match_dup.
;; NOTE(review): presumably a vec_merge of plus and minus with a constant
;; lane mask -- confirm against the full pattern.
1358 (define_insn "avx_addsubv4df3"
1359 [(set (match_operand:V4DF 0 "register_operand" "=x")
1362 (match_operand:V4DF 1 "register_operand" "x")
1363 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
1364 (minus:V4DF (match_dup 1) (match_dup 2))
1367 "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1368 [(set_attr "type" "sseadd")
1369 (set_attr "prefix" "vex")
1370 (set_attr "mode" "V4DF")])
;; addsubpd on V2DF.  Alternative 0 is the non-AVX two-operand addsubpd
;; (operand 1 tied to the destination); alternative 1 is three-operand
;; vaddsubpd.  As in the V4DF variant, the pattern contains
;; (minus:V2DF op1 op2) built with match_dup.
1372 (define_insn "sse3_addsubv2df3"
1373 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1376 (match_operand:V2DF 1 "register_operand" "0,x")
1377 (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm"))
1378 (minus:V2DF (match_dup 1) (match_dup 2))
1382 addsubpd\t{%2, %0|%0, %2}
1383 vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1384 [(set_attr "isa" "noavx,avx")
1385 (set_attr "type" "sseadd")
1386 (set_attr "atom_unit" "complex")
1387 (set_attr "prefix" "orig,vex")
1388 (set_attr "mode" "V2DF")])
;; vaddsubps on V8SF; VEX-only, single alternative.  The pattern contains
;; (minus:V8SF op1 op2) built with match_dup on the same two operands.
1390 (define_insn "avx_addsubv8sf3"
1391 [(set (match_operand:V8SF 0 "register_operand" "=x")
1394 (match_operand:V8SF 1 "register_operand" "x")
1395 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
1396 (minus:V8SF (match_dup 1) (match_dup 2))
1399 "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1400 [(set_attr "type" "sseadd")
1401 (set_attr "prefix" "vex")
1402 (set_attr "mode" "V8SF")])
;; addsubps on V4SF.  Alternative 0 is the non-AVX two-operand addsubps,
;; alternative 1 three-operand vaddsubps.  The prefix_rep attribute is
;; "1" for the non-AVX alternative and "*" for the AVX one.
1404 (define_insn "sse3_addsubv4sf3"
1405 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1408 (match_operand:V4SF 1 "register_operand" "0,x")
1409 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
1410 (minus:V4SF (match_dup 1) (match_dup 2))
1414 addsubps\t{%2, %0|%0, %2}
1415 vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1416 [(set_attr "isa" "noavx,avx")
1417 (set_attr "type" "sseadd")
1418 (set_attr "prefix" "orig,vex")
1419 (set_attr "prefix_rep" "1,*")
1420 (set_attr "mode" "V4SF")])
;; 256-bit horizontal add/subtract of doubles (vhaddpd / vhsubpd via
;; <plusminus_mnemonic>).  The template pairs adjacent elements selected
;; with vec_select: elements 0 and 1 of operand 1, elements 0 and 1 of
;; operand 2, then elements 2 and 3 of operand 1 and elements 2 and 3 of
;; operand 2, in that order.
1422 (define_insn "avx_h<plusminus_insn>v4df3"
1423 [(set (match_operand:V4DF 0 "register_operand" "=x")
1428 (match_operand:V4DF 1 "register_operand" "x")
1429 (parallel [(const_int 0)]))
1430 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1433 (match_operand:V4DF 2 "nonimmediate_operand" "xm")
1434 (parallel [(const_int 0)]))
1435 (vec_select:DF (match_dup 2) (parallel [(const_int 1)]))))
1438 (vec_select:DF (match_dup 1) (parallel [(const_int 2)]))
1439 (vec_select:DF (match_dup 1) (parallel [(const_int 3)])))
1441 (vec_select:DF (match_dup 2) (parallel [(const_int 2)]))
1442 (vec_select:DF (match_dup 2) (parallel [(const_int 3)]))))))]
1444 "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
1445 [(set_attr "type" "sseadd")
1446 (set_attr "prefix" "vex")
1447 (set_attr "mode" "V4DF")])
;; Named expander for the V2DF horizontal add.  It generates the
;; canonical form with element 0 then element 1 selected from each of
;; operands 1 and 2; the matching insn is *sse3_haddv2df3, which also
;; accepts the selectors in the other order.
1449 (define_expand "sse3_haddv2df3"
1450 [(set (match_operand:V2DF 0 "register_operand")
1454 (match_operand:V2DF 1 "register_operand")
1455 (parallel [(const_int 0)]))
1456 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1459 (match_operand:V2DF 2 "nonimmediate_operand")
1460 (parallel [(const_int 0)]))
1461 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
;; V2DF horizontal add insn (haddpd / vhaddpd).  Operands 3-6 are the
;; element selectors, each constrained to 0 or 1.  The condition requires
;; operand 3 != operand 4 and operand 5 != operand 6, so each pair names
;; both elements of its vector in either order.
1464 (define_insn "*sse3_haddv2df3"
1465 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1469 (match_operand:V2DF 1 "register_operand" "0,x")
1470 (parallel [(match_operand:SI 3 "const_0_to_1_operand")]))
1473 (parallel [(match_operand:SI 4 "const_0_to_1_operand")])))
1476 (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm")
1477 (parallel [(match_operand:SI 5 "const_0_to_1_operand")]))
1480 (parallel [(match_operand:SI 6 "const_0_to_1_operand")])))))]
1482 && INTVAL (operands[3]) != INTVAL (operands[4])
1483 && INTVAL (operands[5]) != INTVAL (operands[6])"
1485 haddpd\t{%2, %0|%0, %2}
1486 vhaddpd\t{%2, %1, %0|%0, %1, %2}"
1487 [(set_attr "isa" "noavx,avx")
1488 (set_attr "type" "sseadd")
1489 (set_attr "prefix" "orig,vex")
1490 (set_attr "mode" "V2DF")])
;; V2DF horizontal subtract insn (hsubpd / vhsubpd).  Unlike the hadd
;; insn, the element selectors are fixed constants: element 0 then
;; element 1 of each of operands 1 and 2.
1492 (define_insn "sse3_hsubv2df3"
1493 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1497 (match_operand:V2DF 1 "register_operand" "0,x")
1498 (parallel [(const_int 0)]))
1499 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1502 (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm")
1503 (parallel [(const_int 0)]))
1504 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1507 hsubpd\t{%2, %0|%0, %2}
1508 vhsubpd\t{%2, %1, %0|%0, %1, %2}"
1509 [(set_attr "isa" "noavx,avx")
1510 (set_attr "type" "sseadd")
1511 (set_attr "prefix" "orig,vex")
1512 (set_attr "mode" "V2DF")])
;; Scalar (DF) result formed from the two elements of V2DF operand 1.
;; Operands 2 and 3 are the element selectors and must differ.  The
;; instruction is emitted with the same register as both sources
;; (haddpd %0, %0 / vhaddpd %1, %1, %0).
1514 (define_insn "*sse3_haddv2df3_low"
1515 [(set (match_operand:DF 0 "register_operand" "=x,x")
1518 (match_operand:V2DF 1 "register_operand" "0,x")
1519 (parallel [(match_operand:SI 2 "const_0_to_1_operand")]))
1522 (parallel [(match_operand:SI 3 "const_0_to_1_operand")]))))]
1524 && INTVAL (operands[2]) != INTVAL (operands[3])"
1526 haddpd\t{%0, %0|%0, %0}
1527 vhaddpd\t{%1, %1, %0|%0, %1, %1}"
1528 [(set_attr "isa" "noavx,avx")
1529 (set_attr "type" "sseadd1")
1530 (set_attr "prefix" "orig,vex")
1531 (set_attr "mode" "V2DF")])
;; Scalar (DF) result formed from element 0 and element 1 of V2DF
;; operand 1, with fixed selectors.  Emitted as hsubpd / vhsubpd with the
;; same register used for both sources.
1533 (define_insn "*sse3_hsubv2df3_low"
1534 [(set (match_operand:DF 0 "register_operand" "=x,x")
1537 (match_operand:V2DF 1 "register_operand" "0,x")
1538 (parallel [(const_int 0)]))
1541 (parallel [(const_int 1)]))))]
1544 hsubpd\t{%0, %0|%0, %0}
1545 vhsubpd\t{%1, %1, %0|%0, %1, %1}"
1546 [(set_attr "isa" "noavx,avx")
1547 (set_attr "type" "sseadd1")
1548 (set_attr "prefix" "orig,vex")
1549 (set_attr "mode" "V2DF")])
;; 256-bit horizontal add/subtract of floats (vhaddps / vhsubps via
;; <plusminus_mnemonic>).  The vec_selects pair adjacent elements in this
;; order: operand 1 elements 0-3, operand 2 elements 0-3, operand 1
;; elements 4-7, operand 2 elements 4-7.
1551 (define_insn "avx_h<plusminus_insn>v8sf3"
1552 [(set (match_operand:V8SF 0 "register_operand" "=x")
1558 (match_operand:V8SF 1 "register_operand" "x")
1559 (parallel [(const_int 0)]))
1560 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1562 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1563 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1567 (match_operand:V8SF 2 "nonimmediate_operand" "xm")
1568 (parallel [(const_int 0)]))
1569 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1571 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1572 (vec_select:SF (match_dup 2) (parallel [(const_int 3)])))))
1576 (vec_select:SF (match_dup 1) (parallel [(const_int 4)]))
1577 (vec_select:SF (match_dup 1) (parallel [(const_int 5)])))
1579 (vec_select:SF (match_dup 1) (parallel [(const_int 6)]))
1580 (vec_select:SF (match_dup 1) (parallel [(const_int 7)]))))
1583 (vec_select:SF (match_dup 2) (parallel [(const_int 4)]))
1584 (vec_select:SF (match_dup 2) (parallel [(const_int 5)])))
1586 (vec_select:SF (match_dup 2) (parallel [(const_int 6)]))
1587 (vec_select:SF (match_dup 2) (parallel [(const_int 7)])))))))]
1589 "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1590 [(set_attr "type" "sseadd")
1591 (set_attr "prefix" "vex")
1592 (set_attr "mode" "V8SF")])
;; 128-bit horizontal add/subtract of floats (haddps / hsubps and their
;; VEX forms).  The vec_selects pair elements 0/1 and 2/3 of operand 1,
;; then elements 0/1 and 2/3 of operand 2.  Alternative 0 is the non-AVX
;; two-operand form, alternative 1 the AVX three-operand form.
1594 (define_insn "sse3_h<plusminus_insn>v4sf3"
1595 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1600 (match_operand:V4SF 1 "register_operand" "0,x")
1601 (parallel [(const_int 0)]))
1602 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1604 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1605 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1609 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm")
1610 (parallel [(const_int 0)]))
1611 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1613 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1614 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
1617 h<plusminus_mnemonic>ps\t{%2, %0|%0, %2}
1618 vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1619 [(set_attr "isa" "noavx,avx")
1620 (set_attr "type" "sseadd")
1621 (set_attr "atom_unit" "complex")
1622 (set_attr "prefix" "orig,vex")
1623 (set_attr "prefix_rep" "1,*")
1624 (set_attr "mode" "V4SF")])
;; Sum reduction of a V4DF vector.  Emits three insns using two fresh
;; pseudos: a horizontal add of operand 1 with itself into tmp, a
;; vperm2f128 of tmp with itself (immediate 1) into tmp2, and finally
;; tmp + tmp2 into operand 0.
1626 (define_expand "reduc_splus_v4df"
1627 [(match_operand:V4DF 0 "register_operand")
1628 (match_operand:V4DF 1 "register_operand")]
1631 rtx tmp = gen_reg_rtx (V4DFmode);
1632 rtx tmp2 = gen_reg_rtx (V4DFmode);
1633 emit_insn (gen_avx_haddv4df3 (tmp, operands[1], operands[1]));
1634 emit_insn (gen_avx_vperm2f128v4df3 (tmp2, tmp, tmp, GEN_INT (1)));
1635 emit_insn (gen_addv4df3 (operands[0], tmp, tmp2));
;; Sum reduction of a V2DF vector: a single horizontal add of operand 1
;; with itself, written directly to operand 0.
1639 (define_expand "reduc_splus_v2df"
1640 [(match_operand:V2DF 0 "register_operand")
1641 (match_operand:V2DF 1 "register_operand")]
1644 emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
;; Sum reduction of a V8SF vector.  Two successive horizontal adds
;; (operand 1 with itself into tmp, then tmp with itself into tmp2) are
;; followed by a vperm2f128 of tmp2 with itself (immediate 1) that
;; reuses tmp as its destination, and a final add of tmp and tmp2 into
;; operand 0.
1648 (define_expand "reduc_splus_v8sf"
1649 [(match_operand:V8SF 0 "register_operand")
1650 (match_operand:V8SF 1 "register_operand")]
1653 rtx tmp = gen_reg_rtx (V8SFmode);
1654 rtx tmp2 = gen_reg_rtx (V8SFmode);
1655 emit_insn (gen_avx_haddv8sf3 (tmp, operands[1], operands[1]));
1656 emit_insn (gen_avx_haddv8sf3 (tmp2, tmp, tmp));
1657 emit_insn (gen_avx_vperm2f128v8sf3 (tmp, tmp2, tmp2, GEN_INT (1)));
1658 emit_insn (gen_addv4sf3 == 0 ? 0 : gen_addv8sf3 (operands[0], tmp, tmp2));
;; Sum reduction of a V4SF vector.  Two strategies are visible: two
;; chained horizontal adds through a temporary, or a call to the generic
;; ix86_expand_reduc helper with gen_addv4sf3.
;; NOTE(review): presumably the hadd path is selected by TARGET_SSE3 --
;; confirm against the full preparation code.
1662 (define_expand "reduc_splus_v4sf"
1663 [(match_operand:V4SF 0 "register_operand")
1664 (match_operand:V4SF 1 "register_operand")]
1669 rtx tmp = gen_reg_rtx (V4SFmode);
1670 emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
1671 emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
1674 ix86_expand_reduc (gen_addv4sf3, operands[0], operands[1]);
1678 ;; Modes handled by reduc_sm{in,ax}* patterns.
;; The 256-bit integer modes require TARGET_AVX2, the 256-bit float modes
;; TARGET_AVX, and V4SF TARGET_SSE.
1679 (define_mode_iterator REDUC_SMINMAX_MODE
1680 [(V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
1681 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
1682 (V8SF "TARGET_AVX") (V4DF "TARGET_AVX")
1683 (V4SF "TARGET_SSE")])
;; Signed min/max reduction (smaxmin code iterator) over the
;; REDUC_SMINMAX_MODE modes.  All work is delegated to ix86_expand_reduc,
;; which receives the generator for the corresponding element-wise
;; <code><mode>3 pattern.
1685 (define_expand "reduc_<code>_<mode>"
1686 [(smaxmin:REDUC_SMINMAX_MODE
1687 (match_operand:REDUC_SMINMAX_MODE 0 "register_operand")
1688 (match_operand:REDUC_SMINMAX_MODE 1 "register_operand"))]
1691 ix86_expand_reduc (gen_<code><mode>3, operands[0], operands[1]);
;; Min/max reduction over the VI_256 integer modes, again delegated to
;; ix86_expand_reduc with the element-wise <code><mode>3 generator.
;; NOTE(review): presumably the unsigned (umaxmin) counterpart of the
;; expander above -- confirm against the full pattern.
1695 (define_expand "reduc_<code>_<mode>"
1697 (match_operand:VI_256 0 "register_operand")
1698 (match_operand:VI_256 1 "register_operand"))]
1701 ix86_expand_reduc (gen_<code><mode>3, operands[0], operands[1]);
;; Unsigned minimum reduction of a V8HI vector, delegated to
;; ix86_expand_reduc with gen_uminv8hi3.
1705 (define_expand "reduc_umin_v8hi"
1707 (match_operand:V8HI 0 "register_operand")
1708 (match_operand:V8HI 1 "register_operand"))]
1711 ix86_expand_reduc (gen_uminv8hi3, operands[0], operands[1]);
1715 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1717 ;; Parallel floating point comparisons
1719 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; AVX packed compare with an explicit predicate immediate.  Operand 3 is
;; a constant in the range 0..31 and is emitted as the immediate of
;; vcmp<ssemodesuffix>; the insn therefore has length_immediate 1.
1721 (define_insn "avx_cmp<mode>3"
1722 [(set (match_operand:VF_128_256 0 "register_operand" "=x")
1724 [(match_operand:VF_128_256 1 "register_operand" "x")
1725 (match_operand:VF_128_256 2 "nonimmediate_operand" "xm")
1726 (match_operand:SI 3 "const_0_to_31_operand" "n")]
1729 "vcmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1730 [(set_attr "type" "ssecmp")
1731 (set_attr "length_immediate" "1")
1732 (set_attr "prefix" "vex")
1733 (set_attr "mode" "<MODE>")])
;; Scalar variant of the AVX compare-with-immediate for the VF_128 modes.
;; It uses the scalar mnemonic suffix, prints operand 2 with <iptr> in
;; Intel syntax, and its "mode" attribute is the scalar mode.
1735 (define_insn "avx_vmcmp<mode>3"
1736 [(set (match_operand:VF_128 0 "register_operand" "=x")
1739 [(match_operand:VF_128 1 "register_operand" "x")
1740 (match_operand:VF_128 2 "nonimmediate_operand" "xm")
1741 (match_operand:SI 3 "const_0_to_31_operand" "n")]
1746 "vcmp<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %3}"
1747 [(set_attr "type" "ssecmp")
1748 (set_attr "length_immediate" "1")
1749 (set_attr "prefix" "vex")
1750 (set_attr "mode" "<ssescalarmode>")])
;; Mask-producing packed compare restricted to commutative comparison
;; codes: the condition requires the rtx class of operator 3 to be
;; RTX_COMM_COMPARE, and operand 1 carries the "%" commutative marker.
;; The comparison is printed into the mnemonic via %D3.
1752 (define_insn "*<sse>_maskcmp<mode>3_comm"
1753 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
1754 (match_operator:VF_128_256 3 "sse_comparison_operator"
1755 [(match_operand:VF_128_256 1 "register_operand" "%0,x")
1756 (match_operand:VF_128_256 2 "nonimmediate_operand" "xm,xm")]))]
1758 && GET_RTX_CLASS (GET_CODE (operands[3])) == RTX_COMM_COMPARE"
1760 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
1761 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1762 [(set_attr "isa" "noavx,avx")
1763 (set_attr "type" "ssecmp")
1764 (set_attr "length_immediate" "1")
1765 (set_attr "prefix" "orig,vex")
1766 (set_attr "mode" "<MODE>")])
;; General mask-producing packed compare for any sse_comparison_operator.
;; Same templates as the _comm variant, but operand 1 has no "%" marker,
;; so the operand order is fixed.
1768 (define_insn "<sse>_maskcmp<mode>3"
1769 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
1770 (match_operator:VF_128_256 3 "sse_comparison_operator"
1771 [(match_operand:VF_128_256 1 "register_operand" "0,x")
1772 (match_operand:VF_128_256 2 "nonimmediate_operand" "xm,xm")]))]
1775 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
1776 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1777 [(set_attr "isa" "noavx,avx")
1778 (set_attr "type" "ssecmp")
1779 (set_attr "length_immediate" "1")
1780 (set_attr "prefix" "orig,vex")
1781 (set_attr "mode" "<MODE>")])
;; Scalar mask-producing compare for the VF_128 modes.  It uses the scalar
;; mnemonic suffix and prints operand 2 with <iptr> in Intel syntax.
;; Note that length_immediate is "1,*" here (per alternative), unlike the
;; packed variants, which use "1" for both alternatives.
1783 (define_insn "<sse>_vmmaskcmp<mode>3"
1784 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1786 (match_operator:VF_128 3 "sse_comparison_operator"
1787 [(match_operand:VF_128 1 "register_operand" "0,x")
1788 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm")])
1793 cmp%D3<ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
1794 vcmp%D3<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %<iptr>2}"
1795 [(set_attr "isa" "noavx,avx")
1796 (set_attr "type" "ssecmp")
1797 (set_attr "length_immediate" "1,*")
1798 (set_attr "prefix" "orig,vex")
1799 (set_attr "mode" "<ssescalarmode>")])
;; Scalar compare that sets FLAGS_REG in CCFP mode from element 0 of
;; operands 0 and 1 (each selected with vec_select / const_int 0).
;; Enabled when SSE_FLOAT_MODE_P holds for the scalar mode.  The
;; prefix_data16 attribute is chosen by testing whether the insn mode is
;; DF; prefix_rep is always "0".
1801 (define_insn "<sse>_comi"
1802 [(set (reg:CCFP FLAGS_REG)
1805 (match_operand:<ssevecmode> 0 "register_operand" "v")
1806 (parallel [(const_int 0)]))
1808 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "vm")
1809 (parallel [(const_int 0)]))))]
1810 "SSE_FLOAT_MODE_P (<MODE>mode)"
1811 "%vcomi<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}"
1812 [(set_attr "type" "ssecomi")
1813 (set_attr "prefix" "maybe_vex")
1814 (set_attr "prefix_rep" "0")
1815 (set (attr "prefix_data16")
1816 (if_then_else (eq_attr "mode" "DF")
1818 (const_string "0")))
1819 (set_attr "mode" "<MODE>")])
;; Same shape as <sse>_comi, but FLAGS_REG is set in CCFPU mode and the
;; emitted mnemonic is %vucomi<ssemodesuffix>.
1821 (define_insn "<sse>_ucomi"
1822 [(set (reg:CCFPU FLAGS_REG)
1825 (match_operand:<ssevecmode> 0 "register_operand" "v")
1826 (parallel [(const_int 0)]))
1828 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "vm")
1829 (parallel [(const_int 0)]))))]
1830 "SSE_FLOAT_MODE_P (<MODE>mode)"
1831 "%vucomi<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}"
1832 [(set_attr "type" "ssecomi")
1833 (set_attr "prefix" "maybe_vex")
1834 (set_attr "prefix_rep" "0")
1835 (set (attr "prefix_data16")
1836 (if_then_else (eq_attr "mode" "DF")
1838 (const_string "0")))
1839 (set_attr "mode" "<MODE>")])
;; Vector conditional select for 256-bit data (V_256) driven by a 256-bit
;; floating-point comparison (operator 3 on operands 4 and 5).  Operands
;; 1 and 2 are the two values being selected.  The condition requires the
;; data and comparison modes to have the same number of elements.
;; Expansion is done by ix86_expand_fp_vcond, whose result is kept in
;; "ok".
1841 (define_expand "vcond<V_256:mode><VF_256:mode>"
1842 [(set (match_operand:V_256 0 "register_operand")
1844 (match_operator 3 ""
1845 [(match_operand:VF_256 4 "nonimmediate_operand")
1846 (match_operand:VF_256 5 "nonimmediate_operand")])
1847 (match_operand:V_256 1 "general_operand")
1848 (match_operand:V_256 2 "general_operand")))]
1850 && (GET_MODE_NUNITS (<V_256:MODE>mode)
1851 == GET_MODE_NUNITS (<VF_256:MODE>mode))"
1853 bool ok = ix86_expand_fp_vcond (operands);
;; 128-bit counterpart of the vcond expander above: V_128 data selected
;; by a VF_128 comparison, with the same element-count requirement and
;; the same ix86_expand_fp_vcond call.
1858 (define_expand "vcond<V_128:mode><VF_128:mode>"
1859 [(set (match_operand:V_128 0 "register_operand")
1861 (match_operator 3 ""
1862 [(match_operand:VF_128 4 "nonimmediate_operand")
1863 (match_operand:VF_128 5 "nonimmediate_operand")])
1864 (match_operand:V_128 1 "general_operand")
1865 (match_operand:V_128 2 "general_operand")))]
1867 && (GET_MODE_NUNITS (<V_128:MODE>mode)
1868 == GET_MODE_NUNITS (<VF_128:MODE>mode))"
1870 bool ok = ix86_expand_fp_vcond (operands);
1875 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1877 ;; Parallel floating point logical operations
1879 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Vector and-not for all VF modes.  The output template is assembled at
;; run time: a mnemonic suffix is picked in a switch on get_attr_mode
;; (one of the choices is <ssemodesuffix>), the operand layout is picked
;; by which_alternative (two-operand andn or three-operand vandn), and
;; for 64-byte modes the vpandn spelling is used instead.  The final
;; string is formatted with snprintf into "static char buf[32]".
;; The "mode" attribute prefers <ssePSmode> when
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL, <MODE> under AVX, and V4SF when
;; optimizing the function for size.
1881 (define_insn "<sse>_andnot<mode>3"
1882 [(set (match_operand:VF 0 "register_operand" "=x,v")
1885 (match_operand:VF 1 "register_operand" "0,v"))
1886 (match_operand:VF 2 "nonimmediate_operand" "xm,vm")))]
1889 static char buf[32];
1893 switch (get_attr_mode (insn))
1900 suffix = "<ssemodesuffix>";
1903 switch (which_alternative)
1906 ops = "andn%s\t{%%2, %%0|%%0, %%2}";
1909 ops = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1915 /* There is no vandnp[sd]. Use vpandnq. */
1916 if (GET_MODE_SIZE (<MODE>mode) == 64)
1919 ops = "vpandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1922 snprintf (buf, sizeof (buf), ops, suffix);
1925 [(set_attr "isa" "noavx,avx")
1926 (set_attr "type" "sselog")
1927 (set_attr "prefix" "orig,maybe_evex")
1929 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
1930 (const_string "<ssePSmode>")
1931 (match_test "TARGET_AVX")
1932 (const_string "<MODE>")
1933 (match_test "optimize_function_for_size_p (cfun)")
1934 (const_string "V4SF")
1936 (const_string "<MODE>")))])
;; Expander for the bitwise logic operations (any_logic) on the 128- and
;; 256-bit float vector modes.  Both inputs may be memory at expand time;
;; ix86_fixup_binary_operands_no_copy is run on the operands.
1938 (define_expand "<code><mode>3"
1939 [(set (match_operand:VF_128_256 0 "register_operand")
1940 (any_logic:VF_128_256
1941 (match_operand:VF_128_256 1 "nonimmediate_operand")
1942 (match_operand:VF_128_256 2 "nonimmediate_operand")))]
1944 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; 512-bit (VF_512) counterpart of the logic expander above, with the
;; same ix86_fixup_binary_operands_no_copy preparation.
;; NOTE(review): the set of rtx codes iterated over here may differ from
;; the 128/256-bit expander -- confirm against the full pattern.
1946 (define_expand "<code><mode>3"
1947 [(set (match_operand:VF_512 0 "register_operand")
1949 (match_operand:VF_512 1 "nonimmediate_operand")
1950 (match_operand:VF_512 2 "nonimmediate_operand")))]
1952 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Bitwise logic insn for all VF modes; operand 1 is commutative ("%").
;; The output template is assembled at run time in the same way as in
;; <sse>_andnot<mode>3: suffix from a switch on get_attr_mode, operand
;; layout from which_alternative, and the vp<logic> spelling for 64-byte
;; modes.  The result is formatted with snprintf into
;; "static char buf[32]".
1954 (define_insn "*<code><mode>3"
1955 [(set (match_operand:VF 0 "register_operand" "=x,v")
1957 (match_operand:VF 1 "nonimmediate_operand" "%0,v")
1958 (match_operand:VF 2 "nonimmediate_operand" "xm,vm")))]
1959 "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1961 static char buf[32];
1965 switch (get_attr_mode (insn))
1972 suffix = "<ssemodesuffix>";
1975 switch (which_alternative)
1978 ops = "<logic>%s\t{%%2, %%0|%%0, %%2}";
1981 ops = "v<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1987 /* There is no v<logic>p[sd]. Use vp<logic>q. */
1988 if (GET_MODE_SIZE (<MODE>mode) == 64)
1991 ops = "vp<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1994 snprintf (buf, sizeof (buf), ops, suffix);
1997 [(set_attr "isa" "noavx,avx")
1998 (set_attr "type" "sselog")
1999 (set_attr "prefix" "orig,maybe_evex")
2001 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
2002 (const_string "<ssePSmode>")
2003 (match_test "TARGET_AVX")
2004 (const_string "<MODE>")
2005 (match_test "optimize_function_for_size_p (cfun)")
2006 (const_string "V4SF")
2008 (const_string "<MODE>")))])
;; Vector copysign for all VF modes, built from three logic operations.
;; Operand 3 is a mask created by ix86_build_signbit_mask; operands 4 and
;; 5 are fresh pseudos.  The visible rtl combines (not mask) with operand
;; 1, computes (and mask operand2), and iors the two temporaries into
;; operand 0.
2010 (define_expand "copysign<mode>3"
2013 (not:VF (match_dup 3))
2014 (match_operand:VF 1 "nonimmediate_operand")))
2016 (and:VF (match_dup 3)
2017 (match_operand:VF 2 "nonimmediate_operand")))
2018 (set (match_operand:VF 0 "register_operand")
2019 (ior:VF (match_dup 4) (match_dup 5)))]
2022 operands[3] = ix86_build_signbit_mask (<MODE>mode, 1, 0);
2024 operands[4] = gen_reg_rtx (<MODE>mode);
2025 operands[5] = gen_reg_rtx (<MODE>mode);
2028 ;; Also define scalar versions. These are used for abs, neg, and
2029 ;; conditional move. Using subregs into vector modes causes register
2030 ;; allocation lossage. These patterns do not allow memory operands
2031 ;; because the native instructions read the full 128 bits.
;; Scalar (MODEF) and-not carried out with the packed instruction.  All
;; three operands must be registers.  The mnemonic suffix is "ps" when
;; the insn mode attribute is V4SF and <ssevecmodesuffix> otherwise; the
;; operand layout depends on which_alternative (andn vs. vandn).
2033 (define_insn "*andnot<mode>3"
2034 [(set (match_operand:MODEF 0 "register_operand" "=x,x")
2037 (match_operand:MODEF 1 "register_operand" "0,x"))
2038 (match_operand:MODEF 2 "register_operand" "x,x")))]
2039 "SSE_FLOAT_MODE_P (<MODE>mode)"
2041 static char buf[32];
2044 = (get_attr_mode (insn) == MODE_V4SF) ? "ps" : "<ssevecmodesuffix>";
2046 switch (which_alternative)
2049 ops = "andn%s\t{%%2, %%0|%%0, %%2}";
2052 ops = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
2058 snprintf (buf, sizeof (buf), ops, suffix);
2061 [(set_attr "isa" "noavx,avx")
2062 (set_attr "type" "sselog")
2063 (set_attr "prefix" "orig,vex")
2065 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
2066 (const_string "V4SF")
2067 (match_test "TARGET_AVX")
2068 (const_string "<ssevecmode>")
2069 (match_test "optimize_function_for_size_p (cfun)")
2070 (const_string "V4SF")
2072 (const_string "<ssevecmode>")))])
;; TFmode and-not.  The mnemonic is "andnps" when the insn mode attribute
;; is V4SF and "pandn" otherwise; a "v" is prepended for the AVX
;; alternative.  The mode attribute falls back to V4SF when SSE2 is
;; unavailable or the function is optimized for size, else TI.
;; prefix_data16 is computed from alternative 0 together with mode TI.
2074 (define_insn "*andnottf3"
2075 [(set (match_operand:TF 0 "register_operand" "=x,x")
2077 (not:TF (match_operand:TF 1 "register_operand" "0,x"))
2078 (match_operand:TF 2 "nonimmediate_operand" "xm,xm")))]
2081 static char buf[32];
2084 = (get_attr_mode (insn) == MODE_V4SF) ? "andnps" : "pandn";
2086 switch (which_alternative)
2089 ops = "%s\t{%%2, %%0|%%0, %%2}";
2092 ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
2098 snprintf (buf, sizeof (buf), ops, tmp);
2101 [(set_attr "isa" "noavx,avx")
2102 (set_attr "type" "sselog")
2103 (set (attr "prefix_data16")
2105 (and (eq_attr "alternative" "0")
2106 (eq_attr "mode" "TI"))
2108 (const_string "*")))
2109 (set_attr "prefix" "orig,vex")
2111 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
2112 (const_string "V4SF")
2113 (match_test "TARGET_AVX")
2115 (ior (not (match_test "TARGET_SSE2"))
2116 (match_test "optimize_function_for_size_p (cfun)"))
2117 (const_string "V4SF")
2119 (const_string "TI")))])
;; Scalar (MODEF) bitwise logic carried out with the packed instruction.
;; All operands are registers and operand 1 is commutative ("%").  The
;; suffix is "ps" when the insn mode attribute is V4SF and
;; <ssevecmodesuffix> otherwise.
2121 (define_insn "*<code><mode>3"
2122 [(set (match_operand:MODEF 0 "register_operand" "=x,x")
2124 (match_operand:MODEF 1 "register_operand" "%0,x")
2125 (match_operand:MODEF 2 "register_operand" "x,x")))]
2126 "SSE_FLOAT_MODE_P (<MODE>mode)"
2128 static char buf[32];
2131 = (get_attr_mode (insn) == MODE_V4SF) ? "ps" : "<ssevecmodesuffix>";
2133 switch (which_alternative)
2136 ops = "<logic>%s\t{%%2, %%0|%%0, %%2}";
2139 ops = "v<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
2145 snprintf (buf, sizeof (buf), ops, suffix);
2148 [(set_attr "isa" "noavx,avx")
2149 (set_attr "type" "sselog")
2150 (set_attr "prefix" "orig,vex")
2152 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
2153 (const_string "V4SF")
2154 (match_test "TARGET_AVX")
2155 (const_string "<ssevecmode>")
2156 (match_test "optimize_function_for_size_p (cfun)")
2157 (const_string "V4SF")
2159 (const_string "<ssevecmode>")))])
;; Expander for TFmode bitwise logic.  Both inputs may be memory at
;; expand time; ix86_fixup_binary_operands_no_copy is run on the operands
;; with TFmode.
2161 (define_expand "<code>tf3"
2162 [(set (match_operand:TF 0 "register_operand")
2164 (match_operand:TF 1 "nonimmediate_operand")
2165 (match_operand:TF 2 "nonimmediate_operand")))]
2167 "ix86_fixup_binary_operands_no_copy (<CODE>, TFmode, operands);")
;; TFmode bitwise logic insn; operand 1 is commutative ("%") and the
;; operand combination is validated by ix86_binary_operator_ok.  The
;; mnemonic is "<logic>ps" when the insn mode attribute is V4SF and
;; "p<logic>" otherwise, with a "v" prepended for the AVX alternative.
;; The mode and prefix_data16 attributes follow the same scheme as
;; *andnottf3.
2169 (define_insn "*<code>tf3"
2170 [(set (match_operand:TF 0 "register_operand" "=x,x")
2172 (match_operand:TF 1 "nonimmediate_operand" "%0,x")
2173 (match_operand:TF 2 "nonimmediate_operand" "xm,xm")))]
2175 && ix86_binary_operator_ok (<CODE>, TFmode, operands)"
2177 static char buf[32];
2180 = (get_attr_mode (insn) == MODE_V4SF) ? "<logic>ps" : "p<logic>";
2182 switch (which_alternative)
2185 ops = "%s\t{%%2, %%0|%%0, %%2}";
2188 ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
2194 snprintf (buf, sizeof (buf), ops, tmp);
2197 [(set_attr "isa" "noavx,avx")
2198 (set_attr "type" "sselog")
2199 (set (attr "prefix_data16")
2201 (and (eq_attr "alternative" "0")
2202 (eq_attr "mode" "TI"))
2204 (const_string "*")))
2205 (set_attr "prefix" "orig,vex")
2207 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
2208 (const_string "V4SF")
2209 (match_test "TARGET_AVX")
2211 (ior (not (match_test "TARGET_SSE2"))
2212 (match_test "optimize_function_for_size_p (cfun)"))
2213 (const_string "V4SF")
2215 (const_string "TI")))])
2217 ;; There is no floating-point xor for V16SF and V8DF in AVX512F,
2218 ;; but we need one for negation. Instead we use the integer versions
2219 ;; of xor. Maybe there could be a better way to do that.
;; Element-size suffix appended to vp<logic> in avx512f_<logic><mode>:
;; "d" for V16SF and "q" for V8DF.
2221 (define_mode_attr avx512flogicsuff
2222 [(V16SF "d") (V8DF "q")])
;; 512-bit bitwise logic on float vectors, emitted as the integer
;; instruction vp<logic>d / vp<logic>q (suffix from avx512flogicsuff).
;; Single three-operand alternative with an EVEX prefix.
2224 (define_insn "avx512f_<logic><mode>"
2225 [(set (match_operand:VF_512 0 "register_operand" "=v")
2227 (match_operand:VF_512 1 "register_operand" "v")
2228 (match_operand:VF_512 2 "nonimmediate_operand" "vm")))]
2230 "vp<logic><avx512flogicsuff>\t{%2, %1, %0|%0, %1, %2}"
2231 [(set_attr "type" "sselog")
2232 (set_attr "prefix" "evex")])
2234 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2236 ;; FMA floating point multiply/accumulate instructions. These include
2237 ;; scalar versions of the instructions as well as vector versions.
2239 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2241 ;; The standard names for scalar FMA are only available with SSE math enabled.
;; Hence SF and DF are conditional on TARGET_SSE_MATH, while the four
;; vector modes are unconditional members of the iterator.
2242 (define_mode_iterator FMAMODEM [(SF "TARGET_SSE_MATH")
2243 (DF "TARGET_SSE_MATH")
2244 V4SF V2DF V8SF V4DF])
;; Standard-name expander for fused multiply-add over FMAMODEM: operands
;; 1, 2 and 3 are used unmodified.  Available with either FMA or FMA4.
2246 (define_expand "fma<mode>4"
2247 [(set (match_operand:FMAMODEM 0 "register_operand")
2249 (match_operand:FMAMODEM 1 "nonimmediate_operand")
2250 (match_operand:FMAMODEM 2 "nonimmediate_operand")
2251 (match_operand:FMAMODEM 3 "nonimmediate_operand")))]
2252 "TARGET_FMA || TARGET_FMA4")
;; Standard-name expander for fused multiply-subtract over FMAMODEM: the
;; third input (operand 3) is negated.  Available with either FMA or
;; FMA4.
2254 (define_expand "fms<mode>4"
2255 [(set (match_operand:FMAMODEM 0 "register_operand")
2257 (match_operand:FMAMODEM 1 "nonimmediate_operand")
2258 (match_operand:FMAMODEM 2 "nonimmediate_operand")
2259 (neg:FMAMODEM (match_operand:FMAMODEM 3 "nonimmediate_operand"))))]
2260 "TARGET_FMA || TARGET_FMA4")
;; Standard-name expander for negated fused multiply-add over FMAMODEM:
;; the first input (operand 1) is negated.  Available with either FMA or
;; FMA4.
2262 (define_expand "fnma<mode>4"
2263 [(set (match_operand:FMAMODEM 0 "register_operand")
2265 (neg:FMAMODEM (match_operand:FMAMODEM 1 "nonimmediate_operand"))
2266 (match_operand:FMAMODEM 2 "nonimmediate_operand")
2267 (match_operand:FMAMODEM 3 "nonimmediate_operand")))]
2268 "TARGET_FMA || TARGET_FMA4")
;; Standard-name expander for negated fused multiply-subtract over
;; FMAMODEM: both operand 1 and operand 3 are negated.  Available with
;; either FMA or FMA4.
2270 (define_expand "fnms<mode>4"
2271 [(set (match_operand:FMAMODEM 0 "register_operand")
2273 (neg:FMAMODEM (match_operand:FMAMODEM 1 "nonimmediate_operand"))
2274 (match_operand:FMAMODEM 2 "nonimmediate_operand")
2275 (neg:FMAMODEM (match_operand:FMAMODEM 3 "nonimmediate_operand"))))]
2276 "TARGET_FMA || TARGET_FMA4")
2278 ;; The builtins for intrinsics are not constrained by whether SSE math is enabled.
;; Same six modes as FMAMODEM, but SF and DF carry no TARGET_SSE_MATH
;; condition.
2279 (define_mode_iterator FMAMODE [SF DF V4SF V2DF V8SF V4DF])
;; Expander for fused multiply-add over the unconditional FMAMODE
;; iterator.  Same operand shape as fma<mode>4; available with either FMA
;; or FMA4.
2281 (define_expand "fma4i_fmadd_<mode>"
2282 [(set (match_operand:FMAMODE 0 "register_operand")
2284 (match_operand:FMAMODE 1 "nonimmediate_operand")
2285 (match_operand:FMAMODE 2 "nonimmediate_operand")
2286 (match_operand:FMAMODE 3 "nonimmediate_operand")))]
2287 "TARGET_FMA || TARGET_FMA4")
;; Fused multiply-add insn with five alternatives.  Alternatives 0-2
;; (isa fma_avx512f) are the three-operand 132 / 213 / 231 forms, chosen
;; by which input is tied to the destination and which may be in memory:
;;   0: operand 1 tied, operand 2 may be memory  -> vfmadd132
;;   1: operand 1 tied, operand 3 may be memory  -> vfmadd213
;;   2: operand 3 tied, operand 2 may be memory  -> vfmadd231
;; Alternatives 3-4 (isa fma4) are the four-operand FMA4 form with the
;; memory operand allowed in position 3 or position 2 respectively.
2289 (define_insn "*fma_fmadd_<mode>"
2290 [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
2292 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0, v, x,x")
2293 (match_operand:FMAMODE 2 "nonimmediate_operand" "vm, v,vm, x,m")
2294 (match_operand:FMAMODE 3 "nonimmediate_operand" " v,vm, 0,xm,x")))]
2295 "TARGET_FMA || TARGET_FMA4"
2297 vfmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2298 vfmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2299 vfmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
2300 vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
2301 vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2302 [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f,fma4,fma4")
2303 (set_attr "type" "ssemuladd")
2304 (set_attr "mode" "<MODE>")])
;; Multiply-subtract insn over the FMAMODE modes.  Same constraint
;; layout as *fma_fmadd_<mode>: alternatives 0-2 (isa fma_avx512f) emit
;; vfmsub132/213/231, alternatives 3-4 (isa fma4) emit the four-operand
;; vfmsub form.
2306 (define_insn "*fma_fmsub_<mode>"
2307 [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
2309 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0, v, x,x")
2310 (match_operand:FMAMODE 2 "nonimmediate_operand" "vm, v,vm, x,m")
2312 (match_operand:FMAMODE 3 "nonimmediate_operand" " v,vm, 0,xm,x"))))]
2313 "TARGET_FMA || TARGET_FMA4"
2315 vfmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2316 vfmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2317 vfmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
2318 vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
2319 vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2320 [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f,fma4,fma4")
2321 (set_attr "type" "ssemuladd")
2322 (set_attr "mode" "<MODE>")])
;; Negated multiply-add insn over the FMAMODE modes.  Same constraint
;; layout as *fma_fmadd_<mode>: alternatives 0-2 (isa fma_avx512f) emit
;; vfnmadd132/213/231, alternatives 3-4 (isa fma4) emit the
;; four-operand vfnmadd form.
2324 (define_insn "*fma_fnmadd_<mode>"
2325 [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
2328 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0, v, x,x"))
2329 (match_operand:FMAMODE 2 "nonimmediate_operand" "vm, v,vm, x,m")
2330 (match_operand:FMAMODE 3 "nonimmediate_operand" " v,vm, 0,xm,x")))]
2331 "TARGET_FMA || TARGET_FMA4"
2333 vfnmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2334 vfnmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2335 vfnmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
2336 vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
2337 vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2338 [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f,fma4,fma4")
2339 (set_attr "type" "ssemuladd")
2340 (set_attr "mode" "<MODE>")])
;; Negated multiply-subtract insn over the FMAMODE modes.  Same
;; constraint layout as *fma_fmadd_<mode>: alternatives 0-2 (isa
;; fma_avx512f) emit vfnmsub132/213/231, alternatives 3-4 (isa fma4)
;; emit the four-operand vfnmsub form.
2342 (define_insn "*fma_fnmsub_<mode>"
2343 [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
2346 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0, v, x,x"))
2347 (match_operand:FMAMODE 2 "nonimmediate_operand" "vm, v,vm, x,m")
2349 (match_operand:FMAMODE 3 "nonimmediate_operand" " v,vm, 0,xm,x"))))]
2350 "TARGET_FMA || TARGET_FMA4"
2352 vfnmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2353 vfnmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2354 vfnmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
2355 vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
2356 vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2357 [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f,fma4,fma4")
2358 (set_attr "type" "ssemuladd")
2359 (set_attr "mode" "<MODE>")])
2361 ;; FMA parallel floating point multiply addsub and subadd operations.
2363 ;; It would be possible to represent these without the UNSPEC as
2366 ;; (fma op1 op2 op3)
2367 ;; (fma op1 op2 (neg op3))
2370 ;; But this doesn't seem useful in practice.
;; Expander fmaddsub_<mode> over the VF modes.  The three nonimmediate
;; inputs are passed as a bracketed operand vector (presumably the
;; UNSPEC mentioned in the section comment -- confirm).
;; Enabled when either TARGET_FMA or TARGET_FMA4 is set.
2372 (define_expand "fmaddsub_<mode>"
2373 [(set (match_operand:VF 0 "register_operand")
2375 [(match_operand:VF 1 "nonimmediate_operand")
2376 (match_operand:VF 2 "nonimmediate_operand")
2377 (match_operand:VF 3 "nonimmediate_operand")]
2379 "TARGET_FMA || TARGET_FMA4")
;; Multiply add/sub insn over the VF modes, five alternatives with the
;; same constraint layout as the *fma_f* insns: alternatives 0-2 (isa
;; fma_avx512f) emit vfmaddsub132/213/231, alternatives 3-4 (isa fma4)
;; emit the four-operand vfmaddsub form.
2381 (define_insn "*fma_fmaddsub_<mode>"
2382 [(set (match_operand:VF 0 "register_operand" "=v,v,v,x,x")
2384 [(match_operand:VF 1 "nonimmediate_operand" "%0, 0, v, x,x")
2385 (match_operand:VF 2 "nonimmediate_operand" "vm, v,vm, x,m")
2386 (match_operand:VF 3 "nonimmediate_operand" " v,vm, 0,xm,x")]
2388 "TARGET_FMA || TARGET_FMA4"
2390 vfmaddsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2391 vfmaddsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2392 vfmaddsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
2393 vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
2394 vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2395 [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f,fma4,fma4")
2396 (set_attr "type" "ssemuladd")
2397 (set_attr "mode" "<MODE>")])
;; Multiply sub/add insn over the VF modes.  Same five-alternative
;; constraint layout as *fma_fmaddsub_<mode>, emitting
;; vfmsubadd132/213/231 (isa fma_avx512f) or the four-operand
;; vfmsubadd form (isa fma4).
2399 (define_insn "*fma_fmsubadd_<mode>"
2400 [(set (match_operand:VF 0 "register_operand" "=v,v,v,x,x")
2402 [(match_operand:VF 1 "nonimmediate_operand" "%0, 0, v, x,x")
2403 (match_operand:VF 2 "nonimmediate_operand" "vm, v,vm, x,m")
2405 (match_operand:VF 3 "nonimmediate_operand" " v,vm, 0,xm,x"))]
2407 "TARGET_FMA || TARGET_FMA4"
2409 vfmsubadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2410 vfmsubadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2411 vfmsubadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
2412 vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
2413 vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2414 [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f,fma4,fma4")
2415 (set_attr "type" "ssemuladd")
2416 (set_attr "mode" "<MODE>")])
2418 ;; FMA3 floating point scalar intrinsics. These merge result with
2419 ;; high-order elements from the destination register.
;; Expander fmai_vmfmadd_<mode> over the VF_128 modes: register
;; destination and three nonimmediate inputs.
2421 (define_expand "fmai_vmfmadd_<mode>"
2422 [(set (match_operand:VF_128 0 "register_operand")
2425 (match_operand:VF_128 1 "nonimmediate_operand")
2426 (match_operand:VF_128 2 "nonimmediate_operand")
2427 (match_operand:VF_128 3 "nonimmediate_operand"))
;; Scalar multiply-add insn over the VF_128 modes, two alternatives.
;; Operand 1 is tied to the destination in both; the memory-capable
;; input is operand 2 (vfmadd132) or operand 3 (vfmadd213).  The Intel
;; syntax side prints operands 2 and 3 with the <iptr> modifier.
2432 (define_insn "*fmai_fmadd_<mode>"
2433 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
2436 (match_operand:VF_128 1 "nonimmediate_operand" " 0, 0")
2437 (match_operand:VF_128 2 "nonimmediate_operand" "vm, v")
2438 (match_operand:VF_128 3 "nonimmediate_operand" " v,vm"))
2443 vfmadd132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %<iptr>3, %<iptr>2}
2444 vfmadd213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %<iptr>2, %<iptr>3}"
2445 [(set_attr "type" "ssemuladd")
2446 (set_attr "mode" "<MODE>")])
;; Scalar multiply-subtract insn over the VF_128 modes.  Same two
;; alternatives as *fmai_fmadd_<mode> (operand 1 tied to the
;; destination), emitting vfmsub132 or vfmsub213.
2448 (define_insn "*fmai_fmsub_<mode>"
2449 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
2452 (match_operand:VF_128 1 "nonimmediate_operand" " 0, 0")
2453 (match_operand:VF_128 2 "nonimmediate_operand" "vm, v")
2455 (match_operand:VF_128 3 "nonimmediate_operand" " v,vm")))
2460 vfmsub132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %<iptr>3, %<iptr>2}
2461 vfmsub213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %<iptr>2, %<iptr>3}"
2462 [(set_attr "type" "ssemuladd")
2463 (set_attr "mode" "<MODE>")])
;; Scalar negated multiply-add insn over the VF_128 modes, emitting
;; vfnmadd132 or vfnmadd213.  Unlike *fmai_fmadd_<mode>, operand 2 is
;; listed first in the pattern, ahead of operand 1; operand 1 is still
;; the one tied to the destination.
2465 (define_insn "*fmai_fnmadd_<mode>"
2466 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
2470 (match_operand:VF_128 2 "nonimmediate_operand" "vm, v"))
2471 (match_operand:VF_128 1 "nonimmediate_operand" " 0, 0")
2472 (match_operand:VF_128 3 "nonimmediate_operand" " v,vm"))
2477 vfnmadd132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %<iptr>3, %<iptr>2}
2478 vfnmadd213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %<iptr>2, %<iptr>3}"
2479 [(set_attr "type" "ssemuladd")
2480 (set_attr "mode" "<MODE>")])
;; Scalar negated multiply-subtract insn over the VF_128 modes,
;; emitting vfnmsub132 or vfnmsub213.  As in *fmai_fnmadd_<mode>,
;; operand 2 is listed ahead of operand 1, and operand 1 is tied to the
;; destination.
2482 (define_insn "*fmai_fnmsub_<mode>"
2483 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
2487 (match_operand:VF_128 2 "nonimmediate_operand" "vm, v"))
2488 (match_operand:VF_128 1 "nonimmediate_operand" " 0, 0")
2490 (match_operand:VF_128 3 "nonimmediate_operand" " v,vm")))
2495 vfnmsub132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %<iptr>3, %<iptr>2}
2496 vfnmsub213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %<iptr>2, %<iptr>3}"
2497 [(set_attr "type" "ssemuladd")
2498 (set_attr "mode" "<MODE>")])
2500 ;; FMA4 floating point scalar intrinsics. These write the
2501 ;; entire destination register, with the high-order elements zeroed.
;; Expander fma4i_vmfmadd_<mode> over the VF_128 modes.  The
;; preparation statement sets operands[4] to the all-zero constant of
;; the vector mode.
2503 (define_expand "fma4i_vmfmadd_<mode>"
2504 [(set (match_operand:VF_128 0 "register_operand")
2507 (match_operand:VF_128 1 "nonimmediate_operand")
2508 (match_operand:VF_128 2 "nonimmediate_operand")
2509 (match_operand:VF_128 3 "nonimmediate_operand"))
2513 "operands[4] = CONST0_RTX (<MODE>mode);")
;; FMA4 scalar multiply-add insn over the VF_128 modes.  Two
;; alternatives on "x" registers; the memory-capable input is operand 3
;; in the first and operand 2 in the second.  Operand 4 must satisfy
;; const0_operand.  Emits the four-operand vfmadd form.
2515 (define_insn "*fma4i_vmfmadd_<mode>"
2516 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
2519 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
2520 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
2521 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
2522 (match_operand:VF_128 4 "const0_operand")
2525 "vfmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
2526 [(set_attr "type" "ssemuladd")
2527 (set_attr "mode" "<MODE>")])
;; FMA4 scalar multiply-subtract insn over the VF_128 modes.  Same
;; constraints as *fma4i_vmfmadd_<mode>, with operand 4 required to
;; satisfy const0_operand.  Emits the four-operand vfmsub form.
2529 (define_insn "*fma4i_vmfmsub_<mode>"
2530 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
2533 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
2534 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
2536 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
2537 (match_operand:VF_128 4 "const0_operand")
2540 "vfmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
2541 [(set_attr "type" "ssemuladd")
2542 (set_attr "mode" "<MODE>")])
;; FMA4 scalar negated multiply-add insn over the VF_128 modes.  Same
;; constraints as *fma4i_vmfmadd_<mode>, with operand 4 required to
;; satisfy const0_operand.  Emits the four-operand vfnmadd form.
2544 (define_insn "*fma4i_vmfnmadd_<mode>"
2545 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
2549 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
2550 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
2551 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
2552 (match_operand:VF_128 4 "const0_operand")
2555 "vfnmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
2556 [(set_attr "type" "ssemuladd")
2557 (set_attr "mode" "<MODE>")])
;; FMA4 scalar negated multiply-subtract insn over the VF_128 modes.
;; Same constraints as *fma4i_vmfmadd_<mode>, with operand 4 required
;; to satisfy const0_operand.  Emits the four-operand vfnmsub form.
2559 (define_insn "*fma4i_vmfnmsub_<mode>"
2560 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
2564 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
2565 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
2567 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
2568 (match_operand:VF_128 4 "const0_operand")
2571 "vfnmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
2572 [(set_attr "type" "ssemuladd")
2573 (set_attr "mode" "<MODE>")])
2575 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2577 ;; Parallel single-precision floating point conversion operations
2579 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2ps: converts the V2SI operand 2 ("y" register or memory) with
;; float:V2SF into the V4SF destination.  Operand 1 is tied to the
;; destination register.
2581 (define_insn "sse_cvtpi2ps"
2582 [(set (match_operand:V4SF 0 "register_operand" "=x")
2585 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
2586 (match_operand:V4SF 1 "register_operand" "0")
2589 "cvtpi2ps\t{%2, %0|%0, %2}"
2590 [(set_attr "type" "ssecvt")
2591 (set_attr "mode" "V4SF")])
;; cvtps2pi: V4SF source (register or memory) wrapped in a V4SI unspec,
;; of which elements 0 and 1 are selected into the V2SI "y"
;; destination.  Intel syntax prints the source with the %q modifier.
2593 (define_insn "sse_cvtps2pi"
2594 [(set (match_operand:V2SI 0 "register_operand" "=y")
2596 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
2598 (parallel [(const_int 0) (const_int 1)])))]
2600 "cvtps2pi\t{%1, %0|%0, %q1}"
2601 [(set_attr "type" "ssecvt")
2602 (set_attr "unit" "mmx")
2603 (set_attr "mode" "DI")])
;; cvttps2pi: V4SF source converted with fix:V4SI, of which elements 0
;; and 1 are selected into the V2SI "y" destination.  prefix_rep is
;; explicitly set to "0".
2605 (define_insn "sse_cvttps2pi"
2606 [(set (match_operand:V2SI 0 "register_operand" "=y")
2608 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
2609 (parallel [(const_int 0) (const_int 1)])))]
2611 "cvttps2pi\t{%1, %0|%0, %q1}"
2612 [(set_attr "type" "ssecvt")
2613 (set_attr "unit" "mmx")
2614 (set_attr "prefix_rep" "0")
2615 (set_attr "mode" "SF")])
;; Converts the SI operand 2 with float:SF into the V4SF destination.
;; Alternatives 0-1 (isa noavx) emit cvtsi2ss from a register or memory
;; source, with operand 1 tied to the destination.  Alternative 2 (isa
;; avx) emits the three-operand vcvtsi2ss with operand 1 in any "v"
;; register.
2617 (define_insn "sse_cvtsi2ss"
2618 [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
2621 (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m,rm")))
2622 (match_operand:V4SF 1 "register_operand" "0,0,v")
2626 cvtsi2ss\t{%2, %0|%0, %2}
2627 cvtsi2ss\t{%2, %0|%0, %2}
2628 vcvtsi2ss\t{%2, %1, %0|%0, %1, %2}"
2629 [(set_attr "isa" "noavx,noavx,avx")
2630 (set_attr "type" "sseicvt")
2631 (set_attr "athlon_decode" "vector,double,*")
2632 (set_attr "amdfam10_decode" "vector,double,*")
2633 (set_attr "bdver1_decode" "double,direct,*")
2634 (set_attr "btver2_decode" "double,double,double")
2635 (set_attr "prefix" "orig,orig,maybe_evex")
2636 (set_attr "mode" "SF")])
;; 64-bit variant of sse_cvtsi2ss: the source operand 2 is DI and the
;; insn requires TARGET_SSE && TARGET_64BIT.  Emits cvtsi2ssq (isa
;; noavx, prefix_rex 1) or vcvtsi2ssq (isa avx, length_vex 4).
2638 (define_insn "sse_cvtsi2ssq"
2639 [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
2642 (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,m,rm")))
2643 (match_operand:V4SF 1 "register_operand" "0,0,v")
2645 "TARGET_SSE && TARGET_64BIT"
2647 cvtsi2ssq\t{%2, %0|%0, %2}
2648 cvtsi2ssq\t{%2, %0|%0, %2}
2649 vcvtsi2ssq\t{%2, %1, %0|%0, %1, %2}"
2650 [(set_attr "isa" "noavx,noavx,avx")
2651 (set_attr "type" "sseicvt")
2652 (set_attr "athlon_decode" "vector,double,*")
2653 (set_attr "amdfam10_decode" "vector,double,*")
2654 (set_attr "bdver1_decode" "double,direct,*")
2655 (set_attr "btver2_decode" "double,double,double")
2656 (set_attr "length_vex" "*,*,4")
2657 (set_attr "prefix_rex" "1,1,*")
2658 (set_attr "prefix" "orig,orig,maybe_evex")
2659 (set_attr "mode" "SF")])
;; cvtss2si (VEX form when available via %v): element 0 of the V4SF
;; operand 1 is converted to the SI destination through
;; UNSPEC_FIX_NOTRUNC.  Two alternatives: "v" register or memory
;; source.
;; NOTE(review): no amdfam10_decode attribute here, although
;; sse_cvtss2si_2 sets one -- confirm this is intentional.
2661 (define_insn "sse_cvtss2si"
2662 [(set (match_operand:SI 0 "register_operand" "=r,r")
2665 (match_operand:V4SF 1 "nonimmediate_operand" "v,m")
2666 (parallel [(const_int 0)]))]
2667 UNSPEC_FIX_NOTRUNC))]
2669 "%vcvtss2si\t{%1, %0|%0, %k1}"
2670 [(set_attr "type" "sseicvt")
2671 (set_attr "athlon_decode" "double,vector")
2672 (set_attr "bdver1_decode" "double,double")
2673 (set_attr "prefix_rep" "1")
2674 (set_attr "prefix" "maybe_vex")
2675 (set_attr "mode" "SI")])
;; Variant of sse_cvtss2si whose source is a plain SF operand (register
;; or memory) rather than an element of a V4SF vector; the conversion
;; is again expressed with UNSPEC_FIX_NOTRUNC.
2677 (define_insn "sse_cvtss2si_2"
2678 [(set (match_operand:SI 0 "register_operand" "=r,r")
2679 (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "v,m")]
2680 UNSPEC_FIX_NOTRUNC))]
2682 "%vcvtss2si\t{%1, %0|%0, %k1}"
2683 [(set_attr "type" "sseicvt")
2684 (set_attr "athlon_decode" "double,vector")
2685 (set_attr "amdfam10_decode" "double,double")
2686 (set_attr "bdver1_decode" "double,double")
2687 (set_attr "prefix_rep" "1")
2688 (set_attr "prefix" "maybe_vex")
2689 (set_attr "mode" "SI")])
;; 64-bit variant of sse_cvtss2si: DI destination, requires
;; TARGET_SSE && TARGET_64BIT.  The template carries a {q} suffix
;; group on the mnemonic.
2691 (define_insn "sse_cvtss2siq"
2692 [(set (match_operand:DI 0 "register_operand" "=r,r")
2695 (match_operand:V4SF 1 "nonimmediate_operand" "v,m")
2696 (parallel [(const_int 0)]))]
2697 UNSPEC_FIX_NOTRUNC))]
2698 "TARGET_SSE && TARGET_64BIT"
2699 "%vcvtss2si{q}\t{%1, %0|%0, %k1}"
2700 [(set_attr "type" "sseicvt")
2701 (set_attr "athlon_decode" "double,vector")
2702 (set_attr "bdver1_decode" "double,double")
2703 (set_attr "prefix_rep" "1")
2704 (set_attr "prefix" "maybe_vex")
2705 (set_attr "mode" "DI")])
;; 64-bit variant of sse_cvtss2si_2: plain SF source, DI destination,
;; UNSPEC_FIX_NOTRUNC conversion; requires TARGET_SSE && TARGET_64BIT.
2707 (define_insn "sse_cvtss2siq_2"
2708 [(set (match_operand:DI 0 "register_operand" "=r,r")
2709 (unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "v,m")]
2710 UNSPEC_FIX_NOTRUNC))]
2711 "TARGET_SSE && TARGET_64BIT"
2712 "%vcvtss2si{q}\t{%1, %0|%0, %k1}"
2713 [(set_attr "type" "sseicvt")
2714 (set_attr "athlon_decode" "double,vector")
2715 (set_attr "amdfam10_decode" "double,double")
2716 (set_attr "bdver1_decode" "double,double")
2717 (set_attr "prefix_rep" "1")
2718 (set_attr "prefix" "maybe_vex")
2719 (set_attr "mode" "DI")])
;; cvttss2si (VEX form when available via %v): converts element 0 of
;; the V4SF operand 1 to the SI destination.  Two alternatives: "v"
;; register or memory source.
2721 (define_insn "sse_cvttss2si"
2722 [(set (match_operand:SI 0 "register_operand" "=r,r")
2725 (match_operand:V4SF 1 "nonimmediate_operand" "v,m")
2726 (parallel [(const_int 0)]))))]
2728 "%vcvttss2si\t{%1, %0|%0, %k1}"
2729 [(set_attr "type" "sseicvt")
2730 (set_attr "athlon_decode" "double,vector")
2731 (set_attr "amdfam10_decode" "double,double")
2732 (set_attr "bdver1_decode" "double,double")
2733 (set_attr "prefix_rep" "1")
2734 (set_attr "prefix" "maybe_vex")
2735 (set_attr "mode" "SI")])
;; 64-bit variant of sse_cvttss2si: DI destination, requires
;; TARGET_SSE && TARGET_64BIT.
2737 (define_insn "sse_cvttss2siq"
2738 [(set (match_operand:DI 0 "register_operand" "=r,r")
2741 (match_operand:V4SF 1 "nonimmediate_operand" "v,m")
2742 (parallel [(const_int 0)]))))]
2743 "TARGET_SSE && TARGET_64BIT"
2744 "%vcvttss2si{q}\t{%1, %0|%0, %k1}"
2745 [(set_attr "type" "sseicvt")
2746 (set_attr "athlon_decode" "double,vector")
2747 (set_attr "amdfam10_decode" "double,double")
2748 (set_attr "bdver1_decode" "double,double")
2749 (set_attr "prefix_rep" "1")
2750 (set_attr "prefix" "maybe_vex")
2751 (set_attr "mode" "DI")])
;; Integer-vector to float-vector conversion over the VF1 modes; the
;; source has the matching <sseintvecmode> and may be a register or
;; memory.  Emits cvtdq2ps (VEX form when available via %v).
2753 (define_insn "float<sseintvecmodelower><mode>2"
2754 [(set (match_operand:VF1 0 "register_operand" "=v")
2756 (match_operand:<sseintvecmode> 1 "nonimmediate_operand" "vm")))]
2758 "%vcvtdq2ps\t{%1, %0|%0, %1}"
2759 [(set_attr "type" "ssecvt")
2760 (set_attr "prefix" "maybe_vex")
2761 (set_attr "mode" "<sseinsnmode>")])
;; Unsigned integer-vector to float-vector expander over the VF1 modes.
;; Requires TARGET_SSE2, and additionally TARGET_AVX2 for any mode other
;; than V4SF.  All the work is delegated to
;; ix86_expand_vector_convert_uns_vsivsf.
2763 (define_expand "floatuns<sseintvecmodelower><mode>2"
2764 [(match_operand:VF1 0 "register_operand")
2765 (match_operand:<sseintvecmode> 1 "register_operand")]
2766 "TARGET_SSE2 && (<MODE>mode == V4SFmode || TARGET_AVX2)"
2768 ix86_expand_vector_convert_uns_vsivsf (operands[0], operands[1]);
;; 256-bit vcvtps2dq: V8SF source (register or memory) to V8SI
;; destination through UNSPEC_FIX_NOTRUNC.  Always VEX-encoded.
2772 (define_insn "avx_cvtps2dq256"
2773 [(set (match_operand:V8SI 0 "register_operand" "=x")
2774 (unspec:V8SI [(match_operand:V8SF 1 "nonimmediate_operand" "xm")]
2775 UNSPEC_FIX_NOTRUNC))]
2777 "vcvtps2dq\t{%1, %0|%0, %1}"
2778 [(set_attr "type" "ssecvt")
2779 (set_attr "prefix" "vex")
2780 (set_attr "mode" "OI")])
;; 128-bit cvtps2dq (VEX form when available via %v): V4SF source to
;; V4SI destination through UNSPEC_FIX_NOTRUNC.  prefix_data16 is
;; selected by a TARGET_AVX test; "1" is one of the two outcomes.
2782 (define_insn "sse2_cvtps2dq"
2783 [(set (match_operand:V4SI 0 "register_operand" "=x")
2784 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
2785 UNSPEC_FIX_NOTRUNC))]
2787 "%vcvtps2dq\t{%1, %0|%0, %1}"
2788 [(set_attr "type" "ssecvt")
2789 (set (attr "prefix_data16")
2791 (match_test "TARGET_AVX")
2793 (const_string "1")))
2794 (set_attr "prefix" "maybe_vex")
2795 (set_attr "mode" "TI")])
;; 512-bit truncating conversion, V16SF source to V16SI destination.
;; The <fixsuffix> substitution appears in both the pattern name and
;; the vcvttps2<fixsuffix>dq mnemonic.  Always EVEX-encoded.
2797 (define_insn "<fixsuffix>fix_truncv16sfv16si2"
2798 [(set (match_operand:V16SI 0 "register_operand" "=v")
2800 (match_operand:V16SF 1 "nonimmediate_operand" "vm")))]
2802 "vcvttps2<fixsuffix>dq\t{%1, %0|%0, %1}"
2803 [(set_attr "type" "ssecvt")
2804 (set_attr "prefix" "evex")
2805 (set_attr "mode" "XI")])
;; 256-bit truncating conversion (fix), V8SF source to V8SI
;; destination.  Emits vcvttps2dq, always VEX-encoded.
2807 (define_insn "fix_truncv8sfv8si2"
2808 [(set (match_operand:V8SI 0 "register_operand" "=x")
2809 (fix:V8SI (match_operand:V8SF 1 "nonimmediate_operand" "xm")))]
2811 "vcvttps2dq\t{%1, %0|%0, %1}"
2812 [(set_attr "type" "ssecvt")
2813 (set_attr "prefix" "vex")
2814 (set_attr "mode" "OI")])
;; 128-bit truncating conversion (fix), V4SF source to V4SI
;; destination.  Emits cvttps2dq (VEX form when available via %v).
;; prefix_rep and prefix_data16 are each specified exactly once, as a
;; TARGET_AVX-conditional value, in the same form sse2_cvtps2dq uses.
;; A second, unconditional prefix_data16 "0" entry would only duplicate
;; the non-AVX arm of the conditional and make it ambiguous which entry
;; applies, so it is not repeated here.
2816 (define_insn "fix_truncv4sfv4si2"
2817 [(set (match_operand:V4SI 0 "register_operand" "=x")
2818 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
2820 "%vcvttps2dq\t{%1, %0|%0, %1}"
2821 [(set_attr "type" "ssecvt")
2822 (set (attr "prefix_rep")
2824 (match_test "TARGET_AVX")
2826 (const_string "1")))
2827 (set (attr "prefix_data16")
2829 (match_test "TARGET_AVX")
2831 (const_string "0")))
2833 (set_attr "prefix" "maybe_vex")
2834 (set_attr "mode" "TI")])
;; Unsigned truncating float-to-integer expander over the VF1 modes.
;; ix86_expand_adjust_ufix_to_sfix_si produces an adjusted input
;; (tmp[0]) and fills tmp[2]; the signed fix_trunc pattern converts
;; tmp[0] into tmp[1]; the result is tmp[1] XOR tmp[2].
2836 (define_expand "fixuns_trunc<mode><sseintvecmodelower>2"
2837 [(match_operand:<sseintvecmode> 0 "register_operand")
2838 (match_operand:VF1 1 "register_operand")]
2842 tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]);
2843 tmp[1] = gen_reg_rtx (<sseintvecmode>mode);
2844 emit_insn (gen_fix_trunc<mode><sseintvecmodelower>2 (tmp[1], tmp[0]));
2845 emit_insn (gen_xor<sseintvecmodelower>3 (operands[0], tmp[1], tmp[2]));
2849 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2851 ;; Parallel double-precision floating point conversion operations
2853 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2pd: V2SI source converted with float:V2DF into an "x"
;; register.  Alternative 0 takes the source from a "y" register (unit
;; mmx, prefix_data16 1); alternative 1 takes it from memory.
2855 (define_insn "sse2_cvtpi2pd"
2856 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2857 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
2859 "cvtpi2pd\t{%1, %0|%0, %1}"
2860 [(set_attr "type" "ssecvt")
2861 (set_attr "unit" "mmx,*")
2862 (set_attr "prefix_data16" "1,*")
2863 (set_attr "mode" "V2DF")])
;; cvtpd2pi: V2DF source (register or memory) to a V2SI "y"
;; destination through UNSPEC_FIX_NOTRUNC.
2865 (define_insn "sse2_cvtpd2pi"
2866 [(set (match_operand:V2SI 0 "register_operand" "=y")
2867 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2868 UNSPEC_FIX_NOTRUNC))]
2870 "cvtpd2pi\t{%1, %0|%0, %1}"
2871 [(set_attr "type" "ssecvt")
2872 (set_attr "unit" "mmx")
2873 (set_attr "bdver1_decode" "double")
2874 (set_attr "btver2_decode" "direct")
2875 (set_attr "prefix_data16" "1")
2876 (set_attr "mode" "DI")])
;; cvttpd2pi: truncating conversion (fix) of a V2DF source (register or
;; memory) to a V2SI "y" destination.
2878 (define_insn "sse2_cvttpd2pi"
2879 [(set (match_operand:V2SI 0 "register_operand" "=y")
2880 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
2882 "cvttpd2pi\t{%1, %0|%0, %1}"
2883 [(set_attr "type" "ssecvt")
2884 (set_attr "unit" "mmx")
2885 (set_attr "bdver1_decode" "double")
2886 (set_attr "prefix_data16" "1")
2887 (set_attr "mode" "TI")])
;; Converts the SI operand 2 with float:DF into the V2DF destination.
;; Alternatives 0-1 (isa noavx) emit cvtsi2sd from a register or memory
;; source, with operand 1 tied to the destination.  Alternative 2 (isa
;; avx) emits the three-operand vcvtsi2sd.
;; The AVX alternative uses the "v" register class and the maybe_evex
;; prefix, matching sse_cvtsi2ss and sse2_cvtsi2sdq; "v" is a superset
;; of "x", so every previously accepted operand is still accepted.
2889 (define_insn "sse2_cvtsi2sd"
2890 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
2893 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m,rm")))
2894 (match_operand:V2DF 1 "register_operand" "0,0,v")
2898 cvtsi2sd\t{%2, %0|%0, %2}
2899 cvtsi2sd\t{%2, %0|%0, %2}
2900 vcvtsi2sd\t{%2, %1, %0|%0, %1, %2}"
2901 [(set_attr "isa" "noavx,noavx,avx")
2902 (set_attr "type" "sseicvt")
2903 (set_attr "athlon_decode" "double,direct,*")
2904 (set_attr "amdfam10_decode" "vector,double,*")
2905 (set_attr "bdver1_decode" "double,direct,*")
2906 (set_attr "btver2_decode" "double,double,double")
2907 (set_attr "prefix" "orig,orig,maybe_evex")
2908 (set_attr "mode" "DF")])
;; 64-bit variant of sse2_cvtsi2sd: DI source, requires
;; TARGET_SSE2 && TARGET_64BIT.  Emits cvtsi2sdq (isa noavx,
;; prefix_rex 1) or vcvtsi2sdq (isa avx, length_vex 4, "v" registers).
2910 (define_insn "sse2_cvtsi2sdq"
2911 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
2914 (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m,rm")))
2915 (match_operand:V2DF 1 "register_operand" "0,0,v")
2917 "TARGET_SSE2 && TARGET_64BIT"
2919 cvtsi2sdq\t{%2, %0|%0, %2}
2920 cvtsi2sdq\t{%2, %0|%0, %2}
2921 vcvtsi2sdq\t{%2, %1, %0|%0, %1, %2}"
2922 [(set_attr "isa" "noavx,noavx,avx")
2923 (set_attr "type" "sseicvt")
2924 (set_attr "athlon_decode" "double,direct,*")
2925 (set_attr "amdfam10_decode" "vector,double,*")
2926 (set_attr "bdver1_decode" "double,direct,*")
2927 (set_attr "length_vex" "*,*,4")
2928 (set_attr "prefix_rex" "1,1,*")
2929 (set_attr "prefix" "orig,orig,maybe_evex")
2930 (set_attr "mode" "DF")])
;; cvtsd2si (VEX form when available via %v): element 0 of the V2DF
;; operand 1 is converted to the SI destination through
;; UNSPEC_FIX_NOTRUNC.  Two alternatives: "v" register or memory
;; source; Intel syntax prints the source with the %q modifier.
2932 (define_insn "sse2_cvtsd2si"
2933 [(set (match_operand:SI 0 "register_operand" "=r,r")
2936 (match_operand:V2DF 1 "nonimmediate_operand" "v,m")
2937 (parallel [(const_int 0)]))]
2938 UNSPEC_FIX_NOTRUNC))]
2940 "%vcvtsd2si\t{%1, %0|%0, %q1}"
2941 [(set_attr "type" "sseicvt")
2942 (set_attr "athlon_decode" "double,vector")
2943 (set_attr "bdver1_decode" "double,double")
2944 (set_attr "btver2_decode" "double,double")
2945 (set_attr "prefix_rep" "1")
2946 (set_attr "prefix" "maybe_vex")
2947 (set_attr "mode" "SI")])
;; Variant of sse2_cvtsd2si whose source is a plain DF operand
;; (register or memory) rather than an element of a V2DF vector; the
;; conversion is again expressed with UNSPEC_FIX_NOTRUNC.
2949 (define_insn "sse2_cvtsd2si_2"
2950 [(set (match_operand:SI 0 "register_operand" "=r,r")
2951 (unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "v,m")]
2952 UNSPEC_FIX_NOTRUNC))]
2954 "%vcvtsd2si\t{%1, %0|%0, %q1}"
2955 [(set_attr "type" "sseicvt")
2956 (set_attr "athlon_decode" "double,vector")
2957 (set_attr "amdfam10_decode" "double,double")
2958 (set_attr "bdver1_decode" "double,double")
2959 (set_attr "prefix_rep" "1")
2960 (set_attr "prefix" "maybe_vex")
2961 (set_attr "mode" "SI")])
;; 64-bit variant of sse2_cvtsd2si: DI destination, requires
;; TARGET_SSE2 && TARGET_64BIT.  The template carries a {q} suffix
;; group on the mnemonic.
2963 (define_insn "sse2_cvtsd2siq"
2964 [(set (match_operand:DI 0 "register_operand" "=r,r")
2967 (match_operand:V2DF 1 "nonimmediate_operand" "v,m")
2968 (parallel [(const_int 0)]))]
2969 UNSPEC_FIX_NOTRUNC))]
2970 "TARGET_SSE2 && TARGET_64BIT"
2971 "%vcvtsd2si{q}\t{%1, %0|%0, %q1}"
2972 [(set_attr "type" "sseicvt")
2973 (set_attr "athlon_decode" "double,vector")
2974 (set_attr "bdver1_decode" "double,double")
2975 (set_attr "prefix_rep" "1")
2976 (set_attr "prefix" "maybe_vex")
2977 (set_attr "mode" "DI")])
;; 64-bit variant of sse2_cvtsd2si_2: plain DF source, DI destination,
;; UNSPEC_FIX_NOTRUNC conversion; requires TARGET_SSE2 && TARGET_64BIT.
2979 (define_insn "sse2_cvtsd2siq_2"
2980 [(set (match_operand:DI 0 "register_operand" "=r,r")
2981 (unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "v,m")]
2982 UNSPEC_FIX_NOTRUNC))]
2983 "TARGET_SSE2 && TARGET_64BIT"
2984 "%vcvtsd2si{q}\t{%1, %0|%0, %q1}"
2985 [(set_attr "type" "sseicvt")
2986 (set_attr "athlon_decode" "double,vector")
2987 (set_attr "amdfam10_decode" "double,double")
2988 (set_attr "bdver1_decode" "double,double")
2989 (set_attr "prefix_rep" "1")
2990 (set_attr "prefix" "maybe_vex")
2991 (set_attr "mode" "DI")])
;; cvttsd2si (VEX form when available via %v): converts element 0 of
;; the V2DF operand 1 to the SI destination.  Two alternatives: "v"
;; register or memory source.
2993 (define_insn "sse2_cvttsd2si"
2994 [(set (match_operand:SI 0 "register_operand" "=r,r")
2997 (match_operand:V2DF 1 "nonimmediate_operand" "v,m")
2998 (parallel [(const_int 0)]))))]
3000 "%vcvttsd2si\t{%1, %0|%0, %q1}"
3001 [(set_attr "type" "sseicvt")
3002 (set_attr "athlon_decode" "double,vector")
3003 (set_attr "amdfam10_decode" "double,double")
3004 (set_attr "bdver1_decode" "double,double")
3005 (set_attr "btver2_decode" "double,double")
3006 (set_attr "prefix_rep" "1")
3007 (set_attr "prefix" "maybe_vex")
3008 (set_attr "mode" "SI")])
;; 64-bit variant of sse2_cvttsd2si: DI destination, requires
;; TARGET_SSE2 && TARGET_64BIT.
3010 (define_insn "sse2_cvttsd2siq"
3011 [(set (match_operand:DI 0 "register_operand" "=r,r")
3014 (match_operand:V2DF 1 "nonimmediate_operand" "v,m")
3015 (parallel [(const_int 0)]))))]
3016 "TARGET_SSE2 && TARGET_64BIT"
3017 "%vcvttsd2si{q}\t{%1, %0|%0, %q1}"
3018 [(set_attr "type" "sseicvt")
3019 (set_attr "athlon_decode" "double,vector")
3020 (set_attr "amdfam10_decode" "double,double")
3021 (set_attr "bdver1_decode" "double,double")
3022 (set_attr "prefix_rep" "1")
3023 (set_attr "prefix" "maybe_vex")
3024 (set_attr "mode" "DI")])
3026 ;; For the float<si2dfmodelower><mode>2 insn pattern.
;; si2dfmode gives the SI vector mode with the same element count as a
;; DF vector mode (V8DF -> V8SI, V4DF -> V4SI); si2dfmodelower is the
;; same mapping spelled in lower case for use in pattern names.
3027 (define_mode_attr si2dfmode
3028 [(V8DF "V8SI") (V4DF "V4SI")])
3029 (define_mode_attr si2dfmodelower
3030 [(V8DF "v8si") (V4DF "v4si")])
;; Integer-vector to double-vector conversion over the VF2_512_256
;; modes; the source has mode <si2dfmode> and may be a register or
;; memory.  Emits vcvtdq2pd.
3032 (define_insn "float<si2dfmodelower><mode>2"
3033 [(set (match_operand:VF2_512_256 0 "register_operand" "=v")
3034 (float:VF2_512_256 (match_operand:<si2dfmode> 1 "nonimmediate_operand" "vm")))]
3036 "vcvtdq2pd\t{%1, %0|%0, %1}"
3037 [(set_attr "type" "ssecvt")
3038 (set_attr "prefix" "maybe_vex")
3039 (set_attr "mode" "<MODE>")])
;; vcvtdq2pd producing V4DF from elements 0-3 of a V8SI source
;; (register or memory).  The source is printed with the %x modifier in
;; both syntaxes.
3041 (define_insn "avx_cvtdq2pd256_2"
3042 [(set (match_operand:V4DF 0 "register_operand" "=x")
3045 (match_operand:V8SI 1 "nonimmediate_operand" "xm")
3046 (parallel [(const_int 0) (const_int 1)
3047 (const_int 2) (const_int 3)]))))]
3049 "vcvtdq2pd\t{%x1, %0|%0, %x1}"
3050 [(set_attr "type" "ssecvt")
3051 (set_attr "prefix" "vex")
3052 (set_attr "mode" "V4DF")])
;; cvtdq2pd (VEX form when available via %v) producing V2DF from
;; elements 0 and 1 of a V4SI source (register or memory).
3054 (define_insn "sse2_cvtdq2pd"
3055 [(set (match_operand:V2DF 0 "register_operand" "=x")
3058 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
3059 (parallel [(const_int 0) (const_int 1)]))))]
3061 "%vcvtdq2pd\t{%1, %0|%0, %q1}"
3062 [(set_attr "type" "ssecvt")
3063 (set_attr "prefix" "maybe_vex")
3064 (set_attr "mode" "V2DF")])
;; vcvtpd2dq with a {y} suffix group: V4DF source (register or memory)
;; to V4SI destination through UNSPEC_FIX_NOTRUNC.  Always VEX-encoded.
3066 (define_insn "avx_cvtpd2dq256"
3067 [(set (match_operand:V4SI 0 "register_operand" "=x")
3068 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")]
3069 UNSPEC_FIX_NOTRUNC))]
3071 "vcvtpd2dq{y}\t{%1, %0|%0, %1}"
3072 [(set_attr "type" "ssecvt")
3073 (set_attr "prefix" "vex")
3074 (set_attr "mode" "OI")])
;; Expander with a V8SI destination built from a V4SI unspec of the
;; V4DF operand 1.  The preparation statement sets operands[2] to the
;; V4SI zero constant.
3076 (define_expand "avx_cvtpd2dq256_2"
3077 [(set (match_operand:V8SI 0 "register_operand")
3079 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand")]
3083 "operands[2] = CONST0_RTX (V4SImode);")
;; Insn matching the avx_cvtpd2dq256_2 expansion: V8SI destination
;; built from a V4SI unspec of the V4DF source plus a V4SI operand 2
;; that must satisfy const0_operand.  Emits vcvtpd2dq{y}, printing the
;; destination with the %x modifier.
3085 (define_insn "*avx_cvtpd2dq256_2"
3086 [(set (match_operand:V8SI 0 "register_operand" "=x")
3088 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")]
3090 (match_operand:V4SI 2 "const0_operand")))]
3092 "vcvtpd2dq{y}\t{%1, %x0|%x0, %1}"
3093 [(set_attr "type" "ssecvt")
3094 (set_attr "prefix" "vex")
3095 (set_attr "btver2_decode" "vector")
3096 (set_attr "mode" "OI")])
;; Expander with a V4SI destination built from a V2SI unspec of the
;; V2DF operand 1.  The preparation statement sets operands[2] to the
;; V2SI zero constant.
3098 (define_expand "sse2_cvtpd2dq"
3099 [(set (match_operand:V4SI 0 "register_operand")
3101 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand")]
3105 "operands[2] = CONST0_RTX (V2SImode);")
;; Insn matching the sse2_cvtpd2dq expansion: V4SI destination built
;; from a V2SI unspec of the V2DF source plus a V2SI operand 2 that
;; must satisfy const0_operand.  The C output code returns either the
;; vcvtpd2dq{x} or the cvtpd2dq template (presumably selected on
;; TARGET_AVX -- confirm).
3107 (define_insn "*sse2_cvtpd2dq"
3108 [(set (match_operand:V4SI 0 "register_operand" "=x")
3110 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
3112 (match_operand:V2SI 2 "const0_operand")))]
3116 return "vcvtpd2dq{x}\t{%1, %0|%0, %1}";
3118 return "cvtpd2dq\t{%1, %0|%0, %1}";
3120 [(set_attr "type" "ssecvt")
3121 (set_attr "prefix_rep" "1")
3122 (set_attr "prefix_data16" "0")
3123 (set_attr "prefix" "maybe_vex")
3124 (set_attr "mode" "TI")
3125 (set_attr "amdfam10_decode" "double")
3126 (set_attr "athlon_decode" "vector")
3127 (set_attr "bdver1_decode" "double")])
;; 512-bit-source truncating conversion over the any_fix code
;; iterator: V8DF source to V8SI destination.  <fixsuffix> appears in
;; both the pattern name and the vcvttpd2<fixsuffix>dq mnemonic.
;; Always EVEX-encoded.
3129 (define_insn "<fixsuffix>fix_truncv8dfv8si2"
3130 [(set (match_operand:V8SI 0 "register_operand" "=v")
3131 (any_fix:V8SI (match_operand:V8DF 1 "nonimmediate_operand" "vm")))]
3133 "vcvttpd2<fixsuffix>dq\t{%1, %0|%0, %1}"
3134 [(set_attr "type" "ssecvt")
3135 (set_attr "prefix" "evex")
3136 (set_attr "mode" "OI")])
;; Truncating conversion (fix) of a V4DF source (register or memory)
;; to a V4SI destination.  Emits vcvttpd2dq{y}, always VEX-encoded.
3138 (define_insn "fix_truncv4dfv4si2"
3139 [(set (match_operand:V4SI 0 "register_operand" "=x")
3140 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
3142 "vcvttpd2dq{y}\t{%1, %0|%0, %1}"
3143 [(set_attr "type" "ssecvt")
3144 (set_attr "prefix" "vex")
3145 (set_attr "mode" "OI")])
;; Expander with a V8SI destination built from fix:V4SI of the V4DF
;; operand 1.  The preparation statement sets operands[2] to the V4SI
;; zero constant.
3147 (define_expand "avx_cvttpd2dq256_2"
3148 [(set (match_operand:V8SI 0 "register_operand")
3150 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand"))
3153 "operands[2] = CONST0_RTX (V4SImode);")
;; Insn matching the avx_cvttpd2dq256_2 expansion: V8SI destination
;; built from fix:V4SI of the V4DF source plus a V4SI operand 2 that
;; must satisfy const0_operand.  Emits vcvttpd2dq{y}, printing the
;; destination with the %x modifier.
3155 (define_insn "*avx_cvttpd2dq256_2"
3156 [(set (match_operand:V8SI 0 "register_operand" "=x")
3158 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "xm"))
3159 (match_operand:V4SI 2 "const0_operand")))]
3161 "vcvttpd2dq{y}\t{%1, %x0|%x0, %1}"
3162 [(set_attr "type" "ssecvt")
3163 (set_attr "prefix" "vex")
3164 (set_attr "btver2_decode" "vector")
3165 (set_attr "mode" "OI")])
;; Expander with a V4SI destination built from fix:V2SI of the V2DF
;; operand 1.  The preparation statement sets operands[2] to the V2SI
;; zero constant.
3167 (define_expand "sse2_cvttpd2dq"
3168 [(set (match_operand:V4SI 0 "register_operand")
3170 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand"))
3173 "operands[2] = CONST0_RTX (V2SImode);")
;; Insn matching the sse2_cvttpd2dq expansion: V4SI destination built
;; from fix:V2SI of the V2DF source plus a V2SI operand 2 that must
;; satisfy const0_operand.  The C output code returns either the
;; vcvttpd2dq{x} or the cvttpd2dq template (presumably selected on
;; TARGET_AVX -- confirm).
3175 (define_insn "*sse2_cvttpd2dq"
3176 [(set (match_operand:V4SI 0 "register_operand" "=x")
3178 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
3179 (match_operand:V2SI 2 "const0_operand")))]
3183 return "vcvttpd2dq{x}\t{%1, %0|%0, %1}";
3185 return "cvttpd2dq\t{%1, %0|%0, %1}";
3187 [(set_attr "type" "ssecvt")
3188 (set_attr "amdfam10_decode" "double")
3189 (set_attr "athlon_decode" "vector")
3190 (set_attr "bdver1_decode" "double")
3191 (set_attr "prefix" "maybe_vex")
3192 (set_attr "mode" "TI")])
;; Double-to-single conversion: the V2DF operand 2 goes through
;; float_truncate:V2SF and the result lands in the V4SF destination.
;; Alternatives 0-1 (isa noavx) emit cvtsd2ss with operand 1 tied to
;; the destination; alternative 2 (isa avx) emits the three-operand
;; vcvtsd2ss.  Memory-capable alternatives print operand 2 with %q in
;; Intel syntax.
;; NOTE(review): alternative 2 uses "v" constraints but prefix "vex",
;; whereas sse_cvtsi2ss uses maybe_evex with "v" -- confirm.
3194 (define_insn "sse2_cvtsd2ss"
3195 [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
3198 (float_truncate:V2SF
3199 (match_operand:V2DF 2 "nonimmediate_operand" "x,m,vm")))
3200 (match_operand:V4SF 1 "register_operand" "0,0,v")
3204 cvtsd2ss\t{%2, %0|%0, %2}
3205 cvtsd2ss\t{%2, %0|%0, %q2}
3206 vcvtsd2ss\t{%2, %1, %0|%0, %1, %q2}"
3207 [(set_attr "isa" "noavx,noavx,avx")
3208 (set_attr "type" "ssecvt")
3209 (set_attr "athlon_decode" "vector,double,*")
3210 (set_attr "amdfam10_decode" "vector,double,*")
3211 (set_attr "bdver1_decode" "direct,direct,*")
3212 (set_attr "btver2_decode" "double,double,double")
3213 (set_attr "prefix" "orig,orig,vex")
3214 (set_attr "mode" "SF")])
;; Single-to-double conversion: elements 0 and 1 of the V4SF operand 2
;; are selected and the result lands in the V2DF destination.
;; Alternatives 0-1 (isa noavx) emit cvtss2sd with operand 1 tied to
;; the destination; alternative 2 (isa avx) emits the three-operand
;; vcvtss2sd.  Memory-capable alternatives print operand 2 with %k in
;; Intel syntax.
;; NOTE(review): alternative 2 uses "v" constraints but prefix "vex",
;; whereas sse_cvtsi2ss uses maybe_evex with "v" -- confirm.
3216 (define_insn "sse2_cvtss2sd"
3217 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
3221 (match_operand:V4SF 2 "nonimmediate_operand" "x,m,vm")
3222 (parallel [(const_int 0) (const_int 1)])))
3223 (match_operand:V2DF 1 "register_operand" "0,0,v")
3227 cvtss2sd\t{%2, %0|%0, %2}
3228 cvtss2sd\t{%2, %0|%0, %k2}
3229 vcvtss2sd\t{%2, %1, %0|%0, %1, %k2}"
3230 [(set_attr "isa" "noavx,noavx,avx")
3231 (set_attr "type" "ssecvt")
3232 (set_attr "amdfam10_decode" "vector,double,*")
3233 (set_attr "athlon_decode" "direct,direct,*")
3234 (set_attr "bdver1_decode" "direct,direct,*")
3235 (set_attr "btver2_decode" "double,double,double")
3236 (set_attr "prefix" "orig,orig,vex")
3237 (set_attr "mode" "DF")])
;; float_truncate of a V4DF source (register or memory) to a V4SF
;; destination.  Emits vcvtpd2ps{y}, always VEX-encoded.
3239 (define_insn "avx_cvtpd2ps256"
3240 [(set (match_operand:V4SF 0 "register_operand" "=x")
3241 (float_truncate:V4SF
3242 (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
3244 "vcvtpd2ps{y}\t{%1, %0|%0, %1}"
3245 [(set_attr "type" "ssecvt")
3246 (set_attr "prefix" "vex")
3247 (set_attr "btver2_decode" "vector")
3248 (set_attr "mode" "V4SF")])
;; Expander with a V4SF destination built from float_truncate:V2SF of
;; the V2DF operand 1.  The preparation statement sets operands[2] to
;; the V2SF zero constant.
3250 (define_expand "sse2_cvtpd2ps"
3251 [(set (match_operand:V4SF 0 "register_operand")
3253 (float_truncate:V2SF
3254 (match_operand:V2DF 1 "nonimmediate_operand"))
3257 "operands[2] = CONST0_RTX (V2SFmode);")
;; Insn matching the sse2_cvtpd2ps expansion: V4SF destination built
;; from float_truncate:V2SF of the V2DF source plus a V2SF operand 2
;; that must satisfy const0_operand.  The C output code returns either
;; the vcvtpd2ps{x} or the cvtpd2ps template (presumably selected on
;; TARGET_AVX -- confirm).
3259 (define_insn "*sse2_cvtpd2ps"
3260 [(set (match_operand:V4SF 0 "register_operand" "=x")
3262 (float_truncate:V2SF
3263 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
3264 (match_operand:V2SF 2 "const0_operand")))]
3268 return "vcvtpd2ps{x}\t{%1, %0|%0, %1}";
3270 return "cvtpd2ps\t{%1, %0|%0, %1}";
3272 [(set_attr "type" "ssecvt")
3273 (set_attr "amdfam10_decode" "double")
3274 (set_attr "athlon_decode" "vector")
3275 (set_attr "bdver1_decode" "double")
3276 (set_attr "prefix_data16" "1")
3277 (set_attr "prefix" "maybe_vex")
3278 (set_attr "mode" "V4SF")])
3280 ;; For <sse2_avx_avx512f>_cvtps2pd<avxsizesuffix> insn pattern
;; Maps a DF vector mode to the SF vector mode with the same element
;; count (V8DF -> V8SF, V4DF -> V4SF).
3281 (define_mode_attr sf2dfmode
3282 [(V8DF "V8SF") (V4DF "V4SF")])
;; float_extend over the VF2_512_256 modes: the source has mode
;; <sf2dfmode> and may be a register or memory.  Emits vcvtps2pd.
3284 (define_insn "<sse2_avx_avx512f>_cvtps2pd<avxsizesuffix>"
3285 [(set (match_operand:VF2_512_256 0 "register_operand" "=v")
3286 (float_extend:VF2_512_256
3287 (match_operand:<sf2dfmode> 1 "nonimmediate_operand" "vm")))]
3289 "vcvtps2pd\t{%1, %0|%0, %1}"
3290 [(set_attr "type" "ssecvt")
3291 (set_attr "prefix" "maybe_vex")
3292 (set_attr "mode" "<MODE>")])
;; Produce a V4DF result from elements 0-3 of the V8SF operand 1.  Emits
;; vcvtps2pd with operand 1 printed through the %x modifier.
;; NOTE(review): %x presumably prints the 128-bit register form -- confirm.
3294 (define_insn "*avx_cvtps2pd256_2"
3295 [(set (match_operand:V4DF 0 "register_operand" "=x")
3298 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
3299 (parallel [(const_int 0) (const_int 1)
3300 (const_int 2) (const_int 3)]))))]
3302 "vcvtps2pd\t{%x1, %0|%0, %x1}"
3303 [(set_attr "type" "ssecvt")
3304 (set_attr "prefix" "vex")
3305 (set_attr "mode" "V4DF")])
;; Produce a V2DF result from elements 0 and 1 of the V4SF operand 1.  Emits
;; %vcvtps2pd; the Intel-syntax form prints operand 1 with the %q modifier.
3307 (define_insn "sse2_cvtps2pd"
3308 [(set (match_operand:V2DF 0 "register_operand" "=x")
3311 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3312 (parallel [(const_int 0) (const_int 1)]))))]
3314 "%vcvtps2pd\t{%1, %0|%0, %q1}"
3315 [(set_attr "type" "ssecvt")
3316 (set_attr "amdfam10_decode" "direct")
3317 (set_attr "athlon_decode" "double")
3318 (set_attr "bdver1_decode" "double")
3319 (set_attr "prefix_data16" "0")
3320 (set_attr "prefix" "maybe_vex")
3321 (set_attr "mode" "V2DF")])
;; Expander made of two sets: a shuffle of V4SF operand 1 with selector
;; {6, 7, 2, 3}, then a set of V2DF operand 0 from elements 0 and 1.
;; operands[2] is a fresh V4SF pseudo.
;; NOTE(review): presumably operands[2] carries the shuffled value between
;; the two sets -- confirm against the full pattern.
3323 (define_expand "vec_unpacks_hi_v4sf"
3328 (match_operand:V4SF 1 "nonimmediate_operand"))
3329 (parallel [(const_int 6) (const_int 7)
3330 (const_int 2) (const_int 3)])))
3331 (set (match_operand:V2DF 0 "register_operand")
3335 (parallel [(const_int 0) (const_int 1)]))))]
3337 "operands[2] = gen_reg_rtx (V4SFmode);")
;; Expander made of two sets: a selection of elements 4-7 of V8SF operand 1,
;; then a set of V4DF operand 0.  operands[2] is a fresh V4SF pseudo.
;; NOTE(review): presumably operands[2] holds the selected high half.
3339 (define_expand "vec_unpacks_hi_v8sf"
3342 (match_operand:V8SF 1 "nonimmediate_operand")
3343 (parallel [(const_int 4) (const_int 5)
3344 (const_int 6) (const_int 7)])))
3345 (set (match_operand:V4DF 0 "register_operand")
3349 "operands[2] = gen_reg_rtx (V4SFmode);")
;; Set V2DF operand 0 from elements 0 and 1 of V4SF operand 1.
3351 (define_expand "vec_unpacks_lo_v4sf"
3352 [(set (match_operand:V2DF 0 "register_operand")
3355 (match_operand:V4SF 1 "nonimmediate_operand")
3356 (parallel [(const_int 0) (const_int 1)]))))]
;; Set V4DF operand 0 from elements 0-3 of V8SF operand 1.
3359 (define_expand "vec_unpacks_lo_v8sf"
3360 [(set (match_operand:V4DF 0 "register_operand")
3363 (match_operand:V8SF 1 "nonimmediate_operand")
3364 (parallel [(const_int 0) (const_int 1)
3365 (const_int 2) (const_int 3)]))))]
;; Maps an HI or SI integer vector mode to the float vector mode with half as
;; many elements, each twice as wide (HI -> SF, SI -> DF).
3368 (define_mode_attr sseunpackfltmode
3369 [(V8HI "V4SF") (V4SI "V2DF") (V16HI "V8SF")
3370 (V8SI "V4DF") (V32HI "V16SF") (V16SI "V8DF")])
;; Allocate a temporary in <sseunpackmode>mode, fill it from operand 1 with
;; gen_vec_unpacks_hi_<mode>, then set operand 0 to a FLOAT rtx of the
;; temporary in <sseunpackfltmode>mode.
3372 (define_expand "vec_unpacks_float_hi_<mode>"
3373 [(match_operand:<sseunpackfltmode> 0 "register_operand")
3374 (match_operand:VI2_AVX512F 1 "register_operand")]
3377 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
3379 emit_insn (gen_vec_unpacks_hi_<mode> (tmp, operands[1]));
3380 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
3381 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
;; Allocate a temporary in <sseunpackmode>mode, fill it from operand 1 with
;; gen_vec_unpacks_lo_<mode>, then set operand 0 to a FLOAT rtx of the
;; temporary in <sseunpackfltmode>mode.
3385 (define_expand "vec_unpacks_float_lo_<mode>"
3386 [(match_operand:<sseunpackfltmode> 0 "register_operand")
3387 (match_operand:VI2_AVX512F 1 "register_operand")]
3390 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
3392 emit_insn (gen_vec_unpacks_lo_<mode> (tmp, operands[1]));
3393 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
3394 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
;; Allocate a temporary in <sseunpackmode>mode, fill it from operand 1 with
;; gen_vec_unpacku_hi_<mode>, then set operand 0 to a FLOAT rtx of the
;; temporary in <sseunpackfltmode>mode.
3398 (define_expand "vec_unpacku_float_hi_<mode>"
3399 [(match_operand:<sseunpackfltmode> 0 "register_operand")
3400 (match_operand:VI2_AVX512F 1 "register_operand")]
3403 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
3405 emit_insn (gen_vec_unpacku_hi_<mode> (tmp, operands[1]));
3406 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
3407 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
;; Allocate a temporary in <sseunpackmode>mode, fill it from operand 1 with
;; gen_vec_unpacku_lo_<mode>, then set operand 0 to a FLOAT rtx of the
;; temporary in <sseunpackfltmode>mode.
3411 (define_expand "vec_unpacku_float_lo_<mode>"
3412 [(match_operand:<sseunpackfltmode> 0 "register_operand")
3413 (match_operand:VI2_AVX512F 1 "register_operand")]
3416 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
3418 emit_insn (gen_vec_unpacku_lo_<mode> (tmp, operands[1]));
3419 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
3420 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
;; Expander made of two sets: a shuffle of V4SI operand 1 with selector
;; {2, 3, 2, 3}, then a set of V2DF operand 0 from elements 0 and 1.
;; operands[2] is a fresh V4SI pseudo.
;; NOTE(review): presumably operands[2] carries the shuffled value between
;; the two sets -- confirm against the full pattern.
3424 (define_expand "vec_unpacks_float_hi_v4si"
3427 (match_operand:V4SI 1 "nonimmediate_operand")
3428 (parallel [(const_int 2) (const_int 3)
3429 (const_int 2) (const_int 3)])))
3430 (set (match_operand:V2DF 0 "register_operand")
3434 (parallel [(const_int 0) (const_int 1)]))))]
3436 "operands[2] = gen_reg_rtx (V4SImode);")
;; Set V2DF operand 0 from elements 0 and 1 of V4SI operand 1.
3438 (define_expand "vec_unpacks_float_lo_v4si"
3439 [(set (match_operand:V2DF 0 "register_operand")
3442 (match_operand:V4SI 1 "nonimmediate_operand")
3443 (parallel [(const_int 0) (const_int 1)]))))]
;; Expander made of two sets: a selection of elements 4-7 of V8SI operand 1,
;; then a set of V4DF operand 0.  operands[2] is a fresh V4SI pseudo.
;; NOTE(review): presumably operands[2] holds the selected high half.
3446 (define_expand "vec_unpacks_float_hi_v8si"
3449 (match_operand:V8SI 1 "nonimmediate_operand")
3450 (parallel [(const_int 4) (const_int 5)
3451 (const_int 6) (const_int 7)])))
3452 (set (match_operand:V4DF 0 "register_operand")
3456 "operands[2] = gen_reg_rtx (V4SImode);")
;; Set V4DF operand 0 from elements 0-3 of V8SI operand 1.
3458 (define_expand "vec_unpacks_float_lo_v8si"
3459 [(set (match_operand:V4DF 0 "register_operand")
3462 (match_operand:V8SI 1 "nonimmediate_operand")
3463 (parallel [(const_int 0) (const_int 1)
3464 (const_int 2) (const_int 3)]))))]
;; Sequence of sets: a shuffle of V4SI operand 1 with selector {2, 3, 2, 3};
;; a set using elements 0 and 1; an lt of operand 6 against operand 3; an and
;; of operand 7 with operand 4; finally operand 0 = operand 6 + operand 8.
;; Preparation: operand 3 is the V2DF zero vector, operand 4 a V2DF constant
;; built from x (dconst1 passed through real_ldexp with exponent 32),
;; operand 5 a V4SI pseudo and operands 6-8 V2DF pseudos.
;; NOTE(review): this looks like a signed conversion followed by adding 2^32
;; to the lanes that came out negative -- confirm against the full pattern.
3467 (define_expand "vec_unpacku_float_hi_v4si"
3470 (match_operand:V4SI 1 "nonimmediate_operand")
3471 (parallel [(const_int 2) (const_int 3)
3472 (const_int 2) (const_int 3)])))
3477 (parallel [(const_int 0) (const_int 1)]))))
3479 (lt:V2DF (match_dup 6) (match_dup 3)))
3481 (and:V2DF (match_dup 7) (match_dup 4)))
3482 (set (match_operand:V2DF 0 "register_operand")
3483 (plus:V2DF (match_dup 6) (match_dup 8)))]
3486 REAL_VALUE_TYPE TWO32r;
3490 real_ldexp (&TWO32r, &dconst1, 32);
3491 x = const_double_from_real_value (TWO32r, DFmode);
3493 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
3494 operands[4] = force_reg (V2DFmode,
3495 ix86_build_const_vector (V2DFmode, 1, x));
3497 operands[5] = gen_reg_rtx (V4SImode);
3499 for (i = 6; i < 9; i++)
3500 operands[i] = gen_reg_rtx (V2DFmode);
;; Sequence of sets: a set using elements 0 and 1 of V4SI operand 1; an lt of
;; operand 5 against operand 3; an and of operand 6 with operand 4; finally
;; operand 0 = operand 5 + operand 7.
;; Preparation: operand 3 is the V2DF zero vector, operand 4 a V2DF constant
;; built from x (dconst1 passed through real_ldexp with exponent 32), and
;; operands 5-7 are V2DF pseudos.
;; NOTE(review): this looks like a signed conversion followed by adding 2^32
;; to the lanes that came out negative -- confirm against the full pattern.
3503 (define_expand "vec_unpacku_float_lo_v4si"
3507 (match_operand:V4SI 1 "nonimmediate_operand")
3508 (parallel [(const_int 0) (const_int 1)]))))
3510 (lt:V2DF (match_dup 5) (match_dup 3)))
3512 (and:V2DF (match_dup 6) (match_dup 4)))
3513 (set (match_operand:V2DF 0 "register_operand")
3514 (plus:V2DF (match_dup 5) (match_dup 7)))]
3517 REAL_VALUE_TYPE TWO32r;
3521 real_ldexp (&TWO32r, &dconst1, 32);
3522 x = const_double_from_real_value (TWO32r, DFmode);
3524 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
3525 operands[4] = force_reg (V2DFmode,
3526 ix86_build_const_vector (V2DFmode, 1, x));
3528 for (i = 5; i < 8; i++)
3529 operands[i] = gen_reg_rtx (V2DFmode);
;; Emitted sequence: tmp[5] = gen_vec_extract_hi_v8si of operand 1;
;; tmp[2] = gen_floatv4siv4df2 of tmp[5]; tmp[3] = LT (tmp[2], tmp[0]);
;; tmp[4] = tmp[3] AND tmp[1]; operand 0 = tmp[2] + tmp[4].
;; tmp[0] is the V4DF zero vector and tmp[1] a V4DF constant built from x
;; (dconst1 passed through real_ldexp with exponent 32).
3532 (define_expand "vec_unpacku_float_hi_v8si"
3533 [(match_operand:V4DF 0 "register_operand")
3534 (match_operand:V8SI 1 "register_operand")]
3537 REAL_VALUE_TYPE TWO32r;
3541 real_ldexp (&TWO32r, &dconst1, 32);
3542 x = const_double_from_real_value (TWO32r, DFmode);
3544 tmp[0] = force_reg (V4DFmode, CONST0_RTX (V4DFmode));
3545 tmp[1] = force_reg (V4DFmode, ix86_build_const_vector (V4DFmode, 1, x));
3546 tmp[5] = gen_reg_rtx (V4SImode);
3548 for (i = 2; i < 5; i++)
3549 tmp[i] = gen_reg_rtx (V4DFmode);
3550 emit_insn (gen_vec_extract_hi_v8si (tmp[5], operands[1]));
3551 emit_insn (gen_floatv4siv4df2 (tmp[2], tmp[5]));
3552 emit_insn (gen_rtx_SET (VOIDmode, tmp[3],
3553 gen_rtx_LT (V4DFmode, tmp[2], tmp[0])));
3554 emit_insn (gen_andv4df3 (tmp[4], tmp[3], tmp[1]));
3555 emit_insn (gen_addv4df3 (operands[0], tmp[2], tmp[4]));
;; Emitted sequence: tmp[2] = gen_avx_cvtdq2pd256_2 of operand 1;
;; tmp[3] = LT (tmp[2], tmp[0]); tmp[4] = tmp[3] AND tmp[1];
;; operand 0 = tmp[2] + tmp[4].
;; tmp[0] is the V4DF zero vector and tmp[1] a V4DF constant built from x
;; (dconst1 passed through real_ldexp with exponent 32).
3559 (define_expand "vec_unpacku_float_lo_v8si"
3560 [(match_operand:V4DF 0 "register_operand")
3561 (match_operand:V8SI 1 "nonimmediate_operand")]
3564 REAL_VALUE_TYPE TWO32r;
3568 real_ldexp (&TWO32r, &dconst1, 32);
3569 x = const_double_from_real_value (TWO32r, DFmode);
3571 tmp[0] = force_reg (V4DFmode, CONST0_RTX (V4DFmode));
3572 tmp[1] = force_reg (V4DFmode, ix86_build_const_vector (V4DFmode, 1, x));
3574 for (i = 2; i < 5; i++)
3575 tmp[i] = gen_reg_rtx (V4DFmode);
3576 emit_insn (gen_avx_cvtdq2pd256_2 (tmp[2], operands[1]));
3577 emit_insn (gen_rtx_SET (VOIDmode, tmp[3],
3578 gen_rtx_LT (V4DFmode, tmp[2], tmp[0])));
3579 emit_insn (gen_andv4df3 (tmp[4], tmp[3], tmp[1]));
3580 emit_insn (gen_addv4df3 (operands[0], tmp[2], tmp[4]));
;; Truncate operands 1 and 2 (VF2_512_256 modes) to <sf2dfmode> with two
;; float_truncate sets, then set operand 0 (<ssePSmode>) from a vec_concat.
;; operands[3] and operands[4] are fresh <sf2dfmode> pseudos.
;; NOTE(review): presumably these hold the two truncated halves.
3584 (define_expand "vec_pack_trunc_<mode>"
3586 (float_truncate:<sf2dfmode>
3587 (match_operand:VF2_512_256 1 "nonimmediate_operand")))
3589 (float_truncate:<sf2dfmode>
3590 (match_operand:VF2_512_256 2 "nonimmediate_operand")))
3591 (set (match_operand:<ssePSmode> 0 "register_operand")
3592 (vec_concat:<ssePSmode>
3597 operands[3] = gen_reg_rtx (<sf2dfmode>mode);
3598 operands[4] = gen_reg_rtx (<sf2dfmode>mode);
;; Pack two V2DF operands into one V4SF result.  With TARGET_AVX and not
;; TARGET_PREFER_AVX128, operand 1 is forced into a register, concatenated
;; with operand 2 into a V4DF temporary and converted with one
;; avx_cvtpd2ps256.  The other path visible here converts each operand with
;; sse2_cvtpd2ps and merges the two results with sse_movlhps.
3601 (define_expand "vec_pack_trunc_v2df"
3602 [(match_operand:V4SF 0 "register_operand")
3603 (match_operand:V2DF 1 "nonimmediate_operand")
3604 (match_operand:V2DF 2 "nonimmediate_operand")]
3609 if (TARGET_AVX && !TARGET_PREFER_AVX128)
3611 tmp0 = gen_reg_rtx (V4DFmode);
3612 tmp1 = force_reg (V2DFmode, operands[1]);
3614 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
3615 emit_insn (gen_avx_cvtpd2ps256 (operands[0], tmp0));
3619 tmp0 = gen_reg_rtx (V4SFmode);
3620 tmp1 = gen_reg_rtx (V4SFmode);
3622 emit_insn (gen_sse2_cvtpd2ps (tmp0, operands[1]));
3623 emit_insn (gen_sse2_cvtpd2ps (tmp1, operands[2]));
3624 emit_insn (gen_sse_movlhps (operands[0], tmp0, tmp1));
;; Convert each V8DF operand to a V8SI temporary with fix_truncv8dfv8si2 and
;; concatenate the two temporaries into V16SI operand 0.
3629 (define_expand "vec_pack_sfix_trunc_v8df"
3630 [(match_operand:V16SI 0 "register_operand")
3631 (match_operand:V8DF 1 "nonimmediate_operand")
3632 (match_operand:V8DF 2 "nonimmediate_operand")]
3637 r1 = gen_reg_rtx (V8SImode);
3638 r2 = gen_reg_rtx (V8SImode);
3640 emit_insn (gen_fix_truncv8dfv8si2 (r1, operands[1]));
3641 emit_insn (gen_fix_truncv8dfv8si2 (r2, operands[2]));
3642 emit_insn (gen_avx_vec_concatv16si (operands[0], r1, r2));
;; Convert each V4DF operand to a V4SI temporary with fix_truncv4dfv4si2 and
;; concatenate the two temporaries into V8SI operand 0.
3646 (define_expand "vec_pack_sfix_trunc_v4df"
3647 [(match_operand:V8SI 0 "register_operand")
3648 (match_operand:V4DF 1 "nonimmediate_operand")
3649 (match_operand:V4DF 2 "nonimmediate_operand")]
3654 r1 = gen_reg_rtx (V4SImode);
3655 r2 = gen_reg_rtx (V4SImode);
3657 emit_insn (gen_fix_truncv4dfv4si2 (r1, operands[1]));
3658 emit_insn (gen_fix_truncv4dfv4si2 (r2, operands[2]));
3659 emit_insn (gen_avx_vec_concatv8si (operands[0], r1, r2));
;; Pack two V2DF operands into one V4SI result.  With TARGET_AVX and not
;; TARGET_PREFER_AVX128, the operands are concatenated into a V4DF temporary
;; and converted with one fix_truncv4dfv4si2.  The other path visible here
;; converts each operand with sse2_cvttpd2dq and combines the V2DI lowparts
;; of the two results with vec_interleave_lowv2di.
3663 (define_expand "vec_pack_sfix_trunc_v2df"
3664 [(match_operand:V4SI 0 "register_operand")
3665 (match_operand:V2DF 1 "nonimmediate_operand")
3666 (match_operand:V2DF 2 "nonimmediate_operand")]
3671 if (TARGET_AVX && !TARGET_PREFER_AVX128)
3673 tmp0 = gen_reg_rtx (V4DFmode);
3674 tmp1 = force_reg (V2DFmode, operands[1]);
3676 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
3677 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp0));
3681 tmp0 = gen_reg_rtx (V4SImode);
3682 tmp1 = gen_reg_rtx (V4SImode);
3684 emit_insn (gen_sse2_cvttpd2dq (tmp0, operands[1]));
3685 emit_insn (gen_sse2_cvttpd2dq (tmp1, operands[2]));
3687 (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]),
3688 gen_lowpart (V2DImode, tmp0),
3689 gen_lowpart (V2DImode, tmp1)));
;; Maps a DF vector mode to the SI vector mode with twice as many elements.
3694 (define_mode_attr ssepackfltmode
3695 [(V8DF "V16SI") (V4DF "V8SI") (V2DF "V4SI")])
;; tmp[0] and tmp[1] come from ix86_expand_adjust_ufix_to_sfix_si applied to
;; operands 1 and 2; the same calls fill tmp[2] and tmp[3].  tmp[4] receives
;; vec_pack_sfix_trunc_<mode> of tmp[0] and tmp[1].  tmp[5] is built from
;; tmp[2] and tmp[3] by ix86_expand_vec_extract_even_odd: directly in the
;; result mode when that mode is V4SI or TARGET_AVX2 holds, and through
;; V8SF lowparts in the other branch visible here.  Operand 0 is tmp[4] XOR
;; tmp[5].
;; NOTE(review): tmp[2]/tmp[3] presumably hold per-lane correction masks for
;; the unsigned range -- confirm in ix86_expand_adjust_ufix_to_sfix_si.
3697 (define_expand "vec_pack_ufix_trunc_<mode>"
3698 [(match_operand:<ssepackfltmode> 0 "register_operand")
3699 (match_operand:VF2_128_256 1 "register_operand")
3700 (match_operand:VF2_128_256 2 "register_operand")]
3704 tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]);
3705 tmp[1] = ix86_expand_adjust_ufix_to_sfix_si (operands[2], &tmp[3]);
3706 tmp[4] = gen_reg_rtx (<ssepackfltmode>mode);
3707 emit_insn (gen_vec_pack_sfix_trunc_<mode> (tmp[4], tmp[0], tmp[1]));
3708 if (<ssepackfltmode>mode == V4SImode || TARGET_AVX2)
3710 tmp[5] = gen_reg_rtx (<ssepackfltmode>mode);
3711 ix86_expand_vec_extract_even_odd (tmp[5], tmp[2], tmp[3], 0);
3715 tmp[5] = gen_reg_rtx (V8SFmode);
3716 ix86_expand_vec_extract_even_odd (tmp[5], gen_lowpart (V8SFmode, tmp[2]),
3717 gen_lowpart (V8SFmode, tmp[3]), 0);
3718 tmp[5] = gen_lowpart (V8SImode, tmp[5]);
3720 tmp[6] = expand_simple_binop (<ssepackfltmode>mode, XOR, tmp[4], tmp[5],
3721 operands[0], 0, OPTAB_DIRECT);
3722 if (tmp[6] != operands[0])
3723 emit_move_insn (operands[0], tmp[6]);
;; Convert each V4DF operand to a V4SI temporary with avx_cvtpd2dq256 and
;; concatenate the two temporaries into V8SI operand 0.
3727 (define_expand "vec_pack_sfix_v4df"
3728 [(match_operand:V8SI 0 "register_operand")
3729 (match_operand:V4DF 1 "nonimmediate_operand")
3730 (match_operand:V4DF 2 "nonimmediate_operand")]
3735 r1 = gen_reg_rtx (V4SImode);
3736 r2 = gen_reg_rtx (V4SImode);
3738 emit_insn (gen_avx_cvtpd2dq256 (r1, operands[1]));
3739 emit_insn (gen_avx_cvtpd2dq256 (r2, operands[2]));
3740 emit_insn (gen_avx_vec_concatv8si (operands[0], r1, r2));
;; Pack two V2DF operands into one V4SI result.  With TARGET_AVX and not
;; TARGET_PREFER_AVX128, the operands are concatenated into a V4DF temporary
;; and converted with one avx_cvtpd2dq256.  The other path visible here
;; converts each operand with sse2_cvtpd2dq and combines the V2DI lowparts
;; of the two results with vec_interleave_lowv2di.
3744 (define_expand "vec_pack_sfix_v2df"
3745 [(match_operand:V4SI 0 "register_operand")
3746 (match_operand:V2DF 1 "nonimmediate_operand")
3747 (match_operand:V2DF 2 "nonimmediate_operand")]
3752 if (TARGET_AVX && !TARGET_PREFER_AVX128)
3754 tmp0 = gen_reg_rtx (V4DFmode);
3755 tmp1 = force_reg (V2DFmode, operands[1]);
3757 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
3758 emit_insn (gen_avx_cvtpd2dq256 (operands[0], tmp0));
3762 tmp0 = gen_reg_rtx (V4SImode);
3763 tmp1 = gen_reg_rtx (V4SImode);
3765 emit_insn (gen_sse2_cvtpd2dq (tmp0, operands[1]));
3766 emit_insn (gen_sse2_cvtpd2dq (tmp1, operands[2]));
3768 (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]),
3769 gen_lowpart (V2DImode, tmp0),
3770 gen_lowpart (V2DImode, tmp1)));
3775 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3777 ;; Parallel single-precision floating point element swizzling
3779 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Wrapper around sse_movhlps: the operands go through
;; ix86_fixup_binary_operands, the insn is emitted into the returned
;; destination, and that destination is copied to operand 0 when it differs.
3781 (define_expand "sse_movhlps_exp"
3782 [(set (match_operand:V4SF 0 "nonimmediate_operand")
3785 (match_operand:V4SF 1 "nonimmediate_operand")
3786 (match_operand:V4SF 2 "nonimmediate_operand"))
3787 (parallel [(const_int 6)
3793 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
3795 emit_insn (gen_sse_movhlps (dst, operands[1], operands[2]));
3797 /* Fix up the destination if needed. */
3798 if (dst != operands[0])
3799 emit_move_insn (operands[0], dst);
;; Five alternatives emitting movhlps, vmovhlps, movlps, vmovlps or %vmovhps;
;; the last one has a memory destination.  Requires TARGET_SSE and that
;; operands 1 and 2 are not both in memory.
3804 (define_insn "sse_movhlps"
3805 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,m")
3808 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
3809 (match_operand:V4SF 2 "nonimmediate_operand" " x,x,o,o,x"))
3810 (parallel [(const_int 6)
3814 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3816 movhlps\t{%2, %0|%0, %2}
3817 vmovhlps\t{%2, %1, %0|%0, %1, %2}
3818 movlps\t{%H2, %0|%0, %H2}
3819 vmovlps\t{%H2, %1, %0|%0, %1, %H2}
3820 %vmovhps\t{%2, %0|%q0, %2}"
3821 [(set_attr "isa" "noavx,avx,noavx,avx,*")
3822 (set_attr "type" "ssemov")
3823 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
3824 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
;; Wrapper around sse_movlhps: the operands go through
;; ix86_fixup_binary_operands, the insn is emitted into the returned
;; destination, and that destination is copied to operand 0 when it differs.
3826 (define_expand "sse_movlhps_exp"
3827 [(set (match_operand:V4SF 0 "nonimmediate_operand")
3830 (match_operand:V4SF 1 "nonimmediate_operand")
3831 (match_operand:V4SF 2 "nonimmediate_operand"))
3832 (parallel [(const_int 0)
3838 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
3840 emit_insn (gen_sse_movlhps (dst, operands[1], operands[2]));
3842 /* Fix up the destination if needed. */
3843 if (dst != operands[0])
3844 emit_move_insn (operands[0], dst);
;; Five alternatives emitting movlhps, vmovlhps, movhps, vmovhps or %vmovlps;
;; the last one has an offsettable-memory destination printed with %H0.
;; Requires TARGET_SSE and ix86_binary_operator_ok for the operands.
3849 (define_insn "sse_movlhps"
3850 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,o")
3853 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
3854 (match_operand:V4SF 2 "nonimmediate_operand" " x,x,m,m,x"))
3855 (parallel [(const_int 0)
3859 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
3861 movlhps\t{%2, %0|%0, %2}
3862 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3863 movhps\t{%2, %0|%0, %q2}
3864 vmovhps\t{%2, %1, %0|%0, %1, %q2}
3865 %vmovlps\t{%2, %H0|%H0, %2}"
3866 [(set_attr "isa" "noavx,avx,noavx,avx,*")
3867 (set_attr "type" "ssemov")
3868 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
3869 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
3871 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; Selector {2, 10, 3, 11, 6, 14, 7, 15} over V8SF operands 1 and 2.  Emits
;; vunpckhps.
3872 (define_insn "avx_unpckhps256"
3873 [(set (match_operand:V8SF 0 "register_operand" "=x")
3876 (match_operand:V8SF 1 "register_operand" "x")
3877 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3878 (parallel [(const_int 2) (const_int 10)
3879 (const_int 3) (const_int 11)
3880 (const_int 6) (const_int 14)
3881 (const_int 7) (const_int 15)])))]
3883 "vunpckhps\t{%2, %1, %0|%0, %1, %2}"
3884 [(set_attr "type" "sselog")
3885 (set_attr "prefix" "vex")
3886 (set_attr "mode" "V8SF")])
;; Three-step expansion over V8SF operands 1 and 2: one selection with
;; {0, 8, 1, 9, 4, 12, 5, 13}, one with {2, 10, 3, 11, 6, 14, 7, 15}, and a
;; final selection {4, 5, 6, 7, 12, 13, 14, 15} that sets operand 0.
;; operands[3] and operands[4] are fresh V8SF pseudos.
;; NOTE(review): presumably the pseudos hold the two intermediate results
;; that feed the final selection -- confirm against the full pattern.
3888 (define_expand "vec_interleave_highv8sf"
3892 (match_operand:V8SF 1 "register_operand" "x")
3893 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3894 (parallel [(const_int 0) (const_int 8)
3895 (const_int 1) (const_int 9)
3896 (const_int 4) (const_int 12)
3897 (const_int 5) (const_int 13)])))
3903 (parallel [(const_int 2) (const_int 10)
3904 (const_int 3) (const_int 11)
3905 (const_int 6) (const_int 14)
3906 (const_int 7) (const_int 15)])))
3907 (set (match_operand:V8SF 0 "register_operand")
3912 (parallel [(const_int 4) (const_int 5)
3913 (const_int 6) (const_int 7)
3914 (const_int 12) (const_int 13)
3915 (const_int 14) (const_int 15)])))]
3918 operands[3] = gen_reg_rtx (V8SFmode);
3919 operands[4] = gen_reg_rtx (V8SFmode);
;; Selector {2, 6, 3, 7} over V4SF operands 1 and 2.  Two alternatives:
;; unpckhps (operand 1 tied to the destination) and vunpckhps.
3922 (define_insn "vec_interleave_highv4sf"
3923 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3926 (match_operand:V4SF 1 "register_operand" "0,x")
3927 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
3928 (parallel [(const_int 2) (const_int 6)
3929 (const_int 3) (const_int 7)])))]
3932 unpckhps\t{%2, %0|%0, %2}
3933 vunpckhps\t{%2, %1, %0|%0, %1, %2}"
3934 [(set_attr "isa" "noavx,avx")
3935 (set_attr "type" "sselog")
3936 (set_attr "prefix" "orig,vex")
3937 (set_attr "mode" "V4SF")])
3939 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; Selector {0, 8, 1, 9, 4, 12, 5, 13} over V8SF operands 1 and 2.  Emits
;; vunpcklps.
3940 (define_insn "avx_unpcklps256"
3941 [(set (match_operand:V8SF 0 "register_operand" "=x")
3944 (match_operand:V8SF 1 "register_operand" "x")
3945 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3946 (parallel [(const_int 0) (const_int 8)
3947 (const_int 1) (const_int 9)
3948 (const_int 4) (const_int 12)
3949 (const_int 5) (const_int 13)])))]
3951 "vunpcklps\t{%2, %1, %0|%0, %1, %2}"
3952 [(set_attr "type" "sselog")
3953 (set_attr "prefix" "vex")
3954 (set_attr "mode" "V8SF")])
;; Three-step expansion over V8SF operands 1 and 2: one selection with
;; {0, 8, 1, 9, 4, 12, 5, 13}, one with {2, 10, 3, 11, 6, 14, 7, 15}, and a
;; final selection {0, 1, 2, 3, 8, 9, 10, 11} that sets operand 0.
;; operands[3] and operands[4] are fresh V8SF pseudos.
;; NOTE(review): presumably the pseudos hold the two intermediate results
;; that feed the final selection -- confirm against the full pattern.
3956 (define_expand "vec_interleave_lowv8sf"
3960 (match_operand:V8SF 1 "register_operand" "x")
3961 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3962 (parallel [(const_int 0) (const_int 8)
3963 (const_int 1) (const_int 9)
3964 (const_int 4) (const_int 12)
3965 (const_int 5) (const_int 13)])))
3971 (parallel [(const_int 2) (const_int 10)
3972 (const_int 3) (const_int 11)
3973 (const_int 6) (const_int 14)
3974 (const_int 7) (const_int 15)])))
3975 (set (match_operand:V8SF 0 "register_operand")
3980 (parallel [(const_int 0) (const_int 1)
3981 (const_int 2) (const_int 3)
3982 (const_int 8) (const_int 9)
3983 (const_int 10) (const_int 11)])))]
3986 operands[3] = gen_reg_rtx (V8SFmode);
3987 operands[4] = gen_reg_rtx (V8SFmode);
;; Selector {0, 4, 1, 5} over V4SF operands 1 and 2.  Two alternatives:
;; unpcklps (operand 1 tied to the destination) and vunpcklps.
3990 (define_insn "vec_interleave_lowv4sf"
3991 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3994 (match_operand:V4SF 1 "register_operand" "0,x")
3995 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
3996 (parallel [(const_int 0) (const_int 4)
3997 (const_int 1) (const_int 5)])))]
4000 unpcklps\t{%2, %0|%0, %2}
4001 vunpcklps\t{%2, %1, %0|%0, %1, %2}"
4002 [(set_attr "isa" "noavx,avx")
4003 (set_attr "type" "sselog")
4004 (set_attr "prefix" "orig,vex")
4005 (set_attr "mode" "V4SF")])
4007 ;; These are modeled with the same vec_concat as the others so that we
4008 ;; capture users of shufps that can use the new instructions
;; Selector {1, 1, 3, 3, 5, 5, 7, 7}: every odd-indexed element of V8SF
;; operand 1 appears twice.  Emits vmovshdup.
4009 (define_insn "avx_movshdup256"
4010 [(set (match_operand:V8SF 0 "register_operand" "=x")
4013 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
4015 (parallel [(const_int 1) (const_int 1)
4016 (const_int 3) (const_int 3)
4017 (const_int 5) (const_int 5)
4018 (const_int 7) (const_int 7)])))]
4020 "vmovshdup\t{%1, %0|%0, %1}"
4021 [(set_attr "type" "sse")
4022 (set_attr "prefix" "vex")
4023 (set_attr "mode" "V8SF")])
;; V4SF form whose selector starts at element 1; operand 1 may be a register
;; or memory.  Emits %vmovshdup.
4025 (define_insn "sse3_movshdup"
4026 [(set (match_operand:V4SF 0 "register_operand" "=x")
4029 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
4031 (parallel [(const_int 1)
4036 "%vmovshdup\t{%1, %0|%0, %1}"
4037 [(set_attr "type" "sse")
4038 (set_attr "prefix_rep" "1")
4039 (set_attr "prefix" "maybe_vex")
4040 (set_attr "mode" "V4SF")])
;; Selector {0, 0, 2, 2, 4, 4, 6, 6}: every even-indexed element of V8SF
;; operand 1 appears twice.  Emits vmovsldup.
4042 (define_insn "avx_movsldup256"
4043 [(set (match_operand:V8SF 0 "register_operand" "=x")
4046 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
4048 (parallel [(const_int 0) (const_int 0)
4049 (const_int 2) (const_int 2)
4050 (const_int 4) (const_int 4)
4051 (const_int 6) (const_int 6)])))]
4053 "vmovsldup\t{%1, %0|%0, %1}"
4054 [(set_attr "type" "sse")
4055 (set_attr "prefix" "vex")
4056 (set_attr "mode" "V8SF")])
;; V4SF form whose selector starts at element 0; operand 1 may be a register
;; or memory.  Emits %vmovsldup.
4058 (define_insn "sse3_movsldup"
4059 [(set (match_operand:V4SF 0 "register_operand" "=x")
4062 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
4064 (parallel [(const_int 0)
4069 "%vmovsldup\t{%1, %0|%0, %1}"
4070 [(set_attr "type" "sse")
4071 (set_attr "prefix_rep" "1")
4072 (set_attr "prefix" "maybe_vex")
4073 (set_attr "mode" "V4SF")])
;; Decode the immediate in operand 3 into the eight element indices taken by
;; avx_shufps256_1.  Each 2-bit field of the mask is used twice: fields 0 and
;; 1 give indices offset by 0 and by 4, fields 2 and 3 give indices offset by
;; 8 and by 12.
4075 (define_expand "avx_shufps256"
4076 [(match_operand:V8SF 0 "register_operand")
4077 (match_operand:V8SF 1 "register_operand")
4078 (match_operand:V8SF 2 "nonimmediate_operand")
4079 (match_operand:SI 3 "const_int_operand")]
4082 int mask = INTVAL (operands[3]);
4083 emit_insn (gen_avx_shufps256_1 (operands[0], operands[1], operands[2],
4084 GEN_INT ((mask >> 0) & 3),
4085 GEN_INT ((mask >> 2) & 3),
4086 GEN_INT (((mask >> 4) & 3) + 8),
4087 GEN_INT (((mask >> 6) & 3) + 8),
4088 GEN_INT (((mask >> 0) & 3) + 4),
4089 GEN_INT (((mask >> 2) & 3) + 4),
4090 GEN_INT (((mask >> 4) & 3) + 12),
4091 GEN_INT (((mask >> 6) & 3) + 12)));
4095 ;; Each 2-bit field of the mask selects 2 elements, one in each 128-bit lane.
;; Operands 3-10 are the eight selected element indices.  The condition
;; requires operands 7-10 to equal operands 3-6 plus 4.  The output code
;; rebuilds the immediate from operands 3-6 (two bits each, after subtracting
;; 8 from operands 5 and 6), stores it in operands[3] and emits vshufps.
4096 (define_insn "avx_shufps256_1"
4097 [(set (match_operand:V8SF 0 "register_operand" "=x")
4100 (match_operand:V8SF 1 "register_operand" "x")
4101 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
4102 (parallel [(match_operand 3 "const_0_to_3_operand" )
4103 (match_operand 4 "const_0_to_3_operand" )
4104 (match_operand 5 "const_8_to_11_operand" )
4105 (match_operand 6 "const_8_to_11_operand" )
4106 (match_operand 7 "const_4_to_7_operand" )
4107 (match_operand 8 "const_4_to_7_operand" )
4108 (match_operand 9 "const_12_to_15_operand")
4109 (match_operand 10 "const_12_to_15_operand")])))]
4111 && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
4112 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
4113 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
4114 && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4))"
4117 mask = INTVAL (operands[3]);
4118 mask |= INTVAL (operands[4]) << 2;
4119 mask |= (INTVAL (operands[5]) - 8) << 4;
4120 mask |= (INTVAL (operands[6]) - 8) << 6;
4121 operands[3] = GEN_INT (mask);
4123 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4125 [(set_attr "type" "sseshuf")
4126 (set_attr "length_immediate" "1")
4127 (set_attr "prefix" "vex")
4128 (set_attr "mode" "V8SF")])
;; Decode the immediate in operand 3 into the four element indices taken by
;; sse_shufps_v4sf: fields 0 and 1 are used as they are, fields 2 and 3 are
;; offset by 4.
4130 (define_expand "sse_shufps"
4131 [(match_operand:V4SF 0 "register_operand")
4132 (match_operand:V4SF 1 "register_operand")
4133 (match_operand:V4SF 2 "nonimmediate_operand")
4134 (match_operand:SI 3 "const_int_operand")]
4137 int mask = INTVAL (operands[3]);
4138 emit_insn (gen_sse_shufps_v4sf (operands[0], operands[1], operands[2],
4139 GEN_INT ((mask >> 0) & 3),
4140 GEN_INT ((mask >> 2) & 3),
4141 GEN_INT (((mask >> 4) & 3) + 4),
4142 GEN_INT (((mask >> 6) & 3) + 4)));
;; Select four elements from the concatenation of operands 1 and 2: operands
;; 3 and 4 are indices 0-3, operands 5 and 6 are indices 4-7.  The output
;; code packs them into an immediate (two bits each, after subtracting 4 from
;; operands 5 and 6), stores it in operands[3] and returns shufps or vshufps
;; depending on the alternative.
4146 (define_insn "sse_shufps_<mode>"
4147 [(set (match_operand:VI4F_128 0 "register_operand" "=x,x")
4148 (vec_select:VI4F_128
4149 (vec_concat:<ssedoublevecmode>
4150 (match_operand:VI4F_128 1 "register_operand" "0,x")
4151 (match_operand:VI4F_128 2 "nonimmediate_operand" "xm,xm"))
4152 (parallel [(match_operand 3 "const_0_to_3_operand")
4153 (match_operand 4 "const_0_to_3_operand")
4154 (match_operand 5 "const_4_to_7_operand")
4155 (match_operand 6 "const_4_to_7_operand")])))]
4159 mask |= INTVAL (operands[3]) << 0;
4160 mask |= INTVAL (operands[4]) << 2;
4161 mask |= (INTVAL (operands[5]) - 4) << 4;
4162 mask |= (INTVAL (operands[6]) - 4) << 6;
4163 operands[3] = GEN_INT (mask);
4165 switch (which_alternative)
4168 return "shufps\t{%3, %2, %0|%0, %2, %3}";
4170 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4175 [(set_attr "isa" "noavx,avx")
4176 (set_attr "type" "sseshuf")
4177 (set_attr "length_immediate" "1")
4178 (set_attr "prefix" "orig,vex")
4179 (set_attr "mode" "V4SF")])
;; Set V2SF operand 0 from elements 2 and 3 of V4SF operand 1.  Three
;; alternatives: store to memory (%vmovhps), register to register
;; (%vmovhlps), and load from offsettable memory (%vmovlps on %H1).
4181 (define_insn "sse_storehps"
4182 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
4184 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
4185 (parallel [(const_int 2) (const_int 3)])))]
4188 %vmovhps\t{%1, %0|%q0, %1}
4189 %vmovhlps\t{%1, %d0|%d0, %1}
4190 %vmovlps\t{%H1, %d0|%d0, %H1}"
4191 [(set_attr "type" "ssemov")
4192 (set_attr "prefix" "maybe_vex")
4193 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Wrapper around sse_loadhps: the operands go through
;; ix86_fixup_binary_operands, the insn is emitted into the returned
;; destination, and that destination is copied to operand 0 when it differs.
4195 (define_expand "sse_loadhps_exp"
4196 [(set (match_operand:V4SF 0 "nonimmediate_operand")
4199 (match_operand:V4SF 1 "nonimmediate_operand")
4200 (parallel [(const_int 0) (const_int 1)]))
4201 (match_operand:V2SF 2 "nonimmediate_operand")))]
4204 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
4206 emit_insn (gen_sse_loadhps (dst, operands[1], operands[2]));
4208 /* Fix up the destination if needed. */
4209 if (dst != operands[0])
4210 emit_move_insn (operands[0], dst);
;; Combine elements 0 and 1 of V4SF operand 1 with V2SF operand 2.  Five
;; alternatives emitting movhps, vmovhps, movlhps, vmovlhps or %vmovlps; the
;; last one has an offsettable-memory destination printed with %H0.
;; NOTE(review): operand order suggests operand 2 becomes the high half of
;; the result -- the enclosing vec_concat is not shown here; confirm.
4215 (define_insn "sse_loadhps"
4216 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,o")
4219 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
4220 (parallel [(const_int 0) (const_int 1)]))
4221 (match_operand:V2SF 2 "nonimmediate_operand" " m,m,x,x,x")))]
4224 movhps\t{%2, %0|%0, %q2}
4225 vmovhps\t{%2, %1, %0|%0, %1, %q2}
4226 movlhps\t{%2, %0|%0, %2}
4227 vmovlhps\t{%2, %1, %0|%0, %1, %2}
4228 %vmovlps\t{%2, %H0|%H0, %2}"
4229 [(set_attr "isa" "noavx,avx,noavx,avx,*")
4230 (set_attr "type" "ssemov")
4231 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
4232 (set_attr "mode" "V2SF,V2SF,V4SF,V4SF,V2SF")])
;; Set V2SF operand 0 from elements 0 and 1 of V4SF operand 1.  Three
;; alternatives: store to memory (%vmovlps), register to register
;; (%vmovaps), and load from memory (%vmovlps).
4234 (define_insn "sse_storelps"
4235 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
4237 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,m")
4238 (parallel [(const_int 0) (const_int 1)])))]
4241 %vmovlps\t{%1, %0|%q0, %1}
4242 %vmovaps\t{%1, %0|%0, %1}
4243 %vmovlps\t{%1, %d0|%d0, %q1}"
4244 [(set_attr "type" "ssemov")
4245 (set_attr "prefix" "maybe_vex")
4246 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Wrapper around sse_loadlps: the operands go through
;; ix86_fixup_binary_operands, the insn is emitted into the returned
;; destination, and that destination is copied to operand 0 when it differs.
4248 (define_expand "sse_loadlps_exp"
4249 [(set (match_operand:V4SF 0 "nonimmediate_operand")
4251 (match_operand:V2SF 2 "nonimmediate_operand")
4253 (match_operand:V4SF 1 "nonimmediate_operand")
4254 (parallel [(const_int 2) (const_int 3)]))))]
4257 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
4259 emit_insn (gen_sse_loadlps (dst, operands[1], operands[2]));
4261 /* Fix up the destination if needed. */
4262 if (dst != operands[0])
4263 emit_move_insn (operands[0], dst);
;; Combine V2SF operand 2 with elements 2 and 3 of V4SF operand 1.  Five
;; alternatives: shufps/vshufps with immediate 0xe4 for the register forms,
;; movlps/vmovlps for a memory operand 2, and %vmovlps for a memory
;; destination.
;; NOTE(review): operand order suggests operand 2 becomes the low half of
;; the result -- the enclosing vec_concat is not shown here; confirm.
4268 (define_insn "sse_loadlps"
4269 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,m")
4271 (match_operand:V2SF 2 "nonimmediate_operand" " 0,x,m,m,x")
4273 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,0,x,0")
4274 (parallel [(const_int 2) (const_int 3)]))))]
4277 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
4278 vshufps\t{$0xe4, %1, %2, %0|%0, %2, %1, 0xe4}
4279 movlps\t{%2, %0|%0, %q2}
4280 vmovlps\t{%2, %1, %0|%0, %1, %q2}
4281 %vmovlps\t{%2, %0|%q0, %2}"
4282 [(set_attr "isa" "noavx,avx,noavx,avx,*")
4283 (set_attr "type" "sseshuf,sseshuf,ssemov,ssemov,ssemov")
4284 (set_attr "length_immediate" "1,1,*,*,*")
4285 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
4286 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
;; Register-only pattern over V4SF operands 2 and 1 with two alternatives:
;; movss (operand 1 tied to the destination) and vmovss.  The "mode"
;; attribute is SF.
4288 (define_insn "sse_movss"
4289 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
4291 (match_operand:V4SF 2 "register_operand" " x,x")
4292 (match_operand:V4SF 1 "register_operand" " 0,x")
4296 movss\t{%2, %0|%0, %2}
4297 vmovss\t{%2, %1, %0|%0, %1, %2}"
4298 [(set_attr "isa" "noavx,avx")
4299 (set_attr "type" "ssemov")
4300 (set_attr "prefix" "orig,vex")
4301 (set_attr "mode" "SF")])
;; Duplicate element 0 of the V4SF register operand 1 across every element
;; of the VF1_128_256 destination.  Emits vbroadcastss.
4303 (define_insn "avx2_vec_dup<mode>"
4304 [(set (match_operand:VF1_128_256 0 "register_operand" "=x")
4305 (vec_duplicate:VF1_128_256
4307 (match_operand:V4SF 1 "register_operand" "x")
4308 (parallel [(const_int 0)]))))]
4310 "vbroadcastss\t{%1, %0|%0, %1}"
4311 [(set_attr "type" "sselog1")
4312 (set_attr "prefix" "vex")
4313 (set_attr "mode" "<MODE>")])
;; Variant taking element 0 of a V8SF register operand 1 and writing a V8SF
;; result.  Emits vbroadcastss with operand 1 printed through %x.
4315 (define_insn "avx2_vec_dupv8sf_1"
4316 [(set (match_operand:V8SF 0 "register_operand" "=x")
4319 (match_operand:V8SF 1 "register_operand" "x")
4320 (parallel [(const_int 0)]))))]
4322 "vbroadcastss\t{%x1, %0|%0, %x1}"
4323 [(set_attr "type" "sselog1")
4324 (set_attr "prefix" "vex")
4325 (set_attr "mode" "V8SF")])
;; Build a V4SF register from the SF operand 1.  Three alternatives: vshufps
;; with immediate 0 for a register source, vbroadcastss for a memory source,
;; and shufps with immediate 0 when operand 1 is tied to the destination.
4327 (define_insn "vec_dupv4sf"
4328 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
4330 (match_operand:SF 1 "nonimmediate_operand" "x,m,0")))]
4333 vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0}
4334 vbroadcastss\t{%1, %0|%0, %1}
4335 shufps\t{$0, %0, %0|%0, %0, 0}"
4336 [(set_attr "isa" "avx,avx,noavx")
4337 (set_attr "type" "sseshuf1,ssemov,sseshuf1")
4338 (set_attr "length_immediate" "1,0,1")
4339 (set_attr "prefix_extra" "0,1,*")
4340 (set_attr "prefix" "vex,vex,orig")
4341 (set_attr "mode" "V4SF")])
4343 ;; Although insertps takes register source, we prefer
4344 ;; unpcklps with register source since it is shorter.
;; Build a V2SF register from SF operands 1 and 2.  Seven alternatives:
;; unpcklps/vunpcklps for a register operand 2, insertps/vinsertps (immediate
;; 0x10) for a memory operand 2, %vmovss when operand 1 is in memory and
;; operand 2 uses the "C" constraint, and punpckldq/movd for the "*y"
;; destinations.
4345 (define_insn "*vec_concatv2sf_sse4_1"
4346 [(set (match_operand:V2SF 0 "register_operand" "=x,x,x,x,x,*y ,*y")
4348 (match_operand:SF 1 "nonimmediate_operand" " 0,x,0,x,m, 0 , m")
4349 (match_operand:SF 2 "vector_move_operand" " x,x,m,m,C,*ym, C")))]
4352 unpcklps\t{%2, %0|%0, %2}
4353 vunpcklps\t{%2, %1, %0|%0, %1, %2}
4354 insertps\t{$0x10, %2, %0|%0, %2, 0x10}
4355 vinsertps\t{$0x10, %2, %1, %0|%0, %1, %2, 0x10}
4356 %vmovss\t{%1, %0|%0, %1}
4357 punpckldq\t{%2, %0|%0, %2}
4358 movd\t{%1, %0|%0, %1}"
4359 [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
4360 (set_attr "type" "sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov")
4361 (set_attr "prefix_data16" "*,*,1,*,*,*,*")
4362 (set_attr "prefix_extra" "*,*,1,1,*,*,*")
4363 (set_attr "length_immediate" "*,*,1,1,*,*,*")
4364 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig")
4365 (set_attr "mode" "V4SF,V4SF,V4SF,V4SF,SF,DI,DI")])
4367 ;; ??? In theory we can match memory for the MMX alternative, but allowing
4368 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
4369 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Build a V2SF register from SF operands 1 and 2 (operand 2 is a register or
;; zero).  Four alternatives: unpcklps, movss, punpckldq and movd; the last
;; two use "*y" destinations.
4370 (define_insn "*vec_concatv2sf_sse"
4371 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
4373 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
4374 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
4377 unpcklps\t{%2, %0|%0, %2}
4378 movss\t{%1, %0|%0, %1}
4379 punpckldq\t{%2, %0|%0, %2}
4380 movd\t{%1, %0|%0, %1}"
4381 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
4382 (set_attr "mode" "V4SF,SF,DI,DI")])
;; Build a V4SF register from V2SF operands 1 and 2.  Four alternatives:
;; movlhps/vmovlhps for a register operand 2 and movhps/vmovhps for a memory
;; operand 2.
4384 (define_insn "*vec_concatv4sf"
4385 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x,x")
4387 (match_operand:V2SF 1 "register_operand" " 0,x,0,x")
4388 (match_operand:V2SF 2 "nonimmediate_operand" " x,x,m,m")))]
4391 movlhps\t{%2, %0|%0, %2}
4392 vmovlhps\t{%2, %1, %0|%0, %1, %2}
4393 movhps\t{%2, %0|%0, %q2}
4394 vmovhps\t{%2, %1, %0|%0, %1, %q2}"
4395 [(set_attr "isa" "noavx,avx,noavx,avx")
4396 (set_attr "type" "ssemov")
4397 (set_attr "prefix" "orig,vex,orig,vex")
4398 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF")])
;; Vector initialisation for the V_128 modes: hands operands 0 and 1 to
;; ix86_expand_vector_init with a first argument of false.
4400 (define_expand "vec_init<mode>"
4401 [(match_operand:V_128 0 "register_operand")
4405 ix86_expand_vector_init (false, operands[0], operands[1]);
4409 ;; Avoid combining registers from different units in a single alternative,
4410 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Pattern over a vec_duplicate of the scalar operand 2 and the vector
;; operand 1, with eleven alternatives.  Alternatives 0-3 take the "C"
;; constant for operand 1; alternatives 4-7 take a register operand 1
;; (movss/vmovss, pinsrd/vpinsrd); alternatives 8-10 have a memory
;; destination.  The "type" attribute is sselog for alternatives 0, 6 and 7,
;; imov for 9, fmov for 10 and ssemov for the rest.
4411 (define_insn "vec_set<mode>_0"
4412 [(set (match_operand:VI4F_128 0 "nonimmediate_operand"
4413 "=x,x,x ,x,x,x,x ,x ,m ,m ,m")
4415 (vec_duplicate:VI4F_128
4416 (match_operand:<ssescalarmode> 2 "general_operand"
4417 " x,m,*r,m,x,x,*rm,*rm,!x,!*re,!*fF"))
4418 (match_operand:VI4F_128 1 "vector_move_operand"
4419 " C,C,C ,C,0,x,0 ,x ,0 ,0 ,0")
4423 %vinsertps\t{$0xe, %d2, %0|%0, %d2, 0xe}
4424 %vmov<ssescalarmodesuffix>\t{%2, %0|%0, %2}
4425 %vmovd\t{%2, %0|%0, %2}
4426 movss\t{%2, %0|%0, %2}
4427 movss\t{%2, %0|%0, %2}
4428 vmovss\t{%2, %1, %0|%0, %1, %2}
4429 pinsrd\t{$0, %2, %0|%0, %2, 0}
4430 vpinsrd\t{$0, %2, %1, %0|%0, %1, %2, 0}
4434 [(set_attr "isa" "sse4,sse2,sse2,noavx,noavx,avx,sse4_noavx,avx,*,*,*")
4436 (cond [(eq_attr "alternative" "0,6,7")
4437 (const_string "sselog")
4438 (eq_attr "alternative" "9")
4439 (const_string "imov")
4440 (eq_attr "alternative" "10")
4441 (const_string "fmov")
4443 (const_string "ssemov")))
4444 (set_attr "prefix_extra" "*,*,*,*,*,*,1,1,*,*,*")
4445 (set_attr "length_immediate" "*,*,*,*,*,*,1,1,*,*,*")
4446 (set_attr "prefix" "maybe_vex,maybe_vex,maybe_vex,orig,orig,vex,orig,vex,*,*,*")
4447 (set_attr "mode" "SF,<ssescalarmode>,SI,SF,SF,SF,TI,TI,*,*,*")])
4449 ;; A subset is vec_setv4sf.
;; Operand 3 is accepted only when exact_log2 of it is a valid V4SF element
;; index; the unsigned compare also rejects a negative exact_log2 result.
;; Before output, operand 3 is replaced by that index shifted left by 4 and
;; printed as the immediate of insertps / vinsertps.
4450 (define_insn "*vec_setv4sf_sse4_1"
4451 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
4454 (match_operand:SF 2 "nonimmediate_operand" "xm,xm"))
4455 (match_operand:V4SF 1 "register_operand" "0,x")
4456 (match_operand:SI 3 "const_int_operand")))]
4458 && ((unsigned) exact_log2 (INTVAL (operands[3]))
4459 < GET_MODE_NUNITS (V4SFmode))"
4461 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
4462 switch (which_alternative)
4465 return "insertps\t{%3, %2, %0|%0, %2, %3}";
4467 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4472 [(set_attr "isa" "noavx,avx")
4473 (set_attr "type" "sselog")
4474 (set_attr "prefix_data16" "1,*")
4475 (set_attr "prefix_extra" "1")
4476 (set_attr "length_immediate" "1")
4477 (set_attr "prefix" "orig,vex")
4478 (set_attr "mode" "V4SF")])
;; insertps / vinsertps on V4SF operands with a 0..255 immediate (operand 3).
;; For a memory operand 2, count_s = operand 3 >> 6 is folded into the
;; address as a count_s * 4 byte offset in SFmode, and the visible rewrite of
;; operand 3 keeps only its low six bits (& 0x3f).
4480 (define_insn "sse4_1_insertps"
4481 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
4482 (unspec:V4SF [(match_operand:V4SF 2 "nonimmediate_operand" "xm,xm")
4483 (match_operand:V4SF 1 "register_operand" "0,x")
4484 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
4488 if (MEM_P (operands[2]))
4490 unsigned count_s = INTVAL (operands[3]) >> 6;
4492 operands[3] = GEN_INT (INTVAL (operands[3]) & 0x3f);
4493 operands[2] = adjust_address_nv (operands[2], SFmode, count_s * 4);
4495 switch (which_alternative)
4498 return "insertps\t{%3, %2, %0|%0, %2, %3}";
4500 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4505 [(set_attr "isa" "noavx,avx")
4506 (set_attr "type" "sselog")
4507 (set_attr "prefix_data16" "1,*")
4508 (set_attr "prefix_extra" "1")
4509 (set_attr "length_immediate" "1")
4510 (set_attr "prefix" "orig,vex")
4511 (set_attr "mode" "V4SF")])
;; NOTE(review): the opening line of this definition is not visible here;
;; the reload_completed condition and replacement pattern suggest a split.
;; A vec_duplicate of non-memory scalar operand 1 into a VI4F_128 memory
;; operand 0 is replaced by a plain set of operand 1 into operand 0, with
;; operand 0 re-addressed as <ssescalarmode> at byte offset 0.
4514 [(set (match_operand:VI4F_128 0 "memory_operand")
4516 (vec_duplicate:VI4F_128
4517 (match_operand:<ssescalarmode> 1 "nonmemory_operand"))
4520 "TARGET_SSE && reload_completed"
4521 [(set (match_dup 0) (match_dup 1))]
4522 "operands[0] = adjust_address (operands[0], <ssescalarmode>mode, 0);")
;; Expander over the V modes.  It passes the vector (operand 0), the scalar
;; (operand 1) and INTVAL of operand 2 to ix86_expand_vector_set, with false
;; as the first argument.
4524 (define_expand "vec_set<mode>"
4525 [(match_operand:V 0 "register_operand")
4526 (match_operand:<ssescalarmode> 1 "register_operand")
4527 (match_operand 2 "const_int_operand")]
4530 ix86_expand_vector_set (false, operands[0], operands[1],
4531 INTVAL (operands[2]));
;; Extracts element 0 of a V4SF operand.  The insn condition excludes the
;; memory-to-memory case.  After reload it is replaced by an SFmode set:
;; a register source becomes the same hard register in SFmode, and the
;; other visible path re-addresses operand 1 as SFmode at byte offset 0.
4535 (define_insn_and_split "*vec_extractv4sf_0"
4536 [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,f,r")
4538 (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m,m")
4539 (parallel [(const_int 0)])))]
4540 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4542 "&& reload_completed"
4543 [(set (match_dup 0) (match_dup 1))]
4545 if (REG_P (operands[1]))
4546 operands[1] = gen_rtx_REG (SFmode, REGNO (operands[1]));
4548 operands[1] = adjust_address (operands[1], SFmode, 0);
;; Extracts element operand 2 (0..3) of a V4SF register.  Only alternative 0
;; (destination rm) has a mnemonic listed here, vextractps.  The split
;; condition requires reload_completed and an SSE-register destination.
;; Depending on INTVAL (operands[2]) the split emits sse_shufps_v4sf, with
;; operand 2 and operand 2 + 4 as selectors, or vec_interleave_highv4sf,
;; both writing the destination viewed as V4SF.
4551 (define_insn_and_split "*sse4_1_extractps"
4552 [(set (match_operand:SF 0 "nonimmediate_operand" "=rm,x,x")
4554 (match_operand:V4SF 1 "register_operand" "x,0,x")
4555 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n,n,n")])))]
4558 %vextractps\t{%2, %1, %0|%0, %1, %2}
4561 "&& reload_completed && SSE_REG_P (operands[0])"
4564 rtx dest = gen_rtx_REG (V4SFmode, REGNO (operands[0]));
4565 switch (INTVAL (operands[2]))
4569 emit_insn (gen_sse_shufps_v4sf (dest, operands[1], operands[1],
4570 operands[2], operands[2],
4571 GEN_INT (INTVAL (operands[2]) + 4),
4572 GEN_INT (INTVAL (operands[2]) + 4)));
4575 emit_insn (gen_vec_interleave_highv4sf (dest, operands[1], operands[1]));
4578 /* 0 should be handled by the *vec_extractv4sf_0 pattern above. */
4583 [(set_attr "isa" "*,noavx,avx")
4584 (set_attr "type" "sselog,*,*")
4585 (set_attr "prefix_data16" "1,*,*")
4586 (set_attr "prefix_extra" "1,*,*")
4587 (set_attr "length_immediate" "1,*,*")
4588 (set_attr "prefix" "maybe_vex,*,*")
4589 (set_attr "mode" "V4SF,*,*")])
;; Extracts element operand 2 (0..3) from a V4SF memory operand into an SSE,
;; general or x87 register.  After reload it becomes a scalar SFmode load
;; from byte offset INTVAL (operands[2]) * 4 of operand 1.
4591 (define_insn_and_split "*vec_extractv4sf_mem"
4592 [(set (match_operand:SF 0 "register_operand" "=x,*r,f")
4594 (match_operand:V4SF 1 "memory_operand" "o,o,o")
4595 (parallel [(match_operand 2 "const_0_to_3_operand" "n,n,n")])))]
4598 "&& reload_completed"
4599 [(set (match_dup 0) (match_dup 1))]
4601 operands[1] = adjust_address (operands[1], SFmode, INTVAL (operands[2]) * 4);
;; Expander over the V_256 modes.  It switches on INTVAL (operands[2]), a
;; const_0_to_1_operand, to choose gen_vec_extract_lo_<mode> or
;; gen_vec_extract_hi_<mode>, then emits that generator on operands 0 and 1.
4604 (define_expand "avx_vextractf128<mode>"
4605 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
4606 (match_operand:V_256 1 "register_operand")
4607 (match_operand:SI 2 "const_0_to_1_operand")]
4610 rtx (*insn)(rtx, rtx);
4612 switch (INTVAL (operands[2]))
4615 insn = gen_vec_extract_lo_<mode>;
4618 insn = gen_vec_extract_hi_<mode>;
4624 emit_insn (insn (operands[0], operands[1]));
;; Low half (elements 0 and 1) of a VI8F_256 operand.  After reload this is
;; a plain move in <ssehalfvecmode>: a register source becomes the same hard
;; register in the half mode, and the other visible path re-addresses
;; operand 1 in the half mode at byte offset 0.
4628 (define_insn_and_split "vec_extract_lo_<mode>"
4629 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
4630 (vec_select:<ssehalfvecmode>
4631 (match_operand:VI8F_256 1 "nonimmediate_operand" "xm,x")
4632 (parallel [(const_int 0) (const_int 1)])))]
4633 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4635 "&& reload_completed"
4636 [(set (match_dup 0) (match_dup 1))]
4638 if (REG_P (operands[1]))
4639 operands[1] = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (operands[1]));
4641 operands[1] = adjust_address (operands[1], <ssehalfvecmode>mode, 0);
;; High half (elements 2 and 3) of a VI8F_256 register, emitted as
;; vextract<i128> with immediate 1.  The memory attribute is none for the
;; register destination and store for the memory destination.
4644 (define_insn "vec_extract_hi_<mode>"
4645 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
4646 (vec_select:<ssehalfvecmode>
4647 (match_operand:VI8F_256 1 "register_operand" "x,x")
4648 (parallel [(const_int 2) (const_int 3)])))]
4650 "vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}"
4651 [(set_attr "type" "sselog")
4652 (set_attr "prefix_extra" "1")
4653 (set_attr "length_immediate" "1")
4654 (set_attr "memory" "none,store")
4655 (set_attr "prefix" "vex")
4656 (set_attr "mode" "<sseinsnmode>")])
;; Low half (elements 0..3) of a VI4F_256 operand.  After reload this is a
;; plain move in <ssehalfvecmode>: a register source becomes the same hard
;; register in the half mode, and the other visible path re-addresses
;; operand 1 in the half mode at byte offset 0.
4658 (define_insn_and_split "vec_extract_lo_<mode>"
4659 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
4660 (vec_select:<ssehalfvecmode>
4661 (match_operand:VI4F_256 1 "nonimmediate_operand" "xm,x")
4662 (parallel [(const_int 0) (const_int 1)
4663 (const_int 2) (const_int 3)])))]
4664 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4666 "&& reload_completed"
4667 [(set (match_dup 0) (match_dup 1))]
4669 if (REG_P (operands[1]))
4670 operands[1] = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (operands[1]));
4672 operands[1] = adjust_address (operands[1], <ssehalfvecmode>mode, 0);
;; High half (elements 4..7) of a VI4F_256 register, emitted as
;; vextract<i128> with immediate 1.  The memory attribute is none for the
;; register destination and store for the memory destination.
4675 (define_insn "vec_extract_hi_<mode>"
4676 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
4677 (vec_select:<ssehalfvecmode>
4678 (match_operand:VI4F_256 1 "register_operand" "x,x")
4679 (parallel [(const_int 4) (const_int 5)
4680 (const_int 6) (const_int 7)])))]
4682 "vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}"
4683 [(set_attr "type" "sselog")
4684 (set_attr "prefix_extra" "1")
4685 (set_attr "length_immediate" "1")
4686 (set_attr "memory" "none,store")
4687 (set_attr "prefix" "vex")
4688 (set_attr "mode" "<sseinsnmode>")])
;; Low half (elements 0..7) of a V16HI operand as V8HI.  After reload this
;; is a plain V8HI move: a register source becomes the same hard register in
;; V8HImode, and the other visible path re-addresses operand 1 as V8HImode
;; at byte offset 0.
4690 (define_insn_and_split "vec_extract_lo_v16hi"
4691 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
4693 (match_operand:V16HI 1 "nonimmediate_operand" "xm,x")
4694 (parallel [(const_int 0) (const_int 1)
4695 (const_int 2) (const_int 3)
4696 (const_int 4) (const_int 5)
4697 (const_int 6) (const_int 7)])))]
4698 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4700 "&& reload_completed"
4701 [(set (match_dup 0) (match_dup 1))]
4703 if (REG_P (operands[1]))
4704 operands[1] = gen_rtx_REG (V8HImode, REGNO (operands[1]));
4706 operands[1] = adjust_address (operands[1], V8HImode, 0);
;; High half (elements 8..15) of a V16HI register as V8HI, emitted as
;; vextract%~128 with immediate 1 and insn mode OI.  The memory attribute is
;; none for the register destination and store for the memory destination.
4709 (define_insn "vec_extract_hi_v16hi"
4710 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
4712 (match_operand:V16HI 1 "register_operand" "x,x")
4713 (parallel [(const_int 8) (const_int 9)
4714 (const_int 10) (const_int 11)
4715 (const_int 12) (const_int 13)
4716 (const_int 14) (const_int 15)])))]
4718 "vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}"
4719 [(set_attr "type" "sselog")
4720 (set_attr "prefix_extra" "1")
4721 (set_attr "length_immediate" "1")
4722 (set_attr "memory" "none,store")
4723 (set_attr "prefix" "vex")
4724 (set_attr "mode" "OI")])
;; Low half (elements 0..15) of a V32QI operand as V16QI.  After reload this
;; is a plain V16QI move: a register source becomes the same hard register
;; in V16QImode, and the other visible path re-addresses operand 1 as
;; V16QImode at byte offset 0.
4726 (define_insn_and_split "vec_extract_lo_v32qi"
4727 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
4729 (match_operand:V32QI 1 "nonimmediate_operand" "xm,x")
4730 (parallel [(const_int 0) (const_int 1)
4731 (const_int 2) (const_int 3)
4732 (const_int 4) (const_int 5)
4733 (const_int 6) (const_int 7)
4734 (const_int 8) (const_int 9)
4735 (const_int 10) (const_int 11)
4736 (const_int 12) (const_int 13)
4737 (const_int 14) (const_int 15)])))]
4738 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4740 "&& reload_completed"
4741 [(set (match_dup 0) (match_dup 1))]
4743 if (REG_P (operands[1]))
4744 operands[1] = gen_rtx_REG (V16QImode, REGNO (operands[1]));
4746 operands[1] = adjust_address (operands[1], V16QImode, 0);
;; High half (elements 16..31) of a V32QI register as V16QI, emitted as
;; vextract%~128 with immediate 1 and insn mode OI.  The memory attribute is
;; none for the register destination and store for the memory destination.
4749 (define_insn "vec_extract_hi_v32qi"
4750 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
4752 (match_operand:V32QI 1 "register_operand" "x,x")
4753 (parallel [(const_int 16) (const_int 17)
4754 (const_int 18) (const_int 19)
4755 (const_int 20) (const_int 21)
4756 (const_int 22) (const_int 23)
4757 (const_int 24) (const_int 25)
4758 (const_int 26) (const_int 27)
4759 (const_int 28) (const_int 29)
4760 (const_int 30) (const_int 31)])))]
4762 "vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}"
4763 [(set_attr "type" "sselog")
4764 (set_attr "prefix_extra" "1")
4765 (set_attr "length_immediate" "1")
4766 (set_attr "memory" "none,store")
4767 (set_attr "prefix" "vex")
4768 (set_attr "mode" "OI")])
4770 ;; Modes handled by vec_extract patterns.
;; Each 256-bit mode is enabled only under TARGET_AVX; the 128-bit modes
;; carry no condition of their own.
4771 (define_mode_iterator VEC_EXTRACT_MODE
4772 [(V32QI "TARGET_AVX") V16QI
4773 (V16HI "TARGET_AVX") V8HI
4774 (V8SI "TARGET_AVX") V4SI
4775 (V4DI "TARGET_AVX") V2DI
4776 (V8SF "TARGET_AVX") V4SF
4777 (V4DF "TARGET_AVX") V2DF])
;; Expander over VEC_EXTRACT_MODE.  It passes the scalar destination
;; (operand 0), the vector (operand 1) and INTVAL of operand 2 to
;; ix86_expand_vector_extract, with false as the first argument.
4779 (define_expand "vec_extract<mode>"
4780 [(match_operand:<ssescalarmode> 0 "register_operand")
4781 (match_operand:VEC_EXTRACT_MODE 1 "register_operand")
4782 (match_operand 2 "const_int_operand")]
4785 ix86_expand_vector_extract (false, operands[0], operands[1],
4786 INTVAL (operands[2]));
4790 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4792 ;; Parallel double-precision floating point element swizzling
4794 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4796 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; vunpckhpd on V4DF operands.  The selector list is 1, 5, 3, 7 over
;; operands 1 and 2; operand 2 may be a register or memory.
4797 (define_insn "avx_unpckhpd256"
4798 [(set (match_operand:V4DF 0 "register_operand" "=x")
4801 (match_operand:V4DF 1 "register_operand" "x")
4802 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4803 (parallel [(const_int 1) (const_int 5)
4804 (const_int 3) (const_int 7)])))]
4806 "vunpckhpd\t{%2, %1, %0|%0, %1, %2}"
4807 [(set_attr "type" "sselog")
4808 (set_attr "prefix" "vex")
4809 (set_attr "mode" "V4DF")])
;; Multi-step expansion using two fresh V4DF pseudo registers, created in
;; operands 3 and 4 by the preparation code.  The visible selector lists are
;; 0,4,2,6 and 1,5,3,7 for the intermediate steps and 2,3,6,7 for the final
;; set into operand 0.
4811 (define_expand "vec_interleave_highv4df"
4815 (match_operand:V4DF 1 "register_operand" "x")
4816 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4817 (parallel [(const_int 0) (const_int 4)
4818 (const_int 2) (const_int 6)])))
4824 (parallel [(const_int 1) (const_int 5)
4825 (const_int 3) (const_int 7)])))
4826 (set (match_operand:V4DF 0 "register_operand")
4831 (parallel [(const_int 2) (const_int 3)
4832 (const_int 6) (const_int 7)])))]
4835 operands[3] = gen_reg_rtx (V4DFmode);
4836 operands[4] = gen_reg_rtx (V4DFmode);
;; Expander.  When ix86_vec_interleave_v2df_operator_ok (operands, 1)
;; rejects the operands as given, operand 2 is forced into a V2DF register.
4840 (define_expand "vec_interleave_highv2df"
4841 [(set (match_operand:V2DF 0 "register_operand")
4844 (match_operand:V2DF 1 "nonimmediate_operand")
4845 (match_operand:V2DF 2 "nonimmediate_operand"))
4846 (parallel [(const_int 1)
4850 if (!ix86_vec_interleave_v2df_operator_ok (operands, 1))
4851 operands[2] = force_reg (V2DFmode, operands[2]);
;; Six alternatives, valid only when ix86_vec_interleave_v2df_operator_ok
;; (operands, 1) accepts the operands.  0-1 are unpckhpd / vunpckhpd;
;; 2-4 take operand 1 from offsettable memory and print it as %H1 (movddup,
;; movlpd, vmovlpd); 5 has a memory destination and emits movhpd.
4854 (define_insn "*vec_interleave_highv2df"
4855 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m")
4858 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,o,o,o,x")
4859 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,1,0,x,0"))
4860 (parallel [(const_int 1)
4862 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
4864 unpckhpd\t{%2, %0|%0, %2}
4865 vunpckhpd\t{%2, %1, %0|%0, %1, %2}
4866 %vmovddup\t{%H1, %0|%0, %H1}
4867 movlpd\t{%H1, %0|%0, %H1}
4868 vmovlpd\t{%H1, %2, %0|%0, %2, %H1}
4869 %vmovhpd\t{%1, %0|%q0, %1}"
4870 [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
4871 (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
4872 (set_attr "prefix_data16" "*,*,*,1,*,1")
4873 (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex")
4874 (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,V1DF")])
4876 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; Expander with a V4DF register destination and a register-or-memory
;; operand 1; the selector list is 0, 4, 2, 6.
4877 (define_expand "avx_movddup256"
4878 [(set (match_operand:V4DF 0 "register_operand")
4881 (match_operand:V4DF 1 "nonimmediate_operand")
4883 (parallel [(const_int 0) (const_int 4)
4884 (const_int 2) (const_int 6)])))]
;; Expander with a V4DF register destination, register operand 1 and
;; register-or-memory operand 2; the selector list is 0, 4, 2, 6.
4887 (define_expand "avx_unpcklpd256"
4888 [(set (match_operand:V4DF 0 "register_operand")
4891 (match_operand:V4DF 1 "register_operand")
4892 (match_operand:V4DF 2 "nonimmediate_operand"))
4893 (parallel [(const_int 0) (const_int 4)
4894 (const_int 2) (const_int 6)])))]
;; Selector list 0, 4, 2, 6 over V4DF operands 1 and 2.  Alternative 0 emits
;; vunpcklpd on both operands.  In alternative 1 operand 1 is in memory and
;; operand 2 is constrained to match it, so vmovddup of %1 alone is emitted.
4897 (define_insn "*avx_unpcklpd256"
4898 [(set (match_operand:V4DF 0 "register_operand" "=x,x")
4901 (match_operand:V4DF 1 "nonimmediate_operand" " x,m")
4902 (match_operand:V4DF 2 "nonimmediate_operand" "xm,1"))
4903 (parallel [(const_int 0) (const_int 4)
4904 (const_int 2) (const_int 6)])))]
4907 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4908 vmovddup\t{%1, %0|%0, %1}"
4909 [(set_attr "type" "sselog")
4910 (set_attr "prefix" "vex")
4911 (set_attr "mode" "V4DF")])
;; Multi-step expansion using two fresh V4DF pseudo registers, created in
;; operands 3 and 4 by the preparation code.  The visible selector lists are
;; 0,4,2,6 and 1,5,3,7 for the intermediate steps and 0,1,4,5 for the final
;; set into operand 0.
4913 (define_expand "vec_interleave_lowv4df"
4917 (match_operand:V4DF 1 "register_operand" "x")
4918 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4919 (parallel [(const_int 0) (const_int 4)
4920 (const_int 2) (const_int 6)])))
4926 (parallel [(const_int 1) (const_int 5)
4927 (const_int 3) (const_int 7)])))
4928 (set (match_operand:V4DF 0 "register_operand")
4933 (parallel [(const_int 0) (const_int 1)
4934 (const_int 4) (const_int 5)])))]
4937 operands[3] = gen_reg_rtx (V4DFmode);
4938 operands[4] = gen_reg_rtx (V4DFmode);
;; Expander.  When ix86_vec_interleave_v2df_operator_ok (operands, 0)
;; rejects the operands as given, operand 1 is forced into a V2DF register.
4941 (define_expand "vec_interleave_lowv2df"
4942 [(set (match_operand:V2DF 0 "register_operand")
4945 (match_operand:V2DF 1 "nonimmediate_operand")
4946 (match_operand:V2DF 2 "nonimmediate_operand"))
4947 (parallel [(const_int 0)
4951 if (!ix86_vec_interleave_v2df_operator_ok (operands, 0))
4952 operands[1] = force_reg (V2DFmode, operands[1]);
;; Six alternatives, valid only when ix86_vec_interleave_v2df_operator_ok
;; (operands, 0) accepts the operands.  0-1 are unpcklpd / vunpcklpd;
;; 2 is movddup from memory operand 1 (operand 2 constrained to match it);
;; 3-4 are movhpd / vmovhpd with operand 2 in memory; 5 has an offsettable
;; memory destination and emits movlpd of operand 2 to %H0.
4955 (define_insn "*vec_interleave_lowv2df"
4956 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,o")
4959 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,m,0,x,0")
4960 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,1,m,m,x"))
4961 (parallel [(const_int 0)
4963 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
4965 unpcklpd\t{%2, %0|%0, %2}
4966 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4967 %vmovddup\t{%1, %0|%0, %q1}
4968 movhpd\t{%2, %0|%0, %q2}
4969 vmovhpd\t{%2, %1, %0|%0, %1, %q2}
4970 %vmovlpd\t{%2, %H0|%H0, %2}"
4971 [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
4972 (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
4973 (set_attr "prefix_data16" "*,*,*,1,*,1")
4974 (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex")
4975 (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,V1DF")])
;; NOTE(review): the opening line of this definition is not visible here.
;; Under TARGET_SSE3 after reload, register operand 1 is viewed as the same
;; hard register in DFmode and stored twice, at byte offsets 0 and 8 of the
;; V2DF memory destination.
4978 [(set (match_operand:V2DF 0 "memory_operand")
4981 (match_operand:V2DF 1 "register_operand")
4983 (parallel [(const_int 0)
4985 "TARGET_SSE3 && reload_completed"
4988 rtx low = gen_rtx_REG (DFmode, REGNO (operands[1]));
4989 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
4990 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
;; NOTE(review): the opening line of this definition is not visible here.
;; Applies under TARGET_SSE3 when operand 3 equals operand 2 + 2.  The
;; replacement is a vec_duplicate of operand 1 after it is re-addressed as
;; DFmode at byte offset INTVAL (operands[2]) * 8.
4995 [(set (match_operand:V2DF 0 "register_operand")
4998 (match_operand:V2DF 1 "memory_operand")
5000 (parallel [(match_operand:SI 2 "const_0_to_1_operand")
5001 (match_operand:SI 3 "const_int_operand")])))]
5002 "TARGET_SSE3 && INTVAL (operands[2]) + 2 == INTVAL (operands[3])"
5003 [(set (match_dup 0) (vec_duplicate:V2DF (match_dup 1)))]
5005 operands[1] = adjust_address (operands[1], DFmode, INTVAL (operands[2]) * 8);
;; vrndscale<ssemodesuffix> over the VF_512 modes: register destination,
;; register-or-memory operand 1, and a 0..255 immediate in operand 2.
;; The prefix attribute is evex.
5008 (define_insn "avx512f_rndscale<mode>"
5009 [(set (match_operand:VF_512 0 "register_operand" "=v")
5011 [(match_operand:VF_512 1 "nonimmediate_operand" "vm")
5012 (match_operand:SI 2 "const_0_to_255_operand")]
5015 "vrndscale<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5016 [(set_attr "length_immediate" "1")
5017 (set_attr "prefix" "evex")
5018 (set_attr "mode" "<MODE>")])
;; Expander that decodes the integer mask in operand 3 into the element
;; selectors passed to gen_avx_shufpd256_1.  Visible here: bit 1 chooses
;; 5 or 4, bit 2 chooses 3 or 2, and bit 3 chooses 7 or 6.
5020 (define_expand "avx_shufpd256"
5021 [(match_operand:V4DF 0 "register_operand")
5022 (match_operand:V4DF 1 "register_operand")
5023 (match_operand:V4DF 2 "nonimmediate_operand")
5024 (match_operand:SI 3 "const_int_operand")]
5027 int mask = INTVAL (operands[3]);
5028 emit_insn (gen_avx_shufpd256_1 (operands[0], operands[1], operands[2],
5030 GEN_INT (mask & 2 ? 5 : 4),
5031 GEN_INT (mask & 4 ? 3 : 2),
5032 GEN_INT (mask & 8 ? 7 : 6)));
;; vshufpd on V4DF operands with the shuffle given as four element
;; selectors (operands 3..6).  The output code folds them back into one
;; immediate: operand 3 gives bit 0, operand 4 - 4 gives bit 1, operand 5 - 2
;; gives bit 2 and operand 6 - 6 gives bit 3.  Operand 3 is overwritten with
;; that value before the template is returned.
5036 (define_insn "avx_shufpd256_1"
5037 [(set (match_operand:V4DF 0 "register_operand" "=x")
5040 (match_operand:V4DF 1 "register_operand" "x")
5041 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
5042 (parallel [(match_operand 3 "const_0_to_1_operand")
5043 (match_operand 4 "const_4_to_5_operand")
5044 (match_operand 5 "const_2_to_3_operand")
5045 (match_operand 6 "const_6_to_7_operand")])))]
5049 mask = INTVAL (operands[3]);
5050 mask |= (INTVAL (operands[4]) - 4) << 1;
5051 mask |= (INTVAL (operands[5]) - 2) << 2;
5052 mask |= (INTVAL (operands[6]) - 6) << 3;
5053 operands[3] = GEN_INT (mask);
5055 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
5057 [(set_attr "type" "sseshuf")
5058 (set_attr "length_immediate" "1")
5059 (set_attr "prefix" "vex")
5060 (set_attr "mode" "V4DF")])
;; Expander that decodes the integer mask in operand 3 into the element
;; selectors passed to gen_sse2_shufpd_v2df.  Visible here: bit 1 chooses
;; 3 or 2 for the last selector.
5062 (define_expand "sse2_shufpd"
5063 [(match_operand:V2DF 0 "register_operand")
5064 (match_operand:V2DF 1 "register_operand")
5065 (match_operand:V2DF 2 "nonimmediate_operand")
5066 (match_operand:SI 3 "const_int_operand")]
5069 int mask = INTVAL (operands[3]);
5070 emit_insn (gen_sse2_shufpd_v2df (operands[0], operands[1], operands[2],
5072 GEN_INT (mask & 2 ? 3 : 2)));
5076 ;; punpcklqdq and punpckhqdq are shorter than shufpd.
;; vpunpckhqdq on V4DI operands; operand 2 may be a register or memory.
;; The selector list starts at element 1; the insn mode is OI.
5077 (define_insn "avx2_interleave_highv4di"
5078 [(set (match_operand:V4DI 0 "register_operand" "=x")
5081 (match_operand:V4DI 1 "register_operand" "x")
5082 (match_operand:V4DI 2 "nonimmediate_operand" "xm"))
5083 (parallel [(const_int 1)
5088 "vpunpckhqdq\t{%2, %1, %0|%0, %1, %2}"
5089 [(set_attr "type" "sselog")
5090 (set_attr "prefix" "vex")
5091 (set_attr "mode" "OI")])
;; punpckhqdq (noavx, destination tied to operand 1) or vpunpckhqdq (avx,
;; three-operand) on V2DI operands.  The selector list starts at element 1;
;; prefix_data16 is set only for the non-AVX alternative.
5093 (define_insn "vec_interleave_highv2di"
5094 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
5097 (match_operand:V2DI 1 "register_operand" "0,x")
5098 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm"))
5099 (parallel [(const_int 1)
5103 punpckhqdq\t{%2, %0|%0, %2}
5104 vpunpckhqdq\t{%2, %1, %0|%0, %1, %2}"
5105 [(set_attr "isa" "noavx,avx")
5106 (set_attr "type" "sselog")
5107 (set_attr "prefix_data16" "1,*")
5108 (set_attr "prefix" "orig,vex")
5109 (set_attr "mode" "TI")])
;; vpunpcklqdq on V4DI operands; operand 2 may be a register or memory.
;; The selector list starts at element 0; the insn mode is OI.
5111 (define_insn "avx2_interleave_lowv4di"
5112 [(set (match_operand:V4DI 0 "register_operand" "=x")
5115 (match_operand:V4DI 1 "register_operand" "x")
5116 (match_operand:V4DI 2 "nonimmediate_operand" "xm"))
5117 (parallel [(const_int 0)
5122 "vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}"
5123 [(set_attr "type" "sselog")
5124 (set_attr "prefix" "vex")
5125 (set_attr "mode" "OI")])
;; punpcklqdq (noavx, destination tied to operand 1) or vpunpcklqdq (avx,
;; three-operand) on V2DI operands.  The selector list starts at element 0;
;; prefix_data16 is set only for the non-AVX alternative.
5127 (define_insn "vec_interleave_lowv2di"
5128 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
5131 (match_operand:V2DI 1 "register_operand" "0,x")
5132 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm"))
5133 (parallel [(const_int 0)
5137 punpcklqdq\t{%2, %0|%0, %2}
5138 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}"
5139 [(set_attr "isa" "noavx,avx")
5140 (set_attr "type" "sselog")
5141 (set_attr "prefix_data16" "1,*")
5142 (set_attr "prefix" "orig,vex")
5143 (set_attr "mode" "TI")])
;; shufpd / vshufpd over the VI8F_128 modes, with the shuffle given as two
;; element selectors: operand 3 in 0..1 and operand 4 in 2..3.  The output
;; code rebuilds the immediate as operand 3 | ((operand 4 - 2) << 1) and
;; stores it back into operand 3 before choosing the template.
5145 (define_insn "sse2_shufpd_<mode>"
5146 [(set (match_operand:VI8F_128 0 "register_operand" "=x,x")
5147 (vec_select:VI8F_128
5148 (vec_concat:<ssedoublevecmode>
5149 (match_operand:VI8F_128 1 "register_operand" "0,x")
5150 (match_operand:VI8F_128 2 "nonimmediate_operand" "xm,xm"))
5151 (parallel [(match_operand 3 "const_0_to_1_operand")
5152 (match_operand 4 "const_2_to_3_operand")])))]
5156 mask = INTVAL (operands[3]);
5157 mask |= (INTVAL (operands[4]) - 2) << 1;
5158 operands[3] = GEN_INT (mask);
5160 switch (which_alternative)
5163 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
5165 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
5170 [(set_attr "isa" "noavx,avx")
5171 (set_attr "type" "sseshuf")
5172 (set_attr "length_immediate" "1")
5173 (set_attr "prefix" "orig,vex")
5174 (set_attr "mode" "V2DF")])
5176 ;; Avoid combining registers from different units in a single alternative,
5177 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Extracts element 1 of a V2DF operand into a DF destination; memory to
;; memory is excluded by the insn condition.  Six alternatives per the
;; attribute lists: 0 stores with movhpd, 1-2 are typed sselog1 (vunpckhpd
;; is the template visible for the AVX one), and 3-5 read offsettable memory
;; into an SSE, x87 or general register (typed ssemov, fmov, imov).
;; prefix_data16 is computed from alternative 0 and !TARGET_AVX.
5178 (define_insn "sse2_storehpd"
5179 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,x,*f,r")
5181 (match_operand:V2DF 1 "nonimmediate_operand" " x,0,x,o,o,o")
5182 (parallel [(const_int 1)])))]
5183 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
5185 %vmovhpd\t{%1, %0|%0, %1}
5187 vunpckhpd\t{%d1, %0|%0, %d1}
5191 [(set_attr "isa" "*,noavx,avx,*,*,*")
5192 (set_attr "type" "ssemov,sselog1,sselog1,ssemov,fmov,imov")
5193 (set (attr "prefix_data16")
5195 (and (eq_attr "alternative" "0")
5196 (not (match_test "TARGET_AVX")))
5198 (const_string "*")))
5199 (set_attr "prefix" "maybe_vex,orig,vex,*,*,*")
5200 (set_attr "mode" "V1DF,V1DF,V2DF,DF,DF,DF")])
;; NOTE(review): the opening line of this definition is not visible here.
;; After reload, extracting element 1 from a V2DF memory operand into a DF
;; register becomes a plain DFmode load from byte offset 8 of that memory.
5203 [(set (match_operand:DF 0 "register_operand")
5205 (match_operand:V2DF 1 "memory_operand")
5206 (parallel [(const_int 1)])))]
5207 "TARGET_SSE2 && reload_completed"
5208 [(set (match_dup 0) (match_dup 1))]
5209 "operands[1] = adjust_address (operands[1], DFmode, 8);")
;; Variant of the element-1 extraction used when TARGET_SSE is set but
;; TARGET_SSE2 is not.  It uses single-precision moves: movhps to memory,
;; movhlps between registers, and movlps from %H1 of offsettable memory.
5211 (define_insn "*vec_extractv2df_1_sse"
5212 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
5214 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
5215 (parallel [(const_int 1)])))]
5216 "!TARGET_SSE2 && TARGET_SSE
5217 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
5219 movhps\t{%1, %0|%q0, %1}
5220 movhlps\t{%1, %0|%0, %1}
5221 movlps\t{%H1, %0|%0, %H1}"
5222 [(set_attr "type" "ssemov")
5223 (set_attr "mode" "V2SF,V4SF,V2SF")])
5225 ;; Avoid combining registers from different units in a single alternative,
5226 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Extracts element 0 of a V2DF operand into a DF destination; memory to
;; memory is excluded by the insn condition.  Five alternatives per the
;; attribute lists; only alternative 0 (store with movlpd) has a mnemonic
;; listed here.  The last two alternatives are typed fmov and imov.
5227 (define_insn "sse2_storelpd"
5228 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
5230 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m,m,m")
5231 (parallel [(const_int 0)])))]
5232 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
5234 %vmovlpd\t{%1, %0|%0, %1}
5239 [(set_attr "type" "ssemov,ssemov,ssemov,fmov,imov")
5240 (set_attr "prefix_data16" "1,*,*,*,*")
5241 (set_attr "prefix" "maybe_vex")
5242 (set_attr "mode" "V1DF,DF,DF,DF,DF")])
;; NOTE(review): the opening line of this definition is not visible here.
;; After reload, extracting element 0 of a V2DF operand into a DF register
;; becomes a plain DFmode move: a register source is the same hard register
;; in DFmode, and the other visible path re-addresses operand 1 as DFmode at
;; byte offset 0.
5245 [(set (match_operand:DF 0 "register_operand")
5247 (match_operand:V2DF 1 "nonimmediate_operand")
5248 (parallel [(const_int 0)])))]
5249 "TARGET_SSE2 && reload_completed"
5250 [(set (match_dup 0) (match_dup 1))]
5252 if (REG_P (operands[1]))
5253 operands[1] = gen_rtx_REG (DFmode, REGNO (operands[1]));
5255 operands[1] = adjust_address (operands[1], DFmode, 0);
;; Variant of the element-0 extraction used when TARGET_SSE is set but
;; TARGET_SSE2 is not.  It uses single-precision moves: movlps to memory,
;; movaps between registers, and movlps from memory.
5258 (define_insn "*vec_extractv2df_0_sse"
5259 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
5261 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
5262 (parallel [(const_int 0)])))]
5263 "!TARGET_SSE2 && TARGET_SSE
5264 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
5266 movlps\t{%1, %0|%0, %1}
5267 movaps\t{%1, %0|%0, %1}
5268 movlps\t{%1, %0|%0, %q1}"
5269 [(set_attr "type" "ssemov")
5270 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander wrapper around sse2_loadhpd.  ix86_fixup_binary_operands returns
;; the destination actually used for the insn; when that differs from
;; operand 0, the result is copied into operand 0 afterwards.
5272 (define_expand "sse2_loadhpd_exp"
5273 [(set (match_operand:V2DF 0 "nonimmediate_operand")
5276 (match_operand:V2DF 1 "nonimmediate_operand")
5277 (parallel [(const_int 0)]))
5278 (match_operand:DF 2 "nonimmediate_operand")))]
5281 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
5283 emit_insn (gen_sse2_loadhpd (dst, operands[1], operands[2]));
5285 /* Fix up the destination if needed. */
5286 if (dst != operands[0])
5287 emit_move_insn (operands[0], dst);
5292 ;; Avoid combining registers from different units in a single alternative,
5293 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Combines element 0 of V2DF operand 1 with DF operand 2.  Seven
;; alternatives per the attribute lists: movhpd / vmovhpd with operand 2 in
;; memory, unpcklpd / vunpcklpd with operand 2 in an SSE register, and three
;; more with no mnemonic listed here (typed ssemov, fmov, imov).
;; Operands 1 and 2 may not both be memory.
5294 (define_insn "sse2_loadhpd"
5295 [(set (match_operand:V2DF 0 "nonimmediate_operand"
5299 (match_operand:V2DF 1 "nonimmediate_operand"
5301 (parallel [(const_int 0)]))
5302 (match_operand:DF 2 "nonimmediate_operand"
5303 " m,m,x,x,x,*f,r")))]
5304 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
5306 movhpd\t{%2, %0|%0, %2}
5307 vmovhpd\t{%2, %1, %0|%0, %1, %2}
5308 unpcklpd\t{%2, %0|%0, %2}
5309 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
5313 [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
5314 (set_attr "type" "ssemov,ssemov,sselog,sselog,ssemov,fmov,imov")
5315 (set_attr "prefix_data16" "1,*,*,*,*,*,*")
5316 (set_attr "prefix" "orig,vex,orig,vex,*,*,*")
5317 (set_attr "mode" "V1DF,V1DF,V2DF,V2DF,DF,DF,DF")])
;; NOTE(review): the opening line of this definition is not visible here.
;; After reload, when the V2DF memory destination also supplies element 0,
;; only the other element changes: the pattern becomes a DFmode store of
;; register operand 1 at byte offset 8 of operand 0.
5320 [(set (match_operand:V2DF 0 "memory_operand")
5322 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
5323 (match_operand:DF 1 "register_operand")))]
5324 "TARGET_SSE2 && reload_completed"
5325 [(set (match_dup 0) (match_dup 1))]
5326 "operands[0] = adjust_address (operands[0], DFmode, 8);")
;; Expander wrapper around sse2_loadlpd.  ix86_fixup_binary_operands returns
;; the destination actually used for the insn; when that differs from
;; operand 0, the result is copied into operand 0 afterwards.
5328 (define_expand "sse2_loadlpd_exp"
5329 [(set (match_operand:V2DF 0 "nonimmediate_operand")
5331 (match_operand:DF 2 "nonimmediate_operand")
5333 (match_operand:V2DF 1 "nonimmediate_operand")
5334 (parallel [(const_int 1)]))))]
5337 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
5339 emit_insn (gen_sse2_loadlpd (dst, operands[1], operands[2]));
5341 /* Fix up the destination if needed. */
5342 if (dst != operands[0])
5343 emit_move_insn (operands[0], dst);
5348 ;; Avoid combining registers from different units in a single alternative,
5349 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Combines DF operand 2 with element 1 of V2DF operand 1.  Eleven
;; alternatives, kept in step across the constraint strings, templates and
;; attribute lists:
;;   0     operand 1 is C: scalar movsd load from memory
;;   1-2   movlpd / vmovlpd with operand 2 in memory
;;   3-4   movsd / vmovsd with operand 2 in an SSE register
;;   5     shufpd with immediate 2 (typed sselog)
;;   6-7   movhpd / vmovhpd from %H1 of offsettable memory operand 1
;;   8-10  memory destination; no mnemonic is listed for these here, and
;;         the type cond marks alternative 9 fmov and alternative 10 imov
5350 (define_insn "sse2_loadlpd"
5351 [(set (match_operand:V2DF 0 "nonimmediate_operand"
5352 "=x,x,x,x,x,x,x,x,m,m ,m")
5354 (match_operand:DF 2 "nonimmediate_operand"
5355 " m,m,m,x,x,0,0,x,x,*f,r")
5357 (match_operand:V2DF 1 "vector_move_operand"
5358 " C,0,x,0,x,x,o,o,0,0 ,0")
5359 (parallel [(const_int 1)]))))]
5360 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
5362 %vmovsd\t{%2, %0|%0, %2}
5363 movlpd\t{%2, %0|%0, %2}
5364 vmovlpd\t{%2, %1, %0|%0, %1, %2}
5365 movsd\t{%2, %0|%0, %2}
5366 vmovsd\t{%2, %1, %0|%0, %1, %2}
5367 shufpd\t{$2, %1, %0|%0, %1, 2}
5368 movhpd\t{%H1, %0|%0, %H1}
5369 vmovhpd\t{%H1, %2, %0|%0, %2, %H1}
5373 [(set_attr "isa" "*,noavx,avx,noavx,avx,noavx,noavx,avx,*,*,*")
5375 (cond [(eq_attr "alternative" "5")
5376 (const_string "sselog")
5377 (eq_attr "alternative" "9")
5378 (const_string "fmov")
5379 (eq_attr "alternative" "10")
5380 (const_string "imov")
5382 (const_string "ssemov")))
5383 (set_attr "prefix_data16" "*,1,*,*,*,*,1,*,*,*,*")
5384 (set_attr "length_immediate" "*,*,*,*,*,1,*,*,*,*,*")
5385 (set_attr "prefix" "maybe_vex,orig,vex,orig,vex,orig,orig,vex,*,*,*")
5386 (set_attr "mode" "DF,V1DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,DF,DF,DF")])
;; NOTE(review): the opening line of this definition is not visible here.
;; After reload, when the V2DF memory destination also supplies element 1,
;; only the other element changes: the pattern becomes a DFmode store of
;; register operand 1 at byte offset 0 of operand 0.
5389 [(set (match_operand:V2DF 0 "memory_operand")
5391 (match_operand:DF 1 "register_operand")
5392 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
5393 "TARGET_SSE2 && reload_completed"
5394 [(set (match_dup 0) (match_dup 1))]
5395 "operands[0] = adjust_address (operands[0], DFmode, 0);")
;; Nine alternatives over V2DF operands 1 and 2:
;;   0-1   movsd / vmovsd, register operand 2
;;   2-3   movlpd / vmovlpd, memory operand 2
;;   4     movlpd store of operand 2 to a memory destination
;;   5     shufpd with immediate 2 (typed sselog)
;;   6-7   movhps / vmovhps from %H1 of offsettable memory operand 1
;;   8     movhps store of operand 1 to %H0 of an offsettable destination
;; prefix_data16 is computed from alternatives 2,4 and !TARGET_AVX.
5397 (define_insn "sse2_movsd"
5398 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,m,x,x,x,o")
5400 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,m,m,x,0,0,x,0")
5401 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,0,x,0,x,o,o,x")
5405 movsd\t{%2, %0|%0, %2}
5406 vmovsd\t{%2, %1, %0|%0, %1, %2}
5407 movlpd\t{%2, %0|%0, %q2}
5408 vmovlpd\t{%2, %1, %0|%0, %1, %q2}
5409 %vmovlpd\t{%2, %0|%q0, %2}
5410 shufpd\t{$2, %1, %0|%0, %1, 2}
5411 movhps\t{%H1, %0|%0, %H1}
5412 vmovhps\t{%H1, %2, %0|%0, %2, %H1}
5413 %vmovhps\t{%1, %H0|%H0, %1}"
5414 [(set_attr "isa" "noavx,avx,noavx,avx,*,noavx,noavx,avx,*")
5417 (eq_attr "alternative" "5")
5418 (const_string "sselog")
5419 (const_string "ssemov")))
5420 (set (attr "prefix_data16")
5422 (and (eq_attr "alternative" "2,4")
5423 (not (match_test "TARGET_AVX")))
5425 (const_string "*")))
5426 (set_attr "length_immediate" "*,*,*,*,*,1,*,*,*")
5427 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig,vex,maybe_vex")
5428 (set_attr "mode" "DF,DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,V1DF")])
;; Two alternatives.  Alternative 0 (isa noavx) has operand 1 tied to the
;; destination; its mnemonic is not listed here.  Alternative 1 (isa sse3)
;; emits movddup from a register or memory operand 1.
5430 (define_insn "vec_dupv2df"
5431 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
5433 (match_operand:DF 1 "nonimmediate_operand" " 0,xm")))]
5437 %vmovddup\t{%1, %0|%0, %1}"
5438 [(set_attr "isa" "noavx,sse3")
5439 (set_attr "type" "sselog1")
5440 (set_attr "prefix" "orig,maybe_vex")
5441 (set_attr "mode" "V2DF,DF")])
;; Builds a V2DF destination from two DF operands.  Eight alternatives:
;;   0-1   unpcklpd / vunpcklpd, both operands in registers
;;   2     movddup from memory operand 1 (operand 2 constrained to match)
;;   3-4   movhpd / vmovhpd with operand 2 in memory
;;   5     operand 2 is C: scalar movsd load of memory operand 1
;;   6-7   movlhps / movhps, isa noavx, single-precision insn modes
5443 (define_insn "*vec_concatv2df"
5444 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x,x,x,x,x,x")
5446 (match_operand:DF 1 "nonimmediate_operand" " 0,x,m,0,x,m,0,0")
5447 (match_operand:DF 2 "vector_move_operand" " x,x,1,m,m,C,x,m")))]
5450 unpcklpd\t{%2, %0|%0, %2}
5451 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
5452 %vmovddup\t{%1, %0|%0, %1}
5453 movhpd\t{%2, %0|%0, %2}
5454 vmovhpd\t{%2, %1, %0|%0, %1, %2}
5455 %vmovsd\t{%1, %0|%0, %1}
5456 movlhps\t{%2, %0|%0, %2}
5457 movhps\t{%2, %0|%0, %2}"
5458 [(set_attr "isa" "sse2_noavx,avx,sse3,sse2_noavx,avx,sse2,noavx,noavx")
5461 (eq_attr "alternative" "0,1,2")
5462 (const_string "sselog")
5463 (const_string "ssemov")))
5464 (set_attr "prefix_data16" "*,*,*,1,*,*,*,*")
5465 (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex,orig,orig")
5466 (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,DF,V4SF,V2SF")])
5468 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5470 ;; Parallel integral arithmetic
5472 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander over the VI_AVX2 modes.  The preparation statement sets
;; operand 2 to a register holding the zero vector of <MODE>mode.
5474 (define_expand "neg<mode>2"
5475 [(set (match_operand:VI_AVX2 0 "register_operand")
5478 (match_operand:VI_AVX2 1 "nonimmediate_operand")))]
5480 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
;; Expander for vector integer add/subtract over the VI_AVX2 modes.  Both
;; inputs may be memory on entry; ix86_fixup_binary_operands_no_copy adjusts
;; the operands for <CODE> before the pattern is emitted.
5482 (define_expand "<plusminus_insn><mode>3"
5483 [(set (match_operand:VI_AVX2 0 "register_operand")
5485 (match_operand:VI_AVX2 1 "nonimmediate_operand")
5486 (match_operand:VI_AVX2 2 "nonimmediate_operand")))]
5488 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Matching insn for vector integer add/subtract.  Alternative 0 (noavx) is
;; the two-operand p<op> form with the destination tied to operand 1;
;; alternative 1 (avx) is the three-operand vp<op> form.  The insn is valid
;; only when ix86_binary_operator_ok accepts the operands.
5490 (define_insn "*<plusminus_insn><mode>3"
5491 [(set (match_operand:VI_AVX2 0 "register_operand" "=x,v")
5493 (match_operand:VI_AVX2 1 "nonimmediate_operand" "<comm>0,v")
5494 (match_operand:VI_AVX2 2 "nonimmediate_operand" "xm,vm")))]
5495 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5497 p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
5498 vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5499 [(set_attr "isa" "noavx,avx")
5500 (set_attr "type" "sseiadd")
5501 (set_attr "prefix_data16" "1,*")
5502 (set_attr "prefix" "orig,vex")
5503 (set_attr "mode" "<sseinsnmode>")])
;; Expander for saturating add/subtract (sat_plusminus) on the VI12_AVX2
;; modes.  Operands are legalized by ix86_fixup_binary_operands_no_copy.
5505 (define_expand "<sse2_avx2>_<plusminus_insn><mode>3"
5506 [(set (match_operand:VI12_AVX2 0 "register_operand")
5507 (sat_plusminus:VI12_AVX2
5508 (match_operand:VI12_AVX2 1 "nonimmediate_operand")
5509 (match_operand:VI12_AVX2 2 "nonimmediate_operand")))]
5511 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Saturating vector add/subtract instruction (sat_plusminus) on the
;; VI12_AVX2 modes, with a legacy SSE alternative and a VEX alternative.
5513 (define_insn "*<sse2_avx2>_<plusminus_insn><mode>3"
5514 [(set (match_operand:VI12_AVX2 0 "register_operand" "=x,v")
5515 (sat_plusminus:VI12_AVX2
5516 (match_operand:VI12_AVX2 1 "nonimmediate_operand" "<comm>0,v")
5517 (match_operand:VI12_AVX2 2 "nonimmediate_operand" "xm,vm")))]
5518 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5520 p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
5521 vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5522 [(set_attr "isa" "noavx,avx")
5523 (set_attr "type" "sseiadd")
5524 (set_attr "prefix_data16" "1,*")
5525 (set_attr "prefix" "orig,vex")
5526 (set_attr "mode" "TI")])
;; Vector multiply for the VI1_AVX2 modes.  Both inputs must already be in
;; registers; the expansion is handed to ix86_expand_vecop_qihi.
5528 (define_expand "mul<mode>3"
5529 [(set (match_operand:VI1_AVX2 0 "register_operand")
5530 (mult:VI1_AVX2 (match_operand:VI1_AVX2 1 "register_operand")
5531 (match_operand:VI1_AVX2 2 "register_operand")))]
5534 ix86_expand_vecop_qihi (MULT, operands[0], operands[1], operands[2]);
;; Vector multiply expander for the VI2_AVX2 modes.  Operands are legalized
;; by ix86_fixup_binary_operands_no_copy.
5538 (define_expand "mul<mode>3"
5539 [(set (match_operand:VI2_AVX2 0 "register_operand")
5540 (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "nonimmediate_operand")
5541 (match_operand:VI2_AVX2 2 "nonimmediate_operand")))]
5543 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; pmullw / vpmullw: vector multiply on the VI2_AVX2 modes.  Operand 1 is
;; marked commutative ("%"); alternative 0 ties the destination to it.
5545 (define_insn "*mul<mode>3"
5546 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
5547 (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,x")
5548 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,xm")))]
5549 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
5551 pmullw\t{%2, %0|%0, %2}
5552 vpmullw\t{%2, %1, %0|%0, %1, %2}"
5553 [(set_attr "isa" "noavx,avx")
5554 (set_attr "type" "sseimul")
5555 (set_attr "prefix_data16" "1,*")
5556 (set_attr "prefix" "orig,vex")
5557 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the high part of a widening multiply on the VI2_AVX2 modes:
;; both inputs are extended (any_extend) to <ssedoublemode>, multiplied, and
;; the product is shifted right (lshiftrt).
5559 (define_expand "<s>mul<mode>3_highpart"
5560 [(set (match_operand:VI2_AVX2 0 "register_operand")
5562 (lshiftrt:<ssedoublemode>
5563 (mult:<ssedoublemode>
5564 (any_extend:<ssedoublemode>
5565 (match_operand:VI2_AVX2 1 "nonimmediate_operand"))
5566 (any_extend:<ssedoublemode>
5567 (match_operand:VI2_AVX2 2 "nonimmediate_operand")))
5570 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; pmulh<u>w / vpmulh<u>w: high-part multiply instruction matching the
;; extend/multiply/shift-right RTL shape of the expander of the same name.
5572 (define_insn "*<s>mul<mode>3_highpart"
5573 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
5575 (lshiftrt:<ssedoublemode>
5576 (mult:<ssedoublemode>
5577 (any_extend:<ssedoublemode>
5578 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,x"))
5579 (any_extend:<ssedoublemode>
5580 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,xm")))
5582 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
5584 pmulh<u>w\t{%2, %0|%0, %2}
5585 vpmulh<u>w\t{%2, %1, %0|%0, %1, %2}"
5586 [(set_attr "isa" "noavx,avx")
5587 (set_attr "type" "sseimul")
5588 (set_attr "prefix_data16" "1,*")
5589 (set_attr "prefix" "orig,vex")
5590 (set_attr "mode" "<sseinsnmode>")])
;; Widening multiply of the even-indexed elements (0, 2, 4, 6) of two V8SI
;; vectors, producing a V4DI result.
5592 (define_expand "vec_widen_umult_even_v8si"
5593 [(set (match_operand:V4DI 0 "register_operand")
5597 (match_operand:V8SI 1 "nonimmediate_operand")
5598 (parallel [(const_int 0) (const_int 2)
5599 (const_int 4) (const_int 6)])))
5602 (match_operand:V8SI 2 "nonimmediate_operand")
5603 (parallel [(const_int 0) (const_int 2)
5604 (const_int 4) (const_int 6)])))))]
5606 "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")
;; vpmuludq on 256-bit vectors (AVX2 only, single VEX alternative).
;; Operand 1 is marked commutative with "%".
5608 (define_insn "*vec_widen_umult_even_v8si"
5609 [(set (match_operand:V4DI 0 "register_operand" "=x")
5613 (match_operand:V8SI 1 "nonimmediate_operand" "%x")
5614 (parallel [(const_int 0) (const_int 2)
5615 (const_int 4) (const_int 6)])))
5618 (match_operand:V8SI 2 "nonimmediate_operand" "xm")
5619 (parallel [(const_int 0) (const_int 2)
5620 (const_int 4) (const_int 6)])))))]
5621 "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V8SImode, operands)"
5622 "vpmuludq\t{%2, %1, %0|%0, %1, %2}"
5623 [(set_attr "type" "sseimul")
5624 (set_attr "prefix" "vex")
5625 (set_attr "mode" "OI")])
;; Widening multiply of the even-indexed elements (0 and 2) of two V4SI
;; vectors, producing a V2DI result.
5627 (define_expand "vec_widen_umult_even_v4si"
5628 [(set (match_operand:V2DI 0 "register_operand")
5632 (match_operand:V4SI 1 "nonimmediate_operand")
5633 (parallel [(const_int 0) (const_int 2)])))
5636 (match_operand:V4SI 2 "nonimmediate_operand")
5637 (parallel [(const_int 0) (const_int 2)])))))]
5639 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; pmuludq / vpmuludq on 128-bit vectors: legacy SSE2 alternative with the
;; destination tied to operand 1, plus a three-operand VEX alternative.
5641 (define_insn "*vec_widen_umult_even_v4si"
5642 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
5646 (match_operand:V4SI 1 "nonimmediate_operand" "%0,x")
5647 (parallel [(const_int 0) (const_int 2)])))
5650 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
5651 (parallel [(const_int 0) (const_int 2)])))))]
5652 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5654 pmuludq\t{%2, %0|%0, %2}
5655 vpmuludq\t{%2, %1, %0|%0, %1, %2}"
5656 [(set_attr "isa" "noavx,avx")
5657 (set_attr "type" "sseimul")
5658 (set_attr "prefix_data16" "1,*")
5659 (set_attr "prefix" "orig,vex")
5660 (set_attr "mode" "TI")])
;; Signed counterpart of vec_widen_umult_even_v8si: widening multiply of the
;; even-indexed elements (0, 2, 4, 6) of two V8SI vectors into V4DI.
5662 (define_expand "vec_widen_smult_even_v8si"
5663 [(set (match_operand:V4DI 0 "register_operand")
5667 (match_operand:V8SI 1 "nonimmediate_operand")
5668 (parallel [(const_int 0) (const_int 2)
5669 (const_int 4) (const_int 6)])))
5672 (match_operand:V8SI 2 "nonimmediate_operand")
5673 (parallel [(const_int 0) (const_int 2)
5674 (const_int 4) (const_int 6)])))))]
5676 "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")
;; vpmuldq on 256-bit vectors (AVX2 only): signed widening multiply of the
;; even-indexed elements (0, 2, 4, 6) of two V8SI vectors into V4DI.
;; The multiply is commutative and the insn condition goes through
;; ix86_binary_operator_ok, so operand 1 carries the "%" modifier, matching
;; *vec_widen_umult_even_v8si and *sse4_1_mulv2siv2di3.  This lets the
;; register allocator swap the inputs when operand 1 is the one in memory
;; instead of reloading it into a register.
5678 (define_insn "*vec_widen_smult_even_v8si"
5679 [(set (match_operand:V4DI 0 "register_operand" "=x")
5683 (match_operand:V8SI 1 "nonimmediate_operand" "%x")
5684 (parallel [(const_int 0) (const_int 2)
5685 (const_int 4) (const_int 6)])))
5688 (match_operand:V8SI 2 "nonimmediate_operand" "xm")
5689 (parallel [(const_int 0) (const_int 2)
5690 (const_int 4) (const_int 6)])))))]
5691 "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V8SImode, operands)"
5692 "vpmuldq\t{%2, %1, %0|%0, %1, %2}"
5693 [(set_attr "isa" "avx")
5694 (set_attr "type" "sseimul")
5695 (set_attr "prefix_extra" "1")
5696 (set_attr "prefix" "vex")
5697 (set_attr "mode" "OI")])
;; Expander for the SSE4.1 widening multiply of elements 0 and 2 of two V4SI
;; vectors into V2DI.  Operands are legalized by
;; ix86_fixup_binary_operands_no_copy.
5699 (define_expand "sse4_1_mulv2siv2di3"
5700 [(set (match_operand:V2DI 0 "register_operand")
5704 (match_operand:V4SI 1 "nonimmediate_operand")
5705 (parallel [(const_int 0) (const_int 2)])))
5708 (match_operand:V4SI 2 "nonimmediate_operand")
5709 (parallel [(const_int 0) (const_int 2)])))))]
5711 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; pmuldq / vpmuldq on 128-bit vectors (requires SSE4.1).  Operand 1 is
;; commutative; alternative 0 ties the destination to it.
5713 (define_insn "*sse4_1_mulv2siv2di3"
5714 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
5718 (match_operand:V4SI 1 "nonimmediate_operand" "%0,x")
5719 (parallel [(const_int 0) (const_int 2)])))
5722 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
5723 (parallel [(const_int 0) (const_int 2)])))))]
5724 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5726 pmuldq\t{%2, %0|%0, %2}
5727 vpmuldq\t{%2, %1, %0|%0, %1, %2}"
5728 [(set_attr "isa" "noavx,avx")
5729 (set_attr "type" "sseimul")
5730 (set_attr "prefix_data16" "1,*")
5731 (set_attr "prefix_extra" "1")
5732 (set_attr "prefix" "orig,vex")
5733 (set_attr "mode" "TI")])
;; Expander for the 256-bit packed multiply-add of words.  The RTL selects
;; the even-indexed (0, 2, ..., 14) and the odd-indexed (1, 3, ..., 15)
;; elements of both V16HI inputs as separate V8HI vectors; the result is
;; V8SI.
5735 (define_expand "avx2_pmaddwd"
5736 [(set (match_operand:V8SI 0 "register_operand")
5741 (match_operand:V16HI 1 "nonimmediate_operand")
5742 (parallel [(const_int 0) (const_int 2)
5743 (const_int 4) (const_int 6)
5744 (const_int 8) (const_int 10)
5745 (const_int 12) (const_int 14)])))
5748 (match_operand:V16HI 2 "nonimmediate_operand")
5749 (parallel [(const_int 0) (const_int 2)
5750 (const_int 4) (const_int 6)
5751 (const_int 8) (const_int 10)
5752 (const_int 12) (const_int 14)]))))
5755 (vec_select:V8HI (match_dup 1)
5756 (parallel [(const_int 1) (const_int 3)
5757 (const_int 5) (const_int 7)
5758 (const_int 9) (const_int 11)
5759 (const_int 13) (const_int 15)])))
5761 (vec_select:V8HI (match_dup 2)
5762 (parallel [(const_int 1) (const_int 3)
5763 (const_int 5) (const_int 7)
5764 (const_int 9) (const_int 11)
5765 (const_int 13) (const_int 15)]))))))]
5767 "ix86_fixup_binary_operands_no_copy (MULT, V16HImode, operands);")
;; vpmaddwd on 256-bit vectors (AVX2 only).  The pattern mirrors the
;; even/odd element selection of the avx2_pmaddwd expander; operand 1 is
;; marked commutative with "%".
5769 (define_insn "*avx2_pmaddwd"
5770 [(set (match_operand:V8SI 0 "register_operand" "=x")
5775 (match_operand:V16HI 1 "nonimmediate_operand" "%x")
5776 (parallel [(const_int 0) (const_int 2)
5777 (const_int 4) (const_int 6)
5778 (const_int 8) (const_int 10)
5779 (const_int 12) (const_int 14)])))
5782 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
5783 (parallel [(const_int 0) (const_int 2)
5784 (const_int 4) (const_int 6)
5785 (const_int 8) (const_int 10)
5786 (const_int 12) (const_int 14)]))))
5789 (vec_select:V8HI (match_dup 1)
5790 (parallel [(const_int 1) (const_int 3)
5791 (const_int 5) (const_int 7)
5792 (const_int 9) (const_int 11)
5793 (const_int 13) (const_int 15)])))
5795 (vec_select:V8HI (match_dup 2)
5796 (parallel [(const_int 1) (const_int 3)
5797 (const_int 5) (const_int 7)
5798 (const_int 9) (const_int 11)
5799 (const_int 13) (const_int 15)]))))))]
5800 "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V16HImode, operands)"
5801 "vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
5802 [(set_attr "type" "sseiadd")
5803 (set_attr "prefix" "vex")
5804 (set_attr "mode" "OI")])
;; Expander for the 128-bit packed multiply-add of words.  The RTL selects
;; the even-indexed (0, 2, 4, 6) and the odd-indexed (1, 3, 5, 7) elements
;; of both V8HI inputs as separate V4HI vectors; the result is V4SI.
5806 (define_expand "sse2_pmaddwd"
5807 [(set (match_operand:V4SI 0 "register_operand")
5812 (match_operand:V8HI 1 "nonimmediate_operand")
5813 (parallel [(const_int 0) (const_int 2)
5814 (const_int 4) (const_int 6)])))
5817 (match_operand:V8HI 2 "nonimmediate_operand")
5818 (parallel [(const_int 0) (const_int 2)
5819 (const_int 4) (const_int 6)]))))
5822 (vec_select:V4HI (match_dup 1)
5823 (parallel [(const_int 1) (const_int 3)
5824 (const_int 5) (const_int 7)])))
5826 (vec_select:V4HI (match_dup 2)
5827 (parallel [(const_int 1) (const_int 3)
5828 (const_int 5) (const_int 7)]))))))]
5830 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; pmaddwd / vpmaddwd on 128-bit vectors.  Alternative 0 is the legacy SSE2
;; form with the destination tied to operand 1; alternative 1 is the VEX
;; three-operand form.
5832 (define_insn "*sse2_pmaddwd"
5833 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
5838 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x")
5839 (parallel [(const_int 0) (const_int 2)
5840 (const_int 4) (const_int 6)])))
5843 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
5844 (parallel [(const_int 0) (const_int 2)
5845 (const_int 4) (const_int 6)]))))
5848 (vec_select:V4HI (match_dup 1)
5849 (parallel [(const_int 1) (const_int 3)
5850 (const_int 5) (const_int 7)])))
5852 (vec_select:V4HI (match_dup 2)
5853 (parallel [(const_int 1) (const_int 3)
5854 (const_int 5) (const_int 7)]))))))]
5855 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5857 pmaddwd\t{%2, %0|%0, %2}
5858 vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
5859 [(set_attr "isa" "noavx,avx")
5860 (set_attr "type" "sseiadd")
5861 (set_attr "atom_unit" "simul")
5862 (set_attr "prefix_data16" "1,*")
5863 (set_attr "prefix" "orig,vex")
5864 (set_attr "mode" "TI")])
;; Vector multiply expander for the VI4_AVX512F modes.  Inputs may be any
;; general_vector_operand.  One path forces operands that are not
;; nonimmediate_operand into registers and then legalizes them with
;; ix86_fixup_binary_operands_no_copy; the other path calls
;; ix86_expand_sse2_mulv4si3.
5866 (define_expand "mul<mode>3"
5867 [(set (match_operand:VI4_AVX512F 0 "register_operand")
5869 (match_operand:VI4_AVX512F 1 "general_vector_operand")
5870 (match_operand:VI4_AVX512F 2 "general_vector_operand")))]
5875 if (!nonimmediate_operand (operands[1], <MODE>mode))
5876 operands[1] = force_reg (<MODE>mode, operands[1]);
5877 if (!nonimmediate_operand (operands[2], <MODE>mode))
5878 operands[2] = force_reg (<MODE>mode, operands[2]);
5879 ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
5883 ix86_expand_sse2_mulv4si3 (operands[0], operands[1], operands[2]);
;; pmulld / vpmulld: vector multiply on the VI4_AVX512F modes (requires
;; SSE4.1).  Alternative 0 is the legacy form, alternative 1 the VEX form.
5888 (define_insn "*<sse4_1_avx2>_mul<mode>3"
5889 [(set (match_operand:VI4_AVX512F 0 "register_operand" "=x,v")
5891 (match_operand:VI4_AVX512F 1 "nonimmediate_operand" "%0,v")
5892 (match_operand:VI4_AVX512F 2 "nonimmediate_operand" "xm,vm")))]
5893 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
5895 pmulld\t{%2, %0|%0, %2}
5896 vpmulld\t{%2, %1, %0|%0, %1, %2}"
5897 [(set_attr "isa" "noavx,avx")
5898 (set_attr "type" "sseimul")
5899 (set_attr "prefix_extra" "1")
5900 (set_attr "prefix" "orig,vex")
5901 (set_attr "btver2_decode" "vector,vector")
5902 (set_attr "mode" "<sseinsnmode>")])
;; Vector multiply for the VI8_AVX2_AVX512F modes.  Both inputs must be in
;; registers; the expansion is handed to ix86_expand_sse2_mulvxdi3.
5904 (define_expand "mul<mode>3"
5905 [(set (match_operand:VI8_AVX2_AVX512F 0 "register_operand")
5906 (mult:VI8_AVX2_AVX512F
5907 (match_operand:VI8_AVX2_AVX512F 1 "register_operand")
5908 (match_operand:VI8_AVX2_AVX512F 2 "register_operand")))]
5911 ix86_expand_sse2_mulvxdi3 (operands[0], operands[1], operands[2]);
;; Widening multiply, "hi" variant, for the VI124_AVX2 modes; the result has
;; mode <sseunpackmode>.  Expansion is done by ix86_expand_mul_widen_hilo.
5915 (define_expand "vec_widen_<s>mult_hi_<mode>"
5916 [(match_operand:<sseunpackmode> 0 "register_operand")
5917 (any_extend:<sseunpackmode>
5918 (match_operand:VI124_AVX2 1 "register_operand"))
5919 (match_operand:VI124_AVX2 2 "register_operand")]
5922 ix86_expand_mul_widen_hilo (operands[0], operands[1], operands[2],
;; Widening multiply, "lo" variant, for the VI124_AVX2 modes; the result has
;; mode <sseunpackmode>.  Expansion is done by ix86_expand_mul_widen_hilo.
5927 (define_expand "vec_widen_<s>mult_lo_<mode>"
5928 [(match_operand:<sseunpackmode> 0 "register_operand")
5929 (any_extend:<sseunpackmode>
5930 (match_operand:VI124_AVX2 1 "register_operand"))
5931 (match_operand:VI124_AVX2 2 "register_operand")]
5934 ix86_expand_mul_widen_hilo (operands[0], operands[1], operands[2],
5939 ;; Most widen_<s>mult_even_<mode> can be handled directly from other
5940 ;; named patterns, but signed V4SI needs special help for plain SSE2.
;; The expansion itself is performed by ix86_expand_mul_widen_evenodd.
5941 (define_expand "vec_widen_smult_even_v4si"
5942 [(match_operand:V2DI 0 "register_operand")
5943 (match_operand:V4SI 1 "nonimmediate_operand")
5944 (match_operand:V4SI 2 "nonimmediate_operand")]
5947 ix86_expand_mul_widen_evenodd (operands[0], operands[1], operands[2],
;; Widening multiply, "odd" variant, for the VI4_AVX512F modes; inputs may
;; be any general_vector_operand.  Expansion is done by
;; ix86_expand_mul_widen_evenodd.
5952 (define_expand "vec_widen_<s>mult_odd_<mode>"
5953 [(match_operand:<sseunpackmode> 0 "register_operand")
5954 (any_extend:<sseunpackmode>
5955 (match_operand:VI4_AVX512F 1 "general_vector_operand"))
5956 (match_operand:VI4_AVX512F 2 "general_vector_operand")]
5959 ix86_expand_mul_widen_evenodd (operands[0], operands[1], operands[2],
;; Signed dot-product expander for the VI2_AVX2 modes.  A pmaddwd of
;; operands 1 and 2 is emitted into a fresh <sseunpackmode> temporary, then
;; operands[0] is set to a PLUS expression in <sseunpackmode>.
;; NOTE(review): presumably the PLUS adds the temporary to the accumulator
;; in operands[3] -- confirm against the full pattern.
5964 (define_expand "sdot_prod<mode>"
5965 [(match_operand:<sseunpackmode> 0 "register_operand")
5966 (match_operand:VI2_AVX2 1 "register_operand")
5967 (match_operand:VI2_AVX2 2 "register_operand")
5968 (match_operand:<sseunpackmode> 3 "register_operand")]
5971 rtx t = gen_reg_rtx (<sseunpackmode>mode);
5972 emit_insn (gen_<sse2_avx2>_pmaddwd (t, operands[1], operands[2]));
5973 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
5974 gen_rtx_PLUS (<sseunpackmode>mode,
5979 ;; Normally we use widen_mul_even/odd, but combine can't quite get it all
5980 ;; back together when madd is available.
;; The expansion chains two XOP multiply-accumulate insns: xop_pmacsdqh
;; takes operands[3] as its last input and writes a V2DI temporary, which
;; xop_pmacsdql then takes as its last input to produce operands[0].
5981 (define_expand "sdot_prodv4si"
5982 [(match_operand:V2DI 0 "register_operand")
5983 (match_operand:V4SI 1 "register_operand")
5984 (match_operand:V4SI 2 "register_operand")
5985 (match_operand:V2DI 3 "register_operand")]
5988 rtx t = gen_reg_rtx (V2DImode);
5989 emit_insn (gen_xop_pmacsdqh (t, operands[1], operands[2], operands[3]));
5990 emit_insn (gen_xop_pmacsdql (operands[0], operands[1], operands[2], t));
;; psra / vpsra: vector arithmetic right shift on the VI24_AVX2 modes.  The
;; SImode shift count (operand 2) may be a register or a constant
;; (constraint "xN"); length_immediate depends on whether it is a
;; const_int.
5994 (define_insn "ashr<mode>3"
5995 [(set (match_operand:VI24_AVX2 0 "register_operand" "=x,x")
5997 (match_operand:VI24_AVX2 1 "register_operand" "0,x")
5998 (match_operand:SI 2 "nonmemory_operand" "xN,xN")))]
6001 psra<ssemodesuffix>\t{%2, %0|%0, %2}
6002 vpsra<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
6003 [(set_attr "isa" "noavx,avx")
6004 (set_attr "type" "sseishft")
6005 (set (attr "length_immediate")
6006 (if_then_else (match_operand 2 "const_int_operand")
6008 (const_string "0")))
6009 (set_attr "prefix_data16" "1,*")
6010 (set_attr "prefix" "orig,vex")
6011 (set_attr "mode" "<sseinsnmode>")])
;; Vector logical shifts (any_lshift) on the VI248_AVX2 modes.  The SImode
;; shift count (operand 2) may be a register or a constant (constraint
;; "xN"); length_immediate depends on whether it is a const_int.
6013 (define_insn "<shift_insn><mode>3"
6014 [(set (match_operand:VI248_AVX2 0 "register_operand" "=x,x")
6015 (any_lshift:VI248_AVX2
6016 (match_operand:VI248_AVX2 1 "register_operand" "0,x")
6017 (match_operand:SI 2 "nonmemory_operand" "xN,xN")))]
6020 p<vshift><ssemodesuffix>\t{%2, %0|%0, %2}
6021 vp<vshift><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
6022 [(set_attr "isa" "noavx,avx")
6023 (set_attr "type" "sseishft")
6024 (set (attr "length_immediate")
6025 (if_then_else (match_operand 2 "const_int_operand")
6027 (const_string "0")))
6028 (set_attr "prefix_data16" "1,*")
6029 (set_attr "prefix" "orig,vex")
6030 (set_attr "mode" "<sseinsnmode>")])
;; Whole-vector left shift for the VI_128 modes.  The shift amount must
;; satisfy const_0_to_255_mul_8_operand.  Operands 0 and 1 are re-viewed in
;; V1TImode with gen_lowpart before the pattern is emitted.
6032 (define_expand "vec_shl_<mode>"
6033 [(set (match_operand:VI_128 0 "register_operand")
6035 (match_operand:VI_128 1 "register_operand")
6036 (match_operand:SI 2 "const_0_to_255_mul_8_operand")))]
6039 operands[0] = gen_lowpart (V1TImode, operands[0]);
6040 operands[1] = gen_lowpart (V1TImode, operands[1]);
;; pslldq / vpslldq on the VIMAX_AVX2 modes.  Operand 2 must satisfy
;; const_0_to_255_mul_8_operand; the output code divides it by 8 before
;; printing the instruction.
6043 (define_insn "<sse2_avx2>_ashl<mode>3"
6044 [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,x")
6046 (match_operand:VIMAX_AVX2 1 "register_operand" "0,x")
6047 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
6050 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
6052 switch (which_alternative)
6055 return "pslldq\t{%2, %0|%0, %2}";
6057 return "vpslldq\t{%2, %1, %0|%0, %1, %2}";
6062 [(set_attr "isa" "noavx,avx")
6063 (set_attr "type" "sseishft")
6064 (set_attr "length_immediate" "1")
6065 (set_attr "prefix_data16" "1,*")
6066 (set_attr "prefix" "orig,vex")
6067 (set_attr "mode" "<sseinsnmode>")])
;; Whole-vector right shift for the VI_128 modes.  The shift amount must
;; satisfy const_0_to_255_mul_8_operand.  Operands 0 and 1 are re-viewed in
;; V1TImode with gen_lowpart before the pattern is emitted.
6069 (define_expand "vec_shr_<mode>"
6070 [(set (match_operand:VI_128 0 "register_operand")
6072 (match_operand:VI_128 1 "register_operand")
6073 (match_operand:SI 2 "const_0_to_255_mul_8_operand")))]
6076 operands[0] = gen_lowpart (V1TImode, operands[0]);
6077 operands[1] = gen_lowpart (V1TImode, operands[1]);
;; psrldq / vpsrldq on the VIMAX_AVX2 modes.  Operand 2 must satisfy
;; const_0_to_255_mul_8_operand; the output code divides it by 8 before
;; printing the instruction.
6080 (define_insn "<sse2_avx2>_lshr<mode>3"
6081 [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,x")
6082 (lshiftrt:VIMAX_AVX2
6083 (match_operand:VIMAX_AVX2 1 "register_operand" "0,x")
6084 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
6087 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
6089 switch (which_alternative)
6092 return "psrldq\t{%2, %0|%0, %2}";
6094 return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
6099 [(set_attr "isa" "noavx,avx")
6100 (set_attr "type" "sseishft")
6101 (set_attr "length_immediate" "1")
6102 (set_attr "atom_unit" "sishuf")
6103 (set_attr "prefix_data16" "1,*")
6104 (set_attr "prefix" "orig,vex")
6105 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the <code> operation on the 256-bit VI124_256 modes.
;; Operands are legalized by ix86_fixup_binary_operands_no_copy.
6108 (define_expand "<code><mode>3"
6109 [(set (match_operand:VI124_256 0 "register_operand")
6111 (match_operand:VI124_256 1 "nonimmediate_operand")
6112 (match_operand:VI124_256 2 "nonimmediate_operand")))]
6114 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; vp<maxmin_int>: 256-bit integer max/min instruction (AVX2 only, single
;; VEX alternative).  Operand 1 is marked commutative with "%".
6116 (define_insn "*avx2_<code><mode>3"
6117 [(set (match_operand:VI124_256 0 "register_operand" "=v")
6119 (match_operand:VI124_256 1 "nonimmediate_operand" "%v")
6120 (match_operand:VI124_256 2 "nonimmediate_operand" "vm")))]
6121 "TARGET_AVX2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6122 "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
6123 [(set_attr "type" "sseiadd")
6124 (set_attr "prefix_extra" "1")
6125 (set_attr "prefix" "vex")
6126 (set_attr "mode" "OI")])
;; Max/min expander for the VI8_AVX2 modes, implemented as a vector
;; conditional move through ix86_expand_int_vcond.  For SMAX/UMAX the
;; selected values are (operand 1, operand 2); for the other codes they are
;; swapped.  The comparison is operand 1 > operand 2, using GTU for
;; UMAX/UMIN and GT otherwise.
6128 (define_expand "<code><mode>3"
6129 [(set (match_operand:VI8_AVX2 0 "register_operand")
6131 (match_operand:VI8_AVX2 1 "register_operand")
6132 (match_operand:VI8_AVX2 2 "register_operand")))]
6139 xops[0] = operands[0];
6141 if (<CODE> == SMAX || <CODE> == UMAX)
6143 xops[1] = operands[1];
6144 xops[2] = operands[2];
6148 xops[1] = operands[2];
6149 xops[2] = operands[1];
6152 code = (<CODE> == UMAX || <CODE> == UMIN) ? GTU : GT;
6154 xops[3] = gen_rtx_fmt_ee (code, VOIDmode, operands[1], operands[2]);
6155 xops[4] = operands[1];
6156 xops[5] = operands[2];
6158 ok = ix86_expand_int_vcond (xops);
;; Max/min expander for the VI124_128 modes that compares with GT.  With
;; SSE4.1, or for V8HImode, the operands are only legalized with
;; ix86_fixup_binary_operands_no_copy.  Otherwise both inputs are forced
;; into registers and the operation is expanded through
;; ix86_expand_int_vcond.
6163 (define_expand "<code><mode>3"
6164 [(set (match_operand:VI124_128 0 "register_operand")
6166 (match_operand:VI124_128 1 "nonimmediate_operand")
6167 (match_operand:VI124_128 2 "nonimmediate_operand")))]
6170 if (TARGET_SSE4_1 || <MODE>mode == V8HImode)
6171 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
6177 xops[0] = operands[0];
6178 operands[1] = force_reg (<MODE>mode, operands[1]);
6179 operands[2] = force_reg (<MODE>mode, operands[2]);
6183 xops[1] = operands[1];
6184 xops[2] = operands[2];
6188 xops[1] = operands[2];
6189 xops[2] = operands[1];
6192 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
6193 xops[4] = operands[1];
6194 xops[5] = operands[2];
6196 ok = ix86_expand_int_vcond (xops);
;; p<maxmin_int> / vp<maxmin_int> on the VI14_128 modes (requires SSE4.1),
;; in legacy SSE and VEX alternatives.
6202 (define_insn "*sse4_1_<code><mode>3"
6203 [(set (match_operand:VI14_128 0 "register_operand" "=x,x")
6205 (match_operand:VI14_128 1 "nonimmediate_operand" "%0,x")
6206 (match_operand:VI14_128 2 "nonimmediate_operand" "xm,xm")))]
6207 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6209 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
6210 vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
6211 [(set_attr "isa" "noavx,avx")
6212 (set_attr "type" "sseiadd")
6213 (set_attr "prefix_extra" "1,*")
6214 (set_attr "prefix" "orig,vex")
6215 (set_attr "mode" "TI")])
;; p<maxmin_int>w / vp<maxmin_int>w on V8HI; available from plain SSE2.
6217 (define_insn "*<code>v8hi3"
6218 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
6220 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x")
6221 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")))]
6222 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V8HImode, operands)"
6224 p<maxmin_int>w\t{%2, %0|%0, %2}
6225 vp<maxmin_int>w\t{%2, %1, %0|%0, %1, %2}"
6226 [(set_attr "isa" "noavx,avx")
6227 (set_attr "type" "sseiadd")
6228 (set_attr "prefix_data16" "1,*")
6229 (set_attr "prefix_extra" "*,1")
6230 (set_attr "prefix" "orig,vex")
6231 (set_attr "mode" "TI")])
;; Max/min expander for the VI124_128 modes that compares with GTU.  Three
;; strategies are used:
;;  - with SSE4.1, or for V16QImode, the operands are only legalized;
;;  - UMAX on V8HImode is computed as an unsigned saturating subtract
;;    (sse2_ussubv8hi3) of operand 2 from operand 1 followed by an add of
;;    operand 2, using a fresh temporary when the destination equals
;;    operand 2;
;;  - otherwise both inputs are forced into registers and the operation is
;;    expanded through ix86_expand_int_vcond.
6233 (define_expand "<code><mode>3"
6234 [(set (match_operand:VI124_128 0 "register_operand")
6236 (match_operand:VI124_128 1 "nonimmediate_operand")
6237 (match_operand:VI124_128 2 "nonimmediate_operand")))]
6240 if (TARGET_SSE4_1 || <MODE>mode == V16QImode)
6241 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
6242 else if (<CODE> == UMAX && <MODE>mode == V8HImode)
6244 rtx op0 = operands[0], op2 = operands[2], op3 = op0;
6245 operands[1] = force_reg (<MODE>mode, operands[1]);
6246 if (rtx_equal_p (op3, op2))
6247 op3 = gen_reg_rtx (V8HImode);
6248 emit_insn (gen_sse2_ussubv8hi3 (op3, operands[1], op2));
6249 emit_insn (gen_addv8hi3 (op0, op3, op2));
6257 operands[1] = force_reg (<MODE>mode, operands[1]);
6258 operands[2] = force_reg (<MODE>mode, operands[2]);
6260 xops[0] = operands[0];
6264 xops[1] = operands[1];
6265 xops[2] = operands[2];
6269 xops[1] = operands[2];
6270 xops[2] = operands[1];
6273 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
6274 xops[4] = operands[1];
6275 xops[5] = operands[2];
6277 ok = ix86_expand_int_vcond (xops);
;; p<maxmin_int> / vp<maxmin_int> on the VI24_128 modes (requires SSE4.1),
;; in legacy SSE and VEX alternatives.
6283 (define_insn "*sse4_1_<code><mode>3"
6284 [(set (match_operand:VI24_128 0 "register_operand" "=x,x")
6286 (match_operand:VI24_128 1 "nonimmediate_operand" "%0,x")
6287 (match_operand:VI24_128 2 "nonimmediate_operand" "xm,xm")))]
6288 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6290 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
6291 vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
6292 [(set_attr "isa" "noavx,avx")
6293 (set_attr "type" "sseiadd")
6294 (set_attr "prefix_extra" "1,*")
6295 (set_attr "prefix" "orig,vex")
6296 (set_attr "mode" "TI")])
;; p<maxmin_int>b / vp<maxmin_int>b on V16QI; available from plain SSE2.
6298 (define_insn "*<code>v16qi3"
6299 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
6301 (match_operand:V16QI 1 "nonimmediate_operand" "%0,x")
6302 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")))]
6303 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V16QImode, operands)"
6305 p<maxmin_int>b\t{%2, %0|%0, %2}
6306 vp<maxmin_int>b\t{%2, %1, %0|%0, %1, %2}"
6307 [(set_attr "isa" "noavx,avx")
6308 (set_attr "type" "sseiadd")
6309 (set_attr "prefix_data16" "1,*")
6310 (set_attr "prefix_extra" "*,1")
6311 (set_attr "prefix" "orig,vex")
6312 (set_attr "mode" "TI")])
6314 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6316 ;; Parallel integral comparisons
6318 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander for the 256-bit integer equality comparison (VI_256 modes).
;; Operands are legalized by ix86_fixup_binary_operands_no_copy with code
;; EQ.
6320 (define_expand "avx2_eq<mode>3"
6321 [(set (match_operand:VI_256 0 "register_operand")
6323 (match_operand:VI_256 1 "nonimmediate_operand")
6324 (match_operand:VI_256 2 "nonimmediate_operand")))]
6326 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
;; vpcmpeq on 256-bit vectors (AVX2 only, single VEX alternative).
;; Operand 1 is marked commutative with "%".
6328 (define_insn "*avx2_eq<mode>3"
6329 [(set (match_operand:VI_256 0 "register_operand" "=x")
6331 (match_operand:VI_256 1 "nonimmediate_operand" "%x")
6332 (match_operand:VI_256 2 "nonimmediate_operand" "xm")))]
6333 "TARGET_AVX2 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
6334 "vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
6335 [(set_attr "type" "ssecmp")
6336 (set_attr "prefix_extra" "1")
6337 (set_attr "prefix" "vex")
6338 (set_attr "mode" "OI")])
;; pcmpeqq / vpcmpeqq: V2DI equality comparison (requires SSE4.1).
6340 (define_insn "*sse4_1_eqv2di3"
6341 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
6343 (match_operand:V2DI 1 "nonimmediate_operand" "%0,x")
6344 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")))]
6345 "TARGET_SSE4_1 && ix86_binary_operator_ok (EQ, V2DImode, operands)"
6347 pcmpeqq\t{%2, %0|%0, %2}
6348 vpcmpeqq\t{%2, %1, %0|%0, %1, %2}"
6349 [(set_attr "isa" "noavx,avx")
6350 (set_attr "type" "ssecmp")
6351 (set_attr "prefix_extra" "1")
6352 (set_attr "prefix" "orig,vex")
6353 (set_attr "mode" "TI")])
;; pcmpeq / vpcmpeq on the VI124_128 modes.  The pattern is disabled when
;; XOP is enabled (!TARGET_XOP).
6355 (define_insn "*sse2_eq<mode>3"
6356 [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
6358 (match_operand:VI124_128 1 "nonimmediate_operand" "%0,x")
6359 (match_operand:VI124_128 2 "nonimmediate_operand" "xm,xm")))]
6360 "TARGET_SSE2 && !TARGET_XOP
6361 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
6363 pcmpeq<ssemodesuffix>\t{%2, %0|%0, %2}
6364 vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
6365 [(set_attr "isa" "noavx,avx")
6366 (set_attr "type" "ssecmp")
6367 (set_attr "prefix_data16" "1,*")
6368 (set_attr "prefix" "orig,vex")
6369 (set_attr "mode" "TI")])
;; Expander for the 128-bit integer equality comparison on the VI124_128
;; modes; enabled for SSE2 when XOP is not.  Operands are legalized by
;; ix86_fixup_binary_operands_no_copy with code EQ.
6371 (define_expand "sse2_eq<mode>3"
6372 [(set (match_operand:VI124_128 0 "register_operand")
6374 (match_operand:VI124_128 1 "nonimmediate_operand")
6375 (match_operand:VI124_128 2 "nonimmediate_operand")))]
6376 "TARGET_SSE2 && !TARGET_XOP "
6377 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
;; Expander for the V2DI equality comparison.  Operands are legalized by
;; ix86_fixup_binary_operands_no_copy with code EQ.
6379 (define_expand "sse4_1_eqv2di3"
6380 [(set (match_operand:V2DI 0 "register_operand")
6382 (match_operand:V2DI 1 "nonimmediate_operand")
6383 (match_operand:V2DI 2 "nonimmediate_operand")))]
6385 "ix86_fixup_binary_operands_no_copy (EQ, V2DImode, operands);")
;; pcmpgtq / vpcmpgtq: V2DI greater-than comparison.  Operand 1 must be a
;; register and carries no "%" modifier, so the inputs are never swapped.
6387 (define_insn "sse4_2_gtv2di3"
6388 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
6390 (match_operand:V2DI 1 "register_operand" "0,x")
6391 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")))]
6394 pcmpgtq\t{%2, %0|%0, %2}
6395 vpcmpgtq\t{%2, %1, %0|%0, %1, %2}"
6396 [(set_attr "isa" "noavx,avx")
6397 (set_attr "type" "ssecmp")
6398 (set_attr "prefix_extra" "1")
6399 (set_attr "prefix" "orig,vex")
6400 (set_attr "mode" "TI")])
;; vpcmpgt on the 256-bit VI_256 modes (single VEX alternative).  Operand 1
;; must be a register and is not commutative.
6402 (define_insn "avx2_gt<mode>3"
6403 [(set (match_operand:VI_256 0 "register_operand" "=x")
6405 (match_operand:VI_256 1 "register_operand" "x")
6406 (match_operand:VI_256 2 "nonimmediate_operand" "xm")))]
6408 "vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
6409 [(set_attr "type" "ssecmp")
6410 (set_attr "prefix_extra" "1")
6411 (set_attr "prefix" "vex")
6412 (set_attr "mode" "OI")])
;; pcmpgt / vpcmpgt on the VI124_128 modes; enabled for SSE2 when XOP is
;; not.  Operand 1 must be a register and is not commutative.
6414 (define_insn "sse2_gt<mode>3"
6415 [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
6417 (match_operand:VI124_128 1 "register_operand" "0,x")
6418 (match_operand:VI124_128 2 "nonimmediate_operand" "xm,xm")))]
6419 "TARGET_SSE2 && !TARGET_XOP"
6421 pcmpgt<ssemodesuffix>\t{%2, %0|%0, %2}
6422 vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
6423 [(set_attr "isa" "noavx,avx")
6424 (set_attr "type" "ssecmp")
6425 (set_attr "prefix_data16" "1,*")
6426 (set_attr "prefix" "orig,vex")
6427 (set_attr "mode" "TI")])
;; Vector conditional move for 256-bit modes: operands 1/2 (V_256) are
;; selected according to comparison operator 3 applied to the VI_256
;; operands 4 and 5.  The condition requires the data mode and the
;; comparison mode to have the same number of elements.  Expansion is done
;; by ix86_expand_int_vcond.
6429 (define_expand "vcond<V_256:mode><VI_256:mode>"
6430 [(set (match_operand:V_256 0 "register_operand")
6432 (match_operator 3 ""
6433 [(match_operand:VI_256 4 "nonimmediate_operand")
6434 (match_operand:VI_256 5 "general_operand")])
6435 (match_operand:V_256 1)
6436 (match_operand:V_256 2)))]
6438 && (GET_MODE_NUNITS (<V_256:MODE>mode)
6439 == GET_MODE_NUNITS (<VI_256:MODE>mode))"
6441 bool ok = ix86_expand_int_vcond (operands);
;; Vector conditional move for 128-bit modes: operands 1/2 (V_128) are
;; selected according to comparison operator 3 applied to the VI124_128
;; operands 4 and 5.  The condition requires the data mode and the
;; comparison mode to have the same number of elements.  Expansion is done
;; by ix86_expand_int_vcond.
6446 (define_expand "vcond<V_128:mode><VI124_128:mode>"
6447 [(set (match_operand:V_128 0 "register_operand")
6449 (match_operator 3 ""
6450 [(match_operand:VI124_128 4 "nonimmediate_operand")
6451 (match_operand:VI124_128 5 "general_operand")])
6452 (match_operand:V_128 1)
6453 (match_operand:V_128 2)))]
6455 && (GET_MODE_NUNITS (<V_128:MODE>mode)
6456 == GET_MODE_NUNITS (<VI124_128:MODE>mode))"
6458 bool ok = ix86_expand_int_vcond (operands);
;; Vector conditional move on the VI8F_128 modes controlled by a V2DI
;; comparison (operator 3 on operands 4 and 5).  Expansion is done by
;; ix86_expand_int_vcond.
6463 (define_expand "vcond<VI8F_128:mode>v2di"
6464 [(set (match_operand:VI8F_128 0 "register_operand")
6465 (if_then_else:VI8F_128
6466 (match_operator 3 ""
6467 [(match_operand:V2DI 4 "nonimmediate_operand")
6468 (match_operand:V2DI 5 "general_operand")])
6469 (match_operand:VI8F_128 1)
6470 (match_operand:VI8F_128 2)))]
6473 bool ok = ix86_expand_int_vcond (operands);
;; "vcondu" counterpart of the 256-bit vector conditional move.  Both
;; comparison inputs (operands 4 and 5) must be nonimmediate_operand, while
;; the selected values (operands 1 and 2) may be any general_operand.  The
;; condition requires equal element counts; expansion is done by
;; ix86_expand_int_vcond.
6478 (define_expand "vcondu<V_256:mode><VI_256:mode>"
6479 [(set (match_operand:V_256 0 "register_operand")
6481 (match_operator 3 ""
6482 [(match_operand:VI_256 4 "nonimmediate_operand")
6483 (match_operand:VI_256 5 "nonimmediate_operand")])
6484 (match_operand:V_256 1 "general_operand")
6485 (match_operand:V_256 2 "general_operand")))]
6487 && (GET_MODE_NUNITS (<V_256:MODE>mode)
6488 == GET_MODE_NUNITS (<VI_256:MODE>mode))"
6490 bool ok = ix86_expand_int_vcond (operands);
;; "vcondu" counterpart of the 128-bit vector conditional move.  Both
;; comparison inputs (operands 4 and 5) must be nonimmediate_operand, while
;; the selected values (operands 1 and 2) may be any general_operand.  The
;; condition requires equal element counts; expansion is done by
;; ix86_expand_int_vcond.
6495 (define_expand "vcondu<V_128:mode><VI124_128:mode>"
6496 [(set (match_operand:V_128 0 "register_operand")
6498 (match_operator 3 ""
6499 [(match_operand:VI124_128 4 "nonimmediate_operand")
6500 (match_operand:VI124_128 5 "nonimmediate_operand")])
6501 (match_operand:V_128 1 "general_operand")
6502 (match_operand:V_128 2 "general_operand")))]
6504 && (GET_MODE_NUNITS (<V_128:MODE>mode)
6505 == GET_MODE_NUNITS (<VI124_128:MODE>mode))"
6507 bool ok = ix86_expand_int_vcond (operands);
;; "vcondu" conditional move on the VI8F_128 modes controlled by a V2DI
;; comparison (operator 3 on operands 4 and 5).  Expansion is done by
;; ix86_expand_int_vcond.
6512 (define_expand "vcondu<VI8F_128:mode>v2di"
6513 [(set (match_operand:VI8F_128 0 "register_operand")
6514 (if_then_else:VI8F_128
6515 (match_operator 3 ""
6516 [(match_operand:V2DI 4 "nonimmediate_operand")
6517 (match_operand:V2DI 5 "nonimmediate_operand")])
6518 (match_operand:VI8F_128 1 "general_operand")
6519 (match_operand:VI8F_128 2 "general_operand")))]
6522 bool ok = ix86_expand_int_vcond (operands);
;; Modes accepted by the variable vec_perm expander: all six 128-bit vector
;; modes unconditionally, and the six 256-bit modes only with AVX2.
6527 (define_mode_iterator VEC_PERM_AVX2
6528 [V16QI V8HI V4SI V2DI V4SF V2DF
6529 (V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
6530 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
6531 (V8SF "TARGET_AVX2") (V4DF "TARGET_AVX2")])
;; Variable permutation of two vectors (operands 1 and 2) under control of
;; the integer selector vector in operand 3, all in registers.  Enabled
;; with SSSE3, AVX or XOP; expansion is done by ix86_expand_vec_perm.
6533 (define_expand "vec_perm<mode>"
6534 [(match_operand:VEC_PERM_AVX2 0 "register_operand")
6535 (match_operand:VEC_PERM_AVX2 1 "register_operand")
6536 (match_operand:VEC_PERM_AVX2 2 "register_operand")
6537 (match_operand:<sseintvecmode> 3 "register_operand")]
6538 "TARGET_SSSE3 || TARGET_AVX || TARGET_XOP"
6540 ix86_expand_vec_perm (operands);
;; Modes accepted by the constant-selector permutation expander, each gated
;; on the ISA level listed next to it.
6544 (define_mode_iterator VEC_PERM_CONST
6545 [(V4SF "TARGET_SSE") (V4SI "TARGET_SSE")
6546 (V2DF "TARGET_SSE") (V2DI "TARGET_SSE")
6547 (V16QI "TARGET_SSE2") (V8HI "TARGET_SSE2")
6548 (V8SF "TARGET_AVX") (V4DF "TARGET_AVX")
6549 (V8SI "TARGET_AVX") (V4DI "TARGET_AVX")
6550 (V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")])
;; Permutation of two vectors with a selector operand that has no predicate
;; (operand 3).  The expansion is attempted by ix86_expand_vec_perm_const,
;; whose return value is tested.
6552 (define_expand "vec_perm_const<mode>"
6553 [(match_operand:VEC_PERM_CONST 0 "register_operand")
6554 (match_operand:VEC_PERM_CONST 1 "register_operand")
6555 (match_operand:VEC_PERM_CONST 2 "register_operand")
6556 (match_operand:<sseintvecmode> 3)]
6559 if (ix86_expand_vec_perm_const (operands))
6565 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6567 ;; Parallel bitwise logical operations
6569 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; One's complement of a vector, expressed as an XOR.  The preparation code
;; builds a constant vector whose elements are all constm1_rtx (all bits
;; set), forces it into a register and stores it in operands[2].
6571 (define_expand "one_cmpl<mode>2"
6572 [(set (match_operand:VI 0 "register_operand")
6573 (xor:VI (match_operand:VI 1 "nonimmediate_operand")
6577 int i, n = GET_MODE_NUNITS (<MODE>mode);
6578 rtvec v = rtvec_alloc (n);
6580 for (i = 0; i < n; ++i)
6581 RTVEC_ELT (v, i) = constm1_rtx;
6583 operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v));
;; Expander "<sse2_avx2>_andnot<mode>3" for the VI_AVX2 modes.  Operand 1
;; (a register) is wrapped in NOT before being combined with operand 2
;; (register or memory); the result goes to operand 0.
;; NOTE(review): the line carrying the outer combining operator is not
;; visible here; going by the pattern name it is presumably AND.
6586 (define_expand "<sse2_avx2>_andnot<mode>3"
6587 [(set (match_operand:VI_AVX2 0 "register_operand")
6589 (not:VI_AVX2 (match_operand:VI_AVX2 1 "register_operand"))
6590 (match_operand:VI_AVX2 2 "nonimmediate_operand")))]
;; Insn "*andnot<mode>3" for the VI modes: operand 0 receives the
;; combination of (NOT operand 1) with operand 2.
;; Two alternatives:
;;   0: operand 1 is tied to operand 0 ("0"); isa "noavx", prefix "orig",
;;      template "%s\t{%%2, %%0|%%0, %%2}" (two-operand form).
;;   1: independent "v" registers; isa "avx", prefix "vex", template
;;      "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}" (three-operand form).
;; The output code switches on the insn's "mode" attribute to choose the
;; mnemonic text "tmp" (asserting the ISA each choice needs), then formats
;; it into the static buffer "buf" with snprintf using the per-alternative
;; format string "ops".
;; The "mode" attribute is a cond over TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL,
;; TARGET_AVX2, TARGET_AVX (with a size test yielding V8SF for modes wider
;; than 16 bytes), and a final !TARGET_SSE2 / optimize-for-size test that
;; yields V4SF; otherwise <sseinsnmode> is used.
;; NOTE(review): several case labels and assignments of the two switch
;; statements are on lines not visible in this view.
6593 (define_insn "*andnot<mode>3"
6594 [(set (match_operand:VI 0 "register_operand" "=x,v")
6596 (not:VI (match_operand:VI 1 "register_operand" "0,v"))
6597 (match_operand:VI 2 "nonimmediate_operand" "xm,vm")))]
6600 static char buf[64];
6604 switch (get_attr_mode (insn))
6607 gcc_assert (TARGET_AVX512F);
6609 tmp = "pandn<ssemodesuffix>";
6613 gcc_assert (TARGET_AVX2);
6615 gcc_assert (TARGET_SSE2);
6621 gcc_assert (TARGET_AVX);
6623 gcc_assert (TARGET_SSE);
6632 switch (which_alternative)
6635 ops = "%s\t{%%2, %%0|%%0, %%2}";
6638 ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
6644 snprintf (buf, sizeof (buf), ops, tmp);
6647 [(set_attr "isa" "noavx,avx")
6648 (set_attr "type" "sselog")
6649 (set (attr "prefix_data16")
6651 (and (eq_attr "alternative" "0")
6652 (eq_attr "mode" "TI"))
6654 (const_string "*")))
6655 (set_attr "prefix" "orig,vex")
6657 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
6658 (const_string "<ssePSmode>")
6659 (match_test "TARGET_AVX2")
6660 (const_string "<sseinsnmode>")
6661 (match_test "TARGET_AVX")
6663 (match_test "GET_MODE_SIZE (<MODE>mode) > 16")
6664 (const_string "V8SF")
6665 (const_string "<sseinsnmode>"))
6666 (ior (not (match_test "TARGET_SSE2"))
6667 (match_test "optimize_function_for_size_p (cfun)"))
6668 (const_string "V4SF")
6670 (const_string "<sseinsnmode>")))])
;; Expander "<code><mode>3" for the VI modes: a two-input operation whose
;; inputs (operands 1 and 2) may each be a register, memory or constant
;; vector.  The preparation code delegates to
;; ix86_expand_vector_logical_operator, passing the rtx code (<CODE>), the
;; vector mode and the operand array.
;; NOTE(review): the code iterator behind <code> is on a line not visible
;; here; this file section is titled "Parallel bitwise logical operations".
6672 (define_expand "<code><mode>3"
6673 [(set (match_operand:VI 0 "register_operand")
6675 (match_operand:VI 1 "nonimmediate_or_const_vector_operand")
6676 (match_operand:VI 2 "nonimmediate_or_const_vector_operand")))]
6679 ix86_expand_vector_logical_operator (<CODE>, <MODE>mode, operands);
;; Insn "*<code><mode>3" for the VI modes, the matcher behind the
;; "<code><mode>3" expander.  The insn condition includes
;; ix86_binary_operator_ok (<CODE>, <MODE>mode, operands).
;; Two alternatives:
;;   0: operand 1 tied to operand 0 and marked commutative ("%0"); isa
;;      "noavx", prefix "orig", template "%s\t{%%2, %%0|%%0, %%2}".
;;   1: independent "v" registers; isa "avx", prefix "vex", template
;;      "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}".
;; The output code switches on the insn's "mode" attribute to pick the
;; mnemonic text "tmp" (asserting the required ISA for each choice) and
;; formats it into the static buffer "buf" via snprintf with the
;; per-alternative format "ops".  The "mode" attribute is computed by the
;; same cond as in "*andnot<mode>3".
;; NOTE(review): several case labels and assignments of the two switch
;; statements are on lines not visible in this view.
6683 (define_insn "*<code><mode>3"
6684 [(set (match_operand:VI 0 "register_operand" "=x,v")
6686 (match_operand:VI 1 "nonimmediate_operand" "%0,v")
6687 (match_operand:VI 2 "nonimmediate_operand" "xm,vm")))]
6689 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6691 static char buf[64];
6695 switch (get_attr_mode (insn))
6698 gcc_assert (TARGET_AVX512F);
6699 tmp = "p<logic><ssemodesuffix>";
6703 gcc_assert (TARGET_AVX2);
6705 gcc_assert (TARGET_SSE2);
6711 gcc_assert (TARGET_AVX512F);
6713 gcc_assert (TARGET_AVX);
6715 gcc_assert (TARGET_SSE);
6724 switch (which_alternative)
6727 ops = "%s\t{%%2, %%0|%%0, %%2}";
6730 ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
6736 snprintf (buf, sizeof (buf), ops, tmp);
6739 [(set_attr "isa" "noavx,avx")
6740 (set_attr "type" "sselog")
6741 (set (attr "prefix_data16")
6743 (and (eq_attr "alternative" "0")
6744 (eq_attr "mode" "TI"))
6746 (const_string "*")))
6747 (set_attr "prefix" "orig,vex")
6749 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
6750 (const_string "<ssePSmode>")
6751 (match_test "TARGET_AVX2")
6752 (const_string "<sseinsnmode>")
6753 (match_test "TARGET_AVX")
6755 (match_test "GET_MODE_SIZE (<MODE>mode) > 16")
6756 (const_string "V8SF")
6757 (const_string "<sseinsnmode>"))
6758 (ior (not (match_test "TARGET_SSE2"))
6759 (match_test "optimize_function_for_size_p (cfun)"))
6760 (const_string "V4SF")
6762 (const_string "<sseinsnmode>")))])
6764 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6766 ;; Parallel integral element swizzling
6768 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander "vec_pack_trunc_<mode>" for the VI248_AVX2 modes.  The two
;; inputs (operands 1 and 2) are reinterpreted with gen_lowpart in the
;; destination's mode (<ssepackmode>) and handed, together with operand 0,
;; to ix86_expand_vec_extract_even_odd with a final argument of 0.
;; NOTE(review): the trailing 0 presumably requests the even-numbered
;; elements -- confirm against ix86_expand_vec_extract_even_odd.
6770 (define_expand "vec_pack_trunc_<mode>"
6771 [(match_operand:<ssepackmode> 0 "register_operand")
6772 (match_operand:VI248_AVX2 1 "register_operand")
6773 (match_operand:VI248_AVX2 2 "register_operand")]
6776 rtx op1 = gen_lowpart (<ssepackmode>mode, operands[1]);
6777 rtx op2 = gen_lowpart (<ssepackmode>mode, operands[2]);
6778 ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
;; Insn "<sse2_avx2>_packsswb" for the VI1_AVX2 modes.  The result is the
;; concatenation of two halves, each an ss_truncate (signed saturating
;; truncation) of an input in the wider <sseunpackmode>: operand 1 supplies
;; the first half, operand 2 the second.
;; Alternatives: 0 = packsswb, operand 1 tied to operand 0 (isa "noavx");
;; 1 = vpacksswb, three-operand form (isa "avx").
6782 (define_insn "<sse2_avx2>_packsswb"
6783 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
6784 (vec_concat:VI1_AVX2
6785 (ss_truncate:<ssehalfvecmode>
6786 (match_operand:<sseunpackmode> 1 "register_operand" "0,x"))
6787 (ss_truncate:<ssehalfvecmode>
6788 (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,xm"))))]
6791 packsswb\t{%2, %0|%0, %2}
6792 vpacksswb\t{%2, %1, %0|%0, %1, %2}"
6793 [(set_attr "isa" "noavx,avx")
6794 (set_attr "type" "sselog")
6795 (set_attr "prefix_data16" "1,*")
6796 (set_attr "prefix" "orig,vex")
6797 (set_attr "mode" "<sseinsnmode>")])
;; Insn "<sse2_avx2>_packssdw" for the VI2_AVX2 modes.  Same shape as
;; "<sse2_avx2>_packsswb": the result concatenates the ss_truncate (signed
;; saturating truncation) of operand 1 and of operand 2, both given in the
;; wider <sseunpackmode>.
;; Alternatives: 0 = packssdw, operand 1 tied to operand 0 (isa "noavx");
;; 1 = vpackssdw, three-operand form (isa "avx").
6799 (define_insn "<sse2_avx2>_packssdw"
6800 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
6801 (vec_concat:VI2_AVX2
6802 (ss_truncate:<ssehalfvecmode>
6803 (match_operand:<sseunpackmode> 1 "register_operand" "0,x"))
6804 (ss_truncate:<ssehalfvecmode>
6805 (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,xm"))))]
6808 packssdw\t{%2, %0|%0, %2}
6809 vpackssdw\t{%2, %1, %0|%0, %1, %2}"
6810 [(set_attr "isa" "noavx,avx")
6811 (set_attr "type" "sselog")
6812 (set_attr "prefix_data16" "1,*")
6813 (set_attr "prefix" "orig,vex")
6814 (set_attr "mode" "<sseinsnmode>")])
;; Insn "<sse2_avx2>_packuswb" for the VI1_AVX2 modes.  Like
;; "<sse2_avx2>_packsswb" but each half is a us_truncate (unsigned
;; saturating truncation) of its <sseunpackmode> input.
;; Alternatives: 0 = packuswb, operand 1 tied to operand 0 (isa "noavx");
;; 1 = vpackuswb, three-operand form (isa "avx").
6816 (define_insn "<sse2_avx2>_packuswb"
6817 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
6818 (vec_concat:VI1_AVX2
6819 (us_truncate:<ssehalfvecmode>
6820 (match_operand:<sseunpackmode> 1 "register_operand" "0,x"))
6821 (us_truncate:<ssehalfvecmode>
6822 (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,xm"))))]
6825 packuswb\t{%2, %0|%0, %2}
6826 vpackuswb\t{%2, %1, %0|%0, %1, %2}"
6827 [(set_attr "isa" "noavx,avx")
6828 (set_attr "type" "sselog")
6829 (set_attr "prefix_data16" "1,*")
6830 (set_attr "prefix" "orig,vex")
6831 (set_attr "mode" "<sseinsnmode>")])
;; Insn "avx2_interleave_highv32qi": emits vpunpckhbw on V32QI operands.
;; The selector pairs index i with index i+32 for i = 8..15 and then for
;; i = 24..31.
;; NOTE(review): the rtx lines enclosing operands 1 and 2 are not visible
;; here; assuming the indices address the concatenation of operand 1
;; (0..31) and operand 2 (32..63), this interleaves the upper eight bytes
;; of each 128-bit half of the two inputs.
6833 (define_insn "avx2_interleave_highv32qi"
6834 [(set (match_operand:V32QI 0 "register_operand" "=x")
6837 (match_operand:V32QI 1 "register_operand" "x")
6838 (match_operand:V32QI 2 "nonimmediate_operand" "xm"))
6839 (parallel [(const_int 8) (const_int 40)
6840 (const_int 9) (const_int 41)
6841 (const_int 10) (const_int 42)
6842 (const_int 11) (const_int 43)
6843 (const_int 12) (const_int 44)
6844 (const_int 13) (const_int 45)
6845 (const_int 14) (const_int 46)
6846 (const_int 15) (const_int 47)
6847 (const_int 24) (const_int 56)
6848 (const_int 25) (const_int 57)
6849 (const_int 26) (const_int 58)
6850 (const_int 27) (const_int 59)
6851 (const_int 28) (const_int 60)
6852 (const_int 29) (const_int 61)
6853 (const_int 30) (const_int 62)
6854 (const_int 31) (const_int 63)])))]
6856 "vpunpckhbw\t{%2, %1, %0|%0, %1, %2}"
6857 [(set_attr "type" "sselog")
6858 (set_attr "prefix" "vex")
6859 (set_attr "mode" "OI")])
;; Insn "vec_interleave_highv16qi": the selector pairs index i with index
;; i+16 for i = 8..15.
;; Alternatives: 0 = punpckhbw with operand 1 tied to operand 0 (isa
;; "noavx"); 1 = vpunpckhbw three-operand form (isa "avx").
;; NOTE(review): the rtx lines enclosing operands 1 and 2 are not visible
;; here; presumably the indices address operand 1 followed by operand 2.
6861 (define_insn "vec_interleave_highv16qi"
6862 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
6865 (match_operand:V16QI 1 "register_operand" "0,x")
6866 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm"))
6867 (parallel [(const_int 8) (const_int 24)
6868 (const_int 9) (const_int 25)
6869 (const_int 10) (const_int 26)
6870 (const_int 11) (const_int 27)
6871 (const_int 12) (const_int 28)
6872 (const_int 13) (const_int 29)
6873 (const_int 14) (const_int 30)
6874 (const_int 15) (const_int 31)])))]
6877 punpckhbw\t{%2, %0|%0, %2}
6878 vpunpckhbw\t{%2, %1, %0|%0, %1, %2}"
6879 [(set_attr "isa" "noavx,avx")
6880 (set_attr "type" "sselog")
6881 (set_attr "prefix_data16" "1,*")
6882 (set_attr "prefix" "orig,vex")
6883 (set_attr "mode" "TI")])
;; Insn "avx2_interleave_lowv32qi": emits vpunpcklbw on V32QI operands.
;; The selector pairs index i with index i+32 for i = 0..7 and then for
;; i = 16..23.
;; NOTE(review): the rtx lines enclosing operands 1 and 2 are not visible
;; here; assuming the indices address the concatenation of operand 1
;; (0..31) and operand 2 (32..63), this interleaves the lower eight bytes
;; of each 128-bit half of the two inputs.
6885 (define_insn "avx2_interleave_lowv32qi"
6886 [(set (match_operand:V32QI 0 "register_operand" "=x")
6889 (match_operand:V32QI 1 "register_operand" "x")
6890 (match_operand:V32QI 2 "nonimmediate_operand" "xm"))
6891 (parallel [(const_int 0) (const_int 32)
6892 (const_int 1) (const_int 33)
6893 (const_int 2) (const_int 34)
6894 (const_int 3) (const_int 35)
6895 (const_int 4) (const_int 36)
6896 (const_int 5) (const_int 37)
6897 (const_int 6) (const_int 38)
6898 (const_int 7) (const_int 39)
6899 (const_int 16) (const_int 48)
6900 (const_int 17) (const_int 49)
6901 (const_int 18) (const_int 50)
6902 (const_int 19) (const_int 51)
6903 (const_int 20) (const_int 52)
6904 (const_int 21) (const_int 53)
6905 (const_int 22) (const_int 54)
6906 (const_int 23) (const_int 55)])))]
6908 "vpunpcklbw\t{%2, %1, %0|%0, %1, %2}"
6909 [(set_attr "type" "sselog")
6910 (set_attr "prefix" "vex")
6911 (set_attr "mode" "OI")])
;; Insn "vec_interleave_lowv16qi": the selector pairs index i with index
;; i+16 for i = 0..7.
;; Alternatives: 0 = punpcklbw with operand 1 tied to operand 0 (isa
;; "noavx"); 1 = vpunpcklbw three-operand form (isa "avx").
;; NOTE(review): the rtx lines enclosing operands 1 and 2 are not visible
;; here; presumably the indices address operand 1 followed by operand 2.
6913 (define_insn "vec_interleave_lowv16qi"
6914 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
6917 (match_operand:V16QI 1 "register_operand" "0,x")
6918 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm"))
6919 (parallel [(const_int 0) (const_int 16)
6920 (const_int 1) (const_int 17)
6921 (const_int 2) (const_int 18)
6922 (const_int 3) (const_int 19)
6923 (const_int 4) (const_int 20)
6924 (const_int 5) (const_int 21)
6925 (const_int 6) (const_int 22)
6926 (const_int 7) (const_int 23)])))]
6929 punpcklbw\t{%2, %0|%0, %2}
6930 vpunpcklbw\t{%2, %1, %0|%0, %1, %2}"
6931 [(set_attr "isa" "noavx,avx")
6932 (set_attr "type" "sselog")
6933 (set_attr "prefix_data16" "1,*")
6934 (set_attr "prefix" "orig,vex")
6935 (set_attr "mode" "TI")])
;; Insn "avx2_interleave_highv16hi": emits vpunpckhwd on V16HI operands.
;; The selector pairs index i with index i+16 for i = 4..7 and then for
;; i = 12..15.
;; NOTE(review): the rtx lines enclosing operands 1 and 2 are not visible
;; here; presumably indices 0..15 address operand 1 and 16..31 operand 2,
;; i.e. the upper four words of each 128-bit half are interleaved.
6937 (define_insn "avx2_interleave_highv16hi"
6938 [(set (match_operand:V16HI 0 "register_operand" "=x")
6941 (match_operand:V16HI 1 "register_operand" "x")
6942 (match_operand:V16HI 2 "nonimmediate_operand" "xm"))
6943 (parallel [(const_int 4) (const_int 20)
6944 (const_int 5) (const_int 21)
6945 (const_int 6) (const_int 22)
6946 (const_int 7) (const_int 23)
6947 (const_int 12) (const_int 28)
6948 (const_int 13) (const_int 29)
6949 (const_int 14) (const_int 30)
6950 (const_int 15) (const_int 31)])))]
6952 "vpunpckhwd\t{%2, %1, %0|%0, %1, %2}"
6953 [(set_attr "type" "sselog")
6954 (set_attr "prefix" "vex")
6955 (set_attr "mode" "OI")])
;; Insn "vec_interleave_highv8hi": the selector pairs index i with index
;; i+8 for i = 4..7.
;; Alternatives: 0 = punpckhwd with operand 1 tied to operand 0 (isa
;; "noavx"); 1 = vpunpckhwd three-operand form (isa "avx").
;; NOTE(review): the rtx lines enclosing operands 1 and 2 are not visible
;; here; presumably the indices address operand 1 followed by operand 2.
6957 (define_insn "vec_interleave_highv8hi"
6958 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
6961 (match_operand:V8HI 1 "register_operand" "0,x")
6962 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm"))
6963 (parallel [(const_int 4) (const_int 12)
6964 (const_int 5) (const_int 13)
6965 (const_int 6) (const_int 14)
6966 (const_int 7) (const_int 15)])))]
6969 punpckhwd\t{%2, %0|%0, %2}
6970 vpunpckhwd\t{%2, %1, %0|%0, %1, %2}"
6971 [(set_attr "isa" "noavx,avx")
6972 (set_attr "type" "sselog")
6973 (set_attr "prefix_data16" "1,*")
6974 (set_attr "prefix" "orig,vex")
6975 (set_attr "mode" "TI")])
;; Insn "avx2_interleave_lowv16hi": emits vpunpcklwd on V16HI operands.
;; The selector pairs index i with index i+16 for i = 0..3 and then for
;; i = 8..11.
;; NOTE(review): the rtx lines enclosing operands 1 and 2 are not visible
;; here; presumably indices 0..15 address operand 1 and 16..31 operand 2,
;; i.e. the lower four words of each 128-bit half are interleaved.
6977 (define_insn "avx2_interleave_lowv16hi"
6978 [(set (match_operand:V16HI 0 "register_operand" "=x")
6981 (match_operand:V16HI 1 "register_operand" "x")
6982 (match_operand:V16HI 2 "nonimmediate_operand" "xm"))
6983 (parallel [(const_int 0) (const_int 16)
6984 (const_int 1) (const_int 17)
6985 (const_int 2) (const_int 18)
6986 (const_int 3) (const_int 19)
6987 (const_int 8) (const_int 24)
6988 (const_int 9) (const_int 25)
6989 (const_int 10) (const_int 26)
6990 (const_int 11) (const_int 27)])))]
6992 "vpunpcklwd\t{%2, %1, %0|%0, %1, %2}"
6993 [(set_attr "type" "sselog")
6994 (set_attr "prefix" "vex")
6995 (set_attr "mode" "OI")])
;; Insn "vec_interleave_lowv8hi": the selector pairs index i with index
;; i+8 for i = 0..3.
;; Alternatives: 0 = punpcklwd with operand 1 tied to operand 0 (isa
;; "noavx"); 1 = vpunpcklwd three-operand form (isa "avx").
;; NOTE(review): the rtx lines enclosing operands 1 and 2 are not visible
;; here; presumably the indices address operand 1 followed by operand 2.
6997 (define_insn "vec_interleave_lowv8hi"
6998 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
7001 (match_operand:V8HI 1 "register_operand" "0,x")
7002 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm"))
7003 (parallel [(const_int 0) (const_int 8)
7004 (const_int 1) (const_int 9)
7005 (const_int 2) (const_int 10)
7006 (const_int 3) (const_int 11)])))]
7009 punpcklwd\t{%2, %0|%0, %2}
7010 vpunpcklwd\t{%2, %1, %0|%0, %1, %2}"
7011 [(set_attr "isa" "noavx,avx")
7012 (set_attr "type" "sselog")
7013 (set_attr "prefix_data16" "1,*")
7014 (set_attr "prefix" "orig,vex")
7015 (set_attr "mode" "TI")])
;; Insn "avx2_interleave_highv8si": emits vpunpckhdq on V8SI operands.
;; The selector pairs index i with index i+8 for i = 2, 3, 6 and 7.
;; NOTE(review): the rtx lines enclosing operands 1 and 2 are not visible
;; here; presumably indices 0..7 address operand 1 and 8..15 operand 2,
;; i.e. the upper two dwords of each 128-bit half are interleaved.
7017 (define_insn "avx2_interleave_highv8si"
7018 [(set (match_operand:V8SI 0 "register_operand" "=x")
7021 (match_operand:V8SI 1 "register_operand" "x")
7022 (match_operand:V8SI 2 "nonimmediate_operand" "xm"))
7023 (parallel [(const_int 2) (const_int 10)
7024 (const_int 3) (const_int 11)
7025 (const_int 6) (const_int 14)
7026 (const_int 7) (const_int 15)])))]
7028 "vpunpckhdq\t{%2, %1, %0|%0, %1, %2}"
7029 [(set_attr "type" "sselog")
7030 (set_attr "prefix" "vex")
7031 (set_attr "mode" "OI")])
;; Insn "vec_interleave_highv4si": the selector pairs index i with index
;; i+4 for i = 2 and 3.
;; Alternatives: 0 = punpckhdq with operand 1 tied to operand 0 (isa
;; "noavx"); 1 = vpunpckhdq three-operand form (isa "avx").
;; NOTE(review): the rtx lines enclosing operands 1 and 2 are not visible
;; here; presumably the indices address operand 1 followed by operand 2.
7033 (define_insn "vec_interleave_highv4si"
7034 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
7037 (match_operand:V4SI 1 "register_operand" "0,x")
7038 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm"))
7039 (parallel [(const_int 2) (const_int 6)
7040 (const_int 3) (const_int 7)])))]
7043 punpckhdq\t{%2, %0|%0, %2}
7044 vpunpckhdq\t{%2, %1, %0|%0, %1, %2}"
7045 [(set_attr "isa" "noavx,avx")
7046 (set_attr "type" "sselog")
7047 (set_attr "prefix_data16" "1,*")
7048 (set_attr "prefix" "orig,vex")
7049 (set_attr "mode" "TI")])
;; Insn "avx2_interleave_lowv8si": emits vpunpckldq on V8SI operands.
;; The selector pairs index i with index i+8 for i = 0, 1, 4 and 5.
;; NOTE(review): the rtx lines enclosing operands 1 and 2 are not visible
;; here; presumably indices 0..7 address operand 1 and 8..15 operand 2,
;; i.e. the lower two dwords of each 128-bit half are interleaved.
7051 (define_insn "avx2_interleave_lowv8si"
7052 [(set (match_operand:V8SI 0 "register_operand" "=x")
7055 (match_operand:V8SI 1 "register_operand" "x")
7056 (match_operand:V8SI 2 "nonimmediate_operand" "xm"))
7057 (parallel [(const_int 0) (const_int 8)
7058 (const_int 1) (const_int 9)
7059 (const_int 4) (const_int 12)
7060 (const_int 5) (const_int 13)])))]
7062 "vpunpckldq\t{%2, %1, %0|%0, %1, %2}"
7063 [(set_attr "type" "sselog")
7064 (set_attr "prefix" "vex")
7065 (set_attr "mode" "OI")])
;; Insn "vec_interleave_lowv4si": the selector pairs index i with index
;; i+4 for i = 0 and 1.
;; Alternatives: 0 = punpckldq with operand 1 tied to operand 0 (isa
;; "noavx"); 1 = vpunpckldq three-operand form (isa "avx").
;; NOTE(review): the rtx lines enclosing operands 1 and 2 are not visible
;; here; presumably the indices address operand 1 followed by operand 2.
7067 (define_insn "vec_interleave_lowv4si"
7068 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
7071 (match_operand:V4SI 1 "register_operand" "0,x")
7072 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm"))
7073 (parallel [(const_int 0) (const_int 4)
7074 (const_int 1) (const_int 5)])))]
7077 punpckldq\t{%2, %0|%0, %2}
7078 vpunpckldq\t{%2, %1, %0|%0, %1, %2}"
7079 [(set_attr "isa" "noavx,avx")
7080 (set_attr "type" "sselog")
7081 (set_attr "prefix_data16" "1,*")
7082 (set_attr "prefix" "orig,vex")
7083 (set_attr "mode" "TI")])
;; Expander "vec_interleave_high<mode>" for the VI_256 modes.  Emits three
;; insns:
;;   t1 = avx2_interleave_low<mode>  (operand 1, operand 2)
;;   t2 = avx2_interleave_high<mode> (operand 1, operand 2)
;;   operand 0 = avx2_permv2ti (t1, t2, imm), all three viewed as V4DI
;; with imm = 1 + (3 << 4) = 0x31.
;; NOTE(review): going by the VPERM2I128 immediate encoding, 0x31 takes
;; the upper 128-bit half of t1 followed by the upper half of t2 --
;; confirm against the avx2_permv2ti pattern.
7085 (define_expand "vec_interleave_high<mode>"
7086 [(match_operand:VI_256 0 "register_operand" "=x")
7087 (match_operand:VI_256 1 "register_operand" "x")
7088 (match_operand:VI_256 2 "nonimmediate_operand" "xm")]
7091 rtx t1 = gen_reg_rtx (<MODE>mode);
7092 rtx t2 = gen_reg_rtx (<MODE>mode);
7093 emit_insn (gen_avx2_interleave_low<mode> (t1, operands[1], operands[2]));
7094 emit_insn (gen_avx2_interleave_high<mode> (t2, operands[1], operands[2]));
7095 emit_insn (gen_avx2_permv2ti
7096 (gen_lowpart (V4DImode, operands[0]),
7097 gen_lowpart (V4DImode, t1),
7098 gen_lowpart (V4DImode, t2), GEN_INT (1 + (3 << 4))));
;; Expander "vec_interleave_low<mode>" for the VI_256 modes.  Emits three
;; insns:
;;   t1 = avx2_interleave_low<mode>  (operand 1, operand 2)
;;   t2 = avx2_interleave_high<mode> (operand 1, operand 2)
;;   operand 0 = avx2_permv2ti (t1, t2, imm), all three viewed as V4DI
;; with imm = 0 + (2 << 4) = 0x20.
;; NOTE(review): going by the VPERM2I128 immediate encoding, 0x20 takes
;; the lower 128-bit half of t1 followed by the lower half of t2 --
;; confirm against the avx2_permv2ti pattern.
7102 (define_expand "vec_interleave_low<mode>"
7103 [(match_operand:VI_256 0 "register_operand" "=x")
7104 (match_operand:VI_256 1 "register_operand" "x")
7105 (match_operand:VI_256 2 "nonimmediate_operand" "xm")]
7108 rtx t1 = gen_reg_rtx (<MODE>mode);
7109 rtx t2 = gen_reg_rtx (<MODE>mode);
7110 emit_insn (gen_avx2_interleave_low<mode> (t1, operands[1], operands[2]));
7111 emit_insn (gen_avx2_interleave_high<mode> (t2, operands[1], operands[2]));
7112 emit_insn (gen_avx2_permv2ti
7113 (gen_lowpart (V4DImode, operands[0]),
7114 gen_lowpart (V4DImode, t1),
7115 gen_lowpart (V4DImode, t2), GEN_INT (0 + (2 << 4))));
7119 ;; Modes handled by pinsr patterns.
;; V8HI carries no extra condition; V16QI and V4SI additionally need
;; TARGET_SSE4_1, and V2DI needs TARGET_SSE4_1 together with TARGET_64BIT.
7120 (define_mode_iterator PINSR_MODE
7121 [(V16QI "TARGET_SSE4_1") V8HI
7122 (V4SI "TARGET_SSE4_1")
7123 (V2DI "TARGET_SSE4_1 && TARGET_64BIT")])
;; Name prefix used for the pinsr pattern of each PINSR_MODE mode: "sse2"
;; for V8HI, "sse4_1" for V16QI, V4SI and V2DI.
7125 (define_mode_attr sse2p4_1
7126 [(V16QI "sse4_1") (V8HI "sse2")
7127 (V4SI "sse4_1") (V2DI "sse4_1")])
7129 ;; sse4_1_pinsrd must come before sse2_loadld since it is preferred.
;; Insn "<sse2p4_1>_pinsr<ssemodesuffix>" for the PINSR_MODE modes:
;; vec_merge of a broadcast of scalar operand 2 into vector operand 1,
;; with operand 3 as the merge mask.
;; The insn condition requires exact_log2 (operand 3) to be a valid
;; element number (the unsigned compare also rejects the -1 returned for
;; values that are not a power of two), so exactly one element is
;; replaced.  The output code rewrites operand 3 from that one-bit mask to
;; the element index before printing it as the immediate.
;; Four alternatives: scalar from a general register or from memory, each
;; in a two-operand form with operand 1 tied to operand 0 (isa "noavx")
;; and in a three-operand "v"-prefixed form (isa "avx").  The templates
;; that print the scalar as %k2 are guarded by a test that the scalar mode
;; is narrower than SImode.
;; NOTE(review): the case labels of the switch are on lines not visible
;; in this view.
7130 (define_insn "<sse2p4_1>_pinsr<ssemodesuffix>"
7131 [(set (match_operand:PINSR_MODE 0 "register_operand" "=x,x,x,x")
7132 (vec_merge:PINSR_MODE
7133 (vec_duplicate:PINSR_MODE
7134 (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "r,m,r,m"))
7135 (match_operand:PINSR_MODE 1 "register_operand" "0,0,x,x")
7136 (match_operand:SI 3 "const_int_operand")))]
7138 && ((unsigned) exact_log2 (INTVAL (operands[3]))
7139 < GET_MODE_NUNITS (<MODE>mode))"
7141 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
7143 switch (which_alternative)
7146 if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
7147 return "pinsr<ssemodesuffix>\t{%3, %k2, %0|%0, %k2, %3}";
7150 return "pinsr<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}";
7152 if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
7153 return "vpinsr<ssemodesuffix>\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
7156 return "vpinsr<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
7161 [(set_attr "isa" "noavx,noavx,avx,avx")
7162 (set_attr "type" "sselog")
7163 (set (attr "prefix_rex")
7165 (and (not (match_test "TARGET_AVX"))
7166 (eq (const_string "<MODE>mode") (const_string "V2DImode")))
7168 (const_string "*")))
7169 (set (attr "prefix_data16")
7171 (and (not (match_test "TARGET_AVX"))
7172 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
7174 (const_string "*")))
7175 (set (attr "prefix_extra")
7177 (and (not (match_test "TARGET_AVX"))
7178 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
7180 (const_string "1")))
7181 (set_attr "length_immediate" "1")
7182 (set_attr "prefix" "orig,orig,vex,vex")
7183 (set_attr "mode" "TI")])
;; Expander "avx2_pshufdv3": V8SI shuffle controlled by an immediate in
;; 0..255 (operand 2).  The immediate is split into four 2-bit fields
;; (bits 0-1, 2-3, 4-5, 6-7); these are passed to avx2_pshufd_1 as its
;; first four selectors, followed by the same four values plus 4.
7185 (define_expand "avx2_pshufdv3"
7186 [(match_operand:V8SI 0 "register_operand")
7187 (match_operand:V8SI 1 "nonimmediate_operand")
7188 (match_operand:SI 2 "const_0_to_255_operand")]
7191 int mask = INTVAL (operands[2]);
7192 emit_insn (gen_avx2_pshufd_1 (operands[0], operands[1],
7193 GEN_INT ((mask >> 0) & 3),
7194 GEN_INT ((mask >> 2) & 3),
7195 GEN_INT ((mask >> 4) & 3),
7196 GEN_INT ((mask >> 6) & 3),
7197 GEN_INT (((mask >> 0) & 3) + 4),
7198 GEN_INT (((mask >> 2) & 3) + 4),
7199 GEN_INT (((mask >> 4) & 3) + 4),
7200 GEN_INT (((mask >> 6) & 3) + 4)));
;; Insn "avx2_pshufd_1": V8SI element selection emitted as vpshufd.
;; Selectors 2-5 must lie in 0..3 and selectors 6-9 in 4..7; the insn
;; condition additionally requires operand[n] + 4 == operand[n + 4] for
;; n = 2..5, i.e. the second group of four repeats the first group's
;; choice.  The output code packs operands 2-5 into an 8-bit immediate
;; (two bits each, operand 2 in the low bits) and stores it back into
;; operands[2] so the template can print it as %2.
7204 (define_insn "avx2_pshufd_1"
7205 [(set (match_operand:V8SI 0 "register_operand" "=x")
7207 (match_operand:V8SI 1 "nonimmediate_operand" "xm")
7208 (parallel [(match_operand 2 "const_0_to_3_operand")
7209 (match_operand 3 "const_0_to_3_operand")
7210 (match_operand 4 "const_0_to_3_operand")
7211 (match_operand 5 "const_0_to_3_operand")
7212 (match_operand 6 "const_4_to_7_operand")
7213 (match_operand 7 "const_4_to_7_operand")
7214 (match_operand 8 "const_4_to_7_operand")
7215 (match_operand 9 "const_4_to_7_operand")])))]
7217 && INTVAL (operands[2]) + 4 == INTVAL (operands[6])
7218 && INTVAL (operands[3]) + 4 == INTVAL (operands[7])
7219 && INTVAL (operands[4]) + 4 == INTVAL (operands[8])
7220 && INTVAL (operands[5]) + 4 == INTVAL (operands[9])"
7223 mask |= INTVAL (operands[2]) << 0;
7224 mask |= INTVAL (operands[3]) << 2;
7225 mask |= INTVAL (operands[4]) << 4;
7226 mask |= INTVAL (operands[5]) << 6;
7227 operands[2] = GEN_INT (mask);
7229 return "vpshufd\t{%2, %1, %0|%0, %1, %2}";
7231 [(set_attr "type" "sselog1")
7232 (set_attr "prefix" "vex")
7233 (set_attr "length_immediate" "1")
7234 (set_attr "mode" "OI")])
;; Expander "sse2_pshufd": V4SI shuffle controlled by an integer constant
;; (operand 2).  The constant is split into four 2-bit fields (bits 0-1,
;; 2-3, 4-5, 6-7) which become the four selectors of sse2_pshufd_1.
;; Only the low eight bits of the constant are used.
7236 (define_expand "sse2_pshufd"
7237 [(match_operand:V4SI 0 "register_operand")
7238 (match_operand:V4SI 1 "nonimmediate_operand")
7239 (match_operand:SI 2 "const_int_operand")]
7242 int mask = INTVAL (operands[2]);
7243 emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
7244 GEN_INT ((mask >> 0) & 3),
7245 GEN_INT ((mask >> 2) & 3),
7246 GEN_INT ((mask >> 4) & 3),
7247 GEN_INT ((mask >> 6) & 3)));
;; Insn "sse2_pshufd_1": V4SI element selection with four selectors, each
;; in 0..3, emitted as pshufd (or vpshufd: the template uses the %v
;; prefix and the "prefix" attribute is "maybe_vex").  The output code
;; packs operands 2-5 into an 8-bit immediate (two bits each, operand 2
;; lowest) and stores it into operands[2] for printing as %2.
7251 (define_insn "sse2_pshufd_1"
7252 [(set (match_operand:V4SI 0 "register_operand" "=x")
7254 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
7255 (parallel [(match_operand 2 "const_0_to_3_operand")
7256 (match_operand 3 "const_0_to_3_operand")
7257 (match_operand 4 "const_0_to_3_operand")
7258 (match_operand 5 "const_0_to_3_operand")])))]
7262 mask |= INTVAL (operands[2]) << 0;
7263 mask |= INTVAL (operands[3]) << 2;
7264 mask |= INTVAL (operands[4]) << 4;
7265 mask |= INTVAL (operands[5]) << 6;
7266 operands[2] = GEN_INT (mask);
7268 return "%vpshufd\t{%2, %1, %0|%0, %1, %2}";
7270 [(set_attr "type" "sselog1")
7271 (set_attr "prefix_data16" "1")
7272 (set_attr "prefix" "maybe_vex")
7273 (set_attr "length_immediate" "1")
7274 (set_attr "mode" "TI")])
;; Expander "avx2_pshuflwv3": V16HI shuffle controlled by an immediate in
;; 0..255 (operand 2).  The immediate is split into four 2-bit fields;
;; avx2_pshuflw_1 receives those four values followed by the same values
;; plus 8.
7276 (define_expand "avx2_pshuflwv3"
7277 [(match_operand:V16HI 0 "register_operand")
7278 (match_operand:V16HI 1 "nonimmediate_operand")
7279 (match_operand:SI 2 "const_0_to_255_operand")]
7282 int mask = INTVAL (operands[2]);
7283 emit_insn (gen_avx2_pshuflw_1 (operands[0], operands[1],
7284 GEN_INT ((mask >> 0) & 3),
7285 GEN_INT ((mask >> 2) & 3),
7286 GEN_INT ((mask >> 4) & 3),
7287 GEN_INT ((mask >> 6) & 3),
7288 GEN_INT (((mask >> 0) & 3) + 8),
7289 GEN_INT (((mask >> 2) & 3) + 8),
7290 GEN_INT (((mask >> 4) & 3) + 8),
7291 GEN_INT (((mask >> 6) & 3) + 8)));
;; Insn "avx2_pshuflw_1": V16HI element selection emitted as vpshuflw.
;; Selectors 2-5 must lie in 0..3 and selectors 6-9 in 8..11; the insn
;; condition additionally requires operand[n] + 8 == operand[n + 4] for
;; n = 2..5.  The output code packs operands 2-5 into an 8-bit immediate
;; (two bits each, operand 2 lowest) and stores it into operands[2].
;; NOTE(review): the selector entries between and after the two operand
;; groups are on lines not visible in this view.
7295 (define_insn "avx2_pshuflw_1"
7296 [(set (match_operand:V16HI 0 "register_operand" "=x")
7298 (match_operand:V16HI 1 "nonimmediate_operand" "xm")
7299 (parallel [(match_operand 2 "const_0_to_3_operand")
7300 (match_operand 3 "const_0_to_3_operand")
7301 (match_operand 4 "const_0_to_3_operand")
7302 (match_operand 5 "const_0_to_3_operand")
7307 (match_operand 6 "const_8_to_11_operand")
7308 (match_operand 7 "const_8_to_11_operand")
7309 (match_operand 8 "const_8_to_11_operand")
7310 (match_operand 9 "const_8_to_11_operand")
7316 && INTVAL (operands[2]) + 8 == INTVAL (operands[6])
7317 && INTVAL (operands[3]) + 8 == INTVAL (operands[7])
7318 && INTVAL (operands[4]) + 8 == INTVAL (operands[8])
7319 && INTVAL (operands[5]) + 8 == INTVAL (operands[9])"
7322 mask |= INTVAL (operands[2]) << 0;
7323 mask |= INTVAL (operands[3]) << 2;
7324 mask |= INTVAL (operands[4]) << 4;
7325 mask |= INTVAL (operands[5]) << 6;
7326 operands[2] = GEN_INT (mask);
7328 return "vpshuflw\t{%2, %1, %0|%0, %1, %2}";
7330 [(set_attr "type" "sselog")
7331 (set_attr "prefix" "vex")
7332 (set_attr "length_immediate" "1")
7333 (set_attr "mode" "OI")])
;; Expander "sse2_pshuflw": V8HI shuffle controlled by an integer constant
;; (operand 2).  The constant is split into four 2-bit fields (bits 0-1,
;; 2-3, 4-5, 6-7) which become the four selectors of sse2_pshuflw_1.
;; Only the low eight bits of the constant are used.
7335 (define_expand "sse2_pshuflw"
7336 [(match_operand:V8HI 0 "register_operand")
7337 (match_operand:V8HI 1 "nonimmediate_operand")
7338 (match_operand:SI 2 "const_int_operand")]
7341 int mask = INTVAL (operands[2]);
7342 emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
7343 GEN_INT ((mask >> 0) & 3),
7344 GEN_INT ((mask >> 2) & 3),
7345 GEN_INT ((mask >> 4) & 3),
7346 GEN_INT ((mask >> 6) & 3)));
;; Insn "sse2_pshuflw_1": V8HI element selection whose first four
;; selectors (operands 2-5) each lie in 0..3, emitted as pshuflw (or
;; vpshuflw via the %v prefix).  The output code packs operands 2-5 into
;; an 8-bit immediate (two bits each, operand 2 lowest) and stores it into
;; operands[2].  Attributes: prefix_data16 is "0" and prefix_rep is "1".
;; NOTE(review): the selector entries after operand 5 are on lines not
;; visible in this view.
7350 (define_insn "sse2_pshuflw_1"
7351 [(set (match_operand:V8HI 0 "register_operand" "=x")
7353 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7354 (parallel [(match_operand 2 "const_0_to_3_operand")
7355 (match_operand 3 "const_0_to_3_operand")
7356 (match_operand 4 "const_0_to_3_operand")
7357 (match_operand 5 "const_0_to_3_operand")
7365 mask |= INTVAL (operands[2]) << 0;
7366 mask |= INTVAL (operands[3]) << 2;
7367 mask |= INTVAL (operands[4]) << 4;
7368 mask |= INTVAL (operands[5]) << 6;
7369 operands[2] = GEN_INT (mask);
7371 return "%vpshuflw\t{%2, %1, %0|%0, %1, %2}";
7373 [(set_attr "type" "sselog")
7374 (set_attr "prefix_data16" "0")
7375 (set_attr "prefix_rep" "1")
7376 (set_attr "prefix" "maybe_vex")
7377 (set_attr "length_immediate" "1")
7378 (set_attr "mode" "TI")])
;; Expander "avx2_pshufhwv3": V16HI shuffle controlled by an immediate in
;; 0..255 (operand 2).  Each of the four 2-bit fields of the immediate is
;; biased by 4 for the first selector group and by 12 for the second
;; before being passed to avx2_pshufhw_1.
7380 (define_expand "avx2_pshufhwv3"
7381 [(match_operand:V16HI 0 "register_operand")
7382 (match_operand:V16HI 1 "nonimmediate_operand")
7383 (match_operand:SI 2 "const_0_to_255_operand")]
7386 int mask = INTVAL (operands[2]);
7387 emit_insn (gen_avx2_pshufhw_1 (operands[0], operands[1],
7388 GEN_INT (((mask >> 0) & 3) + 4),
7389 GEN_INT (((mask >> 2) & 3) + 4),
7390 GEN_INT (((mask >> 4) & 3) + 4),
7391 GEN_INT (((mask >> 6) & 3) + 4),
7392 GEN_INT (((mask >> 0) & 3) + 12),
7393 GEN_INT (((mask >> 2) & 3) + 12),
7394 GEN_INT (((mask >> 4) & 3) + 12),
7395 GEN_INT (((mask >> 6) & 3) + 12)));
;; Insn "avx2_pshufhw_1": V16HI element selection emitted as vpshufhw.
;; The selector begins with the fixed index 0; operands 2-5 must lie in
;; 4..7 and operands 6-9 in 12..15, and the insn condition requires
;; operand[n] + 8 == operand[n + 4] for n = 2..5.  The output code removes
;; the bias of 4 from operands 2-5, packs them into an 8-bit immediate
;; (two bits each, operand 2 lowest) and stores it into operands[2].
;; NOTE(review): the remaining fixed selector entries are on lines not
;; visible in this view.
7399 (define_insn "avx2_pshufhw_1"
7400 [(set (match_operand:V16HI 0 "register_operand" "=x")
7402 (match_operand:V16HI 1 "nonimmediate_operand" "xm")
7403 (parallel [(const_int 0)
7407 (match_operand 2 "const_4_to_7_operand")
7408 (match_operand 3 "const_4_to_7_operand")
7409 (match_operand 4 "const_4_to_7_operand")
7410 (match_operand 5 "const_4_to_7_operand")
7415 (match_operand 6 "const_12_to_15_operand")
7416 (match_operand 7 "const_12_to_15_operand")
7417 (match_operand 8 "const_12_to_15_operand")
7418 (match_operand 9 "const_12_to_15_operand")])))]
7420 && INTVAL (operands[2]) + 8 == INTVAL (operands[6])
7421 && INTVAL (operands[3]) + 8 == INTVAL (operands[7])
7422 && INTVAL (operands[4]) + 8 == INTVAL (operands[8])
7423 && INTVAL (operands[5]) + 8 == INTVAL (operands[9])"
7426 mask |= (INTVAL (operands[2]) - 4) << 0;
7427 mask |= (INTVAL (operands[3]) - 4) << 2;
7428 mask |= (INTVAL (operands[4]) - 4) << 4;
7429 mask |= (INTVAL (operands[5]) - 4) << 6;
7430 operands[2] = GEN_INT (mask);
7432 return "vpshufhw\t{%2, %1, %0|%0, %1, %2}";
7434 [(set_attr "type" "sselog")
7435 (set_attr "prefix" "vex")
7436 (set_attr "length_immediate" "1")
7437 (set_attr "mode" "OI")])
;; Expander "sse2_pshufhw": V8HI shuffle controlled by an integer constant
;; (operand 2).  Each of the constant's four 2-bit fields is biased by 4
;; and passed as a selector to sse2_pshufhw_1.  Only the low eight bits of
;; the constant are used.
7439 (define_expand "sse2_pshufhw"
7440 [(match_operand:V8HI 0 "register_operand")
7441 (match_operand:V8HI 1 "nonimmediate_operand")
7442 (match_operand:SI 2 "const_int_operand")]
7445 int mask = INTVAL (operands[2]);
7446 emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
7447 GEN_INT (((mask >> 0) & 3) + 4),
7448 GEN_INT (((mask >> 2) & 3) + 4),
7449 GEN_INT (((mask >> 4) & 3) + 4),
7450 GEN_INT (((mask >> 6) & 3) + 4)));
;; Insn "sse2_pshufhw_1": V8HI element selection emitted as pshufhw (or
;; vpshufhw via the %v prefix).  The selector begins with the fixed index
;; 0 and ends with operands 2-5, each in 4..7.  The output code removes
;; the bias of 4 from operands 2-5, packs them into an 8-bit immediate
;; (two bits each, operand 2 lowest) and stores it into operands[2].
;; Attributes: prefix_rep is "1" and prefix_data16 is "0".
;; NOTE(review): the fixed selector entries after index 0 are on lines
;; not visible in this view.
7454 (define_insn "sse2_pshufhw_1"
7455 [(set (match_operand:V8HI 0 "register_operand" "=x")
7457 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7458 (parallel [(const_int 0)
7462 (match_operand 2 "const_4_to_7_operand")
7463 (match_operand 3 "const_4_to_7_operand")
7464 (match_operand 4 "const_4_to_7_operand")
7465 (match_operand 5 "const_4_to_7_operand")])))]
7469 mask |= (INTVAL (operands[2]) - 4) << 0;
7470 mask |= (INTVAL (operands[3]) - 4) << 2;
7471 mask |= (INTVAL (operands[4]) - 4) << 4;
7472 mask |= (INTVAL (operands[5]) - 4) << 6;
7473 operands[2] = GEN_INT (mask);
7475 return "%vpshufhw\t{%2, %1, %0|%0, %1, %2}";
7477 [(set_attr "type" "sselog")
7478 (set_attr "prefix_rep" "1")
7479 (set_attr "prefix_data16" "0")
7480 (set_attr "prefix" "maybe_vex")
7481 (set_attr "length_immediate" "1")
7482 (set_attr "mode" "TI")])
;; Expander "sse2_loadd": builds a V4SI value in operand 0 from the SImode
;; operand 1 (register or memory).  The preparation statement sets
;; operands[2] to the V4SI zero constant.
;; NOTE(review): the rtx lines that combine operand 1 with operands[2] are
;; not visible in this view.
7484 (define_expand "sse2_loadd"
7485 [(set (match_operand:V4SI 0 "register_operand")
7488 (match_operand:SI 1 "nonimmediate_operand"))
7492 "operands[2] = CONST0_RTX (V4SImode);")
;; Insn "sse2_loadld": inserts the SImode operand 2 into a V4SI value
;; whose other input, operand 1, is either a register or zero
;; (reg_or_0_operand).  Five alternatives:
;;   0,1: operand 1 is the "C" constant; movd/vmovd from memory (isa
;;        "sse2") or from a general register (destination class "Yi").
;;   2:   operand 1 is "C"; movss from memory (isa "noavx").
;;   3:   operand 1 tied to operand 0; movss from an SSE register (isa
;;        "noavx").
;;   4:   vmovss three-operand form (isa "avx").
;; NOTE(review): the rtx lines enclosing the operands are not visible in
;; this view.
7494 (define_insn "sse2_loadld"
7495 [(set (match_operand:V4SI 0 "register_operand" "=x,Yi,x,x,x")
7498 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x,x"))
7499 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,C,0,x")
7503 %vmovd\t{%2, %0|%0, %2}
7504 %vmovd\t{%2, %0|%0, %2}
7505 movss\t{%2, %0|%0, %2}
7506 movss\t{%2, %0|%0, %2}
7507 vmovss\t{%2, %1, %0|%0, %1, %2}"
7508 [(set_attr "isa" "sse2,*,noavx,noavx,avx")
7509 (set_attr "type" "ssemov")
7510 (set_attr "prefix" "maybe_vex,maybe_vex,orig,orig,vex")
7511 (set_attr "mode" "TI,TI,V4SF,SF,SF")])
;; Insn "*vec_extract<mode>" for the VI12_128 modes: extracts the element
;; of register operand 1 selected by the constant operand 2 (range 0 to
;; <ssescalarnummask>) using pextr<ssemodesuffix> (or the "v" form).
;; Alternative 0 writes a general register, printed as %k0; alternative 1
;; stores to memory.  prefix_data16 and prefix_extra are conditional on
;; alternative 0 together with the V8HI mode.
7513 (define_insn "*vec_extract<mode>"
7514 [(set (match_operand:<ssescalarmode> 0 "nonimmediate_operand" "=r,m")
7515 (vec_select:<ssescalarmode>
7516 (match_operand:VI12_128 1 "register_operand" "x,x")
7518 [(match_operand:SI 2 "const_0_to_<ssescalarnummask>_operand")])))]
7521 %vpextr<ssemodesuffix>\t{%2, %1, %k0|%k0, %1, %2}
7522 %vpextr<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
7523 [(set_attr "type" "sselog1")
7524 (set (attr "prefix_data16")
7526 (and (eq_attr "alternative" "0")
7527 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
7529 (const_string "*")))
7530 (set (attr "prefix_extra")
7532 (and (eq_attr "alternative" "0")
7533 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
7535 (const_string "1")))
7536 (set_attr "length_immediate" "1")
7537 (set_attr "prefix" "maybe_vex")
7538 (set_attr "mode" "TI")])
;; Insn "*vec_extractv8hi_sse2": extracts element 0..7 (operand 2) of the
;; V8HI register operand 1 into a general register with pextrw.  Only
;; enabled when SSE2 is available and SSE4.1 is not.
7540 (define_insn "*vec_extractv8hi_sse2"
7541 [(set (match_operand:HI 0 "register_operand" "=r")
7543 (match_operand:V8HI 1 "register_operand" "x")
7545 [(match_operand:SI 2 "const_0_to_7_operand")])))]
7546 "TARGET_SSE2 && !TARGET_SSE4_1"
7547 "pextrw\t{%2, %1, %k0|%k0, %1, %2}"
7548 [(set_attr "type" "sselog1")
7549 (set_attr "prefix_data16" "1")
7550 (set_attr "length_immediate" "1")
7551 (set_attr "mode" "TI")])
;; Insn "*vec_extractv16qi_zext": extracts element 0..15 (operand 2) of
;; the V16QI register operand 1 into an SWI48 general register with
;; pextrb/vpextrb; the destination is printed as %k0.
;; NOTE(review): the extension rtx wrapping the selection is on a line
;; not visible here; the pattern name suggests zero extension.
7553 (define_insn "*vec_extractv16qi_zext"
7554 [(set (match_operand:SWI48 0 "register_operand" "=r")
7557 (match_operand:V16QI 1 "register_operand" "x")
7559 [(match_operand:SI 2 "const_0_to_15_operand")]))))]
7561 "%vpextrb\t{%2, %1, %k0|%k0, %1, %2}"
7562 [(set_attr "type" "sselog1")
7563 (set_attr "prefix_extra" "1")
7564 (set_attr "length_immediate" "1")
7565 (set_attr "prefix" "maybe_vex")
7566 (set_attr "mode" "TI")])
;; Insn "*vec_extractv8hi_zext": extracts element 0..7 (operand 2) of the
;; V8HI register operand 1 into an SWI48 general register with
;; pextrw/vpextrw; the destination is printed as %k0.
;; NOTE(review): the extension rtx wrapping the selection is on a line
;; not visible here; the pattern name suggests zero extension.
7568 (define_insn "*vec_extractv8hi_zext"
7569 [(set (match_operand:SWI48 0 "register_operand" "=r")
7572 (match_operand:V8HI 1 "register_operand" "x")
7574 [(match_operand:SI 2 "const_0_to_7_operand")]))))]
7576 "%vpextrw\t{%2, %1, %k0|%k0, %1, %2}"
7577 [(set_attr "type" "sselog1")
7578 (set_attr "prefix_data16" "1")
7579 (set_attr "length_immediate" "1")
7580 (set_attr "prefix" "maybe_vex")
7581 (set_attr "mode" "TI")])
;; Insn "*vec_extract<mode>_mem" for the VI12_128 modes: element
;; extraction whose source vector (operand 1) is an offsettable memory
;; reference ("o") and whose destination is a general register.
;; NOTE(review): the insn condition and output template are on lines not
;; visible in this view.
7583 (define_insn "*vec_extract<mode>_mem"
7584 [(set (match_operand:<ssescalarmode> 0 "register_operand" "=r")
7585 (vec_select:<ssescalarmode>
7586 (match_operand:VI12_128 1 "memory_operand" "o")
7588 [(match_operand 2 "const_0_to_<ssescalarnummask>_operand")])))]
;; Insn "*vec_extract<ssevecmodelower>_0": extracts element 0 of the
;; <ssevecmode> operand 1 into an SWI48 destination.  Four alternatives
;; (destination r, r, x, m); the second is restricted to isa "sse4".
;; Requires TARGET_SSE and rejects the case where both operands are
;; memory.
;; NOTE(review): the output template is on a line not visible here.
7592 (define_insn "*vec_extract<ssevecmodelower>_0"
7593 [(set (match_operand:SWI48 0 "nonimmediate_operand" "=r ,r,x ,m")
7595 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "mYj,x,xm,x")
7596 (parallel [(const_int 0)])))]
7597 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7599 [(set_attr "isa" "*,sse4,*,*")])
;; Insn-and-split "*vec_extractv4si_0_zext": element 0 of a V4SI register
;; delivered into a DImode general register.  Enabled for 64-bit SSE2
;; targets with TARGET_INTER_UNIT_MOVES_FROM_VEC.  After reload it is
;; split into a zero_extend:DI of operand 1, where operand 1 has been
;; re-created as an SImode REG with the same register number.
7601 (define_insn_and_split "*vec_extractv4si_0_zext"
7602 [(set (match_operand:DI 0 "register_operand" "=r")
7605 (match_operand:V4SI 1 "register_operand" "x")
7606 (parallel [(const_int 0)]))))]
7607 "TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_FROM_VEC"
7609 "&& reload_completed"
7610 [(set (match_dup 0) (zero_extend:DI (match_dup 1)))]
7611 "operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));")
;; Insn "*vec_extractv2di_0_sse": element 0 of a V2DI operand into a
;; DImode SSE register or memory.  Only for 32-bit SSE targets
;; (TARGET_SSE && !TARGET_64BIT); the case where both operands are memory
;; is rejected.
;; NOTE(review): the output template is on a line not visible here.
7613 (define_insn "*vec_extractv2di_0_sse"
7614 [(set (match_operand:DI 0 "nonimmediate_operand" "=x,m")
7616 (match_operand:V2DI 1 "nonimmediate_operand" "xm,x")
7617 (parallel [(const_int 0)])))]
7618 "TARGET_SSE && !TARGET_64BIT
7619 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
;; NOTE(review): the opening line of this form is not visible in this
;; view; its shape (pattern, condition, replacement, preparation code)
;; looks like a define_split.
;; Matches extraction of element 0 from an <ssevecmode> register into an
;; SWI48x destination.  With TARGET_SSE and after reload, it is replaced
;; by a plain move from operand 1, re-created as a <MODE>mode REG with the
;; same register number.
7623 [(set (match_operand:SWI48x 0 "nonimmediate_operand")
7625 (match_operand:<ssevecmode> 1 "register_operand")
7626 (parallel [(const_int 0)])))]
7627 "TARGET_SSE && reload_completed"
7628 [(set (match_dup 0) (match_dup 1))]
7629 "operands[1] = gen_rtx_REG (<MODE>mode, REGNO (operands[1]));")
;; Insn "*vec_extractv4si": extracts element 0..3 (operand 2) of the V4SI
;; register operand 1.  Three alternatives:
;;   0: destination register or memory, pextrd/vpextrd with the element
;;      index as immediate.
;;   1: destination SSE register with operand 1 tied to it (isa "noavx");
;;      psrldq, after the index has been scaled by 4 into a byte count.
;;   2: destination SSE register (isa "avx"); vpsrldq with the same scaled
;;      byte count.
;; NOTE(review): the case labels and the insn condition are on lines not
;; visible in this view.
7631 (define_insn "*vec_extractv4si"
7632 [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,x,x")
7634 (match_operand:V4SI 1 "register_operand" "x,0,x")
7635 (parallel [(match_operand:SI 2 "const_0_to_3_operand")])))]
7638 switch (which_alternative)
7641 return "%vpextrd\t{%2, %1, %0|%0, %1, %2}";
7644 operands [2] = GEN_INT (INTVAL (operands[2]) * 4);
7645 return "psrldq\t{%2, %0|%0, %2}";
7648 operands [2] = GEN_INT (INTVAL (operands[2]) * 4);
7649 return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
7655 [(set_attr "isa" "*,noavx,avx")
7656 (set_attr "type" "sselog1,sseishft1,sseishft1")
7657 (set_attr "prefix_extra" "1,*,*")
7658 (set_attr "length_immediate" "1")
7659 (set_attr "prefix" "maybe_vex,orig,vex")
7660 (set_attr "mode" "TI")])
;; Insn "*vec_extractv4si_zext": element 0..3 (operand 2) of a V4SI
;; register delivered into a DImode general register, for 64-bit SSE4.1
;; targets.  Emitted as pextrd/vpextrd with the destination printed as
;; %k0.
;; NOTE(review): the extension rtx wrapping the selection is on a line
;; not visible here; the pattern name suggests zero extension.
7662 (define_insn "*vec_extractv4si_zext"
7663 [(set (match_operand:DI 0 "register_operand" "=r")
7666 (match_operand:V4SI 1 "register_operand" "x")
7667 (parallel [(match_operand:SI 2 "const_0_to_3_operand")]))))]
7668 "TARGET_64BIT && TARGET_SSE4_1"
7669 "%vpextrd\t{%2, %1, %k0|%k0, %1, %2}"
7670 [(set_attr "type" "sselog1")
7671 (set_attr "prefix_extra" "1")
7672 (set_attr "length_immediate" "1")
7673 (set_attr "prefix" "maybe_vex")
7674 (set_attr "mode" "TI")])
;; Insn "*vec_extractv4si_mem": element 0..3 (operand 2) of a V4SI value
;; held in offsettable memory ("o"), delivered into an SSE register
;; (alternative 0) or a general register (alternative 1).
;; NOTE(review): the insn condition and output template are on lines not
;; visible in this view.
7676 (define_insn "*vec_extractv4si_mem"
7677 [(set (match_operand:SI 0 "register_operand" "=x,r")
7679 (match_operand:V4SI 1 "memory_operand" "o,o")
7680 (parallel [(match_operand 2 "const_0_to_3_operand")])))]
;; Insn-and-split "*vec_extractv4si_zext_mem": element 0..3 (operand 2) of
;; a V4SI value in offsettable memory delivered into a DImode register
;; (SSE or general), for 64-bit SSE targets.  After reload it is split
;; into a zero_extend:DI of an SImode memory reference obtained by
;; advancing operand 1's address by 4 bytes per element index.
7684 (define_insn_and_split "*vec_extractv4si_zext_mem"
7685 [(set (match_operand:DI 0 "register_operand" "=x,r")
7688 (match_operand:V4SI 1 "memory_operand" "o,o")
7689 (parallel [(match_operand:SI 2 "const_0_to_3_operand")]))))]
7690 "TARGET_64BIT && TARGET_SSE"
7692 "&& reload_completed"
7693 [(set (match_dup 0) (zero_extend:DI (match_dup 1)))]
7695 operands[1] = adjust_address (operands[1], SImode, INTVAL (operands[2]) * 4);
;; Extract element 1 of V2DI operand 1 into DImode operand 0.
;; Seven alternatives.  The templates visible here, in alternative order,
;; are pextrq $1, movhps, psrldq $8, vpsrldq $8 and movhlps; the templates
;; for the two memory-source ("o") alternatives are not shown in this
;; listing.  The insn condition rejects memory-to-memory operand pairs.
7698 (define_insn "*vec_extractv2di_1"
7699 [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,m,x,x,x,x,r")
7701 (match_operand:V2DI 1 "nonimmediate_operand" "x ,x,0,x,x,o,o")
7702 (parallel [(const_int 1)])))]
7703 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7705 %vpextrq\t{$1, %1, %0|%0, %1, 1}
7706 %vmovhps\t{%1, %0|%0, %1}
7707 psrldq\t{$8, %0|%0, 8}
7708 vpsrldq\t{$8, %1, %0|%0, %1, 8}
7709 movhlps\t{%1, %0|%0, %1}
7712 [(set_attr "isa" "x64_sse4,*,sse2_noavx,avx,noavx,*,x64")
7713 (set_attr "type" "sselog1,ssemov,sseishft1,sseishft1,ssemov,ssemov,imov")
7714 (set_attr "length_immediate" "1,*,1,1,*,*,*")
7715 (set_attr "prefix_rex" "1,*,*,*,*,*,*")
7716 (set_attr "prefix_extra" "1,*,*,*,*,*,*")
7717 (set_attr "prefix" "maybe_vex,maybe_vex,orig,vex,orig,*,*")
7718 (set_attr "mode" "TI,V2SF,TI,TI,V4SF,DI,DI")])
;; Split (its header line is not shown in this listing): after reload,
;; selecting the element given by operand 2 from a VI_128 vector in memory
;; becomes a scalar move.  Operand 1 is re-addressed in <ssescalarmode> at
;; byte offset  index * GET_MODE_SIZE (<ssescalarmode>mode).
7721 [(set (match_operand:<ssescalarmode> 0 "register_operand")
7722 (vec_select:<ssescalarmode>
7723 (match_operand:VI_128 1 "memory_operand")
7725 [(match_operand 2 "const_0_to_<ssescalarnummask>_operand")])))]
7726 "TARGET_SSE && reload_completed"
7727 [(set (match_dup 0) (match_dup 1))]
7729 int offs = INTVAL (operands[2]) * GET_MODE_SIZE (<ssescalarmode>mode);
7731 operands[1] = adjust_address (operands[1], <ssescalarmode>mode, offs);
;; V4SI result built from SImode operand 1; the RTL operator and the insn
;; condition are not shown in this listing.
;; Alternative 0 (isa sse2): pshufd $0 from an SSE register.
;; Alternative 1 (isa avx): vbroadcastss from memory.
;; Alternative 2 (isa noavx): shufps $0 of the destination with itself
;;   (source tied to operand 0).
7734 (define_insn "*vec_dupv4si"
7735 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
7737 (match_operand:SI 1 "nonimmediate_operand" " x,m,0")))]
7740 %vpshufd\t{$0, %1, %0|%0, %1, 0}
7741 vbroadcastss\t{%1, %0|%0, %1}
7742 shufps\t{$0, %0, %0|%0, %0, 0}"
7743 [(set_attr "isa" "sse2,avx,noavx")
7744 (set_attr "type" "sselog1,ssemov,sselog1")
7745 (set_attr "length_immediate" "1,0,1")
7746 (set_attr "prefix_extra" "0,1,*")
7747 (set_attr "prefix" "maybe_vex,vex,orig")
7748 (set_attr "mode" "TI,V4SF,V4SF")])
;; V2DI result built from DImode operand 1; the RTL operator and the insn
;; condition are not shown in this listing.
;; Four alternatives (isa sse2_noavx, avx, sse3, noavx).  Only the
;; vpunpcklqdq and movddup templates are visible here; the templates of the
;; first and last alternatives are not shown.
7750 (define_insn "*vec_dupv2di"
7751 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x,x")
7753 (match_operand:DI 1 "nonimmediate_operand" " 0,x,m,0")))]
7757 vpunpcklqdq\t{%d1, %0|%0, %d1}
7758 %vmovddup\t{%1, %0|%0, %1}
7760 [(set_attr "isa" "sse2_noavx,avx,sse3,noavx")
7761 (set_attr "type" "sselog1,sselog1,sselog1,ssemov")
7762 (set_attr "prefix" "orig,vex,maybe_vex,orig")
7763 (set_attr "mode" "TI,TI,DF,V4SF")])
;; Build a V2SI from SImode operands 1 and 2 (the RTL operator and insn
;; condition lines are not shown in this listing).
;; Alternatives 0-1: pinsrd/vpinsrd $1 inserts operand 2 (register/memory).
;; Alternatives 2-3: punpckldq/vpunpckldq with operand 2 in an SSE register.
;; Alternative 4:    operand 2 matches constraint "C"; only a movd of
;;                   operand 1 is emitted.
;; Alternatives 5-6: MMX-register ("y") destination, using punpckldq or movd.
7765 (define_insn "*vec_concatv2si_sse4_1"
7766 [(set (match_operand:V2SI 0 "register_operand" "=x, x,x,x, x, *y,*y")
7768 (match_operand:SI 1 "nonimmediate_operand" " 0, x,0,x,rm, 0,rm")
7769 (match_operand:SI 2 "vector_move_operand" "rm,rm,x,x, C,*ym, C")))]
7772 pinsrd\t{$1, %2, %0|%0, %2, 1}
7773 vpinsrd\t{$1, %2, %1, %0|%0, %1, %2, 1}
7774 punpckldq\t{%2, %0|%0, %2}
7775 vpunpckldq\t{%2, %1, %0|%0, %1, %2}
7776 %vmovd\t{%1, %0|%0, %1}
7777 punpckldq\t{%2, %0|%0, %2}
7778 movd\t{%1, %0|%0, %1}"
7779 [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
7780 (set_attr "type" "sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov")
7781 (set_attr "prefix_extra" "1,1,*,*,*,*,*")
7782 (set_attr "length_immediate" "1,1,*,*,*,*,*")
7783 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig")
7784 (set_attr "mode" "TI,TI,TI,TI,TI,DI,DI")])
7786 ;; ??? In theory we can match memory for the MMX alternative, but allowing
7787 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
7788 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; V2SI concatenation used when SSE4.1 is not available
;; (TARGET_SSE && !TARGET_SSE4_1).  Operand 2 is a register or zero
;; (reg_or_0_operand).
;; Alternatives 0-2 (isa sse2): punpckldq, or movd when operand 2 is "C";
;;   alternative 2 targets an MMX register.
;; Alternatives 3-4: unpcklps, or movss from memory when operand 2 is "C".
;; Alternatives 5-6: MMX-register forms (punpckldq / movd).
7789 (define_insn "*vec_concatv2si"
7790 [(set (match_operand:V2SI 0 "register_operand" "=x,x ,*y,x,x,*y,*y")
7792 (match_operand:SI 1 "nonimmediate_operand" " 0,rm,rm,0,m, 0,*rm")
7793 (match_operand:SI 2 "reg_or_0_operand" " x,C ,C, x,C,*y,C")))]
7794 "TARGET_SSE && !TARGET_SSE4_1"
7796 punpckldq\t{%2, %0|%0, %2}
7797 movd\t{%1, %0|%0, %1}
7798 movd\t{%1, %0|%0, %1}
7799 unpcklps\t{%2, %0|%0, %2}
7800 movss\t{%1, %0|%0, %1}
7801 punpckldq\t{%2, %0|%0, %2}
7802 movd\t{%1, %0|%0, %1}"
7803 [(set_attr "isa" "sse2,sse2,sse2,*,*,*,*")
7804 (set_attr "type" "sselog,ssemov,mmxmov,sselog,ssemov,mmxcvt,mmxmov")
7805 (set_attr "mode" "TI,TI,DI,V4SF,SF,DI,DI")])
;; Build a V4SI from two V2SI operands: operand 1 (register) and operand 2
;; (register or memory).  The RTL operator and insn condition lines are not
;; shown in this listing.
;; Register sources use punpcklqdq / vpunpcklqdq or movlhps; memory sources
;; use movhps / vmovhps.
7807 (define_insn "*vec_concatv4si"
7808 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x,x,x")
7810 (match_operand:V2SI 1 "register_operand" " 0,x,0,0,x")
7811 (match_operand:V2SI 2 "nonimmediate_operand" " x,x,x,m,m")))]
7814 punpcklqdq\t{%2, %0|%0, %2}
7815 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7816 movlhps\t{%2, %0|%0, %2}
7817 movhps\t{%2, %0|%0, %q2}
7818 vmovhps\t{%2, %1, %0|%0, %1, %q2}"
7819 [(set_attr "isa" "sse2_noavx,avx,noavx,noavx,avx")
7820 (set_attr "type" "sselog,sselog,ssemov,ssemov,ssemov")
7821 (set_attr "prefix" "orig,vex,orig,orig,vex")
7822 (set_attr "mode" "TI,TI,V4SF,V2SF,V2SF")])
7824 ;; movd instead of movq is required to handle broken assemblers.
;; Build a V2DI from DImode operands 1 and 2 (the RTL operator and insn
;; condition lines are not shown in this listing).  Ten alternatives:
;;   0-1  pinsrq / vpinsrq $1 (isa x64_sse4_noavx / x64_avx)
;;   2-4  operand 2 matches "C"; a single movd, movq or movq2dq of
;;        operand 1 is emitted
;;   5-6  punpcklqdq / vpunpcklqdq
;;   7    movlhps
;;   8-9  movhps / vmovhps with operand 2 in memory
;; The "type" attribute is sselog for alternatives 0, 1, 5 and 6 and ssemov
;; for the rest.
7825 (define_insn "vec_concatv2di"
7826 [(set (match_operand:V2DI 0 "register_operand"
7827 "=x,x ,Yi,x ,!x,x,x,x,x,x")
7829 (match_operand:DI 1 "nonimmediate_operand"
7830 " 0,x ,r ,xm,*y,0,x,0,0,x")
7831 (match_operand:DI 2 "vector_move_operand"
7832 "rm,rm,C ,C ,C ,x,x,x,m,m")))]
7835 pinsrq\t{$1, %2, %0|%0, %2, 1}
7836 vpinsrq\t{$1, %2, %1, %0|%0, %1, %2, 1}
7837 %vmovd\t{%1, %0|%0, %1}
7838 %vmovq\t{%1, %0|%0, %1}
7839 movq2dq\t{%1, %0|%0, %1}
7840 punpcklqdq\t{%2, %0|%0, %2}
7841 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7842 movlhps\t{%2, %0|%0, %2}
7843 movhps\t{%2, %0|%0, %2}
7844 vmovhps\t{%2, %1, %0|%0, %1, %2}"
7845 [(set_attr "isa" "x64_sse4_noavx,x64_avx,x64,sse2,sse2,sse2_noavx,avx,noavx,noavx,avx")
7848 (eq_attr "alternative" "0,1,5,6")
7849 (const_string "sselog")
7850 (const_string "ssemov")))
7851 (set_attr "prefix_rex" "1,1,1,*,*,*,*,*,*,*")
7852 (set_attr "prefix_extra" "1,1,*,*,*,*,*,*,*,*")
7853 (set_attr "length_immediate" "1,1,*,*,*,*,*,*,*,*")
7854 (set_attr "prefix" "orig,vex,maybe_vex,maybe_vex,orig,orig,vex,orig,orig,vex")
7855 (set_attr "mode" "TI,TI,TI,TI,TI,TI,TI,V4SF,V2SF,V2SF")])
;; Expander: operand 1 is a VI124_AVX512F register, operand 0 a register in
;; the matching <sseunpackmode>.  All work is delegated to
;; ix86_expand_sse_unpack (operands[0], operands[1], false, false).
;; NOTE(review): the two booleans presumably select unsigned and high-half
;; handling respectively -- confirm against the helper's definition.
7857 (define_expand "vec_unpacks_lo_<mode>"
7858 [(match_operand:<sseunpackmode> 0 "register_operand")
7859 (match_operand:VI124_AVX512F 1 "register_operand")]
7861 "ix86_expand_sse_unpack (operands[0], operands[1], false, false); DONE;")
;; Expander: operand 1 is a VI124_AVX512F register, operand 0 a register in
;; the matching <sseunpackmode>.  All work is delegated to
;; ix86_expand_sse_unpack (operands[0], operands[1], false, true).
;; NOTE(review): the two booleans presumably select unsigned and high-half
;; handling respectively -- confirm against the helper's definition.
7863 (define_expand "vec_unpacks_hi_<mode>"
7864 [(match_operand:<sseunpackmode> 0 "register_operand")
7865 (match_operand:VI124_AVX512F 1 "register_operand")]
7867 "ix86_expand_sse_unpack (operands[0], operands[1], false, true); DONE;")
;; Expander: operand 1 is a VI124_AVX512F register, operand 0 a register in
;; the matching <sseunpackmode>.  All work is delegated to
;; ix86_expand_sse_unpack (operands[0], operands[1], true, false).
;; NOTE(review): the two booleans presumably select unsigned and high-half
;; handling respectively -- confirm against the helper's definition.
7869 (define_expand "vec_unpacku_lo_<mode>"
7870 [(match_operand:<sseunpackmode> 0 "register_operand")
7871 (match_operand:VI124_AVX512F 1 "register_operand")]
7873 "ix86_expand_sse_unpack (operands[0], operands[1], true, false); DONE;")
;; Expander: operand 1 is a VI124_AVX512F register, operand 0 a register in
;; the matching <sseunpackmode>.  All work is delegated to
;; ix86_expand_sse_unpack (operands[0], operands[1], true, true).
;; NOTE(review): the two booleans presumably select unsigned and high-half
;; handling respectively -- confirm against the helper's definition.
7875 (define_expand "vec_unpacku_hi_<mode>"
7876 [(match_operand:<sseunpackmode> 0 "register_operand")
7877 (match_operand:VI124_AVX512F 1 "register_operand")]
7879 "ix86_expand_sse_unpack (operands[0], operands[1], true, true); DONE;")
7881 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7885 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander for the unsigned-average pattern on VI12_AVX2 modes.  The
;; visible RTL zero-extends operands 1 and 2 to <ssedoublemode>, adds them
;; inside a second plus, and shifts the sum right; the remaining lines of
;; the pattern are not shown in this listing.
;; The preparation code sets operand 3 to CONST1_RTX (<MODE>mode) and runs
;; ix86_fixup_binary_operands_no_copy for PLUS.
7887 (define_expand "<sse2_avx2>_uavg<mode>3"
7888 [(set (match_operand:VI12_AVX2 0 "register_operand")
7890 (lshiftrt:<ssedoublemode>
7891 (plus:<ssedoublemode>
7892 (plus:<ssedoublemode>
7893 (zero_extend:<ssedoublemode>
7894 (match_operand:VI12_AVX2 1 "nonimmediate_operand"))
7895 (zero_extend:<ssedoublemode>
7896 (match_operand:VI12_AVX2 2 "nonimmediate_operand")))
7901 operands[3] = CONST1_RTX(<MODE>mode);
7902 ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);
;; Matcher for the unsigned-average RTL on VI12_AVX2 modes; operand 3 must
;; satisfy const1_operand.  Operands 1 and 2 are commutative ("%"), and the
;; condition additionally requires ix86_binary_operator_ok for PLUS.
;; Emits pavg<ssemodesuffix> (two-operand, isa noavx) or
;; vpavg<ssemodesuffix> (three-operand, isa avx).
7905 (define_insn "*<sse2_avx2>_uavg<mode>3"
7906 [(set (match_operand:VI12_AVX2 0 "register_operand" "=x,x")
7908 (lshiftrt:<ssedoublemode>
7909 (plus:<ssedoublemode>
7910 (plus:<ssedoublemode>
7911 (zero_extend:<ssedoublemode>
7912 (match_operand:VI12_AVX2 1 "nonimmediate_operand" "%0,x"))
7913 (zero_extend:<ssedoublemode>
7914 (match_operand:VI12_AVX2 2 "nonimmediate_operand" "xm,xm")))
7915 (match_operand:VI12_AVX2 3 "const1_operand"))
7917 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
7919 pavg<ssemodesuffix>\t{%2, %0|%0, %2}
7920 vpavg<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
7921 [(set_attr "isa" "noavx,avx")
7922 (set_attr "type" "sseiadd")
7923 (set_attr "prefix_data16" "1,*")
7924 (set_attr "prefix" "orig,vex")
7925 (set_attr "mode" "<sseinsnmode>")])
7927 ;; The correct representation for this is absolutely enormous, and
7928 ;; surely not generally useful.
;; psadbw / vpsadbw.  Operands 1 and 2 are <ssebytemode> vectors and the
;; result is a VI8_AVX2 vector.  The RTL operator wrapping the operands and
;; the insn condition are not shown in this listing.
7929 (define_insn "<sse2_avx2>_psadbw"
7930 [(set (match_operand:VI8_AVX2 0 "register_operand" "=x,x")
7932 [(match_operand:<ssebytemode> 1 "register_operand" "0,x")
7933 (match_operand:<ssebytemode> 2 "nonimmediate_operand" "xm,xm")]
7937 psadbw\t{%2, %0|%0, %2}
7938 vpsadbw\t{%2, %1, %0|%0, %1, %2}"
7939 [(set_attr "isa" "noavx,avx")
7940 (set_attr "type" "sseiadd")
7941 (set_attr "atom_unit" "simul")
7942 (set_attr "prefix_data16" "1,*")
7943 (set_attr "prefix" "orig,vex")
7944 (set_attr "mode" "<sseinsnmode>")])
;; movmsk<ssemodesuffix> (VEX-encoded when applicable, via %v): the source is
;; a VF_128_256 SSE register and the SImode result goes to a general
;; register.  The unspec name and insn condition are not shown in this
;; listing.
7946 (define_insn "<sse>_movmsk<ssemodesuffix><avxsizesuffix>"
7947 [(set (match_operand:SI 0 "register_operand" "=r")
7949 [(match_operand:VF_128_256 1 "register_operand" "x")]
7952 "%vmovmsk<ssemodesuffix>\t{%1, %0|%0, %1}"
7953 [(set_attr "type" "ssemov")
7954 (set_attr "prefix" "maybe_vex")
7955 (set_attr "mode" "<MODE>")])
;; vpmovmskb on a V32QI register, producing an SImode value in a general
;; register.  The unspec name and insn condition are not shown in this
;; listing.
;; NOTE(review): the "mode" attribute is DI here, whereas sse2_pmovmskb uses
;; SI for the same SImode destination -- confirm this is intentional.
7957 (define_insn "avx2_pmovmskb"
7958 [(set (match_operand:SI 0 "register_operand" "=r")
7959 (unspec:SI [(match_operand:V32QI 1 "register_operand" "x")]
7962 "vpmovmskb\t{%1, %0|%0, %1}"
7963 [(set_attr "type" "ssemov")
7964 (set_attr "prefix" "vex")
7965 (set_attr "mode" "DI")])
;; pmovmskb (VEX-encoded when applicable, via %v) on a V16QI register,
;; producing an SImode value in a general register.  The unspec name and
;; insn condition are not shown in this listing.
7967 (define_insn "sse2_pmovmskb"
7968 [(set (match_operand:SI 0 "register_operand" "=r")
7969 (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
7972 "%vpmovmskb\t{%1, %0|%0, %1}"
7973 [(set_attr "type" "ssemov")
7974 (set_attr "prefix_data16" "1")
7975 (set_attr "prefix" "maybe_vex")
7976 (set_attr "mode" "SI")])
;; Expander: V16QI memory operand 0 is set from an unspec whose visible
;; inputs are V16QI registers 1 and 2.  The rest of the pattern and the
;; condition are not shown in this listing.
7978 (define_expand "sse2_maskmovdqu"
7979 [(set (match_operand:V16QI 0 "memory_operand")
7980 (unspec:V16QI [(match_operand:V16QI 1 "register_operand")
7981 (match_operand:V16QI 2 "register_operand")
;; maskmovdqu.  Operand 0 is the destination address, a Pmode register with
;; constraint "D"; the same memory location also appears as an input of the
;; unspec, next to V16QI registers 1 and 2.
;; The output code writes an "addr32" prefix directly to asm_out_file when
;; Pmode != word_mode and then returns the (v)maskmovdqu template, which
;; names only operands 1 and 2.  length_address and length_vex are computed
;; explicitly below for the same reason.
7986 (define_insn "*sse2_maskmovdqu"
7987 [(set (mem:V16QI (match_operand:P 0 "register_operand" "D"))
7988 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
7989 (match_operand:V16QI 2 "register_operand" "x")
7990 (mem:V16QI (match_dup 0))]
7994 /* We can't use %^ here due to ASM_OUTPUT_OPCODE processing
7995 that requires %v to be at the beginning of the opcode name. */
7996 if (Pmode != word_mode)
7997 fputs ("\taddr32", asm_out_file);
7998 return "%vmaskmovdqu\t{%2, %1|%1, %2}";
8000 [(set_attr "type" "ssemov")
8001 (set_attr "prefix_data16" "1")
8002 (set (attr "length_address")
8003 (symbol_ref ("Pmode != word_mode")))
8004 ;; The implicit %rdi operand confuses default length_vex computation.
8005 (set (attr "length_vex")
8006 (symbol_ref ("3 + REX_SSE_REGNO_P (REGNO (operands[2]))")))
8007 (set_attr "prefix" "maybe_vex")
8008 (set_attr "mode" "TI")])
;; unspec_volatile taking one SImode memory operand; the "memory" attribute
;; is "load".  The unspec name, insn condition and output template are not
;; shown in this listing.
8010 (define_insn "sse_ldmxcsr"
8011 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
8015 [(set_attr "type" "sse")
8016 (set_attr "atom_sse_attr" "mxcsr")
8017 (set_attr "prefix" "maybe_vex")
8018 (set_attr "memory" "load")])
;; Stores the value of unspec_volatile UNSPECV_STMXCSR into SImode memory
;; operand 0; the "memory" attribute is "store".  The insn condition and
;; output template are not shown in this listing.
8020 (define_insn "sse_stmxcsr"
8021 [(set (match_operand:SI 0 "memory_operand" "=m")
8022 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
8025 [(set_attr "type" "sse")
8026 (set_attr "atom_sse_attr" "mxcsr")
8027 (set_attr "prefix" "maybe_vex")
8028 (set_attr "memory" "store")])
;; unspec_volatile taking one address operand (constraint "p"); the
;; "memory" attribute is "unknown".  The unspec name, insn condition and
;; output template are not shown in this listing.
8030 (define_insn "sse2_clflush"
8031 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
8035 [(set_attr "type" "sse")
8036 (set_attr "atom_sse_attr" "fence")
8037 (set_attr "memory" "unknown")])
;; unspec_volatile with two SImode register operands pinned by constraints
;; "a" and "c"; the insn length is fixed at 3.  The unspec name, insn
;; condition and output template are not shown in this listing.
8040 (define_insn "sse3_mwait"
8041 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
8042 (match_operand:SI 1 "register_operand" "c")]
8045 ;; 64bit version is "mwait %rax,%rcx". But only lower 32bits are used.
8046 ;; Since 32bit register operands are implicitly zero extended to 64bit,
8047 ;; we only need to set up 32bit registers.
8049 [(set_attr "length" "3")])
;; unspec_volatile with a Pmode address register (constraint "a") and two
;; SImode registers (constraints "c" and "d").  The insn length is 3, plus 1
;; when Pmode != word_mode.  The unspec name, insn condition and output
;; template are not shown in this listing.
8051 (define_insn "sse3_monitor_<mode>"
8052 [(unspec_volatile [(match_operand:P 0 "register_operand" "a")
8053 (match_operand:SI 1 "register_operand" "c")
8054 (match_operand:SI 2 "register_operand" "d")]
8057 ;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in
8058 ;; RCX and RDX are used. Since 32bit register operands are implicitly
8059 ;; zero extended to 64bit, we only need to set up 32bit registers.
8061 [(set (attr "length")
8062 (symbol_ref ("(Pmode != word_mode) + 3")))])
8064 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8066 ;; SSSE3 instructions
8068 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Code iterator over the four RTL codes plus, ss_plus, minus and ss_minus.
8070 (define_code_iterator ssse3_plusminus [plus ss_plus minus ss_minus])
;; 256-bit horizontal word operation, emitted as vph<plusminus_mnemonic>w.
;; The visible RTL pairs adjacent HImode elements (0,1), (2,3), ... (14,15)
;; of operand 1 and then the same pairs of operand 2; the lines carrying the
;; combining operator and the vec_concat structure are not shown in this
;; listing.
;; NOTE(review): the 256-bit instruction works on each 128-bit lane
;; separately; verify that listing all pairs of operand 1 before all pairs
;; of operand 2 matches the result element order of the instruction.
8072 (define_insn "avx2_ph<plusminus_mnemonic>wv16hi3"
8073 [(set (match_operand:V16HI 0 "register_operand" "=x")
8080 (match_operand:V16HI 1 "register_operand" "x")
8081 (parallel [(const_int 0)]))
8082 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8084 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8085 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8088 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8089 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8091 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8092 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8096 (vec_select:HI (match_dup 1) (parallel [(const_int 8)]))
8097 (vec_select:HI (match_dup 1) (parallel [(const_int 9)])))
8099 (vec_select:HI (match_dup 1) (parallel [(const_int 10)]))
8100 (vec_select:HI (match_dup 1) (parallel [(const_int 11)]))))
8103 (vec_select:HI (match_dup 1) (parallel [(const_int 12)]))
8104 (vec_select:HI (match_dup 1) (parallel [(const_int 13)])))
8106 (vec_select:HI (match_dup 1) (parallel [(const_int 14)]))
8107 (vec_select:HI (match_dup 1) (parallel [(const_int 15)]))))))
8113 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
8114 (parallel [(const_int 0)]))
8115 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8117 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8118 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8121 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8122 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8124 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8125 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))
8129 (vec_select:HI (match_dup 2) (parallel [(const_int 8)]))
8130 (vec_select:HI (match_dup 2) (parallel [(const_int 9)])))
8132 (vec_select:HI (match_dup 2) (parallel [(const_int 10)]))
8133 (vec_select:HI (match_dup 2) (parallel [(const_int 11)]))))
8136 (vec_select:HI (match_dup 2) (parallel [(const_int 12)]))
8137 (vec_select:HI (match_dup 2) (parallel [(const_int 13)])))
8139 (vec_select:HI (match_dup 2) (parallel [(const_int 14)]))
8140 (vec_select:HI (match_dup 2) (parallel [(const_int 15)]))))))))]
8142 "vph<plusminus_mnemonic>w\t{%2, %1, %0|%0, %1, %2}"
8143 [(set_attr "type" "sseiadd")
8144 (set_attr "prefix_extra" "1")
8145 (set_attr "prefix" "vex")
8146 (set_attr "mode" "OI")])
;; 128-bit horizontal word operation: ph<plusminus_mnemonic>w (isa noavx,
;; destination tied to operand 1) or the three-operand VEX form (isa avx).
;; The visible RTL pairs adjacent HImode elements (0,1) ... (6,7) of
;; operand 1 and then of operand 2; the lines carrying the combining
;; operator and the insn condition are not shown in this listing.
8148 (define_insn "ssse3_ph<plusminus_mnemonic>wv8hi3"
8149 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
8155 (match_operand:V8HI 1 "register_operand" "0,x")
8156 (parallel [(const_int 0)]))
8157 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8159 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8160 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8163 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8164 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8166 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8167 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8172 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
8173 (parallel [(const_int 0)]))
8174 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8176 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8177 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8180 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8181 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8183 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8184 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8187 ph<plusminus_mnemonic>w\t{%2, %0|%0, %2}
8188 vph<plusminus_mnemonic>w\t{%2, %1, %0|%0, %1, %2}"
8189 [(set_attr "isa" "noavx,avx")
8190 (set_attr "type" "sseiadd")
8191 (set_attr "atom_unit" "complex")
8192 (set_attr "prefix_data16" "1,*")
8193 (set_attr "prefix_extra" "1")
8194 (set_attr "prefix" "orig,vex")
8195 (set_attr "mode" "TI")])
;; MMX-register ("y") horizontal word operation on V4HI, emitted as
;; ph<plusminus_mnemonic>w with the destination tied to operand 1.
;; The visible RTL pairs HImode elements (0,1) and (2,3) of operand 1 and
;; then of operand 2; the combining operator lines and the insn condition
;; are not shown in this listing.
8197 (define_insn "ssse3_ph<plusminus_mnemonic>wv4hi3"
8198 [(set (match_operand:V4HI 0 "register_operand" "=y")
8203 (match_operand:V4HI 1 "register_operand" "0")
8204 (parallel [(const_int 0)]))
8205 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8207 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8208 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8212 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8213 (parallel [(const_int 0)]))
8214 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8216 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8217 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8219 "ph<plusminus_mnemonic>w\t{%2, %0|%0, %2}"
8220 [(set_attr "type" "sseiadd")
8221 (set_attr "atom_unit" "complex")
8222 (set_attr "prefix_extra" "1")
8223 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8224 (set_attr "mode" "DI")])
;; 256-bit horizontal doubleword operation, emitted as
;; vph<plusminus_mnemonic>d.  The visible RTL pairs adjacent SImode elements
;; (0,1) ... (6,7) of operand 1 and then of operand 2; the combining
;; operator lines and the insn condition are not shown in this listing.
;; NOTE(review): the 256-bit instruction works on each 128-bit lane
;; separately; verify that listing all pairs of operand 1 before all pairs
;; of operand 2 matches the result element order of the instruction.
8226 (define_insn "avx2_ph<plusminus_mnemonic>dv8si3"
8227 [(set (match_operand:V8SI 0 "register_operand" "=x")
8233 (match_operand:V8SI 1 "register_operand" "x")
8234 (parallel [(const_int 0)]))
8235 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8237 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8238 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8241 (vec_select:SI (match_dup 1) (parallel [(const_int 4)]))
8242 (vec_select:SI (match_dup 1) (parallel [(const_int 5)])))
8244 (vec_select:SI (match_dup 1) (parallel [(const_int 6)]))
8245 (vec_select:SI (match_dup 1) (parallel [(const_int 7)])))))
8250 (match_operand:V8SI 2 "nonimmediate_operand" "xm")
8251 (parallel [(const_int 0)]))
8252 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8254 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8255 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))
8258 (vec_select:SI (match_dup 2) (parallel [(const_int 4)]))
8259 (vec_select:SI (match_dup 2) (parallel [(const_int 5)])))
8261 (vec_select:SI (match_dup 2) (parallel [(const_int 6)]))
8262 (vec_select:SI (match_dup 2) (parallel [(const_int 7)])))))))]
8264 "vph<plusminus_mnemonic>d\t{%2, %1, %0|%0, %1, %2}"
8265 [(set_attr "type" "sseiadd")
8266 (set_attr "prefix_extra" "1")
8267 (set_attr "prefix" "vex")
8268 (set_attr "mode" "OI")])
;; 128-bit horizontal doubleword operation: ph<plusminus_mnemonic>d (isa
;; noavx, destination tied to operand 1) or the three-operand VEX form (isa
;; avx).  The visible RTL pairs SImode elements (0,1) and (2,3) of operand 1
;; and then of operand 2; the combining operator lines and the insn
;; condition are not shown in this listing.
8270 (define_insn "ssse3_ph<plusminus_mnemonic>dv4si3"
8271 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
8276 (match_operand:V4SI 1 "register_operand" "0,x")
8277 (parallel [(const_int 0)]))
8278 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8280 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8281 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8285 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
8286 (parallel [(const_int 0)]))
8287 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8289 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8290 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
8293 ph<plusminus_mnemonic>d\t{%2, %0|%0, %2}
8294 vph<plusminus_mnemonic>d\t{%2, %1, %0|%0, %1, %2}"
8295 [(set_attr "isa" "noavx,avx")
8296 (set_attr "type" "sseiadd")
8297 (set_attr "atom_unit" "complex")
8298 (set_attr "prefix_data16" "1,*")
8299 (set_attr "prefix_extra" "1")
8300 (set_attr "prefix" "orig,vex")
8301 (set_attr "mode" "TI")])
;; MMX-register ("y") horizontal doubleword operation on V2SI, emitted as
;; ph<plusminus_mnemonic>d with the destination tied to operand 1.
;; The visible RTL pairs SImode elements 0 and 1 of operand 1 and then of
;; operand 2; the combining operator lines and the insn condition are not
;; shown in this listing.
8303 (define_insn "ssse3_ph<plusminus_mnemonic>dv2si3"
8304 [(set (match_operand:V2SI 0 "register_operand" "=y")
8308 (match_operand:V2SI 1 "register_operand" "0")
8309 (parallel [(const_int 0)]))
8310 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8313 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
8314 (parallel [(const_int 0)]))
8315 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
8317 "ph<plusminus_mnemonic>d\t{%2, %0|%0, %2}"
8318 [(set_attr "type" "sseiadd")
8319 (set_attr "atom_unit" "complex")
8320 (set_attr "prefix_extra" "1")
8321 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8322 (set_attr "mode" "DI")])
;; vpmaddubsw on 256-bit vectors: V32QI operands 1 and 2, V16HI result.
;; The visible RTL selects the even-numbered bytes (0, 2, ... 30) of both
;; operands and then the odd-numbered bytes (1, 3, ... 31) of both operands.
;; The lines carrying the extension, multiply and add operators and the
;; insn condition are not shown in this listing.
8324 (define_insn "avx2_pmaddubsw256"
8325 [(set (match_operand:V16HI 0 "register_operand" "=x")
8330 (match_operand:V32QI 1 "register_operand" "x")
8331 (parallel [(const_int 0) (const_int 2)
8332 (const_int 4) (const_int 6)
8333 (const_int 8) (const_int 10)
8334 (const_int 12) (const_int 14)
8335 (const_int 16) (const_int 18)
8336 (const_int 20) (const_int 22)
8337 (const_int 24) (const_int 26)
8338 (const_int 28) (const_int 30)])))
8341 (match_operand:V32QI 2 "nonimmediate_operand" "xm")
8342 (parallel [(const_int 0) (const_int 2)
8343 (const_int 4) (const_int 6)
8344 (const_int 8) (const_int 10)
8345 (const_int 12) (const_int 14)
8346 (const_int 16) (const_int 18)
8347 (const_int 20) (const_int 22)
8348 (const_int 24) (const_int 26)
8349 (const_int 28) (const_int 30)]))))
8352 (vec_select:V16QI (match_dup 1)
8353 (parallel [(const_int 1) (const_int 3)
8354 (const_int 5) (const_int 7)
8355 (const_int 9) (const_int 11)
8356 (const_int 13) (const_int 15)
8357 (const_int 17) (const_int 19)
8358 (const_int 21) (const_int 23)
8359 (const_int 25) (const_int 27)
8360 (const_int 29) (const_int 31)])))
8362 (vec_select:V16QI (match_dup 2)
8363 (parallel [(const_int 1) (const_int 3)
8364 (const_int 5) (const_int 7)
8365 (const_int 9) (const_int 11)
8366 (const_int 13) (const_int 15)
8367 (const_int 17) (const_int 19)
8368 (const_int 21) (const_int 23)
8369 (const_int 25) (const_int 27)
8370 (const_int 29) (const_int 31)]))))))]
8372 "vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
8373 [(set_attr "type" "sseiadd")
8374 (set_attr "prefix_extra" "1")
8375 (set_attr "prefix" "vex")
8376 (set_attr "mode" "OI")])
;; pmaddubsw / vpmaddubsw on 128-bit vectors: V16QI operands 1 and 2, V8HI
;; result.  The visible RTL selects the even-numbered bytes (0, 2, ... 14)
;; of both operands and then the odd-numbered bytes (1, 3, ... 15) of both
;; operands.  The lines carrying the extension, multiply and add operators
;; and the insn condition are not shown in this listing.
8378 (define_insn "ssse3_pmaddubsw128"
8379 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
8384 (match_operand:V16QI 1 "register_operand" "0,x")
8385 (parallel [(const_int 0) (const_int 2)
8386 (const_int 4) (const_int 6)
8387 (const_int 8) (const_int 10)
8388 (const_int 12) (const_int 14)])))
8391 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")
8392 (parallel [(const_int 0) (const_int 2)
8393 (const_int 4) (const_int 6)
8394 (const_int 8) (const_int 10)
8395 (const_int 12) (const_int 14)]))))
8398 (vec_select:V8QI (match_dup 1)
8399 (parallel [(const_int 1) (const_int 3)
8400 (const_int 5) (const_int 7)
8401 (const_int 9) (const_int 11)
8402 (const_int 13) (const_int 15)])))
8404 (vec_select:V8QI (match_dup 2)
8405 (parallel [(const_int 1) (const_int 3)
8406 (const_int 5) (const_int 7)
8407 (const_int 9) (const_int 11)
8408 (const_int 13) (const_int 15)]))))))]
8411 pmaddubsw\t{%2, %0|%0, %2}
8412 vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
8413 [(set_attr "isa" "noavx,avx")
8414 (set_attr "type" "sseiadd")
8415 (set_attr "atom_unit" "simul")
8416 (set_attr "prefix_data16" "1,*")
8417 (set_attr "prefix_extra" "1")
8418 (set_attr "prefix" "orig,vex")
8419 (set_attr "mode" "TI")])
;; MMX-register ("y") pmaddubsw: V8QI operands 1 and 2, V4HI result, with
;; the destination tied to operand 1.  The visible RTL selects bytes
;; 0, 2, 4, 6 of both operands and then bytes 1, 3, 5, 7 of both operands.
;; The lines carrying the extension, multiply and add operators and the
;; insn condition are not shown in this listing.
8421 (define_insn "ssse3_pmaddubsw"
8422 [(set (match_operand:V4HI 0 "register_operand" "=y")
8427 (match_operand:V8QI 1 "register_operand" "0")
8428 (parallel [(const_int 0) (const_int 2)
8429 (const_int 4) (const_int 6)])))
8432 (match_operand:V8QI 2 "nonimmediate_operand" "ym")
8433 (parallel [(const_int 0) (const_int 2)
8434 (const_int 4) (const_int 6)]))))
8437 (vec_select:V4QI (match_dup 1)
8438 (parallel [(const_int 1) (const_int 3)
8439 (const_int 5) (const_int 7)])))
8441 (vec_select:V4QI (match_dup 2)
8442 (parallel [(const_int 1) (const_int 3)
8443 (const_int 5) (const_int 7)]))))))]
8445 "pmaddubsw\t{%2, %0|%0, %2}"
8446 [(set_attr "type" "sseiadd")
8447 (set_attr "atom_unit" "simul")
8448 (set_attr "prefix_extra" "1")
8449 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8450 (set_attr "mode" "DI")])
;; Mode iterator over V4HI, V8HI and, only when TARGET_AVX2 holds, V16HI.
8452 (define_mode_iterator PMULHRSW
8453 [V4HI V8HI (V16HI "TARGET_AVX2")])
;; Expander for the pmulhrsw pattern over the PMULHRSW modes.  The visible
;; RTL sign-extends operands 1 and 2 to <ssedoublemode>, multiplies them,
;; and wraps the product in lshiftrt / plus / lshiftrt; the shift counts and
;; remaining lines are not shown in this listing.
;; The preparation code sets operand 3 to CONST1_RTX (<MODE>mode) and runs
;; ix86_fixup_binary_operands_no_copy for MULT.
8455 (define_expand "<ssse3_avx2>_pmulhrsw<mode>3"
8456 [(set (match_operand:PMULHRSW 0 "register_operand")
8458 (lshiftrt:<ssedoublemode>
8459 (plus:<ssedoublemode>
8460 (lshiftrt:<ssedoublemode>
8461 (mult:<ssedoublemode>
8462 (sign_extend:<ssedoublemode>
8463 (match_operand:PMULHRSW 1 "nonimmediate_operand"))
8464 (sign_extend:<ssedoublemode>
8465 (match_operand:PMULHRSW 2 "nonimmediate_operand")))
8471 operands[3] = CONST1_RTX(<MODE>mode);
8472 ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
;; Matcher for the pmulhrsw RTL on VI2_AVX2 modes; operand 3 must satisfy
;; const1_operand.  Operands 1 and 2 are commutative ("%"), and the
;; condition additionally requires ix86_binary_operator_ok for MULT.
;; Emits pmulhrsw (isa noavx) or the three-operand vpmulhrsw (isa avx).
8475 (define_insn "*<ssse3_avx2>_pmulhrsw<mode>3"
8476 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
8478 (lshiftrt:<ssedoublemode>
8479 (plus:<ssedoublemode>
8480 (lshiftrt:<ssedoublemode>
8481 (mult:<ssedoublemode>
8482 (sign_extend:<ssedoublemode>
8483 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,x"))
8484 (sign_extend:<ssedoublemode>
8485 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,xm")))
8487 (match_operand:VI2_AVX2 3 "const1_operand"))
8489 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
8491 pmulhrsw\t{%2, %0|%0, %2}
8492 vpmulhrsw\t{%2, %1, %0|%0, %1, %2}"
8493 [(set_attr "isa" "noavx,avx")
8494 (set_attr "type" "sseimul")
8495 (set_attr "prefix_data16" "1,*")
8496 (set_attr "prefix_extra" "1")
8497 (set_attr "prefix" "orig,vex")
8498 (set_attr "mode" "<sseinsnmode>")])
;; MMX-register ("y") pmulhrsw on V4HI.  Operands 1 and 2 are commutative
;; ("%"), operand 3 must satisfy const1_operand, and the condition requires
;; TARGET_SSSE3 plus ix86_binary_operator_ok for MULT.  The operator lines
;; of the RTL are not shown in this listing.
8500 (define_insn "*ssse3_pmulhrswv4hi3"
8501 [(set (match_operand:V4HI 0 "register_operand" "=y")
8508 (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
8510 (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
8512 (match_operand:V4HI 3 "const1_operand"))
8514 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
8515 "pmulhrsw\t{%2, %0|%0, %2}"
8516 [(set_attr "type" "sseimul")
8517 (set_attr "prefix_extra" "1")
8518 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8519 (set_attr "mode" "DI")])
;; pshufb / vpshufb on VI1_AVX2 vectors: operand 1 is a register, operand 2
;; a register or memory.  The RTL operator wrapping the operand vector and
;; the insn condition are not shown in this listing.
8521 (define_insn "<ssse3_avx2>_pshufb<mode>3"
8522 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
8524 [(match_operand:VI1_AVX2 1 "register_operand" "0,x")
8525 (match_operand:VI1_AVX2 2 "nonimmediate_operand" "xm,xm")]
8529 pshufb\t{%2, %0|%0, %2}
8530 vpshufb\t{%2, %1, %0|%0, %1, %2}"
8531 [(set_attr "isa" "noavx,avx")
8532 (set_attr "type" "sselog1")
8533 (set_attr "prefix_data16" "1,*")
8534 (set_attr "prefix_extra" "1")
8535 (set_attr "prefix" "orig,vex")
8536 (set_attr "btver2_decode" "vector,vector")
8537 (set_attr "mode" "<sseinsnmode>")])
;; MMX-register ("y") pshufb on V8QI, expressed as an unspec of operands 1
;; and 2 with the destination tied to operand 1.  The unspec name and the
;; insn condition are not shown in this listing.
8539 (define_insn "ssse3_pshufbv8qi3"
8540 [(set (match_operand:V8QI 0 "register_operand" "=y")
8541 (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0")
8542 (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
8545 "pshufb\t{%2, %0|%0, %2}";
8546 [(set_attr "type" "sselog1")
8547 (set_attr "prefix_extra" "1")
8548 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8549 (set_attr "mode" "DI")])
;; psign<ssemodesuffix> / vpsign<ssemodesuffix> on VI124_AVX2 vectors:
;; operand 1 is a register, operand 2 a register or memory.  The RTL
;; operator wrapping the operand vector and the insn condition are not shown
;; in this listing.
8551 (define_insn "<ssse3_avx2>_psign<mode>3"
8552 [(set (match_operand:VI124_AVX2 0 "register_operand" "=x,x")
8554 [(match_operand:VI124_AVX2 1 "register_operand" "0,x")
8555 (match_operand:VI124_AVX2 2 "nonimmediate_operand" "xm,xm")]
8559 psign<ssemodesuffix>\t{%2, %0|%0, %2}
8560 vpsign<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
8561 [(set_attr "isa" "noavx,avx")
8562 (set_attr "type" "sselog1")
8563 (set_attr "prefix_data16" "1,*")
8564 (set_attr "prefix_extra" "1")
8565 (set_attr "prefix" "orig,vex")
8566 (set_attr "mode" "<sseinsnmode>")])
;; MMX-register ("y") psign<mmxvecsize> on MMXMODEI vectors, with the
;; destination tied to operand 1.  The RTL operator wrapping the operand
;; vector and the insn condition are not shown in this listing.
8568 (define_insn "ssse3_psign<mode>3"
8569 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
8571 [(match_operand:MMXMODEI 1 "register_operand" "0")
8572 (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
8575 "psign<mmxvecsize>\t{%2, %0|%0, %2}";
8576 [(set_attr "type" "sselog1")
8577 (set_attr "prefix_extra" "1")
8578 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8579 (set_attr "mode" "DI")])
;; palignr / vpalignr on SSESCALARMODE operands.  Operand 3 must satisfy
;; const_0_to_255_mul_8_operand; the output code divides it by 8 before
;; printing it as the instruction's immediate.  The unspec name and the
;; insn condition are not shown in this listing.
8581 (define_insn "<ssse3_avx2>_palignr<mode>"
8582 [(set (match_operand:SSESCALARMODE 0 "register_operand" "=x,x")
8583 (unspec:SSESCALARMODE
8584 [(match_operand:SSESCALARMODE 1 "register_operand" "0,x")
8585 (match_operand:SSESCALARMODE 2 "nonimmediate_operand" "xm,xm")
8586 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n,n")]
8590 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
8592 switch (which_alternative)
8595 return "palignr\t{%3, %2, %0|%0, %2, %3}";
8597 return "vpalignr\t{%3, %2, %1, %0|%0, %1, %2, %3}";
8602 [(set_attr "isa" "noavx,avx")
8603 (set_attr "type" "sseishft")
8604 (set_attr "atom_unit" "sishuf")
8605 (set_attr "prefix_data16" "1,*")
8606 (set_attr "prefix_extra" "1")
8607 (set_attr "length_immediate" "1")
8608 (set_attr "prefix" "orig,vex")
8609 (set_attr "mode" "<sseinsnmode>")])
;; MMX-register ("y") palignr on DImode operands, with the destination tied
;; to operand 1.  Operand 3 must satisfy const_0_to_255_mul_8_operand; the
;; output code divides it by 8 before printing it as the immediate.  The
;; unspec name and the insn condition are not shown in this listing.
8611 (define_insn "ssse3_palignrdi"
8612 [(set (match_operand:DI 0 "register_operand" "=y")
8613 (unspec:DI [(match_operand:DI 1 "register_operand" "0")
8614 (match_operand:DI 2 "nonimmediate_operand" "ym")
8615 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
8619 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
8620 return "palignr\t{%3, %2, %0|%0, %2, %3}";
8622 [(set_attr "type" "sseishft")
8623 (set_attr "atom_unit" "sishuf")
8624 (set_attr "prefix_extra" "1")
8625 (set_attr "length_immediate" "1")
8626 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8627 (set_attr "mode" "DI")])
;; Named abs pattern for VI124_AVX2 vectors, emitted as
;; pabs<ssemodesuffix> (VEX-encoded when applicable, via %v).  The source
;; may be a register or memory ("vm").  The RTL operator line and the insn
;; condition are not shown in this listing.
8629 (define_insn "abs<mode>2"
8630 [(set (match_operand:VI124_AVX2 0 "register_operand" "=v")
8632 (match_operand:VI124_AVX2 1 "nonimmediate_operand" "vm")))]
8634 "%vpabs<ssemodesuffix>\t{%1, %0|%0, %1}"
8635 [(set_attr "type" "sselog1")
8636 (set_attr "prefix_data16" "1")
8637 (set_attr "prefix_extra" "1")
8638 (set_attr "prefix" "maybe_vex")
8639 (set_attr "mode" "<sseinsnmode>")])
;; Named abs pattern for MMXMODEI vectors in MMX registers ("y"), emitted as
;; pabs<mmxvecsize>.  The source may be a register or memory ("ym").  The
;; RTL operator line and the insn condition are not shown in this listing.
8641 (define_insn "abs<mode>2"
8642 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
8644 (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
8646 "pabs<mmxvecsize>\t{%1, %0|%0, %1}";
8647 [(set_attr "type" "sselog1")
8648 (set_attr "prefix_rep" "0")
8649 (set_attr "prefix_extra" "1")
8650 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8651 (set_attr "mode" "DI")])
8653 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8655 ;; AMD SSE4A instructions
8657 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; movnt<ssemodesuffix>: stores a MODEF value from an SSE register
;; (operand 1) to memory (operand 0).  The unspec name and the insn
;; condition are not shown in this listing.
8659 (define_insn "sse4a_movnt<mode>"
8660 [(set (match_operand:MODEF 0 "memory_operand" "=m")
8662 [(match_operand:MODEF 1 "register_operand" "x")]
8665 "movnt<ssemodesuffix>\t{%1, %0|%0, %1}"
8666 [(set_attr "type" "ssemov")
8667 (set_attr "mode" "<MODE>")])
;; movnt<ssescalarmodesuffix>: stores element 0 of the VF_128 register in
;; operand 1 to the <ssescalarmode> memory location in operand 0.  The
;; unspec name and the insn condition are not shown in this listing.
8669 (define_insn "sse4a_vmmovnt<mode>"
8670 [(set (match_operand:<ssescalarmode> 0 "memory_operand" "=m")
8671 (unspec:<ssescalarmode>
8672 [(vec_select:<ssescalarmode>
8673 (match_operand:VF_128 1 "register_operand" "x")
8674 (parallel [(const_int 0)]))]
8677 "movnt<ssescalarmodesuffix>\t{%1, %0|%0, %1}"
8678 [(set_attr "type" "ssemov")
8679 (set_attr "mode" "<ssescalarmode>")])
;; Immediate form of extrq: V2DI operand 1 (tied to the destination) plus
;; two constants, each satisfying const_0_to_255_operand, which are printed
;; as the instruction's two immediates (length_immediate is 2).  The unspec
;; name and the insn condition are not shown in this listing.
8681 (define_insn "sse4a_extrqi"
8682 [(set (match_operand:V2DI 0 "register_operand" "=x")
8683 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
8684 (match_operand 2 "const_0_to_255_operand")
8685 (match_operand 3 "const_0_to_255_operand")]
8688 "extrq\t{%3, %2, %0|%0, %2, %3}"
8689 [(set_attr "type" "sse")
8690 (set_attr "prefix_data16" "1")
8691 (set_attr "length_immediate" "2")
8692 (set_attr "mode" "TI")])
;; Register form of extrq.  Operand 1 is tied to the output register and
;; operand 2 is a V16QI register.
8694 (define_insn "sse4a_extrq"
8695 [(set (match_operand:V2DI 0 "register_operand" "=x")
8696 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
8697 (match_operand:V16QI 2 "register_operand" "x")]
8700 "extrq\t{%2, %0|%0, %2}"
8701 [(set_attr "type" "sse")
8702 (set_attr "prefix_data16" "1")
8703 (set_attr "mode" "TI")])
;; Immediate form of insertq.  Operand 1 is tied to the output register,
;; operand 2 is a second V2DI register, and operands 3 and 4 are constants
;; in the range 0..255 (length_immediate is 2).
8705 (define_insn "sse4a_insertqi"
8706 [(set (match_operand:V2DI 0 "register_operand" "=x")
8707 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
8708 (match_operand:V2DI 2 "register_operand" "x")
8709 (match_operand 3 "const_0_to_255_operand")
8710 (match_operand 4 "const_0_to_255_operand")]
8713 "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
8714 [(set_attr "type" "sseins")
8715 (set_attr "prefix_data16" "0")
8716 (set_attr "prefix_rep" "1")
8717 (set_attr "length_immediate" "2")
8718 (set_attr "mode" "TI")])
;; Register form of insertq.  Operand 1 is tied to the output register and
;; operand 2 is a second V2DI register.
8720 (define_insn "sse4a_insertq"
8721 [(set (match_operand:V2DI 0 "register_operand" "=x")
8722 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
8723 (match_operand:V2DI 2 "register_operand" "x")]
8726 "insertq\t{%2, %0|%0, %2}"
8727 [(set_attr "type" "sseins")
8728 (set_attr "prefix_data16" "0")
8729 (set_attr "prefix_rep" "1")
8730 (set_attr "mode" "TI")])
8732 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8734 ;; Intel SSE4.1 instructions
8736 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Floating-point blend with an immediate mask: a vec_merge of operand 2 and
;; operand 1 controlled by the constant operand 3.  Two alternatives, chosen
;; by the "isa" attribute: the noavx form ties operand 1 to the destination
;; and prints two sources; the avx form is the three-source vblend.
8738 (define_insn "<sse4_1>_blend<ssemodesuffix><avxsizesuffix>"
8739 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
8740 (vec_merge:VF_128_256
8741 (match_operand:VF_128_256 2 "nonimmediate_operand" "xm,xm")
8742 (match_operand:VF_128_256 1 "register_operand" "0,x")
8743 (match_operand:SI 3 "const_0_to_<blendbits>_operand")))]
8746 blend<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
8747 vblend<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8748 [(set_attr "isa" "noavx,avx")
8749 (set_attr "type" "ssemov")
8750 (set_attr "length_immediate" "1")
8751 (set_attr "prefix_data16" "1,*")
8752 (set_attr "prefix_extra" "1")
8753 (set_attr "prefix" "orig,vex")
8754 (set_attr "mode" "<MODE>")])
;; Floating-point variable blend: operand 3 is a register selector rather
;; than an immediate.  In the noavx alternative operand 1 is tied to the
;; destination and operand 3 uses the "Yz" constraint; in the avx
;; alternative all operands use "x" and the vblendv form is printed.
8756 (define_insn "<sse4_1>_blendv<ssemodesuffix><avxsizesuffix>"
8757 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
8759 [(match_operand:VF_128_256 1 "register_operand" "0,x")
8760 (match_operand:VF_128_256 2 "nonimmediate_operand" "xm,xm")
8761 (match_operand:VF_128_256 3 "register_operand" "Yz,x")]
8765 blendv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
8766 vblendv<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8767 [(set_attr "isa" "noavx,avx")
8768 (set_attr "type" "ssemov")
8769 (set_attr "length_immediate" "1")
8770 (set_attr "prefix_data16" "1,*")
8771 (set_attr "prefix_extra" "1")
8772 (set_attr "prefix" "orig,vex")
8773 (set_attr "btver2_decode" "vector,vector")
8774 (set_attr "mode" "<MODE>")])
;; dp<ssemodesuffix> / vdp<ssemodesuffix> with an immediate control operand 3
;; (0..255).  Operand 1 carries the "%" commutative marker; the noavx
;; alternative ties it to the destination, the avx alternative is
;; three-source.
8776 (define_insn "<sse4_1>_dp<ssemodesuffix><avxsizesuffix>"
8777 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
8779 [(match_operand:VF_128_256 1 "nonimmediate_operand" "%0,x")
8780 (match_operand:VF_128_256 2 "nonimmediate_operand" "xm,xm")
8781 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
8785 dp<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
8786 vdp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8787 [(set_attr "isa" "noavx,avx")
8788 (set_attr "type" "ssemul")
8789 (set_attr "length_immediate" "1")
8790 (set_attr "prefix_data16" "1,*")
8791 (set_attr "prefix_extra" "1")
8792 (set_attr "prefix" "orig,vex")
8793 (set_attr "btver2_decode" "vector,vector")
8794 (set_attr "mode" "<MODE>")])
;; movntdqa load: memory operand 1 ("m") is loaded into register operand 0
;; for the VI8_AVX2 modes, expressed as an unspec of the memory operand.
8796 (define_insn "<sse4_1_avx2>_movntdqa"
8797 [(set (match_operand:VI8_AVX2 0 "register_operand" "=x")
8798 (unspec:VI8_AVX2 [(match_operand:VI8_AVX2 1 "memory_operand" "m")]
8801 "%vmovntdqa\t{%1, %0|%0, %1}"
8802 [(set_attr "type" "ssemov")
8803 (set_attr "prefix_extra" "1")
8804 (set_attr "prefix" "maybe_vex")
8805 (set_attr "mode" "<sseinsnmode>")])
;; mpsadbw / vmpsadbw on the VI1_AVX2 modes with an immediate control
;; operand 3 (0..255).  The noavx alternative ties operand 1 to the
;; destination; the avx alternative is three-source.
8807 (define_insn "<sse4_1_avx2>_mpsadbw"
8808 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
8810 [(match_operand:VI1_AVX2 1 "register_operand" "0,x")
8811 (match_operand:VI1_AVX2 2 "nonimmediate_operand" "xm,xm")
8812 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
8816 mpsadbw\t{%3, %2, %0|%0, %2, %3}
8817 vmpsadbw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8818 [(set_attr "isa" "noavx,avx")
8819 (set_attr "type" "sselog1")
8820 (set_attr "length_immediate" "1")
8821 (set_attr "prefix_extra" "1")
8822 (set_attr "prefix" "orig,vex")
8823 (set_attr "btver2_decode" "vector,vector")
8824 (set_attr "mode" "<sseinsnmode>")])
;; 256-bit vpackusdw: produces a V16HI result from the two V8SI inputs,
;; operand 1 (register) and operand 2 (register or memory).
8826 (define_insn "avx2_packusdw"
8827 [(set (match_operand:V16HI 0 "register_operand" "=x")
8830 (match_operand:V8SI 1 "register_operand" "x"))
8832 (match_operand:V8SI 2 "nonimmediate_operand" "xm"))))]
8834 "vpackusdw\t{%2, %1, %0|%0, %1, %2}"
8835 [(set_attr "type" "sselog")
8836 (set_attr "prefix_extra" "1")
8837 (set_attr "prefix" "vex")
8838 (set_attr "mode" "OI")])
;; 128-bit packusdw: produces a V8HI result from two V4SI inputs.  The noavx
;; alternative ties operand 1 to the destination; the avx alternative prints
;; the three-operand vpackusdw.
8840 (define_insn "sse4_1_packusdw"
8841 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
8844 (match_operand:V4SI 1 "register_operand" "0,x"))
8846 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm"))))]
8849 packusdw\t{%2, %0|%0, %2}
8850 vpackusdw\t{%2, %1, %0|%0, %1, %2}"
8851 [(set_attr "isa" "noavx,avx")
8852 (set_attr "type" "sselog")
8853 (set_attr "prefix_extra" "1")
8854 (set_attr "prefix" "orig,vex")
8855 (set_attr "mode" "TI")])
;; Byte-wise variable blend on the VI1_AVX2 modes; operand 3 is a register
;; selector.  In the noavx alternative operand 1 is tied to the destination
;; and operand 3 uses the "Yz" constraint; length_immediate is "*" for that
;; alternative and 1 for the avx alternative.
8857 (define_insn "<sse4_1_avx2>_pblendvb"
8858 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
8860 [(match_operand:VI1_AVX2 1 "register_operand" "0,x")
8861 (match_operand:VI1_AVX2 2 "nonimmediate_operand" "xm,xm")
8862 (match_operand:VI1_AVX2 3 "register_operand" "Yz,x")]
8866 pblendvb\t{%3, %2, %0|%0, %2, %3}
8867 vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8868 [(set_attr "isa" "noavx,avx")
8869 (set_attr "type" "ssemov")
8870 (set_attr "prefix_extra" "1")
8871 (set_attr "length_immediate" "*,1")
8872 (set_attr "prefix" "orig,vex")
8873 (set_attr "btver2_decode" "vector,vector")
8874 (set_attr "mode" "<sseinsnmode>")])
;; 128-bit pblendw / vpblendw on V8HI with an immediate mask operand 3
;; (0..255).  The noavx alternative ties operand 1 to the destination; the
;; avx alternative is three-source.
8876 (define_insn "sse4_1_pblendw"
8877 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
8879 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
8880 (match_operand:V8HI 1 "register_operand" "0,x")
8881 (match_operand:SI 3 "const_0_to_255_operand" "n,n")))]
8884 pblendw\t{%3, %2, %0|%0, %2, %3}
8885 vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8886 [(set_attr "isa" "noavx,avx")
8887 (set_attr "type" "ssemov")
8888 (set_attr "prefix_extra" "1")
8889 (set_attr "length_immediate" "1")
8890 (set_attr "prefix" "orig,vex")
8891 (set_attr "mode" "TI")])
8893 ;; The builtin uses an 8-bit immediate. Expand that.
;; Expander for the V16HI blend.  The incoming operand 3 is an 8-bit
;; constant (0..255); the preparation code masks it to the low 8 bits and
;; rewrites operands[3] as that byte duplicated into bits 8..15
;; (val << 8 | val).
8894 (define_expand "avx2_pblendw"
8895 [(set (match_operand:V16HI 0 "register_operand")
8897 (match_operand:V16HI 2 "nonimmediate_operand")
8898 (match_operand:V16HI 1 "register_operand")
8899 (match_operand:SI 3 "const_0_to_255_operand")))]
8902 HOST_WIDE_INT val = INTVAL (operands[3]) & 0xff;
8903 operands[3] = GEN_INT (val << 8 | val);
;; Matching insn for the V16HI blend.  Operand 3 must satisfy
;; avx2_pblendw_operand; the output code reduces it back to its low 8 bits
;; before returning the vpblendw template, so the printed immediate is a
;; single byte.
8906 (define_insn "*avx2_pblendw"
8907 [(set (match_operand:V16HI 0 "register_operand" "=x")
8909 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
8910 (match_operand:V16HI 1 "register_operand" "x")
8911 (match_operand:SI 3 "avx2_pblendw_operand" "n")))]
8914 operands[3] = GEN_INT (INTVAL (operands[3]) & 0xff);
8915 return "vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}";
8917 [(set_attr "type" "ssemov")
8918 (set_attr "prefix_extra" "1")
8919 (set_attr "length_immediate" "1")
8920 (set_attr "prefix" "vex")
8921 (set_attr "mode" "OI")])
;; vpblendd on the VI4_AVX2 modes with an immediate mask operand 3 (0..255).
;; Three-source VEX form only.
8923 (define_insn "avx2_pblendd<mode>"
8924 [(set (match_operand:VI4_AVX2 0 "register_operand" "=x")
8926 (match_operand:VI4_AVX2 2 "nonimmediate_operand" "xm")
8927 (match_operand:VI4_AVX2 1 "register_operand" "x")
8928 (match_operand:SI 3 "const_0_to_255_operand" "n")))]
8930 "vpblendd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8931 [(set_attr "type" "ssemov")
8932 (set_attr "prefix_extra" "1")
8933 (set_attr "length_immediate" "1")
8934 (set_attr "prefix" "vex")
8935 (set_attr "mode" "<sseinsnmode>")])
;; phminposuw on V8HI, modelled as UNSPEC_PHMINPOSUW of operand 1 (register
;; or memory); the result goes to register operand 0.
8937 (define_insn "sse4_1_phminposuw"
8938 [(set (match_operand:V8HI 0 "register_operand" "=x")
8939 (unspec:V8HI [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
8940 UNSPEC_PHMINPOSUW))]
8942 "%vphminposuw\t{%1, %0|%0, %1}"
8943 [(set_attr "type" "sselog1")
8944 (set_attr "prefix_extra" "1")
8945 (set_attr "prefix" "maybe_vex")
8946 (set_attr "mode" "TI")])
;; 256-bit byte-to-word extension: the whole V16QI operand 1 is widened to
;; V16HI.  <code> and <extsuffix> select the extension variant printed as
;; vpmov<extsuffix>bw.
8948 (define_insn "avx2_<code>v16qiv16hi2"
8949 [(set (match_operand:V16HI 0 "register_operand" "=x")
8951 (match_operand:V16QI 1 "nonimmediate_operand" "xm")))]
8953 "vpmov<extsuffix>bw\t{%1, %0|%0, %1}"
8954 [(set_attr "type" "ssemov")
8955 (set_attr "prefix_extra" "1")
8956 (set_attr "prefix" "vex")
8957 (set_attr "mode" "OI")])
;; 128-bit byte-to-word extension: elements 0..7 of the V16QI operand 1 are
;; widened to V8HI.  The Intel-syntax side of the template prints operand 1
;; with the %q modifier.
8959 (define_insn "sse4_1_<code>v8qiv8hi2"
8960 [(set (match_operand:V8HI 0 "register_operand" "=x")
8963 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
8964 (parallel [(const_int 0) (const_int 1)
8965 (const_int 2) (const_int 3)
8966 (const_int 4) (const_int 5)
8967 (const_int 6) (const_int 7)]))))]
8969 "%vpmov<extsuffix>bw\t{%1, %0|%0, %q1}"
8970 [(set_attr "type" "ssemov")
8971 (set_attr "prefix_extra" "1")
8972 (set_attr "prefix" "maybe_vex")
8973 (set_attr "mode" "TI")])
;; 256-bit byte-to-dword extension: elements 0..7 of the V16QI operand 1 are
;; widened to V8SI.  The Intel-syntax side prints operand 1 with %q.
8975 (define_insn "avx2_<code>v8qiv8si2"
8976 [(set (match_operand:V8SI 0 "register_operand" "=x")
8979 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
8980 (parallel [(const_int 0) (const_int 1)
8981 (const_int 2) (const_int 3)
8982 (const_int 4) (const_int 5)
8983 (const_int 6) (const_int 7)]))))]
8985 "vpmov<extsuffix>bd\t{%1, %0|%0, %q1}"
8986 [(set_attr "type" "ssemov")
8987 (set_attr "prefix_extra" "1")
8988 (set_attr "prefix" "vex")
8989 (set_attr "mode" "OI")])
;; 128-bit byte-to-dword extension: elements 0..3 of the V16QI operand 1 are
;; widened to V4SI.  The Intel-syntax side prints operand 1 with %k.
8991 (define_insn "sse4_1_<code>v4qiv4si2"
8992 [(set (match_operand:V4SI 0 "register_operand" "=x")
8995 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
8996 (parallel [(const_int 0) (const_int 1)
8997 (const_int 2) (const_int 3)]))))]
8999 "%vpmov<extsuffix>bd\t{%1, %0|%0, %k1}"
9000 [(set_attr "type" "ssemov")
9001 (set_attr "prefix_extra" "1")
9002 (set_attr "prefix" "maybe_vex")
9003 (set_attr "mode" "TI")])
;; 256-bit word-to-dword extension: the whole V8HI operand 1 is widened to
;; V8SI and printed as vpmov<extsuffix>wd.
9005 (define_insn "avx2_<code>v8hiv8si2"
9006 [(set (match_operand:V8SI 0 "register_operand" "=x")
9008 (match_operand:V8HI 1 "nonimmediate_operand" "xm")))]
9010 "vpmov<extsuffix>wd\t{%1, %0|%0, %1}"
9011 [(set_attr "type" "ssemov")
9012 (set_attr "prefix_extra" "1")
9013 (set_attr "prefix" "vex")
9014 (set_attr "mode" "OI")])
;; 128-bit word-to-dword extension: elements 0..3 of the V8HI operand 1 are
;; widened to V4SI.  The Intel-syntax side prints operand 1 with %q.
9016 (define_insn "sse4_1_<code>v4hiv4si2"
9017 [(set (match_operand:V4SI 0 "register_operand" "=x")
9020 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
9021 (parallel [(const_int 0) (const_int 1)
9022 (const_int 2) (const_int 3)]))))]
9024 "%vpmov<extsuffix>wd\t{%1, %0|%0, %q1}"
9025 [(set_attr "type" "ssemov")
9026 (set_attr "prefix_extra" "1")
9027 (set_attr "prefix" "maybe_vex")
9028 (set_attr "mode" "TI")])
;; 256-bit byte-to-qword extension: elements 0..3 of the V16QI operand 1 are
;; widened to V4DI.  The Intel-syntax side prints operand 1 with %k.
9030 (define_insn "avx2_<code>v4qiv4di2"
9031 [(set (match_operand:V4DI 0 "register_operand" "=x")
9034 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9035 (parallel [(const_int 0) (const_int 1)
9036 (const_int 2) (const_int 3)]))))]
9038 "vpmov<extsuffix>bq\t{%1, %0|%0, %k1}"
9039 [(set_attr "type" "ssemov")
9040 (set_attr "prefix_extra" "1")
9041 (set_attr "prefix" "vex")
9042 (set_attr "mode" "OI")])
;; 128-bit byte-to-qword extension: elements 0..1 of the V16QI operand 1 are
;; widened to V2DI.  The Intel-syntax side prints operand 1 with %w.
9044 (define_insn "sse4_1_<code>v2qiv2di2"
9045 [(set (match_operand:V2DI 0 "register_operand" "=x")
9048 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9049 (parallel [(const_int 0) (const_int 1)]))))]
9051 "%vpmov<extsuffix>bq\t{%1, %0|%0, %w1}"
9052 [(set_attr "type" "ssemov")
9053 (set_attr "prefix_extra" "1")
9054 (set_attr "prefix" "maybe_vex")
9055 (set_attr "mode" "TI")])
;; 256-bit word-to-qword extension: elements 0..3 of the V8HI operand 1 are
;; widened to V4DI.  The Intel-syntax side prints operand 1 with %q.
9057 (define_insn "avx2_<code>v4hiv4di2"
9058 [(set (match_operand:V4DI 0 "register_operand" "=x")
9061 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
9062 (parallel [(const_int 0) (const_int 1)
9063 (const_int 2) (const_int 3)]))))]
9065 "vpmov<extsuffix>wq\t{%1, %0|%0, %q1}"
9066 [(set_attr "type" "ssemov")
9067 (set_attr "prefix_extra" "1")
9068 (set_attr "prefix" "vex")
9069 (set_attr "mode" "OI")])
;; 128-bit word-to-qword extension: elements 0..1 of the V8HI operand 1 are
;; widened to V2DI.  The Intel-syntax side prints operand 1 with %k.
9071 (define_insn "sse4_1_<code>v2hiv2di2"
9072 [(set (match_operand:V2DI 0 "register_operand" "=x")
9075 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
9076 (parallel [(const_int 0) (const_int 1)]))))]
9078 "%vpmov<extsuffix>wq\t{%1, %0|%0, %k1}"
9079 [(set_attr "type" "ssemov")
9080 (set_attr "prefix_extra" "1")
9081 (set_attr "prefix" "maybe_vex")
9082 (set_attr "mode" "TI")])
;; 256-bit dword-to-qword extension: the whole V4SI operand 1 is widened to
;; V4DI and printed as vpmov<extsuffix>dq.  The "prefix" attribute is stated
;; explicitly so the attribute list matches the other avx2_<code>* extension
;; patterns in this file.
9084 (define_insn "avx2_<code>v4siv4di2"
9085 [(set (match_operand:V4DI 0 "register_operand" "=x")
9087 (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
9089 "vpmov<extsuffix>dq\t{%1, %0|%0, %1}"
9090 [(set_attr "type" "ssemov")
9091 (set_attr "prefix_extra" "1")
(set_attr "prefix" "vex")
9092 (set_attr "mode" "OI")])
;; 128-bit dword-to-qword extension: elements 0..1 of the V4SI operand 1 are
;; widened to V2DI.  The Intel-syntax side prints operand 1 with %q.
9094 (define_insn "sse4_1_<code>v2siv2di2"
9095 [(set (match_operand:V2DI 0 "register_operand" "=x")
9098 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
9099 (parallel [(const_int 0) (const_int 1)]))))]
9101 "%vpmov<extsuffix>dq\t{%1, %0|%0, %q1}"
9102 [(set_attr "type" "ssemov")
9103 (set_attr "prefix_extra" "1")
9104 (set_attr "prefix" "maybe_vex")
9105 (set_attr "mode" "TI")])
9107 ;; vtestps/vtestpd are very similar to comiss and ucomiss when
9108 ;; setting FLAGS_REG. But they are not really compare instructions.
;; Sets FLAGS_REG (mode CC) from an unspec of the two VF_128_256 operands.
;; There is no register output; both operands are inputs, and operand 1 may
;; be in memory.  Printed as vtest<ssemodesuffix>.
9109 (define_insn "avx_vtest<ssemodesuffix><avxsizesuffix>"
9110 [(set (reg:CC FLAGS_REG)
9111 (unspec:CC [(match_operand:VF_128_256 0 "register_operand" "x")
9112 (match_operand:VF_128_256 1 "nonimmediate_operand" "xm")]
9115 "vtest<ssemodesuffix>\t{%1, %0|%0, %1}"
9116 [(set_attr "type" "ssecomi")
9117 (set_attr "prefix_extra" "1")
9118 (set_attr "prefix" "vex")
9119 (set_attr "mode" "<MODE>")])
9121 ;; ptest is very similar to comiss and ucomiss when setting FLAGS_REG.
9122 ;; But it is not really a compare instruction.
;; 256-bit vptest: sets FLAGS_REG (mode CC) from an unspec of the two V4DI
;; operands; operand 1 may be in memory.
9123 (define_insn "avx_ptest256"
9124 [(set (reg:CC FLAGS_REG)
9125 (unspec:CC [(match_operand:V4DI 0 "register_operand" "x")
9126 (match_operand:V4DI 1 "nonimmediate_operand" "xm")]
9129 "vptest\t{%1, %0|%0, %1}"
9130 [(set_attr "type" "ssecomi")
9131 (set_attr "prefix_extra" "1")
9132 (set_attr "prefix" "vex")
9133 (set_attr "btver2_decode" "vector")
9134 (set_attr "mode" "OI")])
;; 128-bit ptest: sets FLAGS_REG (mode CC) from an unspec of the two V2DI
;; operands; operand 1 may be in memory.  Uses the %v template prefix with
;; the "maybe_vex" prefix attribute.
9136 (define_insn "sse4_1_ptest"
9137 [(set (reg:CC FLAGS_REG)
9138 (unspec:CC [(match_operand:V2DI 0 "register_operand" "x")
9139 (match_operand:V2DI 1 "nonimmediate_operand" "xm")]
9142 "%vptest\t{%1, %0|%0, %1}"
9143 [(set_attr "type" "ssecomi")
9144 (set_attr "prefix_extra" "1")
9145 (set_attr "prefix" "maybe_vex")
9146 (set_attr "mode" "TI")])
;; Packed round<ssemodesuffix>: operand 1 (register or memory) is the source
;; and operand 2 is the immediate rounding control (0..15).  prefix_data16
;; is computed from a TARGET_AVX test rather than set to a fixed value.
9148 (define_insn "<sse4_1>_round<ssemodesuffix><avxsizesuffix>"
9149 [(set (match_operand:VF_128_256 0 "register_operand" "=x")
9151 [(match_operand:VF_128_256 1 "nonimmediate_operand" "xm")
9152 (match_operand:SI 2 "const_0_to_15_operand" "n")]
9155 "%vround<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
9156 [(set_attr "type" "ssecvt")
9157 (set (attr "prefix_data16")
9159 (match_test "TARGET_AVX")
9161 (const_string "1")))
9162 (set_attr "prefix_extra" "1")
9163 (set_attr "length_immediate" "1")
9164 (set_attr "prefix" "maybe_vex")
9165 (set_attr "mode" "<MODE>")])
;; Round, then convert to integer: rounds operand 1 into a fresh temporary
;; of the same float vector mode using the packed round pattern, then
;; converts the temporary into the <sseintvecmode> operand 0 with
;; gen_fix_trunc<mode><sseintvecmodelower>2.
9167 (define_expand "<sse4_1>_round<ssemodesuffix>_sfix<avxsizesuffix>"
9168 [(match_operand:<sseintvecmode> 0 "register_operand")
9169 (match_operand:VF1_128_256 1 "nonimmediate_operand")
9170 (match_operand:SI 2 "const_0_to_15_operand")]
9173 rtx tmp = gen_reg_rtx (<MODE>mode);
9176 (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp, operands[1],
9179 (gen_fix_trunc<mode><sseintvecmodelower>2 (operands[0], tmp));
;; V8DF rounding expander: forwards all three operands unchanged to
;; gen_avx512f_rndscalev8df.
9183 (define_expand "avx512f_roundpd512"
9184 [(match_operand:V8DF 0 "register_operand")
9185 (match_operand:V8DF 1 "nonimmediate_operand")
9186 (match_operand:SI 2 "const_0_to_15_operand")]
9189 emit_insn (gen_avx512f_rndscalev8df (operands[0], operands[1], operands[2]));
;; Round two VF2 vectors and pack the results into one <ssepackfltmode>
;; integer vector.  Two code paths are visible:
;;  - V2DF with TARGET_AVX and !TARGET_PREFER_AVX128: concatenate the two
;;    inputs into a V4DF, round once with gen_avx_roundpd256, and convert
;;    with gen_fix_truncv4dfv4si2.
;;  - otherwise: round each input into its own temporary and combine them
;;    with gen_vec_pack_sfix_trunc_<mode>.
9193 (define_expand "<sse4_1>_round<ssemodesuffix>_vec_pack_sfix<avxsizesuffix>"
9194 [(match_operand:<ssepackfltmode> 0 "register_operand")
9195 (match_operand:VF2 1 "nonimmediate_operand")
9196 (match_operand:VF2 2 "nonimmediate_operand")
9197 (match_operand:SI 3 "const_0_to_15_operand")]
9202 if (<MODE>mode == V2DFmode
9203 && TARGET_AVX && !TARGET_PREFER_AVX128)
9205 rtx tmp2 = gen_reg_rtx (V4DFmode);
9207 tmp0 = gen_reg_rtx (V4DFmode);
9208 tmp1 = force_reg (V2DFmode, operands[1]);
9210 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
9211 emit_insn (gen_avx_roundpd256 (tmp2, tmp0, operands[3]));
9212 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp2));
9216 tmp0 = gen_reg_rtx (<MODE>mode);
9217 tmp1 = gen_reg_rtx (<MODE>mode);
9220 (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp0, operands[1],
9223 (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp1, operands[2],
9226 (gen_vec_pack_sfix_trunc_<mode> (operands[0], tmp0, tmp1));
;; Scalar round<ssescalarmodesuffix> on VF_128 registers.  Operand 2 is the
;; value rounded, operand 3 the immediate rounding control (0..15), and
;; operand 1 the other vector input.  The noavx alternative ties operand 1
;; to the destination; the avx alternative prints the three-source vround.
9231 (define_insn "sse4_1_round<ssescalarmodesuffix>"
9232 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
9235 [(match_operand:VF_128 2 "register_operand" "x,x")
9236 (match_operand:SI 3 "const_0_to_15_operand" "n,n")]
9238 (match_operand:VF_128 1 "register_operand" "0,x")
9242 round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
9243 vround<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9244 [(set_attr "isa" "noavx,avx")
9245 (set_attr "type" "ssecvt")
9246 (set_attr "length_immediate" "1")
9247 (set_attr "prefix_data16" "1,*")
9248 (set_attr "prefix_extra" "1")
9249 (set_attr "prefix" "orig,vex")
9250 (set_attr "mode" "<MODE>")])
;; Vector round expander for the VF modes, enabled only when
;; TARGET_ROUND && !flag_trapping_math.  The preparation code:
;;  1. builds the scalar constant 0.5 - 2**(-p-1), where p is the format's
;;     precision (the in-code comment calls this nextafter (0.5, 0.0));
;;  2. broadcasts it to a vector constant and forces it into a register;
;;  3. emits copysign of that vector with operand 1 into operands[3];
;;  4. allocates operands[4] and sets operands[5] to ROUND_TRUNC for the
;;     final rounding step that uses (match_dup 4) and (match_dup 5).
9252 (define_expand "round<mode>2"
9255 (match_operand:VF 1 "register_operand")
9257 (set (match_operand:VF 0 "register_operand")
9259 [(match_dup 4) (match_dup 5)]
9261 "TARGET_ROUND && !flag_trapping_math"
9263 enum machine_mode scalar_mode;
9264 const struct real_format *fmt;
9265 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
9268 scalar_mode = GET_MODE_INNER (<MODE>mode);
9270 /* load nextafter (0.5, 0.0) */
9271 fmt = REAL_MODE_FORMAT (scalar_mode);
9272 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, scalar_mode);
9273 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
9274 half = const_double_from_real_value (pred_half, scalar_mode);
9276 vec_half = ix86_build_const_vector (<MODE>mode, true, half);
9277 vec_half = force_reg (<MODE>mode, vec_half);
9279 operands[3] = gen_reg_rtx (<MODE>mode);
9280 emit_insn (gen_copysign<mode>3 (operands[3], vec_half, operands[1]));
9282 operands[4] = gen_reg_rtx (<MODE>mode);
9283 operands[5] = GEN_INT (ROUND_TRUNC);
;; Round, then convert to integer: expands gen_round<mode>2 into a
;; temporary and converts it to the <sseintvecmode> operand 0 with
;; gen_fix_trunc<mode><sseintvecmodelower>2.  Enabled only when
;; TARGET_ROUND && !flag_trapping_math.
9286 (define_expand "round<mode>2_sfix"
9287 [(match_operand:<sseintvecmode> 0 "register_operand")
9288 (match_operand:VF1_128_256 1 "register_operand")]
9289 "TARGET_ROUND && !flag_trapping_math"
9291 rtx tmp = gen_reg_rtx (<MODE>mode);
9293 emit_insn (gen_round<mode>2 (tmp, operands[1]));
9296 (gen_fix_trunc<mode><sseintvecmodelower>2 (operands[0], tmp));
;; Round two VF2 vectors and pack the results into one <ssepackfltmode>
;; integer vector; enabled only when TARGET_ROUND && !flag_trapping_math.
;; Two code paths are visible:
;;  - V2DF with TARGET_AVX and !TARGET_PREFER_AVX128: concatenate the inputs
;;    into a V4DF, round once with gen_roundv4df2, and convert with
;;    gen_fix_truncv4dfv4si2.
;;  - otherwise: round each input separately with gen_round<mode>2 and
;;    combine with gen_vec_pack_sfix_trunc_<mode>.
9300 (define_expand "round<mode>2_vec_pack_sfix"
9301 [(match_operand:<ssepackfltmode> 0 "register_operand")
9302 (match_operand:VF2 1 "register_operand")
9303 (match_operand:VF2 2 "register_operand")]
9304 "TARGET_ROUND && !flag_trapping_math"
9308 if (<MODE>mode == V2DFmode
9309 && TARGET_AVX && !TARGET_PREFER_AVX128)
9311 rtx tmp2 = gen_reg_rtx (V4DFmode);
9313 tmp0 = gen_reg_rtx (V4DFmode);
9314 tmp1 = force_reg (V2DFmode, operands[1]);
9316 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
9317 emit_insn (gen_roundv4df2 (tmp2, tmp0));
9318 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp2));
9322 tmp0 = gen_reg_rtx (<MODE>mode);
9323 tmp1 = gen_reg_rtx (<MODE>mode);
9325 emit_insn (gen_round<mode>2 (tmp0, operands[1]));
9326 emit_insn (gen_round<mode>2 (tmp1, operands[2]));
9329 (gen_vec_pack_sfix_trunc_<mode> (operands[0], tmp0, tmp1));
9334 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9336 ;; Intel SSE4.2 string/text processing instructions
9338 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Combined pcmpestr pattern with three results: SI operand 0 ("c"), V16QI
;; operand 1 ("Yz") and FLAGS_REG.  It is split while pseudos can still be
;; created.  The split code reads the REG_UNUSED notes on curr_insn to
;; decide which results are live (locals ecx, xmm0, flags) and emits
;; gen_sse4_2_pcmpestri and/or gen_sse4_2_pcmpestrm.  When only the flags
;; are live it emits gen_sse4_2_pcmpestr_cconly, and when nothing is live
;; it emits a NOTE_INSN_DELETED.
;; NOTE(review): the guards on the first two emit_insn calls are not
;; visible here -- presumably `if (ecx)' and `if (xmm0)'; confirm.
9340 (define_insn_and_split "sse4_2_pcmpestr"
9341 [(set (match_operand:SI 0 "register_operand" "=c,c")
9343 [(match_operand:V16QI 2 "register_operand" "x,x")
9344 (match_operand:SI 3 "register_operand" "a,a")
9345 (match_operand:V16QI 4 "nonimmediate_operand" "x,m")
9346 (match_operand:SI 5 "register_operand" "d,d")
9347 (match_operand:SI 6 "const_0_to_255_operand" "n,n")]
9349 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
9357 (set (reg:CC FLAGS_REG)
9366 && can_create_pseudo_p ()"
9371 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
9372 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
9373 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
9376 emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
9377 operands[3], operands[4],
9378 operands[5], operands[6]));
9380 emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
9381 operands[3], operands[4],
9382 operands[5], operands[6]));
9383 if (flags && !(ecx || xmm0))
9384 emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
9385 operands[2], operands[3],
9386 operands[4], operands[5],
9388 if (!(flags || ecx || xmm0))
9389 emit_note (NOTE_INSN_DELETED);
9393 [(set_attr "type" "sselog")
9394 (set_attr "prefix_data16" "1")
9395 (set_attr "prefix_extra" "1")
9396 (set_attr "length_immediate" "1")
9397 (set_attr "memory" "none,load")
9398 (set_attr "mode" "TI")])
;; Variant of the combined pcmpestr pattern in which the second vector
;; input is a memory operand 4 wrapped in UNSPEC_LOADU.  The split code
;; has the same shape as sse4_2_pcmpestr: it checks REG_UNUSED notes to
;; see which of ecx, xmm0 and flags are live, emits the pcmpestri /
;; pcmpestrm / cconly forms with the bare memory operand, and emits a
;; NOTE_INSN_DELETED when nothing is live.
;; NOTE(review): the guards on the first two emit_insn calls are not
;; visible here -- presumably `if (ecx)' and `if (xmm0)'; confirm.
9400 (define_insn_and_split "*sse4_2_pcmpestr_unaligned"
9401 [(set (match_operand:SI 0 "register_operand" "=c")
9403 [(match_operand:V16QI 2 "register_operand" "x")
9404 (match_operand:SI 3 "register_operand" "a")
9406 [(match_operand:V16QI 4 "memory_operand" "m")]
9408 (match_operand:SI 5 "register_operand" "d")
9409 (match_operand:SI 6 "const_0_to_255_operand" "n")]
9411 (set (match_operand:V16QI 1 "register_operand" "=Yz")
9415 (unspec:V16QI [(match_dup 4)] UNSPEC_LOADU)
9419 (set (reg:CC FLAGS_REG)
9423 (unspec:V16QI [(match_dup 4)] UNSPEC_LOADU)
9428 && can_create_pseudo_p ()"
9433 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
9434 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
9435 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
9438 emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
9439 operands[3], operands[4],
9440 operands[5], operands[6]));
9442 emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
9443 operands[3], operands[4],
9444 operands[5], operands[6]));
9445 if (flags && !(ecx || xmm0))
9446 emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
9447 operands[2], operands[3],
9448 operands[4], operands[5],
9450 if (!(flags || ecx || xmm0))
9451 emit_note (NOTE_INSN_DELETED);
9455 [(set_attr "type" "sselog")
9456 (set_attr "prefix_data16" "1")
9457 (set_attr "prefix_extra" "1")
9458 (set_attr "length_immediate" "1")
9459 (set_attr "memory" "load")
9460 (set_attr "mode" "TI")])
;; pcmpestri: the SI result goes to operand 0 ("c") and FLAGS_REG is also
;; set.  Inputs are vector operand 1, SI operand 2 ("a"), vector operand 3
;; (register or memory), SI operand 4 ("d") and immediate operand 5.  Only
;; operands 1, 3 and 5 appear in the printed template.
9462 (define_insn "sse4_2_pcmpestri"
9463 [(set (match_operand:SI 0 "register_operand" "=c,c")
9465 [(match_operand:V16QI 1 "register_operand" "x,x")
9466 (match_operand:SI 2 "register_operand" "a,a")
9467 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
9468 (match_operand:SI 4 "register_operand" "d,d")
9469 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
9471 (set (reg:CC FLAGS_REG)
9480 "%vpcmpestri\t{%5, %3, %1|%1, %3, %5}"
9481 [(set_attr "type" "sselog")
9482 (set_attr "prefix_data16" "1")
9483 (set_attr "prefix_extra" "1")
9484 (set_attr "prefix" "maybe_vex")
9485 (set_attr "length_immediate" "1")
9486 (set_attr "btver2_decode" "vector")
9487 (set_attr "memory" "none,load")
9488 (set_attr "mode" "TI")])
;; pcmpestrm: the V16QI result goes to operand 0 ("Yz") and FLAGS_REG is
;; also set.  The inputs and constraints mirror sse4_2_pcmpestri; only
;; operands 1, 3 and 5 appear in the printed template.
9490 (define_insn "sse4_2_pcmpestrm"
9491 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
9493 [(match_operand:V16QI 1 "register_operand" "x,x")
9494 (match_operand:SI 2 "register_operand" "a,a")
9495 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
9496 (match_operand:SI 4 "register_operand" "d,d")
9497 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
9499 (set (reg:CC FLAGS_REG)
9508 "%vpcmpestrm\t{%5, %3, %1|%1, %3, %5}"
9509 [(set_attr "type" "sselog")
9510 (set_attr "prefix_data16" "1")
9511 (set_attr "prefix_extra" "1")
9512 (set_attr "length_immediate" "1")
9513 (set_attr "prefix" "maybe_vex")
9514 (set_attr "btver2_decode" "vector")
9515 (set_attr "memory" "none,load")
9516 (set_attr "mode" "TI")])
;; Flags-only pcmpestr: sets FLAGS_REG and clobbers two scratch operands.
;; There are four alternatives.  The first two clobber the V16QI scratch
;; ("Yz") and print pcmpestrm; the last two clobber the SI scratch ("c")
;; and print pcmpestri.  Within each pair, operand 4 is a register in one
;; alternative and memory in the other.
9518 (define_insn "sse4_2_pcmpestr_cconly"
9519 [(set (reg:CC FLAGS_REG)
9521 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
9522 (match_operand:SI 3 "register_operand" "a,a,a,a")
9523 (match_operand:V16QI 4 "nonimmediate_operand" "x,m,x,m")
9524 (match_operand:SI 5 "register_operand" "d,d,d,d")
9525 (match_operand:SI 6 "const_0_to_255_operand" "n,n,n,n")]
9527 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
9528 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
9531 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
9532 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
9533 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}
9534 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}"
9535 [(set_attr "type" "sselog")
9536 (set_attr "prefix_data16" "1")
9537 (set_attr "prefix_extra" "1")
9538 (set_attr "length_immediate" "1")
9539 (set_attr "memory" "none,load,none,load")
9540 (set_attr "btver2_decode" "vector,vector,vector,vector")
9541 (set_attr "prefix" "maybe_vex")
9542 (set_attr "mode" "TI")])
;; Combined pcmpistr pattern with three results: SI operand 0 ("c"), V16QI
;; operand 1 ("Yz") and FLAGS_REG.  It is split while pseudos can still be
;; created.  The split code reads the REG_UNUSED notes on curr_insn to
;; decide which results are live (locals ecx, xmm0, flags) and emits
;; gen_sse4_2_pcmpistri and/or gen_sse4_2_pcmpistrm.  When only the flags
;; are live it emits gen_sse4_2_pcmpistr_cconly, and when nothing is live
;; it emits a NOTE_INSN_DELETED.
;; NOTE(review): the guards on the first two emit_insn calls are not
;; visible here -- presumably `if (ecx)' and `if (xmm0)'; confirm.
9544 (define_insn_and_split "sse4_2_pcmpistr"
9545 [(set (match_operand:SI 0 "register_operand" "=c,c")
9547 [(match_operand:V16QI 2 "register_operand" "x,x")
9548 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
9549 (match_operand:SI 4 "const_0_to_255_operand" "n,n")]
9551 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
9557 (set (reg:CC FLAGS_REG)
9564 && can_create_pseudo_p ()"
9569 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
9570 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
9571 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
9574 emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
9575 operands[3], operands[4]));
9577 emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
9578 operands[3], operands[4]));
9579 if (flags && !(ecx || xmm0))
9580 emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
9581 operands[2], operands[3],
9583 if (!(flags || ecx || xmm0))
9584 emit_note (NOTE_INSN_DELETED);
9588 [(set_attr "type" "sselog")
9589 (set_attr "prefix_data16" "1")
9590 (set_attr "prefix_extra" "1")
9591 (set_attr "length_immediate" "1")
9592 (set_attr "memory" "none,load")
9593 (set_attr "mode" "TI")])
;; Variant of the combined pcmpistr pattern in which the second vector
;; input is a memory operand 3 wrapped in UNSPEC_LOADU.  The split code
;; has the same shape as sse4_2_pcmpistr: it checks REG_UNUSED notes to
;; see which of ecx, xmm0 and flags are live, emits the pcmpistri /
;; pcmpistrm / cconly forms with the bare memory operand, and emits a
;; NOTE_INSN_DELETED when nothing is live.
;; NOTE(review): the guards on the first two emit_insn calls are not
;; visible here -- presumably `if (ecx)' and `if (xmm0)'; confirm.
9595 (define_insn_and_split "*sse4_2_pcmpistr_unaligned"
9596 [(set (match_operand:SI 0 "register_operand" "=c")
9598 [(match_operand:V16QI 2 "register_operand" "x")
9600 [(match_operand:V16QI 3 "memory_operand" "m")]
9602 (match_operand:SI 4 "const_0_to_255_operand" "n")]
9604 (set (match_operand:V16QI 1 "register_operand" "=Yz")
9607 (unspec:V16QI [(match_dup 3)] UNSPEC_LOADU)
9610 (set (reg:CC FLAGS_REG)
9613 (unspec:V16QI [(match_dup 3)] UNSPEC_LOADU)
9617 && can_create_pseudo_p ()"
9622 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
9623 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
9624 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
9627 emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
9628 operands[3], operands[4]));
9630 emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
9631 operands[3], operands[4]));
9632 if (flags && !(ecx || xmm0))
9633 emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
9634 operands[2], operands[3],
9636 if (!(flags || ecx || xmm0))
9637 emit_note (NOTE_INSN_DELETED);
9641 [(set_attr "type" "sselog")
9642 (set_attr "prefix_data16" "1")
9643 (set_attr "prefix_extra" "1")
9644 (set_attr "length_immediate" "1")
9645 (set_attr "memory" "load")
9646 (set_attr "mode" "TI")])
;; pcmpistri: the SI result goes to operand 0 ("c") and FLAGS_REG is also
;; set.  Inputs are vector operand 1, vector operand 2 (register or memory)
;; and immediate operand 3 (0..255).
9648 (define_insn "sse4_2_pcmpistri"
9649 [(set (match_operand:SI 0 "register_operand" "=c,c")
9651 [(match_operand:V16QI 1 "register_operand" "x,x")
9652 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
9653 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
9655 (set (reg:CC FLAGS_REG)
9662 "%vpcmpistri\t{%3, %2, %1|%1, %2, %3}"
9663 [(set_attr "type" "sselog")
9664 (set_attr "prefix_data16" "1")
9665 (set_attr "prefix_extra" "1")
9666 (set_attr "length_immediate" "1")
9667 (set_attr "prefix" "maybe_vex")
9668 (set_attr "memory" "none,load")
9669 (set_attr "btver2_decode" "vector")
9670 (set_attr "mode" "TI")])
;; pcmpistrm: the V16QI result goes to operand 0 ("Yz") and FLAGS_REG is
;; also set.  The inputs mirror sse4_2_pcmpistri.
9672 (define_insn "sse4_2_pcmpistrm"
9673 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
9675 [(match_operand:V16QI 1 "register_operand" "x,x")
9676 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
9677 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
9679 (set (reg:CC FLAGS_REG)
9686 "%vpcmpistrm\t{%3, %2, %1|%1, %2, %3}"
9687 [(set_attr "type" "sselog")
9688 (set_attr "prefix_data16" "1")
9689 (set_attr "prefix_extra" "1")
9690 (set_attr "length_immediate" "1")
9691 (set_attr "prefix" "maybe_vex")
9692 (set_attr "memory" "none,load")
9693 (set_attr "btver2_decode" "vector")
9694 (set_attr "mode" "TI")])
;; Flags-only pcmpistr: sets FLAGS_REG and clobbers two scratch operands.
;; There are four alternatives.  The first two clobber the V16QI scratch
;; ("Yz") and print pcmpistrm; the last two clobber the SI scratch ("c")
;; and print pcmpistri.  Within each pair, operand 3 is a register in one
;; alternative and memory in the other.
9696 (define_insn "sse4_2_pcmpistr_cconly"
9697 [(set (reg:CC FLAGS_REG)
9699 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
9700 (match_operand:V16QI 3 "nonimmediate_operand" "x,m,x,m")
9701 (match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")]
9703 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
9704 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
9707 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
9708 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
9709 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}
9710 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}"
9711 [(set_attr "type" "sselog")
9712 (set_attr "prefix_data16" "1")
9713 (set_attr "prefix_extra" "1")
9714 (set_attr "length_immediate" "1")
9715 (set_attr "memory" "none,load,none,load")
9716 (set_attr "prefix" "maybe_vex")
9717 (set_attr "btver2_decode" "vector,vector,vector,vector")
9718 (set_attr "mode" "TI")])
9720 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9724 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Code iterator over plus and ss_plus, with two code attributes that map
;; each code to a mnemonic fragment: macs/macss and madcs/madcss.
9726 (define_code_iterator xop_plus [plus ss_plus])
9728 (define_code_attr macs [(plus "macs") (ss_plus "macss")])
9729 (define_code_attr madcs [(plus "madcs") (ss_plus "madcss")])
9731 ;; XOP parallel integer multiply/add instructions.
;; Same-width multiply/accumulate on the VI24_128 modes.  Operands 1 and 2
;; are the multiplicands (operand 1 carries the "%" commutative marker,
;; operand 2 may be in memory) and operand 3 is a register input.  Printed
;; as vp<macs> with the element suffix repeated.
9733 (define_insn "xop_p<macs><ssemodesuffix><ssemodesuffix>"
9734 [(set (match_operand:VI24_128 0 "register_operand" "=x")
9737 (match_operand:VI24_128 1 "nonimmediate_operand" "%x")
9738 (match_operand:VI24_128 2 "nonimmediate_operand" "xm"))
9739 (match_operand:VI24_128 3 "register_operand" "x")))]
9741 "vp<macs><ssemodesuffix><ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9742 [(set_attr "type" "ssemuladd")
9743 (set_attr "mode" "TI")])
;; vp<macs>dql: uses elements 0 and 2 of each V4SI input (operands 1 and 2)
;; together with the V2DI register operand 3; the result is V2DI.
9745 (define_insn "xop_p<macs>dql"
9746 [(set (match_operand:V2DI 0 "register_operand" "=x")
9751 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
9752 (parallel [(const_int 0) (const_int 2)])))
9755 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
9756 (parallel [(const_int 0) (const_int 2)]))))
9757 (match_operand:V2DI 3 "register_operand" "x")))]
9759 "vp<macs>dql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9760 [(set_attr "type" "ssemuladd")
9761 (set_attr "mode" "TI")])
;; vp<macs>dqh: uses elements 1 and 3 of each V4SI input (operands 1 and 2)
;; together with the V2DI register operand 3; the result is V2DI.
9763 (define_insn "xop_p<macs>dqh"
9764 [(set (match_operand:V2DI 0 "register_operand" "=x")
9769 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
9770 (parallel [(const_int 1) (const_int 3)])))
9773 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
9774 (parallel [(const_int 1) (const_int 3)]))))
9775 (match_operand:V2DI 3 "register_operand" "x")))]
9777 "vp<macs>dqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9778 [(set_attr "type" "ssemuladd")
9779 (set_attr "mode" "TI")])
9781 ;; XOP parallel integer multiply/add instructions for the intrinsics
;; vp<macs>wd: uses elements 1, 3, 5 and 7 of each V8HI input (operands 1
;; and 2) together with the V4SI register operand 3; the result is V4SI.
9782 (define_insn "xop_p<macs>wd"
9783 [(set (match_operand:V4SI 0 "register_operand" "=x")
9788 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
9789 (parallel [(const_int 1) (const_int 3)
9790 (const_int 5) (const_int 7)])))
9793 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
9794 (parallel [(const_int 1) (const_int 3)
9795 (const_int 5) (const_int 7)]))))
9796 (match_operand:V4SI 3 "register_operand" "x")))]
9798 "vp<macs>wd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9799 [(set_attr "type" "ssemuladd")
9800 (set_attr "mode" "TI")])
;; Word-to-doubleword form that uses both halves of each word pair: the
;; pattern selects the even elements (0, 2, 4, 6) and the odd elements
;; (1, 3, 5, 7) of the V8HI inputs, and combines the result with the V4SI
;; register operand 3.
9802 (define_insn "xop_p<madcs>wd"
9803 [(set (match_operand:V4SI 0 "register_operand" "=x")
9809 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
9810 (parallel [(const_int 0) (const_int 2)
9811 (const_int 4) (const_int 6)])))
9814 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
9815 (parallel [(const_int 0) (const_int 2)
9816 (const_int 4) (const_int 6)]))))
9821 (parallel [(const_int 1) (const_int 3)
9822 (const_int 5) (const_int 7)])))
9826 (parallel [(const_int 1) (const_int 3)
9827 (const_int 5) (const_int 7)])))))
9828 (match_operand:V4SI 3 "register_operand" "x")))]
9830 "vp<madcs>wd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9831 [(set_attr "type" "ssemuladd")
9832 (set_attr "mode" "TI")])
9834 ;; XOP parallel XMM conditional moves
;; Defined for every mode in the V iterator.  Operand 1 is always a
;; register; the two alternatives allow either operand 2 (first
;; alternative) or operand 3 (second alternative) to be in memory, but
;; never both.
9835 (define_insn "xop_pcmov_<mode><avxsizesuffix>"
9836 [(set (match_operand:V 0 "register_operand" "=x,x")
9838 (match_operand:V 3 "nonimmediate_operand" "x,m")
9839 (match_operand:V 1 "register_operand" "x,x")
9840 (match_operand:V 2 "nonimmediate_operand" "xm,x")))]
9842 "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9843 [(set_attr "type" "sse4arg")])
9845 ;; XOP horizontal add/subtract instructions
;; Byte-to-word horizontal add: the even elements (0..14) and the odd
;; elements (1..15) of the V16QI operand 1 are selected and combined
;; pairwise into a V8HI result.  <u> is substituted into both the pattern
;; name and the mnemonic.
9846 (define_insn "xop_phadd<u>bw"
9847 [(set (match_operand:V8HI 0 "register_operand" "=x")
9851 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9852 (parallel [(const_int 0) (const_int 2)
9853 (const_int 4) (const_int 6)
9854 (const_int 8) (const_int 10)
9855 (const_int 12) (const_int 14)])))
9859 (parallel [(const_int 1) (const_int 3)
9860 (const_int 5) (const_int 7)
9861 (const_int 9) (const_int 11)
9862 (const_int 13) (const_int 15)])))))]
9864 "vphadd<u>bw\t{%1, %0|%0, %1}"
9865 [(set_attr "type" "sseiadd1")])
;; Byte-to-doubleword horizontal add: four selections of the V16QI
;; operand 1 are combined, taking elements {0,4,8,12}, {1,5,9,13},
;; {2,6,10,14} and {3,7,11,15}, i.e. each V4SI result element is built
;; from four adjacent source bytes.
9867 (define_insn "xop_phadd<u>bd"
9868 [(set (match_operand:V4SI 0 "register_operand" "=x")
9873 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9874 (parallel [(const_int 0) (const_int 4)
9875 (const_int 8) (const_int 12)])))
9879 (parallel [(const_int 1) (const_int 5)
9880 (const_int 9) (const_int 13)]))))
9885 (parallel [(const_int 2) (const_int 6)
9886 (const_int 10) (const_int 14)])))
9890 (parallel [(const_int 3) (const_int 7)
9891 (const_int 11) (const_int 15)]))))))]
9893 "vphadd<u>bd\t{%1, %0|%0, %1}"
9894 [(set_attr "type" "sseiadd1")])
;; Byte-to-quadword horizontal add: eight selections {k, k+8} for
;; k = 0..7 of the V16QI operand 1 are combined, so each V2DI result
;; element is built from eight adjacent source bytes.
9896 (define_insn "xop_phadd<u>bq"
9897 [(set (match_operand:V2DI 0 "register_operand" "=x")
9903 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9904 (parallel [(const_int 0) (const_int 8)])))
9908 (parallel [(const_int 1) (const_int 9)]))))
9913 (parallel [(const_int 2) (const_int 10)])))
9917 (parallel [(const_int 3) (const_int 11)])))))
9923 (parallel [(const_int 4) (const_int 12)])))
9927 (parallel [(const_int 5) (const_int 13)]))))
9932 (parallel [(const_int 6) (const_int 14)])))
9936 (parallel [(const_int 7) (const_int 15)])))))))]
9938 "vphadd<u>bq\t{%1, %0|%0, %1}"
9939 [(set_attr "type" "sseiadd1")])
;; Word-to-doubleword horizontal add: the even (0,2,4,6) and odd
;; (1,3,5,7) elements of the V8HI operand 1 are combined into V4SI.
9941 (define_insn "xop_phadd<u>wd"
9942 [(set (match_operand:V4SI 0 "register_operand" "=x")
9946 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
9947 (parallel [(const_int 0) (const_int 2)
9948 (const_int 4) (const_int 6)])))
9952 (parallel [(const_int 1) (const_int 3)
9953 (const_int 5) (const_int 7)])))))]
9955 "vphadd<u>wd\t{%1, %0|%0, %1}"
9956 [(set_attr "type" "sseiadd1")])
;; Word-to-quadword horizontal add: selections {0,4}, {1,5}, {2,6} and
;; {3,7} of the V8HI operand 1 are combined, so each V2DI result element
;; is built from four adjacent source words.
9958 (define_insn "xop_phadd<u>wq"
9959 [(set (match_operand:V2DI 0 "register_operand" "=x")
9964 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
9965 (parallel [(const_int 0) (const_int 4)])))
9969 (parallel [(const_int 1) (const_int 5)]))))
9974 (parallel [(const_int 2) (const_int 6)])))
9978 (parallel [(const_int 3) (const_int 7)]))))))]
9980 "vphadd<u>wq\t{%1, %0|%0, %1}"
9981 [(set_attr "type" "sseiadd1")])
;; Doubleword-to-quadword horizontal add: elements {0,2} and {1,3} of the
;; V4SI operand 1 are combined into a V2DI result.
9983 (define_insn "xop_phadd<u>dq"
9984 [(set (match_operand:V2DI 0 "register_operand" "=x")
9988 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
9989 (parallel [(const_int 0) (const_int 2)])))
9993 (parallel [(const_int 1) (const_int 3)])))))]
9995 "vphadd<u>dq\t{%1, %0|%0, %1}"
9996 [(set_attr "type" "sseiadd1")])
;; Byte-to-word horizontal subtract: the even (0..14) and odd (1..15)
;; elements of the V16QI operand 1 are combined pairwise into V8HI.
;; Unlike the phadd patterns there is no <u> variant.
9998 (define_insn "xop_phsubbw"
9999 [(set (match_operand:V8HI 0 "register_operand" "=x")
10003 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10004 (parallel [(const_int 0) (const_int 2)
10005 (const_int 4) (const_int 6)
10006 (const_int 8) (const_int 10)
10007 (const_int 12) (const_int 14)])))
10011 (parallel [(const_int 1) (const_int 3)
10012 (const_int 5) (const_int 7)
10013 (const_int 9) (const_int 11)
10014 (const_int 13) (const_int 15)])))))]
10016 "vphsubbw\t{%1, %0|%0, %1}"
10017 [(set_attr "type" "sseiadd1")])
;; Word-to-doubleword horizontal subtract: the even (0,2,4,6) and odd
;; (1,3,5,7) elements of the V8HI operand 1 are combined into V4SI.
10019 (define_insn "xop_phsubwd"
10020 [(set (match_operand:V4SI 0 "register_operand" "=x")
10024 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10025 (parallel [(const_int 0) (const_int 2)
10026 (const_int 4) (const_int 6)])))
10030 (parallel [(const_int 1) (const_int 3)
10031 (const_int 5) (const_int 7)])))))]
10033 "vphsubwd\t{%1, %0|%0, %1}"
10034 [(set_attr "type" "sseiadd1")])
;; Doubleword-to-quadword horizontal subtract: elements {0,2} and {1,3}
;; of the V4SI operand 1 are combined into a V2DI result.
10036 (define_insn "xop_phsubdq"
10037 [(set (match_operand:V2DI 0 "register_operand" "=x")
10041 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
10042 (parallel [(const_int 0) (const_int 2)])))
10046 (parallel [(const_int 1) (const_int 3)])))))]
10048 "vphsubdq\t{%1, %0|%0, %1}"
10049 [(set_attr "type" "sse4arg")])
10051 ;; XOP permute instructions
;; Byte permute modelled as an unspec (UNSPEC_XOP_PERMUTE) over three
;; V16QI operands.  Operand 1 must be a register; the two alternatives
;; let either operand 2 or operand 3 be in memory, and the insn condition
;; rejects the case where both are memory references.
10052 (define_insn "xop_pperm"
10053 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
10055 [(match_operand:V16QI 1 "register_operand" "x,x")
10056 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
10057 (match_operand:V16QI 3 "nonimmediate_operand" "xm,x")]
10058 UNSPEC_XOP_PERMUTE))]
10059 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
10060 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10061 [(set_attr "type" "sse4arg")
10062 (set_attr "mode" "TI")])
10064 ;; XOP pack instructions that combine two vectors into a smaller vector
;; Pack two V2DI inputs (operands 1 and 2) into one V4SI result with
;; vpperm.  The V16QI operand 3 appears only in a (use ...); it is the
;; selector passed to vpperm.  At most one of operands 2 and 3 may be in
;; memory.
10065 (define_insn "xop_pperm_pack_v2di_v4si"
10066 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
10069 (match_operand:V2DI 1 "register_operand" "x,x"))
10071 (match_operand:V2DI 2 "nonimmediate_operand" "x,m"))))
10072 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
10073 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
10074 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10075 [(set_attr "type" "sse4arg")
10076 (set_attr "mode" "TI")])
;; Pack two V4SI inputs (operands 1 and 2) into one V8HI result with
;; vpperm; the V16QI selector is operand 3, referenced via (use ...).
;; At most one of operands 2 and 3 may be in memory.
10078 (define_insn "xop_pperm_pack_v4si_v8hi"
10079 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
10082 (match_operand:V4SI 1 "register_operand" "x,x"))
10084 (match_operand:V4SI 2 "nonimmediate_operand" "x,m"))))
10085 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
10086 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
10087 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10088 [(set_attr "type" "sse4arg")
10089 (set_attr "mode" "TI")])
;; Pack two V8HI inputs (operands 1 and 2) into one V16QI result with
;; vpperm; the V16QI selector is operand 3, referenced via (use ...).
;; At most one of operands 2 and 3 may be in memory.
10091 (define_insn "xop_pperm_pack_v8hi_v16qi"
10092 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
10095 (match_operand:V8HI 1 "register_operand" "x,x"))
10097 (match_operand:V8HI 2 "nonimmediate_operand" "x,m"))))
10098 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
10099 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
10100 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10101 [(set_attr "type" "sse4arg")
10102 (set_attr "mode" "TI")])
10104 ;; XOP packed rotate instructions
;; Rotate-left expander for the VI_128 modes with an SImode count.
;; When the count is not a constant accepted by
;; const_0_to_<sserotatemax>_operand, the scalar count is converted to the
;; element mode if necessary, replicated into every element of a fresh
;; vector register through vec_init, and the per-element variable rotate
;; xop_vrotl<mode>3 is emitted.
10105 (define_expand "rotl<mode>3"
10106 [(set (match_operand:VI_128 0 "register_operand")
10108 (match_operand:VI_128 1 "nonimmediate_operand")
10109 (match_operand:SI 2 "general_operand")))]
10112 /* If we were given a scalar, convert it to parallel */
10113 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
10115 rtvec vs = rtvec_alloc (<ssescalarnum>);
10116 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
10117 rtx reg = gen_reg_rtx (<MODE>mode);
10118 rtx op2 = operands[2];
10121 if (GET_MODE (op2) != <ssescalarmode>mode)
10123 op2 = gen_reg_rtx (<ssescalarmode>mode);
10124 convert_move (op2, operands[2], false);
10127 for (i = 0; i < <ssescalarnum>; i++)
10128 RTVEC_ELT (vs, i) = op2;
10130 emit_insn (gen_vec_init<mode> (reg, par));
10131 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
;; Rotate-right expander for the VI_128 modes with an SImode count.
;; Mirrors rotl<mode>3: a non-constant count is broadcast into a vector
;; register, but the vector is then negated (neg<mode>2) before being
;; passed to xop_vrotl<mode>3, so the right rotate is carried out as a
;; left rotate by the negated count.
10136 (define_expand "rotr<mode>3"
10137 [(set (match_operand:VI_128 0 "register_operand")
10139 (match_operand:VI_128 1 "nonimmediate_operand")
10140 (match_operand:SI 2 "general_operand")))]
10143 /* If we were given a scalar, convert it to parallel */
10144 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
10146 rtvec vs = rtvec_alloc (<ssescalarnum>);
10147 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
10148 rtx neg = gen_reg_rtx (<MODE>mode);
10149 rtx reg = gen_reg_rtx (<MODE>mode);
10150 rtx op2 = operands[2];
10153 if (GET_MODE (op2) != <ssescalarmode>mode)
10155 op2 = gen_reg_rtx (<ssescalarmode>mode);
10156 convert_move (op2, operands[2], false);
10159 for (i = 0; i < <ssescalarnum>; i++)
10160 RTVEC_ELT (vs, i) = op2;
10162 emit_insn (gen_vec_init<mode> (reg, par));
10163 emit_insn (gen_neg<mode>2 (neg, reg));
10164 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], neg));
;; Rotate left by an immediate count.  Operand 2 must be a constant
;; accepted by const_0_to_<sserotatemax>_operand; it is emitted directly
;; as the immediate of vprot<suffix>.
10169 (define_insn "xop_rotl<mode>3"
10170 [(set (match_operand:VI_128 0 "register_operand" "=x")
10172 (match_operand:VI_128 1 "nonimmediate_operand" "xm")
10173 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
10175 "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10176 [(set_attr "type" "sseishft")
10177 (set_attr "length_immediate" "1")
10178 (set_attr "mode" "TI")])
;; Rotate right by an immediate count.  The output code computes
;; (element bit size - operand 2) and prints that value as %3, so the
;; same vprot<suffix> mnemonic is used as for the left rotate.
10180 (define_insn "xop_rotr<mode>3"
10181 [(set (match_operand:VI_128 0 "register_operand" "=x")
10183 (match_operand:VI_128 1 "nonimmediate_operand" "xm")
10184 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
10188 = GEN_INT (GET_MODE_BITSIZE (<ssescalarmode>mode) - INTVAL (operands[2]));
10189 return \"vprot<ssemodesuffix>\t{%3, %1, %0|%0, %1, %3}\";
10191 [(set_attr "type" "sseishft")
10192 (set_attr "length_immediate" "1")
10193 (set_attr "mode" "TI")])
;; Per-element variable rotate right.  The count vector (operand 2) is
;; negated into a fresh register and the rotate is emitted through
;; xop_vrotl<mode>3.
10195 (define_expand "vrotr<mode>3"
10196 [(match_operand:VI_128 0 "register_operand")
10197 (match_operand:VI_128 1 "register_operand")
10198 (match_operand:VI_128 2 "register_operand")]
10201 rtx reg = gen_reg_rtx (<MODE>mode);
10202 emit_insn (gen_neg<mode>2 (reg, operands[2]));
10203 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
;; Per-element variable rotate left.  Forwards all three operands
;; unchanged to xop_vrotl<mode>3.
10207 (define_expand "vrotl<mode>3"
10208 [(match_operand:VI_128 0 "register_operand")
10209 (match_operand:VI_128 1 "register_operand")
10210 (match_operand:VI_128 2 "register_operand")]
10213 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], operands[2]));
;; Variable rotate with a per-element count vector in operand 2.  The RTL
;; is an if_then_else whose last arm uses the negation of operand 2.
;; Either operand 1 or operand 2 may be in memory, but the insn condition
;; rejects both being memory references.
10217 (define_insn "xop_vrotl<mode>3"
10218 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
10219 (if_then_else:VI_128
10221 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
10224 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
10228 (neg:VI_128 (match_dup 2)))))]
10229 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
10230 "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10231 [(set_attr "type" "sseishft")
10232 (set_attr "prefix_data16" "0")
10233 (set_attr "prefix_extra" "2")
10234 (set_attr "mode" "TI")])
10236 ;; XOP packed shift instructions.
;; vlshr<mode>3 for the VI12_128 modes.  The count vector (operand 2) is
;; negated into a fresh register and the shift is emitted through
;; xop_shl<mode>3.
10237 (define_expand "vlshr<mode>3"
10238 [(set (match_operand:VI12_128 0 "register_operand")
10240 (match_operand:VI12_128 1 "register_operand")
10241 (match_operand:VI12_128 2 "nonimmediate_operand")))]
10244 rtx neg = gen_reg_rtx (<MODE>mode);
10245 emit_insn (gen_neg<mode>2 (neg, operands[2]));
10246 emit_insn (gen_xop_shl<mode>3 (operands[0], operands[1], neg));
;; vlshr<mode>3 for the VI48_128 modes, enabled for AVX2 or XOP.  The
;; expansion code negates the count vector and emits xop_shl<mode>3.
;; NOTE(review): presumably that code runs only on the non-AVX2 path,
;; with the AVX2 case matched from the RTL template -- confirm.
10250 (define_expand "vlshr<mode>3"
10251 [(set (match_operand:VI48_128 0 "register_operand")
10253 (match_operand:VI48_128 1 "register_operand")
10254 (match_operand:VI48_128 2 "nonimmediate_operand")))]
10255 "TARGET_AVX2 || TARGET_XOP"
10259 rtx neg = gen_reg_rtx (<MODE>mode);
10260 emit_insn (gen_neg<mode>2 (neg, operands[2]));
10261 emit_insn (gen_xop_shl<mode>3 (operands[0], operands[1], neg));
;; vlshr<mode>3 for the 256-bit VI48_256 modes.  Pattern-only expander:
;; it has no preparation code and simply produces its RTL template.
10266 (define_expand "vlshr<mode>3"
10267 [(set (match_operand:VI48_256 0 "register_operand")
10269 (match_operand:VI48_256 1 "register_operand")
10270 (match_operand:VI48_256 2 "nonimmediate_operand")))]
;; Per-element arithmetic right shift for the VI128_128 modes.  The
;; count vector is negated into a fresh register and the shift is emitted
;; through xop_sha<mode>3.
10273 (define_expand "vashr<mode>3"
10274 [(set (match_operand:VI128_128 0 "register_operand")
10275 (ashiftrt:VI128_128
10276 (match_operand:VI128_128 1 "register_operand")
10277 (match_operand:VI128_128 2 "nonimmediate_operand")))]
10280 rtx neg = gen_reg_rtx (<MODE>mode);
10281 emit_insn (gen_neg<mode>2 (neg, operands[2]));
10282 emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], neg));
;; Per-element arithmetic right shift for V4SI, enabled for AVX2 or XOP.
;; The expansion code negates the count vector and emits xop_shav4si3.
;; NOTE(review): presumably that code runs only on the non-AVX2 path,
;; with the AVX2 case matched from the RTL template -- confirm.
10286 (define_expand "vashrv4si3"
10287 [(set (match_operand:V4SI 0 "register_operand")
10288 (ashiftrt:V4SI (match_operand:V4SI 1 "register_operand")
10289 (match_operand:V4SI 2 "nonimmediate_operand")))]
10290 "TARGET_AVX2 || TARGET_XOP"
10294 rtx neg = gen_reg_rtx (V4SImode);
10295 emit_insn (gen_negv4si2 (neg, operands[2]));
10296 emit_insn (gen_xop_shav4si3 (operands[0], operands[1], neg));
;; Per-element arithmetic right shift for V8SI.  Pattern-only expander:
;; it has no preparation code and simply produces its ashiftrt template.
10301 (define_expand "vashrv8si3"
10302 [(set (match_operand:V8SI 0 "register_operand")
10303 (ashiftrt:V8SI (match_operand:V8SI 1 "register_operand")
10304 (match_operand:V8SI 2 "nonimmediate_operand")))]
;; vashl<mode>3 for the VI12_128 modes.  The count vector is passed
;; through unchanged (no negation) to xop_sha<mode>3.
10307 (define_expand "vashl<mode>3"
10308 [(set (match_operand:VI12_128 0 "register_operand")
10310 (match_operand:VI12_128 1 "register_operand")
10311 (match_operand:VI12_128 2 "nonimmediate_operand")))]
10314 emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], operands[2]));
;; vashl<mode>3 for the VI48_128 modes, enabled for AVX2 or XOP.  The
;; expansion code forces the count vector into a register and emits
;; xop_sha<mode>3 with the count unchanged.
;; NOTE(review): presumably that code runs only on the non-AVX2 path,
;; with the AVX2 case matched from the RTL template -- confirm.
10318 (define_expand "vashl<mode>3"
10319 [(set (match_operand:VI48_128 0 "register_operand")
10321 (match_operand:VI48_128 1 "register_operand")
10322 (match_operand:VI48_128 2 "nonimmediate_operand")))]
10323 "TARGET_AVX2 || TARGET_XOP"
10327 operands[2] = force_reg (<MODE>mode, operands[2]);
10328 emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], operands[2]));
;; vashl<mode>3 for the 256-bit VI48_256 modes.  Pattern-only expander:
;; it has no preparation code and simply produces its RTL template.
10333 (define_expand "vashl<mode>3"
10334 [(set (match_operand:VI48_256 0 "register_operand")
10336 (match_operand:VI48_256 1 "register_operand")
10337 (match_operand:VI48_256 2 "nonimmediate_operand")))]
;; vpsha<suffix>: variable shift with a per-element count vector in
;; operand 2.  The RTL is an if_then_else whose last arm uses the negation
;; of operand 2; the expanders above pass a negated count to obtain a
;; right shift.  Either operand 1 or operand 2 may be in memory, but not
;; both.
10340 (define_insn "xop_sha<mode>3"
10341 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
10342 (if_then_else:VI_128
10344 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
10347 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
10351 (neg:VI_128 (match_dup 2)))))]
10352 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
10353 "vpsha<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10354 [(set_attr "type" "sseishft")
10355 (set_attr "prefix_data16" "0")
10356 (set_attr "prefix_extra" "2")
10357 (set_attr "mode" "TI")])
;; vpshl<suffix>: same operand layout and constraints as xop_sha<mode>3,
;; emitting vpshl instead of vpsha.  Either operand 1 or operand 2 may be
;; in memory, but not both.
10359 (define_insn "xop_shl<mode>3"
10360 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
10361 (if_then_else:VI_128
10363 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
10366 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
10370 (neg:VI_128 (match_dup 2)))))]
10371 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
10372 "vpshl<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10373 [(set_attr "type" "sseishft")
10374 (set_attr "prefix_data16" "0")
10375 (set_attr "prefix_extra" "2")
10376 (set_attr "mode" "TI")])
;; Byte-vector shift by a scalar SImode count, for every code in
;; any_shift on the VI1_AVX2 modes.
;;  - With XOP and V16QImode: for codes other than ASHIFT a constant
;;    count is negated up front, while the "negate" flag covers the
;;    remaining case by negating the broadcast vector.  The count is
;;    replicated into all 16 bytes via vec_init, then xop_shlv16qi3 is
;;    used for LSHIFTRT and xop_shav16qi3 for the other codes.
;;  - Otherwise the work is delegated to ix86_expand_vecop_qihi.
10378 (define_expand "<shift_insn><mode>3"
10379 [(set (match_operand:VI1_AVX2 0 "register_operand")
10380 (any_shift:VI1_AVX2
10381 (match_operand:VI1_AVX2 1 "register_operand")
10382 (match_operand:SI 2 "nonmemory_operand")))]
10385 if (TARGET_XOP && <MODE>mode == V16QImode)
10387 bool negate = false;
10388 rtx (*gen) (rtx, rtx, rtx);
10392 if (<CODE> != ASHIFT)
10394 if (CONST_INT_P (operands[2]))
10395 operands[2] = GEN_INT (-INTVAL (operands[2]));
10399 par = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
10400 for (i = 0; i < 16; i++)
10401 XVECEXP (par, 0, i) = operands[2];
10403 tmp = gen_reg_rtx (V16QImode);
10404 emit_insn (gen_vec_initv16qi (tmp, par));
10407 emit_insn (gen_negv16qi2 (tmp, tmp));
10409 gen = (<CODE> == LSHIFTRT ? gen_xop_shlv16qi3 : gen_xop_shav16qi3);
10410 emit_insn (gen (operands[0], operands[1], tmp));
10413 ix86_expand_vecop_qihi (<CODE>, operands[0], operands[1], operands[2]);
;; V2DI shift by a scalar DImode count, implemented with xop_shav2di3.
;; A constant count is negated directly; the "negate" flag covers the
;; remaining case by negating the broadcast vector.  The count is
;; replicated into both elements via vec_init before the XOP shift is
;; emitted.
10417 (define_expand "ashrv2di3"
10418 [(set (match_operand:V2DI 0 "register_operand")
10420 (match_operand:V2DI 1 "register_operand")
10421 (match_operand:DI 2 "nonmemory_operand")))]
10424 rtx reg = gen_reg_rtx (V2DImode);
10426 bool negate = false;
10429 if (CONST_INT_P (operands[2]))
10430 operands[2] = GEN_INT (-INTVAL (operands[2]));
10434 par = gen_rtx_PARALLEL (V2DImode, rtvec_alloc (2));
10435 for (i = 0; i < 2; i++)
10436 XVECEXP (par, 0, i) = operands[2];
10438 emit_insn (gen_vec_initv2di (reg, par));
10441 emit_insn (gen_negv2di2 (reg, reg));
10443 emit_insn (gen_xop_shav2di3 (operands[0], operands[1], reg));
10447 ;; XOP FRCZ support
;; Full-vector vfrcz<suffix> over the FMAMODE modes; the single source
;; operand may be a register or memory.
10448 (define_insn "xop_frcz<mode>2"
10449 [(set (match_operand:FMAMODE 0 "register_operand" "=x")
10451 [(match_operand:FMAMODE 1 "nonimmediate_operand" "xm")]
10454 "vfrcz<ssemodesuffix>\t{%1, %0|%0, %1}"
10455 [(set_attr "type" "ssecvt1")
10456 (set_attr "mode" "<MODE>")])
;; Expander for the scalar ("vm") frcz form on the VF_128 modes.  The
;; preparation code sets operands[3] to the all-zero vector of the mode.
10459 (define_expand "xop_vmfrcz<mode>2"
10460 [(set (match_operand:VF_128 0 "register_operand")
10463 [(match_operand:VF_128 1 "nonimmediate_operand")]
10469 operands[3] = CONST0_RTX (<MODE>mode);
;; Insn for the scalar frcz form.  Operand 2 must be a zero constant
;; (const0_operand).  The mnemonic uses the scalar mode suffix, and the
;; Intel-syntax memory operand is printed with the <iptr> size prefix.
10472 (define_insn "*xop_vmfrcz_<mode>"
10473 [(set (match_operand:VF_128 0 "register_operand" "=x")
10476 [(match_operand:VF_128 1 "nonimmediate_operand" "xm")]
10478 (match_operand:VF_128 2 "const0_operand")
10481 "vfrcz<ssescalarmodesuffix>\t{%1, %0|%0, %<iptr>1}"
10482 [(set_attr "type" "ssecvt1")
10483 (set_attr "mode" "<MODE>")])
;; Integer vector compare producing a mask.  Operand 1 is the comparison
;; operator itself (any code accepted by ix86_comparison_int_operator);
;; %Y1 prints its condition into the vpcom mnemonic.
10485 (define_insn "xop_maskcmp<mode>3"
10486 [(set (match_operand:VI_128 0 "register_operand" "=x")
10487 (match_operator:VI_128 1 "ix86_comparison_int_operator"
10488 [(match_operand:VI_128 2 "register_operand" "x")
10489 (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
10491 "vpcom%Y1<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
10492 [(set_attr "type" "sse4arg")
10493 (set_attr "prefix_data16" "0")
10494 (set_attr "prefix_rep" "0")
10495 (set_attr "prefix_extra" "2")
10496 (set_attr "length_immediate" "1")
10497 (set_attr "mode" "TI")])
;; Unsigned counterpart of xop_maskcmp<mode>3: the operator must satisfy
;; ix86_comparison_uns_operator and the mnemonic carries a "u" before the
;; mode suffix.
10499 (define_insn "xop_maskcmp_uns<mode>3"
10500 [(set (match_operand:VI_128 0 "register_operand" "=x")
10501 (match_operator:VI_128 1 "ix86_comparison_uns_operator"
10502 [(match_operand:VI_128 2 "register_operand" "x")
10503 (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
10505 "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
10506 [(set_attr "type" "ssecmp")
10507 (set_attr "prefix_data16" "0")
10508 (set_attr "prefix_rep" "0")
10509 (set_attr "prefix_extra" "2")
10510 (set_attr "length_immediate" "1")
10511 (set_attr "mode" "TI")])
10513 ;; Version of pcom*u* that is called from the intrinsics that allows pcomequ*
10514 ;; and pcomneu* not to be converted to the signed ones in case somebody needs
10515 ;; the exact instruction generated for the intrinsic.
;; The comparison is wrapped in UNSPEC_XOP_UNSIGNED_CMP; the assembler
;; output is the same as for xop_maskcmp_uns<mode>3.
10516 (define_insn "xop_maskcmp_uns2<mode>3"
10517 [(set (match_operand:VI_128 0 "register_operand" "=x")
10519 [(match_operator:VI_128 1 "ix86_comparison_uns_operator"
10520 [(match_operand:VI_128 2 "register_operand" "x")
10521 (match_operand:VI_128 3 "nonimmediate_operand" "xm")])]
10522 UNSPEC_XOP_UNSIGNED_CMP))]
10524 "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
10525 [(set_attr "type" "ssecmp")
10526 (set_attr "prefix_data16" "0")
10527 (set_attr "prefix_extra" "2")
10528 (set_attr "length_immediate" "1")
10529 (set_attr "mode" "TI")])
10531 ;; Pcomtrue and pcomfalse support. These are useless instructions, but are
10532 ;; being added here to be complete.
;; Operand 3 is a constant selector: a non-zero value emits
;; vpcomtrue<suffix>, zero emits vpcomfalse<suffix>.  The whole operation
;; is modelled as UNSPEC_XOP_TRUEFALSE.
10533 (define_insn "xop_pcom_tf<mode>3"
10534 [(set (match_operand:VI_128 0 "register_operand" "=x")
10536 [(match_operand:VI_128 1 "register_operand" "x")
10537 (match_operand:VI_128 2 "nonimmediate_operand" "xm")
10538 (match_operand:SI 3 "const_int_operand" "n")]
10539 UNSPEC_XOP_TRUEFALSE))]
10542 return ((INTVAL (operands[3]) != 0)
10543 ? "vpcomtrue<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10544 : "vpcomfalse<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}");
10546 [(set_attr "type" "ssecmp")
10547 (set_attr "prefix_data16" "0")
10548 (set_attr "prefix_extra" "2")
10549 (set_attr "length_immediate" "1")
10550 (set_attr "mode" "TI")])
;; Two-source permute for the VF_128_256 modes.  Operands 1 and 2 are the
;; data sources, operand 3 is the integer-vector selector
;; (<sseintvecmode>) and operand 4 is an immediate in the range 0..3.
;; NOTE(review): the "%" in operand 2's constraint declares it
;; commutative with operand 3, which has a different mode and a different
;; role -- confirm that this is intended.
10552 (define_insn "xop_vpermil2<mode>3"
10553 [(set (match_operand:VF_128_256 0 "register_operand" "=x")
10555 [(match_operand:VF_128_256 1 "register_operand" "x")
10556 (match_operand:VF_128_256 2 "nonimmediate_operand" "%x")
10557 (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "xm")
10558 (match_operand:SI 4 "const_0_to_3_operand" "n")]
10561 "vpermil2<ssemodesuffix>\t{%4, %3, %2, %1, %0|%0, %1, %2, %3, %4}"
10562 [(set_attr "type" "sse4arg")
10563 (set_attr "length_immediate" "1")
10564 (set_attr "mode" "<MODE>")])
10566 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; aesenc / vaesenc on V2DI.  Two alternatives selected by the "isa"
;; attribute: the non-AVX two-operand form ties operand 1 to the
;; destination ("0"), the AVX form takes operand 1 in any SSE register.
10568 (define_insn "aesenc"
10569 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
10570 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
10571 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
10575 aesenc\t{%2, %0|%0, %2}
10576 vaesenc\t{%2, %1, %0|%0, %1, %2}"
10577 [(set_attr "isa" "noavx,avx")
10578 (set_attr "type" "sselog1")
10579 (set_attr "prefix_extra" "1")
10580 (set_attr "prefix" "orig,vex")
10581 (set_attr "btver2_decode" "double,double")
10582 (set_attr "mode" "TI")])
;; aesenclast / vaesenclast on V2DI, modelled as UNSPEC_AESENCLAST.
;; Alternatives as for aesenc: non-AVX (operand 1 tied to the
;; destination) and AVX three-operand.
10584 (define_insn "aesenclast"
10585 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
10586 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
10587 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
10588 UNSPEC_AESENCLAST))]
10591 aesenclast\t{%2, %0|%0, %2}
10592 vaesenclast\t{%2, %1, %0|%0, %1, %2}"
10593 [(set_attr "isa" "noavx,avx")
10594 (set_attr "type" "sselog1")
10595 (set_attr "prefix_extra" "1")
10596 (set_attr "prefix" "orig,vex")
10597 (set_attr "btver2_decode" "double,double")
10598 (set_attr "mode" "TI")])
;; aesdec / vaesdec on V2DI.  Alternatives as for aesenc: non-AVX
;; (operand 1 tied to the destination) and AVX three-operand.
10600 (define_insn "aesdec"
10601 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
10602 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
10603 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
10607 aesdec\t{%2, %0|%0, %2}
10608 vaesdec\t{%2, %1, %0|%0, %1, %2}"
10609 [(set_attr "isa" "noavx,avx")
10610 (set_attr "type" "sselog1")
10611 (set_attr "prefix_extra" "1")
10612 (set_attr "prefix" "orig,vex")
10613 (set_attr "btver2_decode" "double,double")
10614 (set_attr "mode" "TI")])
;; aesdeclast / vaesdeclast on V2DI, modelled as UNSPEC_AESDECLAST.
;; Alternatives as for aesenc: non-AVX (operand 1 tied to the
;; destination) and AVX three-operand.
10616 (define_insn "aesdeclast"
10617 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
10618 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
10619 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
10620 UNSPEC_AESDECLAST))]
10623 aesdeclast\t{%2, %0|%0, %2}
10624 vaesdeclast\t{%2, %1, %0|%0, %1, %2}"
10625 [(set_attr "isa" "noavx,avx")
10626 (set_attr "type" "sselog1")
10627 (set_attr "prefix_extra" "1")
10628 (set_attr "prefix" "orig,vex")
10629 (set_attr "btver2_decode" "double,double")
10630 (set_attr "mode" "TI")])
;; aesimc on V2DI with a single register-or-memory source.  One
;; alternative serves both encodings: the template starts with %v and
;; the "prefix" attribute is maybe_vex.
10632 (define_insn "aesimc"
10633 [(set (match_operand:V2DI 0 "register_operand" "=x")
10634 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")]
10637 "%vaesimc\t{%1, %0|%0, %1}"
10638 [(set_attr "type" "sselog1")
10639 (set_attr "prefix_extra" "1")
10640 (set_attr "prefix" "maybe_vex")
10641 (set_attr "mode" "TI")])
;; aeskeygenassist on V2DI, modelled as UNSPEC_AESKEYGENASSIST.
;; Operand 2 is an 8-bit immediate (const_0_to_255_operand).  The
;; template starts with %v and the "prefix" attribute is maybe_vex.
10643 (define_insn "aeskeygenassist"
10644 [(set (match_operand:V2DI 0 "register_operand" "=x")
10645 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")
10646 (match_operand:SI 2 "const_0_to_255_operand" "n")]
10647 UNSPEC_AESKEYGENASSIST))]
10649 "%vaeskeygenassist\t{%2, %1, %0|%0, %1, %2}"
10650 [(set_attr "type" "sselog1")
10651 (set_attr "prefix_extra" "1")
10652 (set_attr "length_immediate" "1")
10653 (set_attr "prefix" "maybe_vex")
10654 (set_attr "mode" "TI")])
;; pclmulqdq / vpclmulqdq on V2DI.  Operand 3 is an 8-bit immediate.
;; The non-AVX alternative ties operand 1 to the destination; the AVX
;; alternative is the three-operand VEX form.
10656 (define_insn "pclmulqdq"
10657 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
10658 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
10659 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")
10660 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
10664 pclmulqdq\t{%3, %2, %0|%0, %2, %3}
10665 vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10666 [(set_attr "isa" "noavx,avx")
10667 (set_attr "type" "sselog1")
10668 (set_attr "prefix_extra" "1")
10669 (set_attr "length_immediate" "1")
10670 (set_attr "prefix" "orig,vex")
10671 (set_attr "mode" "TI")])
;; Builds the PARALLEL for vzeroall by hand.  Element 0 is an
;; unspec_volatile; elements 1..nregs are SETs that clear each SSE
;; register, viewed in V8SImode, to zero.  nregs is 16 for 64-bit targets
;; and 8 otherwise.
10673 (define_expand "avx_vzeroall"
10674 [(match_par_dup 0 [(const_int 0)])]
10677 int nregs = TARGET_64BIT ? 16 : 8;
10680 operands[0] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs + 1));
10682 XVECEXP (operands[0], 0, 0)
10683 = gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, const0_rtx),
10686 for (regno = 0; regno < nregs; regno++)
10687 XVECEXP (operands[0], 0, regno + 1)
10688 = gen_rtx_SET (VOIDmode,
10689 gen_rtx_REG (V8SImode, SSE_REGNO (regno)),
10690 CONST0_RTX (V8SImode));
;; Matches a PARALLEL accepted by the vzeroall_operation predicate whose
;; first element is the UNSPECV_VZEROALL unspec_volatile.
10693 (define_insn "*avx_vzeroall"
10694 [(match_parallel 0 "vzeroall_operation"
10695 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROALL)])]
10698 [(set_attr "type" "sse")
10699 (set_attr "modrm" "0")
10700 (set_attr "memory" "none")
10701 (set_attr "prefix" "vex")
10702 (set_attr "btver2_decode" "vector")
10703 (set_attr "mode" "OI")])
10705 ;; Clear the upper 128bits of AVX registers, equivalent to a NOP
10706 ;; if the upper 128bits are unused.
;; Modelled purely as an unspec_volatile (UNSPECV_VZEROUPPER) with no
;; operands; the attributes describe an insn with no ModRM byte and no
;; memory access.
10707 (define_insn "avx_vzeroupper"
10708 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)]
10711 [(set_attr "type" "sse")
10712 (set_attr "modrm" "0")
10713 (set_attr "memory" "none")
10714 (set_attr "prefix" "vex")
10715 (set_attr "btver2_decode" "vector")
10716 (set_attr "mode" "OI")])
;; vpbroadcast<suffix> for every mode in VI.  The source is element 0 of
;; operand 1, which has the <ssexmmmode> mode and may be in a register or
;; in memory; Intel syntax prints it with the <iptr> size prefix.
10718 (define_insn "avx2_pbroadcast<mode>"
10719 [(set (match_operand:VI 0 "register_operand" "=x")
10721 (vec_select:<ssescalarmode>
10722 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "xm")
10723 (parallel [(const_int 0)]))))]
10725 "vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}"
10726 [(set_attr "type" "ssemov")
10727 (set_attr "prefix_extra" "1")
10728 (set_attr "prefix" "vex")
10729 (set_attr "mode" "<sseinsnmode>")])
;; Variant of avx2_pbroadcast<mode> whose source operand has the same
;; 256-bit mode as the destination; element 0 of operand 1 is duplicated.
;; The memory alternative prints operand 1 as is, the register
;; alternative prints it with the %x modifier.
10731 (define_insn "avx2_pbroadcast<mode>_1"
10732 [(set (match_operand:VI_256 0 "register_operand" "=x,x")
10733 (vec_duplicate:VI_256
10734 (vec_select:<ssescalarmode>
10735 (match_operand:VI_256 1 "nonimmediate_operand" "m,x")
10736 (parallel [(const_int 0)]))))]
10739 vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}
10740 vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %x1}"
10741 [(set_attr "type" "ssemov")
10742 (set_attr "prefix_extra" "1")
10743 (set_attr "prefix" "vex")
10744 (set_attr "mode" "<sseinsnmode>")])
;; Variable permute on the VI4F_256 modes.  Operand 1 is the data (register
;; or memory) and operand 2 is the V8SI index vector, which must be in
;; a register.  Note the assembler operand order: in Intel syntax the
;; index register (%2) precedes the data operand (%1).
10746 (define_insn "avx2_permvar<mode>"
10747 [(set (match_operand:VI4F_256 0 "register_operand" "=v")
10749 [(match_operand:VI4F_256 1 "nonimmediate_operand" "vm")
10750 (match_operand:V8SI 2 "register_operand" "v")]
10753 "vperm<ssemodesuffix>\t{%1, %2, %0|%0, %2, %1}"
10754 [(set_attr "type" "sselog")
10755 (set_attr "prefix" "vex")
10756 (set_attr "mode" "OI")])
;; Immediate permute on the VI8F_256 modes.  The 8-bit immediate in
;; operand 2 is split into four 2-bit element selectors (bits 1:0, 3:2,
;; 5:4 and 7:6), which are passed as separate constants to
;; avx2_perm<mode>_1.
10758 (define_expand "avx2_perm<mode>"
10759 [(match_operand:VI8F_256 0 "register_operand")
10760 (match_operand:VI8F_256 1 "nonimmediate_operand")
10761 (match_operand:SI 2 "const_0_to_255_operand")]
10764 int mask = INTVAL (operands[2]);
10765 emit_insn (gen_avx2_perm<mode>_1 (operands[0], operands[1],
10766 GEN_INT ((mask >> 0) & 3),
10767 GEN_INT ((mask >> 2) & 3),
10768 GEN_INT ((mask >> 4) & 3),
10769 GEN_INT ((mask >> 6) & 3)));
;; Immediate permute expressed as a vec_select with four element
;; selectors (operands 2..5, each in the range 0..3).  The output code
;; packs the selectors back into one 8-bit immediate, two bits per
;; selector, stores it in operands[2] and prints it as %2.
10773 (define_insn "avx2_perm<mode>_1"
10774 [(set (match_operand:VI8F_256 0 "register_operand" "=v")
10775 (vec_select:VI8F_256
10776 (match_operand:VI8F_256 1 "nonimmediate_operand" "vm")
10777 (parallel [(match_operand 2 "const_0_to_3_operand")
10778 (match_operand 3 "const_0_to_3_operand")
10779 (match_operand 4 "const_0_to_3_operand")
10780 (match_operand 5 "const_0_to_3_operand")])))]
10784 mask |= INTVAL (operands[2]) << 0;
10785 mask |= INTVAL (operands[3]) << 2;
10786 mask |= INTVAL (operands[4]) << 4;
10787 mask |= INTVAL (operands[5]) << 6;
10788 operands[2] = GEN_INT (mask);
10789 return "vperm<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}";
10791 [(set_attr "type" "sselog")
10792 (set_attr "prefix" "vex")
10793 (set_attr "mode" "<sseinsnmode>")])
;; vperm2i128 on V4DI: two sources (operand 2 may be in memory) and an
;; 8-bit immediate in operand 3.
10795 (define_insn "avx2_permv2ti"
10796 [(set (match_operand:V4DI 0 "register_operand" "=x")
10798 [(match_operand:V4DI 1 "register_operand" "x")
10799 (match_operand:V4DI 2 "nonimmediate_operand" "xm")
10800 (match_operand:SI 3 "const_0_to_255_operand" "n")]
10803 "vperm2i128\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10804 [(set_attr "type" "sselog")
10805 (set_attr "prefix" "vex")
10806 (set_attr "mode" "OI")])
;; vbroadcastsd from a register: element 0 of the V2DF register operand 1
;; is duplicated into every element of the V4DF destination.
10808 (define_insn "avx2_vec_dupv4df"
10809 [(set (match_operand:V4DF 0 "register_operand" "=x")
10810 (vec_duplicate:V4DF
10812 (match_operand:V2DF 1 "register_operand" "x")
10813 (parallel [(const_int 0)]))))]
10815 "vbroadcastsd\t{%1, %0|%0, %1}"
10816 [(set_attr "type" "sselog1")
10817 (set_attr "prefix" "vex")
10818 (set_attr "mode" "V4DF")])
10820 ;; Modes handled by AVX vec_dup patterns.
;; The four 256-bit modes with 32-bit or 64-bit elements: V8SI, V8SF,
;; V4DI and V4DF.
10821 (define_mode_iterator AVX_VEC_DUP_MODE
10822 [V8SI V8SF V4DI V4DF])
;; Duplicate a scalar into every element of a 256-bit vector.  There are
;; three alternatives, gated by the "isa" attribute:
;;   1. scalar in memory (any ISA)      -> vbroadcast from memory
;;   2. scalar in a register, AVX2      -> vbroadcast from the register
;;   3. scalar in a register, no AVX2   -> discouraged ("?x")
10824 (define_insn "vec_dup<mode>"
10825 [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand" "=x,x,x")
10826 (vec_duplicate:AVX_VEC_DUP_MODE
10827 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "m,x,?x")))]
10830 vbroadcast<ssescalarmodesuffix>\t{%1, %0|%0, %1}
10831 vbroadcast<ssescalarmodesuffix>\t{%x1, %0|%0, %x1}
10833 [(set_attr "type" "ssemov")
10834 (set_attr "prefix_extra" "1")
10835 (set_attr "prefix" "vex")
10836 (set_attr "isa" "*,avx2,noavx2")
10837 (set_attr "mode" "V8SF")])
;; vbroadcasti128 for the VI_256 modes.  The half-width source
;; (<ssehalfvecmode>) must be a memory operand; there is no register
;; alternative.
10839 (define_insn "avx2_vbroadcasti128_<mode>"
10840 [(set (match_operand:VI_256 0 "register_operand" "=x")
10842 (match_operand:<ssehalfvecmode> 1 "memory_operand" "m")
10845 "vbroadcasti128\t{%1, %0|%0, %1}"
10846 [(set_attr "type" "ssemov")
10847 (set_attr "prefix_extra" "1")
10848 (set_attr "prefix" "vex")
10849 (set_attr "mode" "OI")])
;; Applies after reload, when AVX is available but AVX2 is not, to a
;; vec_duplicate whose scalar source is in a register.  The scalar is
;; first duplicated into a half-width vector (operand 2, created as the
;; half-width view of the destination register), and that half is then
;; concatenated with itself to fill the full-width vector.
10852 [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand")
10853 (vec_duplicate:AVX_VEC_DUP_MODE
10854 (match_operand:<ssescalarmode> 1 "register_operand")))]
10855 "TARGET_AVX && !TARGET_AVX2 && reload_completed"
10856 [(set (match_dup 2)
10857 (vec_duplicate:<ssehalfvecmode> (match_dup 1)))
10859 (vec_concat:AVX_VEC_DUP_MODE (match_dup 2) (match_dup 2)))]
10860 "operands[2] = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (operands[0]));")
;; Broadcast a half-width value into a 256-bit register, for every mode
;; in V_256.  Three alternatives:
;;   1. source in memory                   -> vbroadcast<i128>
;;   2. source tied to the destination     -> vinsert<i128>, immediate 1
;;   3. source in another register ("?x")  -> vperm2<i128>, immediate 0,
;;      with the source printed through the %t modifier
10862 (define_insn "avx_vbroadcastf128_<mode>"
10863 [(set (match_operand:V_256 0 "register_operand" "=x,x,x")
10865 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "m,0,?x")
10869 vbroadcast<i128>\t{%1, %0|%0, %1}
10870 vinsert<i128>\t{$1, %1, %0, %0|%0, %0, %1, 1}
10871 vperm2<i128>\t{$0, %t1, %t1, %0|%0, %t1, %t1, 0}"
10872 [(set_attr "type" "ssemov,sselog1,sselog1")
10873 (set_attr "prefix_extra" "1")
10874 (set_attr "length_immediate" "0,1,1")
10875 (set_attr "prefix" "vex")
10876 (set_attr "mode" "<sseinsnmode>")])
10878 ;; Recognize broadcast as a vec_select as produced by builtin_vec_perm.
10879 ;; If it so happens that the input is in memory, use vbroadcast.
10880 ;; Otherwise use vpermilp (and in the case of 256-bit modes, vperm2f128).
;; V4SF case.  Operand 3 is the index of the element being broadcast.
;; For a memory source the address is advanced by elt * 4 bytes and
;; vbroadcastss loads the SFmode element directly; for a register source
;; vpermilps is used with the immediate elt * 0x55, which repeats the
;; 2-bit index in all four selector fields.
10881 (define_insn "*avx_vperm_broadcast_v4sf"
10882 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
10884 (match_operand:V4SF 1 "nonimmediate_operand" "m,o,x")
10885 (match_parallel 2 "avx_vbroadcast_operand"
10886 [(match_operand 3 "const_int_operand" "C,n,n")])))]
10889 int elt = INTVAL (operands[3]);
10890 switch (which_alternative)
10894 operands[1] = adjust_address_nv (operands[1], SFmode, elt * 4);
10895 return "vbroadcastss\t{%1, %0|%0, %k1}";
10897 operands[2] = GEN_INT (elt * 0x55);
10898 return "vpermilps\t{%2, %1, %0|%0, %1, %2}";
10900 gcc_unreachable ();
10903 [(set_attr "type" "ssemov,ssemov,sselog1")
10904 (set_attr "prefix_extra" "1")
10905 (set_attr "length_immediate" "0,0,1")
10906 (set_attr "prefix" "vex")
10907 (set_attr "mode" "SF,SF,V4SF")])
;; 256-bit (VF_256) case, split after reload (except for V4DF when AVX2
;; is available).  Operand 3 is the index of the element being broadcast.
;; The split code contains three strategies:
;;   - with AVX2 and element 0, emit vec_dup<mode> on the low part of the
;;     source;
;;   - a two-step shuffle: vpermil replicates the element within its
;;     128-bit lane, then vperm2f128 copies that lane into both halves;
;;   - adjust the memory address to point at the selected element, so
;;     that the replacement pattern becomes a plain vec_duplicate.
10909 (define_insn_and_split "*avx_vperm_broadcast_<mode>"
10910 [(set (match_operand:VF_256 0 "register_operand" "=x,x,x")
10912 (match_operand:VF_256 1 "nonimmediate_operand" "m,o,?x")
10913 (match_parallel 2 "avx_vbroadcast_operand"
10914 [(match_operand 3 "const_int_operand" "C,n,n")])))]
10917 "&& reload_completed && (<MODE>mode != V4DFmode || !TARGET_AVX2)"
10918 [(set (match_dup 0) (vec_duplicate:VF_256 (match_dup 1)))]
10920 rtx op0 = operands[0], op1 = operands[1];
10921 int elt = INTVAL (operands[3]);
10927 if (TARGET_AVX2 && elt == 0)
10929 emit_insn (gen_vec_dup<mode> (op0, gen_lowpart (<ssescalarmode>mode,
10934 /* Shuffle element we care about into all elements of the 128-bit lane.
10935 The other lane gets shuffled too, but we don't care. */
10936 if (<MODE>mode == V4DFmode)
10937 mask = (elt & 1 ? 15 : 0);
10939 mask = (elt & 3) * 0x55;
10940 emit_insn (gen_avx_vpermil<mode> (op0, op1, GEN_INT (mask)));
10942 /* Shuffle the lane we care about into both lanes of the dest. */
10943 mask = (elt / (<ssescalarnum> / 2)) * 0x11;
10944 emit_insn (gen_avx_vperm2f128<mode>3 (op0, op0, op0, GEN_INT (mask)));
10948 operands[1] = adjust_address (op1, <ssescalarmode>mode,
10949 elt * GET_MODE_SIZE (<ssescalarmode>mode));
;; Expander for the immediate form of vpermil on double-precision (VF2)
;; vectors.  The 0..255 immediate in operand 2 is turned into an explicit
;; selector PARALLEL: elements are handled in pairs, and bit i / bit i + 1
;; of the mask choose which element of the pair starting at i is selected.
10952 (define_expand "<sse2_avx_avx512f>_vpermil<mode>"
10953 [(set (match_operand:VF2 0 "register_operand")
10955 (match_operand:VF2 1 "nonimmediate_operand")
10956 (match_operand:SI 2 "const_0_to_255_operand")))]
10959 int mask = INTVAL (operands[2]);
10960 rtx perm[<ssescalarnum>];
10963 for (i = 0; i < <ssescalarnum>; i = i + 2)
10965 perm[i] = GEN_INT (((mask >> i) & 1) + i);
10966 perm[i + 1] = GEN_INT (((mask >> (i + 1)) & 1) + i);
10970 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
;; Expander for the immediate form of vpermil on single-precision (VF1)
;; vectors.  The 0..255 immediate in operand 2 is turned into an explicit
;; selector PARALLEL: elements are handled in groups of four, and the same
;; four 2-bit mask fields (bits 0-1, 2-3, 4-5, 6-7) are applied to every
;; group, offset by the index of the group's first element.
10973 (define_expand "<sse2_avx_avx512f>_vpermil<mode>"
10974 [(set (match_operand:VF1 0 "register_operand")
10976 (match_operand:VF1 1 "nonimmediate_operand")
10977 (match_operand:SI 2 "const_0_to_255_operand")))]
10980 int mask = INTVAL (operands[2]);
10981 rtx perm[<ssescalarnum>];
10984 for (i = 0; i < <ssescalarnum>; i = i + 4)
10986 perm[i] = GEN_INT (((mask >> 0) & 3) + i);
10987 perm[i + 1] = GEN_INT (((mask >> 2) & 3) + i);
10988 perm[i + 2] = GEN_INT (((mask >> 4) & 3) + i);
10989 perm[i + 3] = GEN_INT (((mask >> 6) & 3) + i);
10993 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
;; Matcher for the immediate vpermil form expressed with a selector
;; PARALLEL.  The insn condition requires avx_vpermilp_parallel to accept
;; the selector; the output code calls it again, subtracts 1 from its
;; result to obtain the immediate, and stores that immediate in operand 2
;; before printing the instruction.
10996 (define_insn "*<sse2_avx_avx512f>_vpermilp<mode>"
10997 [(set (match_operand:VF 0 "register_operand" "=v")
10999 (match_operand:VF 1 "nonimmediate_operand" "vm")
11000 (match_parallel 2 ""
11001 [(match_operand 3 "const_int_operand")])))]
11003 && avx_vpermilp_parallel (operands[2], <MODE>mode)"
11005 int mask = avx_vpermilp_parallel (operands[2], <MODE>mode) - 1;
11006 operands[2] = GEN_INT (mask);
11007 return "vpermil<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}";
11009 [(set_attr "type" "sselog")
11010 (set_attr "prefix_extra" "1")
11011 (set_attr "length_immediate" "1")
11012 (set_attr "prefix" "vex")
11013 (set_attr "mode" "<sseinsnmode>")])
;; Variable-control form of vpermil: operand 1 is the float vector in a
;; register and operand 2 is an integer vector (<sseintvecmode>) that may
;; be a register or memory.
11015 (define_insn "<sse2_avx_avx512f>_vpermilvar<mode>3"
11016 [(set (match_operand:VF 0 "register_operand" "=v")
11018 [(match_operand:VF 1 "register_operand" "v")
11019 (match_operand:<sseintvecmode> 2 "nonimmediate_operand" "vm")]
11022 "vpermil<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11023 [(set_attr "type" "sselog")
11024 (set_attr "prefix_extra" "1")
11025 (set_attr "btver2_decode" "vector")
11026 (set_attr "prefix" "vex")
11027 (set_attr "mode" "<sseinsnmode>")])
;; Expander for vperm2f128.  The default pattern is an
;; UNSPEC_VPERMIL2F128 of operands 1, 2 and the 0..255 immediate.
;; When neither bit 3 nor bit 7 of the immediate is set
;; ((mask & 0x88) == 0), the preparation code instead builds a SET of
;; operand 0 from a vec_select over the vec_concat of operands 1 and 2:
;; the low half of the result takes nelt/2 consecutive elements starting
;; at (mask & 3) * nelt/2, the high half starting at
;; ((mask >> 4) & 3) * nelt/2.
11030 (define_expand "avx_vperm2f128<mode>3"
11031 [(set (match_operand:AVX256MODE2P 0 "register_operand")
11032 (unspec:AVX256MODE2P
11033 [(match_operand:AVX256MODE2P 1 "register_operand")
11034 (match_operand:AVX256MODE2P 2 "nonimmediate_operand")
11035 (match_operand:SI 3 "const_0_to_255_operand")]
11036 UNSPEC_VPERMIL2F128))]
11039 int mask = INTVAL (operands[3]);
11040 if ((mask & 0x88) == 0)
11042 rtx perm[<ssescalarnum>], t1, t2;
11043 int i, base, nelt = <ssescalarnum>, nelt2 = nelt / 2;
11045 base = (mask & 3) * nelt2;
11046 for (i = 0; i < nelt2; ++i)
11047 perm[i] = GEN_INT (base + i);
11049 base = ((mask >> 4) & 3) * nelt2;
11050 for (i = 0; i < nelt2; ++i)
11051 perm[i + nelt2] = GEN_INT (base + i);
11053 t2 = gen_rtx_VEC_CONCAT (<ssedoublevecmode>mode,
11054 operands[1], operands[2]);
11055 t1 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, perm));
11056 t2 = gen_rtx_VEC_SELECT (<MODE>mode, t2, t1);
11057 t2 = gen_rtx_SET (VOIDmode, operands[0], t2);
11063 ;; Note that bits 7 and 3 of the imm8 allow lanes to be zeroed, which
11064 ;; means that in order to represent this properly in rtl we'd have to
11065 ;; nest *another* vec_concat with a zero operand and do the select from
11066 ;; a 4x wide vector. That doesn't seem very nice.
;; UNSPEC form of vperm2<i128>: accepts any 0..255 immediate in operand 3
;; and prints it unchanged as the instruction's immediate.
11067 (define_insn "*avx_vperm2f128<mode>_full"
11068 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
11069 (unspec:AVX256MODE2P
11070 [(match_operand:AVX256MODE2P 1 "register_operand" "x")
11071 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm")
11072 (match_operand:SI 3 "const_0_to_255_operand" "n")]
11073 UNSPEC_VPERMIL2F128))]
11075 "vperm2<i128>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11076 [(set_attr "type" "sselog")
11077 (set_attr "prefix_extra" "1")
11078 (set_attr "length_immediate" "1")
11079 (set_attr "prefix" "vex")
11080 (set_attr "mode" "<sseinsnmode>")])
;; vec_select form of vperm2<i128>: selects from the vec_concat of
;; operands 1 and 2 using a selector PARALLEL accepted by
;; avx_vperm2f128_parallel.  The output code recovers the immediate as the
;; helper's result minus 1.  Two special cases are printed as
;; vinsert<i128> with immediate 0 or 1 on the 128-bit view (%x2) of
;; operand 2; the general case stores the immediate in operand 3 and
;; prints vperm2<i128>.
11082 (define_insn "*avx_vperm2f128<mode>_nozero"
11083 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
11084 (vec_select:AVX256MODE2P
11085 (vec_concat:<ssedoublevecmode>
11086 (match_operand:AVX256MODE2P 1 "register_operand" "x")
11087 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm"))
11088 (match_parallel 3 ""
11089 [(match_operand 4 "const_int_operand")])))]
11091 && avx_vperm2f128_parallel (operands[3], <MODE>mode)"
11093 int mask = avx_vperm2f128_parallel (operands[3], <MODE>mode) - 1;
11095 return "vinsert<i128>\t{$0, %x2, %1, %0|%0, %1, %x2, 0}";
11097 return "vinsert<i128>\t{$1, %x2, %1, %0|%0, %1, %x2, 1}";
11098 operands[3] = GEN_INT (mask);
11099 return "vperm2<i128>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
11101 [(set_attr "type" "sselog")
11102 (set_attr "prefix_extra" "1")
11103 (set_attr "length_immediate" "1")
11104 (set_attr "prefix" "vex")
11105 (set_attr "mode" "<sseinsnmode>")])
;; Expander for inserting a half-width vector (operand 2) into a 256-bit
;; vector (operand 1).  Operand 3 is 0 or 1; the switch on it picks
;; gen_vec_set_lo_<mode> or gen_vec_set_hi_<mode>, which is then emitted
;; with operands 0, 1 and 2.
11107 (define_expand "avx_vinsertf128<mode>"
11108 [(match_operand:V_256 0 "register_operand")
11109 (match_operand:V_256 1 "register_operand")
11110 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand")
11111 (match_operand:SI 3 "const_0_to_1_operand")]
11114 rtx (*insn)(rtx, rtx, rtx);
11116 switch (INTVAL (operands[3]))
11119 insn = gen_vec_set_lo_<mode>;
11122 insn = gen_vec_set_hi_<mode>;
11125 gcc_unreachable ();
11128 emit_insn (insn (operands[0], operands[1], operands[2]));
;; V4DI result built from the V2DI operand 2 together with elements 2 and 3
;; of operand 1; printed as vinserti128 with immediate 0.
11132 (define_insn "avx2_vec_set_lo_v4di"
11133 [(set (match_operand:V4DI 0 "register_operand" "=x")
11135 (match_operand:V2DI 2 "nonimmediate_operand" "xm")
11137 (match_operand:V4DI 1 "register_operand" "x")
11138 (parallel [(const_int 2) (const_int 3)]))))]
11140 "vinserti128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
11141 [(set_attr "type" "sselog")
11142 (set_attr "prefix_extra" "1")
11143 (set_attr "length_immediate" "1")
11144 (set_attr "prefix" "vex")
11145 (set_attr "mode" "OI")])
;; V4DI result built from elements 0 and 1 of operand 1 together with the
;; V2DI operand 2; printed as vinserti128 with immediate 1.
11147 (define_insn "avx2_vec_set_hi_v4di"
11148 [(set (match_operand:V4DI 0 "register_operand" "=x")
11151 (match_operand:V4DI 1 "register_operand" "x")
11152 (parallel [(const_int 0) (const_int 1)]))
11153 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
11155 "vinserti128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
11156 [(set_attr "type" "sselog")
11157 (set_attr "prefix_extra" "1")
11158 (set_attr "length_immediate" "1")
11159 (set_attr "prefix" "vex")
11160 (set_attr "mode" "OI")])
;; Replace the low half of a 4-element 256-bit vector (VI8F_256): the
;; result is the concatenation of operand 2 with elements 2 and 3 of
;; operand 1.  Printed as vinsert<i128> with immediate 0.
11162 (define_insn "vec_set_lo_<mode>"
11163 [(set (match_operand:VI8F_256 0 "register_operand" "=x")
11164 (vec_concat:VI8F_256
11165 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")
11166 (vec_select:<ssehalfvecmode>
11167 (match_operand:VI8F_256 1 "register_operand" "x")
11168 (parallel [(const_int 2) (const_int 3)]))))]
11170 "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
11171 [(set_attr "type" "sselog")
11172 (set_attr "prefix_extra" "1")
11173 (set_attr "length_immediate" "1")
11174 (set_attr "prefix" "vex")
11175 (set_attr "mode" "<sseinsnmode>")])
;; Replace the high half of a 4-element 256-bit vector (VI8F_256): the
;; result is the concatenation of elements 0 and 1 of operand 1 with
;; operand 2.  Printed as vinsert<i128> with immediate 1.
11177 (define_insn "vec_set_hi_<mode>"
11178 [(set (match_operand:VI8F_256 0 "register_operand" "=x")
11179 (vec_concat:VI8F_256
11180 (vec_select:<ssehalfvecmode>
11181 (match_operand:VI8F_256 1 "register_operand" "x")
11182 (parallel [(const_int 0) (const_int 1)]))
11183 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")))]
11185 "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
11186 [(set_attr "type" "sselog")
11187 (set_attr "prefix_extra" "1")
11188 (set_attr "length_immediate" "1")
11189 (set_attr "prefix" "vex")
11190 (set_attr "mode" "<sseinsnmode>")])
;; Replace the low half of an 8-element 256-bit vector (VI4F_256): the
;; result is the concatenation of operand 2 with elements 4..7 of
;; operand 1.  Printed as vinsert<i128> with immediate 0.
11192 (define_insn "vec_set_lo_<mode>"
11193 [(set (match_operand:VI4F_256 0 "register_operand" "=x")
11194 (vec_concat:VI4F_256
11195 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")
11196 (vec_select:<ssehalfvecmode>
11197 (match_operand:VI4F_256 1 "register_operand" "x")
11198 (parallel [(const_int 4) (const_int 5)
11199 (const_int 6) (const_int 7)]))))]
11201 "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
11202 [(set_attr "type" "sselog")
11203 (set_attr "prefix_extra" "1")
11204 (set_attr "length_immediate" "1")
11205 (set_attr "prefix" "vex")
11206 (set_attr "mode" "<sseinsnmode>")])
;; Replace the high half of an 8-element 256-bit vector (VI4F_256): the
;; result is the concatenation of elements 0..3 of operand 1 with
;; operand 2.  Printed as vinsert<i128> with immediate 1.
11208 (define_insn "vec_set_hi_<mode>"
11209 [(set (match_operand:VI4F_256 0 "register_operand" "=x")
11210 (vec_concat:VI4F_256
11211 (vec_select:<ssehalfvecmode>
11212 (match_operand:VI4F_256 1 "register_operand" "x")
11213 (parallel [(const_int 0) (const_int 1)
11214 (const_int 2) (const_int 3)]))
11215 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")))]
11217 "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
11218 [(set_attr "type" "sselog")
11219 (set_attr "prefix_extra" "1")
11220 (set_attr "length_immediate" "1")
11221 (set_attr "prefix" "vex")
11222 (set_attr "mode" "<sseinsnmode>")])
;; V16HI result built from the V8HI operand 2 together with elements 8..15
;; of operand 1.  Printed as vinsert%~128 with immediate 0.
11224 (define_insn "vec_set_lo_v16hi"
11225 [(set (match_operand:V16HI 0 "register_operand" "=x")
11227 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
11229 (match_operand:V16HI 1 "register_operand" "x")
11230 (parallel [(const_int 8) (const_int 9)
11231 (const_int 10) (const_int 11)
11232 (const_int 12) (const_int 13)
11233 (const_int 14) (const_int 15)]))))]
11235 "vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
11236 [(set_attr "type" "sselog")
11237 (set_attr "prefix_extra" "1")
11238 (set_attr "length_immediate" "1")
11239 (set_attr "prefix" "vex")
11240 (set_attr "mode" "OI")])
;; V16HI result built from elements 0..7 of operand 1 together with the
;; V8HI operand 2.  Printed as vinsert%~128 with immediate 1.
11242 (define_insn "vec_set_hi_v16hi"
11243 [(set (match_operand:V16HI 0 "register_operand" "=x")
11246 (match_operand:V16HI 1 "register_operand" "x")
11247 (parallel [(const_int 0) (const_int 1)
11248 (const_int 2) (const_int 3)
11249 (const_int 4) (const_int 5)
11250 (const_int 6) (const_int 7)]))
11251 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
11253 "vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
11254 [(set_attr "type" "sselog")
11255 (set_attr "prefix_extra" "1")
11256 (set_attr "length_immediate" "1")
11257 (set_attr "prefix" "vex")
11258 (set_attr "mode" "OI")])
;; V32QI result built from the V16QI operand 2 together with elements
;; 16..31 of operand 1.  Printed as vinsert%~128 with immediate 0.
11260 (define_insn "vec_set_lo_v32qi"
11261 [(set (match_operand:V32QI 0 "register_operand" "=x")
11263 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
11265 (match_operand:V32QI 1 "register_operand" "x")
11266 (parallel [(const_int 16) (const_int 17)
11267 (const_int 18) (const_int 19)
11268 (const_int 20) (const_int 21)
11269 (const_int 22) (const_int 23)
11270 (const_int 24) (const_int 25)
11271 (const_int 26) (const_int 27)
11272 (const_int 28) (const_int 29)
11273 (const_int 30) (const_int 31)]))))]
11275 "vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
11276 [(set_attr "type" "sselog")
11277 (set_attr "prefix_extra" "1")
11278 (set_attr "length_immediate" "1")
11279 (set_attr "prefix" "vex")
11280 (set_attr "mode" "OI")])
;; V32QI result built from elements 0..15 of operand 1 together with the
;; V16QI operand 2.  Printed as vinsert%~128 with immediate 1.
11282 (define_insn "vec_set_hi_v32qi"
11283 [(set (match_operand:V32QI 0 "register_operand" "=x")
11286 (match_operand:V32QI 1 "register_operand" "x")
11287 (parallel [(const_int 0) (const_int 1)
11288 (const_int 2) (const_int 3)
11289 (const_int 4) (const_int 5)
11290 (const_int 6) (const_int 7)
11291 (const_int 8) (const_int 9)
11292 (const_int 10) (const_int 11)
11293 (const_int 12) (const_int 13)
11294 (const_int 14) (const_int 15)]))
11295 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
11297 "vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
11298 [(set_attr "type" "sselog")
11299 (set_attr "prefix_extra" "1")
11300 (set_attr "length_immediate" "1")
11301 (set_attr "prefix" "vex")
11302 (set_attr "mode" "OI")])
;; Masked load: operand 1 is the memory source, operand 2 is an
;; integer-vector register (<sseintvecmode>), operand 0 the destination
;; register.  Printed as v<sseintprefix>maskmov<ssemodesuffix>.
11304 (define_insn "<avx_avx2>_maskload<ssemodesuffix><avxsizesuffix>"
11305 [(set (match_operand:V48_AVX2 0 "register_operand" "=x")
11307 [(match_operand:<sseintvecmode> 2 "register_operand" "x")
11308 (match_operand:V48_AVX2 1 "memory_operand" "m")]
11311 "v<sseintprefix>maskmov<ssemodesuffix>\t{%1, %2, %0|%0, %2, %1}"
11312 [(set_attr "type" "sselog1")
11313 (set_attr "prefix_extra" "1")
11314 (set_attr "prefix" "vex")
11315 (set_attr "btver2_decode" "vector")
11316 (set_attr "mode" "<sseinsnmode>")])
;; Masked store: operand 0 is the memory destination, declared
;; read-write ("+m"); operand 1 is an integer-vector register
;; (<sseintvecmode>) and operand 2 the data register.  Printed as
;; v<sseintprefix>maskmov<ssemodesuffix>.
11318 (define_insn "<avx_avx2>_maskstore<ssemodesuffix><avxsizesuffix>"
11319 [(set (match_operand:V48_AVX2 0 "memory_operand" "+m")
11321 [(match_operand:<sseintvecmode> 1 "register_operand" "x")
11322 (match_operand:V48_AVX2 2 "register_operand" "x")
11326 "v<sseintprefix>maskmov<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11327 [(set_attr "type" "sselog1")
11328 (set_attr "prefix_extra" "1")
11329 (set_attr "prefix" "vex")
11330 (set_attr "btver2_decode" "vector")
11331 (set_attr "mode" "<sseinsnmode>")])
;; Cast of a half-width vector (operand 1) to a 256-bit vector (operand 0),
;; represented as an unspec and split after reload into a plain move.
;; The split code can re-create op0 as a <ssehalfvecmode> register, or op1
;; as a <MODE>mode register, with the same register number, before
;; calling emit_move_insn.
11333 (define_insn_and_split "avx_<castmode><avxsizesuffix>_<castmode>"
11334 [(set (match_operand:AVX256MODE2P 0 "nonimmediate_operand" "=x,m")
11335 (unspec:AVX256MODE2P
11336 [(match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "xm,x")]
11340 "&& reload_completed"
11343 rtx op0 = operands[0];
11344 rtx op1 = operands[1];
11346 op0 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op0));
11348 op1 = gen_rtx_REG (<MODE>mode, REGNO (op1));
11349 emit_move_insn (op0, op1);
;; Vector initialization for 256-bit modes; delegates to
;; ix86_expand_vector_init with its first argument false.
11353 (define_expand "vec_init<mode>"
11354 [(match_operand:V_256 0 "register_operand")
11358 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Expander for extracting a V2DI half from a V4DI register.  Operand 2 is
;; 0 or 1; the switch on it picks gen_vec_extract_lo_v4di or
;; gen_vec_extract_hi_v4di, which is emitted with operands 0 and 1.
11362 (define_expand "avx2_extracti128"
11363 [(match_operand:V2DI 0 "nonimmediate_operand")
11364 (match_operand:V4DI 1 "register_operand")
11365 (match_operand:SI 2 "const_0_to_1_operand")]
11368 rtx (*insn)(rtx, rtx);
11370 switch (INTVAL (operands[2]))
11373 insn = gen_vec_extract_lo_v4di;
11376 insn = gen_vec_extract_hi_v4di;
11379 gcc_unreachable ();
11382 emit_insn (insn (operands[0], operands[1]));
;; Expander for inserting a V2DI value (operand 2) into a V4DI register
;; (operand 1).  Operand 3 is 0 or 1; the switch on it picks
;; gen_avx2_vec_set_lo_v4di or gen_avx2_vec_set_hi_v4di, which is emitted
;; with operands 0, 1 and 2.
11386 (define_expand "avx2_inserti128"
11387 [(match_operand:V4DI 0 "register_operand")
11388 (match_operand:V4DI 1 "register_operand")
11389 (match_operand:V2DI 2 "nonimmediate_operand")
11390 (match_operand:SI 3 "const_0_to_1_operand")]
11393 rtx (*insn)(rtx, rtx, rtx);
11395 switch (INTVAL (operands[3]))
11398 insn = gen_avx2_vec_set_lo_v4di;
11401 insn = gen_avx2_vec_set_hi_v4di;
11404 gcc_unreachable ();
11407 emit_insn (insn (operands[0], operands[1], operands[2]));
;; Arithmetic right shift (ashiftrt) of vector operand 1 by the vector of
;; shift counts in operand 2 (register or memory); printed as
;; vpsrav<ssemodesuffix>.
11411 (define_insn "<avx2_avx512f>_ashrv<mode>"
11412 [(set (match_operand:VI48_AVX512F 0 "register_operand" "=v")
11413 (ashiftrt:VI48_AVX512F
11414 (match_operand:VI48_AVX512F 1 "register_operand" "v")
11415 (match_operand:VI48_AVX512F 2 "nonimmediate_operand" "vm")))]
11417 "vpsrav<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11418 [(set_attr "type" "sseishft")
11419 (set_attr "prefix" "maybe_evex")
11420 (set_attr "mode" "<sseinsnmode>")])
;; Shift of vector operand 1 by the vector of shift counts in operand 2,
;; for every code in the any_lshift iterator; printed as
;; vp<vshift>v<ssemodesuffix>.
11422 (define_insn "<avx2_avx512f>_<shift_insn>v<mode>"
11423 [(set (match_operand:VI48_AVX2_48_AVX512F 0 "register_operand" "=v")
11424 (any_lshift:VI48_AVX2_48_AVX512F
11425 (match_operand:VI48_AVX2_48_AVX512F 1 "register_operand" "v")
11426 (match_operand:VI48_AVX2_48_AVX512F 2 "nonimmediate_operand" "vm")))]
11428 "vp<vshift>v<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11429 [(set_attr "type" "sseishft")
11430 (set_attr "prefix" "maybe_evex")
11431 (set_attr "mode" "<sseinsnmode>")])
11433 ;; For avx_vec_concat<mode> insn pattern
;; Operand-modifier letter substituted into the avx_vec_concat<mode> output
;; template: "t" for the modes on the first row, "g" for the modes on the
;; second row.
11434 (define_mode_attr concat_tg_mode
11435 [(V32QI "t") (V16HI "t") (V8SI "t") (V4DI "t") (V8SF "t") (V4DF "t")
11436 (V64QI "g") (V32HI "g") (V16SI "g") (V8DI "g") (V16SF "g") (V8DF "g")])
;; Concatenate two half-width vectors (operands 1 and 2) into a 256- or
;; 512-bit register.  The output code switches on which_alternative:
;; one path prints vinsert<i128> with immediate 1, using the
;; <concat_tg_mode> modifier on operand 1; the other path (operand 2
;; matching constraint "C") picks vmovaps, vmovapd or vmovdqa from
;; get_attr_mode and moves operand 1 into the %t0 or %x0 view of the
;; destination.
11438 (define_insn "avx_vec_concat<mode>"
11439 [(set (match_operand:V_256_512 0 "register_operand" "=x,x")
11440 (vec_concat:V_256_512
11441 (match_operand:<ssehalfvecmode> 1 "register_operand" "x,x")
11442 (match_operand:<ssehalfvecmode> 2 "vector_move_operand" "xm,C")))]
11445 switch (which_alternative)
11448 return "vinsert<i128>\t{$0x1, %2, %<concat_tg_mode>1, %0|%0, %<concat_tg_mode>1, %2, 0x1}";
11450 switch (get_attr_mode (insn))
11453 return "vmovaps\t{%1, %t0|%t0, %1}";
11455 return "vmovapd\t{%1, %t0|%t0, %1}";
11457 return "vmovaps\t{%1, %x0|%x0, %1}";
11459 return "vmovapd\t{%1, %x0|%x0, %1}";
11461 return "vmovdqa\t{%1, %t0|%t0, %1}";
11463 return "vmovdqa\t{%1, %x0|%x0, %1}";
11465 gcc_unreachable ();
11468 gcc_unreachable ();
11471 [(set_attr "type" "sselog,ssemov")
11472 (set_attr "prefix_extra" "1,*")
11473 (set_attr "length_immediate" "1,*")
11474 (set_attr "prefix" "maybe_evex")
11475 (set_attr "mode" "<sseinsnmode>")])
;; vcvtph2ps with a V8HI register source and V4SF result: the result is
;; elements 0..3 selected from a V8SF unspec of operand 1.
11477 (define_insn "vcvtph2ps"
11478 [(set (match_operand:V4SF 0 "register_operand" "=x")
11480 (unspec:V8SF [(match_operand:V8HI 1 "register_operand" "x")]
11482 (parallel [(const_int 0) (const_int 1)
11483 (const_int 2) (const_int 3)])))]
11485 "vcvtph2ps\t{%1, %0|%0, %1}"
11486 [(set_attr "type" "ssecvt")
11487 (set_attr "prefix" "vex")
11488 (set_attr "mode" "V4SF")])
;; vcvtph2ps with a V4HI memory source and V4SF result, expressed directly
;; as UNSPEC_VCVTPH2PS.
;; NOTE(review): the "mode" attribute is V8SF although the destination is
;; V4SF (the register-source pattern uses V4SF) -- confirm this is intended.
11490 (define_insn "*vcvtph2ps_load"
11491 [(set (match_operand:V4SF 0 "register_operand" "=x")
11492 (unspec:V4SF [(match_operand:V4HI 1 "memory_operand" "m")]
11493 UNSPEC_VCVTPH2PS))]
11495 "vcvtph2ps\t{%1, %0|%0, %1}"
11496 [(set_attr "type" "ssecvt")
11497 (set_attr "prefix" "vex")
11498 (set_attr "mode" "V8SF")])
;; 256-bit vcvtph2ps: V8HI source (register or memory) converted to a V8SF
;; register, expressed as UNSPEC_VCVTPH2PS.
11500 (define_insn "vcvtph2ps256"
11501 [(set (match_operand:V8SF 0 "register_operand" "=x")
11502 (unspec:V8SF [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
11503 UNSPEC_VCVTPH2PS))]
11505 "vcvtph2ps\t{%1, %0|%0, %1}"
11506 [(set_attr "type" "ssecvt")
11507 (set_attr "prefix" "vex")
11508 (set_attr "btver2_decode" "double")
11509 (set_attr "mode" "V8SF")])
;; Expander for vcvtps2ph with a V4SF register source and V8HI register
;; destination.  Operand 2 is the 0..255 immediate; the preparation
;; statement sets operand 3 to the V4HImode zero constant.
11511 (define_expand "vcvtps2ph"
11512 [(set (match_operand:V8HI 0 "register_operand")
11514 (unspec:V4HI [(match_operand:V4SF 1 "register_operand")
11515 (match_operand:SI 2 "const_0_to_255_operand")]
11519 "operands[3] = CONST0_RTX (V4HImode);")
;; Register form of vcvtps2ph: V4SF source, V8HI destination; operand 3
;; must be a V4HI zero constant (const0_operand) and operand 2 is the
;; immediate printed in the instruction.
11521 (define_insn "*vcvtps2ph"
11522 [(set (match_operand:V8HI 0 "register_operand" "=x")
11524 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")
11525 (match_operand:SI 2 "const_0_to_255_operand" "N")]
11527 (match_operand:V4HI 3 "const0_operand")))]
11529 "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
11530 [(set_attr "type" "ssecvt")
11531 (set_attr "prefix" "vex")
11532 (set_attr "mode" "V4SF")])
;; Store form of vcvtps2ph: V4SF register source converted and written to
;; a V4HI memory destination; operand 2 is the immediate.
11534 (define_insn "*vcvtps2ph_store"
11535 [(set (match_operand:V4HI 0 "memory_operand" "=m")
11536 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")
11537 (match_operand:SI 2 "const_0_to_255_operand" "N")]
11538 UNSPEC_VCVTPS2PH))]
11540 "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
11541 [(set_attr "type" "ssecvt")
11542 (set_attr "prefix" "vex")
11543 (set_attr "mode" "V4SF")])
;; 256-bit vcvtps2ph: V8SF register source converted to a V8HI destination
;; that may be a register or memory; operand 2 is the immediate.
11545 (define_insn "vcvtps2ph256"
11546 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=xm")
11547 (unspec:V8HI [(match_operand:V8SF 1 "register_operand" "x")
11548 (match_operand:SI 2 "const_0_to_255_operand" "N")]
11549 UNSPEC_VCVTPS2PH))]
11551 "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
11552 [(set_attr "type" "ssecvt")
11553 (set_attr "prefix" "vex")
11554 (set_attr "btver2_decode" "vector")
11555 (set_attr "mode" "V8SF")])
11557 ;; For gather* insn patterns
;; VEC_GATHER_MODE lists the vector modes the gather patterns iterate over.
;; For each of them:
;;   VEC_GATHER_IDXSI gives the index-vector mode used by the "gathersi"
;;     patterns,
;;   VEC_GATHER_IDXDI gives the index-vector mode used by the "gatherdi"
;;     patterns,
;;   VEC_GATHER_SRCDI gives the mode of the source/mask operands of the
;;     "gatherdi" patterns; it differs from the iterated mode only for
;;     V8SI and V8SF, which map to V4SI and V4SF.
11558 (define_mode_iterator VEC_GATHER_MODE
11559 [V2DI V2DF V4DI V4DF V4SI V4SF V8SI V8SF])
11560 (define_mode_attr VEC_GATHER_IDXSI
11561 [(V2DI "V4SI") (V2DF "V4SI")
11562 (V4DI "V4SI") (V4DF "V4SI")
11563 (V4SI "V4SI") (V4SF "V4SI")
11564 (V8SI "V8SI") (V8SF "V8SI")])
11565 (define_mode_attr VEC_GATHER_IDXDI
11566 [(V2DI "V2DI") (V2DF "V2DI")
11567 (V4DI "V4DI") (V4DF "V4DI")
11568 (V4SI "V2DI") (V4SF "V2DI")
11569 (V8SI "V4DI") (V8SF "V4DI")])
11570 (define_mode_attr VEC_GATHER_SRCDI
11571 [(V2DI "V2DI") (V2DF "V2DF")
11572 (V4DI "V4DI") (V4DF "V4DF")
11573 (V4SI "V4SI") (V4SF "V4SF")
11574 (V8SI "V4SI") (V8SF "V4SF")])
;; Expander for the gather patterns that use a <VEC_GATHER_IDXSI> index
;; vector.  Operands 1 and 4 are registers in the gathered mode, operand 2
;; is the base address (vsib_address_operand), operand 3 the index vector
;; and operand 5 the scale, restricted by const1248_operand.  A scratch in
;; the gathered mode (operand 6) is clobbered.  The preparation code wraps
;; operands 2, 3 and 5 in a Pmode UNSPEC_VSIBADDR.
11576 (define_expand "avx2_gathersi<mode>"
11577 [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand")
11578 (unspec:VEC_GATHER_MODE
11579 [(match_operand:VEC_GATHER_MODE 1 "register_operand")
11580 (mem:<ssescalarmode>
11582 [(match_operand 2 "vsib_address_operand")
11583 (match_operand:<VEC_GATHER_IDXSI>
11584 3 "register_operand")
11585 (match_operand:SI 5 "const1248_operand")]))
11586 (mem:BLK (scratch))
11587 (match_operand:VEC_GATHER_MODE 4 "register_operand")]
11589 (clobber (match_scratch:VEC_GATHER_MODE 6))])]
11593 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
11594 operands[5]), UNSPEC_VSIBADDR);
;; Gather insn using a <VEC_GATHER_IDXSI> index vector.  Operand 2 is tied
;; to the destination (constraint "0") and operand 5 is tied to the
;; clobbered scratch, operand 1 (constraint "1").  Operator 7
;; (vsib_mem_operator) wraps the address (operand 3), index vector
;; (operand 4) and scale (operand 6).  The template prints the scratch,
;; the memory operator and the destination.
11597 (define_insn "*avx2_gathersi<mode>"
11598 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
11599 (unspec:VEC_GATHER_MODE
11600 [(match_operand:VEC_GATHER_MODE 2 "register_operand" "0")
11601 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
11603 [(match_operand:P 3 "vsib_address_operand" "p")
11604 (match_operand:<VEC_GATHER_IDXSI> 4 "register_operand" "x")
11605 (match_operand:SI 6 "const1248_operand" "n")]
11607 (mem:BLK (scratch))
11608 (match_operand:VEC_GATHER_MODE 5 "register_operand" "1")]
11610 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
11612 "v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %7, %0|%0, %7, %1}"
11613 [(set_attr "type" "ssemov")
11614 (set_attr "prefix" "vex")
11615 (set_attr "mode" "<sseinsnmode>")])
;; Second form of the <VEC_GATHER_IDXSI> gather insn with the operands
;; renumbered: operator 6 (vsib_mem_operator) wraps the address
;; (operand 2), index vector (operand 3) and scale (operand 5), and
;; operand 4 is tied to the clobbered scratch, operand 1.
11617 (define_insn "*avx2_gathersi<mode>_2"
11618 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
11619 (unspec:VEC_GATHER_MODE
11621 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
11623 [(match_operand:P 2 "vsib_address_operand" "p")
11624 (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand" "x")
11625 (match_operand:SI 5 "const1248_operand" "n")]
11627 (mem:BLK (scratch))
11628 (match_operand:VEC_GATHER_MODE 4 "register_operand" "1")]
11630 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
11632 "v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %6, %0|%0, %6, %1}"
11633 [(set_attr "type" "ssemov")
11634 (set_attr "prefix" "vex")
11635 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the gather patterns that use a <VEC_GATHER_IDXDI> index
;; vector.  Operands 1 and 4 are registers in <VEC_GATHER_SRCDI> mode,
;; operand 2 is the base address (vsib_address_operand), operand 3 the
;; index vector and operand 5 the scale, restricted by const1248_operand.
;; A scratch in the gathered mode (operand 6) is clobbered.  The
;; preparation code wraps operands 2, 3 and 5 in a Pmode UNSPEC_VSIBADDR.
11637 (define_expand "avx2_gatherdi<mode>"
11638 [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand")
11639 (unspec:VEC_GATHER_MODE
11640 [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand")
11641 (mem:<ssescalarmode>
11643 [(match_operand 2 "vsib_address_operand")
11644 (match_operand:<VEC_GATHER_IDXDI>
11645 3 "register_operand")
11646 (match_operand:SI 5 "const1248_operand")]))
11647 (mem:BLK (scratch))
11648 (match_operand:<VEC_GATHER_SRCDI>
11649 4 "register_operand")]
11651 (clobber (match_scratch:VEC_GATHER_MODE 6))])]
11655 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
11656 operands[5]), UNSPEC_VSIBADDR);
;; Gather insn using a <VEC_GATHER_IDXDI> index vector.  Operand 2
;; (<VEC_GATHER_SRCDI> mode) is tied to the destination and operand 5
;; (<VEC_GATHER_SRCDI> mode) is tied to the clobbered scratch, operand 1.
;; The template prints the tied operands %5 and %2 rather than %1 and %0,
;; so the register names come from the <VEC_GATHER_SRCDI>-mode operands.
11659 (define_insn "*avx2_gatherdi<mode>"
11660 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
11661 (unspec:VEC_GATHER_MODE
11662 [(match_operand:<VEC_GATHER_SRCDI> 2 "register_operand" "0")
11663 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
11665 [(match_operand:P 3 "vsib_address_operand" "p")
11666 (match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x")
11667 (match_operand:SI 6 "const1248_operand" "n")]
11669 (mem:BLK (scratch))
11670 (match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" "1")]
11672 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
11674 "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %2|%2, %7, %5}"
11675 [(set_attr "type" "ssemov")
11676 (set_attr "prefix" "vex")
11677 (set_attr "mode" "<sseinsnmode>")])
;; Second form of the <VEC_GATHER_IDXDI> gather insn with the operands
;; renumbered: operator 6 wraps the address (operand 2), index vector
;; (operand 3) and scale (operand 5), and operand 4 is tied to the
;; clobbered scratch, operand 1.  When the gathered mode differs from
;; <VEC_GATHER_SRCDI>, the destination is printed with the %x modifier;
;; otherwise it is printed as %0.
11679 (define_insn "*avx2_gatherdi<mode>_2"
11680 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
11681 (unspec:VEC_GATHER_MODE
11683 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
11685 [(match_operand:P 2 "vsib_address_operand" "p")
11686 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x")
11687 (match_operand:SI 5 "const1248_operand" "n")]
11689 (mem:BLK (scratch))
11690 (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")]
11692 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
11695 if (<MODE>mode != <VEC_GATHER_SRCDI>mode)
11696 return "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %x0|%x0, %6, %4}";
11697 return "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}";
11699 [(set_attr "type" "ssemov")
11700 (set_attr "prefix" "vex")
11701 (set_attr "mode" "<sseinsnmode>")])
;; <VEC_GATHER_IDXDI> gather whose destination is in <VEC_GATHER_SRCDI>
;; mode: the result is a vec_select of elements 0..3.  Operand 2 is tied
;; to the destination and operand 5 to the clobbered VI4F_256 scratch,
;; operand 1.
11703 (define_insn "*avx2_gatherdi<mode>_3"
11704 [(set (match_operand:<VEC_GATHER_SRCDI> 0 "register_operand" "=&x")
11705 (vec_select:<VEC_GATHER_SRCDI>
11707 [(match_operand:<VEC_GATHER_SRCDI> 2 "register_operand" "0")
11708 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
11710 [(match_operand:P 3 "vsib_address_operand" "p")
11711 (match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x")
11712 (match_operand:SI 6 "const1248_operand" "n")]
11714 (mem:BLK (scratch))
11715 (match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" "1")]
11717 (parallel [(const_int 0) (const_int 1)
11718 (const_int 2) (const_int 3)])))
11719 (clobber (match_scratch:VI4F_256 1 "=&x"))]
11721 "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %0|%0, %7, %5}"
11722 [(set_attr "type" "ssemov")
11723 (set_attr "prefix" "vex")
11724 (set_attr "mode" "<sseinsnmode>")])
;; Renumbered form of the <VEC_GATHER_SRCDI>-destination gather: the
;; result is a vec_select of elements 0..3, operator 6 wraps the address
;; (operand 2), index vector (operand 3) and scale (operand 5), and
;; operand 4 is tied to the clobbered VI4F_256 scratch, operand 1.
11726 (define_insn "*avx2_gatherdi<mode>_4"
11727 [(set (match_operand:<VEC_GATHER_SRCDI> 0 "register_operand" "=&x")
11728 (vec_select:<VEC_GATHER_SRCDI>
11731 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
11733 [(match_operand:P 2 "vsib_address_operand" "p")
11734 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x")
11735 (match_operand:SI 5 "const1248_operand" "n")]
11737 (mem:BLK (scratch))
11738 (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")]
11740 (parallel [(const_int 0) (const_int 1)
11741 (const_int 2) (const_int 3)])))
11742 (clobber (match_scratch:VI4F_256 1 "=&x"))]
11744 "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}"
11745 [(set_attr "type" "ssemov")
11746 (set_attr "prefix" "vex")
11747 (set_attr "mode" "<sseinsnmode>")])