1 ;; GCC machine description for SSE instructions
2 ;; Copyright (C) 2005-2013 Free Software Foundation, Inc.
4 ;; This file is part of GCC.
6 ;; GCC is free software; you can redistribute it and/or modify
7 ;; it under the terms of the GNU General Public License as published by
8 ;; the Free Software Foundation; either version 3, or (at your option)
11 ;; GCC is distributed in the hope that it will be useful,
12 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
13 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 ;; GNU General Public License for more details.
16 ;; You should have received a copy of the GNU General Public License
17 ;; along with GCC; see the file COPYING3. If not see
18 ;; <http://www.gnu.org/licenses/>.
20 (define_c_enum "unspec" [
55 UNSPEC_XOP_UNSIGNED_CMP
66 UNSPEC_AESKEYGENASSIST
88 (define_c_enum "unspecv" [
;; VMOVE -- every vector mode accepted by the vector move patterns, the
;; V1TI/V2TI modes included.  Each 512-bit mode is conditional on
;; TARGET_AVX512F and each 256-bit mode on TARGET_AVX; the 128-bit modes
;; are listed without a condition.
(define_mode_iterator VMOVE
  [(V64QI "TARGET_AVX512F")
   (V32QI "TARGET_AVX")
   V16QI
   (V32HI "TARGET_AVX512F")
   (V16HI "TARGET_AVX")
   V8HI
   (V16SI "TARGET_AVX512F")
   (V8SI "TARGET_AVX")
   V4SI
   (V8DI "TARGET_AVX512F")
   (V4DI "TARGET_AVX")
   V2DI
   (V2TI "TARGET_AVX")
   V1TI
   (V16SF "TARGET_AVX512F")
   (V8SF "TARGET_AVX")
   V4SF
   (V8DF "TARGET_AVX512F")
   (V4DF "TARGET_AVX")
   V2DF])
;; V -- vector modes of 128, 256 and 512 bits.  Compared with VMOVE this
;; list has no V64QI, V32HI or TImode-element entries, and V2DF is only
;; enabled under TARGET_SSE2.
(define_mode_iterator V
  [(V32QI "TARGET_AVX")
   V16QI
   (V16HI "TARGET_AVX")
   V8HI
   (V16SI "TARGET_AVX512F")
   (V8SI "TARGET_AVX")
   V4SI
   (V8DI "TARGET_AVX512F")
   (V4DI "TARGET_AVX")
   V2DI
   (V16SF "TARGET_AVX512F")
   (V8SF "TARGET_AVX")
   V4SF
   (V8DF "TARGET_AVX512F")
   (V4DF "TARGET_AVX")
   (V2DF "TARGET_SSE2")])
;; Fixed-width mode lists.
;;   V_128     -- all 128-bit vector modes (V2DF only under TARGET_SSE2).
;;   V_256     -- all 256-bit vector modes, unconditionally.
;;   V_256_512 -- the 256-bit modes plus the 512-bit modes, the latter
;;                each conditional on TARGET_AVX512F.
(define_mode_iterator V_128
  [V16QI
   V8HI
   V4SI
   V2DI
   V4SF
   (V2DF "TARGET_SSE2")])

(define_mode_iterator V_256
  [V32QI
   V16HI
   V8SI
   V4DI
   V8SF
   V4DF])

(define_mode_iterator V_256_512
  [V32QI
   V16HI
   V8SI
   V4DI
   V8SF
   V4DF
   (V64QI "TARGET_AVX512F")
   (V32HI "TARGET_AVX512F")
   (V16SI "TARGET_AVX512F")
   (V8DI "TARGET_AVX512F")
   (V16SF "TARGET_AVX512F")
   (V8DF "TARGET_AVX512F")])
;; VF         -- every float vector mode (SF and DF elements, 128 to 512
;;               bits).
;; VF_128_256 -- the same list restricted to 128-bit and 256-bit modes.
;; In both, V2DF is conditional on TARGET_SSE2.
(define_mode_iterator VF
  [(V16SF "TARGET_AVX512F")
   (V8SF "TARGET_AVX")
   V4SF
   (V8DF "TARGET_AVX512F")
   (V4DF "TARGET_AVX")
   (V2DF "TARGET_SSE2")])

(define_mode_iterator VF_128_256
  [(V8SF "TARGET_AVX")
   V4SF
   (V4DF "TARGET_AVX")
   (V2DF "TARGET_SSE2")])
;; VF1         -- every float vector mode with SFmode elements.
;; VF1_128_256 -- the 128-bit and 256-bit subset of VF1.
(define_mode_iterator VF1
  [(V16SF "TARGET_AVX512F")
   (V8SF "TARGET_AVX")
   V4SF])

(define_mode_iterator VF1_128_256
  [(V8SF "TARGET_AVX")
   V4SF])
;; VF2         -- every float vector mode with DFmode elements.
;; VF2_128_256 -- the 128-bit and 256-bit subset of VF2.
;; VF2_512_256 -- the 512-bit and 256-bit subset of VF2 (no V2DF).
(define_mode_iterator VF2
  [(V8DF "TARGET_AVX512F")
   (V4DF "TARGET_AVX")
   V2DF])

(define_mode_iterator VF2_128_256
  [(V4DF "TARGET_AVX")
   V2DF])

(define_mode_iterator VF2_512_256
  [(V8DF "TARGET_AVX512F")
   (V4DF "TARGET_AVX")])
;; VF_128 -- the two 128-bit float vector modes; V2DF needs TARGET_SSE2.
(define_mode_iterator VF_128
  [V4SF
   (V2DF "TARGET_SSE2")])
164 ;; All 256bit vector float modes
165 (define_mode_iterator VF_256
168 ;; All 512bit vector float modes
169 (define_mode_iterator VF_512
;; VI -- every integer vector mode.  The two 512-bit entries (V16SI, V8DI)
;; need TARGET_AVX512F, the 256-bit entries need TARGET_AVX, and the
;; 128-bit entries are unconditional.
(define_mode_iterator VI
  [(V16SI "TARGET_AVX512F")
   (V8DI "TARGET_AVX512F")
   (V32QI "TARGET_AVX")
   V16QI
   (V16HI "TARGET_AVX")
   V8HI
   (V8SI "TARGET_AVX")
   V4SI
   (V4DI "TARGET_AVX")
   V2DI])
;; VI_AVX2 -- integer vector modes where the 256-bit entries are gated on
;; TARGET_AVX2 (rather than TARGET_AVX as in VI) and the 512-bit entries
;; on TARGET_AVX512F.
(define_mode_iterator VI_AVX2
  [(V32QI "TARGET_AVX2")
   V16QI
   (V16HI "TARGET_AVX2")
   V8HI
   (V16SI "TARGET_AVX512F")
   (V8SI "TARGET_AVX2")
   V4SI
   (V8DI "TARGET_AVX512F")
   (V4DI "TARGET_AVX2")
   V2DI])
;; VI1 -- the QImode-element integer vector modes (128 and 256 bits).
(define_mode_iterator VI1
  [(V32QI "TARGET_AVX")
   V16QI])

;; VI_UNALIGNED_LOADSTORE -- modes iterated over by the loaddqu/storedqu
;; patterns: the QImode vectors plus the 512-bit V16SI and V8DI.
(define_mode_iterator VI_UNALIGNED_LOADSTORE
  [(V32QI "TARGET_AVX")
   V16QI
   (V16SI "TARGET_AVX512F")
   (V8DI "TARGET_AVX512F")])

;; VI8 -- the DImode-element integer vector modes.
(define_mode_iterator VI8
  [(V8DI "TARGET_AVX512F")
   (V4DI "TARGET_AVX")
   V2DI])
;; Single-element-size integer iterators.  The digit after "VI" is the
;; element size in bytes.  The *_AVX2 lists hold the 128-bit mode plus the
;; 256-bit mode under TARGET_AVX2; the *_AVX512F lists additionally hold
;; the 512-bit mode under TARGET_AVX512F.
(define_mode_iterator VI1_AVX2
  [(V32QI "TARGET_AVX2")
   V16QI])

(define_mode_iterator VI2_AVX2
  [(V16HI "TARGET_AVX2")
   V8HI])

(define_mode_iterator VI2_AVX512F
  [(V32HI "TARGET_AVX512F")
   (V16HI "TARGET_AVX2")
   V8HI])

(define_mode_iterator VI4_AVX2
  [(V8SI "TARGET_AVX2")
   V4SI])

(define_mode_iterator VI4_AVX512F
  [(V16SI "TARGET_AVX512F")
   (V8SI "TARGET_AVX2")
   V4SI])

(define_mode_iterator VI8_AVX2
  [(V4DI "TARGET_AVX2")
   V2DI])

(define_mode_iterator VI8_AVX2_AVX512F
  [(V8DI "TARGET_AVX512F")
   (V4DI "TARGET_AVX2")
   V2DI])
;; VIMAX_AVX2 -- the TImode-element vector modes: V1TI, plus V2TI under
;; TARGET_AVX2.
;; ??? We should probably use TImode instead.
(define_mode_iterator VIMAX_AVX2
  [(V2TI "TARGET_AVX2")
   V1TI])

;; SSESCALARMODE -- like VIMAX_AVX2, but with scalar TI in place of V1TI.
;; ??? This should probably be dropped in favor of VIMAX_AVX2.
(define_mode_iterator SSESCALARMODE
  [(V2TI "TARGET_AVX2")
   TI])
;; Multi-element-size integer iterators.  The digits after "VI" list the
;; element sizes in bytes that the iterator covers.  256-bit modes are
;; gated on TARGET_AVX2; where present, 512-bit modes are gated on
;; TARGET_AVX512F.
(define_mode_iterator VI12_AVX2
  [(V32QI "TARGET_AVX2")
   V16QI
   (V16HI "TARGET_AVX2")
   V8HI])

(define_mode_iterator VI24_AVX2
  [(V16HI "TARGET_AVX2")
   V8HI
   (V8SI "TARGET_AVX2")
   V4SI])

;; Note: this list has 512-bit HI and SI entries but no V64QI.
(define_mode_iterator VI124_AVX512F
  [(V32QI "TARGET_AVX2")
   V16QI
   (V32HI "TARGET_AVX512F")
   (V16HI "TARGET_AVX2")
   V8HI
   (V16SI "TARGET_AVX512F")
   (V8SI "TARGET_AVX2")
   V4SI])

(define_mode_iterator VI124_AVX2
  [(V32QI "TARGET_AVX2")
   V16QI
   (V16HI "TARGET_AVX2")
   V8HI
   (V8SI "TARGET_AVX2")
   V4SI])

(define_mode_iterator VI248_AVX2
  [(V16HI "TARGET_AVX2")
   V8HI
   (V8SI "TARGET_AVX2")
   V4SI
   (V4DI "TARGET_AVX2")
   V2DI])

(define_mode_iterator VI48_AVX2
  [(V8SI "TARGET_AVX2")
   V4SI
   (V4DI "TARGET_AVX2")
   V2DI])
254 (define_mode_iterator V48_AVX2
257 (V4SI "TARGET_AVX2") (V2DI "TARGET_AVX2")
258 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")])
260 (define_mode_attr sse2_avx_avx512f
261 [(V16QI "sse2") (V32QI "avx") (V64QI "avx512f")
262 (V4SI "sse2") (V8SI "avx") (V16SI "avx512f")
264 (V16SF "avx512f") (V8SF "avx") (V4SF "avx")
265 (V8DF "avx512f") (V4DF "avx") (V2DF "avx")])
;; Per-mode ISA name fragments.  Each attribute yields the lower-case name
;; of an instruction-set level for the given mode; entries are grouped by
;; the string they produce.
;; NOTE(review): presumably spliced into pattern names the same way
;; <sse2_avx_avx512f> is in "<sse2_avx_avx512f>_loaddqu<mode>" -- the
;; users of these three attributes are not visible here.

;; 128-bit -> "sse2", 256-bit -> "avx2", 512-bit -> "avx512f".
(define_mode_attr sse2_avx2
  [(V16QI "sse2") (V8HI "sse2") (V4SI "sse2") (V2DI "sse2") (V1TI "sse2")
   (V32QI "avx2") (V16HI "avx2") (V8SI "avx2") (V4DI "avx2") (V2TI "avx2")
   (V16SI "avx512f") (V8DI "avx512f")])

;; 64-bit and 128-bit (and scalar TI) -> "ssse3", 256-bit -> "avx2".
(define_mode_attr ssse3_avx2
  [(V16QI "ssse3") (V4HI "ssse3") (V8HI "ssse3") (V4SI "ssse3")
   (V2DI "ssse3") (TI "ssse3")
   (V32QI "avx2") (V16HI "avx2") (V8SI "avx2") (V4DI "avx2") (V2TI "avx2")])

;; 128-bit -> "sse4_1", 256-bit -> "avx2", V16SI -> "avx512f".
(define_mode_attr sse4_1_avx2
  [(V16QI "sse4_1") (V8HI "sse4_1") (V4SI "sse4_1") (V2DI "sse4_1")
   (V32QI "avx2") (V16HI "avx2") (V8SI "avx2") (V4DI "avx2")
   (V16SI "avx512f")])
;; More per-mode ISA name fragments, grouped by the string produced.

;; Float modes -> "avx", integer modes -> "avx2" (128 and 256 bits).
(define_mode_attr avx_avx2
  [(V4SF "avx") (V2DF "avx") (V8SF "avx") (V4DF "avx")
   (V4SI "avx2") (V2DI "avx2") (V8SI "avx2") (V4DI "avx2")])

;; 128-bit integer modes -> "vec", 256-bit integer modes -> "avx2".
(define_mode_attr vec_avx2
  [(V16QI "vec") (V8HI "vec") (V4SI "vec") (V2DI "vec")
   (V32QI "avx2") (V16HI "avx2") (V8SI "avx2") (V4DI "avx2")])

;; 512-bit modes -> "avx512f", everything else listed -> "avx2".
(define_mode_attr avx2_avx512f
  [(V4SI "avx2") (V8SI "avx2") (V2DI "avx2") (V4DI "avx2")
   (V8SF "avx2") (V4DF "avx2")
   (V16SI "avx512f") (V8DI "avx512f") (V16SF "avx512f") (V8DF "avx512f")])
;; Letter selecting the float ("f") or integer ("i") flavour of the
;; 512-bit lane extract/broadcast mnemonics.  *mov<mode>_internal splices
;; it into "vextract<shuffletype>64x4", "vextract<shuffletype>32x4",
;; "vbroadcast<shuffletype>64x4" and "vbroadcast<shuffletype>32x4" when a
;; narrower-than-64-byte move touches SSE registers 16-31.
;;
;; Float-element modes map to "f"; every integer-element mode must map to
;; "i".  The V16HI entry used to read "u", which would have produced the
;; nonexistent mnemonics vextractu64x4 / vbroadcastu64x4 for V16HI.
(define_mode_attr shuffletype
  [(V16SF "f") (V16SI "i") (V8DF "f") (V8DI "i")
   (V8SF "f") (V8SI "i") (V4DF "f") (V4DI "i")
   (V4SF "f") (V4SI "i") (V2DF "f") (V2DI "i")
   (V32QI "i") (V16HI "i") (V16QI "i") (V8HI "i")
   (V64QI "i") (V1TI "i") (V2TI "i")])
;; ssedoublemode -- for HI/QI-element vectors, the mode with the same
;; number of elements and elements twice as wide.
(define_mode_attr ssedoublemode
  [(V16HI "V16SI")
   (V8HI "V8SI")
   (V4HI "V4SI")
   (V32QI "V32HI")
   (V16QI "V16HI")])

;; ssebytemode -- for DI-element vectors, the QImode-element vector mode
;; of the same total size.
(define_mode_attr ssebytemode
  [(V4DI "V32QI")
   (V2DI "V16QI")])
;; VI_128 -- the four 128-bit integer vector modes.
(define_mode_iterator VI_128
  [V16QI
   V8HI
   V4SI
   V2DI])

;; VI_256 -- the four 256-bit integer vector modes.
(define_mode_iterator VI_256
  [V32QI
   V16HI
   V8SI
   V4DI])
;; Assorted unconditional subsets of the 128-bit integer vector modes.
;; The digits in each name are the element sizes in bytes it covers.
(define_mode_iterator VI12_128
  [V16QI V8HI])
(define_mode_iterator VI14_128
  [V16QI V4SI])
(define_mode_iterator VI124_128
  [V16QI V8HI V4SI])
(define_mode_iterator VI128_128
  [V16QI V8HI V2DI])
(define_mode_iterator VI24_128
  [V8HI V4SI])
(define_mode_iterator VI248_128
  [V8HI V4SI V2DI])
(define_mode_iterator VI48_128
  [V4SI V2DI])

;; The same idea for the 256-bit integer vector modes.
(define_mode_iterator VI124_256
  [V32QI V16HI V8SI])
(define_mode_iterator VI48_256
  [V8SI V4DI])
;; Integer/float pairs with matching element size and vector width:
;; "4" = 4-byte elements (SI/SF), "8" = 8-byte elements (DI/DF).
(define_mode_iterator VI4F_128
  [V4SI V4SF])
(define_mode_iterator VI8F_128
  [V2DI V2DF])
(define_mode_iterator VI4F_256
  [V8SI V8SF])
(define_mode_iterator VI8F_256
  [V4DI V4DF])
;; sse -- name of the SSE level required by each float mode (scalar and
;; vector).  Used as the leading part of pattern names such as
;; "<sse>_loadu<ssemodesuffix><avxsizesuffix>".
(define_mode_attr sse
  [(SF "sse")
   (DF "sse2")
   (V4SF "sse")
   (V2DF "sse2")
   (V16SF "avx512f")
   (V8SF "avx")
   (V8DF "avx512f")
   (V4DF "avx")])

;; sse2 -- ISA name for QI/DI-element integer vectors by width:
;; 128-bit "sse2", 256-bit "avx", 512-bit "avx512f".
(define_mode_attr sse2
  [(V16QI "sse2")
   (V32QI "avx")
   (V64QI "avx512f")
   (V2DI "sse2")
   (V4DI "avx")
   (V8DI "avx512f")])

;; sse3 -- ISA name for the QImode vectors: "sse3" or, for 256-bit, "avx".
(define_mode_attr sse3
  [(V16QI "sse3")
   (V32QI "avx")])
358 (define_mode_attr sse4_1
359 [(V4SF "sse4_1") (V2DF "sse4_1")
360 (V8SF "avx") (V4DF "avx")
;; avxsizesuffix -- vector width as a name suffix: "512" for 512-bit
;; modes, "256" for 256-bit modes and the empty string for 128-bit modes.
(define_mode_attr avxsizesuffix
  [;; 512-bit modes
   (V64QI "512") (V32HI "512") (V16SI "512") (V8DI "512")
   (V16SF "512") (V8DF "512")
   ;; 256-bit modes
   (V32QI "256") (V16HI "256") (V8SI "256") (V4DI "256")
   (V8SF "256") (V4DF "256")
   ;; 128-bit modes: no suffix
   (V16QI "") (V8HI "") (V4SI "") (V2DI "")
   (V4SF "") (V2DF "")])
371 ;; SSE instruction mode
372 (define_mode_attr sseinsnmode
373 [(V64QI "XI") (V32HI "XI") (V16SI "XI") (V8DI "XI")
374 (V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI") (V2TI "OI")
375 (V16QI "TI") (V8HI "TI") (V4SI "TI") (V2DI "TI") (V1TI "TI")
376 (V16SF "V16SF") (V8DF "V8DF")
377 (V8SF "V8SF") (V4DF "V4DF")
378 (V4SF "V4SF") (V2DF "V2DF")
;; sseintvecmode -- the integer vector mode with the same element size and
;; element count as the given mode.  Float modes map to their integer
;; counterpart; the integer modes listed map to themselves.
(define_mode_attr sseintvecmode
  [;; float -> integer of the same shape
   (V16SF "V16SI") (V8SF "V8SI") (V4SF "V4SI")
   (V8DF "V8DI") (V4DF "V4DI") (V2DF "V2DI")
   ;; integer -> itself
   (V16SI "V16SI") (V8SI "V8SI") (V4SI "V4SI")
   (V8DI "V8DI") (V4DI "V4DI") (V2DI "V2DI")
   (V16HI "V16HI") (V8HI "V8HI")
   (V32QI "V32QI") (V16QI "V16QI")])
392 (define_mode_attr sseintvecmodelower
394 (V8SF "v8si") (V4DF "v4di")
395 (V4SF "v4si") (V2DF "v2di")
396 (V8SI "v8si") (V4DI "v4di")
397 (V4SI "v4si") (V2DI "v2di")
398 (V16HI "v16hi") (V8HI "v8hi")
399 (V32QI "v32qi") (V16QI "v16qi")])
;; ssedoublevecmode -- the vector mode with the same element type and
;; twice as many elements (i.e. double the total size).
(define_mode_attr ssedoublevecmode
  [;; 128-bit -> 256-bit
   (V16QI "V32QI") (V8HI "V16HI") (V4SI "V8SI") (V2DI "V4DI")
   (V4SF "V8SF") (V2DF "V4DF")
   ;; 256-bit -> 512-bit
   (V32QI "V64QI") (V16HI "V32HI") (V8SI "V16SI") (V4DI "V8DI")
   (V8SF "V16SF") (V4DF "V8DF")])
408 ;; Mapping of vector modes to a vector mode of half size
409 (define_mode_attr ssehalfvecmode
410 [(V64QI "V32QI") (V32HI "V16HI") (V16SI "V8SI") (V8DI "V4DI")
411 (V32QI "V16QI") (V16HI "V8HI") (V8SI "V4SI") (V4DI "V2DI")
412 (V16QI "V8QI") (V8HI "V4HI") (V4SI "V2SI")
413 (V16SF "V8SF") (V8DF "V4DF")
414 (V8SF "V4SF") (V4DF "V2DF")
;; ssePSmode -- mapping of each vector mode to the packed-single (SFmode
;; element) vector mode of the same total size.  The insn "mode"
;; attributes in this file select it under
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL.
(define_mode_attr ssePSmode
  [;; 512-bit modes -> V16SF
   (V64QI "V16SF") (V32HI "V16SF") (V16SI "V16SF") (V8DI "V16SF")
   (V16SF "V16SF") (V8DF "V16SF")
   ;; 256-bit modes -> V8SF
   (V32QI "V8SF") (V16HI "V8SF") (V8SI "V8SF") (V4DI "V8SF")
   (V2TI "V8SF") (V8SF "V8SF") (V4DF "V8SF")
   ;; 128-bit modes -> V4SF
   (V16QI "V4SF") (V8HI "V4SF") (V4SI "V4SF") (V2DI "V4SF")
   (V1TI "V4SF") (V4SF "V4SF") (V2DF "V4SF")])
;; ssescalarmode -- the scalar mode of one element of each vector mode.
(define_mode_attr ssescalarmode
  [;; 512-bit modes
   (V64QI "QI") (V32HI "HI") (V16SI "SI") (V8DI "DI")
   (V16SF "SF") (V8DF "DF")
   ;; 256-bit modes
   (V32QI "QI") (V16HI "HI") (V8SI "SI") (V4DI "DI")
   (V8SF "SF") (V4DF "DF")
   ;; 128-bit modes
   (V16QI "QI") (V8HI "HI") (V4SI "SI") (V2DI "DI")
   (V4SF "SF") (V2DF "DF")])
;; ssexmmmode -- the 128-bit vector mode with the same element type as
;; the given mode (128-bit modes map to themselves).
(define_mode_attr ssexmmmode
  [;; 512-bit modes
   (V64QI "V16QI") (V32HI "V8HI") (V16SI "V4SI") (V8DI "V2DI")
   (V16SF "V4SF") (V8DF "V2DF")
   ;; 256-bit modes
   (V32QI "V16QI") (V16HI "V8HI") (V8SI "V4SI") (V4DI "V2DI")
   (V8SF "V4SF") (V4DF "V2DF")
   ;; 128-bit modes
   (V16QI "V16QI") (V8HI "V8HI") (V4SI "V4SI") (V2DI "V2DI")
   (V4SF "V4SF") (V2DF "V2DF")])
447 ;; Pointer size override for scalar modes (Intel asm dialect)
448 (define_mode_attr iptr
449 [(V32QI "b") (V16HI "w") (V8SI "k") (V4DI "q")
450 (V16QI "b") (V8HI "w") (V4SI "k") (V2DI "q")
451 (V8SF "k") (V4DF "q")
452 (V4SF "k") (V2DF "q")
;; ssescalarnum -- how many scalar elements each vector mode holds, as a
;; decimal string.
(define_mode_attr ssescalarnum
  [;; 512-bit modes (no V32HI entry)
   (V64QI "64") (V16SI "16") (V8DI "8")
   (V16SF "16") (V8DF "8")
   ;; 256-bit modes
   (V32QI "32") (V16HI "16") (V8SI "8") (V4DI "4")
   (V8SF "8") (V4DF "4")
   ;; 128-bit modes
   (V16QI "16") (V8HI "8") (V4SI "4") (V2DI "2")
   (V4SF "4") (V2DF "2")])
;; ssescalarnummask -- element-index mask for each vector mode: the
;; element count minus one, as a decimal string.  Only 128-bit and
;; 256-bit modes are listed.
(define_mode_attr ssescalarnummask
  [;; 256-bit modes
   (V32QI "31") (V16HI "15") (V8SI "7") (V4DI "3")
   (V8SF "7") (V4DF "3")
   ;; 128-bit modes
   (V16QI "15") (V8HI "7") (V4SI "3") (V2DI "1")
   (V4SF "3") (V2DF "1")])
471 ;; SSE prefix for integer vector modes
472 (define_mode_attr sseintprefix
473 [(V2DI "p") (V2DF "")
476 (V8SI "p") (V8SF "")])
478 ;; SSE scalar suffix for vector modes
479 (define_mode_attr ssescalarmodesuffix
481 (V8SF "ss") (V4DF "sd")
482 (V4SF "ss") (V2DF "sd")
483 (V8SI "ss") (V4DI "sd")
;; sseunpackmode -- result mode of an unpack: same total size, half as
;; many elements, each twice as wide.
(define_mode_attr sseunpackmode
  [;; 128-bit
   (V16QI "V8HI") (V8HI "V4SI") (V4SI "V2DI")
   ;; 256-bit
   (V32QI "V16HI") (V16HI "V8SI") (V8SI "V4DI")
   ;; 512-bit
   (V64QI "V32HI") (V32HI "V16SI") (V16SI "V8DI")])

;; ssepackmode -- result mode of a pack: same total size, twice as many
;; elements, each half as wide.  Only 128-bit and 256-bit modes are listed.
(define_mode_attr ssepackmode
  [;; 128-bit
   (V8HI "V16QI") (V4SI "V8HI") (V2DI "V4SI")
   ;; 256-bit
   (V16HI "V32QI") (V8SI "V16HI") (V4DI "V8SI")])
;; sserotatemax -- largest integer allowed by the XOP rotate-immediate
;; constraint for each 128-bit integer mode: the element width in bits
;; minus one.
(define_mode_attr sserotatemax
  [(V16QI "7")
   (V8HI "15")
   (V4SI "31")
   (V2DI "63")])
;; castmode -- suffix used in cast intrinsic names for each 256-bit mode.
(define_mode_attr castmode
  [(V8SI "si")
   (V8SF "ps")
   (V4DF "pd")])
;; extsuffix -- mnemonic suffix for an extension, keyed on the RTL code:
;; "sx" for sign_extend, "zx" for zero_extend.
(define_code_attr extsuffix
  [(sign_extend "sx")
   (zero_extend "zx")])
;; i128 -- lane-width part of a mnemonic.
;;   256-bit float modes:   "f128".
;;   256-bit integer modes: "%~128", i.e. i128 for integer vectors and
;;                          TARGET_AVX2, f128 otherwise.
;;   512-bit modes:         "f64x4" (float) or "i64x4" (integer).
(define_mode_attr i128
  [;; 256-bit modes
   (V8SF "f128") (V4DF "f128")
   (V32QI "%~128") (V16HI "%~128") (V8SI "%~128") (V4DI "%~128")
   ;; 512-bit modes
   (V16SF "f64x4") (V8DF "f64x4")
   (V64QI "i64x4") (V32HI "i64x4") (V16SI "i64x4") (V8DI "i64x4")])
;; AVX256MODE2P -- three unconditional 256-bit modes: V8SI, V8SF, V4DF.
(define_mode_iterator AVX256MODE2P
  [V8SI
   V8SF
   V4DF])
;; blendbits -- immediate mask for blend instructions with one bit per
;; element, all set: 2^N - 1 for an N-element float vector.
(define_mode_attr blendbits
  [(V8SF "255")
   (V4SF "15")
   (V4DF "15")
   (V2DF "3")])
520 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
522 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
526 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
528 ;; All of these patterns are enabled for SSE1 as well as SSE2.
529 ;; This is essential for maintaining stable calling conventions.
531 (define_expand "mov<mode>"
532 [(set (match_operand:VMOVE 0 "nonimmediate_operand")
533 (match_operand:VMOVE 1 "nonimmediate_operand"))]
536 ix86_expand_vector_move (<MODE>mode, operands);
;; Internal move insn for every VMOVE mode.  Three alternatives:
;;   0: constant ("C") -> register, opcode chosen by
;;      standard_sse_constant_opcode;
;;   1: register or memory -> register;
;;   2: register -> memory.
;; At least one operand must be a register.  The output code picks the
;; mnemonic from the insn's "mode" attribute and, where visible, uses the
;; unaligned form when either operand is a misaligned_operand.  Under
;; TARGET_AVX512F, moves narrower than 64 bytes that involve SSE registers
;; 16-31 are emitted as vextract/vbroadcast of the containing 512-bit
;; register (%g operands), or as a wider reg-reg move.
;; NOTE(review): the EXT_REX_SSE_REGNO_P tests apply REGNO to operands[0]
;; and operands[1] before the memory_operand checks that follow, so one of
;; them may be a MEM at that point -- confirm a REG_P guard is not needed.
;; NOTE(review): this excerpt omits several lines of the pattern (case
;; labels, braces, part of the condition); read it against the full file.
540 (define_insn "*mov<mode>_internal"
541 [(set (match_operand:VMOVE 0 "nonimmediate_operand" "=v,v ,m")
542 (match_operand:VMOVE 1 "nonimmediate_or_sse_const_operand" "C ,vm,v"))]
544 && (register_operand (operands[0], <MODE>mode)
545 || register_operand (operands[1], <MODE>mode))"
547 int mode = get_attr_mode (insn);
548 switch (which_alternative)
551 return standard_sse_constant_opcode (insn, operands[1]);
554 /* There is no evex-encoded vmov* for sizes smaller than 64-bytes
555 in avx512f, so we need to use workarounds, to access sse registers
556 16-31, which are evex-only. */
557 if (TARGET_AVX512F && GET_MODE_SIZE (<MODE>mode) < 64
558 && (EXT_REX_SSE_REGNO_P (REGNO (operands[0]))
559 || EXT_REX_SSE_REGNO_P (REGNO (operands[1]))))
561 if (memory_operand (operands[0], <MODE>mode))
563 if (GET_MODE_SIZE (<MODE>mode) == 32)
564 return "vextract<shuffletype>64x4\t{$0x0, %g1, %0|%0, %g1, 0x0}";
565 else if (GET_MODE_SIZE (<MODE>mode) == 16)
566 return "vextract<shuffletype>32x4\t{$0x0, %g1, %0|%0, %g1, 0x0}";
570 else if (memory_operand (operands[1], <MODE>mode))
572 if (GET_MODE_SIZE (<MODE>mode) == 32)
573 return "vbroadcast<shuffletype>64x4\t{%1, %g0|%g0, %1}";
574 else if (GET_MODE_SIZE (<MODE>mode) == 16)
575 return "vbroadcast<shuffletype>32x4\t{%1, %g0|%g0, %1}";
580 /* Reg -> reg move is always aligned. Just use wider move. */
585 return "vmovaps\t{%g1, %g0|%g0, %g1}";
588 return "vmovapd\t{%g1, %g0|%g0, %g1}";
591 return "vmovdqa64\t{%g1, %g0|%g0, %g1}";
602 && (misaligned_operand (operands[0], <MODE>mode)
603 || misaligned_operand (operands[1], <MODE>mode)))
604 return "vmovups\t{%1, %0|%0, %1}";
606 return "%vmovaps\t{%1, %0|%0, %1}";
612 && (misaligned_operand (operands[0], <MODE>mode)
613 || misaligned_operand (operands[1], <MODE>mode)))
614 return "vmovupd\t{%1, %0|%0, %1}";
616 return "%vmovapd\t{%1, %0|%0, %1}";
621 && (misaligned_operand (operands[0], <MODE>mode)
622 || misaligned_operand (operands[1], <MODE>mode)))
623 return "vmovdqu\t{%1, %0|%0, %1}";
625 return "%vmovdqa\t{%1, %0|%0, %1}";
627 if (misaligned_operand (operands[0], <MODE>mode)
628 || misaligned_operand (operands[1], <MODE>mode))
629 return "vmovdqu64\t{%1, %0|%0, %1}";
631 return "vmovdqa64\t{%1, %0|%0, %1}";
640 [(set_attr "type" "sselog1,ssemov,ssemov")
641 (set_attr "prefix" "maybe_vex")
643 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
644 (const_string "<ssePSmode>")
645 (and (eq_attr "alternative" "2")
646 (match_test "TARGET_SSE_TYPELESS_STORES"))
647 (const_string "<ssePSmode>")
648 (match_test "TARGET_AVX")
649 (const_string "<sseinsnmode>")
650 (ior (not (match_test "TARGET_SSE2"))
651 (match_test "optimize_function_for_size_p (cfun)"))
652 (const_string "V4SF")
653 (and (eq_attr "alternative" "0")
654 (match_test "TARGET_SSE_LOAD0_BY_PXOR"))
657 (const_string "<sseinsnmode>")))])
659 (define_insn "sse2_movq128"
660 [(set (match_operand:V2DI 0 "register_operand" "=x")
663 (match_operand:V2DI 1 "nonimmediate_operand" "xm")
664 (parallel [(const_int 0)]))
667 "%vmovq\t{%1, %0|%0, %q1}"
668 [(set_attr "type" "ssemov")
669 (set_attr "prefix" "maybe_vex")
670 (set_attr "mode" "TI")])
672 ;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
673 ;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
674 ;; from memory, we'd prefer to load the memory directly into the %xmm
675 ;; register. To facilitate this happy circumstance, this pattern won't
676 ;; split until after register allocation. If the 64-bit value didn't
677 ;; come from memory, this is the best we can do. This is much better
678 ;; than storing %edx:%eax into a stack temporary and loading an %xmm
681 (define_insn_and_split "movdi_to_sse"
683 [(set (match_operand:V4SI 0 "register_operand" "=?x,x")
684 (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
685 (clobber (match_scratch:V4SI 2 "=&x,X"))])]
686 "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_TO_VEC"
688 "&& reload_completed"
691 if (register_operand (operands[1], DImode))
693 /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
694 Assemble the 64-bit DImode value in an xmm register. */
695 emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
696 gen_rtx_SUBREG (SImode, operands[1], 0)));
697 emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
698 gen_rtx_SUBREG (SImode, operands[1], 4)));
699 emit_insn (gen_vec_interleave_lowv4si (operands[0], operands[0],
702 else if (memory_operand (operands[1], DImode))
703 emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]),
704 operands[1], const0_rtx));
710 [(set (match_operand:V4SF 0 "register_operand")
711 (match_operand:V4SF 1 "zero_extended_scalar_load_operand"))]
712 "TARGET_SSE && reload_completed"
715 (vec_duplicate:V4SF (match_dup 1))
719 operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
720 operands[2] = CONST0_RTX (V4SFmode);
724 [(set (match_operand:V2DF 0 "register_operand")
725 (match_operand:V2DF 1 "zero_extended_scalar_load_operand"))]
726 "TARGET_SSE2 && reload_completed"
727 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
729 operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
730 operands[2] = CONST0_RTX (DFmode);
733 (define_expand "push<mode>1"
734 [(match_operand:VMOVE 0 "register_operand")]
737 ix86_expand_push (<MODE>mode, operands[0]);
741 (define_expand "movmisalign<mode>"
742 [(set (match_operand:VMOVE 0 "nonimmediate_operand")
743 (match_operand:VMOVE 1 "nonimmediate_operand"))]
746 ix86_expand_vector_move_misalign (<MODE>mode, operands);
750 (define_insn "<sse>_loadu<ssemodesuffix><avxsizesuffix>"
751 [(set (match_operand:VF 0 "register_operand" "=v")
753 [(match_operand:VF 1 "nonimmediate_operand" "vm")]
757 switch (get_attr_mode (insn))
762 return "%vmovups\t{%1, %0|%0, %1}";
764 return "%vmovu<ssemodesuffix>\t{%1, %0|%0, %1}";
767 [(set_attr "type" "ssemov")
768 (set_attr "movu" "1")
769 (set_attr "prefix" "maybe_vex")
771 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
772 (const_string "<ssePSmode>")
773 (match_test "TARGET_AVX")
774 (const_string "<MODE>")
775 (match_test "optimize_function_for_size_p (cfun)")
776 (const_string "V4SF")
778 (const_string "<MODE>")))])
780 (define_insn "<sse>_storeu<ssemodesuffix><avxsizesuffix>"
781 [(set (match_operand:VF 0 "memory_operand" "=m")
783 [(match_operand:VF 1 "register_operand" "v")]
787 switch (get_attr_mode (insn))
792 return "%vmovups\t{%1, %0|%0, %1}";
794 return "%vmovu<ssemodesuffix>\t{%1, %0|%0, %1}";
797 [(set_attr "type" "ssemov")
798 (set_attr "movu" "1")
799 (set_attr "prefix" "maybe_vex")
801 (cond [(ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
802 (match_test "TARGET_SSE_TYPELESS_STORES"))
803 (const_string "<ssePSmode>")
804 (match_test "TARGET_AVX")
805 (const_string "<MODE>")
806 (match_test "optimize_function_for_size_p (cfun)")
807 (const_string "V4SF")
809 (const_string "<MODE>")))])
811 (define_insn "<sse2_avx_avx512f>_loaddqu<mode>"
812 [(set (match_operand:VI_UNALIGNED_LOADSTORE 0 "register_operand" "=v")
813 (unspec:VI_UNALIGNED_LOADSTORE
814 [(match_operand:VI_UNALIGNED_LOADSTORE 1 "nonimmediate_operand" "vm")]
818 switch (get_attr_mode (insn))
822 return "%vmovups\t{%1, %0|%0, %1}";
824 if (<MODE>mode == V8DImode)
825 return "vmovdqu64\t{%1, %0|%0, %1}";
827 return "vmovdqu32\t{%1, %0|%0, %1}";
829 return "%vmovdqu\t{%1, %0|%0, %1}";
832 [(set_attr "type" "ssemov")
833 (set_attr "movu" "1")
834 (set (attr "prefix_data16")
836 (match_test "TARGET_AVX")
839 (set_attr "prefix" "maybe_vex")
841 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
842 (const_string "<ssePSmode>")
843 (match_test "TARGET_AVX")
844 (const_string "<sseinsnmode>")
845 (match_test "optimize_function_for_size_p (cfun)")
846 (const_string "V4SF")
848 (const_string "<sseinsnmode>")))])
850 (define_insn "<sse2_avx_avx512f>_storedqu<mode>"
851 [(set (match_operand:VI_UNALIGNED_LOADSTORE 0 "memory_operand" "=m")
852 (unspec:VI_UNALIGNED_LOADSTORE
853 [(match_operand:VI_UNALIGNED_LOADSTORE 1 "register_operand" "v")]
857 switch (get_attr_mode (insn))
861 return "%vmovups\t{%1, %0|%0, %1}";
863 if (<MODE>mode == V8DImode)
864 return "vmovdqu64\t{%1, %0|%0, %1}";
866 return "vmovdqu32\t{%1, %0|%0, %1}";
868 return "%vmovdqu\t{%1, %0|%0, %1}";
871 [(set_attr "type" "ssemov")
872 (set_attr "movu" "1")
873 (set (attr "prefix_data16")
875 (match_test "TARGET_AVX")
878 (set_attr "prefix" "maybe_vex")
880 (cond [(ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
881 (match_test "TARGET_SSE_TYPELESS_STORES"))
882 (const_string "<ssePSmode>")
883 (match_test "TARGET_AVX")
884 (const_string "<sseinsnmode>")
885 (match_test "optimize_function_for_size_p (cfun)")
886 (const_string "V4SF")
888 (const_string "<sseinsnmode>")))])
890 (define_insn "<sse3>_lddqu<avxsizesuffix>"
891 [(set (match_operand:VI1 0 "register_operand" "=x")
892 (unspec:VI1 [(match_operand:VI1 1 "memory_operand" "m")]
895 "%vlddqu\t{%1, %0|%0, %1}"
896 [(set_attr "type" "ssemov")
897 (set_attr "movu" "1")
898 (set (attr "prefix_data16")
900 (match_test "TARGET_AVX")
903 (set (attr "prefix_rep")
905 (match_test "TARGET_AVX")
908 (set_attr "prefix" "maybe_vex")
909 (set_attr "mode" "<sseinsnmode>")])
911 (define_insn "sse2_movnti<mode>"
912 [(set (match_operand:SWI48 0 "memory_operand" "=m")
913 (unspec:SWI48 [(match_operand:SWI48 1 "register_operand" "r")]
916 "movnti\t{%1, %0|%0, %1}"
917 [(set_attr "type" "ssemov")
918 (set_attr "prefix_data16" "0")
919 (set_attr "mode" "<MODE>")])
921 (define_insn "<sse>_movnt<mode>"
922 [(set (match_operand:VF 0 "memory_operand" "=m")
924 [(match_operand:VF 1 "register_operand" "v")]
927 "%vmovnt<ssemodesuffix>\t{%1, %0|%0, %1}"
928 [(set_attr "type" "ssemov")
929 (set_attr "prefix" "maybe_vex")
930 (set_attr "mode" "<MODE>")])
932 (define_insn "<sse2>_movnt<mode>"
933 [(set (match_operand:VI8 0 "memory_operand" "=m")
934 (unspec:VI8 [(match_operand:VI8 1 "register_operand" "v")]
937 "%vmovntdq\t{%1, %0|%0, %1}"
938 [(set_attr "type" "ssecvt")
939 (set (attr "prefix_data16")
941 (match_test "TARGET_AVX")
944 (set_attr "prefix" "maybe_vex")
945 (set_attr "mode" "<sseinsnmode>")])
947 ; Expand patterns for non-temporal stores. At the moment, only those
948 ; that directly map to insns are defined; it would be possible to
949 ; define patterns for other modes that would expand to several insns.
;; STORENT_MODE -- modes for which a "storent<mode>" expander is
;; generated, each with the target condition under which it is enabled
;; (V4SF has none).
(define_mode_iterator STORENT_MODE
  [;; scalar integer
   (DI "TARGET_SSE2 && TARGET_64BIT")
   (SI "TARGET_SSE2")
   ;; scalar float
   (SF "TARGET_SSE4A")
   (DF "TARGET_SSE4A")
   ;; DImode-element vectors
   (V8DI "TARGET_AVX512F")
   (V4DI "TARGET_AVX")
   (V2DI "TARGET_SSE2")
   ;; SFmode-element vectors
   (V16SF "TARGET_AVX512F")
   (V8SF "TARGET_AVX")
   V4SF
   ;; DFmode-element vectors
   (V8DF "TARGET_AVX512F")
   (V4DF "TARGET_AVX")
   (V2DF "TARGET_SSE2")])
959 (define_expand "storent<mode>"
960 [(set (match_operand:STORENT_MODE 0 "memory_operand")
962 [(match_operand:STORENT_MODE 1 "register_operand")]
966 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
968 ;; Parallel floating point arithmetic
970 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
972 (define_expand "<code><mode>2"
973 [(set (match_operand:VF 0 "register_operand")
975 (match_operand:VF 1 "register_operand")))]
977 "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
979 (define_insn_and_split "*absneg<mode>2"
980 [(set (match_operand:VF 0 "register_operand" "=x,x,v,v")
981 (match_operator:VF 3 "absneg_operator"
982 [(match_operand:VF 1 "nonimmediate_operand" "0, xm, v, m")]))
983 (use (match_operand:VF 2 "nonimmediate_operand" "xm, 0, vm,v"))]
986 "&& reload_completed"
989 enum rtx_code absneg_op;
995 if (MEM_P (operands[1]))
996 op1 = operands[2], op2 = operands[1];
998 op1 = operands[1], op2 = operands[2];
1003 if (rtx_equal_p (operands[0], operands[1]))
1009 absneg_op = GET_CODE (operands[3]) == NEG ? XOR : AND;
1010 t = gen_rtx_fmt_ee (absneg_op, <MODE>mode, op1, op2);
1011 t = gen_rtx_SET (VOIDmode, operands[0], t);
1015 [(set_attr "isa" "noavx,noavx,avx,avx")])
1017 (define_expand "<plusminus_insn><mode>3"
1018 [(set (match_operand:VF 0 "register_operand")
1020 (match_operand:VF 1 "nonimmediate_operand")
1021 (match_operand:VF 2 "nonimmediate_operand")))]
1023 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
1025 (define_insn "*<plusminus_insn><mode>3"
1026 [(set (match_operand:VF 0 "register_operand" "=x,v")
1028 (match_operand:VF 1 "nonimmediate_operand" "<comm>0,v")
1029 (match_operand:VF 2 "nonimmediate_operand" "xm,vm")))]
1030 "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1032 <plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
1033 v<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1034 [(set_attr "isa" "noavx,avx")
1035 (set_attr "type" "sseadd")
1036 (set_attr "prefix" "orig,vex")
1037 (set_attr "mode" "<MODE>")])
1039 (define_insn "<sse>_vm<plusminus_insn><mode>3"
1040 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1043 (match_operand:VF_128 1 "register_operand" "0,v")
1044 (match_operand:VF_128 2 "nonimmediate_operand" "xm,vm"))
1049 <plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
1050 v<plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %<iptr>2}"
1051 [(set_attr "isa" "noavx,avx")
1052 (set_attr "type" "sseadd")
1053 (set_attr "prefix" "orig,vex")
1054 (set_attr "mode" "<ssescalarmode>")])
1056 (define_expand "mul<mode>3"
1057 [(set (match_operand:VF 0 "register_operand")
1059 (match_operand:VF 1 "nonimmediate_operand")
1060 (match_operand:VF 2 "nonimmediate_operand")))]
1062 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
1064 (define_insn "*mul<mode>3"
1065 [(set (match_operand:VF 0 "register_operand" "=x,v")
1067 (match_operand:VF 1 "nonimmediate_operand" "%0,v")
1068 (match_operand:VF 2 "nonimmediate_operand" "xm,vm")))]
1069 "TARGET_SSE && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
1071 mul<ssemodesuffix>\t{%2, %0|%0, %2}
1072 vmul<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1073 [(set_attr "isa" "noavx,avx")
1074 (set_attr "type" "ssemul")
1075 (set_attr "prefix" "orig,vex")
1076 (set_attr "btver2_decode" "direct,double")
1077 (set_attr "mode" "<MODE>")])
1079 (define_insn "<sse>_vm<multdiv_mnemonic><mode>3"
1080 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1083 (match_operand:VF_128 1 "register_operand" "0,v")
1084 (match_operand:VF_128 2 "nonimmediate_operand" "xm,vm"))
1089 <multdiv_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
1090 v<multdiv_mnemonic><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %<iptr>2}"
1091 [(set_attr "isa" "noavx,avx")
1092 (set_attr "type" "sse<multdiv_mnemonic>")
1093 (set_attr "prefix" "orig,maybe_evex")
1094 (set_attr "btver2_decode" "direct,double")
1095 (set_attr "mode" "<ssescalarmode>")])
1097 (define_expand "div<mode>3"
1098 [(set (match_operand:VF2 0 "register_operand")
1099 (div:VF2 (match_operand:VF2 1 "register_operand")
1100 (match_operand:VF2 2 "nonimmediate_operand")))]
1102 "ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);")
;; Division expander for the SFmode-element vector modes (VF1).
;; Operand 1 must be a register; operand 2 may be a register or memory.
;; After ix86_fixup_binary_operands_no_copy has adjusted the operands, the
;; expander can emit a software division sequence through
;; ix86_emit_swdivsf instead of a plain divide.  The visible conditions
;; for that are: TARGET_RECIP_VEC_DIV, the insn is not being optimized for
;; size, and flag_finite_math_only, !flag_trapping_math and
;; flag_unsafe_math_optimizations all hold.
;; NOTE(review): the first line of that condition and the fall-through
;; path are not visible in this excerpt.
1104 (define_expand "div<mode>3"
1105 [(set (match_operand:VF1 0 "register_operand")
1106 (div:VF1 (match_operand:VF1 1 "register_operand")
1107 (match_operand:VF1 2 "nonimmediate_operand")))]
1110 ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);
1113 && TARGET_RECIP_VEC_DIV
1114 && !optimize_insn_for_size_p ()
1115 && flag_finite_math_only && !flag_trapping_math
1116 && flag_unsafe_math_optimizations)
1118 ix86_emit_swdivsf (operands[0], operands[1], operands[2], <MODE>mode);
1123 (define_insn "<sse>_div<mode>3"
1124 [(set (match_operand:VF 0 "register_operand" "=x,v")
1126 (match_operand:VF 1 "register_operand" "0,v")
1127 (match_operand:VF 2 "nonimmediate_operand" "xm,vm")))]
1130 div<ssemodesuffix>\t{%2, %0|%0, %2}
1131 vdiv<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1132 [(set_attr "isa" "noavx,avx")
1133 (set_attr "type" "ssediv")
1134 (set_attr "prefix" "orig,vex")
1135 (set_attr "mode" "<MODE>")])
1137 (define_insn "<sse>_rcp<mode>2"
1138 [(set (match_operand:VF1_128_256 0 "register_operand" "=x")
1140 [(match_operand:VF1_128_256 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
1142 "%vrcpps\t{%1, %0|%0, %1}"
1143 [(set_attr "type" "sse")
1144 (set_attr "atom_sse_attr" "rcp")
1145 (set_attr "btver2_sse_attr" "rcp")
1146 (set_attr "prefix" "maybe_vex")
1147 (set_attr "mode" "<MODE>")])
;; V4SF unspec pattern emitting rcpss (noavx, operand 2 tied to the
;; destination) or vrcpss (avx, operand 2 as a separate register source).
;; Operand 1 is the nonimmediate unspec input; the insn "mode" attribute
;; is SF.
1149 (define_insn "sse_vmrcpv4sf2"
1150 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1152 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
1154 (match_operand:V4SF 2 "register_operand" "0,x")
1158 rcpss\t{%1, %0|%0, %k1}
1159 vrcpss\t{%1, %2, %0|%0, %2, %k1}"
1160 [(set_attr "isa" "noavx,avx")
1161 (set_attr "type" "sse")
1162 (set_attr "atom_sse_attr" "rcp")
1163 (set_attr "btver2_sse_attr" "rcp")
1164 (set_attr "prefix" "orig,vex")
1165 (set_attr "mode" "SF")])
;; Square-root expander for the VF2 modes: sqrt RTL from a nonimmediate
;; source (operand 1) into a register destination (operand 0).
1167 (define_expand "sqrt<mode>2"
1168 [(set (match_operand:VF2 0 "register_operand")
1169 (sqrt:VF2 (match_operand:VF2 1 "nonimmediate_operand")))]
;; Square-root expander for the VF1 modes.  The branch guarded by
;; TARGET_RECIP_VEC_SQRT, !optimize_insn_for_size_p (),
;; flag_finite_math_only, !flag_trapping_math and
;; flag_unsafe_math_optimizations calls ix86_emit_swsqrtsf with its last
;; argument false (the rsqrt<mode>2 expander passes true).
1172 (define_expand "sqrt<mode>2"
1173 [(set (match_operand:VF1 0 "register_operand")
1174 (sqrt:VF1 (match_operand:VF1 1 "nonimmediate_operand")))]
1178 && TARGET_RECIP_VEC_SQRT
1179 && !optimize_insn_for_size_p ()
1180 && flag_finite_math_only && !flag_trapping_math
1181 && flag_unsafe_math_optimizations)
1183 ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, false);
;; Vector square-root instruction for the VF modes.  It has a single
;; alternative ("=v" destination, "vm" source) and is emitted as
;; %vsqrt<ssemodesuffix> with a maybe_vex prefix.
1188 (define_insn "<sse>_sqrt<mode>2"
1189 [(set (match_operand:VF 0 "register_operand" "=v")
1190 (sqrt:VF (match_operand:VF 1 "nonimmediate_operand" "vm")))]
1192 "%vsqrt<ssemodesuffix>\t{%1, %0|%0, %1}"
1193 [(set_attr "type" "sse")
1194 (set_attr "atom_sse_attr" "sqrt")
1195 (set_attr "btver2_sse_attr" "sqrt")
1196 (set_attr "prefix" "maybe_vex")
1197 (set_attr "mode" "<MODE>")])
;; Square-root pattern on the VF_128 modes using the scalar mnemonic
;; suffix.  It emits sqrt<ssescalarmodesuffix> (noavx, operand 2 tied to
;; the destination) or vsqrt<ssescalarmodesuffix> (avx, operand 2 as a
;; separate register source).  The "mode" attribute is <ssescalarmode>.
1199 (define_insn "<sse>_vmsqrt<mode>2"
1200 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1203 (match_operand:VF_128 1 "nonimmediate_operand" "xm,vm"))
1204 (match_operand:VF_128 2 "register_operand" "0,v")
1208 sqrt<ssescalarmodesuffix>\t{%1, %0|%0, %<iptr>1}
1209 vsqrt<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %<iptr>1}"
1210 [(set_attr "isa" "noavx,avx")
1211 (set_attr "type" "sse")
1212 (set_attr "atom_sse_attr" "sqrt")
1213 (set_attr "btver2_sse_attr" "sqrt")
1214 (set_attr "prefix" "orig,vex")
1215 (set_attr "mode" "<ssescalarmode>")])
;; Reciprocal square-root expander (UNSPEC_RSQRT) for the VF1_128_256
;; modes.  The body calls ix86_emit_swsqrtsf with its last argument true.
1217 (define_expand "rsqrt<mode>2"
1218 [(set (match_operand:VF1_128_256 0 "register_operand")
1220 [(match_operand:VF1_128_256 1 "nonimmediate_operand")] UNSPEC_RSQRT))]
1223 ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, true);
;; UNSPEC_RSQRT on the VF1_128_256 modes with a nonimmediate source,
;; emitted as %vrsqrtps with a maybe_vex prefix.
1227 (define_insn "<sse>_rsqrt<mode>2"
1228 [(set (match_operand:VF1_128_256 0 "register_operand" "=x")
1230 [(match_operand:VF1_128_256 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
1232 "%vrsqrtps\t{%1, %0|%0, %1}"
1233 [(set_attr "type" "sse")
1234 (set_attr "prefix" "maybe_vex")
1235 (set_attr "mode" "<MODE>")])
;; V4SF unspec pattern emitting rsqrtss (noavx, operand 2 tied to the
;; destination) or vrsqrtss (avx, operand 2 as a separate register
;; source).  The insn "mode" attribute is SF.
1237 (define_insn "sse_vmrsqrtv4sf2"
1238 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1240 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
1242 (match_operand:V4SF 2 "register_operand" "0,x")
1246 rsqrtss\t{%1, %0|%0, %k1}
1247 vrsqrtss\t{%1, %2, %0|%0, %2, %k1}"
1248 [(set_attr "isa" "noavx,avx")
1249 (set_attr "type" "sse")
1250 (set_attr "prefix" "orig,vex")
1251 (set_attr "mode" "SF")])
1253 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
1254 ;; isn't really correct, as those rtl operators aren't defined when
1255 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
;; Expander for the <code> operations on the VF modes (the SMIN/SMAX
;; operations discussed in the note above).  When flag_finite_math_only is
;; clear, operand 1 is first forced into a register; the operands are then
;; passed to ix86_fixup_binary_operands_no_copy.
1257 (define_expand "<code><mode>3"
1258 [(set (match_operand:VF 0 "register_operand")
1260 (match_operand:VF 1 "nonimmediate_operand")
1261 (match_operand:VF 2 "nonimmediate_operand")))]
1264 if (!flag_finite_math_only)
1265 operands[1] = force_reg (<MODE>mode, operands[1]);
1266 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
;; Min/max instruction used when flag_finite_math_only is set.  Operand 1
;; is marked commutative ("%") and may be nonimmediate; the condition also
;; requires ix86_binary_operator_ok.  It emits <maxmin_float> (noavx) or
;; v<maxmin_float> (avx).
1269 (define_insn "*<code><mode>3_finite"
1270 [(set (match_operand:VF 0 "register_operand" "=x,v")
1272 (match_operand:VF 1 "nonimmediate_operand" "%0,v")
1273 (match_operand:VF 2 "nonimmediate_operand" "xm,vm")))]
1274 "TARGET_SSE && flag_finite_math_only
1275 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1277 <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
1278 v<maxmin_float><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1279 [(set_attr "isa" "noavx,avx")
1280 (set_attr "type" "sseadd")
1281 (set_attr "btver2_sse_attr" "maxmin")
1282 (set_attr "prefix" "orig,vex")
1283 (set_attr "mode" "<MODE>")])
;; Min/max instruction used when flag_finite_math_only is clear.  Unlike
;; the _finite variant, operand 1 must be a register and carries no
;; commutative marker.
1285 (define_insn "*<code><mode>3"
1286 [(set (match_operand:VF 0 "register_operand" "=x,v")
1288 (match_operand:VF 1 "register_operand" "0,v")
1289 (match_operand:VF 2 "nonimmediate_operand" "xm,vm")))]
1290 "TARGET_SSE && !flag_finite_math_only"
1292 <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
1293 v<maxmin_float><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1294 [(set_attr "isa" "noavx,avx")
1295 (set_attr "type" "sseadd")
1296 (set_attr "btver2_sse_attr" "maxmin")
1297 (set_attr "prefix" "orig,vex")
1298 (set_attr "mode" "<MODE>")])
;; Min/max pattern on the VF_128 modes using the scalar mnemonic suffix:
;; <maxmin_float><ssescalarmodesuffix> (noavx, operand 1 tied to the
;; destination) or the v-prefixed three-operand form (avx).  The "mode"
;; attribute is <ssescalarmode>.
1300 (define_insn "<sse>_vm<code><mode>3"
1301 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1304 (match_operand:VF_128 1 "register_operand" "0,v")
1305 (match_operand:VF_128 2 "nonimmediate_operand" "xm,vm"))
1310 <maxmin_float><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
1311 v<maxmin_float><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %<iptr>2}"
1312 [(set_attr "isa" "noavx,avx")
1313 (set_attr "type" "sse")
1314 (set_attr "btver2_sse_attr" "maxmin")
1315 (set_attr "prefix" "orig,vex")
1316 (set_attr "mode" "<ssescalarmode>")])
1318 ;; These versions of the min/max patterns implement exactly the operations
1319 ;; min = (op1 < op2 ? op1 : op2)
1320 ;; max = (!(op1 < op2) ? op1 : op2)
1321 ;; Their operands are not commutative, and thus they may be used in the
1322 ;; presence of -0.0 and NaN.
;; Non-commutative minimum on the VF modes (see the comment above for the
;; exact semantics).  It emits min<ssemodesuffix> (noavx) or
;; vmin<ssemodesuffix> (avx).
;; NOTE(review): the noavx alternative uses "v" constraints where sibling
;; patterns such as <sse>_div<mode>3 use "x" -- confirm this is intended.
1324 (define_insn "*ieee_smin<mode>3"
1325 [(set (match_operand:VF 0 "register_operand" "=v,v")
1327 [(match_operand:VF 1 "register_operand" "0,v")
1328 (match_operand:VF 2 "nonimmediate_operand" "vm,vm")]
1332 min<ssemodesuffix>\t{%2, %0|%0, %2}
1333 vmin<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1334 [(set_attr "isa" "noavx,avx")
1335 (set_attr "type" "sseadd")
1336 (set_attr "prefix" "orig,vex")
1337 (set_attr "mode" "<MODE>")])
;; Non-commutative maximum on the VF modes, the counterpart of
;; *ieee_smin<mode>3.  It emits max<ssemodesuffix> (noavx) or
;; vmax<ssemodesuffix> (avx).
;; NOTE(review): the noavx alternative uses "v" constraints where sibling
;; patterns such as <sse>_div<mode>3 use "x" -- confirm this is intended.
1339 (define_insn "*ieee_smax<mode>3"
1340 [(set (match_operand:VF 0 "register_operand" "=v,v")
1342 [(match_operand:VF 1 "register_operand" "0,v")
1343 (match_operand:VF 2 "nonimmediate_operand" "vm,vm")]
1347 max<ssemodesuffix>\t{%2, %0|%0, %2}
1348 vmax<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1349 [(set_attr "isa" "noavx,avx")
1350 (set_attr "type" "sseadd")
1351 (set_attr "prefix" "orig,vex")
1352 (set_attr "mode" "<MODE>")])
;; V4DF add/subtract pattern emitted as the three-operand vaddsubpd (vex
;; prefix only).  The RTL includes a minus of operands 1 and 2, reused
;; through match_dup.
1354 (define_insn "avx_addsubv4df3"
1355 [(set (match_operand:V4DF 0 "register_operand" "=x")
1358 (match_operand:V4DF 1 "register_operand" "x")
1359 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
1360 (minus:V4DF (match_dup 1) (match_dup 2))
1363 "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1364 [(set_attr "type" "sseadd")
1365 (set_attr "prefix" "vex")
1366 (set_attr "mode" "V4DF")])
;; V2DF add/subtract pattern: addsubpd (noavx, operand 1 tied to the
;; destination) or vaddsubpd (avx).  The "atom_unit" attribute is set to
;; complex.
1368 (define_insn "sse3_addsubv2df3"
1369 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1372 (match_operand:V2DF 1 "register_operand" "0,x")
1373 (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm"))
1374 (minus:V2DF (match_dup 1) (match_dup 2))
1378 addsubpd\t{%2, %0|%0, %2}
1379 vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1380 [(set_attr "isa" "noavx,avx")
1381 (set_attr "type" "sseadd")
1382 (set_attr "atom_unit" "complex")
1383 (set_attr "prefix" "orig,vex")
1384 (set_attr "mode" "V2DF")])
;; V8SF add/subtract pattern emitted as the three-operand vaddsubps (vex
;; prefix only).
1386 (define_insn "avx_addsubv8sf3"
1387 [(set (match_operand:V8SF 0 "register_operand" "=x")
1390 (match_operand:V8SF 1 "register_operand" "x")
1391 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
1392 (minus:V8SF (match_dup 1) (match_dup 2))
1395 "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1396 [(set_attr "type" "sseadd")
1397 (set_attr "prefix" "vex")
1398 (set_attr "mode" "V8SF")])
;; V4SF add/subtract pattern: addsubps (noavx, operand 1 tied to the
;; destination) or vaddsubps (avx).  "prefix_rep" is 1 for the noavx
;; alternative and left at its default ("*") for the avx one.
1400 (define_insn "sse3_addsubv4sf3"
1401 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1404 (match_operand:V4SF 1 "register_operand" "0,x")
1405 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
1406 (minus:V4SF (match_dup 1) (match_dup 2))
1410 addsubps\t{%2, %0|%0, %2}
1411 vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1412 [(set_attr "isa" "noavx,avx")
1413 (set_attr "type" "sseadd")
1414 (set_attr "prefix" "orig,vex")
1415 (set_attr "prefix_rep" "1,*")
1416 (set_attr "mode" "V4SF")])
;; V4DF horizontal <plusminus_insn> pattern.  The RTL selects elements 0/1
;; and 2/3 of operand 1 and of operand 2 as DF values; the insn is emitted
;; as vh<plusminus_mnemonic>pd.
1418 (define_insn "avx_h<plusminus_insn>v4df3"
1419 [(set (match_operand:V4DF 0 "register_operand" "=x")
1424 (match_operand:V4DF 1 "register_operand" "x")
1425 (parallel [(const_int 0)]))
1426 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1429 (match_operand:V4DF 2 "nonimmediate_operand" "xm")
1430 (parallel [(const_int 0)]))
1431 (vec_select:DF (match_dup 2) (parallel [(const_int 1)]))))
1434 (vec_select:DF (match_dup 1) (parallel [(const_int 2)]))
1435 (vec_select:DF (match_dup 1) (parallel [(const_int 3)])))
1437 (vec_select:DF (match_dup 2) (parallel [(const_int 2)]))
1438 (vec_select:DF (match_dup 2) (parallel [(const_int 3)]))))))]
1440 "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
1441 [(set_attr "type" "sseadd")
1442 (set_attr "prefix" "vex")
1443 (set_attr "mode" "V4DF")])
;; Named expander for V2DF horizontal add.  The RTL selects elements 0 and
;; 1 of operand 1 and of operand 2 with fixed const_int indices.
1445 (define_expand "sse3_haddv2df3"
1446 [(set (match_operand:V2DF 0 "register_operand")
1450 (match_operand:V2DF 1 "register_operand")
1451 (parallel [(const_int 0)]))
1452 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1455 (match_operand:V2DF 2 "nonimmediate_operand")
1456 (parallel [(const_int 0)]))
1457 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
;; V2DF horizontal-add instruction.  The element selectors are operands
;; 3-6 (const_0_to_1_operand) rather than fixed constants.  The condition
;; requires operand 3 != operand 4 and operand 5 != operand 6, so each
;; pair names both elements of its source in either order.  It emits
;; haddpd (noavx) or vhaddpd (avx).
1460 (define_insn "*sse3_haddv2df3"
1461 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1465 (match_operand:V2DF 1 "register_operand" "0,x")
1466 (parallel [(match_operand:SI 3 "const_0_to_1_operand")]))
1469 (parallel [(match_operand:SI 4 "const_0_to_1_operand")])))
1472 (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm")
1473 (parallel [(match_operand:SI 5 "const_0_to_1_operand")]))
1476 (parallel [(match_operand:SI 6 "const_0_to_1_operand")])))))]
1478 && INTVAL (operands[3]) != INTVAL (operands[4])
1479 && INTVAL (operands[5]) != INTVAL (operands[6])"
1481 haddpd\t{%2, %0|%0, %2}
1482 vhaddpd\t{%2, %1, %0|%0, %1, %2}"
1483 [(set_attr "isa" "noavx,avx")
1484 (set_attr "type" "sseadd")
1485 (set_attr "prefix" "orig,vex")
1486 (set_attr "mode" "V2DF")])
;; V2DF horizontal-subtract instruction with fixed element indices 0 and 1
;; for both sources.  It emits hsubpd (noavx) or vhsubpd (avx).
1488 (define_insn "sse3_hsubv2df3"
1489 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1493 (match_operand:V2DF 1 "register_operand" "0,x")
1494 (parallel [(const_int 0)]))
1495 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1498 (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm")
1499 (parallel [(const_int 0)]))
1500 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1503 hsubpd\t{%2, %0|%0, %2}
1504 vhsubpd\t{%2, %1, %0|%0, %1, %2}"
1505 [(set_attr "isa" "noavx,avx")
1506 (set_attr "type" "sseadd")
1507 (set_attr "prefix" "orig,vex")
1508 (set_attr "mode" "V2DF")])
;; DF-result form of the horizontal add, working on the elements of a
;; single V2DF register (operand 1).  The selectors are operands 2 and 3
;; and must differ.  The same register is used for both sources of the
;; instruction: haddpd %0, %0 (noavx) or vhaddpd %1, %1, %0 (avx).
1510 (define_insn "*sse3_haddv2df3_low"
1511 [(set (match_operand:DF 0 "register_operand" "=x,x")
1514 (match_operand:V2DF 1 "register_operand" "0,x")
1515 (parallel [(match_operand:SI 2 "const_0_to_1_operand")]))
1518 (parallel [(match_operand:SI 3 "const_0_to_1_operand")]))))]
1520 && INTVAL (operands[2]) != INTVAL (operands[3])"
1522 haddpd\t{%0, %0|%0, %0}
1523 vhaddpd\t{%1, %1, %0|%0, %1, %1}"
1524 [(set_attr "isa" "noavx,avx")
1525 (set_attr "type" "sseadd1")
1526 (set_attr "prefix" "orig,vex")
1527 (set_attr "mode" "V2DF")])
;; DF-result form of the horizontal subtract on a single V2DF register
;; (operand 1), with fixed element indices 0 and 1.  It emits
;; hsubpd %0, %0 (noavx) or vhsubpd %1, %1, %0 (avx).
1529 (define_insn "*sse3_hsubv2df3_low"
1530 [(set (match_operand:DF 0 "register_operand" "=x,x")
1533 (match_operand:V2DF 1 "register_operand" "0,x")
1534 (parallel [(const_int 0)]))
1537 (parallel [(const_int 1)]))))]
1540 hsubpd\t{%0, %0|%0, %0}
1541 vhsubpd\t{%1, %1, %0|%0, %1, %1}"
1542 [(set_attr "isa" "noavx,avx")
1543 (set_attr "type" "sseadd1")
1544 (set_attr "prefix" "orig,vex")
1545 (set_attr "mode" "V2DF")])
;; V8SF horizontal <plusminus_insn> pattern.  The RTL selects SF elements
;; 0-3 of operand 1, then 0-3 of operand 2, then 4-7 of operand 1 and 4-7
;; of operand 2, in adjacent pairs.  Emitted as vh<plusminus_mnemonic>ps.
1547 (define_insn "avx_h<plusminus_insn>v8sf3"
1548 [(set (match_operand:V8SF 0 "register_operand" "=x")
1554 (match_operand:V8SF 1 "register_operand" "x")
1555 (parallel [(const_int 0)]))
1556 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1558 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1559 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1563 (match_operand:V8SF 2 "nonimmediate_operand" "xm")
1564 (parallel [(const_int 0)]))
1565 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1567 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1568 (vec_select:SF (match_dup 2) (parallel [(const_int 3)])))))
1572 (vec_select:SF (match_dup 1) (parallel [(const_int 4)]))
1573 (vec_select:SF (match_dup 1) (parallel [(const_int 5)])))
1575 (vec_select:SF (match_dup 1) (parallel [(const_int 6)]))
1576 (vec_select:SF (match_dup 1) (parallel [(const_int 7)]))))
1579 (vec_select:SF (match_dup 2) (parallel [(const_int 4)]))
1580 (vec_select:SF (match_dup 2) (parallel [(const_int 5)])))
1582 (vec_select:SF (match_dup 2) (parallel [(const_int 6)]))
1583 (vec_select:SF (match_dup 2) (parallel [(const_int 7)])))))))]
1585 "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1586 [(set_attr "type" "sseadd")
1587 (set_attr "prefix" "vex")
1588 (set_attr "mode" "V8SF")])
;; V4SF horizontal <plusminus_insn> pattern.  The RTL selects SF elements
;; 0-3 of operand 1 and then 0-3 of operand 2 in adjacent pairs.  It emits
;; h<plusminus_mnemonic>ps (noavx, operand 1 tied to the destination) or
;; the v-prefixed three-operand form (avx).
1590 (define_insn "sse3_h<plusminus_insn>v4sf3"
1591 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1596 (match_operand:V4SF 1 "register_operand" "0,x")
1597 (parallel [(const_int 0)]))
1598 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1600 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1601 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1605 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm")
1606 (parallel [(const_int 0)]))
1607 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1609 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1610 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
1613 h<plusminus_mnemonic>ps\t{%2, %0|%0, %2}
1614 vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1615 [(set_attr "isa" "noavx,avx")
1616 (set_attr "type" "sseadd")
1617 (set_attr "atom_unit" "complex")
1618 (set_attr "prefix" "orig,vex")
1619 (set_attr "prefix_rep" "1,*")
1620 (set_attr "mode" "V4SF")])
;; Sum-reduction expander for V4DF.  It emits avx_haddv4df3 of operand 1
;; with itself into tmp, then avx_vperm2f128v4df3 of tmp with itself
;; (immediate 1) into tmp2, and finally addv4df3 of tmp and tmp2 into
;; operand 0.
1622 (define_expand "reduc_splus_v4df"
1623 [(match_operand:V4DF 0 "register_operand")
1624 (match_operand:V4DF 1 "register_operand")]
1627 rtx tmp = gen_reg_rtx (V4DFmode);
1628 rtx tmp2 = gen_reg_rtx (V4DFmode);
1629 emit_insn (gen_avx_haddv4df3 (tmp, operands[1], operands[1]));
1630 emit_insn (gen_avx_vperm2f128v4df3 (tmp2, tmp, tmp, GEN_INT (1)));
1631 emit_insn (gen_addv4df3 (operands[0], tmp, tmp2));
;; Sum-reduction expander for V2DF: a single sse3_haddv2df3 of operand 1
;; with itself, written to operand 0.
1635 (define_expand "reduc_splus_v2df"
1636 [(match_operand:V2DF 0 "register_operand")
1637 (match_operand:V2DF 1 "register_operand")]
1640 emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
;; Sum-reduction expander for V8SF.  It emits two avx_haddv8sf3 steps
;; (operand 1 with itself into tmp, then tmp with itself into tmp2), then
;; avx_vperm2f128v8sf3 of tmp2 with itself (immediate 1) back into tmp,
;; and finally addv8sf3 of tmp and tmp2 into operand 0.
1644 (define_expand "reduc_splus_v8sf"
1645 [(match_operand:V8SF 0 "register_operand")
1646 (match_operand:V8SF 1 "register_operand")]
1649 rtx tmp = gen_reg_rtx (V8SFmode);
1650 rtx tmp2 = gen_reg_rtx (V8SFmode);
1651 emit_insn (gen_avx_haddv8sf3 (tmp, operands[1], operands[1]));
1652 emit_insn (gen_avx_haddv8sf3 (tmp2, tmp, tmp));
1653 emit_insn (gen_avx_vperm2f128v8sf3 (tmp, tmp2, tmp2, GEN_INT (1)));
1654 emit_insn (gen_addv8sf3 (operands[0], tmp, tmp2));
;; Sum-reduction expander for V4SF.  It has two code paths: a pair of
;; sse3_haddv4sf3 insns through a temporary, and a call to
;; ix86_expand_reduc with gen_addv4sf3.
;; NOTE(review): the choice between them is presumably made on SSE3
;; availability -- confirm.
1658 (define_expand "reduc_splus_v4sf"
1659 [(match_operand:V4SF 0 "register_operand")
1660 (match_operand:V4SF 1 "register_operand")]
1665 rtx tmp = gen_reg_rtx (V4SFmode);
1666 emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
1667 emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
1670 ix86_expand_reduc (gen_addv4sf3, operands[0], operands[1]);
1674 ;; Modes handled by reduc_sm{in,ax}* patterns.
;; The integer 256-bit modes listed are enabled by TARGET_AVX2, V8SF and
;; V4DF by TARGET_AVX, and V4SF by TARGET_SSE.
1675 (define_mode_iterator REDUC_SMINMAX_MODE
1676 [(V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
1677 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
1678 (V8SF "TARGET_AVX") (V4DF "TARGET_AVX")
1679 (V4SF "TARGET_SSE")])
;; Signed min/max reduction expander over REDUC_SMINMAX_MODE.  It
;; delegates to ix86_expand_reduc, passing the matching gen_<code><mode>3
;; generator.
1681 (define_expand "reduc_<code>_<mode>"
1682 [(smaxmin:REDUC_SMINMAX_MODE
1683 (match_operand:REDUC_SMINMAX_MODE 0 "register_operand")
1684 (match_operand:REDUC_SMINMAX_MODE 1 "register_operand"))]
1687 ix86_expand_reduc (gen_<code><mode>3, operands[0], operands[1]);
;; Reduction expander over the VI_256 modes.  It delegates to
;; ix86_expand_reduc, passing the matching gen_<code><mode>3 generator.
;; NOTE(review): presumably the unsigned min/max counterpart of the
;; REDUC_SMINMAX_MODE expander -- confirm the code iterator.
1691 (define_expand "reduc_<code>_<mode>"
1693 (match_operand:VI_256 0 "register_operand")
1694 (match_operand:VI_256 1 "register_operand"))]
1697 ix86_expand_reduc (gen_<code><mode>3, operands[0], operands[1]);
;; Unsigned-minimum reduction expander for V8HI.  It delegates to
;; ix86_expand_reduc with gen_uminv8hi3.
1701 (define_expand "reduc_umin_v8hi"
1703 (match_operand:V8HI 0 "register_operand")
1704 (match_operand:V8HI 1 "register_operand"))]
1707 ix86_expand_reduc (gen_uminv8hi3, operands[0], operands[1]);
1711 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1713 ;; Parallel floating point comparisons
1715 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; AVX compare on the VF_128_256 modes.  Operand 3 is an immediate in the
;; range 0..31 (const_0_to_31_operand) and is printed as the first
;; AT&T-syntax operand of vcmp<ssemodesuffix>.
1717 (define_insn "avx_cmp<mode>3"
1718 [(set (match_operand:VF_128_256 0 "register_operand" "=x")
1720 [(match_operand:VF_128_256 1 "register_operand" "x")
1721 (match_operand:VF_128_256 2 "nonimmediate_operand" "xm")
1722 (match_operand:SI 3 "const_0_to_31_operand" "n")]
1725 "vcmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1726 [(set_attr "type" "ssecmp")
1727 (set_attr "length_immediate" "1")
1728 (set_attr "prefix" "vex")
1729 (set_attr "mode" "<MODE>")])
;; AVX compare on the VF_128 modes using the scalar mnemonic suffix
;; (vcmp<ssescalarmodesuffix>), with a 0..31 immediate as operand 3.  The
;; "mode" attribute is <ssescalarmode>.
1731 (define_insn "avx_vmcmp<mode>3"
1732 [(set (match_operand:VF_128 0 "register_operand" "=x")
1735 [(match_operand:VF_128 1 "register_operand" "x")
1736 (match_operand:VF_128 2 "nonimmediate_operand" "xm")
1737 (match_operand:SI 3 "const_0_to_31_operand" "n")]
1742 "vcmp<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %3}"
1743 [(set_attr "type" "ssecmp")
1744 (set_attr "length_immediate" "1")
1745 (set_attr "prefix" "vex")
1746 (set_attr "mode" "<ssescalarmode>")])
;; Mask-compare pattern for commutative comparisons.  The condition
;; requires the comparison operator (operand 3) to be of class
;; RTX_COMM_COMPARE, and operand 1 carries the commutative marker ("%").
;; The comparison is printed through the %D3 modifier.
1748 (define_insn "*<sse>_maskcmp<mode>3_comm"
1749 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
1750 (match_operator:VF_128_256 3 "sse_comparison_operator"
1751 [(match_operand:VF_128_256 1 "register_operand" "%0,x")
1752 (match_operand:VF_128_256 2 "nonimmediate_operand" "xm,xm")]))]
1754 && GET_RTX_CLASS (GET_CODE (operands[3])) == RTX_COMM_COMPARE"
1756 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
1757 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1758 [(set_attr "isa" "noavx,avx")
1759 (set_attr "type" "ssecmp")
1760 (set_attr "length_immediate" "1")
1761 (set_attr "prefix" "orig,vex")
1762 (set_attr "mode" "<MODE>")])
;; General mask-compare pattern on the VF_128_256 modes for any
;; sse_comparison_operator (operand 3).  Operand 1 has no commutative
;; marker here.  It emits cmp%D3 (noavx) or vcmp%D3 (avx).
1764 (define_insn "<sse>_maskcmp<mode>3"
1765 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
1766 (match_operator:VF_128_256 3 "sse_comparison_operator"
1767 [(match_operand:VF_128_256 1 "register_operand" "0,x")
1768 (match_operand:VF_128_256 2 "nonimmediate_operand" "xm,xm")]))]
1771 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
1772 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1773 [(set_attr "isa" "noavx,avx")
1774 (set_attr "type" "ssecmp")
1775 (set_attr "length_immediate" "1")
1776 (set_attr "prefix" "orig,vex")
1777 (set_attr "mode" "<MODE>")])
;; Mask-compare pattern on the VF_128 modes using the scalar mnemonic
;; suffix (cmp%D3<ssescalarmodesuffix> / vcmp%D3<ssescalarmodesuffix>).
;; NOTE(review): "length_immediate" is "1,*" here, while
;; <sse>_maskcmp<mode>3 uses "1" for both alternatives -- confirm the
;; difference is intended.
1779 (define_insn "<sse>_vmmaskcmp<mode>3"
1780 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1782 (match_operator:VF_128 3 "sse_comparison_operator"
1783 [(match_operand:VF_128 1 "register_operand" "0,x")
1784 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm")])
1789 cmp%D3<ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
1790 vcmp%D3<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %<iptr>2}"
1791 [(set_attr "isa" "noavx,avx")
1792 (set_attr "type" "ssecmp")
1793 (set_attr "length_immediate" "1,*")
1794 (set_attr "prefix" "orig,vex")
1795 (set_attr "mode" "<ssescalarmode>")])
;; Compare of element 0 of two <ssevecmode> operands, setting FLAGS_REG in
;; CCFP mode.  Emitted as %vcomi<ssemodesuffix>.  "prefix_data16" is
;; computed from whether the insn mode is DF.
1797 (define_insn "<sse>_comi"
1798 [(set (reg:CCFP FLAGS_REG)
1801 (match_operand:<ssevecmode> 0 "register_operand" "v")
1802 (parallel [(const_int 0)]))
1804 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "vm")
1805 (parallel [(const_int 0)]))))]
1806 "SSE_FLOAT_MODE_P (<MODE>mode)"
1807 "%vcomi<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}"
1808 [(set_attr "type" "ssecomi")
1809 (set_attr "prefix" "maybe_vex")
1810 (set_attr "prefix_rep" "0")
1811 (set (attr "prefix_data16")
1812 (if_then_else (eq_attr "mode" "DF")
1814 (const_string "0")))
1815 (set_attr "mode" "<MODE>")])
;; Counterpart of <sse>_comi that sets FLAGS_REG in CCFPU mode and is
;; emitted as %vucomi<ssemodesuffix>.
1817 (define_insn "<sse>_ucomi"
1818 [(set (reg:CCFPU FLAGS_REG)
1821 (match_operand:<ssevecmode> 0 "register_operand" "v")
1822 (parallel [(const_int 0)]))
1824 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "vm")
1825 (parallel [(const_int 0)]))))]
1826 "SSE_FLOAT_MODE_P (<MODE>mode)"
1827 "%vucomi<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}"
1828 [(set_attr "type" "ssecomi")
1829 (set_attr "prefix" "maybe_vex")
1830 (set_attr "prefix_rep" "0")
1831 (set (attr "prefix_data16")
1832 (if_then_else (eq_attr "mode" "DF")
1834 (const_string "0")))
1835 (set_attr "mode" "<MODE>")])
;; Vector conditional expander: data mode V_256, comparison mode VF_256.
;; Operands 4 and 5 are compared by operator 3, and operands 1 and 2 are
;; the two value operands.  The condition requires both modes to have the
;; same number of units.  Expansion is done by ix86_expand_fp_vcond.
1837 (define_expand "vcond<V_256:mode><VF_256:mode>"
1838 [(set (match_operand:V_256 0 "register_operand")
1840 (match_operator 3 ""
1841 [(match_operand:VF_256 4 "nonimmediate_operand")
1842 (match_operand:VF_256 5 "nonimmediate_operand")])
1843 (match_operand:V_256 1 "general_operand")
1844 (match_operand:V_256 2 "general_operand")))]
1846 && (GET_MODE_NUNITS (<V_256:MODE>mode)
1847 == GET_MODE_NUNITS (<VF_256:MODE>mode))"
1849 bool ok = ix86_expand_fp_vcond (operands);
;; 128-bit counterpart of the V_256/VF_256 vcond expander: data mode
;; V_128, comparison mode VF_128, with the same unit-count requirement.
;; Expansion is done by ix86_expand_fp_vcond.
1854 (define_expand "vcond<V_128:mode><VF_128:mode>"
1855 [(set (match_operand:V_128 0 "register_operand")
1857 (match_operator 3 ""
1858 [(match_operand:VF_128 4 "nonimmediate_operand")
1859 (match_operand:VF_128 5 "nonimmediate_operand")])
1860 (match_operand:V_128 1 "general_operand")
1861 (match_operand:V_128 2 "general_operand")))]
1863 && (GET_MODE_NUNITS (<V_128:MODE>mode)
1864 == GET_MODE_NUNITS (<VF_128:MODE>mode))"
1866 bool ok = ix86_expand_fp_vcond (operands);
1871 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1873 ;; Parallel floating point logical operations
1875 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; And-not instruction for the VF modes.  The output code builds the
;; mnemonic at run time in a static buffer.  The suffix is chosen from the
;; insn's mode attribute, and the template is andn%s (alternative 0) or
;; vandn%s (alternative 1).  For 64-byte modes it switches to the
;; vpandn%s form, as the inline comment explains.
1877 (define_insn "<sse>_andnot<mode>3"
1878 [(set (match_operand:VF 0 "register_operand" "=x,v")
1881 (match_operand:VF 1 "register_operand" "0,v"))
1882 (match_operand:VF 2 "nonimmediate_operand" "xm,vm")))]
1885 static char buf[32];
1889 switch (get_attr_mode (insn))
1896 suffix = "<ssemodesuffix>";
1899 switch (which_alternative)
1902 ops = "andn%s\t{%%2, %%0|%%0, %%2}";
1905 ops = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1911 /* There is no vandnp[sd]. Use vpandnq. */
1912 if (GET_MODE_SIZE (<MODE>mode) == 64)
1915 ops = "vpandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1918 snprintf (buf, sizeof (buf), ops, suffix);
1921 [(set_attr "isa" "noavx,avx")
1922 (set_attr "type" "sselog")
1923 (set_attr "prefix" "orig,maybe_evex")
1925 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
1926 (const_string "<ssePSmode>")
1927 (match_test "TARGET_AVX")
1928 (const_string "<MODE>")
1929 (match_test "optimize_function_for_size_p (cfun)")
1930 (const_string "V4SF")
1932 (const_string "<MODE>")))])
;; Expander for the any_logic operations on the VF_128_256 modes.  The
;; preparation statement hands the operands to
;; ix86_fixup_binary_operands_no_copy.
1934 (define_expand "<code><mode>3"
1935 [(set (match_operand:VF_128_256 0 "register_operand")
1936 (any_logic:VF_128_256
1937 (match_operand:VF_128_256 1 "nonimmediate_operand")
1938 (match_operand:VF_128_256 2 "nonimmediate_operand")))]
1940 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Expander for logical operations on the VF_512 modes.  The preparation
;; statement hands the operands to ix86_fixup_binary_operands_no_copy.
1942 (define_expand "<code><mode>3"
1943 [(set (match_operand:VF_512 0 "register_operand")
1945 (match_operand:VF_512 1 "nonimmediate_operand")
1946 (match_operand:VF_512 2 "nonimmediate_operand")))]
1948 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Logical instruction for the VF modes (condition: TARGET_SSE and
;; ix86_binary_operator_ok).  As in <sse>_andnot<mode>3, the mnemonic is
;; built at run time: <logic>%s for alternative 0, v<logic>%s for
;; alternative 1, and vp<logic>%s for 64-byte modes.
1950 (define_insn "*<code><mode>3"
1951 [(set (match_operand:VF 0 "register_operand" "=x,v")
1953 (match_operand:VF 1 "nonimmediate_operand" "%0,v")
1954 (match_operand:VF 2 "nonimmediate_operand" "xm,vm")))]
1955 "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1957 static char buf[32];
1961 switch (get_attr_mode (insn))
1968 suffix = "<ssemodesuffix>";
1971 switch (which_alternative)
1974 ops = "<logic>%s\t{%%2, %%0|%%0, %%2}";
1977 ops = "v<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1983 /* There is no v<logic>p[sd]. Use vp<logic>q. */
1984 if (GET_MODE_SIZE (<MODE>mode) == 64)
1987 ops = "vp<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1990 snprintf (buf, sizeof (buf), ops, suffix);
1993 [(set_attr "isa" "noavx,avx")
1994 (set_attr "type" "sselog")
1995 (set_attr "prefix" "orig,maybe_evex")
1997 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
1998 (const_string "<ssePSmode>")
1999 (match_test "TARGET_AVX")
2000 (const_string "<MODE>")
2001 (match_test "optimize_function_for_size_p (cfun)")
2002 (const_string "V4SF")
2004 (const_string "<MODE>")))])
;; copysign expander for the VF modes.  Operand 3 is a mask built by
;; ix86_build_signbit_mask; operands 4 and 5 are fresh temporaries.  The
;; sequence combines (not mask) with operand 1 and mask with operand 2,
;; then IORs the two temporaries into operand 0.
2006 (define_expand "copysign<mode>3"
2009 (not:VF (match_dup 3))
2010 (match_operand:VF 1 "nonimmediate_operand")))
2012 (and:VF (match_dup 3)
2013 (match_operand:VF 2 "nonimmediate_operand")))
2014 (set (match_operand:VF 0 "register_operand")
2015 (ior:VF (match_dup 4) (match_dup 5)))]
2018 operands[3] = ix86_build_signbit_mask (<MODE>mode, 1, 0);
2020 operands[4] = gen_reg_rtx (<MODE>mode);
2021 operands[5] = gen_reg_rtx (<MODE>mode);
2024 ;; Also define scalar versions. These are used for abs, neg, and
2025 ;; conditional move. Using subregs into vector modes causes register
2026 ;; allocation lossage. These patterns do not allow memory operands
2027 ;; because the native instructions read the full 128-bits.
;; Scalar (MODEF) and-not with register-only operands.  The mnemonic
;; suffix is "ps" when the insn's mode attribute is V4SF and
;; <ssevecmodesuffix> otherwise.  The template is andn%s (alternative 0)
;; or vandn%s (alternative 1).
2029 (define_insn "*andnot<mode>3"
2030 [(set (match_operand:MODEF 0 "register_operand" "=x,x")
2033 (match_operand:MODEF 1 "register_operand" "0,x"))
2034 (match_operand:MODEF 2 "register_operand" "x,x")))]
2035 "SSE_FLOAT_MODE_P (<MODE>mode)"
2037 static char buf[32];
2040 = (get_attr_mode (insn) == MODE_V4SF) ? "ps" : "<ssevecmodesuffix>";
2042 switch (which_alternative)
2045 ops = "andn%s\t{%%2, %%0|%%0, %%2}";
2048 ops = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
2054 snprintf (buf, sizeof (buf), ops, suffix);
2057 [(set_attr "isa" "noavx,avx")
2058 (set_attr "type" "sselog")
2059 (set_attr "prefix" "orig,vex")
2061 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
2062 (const_string "V4SF")
2063 (match_test "TARGET_AVX")
2064 (const_string "<ssevecmode>")
2065 (match_test "optimize_function_for_size_p (cfun)")
2066 (const_string "V4SF")
2068 (const_string "<ssevecmode>")))])
;; TFmode and-not.  The mnemonic is andnps when the insn's mode attribute
;; is V4SF and pandn otherwise, with a "v" prefix for the avx
;; alternative.  "prefix_data16" depends on alternative 0 combined with
;; mode TI.
2070 (define_insn "*andnottf3"
2071 [(set (match_operand:TF 0 "register_operand" "=x,x")
2073 (not:TF (match_operand:TF 1 "register_operand" "0,x"))
2074 (match_operand:TF 2 "nonimmediate_operand" "xm,xm")))]
2077 static char buf[32];
2080 = (get_attr_mode (insn) == MODE_V4SF) ? "andnps" : "pandn";
2082 switch (which_alternative)
2085 ops = "%s\t{%%2, %%0|%%0, %%2}";
2088 ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
2094 snprintf (buf, sizeof (buf), ops, tmp);
2097 [(set_attr "isa" "noavx,avx")
2098 (set_attr "type" "sselog")
2099 (set (attr "prefix_data16")
2101 (and (eq_attr "alternative" "0")
2102 (eq_attr "mode" "TI"))
2104 (const_string "*")))
2105 (set_attr "prefix" "orig,vex")
2107 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
2108 (const_string "V4SF")
2109 (match_test "TARGET_AVX")
2111 (ior (not (match_test "TARGET_SSE2"))
2112 (match_test "optimize_function_for_size_p (cfun)"))
2113 (const_string "V4SF")
2115 (const_string "TI")))])
;; Scalar (MODEF) logical instruction with register-only operands and a
;; commutative operand 1 ("%").  The mnemonic suffix is "ps" when the
;; insn's mode attribute is V4SF and <ssevecmodesuffix> otherwise.
2117 (define_insn "*<code><mode>3"
2118 [(set (match_operand:MODEF 0 "register_operand" "=x,x")
2120 (match_operand:MODEF 1 "register_operand" "%0,x")
2121 (match_operand:MODEF 2 "register_operand" "x,x")))]
2122 "SSE_FLOAT_MODE_P (<MODE>mode)"
2124 static char buf[32];
2127 = (get_attr_mode (insn) == MODE_V4SF) ? "ps" : "<ssevecmodesuffix>";
2129 switch (which_alternative)
2132 ops = "<logic>%s\t{%%2, %%0|%%0, %%2}";
2135 ops = "v<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
2141 snprintf (buf, sizeof (buf), ops, suffix);
2144 [(set_attr "isa" "noavx,avx")
2145 (set_attr "type" "sselog")
2146 (set_attr "prefix" "orig,vex")
2148 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
2149 (const_string "V4SF")
2150 (match_test "TARGET_AVX")
2151 (const_string "<ssevecmode>")
2152 (match_test "optimize_function_for_size_p (cfun)")
2153 (const_string "V4SF")
2155 (const_string "<ssevecmode>")))])
;; Expander for logical operations on TFmode.  The preparation statement
;; hands the operands to ix86_fixup_binary_operands_no_copy with TFmode.
2157 (define_expand "<code>tf3"
2158 [(set (match_operand:TF 0 "register_operand")
2160 (match_operand:TF 1 "nonimmediate_operand")
2161 (match_operand:TF 2 "nonimmediate_operand")))]
2163 "ix86_fixup_binary_operands_no_copy (<CODE>, TFmode, operands);")
;; TFmode logical instruction.  The mnemonic is <logic>ps when the insn's
;; mode attribute is V4SF and p<logic> otherwise, with a "v" prefix for
;; the avx alternative.  The condition includes ix86_binary_operator_ok.
2165 (define_insn "*<code>tf3"
2166 [(set (match_operand:TF 0 "register_operand" "=x,x")
2168 (match_operand:TF 1 "nonimmediate_operand" "%0,x")
2169 (match_operand:TF 2 "nonimmediate_operand" "xm,xm")))]
2171 && ix86_binary_operator_ok (<CODE>, TFmode, operands)"
2173 static char buf[32];
2176 = (get_attr_mode (insn) == MODE_V4SF) ? "<logic>ps" : "p<logic>";
2178 switch (which_alternative)
2181 ops = "%s\t{%%2, %%0|%%0, %%2}";
2184 ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
2190 snprintf (buf, sizeof (buf), ops, tmp);
2193 [(set_attr "isa" "noavx,avx")
2194 (set_attr "type" "sselog")
2195 (set (attr "prefix_data16")
2197 (and (eq_attr "alternative" "0")
2198 (eq_attr "mode" "TI"))
2200 (const_string "*")))
2201 (set_attr "prefix" "orig,vex")
2203 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
2204 (const_string "V4SF")
2205 (match_test "TARGET_AVX")
2207 (ior (not (match_test "TARGET_SSE2"))
2208 (match_test "optimize_function_for_size_p (cfun)"))
2209 (const_string "V4SF")
2211 (const_string "TI")))])
2213 ;; There is no floating-point xor for V16SF and V8DF in AVX512F,
2214 ;; but we need one for negation.  Instead we use the integer versions
2215 ;; of xor.  Maybe there could be a better way to do that.
;; Mnemonic suffix used by avx512f_<logic><mode>: "d" for V16SF and "q"
;; for V8DF.
2217 (define_mode_attr avx512flogicsuff
2218 [(V16SF "d") (V8DF "q")])
;; Logical operation on the VF_512 modes, emitted as the integer
;; instruction vp<logic><avx512flogicsuff> with an evex prefix.
;; NOTE(review): unlike the neighbouring patterns, no "mode" attribute is
;; set here -- confirm that this is intended.
2220 (define_insn "avx512f_<logic><mode>"
2221 [(set (match_operand:VF_512 0 "register_operand" "=v")
2223 (match_operand:VF_512 1 "register_operand" "v")
2224 (match_operand:VF_512 2 "nonimmediate_operand" "vm")))]
2226 "vp<logic><avx512flogicsuff>\t{%2, %1, %0|%0, %1, %2}"
2227 [(set_attr "type" "sselog")
2228 (set_attr "prefix" "evex")])
2230 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2232 ;; FMA floating point multiply/accumulate instructions. These include
2233 ;; scalar versions of the instructions as well as vector versions.
2235 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2237 ;; The standard names for scalar FMA are only available with SSE math enabled.
;; SF and DF are therefore conditional on TARGET_SSE_MATH; the vector
;; modes V4SF, V2DF, V8SF and V4DF are listed without a condition.
2238 (define_mode_iterator FMAMODEM [(SF "TARGET_SSE_MATH")
2239 (DF "TARGET_SSE_MATH")
2240 V4SF V2DF V8SF V4DF])
;; Standard-name fma expander over FMAMODEM with three nonimmediate
;; inputs, none of them negated.  Enabled when TARGET_FMA or TARGET_FMA4
;; is set.
2242 (define_expand "fma<mode>4"
2243 [(set (match_operand:FMAMODEM 0 "register_operand")
2245 (match_operand:FMAMODEM 1 "nonimmediate_operand")
2246 (match_operand:FMAMODEM 2 "nonimmediate_operand")
2247 (match_operand:FMAMODEM 3 "nonimmediate_operand")))]
2248 "TARGET_FMA || TARGET_FMA4")
;; Standard-name fms expander over FMAMODEM: as fma<mode>4, but operand 3
;; is wrapped in neg.  Enabled when TARGET_FMA or TARGET_FMA4 is set.
2250 (define_expand "fms<mode>4"
2251 [(set (match_operand:FMAMODEM 0 "register_operand")
2253 (match_operand:FMAMODEM 1 "nonimmediate_operand")
2254 (match_operand:FMAMODEM 2 "nonimmediate_operand")
2255 (neg:FMAMODEM (match_operand:FMAMODEM 3 "nonimmediate_operand"))))]
2256 "TARGET_FMA || TARGET_FMA4")
;; Standard-name fnma expander over FMAMODEM: as fma<mode>4, but operand 1
;; is wrapped in neg.  Enabled when TARGET_FMA or TARGET_FMA4 is set.
2258 (define_expand "fnma<mode>4"
2259 [(set (match_operand:FMAMODEM 0 "register_operand")
2261 (neg:FMAMODEM (match_operand:FMAMODEM 1 "nonimmediate_operand"))
2262 (match_operand:FMAMODEM 2 "nonimmediate_operand")
2263 (match_operand:FMAMODEM 3 "nonimmediate_operand")))]
2264 "TARGET_FMA || TARGET_FMA4")
;; Standard-name fnms expander over FMAMODEM: both operand 1 and operand 3
;; are wrapped in neg.  Enabled when TARGET_FMA or TARGET_FMA4 is set.
2266 (define_expand "fnms<mode>4"
2267 [(set (match_operand:FMAMODEM 0 "register_operand")
2269 (neg:FMAMODEM (match_operand:FMAMODEM 1 "nonimmediate_operand"))
2270 (match_operand:FMAMODEM 2 "nonimmediate_operand")
2271 (neg:FMAMODEM (match_operand:FMAMODEM 3 "nonimmediate_operand"))))]
2272 "TARGET_FMA || TARGET_FMA4")
2274 ;; The builtins for intrinsics are not constrained by SSE math enabled.
;; FMAMODE therefore lists SF and DF without the TARGET_SSE_MATH condition
;; that FMAMODEM places on them.
2275 (define_mode_iterator FMAMODE [SF DF V4SF V2DF V8SF V4DF])
;; Named fmadd expander over FMAMODE with three nonimmediate inputs.
;; Enabled when TARGET_FMA or TARGET_FMA4 is set.
2277 (define_expand "fma4i_fmadd_<mode>"
2278 [(set (match_operand:FMAMODE 0 "register_operand")
2280 (match_operand:FMAMODE 1 "nonimmediate_operand")
2281 (match_operand:FMAMODE 2 "nonimmediate_operand")
2282 (match_operand:FMAMODE 3 "nonimmediate_operand")))]
2283 "TARGET_FMA || TARGET_FMA4")
;; Fused multiply-add instruction over FMAMODE with five alternatives.
;; Alternatives 0-2 (isa fma_avx512f) are the vfmadd132/213/231 forms,
;; each with one input tied to the destination.  Alternatives 3-4 (isa
;; fma4) are the four-operand vfmadd form, differing in which of operands
;; 2 and 3 may be memory.
2285 (define_insn "*fma_fmadd_<mode>"
2286 [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
2288 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0, v, x,x")
2289 (match_operand:FMAMODE 2 "nonimmediate_operand" "vm, v,vm, x,m")
2290 (match_operand:FMAMODE 3 "nonimmediate_operand" " v,vm, 0,xm,x")))]
2291 "TARGET_FMA || TARGET_FMA4"
2293 vfmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2294 vfmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2295 vfmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
2296 vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
2297 vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2298 [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f,fma4,fma4")
2299 (set_attr "type" "ssemuladd")
2300 (set_attr "mode" "<MODE>")])
;; Insn *fma_fmsub_<mode> over FMAMODE.  Five alternatives: 0-2 use "v"
;; registers and emit vfmsub132/213/231 (isa "fma_avx512f"); 3-4 use "x"
;; registers and emit the four-operand vfmsub (isa "fma4").
;; Operand 3's match_operand is closed by one more parenthesis than
;; operands 1 and 2 -- presumably a (neg ...) wrapper around it; confirm
;; against the full pattern.
2302 (define_insn "*fma_fmsub_<mode>"
2303 [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
2305 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0, v, x,x")
2306 (match_operand:FMAMODE 2 "nonimmediate_operand" "vm, v,vm, x,m")
2308 (match_operand:FMAMODE 3 "nonimmediate_operand" " v,vm, 0,xm,x"))))]
2309 "TARGET_FMA || TARGET_FMA4"
2311 vfmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2312 vfmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2313 vfmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
2314 vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
2315 vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2316 [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f,fma4,fma4")
2317 (set_attr "type" "ssemuladd")
2318 (set_attr "mode" "<MODE>")])
;; Insn *fma_fnmadd_<mode> over FMAMODE.  Five alternatives: 0-2 use "v"
;; registers and emit vfnmadd132/213/231 (isa "fma_avx512f"); 3-4 use "x"
;; registers and emit the four-operand vfnmadd (isa "fma4").
;; Operand 1's match_operand is followed by an extra closing parenthesis
;; -- presumably a (neg ...) wrapper around it; confirm against the full
;; pattern.
2320 (define_insn "*fma_fnmadd_<mode>"
2321 [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
2324 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0, v, x,x"))
2325 (match_operand:FMAMODE 2 "nonimmediate_operand" "vm, v,vm, x,m")
2326 (match_operand:FMAMODE 3 "nonimmediate_operand" " v,vm, 0,xm,x")))]
2327 "TARGET_FMA || TARGET_FMA4"
2329 vfnmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2330 vfnmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2331 vfnmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
2332 vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
2333 vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2334 [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f,fma4,fma4")
2335 (set_attr "type" "ssemuladd")
2336 (set_attr "mode" "<MODE>")])
;; Insn *fma_fnmsub_<mode> over FMAMODE.  Five alternatives: 0-2 use "v"
;; registers and emit vfnmsub132/213/231 (isa "fma_avx512f"); 3-4 use "x"
;; registers and emit the four-operand vfnmsub (isa "fma4").
;; Both operand 1 and operand 3 are followed by an extra closing
;; parenthesis -- presumably each sits inside a (neg ...) wrapper; confirm
;; against the full pattern.
2338 (define_insn "*fma_fnmsub_<mode>"
2339 [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
2342 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0, v, x,x"))
2343 (match_operand:FMAMODE 2 "nonimmediate_operand" "vm, v,vm, x,m")
2345 (match_operand:FMAMODE 3 "nonimmediate_operand" " v,vm, 0,xm,x"))))]
2346 "TARGET_FMA || TARGET_FMA4"
2348 vfnmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2349 vfnmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2350 vfnmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
2351 vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
2352 vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2353 [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f,fma4,fma4")
2354 (set_attr "type" "ssemuladd")
2355 (set_attr "mode" "<MODE>")])
2357 ;; FMA parallel floating point multiply addsub and subadd operations.
2359 ;; It would be possible to represent these without the UNSPEC as
2362 ;; (fma op1 op2 op3)
2363 ;; (fma op1 op2 (neg op3))
2366 ;; But this doesn't seem useful in practice.
;; Expander fmaddsub_<mode> over the VF vector-float modes.  The three
;; nonimmediate inputs are grouped in a bracketed operand vector.
;; Enabled when either TARGET_FMA or TARGET_FMA4 holds.
2368 (define_expand "fmaddsub_<mode>"
2369 [(set (match_operand:VF 0 "register_operand")
2371 [(match_operand:VF 1 "nonimmediate_operand")
2372 (match_operand:VF 2 "nonimmediate_operand")
2373 (match_operand:VF 3 "nonimmediate_operand")]
2375 "TARGET_FMA || TARGET_FMA4")
;; Insn *fma_fmaddsub_<mode> over the VF modes.  The constraint layout is
;; the same five-alternative scheme used by *fma_fmadd_<mode>:
;; alternatives 0-2 ("v" registers, isa "fma_avx512f") emit
;; vfmaddsub132/213/231, alternatives 3-4 ("x" registers, isa "fma4") emit
;; the four-operand vfmaddsub.
2377 (define_insn "*fma_fmaddsub_<mode>"
2378 [(set (match_operand:VF 0 "register_operand" "=v,v,v,x,x")
2380 [(match_operand:VF 1 "nonimmediate_operand" "%0, 0, v, x,x")
2381 (match_operand:VF 2 "nonimmediate_operand" "vm, v,vm, x,m")
2382 (match_operand:VF 3 "nonimmediate_operand" " v,vm, 0,xm,x")]
2384 "TARGET_FMA || TARGET_FMA4"
2386 vfmaddsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2387 vfmaddsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2388 vfmaddsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
2389 vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
2390 vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2391 [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f,fma4,fma4")
2392 (set_attr "type" "ssemuladd")
2393 (set_attr "mode" "<MODE>")])
;; Insn *fma_fmsubadd_<mode> over the VF modes.  Alternatives 0-2 ("v"
;; registers, isa "fma_avx512f") emit vfmsubadd132/213/231; alternatives
;; 3-4 ("x" registers, isa "fma4") emit the four-operand vfmsubadd.
;; Operand 3 is followed by an extra closing parenthesis before the "]"
;; -- presumably a (neg ...) wrapper around it; confirm against the full
;; pattern.
2395 (define_insn "*fma_fmsubadd_<mode>"
2396 [(set (match_operand:VF 0 "register_operand" "=v,v,v,x,x")
2398 [(match_operand:VF 1 "nonimmediate_operand" "%0, 0, v, x,x")
2399 (match_operand:VF 2 "nonimmediate_operand" "vm, v,vm, x,m")
2401 (match_operand:VF 3 "nonimmediate_operand" " v,vm, 0,xm,x"))]
2403 "TARGET_FMA || TARGET_FMA4"
2405 vfmsubadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2406 vfmsubadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2407 vfmsubadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
2408 vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
2409 vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2410 [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f,fma4,fma4")
2411 (set_attr "type" "ssemuladd")
2412 (set_attr "mode" "<MODE>")])
2414 ;; FMA3 floating point scalar intrinsics. These merge result with
2415 ;; high-order elements from the destination register.
2417 (define_expand "fmai_vmfmadd_<mode>"
2418 [(set (match_operand:VF_128 0 "register_operand")
2421 (match_operand:VF_128 1 "nonimmediate_operand")
2422 (match_operand:VF_128 2 "nonimmediate_operand")
2423 (match_operand:VF_128 3 "nonimmediate_operand"))
;; Insn *fmai_fmadd_<mode> over the 128-bit VF_128 modes, using the scalar
;; mnemonic suffix <ssescalarmodesuffix>.  Two alternatives, both tying
;; operand 1 to the destination: alternative 0 lets operand 2 be memory
;; and emits vfmadd132, alternative 1 lets operand 3 be memory and emits
;; vfmadd213.  The Intel-syntax side prints the possibly-memory operands
;; through %<iptr>.  No "isa" attribute is set on this pattern.
2428 (define_insn "*fmai_fmadd_<mode>"
2429 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
2432 (match_operand:VF_128 1 "nonimmediate_operand" " 0, 0")
2433 (match_operand:VF_128 2 "nonimmediate_operand" "vm, v")
2434 (match_operand:VF_128 3 "nonimmediate_operand" " v,vm"))
2439 vfmadd132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %<iptr>3, %<iptr>2}
2440 vfmadd213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %<iptr>2, %<iptr>3}"
2441 [(set_attr "type" "ssemuladd")
2442 (set_attr "mode" "<MODE>")])
2444 (define_insn "*fmai_fmsub_<mode>"
2445 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
2448 (match_operand:VF_128 1 "nonimmediate_operand" " 0, 0")
2449 (match_operand:VF_128 2 "nonimmediate_operand" "vm, v")
2451 (match_operand:VF_128 3 "nonimmediate_operand" " v,vm")))
2456 vfmsub132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %<iptr>3, %<iptr>2}
2457 vfmsub213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %<iptr>2, %<iptr>3}"
2458 [(set_attr "type" "ssemuladd")
2459 (set_attr "mode" "<MODE>")])
;; Insn *fmai_fnmadd_<mode> over VF_128, emitting scalar vfnmadd132/213.
;; Operand order differs from *fmai_fmadd_<mode>: operand 2 is matched
;; FIRST and operand 1 second, and it is operand 2 that is followed by the
;; extra closing parenthesis (presumably a (neg ...) wrapper -- confirm
;; against the full pattern).  Operand 1 is still the one tied to the
;; destination (" 0, 0").
2461 (define_insn "*fmai_fnmadd_<mode>"
2462 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
2466 (match_operand:VF_128 2 "nonimmediate_operand" "vm, v"))
2467 (match_operand:VF_128 1 "nonimmediate_operand" " 0, 0")
2468 (match_operand:VF_128 3 "nonimmediate_operand" " v,vm"))
2473 vfnmadd132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %<iptr>3, %<iptr>2}
2474 vfnmadd213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %<iptr>2, %<iptr>3}"
2475 [(set_attr "type" "ssemuladd")
2476 (set_attr "mode" "<MODE>")])
2478 (define_insn "*fmai_fnmsub_<mode>"
2479 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
2483 (match_operand:VF_128 2 "nonimmediate_operand" "vm, v"))
2484 (match_operand:VF_128 1 "nonimmediate_operand" " 0, 0")
2486 (match_operand:VF_128 3 "nonimmediate_operand" " v,vm")))
2491 vfnmsub132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %<iptr>3, %<iptr>2}
2492 vfnmsub213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %<iptr>2, %<iptr>3}"
2493 [(set_attr "type" "ssemuladd")
2494 (set_attr "mode" "<MODE>")])
2496 ;; FMA4 floating point scalar intrinsics. These write the
2497 ;; entire destination register, with the high-order elements zeroed.
;; Expander fma4i_vmfmadd_<mode> over VF_128.  The preparation statement
;; creates operand 4 as the all-zero constant of the vector mode
;; (CONST0_RTX (<MODE>mode)); callers supply only operands 0-3.
2499 (define_expand "fma4i_vmfmadd_<mode>"
2500 [(set (match_operand:VF_128 0 "register_operand")
2503 (match_operand:VF_128 1 "nonimmediate_operand")
2504 (match_operand:VF_128 2 "nonimmediate_operand")
2505 (match_operand:VF_128 3 "nonimmediate_operand"))
2509 "operands[4] = CONST0_RTX (<MODE>mode);")
;; Insn *fma4i_vmfmadd_<mode> over VF_128: four-operand scalar vfmadd.
;; Operand 4 must satisfy const0_operand.  Two "x"-register alternatives
;; differ in whether operand 3 or operand 2 may be memory; operand 1 is
;; marked commutative ("%").  Unlike the *fmai_* patterns, no input is
;; tied to the destination.
2511 (define_insn "*fma4i_vmfmadd_<mode>"
2512 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
2515 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
2516 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
2517 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
2518 (match_operand:VF_128 4 "const0_operand")
2521 "vfmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
2522 [(set_attr "type" "ssemuladd")
2523 (set_attr "mode" "<MODE>")])
2525 (define_insn "*fma4i_vmfmsub_<mode>"
2526 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
2529 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
2530 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
2532 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
2533 (match_operand:VF_128 4 "const0_operand")
2536 "vfmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
2537 [(set_attr "type" "ssemuladd")
2538 (set_attr "mode" "<MODE>")])
2540 (define_insn "*fma4i_vmfnmadd_<mode>"
2541 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
2545 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
2546 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
2547 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
2548 (match_operand:VF_128 4 "const0_operand")
2551 "vfnmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
2552 [(set_attr "type" "ssemuladd")
2553 (set_attr "mode" "<MODE>")])
2555 (define_insn "*fma4i_vmfnmsub_<mode>"
2556 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
2560 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
2561 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
2563 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
2564 (match_operand:VF_128 4 "const0_operand")
2567 "vfnmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
2568 [(set_attr "type" "ssemuladd")
2569 (set_attr "mode" "<MODE>")])
2571 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2573 ;; Parallel single-precision floating point conversion operations
2575 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Insn sse_cvtpi2ps: converts the V2SI operand 2 (MMX register "y" or
;; memory) to V2SF with (float ...).  Operand 1 is a V4SF register tied to
;; the destination ("0"), so the two-operand cvtpi2ps form is emitted.
2577 (define_insn "sse_cvtpi2ps"
2578 [(set (match_operand:V4SF 0 "register_operand" "=x")
2581 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
2582 (match_operand:V4SF 1 "register_operand" "0")
2585 "cvtpi2ps\t{%2, %0|%0, %2}"
2586 [(set_attr "type" "ssecvt")
2587 (set_attr "mode" "V4SF")])
;; Insn sse_cvtps2pi: the V4SF source (register or memory) goes through a
;; V4SI unspec, and elements 0 and 1 of that result are selected into the
;; V2SI destination, which lives in an MMX register ("y").  The Intel
;; syntax side prints the source with the %q modifier.
2589 (define_insn "sse_cvtps2pi"
2590 [(set (match_operand:V2SI 0 "register_operand" "=y")
2592 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
2594 (parallel [(const_int 0) (const_int 1)])))]
2596 "cvtps2pi\t{%1, %0|%0, %q1}"
2597 [(set_attr "type" "ssecvt")
2598 (set_attr "unit" "mmx")
2599 (set_attr "mode" "DI")])
;; Insn sse_cvttps2pi: truncating variant of sse_cvtps2pi.  The V4SF
;; source is converted with (fix:V4SI ...) and elements 0 and 1 are
;; selected into the V2SI MMX destination.
;; NOTE(review): "mode" is "SF" here while sse_cvtps2pi uses "DI" --
;; confirm the difference is intended.
2601 (define_insn "sse_cvttps2pi"
2602 [(set (match_operand:V2SI 0 "register_operand" "=y")
2604 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
2605 (parallel [(const_int 0) (const_int 1)])))]
2607 "cvttps2pi\t{%1, %0|%0, %q1}"
2608 [(set_attr "type" "ssecvt")
2609 (set_attr "unit" "mmx")
2610 (set_attr "prefix_rep" "0")
2611 (set_attr "mode" "SF")])
;; Insn sse_cvtsi2ss: converts the SI operand 2 to SF with (float ...).
;; Three alternatives:
;;   0, 1 (isa "noavx") - source in a GPR ("r") or memory ("m"); operand 1
;;                        is tied to the destination; two-operand cvtsi2ss.
;;   2    (isa "avx")   - "v" registers, operand 1 independent of the
;;                        destination; three-operand vcvtsi2ss.
;; The per-CPU decode attributes are given per alternative.
2613 (define_insn "sse_cvtsi2ss"
2614 [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
2617 (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m,rm")))
2618 (match_operand:V4SF 1 "register_operand" "0,0,v")
2622 cvtsi2ss\t{%2, %0|%0, %2}
2623 cvtsi2ss\t{%2, %0|%0, %2}
2624 vcvtsi2ss\t{%2, %1, %0|%0, %1, %2}"
2625 [(set_attr "isa" "noavx,noavx,avx")
2626 (set_attr "type" "sseicvt")
2627 (set_attr "athlon_decode" "vector,double,*")
2628 (set_attr "amdfam10_decode" "vector,double,*")
2629 (set_attr "bdver1_decode" "double,direct,*")
2630 (set_attr "btver2_decode" "double,double,double")
2631 (set_attr "prefix" "orig,orig,maybe_evex")
2632 (set_attr "mode" "SF")])
;; Insn sse_cvtsi2ssq: 64-bit-source counterpart of sse_cvtsi2ss.  The
;; source operand 2 is DImode and the pattern additionally requires
;; TARGET_64BIT.  The two legacy alternatives set prefix_rex to "1"; the
;; AVX alternative instead fixes length_vex at "4".
2634 (define_insn "sse_cvtsi2ssq"
2635 [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
2638 (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,m,rm")))
2639 (match_operand:V4SF 1 "register_operand" "0,0,v")
2641 "TARGET_SSE && TARGET_64BIT"
2643 cvtsi2ssq\t{%2, %0|%0, %2}
2644 cvtsi2ssq\t{%2, %0|%0, %2}
2645 vcvtsi2ssq\t{%2, %1, %0|%0, %1, %2}"
2646 [(set_attr "isa" "noavx,noavx,avx")
2647 (set_attr "type" "sseicvt")
2648 (set_attr "athlon_decode" "vector,double,*")
2649 (set_attr "amdfam10_decode" "vector,double,*")
2650 (set_attr "bdver1_decode" "double,direct,*")
2651 (set_attr "btver2_decode" "double,double,double")
2652 (set_attr "length_vex" "*,*,4")
2653 (set_attr "prefix_rex" "1,1,*")
2654 (set_attr "prefix" "orig,orig,maybe_evex")
2655 (set_attr "mode" "SF")])
;; Insn sse_cvtss2si: element 0 of the V4SF operand 1 (register "v" or
;; memory "m") feeds an UNSPEC_FIX_NOTRUNC, producing an SI result in a
;; general register.  The Intel syntax side prints the source with %k.
;; NOTE(review): this pattern sets no "amdfam10_decode" attribute, whereas
;; sse_cvtss2si_2 (same mnemonic, scalar SF source) does -- confirm whether
;; the omission is intended.
2657 (define_insn "sse_cvtss2si"
2658 [(set (match_operand:SI 0 "register_operand" "=r,r")
2661 (match_operand:V4SF 1 "nonimmediate_operand" "v,m")
2662 (parallel [(const_int 0)]))]
2663 UNSPEC_FIX_NOTRUNC))]
2665 "%vcvtss2si\t{%1, %0|%0, %k1}"
2666 [(set_attr "type" "sseicvt")
2667 (set_attr "athlon_decode" "double,vector")
2668 (set_attr "bdver1_decode" "double,double")
2669 (set_attr "prefix_rep" "1")
2670 (set_attr "prefix" "maybe_vex")
2671 (set_attr "mode" "SI")])
;; Insn sse_cvtss2si_2: same mnemonic as sse_cvtss2si, but the source is a
;; scalar SF operand used directly as the UNSPEC_FIX_NOTRUNC input, with
;; no element selection from a vector.
2673 (define_insn "sse_cvtss2si_2"
2674 [(set (match_operand:SI 0 "register_operand" "=r,r")
2675 (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "v,m")]
2676 UNSPEC_FIX_NOTRUNC))]
2678 "%vcvtss2si\t{%1, %0|%0, %k1}"
2679 [(set_attr "type" "sseicvt")
2680 (set_attr "athlon_decode" "double,vector")
2681 (set_attr "amdfam10_decode" "double,double")
2682 (set_attr "bdver1_decode" "double,double")
2683 (set_attr "prefix_rep" "1")
2684 (set_attr "prefix" "maybe_vex")
2685 (set_attr "mode" "SI")])
2687 (define_insn "sse_cvtss2siq"
2688 [(set (match_operand:DI 0 "register_operand" "=r,r")
2691 (match_operand:V4SF 1 "nonimmediate_operand" "v,m")
2692 (parallel [(const_int 0)]))]
2693 UNSPEC_FIX_NOTRUNC))]
2694 "TARGET_SSE && TARGET_64BIT"
2695 "%vcvtss2si{q}\t{%1, %0|%0, %k1}"
2696 [(set_attr "type" "sseicvt")
2697 (set_attr "athlon_decode" "double,vector")
2698 (set_attr "bdver1_decode" "double,double")
2699 (set_attr "prefix_rep" "1")
2700 (set_attr "prefix" "maybe_vex")
2701 (set_attr "mode" "DI")])
2703 (define_insn "sse_cvtss2siq_2"
2704 [(set (match_operand:DI 0 "register_operand" "=r,r")
2705 (unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "v,m")]
2706 UNSPEC_FIX_NOTRUNC))]
2707 "TARGET_SSE && TARGET_64BIT"
2708 "%vcvtss2si{q}\t{%1, %0|%0, %k1}"
2709 [(set_attr "type" "sseicvt")
2710 (set_attr "athlon_decode" "double,vector")
2711 (set_attr "amdfam10_decode" "double,double")
2712 (set_attr "bdver1_decode" "double,double")
2713 (set_attr "prefix_rep" "1")
2714 (set_attr "prefix" "maybe_vex")
2715 (set_attr "mode" "DI")])
2717 (define_insn "sse_cvttss2si"
2718 [(set (match_operand:SI 0 "register_operand" "=r,r")
2721 (match_operand:V4SF 1 "nonimmediate_operand" "v,m")
2722 (parallel [(const_int 0)]))))]
2724 "%vcvttss2si\t{%1, %0|%0, %k1}"
2725 [(set_attr "type" "sseicvt")
2726 (set_attr "athlon_decode" "double,vector")
2727 (set_attr "amdfam10_decode" "double,double")
2728 (set_attr "bdver1_decode" "double,double")
2729 (set_attr "prefix_rep" "1")
2730 (set_attr "prefix" "maybe_vex")
2731 (set_attr "mode" "SI")])
2733 (define_insn "sse_cvttss2siq"
2734 [(set (match_operand:DI 0 "register_operand" "=r,r")
2737 (match_operand:V4SF 1 "nonimmediate_operand" "v,m")
2738 (parallel [(const_int 0)]))))]
2739 "TARGET_SSE && TARGET_64BIT"
2740 "%vcvttss2si{q}\t{%1, %0|%0, %k1}"
2741 [(set_attr "type" "sseicvt")
2742 (set_attr "athlon_decode" "double,vector")
2743 (set_attr "amdfam10_decode" "double,double")
2744 (set_attr "bdver1_decode" "double,double")
2745 (set_attr "prefix_rep" "1")
2746 (set_attr "prefix" "maybe_vex")
2747 (set_attr "mode" "DI")])
;; Insn float<sseintvecmodelower><mode>2: converts an integer vector of
;; mode <sseintvecmode> (register or memory) to the VF1 float vector of
;; the destination, emitting cvtdq2ps (VEX-prefixed when %v expands so).
2749 (define_insn "float<sseintvecmodelower><mode>2"
2750 [(set (match_operand:VF1 0 "register_operand" "=v")
2752 (match_operand:<sseintvecmode> 1 "nonimmediate_operand" "vm")))]
2754 "%vcvtdq2ps\t{%1, %0|%0, %1}"
2755 [(set_attr "type" "ssecvt")
2756 (set_attr "prefix" "maybe_vex")
2757 (set_attr "mode" "<sseinsnmode>")])
;; Expander floatuns<sseintvecmodelower><mode>2: the RTL template only
;; declares the two operands; the conversion is produced by the C helper
;; ix86_expand_vector_convert_uns_vsivsf (destination, source).
;; Available with SSE2 for V4SFmode, and for the other VF1 modes only
;; when TARGET_AVX2 holds.
2759 (define_expand "floatuns<sseintvecmodelower><mode>2"
2760 [(match_operand:VF1 0 "register_operand")
2761 (match_operand:<sseintvecmode> 1 "register_operand")]
2762 "TARGET_SSE2 && (<MODE>mode == V4SFmode || TARGET_AVX2)"
2764 ix86_expand_vector_convert_uns_vsivsf (operands[0], operands[1]);
2768 (define_insn "avx_cvtps2dq256"
2769 [(set (match_operand:V8SI 0 "register_operand" "=x")
2770 (unspec:V8SI [(match_operand:V8SF 1 "nonimmediate_operand" "xm")]
2771 UNSPEC_FIX_NOTRUNC))]
2773 "vcvtps2dq\t{%1, %0|%0, %1}"
2774 [(set_attr "type" "ssecvt")
2775 (set_attr "prefix" "vex")
2776 (set_attr "mode" "OI")])
;; Insn sse2_cvtps2dq: V4SF -> V4SI through UNSPEC_FIX_NOTRUNC, emitting
;; cvtps2dq (VEX-prefixed when %v expands so).  The prefix_data16
;; attribute is not a fixed string: it is selected by a TARGET_AVX test.
2778 (define_insn "sse2_cvtps2dq"
2779 [(set (match_operand:V4SI 0 "register_operand" "=x")
2780 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
2781 UNSPEC_FIX_NOTRUNC))]
2783 "%vcvtps2dq\t{%1, %0|%0, %1}"
2784 [(set_attr "type" "ssecvt")
2785 (set (attr "prefix_data16")
2787 (match_test "TARGET_AVX")
2789 (const_string "1")))
2790 (set_attr "prefix" "maybe_vex")
2791 (set_attr "mode" "TI")])
;; Insn <fixsuffix>fix_truncv16sfv16si2: 512-bit V16SF -> V16SI truncating
;; conversion.  Both the pattern name and the mnemonic
;; (vcvttps2<fixsuffix>dq) are parameterized by <fixsuffix>, so this one
;; definition yields more than one insn.  EVEX-only, mode "XI".
2793 (define_insn "<fixsuffix>fix_truncv16sfv16si2"
2794 [(set (match_operand:V16SI 0 "register_operand" "=v")
2796 (match_operand:V16SF 1 "nonimmediate_operand" "vm")))]
2798 "vcvttps2<fixsuffix>dq\t{%1, %0|%0, %1}"
2799 [(set_attr "type" "ssecvt")
2800 (set_attr "prefix" "evex")
2801 (set_attr "mode" "XI")])
2803 (define_insn "fix_truncv8sfv8si2"
2804 [(set (match_operand:V8SI 0 "register_operand" "=x")
2805 (fix:V8SI (match_operand:V8SF 1 "nonimmediate_operand" "xm")))]
2807 "vcvttps2dq\t{%1, %0|%0, %1}"
2808 [(set_attr "type" "ssecvt")
2809 (set_attr "prefix" "vex")
2810 (set_attr "mode" "OI")])
;; Insn fix_truncv4sfv4si2: V4SF -> V4SI truncating conversion with
;; (fix:V4SI ...), emitting cvttps2dq (VEX-prefixed when %v expands so).
;; prefix_rep and prefix_data16 are each selected by a TARGET_AVX test.
;; NOTE(review): "prefix_data16" is specified twice in the attribute list
;; below -- once through the TARGET_AVX-conditional (set (attr ...)) form
;; and once more as the unconditional (set_attr "prefix_data16" "0").
;; One of the two is redundant; confirm which is intended and drop the
;; other.
2812 (define_insn "fix_truncv4sfv4si2"
2813 [(set (match_operand:V4SI 0 "register_operand" "=x")
2814 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
2816 "%vcvttps2dq\t{%1, %0|%0, %1}"
2817 [(set_attr "type" "ssecvt")
2818 (set (attr "prefix_rep")
2820 (match_test "TARGET_AVX")
2822 (const_string "1")))
2823 (set (attr "prefix_data16")
2825 (match_test "TARGET_AVX")
2827 (const_string "0")))
2828 (set_attr "prefix_data16" "0")
2829 (set_attr "prefix" "maybe_vex")
2830 (set_attr "mode" "TI")])
;; Expander fixuns_trunc<mode><sseintvecmodelower>2: unsigned float->int
;; vector conversion built from the signed one.  Steps in the C body:
;;   1. ix86_expand_adjust_ufix_to_sfix_si adjusts the input and also
;;      fills tmp[2] through its second (pointer) argument;
;;   2. the signed fix_trunc pattern converts the adjusted value into a
;;      fresh integer-vector register tmp[1];
;;   3. tmp[1] is XORed with tmp[2] to produce the final result.
2832 (define_expand "fixuns_trunc<mode><sseintvecmodelower>2"
2833 [(match_operand:<sseintvecmode> 0 "register_operand")
2834 (match_operand:VF1 1 "register_operand")]
2838 tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]);
2839 tmp[1] = gen_reg_rtx (<sseintvecmode>mode);
2840 emit_insn (gen_fix_trunc<mode><sseintvecmodelower>2 (tmp[1], tmp[0]));
2841 emit_insn (gen_xor<sseintvecmodelower>3 (operands[0], tmp[1], tmp[2]));
2845 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2847 ;; Parallel double-precision floating point conversion operations
2849 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Insn sse2_cvtpi2pd: V2SI -> V2DF with (float ...).  Two alternatives by
;; source location: an MMX register ("y", unit "mmx", prefix_data16 "1")
;; or memory ("m", both attributes left at "*").
2851 (define_insn "sse2_cvtpi2pd"
2852 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2853 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
2855 "cvtpi2pd\t{%1, %0|%0, %1}"
2856 [(set_attr "type" "ssecvt")
2857 (set_attr "unit" "mmx,*")
2858 (set_attr "prefix_data16" "1,*")
2859 (set_attr "mode" "V2DF")])
2861 (define_insn "sse2_cvtpd2pi"
2862 [(set (match_operand:V2SI 0 "register_operand" "=y")
2863 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2864 UNSPEC_FIX_NOTRUNC))]
2866 "cvtpd2pi\t{%1, %0|%0, %1}"
2867 [(set_attr "type" "ssecvt")
2868 (set_attr "unit" "mmx")
2869 (set_attr "bdver1_decode" "double")
2870 (set_attr "btver2_decode" "direct")
2871 (set_attr "prefix_data16" "1")
2872 (set_attr "mode" "DI")])
;; Insn sse2_cvttpd2pi: V2DF -> V2SI truncating conversion with
;; (fix:V2SI ...); the result lives in an MMX register ("y").
;; NOTE(review): compared with the non-truncating sse2_cvtpd2pi, this
;; pattern uses mode "TI" instead of "DI" and sets no "btver2_decode"
;; attribute -- confirm both differences are intended.
2874 (define_insn "sse2_cvttpd2pi"
2875 [(set (match_operand:V2SI 0 "register_operand" "=y")
2876 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
2878 "cvttpd2pi\t{%1, %0|%0, %1}"
2879 [(set_attr "type" "ssecvt")
2880 (set_attr "unit" "mmx")
2881 (set_attr "bdver1_decode" "double")
2882 (set_attr "prefix_data16" "1")
2883 (set_attr "mode" "TI")])
;; Insn sse2_cvtsi2sd: converts the SI operand 2 to DF with (float ...).
;; Alternatives 0 and 1 (isa "noavx") tie operand 1 to the destination
;; and emit two-operand cvtsi2sd; alternative 2 (isa "avx") emits
;; three-operand vcvtsi2sd.
;; NOTE(review): the AVX alternative here uses "x" registers and prefix
;; "vex", whereas the sibling sse2_cvtsi2sdq and sse_cvtsi2ss patterns use
;; "v" registers and prefix "maybe_evex" for theirs -- confirm whether
;; this pattern should be widened the same way.
2885 (define_insn "sse2_cvtsi2sd"
2886 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
2889 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m,rm")))
2890 (match_operand:V2DF 1 "register_operand" "0,0,x")
2894 cvtsi2sd\t{%2, %0|%0, %2}
2895 cvtsi2sd\t{%2, %0|%0, %2}
2896 vcvtsi2sd\t{%2, %1, %0|%0, %1, %2}"
2897 [(set_attr "isa" "noavx,noavx,avx")
2898 (set_attr "type" "sseicvt")
2899 (set_attr "athlon_decode" "double,direct,*")
2900 (set_attr "amdfam10_decode" "vector,double,*")
2901 (set_attr "bdver1_decode" "double,direct,*")
2902 (set_attr "btver2_decode" "double,double,double")
2903 (set_attr "prefix" "orig,orig,vex")
2904 (set_attr "mode" "DF")])
;; Insn sse2_cvtsi2sdq: 64-bit-source counterpart of sse2_cvtsi2sd.  The
;; source operand 2 is DImode and the pattern requires TARGET_64BIT.  The
;; legacy alternatives set prefix_rex to "1"; the AVX alternative fixes
;; length_vex at "4" and allows "v" registers with prefix "maybe_evex".
;; NOTE(review): unlike sse2_cvtsi2sd, no "btver2_decode" attribute is set
;; here -- confirm whether the omission is intended.
2906 (define_insn "sse2_cvtsi2sdq"
2907 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
2910 (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m,rm")))
2911 (match_operand:V2DF 1 "register_operand" "0,0,v")
2913 "TARGET_SSE2 && TARGET_64BIT"
2915 cvtsi2sdq\t{%2, %0|%0, %2}
2916 cvtsi2sdq\t{%2, %0|%0, %2}
2917 vcvtsi2sdq\t{%2, %1, %0|%0, %1, %2}"
2918 [(set_attr "isa" "noavx,noavx,avx")
2919 (set_attr "type" "sseicvt")
2920 (set_attr "athlon_decode" "double,direct,*")
2921 (set_attr "amdfam10_decode" "vector,double,*")
2922 (set_attr "bdver1_decode" "double,direct,*")
2923 (set_attr "length_vex" "*,*,4")
2924 (set_attr "prefix_rex" "1,1,*")
2925 (set_attr "prefix" "orig,orig,maybe_evex")
2926 (set_attr "mode" "DF")])
2928 (define_insn "sse2_cvtsd2si"
2929 [(set (match_operand:SI 0 "register_operand" "=r,r")
2932 (match_operand:V2DF 1 "nonimmediate_operand" "v,m")
2933 (parallel [(const_int 0)]))]
2934 UNSPEC_FIX_NOTRUNC))]
2936 "%vcvtsd2si\t{%1, %0|%0, %q1}"
2937 [(set_attr "type" "sseicvt")
2938 (set_attr "athlon_decode" "double,vector")
2939 (set_attr "bdver1_decode" "double,double")
2940 (set_attr "btver2_decode" "double,double")
2941 (set_attr "prefix_rep" "1")
2942 (set_attr "prefix" "maybe_vex")
2943 (set_attr "mode" "SI")])
2945 (define_insn "sse2_cvtsd2si_2"
2946 [(set (match_operand:SI 0 "register_operand" "=r,r")
2947 (unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "v,m")]
2948 UNSPEC_FIX_NOTRUNC))]
2950 "%vcvtsd2si\t{%1, %0|%0, %q1}"
2951 [(set_attr "type" "sseicvt")
2952 (set_attr "athlon_decode" "double,vector")
2953 (set_attr "amdfam10_decode" "double,double")
2954 (set_attr "bdver1_decode" "double,double")
2955 (set_attr "prefix_rep" "1")
2956 (set_attr "prefix" "maybe_vex")
2957 (set_attr "mode" "SI")])
2959 (define_insn "sse2_cvtsd2siq"
2960 [(set (match_operand:DI 0 "register_operand" "=r,r")
2963 (match_operand:V2DF 1 "nonimmediate_operand" "v,m")
2964 (parallel [(const_int 0)]))]
2965 UNSPEC_FIX_NOTRUNC))]
2966 "TARGET_SSE2 && TARGET_64BIT"
2967 "%vcvtsd2si{q}\t{%1, %0|%0, %q1}"
2968 [(set_attr "type" "sseicvt")
2969 (set_attr "athlon_decode" "double,vector")
2970 (set_attr "bdver1_decode" "double,double")
2971 (set_attr "prefix_rep" "1")
2972 (set_attr "prefix" "maybe_vex")
2973 (set_attr "mode" "DI")])
2975 (define_insn "sse2_cvtsd2siq_2"
2976 [(set (match_operand:DI 0 "register_operand" "=r,r")
2977 (unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "v,m")]
2978 UNSPEC_FIX_NOTRUNC))]
2979 "TARGET_SSE2 && TARGET_64BIT"
2980 "%vcvtsd2si{q}\t{%1, %0|%0, %q1}"
2981 [(set_attr "type" "sseicvt")
2982 (set_attr "athlon_decode" "double,vector")
2983 (set_attr "amdfam10_decode" "double,double")
2984 (set_attr "bdver1_decode" "double,double")
2985 (set_attr "prefix_rep" "1")
2986 (set_attr "prefix" "maybe_vex")
2987 (set_attr "mode" "DI")])
2989 (define_insn "sse2_cvttsd2si"
2990 [(set (match_operand:SI 0 "register_operand" "=r,r")
2993 (match_operand:V2DF 1 "nonimmediate_operand" "v,m")
2994 (parallel [(const_int 0)]))))]
2996 "%vcvttsd2si\t{%1, %0|%0, %q1}"
2997 [(set_attr "type" "sseicvt")
2998 (set_attr "athlon_decode" "double,vector")
2999 (set_attr "amdfam10_decode" "double,double")
3000 (set_attr "bdver1_decode" "double,double")
3001 (set_attr "btver2_decode" "double,double")
3002 (set_attr "prefix_rep" "1")
3003 (set_attr "prefix" "maybe_vex")
3004 (set_attr "mode" "SI")])
3006 (define_insn "sse2_cvttsd2siq"
3007 [(set (match_operand:DI 0 "register_operand" "=r,r")
3010 (match_operand:V2DF 1 "nonimmediate_operand" "v,m")
3011 (parallel [(const_int 0)]))))]
3012 "TARGET_SSE2 && TARGET_64BIT"
3013 "%vcvttsd2si{q}\t{%1, %0|%0, %q1}"
3014 [(set_attr "type" "sseicvt")
3015 (set_attr "athlon_decode" "double,vector")
3016 (set_attr "amdfam10_decode" "double,double")
3017 (set_attr "bdver1_decode" "double,double")
3018 (set_attr "prefix_rep" "1")
3019 (set_attr "prefix" "maybe_vex")
3020 (set_attr "mode" "DI")])
3022 ;; For float<si2dfmode><mode>2 insn pattern
3023 (define_mode_attr si2dfmode
3024 [(V8DF "V8SI") (V4DF "V4SI")])
3025 (define_mode_attr si2dfmodelower
3026 [(V8DF "v8si") (V4DF "v4si")])
;; Insn float<si2dfmodelower><mode>2: integer vector -> double vector with
;; (float ...), for the VF2_512_256 modes.  The source mode comes from the
;; si2dfmode attribute (V8DF takes V8SI, V4DF takes V4SI), i.e. the source
;; has the same element count as the destination.  Emits vcvtdq2pd.
3028 (define_insn "float<si2dfmodelower><mode>2"
3029 [(set (match_operand:VF2_512_256 0 "register_operand" "=v")
3030 (float:VF2_512_256 (match_operand:<si2dfmode> 1 "nonimmediate_operand" "vm")))]
3032 "vcvtdq2pd\t{%1, %0|%0, %1}"
3033 [(set_attr "type" "ssecvt")
3034 (set_attr "prefix" "maybe_vex")
3035 (set_attr "mode" "<MODE>")])
3037 (define_insn "avx_cvtdq2pd256_2"
3038 [(set (match_operand:V4DF 0 "register_operand" "=x")
3041 (match_operand:V8SI 1 "nonimmediate_operand" "xm")
3042 (parallel [(const_int 0) (const_int 1)
3043 (const_int 2) (const_int 3)]))))]
3045 "vcvtdq2pd\t{%x1, %0|%0, %x1}"
3046 [(set_attr "type" "ssecvt")
3047 (set_attr "prefix" "vex")
3048 (set_attr "mode" "V4DF")])
3050 (define_insn "sse2_cvtdq2pd"
3051 [(set (match_operand:V2DF 0 "register_operand" "=x")
3054 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
3055 (parallel [(const_int 0) (const_int 1)]))))]
3057 "%vcvtdq2pd\t{%1, %0|%0, %q1}"
3058 [(set_attr "type" "ssecvt")
3059 (set_attr "prefix" "maybe_vex")
3060 (set_attr "mode" "V2DF")])
3062 (define_insn "avx_cvtpd2dq256"
3063 [(set (match_operand:V4SI 0 "register_operand" "=x")
3064 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")]
3065 UNSPEC_FIX_NOTRUNC))]
3067 "vcvtpd2dq{y}\t{%1, %0|%0, %1}"
3068 [(set_attr "type" "ssecvt")
3069 (set_attr "prefix" "vex")
3070 (set_attr "mode" "OI")])
;; Expander avx_cvtpd2dq256_2: the V4DF source is converted through a V4SI
;; unspec, and the destination is a full V8SI register.  The preparation
;; statement creates operand 2 as a V4SI zero vector (CONST0_RTX).
3072 (define_expand "avx_cvtpd2dq256_2"
3073 [(set (match_operand:V8SI 0 "register_operand")
3075 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand")]
3079 "operands[2] = CONST0_RTX (V4SImode);")
;; Insn *avx_cvtpd2dq256_2: matches the V4DF -> V4SI unspec conversion
;; paired with a V4SI const0_operand, with a V8SI destination.  The
;; template prints the destination through the %x modifier and adds the
;; {y} size suffix to the AT&T mnemonic.
3081 (define_insn "*avx_cvtpd2dq256_2"
3082 [(set (match_operand:V8SI 0 "register_operand" "=x")
3084 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")]
3086 (match_operand:V4SI 2 "const0_operand")))]
3088 "vcvtpd2dq{y}\t{%1, %x0|%x0, %1}"
3089 [(set_attr "type" "ssecvt")
3090 (set_attr "prefix" "vex")
3091 (set_attr "btver2_decode" "vector")
3092 (set_attr "mode" "OI")])
3094 (define_expand "sse2_cvtpd2dq"
3095 [(set (match_operand:V4SI 0 "register_operand")
3097 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand")]
3101 "operands[2] = CONST0_RTX (V2SImode);")
;; Insn *sse2_cvtpd2dq: V2DF -> V2SI unspec conversion paired with a V2SI
;; const0_operand, with a V4SI destination.  The output template is C
;; code that returns either the VEX form "vcvtpd2dq{x}" or the legacy
;; "cvtpd2dq" string.
3103 (define_insn "*sse2_cvtpd2dq"
3104 [(set (match_operand:V4SI 0 "register_operand" "=x")
3106 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
3108 (match_operand:V2SI 2 "const0_operand")))]
3112 return "vcvtpd2dq{x}\t{%1, %0|%0, %1}";
3114 return "cvtpd2dq\t{%1, %0|%0, %1}";
3116 [(set_attr "type" "ssecvt")
3117 (set_attr "prefix_rep" "1")
3118 (set_attr "prefix_data16" "0")
3119 (set_attr "prefix" "maybe_vex")
3120 (set_attr "mode" "TI")
3121 (set_attr "amdfam10_decode" "double")
3122 (set_attr "athlon_decode" "vector")
3123 (set_attr "bdver1_decode" "double")])
;; Insn <fixsuffix>fix_truncv8dfv8si2: 512-bit V8DF -> V8SI truncating
;; conversion written with the any_fix code iterator, so the name and the
;; mnemonic (vcvttpd2<fixsuffix>dq) vary together through <fixsuffix>.
;; EVEX-only; the result is 256 bits wide (mode "OI").
3125 (define_insn "<fixsuffix>fix_truncv8dfv8si2"
3126 [(set (match_operand:V8SI 0 "register_operand" "=v")
3127 (any_fix:V8SI (match_operand:V8DF 1 "nonimmediate_operand" "vm")))]
3129 "vcvttpd2<fixsuffix>dq\t{%1, %0|%0, %1}"
3130 [(set_attr "type" "ssecvt")
3131 (set_attr "prefix" "evex")
3132 (set_attr "mode" "OI")])
3134 (define_insn "fix_truncv4dfv4si2"
3135 [(set (match_operand:V4SI 0 "register_operand" "=x")
3136 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
3138 "vcvttpd2dq{y}\t{%1, %0|%0, %1}"
3139 [(set_attr "type" "ssecvt")
3140 (set_attr "prefix" "vex")
3141 (set_attr "mode" "OI")])
3143 (define_expand "avx_cvttpd2dq256_2"
3144 [(set (match_operand:V8SI 0 "register_operand")
3146 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand"))
3149 "operands[2] = CONST0_RTX (V4SImode);")
3151 (define_insn "*avx_cvttpd2dq256_2"
3152 [(set (match_operand:V8SI 0 "register_operand" "=x")
3154 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "xm"))
3155 (match_operand:V4SI 2 "const0_operand")))]
3157 "vcvttpd2dq{y}\t{%1, %x0|%x0, %1}"
3158 [(set_attr "type" "ssecvt")
3159 (set_attr "prefix" "vex")
3160 (set_attr "btver2_decode" "vector")
3161 (set_attr "mode" "OI")])
3163 (define_expand "sse2_cvttpd2dq"
3164 [(set (match_operand:V4SI 0 "register_operand")
3166 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand"))
3169 "operands[2] = CONST0_RTX (V2SImode);")
;; Insn *sse2_cvttpd2dq: V2DF -> V2SI truncating conversion with
;; (fix:V2SI ...), paired with a V2SI const0_operand, with a V4SI
;; destination.  The output template is C code that returns either the
;; VEX form "vcvttpd2dq{x}" or the legacy "cvttpd2dq" string.
3171 (define_insn "*sse2_cvttpd2dq"
3172 [(set (match_operand:V4SI 0 "register_operand" "=x")
3174 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
3175 (match_operand:V2SI 2 "const0_operand")))]
3179 return "vcvttpd2dq{x}\t{%1, %0|%0, %1}";
3181 return "cvttpd2dq\t{%1, %0|%0, %1}";
3183 [(set_attr "type" "ssecvt")
3184 (set_attr "amdfam10_decode" "double")
3185 (set_attr "athlon_decode" "vector")
3186 (set_attr "bdver1_decode" "double")
3187 (set_attr "prefix" "maybe_vex")
3188 (set_attr "mode" "TI")])
;; Insn sse2_cvtsd2ss: narrows the V2DF operand 2 with
;; (float_truncate:V2SF ...).  Operand 1 is a V4SF register.
;; Alternatives 0 and 1 (isa "noavx") tie operand 1 to the destination
;; and take the source from a register or memory respectively;
;; alternative 2 (isa "avx") is the three-operand vcvtsd2ss.  The
;; memory-capable alternatives print the source with %q on the Intel side.
;; NOTE(review): alternative 2 allows "v" registers but its prefix is
;; "vex"; sse_cvtsi2ss uses "maybe_evex" for its "v" alternative --
;; confirm which is intended here.
3190 (define_insn "sse2_cvtsd2ss"
3191 [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
3194 (float_truncate:V2SF
3195 (match_operand:V2DF 2 "nonimmediate_operand" "x,m,vm")))
3196 (match_operand:V4SF 1 "register_operand" "0,0,v")
3200 cvtsd2ss\t{%2, %0|%0, %2}
3201 cvtsd2ss\t{%2, %0|%0, %q2}
3202 vcvtsd2ss\t{%2, %1, %0|%0, %1, %q2}"
3203 [(set_attr "isa" "noavx,noavx,avx")
3204 (set_attr "type" "ssecvt")
3205 (set_attr "athlon_decode" "vector,double,*")
3206 (set_attr "amdfam10_decode" "vector,double,*")
3207 (set_attr "bdver1_decode" "direct,direct,*")
3208 (set_attr "btver2_decode" "double,double,double")
3209 (set_attr "prefix" "orig,orig,vex")
3210 (set_attr "mode" "SF")])
;; Insn sse2_cvtss2sd: elements 0 and 1 of the V4SF operand 2 are selected
;; as the conversion input; operand 1 is a V2DF register.  Alternatives 0
;; and 1 (isa "noavx") tie operand 1 to the destination; alternative 2
;; (isa "avx") is the three-operand vcvtss2sd.  The memory-capable
;; alternatives print the source with %k on the Intel side.
;; NOTE(review): alternative 2 allows "v" registers but its prefix is
;; "vex"; sse_cvtsi2ss uses "maybe_evex" for its "v" alternative --
;; confirm which is intended here.
3212 (define_insn "sse2_cvtss2sd"
3213 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
3217 (match_operand:V4SF 2 "nonimmediate_operand" "x,m,vm")
3218 (parallel [(const_int 0) (const_int 1)])))
3219 (match_operand:V2DF 1 "register_operand" "0,0,v")
3223 cvtss2sd\t{%2, %0|%0, %2}
3224 cvtss2sd\t{%2, %0|%0, %k2}
3225 vcvtss2sd\t{%2, %1, %0|%0, %1, %k2}"
3226 [(set_attr "isa" "noavx,noavx,avx")
3227 (set_attr "type" "ssecvt")
3228 (set_attr "amdfam10_decode" "vector,double,*")
3229 (set_attr "athlon_decode" "direct,direct,*")
3230 (set_attr "bdver1_decode" "direct,direct,*")
3231 (set_attr "btver2_decode" "double,double,double")
3232 (set_attr "prefix" "orig,orig,vex")
3233 (set_attr "mode" "DF")])
3235 (define_insn "avx_cvtpd2ps256"
3236 [(set (match_operand:V4SF 0 "register_operand" "=x")
3237 (float_truncate:V4SF
3238 (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
3240 "vcvtpd2ps{y}\t{%1, %0|%0, %1}"
3241 [(set_attr "type" "ssecvt")
3242 (set_attr "prefix" "vex")
3243 (set_attr "btver2_decode" "vector")
3244 (set_attr "mode" "V4SF")])
3246 (define_expand "sse2_cvtpd2ps"
3247 [(set (match_operand:V4SF 0 "register_operand")
3249 (float_truncate:V2SF
3250 (match_operand:V2DF 1 "nonimmediate_operand"))
3253 "operands[2] = CONST0_RTX (V2SFmode);")
;; Insn *sse2_cvtpd2ps: narrows the V2DF operand 1 with
;; (float_truncate:V2SF ...), paired with a V2SF const0_operand, with a
;; V4SF destination.  The output template is C code that returns either
;; the VEX form "vcvtpd2ps{x}" or the legacy "cvtpd2ps" string.
3255 (define_insn "*sse2_cvtpd2ps"
3256 [(set (match_operand:V4SF 0 "register_operand" "=x")
3258 (float_truncate:V2SF
3259 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
3260 (match_operand:V2SF 2 "const0_operand")))]
3264 return "vcvtpd2ps{x}\t{%1, %0|%0, %1}";
3266 return "cvtpd2ps\t{%1, %0|%0, %1}";
3268 [(set_attr "type" "ssecvt")
3269 (set_attr "amdfam10_decode" "double")
3270 (set_attr "athlon_decode" "vector")
3271 (set_attr "bdver1_decode" "double")
3272 (set_attr "prefix_data16" "1")
3273 (set_attr "prefix" "maybe_vex")
3274 (set_attr "mode" "V4SF")])
3276 ;; For <sse2_avx_avx512f>_cvtps2pd<avxsizesuffix> insn pattern
3277 (define_mode_attr sf2dfmode
3278 [(V8DF "V8SF") (V4DF "V4SF")])
;; Insn <sse2_avx_avx512f>_cvtps2pd<avxsizesuffix>: widens a single-float
;; vector to the VF2_512_256 double vector with (float_extend ...).  The
;; source mode comes from the sf2dfmode attribute (V8DF takes V8SF, V4DF
;; takes V4SF), i.e. the same element count as the destination.
3280 (define_insn "<sse2_avx_avx512f>_cvtps2pd<avxsizesuffix>"
3281 [(set (match_operand:VF2_512_256 0 "register_operand" "=v")
3282 (float_extend:VF2_512_256
3283 (match_operand:<sf2dfmode> 1 "nonimmediate_operand" "vm")))]
3285 "vcvtps2pd\t{%1, %0|%0, %1}"
3286 [(set_attr "type" "ssecvt")
3287 (set_attr "prefix" "maybe_vex")
3288 (set_attr "mode" "<MODE>")])
;; Variant whose source is a V8SF operand from which elements 0..3 are
;; selected; the result is V4DF register operand 0.  The template prints
;; operand 1 with the %x operand modifier and emits vcvtps2pd.
3290 (define_insn "*avx_cvtps2pd256_2"
3291 [(set (match_operand:V4DF 0 "register_operand" "=x")
3294 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
3295 (parallel [(const_int 0) (const_int 1)
3296 (const_int 2) (const_int 3)]))))]
3298 "vcvtps2pd\t{%x1, %0|%0, %x1}"
3299 [(set_attr "type" "ssecvt")
3300 (set_attr "prefix" "vex")
3301 (set_attr "mode" "V4DF")])
;; Produces V2DF register operand 0 from elements 0 and 1 of V4SF operand 1
;; (register or memory) using %vcvtps2pd.  In the Intel-syntax half of the
;; template the source is printed with the %q modifier.
3303 (define_insn "sse2_cvtps2pd"
3304 [(set (match_operand:V2DF 0 "register_operand" "=x")
3307 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3308 (parallel [(const_int 0) (const_int 1)]))))]
3310 "%vcvtps2pd\t{%1, %0|%0, %q1}"
3311 [(set_attr "type" "ssecvt")
3312 (set_attr "amdfam10_decode" "direct")
3313 (set_attr "athlon_decode" "double")
3314 (set_attr "bdver1_decode" "double")
3315 (set_attr "prefix_data16" "0")
3316 (set_attr "prefix" "maybe_vex")
3317 (set_attr "mode" "V2DF")])
;; Two-step expansion on V4SF operand 1: the pattern first uses the element
;; selection {6, 7, 2, 3}, then uses elements {0, 1} to set V2DF operand 0.
;; Operand 2 is a fresh V4SF pseudo allocated by the preparation statement.
3319 (define_expand "vec_unpacks_hi_v4sf"
3324 (match_operand:V4SF 1 "nonimmediate_operand"))
3325 (parallel [(const_int 6) (const_int 7)
3326 (const_int 2) (const_int 3)])))
3327 (set (match_operand:V2DF 0 "register_operand")
3331 (parallel [(const_int 0) (const_int 1)]))))]
3333 "operands[2] = gen_reg_rtx (V4SFmode);")
;; Selects elements 4..7 of V8SF operand 1 and then sets V4DF operand 0.
;; Operand 2 is a fresh V4SF pseudo allocated by the preparation statement.
3335 (define_expand "vec_unpacks_hi_v8sf"
3338 (match_operand:V8SF 1 "nonimmediate_operand")
3339 (parallel [(const_int 4) (const_int 5)
3340 (const_int 6) (const_int 7)])))
3341 (set (match_operand:V4DF 0 "register_operand")
3345 "operands[2] = gen_reg_rtx (V4SFmode);")
;; Sets V2DF operand 0 from elements 0 and 1 of V4SF operand 1.
3347 (define_expand "vec_unpacks_lo_v4sf"
3348 [(set (match_operand:V2DF 0 "register_operand")
3351 (match_operand:V4SF 1 "nonimmediate_operand")
3352 (parallel [(const_int 0) (const_int 1)]))))]
;; Sets V4DF operand 0 from elements 0..3 of V8SF operand 1.
3355 (define_expand "vec_unpacks_lo_v8sf"
3356 [(set (match_operand:V4DF 0 "register_operand")
3359 (match_operand:V8SF 1 "nonimmediate_operand")
3360 (parallel [(const_int 0) (const_int 1)
3361 (const_int 2) (const_int 3)]))))]
;; Maps an integer vector mode to the floating-point vector mode that has
;; half as many elements, each twice as wide (e.g. V8HI -> V4SF,
;; V4SI -> V2DF).
3364 (define_mode_attr sseunpackfltmode
3365 [(V8HI "V4SF") (V4SI "V2DF") (V16HI "V8SF")
3366 (V8SI "V4DF") (V32HI "V16SF") (V16SI "V8DF")])
;; Calls gen_vec_unpacks_hi_<mode> to fill a <sseunpackmode> temporary from
;; operand 1, then sets operand 0 to the FLOAT (<sseunpackfltmode>) of that
;; temporary.
3368 (define_expand "vec_unpacks_float_hi_<mode>"
3369 [(match_operand:<sseunpackfltmode> 0 "register_operand")
3370 (match_operand:VI2_AVX512F 1 "register_operand")]
3373 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
3375 emit_insn (gen_vec_unpacks_hi_<mode> (tmp, operands[1]));
3376 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
3377 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
;; Calls gen_vec_unpacks_lo_<mode> to fill a <sseunpackmode> temporary from
;; operand 1, then sets operand 0 to the FLOAT (<sseunpackfltmode>) of that
;; temporary.
3381 (define_expand "vec_unpacks_float_lo_<mode>"
3382 [(match_operand:<sseunpackfltmode> 0 "register_operand")
3383 (match_operand:VI2_AVX512F 1 "register_operand")]
3386 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
3388 emit_insn (gen_vec_unpacks_lo_<mode> (tmp, operands[1]));
3389 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
3390 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
;; Calls gen_vec_unpacku_hi_<mode> to fill a <sseunpackmode> temporary from
;; operand 1, then sets operand 0 to the FLOAT (<sseunpackfltmode>) of that
;; temporary.
3394 (define_expand "vec_unpacku_float_hi_<mode>"
3395 [(match_operand:<sseunpackfltmode> 0 "register_operand")
3396 (match_operand:VI2_AVX512F 1 "register_operand")]
3399 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
3401 emit_insn (gen_vec_unpacku_hi_<mode> (tmp, operands[1]));
3402 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
3403 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
;; Calls gen_vec_unpacku_lo_<mode> to fill a <sseunpackmode> temporary from
;; operand 1, then sets operand 0 to the FLOAT (<sseunpackfltmode>) of that
;; temporary.
3407 (define_expand "vec_unpacku_float_lo_<mode>"
3408 [(match_operand:<sseunpackfltmode> 0 "register_operand")
3409 (match_operand:VI2_AVX512F 1 "register_operand")]
3412 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
3414 emit_insn (gen_vec_unpacku_lo_<mode> (tmp, operands[1]));
3415 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
3416 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
;; Two-step expansion on V4SI operand 1: the pattern first uses the element
;; selection {2, 3, 2, 3}, then uses elements {0, 1} to set V2DF operand 0.
;; Operand 2 is a fresh V4SI pseudo allocated by the preparation statement.
3420 (define_expand "vec_unpacks_float_hi_v4si"
3423 (match_operand:V4SI 1 "nonimmediate_operand")
3424 (parallel [(const_int 2) (const_int 3)
3425 (const_int 2) (const_int 3)])))
3426 (set (match_operand:V2DF 0 "register_operand")
3430 (parallel [(const_int 0) (const_int 1)]))))]
3432 "operands[2] = gen_reg_rtx (V4SImode);")
;; Sets V2DF operand 0 from elements 0 and 1 of V4SI operand 1.
3434 (define_expand "vec_unpacks_float_lo_v4si"
3435 [(set (match_operand:V2DF 0 "register_operand")
3438 (match_operand:V4SI 1 "nonimmediate_operand")
3439 (parallel [(const_int 0) (const_int 1)]))))]
;; Selects elements 4..7 of V8SI operand 1 and then sets V4DF operand 0.
;; Operand 2 is a fresh V4SI pseudo allocated by the preparation statement.
3442 (define_expand "vec_unpacks_float_hi_v8si"
3445 (match_operand:V8SI 1 "nonimmediate_operand")
3446 (parallel [(const_int 4) (const_int 5)
3447 (const_int 6) (const_int 7)])))
3448 (set (match_operand:V4DF 0 "register_operand")
3452 "operands[2] = gen_reg_rtx (V4SImode);")
;; Sets V4DF operand 0 from elements 0..3 of V8SI operand 1.
3454 (define_expand "vec_unpacks_float_lo_v8si"
3455 [(set (match_operand:V4DF 0 "register_operand")
3458 (match_operand:V8SI 1 "nonimmediate_operand")
3459 (parallel [(const_int 0) (const_int 1)
3460 (const_int 2) (const_int 3)]))))]
;; Unsigned variant built from the element selections {2, 3, 2, 3} and
;; {0, 1} of V4SI operand 1, followed by a fix-up: lt of operand 6 against
;; operand 3, and of operand 7 with operand 4, and finally operand 0 =
;; operand 6 + operand 8.
;; Preparation: operand 3 is the V2DF zero vector in a register, operand 4
;; is a V2DF constant built from 2**32 (real_ldexp of dconst1 by 32),
;; operand 5 is a V4SI pseudo and operands 6..8 are V2DF pseudos.
3463 (define_expand "vec_unpacku_float_hi_v4si"
3466 (match_operand:V4SI 1 "nonimmediate_operand")
3467 (parallel [(const_int 2) (const_int 3)
3468 (const_int 2) (const_int 3)])))
3473 (parallel [(const_int 0) (const_int 1)]))))
3475 (lt:V2DF (match_dup 6) (match_dup 3)))
3477 (and:V2DF (match_dup 7) (match_dup 4)))
3478 (set (match_operand:V2DF 0 "register_operand")
3479 (plus:V2DF (match_dup 6) (match_dup 8)))]
3482 REAL_VALUE_TYPE TWO32r;
3486 real_ldexp (&TWO32r, &dconst1, 32);
3487 x = const_double_from_real_value (TWO32r, DFmode);
3489 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
3490 operands[4] = force_reg (V2DFmode,
3491 ix86_build_const_vector (V2DFmode, 1, x));
3493 operands[5] = gen_reg_rtx (V4SImode);
3495 for (i = 6; i < 9; i++)
3496 operands[i] = gen_reg_rtx (V2DFmode);
;; Unsigned variant using elements {0, 1} of V4SI operand 1, followed by a
;; fix-up: lt of operand 5 against operand 3, and of operand 6 with
;; operand 4, and finally operand 0 = operand 5 + operand 7.
;; Preparation: operand 3 is the V2DF zero vector in a register, operand 4
;; is a V2DF constant built from 2**32 (real_ldexp of dconst1 by 32), and
;; operands 5..7 are V2DF pseudos.
3499 (define_expand "vec_unpacku_float_lo_v4si"
3503 (match_operand:V4SI 1 "nonimmediate_operand")
3504 (parallel [(const_int 0) (const_int 1)]))))
3506 (lt:V2DF (match_dup 5) (match_dup 3)))
3508 (and:V2DF (match_dup 6) (match_dup 4)))
3509 (set (match_operand:V2DF 0 "register_operand")
3510 (plus:V2DF (match_dup 5) (match_dup 7)))]
3513 REAL_VALUE_TYPE TWO32r;
3517 real_ldexp (&TWO32r, &dconst1, 32);
3518 x = const_double_from_real_value (TWO32r, DFmode);
3520 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
3521 operands[4] = force_reg (V2DFmode,
3522 ix86_build_const_vector (V2DFmode, 1, x));
3524 for (i = 5; i < 8; i++)
3525 operands[i] = gen_reg_rtx (V2DFmode);
;; Unsigned V8SI -> V4DF conversion emitted from C code:
;;   tmp[5] = gen_vec_extract_hi_v8si of operand 1 (a V4SI pseudo),
;;   tmp[2] = gen_floatv4siv4df2 (tmp[5]),
;;   tmp[3] = LT (tmp[2], tmp[0]) where tmp[0] is the V4DF zero vector,
;;   tmp[4] = tmp[3] AND tmp[1] where tmp[1] is a V4DF constant built from
;;            2**32 (real_ldexp of dconst1 by 32),
;;   operand 0 = tmp[2] + tmp[4].
3528 (define_expand "vec_unpacku_float_hi_v8si"
3529 [(match_operand:V4DF 0 "register_operand")
3530 (match_operand:V8SI 1 "register_operand")]
3533 REAL_VALUE_TYPE TWO32r;
3537 real_ldexp (&TWO32r, &dconst1, 32);
3538 x = const_double_from_real_value (TWO32r, DFmode);
3540 tmp[0] = force_reg (V4DFmode, CONST0_RTX (V4DFmode));
3541 tmp[1] = force_reg (V4DFmode, ix86_build_const_vector (V4DFmode, 1, x));
3542 tmp[5] = gen_reg_rtx (V4SImode);
3544 for (i = 2; i < 5; i++)
3545 tmp[i] = gen_reg_rtx (V4DFmode);
3546 emit_insn (gen_vec_extract_hi_v8si (tmp[5], operands[1]));
3547 emit_insn (gen_floatv4siv4df2 (tmp[2], tmp[5]));
3548 emit_insn (gen_rtx_SET (VOIDmode, tmp[3],
3549 gen_rtx_LT (V4DFmode, tmp[2], tmp[0])));
3550 emit_insn (gen_andv4df3 (tmp[4], tmp[3], tmp[1]));
3551 emit_insn (gen_addv4df3 (operands[0], tmp[2], tmp[4]));
;; Unsigned V8SI -> V4DF conversion emitted from C code:
;;   tmp[2] = gen_avx_cvtdq2pd256_2 (operand 1),
;;   tmp[3] = LT (tmp[2], tmp[0]) where tmp[0] is the V4DF zero vector,
;;   tmp[4] = tmp[3] AND tmp[1] where tmp[1] is a V4DF constant built from
;;            2**32 (real_ldexp of dconst1 by 32),
;;   operand 0 = tmp[2] + tmp[4].
3555 (define_expand "vec_unpacku_float_lo_v8si"
3556 [(match_operand:V4DF 0 "register_operand")
3557 (match_operand:V8SI 1 "nonimmediate_operand")]
3560 REAL_VALUE_TYPE TWO32r;
3564 real_ldexp (&TWO32r, &dconst1, 32);
3565 x = const_double_from_real_value (TWO32r, DFmode);
3567 tmp[0] = force_reg (V4DFmode, CONST0_RTX (V4DFmode));
3568 tmp[1] = force_reg (V4DFmode, ix86_build_const_vector (V4DFmode, 1, x));
3570 for (i = 2; i < 5; i++)
3571 tmp[i] = gen_reg_rtx (V4DFmode);
3572 emit_insn (gen_avx_cvtdq2pd256_2 (tmp[2], operands[1]));
3573 emit_insn (gen_rtx_SET (VOIDmode, tmp[3],
3574 gen_rtx_LT (V4DFmode, tmp[2], tmp[0])));
3575 emit_insn (gen_andv4df3 (tmp[4], tmp[3], tmp[1]));
3576 emit_insn (gen_addv4df3 (operands[0], tmp[2], tmp[4]));
;; Applies float_truncate to <sf2dfmode> on each of the VF2_512_256
;; operands 1 and 2, then sets <ssePSmode> operand 0 to a vec_concat.
;; Operands 3 and 4 are fresh <sf2dfmode> pseudos allocated by the
;; preparation statements.
3580 (define_expand "vec_pack_trunc_<mode>"
3582 (float_truncate:<sf2dfmode>
3583 (match_operand:VF2_512_256 1 "nonimmediate_operand")))
3585 (float_truncate:<sf2dfmode>
3586 (match_operand:VF2_512_256 2 "nonimmediate_operand")))
3587 (set (match_operand:<ssePSmode> 0 "register_operand")
3588 (vec_concat:<ssePSmode>
3593 operands[3] = gen_reg_rtx (<sf2dfmode>mode);
3594 operands[4] = gen_reg_rtx (<sf2dfmode>mode);
;; Packs two V2DF operands into V4SF operand 0.  Two sequences are visible:
;; under TARGET_AVX && !TARGET_PREFER_AVX128 the inputs are concatenated
;; into a V4DF temporary (operand 1 forced into a register first) and
;; converted with gen_avx_cvtpd2ps256; the alternative sequence converts
;; each input with gen_sse2_cvtpd2ps into a V4SF temporary and combines
;; the two temporaries with gen_sse_movlhps.
3597 (define_expand "vec_pack_trunc_v2df"
3598 [(match_operand:V4SF 0 "register_operand")
3599 (match_operand:V2DF 1 "nonimmediate_operand")
3600 (match_operand:V2DF 2 "nonimmediate_operand")]
3605 if (TARGET_AVX && !TARGET_PREFER_AVX128)
3607 tmp0 = gen_reg_rtx (V4DFmode);
3608 tmp1 = force_reg (V2DFmode, operands[1]);
3610 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
3611 emit_insn (gen_avx_cvtpd2ps256 (operands[0], tmp0));
3615 tmp0 = gen_reg_rtx (V4SFmode);
3616 tmp1 = gen_reg_rtx (V4SFmode);
3618 emit_insn (gen_sse2_cvtpd2ps (tmp0, operands[1]));
3619 emit_insn (gen_sse2_cvtpd2ps (tmp1, operands[2]));
3620 emit_insn (gen_sse_movlhps (operands[0], tmp0, tmp1));
;; Converts each V8DF input with gen_fix_truncv8dfv8si2 into a V8SI
;; temporary and concatenates the two temporaries into V16SI operand 0
;; with gen_avx_vec_concatv16si.
3625 (define_expand "vec_pack_sfix_trunc_v8df"
3626 [(match_operand:V16SI 0 "register_operand")
3627 (match_operand:V8DF 1 "nonimmediate_operand")
3628 (match_operand:V8DF 2 "nonimmediate_operand")]
3633 r1 = gen_reg_rtx (V8SImode);
3634 r2 = gen_reg_rtx (V8SImode);
3636 emit_insn (gen_fix_truncv8dfv8si2 (r1, operands[1]));
3637 emit_insn (gen_fix_truncv8dfv8si2 (r2, operands[2]));
3638 emit_insn (gen_avx_vec_concatv16si (operands[0], r1, r2));
;; Converts each V4DF input with gen_fix_truncv4dfv4si2 into a V4SI
;; temporary and concatenates the two temporaries into V8SI operand 0
;; with gen_avx_vec_concatv8si.
3642 (define_expand "vec_pack_sfix_trunc_v4df"
3643 [(match_operand:V8SI 0 "register_operand")
3644 (match_operand:V4DF 1 "nonimmediate_operand")
3645 (match_operand:V4DF 2 "nonimmediate_operand")]
3650 r1 = gen_reg_rtx (V4SImode);
3651 r2 = gen_reg_rtx (V4SImode);
3653 emit_insn (gen_fix_truncv4dfv4si2 (r1, operands[1]));
3654 emit_insn (gen_fix_truncv4dfv4si2 (r2, operands[2]));
3655 emit_insn (gen_avx_vec_concatv8si (operands[0], r1, r2));
;; Packs two V2DF operands into V4SI operand 0.  Two sequences are visible:
;; under TARGET_AVX && !TARGET_PREFER_AVX128 the inputs are concatenated
;; into a V4DF temporary and converted with gen_fix_truncv4dfv4si2; the
;; alternative sequence converts each input with gen_sse2_cvttpd2dq into a
;; V4SI temporary and merges them with gen_vec_interleave_lowv2di applied
;; to the V2DI lowparts of operand 0 and the two temporaries.
3659 (define_expand "vec_pack_sfix_trunc_v2df"
3660 [(match_operand:V4SI 0 "register_operand")
3661 (match_operand:V2DF 1 "nonimmediate_operand")
3662 (match_operand:V2DF 2 "nonimmediate_operand")]
3667 if (TARGET_AVX && !TARGET_PREFER_AVX128)
3669 tmp0 = gen_reg_rtx (V4DFmode);
3670 tmp1 = force_reg (V2DFmode, operands[1]);
3672 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
3673 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp0));
3677 tmp0 = gen_reg_rtx (V4SImode);
3678 tmp1 = gen_reg_rtx (V4SImode);
3680 emit_insn (gen_sse2_cvttpd2dq (tmp0, operands[1]));
3681 emit_insn (gen_sse2_cvttpd2dq (tmp1, operands[2]));
3683 (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]),
3684 gen_lowpart (V2DImode, tmp0),
3685 gen_lowpart (V2DImode, tmp1)));
;; Maps a double-precision vector mode to the SImode vector mode with
;; twice as many elements (V8DF -> V16SI, V4DF -> V8SI, V2DF -> V4SI).
3690 (define_mode_attr ssepackfltmode
3691 [(V8DF "V16SI") (V4DF "V8SI") (V2DF "V4SI")])
;; Unsigned pack-with-truncation built on the signed expander:
;;   tmp[0], tmp[1] = ix86_expand_adjust_ufix_to_sfix_si of operands 1 and
;;                    2; each call also stores a value through &tmp[2] and
;;                    &tmp[3] respectively,
;;   tmp[4] = gen_vec_pack_sfix_trunc_<mode> (tmp[0], tmp[1]),
;;   tmp[5] = ix86_expand_vec_extract_even_odd of tmp[2] and tmp[3] with
;;            last argument 0,
;;   operand 0 = tmp[4] XOR tmp[5].
;; When <ssepackfltmode> is V4SI or TARGET_AVX2 holds, the extraction is
;; done directly in <ssepackfltmode>; the alternative sequence performs it
;; on V8SF lowparts and views the result as V8SI.
3693 (define_expand "vec_pack_ufix_trunc_<mode>"
3694 [(match_operand:<ssepackfltmode> 0 "register_operand")
3695 (match_operand:VF2_128_256 1 "register_operand")
3696 (match_operand:VF2_128_256 2 "register_operand")]
3700 tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]);
3701 tmp[1] = ix86_expand_adjust_ufix_to_sfix_si (operands[2], &tmp[3]);
3702 tmp[4] = gen_reg_rtx (<ssepackfltmode>mode);
3703 emit_insn (gen_vec_pack_sfix_trunc_<mode> (tmp[4], tmp[0], tmp[1]));
3704 if (<ssepackfltmode>mode == V4SImode || TARGET_AVX2)
3706 tmp[5] = gen_reg_rtx (<ssepackfltmode>mode);
3707 ix86_expand_vec_extract_even_odd (tmp[5], tmp[2], tmp[3], 0);
3711 tmp[5] = gen_reg_rtx (V8SFmode);
3712 ix86_expand_vec_extract_even_odd (tmp[5], gen_lowpart (V8SFmode, tmp[2]),
3713 gen_lowpart (V8SFmode, tmp[3]), 0);
3714 tmp[5] = gen_lowpart (V8SImode, tmp[5]);
3716 tmp[6] = expand_simple_binop (<ssepackfltmode>mode, XOR, tmp[4], tmp[5],
3717 operands[0], 0, OPTAB_DIRECT);
3718 if (tmp[6] != operands[0])
3719 emit_move_insn (operands[0], tmp[6]);
;; Converts each V4DF input with gen_avx_cvtpd2dq256 into a V4SI temporary
;; and concatenates the two temporaries into V8SI operand 0 with
;; gen_avx_vec_concatv8si.
3723 (define_expand "vec_pack_sfix_v4df"
3724 [(match_operand:V8SI 0 "register_operand")
3725 (match_operand:V4DF 1 "nonimmediate_operand")
3726 (match_operand:V4DF 2 "nonimmediate_operand")]
3731 r1 = gen_reg_rtx (V4SImode);
3732 r2 = gen_reg_rtx (V4SImode);
3734 emit_insn (gen_avx_cvtpd2dq256 (r1, operands[1]));
3735 emit_insn (gen_avx_cvtpd2dq256 (r2, operands[2]));
3736 emit_insn (gen_avx_vec_concatv8si (operands[0], r1, r2));
;; Packs two V2DF operands into V4SI operand 0.  Two sequences are visible:
;; under TARGET_AVX && !TARGET_PREFER_AVX128 the inputs are concatenated
;; into a V4DF temporary and converted with gen_avx_cvtpd2dq256; the
;; alternative sequence converts each input with gen_sse2_cvtpd2dq into a
;; V4SI temporary and merges them with gen_vec_interleave_lowv2di applied
;; to the V2DI lowparts of operand 0 and the two temporaries.
3740 (define_expand "vec_pack_sfix_v2df"
3741 [(match_operand:V4SI 0 "register_operand")
3742 (match_operand:V2DF 1 "nonimmediate_operand")
3743 (match_operand:V2DF 2 "nonimmediate_operand")]
3748 if (TARGET_AVX && !TARGET_PREFER_AVX128)
3750 tmp0 = gen_reg_rtx (V4DFmode);
3751 tmp1 = force_reg (V2DFmode, operands[1]);
3753 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
3754 emit_insn (gen_avx_cvtpd2dq256 (operands[0], tmp0));
3758 tmp0 = gen_reg_rtx (V4SImode);
3759 tmp1 = gen_reg_rtx (V4SImode);
3761 emit_insn (gen_sse2_cvtpd2dq (tmp0, operands[1]));
3762 emit_insn (gen_sse2_cvtpd2dq (tmp1, operands[2]));
3764 (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]),
3765 gen_lowpart (V2DImode, tmp0),
3766 gen_lowpart (V2DImode, tmp1)));
3771 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3773 ;; Parallel single-precision floating point element swizzling
3775 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Wrapper around sse_movhlps that accepts nonimmediate operands: it runs
;; ix86_fixup_binary_operands to obtain a destination, emits
;; gen_sse_movhlps into it, and copies the result to operand 0 when the
;; fixed-up destination differs from operand 0.
3777 (define_expand "sse_movhlps_exp"
3778 [(set (match_operand:V4SF 0 "nonimmediate_operand")
3781 (match_operand:V4SF 1 "nonimmediate_operand")
3782 (match_operand:V4SF 2 "nonimmediate_operand"))
3783 (parallel [(const_int 6)
3789 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
3791 emit_insn (gen_sse_movhlps (dst, operands[1], operands[2]));
3793 /* Fix up the destination if needed. */
3794 if (dst != operands[0])
3795 emit_move_insn (operands[0], dst);
;; Five alternatives.  The condition requires TARGET_SSE and rejects the
;; case where operands 1 and 2 are both memory.
;;   0/1: register source, movhlps / vmovhlps.
;;   2/3: offsettable memory source, movlps / vmovlps reading %H2.
;;   4:   memory destination, %vmovhps storing operand 2.
3800 (define_insn "sse_movhlps"
3801 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,m")
3804 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
3805 (match_operand:V4SF 2 "nonimmediate_operand" " x,x,o,o,x"))
3806 (parallel [(const_int 6)
3810 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3812 movhlps\t{%2, %0|%0, %2}
3813 vmovhlps\t{%2, %1, %0|%0, %1, %2}
3814 movlps\t{%H2, %0|%0, %H2}
3815 vmovlps\t{%H2, %1, %0|%0, %1, %H2}
3816 %vmovhps\t{%2, %0|%q0, %2}"
3817 [(set_attr "isa" "noavx,avx,noavx,avx,*")
3818 (set_attr "type" "ssemov")
3819 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
3820 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
;; Wrapper around sse_movlhps that accepts nonimmediate operands: it runs
;; ix86_fixup_binary_operands to obtain a destination, emits
;; gen_sse_movlhps into it, and copies the result to operand 0 when the
;; fixed-up destination differs from operand 0.
3822 (define_expand "sse_movlhps_exp"
3823 [(set (match_operand:V4SF 0 "nonimmediate_operand")
3826 (match_operand:V4SF 1 "nonimmediate_operand")
3827 (match_operand:V4SF 2 "nonimmediate_operand"))
3828 (parallel [(const_int 0)
3834 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
3836 emit_insn (gen_sse_movlhps (dst, operands[1], operands[2]));
3838 /* Fix up the destination if needed. */
3839 if (dst != operands[0])
3840 emit_move_insn (operands[0], dst);
;; Five alternatives.  The condition requires TARGET_SSE and
;; ix86_binary_operator_ok for the operands.
;;   0/1: register source, movlhps / vmovlhps.
;;   2/3: memory source, movhps / vmovhps (source printed with %q in Intel
;;        syntax).
;;   4:   offsettable memory destination, %vmovlps storing operand 2 to %H0.
3845 (define_insn "sse_movlhps"
3846 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,o")
3849 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
3850 (match_operand:V4SF 2 "nonimmediate_operand" " x,x,m,m,x"))
3851 (parallel [(const_int 0)
3855 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
3857 movlhps\t{%2, %0|%0, %2}
3858 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3859 movhps\t{%2, %0|%0, %q2}
3860 vmovhps\t{%2, %1, %0|%0, %1, %q2}
3861 %vmovlps\t{%2, %H0|%H0, %2}"
3862 [(set_attr "isa" "noavx,avx,noavx,avx,*")
3863 (set_attr "type" "ssemov")
3864 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
3865 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
3867 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; Selects elements {2, 10, 3, 11, 6, 14, 7, 15} from V8SF operands 1
;; (register) and 2 (register or memory) into V8SF operand 0; emitted as
;; vunpckhps.
3868 (define_insn "avx_unpckhps256"
3869 [(set (match_operand:V8SF 0 "register_operand" "=x")
3872 (match_operand:V8SF 1 "register_operand" "x")
3873 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3874 (parallel [(const_int 2) (const_int 10)
3875 (const_int 3) (const_int 11)
3876 (const_int 6) (const_int 14)
3877 (const_int 7) (const_int 15)])))]
3879 "vunpckhps\t{%2, %1, %0|%0, %1, %2}"
3880 [(set_attr "type" "sselog")
3881 (set_attr "prefix" "vex")
3882 (set_attr "mode" "V8SF")])
;; Three-step expansion on V8SF operands 1 and 2.  The pattern uses the
;; selections {0, 8, 1, 9, 4, 12, 5, 13} and {2, 10, 3, 11, 6, 14, 7, 15},
;; then forms operand 0 with the selection {4, 5, 6, 7, 12, 13, 14, 15}.
;; Operands 3 and 4 are fresh V8SF pseudos allocated by the preparation
;; statements.
3884 (define_expand "vec_interleave_highv8sf"
3888 (match_operand:V8SF 1 "register_operand" "x")
3889 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3890 (parallel [(const_int 0) (const_int 8)
3891 (const_int 1) (const_int 9)
3892 (const_int 4) (const_int 12)
3893 (const_int 5) (const_int 13)])))
3899 (parallel [(const_int 2) (const_int 10)
3900 (const_int 3) (const_int 11)
3901 (const_int 6) (const_int 14)
3902 (const_int 7) (const_int 15)])))
3903 (set (match_operand:V8SF 0 "register_operand")
3908 (parallel [(const_int 4) (const_int 5)
3909 (const_int 6) (const_int 7)
3910 (const_int 12) (const_int 13)
3911 (const_int 14) (const_int 15)])))]
3914 operands[3] = gen_reg_rtx (V8SFmode);
3915 operands[4] = gen_reg_rtx (V8SFmode);
;; Selects elements {2, 6, 3, 7} from V4SF operands 1 and 2 into operand 0.
;; Alternative 0 (non-AVX) ties operand 1 to the output and emits unpckhps;
;; alternative 1 (AVX) emits the three-operand vunpckhps.
3918 (define_insn "vec_interleave_highv4sf"
3919 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3922 (match_operand:V4SF 1 "register_operand" "0,x")
3923 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
3924 (parallel [(const_int 2) (const_int 6)
3925 (const_int 3) (const_int 7)])))]
3928 unpckhps\t{%2, %0|%0, %2}
3929 vunpckhps\t{%2, %1, %0|%0, %1, %2}"
3930 [(set_attr "isa" "noavx,avx")
3931 (set_attr "type" "sselog")
3932 (set_attr "prefix" "orig,vex")
3933 (set_attr "mode" "V4SF")])
3935 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; Selects elements {0, 8, 1, 9, 4, 12, 5, 13} from V8SF operands 1
;; (register) and 2 (register or memory) into V8SF operand 0; emitted as
;; vunpcklps.
3936 (define_insn "avx_unpcklps256"
3937 [(set (match_operand:V8SF 0 "register_operand" "=x")
3940 (match_operand:V8SF 1 "register_operand" "x")
3941 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3942 (parallel [(const_int 0) (const_int 8)
3943 (const_int 1) (const_int 9)
3944 (const_int 4) (const_int 12)
3945 (const_int 5) (const_int 13)])))]
3947 "vunpcklps\t{%2, %1, %0|%0, %1, %2}"
3948 [(set_attr "type" "sselog")
3949 (set_attr "prefix" "vex")
3950 (set_attr "mode" "V8SF")])
;; Three-step expansion on V8SF operands 1 and 2.  The pattern uses the
;; selections {0, 8, 1, 9, 4, 12, 5, 13} and {2, 10, 3, 11, 6, 14, 7, 15},
;; then forms operand 0 with the selection {0, 1, 2, 3, 8, 9, 10, 11}.
;; Operands 3 and 4 are fresh V8SF pseudos allocated by the preparation
;; statements.
3952 (define_expand "vec_interleave_lowv8sf"
3956 (match_operand:V8SF 1 "register_operand" "x")
3957 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3958 (parallel [(const_int 0) (const_int 8)
3959 (const_int 1) (const_int 9)
3960 (const_int 4) (const_int 12)
3961 (const_int 5) (const_int 13)])))
3967 (parallel [(const_int 2) (const_int 10)
3968 (const_int 3) (const_int 11)
3969 (const_int 6) (const_int 14)
3970 (const_int 7) (const_int 15)])))
3971 (set (match_operand:V8SF 0 "register_operand")
3976 (parallel [(const_int 0) (const_int 1)
3977 (const_int 2) (const_int 3)
3978 (const_int 8) (const_int 9)
3979 (const_int 10) (const_int 11)])))]
3982 operands[3] = gen_reg_rtx (V8SFmode);
3983 operands[4] = gen_reg_rtx (V8SFmode);
;; Selects elements {0, 4, 1, 5} from V4SF operands 1 and 2 into operand 0.
;; Alternative 0 (non-AVX) ties operand 1 to the output and emits unpcklps;
;; alternative 1 (AVX) emits the three-operand vunpcklps.
3986 (define_insn "vec_interleave_lowv4sf"
3987 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3990 (match_operand:V4SF 1 "register_operand" "0,x")
3991 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
3992 (parallel [(const_int 0) (const_int 4)
3993 (const_int 1) (const_int 5)])))]
3996 unpcklps\t{%2, %0|%0, %2}
3997 vunpcklps\t{%2, %1, %0|%0, %1, %2}"
3998 [(set_attr "isa" "noavx,avx")
3999 (set_attr "type" "sselog")
4000 (set_attr "prefix" "orig,vex")
4001 (set_attr "mode" "V4SF")])
4003 ;; These are modeled with the same vec_concat as the others so that we
4004 ;; capture users of shufps that can use the new instructions
;; Selection {1, 1, 3, 3, 5, 5, 7, 7} involving V8SF operand 1 (register or
;; memory), i.e. each odd-indexed element appears twice; emitted as
;; vmovshdup.
4005 (define_insn "avx_movshdup256"
4006 [(set (match_operand:V8SF 0 "register_operand" "=x")
4009 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
4011 (parallel [(const_int 1) (const_int 1)
4012 (const_int 3) (const_int 3)
4013 (const_int 5) (const_int 5)
4014 (const_int 7) (const_int 7)])))]
4016 "vmovshdup\t{%1, %0|%0, %1}"
4017 [(set_attr "type" "sse")
4018 (set_attr "prefix" "vex")
4019 (set_attr "mode" "V8SF")])
;; V4SF counterpart emitted as %vmovshdup with V4SF operand 1 in a register
;; or memory; the element selection starts with index 1.  Marked as using a
;; rep prefix (prefix_rep = 1).
4021 (define_insn "sse3_movshdup"
4022 [(set (match_operand:V4SF 0 "register_operand" "=x")
4025 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
4027 (parallel [(const_int 1)
4032 "%vmovshdup\t{%1, %0|%0, %1}"
4033 [(set_attr "type" "sse")
4034 (set_attr "prefix_rep" "1")
4035 (set_attr "prefix" "maybe_vex")
4036 (set_attr "mode" "V4SF")])
;; Selection {0, 0, 2, 2, 4, 4, 6, 6} involving V8SF operand 1 (register or
;; memory), i.e. each even-indexed element appears twice; emitted as
;; vmovsldup.
4038 (define_insn "avx_movsldup256"
4039 [(set (match_operand:V8SF 0 "register_operand" "=x")
4042 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
4044 (parallel [(const_int 0) (const_int 0)
4045 (const_int 2) (const_int 2)
4046 (const_int 4) (const_int 4)
4047 (const_int 6) (const_int 6)])))]
4049 "vmovsldup\t{%1, %0|%0, %1}"
4050 [(set_attr "type" "sse")
4051 (set_attr "prefix" "vex")
4052 (set_attr "mode" "V8SF")])
;; V4SF counterpart emitted as %vmovsldup with V4SF operand 1 in a register
;; or memory; the element selection starts with index 0.  Marked as using a
;; rep prefix (prefix_rep = 1).
4054 (define_insn "sse3_movsldup"
4055 [(set (match_operand:V4SF 0 "register_operand" "=x")
4058 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
4060 (parallel [(const_int 0)
4065 "%vmovsldup\t{%1, %0|%0, %1}"
4066 [(set_attr "type" "sse")
4067 (set_attr "prefix_rep" "1")
4068 (set_attr "prefix" "maybe_vex")
4069 (set_attr "mode" "V4SF")])
;; Decodes the integer mask in operand 3 into the eight element selectors
;; that gen_avx_shufps256_1 expects.  The four 2-bit fields of the mask give
;; selectors f0, f1, f2 + 8, f3 + 8, and the same fields are reused for the
;; next four selectors as f0 + 4, f1 + 4, f2 + 12, f3 + 12.
4071 (define_expand "avx_shufps256"
4072 [(match_operand:V8SF 0 "register_operand")
4073 (match_operand:V8SF 1 "register_operand")
4074 (match_operand:V8SF 2 "nonimmediate_operand")
4075 (match_operand:SI 3 "const_int_operand")]
4078 int mask = INTVAL (operands[3]);
4079 emit_insn (gen_avx_shufps256_1 (operands[0], operands[1], operands[2],
4080 GEN_INT ((mask >> 0) & 3),
4081 GEN_INT ((mask >> 2) & 3),
4082 GEN_INT (((mask >> 4) & 3) + 8),
4083 GEN_INT (((mask >> 6) & 3) + 8),
4084 GEN_INT (((mask >> 0) & 3) + 4),
4085 GEN_INT (((mask >> 2) & 3) + 4),
4086 GEN_INT (((mask >> 4) & 3) + 12),
4087 GEN_INT (((mask >> 6) & 3) + 12)));
4091 ;; Each 2-bit field in mask selects 2 elements.
;; Matches a V8SF selection given by eight explicit selectors (operands
;; 3..10).  The condition requires selectors 7..10 to equal selectors 3..6
;; plus 4.  The output code re-encodes selectors 3..6 into the 8-bit
;; immediate (operands 5 and 6 are rebased by subtracting 8), stores it in
;; operands[3] and emits vshufps.
4092 (define_insn "avx_shufps256_1"
4093 [(set (match_operand:V8SF 0 "register_operand" "=x")
4096 (match_operand:V8SF 1 "register_operand" "x")
4097 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
4098 (parallel [(match_operand 3 "const_0_to_3_operand" )
4099 (match_operand 4 "const_0_to_3_operand" )
4100 (match_operand 5 "const_8_to_11_operand" )
4101 (match_operand 6 "const_8_to_11_operand" )
4102 (match_operand 7 "const_4_to_7_operand" )
4103 (match_operand 8 "const_4_to_7_operand" )
4104 (match_operand 9 "const_12_to_15_operand")
4105 (match_operand 10 "const_12_to_15_operand")])))]
4107 && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
4108 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
4109 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
4110 && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4))"
4113 mask = INTVAL (operands[3]);
4114 mask |= INTVAL (operands[4]) << 2;
4115 mask |= (INTVAL (operands[5]) - 8) << 4;
4116 mask |= (INTVAL (operands[6]) - 8) << 6;
4117 operands[3] = GEN_INT (mask);
4119 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4121 [(set_attr "type" "sseshuf")
4122 (set_attr "length_immediate" "1")
4123 (set_attr "prefix" "vex")
4124 (set_attr "mode" "V8SF")])
;; Decodes the integer mask in operand 3 into the four element selectors
;; that gen_sse_shufps_v4sf expects: the two low 2-bit fields are passed
;; as-is and the two high 2-bit fields are passed with 4 added.
4126 (define_expand "sse_shufps"
4127 [(match_operand:V4SF 0 "register_operand")
4128 (match_operand:V4SF 1 "register_operand")
4129 (match_operand:V4SF 2 "nonimmediate_operand")
4130 (match_operand:SI 3 "const_int_operand")]
4133 int mask = INTVAL (operands[3]);
4134 emit_insn (gen_sse_shufps_v4sf (operands[0], operands[1], operands[2],
4135 GEN_INT ((mask >> 0) & 3),
4136 GEN_INT ((mask >> 2) & 3),
4137 GEN_INT (((mask >> 4) & 3) + 4),
4138 GEN_INT (((mask >> 6) & 3) + 4)));
;; vec_select over the vec_concat of VI4F_128 operands 1 and 2 with four
;; explicit selectors: operands 3 and 4 in 0..3, operands 5 and 6 in 4..7.
;; The output code re-encodes the selectors into the 8-bit immediate
;; (operands 5 and 6 are rebased by subtracting 4), stores it in
;; operands[3], and returns the shufps or vshufps template depending on
;; which_alternative.
4142 (define_insn "sse_shufps_<mode>"
4143 [(set (match_operand:VI4F_128 0 "register_operand" "=x,x")
4144 (vec_select:VI4F_128
4145 (vec_concat:<ssedoublevecmode>
4146 (match_operand:VI4F_128 1 "register_operand" "0,x")
4147 (match_operand:VI4F_128 2 "nonimmediate_operand" "xm,xm"))
4148 (parallel [(match_operand 3 "const_0_to_3_operand")
4149 (match_operand 4 "const_0_to_3_operand")
4150 (match_operand 5 "const_4_to_7_operand")
4151 (match_operand 6 "const_4_to_7_operand")])))]
4155 mask |= INTVAL (operands[3]) << 0;
4156 mask |= INTVAL (operands[4]) << 2;
4157 mask |= (INTVAL (operands[5]) - 4) << 4;
4158 mask |= (INTVAL (operands[6]) - 4) << 6;
4159 operands[3] = GEN_INT (mask);
4161 switch (which_alternative)
4164 return "shufps\t{%3, %2, %0|%0, %2, %3}";
4166 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4171 [(set_attr "isa" "noavx,avx")
4172 (set_attr "type" "sseshuf")
4173 (set_attr "length_immediate" "1")
4174 (set_attr "prefix" "orig,vex")
4175 (set_attr "mode" "V4SF")])
;; Extracts elements 2 and 3 of V4SF operand 1 into V2SF operand 0.
;;   0: memory destination, %vmovhps.
;;   1: register to register, %vmovhlps.
;;   2: offsettable memory source, %vmovlps reading %H1.
4177 (define_insn "sse_storehps"
4178 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
4180 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
4181 (parallel [(const_int 2) (const_int 3)])))]
4184 %vmovhps\t{%1, %0|%q0, %1}
4185 %vmovhlps\t{%1, %d0|%d0, %1}
4186 %vmovlps\t{%H1, %d0|%d0, %H1}"
4187 [(set_attr "type" "ssemov")
4188 (set_attr "prefix" "maybe_vex")
4189 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Wrapper around sse_loadhps that accepts nonimmediate operands: it runs
;; ix86_fixup_binary_operands to obtain a destination, emits
;; gen_sse_loadhps into it, and copies the result to operand 0 when the
;; fixed-up destination differs from operand 0.
4191 (define_expand "sse_loadhps_exp"
4192 [(set (match_operand:V4SF 0 "nonimmediate_operand")
4195 (match_operand:V4SF 1 "nonimmediate_operand")
4196 (parallel [(const_int 0) (const_int 1)]))
4197 (match_operand:V2SF 2 "nonimmediate_operand")))]
4200 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
4202 emit_insn (gen_sse_loadhps (dst, operands[1], operands[2]));
4204 /* Fix up the destination if needed. */
4205 if (dst != operands[0])
4206 emit_move_insn (operands[0], dst);
;; Builds V4SF operand 0 from elements 0 and 1 of V4SF operand 1 together
;; with V2SF operand 2.
;;   0/1: operand 2 in memory, movhps / vmovhps.
;;   2/3: operand 2 in a register, movlhps / vmovlhps.
;;   4:   offsettable memory destination, %vmovlps storing operand 2 to %H0.
4211 (define_insn "sse_loadhps"
4212 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,o")
4215 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
4216 (parallel [(const_int 0) (const_int 1)]))
4217 (match_operand:V2SF 2 "nonimmediate_operand" " m,m,x,x,x")))]
4220 movhps\t{%2, %0|%0, %q2}
4221 vmovhps\t{%2, %1, %0|%0, %1, %q2}
4222 movlhps\t{%2, %0|%0, %2}
4223 vmovlhps\t{%2, %1, %0|%0, %1, %2}
4224 %vmovlps\t{%2, %H0|%H0, %2}"
4225 [(set_attr "isa" "noavx,avx,noavx,avx,*")
4226 (set_attr "type" "ssemov")
4227 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
4228 (set_attr "mode" "V2SF,V2SF,V4SF,V4SF,V2SF")])
;; Extracts elements 0 and 1 of V4SF operand 1 into V2SF operand 0.
;;   0: memory destination, %vmovlps.
;;   1: register to register, %vmovaps.
;;   2: memory source, %vmovlps (source printed with %q in Intel syntax).
4230 (define_insn "sse_storelps"
4231 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
4233 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,m")
4234 (parallel [(const_int 0) (const_int 1)])))]
4237 %vmovlps\t{%1, %0|%q0, %1}
4238 %vmovaps\t{%1, %0|%0, %1}
4239 %vmovlps\t{%1, %d0|%d0, %q1}"
4240 [(set_attr "type" "ssemov")
4241 (set_attr "prefix" "maybe_vex")
4242 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Wrapper around sse_loadlps that accepts nonimmediate operands: it runs
;; ix86_fixup_binary_operands to obtain a destination, emits
;; gen_sse_loadlps into it, and copies the result to operand 0 when the
;; fixed-up destination differs from operand 0.
4244 (define_expand "sse_loadlps_exp"
4245 [(set (match_operand:V4SF 0 "nonimmediate_operand")
4247 (match_operand:V2SF 2 "nonimmediate_operand")
4249 (match_operand:V4SF 1 "nonimmediate_operand")
4250 (parallel [(const_int 2) (const_int 3)]))))]
4253 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
4255 emit_insn (gen_sse_loadlps (dst, operands[1], operands[2]));
4257 /* Fix up the destination if needed. */
4258 if (dst != operands[0])
4259 emit_move_insn (operands[0], dst);
;; Builds V4SF operand 0 from V2SF operand 2 together with elements 2 and 3
;; of V4SF operand 1.
;;   0/1: both inputs in registers, shufps / vshufps with immediate 0xe4.
;;   2/3: operand 2 in memory, movlps / vmovlps.
;;   4:   memory destination, %vmovlps storing operand 2.
4264 (define_insn "sse_loadlps"
4265 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,m")
4267 (match_operand:V2SF 2 "nonimmediate_operand" " 0,x,m,m,x")
4269 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,0,x,0")
4270 (parallel [(const_int 2) (const_int 3)]))))]
4273 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
4274 vshufps\t{$0xe4, %1, %2, %0|%0, %2, %1, 0xe4}
4275 movlps\t{%2, %0|%0, %q2}
4276 vmovlps\t{%2, %1, %0|%0, %1, %q2}
4277 %vmovlps\t{%2, %0|%q0, %2}"
4278 [(set_attr "isa" "noavx,avx,noavx,avx,*")
4279 (set_attr "type" "sseshuf,sseshuf,ssemov,ssemov,ssemov")
4280 (set_attr "length_immediate" "1,1,*,*,*")
4281 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
4282 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
;; Register-only pattern combining V4SF operands 2 and 1 into operand 0.
;; Alternative 0 (non-AVX) ties operand 1 to the output and emits movss;
;; alternative 1 (AVX) emits the three-operand vmovss.
4284 (define_insn "sse_movss"
4285 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
4287 (match_operand:V4SF 2 "register_operand" " x,x")
4288 (match_operand:V4SF 1 "register_operand" " 0,x")
4292 movss\t{%2, %0|%0, %2}
4293 vmovss\t{%2, %1, %0|%0, %1, %2}"
4294 [(set_attr "isa" "noavx,avx")
4295 (set_attr "type" "ssemov")
4296 (set_attr "prefix" "orig,vex")
4297 (set_attr "mode" "SF")])
;; Duplicates element 0 of V4SF register operand 1 across a VF1_128_256
;; destination; emitted as vbroadcastss.
4299 (define_insn "avx2_vec_dup<mode>"
4300 [(set (match_operand:VF1_128_256 0 "register_operand" "=x")
4301 (vec_duplicate:VF1_128_256
4303 (match_operand:V4SF 1 "register_operand" "x")
4304 (parallel [(const_int 0)]))))]
4306 "vbroadcastss\t{%1, %0|%0, %1}"
4307 [(set_attr "type" "sselog1")
4308 (set_attr "prefix" "vex")
4309 (set_attr "mode" "<MODE>")])
;; Variant whose source is a V8SF register from which element 0 is
;; selected; the template prints operand 1 with the %x operand modifier and
;; emits vbroadcastss into a V8SF destination.
4311 (define_insn "avx2_vec_dupv8sf_1"
4312 [(set (match_operand:V8SF 0 "register_operand" "=x")
4315 (match_operand:V8SF 1 "register_operand" "x")
4316 (parallel [(const_int 0)]))))]
4318 "vbroadcastss\t{%x1, %0|%0, %x1}"
4319 [(set_attr "type" "sselog1")
4320 (set_attr "prefix" "vex")
4321 (set_attr "mode" "V8SF")])
;; Builds a V4SF register from SF operand 1.
;;   0: AVX, register source, vshufps with immediate 0.
;;   1: AVX, memory source, vbroadcastss.
;;   2: non-AVX, source tied to the output, shufps with immediate 0.
4323 (define_insn "vec_dupv4sf"
4324 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
4326 (match_operand:SF 1 "nonimmediate_operand" "x,m,0")))]
4329 vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0}
4330 vbroadcastss\t{%1, %0|%0, %1}
4331 shufps\t{$0, %0, %0|%0, %0, 0}"
4332 [(set_attr "isa" "avx,avx,noavx")
4333 (set_attr "type" "sseshuf1,ssemov,sseshuf1")
4334 (set_attr "length_immediate" "1,0,1")
4335 (set_attr "prefix_extra" "0,1,*")
4336 (set_attr "prefix" "vex,vex,orig")
4337 (set_attr "mode" "V4SF")])
4339 ;; Although insertps takes register source, we prefer
4340 ;; unpcklps with register source since it is shorter.
;; Builds V2SF operand 0 from SF operands 1 and 2.  Seven alternatives:
;;   0/1: both in SSE registers, unpcklps / vunpcklps.
;;   2/3: operand 2 in memory, insertps / vinsertps with immediate 0x10.
;;   4:   operand 1 in memory and operand 2 matching constraint C, %vmovss.
;;   5:   MMX register destination, punpckldq.
;;   6:   MMX register destination, operand 1 in memory and operand 2
;;        matching constraint C, movd.
4341 (define_insn "*vec_concatv2sf_sse4_1"
4342 [(set (match_operand:V2SF 0 "register_operand" "=x,x,x,x,x,*y ,*y")
4344 (match_operand:SF 1 "nonimmediate_operand" " 0,x,0,x,m, 0 , m")
4345 (match_operand:SF 2 "vector_move_operand" " x,x,m,m,C,*ym, C")))]
4348 unpcklps\t{%2, %0|%0, %2}
4349 vunpcklps\t{%2, %1, %0|%0, %1, %2}
4350 insertps\t{$0x10, %2, %0|%0, %2, 0x10}
4351 vinsertps\t{$0x10, %2, %1, %0|%0, %1, %2, 0x10}
4352 %vmovss\t{%1, %0|%0, %1}
4353 punpckldq\t{%2, %0|%0, %2}
4354 movd\t{%1, %0|%0, %1}"
4355 [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
4356 (set_attr "type" "sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov")
4357 (set_attr "prefix_data16" "*,*,1,*,*,*,*")
4358 (set_attr "prefix_extra" "*,*,1,1,*,*,*")
4359 (set_attr "length_immediate" "*,*,1,1,*,*,*")
4360 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig")
4361 (set_attr "mode" "V4SF,V4SF,V4SF,V4SF,SF,DI,DI")])
4363 ;; ??? In theory we can match memory for the MMX alternative, but allowing
4364 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
4365 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Builds V2SF operand 0 from SF operands 1 and 2 (operand 2 is a register
;; or zero).  Four alternatives:
;;   0: both in SSE registers, unpcklps.
;;   1: operand 1 in memory and operand 2 matching constraint C, movss.
;;   2: MMX registers, punpckldq.
;;   3: MMX register destination, operand 1 in memory and operand 2
;;      matching constraint C, movd.
4366 (define_insn "*vec_concatv2sf_sse"
4367 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
4369 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
4370 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
4373 unpcklps\t{%2, %0|%0, %2}
4374 movss\t{%1, %0|%0, %1}
4375 punpckldq\t{%2, %0|%0, %2}
4376 movd\t{%1, %0|%0, %1}"
4377 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
4378 (set_attr "mode" "V4SF,SF,DI,DI")])
;; Builds V4SF operand 0 from V2SF operands 1 (register) and 2.
;;   0/1: operand 2 in a register, movlhps / vmovlhps.
;;   2/3: operand 2 in memory, movhps / vmovhps.
4380 (define_insn "*vec_concatv4sf"
4381 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x,x")
4383 (match_operand:V2SF 1 "register_operand" " 0,x,0,x")
4384 (match_operand:V2SF 2 "nonimmediate_operand" " x,x,m,m")))]
4387 movlhps\t{%2, %0|%0, %2}
4388 vmovlhps\t{%2, %1, %0|%0, %1, %2}
4389 movhps\t{%2, %0|%0, %q2}
4390 vmovhps\t{%2, %1, %0|%0, %1, %q2}"
4391 [(set_attr "isa" "noavx,avx,noavx,avx")
4392 (set_attr "type" "ssemov")
4393 (set_attr "prefix" "orig,vex,orig,vex")
4394 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF")])
;; Vector initialisation for the V_128 modes; delegates to
;; ix86_expand_vector_init with first argument false, passing operands 0
;; and 1.
4396 (define_expand "vec_init<mode>"
4397 [(match_operand:V_128 0 "register_operand")
4401 ix86_expand_vector_init (false, operands[0], operands[1]);
4405 ;; Avoid combining registers from different units in a single alternative,
4406 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Combines a vec_duplicate of scalar operand 2 with VI4F_128 operand 1
;; into operand 0.  Eleven alternatives:
;;   0-3:  operand 1 matches constraint C; templates %vinsertps with
;;         immediate 0xe, %vmov<ssescalarmodesuffix>, %vmovd and movss.
;;   4-5:  operand 1 in a register; movss / vmovss.
;;   6-7:  operand 2 in a general register or memory; pinsrd / vpinsrd with
;;         immediate 0.
;;   8-10: memory destination tied to operand 1.
;; The type attribute is sselog for alternatives 0, 6 and 7, imov for 9 and
;; fmov for 10; ssemov is the last entry of the cond.
4407 (define_insn "vec_set<mode>_0"
4408 [(set (match_operand:VI4F_128 0 "nonimmediate_operand"
4409 "=x,x,x ,x,x,x,x ,x ,m ,m ,m")
4411 (vec_duplicate:VI4F_128
4412 (match_operand:<ssescalarmode> 2 "general_operand"
4413 " x,m,*r,m,x,x,*rm,*rm,!x,!*re,!*fF"))
4414 (match_operand:VI4F_128 1 "vector_move_operand"
4415 " C,C,C ,C,0,x,0 ,x ,0 ,0 ,0")
4419 %vinsertps\t{$0xe, %d2, %0|%0, %d2, 0xe}
4420 %vmov<ssescalarmodesuffix>\t{%2, %0|%0, %2}
4421 %vmovd\t{%2, %0|%0, %2}
4422 movss\t{%2, %0|%0, %2}
4423 movss\t{%2, %0|%0, %2}
4424 vmovss\t{%2, %1, %0|%0, %1, %2}
4425 pinsrd\t{$0, %2, %0|%0, %2, 0}
4426 vpinsrd\t{$0, %2, %1, %0|%0, %1, %2, 0}
4430 [(set_attr "isa" "sse4,sse2,sse2,noavx,noavx,avx,sse4_noavx,avx,*,*,*")
4432 (cond [(eq_attr "alternative" "0,6,7")
4433 (const_string "sselog")
4434 (eq_attr "alternative" "9")
4435 (const_string "imov")
4436 (eq_attr "alternative" "10")
4437 (const_string "fmov")
4439 (const_string "ssemov")))
4440 (set_attr "prefix_extra" "*,*,*,*,*,*,1,1,*,*,*")
4441 (set_attr "length_immediate" "*,*,*,*,*,*,1,1,*,*,*")
4442 (set_attr "prefix" "maybe_vex,maybe_vex,maybe_vex,orig,orig,vex,orig,vex,*,*,*")
4443 (set_attr "mode" "SF,<ssescalarmode>,SI,SF,SF,SF,TI,TI,*,*,*")])
4445 ;; A subset is vec_setv4sf.
;; Insert SF operand 2 into one element of V4SF operand 1 using (v)insertps.
;; Operand 3 must be a single-bit value whose bit index (exact_log2) is below
;; the number of V4SF elements; the output code rewrites it into the insertps
;; immediate by shifting that index left by 4.
;; Alternative 0 is the legacy form (operand 1 tied to the destination),
;; alternative 1 the three-operand VEX form.
4446 (define_insn "*vec_setv4sf_sse4_1"
4447 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
4450 (match_operand:SF 2 "nonimmediate_operand" "xm,xm"))
4451 (match_operand:V4SF 1 "register_operand" "0,x")
4452 (match_operand:SI 3 "const_int_operand")))]
4454 && ((unsigned) exact_log2 (INTVAL (operands[3]))
4455 < GET_MODE_NUNITS (V4SFmode))"
4457 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
4458 switch (which_alternative)
4461 return "insertps\t{%3, %2, %0|%0, %2, %3}";
4463 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4468 [(set_attr "isa" "noavx,avx")
4469 (set_attr "type" "sselog")
4470 (set_attr "prefix_data16" "1,*")
4471 (set_attr "prefix_extra" "1")
4472 (set_attr "length_immediate" "1")
4473 (set_attr "prefix" "orig,vex")
4474 (set_attr "mode" "V4SF")])
;; (v)insertps taking a full immediate (operand 3, range 0..255) and a V4SF
;; source operand 2 that may live in memory.
;; When operand 2 is a MEM, bits 7:6 of the immediate (count_s) are folded
;; into the address instead: the operand is re-addressed as an SFmode access
;; at byte offset count_s * 4 and the immediate is masked down to 0x3f.
4476 (define_insn "sse4_1_insertps"
4477 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
4478 (unspec:V4SF [(match_operand:V4SF 2 "nonimmediate_operand" "xm,xm")
4479 (match_operand:V4SF 1 "register_operand" "0,x")
4480 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
4484 if (MEM_P (operands[2]))
4486 unsigned count_s = INTVAL (operands[3]) >> 6;
4488 operands[3] = GEN_INT (INTVAL (operands[3]) & 0x3f);
4489 operands[2] = adjust_address_nv (operands[2], SFmode, count_s * 4);
4491 switch (which_alternative)
4494 return "insertps\t{%3, %2, %0|%0, %2, %3}";
4496 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4501 [(set_attr "isa" "noavx,avx")
4502 (set_attr "type" "sselog")
4503 (set_attr "prefix_data16" "1,*")
4504 (set_attr "prefix_extra" "1")
4505 (set_attr "length_immediate" "1")
4506 (set_attr "prefix" "orig,vex")
4507 (set_attr "mode" "V4SF")])
;; Post-reload split (TARGET_SSE) for a VI4F_128 memory destination fed by a
;; duplicated non-memory scalar (operand 1): the replacement is a plain move
;; of operand 1 into operand 0, with operand 0 re-addressed as a
;; <ssescalarmode> access at offset 0.
4510 [(set (match_operand:VI4F_128 0 "memory_operand")
4512 (vec_duplicate:VI4F_128
4513 (match_operand:<ssescalarmode> 1 "nonmemory_operand"))
4516 "TARGET_SSE && reload_completed"
4517 [(set (match_dup 0) (match_dup 1))]
4518 "operands[0] = adjust_address (operands[0], <ssescalarmode>mode, 0);")
;; vec_set expander for every mode of the V iterator.
;; Operand 0 is the vector register, operand 1 the scalar register and
;; operand 2 a constant integer; all the work is delegated to
;; ix86_expand_vector_set, called with false as its first argument and
;; INTVAL of operand 2 as its last.
4520 (define_expand "vec_set<mode>"
4521 [(match_operand:V 0 "register_operand")
4522 (match_operand:<ssescalarmode> 1 "register_operand")
4523 (match_operand 2 "const_int_operand")]
4526 ix86_expand_vector_set (false, operands[0], operands[1],
4527 INTVAL (operands[2]));
;; Extract element 0 of V4SF operand 1 into SF operand 0 (SSE register,
;; memory, x87 register or general register).  Source and destination may not
;; both be memory.
;; After reload the insn is split into a plain SF move: a register source is
;; rewritten as the same hard register in SFmode, a memory source is
;; re-addressed as an SFmode access at offset 0.
4531 (define_insn_and_split "*vec_extractv4sf_0"
4532 [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,f,r")
4534 (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m,m")
4535 (parallel [(const_int 0)])))]
4536 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4538 "&& reload_completed"
4539 [(set (match_dup 0) (match_dup 1))]
4541 if (REG_P (operands[1]))
4542 operands[1] = gen_rtx_REG (SFmode, REGNO (operands[1]));
4544 operands[1] = adjust_address (operands[1], SFmode, 0);
;; Extract the element selected by operand 2 (0..3) from V4SF register
;; operand 1.
;; Alternative 0 (general register or memory destination) emits (v)extractps.
;; Alternatives 1-2 have an SSE register destination and are split after
;; reload: depending on the index, the split emits either a shufps or an
;; interleave-high of operand 1 with itself, writing the V4SF view of the
;; destination hard register.
4547 (define_insn_and_split "*sse4_1_extractps"
4548 [(set (match_operand:SF 0 "nonimmediate_operand" "=rm,x,x")
4550 (match_operand:V4SF 1 "register_operand" "x,0,x")
4551 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n,n,n")])))]
4554 %vextractps\t{%2, %1, %0|%0, %1, %2}
4557 "&& reload_completed && SSE_REG_P (operands[0])"
4560 rtx dest = gen_rtx_REG (V4SFmode, REGNO (operands[0]));
4561 switch (INTVAL (operands[2]))
4565 emit_insn (gen_sse_shufps_v4sf (dest, operands[1], operands[1],
4566 operands[2], operands[2],
4567 GEN_INT (INTVAL (operands[2]) + 4),
4568 GEN_INT (INTVAL (operands[2]) + 4)));
4571 emit_insn (gen_vec_interleave_highv4sf (dest, operands[1], operands[1]));
4574 /* 0 should be handled by the *vec_extractv4sf_0 pattern above. */
4579 [(set_attr "isa" "*,noavx,avx")
4580 (set_attr "type" "sselog,*,*")
4581 (set_attr "prefix_data16" "1,*,*")
4582 (set_attr "prefix_extra" "1,*,*")
4583 (set_attr "length_immediate" "1,*,*")
4584 (set_attr "prefix" "maybe_vex,*,*")
4585 (set_attr "mode" "V4SF,*,*")])
;; Extract element operand 2 (0..3) from a V4SF held in offsettable memory
;; (constraint o) into an SSE, general or x87 register.
;; After reload this becomes a plain SF load from byte offset index * 4.
4587 (define_insn_and_split "*vec_extractv4sf_mem"
4588 [(set (match_operand:SF 0 "register_operand" "=x,*r,f")
4590 (match_operand:V4SF 1 "memory_operand" "o,o,o")
4591 (parallel [(match_operand 2 "const_0_to_3_operand" "n,n,n")])))]
4594 "&& reload_completed"
4595 [(set (match_dup 0) (match_dup 1))]
4597 operands[1] = adjust_address (operands[1], SFmode, INTVAL (operands[2]) * 4);
;; Expander for extracting one 128-bit half of a 256-bit vector (V_256 modes).
;; Operand 2 (0 or 1) selects, via a switch, between the generators
;; gen_vec_extract_lo_<mode> and gen_vec_extract_hi_<mode>; the chosen
;; generator is then invoked with operands 0 and 1.
4600 (define_expand "avx_vextractf128<mode>"
4601 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
4602 (match_operand:V_256 1 "register_operand")
4603 (match_operand:SI 2 "const_0_to_1_operand")]
4606 rtx (*insn)(rtx, rtx);
4608 switch (INTVAL (operands[2]))
4611 insn = gen_vec_extract_lo_<mode>;
4614 insn = gen_vec_extract_hi_<mode>;
4620 emit_insn (insn (operands[0], operands[1]));
;; Low half (elements 0 and 1) of a four-element 256-bit vector (VI8F_256).
;; Requires TARGET_AVX and forbids memory-to-memory.  After reload it is split
;; into a plain move in the half-vector mode: a register source is rewritten
;; as the same hard register in <ssehalfvecmode>, a memory source is
;; re-addressed at offset 0.
4624 (define_insn_and_split "vec_extract_lo_<mode>"
4625 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
4626 (vec_select:<ssehalfvecmode>
4627 (match_operand:VI8F_256 1 "nonimmediate_operand" "xm,x")
4628 (parallel [(const_int 0) (const_int 1)])))]
4629 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4631 "&& reload_completed"
4632 [(set (match_dup 0) (match_dup 1))]
4634 if (REG_P (operands[1]))
4635 operands[1] = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (operands[1]));
4637 operands[1] = adjust_address (operands[1], <ssehalfvecmode>mode, 0);
;; High half (elements 2 and 3) of a four-element 256-bit vector (VI8F_256),
;; emitted as vextract<i128> with immediate 1.  Alternative 0 writes an SSE
;; register, alternative 1 stores to memory (see the memory attribute).
4640 (define_insn "vec_extract_hi_<mode>"
4641 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
4642 (vec_select:<ssehalfvecmode>
4643 (match_operand:VI8F_256 1 "register_operand" "x,x")
4644 (parallel [(const_int 2) (const_int 3)])))]
4646 "vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}"
4647 [(set_attr "type" "sselog")
4648 (set_attr "prefix_extra" "1")
4649 (set_attr "length_immediate" "1")
4650 (set_attr "memory" "none,store")
4651 (set_attr "prefix" "vex")
4652 (set_attr "mode" "<sseinsnmode>")])
;; Low half (elements 0..3) of an eight-element 256-bit vector (VI4F_256).
;; Requires TARGET_AVX and forbids memory-to-memory.  After reload it is split
;; into a plain move in the half-vector mode: a register source is rewritten
;; as the same hard register in <ssehalfvecmode>, a memory source is
;; re-addressed at offset 0.
4654 (define_insn_and_split "vec_extract_lo_<mode>"
4655 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
4656 (vec_select:<ssehalfvecmode>
4657 (match_operand:VI4F_256 1 "nonimmediate_operand" "xm,x")
4658 (parallel [(const_int 0) (const_int 1)
4659 (const_int 2) (const_int 3)])))]
4660 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4662 "&& reload_completed"
4663 [(set (match_dup 0) (match_dup 1))]
4665 if (REG_P (operands[1]))
4666 operands[1] = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (operands[1]));
4668 operands[1] = adjust_address (operands[1], <ssehalfvecmode>mode, 0);
;; High half (elements 4..7) of an eight-element 256-bit vector (VI4F_256),
;; emitted as vextract<i128> with immediate 1.  Alternative 0 writes an SSE
;; register, alternative 1 stores to memory (see the memory attribute).
4671 (define_insn "vec_extract_hi_<mode>"
4672 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
4673 (vec_select:<ssehalfvecmode>
4674 (match_operand:VI4F_256 1 "register_operand" "x,x")
4675 (parallel [(const_int 4) (const_int 5)
4676 (const_int 6) (const_int 7)])))]
4678 "vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}"
4679 [(set_attr "type" "sselog")
4680 (set_attr "prefix_extra" "1")
4681 (set_attr "length_immediate" "1")
4682 (set_attr "memory" "none,store")
4683 (set_attr "prefix" "vex")
4684 (set_attr "mode" "<sseinsnmode>")])
;; Low half (elements 0..7) of a V16HI as a V8HI.  Requires TARGET_AVX and
;; forbids memory-to-memory.  After reload it is split into a plain V8HI move:
;; a register source is rewritten as the same hard register in V8HImode, a
;; memory source is re-addressed at offset 0.
4686 (define_insn_and_split "vec_extract_lo_v16hi"
4687 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
4689 (match_operand:V16HI 1 "nonimmediate_operand" "xm,x")
4690 (parallel [(const_int 0) (const_int 1)
4691 (const_int 2) (const_int 3)
4692 (const_int 4) (const_int 5)
4693 (const_int 6) (const_int 7)])))]
4694 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4696 "&& reload_completed"
4697 [(set (match_dup 0) (match_dup 1))]
4699 if (REG_P (operands[1]))
4700 operands[1] = gen_rtx_REG (V8HImode, REGNO (operands[1]));
4702 operands[1] = adjust_address (operands[1], V8HImode, 0);
;; High half (elements 8..15) of a V16HI register as a V8HI, using a 128-bit
;; vextract with immediate 1; the destination is an SSE register or memory.
;; NOTE(review): the %~ in the template is expanded by the i386 operand
;; printer, presumably to the i/f letter of vextracti128/vextractf128 --
;; confirm against print_operand in i386.c.
4705 (define_insn "vec_extract_hi_v16hi"
4706 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
4708 (match_operand:V16HI 1 "register_operand" "x,x")
4709 (parallel [(const_int 8) (const_int 9)
4710 (const_int 10) (const_int 11)
4711 (const_int 12) (const_int 13)
4712 (const_int 14) (const_int 15)])))]
4714 "vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}"
4715 [(set_attr "type" "sselog")
4716 (set_attr "prefix_extra" "1")
4717 (set_attr "length_immediate" "1")
4718 (set_attr "memory" "none,store")
4719 (set_attr "prefix" "vex")
4720 (set_attr "mode" "OI")])
;; Low half (elements 0..15) of a V32QI as a V16QI.  Requires TARGET_AVX and
;; forbids memory-to-memory.  After reload it is split into a plain V16QI
;; move: a register source is rewritten as the same hard register in
;; V16QImode, a memory source is re-addressed at offset 0.
4722 (define_insn_and_split "vec_extract_lo_v32qi"
4723 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
4725 (match_operand:V32QI 1 "nonimmediate_operand" "xm,x")
4726 (parallel [(const_int 0) (const_int 1)
4727 (const_int 2) (const_int 3)
4728 (const_int 4) (const_int 5)
4729 (const_int 6) (const_int 7)
4730 (const_int 8) (const_int 9)
4731 (const_int 10) (const_int 11)
4732 (const_int 12) (const_int 13)
4733 (const_int 14) (const_int 15)])))]
4734 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4736 "&& reload_completed"
4737 [(set (match_dup 0) (match_dup 1))]
4739 if (REG_P (operands[1]))
4740 operands[1] = gen_rtx_REG (V16QImode, REGNO (operands[1]));
4742 operands[1] = adjust_address (operands[1], V16QImode, 0);
;; High half (elements 16..31) of a V32QI register as a V16QI, using a 128-bit
;; vextract with immediate 1; the destination is an SSE register or memory.
;; NOTE(review): the %~ in the template is expanded by the i386 operand
;; printer, presumably to the i/f letter of vextracti128/vextractf128 --
;; confirm against print_operand in i386.c.
4745 (define_insn "vec_extract_hi_v32qi"
4746 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
4748 (match_operand:V32QI 1 "register_operand" "x,x")
4749 (parallel [(const_int 16) (const_int 17)
4750 (const_int 18) (const_int 19)
4751 (const_int 20) (const_int 21)
4752 (const_int 22) (const_int 23)
4753 (const_int 24) (const_int 25)
4754 (const_int 26) (const_int 27)
4755 (const_int 28) (const_int 29)
4756 (const_int 30) (const_int 31)])))]
4758 "vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}"
4759 [(set_attr "type" "sselog")
4760 (set_attr "prefix_extra" "1")
4761 (set_attr "length_immediate" "1")
4762 (set_attr "memory" "none,store")
4763 (set_attr "prefix" "vex")
4764 (set_attr "mode" "OI")])
4766 ;; Modes handled by vec_extract patterns.
;; Each row pairs a 256-bit mode, enabled only under TARGET_AVX, with its
;; unconditional 128-bit counterpart.
4767 (define_mode_iterator VEC_EXTRACT_MODE
4768 [(V32QI "TARGET_AVX") V16QI
4769 (V16HI "TARGET_AVX") V8HI
4770 (V8SI "TARGET_AVX") V4SI
4771 (V4DI "TARGET_AVX") V2DI
4772 (V8SF "TARGET_AVX") V4SF
4773 (V4DF "TARGET_AVX") V2DF])
;; vec_extract expander for the modes of VEC_EXTRACT_MODE.
;; Operand 0 is the scalar destination register, operand 1 the vector
;; register and operand 2 a constant integer; the work is delegated to
;; ix86_expand_vector_extract, called with false as its first argument and
;; INTVAL of operand 2 as its last.
4775 (define_expand "vec_extract<mode>"
4776 [(match_operand:<ssescalarmode> 0 "register_operand")
4777 (match_operand:VEC_EXTRACT_MODE 1 "register_operand")
4778 (match_operand 2 "const_int_operand")]
4781 ix86_expand_vector_extract (false, operands[0], operands[1],
4782 INTVAL (operands[2]));
4786 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4788 ;; Parallel double-precision floating point element swizzling
4790 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4792 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; 256-bit vunpckhpd: the selection indices 1, 5, 3, 7 are taken over
;; operands 1 (register) and 2 (register or memory).
4793 (define_insn "avx_unpckhpd256"
4794 [(set (match_operand:V4DF 0 "register_operand" "=x")
4797 (match_operand:V4DF 1 "register_operand" "x")
4798 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4799 (parallel [(const_int 1) (const_int 5)
4800 (const_int 3) (const_int 7)])))]
4802 "vunpckhpd\t{%2, %1, %0|%0, %1, %2}"
4803 [(set_attr "type" "sselog")
4804 (set_attr "prefix" "vex")
4805 (set_attr "mode" "V4DF")])
;; Interleave-high expander for V4DF, built from three sets:
;;   1. a selection with indices 0, 4, 2, 6 over operands 1 and 2,
;;   2. a selection with indices 1, 5, 3, 7,
;;   3. a final selection with indices 2, 3, 6, 7 written to operand 0.
;; Operands 3 and 4 are allocated as fresh V4DF pseudo registers in the
;; preparation code (presumably the temporaries holding steps 1 and 2).
4807 (define_expand "vec_interleave_highv4df"
4811 (match_operand:V4DF 1 "register_operand" "x")
4812 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4813 (parallel [(const_int 0) (const_int 4)
4814 (const_int 2) (const_int 6)])))
4820 (parallel [(const_int 1) (const_int 5)
4821 (const_int 3) (const_int 7)])))
4822 (set (match_operand:V4DF 0 "register_operand")
4827 (parallel [(const_int 2) (const_int 3)
4828 (const_int 6) (const_int 7)])))]
4831 operands[3] = gen_reg_rtx (V4DFmode);
4832 operands[4] = gen_reg_rtx (V4DFmode);
;; Interleave-high expander for V2DF.  Both inputs may be memory; if
;; ix86_vec_interleave_v2df_operator_ok (operands, 1) rejects the operand
;; combination, operand 2 is forced into a register.
4836 (define_expand "vec_interleave_highv2df"
4837 [(set (match_operand:V2DF 0 "register_operand")
4840 (match_operand:V2DF 1 "nonimmediate_operand")
4841 (match_operand:V2DF 2 "nonimmediate_operand"))
4842 (parallel [(const_int 1)
4846 if (!ix86_vec_interleave_v2df_operator_ok (operands, 1))
4847 operands[2] = force_reg (V2DFmode, operands[2]);
;; Matching insn for the V2DF interleave-high; valid under TARGET_SSE2 when
;; ix86_vec_interleave_v2df_operator_ok (operands, 1) accepts the operands.
;; Alternatives, in order:
;;   0/1  register inputs: unpckhpd / vunpckhpd
;;   2    operand 1 in offsettable memory, operand 2 identical to it: (v)movddup
;;        of %H1 (requires SSE3 per the isa attribute)
;;   3/4  operand 1 in offsettable memory: movlpd / vmovlpd loading %H1
;;   5    memory destination with operand 2 tied to it: (v)movhpd store of
;;        operand 1
;; NOTE(review): %H1 is presumably the operand printed at an 8-byte offset,
;; i.e. the high DF of the memory operand -- confirm in i386.c.
4850 (define_insn "*vec_interleave_highv2df"
4851 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m")
4854 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,o,o,o,x")
4855 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,1,0,x,0"))
4856 (parallel [(const_int 1)
4858 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
4860 unpckhpd\t{%2, %0|%0, %2}
4861 vunpckhpd\t{%2, %1, %0|%0, %1, %2}
4862 %vmovddup\t{%H1, %0|%0, %H1}
4863 movlpd\t{%H1, %0|%0, %H1}
4864 vmovlpd\t{%H1, %2, %0|%0, %2, %H1}
4865 %vmovhpd\t{%1, %0|%q0, %1}"
4866 [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
4867 (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
4868 (set_attr "prefix_data16" "*,*,*,1,*,1")
4869 (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex")
4870 (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,V1DF")])
4872 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; Expander for the 256-bit movddup: a selection with indices 0, 4, 2, 6 whose
;; visible input is operand 1 (register or memory).
4873 (define_expand "avx_movddup256"
4874 [(set (match_operand:V4DF 0 "register_operand")
4877 (match_operand:V4DF 1 "nonimmediate_operand")
4879 (parallel [(const_int 0) (const_int 4)
4880 (const_int 2) (const_int 6)])))]
;; Expander for the 256-bit unpcklpd: a selection with indices 0, 4, 2, 6 over
;; operand 1 (register) and operand 2 (register or memory).
4883 (define_expand "avx_unpcklpd256"
4884 [(set (match_operand:V4DF 0 "register_operand")
4887 (match_operand:V4DF 1 "register_operand")
4888 (match_operand:V4DF 2 "nonimmediate_operand"))
4889 (parallel [(const_int 0) (const_int 4)
4890 (const_int 2) (const_int 6)])))]
;; Matching insn for the 0, 4, 2, 6 selection on V4DF.
;; Alternative 0: operand 1 in a register, operand 2 register or memory ->
;; vunpcklpd.  Alternative 1: operand 1 in memory with operand 2 required to
;; match it (constraint 1) -> vmovddup of operand 1.
4893 (define_insn "*avx_unpcklpd256"
4894 [(set (match_operand:V4DF 0 "register_operand" "=x,x")
4897 (match_operand:V4DF 1 "nonimmediate_operand" " x,m")
4898 (match_operand:V4DF 2 "nonimmediate_operand" "xm,1"))
4899 (parallel [(const_int 0) (const_int 4)
4900 (const_int 2) (const_int 6)])))]
4903 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4904 vmovddup\t{%1, %0|%0, %1}"
4905 [(set_attr "type" "sselog")
4906 (set_attr "prefix" "vex")
4907 (set_attr "mode" "V4DF")])
;; Interleave-low expander for V4DF, built from three sets:
;;   1. a selection with indices 0, 4, 2, 6 over operands 1 and 2,
;;   2. a selection with indices 1, 5, 3, 7,
;;   3. a final selection with indices 0, 1, 4, 5 written to operand 0.
;; Operands 3 and 4 are allocated as fresh V4DF pseudo registers in the
;; preparation code (presumably the temporaries holding steps 1 and 2).
4909 (define_expand "vec_interleave_lowv4df"
4913 (match_operand:V4DF 1 "register_operand" "x")
4914 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4915 (parallel [(const_int 0) (const_int 4)
4916 (const_int 2) (const_int 6)])))
4922 (parallel [(const_int 1) (const_int 5)
4923 (const_int 3) (const_int 7)])))
4924 (set (match_operand:V4DF 0 "register_operand")
4929 (parallel [(const_int 0) (const_int 1)
4930 (const_int 4) (const_int 5)])))]
4933 operands[3] = gen_reg_rtx (V4DFmode);
4934 operands[4] = gen_reg_rtx (V4DFmode);
;; Interleave-low expander for V2DF.  Both inputs may be memory; if
;; ix86_vec_interleave_v2df_operator_ok (operands, 0) rejects the operand
;; combination, operand 1 is forced into a register.
4937 (define_expand "vec_interleave_lowv2df"
4938 [(set (match_operand:V2DF 0 "register_operand")
4941 (match_operand:V2DF 1 "nonimmediate_operand")
4942 (match_operand:V2DF 2 "nonimmediate_operand"))
4943 (parallel [(const_int 0)
4947 if (!ix86_vec_interleave_v2df_operator_ok (operands, 0))
4948 operands[1] = force_reg (V2DFmode, operands[1]);
;; Matching insn for the V2DF interleave-low; valid under TARGET_SSE2 when
;; ix86_vec_interleave_v2df_operator_ok (operands, 0) accepts the operands.
;; Alternatives, in order:
;;   0/1  register inputs: unpcklpd / vunpcklpd
;;   2    operand 1 in memory, operand 2 identical to it: (v)movddup
;;        (requires SSE3 per the isa attribute)
;;   3/4  operand 2 in memory: movhpd / vmovhpd load
;;   5    offsettable memory destination with operand 1 tied to it:
;;        (v)movlpd store of operand 2 to %H0
;; NOTE(review): %H0 is presumably the destination printed at an 8-byte
;; offset, i.e. its high DF -- confirm in i386.c.
4951 (define_insn "*vec_interleave_lowv2df"
4952 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,o")
4955 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,m,0,x,0")
4956 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,1,m,m,x"))
4957 (parallel [(const_int 0)
4959 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
4961 unpcklpd\t{%2, %0|%0, %2}
4962 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4963 %vmovddup\t{%1, %0|%0, %q1}
4964 movhpd\t{%2, %0|%0, %q2}
4965 vmovhpd\t{%2, %1, %0|%0, %1, %q2}
4966 %vmovlpd\t{%2, %H0|%H0, %2}"
4967 [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
4968 (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
4969 (set_attr "prefix_data16" "*,*,*,1,*,1")
4970 (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex")
4971 (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,V1DF")])
;; Post-reload split (TARGET_SSE3) for a V2DF memory destination whose source
;; is built from register operand 1: the DFmode view of operand 1's hard
;; register is stored twice, at byte offsets 0 and 8 of the destination.
4974 [(set (match_operand:V2DF 0 "memory_operand")
4977 (match_operand:V2DF 1 "register_operand")
4979 (parallel [(const_int 0)
4981 "TARGET_SSE3 && reload_completed"
4984 rtx low = gen_rtx_REG (DFmode, REGNO (operands[1]));
4985 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
4986 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
;; Split (TARGET_SSE3) for a register destination selecting from memory
;; operand 1 with indices operand 2 (0 or 1) and operand 3, accepted only
;; when operand 3 equals operand 2 + 2.  The replacement is a vec_duplicate of
;; a DF load from byte offset operand 2 * 8.
4991 [(set (match_operand:V2DF 0 "register_operand")
4994 (match_operand:V2DF 1 "memory_operand")
4996 (parallel [(match_operand:SI 2 "const_0_to_1_operand")
4997 (match_operand:SI 3 "const_int_operand")])))]
4998 "TARGET_SSE3 && INTVAL (operands[2]) + 2 == INTVAL (operands[3])"
4999 [(set (match_dup 0) (vec_duplicate:V2DF (match_dup 1)))]
5001 operands[1] = adjust_address (operands[1], DFmode, INTVAL (operands[2]) * 8);
;; vrndscale on the 512-bit float modes (VF_512): operand 1 is a register or
;; memory source, operand 2 an immediate in 0..255; EVEX-encoded.
5004 (define_insn "avx512f_rndscale<mode>"
5005 [(set (match_operand:VF_512 0 "register_operand" "=v")
5007 [(match_operand:VF_512 1 "nonimmediate_operand" "vm")
5008 (match_operand:SI 2 "const_0_to_255_operand")]
5011 "vrndscale<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5012 [(set_attr "length_immediate" "1")
5013 (set_attr "prefix" "evex")
5014 (set_attr "mode" "<MODE>")])
;; Expander for the 256-bit shufpd builtin form: operand 3 is the immediate
;; mask.  Each mask bit is decoded into an explicit element index and passed
;; to gen_avx_shufpd256_1: bit 1 picks 5 or 4, bit 2 picks 3 or 2, bit 3 picks
;; 7 or 6 (set bit -> the odd index).
5016 (define_expand "avx_shufpd256"
5017 [(match_operand:V4DF 0 "register_operand")
5018 (match_operand:V4DF 1 "register_operand")
5019 (match_operand:V4DF 2 "nonimmediate_operand")
5020 (match_operand:SI 3 "const_int_operand")]
5023 int mask = INTVAL (operands[3]);
5024 emit_insn (gen_avx_shufpd256_1 (operands[0], operands[1], operands[2],
5026 GEN_INT (mask & 2 ? 5 : 4),
5027 GEN_INT (mask & 4 ? 3 : 2),
5028 GEN_INT (mask & 8 ? 7 : 6)));
;; vshufpd on V4DF with the selection spelled out as four index operands:
;; operand 3 in 0..1, operand 4 in 4..5, operand 5 in 2..3, operand 6 in 6..7.
;; The output code re-encodes them into the 4-bit immediate:
;;   bit 0 = op3, bit 1 = op4 - 4, bit 2 = op5 - 2, bit 3 = op6 - 6,
;; and stores it back into operand 3 for printing.
5032 (define_insn "avx_shufpd256_1"
5033 [(set (match_operand:V4DF 0 "register_operand" "=x")
5036 (match_operand:V4DF 1 "register_operand" "x")
5037 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
5038 (parallel [(match_operand 3 "const_0_to_1_operand")
5039 (match_operand 4 "const_4_to_5_operand")
5040 (match_operand 5 "const_2_to_3_operand")
5041 (match_operand 6 "const_6_to_7_operand")])))]
5045 mask = INTVAL (operands[3]);
5046 mask |= (INTVAL (operands[4]) - 4) << 1;
5047 mask |= (INTVAL (operands[5]) - 2) << 2;
5048 mask |= (INTVAL (operands[6]) - 6) << 3;
5049 operands[3] = GEN_INT (mask);
5051 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
5053 [(set_attr "type" "sseshuf")
5054 (set_attr "length_immediate" "1")
5055 (set_attr "prefix" "vex")
5056 (set_attr "mode" "V4DF")])
;; Expander for the 128-bit shufpd builtin form: operand 3 is the immediate
;; mask, decoded into explicit element indices for gen_sse2_shufpd_v2df.
;; Mask bit 1 selects index 3 (set) or 2 (clear) for the second element.
5058 (define_expand "sse2_shufpd"
5059 [(match_operand:V2DF 0 "register_operand")
5060 (match_operand:V2DF 1 "register_operand")
5061 (match_operand:V2DF 2 "nonimmediate_operand")
5062 (match_operand:SI 3 "const_int_operand")]
5065 int mask = INTVAL (operands[3]);
5066 emit_insn (gen_sse2_shufpd_v2df (operands[0], operands[1], operands[2],
5068 GEN_INT (mask & 2 ? 3 : 2)));
5072 ;; punpcklqdq and punpckhqdq are shorter than shufpd.
;; 256-bit integer interleave-high on V4DI, emitted as vpunpckhqdq with
;; operand 1 in a register and operand 2 in a register or memory.
5073 (define_insn "avx2_interleave_highv4di"
5074 [(set (match_operand:V4DI 0 "register_operand" "=x")
5077 (match_operand:V4DI 1 "register_operand" "x")
5078 (match_operand:V4DI 2 "nonimmediate_operand" "xm"))
5079 (parallel [(const_int 1)
5084 "vpunpckhqdq\t{%2, %1, %0|%0, %1, %2}"
5085 [(set_attr "type" "sselog")
5086 (set_attr "prefix" "vex")
5087 (set_attr "mode" "OI")])
;; 128-bit integer interleave-high on V2DI.  Alternative 0 is the legacy
;; punpckhqdq (operand 1 tied to the destination, data16 prefix); alternative
;; 1 is the three-operand vpunpckhqdq.
5089 (define_insn "vec_interleave_highv2di"
5090 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
5093 (match_operand:V2DI 1 "register_operand" "0,x")
5094 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm"))
5095 (parallel [(const_int 1)
5099 punpckhqdq\t{%2, %0|%0, %2}
5100 vpunpckhqdq\t{%2, %1, %0|%0, %1, %2}"
5101 [(set_attr "isa" "noavx,avx")
5102 (set_attr "type" "sselog")
5103 (set_attr "prefix_data16" "1,*")
5104 (set_attr "prefix" "orig,vex")
5105 (set_attr "mode" "TI")])
;; 256-bit integer interleave-low on V4DI, emitted as vpunpcklqdq with
;; operand 1 in a register and operand 2 in a register or memory.
5107 (define_insn "avx2_interleave_lowv4di"
5108 [(set (match_operand:V4DI 0 "register_operand" "=x")
5111 (match_operand:V4DI 1 "register_operand" "x")
5112 (match_operand:V4DI 2 "nonimmediate_operand" "xm"))
5113 (parallel [(const_int 0)
5118 "vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}"
5119 [(set_attr "type" "sselog")
5120 (set_attr "prefix" "vex")
5121 (set_attr "mode" "OI")])
;; 128-bit integer interleave-low on V2DI.  Alternative 0 is the legacy
;; punpcklqdq (operand 1 tied to the destination, data16 prefix); alternative
;; 1 is the three-operand vpunpcklqdq.
5123 (define_insn "vec_interleave_lowv2di"
5124 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
5127 (match_operand:V2DI 1 "register_operand" "0,x")
5128 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm"))
5129 (parallel [(const_int 0)
5133 punpcklqdq\t{%2, %0|%0, %2}
5134 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}"
5135 [(set_attr "isa" "noavx,avx")
5136 (set_attr "type" "sselog")
5137 (set_attr "prefix_data16" "1,*")
5138 (set_attr "prefix" "orig,vex")
5139 (set_attr "mode" "TI")])
;; (v)shufpd on the two-element 128-bit modes of VI8F_128.  The selection
;; picks one element of the operand 1 / operand 2 concatenation with operand 3
;; (0..1) and one with operand 4 (2..3).
;; The output code re-encodes these as the 2-bit immediate
;;   bit 0 = op3, bit 1 = op4 - 2
;; stored back into operand 3, then prints the legacy or the VEX form
;; according to the alternative.
5141 (define_insn "sse2_shufpd_<mode>"
5142 [(set (match_operand:VI8F_128 0 "register_operand" "=x,x")
5143 (vec_select:VI8F_128
5144 (vec_concat:<ssedoublevecmode>
5145 (match_operand:VI8F_128 1 "register_operand" "0,x")
5146 (match_operand:VI8F_128 2 "nonimmediate_operand" "xm,xm"))
5147 (parallel [(match_operand 3 "const_0_to_1_operand")
5148 (match_operand 4 "const_2_to_3_operand")])))]
5152 mask = INTVAL (operands[3]);
5153 mask |= (INTVAL (operands[4]) - 2) << 1;
5154 operands[3] = GEN_INT (mask);
5156 switch (which_alternative)
5159 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
5161 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
5166 [(set_attr "isa" "noavx,avx")
5167 (set_attr "type" "sseshuf")
5168 (set_attr "length_immediate" "1")
5169 (set_attr "prefix" "orig,vex")
5170 (set_attr "mode" "V2DF")])
5172 ;; Avoid combining registers from different units in a single alternative,
5173 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Extract element 1 (the high DF) of V2DF operand 1; memory-to-memory is
;; rejected by the condition.
;;   alt 0    store to memory with (v)movhpd
;;   alt 1-2  SSE register to SSE register (sselog1); the AVX form is
;;            vunpckhpd with operand 1 printed twice (%d1)
;;   alt 3-5  source in offsettable memory, destination an SSE register, an
;;            x87 register (fmov) or a general register (imov)
;; prefix_data16 is computed from alternative 0 combined with !TARGET_AVX.
5174 (define_insn "sse2_storehpd"
5175 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,x,*f,r")
5177 (match_operand:V2DF 1 "nonimmediate_operand" " x,0,x,o,o,o")
5178 (parallel [(const_int 1)])))]
5179 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
5181 %vmovhpd\t{%1, %0|%0, %1}
5183 vunpckhpd\t{%d1, %0|%0, %d1}
5187 [(set_attr "isa" "*,noavx,avx,*,*,*")
5188 (set_attr "type" "ssemov,sselog1,sselog1,ssemov,fmov,imov")
5189 (set (attr "prefix_data16")
5191 (and (eq_attr "alternative" "0")
5192 (not (match_test "TARGET_AVX")))
5194 (const_string "*")))
5195 (set_attr "prefix" "maybe_vex,orig,vex,*,*,*")
5196 (set_attr "mode" "V1DF,V1DF,V2DF,DF,DF,DF")])
;; Post-reload split (TARGET_SSE2): extracting element 1 of a V2DF in memory
;; into a DF register becomes a plain DF load from byte offset 8.
5199 [(set (match_operand:DF 0 "register_operand")
5201 (match_operand:V2DF 1 "memory_operand")
5202 (parallel [(const_int 1)])))]
5203 "TARGET_SSE2 && reload_completed"
5204 [(set (match_dup 0) (match_dup 1))]
5205 "operands[1] = adjust_address (operands[1], DFmode, 8);")
;; SSE1-only (TARGET_SSE without TARGET_SSE2) extraction of element 1 of a
;; V2DF, using single-precision move instructions; memory-to-memory is
;; rejected.
;;   alt 0  store to memory: movhps
;;   alt 1  register to register: movhlps
;;   alt 2  load from offsettable memory: movlps of %H1
5207 (define_insn "*vec_extractv2df_1_sse"
5208 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
5210 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
5211 (parallel [(const_int 1)])))]
5212 "!TARGET_SSE2 && TARGET_SSE
5213 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
5215 movhps\t{%1, %0|%q0, %1}
5216 movhlps\t{%1, %0|%0, %1}
5217 movlps\t{%H1, %0|%0, %H1}"
5218 [(set_attr "type" "ssemov")
5219 (set_attr "mode" "V2SF,V4SF,V2SF")])
5221 ;; Avoid combining registers from different units in a single alternative,
5222 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Extract element 0 (the low DF) of V2DF operand 1; memory-to-memory is
;; rejected by the condition.
;;   alt 0    store to memory with (v)movlpd
;;   alt 1-2  SSE register destination from a register or memory source
;;   alt 3-4  memory source into an x87 register (fmov) or a general
;;            register (imov)
5223 (define_insn "sse2_storelpd"
5224 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
5226 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m,m,m")
5227 (parallel [(const_int 0)])))]
5228 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
5230 %vmovlpd\t{%1, %0|%0, %1}
5235 [(set_attr "type" "ssemov,ssemov,ssemov,fmov,imov")
5236 (set_attr "prefix_data16" "1,*,*,*,*")
5237 (set_attr "prefix" "maybe_vex")
5238 (set_attr "mode" "V1DF,DF,DF,DF,DF")])
;; Post-reload split (TARGET_SSE2): extracting element 0 of a V2DF into a DF
;; register becomes a plain DF move.  A register source is rewritten as the
;; same hard register in DFmode; a memory source is re-addressed as a DFmode
;; access at offset 0.
5241 [(set (match_operand:DF 0 "register_operand")
5243 (match_operand:V2DF 1 "nonimmediate_operand")
5244 (parallel [(const_int 0)])))]
5245 "TARGET_SSE2 && reload_completed"
5246 [(set (match_dup 0) (match_dup 1))]
5248 if (REG_P (operands[1]))
5249 operands[1] = gen_rtx_REG (DFmode, REGNO (operands[1]));
5251 operands[1] = adjust_address (operands[1], DFmode, 0);
;; SSE1-only (TARGET_SSE without TARGET_SSE2) extraction of element 0 of a
;; V2DF, using single-precision move instructions; memory-to-memory is
;; rejected.
;;   alt 0  store to memory: movlps
;;   alt 1  register to register: movaps (copies the whole register)
;;   alt 2  load from memory: movlps
5254 (define_insn "*vec_extractv2df_0_sse"
5255 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
5257 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
5258 (parallel [(const_int 0)])))]
5259 "!TARGET_SSE2 && TARGET_SSE
5260 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
5262 movlps\t{%1, %0|%0, %1}
5263 movaps\t{%1, %0|%0, %1}
5264 movlps\t{%1, %0|%0, %q1}"
5265 [(set_attr "type" "ssemov")
5266 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander wrapping sse2_loadhpd: the result combines element 0 of V2DF
;; operand 1 with DF operand 2.
;; ix86_fixup_binary_operands legitimizes the operands and returns the
;; destination to use; the real insn is emitted into that destination and,
;; if it differs from operand 0, the value is then copied to operand 0.
5268 (define_expand "sse2_loadhpd_exp"
5269 [(set (match_operand:V2DF 0 "nonimmediate_operand")
5272 (match_operand:V2DF 1 "nonimmediate_operand")
5273 (parallel [(const_int 0)]))
5274 (match_operand:DF 2 "nonimmediate_operand")))]
5277 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
5279 emit_insn (gen_sse2_loadhpd (dst, operands[1], operands[2]));
5281 /* Fix up the destination if needed. */
5282 if (dst != operands[0])
5283 emit_move_insn (operands[0], dst);
5288 ;; Avoid combining registers from different units in a single alternative,
5289 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Combine element 0 of V2DF operand 1 with DF operand 2; operands 1 and 2
;; may not both be memory.
;;   alt 0/1  operand 2 in memory: movhpd / vmovhpd
;;   alt 2/3  operand 2 in an SSE register: unpcklpd / vunpcklpd
;;   alt 4-6  operand 2 in an SSE, x87 (fmov) or general (imov) register; the
;;            isa and prefix attributes leave these unconstrained
5290 (define_insn "sse2_loadhpd"
5291 [(set (match_operand:V2DF 0 "nonimmediate_operand"
5295 (match_operand:V2DF 1 "nonimmediate_operand"
5297 (parallel [(const_int 0)]))
5298 (match_operand:DF 2 "nonimmediate_operand"
5299 " m,m,x,x,x,*f,r")))]
5300 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
5302 movhpd\t{%2, %0|%0, %2}
5303 vmovhpd\t{%2, %1, %0|%0, %1, %2}
5304 unpcklpd\t{%2, %0|%0, %2}
5305 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
5309 [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
5310 (set_attr "type" "ssemov,ssemov,sselog,sselog,ssemov,fmov,imov")
5311 (set_attr "prefix_data16" "1,*,*,*,*,*,*")
5312 (set_attr "prefix" "orig,vex,orig,vex,*,*,*")
5313 (set_attr "mode" "V1DF,V1DF,V2DF,V2DF,DF,DF,DF")])
;; Post-reload split (TARGET_SSE2): when a V2DF in memory keeps its own
;; element 0 and receives DF register operand 1, the update reduces to a
;; plain DF store of operand 1 at byte offset 8 of the destination.
5316 [(set (match_operand:V2DF 0 "memory_operand")
5318 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
5319 (match_operand:DF 1 "register_operand")))]
5320 "TARGET_SSE2 && reload_completed"
5321 [(set (match_dup 0) (match_dup 1))]
5322 "operands[0] = adjust_address (operands[0], DFmode, 8);")
;; Expander wrapping sse2_loadlpd: the result combines DF operand 2 with
;; element 1 of V2DF operand 1.
;; ix86_fixup_binary_operands legitimizes the operands and returns the
;; destination to use; the real insn is emitted into that destination and,
;; if it differs from operand 0, the value is then copied to operand 0.
5324 (define_expand "sse2_loadlpd_exp"
5325 [(set (match_operand:V2DF 0 "nonimmediate_operand")
5327 (match_operand:DF 2 "nonimmediate_operand")
5329 (match_operand:V2DF 1 "nonimmediate_operand")
5330 (parallel [(const_int 1)]))))]
5333 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
5335 emit_insn (gen_sse2_loadlpd (dst, operands[1], operands[2]));
5337 /* Fix up the destination if needed. */
5338 if (dst != operands[0])
5339 emit_move_insn (operands[0], dst);
5344 ;; Avoid combining registers from different units in a single alternative,
5345 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Combine DF operand 2 with element 1 of V2DF operand 1; operands 1 and 2
;; may not both be memory.
;;   alt 0     operand 1 is the C constant: (v)movsd load of operand 2
;;   alt 1/2   operand 2 in memory: movlpd / vmovlpd
;;   alt 3/4   operand 2 in an SSE register: movsd / vmovsd
;;   alt 5     operand 2 tied to the destination, operand 1 in a register:
;;             shufpd with immediate 2 (type sselog)
;;   alt 6/7   operand 1 in offsettable memory: movhpd / vmovhpd of %H1
;;   alt 8-10  memory destination with operand 1 tied to it and operand 2 in
;;             an SSE, x87 (fmov) or general (imov) register
5346 (define_insn "sse2_loadlpd"
5347 [(set (match_operand:V2DF 0 "nonimmediate_operand"
5348 "=x,x,x,x,x,x,x,x,m,m ,m")
5350 (match_operand:DF 2 "nonimmediate_operand"
5351 " m,m,m,x,x,0,0,x,x,*f,r")
5353 (match_operand:V2DF 1 "vector_move_operand"
5354 " C,0,x,0,x,x,o,o,0,0 ,0")
5355 (parallel [(const_int 1)]))))]
5356 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
5358 %vmovsd\t{%2, %0|%0, %2}
5359 movlpd\t{%2, %0|%0, %2}
5360 vmovlpd\t{%2, %1, %0|%0, %1, %2}
5361 movsd\t{%2, %0|%0, %2}
5362 vmovsd\t{%2, %1, %0|%0, %1, %2}
5363 shufpd\t{$2, %1, %0|%0, %1, 2}
5364 movhpd\t{%H1, %0|%0, %H1}
5365 vmovhpd\t{%H1, %2, %0|%0, %2, %H1}
5369 [(set_attr "isa" "*,noavx,avx,noavx,avx,noavx,noavx,avx,*,*,*")
5371 (cond [(eq_attr "alternative" "5")
5372 (const_string "sselog")
5373 (eq_attr "alternative" "9")
5374 (const_string "fmov")
5375 (eq_attr "alternative" "10")
5376 (const_string "imov")
5378 (const_string "ssemov")))
5379 (set_attr "prefix_data16" "*,1,*,*,*,*,1,*,*,*,*")
5380 (set_attr "length_immediate" "*,*,*,*,*,1,*,*,*,*,*")
5381 (set_attr "prefix" "maybe_vex,orig,vex,orig,vex,orig,orig,vex,*,*,*")
5382 (set_attr "mode" "DF,V1DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,DF,DF,DF")])
;; Post-reload split (TARGET_SSE2): when a V2DF in memory receives DF register
;; operand 1 and keeps its own element 1, the update reduces to a plain DF
;; store of operand 1 at byte offset 0 of the destination.
5385 [(set (match_operand:V2DF 0 "memory_operand")
5387 (match_operand:DF 1 "register_operand")
5388 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
5389 "TARGET_SSE2 && reload_completed"
5390 [(set (match_dup 0) (match_dup 1))]
5391 "operands[0] = adjust_address (operands[0], DFmode, 0);")
;; movsd-style combination of two V2DF operands (operand 2 and operand 1).
;;   alt 0/1  register operands: movsd / vmovsd
;;   alt 2/3  operand 2 in memory: movlpd / vmovlpd load
;;   alt 4    memory destination with operand 1 tied to it: (v)movlpd store
;;   alt 5    operand 2 tied to the destination: shufpd with immediate 2
;;   alt 6/7  operand 1 in offsettable memory: movhps / vmovhps of %H1
;;   alt 8    offsettable memory destination with operand 2 tied to it:
;;            (v)movhps store of operand 1 to %H0
;; prefix_data16 is computed from alternatives 2 and 4 combined with
;; !TARGET_AVX.
5393 (define_insn "sse2_movsd"
5394 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,m,x,x,x,o")
5396 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,m,m,x,0,0,x,0")
5397 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,0,x,0,x,o,o,x")
5401 movsd\t{%2, %0|%0, %2}
5402 vmovsd\t{%2, %1, %0|%0, %1, %2}
5403 movlpd\t{%2, %0|%0, %q2}
5404 vmovlpd\t{%2, %1, %0|%0, %1, %q2}
5405 %vmovlpd\t{%2, %0|%q0, %2}
5406 shufpd\t{$2, %1, %0|%0, %1, 2}
5407 movhps\t{%H1, %0|%0, %H1}
5408 vmovhps\t{%H1, %2, %0|%0, %2, %H1}
5409 %vmovhps\t{%1, %H0|%H0, %1}"
5410 [(set_attr "isa" "noavx,avx,noavx,avx,*,noavx,noavx,avx,*")
5413 (eq_attr "alternative" "5")
5414 (const_string "sselog")
5415 (const_string "ssemov")))
5416 (set (attr "prefix_data16")
5418 (and (eq_attr "alternative" "2,4")
5419 (not (match_test "TARGET_AVX")))
5421 (const_string "*")))
5422 (set_attr "length_immediate" "*,*,*,*,*,1,*,*,*")
5423 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig,vex,maybe_vex")
5424 (set_attr "mode" "DF,DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,V1DF")])
;; Broadcast DF operand 1 into both elements of a V2DF register.
;; Alternative 0 (non-AVX) has operand 1 tied to the destination; alternative
;; 1 (SSE3 per the isa attribute) takes a register or memory source and emits
;; (v)movddup.
5426 (define_insn "vec_dupv2df"
5427 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
5429 (match_operand:DF 1 "nonimmediate_operand" " 0,xm")))]
5433 %vmovddup\t{%1, %0|%0, %1}"
5434 [(set_attr "isa" "noavx,sse3")
5435 (set_attr "type" "sselog1")
5436 (set_attr "prefix" "orig,maybe_vex")
5437 (set_attr "mode" "V2DF,DF")])
;; Build a V2DF register from two DF operands (operand 1, operand 2).
;;   alt 0/1  both in SSE registers: unpcklpd / vunpcklpd
;;   alt 2    operand 1 in memory, operand 2 identical to it: (v)movddup
;;   alt 3/4  operand 2 in memory: movhpd / vmovhpd
;;   alt 5    operand 1 in memory, operand 2 the C constant: (v)movsd load
;;   alt 6/7  non-AVX fallbacks without an SSE2 requirement in the isa
;;            attribute: movlhps (register) / movhps (memory)
5439 (define_insn "*vec_concatv2df"
5440 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x,x,x,x,x,x")
5442 (match_operand:DF 1 "nonimmediate_operand" " 0,x,m,0,x,m,0,0")
5443 (match_operand:DF 2 "vector_move_operand" " x,x,1,m,m,C,x,m")))]
5446 unpcklpd\t{%2, %0|%0, %2}
5447 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
5448 %vmovddup\t{%1, %0|%0, %1}
5449 movhpd\t{%2, %0|%0, %2}
5450 vmovhpd\t{%2, %1, %0|%0, %1, %2}
5451 %vmovsd\t{%1, %0|%0, %1}
5452 movlhps\t{%2, %0|%0, %2}
5453 movhps\t{%2, %0|%0, %2}"
5454 [(set_attr "isa" "sse2_noavx,avx,sse3,sse2_noavx,avx,sse2,noavx,noavx")
5457 (eq_attr "alternative" "0,1,2")
5458 (const_string "sselog")
5459 (const_string "ssemov")))
5460 (set_attr "prefix_data16" "*,*,*,1,*,*,*,*")
5461 (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex,orig,orig")
5462 (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,DF,V4SF,V2SF")])
5464 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5466 ;; Parallel integral arithmetic
5468 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Integer vector negation expander for the VI_AVX2 modes.  The preparation
;; statement sets operand 2 to a register holding the all-zero vector of
;; <MODE> (presumably the minuend of a 0 - x subtraction -- confirm against
;; the full RTL template).
5470 (define_expand "neg<mode>2"
5471 [(set (match_operand:VI_AVX2 0 "register_operand")
5474 (match_operand:VI_AVX2 1 "nonimmediate_operand")))]
5476 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
;; Integer vector add/subtract expander for the VI_AVX2 modes.  Both inputs
;; may be memory here; ix86_fixup_binary_operands_no_copy is called with the
;; operation code, the mode and the operand array to legitimize them.
5478 (define_expand "<plusminus_insn><mode>3"
5479 [(set (match_operand:VI_AVX2 0 "register_operand")
5481 (match_operand:VI_AVX2 1 "nonimmediate_operand")
5482 (match_operand:VI_AVX2 2 "nonimmediate_operand")))]
5484 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Matching insn for integer vector add/subtract (VI_AVX2 modes); valid under
;; TARGET_SSE2 when ix86_binary_operator_ok accepts the operands.
;; Alternative 0 is the legacy two-operand form (operand 1 tied to the
;; destination, with the <comm> marker applied to its constraint);
;; alternative 1 is the three-operand VEX form using constraint v.
5486 (define_insn "*<plusminus_insn><mode>3"
5487 [(set (match_operand:VI_AVX2 0 "register_operand" "=x,v")
5489 (match_operand:VI_AVX2 1 "nonimmediate_operand" "<comm>0,v")
5490 (match_operand:VI_AVX2 2 "nonimmediate_operand" "xm,vm")))]
5491 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5493 p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
5494 vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5495 [(set_attr "isa" "noavx,avx")
5496 (set_attr "type" "sseiadd")
5497 (set_attr "prefix_data16" "1,*")
5498 (set_attr "prefix" "orig,vex")
5499 (set_attr "mode" "<sseinsnmode>")])
;; Saturating add/subtract expander (sat_plusminus codes) for the VI12_AVX2
;; modes.  Both inputs may be memory here;
;; ix86_fixup_binary_operands_no_copy is called with the operation code, the
;; mode and the operand array to legitimize them.
5501 (define_expand "<sse2_avx2>_<plusminus_insn><mode>3"
5502 [(set (match_operand:VI12_AVX2 0 "register_operand")
5503 (sat_plusminus:VI12_AVX2
5504 (match_operand:VI12_AVX2 1 "nonimmediate_operand")
5505 (match_operand:VI12_AVX2 2 "nonimmediate_operand")))]
5507 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Insn for the sat_plusminus codes on VI12_AVX2 vectors.
;;   Alternative 0 (noavx): two-operand form, destination tied to operand 1.
;;   Alternative 1 (avx):   three-operand "v"-prefixed form.
;; Valid when TARGET_SSE2 holds and ix86_binary_operator_ok accepts the
;; operands.  The "mode" attribute is fixed to TI for both alternatives.
5509 (define_insn "*<sse2_avx2>_<plusminus_insn><mode>3"
5510 [(set (match_operand:VI12_AVX2 0 "register_operand" "=x,v")
5511 (sat_plusminus:VI12_AVX2
5512 (match_operand:VI12_AVX2 1 "nonimmediate_operand" "<comm>0,v")
5513 (match_operand:VI12_AVX2 2 "nonimmediate_operand" "xm,vm")))]
5514 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5516 p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
5517 vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5518 [(set_attr "isa" "noavx,avx")
5519 (set_attr "type" "sseiadd")
5520 (set_attr "prefix_data16" "1,*")
5521 (set_attr "prefix" "orig,vex")
5522 (set_attr "mode" "TI")])
;; Multiply expander for the VI1_AVX2 modes.  All three operands must be
;; registers.  No insn pattern is generated from the RTL template: the
;; work is delegated to ix86_expand_vecop_qihi with code MULT.
;; NOTE(review): the enabling condition is on a line not visible here.
5524 (define_expand "mul<mode>3"
5525 [(set (match_operand:VI1_AVX2 0 "register_operand")
5526 (mult:VI1_AVX2 (match_operand:VI1_AVX2 1 "register_operand")
5527 (match_operand:VI1_AVX2 2 "register_operand")))]
5530 ix86_expand_vecop_qihi (MULT, operands[0], operands[1], operands[2]);
;; Multiply expander for the VI2_AVX2 modes.  Inputs may be registers or
;; memory; the preparation statement passes them through
;; ix86_fixup_binary_operands_no_copy with code MULT.
5534 (define_expand "mul<mode>3"
5535 [(set (match_operand:VI2_AVX2 0 "register_operand")
5536 (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "nonimmediate_operand")
5537 (match_operand:VI2_AVX2 2 "nonimmediate_operand")))]
5539 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; Insn for the VI2_AVX2 multiply; emits pmullw / vpmullw.
;; Operands 1 and 2 are marked commutative ("%").
;;   Alternative 0 (noavx): destination tied to operand 1.
;;   Alternative 1 (avx):   three-operand VEX form.
;; Valid when TARGET_SSE2 holds and ix86_binary_operator_ok accepts the
;; operands for MULT.
5541 (define_insn "*mul<mode>3"
5542 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
5543 (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,x")
5544 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,xm")))]
5545 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
5547 pmullw\t{%2, %0|%0, %2}
5548 vpmullw\t{%2, %1, %0|%0, %1, %2}"
5549 [(set_attr "isa" "noavx,avx")
5550 (set_attr "type" "sseimul")
5551 (set_attr "prefix_data16" "1,*")
5552 (set_attr "prefix" "orig,vex")
5553 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the <s>mul..._highpart patterns on VI2_AVX2 vectors.
;; The visible RTL extends both inputs to <ssedoublemode> (any_extend),
;; multiplies them there and applies a logical right shift.
;; NOTE(review): the outermost RTL code and the shift count are on lines
;; not visible in this excerpt.
;; The preparation statement passes the operands through
;; ix86_fixup_binary_operands_no_copy with code MULT.
5555 (define_expand "<s>mul<mode>3_highpart"
5556 [(set (match_operand:VI2_AVX2 0 "register_operand")
5558 (lshiftrt:<ssedoublemode>
5559 (mult:<ssedoublemode>
5560 (any_extend:<ssedoublemode>
5561 (match_operand:VI2_AVX2 1 "nonimmediate_operand"))
5562 (any_extend:<ssedoublemode>
5563 (match_operand:VI2_AVX2 2 "nonimmediate_operand")))
5566 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; Insn matching the highpart-multiply RTL above; emits pmulh<u>w or
;; vpmulh<u>w.  Operands 1 and 2 are commutative ("%").
;;   Alternative 0 (noavx): destination tied to operand 1.
;;   Alternative 1 (avx):   three-operand VEX form.
;; Valid when TARGET_SSE2 holds and ix86_binary_operator_ok accepts the
;; operands for MULT.
5568 (define_insn "*<s>mul<mode>3_highpart"
5569 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
5571 (lshiftrt:<ssedoublemode>
5572 (mult:<ssedoublemode>
5573 (any_extend:<ssedoublemode>
5574 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,x"))
5575 (any_extend:<ssedoublemode>
5576 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,xm")))
5578 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
5580 pmulh<u>w\t{%2, %0|%0, %2}
5581 vpmulh<u>w\t{%2, %1, %0|%0, %1, %2}"
5582 [(set_attr "isa" "noavx,avx")
5583 (set_attr "type" "sseimul")
5584 (set_attr "prefix_data16" "1,*")
5585 (set_attr "prefix" "orig,vex")
5586 (set_attr "mode" "<sseinsnmode>")])
;; Expander producing a V4DI result from two V8SI inputs.  Elements
;; 0, 2, 4 and 6 of each input are selected by the parallels below.
;; NOTE(review): the extension and multiply RTL codes are on lines not
;; visible here; the "umult" name suggests an unsigned widening multiply.
;; The preparation statement passes the operands through
;; ix86_fixup_binary_operands_no_copy (MULT, V8SImode, ...).
5588 (define_expand "vec_widen_umult_even_v8si"
5589 [(set (match_operand:V4DI 0 "register_operand")
5593 (match_operand:V8SI 1 "nonimmediate_operand")
5594 (parallel [(const_int 0) (const_int 2)
5595 (const_int 4) (const_int 6)])))
5598 (match_operand:V8SI 2 "nonimmediate_operand")
5599 (parallel [(const_int 0) (const_int 2)
5600 (const_int 4) (const_int 6)])))))]
5602 "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")
;; AVX2 insn for the even-element widening multiply of two V8SI vectors;
;; emits the three-operand vpmuludq.  Operands 1 and 2 are commutative
;; ("%"); operand 2 may be memory.  Single alternative, VEX prefix only.
5604 (define_insn "*vec_widen_umult_even_v8si"
5605 [(set (match_operand:V4DI 0 "register_operand" "=x")
5609 (match_operand:V8SI 1 "nonimmediate_operand" "%x")
5610 (parallel [(const_int 0) (const_int 2)
5611 (const_int 4) (const_int 6)])))
5614 (match_operand:V8SI 2 "nonimmediate_operand" "xm")
5615 (parallel [(const_int 0) (const_int 2)
5616 (const_int 4) (const_int 6)])))))]
5617 "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V8SImode, operands)"
5618 "vpmuludq\t{%2, %1, %0|%0, %1, %2}"
5619 [(set_attr "type" "sseimul")
5620 (set_attr "prefix" "vex")
5621 (set_attr "mode" "OI")])
;; 128-bit counterpart of vec_widen_umult_even_v8si: V2DI result from
;; elements 0 and 2 of two V4SI inputs.
;; NOTE(review): the extension and multiply RTL codes are not visible here.
;; The preparation statement passes the operands through
;; ix86_fixup_binary_operands_no_copy (MULT, V4SImode, ...).
5623 (define_expand "vec_widen_umult_even_v4si"
5624 [(set (match_operand:V2DI 0 "register_operand")
5628 (match_operand:V4SI 1 "nonimmediate_operand")
5629 (parallel [(const_int 0) (const_int 2)])))
5632 (match_operand:V4SI 2 "nonimmediate_operand")
5633 (parallel [(const_int 0) (const_int 2)])))))]
5635 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; SSE2 insn for the even-element widening multiply of two V4SI vectors;
;; emits pmuludq / vpmuludq.  Operands 1 and 2 are commutative ("%").
;;   Alternative 0 (noavx): destination tied to operand 1.
;;   Alternative 1 (avx):   three-operand VEX form.
5637 (define_insn "*vec_widen_umult_even_v4si"
5638 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
5642 (match_operand:V4SI 1 "nonimmediate_operand" "%0,x")
5643 (parallel [(const_int 0) (const_int 2)])))
5646 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
5647 (parallel [(const_int 0) (const_int 2)])))))]
5648 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5650 pmuludq\t{%2, %0|%0, %2}
5651 vpmuludq\t{%2, %1, %0|%0, %1, %2}"
5652 [(set_attr "isa" "noavx,avx")
5653 (set_attr "type" "sseimul")
5654 (set_attr "prefix_data16" "1,*")
5655 (set_attr "prefix" "orig,vex")
5656 (set_attr "mode" "TI")])
;; Expander producing a V4DI result from elements 0, 2, 4 and 6 of two
;; V8SI inputs ("smult" variant).
;; NOTE(review): the extension and multiply RTL codes are on lines not
;; visible here; the name suggests a signed widening multiply.
;; The preparation statement passes the operands through
;; ix86_fixup_binary_operands_no_copy (MULT, V8SImode, ...).
5658 (define_expand "vec_widen_smult_even_v8si"
5659 [(set (match_operand:V4DI 0 "register_operand")
5663 (match_operand:V8SI 1 "nonimmediate_operand")
5664 (parallel [(const_int 0) (const_int 2)
5665 (const_int 4) (const_int 6)])))
5668 (match_operand:V8SI 2 "nonimmediate_operand")
5669 (parallel [(const_int 0) (const_int 2)
5670 (const_int 4) (const_int 6)])))))]
5672 "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")
;; AVX2 insn for the even-element widening multiply of two V8SI vectors
;; ("smult" variant); emits the three-operand vpmuldq.
;; Operands 1 and 2 are commutative, so operand 1 carries the "%"
;; constraint modifier, as in the sibling patterns
;; *vec_widen_umult_even_v8si and *sse4_1_mulv2siv2di3.  The insn
;; condition uses ix86_binary_operator_ok (MULT, ...); without "%" the
;; register allocator may not swap the inputs when operand 1 is the one
;; in memory and must reload it instead.
5674 (define_insn "*vec_widen_smult_even_v8si"
5675 [(set (match_operand:V4DI 0 "register_operand" "=x")
5679 (match_operand:V8SI 1 "nonimmediate_operand" "%x")
5680 (parallel [(const_int 0) (const_int 2)
5681 (const_int 4) (const_int 6)])))
5684 (match_operand:V8SI 2 "nonimmediate_operand" "xm")
5685 (parallel [(const_int 0) (const_int 2)
5686 (const_int 4) (const_int 6)])))))]
5687 "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V8SImode, operands)"
5688 "vpmuldq\t{%2, %1, %0|%0, %1, %2}"
5689 [(set_attr "isa" "avx")
5690 (set_attr "type" "sseimul")
5691 (set_attr "prefix_extra" "1")
5692 (set_attr "prefix" "vex")
5693 (set_attr "mode" "OI")])
;; Expander producing a V2DI result from elements 0 and 2 of two V4SI
;; inputs (SSE4.1 variant, per the pattern name).
;; NOTE(review): the extension and multiply RTL codes and the enabling
;; condition are on lines not visible here.
;; The preparation statement passes the operands through
;; ix86_fixup_binary_operands_no_copy (MULT, V4SImode, ...).
5695 (define_expand "sse4_1_mulv2siv2di3"
5696 [(set (match_operand:V2DI 0 "register_operand")
5700 (match_operand:V4SI 1 "nonimmediate_operand")
5701 (parallel [(const_int 0) (const_int 2)])))
5704 (match_operand:V4SI 2 "nonimmediate_operand")
5705 (parallel [(const_int 0) (const_int 2)])))))]
5707 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; SSE4.1 insn for the pattern above; emits pmuldq / vpmuldq.
;; Operands 1 and 2 are commutative ("%").
;;   Alternative 0 (noavx): destination tied to operand 1.
;;   Alternative 1 (avx):   three-operand VEX form.
5709 (define_insn "*sse4_1_mulv2siv2di3"
5710 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
5714 (match_operand:V4SI 1 "nonimmediate_operand" "%0,x")
5715 (parallel [(const_int 0) (const_int 2)])))
5718 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
5719 (parallel [(const_int 0) (const_int 2)])))))]
5720 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5722 pmuldq\t{%2, %0|%0, %2}
5723 vpmuldq\t{%2, %1, %0|%0, %1, %2}"
5724 [(set_attr "isa" "noavx,avx")
5725 (set_attr "type" "sseimul")
5726 (set_attr "prefix_data16" "1,*")
5727 (set_attr "prefix_extra" "1")
5728 (set_attr "prefix" "orig,vex")
5729 (set_attr "mode" "TI")])
;; Expander for the 256-bit pmaddwd pattern: V8SI result from two V16HI
;; inputs.  The RTL selects the even elements (0,2,...,14) of both inputs
;; and, via match_dup, the odd elements (1,3,...,15) of the same inputs.
;; NOTE(review): the codes that combine those selections (extension,
;; multiply, add) are on lines not visible in this excerpt.
;; The preparation statement passes the operands through
;; ix86_fixup_binary_operands_no_copy (MULT, V16HImode, ...).
5731 (define_expand "avx2_pmaddwd"
5732 [(set (match_operand:V8SI 0 "register_operand")
5737 (match_operand:V16HI 1 "nonimmediate_operand")
5738 (parallel [(const_int 0) (const_int 2)
5739 (const_int 4) (const_int 6)
5740 (const_int 8) (const_int 10)
5741 (const_int 12) (const_int 14)])))
5744 (match_operand:V16HI 2 "nonimmediate_operand")
5745 (parallel [(const_int 0) (const_int 2)
5746 (const_int 4) (const_int 6)
5747 (const_int 8) (const_int 10)
5748 (const_int 12) (const_int 14)]))))
5751 (vec_select:V8HI (match_dup 1)
5752 (parallel [(const_int 1) (const_int 3)
5753 (const_int 5) (const_int 7)
5754 (const_int 9) (const_int 11)
5755 (const_int 13) (const_int 15)])))
5757 (vec_select:V8HI (match_dup 2)
5758 (parallel [(const_int 1) (const_int 3)
5759 (const_int 5) (const_int 7)
5760 (const_int 9) (const_int 11)
5761 (const_int 13) (const_int 15)]))))))]
5763 "ix86_fixup_binary_operands_no_copy (MULT, V16HImode, operands);")
;; AVX2 insn matching the avx2_pmaddwd RTL; emits the three-operand
;; vpmaddwd.  Operands 1 and 2 are commutative ("%"); operand 2 may be
;; memory.  Single alternative, VEX prefix only.
5765 (define_insn "*avx2_pmaddwd"
5766 [(set (match_operand:V8SI 0 "register_operand" "=x")
5771 (match_operand:V16HI 1 "nonimmediate_operand" "%x")
5772 (parallel [(const_int 0) (const_int 2)
5773 (const_int 4) (const_int 6)
5774 (const_int 8) (const_int 10)
5775 (const_int 12) (const_int 14)])))
5778 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
5779 (parallel [(const_int 0) (const_int 2)
5780 (const_int 4) (const_int 6)
5781 (const_int 8) (const_int 10)
5782 (const_int 12) (const_int 14)]))))
5785 (vec_select:V8HI (match_dup 1)
5786 (parallel [(const_int 1) (const_int 3)
5787 (const_int 5) (const_int 7)
5788 (const_int 9) (const_int 11)
5789 (const_int 13) (const_int 15)])))
5791 (vec_select:V8HI (match_dup 2)
5792 (parallel [(const_int 1) (const_int 3)
5793 (const_int 5) (const_int 7)
5794 (const_int 9) (const_int 11)
5795 (const_int 13) (const_int 15)]))))))]
5796 "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V16HImode, operands)"
5797 "vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
5798 [(set_attr "type" "sseiadd")
5799 (set_attr "prefix" "vex")
5800 (set_attr "mode" "OI")])
;; Expander for the 128-bit pmaddwd pattern: V4SI result from two V8HI
;; inputs.  The RTL selects the even elements (0,2,4,6) of both inputs
;; and, via match_dup, the odd elements (1,3,5,7) of the same inputs.
;; NOTE(review): the codes that combine those selections are on lines
;; not visible in this excerpt.
;; The preparation statement passes the operands through
;; ix86_fixup_binary_operands_no_copy (MULT, V8HImode, ...).
5802 (define_expand "sse2_pmaddwd"
5803 [(set (match_operand:V4SI 0 "register_operand")
5808 (match_operand:V8HI 1 "nonimmediate_operand")
5809 (parallel [(const_int 0) (const_int 2)
5810 (const_int 4) (const_int 6)])))
5813 (match_operand:V8HI 2 "nonimmediate_operand")
5814 (parallel [(const_int 0) (const_int 2)
5815 (const_int 4) (const_int 6)]))))
5818 (vec_select:V4HI (match_dup 1)
5819 (parallel [(const_int 1) (const_int 3)
5820 (const_int 5) (const_int 7)])))
5822 (vec_select:V4HI (match_dup 2)
5823 (parallel [(const_int 1) (const_int 3)
5824 (const_int 5) (const_int 7)]))))))]
5826 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; SSE2 insn matching the sse2_pmaddwd RTL; emits pmaddwd / vpmaddwd.
;; Operands 1 and 2 are commutative ("%").
;;   Alternative 0 (noavx): destination tied to operand 1.
;;   Alternative 1 (avx):   three-operand VEX form.
5828 (define_insn "*sse2_pmaddwd"
5829 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
5834 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x")
5835 (parallel [(const_int 0) (const_int 2)
5836 (const_int 4) (const_int 6)])))
5839 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
5840 (parallel [(const_int 0) (const_int 2)
5841 (const_int 4) (const_int 6)]))))
5844 (vec_select:V4HI (match_dup 1)
5845 (parallel [(const_int 1) (const_int 3)
5846 (const_int 5) (const_int 7)])))
5848 (vec_select:V4HI (match_dup 2)
5849 (parallel [(const_int 1) (const_int 3)
5850 (const_int 5) (const_int 7)]))))))]
5851 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5853 pmaddwd\t{%2, %0|%0, %2}
5854 vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
5855 [(set_attr "isa" "noavx,avx")
5856 (set_attr "type" "sseiadd")
5857 (set_attr "atom_unit" "simul")
5858 (set_attr "prefix_data16" "1,*")
5859 (set_attr "prefix" "orig,vex")
5860 (set_attr "mode" "TI")])
;; Multiply expander for the VI4_AVX512F modes.  The inputs are accepted
;; as general_vector_operand.  Two code paths are visible:
;;   - any input that is not a nonimmediate_operand is forced into a
;;     register, then ix86_fixup_binary_operands_no_copy is applied;
;;   - otherwise the expansion goes through ix86_expand_sse2_mulv4si3.
;; NOTE(review): the test that chooses between the two paths is on a
;; line not visible in this excerpt.
5862 (define_expand "mul<mode>3"
5863 [(set (match_operand:VI4_AVX512F 0 "register_operand")
5865 (match_operand:VI4_AVX512F 1 "general_vector_operand")
5866 (match_operand:VI4_AVX512F 2 "general_vector_operand")))]
5871 if (!nonimmediate_operand (operands[1], <MODE>mode))
5872 operands[1] = force_reg (<MODE>mode, operands[1]);
5873 if (!nonimmediate_operand (operands[2], <MODE>mode))
5874 operands[2] = force_reg (<MODE>mode, operands[2]);
5875 ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
5879 ix86_expand_sse2_mulv4si3 (operands[0], operands[1], operands[2]);
;; Insn for the VI4_AVX512F multiply; emits pmulld / vpmulld.
;; Operands 1 and 2 are commutative ("%").
;;   Alternative 0 (noavx): destination tied to operand 1.
;;   Alternative 1 (avx):   three-operand form.
;; Valid when TARGET_SSE4_1 holds and ix86_binary_operator_ok accepts the
;; operands for MULT.
5884 (define_insn "*<sse4_1_avx2>_mul<mode>3"
5885 [(set (match_operand:VI4_AVX512F 0 "register_operand" "=x,v")
5887 (match_operand:VI4_AVX512F 1 "nonimmediate_operand" "%0,v")
5888 (match_operand:VI4_AVX512F 2 "nonimmediate_operand" "xm,vm")))]
5889 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
5891 pmulld\t{%2, %0|%0, %2}
5892 vpmulld\t{%2, %1, %0|%0, %1, %2}"
5893 [(set_attr "isa" "noavx,avx")
5894 (set_attr "type" "sseimul")
5895 (set_attr "prefix_extra" "1")
5896 (set_attr "prefix" "orig,vex")
5897 (set_attr "btver2_decode" "vector,vector")
5898 (set_attr "mode" "<sseinsnmode>")])
;; Multiply expander for the VI8_AVX2_AVX512F modes.  All operands must
;; be registers; the expansion is delegated to ix86_expand_sse2_mulvxdi3.
;; NOTE(review): the enabling condition is on a line not visible here.
5900 (define_expand "mul<mode>3"
5901 [(set (match_operand:VI8_AVX2_AVX512F 0 "register_operand")
5902 (mult:VI8_AVX2_AVX512F
5903 (match_operand:VI8_AVX2_AVX512F 1 "register_operand")
5904 (match_operand:VI8_AVX2_AVX512F 2 "register_operand")))]
5907 ix86_expand_sse2_mulvxdi3 (operands[0], operands[1], operands[2]);
;; Widening-multiply ("hi" variant) expander for the VI124_AVX2 modes.
;; The result has mode <sseunpackmode>; both inputs must be registers.
;; The any_extend wrapper makes this one pattern per extension code.
;; Expansion is delegated to ix86_expand_mul_widen_hilo.
;; NOTE(review): the remaining arguments of that call and the enabling
;; condition are on lines not visible in this excerpt.
5911 (define_expand "vec_widen_<s>mult_hi_<mode>"
5912 [(match_operand:<sseunpackmode> 0 "register_operand")
5913 (any_extend:<sseunpackmode>
5914 (match_operand:VI124_AVX2 1 "register_operand"))
5915 (match_operand:VI124_AVX2 2 "register_operand")]
5918 ix86_expand_mul_widen_hilo (operands[0], operands[1], operands[2],
;; Widening-multiply ("lo" variant) expander for the VI124_AVX2 modes;
;; same operand layout as vec_widen_<s>mult_hi_<mode>.
;; Expansion is delegated to ix86_expand_mul_widen_hilo.
;; NOTE(review): the remaining arguments of that call and the enabling
;; condition are on lines not visible in this excerpt.
5923 (define_expand "vec_widen_<s>mult_lo_<mode>"
5924 [(match_operand:<sseunpackmode> 0 "register_operand")
5925 (any_extend:<sseunpackmode>
5926 (match_operand:VI124_AVX2 1 "register_operand"))
5927 (match_operand:VI124_AVX2 2 "register_operand")]
5930 ix86_expand_mul_widen_hilo (operands[0], operands[1], operands[2],
5935 ;; Most widen_<s>mult_even_<mode> can be handled directly from other
5936 ;; named patterns, but signed V4SI needs special help for plain SSE2.
;; V2DI result from two V4SI inputs (register or memory).  Expansion is
;; delegated to ix86_expand_mul_widen_evenodd.
;; NOTE(review): the remaining arguments of that call and the enabling
;; condition are on lines not visible in this excerpt.
5937 (define_expand "vec_widen_smult_even_v4si"
5938 [(match_operand:V2DI 0 "register_operand")
5939 (match_operand:V4SI 1 "nonimmediate_operand")
5940 (match_operand:V4SI 2 "nonimmediate_operand")]
5943 ix86_expand_mul_widen_evenodd (operands[0], operands[1], operands[2],
;; Widening-multiply ("odd" variant) expander for the VI4_AVX512F modes.
;; The result has mode <sseunpackmode>; the inputs are accepted as
;; general_vector_operand.  Expansion is delegated to
;; ix86_expand_mul_widen_evenodd.
;; NOTE(review): the remaining arguments of that call and the enabling
;; condition are on lines not visible in this excerpt.
5948 (define_expand "vec_widen_<s>mult_odd_<mode>"
5949 [(match_operand:<sseunpackmode> 0 "register_operand")
5950 (any_extend:<sseunpackmode>
5951 (match_operand:VI4_AVX512F 1 "general_vector_operand"))
5952 (match_operand:VI4_AVX512F 2 "general_vector_operand")]
5955 ix86_expand_mul_widen_evenodd (operands[0], operands[1], operands[2],
;; sdot_prod expander for the VI2_AVX2 modes.
;; Operands: 0 = result (<sseunpackmode>), 1 and 2 = inputs,
;; 3 = value of mode <sseunpackmode> (all registers).
;; The body first emits <sse2_avx2>_pmaddwd of operands 1 and 2 into a
;; fresh temporary, then sets operand 0 to a PLUS in <sseunpackmode>.
;; NOTE(review): the PLUS operands are on lines not visible here;
;; presumably the temporary and operand 3 -- confirm.
5960 (define_expand "sdot_prod<mode>"
5961 [(match_operand:<sseunpackmode> 0 "register_operand")
5962 (match_operand:VI2_AVX2 1 "register_operand")
5963 (match_operand:VI2_AVX2 2 "register_operand")
5964 (match_operand:<sseunpackmode> 3 "register_operand")]
5967 rtx t = gen_reg_rtx (<sseunpackmode>mode);
5968 emit_insn (gen_<sse2_avx2>_pmaddwd (t, operands[1], operands[2]));
5969 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
5970 gen_rtx_PLUS (<sseunpackmode>mode,
5975 ;; Normally we use widen_mul_even/odd, but combine can't quite get it all
5976 ;; back together when madd is available.
;; V4SI sdot_prod expander with a V2DI result and a V2DI operand 3.
;; The body chains two XOP insns: xop_pmacsdqh of operands 1, 2 and 3
;; into a fresh temporary, then xop_pmacsdql of operands 1 and 2 with
;; that temporary into operand 0.
;; NOTE(review): the enabling condition is on a line not visible here.
5977 (define_expand "sdot_prodv4si"
5978 [(match_operand:V2DI 0 "register_operand")
5979 (match_operand:V4SI 1 "register_operand")
5980 (match_operand:V4SI 2 "register_operand")
5981 (match_operand:V2DI 3 "register_operand")]
5984 rtx t = gen_reg_rtx (V2DImode);
5985 emit_insn (gen_xop_pmacsdqh (t, operands[1], operands[2], operands[3]));
5986 emit_insn (gen_xop_pmacsdql (operands[0], operands[1], operands[2], t));
;; Shift insn named ashr<mode>3 for the VI24_AVX2 modes; emits
;; psra<suffix> / vpsra<suffix>.  The shift count (operand 2, SImode) may
;; be an SSE register or an immediate matching constraint "N".
;;   Alternative 0 (noavx): destination tied to operand 1.
;;   Alternative 1 (avx):   three-operand VEX form.
;; The length_immediate attribute depends on whether operand 2 is a
;; const_int; the non-constant case is "0".
;; NOTE(review): the RTL shift code and the insn condition are on lines
;; not visible in this excerpt.
5990 (define_insn "ashr<mode>3"
5991 [(set (match_operand:VI24_AVX2 0 "register_operand" "=x,x")
5993 (match_operand:VI24_AVX2 1 "register_operand" "0,x")
5994 (match_operand:SI 2 "nonmemory_operand" "xN,xN")))]
5997 psra<ssemodesuffix>\t{%2, %0|%0, %2}
5998 vpsra<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5999 [(set_attr "isa" "noavx,avx")
6000 (set_attr "type" "sseishft")
6001 (set (attr "length_immediate")
6002 (if_then_else (match_operand 2 "const_int_operand")
6004 (const_string "0")))
6005 (set_attr "prefix_data16" "1,*")
6006 (set_attr "prefix" "orig,vex")
6007 (set_attr "mode" "<sseinsnmode>")])
;; Shift insn for the any_lshift codes on the VI248_AVX2 modes; emits
;; p<vshift><suffix> / vp<vshift><suffix>.  The shift count (operand 2,
;; SImode) may be an SSE register or an immediate matching constraint "N".
;;   Alternative 0 (noavx): destination tied to operand 1.
;;   Alternative 1 (avx):   three-operand VEX form.
;; The length_immediate attribute depends on whether operand 2 is a
;; const_int; the non-constant case is "0".
;; NOTE(review): the insn condition is on a line not visible here.
6009 (define_insn "<shift_insn><mode>3"
6010 [(set (match_operand:VI248_AVX2 0 "register_operand" "=x,x")
6011 (any_lshift:VI248_AVX2
6012 (match_operand:VI248_AVX2 1 "register_operand" "0,x")
6013 (match_operand:SI 2 "nonmemory_operand" "xN,xN")))]
6016 p<vshift><ssemodesuffix>\t{%2, %0|%0, %2}
6017 vp<vshift><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
6018 [(set_attr "isa" "noavx,avx")
6019 (set_attr "type" "sseishft")
6020 (set (attr "length_immediate")
6021 (if_then_else (match_operand 2 "const_int_operand")
6023 (const_string "0")))
6024 (set_attr "prefix_data16" "1,*")
6025 (set_attr "prefix" "orig,vex")
6026 (set_attr "mode" "<sseinsnmode>")])
;; vec_shl expander for the VI_128 modes.  The shift amount (operand 2)
;; must satisfy const_0_to_255_mul_8_operand.  The preparation code
;; re-expresses operands 0 and 1 as V1TImode lowparts.
;; NOTE(review): the RTL shift code and the enabling condition are on
;; lines not visible in this excerpt.
6028 (define_expand "vec_shl_<mode>"
6029 [(set (match_operand:VI_128 0 "register_operand")
6031 (match_operand:VI_128 1 "register_operand")
6032 (match_operand:SI 2 "const_0_to_255_mul_8_operand")))]
6035 operands[0] = gen_lowpart (V1TImode, operands[0]);
6036 operands[1] = gen_lowpart (V1TImode, operands[1]);
;; Whole-register left shift insn for the VIMAX_AVX2 modes; emits
;; pslldq (alternative with operand 1 tied to the destination) or
;; vpslldq (three-operand form), chosen by which_alternative.
;; Operand 2 must be an immediate accepted by
;; const_0_to_255_mul_8_operand; the output code divides it by 8 before
;; it is printed.
;; NOTE(review): the RTL shift code, the insn condition and the case
;; labels of the switch are on lines not visible in this excerpt.
6039 (define_insn "<sse2_avx2>_ashl<mode>3"
6040 [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,x")
6042 (match_operand:VIMAX_AVX2 1 "register_operand" "0,x")
6043 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
6046 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
6048 switch (which_alternative)
6051 return "pslldq\t{%2, %0|%0, %2}";
6053 return "vpslldq\t{%2, %1, %0|%0, %1, %2}";
6058 [(set_attr "isa" "noavx,avx")
6059 (set_attr "type" "sseishft")
6060 (set_attr "length_immediate" "1")
6061 (set_attr "prefix_data16" "1,*")
6062 (set_attr "prefix" "orig,vex")
6063 (set_attr "mode" "<sseinsnmode>")])
;; vec_shr expander for the VI_128 modes; mirror image of vec_shl_<mode>.
;; The shift amount (operand 2) must satisfy
;; const_0_to_255_mul_8_operand.  The preparation code re-expresses
;; operands 0 and 1 as V1TImode lowparts.
;; NOTE(review): the RTL shift code and the enabling condition are on
;; lines not visible in this excerpt.
6065 (define_expand "vec_shr_<mode>"
6066 [(set (match_operand:VI_128 0 "register_operand")
6068 (match_operand:VI_128 1 "register_operand")
6069 (match_operand:SI 2 "const_0_to_255_mul_8_operand")))]
6072 operands[0] = gen_lowpart (V1TImode, operands[0]);
6073 operands[1] = gen_lowpart (V1TImode, operands[1]);
;; Whole-register logical right shift insn for the VIMAX_AVX2 modes;
;; emits psrldq (operand 1 tied to the destination) or vpsrldq
;; (three-operand form), chosen by which_alternative.
;; Operand 2 must be an immediate accepted by
;; const_0_to_255_mul_8_operand; the output code divides it by 8 before
;; it is printed.
;; NOTE(review): the insn condition and the case labels of the switch
;; are on lines not visible in this excerpt.
6076 (define_insn "<sse2_avx2>_lshr<mode>3"
6077 [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,x")
6078 (lshiftrt:VIMAX_AVX2
6079 (match_operand:VIMAX_AVX2 1 "register_operand" "0,x")
6080 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
6083 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
6085 switch (which_alternative)
6088 return "psrldq\t{%2, %0|%0, %2}";
6090 return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
6095 [(set_attr "isa" "noavx,avx")
6096 (set_attr "type" "sseishft")
6097 (set_attr "length_immediate" "1")
6098 (set_attr "atom_unit" "sishuf")
6099 (set_attr "prefix_data16" "1,*")
6100 (set_attr "prefix" "orig,vex")
6101 (set_attr "mode" "<sseinsnmode>")])
;; Binary-operation expander for the VI124_256 modes, one pattern per
;; <code> of an iterator whose line is not visible here (the matching
;; insn below prints <maxmin_int>, so presumably integer max/min).
;; The preparation statement passes the operands through
;; ix86_fixup_binary_operands_no_copy for <CODE>.
6104 (define_expand "<code><mode>3"
6105 [(set (match_operand:VI124_256 0 "register_operand")
6107 (match_operand:VI124_256 1 "nonimmediate_operand")
6108 (match_operand:VI124_256 2 "nonimmediate_operand")))]
6110 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; AVX2 insn for the VI124_256 expander above; emits the three-operand
;; vp<maxmin_int><suffix>.  Operands 1 and 2 are commutative ("%");
;; operand 2 may be memory.  Single alternative, VEX prefix only.
6112 (define_insn "*avx2_<code><mode>3"
6113 [(set (match_operand:VI124_256 0 "register_operand" "=v")
6115 (match_operand:VI124_256 1 "nonimmediate_operand" "%v")
6116 (match_operand:VI124_256 2 "nonimmediate_operand" "vm")))]
6117 "TARGET_AVX2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6118 "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
6119 [(set_attr "type" "sseiadd")
6120 (set_attr "prefix_extra" "1")
6121 (set_attr "prefix" "vex")
6122 (set_attr "mode" "OI")])
;; Max/min expander for the VI8_AVX2 modes, implemented as a vector
;; conditional select via ix86_expand_int_vcond.  The xops array is
;; filled as follows:
;;   xops[0]    destination;
;;   xops[1,2]  operands 1,2 for SMAX/UMAX, swapped (2,1) otherwise;
;;   xops[3]    comparison operands[1] > operands[2], using GTU for
;;              UMAX/UMIN and GT for the other codes;
;;   xops[4,5]  the two compared operands.
;; NOTE(review): the declarations, the enabling condition and the
;; handling of "ok" are on lines not visible in this excerpt.
6124 (define_expand "<code><mode>3"
6125 [(set (match_operand:VI8_AVX2 0 "register_operand")
6127 (match_operand:VI8_AVX2 1 "register_operand")
6128 (match_operand:VI8_AVX2 2 "register_operand")))]
6135 xops[0] = operands[0];
6137 if (<CODE> == SMAX || <CODE> == UMAX)
6139 xops[1] = operands[1];
6140 xops[2] = operands[2];
6144 xops[1] = operands[2];
6145 xops[2] = operands[1];
6148 code = (<CODE> == UMAX || <CODE> == UMIN) ? GTU : GT;
6150 xops[3] = gen_rtx_fmt_ee (code, VOIDmode, operands[1], operands[2]);
6151 xops[4] = operands[1];
6152 xops[5] = operands[2];
6154 ok = ix86_expand_int_vcond (xops);
;; Expander for the VI124_128 modes that falls back to a GT-based
;; conditional select.
;;   - With TARGET_SSE4_1, or for V8HImode, the operands only go through
;;     ix86_fixup_binary_operands_no_copy.
;;   - Otherwise both inputs are forced into registers and
;;     ix86_expand_int_vcond is called with the comparison
;;     operands[1] > operands[2] (signed GT).
;; NOTE(review): the code iterator, the test that selects the order of
;; xops[1]/xops[2] and the handling of "ok" are on lines not visible here.
6159 (define_expand "<code><mode>3"
6160 [(set (match_operand:VI124_128 0 "register_operand")
6162 (match_operand:VI124_128 1 "nonimmediate_operand")
6163 (match_operand:VI124_128 2 "nonimmediate_operand")))]
6166 if (TARGET_SSE4_1 || <MODE>mode == V8HImode)
6167 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
6173 xops[0] = operands[0];
6174 operands[1] = force_reg (<MODE>mode, operands[1]);
6175 operands[2] = force_reg (<MODE>mode, operands[2]);
6179 xops[1] = operands[1];
6180 xops[2] = operands[2];
6184 xops[1] = operands[2];
6185 xops[2] = operands[1];
6188 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
6189 xops[4] = operands[1];
6190 xops[5] = operands[2];
6192 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 insn for the VI14_128 modes; emits p<maxmin_int><suffix> /
;; vp<maxmin_int><suffix>.  Operands 1 and 2 are commutative ("%").
;;   Alternative 0 (noavx): destination tied to operand 1.
;;   Alternative 1 (avx):   three-operand VEX form.
6198 (define_insn "*sse4_1_<code><mode>3"
6199 [(set (match_operand:VI14_128 0 "register_operand" "=x,x")
6201 (match_operand:VI14_128 1 "nonimmediate_operand" "%0,x")
6202 (match_operand:VI14_128 2 "nonimmediate_operand" "xm,xm")))]
6203 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6205 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
6206 vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
6207 [(set_attr "isa" "noavx,avx")
6208 (set_attr "type" "sseiadd")
6209 (set_attr "prefix_extra" "1,*")
6210 (set_attr "prefix" "orig,vex")
6211 (set_attr "mode" "TI")])
;; SSE2 insn for V8HI; emits p<maxmin_int>w / vp<maxmin_int>w.
;; Operands 1 and 2 are commutative ("%").
;;   Alternative 0 (noavx): destination tied to operand 1.
;;   Alternative 1 (avx):   three-operand VEX form.
6213 (define_insn "*<code>v8hi3"
6214 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
6216 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x")
6217 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")))]
6218 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V8HImode, operands)"
6220 p<maxmin_int>w\t{%2, %0|%0, %2}
6221 vp<maxmin_int>w\t{%2, %1, %0|%0, %1, %2}"
6222 [(set_attr "isa" "noavx,avx")
6223 (set_attr "type" "sseiadd")
6224 (set_attr "prefix_data16" "1,*")
6225 (set_attr "prefix_extra" "*,1")
6226 (set_attr "prefix" "orig,vex")
6227 (set_attr "mode" "TI")])
;; Expander for the VI124_128 modes that falls back to a GTU-based
;; conditional select.  Three strategies are visible:
;;   - With TARGET_SSE4_1, or for V16QImode, the operands only go
;;     through ix86_fixup_binary_operands_no_copy.
;;   - UMAX on V8HImode is synthesised as sse2_ussubv8hi3 (op1, op2)
;;     followed by addv8hi3 with op2.  The intermediate result goes into
;;     operand 0 unless operand 0 is the same rtx as operand 2, in which
;;     case a fresh register is used so op2 is not overwritten early.
;;   - Otherwise both inputs are forced into registers and
;;     ix86_expand_int_vcond is called with the unsigned comparison
;;     operands[1] > operands[2] (GTU).
;; NOTE(review): the code iterator, the test that selects the order of
;; xops[1]/xops[2] and the handling of "ok" are on lines not visible here.
6229 (define_expand "<code><mode>3"
6230 [(set (match_operand:VI124_128 0 "register_operand")
6232 (match_operand:VI124_128 1 "nonimmediate_operand")
6233 (match_operand:VI124_128 2 "nonimmediate_operand")))]
6236 if (TARGET_SSE4_1 || <MODE>mode == V16QImode)
6237 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
6238 else if (<CODE> == UMAX && <MODE>mode == V8HImode)
6240 rtx op0 = operands[0], op2 = operands[2], op3 = op0;
6241 operands[1] = force_reg (<MODE>mode, operands[1]);
6242 if (rtx_equal_p (op3, op2))
6243 op3 = gen_reg_rtx (V8HImode);
6244 emit_insn (gen_sse2_ussubv8hi3 (op3, operands[1], op2));
6245 emit_insn (gen_addv8hi3 (op0, op3, op2));
6253 operands[1] = force_reg (<MODE>mode, operands[1]);
6254 operands[2] = force_reg (<MODE>mode, operands[2]);
6256 xops[0] = operands[0];
6260 xops[1] = operands[1];
6261 xops[2] = operands[2];
6265 xops[1] = operands[2];
6266 xops[2] = operands[1];
6269 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
6270 xops[4] = operands[1];
6271 xops[5] = operands[2];
6273 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 insn for the VI24_128 modes; emits p<maxmin_int><suffix> /
;; vp<maxmin_int><suffix>.  Operands 1 and 2 are commutative ("%").
;;   Alternative 0 (noavx): destination tied to operand 1.
;;   Alternative 1 (avx):   three-operand VEX form.
6279 (define_insn "*sse4_1_<code><mode>3"
6280 [(set (match_operand:VI24_128 0 "register_operand" "=x,x")
6282 (match_operand:VI24_128 1 "nonimmediate_operand" "%0,x")
6283 (match_operand:VI24_128 2 "nonimmediate_operand" "xm,xm")))]
6284 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6286 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
6287 vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
6288 [(set_attr "isa" "noavx,avx")
6289 (set_attr "type" "sseiadd")
6290 (set_attr "prefix_extra" "1,*")
6291 (set_attr "prefix" "orig,vex")
6292 (set_attr "mode" "TI")])
;; SSE2 insn for V16QI; emits p<maxmin_int>b / vp<maxmin_int>b.
;; Operands 1 and 2 are commutative ("%").
;;   Alternative 0 (noavx): destination tied to operand 1.
;;   Alternative 1 (avx):   three-operand VEX form.
6294 (define_insn "*<code>v16qi3"
6295 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
6297 (match_operand:V16QI 1 "nonimmediate_operand" "%0,x")
6298 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")))]
6299 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V16QImode, operands)"
6301 p<maxmin_int>b\t{%2, %0|%0, %2}
6302 vp<maxmin_int>b\t{%2, %1, %0|%0, %1, %2}"
6303 [(set_attr "isa" "noavx,avx")
6304 (set_attr "type" "sseiadd")
6305 (set_attr "prefix_data16" "1,*")
6306 (set_attr "prefix_extra" "*,1")
6307 (set_attr "prefix" "orig,vex")
6308 (set_attr "mode" "TI")])
6310 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6312 ;; Parallel integral comparisons
6314 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Equality-comparison expander for the VI_256 modes.  The preparation
;; statement passes the operands through
;; ix86_fixup_binary_operands_no_copy with code EQ.
;; NOTE(review): the RTL comparison line and the enabling condition are
;; on lines not visible in this excerpt.
6316 (define_expand "avx2_eq<mode>3"
6317 [(set (match_operand:VI_256 0 "register_operand")
6319 (match_operand:VI_256 1 "nonimmediate_operand")
6320 (match_operand:VI_256 2 "nonimmediate_operand")))]
6322 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
;; AVX2 insn for the VI_256 equality comparison; emits the three-operand
;; vpcmpeq<suffix>.  Operands 1 and 2 are commutative ("%"); operand 2
;; may be memory.  Single alternative, VEX prefix only.
6324 (define_insn "*avx2_eq<mode>3"
6325 [(set (match_operand:VI_256 0 "register_operand" "=x")
6327 (match_operand:VI_256 1 "nonimmediate_operand" "%x")
6328 (match_operand:VI_256 2 "nonimmediate_operand" "xm")))]
6329 "TARGET_AVX2 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
6330 "vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
6331 [(set_attr "type" "ssecmp")
6332 (set_attr "prefix_extra" "1")
6333 (set_attr "prefix" "vex")
6334 (set_attr "mode" "OI")])
;; SSE4.1 insn for the V2DI equality comparison; emits pcmpeqq /
;; vpcmpeqq.  Operands 1 and 2 are commutative ("%").
;;   Alternative 0 (noavx): destination tied to operand 1.
;;   Alternative 1 (avx):   three-operand VEX form.
6336 (define_insn "*sse4_1_eqv2di3"
6337 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
6339 (match_operand:V2DI 1 "nonimmediate_operand" "%0,x")
6340 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")))]
6341 "TARGET_SSE4_1 && ix86_binary_operator_ok (EQ, V2DImode, operands)"
6343 pcmpeqq\t{%2, %0|%0, %2}
6344 vpcmpeqq\t{%2, %1, %0|%0, %1, %2}"
6345 [(set_attr "isa" "noavx,avx")
6346 (set_attr "type" "ssecmp")
6347 (set_attr "prefix_extra" "1")
6348 (set_attr "prefix" "orig,vex")
6349 (set_attr "mode" "TI")])
;; SSE2 insn for the VI124_128 equality comparison; emits
;; pcmpeq<suffix> / vpcmpeq<suffix>.  Disabled when TARGET_XOP is set.
;; Operands 1 and 2 are commutative ("%").
;;   Alternative 0 (noavx): destination tied to operand 1.
;;   Alternative 1 (avx):   three-operand VEX form.
6351 (define_insn "*sse2_eq<mode>3"
6352 [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
6354 (match_operand:VI124_128 1 "nonimmediate_operand" "%0,x")
6355 (match_operand:VI124_128 2 "nonimmediate_operand" "xm,xm")))]
6356 "TARGET_SSE2 && !TARGET_XOP
6357 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
6359 pcmpeq<ssemodesuffix>\t{%2, %0|%0, %2}
6360 vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
6361 [(set_attr "isa" "noavx,avx")
6362 (set_attr "type" "ssecmp")
6363 (set_attr "prefix_data16" "1,*")
6364 (set_attr "prefix" "orig,vex")
6365 (set_attr "mode" "TI")])
;; Named expander for the VI124_128 equality comparison, available with
;; SSE2 when XOP is not enabled.  The preparation statement passes the
;; operands through ix86_fixup_binary_operands_no_copy with code EQ.
6367 (define_expand "sse2_eq<mode>3"
6368 [(set (match_operand:VI124_128 0 "register_operand")
6370 (match_operand:VI124_128 1 "nonimmediate_operand")
6371 (match_operand:VI124_128 2 "nonimmediate_operand")))]
6372 "TARGET_SSE2 && !TARGET_XOP "
6373 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
;; Named expander for the V2DI equality comparison.  The preparation
;; statement passes the operands through
;; ix86_fixup_binary_operands_no_copy with code EQ.
;; NOTE(review): the enabling condition is on a line not visible here.
6375 (define_expand "sse4_1_eqv2di3"
6376 [(set (match_operand:V2DI 0 "register_operand")
6378 (match_operand:V2DI 1 "nonimmediate_operand")
6379 (match_operand:V2DI 2 "nonimmediate_operand")))]
6381 "ix86_fixup_binary_operands_no_copy (EQ, V2DImode, operands);")
;; V2DI greater-than comparison insn; emits pcmpgtq / vpcmpgtq.
;; The operands are not commutative: operand 1 must be a register and
;; carries no "%" modifier; operand 2 may be memory.
;;   Alternative 0 (noavx): destination tied to operand 1.
;;   Alternative 1 (avx):   three-operand VEX form.
;; NOTE(review): the RTL comparison line and the insn condition are on
;; lines not visible in this excerpt.
6383 (define_insn "sse4_2_gtv2di3"
6384 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
6386 (match_operand:V2DI 1 "register_operand" "0,x")
6387 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")))]
6390 pcmpgtq\t{%2, %0|%0, %2}
6391 vpcmpgtq\t{%2, %1, %0|%0, %1, %2}"
6392 [(set_attr "isa" "noavx,avx")
6393 (set_attr "type" "ssecmp")
6394 (set_attr "prefix_extra" "1")
6395 (set_attr "prefix" "orig,vex")
6396 (set_attr "mode" "TI")])
;; Greater-than comparison insn for the VI_256 modes; emits the
;; three-operand vpcmpgt<suffix>.  Operand 1 must be a register (no
;; commutativity); operand 2 may be memory.
;; NOTE(review): the RTL comparison line and the insn condition are on
;; lines not visible in this excerpt.
6398 (define_insn "avx2_gt<mode>3"
6399 [(set (match_operand:VI_256 0 "register_operand" "=x")
6401 (match_operand:VI_256 1 "register_operand" "x")
6402 (match_operand:VI_256 2 "nonimmediate_operand" "xm")))]
6404 "vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
6405 [(set_attr "type" "ssecmp")
6406 (set_attr "prefix_extra" "1")
6407 (set_attr "prefix" "vex")
6408 (set_attr "mode" "OI")])
;; Greater-than comparison insn for the VI124_128 modes; emits
;; pcmpgt<suffix> / vpcmpgt<suffix>.  Enabled with SSE2 when XOP is not
;; enabled.  Operand 1 must be a register (no commutativity); operand 2
;; may be memory.
;;   Alternative 0 (noavx): destination tied to operand 1.
;;   Alternative 1 (avx):   three-operand VEX form.
6410 (define_insn "sse2_gt<mode>3"
6411 [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
6413 (match_operand:VI124_128 1 "register_operand" "0,x")
6414 (match_operand:VI124_128 2 "nonimmediate_operand" "xm,xm")))]
6415 "TARGET_SSE2 && !TARGET_XOP"
6417 pcmpgt<ssemodesuffix>\t{%2, %0|%0, %2}
6418 vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
6419 [(set_attr "isa" "noavx,avx")
6420 (set_attr "type" "ssecmp")
6421 (set_attr "prefix_data16" "1,*")
6422 (set_attr "prefix" "orig,vex")
6423 (set_attr "mode" "TI")])
;; vcond expander: data mode from V_256, comparison mode from VI_256.
;; Operand 3 is the comparison operator applied to operands 4 and 5;
;; operands 1 and 2 are the two values of the data mode.
;; The visible part of the condition requires both modes to have the
;; same number of elements.  Expansion is delegated to
;; ix86_expand_int_vcond.
;; NOTE(review): the first part of the condition and the handling of
;; "ok" are on lines not visible in this excerpt.
6425 (define_expand "vcond<V_256:mode><VI_256:mode>"
6426 [(set (match_operand:V_256 0 "register_operand")
6428 (match_operator 3 ""
6429 [(match_operand:VI_256 4 "nonimmediate_operand")
6430 (match_operand:VI_256 5 "general_operand")])
6431 (match_operand:V_256 1)
6432 (match_operand:V_256 2)))]
6434 && (GET_MODE_NUNITS (<V_256:MODE>mode)
6435 == GET_MODE_NUNITS (<VI_256:MODE>mode))"
6437 bool ok = ix86_expand_int_vcond (operands);
;; vcond expander: data mode from V_128, comparison mode from VI124_128.
;; Operand 3 is the comparison operator applied to operands 4 and 5;
;; operands 1 and 2 are the two values of the data mode.
;; The visible part of the condition requires both modes to have the
;; same number of elements.  Expansion is delegated to
;; ix86_expand_int_vcond.
;; NOTE(review): the first part of the condition and the handling of
;; "ok" are on lines not visible in this excerpt.
6442 (define_expand "vcond<V_128:mode><VI124_128:mode>"
6443 [(set (match_operand:V_128 0 "register_operand")
6445 (match_operator 3 ""
6446 [(match_operand:VI124_128 4 "nonimmediate_operand")
6447 (match_operand:VI124_128 5 "general_operand")])
6448 (match_operand:V_128 1)
6449 (match_operand:V_128 2)))]
6451 && (GET_MODE_NUNITS (<V_128:MODE>mode)
6452 == GET_MODE_NUNITS (<VI124_128:MODE>mode))"
6454 bool ok = ix86_expand_int_vcond (operands);
;; vcond expander for a V2DI comparison selecting between two VI8F_128
;; values (if_then_else in the data mode).  Operand 3 is the comparison
;; operator applied to operands 4 and 5.  Expansion is delegated to
;; ix86_expand_int_vcond.
;; NOTE(review): the enabling condition and the handling of "ok" are on
;; lines not visible in this excerpt.
6459 (define_expand "vcond<VI8F_128:mode>v2di"
6460 [(set (match_operand:VI8F_128 0 "register_operand")
6461 (if_then_else:VI8F_128
6462 (match_operator 3 ""
6463 [(match_operand:V2DI 4 "nonimmediate_operand")
6464 (match_operand:V2DI 5 "general_operand")])
6465 (match_operand:VI8F_128 1)
6466 (match_operand:VI8F_128 2)))]
6469 bool ok = ix86_expand_int_vcond (operands);
;; vcondu expander: data mode from V_256, comparison mode from VI_256.
;; Unlike the vcond pattern, both compared operands (4 and 5) must be
;; nonimmediate, and operands 1 and 2 are general_operand.
;; The visible part of the condition requires both modes to have the
;; same number of elements.  Expansion is delegated to
;; ix86_expand_int_vcond.
;; NOTE(review): the first part of the condition and the handling of
;; "ok" are on lines not visible in this excerpt.
6474 (define_expand "vcondu<V_256:mode><VI_256:mode>"
6475 [(set (match_operand:V_256 0 "register_operand")
6477 (match_operator 3 ""
6478 [(match_operand:VI_256 4 "nonimmediate_operand")
6479 (match_operand:VI_256 5 "nonimmediate_operand")])
6480 (match_operand:V_256 1 "general_operand")
6481 (match_operand:V_256 2 "general_operand")))]
6483 && (GET_MODE_NUNITS (<V_256:MODE>mode)
6484 == GET_MODE_NUNITS (<VI_256:MODE>mode))"
6486 bool ok = ix86_expand_int_vcond (operands);
;; vcondu expander: data mode from V_128, comparison mode from
;; VI124_128.  Both compared operands (4 and 5) must be nonimmediate;
;; operands 1 and 2 are general_operand.
;; The visible part of the condition requires both modes to have the
;; same number of elements.  Expansion is delegated to
;; ix86_expand_int_vcond.
;; NOTE(review): the first part of the condition and the handling of
;; "ok" are on lines not visible in this excerpt.
6491 (define_expand "vcondu<V_128:mode><VI124_128:mode>"
6492 [(set (match_operand:V_128 0 "register_operand")
6494 (match_operator 3 ""
6495 [(match_operand:VI124_128 4 "nonimmediate_operand")
6496 (match_operand:VI124_128 5 "nonimmediate_operand")])
6497 (match_operand:V_128 1 "general_operand")
6498 (match_operand:V_128 2 "general_operand")))]
6500 && (GET_MODE_NUNITS (<V_128:MODE>mode)
6501 == GET_MODE_NUNITS (<VI124_128:MODE>mode))"
6503 bool ok = ix86_expand_int_vcond (operands);
;; vcondu expander for a V2DI comparison selecting between two VI8F_128
;; values (if_then_else in the data mode).  Both compared operands
;; (4 and 5) must be nonimmediate.  Expansion is delegated to
;; ix86_expand_int_vcond.
;; NOTE(review): the enabling condition and the handling of "ok" are on
;; lines not visible in this excerpt.
6508 (define_expand "vcondu<VI8F_128:mode>v2di"
6509 [(set (match_operand:VI8F_128 0 "register_operand")
6510 (if_then_else:VI8F_128
6511 (match_operator 3 ""
6512 [(match_operand:V2DI 4 "nonimmediate_operand")
6513 (match_operand:V2DI 5 "nonimmediate_operand")])
6514 (match_operand:VI8F_128 1 "general_operand")
6515 (match_operand:VI8F_128 2 "general_operand")))]
6518 bool ok = ix86_expand_int_vcond (operands);
;; Modes handled by the vec_perm<mode> expander: the six 128-bit vector
;; modes unconditionally, and the six 256-bit modes only with TARGET_AVX2.
6523 (define_mode_iterator VEC_PERM_AVX2
6524 [V16QI V8HI V4SI V2DI V4SF V2DF
6525 (V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
6526 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
6527 (V8SF "TARGET_AVX2") (V4DF "TARGET_AVX2")])
;; vec_perm expander.  Operands 0-2 have the data mode; operand 3 is a
;; register of the corresponding integer vector mode (<sseintvecmode>).
;; Enabled with SSSE3, AVX or XOP; the expansion is delegated to
;; ix86_expand_vec_perm.
6529 (define_expand "vec_perm<mode>"
6530 [(match_operand:VEC_PERM_AVX2 0 "register_operand")
6531 (match_operand:VEC_PERM_AVX2 1 "register_operand")
6532 (match_operand:VEC_PERM_AVX2 2 "register_operand")
6533 (match_operand:<sseintvecmode> 3 "register_operand")]
6534 "TARGET_SSSE3 || TARGET_AVX || TARGET_XOP"
6536 ix86_expand_vec_perm (operands);
;; Modes handled by the vec_perm_const<mode> expander, each guarded by
;; the target flag it requires (SSE, SSE2, AVX or AVX2).
6540 (define_mode_iterator VEC_PERM_CONST
6541 [(V4SF "TARGET_SSE") (V4SI "TARGET_SSE")
6542 (V2DF "TARGET_SSE") (V2DI "TARGET_SSE")
6543 (V16QI "TARGET_SSE2") (V8HI "TARGET_SSE2")
6544 (V8SF "TARGET_AVX") (V4DF "TARGET_AVX")
6545 (V8SI "TARGET_AVX") (V4DI "TARGET_AVX")
6546 (V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")])
;; vec_perm_const expander.  Operands 0-2 are registers of the data
;; mode; operand 3 has mode <sseintvecmode> and no predicate.
;; The body calls ix86_expand_vec_perm_const and branches on its result.
;; NOTE(review): the enabling condition and both outcomes of that test
;; are on lines not visible in this excerpt.
6548 (define_expand "vec_perm_const<mode>"
6549 [(match_operand:VEC_PERM_CONST 0 "register_operand")
6550 (match_operand:VEC_PERM_CONST 1 "register_operand")
6551 (match_operand:VEC_PERM_CONST 2 "register_operand")
6552 (match_operand:<sseintvecmode> 3)]
6555 if (ix86_expand_vec_perm_const (operands))
6561 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6563 ;; Parallel bitwise logical operations
6565 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; One's-complement expander for the VI modes, expressed as an XOR.
;; The preparation code builds a constant vector whose every element is
;; constm1_rtx, forces it into a register and stores it as operands[2].
;; NOTE(review): the second XOR operand is on a line not visible here;
;; presumably it refers to operands[2] -- confirm.
6567 (define_expand "one_cmpl<mode>2"
6568 [(set (match_operand:VI 0 "register_operand")
6569 (xor:VI (match_operand:VI 1 "nonimmediate_operand")
6573 int i, n = GET_MODE_NUNITS (<MODE>mode);
6574 rtvec v = rtvec_alloc (n);
6576 for (i = 0; i < n; ++i)
6577 RTVEC_ELT (v, i) = constm1_rtx;
6579 operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v));
;; Named and-not expander over the VI_AVX2 modes.  Operand 1 is the one
;; wrapped in (not ...) and must be a register; operand 2 may be a
;; register or memory.
6582 (define_expand "<sse2_avx2>_andnot<mode>3"
6583 [(set (match_operand:VI_AVX2 0 "register_operand")
6585 (not:VI_AVX2 (match_operand:VI_AVX2 1 "register_operand"))
6586 (match_operand:VI_AVX2 2 "nonimmediate_operand")))]
;; Matching insn for and-not over all VI modes.
;; Alternative 0 (isa noavx) is the two-operand form with operand 1 tied
;; to the destination; alternative 1 (isa avx) is the three-operand form
;; whose mnemonic gets a "v" prefix.
;; The mnemonic is picked at output time from the insn's "mode" attribute,
;; with gcc_assert checks on the ISA each choice needs, and the final
;; template is formatted into a static 64-byte buffer with snprintf.
;; The "mode" attribute is computed as follows:
;;   - TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL: <ssePSmode>;
;;   - TARGET_AVX2: <sseinsnmode>;
;;   - TARGET_AVX: V8SF for modes wider than 16 bytes, else <sseinsnmode>;
;;   - no SSE2, or optimizing the function for size: V4SF;
;;   - otherwise: <sseinsnmode>.
6589 (define_insn "*andnot<mode>3"
6590 [(set (match_operand:VI 0 "register_operand" "=x,v")
6592 (not:VI (match_operand:VI 1 "register_operand" "0,v"))
6593 (match_operand:VI 2 "nonimmediate_operand" "xm,vm")))]
6596 static char buf[64];
6600 switch (get_attr_mode (insn))
6603 gcc_assert (TARGET_AVX512F);
6605 tmp = "pandn<ssemodesuffix>";
6609 gcc_assert (TARGET_AVX2);
6611 gcc_assert (TARGET_SSE2);
6617 gcc_assert (TARGET_AVX);
6619 gcc_assert (TARGET_SSE);
6628 switch (which_alternative)
6631 ops = "%s\t{%%2, %%0|%%0, %%2}";
6634 ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
6640 snprintf (buf, sizeof (buf), ops, tmp);
6643 [(set_attr "isa" "noavx,avx")
6644 (set_attr "type" "sselog")
6645 (set (attr "prefix_data16")
6647 (and (eq_attr "alternative" "0")
6648 (eq_attr "mode" "TI"))
6650 (const_string "*")))
6651 (set_attr "prefix" "orig,vex")
6653 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
6654 (const_string "<ssePSmode>")
6655 (match_test "TARGET_AVX2")
6656 (const_string "<sseinsnmode>")
6657 (match_test "TARGET_AVX")
6659 (match_test "GET_MODE_SIZE (<MODE>mode) > 16")
6660 (const_string "V8SF")
6661 (const_string "<sseinsnmode>"))
6662 (ior (not (match_test "TARGET_SSE2"))
6663 (match_test "optimize_function_for_size_p (cfun)"))
6664 (const_string "V4SF")
6666 (const_string "<sseinsnmode>")))])
;; Expander for the vector operations iterated by <code> over the VI
;; modes.  Both inputs may be registers, memory or constant vectors; the
;; expansion is delegated to ix86_expand_vector_logical_operator, which
;; receives the rtx code, the mode and the operand array.
6668 (define_expand "<code><mode>3"
6669 [(set (match_operand:VI 0 "register_operand")
6671 (match_operand:VI 1 "nonimmediate_or_const_vector_operand")
6672 (match_operand:VI 2 "nonimmediate_or_const_vector_operand")))]
6675 ix86_expand_vector_logical_operator (<CODE>, <MODE>mode, operands);
;; Matching insn for the <code> operations over all VI modes.
;; Operand 1 is marked commutative ("%"), and the insn condition includes
;; ix86_binary_operator_ok.
;; Alternative 0 (isa noavx) is the two-operand form with operand 1 tied
;; to the destination; alternative 1 (isa avx) is the three-operand form
;; whose mnemonic gets a "v" prefix.
;; The mnemonic is picked at output time from the insn's "mode" attribute,
;; with gcc_assert checks on the ISA each choice needs, and the final
;; template is formatted into a static 64-byte buffer with snprintf.
;; The "mode" attribute is computed as follows:
;;   - TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL: <ssePSmode>;
;;   - TARGET_AVX2: <sseinsnmode>;
;;   - TARGET_AVX: V8SF for modes wider than 16 bytes, else <sseinsnmode>;
;;   - no SSE2, or optimizing the function for size: V4SF;
;;   - otherwise: <sseinsnmode>.
6679 (define_insn "*<code><mode>3"
6680 [(set (match_operand:VI 0 "register_operand" "=x,v")
6682 (match_operand:VI 1 "nonimmediate_operand" "%0,v")
6683 (match_operand:VI 2 "nonimmediate_operand" "xm,vm")))]
6685 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6687 static char buf[64];
6691 switch (get_attr_mode (insn))
6694 gcc_assert (TARGET_AVX512F);
6695 tmp = "p<logic><ssemodesuffix>";
6699 gcc_assert (TARGET_AVX2);
6701 gcc_assert (TARGET_SSE2);
6707 gcc_assert (TARGET_AVX512F);
6709 gcc_assert (TARGET_AVX);
6711 gcc_assert (TARGET_SSE);
6720 switch (which_alternative)
6723 ops = "%s\t{%%2, %%0|%%0, %%2}";
6726 ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
6732 snprintf (buf, sizeof (buf), ops, tmp);
6735 [(set_attr "isa" "noavx,avx")
6736 (set_attr "type" "sselog")
6737 (set (attr "prefix_data16")
6739 (and (eq_attr "alternative" "0")
6740 (eq_attr "mode" "TI"))
6742 (const_string "*")))
6743 (set_attr "prefix" "orig,vex")
6745 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
6746 (const_string "<ssePSmode>")
6747 (match_test "TARGET_AVX2")
6748 (const_string "<sseinsnmode>")
6749 (match_test "TARGET_AVX")
6751 (match_test "GET_MODE_SIZE (<MODE>mode) > 16")
6752 (const_string "V8SF")
6753 (const_string "<sseinsnmode>"))
6754 (ior (not (match_test "TARGET_SSE2"))
6755 (match_test "optimize_function_for_size_p (cfun)"))
6756 (const_string "V4SF")
6758 (const_string "<sseinsnmode>")))])
6760 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6762 ;; Parallel integral element swizzling
6764 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Pack two VI248_AVX2 vectors (operands 1 and 2) into one vector of mode
;; <ssepackmode> (operand 0).  Both inputs are first re-viewed in the
;; result mode with gen_lowpart, then handed to
;; ix86_expand_vec_extract_even_odd with a final argument of 0.
;; NOTE(review): the 0 presumably asks for the even-numbered elements,
;; i.e. the low part of every wide element -- confirm against the helper.
6766 (define_expand "vec_pack_trunc_<mode>"
6767 [(match_operand:<ssepackmode> 0 "register_operand")
6768 (match_operand:VI248_AVX2 1 "register_operand")
6769 (match_operand:VI248_AVX2 2 "register_operand")]
6772 rtx op1 = gen_lowpart (<ssepackmode>mode, operands[1]);
6773 rtx op2 = gen_lowpart (<ssepackmode>mode, operands[2]);
6774 ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
;; packsswb / vpacksswb: each <sseunpackmode> input is narrowed with
;; signed saturation (ss_truncate) to <ssehalfvecmode>, and the two
;; halves are concatenated into the VI1_AVX2 result.
;; Alternative 0 is the two-operand form (operand 1 tied to the
;; destination, isa noavx); alternative 1 is the three-operand VEX form.
6778 (define_insn "<sse2_avx2>_packsswb"
6779 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
6780 (vec_concat:VI1_AVX2
6781 (ss_truncate:<ssehalfvecmode>
6782 (match_operand:<sseunpackmode> 1 "register_operand" "0,x"))
6783 (ss_truncate:<ssehalfvecmode>
6784 (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,xm"))))]
6787 packsswb\t{%2, %0|%0, %2}
6788 vpacksswb\t{%2, %1, %0|%0, %1, %2}"
6789 [(set_attr "isa" "noavx,avx")
6790 (set_attr "type" "sselog")
6791 (set_attr "prefix_data16" "1,*")
6792 (set_attr "prefix" "orig,vex")
6793 (set_attr "mode" "<sseinsnmode>")])
;; packssdw / vpackssdw: each <sseunpackmode> input is narrowed with
;; signed saturation (ss_truncate) to <ssehalfvecmode>, and the two
;; halves are concatenated into the VI2_AVX2 result.
;; Alternative 0 is the two-operand form (operand 1 tied to the
;; destination, isa noavx); alternative 1 is the three-operand VEX form.
6795 (define_insn "<sse2_avx2>_packssdw"
6796 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
6797 (vec_concat:VI2_AVX2
6798 (ss_truncate:<ssehalfvecmode>
6799 (match_operand:<sseunpackmode> 1 "register_operand" "0,x"))
6800 (ss_truncate:<ssehalfvecmode>
6801 (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,xm"))))]
6804 packssdw\t{%2, %0|%0, %2}
6805 vpackssdw\t{%2, %1, %0|%0, %1, %2}"
6806 [(set_attr "isa" "noavx,avx")
6807 (set_attr "type" "sselog")
6808 (set_attr "prefix_data16" "1,*")
6809 (set_attr "prefix" "orig,vex")
6810 (set_attr "mode" "<sseinsnmode>")])
;; packuswb / vpackuswb: each <sseunpackmode> input is narrowed with
;; unsigned saturation (us_truncate) to <ssehalfvecmode>, and the two
;; halves are concatenated into the VI1_AVX2 result.
;; Alternative 0 is the two-operand form (operand 1 tied to the
;; destination, isa noavx); alternative 1 is the three-operand VEX form.
6812 (define_insn "<sse2_avx2>_packuswb"
6813 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
6814 (vec_concat:VI1_AVX2
6815 (us_truncate:<ssehalfvecmode>
6816 (match_operand:<sseunpackmode> 1 "register_operand" "0,x"))
6817 (us_truncate:<ssehalfvecmode>
6818 (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,xm"))))]
6821 packuswb\t{%2, %0|%0, %2}
6822 vpackuswb\t{%2, %1, %0|%0, %1, %2}"
6823 [(set_attr "isa" "noavx,avx")
6824 (set_attr "type" "sselog")
6825 (set_attr "prefix_data16" "1,*")
6826 (set_attr "prefix" "orig,vex")
6827 (set_attr "mode" "<sseinsnmode>")])
;; vpunpckhbw on 256-bit vectors.  The selector pairs index i with index
;; i + 32 for i = 8..15 and then for i = 24..31, so the result interleaves
;; the upper eight bytes of each 16-byte half of the two inputs rather
;; than the upper half of the whole vector.
6829 (define_insn "avx2_interleave_highv32qi"
6830 [(set (match_operand:V32QI 0 "register_operand" "=x")
6833 (match_operand:V32QI 1 "register_operand" "x")
6834 (match_operand:V32QI 2 "nonimmediate_operand" "xm"))
6835 (parallel [(const_int 8) (const_int 40)
6836 (const_int 9) (const_int 41)
6837 (const_int 10) (const_int 42)
6838 (const_int 11) (const_int 43)
6839 (const_int 12) (const_int 44)
6840 (const_int 13) (const_int 45)
6841 (const_int 14) (const_int 46)
6842 (const_int 15) (const_int 47)
6843 (const_int 24) (const_int 56)
6844 (const_int 25) (const_int 57)
6845 (const_int 26) (const_int 58)
6846 (const_int 27) (const_int 59)
6847 (const_int 28) (const_int 60)
6848 (const_int 29) (const_int 61)
6849 (const_int 30) (const_int 62)
6850 (const_int 31) (const_int 63)])))]
6852 "vpunpckhbw\t{%2, %1, %0|%0, %1, %2}"
6853 [(set_attr "type" "sselog")
6854 (set_attr "prefix" "vex")
6855 (set_attr "mode" "OI")])
;; punpckhbw / vpunpckhbw on 128-bit vectors.  The selector pairs index i
;; with index i + 16 for i = 8..15, interleaving the upper eight bytes of
;; the two inputs.  Alternative 0 is the two-operand form (operand 1 tied
;; to the destination, isa noavx); alternative 1 is the VEX form.
6857 (define_insn "vec_interleave_highv16qi"
6858 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
6861 (match_operand:V16QI 1 "register_operand" "0,x")
6862 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm"))
6863 (parallel [(const_int 8) (const_int 24)
6864 (const_int 9) (const_int 25)
6865 (const_int 10) (const_int 26)
6866 (const_int 11) (const_int 27)
6867 (const_int 12) (const_int 28)
6868 (const_int 13) (const_int 29)
6869 (const_int 14) (const_int 30)
6870 (const_int 15) (const_int 31)])))]
6873 punpckhbw\t{%2, %0|%0, %2}
6874 vpunpckhbw\t{%2, %1, %0|%0, %1, %2}"
6875 [(set_attr "isa" "noavx,avx")
6876 (set_attr "type" "sselog")
6877 (set_attr "prefix_data16" "1,*")
6878 (set_attr "prefix" "orig,vex")
6879 (set_attr "mode" "TI")])
;; vpunpcklbw on 256-bit vectors.  The selector pairs index i with index
;; i + 32 for i = 0..7 and then for i = 16..23, so the result interleaves
;; the lower eight bytes of each 16-byte half of the two inputs rather
;; than the lower half of the whole vector.
6881 (define_insn "avx2_interleave_lowv32qi"
6882 [(set (match_operand:V32QI 0 "register_operand" "=x")
6885 (match_operand:V32QI 1 "register_operand" "x")
6886 (match_operand:V32QI 2 "nonimmediate_operand" "xm"))
6887 (parallel [(const_int 0) (const_int 32)
6888 (const_int 1) (const_int 33)
6889 (const_int 2) (const_int 34)
6890 (const_int 3) (const_int 35)
6891 (const_int 4) (const_int 36)
6892 (const_int 5) (const_int 37)
6893 (const_int 6) (const_int 38)
6894 (const_int 7) (const_int 39)
6895 (const_int 16) (const_int 48)
6896 (const_int 17) (const_int 49)
6897 (const_int 18) (const_int 50)
6898 (const_int 19) (const_int 51)
6899 (const_int 20) (const_int 52)
6900 (const_int 21) (const_int 53)
6901 (const_int 22) (const_int 54)
6902 (const_int 23) (const_int 55)])))]
6904 "vpunpcklbw\t{%2, %1, %0|%0, %1, %2}"
6905 [(set_attr "type" "sselog")
6906 (set_attr "prefix" "vex")
6907 (set_attr "mode" "OI")])
;; punpcklbw / vpunpcklbw on 128-bit vectors.  The selector pairs index i
;; with index i + 16 for i = 0..7, interleaving the lower eight bytes of
;; the two inputs.  Alternative 0 is the two-operand form (operand 1 tied
;; to the destination, isa noavx); alternative 1 is the VEX form.
6909 (define_insn "vec_interleave_lowv16qi"
6910 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
6913 (match_operand:V16QI 1 "register_operand" "0,x")
6914 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm"))
6915 (parallel [(const_int 0) (const_int 16)
6916 (const_int 1) (const_int 17)
6917 (const_int 2) (const_int 18)
6918 (const_int 3) (const_int 19)
6919 (const_int 4) (const_int 20)
6920 (const_int 5) (const_int 21)
6921 (const_int 6) (const_int 22)
6922 (const_int 7) (const_int 23)])))]
6925 punpcklbw\t{%2, %0|%0, %2}
6926 vpunpcklbw\t{%2, %1, %0|%0, %1, %2}"
6927 [(set_attr "isa" "noavx,avx")
6928 (set_attr "type" "sselog")
6929 (set_attr "prefix_data16" "1,*")
6930 (set_attr "prefix" "orig,vex")
6931 (set_attr "mode" "TI")])
;; vpunpckhwd on 256-bit vectors.  The selector pairs index i with index
;; i + 16 for i = 4..7 and then for i = 12..15, i.e. the upper four words
;; of each eight-word half of the two inputs.
6933 (define_insn "avx2_interleave_highv16hi"
6934 [(set (match_operand:V16HI 0 "register_operand" "=x")
6937 (match_operand:V16HI 1 "register_operand" "x")
6938 (match_operand:V16HI 2 "nonimmediate_operand" "xm"))
6939 (parallel [(const_int 4) (const_int 20)
6940 (const_int 5) (const_int 21)
6941 (const_int 6) (const_int 22)
6942 (const_int 7) (const_int 23)
6943 (const_int 12) (const_int 28)
6944 (const_int 13) (const_int 29)
6945 (const_int 14) (const_int 30)
6946 (const_int 15) (const_int 31)])))]
6948 "vpunpckhwd\t{%2, %1, %0|%0, %1, %2}"
6949 [(set_attr "type" "sselog")
6950 (set_attr "prefix" "vex")
6951 (set_attr "mode" "OI")])
;; punpckhwd / vpunpckhwd on 128-bit vectors.  The selector pairs index i
;; with index i + 8 for i = 4..7, interleaving the upper four words of
;; the two inputs.  Alternative 0 is the two-operand form (operand 1 tied
;; to the destination, isa noavx); alternative 1 is the VEX form.
6953 (define_insn "vec_interleave_highv8hi"
6954 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
6957 (match_operand:V8HI 1 "register_operand" "0,x")
6958 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm"))
6959 (parallel [(const_int 4) (const_int 12)
6960 (const_int 5) (const_int 13)
6961 (const_int 6) (const_int 14)
6962 (const_int 7) (const_int 15)])))]
6965 punpckhwd\t{%2, %0|%0, %2}
6966 vpunpckhwd\t{%2, %1, %0|%0, %1, %2}"
6967 [(set_attr "isa" "noavx,avx")
6968 (set_attr "type" "sselog")
6969 (set_attr "prefix_data16" "1,*")
6970 (set_attr "prefix" "orig,vex")
6971 (set_attr "mode" "TI")])
;; vpunpcklwd on 256-bit vectors.  The selector pairs index i with index
;; i + 16 for i = 0..3 and then for i = 8..11, i.e. the lower four words
;; of each eight-word half of the two inputs.
6973 (define_insn "avx2_interleave_lowv16hi"
6974 [(set (match_operand:V16HI 0 "register_operand" "=x")
6977 (match_operand:V16HI 1 "register_operand" "x")
6978 (match_operand:V16HI 2 "nonimmediate_operand" "xm"))
6979 (parallel [(const_int 0) (const_int 16)
6980 (const_int 1) (const_int 17)
6981 (const_int 2) (const_int 18)
6982 (const_int 3) (const_int 19)
6983 (const_int 8) (const_int 24)
6984 (const_int 9) (const_int 25)
6985 (const_int 10) (const_int 26)
6986 (const_int 11) (const_int 27)])))]
6988 "vpunpcklwd\t{%2, %1, %0|%0, %1, %2}"
6989 [(set_attr "type" "sselog")
6990 (set_attr "prefix" "vex")
6991 (set_attr "mode" "OI")])
;; punpcklwd / vpunpcklwd on 128-bit vectors.  The selector pairs index i
;; with index i + 8 for i = 0..3, interleaving the lower four words of
;; the two inputs.  Alternative 0 is the two-operand form (operand 1 tied
;; to the destination, isa noavx); alternative 1 is the VEX form.
6993 (define_insn "vec_interleave_lowv8hi"
6994 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
6997 (match_operand:V8HI 1 "register_operand" "0,x")
6998 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm"))
6999 (parallel [(const_int 0) (const_int 8)
7000 (const_int 1) (const_int 9)
7001 (const_int 2) (const_int 10)
7002 (const_int 3) (const_int 11)])))]
7005 punpcklwd\t{%2, %0|%0, %2}
7006 vpunpcklwd\t{%2, %1, %0|%0, %1, %2}"
7007 [(set_attr "isa" "noavx,avx")
7008 (set_attr "type" "sselog")
7009 (set_attr "prefix_data16" "1,*")
7010 (set_attr "prefix" "orig,vex")
7011 (set_attr "mode" "TI")])
;; vpunpckhdq on 256-bit vectors.  The selector pairs index i with index
;; i + 8 for i = 2, 3 and then for i = 6, 7, i.e. the upper two dwords of
;; each four-dword half of the two inputs.
7013 (define_insn "avx2_interleave_highv8si"
7014 [(set (match_operand:V8SI 0 "register_operand" "=x")
7017 (match_operand:V8SI 1 "register_operand" "x")
7018 (match_operand:V8SI 2 "nonimmediate_operand" "xm"))
7019 (parallel [(const_int 2) (const_int 10)
7020 (const_int 3) (const_int 11)
7021 (const_int 6) (const_int 14)
7022 (const_int 7) (const_int 15)])))]
7024 "vpunpckhdq\t{%2, %1, %0|%0, %1, %2}"
7025 [(set_attr "type" "sselog")
7026 (set_attr "prefix" "vex")
7027 (set_attr "mode" "OI")])
;; punpckhdq / vpunpckhdq on 128-bit vectors.  The selector is 2, 6, 3, 7:
;; the upper two dwords of the two inputs, interleaved.  Alternative 0 is
;; the two-operand form (operand 1 tied to the destination, isa noavx);
;; alternative 1 is the VEX form.
7029 (define_insn "vec_interleave_highv4si"
7030 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
7033 (match_operand:V4SI 1 "register_operand" "0,x")
7034 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm"))
7035 (parallel [(const_int 2) (const_int 6)
7036 (const_int 3) (const_int 7)])))]
7039 punpckhdq\t{%2, %0|%0, %2}
7040 vpunpckhdq\t{%2, %1, %0|%0, %1, %2}"
7041 [(set_attr "isa" "noavx,avx")
7042 (set_attr "type" "sselog")
7043 (set_attr "prefix_data16" "1,*")
7044 (set_attr "prefix" "orig,vex")
7045 (set_attr "mode" "TI")])
;; vpunpckldq on 256-bit vectors.  The selector pairs index i with index
;; i + 8 for i = 0, 1 and then for i = 4, 5, i.e. the lower two dwords of
;; each four-dword half of the two inputs.
7047 (define_insn "avx2_interleave_lowv8si"
7048 [(set (match_operand:V8SI 0 "register_operand" "=x")
7051 (match_operand:V8SI 1 "register_operand" "x")
7052 (match_operand:V8SI 2 "nonimmediate_operand" "xm"))
7053 (parallel [(const_int 0) (const_int 8)
7054 (const_int 1) (const_int 9)
7055 (const_int 4) (const_int 12)
7056 (const_int 5) (const_int 13)])))]
7058 "vpunpckldq\t{%2, %1, %0|%0, %1, %2}"
7059 [(set_attr "type" "sselog")
7060 (set_attr "prefix" "vex")
7061 (set_attr "mode" "OI")])
;; punpckldq / vpunpckldq on 128-bit vectors.  The selector is 0, 4, 1, 5:
;; the lower two dwords of the two inputs, interleaved.  Alternative 0 is
;; the two-operand form (operand 1 tied to the destination, isa noavx);
;; alternative 1 is the VEX form.
7063 (define_insn "vec_interleave_lowv4si"
7064 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
7067 (match_operand:V4SI 1 "register_operand" "0,x")
7068 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm"))
7069 (parallel [(const_int 0) (const_int 4)
7070 (const_int 1) (const_int 5)])))]
7073 punpckldq\t{%2, %0|%0, %2}
7074 vpunpckldq\t{%2, %1, %0|%0, %1, %2}"
7075 [(set_attr "isa" "noavx,avx")
7076 (set_attr "type" "sselog")
7077 (set_attr "prefix_data16" "1,*")
7078 (set_attr "prefix" "orig,vex")
7079 (set_attr "mode" "TI")])
;; Interleave-high expander for the VI_256 modes, built from three insns:
;;   t1 = avx2_interleave_low<mode> (op1, op2)
;;   t2 = avx2_interleave_high<mode> (op1, op2)
;;   op0 = avx2_permv2ti (t1, t2, 1 + (3 << 4)), all viewed as V4DI.
;; NOTE(review): the immediate 0x31 is expected to pick the upper 128-bit
;; half of t1 followed by the upper half of t2 -- confirm against the
;; vperm2i128 immediate encoding.
7081 (define_expand "vec_interleave_high<mode>"
7082 [(match_operand:VI_256 0 "register_operand" "=x")
7083 (match_operand:VI_256 1 "register_operand" "x")
7084 (match_operand:VI_256 2 "nonimmediate_operand" "xm")]
7087 rtx t1 = gen_reg_rtx (<MODE>mode);
7088 rtx t2 = gen_reg_rtx (<MODE>mode);
7089 emit_insn (gen_avx2_interleave_low<mode> (t1, operands[1], operands[2]));
7090 emit_insn (gen_avx2_interleave_high<mode> (t2, operands[1], operands[2]));
7091 emit_insn (gen_avx2_permv2ti
7092 (gen_lowpart (V4DImode, operands[0]),
7093 gen_lowpart (V4DImode, t1),
7094 gen_lowpart (V4DImode, t2), GEN_INT (1 + (3 << 4))));
;; Interleave-low expander for the VI_256 modes, built from three insns:
;;   t1 = avx2_interleave_low<mode> (op1, op2)
;;   t2 = avx2_interleave_high<mode> (op1, op2)
;;   op0 = avx2_permv2ti (t1, t2, 0 + (2 << 4)), all viewed as V4DI.
;; NOTE(review): the immediate 0x20 is expected to pick the lower 128-bit
;; half of t1 followed by the lower half of t2 -- confirm against the
;; vperm2i128 immediate encoding.
7098 (define_expand "vec_interleave_low<mode>"
7099 [(match_operand:VI_256 0 "register_operand" "=x")
7100 (match_operand:VI_256 1 "register_operand" "x")
7101 (match_operand:VI_256 2 "nonimmediate_operand" "xm")]
7104 rtx t1 = gen_reg_rtx (<MODE>mode);
7105 rtx t2 = gen_reg_rtx (<MODE>mode);
7106 emit_insn (gen_avx2_interleave_low<mode> (t1, operands[1], operands[2]));
7107 emit_insn (gen_avx2_interleave_high<mode> (t2, operands[1], operands[2]));
7108 emit_insn (gen_avx2_permv2ti
7109 (gen_lowpart (V4DImode, operands[0]),
7110 gen_lowpart (V4DImode, t1),
7111 gen_lowpart (V4DImode, t2), GEN_INT (0 + (2 << 4))));
7115 ;; Modes handled by pinsr patterns.
;; V8HI is unconditional; V16QI and V4SI need TARGET_SSE4_1; V2DI needs
;; TARGET_SSE4_1 together with TARGET_64BIT.
7116 (define_mode_iterator PINSR_MODE
7117 [(V16QI "TARGET_SSE4_1") V8HI
7118 (V4SI "TARGET_SSE4_1")
7119 (V2DI "TARGET_SSE4_1 && TARGET_64BIT")])
;; ISA prefix used in the name of the pinsr pattern for each PINSR_MODE:
;; "sse2" for V8HI, "sse4_1" for V16QI, V4SI and V2DI.
7121 (define_mode_attr sse2p4_1
7122 [(V16QI "sse4_1") (V8HI "sse2")
7123 (V4SI "sse4_1") (V2DI "sse4_1")])
7125 ;; sse4_1_pinsrd must come before sse2_loadld since it is preferred.
;; Insert scalar operand 2 (general register or memory) into one element
;; of vector operand 1.  Operand 3 is the vec_merge mask as a constant
;; integer; the insn condition requires (unsigned) exact_log2 of it to be
;; below the element count of <MODE>mode, and the output code replaces
;; operand 3 with that log2 value before it is printed as the immediate.
;; Alternatives 0-1 (isa noavx) tie operand 1 to the destination and use
;; the two-operand pinsr templates; alternatives 2-3 (isa avx) use the
;; vpinsr templates.  Templates guarded by the "narrower than SImode"
;; test print operand 2 with the %k modifier.
7126 (define_insn "<sse2p4_1>_pinsr<ssemodesuffix>"
7127 [(set (match_operand:PINSR_MODE 0 "register_operand" "=x,x,x,x")
7128 (vec_merge:PINSR_MODE
7129 (vec_duplicate:PINSR_MODE
7130 (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "r,m,r,m"))
7131 (match_operand:PINSR_MODE 1 "register_operand" "0,0,x,x")
7132 (match_operand:SI 3 "const_int_operand")))]
7134 && ((unsigned) exact_log2 (INTVAL (operands[3]))
7135 < GET_MODE_NUNITS (<MODE>mode))"
7137 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
7139 switch (which_alternative)
7142 if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
7143 return "pinsr<ssemodesuffix>\t{%3, %k2, %0|%0, %k2, %3}";
7146 return "pinsr<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}";
7148 if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
7149 return "vpinsr<ssemodesuffix>\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
7152 return "vpinsr<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
7157 [(set_attr "isa" "noavx,noavx,avx,avx")
7158 (set_attr "type" "sselog")
7159 (set (attr "prefix_rex")
7161 (and (not (match_test "TARGET_AVX"))
7162 (eq (const_string "<MODE>mode") (const_string "V2DImode")))
7164 (const_string "*")))
7165 (set (attr "prefix_data16")
7167 (and (not (match_test "TARGET_AVX"))
7168 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
7170 (const_string "*")))
7171 (set (attr "prefix_extra")
7173 (and (not (match_test "TARGET_AVX"))
7174 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
7176 (const_string "1")))
7177 (set_attr "length_immediate" "1")
7178 (set_attr "prefix" "orig,orig,vex,vex")
7179 (set_attr "mode" "TI")])
;; Expander that turns the 8-bit shuffle immediate (operand 2) into the
;; eight explicit element selectors taken by avx2_pshufd_1: the four
;; 2-bit fields of the mask as they are, followed by the same four
;; fields with 4 added.
7181 (define_expand "avx2_pshufdv3"
7182 [(match_operand:V8SI 0 "register_operand")
7183 (match_operand:V8SI 1 "nonimmediate_operand")
7184 (match_operand:SI 2 "const_0_to_255_operand")]
7187 int mask = INTVAL (operands[2]);
7188 emit_insn (gen_avx2_pshufd_1 (operands[0], operands[1],
7189 GEN_INT ((mask >> 0) & 3),
7190 GEN_INT ((mask >> 2) & 3),
7191 GEN_INT ((mask >> 4) & 3),
7192 GEN_INT ((mask >> 6) & 3),
7193 GEN_INT (((mask >> 0) & 3) + 4),
7194 GEN_INT (((mask >> 2) & 3) + 4),
7195 GEN_INT (((mask >> 4) & 3) + 4),
7196 GEN_INT (((mask >> 6) & 3) + 4)));
;; vpshufd on V8SI with the shuffle spelled out element by element.
;; Operands 2-5 are indices in 0..3 and operands 6-9 are indices in 4..7;
;; the condition requires each of operands 6-9 to equal the matching one
;; of operands 2-5 plus 4, so both halves use the same shuffle.
;; The output code packs operands 2-5 into an immediate, two bits each
;; starting at bit 0, and stores it back into operands[2] for printing.
7200 (define_insn "avx2_pshufd_1"
7201 [(set (match_operand:V8SI 0 "register_operand" "=x")
7203 (match_operand:V8SI 1 "nonimmediate_operand" "xm")
7204 (parallel [(match_operand 2 "const_0_to_3_operand")
7205 (match_operand 3 "const_0_to_3_operand")
7206 (match_operand 4 "const_0_to_3_operand")
7207 (match_operand 5 "const_0_to_3_operand")
7208 (match_operand 6 "const_4_to_7_operand")
7209 (match_operand 7 "const_4_to_7_operand")
7210 (match_operand 8 "const_4_to_7_operand")
7211 (match_operand 9 "const_4_to_7_operand")])))]
7213 && INTVAL (operands[2]) + 4 == INTVAL (operands[6])
7214 && INTVAL (operands[3]) + 4 == INTVAL (operands[7])
7215 && INTVAL (operands[4]) + 4 == INTVAL (operands[8])
7216 && INTVAL (operands[5]) + 4 == INTVAL (operands[9])"
7219 mask |= INTVAL (operands[2]) << 0;
7220 mask |= INTVAL (operands[3]) << 2;
7221 mask |= INTVAL (operands[4]) << 4;
7222 mask |= INTVAL (operands[5]) << 6;
7223 operands[2] = GEN_INT (mask);
7225 return "vpshufd\t{%2, %1, %0|%0, %1, %2}";
7227 [(set_attr "type" "sselog1")
7228 (set_attr "prefix" "vex")
7229 (set_attr "length_immediate" "1")
7230 (set_attr "mode" "OI")])
;; Expander that splits the shuffle immediate (operand 2) into the four
;; 2-bit element selectors taken by sse2_pshufd_1.  Operand 2 only has to
;; be a constant integer; bits above the low eight are not used.
7232 (define_expand "sse2_pshufd"
7233 [(match_operand:V4SI 0 "register_operand")
7234 (match_operand:V4SI 1 "nonimmediate_operand")
7235 (match_operand:SI 2 "const_int_operand")]
7238 int mask = INTVAL (operands[2]);
7239 emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
7240 GEN_INT ((mask >> 0) & 3),
7241 GEN_INT ((mask >> 2) & 3),
7242 GEN_INT ((mask >> 4) & 3),
7243 GEN_INT ((mask >> 6) & 3)));
;; pshufd / vpshufd on V4SI with the shuffle spelled out as four element
;; indices in 0..3 (operands 2-5).  The output code packs them into an
;; immediate, two bits each starting at bit 0, and stores it back into
;; operands[2] for printing.
7247 (define_insn "sse2_pshufd_1"
7248 [(set (match_operand:V4SI 0 "register_operand" "=x")
7250 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
7251 (parallel [(match_operand 2 "const_0_to_3_operand")
7252 (match_operand 3 "const_0_to_3_operand")
7253 (match_operand 4 "const_0_to_3_operand")
7254 (match_operand 5 "const_0_to_3_operand")])))]
7258 mask |= INTVAL (operands[2]) << 0;
7259 mask |= INTVAL (operands[3]) << 2;
7260 mask |= INTVAL (operands[4]) << 4;
7261 mask |= INTVAL (operands[5]) << 6;
7262 operands[2] = GEN_INT (mask);
7264 return "%vpshufd\t{%2, %1, %0|%0, %1, %2}";
7266 [(set_attr "type" "sselog1")
7267 (set_attr "prefix_data16" "1")
7268 (set_attr "prefix" "maybe_vex")
7269 (set_attr "length_immediate" "1")
7270 (set_attr "mode" "TI")])
;; Expander that turns the 8-bit shuffle immediate (operand 2) into the
;; eight explicit element selectors taken by avx2_pshuflw_1: the four
;; 2-bit fields of the mask as they are, followed by the same four
;; fields with 8 added.
7272 (define_expand "avx2_pshuflwv3"
7273 [(match_operand:V16HI 0 "register_operand")
7274 (match_operand:V16HI 1 "nonimmediate_operand")
7275 (match_operand:SI 2 "const_0_to_255_operand")]
7278 int mask = INTVAL (operands[2]);
7279 emit_insn (gen_avx2_pshuflw_1 (operands[0], operands[1],
7280 GEN_INT ((mask >> 0) & 3),
7281 GEN_INT ((mask >> 2) & 3),
7282 GEN_INT ((mask >> 4) & 3),
7283 GEN_INT ((mask >> 6) & 3),
7284 GEN_INT (((mask >> 0) & 3) + 8),
7285 GEN_INT (((mask >> 2) & 3) + 8),
7286 GEN_INT (((mask >> 4) & 3) + 8),
7287 GEN_INT (((mask >> 6) & 3) + 8)));
;; vpshuflw on V16HI with the shuffle spelled out element by element.
;; Operands 2-5 are indices in 0..3 and operands 6-9 are indices in
;; 8..11; the condition requires each of operands 6-9 to equal the
;; matching one of operands 2-5 plus 8, so both halves use the same
;; shuffle.  The output code packs operands 2-5 into an immediate, two
;; bits each starting at bit 0, and stores it back into operands[2].
7291 (define_insn "avx2_pshuflw_1"
7292 [(set (match_operand:V16HI 0 "register_operand" "=x")
7294 (match_operand:V16HI 1 "nonimmediate_operand" "xm")
7295 (parallel [(match_operand 2 "const_0_to_3_operand")
7296 (match_operand 3 "const_0_to_3_operand")
7297 (match_operand 4 "const_0_to_3_operand")
7298 (match_operand 5 "const_0_to_3_operand")
7303 (match_operand 6 "const_8_to_11_operand")
7304 (match_operand 7 "const_8_to_11_operand")
7305 (match_operand 8 "const_8_to_11_operand")
7306 (match_operand 9 "const_8_to_11_operand")
7312 && INTVAL (operands[2]) + 8 == INTVAL (operands[6])
7313 && INTVAL (operands[3]) + 8 == INTVAL (operands[7])
7314 && INTVAL (operands[4]) + 8 == INTVAL (operands[8])
7315 && INTVAL (operands[5]) + 8 == INTVAL (operands[9])"
7318 mask |= INTVAL (operands[2]) << 0;
7319 mask |= INTVAL (operands[3]) << 2;
7320 mask |= INTVAL (operands[4]) << 4;
7321 mask |= INTVAL (operands[5]) << 6;
7322 operands[2] = GEN_INT (mask);
7324 return "vpshuflw\t{%2, %1, %0|%0, %1, %2}";
7326 [(set_attr "type" "sselog")
7327 (set_attr "prefix" "vex")
7328 (set_attr "length_immediate" "1")
7329 (set_attr "mode" "OI")])
;; Expander that splits the shuffle immediate (operand 2) into the four
;; 2-bit element selectors taken by sse2_pshuflw_1.  Operand 2 only has
;; to be a constant integer; bits above the low eight are not used.
7331 (define_expand "sse2_pshuflw"
7332 [(match_operand:V8HI 0 "register_operand")
7333 (match_operand:V8HI 1 "nonimmediate_operand")
7334 (match_operand:SI 2 "const_int_operand")]
7337 int mask = INTVAL (operands[2]);
7338 emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
7339 GEN_INT ((mask >> 0) & 3),
7340 GEN_INT ((mask >> 2) & 3),
7341 GEN_INT ((mask >> 4) & 3),
7342 GEN_INT ((mask >> 6) & 3)));
;; pshuflw / vpshuflw on V8HI.  Operands 2-5 are the selectors, each in
;; 0..3; the output code packs them into an immediate, two bits each
;; starting at bit 0, and stores it back into operands[2] for printing.
;; The attributes mark the encoding as using a rep prefix and no data16
;; prefix.
7346 (define_insn "sse2_pshuflw_1"
7347 [(set (match_operand:V8HI 0 "register_operand" "=x")
7349 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7350 (parallel [(match_operand 2 "const_0_to_3_operand")
7351 (match_operand 3 "const_0_to_3_operand")
7352 (match_operand 4 "const_0_to_3_operand")
7353 (match_operand 5 "const_0_to_3_operand")
7361 mask |= INTVAL (operands[2]) << 0;
7362 mask |= INTVAL (operands[3]) << 2;
7363 mask |= INTVAL (operands[4]) << 4;
7364 mask |= INTVAL (operands[5]) << 6;
7365 operands[2] = GEN_INT (mask);
7367 return "%vpshuflw\t{%2, %1, %0|%0, %1, %2}";
7369 [(set_attr "type" "sselog")
7370 (set_attr "prefix_data16" "0")
7371 (set_attr "prefix_rep" "1")
7372 (set_attr "prefix" "maybe_vex")
7373 (set_attr "length_immediate" "1")
7374 (set_attr "mode" "TI")])
;; Expander that turns the 8-bit shuffle immediate (operand 2) into the
;; eight explicit element selectors taken by avx2_pshufhw_1: the four
;; 2-bit fields of the mask with 4 added, followed by the same four
;; fields with 12 added.
7376 (define_expand "avx2_pshufhwv3"
7377 [(match_operand:V16HI 0 "register_operand")
7378 (match_operand:V16HI 1 "nonimmediate_operand")
7379 (match_operand:SI 2 "const_0_to_255_operand")]
7382 int mask = INTVAL (operands[2]);
7383 emit_insn (gen_avx2_pshufhw_1 (operands[0], operands[1],
7384 GEN_INT (((mask >> 0) & 3) + 4),
7385 GEN_INT (((mask >> 2) & 3) + 4),
7386 GEN_INT (((mask >> 4) & 3) + 4),
7387 GEN_INT (((mask >> 6) & 3) + 4),
7388 GEN_INT (((mask >> 0) & 3) + 12),
7389 GEN_INT (((mask >> 2) & 3) + 12),
7390 GEN_INT (((mask >> 4) & 3) + 12),
7391 GEN_INT (((mask >> 6) & 3) + 12)));
;; vpshufhw on V16HI with the shuffle spelled out element by element.
;; Operands 2-5 are indices in 4..7 and operands 6-9 are indices in
;; 12..15; the condition requires each of operands 6-9 to equal the
;; matching one of operands 2-5 plus 8, so both halves use the same
;; shuffle.  The output code subtracts 4 from each of operands 2-5, packs
;; the results into an immediate (two bits each starting at bit 0) and
;; stores it back into operands[2] for printing.
7395 (define_insn "avx2_pshufhw_1"
7396 [(set (match_operand:V16HI 0 "register_operand" "=x")
7398 (match_operand:V16HI 1 "nonimmediate_operand" "xm")
7399 (parallel [(const_int 0)
7403 (match_operand 2 "const_4_to_7_operand")
7404 (match_operand 3 "const_4_to_7_operand")
7405 (match_operand 4 "const_4_to_7_operand")
7406 (match_operand 5 "const_4_to_7_operand")
7411 (match_operand 6 "const_12_to_15_operand")
7412 (match_operand 7 "const_12_to_15_operand")
7413 (match_operand 8 "const_12_to_15_operand")
7414 (match_operand 9 "const_12_to_15_operand")])))]
7416 && INTVAL (operands[2]) + 8 == INTVAL (operands[6])
7417 && INTVAL (operands[3]) + 8 == INTVAL (operands[7])
7418 && INTVAL (operands[4]) + 8 == INTVAL (operands[8])
7419 && INTVAL (operands[5]) + 8 == INTVAL (operands[9])"
7422 mask |= (INTVAL (operands[2]) - 4) << 0;
7423 mask |= (INTVAL (operands[3]) - 4) << 2;
7424 mask |= (INTVAL (operands[4]) - 4) << 4;
7425 mask |= (INTVAL (operands[5]) - 4) << 6;
7426 operands[2] = GEN_INT (mask);
7428 return "vpshufhw\t{%2, %1, %0|%0, %1, %2}";
7430 [(set_attr "type" "sselog")
7431 (set_attr "prefix" "vex")
7432 (set_attr "length_immediate" "1")
7433 (set_attr "mode" "OI")])
;; Expander that splits the shuffle immediate (operand 2) into the four
;; element selectors taken by sse2_pshufhw_1: each 2-bit field of the
;; mask with 4 added.  Operand 2 only has to be a constant integer; bits
;; above the low eight are not used.
7435 (define_expand "sse2_pshufhw"
7436 [(match_operand:V8HI 0 "register_operand")
7437 (match_operand:V8HI 1 "nonimmediate_operand")
7438 (match_operand:SI 2 "const_int_operand")]
7441 int mask = INTVAL (operands[2]);
7442 emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
7443 GEN_INT (((mask >> 0) & 3) + 4),
7444 GEN_INT (((mask >> 2) & 3) + 4),
7445 GEN_INT (((mask >> 4) & 3) + 4),
7446 GEN_INT (((mask >> 6) & 3) + 4)));
;; pshufhw / vpshufhw on V8HI.  Operands 2-5 are the selectors, each in
;; 4..7.  The output code subtracts 4 from each, packs the results into
;; an immediate (two bits each starting at bit 0) and stores it back
;; into operands[2] for printing.  The attributes mark the encoding as
;; using a rep prefix and no data16 prefix.
7450 (define_insn "sse2_pshufhw_1"
7451 [(set (match_operand:V8HI 0 "register_operand" "=x")
7453 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7454 (parallel [(const_int 0)
7458 (match_operand 2 "const_4_to_7_operand")
7459 (match_operand 3 "const_4_to_7_operand")
7460 (match_operand 4 "const_4_to_7_operand")
7461 (match_operand 5 "const_4_to_7_operand")])))]
7465 mask |= (INTVAL (operands[2]) - 4) << 0;
7466 mask |= (INTVAL (operands[3]) - 4) << 2;
7467 mask |= (INTVAL (operands[4]) - 4) << 4;
7468 mask |= (INTVAL (operands[5]) - 4) << 6;
7469 operands[2] = GEN_INT (mask);
7471 return "%vpshufhw\t{%2, %1, %0|%0, %1, %2}";
7473 [(set_attr "type" "sselog")
7474 (set_attr "prefix_rep" "1")
7475 (set_attr "prefix_data16" "0")
7476 (set_attr "prefix" "maybe_vex")
7477 (set_attr "length_immediate" "1")
7478 (set_attr "mode" "TI")])
;; Expander with a V4SI register destination and an SImode source
;; (operand 1, register or memory).  The preparation statement supplies
;; operand 2 as the V4SI zero constant.
7480 (define_expand "sse2_loadd"
7481 [(set (match_operand:V4SI 0 "register_operand")
7484 (match_operand:SI 1 "nonimmediate_operand"))
7488 "operands[2] = CONST0_RTX (V4SImode);")
;; Load SImode operand 2 into a V4SI register, combined with operand 1,
;; which is a register or zero (reg_or_0_operand).  Alternatives:
;;   0: vmovd/movd from memory (isa sse2);
;;   1: vmovd/movd from a general register, destination class Yi;
;;   2: movss from memory (isa noavx);
;;   3: movss from an SSE register, operand 1 tied to the destination
;;      (isa noavx);
;;   4: vmovss, three-operand form (isa avx).
;; Alternatives 0-2 use constraint "C" for operand 1 and never print it.
7490 (define_insn "sse2_loadld"
7491 [(set (match_operand:V4SI 0 "register_operand" "=x,Yi,x,x,x")
7494 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x,x"))
7495 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,C,0,x")
7499 %vmovd\t{%2, %0|%0, %2}
7500 %vmovd\t{%2, %0|%0, %2}
7501 movss\t{%2, %0|%0, %2}
7502 movss\t{%2, %0|%0, %2}
7503 vmovss\t{%2, %1, %0|%0, %1, %2}"
7504 [(set_attr "isa" "sse2,*,noavx,noavx,avx")
7505 (set_attr "type" "ssemov")
7506 (set_attr "prefix" "maybe_vex,maybe_vex,orig,orig,vex")
7507 (set_attr "mode" "TI,TI,V4SF,SF,SF")])
;; Extract element operand 2 of a VI12_128 register into a general
;; register (alternative 0) or memory (alternative 1) with pextr / vpextr.
;; The register alternative prints the destination with the %k modifier.
;; prefix_data16 and prefix_extra are special-cased for alternative 0 in
;; V8HImode.
7509 (define_insn "*vec_extract<mode>"
7510 [(set (match_operand:<ssescalarmode> 0 "nonimmediate_operand" "=r,m")
7511 (vec_select:<ssescalarmode>
7512 (match_operand:VI12_128 1 "register_operand" "x,x")
7514 [(match_operand:SI 2 "const_0_to_<ssescalarnummask>_operand")])))]
7517 %vpextr<ssemodesuffix>\t{%2, %1, %k0|%k0, %1, %2}
7518 %vpextr<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
7519 [(set_attr "type" "sselog1")
7520 (set (attr "prefix_data16")
7522 (and (eq_attr "alternative" "0")
7523 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
7525 (const_string "*")))
7526 (set (attr "prefix_extra")
7528 (and (eq_attr "alternative" "0")
7529 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
7531 (const_string "1")))
7532 (set_attr "length_immediate" "1")
7533 (set_attr "prefix" "maybe_vex")
7534 (set_attr "mode" "TI")])
;; Extract one word of a V8HI register into a general register with
;; pextrw.  Only enabled for SSE2 targets that lack SSE4.1.
7536 (define_insn "*vec_extractv8hi_sse2"
7537 [(set (match_operand:HI 0 "register_operand" "=r")
7539 (match_operand:V8HI 1 "register_operand" "x")
7541 [(match_operand:SI 2 "const_0_to_7_operand")])))]
7542 "TARGET_SSE2 && !TARGET_SSE4_1"
7543 "pextrw\t{%2, %1, %k0|%k0, %1, %2}"
7544 [(set_attr "type" "sselog1")
7545 (set_attr "prefix_data16" "1")
7546 (set_attr "length_immediate" "1")
7547 (set_attr "mode" "TI")])
;; Extract byte operand 2 (0..15) of a V16QI register into a SWI48
;; general register with pextrb / vpextrb.  The destination is printed
;; with the %k modifier.
7549 (define_insn "*vec_extractv16qi_zext"
7550 [(set (match_operand:SWI48 0 "register_operand" "=r")
7553 (match_operand:V16QI 1 "register_operand" "x")
7555 [(match_operand:SI 2 "const_0_to_15_operand")]))))]
7557 "%vpextrb\t{%2, %1, %k0|%k0, %1, %2}"
7558 [(set_attr "type" "sselog1")
7559 (set_attr "prefix_extra" "1")
7560 (set_attr "length_immediate" "1")
7561 (set_attr "prefix" "maybe_vex")
7562 (set_attr "mode" "TI")])
;; Extract word operand 2 (0..7) of a V8HI register into a SWI48 general
;; register with pextrw / vpextrw.  The destination is printed with the
;; %k modifier.
7564 (define_insn "*vec_extractv8hi_zext"
7565 [(set (match_operand:SWI48 0 "register_operand" "=r")
7568 (match_operand:V8HI 1 "register_operand" "x")
7570 [(match_operand:SI 2 "const_0_to_7_operand")]))))]
7572 "%vpextrw\t{%2, %1, %k0|%k0, %1, %2}"
7573 [(set_attr "type" "sselog1")
7574 (set_attr "prefix_data16" "1")
7575 (set_attr "length_immediate" "1")
7576 (set_attr "prefix" "maybe_vex")
7577 (set_attr "mode" "TI")])
;; Element extract for VI12_128 vectors that live in memory: the source
;; is a memory operand with constraint "o" and the destination is a
;; general register.
7579 (define_insn "*vec_extract<mode>_mem"
7580 [(set (match_operand:<ssescalarmode> 0 "register_operand" "=r")
7581 (vec_select:<ssescalarmode>
7582 (match_operand:VI12_128 1 "memory_operand" "o")
7584 [(match_operand 2 "const_0_to_<ssescalarnummask>_operand")])))]
;; Extract element 0 of an <ssevecmode> vector into a SWI48 destination.
;; Four alternatives cover general-register, SSE-register and memory
;; destinations; the condition rules out memory-to-memory.  Alternative 1
;; (SSE register to general register) is limited to isa sse4.
7588 (define_insn "*vec_extract<ssevecmodelower>_0"
7589 [(set (match_operand:SWI48 0 "nonimmediate_operand" "=r ,r,x ,m")
7591 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "mYj,x,xm,x")
7592 (parallel [(const_int 0)])))]
7593 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7595 [(set_attr "isa" "*,sse4,*,*")])
;; Element 0 of a V4SI register extended into a DImode general register.
;; Needs a 64-bit SSE2 target that allows inter-unit moves out of vector
;; registers.  After reload it is split into a zero_extend:DI of
;; operand 1, which the preparation code rewrites as the same hard
;; register in SImode.
7597 (define_insn_and_split "*vec_extractv4si_0_zext"
7598 [(set (match_operand:DI 0 "register_operand" "=r")
7601 (match_operand:V4SI 1 "register_operand" "x")
7602 (parallel [(const_int 0)]))))]
7603 "TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_FROM_VEC"
7605 "&& reload_completed"
7606 [(set (match_dup 0) (zero_extend:DI (match_dup 1)))]
7607 "operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));")
;; Extract element 0 of a V2DI operand into an SSE register or memory on
;; non-64-bit SSE targets.  The condition rules out memory-to-memory.
7609 (define_insn "*vec_extractv2di_0_sse"
7610 [(set (match_operand:DI 0 "nonimmediate_operand" "=x,m")
7612 (match_operand:V2DI 1 "nonimmediate_operand" "xm,x")
7613 (parallel [(const_int 0)])))]
7614 "TARGET_SSE && !TARGET_64BIT
7615 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
;; After reload, extracting element 0 of a vector register becomes a
;; plain move: operand 1 is rewritten as the same hard register in the
;; scalar <MODE>mode.
7619 [(set (match_operand:SWI48x 0 "nonimmediate_operand")
7621 (match_operand:<ssevecmode> 1 "register_operand")
7622 (parallel [(const_int 0)])))]
7623 "TARGET_SSE && reload_completed"
7624 [(set (match_dup 0) (match_dup 1))]
7625 "operands[1] = gen_rtx_REG (<MODE>mode, REGNO (operands[1]));")
;; Extract element operand 2 (0..3) of a V4SI register.
;; Alternative 0 stores to a general register or memory with
;; pextrd / vpextrd.  Alternatives 1 (isa noavx, operand 1 tied to the
;; destination) and 2 (isa avx) leave the result in an SSE register by
;; shifting right with psrldq / vpsrldq; for those the element index is
;; multiplied by 4 to get the shift count in bytes.
7627 (define_insn "*vec_extractv4si"
7628 [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,x,x")
7630 (match_operand:V4SI 1 "register_operand" "x,0,x")
7631 (parallel [(match_operand:SI 2 "const_0_to_3_operand")])))]
7634 switch (which_alternative)
7637 return "%vpextrd\t{%2, %1, %0|%0, %1, %2}";
7640 operands [2] = GEN_INT (INTVAL (operands[2]) * 4);
7641 return "psrldq\t{%2, %0|%0, %2}";
7644 operands [2] = GEN_INT (INTVAL (operands[2]) * 4);
7645 return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
7651 [(set_attr "isa" "*,noavx,avx")
7652 (set_attr "type" "sselog1,sseishft1,sseishft1")
7653 (set_attr "prefix_extra" "1,*,*")
7654 (set_attr "length_immediate" "1")
7655 (set_attr "prefix" "maybe_vex,orig,vex")
7656 (set_attr "mode" "TI")])
;; Extract element operand 2 (0..3) of a V4SI register into a DImode
;; general register with pextrd / vpextrd on 64-bit SSE4.1 targets.  The
;; destination is printed with the %k modifier.
7658 (define_insn "*vec_extractv4si_zext"
7659 [(set (match_operand:DI 0 "register_operand" "=r")
7662 (match_operand:V4SI 1 "register_operand" "x")
7663 (parallel [(match_operand:SI 2 "const_0_to_3_operand")]))))]
7664 "TARGET_64BIT && TARGET_SSE4_1"
7665 "%vpextrd\t{%2, %1, %k0|%k0, %1, %2}"
7666 [(set_attr "type" "sselog1")
7667 (set_attr "prefix_extra" "1")
7668 (set_attr "length_immediate" "1")
7669 (set_attr "prefix" "maybe_vex")
7670 (set_attr "mode" "TI")])
;; Element extract for a V4SI vector that lives in memory: the source is
;; a memory operand with constraint "o", and the SImode destination is
;; an SSE register or a general register.
7672 (define_insn "*vec_extractv4si_mem"
7673 [(set (match_operand:SI 0 "register_operand" "=x,r")
7675 (match_operand:V4SI 1 "memory_operand" "o,o")
7676 (parallel [(match_operand 2 "const_0_to_3_operand")])))]
;; Element operand 2 of a V4SI vector in memory, extended into a DImode
;; register, on 64-bit SSE targets.  After reload it is split into a
;; zero_extend:DI of an SImode memory reference: operand 1 is re-addressed
;; at byte offset index * 4.
7680 (define_insn_and_split "*vec_extractv4si_zext_mem"
7681 [(set (match_operand:DI 0 "register_operand" "=x,r")
7684 (match_operand:V4SI 1 "memory_operand" "o,o")
7685 (parallel [(match_operand:SI 2 "const_0_to_3_operand")]))))]
7686 "TARGET_64BIT && TARGET_SSE"
7688 "&& reload_completed"
7689 [(set (match_dup 0) (zero_extend:DI (match_dup 1)))]
7691 operands[1] = adjust_address (operands[1], SImode, INTVAL (operands[2]) * 4);
;; Extract element 1 (the selector is the fixed (const_int 1)) of a V2DI
;; operand into a DImode destination.  The insn condition requires
;; TARGET_SSE and rejects the case where both operands are memory.
;; Seven alternatives are declared by the constraints and attributes; the
;; templates listed are (v)pextrq $1, (v)movhps, psrldq $8, vpsrldq $8 and
;; movhlps.  The "isa" attribute restricts alternative 0 to x64_sse4,
;; alternative 2 to sse2_noavx, 3 to avx, 4 to noavx and 6 to x64.
7694 (define_insn "*vec_extractv2di_1"
7695 [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,m,x,x,x,x,r")
7697 (match_operand:V2DI 1 "nonimmediate_operand" "x ,x,0,x,x,o,o")
7698 (parallel [(const_int 1)])))]
7699 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7701 %vpextrq\t{$1, %1, %0|%0, %1, 1}
7702 %vmovhps\t{%1, %0|%0, %1}
7703 psrldq\t{$8, %0|%0, 8}
7704 vpsrldq\t{$8, %1, %0|%0, %1, 8}
7705 movhlps\t{%1, %0|%0, %1}
7708 [(set_attr "isa" "x64_sse4,*,sse2_noavx,avx,noavx,*,x64")
7709 (set_attr "type" "sselog1,ssemov,sseishft1,sseishft1,ssemov,ssemov,imov")
7710 (set_attr "length_immediate" "1,*,1,1,*,*,*")
7711 (set_attr "prefix_rex" "1,*,*,*,*,*,*")
7712 (set_attr "prefix_extra" "1,*,*,*,*,*,*")
7713 (set_attr "prefix" "maybe_vex,maybe_vex,orig,vex,orig,*,*")
7714 (set_attr "mode" "TI,V2SF,TI,TI,V4SF,DI,DI")])
;; Post-reload rewrite of an element extract from a 128-bit integer vector
;; (iterator VI_128) that lives in memory: the vec_select becomes a plain
;; scalar move from the element's own address.  The byte offset is
;; operand 2 (the element index) times the size of the scalar mode, and
;; adjust_address re-types operand 1 to that scalar mode.
7717 [(set (match_operand:<ssescalarmode> 0 "register_operand")
7718 (vec_select:<ssescalarmode>
7719 (match_operand:VI_128 1 "memory_operand")
7721 [(match_operand 2 "const_0_to_<ssescalarnummask>_operand")])))]
7722 "TARGET_SSE && reload_completed"
7723 [(set (match_dup 0) (match_dup 1))]
7725 int offs = INTVAL (operands[2]) * GET_MODE_SIZE (<ssescalarmode>mode);
7727 operands[1] = adjust_address (operands[1], <ssescalarmode>mode, offs);
;; V4SI result built from a single SImode operand (a broadcast, going by
;; the pattern name and the $0 shuffle selectors).  Alternatives:
;;   0: source in an SSE register, (v)pshufd $0          (isa sse2)
;;   1: source in memory, vbroadcastss                   (isa avx)
;;   2: source tied to the destination, shufps $0        (isa noavx)
7730 (define_insn "*vec_dupv4si"
7731 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
7733 (match_operand:SI 1 "nonimmediate_operand" " x,m,0")))]
7736 %vpshufd\t{$0, %1, %0|%0, %1, 0}
7737 vbroadcastss\t{%1, %0|%0, %1}
7738 shufps\t{$0, %0, %0|%0, %0, 0}"
7739 [(set_attr "isa" "sse2,avx,noavx")
7740 (set_attr "type" "sselog1,ssemov,sselog1")
7741 (set_attr "length_immediate" "1,0,1")
7742 (set_attr "prefix_extra" "0,1,*")
7743 (set_attr "prefix" "maybe_vex,vex,orig")
7744 (set_attr "mode" "TI,V4SF,V4SF")])
;; V2DI result built from a single DImode operand (a broadcast, going by
;; the pattern name).  Four alternatives are declared; the "isa" attribute
;; assigns them to sse2_noavx, avx, sse3 and noavx in that order.  The
;; templates listed are vpunpcklqdq, which prints operand 1 twice via %d1,
;; and (v)movddup.
7746 (define_insn "*vec_dupv2di"
7747 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x,x")
7749 (match_operand:DI 1 "nonimmediate_operand" " 0,x,m,0")))]
7753 vpunpcklqdq\t{%d1, %0|%0, %d1}
7754 %vmovddup\t{%1, %0|%0, %1}
7756 [(set_attr "isa" "sse2_noavx,avx,sse3,noavx")
7757 (set_attr "type" "sselog1,sselog1,sselog1,ssemov")
7758 (set_attr "prefix" "orig,vex,maybe_vex,orig")
7759 (set_attr "mode" "TI,TI,DF,V4SF")])
;; Build a V2SI from two SImode operands (SSE4.1 flavour, going by the
;; pattern name).  Alternatives, in order:
;;   0/1: pinsrd / vpinsrd $1 inserts operand 2 (register or memory)
;;   2/3: punpckldq / vpunpckldq with operand 2 in an SSE register
;;   4:   (v)movd of operand 1 when operand 2 satisfies constraint "C"
;;   5/6: MMX-register forms (constraint "*y"): punpckldq and movd
7761 (define_insn "*vec_concatv2si_sse4_1"
7762 [(set (match_operand:V2SI 0 "register_operand" "=x, x,x,x, x, *y,*y")
7764 (match_operand:SI 1 "nonimmediate_operand" " 0, x,0,x,rm, 0,rm")
7765 (match_operand:SI 2 "vector_move_operand" "rm,rm,x,x, C,*ym, C")))]
7768 pinsrd\t{$1, %2, %0|%0, %2, 1}
7769 vpinsrd\t{$1, %2, %1, %0|%0, %1, %2, 1}
7770 punpckldq\t{%2, %0|%0, %2}
7771 vpunpckldq\t{%2, %1, %0|%0, %1, %2}
7772 %vmovd\t{%1, %0|%0, %1}
7773 punpckldq\t{%2, %0|%0, %2}
7774 movd\t{%1, %0|%0, %1}"
7775 [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
7776 (set_attr "type" "sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov")
7777 (set_attr "prefix_extra" "1,1,*,*,*,*,*")
7778 (set_attr "length_immediate" "1,1,*,*,*,*,*")
7779 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig")
7780 (set_attr "mode" "TI,TI,TI,TI,TI,DI,DI")])
7782 ;; ??? In theory we can match memory for the MMX alternative, but allowing
7783 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
7784 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Build a V2SI from two SImode operands when SSE is available but SSE4.1
;; is not (see the insn condition).  Operand 2 is a register or zero
;; (predicate reg_or_0_operand).  Alternatives 0-2 are marked isa "sse2";
;; alternatives 3 and 4 use the single-precision forms unpcklps / movss;
;; alternatives 2, 5 and 6 write an MMX register ("*y").
7785 (define_insn "*vec_concatv2si"
7786 [(set (match_operand:V2SI 0 "register_operand" "=x,x ,*y,x,x,*y,*y")
7788 (match_operand:SI 1 "nonimmediate_operand" " 0,rm,rm,0,m, 0,*rm")
7789 (match_operand:SI 2 "reg_or_0_operand" " x,C ,C, x,C,*y,C")))]
7790 "TARGET_SSE && !TARGET_SSE4_1"
7792 punpckldq\t{%2, %0|%0, %2}
7793 movd\t{%1, %0|%0, %1}
7794 movd\t{%1, %0|%0, %1}
7795 unpcklps\t{%2, %0|%0, %2}
7796 movss\t{%1, %0|%0, %1}
7797 punpckldq\t{%2, %0|%0, %2}
7798 movd\t{%1, %0|%0, %1}"
7799 [(set_attr "isa" "sse2,sse2,sse2,*,*,*,*")
7800 (set_attr "type" "sselog,ssemov,mmxmov,sselog,ssemov,mmxcvt,mmxmov")
7801 (set_attr "mode" "TI,TI,DI,V4SF,SF,DI,DI")])
;; Build a V4SI from two V2SI halves.  Operand 1 is always a register;
;; operand 2 is a register in alternatives 0-2 and memory in 3-4.
;; Register forms use (v)punpcklqdq or movlhps; memory forms use
;; (v)movhps, printing the memory operand through the %q modifier in
;; Intel syntax.
7803 (define_insn "*vec_concatv4si"
7804 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x,x,x")
7806 (match_operand:V2SI 1 "register_operand" " 0,x,0,0,x")
7807 (match_operand:V2SI 2 "nonimmediate_operand" " x,x,x,m,m")))]
7810 punpcklqdq\t{%2, %0|%0, %2}
7811 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7812 movlhps\t{%2, %0|%0, %2}
7813 movhps\t{%2, %0|%0, %q2}
7814 vmovhps\t{%2, %1, %0|%0, %1, %q2}"
7815 [(set_attr "isa" "sse2_noavx,avx,noavx,noavx,avx")
7816 (set_attr "type" "sselog,sselog,ssemov,ssemov,ssemov")
7817 (set_attr "prefix" "orig,vex,orig,orig,vex")
7818 (set_attr "mode" "TI,TI,V4SF,V2SF,V2SF")])
7820 ;; movd instead of movq is required to handle broken assemblers.
;; Build a V2DI from two DImode operands; ten alternatives:
;;   0/1: pinsrq / vpinsrq $1 inserts operand 2 (register or memory)
;;   2:   (v)movd from a general register, operand 2 matching "C"
;;   3:   (v)movq from an SSE register or memory, operand 2 matching "C"
;;   4:   movq2dq from an MMX register, operand 2 matching "C"
;;   5/6: punpcklqdq / vpunpcklqdq, both operands in SSE registers
;;   7:   movlhps
;;   8/9: movhps / vmovhps with operand 2 in memory
;; The visible fragment of the "type" attribute selects "sselog" for
;; alternatives 0, 1, 5 and 6 and "ssemov" otherwise.
7821 (define_insn "vec_concatv2di"
7822 [(set (match_operand:V2DI 0 "register_operand"
7823 "=x,x ,Yi,x ,!x,x,x,x,x,x")
7825 (match_operand:DI 1 "nonimmediate_operand"
7826 " 0,x ,r ,xm,*y,0,x,0,0,x")
7827 (match_operand:DI 2 "vector_move_operand"
7828 "rm,rm,C ,C ,C ,x,x,x,m,m")))]
7831 pinsrq\t{$1, %2, %0|%0, %2, 1}
7832 vpinsrq\t{$1, %2, %1, %0|%0, %1, %2, 1}
7833 %vmovd\t{%1, %0|%0, %1}
7834 %vmovq\t{%1, %0|%0, %1}
7835 movq2dq\t{%1, %0|%0, %1}
7836 punpcklqdq\t{%2, %0|%0, %2}
7837 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7838 movlhps\t{%2, %0|%0, %2}
7839 movhps\t{%2, %0|%0, %2}
7840 vmovhps\t{%2, %1, %0|%0, %1, %2}"
7841 [(set_attr "isa" "x64_sse4_noavx,x64_avx,x64,sse2,sse2,sse2_noavx,avx,noavx,noavx,avx")
7844 (eq_attr "alternative" "0,1,5,6")
7845 (const_string "sselog")
7846 (const_string "ssemov")))
7847 (set_attr "prefix_rex" "1,1,1,*,*,*,*,*,*,*")
7848 (set_attr "prefix_extra" "1,1,*,*,*,*,*,*,*,*")
7849 (set_attr "length_immediate" "1,1,*,*,*,*,*,*,*,*")
7850 (set_attr "prefix" "orig,vex,maybe_vex,maybe_vex,orig,orig,vex,orig,orig,vex")
7851 (set_attr "mode" "TI,TI,TI,TI,TI,TI,TI,V4SF,V2SF,V2SF")])
;; Expander for vec_unpacks_lo_<mode>: hands the whole job to
;; ix86_expand_sse_unpack with flags (false, false) and finishes with DONE.
;; NOTE(review): the two flags presumably mean "unsigned" and "high half"
;; respectively -- confirm against ix86_expand_sse_unpack.
7853 (define_expand "vec_unpacks_lo_<mode>"
7854 [(match_operand:<sseunpackmode> 0 "register_operand")
7855 (match_operand:VI124_AVX512F 1 "register_operand")]
7857 "ix86_expand_sse_unpack (operands[0], operands[1], false, false); DONE;")
;; Expander for vec_unpacks_hi_<mode>: hands the whole job to
;; ix86_expand_sse_unpack with flags (false, true) and finishes with DONE.
;; NOTE(review): the two flags presumably mean "unsigned" and "high half"
;; respectively -- confirm against ix86_expand_sse_unpack.
7859 (define_expand "vec_unpacks_hi_<mode>"
7860 [(match_operand:<sseunpackmode> 0 "register_operand")
7861 (match_operand:VI124_AVX512F 1 "register_operand")]
7863 "ix86_expand_sse_unpack (operands[0], operands[1], false, true); DONE;")
;; Expander for vec_unpacku_lo_<mode>: hands the whole job to
;; ix86_expand_sse_unpack with flags (true, false) and finishes with DONE.
;; NOTE(review): the two flags presumably mean "unsigned" and "high half"
;; respectively -- confirm against ix86_expand_sse_unpack.
7865 (define_expand "vec_unpacku_lo_<mode>"
7866 [(match_operand:<sseunpackmode> 0 "register_operand")
7867 (match_operand:VI124_AVX512F 1 "register_operand")]
7869 "ix86_expand_sse_unpack (operands[0], operands[1], true, false); DONE;")
;; Expander for vec_unpacku_hi_<mode>: hands the whole job to
;; ix86_expand_sse_unpack with flags (true, true) and finishes with DONE.
;; NOTE(review): the two flags presumably mean "unsigned" and "high half"
;; respectively -- confirm against ix86_expand_sse_unpack.
7871 (define_expand "vec_unpacku_hi_<mode>"
7872 [(match_operand:<sseunpackmode> 0 "register_operand")
7873 (match_operand:VI124_AVX512F 1 "register_operand")]
7875 "ix86_expand_sse_unpack (operands[0], operands[1], true, true); DONE;")
7877 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7881 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander for the unsigned-average pattern over the VI12_AVX2 modes.
;; The RTL zero-extends both inputs to <ssedoublemode>, adds them inside
;; a further plus and applies a logical right shift.  The preparation
;; code sets operand 3 to CONST1_RTX of the vector mode (presumably the
;; rounding term of the outer plus -- confirm) and then calls
;; ix86_fixup_binary_operands_no_copy for PLUS.
7883 (define_expand "<sse2_avx2>_uavg<mode>3"
7884 [(set (match_operand:VI12_AVX2 0 "register_operand")
7886 (lshiftrt:<ssedoublemode>
7887 (plus:<ssedoublemode>
7888 (plus:<ssedoublemode>
7889 (zero_extend:<ssedoublemode>
7890 (match_operand:VI12_AVX2 1 "nonimmediate_operand"))
7891 (zero_extend:<ssedoublemode>
7892 (match_operand:VI12_AVX2 2 "nonimmediate_operand")))
7897 operands[3] = CONST1_RTX(<MODE>mode);
7898 ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);
;; Matching insn for the unsigned average: zero-extended operands 1 and 2
;; are summed, operand 3 (which must satisfy const1_operand) is added,
;; and the result is shifted right logically.  Operand 1 is marked
;; commutative ("%").  The condition requires TARGET_SSE2 and
;; ix86_binary_operator_ok for PLUS.  Emits pavg<suffix> (two-operand,
;; non-AVX) or vpavg<suffix> (three-operand, AVX).
7901 (define_insn "*<sse2_avx2>_uavg<mode>3"
7902 [(set (match_operand:VI12_AVX2 0 "register_operand" "=x,x")
7904 (lshiftrt:<ssedoublemode>
7905 (plus:<ssedoublemode>
7906 (plus:<ssedoublemode>
7907 (zero_extend:<ssedoublemode>
7908 (match_operand:VI12_AVX2 1 "nonimmediate_operand" "%0,x"))
7909 (zero_extend:<ssedoublemode>
7910 (match_operand:VI12_AVX2 2 "nonimmediate_operand" "xm,xm")))
7911 (match_operand:VI12_AVX2 3 "const1_operand"))
7913 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
7915 pavg<ssemodesuffix>\t{%2, %0|%0, %2}
7916 vpavg<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
7917 [(set_attr "isa" "noavx,avx")
7918 (set_attr "type" "sseiadd")
7919 (set_attr "prefix_data16" "1,*")
7920 (set_attr "prefix" "orig,vex")
7921 (set_attr "mode" "<sseinsnmode>")])
7923 ;; The correct representation for this is absolutely enormous, and
7924 ;; surely not generally useful.
;; psadbw / vpsadbw: the result has a VI8_AVX2 mode while both inputs are
;; in the corresponding byte-vector mode (<ssebytemode>).  The inputs are
;; wrapped in an unspec operand vector rather than described in full RTL.
;; Alternative 0 is the two-operand non-AVX form, alternative 1 the
;; three-operand AVX form.
7925 (define_insn "<sse2_avx2>_psadbw"
7926 [(set (match_operand:VI8_AVX2 0 "register_operand" "=x,x")
7928 [(match_operand:<ssebytemode> 1 "register_operand" "0,x")
7929 (match_operand:<ssebytemode> 2 "nonimmediate_operand" "xm,xm")]
7933 psadbw\t{%2, %0|%0, %2}
7934 vpsadbw\t{%2, %1, %0|%0, %1, %2}"
7935 [(set_attr "isa" "noavx,avx")
7936 (set_attr "type" "sseiadd")
7937 (set_attr "atom_unit" "simul")
7938 (set_attr "prefix_data16" "1,*")
7939 (set_attr "prefix" "orig,vex")
7940 (set_attr "mode" "<sseinsnmode>")])
;; (v)movmskps / (v)movmskpd: takes a floating-point vector register
;; (iterator VF_128_256) and produces an SImode value in a general
;; register.  The mnemonic suffix comes from <ssemodesuffix>.
7942 (define_insn "<sse>_movmsk<ssemodesuffix><avxsizesuffix>"
7943 [(set (match_operand:SI 0 "register_operand" "=r")
7945 [(match_operand:VF_128_256 1 "register_operand" "x")]
7948 "%vmovmsk<ssemodesuffix>\t{%1, %0|%0, %1}"
7949 [(set_attr "type" "ssemov")
7950 (set_attr "prefix" "maybe_vex")
7951 (set_attr "mode" "<MODE>")])
;; 256-bit vpmovmskb: takes a V32QI register and produces an SImode value
;; in a general register; always VEX-encoded.
;; NOTE(review): the "mode" attribute is "DI" here while sse2_pmovmskb
;; uses "SI" -- confirm this difference is intentional.
7953 (define_insn "avx2_pmovmskb"
7954 [(set (match_operand:SI 0 "register_operand" "=r")
7955 (unspec:SI [(match_operand:V32QI 1 "register_operand" "x")]
7958 "vpmovmskb\t{%1, %0|%0, %1}"
7959 [(set_attr "type" "ssemov")
7960 (set_attr "prefix" "vex")
7961 (set_attr "mode" "DI")])
;; 128-bit (v)pmovmskb: takes a V16QI register and produces an SImode
;; value in a general register.  The %v prefix in the template selects
;; the VEX mnemonic when AVX is enabled.
7963 (define_insn "sse2_pmovmskb"
7964 [(set (match_operand:SI 0 "register_operand" "=r")
7965 (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
7968 "%vpmovmskb\t{%1, %0|%0, %1}"
7969 [(set_attr "type" "ssemov")
7970 (set_attr "prefix_data16" "1")
7971 (set_attr "prefix" "maybe_vex")
7972 (set_attr "mode" "SI")])
;; Expander for maskmovdqu: a V16QI memory destination is set from an
;; unspec whose first two inputs are V16QI registers.  The insn that
;; follows it matches the destination address in a register.
7974 (define_expand "sse2_maskmovdqu"
7975 [(set (match_operand:V16QI 0 "memory_operand")
7976 (unspec:V16QI [(match_operand:V16QI 1 "register_operand")
7977 (match_operand:V16QI 2 "register_operand")
;; (v)maskmovdqu.  The destination address is operand 0, a Pmode register
;; constrained to "D"; it never appears in the template because the
;; instruction uses it implicitly.  The old memory contents are a third
;; unspec input (mem of the same address).  When Pmode differs from
;; word_mode the output code writes an "addr32" prefix by hand, and the
;; length_address attribute evaluates to 1 in that case.  length_vex is
;; computed explicitly as 3 plus REX_SSE_REGNO_P of operand 2's register.
7982 (define_insn "*sse2_maskmovdqu"
7983 [(set (mem:V16QI (match_operand:P 0 "register_operand" "D"))
7984 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
7985 (match_operand:V16QI 2 "register_operand" "x")
7986 (mem:V16QI (match_dup 0))]
7990 /* We can't use %^ here due to ASM_OUTPUT_OPCODE processing
7991 that requires %v to be at the beginning of the opcode name. */
7992 if (Pmode != word_mode)
7993 fputs ("\taddr32", asm_out_file);
7994 return "%vmaskmovdqu\t{%2, %1|%1, %2}";
7996 [(set_attr "type" "ssemov")
7997 (set_attr "prefix_data16" "1")
7998 (set (attr "length_address")
7999 (symbol_ref ("Pmode != word_mode")))
8000 ;; The implicit %rdi operand confuses default length_vex computation.
8001 (set (attr "length_vex")
8002 (symbol_ref ("3 + REX_SSE_REGNO_P (REGNO (operands[2]))")))
8003 (set_attr "prefix" "maybe_vex")
8004 (set_attr "mode" "TI")])
;; ldmxcsr: an unspec_volatile whose single input is an SImode memory
;; operand.  The "memory" attribute is "load" and the insn is tagged with
;; atom_sse_attr "mxcsr".
8006 (define_insn "sse_ldmxcsr"
8007 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
8011 [(set_attr "type" "sse")
8012 (set_attr "atom_sse_attr" "mxcsr")
8013 (set_attr "prefix" "maybe_vex")
8014 (set_attr "memory" "load")])
;; stmxcsr: sets an SImode memory operand from the volatile unspec
;; UNSPECV_STMXCSR, which has no real inputs (a dummy const_int 0).
;; The "memory" attribute is "store".
8016 (define_insn "sse_stmxcsr"
8017 [(set (match_operand:SI 0 "memory_operand" "=m")
8018 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
8021 [(set_attr "type" "sse")
8022 (set_attr "atom_sse_attr" "mxcsr")
8023 (set_attr "prefix" "maybe_vex")
8024 (set_attr "memory" "store")])
;; clflush: an unspec_volatile over an address operand (predicate
;; address_operand, constraint "p"), not over a mem.  The "memory"
;; attribute is "unknown" and the insn is tagged with atom_sse_attr
;; "fence".
8026 (define_insn "sse2_clflush"
8027 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
8031 [(set_attr "type" "sse")
8032 (set_attr "atom_sse_attr" "fence")
8033 (set_attr "memory" "unknown")])
;; mwait: an unspec_volatile with two SImode register inputs pinned by
;; their constraints to "a" and "c".  The encoded length is fixed at
;; 3 bytes.
8036 (define_insn "sse3_mwait"
8037 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
8038 (match_operand:SI 1 "register_operand" "c")]
8041 ;; 64bit version is "mwait %rax,%rcx". But only lower 32bits are used.
8042 ;; Since 32bit register operands are implicitly zero extended to 64bit,
8043 ;; we only need to set up 32bit registers.
8045 [(set_attr "length" "3")])
;; monitor: an unspec_volatile with a Pmode address register pinned to
;; "a" and two SImode registers pinned to "c" and "d".  The length is 3
;; bytes, plus one when Pmode differs from word_mode (presumably for an
;; address-size prefix -- confirm).
8047 (define_insn "sse3_monitor_<mode>"
8048 [(unspec_volatile [(match_operand:P 0 "register_operand" "a")
8049 (match_operand:SI 1 "register_operand" "c")
8050 (match_operand:SI 2 "register_operand" "d")]
8053 ;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in
8054 ;; RCX and RDX are used. Since 32bit register operands are implicitly
8055 ;; zero extended to 64bit, we only need to set up 32bit registers.
8057 [(set (attr "length")
8058 (symbol_ref ("(Pmode != word_mode) + 3")))])
8060 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8062 ;; SSSE3 instructions
8064 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Code iterator over the four RTL codes plus, ss_plus, minus and
;; ss_minus.
8066 (define_code_iterator ssse3_plusminus [plus ss_plus minus ss_minus])
;; 256-bit horizontal word operation, vph<op>w (AVX only, three-operand).
;; The RTL picks adjacent HImode element pairs (0,1), (2,3), ... (14,15)
;; of operand 1, followed by the same pairs of operand 2.  The operator
;; that combines each pair is chosen by a code iterator, reflected in the
;; mnemonic through <plusminus_mnemonic>.
8068 (define_insn "avx2_ph<plusminus_mnemonic>wv16hi3"
8069 [(set (match_operand:V16HI 0 "register_operand" "=x")
8076 (match_operand:V16HI 1 "register_operand" "x")
8077 (parallel [(const_int 0)]))
8078 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8080 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8081 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8084 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8085 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8087 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8088 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8092 (vec_select:HI (match_dup 1) (parallel [(const_int 8)]))
8093 (vec_select:HI (match_dup 1) (parallel [(const_int 9)])))
8095 (vec_select:HI (match_dup 1) (parallel [(const_int 10)]))
8096 (vec_select:HI (match_dup 1) (parallel [(const_int 11)]))))
8099 (vec_select:HI (match_dup 1) (parallel [(const_int 12)]))
8100 (vec_select:HI (match_dup 1) (parallel [(const_int 13)])))
8102 (vec_select:HI (match_dup 1) (parallel [(const_int 14)]))
8103 (vec_select:HI (match_dup 1) (parallel [(const_int 15)]))))))
8109 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
8110 (parallel [(const_int 0)]))
8111 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8113 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8114 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8117 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8118 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8120 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8121 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))
8125 (vec_select:HI (match_dup 2) (parallel [(const_int 8)]))
8126 (vec_select:HI (match_dup 2) (parallel [(const_int 9)])))
8128 (vec_select:HI (match_dup 2) (parallel [(const_int 10)]))
8129 (vec_select:HI (match_dup 2) (parallel [(const_int 11)]))))
8132 (vec_select:HI (match_dup 2) (parallel [(const_int 12)]))
8133 (vec_select:HI (match_dup 2) (parallel [(const_int 13)])))
8135 (vec_select:HI (match_dup 2) (parallel [(const_int 14)]))
8136 (vec_select:HI (match_dup 2) (parallel [(const_int 15)]))))))))]
8138 "vph<plusminus_mnemonic>w\t{%2, %1, %0|%0, %1, %2}"
8139 [(set_attr "type" "sseiadd")
8140 (set_attr "prefix_extra" "1")
8141 (set_attr "prefix" "vex")
8142 (set_attr "mode" "OI")])
;; 128-bit horizontal word operation, ph<op>w / vph<op>w.  The RTL picks
;; adjacent HImode element pairs (0,1) ... (6,7) of operand 1, followed
;; by the same pairs of operand 2.  Alternative 0 is the two-operand
;; non-AVX form (operand 1 tied to the destination), alternative 1 the
;; three-operand AVX form.
8144 (define_insn "ssse3_ph<plusminus_mnemonic>wv8hi3"
8145 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
8151 (match_operand:V8HI 1 "register_operand" "0,x")
8152 (parallel [(const_int 0)]))
8153 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8155 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8156 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8159 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8160 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8162 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8163 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8168 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
8169 (parallel [(const_int 0)]))
8170 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8172 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8173 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8176 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8177 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8179 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8180 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8183 ph<plusminus_mnemonic>w\t{%2, %0|%0, %2}
8184 vph<plusminus_mnemonic>w\t{%2, %1, %0|%0, %1, %2}"
8185 [(set_attr "isa" "noavx,avx")
8186 (set_attr "type" "sseiadd")
8187 (set_attr "atom_unit" "complex")
8188 (set_attr "prefix_data16" "1,*")
8189 (set_attr "prefix_extra" "1")
8190 (set_attr "prefix" "orig,vex")
8191 (set_attr "mode" "TI")])
;; 64-bit (MMX register, constraint "y") horizontal word operation,
;; ph<op>w.  The RTL picks HImode element pairs (0,1) and (2,3) of
;; operand 1, then of operand 2.  Operand 1 is tied to the destination.
;; prefix_rex is computed from x86_extended_reg_mentioned_p.
8193 (define_insn "ssse3_ph<plusminus_mnemonic>wv4hi3"
8194 [(set (match_operand:V4HI 0 "register_operand" "=y")
8199 (match_operand:V4HI 1 "register_operand" "0")
8200 (parallel [(const_int 0)]))
8201 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8203 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8204 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8208 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8209 (parallel [(const_int 0)]))
8210 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8212 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8213 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8215 "ph<plusminus_mnemonic>w\t{%2, %0|%0, %2}"
8216 [(set_attr "type" "sseiadd")
8217 (set_attr "atom_unit" "complex")
8218 (set_attr "prefix_extra" "1")
8219 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8220 (set_attr "mode" "DI")])
;; 256-bit horizontal doubleword operation, vph<op>d (AVX only,
;; three-operand).  The RTL picks adjacent SImode element pairs
;; (0,1) ... (6,7) of operand 1, followed by the same pairs of operand 2.
8222 (define_insn "avx2_ph<plusminus_mnemonic>dv8si3"
8223 [(set (match_operand:V8SI 0 "register_operand" "=x")
8229 (match_operand:V8SI 1 "register_operand" "x")
8230 (parallel [(const_int 0)]))
8231 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8233 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8234 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8237 (vec_select:SI (match_dup 1) (parallel [(const_int 4)]))
8238 (vec_select:SI (match_dup 1) (parallel [(const_int 5)])))
8240 (vec_select:SI (match_dup 1) (parallel [(const_int 6)]))
8241 (vec_select:SI (match_dup 1) (parallel [(const_int 7)])))))
8246 (match_operand:V8SI 2 "nonimmediate_operand" "xm")
8247 (parallel [(const_int 0)]))
8248 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8250 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8251 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))
8254 (vec_select:SI (match_dup 2) (parallel [(const_int 4)]))
8255 (vec_select:SI (match_dup 2) (parallel [(const_int 5)])))
8257 (vec_select:SI (match_dup 2) (parallel [(const_int 6)]))
8258 (vec_select:SI (match_dup 2) (parallel [(const_int 7)])))))))]
8260 "vph<plusminus_mnemonic>d\t{%2, %1, %0|%0, %1, %2}"
8261 [(set_attr "type" "sseiadd")
8262 (set_attr "prefix_extra" "1")
8263 (set_attr "prefix" "vex")
8264 (set_attr "mode" "OI")])
;; 128-bit horizontal doubleword operation, ph<op>d / vph<op>d.  The RTL
;; picks SImode element pairs (0,1) and (2,3) of operand 1, then of
;; operand 2.  Alternative 0 is the two-operand non-AVX form, alternative
;; 1 the three-operand AVX form.
8266 (define_insn "ssse3_ph<plusminus_mnemonic>dv4si3"
8267 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
8272 (match_operand:V4SI 1 "register_operand" "0,x")
8273 (parallel [(const_int 0)]))
8274 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8276 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8277 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8281 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
8282 (parallel [(const_int 0)]))
8283 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8285 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8286 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
8289 ph<plusminus_mnemonic>d\t{%2, %0|%0, %2}
8290 vph<plusminus_mnemonic>d\t{%2, %1, %0|%0, %1, %2}"
8291 [(set_attr "isa" "noavx,avx")
8292 (set_attr "type" "sseiadd")
8293 (set_attr "atom_unit" "complex")
8294 (set_attr "prefix_data16" "1,*")
8295 (set_attr "prefix_extra" "1")
8296 (set_attr "prefix" "orig,vex")
8297 (set_attr "mode" "TI")])
;; 64-bit (MMX register, constraint "y") horizontal doubleword operation,
;; ph<op>d.  The RTL picks SImode elements 0 and 1 of operand 1 and of
;; operand 2.  Operand 1 is tied to the destination.  prefix_rex is
;; computed from x86_extended_reg_mentioned_p.
8299 (define_insn "ssse3_ph<plusminus_mnemonic>dv2si3"
8300 [(set (match_operand:V2SI 0 "register_operand" "=y")
8304 (match_operand:V2SI 1 "register_operand" "0")
8305 (parallel [(const_int 0)]))
8306 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8309 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
8310 (parallel [(const_int 0)]))
8311 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
8313 "ph<plusminus_mnemonic>d\t{%2, %0|%0, %2}"
8314 [(set_attr "type" "sseiadd")
8315 (set_attr "atom_unit" "complex")
8316 (set_attr "prefix_extra" "1")
8317 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8318 (set_attr "mode" "DI")])
;; 256-bit vpmaddubsw (AVX only, three-operand): V32QI inputs, V16HI
;; result.  The RTL first selects the even-indexed bytes (0, 2, ... 30)
;; of operand 1 and of operand 2, then the odd-indexed bytes
;; (1, 3, ... 31) of each as V16QI vectors.
8320 (define_insn "avx2_pmaddubsw256"
8321 [(set (match_operand:V16HI 0 "register_operand" "=x")
8326 (match_operand:V32QI 1 "register_operand" "x")
8327 (parallel [(const_int 0) (const_int 2)
8328 (const_int 4) (const_int 6)
8329 (const_int 8) (const_int 10)
8330 (const_int 12) (const_int 14)
8331 (const_int 16) (const_int 18)
8332 (const_int 20) (const_int 22)
8333 (const_int 24) (const_int 26)
8334 (const_int 28) (const_int 30)])))
8337 (match_operand:V32QI 2 "nonimmediate_operand" "xm")
8338 (parallel [(const_int 0) (const_int 2)
8339 (const_int 4) (const_int 6)
8340 (const_int 8) (const_int 10)
8341 (const_int 12) (const_int 14)
8342 (const_int 16) (const_int 18)
8343 (const_int 20) (const_int 22)
8344 (const_int 24) (const_int 26)
8345 (const_int 28) (const_int 30)]))))
8348 (vec_select:V16QI (match_dup 1)
8349 (parallel [(const_int 1) (const_int 3)
8350 (const_int 5) (const_int 7)
8351 (const_int 9) (const_int 11)
8352 (const_int 13) (const_int 15)
8353 (const_int 17) (const_int 19)
8354 (const_int 21) (const_int 23)
8355 (const_int 25) (const_int 27)
8356 (const_int 29) (const_int 31)])))
8358 (vec_select:V16QI (match_dup 2)
8359 (parallel [(const_int 1) (const_int 3)
8360 (const_int 5) (const_int 7)
8361 (const_int 9) (const_int 11)
8362 (const_int 13) (const_int 15)
8363 (const_int 17) (const_int 19)
8364 (const_int 21) (const_int 23)
8365 (const_int 25) (const_int 27)
8366 (const_int 29) (const_int 31)]))))))]
8368 "vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
8369 [(set_attr "type" "sseiadd")
8370 (set_attr "prefix_extra" "1")
8371 (set_attr "prefix" "vex")
8372 (set_attr "mode" "OI")])
;; 128-bit pmaddubsw / vpmaddubsw: V16QI inputs, V8HI result.  The RTL
;; first selects the even-indexed bytes (0, 2, ... 14) of operand 1 and
;; of operand 2, then the odd-indexed bytes (1, 3, ... 15) of each as
;; V8QI vectors.  Alternative 0 is the two-operand non-AVX form,
;; alternative 1 the three-operand AVX form.
8374 (define_insn "ssse3_pmaddubsw128"
8375 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
8380 (match_operand:V16QI 1 "register_operand" "0,x")
8381 (parallel [(const_int 0) (const_int 2)
8382 (const_int 4) (const_int 6)
8383 (const_int 8) (const_int 10)
8384 (const_int 12) (const_int 14)])))
8387 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")
8388 (parallel [(const_int 0) (const_int 2)
8389 (const_int 4) (const_int 6)
8390 (const_int 8) (const_int 10)
8391 (const_int 12) (const_int 14)]))))
8394 (vec_select:V8QI (match_dup 1)
8395 (parallel [(const_int 1) (const_int 3)
8396 (const_int 5) (const_int 7)
8397 (const_int 9) (const_int 11)
8398 (const_int 13) (const_int 15)])))
8400 (vec_select:V8QI (match_dup 2)
8401 (parallel [(const_int 1) (const_int 3)
8402 (const_int 5) (const_int 7)
8403 (const_int 9) (const_int 11)
8404 (const_int 13) (const_int 15)]))))))]
8407 pmaddubsw\t{%2, %0|%0, %2}
8408 vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
8409 [(set_attr "isa" "noavx,avx")
8410 (set_attr "type" "sseiadd")
8411 (set_attr "atom_unit" "simul")
8412 (set_attr "prefix_data16" "1,*")
8413 (set_attr "prefix_extra" "1")
8414 (set_attr "prefix" "orig,vex")
8415 (set_attr "mode" "TI")])
;; 64-bit (MMX register, constraint "y") pmaddubsw: V8QI inputs, V4HI
;; result.  The RTL first selects the even-indexed bytes (0, 2, 4, 6) of
;; operand 1 and of operand 2, then the odd-indexed bytes (1, 3, 5, 7) of
;; each as V4QI vectors.  prefix_rex is computed from
;; x86_extended_reg_mentioned_p.
8417 (define_insn "ssse3_pmaddubsw"
8418 [(set (match_operand:V4HI 0 "register_operand" "=y")
8423 (match_operand:V8QI 1 "register_operand" "0")
8424 (parallel [(const_int 0) (const_int 2)
8425 (const_int 4) (const_int 6)])))
8428 (match_operand:V8QI 2 "nonimmediate_operand" "ym")
8429 (parallel [(const_int 0) (const_int 2)
8430 (const_int 4) (const_int 6)]))))
8433 (vec_select:V4QI (match_dup 1)
8434 (parallel [(const_int 1) (const_int 3)
8435 (const_int 5) (const_int 7)])))
8437 (vec_select:V4QI (match_dup 2)
8438 (parallel [(const_int 1) (const_int 3)
8439 (const_int 5) (const_int 7)]))))))]
8441 "pmaddubsw\t{%2, %0|%0, %2}"
8442 [(set_attr "type" "sseiadd")
8443 (set_attr "atom_unit" "simul")
8444 (set_attr "prefix_extra" "1")
8445 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8446 (set_attr "mode" "DI")])
;; Mode iterator PMULHRSW: V4HI and V8HI unconditionally, V16HI only when
;; TARGET_AVX2 holds.
8448 (define_mode_iterator PMULHRSW
8449 [V4HI V8HI (V16HI "TARGET_AVX2")])
;; Expander for pmulhrsw over the PMULHRSW modes.  The RTL sign-extends
;; both inputs to <ssedoublemode>, multiplies them, shifts right, adds
;; inside a plus and shifts right again.  The preparation code sets
;; operand 3 to CONST1_RTX of the vector mode (presumably the rounding
;; term of the plus -- confirm) and calls
;; ix86_fixup_binary_operands_no_copy for MULT.
8451 (define_expand "<ssse3_avx2>_pmulhrsw<mode>3"
8452 [(set (match_operand:PMULHRSW 0 "register_operand")
8454 (lshiftrt:<ssedoublemode>
8455 (plus:<ssedoublemode>
8456 (lshiftrt:<ssedoublemode>
8457 (mult:<ssedoublemode>
8458 (sign_extend:<ssedoublemode>
8459 (match_operand:PMULHRSW 1 "nonimmediate_operand"))
8460 (sign_extend:<ssedoublemode>
8461 (match_operand:PMULHRSW 2 "nonimmediate_operand")))
8467 operands[3] = CONST1_RTX(<MODE>mode);
8468 ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
;; Matching insn for pmulhrsw on SSE registers (iterator VI2_AVX2).
;; Operand 1 is marked commutative ("%"); operand 3 must satisfy
;; const1_operand.  The condition requires TARGET_SSSE3 and
;; ix86_binary_operator_ok for MULT.  Alternative 0 is the two-operand
;; non-AVX form, alternative 1 the three-operand AVX form.
8471 (define_insn "*<ssse3_avx2>_pmulhrsw<mode>3"
8472 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
8474 (lshiftrt:<ssedoublemode>
8475 (plus:<ssedoublemode>
8476 (lshiftrt:<ssedoublemode>
8477 (mult:<ssedoublemode>
8478 (sign_extend:<ssedoublemode>
8479 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,x"))
8480 (sign_extend:<ssedoublemode>
8481 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,xm")))
8483 (match_operand:VI2_AVX2 3 "const1_operand"))
8485 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
8487 pmulhrsw\t{%2, %0|%0, %2}
8488 vpmulhrsw\t{%2, %1, %0|%0, %1, %2}"
8489 [(set_attr "isa" "noavx,avx")
8490 (set_attr "type" "sseimul")
8491 (set_attr "prefix_data16" "1,*")
8492 (set_attr "prefix_extra" "1")
8493 (set_attr "prefix" "orig,vex")
8494 (set_attr "mode" "<sseinsnmode>")])
;; MMX-register (constraint "y") pmulhrsw on V4HI.  Operand 1 is marked
;; commutative and tied to the destination ("%0"); operand 3 must satisfy
;; const1_operand.  The condition requires TARGET_SSSE3 and
;; ix86_binary_operator_ok for MULT in V4HImode.
8496 (define_insn "*ssse3_pmulhrswv4hi3"
8497 [(set (match_operand:V4HI 0 "register_operand" "=y")
8504 (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
8506 (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
8508 (match_operand:V4HI 3 "const1_operand"))
8510 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
8511 "pmulhrsw\t{%2, %0|%0, %2}"
8512 [(set_attr "type" "sseimul")
8513 (set_attr "prefix_extra" "1")
8514 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8515 (set_attr "mode" "DI")])
;; pshufb / vpshufb on SSE registers (iterator VI1_AVX2); the two inputs
;; are held in an unspec operand vector.  Alternative 0 is the
;; two-operand non-AVX form, alternative 1 the three-operand AVX form.
;; Both alternatives carry btver2_decode "vector".
8517 (define_insn "<ssse3_avx2>_pshufb<mode>3"
8518 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
8520 [(match_operand:VI1_AVX2 1 "register_operand" "0,x")
8521 (match_operand:VI1_AVX2 2 "nonimmediate_operand" "xm,xm")]
8525 pshufb\t{%2, %0|%0, %2}
8526 vpshufb\t{%2, %1, %0|%0, %1, %2}"
8527 [(set_attr "isa" "noavx,avx")
8528 (set_attr "type" "sselog1")
8529 (set_attr "prefix_data16" "1,*")
8530 (set_attr "prefix_extra" "1")
8531 (set_attr "prefix" "orig,vex")
8532 (set_attr "btver2_decode" "vector,vector")
8533 (set_attr "mode" "<sseinsnmode>")])
;; MMX-register (constraint "y") pshufb on V8QI; operand 1 is tied to the
;; destination.  The ";" that follows the template string starts an
;; (empty) md comment and has no effect.
8535 (define_insn "ssse3_pshufbv8qi3"
8536 [(set (match_operand:V8QI 0 "register_operand" "=y")
8537 (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0")
8538 (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
8541 "pshufb\t{%2, %0|%0, %2}";
8542 [(set_attr "type" "sselog1")
8543 (set_attr "prefix_extra" "1")
8544 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8545 (set_attr "mode" "DI")])
;; psign<suffix> / vpsign<suffix> on SSE registers (iterator VI124_AVX2);
;; the two inputs are held in an unspec operand vector.  Alternative 0 is
;; the two-operand non-AVX form, alternative 1 the three-operand AVX
;; form.
8547 (define_insn "<ssse3_avx2>_psign<mode>3"
8548 [(set (match_operand:VI124_AVX2 0 "register_operand" "=x,x")
8550 [(match_operand:VI124_AVX2 1 "register_operand" "0,x")
8551 (match_operand:VI124_AVX2 2 "nonimmediate_operand" "xm,xm")]
8555 psign<ssemodesuffix>\t{%2, %0|%0, %2}
8556 vpsign<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
8557 [(set_attr "isa" "noavx,avx")
8558 (set_attr "type" "sselog1")
8559 (set_attr "prefix_data16" "1,*")
8560 (set_attr "prefix_extra" "1")
8561 (set_attr "prefix" "orig,vex")
8562 (set_attr "mode" "<sseinsnmode>")])
;; MMX-register (constraint "y") psign over the MMXMODEI modes; the
;; mnemonic suffix comes from <mmxvecsize>.  Operand 1 is tied to the
;; destination.  The ";" that follows the template string starts an
;; (empty) md comment and has no effect.
8564 (define_insn "ssse3_psign<mode>3"
8565 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
8567 [(match_operand:MMXMODEI 1 "register_operand" "0")
8568 (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
8571 "psign<mmxvecsize>\t{%2, %0|%0, %2}";
8572 [(set_attr "type" "sselog1")
8573 (set_attr "prefix_extra" "1")
8574 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8575 (set_attr "mode" "DI")])
;; palignr / vpalignr on SSE registers (iterator SSESCALARMODE).
;; Operand 3 is an immediate accepted by const_0_to_255_mul_8_operand;
;; the output code divides it by 8 before printing (presumably converting
;; a bit count into the byte count the instruction encodes -- confirm).
;; The template is then chosen by which_alternative: two-operand palignr
;; or three-operand vpalignr.
8577 (define_insn "<ssse3_avx2>_palignr<mode>"
8578 [(set (match_operand:SSESCALARMODE 0 "register_operand" "=x,x")
8579 (unspec:SSESCALARMODE
8580 [(match_operand:SSESCALARMODE 1 "register_operand" "0,x")
8581 (match_operand:SSESCALARMODE 2 "nonimmediate_operand" "xm,xm")
8582 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n,n")]
8586 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
8588 switch (which_alternative)
8591 return "palignr\t{%3, %2, %0|%0, %2, %3}";
8593 return "vpalignr\t{%3, %2, %1, %0|%0, %1, %2, %3}";
8598 [(set_attr "isa" "noavx,avx")
8599 (set_attr "type" "sseishft")
8600 (set_attr "atom_unit" "sishuf")
8601 (set_attr "prefix_data16" "1,*")
8602 (set_attr "prefix_extra" "1")
8603 (set_attr "length_immediate" "1")
8604 (set_attr "prefix" "orig,vex")
8605 (set_attr "mode" "<sseinsnmode>")])
;; MMX-register (constraint "y") palignr on DImode values.  Operand 3 is
;; an immediate accepted by const_0_to_255_mul_8_operand; the output code
;; divides it by 8 before printing (presumably converting a bit count
;; into a byte count -- confirm).  Operand 1 is tied to the destination.
8607 (define_insn "ssse3_palignrdi"
8608 [(set (match_operand:DI 0 "register_operand" "=y")
8609 (unspec:DI [(match_operand:DI 1 "register_operand" "0")
8610 (match_operand:DI 2 "nonimmediate_operand" "ym")
8611 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
8615 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
8616 return "palignr\t{%3, %2, %0|%0, %2, %3}";
8618 [(set_attr "type" "sseishft")
8619 (set_attr "atom_unit" "sishuf")
8620 (set_attr "prefix_extra" "1")
8621 (set_attr "length_immediate" "1")
8622 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8623 (set_attr "mode" "DI")])
;; Vector pattern named abs<mode>2 for the VI124_AVX2 modes: a single
;; (v)pabs<suffix> from a register or memory source into a register
;; (constraints "vm" -> "=v").
8625 (define_insn "abs<mode>2"
8626 [(set (match_operand:VI124_AVX2 0 "register_operand" "=v")
8628 (match_operand:VI124_AVX2 1 "nonimmediate_operand" "vm")))]
8630 "%vpabs<ssemodesuffix>\t{%1, %0|%0, %1}"
8631 [(set_attr "type" "sselog1")
8632 (set_attr "prefix_data16" "1")
8633 (set_attr "prefix_extra" "1")
8634 (set_attr "prefix" "maybe_vex")
8635 (set_attr "mode" "<sseinsnmode>")])
;; MMX-register (constraint "y") pattern named abs<mode>2 for the
;; MMXMODEI modes: a single pabs<size> from a register or memory source.
;; The ";" that follows the template string starts an (empty) md comment
;; and has no effect.
8637 (define_insn "abs<mode>2"
8638 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
8640 (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
8642 "pabs<mmxvecsize>\t{%1, %0|%0, %1}";
8643 [(set_attr "type" "sselog1")
8644 (set_attr "prefix_rep" "0")
8645 (set_attr "prefix_extra" "1")
8646 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8647 (set_attr "mode" "DI")])
8649 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8651 ;; AMD SSE4A instructions
8653 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; SSE4A movnt<suffix>: stores a scalar floating-point value (iterator
;; MODEF) from an SSE register to memory; the source is wrapped in an
;; unspec operand vector.
8655 (define_insn "sse4a_movnt<mode>"
8656 [(set (match_operand:MODEF 0 "memory_operand" "=m")
8658 [(match_operand:MODEF 1 "register_operand" "x")]
8661 "movnt<ssemodesuffix>\t{%1, %0|%0, %1}"
8662 [(set_attr "type" "ssemov")
8663 (set_attr "mode" "<MODE>")])
;; SSE4A movnt<suffix>, vector-operand form: stores element 0 of a
;; 128-bit floating-point vector register (iterator VF_128) to a memory
;; operand of the scalar mode.
8665 (define_insn "sse4a_vmmovnt<mode>"
8666 [(set (match_operand:<ssescalarmode> 0 "memory_operand" "=m")
8667 (unspec:<ssescalarmode>
8668 [(vec_select:<ssescalarmode>
8669 (match_operand:VF_128 1 "register_operand" "x")
8670 (parallel [(const_int 0)]))]
8673 "movnt<ssescalarmodesuffix>\t{%1, %0|%0, %1}"
8674 [(set_attr "type" "ssemov")
8675 (set_attr "mode" "<ssescalarmode>")])
;; SSE4A extrq, immediate form: operand 1 is tied to the destination and
;; operands 2 and 3 are immediates accepted by const_0_to_255_operand.
;; length_immediate is 2 because two immediate bytes are encoded.
8677 (define_insn "sse4a_extrqi"
8678 [(set (match_operand:V2DI 0 "register_operand" "=x")
8679 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
8680 (match_operand 2 "const_0_to_255_operand")
8681 (match_operand 3 "const_0_to_255_operand")]
8684 "extrq\t{%3, %2, %0|%0, %2, %3}"
8685 [(set_attr "type" "sse")
8686 (set_attr "prefix_data16" "1")
8687 (set_attr "length_immediate" "2")
8688 (set_attr "mode" "TI")])
;; SSE4A extrq, register form: operand 1 (V2DI) is tied to the
;; destination and operand 2 is a V16QI register.
8690 (define_insn "sse4a_extrq"
8691 [(set (match_operand:V2DI 0 "register_operand" "=x")
8692 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
8693 (match_operand:V16QI 2 "register_operand" "x")]
8696 "extrq\t{%2, %0|%0, %2}"
8697 [(set_attr "type" "sse")
8698 (set_attr "prefix_data16" "1")
8699 (set_attr "mode" "TI")])
;; Immediate form of insertq.  Operand 1 is tied to the output ("0"),
;; operand 2 is the second V2DI register source, and operands 3 and 4 are
;; constants in 0..255 printed as two immediates (length_immediate 2).
;; Encoding attributes: no data16 prefix, rep prefix present.
8701 (define_insn "sse4a_insertqi"
8702 [(set (match_operand:V2DI 0 "register_operand" "=x")
8703 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
8704 (match_operand:V2DI 2 "register_operand" "x")
8705 (match_operand 3 "const_0_to_255_operand")
8706 (match_operand 4 "const_0_to_255_operand")]
8709 "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
8710 [(set_attr "type" "sseins")
8711 (set_attr "prefix_data16" "0")
8712 (set_attr "prefix_rep" "1")
8713 (set_attr "length_immediate" "2")
8714 (set_attr "mode" "TI")])
;; Register form of insertq.  Operand 1 is tied to the output ("0");
;; operand 2 is the second V2DI register source.  Same prefix attributes
;; as the immediate form, but no immediate bytes.
8716 (define_insn "sse4a_insertq"
8717 [(set (match_operand:V2DI 0 "register_operand" "=x")
8718 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
8719 (match_operand:V2DI 2 "register_operand" "x")]
8722 "insertq\t{%2, %0|%0, %2}"
8723 [(set_attr "type" "sseins")
8724 (set_attr "prefix_data16" "0")
8725 (set_attr "prefix_rep" "1")
8726 (set_attr "mode" "TI")])
8728 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8730 ;; Intel SSE4.1 instructions
8732 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Immediate-controlled floating-point blend, expressed as a vec_merge of
;; operand 2 and operand 1 under the constant mask operand 3 (range
;; 0..<blendbits>).  Two alternatives: the non-AVX one ties operand 1 to
;; the output and prints a two-operand blend; the AVX one prints the
;; three-operand vblend.
8734 (define_insn "<sse4_1>_blend<ssemodesuffix><avxsizesuffix>"
8735 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
8736 (vec_merge:VF_128_256
8737 (match_operand:VF_128_256 2 "nonimmediate_operand" "xm,xm")
8738 (match_operand:VF_128_256 1 "register_operand" "0,x")
8739 (match_operand:SI 3 "const_0_to_<blendbits>_operand")))]
8742 blend<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
8743 vblend<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8744 [(set_attr "isa" "noavx,avx")
8745 (set_attr "type" "ssemov")
8746 (set_attr "length_immediate" "1")
8747 (set_attr "prefix_data16" "1,*")
8748 (set_attr "prefix_extra" "1")
8749 (set_attr "prefix" "orig,vex")
8750 (set_attr "mode" "<MODE>")])
;; Variable floating-point blend: the selector is register operand 3
;; rather than an immediate.  In the non-AVX alternative operand 1 is tied
;; to the output and operand 3 must satisfy "Yz" (presumably the fixed
;; xmm0 register -- confirm in constraints.md); the AVX alternative
;; accepts any SSE register for every operand.
;; NOTE(review): length_immediate is "1" for both alternatives here, while
;; the integer counterpart (pblendvb) uses "*,1" -- confirm whether the
;; non-AVX alternative should also be "*".
8752 (define_insn "<sse4_1>_blendv<ssemodesuffix><avxsizesuffix>"
8753 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
8755 [(match_operand:VF_128_256 1 "register_operand" "0,x")
8756 (match_operand:VF_128_256 2 "nonimmediate_operand" "xm,xm")
8757 (match_operand:VF_128_256 3 "register_operand" "Yz,x")]
8761 blendv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
8762 vblendv<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8763 [(set_attr "isa" "noavx,avx")
8764 (set_attr "type" "ssemov")
8765 (set_attr "length_immediate" "1")
8766 (set_attr "prefix_data16" "1,*")
8767 (set_attr "prefix_extra" "1")
8768 (set_attr "prefix" "orig,vex")
8769 (set_attr "btver2_decode" "vector,vector")
8770 (set_attr "mode" "<MODE>")])
;; dp<ssemodesuffix> / vdp<ssemodesuffix> with an 8-bit immediate
;; (operand 3, 0..255).  Operand 1 carries the "%" commutativity marker,
;; so it may be swapped with operand 2 during operand matching; in the
;; non-AVX alternative it is tied to the output.
8772 (define_insn "<sse4_1>_dp<ssemodesuffix><avxsizesuffix>"
8773 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
8775 [(match_operand:VF_128_256 1 "nonimmediate_operand" "%0,x")
8776 (match_operand:VF_128_256 2 "nonimmediate_operand" "xm,xm")
8777 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
8781 dp<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
8782 vdp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8783 [(set_attr "isa" "noavx,avx")
8784 (set_attr "type" "ssemul")
8785 (set_attr "length_immediate" "1")
8786 (set_attr "prefix_data16" "1,*")
8787 (set_attr "prefix_extra" "1")
8788 (set_attr "prefix" "orig,vex")
8789 (set_attr "btver2_decode" "vector,vector")
8790 (set_attr "mode" "<MODE>")])
;; movntdqa load: memory operand 1 ("m") is loaded into SSE register
;; operand 0 through an unspec.  The "%v" template prefix together with
;; prefix "maybe_vex" lets one pattern serve both encodings.
8792 (define_insn "<sse4_1_avx2>_movntdqa"
8793 [(set (match_operand:VI8_AVX2 0 "register_operand" "=x")
8794 (unspec:VI8_AVX2 [(match_operand:VI8_AVX2 1 "memory_operand" "m")]
8797 "%vmovntdqa\t{%1, %0|%0, %1}"
8798 [(set_attr "type" "ssemov")
8799 (set_attr "prefix_extra" "1")
8800 (set_attr "prefix" "maybe_vex")
8801 (set_attr "mode" "<sseinsnmode>")])
;; mpsadbw / vmpsadbw on the VI1_AVX2 modes with an 8-bit immediate
;; (operand 3).  Non-AVX alternative: operand 1 tied to the output,
;; two-operand form.  AVX alternative: three-operand form.
8803 (define_insn "<sse4_1_avx2>_mpsadbw"
8804 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
8806 [(match_operand:VI1_AVX2 1 "register_operand" "0,x")
8807 (match_operand:VI1_AVX2 2 "nonimmediate_operand" "xm,xm")
8808 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
8812 mpsadbw\t{%3, %2, %0|%0, %2, %3}
8813 vmpsadbw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8814 [(set_attr "isa" "noavx,avx")
8815 (set_attr "type" "sselog1")
8816 (set_attr "length_immediate" "1")
8817 (set_attr "prefix_extra" "1")
8818 (set_attr "prefix" "orig,vex")
8819 (set_attr "btver2_decode" "vector,vector")
8820 (set_attr "mode" "<sseinsnmode>")])
;; 256-bit vpackusdw: two V8SI sources (operand 1 a register, operand 2 a
;; register or memory) produce one V16HI result.  VEX-only, so the
;; three-operand template is used unconditionally.
8822 (define_insn "avx2_packusdw"
8823 [(set (match_operand:V16HI 0 "register_operand" "=x")
8826 (match_operand:V8SI 1 "register_operand" "x"))
8828 (match_operand:V8SI 2 "nonimmediate_operand" "xm"))))]
8830 "vpackusdw\t{%2, %1, %0|%0, %1, %2}"
8831 [(set_attr "type" "sselog")
8832 (set_attr "prefix_extra" "1")
8833 (set_attr "prefix" "vex")
8834 (set_attr "mode" "OI")])
;; 128-bit packusdw: two V4SI sources produce one V8HI result.
;; Non-AVX alternative ties operand 1 to the output; the AVX alternative
;; prints the three-operand vpackusdw.
8836 (define_insn "sse4_1_packusdw"
8837 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
8840 (match_operand:V4SI 1 "register_operand" "0,x"))
8842 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm"))))]
8845 packusdw\t{%2, %0|%0, %2}
8846 vpackusdw\t{%2, %1, %0|%0, %1, %2}"
8847 [(set_attr "isa" "noavx,avx")
8848 (set_attr "type" "sselog")
8849 (set_attr "prefix_extra" "1")
8850 (set_attr "prefix" "orig,vex")
8851 (set_attr "mode" "TI")])
;; Variable byte blend with a register selector (operand 3).  The non-AVX
;; alternative requires "Yz" for the selector and ties operand 1 to the
;; output; the AVX alternative takes any SSE register.  length_immediate
;; is "*,1": only the AVX alternative is given an explicit immediate byte.
8853 (define_insn "<sse4_1_avx2>_pblendvb"
8854 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
8856 [(match_operand:VI1_AVX2 1 "register_operand" "0,x")
8857 (match_operand:VI1_AVX2 2 "nonimmediate_operand" "xm,xm")
8858 (match_operand:VI1_AVX2 3 "register_operand" "Yz,x")]
8862 pblendvb\t{%3, %2, %0|%0, %2, %3}
8863 vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8864 [(set_attr "isa" "noavx,avx")
8865 (set_attr "type" "ssemov")
8866 (set_attr "prefix_extra" "1")
8867 (set_attr "length_immediate" "*,1")
8868 (set_attr "prefix" "orig,vex")
8869 (set_attr "btver2_decode" "vector,vector")
8870 (set_attr "mode" "<sseinsnmode>")])
;; 128-bit word blend on V8HI under an 8-bit immediate mask (operand 3).
;; Operand 2 is listed before operand 1 in the RTL; the non-AVX
;; alternative ties operand 1 to the output.
8872 (define_insn "sse4_1_pblendw"
8873 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
8875 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
8876 (match_operand:V8HI 1 "register_operand" "0,x")
8877 (match_operand:SI 3 "const_0_to_255_operand" "n,n")))]
8880 pblendw\t{%3, %2, %0|%0, %2, %3}
8881 vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8882 [(set_attr "isa" "noavx,avx")
8883 (set_attr "type" "ssemov")
8884 (set_attr "prefix_extra" "1")
8885 (set_attr "length_immediate" "1")
8886 (set_attr "prefix" "orig,vex")
8887 (set_attr "mode" "TI")])
8889 ;; The builtin uses an 8-bit immediate. Expand that.
;; Expander for the V16HI word blend.  Operand 3 arrives as a constant in
;; 0..255; the preparation code rewrites it to a 16-bit value whose high
;; byte repeats the low byte.
8890 (define_expand "avx2_pblendw"
8891 [(set (match_operand:V16HI 0 "register_operand")
8893 (match_operand:V16HI 2 "nonimmediate_operand")
8894 (match_operand:V16HI 1 "register_operand")
8895 (match_operand:SI 3 "const_0_to_255_operand")))]
8898 HOST_WIDE_INT val = INTVAL (operands[3]) & 0xff;
/* Replicate the 8-bit mask into bits 8..15.  */
8899 operands[3] = GEN_INT (val << 8 | val);
;; Matching insn for the V16HI word blend.  Operand 3 must satisfy
;; avx2_pblendw_operand; before printing, the output code reduces it to
;; its low 8 bits so that vpblendw is emitted with a one-byte immediate.
8902 (define_insn "*avx2_pblendw"
8903 [(set (match_operand:V16HI 0 "register_operand" "=x")
8905 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
8906 (match_operand:V16HI 1 "register_operand" "x")
8907 (match_operand:SI 3 "avx2_pblendw_operand" "n")))]
8910 operands[3] = GEN_INT (INTVAL (operands[3]) & 0xff);
8911 return "vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}";
8913 [(set_attr "type" "ssemov")
8914 (set_attr "prefix_extra" "1")
8915 (set_attr "length_immediate" "1")
8916 (set_attr "prefix" "vex")
8917 (set_attr "mode" "OI")])
;; vpblendd on the VI4_AVX2 modes under an 8-bit immediate mask
;; (operand 3).  Single alternative, VEX three-operand form only.
8919 (define_insn "avx2_pblendd<mode>"
8920 [(set (match_operand:VI4_AVX2 0 "register_operand" "=x")
8922 (match_operand:VI4_AVX2 2 "nonimmediate_operand" "xm")
8923 (match_operand:VI4_AVX2 1 "register_operand" "x")
8924 (match_operand:SI 3 "const_0_to_255_operand" "n")))]
8926 "vpblendd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8927 [(set_attr "type" "ssemov")
8928 (set_attr "prefix_extra" "1")
8929 (set_attr "length_immediate" "1")
8930 (set_attr "prefix" "vex")
8931 (set_attr "mode" "<sseinsnmode>")])
;; phminposuw, modelled as UNSPEC_PHMINPOSUW over a single V8HI source
;; (register or memory).  "%v" / "maybe_vex" covers both encodings.
8933 (define_insn "sse4_1_phminposuw"
8934 [(set (match_operand:V8HI 0 "register_operand" "=x")
8935 (unspec:V8HI [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
8936 UNSPEC_PHMINPOSUW))]
8938 "%vphminposuw\t{%1, %0|%0, %1}"
8939 [(set_attr "type" "sselog1")
8940 (set_attr "prefix_extra" "1")
8941 (set_attr "prefix" "maybe_vex")
8942 (set_attr "mode" "TI")])
;; Byte-to-word extension of a whole V16QI source into V16HI, emitted as
;; vpmov<extsuffix>bw.  <code>/<extsuffix> come from the enclosing code
;; iterator, so one pattern covers each extension kind.
8944 (define_insn "avx2_<code>v16qiv16hi2"
8945 [(set (match_operand:V16HI 0 "register_operand" "=x")
8947 (match_operand:V16QI 1 "nonimmediate_operand" "xm")))]
8949 "vpmov<extsuffix>bw\t{%1, %0|%0, %1}"
8950 [(set_attr "type" "ssemov")
8951 (set_attr "prefix_extra" "1")
8952 (set_attr "prefix" "vex")
8953 (set_attr "mode" "OI")])
;; Byte-to-word extension into V8HI.  Only elements 0..7 of the V16QI
;; source are selected; the Intel-syntax template prints the source with
;; the %q operand modifier.
8955 (define_insn "sse4_1_<code>v8qiv8hi2"
8956 [(set (match_operand:V8HI 0 "register_operand" "=x")
8959 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
8960 (parallel [(const_int 0) (const_int 1)
8961 (const_int 2) (const_int 3)
8962 (const_int 4) (const_int 5)
8963 (const_int 6) (const_int 7)]))))]
8965 "%vpmov<extsuffix>bw\t{%1, %0|%0, %q1}"
8966 [(set_attr "type" "ssemov")
8967 (set_attr "prefix_extra" "1")
8968 (set_attr "prefix" "maybe_vex")
8969 (set_attr "mode" "TI")])
;; Byte-to-doubleword extension into V8SI.  Elements 0..7 of the V16QI
;; source are selected; the Intel-syntax template uses %q1.
8971 (define_insn "avx2_<code>v8qiv8si2"
8972 [(set (match_operand:V8SI 0 "register_operand" "=x")
8975 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
8976 (parallel [(const_int 0) (const_int 1)
8977 (const_int 2) (const_int 3)
8978 (const_int 4) (const_int 5)
8979 (const_int 6) (const_int 7)]))))]
8981 "vpmov<extsuffix>bd\t{%1, %0|%0, %q1}"
8982 [(set_attr "type" "ssemov")
8983 (set_attr "prefix_extra" "1")
8984 (set_attr "prefix" "vex")
8985 (set_attr "mode" "OI")])
;; Byte-to-doubleword extension into V4SI.  Elements 0..3 of the V16QI
;; source are selected; the Intel-syntax template uses %k1.
8987 (define_insn "sse4_1_<code>v4qiv4si2"
8988 [(set (match_operand:V4SI 0 "register_operand" "=x")
8991 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
8992 (parallel [(const_int 0) (const_int 1)
8993 (const_int 2) (const_int 3)]))))]
8995 "%vpmov<extsuffix>bd\t{%1, %0|%0, %k1}"
8996 [(set_attr "type" "ssemov")
8997 (set_attr "prefix_extra" "1")
8998 (set_attr "prefix" "maybe_vex")
8999 (set_attr "mode" "TI")])
;; Word-to-doubleword extension of a whole V8HI source into V8SI,
;; emitted as vpmov<extsuffix>wd (VEX only).
9001 (define_insn "avx2_<code>v8hiv8si2"
9002 [(set (match_operand:V8SI 0 "register_operand" "=x")
9004 (match_operand:V8HI 1 "nonimmediate_operand" "xm")))]
9006 "vpmov<extsuffix>wd\t{%1, %0|%0, %1}"
9007 [(set_attr "type" "ssemov")
9008 (set_attr "prefix_extra" "1")
9009 (set_attr "prefix" "vex")
9010 (set_attr "mode" "OI")])
;; Word-to-doubleword extension into V4SI.  Elements 0..3 of the V8HI
;; source are selected; the Intel-syntax template uses %q1.
9012 (define_insn "sse4_1_<code>v4hiv4si2"
9013 [(set (match_operand:V4SI 0 "register_operand" "=x")
9016 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
9017 (parallel [(const_int 0) (const_int 1)
9018 (const_int 2) (const_int 3)]))))]
9020 "%vpmov<extsuffix>wd\t{%1, %0|%0, %q1}"
9021 [(set_attr "type" "ssemov")
9022 (set_attr "prefix_extra" "1")
9023 (set_attr "prefix" "maybe_vex")
9024 (set_attr "mode" "TI")])
;; Byte-to-quadword extension into V4DI.  Elements 0..3 of the V16QI
;; source are selected; the Intel-syntax template uses %k1.
9026 (define_insn "avx2_<code>v4qiv4di2"
9027 [(set (match_operand:V4DI 0 "register_operand" "=x")
9030 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9031 (parallel [(const_int 0) (const_int 1)
9032 (const_int 2) (const_int 3)]))))]
9034 "vpmov<extsuffix>bq\t{%1, %0|%0, %k1}"
9035 [(set_attr "type" "ssemov")
9036 (set_attr "prefix_extra" "1")
9037 (set_attr "prefix" "vex")
9038 (set_attr "mode" "OI")])
;; Byte-to-quadword extension into V2DI.  Elements 0..1 of the V16QI
;; source are selected; the Intel-syntax template uses %w1.
9040 (define_insn "sse4_1_<code>v2qiv2di2"
9041 [(set (match_operand:V2DI 0 "register_operand" "=x")
9044 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9045 (parallel [(const_int 0) (const_int 1)]))))]
9047 "%vpmov<extsuffix>bq\t{%1, %0|%0, %w1}"
9048 [(set_attr "type" "ssemov")
9049 (set_attr "prefix_extra" "1")
9050 (set_attr "prefix" "maybe_vex")
9051 (set_attr "mode" "TI")])
;; Word-to-quadword extension into V4DI.  Elements 0..3 of the V8HI
;; source are selected; the Intel-syntax template uses %q1.
9053 (define_insn "avx2_<code>v4hiv4di2"
9054 [(set (match_operand:V4DI 0 "register_operand" "=x")
9057 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
9058 (parallel [(const_int 0) (const_int 1)
9059 (const_int 2) (const_int 3)]))))]
9061 "vpmov<extsuffix>wq\t{%1, %0|%0, %q1}"
9062 [(set_attr "type" "ssemov")
9063 (set_attr "prefix_extra" "1")
9064 (set_attr "prefix" "vex")
9065 (set_attr "mode" "OI")])
;; Word-to-quadword extension into V2DI.  Elements 0..1 of the V8HI
;; source are selected; the Intel-syntax template uses %k1.
9067 (define_insn "sse4_1_<code>v2hiv2di2"
9068 [(set (match_operand:V2DI 0 "register_operand" "=x")
9071 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
9072 (parallel [(const_int 0) (const_int 1)]))))]
9074 "%vpmov<extsuffix>wq\t{%1, %0|%0, %k1}"
9075 [(set_attr "type" "ssemov")
9076 (set_attr "prefix_extra" "1")
9077 (set_attr "prefix" "maybe_vex")
9078 (set_attr "mode" "TI")])
;; Doubleword-to-quadword extension of a whole V4SI source into V4DI,
;; emitted as vpmov<extsuffix>dq.  The template is unconditionally the
;; "v"-prefixed form, so the insn is marked prefix "vex" like the other
;; avx2_<code>... extension patterns; without it the prefix attribute
;; would fall back to its default and not describe the VEX encoding.
9080 (define_insn "avx2_<code>v4siv4di2"
9081 [(set (match_operand:V4DI 0 "register_operand" "=x")
9083 (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
9085 "vpmov<extsuffix>dq\t{%1, %0|%0, %1}"
9086 [(set_attr "type" "ssemov")
9087 (set_attr "prefix_extra" "1")
(set_attr "prefix" "vex")
9088 (set_attr "mode" "OI")])
;; Doubleword-to-quadword extension into V2DI.  Elements 0..1 of the V4SI
;; source are selected; the Intel-syntax template uses %q1.
9090 (define_insn "sse4_1_<code>v2siv2di2"
9091 [(set (match_operand:V2DI 0 "register_operand" "=x")
9094 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
9095 (parallel [(const_int 0) (const_int 1)]))))]
9097 "%vpmov<extsuffix>dq\t{%1, %0|%0, %q1}"
9098 [(set_attr "type" "ssemov")
9099 (set_attr "prefix_extra" "1")
9100 (set_attr "prefix" "maybe_vex")
9101 (set_attr "mode" "TI")])
9103 ;; vtestps/vtestpd are very similar to comiss and ucomiss when
9104 ;; setting FLAGS_REG, but they are not really compare instructions.
;; vtest<ssemodesuffix>: the only output is FLAGS_REG in CCmode, computed
;; by an unspec over two VF_128_256 inputs (operand 0 a register,
;; operand 1 a register or memory).  No vector register is written.
9105 (define_insn "avx_vtest<ssemodesuffix><avxsizesuffix>"
9106 [(set (reg:CC FLAGS_REG)
9107 (unspec:CC [(match_operand:VF_128_256 0 "register_operand" "x")
9108 (match_operand:VF_128_256 1 "nonimmediate_operand" "xm")]
9111 "vtest<ssemodesuffix>\t{%1, %0|%0, %1}"
9112 [(set_attr "type" "ssecomi")
9113 (set_attr "prefix_extra" "1")
9114 (set_attr "prefix" "vex")
9115 (set_attr "mode" "<MODE>")])
9117 ;; ptest is very similar to comiss and ucomiss when setting FLAGS_REG,
9118 ;; but it is not really a compare instruction.
;; 256-bit vptest on V4DI inputs; sets FLAGS_REG (CCmode) only.
9119 (define_insn "avx_ptest256"
9120 [(set (reg:CC FLAGS_REG)
9121 (unspec:CC [(match_operand:V4DI 0 "register_operand" "x")
9122 (match_operand:V4DI 1 "nonimmediate_operand" "xm")]
9125 "vptest\t{%1, %0|%0, %1}"
9126 [(set_attr "type" "ssecomi")
9127 (set_attr "prefix_extra" "1")
9128 (set_attr "prefix" "vex")
9129 (set_attr "btver2_decode" "vector")
9130 (set_attr "mode" "OI")])
;; 128-bit ptest on V2DI inputs; sets FLAGS_REG (CCmode) only.
;; "%v" / "maybe_vex" covers both the legacy and VEX encodings.
9132 (define_insn "sse4_1_ptest"
9133 [(set (reg:CC FLAGS_REG)
9134 (unspec:CC [(match_operand:V2DI 0 "register_operand" "x")
9135 (match_operand:V2DI 1 "nonimmediate_operand" "xm")]
9138 "%vptest\t{%1, %0|%0, %1}"
9139 [(set_attr "type" "ssecomi")
9140 (set_attr "prefix_extra" "1")
9141 (set_attr "prefix" "maybe_vex")
9142 (set_attr "mode" "TI")])
;; Packed round with an immediate control (operand 2, range 0..15).
;; Source operand 1 may be a register or memory.  prefix_data16 is not a
;; fixed value here: it is selected by a test on TARGET_AVX.
9144 (define_insn "<sse4_1>_round<ssemodesuffix><avxsizesuffix>"
9145 [(set (match_operand:VF_128_256 0 "register_operand" "=x")
9147 [(match_operand:VF_128_256 1 "nonimmediate_operand" "xm")
9148 (match_operand:SI 2 "const_0_to_15_operand" "n")]
9151 "%vround<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
9152 [(set_attr "type" "ssecvt")
9153 (set (attr "prefix_data16")
9155 (match_test "TARGET_AVX")
9157 (const_string "1")))
9158 (set_attr "prefix_extra" "1")
9159 (set_attr "length_immediate" "1")
9160 (set_attr "prefix" "maybe_vex")
9161 (set_attr "mode" "<MODE>")])
;; Round-then-convert expander for the VF1_128_256 modes: operand 1 is
;; rounded (control in operand 2) into a fresh pseudo of the same mode,
;; and that pseudo is then converted with fix_trunc into the integer
;; vector operand 0.
9163 (define_expand "<sse4_1>_round<ssemodesuffix>_sfix<avxsizesuffix>"
9164 [(match_operand:<sseintvecmode> 0 "register_operand")
9165 (match_operand:VF1_128_256 1 "nonimmediate_operand")
9166 (match_operand:SI 2 "const_0_to_15_operand")]
9169 rtx tmp = gen_reg_rtx (<MODE>mode);
9172 (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp, operands[1],
9175 (gen_fix_trunc<mode><sseintvecmodelower>2 (operands[0], tmp));
;; 512-bit V8DF round expander: forwards all three operands unchanged to
;; gen_avx512f_rndscalev8df.
9179 (define_expand "avx512f_roundpd512"
9180 [(match_operand:V8DF 0 "register_operand")
9181 (match_operand:V8DF 1 "nonimmediate_operand")
9182 (match_operand:SI 2 "const_0_to_15_operand")]
9185 emit_insn (gen_avx512f_rndscalev8df (operands[0], operands[1], operands[2]));
;; Round two VF2 vectors (control in operand 3) and pack the converted
;; results into operand 0.
;; For V2DF with TARGET_AVX and without TARGET_PREFER_AVX128, the two
;; inputs are concatenated into one V4DF, rounded with a single 256-bit
;; round, and converted with fix_truncv4dfv4si2.  Otherwise each input is
;; rounded into its own pseudo and the pair goes through
;; vec_pack_sfix_trunc_<mode>.
9189 (define_expand "<sse4_1>_round<ssemodesuffix>_vec_pack_sfix<avxsizesuffix>"
9190 [(match_operand:<ssepackfltmode> 0 "register_operand")
9191 (match_operand:VF2 1 "nonimmediate_operand")
9192 (match_operand:VF2 2 "nonimmediate_operand")
9193 (match_operand:SI 3 "const_0_to_15_operand")]
9198 if (<MODE>mode == V2DFmode
9199 && TARGET_AVX && !TARGET_PREFER_AVX128)
9201 rtx tmp2 = gen_reg_rtx (V4DFmode);
9203 tmp0 = gen_reg_rtx (V4DFmode);
9204 tmp1 = force_reg (V2DFmode, operands[1]);
9206 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
9207 emit_insn (gen_avx_roundpd256 (tmp2, tmp0, operands[3]));
9208 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp2));
9212 tmp0 = gen_reg_rtx (<MODE>mode);
9213 tmp1 = gen_reg_rtx (<MODE>mode);
9216 (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp0, operands[1],
9219 (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp1, operands[2],
9222 (gen_vec_pack_sfix_trunc_<mode> (operands[0], tmp0, tmp1));
;; Scalar round on a VF_128 register.  Operand 2 is the value rounded
;; under the immediate control in operand 3 (0..15); operand 1 is a second
;; register input that the non-AVX alternative ties to the output.
9227 (define_insn "sse4_1_round<ssescalarmodesuffix>"
9228 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
9231 [(match_operand:VF_128 2 "register_operand" "x,x")
9232 (match_operand:SI 3 "const_0_to_15_operand" "n,n")]
9234 (match_operand:VF_128 1 "register_operand" "0,x")
9238 round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
9239 vround<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9240 [(set_attr "isa" "noavx,avx")
9241 (set_attr "type" "ssecvt")
9242 (set_attr "length_immediate" "1")
9243 (set_attr "prefix_data16" "1,*")
9244 (set_attr "prefix_extra" "1")
9245 (set_attr "prefix" "orig,vex")
9246 (set_attr "mode" "<MODE>")])
;; Vector round<mode>2 expander, available only when TARGET_ROUND holds
;; and -ftrapping-math is off.
;; The preparation code builds the constant 0.5 - 2^(-p-1), where p is the
;; format's precision (the existing comment calls it nextafter (0.5, 0.0)),
;; broadcasts it to a vector, and gives it the sign of operand 1 via
;; copysign; that becomes operands[3].  operands[4] is a fresh pseudo and
;; operands[5] is the ROUND_TRUNC control constant used by the final
;; pattern.
9248 (define_expand "round<mode>2"
9251 (match_operand:VF 1 "register_operand")
9253 (set (match_operand:VF 0 "register_operand")
9255 [(match_dup 4) (match_dup 5)]
9257 "TARGET_ROUND && !flag_trapping_math"
9259 enum machine_mode scalar_mode;
9260 const struct real_format *fmt;
9261 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
9264 scalar_mode = GET_MODE_INNER (<MODE>mode);
9266 /* load nextafter (0.5, 0.0) */
9267 fmt = REAL_MODE_FORMAT (scalar_mode);
9268 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, scalar_mode);
9269 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
9270 half = const_double_from_real_value (pred_half, scalar_mode);
9272 vec_half = ix86_build_const_vector (<MODE>mode, true, half);
9273 vec_half = force_reg (<MODE>mode, vec_half);
9275 operands[3] = gen_reg_rtx (<MODE>mode);
9276 emit_insn (gen_copysign<mode>3 (operands[3], vec_half, operands[1]));
9278 operands[4] = gen_reg_rtx (<MODE>mode);
9279 operands[5] = GEN_INT (ROUND_TRUNC);
;; round<mode>2 followed by conversion to the matching integer vector:
;; operand 1 is rounded into a fresh pseudo, which is then passed to
;; fix_trunc to produce operand 0.  Same enabling condition as
;; round<mode>2.
9282 (define_expand "round<mode>2_sfix"
9283 [(match_operand:<sseintvecmode> 0 "register_operand")
9284 (match_operand:VF1_128_256 1 "register_operand")]
9285 "TARGET_ROUND && !flag_trapping_math"
9287 rtx tmp = gen_reg_rtx (<MODE>mode);
9289 emit_insn (gen_round<mode>2 (tmp, operands[1]));
9292 (gen_fix_trunc<mode><sseintvecmodelower>2 (operands[0], tmp));
;; Round two VF2 vectors with round<mode>2 and pack the converted results
;; into operand 0.
;; For V2DF with TARGET_AVX and without TARGET_PREFER_AVX128, both inputs
;; are concatenated into one V4DF, rounded once with roundv4df2, and
;; converted with fix_truncv4dfv4si2.  Otherwise each input is rounded
;; separately and the pair goes through vec_pack_sfix_trunc_<mode>.
9296 (define_expand "round<mode>2_vec_pack_sfix"
9297 [(match_operand:<ssepackfltmode> 0 "register_operand")
9298 (match_operand:VF2 1 "register_operand")
9299 (match_operand:VF2 2 "register_operand")]
9300 "TARGET_ROUND && !flag_trapping_math"
9304 if (<MODE>mode == V2DFmode
9305 && TARGET_AVX && !TARGET_PREFER_AVX128)
9307 rtx tmp2 = gen_reg_rtx (V4DFmode);
9309 tmp0 = gen_reg_rtx (V4DFmode);
9310 tmp1 = force_reg (V2DFmode, operands[1]);
9312 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
9313 emit_insn (gen_roundv4df2 (tmp2, tmp0));
9314 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp2));
9318 tmp0 = gen_reg_rtx (<MODE>mode);
9319 tmp1 = gen_reg_rtx (<MODE>mode);
9321 emit_insn (gen_round<mode>2 (tmp0, operands[1]));
9322 emit_insn (gen_round<mode>2 (tmp1, operands[2]));
9325 (gen_vec_pack_sfix_trunc_<mode> (operands[0], tmp0, tmp1));
9330 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9332 ;; Intel SSE4.2 string/text processing instructions
9334 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Combined pcmpestr pattern with three results: SI operand 0 in "c",
;; V16QI operand 1 in "Yz", and FLAGS_REG.  Inputs are two V16QI vectors
;; (operands 2 and 4), their SI companions in "a" and "d" (operands 3
;; and 5), and an 8-bit immediate (operand 6).
;; The split requires can_create_pseudo_p ().  Its code records, for each
;; of the three results, whether curr_insn lacks a REG_UNUSED note for it
;; (ecx, xmm0, flags), then emits the pcmpestri / pcmpestrm / cconly
;; forms accordingly; when no result is used a NOTE_INSN_DELETED is
;; emitted instead.
;; NOTE(review): the guards around the first two emit_insn calls are not
;; visible in this excerpt -- confirm against the full file.
9336 (define_insn_and_split "sse4_2_pcmpestr"
9337 [(set (match_operand:SI 0 "register_operand" "=c,c")
9339 [(match_operand:V16QI 2 "register_operand" "x,x")
9340 (match_operand:SI 3 "register_operand" "a,a")
9341 (match_operand:V16QI 4 "nonimmediate_operand" "x,m")
9342 (match_operand:SI 5 "register_operand" "d,d")
9343 (match_operand:SI 6 "const_0_to_255_operand" "n,n")]
9345 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
9353 (set (reg:CC FLAGS_REG)
9362 && can_create_pseudo_p ()"
9367 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
9368 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
9369 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
9372 emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
9373 operands[3], operands[4],
9374 operands[5], operands[6]));
9376 emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
9377 operands[3], operands[4],
9378 operands[5], operands[6]));
9379 if (flags && !(ecx || xmm0))
9380 emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
9381 operands[2], operands[3],
9382 operands[4], operands[5],
9384 if (!(flags || ecx || xmm0))
9385 emit_note (NOTE_INSN_DELETED);
9389 [(set_attr "type" "sselog")
9390 (set_attr "prefix_data16" "1")
9391 (set_attr "prefix_extra" "1")
9392 (set_attr "length_immediate" "1")
9393 (set_attr "memory" "none,load")
9394 (set_attr "mode" "TI")])
;; Variant of the combined pcmpestr pattern whose second vector input is
;; a memory operand (operand 4) wrapped in UNSPEC_LOADU.  Single
;; alternative, "memory" attribute "load".  The split code has the same
;; shape as in sse4_2_pcmpestr and passes the memory operand straight to
;; the pcmpestri / pcmpestrm / cconly generators.
;; NOTE(review): the guards around the first two emit_insn calls are not
;; visible in this excerpt -- confirm against the full file.
9396 (define_insn_and_split "*sse4_2_pcmpestr_unaligned"
9397 [(set (match_operand:SI 0 "register_operand" "=c")
9399 [(match_operand:V16QI 2 "register_operand" "x")
9400 (match_operand:SI 3 "register_operand" "a")
9402 [(match_operand:V16QI 4 "memory_operand" "m")]
9404 (match_operand:SI 5 "register_operand" "d")
9405 (match_operand:SI 6 "const_0_to_255_operand" "n")]
9407 (set (match_operand:V16QI 1 "register_operand" "=Yz")
9411 (unspec:V16QI [(match_dup 4)] UNSPEC_LOADU)
9415 (set (reg:CC FLAGS_REG)
9419 (unspec:V16QI [(match_dup 4)] UNSPEC_LOADU)
9424 && can_create_pseudo_p ()"
9429 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
9430 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
9431 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
9434 emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
9435 operands[3], operands[4],
9436 operands[5], operands[6]));
9438 emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
9439 operands[3], operands[4],
9440 operands[5], operands[6]));
9441 if (flags && !(ecx || xmm0))
9442 emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
9443 operands[2], operands[3],
9444 operands[4], operands[5],
9446 if (!(flags || ecx || xmm0))
9447 emit_note (NOTE_INSN_DELETED);
9451 [(set_attr "type" "sselog")
9452 (set_attr "prefix_data16" "1")
9453 (set_attr "prefix_extra" "1")
9454 (set_attr "length_immediate" "1")
9455 (set_attr "memory" "load")
9456 (set_attr "mode" "TI")])
;; pcmpestri: SI result in "c" (operand 0) plus FLAGS_REG.  Operands 2 and
;; 4 are constrained to "a" and "d" but do not appear in the template;
;; only the two vectors and the immediate are printed.  The second
;; alternative takes operand 3 from memory ("memory" attribute
;; "none,load").
9458 (define_insn "sse4_2_pcmpestri"
9459 [(set (match_operand:SI 0 "register_operand" "=c,c")
9461 [(match_operand:V16QI 1 "register_operand" "x,x")
9462 (match_operand:SI 2 "register_operand" "a,a")
9463 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
9464 (match_operand:SI 4 "register_operand" "d,d")
9465 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
9467 (set (reg:CC FLAGS_REG)
9476 "%vpcmpestri\t{%5, %3, %1|%1, %3, %5}"
9477 [(set_attr "type" "sselog")
9478 (set_attr "prefix_data16" "1")
9479 (set_attr "prefix_extra" "1")
9480 (set_attr "prefix" "maybe_vex")
9481 (set_attr "length_immediate" "1")
9482 (set_attr "btver2_decode" "vector")
9483 (set_attr "memory" "none,load")
9484 (set_attr "mode" "TI")])
;; pcmpestrm: V16QI result in "Yz" (operand 0) plus FLAGS_REG.  Same
;; input layout as sse4_2_pcmpestri; operands 2 and 4 are constrained to
;; "a" and "d" and are not printed in the template.
9486 (define_insn "sse4_2_pcmpestrm"
9487 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
9489 [(match_operand:V16QI 1 "register_operand" "x,x")
9490 (match_operand:SI 2 "register_operand" "a,a")
9491 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
9492 (match_operand:SI 4 "register_operand" "d,d")
9493 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
9495 (set (reg:CC FLAGS_REG)
9504 "%vpcmpestrm\t{%5, %3, %1|%1, %3, %5}"
9505 [(set_attr "type" "sselog")
9506 (set_attr "prefix_data16" "1")
9507 (set_attr "prefix_extra" "1")
9508 (set_attr "length_immediate" "1")
9509 (set_attr "prefix" "maybe_vex")
9510 (set_attr "btver2_decode" "vector")
9511 (set_attr "memory" "none,load")
9512 (set_attr "mode" "TI")])
;; Flags-only pcmpestr: FLAGS_REG is the sole result; both would-be
;; outputs are scratch clobbers.  Four alternatives: the first two emit
;; pcmpestrm and clobber the "Yz" vector scratch (operand 0), the last two
;; emit pcmpestri and clobber the "c" scratch (operand 1).  Within each
;; pair the second alternative takes operand 4 from memory.
9514 (define_insn "sse4_2_pcmpestr_cconly"
9515 [(set (reg:CC FLAGS_REG)
9517 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
9518 (match_operand:SI 3 "register_operand" "a,a,a,a")
9519 (match_operand:V16QI 4 "nonimmediate_operand" "x,m,x,m")
9520 (match_operand:SI 5 "register_operand" "d,d,d,d")
9521 (match_operand:SI 6 "const_0_to_255_operand" "n,n,n,n")]
9523 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
9524 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
9527 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
9528 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
9529 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}
9530 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}"
9531 [(set_attr "type" "sselog")
9532 (set_attr "prefix_data16" "1")
9533 (set_attr "prefix_extra" "1")
9534 (set_attr "length_immediate" "1")
9535 (set_attr "memory" "none,load,none,load")
9536 (set_attr "btver2_decode" "vector,vector,vector,vector")
9537 (set_attr "prefix" "maybe_vex")
9538 (set_attr "mode" "TI")])
;; Combined pcmpistr pattern with three results: SI operand 0 in "c",
;; V16QI operand 1 in "Yz", and FLAGS_REG.  Inputs are two V16QI vectors
;; (operands 2 and 3) and an 8-bit immediate (operand 4).
;; The split requires can_create_pseudo_p ().  Its code records, for each
;; result, whether curr_insn lacks a REG_UNUSED note for it, then emits
;; the pcmpistri / pcmpistrm / cconly forms accordingly; when no result
;; is used a NOTE_INSN_DELETED is emitted instead.
;; NOTE(review): the guards around the first two emit_insn calls are not
;; visible in this excerpt -- confirm against the full file.
9540 (define_insn_and_split "sse4_2_pcmpistr"
9541 [(set (match_operand:SI 0 "register_operand" "=c,c")
9543 [(match_operand:V16QI 2 "register_operand" "x,x")
9544 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
9545 (match_operand:SI 4 "const_0_to_255_operand" "n,n")]
9547 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
9553 (set (reg:CC FLAGS_REG)
9560 && can_create_pseudo_p ()"
9565 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
9566 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
9567 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
9570 emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
9571 operands[3], operands[4]));
9573 emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
9574 operands[3], operands[4]));
9575 if (flags && !(ecx || xmm0))
9576 emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
9577 operands[2], operands[3],
9579 if (!(flags || ecx || xmm0))
9580 emit_note (NOTE_INSN_DELETED);
9584 [(set_attr "type" "sselog")
9585 (set_attr "prefix_data16" "1")
9586 (set_attr "prefix_extra" "1")
9587 (set_attr "length_immediate" "1")
9588 (set_attr "memory" "none,load")
9589 (set_attr "mode" "TI")])
;; Variant of the combined pcmpistr pattern whose second vector input is
;; a memory operand (operand 3) wrapped in UNSPEC_LOADU.  Single
;; alternative, "memory" attribute "load".  The split code has the same
;; shape as in sse4_2_pcmpistr.
;; NOTE(review): the guards around the first two emit_insn calls are not
;; visible in this excerpt -- confirm against the full file.
9591 (define_insn_and_split "*sse4_2_pcmpistr_unaligned"
9592 [(set (match_operand:SI 0 "register_operand" "=c")
9594 [(match_operand:V16QI 2 "register_operand" "x")
9596 [(match_operand:V16QI 3 "memory_operand" "m")]
9598 (match_operand:SI 4 "const_0_to_255_operand" "n")]
9600 (set (match_operand:V16QI 1 "register_operand" "=Yz")
9603 (unspec:V16QI [(match_dup 3)] UNSPEC_LOADU)
9606 (set (reg:CC FLAGS_REG)
9609 (unspec:V16QI [(match_dup 3)] UNSPEC_LOADU)
9613 && can_create_pseudo_p ()"
9618 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
9619 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
9620 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
9623 emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
9624 operands[3], operands[4]));
9626 emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
9627 operands[3], operands[4]));
9628 if (flags && !(ecx || xmm0))
9629 emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
9630 operands[2], operands[3],
9632 if (!(flags || ecx || xmm0))
9633 emit_note (NOTE_INSN_DELETED);
9637 [(set_attr "type" "sselog")
9638 (set_attr "prefix_data16" "1")
9639 (set_attr "prefix_extra" "1")
9640 (set_attr "length_immediate" "1")
9641 (set_attr "memory" "load")
9642 (set_attr "mode" "TI")])
;; pcmpistri: SI result in "c" (operand 0) plus FLAGS_REG, from two V16QI
;; inputs and an 8-bit immediate.  The second alternative takes operand 2
;; from memory.
9644 (define_insn "sse4_2_pcmpistri"
9645 [(set (match_operand:SI 0 "register_operand" "=c,c")
9647 [(match_operand:V16QI 1 "register_operand" "x,x")
9648 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
9649 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
9651 (set (reg:CC FLAGS_REG)
9658 "%vpcmpistri\t{%3, %2, %1|%1, %2, %3}"
9659 [(set_attr "type" "sselog")
9660 (set_attr "prefix_data16" "1")
9661 (set_attr "prefix_extra" "1")
9662 (set_attr "length_immediate" "1")
9663 (set_attr "prefix" "maybe_vex")
9664 (set_attr "memory" "none,load")
9665 (set_attr "btver2_decode" "vector")
9666 (set_attr "mode" "TI")])
;; pcmpistrm: V16QI result in "Yz" (operand 0) plus FLAGS_REG, from two
;; V16QI inputs and an 8-bit immediate.  The second alternative takes
;; operand 2 from memory.
9668 (define_insn "sse4_2_pcmpistrm"
9669 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
9671 [(match_operand:V16QI 1 "register_operand" "x,x")
9672 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
9673 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
9675 (set (reg:CC FLAGS_REG)
9682 "%vpcmpistrm\t{%3, %2, %1|%1, %2, %3}"
9683 [(set_attr "type" "sselog")
9684 (set_attr "prefix_data16" "1")
9685 (set_attr "prefix_extra" "1")
9686 (set_attr "length_immediate" "1")
9687 (set_attr "prefix" "maybe_vex")
9688 (set_attr "memory" "none,load")
9689 (set_attr "btver2_decode" "vector")
9690 (set_attr "mode" "TI")])
;; Flags-only pcmpistr: FLAGS_REG is the sole result; both would-be
;; outputs are scratch clobbers.  Four alternatives: the first two emit
;; pcmpistrm and clobber the "Yz" vector scratch (operand 0), the last two
;; emit pcmpistri and clobber the "c" scratch (operand 1).  Within each
;; pair the second alternative takes operand 3 from memory.
9692 (define_insn "sse4_2_pcmpistr_cconly"
9693 [(set (reg:CC FLAGS_REG)
9695 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
9696 (match_operand:V16QI 3 "nonimmediate_operand" "x,m,x,m")
9697 (match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")]
9699 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
9700 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
9703 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
9704 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
9705 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}
9706 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}"
9707 [(set_attr "type" "sselog")
9708 (set_attr "prefix_data16" "1")
9709 (set_attr "prefix_extra" "1")
9710 (set_attr "length_immediate" "1")
9711 (set_attr "memory" "none,load,none,load")
9712 (set_attr "prefix" "maybe_vex")
9713 (set_attr "btver2_decode" "vector,vector,vector,vector")
9714 (set_attr "mode" "TI")])
9716 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9720 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; xop_plus iterates over plain (plus) and saturating (ss_plus) addition.
;; The macs / madcs attributes map each code to the mnemonic fragment used
;; in the XOP pattern names and templates below ("macs"/"macss" and
;; "madcs"/"madcss").
9722 (define_code_iterator xop_plus [plus ss_plus])
9724 (define_code_attr macs [(plus "macs") (ss_plus "macss")])
9725 (define_code_attr madcs [(plus "madcs") (ss_plus "madcss")])
9727 ;; XOP parallel integer multiply/add instructions.
;; Same-width XOP multiply/add on the VI24_128 modes.  Operands 1 and 2
;; are the factor pair (operand 1 is marked commutative with "%" and only
;; operand 2 may be memory); operand 3 is a register of the result mode.
;; Four-operand template.
9729 (define_insn "xop_p<macs><ssemodesuffix><ssemodesuffix>"
9730 [(set (match_operand:VI24_128 0 "register_operand" "=x")
9733 (match_operand:VI24_128 1 "nonimmediate_operand" "%x")
9734 (match_operand:VI24_128 2 "nonimmediate_operand" "xm"))
9735 (match_operand:VI24_128 3 "register_operand" "x")))]
9737 "vp<macs><ssemodesuffix><ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9738 [(set_attr "type" "ssemuladd")
9739 (set_attr "mode" "TI")])
;; Widening XOP multiply/add, low variant: elements 0 and 2 of each V4SI
;; source are selected; operand 3 and the result are V2DI.
9741 (define_insn "xop_p<macs>dql"
9742 [(set (match_operand:V2DI 0 "register_operand" "=x")
9747 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
9748 (parallel [(const_int 0) (const_int 2)])))
9751 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
9752 (parallel [(const_int 0) (const_int 2)]))))
9753 (match_operand:V2DI 3 "register_operand" "x")))]
9755 "vp<macs>dql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9756 [(set_attr "type" "ssemuladd")
9757 (set_attr "mode" "TI")])
;; Widening XOP multiply/add, high variant: elements 1 and 3 of each V4SI
;; source are selected; operand 3 and the result are V2DI.
9759 (define_insn "xop_p<macs>dqh"
9760 [(set (match_operand:V2DI 0 "register_operand" "=x")
9765 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
9766 (parallel [(const_int 1) (const_int 3)])))
9769 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
9770 (parallel [(const_int 1) (const_int 3)]))))
9771 (match_operand:V2DI 3 "register_operand" "x")))]
9773 "vp<macs>dqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9774 [(set_attr "type" "ssemuladd")
9775 (set_attr "mode" "TI")])
9777 ;; XOP parallel integer multiply/add instructions for the intrinsics
;; Word-to-doubleword XOP multiply/add: the odd elements (1, 3, 5, 7) of
;; each V8HI source are selected; operand 3 and the result are V4SI.
9778 (define_insn "xop_p<macs>wd"
9779 [(set (match_operand:V4SI 0 "register_operand" "=x")
9784 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
9785 (parallel [(const_int 1) (const_int 3)
9786 (const_int 5) (const_int 7)])))
9789 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
9790 (parallel [(const_int 1) (const_int 3)
9791 (const_int 5) (const_int 7)]))))
9792 (match_operand:V4SI 3 "register_operand" "x")))]
9794 "vp<macs>wd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9795 [(set_attr "type" "ssemuladd")
9796 (set_attr "mode" "TI")])
;; Emits vp<madcs>wd.  The pattern uses two groups of element lists over the
;; V8HI inputs: {0, 2, 4, 6} for the first group and {1, 3, 5, 7} for the
;; second; operand 3 and the result are V4SI.
9798 (define_insn "xop_p<madcs>wd"
9799 [(set (match_operand:V4SI 0 "register_operand" "=x")
9805 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
9806 (parallel [(const_int 0) (const_int 2)
9807 (const_int 4) (const_int 6)])))
9810 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
9811 (parallel [(const_int 0) (const_int 2)
9812 (const_int 4) (const_int 6)]))))
9817 (parallel [(const_int 1) (const_int 3)
9818 (const_int 5) (const_int 7)])))
9822 (parallel [(const_int 1) (const_int 3)
9823 (const_int 5) (const_int 7)])))))
9824 (match_operand:V4SI 3 "register_operand" "x")))]
9826 "vp<madcs>wd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9827 [(set_attr "type" "ssemuladd")
9828 (set_attr "mode" "TI")])
9830 ;; XOP parallel XMM conditional moves
;; Emits vpcmov for every mode in the V iterator.  Note that operand 3 is
;; listed first in the RTL, followed by operands 1 and 2.  The two
;; alternatives allow either operand 2 or operand 3 to be in memory, never
;; both.
9831 (define_insn "xop_pcmov_<mode><avxsizesuffix>"
9832 [(set (match_operand:V 0 "register_operand" "=x,x")
9834 (match_operand:V 3 "nonimmediate_operand" "x,m")
9835 (match_operand:V 1 "register_operand" "x,x")
9836 (match_operand:V 2 "nonimmediate_operand" "xm,x")))]
9838 "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9839 [(set_attr "type" "sse4arg")])
9841 ;; XOP horizontal add/subtract instructions
;; Emits vphadd<u>bw: V16QI source, V8HI result.  The source is indexed with
;; the even element list {0, 2, ..., 14} and the odd list {1, 3, ..., 15}.
9842 (define_insn "xop_phadd<u>bw"
9843 [(set (match_operand:V8HI 0 "register_operand" "=x")
9847 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9848 (parallel [(const_int 0) (const_int 2)
9849 (const_int 4) (const_int 6)
9850 (const_int 8) (const_int 10)
9851 (const_int 12) (const_int 14)])))
9855 (parallel [(const_int 1) (const_int 3)
9856 (const_int 5) (const_int 7)
9857 (const_int 9) (const_int 11)
9858 (const_int 13) (const_int 15)])))))]
9860 "vphadd<u>bw\t{%1, %0|%0, %1}"
9861 [(set_attr "type" "sseiadd1")])
;; Emits vphadd<u>bd: V16QI source, V4SI result.  Four element lists are
;; used, each with stride 4 and starting at byte 0, 1, 2 and 3 respectively.
9863 (define_insn "xop_phadd<u>bd"
9864 [(set (match_operand:V4SI 0 "register_operand" "=x")
9869 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9870 (parallel [(const_int 0) (const_int 4)
9871 (const_int 8) (const_int 12)])))
9875 (parallel [(const_int 1) (const_int 5)
9876 (const_int 9) (const_int 13)]))))
9881 (parallel [(const_int 2) (const_int 6)
9882 (const_int 10) (const_int 14)])))
9886 (parallel [(const_int 3) (const_int 7)
9887 (const_int 11) (const_int 15)]))))))]
9889 "vphadd<u>bd\t{%1, %0|%0, %1}"
9890 [(set_attr "type" "sseiadd1")])
;; Emits vphadd<u>bq: V16QI source, V2DI result.  Eight element lists are
;; used, pairing byte k with byte k + 8 for k = 0 .. 7.
9892 (define_insn "xop_phadd<u>bq"
9893 [(set (match_operand:V2DI 0 "register_operand" "=x")
9899 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9900 (parallel [(const_int 0) (const_int 8)])))
9904 (parallel [(const_int 1) (const_int 9)]))))
9909 (parallel [(const_int 2) (const_int 10)])))
9913 (parallel [(const_int 3) (const_int 11)])))))
9919 (parallel [(const_int 4) (const_int 12)])))
9923 (parallel [(const_int 5) (const_int 13)]))))
9928 (parallel [(const_int 6) (const_int 14)])))
9932 (parallel [(const_int 7) (const_int 15)])))))))]
9934 "vphadd<u>bq\t{%1, %0|%0, %1}"
9935 [(set_attr "type" "sseiadd1")])
;; Emits vphadd<u>wd: V8HI source, V4SI result.  The source is indexed with
;; the even element list {0, 2, 4, 6} and the odd list {1, 3, 5, 7}.
9937 (define_insn "xop_phadd<u>wd"
9938 [(set (match_operand:V4SI 0 "register_operand" "=x")
9942 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
9943 (parallel [(const_int 0) (const_int 2)
9944 (const_int 4) (const_int 6)])))
9948 (parallel [(const_int 1) (const_int 3)
9949 (const_int 5) (const_int 7)])))))]
9951 "vphadd<u>wd\t{%1, %0|%0, %1}"
9952 [(set_attr "type" "sseiadd1")])
;; Emits vphadd<u>wq: V8HI source, V2DI result.  Four element lists are
;; used, pairing word k with word k + 4 for k = 0 .. 3.
9954 (define_insn "xop_phadd<u>wq"
9955 [(set (match_operand:V2DI 0 "register_operand" "=x")
9960 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
9961 (parallel [(const_int 0) (const_int 4)])))
9965 (parallel [(const_int 1) (const_int 5)]))))
9970 (parallel [(const_int 2) (const_int 6)])))
9974 (parallel [(const_int 3) (const_int 7)]))))))]
9976 "vphadd<u>wq\t{%1, %0|%0, %1}"
9977 [(set_attr "type" "sseiadd1")])
;; Emits vphadd<u>dq: V4SI source, V2DI result.  The source is indexed with
;; the element lists {0, 2} and {1, 3}.
9979 (define_insn "xop_phadd<u>dq"
9980 [(set (match_operand:V2DI 0 "register_operand" "=x")
9984 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
9985 (parallel [(const_int 0) (const_int 2)])))
9989 (parallel [(const_int 1) (const_int 3)])))))]
9991 "vphadd<u>dq\t{%1, %0|%0, %1}"
9992 [(set_attr "type" "sseiadd1")])
;; Emits vphsubbw: V16QI source, V8HI result.  The even element list
;; {0, 2, ..., 14} appears first in the pattern, the odd list second.
9994 (define_insn "xop_phsubbw"
9995 [(set (match_operand:V8HI 0 "register_operand" "=x")
9999 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10000 (parallel [(const_int 0) (const_int 2)
10001 (const_int 4) (const_int 6)
10002 (const_int 8) (const_int 10)
10003 (const_int 12) (const_int 14)])))
10007 (parallel [(const_int 1) (const_int 3)
10008 (const_int 5) (const_int 7)
10009 (const_int 9) (const_int 11)
10010 (const_int 13) (const_int 15)])))))]
10012 "vphsubbw\t{%1, %0|%0, %1}"
10013 [(set_attr "type" "sseiadd1")])
;; Emits vphsubwd: V8HI source, V4SI result.  The even element list
;; {0, 2, 4, 6} appears first in the pattern, the odd list second.
10015 (define_insn "xop_phsubwd"
10016 [(set (match_operand:V4SI 0 "register_operand" "=x")
10020 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10021 (parallel [(const_int 0) (const_int 2)
10022 (const_int 4) (const_int 6)])))
10026 (parallel [(const_int 1) (const_int 3)
10027 (const_int 5) (const_int 7)])))))]
10029 "vphsubwd\t{%1, %0|%0, %1}"
10030 [(set_attr "type" "sseiadd1")])
;; Emits vphsubdq: V4SI source, V2DI result.  The element list {0, 2}
;; appears first in the pattern, {1, 3} second.
10032 (define_insn "xop_phsubdq"
10033 [(set (match_operand:V2DI 0 "register_operand" "=x")
10037 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
10038 (parallel [(const_int 0) (const_int 2)])))
10042 (parallel [(const_int 1) (const_int 3)])))))]
10044 "vphsubdq\t{%1, %0|%0, %1}"
10045 [(set_attr "type" "sseiadd1")])
10047 ;; XOP permute instructions
;; Emits vpperm, modelled as UNSPEC_XOP_PERMUTE over three V16QI inputs.
;; The insn condition requires TARGET_XOP and rejects the case where
;; operands 2 and 3 are both memory references.
10048 (define_insn "xop_pperm"
10049 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
10051 [(match_operand:V16QI 1 "register_operand" "x,x")
10052 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
10053 (match_operand:V16QI 3 "nonimmediate_operand" "xm,x")]
10054 UNSPEC_XOP_PERMUTE))]
10055 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
10056 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10057 [(set_attr "type" "sse4arg")
10058 (set_attr "mode" "TI")])
10060 ;; XOP pack instructions that combine two vectors into a smaller vector
;; Packs two V2DI inputs (operands 1 and 2) into one V4SI result using
;; vpperm.  Operand 3 is the V16QI vpperm selector, attached via a (use ...).
;; Operands 2 and 3 may not both be memory references.
10061 (define_insn "xop_pperm_pack_v2di_v4si"
10062 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
10065 (match_operand:V2DI 1 "register_operand" "x,x"))
10067 (match_operand:V2DI 2 "nonimmediate_operand" "x,m"))))
10068 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
10069 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
10070 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10071 [(set_attr "type" "sse4arg")
10072 (set_attr "mode" "TI")])
;; Packs two V4SI inputs (operands 1 and 2) into one V8HI result using
;; vpperm.  Operand 3 is the V16QI vpperm selector, attached via a (use ...).
;; Operands 2 and 3 may not both be memory references.
10074 (define_insn "xop_pperm_pack_v4si_v8hi"
10075 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
10078 (match_operand:V4SI 1 "register_operand" "x,x"))
10080 (match_operand:V4SI 2 "nonimmediate_operand" "x,m"))))
10081 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
10082 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
10083 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10084 [(set_attr "type" "sse4arg")
10085 (set_attr "mode" "TI")])
;; Packs two V8HI inputs (operands 1 and 2) into one V16QI result using
;; vpperm.  Operand 3 is the V16QI vpperm selector, attached via a (use ...).
;; Operands 2 and 3 may not both be memory references.
10087 (define_insn "xop_pperm_pack_v8hi_v16qi"
10088 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
10091 (match_operand:V8HI 1 "register_operand" "x,x"))
10093 (match_operand:V8HI 2 "nonimmediate_operand" "x,m"))))
10094 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
10095 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
10096 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10097 [(set_attr "type" "sse4arg")
10098 (set_attr "mode" "TI")])
10100 ;; XOP packed rotate instructions
;; Rotate-left expander for VI_128 modes with an SImode count (operand 2).
;; When the count is not an immediate accepted by
;; const_0_to_<sserotatemax>_operand, it is converted to <ssescalarmode> if
;; needed, replicated into all <ssescalarnum> elements of a vector register
;; via vec_init<mode>, and the rotate is emitted as xop_vrotl<mode>3.
10101 (define_expand "rotl<mode>3"
10102 [(set (match_operand:VI_128 0 "register_operand")
10104 (match_operand:VI_128 1 "nonimmediate_operand")
10105 (match_operand:SI 2 "general_operand")))]
10108 /* If we were given a scalar, convert it to parallel */
10109 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
10111 rtvec vs = rtvec_alloc (<ssescalarnum>);
10112 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
10113 rtx reg = gen_reg_rtx (<MODE>mode);
10114 rtx op2 = operands[2];
10117 if (GET_MODE (op2) != <ssescalarmode>mode)
10119 op2 = gen_reg_rtx (<ssescalarmode>mode);
10120 convert_move (op2, operands[2], false);
10123 for (i = 0; i < <ssescalarnum>; i++)
10124 RTVEC_ELT (vs, i) = op2;
10126 emit_insn (gen_vec_init<mode> (reg, par));
10127 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
;; Rotate-right expander for VI_128 modes with an SImode count (operand 2).
;; When the count is not an immediate accepted by
;; const_0_to_<sserotatemax>_operand, it is broadcast into a vector exactly
;; as in the rotate-left case, the vector is negated with neg<mode>2, and
;; the rotate is emitted as xop_vrotl<mode>3 using the negated counts.
10132 (define_expand "rotr<mode>3"
10133 [(set (match_operand:VI_128 0 "register_operand")
10135 (match_operand:VI_128 1 "nonimmediate_operand")
10136 (match_operand:SI 2 "general_operand")))]
10139 /* If we were given a scalar, convert it to parallel */
10140 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
10142 rtvec vs = rtvec_alloc (<ssescalarnum>);
10143 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
10144 rtx neg = gen_reg_rtx (<MODE>mode);
10145 rtx reg = gen_reg_rtx (<MODE>mode);
10146 rtx op2 = operands[2];
10149 if (GET_MODE (op2) != <ssescalarmode>mode)
10151 op2 = gen_reg_rtx (<ssescalarmode>mode);
10152 convert_move (op2, operands[2], false);
10155 for (i = 0; i < <ssescalarnum>; i++)
10156 RTVEC_ELT (vs, i) = op2;
10158 emit_insn (gen_vec_init<mode> (reg, par));
10159 emit_insn (gen_neg<mode>2 (neg, reg));
10160 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], neg));
;; Rotate by an immediate count: emits vprot<ssemodesuffix> with operand 2,
;; which must satisfy const_0_to_<sserotatemax>_operand, printed directly.
10165 (define_insn "xop_rotl<mode>3"
10166 [(set (match_operand:VI_128 0 "register_operand" "=x")
10168 (match_operand:VI_128 1 "nonimmediate_operand" "xm")
10169 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
10171 "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10172 [(set_attr "type" "sseishft")
10173 (set_attr "length_immediate" "1")
10174 (set_attr "mode" "TI")])
;; Rotate right by an immediate count, emitted with the same
;; vprot<ssemodesuffix> mnemonic as the rotate-left insn.  The count that is
;; printed (%3) is computed as the element width in bits minus operand 2.
;; NOTE(review): presumably that value is stored in operands[3] just before
;; the return statement -- confirm against the full output block.
10176 (define_insn "xop_rotr<mode>3"
10177 [(set (match_operand:VI_128 0 "register_operand" "=x")
10179 (match_operand:VI_128 1 "nonimmediate_operand" "xm")
10180 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
10184 = GEN_INT (GET_MODE_BITSIZE (<ssescalarmode>mode) - INTVAL (operands[2]));
10185 return \"vprot<ssemodesuffix>\t{%3, %1, %0|%0, %1, %3}\";
10187 [(set_attr "type" "sseishft")
10188 (set_attr "length_immediate" "1")
10189 (set_attr "mode" "TI")])
;; Rotate right by per-element counts held in a vector register: operand 2
;; is negated into a fresh register and passed to xop_vrotl<mode>3.
10191 (define_expand "vrotr<mode>3"
10192 [(match_operand:VI_128 0 "register_operand")
10193 (match_operand:VI_128 1 "register_operand")
10194 (match_operand:VI_128 2 "register_operand")]
10197 rtx reg = gen_reg_rtx (<MODE>mode);
10198 emit_insn (gen_neg<mode>2 (reg, operands[2]));
10199 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
;; Rotate left by per-element counts held in a vector register: the three
;; operands are forwarded unchanged to xop_vrotl<mode>3.
10203 (define_expand "vrotl<mode>3"
10204 [(match_operand:VI_128 0 "register_operand")
10205 (match_operand:VI_128 1 "register_operand")
10206 (match_operand:VI_128 2 "register_operand")]
10209 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], operands[2]));
;; Variable rotate: emits vprot<ssemodesuffix> with per-element counts in
;; operand 2.  The RTL is an if_then_else in which one arm uses the negation
;; of operand 2.  Either operand 1 or operand 2 may be in memory, but the
;; insn condition rejects both being memory at once.
10213 (define_insn "xop_vrotl<mode>3"
10214 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
10215 (if_then_else:VI_128
10217 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
10220 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
10224 (neg:VI_128 (match_dup 2)))))]
10225 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
10226 "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10227 [(set_attr "type" "sseishft")
10228 (set_attr "prefix_data16" "0")
10229 (set_attr "prefix_extra" "2")
10230 (set_attr "mode" "TI")])
10232 ;; XOP packed shift instructions.
;; Per-element logical right shift for VI12_128 modes: the shift counts in
;; operand 2 are negated and the result is emitted through xop_shl<mode>3.
10233 (define_expand "vlshr<mode>3"
10234 [(set (match_operand:VI12_128 0 "register_operand")
10236 (match_operand:VI12_128 1 "register_operand")
10237 (match_operand:VI12_128 2 "nonimmediate_operand")))]
10240 rtx neg = gen_reg_rtx (<MODE>mode);
10241 emit_insn (gen_neg<mode>2 (neg, operands[2]));
10242 emit_insn (gen_xop_shl<mode>3 (operands[0], operands[1], neg));
;; Per-element logical right shift for VI48_128 modes, enabled for AVX2 or
;; XOP.  The expansion code shown negates operand 2 and emits xop_shl<mode>3.
;; NOTE(review): presumably that path is taken only when AVX2 is not
;; available -- confirm against the guarding condition.
10246 (define_expand "vlshr<mode>3"
10247 [(set (match_operand:VI48_128 0 "register_operand")
10249 (match_operand:VI48_128 1 "register_operand")
10250 (match_operand:VI48_128 2 "nonimmediate_operand")))]
10251 "TARGET_AVX2 || TARGET_XOP"
10255 rtx neg = gen_reg_rtx (<MODE>mode);
10256 emit_insn (gen_neg<mode>2 (neg, operands[2]));
10257 emit_insn (gen_xop_shl<mode>3 (operands[0], operands[1], neg));
;; vlshr<mode>3 for the VI48_256 modes; operand 1 must be a register while
;; the per-element counts in operand 2 may be in memory.
10262 (define_expand "vlshr<mode>3"
10263 [(set (match_operand:VI48_256 0 "register_operand")
10265 (match_operand:VI48_256 1 "register_operand")
10266 (match_operand:VI48_256 2 "nonimmediate_operand")))]
;; Per-element arithmetic right shift for VI128_128 modes: the shift counts
;; in operand 2 are negated and the result is emitted through
;; xop_sha<mode>3.
10269 (define_expand "vashr<mode>3"
10270 [(set (match_operand:VI128_128 0 "register_operand")
10271 (ashiftrt:VI128_128
10272 (match_operand:VI128_128 1 "register_operand")
10273 (match_operand:VI128_128 2 "nonimmediate_operand")))]
10276 rtx neg = gen_reg_rtx (<MODE>mode);
10277 emit_insn (gen_neg<mode>2 (neg, operands[2]));
10278 emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], neg));
;; Per-element arithmetic right shift for V4SI, enabled for AVX2 or XOP.
;; The expansion code shown negates operand 2 and emits xop_shav4si3.
;; NOTE(review): presumably that path is taken only when AVX2 is not
;; available -- confirm against the guarding condition.
10282 (define_expand "vashrv4si3"
10283 [(set (match_operand:V4SI 0 "register_operand")
10284 (ashiftrt:V4SI (match_operand:V4SI 1 "register_operand")
10285 (match_operand:V4SI 2 "nonimmediate_operand")))]
10286 "TARGET_AVX2 || TARGET_XOP"
10290 rtx neg = gen_reg_rtx (V4SImode);
10291 emit_insn (gen_negv4si2 (neg, operands[2]));
10292 emit_insn (gen_xop_shav4si3 (operands[0], operands[1], neg));
;; Per-element arithmetic right shift (ashiftrt) for V8SI; the counts in
;; operand 2 may be in memory.
10297 (define_expand "vashrv8si3"
10298 [(set (match_operand:V8SI 0 "register_operand")
10299 (ashiftrt:V8SI (match_operand:V8SI 1 "register_operand")
10300 (match_operand:V8SI 2 "nonimmediate_operand")))]
;; Per-element left shift for VI12_128 modes: the operands are forwarded
;; unchanged (no negation of the counts) to xop_sha<mode>3.
10303 (define_expand "vashl<mode>3"
10304 [(set (match_operand:VI12_128 0 "register_operand")
10306 (match_operand:VI12_128 1 "register_operand")
10307 (match_operand:VI12_128 2 "nonimmediate_operand")))]
10310 emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], operands[2]));
;; Per-element left shift for VI48_128 modes, enabled for AVX2 or XOP.  The
;; expansion code shown forces operand 2 into a register and emits
;; xop_sha<mode>3.
;; NOTE(review): presumably that path is taken only when AVX2 is not
;; available -- confirm against the guarding condition.
10314 (define_expand "vashl<mode>3"
10315 [(set (match_operand:VI48_128 0 "register_operand")
10317 (match_operand:VI48_128 1 "register_operand")
10318 (match_operand:VI48_128 2 "nonimmediate_operand")))]
10319 "TARGET_AVX2 || TARGET_XOP"
10323 operands[2] = force_reg (<MODE>mode, operands[2]);
10324 emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], operands[2]));
;; vashl<mode>3 for the VI48_256 modes; operand 1 must be a register while
;; the per-element counts in operand 2 may be in memory.
10329 (define_expand "vashl<mode>3"
10330 [(set (match_operand:VI48_256 0 "register_operand")
10332 (match_operand:VI48_256 1 "register_operand")
10333 (match_operand:VI48_256 2 "nonimmediate_operand")))]
;; Emits vpsha<ssemodesuffix> with per-element counts in operand 2.  The RTL
;; is an if_then_else in which one arm uses the negation of operand 2; the
;; right-shift expanders in this file rely on this by passing negated
;; counts.  Operands 1 and 2 may not both be memory references.
10336 (define_insn "xop_sha<mode>3"
10337 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
10338 (if_then_else:VI_128
10340 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
10343 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
10347 (neg:VI_128 (match_dup 2)))))]
10348 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
10349 "vpsha<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10350 [(set_attr "type" "sseishft")
10351 (set_attr "prefix_data16" "0")
10352 (set_attr "prefix_extra" "2")
10353 (set_attr "mode" "TI")])
;; Emits vpshl<ssemodesuffix> with per-element counts in operand 2.  The RTL
;; is an if_then_else in which one arm uses the negation of operand 2; the
;; logical right-shift expanders in this file rely on this by passing
;; negated counts.  Operands 1 and 2 may not both be memory references.
10355 (define_insn "xop_shl<mode>3"
10356 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
10357 (if_then_else:VI_128
10359 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
10362 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
10366 (neg:VI_128 (match_dup 2)))))]
10367 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
10368 "vpshl<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10369 [(set_attr "type" "sseishft")
10370 (set_attr "prefix_data16" "0")
10371 (set_attr "prefix_extra" "2")
10372 (set_attr "mode" "TI")])
;; Shift of a byte vector (VI1_AVX2) by a scalar SImode count.
;; With XOP and V16QImode: for the non-ASHIFT codes a constant count is
;; negated at expand time; the count is then replicated into a 16-element
;; vector with vec_initv16qi, and the shift is emitted through
;; xop_shlv16qi3 for LSHIFTRT or xop_shav16qi3 otherwise.
;; NOTE(review): the negv16qi2 call is presumably guarded by the `negate'
;; flag, used when the count is not a compile-time constant -- confirm.
;; All other cases go through ix86_expand_vecop_qihi.
10374 (define_expand "<shift_insn><mode>3"
10375 [(set (match_operand:VI1_AVX2 0 "register_operand")
10376 (any_shift:VI1_AVX2
10377 (match_operand:VI1_AVX2 1 "register_operand")
10378 (match_operand:SI 2 "nonmemory_operand")))]
10381 if (TARGET_XOP && <MODE>mode == V16QImode)
10383 bool negate = false;
10384 rtx (*gen) (rtx, rtx, rtx);
10388 if (<CODE> != ASHIFT)
10390 if (CONST_INT_P (operands[2]))
10391 operands[2] = GEN_INT (-INTVAL (operands[2]));
10395 par = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
10396 for (i = 0; i < 16; i++)
10397 XVECEXP (par, 0, i) = operands[2];
10399 tmp = gen_reg_rtx (V16QImode);
10400 emit_insn (gen_vec_initv16qi (tmp, par));
10403 emit_insn (gen_negv16qi2 (tmp, tmp));
10405 gen = (<CODE> == LSHIFTRT ? gen_xop_shlv16qi3 : gen_xop_shav16qi3);
10406 emit_insn (gen (operands[0], operands[1], tmp));
10409 ix86_expand_vecop_qihi (<CODE>, operands[0], operands[1], operands[2]);
;; Shift of a V2DI vector by a scalar DImode count, emitted through
;; xop_shav2di3.  A constant count is negated at expand time; the count is
;; replicated into both elements of a V2DI register with vec_initv2di.
;; NOTE(review): the negv2di2 call is presumably guarded by the `negate'
;; flag, used when the count is not a compile-time constant -- confirm.
10413 (define_expand "ashrv2di3"
10414 [(set (match_operand:V2DI 0 "register_operand")
10416 (match_operand:V2DI 1 "register_operand")
10417 (match_operand:DI 2 "nonmemory_operand")))]
10420 rtx reg = gen_reg_rtx (V2DImode);
10422 bool negate = false;
10425 if (CONST_INT_P (operands[2]))
10426 operands[2] = GEN_INT (-INTVAL (operands[2]));
10430 par = gen_rtx_PARALLEL (V2DImode, rtvec_alloc (2));
10431 for (i = 0; i < 2; i++)
10432 XVECEXP (par, 0, i) = operands[2];
10434 emit_insn (gen_vec_initv2di (reg, par));
10437 emit_insn (gen_negv2di2 (reg, reg));
10439 emit_insn (gen_xop_shav2di3 (operands[0], operands[1], reg));
10443 ;; XOP FRCZ support
;; Emits vfrcz<ssemodesuffix> for every mode in FMAMODE; the single source
;; operand may be a register or memory.
10444 (define_insn "xop_frcz<mode>2"
10445 [(set (match_operand:FMAMODE 0 "register_operand" "=x")
10447 [(match_operand:FMAMODE 1 "nonimmediate_operand" "xm")]
10450 "vfrcz<ssemodesuffix>\t{%1, %0|%0, %1}"
10451 [(set_attr "type" "ssecvt1")
10452 (set_attr "mode" "<MODE>")])
;; Expander for the VF_128 vfrcz form; its preparation code supplies
;; operands[3] as the all-zero constant of <MODE>mode.
10455 (define_expand "xop_vmfrcz<mode>2"
10456 [(set (match_operand:VF_128 0 "register_operand")
10459 [(match_operand:VF_128 1 "nonimmediate_operand")]
10465 operands[3] = CONST0_RTX (<MODE>mode);
;; Emits vfrcz<ssescalarmodesuffix> for VF_128 modes.  Operand 2 must match
;; const0_operand and does not appear in the output template.
10468 (define_insn "*xop_vmfrcz_<mode>"
10469 [(set (match_operand:VF_128 0 "register_operand" "=x")
10472 [(match_operand:VF_128 1 "nonimmediate_operand" "xm")]
10474 (match_operand:VF_128 2 "const0_operand")
10477 "vfrcz<ssescalarmodesuffix>\t{%1, %0|%0, %<iptr>1}"
10478 [(set_attr "type" "ssecvt1")
10479 (set_attr "mode" "<MODE>")])
;; Integer vector compare producing a VI_128 result: emits
;; vpcom%Y1<ssemodesuffix>, where operator 1 must satisfy
;; ix86_comparison_int_operator and %Y1 supplies the mnemonic's condition
;; part.  Operand 2 must be a register; operand 3 may be in memory.
10481 (define_insn "xop_maskcmp<mode>3"
10482 [(set (match_operand:VI_128 0 "register_operand" "=x")
10483 (match_operator:VI_128 1 "ix86_comparison_int_operator"
10484 [(match_operand:VI_128 2 "register_operand" "x")
10485 (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
10487 "vpcom%Y1<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
10488 [(set_attr "type" "sse4arg")
10489 (set_attr "prefix_data16" "0")
10490 (set_attr "prefix_rep" "0")
10491 (set_attr "prefix_extra" "2")
10492 (set_attr "length_immediate" "1")
10493 (set_attr "mode" "TI")])
;; Counterpart of the vpcom compare insn for operators accepted by
;; ix86_comparison_uns_operator: emits vpcom%Y1u<ssemodesuffix> (note the
;; "u" in the mnemonic).
10495 (define_insn "xop_maskcmp_uns<mode>3"
10496 [(set (match_operand:VI_128 0 "register_operand" "=x")
10497 (match_operator:VI_128 1 "ix86_comparison_uns_operator"
10498 [(match_operand:VI_128 2 "register_operand" "x")
10499 (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
10501 "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
10502 [(set_attr "type" "ssecmp")
10503 (set_attr "prefix_data16" "0")
10504 (set_attr "prefix_rep" "0")
10505 (set_attr "prefix_extra" "2")
10506 (set_attr "length_immediate" "1")
10507 (set_attr "mode" "TI")])
10509 ;; Version of pcom*u* that is called from the intrinsics that allows pcomequ*
10510 ;; and pcomneu* not to be converted to the signed ones in case somebody needs
10511 ;; the exact instruction generated for the intrinsic.
;; Same output template as the plain unsigned compare insn, but the
;; comparison is wrapped in UNSPEC_XOP_UNSIGNED_CMP.
10512 (define_insn "xop_maskcmp_uns2<mode>3"
10513 [(set (match_operand:VI_128 0 "register_operand" "=x")
10515 [(match_operator:VI_128 1 "ix86_comparison_uns_operator"
10516 [(match_operand:VI_128 2 "register_operand" "x")
10517 (match_operand:VI_128 3 "nonimmediate_operand" "xm")])]
10518 UNSPEC_XOP_UNSIGNED_CMP))]
10520 "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
10521 [(set_attr "type" "ssecmp")
10522 (set_attr "prefix_data16" "0")
10523 (set_attr "prefix_extra" "2")
10524 (set_attr "length_immediate" "1")
10525 (set_attr "mode" "TI")])
10527 ;; Pcomtrue and pcomfalse support. These are useless instructions, but are
10528 ;; being added here to be complete.
;; UNSPEC_XOP_TRUEFALSE over two VI_128 inputs plus a constant selector
;; (operand 3): a non-zero selector emits vpcomtrue<ssemodesuffix>, zero
;; emits vpcomfalse<ssemodesuffix>.
10529 (define_insn "xop_pcom_tf<mode>3"
10530 [(set (match_operand:VI_128 0 "register_operand" "=x")
10532 [(match_operand:VI_128 1 "register_operand" "x")
10533 (match_operand:VI_128 2 "nonimmediate_operand" "xm")
10534 (match_operand:SI 3 "const_int_operand" "n")]
10535 UNSPEC_XOP_TRUEFALSE))]
10538 return ((INTVAL (operands[3]) != 0)
10539 ? "vpcomtrue<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10540 : "vpcomfalse<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}");
10542 [(set_attr "type" "ssecmp")
10543 (set_attr "prefix_data16" "0")
10544 (set_attr "prefix_extra" "2")
10545 (set_attr "length_immediate" "1")
10546 (set_attr "mode" "TI")])
;; Emits vpermil2<ssemodesuffix> for VF_128_256 modes.  Operands 1 and 2 are
;; the floating-point inputs, operand 3 is a vector in the matching integer
;; mode (<sseintvecmode>) and may be in memory, and operand 4 is an
;; immediate restricted to 0..3.
10548 (define_insn "xop_vpermil2<mode>3"
10549 [(set (match_operand:VF_128_256 0 "register_operand" "=x")
10551 [(match_operand:VF_128_256 1 "register_operand" "x")
10552 (match_operand:VF_128_256 2 "nonimmediate_operand" "%x")
10553 (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "xm")
10554 (match_operand:SI 4 "const_0_to_3_operand" "n")]
10557 "vpermil2<ssemodesuffix>\t{%4, %3, %2, %1, %0|%0, %1, %2, %3, %4}"
10558 [(set_attr "type" "sse4arg")
10559 (set_attr "length_immediate" "1")
10560 (set_attr "mode" "<MODE>")])
10562 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; AES encrypt round on V2DI.  Two alternatives: the non-AVX form
;; ("aesenc") ties operand 1 to the destination, the AVX form ("vaesenc")
;; is three-operand.  Operand 2 may be in memory in both.
10564 (define_insn "aesenc"
10565 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
10566 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
10567 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
10571 aesenc\t{%2, %0|%0, %2}
10572 vaesenc\t{%2, %1, %0|%0, %1, %2}"
10573 [(set_attr "isa" "noavx,avx")
10574 (set_attr "type" "sselog1")
10575 (set_attr "prefix_extra" "1")
10576 (set_attr "prefix" "orig,vex")
10577 (set_attr "btver2_decode" "double,double")
10578 (set_attr "mode" "TI")])
;; UNSPEC_AESENCLAST on V2DI.  Two alternatives: the non-AVX form
;; ("aesenclast") ties operand 1 to the destination, the AVX form
;; ("vaesenclast") is three-operand.  Operand 2 may be in memory in both.
10580 (define_insn "aesenclast"
10581 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
10582 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
10583 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
10584 UNSPEC_AESENCLAST))]
10587 aesenclast\t{%2, %0|%0, %2}
10588 vaesenclast\t{%2, %1, %0|%0, %1, %2}"
10589 [(set_attr "isa" "noavx,avx")
10590 (set_attr "type" "sselog1")
10591 (set_attr "prefix_extra" "1")
10592 (set_attr "prefix" "orig,vex")
10593 (set_attr "btver2_decode" "double,double")
10594 (set_attr "mode" "TI")])
;; AES decrypt round on V2DI.  Two alternatives: the non-AVX form
;; ("aesdec") ties operand 1 to the destination, the AVX form ("vaesdec")
;; is three-operand.  Operand 2 may be in memory in both.
10596 (define_insn "aesdec"
10597 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
10598 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
10599 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
10603 aesdec\t{%2, %0|%0, %2}
10604 vaesdec\t{%2, %1, %0|%0, %1, %2}"
10605 [(set_attr "isa" "noavx,avx")
10606 (set_attr "type" "sselog1")
10607 (set_attr "prefix_extra" "1")
10608 (set_attr "prefix" "orig,vex")
10609 (set_attr "btver2_decode" "double,double")
10610 (set_attr "mode" "TI")])
;; UNSPEC_AESDECLAST on V2DI.  Two alternatives: the non-AVX form
;; ("aesdeclast") ties operand 1 to the destination, the AVX form
;; ("vaesdeclast") is three-operand.  Operand 2 may be in memory in both.
10612 (define_insn "aesdeclast"
10613 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
10614 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
10615 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
10616 UNSPEC_AESDECLAST))]
10619 aesdeclast\t{%2, %0|%0, %2}
10620 vaesdeclast\t{%2, %1, %0|%0, %1, %2}"
10621 [(set_attr "isa" "noavx,avx")
10622 (set_attr "type" "sselog1")
10623 (set_attr "prefix_extra" "1")
10624 (set_attr "prefix" "orig,vex")
10625 (set_attr "btver2_decode" "double,double")
10626 (set_attr "mode" "TI")])
;; Single-source V2DI unspec emitted as "%vaesimc"; the "prefix" attribute
;; is maybe_vex, so one template serves both encodings.
10628 (define_insn "aesimc"
10629 [(set (match_operand:V2DI 0 "register_operand" "=x")
10630 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")]
10633 "%vaesimc\t{%1, %0|%0, %1}"
10634 [(set_attr "type" "sselog1")
10635 (set_attr "prefix_extra" "1")
10636 (set_attr "prefix" "maybe_vex")
10637 (set_attr "mode" "TI")])
;; UNSPEC_AESKEYGENASSIST on V2DI with an 8-bit immediate (operand 2,
;; restricted to 0..255).  Emitted as "%vaeskeygenassist" with a maybe_vex
;; prefix.
10639 (define_insn "aeskeygenassist"
10640 [(set (match_operand:V2DI 0 "register_operand" "=x")
10641 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")
10642 (match_operand:SI 2 "const_0_to_255_operand" "n")]
10643 UNSPEC_AESKEYGENASSIST))]
10645 "%vaeskeygenassist\t{%2, %1, %0|%0, %1, %2}"
10646 [(set_attr "type" "sselog1")
10647 (set_attr "prefix_extra" "1")
10648 (set_attr "length_immediate" "1")
10649 (set_attr "prefix" "maybe_vex")
10650 (set_attr "mode" "TI")])
;; V2DI unspec with an 8-bit immediate (operand 3, restricted to 0..255).
;; Two alternatives: the non-AVX form ("pclmulqdq") ties operand 1 to the
;; destination, the AVX form ("vpclmulqdq") is three-operand plus immediate.
10652 (define_insn "pclmulqdq"
10653 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
10654 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
10655 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")
10656 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
10660 pclmulqdq\t{%3, %2, %0|%0, %2, %3}
10661 vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10662 [(set_attr "isa" "noavx,avx")
10663 (set_attr "type" "sselog1")
10664 (set_attr "prefix_extra" "1")
10665 (set_attr "length_immediate" "1")
10666 (set_attr "prefix" "orig,vex")
10667 (set_attr "mode" "TI")])
;; Builds the PARALLEL used for vzeroall.  Element 0 is an unspec_volatile
;; marker; it is followed by one SET per SSE register (16 when TARGET_64BIT,
;; otherwise 8) assigning the V8SImode zero constant to that register.
10669 (define_expand "avx_vzeroall"
10670 [(match_par_dup 0 [(const_int 0)])]
10673 int nregs = TARGET_64BIT ? 16 : 8;
10676 operands[0] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs + 1));
10678 XVECEXP (operands[0], 0, 0)
10679 = gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, const0_rtx),
10682 for (regno = 0; regno < nregs; regno++)
10683 XVECEXP (operands[0], 0, regno + 1)
10684 = gen_rtx_SET (VOIDmode,
10685 gen_rtx_REG (V8SImode, SSE_REGNO (regno)),
10686 CONST0_RTX (V8SImode));
;; Matches a PARALLEL accepted by the vzeroall_operation predicate whose
;; first element is the UNSPECV_VZEROALL unspec_volatile.  The attributes
;; record that the insn has no modrm byte and no memory operand.
10689 (define_insn "*avx_vzeroall"
10690 [(match_parallel 0 "vzeroall_operation"
10691 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROALL)])]
10694 [(set_attr "type" "sse")
10695 (set_attr "modrm" "0")
10696 (set_attr "memory" "none")
10697 (set_attr "prefix" "vex")
10698 (set_attr "btver2_decode" "vector")
10699 (set_attr "mode" "OI")])
10701 ;; Clear the upper 128bits of AVX registers, equivalent to a NOP
10702 ;; if the upper 128bits are unused.
;; Modelled purely as the UNSPECV_VZEROUPPER unspec_volatile with no
;; register or memory operands; the attributes record no modrm byte and no
;; memory access.
10703 (define_insn "avx_vzeroupper"
10704 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)]
10707 [(set_attr "type" "sse")
10708 (set_attr "modrm" "0")
10709 (set_attr "memory" "none")
10710 (set_attr "prefix" "vex")
10711 (set_attr "btver2_decode" "vector")
10712 (set_attr "mode" "OI")])
;; Emits vpbroadcast<ssemodesuffix>: element 0 of the <ssexmmmode> source
;; (register or memory) is selected as a <ssescalarmode> value for the VI
;; destination.
10714 (define_insn "avx2_pbroadcast<mode>"
10715 [(set (match_operand:VI 0 "register_operand" "=x")
10717 (vec_select:<ssescalarmode>
10718 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "xm")
10719 (parallel [(const_int 0)]))))]
10721 "vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}"
10722 [(set_attr "type" "ssemov")
10723 (set_attr "prefix_extra" "1")
10724 (set_attr "prefix" "vex")
10725 (set_attr "mode" "<sseinsnmode>")])
;; Broadcast of element 0 where the source is itself a VI_256 vector.
;; Alternative 0 takes the source from memory; alternative 1 takes it from
;; a register, which the template prints with the %x1 modifier.
10727 (define_insn "avx2_pbroadcast<mode>_1"
10728 [(set (match_operand:VI_256 0 "register_operand" "=x,x")
10729 (vec_duplicate:VI_256
10730 (vec_select:<ssescalarmode>
10731 (match_operand:VI_256 1 "nonimmediate_operand" "m,x")
10732 (parallel [(const_int 0)]))))]
10735 vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}
10736 vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %x1}"
10737 [(set_attr "type" "ssemov")
10738 (set_attr "prefix_extra" "1")
10739 (set_attr "prefix" "vex")
10740 (set_attr "mode" "<sseinsnmode>")])
;; Emits vperm<ssemodesuffix> for VI4F_256 modes.  Operand 1 is the data
;; (register or memory) and operand 2 is a V8SI register; note that the
;; assembler template lists operand 2 before operand 1 in Intel syntax.
10742 (define_insn "avx2_permvar<mode>"
10743 [(set (match_operand:VI4F_256 0 "register_operand" "=v")
10745 [(match_operand:VI4F_256 1 "nonimmediate_operand" "vm")
10746 (match_operand:V8SI 2 "register_operand" "v")]
10749 "vperm<ssemodesuffix>\t{%1, %2, %0|%0, %2, %1}"
10750 [(set_attr "type" "sselog")
10751 (set_attr "prefix" "vex")
10752 (set_attr "mode" "OI")])
;; Expander taking an 8-bit immediate (operand 2): the mask is split into
;; four 2-bit selectors (bits 1:0, 3:2, 5:4, 7:6), which are passed as
;; separate constants to avx2_perm<mode>_1.
10754 (define_expand "avx2_perm<mode>"
10755 [(match_operand:VI8F_256 0 "register_operand")
10756 (match_operand:VI8F_256 1 "nonimmediate_operand")
10757 (match_operand:SI 2 "const_0_to_255_operand")]
10760 int mask = INTVAL (operands[2]);
10761 emit_insn (gen_avx2_perm<mode>_1 (operands[0], operands[1],
10762 GEN_INT ((mask >> 0) & 3),
10763 GEN_INT ((mask >> 2) & 3),
10764 GEN_INT ((mask >> 4) & 3),
10765 GEN_INT ((mask >> 6) & 3)));
;; vec_select form of the VI8F_256 permute with four selectors, each in
;; 0..3 (operands 2-5).  At output time the selectors are packed back into
;; one immediate (2 bits each, operand 2 in the low bits), which replaces
;; operands[2] before vperm<ssemodesuffix> is printed.
10769 (define_insn "avx2_perm<mode>_1"
10770 [(set (match_operand:VI8F_256 0 "register_operand" "=v")
10771 (vec_select:VI8F_256
10772 (match_operand:VI8F_256 1 "nonimmediate_operand" "vm")
10773 (parallel [(match_operand 2 "const_0_to_3_operand")
10774 (match_operand 3 "const_0_to_3_operand")
10775 (match_operand 4 "const_0_to_3_operand")
10776 (match_operand 5 "const_0_to_3_operand")])))]
10780 mask |= INTVAL (operands[2]) << 0;
10781 mask |= INTVAL (operands[3]) << 2;
10782 mask |= INTVAL (operands[4]) << 4;
10783 mask |= INTVAL (operands[5]) << 6;
10784 operands[2] = GEN_INT (mask);
10785 return "vperm<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}";
10787 [(set_attr "type" "sselog")
10788 (set_attr "prefix" "vex")
10789 (set_attr "mode" "<sseinsnmode>")])
;; Emits vperm2i128 on V4DI: two vector inputs (operand 2 may be in memory)
;; and an 8-bit immediate in operand 3.
10791 (define_insn "avx2_permv2ti"
10792 [(set (match_operand:V4DI 0 "register_operand" "=x")
10794 [(match_operand:V4DI 1 "register_operand" "x")
10795 (match_operand:V4DI 2 "nonimmediate_operand" "xm")
10796 (match_operand:SI 3 "const_0_to_255_operand" "n")]
10799 "vperm2i128\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10800 [(set_attr "type" "sselog")
10801 (set_attr "prefix" "vex")
10802 (set_attr "mode" "OI")])
;; Emits vbroadcastsd: element 0 of a V2DF register is duplicated into a
;; V4DF destination.  The source must be a register.
10804 (define_insn "avx2_vec_dupv4df"
10805 [(set (match_operand:V4DF 0 "register_operand" "=x")
10806 (vec_duplicate:V4DF
10808 (match_operand:V2DF 1 "register_operand" "x")
10809 (parallel [(const_int 0)]))))]
10811 "vbroadcastsd\t{%1, %0|%0, %1}"
10812 [(set_attr "type" "sselog1")
10813 (set_attr "prefix" "vex")
10814 (set_attr "mode" "V4DF")])
10816 ;; Modes handled by AVX vec_dup patterns.
;; Four modes, none of them conditional: V8SI, V8SF, V4DI and V4DF.
10817 (define_mode_iterator AVX_VEC_DUP_MODE
10818 [V8SI V8SF V4DI V4DF])
;; Duplicates a scalar into every element of an AVX_VEC_DUP_MODE vector.
;; Three alternatives, per the "isa" attribute: a memory source (any ISA),
;; a register source with AVX2 (printed with %x1), and a discouraged ("?")
;; register source without AVX2.
10820 (define_insn "vec_dup<mode>"
10821 [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand" "=x,x,x")
10822 (vec_duplicate:AVX_VEC_DUP_MODE
10823 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "m,x,?x")))]
10826 vbroadcast<ssescalarmodesuffix>\t{%1, %0|%0, %1}
10827 vbroadcast<ssescalarmodesuffix>\t{%x1, %0|%0, %x1}
10829 [(set_attr "type" "ssemov")
10830 (set_attr "prefix_extra" "1")
10831 (set_attr "prefix" "vex")
10832 (set_attr "isa" "*,avx2,noavx2")
10833 (set_attr "mode" "V8SF")])
;; Emits vbroadcasti128 for VI_256 modes.  The half-width source
;; (<ssehalfvecmode>) must be a memory operand.
10835 (define_insn "avx2_vbroadcasti128_<mode>"
10836 [(set (match_operand:VI_256 0 "register_operand" "=x")
10838 (match_operand:<ssehalfvecmode> 1 "memory_operand" "m")
10841 "vbroadcasti128\t{%1, %0|%0, %1}"
10842 [(set_attr "type" "ssemov")
10843 (set_attr "prefix_extra" "1")
10844 (set_attr "prefix" "vex")
10845 (set_attr "mode" "OI")])
;; After reload, on AVX without AVX2, a register-to-register vec_duplicate
;; is rewritten in two steps: duplicate the scalar into operand 2, which is
;; the destination register viewed in <ssehalfvecmode>, then concatenate
;; operand 2 with itself.
10848 [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand")
10849 (vec_duplicate:AVX_VEC_DUP_MODE
10850 (match_operand:<ssescalarmode> 1 "register_operand")))]
10851 "TARGET_AVX && !TARGET_AVX2 && reload_completed"
10852 [(set (match_dup 2)
10853 (vec_duplicate:<ssehalfvecmode> (match_dup 1)))
10855 (vec_concat:AVX_VEC_DUP_MODE (match_dup 2) (match_dup 2)))]
10856 "operands[2] = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (operands[0]));")
;; Broadcast of a half-width (<ssehalfvecmode>) value into a V_256 vector.
;; Three alternatives chosen by where the source lives:
;;   memory                        -> vbroadcast<i128>
;;   same register as destination  -> vinsert<i128> with immediate 1
;;   another register (discouraged) -> vperm2<i128> with immediate 0
10858 (define_insn "avx_vbroadcastf128_<mode>"
10859 [(set (match_operand:V_256 0 "register_operand" "=x,x,x")
10861 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "m,0,?x")
10865 vbroadcast<i128>\t{%1, %0|%0, %1}
10866 vinsert<i128>\t{$1, %1, %0, %0|%0, %0, %1, 1}
10867 vperm2<i128>\t{$0, %t1, %t1, %0|%0, %t1, %t1, 0}"
10868 [(set_attr "type" "ssemov,sselog1,sselog1")
10869 (set_attr "prefix_extra" "1")
10870 (set_attr "length_immediate" "0,1,1")
10871 (set_attr "prefix" "vex")
10872 (set_attr "mode" "<sseinsnmode>")])
10874 ;; Recognize broadcast as a vec_select as produced by builtin_vec_perm.
10875 ;; If it so happens that the input is in memory, use vbroadcast.
10876 ;; Otherwise use vpermilp (and in the case of 256-bit modes, vperm2f128).
;; V4SF broadcast of the element whose index is operand 3.  The output code
;; switches on which_alternative: a memory source is re-addressed to the
;; selected SFmode element (byte offset elt * 4) and emitted as
;; vbroadcastss; a register source is emitted as vpermilps with the
;; immediate elt * 0x55.
10877 (define_insn "*avx_vperm_broadcast_v4sf"
10878 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
10880 (match_operand:V4SF 1 "nonimmediate_operand" "m,o,x")
10881 (match_parallel 2 "avx_vbroadcast_operand"
10882 [(match_operand 3 "const_int_operand" "C,n,n")])))]
10885 int elt = INTVAL (operands[3]);
10886 switch (which_alternative)
10890 operands[1] = adjust_address_nv (operands[1], SFmode, elt * 4);
10891 return "vbroadcastss\t{%1, %0|%0, %k1}";
10893 operands[2] = GEN_INT (elt * 0x55);
10894 return "vpermilps\t{%2, %1, %0|%0, %1, %2}";
10896 gcc_unreachable ();
10899 [(set_attr "type" "ssemov,ssemov,sselog1")
10900 (set_attr "prefix_extra" "1")
10901 (set_attr "length_immediate" "0,0,1")
10902 (set_attr "prefix" "vex")
10903 (set_attr "mode" "SF,SF,V4SF")])
;; VF_256 broadcast of the element whose index is operand 3, split after
;; reload unless the mode is V4DF and AVX2 is available.  The split code
;; contains three strategies:
;;   - with AVX2 and element 0, emit vec_dup<mode> on the low scalar part;
;;   - replicate the element within its 128-bit lane using vpermil (mask
;;     15 or 0 for V4DF, (elt & 3) * 0x55 otherwise), then copy that lane to
;;     both halves with vperm2f128;
;;   - re-address operand 1 to the selected scalar so that the replacement
;;     vec_duplicate pattern applies.
;; NOTE(review): presumably the first two apply to a register source and
;; the last to a memory source -- confirm against the full split body.
10905 (define_insn_and_split "*avx_vperm_broadcast_<mode>"
10906 [(set (match_operand:VF_256 0 "register_operand" "=x,x,x")
10908 (match_operand:VF_256 1 "nonimmediate_operand" "m,o,?x")
10909 (match_parallel 2 "avx_vbroadcast_operand"
10910 [(match_operand 3 "const_int_operand" "C,n,n")])))]
10913 "&& reload_completed && (<MODE>mode != V4DFmode || !TARGET_AVX2)"
10914 [(set (match_dup 0) (vec_duplicate:VF_256 (match_dup 1)))]
10916 rtx op0 = operands[0], op1 = operands[1];
10917 int elt = INTVAL (operands[3]);
10923 if (TARGET_AVX2 && elt == 0)
10925 emit_insn (gen_vec_dup<mode> (op0, gen_lowpart (<ssescalarmode>mode,
10930 /* Shuffle element we care about into all elements of the 128-bit lane.
10931 The other lane gets shuffled too, but we don't care. */
10932 if (<MODE>mode == V4DFmode)
10933 mask = (elt & 1 ? 15 : 0);
10935 mask = (elt & 3) * 0x55;
10936 emit_insn (gen_avx_vpermil<mode> (op0, op1, GEN_INT (mask)));
10938 /* Shuffle the lane we care about into both lanes of the dest. */
10939 mask = (elt / (<ssescalarnum> / 2)) * 0x11;
10940 emit_insn (gen_avx_vperm2f128<mode>3 (op0, op0, op0, GEN_INT (mask)));
10944 operands[1] = adjust_address (op1, <ssescalarmode>mode,
10945 elt * GET_MODE_SIZE (<ssescalarmode>mode));
;; Expander for the immediate-controlled vpermil on VF2 modes.  The imm8 in
;; operand 2 is decoded into an explicit index vector: elements are handled
;; in pairs, with mask bit i picking one of the two elements of the pair for
;; slot i and mask bit i+1 doing the same for slot i+1 (both offsets are
;; relative to the pair base i).  The <ssescalarnum> indices are wrapped in
;; a PARALLEL built with gen_rtvec_v.
10948 (define_expand "<sse2_avx_avx512f>_vpermil<mode>"
10949 [(set (match_operand:VF2 0 "register_operand")
10951 (match_operand:VF2 1 "nonimmediate_operand")
10952 (match_operand:SI 2 "const_0_to_255_operand")))]
10955 int mask = INTVAL (operands[2]);
10956 rtx perm[<ssescalarnum>];
10959 for (i = 0; i < <ssescalarnum>; i = i + 2)
10961 perm[i] = GEN_INT (((mask >> i) & 1) + i);
10962 perm[i + 1] = GEN_INT (((mask >> (i + 1)) & 1) + i);
10966 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
;; Expander for the immediate-controlled vpermil on VF1 modes.  Elements are
;; handled in groups of four; the four 2-bit fields of the imm8 (bits 1:0,
;; 3:2, 5:4 and 7:6) select the source element for slots 0..3 of a group,
;; and the same four fields are reused for every group with the group base i
;; added.  The <ssescalarnum> indices are wrapped in a PARALLEL built with
;; gen_rtvec_v.
10969 (define_expand "<sse2_avx_avx512f>_vpermil<mode>"
10970 [(set (match_operand:VF1 0 "register_operand")
10972 (match_operand:VF1 1 "nonimmediate_operand")
10973 (match_operand:SI 2 "const_0_to_255_operand")))]
10976 int mask = INTVAL (operands[2]);
10977 rtx perm[<ssescalarnum>];
10980 for (i = 0; i < <ssescalarnum>; i = i + 4)
10982 perm[i] = GEN_INT (((mask >> 0) & 3) + i);
10983 perm[i + 1] = GEN_INT (((mask >> 2) & 3) + i);
10984 perm[i + 2] = GEN_INT (((mask >> 4) & 3) + i);
10985 perm[i + 3] = GEN_INT (((mask >> 6) & 3) + i);
10989 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
;; Matcher for an explicit selection parallel (operand 2) over a VF source.
;; The insn condition requires avx_vpermilp_parallel to return nonzero for
;; the parallel; the output code calls it again and uses the return value
;; minus one as the immediate, replacing operand 2 with that constant before
;; printing vpermil<ssemodesuffix>.
10992 (define_insn "*<sse2_avx_avx512f>_vpermilp<mode>"
10993 [(set (match_operand:VF 0 "register_operand" "=v")
10995 (match_operand:VF 1 "nonimmediate_operand" "vm")
10996 (match_parallel 2 ""
10997 [(match_operand 3 "const_int_operand")])))]
10999 && avx_vpermilp_parallel (operands[2], <MODE>mode)"
11001 int mask = avx_vpermilp_parallel (operands[2], <MODE>mode) - 1;
11002 operands[2] = GEN_INT (mask);
11003 return "vpermil<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}";
11005 [(set_attr "type" "sselog")
11006 (set_attr "prefix_extra" "1")
11007 (set_attr "length_immediate" "1")
11008 (set_attr "prefix" "vex")
11009 (set_attr "mode" "<sseinsnmode>")])
;; Variable-control form of vpermil: the data (operand 1) must be in a
;; register, while the control vector (operand 2, in the integer vector mode
;; <sseintvecmode>) may be a register or memory.  No immediate is involved,
;; so there is no length_immediate attribute here.
11011 (define_insn "<sse2_avx_avx512f>_vpermilvar<mode>3"
11012 [(set (match_operand:VF 0 "register_operand" "=v")
11014 [(match_operand:VF 1 "register_operand" "v")
11015 (match_operand:<sseintvecmode> 2 "nonimmediate_operand" "vm")]
11018 "vpermil<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11019 [(set_attr "type" "sselog")
11020 (set_attr "prefix_extra" "1")
11021 (set_attr "btver2_decode" "vector")
11022 (set_attr "prefix" "vex")
11023 (set_attr "mode" "<sseinsnmode>")])
;; Expander for vperm2f128 with an imm8 in operand 3.  The default RTL is an
;; UNSPEC_VPERMIL2F128.  When neither bit 3 nor bit 7 of the immediate is
;; set ((mask & 0x88) == 0) the preparation code instead builds a
;; vec_select out of (vec_concat operands[1] operands[2]): bits 1:0 of the
;; mask pick the 128-bit half that supplies the low nelt/2 result elements
;; and bits 5:4 pick the half that supplies the high nelt/2 elements.  The
;; resulting SET is stored in t2.
11026 (define_expand "avx_vperm2f128<mode>3"
11027 [(set (match_operand:AVX256MODE2P 0 "register_operand")
11028 (unspec:AVX256MODE2P
11029 [(match_operand:AVX256MODE2P 1 "register_operand")
11030 (match_operand:AVX256MODE2P 2 "nonimmediate_operand")
11031 (match_operand:SI 3 "const_0_to_255_operand")]
11032 UNSPEC_VPERMIL2F128))]
11035 int mask = INTVAL (operands[3]);
11036 if ((mask & 0x88) == 0)
11038 rtx perm[<ssescalarnum>], t1, t2;
11039 int i, base, nelt = <ssescalarnum>, nelt2 = nelt / 2;
11041 base = (mask & 3) * nelt2;
11042 for (i = 0; i < nelt2; ++i)
11043 perm[i] = GEN_INT (base + i);
11045 base = ((mask >> 4) & 3) * nelt2;
11046 for (i = 0; i < nelt2; ++i)
11047 perm[i + nelt2] = GEN_INT (base + i);
11049 t2 = gen_rtx_VEC_CONCAT (<ssedoublevecmode>mode,
11050 operands[1], operands[2]);
11051 t1 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, perm));
11052 t2 = gen_rtx_VEC_SELECT (<MODE>mode, t2, t1);
11053 t2 = gen_rtx_SET (VOIDmode, operands[0], t2);
11059 ;; Note that bits 7 and 3 of the imm8 allow lanes to be zeroed, which
11060 ;; means that in order to represent this properly in rtl we'd have to
11061 ;; nest *another* vec_concat with a zero operand and do the select from
11062 ;; a 4x wide vector. That doesn't seem very nice.
;; Unspec form of vperm2<i128>: the full imm8 (operand 3) is passed through
;; to the instruction unchanged, so any immediate in 0..255 is accepted.
11063 (define_insn "*avx_vperm2f128<mode>_full"
11064 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
11065 (unspec:AVX256MODE2P
11066 [(match_operand:AVX256MODE2P 1 "register_operand" "x")
11067 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm")
11068 (match_operand:SI 3 "const_0_to_255_operand" "n")]
11069 UNSPEC_VPERMIL2F128))]
11071 "vperm2<i128>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11072 [(set_attr "type" "sselog")
11073 (set_attr "prefix_extra" "1")
11074 (set_attr "length_immediate" "1")
11075 (set_attr "prefix" "vex")
11076 (set_attr "mode" "<sseinsnmode>")])
;; vec_select form of vperm2<i128>: the result is selected from the
;; concatenation of operands 1 and 2 by the parallel in operand 3.  The insn
;; condition requires avx_vperm2f128_parallel to accept the parallel; its
;; return value minus one is the immediate.  Two special immediates are
;; printed as vinsert<i128> (with 0 or 1) using the 128-bit view of operand
;; 2; every other immediate is stored into operand 3 and printed as
;; vperm2<i128>.
;; NOTE(review): the tests selecting the two vinsert forms are not visible
;; in this listing; confirm the exact mask values against the full source.
11078 (define_insn "*avx_vperm2f128<mode>_nozero"
11079 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
11080 (vec_select:AVX256MODE2P
11081 (vec_concat:<ssedoublevecmode>
11082 (match_operand:AVX256MODE2P 1 "register_operand" "x")
11083 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm"))
11084 (match_parallel 3 ""
11085 [(match_operand 4 "const_int_operand")])))]
11087 && avx_vperm2f128_parallel (operands[3], <MODE>mode)"
11089 int mask = avx_vperm2f128_parallel (operands[3], <MODE>mode) - 1;
11091 return "vinsert<i128>\t{$0, %x2, %1, %0|%0, %1, %x2, 0}";
11093 return "vinsert<i128>\t{$1, %x2, %1, %0|%0, %1, %x2, 1}";
11094 operands[3] = GEN_INT (mask);
11095 return "vperm2<i128>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
11097 [(set_attr "type" "sselog")
11098 (set_attr "prefix_extra" "1")
11099 (set_attr "length_immediate" "1")
11100 (set_attr "prefix" "vex")
11101 (set_attr "mode" "<sseinsnmode>")])
;; Expander that inserts a half-width vector (operand 2) into a V_256 value
;; (operand 1).  Operand 3 is restricted to 0 or 1 by its predicate and
;; selects the generator: gen_vec_set_lo_<mode> or gen_vec_set_hi_<mode>.
;; The chosen generator is then called with operands 0, 1 and 2.
11103 (define_expand "avx_vinsertf128<mode>"
11104 [(match_operand:V_256 0 "register_operand")
11105 (match_operand:V_256 1 "register_operand")
11106 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand")
11107 (match_operand:SI 3 "const_0_to_1_operand")]
11110 rtx (*insn)(rtx, rtx, rtx);
11112 switch (INTVAL (operands[3]))
11115 insn = gen_vec_set_lo_<mode>;
11118 insn = gen_vec_set_hi_<mode>;
11121 gcc_unreachable ();
11124 emit_insn (insn (operands[0], operands[1], operands[2]));
;; V4DI low-half insert: combines the V2DI operand 2 with elements 2 and 3
;; of operand 1 and emits vinserti128 with immediate 0.
11128 (define_insn "avx2_vec_set_lo_v4di"
11129 [(set (match_operand:V4DI 0 "register_operand" "=x")
11131 (match_operand:V2DI 2 "nonimmediate_operand" "xm")
11133 (match_operand:V4DI 1 "register_operand" "x")
11134 (parallel [(const_int 2) (const_int 3)]))))]
11136 "vinserti128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
11137 [(set_attr "type" "sselog")
11138 (set_attr "prefix_extra" "1")
11139 (set_attr "length_immediate" "1")
11140 (set_attr "prefix" "vex")
11141 (set_attr "mode" "OI")])
;; V4DI high-half insert: combines elements 0 and 1 of operand 1 with the
;; V2DI operand 2 and emits vinserti128 with immediate 1.
11143 (define_insn "avx2_vec_set_hi_v4di"
11144 [(set (match_operand:V4DI 0 "register_operand" "=x")
11147 (match_operand:V4DI 1 "register_operand" "x")
11148 (parallel [(const_int 0) (const_int 1)]))
11149 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
11151 "vinserti128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
11152 [(set_attr "type" "sselog")
11153 (set_attr "prefix_extra" "1")
11154 (set_attr "length_immediate" "1")
11155 (set_attr "prefix" "vex")
11156 (set_attr "mode" "OI")])
;; Replace the low half of a four-element 256-bit vector (VI8F_256): the
;; result is operand 2 concatenated with elements 2 and 3 of operand 1.
;; Emitted as vinsert<i128> with immediate 0.
11158 (define_insn "vec_set_lo_<mode>"
11159 [(set (match_operand:VI8F_256 0 "register_operand" "=x")
11160 (vec_concat:VI8F_256
11161 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")
11162 (vec_select:<ssehalfvecmode>
11163 (match_operand:VI8F_256 1 "register_operand" "x")
11164 (parallel [(const_int 2) (const_int 3)]))))]
11166 "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
11167 [(set_attr "type" "sselog")
11168 (set_attr "prefix_extra" "1")
11169 (set_attr "length_immediate" "1")
11170 (set_attr "prefix" "vex")
11171 (set_attr "mode" "<sseinsnmode>")])
;; Replace the high half of a four-element 256-bit vector (VI8F_256): the
;; result is elements 0 and 1 of operand 1 concatenated with operand 2.
;; Emitted as vinsert<i128> with immediate 1.
11173 (define_insn "vec_set_hi_<mode>"
11174 [(set (match_operand:VI8F_256 0 "register_operand" "=x")
11175 (vec_concat:VI8F_256
11176 (vec_select:<ssehalfvecmode>
11177 (match_operand:VI8F_256 1 "register_operand" "x")
11178 (parallel [(const_int 0) (const_int 1)]))
11179 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")))]
11181 "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
11182 [(set_attr "type" "sselog")
11183 (set_attr "prefix_extra" "1")
11184 (set_attr "length_immediate" "1")
11185 (set_attr "prefix" "vex")
11186 (set_attr "mode" "<sseinsnmode>")])
;; Replace the low half of an eight-element 256-bit vector (VI4F_256): the
;; result is operand 2 concatenated with elements 4..7 of operand 1.
;; Emitted as vinsert<i128> with immediate 0.
11188 (define_insn "vec_set_lo_<mode>"
11189 [(set (match_operand:VI4F_256 0 "register_operand" "=x")
11190 (vec_concat:VI4F_256
11191 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")
11192 (vec_select:<ssehalfvecmode>
11193 (match_operand:VI4F_256 1 "register_operand" "x")
11194 (parallel [(const_int 4) (const_int 5)
11195 (const_int 6) (const_int 7)]))))]
11197 "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
11198 [(set_attr "type" "sselog")
11199 (set_attr "prefix_extra" "1")
11200 (set_attr "length_immediate" "1")
11201 (set_attr "prefix" "vex")
11202 (set_attr "mode" "<sseinsnmode>")])
;; Replace the high half of an eight-element 256-bit vector (VI4F_256): the
;; result is elements 0..3 of operand 1 concatenated with operand 2.
;; Emitted as vinsert<i128> with immediate 1.
11204 (define_insn "vec_set_hi_<mode>"
11205 [(set (match_operand:VI4F_256 0 "register_operand" "=x")
11206 (vec_concat:VI4F_256
11207 (vec_select:<ssehalfvecmode>
11208 (match_operand:VI4F_256 1 "register_operand" "x")
11209 (parallel [(const_int 0) (const_int 1)
11210 (const_int 2) (const_int 3)]))
11211 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")))]
11213 "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
11214 [(set_attr "type" "sselog")
11215 (set_attr "prefix_extra" "1")
11216 (set_attr "length_immediate" "1")
11217 (set_attr "prefix" "vex")
11218 (set_attr "mode" "<sseinsnmode>")])
;; V16HI low-half insert: the V8HI operand 2 is combined with elements
;; 8..15 of operand 1.  The mnemonic is written as vinsert%~128 with
;; immediate 0, so the letter between vinsert and 128 is supplied by the
;; %~ output escape rather than by a mode attribute.
11220 (define_insn "vec_set_lo_v16hi"
11221 [(set (match_operand:V16HI 0 "register_operand" "=x")
11223 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
11225 (match_operand:V16HI 1 "register_operand" "x")
11226 (parallel [(const_int 8) (const_int 9)
11227 (const_int 10) (const_int 11)
11228 (const_int 12) (const_int 13)
11229 (const_int 14) (const_int 15)]))))]
11231 "vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
11232 [(set_attr "type" "sselog")
11233 (set_attr "prefix_extra" "1")
11234 (set_attr "length_immediate" "1")
11235 (set_attr "prefix" "vex")
11236 (set_attr "mode" "OI")])
;; V16HI high-half insert: elements 0..7 of operand 1 are combined with the
;; V8HI operand 2.  Emitted as vinsert%~128 with immediate 1.
11238 (define_insn "vec_set_hi_v16hi"
11239 [(set (match_operand:V16HI 0 "register_operand" "=x")
11242 (match_operand:V16HI 1 "register_operand" "x")
11243 (parallel [(const_int 0) (const_int 1)
11244 (const_int 2) (const_int 3)
11245 (const_int 4) (const_int 5)
11246 (const_int 6) (const_int 7)]))
11247 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
11249 "vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
11250 [(set_attr "type" "sselog")
11251 (set_attr "prefix_extra" "1")
11252 (set_attr "length_immediate" "1")
11253 (set_attr "prefix" "vex")
11254 (set_attr "mode" "OI")])
;; V32QI low-half insert: the V16QI operand 2 is combined with elements
;; 16..31 of operand 1.  Emitted as vinsert%~128 with immediate 0.
11256 (define_insn "vec_set_lo_v32qi"
11257 [(set (match_operand:V32QI 0 "register_operand" "=x")
11259 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
11261 (match_operand:V32QI 1 "register_operand" "x")
11262 (parallel [(const_int 16) (const_int 17)
11263 (const_int 18) (const_int 19)
11264 (const_int 20) (const_int 21)
11265 (const_int 22) (const_int 23)
11266 (const_int 24) (const_int 25)
11267 (const_int 26) (const_int 27)
11268 (const_int 28) (const_int 29)
11269 (const_int 30) (const_int 31)]))))]
11271 "vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
11272 [(set_attr "type" "sselog")
11273 (set_attr "prefix_extra" "1")
11274 (set_attr "length_immediate" "1")
11275 (set_attr "prefix" "vex")
11276 (set_attr "mode" "OI")])
;; V32QI high-half insert: elements 0..15 of operand 1 are combined with the
;; V16QI operand 2.  Emitted as vinsert%~128 with immediate 1.
11278 (define_insn "vec_set_hi_v32qi"
11279 [(set (match_operand:V32QI 0 "register_operand" "=x")
11282 (match_operand:V32QI 1 "register_operand" "x")
11283 (parallel [(const_int 0) (const_int 1)
11284 (const_int 2) (const_int 3)
11285 (const_int 4) (const_int 5)
11286 (const_int 6) (const_int 7)
11287 (const_int 8) (const_int 9)
11288 (const_int 10) (const_int 11)
11289 (const_int 12) (const_int 13)
11290 (const_int 14) (const_int 15)]))
11291 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
11293 "vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
11294 [(set_attr "type" "sselog")
11295 (set_attr "prefix_extra" "1")
11296 (set_attr "length_immediate" "1")
11297 (set_attr "prefix" "vex")
11298 (set_attr "mode" "OI")])
;; Masked vector load.  Operand 2 is the mask, held in a register of the
;; integer vector mode <sseintvecmode>; operand 1 must be a memory operand.
;; Note the operand numbering: the mask is operand 2 and the memory source
;; operand 1, which is why the template prints %1 before %2 in AT&T order.
11300 (define_insn "<avx_avx2>_maskload<ssemodesuffix><avxsizesuffix>"
11301 [(set (match_operand:V48_AVX2 0 "register_operand" "=x")
11303 [(match_operand:<sseintvecmode> 2 "register_operand" "x")
11304 (match_operand:V48_AVX2 1 "memory_operand" "m")]
11307 "v<sseintprefix>maskmov<ssemodesuffix>\t{%1, %2, %0|%0, %2, %1}"
11308 [(set_attr "type" "sselog1")
11309 (set_attr "prefix_extra" "1")
11310 (set_attr "prefix" "vex")
11311 (set_attr "btver2_decode" "vector")
11312 (set_attr "mode" "<sseinsnmode>")])
;; Masked vector store.  Operand 1 is the mask (integer vector register) and
;; operand 2 the data register.  The memory destination uses the +m
;; constraint, i.e. it is treated as both read and written.
11314 (define_insn "<avx_avx2>_maskstore<ssemodesuffix><avxsizesuffix>"
11315 [(set (match_operand:V48_AVX2 0 "memory_operand" "+m")
11317 [(match_operand:<sseintvecmode> 1 "register_operand" "x")
11318 (match_operand:V48_AVX2 2 "register_operand" "x")
11322 "v<sseintprefix>maskmov<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11323 [(set_attr "type" "sselog1")
11324 (set_attr "prefix_extra" "1")
11325 (set_attr "prefix" "vex")
11326 (set_attr "btver2_decode" "vector")
11327 (set_attr "mode" "<sseinsnmode>")])
;; Cast from a half-width vector (operand 1) to a 256-bit mode, kept as an
;; unspec until reload has completed and then split into a plain move.
;; The split code re-views one operand as a hard register in the other
;; operand's mode -- operand 0 in <ssehalfvecmode>, or operand 1 in <MODE> --
;; and emits emit_move_insn on the pair.
;; NOTE(review): the condition choosing which operand is re-viewed is not
;; visible in this listing; confirm against the full source.
11329 (define_insn_and_split "avx_<castmode><avxsizesuffix>_<castmode>"
11330 [(set (match_operand:AVX256MODE2P 0 "nonimmediate_operand" "=x,m")
11331 (unspec:AVX256MODE2P
11332 [(match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "xm,x")]
11336 "&& reload_completed"
11339 rtx op0 = operands[0];
11340 rtx op1 = operands[1];
11342 op0 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op0));
11344 op1 = gen_rtx_REG (<MODE>mode, REGNO (op1));
11345 emit_move_insn (op0, op1);
;; Vector initialisation for V_256 modes: delegates entirely to
;; ix86_expand_vector_init, passing false as the first argument followed by
;; the destination (operand 0) and operand 1.
11349 (define_expand "vec_init<mode>"
11350 [(match_operand:V_256 0 "register_operand")
11354 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Extract one V2DI half of a V4DI register.  Operand 2 is restricted to 0
;; or 1 by its predicate and selects gen_vec_extract_lo_v4di or
;; gen_vec_extract_hi_v4di, which is then called with operands 0 and 1.
11358 (define_expand "avx2_extracti128"
11359 [(match_operand:V2DI 0 "nonimmediate_operand")
11360 (match_operand:V4DI 1 "register_operand")
11361 (match_operand:SI 2 "const_0_to_1_operand")]
11364 rtx (*insn)(rtx, rtx);
11366 switch (INTVAL (operands[2]))
11369 insn = gen_vec_extract_lo_v4di;
11372 insn = gen_vec_extract_hi_v4di;
11375 gcc_unreachable ();
11378 emit_insn (insn (operands[0], operands[1]));
;; Insert a V2DI value (operand 2) into one half of a V4DI register
;; (operand 1).  Operand 3 is restricted to 0 or 1 by its predicate and
;; selects gen_avx2_vec_set_lo_v4di or gen_avx2_vec_set_hi_v4di, which is
;; then called with operands 0, 1 and 2.
11382 (define_expand "avx2_inserti128"
11383 [(match_operand:V4DI 0 "register_operand")
11384 (match_operand:V4DI 1 "register_operand")
11385 (match_operand:V2DI 2 "nonimmediate_operand")
11386 (match_operand:SI 3 "const_0_to_1_operand")]
11389 rtx (*insn)(rtx, rtx, rtx);
11391 switch (INTVAL (operands[3]))
11394 insn = gen_avx2_vec_set_lo_v4di;
11397 insn = gen_avx2_vec_set_hi_v4di;
11400 gcc_unreachable ();
11403 emit_insn (insn (operands[0], operands[1], operands[2]));
;; Shift with a vector count for VI4_AVX2 modes: operand 1 is the register
;; source and operand 2 (register or memory, same vector mode) the counts.
;; Always printed as vpsravd.
11407 (define_insn "avx2_ashrv<mode>"
11408 [(set (match_operand:VI4_AVX2 0 "register_operand" "=v")
11410 (match_operand:VI4_AVX2 1 "register_operand" "v")
11411 (match_operand:VI4_AVX2 2 "nonimmediate_operand" "vm")))]
11413 "vpsravd\t{%2, %1, %0|%0, %1, %2}"
11414 [(set_attr "type" "sseishft")
11415 (set_attr "prefix" "vex")
11416 (set_attr "mode" "<sseinsnmode>")])
;; Shifts with a vector count for VI48_AVX2 modes, instantiated once per
;; code in the any_lshift iterator.  The mnemonic is assembled from the
;; <vshift> code attribute and the <ssemodesuffix> mode attribute.
11418 (define_insn "avx2_<shift_insn>v<mode>"
11419 [(set (match_operand:VI48_AVX2 0 "register_operand" "=v")
11420 (any_lshift:VI48_AVX2
11421 (match_operand:VI48_AVX2 1 "register_operand" "v")
11422 (match_operand:VI48_AVX2 2 "nonimmediate_operand" "vm")))]
11424 "vp<vshift>v<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11425 [(set_attr "type" "sseishft")
11426 (set_attr "prefix" "vex")
11427 (set_attr "mode" "<sseinsnmode>")])
11429 ;; For avx_vec_concat<mode> insn pattern
;; Maps each 256-bit mode to the letter t and each 512-bit mode to the
;; letter g.  The letter is spliced into the output template as an operand
;; modifier (written %<concat_tg_mode>1 in avx_vec_concat<mode>).
11430 (define_mode_attr concat_tg_mode
11431 [(V32QI "t") (V16HI "t") (V8SI "t") (V4DI "t") (V8SF "t") (V4DF "t")
11432 (V64QI "g") (V32HI "g") (V16SI "g") (V8DI "g") (V16SF "g") (V8DF "g")])
;; Concatenate two half-width vectors into a 256- or 512-bit register.
;;  - First alternative (operand 2 in a register or memory): vinsert<i128>
;;    with immediate 1, printing operand 1 with the <concat_tg_mode>
;;    modifier.
;;  - Second alternative (operand 2 matches the C constraint): a single
;;    vmovaps / vmovapd / vmovdqa of operand 1 into the %t0 or %x0 view of
;;    the destination, chosen from get_attr_mode (insn).
;; The prefix_extra and length_immediate attributes apply to the first
;; alternative only.
11434 (define_insn "avx_vec_concat<mode>"
11435 [(set (match_operand:V_256_512 0 "register_operand" "=x,x")
11436 (vec_concat:V_256_512
11437 (match_operand:<ssehalfvecmode> 1 "register_operand" "x,x")
11438 (match_operand:<ssehalfvecmode> 2 "vector_move_operand" "xm,C")))]
11441 switch (which_alternative)
11444 return "vinsert<i128>\t{$0x1, %2, %<concat_tg_mode>1, %0|%0, %<concat_tg_mode>1, %2, 0x1}";
11446 switch (get_attr_mode (insn))
11449 return "vmovaps\t{%1, %t0|%t0, %1}";
11451 return "vmovapd\t{%1, %t0|%t0, %1}";
11453 return "vmovaps\t{%1, %x0|%x0, %1}";
11455 return "vmovapd\t{%1, %x0|%x0, %1}";
11457 return "vmovdqa\t{%1, %t0|%t0, %1}";
11459 return "vmovdqa\t{%1, %x0|%x0, %1}";
11461 gcc_unreachable ();
11464 gcc_unreachable ();
11467 [(set_attr "type" "sselog,ssemov")
11468 (set_attr "prefix_extra" "1,*")
11469 (set_attr "length_immediate" "1,*")
11470 (set_attr "prefix" "maybe_evex")
11471 (set_attr "mode" "<sseinsnmode>")])
;; Register form of the 128-bit vcvtph2ps.  The conversion is modelled as a
;; V8SF unspec of the V8HI source from which elements 0..3 are selected to
;; give the V4SF result.
11473 (define_insn "vcvtph2ps"
11474 [(set (match_operand:V4SF 0 "register_operand" "=x")
11476 (unspec:V8SF [(match_operand:V8HI 1 "register_operand" "x")]
11478 (parallel [(const_int 0) (const_int 1)
11479 (const_int 2) (const_int 3)])))]
11481 "vcvtph2ps\t{%1, %0|%0, %1}"
11482 [(set_attr "type" "ssecvt")
11483 (set_attr "prefix" "vex")
11484 (set_attr "mode" "V4SF")])
;; Memory form of the 128-bit vcvtph2ps: the source is a V4HI memory operand
;; and the result is produced directly as a V4SF unspec.
;; NOTE(review): the mode attribute here is V8SF although the result is
;; V4SF and the register form above uses V4SF -- confirm this is intended.
11486 (define_insn "*vcvtph2ps_load"
11487 [(set (match_operand:V4SF 0 "register_operand" "=x")
11488 (unspec:V4SF [(match_operand:V4HI 1 "memory_operand" "m")]
11489 UNSPEC_VCVTPH2PS))]
11491 "vcvtph2ps\t{%1, %0|%0, %1}"
11492 [(set_attr "type" "ssecvt")
11493 (set_attr "prefix" "vex")
11494 (set_attr "mode" "V8SF")])
;; 256-bit vcvtph2ps: converts a V8HI source (register or memory) into a
;; V8SF register.
11496 (define_insn "vcvtph2ps256"
11497 [(set (match_operand:V8SF 0 "register_operand" "=x")
11498 (unspec:V8SF [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
11499 UNSPEC_VCVTPH2PS))]
11501 "vcvtph2ps\t{%1, %0|%0, %1}"
11502 [(set_attr "type" "ssecvt")
11503 (set_attr "prefix" "vex")
11504 (set_attr "btver2_decode" "double")
11505 (set_attr "mode" "V8SF")])
;; Expander for the 128-bit vcvtps2ph with a register destination.  The
;; preparation statement sets operand 3 to the V4HI zero constant
;; (CONST0_RTX (V4HImode)); operand 2 is the imm8 passed to the instruction.
11507 (define_expand "vcvtps2ph"
11508 [(set (match_operand:V8HI 0 "register_operand")
11510 (unspec:V4HI [(match_operand:V4SF 1 "register_operand")
11511 (match_operand:SI 2 "const_0_to_255_operand")]
11515 "operands[3] = CONST0_RTX (V4HImode);")
;; Register-destination matcher for the 128-bit vcvtps2ph.  Operand 3 must
;; be a V4HI zero constant (const0_operand) and does not appear in the
;; output template; operand 2 is the immediate.
11517 (define_insn "*vcvtps2ph"
11518 [(set (match_operand:V8HI 0 "register_operand" "=x")
11520 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")
11521 (match_operand:SI 2 "const_0_to_255_operand" "N")]
11523 (match_operand:V4HI 3 "const0_operand")))]
11525 "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
11526 [(set_attr "type" "ssecvt")
11527 (set_attr "prefix" "vex")
11528 (set_attr "mode" "V4SF")])
;; Memory-destination form of the 128-bit vcvtps2ph: the V4HI unspec result
;; is written straight to a V4HI memory operand, with no zero-constant
;; operand in the pattern.
11530 (define_insn "*vcvtps2ph_store"
11531 [(set (match_operand:V4HI 0 "memory_operand" "=m")
11532 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")
11533 (match_operand:SI 2 "const_0_to_255_operand" "N")]
11534 UNSPEC_VCVTPS2PH))]
11536 "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
11537 [(set_attr "type" "ssecvt")
11538 (set_attr "prefix" "vex")
11539 (set_attr "mode" "V4SF")])
;; 256-bit vcvtps2ph: converts a V8SF register into a V8HI result that may
;; live in a register or in memory (single =xm alternative).  Operand 2 is
;; the immediate.
11541 (define_insn "vcvtps2ph256"
11542 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=xm")
11543 (unspec:V8HI [(match_operand:V8SF 1 "register_operand" "x")
11544 (match_operand:SI 2 "const_0_to_255_operand" "N")]
11545 UNSPEC_VCVTPS2PH))]
11547 "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
11548 [(set_attr "type" "ssecvt")
11549 (set_attr "prefix" "vex")
11550 (set_attr "btver2_decode" "vector")
11551 (set_attr "mode" "V8SF")])
11553 ;; For gather* insn patterns
;; Result modes of the gather patterns: the 128- and 256-bit vectors with
;; 32- or 64-bit integer or floating-point elements.
11554 (define_mode_iterator VEC_GATHER_MODE
11555 [V2DI V2DF V4DI V4DF V4SI V4SF V8SI V8SF])
;; Index vector mode used by the gathersi patterns for each result mode.
;; The index is V4SI for every mode except the eight-element ones, which
;; use V8SI.
11556 (define_mode_attr VEC_GATHER_IDXSI
11557 [(V2DI "V4SI") (V2DF "V4SI")
11558 (V4DI "V4SI") (V4DF "V4SI")
11559 (V4SI "V4SI") (V4SF "V4SI")
11560 (V8SI "V8SI") (V8SF "V8SI")])
;; Index vector mode used by the gatherdi patterns for each result mode:
;; V2DI for the 128-bit result modes and V4DI for the 256-bit ones.
11561 (define_mode_attr VEC_GATHER_IDXDI
11562 [(V2DI "V2DI") (V2DF "V2DI")
11563 (V4DI "V4DI") (V4DF "V4DI")
11564 (V4SI "V2DI") (V4SF "V2DI")
11565 (V8SI "V4DI") (V8SF "V4DI")])
;; Mode of the register operands 1 and 4 in the gatherdi expander.  It is
;; the result mode itself except for V8SI and V8SF, which map to the
;; half-width V4SI and V4SF.
11566 (define_mode_attr VEC_GATHER_SRCDI
11567 [(V2DI "V2DI") (V2DF "V2DF")
11568 (V4DI "V4DI") (V4DF "V4DF")
11569 (V4SI "V4SI") (V4SF "V4SF")
11570 (V8SI "V4SI") (V8SF "V4SF")])
;; Expander for gathers with 32-bit indices.  Operand 1 and operand 4 are
;; registers in the result mode, operand 2 the base address, operand 3 the
;; index vector (<VEC_GATHER_IDXSI>) and operand 5 the scale, which must be
;; 1, 2, 4 or 8.  Operand 6 is a scratch in the result mode that the insn
;; clobbers.  The preparation code wraps base, index and scale into a Pmode
;; UNSPEC_VSIBADDR.
;; The scale predicate name is spelled without a trailing blank so that it
;; is exactly the name used by the matching define_insn patterns.
11572 (define_expand "avx2_gathersi<mode>"
11573 [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand")
11574 (unspec:VEC_GATHER_MODE
11575 [(match_operand:VEC_GATHER_MODE 1 "register_operand")
11576 (mem:<ssescalarmode>
11578 [(match_operand 2 "vsib_address_operand")
11579 (match_operand:<VEC_GATHER_IDXSI>
11580 3 "register_operand")
11581 (match_operand:SI 5 "const1248_operand")]))
11582 (mem:BLK (scratch))
11583 (match_operand:VEC_GATHER_MODE 4 "register_operand")]
11585 (clobber (match_scratch:VEC_GATHER_MODE 6))])]
11589 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
11590 operands[5]), UNSPEC_VSIBADDR);
;; Matcher for the 32-bit-index gather.  Operand 2 is tied to the
;; destination (constraint 0) and operand 5 is tied to the clobbered scratch
;; operand 1 (constraint 1).  Destination and scratch are both
;; earlyclobber (=&x).  The template prints the scratch (%1), the VSIB
;; memory operator (%7) and the destination (%0).
11593 (define_insn "*avx2_gathersi<mode>"
11594 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
11595 (unspec:VEC_GATHER_MODE
11596 [(match_operand:VEC_GATHER_MODE 2 "register_operand" "0")
11597 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
11599 [(match_operand:P 3 "vsib_address_operand" "p")
11600 (match_operand:<VEC_GATHER_IDXSI> 4 "register_operand" "x")
11601 (match_operand:SI 6 "const1248_operand" "n")]
11603 (mem:BLK (scratch))
11604 (match_operand:VEC_GATHER_MODE 5 "register_operand" "1")]
11606 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
11608 "v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %7, %0|%0, %7, %1}"
11609 [(set_attr "type" "ssemov")
11610 (set_attr "prefix" "vex")
11611 (set_attr "mode" "<sseinsnmode>")])
;; Second matcher for the 32-bit-index gather.  Among the visible operands
;; there is no register operand tied to the destination; the remaining
;; operands are renumbered accordingly (VSIB operator is operand 6, the
;; operand tied to the scratch is operand 4).
;; NOTE(review): the first element of the unspec vector is not visible in
;; this listing; confirm what replaces the tied source operand.
11613 (define_insn "*avx2_gathersi<mode>_2"
11614 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
11615 (unspec:VEC_GATHER_MODE
11617 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
11619 [(match_operand:P 2 "vsib_address_operand" "p")
11620 (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand" "x")
11621 (match_operand:SI 5 "const1248_operand" "n")]
11623 (mem:BLK (scratch))
11624 (match_operand:VEC_GATHER_MODE 4 "register_operand" "1")]
11626 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
11628 "v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %6, %0|%0, %6, %1}"
11629 [(set_attr "type" "ssemov")
11630 (set_attr "prefix" "vex")
11631 (set_attr "mode" "<sseinsnmode>")])
;; Expander for gathers with 64-bit indices.  Operands 1 and 4 are
;; registers in <VEC_GATHER_SRCDI>, operand 2 the base address, operand 3
;; the index vector (<VEC_GATHER_IDXDI>) and operand 5 the scale, which must
;; be 1, 2, 4 or 8.  Operand 6 is a scratch in the result mode that the insn
;; clobbers.  The preparation code wraps base, index and scale into a Pmode
;; UNSPEC_VSIBADDR.
;; The scale predicate name is spelled without a trailing blank so that it
;; is exactly the name used by the matching define_insn patterns.
11633 (define_expand "avx2_gatherdi<mode>"
11634 [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand")
11635 (unspec:VEC_GATHER_MODE
11636 [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand")
11637 (mem:<ssescalarmode>
11639 [(match_operand 2 "vsib_address_operand")
11640 (match_operand:<VEC_GATHER_IDXDI>
11641 3 "register_operand")
11642 (match_operand:SI 5 "const1248_operand")]))
11643 (mem:BLK (scratch))
11644 (match_operand:<VEC_GATHER_SRCDI>
11645 4 "register_operand")]
11647 (clobber (match_scratch:VEC_GATHER_MODE 6))])]
11651 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
11652 operands[5]), UNSPEC_VSIBADDR);
;; Matcher for the 64-bit-index gather.  Operand 2 (mode
;; <VEC_GATHER_SRCDI>) is tied to the destination and operand 5 to the
;; clobbered scratch operand 1.  The template prints operand 2 rather than
;; operand 0 as the destination and operand 5 rather than operand 1 as the
;; mask: both name the same registers through the tie constraints but carry
;; the <VEC_GATHER_SRCDI> mode.
;; NOTE(review): presumably this is done so the register is printed at the
;; width of the SRCDI mode when it is narrower than the result mode --
;; confirm.
11655 (define_insn "*avx2_gatherdi<mode>"
11656 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
11657 (unspec:VEC_GATHER_MODE
11658 [(match_operand:<VEC_GATHER_SRCDI> 2 "register_operand" "0")
11659 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
11661 [(match_operand:P 3 "vsib_address_operand" "p")
11662 (match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x")
11663 (match_operand:SI 6 "const1248_operand" "n")]
11665 (mem:BLK (scratch))
11666 (match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" "1")]
11668 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
11670 "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %2|%2, %7, %5}"
11671 [(set_attr "type" "ssemov")
11672 (set_attr "prefix" "vex")
11673 (set_attr "mode" "<sseinsnmode>")])
;; Second matcher for the 64-bit-index gather, without a visible register
;; operand tied to the destination.  When the result mode differs from
;; <VEC_GATHER_SRCDI> the destination is printed with the x modifier (%x0);
;; otherwise it is printed as plain %0.  The mask is printed from operand 4,
;; which is tied to the scratch.
;; NOTE(review): the first element of the unspec vector is not visible in
;; this listing; confirm against the full source.
11675 (define_insn "*avx2_gatherdi<mode>_2"
11676 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
11677 (unspec:VEC_GATHER_MODE
11679 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
11681 [(match_operand:P 2 "vsib_address_operand" "p")
11682 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x")
11683 (match_operand:SI 5 "const1248_operand" "n")]
11685 (mem:BLK (scratch))
11686 (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")]
11688 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
11691 if (<MODE>mode != <VEC_GATHER_SRCDI>mode)
11692 return "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %x0|%x0, %6, %4}";
11693 return "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}";
11695 [(set_attr "type" "ssemov")
11696 (set_attr "prefix" "vex")
11697 (set_attr "mode" "<sseinsnmode>")])
;; 64-bit-index gather whose result is consumed as a vec_select of elements
;; 0..3.  The destination is therefore in <VEC_GATHER_SRCDI> rather than in
;; the gather mode, and the clobbered scratch uses the VI4F_256 iterator.
;; Operand 2 is tied to the destination and operand 5 to the scratch; the
;; template prints %5, %7 and %0.
11699 (define_insn "*avx2_gatherdi<mode>_3"
11700 [(set (match_operand:<VEC_GATHER_SRCDI> 0 "register_operand" "=&x")
11701 (vec_select:<VEC_GATHER_SRCDI>
11703 [(match_operand:<VEC_GATHER_SRCDI> 2 "register_operand" "0")
11704 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
11706 [(match_operand:P 3 "vsib_address_operand" "p")
11707 (match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x")
11708 (match_operand:SI 6 "const1248_operand" "n")]
11710 (mem:BLK (scratch))
11711 (match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" "1")]
11713 (parallel [(const_int 0) (const_int 1)
11714 (const_int 2) (const_int 3)])))
11715 (clobber (match_scratch:VI4F_256 1 "=&x"))]
11717 "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %0|%0, %7, %5}"
11718 [(set_attr "type" "ssemov")
11719 (set_attr "prefix" "vex")
11720 (set_attr "mode" "<sseinsnmode>")])
;; Counterpart of the vec_select gather matcher without a visible register
;; operand tied to the destination: the result is again the selection of
;; elements 0..3, in <VEC_GATHER_SRCDI>, with a VI4F_256 scratch.  Operand 4
;; is tied to the scratch; the template prints %4, %6 and %0.
;; NOTE(review): the first element of the unspec vector is not visible in
;; this listing; confirm against the full source.
11722 (define_insn "*avx2_gatherdi<mode>_4"
11723 [(set (match_operand:<VEC_GATHER_SRCDI> 0 "register_operand" "=&x")
11724 (vec_select:<VEC_GATHER_SRCDI>
11727 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
11729 [(match_operand:P 2 "vsib_address_operand" "p")
11730 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x")
11731 (match_operand:SI 5 "const1248_operand" "n")]
11733 (mem:BLK (scratch))
11734 (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")]
11736 (parallel [(const_int 0) (const_int 1)
11737 (const_int 2) (const_int 3)])))
11738 (clobber (match_scratch:VI4F_256 1 "=&x"))]
11740 "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}"
11741 [(set_attr "type" "ssemov")
11742 (set_attr "prefix" "vex")
11743 (set_attr "mode" "<sseinsnmode>")])