1 ;; GCC machine description for SSE instructions
2 ;; Copyright (C) 2005, 2006, 2007, 2008, 2009
3 ;; Free Software Foundation, Inc.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify
8 ;; it under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful,
13 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ;; GNU General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
;; Mode iterators.  Each define_mode_iterator names a list of machine modes;
;; the patterns in this file use the iterator name as an operand mode (for
;; example match_operand:SSEMODE) and refer to the current mode as <mode> or
;; <MODE>.
22 ;; 16 byte integral modes handled by SSE, minus TImode, which gets
23 ;; special-cased for TARGET_64BIT.
24 (define_mode_iterator SSEMODEI [V16QI V8HI V4SI V2DI])
26 ;; All 16-byte vector modes handled by SSE
27 (define_mode_iterator SSEMODE [V16QI V8HI V4SI V2DI V4SF V2DF])
29 ;; 32 byte integral vector modes handled by AVX
30 (define_mode_iterator AVX256MODEI [V32QI V16HI V8SI V4DI])
32 ;; All 32-byte vector modes handled by AVX
33 (define_mode_iterator AVX256MODE [V32QI V16HI V8SI V4DI V8SF V4DF])
35 ;; All QI vector modes handled by AVX
36 (define_mode_iterator AVXMODEQI [V32QI V16QI])
38 ;; All DI vector modes handled by AVX
39 (define_mode_iterator AVXMODEDI [V4DI V2DI])
41 ;; All vector modes handled by AVX
42 (define_mode_iterator AVXMODE [V16QI V8HI V4SI V2DI V4SF V2DF V32QI V16HI V8SI V4DI V8SF V4DF])
;; Subsets of the 16-byte modes.  In the integer names the trailing digits
;; list the element sizes in bytes that are included: 1 = V16QI, 2 = V8HI,
;; 4 = V4SI, 8 = V2DI.  SSEMODEF4 holds the scalar and packed float modes,
;; SSEMODEF2P only the two packed float modes.
45 (define_mode_iterator SSEMODE12 [V16QI V8HI])
46 (define_mode_iterator SSEMODE24 [V8HI V4SI])
47 (define_mode_iterator SSEMODE14 [V16QI V4SI])
48 (define_mode_iterator SSEMODE124 [V16QI V8HI V4SI])
49 (define_mode_iterator SSEMODE248 [V8HI V4SI V2DI])
50 (define_mode_iterator SSEMODE1248 [V16QI V8HI V4SI V2DI])
51 (define_mode_iterator SSEMODEF4 [SF DF V4SF V2DF])
52 (define_mode_iterator SSEMODEF2P [V4SF V2DF])
;; Subsets used by the AVX patterns: 256-bit float modes (AVX256MODEF2P),
;; 256-bit modes with four (4P) or eight (8P) elements, all packed float
;; modes of either width (AVXMODEF2P), and the four-element float modes
;; (AVXMODEF4P).
;; NOTE(review): AVXMODEDCVTDQ2PS / AVXMODEDCVTPS2DQ presumably list the
;; result modes of the cvtdq2ps / cvtps2dq patterns; their users are not in
;; this part of the file, so confirm there.
54 (define_mode_iterator AVX256MODEF2P [V8SF V4DF])
55 (define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF])
56 (define_mode_iterator AVX256MODE4P [V4DI V4DF])
57 (define_mode_iterator AVX256MODE8P [V8SI V8SF])
58 (define_mode_iterator AVXMODEF2P [V4SF V2DF V8SF V4DF])
59 (define_mode_iterator AVXMODEF4P [V4SF V4DF])
60 (define_mode_iterator AVXMODEDCVTDQ2PS [V4SF V8SF])
61 (define_mode_iterator AVXMODEDCVTPS2DQ [V4SI V8SI])
63 ;; Int-float size matches
64 (define_mode_iterator SSEMODE4S [V4SF V4SI])
65 (define_mode_iterator SSEMODE2D [V2DF V2DI])
;; Mode attributes for the SSE patterns.  Each define_mode_attr maps a mode
;; to a string; patterns substitute it by writing the attribute name in angle
;; brackets inside names, templates and attribute values (for example
;; <sse>_movup<ssemodesuffixf2c> and <ssescalarmode> further down).
67 ;; Mapping from float mode to required SSE level
68 (define_mode_attr sse [(SF "sse") (DF "sse2") (V4SF "sse") (V2DF "sse2")])
70 ;; Mapping from integer vector mode to mnemonic suffix
71 (define_mode_attr ssevecsize [(V16QI "b") (V8HI "w") (V4SI "d") (V2DI "q")])
73 ;; Mapping of the sse5 suffix
74 (define_mode_attr ssemodesuffixf4 [(SF "ss") (DF "sd")
75 (V4SF "ps") (V2DF "pd")])
76 (define_mode_attr ssemodesuffixf2s [(SF "ss") (DF "sd")
77 (V4SF "ss") (V2DF "sd")])
;; Single-letter precision suffix: s for V4SF, d for V2DF.  The patterns
;; below append it to a p (packed) or s (scalar) mnemonic stem.
78 (define_mode_attr ssemodesuffixf2c [(V4SF "s") (V2DF "d")])
80 ;; Mapping of the max integer size for sse5 rotate immediate constraint
81 (define_mode_attr sserotatemax [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")])
83 ;; Mapping of vector modes back to the scalar modes
84 (define_mode_attr ssescalarmode [(V4SF "SF") (V2DF "DF")
85 (V16QI "QI") (V8HI "HI")
86 (V4SI "SI") (V2DI "DI")])
88 ;; Mapping of vector modes to a vector mode of double size
89 (define_mode_attr ssedoublesizemode [(V2DF "V4DF") (V2DI "V4DI")
90 (V4SF "V8SF") (V4SI "V8SI")])
92 ;; Number of scalar elements in each vector type
93 (define_mode_attr ssescalarnum [(V4SF "4") (V2DF "2")
94 (V16QI "16") (V8HI "8")
95 (V4SI "4") (V2DI "2")])
;; Mode attributes for the AVX patterns.
;; avxvecmode: value placed in the insn mode attribute by the AVX patterns
;; below; integer vectors map to TI (128-bit) or OI (256-bit), float vectors
;; map to themselves.
98 (define_mode_attr avxvecmode
99 [(V16QI "TI") (V8HI "TI") (V4SI "TI") (V2DI "TI") (V4SF "V4SF")
100 (V2DF "V2DF") (V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI")
101 (V8SF "V8SF") (V4DF "V4DF")])
;; avxvecpsmode: single-precision float vector mode of the same total width
;; as each integer vector mode.
102 (define_mode_attr avxvecpsmode
103 [(V16QI "V4SF") (V8HI "V4SF") (V4SI "V4SF") (V2DI "V4SF")
104 (V32QI "V8SF") (V16HI "V8SF") (V8SI "V8SF") (V4DI "V8SF")])
;; avxhalfvecmode: vector mode with the same element type and half as many
;; elements.
105 (define_mode_attr avxhalfvecmode
106 [(V4SF "V2SF") (V32QI "V16QI") (V16HI "V8HI") (V8SI "V4SI")
107 (V4DI "V2DI") (V8SF "V4SF") (V4DF "V2DF")])
;; avxscalarmode: element (scalar) mode of each vector mode.
108 (define_mode_attr avxscalarmode
109 [(V16QI "QI") (V8HI "HI") (V4SI "SI") (V4SF "SF") (V2DF "DF")
110 (V8SF "SF") (V4DF "DF")])
;; avxcvtvecmode: pairs each float vector mode with the integer vector mode
;; of the same shape, and the reverse.
111 (define_mode_attr avxcvtvecmode
112 [(V4SF "V4SI") (V8SF "V8SI") (V4SI "V4SF") (V8SI "V8SF")])
;; avxpermvecmode: integer vector mode of the same shape as each float mode.
113 (define_mode_attr avxpermvecmode
114 [(V2DF "V2DI") (V4SF "V4SI") (V4DF "V4DI") (V8SF "V8SI")])
;; avxmodesuffixf2c: precision letter (s or d) appended to AVX mnemonics
;; such as vmovup and vmulp in the patterns below.
115 (define_mode_attr avxmodesuffixf2c
116 [(V4SF "s") (V2DF "d") (V8SF "s") (V4DF "d")])
;; avxmodesuffixp: two-letter suffix (pd / ps / si) per mode.
;; NOTE(review): in this listing the attribute list is never closed -- the
;; next embedded line number is skipped and no entry for V4DF is visible.
;; Confirm the closing line against the pristine file.
117 (define_mode_attr avxmodesuffixp
118 [(V2DF "pd") (V4SI "si") (V4SF "ps") (V8SF "ps") (V8SI "si")
;; avxmodesuffix: empty for 128-bit modes, 256 for 256-bit modes; used to
;; build pattern names such as avx_movdqu<avxmodesuffix>.
120 (define_mode_attr avxmodesuffix
121 [(V16QI "") (V32QI "256") (V4SI "") (V4SF "") (V2DF "")
122 (V8SI "256") (V8SF "256") (V4DF "256")])
;; Per-mode numeric strings tied to instruction immediates.  Their users lie
;; outside this part of the file.
;; NOTE(review): the values look like exclusive or inclusive immediate
;; bounds (255 / 15 / 3 are all-ones masks) -- confirm how each is consumed
;; before changing any of them.
124 ;; Mapping of immediate bits for blend instructions
125 (define_mode_attr blendbits
126 [(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")])
128 ;; Mapping of immediate bits for vpermil instructions
129 (define_mode_attr vpermilbits
130 [(V8SF "255") (V4SF "255") (V4DF "15") (V2DF "3")])
132 ;; Mapping of immediate bits for pinsr instructions
133 (define_mode_attr pinsrbits [(V16QI "32768") (V8HI "128") (V4SI "8")])
135 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
137 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
141 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander for moves of the 32-byte AVX vector modes (AVX256MODE).  The
;; visible preparation code passes the mode and both operands to
;; ix86_expand_vector_move.
;; NOTE(review): the enabling condition is not visible in this listing.
143 (define_expand "mov<mode>"
144 [(set (match_operand:AVX256MODE 0 "nonimmediate_operand" "")
145 (match_operand:AVX256MODE 1 "nonimmediate_operand" ""))]
148 ix86_expand_vector_move (<MODE>mode, operands);
;; Move insn for every mode in AVXMODE.  Three alternatives: SSE constant
;; (constraint C) into a register, register or memory into a register, and
;; register into memory.  The visible part of the condition requires at
;; least one operand to be a register.  The output code switches on
;; which_alternative and on the insn mode attribute and returns either the
;; result of standard_sse_constant_opcode or one of vmovaps / vmovapd /
;; vmovdqa.
;; NOTE(review): the case labels are not visible here, so which alternative
;; or mode selects which return cannot be read off this listing.
152 (define_insn "*avx_mov<mode>_internal"
153 [(set (match_operand:AVXMODE 0 "nonimmediate_operand" "=x,x ,m")
154 (match_operand:AVXMODE 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
156 && (register_operand (operands[0], <MODE>mode)
157 || register_operand (operands[1], <MODE>mode))"
159 switch (which_alternative)
162 return standard_sse_constant_opcode (insn, operands[1]);
165 switch (get_attr_mode (insn))
169 return "vmovaps\t{%1, %0|%0, %1}";
172 return "vmovapd\t{%1, %0|%0, %1}";
174 return "vmovdqa\t{%1, %0|%0, %1}";
180 [(set_attr "type" "sselog1,ssemov,ssemov")
181 (set_attr "prefix" "vex")
182 (set_attr "mode" "<avxvecmode>")])
184 ;; All of these patterns are enabled for SSE1 as well as SSE2.
185 ;; This is essential for maintaining stable calling conventions.
;; Expander for moves of the 16-byte vector modes (SSEMODE).  The visible
;; preparation code passes the mode and both operands to
;; ix86_expand_vector_move.
;; NOTE(review): the enabling condition is not visible in this listing.
187 (define_expand "mov<mode>"
188 [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "")
189 (match_operand:SSEMODE 1 "nonimmediate_operand" ""))]
192 ix86_expand_vector_move (<MODE>mode, operands);
;; Move insn for every mode in SSEMODE; same three alternatives as the AVX
;; variant (constant to register, register or memory to register, register to
;; memory) and the same at-least-one-register condition.  The output code
;; returns standard_sse_constant_opcode's result or one of movaps / movapd /
;; movdqa, chosen by which_alternative and the insn mode attribute (case
;; labels not visible here).
;; The mode attribute is computed by a cond: V4SF when the function is
;; optimized for size, when TARGET_SSE2 is off, or for the store alternative
;; (2) under TARGET_SSE_TYPELESS_STORES; otherwise V4SF for V4SFmode, V2DF
;; for V2DFmode, and TI for everything else.
196 (define_insn "*mov<mode>_internal"
197 [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "=x,x ,m")
198 (match_operand:SSEMODE 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
200 && (register_operand (operands[0], <MODE>mode)
201 || register_operand (operands[1], <MODE>mode))"
203 switch (which_alternative)
206 return standard_sse_constant_opcode (insn, operands[1]);
209 switch (get_attr_mode (insn))
212 return "movaps\t{%1, %0|%0, %1}";
214 return "movapd\t{%1, %0|%0, %1}";
216 return "movdqa\t{%1, %0|%0, %1}";
222 [(set_attr "type" "sselog1,ssemov,ssemov")
224 (cond [(ior (ior (ne (symbol_ref "optimize_function_for_size_p (cfun)") (const_int 0))
225 (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
226 (and (eq_attr "alternative" "2")
227 (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
229 (const_string "V4SF")
230 (eq (const_string "<MODE>mode") (const_string "V4SFmode"))
231 (const_string "V4SF")
232 (eq (const_string "<MODE>mode") (const_string "V2DFmode"))
233 (const_string "V2DF")
235 (const_string "TI")))])
237 ;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
238 ;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
239 ;; from memory, we'd prefer to load the memory directly into the %xmm
240 ;; register. To facilitate this happy circumstance, this pattern won't
241 ;; split until after register allocation. If the 64-bit value didn't
242 ;; come from memory, this is the best we can do. This is much better
243 ;; than storing %edx:%eax into a stack temporary and loading an %xmm register.
;; Insn-and-split that views a DImode value as the low part of a V4SI
;; register.  Enabled only for 32-bit targets with SSE2 and inter-unit moves
;; (see the condition string); the split condition adds reload_completed.
;; Split code, by source kind:
;;   register pair -- each 32-bit half is loaded with sse2_loadld (low half
;;     into operand 0, high half at byte offset 4 into scratch operand 2) and
;;     the two are interleaved with sse2_punpckldq;
;;   memory -- one vec_concatv2di of the DImode value with const0_rtx.
;; The scratch is early-clobbered (=&x) only in the register alternative.
246 (define_insn_and_split "movdi_to_sse"
248 [(set (match_operand:V4SI 0 "register_operand" "=?x,x")
249 (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
250 (clobber (match_scratch:V4SI 2 "=&x,X"))])]
251 "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES"
253 "&& reload_completed"
256 if (register_operand (operands[1], DImode))
258 /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
259 Assemble the 64-bit DImode value in an xmm register. */
260 emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
261 gen_rtx_SUBREG (SImode, operands[1], 0)));
262 emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
263 gen_rtx_SUBREG (SImode, operands[1], 4)));
264 emit_insn (gen_sse2_punpckldq (operands[0], operands[0], operands[2]));
266 else if (memory_operand (operands[1], DImode))
267 emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]), operands[1], const0_rtx));
;; Post-reload rewrite of a V4SF register load whose source satisfies
;; zero_extended_scalar_load_operand.  The preparation code narrows operand 1
;; to SFmode with simplify_gen_subreg and sets operand 2 to a V4SF zero; the
;; replacement RTL contains a vec_duplicate of operand 1.
;; NOTE(review): the head of this definition (presumably define_split) and
;; most of the replacement RTL are not visible in this listing.
273 [(set (match_operand:V4SF 0 "register_operand" "")
274 (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
275 "TARGET_SSE && reload_completed"
278 (vec_duplicate:V4SF (match_dup 1))
282 operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
283 operands[2] = CONST0_RTX (V4SFmode);
;; Post-reload rewrite of a V2DF register load whose source satisfies
;; zero_extended_scalar_load_operand: the load becomes a vec_concat of the
;; scalar (operand 1 narrowed to DFmode) with a DFmode zero (operand 2).
;; NOTE(review): the head of this definition (presumably define_split) is
;; not visible in this listing.
287 [(set (match_operand:V2DF 0 "register_operand" "")
288 (match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))]
289 "TARGET_SSE2 && reload_completed"
290 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
292 operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
293 operands[2] = CONST0_RTX (DFmode);
;; Expander for pushing a register of a 32-byte AVX vector mode; the visible
;; code hands the mode and the register to ix86_expand_push.
;; NOTE(review): the enabling condition is not visible in this listing.
296 (define_expand "push<mode>1"
297 [(match_operand:AVX256MODE 0 "register_operand" "")]
300 ix86_expand_push (<MODE>mode, operands[0]);
;; Expander for pushing a register of a 16-byte vector mode; the visible code
;; hands the mode and the register to ix86_expand_push.
;; NOTE(review): the enabling condition is not visible in this listing.
304 (define_expand "push<mode>1"
305 [(match_operand:SSEMODE 0 "register_operand" "")]
308 ix86_expand_push (<MODE>mode, operands[0]);
;; Expander for misaligned moves of the 32-byte AVX vector modes; delegates
;; to ix86_expand_vector_move_misalign.
;; NOTE(review): the enabling condition is not visible in this listing.
312 (define_expand "movmisalign<mode>"
313 [(set (match_operand:AVX256MODE 0 "nonimmediate_operand" "")
314 (match_operand:AVX256MODE 1 "nonimmediate_operand" ""))]
317 ix86_expand_vector_move_misalign (<MODE>mode, operands);
;; Expander for misaligned moves of the 16-byte vector modes; delegates to
;; ix86_expand_vector_move_misalign.
;; NOTE(review): the enabling condition is not visible in this listing.
321 (define_expand "movmisalign<mode>"
322 [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "")
323 (match_operand:SSEMODE 1 "nonimmediate_operand" ""))]
326 ix86_expand_vector_move_misalign (<MODE>mode, operands);
;; AVX packed-float move emitted as vmovups / vmovupd (precision letter from
;; avxmodesuffixf2c), flagged with the movu attribute.  Alternatives: register
;; or memory to register, and register to memory; the condition rejects the
;; memory-to-memory combination.
;; NOTE(review): the source operand sits inside a bracketed operand list
;; whose wrapping RTL head and tag are not visible in this listing.
330 (define_insn "avx_movup<avxmodesuffixf2c><avxmodesuffix>"
331 [(set (match_operand:AVXMODEF2P 0 "nonimmediate_operand" "=x,m")
333 [(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "xm,x")]
335 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
336 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
337 "vmovup<avxmodesuffixf2c>\t{%1, %0|%0, %1}"
338 [(set_attr "type" "ssemov")
339 (set_attr "movu" "1")
340 (set_attr "prefix" "vex")
341 (set_attr "mode" "<MODE>")])
;; Emits movq from a V2DI register or memory operand into a V2DI register.
;; The visible RTL selects element 0 of operand 1 (parallel [(const_int 0)]).
;; NOTE(review): the rest of the source expression and the condition are not
;; visible here.  The %v template prefix together with prefix maybe_vex
;; presumably yields the VEX-encoded form when AVX is enabled -- confirm.
343 (define_insn "sse2_movq128"
344 [(set (match_operand:V2DI 0 "register_operand" "=x")
347 (match_operand:V2DI 1 "nonimmediate_operand" "xm")
348 (parallel [(const_int 0)]))
351 "%vmovq\t{%1, %0|%0, %1}"
352 [(set_attr "type" "ssemov")
353 (set_attr "prefix" "maybe_vex")
354 (set_attr "mode" "TI")])
;; SSE packed-float move emitted as movups / movupd (precision letter from
;; ssemodesuffixf2c), flagged with the movu attribute.  The pattern name takes
;; its sse / sse2 prefix from the sse mode attribute.  Alternatives: register
;; or memory to register, and register to memory; memory-to-memory is
;; rejected by the condition.
;; NOTE(review): the RTL head wrapping the source operand list is not visible
;; in this listing.
356 (define_insn "<sse>_movup<ssemodesuffixf2c>"
357 [(set (match_operand:SSEMODEF2P 0 "nonimmediate_operand" "=x,m")
359 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm,x")]
361 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
362 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
363 "movup<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
364 [(set_attr "type" "ssemov")
365 (set_attr "movu" "1")
366 (set_attr "mode" "<MODE>")])
;; AVX byte-vector move emitted as vmovdqu for V16QI and V32QI (the 256
;; name suffix comes from avxmodesuffix).  Requires TARGET_AVX and rejects
;; memory-to-memory operands.
;; NOTE(review): the RTL head wrapping the source operand list is not visible
;; in this listing.
368 (define_insn "avx_movdqu<avxmodesuffix>"
369 [(set (match_operand:AVXMODEQI 0 "nonimmediate_operand" "=x,m")
371 [(match_operand:AVXMODEQI 1 "nonimmediate_operand" "xm,x")]
373 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
374 "vmovdqu\t{%1, %0|%0, %1}"
375 [(set_attr "type" "ssemov")
376 (set_attr "movu" "1")
377 (set_attr "prefix" "vex")
378 (set_attr "mode" "<avxvecmode>")])
;; SSE2 V16QI move emitted as movdqu; the source is wrapped in an unspec so
;; the insn is not treated as an ordinary move.  Requires TARGET_SSE2 and
;; rejects memory-to-memory operands.
;; NOTE(review): the unspec name is not visible in this listing.
380 (define_insn "sse2_movdqu"
381 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
382 (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "xm,x")]
384 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
385 "movdqu\t{%1, %0|%0, %1}"
386 [(set_attr "type" "ssemov")
387 (set_attr "movu" "1")
388 (set_attr "prefix_data16" "1")
389 (set_attr "mode" "TI")])
;; AVX store of a packed-float register to memory, emitted as vmovntps /
;; vmovntpd.  Operand 0 must be memory and operand 1 a register.
;; NOTE(review): the RTL head wrapping the source operand list and its tag
;; are not visible in this listing.
391 (define_insn "avx_movnt<mode>"
392 [(set (match_operand:AVXMODEF2P 0 "memory_operand" "=m")
394 [(match_operand:AVXMODEF2P 1 "register_operand" "x")]
396 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
397 "vmovntp<avxmodesuffixf2c>\t{%1, %0|%0, %1}"
398 [(set_attr "type" "ssemov")
399 (set_attr "prefix" "vex")
400 (set_attr "mode" "<MODE>")])
;; SSE store of a packed-float register to memory, emitted as movntps /
;; movntpd.  Operand 0 must be memory and operand 1 a register; the name
;; prefix (sse or sse2) comes from the sse mode attribute.
;; NOTE(review): the RTL head wrapping the source operand list and its tag
;; are not visible in this listing.
402 (define_insn "<sse>_movnt<mode>"
403 [(set (match_operand:SSEMODEF2P 0 "memory_operand" "=m")
405 [(match_operand:SSEMODEF2P 1 "register_operand" "x")]
407 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
408 "movntp<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
409 [(set_attr "type" "ssemov")
410 (set_attr "mode" "<MODE>")])
;; AVX store of a V2DI or V4DI register to memory, emitted as vmovntdq.
;; NOTE(review): the condition and the RTL head wrapping the source are not
;; visible here.  The type attribute is ssecvt although the other movnt
;; patterns in this file use ssemov -- confirm whether that is intended.
412 (define_insn "avx_movnt<mode>"
413 [(set (match_operand:AVXMODEDI 0 "memory_operand" "=m")
415 [(match_operand:AVXMODEDI 1 "register_operand" "x")]
418 "vmovntdq\t{%1, %0|%0, %1}"
419 [(set_attr "type" "ssecvt")
420 (set_attr "prefix" "vex")
421 (set_attr "mode" "<avxvecmode>")])
;; SSE2 store of a V2DI register to memory, emitted as movntdq; the source
;; is wrapped in an unspec.
;; NOTE(review): the unspec name and the condition are not visible here.
423 (define_insn "sse2_movntv2di"
424 [(set (match_operand:V2DI 0 "memory_operand" "=m")
425 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")]
428 "movntdq\t{%1, %0|%0, %1}"
429 [(set_attr "type" "ssemov")
430 (set_attr "prefix_data16" "1")
431 (set_attr "mode" "TI")])
;; Store of a general-purpose SImode register (constraint r) to memory,
;; emitted as movnti; the source is wrapped in an unspec.
;; NOTE(review): the unspec name and the condition are not visible here.
;; The mode attribute is V2DF even though the operands are SImode -- confirm
;; whether that is deliberate before relying on it.
433 (define_insn "sse2_movntsi"
434 [(set (match_operand:SI 0 "memory_operand" "=m")
435 (unspec:SI [(match_operand:SI 1 "register_operand" "r")]
438 "movnti\t{%1, %0|%0, %1}"
439 [(set_attr "type" "ssemov")
440 (set_attr "prefix_data16" "0")
441 (set_attr "mode" "V2DF")])
;; AVX load of a V16QI or V32QI value from memory into a register, emitted
;; as vlddqu and flagged with the movu attribute.
;; NOTE(review): the condition and the RTL head wrapping the source are not
;; visible here.  The type attribute is ssecvt while sse3_lddqu uses ssemov
;; -- confirm whether that is intended.
443 (define_insn "avx_lddqu<avxmodesuffix>"
444 [(set (match_operand:AVXMODEQI 0 "register_operand" "=x")
446 [(match_operand:AVXMODEQI 1 "memory_operand" "m")]
449 "vlddqu\t{%1, %0|%0, %1}"
450 [(set_attr "type" "ssecvt")
451 (set_attr "movu" "1")
452 (set_attr "prefix" "vex")
453 (set_attr "mode" "<avxvecmode>")])
;; Load of a V16QI value from memory into a register, emitted as lddqu and
;; flagged with the movu attribute; the source is wrapped in an unspec.  The
;; encoding attributes select a rep prefix and no data16 prefix.
;; NOTE(review): the unspec name and the condition are not visible here.
455 (define_insn "sse3_lddqu"
456 [(set (match_operand:V16QI 0 "register_operand" "=x")
457 (unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "m")]
460 "lddqu\t{%1, %0|%0, %1}"
461 [(set_attr "type" "ssemov")
462 (set_attr "movu" "1")
463 (set_attr "prefix_data16" "0")
464 (set_attr "prefix_rep" "1")
465 (set_attr "mode" "TI")])
467 ; Expand patterns for non-temporal stores. At the moment, only those
468 ; that directly map to insns are defined; it would be possible to
469 ; define patterns for other modes that would expand to several insns.
;; storent expander for the packed float modes: operand 0 is the memory
;; destination, operand 1 the register source.  No preparation code is
;; visible, so the expander presumably just emits its RTL template.
;; NOTE(review): the RTL head wrapping the source operand and its tag are
;; not visible in this listing.
471 (define_expand "storent<mode>"
472 [(set (match_operand:SSEMODEF2P 0 "memory_operand" "")
474 [(match_operand:SSEMODEF2P 1 "register_operand" "")]
476 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
;; storent expander for the modes of the MODEF iterator (defined outside this
;; file view): memory destination, register source.
;; NOTE(review): the wrapping RTL head, its tag and the enabling condition
;; are not visible in this listing.
479 (define_expand "storent<mode>"
480 [(set (match_operand:MODEF 0 "memory_operand" "")
482 [(match_operand:MODEF 1 "register_operand" "")]
;; storent expander for V2DI: memory destination, register source wrapped in
;; an unspec of the same mode.
;; NOTE(review): the unspec name and the enabling condition are not visible
;; in this listing.
487 (define_expand "storentv2di"
488 [(set (match_operand:V2DI 0 "memory_operand" "")
489 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "")]
;; storent expander for SImode: memory destination, register source wrapped
;; in an unspec of the same mode.
;; NOTE(review): the unspec name and the enabling condition are not visible
;; in this listing.
494 (define_expand "storentsi"
495 [(set (match_operand:SI 0 "memory_operand" "")
496 (unspec:SI [(match_operand:SI 1 "register_operand" "")]
501 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
503 ;; Parallel floating point arithmetic
505 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander for the unary operations iterated by <code> on the packed float
;; modes.  All work is delegated to ix86_expand_fp_absneg_operator, after
;; which the expander finishes with DONE.
;; NOTE(review): the RTL operator line is not visible; going by the helper
;; name the codes are presumably abs and neg.
507 (define_expand "<code><mode>2"
508 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
510 (match_operand:SSEMODEF2P 1 "register_operand" "")))]
511 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
512 "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
;; Expander for the operations of the plusminus code iterator on the 256-bit
;; float modes.  Both inputs may be register or memory; the preparation code
;; calls ix86_fixup_binary_operands_no_copy and then falls through (no DONE),
;; so the RTL template above is what gets emitted.
514 (define_expand "<plusminus_insn><mode>3"
515 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
516 (plusminus:AVX256MODEF2P
517 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
518 (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
519 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
520 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; AVX packed plus/minus insn in three-operand form: the template names
;; operands 0, 1 and 2 separately, so the destination is not tied to an
;; input.  Operand 1 must be a register (its constraint also carries the
;; <comm> commutativity marker); operand 2 may be a register or memory.
;; ix86_binary_operator_ok validates the operand combination.
522 (define_insn "*avx_<plusminus_insn><mode>3"
523 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
524 (plusminus:AVXMODEF2P
525 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "<comm>x")
526 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
527 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
528 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
529 "v<plusminus_mnemonic>p<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
530 [(set_attr "type" "sseadd")
531 (set_attr "prefix" "vex")
532 (set_attr "mode" "<avxvecmode>")])
;; Expander for the operations of the plusminus code iterator on the 128-bit
;; packed float modes.  The preparation code calls
;; ix86_fixup_binary_operands_no_copy and then falls through (no DONE), so
;; the RTL template above is what gets emitted.
534 (define_expand "<plusminus_insn><mode>3"
535 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
536 (plusminus:SSEMODEF2P
537 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
538 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
539 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
540 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; SSE packed plus/minus insn in two-operand form: operand 1 is tied to the
;; destination by the 0 constraint (with the <comm> commutativity marker),
;; which is why the template only names %0 and %2.  Operand 2 may be a
;; register or memory.
542 (define_insn "*<plusminus_insn><mode>3"
543 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
544 (plusminus:SSEMODEF2P
545 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "<comm>0")
546 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
547 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
548 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
549 "<plusminus_mnemonic>p<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
550 [(set_attr "type" "sseadd")
551 (set_attr "mode" "<MODE>")])
;; AVX scalar-suffixed plus/minus on a 128-bit float vector: the plusminus
;; result is the first arm of a vec_merge and the emitted mnemonic uses the
;; s (scalar) stem in three-operand form.  The mode attribute is the scalar
;; element mode.
;; NOTE(review): the remaining vec_merge operands are not visible in this
;; listing.
553 (define_insn "*avx_vm<plusminus_insn><mode>3"
554 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
555 (vec_merge:SSEMODEF2P
556 (plusminus:SSEMODEF2P
557 (match_operand:SSEMODEF2P 1 "register_operand" "x")
558 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
561 "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
562 "v<plusminus_mnemonic>s<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
563 [(set_attr "type" "sseadd")
564 (set_attr "prefix" "vex")
565 (set_attr "mode" "<ssescalarmode>")])
;; SSE scalar-suffixed plus/minus on a 128-bit float vector: the plusminus
;; result is the first arm of a vec_merge; operand 1 is tied to the
;; destination (constraint 0), so the template names only %0 and %2.  The
;; mode attribute is the scalar element mode.
;; NOTE(review): the remaining vec_merge operands are not visible in this
;; listing.
567 (define_insn "<sse>_vm<plusminus_insn><mode>3"
568 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
569 (vec_merge:SSEMODEF2P
570 (plusminus:SSEMODEF2P
571 (match_operand:SSEMODEF2P 1 "register_operand" "0")
572 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
575 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
576 "<plusminus_mnemonic>s<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
577 [(set_attr "type" "sseadd")
578 (set_attr "mode" "<ssescalarmode>")])
;; Expander for multiplication of the 256-bit float modes.  The preparation
;; code calls ix86_fixup_binary_operands_no_copy with MULT and then falls
;; through, so the RTL template is what gets emitted.
;; NOTE(review): the RTL operator line is not visible in this listing.
580 (define_expand "mul<mode>3"
581 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
583 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
584 (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
585 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
586 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; AVX packed multiply, emitted as vmulps / vmulpd in three-operand form.
;; The % on operand 1 marks operands 1 and 2 as commutative; operand 2 may
;; be memory.  ix86_binary_operator_ok validates the operand combination.
;; NOTE(review): the RTL operator line is not visible in this listing.
588 (define_insn "*avx_mul<mode>3"
589 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
591 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
592 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
593 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
594 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
595 "vmulp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
596 [(set_attr "type" "ssemul")
597 (set_attr "prefix" "vex")
598 (set_attr "mode" "<avxvecmode>")])
;; Expander for multiplication of the 128-bit packed float modes.  The
;; preparation code calls ix86_fixup_binary_operands_no_copy with MULT and
;; then falls through, so the RTL template is what gets emitted.
;; NOTE(review): the RTL operator line is not visible in this listing.
600 (define_expand "mul<mode>3"
601 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
603 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
604 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
605 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
606 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; SSE packed multiply, emitted as mulps / mulpd in two-operand form:
;; operand 1 is tied to the destination (constraint %0, commutative with
;; operand 2), so the template names only %0 and %2.
;; NOTE(review): the RTL operator line is not visible in this listing.
608 (define_insn "*mul<mode>3"
609 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
611 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
612 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
613 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
614 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
615 "mulp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
616 [(set_attr "type" "ssemul")
617 (set_attr "mode" "<MODE>")])
;; AVX scalar-suffixed multiply on a 128-bit float vector, emitted as vmulss
;; / vmulsd in three-operand form inside a vec_merge.  The mode attribute is
;; the scalar element mode.
;; NOTE(review): the operator line and the remaining vec_merge operands are
;; not visible here.  The condition uses AVX_VEC_FLOAT_MODE_P while the
;; sibling *avx_vm patterns for plus/minus and div use
;; AVX128_VEC_FLOAT_MODE_P -- confirm whether the difference is intended.
619 (define_insn "*avx_vmmul<mode>3"
620 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
621 (vec_merge:SSEMODEF2P
623 (match_operand:SSEMODEF2P 1 "register_operand" "x")
624 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
627 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
628 "vmuls<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
629 [(set_attr "type" "ssemul")
630 (set_attr "prefix" "vex")
631 (set_attr "mode" "<ssescalarmode>")])
;; SSE scalar-suffixed multiply on a 128-bit float vector, emitted as mulss
;; / mulsd inside a vec_merge.  Operand 1 is tied to the destination
;; (constraint 0); the mode attribute is the scalar element mode.
;; NOTE(review): the operator line and the remaining vec_merge operands are
;; not visible in this listing.
633 (define_insn "<sse>_vmmul<mode>3"
634 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
635 (vec_merge:SSEMODEF2P
637 (match_operand:SSEMODEF2P 1 "register_operand" "0")
638 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
641 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
642 "muls<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
643 [(set_attr "type" "ssemul")
644 (set_attr "mode" "<ssescalarmode>")])
;; Expander for V8SF division.  After ix86_fixup_binary_operands_no_copy, it
;; calls ix86_emit_swdivsf instead of emitting a div when all of these hold:
;; TARGET_SSE_MATH, TARGET_RECIP, not optimizing the insn for size,
;; flag_finite_math_only, no trapping math, and unsafe math optimizations.
;; NOTE(review): ix86_emit_swdivsf is defined elsewhere; by its name it
;; emits a software division sequence.  The enabling condition and the
;; braces / DONE around the call are not visible in this listing.
646 (define_expand "divv8sf3"
647 [(set (match_operand:V8SF 0 "register_operand" "")
648 (div:V8SF (match_operand:V8SF 1 "register_operand" "")
649 (match_operand:V8SF 2 "nonimmediate_operand" "")))]
652 ix86_fixup_binary_operands_no_copy (DIV, V8SFmode, operands);
654 if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_insn_for_size_p ()
655 && flag_finite_math_only && !flag_trapping_math
656 && flag_unsafe_math_optimizations)
658 ix86_emit_swdivsf (operands[0], operands[1],
659 operands[2], V8SFmode);
;; Expander for V4DF division: only fixes up the operands with
;; ix86_fixup_binary_operands_no_copy and emits the div template.
;; NOTE(review): the enabling condition is not visible in this listing.
664 (define_expand "divv4df3"
665 [(set (match_operand:V4DF 0 "register_operand" "")
666 (div:V4DF (match_operand:V4DF 1 "register_operand" "")
667 (match_operand:V4DF 2 "nonimmediate_operand" "")))]
669 "ix86_fixup_binary_operands_no_copy (DIV, V4DFmode, operands);")
;; Named AVX packed divide over all four packed float modes, emitted as
;; vdivps / vdivpd in three-operand form.  Operand 1 must be a register;
;; operand 2 may be memory.
;; NOTE(review): the RTL operator line is not visible in this listing.
671 (define_insn "avx_div<mode>3"
672 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
674 (match_operand:AVXMODEF2P 1 "register_operand" "x")
675 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
676 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
677 "vdivp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
678 [(set_attr "type" "ssediv")
679 (set_attr "prefix" "vex")
680 (set_attr "mode" "<MODE>")])
;; Expander for V4SF division.  It calls ix86_emit_swdivsf instead of
;; emitting a div when all of these hold: TARGET_SSE_MATH, TARGET_RECIP,
;; optimizing the insn for speed, flag_finite_math_only, no trapping math,
;; and unsafe math optimizations.
;; NOTE(review): this expander tests optimize_insn_for_speed_p () where
;; divv8sf3 tests !optimize_insn_for_size_p (), and no operand fixup call is
;; visible here.  The enabling condition and the braces / DONE are not
;; visible in this listing either.
682 (define_expand "divv4sf3"
683 [(set (match_operand:V4SF 0 "register_operand" "")
684 (div:V4SF (match_operand:V4SF 1 "register_operand" "")
685 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
688 if (TARGET_SSE_MATH && TARGET_RECIP && optimize_insn_for_speed_p ()
689 && flag_finite_math_only && !flag_trapping_math
690 && flag_unsafe_math_optimizations)
692 ix86_emit_swdivsf (operands[0], operands[1],
693 operands[2], V4SFmode);
;; Expander for V2DF division; only the RTL template is visible.
;; NOTE(review): the enabling condition and any preparation code are not
;; visible in this listing.
698 (define_expand "divv2df3"
699 [(set (match_operand:V2DF 0 "register_operand" "")
700 (div:V2DF (match_operand:V2DF 1 "register_operand" "")
701 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
;; Anonymous AVX packed divide restricted to the 128-bit float modes
;; (SSEMODEF2P under AVX128_VEC_FLOAT_MODE_P), emitted as vdivps / vdivpd in
;; three-operand form.
;; NOTE(review): the RTL operator line is not visible in this listing.
705 (define_insn "*avx_div<mode>3"
706 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
708 (match_operand:SSEMODEF2P 1 "register_operand" "x")
709 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
710 "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
711 "vdivp<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
712 [(set_attr "type" "ssediv")
713 (set_attr "prefix" "vex")
714 (set_attr "mode" "<MODE>")])
;; SSE packed divide, emitted as divps / divpd in two-operand form: operand
;; 1 is tied to the destination (constraint 0) and operand 2 may be memory.
;; NOTE(review): the RTL operator line is not visible in this listing.
716 (define_insn "<sse>_div<mode>3"
717 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
719 (match_operand:SSEMODEF2P 1 "register_operand" "0")
720 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
721 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
722 "divp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
723 [(set_attr "type" "ssediv")
724 (set_attr "mode" "<MODE>")])
;; AVX scalar-suffixed divide on a 128-bit float vector, emitted as vdivss /
;; vdivsd in three-operand form inside a vec_merge.  The mode attribute is
;; the scalar element mode.
;; NOTE(review): the operator line and the remaining vec_merge operands are
;; not visible in this listing.
726 (define_insn "*avx_vmdiv<mode>3"
727 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
728 (vec_merge:SSEMODEF2P
730 (match_operand:SSEMODEF2P 1 "register_operand" "x")
731 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
734 "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
735 "vdivs<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
736 [(set_attr "type" "ssediv")
737 (set_attr "prefix" "vex")
738 (set_attr "mode" "<ssescalarmode>")])
;; SSE scalar-suffixed divide on a 128-bit float vector, emitted as divss /
;; divsd inside a vec_merge.  Operand 1 is tied to the destination
;; (constraint 0); the mode attribute is the scalar element mode.
;; NOTE(review): the operator line and the remaining vec_merge operands are
;; not visible in this listing.
740 (define_insn "<sse>_vmdiv<mode>3"
741 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
742 (vec_merge:SSEMODEF2P
744 (match_operand:SSEMODEF2P 1 "register_operand" "0")
745 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
748 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
749 "divs<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
750 [(set_attr "type" "ssediv")
751 (set_attr "mode" "<ssescalarmode>")])
;; V8SF insn tagged UNSPEC_RCP, emitted as vrcpps; the source may be a
;; register or memory.
;; NOTE(review): the unspec head line and the condition are not visible in
;; this listing.
753 (define_insn "avx_rcpv8sf2"
754 [(set (match_operand:V8SF 0 "register_operand" "=x")
756 [(match_operand:V8SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
758 "vrcpps\t{%1, %0|%0, %1}"
759 [(set_attr "type" "sse")
760 (set_attr "prefix" "vex")
761 (set_attr "mode" "V8SF")])
;; V4SF insn tagged UNSPEC_RCP, emitted as rcpps with the %v template prefix
;; and prefix attribute maybe_vex.  Carries the atom_sse_attr value rcp.
;; NOTE(review): the unspec head line and the condition are not visible in
;; this listing.
763 (define_insn "sse_rcpv4sf2"
764 [(set (match_operand:V4SF 0 "register_operand" "=x")
766 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
768 "%vrcpps\t{%1, %0|%0, %1}"
769 [(set_attr "type" "sse")
770 (set_attr "atom_sse_attr" "rcp")
771 (set_attr "prefix" "maybe_vex")
772 (set_attr "mode" "V4SF")])
;; AVX scalar form emitted as vrcpss: an unspec of operand 1 is combined
;; with register operand 2, and the template passes operand 2 as the middle
;; (first source) operand.  The mode attribute is SF.
;; NOTE(review): the combining RTL head, the unspec name and the condition
;; are not visible in this listing.
774 (define_insn "*avx_vmrcpv4sf2"
775 [(set (match_operand:V4SF 0 "register_operand" "=x")
777 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
779 (match_operand:V4SF 2 "register_operand" "x")
782 "vrcpss\t{%1, %2, %0|%0, %2, %1}"
783 [(set_attr "type" "sse")
784 (set_attr "prefix" "vex")
785 (set_attr "mode" "SF")])
;; SSE scalar form emitted as rcpss: an unspec of operand 1 is combined with
;; operand 2, which is tied to the destination (constraint 0), so the
;; template names only %0 and %1.  The mode attribute is SF.
;; NOTE(review): the combining RTL head, the unspec name and the condition
;; are not visible in this listing.
787 (define_insn "sse_vmrcpv4sf2"
788 [(set (match_operand:V4SF 0 "register_operand" "=x")
790 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
792 (match_operand:V4SF 2 "register_operand" "0")
795 "rcpss\t{%1, %0|%0, %1}"
796 [(set_attr "type" "sse")
797 (set_attr "atom_sse_attr" "rcp")
798 (set_attr "mode" "SF")])
;; Expander for V8SF square root.  It calls ix86_emit_swsqrtsf (last
;; argument 0) instead of emitting a sqrt when all of these hold:
;; TARGET_SSE_MATH, TARGET_RECIP, not optimizing the insn for size,
;; flag_finite_math_only, no trapping math, and unsafe math optimizations.
;; NOTE(review): ix86_emit_swsqrtsf is defined elsewhere; the enabling
;; condition and the braces / DONE are not visible in this listing.
800 (define_expand "sqrtv8sf2"
801 [(set (match_operand:V8SF 0 "register_operand" "")
802 (sqrt:V8SF (match_operand:V8SF 1 "nonimmediate_operand" "")))]
805 if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_insn_for_size_p ()
806 && flag_finite_math_only && !flag_trapping_math
807 && flag_unsafe_math_optimizations)
809 ix86_emit_swsqrtsf (operands[0], operands[1], V8SFmode, 0);
;; V8SF square root insn, emitted as vsqrtps; the source may be a register
;; or memory.
;; NOTE(review): the condition is not visible in this listing.
814 (define_insn "avx_sqrtv8sf2"
815 [(set (match_operand:V8SF 0 "register_operand" "=x")
816 (sqrt:V8SF (match_operand:V8SF 1 "nonimmediate_operand" "xm")))]
818 "vsqrtps\t{%1, %0|%0, %1}"
819 [(set_attr "type" "sse")
820 (set_attr "prefix" "vex")
821 (set_attr "mode" "V8SF")])
;; Expander for V4SF square root.  It calls ix86_emit_swsqrtsf (last
;; argument 0) instead of emitting a sqrt when all of these hold:
;; TARGET_SSE_MATH, TARGET_RECIP, optimizing the insn for speed,
;; flag_finite_math_only, no trapping math, and unsafe math optimizations.
;; NOTE(review): this expander tests optimize_insn_for_speed_p () where
;; sqrtv8sf2 tests !optimize_insn_for_size_p ().  The enabling condition and
;; the braces / DONE are not visible in this listing.
823 (define_expand "sqrtv4sf2"
824 [(set (match_operand:V4SF 0 "register_operand" "")
825 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
828 if (TARGET_SSE_MATH && TARGET_RECIP && optimize_insn_for_speed_p ()
829 && flag_finite_math_only && !flag_trapping_math
830 && flag_unsafe_math_optimizations)
832 ix86_emit_swsqrtsf (operands[0], operands[1], V4SFmode, 0);
;; V4SF square root insn, emitted as sqrtps with the %v template prefix and
;; prefix attribute maybe_vex.  Carries the atom_sse_attr value sqrt.
;; NOTE(review): the condition is not visible in this listing.
837 (define_insn "sse_sqrtv4sf2"
838 [(set (match_operand:V4SF 0 "register_operand" "=x")
839 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
841 "%vsqrtps\t{%1, %0|%0, %1}"
842 [(set_attr "type" "sse")
843 (set_attr "atom_sse_attr" "sqrt")
844 (set_attr "prefix" "maybe_vex")
845 (set_attr "mode" "V4SF")])
;; V4DF square root insn under the standard name sqrtv4df2, emitted as
;; vsqrtpd; the source may be a register or memory.
;; NOTE(review): the condition is not visible in this listing.
847 (define_insn "sqrtv4df2"
848 [(set (match_operand:V4DF 0 "register_operand" "=x")
849 (sqrt:V4DF (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
851 "vsqrtpd\t{%1, %0|%0, %1}"
852 [(set_attr "type" "sse")
853 (set_attr "prefix" "vex")
854 (set_attr "mode" "V4DF")])
;; V2DF square root insn under the standard name sqrtv2df2, emitted as
;; sqrtpd with the %v template prefix and prefix attribute maybe_vex.
;; NOTE(review): the condition is not visible in this listing.
856 (define_insn "sqrtv2df2"
857 [(set (match_operand:V2DF 0 "register_operand" "=x")
858 (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
860 "%vsqrtpd\t{%1, %0|%0, %1}"
861 [(set_attr "type" "sse")
862 (set_attr "prefix" "maybe_vex")
863 (set_attr "mode" "V2DF")])
;; AVX scalar-suffixed square root on a 128-bit float vector, emitted as
;; vsqrtss / vsqrtsd: the result computed from operand 1 is vec_merged with
;; register operand 2, which the template passes as the middle operand.
;; NOTE(review): the operator line and the vec_merge mask are not visible
;; here.  The condition uses AVX_VEC_FLOAT_MODE_P although the operands are
;; SSEMODEF2P and other *avx_vm patterns use AVX128_VEC_FLOAT_MODE_P --
;; confirm whether the difference is intended.
865 (define_insn "*avx_vmsqrt<mode>2"
866 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
867 (vec_merge:SSEMODEF2P
869 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm"))
870 (match_operand:SSEMODEF2P 2 "register_operand" "x")
872 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
873 "vsqrts<ssemodesuffixf2c>\t{%1, %2, %0|%0, %2, %1}"
874 [(set_attr "type" "sse")
875 (set_attr "prefix" "vex")
876 (set_attr "mode" "<ssescalarmode>")])
;; SSE scalar-suffixed square root on a 128-bit float vector, emitted as
;; sqrtss / sqrtsd: the result computed from operand 1 is vec_merged with
;; operand 2, which is tied to the destination (constraint 0).  Carries the
;; atom_sse_attr value sqrt.
;; NOTE(review): the operator line and the vec_merge mask are not visible in
;; this listing.
878 (define_insn "<sse>_vmsqrt<mode>2"
879 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
880 (vec_merge:SSEMODEF2P
882 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm"))
883 (match_operand:SSEMODEF2P 2 "register_operand" "0")
885 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
886 "sqrts<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
887 [(set_attr "type" "sse")
888 (set_attr "atom_sse_attr" "sqrt")
889 (set_attr "mode" "<ssescalarmode>")])
;; Expander for the V8SF UNSPEC_RSQRT operation, enabled under TARGET_AVX
;; and TARGET_SSE_MATH.  The visible code always calls ix86_emit_swsqrtsf
;; with a last argument of 1 (the sqrt expanders above pass 0).
;; NOTE(review): the unspec head line and the DONE are not visible in this
;; listing.
891 (define_expand "rsqrtv8sf2"
892 [(set (match_operand:V8SF 0 "register_operand" "")
894 [(match_operand:V8SF 1 "nonimmediate_operand" "")] UNSPEC_RSQRT))]
895 "TARGET_AVX && TARGET_SSE_MATH"
897 ix86_emit_swsqrtsf (operands[0], operands[1], V8SFmode, 1);
;; V8SF insn tagged UNSPEC_RSQRT, emitted as vrsqrtps; the source may be a
;; register or memory.
;; NOTE(review): the unspec head line and the condition are not visible in
;; this listing.
901 (define_insn "avx_rsqrtv8sf2"
902 [(set (match_operand:V8SF 0 "register_operand" "=x")
904 [(match_operand:V8SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
906 "vrsqrtps\t{%1, %0|%0, %1}"
907 [(set_attr "type" "sse")
908 (set_attr "prefix" "vex")
909 (set_attr "mode" "V8SF")])
;; Expander for the V4SF UNSPEC_RSQRT operation.  The visible code always
;; calls ix86_emit_swsqrtsf with a last argument of 1.
;; NOTE(review): the unspec head line, the enabling condition and the DONE
;; are not visible in this listing.
911 (define_expand "rsqrtv4sf2"
912 [(set (match_operand:V4SF 0 "register_operand" "")
914 [(match_operand:V4SF 1 "nonimmediate_operand" "")] UNSPEC_RSQRT))]
917 ix86_emit_swsqrtsf (operands[0], operands[1], V4SFmode, 1);
;; V4SF insn tagged UNSPEC_RSQRT, emitted as rsqrtps with the %v template
;; prefix and prefix attribute maybe_vex.
;; NOTE(review): the unspec head line and the condition are not visible in
;; this listing.
921 (define_insn "sse_rsqrtv4sf2"
922 [(set (match_operand:V4SF 0 "register_operand" "=x")
924 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
926 "%vrsqrtps\t{%1, %0|%0, %1}"
927 [(set_attr "type" "sse")
928 (set_attr "prefix" "maybe_vex")
929 (set_attr "mode" "V4SF")])
;; AVX scalar form emitted as vrsqrtss: an unspec of operand 1 is combined
;; with register operand 2, and the template passes operand 2 as the middle
;; operand.  The mode attribute is SF.
;; NOTE(review): the combining RTL head, the unspec name and the condition
;; are not visible in this listing.
931 (define_insn "*avx_vmrsqrtv4sf2"
932 [(set (match_operand:V4SF 0 "register_operand" "=x")
934 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
936 (match_operand:V4SF 2 "register_operand" "x")
939 "vrsqrtss\t{%1, %2, %0|%0, %2, %1}"
940 [(set_attr "type" "sse")
941 (set_attr "prefix" "vex")
942 (set_attr "mode" "SF")])
;; SSE scalar form emitted as rsqrtss: an unspec of operand 1 is combined
;; with operand 2, which is tied to the destination (constraint 0), so the
;; template names only %0 and %1.  The mode attribute is SF.
;; NOTE(review): the combining RTL head, the unspec name and the condition
;; are not visible in this listing.
944 (define_insn "sse_vmrsqrtv4sf2"
945 [(set (match_operand:V4SF 0 "register_operand" "=x")
947 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
949 (match_operand:V4SF 2 "register_operand" "0")
952 "rsqrtss\t{%1, %0|%0, %1}"
953 [(set_attr "type" "sse")
954 (set_attr "mode" "SF")])
956 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
957 ;; isn't really correct, as those rtl operators aren't defined when
958 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
;; Expander for the smaxmin code iterator on the 256-bit float modes.  When
;; flag_finite_math_only is off, operand 1 is forced into a register before
;; ix86_fixup_binary_operands_no_copy runs.
;; NOTE(review): the braces around the preparation code are not visible in
;; this listing.
960 (define_expand "<code><mode>3"
961 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
962 (smaxmin:AVX256MODEF2P
963 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
964 (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
965 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
967 if (!flag_finite_math_only)
968 operands[1] = force_reg (<MODE>mode, operands[1]);
969 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
;; Expander for <code> on the 128-bit packed float modes.  When
;; flag_finite_math_only is off, operand 1 is forced into a register before
;; ix86_fixup_binary_operands_no_copy runs.
;; NOTE(review): the RTL operator line (presumably smaxmin, as in the
;; AVX256 expander of the same name) and the braces are not visible here.
972 (define_expand "<code><mode>3"
973 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
975 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
976 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
977 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
979 if (!flag_finite_math_only)
980 operands[1] = force_reg (<MODE>mode, operands[1]);
981 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
;; AVX packed min/max insn that is valid only under flag_finite_math_only.
;; Operands 1 and 2 are commutative (% constraint) and the form is
;; three-operand; ix86_binary_operator_ok validates the operands.
;; NOTE(review): the RTL operator line is not visible in this listing.
984 (define_insn "*avx_<code><mode>3_finite"
985 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
987 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
988 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
989 "AVX_VEC_FLOAT_MODE_P (<MODE>mode) && flag_finite_math_only
990 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
991 "v<maxminfprefix>p<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
992 [(set_attr "type" "sseadd")
993 (set_attr "prefix" "vex")
994 (set_attr "mode" "<MODE>")])
;; Legacy-SSE packed min/max, valid only when flag_finite_math_only is set.
;; Operand 1 is tied to the destination and marked commutative ('%').
;; Two-operand form.
996 (define_insn "*<code><mode>3_finite"
997 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
999 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
1000 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
1001 "SSE_VEC_FLOAT_MODE_P (<MODE>mode) && flag_finite_math_only
1002 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1003 "<maxminfprefix>p<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1004 [(set_attr "type" "sseadd")
1005 (set_attr "mode" "<MODE>")])
;; AVX packed min/max without the finite-math assumption (the _finite
;; variant above covers flag_finite_math_only).  When NaNs or signed zeros
;; matter, the hardware min/max result depends on operand order, so
;; operand 1 must be a register and must NOT carry the '%' commutative
;; marker; otherwise the register allocator may swap operands 1 and 2.
;; This mirrors the non-AVX "*<code><mode>3" pattern, and the expanders
;; already force operand 1 into a register when !flag_finite_math_only.
1007 (define_insn "*avx_<code><mode>3"
1008 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1010 (match_operand:AVXMODEF2P 1 "register_operand" "x")
1011 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
1012 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1013 "v<maxminfprefix>p<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1014 [(set_attr "type" "sseadd")
1015 (set_attr "prefix" "vex")
1016 (set_attr "mode" "<avxvecmode>")])
;; Legacy-SSE packed min/max without the finite-math assumption.  Operand 1
;; must be a register tied to the destination and is not marked
;; commutative, so operand order is preserved.
1018 (define_insn "*<code><mode>3"
1019 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1021 (match_operand:SSEMODEF2P 1 "register_operand" "0")
1022 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
1023 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1024 "<maxminfprefix>p<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1025 [(set_attr "type" "sseadd")
1026 (set_attr "mode" "<MODE>")])
;; AVX scalar min/max (v<min|max>s<suffix>) expressed as a vec_merge.
;; Three-operand VEX form; operand 1 is any SSE register.
;; NOTE(review): "type" is "sse" here but "sseadd" in the non-AVX
;; counterpart below -- confirm whether the difference is intended.
1028 (define_insn "*avx_vm<code><mode>3"
1029 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1030 (vec_merge:SSEMODEF2P
1032 (match_operand:SSEMODEF2P 1 "register_operand" "x")
1033 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
1036 "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
1037 "v<maxminfprefix>s<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1038 [(set_attr "type" "sse")
1039 (set_attr "prefix" "vex")
1040 (set_attr "mode" "<ssescalarmode>")])
;; Legacy-SSE scalar min/max (<min|max>s<suffix>) expressed as a vec_merge.
;; Two-operand form: operand 1 is tied to the destination.
1042 (define_insn "<sse>_vm<code><mode>3"
1043 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1044 (vec_merge:SSEMODEF2P
1046 (match_operand:SSEMODEF2P 1 "register_operand" "0")
1047 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
1050 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1051 "<maxminfprefix>s<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1052 [(set_attr "type" "sseadd")
1053 (set_attr "mode" "<ssescalarmode>")])
1055 ;; These versions of the min/max patterns implement exactly the operations
1056 ;; min = (op1 < op2 ? op1 : op2)
1057 ;; max = (!(op1 < op2) ? op1 : op2)
1058 ;; Their operands are not commutative, and thus they may be used in the
1059 ;; presence of -0.0 and NaN.
;; AVX packed minimum with fixed operand order (vminp<suffix>).
;; Operand 1 is a register and carries no commutative marker.
1061 (define_insn "*avx_ieee_smin<mode>3"
1062 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1064 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
1065 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")]
1067 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1068 "vminp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1069 [(set_attr "type" "sseadd")
1070 (set_attr "prefix" "vex")
1071 (set_attr "mode" "<avxvecmode>")])
;; AVX packed maximum with fixed operand order (vmaxp<suffix>).
;; Operand 1 is a register and carries no commutative marker.
1073 (define_insn "*avx_ieee_smax<mode>3"
1074 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1076 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
1077 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")]
1079 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1080 "vmaxp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1081 [(set_attr "type" "sseadd")
1082 (set_attr "prefix" "vex")
1083 (set_attr "mode" "<avxvecmode>")])
;; Legacy-SSE packed minimum with fixed operand order (minp<suffix>).
;; Operand 1 is tied to the destination.
1085 (define_insn "*ieee_smin<mode>3"
1086 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1088 [(match_operand:SSEMODEF2P 1 "register_operand" "0")
1089 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
1091 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1092 "minp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1093 [(set_attr "type" "sseadd")
1094 (set_attr "mode" "<MODE>")])
;; Legacy-SSE packed maximum with fixed operand order (maxp<suffix>).
;; Operand 1 is tied to the destination.
1096 (define_insn "*ieee_smax<mode>3"
1097 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1099 [(match_operand:SSEMODEF2P 1 "register_operand" "0")
1100 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
1102 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1103 "maxp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1104 [(set_attr "type" "sseadd")
1105 (set_attr "mode" "<MODE>")])
;; 256-bit AVX vaddsubps on V8SF.  The RTL combines an operation on
;; operands 1 and 2 with (minus op1 op2) on the same operands.
1107 (define_insn "avx_addsubv8sf3"
1108 [(set (match_operand:V8SF 0 "register_operand" "=x")
1111 (match_operand:V8SF 1 "register_operand" "x")
1112 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
1113 (minus:V8SF (match_dup 1) (match_dup 2))
1116 "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1117 [(set_attr "type" "sseadd")
1118 (set_attr "prefix" "vex")
1119 (set_attr "mode" "V8SF")])
;; 256-bit AVX vaddsubpd on V4DF; same structure as avx_addsubv8sf3.
1121 (define_insn "avx_addsubv4df3"
1122 [(set (match_operand:V4DF 0 "register_operand" "=x")
1125 (match_operand:V4DF 1 "register_operand" "x")
1126 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
1127 (minus:V4DF (match_dup 1) (match_dup 2))
1130 "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1131 [(set_attr "type" "sseadd")
1132 (set_attr "prefix" "vex")
1133 (set_attr "mode" "V4DF")])
;; 128-bit vaddsubps on V4SF, three-operand VEX encoding.
1135 (define_insn "*avx_addsubv4sf3"
1136 [(set (match_operand:V4SF 0 "register_operand" "=x")
1139 (match_operand:V4SF 1 "register_operand" "x")
1140 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1141 (minus:V4SF (match_dup 1) (match_dup 2))
1144 "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1145 [(set_attr "type" "sseadd")
1146 (set_attr "prefix" "vex")
1147 (set_attr "mode" "V4SF")])
;; SSE3 addsubps on V4SF, legacy two-operand encoding: operand 1 is tied
;; to the destination.  The prefix_rep attribute is set to 1.
1149 (define_insn "sse3_addsubv4sf3"
1150 [(set (match_operand:V4SF 0 "register_operand" "=x")
1153 (match_operand:V4SF 1 "register_operand" "0")
1154 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1155 (minus:V4SF (match_dup 1) (match_dup 2))
1158 "addsubps\t{%2, %0|%0, %2}"
1159 [(set_attr "type" "sseadd")
1160 (set_attr "prefix_rep" "1")
1161 (set_attr "mode" "V4SF")])
;; 128-bit vaddsubpd on V2DF, three-operand VEX encoding.
1163 (define_insn "*avx_addsubv2df3"
1164 [(set (match_operand:V2DF 0 "register_operand" "=x")
1167 (match_operand:V2DF 1 "register_operand" "x")
1168 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1169 (minus:V2DF (match_dup 1) (match_dup 2))
1172 "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1173 [(set_attr "type" "sseadd")
1174 (set_attr "prefix" "vex")
1175 (set_attr "mode" "V2DF")])
;; SSE3 addsubpd on V2DF, legacy two-operand encoding: operand 1 is tied
;; to the destination.  Scheduled as a "complex" Atom unit.
1177 (define_insn "sse3_addsubv2df3"
1178 [(set (match_operand:V2DF 0 "register_operand" "=x")
1181 (match_operand:V2DF 1 "register_operand" "0")
1182 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1183 (minus:V2DF (match_dup 1) (match_dup 2))
1186 "addsubpd\t{%2, %0|%0, %2}"
1187 [(set_attr "type" "sseadd")
1188 (set_attr "atom_unit" "complex")
1189 (set_attr "mode" "V2DF")])
;; 256-bit AVX horizontal add/subtract on V4DF (vhaddpd / vhsubpd).
;; The RTL pairs elements 0/1 and 2/3 of operand 1 and of operand 2.
1191 (define_insn "avx_h<plusminus_insn>v4df3"
1192 [(set (match_operand:V4DF 0 "register_operand" "=x")
1197 (match_operand:V4DF 1 "register_operand" "x")
1198 (parallel [(const_int 0)]))
1199 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1201 (vec_select:DF (match_dup 1) (parallel [(const_int 2)]))
1202 (vec_select:DF (match_dup 1) (parallel [(const_int 3)]))))
1206 (match_operand:V4DF 2 "nonimmediate_operand" "xm")
1207 (parallel [(const_int 0)]))
1208 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))
1210 (vec_select:DF (match_dup 2) (parallel [(const_int 2)]))
1211 (vec_select:DF (match_dup 2) (parallel [(const_int 3)]))))))]
1213 "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
1214 [(set_attr "type" "sseadd")
1215 (set_attr "prefix" "vex")
1216 (set_attr "mode" "V4DF")])
;; 256-bit AVX horizontal add/subtract on V8SF (vhaddps / vhsubps).
;; The RTL pairs adjacent elements: 0..3 of operand 1, then 0..3 of
;; operand 2, then 4..7 of operand 1, then 4..7 of operand 2.
1218 (define_insn "avx_h<plusminus_insn>v8sf3"
1219 [(set (match_operand:V8SF 0 "register_operand" "=x")
1225 (match_operand:V8SF 1 "register_operand" "x")
1226 (parallel [(const_int 0)]))
1227 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1229 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1230 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1234 (match_operand:V8SF 2 "nonimmediate_operand" "xm")
1235 (parallel [(const_int 0)]))
1236 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1238 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1239 (vec_select:SF (match_dup 2) (parallel [(const_int 3)])))))
1243 (vec_select:SF (match_dup 1) (parallel [(const_int 4)]))
1244 (vec_select:SF (match_dup 1) (parallel [(const_int 5)])))
1246 (vec_select:SF (match_dup 1) (parallel [(const_int 6)]))
1247 (vec_select:SF (match_dup 1) (parallel [(const_int 7)]))))
1250 (vec_select:SF (match_dup 2) (parallel [(const_int 4)]))
1251 (vec_select:SF (match_dup 2) (parallel [(const_int 5)])))
1253 (vec_select:SF (match_dup 2) (parallel [(const_int 6)]))
1254 (vec_select:SF (match_dup 2) (parallel [(const_int 7)])))))))]
1256 "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1257 [(set_attr "type" "sseadd")
1258 (set_attr "prefix" "vex")
1259 (set_attr "mode" "V8SF")])
;; 128-bit horizontal add/subtract on V4SF, three-operand VEX encoding
;; (vhaddps / vhsubps).
1261 (define_insn "*avx_h<plusminus_insn>v4sf3"
1262 [(set (match_operand:V4SF 0 "register_operand" "=x")
1267 (match_operand:V4SF 1 "register_operand" "x")
1268 (parallel [(const_int 0)]))
1269 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1271 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1272 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1276 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
1277 (parallel [(const_int 0)]))
1278 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1280 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1281 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
1283 "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1284 [(set_attr "type" "sseadd")
1285 (set_attr "prefix" "vex")
1286 (set_attr "mode" "V4SF")])
;; SSE3 horizontal add/subtract on V4SF (haddps / hsubps), legacy
;; two-operand encoding: operand 1 is tied to the destination.
1288 (define_insn "sse3_h<plusminus_insn>v4sf3"
1289 [(set (match_operand:V4SF 0 "register_operand" "=x")
1294 (match_operand:V4SF 1 "register_operand" "0")
1295 (parallel [(const_int 0)]))
1296 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1298 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1299 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1303 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
1304 (parallel [(const_int 0)]))
1305 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1307 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1308 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
1310 "h<plusminus_mnemonic>ps\t{%2, %0|%0, %2}"
1311 [(set_attr "type" "sseadd")
1312 (set_attr "atom_unit" "complex")
1313 (set_attr "prefix_rep" "1")
1314 (set_attr "mode" "V4SF")])
;; 128-bit horizontal add/subtract on V2DF, three-operand VEX encoding
;; (vhaddpd / vhsubpd).  Elements 0 and 1 of each operand are paired.
1316 (define_insn "*avx_h<plusminus_insn>v2df3"
1317 [(set (match_operand:V2DF 0 "register_operand" "=x")
1321 (match_operand:V2DF 1 "register_operand" "x")
1322 (parallel [(const_int 0)]))
1323 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1326 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
1327 (parallel [(const_int 0)]))
1328 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1330 "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
1331 [(set_attr "type" "sseadd")
1332 (set_attr "prefix" "vex")
1333 (set_attr "mode" "V2DF")])
;; SSE3 horizontal add/subtract on V2DF (haddpd / hsubpd), legacy
;; two-operand encoding: operand 1 is tied to the destination.
1335 (define_insn "sse3_h<plusminus_insn>v2df3"
1336 [(set (match_operand:V2DF 0 "register_operand" "=x")
1340 (match_operand:V2DF 1 "register_operand" "0")
1341 (parallel [(const_int 0)]))
1342 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1345 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
1346 (parallel [(const_int 0)]))
1347 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1349 "h<plusminus_mnemonic>pd\t{%2, %0|%0, %2}"
1350 [(set_attr "type" "sseadd")
1351 (set_attr "mode" "V2DF")])
;; Sum reduction of a V4SF vector (operand 1) into operand 0.  Two
;; strategies appear in the body: two chained SSE3 horizontal adds through
;; a fresh temporary, or ix86_expand_reduc_v4sf driven by gen_addv4sf3.
;; NOTE(review): the selection between them is presumably keyed on SSE3
;; availability -- confirm against the full definition.
1353 (define_expand "reduc_splus_v4sf"
1354 [(match_operand:V4SF 0 "register_operand" "")
1355 (match_operand:V4SF 1 "register_operand" "")]
1360 rtx tmp = gen_reg_rtx (V4SFmode);
1361 emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
1362 emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
1365 ix86_expand_reduc_v4sf (gen_addv4sf3, operands[0], operands[1]);
;; Sum reduction of a V2DF vector: a single SSE3 horizontal add of
;; operand 1 with itself, written to operand 0.
1369 (define_expand "reduc_splus_v2df"
1370 [(match_operand:V2DF 0 "register_operand" "")
1371 (match_operand:V2DF 1 "register_operand" "")]
1374 emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
;; Maximum reduction of a V4SF vector, delegated to
;; ix86_expand_reduc_v4sf with gen_smaxv4sf3 as the combining operation.
1378 (define_expand "reduc_smax_v4sf"
1379 [(match_operand:V4SF 0 "register_operand" "")
1380 (match_operand:V4SF 1 "register_operand" "")]
1383 ix86_expand_reduc_v4sf (gen_smaxv4sf3, operands[0], operands[1]);
;; Minimum reduction of a V4SF vector, delegated to
;; ix86_expand_reduc_v4sf with gen_sminv4sf3 as the combining operation.
1387 (define_expand "reduc_smin_v4sf"
1388 [(match_operand:V4SF 0 "register_operand" "")
1389 (match_operand:V4SF 1 "register_operand" "")]
1392 ix86_expand_reduc_v4sf (gen_sminv4sf3, operands[0], operands[1]);
1396 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1398 ;; Parallel floating point comparisons
1400 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; AVX packed floating-point compare with an immediate predicate
;; (vcmpp<suffix>).  Operand 3 is the comparison selector, an immediate
;; accepted by const_0_to_31_operand.
1402 (define_insn "avx_cmpp<avxmodesuffixf2c><mode>3"
1403 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1405 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
1406 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
1407 (match_operand:SI 3 "const_0_to_31_operand" "n")]
1410 "vcmpp<avxmodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1411 [(set_attr "type" "ssecmp")
1412 (set_attr "length_immediate" "1")
1413 (set_attr "prefix" "vex")
1414 (set_attr "mode" "<MODE>")])
;; AVX scalar floating-point compare with an immediate predicate
;; (vcmps<suffix>), expressed as a vec_merge.  Operand 3 is the comparison
;; selector, an immediate accepted by const_0_to_31_operand.
;; Operand 0 is the output and needs an explicit "=x" constraint, as in
;; avx_cmpp above; an empty constraint on a define_insn output leaves the
;; register allocator unrestricted, so it could pick a non-SSE register.
1416 (define_insn "avx_cmps<ssemodesuffixf2c><mode>3"
1417 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1418 (vec_merge:SSEMODEF2P
1420 [(match_operand:SSEMODEF2P 1 "register_operand" "x")
1421 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
1422 (match_operand:SI 3 "const_0_to_31_operand" "n")]
1427 "vcmps<ssemodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1428 [(set_attr "type" "ssecmp")
1429 (set_attr "length_immediate" "1")
1430 (set_attr "prefix" "vex")
1431 (set_attr "mode" "<ssescalarmode>")])
1433 ;; We don't promote 128-bit vector compare intrinsics, but the vectorizer
1434 ;; may generate 256-bit vector compare instructions.
;; The comparison code comes from match_operator 3 and is printed into the
;; mnemonic through %D3.
1435 (define_insn "*avx_maskcmp<mode>3"
1436 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1437 (match_operator:AVXMODEF2P 3 "avx_comparison_float_operator"
1438 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
1439 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")]))]
1440 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1441 "vcmp%D3p<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1442 [(set_attr "type" "ssecmp")
1443 (set_attr "prefix" "vex")
1444 (set_attr "length_immediate" "1")
1445 (set_attr "mode" "<avxvecmode>")])
;; Legacy-SSE mask compare for the SSEMODEF4 modes (scalar SF/DF and
;; vector V4SF/V2DF).  The comparison code comes from match_operator 3 and
;; is printed through %D3; operand 1 is tied to the destination.
1447 (define_insn "<sse>_maskcmp<mode>3"
1448 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x")
1449 (match_operator:SSEMODEF4 3 "sse_comparison_operator"
1450 [(match_operand:SSEMODEF4 1 "register_operand" "0")
1451 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "xm")]))]
1452 "(SSE_FLOAT_MODE_P (<MODE>mode) || SSE_VEC_FLOAT_MODE_P (<MODE>mode))
1454 "cmp%D3<ssemodesuffixf4>\t{%2, %0|%0, %2}"
1455 [(set_attr "type" "ssecmp")
1456 (set_attr "length_immediate" "1")
1457 (set_attr "mode" "<MODE>")])
;; Legacy-SSE scalar mask compare (cmp<cond>s<suffix>) expressed as a
;; vec_merge; disabled when TARGET_SSE5 is set.  Operand 1 is tied to the
;; destination.
1459 (define_insn "<sse>_vmmaskcmp<mode>3"
1460 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1461 (vec_merge:SSEMODEF2P
1462 (match_operator:SSEMODEF2P 3 "sse_comparison_operator"
1463 [(match_operand:SSEMODEF2P 1 "register_operand" "0")
1464 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")])
1467 "SSE_VEC_FLOAT_MODE_P (<MODE>mode) && !TARGET_SSE5"
1468 "cmp%D3s<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1469 [(set_attr "type" "ssecmp")
1470 (set_attr "length_immediate" "1")
1471 (set_attr "mode" "<ssescalarmode>")])
;; comis<suffix>: compares element 0 of operand 0 with element 0 of
;; operand 1 and sets the flags register in CCFP mode.  The "%v" in the
;; template and prefix "maybe_vex" let one pattern cover both the legacy
;; and the VEX encoding.  prefix_data16 depends on whether the mode is DF.
1473 (define_insn "<sse>_comi"
1474 [(set (reg:CCFP FLAGS_REG)
1477 (match_operand:<ssevecmode> 0 "register_operand" "x")
1478 (parallel [(const_int 0)]))
1480 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
1481 (parallel [(const_int 0)]))))]
1482 "SSE_FLOAT_MODE_P (<MODE>mode)"
1483 "%vcomis<ssemodefsuffix>\t{%1, %0|%0, %1}"
1484 [(set_attr "type" "ssecomi")
1485 (set_attr "prefix" "maybe_vex")
1486 (set_attr "prefix_rep" "0")
1487 (set (attr "prefix_data16")
1488 (if_then_else (eq_attr "mode" "DF")
1490 (const_string "0")))
1491 (set_attr "mode" "<MODE>")])
;; ucomis<suffix>: same shape as <sse>_comi, but the flags register is set
;; in CCFPU mode.  Compares element 0 of operand 0 with element 0 of
;; operand 1.
1493 (define_insn "<sse>_ucomi"
1494 [(set (reg:CCFPU FLAGS_REG)
1497 (match_operand:<ssevecmode> 0 "register_operand" "x")
1498 (parallel [(const_int 0)]))
1500 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
1501 (parallel [(const_int 0)]))))]
1502 "SSE_FLOAT_MODE_P (<MODE>mode)"
1503 "%vucomis<ssemodefsuffix>\t{%1, %0|%0, %1}"
1504 [(set_attr "type" "ssecomi")
1505 (set_attr "prefix" "maybe_vex")
1506 (set_attr "prefix_rep" "0")
1507 (set (attr "prefix_data16")
1508 (if_then_else (eq_attr "mode" "DF")
1510 (const_string "0")))
1511 (set_attr "mode" "<MODE>")])
;; Vector conditional select for the SSE float vector modes:
;; operand 0 = (operand 4 <cmp 3> operand 5) ? operand 1 : operand 2.
;; Expansion is delegated to ix86_expand_fp_vcond.
1513 (define_expand "vcond<mode>"
1514 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1515 (if_then_else:SSEMODEF2P
1516 (match_operator 3 ""
1517 [(match_operand:SSEMODEF2P 4 "nonimmediate_operand" "")
1518 (match_operand:SSEMODEF2P 5 "nonimmediate_operand" "")])
1519 (match_operand:SSEMODEF2P 1 "general_operand" "")
1520 (match_operand:SSEMODEF2P 2 "general_operand" "")))]
1521 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1523 if (ix86_expand_fp_vcond (operands))
1529 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1531 ;; Parallel floating point logical operations
1533 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; AVX packed and-not on float vectors (vandnp<suffix>), three-operand
;; VEX form.
1535 (define_insn "avx_andnot<mode>3"
1536 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1539 (match_operand:AVXMODEF2P 1 "register_operand" "x"))
1540 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
1541 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1542 "vandnp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1543 [(set_attr "type" "sselog")
1544 (set_attr "prefix" "vex")
1545 (set_attr "mode" "<avxvecmode>")])
;; Legacy-SSE packed and-not on float vectors (andnp<suffix>), two-operand
;; form: operand 1 is tied to the destination.
1547 (define_insn "<sse>_andnot<mode>3"
1548 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1551 (match_operand:SSEMODEF2P 1 "register_operand" "0"))
1552 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
1553 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1554 "andnp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1555 [(set_attr "type" "sselog")
1556 (set_attr "mode" "<MODE>")])
;; Expander for the plogic operations on the 256-bit AVX float vector
;; modes; the only preparation is the binary-operand fixup helper.
1558 (define_expand "<code><mode>3"
1559 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
1560 (plogic:AVX256MODEF2P
1561 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
1562 (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
1563 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
1564 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; AVX packed logic operation on float vectors (v<op>p<suffix>),
;; three-operand VEX form.  Operands 1 and 2 are commutative ('%').
1566 (define_insn "*avx_<code><mode>3"
1567 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1569 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
1570 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
1571 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
1572 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1573 "v<plogicprefix>p<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1574 [(set_attr "type" "sselog")
1575 (set_attr "prefix" "vex")
1576 (set_attr "mode" "<avxvecmode>")])
;; Expander for the logic operations on the 128-bit SSE float vector
;; modes; the only preparation is the binary-operand fixup helper.
1578 (define_expand "<code><mode>3"
1579 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1581 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
1582 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
1583 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1584 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Legacy-SSE packed logic operation on float vectors (<op>p<suffix>),
;; two-operand form.  Operand 1 is tied to the destination and is
;; commutative with operand 2 ('%').
1586 (define_insn "*<code><mode>3"
1587 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1589 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
1590 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
1591 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
1592 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1593 "<plogicprefix>p<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1594 [(set_attr "type" "sselog")
1595 (set_attr "mode" "<MODE>")])
;; Vector copysign built from three logic steps.  Operand 3 is a sign-bit
;; mask from ix86_build_signbit_mask; operands 4 and 5 are fresh
;; temporaries.  The visible RTL computes (~mask & op1) and (mask & op2),
;; and operand 0 is the IOR of operands 4 and 5.
1597 (define_expand "copysign<mode>3"
1600 (not:SSEMODEF2P (match_dup 3))
1601 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")))
1603 (and:SSEMODEF2P (match_dup 3)
1604 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))
1605 (set (match_operand:SSEMODEF2P 0 "register_operand" "")
1606 (ior:SSEMODEF2P (match_dup 4) (match_dup 5)))]
1607 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1609 operands[3] = ix86_build_signbit_mask (<ssescalarmode>mode, 1, 0);
1611 operands[4] = gen_reg_rtx (<MODE>mode);
1612 operands[5] = gen_reg_rtx (<MODE>mode);
1615 ;; Also define scalar versions. These are used for abs, neg, and
1616 ;; conditional move. Using subregs into vector modes causes register
1617 ;; allocation lossage. These patterns do not allow memory operands
1618 ;; because the native instructions read the full 128 bits.
;; Scalar (MODEF) and-not using the packed AVX instruction
;; (vandnp<suffix>).  All operands are registers; the "mode" attribute is
;; the corresponding vector mode.
1620 (define_insn "*avx_andnot<mode>3"
1621 [(set (match_operand:MODEF 0 "register_operand" "=x")
1624 (match_operand:MODEF 1 "register_operand" "x"))
1625 (match_operand:MODEF 2 "register_operand" "x")))]
1626 "AVX_FLOAT_MODE_P (<MODE>mode)"
1627 "vandnp<ssemodefsuffix>\t{%2, %1, %0|%0, %1, %2}"
1628 [(set_attr "type" "sselog")
1629 (set_attr "prefix" "vex")
1630 (set_attr "mode" "<ssevecmode>")])
;; Scalar (MODEF) and-not using the packed legacy-SSE instruction
;; (andnp<suffix>).  Register operands only; operand 1 is tied to the
;; destination.
1632 (define_insn "*andnot<mode>3"
1633 [(set (match_operand:MODEF 0 "register_operand" "=x")
1636 (match_operand:MODEF 1 "register_operand" "0"))
1637 (match_operand:MODEF 2 "register_operand" "x")))]
1638 "SSE_FLOAT_MODE_P (<MODE>mode)"
1639 "andnp<ssemodefsuffix>\t{%2, %0|%0, %2}"
1640 [(set_attr "type" "sselog")
1641 (set_attr "mode" "<ssevecmode>")])
;; Scalar (MODEF) logic operation using the packed AVX instruction
;; (v<op>p<suffix>).  Register operands only, three-operand VEX form.
1643 (define_insn "*avx_<code><mode>3"
1644 [(set (match_operand:MODEF 0 "register_operand" "=x")
1646 (match_operand:MODEF 1 "register_operand" "x")
1647 (match_operand:MODEF 2 "register_operand" "x")))]
1648 "AVX_FLOAT_MODE_P (<MODE>mode)"
1649 "v<plogicprefix>p<ssemodefsuffix>\t{%2, %1, %0|%0, %1, %2}"
1650 [(set_attr "type" "sselog")
1651 (set_attr "prefix" "vex")
1652 (set_attr "mode" "<ssevecmode>")])
;; Scalar (MODEF) logic operation using the packed legacy-SSE instruction
;; (<op>p<suffix>).  Register operands only; operand 1 is tied to the
;; destination.
1654 (define_insn "*<code><mode>3"
1655 [(set (match_operand:MODEF 0 "register_operand" "=x")
1657 (match_operand:MODEF 1 "register_operand" "0")
1658 (match_operand:MODEF 2 "register_operand" "x")))]
1659 "SSE_FLOAT_MODE_P (<MODE>mode)"
1660 "<plogicprefix>p<ssemodefsuffix>\t{%2, %0|%0, %2}"
1661 [(set_attr "type" "sselog")
1662 (set_attr "mode" "<ssevecmode>")])
1664 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1666 ;; SSE5 floating point multiply/accumulate instructions.  This includes the
1667 ;; scalar versions of the instructions as well as the vector versions.
1669 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1671 ;; In order to match (*a * *b) + *c, particularly when vectorizing, allow
1672 ;; combine to generate a multiply/add with two memory references. We then
1673 ;; split this insn into a load of one of the memory operands into the
1674 ;; destination register, followed by the multiply/add. If we don't manage to
1675 ;; split the insn, reload will generate the appropriate moves. This is needed
1676 ;; because combine has already folded one of the memory references into both
1677 ;; the multiply and add insns, and it can't generate a new pseudo. I.e.:
1678 ;; (set (reg1) (mem (addr1)))
1679 ;; (set (reg2) (mult (reg1) (mem (addr2))))
1680 ;; (set (reg3) (plus (reg2) (mem (addr3))))
;; SSE5 fused multiply-add for the SSEMODEF4 modes: four-operand fmadd
;; with four constraint alternatives.  Requires TARGET_SSE5 and
;; TARGET_FUSED_MADD; operand validity is checked by ix86_sse5_valid_op_p.
1682 (define_insn "sse5_fmadd<mode>4"
1683 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x,x,x")
1686 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%0,0,x,xm")
1687 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm,xm,x"))
1688 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x,0,0")))]
1689 "TARGET_SSE5 && TARGET_FUSED_MADD
1690 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, true)"
1691 "fmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1692 [(set_attr "type" "ssemuladd")
1693 (set_attr "mode" "<MODE>")])
1695 ;; Split fmadd with two memory operands into a load and the fmadd.
;; The split applies only when the operands are valid with a limit of 2
;; but not with a limit of 1 in ix86_sse5_valid_op_p, and when the
;; destination register is not mentioned in any input operand.
1697 [(set (match_operand:SSEMODEF4 0 "register_operand" "")
1700 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "")
1701 (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))
1702 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")))]
1704 && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)
1705 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, true)
1706 && !reg_mentioned_p (operands[0], operands[1])
1707 && !reg_mentioned_p (operands[0], operands[2])
1708 && !reg_mentioned_p (operands[0], operands[3])"
1711 ix86_expand_sse5_multiple_memory (operands, 4, <MODE>mode);
1712 emit_insn (gen_sse5_fmadd<mode>4 (operands[0], operands[1],
1713 operands[2], operands[3]));
1717 ;; For the scalar operations, use operand1 for the upper words that aren't
1718 ;; modified, so restrict the forms that are generated.
1719 ;; Scalar version of fmadd: operand 1 is tied to the destination in both
;; alternatives.
1720 (define_insn "sse5_vmfmadd<mode>4"
1721 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1722 (vec_merge:SSEMODEF2P
1725 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0")
1726 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
1727 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1730 "TARGET_SSE5 && TARGET_FUSED_MADD
1731 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)"
1732 "fmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1733 [(set_attr "type" "ssemuladd")
1734 (set_attr "mode" "<MODE>")])
1736 ;; Floating multiply and subtract
1737 ;; Allow two memory operands the same as fmadd
;; Same operand constraints and validity check as sse5_fmadd<mode>4.
1738 (define_insn "sse5_fmsub<mode>4"
1739 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x,x,x")
1742 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%0,0,x,xm")
1743 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm,xm,x"))
1744 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x,0,0")))]
1745 "TARGET_SSE5 && TARGET_FUSED_MADD
1746 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, true)"
1747 "fmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1748 [(set_attr "type" "ssemuladd")
1749 (set_attr "mode" "<MODE>")])
1751 ;; Split fmsub with two memory operands into a load and the fmsub.
;; Same conditions as the fmadd split: valid with a limit of 2 but not 1,
;; and the destination register is not mentioned in any input operand.
1753 [(set (match_operand:SSEMODEF4 0 "register_operand" "")
1756 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "")
1757 (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))
1758 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")))]
1760 && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)
1761 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, true)
1762 && !reg_mentioned_p (operands[0], operands[1])
1763 && !reg_mentioned_p (operands[0], operands[2])
1764 && !reg_mentioned_p (operands[0], operands[3])"
1767 ix86_expand_sse5_multiple_memory (operands, 4, <MODE>mode);
1768 emit_insn (gen_sse5_fmsub<mode>4 (operands[0], operands[1],
1769 operands[2], operands[3]));
1773 ;; For the scalar operations, use operand1 for the upper words that aren't
1774 ;; modified, so restrict the forms that are generated.
1775 ;; Scalar version of fmsub
;; NOTE(review): the last argument to ix86_sse5_valid_op_p is false here
;; but true in sse5_vmfmadd<mode>4 -- confirm the difference is intended.
1776 (define_insn "sse5_vmfmsub<mode>4"
1777 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1778 (vec_merge:SSEMODEF2P
1781 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0")
1782 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
1783 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1786 "TARGET_SSE5 && TARGET_FUSED_MADD
1787 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
1788 "fmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1789 [(set_attr "type" "ssemuladd")
1790 (set_attr "mode" "<MODE>")])
1792 ;; Floating point negative multiply and add
1793 ;; Rewrite (- (a * b) + c) into the canonical form: c - (a * b)
1794 ;; Note operands are out of order to simplify call to ix86_sse5_valid_op_p
1795 ;; Allow two memory operands to help in optimizing.
;; Operand 3 (the addend c) appears first in the RTL, before the product
;; of operands 1 and 2.
1796 (define_insn "sse5_fnmadd<mode>4"
1797 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x,x,x")
1799 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x,0,0")
1801 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%0,0,x,xm")
1802 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm,xm,x"))))]
1803 "TARGET_SSE5 && TARGET_FUSED_MADD
1804 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, true)"
1805 "fnmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1806 [(set_attr "type" "ssemuladd")
1807 (set_attr "mode" "<MODE>")])
1809 ;; Split fnmadd with two memory operands into a load and the fnmadd.
;; Same conditions as the fmadd split: valid with a limit of 2 but not 1,
;; and the destination register is not mentioned in any input operand.
1811 [(set (match_operand:SSEMODEF4 0 "register_operand" "")
1813 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")
1815 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "")
1816 (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))))]
1818 && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)
1819 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, true)
1820 && !reg_mentioned_p (operands[0], operands[1])
1821 && !reg_mentioned_p (operands[0], operands[2])
1822 && !reg_mentioned_p (operands[0], operands[3])"
1825 ix86_expand_sse5_multiple_memory (operands, 4, <MODE>mode);
1826 emit_insn (gen_sse5_fnmadd<mode>4 (operands[0], operands[1],
1827 operands[2], operands[3]));
1831 ;; For the scalar operations, use operand1 for the upper words that aren't
1832 ;; modified, so restrict the forms that are generated.
1833 ;; Scalar version of fnmadd: operand 1 is tied to the destination in both
;; alternatives; operand 3 appears first in the RTL, as in the vector form.
1834 (define_insn "sse5_vmfnmadd<mode>4"
1835 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1836 (vec_merge:SSEMODEF2P
1838 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")
1840 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0")
1841 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm")))
1844 "TARGET_SSE5 && TARGET_FUSED_MADD
1845 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)"
1846 "fnmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1847 [(set_attr "type" "ssemuladd")
1848 (set_attr "mode" "<MODE>")])
1850 ;; Floating point negative multiply and subtract
1851 ;; Rewrite (- (a * b) - c) into the canonical form: ((-a) * b) - c
1852 ;; Allow 2 memory operands to help with optimization
;; Unlike fmadd/fmsub/fnmadd, this pattern has only two alternatives, and
;; operand 1 is tied to the destination in both.
1853 (define_insn "sse5_fnmsub<mode>4"
1854 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x")
1858 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "0,0"))
1859 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm"))
1860 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x")))]
1861 "TARGET_SSE5 && TARGET_FUSED_MADD
1862 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, false)"
1863 "fnmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1864 [(set_attr "type" "ssemuladd")
1865 (set_attr "mode" "<MODE>")])
1867 ;; Split fnmsub with two memory operands into a load and the fnmsub.
;; Applies when the operands are valid with a limit of 2 but not 1 in
;; ix86_sse5_valid_op_p, and the destination register is not mentioned in
;; any input operand.
1869 [(set (match_operand:SSEMODEF4 0 "register_operand" "")
1873 (match_operand:SSEMODEF4 1 "nonimmediate_operand" ""))
1874 (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))
1875 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")))]
1877 && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)
1878 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, false)
1879 && !reg_mentioned_p (operands[0], operands[1])
1880 && !reg_mentioned_p (operands[0], operands[2])
1881 && !reg_mentioned_p (operands[0], operands[3])"
1884 ix86_expand_sse5_multiple_memory (operands, 4, <MODE>mode);
1885 emit_insn (gen_sse5_fnmsub<mode>4 (operands[0], operands[1],
1886 operands[2], operands[3]));
1890 ;; For the scalar operations, use operand1 for the upper words that aren't
1891 ;; modified, so restrict the forms that are generated.
1892 ;; Scalar version of fnmsub
;; NOTE(review): this passes 2 to ix86_sse5_valid_op_p where the other
;; scalar (vm) patterns pass 1, although neither constraint alternative
;; admits more than one memory operand -- confirm the 2 is intended.
1893 (define_insn "sse5_vmfnmsub<mode>4"
1894 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1895 (vec_merge:SSEMODEF2P
1899 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0"))
1900 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
1901 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1904 "TARGET_SSE5 && TARGET_FUSED_MADD
1905 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, false)"
1906 "fnmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1907 [(set_attr "type" "ssemuladd")
1908 (set_attr "mode" "<MODE>")])
1910 ;; The same instructions using an UNSPEC to allow the intrinsic to be used
1911 ;; even if the user used -mno-fused-madd
1912 ;; Parallel instructions. During instruction generation, just default
1913 ;; to registers, and let combine later build the appropriate instruction.
;; Intrinsic expander for fmadd: all inputs are register operands.  When
;; TARGET_FUSED_MADD is set, the regular sse5_fmadd<mode>4 insn is emitted
;; instead of the UNSPEC_SSE5_INTRINSIC form.
1914 (define_expand "sse5i_fmadd<mode>4"
1915 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1919 (match_operand:SSEMODEF2P 1 "register_operand" "")
1920 (match_operand:SSEMODEF2P 2 "register_operand" ""))
1921 (match_operand:SSEMODEF2P 3 "register_operand" ""))]
1922 UNSPEC_SSE5_INTRINSIC))]
1925 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1926 if (TARGET_FUSED_MADD)
1928 emit_insn (gen_sse5_fmadd<mode>4 (operands[0], operands[1],
1929 operands[2], operands[3]));
;; Matcher for the UNSPEC_SSE5_INTRINSIC fmadd form.  Four alternatives: the
;; destination is tied to operand 1 in the first two and to operand 3 in the
;; last two.  Only TARGET_SSE5 is required (no TARGET_FUSED_MADD test).
1934 (define_insn "*sse5i_fmadd<mode>4"
1935 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
1939 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0,0,x,xm")
1940 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x"))
1941 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0"))]
1942 UNSPEC_SSE5_INTRINSIC))]
1943 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)"
1944 "fmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1945 [(set_attr "type" "ssemuladd")
1946 (set_attr "mode" "<MODE>")])
;; Intrinsic expander for fmsub: register operands only.  When
;; TARGET_FUSED_MADD is set the body emits gen_sse5_fmsub<mode>4 in place of
;; the UNSPEC_SSE5_INTRINSIC pattern.
1948 (define_expand "sse5i_fmsub<mode>4"
1949 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1953 (match_operand:SSEMODEF2P 1 "register_operand" "")
1954 (match_operand:SSEMODEF2P 2 "register_operand" ""))
1955 (match_operand:SSEMODEF2P 3 "register_operand" ""))]
1956 UNSPEC_SSE5_INTRINSIC))]
1959 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1960 if (TARGET_FUSED_MADD)
1962 emit_insn (gen_sse5_fmsub<mode>4 (operands[0], operands[1],
1963 operands[2], operands[3]));
;; Matcher for the UNSPEC_SSE5_INTRINSIC fmsub form.  Four alternatives: the
;; destination is tied to operand 1 in the first two and to operand 3 in the
;; last two.  Only TARGET_SSE5 is required.
1968 (define_insn "*sse5i_fmsub<mode>4"
1969 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
1973 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0,0,x,xm")
1974 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x"))
1975 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0"))]
1976 UNSPEC_SSE5_INTRINSIC))]
1977 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)"
1978 "fmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1979 [(set_attr "type" "ssemuladd")
1980 (set_attr "mode" "<MODE>")])
1982 ;; Rewrite (- (a * b) + c) into the canonical form: c - (a * b)
1983 ;; Note operands are out of order to simplify call to ix86_sse5_valid_op_p
;; (operand 3 is listed before operands 1 and 2 in the pattern).  When
;; TARGET_FUSED_MADD is set the body emits gen_sse5_fnmadd<mode>4 in place of
;; the UNSPEC_SSE5_INTRINSIC pattern.
1984 (define_expand "sse5i_fnmadd<mode>4"
1985 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1988 (match_operand:SSEMODEF2P 3 "register_operand" "")
1990 (match_operand:SSEMODEF2P 1 "register_operand" "")
1991 (match_operand:SSEMODEF2P 2 "register_operand" "")))]
1992 UNSPEC_SSE5_INTRINSIC))]
1995 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1996 if (TARGET_FUSED_MADD)
1998 emit_insn (gen_sse5_fnmadd<mode>4 (operands[0], operands[1],
1999 operands[2], operands[3]));
;; Matcher for the UNSPEC_SSE5_INTRINSIC fnmadd form.  Operand 3 precedes
;; operands 1 and 2 in the pattern; the destination is tied to operand 1 in
;; the first two alternatives and to operand 3 in the last two.
2004 (define_insn "*sse5i_fnmadd<mode>4"
2005 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
2008 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0")
2010 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0,0,x,xm")
2011 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x")))]
2012 UNSPEC_SSE5_INTRINSIC))]
2013 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)"
2014 "fnmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2015 [(set_attr "type" "ssemuladd")
2016 (set_attr "mode" "<MODE>")])
2018 ;; Rewrite (- (a * b) - c) into the canonical form: ((-a) * b) - c
;; Register operands only.  When TARGET_FUSED_MADD is set the body emits
;; gen_sse5_fnmsub<mode>4 in place of the UNSPEC_SSE5_INTRINSIC pattern.
2019 (define_expand "sse5i_fnmsub<mode>4"
2020 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
2025 (match_operand:SSEMODEF2P 1 "register_operand" ""))
2026 (match_operand:SSEMODEF2P 2 "register_operand" ""))
2027 (match_operand:SSEMODEF2P 3 "register_operand" ""))]
2028 UNSPEC_SSE5_INTRINSIC))]
2031 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
2032 if (TARGET_FUSED_MADD)
2034 emit_insn (gen_sse5_fnmsub<mode>4 (operands[0], operands[1],
2035 operands[2], operands[3]));
;; Matcher for the UNSPEC_SSE5_INTRINSIC fnmsub form.  Unlike the
;; fmadd/fmsub/fnmadd matchers, operand 1 carries no "%" marker and the last
;; argument passed to ix86_sse5_valid_op_p is false.
2040 (define_insn "*sse5i_fnmsub<mode>4"
2041 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
2046 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0,x,xm"))
2047 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x"))
2048 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0"))]
2049 UNSPEC_SSE5_INTRINSIC))]
2050 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
2051 "fnmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2052 [(set_attr "type" "ssemuladd")
2053 (set_attr "mode" "<MODE>")])
2055 ;; Scalar instructions
;; Intrinsic expander for the vec_merge (scalar) fmadd form: register
;; operands only.  When TARGET_FUSED_MADD is set the body emits
;; gen_sse5_vmfmadd<mode>4 in place of the UNSPEC_SSE5_INTRINSIC pattern.
2056 (define_expand "sse5i_vmfmadd<mode>4"
2057 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
2059 [(vec_merge:SSEMODEF2P
2062 (match_operand:SSEMODEF2P 1 "register_operand" "")
2063 (match_operand:SSEMODEF2P 2 "register_operand" ""))
2064 (match_operand:SSEMODEF2P 3 "register_operand" ""))
2067 UNSPEC_SSE5_INTRINSIC))]
2070 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
2071 if (TARGET_FUSED_MADD)
2073 emit_insn (gen_sse5_vmfmadd<mode>4 (operands[0], operands[1],
2074 operands[2], operands[3]));
2079 ;; For the scalar operations, use operand1 for the upper words that aren't
2080 ;; modified, so restrict the forms that are accepted.
;; Operand 1 must be a register tied to the destination; each alternative
;; lets only one of operands 2 and 3 be memory.  The "mode" attribute is
;; <ssescalarmode> rather than <MODE>.
2081 (define_insn "*sse5i_vmfmadd<mode>4"
2082 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2084 [(vec_merge:SSEMODEF2P
2087 (match_operand:SSEMODEF2P 1 "register_operand" "0,0")
2088 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
2089 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
2092 UNSPEC_SSE5_INTRINSIC))]
2093 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
2094 "fmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2095 [(set_attr "type" "ssemuladd")
2096 (set_attr "mode" "<ssescalarmode>")])
;; Intrinsic expander for the vec_merge (scalar) fmsub form: register
;; operands only.  When TARGET_FUSED_MADD is set the body emits
;; gen_sse5_vmfmsub<mode>4 in place of the UNSPEC_SSE5_INTRINSIC pattern.
2098 (define_expand "sse5i_vmfmsub<mode>4"
2099 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
2101 [(vec_merge:SSEMODEF2P
2104 (match_operand:SSEMODEF2P 1 "register_operand" "")
2105 (match_operand:SSEMODEF2P 2 "register_operand" ""))
2106 (match_operand:SSEMODEF2P 3 "register_operand" ""))
2109 UNSPEC_SSE5_INTRINSIC))]
2112 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
2113 if (TARGET_FUSED_MADD)
2115 emit_insn (gen_sse5_vmfmsub<mode>4 (operands[0], operands[1],
2116 operands[2], operands[3]));
;; Matcher for the scalar UNSPEC_SSE5_INTRINSIC fmsub form.  Operand 1 must
;; be a register tied to the destination; each alternative lets only one of
;; operands 2 and 3 be memory.
2121 (define_insn "*sse5i_vmfmsub<mode>4"
2122 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2124 [(vec_merge:SSEMODEF2P
2127 (match_operand:SSEMODEF2P 1 "register_operand" "0,0")
2128 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
2129 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
2132 UNSPEC_SSE5_INTRINSIC))]
2133 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
2134 "fmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2135 [(set_attr "type" "ssemuladd")
2136 (set_attr "mode" "<ssescalarmode>")])
2138 ;; Note operands are out of order to simplify call to ix86_sse5_valid_op_p
;; (operand 3 is listed before operands 1 and 2 in the pattern).  When
;; TARGET_FUSED_MADD is set the body emits gen_sse5_vmfnmadd<mode>4 in place
;; of the UNSPEC_SSE5_INTRINSIC pattern.
2139 (define_expand "sse5i_vmfnmadd<mode>4"
2140 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
2142 [(vec_merge:SSEMODEF2P
2144 (match_operand:SSEMODEF2P 3 "register_operand" "")
2146 (match_operand:SSEMODEF2P 1 "register_operand" "")
2147 (match_operand:SSEMODEF2P 2 "register_operand" "")))
2150 UNSPEC_SSE5_INTRINSIC))]
2153 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
2154 if (TARGET_FUSED_MADD)
2156 emit_insn (gen_sse5_vmfnmadd<mode>4 (operands[0], operands[1],
2157 operands[2], operands[3]));
;; Matcher for the scalar UNSPEC_SSE5_INTRINSIC fnmadd form.  Operand 3
;; precedes operands 1 and 2 in the pattern; operand 1 is tied to the
;; destination in both alternatives and is a nonimmediate_operand marked "%".
2162 (define_insn "*sse5i_vmfnmadd<mode>4"
2163 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2165 [(vec_merge:SSEMODEF2P
2167 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")
2169 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0,0")
2170 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm")))
2173 UNSPEC_SSE5_INTRINSIC))]
2174 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)"
2175 "fnmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2176 [(set_attr "type" "ssemuladd")
2177 (set_attr "mode" "<ssescalarmode>")])
;; Intrinsic expander for the vec_merge (scalar) fnmsub form: register
;; operands only.  When TARGET_FUSED_MADD is set the body emits
;; gen_sse5_vmfnmsub<mode>4 in place of the UNSPEC_SSE5_INTRINSIC pattern.
2179 (define_expand "sse5i_vmfnmsub<mode>4"
2180 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
2182 [(vec_merge:SSEMODEF2P
2186 (match_operand:SSEMODEF2P 1 "register_operand" ""))
2187 (match_operand:SSEMODEF2P 2 "register_operand" ""))
2188 (match_operand:SSEMODEF2P 3 "register_operand" ""))
2191 UNSPEC_SSE5_INTRINSIC))]
2194 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
2195 if (TARGET_FUSED_MADD)
2197 emit_insn (gen_sse5_vmfnmsub<mode>4 (operands[0], operands[1],
2198 operands[2], operands[3]));
;; Matcher for the scalar UNSPEC_SSE5_INTRINSIC fnmsub form.  Operand 1 must
;; be a register tied to the destination; each alternative lets only one of
;; operands 2 and 3 be memory.
2203 (define_insn "*sse5i_vmfnmsub<mode>4"
2204 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2206 [(vec_merge:SSEMODEF2P
2210 (match_operand:SSEMODEF2P 1 "register_operand" "0,0"))
2211 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
2212 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
2215 UNSPEC_SSE5_INTRINSIC))]
2216 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
2217 "fnmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2218 [(set_attr "type" "ssemuladd")
2219 (set_attr "mode" "<ssescalarmode>")])
2221 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2223 ;; Parallel single-precision floating point conversion operations
2225 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2ps: operand 2 (V2SI, constraint "ym") is converted to V2SF and
;; combined with operand 1, which is tied to the V4SF destination.
2227 (define_insn "sse_cvtpi2ps"
2228 [(set (match_operand:V4SF 0 "register_operand" "=x")
2231 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
2232 (match_operand:V4SF 1 "register_operand" "0")
2235 "cvtpi2ps\t{%2, %0|%0, %2}"
2236 [(set_attr "type" "ssecvt")
2237 (set_attr "mode" "V4SF")])
;; cvtps2pi: converts a V4SF source through an unspec and keeps elements 0
;; and 1 as the V2SI result in a "y" register.
2239 (define_insn "sse_cvtps2pi"
2240 [(set (match_operand:V2SI 0 "register_operand" "=y")
2242 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
2244 (parallel [(const_int 0) (const_int 1)])))]
2246 "cvtps2pi\t{%1, %0|%0, %1}"
2247 [(set_attr "type" "ssecvt")
2248 (set_attr "unit" "mmx")
2249 (set_attr "mode" "DI")])
;; cvttps2pi: same shape as sse_cvtps2pi but expressed with the RTL "fix"
;; operator instead of an unspec; elements 0 and 1 form the V2SI result.
2251 (define_insn "sse_cvttps2pi"
2252 [(set (match_operand:V2SI 0 "register_operand" "=y")
2254 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
2255 (parallel [(const_int 0) (const_int 1)])))]
2257 "cvttps2pi\t{%1, %0|%0, %1}"
2258 [(set_attr "type" "ssecvt")
2259 (set_attr "unit" "mmx")
2260 (set_attr "prefix_rep" "0")
2261 (set_attr "mode" "SF")])
;; VEX form of cvtsi2ss: three-operand template, so operand 1 is any "x"
;; register rather than being tied to the destination.
2263 (define_insn "*avx_cvtsi2ss"
2264 [(set (match_operand:V4SF 0 "register_operand" "=x")
2267 (float:SF (match_operand:SI 2 "nonimmediate_operand" "rm")))
2268 (match_operand:V4SF 1 "register_operand" "x")
2271 "vcvtsi2ss\t{%2, %1, %0|%0, %1, %2}"
2272 [(set_attr "type" "sseicvt")
2273 (set_attr "prefix" "vex")
2274 (set_attr "mode" "SF")])
;; cvtsi2ss: SImode operand 2 (register or memory alternative) is converted
;; to SF; operand 1 is tied to the destination.  The decode attributes are
;; given per alternative.
2276 (define_insn "sse_cvtsi2ss"
2277 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2280 (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
2281 (match_operand:V4SF 1 "register_operand" "0,0")
2284 "cvtsi2ss\t{%2, %0|%0, %2}"
2285 [(set_attr "type" "sseicvt")
2286 (set_attr "athlon_decode" "vector,double")
2287 (set_attr "amdfam10_decode" "vector,double")
2288 (set_attr "mode" "SF")])
;; VEX form of cvtsi2ssq: DImode source, three-operand template.  Requires
;; TARGET_AVX and TARGET_64BIT.
2290 (define_insn "*avx_cvtsi2ssq"
2291 [(set (match_operand:V4SF 0 "register_operand" "=x")
2294 (float:SF (match_operand:DI 2 "nonimmediate_operand" "rm")))
2295 (match_operand:V4SF 1 "register_operand" "x")
2297 "TARGET_AVX && TARGET_64BIT"
2298 "vcvtsi2ssq\t{%2, %1, %0|%0, %1, %2}"
2299 [(set_attr "type" "sseicvt")
2300 (set_attr "length_vex" "4")
2301 (set_attr "prefix" "vex")
2302 (set_attr "mode" "SF")])
;; cvtsi2ssq: DImode source; operand 1 is tied to the destination.  Requires
;; TARGET_SSE and TARGET_64BIT.
;; NOTE(review): the second alternative for operand 2 is "rm" here, whereas
;; sse_cvtsi2ss and sse2_cvtsi2sdq use "r,m" -- confirm this is intended.
2304 (define_insn "sse_cvtsi2ssq"
2305 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2308 (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,rm")))
2309 (match_operand:V4SF 1 "register_operand" "0,0")
2311 "TARGET_SSE && TARGET_64BIT"
2312 "cvtsi2ssq\t{%2, %0|%0, %2}"
2313 [(set_attr "type" "sseicvt")
2314 (set_attr "prefix_rex" "1")
2315 (set_attr "athlon_decode" "vector,double")
2316 (set_attr "amdfam10_decode" "vector,double")
2317 (set_attr "mode" "SF")])
;; cvtss2si on element 0 of a V4SF operand, wrapped in UNSPEC_FIX_NOTRUNC;
;; the result is an SImode general register.  The "%v" template prefix pairs
;; with the "maybe_vex" prefix attribute.
2319 (define_insn "sse_cvtss2si"
2320 [(set (match_operand:SI 0 "register_operand" "=r,r")
2323 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2324 (parallel [(const_int 0)]))]
2325 UNSPEC_FIX_NOTRUNC))]
2327 "%vcvtss2si\t{%1, %0|%0, %1}"
2328 [(set_attr "type" "sseicvt")
2329 (set_attr "athlon_decode" "double,vector")
2330 (set_attr "prefix_rep" "1")
2331 (set_attr "prefix" "maybe_vex")
2332 (set_attr "mode" "SI")])
;; Variant of sse_cvtss2si whose source is a plain SFmode operand instead of
;; element 0 of a V4SF vector; emits the same instruction template.
2334 (define_insn "sse_cvtss2si_2"
2335 [(set (match_operand:SI 0 "register_operand" "=r,r")
2336 (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
2337 UNSPEC_FIX_NOTRUNC))]
2339 "%vcvtss2si\t{%1, %0|%0, %1}"
2340 [(set_attr "type" "sseicvt")
2341 (set_attr "athlon_decode" "double,vector")
2342 (set_attr "amdfam10_decode" "double,double")
2343 (set_attr "prefix_rep" "1")
2344 (set_attr "prefix" "maybe_vex")
2345 (set_attr "mode" "SI")])
;; 64-bit counterpart of sse_cvtss2si: DImode result from element 0 of a
;; V4SF operand.  Requires TARGET_SSE and TARGET_64BIT.
2347 (define_insn "sse_cvtss2siq"
2348 [(set (match_operand:DI 0 "register_operand" "=r,r")
2351 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2352 (parallel [(const_int 0)]))]
2353 UNSPEC_FIX_NOTRUNC))]
2354 "TARGET_SSE && TARGET_64BIT"
2355 "%vcvtss2siq\t{%1, %0|%0, %1}"
2356 [(set_attr "type" "sseicvt")
2357 (set_attr "athlon_decode" "double,vector")
2358 (set_attr "prefix_rep" "1")
2359 (set_attr "prefix" "maybe_vex")
2360 (set_attr "mode" "DI")])
;; Variant of sse_cvtss2siq whose source is a plain SFmode operand.
;; Requires TARGET_SSE and TARGET_64BIT.
2362 (define_insn "sse_cvtss2siq_2"
2363 [(set (match_operand:DI 0 "register_operand" "=r,r")
2364 (unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
2365 UNSPEC_FIX_NOTRUNC))]
2366 "TARGET_SSE && TARGET_64BIT"
2367 "%vcvtss2siq\t{%1, %0|%0, %1}"
2368 [(set_attr "type" "sseicvt")
2369 (set_attr "athlon_decode" "double,vector")
2370 (set_attr "amdfam10_decode" "double,double")
2371 (set_attr "prefix_rep" "1")
2372 (set_attr "prefix" "maybe_vex")
2373 (set_attr "mode" "DI")])
;; cvttss2si on element 0 of a V4SF operand; SImode result in a general
;; register.  No UNSPEC_FIX_NOTRUNC wrapper is visible here, unlike
;; sse_cvtss2si.
2375 (define_insn "sse_cvttss2si"
2376 [(set (match_operand:SI 0 "register_operand" "=r,r")
2379 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2380 (parallel [(const_int 0)]))))]
2382 "%vcvttss2si\t{%1, %0|%0, %1}"
2383 [(set_attr "type" "sseicvt")
2384 (set_attr "athlon_decode" "double,vector")
2385 (set_attr "amdfam10_decode" "double,double")
2386 (set_attr "prefix_rep" "1")
2387 (set_attr "prefix" "maybe_vex")
2388 (set_attr "mode" "SI")])
;; 64-bit counterpart of sse_cvttss2si: DImode result.  Requires TARGET_SSE
;; and TARGET_64BIT.
2390 (define_insn "sse_cvttss2siq"
2391 [(set (match_operand:DI 0 "register_operand" "=r,r")
2394 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2395 (parallel [(const_int 0)]))))]
2396 "TARGET_SSE && TARGET_64BIT"
2397 "%vcvttss2siq\t{%1, %0|%0, %1}"
2398 [(set_attr "type" "sseicvt")
2399 (set_attr "athlon_decode" "double,vector")
2400 (set_attr "amdfam10_decode" "double,double")
2401 (set_attr "prefix_rep" "1")
2402 (set_attr "prefix" "maybe_vex")
2403 (set_attr "mode" "DI")])
;; vcvtdq2ps over the AVXMODEDCVTDQ2PS iterator: the source has mode
;; <avxcvtvecmode> and is converted with the RTL "float" operator.
2405 (define_insn "avx_cvtdq2ps<avxmodesuffix>"
2406 [(set (match_operand:AVXMODEDCVTDQ2PS 0 "register_operand" "=x")
2407 (float:AVXMODEDCVTDQ2PS
2408 (match_operand:<avxcvtvecmode> 1 "nonimmediate_operand" "xm")))]
2410 "vcvtdq2ps\t{%1, %0|%0, %1}"
2411 [(set_attr "type" "ssecvt")
2412 (set_attr "prefix" "vex")
2413 (set_attr "mode" "<avxvecmode>")])
;; cvtdq2ps: V4SI register-or-memory source converted to V4SF.
2415 (define_insn "sse2_cvtdq2ps"
2416 [(set (match_operand:V4SF 0 "register_operand" "=x")
2417 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
2419 "cvtdq2ps\t{%1, %0|%0, %1}"
2420 [(set_attr "type" "ssecvt")
2421 (set_attr "mode" "V4SF")])
;; Multi-step expander built around a "float" conversion of V4SI operand 1.
;; From the visible lines: a "lt" compare of operand 5 against operand 3, an
;; "and" of operand 6 with operand 4, and a final "plus" of operands 5 and 7
;; stored in operand 0.  The preparation code sets operand 3 to a zero V4SF
;; register, operand 4 to a V4SF constant built from 1.0 scaled by 2**32
;; (real_ldexp with exponent 32), and operands 5-7 to fresh V4SF registers.
;; Presumably this corrects the signed conversion for unsigned inputs --
;; TODO confirm against the full pattern.
2423 (define_expand "sse2_cvtudq2ps"
2425 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "")))
2427 (lt:V4SF (match_dup 5) (match_dup 3)))
2429 (and:V4SF (match_dup 6) (match_dup 4)))
2430 (set (match_operand:V4SF 0 "register_operand" "")
2431 (plus:V4SF (match_dup 5) (match_dup 7)))]
2434 REAL_VALUE_TYPE TWO32r;
2438 real_ldexp (&TWO32r, &dconst1, 32);
2439 x = const_double_from_real_value (TWO32r, SFmode);
2441 operands[3] = force_reg (V4SFmode, CONST0_RTX (V4SFmode));
2442 operands[4] = force_reg (V4SFmode, ix86_build_const_vector (SFmode, 1, x));
2444 for (i = 5; i < 8; i++)
2445 operands[i] = gen_reg_rtx (V4SFmode);
;; vcvtps2dq over the AVXMODEDCVTPS2DQ iterator, expressed as
;; UNSPEC_FIX_NOTRUNC on a <avxcvtvecmode> source.
2448 (define_insn "avx_cvtps2dq<avxmodesuffix>"
2449 [(set (match_operand:AVXMODEDCVTPS2DQ 0 "register_operand" "=x")
2450 (unspec:AVXMODEDCVTPS2DQ
2451 [(match_operand:<avxcvtvecmode> 1 "nonimmediate_operand" "xm")]
2452 UNSPEC_FIX_NOTRUNC))]
2454 "vcvtps2dq\t{%1, %0|%0, %1}"
2455 [(set_attr "type" "ssecvt")
2456 (set_attr "prefix" "vex")
2457 (set_attr "mode" "<avxvecmode>")])
;; cvtps2dq: V4SF source to V4SI result, expressed as UNSPEC_FIX_NOTRUNC.
2459 (define_insn "sse2_cvtps2dq"
2460 [(set (match_operand:V4SI 0 "register_operand" "=x")
2461 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
2462 UNSPEC_FIX_NOTRUNC))]
2464 "cvtps2dq\t{%1, %0|%0, %1}"
2465 [(set_attr "type" "ssecvt")
2466 (set_attr "prefix_data16" "1")
2467 (set_attr "mode" "TI")])
;; vcvttps2dq over the AVXMODEDCVTPS2DQ iterator, expressed with the RTL
;; "fix" operator on a <avxcvtvecmode> source.
2469 (define_insn "avx_cvttps2dq<avxmodesuffix>"
2470 [(set (match_operand:AVXMODEDCVTPS2DQ 0 "register_operand" "=x")
2471 (fix:AVXMODEDCVTPS2DQ
2472 (match_operand:<avxcvtvecmode> 1 "nonimmediate_operand" "xm")))]
2474 "vcvttps2dq\t{%1, %0|%0, %1}"
2475 [(set_attr "type" "ssecvt")
2476 (set_attr "prefix" "vex")
2477 (set_attr "mode" "<avxvecmode>")])
;; cvttps2dq: V4SF source to V4SI result via the RTL "fix" operator.
2479 (define_insn "sse2_cvttps2dq"
2480 [(set (match_operand:V4SI 0 "register_operand" "=x")
2481 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
2483 "cvttps2dq\t{%1, %0|%0, %1}"
2484 [(set_attr "type" "ssecvt")
2485 (set_attr "prefix_rep" "1")
2486 (set_attr "prefix_data16" "0")
2487 (set_attr "mode" "TI")])
2489 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2491 ;; Parallel double-precision floating point conversion operations
2493 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2pd: V2SI source ("y" register or memory alternative) converted to
;; V2DF.  The "unit" and "prefix_data16" attributes apply only to the first
;; ("y") alternative; the second uses "*".
2495 (define_insn "sse2_cvtpi2pd"
2496 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2497 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
2499 "cvtpi2pd\t{%1, %0|%0, %1}"
2500 [(set_attr "type" "ssecvt")
2501 (set_attr "unit" "mmx,*")
2502 (set_attr "prefix_data16" "1,*")
2503 (set_attr "mode" "V2DF")])
;; cvtpd2pi: V2DF source to V2SI result in a "y" register, expressed as
;; UNSPEC_FIX_NOTRUNC.
2505 (define_insn "sse2_cvtpd2pi"
2506 [(set (match_operand:V2SI 0 "register_operand" "=y")
2507 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2508 UNSPEC_FIX_NOTRUNC))]
2510 "cvtpd2pi\t{%1, %0|%0, %1}"
2511 [(set_attr "type" "ssecvt")
2512 (set_attr "unit" "mmx")
2513 (set_attr "prefix_data16" "1")
2514 (set_attr "mode" "DI")])
;; cvttpd2pi: V2DF source to V2SI result in a "y" register via the RTL "fix"
;; operator.
2516 (define_insn "sse2_cvttpd2pi"
2517 [(set (match_operand:V2SI 0 "register_operand" "=y")
2518 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
2520 "cvttpd2pi\t{%1, %0|%0, %1}"
2521 [(set_attr "type" "ssecvt")
2522 (set_attr "unit" "mmx")
2523 (set_attr "prefix_data16" "1")
2524 (set_attr "mode" "TI")])
;; VEX form of cvtsi2sd: three-operand template, so operand 1 is any "x"
;; register rather than being tied to the destination.
2526 (define_insn "*avx_cvtsi2sd"
2527 [(set (match_operand:V2DF 0 "register_operand" "=x")
2530 (float:DF (match_operand:SI 2 "nonimmediate_operand" "rm")))
2531 (match_operand:V2DF 1 "register_operand" "x")
2534 "vcvtsi2sd\t{%2, %1, %0|%0, %1, %2}"
2535 [(set_attr "type" "sseicvt")
2536 (set_attr "prefix" "vex")
2537 (set_attr "mode" "DF")])
;; cvtsi2sd: SImode operand 2 (register or memory alternative) converted to
;; DF; operand 1 is tied to the destination.
2539 (define_insn "sse2_cvtsi2sd"
2540 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2543 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
2544 (match_operand:V2DF 1 "register_operand" "0,0")
2547 "cvtsi2sd\t{%2, %0|%0, %2}"
2548 [(set_attr "type" "sseicvt")
2549 (set_attr "mode" "DF")
2550 (set_attr "athlon_decode" "double,direct")
2551 (set_attr "amdfam10_decode" "vector,double")])
;; VEX form of cvtsi2sdq: DImode source, three-operand template.  Requires
;; TARGET_AVX and TARGET_64BIT.
2553 (define_insn "*avx_cvtsi2sdq"
2554 [(set (match_operand:V2DF 0 "register_operand" "=x")
2557 (float:DF (match_operand:DI 2 "nonimmediate_operand" "rm")))
2558 (match_operand:V2DF 1 "register_operand" "x")
2560 "TARGET_AVX && TARGET_64BIT"
2561 "vcvtsi2sdq\t{%2, %1, %0|%0, %1, %2}"
2562 [(set_attr "type" "sseicvt")
2563 (set_attr "length_vex" "4")
2564 (set_attr "prefix" "vex")
2565 (set_attr "mode" "DF")])
;; cvtsi2sdq: DImode source; operand 1 is tied to the destination.  Requires
;; TARGET_SSE2 and TARGET_64BIT.
2567 (define_insn "sse2_cvtsi2sdq"
2568 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2571 (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m")))
2572 (match_operand:V2DF 1 "register_operand" "0,0")
2574 "TARGET_SSE2 && TARGET_64BIT"
2575 "cvtsi2sdq\t{%2, %0|%0, %2}"
2576 [(set_attr "type" "sseicvt")
2577 (set_attr "prefix_rex" "1")
2578 (set_attr "mode" "DF")
2579 (set_attr "athlon_decode" "double,direct")
2580 (set_attr "amdfam10_decode" "vector,double")])
;; cvtsd2si on element 0 of a V2DF operand, wrapped in UNSPEC_FIX_NOTRUNC;
;; the result is an SImode general register.
2582 (define_insn "sse2_cvtsd2si"
2583 [(set (match_operand:SI 0 "register_operand" "=r,r")
2586 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2587 (parallel [(const_int 0)]))]
2588 UNSPEC_FIX_NOTRUNC))]
2590 "%vcvtsd2si\t{%1, %0|%0, %1}"
2591 [(set_attr "type" "sseicvt")
2592 (set_attr "athlon_decode" "double,vector")
2593 (set_attr "prefix_rep" "1")
2594 (set_attr "prefix" "maybe_vex")
2595 (set_attr "mode" "SI")])
;; Variant of sse2_cvtsd2si whose source is a plain DFmode operand instead
;; of element 0 of a V2DF vector; emits the same instruction template.
2597 (define_insn "sse2_cvtsd2si_2"
2598 [(set (match_operand:SI 0 "register_operand" "=r,r")
2599 (unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2600 UNSPEC_FIX_NOTRUNC))]
2602 "%vcvtsd2si\t{%1, %0|%0, %1}"
2603 [(set_attr "type" "sseicvt")
2604 (set_attr "athlon_decode" "double,vector")
2605 (set_attr "amdfam10_decode" "double,double")
2606 (set_attr "prefix_rep" "1")
2607 (set_attr "prefix" "maybe_vex")
2608 (set_attr "mode" "SI")])
;; 64-bit counterpart of sse2_cvtsd2si: DImode result from element 0 of a
;; V2DF operand.  Requires TARGET_SSE2 and TARGET_64BIT.
2610 (define_insn "sse2_cvtsd2siq"
2611 [(set (match_operand:DI 0 "register_operand" "=r,r")
2614 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2615 (parallel [(const_int 0)]))]
2616 UNSPEC_FIX_NOTRUNC))]
2617 "TARGET_SSE2 && TARGET_64BIT"
2618 "%vcvtsd2siq\t{%1, %0|%0, %1}"
2619 [(set_attr "type" "sseicvt")
2620 (set_attr "athlon_decode" "double,vector")
2621 (set_attr "prefix_rep" "1")
2622 (set_attr "prefix" "maybe_vex")
2623 (set_attr "mode" "DI")])
;; Variant of sse2_cvtsd2siq whose source is a plain DFmode operand.
;; Requires TARGET_SSE2 and TARGET_64BIT.
2625 (define_insn "sse2_cvtsd2siq_2"
2626 [(set (match_operand:DI 0 "register_operand" "=r,r")
2627 (unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2628 UNSPEC_FIX_NOTRUNC))]
2629 "TARGET_SSE2 && TARGET_64BIT"
2630 "%vcvtsd2siq\t{%1, %0|%0, %1}"
2631 [(set_attr "type" "sseicvt")
2632 (set_attr "athlon_decode" "double,vector")
2633 (set_attr "amdfam10_decode" "double,double")
2634 (set_attr "prefix_rep" "1")
2635 (set_attr "prefix" "maybe_vex")
2636 (set_attr "mode" "DI")])
;; cvttsd2si on element 0 of a V2DF operand; SImode result in a general
;; register.  No UNSPEC_FIX_NOTRUNC wrapper is visible here, unlike
;; sse2_cvtsd2si.
2638 (define_insn "sse2_cvttsd2si"
2639 [(set (match_operand:SI 0 "register_operand" "=r,r")
2642 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2643 (parallel [(const_int 0)]))))]
2645 "%vcvttsd2si\t{%1, %0|%0, %1}"
2646 [(set_attr "type" "sseicvt")
2647 (set_attr "prefix_rep" "1")
2648 (set_attr "prefix" "maybe_vex")
2649 (set_attr "mode" "SI")
2650 (set_attr "athlon_decode" "double,vector")
2651 (set_attr "amdfam10_decode" "double,double")])
;; 64-bit counterpart of sse2_cvttsd2si: DImode result.  Requires
;; TARGET_SSE2 and TARGET_64BIT.
2653 (define_insn "sse2_cvttsd2siq"
2654 [(set (match_operand:DI 0 "register_operand" "=r,r")
2657 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2658 (parallel [(const_int 0)]))))]
2659 "TARGET_SSE2 && TARGET_64BIT"
2660 "%vcvttsd2siq\t{%1, %0|%0, %1}"
2661 [(set_attr "type" "sseicvt")
2662 (set_attr "prefix_rep" "1")
2663 (set_attr "prefix" "maybe_vex")
2664 (set_attr "mode" "DI")
2665 (set_attr "athlon_decode" "double,vector")
2666 (set_attr "amdfam10_decode" "double,double")])
;; vcvtdq2pd, 256-bit form: all four elements of a V4SI source are converted
;; to a V4DF result.
2668 (define_insn "avx_cvtdq2pd256"
2669 [(set (match_operand:V4DF 0 "register_operand" "=x")
2670 (float:V4DF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
2672 "vcvtdq2pd\t{%1, %0|%0, %1}"
2673 [(set_attr "type" "ssecvt")
2674 (set_attr "prefix" "vex")
2675 (set_attr "mode" "V4DF")])
;; cvtdq2pd: only elements 0 and 1 of the V4SI source are selected for
;; conversion into the V2DF result.
2677 (define_insn "sse2_cvtdq2pd"
2678 [(set (match_operand:V2DF 0 "register_operand" "=x")
2681 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
2682 (parallel [(const_int 0) (const_int 1)]))))]
2684 "%vcvtdq2pd\t{%1, %0|%0, %1}"
2685 [(set_attr "type" "ssecvt")
2686 (set_attr "prefix" "maybe_vex")
2687 (set_attr "mode" "V2DF")])
;; vcvtpd2dq, 256-bit source: V4DF to V4SI via UNSPEC_FIX_NOTRUNC.  The
;; template carries a "{y}" suffix on the mnemonic.
2689 (define_insn "avx_cvtpd2dq256"
2690 [(set (match_operand:V4SI 0 "register_operand" "=x")
2691 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")]
2692 UNSPEC_FIX_NOTRUNC))]
2694 "vcvtpd2dq{y}\t{%1, %0|%0, %1}"
2695 [(set_attr "type" "ssecvt")
2696 (set_attr "prefix" "vex")
2697 (set_attr "mode" "OI")])
;; Expander for cvtpd2dq: the preparation statement sets operand 2 to a
;; V2SI zero constant.
2699 (define_expand "sse2_cvtpd2dq"
2700 [(set (match_operand:V4SI 0 "register_operand" "")
2702 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "")]
2706 "operands[2] = CONST0_RTX (V2SImode);")
;; Matcher for cvtpd2dq: operand 2 must satisfy const0_operand.  The output
;; code picks the "vcvtpd2dq{x}" spelling when TARGET_AVX is set and plain
;; "cvtpd2dq" otherwise.
2708 (define_insn "*sse2_cvtpd2dq"
2709 [(set (match_operand:V4SI 0 "register_operand" "=x")
2711 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2713 (match_operand:V2SI 2 "const0_operand" "")))]
2715 "* return TARGET_AVX ? \"vcvtpd2dq{x}\t{%1, %0|%0, %1}\"
2716 : \"cvtpd2dq\t{%1, %0|%0, %1}\";"
2717 [(set_attr "type" "ssecvt")
2718 (set_attr "prefix_rep" "1")
2719 (set_attr "prefix_data16" "0")
2720 (set_attr "prefix" "maybe_vex")
2721 (set_attr "mode" "TI")
2722 (set_attr "amdfam10_decode" "double")])
;; vcvttpd2dq, 256-bit source: V4DF to V4SI via the RTL "fix" operator.  The
;; template carries a "{y}" suffix on the mnemonic.
2724 (define_insn "avx_cvttpd2dq256"
2725 [(set (match_operand:V4SI 0 "register_operand" "=x")
2726 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
2728 "vcvttpd2dq{y}\t{%1, %0|%0, %1}"
2729 [(set_attr "type" "ssecvt")
2730 (set_attr "prefix" "vex")
2731 (set_attr "mode" "OI")])
;; Expander for cvttpd2dq: the preparation statement sets operand 2 to a
;; V2SI zero constant.
2733 (define_expand "sse2_cvttpd2dq"
2734 [(set (match_operand:V4SI 0 "register_operand" "")
2736 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" ""))
2739 "operands[2] = CONST0_RTX (V2SImode);")
;; Matcher for cvttpd2dq: operand 2 must satisfy const0_operand.  The output
;; code picks the "vcvttpd2dq{x}" spelling when TARGET_AVX is set and plain
;; "cvttpd2dq" otherwise.
2741 (define_insn "*sse2_cvttpd2dq"
2742 [(set (match_operand:V4SI 0 "register_operand" "=x")
2744 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2745 (match_operand:V2SI 2 "const0_operand" "")))]
2747 "* return TARGET_AVX ? \"vcvttpd2dq{x}\t{%1, %0|%0, %1}\"
2748 : \"cvttpd2dq\t{%1, %0|%0, %1}\";"
2749 [(set_attr "type" "ssecvt")
2750 (set_attr "prefix" "maybe_vex")
2751 (set_attr "mode" "TI")
2752 (set_attr "amdfam10_decode" "double")])
;; VEX form of cvtsd2ss: V2DF operand 2 goes through float_truncate:V2SF;
;; three-operand template with operand 1 in any "x" register.
2754 (define_insn "*avx_cvtsd2ss"
2755 [(set (match_operand:V4SF 0 "register_operand" "=x")
2758 (float_truncate:V2SF
2759 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))
2760 (match_operand:V4SF 1 "register_operand" "x")
2763 "vcvtsd2ss\t{%2, %1, %0|%0, %1, %2}"
2764 [(set_attr "type" "ssecvt")
2765 (set_attr "prefix" "vex")
2766 (set_attr "mode" "SF")])
;; cvtsd2ss: V2DF operand 2 (register or memory alternative) goes through
;; float_truncate:V2SF; operand 1 is tied to the destination.
2768 (define_insn "sse2_cvtsd2ss"
2769 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2772 (float_truncate:V2SF
2773 (match_operand:V2DF 2 "nonimmediate_operand" "x,m")))
2774 (match_operand:V4SF 1 "register_operand" "0,0")
2777 "cvtsd2ss\t{%2, %0|%0, %2}"
2778 [(set_attr "type" "ssecvt")
2779 (set_attr "athlon_decode" "vector,double")
2780 (set_attr "amdfam10_decode" "vector,double")
2781 (set_attr "mode" "SF")])
;; VEX form of cvtss2sd: elements 0 and 1 of V4SF operand 2 are selected;
;; three-operand template with operand 1 in any "x" register.
2783 (define_insn "*avx_cvtss2sd"
2784 [(set (match_operand:V2DF 0 "register_operand" "=x")
2788 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
2789 (parallel [(const_int 0) (const_int 1)])))
2790 (match_operand:V2DF 1 "register_operand" "x")
2793 "vcvtss2sd\t{%2, %1, %0|%0, %1, %2}"
2794 [(set_attr "type" "ssecvt")
2795 (set_attr "prefix" "vex")
2796 (set_attr "mode" "DF")])
;; cvtss2sd: elements 0 and 1 of V4SF operand 2 are selected; operand 1 is
;; tied to the V2DF destination.
2798 (define_insn "sse2_cvtss2sd"
2799 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2803 (match_operand:V4SF 2 "nonimmediate_operand" "x,m")
2804 (parallel [(const_int 0) (const_int 1)])))
2805 (match_operand:V2DF 1 "register_operand" "0,0")
2808 "cvtss2sd\t{%2, %0|%0, %2}"
2809 [(set_attr "type" "ssecvt")
2810 (set_attr "amdfam10_decode" "vector,double")
2811 (set_attr "mode" "DF")])
;; vcvtpd2ps, 256-bit source: V4DF truncated to V4SF.  The template carries a
;; "{y}" suffix on the mnemonic.
2813 (define_insn "avx_cvtpd2ps256"
2814 [(set (match_operand:V4SF 0 "register_operand" "=x")
2815 (float_truncate:V4SF
2816 (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
2818 "vcvtpd2ps{y}\t{%1, %0|%0, %1}"
2819 [(set_attr "type" "ssecvt")
2820 (set_attr "prefix" "vex")
2821 (set_attr "mode" "V4SF")])
;; Expander for cvtpd2ps: the preparation statement sets operand 2 to a
;; V2SF zero constant.
2823 (define_expand "sse2_cvtpd2ps"
2824 [(set (match_operand:V4SF 0 "register_operand" "")
2826 (float_truncate:V2SF
2827 (match_operand:V2DF 1 "nonimmediate_operand" ""))
2830 "operands[2] = CONST0_RTX (V2SFmode);")
;; Matcher for cvtpd2ps: operand 2 must satisfy const0_operand.  The output
;; code picks the "vcvtpd2ps{x}" spelling when TARGET_AVX is set and plain
;; "cvtpd2ps" otherwise.
2832 (define_insn "*sse2_cvtpd2ps"
2833 [(set (match_operand:V4SF 0 "register_operand" "=x")
2835 (float_truncate:V2SF
2836 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2837 (match_operand:V2SF 2 "const0_operand" "")))]
2839 "* return TARGET_AVX ? \"vcvtpd2ps{x}\t{%1, %0|%0, %1}\"
2840 : \"cvtpd2ps\t{%1, %0|%0, %1}\";"
2841 [(set_attr "type" "ssecvt")
2842 (set_attr "prefix_data16" "1")
2843 (set_attr "prefix" "maybe_vex")
2844 (set_attr "mode" "V4SF")
2845 (set_attr "amdfam10_decode" "double")])
;; vcvtps2pd, 256-bit result: V4SF source producing a V4DF destination.
2847 (define_insn "avx_cvtps2pd256"
2848 [(set (match_operand:V4DF 0 "register_operand" "=x")
2850 (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
2852 "vcvtps2pd\t{%1, %0|%0, %1}"
2853 [(set_attr "type" "ssecvt")
2854 (set_attr "prefix" "vex")
2855 (set_attr "mode" "V4DF")])
;; cvtps2pd: only elements 0 and 1 of the V4SF source are selected for the
;; V2DF result.
2857 (define_insn "sse2_cvtps2pd"
2858 [(set (match_operand:V2DF 0 "register_operand" "=x")
2861 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
2862 (parallel [(const_int 0) (const_int 1)]))))]
2864 "%vcvtps2pd\t{%1, %0|%0, %1}"
2865 [(set_attr "type" "ssecvt")
2866 (set_attr "prefix" "maybe_vex")
2867 (set_attr "mode" "V2DF")
2868 (set_attr "prefix_data16" "0")
2869 (set_attr "amdfam10_decode" "direct")])
;; Two-step expander: a first set involving V4SF operand 1 and a selection
;; starting at index 6, then a set of the V2DF result from elements 0 and 1.
;; Operand 2 is a fresh V4SF register used as the intermediate.
;; NOTE(review): most of the first pattern is not visible here; presumably it
;; moves the high half of operand 1 into the low half -- TODO confirm.
2871 (define_expand "vec_unpacks_hi_v4sf"
2876 (match_operand:V4SF 1 "nonimmediate_operand" ""))
2877 (parallel [(const_int 6)
2881 (set (match_operand:V2DF 0 "register_operand" "")
2885 (parallel [(const_int 0) (const_int 1)]))))]
2888 operands[2] = gen_reg_rtx (V4SFmode);
;; Expander producing a V2DF result from elements 0 and 1 of V4SF operand 1.
2891 (define_expand "vec_unpacks_lo_v4sf"
2892 [(set (match_operand:V2DF 0 "register_operand" "")
2895 (match_operand:V4SF 1 "nonimmediate_operand" "")
2896 (parallel [(const_int 0) (const_int 1)]))))]
;; V8HI -> V4SF: runs gen_vec_unpacks_hi_v8hi into a temporary V4SI register
;; and then converts that temporary with gen_sse2_cvtdq2ps.
2899 (define_expand "vec_unpacks_float_hi_v8hi"
2900 [(match_operand:V4SF 0 "register_operand" "")
2901 (match_operand:V8HI 1 "register_operand" "")]
2904 rtx tmp = gen_reg_rtx (V4SImode);
2906 emit_insn (gen_vec_unpacks_hi_v8hi (tmp, operands[1]));
2907 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; V8HI -> V4SF: runs gen_vec_unpacks_lo_v8hi into a temporary V4SI register
;; and then converts that temporary with gen_sse2_cvtdq2ps.
2911 (define_expand "vec_unpacks_float_lo_v8hi"
2912 [(match_operand:V4SF 0 "register_operand" "")
2913 (match_operand:V8HI 1 "register_operand" "")]
2916 rtx tmp = gen_reg_rtx (V4SImode);
2918 emit_insn (gen_vec_unpacks_lo_v8hi (tmp, operands[1]));
2919 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; V8HI -> V4SF: runs gen_vec_unpacku_hi_v8hi into a temporary V4SI register
;; and then converts that temporary with gen_sse2_cvtdq2ps.
2923 (define_expand "vec_unpacku_float_hi_v8hi"
2924 [(match_operand:V4SF 0 "register_operand" "")
2925 (match_operand:V8HI 1 "register_operand" "")]
2928 rtx tmp = gen_reg_rtx (V4SImode);
2930 emit_insn (gen_vec_unpacku_hi_v8hi (tmp, operands[1]));
2931 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; V8HI -> V4SF: runs gen_vec_unpacku_lo_v8hi into a temporary V4SI register
;; and then converts that temporary with gen_sse2_cvtdq2ps.
2935 (define_expand "vec_unpacku_float_lo_v8hi"
2936 [(match_operand:V4SF 0 "register_operand" "")
2937 (match_operand:V8HI 1 "register_operand" "")]
2940 rtx tmp = gen_reg_rtx (V4SImode);
2942 emit_insn (gen_vec_unpacku_lo_v8hi (tmp, operands[1]));
2943 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; Two-step expander: a first set involving V4SI operand 1 and a selection
;; starting at index 2, then a set of the V2DF result from elements 0 and 1.
;; Operand 2 is a fresh V4SI register used as the intermediate.
;; NOTE(review): most of the first pattern is not visible here -- TODO
;; confirm the exact shuffle.
2947 (define_expand "vec_unpacks_float_hi_v4si"
2950 (match_operand:V4SI 1 "nonimmediate_operand" "")
2951 (parallel [(const_int 2)
2955 (set (match_operand:V2DF 0 "register_operand" "")
2959 (parallel [(const_int 0) (const_int 1)]))))]
2961 "operands[2] = gen_reg_rtx (V4SImode);")
;; Expander producing a V2DF result from elements 0 and 1 of V4SI operand 1.
2963 (define_expand "vec_unpacks_float_lo_v4si"
2964 [(set (match_operand:V2DF 0 "register_operand" "")
2967 (match_operand:V4SI 1 "nonimmediate_operand" "")
2968 (parallel [(const_int 0) (const_int 1)]))))]
;; Multi-step expander on V4SI operand 1.  From the visible lines: a
;; selection starting at index 2, a selection of elements 0 and 1, a "lt"
;; compare of operand 6 against operand 3, an "and" of operand 7 with
;; operand 4, and a final "plus" of operands 6 and 8 stored in operand 0.
;; The preparation code sets operand 3 to a zero V2DF register, operand 4 to
;; a V2DF constant built from 1.0 scaled by 2**32, operand 5 to a fresh V4SI
;; register, and operands 6-8 to fresh V2DF registers.
;; Presumably this corrects the signed conversion for unsigned inputs --
;; TODO confirm against the full pattern.
2971 (define_expand "vec_unpacku_float_hi_v4si"
2974 (match_operand:V4SI 1 "nonimmediate_operand" "")
2975 (parallel [(const_int 2)
2983 (parallel [(const_int 0) (const_int 1)]))))
2985 (lt:V2DF (match_dup 6) (match_dup 3)))
2987 (and:V2DF (match_dup 7) (match_dup 4)))
2988 (set (match_operand:V2DF 0 "register_operand" "")
2989 (plus:V2DF (match_dup 6) (match_dup 8)))]
2992 REAL_VALUE_TYPE TWO32r;
2996 real_ldexp (&TWO32r, &dconst1, 32);
2997 x = const_double_from_real_value (TWO32r, DFmode);
2999 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
3000 operands[4] = force_reg (V2DFmode, ix86_build_const_vector (DFmode, 1, x));
3002 operands[5] = gen_reg_rtx (V4SImode);
3004 for (i = 6; i < 9; i++)
3005 operands[i] = gen_reg_rtx (V2DFmode);
;; Expander for the unsigned V4SI -> V2DF conversion of elements 0 and 1.
;; The sequence compares the intermediate V2DF value (operand 5) against zero
;; (operand 3), ANDs with a vector of 2**32 (operand 4), and adds the result
;; (operand 7) to operand 5 to form operand 0.
;; NOTE(review): presumably operand 6 is the comparison mask and operand 7 the
;; masked 2**32 correction -- confirm against the full pattern.
;; Preparation: operand 3 = zero vector, operand 4 = 2**32 constant vector,
;; operands 5..7 = V2DF scratch registers.
3008 (define_expand "vec_unpacku_float_lo_v4si"
3012 (match_operand:V4SI 1 "nonimmediate_operand" "")
3013 (parallel [(const_int 0) (const_int 1)]))))
3015 (lt:V2DF (match_dup 5) (match_dup 3)))
3017 (and:V2DF (match_dup 6) (match_dup 4)))
3018 (set (match_operand:V2DF 0 "register_operand" "")
3019 (plus:V2DF (match_dup 5) (match_dup 7)))]
3022 REAL_VALUE_TYPE TWO32r;
3026 real_ldexp (&TWO32r, &dconst1, 32);
3027 x = const_double_from_real_value (TWO32r, DFmode);
3029 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
3030 operands[4] = force_reg (V2DFmode, ix86_build_const_vector (DFmode, 1, x));
3032 for (i = 5; i < 8; i++)
3033 operands[i] = gen_reg_rtx (V2DFmode);
;; Expander: each V2DF input (operands 1 and 2) is converted with
;; gen_sse2_cvtpd2ps into its own V4SF temporary; gen_sse_movlhps then
;; combines the two temporaries into V4SF operand 0.
3036 (define_expand "vec_pack_trunc_v2df"
3037 [(match_operand:V4SF 0 "register_operand" "")
3038 (match_operand:V2DF 1 "nonimmediate_operand" "")
3039 (match_operand:V2DF 2 "nonimmediate_operand" "")]
3044 r1 = gen_reg_rtx (V4SFmode);
3045 r2 = gen_reg_rtx (V4SFmode);
3047 emit_insn (gen_sse2_cvtpd2ps (r1, operands[1]));
3048 emit_insn (gen_sse2_cvtpd2ps (r2, operands[2]));
3049 emit_insn (gen_sse_movlhps (operands[0], r1, r2));
;; Expander: each V2DF input is converted with gen_sse2_cvttpd2dq into a V4SI
;; temporary; the temporaries and the destination are then viewed as V2DI
;; (gen_lowpart) and merged with gen_sse2_punpcklqdq into operand 0.
3053 (define_expand "vec_pack_sfix_trunc_v2df"
3054 [(match_operand:V4SI 0 "register_operand" "")
3055 (match_operand:V2DF 1 "nonimmediate_operand" "")
3056 (match_operand:V2DF 2 "nonimmediate_operand" "")]
3061 r1 = gen_reg_rtx (V4SImode);
3062 r2 = gen_reg_rtx (V4SImode);
3064 emit_insn (gen_sse2_cvttpd2dq (r1, operands[1]));
3065 emit_insn (gen_sse2_cvttpd2dq (r2, operands[2]));
3066 emit_insn (gen_sse2_punpcklqdq (gen_lowpart (V2DImode, operands[0]),
3067 gen_lowpart (V2DImode, r1),
3068 gen_lowpart (V2DImode, r2)));
;; Expander: same shape as vec_pack_sfix_trunc_v2df but the per-input
;; conversion uses gen_sse2_cvtpd2dq; the two V4SI temporaries are merged via
;; gen_sse2_punpcklqdq on their V2DI views into operand 0.
3072 (define_expand "vec_pack_sfix_v2df"
3073 [(match_operand:V4SI 0 "register_operand" "")
3074 (match_operand:V2DF 1 "nonimmediate_operand" "")
3075 (match_operand:V2DF 2 "nonimmediate_operand" "")]
3080 r1 = gen_reg_rtx (V4SImode);
3081 r2 = gen_reg_rtx (V4SImode);
3083 emit_insn (gen_sse2_cvtpd2dq (r1, operands[1]));
3084 emit_insn (gen_sse2_cvtpd2dq (r2, operands[2]));
3085 emit_insn (gen_sse2_punpcklqdq (gen_lowpart (V2DImode, operands[0]),
3086 gen_lowpart (V2DImode, r1),
3087 gen_lowpart (V2DImode, r2)));
3091 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3093 ;; Parallel single-precision floating point element swizzling
3095 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander front end for the movhlps patterns: all three V4SF operands may
;; be registers or memory, and the preparation statement calls
;; ix86_fixup_binary_operands on them before the pattern is generated.
3097 (define_expand "sse_movhlps_exp"
3098 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3101 (match_operand:V4SF 1 "nonimmediate_operand" "")
3102 (match_operand:V4SF 2 "nonimmediate_operand" ""))
3103 (parallel [(const_int 6)
3108 "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
;; AVX form of movhlps.  Rejected when operands 1 and 2 are both memory.
;; Alternatives: (0) all registers -> vmovhlps; (1) offsettable memory
;; operand 2 -> vmovlps reading %H2; (2) memory destination tied to
;; operand 1 -> vmovhps store of operand 2.
3110 (define_insn "*avx_movhlps"
3111 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
3114 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,0")
3115 (match_operand:V4SF 2 "nonimmediate_operand" " x,o,x"))
3116 (parallel [(const_int 6)
3120 "TARGET_AVX && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3122 vmovhlps\t{%2, %1, %0|%0, %1, %2}
3123 vmovlps\t{%H2, %1, %0|%0, %1, %H2}
3124 vmovhps\t{%2, %0|%0, %2}"
3125 [(set_attr "type" "ssemov")
3126 (set_attr "prefix" "vex")
3127 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Legacy SSE form of movhlps; operand 1 is tied to the destination in every
;; alternative.  Rejected when operands 1 and 2 are both memory.
;; Alternatives: (0) register source -> movhlps; (1) offsettable memory
;; source -> movlps reading %H2; (2) memory destination -> movhps store.
3129 (define_insn "sse_movhlps"
3130 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
3133 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
3134 (match_operand:V4SF 2 "nonimmediate_operand" " x,o,x"))
3135 (parallel [(const_int 6)
3139 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3141 movhlps\t{%2, %0|%0, %2}
3142 movlps\t{%H2, %0|%0, %H2}
3143 movhps\t{%2, %0|%0, %2}"
3144 [(set_attr "type" "ssemov")
3145 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Expander front end for the movlhps patterns: all three V4SF operands may
;; be registers or memory, and the preparation statement calls
;; ix86_fixup_binary_operands on them before the pattern is generated.
3147 (define_expand "sse_movlhps_exp"
3148 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3151 (match_operand:V4SF 1 "nonimmediate_operand" "")
3152 (match_operand:V4SF 2 "nonimmediate_operand" ""))
3153 (parallel [(const_int 0)
3158 "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
;; AVX form of movlhps, valid when ix86_binary_operator_ok accepts the
;; operands.  Alternatives: (0) all registers -> vmovlhps; (1) memory
;; operand 2 -> vmovhps load; (2) offsettable memory destination tied to
;; operand 1 -> vmovlps store of operand 2 to %H0.
3160 (define_insn "*avx_movlhps"
3161 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
3164 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,0")
3165 (match_operand:V4SF 2 "nonimmediate_operand" " x,m,x"))
3166 (parallel [(const_int 0)
3170 "TARGET_AVX && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
3172 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3173 vmovhps\t{%2, %1, %0|%0, %1, %2}
3174 vmovlps\t{%2, %H0|%H0, %2}"
3175 [(set_attr "type" "ssemov")
3176 (set_attr "prefix" "vex")
3177 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Legacy SSE form of movlhps; operand 1 is tied to the destination in every
;; alternative.  Alternatives: (0) register source -> movlhps; (1) memory
;; source -> movhps load; (2) offsettable memory destination -> movlps store
;; of operand 2 to %H0.
3179 (define_insn "sse_movlhps"
3180 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
3183 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
3184 (match_operand:V4SF 2 "nonimmediate_operand" " x,m,x"))
3185 (parallel [(const_int 0)
3189 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
3191 movlhps\t{%2, %0|%0, %2}
3192 movhps\t{%2, %0|%0, %2}
3193 movlps\t{%2, %H0|%H0, %2}"
3194 [(set_attr "type" "ssemov")
3195 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; 256-bit vunpckhps on V8SF operands.  The parallel selects elements
;; 2,10,3,11 and 6,14,7,15 of the operand 1 / operand 2 pair; operand 2 may
;; be memory.
3197 (define_insn "avx_unpckhps256"
3198 [(set (match_operand:V8SF 0 "register_operand" "=x")
3201 (match_operand:V8SF 1 "register_operand" "x")
3202 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3203 (parallel [(const_int 2) (const_int 10)
3204 (const_int 3) (const_int 11)
3205 (const_int 6) (const_int 14)
3206 (const_int 7) (const_int 15)])))]
3208 "vunpckhps\t{%2, %1, %0|%0, %1, %2}"
3209 [(set_attr "type" "sselog")
3210 (set_attr "prefix" "vex")
3211 (set_attr "mode" "V8SF")])
;; 128-bit VEX-encoded vunpckhps (three-operand form): selects elements
;; 2,6,3,7 of the operand 1 / operand 2 pair; operand 2 may be memory.
3213 (define_insn "*avx_unpckhps"
3214 [(set (match_operand:V4SF 0 "register_operand" "=x")
3217 (match_operand:V4SF 1 "register_operand" "x")
3218 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
3219 (parallel [(const_int 2) (const_int 6)
3220 (const_int 3) (const_int 7)])))]
3222 "vunpckhps\t{%2, %1, %0|%0, %1, %2}"
3223 [(set_attr "type" "sselog")
3224 (set_attr "prefix" "vex")
3225 (set_attr "mode" "V4SF")])
;; Legacy SSE unpckhps (two-operand form, operand 1 tied to the destination):
;; selects elements 2,6,3,7 of the operand 1 / operand 2 pair.
3227 (define_insn "sse_unpckhps"
3228 [(set (match_operand:V4SF 0 "register_operand" "=x")
3231 (match_operand:V4SF 1 "register_operand" "0")
3232 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
3233 (parallel [(const_int 2) (const_int 6)
3234 (const_int 3) (const_int 7)])))]
3236 "unpckhps\t{%2, %0|%0, %2}"
3237 [(set_attr "type" "sselog")
3238 (set_attr "mode" "V4SF")])
;; 256-bit vunpcklps on V8SF operands.  The parallel selects elements
;; 0,8,1,9 and 4,12,5,13 of the operand 1 / operand 2 pair; operand 2 may be
;; memory.
3240 (define_insn "avx_unpcklps256"
3241 [(set (match_operand:V8SF 0 "register_operand" "=x")
3244 (match_operand:V8SF 1 "register_operand" "x")
3245 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3246 (parallel [(const_int 0) (const_int 8)
3247 (const_int 1) (const_int 9)
3248 (const_int 4) (const_int 12)
3249 (const_int 5) (const_int 13)])))]
3251 "vunpcklps\t{%2, %1, %0|%0, %1, %2}"
3252 [(set_attr "type" "sselog")
3253 (set_attr "prefix" "vex")
3254 (set_attr "mode" "V8SF")])
;; 128-bit VEX-encoded vunpcklps (three-operand form): selects elements
;; 0,4,1,5 of the operand 1 / operand 2 pair; operand 2 may be memory.
3256 (define_insn "*avx_unpcklps"
3257 [(set (match_operand:V4SF 0 "register_operand" "=x")
3260 (match_operand:V4SF 1 "register_operand" "x")
3261 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
3262 (parallel [(const_int 0) (const_int 4)
3263 (const_int 1) (const_int 5)])))]
3265 "vunpcklps\t{%2, %1, %0|%0, %1, %2}"
3266 [(set_attr "type" "sselog")
3267 (set_attr "prefix" "vex")
3268 (set_attr "mode" "V4SF")])
;; Legacy SSE unpcklps (two-operand form, operand 1 tied to the destination):
;; selects elements 0,4,1,5 of the operand 1 / operand 2 pair.
3270 (define_insn "sse_unpcklps"
3271 [(set (match_operand:V4SF 0 "register_operand" "=x")
3274 (match_operand:V4SF 1 "register_operand" "0")
3275 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
3276 (parallel [(const_int 0) (const_int 4)
3277 (const_int 1) (const_int 5)])))]
3279 "unpcklps\t{%2, %0|%0, %2}"
3280 [(set_attr "type" "sselog")
3281 (set_attr "mode" "V4SF")])
3283 ;; These are modeled with the same vec_concat as the others so that we
3284 ;; capture users of shufps that can use the new instructions
;; 256-bit vmovshdup: the parallel selects each odd-indexed element twice
;; (1,1,3,3,5,5,7,7).  Operand 1 may be a register or memory.
3285 (define_insn "avx_movshdup256"
3286 [(set (match_operand:V8SF 0 "register_operand" "=x")
3289 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
3291 (parallel [(const_int 1) (const_int 1)
3292 (const_int 3) (const_int 3)
3293 (const_int 5) (const_int 5)
3294 (const_int 7) (const_int 7)])))]
3296 "vmovshdup\t{%1, %0|%0, %1}"
3297 [(set_attr "type" "sse")
3298 (set_attr "prefix" "vex")
3299 (set_attr "mode" "V8SF")])
;; 128-bit movshdup on V4SF.  The "%v" template prefix together with
;; prefix "maybe_vex" lets one pattern serve both the legacy and the
;; VEX-encoded mnemonic.  Operand 1 may be a register or memory.
3301 (define_insn "sse3_movshdup"
3302 [(set (match_operand:V4SF 0 "register_operand" "=x")
3305 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3307 (parallel [(const_int 1)
3312 "%vmovshdup\t{%1, %0|%0, %1}"
3313 [(set_attr "type" "sse")
3314 (set_attr "prefix_rep" "1")
3315 (set_attr "prefix" "maybe_vex")
3316 (set_attr "mode" "V4SF")])
;; 256-bit vmovsldup: the parallel selects each even-indexed element twice
;; (0,0,2,2,4,4,6,6).  Operand 1 may be a register or memory.
3318 (define_insn "avx_movsldup256"
3319 [(set (match_operand:V8SF 0 "register_operand" "=x")
3322 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
3324 (parallel [(const_int 0) (const_int 0)
3325 (const_int 2) (const_int 2)
3326 (const_int 4) (const_int 4)
3327 (const_int 6) (const_int 6)])))]
3329 "vmovsldup\t{%1, %0|%0, %1}"
3330 [(set_attr "type" "sse")
3331 (set_attr "prefix" "vex")
3332 (set_attr "mode" "V8SF")])
;; 128-bit movsldup on V4SF.  The "%v" template prefix together with
;; prefix "maybe_vex" lets one pattern serve both the legacy and the
;; VEX-encoded mnemonic.  Operand 1 may be a register or memory.
3334 (define_insn "sse3_movsldup"
3335 [(set (match_operand:V4SF 0 "register_operand" "=x")
3338 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3340 (parallel [(const_int 0)
3345 "%vmovsldup\t{%1, %0|%0, %1}"
3346 [(set_attr "type" "sse")
3347 (set_attr "prefix_rep" "1")
3348 (set_attr "prefix" "maybe_vex")
3349 (set_attr "mode" "V4SF")])
;; Expander taking the shuffle immediate as operand 3 and decoding it into the
;; eight explicit element selectors expected by avx_shufps256_1.  Each 2-bit
;; field of the mask is emitted twice: once for the first four selectors
;; (offsets 0, 0, 8, 8) and once for the last four (offsets 4, 4, 12, 12).
3351 (define_expand "avx_shufps256"
3352 [(match_operand:V8SF 0 "register_operand" "")
3353 (match_operand:V8SF 1 "register_operand" "")
3354 (match_operand:V8SF 2 "nonimmediate_operand" "")
3355 (match_operand:SI 3 "const_int_operand" "")]
3358 int mask = INTVAL (operands[3]);
3359 emit_insn (gen_avx_shufps256_1 (operands[0], operands[1], operands[2],
3360 GEN_INT ((mask >> 0) & 3),
3361 GEN_INT ((mask >> 2) & 3),
3362 GEN_INT (((mask >> 4) & 3) + 8),
3363 GEN_INT (((mask >> 6) & 3) + 8),
3364 GEN_INT (((mask >> 0) & 3) + 4),
3365 GEN_INT (((mask >> 2) & 3) + 4),
3366 GEN_INT (((mask >> 4) & 3) + 12),
3367 GEN_INT (((mask >> 6) & 3) + 12)));
3371 ;; One bit in mask selects 2 elements.
;; vshufps on V8SF with the eight element selectors spelled out as operands
;; 3..10.  The insn condition requires selectors 7..10 to equal selectors
;; 3..6 plus 4.  The output code rebuilds the 8-bit immediate from operands
;; 3 and 4 and from operands 5 and 6 minus 8, and stores it back into
;; operand 3 for printing.
3372 (define_insn "avx_shufps256_1"
3373 [(set (match_operand:V8SF 0 "register_operand" "=x")
3376 (match_operand:V8SF 1 "register_operand" "x")
3377 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3378 (parallel [(match_operand 3 "const_0_to_3_operand" "")
3379 (match_operand 4 "const_0_to_3_operand" "")
3380 (match_operand 5 "const_8_to_11_operand" "")
3381 (match_operand 6 "const_8_to_11_operand" "")
3382 (match_operand 7 "const_4_to_7_operand" "")
3383 (match_operand 8 "const_4_to_7_operand" "")
3384 (match_operand 9 "const_12_to_15_operand" "")
3385 (match_operand 10 "const_12_to_15_operand" "")])))]
3387 && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
3388 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
3389 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
3390 && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4))"
3393 mask = INTVAL (operands[3]);
3394 mask |= INTVAL (operands[4]) << 2;
3395 mask |= (INTVAL (operands[5]) - 8) << 4;
3396 mask |= (INTVAL (operands[6]) - 8) << 6;
3397 operands[3] = GEN_INT (mask);
3399 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3401 [(set_attr "type" "sselog")
3402 (set_attr "length_immediate" "1")
3403 (set_attr "prefix" "vex")
3404 (set_attr "mode" "V8SF")])
;; Expander taking the shuffle immediate as operand 3 and decoding its four
;; 2-bit fields into explicit element selectors for sse_shufps_v4sf; the two
;; upper fields are offset by 4.
3406 (define_expand "sse_shufps"
3407 [(match_operand:V4SF 0 "register_operand" "")
3408 (match_operand:V4SF 1 "register_operand" "")
3409 (match_operand:V4SF 2 "nonimmediate_operand" "")
3410 (match_operand:SI 3 "const_int_operand" "")]
3413 int mask = INTVAL (operands[3]);
3414 emit_insn (gen_sse_shufps_v4sf (operands[0], operands[1], operands[2],
3415 GEN_INT ((mask >> 0) & 3),
3416 GEN_INT ((mask >> 2) & 3),
3417 GEN_INT (((mask >> 4) & 3) + 4),
3418 GEN_INT (((mask >> 6) & 3) + 4)));
;; VEX-encoded vshufps for the SSEMODE4S modes, modeled as a vec_select over
;; the vec_concat of operands 1 and 2.  Selectors 3 and 4 index 0..3 and
;; selectors 5 and 6 index 4..7.  The output code packs the four selectors
;; back into the 8-bit immediate (the last two minus 4) and stores it in
;; operand 3.
3422 (define_insn "*avx_shufps_<mode>"
3423 [(set (match_operand:SSEMODE4S 0 "register_operand" "=x")
3424 (vec_select:SSEMODE4S
3425 (vec_concat:<ssedoublesizemode>
3426 (match_operand:SSEMODE4S 1 "register_operand" "x")
3427 (match_operand:SSEMODE4S 2 "nonimmediate_operand" "xm"))
3428 (parallel [(match_operand 3 "const_0_to_3_operand" "")
3429 (match_operand 4 "const_0_to_3_operand" "")
3430 (match_operand 5 "const_4_to_7_operand" "")
3431 (match_operand 6 "const_4_to_7_operand" "")])))]
3435 mask |= INTVAL (operands[3]) << 0;
3436 mask |= INTVAL (operands[4]) << 2;
3437 mask |= (INTVAL (operands[5]) - 4) << 4;
3438 mask |= (INTVAL (operands[6]) - 4) << 6;
3439 operands[3] = GEN_INT (mask);
3441 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3443 [(set_attr "type" "sselog")
3444 (set_attr "length_immediate" "1")
3445 (set_attr "prefix" "vex")
3446 (set_attr "mode" "V4SF")])
;; Legacy two-operand shufps for the SSEMODE4S modes (operand 1 tied to the
;; destination), modeled as a vec_select over the vec_concat of operands 1
;; and 2.  The output code packs the four selectors back into the 8-bit
;; immediate (the last two minus 4) and stores it in operand 3.
3448 (define_insn "sse_shufps_<mode>"
3449 [(set (match_operand:SSEMODE4S 0 "register_operand" "=x")
3450 (vec_select:SSEMODE4S
3451 (vec_concat:<ssedoublesizemode>
3452 (match_operand:SSEMODE4S 1 "register_operand" "0")
3453 (match_operand:SSEMODE4S 2 "nonimmediate_operand" "xm"))
3454 (parallel [(match_operand 3 "const_0_to_3_operand" "")
3455 (match_operand 4 "const_0_to_3_operand" "")
3456 (match_operand 5 "const_4_to_7_operand" "")
3457 (match_operand 6 "const_4_to_7_operand" "")])))]
3461 mask |= INTVAL (operands[3]) << 0;
3462 mask |= INTVAL (operands[4]) << 2;
3463 mask |= (INTVAL (operands[5]) - 4) << 4;
3464 mask |= (INTVAL (operands[6]) - 4) << 6;
3465 operands[3] = GEN_INT (mask);
3467 return "shufps\t{%3, %2, %0|%0, %2, %3}";
3469 [(set_attr "type" "sselog")
3470 (set_attr "length_immediate" "1")
3471 (set_attr "mode" "V4SF")])
;; Extracts elements 2 and 3 of V4SF operand 1 as a V2SF value.
;; Alternatives: (0) memory destination -> movhps store; (1) register to
;; register -> movhlps; (2) offsettable memory source -> movlps load from %H1.
;; "%v" plus prefix "maybe_vex" covers both the legacy and VEX encodings.
3473 (define_insn "sse_storehps"
3474 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3476 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
3477 (parallel [(const_int 2) (const_int 3)])))]
3480 %vmovhps\t{%1, %0|%0, %1}
3481 %vmovhlps\t{%1, %d0|%d0, %1}
3482 %vmovlps\t{%H1, %d0|%d0, %H1}"
3483 [(set_attr "type" "ssemov")
3484 (set_attr "prefix" "maybe_vex")
3485 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander front end for the loadhps patterns: combines elements 0 and 1 of
;; V4SF operand 1 with V2SF operand 2.  The preparation statement calls
;; ix86_fixup_binary_operands on the operands.
3487 (define_expand "sse_loadhps_exp"
3488 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3491 (match_operand:V4SF 1 "nonimmediate_operand" "")
3492 (parallel [(const_int 0) (const_int 1)]))
3493 (match_operand:V2SF 2 "nonimmediate_operand" "")))]
3495 "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
;; AVX loadhps: elements 0 and 1 of operand 1 combined with V2SF operand 2.
;; Alternatives: (0) memory operand 2 -> vmovhps load; (1) register
;; operand 2 -> vmovlhps; (2) offsettable memory destination tied to
;; operand 1 -> vmovlps store of operand 2 to %H0.
3497 (define_insn "*avx_loadhps"
3498 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
3501 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,0")
3502 (parallel [(const_int 0) (const_int 1)]))
3503 (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
3506 vmovhps\t{%2, %1, %0|%0, %1, %2}
3507 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3508 vmovlps\t{%2, %H0|%H0, %2}"
3509 [(set_attr "type" "ssemov")
3510 (set_attr "prefix" "vex")
3511 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Legacy SSE loadhps; operand 1 is tied to the destination in every
;; alternative.  Alternatives: (0) memory operand 2 -> movhps load;
;; (1) register operand 2 -> movlhps; (2) offsettable memory destination ->
;; movlps store of operand 2 to %H0.
3513 (define_insn "sse_loadhps"
3514 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
3517 (match_operand:V4SF 1 "nonimmediate_operand" "0,0,0")
3518 (parallel [(const_int 0) (const_int 1)]))
3519 (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
3522 movhps\t{%2, %0|%0, %2}
3523 movlhps\t{%2, %0|%0, %2}
3524 movlps\t{%2, %H0|%H0, %2}"
3525 [(set_attr "type" "ssemov")
3526 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; AVX storelps: extracts elements 0 and 1 of V4SF operand 1 as V2SF.
;; Alternatives: (0) memory destination -> vmovlps store; (1) register to
;; register -> vmovaps; (2) memory source -> three-operand vmovlps load that
;; uses the destination as its own first source.
3528 (define_insn "*avx_storelps"
3529 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3531 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m")
3532 (parallel [(const_int 0) (const_int 1)])))]
3535 vmovlps\t{%1, %0|%0, %1}
3536 vmovaps\t{%1, %0|%0, %1}
3537 vmovlps\t{%1, %0, %0|%0, %0, %1}"
3538 [(set_attr "type" "ssemov")
3539 (set_attr "prefix" "vex")
3540 (set_attr "mode" "V2SF,V2DF,V2SF")])
;; Legacy SSE storelps: extracts elements 0 and 1 of V4SF operand 1 as V2SF.
;; Alternatives: (0) memory destination -> movlps store; (1) register to
;; register -> movaps; (2) memory source -> movlps load.
3542 (define_insn "sse_storelps"
3543 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3545 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m")
3546 (parallel [(const_int 0) (const_int 1)])))]
3549 movlps\t{%1, %0|%0, %1}
3550 movaps\t{%1, %0|%0, %1}
3551 movlps\t{%1, %0|%0, %1}"
3552 [(set_attr "type" "ssemov")
3553 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander front end for the loadlps patterns: combines V2SF operand 2 with
;; elements 2 and 3 of V4SF operand 1.  The preparation statement calls
;; ix86_fixup_binary_operands on the operands.
3555 (define_expand "sse_loadlps_exp"
3556 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3558 (match_operand:V2SF 2 "nonimmediate_operand" "")
3560 (match_operand:V4SF 1 "nonimmediate_operand" "")
3561 (parallel [(const_int 2) (const_int 3)]))))]
3563 "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
;; AVX loadlps: V2SF operand 2 combined with elements 2 and 3 of V4SF
;; operand 1.  Alternatives: (0) all registers -> vshufps with immediate
;; 0xe4; (1) memory operand 2 -> three-operand vmovlps load; (2) memory
;; destination tied to operand 1 -> vmovlps store of operand 2.
;; Alternative 0 must use the "v"-prefixed mnemonic: the template has three
;; register operands and the insn is marked prefix "vex", which the legacy
;; two-operand shufps spelling cannot encode.
3565 (define_insn "*avx_loadlps"
3566 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
3568 (match_operand:V2SF 2 "nonimmediate_operand" "x,m,x")
3570 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,0")
3571 (parallel [(const_int 2) (const_int 3)]))))]
3574 vshufps\t{$0xe4, %1, %2, %0|%0, %2, %1, 0xe4}
3575 vmovlps\t{%2, %1, %0|%0, %1, %2}
3576 vmovlps\t{%2, %0|%0, %2}"
3577 [(set_attr "type" "sselog,ssemov,ssemov")
3578 (set_attr "length_immediate" "1,*,*")
3579 (set_attr "prefix" "vex")
3580 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Legacy SSE loadlps: V2SF operand 2 combined with elements 2 and 3 of V4SF
;; operand 1.  Alternatives: (0) operand 2 tied to the destination, register
;; operand 1 -> shufps with immediate 0xe4; (1) memory operand 2, operand 1
;; tied -> movlps load; (2) memory destination, operand 1 tied -> movlps
;; store of operand 2.
3582 (define_insn "sse_loadlps"
3583 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
3585 (match_operand:V2SF 2 "nonimmediate_operand" "0,m,x")
3587 (match_operand:V4SF 1 "nonimmediate_operand" "x,0,0")
3588 (parallel [(const_int 2) (const_int 3)]))))]
3591 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
3592 movlps\t{%2, %0|%0, %2}
3593 movlps\t{%2, %0|%0, %2}"
3594 [(set_attr "type" "sselog,ssemov,ssemov")
3595 (set_attr "length_immediate" "1,*,*")
3596 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; VEX-encoded three-operand vmovss on V4SF registers; all operands must be
;; registers.  The insn is tagged with scalar mode SF.
3598 (define_insn "*avx_movss"
3599 [(set (match_operand:V4SF 0 "register_operand" "=x")
3601 (match_operand:V4SF 2 "register_operand" "x")
3602 (match_operand:V4SF 1 "register_operand" "x")
3605 "vmovss\t{%2, %1, %0|%0, %1, %2}"
3606 [(set_attr "type" "ssemov")
3607 (set_attr "prefix" "vex")
3608 (set_attr "mode" "SF")])
;; Legacy two-operand movss on V4SF registers; operand 1 is tied to the
;; destination and operand 2 is the register source.
3610 (define_insn "sse_movss"
3611 [(set (match_operand:V4SF 0 "register_operand" "=x")
3613 (match_operand:V4SF 2 "register_operand" "x")
3614 (match_operand:V4SF 1 "register_operand" "0")
3617 "movss\t{%2, %0|%0, %2}"
3618 [(set_attr "type" "ssemov")
3619 (set_attr "mode" "SF")])
;; AVX broadcast of SF register operand 1 into a V4SF register, emitted as
;; vshufps with immediate 0 using operand 1 for both sources.
3621 (define_insn "*vec_dupv4sf_avx"
3622 [(set (match_operand:V4SF 0 "register_operand" "=x")
3624 (match_operand:SF 1 "register_operand" "x")))]
3626 "vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0}"
3627 [(set_attr "type" "sselog1")
3628 (set_attr "length_immediate" "1")
3629 (set_attr "prefix" "vex")
3630 (set_attr "mode" "V4SF")])
;; Legacy SSE broadcast of SF operand 1 (tied to the destination register)
;; into V4SF, emitted as shufps with immediate 0 on the destination itself.
3632 (define_insn "*vec_dupv4sf"
3633 [(set (match_operand:V4SF 0 "register_operand" "=x")
3635 (match_operand:SF 1 "register_operand" "0")))]
3637 "shufps\t{$0, %0, %0|%0, %0, 0}"
3638 [(set_attr "type" "sselog1")
3639 (set_attr "length_immediate" "1")
3640 (set_attr "mode" "V4SF")])
;; AVX pattern building a V2SF from two SF operands.  Alternatives 0..2 use
;; SSE registers (vunpcklps, vinsertps with immediate 0x10, vmovss);
;; alternatives 3 and 4 use the "y" register class with punpckldq / movd.
;; The "prefix" attribute is "orig" for alternatives 3 and 4 and "vex"
;; otherwise.
3642 (define_insn "*vec_concatv2sf_avx"
3643 [(set (match_operand:V2SF 0 "register_operand" "=x,x,x,*y ,*y")
3645 (match_operand:SF 1 "nonimmediate_operand" " x,x,m, x , m")
3646 (match_operand:SF 2 "vector_move_operand" " x,m,C,*ym, C")))]
3649 vunpcklps\t{%2, %1, %0|%0, %1, %2}
3650 vinsertps\t{$0x10, %2, %1, %0|%0, %1, %2, 0x10}
3651 vmovss\t{%1, %0|%0, %1}
3652 punpckldq\t{%2, %0|%0, %2}
3653 movd\t{%1, %0|%0, %1}"
3654 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
3655 (set_attr "length_immediate" "*,1,*,*,*")
3656 (set_attr "prefix_extra" "*,1,*,*,*")
3657 (set (attr "prefix")
3658 (if_then_else (eq_attr "alternative" "3,4")
3659 (const_string "orig")
3660 (const_string "vex")))
3661 (set_attr "mode" "V4SF,V4SF,SF,DI,DI")])
3663 ;; Although insertps takes register source, we prefer
3664 ;; unpcklps with register source since it is shorter.
;; SSE4.1 pattern building a V2SF from two SF operands.  Alternatives 0..2
;; use SSE registers (unpcklps for a register source, insertps with
;; immediate 0x10 for a memory source, movss when operand 2 satisfies "C");
;; alternatives 3 and 4 use the "y" register class with punpckldq / movd.
3665 (define_insn "*vec_concatv2sf_sse4_1"
3666 [(set (match_operand:V2SF 0 "register_operand" "=x,x,x,*y ,*y")
3668 (match_operand:SF 1 "nonimmediate_operand" " 0,0,m, 0 , m")
3669 (match_operand:SF 2 "vector_move_operand" " x,m,C,*ym, C")))]
3672 unpcklps\t{%2, %0|%0, %2}
3673 insertps\t{$0x10, %2, %0|%0, %2, 0x10}
3674 movss\t{%1, %0|%0, %1}
3675 punpckldq\t{%2, %0|%0, %2}
3676 movd\t{%1, %0|%0, %1}"
3677 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
3678 (set_attr "prefix_data16" "*,1,*,*,*")
3679 (set_attr "prefix_extra" "*,1,*,*,*")
3680 (set_attr "length_immediate" "*,1,*,*,*")
3681 (set_attr "mode" "V4SF,V4SF,SF,DI,DI")])
3683 ;; ??? In theory we can match memory for the MMX alternative, but allowing
3684 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
3685 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Baseline SSE pattern building a V2SF from two SF operands; operand 2 is
;; restricted to a register or zero (reg_or_0_operand).  Alternatives 0 and 1
;; use SSE registers (unpcklps, movss); alternatives 2 and 3 use the "y"
;; register class (punpckldq, movd).
3686 (define_insn "*vec_concatv2sf_sse"
3687 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
3689 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
3690 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
3693 unpcklps\t{%2, %0|%0, %2}
3694 movss\t{%1, %0|%0, %1}
3695 punpckldq\t{%2, %0|%0, %2}
3696 movd\t{%1, %0|%0, %1}"
3697 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
3698 (set_attr "mode" "V4SF,SF,DI,DI")])
;; AVX pattern building a V4SF from two V2SF halves: vmovlhps when operand 2
;; is a register, vmovhps when it is in memory.
3700 (define_insn "*vec_concatv4sf_avx"
3701 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3703 (match_operand:V2SF 1 "register_operand" " x,x")
3704 (match_operand:V2SF 2 "nonimmediate_operand" " x,m")))]
3707 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3708 vmovhps\t{%2, %1, %0|%0, %1, %2}"
3709 [(set_attr "type" "ssemov")
3710 (set_attr "prefix" "vex")
3711 (set_attr "mode" "V4SF,V2SF")])
;; Legacy SSE pattern building a V4SF from two V2SF halves; operand 1 is tied
;; to the destination.  movlhps when operand 2 is a register, movhps when it
;; is in memory.
3713 (define_insn "*vec_concatv4sf_sse"
3714 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3716 (match_operand:V2SF 1 "register_operand" " 0,0")
3717 (match_operand:V2SF 2 "nonimmediate_operand" " x,m")))]
3720 movlhps\t{%2, %0|%0, %2}
3721 movhps\t{%2, %0|%0, %2}"
3722 [(set_attr "type" "ssemov")
3723 (set_attr "mode" "V4SF,V2SF")])
;; Vector-initialization expander for every SSEMODE mode: delegates to
;; ix86_expand_vector_init with its first argument false, the destination
;; register (operand 0) and the initializer (operand 1).
3725 (define_expand "vec_init<mode>"
3726 [(match_operand:SSEMODE 0 "register_operand" "")
3727 (match_operand 1 "" "")]
3730 ix86_expand_vector_init (false, operands[0], operands[1]);
;; AVX pattern combining SF operand 2 with V4SF operand 1.  Visible
;; alternatives: (0) register operands -> three-operand vmovss; (1) memory
;; operand 2 with operand 1 satisfying "C" -> vmovss load; (2) general
;; register operand 2 -> vmovd.  The fourth alternative has a memory
;; destination tied to operand 1.
3734 (define_insn "*vec_setv4sf_0_avx"
3735 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,m")
3738 (match_operand:SF 2 "general_operand" " x,m,*r,x*rfF"))
3739 (match_operand:V4SF 1 "vector_move_operand" " x,C,C ,0")
3743 vmovss\t{%2, %1, %0|%0, %1, %2}
3744 vmovss\t{%2, %0|%0, %2}
3745 vmovd\t{%2, %0|%0, %2}
3747 [(set_attr "type" "ssemov")
3748 (set_attr "prefix" "vex")
3749 (set_attr "mode" "SF")])
;; Legacy SSE pattern combining SF operand 2 with V4SF operand 1.  Visible
;; alternatives: (0) register operand 2, operand 1 tied -> movss;
;; (1) memory operand 2 with operand 1 satisfying "C" -> movss load;
;; (2) general register operand 2 into a "Y2" register -> movd.  The fourth
;; alternative has a memory destination tied to operand 1.
3751 (define_insn "vec_setv4sf_0"
3752 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,Y2,m")
3755 (match_operand:SF 2 "general_operand" " x,m,*r,x*rfF"))
3756 (match_operand:V4SF 1 "vector_move_operand" " 0,C,C ,0")
3760 movss\t{%2, %0|%0, %2}
3761 movss\t{%2, %0|%0, %2}
3762 movd\t{%2, %0|%0, %2}
3764 [(set_attr "type" "ssemov")
3765 (set_attr "mode" "SF")])
3767 ;; A subset is vec_setv4sf.
;; AVX insertion of SF operand 2 into V4SF operand 1 via vinsertps.
;; Operand 3 arrives as a power-of-two constant in 1..8; the output code
;; replaces it with exact_log2 of that value shifted left by 4 to form the
;; instruction immediate.
3768 (define_insn "*vec_setv4sf_avx"
3769 [(set (match_operand:V4SF 0 "register_operand" "=x")
3772 (match_operand:SF 2 "nonimmediate_operand" "xm"))
3773 (match_operand:V4SF 1 "register_operand" "x")
3774 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
3777 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
3778 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3780 [(set_attr "type" "sselog")
3781 (set_attr "prefix_extra" "1")
3782 (set_attr "length_immediate" "1")
3783 (set_attr "prefix" "vex")
3784 (set_attr "mode" "V4SF")])
;; SSE4.1 insertion of SF operand 2 into V4SF operand 1 (tied to the
;; destination) via insertps.  Operand 3 arrives as a power-of-two constant
;; in 1..8; the output code replaces it with exact_log2 of that value shifted
;; left by 4 to form the instruction immediate.
3786 (define_insn "*vec_setv4sf_sse4_1"
3787 [(set (match_operand:V4SF 0 "register_operand" "=x")
3790 (match_operand:SF 2 "nonimmediate_operand" "xm"))
3791 (match_operand:V4SF 1 "register_operand" "0")
3792 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
3795 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
3796 return "insertps\t{%3, %2, %0|%0, %2, %3}";
3798 [(set_attr "type" "sselog")
3799 (set_attr "prefix_data16" "1")
3800 (set_attr "prefix_extra" "1")
3801 (set_attr "length_immediate" "1")
3802 (set_attr "mode" "V4SF")])
;; AVX vinsertps expressed as an unspec over V4SF operand 2 (register or
;; memory), V4SF register operand 1 and an immediate operand 3 in 0..255,
;; which is printed unchanged.
3804 (define_insn "*avx_insertps"
3805 [(set (match_operand:V4SF 0 "register_operand" "=x")
3806 (unspec:V4SF [(match_operand:V4SF 2 "nonimmediate_operand" "xm")
3807 (match_operand:V4SF 1 "register_operand" "x")
3808 (match_operand:SI 3 "const_0_to_255_operand" "n")]
3811 "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3812 [(set_attr "type" "sselog")
3813 (set_attr "prefix" "vex")
3814 (set_attr "prefix_extra" "1")
3815 (set_attr "length_immediate" "1")
3816 (set_attr "mode" "V4SF")])
;; SSE4.1 insertps expressed as an unspec over V4SF register operand 2, V4SF
;; operand 1 (tied to the destination) and an immediate operand 3 in 0..255,
;; which is printed unchanged.
;; NOTE(review): operand 2 is register-only here while the AVX variant also
;; accepts memory -- confirm whether that restriction is intentional.
3818 (define_insn "sse4_1_insertps"
3819 [(set (match_operand:V4SF 0 "register_operand" "=x")
3820 (unspec:V4SF [(match_operand:V4SF 2 "register_operand" "x")
3821 (match_operand:V4SF 1 "register_operand" "0")
3822 (match_operand:SI 3 "const_0_to_255_operand" "n")]
3825 "insertps\t{%3, %2, %0|%0, %2, %3}";
3826 [(set_attr "type" "sselog")
3827 (set_attr "prefix_data16" "1")
3828 (set_attr "prefix_extra" "1")
3829 (set_attr "length_immediate" "1")
3830 (set_attr "mode" "V4SF")])
;; Post-reload split (TARGET_SSE && reload_completed) for a V4SF memory
;; destination and a non-memory SF operand 1: it is replaced by a plain
;; SFmode move to offset 0 of the destination (adjust_address).
3833 [(set (match_operand:V4SF 0 "memory_operand" "")
3836 (match_operand:SF 1 "nonmemory_operand" ""))
3839 "TARGET_SSE && reload_completed"
3842 emit_move_insn (adjust_address (operands[0], SFmode, 0), operands[1]);
;; Element-set expander for every SSEMODE mode: delegates to
;; ix86_expand_vector_set with its first argument false, the vector
;; (operand 0), the scalar value (operand 1) and the constant element index
;; taken from operand 2.
3846 (define_expand "vec_set<mode>"
3847 [(match_operand:SSEMODE 0 "register_operand" "")
3848 (match_operand:<ssescalarmode> 1 "register_operand" "")
3849 (match_operand 2 "const_int_operand" "")]
3852 ix86_expand_vector_set (false, operands[0], operands[1],
3853 INTVAL (operands[2]));
;; Extraction of element 0 of a V4SF into an SF destination; rejected when
;; both operands are memory.  After reload the insn is split into a plain
;; move whose source is an SFmode view of operand 1, built either with
;; gen_rtx_REG on the same register number or with gen_lowpart.
3857 (define_insn_and_split "*vec_extractv4sf_0"
3858 [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,f,r")
3860 (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m,m")
3861 (parallel [(const_int 0)])))]
3862 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
3864 "&& reload_completed"
3867 rtx op1 = operands[1];
3869 op1 = gen_rtx_REG (SFmode, REGNO (op1));
3871 op1 = gen_lowpart (SFmode, op1);
3872 emit_move_insn (operands[0], op1);
;; Expander for extracting a 128-bit half of any AVX256MODE vector.
;; Operand 2 is a constant in 0..1; the switch dispatches on its value to
;; either gen_vec_extract_lo_<mode> or gen_vec_extract_hi_<mode>.
3876 (define_expand "avx_vextractf128<mode>"
3877 [(match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "")
3878 (match_operand:AVX256MODE 1 "register_operand" "")
3879 (match_operand:SI 2 "const_0_to_1_operand" "")]
3882 switch (INTVAL (operands[2]))
3885 emit_insn (gen_vec_extract_lo_<mode> (operands[0], operands[1]));
3888 emit_insn (gen_vec_extract_hi_<mode> (operands[0], operands[1]));
;; Low-half extraction for the four-element 256-bit modes (AVX256MODE4P):
;; selects elements 0 and 1 and emits vextractf128 with immediate 0.  The
;; destination may be a register or memory.
3896 (define_insn "vec_extract_lo_<mode>"
3897 [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
3898 (vec_select:<avxhalfvecmode>
3899 (match_operand:AVX256MODE4P 1 "register_operand" "x,x")
3900 (parallel [(const_int 0) (const_int 1)])))]
3902 "vextractf128\t{$0x0, %1, %0|%0, %1, 0x0}"
3903 [(set_attr "type" "sselog")
3904 (set_attr "prefix_extra" "1")
3905 (set_attr "length_immediate" "1")
3906 (set_attr "memory" "none,store")
3907 (set_attr "prefix" "vex")
3908 (set_attr "mode" "V8SF")])
;; High-half extraction for the four-element 256-bit modes (AVX256MODE4P):
;; selects elements 2 and 3 and emits vextractf128 with immediate 1.  The
;; destination may be a register or memory.
3910 (define_insn "vec_extract_hi_<mode>"
3911 [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
3912 (vec_select:<avxhalfvecmode>
3913 (match_operand:AVX256MODE4P 1 "register_operand" "x,x")
3914 (parallel [(const_int 2) (const_int 3)])))]
3916 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
3917 [(set_attr "type" "sselog")
3918 (set_attr "prefix_extra" "1")
3919 (set_attr "length_immediate" "1")
3920 (set_attr "memory" "none,store")
3921 (set_attr "prefix" "vex")
3922 (set_attr "mode" "V8SF")])
;; Low-half extraction for the eight-element 256-bit modes (AVX256MODE8P):
;; selects elements 0..3.  The vextractf128 immediate must be 0 for the low
;; 128 bits; immediate 1 addresses the upper half and is what the matching
;; _hi_ pattern emits.  The destination may be a register or memory.
3924 (define_insn "vec_extract_lo_<mode>"
3925 [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
3926 (vec_select:<avxhalfvecmode>
3927 (match_operand:AVX256MODE8P 1 "register_operand" "x,x")
3928 (parallel [(const_int 0) (const_int 1)
3929 (const_int 2) (const_int 3)])))]
3931 "vextractf128\t{$0x0, %1, %0|%0, %1, 0x0}"
3932 [(set_attr "type" "sselog")
3933 (set_attr "prefix_extra" "1")
3934 (set_attr "length_immediate" "1")
3935 (set_attr "memory" "none,store")
3936 (set_attr "prefix" "vex")
3937 (set_attr "mode" "V8SF")])
;; High-half extraction for the eight-element 256-bit modes (AVX256MODE8P):
;; selects elements 4..7 and emits vextractf128 with immediate 1.  The
;; destination may be a register or memory.
3939 (define_insn "vec_extract_hi_<mode>"
3940 [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
3941 (vec_select:<avxhalfvecmode>
3942 (match_operand:AVX256MODE8P 1 "register_operand" "x,x")
3943 (parallel [(const_int 4) (const_int 5)
3944 (const_int 6) (const_int 7)])))]
3946 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
3947 [(set_attr "type" "sselog")
3948 (set_attr "prefix_extra" "1")
3949 (set_attr "length_immediate" "1")
3950 (set_attr "memory" "none,store")
3951 (set_attr "prefix" "vex")
3952 (set_attr "mode" "V8SF")])
;; Low-half extraction of a V16HI into V8HI: selects elements 0..7.  The
;; vextractf128 immediate must be 0 for the low 128 bits; immediate 1
;; addresses the upper half and is what vec_extract_hi_v16hi emits.  The
;; destination may be a register or memory.
3954 (define_insn "vec_extract_lo_v16hi"
3955 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
3957 (match_operand:V16HI 1 "register_operand" "x,x")
3958 (parallel [(const_int 0) (const_int 1)
3959 (const_int 2) (const_int 3)
3960 (const_int 4) (const_int 5)
3961 (const_int 6) (const_int 7)])))]
3963 "vextractf128\t{$0x0, %1, %0|%0, %1, 0x0}"
3964 [(set_attr "type" "sselog")
3965 (set_attr "prefix_extra" "1")
3966 (set_attr "length_immediate" "1")
3967 (set_attr "memory" "none,store")
3968 (set_attr "prefix" "vex")
3969 (set_attr "mode" "V8SF")])
;; High-half extraction of a V16HI into V8HI: selects elements 8..15 and
;; emits vextractf128 with immediate 1.  The destination may be a register
;; or memory.
3971 (define_insn "vec_extract_hi_v16hi"
3972 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
3974 (match_operand:V16HI 1 "register_operand" "x,x")
3975 (parallel [(const_int 8) (const_int 9)
3976 (const_int 10) (const_int 11)
3977 (const_int 12) (const_int 13)
3978 (const_int 14) (const_int 15)])))]
3980 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
3981 [(set_attr "type" "sselog")
3982 (set_attr "prefix_extra" "1")
3983 (set_attr "length_immediate" "1")
3984 (set_attr "memory" "none,store")
3985 (set_attr "prefix" "vex")
3986 (set_attr "mode" "V8SF")])
;; Low-half extraction of a V32QI into V16QI: selects elements 0..15.  The
;; vextractf128 immediate must be 0 for the low 128 bits; immediate 1
;; addresses the upper half and is what vec_extract_hi_v32qi emits.  The
;; destination may be a register or memory.
3988 (define_insn "vec_extract_lo_v32qi"
3989 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
3991 (match_operand:V32QI 1 "register_operand" "x,x")
3992 (parallel [(const_int 0) (const_int 1)
3993 (const_int 2) (const_int 3)
3994 (const_int 4) (const_int 5)
3995 (const_int 6) (const_int 7)
3996 (const_int 8) (const_int 9)
3997 (const_int 10) (const_int 11)
3998 (const_int 12) (const_int 13)
3999 (const_int 14) (const_int 15)])))]
4001 "vextractf128\t{$0x0, %1, %0|%0, %1, 0x0}"
4002 [(set_attr "type" "sselog")
4003 (set_attr "prefix_extra" "1")
4004 (set_attr "length_immediate" "1")
4005 (set_attr "memory" "none,store")
4006 (set_attr "prefix" "vex")
4007 (set_attr "mode" "V8SF")])
;; High-half extraction of a V32QI into V16QI: selects elements 16..31 and
;; emits vextractf128 with immediate 1.  The destination may be a register
;; or memory.
4009 (define_insn "vec_extract_hi_v32qi"
4010 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
4012 (match_operand:V32QI 1 "register_operand" "x,x")
4013 (parallel [(const_int 16) (const_int 17)
4014 (const_int 18) (const_int 19)
4015 (const_int 20) (const_int 21)
4016 (const_int 22) (const_int 23)
4017 (const_int 24) (const_int 25)
4018 (const_int 26) (const_int 27)
4019 (const_int 28) (const_int 29)
4020 (const_int 30) (const_int 31)])))]
4022 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
4023 [(set_attr "type" "sselog")
4024 (set_attr "prefix_extra" "1")
4025 (set_attr "length_immediate" "1")
4026 (set_attr "memory" "none,store")
4027 (set_attr "prefix" "vex")
4028 (set_attr "mode" "V8SF")])
;; extractps: copies the element of V4SF register operand 1 selected by the
;; constant operand 2 (0..3) to an SF destination in a general register or
;; memory ("rm").  "%v" plus prefix "maybe_vex" covers both the legacy and
;; VEX encodings.
4030 (define_insn "*sse4_1_extractps"
4031 [(set (match_operand:SF 0 "nonimmediate_operand" "=rm")
4033 (match_operand:V4SF 1 "register_operand" "x")
4034 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
4036 "%vextractps\t{%2, %1, %0|%0, %1, %2}"
4037 [(set_attr "type" "sselog")
4038 (set_attr "prefix_data16" "1")
4039 (set_attr "prefix_extra" "1")
4040 (set_attr "length_immediate" "1")
4041 (set_attr "prefix" "maybe_vex")
4042 (set_attr "mode" "V4SF")])
;; Extraction of element operand 2 (0..3) from a V4SF held in offsettable
;; memory.  The split replaces it with a plain SFmode load from byte offset
;; 4 * index within operand 1 (adjust_address).
4044 (define_insn_and_split "*vec_extract_v4sf_mem"
4045 [(set (match_operand:SF 0 "register_operand" "=x*rf")
4047 (match_operand:V4SF 1 "memory_operand" "o")
4048 (parallel [(match_operand 2 "const_0_to_3_operand" "n")])))]
4054 int i = INTVAL (operands[2]);
4056 emit_move_insn (operands[0], adjust_address (operands[1], SFmode, i*4));
;; Element-extract expander for every SSEMODE mode: delegates to
;; ix86_expand_vector_extract with its first argument false, the scalar
;; destination (operand 0), the vector (operand 1) and the constant element
;; index taken from operand 2.
4060 (define_expand "vec_extract<mode>"
4061 [(match_operand:<ssescalarmode> 0 "register_operand" "")
4062 (match_operand:SSEMODE 1 "register_operand" "")
4063 (match_operand 2 "const_int_operand" "")]
4066 ix86_expand_vector_extract (false, operands[0], operands[1],
4067 INTVAL (operands[2]));
4071 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4073 ;; Parallel double-precision floating point element swizzling
4075 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; 256-bit vunpckhpd on V4DF operands: the parallel selects elements 1,5 and
;; 3,7 of the operand 1 / operand 2 pair; operand 2 may be memory.
4077 (define_insn "avx_unpckhpd256"
4078 [(set (match_operand:V4DF 0 "register_operand" "=x")
4081 (match_operand:V4DF 1 "register_operand" "x")
4082 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4083 (parallel [(const_int 1) (const_int 5)
4084 (const_int 3) (const_int 7)])))]
4086 "vunpckhpd\t{%2, %1, %0|%0, %1, %2}"
4087 [(set_attr "type" "sselog")
4088 (set_attr "prefix" "vex")
4089 (set_attr "mode" "V4DF")])
;; Expander for the V2DF unpack-high operation (selector starts at index 1).
;; Its preparation statement passes the operands through
;; ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands).
4091 (define_expand "sse2_unpckhpd_exp"
4092 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
4095 (match_operand:V2DF 1 "nonimmediate_operand" "")
4096 (match_operand:V2DF 2 "nonimmediate_operand" ""))
4097 (parallel [(const_int 1)
4100 "ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);")
;; AVX form of the V2DF unpack-high operation.  Requires TARGET_AVX and
;; rejects the case where operands 1 and 2 are both in memory.
;; Alternatives:
;;   0: all registers             -> vunpckhpd
;;   1: operand 1 in offsettable memory, operand 2 in a register
;;                                -> vmovlpd using the high part (%H1)
;;   2: memory destination tied to operand 2 ("0"), operand 1 in a register
;;                                -> vmovhpd store
4102 (define_insn "*avx_unpckhpd"
4103 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
4106 (match_operand:V2DF 1 "nonimmediate_operand" " x,o,x")
4107 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,0"))
4108 (parallel [(const_int 1)
4110 "TARGET_AVX && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4112 vunpckhpd\t{%2, %1, %0|%0, %1, %2}
4113 vmovlpd\t{%H1, %2, %0|%0, %2, %H1}
4114 vmovhpd\t{%1, %0|%0, %1}"
4115 [(set_attr "type" "sselog,ssemov,ssemov")
4116 (set_attr "prefix" "vex")
4117 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; SSE2 form of the V2DF unpack-high operation.  Requires TARGET_SSE2 and
;; rejects the case where operands 1 and 2 are both in memory.  All
;; templates are two-operand, so one input is always tied to operand 0.
;; Alternatives:
;;   0: operand 1 tied to the destination      -> unpckhpd
;;   1: operand 1 in offsettable memory, operand 2 tied to the destination
;;                                             -> movlpd from %H1
;;   2: memory destination tied to operand 2   -> movhpd store of operand 1
4119 (define_insn "sse2_unpckhpd"
4120 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
4123 (match_operand:V2DF 1 "nonimmediate_operand" " 0,o,x")
4124 (match_operand:V2DF 2 "nonimmediate_operand" " x,0,0"))
4125 (parallel [(const_int 1)
4127 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4129 unpckhpd\t{%2, %0|%0, %2}
4130 movlpd\t{%H1, %0|%0, %H1}
4131 movhpd\t{%1, %0|%0, %1}"
4132 [(set_attr "type" "sselog,ssemov,ssemov")
4133 (set_attr "prefix_data16" "*,1,1")
4134 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; 256-bit vmovddup.  Single source operand (register or memory); the
;; selector picks indices 0, 2, 4, 6.
4136 (define_insn "avx_movddup256"
4137 [(set (match_operand:V4DF 0 "register_operand" "=x")
4140 (match_operand:V4DF 1 "nonimmediate_operand" "xm")
4142 (parallel [(const_int 0) (const_int 2)
4143 (const_int 4) (const_int 6)])))]
4145 "vmovddup\t{%1, %0|%0, %1}"
4146 [(set_attr "type" "sselog1")
4147 (set_attr "prefix" "vex")
4148 (set_attr "mode" "V4DF")])
;; AVX V2DF movddup.  Requires TARGET_AVX and rejects memory-to-memory.
;; Alternative 0 (register destination) emits vmovddup; alternative 1 has an
;; offsettable-memory destination with a register source and is typed
;; "ssemov".
4150 (define_insn "*avx_movddup"
4151 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,o")
4154 (match_operand:V2DF 1 "nonimmediate_operand" "xm,x")
4156 (parallel [(const_int 0)
4158 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4160 vmovddup\t{%1, %0|%0, %1}
4162 [(set_attr "type" "sselog1,ssemov")
4163 (set_attr "prefix" "vex")
4164 (set_attr "mode" "V2DF")])
;; SSE3 V2DF movddup.  Requires TARGET_SSE3 and rejects memory-to-memory.
;; Alternative 0 (register destination) emits movddup; alternative 1 has an
;; offsettable-memory destination with a register source and is typed
;; "ssemov".
4166 (define_insn "*sse3_movddup"
4167 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,o")
4170 (match_operand:V2DF 1 "nonimmediate_operand" "xm,x")
4172 (parallel [(const_int 0)
4174 "TARGET_SSE3 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4176 movddup\t{%1, %0|%0, %1}
4178 [(set_attr "type" "sselog1,ssemov")
4179 (set_attr "mode" "V2DF")])
;; Once reload has completed (TARGET_SSE3): a V2DF memory destination fed
;; from a V2DF register (selector starting at index 0) is rewritten as two
;; DFmode stores of the same value, at byte offsets 0 and 8 of the
;; destination.  The stored value is the DFmode view of operand 1's hard
;; register (same REGNO).
4182 [(set (match_operand:V2DF 0 "memory_operand" "")
4185 (match_operand:V2DF 1 "register_operand" "")
4187 (parallel [(const_int 0)
4189 "TARGET_SSE3 && reload_completed"
4192 rtx low = gen_rtx_REG (DFmode, REGNO (operands[1]));
4193 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
4194 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
;; 256-bit vunpcklpd.  The selector picks indices 0, 4, 2, 6 over operands
;; 1 and 2 taken together.  Operand 2 may be a register or memory.
4198 (define_insn "avx_unpcklpd256"
4199 [(set (match_operand:V4DF 0 "register_operand" "=x")
4202 (match_operand:V4DF 1 "register_operand" "x")
4203 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4204 (parallel [(const_int 0) (const_int 4)
4205 (const_int 2) (const_int 6)])))]
4207 "vunpcklpd\t{%2, %1, %0|%0, %1, %2}"
4208 [(set_attr "type" "sselog")
4209 (set_attr "prefix" "vex")
4210 (set_attr "mode" "V4DF")])
;; Expander for the V2DF unpack-low operation (selector starts at index 0).
;; Its preparation statement passes the operands through
;; ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands).
4212 (define_expand "sse2_unpcklpd_exp"
4213 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
4216 (match_operand:V2DF 1 "nonimmediate_operand" "")
4217 (match_operand:V2DF 2 "nonimmediate_operand" ""))
4218 (parallel [(const_int 0)
4221 "ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);")
;; AVX form of the V2DF unpack-low operation.  Requires TARGET_AVX and
;; rejects the case where operands 1 and 2 are both in memory.
;; Alternatives:
;;   0: all registers                         -> vunpcklpd
;;   1: operand 2 in memory                   -> vmovhpd (three-operand)
;;   2: offsettable-memory destination tied to operand 1, operand 2 in a
;;      register                              -> vmovlpd store to %H0
4223 (define_insn "*avx_unpcklpd"
4224 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o")
4227 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,0")
4228 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x"))
4229 (parallel [(const_int 0)
4231 "TARGET_AVX && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4233 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4234 vmovhpd\t{%2, %1, %0|%0, %1, %2}
4235 vmovlpd\t{%2, %H0|%H0, %2}"
4236 [(set_attr "type" "sselog,ssemov,ssemov")
4237 (set_attr "prefix" "vex")
4238 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; SSE2 form of the V2DF unpack-low operation.  Requires TARGET_SSE2 and
;; rejects the case where operands 1 and 2 are both in memory.  Operand 1
;; is tied to the destination in every alternative.
;; Alternatives:
;;   0: operand 2 in a register               -> unpcklpd
;;   1: operand 2 in memory                   -> movhpd load
;;   2: offsettable-memory destination        -> movlpd store to %H0
4240 (define_insn "sse2_unpcklpd"
4241 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o")
4244 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0")
4245 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x"))
4246 (parallel [(const_int 0)
4248 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4250 unpcklpd\t{%2, %0|%0, %2}
4251 movhpd\t{%2, %0|%0, %2}
4252 movlpd\t{%2, %H0|%H0, %2}"
4253 [(set_attr "type" "sselog,ssemov,ssemov")
4254 (set_attr "prefix_data16" "*,1,1")
4255 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; Expander for 256-bit shufpd taking the immediate as operand 3.  The
;; immediate is decoded into explicit element selectors and handed to
;; gen_avx_shufpd256_1: mask bit 1 chooses 5 over 4, bit 2 chooses 3 over 2,
;; and bit 3 chooses 7 over 6.
4257 (define_expand "avx_shufpd256"
4258 [(match_operand:V4DF 0 "register_operand" "")
4259 (match_operand:V4DF 1 "register_operand" "")
4260 (match_operand:V4DF 2 "nonimmediate_operand" "")
4261 (match_operand:SI 3 "const_int_operand" "")]
4264 int mask = INTVAL (operands[3]);
4265 emit_insn (gen_avx_shufpd256_1 (operands[0], operands[1], operands[2],
4267 GEN_INT (mask & 2 ? 5 : 4),
4268 GEN_INT (mask & 4 ? 3 : 2),
4269 GEN_INT (mask & 8 ? 7 : 6)));
;; 256-bit vshufpd with the shuffle written as four explicit selectors:
;;   operand 3 in 0..1, operand 4 in 4..5, operand 5 in 2..3, operand 6 in 6..7.
;; The output code folds them back into the instruction immediate:
;;   bit 0 = op3, bit 1 = op4 - 4, bit 2 = op5 - 2, bit 3 = op6 - 6
;; and stores that immediate in operands[3] for the template.
4273 (define_insn "avx_shufpd256_1"
4274 [(set (match_operand:V4DF 0 "register_operand" "=x")
4277 (match_operand:V4DF 1 "register_operand" "x")
4278 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4279 (parallel [(match_operand 3 "const_0_to_1_operand" "")
4280 (match_operand 4 "const_4_to_5_operand" "")
4281 (match_operand 5 "const_2_to_3_operand" "")
4282 (match_operand 6 "const_6_to_7_operand" "")])))]
4286 mask = INTVAL (operands[3]);
4287 mask |= (INTVAL (operands[4]) - 4) << 1;
4288 mask |= (INTVAL (operands[5]) - 2) << 2;
4289 mask |= (INTVAL (operands[6]) - 6) << 3;
4290 operands[3] = GEN_INT (mask);
4292 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4294 [(set_attr "type" "sselog")
4295 (set_attr "length_immediate" "1")
4296 (set_attr "prefix" "vex")
4297 (set_attr "mode" "V4DF")])
;; Expander for V2DF shufpd taking the immediate as operand 3.  The
;; immediate is decoded into element selectors and handed to
;; gen_sse2_shufpd_v2df; mask bit 1 chooses 3 over 2 for the last selector.
4299 (define_expand "sse2_shufpd"
4300 [(match_operand:V2DF 0 "register_operand" "")
4301 (match_operand:V2DF 1 "register_operand" "")
4302 (match_operand:V2DF 2 "nonimmediate_operand" "")
4303 (match_operand:SI 3 "const_int_operand" "")]
4306 int mask = INTVAL (operands[3]);
4307 emit_insn (gen_sse2_shufpd_v2df (operands[0], operands[1], operands[2],
4309 GEN_INT (mask & 2 ? 3 : 2)));
;; Even-element extraction for SSEMODE4S modes: a vec_select over the
;; vec_concat of operands 1 and 2 (in <ssedoublesizemode>), with the
;; selector starting at index 0.
4313 (define_expand "vec_extract_even<mode>"
4314 [(set (match_operand:SSEMODE4S 0 "register_operand" "")
4315 (vec_select:SSEMODE4S
4316 (vec_concat:<ssedoublesizemode>
4317 (match_operand:SSEMODE4S 1 "register_operand" "")
4318 (match_operand:SSEMODE4S 2 "nonimmediate_operand" ""))
4319 (parallel [(const_int 0)
;; Odd-element extraction for SSEMODE4S modes: a vec_select over the
;; vec_concat of operands 1 and 2 (in <ssedoublesizemode>), with the
;; selector starting at index 1.
4325 (define_expand "vec_extract_odd<mode>"
4326 [(set (match_operand:SSEMODE4S 0 "register_operand" "")
4327 (vec_select:SSEMODE4S
4328 (vec_concat:<ssedoublesizemode>
4329 (match_operand:SSEMODE4S 1 "register_operand" "")
4330 (match_operand:SSEMODE4S 2 "nonimmediate_operand" ""))
4331 (parallel [(const_int 1)
;; Even-element extraction for SSEMODE2D modes: a vec_select over the
;; vec_concat of operands 1 and 2 (in <ssedoublesizemode>), with the
;; selector starting at index 0.
4337 (define_expand "vec_extract_even<mode>"
4338 [(set (match_operand:SSEMODE2D 0 "register_operand" "")
4339 (vec_select:SSEMODE2D
4340 (vec_concat:<ssedoublesizemode>
4341 (match_operand:SSEMODE2D 1 "register_operand" "")
4342 (match_operand:SSEMODE2D 2 "nonimmediate_operand" ""))
4343 (parallel [(const_int 0)
;; Odd-element extraction for SSEMODE2D modes: a vec_select over the
;; vec_concat of operands 1 and 2 (in <ssedoublesizemode>), with the
;; selector starting at index 1.
4347 (define_expand "vec_extract_odd<mode>"
4348 [(set (match_operand:SSEMODE2D 0 "register_operand" "")
4349 (vec_select:SSEMODE2D
4350 (vec_concat:<ssedoublesizemode>
4351 (match_operand:SSEMODE2D 1 "register_operand" "")
4352 (match_operand:SSEMODE2D 2 "nonimmediate_operand" ""))
4353 (parallel [(const_int 1)
4357 ;; punpcklqdq and punpckhqdq are shorter than shufpd.
;; AVX three-operand vpunpckhqdq on V2DI (selector starts at index 1).
;; Operand 2 may be a register or memory.
4358 (define_insn "*avx_punpckhqdq"
4359 [(set (match_operand:V2DI 0 "register_operand" "=x")
4362 (match_operand:V2DI 1 "register_operand" "x")
4363 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4364 (parallel [(const_int 1)
4367 "vpunpckhqdq\t{%2, %1, %0|%0, %1, %2}"
4368 [(set_attr "type" "sselog")
4369 (set_attr "prefix" "vex")
4370 (set_attr "mode" "TI")])
;; SSE2 two-operand punpckhqdq on V2DI (selector starts at index 1).
;; Operand 1 is tied to the destination; operand 2 may be a register or
;; memory.
4372 (define_insn "sse2_punpckhqdq"
4373 [(set (match_operand:V2DI 0 "register_operand" "=x")
4376 (match_operand:V2DI 1 "register_operand" "0")
4377 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4378 (parallel [(const_int 1)
4381 "punpckhqdq\t{%2, %0|%0, %2}"
4382 [(set_attr "type" "sselog")
4383 (set_attr "prefix_data16" "1")
4384 (set_attr "mode" "TI")])
;; AVX three-operand vpunpcklqdq on V2DI (selector starts at index 0).
;; Operand 2 may be a register or memory.
4386 (define_insn "*avx_punpcklqdq"
4387 [(set (match_operand:V2DI 0 "register_operand" "=x")
4390 (match_operand:V2DI 1 "register_operand" "x")
4391 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4392 (parallel [(const_int 0)
4395 "vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}"
4396 [(set_attr "type" "sselog")
4397 (set_attr "prefix" "vex")
4398 (set_attr "mode" "TI")])
;; SSE2 two-operand punpcklqdq on V2DI (selector starts at index 0).
;; Operand 1 is tied to the destination; operand 2 may be a register or
;; memory.
4400 (define_insn "sse2_punpcklqdq"
4401 [(set (match_operand:V2DI 0 "register_operand" "=x")
4404 (match_operand:V2DI 1 "register_operand" "0")
4405 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4406 (parallel [(const_int 0)
4409 "punpcklqdq\t{%2, %0|%0, %2}"
4410 [(set_attr "type" "sselog")
4411 (set_attr "prefix_data16" "1")
4412 (set_attr "mode" "TI")])
;; AVX three-operand vshufpd for SSEMODE2D modes, with the shuffle written
;; as two explicit selectors: operand 3 in 0..1 and operand 4 in 2..3.
;; The output code rebuilds the immediate (bit 0 = op3, bit 1 = op4 - 2)
;; and stores it in operands[3] for the template.
4414 (define_insn "*avx_shufpd_<mode>"
4415 [(set (match_operand:SSEMODE2D 0 "register_operand" "=x")
4416 (vec_select:SSEMODE2D
4417 (vec_concat:<ssedoublesizemode>
4418 (match_operand:SSEMODE2D 1 "register_operand" "x")
4419 (match_operand:SSEMODE2D 2 "nonimmediate_operand" "xm"))
4420 (parallel [(match_operand 3 "const_0_to_1_operand" "")
4421 (match_operand 4 "const_2_to_3_operand" "")])))]
4425 mask = INTVAL (operands[3]);
4426 mask |= (INTVAL (operands[4]) - 2) << 1;
4427 operands[3] = GEN_INT (mask);
4429 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4431 [(set_attr "type" "sselog")
4432 (set_attr "length_immediate" "1")
4433 (set_attr "prefix" "vex")
4434 (set_attr "mode" "V2DF")])
;; SSE2 two-operand shufpd for SSEMODE2D modes; operand 1 is tied to the
;; destination.  The shuffle is written as two explicit selectors: operand
;; 3 in 0..1 and operand 4 in 2..3.  The output code rebuilds the immediate
;; (bit 0 = op3, bit 1 = op4 - 2) and stores it in operands[3].
4436 (define_insn "sse2_shufpd_<mode>"
4437 [(set (match_operand:SSEMODE2D 0 "register_operand" "=x")
4438 (vec_select:SSEMODE2D
4439 (vec_concat:<ssedoublesizemode>
4440 (match_operand:SSEMODE2D 1 "register_operand" "0")
4441 (match_operand:SSEMODE2D 2 "nonimmediate_operand" "xm"))
4442 (parallel [(match_operand 3 "const_0_to_1_operand" "")
4443 (match_operand 4 "const_2_to_3_operand" "")])))]
4447 mask = INTVAL (operands[3]);
4448 mask |= (INTVAL (operands[4]) - 2) << 1;
4449 operands[3] = GEN_INT (mask);
4451 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
4453 [(set_attr "type" "sselog")
4454 (set_attr "length_immediate" "1")
4455 (set_attr "mode" "V2DF")])
4457 ;; Avoid combining registers from different units in a single alternative,
4458 ;; see comment above inline_secondary_memory_needed function in i386.c
;; AVX: extract element 1 of a V2DF into a DF destination.  Requires
;; TARGET_AVX and rejects memory-to-memory.  Five alternatives by
;; destination: memory, SSE register (from register), SSE register (from
;; offsettable memory), x87 register, general register.  The first two
;; emit vmovhpd (store) and vunpckhpd respectively.
4459 (define_insn "*avx_storehpd"
4460 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
4462 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,o,o,o")
4463 (parallel [(const_int 1)])))]
4464 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4466 vmovhpd\t{%1, %0|%0, %1}
4467 vunpckhpd\t{%1, %1, %0|%0, %1, %1}
4471 [(set_attr "type" "ssemov,sselog1,ssemov,fmov,imov")
4472 (set_attr "prefix" "vex")
4473 (set_attr "mode" "V1DF,V2DF,DF,DF,DF")])
;; SSE2: extract element 1 of a V2DF into a DF destination.  Requires
;; TARGET_SSE2 and rejects memory-to-memory.  Five alternatives by
;; destination: memory, SSE register (source tied to the destination), SSE
;; register (from offsettable memory), x87 register, general register.
;; The memory-destination alternative emits a movhpd store.
4475 (define_insn "sse2_storehpd"
4476 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
4478 (match_operand:V2DF 1 "nonimmediate_operand" " x,0,o,o,o")
4479 (parallel [(const_int 1)])))]
4480 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4482 movhpd\t{%1, %0|%0, %1}
4487 [(set_attr "type" "ssemov,sselog1,ssemov,fmov,imov")
4488 (set_attr "prefix_data16" "1,*,*,*,*")
4489 (set_attr "mode" "V1DF,V2DF,DF,DF,DF")])
;; Once reload has completed (TARGET_SSE2): extracting element 1 of a V2DF
;; that lives in memory into a DF register becomes a plain move.  The
;; source is re-addressed as the DFmode slot at byte offset 8 of the vector.
4492 [(set (match_operand:DF 0 "register_operand" "")
4494 (match_operand:V2DF 1 "memory_operand" "")
4495 (parallel [(const_int 1)])))]
4496 "TARGET_SSE2 && reload_completed"
4497 [(set (match_dup 0) (match_dup 1))]
4499 operands[1] = adjust_address (operands[1], DFmode, 8);
4502 ;; Avoid combining registers from different units in a single alternative,
4503 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Extract element 0 of a V2DF into a DF destination.  Requires TARGET_SSE2
;; and rejects memory-to-memory.  Five alternatives by destination: memory,
;; SSE register (from register), SSE register (from memory), x87 register,
;; general register.  The memory-destination alternative emits
;; "%vmovlpd"; the prefix attribute is "maybe_vex".
4504 (define_insn "sse2_storelpd"
4505 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
4507 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m,m,m")
4508 (parallel [(const_int 0)])))]
4509 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4511 %vmovlpd\t{%1, %0|%0, %1}
4516 [(set_attr "type" "ssemov,ssemov,ssemov,fmov,imov")
4517 (set_attr "prefix_data16" "1,*,*,*,*")
4518 (set_attr "prefix" "maybe_vex")
4519 (set_attr "mode" "V1DF,DF,DF,DF,DF")])
;; Once reload has completed (TARGET_SSE2): extracting element 0 of a V2DF
;; into a DF register becomes a plain DFmode move.  Operand 1 is
;; re-expressed in DFmode either as the hard register with the same REGNO
;; or through gen_lowpart.
;; NOTE(review): presumably the REGNO form is used for register operands
;; and gen_lowpart otherwise -- confirm against the selecting condition.
4522 [(set (match_operand:DF 0 "register_operand" "")
4524 (match_operand:V2DF 1 "nonimmediate_operand" "")
4525 (parallel [(const_int 0)])))]
4526 "TARGET_SSE2 && reload_completed"
4529 rtx op1 = operands[1];
4531 op1 = gen_rtx_REG (DFmode, REGNO (op1));
4533 op1 = gen_lowpart (DFmode, op1);
4534 emit_move_insn (operands[0], op1);
;; Expander that builds a V2DF from element 0 of V2DF operand 1 followed by
;; DF operand 2.  Its preparation statement passes the operands through
;; ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands).
4538 (define_expand "sse2_loadhpd_exp"
4539 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
4542 (match_operand:V2DF 1 "nonimmediate_operand" "")
4543 (parallel [(const_int 0)]))
4544 (match_operand:DF 2 "nonimmediate_operand" "")))]
4546 "ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);")
4548 ;; Avoid combining registers from different units in a single alternative,
4549 ;; see comment above inline_secondary_memory_needed function in i386.c
;; AVX: V2DF made of element 0 of operand 1 followed by DF operand 2.
;; Requires TARGET_AVX and rejects the case where operands 1 and 2 are both
;; in memory.  Five alternatives:
;;   0: operand 2 in memory                   -> vmovhpd
;;   1: operand 2 in an SSE register          -> vunpcklpd
;;   2-4: offsettable-memory destination tied to operand 1, with operand 2
;;        in an SSE, x87 or general register respectively
4550 (define_insn "*avx_loadhpd"
4551 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o,o,o")
4554 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,0,0,0")
4555 (parallel [(const_int 0)]))
4556 (match_operand:DF 2 "nonimmediate_operand" " m,x,x,*f,r")))]
4557 "TARGET_AVX && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4559 vmovhpd\t{%2, %1, %0|%0, %1, %2}
4560 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4564 [(set_attr "type" "ssemov,sselog,ssemov,fmov,imov")
4565 (set_attr "prefix" "vex")
4566 (set_attr "mode" "V1DF,V2DF,DF,DF,DF")])
;; SSE2: V2DF made of element 0 of operand 1 followed by DF operand 2.
;; Requires TARGET_SSE2 and rejects the case where operands 1 and 2 are
;; both in memory.  Six alternatives:
;;   0: operand 1 tied, operand 2 in memory   -> movhpd
;;   1: operand 1 tied, operand 2 in register -> unpcklpd
;;   2: operand 2 tied, operand 1 in register -> shufpd with immediate 1
;;   3-5: offsettable-memory destination tied to operand 1, with operand 2
;;        in an SSE, x87 or general register respectively
4568 (define_insn "sse2_loadhpd"
4569 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,o,o,o")
4572 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,x,0,0,0")
4573 (parallel [(const_int 0)]))
4574 (match_operand:DF 2 "nonimmediate_operand" " m,x,0,x,*f,r")))]
4575 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4577 movhpd\t{%2, %0|%0, %2}
4578 unpcklpd\t{%2, %0|%0, %2}
4579 shufpd\t{$1, %1, %0|%0, %1, 1}
4583 [(set_attr "type" "ssemov,sselog,sselog,ssemov,fmov,imov")
4584 (set_attr "prefix_data16" "1,*,*,*,*,*")
4585 (set_attr "length_immediate" "*,*,1,*,*,*")
4586 (set_attr "mode" "V1DF,V2DF,V2DF,DF,DF,DF")])
;; Once reload has completed (TARGET_SSE2): a V2DF memory destination that
;; keeps its own element 0 (vec_select of the same memory, index 0) and
;; takes DF register operand 1 as the other element is just a DFmode
;; store.  The destination is re-addressed as the DFmode slot at byte
;; offset 8.
4589 [(set (match_operand:V2DF 0 "memory_operand" "")
4591 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
4592 (match_operand:DF 1 "register_operand" "")))]
4593 "TARGET_SSE2 && reload_completed"
4594 [(set (match_dup 0) (match_dup 1))]
4596 operands[0] = adjust_address (operands[0], DFmode, 8);
;; Expander that builds a V2DF from DF operand 2 followed by element 1 of
;; V2DF operand 1.  Its preparation statement passes the operands through
;; ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands).
4599 (define_expand "sse2_loadlpd_exp"
4600 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
4602 (match_operand:DF 2 "nonimmediate_operand" "")
4604 (match_operand:V2DF 1 "nonimmediate_operand" "")
4605 (parallel [(const_int 1)]))))]
4607 "ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);")
4609 ;; Avoid combining registers from different units in a single alternative,
4610 ;; see comment above inline_secondary_memory_needed function in i386.c
;; AVX: V2DF made of DF operand 2 followed by element 1 of operand 1.
;; Requires TARGET_AVX and rejects the case where operands 1 and 2 are both
;; in memory.  Seven alternatives:
;;   0: operand 2 in memory, operand 1 matches "C"   -> vmovsd load
;;   1: operand 2 in memory, operand 1 in register   -> vmovlpd
;;   2: both in registers                            -> vmovsd (3-operand)
;;   3: operand 1 in offsettable memory              -> vmovhpd from %H1
;;   4-6: memory destination tied to operand 1, with operand 2 in an SSE,
;;        x87 or general register respectively
4611 (define_insn "*avx_loadlpd"
4612 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,m,m,m")
4614 (match_operand:DF 2 "nonimmediate_operand" " m,m,x,x,x,*f,r")
4616 (match_operand:V2DF 1 "vector_move_operand" " C,x,x,o,0,0,0")
4617 (parallel [(const_int 1)]))))]
4618 "TARGET_AVX && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4620 vmovsd\t{%2, %0|%0, %2}
4621 vmovlpd\t{%2, %1, %0|%0, %1, %2}
4622 vmovsd\t{%2, %1, %0|%0, %1, %2}
4623 vmovhpd\t{%H1, %2, %0|%0, %2, %H1}
4627 [(set_attr "type" "ssemov,ssemov,ssemov,ssemov,ssemov,fmov,imov")
4628 (set_attr "prefix" "vex")
4629 (set_attr "mode" "DF,V1DF,V1DF,V1DF,DF,DF,DF")])
;; SSE2: V2DF made of DF operand 2 followed by element 1 of operand 1.
;; Requires TARGET_SSE2 and rejects the case where operands 1 and 2 are
;; both in memory.  Eight alternatives:
;;   0: operand 2 in memory, operand 1 matches "C"   -> movsd load
;;   1: operand 2 in memory, operand 1 tied          -> movlpd
;;   2: operand 2 in register, operand 1 tied        -> movsd
;;   3: operand 2 tied, operand 1 in register        -> shufpd, immediate 2
;;   4: operand 2 tied, operand 1 in offsettable mem -> movhpd from %H1
;;   5-7: memory destination tied to operand 1, with operand 2 in an SSE,
;;        x87 or general register respectively
4631 (define_insn "sse2_loadlpd"
4632 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m,m,m")
4634 (match_operand:DF 2 "nonimmediate_operand" " m,m,x,0,0,x,*f,r")
4636 (match_operand:V2DF 1 "vector_move_operand" " C,0,0,x,o,0,0,0")
4637 (parallel [(const_int 1)]))))]
4638 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4640 movsd\t{%2, %0|%0, %2}
4641 movlpd\t{%2, %0|%0, %2}
4642 movsd\t{%2, %0|%0, %2}
4643 shufpd\t{$2, %2, %0|%0, %2, 2}
4644 movhpd\t{%H1, %0|%0, %H1}
4648 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov,fmov,imov")
4649 (set_attr "prefix_data16" "*,1,*,*,1,*,*,*")
4650 (set_attr "length_immediate" "*,*,*,1,*,*,*,*")
4651 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,DF,DF,DF")])
;; Once reload has completed (TARGET_SSE2): a V2DF memory destination that
;; takes DF register operand 1 as its first (low) element and keeps its own
;; element 1 (vec_select of the same memory, index 1) is just a DFmode
;; store of operand 1.  The element being replaced is element 0, so the
;; store must target byte offset 0; offset 8 would overwrite the element
;; the pattern says is preserved and leave element 0 unchanged.
4654 [(set (match_operand:V2DF 0 "memory_operand" "")
4656 (match_operand:DF 1 "register_operand" "")
4657 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
4658 "TARGET_SSE2 && reload_completed"
4659 [(set (match_dup 0) (match_dup 1))]
4661 operands[0] = adjust_address (operands[0], DFmode, 0);
4664 ;; Not sure these two are ever used, but it doesn't hurt to have them.
;; SSE-without-SSE2 extraction of element 1 of a V2DF into a DF.  Rejects
;; memory-to-memory.  Alternatives:
;;   0: memory destination             -> movhps store
;;   1: register to register           -> movhlps
;;   2: source in offsettable memory   -> movlps from the high part (%H1)
4666 (define_insn "*vec_extractv2df_1_sse"
4667 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
4669 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
4670 (parallel [(const_int 1)])))]
4671 "!TARGET_SSE2 && TARGET_SSE
4672 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4674 movhps\t{%1, %0|%0, %1}
4675 movhlps\t{%1, %0|%0, %1}
4676 movlps\t{%H1, %0|%0, %H1}"
4677 [(set_attr "type" "ssemov")
4678 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; SSE-without-SSE2 extraction of element 0 of a V2DF into a DF.  Rejects
;; memory-to-memory.  Alternatives:
;;   0: memory destination             -> movlps store
;;   1: register to register           -> movaps
;;   2: source in memory               -> movlps load
4680 (define_insn "*vec_extractv2df_0_sse"
4681 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
4683 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
4684 (parallel [(const_int 0)])))]
4685 "!TARGET_SSE2 && TARGET_SSE
4686 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4688 movlps\t{%1, %0|%0, %1}
4689 movaps\t{%1, %0|%0, %1}
4690 movlps\t{%1, %0|%0, %1}"
4691 [(set_attr "type" "ssemov")
4692 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; AVX V2DF pattern over two V2DF inputs (operand 2 is listed before
;; operand 1).  Five alternatives:
;;   0: both in registers                             -> vmovsd
;;   1: operand 2 in memory                           -> vmovlpd (3-operand)
;;   2: memory destination tied to operand 1          -> vmovlpd store
;;   3: operand 1 in offsettable memory               -> vmovhps from %H1
;;   4: offsettable-memory destination tied to operand 2
;;                                                    -> vmovhps store to %H0
4694 (define_insn "*avx_movsd"
4695 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m,x,o")
4697 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x,x,0")
4698 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,0,o,x")
4702 vmovsd\t{%2, %1, %0|%0, %1, %2}
4703 vmovlpd\t{%2, %1, %0|%0, %1, %2}
4704 vmovlpd\t{%2, %0|%0, %2}
4705 vmovhps\t{%H1, %2, %0|%0, %2, %H1}
4706 vmovhps\t{%1, %H0|%H0, %1}"
4707 [(set_attr "type" "ssemov,ssemov,ssemov,ssemov,ssemov")
4708 (set_attr "prefix" "vex")
4709 (set_attr "mode" "DF,V1DF,V1DF,V1DF,V1DF")])
;; SSE2 V2DF pattern over two V2DF inputs (operand 2 is listed before
;; operand 1).  Six alternatives:
;;   0: operand 1 tied, operand 2 in register         -> movsd
;;   1: operand 1 tied, operand 2 in memory           -> movlpd load
;;   2: memory destination tied to operand 1          -> movlpd store
;;   3: operand 2 tied, operand 1 in register         -> shufpd, immediate 2
;;   4: operand 2 tied, operand 1 in offsettable mem  -> movhps from %H1
;;   5: offsettable-memory destination tied to operand 2
;;                                                    -> movhps store to %H0
4711 (define_insn "sse2_movsd"
4712 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m,x,x,o")
4714 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x,0,0,0")
4715 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0,x,o,x")
4719 movsd\t{%2, %0|%0, %2}
4720 movlpd\t{%2, %0|%0, %2}
4721 movlpd\t{%2, %0|%0, %2}
4722 shufpd\t{$2, %2, %0|%0, %2, 2}
4723 movhps\t{%H1, %0|%0, %H1}
4724 movhps\t{%1, %H0|%H0, %1}"
4725 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
4726 (set_attr "prefix_data16" "*,1,1,*,*,*")
4727 (set_attr "length_immediate" "*,*,*,1,*,*")
4728 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,V1DF")])
;; V2DF result computed from a single DF operand (register or memory) with
;; "%vmovddup"; the prefix attribute is "maybe_vex".
4730 (define_insn "*vec_dupv2df_sse3"
4731 [(set (match_operand:V2DF 0 "register_operand" "=x")
4733 (match_operand:DF 1 "nonimmediate_operand" "xm")))]
4735 "%vmovddup\t{%1, %0|%0, %1}"
4736 [(set_attr "type" "sselog1")
4737 (set_attr "prefix" "maybe_vex")
4738 (set_attr "mode" "DF")])
;; V2DF result computed from a single DF register operand that is tied to
;; the destination register ("0").  Typed "sselog1", mode V2DF.
4740 (define_insn "vec_dupv2df"
4741 [(set (match_operand:V2DF 0 "register_operand" "=x")
4743 (match_operand:DF 1 "register_operand" "0")))]
4746 [(set_attr "type" "sselog1")
4747 (set_attr "mode" "V2DF")])
;; V2DF result built from DF operand 1 (register or memory) and emitted as
;; "%vmovddup"; the prefix attribute is "maybe_vex".
4749 (define_insn "*vec_concatv2df_sse3"
4750 [(set (match_operand:V2DF 0 "register_operand" "=x")
4752 (match_operand:DF 1 "nonimmediate_operand" "xm")
4755 "%vmovddup\t{%1, %0|%0, %1}"
4756 [(set_attr "type" "sselog1")
4757 (set_attr "prefix" "maybe_vex")
4758 (set_attr "mode" "DF")])
;; AVX: build a V2DF from DF operands 1 and 2.  Alternatives:
;;   0: both in registers                         -> vunpcklpd
;;   1: operand 2 in memory                       -> vmovhpd
;;   2: operand 1 in memory, operand 2 matches "C" -> vmovsd load
4760 (define_insn "*vec_concatv2df_avx"
4761 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
4763 (match_operand:DF 1 "nonimmediate_operand" " x,x,m")
4764 (match_operand:DF 2 "vector_move_operand" " x,m,C")))]
4767 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4768 vmovhpd\t{%2, %1, %0|%0, %1, %2}
4769 vmovsd\t{%1, %0|%0, %1}"
4770 [(set_attr "type" "ssemov")
4771 (set_attr "prefix" "vex")
4772 (set_attr "mode" "DF,V1DF,DF")])
;; Non-AVX: build a V2DF from DF operands 1 and 2.  The first three
;; alternatives use the "Y2" register class, the last two plain "x".
;;   0: operand 1 tied, operand 2 in Y2 register    -> unpcklpd
;;   1: operand 1 tied, operand 2 in memory         -> movhpd
;;   2: operand 1 in memory, operand 2 matches "C"  -> movsd load
;;   3: operand 1 tied, operand 2 in x register     -> movlhps
;;   4: operand 1 tied, operand 2 in memory         -> movhps
4774 (define_insn "*vec_concatv2df"
4775 [(set (match_operand:V2DF 0 "register_operand" "=Y2,Y2,Y2,x,x")
4777 (match_operand:DF 1 "nonimmediate_operand" " 0 ,0 ,m ,0,0")
4778 (match_operand:DF 2 "vector_move_operand" " Y2,m ,C ,x,m")))]
4781 unpcklpd\t{%2, %0|%0, %2}
4782 movhpd\t{%2, %0|%0, %2}
4783 movsd\t{%1, %0|%0, %1}
4784 movlhps\t{%2, %0|%0, %2}
4785 movhps\t{%2, %0|%0, %2}"
4786 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,ssemov")
4787 (set_attr "prefix_data16" "*,1,*,*,*")
4788 (set_attr "mode" "V2DF,V1DF,DF,V4SF,V2SF")])
4790 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4792 ;; Parallel integral arithmetic
4794 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Integer vector negate expander for SSEMODEI modes.  The preparation
;; statement sets operands[2] to a register holding the all-zero vector
;; constant of the same mode.
4796 (define_expand "neg<mode>2"
4797 [(set (match_operand:SSEMODEI 0 "register_operand" "")
4800 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")))]
4802 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
;; Integer vector add/subtract expander for SSEMODEI modes (name and rtx
;; code come from the plusminus iterator attributes).  The preparation
;; statement calls ix86_fixup_binary_operands_no_copy on the operands.
4804 (define_expand "<plusminus_insn><mode>3"
4805 [(set (match_operand:SSEMODEI 0 "register_operand" "")
4807 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
4808 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
4810 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; AVX three-operand integer vector add/subtract for SSEMODEI modes.
;; Requires TARGET_AVX and ix86_binary_operator_ok.  The mnemonic is
;; assembled as "vp" + <plusminus_mnemonic> + <ssevecsize>.
4812 (define_insn "*avx_<plusminus_insn><mode>3"
4813 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
4815 (match_operand:SSEMODEI 1 "nonimmediate_operand" "<comm>x")
4816 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
4817 "TARGET_AVX && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
4818 "vp<plusminus_mnemonic><ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
4819 [(set_attr "type" "sseiadd")
4820 (set_attr "prefix" "vex")
4821 (set_attr "mode" "TI")])
;; SSE2 two-operand integer vector add/subtract for SSEMODEI modes; operand
;; 1 is tied to the destination.  Requires TARGET_SSE2 and
;; ix86_binary_operator_ok.  The mnemonic is assembled as
;; "p" + <plusminus_mnemonic> + <ssevecsize>.
4823 (define_insn "*<plusminus_insn><mode>3"
4824 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
4826 (match_operand:SSEMODEI 1 "nonimmediate_operand" "<comm>0")
4827 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
4828 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
4829 "p<plusminus_mnemonic><ssevecsize>\t{%2, %0|%0, %2}"
4830 [(set_attr "type" "sseiadd")
4831 (set_attr "prefix_data16" "1")
4832 (set_attr "mode" "TI")])
;; Expander for the sat_plusminus operations on SSEMODE12 modes (V16QI and
;; V8HI).  The preparation statement calls
;; ix86_fixup_binary_operands_no_copy on the operands.
4834 (define_expand "sse2_<plusminus_insn><mode>3"
4835 [(set (match_operand:SSEMODE12 0 "register_operand" "")
4836 (sat_plusminus:SSEMODE12
4837 (match_operand:SSEMODE12 1 "nonimmediate_operand" "")
4838 (match_operand:SSEMODE12 2 "nonimmediate_operand" "")))]
4840 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; AVX three-operand form of the sat_plusminus operations on SSEMODE12
;; modes.  Requires TARGET_AVX and ix86_binary_operator_ok.  The mnemonic
;; is assembled as "vp" + <plusminus_mnemonic> + <ssevecsize>.
4842 (define_insn "*avx_<plusminus_insn><mode>3"
4843 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
4844 (sat_plusminus:SSEMODE12
4845 (match_operand:SSEMODE12 1 "nonimmediate_operand" "<comm>x")
4846 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
4847 "TARGET_AVX && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
4848 "vp<plusminus_mnemonic><ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
4849 [(set_attr "type" "sseiadd")
4850 (set_attr "prefix" "vex")
4851 (set_attr "mode" "TI")])
;; SSE2 two-operand form of the sat_plusminus operations on SSEMODE12
;; modes; operand 1 is tied to the destination.  Requires TARGET_SSE2 and
;; ix86_binary_operator_ok.  The mnemonic is assembled as
;; "p" + <plusminus_mnemonic> + <ssevecsize>.
4853 (define_insn "*sse2_<plusminus_insn><mode>3"
4854 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
4855 (sat_plusminus:SSEMODE12
4856 (match_operand:SSEMODE12 1 "nonimmediate_operand" "<comm>0")
4857 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
4858 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
4859 "p<plusminus_mnemonic><ssevecsize>\t{%2, %0|%0, %2}"
4860 [(set_attr "type" "sseiadd")
4861 (set_attr "prefix_data16" "1")
4862 (set_attr "mode" "TI")])
;; V16QI multiply, implemented by splitting (while pseudos can still be
;; created) into V8HI multiplies, since the split code only ever calls
;; gen_mulv8hi3.  Two code sequences appear below:
;;   * an SSE5 one that widens each input into high-byte and low-byte V8HI
;;     halves with ix86_expand_sse5_unpack, multiplies the halves, and
;;     repacks the low byte of each word with ix86_expand_sse5_pack;
;;   * a generic one that widens with punpckhbw/punpcklbw of each input
;;     with itself, multiplies the two halves as V8HI, and then gathers the
;;     wanted bytes back together with a cascade of punpckhbw/punpcklbw.
4864 (define_insn_and_split "mulv16qi3"
4865 [(set (match_operand:V16QI 0 "register_operand" "")
4866 (mult:V16QI (match_operand:V16QI 1 "register_operand" "")
4867 (match_operand:V16QI 2 "register_operand" "")))]
4869 && can_create_pseudo_p ()"
4874 rtx t[12], op0, op[3];
4879 /* On SSE5, we can take advantage of the pperm instruction to pack and
4880 unpack the bytes. Unpack data such that we've got a source byte in
4881 each low byte of each word. We don't care what goes into the high
4882 byte, so put 0 there. */
4883 for (i = 0; i < 6; ++i)
4884 t[i] = gen_reg_rtx (V8HImode);
4886 for (i = 0; i < 2; i++)
4889 op[1] = operands[i+1];
4890 ix86_expand_sse5_unpack (op, true, true); /* high bytes */
4893 ix86_expand_sse5_unpack (op, true, false); /* low bytes */
4896 /* Multiply words. */
4897 emit_insn (gen_mulv8hi3 (t[4], t[0], t[1])); /* high bytes */
4898 emit_insn (gen_mulv8hi3 (t[5], t[2], t[3])); /* low bytes */
4900 /* Pack the low byte of each word back into a single xmm */
4901 op[0] = operands[0];
4904 ix86_expand_sse5_pack (op);
4908 for (i = 0; i < 12; ++i)
4909 t[i] = gen_reg_rtx (V16QImode);
4911 /* Unpack data such that we've got a source byte in each low byte of
4912 each word. We don't care what goes into the high byte of each word.
4913 Rather than trying to get zero in there, most convenient is to let
4914 it be a copy of the low byte. */
4915 emit_insn (gen_sse2_punpckhbw (t[0], operands[1], operands[1]));
4916 emit_insn (gen_sse2_punpckhbw (t[1], operands[2], operands[2]));
4917 emit_insn (gen_sse2_punpcklbw (t[2], operands[1], operands[1]));
4918 emit_insn (gen_sse2_punpcklbw (t[3], operands[2], operands[2]));
4920 /* Multiply words. The end-of-line annotations here give a picture of what
4921 the output of that instruction looks like. Dot means don't care; the
4922 letters are the bytes of the result with A being the most significant. */
4923 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[4]), /* .A.B.C.D.E.F.G.H */
4924 gen_lowpart (V8HImode, t[0]),
4925 gen_lowpart (V8HImode, t[1])));
4926 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[5]), /* .I.J.K.L.M.N.O.P */
4927 gen_lowpart (V8HImode, t[2]),
4928 gen_lowpart (V8HImode, t[3])));
4930 /* Extract the relevant bytes and merge them back together. */
4931 emit_insn (gen_sse2_punpckhbw (t[6], t[5], t[4])); /* ..AI..BJ..CK..DL */
4932 emit_insn (gen_sse2_punpcklbw (t[7], t[5], t[4])); /* ..EM..FN..GO..HP */
4933 emit_insn (gen_sse2_punpckhbw (t[8], t[7], t[6])); /* ....AEIM....BFJN */
4934 emit_insn (gen_sse2_punpcklbw (t[9], t[7], t[6])); /* ....CGKO....DHLP */
4935 emit_insn (gen_sse2_punpckhbw (t[10], t[9], t[8])); /* ........ACEGIKMO */
4936 emit_insn (gen_sse2_punpcklbw (t[11], t[9], t[8])); /* ........BDFHJLNP */
4939 emit_insn (gen_sse2_punpcklbw (op0, t[11], t[10])); /* ABCDEFGHIJKLMNOP */
;; V8HI multiply expander.  The preparation statement calls
;; ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands).
4943 (define_expand "mulv8hi3"
4944 [(set (match_operand:V8HI 0 "register_operand" "")
4945 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
4946 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
4948 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; AVX three-operand V8HI multiply (vpmullw).  Operands 1 and 2 are
;; commutative ("%").  Requires TARGET_AVX and ix86_binary_operator_ok.
4950 (define_insn "*avx_mulv8hi3"
4951 [(set (match_operand:V8HI 0 "register_operand" "=x")
4952 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%x")
4953 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
4954 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
4955 "vpmullw\t{%2, %1, %0|%0, %1, %2}"
4956 [(set_attr "type" "sseimul")
4957 (set_attr "prefix" "vex")
4958 (set_attr "mode" "TI")])
;; SSE2 two-operand V8HI multiply (pmullw).  Operand 1 is commutative with
;; operand 2 ("%") and tied to the destination.  Requires TARGET_SSE2 and
;; ix86_binary_operator_ok.
4960 (define_insn "*mulv8hi3"
4961 [(set (match_operand:V8HI 0 "register_operand" "=x")
4962 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
4963 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
4964 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
4965 "pmullw\t{%2, %0|%0, %2}"
4966 [(set_attr "type" "sseimul")
4967 (set_attr "prefix_data16" "1")
4968 (set_attr "mode" "TI")])
;; Expander for the signed high-part V8HI multiply (per its name; the
;; matching insns emit pmulhw / vpmulhw).  The preparation statement calls
;; ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands).
4970 (define_expand "smulv8hi3_highpart"
4971 [(set (match_operand:V8HI 0 "register_operand" "")
4976 (match_operand:V8HI 1 "nonimmediate_operand" ""))
4978 (match_operand:V8HI 2 "nonimmediate_operand" "")))
4981 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; AVX three-operand high-part V8HI multiply (vpmulhw).  Operands 1 and 2
;; are commutative ("%").  Requires TARGET_AVX and ix86_binary_operator_ok.
;; NOTE(review): the pattern name lacks the "_smul" stem used by its
;; siblings ("*smulv8hi3_highpart", "*avx_umulv8hi3_highpart"); it was
;; presumably meant to be "*avx_smulv8hi3_highpart" -- confirm before
;; renaming.
4983 (define_insn "*avxv8hi3_highpart"
4984 [(set (match_operand:V8HI 0 "register_operand" "=x")
4989 (match_operand:V8HI 1 "nonimmediate_operand" "%x"))
4991 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
4993 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
4994 "vpmulhw\t{%2, %1, %0|%0, %1, %2}"
4995 [(set_attr "type" "sseimul")
4996 (set_attr "prefix" "vex")
4997 (set_attr "mode" "TI")])
;; SSE2 two-operand high-part V8HI multiply (pmulhw).  Operand 1 is
;; commutative with operand 2 ("%") and tied to the destination.  Requires
;; TARGET_SSE2 and ix86_binary_operator_ok.
4999 (define_insn "*smulv8hi3_highpart"
5000 [(set (match_operand:V8HI 0 "register_operand" "=x")
5005 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
5007 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
5009 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5010 "pmulhw\t{%2, %0|%0, %2}"
5011 [(set_attr "type" "sseimul")
5012 (set_attr "prefix_data16" "1")
5013 (set_attr "mode" "TI")])
;; Expander for the unsigned high-part V8HI multiply (per its name; the
;; matching insns emit pmulhuw / vpmulhuw).  The preparation statement
;; calls ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands).
5015 (define_expand "umulv8hi3_highpart"
5016 [(set (match_operand:V8HI 0 "register_operand" "")
5021 (match_operand:V8HI 1 "nonimmediate_operand" ""))
5023 (match_operand:V8HI 2 "nonimmediate_operand" "")))
5026 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; AVX three-operand high-part V8HI multiply (vpmulhuw).  Operands 1 and 2
;; are commutative ("%").  Requires TARGET_AVX and ix86_binary_operator_ok.
5028 (define_insn "*avx_umulv8hi3_highpart"
5029 [(set (match_operand:V8HI 0 "register_operand" "=x")
5034 (match_operand:V8HI 1 "nonimmediate_operand" "%x"))
5036 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
5038 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5039 "vpmulhuw\t{%2, %1, %0|%0, %1, %2}"
5040 [(set_attr "type" "sseimul")
5041 (set_attr "prefix" "vex")
5042 (set_attr "mode" "TI")])
;; SSE2 two-operand high-part V8HI multiply (pmulhuw).  Operand 1 is
;; commutative with operand 2 ("%") and tied to the destination.  Requires
;; TARGET_SSE2 and ix86_binary_operator_ok.
5044 (define_insn "*umulv8hi3_highpart"
5045 [(set (match_operand:V8HI 0 "register_operand" "=x")
5050 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
5052 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
5054 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5055 "pmulhuw\t{%2, %0|%0, %2}"
5056 [(set_attr "type" "sseimul")
5057 (set_attr "prefix_data16" "1")
5058 (set_attr "mode" "TI")])
;; Expander for the widening multiply that produces a V2DI from elements 0
;; and 2 of each V4SI input (the matching insns emit pmuludq / vpmuludq).
;; The preparation statement calls
;; ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands).
5060 (define_expand "sse2_umulv2siv2di3"
5061 [(set (match_operand:V2DI 0 "register_operand" "")
5065 (match_operand:V4SI 1 "nonimmediate_operand" "")
5066 (parallel [(const_int 0) (const_int 2)])))
5069 (match_operand:V4SI 2 "nonimmediate_operand" "")
5070 (parallel [(const_int 0) (const_int 2)])))))]
5072 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; AVX three-operand vpmuludq: V2DI result from elements 0 and 2 of each
;; V4SI input.  Operands 1 and 2 are commutative ("%").  Requires
;; TARGET_AVX and ix86_binary_operator_ok.
5074 (define_insn "*avx_umulv2siv2di3"
5075 [(set (match_operand:V2DI 0 "register_operand" "=x")
5079 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
5080 (parallel [(const_int 0) (const_int 2)])))
5083 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
5084 (parallel [(const_int 0) (const_int 2)])))))]
5085 "TARGET_AVX && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5086 "vpmuludq\t{%2, %1, %0|%0, %1, %2}"
5087 [(set_attr "type" "sseimul")
5088 (set_attr "prefix" "vex")
5089 (set_attr "mode" "TI")])
;; SSE2 two-operand pmuludq: V2DI result from elements 0 and 2 of each V4SI
;; input.  Operand 1 is commutative with operand 2 ("%") and tied to the
;; destination.  Requires TARGET_SSE2 and ix86_binary_operator_ok.
5091 (define_insn "*sse2_umulv2siv2di3"
5092 [(set (match_operand:V2DI 0 "register_operand" "=x")
5096 (match_operand:V4SI 1 "nonimmediate_operand" "%0")
5097 (parallel [(const_int 0) (const_int 2)])))
5100 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
5101 (parallel [(const_int 0) (const_int 2)])))))]
5102 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5103 "pmuludq\t{%2, %0|%0, %2}"
5104 [(set_attr "type" "sseimul")
5105 (set_attr "prefix_data16" "1")
5106 (set_attr "mode" "TI")])
;; Expander for the widening multiply that produces a V2DI from elements 0
;; and 2 of each V4SI input (the matching insns emit pmuldq / vpmuldq).
;; The preparation statement calls
;; ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands).
5108 (define_expand "sse4_1_mulv2siv2di3"
5109 [(set (match_operand:V2DI 0 "register_operand" "")
5113 (match_operand:V4SI 1 "nonimmediate_operand" "")
5114 (parallel [(const_int 0) (const_int 2)])))
5117 (match_operand:V4SI 2 "nonimmediate_operand" "")
5118 (parallel [(const_int 0) (const_int 2)])))))]
5120 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; AVX insn emitting the three-operand vpmuldq on elements 0 and 2 of two
;; V4SI operands, giving a V2DI result.  Operand 1 is commutative with
;; operand 2; operand 2 may be a register or memory.
5122 (define_insn "*avx_mulv2siv2di3"
5123 [(set (match_operand:V2DI 0 "register_operand" "=x")
5127 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
5128 (parallel [(const_int 0) (const_int 2)])))
5131 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
5132 (parallel [(const_int 0) (const_int 2)])))))]
5133 "TARGET_AVX && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5134 "vpmuldq\t{%2, %1, %0|%0, %1, %2}"
5135 [(set_attr "type" "sseimul")
5136 (set_attr "prefix_extra" "1")
5137 (set_attr "prefix" "vex")
5138 (set_attr "mode" "TI")])
;; SSE4.1 insn emitting the two-operand pmuldq on elements 0 and 2 of two
;; V4SI operands, giving a V2DI result.  Operand 1 is tied to the
;; destination ("%0") and is commutative with operand 2.
5140 (define_insn "*sse4_1_mulv2siv2di3"
5141 [(set (match_operand:V2DI 0 "register_operand" "=x")
5145 (match_operand:V4SI 1 "nonimmediate_operand" "%0")
5146 (parallel [(const_int 0) (const_int 2)])))
5149 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
5150 (parallel [(const_int 0) (const_int 2)])))))]
5151 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5152 "pmuldq\t{%2, %0|%0, %2}"
5153 [(set_attr "type" "sseimul")
5154 (set_attr "prefix_extra" "1")
5155 (set_attr "mode" "TI")])
;; Expander for the pmaddwd pattern: V4SI result from two V8HI inputs.
;; Each input is used twice (match_operand, then match_dup) through V4HI
;; vec_selects; one selection list starts at element 0, the other starts
;; at element 1 and ends at element 7.  Operands are first passed through
;; ix86_fixup_binary_operands_no_copy for MULT in V8HImode.
5157 (define_expand "sse2_pmaddwd"
5158 [(set (match_operand:V4SI 0 "register_operand" "")
5163 (match_operand:V8HI 1 "nonimmediate_operand" "")
5164 (parallel [(const_int 0)
5170 (match_operand:V8HI 2 "nonimmediate_operand" "")
5171 (parallel [(const_int 0)
5177 (vec_select:V4HI (match_dup 1)
5178 (parallel [(const_int 1)
5183 (vec_select:V4HI (match_dup 2)
5184 (parallel [(const_int 1)
5187 (const_int 7)]))))))]
5189 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; AVX insn for the pmaddwd pattern: emits the three-operand vpmaddwd with
;; V8HI sources and a V4SI destination.  Operand 1 is commutative with
;; operand 2; operand 2 may be a register or memory.
5191 (define_insn "*avx_pmaddwd"
5192 [(set (match_operand:V4SI 0 "register_operand" "=x")
5197 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
5198 (parallel [(const_int 0)
5204 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5205 (parallel [(const_int 0)
5211 (vec_select:V4HI (match_dup 1)
5212 (parallel [(const_int 1)
5217 (vec_select:V4HI (match_dup 2)
5218 (parallel [(const_int 1)
5221 (const_int 7)]))))))]
5222 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5223 "vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
5224 [(set_attr "type" "sseiadd")
5225 (set_attr "prefix" "vex")
5226 (set_attr "mode" "TI")])
;; SSE2 insn for the pmaddwd pattern: emits the two-operand pmaddwd, so
;; operand 1 is tied to the destination ("%0").  Unlike the AVX variant it
;; also sets the "atom_unit" attribute to "simul".
5228 (define_insn "*sse2_pmaddwd"
5229 [(set (match_operand:V4SI 0 "register_operand" "=x")
5234 (match_operand:V8HI 1 "nonimmediate_operand" "%0")
5235 (parallel [(const_int 0)
5241 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5242 (parallel [(const_int 0)
5248 (vec_select:V4HI (match_dup 1)
5249 (parallel [(const_int 1)
5254 (vec_select:V4HI (match_dup 2)
5255 (parallel [(const_int 1)
5258 (const_int 7)]))))))]
5259 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5260 "pmaddwd\t{%2, %0|%0, %2}"
5261 [(set_attr "type" "sseiadd")
5262 (set_attr "atom_unit" "simul")
5263 (set_attr "prefix_data16" "1")
5264 (set_attr "mode" "TI")])
;; Named expander for V4SI multiplication with register operands.  When
;; TARGET_SSE4_1 or TARGET_SSE5 is set, the operands are passed through
;; ix86_fixup_binary_operands_no_copy before the pattern is generated.
5266 (define_expand "mulv4si3"
5267 [(set (match_operand:V4SI 0 "register_operand" "")
5268 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
5269 (match_operand:V4SI 2 "register_operand" "")))]
5272 if (TARGET_SSE4_1 || TARGET_SSE5)
5273 ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);
;; AVX insn for V4SI multiplication: emits the three-operand vpmulld.
;; Operand 1 is commutative with operand 2 ("%x"); operand 2 may be an SSE
;; register or memory ("xm").  Requires TARGET_AVX and operands accepted
;; by ix86_binary_operator_ok.
5276 (define_insn "*avx_mulv4si3"
5277 [(set (match_operand:V4SI 0 "register_operand" "=x")
5278 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%x")
5279 (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
5280 "TARGET_AVX && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5281 "vpmulld\t{%2, %1, %0|%0, %1, %2}"
5282 [(set_attr "type" "sseimul")
5283 (set_attr "prefix_extra" "1")
5284 (set_attr "prefix" "vex")
5285 (set_attr "mode" "TI")])
;; SSE4.1 insn for V4SI multiplication: emits the two-operand pmulld, so
;; operand 1 must be the same register as the destination ("%0") and is
;; commutative with operand 2.
5287 (define_insn "*sse4_1_mulv4si3"
5288 [(set (match_operand:V4SI 0 "register_operand" "=x")
5289 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%0")
5290 (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
5291 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5292 "pmulld\t{%2, %0|%0, %2}"
5293 [(set_attr "type" "sseimul")
5294 (set_attr "prefix_extra" "1")
5295 (set_attr "mode" "TI")])
5297 ;; We don't have a straight 32-bit parallel multiply on SSE5, so fake it with a
5298 ;; multiply/add. In general, we expect the define_split to occur before
5299 ;; register allocation, so we have to handle the corner case where the target
5300 ;; is the same as one of the inputs.
;; Operand 0 is earlyclobber ("=&x").  The split is allowed once reload has
;; completed, or earlier if operand 0 is not mentioned in operand 1 or 2.
;; The replacement is a (plus (mult ...)) form; operands[3] is set to the
;; V4SI zero constant, presumably as its addend.
5301 (define_insn_and_split "*sse5_mulv4si3"
5302 [(set (match_operand:V4SI 0 "register_operand" "=&x")
5303 (mult:V4SI (match_operand:V4SI 1 "register_operand" "%x")
5304 (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
5307 "&& (reload_completed
5308 || (!reg_mentioned_p (operands[0], operands[1])
5309 && !reg_mentioned_p (operands[0], operands[2])))"
5313 (plus:V4SI (mult:V4SI (match_dup 1)
5317 operands[3] = CONST0_RTX (V4SImode);
5319 [(set_attr "type" "ssemuladd")
5320 (set_attr "mode" "TI")])
;; V4SI multiply for SSE2 targets without SSE4.1 or SSE5; only valid while
;; new pseudos can still be created.  The split builds the result from two
;; gen_sse2_umulv2siv2di3 multiplies: one on elements 0 and 2, and one on
;; elements 1 and 3 after both inputs are shifted right as TImode values.
;; The two products are then packed with pshufd and merged into op0 with
;; punpckldq.
5322 (define_insn_and_split "*sse2_mulv4si3"
5323 [(set (match_operand:V4SI 0 "register_operand" "")
5324 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
5325 (match_operand:V4SI 2 "register_operand" "")))]
5326 "TARGET_SSE2 && !TARGET_SSE4_1 && !TARGET_SSE5
5327 && can_create_pseudo_p ()"
5332 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
5338 t1 = gen_reg_rtx (V4SImode);
5339 t2 = gen_reg_rtx (V4SImode);
5340 t3 = gen_reg_rtx (V4SImode);
5341 t4 = gen_reg_rtx (V4SImode);
5342 t5 = gen_reg_rtx (V4SImode);
5343 t6 = gen_reg_rtx (V4SImode);
5344 thirtytwo = GEN_INT (32);
5346 /* Multiply elements 2 and 0. */
5347 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t1),
5350 /* Shift both input vectors down one element, so that elements 3
5351 and 1 are now in the slots for elements 2 and 0. For K8, at
5352 least, this is faster than using a shuffle. */
5353 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2),
5354 gen_lowpart (TImode, op1),
5356 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3),
5357 gen_lowpart (TImode, op2),
5359 /* Multiply elements 3 and 1. */
5360 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t4),
5363 /* Move the results in element 2 down to element 1; we don't care
5364 what goes in elements 2 and 3. */
5365 emit_insn (gen_sse2_pshufd_1 (t5, t1, const0_rtx, const2_rtx,
5366 const0_rtx, const0_rtx));
5367 emit_insn (gen_sse2_pshufd_1 (t6, t4, const0_rtx, const2_rtx,
5368 const0_rtx, const0_rtx));
5370 /* Merge the parts back together. */
5371 emit_insn (gen_sse2_punpckldq (op0, t5, t6));
5375 (define_insn_and_split "mulv2di3"
5376 [(set (match_operand:V2DI 0 "register_operand" "")
5377 (mult:V2DI (match_operand:V2DI 1 "register_operand" "")
5378 (match_operand:V2DI 2 "register_operand" "")))]
5380 && can_create_pseudo_p ()"
5385 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
5390 /* op1: A,B,C,D, op2: E,F,G,H */
5392 op1 = gen_lowpart (V4SImode, operands[1]);
5393 op2 = gen_lowpart (V4SImode, operands[2]);
5394 t1 = gen_reg_rtx (V4SImode);
5395 t2 = gen_reg_rtx (V4SImode);
5396 t3 = gen_reg_rtx (V4SImode);
5397 t4 = gen_reg_rtx (V2DImode);
5398 t5 = gen_reg_rtx (V2DImode);
5401 emit_insn (gen_sse2_pshufd_1 (t1, op1,
5408 emit_move_insn (t2, CONST0_RTX (V4SImode));
5410 /* t3: (B*E),(A*F),(D*G),(C*H) */
5411 emit_insn (gen_sse5_pmacsdd (t3, t1, op2, t2));
5413 /* t4: (B*E)+(A*F), (D*G)+(C*H) */
5414 emit_insn (gen_sse5_phadddq (t4, t3));
5416 /* t5: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
5417 emit_insn (gen_ashlv2di3 (t5, t4, GEN_INT (32)));
5419 /* op0: (((B*E)+(A*F))<<32)+(B*F), (((D*G)+(C*H))<<32)+(D*H) */
5420 emit_insn (gen_sse5_pmacsdql (op0, op1, op2, t5));
5427 t1 = gen_reg_rtx (V2DImode);
5428 t2 = gen_reg_rtx (V2DImode);
5429 t3 = gen_reg_rtx (V2DImode);
5430 t4 = gen_reg_rtx (V2DImode);
5431 t5 = gen_reg_rtx (V2DImode);
5432 t6 = gen_reg_rtx (V2DImode);
5433 thirtytwo = GEN_INT (32);
5435 /* Multiply low parts. */
5436 emit_insn (gen_sse2_umulv2siv2di3 (t1, gen_lowpart (V4SImode, op1),
5437 gen_lowpart (V4SImode, op2)));
5439 /* Shift input vectors right 32 bits so we can multiply high parts. */
5440 emit_insn (gen_lshrv2di3 (t2, op1, thirtytwo));
5441 emit_insn (gen_lshrv2di3 (t3, op2, thirtytwo));
5443 /* Multiply high parts by low parts. */
5444 emit_insn (gen_sse2_umulv2siv2di3 (t4, gen_lowpart (V4SImode, op1),
5445 gen_lowpart (V4SImode, t3)));
5446 emit_insn (gen_sse2_umulv2siv2di3 (t5, gen_lowpart (V4SImode, op2),
5447 gen_lowpart (V4SImode, t2)));
5449 /* Shift them back. */
5450 emit_insn (gen_ashlv2di3 (t4, t4, thirtytwo));
5451 emit_insn (gen_ashlv2di3 (t5, t5, thirtytwo));
5453 /* Add the three parts together. */
5454 emit_insn (gen_addv2di3 (t6, t1, t4));
5455 emit_insn (gen_addv2di3 (op0, t6, t5));
;; Widening multiply, V8HI x V8HI -> V4SI, "hi" variant.  t1 receives the
;; gen_mulv8hi3 product and t2 the gen_smulv8hi3_highpart result; the two
;; are combined with gen_vec_interleave_highv8hi into operands[0] viewed
;; as V8HI.
5459 (define_expand "vec_widen_smult_hi_v8hi"
5460 [(match_operand:V4SI 0 "register_operand" "")
5461 (match_operand:V8HI 1 "register_operand" "")
5462 (match_operand:V8HI 2 "register_operand" "")]
5465 rtx op1, op2, t1, t2, dest;
5469 t1 = gen_reg_rtx (V8HImode);
5470 t2 = gen_reg_rtx (V8HImode);
5471 dest = gen_lowpart (V8HImode, operands[0]);
5473 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5474 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
5475 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
;; Widening multiply, V8HI x V8HI -> V4SI, "lo" variant.  t1 receives the
;; gen_mulv8hi3 product and t2 the gen_smulv8hi3_highpart result; the two
;; are combined with gen_vec_interleave_lowv8hi into operands[0] viewed
;; as V8HI.
5479 (define_expand "vec_widen_smult_lo_v8hi"
5480 [(match_operand:V4SI 0 "register_operand" "")
5481 (match_operand:V8HI 1 "register_operand" "")
5482 (match_operand:V8HI 2 "register_operand" "")]
5485 rtx op1, op2, t1, t2, dest;
5489 t1 = gen_reg_rtx (V8HImode);
5490 t2 = gen_reg_rtx (V8HImode);
5491 dest = gen_lowpart (V8HImode, operands[0]);
5493 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5494 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
5495 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
;; Widening multiply, V8HI x V8HI -> V4SI, unsigned "hi" variant.  Same
;; shape as the signed version but the second temporary comes from
;; gen_umulv8hi3_highpart; the results are combined with
;; gen_vec_interleave_highv8hi into operands[0] viewed as V8HI.
5499 (define_expand "vec_widen_umult_hi_v8hi"
5500 [(match_operand:V4SI 0 "register_operand" "")
5501 (match_operand:V8HI 1 "register_operand" "")
5502 (match_operand:V8HI 2 "register_operand" "")]
5505 rtx op1, op2, t1, t2, dest;
5509 t1 = gen_reg_rtx (V8HImode);
5510 t2 = gen_reg_rtx (V8HImode);
5511 dest = gen_lowpart (V8HImode, operands[0]);
5513 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5514 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
5515 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
;; Widening multiply, V8HI x V8HI -> V4SI, unsigned "lo" variant.  t1 is
;; the gen_mulv8hi3 product, t2 the gen_umulv8hi3_highpart result; they are
;; combined with gen_vec_interleave_lowv8hi into operands[0] viewed as V8HI.
5519 (define_expand "vec_widen_umult_lo_v8hi"
5520 [(match_operand:V4SI 0 "register_operand" "")
5521 (match_operand:V8HI 1 "register_operand" "")
5522 (match_operand:V8HI 2 "register_operand" "")]
5525 rtx op1, op2, t1, t2, dest;
5529 t1 = gen_reg_rtx (V8HImode);
5530 t2 = gen_reg_rtx (V8HImode);
5531 dest = gen_lowpart (V8HImode, operands[0]);
5533 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5534 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
5535 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
;; Widening multiply, V4SI x V4SI -> V2DI, signed "hi" variant.  Each input
;; is shuffled with gen_sse2_pshufd_1 into a fresh V4SI temporary, and the
;; temporaries are multiplied by gen_sse5_mulv2div2di3_high into
;; operands[0].
5539 (define_expand "vec_widen_smult_hi_v4si"
5540 [(match_operand:V2DI 0 "register_operand" "")
5541 (match_operand:V4SI 1 "register_operand" "")
5542 (match_operand:V4SI 2 "register_operand" "")]
5547 t1 = gen_reg_rtx (V4SImode);
5548 t2 = gen_reg_rtx (V4SImode);
5550 emit_insn (gen_sse2_pshufd_1 (t1, operands[1],
5555 emit_insn (gen_sse2_pshufd_1 (t2, operands[2],
5560 emit_insn (gen_sse5_mulv2div2di3_high (operands[0], t1, t2));
;; Widening multiply, V4SI x V4SI -> V2DI, signed "lo" variant.  Each input
;; is shuffled with gen_sse2_pshufd_1 into a fresh V4SI temporary, and the
;; temporaries are multiplied by gen_sse5_mulv2div2di3_low into operands[0].
5564 (define_expand "vec_widen_smult_lo_v4si"
5565 [(match_operand:V2DI 0 "register_operand" "")
5566 (match_operand:V4SI 1 "register_operand" "")
5567 (match_operand:V4SI 2 "register_operand" "")]
5572 t1 = gen_reg_rtx (V4SImode);
5573 t2 = gen_reg_rtx (V4SImode);
5575 emit_insn (gen_sse2_pshufd_1 (t1, operands[1],
5580 emit_insn (gen_sse2_pshufd_1 (t2, operands[2],
5585 emit_insn (gen_sse5_mulv2div2di3_low (operands[0], t1, t2));
;; Widening multiply, V4SI x V4SI -> V2DI, unsigned "hi" variant.  Each
;; input is interleaved with itself (gen_vec_interleave_highv4si) into a
;; temporary; the temporaries are then multiplied with
;; gen_sse2_umulv2siv2di3 into operands[0].
5590 (define_expand "vec_widen_umult_hi_v4si"
5591 [(match_operand:V2DI 0 "register_operand" "")
5592 (match_operand:V4SI 1 "register_operand" "")
5593 (match_operand:V4SI 2 "register_operand" "")]
5596 rtx op1, op2, t1, t2;
5600 t1 = gen_reg_rtx (V4SImode);
5601 t2 = gen_reg_rtx (V4SImode);
5603 emit_insn (gen_vec_interleave_highv4si (t1, op1, op1));
5604 emit_insn (gen_vec_interleave_highv4si (t2, op2, op2));
5605 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Widening multiply, V4SI x V4SI -> V2DI, unsigned "lo" variant.  Each
;; input is interleaved with itself (gen_vec_interleave_lowv4si) into a
;; temporary; the temporaries are then multiplied with
;; gen_sse2_umulv2siv2di3 into operands[0].
5609 (define_expand "vec_widen_umult_lo_v4si"
5610 [(match_operand:V2DI 0 "register_operand" "")
5611 (match_operand:V4SI 1 "register_operand" "")
5612 (match_operand:V4SI 2 "register_operand" "")]
5615 rtx op1, op2, t1, t2;
5619 t1 = gen_reg_rtx (V4SImode);
5620 t2 = gen_reg_rtx (V4SImode);
5622 emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1));
5623 emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2));
5624 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Dot-product accumulate: operands[0] = operands[3] + pmaddwd (operands[1],
;; operands[2]).  The pmaddwd result goes to a fresh V4SI temporary, which
;; is then added to the V4SI accumulator operand 3.
5628 (define_expand "sdot_prodv8hi"
5629 [(match_operand:V4SI 0 "register_operand" "")
5630 (match_operand:V8HI 1 "register_operand" "")
5631 (match_operand:V8HI 2 "register_operand" "")
5632 (match_operand:V4SI 3 "register_operand" "")]
5635 rtx t = gen_reg_rtx (V4SImode);
5636 emit_insn (gen_sse2_pmaddwd (t, operands[1], operands[2]));
5637 emit_insn (gen_addv4si3 (operands[0], operands[3], t));
;; Dot-product accumulate on V4SI inputs with a V2DI accumulator.
;;   t1 = umulv2siv2di3 (operands[1], operands[2]) + operands[3]
;;   t2, t3 = operands[1], operands[2] shifted right as TImode values
;;   t4 = umulv2siv2di3 (t2, t3)
;;   operands[0] = t1 + t4
5641 (define_expand "udot_prodv4si"
5642 [(match_operand:V2DI 0 "register_operand" "")
5643 (match_operand:V4SI 1 "register_operand" "")
5644 (match_operand:V4SI 2 "register_operand" "")
5645 (match_operand:V2DI 3 "register_operand" "")]
5650 t1 = gen_reg_rtx (V2DImode);
5651 emit_insn (gen_sse2_umulv2siv2di3 (t1, operands[1], operands[2]));
5652 emit_insn (gen_addv2di3 (t1, t1, operands[3]));
5654 t2 = gen_reg_rtx (V4SImode);
5655 t3 = gen_reg_rtx (V4SImode);
5656 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2),
5657 gen_lowpart (TImode, operands[1]),
5659 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3),
5660 gen_lowpart (TImode, operands[2]),
5663 t4 = gen_reg_rtx (V2DImode);
5664 emit_insn (gen_sse2_umulv2siv2di3 (t4, t2, t3));
5666 emit_insn (gen_addv2di3 (operands[0], t1, t4));
;; AVX right-shift insn (vpsra) for the SSEMODE24 modes (V8HI, V4SI).  The
;; SI shift count may be an SSE register or a constant accepted by "N".
;; The length_immediate attribute depends on whether operand 2 is a
;; const_int and is "0" otherwise.
5670 (define_insn "*avx_ashr<mode>3"
5671 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
5673 (match_operand:SSEMODE24 1 "register_operand" "x")
5674 (match_operand:SI 2 "nonmemory_operand" "xN")))]
5676 "vpsra<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5677 [(set_attr "type" "sseishft")
5678 (set_attr "prefix" "vex")
5679 (set (attr "length_immediate")
5680 (if_then_else (match_operand 2 "const_int_operand" "")
5682 (const_string "0")))
5683 (set_attr "mode" "TI")])
;; Two-operand right-shift insn (psra) for the SSEMODE24 modes (V8HI,
;; V4SI); operand 1 is tied to the destination ("0").  The SI shift count
;; may be an SSE register or a constant accepted by "N".  length_immediate
;; depends on whether operand 2 is a const_int and is "0" otherwise.
5685 (define_insn "ashr<mode>3"
5686 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
5688 (match_operand:SSEMODE24 1 "register_operand" "0")
5689 (match_operand:SI 2 "nonmemory_operand" "xN")))]
5691 "psra<ssevecsize>\t{%2, %0|%0, %2}"
5692 [(set_attr "type" "sseishft")
5693 (set_attr "prefix_data16" "1")
5694 (set (attr "length_immediate")
5695 (if_then_else (match_operand 2 "const_int_operand" "")
5697 (const_string "0")))
5698 (set_attr "mode" "TI")])
;; AVX logical right-shift insn (vpsrl) for the SSEMODE248 modes (V8HI,
;; V4SI, V2DI).  The SI shift count may be an SSE register or a constant
;; accepted by "N".  length_immediate depends on whether operand 2 is a
;; const_int and is "0" otherwise.
5700 (define_insn "*avx_lshr<mode>3"
5701 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
5702 (lshiftrt:SSEMODE248
5703 (match_operand:SSEMODE248 1 "register_operand" "x")
5704 (match_operand:SI 2 "nonmemory_operand" "xN")))]
5706 "vpsrl<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5707 [(set_attr "type" "sseishft")
5708 (set_attr "prefix" "vex")
5709 (set (attr "length_immediate")
5710 (if_then_else (match_operand 2 "const_int_operand" "")
5712 (const_string "0")))
5713 (set_attr "mode" "TI")])
;; Two-operand logical right-shift insn (psrl) for the SSEMODE248 modes
;; (V8HI, V4SI, V2DI); operand 1 is tied to the destination ("0").
;; length_immediate depends on whether operand 2 is a const_int and is "0"
;; otherwise.
5715 (define_insn "lshr<mode>3"
5716 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
5717 (lshiftrt:SSEMODE248
5718 (match_operand:SSEMODE248 1 "register_operand" "0")
5719 (match_operand:SI 2 "nonmemory_operand" "xN")))]
5721 "psrl<ssevecsize>\t{%2, %0|%0, %2}"
5722 [(set_attr "type" "sseishft")
5723 (set_attr "prefix_data16" "1")
5724 (set (attr "length_immediate")
5725 (if_then_else (match_operand 2 "const_int_operand" "")
5727 (const_string "0")))
5728 (set_attr "mode" "TI")])
;; AVX left-shift insn (vpsll) for the SSEMODE248 modes (V8HI, V4SI, V2DI).
;; The SI shift count may be an SSE register or a constant accepted by "N".
;; length_immediate depends on whether operand 2 is a const_int and is "0"
;; otherwise.
5730 (define_insn "*avx_ashl<mode>3"
5731 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
5733 (match_operand:SSEMODE248 1 "register_operand" "x")
5734 (match_operand:SI 2 "nonmemory_operand" "xN")))]
5736 "vpsll<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5737 [(set_attr "type" "sseishft")
5738 (set_attr "prefix" "vex")
5739 (set (attr "length_immediate")
5740 (if_then_else (match_operand 2 "const_int_operand" "")
5742 (const_string "0")))
5743 (set_attr "mode" "TI")])
;; Two-operand left-shift insn (psll) for the SSEMODE248 modes (V8HI, V4SI,
;; V2DI); operand 1 is tied to the destination ("0").  length_immediate
;; depends on whether operand 2 is a const_int and is "0" otherwise.
5745 (define_insn "ashl<mode>3"
5746 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
5748 (match_operand:SSEMODE248 1 "register_operand" "0")
5749 (match_operand:SI 2 "nonmemory_operand" "xN")))]
5751 "psll<ssevecsize>\t{%2, %0|%0, %2}"
5752 [(set_attr "type" "sseishft")
5753 (set_attr "prefix_data16" "1")
5754 (set (attr "length_immediate")
5755 (if_then_else (match_operand 2 "const_int_operand" "")
5757 (const_string "0")))
5758 (set_attr "mode" "TI")])
;; Whole-vector left shift for the SSEMODEI modes, expressed as a TImode
;; ashift.  Operands 0 and 1 are replaced by their TImode lowparts, and the
;; shift count must satisfy const_0_to_255_mul_8_operand.
5760 (define_expand "vec_shl_<mode>"
5761 [(set (match_operand:SSEMODEI 0 "register_operand" "")
5762 (ashift:TI (match_operand:SSEMODEI 1 "register_operand" "")
5763 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
5766 operands[0] = gen_lowpart (TImode, operands[0]);
5767 operands[1] = gen_lowpart (TImode, operands[1]);
;; Whole-vector right shift for the SSEMODEI modes, expressed as a TImode
;; lshiftrt.  Operands 0 and 1 are replaced by their TImode lowparts, and
;; the shift count must satisfy const_0_to_255_mul_8_operand.
5770 (define_expand "vec_shr_<mode>"
5771 [(set (match_operand:SSEMODEI 0 "register_operand" "")
5772 (lshiftrt:TI (match_operand:SSEMODEI 1 "register_operand" "")
5773 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
5776 operands[0] = gen_lowpart (TImode, operands[0]);
5777 operands[1] = gen_lowpart (TImode, operands[1]);
;; AVX max/min insn for the SSEMODE124 modes (V16QI, V8HI, V4SI): emits
;; vp<maxminiprefix><ssevecsize> in three-operand form.  The prefix_extra
;; attribute is chosen by comparing <MODE>mode with V8HImode for SMAX/SMIN
;; and with V16QImode for the other codes; the fallback value is "0".
5780 (define_insn "*avx_<code><mode>3"
5781 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
5783 (match_operand:SSEMODE124 1 "nonimmediate_operand" "%x")
5784 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
5785 "TARGET_AVX && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5786 "vp<maxminiprefix><ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5787 [(set_attr "type" "sseiadd")
5788 (set (attr "prefix_extra")
5790 (ne (symbol_ref "<MODE>mode != ((<CODE> == SMAX || <CODE> == SMIN) ? V8HImode : V16QImode)")
5793 (const_string "0")))
5794 (set_attr "prefix" "vex")
5795 (set_attr "mode" "TI")])
;; Expander for a <code> operation on V16QI operands; the operands are
;; passed through ix86_fixup_binary_operands_no_copy before the pattern is
;; generated.
5797 (define_expand "<code>v16qi3"
5798 [(set (match_operand:V16QI 0 "register_operand" "")
5800 (match_operand:V16QI 1 "nonimmediate_operand" "")
5801 (match_operand:V16QI 2 "nonimmediate_operand" "")))]
5803 "ix86_fixup_binary_operands_no_copy (<CODE>, V16QImode, operands);")
;; SSE2 insn for a <code> operation on V16QI: emits the two-operand
;; p<maxminiprefix>b, with operand 1 tied to the destination ("%0") and
;; commutative with operand 2.
5805 (define_insn "*<code>v16qi3"
5806 [(set (match_operand:V16QI 0 "register_operand" "=x")
5808 (match_operand:V16QI 1 "nonimmediate_operand" "%0")
5809 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
5810 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V16QImode, operands)"
5811 "p<maxminiprefix>b\t{%2, %0|%0, %2}"
5812 [(set_attr "type" "sseiadd")
5813 (set_attr "prefix_data16" "1")
5814 (set_attr "mode" "TI")])
;; Expander for a <code> operation on V8HI operands; the operands are
;; passed through ix86_fixup_binary_operands_no_copy before the pattern is
;; generated.
5816 (define_expand "<code>v8hi3"
5817 [(set (match_operand:V8HI 0 "register_operand" "")
5819 (match_operand:V8HI 1 "nonimmediate_operand" "")
5820 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
5822 "ix86_fixup_binary_operands_no_copy (<CODE>, V8HImode, operands);")
;; SSE2 insn for a <code> operation on V8HI: emits the two-operand
;; p<maxminiprefix>w, with operand 1 tied to the destination ("%0") and
;; commutative with operand 2.
5824 (define_insn "*<code>v8hi3"
5825 [(set (match_operand:V8HI 0 "register_operand" "=x")
5827 (match_operand:V8HI 1 "nonimmediate_operand" "%0")
5828 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
5829 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V8HImode, operands)"
5830 "p<maxminiprefix>w\t{%2, %0|%0, %2}"
5831 [(set_attr "type" "sseiadd")
5832 (set_attr "prefix_data16" "1")
5833 (set_attr "mode" "TI")])
;; Unsigned maximum on V8HI.  One path just fixes up the operands for UMAX.
;; The other path emits gen_sse2_ussubv8hi3 (op3, operands[1], op2)
;; followed by gen_addv8hi3 (op0, op3, op2).  op3 is the destination
;; itself unless that equals op2, in which case a fresh register is used so
;; op2 is still intact for the add.
5835 (define_expand "umaxv8hi3"
5836 [(set (match_operand:V8HI 0 "register_operand" "")
5837 (umax:V8HI (match_operand:V8HI 1 "register_operand" "")
5838 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
5842 ix86_fixup_binary_operands_no_copy (UMAX, V8HImode, operands);
5845 rtx op0 = operands[0], op2 = operands[2], op3 = op0;
5846 if (rtx_equal_p (op3, op2))
5847 op3 = gen_reg_rtx (V8HImode);
5848 emit_insn (gen_sse2_ussubv8hi3 (op3, operands[1], op2));
5849 emit_insn (gen_addv8hi3 (op0, op3, op2));
;; Signed maximum for the SSEMODE14 modes (V16QI, V4SI).  One path just
;; fixes up the operands for SMAX.  The other builds a vcond-style operand
;; array and calls ix86_expand_int_vcond: select operands[1] where
;; operands[1] > operands[2] (GT), otherwise operands[2].
5854 (define_expand "smax<mode>3"
5855 [(set (match_operand:SSEMODE14 0 "register_operand" "")
5856 (smax:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
5857 (match_operand:SSEMODE14 2 "register_operand" "")))]
5861 ix86_fixup_binary_operands_no_copy (SMAX, <MODE>mode, operands);
5867 xops[0] = operands[0];
5868 xops[1] = operands[1];
5869 xops[2] = operands[2];
5870 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
5871 xops[4] = operands[1];
5872 xops[5] = operands[2];
5873 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 max/min insn for the SSEMODE14 modes (V16QI, V4SI): emits the
;; two-operand p<maxminiprefix><ssevecsize>, with operand 1 tied to the
;; destination ("%0") and commutative with operand 2.
5879 (define_insn "*sse4_1_<code><mode>3"
5880 [(set (match_operand:SSEMODE14 0 "register_operand" "=x")
5882 (match_operand:SSEMODE14 1 "nonimmediate_operand" "%0")
5883 (match_operand:SSEMODE14 2 "nonimmediate_operand" "xm")))]
5884 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5885 "p<maxminiprefix><ssevecsize>\t{%2, %0|%0, %2}"
5886 [(set_attr "type" "sseiadd")
5887 (set_attr "prefix_extra" "1")
5888 (set_attr "mode" "TI")])
;; Unsigned maximum on V4SI.  One path just fixes up the operands for UMAX.
;; The other builds a vcond-style operand array and calls
;; ix86_expand_int_vcond: select operands[1] where operands[1] >
;; operands[2] under the unsigned comparison GTU, otherwise operands[2].
5890 (define_expand "umaxv4si3"
5891 [(set (match_operand:V4SI 0 "register_operand" "")
5892 (umax:V4SI (match_operand:V4SI 1 "register_operand" "")
5893 (match_operand:V4SI 2 "register_operand" "")))]
5897 ix86_fixup_binary_operands_no_copy (UMAX, V4SImode, operands);
5903 xops[0] = operands[0];
5904 xops[1] = operands[1];
5905 xops[2] = operands[2];
5906 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
5907 xops[4] = operands[1];
5908 xops[5] = operands[2];
5909 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 max/min insn for the SSEMODE24 modes (V8HI, V4SI): emits the
;; two-operand p<maxminiprefix><ssevecsize>, with operand 1 tied to the
;; destination ("%0") and commutative with operand 2.
5915 (define_insn "*sse4_1_<code><mode>3"
5916 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
5918 (match_operand:SSEMODE24 1 "nonimmediate_operand" "%0")
5919 (match_operand:SSEMODE24 2 "nonimmediate_operand" "xm")))]
5920 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5921 "p<maxminiprefix><ssevecsize>\t{%2, %0|%0, %2}"
5922 [(set_attr "type" "sseiadd")
5923 (set_attr "prefix_extra" "1")
5924 (set_attr "mode" "TI")])
;; Signed minimum for the SSEMODE14 modes (V16QI, V4SI).  One path just
;; fixes up the operands for SMIN.  The other calls ix86_expand_int_vcond
;; with the two value operands swapped relative to smax: select
;; operands[2] where operands[1] > operands[2] (GT), otherwise operands[1].
5926 (define_expand "smin<mode>3"
5927 [(set (match_operand:SSEMODE14 0 "register_operand" "")
5928 (smin:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
5929 (match_operand:SSEMODE14 2 "register_operand" "")))]
5933 ix86_fixup_binary_operands_no_copy (SMIN, <MODE>mode, operands);
5939 xops[0] = operands[0];
5940 xops[1] = operands[2];
5941 xops[2] = operands[1];
5942 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
5943 xops[4] = operands[1];
5944 xops[5] = operands[2];
5945 ok = ix86_expand_int_vcond (xops);
;; Unsigned minimum for the SSEMODE24 modes (V8HI, V4SI).  One path just
;; fixes up the operands for UMIN.  The other calls ix86_expand_int_vcond
;; with swapped value operands: select operands[2] where operands[1] >
;; operands[2] under the unsigned comparison GTU, otherwise operands[1].
5951 (define_expand "umin<mode>3"
5952 [(set (match_operand:SSEMODE24 0 "register_operand" "")
5953 (umin:SSEMODE24 (match_operand:SSEMODE24 1 "register_operand" "")
5954 (match_operand:SSEMODE24 2 "register_operand" "")))]
5958 ix86_fixup_binary_operands_no_copy (UMIN, <MODE>mode, operands);
5964 xops[0] = operands[0];
5965 xops[1] = operands[2];
5966 xops[2] = operands[1];
5967 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
5968 xops[4] = operands[1];
5969 xops[5] = operands[2];
5970 ok = ix86_expand_int_vcond (xops);
5976 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5978 ;; Parallel integral comparisons
5980 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander for element-wise equality on the SSEMODE124 modes (V16QI, V8HI,
;; V4SI).  Available for TARGET_SSE2 without TARGET_SSE5; operands are
;; passed through ix86_fixup_binary_operands_no_copy for EQ.
5982 (define_expand "sse2_eq<mode>3"
5983 [(set (match_operand:SSEMODE124 0 "register_operand" "")
5985 (match_operand:SSEMODE124 1 "nonimmediate_operand" "")
5986 (match_operand:SSEMODE124 2 "nonimmediate_operand" "")))]
5987 "TARGET_SSE2 && !TARGET_SSE5"
5988 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
;; AVX equality-compare insn for the SSEMODE1248 modes (V16QI, V8HI, V4SI,
;; V2DI): emits the three-operand vpcmpeq<ssevecsize>.  The prefix_extra
;; attribute depends on whether operand 0 is V2DI and is "*" otherwise.
5990 (define_insn "*avx_eq<mode>3"
5991 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
5993 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "%x")
5994 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm")))]
5995 "TARGET_AVX && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
5996 "vpcmpeq<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5997 [(set_attr "type" "ssecmp")
5998 (set (attr "prefix_extra")
5999 (if_then_else (match_operand:V2DI 0 "" "")
6001 (const_string "*")))
6002 (set_attr "prefix" "vex")
6003 (set_attr "mode" "TI")])
;; SSE2 equality-compare insn for the SSEMODE124 modes (V16QI, V8HI, V4SI):
;; emits the two-operand pcmpeq<ssevecsize>, with operand 1 tied to the
;; destination ("%0").  Disabled when TARGET_SSE5 is set.
6005 (define_insn "*sse2_eq<mode>3"
6006 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
6008 (match_operand:SSEMODE124 1 "nonimmediate_operand" "%0")
6009 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
6010 "TARGET_SSE2 && !TARGET_SSE5
6011 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
6012 "pcmpeq<ssevecsize>\t{%2, %0|%0, %2}"
6013 [(set_attr "type" "ssecmp")
6014 (set_attr "prefix_data16" "1")
6015 (set_attr "mode" "TI")])
;; Expander for element-wise equality on V2DI; operands are passed through
;; ix86_fixup_binary_operands_no_copy for EQ in V2DImode.
6017 (define_expand "sse4_1_eqv2di3"
6018 [(set (match_operand:V2DI 0 "register_operand" "")
6020 (match_operand:V2DI 1 "nonimmediate_operand" "")
6021 (match_operand:V2DI 2 "nonimmediate_operand" "")))]
6023 "ix86_fixup_binary_operands_no_copy (EQ, V2DImode, operands);")
;; SSE4.1 equality-compare insn for V2DI: emits the two-operand pcmpeqq,
;; with operand 1 tied to the destination ("%0") and commutative with
;; operand 2.
6025 (define_insn "*sse4_1_eqv2di3"
6026 [(set (match_operand:V2DI 0 "register_operand" "=x")
6028 (match_operand:V2DI 1 "nonimmediate_operand" "%0")
6029 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
6030 "TARGET_SSE4_1 && ix86_binary_operator_ok (EQ, V2DImode, operands)"
6031 "pcmpeqq\t{%2, %0|%0, %2}"
6032 [(set_attr "type" "ssecmp")
6033 (set_attr "prefix_extra" "1")
6034 (set_attr "mode" "TI")])
;; AVX greater-than compare insn for the SSEMODE1248 modes: emits the
;; three-operand vpcmpgt<ssevecsize>.  Unlike the equality patterns,
;; operand 1 carries no "%" marker and must be a register.  The
;; prefix_extra attribute depends on whether operand 0 is V2DI and is "*"
;; otherwise.
6036 (define_insn "*avx_gt<mode>3"
6037 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
6039 (match_operand:SSEMODE1248 1 "register_operand" "x")
6040 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm")))]
6042 "vpcmpgt<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
6043 [(set_attr "type" "ssecmp")
6044 (set (attr "prefix_extra")
6045 (if_then_else (match_operand:V2DI 0 "" "")
6047 (const_string "*")))
6048 (set_attr "prefix" "vex")
6049 (set_attr "mode" "TI")])
;; SSE2 greater-than compare insn for the SSEMODE124 modes (V16QI, V8HI,
;; V4SI): emits the two-operand pcmpgt<ssevecsize>.  Operand 1 is tied to
;; the destination ("0") and is not commutative.  Disabled when
;; TARGET_SSE5 is set.
6051 (define_insn "sse2_gt<mode>3"
6052 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
6054 (match_operand:SSEMODE124 1 "register_operand" "0")
6055 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
6056 "TARGET_SSE2 && !TARGET_SSE5"
6057 "pcmpgt<ssevecsize>\t{%2, %0|%0, %2}"
6058 [(set_attr "type" "ssecmp")
6059 (set_attr "prefix_data16" "1")
6060 (set_attr "mode" "TI")])
;; Greater-than compare insn for V2DI: emits the two-operand pcmpgtq, with
;; operand 1 tied to the destination ("0"); operand 2 may be a register or
;; memory.
6062 (define_insn "sse4_2_gtv2di3"
6063 [(set (match_operand:V2DI 0 "register_operand" "=x")
6065 (match_operand:V2DI 1 "register_operand" "0")
6066 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
6068 "pcmpgtq\t{%2, %0|%0, %2}"
6069 [(set_attr "type" "ssecmp")
6070 (set_attr "prefix_extra" "1")
6071 (set_attr "mode" "TI")])
;; Vector conditional select for the SSEMODEI modes: operand 0 receives
;; operand 1 or operand 2 according to the comparison operator 3 applied to
;; operands 4 and 5.  Expansion is delegated to ix86_expand_int_vcond,
;; whose return value is tested.
6073 (define_expand "vcond<mode>"
6074 [(set (match_operand:SSEMODEI 0 "register_operand" "")
6075 (if_then_else:SSEMODEI
6076 (match_operator 3 ""
6077 [(match_operand:SSEMODEI 4 "nonimmediate_operand" "")
6078 (match_operand:SSEMODEI 5 "nonimmediate_operand" "")])
6079 (match_operand:SSEMODEI 1 "general_operand" "")
6080 (match_operand:SSEMODEI 2 "general_operand" "")))]
6083 if (ix86_expand_int_vcond (operands))
;; Variant of vcond<mode> under the "vcondu" name, with the same operand
;; layout: operand 0 receives operand 1 or operand 2 according to operator
;; 3 applied to operands 4 and 5.  Expansion is likewise delegated to
;; ix86_expand_int_vcond.
6089 (define_expand "vcondu<mode>"
6090 [(set (match_operand:SSEMODEI 0 "register_operand" "")
6091 (if_then_else:SSEMODEI
6092 (match_operator 3 ""
6093 [(match_operand:SSEMODEI 4 "nonimmediate_operand" "")
6094 (match_operand:SSEMODEI 5 "nonimmediate_operand" "")])
6095 (match_operand:SSEMODEI 1 "general_operand" "")
6096 (match_operand:SSEMODEI 2 "general_operand" "")))]
6099 if (ix86_expand_int_vcond (operands))
6105 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6107 ;; Parallel bitwise logical operations
6109 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; One's complement for the SSEMODEI modes, expressed as an XOR.  The
;; preparation code builds a CONST_VECTOR with every one of the mode's
;; GET_MODE_NUNITS elements set to constm1_rtx (all ones) and forces it
;; into a register as operands[2].
6111 (define_expand "one_cmpl<mode>2"
6112 [(set (match_operand:SSEMODEI 0 "register_operand" "")
6113 (xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
6117 int i, n = GET_MODE_NUNITS (<MODE>mode);
6118 rtvec v = rtvec_alloc (n);
6120 for (i = 0; i < n; ++i)
6121 RTVEC_ELT (v, i) = constm1_rtx;
6123 operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v));
;; AVX and-not insn for the 256-bit integer modes (AVX256MODEI): operand 1
;; is the complemented (not:) input, and the insn emits the three-operand
;; vandnps.  The "mode" attribute is <avxvecpsmode>.
6126 (define_insn "*avx_andnot<mode>3"
6127 [(set (match_operand:AVX256MODEI 0 "register_operand" "=x")
6129 (not:AVX256MODEI (match_operand:AVX256MODEI 1 "register_operand" "x"))
6130 (match_operand:AVX256MODEI 2 "nonimmediate_operand" "xm")))]
6132 "vandnps\t{%2, %1, %0|%0, %1, %2}"
6133 [(set_attr "type" "sselog")
6134 (set_attr "prefix" "vex")
6135 (set_attr "mode" "<avxvecpsmode>")])
;; And-not insn for the SSEMODEI modes on SSE targets without SSE2: emits
;; the two-operand andnps.  Operand 1, the complemented (not:) input, is
;; tied to the destination ("0").
6137 (define_insn "*sse_andnot<mode>3"
6138 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
6140 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0"))
6141 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
6142 "(TARGET_SSE && !TARGET_SSE2)"
6143 "andnps\t{%2, %0|%0, %2}"
6144 [(set_attr "type" "sselog")
6145 (set_attr "mode" "V4SF")])
;; AVX and-not insn for the 128-bit integer modes (SSEMODEI): operand 1 is
;; the complemented (not:) input, and the insn emits the three-operand
;; vpandn.
6147 (define_insn "*avx_andnot<mode>3"
6148 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
6150 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "x"))
6151 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
6153 "vpandn\t{%2, %1, %0|%0, %1, %2}"
6154 [(set_attr "type" "sselog")
6155 (set_attr "prefix" "vex")
6156 (set_attr "mode" "TI")])
;; And-not insn for the SSEMODEI modes: emits the two-operand pandn.
;; Operand 1, the complemented (not:) input, is tied to the destination
;; ("0"); operand 2 may be a register or memory.
6158 (define_insn "sse2_andnot<mode>3"
6159 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
6161 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0"))
6162 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
6164 "pandn\t{%2, %0|%0, %2}"
6165 [(set_attr "type" "sselog")
6166 (set_attr "prefix_data16" "1")
6167 (set_attr "mode" "TI")])
;; And-not insn on TFmode values, emitted as the integer instruction pandn.
;; Operand 1, the complemented (not:) input, is tied to the destination
;; ("0").
6169 (define_insn "*andnottf3"
6170 [(set (match_operand:TF 0 "register_operand" "=x")
6172 (not:TF (match_operand:TF 1 "register_operand" "0"))
6173 (match_operand:TF 2 "nonimmediate_operand" "xm")))]
6175 "pandn\t{%2, %0|%0, %2}"
6176 [(set_attr "type" "sselog")
6177 (set_attr "prefix_data16" "1")
6178 (set_attr "mode" "TI")])
;; Expander for a <code> logical operation on the SSEMODEI modes; the
;; operands are passed through ix86_fixup_binary_operands_no_copy before
;; the pattern is generated.
6180 (define_expand "<code><mode>3"
6181 [(set (match_operand:SSEMODEI 0 "register_operand" "")
6183 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
6184 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
6186 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; AVX logical insn for the 256-bit integer modes (AVX256MODEI): emits the
;; three-operand v<plogicprefix>ps.  Operand 1 is commutative with operand
;; 2; the "mode" attribute is <avxvecpsmode>.
6188 (define_insn "*avx_<code><mode>3"
6189 [(set (match_operand:AVX256MODEI 0 "register_operand" "=x")
6191 (match_operand:AVX256MODEI 1 "nonimmediate_operand" "%x")
6192 (match_operand:AVX256MODEI 2 "nonimmediate_operand" "xm")))]
6194 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6195 "v<plogicprefix>ps\t{%2, %1, %0|%0, %1, %2}"
6196 [(set_attr "type" "sselog")
6197 (set_attr "prefix" "vex")
6198 (set_attr "mode" "<avxvecpsmode>")])
;; Logical insn for the SSEMODEI modes on SSE targets without SSE2: emits
;; the two-operand <plogicprefix>ps, with operand 1 tied to the destination
;; ("%0") and commutative with operand 2.
6200 (define_insn "*sse_<code><mode>3"
6201 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
6203 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
6204 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
6205 "(TARGET_SSE && !TARGET_SSE2)
6206 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6207 "<plogicprefix>ps\t{%2, %0|%0, %2}"
6208 [(set_attr "type" "sselog")
6209 (set_attr "mode" "V4SF")])
;; AVX logical insn for the 128-bit integer modes (SSEMODEI): emits the
;; three-operand vp<plogicprefix>.  Operand 1 is commutative with operand
;; 2; operand 2 may be a register or memory.
6211 (define_insn "*avx_<code><mode>3"
6212 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
6214 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%x")
6215 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
6217 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6218 "vp<plogicprefix>\t{%2, %1, %0|%0, %1, %2}"
6219 [(set_attr "type" "sselog")
6220 (set_attr "prefix" "vex")
6221 (set_attr "mode" "TI")])
;; SSE2 logical insn for the SSEMODEI modes: emits the two-operand
;; p<plogicprefix>, with operand 1 tied to the destination ("%0") and
;; commutative with operand 2.
6223 (define_insn "*sse2_<code><mode>3"
6224 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
6226 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
6227 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
6228 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6229 "p<plogicprefix>\t{%2, %0|%0, %2}"
6230 [(set_attr "type" "sselog")
6231 (set_attr "prefix_data16" "1")
6232 (set_attr "mode" "TI")])
;; Expander for a <code> logical operation on TFmode values; the operands
;; are passed through ix86_fixup_binary_operands_no_copy before the pattern
;; is generated.
6234 (define_expand "<code>tf3"
6235 [(set (match_operand:TF 0 "register_operand" "")
6237 (match_operand:TF 1 "nonimmediate_operand" "")
6238 (match_operand:TF 2 "nonimmediate_operand" "")))]
6240 "ix86_fixup_binary_operands_no_copy (<CODE>, TFmode, operands);")
;; SSE2 logical insn on TFmode values, emitted as the integer instruction
;; p<plogicprefix>.  Operand 1 is tied to the destination ("%0") and is
;; commutative with operand 2.
6242 (define_insn "*<code>tf3"
6243 [(set (match_operand:TF 0 "register_operand" "=x")
6245 (match_operand:TF 1 "nonimmediate_operand" "%0")
6246 (match_operand:TF 2 "nonimmediate_operand" "xm")))]
6247 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, TFmode, operands)"
6248 "p<plogicprefix>\t{%2, %0|%0, %2}"
6249 [(set_attr "type" "sselog")
6250 (set_attr "prefix_data16" "1")
6251 (set_attr "mode" "TI")])
6253 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6255 ;; Parallel integral element swizzling
6257 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6260 ;; op1 = abcdefghijklmnop
6261 ;; op2 = qrstuvwxyz012345
6262 ;; h1 = aqbrcsdteufvgwhx
6263 ;; l1 = iyjzk0l1m2n3o4p5
6264 ;; h2 = aiqybjrzcks0dlt1
6265 ;; l2 = emu2fnv3gow4hpx5
6266 ;; h3 = aeimquy2bfjnrvz3
6267 ;; l3 = cgkosw04dhlptx15
6268 ;; result = bdfhjlnprtvxz135
;; Expander producing a V16QI result (operand 0) from two V8HI register
;; inputs.  One visible path hands the operands to ix86_expand_sse5_pack.
;; The other reinterprets both inputs as V16QI via gen_lowpart and applies
;; three rounds of high/low byte interleaves into fresh pseudos
;; (h1/l1, h2/l2, h3/l3), followed by a final low interleave of l3 and h3
;; into operand 0.
6269 (define_expand "vec_pack_trunc_v8hi"
6270 [(match_operand:V16QI 0 "register_operand" "")
6271 (match_operand:V8HI 1 "register_operand" "")
6272 (match_operand:V8HI 2 "register_operand" "")]
6275 rtx op1, op2, h1, l1, h2, l2, h3, l3;
6279 ix86_expand_sse5_pack (operands);
6283 op1 = gen_lowpart (V16QImode, operands[1]);
6284 op2 = gen_lowpart (V16QImode, operands[2]);
6285 h1 = gen_reg_rtx (V16QImode);
6286 l1 = gen_reg_rtx (V16QImode);
6287 h2 = gen_reg_rtx (V16QImode);
6288 l2 = gen_reg_rtx (V16QImode);
6289 h3 = gen_reg_rtx (V16QImode);
6290 l3 = gen_reg_rtx (V16QImode);
6292 emit_insn (gen_vec_interleave_highv16qi (h1, op1, op2));
6293 emit_insn (gen_vec_interleave_lowv16qi (l1, op1, op2));
6294 emit_insn (gen_vec_interleave_highv16qi (h2, l1, h1));
6295 emit_insn (gen_vec_interleave_lowv16qi (l2, l1, h1));
6296 emit_insn (gen_vec_interleave_highv16qi (h3, l2, h2));
6297 emit_insn (gen_vec_interleave_lowv16qi (l3, l2, h2));
6298 emit_insn (gen_vec_interleave_lowv16qi (operands[0], l3, h3));
6309 ;; result = bdfhjlnp
;; Expander producing a V8HI result (operand 0) from two V4SI register
;; inputs.  One visible path hands the operands to ix86_expand_sse5_pack.
;; The other reinterprets both inputs as V8HI via gen_lowpart and applies
;; two rounds of high/low word interleaves into fresh pseudos (h1/l1,
;; h2/l2), followed by a final low interleave of l2 and h2 into operand 0.
6310 (define_expand "vec_pack_trunc_v4si"
6311 [(match_operand:V8HI 0 "register_operand" "")
6312 (match_operand:V4SI 1 "register_operand" "")
6313 (match_operand:V4SI 2 "register_operand" "")]
6316 rtx op1, op2, h1, l1, h2, l2;
6320 ix86_expand_sse5_pack (operands);
6324 op1 = gen_lowpart (V8HImode, operands[1]);
6325 op2 = gen_lowpart (V8HImode, operands[2]);
6326 h1 = gen_reg_rtx (V8HImode);
6327 l1 = gen_reg_rtx (V8HImode);
6328 h2 = gen_reg_rtx (V8HImode);
6329 l2 = gen_reg_rtx (V8HImode);
6331 emit_insn (gen_vec_interleave_highv8hi (h1, op1, op2));
6332 emit_insn (gen_vec_interleave_lowv8hi (l1, op1, op2));
6333 emit_insn (gen_vec_interleave_highv8hi (h2, l1, h1));
6334 emit_insn (gen_vec_interleave_lowv8hi (l2, l1, h1));
6335 emit_insn (gen_vec_interleave_lowv8hi (operands[0], l2, h2));
;; Expander producing a V4SI result (operand 0) from two V2DI register
;; inputs.  One visible path hands the operands to ix86_expand_sse5_pack.
;; The other reinterprets both inputs as V4SI via gen_lowpart, performs one
;; high and one low doubleword interleave into fresh pseudos (h1, l1), and
;; then a final low interleave of l1 and h1 into operand 0.
6345 (define_expand "vec_pack_trunc_v2di"
6346 [(match_operand:V4SI 0 "register_operand" "")
6347 (match_operand:V2DI 1 "register_operand" "")
6348 (match_operand:V2DI 2 "register_operand" "")]
6351 rtx op1, op2, h1, l1;
6355 ix86_expand_sse5_pack (operands);
6359 op1 = gen_lowpart (V4SImode, operands[1]);
6360 op2 = gen_lowpart (V4SImode, operands[2]);
6361 h1 = gen_reg_rtx (V4SImode);
6362 l1 = gen_reg_rtx (V4SImode);
6364 emit_insn (gen_vec_interleave_highv4si (h1, op1, op2));
6365 emit_insn (gen_vec_interleave_lowv4si (l1, op1, op2));
6366 emit_insn (gen_vec_interleave_lowv4si (operands[0], l1, h1));
;; Expander for the high-half byte interleave of two V16QI operands.  The
;; selection list pairs indices 8..15 with indices 24..31, alternating
;; between the two.  The visible preparation statement emits
;; sse2_punpckhbw on the three operands.
6370 (define_expand "vec_interleave_highv16qi"
6371 [(set (match_operand:V16QI 0 "register_operand" "")
6374 (match_operand:V16QI 1 "register_operand" "")
6375 (match_operand:V16QI 2 "nonimmediate_operand" ""))
6376 (parallel [(const_int 8) (const_int 24)
6377 (const_int 9) (const_int 25)
6378 (const_int 10) (const_int 26)
6379 (const_int 11) (const_int 27)
6380 (const_int 12) (const_int 28)
6381 (const_int 13) (const_int 29)
6382 (const_int 14) (const_int 30)
6383 (const_int 15) (const_int 31)])))]
6386 emit_insn (gen_sse2_punpckhbw (operands[0], operands[1], operands[2]));
;; Expander for the low-half byte interleave of two V16QI operands.  The
;; selection list pairs indices 0..7 with indices 16..23, alternating
;; between the two.  The visible preparation statement emits
;; sse2_punpcklbw on the three operands.
6390 (define_expand "vec_interleave_lowv16qi"
6391 [(set (match_operand:V16QI 0 "register_operand" "")
6394 (match_operand:V16QI 1 "register_operand" "")
6395 (match_operand:V16QI 2 "nonimmediate_operand" ""))
6396 (parallel [(const_int 0) (const_int 16)
6397 (const_int 1) (const_int 17)
6398 (const_int 2) (const_int 18)
6399 (const_int 3) (const_int 19)
6400 (const_int 4) (const_int 20)
6401 (const_int 5) (const_int 21)
6402 (const_int 6) (const_int 22)
6403 (const_int 7) (const_int 23)])))]
6406 emit_insn (gen_sse2_punpcklbw (operands[0], operands[1], operands[2]));
;; Expander for the high-half word interleave of two V8HI operands.  The
;; selection list pairs indices 4..7 with indices 12..15, alternating
;; between the two.  The visible preparation statement emits
;; sse2_punpckhwd on the three operands.
;; Operand 0 uses an empty constraint string like the other
;; vec_interleave_* expanders; a bare "=" carried no information here.
6410 (define_expand "vec_interleave_highv8hi"
6411 [(set (match_operand:V8HI 0 "register_operand" "")
6414 (match_operand:V8HI 1 "register_operand" "")
6415 (match_operand:V8HI 2 "nonimmediate_operand" ""))
6416 (parallel [(const_int 4) (const_int 12)
6417 (const_int 5) (const_int 13)
6418 (const_int 6) (const_int 14)
6419 (const_int 7) (const_int 15)])))]
6422 emit_insn (gen_sse2_punpckhwd (operands[0], operands[1], operands[2]));
;; Expander for the low-half word interleave of two V8HI operands.  The
;; selection list pairs indices 0..3 with indices 8..11, alternating
;; between the two.  The visible preparation statement emits
;; sse2_punpcklwd on the three operands.
6426 (define_expand "vec_interleave_lowv8hi"
6427 [(set (match_operand:V8HI 0 "register_operand" "")
6430 (match_operand:V8HI 1 "register_operand" "")
6431 (match_operand:V8HI 2 "nonimmediate_operand" ""))
6432 (parallel [(const_int 0) (const_int 8)
6433 (const_int 1) (const_int 9)
6434 (const_int 2) (const_int 10)
6435 (const_int 3) (const_int 11)])))]
6438 emit_insn (gen_sse2_punpcklwd (operands[0], operands[1], operands[2]));
;; Expander for the high-half doubleword interleave of two V4SI operands.
;; The selection list is 2, 6, 3, 7.  The visible preparation statement
;; emits sse2_punpckhdq on the three operands.
6442 (define_expand "vec_interleave_highv4si"
6443 [(set (match_operand:V4SI 0 "register_operand" "")
6446 (match_operand:V4SI 1 "register_operand" "")
6447 (match_operand:V4SI 2 "nonimmediate_operand" ""))
6448 (parallel [(const_int 2) (const_int 6)
6449 (const_int 3) (const_int 7)])))]
6452 emit_insn (gen_sse2_punpckhdq (operands[0], operands[1], operands[2]));
;; Expander for the low-half doubleword interleave of two V4SI operands.
;; The selection list is 0, 4, 1, 5.  The visible preparation statement
;; emits sse2_punpckldq on the three operands.
6456 (define_expand "vec_interleave_lowv4si"
6457 [(set (match_operand:V4SI 0 "register_operand" "")
6460 (match_operand:V4SI 1 "register_operand" "")
6461 (match_operand:V4SI 2 "nonimmediate_operand" ""))
6462 (parallel [(const_int 0) (const_int 4)
6463 (const_int 1) (const_int 5)])))]
6466 emit_insn (gen_sse2_punpckldq (operands[0], operands[1], operands[2]));
;; Expander for the high interleave of two V2DI operands.  The selection
;; list begins with index 1.  The visible preparation statement emits
;; sse2_punpckhqdq on the three operands.
6470 (define_expand "vec_interleave_highv2di"
6471 [(set (match_operand:V2DI 0 "register_operand" "")
6474 (match_operand:V2DI 1 "register_operand" "")
6475 (match_operand:V2DI 2 "nonimmediate_operand" ""))
6476 (parallel [(const_int 1)
6480 emit_insn (gen_sse2_punpckhqdq (operands[0], operands[1], operands[2]));
;; Expander for the low interleave of two V2DI operands.  The selection
;; list begins with index 0.  The visible preparation statement emits
;; sse2_punpcklqdq on the three operands.
6484 (define_expand "vec_interleave_lowv2di"
6485 [(set (match_operand:V2DI 0 "register_operand" "")
6488 (match_operand:V2DI 1 "register_operand" "")
6489 (match_operand:V2DI 2 "nonimmediate_operand" ""))
6490 (parallel [(const_int 0)
6494 emit_insn (gen_sse2_punpcklqdq (operands[0], operands[1], operands[2]));
;; Expander for the high-half interleave of two V4SF operands.  Operand 1
;; must be a register; operand 2 may be a register or memory.  The
;; selection list is 2, 6, 3, 7.
6498 (define_expand "vec_interleave_highv4sf"
6499 [(set (match_operand:V4SF 0 "register_operand" "")
6502 (match_operand:V4SF 1 "register_operand" "")
6503 (match_operand:V4SF 2 "nonimmediate_operand" ""))
6504 (parallel [(const_int 2) (const_int 6)
6505 (const_int 3) (const_int 7)])))]
;; Expander for the low-half interleave of two V4SF operands.  Operand 1
;; must be a register; operand 2 may be a register or memory.  The
;; selection list is 0, 4, 1, 5.
6508 (define_expand "vec_interleave_lowv4sf"
6509 [(set (match_operand:V4SF 0 "register_operand" "")
6512 (match_operand:V4SF 1 "register_operand" "")
6513 (match_operand:V4SF 2 "nonimmediate_operand" ""))
6514 (parallel [(const_int 0) (const_int 4)
6515 (const_int 1) (const_int 5)])))]
;; Expander for the high interleave of two V2DF operands.  Operand 1 must
;; be a register; operand 2 may be a register or memory.  The selection
;; list begins with index 1.
6518 (define_expand "vec_interleave_highv2df"
6519 [(set (match_operand:V2DF 0 "register_operand" "")
6522 (match_operand:V2DF 1 "register_operand" "")
6523 (match_operand:V2DF 2 "nonimmediate_operand" ""))
6524 (parallel [(const_int 1)
;; Expander for the low interleave of two V2DF operands.  Operand 1 must
;; be a register; operand 2 may be a register or memory.  The selection
;; list begins with index 0.
6528 (define_expand "vec_interleave_lowv2df"
6529 [(set (match_operand:V2DF 0 "register_operand" "")
6532 (match_operand:V2DF 1 "register_operand" "")
6533 (match_operand:V2DF 2 "nonimmediate_operand" ""))
6534 (parallel [(const_int 0)
;; VEX three-operand vpacksswb: V16QI destination built from V8HI operand 1
;; (SSE register) and V8HI operand 2 (SSE register or memory).
6538 (define_insn "*avx_packsswb"
6539 [(set (match_operand:V16QI 0 "register_operand" "=x")
6542 (match_operand:V8HI 1 "register_operand" "x"))
6544 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
6546 "vpacksswb\t{%2, %1, %0|%0, %1, %2}"
6547 [(set_attr "type" "sselog")
6548 (set_attr "prefix" "vex")
6549 (set_attr "mode" "TI")])
;; Two-operand packsswb: V16QI destination built from V8HI operand 1 (tied
;; to the destination register, "0") and V8HI operand 2 (SSE register or
;; memory).  prefix_data16 is 1.
6551 (define_insn "sse2_packsswb"
6552 [(set (match_operand:V16QI 0 "register_operand" "=x")
6555 (match_operand:V8HI 1 "register_operand" "0"))
6557 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
6559 "packsswb\t{%2, %0|%0, %2}"
6560 [(set_attr "type" "sselog")
6561 (set_attr "prefix_data16" "1")
6562 (set_attr "mode" "TI")])
;; VEX three-operand vpackssdw: V8HI destination built from V4SI operand 1
;; (SSE register) and V4SI operand 2 (SSE register or memory).
6564 (define_insn "*avx_packssdw"
6565 [(set (match_operand:V8HI 0 "register_operand" "=x")
6568 (match_operand:V4SI 1 "register_operand" "x"))
6570 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
6572 "vpackssdw\t{%2, %1, %0|%0, %1, %2}"
6573 [(set_attr "type" "sselog")
6574 (set_attr "prefix" "vex")
6575 (set_attr "mode" "TI")])
;; Two-operand packssdw: V8HI destination built from V4SI operand 1 (tied
;; to the destination register, "0") and V4SI operand 2 (SSE register or
;; memory).  prefix_data16 is 1.
6577 (define_insn "sse2_packssdw"
6578 [(set (match_operand:V8HI 0 "register_operand" "=x")
6581 (match_operand:V4SI 1 "register_operand" "0"))
6583 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
6585 "packssdw\t{%2, %0|%0, %2}"
6586 [(set_attr "type" "sselog")
6587 (set_attr "prefix_data16" "1")
6588 (set_attr "mode" "TI")])
;; VEX three-operand vpackuswb: V16QI destination built from V8HI operand 1
;; (SSE register) and V8HI operand 2 (SSE register or memory).
6590 (define_insn "*avx_packuswb"
6591 [(set (match_operand:V16QI 0 "register_operand" "=x")
6594 (match_operand:V8HI 1 "register_operand" "x"))
6596 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
6598 "vpackuswb\t{%2, %1, %0|%0, %1, %2}"
6599 [(set_attr "type" "sselog")
6600 (set_attr "prefix" "vex")
6601 (set_attr "mode" "TI")])
;; Two-operand packuswb: V16QI destination built from V8HI operand 1 (tied
;; to the destination register, "0") and V8HI operand 2 (SSE register or
;; memory).  prefix_data16 is 1.
6603 (define_insn "sse2_packuswb"
6604 [(set (match_operand:V16QI 0 "register_operand" "=x")
6607 (match_operand:V8HI 1 "register_operand" "0"))
6609 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
6611 "packuswb\t{%2, %0|%0, %2}"
6612 [(set_attr "type" "sselog")
6613 (set_attr "prefix_data16" "1")
6614 (set_attr "mode" "TI")])
;; VEX three-operand vpunpckhbw on V16QI.  The selection list pairs indices
;; 8..15 with 24..31.  Operand 1 is any SSE register; operand 2 may be a
;; register or memory.
6616 (define_insn "*avx_punpckhbw"
6617 [(set (match_operand:V16QI 0 "register_operand" "=x")
6620 (match_operand:V16QI 1 "register_operand" "x")
6621 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
6622 (parallel [(const_int 8) (const_int 24)
6623 (const_int 9) (const_int 25)
6624 (const_int 10) (const_int 26)
6625 (const_int 11) (const_int 27)
6626 (const_int 12) (const_int 28)
6627 (const_int 13) (const_int 29)
6628 (const_int 14) (const_int 30)
6629 (const_int 15) (const_int 31)])))]
6631 "vpunpckhbw\t{%2, %1, %0|%0, %1, %2}"
6632 [(set_attr "type" "sselog")
6633 (set_attr "prefix" "vex")
6634 (set_attr "mode" "TI")])
;; Two-operand punpckhbw on V16QI.  The selection list pairs indices 8..15
;; with 24..31.  Operand 1 is tied to the destination ("0"); operand 2 may
;; be a register or memory.  prefix_data16 is 1.
6636 (define_insn "sse2_punpckhbw"
6637 [(set (match_operand:V16QI 0 "register_operand" "=x")
6640 (match_operand:V16QI 1 "register_operand" "0")
6641 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
6642 (parallel [(const_int 8) (const_int 24)
6643 (const_int 9) (const_int 25)
6644 (const_int 10) (const_int 26)
6645 (const_int 11) (const_int 27)
6646 (const_int 12) (const_int 28)
6647 (const_int 13) (const_int 29)
6648 (const_int 14) (const_int 30)
6649 (const_int 15) (const_int 31)])))]
6651 "punpckhbw\t{%2, %0|%0, %2}"
6652 [(set_attr "type" "sselog")
6653 (set_attr "prefix_data16" "1")
6654 (set_attr "mode" "TI")])
;; VEX three-operand vpunpcklbw on V16QI.  The selection list pairs indices
;; 0..7 with 16..23.  Operand 1 is any SSE register; operand 2 may be a
;; register or memory.
6656 (define_insn "*avx_punpcklbw"
6657 [(set (match_operand:V16QI 0 "register_operand" "=x")
6660 (match_operand:V16QI 1 "register_operand" "x")
6661 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
6662 (parallel [(const_int 0) (const_int 16)
6663 (const_int 1) (const_int 17)
6664 (const_int 2) (const_int 18)
6665 (const_int 3) (const_int 19)
6666 (const_int 4) (const_int 20)
6667 (const_int 5) (const_int 21)
6668 (const_int 6) (const_int 22)
6669 (const_int 7) (const_int 23)])))]
6671 "vpunpcklbw\t{%2, %1, %0|%0, %1, %2}"
6672 [(set_attr "type" "sselog")
6673 (set_attr "prefix" "vex")
6674 (set_attr "mode" "TI")])
;; Two-operand punpcklbw on V16QI.  The selection list pairs indices 0..7
;; with 16..23.  Operand 1 is tied to the destination ("0"); operand 2 may
;; be a register or memory.  prefix_data16 is 1.
6676 (define_insn "sse2_punpcklbw"
6677 [(set (match_operand:V16QI 0 "register_operand" "=x")
6680 (match_operand:V16QI 1 "register_operand" "0")
6681 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
6682 (parallel [(const_int 0) (const_int 16)
6683 (const_int 1) (const_int 17)
6684 (const_int 2) (const_int 18)
6685 (const_int 3) (const_int 19)
6686 (const_int 4) (const_int 20)
6687 (const_int 5) (const_int 21)
6688 (const_int 6) (const_int 22)
6689 (const_int 7) (const_int 23)])))]
6691 "punpcklbw\t{%2, %0|%0, %2}"
6692 [(set_attr "type" "sselog")
6693 (set_attr "prefix_data16" "1")
6694 (set_attr "mode" "TI")])
;; VEX three-operand vpunpckhwd on V8HI.  The selection list pairs indices
;; 4..7 with 12..15.  Operand 1 is any SSE register; operand 2 may be a
;; register or memory.
6696 (define_insn "*avx_punpckhwd"
6697 [(set (match_operand:V8HI 0 "register_operand" "=x")
6700 (match_operand:V8HI 1 "register_operand" "x")
6701 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
6702 (parallel [(const_int 4) (const_int 12)
6703 (const_int 5) (const_int 13)
6704 (const_int 6) (const_int 14)
6705 (const_int 7) (const_int 15)])))]
6707 "vpunpckhwd\t{%2, %1, %0|%0, %1, %2}"
6708 [(set_attr "type" "sselog")
6709 (set_attr "prefix" "vex")
6710 (set_attr "mode" "TI")])
;; Two-operand punpckhwd on V8HI.  The selection list pairs indices 4..7
;; with 12..15.  Operand 1 is tied to the destination ("0"); operand 2 may
;; be a register or memory.  prefix_data16 is 1.
6712 (define_insn "sse2_punpckhwd"
6713 [(set (match_operand:V8HI 0 "register_operand" "=x")
6716 (match_operand:V8HI 1 "register_operand" "0")
6717 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
6718 (parallel [(const_int 4) (const_int 12)
6719 (const_int 5) (const_int 13)
6720 (const_int 6) (const_int 14)
6721 (const_int 7) (const_int 15)])))]
6723 "punpckhwd\t{%2, %0|%0, %2}"
6724 [(set_attr "type" "sselog")
6725 (set_attr "prefix_data16" "1")
6726 (set_attr "mode" "TI")])
;; VEX three-operand vpunpcklwd on V8HI.  The selection list pairs indices
;; 0..3 with 8..11.  Operand 1 is any SSE register; operand 2 may be a
;; register or memory.
6728 (define_insn "*avx_punpcklwd"
6729 [(set (match_operand:V8HI 0 "register_operand" "=x")
6732 (match_operand:V8HI 1 "register_operand" "x")
6733 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
6734 (parallel [(const_int 0) (const_int 8)
6735 (const_int 1) (const_int 9)
6736 (const_int 2) (const_int 10)
6737 (const_int 3) (const_int 11)])))]
6739 "vpunpcklwd\t{%2, %1, %0|%0, %1, %2}"
6740 [(set_attr "type" "sselog")
6741 (set_attr "prefix" "vex")
6742 (set_attr "mode" "TI")])
;; Two-operand punpcklwd on V8HI.  The selection list pairs indices 0..3
;; with 8..11.  Operand 1 is tied to the destination ("0"); operand 2 may
;; be a register or memory.  prefix_data16 is 1.
6744 (define_insn "sse2_punpcklwd"
6745 [(set (match_operand:V8HI 0 "register_operand" "=x")
6748 (match_operand:V8HI 1 "register_operand" "0")
6749 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
6750 (parallel [(const_int 0) (const_int 8)
6751 (const_int 1) (const_int 9)
6752 (const_int 2) (const_int 10)
6753 (const_int 3) (const_int 11)])))]
6755 "punpcklwd\t{%2, %0|%0, %2}"
6756 [(set_attr "type" "sselog")
6757 (set_attr "prefix_data16" "1")
6758 (set_attr "mode" "TI")])
;; VEX three-operand vpunpckhdq on V4SI.  The selection list is 2, 6, 3, 7.
;; Operand 1 is any SSE register; operand 2 may be a register or memory.
6760 (define_insn "*avx_punpckhdq"
6761 [(set (match_operand:V4SI 0 "register_operand" "=x")
6764 (match_operand:V4SI 1 "register_operand" "x")
6765 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
6766 (parallel [(const_int 2) (const_int 6)
6767 (const_int 3) (const_int 7)])))]
6769 "vpunpckhdq\t{%2, %1, %0|%0, %1, %2}"
6770 [(set_attr "type" "sselog")
6771 (set_attr "prefix" "vex")
6772 (set_attr "mode" "TI")])
;; Two-operand punpckhdq on V4SI.  The selection list is 2, 6, 3, 7.
;; Operand 1 is tied to the destination ("0"); operand 2 may be a register
;; or memory.  prefix_data16 is 1.
6774 (define_insn "sse2_punpckhdq"
6775 [(set (match_operand:V4SI 0 "register_operand" "=x")
6778 (match_operand:V4SI 1 "register_operand" "0")
6779 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
6780 (parallel [(const_int 2) (const_int 6)
6781 (const_int 3) (const_int 7)])))]
6783 "punpckhdq\t{%2, %0|%0, %2}"
6784 [(set_attr "type" "sselog")
6785 (set_attr "prefix_data16" "1")
6786 (set_attr "mode" "TI")])
;; VEX three-operand vpunpckldq on V4SI.  The selection list is 0, 4, 1, 5.
;; Operand 1 is any SSE register; operand 2 may be a register or memory.
6788 (define_insn "*avx_punpckldq"
6789 [(set (match_operand:V4SI 0 "register_operand" "=x")
6792 (match_operand:V4SI 1 "register_operand" "x")
6793 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
6794 (parallel [(const_int 0) (const_int 4)
6795 (const_int 1) (const_int 5)])))]
6797 "vpunpckldq\t{%2, %1, %0|%0, %1, %2}"
6798 [(set_attr "type" "sselog")
6799 (set_attr "prefix" "vex")
6800 (set_attr "mode" "TI")])
;; Two-operand punpckldq on V4SI.  The selection list is 0, 4, 1, 5.
;; Operand 1 is tied to the destination ("0"); operand 2 may be a register
;; or memory.  prefix_data16 is 1.
6802 (define_insn "sse2_punpckldq"
6803 [(set (match_operand:V4SI 0 "register_operand" "=x")
6806 (match_operand:V4SI 1 "register_operand" "0")
6807 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
6808 (parallel [(const_int 0) (const_int 4)
6809 (const_int 1) (const_int 5)])))]
6811 "punpckldq\t{%2, %0|%0, %2}"
6812 [(set_attr "type" "sselog")
6813 (set_attr "prefix_data16" "1")
6814 (set_attr "mode" "TI")])
;; VEX-encoded element insert for the SSEMODE124 modes, expressed as a
;; vec_merge of the duplicated scalar operand 2 (general register or
;; memory) into vector operand 1 under the selector operand 3.  Operand 3
;; is matched as a power of two; the output code replaces it with
;; exact_log2 of its value before printing it as the immediate.
;; prefix_extra is computed by an if_then_else on whether operand 0 is V8HI.
6816 (define_insn "*avx_pinsr<ssevecsize>"
6817 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
6818 (vec_merge:SSEMODE124
6819 (vec_duplicate:SSEMODE124
6820 (match_operand:<avxscalarmode> 2 "nonimmediate_operand" "rm"))
6821 (match_operand:SSEMODE124 1 "register_operand" "x")
6822 (match_operand:SI 3 "const_pow2_1_to_<pinsrbits>_operand" "n")))]
6825 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6826 return "vpinsr<ssevecsize>\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
6828 [(set_attr "type" "sselog")
6829 (set (attr "prefix_extra")
6830 (if_then_else (match_operand:V8HI 0 "register_operand" "")
6832 (const_string "1")))
6833 (set_attr "length_immediate" "1")
6834 (set_attr "prefix" "vex")
6835 (set_attr "mode" "TI")])
;; Two-operand pinsrb: inserts QI operand 2 (general register or memory)
;; into V16QI operand 1, which is tied to the destination.  Operand 3 is
;; matched by const_pow2_1_to_32768_operand; the output code replaces it
;; with exact_log2 of its value before printing it as the immediate.
6837 (define_insn "*sse4_1_pinsrb"
6838 [(set (match_operand:V16QI 0 "register_operand" "=x")
6840 (vec_duplicate:V16QI
6841 (match_operand:QI 2 "nonimmediate_operand" "rm"))
6842 (match_operand:V16QI 1 "register_operand" "0")
6843 (match_operand:SI 3 "const_pow2_1_to_32768_operand" "n")))]
6846 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6847 return "pinsrb\t{%3, %k2, %0|%0, %k2, %3}";
6849 [(set_attr "type" "sselog")
6850 (set_attr "prefix_extra" "1")
6851 (set_attr "length_immediate" "1")
6852 (set_attr "mode" "TI")])
;; Two-operand pinsrw: inserts HI operand 2 (general register or memory)
;; into V8HI operand 1, which is tied to the destination.  Operand 3 is
;; matched by const_pow2_1_to_128_operand; the output code replaces it
;; with exact_log2 of its value before printing it as the immediate.
6854 (define_insn "*sse2_pinsrw"
6855 [(set (match_operand:V8HI 0 "register_operand" "=x")
6858 (match_operand:HI 2 "nonimmediate_operand" "rm"))
6859 (match_operand:V8HI 1 "register_operand" "0")
6860 (match_operand:SI 3 "const_pow2_1_to_128_operand" "n")))]
6863 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6864 return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}";
6866 [(set_attr "type" "sselog")
6867 (set_attr "prefix_data16" "1")
6868 (set_attr "length_immediate" "1")
6869 (set_attr "mode" "TI")])
6871 ;; It must come before sse2_loadld since it is preferred.
;; Two-operand pinsrd: inserts SI operand 2 (general register or memory)
;; into V4SI operand 1, which is tied to the destination.  Operand 3 is
;; matched by const_pow2_1_to_8_operand; the output code replaces it with
;; exact_log2 of its value before printing it as the immediate.
6872 (define_insn "*sse4_1_pinsrd"
6873 [(set (match_operand:V4SI 0 "register_operand" "=x")
6876 (match_operand:SI 2 "nonimmediate_operand" "rm"))
6877 (match_operand:V4SI 1 "register_operand" "0")
6878 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
6881 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6882 return "pinsrd\t{%3, %2, %0|%0, %2, %3}";
6884 [(set_attr "type" "sselog")
6885 (set_attr "prefix_extra" "1")
6886 (set_attr "length_immediate" "1")
6887 (set_attr "mode" "TI")])
;; VEX three-operand vpinsrq, enabled for TARGET_AVX && TARGET_64BIT.
;; Inserts DI operand 2 (general register or memory) into V2DI operand 1
;; (any SSE register).  Operand 3 is matched by const_pow2_1_to_2_operand;
;; the output code replaces it with exact_log2 of its value before
;; printing it as the immediate.
6889 (define_insn "*avx_pinsrq"
6890 [(set (match_operand:V2DI 0 "register_operand" "=x")
6893 (match_operand:DI 2 "nonimmediate_operand" "rm"))
6894 (match_operand:V2DI 1 "register_operand" "x")
6895 (match_operand:SI 3 "const_pow2_1_to_2_operand" "n")))]
6896 "TARGET_AVX && TARGET_64BIT"
6898 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6899 return "vpinsrq\t{%3, %2, %1, %0|%0, %1, %2, %3}";
6901 [(set_attr "type" "sselog")
6902 (set_attr "prefix_extra" "1")
6903 (set_attr "length_immediate" "1")
6904 (set_attr "prefix" "vex")
6905 (set_attr "mode" "TI")])
;; Two-operand pinsrq, enabled for TARGET_SSE4_1 && TARGET_64BIT.  Inserts
;; DI operand 2 (general register or memory) into V2DI operand 1, which is
;; tied to the destination.  Operand 3 is matched by
;; const_pow2_1_to_2_operand; the output code replaces it with exact_log2
;; of its value before printing it as the immediate.  prefix_rex is 1.
6907 (define_insn "*sse4_1_pinsrq"
6908 [(set (match_operand:V2DI 0 "register_operand" "=x")
6911 (match_operand:DI 2 "nonimmediate_operand" "rm"))
6912 (match_operand:V2DI 1 "register_operand" "0")
6913 (match_operand:SI 3 "const_pow2_1_to_2_operand" "n")))]
6914 "TARGET_SSE4_1 && TARGET_64BIT"
6916 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6917 return "pinsrq\t{%3, %2, %0|%0, %2, %3}";
6919 [(set_attr "type" "sselog")
6920 (set_attr "prefix_rex" "1")
6921 (set_attr "prefix_extra" "1")
6922 (set_attr "length_immediate" "1")
6923 (set_attr "mode" "TI")])
;; pextrb into an SI general register: selects one element of V16QI
;; operand 1, indexed by the immediate operand 2 (0..15).  The "prefix"
;; attribute is maybe_vex and the template uses the %v prefix marker.
6925 (define_insn "*sse4_1_pextrb"
6926 [(set (match_operand:SI 0 "register_operand" "=r")
6929 (match_operand:V16QI 1 "register_operand" "x")
6930 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")]))))]
6932 "%vpextrb\t{%2, %1, %0|%0, %1, %2}"
6933 [(set_attr "type" "sselog")
6934 (set_attr "prefix_extra" "1")
6935 (set_attr "length_immediate" "1")
6936 (set_attr "prefix" "maybe_vex")
6937 (set_attr "mode" "TI")])
;; pextrb with a QI memory destination: stores one element of V16QI
;; operand 1, indexed by the immediate operand 2 (0..15).  The "prefix"
;; attribute is maybe_vex.
6939 (define_insn "*sse4_1_pextrb_memory"
6940 [(set (match_operand:QI 0 "memory_operand" "=m")
6942 (match_operand:V16QI 1 "register_operand" "x")
6943 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")])))]
6945 "%vpextrb\t{%2, %1, %0|%0, %1, %2}"
6946 [(set_attr "type" "sselog")
6947 (set_attr "prefix_extra" "1")
6948 (set_attr "length_immediate" "1")
6949 (set_attr "prefix" "maybe_vex")
6950 (set_attr "mode" "TI")])
;; pextrw into an SI general register: selects one element of V8HI
;; operand 1, indexed by the immediate operand 2 (0..7).  prefix_data16 is
;; 1 and the "prefix" attribute is maybe_vex.
6952 (define_insn "*sse2_pextrw"
6953 [(set (match_operand:SI 0 "register_operand" "=r")
6956 (match_operand:V8HI 1 "register_operand" "x")
6957 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")]))))]
6959 "%vpextrw\t{%2, %1, %0|%0, %1, %2}"
6960 [(set_attr "type" "sselog")
6961 (set_attr "prefix_data16" "1")
6962 (set_attr "length_immediate" "1")
6963 (set_attr "prefix" "maybe_vex")
6964 (set_attr "mode" "TI")])
;; pextrw with an HI memory destination: stores one element of V8HI
;; operand 1, indexed by the immediate operand 2 (0..7).  Unlike the
;; register form, this pattern sets prefix_extra rather than prefix_data16.
6966 (define_insn "*sse4_1_pextrw_memory"
6967 [(set (match_operand:HI 0 "memory_operand" "=m")
6969 (match_operand:V8HI 1 "register_operand" "x")
6970 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")])))]
6972 "%vpextrw\t{%2, %1, %0|%0, %1, %2}"
6973 [(set_attr "type" "sselog")
6974 (set_attr "prefix_extra" "1")
6975 (set_attr "length_immediate" "1")
6976 (set_attr "prefix" "maybe_vex")
6977 (set_attr "mode" "TI")])
;; pextrd: extracts one element of V4SI operand 1, indexed by the
;; immediate operand 2 (0..3), into an SI general register or memory
;; ("=rm").  The "prefix" attribute is maybe_vex.
6979 (define_insn "*sse4_1_pextrd"
6980 [(set (match_operand:SI 0 "nonimmediate_operand" "=rm")
6982 (match_operand:V4SI 1 "register_operand" "x")
6983 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
6985 "%vpextrd\t{%2, %1, %0|%0, %1, %2}"
6986 [(set_attr "type" "sselog")
6987 (set_attr "prefix_extra" "1")
6988 (set_attr "length_immediate" "1")
6989 (set_attr "prefix" "maybe_vex")
6990 (set_attr "mode" "TI")])
6992 ;; It must come before *vec_extractv2di_1_sse since it is preferred.
;; pextrq, enabled for TARGET_SSE4_1 && TARGET_64BIT: extracts one element
;; of V2DI operand 1, indexed by the immediate operand 2 (0..1), into a DI
;; general register or memory ("=rm").  prefix_rex is 1 and the "prefix"
;; attribute is maybe_vex.
6993 (define_insn "*sse4_1_pextrq"
6994 [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
6996 (match_operand:V2DI 1 "register_operand" "x")
6997 (parallel [(match_operand:SI 2 "const_0_to_1_operand" "n")])))]
6998 "TARGET_SSE4_1 && TARGET_64BIT"
6999 "%vpextrq\t{%2, %1, %0|%0, %1, %2}"
7000 [(set_attr "type" "sselog")
7001 (set_attr "prefix_rex" "1")
7002 (set_attr "prefix_extra" "1")
7003 (set_attr "length_immediate" "1")
7004 (set_attr "prefix" "maybe_vex")
7005 (set_attr "mode" "TI")])
;; Expander taking a V4SI destination, a V4SI source and a constant
;; integer mask.  It splits the mask into four 2-bit fields (bits 0-1,
;; 2-3, 4-5, 6-7) and passes them as separate selector operands to
;; sse2_pshufd_1.
7007 (define_expand "sse2_pshufd"
7008 [(match_operand:V4SI 0 "register_operand" "")
7009 (match_operand:V4SI 1 "nonimmediate_operand" "")
7010 (match_operand:SI 2 "const_int_operand" "")]
7013 int mask = INTVAL (operands[2]);
7014 emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
7015 GEN_INT ((mask >> 0) & 3),
7016 GEN_INT ((mask >> 2) & 3),
7017 GEN_INT ((mask >> 4) & 3),
7018 GEN_INT ((mask >> 6) & 3)));
;; pshufd insn with the shuffle expressed as four explicit selector
;; operands (2..5), each in 0..3.  The output code packs them back into a
;; single immediate (operand 2 in bits 0-1, operand 3 in bits 2-3,
;; operand 4 in bits 4-5, operand 5 in bits 6-7), stores it in operands[2]
;; and prints %vpshufd.  The source may be a register or memory.
7022 (define_insn "sse2_pshufd_1"
7023 [(set (match_operand:V4SI 0 "register_operand" "=x")
7025 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
7026 (parallel [(match_operand 2 "const_0_to_3_operand" "")
7027 (match_operand 3 "const_0_to_3_operand" "")
7028 (match_operand 4 "const_0_to_3_operand" "")
7029 (match_operand 5 "const_0_to_3_operand" "")])))]
7033 mask |= INTVAL (operands[2]) << 0;
7034 mask |= INTVAL (operands[3]) << 2;
7035 mask |= INTVAL (operands[4]) << 4;
7036 mask |= INTVAL (operands[5]) << 6;
7037 operands[2] = GEN_INT (mask);
7039 return "%vpshufd\t{%2, %1, %0|%0, %1, %2}";
7041 [(set_attr "type" "sselog1")
7042 (set_attr "prefix_data16" "1")
7043 (set_attr "prefix" "maybe_vex")
7044 (set_attr "length_immediate" "1")
7045 (set_attr "mode" "TI")])
;; Expander taking a V8HI destination, a V8HI source and a constant
;; integer mask.  It splits the mask into four 2-bit fields (bits 0-1,
;; 2-3, 4-5, 6-7) and passes them as separate selector operands to
;; sse2_pshuflw_1.
7047 (define_expand "sse2_pshuflw"
7048 [(match_operand:V8HI 0 "register_operand" "")
7049 (match_operand:V8HI 1 "nonimmediate_operand" "")
7050 (match_operand:SI 2 "const_int_operand" "")]
7053 int mask = INTVAL (operands[2]);
7054 emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
7055 GEN_INT ((mask >> 0) & 3),
7056 GEN_INT ((mask >> 2) & 3),
7057 GEN_INT ((mask >> 4) & 3),
7058 GEN_INT ((mask >> 6) & 3)));
;; pshuflw insn with the shuffle expressed as four explicit selector
;; operands (2..5), each in 0..3, at the start of the selection list.  The
;; output code packs them back into a single immediate (2 bits each, in
;; operand order), stores it in operands[2] and prints %vpshuflw.
;; prefix_rep is 1 and prefix_data16 is 0.
7062 (define_insn "sse2_pshuflw_1"
7063 [(set (match_operand:V8HI 0 "register_operand" "=x")
7065 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7066 (parallel [(match_operand 2 "const_0_to_3_operand" "")
7067 (match_operand 3 "const_0_to_3_operand" "")
7068 (match_operand 4 "const_0_to_3_operand" "")
7069 (match_operand 5 "const_0_to_3_operand" "")
7077 mask |= INTVAL (operands[2]) << 0;
7078 mask |= INTVAL (operands[3]) << 2;
7079 mask |= INTVAL (operands[4]) << 4;
7080 mask |= INTVAL (operands[5]) << 6;
7081 operands[2] = GEN_INT (mask);
7083 return "%vpshuflw\t{%2, %1, %0|%0, %1, %2}";
7085 [(set_attr "type" "sselog")
7086 (set_attr "prefix_data16" "0")
7087 (set_attr "prefix_rep" "1")
7088 (set_attr "prefix" "maybe_vex")
7089 (set_attr "length_immediate" "1")
7090 (set_attr "mode" "TI")])
;; Expander taking a V8HI destination, a V8HI source and a constant
;; integer mask.  It splits the mask into four 2-bit fields, adds 4 to
;; each so the selectors fall in 4..7, and passes them to sse2_pshufhw_1.
7092 (define_expand "sse2_pshufhw"
7093 [(match_operand:V8HI 0 "register_operand" "")
7094 (match_operand:V8HI 1 "nonimmediate_operand" "")
7095 (match_operand:SI 2 "const_int_operand" "")]
7098 int mask = INTVAL (operands[2]);
7099 emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
7100 GEN_INT (((mask >> 0) & 3) + 4),
7101 GEN_INT (((mask >> 2) & 3) + 4),
7102 GEN_INT (((mask >> 4) & 3) + 4),
7103 GEN_INT (((mask >> 6) & 3) + 4)));
;; pshufhw insn.  The selection list starts with fixed constants and ends
;; with four selector operands (2..5), each in 4..7.  The output code
;; subtracts 4 from each selector, packs the results into a single
;; immediate (2 bits each, in operand order), stores it in operands[2] and
;; prints %vpshufhw.  prefix_rep is 1 and prefix_data16 is 0.
7107 (define_insn "sse2_pshufhw_1"
7108 [(set (match_operand:V8HI 0 "register_operand" "=x")
7110 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7111 (parallel [(const_int 0)
7115 (match_operand 2 "const_4_to_7_operand" "")
7116 (match_operand 3 "const_4_to_7_operand" "")
7117 (match_operand 4 "const_4_to_7_operand" "")
7118 (match_operand 5 "const_4_to_7_operand" "")])))]
7122 mask |= (INTVAL (operands[2]) - 4) << 0;
7123 mask |= (INTVAL (operands[3]) - 4) << 2;
7124 mask |= (INTVAL (operands[4]) - 4) << 4;
7125 mask |= (INTVAL (operands[5]) - 4) << 6;
7126 operands[2] = GEN_INT (mask);
7128 return "%vpshufhw\t{%2, %1, %0|%0, %1, %2}";
7130 [(set_attr "type" "sselog")
7131 (set_attr "prefix_rep" "1")
7132 (set_attr "prefix_data16" "0")
7133 (set_attr "prefix" "maybe_vex")
7134 (set_attr "length_immediate" "1")
7135 (set_attr "mode" "TI")])
;; Expander that builds a V4SI register value from SI operand 1 (register
;; or memory).  The preparation statement sets operands[2] to the V4SI
;; zero constant, CONST0_RTX (V4SImode).
7137 (define_expand "sse2_loadd"
7138 [(set (match_operand:V4SI 0 "register_operand" "")
7141 (match_operand:SI 1 "nonimmediate_operand" ""))
7145 "operands[2] = CONST0_RTX (V4SImode);")
;; VEX-encoded load of an SI value (operand 2) into a V4SI register, with
;; operand 1 supplying the rest of the vector.  Three alternatives:
;;   0: vmovd from memory, operand 1 is the zero constant ("C")
;;   1: vmovd from a general register into a "Yi" register, operand 1 zero
;;   2: vmovss from an SSE register with operand 1 also an SSE register
7147 (define_insn "*avx_loadld"
7148 [(set (match_operand:V4SI 0 "register_operand" "=x,Yi,x")
7151 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,x"))
7152 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,x")
7156 vmovd\t{%2, %0|%0, %2}
7157 vmovd\t{%2, %0|%0, %2}
7158 vmovss\t{%2, %1, %0|%0, %1, %2}"
7159 [(set_attr "type" "ssemov")
7160 (set_attr "prefix" "vex")
7161 (set_attr "mode" "TI,TI,V4SF")])
;; Legacy-encoded load of an SI value (operand 2) into a V4SI register,
;; with operand 1 supplying the rest of the vector.  Four alternatives:
;;   0: movd from memory into a "Y2" register, operand 1 zero ("C")
;;   1: movd from a general register into a "Yi" register, operand 1 zero
;;   2: movss from memory, operand 1 zero
;;   3: movss from an SSE register, operand 1 tied to the destination
7163 (define_insn "sse2_loadld"
7164 [(set (match_operand:V4SI 0 "register_operand" "=Y2,Yi,x,x")
7167 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x"))
7168 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,C,0")
7172 movd\t{%2, %0|%0, %2}
7173 movd\t{%2, %0|%0, %2}
7174 movss\t{%2, %0|%0, %2}
7175 movss\t{%2, %0|%0, %2}"
7176 [(set_attr "type" "ssemov")
7177 (set_attr "mode" "TI,TI,V4SF,SF")])
;; Extract of element 0 of V4SI operand 1 into an SI destination (memory or
;; SSE register in alternative 0, general register in alternative 1).
;; After reload it is split when inter-unit moves are enabled, or the
;; destination is memory, or the destination is not a general register.
;; The split rewrites the insn as a plain move from operand 1 re-expressed
;; as an SImode register with the same register number.
7179 (define_insn_and_split "sse2_stored"
7180 [(set (match_operand:SI 0 "nonimmediate_operand" "=mx,r")
7182 (match_operand:V4SI 1 "register_operand" "x,Yi")
7183 (parallel [(const_int 0)])))]
7186 "&& reload_completed
7187 && (TARGET_INTER_UNIT_MOVES
7188 || MEM_P (operands [0])
7189 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
7190 [(set (match_dup 0) (match_dup 1))]
7192 operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));
;; Extract of one element (index operand 2, 0..3) from a V4SI memory
;; operand into an SI general register.  The split code emits a single
;; SImode move from the memory operand re-addressed at byte offset
;; 4 * index via adjust_address.
7195 (define_insn_and_split "*vec_ext_v4si_mem"
7196 [(set (match_operand:SI 0 "register_operand" "=r")
7198 (match_operand:V4SI 1 "memory_operand" "o")
7199 (parallel [(match_operand 2 "const_0_to_3_operand" "")])))]
7205 int i = INTVAL (operands[2]);
7207 emit_move_insn (operands[0], adjust_address (operands[1], SImode, i*4));
;; Expander for extracting element 0 of V2DI register operand 1 into a DI
;; destination that may be a register or memory.
7211 (define_expand "sse_storeq"
7212 [(set (match_operand:DI 0 "nonimmediate_operand" "")
7214 (match_operand:V2DI 1 "register_operand" "")
7215 (parallel [(const_int 0)])))]
;; 64-bit-only extract of element 0 of V2DI operand 1 into a DI
;; destination; memory-to-memory combinations are rejected.  Three
;; alternatives: SSE register to memory/SSE register, "Yi" register to
;; general register, and offsettable memory to general register.  Only the
;; last alternative carries concrete attributes (type imov, mode DI) and
;; the visible %vmov{q} template; the first two are marked "*".
7219 (define_insn "*sse2_storeq_rex64"
7220 [(set (match_operand:DI 0 "nonimmediate_operand" "=mx,*r,r")
7222 (match_operand:V2DI 1 "nonimmediate_operand" "x,Yi,o")
7223 (parallel [(const_int 0)])))]
7224 "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7228 %vmov{q}\t{%1, %0|%0, %1}"
7229 [(set_attr "type" "*,*,imov")
7230 (set_attr "prefix" "*,*,maybe_vex")
7231 (set_attr "mode" "*,*,DI")])
;; Extract of element 0 of V2DI SSE-register operand 1 into a DI
;; destination that is either memory or an SSE register ("=mx").
7233 (define_insn "*sse2_storeq"
7234 [(set (match_operand:DI 0 "nonimmediate_operand" "=mx")
7236 (match_operand:V2DI 1 "register_operand" "x")
7237 (parallel [(const_int 0)])))]
;; Split pattern for the element-0 extract of a V2DI register into a DI
;; destination.  It applies when inter-unit moves are enabled, or the
;; destination is memory, or the destination is not a general register.
;; The insn is rewritten as a plain move from operand 1 re-expressed as a
;; DImode register with the same register number.
7242 [(set (match_operand:DI 0 "nonimmediate_operand" "")
7244 (match_operand:V2DI 1 "register_operand" "")
7245 (parallel [(const_int 0)])))]
7248 && (TARGET_INTER_UNIT_MOVES
7249 || MEM_P (operands [0])
7250 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
7251 [(set (match_dup 0) (match_dup 1))]
7253 operands[1] = gen_rtx_REG (DImode, REGNO (operands[1]));
;; VEX-encoded extract of element 1 of V2DI operand 1 into a DI
;; destination; memory-to-memory combinations are rejected.  Four
;; alternatives:
;;   0: vmovhps, SSE register to memory
;;   1: vpsrldq by 8, SSE register to SSE register
;;   2: vmovq from the %H1 form of an offsettable memory operand
;;   3: vmov{q} from the %H1 form of memory into a general register
7256 (define_insn "*vec_extractv2di_1_rex64_avx"
7257 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x,r")
7259 (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o,o")
7260 (parallel [(const_int 1)])))]
7263 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7265 vmovhps\t{%1, %0|%0, %1}
7266 vpsrldq\t{$8, %1, %0|%0, %1, 8}
7267 vmovq\t{%H1, %0|%0, %H1}
7268 vmov{q}\t{%H1, %0|%0, %H1}"
7269 [(set_attr "type" "ssemov,sseishft,ssemov,imov")
7270 (set_attr "length_immediate" "*,1,*,*")
7271 (set_attr "memory" "*,none,*,*")
7272 (set_attr "prefix" "vex")
7273 (set_attr "mode" "V2SF,TI,TI,DI")])
;; Legacy-encoded, 64-bit-only extract of element 1 of V2DI operand 1 into
;; a DI destination; memory-to-memory combinations are rejected.  Four
;; alternatives:
;;   0: movhps, SSE register to memory
;;   1: psrldq by 8 in place (operand 1 tied to the destination)
;;   2: movq from the %H1 form of an offsettable memory operand
;;   3: mov{q} from the %H1 form of memory into a general register
7275 (define_insn "*vec_extractv2di_1_rex64"
7276 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x,r")
7278 (match_operand:V2DI 1 "nonimmediate_operand" "x,0,o,o")
7279 (parallel [(const_int 1)])))]
7280 "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7282 movhps\t{%1, %0|%0, %1}
7283 psrldq\t{$8, %0|%0, 8}
7284 movq\t{%H1, %0|%0, %H1}
7285 mov{q}\t{%H1, %0|%0, %H1}"
7286 [(set_attr "type" "ssemov,sseishft,ssemov,imov")
7287 (set_attr "length_immediate" "*,1,*,*")
7288 (set_attr "atom_unit" "*,sishuf,*,*")
7289 (set_attr "memory" "*,none,*,*")
7290 (set_attr "mode" "V2SF,TI,TI,DI")])
;; VEX-encoded extract of element 1 of V2DI operand 1 into a DI
;; destination, without a general-register alternative; memory-to-memory
;; combinations are rejected.  Three alternatives:
;;   0: vmovhps, SSE register to memory
;;   1: vpsrldq by 8, SSE register to SSE register
;;   2: vmovq from the %H1 form of an offsettable memory operand
7292 (define_insn "*vec_extractv2di_1_avx"
7293 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
7295 (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o")
7296 (parallel [(const_int 1)])))]
7299 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7301 vmovhps\t{%1, %0|%0, %1}
7302 vpsrldq\t{$8, %1, %0|%0, %1, 8}
7303 vmovq\t{%H1, %0|%0, %H1}"
7304 [(set_attr "type" "ssemov,sseishft,ssemov")
7305 (set_attr "length_immediate" "*,1,*")
7306 (set_attr "memory" "*,none,*")
7307 (set_attr "prefix" "vex")
7308 (set_attr "mode" "V2SF,TI,TI")])
;; SSE2 extract of element 1 of V2DI operand 1 into a DI destination,
;; without a general-register alternative; memory-to-memory combinations
;; are rejected.  Three alternatives:
;;   0: movhps, SSE register to memory
;;   1: psrldq by 8 in place (operand 1 tied to the destination)
;;   2: movq from the %H1 form of an offsettable memory operand
7310 (define_insn "*vec_extractv2di_1_sse2"
7311 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
7313 (match_operand:V2DI 1 "nonimmediate_operand" "x,0,o")
7314 (parallel [(const_int 1)])))]
7316 && TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7318 movhps\t{%1, %0|%0, %1}
7319 psrldq\t{$8, %0|%0, 8}
7320 movq\t{%H1, %0|%0, %H1}"
7321 [(set_attr "type" "ssemov,sseishft,ssemov")
7322 (set_attr "length_immediate" "*,1,*")
7323 (set_attr "atom_unit" "*,sishuf,*")
7324 (set_attr "memory" "*,none,*")
7325 (set_attr "mode" "V2SF,TI,TI")])
7327 ;; Not sure this is ever used, but it doesn't hurt to have it. -aoliva
;; SSE-without-SSE2 extract of element 1 of V2DI operand 1 into a DI
;; destination; memory-to-memory combinations are rejected.  Three
;; alternatives:
;;   0: movhps, SSE register to memory
;;   1: movhlps, SSE register to SSE register
;;   2: movlps from the %H1 form of an offsettable memory operand
7328 (define_insn "*vec_extractv2di_1_sse"
7329 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
7331 (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o")
7332 (parallel [(const_int 1)])))]
7333 "!TARGET_SSE2 && TARGET_SSE
7334 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7336 movhps\t{%1, %0|%0, %1}
7337 movhlps\t{%1, %0|%0, %1}
7338 movlps\t{%H1, %0|%0, %H1}"
7339 [(set_attr "type" "ssemov")
7340 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Builds a V4SI register from SI register operand 1.  Two alternatives:
;;   0: %vpshufd with immediate 0, "Y2" registers (prefix maybe_vex)
;;   1: shufps with immediate 0 applied in place, operand 1 tied to the
;;      destination (prefix orig, mode V4SF)
7342 (define_insn "*vec_dupv4si"
7343 [(set (match_operand:V4SI 0 "register_operand" "=Y2,x")
7345 (match_operand:SI 1 "register_operand" " Y2,0")))]
7348 %vpshufd\t{$0, %1, %0|%0, %1, 0}
7349 shufps\t{$0, %0, %0|%0, %0, 0}"
7350 [(set_attr "type" "sselog1")
7351 (set_attr "prefix" "maybe_vex,orig")
7352 (set_attr "length_immediate" "1")
7353 (set_attr "mode" "TI,V4SF")])
;; VEX-encoded build of a V2DI register from DI SSE-register operand 1,
;; emitted as vpunpcklqdq with operand 1 used for both sources.
7355 (define_insn "*vec_dupv2di_avx"
7356 [(set (match_operand:V2DI 0 "register_operand" "=x")
7358 (match_operand:DI 1 "register_operand" "x")))]
7360 "vpunpcklqdq\t{%1, %1, %0|%0, %1, %1}"
7361 [(set_attr "type" "sselog1")
7362 (set_attr "prefix" "vex")
7363 (set_attr "mode" "TI")])
;; Two-alternative pattern producing a V2DI register from a DI register
;; operand that is tied to the destination in both alternatives (types
;; sselog1 and ssemov, modes TI and V4SF).
;; (Condition and output templates are on lines not visible in this listing.)
7365 (define_insn "*vec_dupv2di"
7366 [(set (match_operand:V2DI 0 "register_operand" "=Y2,x")
7368 (match_operand:DI 1 "register_operand" " 0 ,0")))]
7373 [(set_attr "type" "sselog1,ssemov")
7374 (set_attr "mode" "TI,TI,V4SF")])
;; Build a V2SI register from two SI operands.  Alternatives, in order:
;;   0: vpinsrd with immediate 1 (operand 2 in a general register or memory)
;;   1: vpunpckldq of two SSE registers
;;   2: vmovd of operand 1 alone (operand 2 matches constraint "C")
;;   3: MMX punpckldq, operand 1 tied to the destination
;;   4: MMX movd of operand 1 alone (operand 2 matches constraint "C")
;; The "prefix" attribute marks the two MMX alternatives (3,4) as "orig" and
;; the rest as "vex".
;; NOTE(review): "C" is presumably the constant-zero constraint -- confirm.
7376 (define_insn "*vec_concatv2si_avx"
7377 [(set (match_operand:V2SI 0 "register_operand" "=x,x,x ,*y ,*y")
7379 (match_operand:SI 1 "nonimmediate_operand" "x ,x,rm, 0 ,rm")
7380 (match_operand:SI 2 "vector_move_operand" "rm,x,C ,*ym,C")))]
7383 vpinsrd\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}
7384 vpunpckldq\t{%2, %1, %0|%0, %1, %2}
7385 vmovd\t{%1, %0|%0, %1}
7386 punpckldq\t{%2, %0|%0, %2}
7387 movd\t{%1, %0|%0, %1}"
7388 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
7389 (set_attr "prefix_extra" "1,*,*,*,*")
7390 (set_attr "length_immediate" "1,*,*,*,*")
7391 (set (attr "prefix")
7392 (if_then_else (eq_attr "alternative" "3,4")
7393 (const_string "orig")
7394 (const_string "vex")))
7395 (set_attr "mode" "TI,TI,TI,DI,DI")])
;; Non-VEX counterpart for building a V2SI register from two SI operands.
;; Alternatives, in order: pinsrd with immediate 1 (operand 1 tied to the
;; destination), punpckldq (tied), movd of operand 1 alone (operand 2 matches
;; "C"), then the MMX punpckldq and movd forms.
;; (The enabling condition is on a line not visible in this listing.)
7397 (define_insn "*vec_concatv2si_sse4_1"
7398 [(set (match_operand:V2SI 0 "register_operand" "=x,x,x ,*y ,*y")
7400 (match_operand:SI 1 "nonimmediate_operand" "0 ,0,rm, 0 ,rm")
7401 (match_operand:SI 2 "vector_move_operand" "rm,x,C ,*ym,C")))]
7404 pinsrd\t{$0x1, %2, %0|%0, %2, 0x1}
7405 punpckldq\t{%2, %0|%0, %2}
7406 movd\t{%1, %0|%0, %1}
7407 punpckldq\t{%2, %0|%0, %2}
7408 movd\t{%1, %0|%0, %1}"
7409 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
7410 (set_attr "prefix_extra" "1,*,*,*,*")
7411 (set_attr "length_immediate" "1,*,*,*,*")
7412 (set_attr "mode" "TI,TI,TI,DI,DI")])
7414 ;; ??? In theory we can match memory for the MMX alternative, but allowing
7415 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
7416 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Four alternatives building a V2SI register from two SI operands, where
;; operand 2 must be a register or match "C" (predicate reg_or_0_operand):
;; SSE punpckldq (operand 1 tied), SSE movd of operand 1 alone, MMX punpckldq
;; (tied), MMX movd of operand 1 alone.
;; (The enabling condition is on a line not visible in this listing.)
7417 (define_insn "*vec_concatv2si_sse2"
7418 [(set (match_operand:V2SI 0 "register_operand" "=x,x ,*y,*y")
7420 (match_operand:SI 1 "nonimmediate_operand" " 0,rm, 0,rm")
7421 (match_operand:SI 2 "reg_or_0_operand" " x,C ,*y, C")))]
7424 punpckldq\t{%2, %0|%0, %2}
7425 movd\t{%1, %0|%0, %1}
7426 punpckldq\t{%2, %0|%0, %2}
7427 movd\t{%1, %0|%0, %1}"
7428 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
7429 (set_attr "mode" "TI,TI,DI,DI")])
;; Variant that uses single-precision instructions for the SSE register
;; alternatives: unpcklps (operand 1 tied to the destination) and movss from
;; memory; the remaining two alternatives are the MMX punpckldq / movd forms.
;; (The enabling condition is on a line not visible in this listing.)
7431 (define_insn "*vec_concatv2si_sse"
7432 [(set (match_operand:V2SI 0 "register_operand" "=x,x,*y,*y")
7434 (match_operand:SI 1 "nonimmediate_operand" " 0,m, 0,*rm")
7435 (match_operand:SI 2 "reg_or_0_operand" " x,C,*y,C")))]
7438 unpcklps\t{%2, %0|%0, %2}
7439 movss\t{%1, %0|%0, %1}
7440 punpckldq\t{%2, %0|%0, %2}
7441 movd\t{%1, %0|%0, %1}"
7442 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
7443 (set_attr "mode" "V4SF,V4SF,DI,DI")])
;; VEX-encoded pattern building a V4SI register from two V2SI operands:
;; vpunpcklqdq when operand 2 is a register, vmovhps when it is in memory.
7445 (define_insn "*vec_concatv4si_1_avx"
7446 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
7448 (match_operand:V2SI 1 "register_operand" " x,x")
7449 (match_operand:V2SI 2 "nonimmediate_operand" " x,m")))]
7452 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7453 vmovhps\t{%2, %1, %0|%0, %1, %2}"
7454 [(set_attr "type" "sselog,ssemov")
7455 (set_attr "prefix" "vex")
7456 (set_attr "mode" "TI,V2SF")])
;; Two-operand (destructive) pattern building a V4SI register from two V2SI
;; operands; operand 1 is tied to the destination in every alternative.
;; Alternatives, in order: punpcklqdq (Y2 registers), movlhps (register
;; operand 2), movhps (memory operand 2).
7458 (define_insn "*vec_concatv4si_1"
7459 [(set (match_operand:V4SI 0 "register_operand" "=Y2,x,x")
7461 (match_operand:V2SI 1 "register_operand" " 0 ,0,0")
7462 (match_operand:V2SI 2 "nonimmediate_operand" " Y2,x,m")))]
7465 punpcklqdq\t{%2, %0|%0, %2}
7466 movlhps\t{%2, %0|%0, %2}
7467 movhps\t{%2, %0|%0, %2}"
7468 [(set_attr "type" "sselog,ssemov,ssemov")
7469 (set_attr "mode" "TI,V4SF,V2SF")])
;; 32-bit AVX pattern (condition: !TARGET_64BIT && TARGET_AVX) building a
;; V2DI register from two DI operands.  Alternatives, in order:
;;   0: vmovq from memory (operand 2 matches "C")
;;   1: movq2dq from an MMX register (operand 2 matches "C"); this is the only
;;      alternative whose "prefix" attribute is "orig" rather than "vex"
;;   2: vpunpcklqdq of two SSE registers
;;   3: vmovhps with operand 2 in memory
7471 (define_insn "*vec_concatv2di_avx"
7472 [(set (match_operand:V2DI 0 "register_operand" "=x,?x,x,x")
7474 (match_operand:DI 1 "nonimmediate_operand" " m,*y,x,x")
7475 (match_operand:DI 2 "vector_move_operand" " C, C,x,m")))]
7476 "!TARGET_64BIT && TARGET_AVX"
7478 vmovq\t{%1, %0|%0, %1}
7479 movq2dq\t{%1, %0|%0, %1}
7480 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7481 vmovhps\t{%2, %1, %0|%0, %1, %2}"
7482 [(set_attr "type" "ssemov,ssemov,sselog,ssemov")
7483 (set (attr "prefix")
7484 (if_then_else (eq_attr "alternative" "1")
7485 (const_string "orig")
7486 (const_string "vex")))
7487 (set_attr "mode" "TI,TI,TI,V2SF")])
;; 32-bit SSE pattern (condition: !TARGET_64BIT && TARGET_SSE) building a V2DI
;; register from two DI operands.  Alternatives, in order: movq of operand 1
;; alone, movq2dq from an MMX register (both with operand 2 matching "C"),
;; then punpcklqdq, movlhps and movhps with operand 1 tied to the destination.
7489 (define_insn "vec_concatv2di"
7490 [(set (match_operand:V2DI 0 "register_operand" "=Y2 ,?Y2,Y2,x,x")
7492 (match_operand:DI 1 "nonimmediate_operand" " mY2,*y ,0 ,0,0")
7493 (match_operand:DI 2 "vector_move_operand" " C , C,Y2,x,m")))]
7494 "!TARGET_64BIT && TARGET_SSE"
7496 movq\t{%1, %0|%0, %1}
7497 movq2dq\t{%1, %0|%0, %1}
7498 punpcklqdq\t{%2, %0|%0, %2}
7499 movlhps\t{%2, %0|%0, %2}
7500 movhps\t{%2, %0|%0, %2}"
7501 [(set_attr "type" "ssemov,ssemov,sselog,ssemov,ssemov")
7502 (set_attr "mode" "TI,TI,TI,V4SF,V2SF")])
;; 64-bit AVX pattern (condition: TARGET_64BIT && TARGET_AVX) building a V2DI
;; register from two DI operands.  Alternatives, in order:
;;   0: vpinsrq with immediate 1 (operand 2 in a general register or memory)
;;   1: vmovq from memory       (operand 2 matches "C")
;;   2: vmovq from a general register (operand 2 matches "C")
;;   3: movq2dq from an MMX register  (operand 2 matches "C"); the only
;;      alternative whose "prefix" attribute is "orig"
;;   4: vpunpcklqdq of two SSE registers
;;   5: vmovhps with operand 2 in memory
7504 (define_insn "*vec_concatv2di_rex64_avx"
7505 [(set (match_operand:V2DI 0 "register_operand" "=x,x,Yi,!x,x,x")
7507 (match_operand:DI 1 "nonimmediate_operand" " x,m,r ,*y,x,x")
7508 (match_operand:DI 2 "vector_move_operand" "rm,C,C ,C ,x,m")))]
7509 "TARGET_64BIT && TARGET_AVX"
7511 vpinsrq\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}
7512 vmovq\t{%1, %0|%0, %1}
7513 vmovq\t{%1, %0|%0, %1}
7514 movq2dq\t{%1, %0|%0, %1}
7515 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7516 vmovhps\t{%2, %1, %0|%0, %1, %2}"
7517 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,sselog,ssemov")
7518 (set_attr "prefix_extra" "1,*,*,*,*,*")
7519 (set_attr "length_immediate" "1,*,*,*,*,*")
7520 (set (attr "prefix")
7521 (if_then_else (eq_attr "alternative" "3")
7522 (const_string "orig")
7523 (const_string "vex")))
7524 (set_attr "mode" "TI,TI,TI,TI,TI,V2SF")])
;; 64-bit SSE4.1 pattern (condition: TARGET_64BIT && TARGET_SSE4_1) building a
;; V2DI register from two DI operands.  Seven alternatives, in order: pinsrq
;; with immediate 1 (operand 1 tied), movq from memory/SSE register, movq from
;; a general register, movq2dq from an MMX register (these three with operand
;; 2 matching "C"), then punpcklqdq, movlhps and movhps with operand 1 tied to
;; the destination.  "prefix_rex" is forced to 1 for alternatives 0 and 2.
7526 (define_insn "*vec_concatv2di_rex64_sse4_1"
7527 [(set (match_operand:V2DI 0 "register_operand" "=x ,x ,Yi,!x,x,x,x")
7529 (match_operand:DI 1 "nonimmediate_operand" " 0 ,mx,r ,*y,0,0,0")
7530 (match_operand:DI 2 "vector_move_operand" " rm,C ,C ,C ,x,x,m")))]
7531 "TARGET_64BIT && TARGET_SSE4_1"
7533 pinsrq\t{$0x1, %2, %0|%0, %2, 0x1}
7534 movq\t{%1, %0|%0, %1}
7535 movq\t{%1, %0|%0, %1}
7536 movq2dq\t{%1, %0|%0, %1}
7537 punpcklqdq\t{%2, %0|%0, %2}
7538 movlhps\t{%2, %0|%0, %2}
7539 movhps\t{%2, %0|%0, %2}"
7540 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
7541 (set_attr "prefix_rex" "1,*,1,*,*,*,*")
7542 (set_attr "prefix_extra" "1,*,*,*,*,*,*")
7543 (set_attr "length_immediate" "1,*,*,*,*,*,*")
7544 (set_attr "mode" "TI,TI,TI,TI,TI,V4SF,V2SF")])
;; 64-bit SSE pattern (condition: TARGET_64BIT && TARGET_SSE) building a V2DI
;; register from two DI operands.  Alternatives, in order: movq from
;; memory/SSE register, movq from a general register ("prefix_rex" forced to
;; 1), movq2dq from an MMX register (all three with operand 2 matching "C"),
;; then punpcklqdq, movlhps and movhps with operand 1 tied to the destination.
7546 (define_insn "*vec_concatv2di_rex64_sse"
7547 [(set (match_operand:V2DI 0 "register_operand" "=Y2 ,Yi,!Y2,Y2,x,x")
7549 (match_operand:DI 1 "nonimmediate_operand" " mY2,r ,*y ,0 ,0,0")
7550 (match_operand:DI 2 "vector_move_operand" " C ,C ,C ,Y2,x,m")))]
7551 "TARGET_64BIT && TARGET_SSE"
7553 movq\t{%1, %0|%0, %1}
7554 movq\t{%1, %0|%0, %1}
7555 movq2dq\t{%1, %0|%0, %1}
7556 punpcklqdq\t{%2, %0|%0, %2}
7557 movlhps\t{%2, %0|%0, %2}
7558 movhps\t{%2, %0|%0, %2}"
7559 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
7560 (set_attr "prefix_rex" "*,1,*,*,*,*")
7561 (set_attr "mode" "TI,TI,TI,TI,V4SF,V2SF")])
;; Expander with a V8HI register output and a V16QI register input.  It
;; delegates to one of ix86_expand_sse4_unpack, ix86_expand_sse5_unpack (when
;; TARGET_SSE5) or ix86_expand_sse_unpack, always passing (operands, true, true).
;; NOTE(review): the two flags presumably select unsigned and high-half,
;; matching "u"/"hi" in the name -- confirm against the helpers.  The guard
;; for the first call is on a line not visible in this listing.
7563 (define_expand "vec_unpacku_hi_v16qi"
7564 [(match_operand:V8HI 0 "register_operand" "")
7565 (match_operand:V16QI 1 "register_operand" "")]
7569 ix86_expand_sse4_unpack (operands, true, true);
7570 else if (TARGET_SSE5)
7571 ix86_expand_sse5_unpack (operands, true, true);
7573 ix86_expand_sse_unpack (operands, true, true);
;; Expander with a V8HI register output and a V16QI register input.  It
;; delegates to one of ix86_expand_sse4_unpack, ix86_expand_sse5_unpack (when
;; TARGET_SSE5) or ix86_expand_sse_unpack, always passing (operands, false, true).
;; NOTE(review): the two flags presumably select signed and high-half,
;; matching "s"/"hi" in the name -- confirm against the helpers.
7577 (define_expand "vec_unpacks_hi_v16qi"
7578 [(match_operand:V8HI 0 "register_operand" "")
7579 (match_operand:V16QI 1 "register_operand" "")]
7583 ix86_expand_sse4_unpack (operands, false, true);
7584 else if (TARGET_SSE5)
7585 ix86_expand_sse5_unpack (operands, false, true);
7587 ix86_expand_sse_unpack (operands, false, true);
;; Expander with a V8HI register output and a V16QI register input.  It
;; delegates to one of ix86_expand_sse4_unpack, ix86_expand_sse5_unpack (when
;; TARGET_SSE5) or ix86_expand_sse_unpack, always passing (operands, true, false).
;; NOTE(review): the two flags presumably select unsigned and low-half,
;; matching "u"/"lo" in the name -- confirm against the helpers.
7591 (define_expand "vec_unpacku_lo_v16qi"
7592 [(match_operand:V8HI 0 "register_operand" "")
7593 (match_operand:V16QI 1 "register_operand" "")]
7597 ix86_expand_sse4_unpack (operands, true, false);
7598 else if (TARGET_SSE5)
7599 ix86_expand_sse5_unpack (operands, true, false);
7601 ix86_expand_sse_unpack (operands, true, false);
;; Expander with a V8HI register output and a V16QI register input.  It
;; delegates to one of ix86_expand_sse4_unpack, ix86_expand_sse5_unpack (when
;; TARGET_SSE5) or ix86_expand_sse_unpack, always passing (operands, false, false).
;; NOTE(review): the two flags presumably select signed and low-half,
;; matching "s"/"lo" in the name -- confirm against the helpers.
7605 (define_expand "vec_unpacks_lo_v16qi"
7606 [(match_operand:V8HI 0 "register_operand" "")
7607 (match_operand:V16QI 1 "register_operand" "")]
7611 ix86_expand_sse4_unpack (operands, false, false);
7612 else if (TARGET_SSE5)
7613 ix86_expand_sse5_unpack (operands, false, false);
7615 ix86_expand_sse_unpack (operands, false, false);
;; Expander with a V4SI register output and a V8HI register input.  It
;; delegates to one of ix86_expand_sse4_unpack, ix86_expand_sse5_unpack (when
;; TARGET_SSE5) or ix86_expand_sse_unpack, always passing (operands, true, true).
;; NOTE(review): the two flags presumably select unsigned and high-half,
;; matching "u"/"hi" in the name -- confirm against the helpers.
7619 (define_expand "vec_unpacku_hi_v8hi"
7620 [(match_operand:V4SI 0 "register_operand" "")
7621 (match_operand:V8HI 1 "register_operand" "")]
7625 ix86_expand_sse4_unpack (operands, true, true);
7626 else if (TARGET_SSE5)
7627 ix86_expand_sse5_unpack (operands, true, true);
7629 ix86_expand_sse_unpack (operands, true, true);
;; Expander with a V4SI register output and a V8HI register input.  It
;; delegates to one of ix86_expand_sse4_unpack, ix86_expand_sse5_unpack (when
;; TARGET_SSE5) or ix86_expand_sse_unpack, always passing (operands, false, true).
;; NOTE(review): the two flags presumably select signed and high-half,
;; matching "s"/"hi" in the name -- confirm against the helpers.
7633 (define_expand "vec_unpacks_hi_v8hi"
7634 [(match_operand:V4SI 0 "register_operand" "")
7635 (match_operand:V8HI 1 "register_operand" "")]
7639 ix86_expand_sse4_unpack (operands, false, true);
7640 else if (TARGET_SSE5)
7641 ix86_expand_sse5_unpack (operands, false, true);
7643 ix86_expand_sse_unpack (operands, false, true);
;; Expander with a V4SI register output and a V8HI register input.  It
;; delegates to one of ix86_expand_sse4_unpack, ix86_expand_sse5_unpack (when
;; TARGET_SSE5) or ix86_expand_sse_unpack, always passing (operands, true, false).
;; NOTE(review): the two flags presumably select unsigned and low-half,
;; matching "u"/"lo" in the name -- confirm against the helpers.
7647 (define_expand "vec_unpacku_lo_v8hi"
7648 [(match_operand:V4SI 0 "register_operand" "")
7649 (match_operand:V8HI 1 "register_operand" "")]
7653 ix86_expand_sse4_unpack (operands, true, false);
7654 else if (TARGET_SSE5)
7655 ix86_expand_sse5_unpack (operands, true, false);
7657 ix86_expand_sse_unpack (operands, true, false);
;; Expander with a V4SI register output and a V8HI register input.  It
;; delegates to one of ix86_expand_sse4_unpack, ix86_expand_sse5_unpack (when
;; TARGET_SSE5) or ix86_expand_sse_unpack, always passing (operands, false, false).
;; NOTE(review): the two flags presumably select signed and low-half,
;; matching "s"/"lo" in the name -- confirm against the helpers.
7661 (define_expand "vec_unpacks_lo_v8hi"
7662 [(match_operand:V4SI 0 "register_operand" "")
7663 (match_operand:V8HI 1 "register_operand" "")]
7667 ix86_expand_sse4_unpack (operands, false, false);
7668 else if (TARGET_SSE5)
7669 ix86_expand_sse5_unpack (operands, false, false);
7671 ix86_expand_sse_unpack (operands, false, false);
;; Expander with a V2DI register output and a V4SI register input.  It
;; delegates to one of ix86_expand_sse4_unpack, ix86_expand_sse5_unpack (when
;; TARGET_SSE5) or ix86_expand_sse_unpack, always passing (operands, true, true).
;; NOTE(review): the two flags presumably select unsigned and high-half,
;; matching "u"/"hi" in the name -- confirm against the helpers.
7675 (define_expand "vec_unpacku_hi_v4si"
7676 [(match_operand:V2DI 0 "register_operand" "")
7677 (match_operand:V4SI 1 "register_operand" "")]
7681 ix86_expand_sse4_unpack (operands, true, true);
7682 else if (TARGET_SSE5)
7683 ix86_expand_sse5_unpack (operands, true, true);
7685 ix86_expand_sse_unpack (operands, true, true);
;; Expander with a V2DI register output and a V4SI register input.  It
;; delegates to one of ix86_expand_sse4_unpack, ix86_expand_sse5_unpack (when
;; TARGET_SSE5) or ix86_expand_sse_unpack, always passing (operands, false, true).
;; NOTE(review): the two flags presumably select signed and high-half,
;; matching "s"/"hi" in the name -- confirm against the helpers.
7689 (define_expand "vec_unpacks_hi_v4si"
7690 [(match_operand:V2DI 0 "register_operand" "")
7691 (match_operand:V4SI 1 "register_operand" "")]
7695 ix86_expand_sse4_unpack (operands, false, true);
7696 else if (TARGET_SSE5)
7697 ix86_expand_sse5_unpack (operands, false, true);
7699 ix86_expand_sse_unpack (operands, false, true);
;; Expander with a V2DI register output and a V4SI register input.  It
;; delegates to one of ix86_expand_sse4_unpack, ix86_expand_sse5_unpack (when
;; TARGET_SSE5) or ix86_expand_sse_unpack, always passing (operands, true, false).
;; NOTE(review): the two flags presumably select unsigned and low-half,
;; matching "u"/"lo" in the name -- confirm against the helpers.
7703 (define_expand "vec_unpacku_lo_v4si"
7704 [(match_operand:V2DI 0 "register_operand" "")
7705 (match_operand:V4SI 1 "register_operand" "")]
7709 ix86_expand_sse4_unpack (operands, true, false);
7710 else if (TARGET_SSE5)
7711 ix86_expand_sse5_unpack (operands, true, false);
7713 ix86_expand_sse_unpack (operands, true, false);
;; Expander with a V2DI register output and a V4SI register input.  It
;; delegates to one of ix86_expand_sse4_unpack, ix86_expand_sse5_unpack (when
;; TARGET_SSE5) or ix86_expand_sse_unpack, always passing (operands, false, false).
;; NOTE(review): the two flags presumably select signed and low-half,
;; matching "s"/"lo" in the name -- confirm against the helpers.
7717 (define_expand "vec_unpacks_lo_v4si"
7718 [(match_operand:V2DI 0 "register_operand" "")
7719 (match_operand:V4SI 1 "register_operand" "")]
7723 ix86_expand_sse4_unpack (operands, false, false);
7724 else if (TARGET_SSE5)
7725 ix86_expand_sse5_unpack (operands, false, false);
7727 ix86_expand_sse_unpack (operands, false, false);
7731 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7735 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander for the V16QI "uavg" pattern: two V16QI nonimmediate inputs, a
;; V16QI register output, and a constant vector of sixteen 1s in the RTL.
;; Its preparation statement calls ix86_fixup_binary_operands_no_copy with
;; PLUS and V16QImode.
;; (The arithmetic operators wrapping the operands are on lines not visible
;; in this listing.)
7737 (define_expand "sse2_uavgv16qi3"
7738 [(set (match_operand:V16QI 0 "register_operand" "")
7744 (match_operand:V16QI 1 "nonimmediate_operand" ""))
7746 (match_operand:V16QI 2 "nonimmediate_operand" "")))
7747 (const_vector:V16QI [(const_int 1) (const_int 1)
7748 (const_int 1) (const_int 1)
7749 (const_int 1) (const_int 1)
7750 (const_int 1) (const_int 1)
7751 (const_int 1) (const_int 1)
7752 (const_int 1) (const_int 1)
7753 (const_int 1) (const_int 1)
7754 (const_int 1) (const_int 1)]))
7757 "ix86_fixup_binary_operands_no_copy (PLUS, V16QImode, operands);")
;; AVX form of the V16QI "uavg" pattern: emits the three-operand vpavgb.
;; Operand 1 is marked commutative ("%") and operand 2 may be in memory.
;; Enabled when TARGET_AVX holds and ix86_binary_operator_ok accepts the
;; operands for PLUS in V16QImode.
7759 (define_insn "*avx_uavgv16qi3"
7760 [(set (match_operand:V16QI 0 "register_operand" "=x")
7766 (match_operand:V16QI 1 "nonimmediate_operand" "%x"))
7768 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))
7769 (const_vector:V16QI [(const_int 1) (const_int 1)
7770 (const_int 1) (const_int 1)
7771 (const_int 1) (const_int 1)
7772 (const_int 1) (const_int 1)
7773 (const_int 1) (const_int 1)
7774 (const_int 1) (const_int 1)
7775 (const_int 1) (const_int 1)
7776 (const_int 1) (const_int 1)]))
7778 "TARGET_AVX && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
7779 "vpavgb\t{%2, %1, %0|%0, %1, %2}"
7780 [(set_attr "type" "sseiadd")
7781 (set_attr "prefix" "vex")
7782 (set_attr "mode" "TI")])
;; SSE2 form of the V16QI "uavg" pattern: emits the two-operand pavgb, so
;; operand 1 is tied to the destination (and marked commutative with "%").
;; Enabled when TARGET_SSE2 holds and ix86_binary_operator_ok accepts the
;; operands for PLUS in V16QImode.
7784 (define_insn "*sse2_uavgv16qi3"
7785 [(set (match_operand:V16QI 0 "register_operand" "=x")
7791 (match_operand:V16QI 1 "nonimmediate_operand" "%0"))
7793 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))
7794 (const_vector:V16QI [(const_int 1) (const_int 1)
7795 (const_int 1) (const_int 1)
7796 (const_int 1) (const_int 1)
7797 (const_int 1) (const_int 1)
7798 (const_int 1) (const_int 1)
7799 (const_int 1) (const_int 1)
7800 (const_int 1) (const_int 1)
7801 (const_int 1) (const_int 1)]))
7803 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
7804 "pavgb\t{%2, %0|%0, %2}"
7805 [(set_attr "type" "sseiadd")
7806 (set_attr "prefix_data16" "1")
7807 (set_attr "mode" "TI")])
;; Expander for the V8HI "uavg" pattern: two V8HI nonimmediate inputs, a V8HI
;; register output, and a constant vector of eight 1s in the RTL.  Its
;; preparation statement calls ix86_fixup_binary_operands_no_copy with PLUS
;; and V8HImode.
;; (The arithmetic operators wrapping the operands are on lines not visible
;; in this listing.)
7809 (define_expand "sse2_uavgv8hi3"
7810 [(set (match_operand:V8HI 0 "register_operand" "")
7816 (match_operand:V8HI 1 "nonimmediate_operand" ""))
7818 (match_operand:V8HI 2 "nonimmediate_operand" "")))
7819 (const_vector:V8HI [(const_int 1) (const_int 1)
7820 (const_int 1) (const_int 1)
7821 (const_int 1) (const_int 1)
7822 (const_int 1) (const_int 1)]))
7825 "ix86_fixup_binary_operands_no_copy (PLUS, V8HImode, operands);")
;; AVX form of the V8HI "uavg" pattern: emits the three-operand vpavgw.
;; Operand 1 is marked commutative ("%") and operand 2 may be in memory.
;; Enabled when TARGET_AVX holds and ix86_binary_operator_ok accepts the
;; operands for PLUS in V8HImode.
7827 (define_insn "*avx_uavgv8hi3"
7828 [(set (match_operand:V8HI 0 "register_operand" "=x")
7834 (match_operand:V8HI 1 "nonimmediate_operand" "%x"))
7836 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
7837 (const_vector:V8HI [(const_int 1) (const_int 1)
7838 (const_int 1) (const_int 1)
7839 (const_int 1) (const_int 1)
7840 (const_int 1) (const_int 1)]))
7842 "TARGET_AVX && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
7843 "vpavgw\t{%2, %1, %0|%0, %1, %2}"
7844 [(set_attr "type" "sseiadd")
7845 (set_attr "prefix" "vex")
7846 (set_attr "mode" "TI")])
;; SSE2 form of the V8HI "uavg" pattern: emits the two-operand pavgw, so
;; operand 1 is tied to the destination (and marked commutative with "%").
;; Enabled when TARGET_SSE2 holds and ix86_binary_operator_ok accepts the
;; operands for PLUS in V8HImode.
7848 (define_insn "*sse2_uavgv8hi3"
7849 [(set (match_operand:V8HI 0 "register_operand" "=x")
7855 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
7857 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
7858 (const_vector:V8HI [(const_int 1) (const_int 1)
7859 (const_int 1) (const_int 1)
7860 (const_int 1) (const_int 1)
7861 (const_int 1) (const_int 1)]))
7863 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
7864 "pavgw\t{%2, %0|%0, %2}"
7865 [(set_attr "type" "sseiadd")
7866 (set_attr "prefix_data16" "1")
7867 (set_attr "mode" "TI")])
7869 ;; The correct representation for this is absolutely enormous, and
7870 ;; surely not generally useful.
;; AVX form of psadbw, modelled as an opaque unspec over two V16QI inputs
;; with a V2DI result; emits the three-operand vpsadbw.  Operand 2 may be in
;; memory.  (The unspec code and condition are on lines not visible here.)
7871 (define_insn "*avx_psadbw"
7872 [(set (match_operand:V2DI 0 "register_operand" "=x")
7873 (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "x")
7874 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
7877 "vpsadbw\t{%2, %1, %0|%0, %1, %2}"
7878 [(set_attr "type" "sseiadd")
7879 (set_attr "prefix" "vex")
7880 (set_attr "mode" "TI")])
;; Two-operand psadbw, modelled as an opaque unspec over two V16QI inputs
;; with a V2DI result.  Operand 1 is tied to the destination; operand 2 may
;; be in memory.  (The unspec code and condition are on lines not visible
;; here.)
7882 (define_insn "sse2_psadbw"
7883 [(set (match_operand:V2DI 0 "register_operand" "=x")
7884 (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "0")
7885 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
7888 "psadbw\t{%2, %0|%0, %2}"
7889 [(set_attr "type" "sseiadd")
7890 (set_attr "atom_unit" "simul")
7891 (set_attr "prefix_data16" "1")
7892 (set_attr "mode" "TI")])
;; vmovmskps/vmovmskpd for the modes of the AVX256MODEF2P iterator: takes an
;; SSE register operand and writes an SImode general register.  Enabled
;; when AVX256_VEC_FLOAT_MODE_P accepts the mode; always VEX-encoded.
7894 (define_insn "avx_movmskp<avxmodesuffixf2c>256"
7895 [(set (match_operand:SI 0 "register_operand" "=r")
7897 [(match_operand:AVX256MODEF2P 1 "register_operand" "x")]
7899 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
7900 "vmovmskp<avxmodesuffixf2c>\t{%1, %0|%0, %1}"
7901 [(set_attr "type" "ssecvt")
7902 (set_attr "prefix" "vex")
7903 (set_attr "mode" "<MODE>")])
;; movmskps/movmskpd for the modes of the SSEMODEF2P iterator (V4SF, V2DF):
;; takes an SSE register operand and writes an SImode general register.
;; Enabled when SSE_VEC_FLOAT_MODE_P accepts the mode; the "%v" prefix and
;; "maybe_vex" allow the VEX encoding.
7905 (define_insn "<sse>_movmskp<ssemodesuffixf2c>"
7906 [(set (match_operand:SI 0 "register_operand" "=r")
7908 [(match_operand:SSEMODEF2P 1 "register_operand" "x")]
7910 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
7911 "%vmovmskp<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
7912 [(set_attr "type" "ssemov")
7913 (set_attr "prefix" "maybe_vex")
7914 (set_attr "mode" "<MODE>")])
;; pmovmskb, modelled as an unspec: takes a V16QI SSE register operand and
;; writes an SImode general register.  The "%v" prefix and "maybe_vex" allow
;; the VEX encoding.  (The unspec code and condition are on lines not
;; visible in this listing.)
7916 (define_insn "sse2_pmovmskb"
7917 [(set (match_operand:SI 0 "register_operand" "=r")
7918 (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
7921 "%vpmovmskb\t{%1, %0|%0, %1}"
7922 [(set_attr "type" "ssemov")
7923 (set_attr "prefix_data16" "1")
7924 (set_attr "prefix" "maybe_vex")
7925 (set_attr "mode" "SI")])
;; Expander for maskmovdqu: a V16QI memory destination set from an unspec
;; whose visible inputs are two V16QI register operands.
;; (The remainder of the pattern is on lines not visible in this listing.)
7927 (define_expand "sse2_maskmovdqu"
7928 [(set (match_operand:V16QI 0 "memory_operand" "")
7929 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "")
7930 (match_operand:V16QI 2 "register_operand" "")
;; 32-bit maskmovdqu (condition: TARGET_SSE2 && !TARGET_64BIT).  The store
;; address is operand 0, an SImode register constrained to "D"; it is not
;; printed in the assembly template, which names only operands 1 and 2.  The
;; old memory contents also appear as an unspec input via (match_dup 0).
;; "length_vex" is fixed at 3 instead of using the default computation.
;; NOTE(review): the in-body note mentions %rdi although this is the
;; !TARGET_64BIT variant with an SImode address register.
7936 (define_insn "*sse2_maskmovdqu"
7937 [(set (mem:V16QI (match_operand:SI 0 "register_operand" "D"))
7938 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
7939 (match_operand:V16QI 2 "register_operand" "x")
7940 (mem:V16QI (match_dup 0))]
7942 "TARGET_SSE2 && !TARGET_64BIT"
7943 ;; @@@ check ordering of operands in intel/nonintel syntax
7944 "%vmaskmovdqu\t{%2, %1|%1, %2}"
7945 [(set_attr "type" "ssemov")
7946 (set_attr "prefix_data16" "1")
7947 ;; The implicit %rdi operand confuses default length_vex computation.
7948 (set_attr "length_vex" "3")
7949 (set_attr "prefix" "maybe_vex")
7950 (set_attr "mode" "TI")])
;; 64-bit maskmovdqu (condition: TARGET_SSE2 && TARGET_64BIT).  The store
;; address is operand 0, a DImode register constrained to "D"; it is not
;; printed in the assembly template.  "length_vex" is computed explicitly:
;; 4 (3 + 1) when operand 2's register number is at or above
;; FIRST_REX_SSE_REG, otherwise 3 (2 + 1).
7952 (define_insn "*sse2_maskmovdqu_rex64"
7953 [(set (mem:V16QI (match_operand:DI 0 "register_operand" "D"))
7954 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
7955 (match_operand:V16QI 2 "register_operand" "x")
7956 (mem:V16QI (match_dup 0))]
7958 "TARGET_SSE2 && TARGET_64BIT"
7959 ;; @@@ check ordering of operands in intel/nonintel syntax
7960 "%vmaskmovdqu\t{%2, %1|%1, %2}"
7961 [(set_attr "type" "ssemov")
7962 (set_attr "prefix_data16" "1")
7963 ;; The implicit %rdi operand confuses default length_vex computation.
7964 (set (attr "length_vex")
7965 (symbol_ref ("REGNO (operands[2]) >= FIRST_REX_SSE_REG ? 3 + 1 : 2 + 1")))
7966 (set_attr "prefix" "maybe_vex")
7967 (set_attr "mode" "TI")])
;; ldmxcsr pattern: an unspec_volatile over an SImode memory operand, with
;; the "memory" attribute set to "load" and atom_sse_attr "mxcsr".
;; (Unspec code, condition and template are on lines not visible here.)
7969 (define_insn "sse_ldmxcsr"
7970 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
7974 [(set_attr "type" "sse")
7975 (set_attr "atom_sse_attr" "mxcsr")
7976 (set_attr "prefix" "maybe_vex")
7977 (set_attr "memory" "load")])
;; stmxcsr pattern: sets an SImode memory operand from an unspec_volatile
;; tagged UNSPECV_STMXCSR; the "memory" attribute is "store".
;; (Condition and template are on lines not visible in this listing.)
7979 (define_insn "sse_stmxcsr"
7980 [(set (match_operand:SI 0 "memory_operand" "=m")
7981 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
7984 [(set_attr "type" "sse")
7985 (set_attr "atom_sse_attr" "mxcsr")
7986 (set_attr "prefix" "maybe_vex")
7987 (set_attr "memory" "store")])
;; Expander for sfence (TARGET_SSE or TARGET_3DNOW_A).  The preparation code
;; creates operand 0 as a BLKmode MEM whose address is a Pmode SCRATCH and
;; marks it volatile; the pattern then applies UNSPEC_SFENCE to that MEM.
7989 (define_expand "sse_sfence"
7991 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
7992 "TARGET_SSE || TARGET_3DNOW_A"
7994 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
7995 MEM_VOLATILE_P (operands[0]) = 1;
;; Insn matching the sfence RTL: a BLKmode operand set from UNSPEC_SFENCE of
;; itself.  Operand 0 has an empty predicate and constraint.  length_address
;; is 0 and the "memory" attribute is "unknown".
;; (The output template is on a line not visible in this listing.)
7998 (define_insn "*sse_sfence"
7999 [(set (match_operand:BLK 0 "" "")
8000 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
8001 "TARGET_SSE || TARGET_3DNOW_A"
8003 [(set_attr "type" "sse")
8004 (set_attr "length_address" "0")
8005 (set_attr "atom_sse_attr" "fence")
8006 (set_attr "memory" "unknown")])
;; clflush pattern: an unspec_volatile over an address operand (predicate
;; address_operand, constraint "p"); "memory" attribute is "unknown".
;; (Unspec code, condition and template are on lines not visible here.)
8008 (define_insn "sse2_clflush"
8009 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
8013 [(set_attr "type" "sse")
8014 (set_attr "atom_sse_attr" "fence")
8015 (set_attr "memory" "unknown")])
;; Expander for mfence.  The preparation code creates operand 0 as a BLKmode
;; MEM whose address is a Pmode SCRATCH and marks it volatile; the pattern
;; then applies UNSPEC_MFENCE to that MEM.
;; (The enabling condition is on a line not visible in this listing.)
8017 (define_expand "sse2_mfence"
8019 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
8022 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
8023 MEM_VOLATILE_P (operands[0]) = 1;
;; Insn matching the mfence RTL: a BLKmode operand set from UNSPEC_MFENCE of
;; itself.  Enabled when TARGET_64BIT or TARGET_SSE2 holds.
;; (The output template is on a line not visible in this listing.)
8026 (define_insn "*sse2_mfence"
8027 [(set (match_operand:BLK 0 "" "")
8028 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
8029 "TARGET_64BIT || TARGET_SSE2"
8031 [(set_attr "type" "sse")
8032 (set_attr "length_address" "0")
8033 (set_attr "atom_sse_attr" "fence")
8034 (set_attr "memory" "unknown")])
;; Expander for lfence.  The preparation code creates operand 0 as a BLKmode
;; MEM whose address is a Pmode SCRATCH and marks it volatile; the pattern
;; then applies UNSPEC_LFENCE to that MEM.
;; (The enabling condition is on a line not visible in this listing.)
8036 (define_expand "sse2_lfence"
8038 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
8041 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
8042 MEM_VOLATILE_P (operands[0]) = 1;
;; Insn matching the lfence RTL: a BLKmode operand set from UNSPEC_LFENCE of
;; itself.  Unlike the sfence/mfence insns, atom_sse_attr is "lfence".
;; (Condition and output template are on lines not visible in this listing.)
8045 (define_insn "*sse2_lfence"
8046 [(set (match_operand:BLK 0 "" "")
8047 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
8050 [(set_attr "type" "sse")
8051 (set_attr "length_address" "0")
8052 (set_attr "atom_sse_attr" "lfence")
8053 (set_attr "memory" "unknown")])
;; mwait pattern: an unspec_volatile over two SImode register operands
;; pinned by constraints "a" and "c".  The same SImode pattern serves 64-bit
;; targets, for the reason given in the comment inside the pattern.
;; Instruction length is fixed at 3 bytes.
8055 (define_insn "sse3_mwait"
8056 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
8057 (match_operand:SI 1 "register_operand" "c")]
8060 ;; 64bit version is "mwait %rax,%rcx". But only lower 32bits are used.
8061 ;; Since 32bit register operands are implicitly zero extended to 64bit,
8062 ;; we only need to set up 32bit registers.
8064 [(set_attr "length" "3")])
;; 32-bit monitor pattern (condition: TARGET_SSE3 && !TARGET_64BIT): an
;; unspec_volatile over three SImode register operands pinned by constraints
;; "a", "c" and "d", printed explicitly in the template.  Length is 3 bytes.
8066 (define_insn "sse3_monitor"
8067 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
8068 (match_operand:SI 1 "register_operand" "c")
8069 (match_operand:SI 2 "register_operand" "d")]
8071 "TARGET_SSE3 && !TARGET_64BIT"
8072 "monitor\t%0, %1, %2"
8073 [(set_attr "length" "3")])
;; 64-bit monitor pattern (condition: TARGET_SSE3 && TARGET_64BIT).  Operand
;; 0 (constraint "a") is DImode; operands 1 and 2 (constraints "c" and "d")
;; stay SImode for the reason given in the comment inside the pattern.
;; Length is 3 bytes.  (The output template is on a line not visible here.)
8075 (define_insn "sse3_monitor64"
8076 [(unspec_volatile [(match_operand:DI 0 "register_operand" "a")
8077 (match_operand:SI 1 "register_operand" "c")
8078 (match_operand:SI 2 "register_operand" "d")]
8080 "TARGET_SSE3 && TARGET_64BIT"
8081 ;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in
8082 ;; RCX and RDX are used. Since 32bit register operands are implicitly
8083 ;; zero extended to 64bit, we only need to set up 32bit registers.
8085 [(set_attr "length" "3")])
8087 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8089 ;; SSSE3 instructions
8091 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; AVX form of phaddw on V8HI: emits the three-operand vphaddw.  The RTL
;; selects HImode elements 0..7 of operand 1 and then elements 0..7 of
;; operand 2 (which may be in memory), in consecutive pairs.
;; (The operators combining each pair, and the condition, are on lines not
;; visible in this listing.)
8093 (define_insn "*avx_phaddwv8hi3"
8094 [(set (match_operand:V8HI 0 "register_operand" "=x")
8100 (match_operand:V8HI 1 "register_operand" "x")
8101 (parallel [(const_int 0)]))
8102 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8104 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8105 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8108 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8109 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8111 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8112 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8117 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8118 (parallel [(const_int 0)]))
8119 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8121 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8122 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8125 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8126 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8128 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8129 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8131 "vphaddw\t{%2, %1, %0|%0, %1, %2}"
8132 [(set_attr "type" "sseiadd")
8133 (set_attr "prefix_extra" "1")
8134 (set_attr "prefix" "vex")
8135 (set_attr "mode" "TI")])
;; Two-operand phaddw on V8HI; operand 1 is tied to the destination.  The
;; RTL selects HImode elements 0..7 of operand 1 and then elements 0..7 of
;; operand 2 (which may be in memory), in consecutive pairs.
;; (The operators combining each pair, and the condition, are on lines not
;; visible in this listing.)
8137 (define_insn "ssse3_phaddwv8hi3"
8138 [(set (match_operand:V8HI 0 "register_operand" "=x")
8144 (match_operand:V8HI 1 "register_operand" "0")
8145 (parallel [(const_int 0)]))
8146 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8148 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8149 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8152 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8153 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8155 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8156 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8161 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8162 (parallel [(const_int 0)]))
8163 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8165 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8166 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8169 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8170 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8172 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8173 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8175 "phaddw\t{%2, %0|%0, %2}"
8176 [(set_attr "type" "sseiadd")
8177 (set_attr "atom_unit" "complex")
8178 (set_attr "prefix_data16" "1")
8179 (set_attr "prefix_extra" "1")
8180 (set_attr "mode" "TI")])
;; MMX-register ("y") phaddw on V4HI; operand 1 is tied to the destination.
;; The RTL selects HImode elements 0..3 of operand 1 and then elements 0..3
;; of operand 2 (which may be in memory), in consecutive pairs.  prefix_rex
;; is computed from x86_extended_reg_mentioned_p.
;; (The operators combining each pair, and the condition, are on lines not
;; visible in this listing.)
8182 (define_insn "ssse3_phaddwv4hi3"
8183 [(set (match_operand:V4HI 0 "register_operand" "=y")
8188 (match_operand:V4HI 1 "register_operand" "0")
8189 (parallel [(const_int 0)]))
8190 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8192 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8193 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8197 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8198 (parallel [(const_int 0)]))
8199 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8201 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8202 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8204 "phaddw\t{%2, %0|%0, %2}"
8205 [(set_attr "type" "sseiadd")
8206 (set_attr "atom_unit" "complex")
8207 (set_attr "prefix_extra" "1")
8208 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8209 (set_attr "mode" "DI")])
;; AVX form of phaddd on V4SI: emits the three-operand vphaddd.  The RTL
;; selects SImode elements 0..3 of operand 1 and then elements 0..3 of
;; operand 2 (which may be in memory), in consecutive pairs.
;; (The operators combining each pair, and the condition, are on lines not
;; visible in this listing.)
8211 (define_insn "*avx_phadddv4si3"
8212 [(set (match_operand:V4SI 0 "register_operand" "=x")
8217 (match_operand:V4SI 1 "register_operand" "x")
8218 (parallel [(const_int 0)]))
8219 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8221 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8222 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8226 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
8227 (parallel [(const_int 0)]))
8228 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8230 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8231 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
8233 "vphaddd\t{%2, %1, %0|%0, %1, %2}"
8234 [(set_attr "type" "sseiadd")
8235 (set_attr "prefix_extra" "1")
8236 (set_attr "prefix" "vex")
8237 (set_attr "mode" "TI")])
;; Two-operand phaddd on V4SI; operand 1 is tied to the destination.  The
;; RTL selects SImode elements 0..3 of operand 1 and then elements 0..3 of
;; operand 2 (which may be in memory), in consecutive pairs.
;; (The operators combining each pair, and the condition, are on lines not
;; visible in this listing.)
8239 (define_insn "ssse3_phadddv4si3"
8240 [(set (match_operand:V4SI 0 "register_operand" "=x")
8245 (match_operand:V4SI 1 "register_operand" "0")
8246 (parallel [(const_int 0)]))
8247 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8249 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8250 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8254 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
8255 (parallel [(const_int 0)]))
8256 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8258 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8259 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
8261 "phaddd\t{%2, %0|%0, %2}"
8262 [(set_attr "type" "sseiadd")
8263 (set_attr "atom_unit" "complex")
8264 (set_attr "prefix_data16" "1")
8265 (set_attr "prefix_extra" "1")
8266 (set_attr "mode" "TI")])
;; MMX-register ("y") phaddd on V2SI; operand 1 is tied to the destination.
;; The RTL selects SImode elements 0 and 1 of operand 1 and then of operand
;; 2 (which may be in memory).  prefix_rex is computed from
;; x86_extended_reg_mentioned_p.
;; (The operators combining each pair, and the condition, are on lines not
;; visible in this listing.)
8268 (define_insn "ssse3_phadddv2si3"
8269 [(set (match_operand:V2SI 0 "register_operand" "=y")
8273 (match_operand:V2SI 1 "register_operand" "0")
8274 (parallel [(const_int 0)]))
8275 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8278 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
8279 (parallel [(const_int 0)]))
8280 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
8282 "phaddd\t{%2, %0|%0, %2}"
8283 [(set_attr "type" "sseiadd")
8284 (set_attr "atom_unit" "complex")
8285 (set_attr "prefix_extra" "1")
8286 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8287 (set_attr "mode" "DI")])
;; AVX form of phaddsw on V8HI: emits the three-operand vphaddsw.  The RTL
;; selects HImode elements 0..7 of operand 1 and then elements 0..7 of
;; operand 2 (which may be in memory), in consecutive pairs.
;; (The operators combining each pair, and the condition, are on lines not
;; visible in this listing.)
8289 (define_insn "*avx_phaddswv8hi3"
8290 [(set (match_operand:V8HI 0 "register_operand" "=x")
8296 (match_operand:V8HI 1 "register_operand" "x")
8297 (parallel [(const_int 0)]))
8298 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8300 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8301 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8304 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8305 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8307 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8308 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8313 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8314 (parallel [(const_int 0)]))
8315 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8317 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8318 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8321 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8322 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8324 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8325 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8327 "vphaddsw\t{%2, %1, %0|%0, %1, %2}"
8328 [(set_attr "type" "sseiadd")
8329 (set_attr "prefix_extra" "1")
8330 (set_attr "prefix" "vex")
8331 (set_attr "mode" "TI")])
;; Two-operand phaddsw on V8HI; operand 1 is tied to the destination.  The
;; RTL selects HImode elements 0..7 of operand 1 and then elements 0..7 of
;; operand 2 (which may be in memory), in consecutive pairs.
;; (The operators combining each pair, and the condition, are on lines not
;; visible in this listing.)
8333 (define_insn "ssse3_phaddswv8hi3"
8334 [(set (match_operand:V8HI 0 "register_operand" "=x")
8340 (match_operand:V8HI 1 "register_operand" "0")
8341 (parallel [(const_int 0)]))
8342 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8344 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8345 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8348 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8349 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8351 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8352 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8357 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8358 (parallel [(const_int 0)]))
8359 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8361 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8362 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8365 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8366 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8368 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8369 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8371 "phaddsw\t{%2, %0|%0, %2}"
8372 [(set_attr "type" "sseiadd")
8373 (set_attr "atom_unit" "complex")
8374 (set_attr "prefix_data16" "1")
8375 (set_attr "prefix_extra" "1")
8376 (set_attr "mode" "TI")])
;; MMX-register ("y") phaddsw on V4HI; operand 1 is tied to the destination.
;; The RTL selects HImode elements 0..3 of operand 1 and then elements 0..3
;; of operand 2 (which may be in memory), in consecutive pairs.  prefix_rex
;; is computed from x86_extended_reg_mentioned_p.
;; (The operators combining each pair, and the condition, are on lines not
;; visible in this listing.)
8378 (define_insn "ssse3_phaddswv4hi3"
8379 [(set (match_operand:V4HI 0 "register_operand" "=y")
8384 (match_operand:V4HI 1 "register_operand" "0")
8385 (parallel [(const_int 0)]))
8386 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8388 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8389 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8393 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8394 (parallel [(const_int 0)]))
8395 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8397 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8398 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8400 "phaddsw\t{%2, %0|%0, %2}"
8401 [(set_attr "type" "sseiadd")
8402 (set_attr "atom_unit" "complex")
8403 (set_attr "prefix_extra" "1")
8404 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8405 (set_attr "mode" "DI")])
;; AVX form of phsubw on V8HI: emits the three-operand vphsubw.  The RTL
;; selects HImode elements 0..7 of operand 1 and then elements 0..7 of
;; operand 2 (which may be in memory), in consecutive pairs.
;; (The operators combining each pair, and the condition, are on lines not
;; visible in this listing.)
8407 (define_insn "*avx_phsubwv8hi3"
8408 [(set (match_operand:V8HI 0 "register_operand" "=x")
8414 (match_operand:V8HI 1 "register_operand" "x")
8415 (parallel [(const_int 0)]))
8416 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8418 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8419 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8422 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8423 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8425 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8426 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8431 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8432 (parallel [(const_int 0)]))
8433 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8435 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8436 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8439 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8440 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8442 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8443 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8445 "vphsubw\t{%2, %1, %0|%0, %1, %2}"
8446 [(set_attr "type" "sseiadd")
8447 (set_attr "prefix_extra" "1")
8448 (set_attr "prefix" "vex")
8449 (set_attr "mode" "TI")])
;; Two-operand SSE-register form that emits phsubw on V8HI operands.
;; Operand 1 must be the same register as the destination ("0"); operand 2
;; may be a register or memory.  Elements 0..7 of both inputs are selected.
8451 (define_insn "ssse3_phsubwv8hi3"
8452 [(set (match_operand:V8HI 0 "register_operand" "=x")
8458 (match_operand:V8HI 1 "register_operand" "0")
8459 (parallel [(const_int 0)]))
8460 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8462 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8463 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8466 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8467 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8469 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8470 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8475 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8476 (parallel [(const_int 0)]))
8477 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8479 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8480 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8483 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8484 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8486 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8487 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8489 "phsubw\t{%2, %0|%0, %2}"
8490 [(set_attr "type" "sseiadd")
8491 (set_attr "atom_unit" "complex")
8492 (set_attr "prefix_data16" "1")
8493 (set_attr "prefix_extra" "1")
8494 (set_attr "mode" "TI")])
;; MMX-register ("y") form that emits phsubw on V4HI operands.
;; Operand 1 is tied to the destination; operand 2 may be a register or
;; memory.  prefix_rex is computed per insn by x86_extended_reg_mentioned_p.
8496 (define_insn "ssse3_phsubwv4hi3"
8497 [(set (match_operand:V4HI 0 "register_operand" "=y")
8502 (match_operand:V4HI 1 "register_operand" "0")
8503 (parallel [(const_int 0)]))
8504 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8506 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8507 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8511 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8512 (parallel [(const_int 0)]))
8513 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8515 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8516 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8518 "phsubw\t{%2, %0|%0, %2}"
8519 [(set_attr "type" "sseiadd")
8520 (set_attr "atom_unit" "complex")
8521 (set_attr "prefix_extra" "1")
8522 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8523 (set_attr "mode" "DI")])
;; VEX-encoded three-operand form that emits vphsubd on V4SI operands.
;; The destination is independent of operand 1; SImode elements 0..3 of both
;; inputs are selected.
8525 (define_insn "*avx_phsubdv4si3"
8526 [(set (match_operand:V4SI 0 "register_operand" "=x")
8531 (match_operand:V4SI 1 "register_operand" "x")
8532 (parallel [(const_int 0)]))
8533 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8535 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8536 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8540 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
8541 (parallel [(const_int 0)]))
8542 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8544 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8545 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
8547 "vphsubd\t{%2, %1, %0|%0, %1, %2}"
8548 [(set_attr "type" "sseiadd")
8549 (set_attr "prefix_extra" "1")
8550 (set_attr "prefix" "vex")
8551 (set_attr "mode" "TI")])
;; Two-operand SSE-register form that emits phsubd on V4SI operands.
;; Operand 1 is tied to the destination ("0"); operand 2 may be a register
;; or memory.
8553 (define_insn "ssse3_phsubdv4si3"
8554 [(set (match_operand:V4SI 0 "register_operand" "=x")
8559 (match_operand:V4SI 1 "register_operand" "0")
8560 (parallel [(const_int 0)]))
8561 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8563 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8564 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8568 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
8569 (parallel [(const_int 0)]))
8570 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8572 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8573 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
8575 "phsubd\t{%2, %0|%0, %2}"
8576 [(set_attr "type" "sseiadd")
8577 (set_attr "atom_unit" "complex")
8578 (set_attr "prefix_data16" "1")
8579 (set_attr "prefix_extra" "1")
8580 (set_attr "mode" "TI")])
;; MMX-register ("y") form that emits phsubd on V2SI operands.
;; Operand 1 is tied to the destination; SImode elements 0 and 1 of each
;; input are selected.
8582 (define_insn "ssse3_phsubdv2si3"
8583 [(set (match_operand:V2SI 0 "register_operand" "=y")
8587 (match_operand:V2SI 1 "register_operand" "0")
8588 (parallel [(const_int 0)]))
8589 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8592 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
8593 (parallel [(const_int 0)]))
8594 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
8596 "phsubd\t{%2, %0|%0, %2}"
8597 [(set_attr "type" "sseiadd")
8598 (set_attr "atom_unit" "complex")
8599 (set_attr "prefix_extra" "1")
8600 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8601 (set_attr "mode" "DI")])
;; VEX-encoded three-operand form that emits vphsubsw on V8HI operands.
;; The destination is independent of operand 1; elements 0..7 of both inputs
;; are selected.
8603 (define_insn "*avx_phsubswv8hi3"
8604 [(set (match_operand:V8HI 0 "register_operand" "=x")
8610 (match_operand:V8HI 1 "register_operand" "x")
8611 (parallel [(const_int 0)]))
8612 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8614 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8615 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8618 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8619 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8621 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8622 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8627 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8628 (parallel [(const_int 0)]))
8629 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8631 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8632 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8635 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8636 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8638 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8639 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8641 "vphsubsw\t{%2, %1, %0|%0, %1, %2}"
8642 [(set_attr "type" "sseiadd")
8643 (set_attr "prefix_extra" "1")
8644 (set_attr "prefix" "vex")
8645 (set_attr "mode" "TI")])
;; Two-operand SSE-register form that emits phsubsw on V8HI operands.
;; Operand 1 is tied to the destination ("0"); operand 2 may be a register
;; or memory.  Elements 0..7 of both inputs are selected.
8647 (define_insn "ssse3_phsubswv8hi3"
8648 [(set (match_operand:V8HI 0 "register_operand" "=x")
8654 (match_operand:V8HI 1 "register_operand" "0")
8655 (parallel [(const_int 0)]))
8656 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8658 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8659 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8662 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8663 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8665 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8666 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8671 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8672 (parallel [(const_int 0)]))
8673 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8675 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8676 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8679 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8680 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8682 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8683 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8685 "phsubsw\t{%2, %0|%0, %2}"
8686 [(set_attr "type" "sseiadd")
8687 (set_attr "atom_unit" "complex")
8688 (set_attr "prefix_data16" "1")
8689 (set_attr "prefix_extra" "1")
8690 (set_attr "mode" "TI")])
;; MMX-register ("y") form that emits phsubsw on V4HI operands.
;; Operand 1 is tied to the destination; operand 2 may be a register or
;; memory.  prefix_rex is computed per insn by x86_extended_reg_mentioned_p.
8692 (define_insn "ssse3_phsubswv4hi3"
8693 [(set (match_operand:V4HI 0 "register_operand" "=y")
8698 (match_operand:V4HI 1 "register_operand" "0")
8699 (parallel [(const_int 0)]))
8700 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8702 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8703 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8707 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8708 (parallel [(const_int 0)]))
8709 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8711 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8712 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8714 "phsubsw\t{%2, %0|%0, %2}"
8715 [(set_attr "type" "sseiadd")
8716 (set_attr "atom_unit" "complex")
8717 (set_attr "prefix_extra" "1")
8718 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8719 (set_attr "mode" "DI")])
;; VEX-encoded three-operand form that emits vpmaddubsw: two V16QI inputs
;; produce a V8HI result.  Each input is referenced twice, once through a
;; selection starting at element 0 and once through a selection starting at
;; element 1 (ending at element 15).
8721 (define_insn "*avx_pmaddubsw128"
8722 [(set (match_operand:V8HI 0 "register_operand" "=x")
8727 (match_operand:V16QI 1 "register_operand" "x")
8728 (parallel [(const_int 0)
8738 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
8739 (parallel [(const_int 0)
8749 (vec_select:V16QI (match_dup 1)
8750 (parallel [(const_int 1)
8759 (vec_select:V16QI (match_dup 2)
8760 (parallel [(const_int 1)
8767 (const_int 15)]))))))]
8769 "vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
8770 [(set_attr "type" "sseiadd")
8771 (set_attr "prefix_extra" "1")
8772 (set_attr "prefix" "vex")
8773 (set_attr "mode" "TI")])
;; Two-operand SSE-register form that emits pmaddubsw: two V16QI inputs
;; produce a V8HI result.  Operand 1 is tied to the destination ("0");
;; operand 2 may be a register or memory.
8775 (define_insn "ssse3_pmaddubsw128"
8776 [(set (match_operand:V8HI 0 "register_operand" "=x")
8781 (match_operand:V16QI 1 "register_operand" "0")
8782 (parallel [(const_int 0)
8792 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
8793 (parallel [(const_int 0)
8803 (vec_select:V16QI (match_dup 1)
8804 (parallel [(const_int 1)
8813 (vec_select:V16QI (match_dup 2)
8814 (parallel [(const_int 1)
8821 (const_int 15)]))))))]
8823 "pmaddubsw\t{%2, %0|%0, %2}"
8824 [(set_attr "type" "sseiadd")
8825 (set_attr "atom_unit" "simul")
8826 (set_attr "prefix_data16" "1")
8827 (set_attr "prefix_extra" "1")
8828 (set_attr "mode" "TI")])
;; MMX-register ("y") form that emits pmaddubsw: two V8QI inputs produce a
;; V4HI result.  Operand 1 is tied to the destination; the selections start
;; at elements 0 and 1 and end at element 7.
8830 (define_insn "ssse3_pmaddubsw"
8831 [(set (match_operand:V4HI 0 "register_operand" "=y")
8836 (match_operand:V8QI 1 "register_operand" "0")
8837 (parallel [(const_int 0)
8843 (match_operand:V8QI 2 "nonimmediate_operand" "ym")
8844 (parallel [(const_int 0)
8850 (vec_select:V8QI (match_dup 1)
8851 (parallel [(const_int 1)
8856 (vec_select:V8QI (match_dup 2)
8857 (parallel [(const_int 1)
8860 (const_int 7)]))))))]
8862 "pmaddubsw\t{%2, %0|%0, %2}"
8863 [(set_attr "type" "sseiadd")
8864 (set_attr "atom_unit" "simul")
8865 (set_attr "prefix_extra" "1")
8866 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8867 (set_attr "mode" "DI")])
;; Expander for the V8HI pmulhrsw pattern.  Both sources are accepted as
;; nonimmediate operands; the preparation statement hands them to
;; ix86_fixup_binary_operands_no_copy (as a MULT in V8HImode) before the
;; matching insn patterns are tried.  The template carries a V8HI constant
;; vector of ones.
8869 (define_expand "ssse3_pmulhrswv8hi3"
8870 [(set (match_operand:V8HI 0 "register_operand" "")
8877 (match_operand:V8HI 1 "nonimmediate_operand" ""))
8879 (match_operand:V8HI 2 "nonimmediate_operand" "")))
8881 (const_vector:V8HI [(const_int 1) (const_int 1)
8882 (const_int 1) (const_int 1)
8883 (const_int 1) (const_int 1)
8884 (const_int 1) (const_int 1)]))
8887 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; VEX-encoded three-operand vpmulhrsw on V8HI.  Enabled when TARGET_AVX
;; holds and ix86_binary_operator_ok accepts the operands.  The "%" on
;; operand 1 marks operands 1 and 2 as commutative.
8889 (define_insn "*avx_pmulhrswv8hi3"
8890 [(set (match_operand:V8HI 0 "register_operand" "=x")
8897 (match_operand:V8HI 1 "nonimmediate_operand" "%x"))
8899 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
8901 (const_vector:V8HI [(const_int 1) (const_int 1)
8902 (const_int 1) (const_int 1)
8903 (const_int 1) (const_int 1)
8904 (const_int 1) (const_int 1)]))
8906 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
8907 "vpmulhrsw\t{%2, %1, %0|%0, %1, %2}"
8908 [(set_attr "type" "sseimul")
8909 (set_attr "prefix_extra" "1")
8910 (set_attr "prefix" "vex")
8911 (set_attr "mode" "TI")])
;; Two-operand pmulhrsw on V8HI in SSE registers.  Enabled when TARGET_SSSE3
;; holds and ix86_binary_operator_ok accepts the operands.  Operand 1 is
;; commutative with operand 2 ("%") and tied to the destination ("0").
8913 (define_insn "*ssse3_pmulhrswv8hi3"
8914 [(set (match_operand:V8HI 0 "register_operand" "=x")
8921 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
8923 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
8925 (const_vector:V8HI [(const_int 1) (const_int 1)
8926 (const_int 1) (const_int 1)
8927 (const_int 1) (const_int 1)
8928 (const_int 1) (const_int 1)]))
8930 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
8931 "pmulhrsw\t{%2, %0|%0, %2}"
8932 [(set_attr "type" "sseimul")
8933 (set_attr "prefix_data16" "1")
8934 (set_attr "prefix_extra" "1")
8935 (set_attr "mode" "TI")])
;; Expander for the V4HI pmulhrsw pattern.  The preparation statement hands
;; the operands to ix86_fixup_binary_operands_no_copy (as a MULT in
;; V4HImode).  The template carries a V4HI constant vector of ones.
8937 (define_expand "ssse3_pmulhrswv4hi3"
8938 [(set (match_operand:V4HI 0 "register_operand" "")
8945 (match_operand:V4HI 1 "nonimmediate_operand" ""))
8947 (match_operand:V4HI 2 "nonimmediate_operand" "")))
8949 (const_vector:V4HI [(const_int 1) (const_int 1)
8950 (const_int 1) (const_int 1)]))
8953 "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
;; Two-operand pmulhrsw on V4HI in MMX ("y") registers.  Enabled when
;; TARGET_SSSE3 holds and ix86_binary_operator_ok accepts the operands.
;; Operand 1 is commutative with operand 2 and tied to the destination.
8955 (define_insn "*ssse3_pmulhrswv4hi3"
8956 [(set (match_operand:V4HI 0 "register_operand" "=y")
8963 (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
8965 (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
8967 (const_vector:V4HI [(const_int 1) (const_int 1)
8968 (const_int 1) (const_int 1)]))
8970 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
8971 "pmulhrsw\t{%2, %0|%0, %2}"
8972 [(set_attr "type" "sseimul")
8973 (set_attr "prefix_extra" "1")
8974 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8975 (set_attr "mode" "DI")])
;; VEX-encoded three-operand vpshufb on V16QI, modelled as an unspec of
;; operands 1 and 2.  The destination is independent of operand 1; operand 2
;; may be a register or memory.
8977 (define_insn "*avx_pshufbv16qi3"
8978 [(set (match_operand:V16QI 0 "register_operand" "=x")
8979 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
8980 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
8983 "vpshufb\t{%2, %1, %0|%0, %1, %2}";
8984 [(set_attr "type" "sselog1")
8985 (set_attr "prefix_extra" "1")
8986 (set_attr "prefix" "vex")
8987 (set_attr "mode" "TI")])
;; Two-operand pshufb on V16QI in SSE registers, modelled as an unspec.
;; Operand 1 is tied to the destination ("0"); operand 2 may be a register
;; or memory.
8989 (define_insn "ssse3_pshufbv16qi3"
8990 [(set (match_operand:V16QI 0 "register_operand" "=x")
8991 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
8992 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
8995 "pshufb\t{%2, %0|%0, %2}";
8996 [(set_attr "type" "sselog1")
8997 (set_attr "prefix_data16" "1")
8998 (set_attr "prefix_extra" "1")
8999 (set_attr "mode" "TI")])
;; Two-operand pshufb on V8QI in MMX ("y") registers, modelled as an unspec.
;; Operand 1 is tied to the destination; prefix_rex is computed per insn by
;; x86_extended_reg_mentioned_p.
9001 (define_insn "ssse3_pshufbv8qi3"
9002 [(set (match_operand:V8QI 0 "register_operand" "=y")
9003 (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0")
9004 (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
9007 "pshufb\t{%2, %0|%0, %2}";
9008 [(set_attr "type" "sselog1")
9009 (set_attr "prefix_extra" "1")
9010 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9011 (set_attr "mode" "DI")])
;; VEX-encoded three-operand vpsign{b,w,d}, instantiated for every mode of
;; the SSEMODE124 iterator (V16QI, V8HI, V4SI).  <ssevecsize> supplies the
;; mnemonic's element-size suffix.
9013 (define_insn "*avx_psign<mode>3"
9014 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
9016 [(match_operand:SSEMODE124 1 "register_operand" "x")
9017 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")]
9020 "vpsign<ssevecsize>\t{%2, %1, %0|%0, %1, %2}";
9021 [(set_attr "type" "sselog1")
9022 (set_attr "prefix_extra" "1")
9023 (set_attr "prefix" "vex")
9024 (set_attr "mode" "TI")])
;; Two-operand psign{b,w,d} in SSE registers, instantiated for every mode of
;; the SSEMODE124 iterator.  Operand 1 is tied to the destination ("0").
9026 (define_insn "ssse3_psign<mode>3"
9027 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
9029 [(match_operand:SSEMODE124 1 "register_operand" "0")
9030 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")]
9033 "psign<ssevecsize>\t{%2, %0|%0, %2}";
9034 [(set_attr "type" "sselog1")
9035 (set_attr "prefix_data16" "1")
9036 (set_attr "prefix_extra" "1")
9037 (set_attr "mode" "TI")])
;; Two-operand psign in MMX ("y") registers, instantiated over the MMXMODEI
;; iterator; <mmxvecsize> supplies the mnemonic suffix.  It shares its name
;; template with the SSEMODE124 pattern but iterates over different modes.
9039 (define_insn "ssse3_psign<mode>3"
9040 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
9042 [(match_operand:MMXMODEI 1 "register_operand" "0")
9043 (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
9046 "psign<mmxvecsize>\t{%2, %0|%0, %2}";
9047 [(set_attr "type" "sselog1")
9048 (set_attr "prefix_extra" "1")
9049 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9050 (set_attr "mode" "DI")])
;; VEX-encoded three-operand vpalignr on TImode values.  Operand 3 must
;; satisfy const_0_to_255_mul_8_operand; the output code divides it by 8
;; before printing, so the emitted immediate is one eighth of the RTL value
;; (presumably a bit count converted to the instruction's byte count).
9052 (define_insn "*avx_palignrti"
9053 [(set (match_operand:TI 0 "register_operand" "=x")
9054 (unspec:TI [(match_operand:TI 1 "register_operand" "x")
9055 (match_operand:TI 2 "nonimmediate_operand" "xm")
9056 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
9060 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
9061 return "vpalignr\t{%3, %2, %1, %0|%0, %1, %2, %3}";
9063 [(set_attr "type" "sseishft")
9064 (set_attr "prefix_extra" "1")
9065 (set_attr "length_immediate" "1")
9066 (set_attr "prefix" "vex")
9067 (set_attr "mode" "TI")])
;; Two-operand palignr on TImode values in SSE registers; operand 1 is tied
;; to the destination.  Operand 3 must satisfy const_0_to_255_mul_8_operand
;; and is divided by 8 by the output code before it is printed.
9069 (define_insn "ssse3_palignrti"
9070 [(set (match_operand:TI 0 "register_operand" "=x")
9071 (unspec:TI [(match_operand:TI 1 "register_operand" "0")
9072 (match_operand:TI 2 "nonimmediate_operand" "xm")
9073 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
9077 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
9078 return "palignr\t{%3, %2, %0|%0, %2, %3}";
9080 [(set_attr "type" "sseishft")
9081 (set_attr "atom_unit" "sishuf")
9082 (set_attr "prefix_data16" "1")
9083 (set_attr "prefix_extra" "1")
9084 (set_attr "length_immediate" "1")
9085 (set_attr "mode" "TI")])
;; Two-operand palignr on DImode values in MMX ("y") registers; operand 1 is
;; tied to the destination.  Operand 3 must satisfy
;; const_0_to_255_mul_8_operand and is divided by 8 before it is printed.
9087 (define_insn "ssse3_palignrdi"
9088 [(set (match_operand:DI 0 "register_operand" "=y")
9089 (unspec:DI [(match_operand:DI 1 "register_operand" "0")
9090 (match_operand:DI 2 "nonimmediate_operand" "ym")
9091 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
9095 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
9096 return "palignr\t{%3, %2, %0|%0, %2, %3}";
9098 [(set_attr "type" "sseishft")
9099 (set_attr "atom_unit" "sishuf")
9100 (set_attr "prefix_extra" "1")
9101 (set_attr "length_immediate" "1")
9102 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9103 (set_attr "mode" "DI")])
;; Element-wise absolute value for the SSEMODE124 modes (V16QI, V8HI, V4SI)
;; in SSE registers; the source may be a register or memory.  The template
;; starts with "%v" and the prefix attribute is "maybe_vex", so one pattern
;; covers both the legacy and the VEX spelling of pabs{b,w,d}.
9105 (define_insn "abs<mode>2"
9106 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
9107 (abs:SSEMODE124 (match_operand:SSEMODE124 1 "nonimmediate_operand" "xm")))]
9109 "%vpabs<ssevecsize>\t{%1, %0|%0, %1}"
9110 [(set_attr "type" "sselog1")
9111 (set_attr "prefix_data16" "1")
9112 (set_attr "prefix_extra" "1")
9113 (set_attr "prefix" "maybe_vex")
9114 (set_attr "mode" "TI")])
;; Element-wise absolute value for the MMXMODEI modes in MMX ("y")
;; registers; the source may be a register or memory.  prefix_rep is set to
;; "0" explicitly and prefix_rex is computed per insn.
9116 (define_insn "abs<mode>2"
9117 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
9118 (abs:MMXMODEI (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
9120 "pabs<mmxvecsize>\t{%1, %0|%0, %1}";
9121 [(set_attr "type" "sselog1")
9122 (set_attr "prefix_rep" "0")
9123 (set_attr "prefix_extra" "1")
9124 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9125 (set_attr "mode" "DI")])
9127 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9129 ;; AMD SSE4A instructions
9131 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Store of a scalar MODEF value from an SSE register ("x") to memory using
;; movnts{s,d}; <ssemodefsuffix> supplies the mnemonic suffix.  The
;; destination must be memory.
9133 (define_insn "sse4a_movnt<mode>"
9134 [(set (match_operand:MODEF 0 "memory_operand" "=m")
9136 [(match_operand:MODEF 1 "register_operand" "x")]
9139 "movnts<ssemodefsuffix>\t{%1, %0|%0, %1}"
9140 [(set_attr "type" "ssemov")
9141 (set_attr "mode" "<MODE>")])
;; Vector-source variant of the movnts store: element 0 of an SSEMODEF2P
;; vector (V4SF or V2DF) held in an SSE register is written to a memory
;; destination of the corresponding scalar mode.
9143 (define_insn "sse4a_vmmovnt<mode>"
9144 [(set (match_operand:<ssescalarmode> 0 "memory_operand" "=m")
9145 (unspec:<ssescalarmode>
9146 [(vec_select:<ssescalarmode>
9147 (match_operand:SSEMODEF2P 1 "register_operand" "x")
9148 (parallel [(const_int 0)]))]
9151 "movnts<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
9152 [(set_attr "type" "ssemov")
9153 (set_attr "mode" "<ssescalarmode>")])
;; Immediate form of extrq: operand 1 is tied to the V2DI destination and
;; operands 2 and 3 are modeless integer constants printed as two
;; immediates (length_immediate is 2).
9155 (define_insn "sse4a_extrqi"
9156 [(set (match_operand:V2DI 0 "register_operand" "=x")
9157 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9158 (match_operand 2 "const_int_operand" "")
9159 (match_operand 3 "const_int_operand" "")]
9162 "extrq\t{%3, %2, %0|%0, %2, %3}"
9163 [(set_attr "type" "sse")
9164 (set_attr "prefix_data16" "1")
9165 (set_attr "length_immediate" "2")
9166 (set_attr "mode" "TI")])
;; Register form of extrq: operand 1 is tied to the V2DI destination and
;; operand 2 is a V16QI value in an SSE register.
9168 (define_insn "sse4a_extrq"
9169 [(set (match_operand:V2DI 0 "register_operand" "=x")
9170 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9171 (match_operand:V16QI 2 "register_operand" "x")]
9174 "extrq\t{%2, %0|%0, %2}"
9175 [(set_attr "type" "sse")
9176 (set_attr "prefix_data16" "1")
9177 (set_attr "mode" "TI")])
;; Immediate form of insertq: operand 1 is tied to the V2DI destination,
;; operand 2 is a V2DI register, and operands 3 and 4 are modeless integer
;; constants printed as two immediates.  The attributes select a rep prefix
;; and no data16 prefix.
9179 (define_insn "sse4a_insertqi"
9180 [(set (match_operand:V2DI 0 "register_operand" "=x")
9181 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9182 (match_operand:V2DI 2 "register_operand" "x")
9183 (match_operand 3 "const_int_operand" "")
9184 (match_operand 4 "const_int_operand" "")]
9187 "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
9188 [(set_attr "type" "sseins")
9189 (set_attr "prefix_data16" "0")
9190 (set_attr "prefix_rep" "1")
9191 (set_attr "length_immediate" "2")
9192 (set_attr "mode" "TI")])
;; Register form of insertq: operand 1 is tied to the V2DI destination and
;; operand 2 is a V2DI register.  The attributes select a rep prefix and no
;; data16 prefix.
9194 (define_insn "sse4a_insertq"
9195 [(set (match_operand:V2DI 0 "register_operand" "=x")
9196 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9197 (match_operand:V2DI 2 "register_operand" "x")]
9200 "insertq\t{%2, %0|%0, %2}"
9201 [(set_attr "type" "sseins")
9202 (set_attr "prefix_data16" "0")
9203 (set_attr "prefix_rep" "1")
9204 (set_attr "mode" "TI")])
9206 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9208 ;; Intel SSE4.1 instructions
9210 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; VEX-encoded vblendp{s,d} over the AVXMODEF2P modes, expressed as a
;; vec_merge.  Operand 2 is the first vec_merge input and operand 1 the
;; second; operand 3 is the immediate mask, whose range is limited by the
;; const_0_to_<blendbits>_operand predicate.
9212 (define_insn "avx_blendp<avxmodesuffixf2c><avxmodesuffix>"
9213 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
9214 (vec_merge:AVXMODEF2P
9215 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
9216 (match_operand:AVXMODEF2P 1 "register_operand" "x")
9217 (match_operand:SI 3 "const_0_to_<blendbits>_operand" "n")))]
9219 "vblendp<avxmodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9220 [(set_attr "type" "ssemov")
9221 (set_attr "prefix_extra" "1")
9222 (set_attr "length_immediate" "1")
9223 (set_attr "prefix" "vex")
9224 (set_attr "mode" "<avxvecmode>")])
;; VEX-encoded vblendvp{s,d} over the AVXMODEF2P modes.  All three inputs
;; are vectors; operand 3 is a register and any SSE register is allowed
;; ("x").  length_immediate is "1" although no operand is an immediate.
;; NOTE(review): presumably this accounts for the byte that encodes the
;; fourth register -- confirm.
9226 (define_insn "avx_blendvp<avxmodesuffixf2c><avxmodesuffix>"
9227 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
9229 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
9230 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
9231 (match_operand:AVXMODEF2P 3 "register_operand" "x")]
9234 "vblendvp<avxmodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9235 [(set_attr "type" "ssemov")
9236 (set_attr "prefix_extra" "1")
9237 (set_attr "length_immediate" "1")
9238 (set_attr "prefix" "vex")
9239 (set_attr "mode" "<avxvecmode>")])
;; Two-operand blendp{s,d} over the SSEMODEF2P modes (V4SF, V2DF), expressed
;; as a vec_merge.  Operand 1 is tied to the destination ("0"); operand 3 is
;; the immediate mask.
9241 (define_insn "sse4_1_blendp<ssemodesuffixf2c>"
9242 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
9243 (vec_merge:SSEMODEF2P
9244 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
9245 (match_operand:SSEMODEF2P 1 "register_operand" "0")
9246 (match_operand:SI 3 "const_0_to_<blendbits>_operand" "n")))]
9248 "blendp<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}"
9249 [(set_attr "type" "ssemov")
9250 (set_attr "prefix_data16" "1")
9251 (set_attr "prefix_extra" "1")
9252 (set_attr "length_immediate" "1")
9253 (set_attr "mode" "<MODE>")])
;; Two-operand blendvp{s,d} over the SSEMODEF2P modes.  Operand 3 carries
;; the "Yz" constraint while operands 0-2 use the *_not_xmm0 predicates,
;; which keeps the destination and the data inputs out of the register
;; reserved for operand 3.  Operand 3 is still printed explicitly in the
;; template.
9255 (define_insn "sse4_1_blendvp<ssemodesuffixf2c>"
9256 [(set (match_operand:SSEMODEF2P 0 "reg_not_xmm0_operand" "=x")
9258 [(match_operand:SSEMODEF2P 1 "reg_not_xmm0_operand" "0")
9259 (match_operand:SSEMODEF2P 2 "nonimm_not_xmm0_operand" "xm")
9260 (match_operand:SSEMODEF2P 3 "register_operand" "Yz")]
9263 "blendvp<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}"
9264 [(set_attr "type" "ssemov")
9265 (set_attr "prefix_data16" "1")
9266 (set_attr "prefix_extra" "1")
9267 (set_attr "mode" "<MODE>")])
;; VEX-encoded vdpp{s,d} over the AVXMODEF2P modes.  Operand 1 is marked
;; commutative with operand 2 ("%"); operand 3 is an 8-bit immediate
;; (const_0_to_255_operand).
9269 (define_insn "avx_dpp<avxmodesuffixf2c><avxmodesuffix>"
9270 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
9272 [(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
9273 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
9274 (match_operand:SI 3 "const_0_to_255_operand" "n")]
9277 "vdpp<avxmodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9278 [(set_attr "type" "ssemul")
9279 (set_attr "prefix" "vex")
9280 (set_attr "prefix_extra" "1")
9281 (set_attr "length_immediate" "1")
9282 (set_attr "mode" "<avxvecmode>")])
;; Two-operand dpp{s,d} over the SSEMODEF2P modes.  Operand 1 is commutative
;; with operand 2 ("%") and tied to the destination ("0"); operand 3 is an
;; 8-bit immediate.
9284 (define_insn "sse4_1_dpp<ssemodesuffixf2c>"
9285 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
9287 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
9288 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
9289 (match_operand:SI 3 "const_0_to_255_operand" "n")]
9292 "dpp<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}"
9293 [(set_attr "type" "ssemul")
9294 (set_attr "prefix_data16" "1")
9295 (set_attr "prefix_extra" "1")
9296 (set_attr "length_immediate" "1")
9297 (set_attr "mode" "<MODE>")])
;; movntdqa load of a V2DI value into an SSE register, modelled as an
;; unspec.  The source must be memory.  "%v" in the template together with
;; prefix "maybe_vex" lets one pattern serve both encodings.
9299 (define_insn "sse4_1_movntdqa"
9300 [(set (match_operand:V2DI 0 "register_operand" "=x")
9301 (unspec:V2DI [(match_operand:V2DI 1 "memory_operand" "m")]
9304 "%vmovntdqa\t{%1, %0|%0, %1}"
9305 [(set_attr "type" "ssemov")
9306 (set_attr "prefix_extra" "1")
9307 (set_attr "prefix" "maybe_vex")
9308 (set_attr "mode" "TI")])
;; VEX-encoded three-operand vmpsadbw on V16QI, modelled as an unspec.
;; Operand 3 is an 8-bit immediate (const_0_to_255_operand).
9310 (define_insn "*avx_mpsadbw"
9311 [(set (match_operand:V16QI 0 "register_operand" "=x")
9312 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
9313 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
9314 (match_operand:SI 3 "const_0_to_255_operand" "n")]
9317 "vmpsadbw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9318 [(set_attr "type" "sselog1")
9319 (set_attr "prefix" "vex")
9320 (set_attr "prefix_extra" "1")
9321 (set_attr "length_immediate" "1")
9322 (set_attr "mode" "TI")])
;; Two-operand mpsadbw on V16QI in SSE registers, modelled as an unspec.
;; Operand 1 is tied to the destination ("0"); operand 3 is an 8-bit
;; immediate.
9324 (define_insn "sse4_1_mpsadbw"
9325 [(set (match_operand:V16QI 0 "register_operand" "=x")
9326 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
9327 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
9328 (match_operand:SI 3 "const_0_to_255_operand" "n")]
9331 "mpsadbw\t{%3, %2, %0|%0, %2, %3}"
9332 [(set_attr "type" "sselog1")
9333 (set_attr "prefix_extra" "1")
9334 (set_attr "length_immediate" "1")
9335 (set_attr "mode" "TI")])
;; VEX-encoded three-operand vpackusdw: two V4SI inputs produce a V8HI
;; result.  The destination is independent of operand 1.
9337 (define_insn "*avx_packusdw"
9338 [(set (match_operand:V8HI 0 "register_operand" "=x")
9341 (match_operand:V4SI 1 "register_operand" "x"))
9343 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
9345 "vpackusdw\t{%2, %1, %0|%0, %1, %2}"
9346 [(set_attr "type" "sselog")
9347 (set_attr "prefix_extra" "1")
9348 (set_attr "prefix" "vex")
9349 (set_attr "mode" "TI")])
;; Two-operand packusdw: two V4SI inputs produce a V8HI result.  Operand 1
;; is tied to the destination ("0"); operand 2 may be a register or memory.
9351 (define_insn "sse4_1_packusdw"
9352 [(set (match_operand:V8HI 0 "register_operand" "=x")
9355 (match_operand:V4SI 1 "register_operand" "0"))
9357 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
9359 "packusdw\t{%2, %0|%0, %2}"
9360 [(set_attr "type" "sselog")
9361 (set_attr "prefix_extra" "1")
9362 (set_attr "mode" "TI")])
;; VEX-encoded vpblendvb on V16QI, modelled as an unspec of three vector
;; operands.  Operand 3 may be any SSE register ("x") and is printed as an
;; explicit fourth operand.
9364 (define_insn "*avx_pblendvb"
9365 [(set (match_operand:V16QI 0 "register_operand" "=x")
9366 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
9367 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
9368 (match_operand:V16QI 3 "register_operand" "x")]
9371 "vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9372 [(set_attr "type" "ssemov")
9373 (set_attr "prefix_extra" "1")
9374 (set_attr "length_immediate" "1")
9375 (set_attr "prefix" "vex")
9376 (set_attr "mode" "TI")])
;; Two-operand pblendvb on V16QI.  Operand 3 carries the "Yz" constraint
;; while operands 0-2 use the *_not_xmm0 predicates, which keeps them out of
;; the register reserved for operand 3.  Operand 1 is tied to the
;; destination.
9378 (define_insn "sse4_1_pblendvb"
9379 [(set (match_operand:V16QI 0 "reg_not_xmm0_operand" "=x")
9380 (unspec:V16QI [(match_operand:V16QI 1 "reg_not_xmm0_operand" "0")
9381 (match_operand:V16QI 2 "nonimm_not_xmm0_operand" "xm")
9382 (match_operand:V16QI 3 "register_operand" "Yz")]
9385 "pblendvb\t{%3, %2, %0|%0, %2, %3}"
9386 [(set_attr "type" "ssemov")
9387 (set_attr "prefix_extra" "1")
9388 (set_attr "mode" "TI")])
;; VEX-encoded three-operand vpblendw on V8HI.  Operand 2 is listed before
;; operand 1 in the RTL; operand 3 is an 8-bit immediate mask
;; (const_0_to_255_operand).
9390 (define_insn "*avx_pblendw"
9391 [(set (match_operand:V8HI 0 "register_operand" "=x")
9393 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
9394 (match_operand:V8HI 1 "register_operand" "x")
9395 (match_operand:SI 3 "const_0_to_255_operand" "n")))]
9397 "vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9398 [(set_attr "type" "ssemov")
9399 (set_attr "prefix" "vex")
9400 (set_attr "prefix_extra" "1")
9401 (set_attr "length_immediate" "1")
9402 (set_attr "mode" "TI")])
;; Two-operand pblendw on V8HI.  Operand 1 is tied to the destination ("0");
;; operand 2 may be a register or memory; operand 3 is an 8-bit immediate
;; mask.
9404 (define_insn "sse4_1_pblendw"
9405 [(set (match_operand:V8HI 0 "register_operand" "=x")
9407 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
9408 (match_operand:V8HI 1 "register_operand" "0")
9409 (match_operand:SI 3 "const_0_to_255_operand" "n")))]
9411 "pblendw\t{%3, %2, %0|%0, %2, %3}"
9412 [(set_attr "type" "ssemov")
9413 (set_attr "prefix_extra" "1")
9414 (set_attr "length_immediate" "1")
9415 (set_attr "mode" "TI")])
;; phminposuw on V8HI, modelled as UNSPEC_PHMINPOSUW of a single source that
;; may be a register or memory.  "%v" with prefix "maybe_vex" covers both
;; the legacy and the VEX encoding.
9417 (define_insn "sse4_1_phminposuw"
9418 [(set (match_operand:V8HI 0 "register_operand" "=x")
9419 (unspec:V8HI [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
9420 UNSPEC_PHMINPOSUW))]
9422 "%vphminposuw\t{%1, %0|%0, %1}"
9423 [(set_attr "type" "sselog1")
9424 (set_attr "prefix_extra" "1")
9425 (set_attr "prefix" "maybe_vex")
9426 (set_attr "mode" "TI")])
;; Named pmovsxbw pattern: the source is a full V16QI value in an SSE
;; register, from which a selection starting at element 0 is taken; the
;; result is V8HI.
9428 (define_insn "sse4_1_extendv8qiv8hi2"
9429 [(set (match_operand:V8HI 0 "register_operand" "=x")
9432 (match_operand:V16QI 1 "register_operand" "x")
9433 (parallel [(const_int 0)
9442 "%vpmovsxbw\t{%1, %0|%0, %1}"
9443 [(set_attr "type" "ssemov")
9444 (set_attr "prefix_extra" "1")
9445 (set_attr "prefix" "maybe_vex")
9446 (set_attr "mode" "TI")])
;; Unnamed pmovsxbw variant whose source is a V8QI value that may live in a
;; register or in memory; it appears under a vec_duplicate:V16QI so the
;; overall shape parallels the named pattern.
9448 (define_insn "*sse4_1_extendv8qiv8hi2"
9449 [(set (match_operand:V8HI 0 "register_operand" "=x")
9452 (vec_duplicate:V16QI
9453 (match_operand:V8QI 1 "nonimmediate_operand" "xm"))
9454 (parallel [(const_int 0)
9463 "%vpmovsxbw\t{%1, %0|%0, %1}"
9464 [(set_attr "type" "ssemov")
9465 (set_attr "prefix_extra" "1")
9466 (set_attr "prefix" "maybe_vex")
9467 (set_attr "mode" "TI")])
;; Named pmovsxbd pattern: the source is a full V16QI value in an SSE
;; register, from which a selection starting at element 0 is taken; the
;; result is V4SI.
9469 (define_insn "sse4_1_extendv4qiv4si2"
9470 [(set (match_operand:V4SI 0 "register_operand" "=x")
9473 (match_operand:V16QI 1 "register_operand" "x")
9474 (parallel [(const_int 0)
9479 "%vpmovsxbd\t{%1, %0|%0, %1}"
9480 [(set_attr "type" "ssemov")
9481 (set_attr "prefix_extra" "1")
9482 (set_attr "prefix" "maybe_vex")
9483 (set_attr "mode" "TI")])
;; Unnamed pmovsxbd variant whose source is a V4QI value that may live in a
;; register or in memory, wrapped in a vec_duplicate:V16QI.
9485 (define_insn "*sse4_1_extendv4qiv4si2"
9486 [(set (match_operand:V4SI 0 "register_operand" "=x")
9489 (vec_duplicate:V16QI
9490 (match_operand:V4QI 1 "nonimmediate_operand" "xm"))
9491 (parallel [(const_int 0)
9496 "%vpmovsxbd\t{%1, %0|%0, %1}"
9497 [(set_attr "type" "ssemov")
9498 (set_attr "prefix_extra" "1")
9499 (set_attr "prefix" "maybe_vex")
9500 (set_attr "mode" "TI")])
;; Named pmovsxbq pattern: the source is a full V16QI value in an SSE
;; register, from which a selection starting at element 0 is taken; the
;; result is V2DI.
9502 (define_insn "sse4_1_extendv2qiv2di2"
9503 [(set (match_operand:V2DI 0 "register_operand" "=x")
9506 (match_operand:V16QI 1 "register_operand" "x")
9507 (parallel [(const_int 0)
9510 "%vpmovsxbq\t{%1, %0|%0, %1}"
9511 [(set_attr "type" "ssemov")
9512 (set_attr "prefix_extra" "1")
9513 (set_attr "prefix" "maybe_vex")
9514 (set_attr "mode" "TI")])
;; Unnamed pmovsxbq variant whose source is a V2QI value that may live in a
;; register or in memory, wrapped in a vec_duplicate:V16QI.
9516 (define_insn "*sse4_1_extendv2qiv2di2"
9517 [(set (match_operand:V2DI 0 "register_operand" "=x")
9520 (vec_duplicate:V16QI
9521 (match_operand:V2QI 1 "nonimmediate_operand" "xm"))
9522 (parallel [(const_int 0)
9525 "%vpmovsxbq\t{%1, %0|%0, %1}"
9526 [(set_attr "type" "ssemov")
9527 (set_attr "prefix_extra" "1")
9528 (set_attr "prefix" "maybe_vex")
9529 (set_attr "mode" "TI")])
;; Named pmovsxwd pattern: the source is a full V8HI value in an SSE
;; register, from which a selection starting at element 0 is taken; the
;; result is V4SI.
9531 (define_insn "sse4_1_extendv4hiv4si2"
9532 [(set (match_operand:V4SI 0 "register_operand" "=x")
9535 (match_operand:V8HI 1 "register_operand" "x")
9536 (parallel [(const_int 0)
9541 "%vpmovsxwd\t{%1, %0|%0, %1}"
9542 [(set_attr "type" "ssemov")
9543 (set_attr "prefix_extra" "1")
9544 (set_attr "prefix" "maybe_vex")
9545 (set_attr "mode" "TI")])
;; Unnamed pmovsxwd variant whose source may live in a register or in
;; memory ("xm").  pmovsxwd widens four HImode elements (a 64-bit source),
;; so operand 1 is V4HI.  This matches the pattern name and the sibling
;; unnamed patterns, whose operand is exactly the narrow vector being
;; extended (V8QI, V4QI, V2QI, V2SI).  The operand used to be V2HI, which
;; described only half of the data the instruction reads.
9547 (define_insn "*sse4_1_extendv4hiv4si2"
9548 [(set (match_operand:V4SI 0 "register_operand" "=x")
9552 (match_operand:V4HI 1 "nonimmediate_operand" "xm"))
9553 (parallel [(const_int 0)
9558 "%vpmovsxwd\t{%1, %0|%0, %1}"
9559 [(set_attr "type" "ssemov")
9560 (set_attr "prefix_extra" "1")
9561 (set_attr "prefix" "maybe_vex")
9562 (set_attr "mode" "TI")])
;; Named pmovsxwq pattern: the source is a full V8HI value in an SSE
;; register, from which a selection starting at element 0 is taken; the
;; result is V2DI.
9564 (define_insn "sse4_1_extendv2hiv2di2"
9565 [(set (match_operand:V2DI 0 "register_operand" "=x")
9568 (match_operand:V8HI 1 "register_operand" "x")
9569 (parallel [(const_int 0)
9572 "%vpmovsxwq\t{%1, %0|%0, %1}"
9573 [(set_attr "type" "ssemov")
9574 (set_attr "prefix_extra" "1")
9575 (set_attr "prefix" "maybe_vex")
9576 (set_attr "mode" "TI")])
;; Unnamed pmovsxwq variant whose source may live in a register or in
;; memory ("xm").  pmovsxwq widens two HImode elements (a 32-bit source),
;; so operand 1 is V2HI.  This matches the pattern name and the sibling
;; unnamed patterns, whose operand is exactly the narrow vector being
;; extended (V8QI, V4QI, V2QI, V2SI).  The operand used to be V8HI, i.e. a
;; full 128-bit vector, which overstated the width of the memory access.
9578 (define_insn "*sse4_1_extendv2hiv2di2"
9579 [(set (match_operand:V2DI 0 "register_operand" "=x")
9583 (match_operand:V2HI 1 "nonimmediate_operand" "xm"))
9584 (parallel [(const_int 0)
9587 "%vpmovsxwq\t{%1, %0|%0, %1}"
9588 [(set_attr "type" "ssemov")
9589 (set_attr "prefix_extra" "1")
9590 (set_attr "prefix" "maybe_vex")
9591 (set_attr "mode" "TI")])
;; Named doubleword-to-quadword sign-extension pattern.
;; Operand 1 is a full V4SI register from which elements are selected
;; (the selection starts at element 0); operand 0 is the V2DI destination.
;; Emits pmovsxdq; the "prefix" attribute is maybe_vex.
9593 (define_insn "sse4_1_extendv2siv2di2"
9594 [(set (match_operand:V2DI 0 "register_operand" "=x")
9597 (match_operand:V4SI 1 "register_operand" "x")
9598 (parallel [(const_int 0)
9601 "%vpmovsxdq\t{%1, %0|%0, %1}"
9602 [(set_attr "type" "ssemov")
9603 (set_attr "prefix_extra" "1")
9604 (set_attr "prefix" "maybe_vex")
9605 (set_attr "mode" "TI")])
;; Unnamed variant of the doubleword-to-quadword sign-extension pattern.
;; Operand 1 is a V2SI value in a register or memory ("xm"); operand 0 is
;; the V2DI destination register.
;; Emits pmovsxdq; the "prefix" attribute is maybe_vex.
9607 (define_insn "*sse4_1_extendv2siv2di2"
9608 [(set (match_operand:V2DI 0 "register_operand" "=x")
9612 (match_operand:V2SI 1 "nonimmediate_operand" "xm"))
9613 (parallel [(const_int 0)
9616 "%vpmovsxdq\t{%1, %0|%0, %1}"
9617 [(set_attr "type" "ssemov")
9618 (set_attr "prefix_extra" "1")
9619 (set_attr "prefix" "maybe_vex")
9620 (set_attr "mode" "TI")])
;; Named byte-to-word zero-extension pattern.
;; Operand 1 is a full V16QI register from which elements are selected
;; (the selection starts at element 0); operand 0 is the V8HI destination.
;; Emits pmovzxbw; the "prefix" attribute is maybe_vex.
9622 (define_insn "sse4_1_zero_extendv8qiv8hi2"
9623 [(set (match_operand:V8HI 0 "register_operand" "=x")
9626 (match_operand:V16QI 1 "register_operand" "x")
9627 (parallel [(const_int 0)
9636 "%vpmovzxbw\t{%1, %0|%0, %1}"
9637 [(set_attr "type" "ssemov")
9638 (set_attr "prefix_extra" "1")
9639 (set_attr "prefix" "maybe_vex")
9640 (set_attr "mode" "TI")])
;; Unnamed variant of the byte-to-word zero-extension pattern.
;; Operand 1 is a V8QI value in a register or memory ("xm"), wrapped in
;; vec_duplicate:V16QI; operand 0 is the V8HI destination register.
;; Emits pmovzxbw; the "prefix" attribute is maybe_vex.
9642 (define_insn "*sse4_1_zero_extendv8qiv8hi2"
9643 [(set (match_operand:V8HI 0 "register_operand" "=x")
9646 (vec_duplicate:V16QI
9647 (match_operand:V8QI 1 "nonimmediate_operand" "xm"))
9648 (parallel [(const_int 0)
9657 "%vpmovzxbw\t{%1, %0|%0, %1}"
9658 [(set_attr "type" "ssemov")
9659 (set_attr "prefix_extra" "1")
9660 (set_attr "prefix" "maybe_vex")
9661 (set_attr "mode" "TI")])
;; Named byte-to-doubleword zero-extension pattern.
;; Operand 1 is a full V16QI register from which elements are selected
;; (the selection starts at element 0); operand 0 is the V4SI destination.
;; Emits pmovzxbd; the "prefix" attribute is maybe_vex.
9663 (define_insn "sse4_1_zero_extendv4qiv4si2"
9664 [(set (match_operand:V4SI 0 "register_operand" "=x")
9667 (match_operand:V16QI 1 "register_operand" "x")
9668 (parallel [(const_int 0)
9673 "%vpmovzxbd\t{%1, %0|%0, %1}"
9674 [(set_attr "type" "ssemov")
9675 (set_attr "prefix_extra" "1")
9676 (set_attr "prefix" "maybe_vex")
9677 (set_attr "mode" "TI")])
;; Unnamed variant of the byte-to-doubleword zero-extension pattern.
;; Operand 1 is a V4QI value in a register or memory ("xm"), wrapped in
;; vec_duplicate:V16QI; operand 0 is the V4SI destination register.
;; Emits pmovzxbd; the "prefix" attribute is maybe_vex.
9679 (define_insn "*sse4_1_zero_extendv4qiv4si2"
9680 [(set (match_operand:V4SI 0 "register_operand" "=x")
9683 (vec_duplicate:V16QI
9684 (match_operand:V4QI 1 "nonimmediate_operand" "xm"))
9685 (parallel [(const_int 0)
9690 "%vpmovzxbd\t{%1, %0|%0, %1}"
9691 [(set_attr "type" "ssemov")
9692 (set_attr "prefix_extra" "1")
9693 (set_attr "prefix" "maybe_vex")
9694 (set_attr "mode" "TI")])
;; Named byte-to-quadword zero-extension pattern.
;; Operand 1 is a full V16QI register from which elements are selected
;; (the selection starts at element 0); operand 0 is the V2DI destination.
;; Emits pmovzxbq; the "prefix" attribute is maybe_vex.
9696 (define_insn "sse4_1_zero_extendv2qiv2di2"
9697 [(set (match_operand:V2DI 0 "register_operand" "=x")
9700 (match_operand:V16QI 1 "register_operand" "x")
9701 (parallel [(const_int 0)
9704 "%vpmovzxbq\t{%1, %0|%0, %1}"
9705 [(set_attr "type" "ssemov")
9706 (set_attr "prefix_extra" "1")
9707 (set_attr "prefix" "maybe_vex")
9708 (set_attr "mode" "TI")])
;; Unnamed variant of the byte-to-quadword zero-extension pattern.
;; Operand 1 is a V2QI value in a register or memory ("xm"), wrapped in
;; vec_duplicate:V16QI; operand 0 is the V2DI destination register.
;; Emits pmovzxbq; the "prefix" attribute is maybe_vex.
9710 (define_insn "*sse4_1_zero_extendv2qiv2di2"
9711 [(set (match_operand:V2DI 0 "register_operand" "=x")
9714 (vec_duplicate:V16QI
9715 (match_operand:V2QI 1 "nonimmediate_operand" "xm"))
9716 (parallel [(const_int 0)
9719 "%vpmovzxbq\t{%1, %0|%0, %1}"
9720 [(set_attr "type" "ssemov")
9721 (set_attr "prefix_extra" "1")
9722 (set_attr "prefix" "maybe_vex")
9723 (set_attr "mode" "TI")])
;; Named word-to-doubleword zero-extension pattern.
;; Operand 1 is a full V8HI register from which elements are selected
;; (the selection starts at element 0); operand 0 is the V4SI destination.
;; Emits pmovzxwd; the "prefix" attribute is maybe_vex.
9725 (define_insn "sse4_1_zero_extendv4hiv4si2"
9726 [(set (match_operand:V4SI 0 "register_operand" "=x")
9729 (match_operand:V8HI 1 "register_operand" "x")
9730 (parallel [(const_int 0)
9735 "%vpmovzxwd\t{%1, %0|%0, %1}"
9736 [(set_attr "type" "ssemov")
9737 (set_attr "prefix_extra" "1")
9738 (set_attr "prefix" "maybe_vex")
9739 (set_attr "mode" "TI")])
;; Unnamed variant of the word-to-doubleword zero-extension pattern.
;; Operand 1 is a V4HI value in a register or memory ("xm"); operand 0 is
;; the V4SI destination register.
;; Emits pmovzxwd; the "prefix" attribute is maybe_vex.
9741 (define_insn "*sse4_1_zero_extendv4hiv4si2"
9742 [(set (match_operand:V4SI 0 "register_operand" "=x")
9746 (match_operand:V4HI 1 "nonimmediate_operand" "xm"))
9747 (parallel [(const_int 0)
9752 "%vpmovzxwd\t{%1, %0|%0, %1}"
9753 [(set_attr "type" "ssemov")
9754 (set_attr "prefix_extra" "1")
9755 (set_attr "prefix" "maybe_vex")
9756 (set_attr "mode" "TI")])
;; Named word-to-quadword zero-extension pattern.
;; Operand 1 is a full V8HI register from which elements are selected
;; (the selection starts at element 0); operand 0 is the V2DI destination.
;; Emits pmovzxwq; the "prefix" attribute is maybe_vex.
9758 (define_insn "sse4_1_zero_extendv2hiv2di2"
9759 [(set (match_operand:V2DI 0 "register_operand" "=x")
9762 (match_operand:V8HI 1 "register_operand" "x")
9763 (parallel [(const_int 0)
9766 "%vpmovzxwq\t{%1, %0|%0, %1}"
9767 [(set_attr "type" "ssemov")
9768 (set_attr "prefix_extra" "1")
9769 (set_attr "prefix" "maybe_vex")
9770 (set_attr "mode" "TI")])
;; Unnamed variant of the word-to-quadword zero-extension pattern.
;; Operand 1 is a V2HI value in a register or memory ("xm"); operand 0 is
;; the V2DI destination register.
;; Emits pmovzxwq; the "prefix" attribute is maybe_vex.
9772 (define_insn "*sse4_1_zero_extendv2hiv2di2"
9773 [(set (match_operand:V2DI 0 "register_operand" "=x")
9777 (match_operand:V2HI 1 "nonimmediate_operand" "xm"))
9778 (parallel [(const_int 0)
9781 "%vpmovzxwq\t{%1, %0|%0, %1}"
9782 [(set_attr "type" "ssemov")
9783 (set_attr "prefix_extra" "1")
9784 (set_attr "prefix" "maybe_vex")
9785 (set_attr "mode" "TI")])
;; Named doubleword-to-quadword zero-extension pattern.
;; Operand 1 is a full V4SI register from which elements are selected
;; (the selection starts at element 0); operand 0 is the V2DI destination.
;; Emits pmovzxdq; the "prefix" attribute is maybe_vex.
9787 (define_insn "sse4_1_zero_extendv2siv2di2"
9788 [(set (match_operand:V2DI 0 "register_operand" "=x")
9791 (match_operand:V4SI 1 "register_operand" "x")
9792 (parallel [(const_int 0)
9795 "%vpmovzxdq\t{%1, %0|%0, %1}"
9796 [(set_attr "type" "ssemov")
9797 (set_attr "prefix_extra" "1")
9798 (set_attr "prefix" "maybe_vex")
9799 (set_attr "mode" "TI")])
;; Unnamed variant of the doubleword-to-quadword zero-extension pattern.
;; Operand 1 is a V2SI value in a register or memory ("xm"); operand 0 is
;; the V2DI destination register.
;; Emits pmovzxdq; the "prefix" attribute is maybe_vex.
9801 (define_insn "*sse4_1_zero_extendv2siv2di2"
9802 [(set (match_operand:V2DI 0 "register_operand" "=x")
9806 (match_operand:V2SI 1 "nonimmediate_operand" "xm"))
9807 (parallel [(const_int 0)
9810 "%vpmovzxdq\t{%1, %0|%0, %1}"
9811 [(set_attr "type" "ssemov")
9812 (set_attr "prefix_extra" "1")
9813 (set_attr "prefix" "maybe_vex")
9814 (set_attr "mode" "TI")])
9816 ;; vtestps/vtestpd are very similar to comiss and ucomiss when
9817 ;; setting FLAGS_REG, but they are not really compare instructions.
;; Sets FLAGS_REG (CC mode) from an unspec over two AVXMODEF2P operands:
;; operand 0 must be a register, operand 1 may be a register or memory.
;; Emits vtestp<suffix>; nothing besides the flags register is written.
;; The "prefix" attribute is vex.
9818 (define_insn "avx_vtestp<avxmodesuffixf2c><avxmodesuffix>"
9819 [(set (reg:CC FLAGS_REG)
9820 (unspec:CC [(match_operand:AVXMODEF2P 0 "register_operand" "x")
9821 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "xm")]
9824 "vtestp<avxmodesuffixf2c>\t{%1, %0|%0, %1}"
9825 [(set_attr "type" "ssecomi")
9826 (set_attr "prefix_extra" "1")
9827 (set_attr "prefix" "vex")
9828 (set_attr "mode" "<MODE>")])
9830 ;; ptest is very similar to comiss and ucomiss when setting FLAGS_REG,
9831 ;; but it is not really a compare instruction.
;; 256-bit form: sets FLAGS_REG (CC mode) from an unspec over two V4DI
;; operands (operand 0 a register, operand 1 a register or memory).
;; Emits vptest; the "mode" attribute is OI and "prefix" is vex.
9832 (define_insn "avx_ptest256"
9833 [(set (reg:CC FLAGS_REG)
9834 (unspec:CC [(match_operand:V4DI 0 "register_operand" "x")
9835 (match_operand:V4DI 1 "nonimmediate_operand" "xm")]
9838 "vptest\t{%1, %0|%0, %1}"
9839 [(set_attr "type" "ssecomi")
9840 (set_attr "prefix_extra" "1")
9841 (set_attr "prefix" "vex")
9842 (set_attr "mode" "OI")])
;; 128-bit form: sets FLAGS_REG (CC mode) from an unspec over two V2DI
;; operands (operand 0 a register, operand 1 a register or memory).
;; Emits ptest; the "mode" attribute is TI and "prefix" is maybe_vex.
9844 (define_insn "sse4_1_ptest"
9845 [(set (reg:CC FLAGS_REG)
9846 (unspec:CC [(match_operand:V2DI 0 "register_operand" "x")
9847 (match_operand:V2DI 1 "nonimmediate_operand" "xm")]
9850 "%vptest\t{%1, %0|%0, %1}"
9851 [(set_attr "type" "ssecomi")
9852 (set_attr "prefix_extra" "1")
9853 (set_attr "prefix" "maybe_vex")
9854 (set_attr "mode" "TI")])
;; 256-bit packed round.  Operand 1 (register or memory) is the source,
;; operand 2 is an immediate accepted by const_0_to_15_operand, and
;; operand 0 is the destination register; all vector operands share the
;; AVX256MODEF2P mode.  Emits vroundp<suffix> with one immediate byte
;; (length_immediate = 1).
9856 (define_insn "avx_roundp<avxmodesuffixf2c>256"
9857 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "=x")
9858 (unspec:AVX256MODEF2P
9859 [(match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "xm")
9860 (match_operand:SI 2 "const_0_to_15_operand" "n")]
9863 "vroundp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
9864 [(set_attr "type" "ssecvt")
9865 (set_attr "prefix_extra" "1")
9866 (set_attr "length_immediate" "1")
9867 (set_attr "prefix" "vex")
9868 (set_attr "mode" "<MODE>")])
;; 128-bit packed round.  Operand 1 (register or memory) is the source,
;; operand 2 is an immediate accepted by const_0_to_15_operand, and
;; operand 0 is the destination register; vector operands use the
;; SSEMODEF2P iterator (V4SF, V2DF).  Emits roundp<suffix>; "prefix" is
;; maybe_vex and one immediate byte is accounted for.
9870 (define_insn "sse4_1_roundp<ssemodesuffixf2c>"
9871 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
9873 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm")
9874 (match_operand:SI 2 "const_0_to_15_operand" "n")]
9877 "%vroundp<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
9878 [(set_attr "type" "ssecvt")
9879 (set_attr "prefix_data16" "1")
9880 (set_attr "prefix_extra" "1")
9881 (set_attr "length_immediate" "1")
9882 (set_attr "prefix" "maybe_vex")
9883 (set_attr "mode" "<MODE>")])
;; Unnamed AVX scalar round.  The result is a vec_merge of the rounded
;; operand 2 (with immediate operand 3) and operand 1.  Unlike the SSE4.1
;; form, operand 1 is an independent register ("x") rather than being
;; tied to the destination, and the template prints three register
;; operands plus the immediate.
9885 (define_insn "*avx_rounds<ssemodesuffixf2c>"
9886 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
9887 (vec_merge:SSEMODEF2P
9889 [(match_operand:SSEMODEF2P 2 "register_operand" "x")
9890 (match_operand:SI 3 "const_0_to_15_operand" "n")]
9892 (match_operand:SSEMODEF2P 1 "register_operand" "x")
9895 "vrounds<ssemodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9896 [(set_attr "type" "ssecvt")
9897 (set_attr "prefix_extra" "1")
9898 (set_attr "length_immediate" "1")
9899 (set_attr "prefix" "vex")
9900 (set_attr "mode" "<MODE>")])
;; SSE4.1 scalar round.  The result is a vec_merge of the rounded
;; operand 2 (with immediate operand 3) and operand 1, where operand 1 is
;; tied to the destination register (constraint "0").  The template
;; therefore prints only operands 0, 2 and 3.
9902 (define_insn "sse4_1_rounds<ssemodesuffixf2c>"
9903 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
9904 (vec_merge:SSEMODEF2P
9906 [(match_operand:SSEMODEF2P 2 "register_operand" "x")
9907 (match_operand:SI 3 "const_0_to_15_operand" "n")]
9909 (match_operand:SSEMODEF2P 1 "register_operand" "0")
9912 "rounds<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}"
9913 [(set_attr "type" "ssecvt")
9914 (set_attr "prefix_data16" "1")
9915 (set_attr "prefix_extra" "1")
9916 (set_attr "length_immediate" "1")
9917 (set_attr "mode" "<MODE>")])
9919 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9921 ;; Intel SSE4.2 string/text processing instructions
9923 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Combined explicit-length string compare that is split before use.
;; It has three results: an SI index in operand 0 (constraint "c"), a
;; V16QI mask in operand 1 (constraint "Yz"), and FLAGS_REG.  Inputs are
;; two V16QI strings (operands 2 and 4, restricted by the *_not_xmm0
;; predicates), their SI lengths (operands 3 "a" and 5 "d"), and an 8-bit
;; immediate control (operand 6).
;; The split code looks for REG_UNUSED notes on operand 0, operand 1 and
;; FLAGS_REG (locals ecx, xmm0, flags) and emits pcmpestri and/or
;; pcmpestrm; when only the flags are live it emits the _cconly form.
;; NOTE(review): the lines guarding the first two emit_insn calls are not
;; visible in this excerpt; presumably they test ecx and xmm0.
9925 (define_insn_and_split "sse4_2_pcmpestr"
9926 [(set (match_operand:SI 0 "register_operand" "=c,c")
9928 [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
9929 (match_operand:SI 3 "register_operand" "a,a")
9930 (match_operand:V16QI 4 "nonimm_not_xmm0_operand" "x,m")
9931 (match_operand:SI 5 "register_operand" "d,d")
9932 (match_operand:SI 6 "const_0_to_255_operand" "n,n")]
9934 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
9942 (set (reg:CC FLAGS_REG)
9951 && can_create_pseudo_p ()"
9956 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
9957 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
9958 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
9961 emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
9962 operands[3], operands[4],
9963 operands[5], operands[6]));
9965 emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
9966 operands[3], operands[4],
9967 operands[5], operands[6]));
9968 if (flags && !(ecx || xmm0))
9969 emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
9970 operands[2], operands[3],
9971 operands[4], operands[5],
9975 [(set_attr "type" "sselog")
9976 (set_attr "prefix_data16" "1")
9977 (set_attr "prefix_extra" "1")
9978 (set_attr "length_immediate" "1")
9979 (set_attr "memory" "none,load")
9980 (set_attr "mode" "TI")])
;; Explicit-length string compare returning an index.  Writes the SI
;; result to operand 0 (constraint "c") and also sets FLAGS_REG.
;; Inputs: V16QI strings in operands 1 and 3 (operand 3 may be memory in
;; the second alternative), SI lengths in operands 2 ("a") and 4 ("d"),
;; and the immediate control in operand 5.  Only operands 1, 3 and 5 are
;; printed in the template; the others are fixed by their constraints.
9982 (define_insn "sse4_2_pcmpestri"
9983 [(set (match_operand:SI 0 "register_operand" "=c,c")
9985 [(match_operand:V16QI 1 "register_operand" "x,x")
9986 (match_operand:SI 2 "register_operand" "a,a")
9987 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
9988 (match_operand:SI 4 "register_operand" "d,d")
9989 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
9991 (set (reg:CC FLAGS_REG)
10000 "%vpcmpestri\t{%5, %3, %1|%1, %3, %5}"
10001 [(set_attr "type" "sselog")
10002 (set_attr "prefix_data16" "1")
10003 (set_attr "prefix_extra" "1")
10004 (set_attr "prefix" "maybe_vex")
10005 (set_attr "length_immediate" "1")
10006 (set_attr "memory" "none,load")
10007 (set_attr "mode" "TI")])
;; Explicit-length string compare returning a mask.  Writes the V16QI
;; result to operand 0 (constraint "Yz") and also sets FLAGS_REG.
;; Inputs: V16QI strings in operands 1 and 3 (operand 3 may be memory in
;; the second alternative), SI lengths in operands 2 ("a") and 4 ("d"),
;; and the immediate control in operand 5.  Only operands 1, 3 and 5 are
;; printed in the template.
10009 (define_insn "sse4_2_pcmpestrm"
10010 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
10012 [(match_operand:V16QI 1 "register_operand" "x,x")
10013 (match_operand:SI 2 "register_operand" "a,a")
10014 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
10015 (match_operand:SI 4 "register_operand" "d,d")
10016 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
10018 (set (reg:CC FLAGS_REG)
10027 "%vpcmpestrm\t{%5, %3, %1|%1, %3, %5}"
10028 [(set_attr "type" "sselog")
10029 (set_attr "prefix_data16" "1")
10030 (set_attr "prefix_extra" "1")
10031 (set_attr "length_immediate" "1")
10032 (set_attr "prefix" "maybe_vex")
10033 (set_attr "memory" "none,load")
10034 (set_attr "mode" "TI")])
;; Flags-only explicit-length string compare.  Only FLAGS_REG is set; the
;; index and mask results are declared as clobbered scratches (operands 1
;; and 0).  Four alternatives: the first two clobber the V16QI scratch
;; ("Yz") and print pcmpestrm, the last two clobber the SI scratch ("c")
;; and print pcmpestri; within each pair operand 4 is a register or memory.
10036 (define_insn "sse4_2_pcmpestr_cconly"
10037 [(set (reg:CC FLAGS_REG)
10039 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
10040 (match_operand:SI 3 "register_operand" "a,a,a,a")
10041 (match_operand:V16QI 4 "nonimmediate_operand" "x,m,x,m")
10042 (match_operand:SI 5 "register_operand" "d,d,d,d")
10043 (match_operand:SI 6 "const_0_to_255_operand" "n,n,n,n")]
10045 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
10046 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
10049 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
10050 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
10051 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}
10052 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}"
10053 [(set_attr "type" "sselog")
10054 (set_attr "prefix_data16" "1")
10055 (set_attr "prefix_extra" "1")
10056 (set_attr "length_immediate" "1")
10057 (set_attr "memory" "none,load,none,load")
10058 (set_attr "prefix" "maybe_vex")
10059 (set_attr "mode" "TI")])
;; Combined implicit-length string compare that is split before use.
;; It has three results: an SI index in operand 0 (constraint "c"), a
;; V16QI mask in operand 1 (constraint "Yz"), and FLAGS_REG.  Inputs are
;; two V16QI strings (operands 2 and 3, restricted by the *_not_xmm0
;; predicates) and an 8-bit immediate control (operand 4).
;; The split code looks for REG_UNUSED notes on operand 0, operand 1 and
;; FLAGS_REG (locals ecx, xmm0, flags) and emits pcmpistri and/or
;; pcmpistrm; when only the flags are live it emits the _cconly form.
;; NOTE(review): the lines guarding the first two emit_insn calls are not
;; visible in this excerpt; presumably they test ecx and xmm0.
10061 (define_insn_and_split "sse4_2_pcmpistr"
10062 [(set (match_operand:SI 0 "register_operand" "=c,c")
10064 [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
10065 (match_operand:V16QI 3 "nonimm_not_xmm0_operand" "x,m")
10066 (match_operand:SI 4 "const_0_to_255_operand" "n,n")]
10068 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
10074 (set (reg:CC FLAGS_REG)
10081 && can_create_pseudo_p ()"
10086 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
10087 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
10088 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
10091 emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
10092 operands[3], operands[4]));
10094 emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
10095 operands[3], operands[4]));
10096 if (flags && !(ecx || xmm0))
10097 emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
10098 operands[2], operands[3],
10102 [(set_attr "type" "sselog")
10103 (set_attr "prefix_data16" "1")
10104 (set_attr "prefix_extra" "1")
10105 (set_attr "length_immediate" "1")
10106 (set_attr "memory" "none,load")
10107 (set_attr "mode" "TI")])
;; Implicit-length string compare returning an index.  Writes the SI
;; result to operand 0 (constraint "c") and also sets FLAGS_REG.
;; Inputs: V16QI strings in operands 1 and 2 (operand 2 may be memory in
;; the second alternative) and the immediate control in operand 3.
10109 (define_insn "sse4_2_pcmpistri"
10110 [(set (match_operand:SI 0 "register_operand" "=c,c")
10112 [(match_operand:V16QI 1 "register_operand" "x,x")
10113 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
10114 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
10116 (set (reg:CC FLAGS_REG)
10123 "%vpcmpistri\t{%3, %2, %1|%1, %2, %3}"
10124 [(set_attr "type" "sselog")
10125 (set_attr "prefix_data16" "1")
10126 (set_attr "prefix_extra" "1")
10127 (set_attr "length_immediate" "1")
10128 (set_attr "prefix" "maybe_vex")
10129 (set_attr "memory" "none,load")
10130 (set_attr "mode" "TI")])
;; Implicit-length string compare returning a mask.  Writes the V16QI
;; result to operand 0 (constraint "Yz") and also sets FLAGS_REG.
;; Inputs: V16QI strings in operands 1 and 2 (operand 2 may be memory in
;; the second alternative) and the immediate control in operand 3.
10132 (define_insn "sse4_2_pcmpistrm"
10133 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
10135 [(match_operand:V16QI 1 "register_operand" "x,x")
10136 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
10137 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
10139 (set (reg:CC FLAGS_REG)
10146 "%vpcmpistrm\t{%3, %2, %1|%1, %2, %3}"
10147 [(set_attr "type" "sselog")
10148 (set_attr "prefix_data16" "1")
10149 (set_attr "prefix_extra" "1")
10150 (set_attr "length_immediate" "1")
10151 (set_attr "prefix" "maybe_vex")
10152 (set_attr "memory" "none,load")
10153 (set_attr "mode" "TI")])
;; Flags-only implicit-length string compare.  Only FLAGS_REG is set; the
;; index and mask results are declared as clobbered scratches (operands 1
;; and 0).  Four alternatives: the first two clobber the V16QI scratch
;; ("Yz") and print pcmpistrm, the last two clobber the SI scratch ("c")
;; and print pcmpistri; within each pair operand 3 is a register or memory.
10155 (define_insn "sse4_2_pcmpistr_cconly"
10156 [(set (reg:CC FLAGS_REG)
10158 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
10159 (match_operand:V16QI 3 "nonimmediate_operand" "x,m,x,m")
10160 (match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")]
10162 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
10163 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
10166 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
10167 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
10168 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}
10169 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}"
10170 [(set_attr "type" "sselog")
10171 (set_attr "prefix_data16" "1")
10172 (set_attr "prefix_extra" "1")
10173 (set_attr "length_immediate" "1")
10174 (set_attr "memory" "none,load,none,load")
10175 (set_attr "prefix" "maybe_vex")
10176 (set_attr "mode" "TI")])
10178 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10180 ;; SSE5 instructions
10182 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10184 ;; SSE5 parallel integer multiply/add instructions.
10185 ;; Note the instruction does not allow the value being added to be a memory
10186 ;; operand.  However, pretending via the nonimmediate_operand predicate
10187 ;; that it does, and splitting it later, allows the following to be recognized:
10188 ;; a[i] = b[i] * c[i] + d[i];
;; V8HI multiply/accumulate.  Operands 1 and 2 are the commutative ("%")
;; input pair, each a register or memory depending on the alternative;
;; operand 3 is tied to the destination register ("0").
;; The insn condition calls ix86_sse5_valid_op_p with 2 as its fifth
;; argument, whereas the saturating sibling sse5_pmacssww passes 1.
;; The third alternative prints %1 and %2 in swapped order.
10189 (define_insn "sse5_pmacsww"
10190 [(set (match_operand:V8HI 0 "register_operand" "=x,x,x")
10193 (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,xm")
10194 (match_operand:V8HI 2 "nonimmediate_operand" "x,xm,x"))
10195 (match_operand:V8HI 3 "register_operand" "0,0,0")))]
10196 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 2, true)"
10198 pmacsww\t{%3, %2, %1, %0|%0, %1, %2, %3}
10199 pmacsww\t{%3, %2, %1, %0|%0, %1, %2, %3}
10200 pmacsww\t{%3, %1, %2, %0|%0, %2, %1, %3}"
10201 [(set_attr "type" "ssemuladd")
10202 (set_attr "mode" "TI")])
10204 ;; Split pmacsww with two memory operands into a load and the pmacsww.
;; Matches a V8HI multiply/add where any of operands 1-3 may be memory.
;; It applies only when the operands fail ix86_sse5_valid_op_p with 1 as
;; the fifth argument but pass it with 2, and when the destination is
;; not mentioned in any input.  The replacement calls
;; ix86_expand_sse5_multiple_memory and then emits sse5_pmacsww.
10206 [(set (match_operand:V8HI 0 "register_operand" "")
10208 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
10209 (match_operand:V8HI 2 "nonimmediate_operand" ""))
10210 (match_operand:V8HI 3 "nonimmediate_operand" "")))]
10212 && !ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)
10213 && ix86_sse5_valid_op_p (operands, insn, 4, false, 2, true)
10214 && !reg_mentioned_p (operands[0], operands[1])
10215 && !reg_mentioned_p (operands[0], operands[2])
10216 && !reg_mentioned_p (operands[0], operands[3])"
10219 ix86_expand_sse5_multiple_memory (operands, 4, V8HImode);
10220 emit_insn (gen_sse5_pmacsww (operands[0], operands[1], operands[2],
;; V8HI multiply/accumulate emitting pmacssww.  Operands 1 and 2 are the
;; commutative ("%") factors of mult:V8HI, at most one of them in memory
;; per alternative; operand 3 is tied to the destination register ("0").
;; The third alternative prints %1 and %2 in swapped order.
10225 (define_insn "sse5_pmacssww"
10226 [(set (match_operand:V8HI 0 "register_operand" "=x,x,x")
10228 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,m")
10229 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x"))
10230 (match_operand:V8HI 3 "register_operand" "0,0,0")))]
10231 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)"
10233 pmacssww\t{%3, %2, %1, %0|%0, %1, %2, %3}
10234 pmacssww\t{%3, %2, %1, %0|%0, %1, %2, %3}
10235 pmacssww\t{%3, %1, %2, %0|%0, %2, %1, %3}"
10236 [(set_attr "type" "ssemuladd")
10237 (set_attr "mode" "TI")])
10239 ;; Note the instruction does not allow the value being added to be a memory
10240 ;; operand.  However, pretending via the nonimmediate_operand predicate
10241 ;; that it does, and splitting it later, allows the following to be recognized:
10242 ;; a[i] = b[i] * c[i] + d[i];
;; V4SI multiply/accumulate emitting pmacsdd.  Operands 1 and 2 are the
;; commutative ("%") input pair, at most one of them in memory per
;; alternative; operand 3 is tied to the destination register ("0").
;; The insn condition calls ix86_sse5_valid_op_p with 2 as its fifth
;; argument, whereas the saturating sibling sse5_pmacssdd passes 1.
10243 (define_insn "sse5_pmacsdd"
10244 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
10247 (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
10248 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x"))
10249 (match_operand:V4SI 3 "register_operand" "0,0,0")))]
10250 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 2, true)"
10252 pmacsdd\t{%3, %2, %1, %0|%0, %1, %2, %3}
10253 pmacsdd\t{%3, %2, %1, %0|%0, %1, %2, %3}
10254 pmacsdd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
10255 [(set_attr "type" "ssemuladd")
10256 (set_attr "mode" "TI")])
10258 ;; Split pmacsdd with two memory operands into a load and the pmacsdd.
;; Matches a V4SI multiply/add where any of operands 1-3 may be memory.
;; It applies only when the operands fail ix86_sse5_valid_op_p with 1 as
;; the fifth argument but pass it with 2, and when the destination is
;; not mentioned in any input.  The replacement calls
;; ix86_expand_sse5_multiple_memory and then emits sse5_pmacsdd.
10260 [(set (match_operand:V4SI 0 "register_operand" "")
10262 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "")
10263 (match_operand:V4SI 2 "nonimmediate_operand" ""))
10264 (match_operand:V4SI 3 "nonimmediate_operand" "")))]
10266 && !ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)
10267 && ix86_sse5_valid_op_p (operands, insn, 4, false, 2, true)
10268 && !reg_mentioned_p (operands[0], operands[1])
10269 && !reg_mentioned_p (operands[0], operands[2])
10270 && !reg_mentioned_p (operands[0], operands[3])"
10273 ix86_expand_sse5_multiple_memory (operands, 4, V4SImode);
10274 emit_insn (gen_sse5_pmacsdd (operands[0], operands[1], operands[2],
;; V4SI multiply/accumulate emitting pmacssdd.  Operands 1 and 2 are the
;; commutative ("%") factors of mult:V4SI, at most one of them in memory
;; per alternative; operand 3 is tied to the destination register ("0").
;; The third alternative prints %1 and %2 in swapped order.
10279 (define_insn "sse5_pmacssdd"
10280 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
10282 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
10283 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x"))
10284 (match_operand:V4SI 3 "register_operand" "0,0,0")))]
10285 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)"
10287 pmacssdd\t{%3, %2, %1, %0|%0, %1, %2, %3}
10288 pmacssdd\t{%3, %2, %1, %0|%0, %1, %2, %3}
10289 pmacssdd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
10290 [(set_attr "type" "ssemuladd")
10291 (set_attr "mode" "TI")])
;; Emits pmacssdql.  Operands 1 and 2 are V4SI inputs (commutative, at
;; most one in memory per alternative) whose selected elements begin at
;; index 1; operand 3 is a V2DI value tied to the V2DI destination ("0").
10293 (define_insn "sse5_pmacssdql"
10294 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x")
10299 (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
10300 (parallel [(const_int 1)
10303 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")
10304 (parallel [(const_int 1)
10306 (match_operand:V2DI 3 "register_operand" "0,0,0")))]
10307 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)"
10309 pmacssdql\t{%3, %2, %1, %0|%0, %1, %2, %3}
10310 pmacssdql\t{%3, %2, %1, %0|%0, %1, %2, %3}
10311 pmacssdql\t{%3, %1, %2, %0|%0, %2, %1, %3}"
10312 [(set_attr "type" "ssemuladd")
10313 (set_attr "mode" "TI")])
;; Emits pmacssdqh.  Operands 1 and 2 are V4SI inputs (commutative, at
;; most one in memory per alternative) whose selected elements begin at
;; index 0; operand 3 is a V2DI value tied to the V2DI destination ("0").
10315 (define_insn "sse5_pmacssdqh"
10316 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x")
10321 (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
10322 (parallel [(const_int 0)
10326 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")
10327 (parallel [(const_int 0)
10329 (match_operand:V2DI 3 "register_operand" "0,0,0")))]
10330 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)"
10332 pmacssdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}
10333 pmacssdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}
10334 pmacssdqh\t{%3, %1, %2, %0|%0, %2, %1, %3}"
10335 [(set_attr "type" "ssemuladd")
10336 (set_attr "mode" "TI")])
;; Emits pmacsdql.  Operands 1 and 2 are V4SI inputs (commutative, at
;; most one in memory per alternative) whose selected elements begin at
;; index 1; operand 3 is a V2DI value tied to the V2DI destination ("0").
10338 (define_insn "sse5_pmacsdql"
10339 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x")
10344 (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
10345 (parallel [(const_int 1)
10349 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")
10350 (parallel [(const_int 1)
10352 (match_operand:V2DI 3 "register_operand" "0,0,0")))]
10353 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)"
10355 pmacsdql\t{%3, %2, %1, %0|%0, %1, %2, %3}
10356 pmacsdql\t{%3, %2, %1, %0|%0, %1, %2, %3}
10357 pmacsdql\t{%3, %1, %2, %0|%0, %2, %1, %3}"
10358 [(set_attr "type" "ssemuladd")
10359 (set_attr "mode" "TI")])
;; Unnamed variant of the pmacsdql pattern whose addend (operand 3) is a
;; V2DI memory operand.  The destination is early-clobber ("=&x").
;; The pattern is split once reload has completed, or earlier when the
;; destination is not mentioned in operand 1 or operand 2.
;; NOTE(review): most of the replacement RTL is not visible in this
;; excerpt; only its leading (set (match_dup 0) ...) and the element
;; selections starting at index 1 can be seen.
10361 (define_insn_and_split "*sse5_pmacsdql_mem"
10362 [(set (match_operand:V2DI 0 "register_operand" "=&x,&x,&x")
10367 (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
10368 (parallel [(const_int 1)
10372 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")
10373 (parallel [(const_int 1)
10375 (match_operand:V2DI 3 "memory_operand" "m,m,m")))]
10376 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, -1, true)"
10378 "&& (reload_completed
10379 || (!reg_mentioned_p (operands[0], operands[1])
10380 && !reg_mentioned_p (operands[0], operands[2])))"
10381 [(set (match_dup 0)
10389 (parallel [(const_int 1)
10394 (parallel [(const_int 1)
10398 ;; We don't have a straight 32-bit parallel multiply and extend on SSE5, so
10399 ;; fake it with a multiply/add. In general, we expect the define_split to
10400 ;; occur before register allocation, so we have to handle the corner case where
10401 ;; the target is the same as operands 1/2
;; Widening multiply of V4SI elements 1 and 3 of operands 1 and 2 into a
;; V2DI early-clobber destination.  The preparation statement sets
;; operands[3] to the V2DI zero constant before the split is emitted.
;; Splitting happens once reload has completed, or earlier when the
;; destination is not mentioned in either input.
10402 (define_insn_and_split "sse5_mulv2div2di3_low"
10403 [(set (match_operand:V2DI 0 "register_operand" "=&x")
10407 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10408 (parallel [(const_int 1)
10412 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10413 (parallel [(const_int 1)
10414 (const_int 3)])))))]
10417 "&& (reload_completed
10418 || (!reg_mentioned_p (operands[0], operands[1])
10419 && !reg_mentioned_p (operands[0], operands[2])))"
10420 [(set (match_dup 0)
10428 (parallel [(const_int 1)
10433 (parallel [(const_int 1)
10437 operands[3] = CONST0_RTX (V2DImode);
10439 [(set_attr "type" "ssemuladd")
10440 (set_attr "mode" "TI")])
;; Emits pmacsdqh.  Operands 1 and 2 are V4SI inputs (commutative, at
;; most one in memory per alternative) whose selected elements begin at
;; index 0; operand 3 is a V2DI value tied to the V2DI destination ("0").
10442 (define_insn "sse5_pmacsdqh"
10443 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x")
10448 (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
10449 (parallel [(const_int 0)
10453 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")
10454 (parallel [(const_int 0)
10456 (match_operand:V2DI 3 "register_operand" "0,0,0")))]
10457 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)"
10459 pmacsdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}
10460 pmacsdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}
10461 pmacsdqh\t{%3, %1, %2, %0|%0, %2, %1, %3}"
10462 [(set_attr "type" "ssemuladd")
10463 (set_attr "mode" "TI")])
;; Unnamed variant of the pmacsdqh pattern whose addend (operand 3) is a
;; V2DI memory operand.  The destination is early-clobber ("=&x").
;; The pattern is split once reload has completed, or earlier when the
;; destination is not mentioned in operand 1 or operand 2.
;; NOTE(review): most of the replacement RTL is not visible in this
;; excerpt; only its leading (set (match_dup 0) ...) and the element
;; selections starting at index 0 can be seen.
10465 (define_insn_and_split "*sse5_pmacsdqh_mem"
10466 [(set (match_operand:V2DI 0 "register_operand" "=&x,&x,&x")
10471 (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
10472 (parallel [(const_int 0)
10476 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")
10477 (parallel [(const_int 0)
10479 (match_operand:V2DI 3 "memory_operand" "m,m,m")))]
10480 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, -1, true)"
10482 "&& (reload_completed
10483 || (!reg_mentioned_p (operands[0], operands[1])
10484 && !reg_mentioned_p (operands[0], operands[2])))"
10485 [(set (match_dup 0)
10493 (parallel [(const_int 0)
10498 (parallel [(const_int 0)
10502 ;; We don't have a straight 32-bit parallel multiply and extend on SSE5, so
10503 ;; fake it with a multiply/add. In general, we expect the define_split to
10504 ;; occur before register allocation, so we have to handle the corner case where
10505 ;; the target is the same as either operands[1] or operands[2]
;; Widening multiply of V4SI elements 0 and 2 of operands 1 and 2 into a
;; V2DI early-clobber destination.  The preparation statement sets
;; operands[3] to the V2DI zero constant before the split is emitted.
;; Splitting happens once reload has completed, or earlier when the
;; destination is not mentioned in either input.
10506 (define_insn_and_split "sse5_mulv2div2di3_high"
10507 [(set (match_operand:V2DI 0 "register_operand" "=&x")
10511 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10512 (parallel [(const_int 0)
10516 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10517 (parallel [(const_int 0)
10518 (const_int 2)])))))]
10521 "&& (reload_completed
10522 || (!reg_mentioned_p (operands[0], operands[1])
10523 && !reg_mentioned_p (operands[0], operands[2])))"
10524 [(set (match_dup 0)
10532 (parallel [(const_int 0)
10537 (parallel [(const_int 0)
10541 operands[3] = CONST0_RTX (V2DImode);
10543 [(set_attr "type" "ssemuladd")
10544 (set_attr "mode" "TI")])
10546 ;; SSE5 parallel integer multiply/add instructions for the intrinsics
;; Emits pmacsswd.  Operands 1 and 2 are V8HI inputs (commutative, at
;; most one in memory per alternative) whose selected elements begin at
;; index 1; operand 3 is a V4SI value tied to the V4SI destination ("0").
10547 (define_insn "sse5_pmacsswd"
10548 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
10553 (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,m")
10554 (parallel [(const_int 1)
10560 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x")
10561 (parallel [(const_int 1)
10565 (match_operand:V4SI 3 "register_operand" "0,0,0")))]
10566 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)"
10568 pmacsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
10569 pmacsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
10570 pmacsswd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
10571 [(set_attr "type" "ssemuladd")
10572 (set_attr "mode" "TI")])
;; Emits pmacswd.  Operands 1 and 2 are V8HI inputs (commutative, at
;; most one in memory per alternative) whose selected elements begin at
;; index 1; operand 3 is a V4SI value tied to the V4SI destination ("0").
10574 (define_insn "sse5_pmacswd"
10575 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
10580 (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,m")
10581 (parallel [(const_int 1)
10587 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x")
10588 (parallel [(const_int 1)
10592 (match_operand:V4SI 3 "register_operand" "0,0,0")))]
10593 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)"
10595 pmacswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
10596 pmacswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
10597 pmacswd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
10598 [(set_attr "type" "ssemuladd")
10599 (set_attr "mode" "TI")])
;; Emits pmadcsswd.  Operands 1 and 2 are V8HI inputs (commutative, at
;; most one in memory per alternative).  The RTL selects two element sets
;; from them, one beginning at index 0 and one beginning at index 1 and
;; ending at index 7; operand 3 is a V4SI value tied to the destination.
10601 (define_insn "sse5_pmadcsswd"
10602 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
10608 (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,m")
10609 (parallel [(const_int 0)
10615 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x")
10616 (parallel [(const_int 0)
10624 (parallel [(const_int 1)
10631 (parallel [(const_int 1)
10634 (const_int 7)])))))
10635 (match_operand:V4SI 3 "register_operand" "0,0,0")))]
10636 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)"
10638 pmadcsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
10639 pmadcsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
10640 pmadcsswd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
10641 [(set_attr "type" "ssemuladd")
10642 (set_attr "mode" "TI")])
;; Emits pmadcswd.  Operands 1 and 2 are V8HI inputs (commutative, at
;; most one in memory per alternative).  The RTL selects two element sets
;; from them, one beginning at index 0 and one beginning at index 1 and
;; ending at index 7; operand 3 is a V4SI value tied to the destination.
10644 (define_insn "sse5_pmadcswd"
10645 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
10651 (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,m")
10652 (parallel [(const_int 0)
10658 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x")
10659 (parallel [(const_int 0)
10667 (parallel [(const_int 1)
10674 (parallel [(const_int 1)
10677 (const_int 7)])))))
10678 (match_operand:V4SI 3 "register_operand" "0,0,0")))]
10679 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)"
10681 pmadcswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
10682 pmadcswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
10683 pmadcswd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
10684 [(set_attr "type" "ssemuladd")
10685 (set_attr "mode" "TI")])
10687 ;; SSE5 parallel XMM conditional moves
;; Vector conditional move over every SSEMODE mode, expressed as
;; if_then_else with operand 3 as the selector and operands 1 and 2 as
;; the two values.  In each of the four alternatives exactly one input
;; is tied to the destination ("0"): operand 3 in the first two,
;; operand 1 in the last two.  All alternatives print the same template.
10688 (define_insn "sse5_pcmov_<mode>"
10689 [(set (match_operand:SSEMODE 0 "register_operand" "=x,x,x,x")
10690 (if_then_else:SSEMODE
10691 (match_operand:SSEMODE 3 "nonimmediate_operand" "0,0,xm,x")
10692 (match_operand:SSEMODE 1 "vector_move_operand" "x,xm,0,0")
10693 (match_operand:SSEMODE 2 "vector_move_operand" "xm,x,x,xm")))]
10694 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
10696 pcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}
10697 pcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}
10698 pcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}
10699 pcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10700 [(set_attr "type" "sse4arg")])
10702 ;; SSE5 horizontal add/subtract instructions
;; Emits phaddbw: V16QI source (register or memory), V8HI destination.
;; The RTL combines two element selections of operand 1, one starting at
;; index 0 and one starting at index 1 (ending at index 15).
10703 (define_insn "sse5_phaddbw"
10704 [(set (match_operand:V8HI 0 "register_operand" "=x")
10708 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10709 (parallel [(const_int 0)
10720 (parallel [(const_int 1)
10727 (const_int 15)])))))]
10729 "phaddbw\t{%1, %0|%0, %1}"
10730 [(set_attr "type" "sseiadd1")])
;; Emits phaddbd: V16QI source (register or memory), V4SI destination.
;; The RTL combines four element selections of operand 1, starting at
;; indices 0, 1, 2 and 3 respectively.
10732 (define_insn "sse5_phaddbd"
10733 [(set (match_operand:V4SI 0 "register_operand" "=x")
10738 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10739 (parallel [(const_int 0)
10746 (parallel [(const_int 1)
10749 (const_int 13)]))))
10754 (parallel [(const_int 2)
10761 (parallel [(const_int 3)
10764 (const_int 15)]))))))]
10766 "phaddbd\t{%1, %0|%0, %1}"
10767 [(set_attr "type" "sseiadd1")])
;; Emits phaddbq: V16QI source (register or memory), V2DI destination.
;; The RTL combines eight element selections of operand 1; the visible
;; ones start at indices 0, 1, 2, 3 and 8, 9, 10, 11.
10769 (define_insn "sse5_phaddbq"
10770 [(set (match_operand:V2DI 0 "register_operand" "=x")
10776 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10777 (parallel [(const_int 0)
10782 (parallel [(const_int 1)
10788 (parallel [(const_int 2)
10793 (parallel [(const_int 3)
10794 (const_int 7)])))))
10800 (parallel [(const_int 8)
10805 (parallel [(const_int 9)
10806 (const_int 13)]))))
10811 (parallel [(const_int 10)
10816 (parallel [(const_int 11)
10817 (const_int 15)])))))))]
10819 "phaddbq\t{%1, %0|%0, %1}"
10820 [(set_attr "type" "sseiadd1")])
;; Emits phaddwd: V8HI source (register or memory), V4SI destination.
;; The RTL combines two element selections of operand 1, one starting at
;; index 0 and one starting at index 1 (ending at index 7).
10822 (define_insn "sse5_phaddwd"
10823 [(set (match_operand:V4SI 0 "register_operand" "=x")
10827 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10828 (parallel [(const_int 0)
10835 (parallel [(const_int 1)
10838 (const_int 7)])))))]
10840 "phaddwd\t{%1, %0|%0, %1}"
10841 [(set_attr "type" "sseiadd1")])
;; Emits phaddwq: V8HI source (register or memory), V2DI destination.
;; The RTL combines four element selections of operand 1, starting at
;; indices 0, 1, 2 and 3 respectively.
10843 (define_insn "sse5_phaddwq"
10844 [(set (match_operand:V2DI 0 "register_operand" "=x")
10849 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10850 (parallel [(const_int 0)
10855 (parallel [(const_int 1)
10861 (parallel [(const_int 2)
10866 (parallel [(const_int 3)
10867 (const_int 7)]))))))]
10869 "phaddwq\t{%1, %0|%0, %1}"
10870 [(set_attr "type" "sseiadd1")])
;; Emits phadddq: V4SI source (register or memory), V2DI destination.
;; The RTL combines two element selections of operand 1, one starting at
;; index 0 and one consisting of indices 1 and 3.
10872 (define_insn "sse5_phadddq"
10873 [(set (match_operand:V2DI 0 "register_operand" "=x")
10877 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
10878 (parallel [(const_int 0)
10883 (parallel [(const_int 1)
10884 (const_int 3)])))))]
10886 "phadddq\t{%1, %0|%0, %1}"
10887 [(set_attr "type" "sseiadd1")])
;; Emits phaddubw: V16QI source (register or memory), V8HI destination.
;; Same operand shape as sse5_phaddbw; only the mnemonic differs in the
;; lines visible here.
10889 (define_insn "sse5_phaddubw"
10890 [(set (match_operand:V8HI 0 "register_operand" "=x")
10894 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10895 (parallel [(const_int 0)
10906 (parallel [(const_int 1)
10913 (const_int 15)])))))]
10915 "phaddubw\t{%1, %0|%0, %1}"
10916 [(set_attr "type" "sseiadd1")])
;; SSE5 phaddubd: V16QI source (operand 1, register or memory) producing a
;; V4SI register result (operand 0).
10918 (define_insn "sse5_phaddubd"
10919 [(set (match_operand:V4SI 0 "register_operand" "=x")
10924 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10925 (parallel [(const_int 0)
10932 (parallel [(const_int 1)
10935 (const_int 13)]))))
10940 (parallel [(const_int 2)
10947 (parallel [(const_int 3)
10950 (const_int 15)]))))))]
10952 "phaddubd\t{%1, %0|%0, %1}"
10953 [(set_attr "type" "sseiadd1")])
;; SSE5 phaddubq: V16QI source (operand 1, register or memory) producing a
;; V2DI register result (operand 0).
10955 (define_insn "sse5_phaddubq"
10956 [(set (match_operand:V2DI 0 "register_operand" "=x")
10962 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10963 (parallel [(const_int 0)
10968 (parallel [(const_int 1)
10974 (parallel [(const_int 2)
10979 (parallel [(const_int 3)
10980 (const_int 7)])))))
10986 (parallel [(const_int 8)
10991 (parallel [(const_int 9)
10992 (const_int 13)]))))
10997 (parallel [(const_int 10)
11002 (parallel [(const_int 11)
11003 (const_int 15)])))))))]
11005 "phaddubq\t{%1, %0|%0, %1}"
11006 [(set_attr "type" "sseiadd1")])
;; SSE5 phadduwd: V8HI source (operand 1, register or memory) producing a
;; V4SI register result (operand 0).
11008 (define_insn "sse5_phadduwd"
11009 [(set (match_operand:V4SI 0 "register_operand" "=x")
11013 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
11014 (parallel [(const_int 0)
11021 (parallel [(const_int 1)
11024 (const_int 7)])))))]
11026 "phadduwd\t{%1, %0|%0, %1}"
11027 [(set_attr "type" "sseiadd1")])
;; SSE5 phadduwq: V8HI source (operand 1, register or memory) producing a
;; V2DI register result (operand 0).
11029 (define_insn "sse5_phadduwq"
11030 [(set (match_operand:V2DI 0 "register_operand" "=x")
11035 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
11036 (parallel [(const_int 0)
11041 (parallel [(const_int 1)
11047 (parallel [(const_int 2)
11052 (parallel [(const_int 3)
11053 (const_int 7)]))))))]
11055 "phadduwq\t{%1, %0|%0, %1}"
11056 [(set_attr "type" "sseiadd1")])
;; SSE5 phaddudq: V4SI source (operand 1, register or memory) producing a
;; V2DI register result (operand 0).
11058 (define_insn "sse5_phaddudq"
11059 [(set (match_operand:V2DI 0 "register_operand" "=x")
11063 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
11064 (parallel [(const_int 0)
11069 (parallel [(const_int 1)
11070 (const_int 3)])))))]
11072 "phaddudq\t{%1, %0|%0, %1}"
11073 [(set_attr "type" "sseiadd1")])
;; SSE5 phsubbw: V16QI source (operand 1, register or memory) producing a
;; V8HI register result (operand 0).
11075 (define_insn "sse5_phsubbw"
11076 [(set (match_operand:V8HI 0 "register_operand" "=x")
11080 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
11081 (parallel [(const_int 0)
11092 (parallel [(const_int 1)
11099 (const_int 15)])))))]
11101 "phsubbw\t{%1, %0|%0, %1}"
11102 [(set_attr "type" "sseiadd1")])
;; SSE5 phsubwd: V8HI source (operand 1, register or memory) producing a
;; V4SI register result (operand 0).
11104 (define_insn "sse5_phsubwd"
11105 [(set (match_operand:V4SI 0 "register_operand" "=x")
11109 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
11110 (parallel [(const_int 0)
11117 (parallel [(const_int 1)
11120 (const_int 7)])))))]
11122 "phsubwd\t{%1, %0|%0, %1}"
11123 [(set_attr "type" "sseiadd1")])
;; SSE5 phsubdq: V4SI source (operand 1, register or memory) producing a
;; V2DI register result (operand 0).
11125 (define_insn "sse5_phsubdq"
11126 [(set (match_operand:V2DI 0 "register_operand" "=x")
11130 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
11131 (parallel [(const_int 0)
11136 (parallel [(const_int 1)
11137 (const_int 3)])))))]
11139 "phsubdq\t{%1, %0|%0, %1}"
11140 [(set_attr "type" "sseiadd1")])
11142 ;; SSE5 permute instructions
;; Generic V16QI pperm, modelled as UNSPEC_SSE5_PERMUTE of operands 1-3.
;; Each of the four alternatives ties either operand 1 or operand 3 to the
;; destination ("0") and allows exactly one of the sources to be memory.
;; Matching additionally requires ix86_sse5_valid_op_p to accept the operands.
11143 (define_insn "sse5_pperm"
11144 [(set (match_operand:V16QI 0 "register_operand" "=x,x,x,x")
11146 [(match_operand:V16QI 1 "nonimmediate_operand" "0,0,x,xm")
11147 (match_operand:V16QI 2 "nonimmediate_operand" "x,xm,xm,x")
11148 (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0")]
11149 UNSPEC_SSE5_PERMUTE))]
11150 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
11151 "pperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11152 [(set_attr "type" "sse4arg")
11153 (set_attr "mode" "TI")])
11155 ;; The following are for the various unpack insns which don't need the first
11156 ;; source operand, so we can just use the output operand for the first operand.
11157 ;; This allows either of the other two operands to be a memory operand. We
11158 ;; can't just use the first operand as an argument to the normal pperm because
11159 ;; then an output only argument, suddenly becomes an input operand.
;; V16QI -> V8HI unpack implemented with pperm (zero-extending, per the
;; pattern name).  Operand 1 is the V16QI data, operand 2 is the const_int
;; index parallel, and operand 3 is the V16QI pperm selector.  The output
;; register is reused as the first pperm source (%0 appears twice).
;; The two alternatives let either operand 1 or operand 3 be memory, so the
;; condition must check that at least one of *those two* is a register.
;; Operand 2 is a parallel and can never satisfy register_operand, so it must
;; not be the operand tested here.
11160 (define_insn "sse5_pperm_zero_v16qi_v8hi"
11161 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
11164 (match_operand:V16QI 1 "nonimmediate_operand" "xm,x")
11165 (match_operand 2 "" "")))) ;; parallel with const_int's
11166 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
11168 && (register_operand (operands[1], V16QImode)
11169 || register_operand (operands[3], V16QImode))"
11170 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
11171 [(set_attr "type" "sseadd")
11172 (set_attr "prefix_data16" "0")
11173 (set_attr "prefix_extra" "2")
11174 (set_attr "mode" "TI")])
;; V16QI -> V8HI unpack implemented with pperm (sign-extending, per the
;; pattern name).  Operand 1 is the V16QI data, operand 2 is the const_int
;; index parallel, and operand 3 is the V16QI pperm selector.
;; Either operand 1 or operand 3 may be memory, but not both; operand 2 is a
;; parallel and is never a register, so the condition tests operand 3.
11176 (define_insn "sse5_pperm_sign_v16qi_v8hi"
11177 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
11180 (match_operand:V16QI 1 "nonimmediate_operand" "xm,x")
11181 (match_operand 2 "" "")))) ;; parallel with const_int's
11182 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
11184 && (register_operand (operands[1], V16QImode)
11185 || register_operand (operands[3], V16QImode))"
11186 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
11187 [(set_attr "type" "sseadd")
11188 (set_attr "prefix_data16" "0")
11189 (set_attr "prefix_extra" "2")
11190 (set_attr "mode" "TI")])
;; V8HI -> V4SI unpack implemented with pperm (zero-extending, per the
;; pattern name).  Operand 1 is the V8HI data, operand 2 is the const_int
;; index parallel, and operand 3 is the V16QI pperm selector.
;; Either operand 1 or operand 3 may be memory, but not both; operand 2 is a
;; parallel and is never a register, so the condition tests operand 3 (the
;; only V16QI operand of this pattern).
11192 (define_insn "sse5_pperm_zero_v8hi_v4si"
11193 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
11196 (match_operand:V8HI 1 "nonimmediate_operand" "xm,x")
11197 (match_operand 2 "" "")))) ;; parallel with const_int's
11198 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
11200 && (register_operand (operands[1], V8HImode)
11201 || register_operand (operands[3], V16QImode))"
11202 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
11203 [(set_attr "type" "sseadd")
11204 (set_attr "prefix_data16" "0")
11205 (set_attr "prefix_extra" "2")
11206 (set_attr "mode" "TI")])
;; V8HI -> V4SI unpack implemented with pperm (sign-extending, per the
;; pattern name).  Operand 1 is the V8HI data, operand 2 is the const_int
;; index parallel, and operand 3 is the V16QI pperm selector.
;; Either operand 1 or operand 3 may be memory, but not both; operand 2 is a
;; parallel and is never a register, so the condition tests operand 3 (the
;; only V16QI operand of this pattern).
11208 (define_insn "sse5_pperm_sign_v8hi_v4si"
11209 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
11212 (match_operand:V8HI 1 "nonimmediate_operand" "xm,x")
11213 (match_operand 2 "" "")))) ;; parallel with const_int's
11214 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
11216 && (register_operand (operands[1], V8HImode)
11217 || register_operand (operands[3], V16QImode))"
11218 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
11219 [(set_attr "type" "sseadd")
11220 (set_attr "prefix_data16" "0")
11221 (set_attr "prefix_extra" "2")
11222 (set_attr "mode" "TI")])
;; V4SI -> V2DI unpack implemented with pperm (zero-extending, per the
;; pattern name).  Operand 1 is the V4SI data, operand 2 is the const_int
;; index parallel, and operand 3 is the V16QI pperm selector.
;; Either operand 1 or operand 3 may be memory, but not both; operand 2 is a
;; parallel and is never a register, so the condition tests operand 3 (the
;; only V16QI operand of this pattern).
11224 (define_insn "sse5_pperm_zero_v4si_v2di"
11225 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
11228 (match_operand:V4SI 1 "nonimmediate_operand" "xm,x")
11229 (match_operand 2 "" "")))) ;; parallel with const_int's
11230 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
11232 && (register_operand (operands[1], V4SImode)
11233 || register_operand (operands[3], V16QImode))"
11234 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
11235 [(set_attr "type" "sseadd")
11236 (set_attr "prefix_data16" "0")
11237 (set_attr "prefix_extra" "2")
11238 (set_attr "mode" "TI")])
;; V4SI -> V2DI unpack implemented with pperm (sign-extending, per the
;; pattern name).  Operand 1 is the V4SI data, operand 2 is the const_int
;; index parallel, and operand 3 is the V16QI pperm selector.
;; Either operand 1 or operand 3 may be memory, but not both; operand 2 is a
;; parallel and is never a register, so the condition tests operand 3 (the
;; only V16QI operand of this pattern).
11240 (define_insn "sse5_pperm_sign_v4si_v2di"
11241 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
11244 (match_operand:V4SI 1 "nonimmediate_operand" "xm,x")
11245 (match_operand 2 "" "")))) ;; parallel with const_int's
11246 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
11248 && (register_operand (operands[1], V4SImode)
11249 || register_operand (operands[3], V16QImode))"
11250 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
11251 [(set_attr "type" "sseadd")
11252 (set_attr "prefix_data16" "0")
11253 (set_attr "prefix_extra" "2")
11254 (set_attr "mode" "TI")])
11256 ;; SSE5 pack instructions that combine two vectors into a smaller vector
;; Pack two V2DI sources (operands 1 and 2) into one V4SI result using pperm
;; with the V16QI selector in operand 3.  Constraint layout matches the
;; generic pperm pattern: operand 1 or operand 3 is tied to the destination
;; and exactly one source per alternative may be memory.
11257 (define_insn "sse5_pperm_pack_v2di_v4si"
11258 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x,x")
11261 (match_operand:V2DI 1 "nonimmediate_operand" "0,0,x,xm"))
11263 (match_operand:V2DI 2 "nonimmediate_operand" "x,xm,xm,x"))))
11264 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0"))]
11265 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
11266 "pperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11267 [(set_attr "type" "sse4arg")
11268 (set_attr "mode" "TI")])
;; Pack two V4SI sources (operands 1 and 2) into one V8HI result using pperm
;; with the V16QI selector in operand 3.  Same four-alternative constraint
;; layout as the generic pperm pattern.
11270 (define_insn "sse5_pperm_pack_v4si_v8hi"
11271 [(set (match_operand:V8HI 0 "register_operand" "=x,x,x,x")
11274 (match_operand:V4SI 1 "nonimmediate_operand" "0,0,x,xm"))
11276 (match_operand:V4SI 2 "nonimmediate_operand" "x,xm,xm,x"))))
11277 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0"))]
11278 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
11279 "pperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11280 [(set_attr "type" "sse4arg")
11281 (set_attr "mode" "TI")])
;; Pack two V8HI sources (operands 1 and 2) into one V16QI result using pperm
;; with the V16QI selector in operand 3.  Same four-alternative constraint
;; layout as the generic pperm pattern.
11283 (define_insn "sse5_pperm_pack_v8hi_v16qi"
11284 [(set (match_operand:V16QI 0 "register_operand" "=x,x,x,x")
11287 (match_operand:V8HI 1 "nonimmediate_operand" "0,0,x,xm"))
11289 (match_operand:V8HI 2 "nonimmediate_operand" "x,xm,xm,x"))))
11290 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0"))]
11291 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
11292 "pperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11293 [(set_attr "type" "sse4arg")
11294 (set_attr "mode" "TI")])
11296 ;; Floating point permutation (permps, permpd)
;; Permute for the SSEMODEF2P modes (V4SF, V2DF), modelled as
;; UNSPEC_SSE5_PERMUTE.  Operands 1 and 2 are the data vectors in the
;; iterator mode; operand 3 is always a V16QI selector.  Operand 1 or
;; operand 3 is tied to the destination and one source may be memory.
11297 (define_insn "sse5_perm<mode>"
11298 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
11300 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0,x,xm")
11301 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x")
11302 (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0")]
11303 UNSPEC_SSE5_PERMUTE))]
11304 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
11305 "perm<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11306 [(set_attr "type" "sse4arg")
11307 (set_attr "mode" "<MODE>")])
11309 ;; SSE5 packed rotate instructions
;; Expander for a vector rotate-left of operand 1 by the SImode count in
;; operand 2, for every mode in SSEMODE1248.
;; When operand 2 does not satisfy const_0_to_<sserotatemax>_operand, the
;; preparation code converts the count to the vector's element mode if its
;; mode differs, replicates it into all <ssescalarnum> lanes with
;; vec_init<mode>, and emits the per-element variable rotate
;; sse5_vrotl<mode>3 on the resulting vector register.
11310 (define_expand "rotl<mode>3"
11311 [(set (match_operand:SSEMODE1248 0 "register_operand" "")
11312 (rotate:SSEMODE1248
11313 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "")
11314 (match_operand:SI 2 "general_operand")))]
11317 /* If we were given a scalar, convert it to parallel */
11318 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
11320 rtvec vs = rtvec_alloc (<ssescalarnum>);
11321 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
11322 rtx reg = gen_reg_rtx (<MODE>mode);
11323 rtx op2 = operands[2];
11326 if (GET_MODE (op2) != <ssescalarmode>mode)
11328 op2 = gen_reg_rtx (<ssescalarmode>mode);
11329 convert_move (op2, operands[2], false);
11332 for (i = 0; i < <ssescalarnum>; i++)
11333 RTVEC_ELT (vs, i) = op2;
11335 emit_insn (gen_vec_init<mode> (reg, par));
11336 emit_insn (gen_sse5_vrotl<mode>3 (operands[0], operands[1], reg));
;; Expander for a vector rotate-right of operand 1 by the SImode count in
;; operand 2, for every mode in SSEMODE1248.
;; When operand 2 does not satisfy const_0_to_<sserotatemax>_operand, the
;; count is converted to the element mode if needed, replicated into every
;; lane with vec_init<mode>, negated with neg<mode>2, and passed to
;; sse5_vrotl<mode>3: a right rotate is issued as a left rotate by the
;; negated per-element count.
11341 (define_expand "rotr<mode>3"
11342 [(set (match_operand:SSEMODE1248 0 "register_operand" "")
11343 (rotatert:SSEMODE1248
11344 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "")
11345 (match_operand:SI 2 "general_operand")))]
11348 /* If we were given a scalar, convert it to parallel */
11349 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
11351 rtvec vs = rtvec_alloc (<ssescalarnum>);
11352 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
11353 rtx neg = gen_reg_rtx (<MODE>mode);
11354 rtx reg = gen_reg_rtx (<MODE>mode);
11355 rtx op2 = operands[2];
11358 if (GET_MODE (op2) != <ssescalarmode>mode)
11360 op2 = gen_reg_rtx (<ssescalarmode>mode);
11361 convert_move (op2, operands[2], false);
11364 for (i = 0; i < <ssescalarnum>; i++)
11365 RTVEC_ELT (vs, i) = op2;
11367 emit_insn (gen_vec_init<mode> (reg, par));
11368 emit_insn (gen_neg<mode>2 (neg, reg));
11369 emit_insn (gen_sse5_vrotl<mode>3 (operands[0], operands[1], neg));
;; Rotate-left by an immediate: operand 2 must be a constant accepted by
;; const_0_to_<sserotatemax>_operand and is emitted directly as the
;; prot<ssevecsize> count.  Operand 1 may be a register or memory.
11374 (define_insn "sse5_rotl<mode>3"
11375 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11376 (rotate:SSEMODE1248
11377 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm")
11378 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
11380 "prot<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
11381 [(set_attr "type" "sseishft")
11382 (set_attr "length_immediate" "1")
11383 (set_attr "mode" "TI")])
;; Rotate-right by an immediate.  There is only a rotate-left encoding
;; (prot<ssevecsize>), so the output code rewrites the count as
;; (element width in bits) - (operand 2) and stores it in operands[3].
;; The element width must come from the scalar element mode: <ssescalarnum>
;; is the number of elements in the vector, and <ssescalarnum> * 8 equals
;; the element width only for V4SI (4 elements of 32 bits).
11385 (define_insn "sse5_rotr<mode>3"
11386 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11387 (rotatert:SSEMODE1248
11388 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm")
11389 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
11392 operands[3] = GEN_INT (GET_MODE_BITSIZE (<ssescalarmode>mode) - INTVAL (operands[2]));
11393 return \"prot<ssevecsize>\t{%3, %1, %0|%0, %1, %3}\";
11395 [(set_attr "type" "sseishft")
11396 (set_attr "length_immediate" "1")
11397 (set_attr "mode" "TI")])
;; Per-element variable rotate-right: all three operands are registers in
;; the same SSEMODE1248 mode.  The count vector (operand 2) is negated into
;; a fresh register and the rotate is emitted as sse5_vrotl<mode>3.
11399 (define_expand "vrotr<mode>3"
11400 [(match_operand:SSEMODE1248 0 "register_operand" "")
11401 (match_operand:SSEMODE1248 1 "register_operand" "")
11402 (match_operand:SSEMODE1248 2 "register_operand" "")]
11405 rtx reg = gen_reg_rtx (<MODE>mode);
11406 emit_insn (gen_neg<mode>2 (reg, operands[2]));
11407 emit_insn (gen_sse5_vrotl<mode>3 (operands[0], operands[1], reg));
;; Per-element variable rotate-left: forwards the three register operands
;; unchanged to sse5_vrotl<mode>3.
11411 (define_expand "vrotl<mode>3"
11412 [(match_operand:SSEMODE1248 0 "register_operand" "")
11413 (match_operand:SSEMODE1248 1 "register_operand" "")
11414 (match_operand:SSEMODE1248 2 "register_operand" "")]
11417 emit_insn (gen_sse5_vrotl<mode>3 (operands[0], operands[1], operands[2]));
;; Variable rotate (prot<ssevecsize>) with a per-element count vector in
;; operand 2.  The RTL is an if_then_else whose arms are a rotate of
;; operand 1 and a rotatert by the negated operand 2.
;; NOTE(review): the if_then_else condition line is not visible in this
;; excerpt; presumably it tests the sign of operand 2 -- confirm.
;; Two alternatives: either the count (operand 2) or the data (operand 1)
;; may be a memory operand.
11421 (define_insn "sse5_vrotl<mode>3"
11422 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
11423 (if_then_else:SSEMODE1248
11425 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm,x")
11427 (rotate:SSEMODE1248
11428 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "x,xm")
11430 (rotatert:SSEMODE1248
11432 (neg:SSEMODE1248 (match_dup 2)))))]
11433 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 3, true, 1, false)"
11434 "prot<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
11435 [(set_attr "type" "sseishft")
11436 (set_attr "prefix_data16" "0")
11437 (set_attr "prefix_extra" "2")
11438 (set_attr "mode" "TI")])
11440 ;; SSE5 packed shift instructions.
11441 ;; FIXME: add V2DI back in
;; Per-element logical shift right for SSEMODE124 (V16QI, V8HI, V4SI):
;; negates the count vector (operand 2) into a fresh register and emits
;; sse5_lshl<mode>3 with the negated counts.
11442 (define_expand "vlshr<mode>3"
11443 [(match_operand:SSEMODE124 0 "register_operand" "")
11444 (match_operand:SSEMODE124 1 "register_operand" "")
11445 (match_operand:SSEMODE124 2 "register_operand" "")]
11448 rtx neg = gen_reg_rtx (<MODE>mode);
11449 emit_insn (gen_neg<mode>2 (neg, operands[2]));
11450 emit_insn (gen_sse5_lshl<mode>3 (operands[0], operands[1], neg));
;; Per-element arithmetic shift right for SSEMODE124: negates the count
;; vector (operand 2) into a fresh register and emits sse5_ashl<mode>3 with
;; the negated counts.
11454 (define_expand "vashr<mode>3"
11455 [(match_operand:SSEMODE124 0 "register_operand" "")
11456 (match_operand:SSEMODE124 1 "register_operand" "")
11457 (match_operand:SSEMODE124 2 "register_operand" "")]
11460 rtx neg = gen_reg_rtx (<MODE>mode);
11461 emit_insn (gen_neg<mode>2 (neg, operands[2]));
11462 emit_insn (gen_sse5_ashl<mode>3 (operands[0], operands[1], neg));
;; Per-element shift left for SSEMODE124: forwards the three register
;; operands unchanged to sse5_ashl<mode>3.
11466 (define_expand "vashl<mode>3"
11467 [(match_operand:SSEMODE124 0 "register_operand" "")
11468 (match_operand:SSEMODE124 1 "register_operand" "")
11469 (match_operand:SSEMODE124 2 "register_operand" "")]
11472 emit_insn (gen_sse5_ashl<mode>3 (operands[0], operands[1], operands[2]));
;; Variable arithmetic shift (psha<ssevecsize>) with a per-element count
;; vector in operand 2.  The RTL is an if_then_else whose arms are an ashift
;; of operand 1 and an ashiftrt by the negated operand 2.
;; NOTE(review): the if_then_else condition line is not visible in this
;; excerpt; presumably it tests the sign of operand 2 -- confirm.
;; Either the count or the data operand may be memory.
11476 (define_insn "sse5_ashl<mode>3"
11477 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
11478 (if_then_else:SSEMODE1248
11480 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm,x")
11482 (ashift:SSEMODE1248
11483 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "x,xm")
11485 (ashiftrt:SSEMODE1248
11487 (neg:SSEMODE1248 (match_dup 2)))))]
11488 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 3, true, 1, false)"
11489 "psha<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
11490 [(set_attr "type" "sseishft")
11491 (set_attr "prefix_data16" "0")
11492 (set_attr "prefix_extra" "2")
11493 (set_attr "mode" "TI")])
;; Variable logical shift (pshl<ssevecsize>) with a per-element count
;; vector in operand 2.  The RTL is an if_then_else whose arms are an ashift
;; of operand 1 and an lshiftrt by the negated operand 2.
;; NOTE(review): the if_then_else condition line is not visible in this
;; excerpt; presumably it tests the sign of operand 2 -- confirm.
;; Either the count or the data operand may be memory.
11495 (define_insn "sse5_lshl<mode>3"
11496 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
11497 (if_then_else:SSEMODE1248
11499 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm,x")
11501 (ashift:SSEMODE1248
11502 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "x,xm")
11504 (lshiftrt:SSEMODE1248
11506 (neg:SSEMODE1248 (match_dup 2)))))]
11507 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 3, true, 1, false)"
11508 "pshl<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
11509 [(set_attr "type" "sseishft")
11510 (set_attr "prefix_data16" "0")
11511 (set_attr "prefix_extra" "2")
11512 (set_attr "mode" "TI")])
11514 ;; SSE2 doesn't have some shift variants, so define versions for SSE5
;; V16QI shift left by a scalar SImode count (register or constant).
;; The count is placed in all 16 slots of a PARALLEL, materialised into a
;; V16QI register with vec_initv16qi, and used as the per-element count of
;; sse5_ashlv16qi3.
11515 (define_expand "ashlv16qi3"
11516 [(match_operand:V16QI 0 "register_operand" "")
11517 (match_operand:V16QI 1 "register_operand" "")
11518 (match_operand:SI 2 "nonmemory_operand" "")]
11521 rtvec vs = rtvec_alloc (16);
11522 rtx par = gen_rtx_PARALLEL (V16QImode, vs);
11523 rtx reg = gen_reg_rtx (V16QImode);
11525 for (i = 0; i < 16; i++)
11526 RTVEC_ELT (vs, i) = operands[2];
11528 emit_insn (gen_vec_initv16qi (reg, par));
11529 emit_insn (gen_sse5_ashlv16qi3 (operands[0], operands[1], reg));
;; V16QI logical shift by a scalar SImode count (register or constant).
;; The count is placed in all 16 slots of a PARALLEL, materialised into a
;; V16QI register with vec_initv16qi, and used as the per-element count of
;; sse5_lshlv16qi3.
11533 (define_expand "lshlv16qi3"
11534 [(match_operand:V16QI 0 "register_operand" "")
11535 (match_operand:V16QI 1 "register_operand" "")
11536 (match_operand:SI 2 "nonmemory_operand" "")]
11539 rtvec vs = rtvec_alloc (16);
11540 rtx par = gen_rtx_PARALLEL (V16QImode, vs);
11541 rtx reg = gen_reg_rtx (V16QImode);
11543 for (i = 0; i < 16; i++)
11544 RTVEC_ELT (vs, i) = operands[2];
11546 emit_insn (gen_vec_initv16qi (reg, par));
11547 emit_insn (gen_sse5_lshlv16qi3 (operands[0], operands[1], reg));
;; V16QI arithmetic shift right by a scalar SImode count, issued as
;; sse5_ashlv16qi3 with a negated per-element count.
;; A constant count is negated at expand time with GEN_INT before being
;; replicated into the 16-element vector.  When the count is not a constant,
;; the replicated vector is negated at run time with negv16qi2 before being
;; passed to the shift.
11551 (define_expand "ashrv16qi3"
11552 [(match_operand:V16QI 0 "register_operand" "")
11553 (match_operand:V16QI 1 "register_operand" "")
11554 (match_operand:SI 2 "nonmemory_operand" "")]
11557 rtvec vs = rtvec_alloc (16);
11558 rtx par = gen_rtx_PARALLEL (V16QImode, vs);
11559 rtx reg = gen_reg_rtx (V16QImode);
11561 rtx ele = ((CONST_INT_P (operands[2]))
11562 ? GEN_INT (- INTVAL (operands[2]))
11565 for (i = 0; i < 16; i++)
11566 RTVEC_ELT (vs, i) = ele;
11568 emit_insn (gen_vec_initv16qi (reg, par));
11570 if (!CONST_INT_P (operands[2]))
11572 rtx neg = gen_reg_rtx (V16QImode);
11573 emit_insn (gen_negv16qi2 (neg, reg));
11574 emit_insn (gen_sse5_ashlv16qi3 (operands[0], operands[1], neg));
11577 emit_insn (gen_sse5_ashlv16qi3 (operands[0], operands[1], reg));
;; V2DI arithmetic shift right by a scalar DImode count, issued as
;; sse5_ashlv2di3 with a negated count in both lanes.
;; Three cases for building the negated scalar "ele":
;;   - constant count: negated at expand time with GEN_INT;
;;   - count whose mode is not DImode: converted to DImode with
;;     convert_move, then negated with negdi2;
;;   - otherwise: negated directly with negdi2.
;; "ele" is then placed in both slots of the PARALLEL and materialised with
;; vec_initv2di.
11582 (define_expand "ashrv2di3"
11583 [(match_operand:V2DI 0 "register_operand" "")
11584 (match_operand:V2DI 1 "register_operand" "")
11585 (match_operand:DI 2 "nonmemory_operand" "")]
11588 rtvec vs = rtvec_alloc (2);
11589 rtx par = gen_rtx_PARALLEL (V2DImode, vs);
11590 rtx reg = gen_reg_rtx (V2DImode);
11593 if (CONST_INT_P (operands[2]))
11594 ele = GEN_INT (- INTVAL (operands[2]));
11595 else if (GET_MODE (operands[2]) != DImode)
11597 rtx move = gen_reg_rtx (DImode);
11598 ele = gen_reg_rtx (DImode);
11599 convert_move (move, operands[2], false);
11600 emit_insn (gen_negdi2 (ele, move));
11604 ele = gen_reg_rtx (DImode);
11605 emit_insn (gen_negdi2 (ele, operands[2]));
11608 RTVEC_ELT (vs, 0) = ele;
11609 RTVEC_ELT (vs, 1) = ele;
11610 emit_insn (gen_vec_initv2di (reg, par));
11611 emit_insn (gen_sse5_ashlv2di3 (operands[0], operands[1], reg));
11615 ;; SSE5 FRCZ support
;; Packed frcz for the SSEMODEF2P modes (V4SF, V2DF): operand 1 (register
;; or memory) is the source, operand 0 the register result.
11617 (define_insn "sse5_frcz<mode>2"
11618 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
11620 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm")]
11623 "frcz<ssemodesuffixf4>\t{%1, %0|%0, %1}"
11624 [(set_attr "type" "ssecvt1")
11625 (set_attr "mode" "<MODE>")])
;; Scalar frcz expressed as a vec_merge: the frcz result of operand 2 is
;; merged with operand 1, which is tied to the destination ("0").  Only
;; operand 2 appears as the source in the emitted instruction.
11628 (define_insn "sse5_vmfrcz<mode>2"
11629 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
11630 (vec_merge:SSEMODEF2P
11632 [(match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
11634 (match_operand:SSEMODEF2P 1 "register_operand" "0")
11637 "frcz<ssemodesuffixf2s>\t{%2, %0|%0, %2}"
11638 [(set_attr "type" "ssecvt1")
11639 (set_attr "mode" "<MODE>")])
;; cvtph2ps: converts a V4HI source (register or memory) to a V4SF register
;; result; modelled as an unspec.
11641 (define_insn "sse5_cvtph2ps"
11642 [(set (match_operand:V4SF 0 "register_operand" "=x")
11643 (unspec:V4SF [(match_operand:V4HI 1 "nonimmediate_operand" "xm")]
11646 "cvtph2ps\t{%1, %0|%0, %1}"
11647 [(set_attr "type" "ssecvt")
11648 (set_attr "mode" "V4SF")])
;; cvtps2ph: converts a V4SF register source to a V4HI result; here the
;; destination (operand 0) is the operand that may be memory.
11650 (define_insn "sse5_cvtps2ph"
11651 [(set (match_operand:V4HI 0 "nonimmediate_operand" "=xm")
11652 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")]
11655 "cvtps2ph\t{%1, %0|%0, %1}"
11656 [(set_attr "type" "ssecvt")
11657 (set_attr "mode" "V4SF")])
11659 ;; Scalar versions of the com instructions that use vector types that are
11660 ;; called from the intrinsics. Unlike the other s{s,d} instructions, the
11661 ;; com instructions fill in 0's in the upper bits instead of leaving them
11662 ;; unmodified, so we use const_vector of 0 instead of match_dup.
;; Expander for the scalar com* compare.  Operand 1 is the comparison
;; operator applied to operands 2 and 3; the preparation code sets
;; operands[4] to the all-zero constant of the vector mode, which is the
;; other arm of the vec_merge.
11663 (define_expand "sse5_vmmaskcmp<mode>3"
11664 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
11665 (vec_merge:SSEMODEF2P
11666 (match_operator:SSEMODEF2P 1 "sse5_comparison_float_operator"
11667 [(match_operand:SSEMODEF2P 2 "register_operand" "")
11668 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "")])
11673 operands[4] = CONST0_RTX (<MODE>mode);
;; Matching insn for the scalar com* compare: operand 2 must be a register,
;; operand 3 may be memory, operand 4 is the merge source with no predicate.
;; %Y1 prints the condition suffix of the comparison operator in operand 1.
11676 (define_insn "*sse5_vmmaskcmp<mode>3"
11677 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
11678 (vec_merge:SSEMODEF2P
11679 (match_operator:SSEMODEF2P 1 "sse5_comparison_float_operator"
11680 [(match_operand:SSEMODEF2P 2 "register_operand" "x")
11681 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm")])
11682 (match_operand:SSEMODEF2P 4 "")
11685 "com%Y1<ssemodesuffixf2s>\t{%3, %2, %0|%0, %2, %3}"
11686 [(set_attr "type" "sse4arg")
11687 (set_attr "prefix_data16" "0")
11688 (set_attr "prefix_rep" "0")
11689 (set_attr "prefix_extra" "2")
11690 (set_attr "length_immediate" "1")
11691 (set_attr "mode" "<ssescalarmode>")])
11693 ;; We don't have a comparison operator that always returns true/false, so
11694 ;; handle comfalse and comtrue specially.
;; comfalse / comtrue for the SSEMODEF2P modes, modelled as
;; UNSPEC_SSE5_TRUEFALSE.  Operand 3 is a constant selecting which of the
;; four forms to emit: scalar/packed comfalse and scalar/packed comtrue.
;; The third and fourth arms previously repeated the comfalse templates, so
;; comtrue could never be generated; they now emit comtrues / comtruep.
;; NOTE(review): the case labels are not visible in this excerpt; the arm
;; order false-scalar, false-packed, true-scalar, true-packed is assumed
;; from the order of the existing templates -- confirm against the labels.
11695 (define_insn "sse5_com_tf<mode>3"
11696 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
11698 [(match_operand:SSEMODEF2P 1 "register_operand" "x")
11699 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
11700 (match_operand:SI 3 "const_int_operand" "n")]
11701 UNSPEC_SSE5_TRUEFALSE))]
11704 const char *ret = NULL;
11706 switch (INTVAL (operands[3]))
11709 ret = \"comfalses<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}\";
11713 ret = \"comfalsep<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}\";
11717 ret = \"comtrues<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}\";
11721 ret = \"comtruep<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}\";
11725 gcc_unreachable ();
11730 [(set_attr "type" "ssecmp")
11731 (set_attr "prefix_data16" "0")
11732 (set_attr "prefix_rep" "0")
11733 (set_attr "prefix_extra" "2")
11734 (set_attr "length_immediate" "1")
11735 (set_attr "mode" "<MODE>")])
;; Packed floating-point com* compare for SSEMODEF2P.  Operand 1 is the
;; comparison operator (sse5_comparison_float_operator) applied to operand 2
;; (register) and operand 3 (register or memory); %Y1 prints its suffix.
11737 (define_insn "sse5_maskcmp<mode>3"
11738 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
11739 (match_operator:SSEMODEF2P 1 "sse5_comparison_float_operator"
11740 [(match_operand:SSEMODEF2P 2 "register_operand" "x")
11741 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm")]))]
11743 "com%Y1<ssemodesuffixf4>\t{%3, %2, %0|%0, %2, %3}"
11744 [(set_attr "type" "ssecmp")
11745 (set_attr "prefix_data16" "0")
11746 (set_attr "prefix_rep" "0")
11747 (set_attr "prefix_extra" "2")
11748 (set_attr "length_immediate" "1")
11749 (set_attr "mode" "<MODE>")])
;; Integer pcom* compare for SSEMODE1248, using the operators accepted by
;; ix86_comparison_int_operator.  Shares its name template with the
;; floating-point pattern; the mode iterator keeps the generated names
;; distinct.
;; NOTE(review): this pattern uses type "sse4arg" while the unsigned
;; pcom patterns use "ssecmp" -- confirm that the difference is intended.
11751 (define_insn "sse5_maskcmp<mode>3"
11752 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11753 (match_operator:SSEMODE1248 1 "ix86_comparison_int_operator"
11754 [(match_operand:SSEMODE1248 2 "register_operand" "x")
11755 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")]))]
11757 "pcom%Y1<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
11758 [(set_attr "type" "sse4arg")
11759 (set_attr "prefix_data16" "0")
11760 (set_attr "prefix_rep" "0")
11761 (set_attr "prefix_extra" "2")
11762 (set_attr "length_immediate" "1")
11763 (set_attr "mode" "TI")])
;; Unsigned integer pcom*u* compare for SSEMODE1248, using the operators
;; accepted by ix86_comparison_uns_operator.  The "u" is emitted between the
;; condition suffix (%Y1) and the element-size suffix.
11765 (define_insn "sse5_maskcmp_uns<mode>3"
11766 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11767 (match_operator:SSEMODE1248 1 "ix86_comparison_uns_operator"
11768 [(match_operand:SSEMODE1248 2 "register_operand" "x")
11769 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")]))]
11771 "pcom%Y1u<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
11772 [(set_attr "type" "ssecmp")
11773 (set_attr "prefix_data16" "0")
11774 (set_attr "prefix_rep" "0")
11775 (set_attr "prefix_extra" "2")
11776 (set_attr "length_immediate" "1")
11777 (set_attr "mode" "TI")])
11779 ;; Version of pcom*u* that is called from the intrinsics that allows pcomequ*
11780 ;; and pcomneu* not to be converted to the signed ones in case somebody needs
11781 ;; the exact instruction generated for the intrinsic.
;; Same unsigned compare as sse5_maskcmp_uns<mode>3, but with the comparison
;; wrapped in UNSPEC_SSE5_UNSIGNED_CMP so it is matched only in this opaque
;; form.  Emits the identical pcom%Y1u<ssevecsize> template.
11782 (define_insn "sse5_maskcmp_uns2<mode>3"
11783 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11784 (unspec:SSEMODE1248
11785 [(match_operator:SSEMODE1248 1 "ix86_comparison_uns_operator"
11786 [(match_operand:SSEMODE1248 2 "register_operand" "x")
11787 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")])]
11788 UNSPEC_SSE5_UNSIGNED_CMP))]
11790 "pcom%Y1u<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
11791 [(set_attr "type" "ssecmp")
11792 (set_attr "prefix_data16" "0")
11793 (set_attr "prefix_extra" "2")
11794 (set_attr "length_immediate" "1")
11795 (set_attr "mode" "TI")])
11797 ;; Pcomtrue and pcomfalse support. These are useless instructions, but are
11798 ;; being added here to be complete.
;; pcomtrue / pcomfalse for SSEMODE1248, modelled as UNSPEC_SSE5_TRUEFALSE.
;; Operand 3 is a constant: non-zero selects pcomtrue<ssevecsize>, zero
;; selects pcomfalse<ssevecsize>.
11799 (define_insn "sse5_pcom_tf<mode>3"
11800 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11801 (unspec:SSEMODE1248
11802 [(match_operand:SSEMODE1248 1 "register_operand" "x")
11803 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm")
11804 (match_operand:SI 3 "const_int_operand" "n")]
11805 UNSPEC_SSE5_TRUEFALSE))]
11808 return ((INTVAL (operands[3]) != 0)
11809 ? "pcomtrue<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
11810 : "pcomfalse<ssevecsize>\t{%2, %1, %0|%0, %1, %2}");
11812 [(set_attr "type" "ssecmp")
11813 (set_attr "prefix_data16" "0")
11814 (set_attr "prefix_extra" "2")
11815 (set_attr "length_immediate" "1")
11816 (set_attr "mode" "TI")])
;; VEX-encoded three-operand vaesenc: operand 1 is a register that need not
;; match the destination; operand 2 may be memory.  Requires both
;; TARGET_AES and TARGET_AVX.
11818 (define_insn "*avx_aesenc"
11819 [(set (match_operand:V2DI 0 "register_operand" "=x")
11820 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
11821 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11823 "TARGET_AES && TARGET_AVX"
11824 "vaesenc\t{%2, %1, %0|%0, %1, %2}"
11825 [(set_attr "type" "sselog1")
11826 (set_attr "prefix_extra" "1")
11827 (set_attr "prefix" "vex")
11828 (set_attr "mode" "TI")])
;; Two-operand aesenc: operand 1 is tied to the destination ("0"), so only
;; operands 0 and 2 are printed; operand 2 may be memory.
11830 (define_insn "aesenc"
11831 [(set (match_operand:V2DI 0 "register_operand" "=x")
11832 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11833 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11836 "aesenc\t{%2, %0|%0, %2}"
11837 [(set_attr "type" "sselog1")
11838 (set_attr "prefix_extra" "1")
11839 (set_attr "mode" "TI")])
;; VEX-encoded three-operand vaesenclast (UNSPEC_AESENCLAST); operand 2 may
;; be memory.  Requires both TARGET_AES and TARGET_AVX.
11841 (define_insn "*avx_aesenclast"
11842 [(set (match_operand:V2DI 0 "register_operand" "=x")
11843 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
11844 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11845 UNSPEC_AESENCLAST))]
11846 "TARGET_AES && TARGET_AVX"
11847 "vaesenclast\t{%2, %1, %0|%0, %1, %2}"
11848 [(set_attr "type" "sselog1")
11849 (set_attr "prefix_extra" "1")
11850 (set_attr "prefix" "vex")
11851 (set_attr "mode" "TI")])
;; Two-operand aesenclast (UNSPEC_AESENCLAST): operand 1 is tied to the
;; destination ("0"); operand 2 may be memory.
11853 (define_insn "aesenclast"
11854 [(set (match_operand:V2DI 0 "register_operand" "=x")
11855 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11856 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11857 UNSPEC_AESENCLAST))]
11859 "aesenclast\t{%2, %0|%0, %2}"
11860 [(set_attr "type" "sselog1")
11861 (set_attr "prefix_extra" "1")
11862 (set_attr "mode" "TI")])
;; VEX-encoded three-operand vaesdec; operand 2 may be memory.  Requires
;; both TARGET_AES and TARGET_AVX.
11864 (define_insn "*avx_aesdec"
11865 [(set (match_operand:V2DI 0 "register_operand" "=x")
11866 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
11867 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11869 "TARGET_AES && TARGET_AVX"
11870 "vaesdec\t{%2, %1, %0|%0, %1, %2}"
11871 [(set_attr "type" "sselog1")
11872 (set_attr "prefix_extra" "1")
11873 (set_attr "prefix" "vex")
11874 (set_attr "mode" "TI")])
;; Two-operand aesdec: operand 1 is tied to the destination ("0"); operand 2
;; may be memory.
11876 (define_insn "aesdec"
11877 [(set (match_operand:V2DI 0 "register_operand" "=x")
11878 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11879 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11882 "aesdec\t{%2, %0|%0, %2}"
11883 [(set_attr "type" "sselog1")
11884 (set_attr "prefix_extra" "1")
11885 (set_attr "mode" "TI")])
;; VEX-encoded three-operand vaesdeclast (UNSPEC_AESDECLAST); operand 2 may
;; be memory.  Requires both TARGET_AES and TARGET_AVX.
11887 (define_insn "*avx_aesdeclast"
11888 [(set (match_operand:V2DI 0 "register_operand" "=x")
11889 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
11890 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11891 UNSPEC_AESDECLAST))]
11892 "TARGET_AES && TARGET_AVX"
11893 "vaesdeclast\t{%2, %1, %0|%0, %1, %2}"
11894 [(set_attr "type" "sselog1")
11895 (set_attr "prefix_extra" "1")
11896 (set_attr "prefix" "vex")
11897 (set_attr "mode" "TI")])
;; Two-operand aesdeclast (UNSPEC_AESDECLAST): operand 1 is tied to the
;; destination ("0"); operand 2 may be memory.
11899 (define_insn "aesdeclast"
11900 [(set (match_operand:V2DI 0 "register_operand" "=x")
11901 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11902 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11903 UNSPEC_AESDECLAST))]
11905 "aesdeclast\t{%2, %0|%0, %2}"
11906 [(set_attr "type" "sselog1")
11907 (set_attr "prefix_extra" "1")
11908 (set_attr "mode" "TI")])
;; aesimc with a single source (register or memory).  One pattern serves
;; both encodings: the template starts with %v and the "prefix" attribute is
;; "maybe_vex".
11910 (define_insn "aesimc"
11911 [(set (match_operand:V2DI 0 "register_operand" "=x")
11912 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")]
11915 "%vaesimc\t{%1, %0|%0, %1}"
11916 [(set_attr "type" "sselog1")
11917 (set_attr "prefix_extra" "1")
11918 (set_attr "prefix" "maybe_vex")
11919 (set_attr "mode" "TI")])
;; aeskeygenassist (UNSPEC_AESKEYGENASSIST): operand 1 is the source
;; (register or memory) and operand 2 an immediate accepted by
;; const_0_to_255_operand.  Uses the %v / "maybe_vex" form so one pattern
;; serves both encodings.
11921 (define_insn "aeskeygenassist"
11922 [(set (match_operand:V2DI 0 "register_operand" "=x")
11923 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")
11924 (match_operand:SI 2 "const_0_to_255_operand" "n")]
11925 UNSPEC_AESKEYGENASSIST))]
11927 "%vaeskeygenassist\t{%2, %1, %0|%0, %1, %2}"
11928 [(set_attr "type" "sselog1")
11929 (set_attr "prefix_extra" "1")
11930 (set_attr "length_immediate" "1")
11931 (set_attr "prefix" "maybe_vex")
11932 (set_attr "mode" "TI")])
;; VEX-encoded vpclmulqdq: two V2DI sources (operand 2 may be memory) plus
;; an immediate in operand 3 accepted by const_0_to_255_operand.  Requires
;; both TARGET_PCLMUL and TARGET_AVX.
11934 (define_insn "*vpclmulqdq"
11935 [(set (match_operand:V2DI 0 "register_operand" "=x")
11936 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
11937 (match_operand:V2DI 2 "nonimmediate_operand" "xm")
11938 (match_operand:SI 3 "const_0_to_255_operand" "n")]
11940 "TARGET_PCLMUL && TARGET_AVX"
11941 "vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11942 [(set_attr "type" "sselog1")
11943 (set_attr "prefix_extra" "1")
11944 (set_attr "length_immediate" "1")
11945 (set_attr "prefix" "vex")
11946 (set_attr "mode" "TI")])
;; Two-operand pclmulqdq: operand 1 is tied to the destination ("0"),
;; operand 2 may be memory, and operand 3 is an immediate accepted by
;; const_0_to_255_operand.
11948 (define_insn "pclmulqdq"
11949 [(set (match_operand:V2DI 0 "register_operand" "=x")
11950 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11951 (match_operand:V2DI 2 "nonimmediate_operand" "xm")
11952 (match_operand:SI 3 "const_0_to_255_operand" "n")]
11955 "pclmulqdq\t{%3, %2, %0|%0, %2, %3}"
11956 [(set_attr "type" "sselog1")
11957 (set_attr "prefix_extra" "1")
11958 (set_attr "length_immediate" "1")
11959 (set_attr "mode" "TI")])
;; Expander that builds the PARALLEL describing vzeroall.
;; The PARALLEL has nregs + 1 elements, where nregs is 16 when TARGET_64BIT
;; and 8 otherwise: element 0 is an unspec_volatile marker, and each
;; following element sets one SSE register (as V8SI) to the all-zero vector.
11961 (define_expand "avx_vzeroall"
11962 [(match_par_dup 0 [(const_int 0)])]
11965 int nregs = TARGET_64BIT ? 16 : 8;
11968 operands[0] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs + 1));
11970 XVECEXP (operands[0], 0, 0)
11971 = gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, const0_rtx),
11974 for (regno = 0; regno < nregs; regno++)
11975 XVECEXP (operands[0], 0, regno + 1)
11976 = gen_rtx_SET (VOIDmode,
11977 gen_rtx_REG (V8SImode, SSE_REGNO (regno)),
11978 CONST0_RTX (V8SImode));
;; Matching insn for vzeroall: a match_parallel checked by the
;; vzeroall_operation predicate.  Only the leading elements are spelled out
;; here: the UNSPECV_VZEROALL marker and the first register-to-zero set.
11981 (define_insn "*avx_vzeroall"
11982 [(match_parallel 0 "vzeroall_operation"
11983 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROALL)
11984 (set (match_operand 1 "register_operand" "=x")
11985 (match_operand 2 "const0_operand" "X"))])]
11988 [(set_attr "type" "sse")
11989 (set_attr "modrm" "0")
11990 (set_attr "memory" "none")
11991 (set_attr "prefix" "vex")
11992 (set_attr "mode" "OI")])
11994 ;; vzeroupper clobbers the upper 128bits of AVX registers.
;; vzeroupper for 32-bit targets (TARGET_AVX && !TARGET_64BIT): an
;; unspec_volatile marker plus a V8SI clobber of each of XMM0-XMM7.
11995 (define_insn "avx_vzeroupper"
11996 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)
11997 (clobber (reg:V8SI XMM0_REG))
11998 (clobber (reg:V8SI XMM1_REG))
11999 (clobber (reg:V8SI XMM2_REG))
12000 (clobber (reg:V8SI XMM3_REG))
12001 (clobber (reg:V8SI XMM4_REG))
12002 (clobber (reg:V8SI XMM5_REG))
12003 (clobber (reg:V8SI XMM6_REG))
12004 (clobber (reg:V8SI XMM7_REG))]
12005 "TARGET_AVX && !TARGET_64BIT"
12007 [(set_attr "type" "sse")
12008 (set_attr "modrm" "0")
12009 (set_attr "memory" "none")
12010 (set_attr "prefix" "vex")
12011 (set_attr "mode" "OI")])
;; 64-bit variant, enabled by "TARGET_AVX && TARGET_64BIT".
;; Same shape as avx_vzeroupper (an UNSPECV_VZEROUPPER unspec_volatile plus
;; V8SI clobbers), but the clobber list covers XMM0_REG through XMM15_REG.
12013 (define_insn "avx_vzeroupper_rex64"
12014 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)
12015 (clobber (reg:V8SI XMM0_REG))
12016 (clobber (reg:V8SI XMM1_REG))
12017 (clobber (reg:V8SI XMM2_REG))
12018 (clobber (reg:V8SI XMM3_REG))
12019 (clobber (reg:V8SI XMM4_REG))
12020 (clobber (reg:V8SI XMM5_REG))
12021 (clobber (reg:V8SI XMM6_REG))
12022 (clobber (reg:V8SI XMM7_REG))
12023 (clobber (reg:V8SI XMM8_REG))
12024 (clobber (reg:V8SI XMM9_REG))
12025 (clobber (reg:V8SI XMM10_REG))
12026 (clobber (reg:V8SI XMM11_REG))
12027 (clobber (reg:V8SI XMM12_REG))
12028 (clobber (reg:V8SI XMM13_REG))
12029 (clobber (reg:V8SI XMM14_REG))
12030 (clobber (reg:V8SI XMM15_REG))]
12031 "TARGET_AVX && TARGET_64BIT"
12033 [(set_attr "type" "sse")
12034 (set_attr "modrm" "0")
12035 (set_attr "memory" "none")
12036 (set_attr "prefix" "vex")
12037 (set_attr "mode" "OI")])
;; Immediate-controlled vpermilp<avxmodesuffixf2c>, instantiated for each
;; mode of the AVXMODEF2P iterator.  Operand 2 is a constant validated by
;; const_0_to_<vpermilbits>_operand and is emitted as the instruction's
;; immediate (length_immediate is "1").
;; NOTE(review): operand 1 combines the "register_operand" predicate with
;; the "xm" constraint.  If a memory source is intended, the matching
;; predicate would be "nonimmediate_operand" -- confirm before changing.
12039 (define_insn "avx_vpermil<mode>"
12040 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
12042 [(match_operand:AVXMODEF2P 1 "register_operand" "xm")
12043 (match_operand:SI 2 "const_0_to_<vpermilbits>_operand" "n")]
12046 "vpermilp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
12047 [(set_attr "type" "sselog")
12048 (set_attr "prefix_extra" "1")
12049 (set_attr "length_immediate" "1")
12050 (set_attr "prefix" "vex")
12051 (set_attr "mode" "<MODE>")])
;; Variable-control form of vpermilp<avxmodesuffixf2c> for the AVXMODEF2P
;; modes.  Operand 1 must be a register; operand 2 has mode
;; <avxpermvecmode> and may be a register or memory ("xm").  Unlike
;; avx_vpermil<mode>, there is no constant operand and no length_immediate
;; attribute.
12053 (define_insn "avx_vpermilvar<mode>3"
12054 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
12056 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
12057 (match_operand:<avxpermvecmode> 2 "nonimmediate_operand" "xm")]
12060 "vpermilp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
12061 [(set_attr "type" "sselog")
12062 (set_attr "prefix_extra" "1")
12063 (set_attr "prefix" "vex")
12064 (set_attr "mode" "<MODE>")])
;; vperm2f128 for the AVX256MODE2P modes, modelled as an
;; UNSPEC_VPERMIL2F128 of three inputs: a register (operand 1), a register
;; or memory source (operand 2, "xm") and a constant accepted by
;; const_0_to_255_operand (operand 3), emitted as the immediate.
;; The "mode" attribute is fixed at V8SF for every iterator mode.
12066 (define_insn "avx_vperm2f128<mode>3"
12067 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
12068 (unspec:AVX256MODE2P
12069 [(match_operand:AVX256MODE2P 1 "register_operand" "x")
12070 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm")
12071 (match_operand:SI 3 "const_0_to_255_operand" "n")]
12072 UNSPEC_VPERMIL2F128))]
12074 "vperm2f128\t{%3, %2, %1, %0|%0, %1, %2, %3}"
12075 [(set_attr "type" "sselog")
12076 (set_attr "prefix_extra" "1")
12077 (set_attr "length_immediate" "1")
12078 (set_attr "prefix" "vex")
12079 (set_attr "mode" "V8SF")])
;; vbroadcasts<avxmodesuffixf2c> for the AVXMODEF4P modes.  The result is
;; expressed as a vec_concat of two <avxhalfvecmode> halves, each itself a
;; vec_concat.  The only source operand is operand 1, a scalar of mode
;; <avxscalarmode> that must be in memory (predicate "memory_operand",
;; constraint "m"); a register source is not accepted by this pattern.
12081 (define_insn "avx_vbroadcasts<avxmodesuffixf2c><avxmodesuffix>"
12082 [(set (match_operand:AVXMODEF4P 0 "register_operand" "=x")
12083 (vec_concat:AVXMODEF4P
12084 (vec_concat:<avxhalfvecmode>
12085 (match_operand:<avxscalarmode> 1 "memory_operand" "m")
12087 (vec_concat:<avxhalfvecmode>
12091 "vbroadcasts<avxmodesuffixf2c>\t{%1, %0|%0, %1}"
12092 [(set_attr "type" "ssemov")
12093 (set_attr "prefix_extra" "1")
12094 (set_attr "prefix" "vex")
12095 (set_attr "mode" "<avxscalarmode>")])
;; vbroadcastss into a V8SF register.  The source (operand 1) is an SF
;; value that must be in memory ("memory_operand", "m").  The "mode"
;; attribute is SF.
12097 (define_insn "avx_vbroadcastss256"
12098 [(set (match_operand:V8SF 0 "register_operand" "=x")
12102 (match_operand:SF 1 "memory_operand" "m")
12115 "vbroadcastss\t{%1, %0|%0, %1}"
12116 [(set_attr "type" "ssemov")
12117 (set_attr "prefix_extra" "1")
12118 (set_attr "prefix" "vex")
12119 (set_attr "mode" "SF")])
;; vbroadcastf128 for the AVX256MODEF2P modes.  The result is a vec_concat
;; whose source is operand 1, an <avxhalfvecmode> value that must be in
;; memory ("memory_operand", "m").  The "mode" attribute is fixed at V4SF.
12121 (define_insn "avx_vbroadcastf128_p<avxmodesuffixf2c>256"
12122 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "=x")
12123 (vec_concat:AVX256MODEF2P
12124 (match_operand:<avxhalfvecmode> 1 "memory_operand" "m")
12127 "vbroadcastf128\t{%1, %0|%0, %1}"
12128 [(set_attr "type" "ssemov")
12129 (set_attr "prefix_extra" "1")
12130 (set_attr "prefix" "vex")
12131 (set_attr "mode" "V4SF")])
;; Expander for inserting a half-width vector (operand 2, mode
;; <avxhalfvecmode>) into a 256-bit vector (operand 1), with the result in
;; operand 0.  Operand 3 is a constant accepted by const_0_to_1_operand.
;; The preparation code switches on INTVAL (operands[3]) and emits either
;; gen_vec_set_lo_<mode> or gen_vec_set_hi_<mode>; any other value reaches
;; gcc_unreachable ().
12133 (define_expand "avx_vinsertf128<mode>"
12134 [(match_operand:AVX256MODE 0 "register_operand" "")
12135 (match_operand:AVX256MODE 1 "register_operand" "")
12136 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "")
12137 (match_operand:SI 3 "const_0_to_1_operand" "")]
12140 switch (INTVAL (operands[3]))
12143 emit_insn (gen_vec_set_lo_<mode> (operands[0], operands[1],
12147 emit_insn (gen_vec_set_hi_<mode> (operands[0], operands[1],
12151 gcc_unreachable ();
;; Replace the low half of a four-element 256-bit vector (AVX256MODE4P).
;; The result is the vec_concat of operand 2 (the new low half, register or
;; memory) with elements 2 and 3 selected from operand 1 (the retained high
;; half).  Emitted as vinsertf128 with immediate 0x0.
12156 (define_insn "vec_set_lo_<mode>"
12157 [(set (match_operand:AVX256MODE4P 0 "register_operand" "=x")
12158 (vec_concat:AVX256MODE4P
12159 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")
12160 (vec_select:<avxhalfvecmode>
12161 (match_operand:AVX256MODE4P 1 "register_operand" "x")
12162 (parallel [(const_int 2) (const_int 3)]))))]
12164 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
12165 [(set_attr "type" "sselog")
12166 (set_attr "prefix_extra" "1")
12167 (set_attr "length_immediate" "1")
12168 (set_attr "prefix" "vex")
12169 (set_attr "mode" "V8SF")])
;; Replace the high half of a four-element 256-bit vector (AVX256MODE4P).
;; The result is the vec_concat of elements 0 and 1 selected from operand 1
;; (the retained low half) with operand 2 (the new high half, register or
;; memory).  Emitted as vinsertf128 with immediate 0x1.
12171 (define_insn "vec_set_hi_<mode>"
12172 [(set (match_operand:AVX256MODE4P 0 "register_operand" "=x")
12173 (vec_concat:AVX256MODE4P
12174 (vec_select:<avxhalfvecmode>
12175 (match_operand:AVX256MODE4P 1 "register_operand" "x")
12176 (parallel [(const_int 0) (const_int 1)]))
12177 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")))]
12179 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
12180 [(set_attr "type" "sselog")
12181 (set_attr "prefix_extra" "1")
12182 (set_attr "length_immediate" "1")
12183 (set_attr "prefix" "vex")
12184 (set_attr "mode" "V8SF")])
;; Replace the low half of an eight-element 256-bit vector (AVX256MODE8P).
;; The result is the vec_concat of operand 2 (the new low half, register or
;; memory) with elements 4..7 selected from operand 1 (the retained high
;; half).  Emitted as vinsertf128 with immediate 0x0.
12186 (define_insn "vec_set_lo_<mode>"
12187 [(set (match_operand:AVX256MODE8P 0 "register_operand" "=x")
12188 (vec_concat:AVX256MODE8P
12189 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")
12190 (vec_select:<avxhalfvecmode>
12191 (match_operand:AVX256MODE8P 1 "register_operand" "x")
12192 (parallel [(const_int 4) (const_int 5)
12193 (const_int 6) (const_int 7)]))))]
12195 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
12196 [(set_attr "type" "sselog")
12197 (set_attr "prefix_extra" "1")
12198 (set_attr "length_immediate" "1")
12199 (set_attr "prefix" "vex")
12200 (set_attr "mode" "V8SF")])
;; Replace the high half of an eight-element 256-bit vector (AVX256MODE8P).
;; The result is the vec_concat of elements 0..3 selected from operand 1
;; (the retained low half) with operand 2 (the new high half, register or
;; memory).  Emitted as vinsertf128 with immediate 0x1.
12202 (define_insn "vec_set_hi_<mode>"
12203 [(set (match_operand:AVX256MODE8P 0 "register_operand" "=x")
12204 (vec_concat:AVX256MODE8P
12205 (vec_select:<avxhalfvecmode>
12206 (match_operand:AVX256MODE8P 1 "register_operand" "x")
12207 (parallel [(const_int 0) (const_int 1)
12208 (const_int 2) (const_int 3)]))
12209 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")))]
12211 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
12212 [(set_attr "type" "sselog")
12213 (set_attr "prefix_extra" "1")
12214 (set_attr "length_immediate" "1")
12215 (set_attr "prefix" "vex")
12216 (set_attr "mode" "V8SF")])
;; Replace the low half of a V16HI vector.  Operand 2 is the new V8HI half
;; (register or memory); elements 8..15 of operand 1 are the ones selected
;; for retention.  Emitted as vinsertf128 with immediate 0x0; the "mode"
;; attribute is V8SF even though the data is integer.
12218 (define_insn "vec_set_lo_v16hi"
12219 [(set (match_operand:V16HI 0 "register_operand" "=x")
12221 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
12223 (match_operand:V16HI 1 "register_operand" "x")
12224 (parallel [(const_int 8) (const_int 9)
12225 (const_int 10) (const_int 11)
12226 (const_int 12) (const_int 13)
12227 (const_int 14) (const_int 15)]))))]
12229 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
12230 [(set_attr "type" "sselog")
12231 (set_attr "prefix_extra" "1")
12232 (set_attr "length_immediate" "1")
12233 (set_attr "prefix" "vex")
12234 (set_attr "mode" "V8SF")])
;; Replace the high half of a V16HI vector.  Elements 0..7 of operand 1 are
;; the ones selected for retention; operand 2 is the new V8HI half
;; (register or memory).  Emitted as vinsertf128 with immediate 0x1; the
;; "mode" attribute is V8SF even though the data is integer.
12236 (define_insn "vec_set_hi_v16hi"
12237 [(set (match_operand:V16HI 0 "register_operand" "=x")
12240 (match_operand:V16HI 1 "register_operand" "x")
12241 (parallel [(const_int 0) (const_int 1)
12242 (const_int 2) (const_int 3)
12243 (const_int 4) (const_int 5)
12244 (const_int 6) (const_int 7)]))
12245 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
12247 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
12248 [(set_attr "type" "sselog")
12249 (set_attr "prefix_extra" "1")
12250 (set_attr "length_immediate" "1")
12251 (set_attr "prefix" "vex")
12252 (set_attr "mode" "V8SF")])
;; Replace the low half of a V32QI vector.  Operand 2 is the new V16QI half
;; (register or memory); elements 16..31 of operand 1 are the ones selected
;; for retention.  Emitted as vinsertf128 with immediate 0x0; the "mode"
;; attribute is V8SF even though the data is integer.
12254 (define_insn "vec_set_lo_v32qi"
12255 [(set (match_operand:V32QI 0 "register_operand" "=x")
12257 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
12259 (match_operand:V32QI 1 "register_operand" "x")
12260 (parallel [(const_int 16) (const_int 17)
12261 (const_int 18) (const_int 19)
12262 (const_int 20) (const_int 21)
12263 (const_int 22) (const_int 23)
12264 (const_int 24) (const_int 25)
12265 (const_int 26) (const_int 27)
12266 (const_int 28) (const_int 29)
12267 (const_int 30) (const_int 31)]))))]
12269 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
12270 [(set_attr "type" "sselog")
12271 (set_attr "prefix_extra" "1")
12272 (set_attr "length_immediate" "1")
12273 (set_attr "prefix" "vex")
12274 (set_attr "mode" "V8SF")])
;; Replace the high half of a V32QI vector.  Elements 0..15 of operand 1
;; are the ones selected for retention; operand 2 is the new V16QI half
;; (register or memory).  Emitted as vinsertf128 with immediate 0x1; the
;; "mode" attribute is V8SF even though the data is integer.
12276 (define_insn "vec_set_hi_v32qi"
12277 [(set (match_operand:V32QI 0 "register_operand" "=x")
12280 (match_operand:V32QI 1 "register_operand" "x")
12281 (parallel [(const_int 0) (const_int 1)
12282 (const_int 2) (const_int 3)
12283 (const_int 4) (const_int 5)
12284 (const_int 6) (const_int 7)
12285 (const_int 8) (const_int 9)
12286 (const_int 10) (const_int 11)
12287 (const_int 12) (const_int 13)
12288 (const_int 14) (const_int 15)]))
12289 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
12291 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
12292 [(set_attr "type" "sselog")
12293 (set_attr "prefix_extra" "1")
12294 (set_attr "length_immediate" "1")
12295 (set_attr "prefix" "vex")
12296 (set_attr "mode" "V8SF")])
;; Load form of vmaskmovp<avxmodesuffixf2c> for the AVXMODEF2P modes: the
;; destination (operand 0) is a register, operand 1 must be in memory and
;; operand 2 must be a register.
;; NOTE(review): operand 2 is presumably the element mask, going by the
;; pattern name -- confirm against the instruction reference.
12298 (define_insn "avx_maskloadp<avxmodesuffixf2c><avxmodesuffix>"
12299 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
12301 [(match_operand:AVXMODEF2P 1 "memory_operand" "m")
12302 (match_operand:AVXMODEF2P 2 "register_operand" "x")
12306 "vmaskmovp<avxmodesuffixf2c>\t{%1, %2, %0|%0, %2, %1}"
12307 [(set_attr "type" "sselog1")
12308 (set_attr "prefix_extra" "1")
12309 (set_attr "prefix" "vex")
12310 (set_attr "mode" "<MODE>")])
;; Store form of vmaskmovp<avxmodesuffixf2c> for the AVXMODEF2P modes,
;; modelled with UNSPEC_MASKSTORE: the destination (operand 0) is memory
;; and both inputs (operands 1 and 2) must be registers.
;; NOTE(review): which of operands 1 and 2 is the mask and which is the
;; data is not evident from the pattern alone -- confirm against the
;; instruction reference.
12312 (define_insn "avx_maskstorep<avxmodesuffixf2c><avxmodesuffix>"
12313 [(set (match_operand:AVXMODEF2P 0 "memory_operand" "=m")
12315 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
12316 (match_operand:AVXMODEF2P 2 "register_operand" "x")
12318 UNSPEC_MASKSTORE))]
12320 "vmaskmovp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
12321 [(set_attr "type" "sselog1")
12322 (set_attr "prefix_extra" "1")
12323 (set_attr "prefix" "vex")
12324 (set_attr "mode" "<MODE>")])
;; Conversion from a half-width vector (operand 1, <avxhalfvecmode>) to a
;; 256-bit vector (operand 0, AVX256MODE2P), wrapped in an unspec.
;; There are two alternatives: operand 1 tied to the destination ("0"), or
;; operand 1 in any SSE register or memory ("xm").  The output code
;; switches on which_alternative and then on the insn's "mode" attribute to
;; pick vmovaps, vmovapd or vmovdqa, writing through the %x0 form of the
;; destination.  The "length" attribute is given a separate value for
;; alternative 0 and left at the default ("*") otherwise.
12326 (define_insn "avx_<avxmodesuffixp><avxmodesuffix>_<avxmodesuffixp>"
12327 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x,x")
12328 (unspec:AVX256MODE2P
12329 [(match_operand:<avxhalfvecmode> 1 "nonimmediate_operand" "0,xm")]
12333 switch (which_alternative)
12338 switch (get_attr_mode (insn))
12341 return "vmovaps\t{%1, %x0|%x0, %1}";
12343 return "vmovapd\t{%1, %x0|%x0, %1}";
12345 return "vmovdqa\t{%1, %x0|%x0, %1}";
12352 gcc_unreachable ();
12354 [(set_attr "type" "ssemov")
12355 (set_attr "prefix" "vex")
12356 (set_attr "mode" "<avxvecmode>")
12357 (set (attr "length")
12358 (if_then_else (eq_attr "alternative" "0")
12360 (const_string "*")))])
;; Conversion from a 256-bit vector (operand 1, AVX256MODE2P) to a
;; half-width vector (operand 0, <avxhalfvecmode>), wrapped in an unspec.
;; There are two alternatives: operand 1 tied to the destination ("0"), or
;; operand 1 in any SSE register or memory ("xm").  The output code
;; switches on which_alternative and then on the insn's "mode" attribute to
;; pick vmovaps, vmovapd or vmovdqa, reading through the %x1 form of the
;; source.  The "length" attribute is given a separate value for
;; alternative 0 and left at the default ("*") otherwise.
12362 (define_insn "avx_<avxmodesuffixp>_<avxmodesuffixp><avxmodesuffix>"
12363 [(set (match_operand:<avxhalfvecmode> 0 "register_operand" "=x,x")
12364 (unspec:<avxhalfvecmode>
12365 [(match_operand:AVX256MODE2P 1 "nonimmediate_operand" "0,xm")]
12369 switch (which_alternative)
12374 switch (get_attr_mode (insn))
12377 return "vmovaps\t{%x1, %0|%0, %x1}";
12379 return "vmovapd\t{%x1, %0|%0, %x1}";
12381 return "vmovdqa\t{%x1, %0|%0, %x1}";
12388 gcc_unreachable ();
12390 [(set_attr "type" "ssemov")
12391 (set_attr "prefix" "vex")
12392 (set_attr "mode" "<avxvecmode>")
12393 (set (attr "length")
12394 (if_then_else (eq_attr "alternative" "0")
12396 (const_string "*")))])
;; Vector initialisation expander for every AVX256MODE mode.  All of the
;; work is delegated to ix86_expand_vector_init, called with false as its
;; first argument, the destination register (operands[0]) and the
;; unconstrained initialiser operand (operands[1], no predicate or mode).
12398 (define_expand "vec_init<mode>"
12399 [(match_operand:AVX256MODE 0 "register_operand" "")
12400 (match_operand 1 "" "")]
12403 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Concatenate two half-width vectors into a 256-bit AVX256MODE vector.
;; Operand 1 (the low half) is always a register.  Operand 2 (the high
;; half) has two alternatives, and the per-alternative attributes
;; ("sselog,ssemov", length_immediate "1,*") show how each is emitted:
;;   alternative 0, "xm" - vinsertf128 with immediate 0x1, with operand 1
;;                         printed through the %t1 modifier;
;;   alternative 1, "C"  - a plain vmovaps / vmovapd / vmovdqa of operand 1
;;                         into the %x0 form of the destination, chosen by
;;                         the insn's "mode" attribute.
12407 (define_insn "*vec_concat<mode>_avx"
12408 [(set (match_operand:AVX256MODE 0 "register_operand" "=x,x")
12409 (vec_concat:AVX256MODE
12410 (match_operand:<avxhalfvecmode> 1 "register_operand" "x,x")
12411 (match_operand:<avxhalfvecmode> 2 "vector_move_operand" "xm,C")))]
12414 switch (which_alternative)
12417 return "vinsertf128\t{$0x1, %2, %t1, %0|%0, %t1, %2, 0x1}";
12419 switch (get_attr_mode (insn))
12422 return "vmovaps\t{%1, %x0|%x0, %1}";
12424 return "vmovapd\t{%1, %x0|%x0, %1}";
12426 return "vmovdqa\t{%1, %x0|%x0, %1}";
12429 gcc_unreachable ();
12432 [(set_attr "type" "sselog,ssemov")
12433 (set_attr "prefix_extra" "1,*")
12434 (set_attr "length_immediate" "1,*")
12435 (set_attr "prefix" "vex")
12436 (set_attr "mode" "<avxvecmode>")])