1 ;; GCC machine description for SSE instructions
2 ;; Copyright (C) 2005, 2006, 2007, 2008
3 ;; Free Software Foundation, Inc.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify
8 ;; it under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful,
13 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ;; GNU General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
22 ;; 16 byte integral modes handled by SSE, minus TImode, which gets
23 ;; special-cased for TARGET_64BIT.
24 (define_mode_iterator SSEMODEI [V16QI V8HI V4SI V2DI])
26 ;; All 16-byte vector modes handled by SSE
27 (define_mode_iterator SSEMODE [V16QI V8HI V4SI V2DI V4SF V2DF])
29 ;; 32 byte integral vector modes handled by AVX
30 (define_mode_iterator AVX256MODEI [V32QI V16HI V8SI V4DI])
32 ;; All 32-byte vector modes handled by AVX
33 (define_mode_iterator AVX256MODE [V32QI V16HI V8SI V4DI V8SF V4DF])
35 ;; All QI vector modes handled by AVX
36 (define_mode_iterator AVXMODEQI [V32QI V16QI])
38 ;; All vector modes handled by AVX
39 (define_mode_iterator AVXMODE [V16QI V8HI V4SI V2DI V4SF V2DF V32QI V16HI V8SI V4DI V8SF V4DF])
42 (define_mode_iterator SSEMODE12 [V16QI V8HI])
43 (define_mode_iterator SSEMODE24 [V8HI V4SI])
44 (define_mode_iterator SSEMODE14 [V16QI V4SI])
45 (define_mode_iterator SSEMODE124 [V16QI V8HI V4SI])
46 (define_mode_iterator SSEMODE248 [V8HI V4SI V2DI])
47 (define_mode_iterator SSEMODE1248 [V16QI V8HI V4SI V2DI])
48 (define_mode_iterator SSEMODEF4 [SF DF V4SF V2DF])
49 (define_mode_iterator SSEMODEF2P [V4SF V2DF])
51 (define_mode_iterator AVX256MODEF2P [V8SF V4DF])
52 (define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF])
53 (define_mode_iterator AVX256MODE4P [V4DI V4DF])
54 (define_mode_iterator AVX256MODE8P [V8SI V8SF])
55 (define_mode_iterator AVXMODEF2P [V4SF V2DF V8SF V4DF])
56 (define_mode_iterator AVXMODEF4P [V4SF V4DF])
57 (define_mode_iterator AVXMODEDCVTDQ2PS [V4SF V8SF])
58 (define_mode_iterator AVXMODEDCVTPS2DQ [V4SI V8SI])
60 ;; Int-float size matches
61 (define_mode_iterator SSEMODE4S [V4SF V4SI])
62 (define_mode_iterator SSEMODE2D [V2DF V2DI])
64 ;; Mapping from float mode to required SSE level
65 (define_mode_attr sse [(SF "sse") (DF "sse2") (V4SF "sse") (V2DF "sse2")])
67 ;; Mapping from integer vector mode to mnemonic suffix
68 (define_mode_attr ssevecsize [(V16QI "b") (V8HI "w") (V4SI "d") (V2DI "q")])
70 ;; Mapping of the sse5 suffix
71 (define_mode_attr ssemodesuffixf4 [(SF "ss") (DF "sd")
72 (V4SF "ps") (V2DF "pd")])
73 (define_mode_attr ssemodesuffixf2s [(SF "ss") (DF "sd")
74 (V4SF "ss") (V2DF "sd")])
75 (define_mode_attr ssemodesuffixf2c [(V4SF "s") (V2DF "d")])
77 ;; Mapping of the max integer size for sse5 rotate immediate constraint
78 (define_mode_attr sserotatemax [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")])
80 ;; Mapping of vector modes back to the scalar modes
81 (define_mode_attr ssescalarmode [(V4SF "SF") (V2DF "DF")
82 (V16QI "QI") (V8HI "HI")
83 (V4SI "SI") (V2DI "DI")])
85 ;; Mapping of vector modes to a vector mode of double size
86 (define_mode_attr ssedoublesizemode [(V2DF "V4DF") (V2DI "V4DI")
87 (V4SF "V8SF") (V4SI "V8SI")])
89 ;; Number of scalar elements in each vector type
90 (define_mode_attr ssescalarnum [(V4SF "4") (V2DF "2")
91 (V16QI "16") (V8HI "8")
92 (V4SI "4") (V2DI "2")])
95 (define_mode_attr avxvecmode
96 [(V16QI "TI") (V8HI "TI") (V4SI "TI") (V2DI "TI") (V4SF "V4SF")
97 (V2DF "V2DF") (V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI")
98 (V8SF "V8SF") (V4DF "V4DF")])
99 (define_mode_attr avxvecpsmode
100 [(V16QI "V4SF") (V8HI "V4SF") (V4SI "V4SF") (V2DI "V4SF")
101 (V32QI "V8SF") (V16HI "V8SF") (V8SI "V8SF") (V4DI "V8SF")])
102 (define_mode_attr avxhalfvecmode
103 [(V4SF "V2SF") (V32QI "V16QI") (V16HI "V8HI") (V8SI "V4SI")
104 (V4DI "V2DI") (V8SF "V4SF") (V4DF "V2DF")])
105 (define_mode_attr avxscalarmode
106 [(V16QI "QI") (V8HI "HI") (V4SI "SI") (V4SF "SF") (V2DF "DF")
107 (V8SF "SF") (V4DF "DF")])
108 (define_mode_attr avxcvtvecmode
109 [(V4SF "V4SI") (V8SF "V8SI") (V4SI "V4SF") (V8SI "V8SF")])
110 (define_mode_attr avxpermvecmode
111 [(V2DF "V2DI") (V4SF "V4SI") (V4DF "V4DI") (V8SF "V8SI")])
112 (define_mode_attr avxmodesuffixf2c
113 [(V4SF "s") (V2DF "d") (V8SF "s") (V4DF "d")])
114 (define_mode_attr avxmodesuffixp
115 [(V2DF "pd") (V4SI "si") (V4SF "ps") (V8SF "ps") (V8SI "si")
117 (define_mode_attr avxmodesuffixs
118 [(V16QI "b") (V8HI "w") (V4SI "d")])
119 (define_mode_attr avxmodesuffix
120 [(V16QI "") (V32QI "256") (V4SI "") (V4SF "") (V2DF "")
121 (V8SI "256") (V8SF "256") (V4DF "256")])
123 ;; Mapping of immediate bits for blend instructions
124 (define_mode_attr blendbits
125 [(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")])
127 ;; Mapping of immediate bits for vpermil instructions
128 (define_mode_attr vpermilbits
129 [(V8SF "255") (V4SF "255") (V4DF "15") (V2DF "3")])
131 ;; Mapping of immediate bits for pinsr instructions
132 (define_mode_attr pinsrbits [(V16QI "32768") (V8HI "128") (V4SI "8")])
134 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
136 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
140 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
142 (define_expand "mov<mode>"
143 [(set (match_operand:AVX256MODE 0 "nonimmediate_operand" "")
144 (match_operand:AVX256MODE 1 "nonimmediate_operand" ""))]
147 ix86_expand_vector_move (<MODE>mode, operands);
151 (define_insn "*avx_mov<mode>_internal"
152 [(set (match_operand:AVXMODE 0 "nonimmediate_operand" "=x,x ,m")
153 (match_operand:AVXMODE 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
155 && (register_operand (operands[0], <MODE>mode)
156 || register_operand (operands[1], <MODE>mode))"
158 switch (which_alternative)
161 return standard_sse_constant_opcode (insn, operands[1]);
164 switch (get_attr_mode (insn))
168 return "vmovaps\t{%1, %0|%0, %1}";
171 return "vmovapd\t{%1, %0|%0, %1}";
173 return "vmovdqa\t{%1, %0|%0, %1}";
179 [(set_attr "type" "sselog1,ssemov,ssemov")
180 (set_attr "prefix" "vex")
181 (set_attr "mode" "<avxvecmode>")])
183 ;; All of these patterns are enabled for SSE1 as well as SSE2.
184 ;; This is essential for maintaining stable calling conventions.
186 (define_expand "mov<mode>"
187 [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "")
188 (match_operand:SSEMODE 1 "nonimmediate_operand" ""))]
191 ix86_expand_vector_move (<MODE>mode, operands);
195 (define_insn "*mov<mode>_internal"
196 [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "=x,x ,m")
197 (match_operand:SSEMODE 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
199 && (register_operand (operands[0], <MODE>mode)
200 || register_operand (operands[1], <MODE>mode))"
202 switch (which_alternative)
205 return standard_sse_constant_opcode (insn, operands[1]);
208 switch (get_attr_mode (insn))
211 return "movaps\t{%1, %0|%0, %1}";
213 return "movapd\t{%1, %0|%0, %1}";
215 return "movdqa\t{%1, %0|%0, %1}";
221 [(set_attr "type" "sselog1,ssemov,ssemov")
223 (cond [(ior (ior (ne (symbol_ref "optimize_function_for_size_p (cfun)") (const_int 0))
224 (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
225 (and (eq_attr "alternative" "2")
226 (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
228 (const_string "V4SF")
229 (eq (const_string "<MODE>mode") (const_string "V4SFmode"))
230 (const_string "V4SF")
231 (eq (const_string "<MODE>mode") (const_string "V2DFmode"))
232 (const_string "V2DF")
234 (const_string "TI")))])
236 ;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
237 ;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
238 ;; from memory, we'd prefer to load the memory directly into the %xmm
239 ;; register. To facilitate this happy circumstance, this pattern won't
240 ;; split until after register allocation. If the 64-bit value didn't
241 ;; come from memory, this is the best we can do. This is much better
242 ;; than storing %edx:%eax into a stack temporary and loading an %xmm
245 (define_insn_and_split "movdi_to_sse"
247 [(set (match_operand:V4SI 0 "register_operand" "=?x,x")
248 (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
249 (clobber (match_scratch:V4SI 2 "=&x,X"))])]
250 "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES"
252 "&& reload_completed"
255 if (register_operand (operands[1], DImode))
257 /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
258 Assemble the 64-bit DImode value in an xmm register. */
259 emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
260 gen_rtx_SUBREG (SImode, operands[1], 0)));
261 emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
262 gen_rtx_SUBREG (SImode, operands[1], 4)));
263 emit_insn (gen_sse2_punpckldq (operands[0], operands[0], operands[2]));
265 else if (memory_operand (operands[1], DImode))
266 emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]), operands[1], const0_rtx));
272 [(set (match_operand:V4SF 0 "register_operand" "")
273 (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
274 "TARGET_SSE && reload_completed"
277 (vec_duplicate:V4SF (match_dup 1))
281 operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
282 operands[2] = CONST0_RTX (V4SFmode);
286 [(set (match_operand:V2DF 0 "register_operand" "")
287 (match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))]
288 "TARGET_SSE2 && reload_completed"
289 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
291 operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
292 operands[2] = CONST0_RTX (DFmode);
295 (define_expand "push<mode>1"
296 [(match_operand:AVX256MODE 0 "register_operand" "")]
299 ix86_expand_push (<MODE>mode, operands[0]);
303 (define_expand "push<mode>1"
304 [(match_operand:SSEMODE 0 "register_operand" "")]
307 ix86_expand_push (<MODE>mode, operands[0]);
311 (define_expand "movmisalign<mode>"
312 [(set (match_operand:AVX256MODE 0 "nonimmediate_operand" "")
313 (match_operand:AVX256MODE 1 "nonimmediate_operand" ""))]
316 ix86_expand_vector_move_misalign (<MODE>mode, operands);
320 (define_expand "movmisalign<mode>"
321 [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "")
322 (match_operand:SSEMODE 1 "nonimmediate_operand" ""))]
325 ix86_expand_vector_move_misalign (<MODE>mode, operands);
329 (define_insn "avx_movup<avxmodesuffixf2c><avxmodesuffix>"
330 [(set (match_operand:AVXMODEF2P 0 "nonimmediate_operand" "=x,m")
332 [(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "xm,x")]
334 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
335 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
336 "vmovup<avxmodesuffixf2c>\t{%1, %0|%0, %1}"
337 [(set_attr "type" "ssemov")
338 (set_attr "prefix" "vex")
339 (set_attr "mode" "<MODE>")])
341 (define_insn "sse2_movq128"
342 [(set (match_operand:V2DI 0 "register_operand" "=x")
345 (match_operand:V2DI 1 "nonimmediate_operand" "xm")
346 (parallel [(const_int 0)]))
349 "%vmovq\t{%1, %0|%0, %1}"
350 [(set_attr "type" "ssemov")
351 (set_attr "prefix" "maybe_vex")
352 (set_attr "mode" "TI")])
354 (define_insn "<sse>_movup<ssemodesuffixf2c>"
355 [(set (match_operand:SSEMODEF2P 0 "nonimmediate_operand" "=x,m")
357 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm,x")]
359 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
360 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
361 "movup<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
362 [(set_attr "type" "ssemov")
363 (set_attr "mode" "<MODE>")])
365 (define_insn "avx_movdqu<avxmodesuffix>"
366 [(set (match_operand:AVXMODEQI 0 "nonimmediate_operand" "=x,m")
368 [(match_operand:AVXMODEQI 1 "nonimmediate_operand" "xm,x")]
370 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
371 "vmovdqu\t{%1, %0|%0, %1}"
372 [(set_attr "type" "ssemov")
373 (set_attr "prefix" "vex")
374 (set_attr "mode" "<avxvecmode>")])
376 (define_insn "sse2_movdqu"
377 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
378 (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "xm,x")]
380 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
381 "movdqu\t{%1, %0|%0, %1}"
382 [(set_attr "type" "ssemov")
383 (set_attr "prefix_data16" "1")
384 (set_attr "mode" "TI")])
386 (define_insn "<sse>_movnt<mode>"
387 [(set (match_operand:SSEMODEF2P 0 "memory_operand" "=m")
389 [(match_operand:SSEMODEF2P 1 "register_operand" "x")]
391 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
392 "%vmovntp<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
393 [(set_attr "type" "ssemov")
394 (set_attr "prefix" "maybe_vex")
395 (set_attr "mode" "<MODE>")])
397 (define_insn "sse2_movntv2di"
398 [(set (match_operand:V2DI 0 "memory_operand" "=m")
399 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")]
402 "%vmovntdq\t{%1, %0|%0, %1}"
403 [(set_attr "type" "ssecvt")
404 (set_attr "prefix_data16" "1")
405 (set_attr "prefix" "maybe_vex")
406 (set_attr "mode" "TI")])
408 (define_insn "sse2_movntsi"
409 [(set (match_operand:SI 0 "memory_operand" "=m")
410 (unspec:SI [(match_operand:SI 1 "register_operand" "r")]
413 "movnti\t{%1, %0|%0, %1}"
414 [(set_attr "type" "ssecvt")
415 (set_attr "mode" "V2DF")])
417 (define_insn "avx_lddqu<avxmodesuffix>"
418 [(set (match_operand:AVXMODEQI 0 "register_operand" "=x")
420 [(match_operand:AVXMODEQI 1 "memory_operand" "m")]
423 "vlddqu\t{%1, %0|%0, %1}"
424 [(set_attr "type" "ssecvt")
425 (set_attr "prefix" "vex")
426 (set_attr "mode" "<avxvecmode>")])
428 (define_insn "sse3_lddqu"
429 [(set (match_operand:V16QI 0 "register_operand" "=x")
430 (unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "m")]
433 "lddqu\t{%1, %0|%0, %1}"
434 [(set_attr "type" "ssecvt")
435 (set_attr "prefix_rep" "1")
436 (set_attr "mode" "TI")])
438 ; Expand patterns for non-temporal stores. At the moment, only those
439 ; that directly map to insns are defined; it would be possible to
440 ; define patterns for other modes that would expand to several insns.
442 (define_expand "storent<mode>"
443 [(set (match_operand:SSEMODEF2P 0 "memory_operand" "")
445 [(match_operand:SSEMODEF2P 1 "register_operand" "")]
447 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
450 (define_expand "storent<mode>"
451 [(set (match_operand:MODEF 0 "memory_operand" "")
453 [(match_operand:MODEF 1 "register_operand" "")]
458 (define_expand "storentv2di"
459 [(set (match_operand:V2DI 0 "memory_operand" "")
460 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "")]
465 (define_expand "storentsi"
466 [(set (match_operand:SI 0 "memory_operand" "")
467 (unspec:SI [(match_operand:SI 1 "register_operand" "")]
472 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
474 ;; Parallel floating point arithmetic
476 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
478 (define_expand "<code><mode>2"
479 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
481 (match_operand:SSEMODEF2P 1 "register_operand" "")))]
482 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
483 "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
485 (define_expand "<plusminus_insn><mode>3"
486 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
487 (plusminus:AVX256MODEF2P
488 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
489 (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
490 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
491 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
493 (define_insn "*avx_<plusminus_insn><mode>3"
494 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
495 (plusminus:AVXMODEF2P
496 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "<comm>x")
497 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
498 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
499 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
500 "v<plusminus_mnemonic>p<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
501 [(set_attr "type" "sseadd")
502 (set_attr "prefix" "vex")
503 (set_attr "mode" "<avxvecmode>")])
505 (define_expand "<plusminus_insn><mode>3"
506 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
507 (plusminus:SSEMODEF2P
508 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
509 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
510 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
511 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
513 (define_insn "*<plusminus_insn><mode>3"
514 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
515 (plusminus:SSEMODEF2P
516 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "<comm>0")
517 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
518 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
519 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
520 "<plusminus_mnemonic>p<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
521 [(set_attr "type" "sseadd")
522 (set_attr "mode" "<MODE>")])
524 (define_insn "*avx_vm<plusminus_insn><mode>3"
525 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
526 (vec_merge:SSEMODEF2P
527 (plusminus:SSEMODEF2P
528 (match_operand:SSEMODEF2P 1 "register_operand" "x")
529 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
532 "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
533 "v<plusminus_mnemonic>s<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
534 [(set_attr "type" "sseadd")
535 (set_attr "prefix" "vex")
536 (set_attr "mode" "<ssescalarmode>")])
538 (define_insn "<sse>_vm<plusminus_insn><mode>3"
539 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
540 (vec_merge:SSEMODEF2P
541 (plusminus:SSEMODEF2P
542 (match_operand:SSEMODEF2P 1 "register_operand" "0")
543 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
546 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
547 "<plusminus_mnemonic>s<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
548 [(set_attr "type" "sseadd")
549 (set_attr "mode" "<ssescalarmode>")])
551 (define_expand "mul<mode>3"
552 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
554 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
555 (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
556 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
557 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
559 (define_insn "*avx_mul<mode>3"
560 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
562 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
563 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
564 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
565 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
566 "vmulp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
567 [(set_attr "type" "ssemul")
568 (set_attr "prefix" "vex")
569 (set_attr "mode" "<avxvecmode>")])
571 (define_expand "mul<mode>3"
572 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
574 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
575 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
576 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
577 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
579 (define_insn "*mul<mode>3"
580 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
582 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
583 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
584 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
585 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
586 "mulp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
587 [(set_attr "type" "ssemul")
588 (set_attr "mode" "<MODE>")])
590 (define_insn "*avx_vmmul<mode>3"
591 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
592 (vec_merge:SSEMODEF2P
594 (match_operand:SSEMODEF2P 1 "register_operand" "x")
595 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
598 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
599 "vmuls<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
600 [(set_attr "type" "ssemul")
601 (set_attr "prefix" "vex")
602 (set_attr "mode" "<ssescalarmode>")])
604 (define_insn "<sse>_vmmul<mode>3"
605 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
606 (vec_merge:SSEMODEF2P
608 (match_operand:SSEMODEF2P 1 "register_operand" "0")
609 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
612 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
613 "muls<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
614 [(set_attr "type" "ssemul")
615 (set_attr "mode" "<ssescalarmode>")])
617 (define_expand "divv8sf3"
618 [(set (match_operand:V8SF 0 "register_operand" "")
619 (div:V8SF (match_operand:V8SF 1 "register_operand" "")
620 (match_operand:V8SF 2 "nonimmediate_operand" "")))]
623 ix86_fixup_binary_operands_no_copy (DIV, V8SFmode, operands);
625 if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_size
626 && flag_finite_math_only && !flag_trapping_math
627 && flag_unsafe_math_optimizations)
629 ix86_emit_swdivsf (operands[0], operands[1],
630 operands[2], V8SFmode);
635 (define_expand "divv4df3"
636 [(set (match_operand:V4DF 0 "register_operand" "")
637 (div:V4DF (match_operand:V4DF 1 "register_operand" "")
638 (match_operand:V4DF 2 "nonimmediate_operand" "")))]
640 "ix86_fixup_binary_operands_no_copy (DIV, V4DFmode, operands);")
642 (define_insn "avx_div<mode>3"
643 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
645 (match_operand:AVXMODEF2P 1 "register_operand" "x")
646 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
647 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
648 "vdivp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
649 [(set_attr "type" "ssediv")
650 (set_attr "prefix" "vex")
651 (set_attr "mode" "<MODE>")])
653 (define_expand "divv4sf3"
654 [(set (match_operand:V4SF 0 "register_operand" "")
655 (div:V4SF (match_operand:V4SF 1 "register_operand" "")
656 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
659 if (TARGET_SSE_MATH && TARGET_RECIP && optimize_insn_for_speed_p ()
660 && flag_finite_math_only && !flag_trapping_math
661 && flag_unsafe_math_optimizations)
663 ix86_emit_swdivsf (operands[0], operands[1],
664 operands[2], V4SFmode);
669 (define_expand "divv2df3"
670 [(set (match_operand:V2DF 0 "register_operand" "")
671 (div:V2DF (match_operand:V2DF 1 "register_operand" "")
672 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
676 (define_insn "*avx_div<mode>3"
677 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
679 (match_operand:SSEMODEF2P 1 "register_operand" "x")
680 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
681 "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
682 "vdivp<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
683 [(set_attr "type" "ssediv")
684 (set_attr "prefix" "vex")
685 (set_attr "mode" "<MODE>")])
687 (define_insn "<sse>_div<mode>3"
688 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
690 (match_operand:SSEMODEF2P 1 "register_operand" "0")
691 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
692 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
693 "divp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
694 [(set_attr "type" "ssediv")
695 (set_attr "mode" "<MODE>")])
697 (define_insn "*avx_vmdiv<mode>3"
698 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
699 (vec_merge:SSEMODEF2P
701 (match_operand:SSEMODEF2P 1 "register_operand" "x")
702 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
705 "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
706 "vdivs<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
707 [(set_attr "type" "ssediv")
708 (set_attr "prefix" "vex")
709 (set_attr "mode" "<ssescalarmode>")])
711 (define_insn "<sse>_vmdiv<mode>3"
712 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
713 (vec_merge:SSEMODEF2P
715 (match_operand:SSEMODEF2P 1 "register_operand" "0")
716 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
719 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
720 "divs<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
721 [(set_attr "type" "ssediv")
722 (set_attr "mode" "<ssescalarmode>")])
724 (define_insn "avx_rcpv8sf2"
725 [(set (match_operand:V8SF 0 "register_operand" "=x")
727 [(match_operand:V8SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
729 "vrcpps\t{%1, %0|%0, %1}"
730 [(set_attr "type" "sse")
731 (set_attr "prefix" "vex")
732 (set_attr "mode" "V8SF")])
734 (define_insn "sse_rcpv4sf2"
735 [(set (match_operand:V4SF 0 "register_operand" "=x")
737 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
739 "%vrcpps\t{%1, %0|%0, %1}"
740 [(set_attr "type" "sse")
741 (set_attr "prefix" "maybe_vex")
742 (set_attr "mode" "V4SF")])
744 (define_insn "*avx_vmrcpv4sf2"
745 [(set (match_operand:V4SF 0 "register_operand" "=x")
747 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
749 (match_operand:V4SF 2 "register_operand" "x")
752 "vrcpss\t{%1, %2, %0|%0, %2, %1}"
753 [(set_attr "type" "sse")
754 (set_attr "prefix" "vex")
755 (set_attr "mode" "SF")])
757 (define_insn "sse_vmrcpv4sf2"
758 [(set (match_operand:V4SF 0 "register_operand" "=x")
760 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
762 (match_operand:V4SF 2 "register_operand" "0")
765 "rcpss\t{%1, %0|%0, %1}"
766 [(set_attr "type" "sse")
767 (set_attr "mode" "SF")])
769 (define_expand "sqrtv8sf2"
770 [(set (match_operand:V8SF 0 "register_operand" "")
771 (sqrt:V8SF (match_operand:V8SF 1 "nonimmediate_operand" "")))]
774 if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_size
775 && flag_finite_math_only && !flag_trapping_math
776 && flag_unsafe_math_optimizations)
778 ix86_emit_swsqrtsf (operands[0], operands[1], V8SFmode, 0);
783 (define_insn "avx_sqrtv8sf2"
784 [(set (match_operand:V8SF 0 "register_operand" "=x")
785 (sqrt:V8SF (match_operand:V8SF 1 "nonimmediate_operand" "xm")))]
787 "vsqrtps\t{%1, %0|%0, %1}"
788 [(set_attr "type" "sse")
789 (set_attr "prefix" "vex")
790 (set_attr "mode" "V8SF")])
792 (define_expand "sqrtv4sf2"
793 [(set (match_operand:V4SF 0 "register_operand" "")
794 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
797 if (TARGET_SSE_MATH && TARGET_RECIP && optimize_insn_for_speed_p ()
798 && flag_finite_math_only && !flag_trapping_math
799 && flag_unsafe_math_optimizations)
801 ix86_emit_swsqrtsf (operands[0], operands[1], V4SFmode, 0);
806 (define_insn "sse_sqrtv4sf2"
807 [(set (match_operand:V4SF 0 "register_operand" "=x")
808 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
810 "%vsqrtps\t{%1, %0|%0, %1}"
811 [(set_attr "type" "sse")
812 (set_attr "prefix" "maybe_vex")
813 (set_attr "mode" "V4SF")])
815 (define_insn "sqrtv4df2"
816 [(set (match_operand:V4DF 0 "register_operand" "=x")
817 (sqrt:V4DF (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
819 "vsqrtpd\t{%1, %0|%0, %1}"
820 [(set_attr "type" "sse")
821 (set_attr "prefix" "vex")
822 (set_attr "mode" "V4DF")])
824 (define_insn "sqrtv2df2"
825 [(set (match_operand:V2DF 0 "register_operand" "=x")
826 (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
828 "%vsqrtpd\t{%1, %0|%0, %1}"
829 [(set_attr "type" "sse")
830 (set_attr "prefix" "maybe_vex")
831 (set_attr "mode" "V2DF")])
833 (define_insn "*avx_vmsqrt<mode>2"
834 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
835 (vec_merge:SSEMODEF2P
837 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm"))
838 (match_operand:SSEMODEF2P 2 "register_operand" "x")
840 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
841 "vsqrts<ssemodesuffixf2c>\t{%1, %2, %0|%0, %2, %1}"
842 [(set_attr "type" "sse")
843 (set_attr "prefix" "vex")
844 (set_attr "mode" "<ssescalarmode>")])
846 (define_insn "<sse>_vmsqrt<mode>2"
847 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
848 (vec_merge:SSEMODEF2P
850 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm"))
851 (match_operand:SSEMODEF2P 2 "register_operand" "0")
853 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
854 "sqrts<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
855 [(set_attr "type" "sse")
856 (set_attr "mode" "<ssescalarmode>")])
858 (define_expand "rsqrtv8sf2"
859 [(set (match_operand:V8SF 0 "register_operand" "")
861 [(match_operand:V8SF 1 "nonimmediate_operand" "")] UNSPEC_RSQRT))]
862 "TARGET_AVX && TARGET_SSE_MATH"
864 ix86_emit_swsqrtsf (operands[0], operands[1], V8SFmode, 1);
868 (define_insn "avx_rsqrtv8sf2"
869 [(set (match_operand:V8SF 0 "register_operand" "=x")
871 [(match_operand:V8SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
873 "vrsqrtps\t{%1, %0|%0, %1}"
874 [(set_attr "type" "sse")
875 (set_attr "prefix" "vex")
876 (set_attr "mode" "V8SF")])
878 (define_expand "rsqrtv4sf2"
879 [(set (match_operand:V4SF 0 "register_operand" "")
881 [(match_operand:V4SF 1 "nonimmediate_operand" "")] UNSPEC_RSQRT))]
884 ix86_emit_swsqrtsf (operands[0], operands[1], V4SFmode, 1);
888 (define_insn "sse_rsqrtv4sf2"
889 [(set (match_operand:V4SF 0 "register_operand" "=x")
891 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
893 "%vrsqrtps\t{%1, %0|%0, %1}"
894 [(set_attr "type" "sse")
895 (set_attr "prefix" "maybe_vex")
896 (set_attr "mode" "V4SF")])
898 (define_insn "*avx_vmrsqrtv4sf2"
899 [(set (match_operand:V4SF 0 "register_operand" "=x")
901 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
903 (match_operand:V4SF 2 "register_operand" "x")
906 "vrsqrtss\t{%1, %2, %0|%0, %2, %1}"
907 [(set_attr "type" "sse")
908 (set_attr "prefix" "vex")
909 (set_attr "mode" "SF")])
911 (define_insn "sse_vmrsqrtv4sf2"
912 [(set (match_operand:V4SF 0 "register_operand" "=x")
914 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
916 (match_operand:V4SF 2 "register_operand" "0")
919 "rsqrtss\t{%1, %0|%0, %1}"
920 [(set_attr "type" "sse")
921 (set_attr "mode" "SF")])
923 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
924 ;; isn't really correct, as those rtl operators aren't defined when
925 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
927 (define_expand "<code><mode>3"
928 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
929 (smaxmin:AVX256MODEF2P
930 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
931 (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
932 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
934 if (!flag_finite_math_only)
935 operands[1] = force_reg (<MODE>mode, operands[1]);
936 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
939 (define_expand "<code><mode>3"
940 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
942 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
943 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
944 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
946 if (!flag_finite_math_only)
947 operands[1] = force_reg (<MODE>mode, operands[1]);
948 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
951 (define_insn "*avx_<code><mode>3_finite"
952 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
954 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
955 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
956 "AVX_VEC_FLOAT_MODE_P (<MODE>mode) && flag_finite_math_only
957 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
958 "v<maxminfprefix>p<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
959 [(set_attr "type" "sseadd")
960 (set_attr "prefix" "vex")
961 (set_attr "mode" "<MODE>")])
963 (define_insn "*<code><mode>3_finite"
964 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
966 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
967 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
968 "SSE_VEC_FLOAT_MODE_P (<MODE>mode) && flag_finite_math_only
969 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
970 "<maxminfprefix>p<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
971 [(set_attr "type" "sseadd")
972 (set_attr "mode" "<MODE>")])
974 (define_insn "*avx_<code><mode>3"
975 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
977 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
978 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
979 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
980 "v<maxminfprefix>p<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
981 [(set_attr "type" "sseadd")
982 (set_attr "prefix" "vex")
983 (set_attr "mode" "<avxvecmode>")])
985 (define_insn "*<code><mode>3"
986 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
988 (match_operand:SSEMODEF2P 1 "register_operand" "0")
989 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
990 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
991 "<maxminfprefix>p<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
992 [(set_attr "type" "sseadd")
993 (set_attr "mode" "<MODE>")])
995 (define_insn "*avx_vm<code><mode>3"
996 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
997 (vec_merge:SSEMODEF2P
999 (match_operand:SSEMODEF2P 1 "register_operand" "x")
1000 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
1003 "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
1004 "v<maxminfprefix>s<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1005 [(set_attr "type" "sse")
1006 (set_attr "prefix" "vex")
1007 (set_attr "mode" "<ssescalarmode>")])
1009 (define_insn "<sse>_vm<code><mode>3"
1010 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1011 (vec_merge:SSEMODEF2P
1013 (match_operand:SSEMODEF2P 1 "register_operand" "0")
1014 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
1017 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1018 "<maxminfprefix>s<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1019 [(set_attr "type" "sse")
1020 (set_attr "mode" "<ssescalarmode>")])
1022 ;; These versions of the min/max patterns implement exactly the operations
1023 ;; min = (op1 < op2 ? op1 : op2)
1024 ;; max = (!(op1 < op2) ? op1 : op2)
1025 ;; Their operands are not commutative, and thus they may be used in the
1026 ;; presence of -0.0 and NaN.
1028 (define_insn "*avx_ieee_smin<mode>3"
1029 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1031 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
1032 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")]
1034 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1035 "vminp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1036 [(set_attr "type" "sseadd")
1037 (set_attr "prefix" "vex")
1038 (set_attr "mode" "<avxvecmode>")])
1040 (define_insn "*avx_ieee_smax<mode>3"
1041 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1043 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
1044 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")]
1046 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1047 "vmaxp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1048 [(set_attr "type" "sseadd")
1049 (set_attr "prefix" "vex")
1050 (set_attr "mode" "<avxvecmode>")])
1052 (define_insn "*ieee_smin<mode>3"
1053 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1055 [(match_operand:SSEMODEF2P 1 "register_operand" "0")
1056 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
1058 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1059 "minp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1060 [(set_attr "type" "sseadd")
1061 (set_attr "mode" "<MODE>")])
1063 (define_insn "*ieee_smax<mode>3"
1064 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1066 [(match_operand:SSEMODEF2P 1 "register_operand" "0")
1067 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
1069 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1070 "maxp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1071 [(set_attr "type" "sseadd")
1072 (set_attr "mode" "<MODE>")])
1074 (define_insn "avx_addsubv8sf3"
1075 [(set (match_operand:V8SF 0 "register_operand" "=x")
1078 (match_operand:V8SF 1 "register_operand" "x")
1079 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
1080 (minus:V8SF (match_dup 1) (match_dup 2))
1083 "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1084 [(set_attr "type" "sseadd")
1085 (set_attr "prefix" "vex")
1086 (set_attr "mode" "V8SF")])
1088 (define_insn "avx_addsubv4df3"
1089 [(set (match_operand:V4DF 0 "register_operand" "=x")
1092 (match_operand:V4DF 1 "register_operand" "x")
1093 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
1094 (minus:V4DF (match_dup 1) (match_dup 2))
1097 "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1098 [(set_attr "type" "sseadd")
1099 (set_attr "prefix" "vex")
1100 (set_attr "mode" "V4DF")])
1102 (define_insn "*avx_addsubv4sf3"
1103 [(set (match_operand:V4SF 0 "register_operand" "=x")
1106 (match_operand:V4SF 1 "register_operand" "x")
1107 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1108 (minus:V4SF (match_dup 1) (match_dup 2))
1111 "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1112 [(set_attr "type" "sseadd")
1113 (set_attr "prefix" "vex")
1114 (set_attr "mode" "V4SF")])
1116 (define_insn "sse3_addsubv4sf3"
1117 [(set (match_operand:V4SF 0 "register_operand" "=x")
1120 (match_operand:V4SF 1 "register_operand" "0")
1121 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1122 (minus:V4SF (match_dup 1) (match_dup 2))
1125 "addsubps\t{%2, %0|%0, %2}"
1126 [(set_attr "type" "sseadd")
1127 (set_attr "prefix_rep" "1")
1128 (set_attr "mode" "V4SF")])
1130 (define_insn "*avx_addsubv2df3"
1131 [(set (match_operand:V2DF 0 "register_operand" "=x")
1134 (match_operand:V2DF 1 "register_operand" "x")
1135 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1136 (minus:V2DF (match_dup 1) (match_dup 2))
1139 "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1140 [(set_attr "type" "sseadd")
1141 (set_attr "prefix" "vex")
1142 (set_attr "mode" "V2DF")])
1144 (define_insn "sse3_addsubv2df3"
1145 [(set (match_operand:V2DF 0 "register_operand" "=x")
1148 (match_operand:V2DF 1 "register_operand" "0")
1149 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1150 (minus:V2DF (match_dup 1) (match_dup 2))
1153 "addsubpd\t{%2, %0|%0, %2}"
1154 [(set_attr "type" "sseadd")
1155 (set_attr "mode" "V2DF")])
1157 (define_insn "avx_h<plusminus_insn>v4df3"
1158 [(set (match_operand:V4DF 0 "register_operand" "=x")
1163 (match_operand:V4DF 1 "register_operand" "x")
1164 (parallel [(const_int 0)]))
1165 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1167 (vec_select:DF (match_dup 1) (parallel [(const_int 2)]))
1168 (vec_select:DF (match_dup 1) (parallel [(const_int 3)]))))
1172 (match_operand:V4DF 2 "nonimmediate_operand" "xm")
1173 (parallel [(const_int 0)]))
1174 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))
1176 (vec_select:DF (match_dup 2) (parallel [(const_int 2)]))
1177 (vec_select:DF (match_dup 2) (parallel [(const_int 3)]))))))]
1179 "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
1180 [(set_attr "type" "sseadd")
1181 (set_attr "prefix" "vex")
1182 (set_attr "mode" "V4DF")])
1184 (define_insn "avx_h<plusminus_insn>v8sf3"
1185 [(set (match_operand:V8SF 0 "register_operand" "=x")
1191 (match_operand:V8SF 1 "register_operand" "x")
1192 (parallel [(const_int 0)]))
1193 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1195 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1196 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1200 (match_operand:V8SF 2 "nonimmediate_operand" "xm")
1201 (parallel [(const_int 0)]))
1202 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1204 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1205 (vec_select:SF (match_dup 2) (parallel [(const_int 3)])))))
1209 (vec_select:SF (match_dup 1) (parallel [(const_int 4)]))
1210 (vec_select:SF (match_dup 1) (parallel [(const_int 5)])))
1212 (vec_select:SF (match_dup 1) (parallel [(const_int 6)]))
1213 (vec_select:SF (match_dup 1) (parallel [(const_int 7)]))))
1216 (vec_select:SF (match_dup 2) (parallel [(const_int 4)]))
1217 (vec_select:SF (match_dup 2) (parallel [(const_int 5)])))
1219 (vec_select:SF (match_dup 2) (parallel [(const_int 6)]))
1220 (vec_select:SF (match_dup 2) (parallel [(const_int 7)])))))))]
1222 "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1223 [(set_attr "type" "sseadd")
1224 (set_attr "prefix" "vex")
1225 (set_attr "mode" "V8SF")])
1227 (define_insn "*avx_h<plusminus_insn>v4sf3"
1228 [(set (match_operand:V4SF 0 "register_operand" "=x")
1233 (match_operand:V4SF 1 "register_operand" "x")
1234 (parallel [(const_int 0)]))
1235 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1237 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1238 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1242 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
1243 (parallel [(const_int 0)]))
1244 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1246 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1247 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
1249 "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1250 [(set_attr "type" "sseadd")
1251 (set_attr "prefix" "vex")
1252 (set_attr "mode" "V4SF")])
1254 (define_insn "sse3_h<plusminus_insn>v4sf3"
1255 [(set (match_operand:V4SF 0 "register_operand" "=x")
1260 (match_operand:V4SF 1 "register_operand" "0")
1261 (parallel [(const_int 0)]))
1262 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1264 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1265 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1269 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
1270 (parallel [(const_int 0)]))
1271 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1273 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1274 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
1276 "h<plusminus_mnemonic>ps\t{%2, %0|%0, %2}"
1277 [(set_attr "type" "sseadd")
1278 (set_attr "prefix_rep" "1")
1279 (set_attr "mode" "V4SF")])
1281 (define_insn "*avx_h<plusminus_insn>v2df3"
1282 [(set (match_operand:V2DF 0 "register_operand" "=x")
1286 (match_operand:V2DF 1 "register_operand" "x")
1287 (parallel [(const_int 0)]))
1288 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1291 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
1292 (parallel [(const_int 0)]))
1293 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1295 "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
1296 [(set_attr "type" "sseadd")
1297 (set_attr "prefix" "vex")
1298 (set_attr "mode" "V2DF")])
1300 (define_insn "sse3_h<plusminus_insn>v2df3"
1301 [(set (match_operand:V2DF 0 "register_operand" "=x")
1305 (match_operand:V2DF 1 "register_operand" "0")
1306 (parallel [(const_int 0)]))
1307 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1310 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
1311 (parallel [(const_int 0)]))
1312 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1314 "h<plusminus_mnemonic>pd\t{%2, %0|%0, %2}"
1315 [(set_attr "type" "sseadd")
1316 (set_attr "mode" "V2DF")])
1318 (define_expand "reduc_splus_v4sf"
1319 [(match_operand:V4SF 0 "register_operand" "")
1320 (match_operand:V4SF 1 "register_operand" "")]
1325 rtx tmp = gen_reg_rtx (V4SFmode);
1326 emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
1327 emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
1330 ix86_expand_reduc_v4sf (gen_addv4sf3, operands[0], operands[1]);
1334 (define_expand "reduc_splus_v2df"
1335 [(match_operand:V2DF 0 "register_operand" "")
1336 (match_operand:V2DF 1 "register_operand" "")]
1339 emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
1343 (define_expand "reduc_smax_v4sf"
1344 [(match_operand:V4SF 0 "register_operand" "")
1345 (match_operand:V4SF 1 "register_operand" "")]
1348 ix86_expand_reduc_v4sf (gen_smaxv4sf3, operands[0], operands[1]);
1352 (define_expand "reduc_smin_v4sf"
1353 [(match_operand:V4SF 0 "register_operand" "")
1354 (match_operand:V4SF 1 "register_operand" "")]
1357 ix86_expand_reduc_v4sf (gen_sminv4sf3, operands[0], operands[1]);
1361 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1363 ;; Parallel floating point comparisons
1365 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1367 (define_insn "avx_cmpp<avxmodesuffixf2c><mode>3"
1368 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1370 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
1371 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
1372 (match_operand:SI 3 "const_0_to_31_operand" "n")]
1375 "vcmpp<avxmodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1376 [(set_attr "type" "ssecmp")
1377 (set_attr "prefix" "vex")
1378 (set_attr "mode" "<MODE>")])
1380 (define_insn "avx_cmps<ssemodesuffixf2c><mode>3"
1381 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1382 (vec_merge:SSEMODEF2P
1384 [(match_operand:SSEMODEF2P 1 "register_operand" "x")
1385 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
1386 (match_operand:SI 3 "const_0_to_31_operand" "n")]
1391 "vcmps<ssemodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1392 [(set_attr "type" "ssecmp")
1393 (set_attr "prefix" "vex")
1394 (set_attr "mode" "<ssescalarmode>")])
1396 ;; We don't promote 128bit vector compare intrinsics. But vectorizer
1397 ;; may generate 256bit vector compare instructions.
1398 (define_insn "*avx_maskcmp<mode>3"
1399 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1400 (match_operator:AVXMODEF2P 3 "avx_comparison_float_operator"
1401 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
1402 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")]))]
1403 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1404 "vcmp%D3p<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1405 [(set_attr "type" "ssecmp")
1406 (set_attr "prefix" "vex")
1407 (set_attr "mode" "<avxvecmode>")])
1409 (define_insn "<sse>_maskcmp<mode>3"
1410 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x")
1411 (match_operator:SSEMODEF4 3 "sse_comparison_operator"
1412 [(match_operand:SSEMODEF4 1 "register_operand" "0")
1413 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "xm")]))]
1414 "(SSE_FLOAT_MODE_P (<MODE>mode) || SSE_VEC_FLOAT_MODE_P (<MODE>mode))
1416 "cmp%D3<ssemodesuffixf4>\t{%2, %0|%0, %2}"
1417 [(set_attr "type" "ssecmp")
1418 (set_attr "mode" "<MODE>")])
1420 (define_insn "<sse>_vmmaskcmp<mode>3"
1421 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1422 (vec_merge:SSEMODEF2P
1423 (match_operator:SSEMODEF2P 3 "sse_comparison_operator"
1424 [(match_operand:SSEMODEF2P 1 "register_operand" "0")
1425 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")])
1428 "SSE_VEC_FLOAT_MODE_P (<MODE>mode) && !TARGET_SSE5"
1429 "cmp%D3s<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1430 [(set_attr "type" "ssecmp")
1431 (set_attr "mode" "<ssescalarmode>")])
1433 (define_insn "<sse>_comi"
1434 [(set (reg:CCFP FLAGS_REG)
1437 (match_operand:<ssevecmode> 0 "register_operand" "x")
1438 (parallel [(const_int 0)]))
1440 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
1441 (parallel [(const_int 0)]))))]
1442 "SSE_FLOAT_MODE_P (<MODE>mode)"
1443 "%vcomis<ssemodefsuffix>\t{%1, %0|%0, %1}"
1444 [(set_attr "type" "ssecomi")
1445 (set_attr "prefix" "maybe_vex")
1446 (set_attr "mode" "<MODE>")])
1448 (define_insn "<sse>_ucomi"
1449 [(set (reg:CCFPU FLAGS_REG)
1452 (match_operand:<ssevecmode> 0 "register_operand" "x")
1453 (parallel [(const_int 0)]))
1455 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
1456 (parallel [(const_int 0)]))))]
1457 "SSE_FLOAT_MODE_P (<MODE>mode)"
1458 "%vucomis<ssemodefsuffix>\t{%1, %0|%0, %1}"
1459 [(set_attr "type" "ssecomi")
1460 (set_attr "prefix" "maybe_vex")
1461 (set_attr "mode" "<MODE>")])
1463 (define_expand "vcond<mode>"
1464 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1465 (if_then_else:SSEMODEF2P
1466 (match_operator 3 ""
1467 [(match_operand:SSEMODEF2P 4 "nonimmediate_operand" "")
1468 (match_operand:SSEMODEF2P 5 "nonimmediate_operand" "")])
1469 (match_operand:SSEMODEF2P 1 "general_operand" "")
1470 (match_operand:SSEMODEF2P 2 "general_operand" "")))]
1471 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1473 if (ix86_expand_fp_vcond (operands))
1479 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1481 ;; Parallel floating point logical operations
1483 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1485 (define_insn "avx_nand<mode>3"
1486 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1489 (match_operand:AVXMODEF2P 1 "register_operand" "x"))
1490 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
1491 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1492 "vandnp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1493 [(set_attr "type" "sselog")
1494 (set_attr "prefix" "vex")
1495 (set_attr "mode" "<avxvecmode>")])
1497 (define_insn "<sse>_nand<mode>3"
1498 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1501 (match_operand:SSEMODEF2P 1 "register_operand" "0"))
1502 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
1503 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1504 "andnp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1505 [(set_attr "type" "sselog")
1506 (set_attr "mode" "<MODE>")])
1508 (define_expand "<code><mode>3"
1509 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
1510 (plogic:AVX256MODEF2P
1511 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
1512 (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
1513 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
1514 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
1516 (define_insn "*avx_<code><mode>3"
1517 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1519 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
1520 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
1521 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
1522 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1523 "v<plogicprefix>p<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1524 [(set_attr "type" "sselog")
1525 (set_attr "prefix" "vex")
1526 (set_attr "mode" "<avxvecmode>")])
1528 (define_expand "<code><mode>3"
1529 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1531 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
1532 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
1533 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1534 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
1536 (define_insn "*<code><mode>3"
1537 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1539 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
1540 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
1541 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
1542 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1543 "<plogicprefix>p<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1544 [(set_attr "type" "sselog")
1545 (set_attr "mode" "<MODE>")])
1547 ;; Also define scalar versions. These are used for abs, neg, and
1548 ;; conditional move. Using subregs into vector modes causes register
1549 ;; allocation lossage. These patterns do not allow memory operands
1550 ;; because the native instructions read the full 128-bits.
1552 (define_insn "*avx_nand<mode>3"
1553 [(set (match_operand:MODEF 0 "register_operand" "=x")
1556 (match_operand:MODEF 1 "register_operand" "x"))
1557 (match_operand:MODEF 2 "register_operand" "x")))]
1558 "AVX_FLOAT_MODE_P (<MODE>mode)"
1559 "vandnp<ssemodefsuffix>\t{%2, %1, %0|%0, %1, %2}"
1560 [(set_attr "type" "sselog")
1561 (set_attr "prefix" "vex")
1562 (set_attr "mode" "<ssevecmode>")])
1564 (define_insn "*nand<mode>3"
1565 [(set (match_operand:MODEF 0 "register_operand" "=x")
1568 (match_operand:MODEF 1 "register_operand" "0"))
1569 (match_operand:MODEF 2 "register_operand" "x")))]
1570 "SSE_FLOAT_MODE_P (<MODE>mode)"
1571 "andnp<ssemodefsuffix>\t{%2, %0|%0, %2}"
1572 [(set_attr "type" "sselog")
1573 (set_attr "mode" "<ssevecmode>")])
1575 (define_insn "*avx_<code><mode>3"
1576 [(set (match_operand:MODEF 0 "register_operand" "=x")
1578 (match_operand:MODEF 1 "register_operand" "x")
1579 (match_operand:MODEF 2 "register_operand" "x")))]
1580 "AVX_FLOAT_MODE_P (<MODE>mode)"
1581 "v<plogicprefix>p<ssemodefsuffix>\t{%2, %1, %0|%0, %1, %2}"
1582 [(set_attr "type" "sselog")
1583 (set_attr "prefix" "vex")
1584 (set_attr "mode" "<ssevecmode>")])
1586 (define_insn "*<code><mode>3"
1587 [(set (match_operand:MODEF 0 "register_operand" "=x")
1589 (match_operand:MODEF 1 "register_operand" "0")
1590 (match_operand:MODEF 2 "register_operand" "x")))]
1591 "SSE_FLOAT_MODE_P (<MODE>mode)"
1592 "<plogicprefix>p<ssemodefsuffix>\t{%2, %0|%0, %2}"
1593 [(set_attr "type" "sselog")
1594 (set_attr "mode" "<ssevecmode>")])
1596 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1598 ;; SSE5 floating point multiply/accumulate instructions This includes the
1599 ;; scalar version of the instructions as well as the vector
1601 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1603 ;; In order to match (*a * *b) + *c, particularly when vectorizing, allow
1604 ;; combine to generate a multiply/add with two memory references. We then
1605 ;; split this insn, into loading up the destination register with one of the
1606 ;; memory operations. If we don't manage to split the insn, reload will
1607 ;; generate the appropriate moves. The reason this is needed, is that combine
1608 ;; has already folded one of the memory references into both the multiply and
1609 ;; add insns, and it can't generate a new pseudo. I.e.:
1610 ;; (set (reg1) (mem (addr1)))
1611 ;; (set (reg2) (mult (reg1) (mem (addr2))))
1612 ;; (set (reg3) (plus (reg2) (mem (addr3))))
1614 (define_insn "sse5_fmadd<mode>4"
1615 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x,x,x")
1618 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%0,0,x,xm")
1619 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm,xm,x"))
1620 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x,0,0")))]
1621 "TARGET_SSE5 && TARGET_FUSED_MADD
1622 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, true)"
1623 "fmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1624 [(set_attr "type" "ssemuladd")
1625 (set_attr "mode" "<MODE>")])
1627 ;; Split fmadd with two memory operands into a load and the fmadd.
1629 [(set (match_operand:SSEMODEF4 0 "register_operand" "")
1632 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "")
1633 (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))
1634 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")))]
1636 && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)
1637 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, true)
1638 && !reg_mentioned_p (operands[0], operands[1])
1639 && !reg_mentioned_p (operands[0], operands[2])
1640 && !reg_mentioned_p (operands[0], operands[3])"
1643 ix86_expand_sse5_multiple_memory (operands, 4, <MODE>mode);
1644 emit_insn (gen_sse5_fmadd<mode>4 (operands[0], operands[1],
1645 operands[2], operands[3]));
1649 ;; For the scalar operations, use operand1 for the upper words that aren't
1650 ;; modified, so restrict the forms that are generated.
1651 ;; Scalar version of fmadd
1652 (define_insn "sse5_vmfmadd<mode>4"
1653 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1654 (vec_merge:SSEMODEF2P
1657 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0")
1658 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
1659 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1662 "TARGET_SSE5 && TARGET_FUSED_MADD
1663 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)"
1664 "fmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1665 [(set_attr "type" "ssemuladd")
1666 (set_attr "mode" "<MODE>")])
1668 ;; Floating multiply and subtract
1669 ;; Allow two memory operands the same as fmadd
1670 (define_insn "sse5_fmsub<mode>4"
1671 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x,x,x")
1674 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%0,0,x,xm")
1675 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm,xm,x"))
1676 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x,0,0")))]
1677 "TARGET_SSE5 && TARGET_FUSED_MADD
1678 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, true)"
1679 "fmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1680 [(set_attr "type" "ssemuladd")
1681 (set_attr "mode" "<MODE>")])
1683 ;; Split fmsub with two memory operands into a load and the fmsub.
1685 [(set (match_operand:SSEMODEF4 0 "register_operand" "")
1688 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "")
1689 (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))
1690 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")))]
1692 && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)
1693 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, true)
1694 && !reg_mentioned_p (operands[0], operands[1])
1695 && !reg_mentioned_p (operands[0], operands[2])
1696 && !reg_mentioned_p (operands[0], operands[3])"
1699 ix86_expand_sse5_multiple_memory (operands, 4, <MODE>mode);
1700 emit_insn (gen_sse5_fmsub<mode>4 (operands[0], operands[1],
1701 operands[2], operands[3]));
1705 ;; For the scalar operations, use operand1 for the upper words that aren't
1706 ;; modified, so restrict the forms that are generated.
1707 ;; Scalar version of fmsub
1708 (define_insn "sse5_vmfmsub<mode>4"
1709 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1710 (vec_merge:SSEMODEF2P
1713 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0")
1714 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
1715 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1718 "TARGET_SSE5 && TARGET_FUSED_MADD
1719 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
1720 "fmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1721 [(set_attr "type" "ssemuladd")
1722 (set_attr "mode" "<MODE>")])
1724 ;; Floating point negative multiply and add
1725 ;; Rewrite (- (a * b) + c) into the canonical form: c - (a * b)
1726 ;; Note operands are out of order to simplify call to ix86_sse5_valid_p
1727 ;; Allow two memory operands to help in optimizing.
1728 (define_insn "sse5_fnmadd<mode>4"
1729 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x,x,x")
1731 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x,0,0")
1733 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%0,0,x,xm")
1734 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm,xm,x"))))]
1735 "TARGET_SSE5 && TARGET_FUSED_MADD
1736 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, true)"
1737 "fnmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1738 [(set_attr "type" "ssemuladd")
1739 (set_attr "mode" "<MODE>")])
1741 ;; Split fnmadd with two memory operands into a load and the fnmadd.
1743 [(set (match_operand:SSEMODEF4 0 "register_operand" "")
1745 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")
1747 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "")
1748 (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))))]
1750 && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)
1751 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, true)
1752 && !reg_mentioned_p (operands[0], operands[1])
1753 && !reg_mentioned_p (operands[0], operands[2])
1754 && !reg_mentioned_p (operands[0], operands[3])"
1757 ix86_expand_sse5_multiple_memory (operands, 4, <MODE>mode);
1758 emit_insn (gen_sse5_fnmadd<mode>4 (operands[0], operands[1],
1759 operands[2], operands[3]));
1763 ;; For the scalar operations, use operand1 for the upper words that aren't
1764 ;; modified, so restrict the forms that are generated.
1765 ;; Scalar version of fnmadd
1766 (define_insn "sse5_vmfnmadd<mode>4"
1767 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1768 (vec_merge:SSEMODEF2P
1770 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")
1772 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0")
1773 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm")))
1776 "TARGET_SSE5 && TARGET_FUSED_MADD
1777 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)"
1778 "fnmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1779 [(set_attr "type" "ssemuladd")
1780 (set_attr "mode" "<MODE>")])
1782 ;; Floating point negative multiply and subtract
1783 ;; Rewrite (- (a * b) - c) into the canonical form: ((-a) * b) - c
1784 ;; Allow 2 memory operands to help with optimization
1785 (define_insn "sse5_fnmsub<mode>4"
1786 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x")
1790 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "0,0"))
1791 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm"))
1792 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x")))]
1793 "TARGET_SSE5 && TARGET_FUSED_MADD
1794 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, false)"
1795 "fnmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1796 [(set_attr "type" "ssemuladd")
1797 (set_attr "mode" "<MODE>")])
1799 ;; Split fnmsub with two memory operands into a load and the fmsub.
1801 [(set (match_operand:SSEMODEF4 0 "register_operand" "")
1805 (match_operand:SSEMODEF4 1 "nonimmediate_operand" ""))
1806 (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))
1807 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")))]
1809 && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)
1810 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, false)
1811 && !reg_mentioned_p (operands[0], operands[1])
1812 && !reg_mentioned_p (operands[0], operands[2])
1813 && !reg_mentioned_p (operands[0], operands[3])"
1816 ix86_expand_sse5_multiple_memory (operands, 4, <MODE>mode);
1817 emit_insn (gen_sse5_fnmsub<mode>4 (operands[0], operands[1],
1818 operands[2], operands[3]));
1822 ;; For the scalar operations, use operand1 for the upper words that aren't
1823 ;; modified, so restrict the forms that are generated.
1824 ;; Scalar version of fnmsub
1825 (define_insn "sse5_vmfnmsub<mode>4"
1826 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1827 (vec_merge:SSEMODEF2P
1831 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0"))
1832 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
1833 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1836 "TARGET_SSE5 && TARGET_FUSED_MADD
1837 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, false)"
1838 "fnmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1839 [(set_attr "type" "ssemuladd")
1840 (set_attr "mode" "<MODE>")])
1842 ;; The same instructions using an UNSPEC to allow the intrinsic to be used
1843 ;; even if the user used -mno-fused-madd
1844 ;; Parallel instructions. During instruction generation, just default
1845 ;; to registers, and let combine later build the appropriate instruction.
1846 (define_expand "sse5i_fmadd<mode>4"
1847 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1851 (match_operand:SSEMODEF2P 1 "register_operand" "")
1852 (match_operand:SSEMODEF2P 2 "register_operand" ""))
1853 (match_operand:SSEMODEF2P 3 "register_operand" ""))]
1854 UNSPEC_SSE5_INTRINSIC))]
1857 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1858 if (TARGET_FUSED_MADD)
1860 emit_insn (gen_sse5_fmadd<mode>4 (operands[0], operands[1],
1861 operands[2], operands[3]));
1866 (define_insn "*sse5i_fmadd<mode>4"
1867 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
1871 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0,0,x,xm")
1872 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x"))
1873 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0"))]
1874 UNSPEC_SSE5_INTRINSIC))]
1875 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)"
1876 "fmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1877 [(set_attr "type" "ssemuladd")
1878 (set_attr "mode" "<MODE>")])
1880 (define_expand "sse5i_fmsub<mode>4"
1881 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1885 (match_operand:SSEMODEF2P 1 "register_operand" "")
1886 (match_operand:SSEMODEF2P 2 "register_operand" ""))
1887 (match_operand:SSEMODEF2P 3 "register_operand" ""))]
1888 UNSPEC_SSE5_INTRINSIC))]
1891 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1892 if (TARGET_FUSED_MADD)
1894 emit_insn (gen_sse5_fmsub<mode>4 (operands[0], operands[1],
1895 operands[2], operands[3]));
1900 (define_insn "*sse5i_fmsub<mode>4"
1901 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
1905 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0,0,x,xm")
1906 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x"))
1907 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0"))]
1908 UNSPEC_SSE5_INTRINSIC))]
1909 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)"
1910 "fmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1911 [(set_attr "type" "ssemuladd")
1912 (set_attr "mode" "<MODE>")])
1914 ;; Rewrite (- (a * b) + c) into the canonical form: c - (a * b)
1915 ;; Note operands are out of order to simplify call to ix86_sse5_valid_p
1916 (define_expand "sse5i_fnmadd<mode>4"
1917 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1920 (match_operand:SSEMODEF2P 3 "register_operand" "")
1922 (match_operand:SSEMODEF2P 1 "register_operand" "")
1923 (match_operand:SSEMODEF2P 2 "register_operand" "")))]
1924 UNSPEC_SSE5_INTRINSIC))]
1927 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1928 if (TARGET_FUSED_MADD)
1930 emit_insn (gen_sse5_fnmadd<mode>4 (operands[0], operands[1],
1931 operands[2], operands[3]));
1936 (define_insn "*sse5i_fnmadd<mode>4"
1937 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
1940 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0")
1942 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0,0,x,xm")
1943 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x")))]
1944 UNSPEC_SSE5_INTRINSIC))]
1945 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)"
1946 "fnmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1947 [(set_attr "type" "ssemuladd")
1948 (set_attr "mode" "<MODE>")])
1950 ;; Rewrite (- (a * b) - c) into the canonical form: ((-a) * b) - c
1951 (define_expand "sse5i_fnmsub<mode>4"
1952 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1957 (match_operand:SSEMODEF2P 1 "register_operand" ""))
1958 (match_operand:SSEMODEF2P 2 "register_operand" ""))
1959 (match_operand:SSEMODEF2P 3 "register_operand" ""))]
1960 UNSPEC_SSE5_INTRINSIC))]
1963 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1964 if (TARGET_FUSED_MADD)
1966 emit_insn (gen_sse5_fnmsub<mode>4 (operands[0], operands[1],
1967 operands[2], operands[3]));
1972 (define_insn "*sse5i_fnmsub<mode>4"
1973 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
1978 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0,x,xm"))
1979 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x"))
1980 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0"))]
1981 UNSPEC_SSE5_INTRINSIC))]
1982 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
1983 "fnmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1984 [(set_attr "type" "ssemuladd")
1985 (set_attr "mode" "<MODE>")])
1987 ;; Scalar instructions
1988 (define_expand "sse5i_vmfmadd<mode>4"
1989 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1991 [(vec_merge:SSEMODEF2P
1994 (match_operand:SSEMODEF2P 1 "register_operand" "")
1995 (match_operand:SSEMODEF2P 2 "register_operand" ""))
1996 (match_operand:SSEMODEF2P 3 "register_operand" ""))
1999 UNSPEC_SSE5_INTRINSIC))]
2002 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
2003 if (TARGET_FUSED_MADD)
2005 emit_insn (gen_sse5_vmfmadd<mode>4 (operands[0], operands[1],
2006 operands[2], operands[3]));
2011 ;; For the scalar operations, use operand1 for the upper words that aren't
2012 ;; modified, so restrict the forms that are accepted.
2013 (define_insn "*sse5i_vmfmadd<mode>4"
2014 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2016 [(vec_merge:SSEMODEF2P
2019 (match_operand:SSEMODEF2P 1 "register_operand" "0,0")
2020 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
2021 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
2024 UNSPEC_SSE5_INTRINSIC))]
2025 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
2026 "fmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2027 [(set_attr "type" "ssemuladd")
2028 (set_attr "mode" "<ssescalarmode>")])
2030 (define_expand "sse5i_vmfmsub<mode>4"
2031 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
2033 [(vec_merge:SSEMODEF2P
2036 (match_operand:SSEMODEF2P 1 "register_operand" "")
2037 (match_operand:SSEMODEF2P 2 "register_operand" ""))
2038 (match_operand:SSEMODEF2P 3 "register_operand" ""))
2041 UNSPEC_SSE5_INTRINSIC))]
2044 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
2045 if (TARGET_FUSED_MADD)
2047 emit_insn (gen_sse5_vmfmsub<mode>4 (operands[0], operands[1],
2048 operands[2], operands[3]));
2053 (define_insn "*sse5i_vmfmsub<mode>4"
2054 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2056 [(vec_merge:SSEMODEF2P
2059 (match_operand:SSEMODEF2P 1 "register_operand" "0,0")
2060 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
2061 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
2064 UNSPEC_SSE5_INTRINSIC))]
2065 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
2066 "fmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2067 [(set_attr "type" "ssemuladd")
2068 (set_attr "mode" "<ssescalarmode>")])
2070 ;; Note operands are out of order to simplify call to ix86_sse5_valid_p
2071 (define_expand "sse5i_vmfnmadd<mode>4"
2072 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
2074 [(vec_merge:SSEMODEF2P
2076 (match_operand:SSEMODEF2P 3 "register_operand" "")
2078 (match_operand:SSEMODEF2P 1 "register_operand" "")
2079 (match_operand:SSEMODEF2P 2 "register_operand" "")))
2082 UNSPEC_SSE5_INTRINSIC))]
2085 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
2086 if (TARGET_FUSED_MADD)
2088 emit_insn (gen_sse5_vmfnmadd<mode>4 (operands[0], operands[1],
2089 operands[2], operands[3]));
2094 (define_insn "*sse5i_vmfnmadd<mode>4"
2095 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2097 [(vec_merge:SSEMODEF2P
2099 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")
2101 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0,0")
2102 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm")))
2105 UNSPEC_SSE5_INTRINSIC))]
2106 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)"
2107 "fnmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2108 [(set_attr "type" "ssemuladd")
2109 (set_attr "mode" "<ssescalarmode>")])
2111 (define_expand "sse5i_vmfnmsub<mode>4"
2112 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
2114 [(vec_merge:SSEMODEF2P
2118 (match_operand:SSEMODEF2P 1 "register_operand" ""))
2119 (match_operand:SSEMODEF2P 2 "register_operand" ""))
2120 (match_operand:SSEMODEF2P 3 "register_operand" ""))
2123 UNSPEC_SSE5_INTRINSIC))]
2126 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
2127 if (TARGET_FUSED_MADD)
2129 emit_insn (gen_sse5_vmfnmsub<mode>4 (operands[0], operands[1],
2130 operands[2], operands[3]));
2135 (define_insn "*sse5i_vmfnmsub<mode>4"
2136 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2138 [(vec_merge:SSEMODEF2P
2142 (match_operand:SSEMODEF2P 1 "register_operand" "0,0"))
2143 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
2144 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
2147 UNSPEC_SSE5_INTRINSIC))]
2148 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
2149 "fnmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2150 [(set_attr "type" "ssemuladd")
2151 (set_attr "mode" "<ssescalarmode>")])
2153 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2155 ;; Parallel single-precision floating point conversion operations
2157 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2159 (define_insn "sse_cvtpi2ps"
2160 [(set (match_operand:V4SF 0 "register_operand" "=x")
2163 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
2164 (match_operand:V4SF 1 "register_operand" "0")
2167 "cvtpi2ps\t{%2, %0|%0, %2}"
2168 [(set_attr "type" "ssecvt")
2169 (set_attr "mode" "V4SF")])
2171 (define_insn "sse_cvtps2pi"
2172 [(set (match_operand:V2SI 0 "register_operand" "=y")
2174 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
2176 (parallel [(const_int 0) (const_int 1)])))]
2178 "cvtps2pi\t{%1, %0|%0, %1}"
2179 [(set_attr "type" "ssecvt")
2180 (set_attr "unit" "mmx")
2181 (set_attr "mode" "DI")])
2183 (define_insn "sse_cvttps2pi"
2184 [(set (match_operand:V2SI 0 "register_operand" "=y")
2186 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
2187 (parallel [(const_int 0) (const_int 1)])))]
2189 "cvttps2pi\t{%1, %0|%0, %1}"
2190 [(set_attr "type" "ssecvt")
2191 (set_attr "unit" "mmx")
2192 (set_attr "mode" "SF")])
2194 (define_insn "*avx_cvtsi2ss"
2195 [(set (match_operand:V4SF 0 "register_operand" "=x")
2198 (float:SF (match_operand:SI 2 "nonimmediate_operand" "rm")))
2199 (match_operand:V4SF 1 "register_operand" "x")
2202 "vcvtsi2ss\t{%2, %1, %0|%0, %1, %2}"
2203 [(set_attr "type" "sseicvt")
2204 (set_attr "prefix" "vex")
2205 (set_attr "mode" "SF")])
2207 (define_insn "sse_cvtsi2ss"
2208 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2211 (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
2212 (match_operand:V4SF 1 "register_operand" "0,0")
2215 "cvtsi2ss\t{%2, %0|%0, %2}"
2216 [(set_attr "type" "sseicvt")
2217 (set_attr "athlon_decode" "vector,double")
2218 (set_attr "amdfam10_decode" "vector,double")
2219 (set_attr "mode" "SF")])
2221 (define_insn "*avx_cvtsi2ssq"
2222 [(set (match_operand:V4SF 0 "register_operand" "=x")
2225 (float:SF (match_operand:DI 2 "nonimmediate_operand" "rm")))
2226 (match_operand:V4SF 1 "register_operand" "x")
2228 "TARGET_AVX && TARGET_64BIT"
2229 "vcvtsi2ssq\t{%2, %1, %0|%0, %1, %2}"
2230 [(set_attr "type" "sseicvt")
2231 (set_attr "prefix" "vex")
2232 (set_attr "mode" "SF")])
2234 (define_insn "sse_cvtsi2ssq"
2235 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2238 (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,rm")))
2239 (match_operand:V4SF 1 "register_operand" "0,0")
2241 "TARGET_SSE && TARGET_64BIT"
2242 "cvtsi2ssq\t{%2, %0|%0, %2}"
2243 [(set_attr "type" "sseicvt")
2244 (set_attr "athlon_decode" "vector,double")
2245 (set_attr "amdfam10_decode" "vector,double")
2246 (set_attr "mode" "SF")])
2248 (define_insn "sse_cvtss2si"
2249 [(set (match_operand:SI 0 "register_operand" "=r,r")
2252 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2253 (parallel [(const_int 0)]))]
2254 UNSPEC_FIX_NOTRUNC))]
2256 "%vcvtss2si\t{%1, %0|%0, %1}"
2257 [(set_attr "type" "sseicvt")
2258 (set_attr "athlon_decode" "double,vector")
2259 (set_attr "prefix_rep" "1")
2260 (set_attr "prefix" "maybe_vex")
2261 (set_attr "mode" "SI")])
2263 (define_insn "sse_cvtss2si_2"
2264 [(set (match_operand:SI 0 "register_operand" "=r,r")
2265 (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
2266 UNSPEC_FIX_NOTRUNC))]
2268 "%vcvtss2si\t{%1, %0|%0, %1}"
2269 [(set_attr "type" "sseicvt")
2270 (set_attr "athlon_decode" "double,vector")
2271 (set_attr "amdfam10_decode" "double,double")
2272 (set_attr "prefix_rep" "1")
2273 (set_attr "prefix" "maybe_vex")
2274 (set_attr "mode" "SI")])
2276 (define_insn "sse_cvtss2siq"
2277 [(set (match_operand:DI 0 "register_operand" "=r,r")
2280 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2281 (parallel [(const_int 0)]))]
2282 UNSPEC_FIX_NOTRUNC))]
2283 "TARGET_SSE && TARGET_64BIT"
2284 "%vcvtss2siq\t{%1, %0|%0, %1}"
2285 [(set_attr "type" "sseicvt")
2286 (set_attr "athlon_decode" "double,vector")
2287 (set_attr "prefix_rep" "1")
2288 (set_attr "prefix" "maybe_vex")
2289 (set_attr "mode" "DI")])
2291 (define_insn "sse_cvtss2siq_2"
2292 [(set (match_operand:DI 0 "register_operand" "=r,r")
2293 (unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
2294 UNSPEC_FIX_NOTRUNC))]
2295 "TARGET_SSE && TARGET_64BIT"
2296 "%vcvtss2siq\t{%1, %0|%0, %1}"
2297 [(set_attr "type" "sseicvt")
2298 (set_attr "athlon_decode" "double,vector")
2299 (set_attr "amdfam10_decode" "double,double")
2300 (set_attr "prefix_rep" "1")
2301 (set_attr "prefix" "maybe_vex")
2302 (set_attr "mode" "DI")])
2304 (define_insn "sse_cvttss2si"
2305 [(set (match_operand:SI 0 "register_operand" "=r,r")
2308 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2309 (parallel [(const_int 0)]))))]
2311 "%vcvttss2si\t{%1, %0|%0, %1}"
2312 [(set_attr "type" "sseicvt")
2313 (set_attr "athlon_decode" "double,vector")
2314 (set_attr "amdfam10_decode" "double,double")
2315 (set_attr "prefix_rep" "1")
2316 (set_attr "prefix" "maybe_vex")
2317 (set_attr "mode" "SI")])
2319 (define_insn "sse_cvttss2siq"
2320 [(set (match_operand:DI 0 "register_operand" "=r,r")
2323 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2324 (parallel [(const_int 0)]))))]
2325 "TARGET_SSE && TARGET_64BIT"
2326 "%vcvttss2siq\t{%1, %0|%0, %1}"
2327 [(set_attr "type" "sseicvt")
2328 (set_attr "athlon_decode" "double,vector")
2329 (set_attr "amdfam10_decode" "double,double")
2330 (set_attr "prefix_rep" "1")
2331 (set_attr "prefix" "maybe_vex")
2332 (set_attr "mode" "DI")])
2334 (define_insn "avx_cvtdq2ps<avxmodesuffix>"
2335 [(set (match_operand:AVXMODEDCVTDQ2PS 0 "register_operand" "=x")
2336 (float:AVXMODEDCVTDQ2PS
2337 (match_operand:<avxcvtvecmode> 1 "nonimmediate_operand" "xm")))]
2339 "vcvtdq2ps\t{%1, %0|%0, %1}"
2340 [(set_attr "type" "ssecvt")
2341 (set_attr "prefix" "vex")
2342 (set_attr "mode" "<avxvecmode>")])
2344 (define_insn "sse2_cvtdq2ps"
2345 [(set (match_operand:V4SF 0 "register_operand" "=x")
2346 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
2348 "cvtdq2ps\t{%1, %0|%0, %1}"
2349 [(set_attr "type" "ssecvt")
2350 (set_attr "mode" "V4SF")])
2352 (define_insn "avx_cvtps2dq<avxmodesuffix>"
2353 [(set (match_operand:AVXMODEDCVTPS2DQ 0 "register_operand" "=x")
2354 (unspec:AVXMODEDCVTPS2DQ
2355 [(match_operand:<avxcvtvecmode> 1 "nonimmediate_operand" "xm")]
2356 UNSPEC_FIX_NOTRUNC))]
2358 "vcvtps2dq\t{%1, %0|%0, %1}"
2359 [(set_attr "type" "ssecvt")
2360 (set_attr "prefix" "vex")
2361 (set_attr "mode" "<avxvecmode>")])
2363 (define_insn "sse2_cvtps2dq"
2364 [(set (match_operand:V4SI 0 "register_operand" "=x")
2365 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
2366 UNSPEC_FIX_NOTRUNC))]
2368 "cvtps2dq\t{%1, %0|%0, %1}"
2369 [(set_attr "type" "ssecvt")
2370 (set_attr "prefix_data16" "1")
2371 (set_attr "mode" "TI")])
2373 (define_insn "avx_cvttps2dq<avxmodesuffix>"
2374 [(set (match_operand:AVXMODEDCVTPS2DQ 0 "register_operand" "=x")
2375 (fix:AVXMODEDCVTPS2DQ
2376 (match_operand:<avxcvtvecmode> 1 "nonimmediate_operand" "xm")))]
2378 "vcvttps2dq\t{%1, %0|%0, %1}"
2379 [(set_attr "type" "ssecvt")
2380 (set_attr "prefix" "vex")
2381 (set_attr "mode" "<avxvecmode>")])
2383 (define_insn "sse2_cvttps2dq"
2384 [(set (match_operand:V4SI 0 "register_operand" "=x")
2385 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
2387 "cvttps2dq\t{%1, %0|%0, %1}"
2388 [(set_attr "type" "ssecvt")
2389 (set_attr "prefix_rep" "1")
2390 (set_attr "mode" "TI")])
2392 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2394 ;; Parallel double-precision floating point conversion operations
2396 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2398 (define_insn "sse2_cvtpi2pd"
2399 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2400 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
2402 "cvtpi2pd\t{%1, %0|%0, %1}"
2403 [(set_attr "type" "ssecvt")
2404 (set_attr "unit" "mmx,*")
2405 (set_attr "mode" "V2DF")])
2407 (define_insn "sse2_cvtpd2pi"
2408 [(set (match_operand:V2SI 0 "register_operand" "=y")
2409 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2410 UNSPEC_FIX_NOTRUNC))]
2412 "cvtpd2pi\t{%1, %0|%0, %1}"
2413 [(set_attr "type" "ssecvt")
2414 (set_attr "unit" "mmx")
2415 (set_attr "prefix_data16" "1")
2416 (set_attr "mode" "DI")])
2418 (define_insn "sse2_cvttpd2pi"
2419 [(set (match_operand:V2SI 0 "register_operand" "=y")
2420 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
2422 "cvttpd2pi\t{%1, %0|%0, %1}"
2423 [(set_attr "type" "ssecvt")
2424 (set_attr "unit" "mmx")
2425 (set_attr "prefix_data16" "1")
2426 (set_attr "mode" "TI")])
2428 (define_insn "*avx_cvtsi2sd"
2429 [(set (match_operand:V2DF 0 "register_operand" "=x")
2432 (float:DF (match_operand:SI 2 "nonimmediate_operand" "rm")))
2433 (match_operand:V2DF 1 "register_operand" "x")
2436 "vcvtsi2sd\t{%2, %1, %0|%0, %1, %2}"
2437 [(set_attr "type" "sseicvt")
2438 (set_attr "prefix" "vex")
2439 (set_attr "mode" "DF")])
2441 (define_insn "sse2_cvtsi2sd"
2442 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2445 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
2446 (match_operand:V2DF 1 "register_operand" "0,0")
2449 "cvtsi2sd\t{%2, %0|%0, %2}"
2450 [(set_attr "type" "sseicvt")
2451 (set_attr "mode" "DF")
2452 (set_attr "athlon_decode" "double,direct")
2453 (set_attr "amdfam10_decode" "vector,double")])
2455 (define_insn "*avx_cvtsi2sdq"
2456 [(set (match_operand:V2DF 0 "register_operand" "=x")
2459 (float:DF (match_operand:DI 2 "nonimmediate_operand" "rm")))
2460 (match_operand:V2DF 1 "register_operand" "x")
2462 "TARGET_AVX && TARGET_64BIT"
2463 "vcvtsi2sdq\t{%2, %1, %0|%0, %1, %2}"
2464 [(set_attr "type" "sseicvt")
2465 (set_attr "prefix" "vex")
2466 (set_attr "mode" "DF")])
2468 (define_insn "sse2_cvtsi2sdq"
2469 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2472 (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m")))
2473 (match_operand:V2DF 1 "register_operand" "0,0")
2475 "TARGET_SSE2 && TARGET_64BIT"
2476 "cvtsi2sdq\t{%2, %0|%0, %2}"
2477 [(set_attr "type" "sseicvt")
2478 (set_attr "mode" "DF")
2479 (set_attr "athlon_decode" "double,direct")
2480 (set_attr "amdfam10_decode" "vector,double")])
2482 (define_insn "sse2_cvtsd2si"
2483 [(set (match_operand:SI 0 "register_operand" "=r,r")
2486 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2487 (parallel [(const_int 0)]))]
2488 UNSPEC_FIX_NOTRUNC))]
2490 "%vcvtsd2si\t{%1, %0|%0, %1}"
2491 [(set_attr "type" "sseicvt")
2492 (set_attr "athlon_decode" "double,vector")
2493 (set_attr "prefix_rep" "1")
2494 (set_attr "prefix" "maybe_vex")
2495 (set_attr "mode" "SI")])
2497 (define_insn "sse2_cvtsd2si_2"
2498 [(set (match_operand:SI 0 "register_operand" "=r,r")
2499 (unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2500 UNSPEC_FIX_NOTRUNC))]
2502 "%vcvtsd2si\t{%1, %0|%0, %1}"
2503 [(set_attr "type" "sseicvt")
2504 (set_attr "athlon_decode" "double,vector")
2505 (set_attr "amdfam10_decode" "double,double")
2506 (set_attr "prefix_rep" "1")
2507 (set_attr "prefix" "maybe_vex")
2508 (set_attr "mode" "SI")])
2510 (define_insn "sse2_cvtsd2siq"
2511 [(set (match_operand:DI 0 "register_operand" "=r,r")
2514 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2515 (parallel [(const_int 0)]))]
2516 UNSPEC_FIX_NOTRUNC))]
2517 "TARGET_SSE2 && TARGET_64BIT"
2518 "%vcvtsd2siq\t{%1, %0|%0, %1}"
2519 [(set_attr "type" "sseicvt")
2520 (set_attr "athlon_decode" "double,vector")
2521 (set_attr "prefix_rep" "1")
2522 (set_attr "prefix" "maybe_vex")
2523 (set_attr "mode" "DI")])
2525 (define_insn "sse2_cvtsd2siq_2"
2526 [(set (match_operand:DI 0 "register_operand" "=r,r")
2527 (unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2528 UNSPEC_FIX_NOTRUNC))]
2529 "TARGET_SSE2 && TARGET_64BIT"
2530 "%vcvtsd2siq\t{%1, %0|%0, %1}"
2531 [(set_attr "type" "sseicvt")
2532 (set_attr "athlon_decode" "double,vector")
2533 (set_attr "amdfam10_decode" "double,double")
2534 (set_attr "prefix_rep" "1")
2535 (set_attr "prefix" "maybe_vex")
2536 (set_attr "mode" "DI")])
2538 (define_insn "sse2_cvttsd2si"
2539 [(set (match_operand:SI 0 "register_operand" "=r,r")
2542 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2543 (parallel [(const_int 0)]))))]
2545 "%vcvttsd2si\t{%1, %0|%0, %1}"
2546 [(set_attr "type" "sseicvt")
2547 (set_attr "prefix_rep" "1")
2548 (set_attr "prefix" "maybe_vex")
2549 (set_attr "mode" "SI")
2550 (set_attr "athlon_decode" "double,vector")
2551 (set_attr "amdfam10_decode" "double,double")])
2553 (define_insn "sse2_cvttsd2siq"
2554 [(set (match_operand:DI 0 "register_operand" "=r,r")
2557 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2558 (parallel [(const_int 0)]))))]
2559 "TARGET_SSE2 && TARGET_64BIT"
2560 "%vcvttsd2siq\t{%1, %0|%0, %1}"
2561 [(set_attr "type" "sseicvt")
2562 (set_attr "prefix_rep" "1")
2563 (set_attr "prefix" "maybe_vex")
2564 (set_attr "mode" "DI")
2565 (set_attr "athlon_decode" "double,vector")
2566 (set_attr "amdfam10_decode" "double,double")])
2568 (define_insn "avx_cvtdq2pd256"
2569 [(set (match_operand:V4DF 0 "register_operand" "=x")
2570 (float:V4DF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
2572 "vcvtdq2pd\t{%1, %0|%0, %1}"
2573 [(set_attr "type" "ssecvt")
2574 (set_attr "prefix" "vex")
2575 (set_attr "mode" "V4DF")])
2577 (define_insn "sse2_cvtdq2pd"
2578 [(set (match_operand:V2DF 0 "register_operand" "=x")
2581 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
2582 (parallel [(const_int 0) (const_int 1)]))))]
2584 "%vcvtdq2pd\t{%1, %0|%0, %1}"
2585 [(set_attr "type" "ssecvt")
2586 (set_attr "prefix" "maybe_vex")
2587 (set_attr "mode" "V2DF")])
2589 (define_insn "avx_cvtpd2dq256"
2590 [(set (match_operand:V4SI 0 "register_operand" "=x")
2591 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")]
2592 UNSPEC_FIX_NOTRUNC))]
2594 "vcvtpd2dq{y}\t{%1, %0|%0, %1}"
2595 [(set_attr "type" "ssecvt")
2596 (set_attr "prefix" "vex")
2597 (set_attr "mode" "OI")])
2599 (define_expand "sse2_cvtpd2dq"
2600 [(set (match_operand:V4SI 0 "register_operand" "")
2602 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "")]
2606 "operands[2] = CONST0_RTX (V2SImode);")
2608 (define_insn "*sse2_cvtpd2dq"
2609 [(set (match_operand:V4SI 0 "register_operand" "=x")
2611 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2613 (match_operand:V2SI 2 "const0_operand" "")))]
2615 "* return TARGET_AVX ? \"vcvtpd2dq{x}\t{%1, %0|%0, %1}\"
2616 : \"cvtpd2dq\t{%1, %0|%0, %1}\";"
2617 [(set_attr "type" "ssecvt")
2618 (set_attr "prefix_rep" "1")
2619 (set_attr "prefix" "maybe_vex")
2620 (set_attr "mode" "TI")
2621 (set_attr "amdfam10_decode" "double")])
2623 (define_insn "avx_cvttpd2dq256"
2624 [(set (match_operand:V4SI 0 "register_operand" "=x")
2625 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
2627 "vcvttpd2dq{y}\t{%1, %0|%0, %1}"
2628 [(set_attr "type" "ssecvt")
2629 (set_attr "prefix" "vex")
2630 (set_attr "mode" "OI")])
2632 (define_expand "sse2_cvttpd2dq"
2633 [(set (match_operand:V4SI 0 "register_operand" "")
2635 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" ""))
2638 "operands[2] = CONST0_RTX (V2SImode);")
2640 (define_insn "*sse2_cvttpd2dq"
2641 [(set (match_operand:V4SI 0 "register_operand" "=x")
2643 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2644 (match_operand:V2SI 2 "const0_operand" "")))]
2646 "* return TARGET_AVX ? \"vcvttpd2dq{x}\t{%1, %0|%0, %1}\"
2647 : \"cvttpd2dq\t{%1, %0|%0, %1}\";"
2648 [(set_attr "type" "ssecvt")
2649 (set_attr "prefix_rep" "1")
2650 (set_attr "prefix" "maybe_vex")
2651 (set_attr "mode" "TI")
2652 (set_attr "amdfam10_decode" "double")])
2654 (define_insn "*avx_cvtsd2ss"
2655 [(set (match_operand:V4SF 0 "register_operand" "=x")
2658 (float_truncate:V2SF
2659 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))
2660 (match_operand:V4SF 1 "register_operand" "x")
2663 "vcvtsd2ss\t{%2, %1, %0|%0, %1, %2}"
2664 [(set_attr "type" "ssecvt")
2665 (set_attr "prefix" "vex")
2666 (set_attr "mode" "SF")])
2668 (define_insn "sse2_cvtsd2ss"
2669 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2672 (float_truncate:V2SF
2673 (match_operand:V2DF 2 "nonimmediate_operand" "x,m")))
2674 (match_operand:V4SF 1 "register_operand" "0,0")
2677 "cvtsd2ss\t{%2, %0|%0, %2}"
2678 [(set_attr "type" "ssecvt")
2679 (set_attr "athlon_decode" "vector,double")
2680 (set_attr "amdfam10_decode" "vector,double")
2681 (set_attr "mode" "SF")])
2683 (define_insn "*avx_cvtss2sd"
2684 [(set (match_operand:V2DF 0 "register_operand" "=x")
2688 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
2689 (parallel [(const_int 0) (const_int 1)])))
2690 (match_operand:V2DF 1 "register_operand" "x")
2693 "vcvtss2sd\t{%2, %1, %0|%0, %1, %2}"
2694 [(set_attr "type" "ssecvt")
2695 (set_attr "prefix" "vex")
2696 (set_attr "mode" "DF")])
2698 (define_insn "sse2_cvtss2sd"
2699 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2703 (match_operand:V4SF 2 "nonimmediate_operand" "x,m")
2704 (parallel [(const_int 0) (const_int 1)])))
2705 (match_operand:V2DF 1 "register_operand" "0,0")
2708 "cvtss2sd\t{%2, %0|%0, %2}"
2709 [(set_attr "type" "ssecvt")
2710 (set_attr "amdfam10_decode" "vector,double")
2711 (set_attr "mode" "DF")])
2713 (define_insn "avx_cvtpd2ps256"
2714 [(set (match_operand:V4SF 0 "register_operand" "=x")
2715 (float_truncate:V4SF
2716 (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
2718 "vcvtpd2ps{y}\t{%1, %0|%0, %1}"
2719 [(set_attr "type" "ssecvt")
2720 (set_attr "prefix" "vex")
2721 (set_attr "mode" "V4SF")])
2723 (define_expand "sse2_cvtpd2ps"
2724 [(set (match_operand:V4SF 0 "register_operand" "")
2726 (float_truncate:V2SF
2727 (match_operand:V2DF 1 "nonimmediate_operand" ""))
2730 "operands[2] = CONST0_RTX (V2SFmode);")
2732 (define_insn "*sse2_cvtpd2ps"
2733 [(set (match_operand:V4SF 0 "register_operand" "=x")
2735 (float_truncate:V2SF
2736 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2737 (match_operand:V2SF 2 "const0_operand" "")))]
2739 "* return TARGET_AVX ? \"vcvtpd2ps{x}\t{%1, %0|%0, %1}\"
2740 : \"cvtpd2ps\t{%1, %0|%0, %1}\";"
2741 [(set_attr "type" "ssecvt")
2742 (set_attr "prefix_data16" "1")
2743 (set_attr "prefix" "maybe_vex")
2744 (set_attr "mode" "V4SF")
2745 (set_attr "amdfam10_decode" "double")])
2747 (define_insn "avx_cvtps2pd256"
2748 [(set (match_operand:V4DF 0 "register_operand" "=x")
2750 (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
2752 "vcvtps2pd\t{%1, %0|%0, %1}"
2753 [(set_attr "type" "ssecvt")
2754 (set_attr "prefix" "vex")
2755 (set_attr "mode" "V4DF")])
2757 (define_insn "sse2_cvtps2pd"
2758 [(set (match_operand:V2DF 0 "register_operand" "=x")
2761 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
2762 (parallel [(const_int 0) (const_int 1)]))))]
2764 "%vcvtps2pd\t{%1, %0|%0, %1}"
2765 [(set_attr "type" "ssecvt")
2766 (set_attr "prefix" "maybe_vex")
2767 (set_attr "mode" "V2DF")
2768 (set_attr "amdfam10_decode" "direct")])
2770 (define_expand "vec_unpacks_hi_v4sf"
2775 (match_operand:V4SF 1 "nonimmediate_operand" ""))
2776 (parallel [(const_int 6)
2780 (set (match_operand:V2DF 0 "register_operand" "")
2784 (parallel [(const_int 0) (const_int 1)]))))]
2787 operands[2] = gen_reg_rtx (V4SFmode);
2790 (define_expand "vec_unpacks_lo_v4sf"
2791 [(set (match_operand:V2DF 0 "register_operand" "")
2794 (match_operand:V4SF 1 "nonimmediate_operand" "")
2795 (parallel [(const_int 0) (const_int 1)]))))]
2798 (define_expand "vec_unpacks_float_hi_v8hi"
2799 [(match_operand:V4SF 0 "register_operand" "")
2800 (match_operand:V8HI 1 "register_operand" "")]
2803 rtx tmp = gen_reg_rtx (V4SImode);
2805 emit_insn (gen_vec_unpacks_hi_v8hi (tmp, operands[1]));
2806 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
2810 (define_expand "vec_unpacks_float_lo_v8hi"
2811 [(match_operand:V4SF 0 "register_operand" "")
2812 (match_operand:V8HI 1 "register_operand" "")]
2815 rtx tmp = gen_reg_rtx (V4SImode);
2817 emit_insn (gen_vec_unpacks_lo_v8hi (tmp, operands[1]));
2818 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
2822 (define_expand "vec_unpacku_float_hi_v8hi"
2823 [(match_operand:V4SF 0 "register_operand" "")
2824 (match_operand:V8HI 1 "register_operand" "")]
2827 rtx tmp = gen_reg_rtx (V4SImode);
2829 emit_insn (gen_vec_unpacku_hi_v8hi (tmp, operands[1]));
2830 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
2834 (define_expand "vec_unpacku_float_lo_v8hi"
2835 [(match_operand:V4SF 0 "register_operand" "")
2836 (match_operand:V8HI 1 "register_operand" "")]
2839 rtx tmp = gen_reg_rtx (V4SImode);
2841 emit_insn (gen_vec_unpacku_lo_v8hi (tmp, operands[1]));
2842 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
2846 (define_expand "vec_unpacks_float_hi_v4si"
2849 (match_operand:V4SI 1 "nonimmediate_operand" "")
2850 (parallel [(const_int 2)
2854 (set (match_operand:V2DF 0 "register_operand" "")
2858 (parallel [(const_int 0) (const_int 1)]))))]
2861 operands[2] = gen_reg_rtx (V4SImode);
2864 (define_expand "vec_unpacks_float_lo_v4si"
2865 [(set (match_operand:V2DF 0 "register_operand" "")
2868 (match_operand:V4SI 1 "nonimmediate_operand" "")
2869 (parallel [(const_int 0) (const_int 1)]))))]
2872 (define_expand "vec_pack_trunc_v2df"
2873 [(match_operand:V4SF 0 "register_operand" "")
2874 (match_operand:V2DF 1 "nonimmediate_operand" "")
2875 (match_operand:V2DF 2 "nonimmediate_operand" "")]
2880 r1 = gen_reg_rtx (V4SFmode);
2881 r2 = gen_reg_rtx (V4SFmode);
2883 emit_insn (gen_sse2_cvtpd2ps (r1, operands[1]));
2884 emit_insn (gen_sse2_cvtpd2ps (r2, operands[2]));
2885 emit_insn (gen_sse_movlhps (operands[0], r1, r2));
2889 (define_expand "vec_pack_sfix_trunc_v2df"
2890 [(match_operand:V4SI 0 "register_operand" "")
2891 (match_operand:V2DF 1 "nonimmediate_operand" "")
2892 (match_operand:V2DF 2 "nonimmediate_operand" "")]
2897 r1 = gen_reg_rtx (V4SImode);
2898 r2 = gen_reg_rtx (V4SImode);
2900 emit_insn (gen_sse2_cvttpd2dq (r1, operands[1]));
2901 emit_insn (gen_sse2_cvttpd2dq (r2, operands[2]));
2902 emit_insn (gen_sse2_punpcklqdq (gen_lowpart (V2DImode, operands[0]),
2903 gen_lowpart (V2DImode, r1),
2904 gen_lowpart (V2DImode, r2)));
2908 (define_expand "vec_pack_sfix_v2df"
2909 [(match_operand:V4SI 0 "register_operand" "")
2910 (match_operand:V2DF 1 "nonimmediate_operand" "")
2911 (match_operand:V2DF 2 "nonimmediate_operand" "")]
2916 r1 = gen_reg_rtx (V4SImode);
2917 r2 = gen_reg_rtx (V4SImode);
2919 emit_insn (gen_sse2_cvtpd2dq (r1, operands[1]));
2920 emit_insn (gen_sse2_cvtpd2dq (r2, operands[2]));
2921 emit_insn (gen_sse2_punpcklqdq (gen_lowpart (V2DImode, operands[0]),
2922 gen_lowpart (V2DImode, r1),
2923 gen_lowpart (V2DImode, r2)));
2927 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2929 ;; Parallel single-precision floating point element swizzling
2931 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2933 (define_expand "sse_movhlps_exp"
2934 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
2937 (match_operand:V4SF 1 "nonimmediate_operand" "")
2938 (match_operand:V4SF 2 "nonimmediate_operand" ""))
2939 (parallel [(const_int 6)
2944 "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
2946 (define_insn "*avx_movhlps"
2947 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
2950 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,0")
2951 (match_operand:V4SF 2 "nonimmediate_operand" " x,o,x"))
2952 (parallel [(const_int 6)
2956 "TARGET_AVX && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2958 vmovhlps\t{%2, %1, %0|%0, %1, %2}
2959 vmovlps\t{%H2, %1, %0|%0, %1, %H2}
2960 vmovhps\t{%2, %0|%0, %2}"
2961 [(set_attr "type" "ssemov")
2962 (set_attr "prefix" "vex")
2963 (set_attr "mode" "V4SF,V2SF,V2SF")])
2965 (define_insn "sse_movhlps"
2966 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
2969 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
2970 (match_operand:V4SF 2 "nonimmediate_operand" " x,o,x"))
2971 (parallel [(const_int 6)
2975 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2977 movhlps\t{%2, %0|%0, %2}
2978 movlps\t{%H2, %0|%0, %H2}
2979 movhps\t{%2, %0|%0, %2}"
2980 [(set_attr "type" "ssemov")
2981 (set_attr "mode" "V4SF,V2SF,V2SF")])
2983 (define_expand "sse_movlhps_exp"
2984 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
2987 (match_operand:V4SF 1 "nonimmediate_operand" "")
2988 (match_operand:V4SF 2 "nonimmediate_operand" ""))
2989 (parallel [(const_int 0)
2994 "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
2996 (define_insn "*avx_movlhps"
2997 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
3000 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,0")
3001 (match_operand:V4SF 2 "nonimmediate_operand" " x,m,x"))
3002 (parallel [(const_int 0)
3006 "TARGET_AVX && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
3008 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3009 vmovhps\t{%2, %1, %0|%0, %1, %2}
3010 vmovlps\t{%2, %H0|%H0, %2}"
3011 [(set_attr "type" "ssemov")
3012 (set_attr "prefix" "vex")
3013 (set_attr "mode" "V4SF,V2SF,V2SF")])
3015 (define_insn "sse_movlhps"
3016 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
3019 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
3020 (match_operand:V4SF 2 "nonimmediate_operand" " x,m,x"))
3021 (parallel [(const_int 0)
3025 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
3027 movlhps\t{%2, %0|%0, %2}
3028 movhps\t{%2, %0|%0, %2}
3029 movlps\t{%2, %H0|%H0, %2}"
3030 [(set_attr "type" "ssemov")
3031 (set_attr "mode" "V4SF,V2SF,V2SF")])
3033 (define_insn "avx_unpckhps256"
3034 [(set (match_operand:V8SF 0 "register_operand" "=x")
3037 (match_operand:V8SF 1 "register_operand" "x")
3038 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3039 (parallel [(const_int 2) (const_int 6)
3040 (const_int 3) (const_int 7)
3041 (const_int 10) (const_int 14)
3042 (const_int 11) (const_int 15)])))]
3044 "vunpckhps\t{%2, %1, %0|%0, %1, %2}"
3045 [(set_attr "type" "sselog")
3046 (set_attr "prefix" "vex")
3047 (set_attr "mode" "V8SF")])
3049 (define_insn "*avx_unpckhps"
3050 [(set (match_operand:V4SF 0 "register_operand" "=x")
3053 (match_operand:V4SF 1 "register_operand" "x")
3054 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
3055 (parallel [(const_int 2) (const_int 6)
3056 (const_int 3) (const_int 7)])))]
3058 "vunpckhps\t{%2, %1, %0|%0, %1, %2}"
3059 [(set_attr "type" "sselog")
3060 (set_attr "prefix" "vex")
3061 (set_attr "mode" "V4SF")])
3063 (define_insn "sse_unpckhps"
3064 [(set (match_operand:V4SF 0 "register_operand" "=x")
3067 (match_operand:V4SF 1 "register_operand" "0")
3068 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
3069 (parallel [(const_int 2) (const_int 6)
3070 (const_int 3) (const_int 7)])))]
3072 "unpckhps\t{%2, %0|%0, %2}"
3073 [(set_attr "type" "sselog")
3074 (set_attr "mode" "V4SF")])
3076 (define_insn "avx_unpcklps256"
3077 [(set (match_operand:V8SF 0 "register_operand" "=x")
3080 (match_operand:V8SF 1 "register_operand" "x")
3081 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3082 (parallel [(const_int 0) (const_int 4)
3083 (const_int 1) (const_int 5)
3084 (const_int 8) (const_int 12)
3085 (const_int 9) (const_int 13)])))]
3087 "vunpcklps\t{%2, %1, %0|%0, %1, %2}"
3088 [(set_attr "type" "sselog")
3089 (set_attr "prefix" "vex")
3090 (set_attr "mode" "V8SF")])
3092 (define_insn "*avx_unpcklps"
3093 [(set (match_operand:V4SF 0 "register_operand" "=x")
3096 (match_operand:V4SF 1 "register_operand" "x")
3097 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
3098 (parallel [(const_int 0) (const_int 4)
3099 (const_int 1) (const_int 5)])))]
3101 "vunpcklps\t{%2, %1, %0|%0, %1, %2}"
3102 [(set_attr "type" "sselog")
3103 (set_attr "prefix" "vex")
3104 (set_attr "mode" "V4SF")])
3106 (define_insn "sse_unpcklps"
3107 [(set (match_operand:V4SF 0 "register_operand" "=x")
3110 (match_operand:V4SF 1 "register_operand" "0")
3111 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
3112 (parallel [(const_int 0) (const_int 4)
3113 (const_int 1) (const_int 5)])))]
3115 "unpcklps\t{%2, %0|%0, %2}"
3116 [(set_attr "type" "sselog")
3117 (set_attr "mode" "V4SF")])
3119 ;; These are modeled with the same vec_concat as the others so that we
3120 ;; capture users of shufps that can use the new instructions
3121 (define_insn "avx_movshdup256"
3122 [(set (match_operand:V8SF 0 "register_operand" "=x")
3125 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
3127 (parallel [(const_int 1) (const_int 1)
3128 (const_int 3) (const_int 3)
3129 (const_int 5) (const_int 5)
3130 (const_int 7) (const_int 7)])))]
3132 "vmovshdup\t{%1, %0|%0, %1}"
3133 [(set_attr "type" "sse")
3134 (set_attr "prefix" "vex")
3135 (set_attr "mode" "V8SF")])
3137 (define_insn "sse3_movshdup"
3138 [(set (match_operand:V4SF 0 "register_operand" "=x")
3141 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3143 (parallel [(const_int 1)
3148 "%vmovshdup\t{%1, %0|%0, %1}"
3149 [(set_attr "type" "sse")
3150 (set_attr "prefix_rep" "1")
3151 (set_attr "prefix" "maybe_vex")
3152 (set_attr "mode" "V4SF")])
3154 (define_insn "avx_movsldup256"
3155 [(set (match_operand:V8SF 0 "register_operand" "=x")
3158 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
3160 (parallel [(const_int 0) (const_int 0)
3161 (const_int 2) (const_int 2)
3162 (const_int 4) (const_int 4)
3163 (const_int 6) (const_int 6)])))]
3165 "vmovsldup\t{%1, %0|%0, %1}"
3166 [(set_attr "type" "sse")
3167 (set_attr "prefix" "vex")
3168 (set_attr "mode" "V8SF")])
3170 (define_insn "sse3_movsldup"
3171 [(set (match_operand:V4SF 0 "register_operand" "=x")
3174 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3176 (parallel [(const_int 0)
3181 "%vmovsldup\t{%1, %0|%0, %1}"
3182 [(set_attr "type" "sse")
3183 (set_attr "prefix_rep" "1")
3184 (set_attr "prefix" "maybe_vex")
3185 (set_attr "mode" "V4SF")])
3187 (define_expand "avx_shufps256"
3188 [(match_operand:V8SF 0 "register_operand" "")
3189 (match_operand:V8SF 1 "register_operand" "")
3190 (match_operand:V8SF 2 "nonimmediate_operand" "")
3191 (match_operand:SI 3 "const_int_operand" "")]
3194 int mask = INTVAL (operands[3]);
3195 emit_insn (gen_avx_shufps256_1 (operands[0], operands[1], operands[2],
3196 GEN_INT ((mask >> 0) & 3),
3197 GEN_INT ((mask >> 2) & 3),
3198 GEN_INT (((mask >> 4) & 3) + 8),
3199 GEN_INT (((mask >> 6) & 3) + 8),
3200 GEN_INT (((mask >> 0) & 3) + 4),
3201 GEN_INT (((mask >> 2) & 3) + 4),
3202 GEN_INT (((mask >> 4) & 3) + 12),
3203 GEN_INT (((mask >> 6) & 3) + 12)));
3207 ;; One bit in mask selects 2 elements.
3208 (define_insn "avx_shufps256_1"
3209 [(set (match_operand:V8SF 0 "register_operand" "=x")
3212 (match_operand:V8SF 1 "register_operand" "x")
3213 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3214 (parallel [(match_operand 3 "const_0_to_3_operand" "")
3215 (match_operand 4 "const_0_to_3_operand" "")
3216 (match_operand 5 "const_8_to_11_operand" "")
3217 (match_operand 6 "const_8_to_11_operand" "")
3218 (match_operand 7 "const_4_to_7_operand" "")
3219 (match_operand 8 "const_4_to_7_operand" "")
3220 (match_operand 9 "const_12_to_15_operand" "")
3221 (match_operand 10 "const_12_to_15_operand" "")])))]
3223 && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
3224 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
3225 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
3226 && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4))"
3229 mask = INTVAL (operands[3]);
3230 mask |= INTVAL (operands[4]) << 2;
3231 mask |= (INTVAL (operands[5]) - 8) << 4;
3232 mask |= (INTVAL (operands[6]) - 8) << 6;
3233 operands[3] = GEN_INT (mask);
3235 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3237 [(set_attr "type" "sselog")
3238 (set_attr "prefix" "vex")
3239 (set_attr "mode" "V8SF")])
3241 (define_expand "sse_shufps"
3242 [(match_operand:V4SF 0 "register_operand" "")
3243 (match_operand:V4SF 1 "register_operand" "")
3244 (match_operand:V4SF 2 "nonimmediate_operand" "")
3245 (match_operand:SI 3 "const_int_operand" "")]
3248 int mask = INTVAL (operands[3]);
3249 emit_insn (gen_sse_shufps_v4sf (operands[0], operands[1], operands[2],
3250 GEN_INT ((mask >> 0) & 3),
3251 GEN_INT ((mask >> 2) & 3),
3252 GEN_INT (((mask >> 4) & 3) + 4),
3253 GEN_INT (((mask >> 6) & 3) + 4)));
3257 (define_insn "*avx_shufps_<mode>"
3258 [(set (match_operand:SSEMODE4S 0 "register_operand" "=x")
3259 (vec_select:SSEMODE4S
3260 (vec_concat:<ssedoublesizemode>
3261 (match_operand:SSEMODE4S 1 "register_operand" "x")
3262 (match_operand:SSEMODE4S 2 "nonimmediate_operand" "xm"))
3263 (parallel [(match_operand 3 "const_0_to_3_operand" "")
3264 (match_operand 4 "const_0_to_3_operand" "")
3265 (match_operand 5 "const_4_to_7_operand" "")
3266 (match_operand 6 "const_4_to_7_operand" "")])))]
3270 mask |= INTVAL (operands[3]) << 0;
3271 mask |= INTVAL (operands[4]) << 2;
3272 mask |= (INTVAL (operands[5]) - 4) << 4;
3273 mask |= (INTVAL (operands[6]) - 4) << 6;
3274 operands[3] = GEN_INT (mask);
3276 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3278 [(set_attr "type" "sselog")
3279 (set_attr "prefix" "vex")
3280 (set_attr "mode" "V4SF")])
3282 (define_insn "sse_shufps_<mode>"
3283 [(set (match_operand:SSEMODE4S 0 "register_operand" "=x")
3284 (vec_select:SSEMODE4S
3285 (vec_concat:<ssedoublesizemode>
3286 (match_operand:SSEMODE4S 1 "register_operand" "0")
3287 (match_operand:SSEMODE4S 2 "nonimmediate_operand" "xm"))
3288 (parallel [(match_operand 3 "const_0_to_3_operand" "")
3289 (match_operand 4 "const_0_to_3_operand" "")
3290 (match_operand 5 "const_4_to_7_operand" "")
3291 (match_operand 6 "const_4_to_7_operand" "")])))]
3295 mask |= INTVAL (operands[3]) << 0;
3296 mask |= INTVAL (operands[4]) << 2;
3297 mask |= (INTVAL (operands[5]) - 4) << 4;
3298 mask |= (INTVAL (operands[6]) - 4) << 6;
3299 operands[3] = GEN_INT (mask);
3301 return "shufps\t{%3, %2, %0|%0, %2, %3}";
3303 [(set_attr "type" "sselog")
3304 (set_attr "mode" "V4SF")])
3306 (define_insn "sse_storehps"
3307 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3309 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
3310 (parallel [(const_int 2) (const_int 3)])))]
3313 %vmovhps\t{%1, %0|%0, %1}
3314 %vmovhlps\t{%1, %d0|%d0, %1}
3315 %vmovlps\t{%H1, %d0|%d0, %H1}"
3316 [(set_attr "type" "ssemov")
3317 (set_attr "prefix" "maybe_vex")
3318 (set_attr "mode" "V2SF,V4SF,V2SF")])
3320 (define_expand "sse_loadhps_exp"
3321 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3324 (match_operand:V4SF 1 "nonimmediate_operand" "")
3325 (parallel [(const_int 0) (const_int 1)]))
3326 (match_operand:V2SF 2 "nonimmediate_operand" "")))]
3328 "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
3330 (define_insn "*avx_loadhps"
3331 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
3334 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,0")
3335 (parallel [(const_int 0) (const_int 1)]))
3336 (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
3339 vmovhps\t{%2, %1, %0|%0, %1, %2}
3340 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3341 vmovlps\t{%2, %H0|%H0, %2}"
3342 [(set_attr "type" "ssemov")
3343 (set_attr "prefix" "vex")
3344 (set_attr "mode" "V2SF,V4SF,V2SF")])
3346 (define_insn "sse_loadhps"
3347 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
3350 (match_operand:V4SF 1 "nonimmediate_operand" "0,0,0")
3351 (parallel [(const_int 0) (const_int 1)]))
3352 (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
3355 movhps\t{%2, %0|%0, %2}
3356 movlhps\t{%2, %0|%0, %2}
3357 movlps\t{%2, %H0|%H0, %2}"
3358 [(set_attr "type" "ssemov")
3359 (set_attr "mode" "V2SF,V4SF,V2SF")])
3361 (define_insn "*avx_storelps"
3362 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3364 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m")
3365 (parallel [(const_int 0) (const_int 1)])))]
3368 vmovlps\t{%1, %0|%0, %1}
3369 vmovaps\t{%1, %0|%0, %1}
3370 vmovlps\t{%1, %0, %0|%0, %0, %1}"
3371 [(set_attr "type" "ssemov")
3372 (set_attr "prefix" "vex")
3373 (set_attr "mode" "V2SF,V2DF,V2SF")])
3375 (define_insn "sse_storelps"
3376 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3378 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m")
3379 (parallel [(const_int 0) (const_int 1)])))]
3382 movlps\t{%1, %0|%0, %1}
3383 movaps\t{%1, %0|%0, %1}
3384 movlps\t{%1, %0|%0, %1}"
3385 [(set_attr "type" "ssemov")
3386 (set_attr "mode" "V2SF,V4SF,V2SF")])
3388 (define_expand "sse_loadlps_exp"
3389 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3391 (match_operand:V2SF 2 "nonimmediate_operand" "")
3393 (match_operand:V4SF 1 "nonimmediate_operand" "")
3394 (parallel [(const_int 2) (const_int 3)]))))]
3396 "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
3398 (define_insn "*avx_loadlps"
3399 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
3401 (match_operand:V2SF 2 "nonimmediate_operand" "x,m,x")
3403 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,0")
3404 (parallel [(const_int 2) (const_int 3)]))))]
3407 shufps\t{$0xe4, %1, %2, %0|%0, %2, %1, 0xe4}
3408 vmovlps\t{%2, %1, %0|%0, %1, %2}
3409 vmovlps\t{%2, %0|%0, %2}"
3410 [(set_attr "type" "sselog,ssemov,ssemov")
3411 (set_attr "prefix" "vex")
3412 (set_attr "mode" "V4SF,V2SF,V2SF")])
3414 (define_insn "sse_loadlps"
3415 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
3417 (match_operand:V2SF 2 "nonimmediate_operand" "0,m,x")
3419 (match_operand:V4SF 1 "nonimmediate_operand" "x,0,0")
3420 (parallel [(const_int 2) (const_int 3)]))))]
3423 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
3424 movlps\t{%2, %0|%0, %2}
3425 movlps\t{%2, %0|%0, %2}"
3426 [(set_attr "type" "sselog,ssemov,ssemov")
3427 (set_attr "mode" "V4SF,V2SF,V2SF")])
3429 (define_insn "*avx_movss"
3430 [(set (match_operand:V4SF 0 "register_operand" "=x")
3432 (match_operand:V4SF 2 "register_operand" "x")
3433 (match_operand:V4SF 1 "register_operand" "x")
3436 "vmovss\t{%2, %1, %0|%0, %1, %2}"
3437 [(set_attr "type" "ssemov")
3438 (set_attr "prefix" "vex")
3439 (set_attr "mode" "SF")])
3441 (define_insn "sse_movss"
3442 [(set (match_operand:V4SF 0 "register_operand" "=x")
3444 (match_operand:V4SF 2 "register_operand" "x")
3445 (match_operand:V4SF 1 "register_operand" "0")
3448 "movss\t{%2, %0|%0, %2}"
3449 [(set_attr "type" "ssemov")
3450 (set_attr "mode" "SF")])
3452 (define_insn "*vec_dupv4sf_avx"
3453 [(set (match_operand:V4SF 0 "register_operand" "=x")
3455 (match_operand:SF 1 "register_operand" "x")))]
3457 "vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0}"
3458 [(set_attr "type" "sselog1")
3459 (set_attr "prefix" "vex")
3460 (set_attr "mode" "V4SF")])
3462 (define_insn "*vec_dupv4sf"
3463 [(set (match_operand:V4SF 0 "register_operand" "=x")
3465 (match_operand:SF 1 "register_operand" "0")))]
3467 "shufps\t{$0, %0, %0|%0, %0, 0}"
3468 [(set_attr "type" "sselog1")
3469 (set_attr "mode" "V4SF")])
3471 (define_insn "*vec_concatv2sf_avx"
3472 [(set (match_operand:V2SF 0 "register_operand" "=x,x,x,*y ,*y")
3474 (match_operand:SF 1 "nonimmediate_operand" " x,x,m, x , m")
3475 (match_operand:SF 2 "vector_move_operand" " x,m,C,*ym, C")))]
3478 vunpcklps\t{%2, %1, %0|%0, %1, %2}
3479 vinsertps\t{$0x10, %2, %1, %0|%0, %1, %2, 0x10}
3480 vmovss\t{%1, %0|%0, %1}
3481 punpckldq\t{%2, %0|%0, %2}
3482 movd\t{%1, %0|%0, %1}"
3483 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
3484 (set (attr "prefix")
3485 (if_then_else (eq_attr "alternative" "3,4")
3486 (const_string "orig")
3487 (const_string "vex")))
3488 (set_attr "mode" "V4SF,V4SF,SF,DI,DI")])
3490 ;; Although insertps takes register source, we prefer
3491 ;; unpcklps with register source since it is shorter.
3492 (define_insn "*vec_concatv2sf_sse4_1"
3493 [(set (match_operand:V2SF 0 "register_operand" "=x,x,x,*y ,*y")
3495 (match_operand:SF 1 "nonimmediate_operand" " 0,0,m, 0 , m")
3496 (match_operand:SF 2 "vector_move_operand" " x,m,C,*ym, C")))]
3499 unpcklps\t{%2, %0|%0, %2}
3500 insertps\t{$0x10, %2, %0|%0, %2, 0x10}
3501 movss\t{%1, %0|%0, %1}
3502 punpckldq\t{%2, %0|%0, %2}
3503 movd\t{%1, %0|%0, %1}"
3504 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
3505 (set_attr "prefix_extra" "*,1,*,*,*")
3506 (set_attr "mode" "V4SF,V4SF,SF,DI,DI")])
3508 ;; ??? In theory we can match memory for the MMX alternative, but allowing
3509 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
3510 ;; alternatives pretty much forces the MMX alternative to be chosen.
3511 (define_insn "*vec_concatv2sf_sse"
3512 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
3514 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
3515 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
3518 unpcklps\t{%2, %0|%0, %2}
3519 movss\t{%1, %0|%0, %1}
3520 punpckldq\t{%2, %0|%0, %2}
3521 movd\t{%1, %0|%0, %1}"
3522 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
3523 (set_attr "mode" "V4SF,SF,DI,DI")])
3525 (define_insn "*vec_concatv4sf_avx"
3526 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3528 (match_operand:V2SF 1 "register_operand" " x,x")
3529 (match_operand:V2SF 2 "nonimmediate_operand" " x,m")))]
3532 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3533 vmovhps\t{%2, %1, %0|%0, %1, %2}"
3534 [(set_attr "type" "ssemov")
3535 (set_attr "prefix" "vex")
3536 (set_attr "mode" "V4SF,V2SF")])
3538 (define_insn "*vec_concatv4sf_sse"
3539 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3541 (match_operand:V2SF 1 "register_operand" " 0,0")
3542 (match_operand:V2SF 2 "nonimmediate_operand" " x,m")))]
3545 movlhps\t{%2, %0|%0, %2}
3546 movhps\t{%2, %0|%0, %2}"
3547 [(set_attr "type" "ssemov")
3548 (set_attr "mode" "V4SF,V2SF")])
3550 (define_expand "vec_init<mode>"
3551 [(match_operand:SSEMODE 0 "register_operand" "")
3552 (match_operand 1 "" "")]
3555 ix86_expand_vector_init (false, operands[0], operands[1]);
3559 (define_insn "*vec_setv4sf_0_avx"
3560 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,m")
3563 (match_operand:SF 2 "general_operand" " x,m,*r,x*rfF"))
3564 (match_operand:V4SF 1 "vector_move_operand" " x,C,C ,0")
3568 vmovss\t{%2, %1, %0|%0, %1, %2}
3569 vmovss\t{%2, %0|%0, %2}
3570 vmovd\t{%2, %0|%0, %2}
3572 [(set_attr "type" "ssemov")
3573 (set_attr "prefix" "vex")
3574 (set_attr "mode" "SF")])
3576 (define_insn "vec_setv4sf_0"
3577 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,Y2,m")
3580 (match_operand:SF 2 "general_operand" " x,m,*r,x*rfF"))
3581 (match_operand:V4SF 1 "vector_move_operand" " 0,C,C ,0")
3585 movss\t{%2, %0|%0, %2}
3586 movss\t{%2, %0|%0, %2}
3587 movd\t{%2, %0|%0, %2}
3589 [(set_attr "type" "ssemov")
3590 (set_attr "mode" "SF")])
3592 ;; A subset is vec_setv4sf.
3593 (define_insn "*vec_setv4sf_avx"
3594 [(set (match_operand:V4SF 0 "register_operand" "=x")
3597 (match_operand:SF 2 "nonimmediate_operand" "xm"))
3598 (match_operand:V4SF 1 "register_operand" "x")
3599 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
3602 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
3603 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3605 [(set_attr "type" "sselog")
3606 (set_attr "prefix" "vex")
3607 (set_attr "mode" "V4SF")])
3609 (define_insn "*vec_setv4sf_sse4_1"
3610 [(set (match_operand:V4SF 0 "register_operand" "=x")
3613 (match_operand:SF 2 "nonimmediate_operand" "xm"))
3614 (match_operand:V4SF 1 "register_operand" "0")
3615 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
3618 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
3619 return "insertps\t{%3, %2, %0|%0, %2, %3}";
3621 [(set_attr "type" "sselog")
3622 (set_attr "prefix_extra" "1")
3623 (set_attr "mode" "V4SF")])
3625 (define_insn "*avx_insertps"
3626 [(set (match_operand:V4SF 0 "register_operand" "=x")
3627 (unspec:V4SF [(match_operand:V4SF 2 "nonimmediate_operand" "xm")
3628 (match_operand:V4SF 1 "register_operand" "x")
3629 (match_operand:SI 3 "const_0_to_255_operand" "n")]
3632 "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3633 [(set_attr "type" "sselog")
3634 (set_attr "prefix" "vex")
3635 (set_attr "mode" "V4SF")])
3637 (define_insn "sse4_1_insertps"
3638 [(set (match_operand:V4SF 0 "register_operand" "=x")
3639 (unspec:V4SF [(match_operand:V4SF 2 "register_operand" "x")
3640 (match_operand:V4SF 1 "register_operand" "0")
3641 (match_operand:SI 3 "const_0_to_255_operand" "n")]
3644 "insertps\t{%3, %2, %0|%0, %2, %3}";
3645 [(set_attr "type" "sselog")
3646 (set_attr "prefix_extra" "1")
3647 (set_attr "mode" "V4SF")])
3650 [(set (match_operand:V4SF 0 "memory_operand" "")
3653 (match_operand:SF 1 "nonmemory_operand" ""))
3656 "TARGET_SSE && reload_completed"
3659 emit_move_insn (adjust_address (operands[0], SFmode, 0), operands[1]);
3663 (define_expand "vec_set<mode>"
3664 [(match_operand:SSEMODE 0 "register_operand" "")
3665 (match_operand:<ssescalarmode> 1 "register_operand" "")
3666 (match_operand 2 "const_int_operand" "")]
3669 ix86_expand_vector_set (false, operands[0], operands[1],
3670 INTVAL (operands[2]));
3674 (define_insn_and_split "*vec_extractv4sf_0"
3675 [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,f,r")
3677 (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m,m")
3678 (parallel [(const_int 0)])))]
3679 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
3681 "&& reload_completed"
3684 rtx op1 = operands[1];
3686 op1 = gen_rtx_REG (SFmode, REGNO (op1));
3688 op1 = gen_lowpart (SFmode, op1);
3689 emit_move_insn (operands[0], op1);
3693 (define_expand "avx_vextractf128<mode>"
3694 [(match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "")
3695 (match_operand:AVX256MODE 1 "register_operand" "")
3696 (match_operand:SI 2 "const_0_to_1_operand" "")]
3699 switch (INTVAL (operands[2]))
3702 emit_insn (gen_vec_extract_lo_<mode> (operands[0], operands[1]));
3705 emit_insn (gen_vec_extract_hi_<mode> (operands[0], operands[1]));
3713 (define_insn "vec_extract_lo_<mode>"
3714 [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
3715 (vec_select:<avxhalfvecmode>
3716 (match_operand:AVX256MODE4P 1 "register_operand" "x,x")
3717 (parallel [(const_int 0) (const_int 1)])))]
3719 "vextractf128\t{$0x0, %1, %0|%0, %1, 0x0}"
3720 [(set_attr "type" "sselog")
3721 (set_attr "memory" "none,store")
3722 (set_attr "prefix" "vex")
3723 (set_attr "mode" "V8SF")])
3725 (define_insn "vec_extract_hi_<mode>"
3726 [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
3727 (vec_select:<avxhalfvecmode>
3728 (match_operand:AVX256MODE4P 1 "register_operand" "x,x")
3729 (parallel [(const_int 2) (const_int 3)])))]
3731 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
3732 [(set_attr "type" "sselog")
3733 (set_attr "memory" "none,store")
3734 (set_attr "prefix" "vex")
3735 (set_attr "mode" "V8SF")])
3737 (define_insn "vec_extract_lo_<mode>"
3738 [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
3739 (vec_select:<avxhalfvecmode>
3740 (match_operand:AVX256MODE8P 1 "register_operand" "x,x")
3741 (parallel [(const_int 0) (const_int 1)
3742 (const_int 2) (const_int 3)])))]
3744 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
3745 [(set_attr "type" "sselog")
3746 (set_attr "memory" "none,store")
3747 (set_attr "prefix" "vex")
3748 (set_attr "mode" "V8SF")])
3750 (define_insn "vec_extract_hi_<mode>"
3751 [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
3752 (vec_select:<avxhalfvecmode>
3753 (match_operand:AVX256MODE8P 1 "register_operand" "x,x")
3754 (parallel [(const_int 4) (const_int 5)
3755 (const_int 6) (const_int 7)])))]
3757 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
3758 [(set_attr "type" "sselog")
3759 (set_attr "memory" "none,store")
3760 (set_attr "prefix" "vex")
3761 (set_attr "mode" "V8SF")])
3763 (define_insn "vec_extract_lo_v16hi"
3764 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
3766 (match_operand:V16HI 1 "register_operand" "x,x")
3767 (parallel [(const_int 0) (const_int 1)
3768 (const_int 2) (const_int 3)
3769 (const_int 4) (const_int 5)
3770 (const_int 6) (const_int 7)])))]
3772 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
3773 [(set_attr "type" "sselog")
3774 (set_attr "memory" "none,store")
3775 (set_attr "prefix" "vex")
3776 (set_attr "mode" "V8SF")])
3778 (define_insn "vec_extract_hi_v16hi"
3779 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
3781 (match_operand:V16HI 1 "register_operand" "x,x")
3782 (parallel [(const_int 8) (const_int 9)
3783 (const_int 10) (const_int 11)
3784 (const_int 12) (const_int 13)
3785 (const_int 14) (const_int 15)])))]
3787 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
3788 [(set_attr "type" "sselog")
3789 (set_attr "memory" "none,store")
3790 (set_attr "prefix" "vex")
3791 (set_attr "mode" "V8SF")])
3793 (define_insn "vec_extract_lo_v32qi"
3794 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
3796 (match_operand:V32QI 1 "register_operand" "x,x")
3797 (parallel [(const_int 0) (const_int 1)
3798 (const_int 2) (const_int 3)
3799 (const_int 4) (const_int 5)
3800 (const_int 6) (const_int 7)
3801 (const_int 8) (const_int 9)
3802 (const_int 10) (const_int 11)
3803 (const_int 12) (const_int 13)
3804 (const_int 14) (const_int 15)])))]
3806 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
3807 [(set_attr "type" "sselog")
3808 (set_attr "memory" "none,store")
3809 (set_attr "prefix" "vex")
3810 (set_attr "mode" "V8SF")])
3812 (define_insn "vec_extract_hi_v32qi"
3813 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
3815 (match_operand:V32QI 1 "register_operand" "x,x")
3816 (parallel [(const_int 16) (const_int 17)
3817 (const_int 18) (const_int 19)
3818 (const_int 20) (const_int 21)
3819 (const_int 22) (const_int 23)
3820 (const_int 24) (const_int 25)
3821 (const_int 26) (const_int 27)
3822 (const_int 28) (const_int 29)
3823 (const_int 30) (const_int 31)])))]
3825 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
3826 [(set_attr "type" "sselog")
3827 (set_attr "memory" "none,store")
3828 (set_attr "prefix" "vex")
3829 (set_attr "mode" "V8SF")])
3831 (define_insn "*sse4_1_extractps"
3832 [(set (match_operand:SF 0 "nonimmediate_operand" "=rm")
3834 (match_operand:V4SF 1 "register_operand" "x")
3835 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
3837 "%vextractps\t{%2, %1, %0|%0, %1, %2}"
3838 [(set_attr "type" "sselog")
3839 (set_attr "prefix_extra" "1")
3840 (set_attr "prefix" "maybe_vex")
3841 (set_attr "mode" "V4SF")])
3843 (define_insn_and_split "*vec_extract_v4sf_mem"
3844 [(set (match_operand:SF 0 "register_operand" "=x*rf")
3846 (match_operand:V4SF 1 "memory_operand" "o")
3847 (parallel [(match_operand 2 "const_0_to_3_operand" "n")])))]
3853 int i = INTVAL (operands[2]);
3855 emit_move_insn (operands[0], adjust_address (operands[1], SFmode, i*4));
3859 (define_expand "vec_extract<mode>"
3860 [(match_operand:<ssescalarmode> 0 "register_operand" "")
3861 (match_operand:SSEMODE 1 "register_operand" "")
3862 (match_operand 2 "const_int_operand" "")]
3865 ix86_expand_vector_extract (false, operands[0], operands[1],
3866 INTVAL (operands[2]));
3870 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3872 ;; Parallel double-precision floating point element swizzling
3874 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3876 (define_insn "avx_unpckhpd256"
3877 [(set (match_operand:V4DF 0 "register_operand" "=x")
3880 (match_operand:V4DF 1 "register_operand" "x")
3881 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
3882 (parallel [(const_int 2) (const_int 6)
3883 (const_int 3) (const_int 7)])))]
3885 "vunpckhpd\t{%2, %1, %0|%0, %1, %2}"
3886 [(set_attr "type" "sselog")
3887 (set_attr "prefix" "vex")
3888 (set_attr "mode" "V4DF")])
3890 (define_expand "sse2_unpckhpd_exp"
3891 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
3894 (match_operand:V2DF 1 "nonimmediate_operand" "")
3895 (match_operand:V2DF 2 "nonimmediate_operand" ""))
3896 (parallel [(const_int 1)
3899 "ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);")
3901 (define_insn "*avx_unpckhpd"
3902 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
3905 (match_operand:V2DF 1 "nonimmediate_operand" " x,o,x")
3906 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,0"))
3907 (parallel [(const_int 1)
3909 "TARGET_AVX && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3911 vunpckhpd\t{%2, %1, %0|%0, %1, %2}
3912 vmovlpd\t{%H1, %2, %0|%0, %2, %H1}
3913 vmovhpd\t{%1, %0|%0, %1}"
3914 [(set_attr "type" "sselog,ssemov,ssemov")
3915 (set_attr "prefix" "vex")
3916 (set_attr "mode" "V2DF,V1DF,V1DF")])
3918 (define_insn "sse2_unpckhpd"
3919 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
3922 (match_operand:V2DF 1 "nonimmediate_operand" " 0,o,x")
3923 (match_operand:V2DF 2 "nonimmediate_operand" " x,0,0"))
3924 (parallel [(const_int 1)
3926 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3928 unpckhpd\t{%2, %0|%0, %2}
3929 movlpd\t{%H1, %0|%0, %H1}
3930 movhpd\t{%1, %0|%0, %1}"
3931 [(set_attr "type" "sselog,ssemov,ssemov")
3932 (set_attr "mode" "V2DF,V1DF,V1DF")])
3934 (define_insn "avx_movddup256"
3935 [(set (match_operand:V4DF 0 "register_operand" "=x")
3938 (match_operand:V4DF 1 "nonimmediate_operand" "xm")
3940 (parallel [(const_int 0) (const_int 2)
3941 (const_int 4) (const_int 6)])))]
3943 "vmovddup\t{%1, %0|%0, %1}"
3944 [(set_attr "type" "sselog1")
3945 (set_attr "prefix" "vex")
3946 (set_attr "mode" "V4DF")])
3948 (define_insn "*avx_movddup"
3949 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,o")
3952 (match_operand:V2DF 1 "nonimmediate_operand" "xm,x")
3954 (parallel [(const_int 0)
3956 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
3958 vmovddup\t{%1, %0|%0, %1}
3960 [(set_attr "type" "sselog1,ssemov")
3961 (set_attr "prefix" "vex")
3962 (set_attr "mode" "V2DF")])
3964 (define_insn "*sse3_movddup"
3965 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,o")
3968 (match_operand:V2DF 1 "nonimmediate_operand" "xm,x")
3970 (parallel [(const_int 0)
3972 "TARGET_SSE3 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
3974 movddup\t{%1, %0|%0, %1}
3976 [(set_attr "type" "sselog1,ssemov")
3977 (set_attr "mode" "V2DF")])
3980 [(set (match_operand:V2DF 0 "memory_operand" "")
3983 (match_operand:V2DF 1 "register_operand" "")
3985 (parallel [(const_int 0)
3987 "TARGET_SSE3 && reload_completed"
3990 rtx low = gen_rtx_REG (DFmode, REGNO (operands[1]));
3991 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
3992 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
3996 (define_insn "avx_unpcklpd256"
3997 [(set (match_operand:V4DF 0 "register_operand" "=x")
4000 (match_operand:V4DF 1 "register_operand" "x")
4001 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4002 (parallel [(const_int 0) (const_int 4)
4003 (const_int 1) (const_int 5)])))]
4005 "vunpcklpd\t{%2, %1, %0|%0, %1, %2}"
4006 [(set_attr "type" "sselog")
4007 (set_attr "prefix" "vex")
4008 (set_attr "mode" "V4DF")])
4010 (define_expand "sse2_unpcklpd_exp"
4011 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
4014 (match_operand:V2DF 1 "nonimmediate_operand" "")
4015 (match_operand:V2DF 2 "nonimmediate_operand" ""))
4016 (parallel [(const_int 0)
4019 "ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);")
4021 (define_insn "*avx_unpcklpd"
4022 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o")
4025 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,0")
4026 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x"))
4027 (parallel [(const_int 0)
4029 "TARGET_AVX && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4031 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4032 vmovhpd\t{%2, %1, %0|%0, %1, %2}
4033 vmovlpd\t{%2, %H0|%H0, %2}"
4034 [(set_attr "type" "sselog,ssemov,ssemov")
4035 (set_attr "prefix" "vex")
4036 (set_attr "mode" "V2DF,V1DF,V1DF")])
4038 (define_insn "sse2_unpcklpd"
4039 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o")
4042 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0")
4043 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x"))
4044 (parallel [(const_int 0)
4046 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4048 unpcklpd\t{%2, %0|%0, %2}
4049 movhpd\t{%2, %0|%0, %2}
4050 movlpd\t{%2, %H0|%H0, %2}"
4051 [(set_attr "type" "sselog,ssemov,ssemov")
4052 (set_attr "mode" "V2DF,V1DF,V1DF")])
4054 (define_expand "avx_shufpd256"
4055 [(match_operand:V4DF 0 "register_operand" "")
4056 (match_operand:V4DF 1 "register_operand" "")
4057 (match_operand:V4DF 2 "nonimmediate_operand" "")
4058 (match_operand:SI 3 "const_int_operand" "")]
4061 int mask = INTVAL (operands[3]);
4062 emit_insn (gen_avx_shufpd256_1 (operands[0], operands[1], operands[2],
4064 GEN_INT (mask & 2 ? 5 : 4),
4065 GEN_INT (mask & 4 ? 3 : 2),
4066 GEN_INT (mask & 8 ? 7 : 6)));
4070 (define_insn "avx_shufpd256_1"
4071 [(set (match_operand:V4DF 0 "register_operand" "=x")
4074 (match_operand:V4DF 1 "register_operand" "x")
4075 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4076 (parallel [(match_operand 3 "const_0_to_1_operand" "")
4077 (match_operand 4 "const_4_to_5_operand" "")
4078 (match_operand 5 "const_2_to_3_operand" "")
4079 (match_operand 6 "const_6_to_7_operand" "")])))]
4083 mask = INTVAL (operands[3]);
4084 mask |= (INTVAL (operands[4]) - 4) << 1;
4085 mask |= (INTVAL (operands[5]) - 2) << 2;
4086 mask |= (INTVAL (operands[6]) - 6) << 3;
4087 operands[3] = GEN_INT (mask);
4089 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4091 [(set_attr "type" "sselog")
4092 (set_attr "prefix" "vex")
4093 (set_attr "mode" "V4DF")])
4095 (define_expand "sse2_shufpd"
4096 [(match_operand:V2DF 0 "register_operand" "")
4097 (match_operand:V2DF 1 "register_operand" "")
4098 (match_operand:V2DF 2 "nonimmediate_operand" "")
4099 (match_operand:SI 3 "const_int_operand" "")]
4102 int mask = INTVAL (operands[3]);
4103 emit_insn (gen_sse2_shufpd_v2df (operands[0], operands[1], operands[2],
4105 GEN_INT (mask & 2 ? 3 : 2)));
4109 (define_expand "vec_extract_even<mode>"
4110 [(set (match_operand:SSEMODE4S 0 "register_operand" "")
4111 (vec_select:SSEMODE4S
4112 (vec_concat:<ssedoublesizemode>
4113 (match_operand:SSEMODE4S 1 "register_operand" "")
4114 (match_operand:SSEMODE4S 2 "nonimmediate_operand" ""))
4115 (parallel [(const_int 0)
4121 (define_expand "vec_extract_odd<mode>"
4122 [(set (match_operand:SSEMODE4S 0 "register_operand" "")
4123 (vec_select:SSEMODE4S
4124 (vec_concat:<ssedoublesizemode>
4125 (match_operand:SSEMODE4S 1 "register_operand" "")
4126 (match_operand:SSEMODE4S 2 "nonimmediate_operand" ""))
4127 (parallel [(const_int 1)
4133 (define_expand "vec_extract_even<mode>"
4134 [(set (match_operand:SSEMODE2D 0 "register_operand" "")
4135 (vec_select:SSEMODE2D
4136 (vec_concat:<ssedoublesizemode>
4137 (match_operand:SSEMODE2D 1 "register_operand" "")
4138 (match_operand:SSEMODE2D 2 "nonimmediate_operand" ""))
4139 (parallel [(const_int 0)
4143 (define_expand "vec_extract_odd<mode>"
4144 [(set (match_operand:SSEMODE2D 0 "register_operand" "")
4145 (vec_select:SSEMODE2D
4146 (vec_concat:<ssedoublesizemode>
4147 (match_operand:SSEMODE2D 1 "register_operand" "")
4148 (match_operand:SSEMODE2D 2 "nonimmediate_operand" ""))
4149 (parallel [(const_int 1)
4153 ;; punpcklqdq and punpckhqdq are shorter than shufpd.
4154 (define_insn "*avx_punpckhqdq"
4155 [(set (match_operand:V2DI 0 "register_operand" "=x")
4158 (match_operand:V2DI 1 "register_operand" "x")
4159 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4160 (parallel [(const_int 1)
4163 "vpunpckhqdq\t{%2, %1, %0|%0, %1, %2}"
4164 [(set_attr "type" "sselog")
4165 (set_attr "prefix" "vex")
4166 (set_attr "mode" "TI")])
4168 (define_insn "sse2_punpckhqdq"
4169 [(set (match_operand:V2DI 0 "register_operand" "=x")
4172 (match_operand:V2DI 1 "register_operand" "0")
4173 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4174 (parallel [(const_int 1)
4177 "punpckhqdq\t{%2, %0|%0, %2}"
4178 [(set_attr "type" "sselog")
4179 (set_attr "prefix_data16" "1")
4180 (set_attr "mode" "TI")])
4182 (define_insn "*avx_punpcklqdq"
4183 [(set (match_operand:V2DI 0 "register_operand" "=x")
4186 (match_operand:V2DI 1 "register_operand" "x")
4187 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4188 (parallel [(const_int 0)
4191 "vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}"
4192 [(set_attr "type" "sselog")
4193 (set_attr "prefix" "vex")
4194 (set_attr "mode" "TI")])
4196 (define_insn "sse2_punpcklqdq"
4197 [(set (match_operand:V2DI 0 "register_operand" "=x")
4200 (match_operand:V2DI 1 "register_operand" "0")
4201 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4202 (parallel [(const_int 0)
4205 "punpcklqdq\t{%2, %0|%0, %2}"
4206 [(set_attr "type" "sselog")
4207 (set_attr "prefix_data16" "1")
4208 (set_attr "mode" "TI")])
4210 (define_insn "*avx_shufpd_<mode>"
4211 [(set (match_operand:SSEMODE2D 0 "register_operand" "=x")
4212 (vec_select:SSEMODE2D
4213 (vec_concat:<ssedoublesizemode>
4214 (match_operand:SSEMODE2D 1 "register_operand" "x")
4215 (match_operand:SSEMODE2D 2 "nonimmediate_operand" "xm"))
4216 (parallel [(match_operand 3 "const_0_to_1_operand" "")
4217 (match_operand 4 "const_2_to_3_operand" "")])))]
4221 mask = INTVAL (operands[3]);
4222 mask |= (INTVAL (operands[4]) - 2) << 1;
4223 operands[3] = GEN_INT (mask);
4225 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4227 [(set_attr "type" "sselog")
4228 (set_attr "prefix" "vex")
4229 (set_attr "mode" "V2DF")])
4231 (define_insn "sse2_shufpd_<mode>"
4232 [(set (match_operand:SSEMODE2D 0 "register_operand" "=x")
4233 (vec_select:SSEMODE2D
4234 (vec_concat:<ssedoublesizemode>
4235 (match_operand:SSEMODE2D 1 "register_operand" "0")
4236 (match_operand:SSEMODE2D 2 "nonimmediate_operand" "xm"))
4237 (parallel [(match_operand 3 "const_0_to_1_operand" "")
4238 (match_operand 4 "const_2_to_3_operand" "")])))]
4242 mask = INTVAL (operands[3]);
4243 mask |= (INTVAL (operands[4]) - 2) << 1;
4244 operands[3] = GEN_INT (mask);
4246 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
4248 [(set_attr "type" "sselog")
4249 (set_attr "mode" "V2DF")])
4251 ;; Avoid combining registers from different units in a single alternative,
4252 ;; see comment above inline_secondary_memory_needed function in i386.c
4253 (define_insn "*avx_storehpd"
4254 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
4256 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,o,o,o")
4257 (parallel [(const_int 1)])))]
4258 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4260 vmovhpd\t{%1, %0|%0, %1}
4261 vunpckhpd\t{%1, %1, %0|%0, %1, %1}
4265 [(set_attr "type" "ssemov,sselog1,ssemov,fmov,imov")
4266 (set_attr "prefix" "vex")
4267 (set_attr "mode" "V1DF,V2DF,DF,DF,DF")])
4269 (define_insn "sse2_storehpd"
4270 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
4272 (match_operand:V2DF 1 "nonimmediate_operand" " x,0,o,o,o")
4273 (parallel [(const_int 1)])))]
4274 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4276 movhpd\t{%1, %0|%0, %1}
4281 [(set_attr "type" "ssemov,sselog1,ssemov,fmov,imov")
4282 (set_attr "mode" "V1DF,V2DF,DF,DF,DF")])
4285 [(set (match_operand:DF 0 "register_operand" "")
4287 (match_operand:V2DF 1 "memory_operand" "")
4288 (parallel [(const_int 1)])))]
4289 "TARGET_SSE2 && reload_completed"
4290 [(set (match_dup 0) (match_dup 1))]
4292 operands[1] = adjust_address (operands[1], DFmode, 8);
4295 ;; Avoid combining registers from different units in a single alternative,
4296 ;; see comment above inline_secondary_memory_needed function in i386.c
4297 (define_insn "sse2_storelpd"
4298 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
4300 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m,m,m")
4301 (parallel [(const_int 0)])))]
4302 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4304 %vmovlpd\t{%1, %0|%0, %1}
4309 [(set_attr "type" "ssemov,ssemov,ssemov,fmov,imov")
4310 (set_attr "prefix" "maybe_vex")
4311 (set_attr "mode" "V1DF,DF,DF,DF,DF")])
4314 [(set (match_operand:DF 0 "register_operand" "")
4316 (match_operand:V2DF 1 "nonimmediate_operand" "")
4317 (parallel [(const_int 0)])))]
4318 "TARGET_SSE2 && reload_completed"
4321 rtx op1 = operands[1];
4323 op1 = gen_rtx_REG (DFmode, REGNO (op1));
4325 op1 = gen_lowpart (DFmode, op1);
4326 emit_move_insn (operands[0], op1);
4330 (define_expand "sse2_loadhpd_exp"
4331 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
4334 (match_operand:V2DF 1 "nonimmediate_operand" "")
4335 (parallel [(const_int 0)]))
4336 (match_operand:DF 2 "nonimmediate_operand" "")))]
4338 "ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);")
4340 ;; Avoid combining registers from different units in a single alternative,
4341 ;; see comment above inline_secondary_memory_needed function in i386.c
4342 (define_insn "*avx_loadhpd"
4343 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o,o,o")
4346 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,0,0,0")
4347 (parallel [(const_int 0)]))
4348 (match_operand:DF 2 "nonimmediate_operand" " m,x,x,*f,r")))]
4349 "TARGET_AVX && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4351 vmovhpd\t{%2, %1, %0|%0, %1, %2}
4352 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4356 [(set_attr "type" "ssemov,sselog,ssemov,fmov,imov")
4357 (set_attr "prefix" "vex")
4358 (set_attr "mode" "V1DF,V2DF,DF,DF,DF")])
4360 (define_insn "sse2_loadhpd"
4361 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,o,o,o")
4364 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,x,0,0,0")
4365 (parallel [(const_int 0)]))
4366 (match_operand:DF 2 "nonimmediate_operand" " m,x,0,x,*f,r")))]
4367 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4369 movhpd\t{%2, %0|%0, %2}
4370 unpcklpd\t{%2, %0|%0, %2}
4371 shufpd\t{$1, %1, %0|%0, %1, 1}
4375 [(set_attr "type" "ssemov,sselog,sselog,ssemov,fmov,imov")
4376 (set_attr "mode" "V1DF,V2DF,V2DF,DF,DF,DF")])
4379 [(set (match_operand:V2DF 0 "memory_operand" "")
4381 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
4382 (match_operand:DF 1 "register_operand" "")))]
4383 "TARGET_SSE2 && reload_completed"
4384 [(set (match_dup 0) (match_dup 1))]
4386 operands[0] = adjust_address (operands[0], DFmode, 8);
4389 (define_expand "sse2_loadlpd_exp"
4390 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
4392 (match_operand:DF 2 "nonimmediate_operand" "")
4394 (match_operand:V2DF 1 "nonimmediate_operand" "")
4395 (parallel [(const_int 1)]))))]
4397 "ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);")
4399 ;; Avoid combining registers from different units in a single alternative,
4400 ;; see comment above inline_secondary_memory_needed function in i386.c
4401 (define_insn "*avx_loadlpd"
4402 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,m,m,m")
4404 (match_operand:DF 2 "nonimmediate_operand" " m,m,x,x,x,*f,r")
4406 (match_operand:V2DF 1 "vector_move_operand" " C,x,x,o,0,0,0")
4407 (parallel [(const_int 1)]))))]
4408 "TARGET_AVX && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4410 vmovsd\t{%2, %0|%0, %2}
4411 vmovlpd\t{%2, %1, %0|%0, %1, %2}
4412 vmovsd\t{%2, %1, %0|%0, %1, %2}
4413 vmovhpd\t{%H1, %2, %0|%0, %2, %H1}
4417 [(set_attr "type" "ssemov,ssemov,ssemov,ssemov,ssemov,fmov,imov")
4418 (set_attr "prefix" "vex")
4419 (set_attr "mode" "DF,V1DF,V1DF,V1DF,DF,DF,DF")])
4421 (define_insn "sse2_loadlpd"
4422 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m,m,m")
4424 (match_operand:DF 2 "nonimmediate_operand" " m,m,x,0,0,x,*f,r")
4426 (match_operand:V2DF 1 "vector_move_operand" " C,0,0,x,o,0,0,0")
4427 (parallel [(const_int 1)]))))]
4428 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4430 movsd\t{%2, %0|%0, %2}
4431 movlpd\t{%2, %0|%0, %2}
4432 movsd\t{%2, %0|%0, %2}
4433 shufpd\t{$2, %2, %0|%0, %2, 2}
4434 movhpd\t{%H1, %0|%0, %H1}
4438 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov,fmov,imov")
4439 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,DF,DF,DF")])
4442 [(set (match_operand:V2DF 0 "memory_operand" "")
4444 (match_operand:DF 1 "register_operand" "")
4445 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
4446 "TARGET_SSE2 && reload_completed"
4447 [(set (match_dup 0) (match_dup 1))]
4449 operands[0] = adjust_address (operands[0], DFmode, 8);
4452 ;; Not sure these two are ever used, but it doesn't hurt to have
4454 (define_insn "*vec_extractv2df_1_sse"
4455 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
4457 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
4458 (parallel [(const_int 1)])))]
4459 "!TARGET_SSE2 && TARGET_SSE
4460 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4462 movhps\t{%1, %0|%0, %1}
4463 movhlps\t{%1, %0|%0, %1}
4464 movlps\t{%H1, %0|%0, %H1}"
4465 [(set_attr "type" "ssemov")
4466 (set_attr "mode" "V2SF,V4SF,V2SF")])
4468 (define_insn "*vec_extractv2df_0_sse"
4469 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
4471 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
4472 (parallel [(const_int 0)])))]
4473 "!TARGET_SSE2 && TARGET_SSE
4474 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4476 movlps\t{%1, %0|%0, %1}
4477 movaps\t{%1, %0|%0, %1}
4478 movlps\t{%1, %0|%0, %1}"
4479 [(set_attr "type" "ssemov")
4480 (set_attr "mode" "V2SF,V4SF,V2SF")])
4482 (define_insn "*avx_movsd"
4483 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m,x,o")
4485 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x,x,0")
4486 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,0,o,x")
4490 vmovsd\t{%2, %1, %0|%0, %1, %2}
4491 vmovlpd\t{%2, %1, %0|%0, %1, %2}
4492 vmovlpd\t{%2, %0|%0, %2}
4493 vmovhps\t{%H1, %2, %0|%0, %2, %H1}
4494 vmovhps\t{%1, %H0|%H0, %1}"
4495 [(set_attr "type" "ssemov,ssemov,ssemov,ssemov,ssemov")
4496 (set_attr "prefix" "vex")
4497 (set_attr "mode" "DF,V1DF,V1DF,V1DF,V1DF")])
4499 (define_insn "sse2_movsd"
4500 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m,x,x,o")
4502 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x,0,0,0")
4503 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0,x,o,x")
4507 movsd\t{%2, %0|%0, %2}
4508 movlpd\t{%2, %0|%0, %2}
4509 movlpd\t{%2, %0|%0, %2}
4510 shufpd\t{$2, %2, %0|%0, %2, 2}
4511 movhps\t{%H1, %0|%0, %H1}
4512 movhps\t{%1, %H0|%H0, %1}"
4513 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
4514 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,V1DF")])
4516 (define_insn "*vec_dupv2df_sse3"
4517 [(set (match_operand:V2DF 0 "register_operand" "=x")
4519 (match_operand:DF 1 "nonimmediate_operand" "xm")))]
4521 "%vmovddup\t{%1, %0|%0, %1}"
4522 [(set_attr "type" "sselog1")
4523 (set_attr "prefix" "maybe_vex")
4524 (set_attr "mode" "DF")])
4526 (define_insn "vec_dupv2df"
4527 [(set (match_operand:V2DF 0 "register_operand" "=x")
4529 (match_operand:DF 1 "register_operand" "0")))]
4532 [(set_attr "type" "sselog1")
4533 (set_attr "mode" "V2DF")])
4535 (define_insn "*vec_concatv2df_sse3"
4536 [(set (match_operand:V2DF 0 "register_operand" "=x")
4538 (match_operand:DF 1 "nonimmediate_operand" "xm")
4541 "%vmovddup\t{%1, %0|%0, %1}"
4542 [(set_attr "type" "sselog1")
4543 (set_attr "prefix" "maybe_vex")
4544 (set_attr "mode" "DF")])
4546 (define_insn "*vec_concatv2df_avx"
4547 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
4549 (match_operand:DF 1 "nonimmediate_operand" " x,x,m")
4550 (match_operand:DF 2 "vector_move_operand" " x,m,C")))]
4553 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4554 vmovhpd\t{%2, %1, %0|%0, %1, %2}
4555 vmovsd\t{%1, %0|%0, %1}"
4556 [(set_attr "type" "ssemov")
4557 (set_attr "prefix" "vex")
4558 (set_attr "mode" "DF,V1DF,DF")])
4560 (define_insn "*vec_concatv2df"
4561 [(set (match_operand:V2DF 0 "register_operand" "=Y2,Y2,Y2,x,x")
4563 (match_operand:DF 1 "nonimmediate_operand" " 0 ,0 ,m ,0,0")
4564 (match_operand:DF 2 "vector_move_operand" " Y2,m ,C ,x,m")))]
4567 unpcklpd\t{%2, %0|%0, %2}
4568 movhpd\t{%2, %0|%0, %2}
4569 movsd\t{%1, %0|%0, %1}
4570 movlhps\t{%2, %0|%0, %2}
4571 movhps\t{%2, %0|%0, %2}"
4572 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,ssemov")
4573 (set_attr "mode" "V2DF,V1DF,DF,V4SF,V2SF")])
4575 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4577 ;; Parallel integral arithmetic
4579 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4581 (define_expand "neg<mode>2"
4582 [(set (match_operand:SSEMODEI 0 "register_operand" "")
4585 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")))]
4587 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
4589 (define_expand "<plusminus_insn><mode>3"
4590 [(set (match_operand:SSEMODEI 0 "register_operand" "")
4592 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
4593 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
4595 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
4597 (define_insn "*avx_<plusminus_insn><mode>3"
4598 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
4600 (match_operand:SSEMODEI 1 "nonimmediate_operand" "<comm>x")
4601 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
4602 "TARGET_AVX && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
4603 "vp<plusminus_mnemonic><ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
4604 [(set_attr "type" "sseiadd")
4605 (set_attr "prefix" "vex")
4606 (set_attr "mode" "TI")])
4608 (define_insn "*<plusminus_insn><mode>3"
4609 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
4611 (match_operand:SSEMODEI 1 "nonimmediate_operand" "<comm>0")
4612 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
4613 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
4614 "p<plusminus_mnemonic><ssevecsize>\t{%2, %0|%0, %2}"
4615 [(set_attr "type" "sseiadd")
4616 (set_attr "prefix_data16" "1")
4617 (set_attr "mode" "TI")])
4619 (define_expand "sse2_<plusminus_insn><mode>3"
4620 [(set (match_operand:SSEMODE12 0 "register_operand" "")
4621 (sat_plusminus:SSEMODE12
4622 (match_operand:SSEMODE12 1 "nonimmediate_operand" "")
4623 (match_operand:SSEMODE12 2 "nonimmediate_operand" "")))]
4625 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
4627 (define_insn "*avx_<plusminus_insn><mode>3"
4628 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
4629 (sat_plusminus:SSEMODE12
4630 (match_operand:SSEMODE12 1 "nonimmediate_operand" "<comm>x")
4631 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
4632 "TARGET_AVX && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
4633 "vp<plusminus_mnemonic><ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
4634 [(set_attr "type" "sseiadd")
4635 (set_attr "prefix" "vex")
4636 (set_attr "mode" "TI")])
4638 (define_insn "*sse2_<plusminus_insn><mode>3"
4639 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
4640 (sat_plusminus:SSEMODE12
4641 (match_operand:SSEMODE12 1 "nonimmediate_operand" "<comm>0")
4642 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
4643 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
4644 "p<plusminus_mnemonic><ssevecsize>\t{%2, %0|%0, %2}"
4645 [(set_attr "type" "sseiadd")
4646 (set_attr "prefix_data16" "1")
4647 (set_attr "mode" "TI")])
4649 (define_insn_and_split "mulv16qi3"
4650 [(set (match_operand:V16QI 0 "register_operand" "")
4651 (mult:V16QI (match_operand:V16QI 1 "register_operand" "")
4652 (match_operand:V16QI 2 "register_operand" "")))]
4654 && !(reload_completed || reload_in_progress)"
4659 rtx t[12], op0, op[3];
4664 /* On SSE5, we can take advantage of the pperm instruction to pack and
4665 unpack the bytes. Unpack data such that we've got a source byte in
4666 each low byte of each word. We don't care what goes into the high
4667 byte, so put 0 there. */
4668 for (i = 0; i < 6; ++i)
4669 t[i] = gen_reg_rtx (V8HImode);
4671 for (i = 0; i < 2; i++)
4674 op[1] = operands[i+1];
4675 ix86_expand_sse5_unpack (op, true, true); /* high bytes */
4678 ix86_expand_sse5_unpack (op, true, false); /* low bytes */
4681 /* Multiply words. */
4682 emit_insn (gen_mulv8hi3 (t[4], t[0], t[1])); /* high bytes */
4683 emit_insn (gen_mulv8hi3 (t[5], t[2], t[3])); /* low bytes */
4685 /* Pack the low byte of each word back into a single xmm */
4686 op[0] = operands[0];
4689 ix86_expand_sse5_pack (op);
4693 for (i = 0; i < 12; ++i)
4694 t[i] = gen_reg_rtx (V16QImode);
4696 /* Unpack data such that we've got a source byte in each low byte of
4697 each word. We don't care what goes into the high byte of each word.
4698 Rather than trying to get zero in there, most convenient is to let
4699 it be a copy of the low byte. */
4700 emit_insn (gen_sse2_punpckhbw (t[0], operands[1], operands[1]));
4701 emit_insn (gen_sse2_punpckhbw (t[1], operands[2], operands[2]));
4702 emit_insn (gen_sse2_punpcklbw (t[2], operands[1], operands[1]));
4703 emit_insn (gen_sse2_punpcklbw (t[3], operands[2], operands[2]));
4705 /* Multiply words. The end-of-line annotations here give a picture of what
4706 the output of that instruction looks like. Dot means don't care; the
4707 letters are the bytes of the result with A being the most significant. */
4708 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[4]), /* .A.B.C.D.E.F.G.H */
4709 gen_lowpart (V8HImode, t[0]),
4710 gen_lowpart (V8HImode, t[1])));
4711 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[5]), /* .I.J.K.L.M.N.O.P */
4712 gen_lowpart (V8HImode, t[2]),
4713 gen_lowpart (V8HImode, t[3])));
4715 /* Extract the relevant bytes and merge them back together. */
4716 emit_insn (gen_sse2_punpckhbw (t[6], t[5], t[4])); /* ..AI..BJ..CK..DL */
4717 emit_insn (gen_sse2_punpcklbw (t[7], t[5], t[4])); /* ..EM..FN..GO..HP */
4718 emit_insn (gen_sse2_punpckhbw (t[8], t[7], t[6])); /* ....AEIM....BFJN */
4719 emit_insn (gen_sse2_punpcklbw (t[9], t[7], t[6])); /* ....CGKO....DHLP */
4720 emit_insn (gen_sse2_punpckhbw (t[10], t[9], t[8])); /* ........ACEGIKMO */
4721 emit_insn (gen_sse2_punpcklbw (t[11], t[9], t[8])); /* ........BDFHJLNP */
4724 emit_insn (gen_sse2_punpcklbw (op0, t[11], t[10])); /* ABCDEFGHIJKLMNOP */
4728 (define_expand "mulv8hi3"
4729 [(set (match_operand:V8HI 0 "register_operand" "")
4730 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
4731 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
4733 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
4735 (define_insn "*avx_mulv8hi3"
4736 [(set (match_operand:V8HI 0 "register_operand" "=x")
4737 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%x")
4738 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
4739 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
4740 "vpmullw\t{%2, %1, %0|%0, %1, %2}"
4741 [(set_attr "type" "sseimul")
4742 (set_attr "prefix" "vex")
4743 (set_attr "mode" "TI")])
4745 (define_insn "*mulv8hi3"
4746 [(set (match_operand:V8HI 0 "register_operand" "=x")
4747 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
4748 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
4749 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
4750 "pmullw\t{%2, %0|%0, %2}"
4751 [(set_attr "type" "sseimul")
4752 (set_attr "prefix_data16" "1")
4753 (set_attr "mode" "TI")])
4755 (define_expand "smulv8hi3_highpart"
4756 [(set (match_operand:V8HI 0 "register_operand" "")
4761 (match_operand:V8HI 1 "nonimmediate_operand" ""))
4763 (match_operand:V8HI 2 "nonimmediate_operand" "")))
4766 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
4768 (define_insn "*avxv8hi3_highpart"
4769 [(set (match_operand:V8HI 0 "register_operand" "=x")
4774 (match_operand:V8HI 1 "nonimmediate_operand" "%x"))
4776 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
4778 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
4779 "vpmulhw\t{%2, %1, %0|%0, %1, %2}"
4780 [(set_attr "type" "sseimul")
4781 (set_attr "prefix" "vex")
4782 (set_attr "mode" "TI")])
4784 (define_insn "*smulv8hi3_highpart"
4785 [(set (match_operand:V8HI 0 "register_operand" "=x")
4790 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
4792 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
4794 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
4795 "pmulhw\t{%2, %0|%0, %2}"
4796 [(set_attr "type" "sseimul")
4797 (set_attr "prefix_data16" "1")
4798 (set_attr "mode" "TI")])
4800 (define_expand "umulv8hi3_highpart"
4801 [(set (match_operand:V8HI 0 "register_operand" "")
4806 (match_operand:V8HI 1 "nonimmediate_operand" ""))
4808 (match_operand:V8HI 2 "nonimmediate_operand" "")))
4811 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
4813 (define_insn "*avx_umulv8hi3_highpart"
4814 [(set (match_operand:V8HI 0 "register_operand" "=x")
4819 (match_operand:V8HI 1 "nonimmediate_operand" "%x"))
4821 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
4823 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
4824 "vpmulhuw\t{%2, %1, %0|%0, %1, %2}"
4825 [(set_attr "type" "sseimul")
4826 (set_attr "prefix" "vex")
4827 (set_attr "mode" "TI")])
4829 (define_insn "*umulv8hi3_highpart"
4830 [(set (match_operand:V8HI 0 "register_operand" "=x")
4835 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
4837 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
4839 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
4840 "pmulhuw\t{%2, %0|%0, %2}"
4841 [(set_attr "type" "sseimul")
4842 (set_attr "prefix_data16" "1")
4843 (set_attr "mode" "TI")])
4845 (define_expand "sse2_umulv2siv2di3"
4846 [(set (match_operand:V2DI 0 "register_operand" "")
4850 (match_operand:V4SI 1 "nonimmediate_operand" "")
4851 (parallel [(const_int 0) (const_int 2)])))
4854 (match_operand:V4SI 2 "nonimmediate_operand" "")
4855 (parallel [(const_int 0) (const_int 2)])))))]
4857 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
4859 (define_insn "*avx_umulv2siv2di3"
4860 [(set (match_operand:V2DI 0 "register_operand" "=x")
4864 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
4865 (parallel [(const_int 0) (const_int 2)])))
4868 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
4869 (parallel [(const_int 0) (const_int 2)])))))]
4870 "TARGET_AVX && ix86_binary_operator_ok (MULT, V4SImode, operands)"
4871 "vpmuludq\t{%2, %1, %0|%0, %1, %2}"
4872 [(set_attr "type" "sseimul")
4873 (set_attr "prefix" "vex")
4874 (set_attr "mode" "TI")])
4876 (define_insn "*sse2_umulv2siv2di3"
4877 [(set (match_operand:V2DI 0 "register_operand" "=x")
4881 (match_operand:V4SI 1 "nonimmediate_operand" "%0")
4882 (parallel [(const_int 0) (const_int 2)])))
4885 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
4886 (parallel [(const_int 0) (const_int 2)])))))]
4887 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
4888 "pmuludq\t{%2, %0|%0, %2}"
4889 [(set_attr "type" "sseimul")
4890 (set_attr "prefix_data16" "1")
4891 (set_attr "mode" "TI")])
4893 (define_expand "sse4_1_mulv2siv2di3"
4894 [(set (match_operand:V2DI 0 "register_operand" "")
4898 (match_operand:V4SI 1 "nonimmediate_operand" "")
4899 (parallel [(const_int 0) (const_int 2)])))
4902 (match_operand:V4SI 2 "nonimmediate_operand" "")
4903 (parallel [(const_int 0) (const_int 2)])))))]
4905 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
4907 (define_insn "*avx_mulv2siv2di3"
4908 [(set (match_operand:V2DI 0 "register_operand" "=x")
4912 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
4913 (parallel [(const_int 0) (const_int 2)])))
4916 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
4917 (parallel [(const_int 0) (const_int 2)])))))]
4918 "TARGET_AVX && ix86_binary_operator_ok (MULT, V4SImode, operands)"
4919 "vpmuldq\t{%2, %1, %0|%0, %1, %2}"
4920 [(set_attr "type" "sseimul")
4921 (set_attr "prefix" "vex")
4922 (set_attr "mode" "TI")])
4924 (define_insn "*sse4_1_mulv2siv2di3"
4925 [(set (match_operand:V2DI 0 "register_operand" "=x")
4929 (match_operand:V4SI 1 "nonimmediate_operand" "%0")
4930 (parallel [(const_int 0) (const_int 2)])))
4933 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
4934 (parallel [(const_int 0) (const_int 2)])))))]
4935 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
4936 "pmuldq\t{%2, %0|%0, %2}"
4937 [(set_attr "type" "sseimul")
4938 (set_attr "prefix_extra" "1")
4939 (set_attr "mode" "TI")])
4941 (define_expand "sse2_pmaddwd"
4942 [(set (match_operand:V4SI 0 "register_operand" "")
4947 (match_operand:V8HI 1 "nonimmediate_operand" "")
4948 (parallel [(const_int 0)
4954 (match_operand:V8HI 2 "nonimmediate_operand" "")
4955 (parallel [(const_int 0)
4961 (vec_select:V4HI (match_dup 1)
4962 (parallel [(const_int 1)
4967 (vec_select:V4HI (match_dup 2)
4968 (parallel [(const_int 1)
4971 (const_int 7)]))))))]
4973 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
4975 (define_insn "*avx_pmaddwd"
4976 [(set (match_operand:V4SI 0 "register_operand" "=x")
4981 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
4982 (parallel [(const_int 0)
4988 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
4989 (parallel [(const_int 0)
4995 (vec_select:V4HI (match_dup 1)
4996 (parallel [(const_int 1)
5001 (vec_select:V4HI (match_dup 2)
5002 (parallel [(const_int 1)
5005 (const_int 7)]))))))]
5006 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5007 "vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
5008 [(set_attr "type" "sseiadd")
5009 (set_attr "prefix" "vex")
5010 (set_attr "mode" "TI")])
5012 (define_insn "*sse2_pmaddwd"
5013 [(set (match_operand:V4SI 0 "register_operand" "=x")
5018 (match_operand:V8HI 1 "nonimmediate_operand" "%0")
5019 (parallel [(const_int 0)
5025 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5026 (parallel [(const_int 0)
5032 (vec_select:V4HI (match_dup 1)
5033 (parallel [(const_int 1)
5038 (vec_select:V4HI (match_dup 2)
5039 (parallel [(const_int 1)
5042 (const_int 7)]))))))]
5043 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5044 "pmaddwd\t{%2, %0|%0, %2}"
5045 [(set_attr "type" "sseiadd")
5046 (set_attr "prefix_data16" "1")
5047 (set_attr "mode" "TI")])
5049 (define_expand "mulv4si3"
5050 [(set (match_operand:V4SI 0 "register_operand" "")
5051 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
5052 (match_operand:V4SI 2 "register_operand" "")))]
5055 if (TARGET_SSE4_1 || TARGET_SSE5)
5056 ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);
5059 (define_insn "*avx_mulv4si3"
5060 [(set (match_operand:V4SI 0 "register_operand" "=x")
5061 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%x")
5062 (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
5063 "TARGET_AVX && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5064 "vpmulld\t{%2, %1, %0|%0, %1, %2}"
5065 [(set_attr "type" "sseimul")
5066 (set_attr "prefix" "vex")
5067 (set_attr "mode" "TI")])
5069 (define_insn "*sse4_1_mulv4si3"
5070 [(set (match_operand:V4SI 0 "register_operand" "=x")
5071 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%0")
5072 (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
5073 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5074 "pmulld\t{%2, %0|%0, %2}"
5075 [(set_attr "type" "sseimul")
5076 (set_attr "prefix_extra" "1")
5077 (set_attr "mode" "TI")])
5079 ;; We don't have a straight 32-bit parallel multiply on SSE5, so fake it with a
5080 ;; multiply/add. In general, we expect the define_split to occur before
5081 ;; register allocation, so we have to handle the corner case where the target
5082 ;; is the same as one of the inputs.
5083 (define_insn_and_split "*sse5_mulv4si3"
5084 [(set (match_operand:V4SI 0 "register_operand" "=&x")
5085 (mult:V4SI (match_operand:V4SI 1 "register_operand" "%x")
5086 (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
5089 "&& (reload_completed
5090 || (!reg_mentioned_p (operands[0], operands[1])
5091 && !reg_mentioned_p (operands[0], operands[2])))"
5095 (plus:V4SI (mult:V4SI (match_dup 1)
5099 operands[3] = CONST0_RTX (V4SImode);
5101 [(set_attr "type" "ssemuladd")
5102 (set_attr "mode" "TI")])
5104 (define_insn_and_split "*sse2_mulv4si3"
5105 [(set (match_operand:V4SI 0 "register_operand" "")
5106 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
5107 (match_operand:V4SI 2 "register_operand" "")))]
5108 "TARGET_SSE2 && !TARGET_SSE4_1 && !TARGET_SSE5
5109 && !(reload_completed || reload_in_progress)"
5114 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
5120 t1 = gen_reg_rtx (V4SImode);
5121 t2 = gen_reg_rtx (V4SImode);
5122 t3 = gen_reg_rtx (V4SImode);
5123 t4 = gen_reg_rtx (V4SImode);
5124 t5 = gen_reg_rtx (V4SImode);
5125 t6 = gen_reg_rtx (V4SImode);
5126 thirtytwo = GEN_INT (32);
5128 /* Multiply elements 2 and 0. */
5129 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t1),
5132 /* Shift both input vectors down one element, so that elements 3
5133 and 1 are now in the slots for elements 2 and 0. For K8, at
5134 least, this is faster than using a shuffle. */
5135 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2),
5136 gen_lowpart (TImode, op1),
5138 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3),
5139 gen_lowpart (TImode, op2),
5141 /* Multiply elements 3 and 1. */
5142 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t4),
5145 /* Move the results in element 2 down to element 1; we don't care
5146 what goes in elements 2 and 3. */
5147 emit_insn (gen_sse2_pshufd_1 (t5, t1, const0_rtx, const2_rtx,
5148 const0_rtx, const0_rtx));
5149 emit_insn (gen_sse2_pshufd_1 (t6, t4, const0_rtx, const2_rtx,
5150 const0_rtx, const0_rtx));
5152 /* Merge the parts back together. */
5153 emit_insn (gen_sse2_punpckldq (op0, t5, t6));
5157 (define_insn_and_split "mulv2di3"
5158 [(set (match_operand:V2DI 0 "register_operand" "")
5159 (mult:V2DI (match_operand:V2DI 1 "register_operand" "")
5160 (match_operand:V2DI 2 "register_operand" "")))]
5162 && !(reload_completed || reload_in_progress)"
5167 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
5172 /* op1: A,B,C,D, op2: E,F,G,H */
5174 op1 = gen_lowpart (V4SImode, operands[1]);
5175 op2 = gen_lowpart (V4SImode, operands[2]);
5176 t1 = gen_reg_rtx (V4SImode);
5177 t2 = gen_reg_rtx (V4SImode);
5178 t3 = gen_reg_rtx (V4SImode);
5179 t4 = gen_reg_rtx (V2DImode);
5180 t5 = gen_reg_rtx (V2DImode);
5183 emit_insn (gen_sse2_pshufd_1 (t1, op1,
5190 emit_move_insn (t2, CONST0_RTX (V4SImode));
5192 /* t3: (B*E),(A*F),(D*G),(C*H) */
5193 emit_insn (gen_sse5_pmacsdd (t3, t1, op2, t2));
5195 /* t4: (B*E)+(A*F), (D*G)+(C*H) */
5196 emit_insn (gen_sse5_phadddq (t4, t3));
5198 /* t5: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
5199 emit_insn (gen_ashlv2di3 (t5, t4, GEN_INT (32)));
5201 /* op0: (((B*E)+(A*F))<<32)+(B*F), (((D*G)+(C*H))<<32)+(D*H) */
5202 emit_insn (gen_sse5_pmacsdql (op0, op1, op2, t5));
5209 t1 = gen_reg_rtx (V2DImode);
5210 t2 = gen_reg_rtx (V2DImode);
5211 t3 = gen_reg_rtx (V2DImode);
5212 t4 = gen_reg_rtx (V2DImode);
5213 t5 = gen_reg_rtx (V2DImode);
5214 t6 = gen_reg_rtx (V2DImode);
5215 thirtytwo = GEN_INT (32);
5217 /* Multiply low parts. */
5218 emit_insn (gen_sse2_umulv2siv2di3 (t1, gen_lowpart (V4SImode, op1),
5219 gen_lowpart (V4SImode, op2)));
5221 /* Shift input vectors left 32 bits so we can multiply high parts. */
5222 emit_insn (gen_lshrv2di3 (t2, op1, thirtytwo));
5223 emit_insn (gen_lshrv2di3 (t3, op2, thirtytwo));
5225 /* Multiply high parts by low parts. */
5226 emit_insn (gen_sse2_umulv2siv2di3 (t4, gen_lowpart (V4SImode, op1),
5227 gen_lowpart (V4SImode, t3)));
5228 emit_insn (gen_sse2_umulv2siv2di3 (t5, gen_lowpart (V4SImode, op2),
5229 gen_lowpart (V4SImode, t2)));
5231 /* Shift them back. */
5232 emit_insn (gen_ashlv2di3 (t4, t4, thirtytwo));
5233 emit_insn (gen_ashlv2di3 (t5, t5, thirtytwo));
5235 /* Add the three parts together. */
5236 emit_insn (gen_addv2di3 (t6, t1, t4));
5237 emit_insn (gen_addv2di3 (op0, t6, t5));
5241 (define_expand "vec_widen_smult_hi_v8hi"
5242 [(match_operand:V4SI 0 "register_operand" "")
5243 (match_operand:V8HI 1 "register_operand" "")
5244 (match_operand:V8HI 2 "register_operand" "")]
5247 rtx op1, op2, t1, t2, dest;
5251 t1 = gen_reg_rtx (V8HImode);
5252 t2 = gen_reg_rtx (V8HImode);
5253 dest = gen_lowpart (V8HImode, operands[0]);
5255 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5256 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
5257 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
5261 (define_expand "vec_widen_smult_lo_v8hi"
5262 [(match_operand:V4SI 0 "register_operand" "")
5263 (match_operand:V8HI 1 "register_operand" "")
5264 (match_operand:V8HI 2 "register_operand" "")]
5267 rtx op1, op2, t1, t2, dest;
5271 t1 = gen_reg_rtx (V8HImode);
5272 t2 = gen_reg_rtx (V8HImode);
5273 dest = gen_lowpart (V8HImode, operands[0]);
5275 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5276 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
5277 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
5281 (define_expand "vec_widen_umult_hi_v8hi"
5282 [(match_operand:V4SI 0 "register_operand" "")
5283 (match_operand:V8HI 1 "register_operand" "")
5284 (match_operand:V8HI 2 "register_operand" "")]
5287 rtx op1, op2, t1, t2, dest;
5291 t1 = gen_reg_rtx (V8HImode);
5292 t2 = gen_reg_rtx (V8HImode);
5293 dest = gen_lowpart (V8HImode, operands[0]);
5295 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5296 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
5297 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
5301 (define_expand "vec_widen_umult_lo_v8hi"
5302 [(match_operand:V4SI 0 "register_operand" "")
5303 (match_operand:V8HI 1 "register_operand" "")
5304 (match_operand:V8HI 2 "register_operand" "")]
5307 rtx op1, op2, t1, t2, dest;
5311 t1 = gen_reg_rtx (V8HImode);
5312 t2 = gen_reg_rtx (V8HImode);
5313 dest = gen_lowpart (V8HImode, operands[0]);
5315 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5316 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
5317 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
5321 (define_expand "vec_widen_smult_hi_v4si"
5322 [(match_operand:V2DI 0 "register_operand" "")
5323 (match_operand:V4SI 1 "register_operand" "")
5324 (match_operand:V4SI 2 "register_operand" "")]
5329 t1 = gen_reg_rtx (V4SImode);
5330 t2 = gen_reg_rtx (V4SImode);
5332 emit_insn (gen_sse2_pshufd_1 (t1, operands[1],
5337 emit_insn (gen_sse2_pshufd_1 (t2, operands[2],
5342 emit_insn (gen_sse5_mulv2div2di3_high (operands[0], t1, t2));
5346 (define_expand "vec_widen_smult_lo_v4si"
5347 [(match_operand:V2DI 0 "register_operand" "")
5348 (match_operand:V4SI 1 "register_operand" "")
5349 (match_operand:V4SI 2 "register_operand" "")]
5354 t1 = gen_reg_rtx (V4SImode);
5355 t2 = gen_reg_rtx (V4SImode);
5357 emit_insn (gen_sse2_pshufd_1 (t1, operands[1],
5362 emit_insn (gen_sse2_pshufd_1 (t2, operands[2],
5367 emit_insn (gen_sse5_mulv2div2di3_low (operands[0], t1, t2));
5372 (define_expand "vec_widen_umult_hi_v4si"
5373 [(match_operand:V2DI 0 "register_operand" "")
5374 (match_operand:V4SI 1 "register_operand" "")
5375 (match_operand:V4SI 2 "register_operand" "")]
5378 rtx op1, op2, t1, t2;
5382 t1 = gen_reg_rtx (V4SImode);
5383 t2 = gen_reg_rtx (V4SImode);
5385 emit_insn (gen_vec_interleave_highv4si (t1, op1, op1));
5386 emit_insn (gen_vec_interleave_highv4si (t2, op2, op2));
5387 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
5391 (define_expand "vec_widen_umult_lo_v4si"
5392 [(match_operand:V2DI 0 "register_operand" "")
5393 (match_operand:V4SI 1 "register_operand" "")
5394 (match_operand:V4SI 2 "register_operand" "")]
5397 rtx op1, op2, t1, t2;
5401 t1 = gen_reg_rtx (V4SImode);
5402 t2 = gen_reg_rtx (V4SImode);
5404 emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1));
5405 emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2));
5406 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
5410 (define_expand "sdot_prodv8hi"
5411 [(match_operand:V4SI 0 "register_operand" "")
5412 (match_operand:V8HI 1 "register_operand" "")
5413 (match_operand:V8HI 2 "register_operand" "")
5414 (match_operand:V4SI 3 "register_operand" "")]
5417 rtx t = gen_reg_rtx (V4SImode);
5418 emit_insn (gen_sse2_pmaddwd (t, operands[1], operands[2]));
5419 emit_insn (gen_addv4si3 (operands[0], operands[3], t));
5423 (define_expand "udot_prodv4si"
5424 [(match_operand:V2DI 0 "register_operand" "")
5425 (match_operand:V4SI 1 "register_operand" "")
5426 (match_operand:V4SI 2 "register_operand" "")
5427 (match_operand:V2DI 3 "register_operand" "")]
5432 t1 = gen_reg_rtx (V2DImode);
5433 emit_insn (gen_sse2_umulv2siv2di3 (t1, operands[1], operands[2]));
5434 emit_insn (gen_addv2di3 (t1, t1, operands[3]));
5436 t2 = gen_reg_rtx (V4SImode);
5437 t3 = gen_reg_rtx (V4SImode);
5438 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2),
5439 gen_lowpart (TImode, operands[1]),
5441 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3),
5442 gen_lowpart (TImode, operands[2]),
5445 t4 = gen_reg_rtx (V2DImode);
5446 emit_insn (gen_sse2_umulv2siv2di3 (t4, t2, t3));
5448 emit_insn (gen_addv2di3 (operands[0], t1, t4));
5452 (define_insn "*avx_ashr<mode>3"
5453 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
5455 (match_operand:SSEMODE24 1 "register_operand" "x")
5456 (match_operand:SI 2 "nonmemory_operand" "xN")))]
5458 "vpsra<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5459 [(set_attr "type" "sseishft")
5460 (set_attr "prefix" "vex")
5461 (set_attr "mode" "TI")])
5463 (define_insn "ashr<mode>3"
5464 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
5466 (match_operand:SSEMODE24 1 "register_operand" "0")
5467 (match_operand:SI 2 "nonmemory_operand" "xN")))]
5469 "psra<ssevecsize>\t{%2, %0|%0, %2}"
5470 [(set_attr "type" "sseishft")
5471 (set_attr "prefix_data16" "1")
5472 (set_attr "mode" "TI")])
5474 (define_insn "*avx_lshr<mode>3"
5475 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
5476 (lshiftrt:SSEMODE248
5477 (match_operand:SSEMODE248 1 "register_operand" "x")
5478 (match_operand:SI 2 "nonmemory_operand" "xN")))]
5480 "vpsrl<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5481 [(set_attr "type" "sseishft")
5482 (set_attr "prefix" "vex")
5483 (set_attr "mode" "TI")])
5485 (define_insn "lshr<mode>3"
5486 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
5487 (lshiftrt:SSEMODE248
5488 (match_operand:SSEMODE248 1 "register_operand" "0")
5489 (match_operand:SI 2 "nonmemory_operand" "xN")))]
5491 "psrl<ssevecsize>\t{%2, %0|%0, %2}"
5492 [(set_attr "type" "sseishft")
5493 (set_attr "prefix_data16" "1")
5494 (set_attr "mode" "TI")])
5496 (define_insn "*avx_ashl<mode>3"
5497 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
5499 (match_operand:SSEMODE248 1 "register_operand" "x")
5500 (match_operand:SI 2 "nonmemory_operand" "xN")))]
5502 "vpsll<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5503 [(set_attr "type" "sseishft")
5504 (set_attr "prefix" "vex")
5505 (set_attr "mode" "TI")])
5507 (define_insn "ashl<mode>3"
5508 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
5510 (match_operand:SSEMODE248 1 "register_operand" "0")
5511 (match_operand:SI 2 "nonmemory_operand" "xN")))]
5513 "psll<ssevecsize>\t{%2, %0|%0, %2}"
5514 [(set_attr "type" "sseishft")
5515 (set_attr "prefix_data16" "1")
5516 (set_attr "mode" "TI")])
5518 (define_expand "vec_shl_<mode>"
5519 [(set (match_operand:SSEMODEI 0 "register_operand" "")
5520 (ashift:TI (match_operand:SSEMODEI 1 "register_operand" "")
5521 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
5524 operands[0] = gen_lowpart (TImode, operands[0]);
5525 operands[1] = gen_lowpart (TImode, operands[1]);
5528 (define_expand "vec_shr_<mode>"
5529 [(set (match_operand:SSEMODEI 0 "register_operand" "")
5530 (lshiftrt:TI (match_operand:SSEMODEI 1 "register_operand" "")
5531 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
5534 operands[0] = gen_lowpart (TImode, operands[0]);
5535 operands[1] = gen_lowpart (TImode, operands[1]);
5538 (define_insn "*avx_<code><mode>3"
5539 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
5541 (match_operand:SSEMODE124 1 "nonimmediate_operand" "%x")
5542 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
5543 "TARGET_AVX && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5544 "vp<maxminiprefix><ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5545 [(set_attr "type" "sseiadd")
5546 (set_attr "prefix" "vex")
5547 (set_attr "mode" "TI")])
5549 (define_expand "<code>v16qi3"
5550 [(set (match_operand:V16QI 0 "register_operand" "")
5552 (match_operand:V16QI 1 "nonimmediate_operand" "")
5553 (match_operand:V16QI 2 "nonimmediate_operand" "")))]
5555 "ix86_fixup_binary_operands_no_copy (<CODE>, V16QImode, operands);")
5557 (define_insn "*<code>v16qi3"
5558 [(set (match_operand:V16QI 0 "register_operand" "=x")
5560 (match_operand:V16QI 1 "nonimmediate_operand" "%0")
5561 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
5562 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V16QImode, operands)"
5563 "p<maxminiprefix>b\t{%2, %0|%0, %2}"
5564 [(set_attr "type" "sseiadd")
5565 (set_attr "prefix_data16" "1")
5566 (set_attr "mode" "TI")])
5568 (define_expand "<code>v8hi3"
5569 [(set (match_operand:V8HI 0 "register_operand" "")
5571 (match_operand:V8HI 1 "nonimmediate_operand" "")
5572 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
5574 "ix86_fixup_binary_operands_no_copy (<CODE>, V8HImode, operands);")
5576 (define_insn "*<code>v8hi3"
5577 [(set (match_operand:V8HI 0 "register_operand" "=x")
5579 (match_operand:V8HI 1 "nonimmediate_operand" "%0")
5580 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
5581 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V8HImode, operands)"
5582 "p<maxminiprefix>w\t{%2, %0|%0, %2}"
5583 [(set_attr "type" "sseiadd")
5584 (set_attr "prefix_data16" "1")
5585 (set_attr "mode" "TI")])
5587 (define_expand "umaxv8hi3"
5588 [(set (match_operand:V8HI 0 "register_operand" "")
5589 (umax:V8HI (match_operand:V8HI 1 "register_operand" "")
5590 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
5594 ix86_fixup_binary_operands_no_copy (UMAX, V8HImode, operands);
5597 rtx op0 = operands[0], op2 = operands[2], op3 = op0;
5598 if (rtx_equal_p (op3, op2))
5599 op3 = gen_reg_rtx (V8HImode);
5600 emit_insn (gen_sse2_ussubv8hi3 (op3, operands[1], op2));
5601 emit_insn (gen_addv8hi3 (op0, op3, op2));
5606 (define_expand "smax<mode>3"
5607 [(set (match_operand:SSEMODE14 0 "register_operand" "")
5608 (smax:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
5609 (match_operand:SSEMODE14 2 "register_operand" "")))]
5613 ix86_fixup_binary_operands_no_copy (SMAX, <MODE>mode, operands);
5619 xops[0] = operands[0];
5620 xops[1] = operands[1];
5621 xops[2] = operands[2];
5622 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
5623 xops[4] = operands[1];
5624 xops[5] = operands[2];
5625 ok = ix86_expand_int_vcond (xops);
5631 (define_insn "*sse4_1_<code><mode>3"
5632 [(set (match_operand:SSEMODE14 0 "register_operand" "=x")
5634 (match_operand:SSEMODE14 1 "nonimmediate_operand" "%0")
5635 (match_operand:SSEMODE14 2 "nonimmediate_operand" "xm")))]
5636 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5637 "p<maxminiprefix><ssevecsize>\t{%2, %0|%0, %2}"
5638 [(set_attr "type" "sseiadd")
5639 (set_attr "prefix_extra" "1")
5640 (set_attr "mode" "TI")])
5642 (define_expand "umaxv4si3"
5643 [(set (match_operand:V4SI 0 "register_operand" "")
5644 (umax:V4SI (match_operand:V4SI 1 "register_operand" "")
5645 (match_operand:V4SI 2 "register_operand" "")))]
5649 ix86_fixup_binary_operands_no_copy (UMAX, V4SImode, operands);
5655 xops[0] = operands[0];
5656 xops[1] = operands[1];
5657 xops[2] = operands[2];
5658 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
5659 xops[4] = operands[1];
5660 xops[5] = operands[2];
5661 ok = ix86_expand_int_vcond (xops);
5667 (define_insn "*sse4_1_<code><mode>3"
5668 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
5670 (match_operand:SSEMODE24 1 "nonimmediate_operand" "%0")
5671 (match_operand:SSEMODE24 2 "nonimmediate_operand" "xm")))]
5672 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5673 "p<maxminiprefix><ssevecsize>\t{%2, %0|%0, %2}"
5674 [(set_attr "type" "sseiadd")
5675 (set_attr "prefix_extra" "1")
5676 (set_attr "mode" "TI")])
5678 (define_expand "smin<mode>3"
5679 [(set (match_operand:SSEMODE14 0 "register_operand" "")
5680 (smin:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
5681 (match_operand:SSEMODE14 2 "register_operand" "")))]
5685 ix86_fixup_binary_operands_no_copy (SMIN, <MODE>mode, operands);
5691 xops[0] = operands[0];
5692 xops[1] = operands[2];
5693 xops[2] = operands[1];
5694 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
5695 xops[4] = operands[1];
5696 xops[5] = operands[2];
5697 ok = ix86_expand_int_vcond (xops);
5703 (define_expand "umin<mode>3"
5704 [(set (match_operand:SSEMODE24 0 "register_operand" "")
5705 (umin:SSEMODE24 (match_operand:SSEMODE24 1 "register_operand" "")
5706 (match_operand:SSEMODE24 2 "register_operand" "")))]
5710 ix86_fixup_binary_operands_no_copy (UMIN, <MODE>mode, operands);
5716 xops[0] = operands[0];
5717 xops[1] = operands[2];
5718 xops[2] = operands[1];
5719 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
5720 xops[4] = operands[1];
5721 xops[5] = operands[2];
5722 ok = ix86_expand_int_vcond (xops);
5728 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5730 ;; Parallel integral comparisons
5732 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5734 (define_expand "sse2_eq<mode>3"
5735 [(set (match_operand:SSEMODE124 0 "register_operand" "")
5737 (match_operand:SSEMODE124 1 "nonimmediate_operand" "")
5738 (match_operand:SSEMODE124 2 "nonimmediate_operand" "")))]
5739 "TARGET_SSE2 && !TARGET_SSE5"
5740 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
5742 (define_insn "*avx_eq<mode>3"
5743 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
5745 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "%x")
5746 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm")))]
5747 "TARGET_AVX && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
5748 "vpcmpeq<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5749 [(set_attr "type" "ssecmp")
5750 (set_attr "prefix" "vex")
5751 (set_attr "mode" "TI")])
5753 (define_insn "*sse2_eq<mode>3"
5754 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
5756 (match_operand:SSEMODE124 1 "nonimmediate_operand" "%0")
5757 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
5758 "TARGET_SSE2 && !TARGET_SSE5
5759 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
5760 "pcmpeq<ssevecsize>\t{%2, %0|%0, %2}"
5761 [(set_attr "type" "ssecmp")
5762 (set_attr "prefix_data16" "1")
5763 (set_attr "mode" "TI")])
5765 (define_expand "sse4_1_eqv2di3"
5766 [(set (match_operand:V2DI 0 "register_operand" "")
5768 (match_operand:V2DI 1 "nonimmediate_operand" "")
5769 (match_operand:V2DI 2 "nonimmediate_operand" "")))]
5771 "ix86_fixup_binary_operands_no_copy (EQ, V2DImode, operands);")
5773 (define_insn "*sse4_1_eqv2di3"
5774 [(set (match_operand:V2DI 0 "register_operand" "=x")
5776 (match_operand:V2DI 1 "nonimmediate_operand" "%0")
5777 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
5778 "TARGET_SSE4_1 && ix86_binary_operator_ok (EQ, V2DImode, operands)"
5779 "pcmpeqq\t{%2, %0|%0, %2}"
5780 [(set_attr "type" "ssecmp")
5781 (set_attr "prefix_extra" "1")
5782 (set_attr "mode" "TI")])
5784 (define_insn "*avx_gt<mode>3"
5785 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
5787 (match_operand:SSEMODE1248 1 "register_operand" "x")
5788 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm")))]
5790 "vpcmpgt<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5791 [(set_attr "type" "ssecmp")
5792 (set_attr "prefix" "vex")
5793 (set_attr "mode" "TI")])
5795 (define_insn "sse2_gt<mode>3"
5796 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
5798 (match_operand:SSEMODE124 1 "register_operand" "0")
5799 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
5800 "TARGET_SSE2 && !TARGET_SSE5"
5801 "pcmpgt<ssevecsize>\t{%2, %0|%0, %2}"
5802 [(set_attr "type" "ssecmp")
5803 (set_attr "prefix_data16" "1")
5804 (set_attr "mode" "TI")])
5806 (define_insn "sse4_2_gtv2di3"
5807 [(set (match_operand:V2DI 0 "register_operand" "=x")
5809 (match_operand:V2DI 1 "register_operand" "0")
5810 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
5812 "pcmpgtq\t{%2, %0|%0, %2}"
5813 [(set_attr "type" "ssecmp")
5814 (set_attr "mode" "TI")])
5816 (define_expand "vcond<mode>"
5817 [(set (match_operand:SSEMODEI 0 "register_operand" "")
5818 (if_then_else:SSEMODEI
5819 (match_operator 3 ""
5820 [(match_operand:SSEMODEI 4 "nonimmediate_operand" "")
5821 (match_operand:SSEMODEI 5 "nonimmediate_operand" "")])
5822 (match_operand:SSEMODEI 1 "general_operand" "")
5823 (match_operand:SSEMODEI 2 "general_operand" "")))]
5826 if (ix86_expand_int_vcond (operands))
5832 (define_expand "vcondu<mode>"
5833 [(set (match_operand:SSEMODEI 0 "register_operand" "")
5834 (if_then_else:SSEMODEI
5835 (match_operator 3 ""
5836 [(match_operand:SSEMODEI 4 "nonimmediate_operand" "")
5837 (match_operand:SSEMODEI 5 "nonimmediate_operand" "")])
5838 (match_operand:SSEMODEI 1 "general_operand" "")
5839 (match_operand:SSEMODEI 2 "general_operand" "")))]
5842 if (ix86_expand_int_vcond (operands))
5848 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5850 ;; Parallel bitwise logical operations
5852 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5854 (define_expand "one_cmpl<mode>2"
5855 [(set (match_operand:SSEMODEI 0 "register_operand" "")
5856 (xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
5860 int i, n = GET_MODE_NUNITS (<MODE>mode);
5861 rtvec v = rtvec_alloc (n);
5863 for (i = 0; i < n; ++i)
5864 RTVEC_ELT (v, i) = constm1_rtx;
5866 operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v));
5869 (define_insn "*avx_nand<mode>3"
5870 [(set (match_operand:AVX256MODEI 0 "register_operand" "=x")
5872 (not:AVX256MODEI (match_operand:AVX256MODEI 1 "register_operand" "x"))
5873 (match_operand:AVX256MODEI 2 "nonimmediate_operand" "xm")))]
5875 "vandnps\t{%2, %1, %0|%0, %1, %2}"
5876 [(set_attr "type" "sselog")
5877 (set_attr "prefix" "vex")
5878 (set_attr "mode" "<avxvecpsmode>")])
5880 (define_insn "*sse_nand<mode>3"
5881 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
5883 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0"))
5884 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
5885 "(TARGET_SSE && !TARGET_SSE2)"
5886 "andnps\t{%2, %0|%0, %2}"
5887 [(set_attr "type" "sselog")
5888 (set_attr "mode" "V4SF")])
5890 (define_insn "*avx_nand<mode>3"
5891 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
5893 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "x"))
5894 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
5896 "vpandn\t{%2, %1, %0|%0, %1, %2}"
5897 [(set_attr "type" "sselog")
5898 (set_attr "prefix" "vex")
5899 (set_attr "mode" "TI")])
5901 (define_insn "sse2_nand<mode>3"
5902 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
5904 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0"))
5905 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
5907 "pandn\t{%2, %0|%0, %2}"
5908 [(set_attr "type" "sselog")
5909 (set_attr "prefix_data16" "1")
5910 (set_attr "mode" "TI")])
5912 (define_insn "*nandtf3"
5913 [(set (match_operand:TF 0 "register_operand" "=x")
5915 (not:TF (match_operand:TF 1 "register_operand" "0"))
5916 (match_operand:TF 2 "nonimmediate_operand" "xm")))]
5918 "pandn\t{%2, %0|%0, %2}"
5919 [(set_attr "type" "sselog")
5920 (set_attr "prefix_data16" "1")
5921 (set_attr "mode" "TI")])
5923 (define_expand "<code><mode>3"
5924 [(set (match_operand:SSEMODEI 0 "register_operand" "")
5926 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
5927 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
5929 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
5931 (define_insn "*avx_<code><mode>3"
5932 [(set (match_operand:AVX256MODEI 0 "register_operand" "=x")
5934 (match_operand:AVX256MODEI 1 "nonimmediate_operand" "%x")
5935 (match_operand:AVX256MODEI 2 "nonimmediate_operand" "xm")))]
5937 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5938 "v<plogicprefix>ps\t{%2, %1, %0|%0, %1, %2}"
5939 [(set_attr "type" "sselog")
5940 (set_attr "prefix" "vex")
5941 (set_attr "mode" "<avxvecpsmode>")])
5943 (define_insn "*sse_<code><mode>3"
5944 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
5946 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
5947 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
5948 "(TARGET_SSE && !TARGET_SSE2)
5949 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5950 "<plogicprefix>ps\t{%2, %0|%0, %2}"
5951 [(set_attr "type" "sselog")
5952 (set_attr "mode" "V4SF")])
5954 (define_insn "*avx_<code><mode>3"
5955 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
5957 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%x")
5958 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
5960 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5961 "vp<plogicprefix>\t{%2, %1, %0|%0, %1, %2}"
5962 [(set_attr "type" "sselog")
5963 (set_attr "prefix" "vex")
5964 (set_attr "mode" "TI")])
5966 (define_insn "*sse2_<code><mode>3"
5967 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
5969 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
5970 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
5971 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5972 "p<plogicprefix>\t{%2, %0|%0, %2}"
5973 [(set_attr "type" "sselog")
5974 (set_attr "prefix_data16" "1")
5975 (set_attr "mode" "TI")])
5977 (define_expand "<code>tf3"
5978 [(set (match_operand:TF 0 "register_operand" "")
5980 (match_operand:TF 1 "nonimmediate_operand" "")
5981 (match_operand:TF 2 "nonimmediate_operand" "")))]
5983 "ix86_fixup_binary_operands_no_copy (<CODE>, TFmode, operands);")
5985 (define_insn "*<code>tf3"
5986 [(set (match_operand:TF 0 "register_operand" "=x")
5988 (match_operand:TF 1 "nonimmediate_operand" "%0")
5989 (match_operand:TF 2 "nonimmediate_operand" "xm")))]
5990 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, TFmode, operands)"
5991 "p<plogicprefix>\t{%2, %0|%0, %2}"
5992 [(set_attr "type" "sselog")
5993 (set_attr "prefix_data16" "1")
5994 (set_attr "mode" "TI")])
5996 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5998 ;; Parallel integral element swizzling
6000 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6003 ;; op1 = abcdefghijklmnop
6004 ;; op2 = qrstuvwxyz012345
6005 ;; h1 = aqbrcsdteufvgwhx
6006 ;; l1 = iyjzk0l1m2n3o4p5
6007 ;; h2 = aiqybjrzcks0dlt1
6008 ;; l2 = emu2fnv3gow4hpx5
6009 ;; h3 = aeimquy2bfjnrvz3
6010 ;; l3 = cgkosw04dhlptx15
6011 ;; result = bdfhjlnprtvxz135
6012 (define_expand "vec_pack_trunc_v8hi"
6013 [(match_operand:V16QI 0 "register_operand" "")
6014 (match_operand:V8HI 1 "register_operand" "")
6015 (match_operand:V8HI 2 "register_operand" "")]
6018 rtx op1, op2, h1, l1, h2, l2, h3, l3;
6022 ix86_expand_sse5_pack (operands);
6026 op1 = gen_lowpart (V16QImode, operands[1]);
6027 op2 = gen_lowpart (V16QImode, operands[2]);
6028 h1 = gen_reg_rtx (V16QImode);
6029 l1 = gen_reg_rtx (V16QImode);
6030 h2 = gen_reg_rtx (V16QImode);
6031 l2 = gen_reg_rtx (V16QImode);
6032 h3 = gen_reg_rtx (V16QImode);
6033 l3 = gen_reg_rtx (V16QImode);
6035 emit_insn (gen_vec_interleave_highv16qi (h1, op1, op2));
6036 emit_insn (gen_vec_interleave_lowv16qi (l1, op1, op2));
6037 emit_insn (gen_vec_interleave_highv16qi (h2, l1, h1));
6038 emit_insn (gen_vec_interleave_lowv16qi (l2, l1, h1));
6039 emit_insn (gen_vec_interleave_highv16qi (h3, l2, h2));
6040 emit_insn (gen_vec_interleave_lowv16qi (l3, l2, h2));
6041 emit_insn (gen_vec_interleave_lowv16qi (operands[0], l3, h3));
6052 ;; result = bdfhjlnp
6053 (define_expand "vec_pack_trunc_v4si"
6054 [(match_operand:V8HI 0 "register_operand" "")
6055 (match_operand:V4SI 1 "register_operand" "")
6056 (match_operand:V4SI 2 "register_operand" "")]
6059 rtx op1, op2, h1, l1, h2, l2;
6063 ix86_expand_sse5_pack (operands);
6067 op1 = gen_lowpart (V8HImode, operands[1]);
6068 op2 = gen_lowpart (V8HImode, operands[2]);
6069 h1 = gen_reg_rtx (V8HImode);
6070 l1 = gen_reg_rtx (V8HImode);
6071 h2 = gen_reg_rtx (V8HImode);
6072 l2 = gen_reg_rtx (V8HImode);
6074 emit_insn (gen_vec_interleave_highv8hi (h1, op1, op2));
6075 emit_insn (gen_vec_interleave_lowv8hi (l1, op1, op2));
6076 emit_insn (gen_vec_interleave_highv8hi (h2, l1, h1));
6077 emit_insn (gen_vec_interleave_lowv8hi (l2, l1, h1));
6078 emit_insn (gen_vec_interleave_lowv8hi (operands[0], l2, h2));
6088 (define_expand "vec_pack_trunc_v2di"
6089 [(match_operand:V4SI 0 "register_operand" "")
6090 (match_operand:V2DI 1 "register_operand" "")
6091 (match_operand:V2DI 2 "register_operand" "")]
6094 rtx op1, op2, h1, l1;
6098 ix86_expand_sse5_pack (operands);
6102 op1 = gen_lowpart (V4SImode, operands[1]);
6103 op2 = gen_lowpart (V4SImode, operands[2]);
6104 h1 = gen_reg_rtx (V4SImode);
6105 l1 = gen_reg_rtx (V4SImode);
6107 emit_insn (gen_vec_interleave_highv4si (h1, op1, op2));
6108 emit_insn (gen_vec_interleave_lowv4si (l1, op1, op2));
6109 emit_insn (gen_vec_interleave_lowv4si (operands[0], l1, h1));
6113 (define_expand "vec_interleave_highv16qi"
6114 [(set (match_operand:V16QI 0 "register_operand" "")
6117 (match_operand:V16QI 1 "register_operand" "")
6118 (match_operand:V16QI 2 "nonimmediate_operand" ""))
6119 (parallel [(const_int 8) (const_int 24)
6120 (const_int 9) (const_int 25)
6121 (const_int 10) (const_int 26)
6122 (const_int 11) (const_int 27)
6123 (const_int 12) (const_int 28)
6124 (const_int 13) (const_int 29)
6125 (const_int 14) (const_int 30)
6126 (const_int 15) (const_int 31)])))]
6129 emit_insn (gen_sse2_punpckhbw (operands[0], operands[1], operands[2]));
6133 (define_expand "vec_interleave_lowv16qi"
6134 [(set (match_operand:V16QI 0 "register_operand" "")
6137 (match_operand:V16QI 1 "register_operand" "")
6138 (match_operand:V16QI 2 "nonimmediate_operand" ""))
6139 (parallel [(const_int 0) (const_int 16)
6140 (const_int 1) (const_int 17)
6141 (const_int 2) (const_int 18)
6142 (const_int 3) (const_int 19)
6143 (const_int 4) (const_int 20)
6144 (const_int 5) (const_int 21)
6145 (const_int 6) (const_int 22)
6146 (const_int 7) (const_int 23)])))]
6149 emit_insn (gen_sse2_punpcklbw (operands[0], operands[1], operands[2]));
6153 (define_expand "vec_interleave_highv8hi"
6154 [(set (match_operand:V8HI 0 "register_operand" "=")
6157 (match_operand:V8HI 1 "register_operand" "")
6158 (match_operand:V8HI 2 "nonimmediate_operand" ""))
6159 (parallel [(const_int 4) (const_int 12)
6160 (const_int 5) (const_int 13)
6161 (const_int 6) (const_int 14)
6162 (const_int 7) (const_int 15)])))]
6165 emit_insn (gen_sse2_punpckhwd (operands[0], operands[1], operands[2]));
6169 (define_expand "vec_interleave_lowv8hi"
6170 [(set (match_operand:V8HI 0 "register_operand" "")
6173 (match_operand:V8HI 1 "register_operand" "")
6174 (match_operand:V8HI 2 "nonimmediate_operand" ""))
6175 (parallel [(const_int 0) (const_int 8)
6176 (const_int 1) (const_int 9)
6177 (const_int 2) (const_int 10)
6178 (const_int 3) (const_int 11)])))]
6181 emit_insn (gen_sse2_punpcklwd (operands[0], operands[1], operands[2]));
6185 (define_expand "vec_interleave_highv4si"
6186 [(set (match_operand:V4SI 0 "register_operand" "")
6189 (match_operand:V4SI 1 "register_operand" "")
6190 (match_operand:V4SI 2 "nonimmediate_operand" ""))
6191 (parallel [(const_int 2) (const_int 6)
6192 (const_int 3) (const_int 7)])))]
6195 emit_insn (gen_sse2_punpckhdq (operands[0], operands[1], operands[2]));
6199 (define_expand "vec_interleave_lowv4si"
6200 [(set (match_operand:V4SI 0 "register_operand" "")
6203 (match_operand:V4SI 1 "register_operand" "")
6204 (match_operand:V4SI 2 "nonimmediate_operand" ""))
6205 (parallel [(const_int 0) (const_int 4)
6206 (const_int 1) (const_int 5)])))]
6209 emit_insn (gen_sse2_punpckldq (operands[0], operands[1], operands[2]));
6213 (define_expand "vec_interleave_highv2di"
6214 [(set (match_operand:V2DI 0 "register_operand" "")
6217 (match_operand:V2DI 1 "register_operand" "")
6218 (match_operand:V2DI 2 "nonimmediate_operand" ""))
6219 (parallel [(const_int 1)
6223 emit_insn (gen_sse2_punpckhqdq (operands[0], operands[1], operands[2]));
6227 (define_expand "vec_interleave_lowv2di"
6228 [(set (match_operand:V2DI 0 "register_operand" "")
6231 (match_operand:V2DI 1 "register_operand" "")
6232 (match_operand:V2DI 2 "nonimmediate_operand" ""))
6233 (parallel [(const_int 0)
6237 emit_insn (gen_sse2_punpcklqdq (operands[0], operands[1], operands[2]));
6241 (define_expand "vec_interleave_highv4sf"
6242 [(set (match_operand:V4SF 0 "register_operand" "")
6245 (match_operand:V4SF 1 "register_operand" "")
6246 (match_operand:V4SF 2 "nonimmediate_operand" ""))
6247 (parallel [(const_int 2) (const_int 6)
6248 (const_int 3) (const_int 7)])))]
6251 (define_expand "vec_interleave_lowv4sf"
6252 [(set (match_operand:V4SF 0 "register_operand" "")
6255 (match_operand:V4SF 1 "register_operand" "")
6256 (match_operand:V4SF 2 "nonimmediate_operand" ""))
6257 (parallel [(const_int 0) (const_int 4)
6258 (const_int 1) (const_int 5)])))]
6261 (define_expand "vec_interleave_highv2df"
6262 [(set (match_operand:V2DF 0 "register_operand" "")
6265 (match_operand:V2DF 1 "register_operand" "")
6266 (match_operand:V2DF 2 "nonimmediate_operand" ""))
6267 (parallel [(const_int 1)
6271 (define_expand "vec_interleave_lowv2df"
6272 [(set (match_operand:V2DF 0 "register_operand" "")
6275 (match_operand:V2DF 1 "register_operand" "")
6276 (match_operand:V2DF 2 "nonimmediate_operand" ""))
6277 (parallel [(const_int 0)
6281 (define_insn "*avx_packsswb"
6282 [(set (match_operand:V16QI 0 "register_operand" "=x")
6285 (match_operand:V8HI 1 "register_operand" "x"))
6287 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
6289 "vpacksswb\t{%2, %1, %0|%0, %1, %2}"
6290 [(set_attr "type" "sselog")
6291 (set_attr "prefix" "vex")
6292 (set_attr "mode" "TI")])
6294 (define_insn "sse2_packsswb"
6295 [(set (match_operand:V16QI 0 "register_operand" "=x")
6298 (match_operand:V8HI 1 "register_operand" "0"))
6300 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
6302 "packsswb\t{%2, %0|%0, %2}"
6303 [(set_attr "type" "sselog")
6304 (set_attr "prefix_data16" "1")
6305 (set_attr "mode" "TI")])
6307 (define_insn "*avx_packssdw"
6308 [(set (match_operand:V8HI 0 "register_operand" "=x")
6311 (match_operand:V4SI 1 "register_operand" "x"))
6313 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
6315 "vpackssdw\t{%2, %1, %0|%0, %1, %2}"
6316 [(set_attr "type" "sselog")
6317 (set_attr "prefix" "vex")
6318 (set_attr "mode" "TI")])
6320 (define_insn "sse2_packssdw"
6321 [(set (match_operand:V8HI 0 "register_operand" "=x")
6324 (match_operand:V4SI 1 "register_operand" "0"))
6326 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
6328 "packssdw\t{%2, %0|%0, %2}"
6329 [(set_attr "type" "sselog")
6330 (set_attr "prefix_data16" "1")
6331 (set_attr "mode" "TI")])
6333 (define_insn "*avx_packuswb"
6334 [(set (match_operand:V16QI 0 "register_operand" "=x")
6337 (match_operand:V8HI 1 "register_operand" "x"))
6339 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
6341 "vpackuswb\t{%2, %1, %0|%0, %1, %2}"
6342 [(set_attr "type" "sselog")
6343 (set_attr "prefix" "vex")
6344 (set_attr "mode" "TI")])
6346 (define_insn "sse2_packuswb"
6347 [(set (match_operand:V16QI 0 "register_operand" "=x")
6350 (match_operand:V8HI 1 "register_operand" "0"))
6352 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
6354 "packuswb\t{%2, %0|%0, %2}"
6355 [(set_attr "type" "sselog")
6356 (set_attr "prefix_data16" "1")
6357 (set_attr "mode" "TI")])
6359 (define_insn "*avx_punpckhbw"
6360 [(set (match_operand:V16QI 0 "register_operand" "=x")
6363 (match_operand:V16QI 1 "register_operand" "x")
6364 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
6365 (parallel [(const_int 8) (const_int 24)
6366 (const_int 9) (const_int 25)
6367 (const_int 10) (const_int 26)
6368 (const_int 11) (const_int 27)
6369 (const_int 12) (const_int 28)
6370 (const_int 13) (const_int 29)
6371 (const_int 14) (const_int 30)
6372 (const_int 15) (const_int 31)])))]
6374 "vpunpckhbw\t{%2, %1, %0|%0, %1, %2}"
6375 [(set_attr "type" "sselog")
6376 (set_attr "prefix" "vex")
6377 (set_attr "mode" "TI")])
6379 (define_insn "sse2_punpckhbw"
6380 [(set (match_operand:V16QI 0 "register_operand" "=x")
6383 (match_operand:V16QI 1 "register_operand" "0")
6384 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
6385 (parallel [(const_int 8) (const_int 24)
6386 (const_int 9) (const_int 25)
6387 (const_int 10) (const_int 26)
6388 (const_int 11) (const_int 27)
6389 (const_int 12) (const_int 28)
6390 (const_int 13) (const_int 29)
6391 (const_int 14) (const_int 30)
6392 (const_int 15) (const_int 31)])))]
6394 "punpckhbw\t{%2, %0|%0, %2}"
6395 [(set_attr "type" "sselog")
6396 (set_attr "prefix_data16" "1")
6397 (set_attr "mode" "TI")])
6399 (define_insn "*avx_punpcklbw"
6400 [(set (match_operand:V16QI 0 "register_operand" "=x")
6403 (match_operand:V16QI 1 "register_operand" "x")
6404 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
6405 (parallel [(const_int 0) (const_int 16)
6406 (const_int 1) (const_int 17)
6407 (const_int 2) (const_int 18)
6408 (const_int 3) (const_int 19)
6409 (const_int 4) (const_int 20)
6410 (const_int 5) (const_int 21)
6411 (const_int 6) (const_int 22)
6412 (const_int 7) (const_int 23)])))]
6414 "vpunpcklbw\t{%2, %1, %0|%0, %1, %2}"
6415 [(set_attr "type" "sselog")
6416 (set_attr "prefix" "vex")
6417 (set_attr "mode" "TI")])
6419 (define_insn "sse2_punpcklbw"
6420 [(set (match_operand:V16QI 0 "register_operand" "=x")
6423 (match_operand:V16QI 1 "register_operand" "0")
6424 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
6425 (parallel [(const_int 0) (const_int 16)
6426 (const_int 1) (const_int 17)
6427 (const_int 2) (const_int 18)
6428 (const_int 3) (const_int 19)
6429 (const_int 4) (const_int 20)
6430 (const_int 5) (const_int 21)
6431 (const_int 6) (const_int 22)
6432 (const_int 7) (const_int 23)])))]
6434 "punpcklbw\t{%2, %0|%0, %2}"
6435 [(set_attr "type" "sselog")
6436 (set_attr "prefix_data16" "1")
6437 (set_attr "mode" "TI")])
6439 (define_insn "*avx_punpckhwd"
6440 [(set (match_operand:V8HI 0 "register_operand" "=x")
6443 (match_operand:V8HI 1 "register_operand" "x")
6444 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
6445 (parallel [(const_int 4) (const_int 12)
6446 (const_int 5) (const_int 13)
6447 (const_int 6) (const_int 14)
6448 (const_int 7) (const_int 15)])))]
6450 "vpunpckhwd\t{%2, %1, %0|%0, %1, %2}"
6451 [(set_attr "type" "sselog")
6452 (set_attr "prefix" "vex")
6453 (set_attr "mode" "TI")])
6455 (define_insn "sse2_punpckhwd"
6456 [(set (match_operand:V8HI 0 "register_operand" "=x")
6459 (match_operand:V8HI 1 "register_operand" "0")
6460 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
6461 (parallel [(const_int 4) (const_int 12)
6462 (const_int 5) (const_int 13)
6463 (const_int 6) (const_int 14)
6464 (const_int 7) (const_int 15)])))]
6466 "punpckhwd\t{%2, %0|%0, %2}"
6467 [(set_attr "type" "sselog")
6468 (set_attr "prefix_data16" "1")
6469 (set_attr "mode" "TI")])
6471 (define_insn "*avx_punpcklwd"
6472 [(set (match_operand:V8HI 0 "register_operand" "=x")
6475 (match_operand:V8HI 1 "register_operand" "x")
6476 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
6477 (parallel [(const_int 0) (const_int 8)
6478 (const_int 1) (const_int 9)
6479 (const_int 2) (const_int 10)
6480 (const_int 3) (const_int 11)])))]
6482 "vpunpcklwd\t{%2, %1, %0|%0, %1, %2}"
6483 [(set_attr "type" "sselog")
6484 (set_attr "prefix" "vex")
6485 (set_attr "mode" "TI")])
6487 (define_insn "sse2_punpcklwd"
6488 [(set (match_operand:V8HI 0 "register_operand" "=x")
6491 (match_operand:V8HI 1 "register_operand" "0")
6492 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
6493 (parallel [(const_int 0) (const_int 8)
6494 (const_int 1) (const_int 9)
6495 (const_int 2) (const_int 10)
6496 (const_int 3) (const_int 11)])))]
6498 "punpcklwd\t{%2, %0|%0, %2}"
6499 [(set_attr "type" "sselog")
6500 (set_attr "prefix_data16" "1")
6501 (set_attr "mode" "TI")])
6503 (define_insn "*avx_punpckhdq"
6504 [(set (match_operand:V4SI 0 "register_operand" "=x")
6507 (match_operand:V4SI 1 "register_operand" "x")
6508 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
6509 (parallel [(const_int 2) (const_int 6)
6510 (const_int 3) (const_int 7)])))]
6512 "vpunpckhdq\t{%2, %1, %0|%0, %1, %2}"
6513 [(set_attr "type" "sselog")
6514 (set_attr "prefix" "vex")
6515 (set_attr "mode" "TI")])
6517 (define_insn "sse2_punpckhdq"
6518 [(set (match_operand:V4SI 0 "register_operand" "=x")
6521 (match_operand:V4SI 1 "register_operand" "0")
6522 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
6523 (parallel [(const_int 2) (const_int 6)
6524 (const_int 3) (const_int 7)])))]
6526 "punpckhdq\t{%2, %0|%0, %2}"
6527 [(set_attr "type" "sselog")
6528 (set_attr "prefix_data16" "1")
6529 (set_attr "mode" "TI")])
6531 (define_insn "*avx_punpckldq"
6532 [(set (match_operand:V4SI 0 "register_operand" "=x")
6535 (match_operand:V4SI 1 "register_operand" "x")
6536 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
6537 (parallel [(const_int 0) (const_int 4)
6538 (const_int 1) (const_int 5)])))]
6540 "vpunpckldq\t{%2, %1, %0|%0, %1, %2}"
6541 [(set_attr "type" "sselog")
6542 (set_attr "prefix" "vex")
6543 (set_attr "mode" "TI")])
6545 (define_insn "sse2_punpckldq"
6546 [(set (match_operand:V4SI 0 "register_operand" "=x")
6549 (match_operand:V4SI 1 "register_operand" "0")
6550 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
6551 (parallel [(const_int 0) (const_int 4)
6552 (const_int 1) (const_int 5)])))]
6554 "punpckldq\t{%2, %0|%0, %2}"
6555 [(set_attr "type" "sselog")
6556 (set_attr "prefix_data16" "1")
6557 (set_attr "mode" "TI")])
6559 (define_insn "*avx_pinsr<avxmodesuffixs>"
6560 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
6561 (vec_merge:SSEMODE124
6562 (vec_duplicate:SSEMODE124
6563 (match_operand:<avxscalarmode> 2 "nonimmediate_operand" "rm"))
6564 (match_operand:SSEMODE124 1 "register_operand" "x")
6565 (match_operand:SI 3 "const_pow2_1_to_<pinsrbits>_operand" "n")))]
6568 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6569 return "vpinsr<avxmodesuffixs>\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
6571 [(set_attr "type" "sselog")
6572 (set_attr "prefix" "vex")
6573 (set_attr "mode" "TI")])
6575 (define_insn "*sse4_1_pinsrb"
6576 [(set (match_operand:V16QI 0 "register_operand" "=x")
6578 (vec_duplicate:V16QI
6579 (match_operand:QI 2 "nonimmediate_operand" "rm"))
6580 (match_operand:V16QI 1 "register_operand" "0")
6581 (match_operand:SI 3 "const_pow2_1_to_32768_operand" "n")))]
6584 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6585 return "pinsrb\t{%3, %k2, %0|%0, %k2, %3}";
6587 [(set_attr "type" "sselog")
6588 (set_attr "prefix_extra" "1")
6589 (set_attr "mode" "TI")])
6591 (define_insn "*sse2_pinsrw"
6592 [(set (match_operand:V8HI 0 "register_operand" "=x")
6595 (match_operand:HI 2 "nonimmediate_operand" "rm"))
6596 (match_operand:V8HI 1 "register_operand" "0")
6597 (match_operand:SI 3 "const_pow2_1_to_128_operand" "n")))]
6600 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6601 return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}";
6603 [(set_attr "type" "sselog")
6604 (set_attr "prefix_data16" "1")
6605 (set_attr "mode" "TI")])
6607 ;; It must come before sse2_loadld since it is preferred.
6608 (define_insn "*sse4_1_pinsrd"
6609 [(set (match_operand:V4SI 0 "register_operand" "=x")
6612 (match_operand:SI 2 "nonimmediate_operand" "rm"))
6613 (match_operand:V4SI 1 "register_operand" "0")
6614 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
6617 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6618 return "pinsrd\t{%3, %2, %0|%0, %2, %3}";
6620 [(set_attr "type" "sselog")
6621 (set_attr "prefix_extra" "1")
6622 (set_attr "mode" "TI")])
6624 (define_insn "*avx_pinsrq"
6625 [(set (match_operand:V2DI 0 "register_operand" "=x")
6628 (match_operand:DI 2 "nonimmediate_operand" "rm"))
6629 (match_operand:V2DI 1 "register_operand" "x")
6630 (match_operand:SI 3 "const_pow2_1_to_2_operand" "n")))]
6631 "TARGET_AVX && TARGET_64BIT"
6633 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6634 return "vpinsrq\t{%3, %2, %1, %0|%0, %1, %2, %3}";
6636 [(set_attr "type" "sselog")
6637 (set_attr "prefix" "vex")
6638 (set_attr "mode" "TI")])
6640 (define_insn "*sse4_1_pinsrq"
6641 [(set (match_operand:V2DI 0 "register_operand" "=x")
6644 (match_operand:DI 2 "nonimmediate_operand" "rm"))
6645 (match_operand:V2DI 1 "register_operand" "0")
6646 (match_operand:SI 3 "const_pow2_1_to_2_operand" "n")))]
6647 "TARGET_SSE4_1 && TARGET_64BIT"
6649 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6650 return "pinsrq\t{%3, %2, %0|%0, %2, %3}";
6652 [(set_attr "type" "sselog")
6653 (set_attr "prefix_extra" "1")
6654 (set_attr "mode" "TI")])
6656 (define_insn "*sse4_1_pextrb"
6657 [(set (match_operand:SI 0 "register_operand" "=r")
6660 (match_operand:V16QI 1 "register_operand" "x")
6661 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")]))))]
6663 "%vpextrb\t{%2, %1, %0|%0, %1, %2}"
6664 [(set_attr "type" "sselog")
6665 (set_attr "prefix_extra" "1")
6666 (set_attr "prefix" "maybe_vex")
6667 (set_attr "mode" "TI")])
6669 (define_insn "*sse4_1_pextrb_memory"
6670 [(set (match_operand:QI 0 "memory_operand" "=m")
6672 (match_operand:V16QI 1 "register_operand" "x")
6673 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")])))]
6675 "%vpextrb\t{%2, %1, %0|%0, %1, %2}"
6676 [(set_attr "type" "sselog")
6677 (set_attr "prefix_extra" "1")
6678 (set_attr "prefix" "maybe_vex")
6679 (set_attr "mode" "TI")])
6681 (define_insn "*sse2_pextrw"
6682 [(set (match_operand:SI 0 "register_operand" "=r")
6685 (match_operand:V8HI 1 "register_operand" "x")
6686 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")]))))]
6688 "%vpextrw\t{%2, %1, %0|%0, %1, %2}"
6689 [(set_attr "type" "sselog")
6690 (set_attr "prefix_data16" "1")
6691 (set_attr "prefix" "maybe_vex")
6692 (set_attr "mode" "TI")])
6694 (define_insn "*sse4_1_pextrw_memory"
6695 [(set (match_operand:HI 0 "memory_operand" "=m")
6697 (match_operand:V8HI 1 "register_operand" "x")
6698 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")])))]
6700 "%vpextrw\t{%2, %1, %0|%0, %1, %2}"
6701 [(set_attr "type" "sselog")
6702 (set_attr "prefix_extra" "1")
6703 (set_attr "prefix" "maybe_vex")
6704 (set_attr "mode" "TI")])
6706 (define_insn "*sse4_1_pextrd"
6707 [(set (match_operand:SI 0 "nonimmediate_operand" "=rm")
6709 (match_operand:V4SI 1 "register_operand" "x")
6710 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
6712 "%vpextrd\t{%2, %1, %0|%0, %1, %2}"
6713 [(set_attr "type" "sselog")
6714 (set_attr "prefix_extra" "1")
6715 (set_attr "prefix" "maybe_vex")
6716 (set_attr "mode" "TI")])
6718 ;; It must come before *vec_extractv2di_1_sse since it is preferred.
6719 (define_insn "*sse4_1_pextrq"
6720 [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
6722 (match_operand:V2DI 1 "register_operand" "x")
6723 (parallel [(match_operand:SI 2 "const_0_to_1_operand" "n")])))]
6724 "TARGET_SSE4_1 && TARGET_64BIT"
6725 "%vpextrq\t{%2, %1, %0|%0, %1, %2}"
6726 [(set_attr "type" "sselog")
6727 (set_attr "prefix_extra" "1")
6728 (set_attr "prefix" "maybe_vex")
6729 (set_attr "mode" "TI")])
6731 (define_expand "sse2_pshufd"
6732 [(match_operand:V4SI 0 "register_operand" "")
6733 (match_operand:V4SI 1 "nonimmediate_operand" "")
6734 (match_operand:SI 2 "const_int_operand" "")]
6737 int mask = INTVAL (operands[2]);
6738 emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
6739 GEN_INT ((mask >> 0) & 3),
6740 GEN_INT ((mask >> 2) & 3),
6741 GEN_INT ((mask >> 4) & 3),
6742 GEN_INT ((mask >> 6) & 3)));
6746 (define_insn "sse2_pshufd_1"
6747 [(set (match_operand:V4SI 0 "register_operand" "=x")
6749 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
6750 (parallel [(match_operand 2 "const_0_to_3_operand" "")
6751 (match_operand 3 "const_0_to_3_operand" "")
6752 (match_operand 4 "const_0_to_3_operand" "")
6753 (match_operand 5 "const_0_to_3_operand" "")])))]
6757 mask |= INTVAL (operands[2]) << 0;
6758 mask |= INTVAL (operands[3]) << 2;
6759 mask |= INTVAL (operands[4]) << 4;
6760 mask |= INTVAL (operands[5]) << 6;
6761 operands[2] = GEN_INT (mask);
6763 return "%vpshufd\t{%2, %1, %0|%0, %1, %2}";
6765 [(set_attr "type" "sselog1")
6766 (set_attr "prefix_data16" "1")
6767 (set_attr "prefix" "vex")
6768 (set_attr "mode" "TI")])
6770 (define_expand "sse2_pshuflw"
6771 [(match_operand:V8HI 0 "register_operand" "")
6772 (match_operand:V8HI 1 "nonimmediate_operand" "")
6773 (match_operand:SI 2 "const_int_operand" "")]
6776 int mask = INTVAL (operands[2]);
6777 emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
6778 GEN_INT ((mask >> 0) & 3),
6779 GEN_INT ((mask >> 2) & 3),
6780 GEN_INT ((mask >> 4) & 3),
6781 GEN_INT ((mask >> 6) & 3)));
6785 (define_insn "sse2_pshuflw_1"
6786 [(set (match_operand:V8HI 0 "register_operand" "=x")
6788 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
6789 (parallel [(match_operand 2 "const_0_to_3_operand" "")
6790 (match_operand 3 "const_0_to_3_operand" "")
6791 (match_operand 4 "const_0_to_3_operand" "")
6792 (match_operand 5 "const_0_to_3_operand" "")
6800 mask |= INTVAL (operands[2]) << 0;
6801 mask |= INTVAL (operands[3]) << 2;
6802 mask |= INTVAL (operands[4]) << 4;
6803 mask |= INTVAL (operands[5]) << 6;
6804 operands[2] = GEN_INT (mask);
6806 return "%vpshuflw\t{%2, %1, %0|%0, %1, %2}";
6808 [(set_attr "type" "sselog")
6809 (set_attr "prefix_rep" "1")
6810 (set_attr "prefix" "maybe_vex")
6811 (set_attr "mode" "TI")])
6813 (define_expand "sse2_pshufhw"
6814 [(match_operand:V8HI 0 "register_operand" "")
6815 (match_operand:V8HI 1 "nonimmediate_operand" "")
6816 (match_operand:SI 2 "const_int_operand" "")]
6819 int mask = INTVAL (operands[2]);
6820 emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
6821 GEN_INT (((mask >> 0) & 3) + 4),
6822 GEN_INT (((mask >> 2) & 3) + 4),
6823 GEN_INT (((mask >> 4) & 3) + 4),
6824 GEN_INT (((mask >> 6) & 3) + 4)));
6828 (define_insn "sse2_pshufhw_1"
6829 [(set (match_operand:V8HI 0 "register_operand" "=x")
6831 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
6832 (parallel [(const_int 0)
6836 (match_operand 2 "const_4_to_7_operand" "")
6837 (match_operand 3 "const_4_to_7_operand" "")
6838 (match_operand 4 "const_4_to_7_operand" "")
6839 (match_operand 5 "const_4_to_7_operand" "")])))]
6843 mask |= (INTVAL (operands[2]) - 4) << 0;
6844 mask |= (INTVAL (operands[3]) - 4) << 2;
6845 mask |= (INTVAL (operands[4]) - 4) << 4;
6846 mask |= (INTVAL (operands[5]) - 4) << 6;
6847 operands[2] = GEN_INT (mask);
6849 return "%vpshufhw\t{%2, %1, %0|%0, %1, %2}";
6851 [(set_attr "type" "sselog")
6852 (set_attr "prefix_rep" "1")
6853 (set_attr "prefix" "maybe_vex")
6854 (set_attr "mode" "TI")])
6856 (define_expand "sse2_loadd"
6857 [(set (match_operand:V4SI 0 "register_operand" "")
6860 (match_operand:SI 1 "nonimmediate_operand" ""))
6864 "operands[2] = CONST0_RTX (V4SImode);")
6866 (define_insn "*avx_loadld"
6867 [(set (match_operand:V4SI 0 "register_operand" "=x,Yi,x")
6870 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,x"))
6871 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,x")
6875 vmovd\t{%2, %0|%0, %2}
6876 vmovd\t{%2, %0|%0, %2}
6877 vmovss\t{%2, %1, %0|%0, %1, %2}"
6878 [(set_attr "type" "ssemov")
6879 (set_attr "prefix" "vex")
6880 (set_attr "mode" "TI,TI,V4SF")])
6882 (define_insn "sse2_loadld"
6883 [(set (match_operand:V4SI 0 "register_operand" "=Y2,Yi,x,x")
6886 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x"))
6887 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,C,0")
6891 movd\t{%2, %0|%0, %2}
6892 movd\t{%2, %0|%0, %2}
6893 movss\t{%2, %0|%0, %2}
6894 movss\t{%2, %0|%0, %2}"
6895 [(set_attr "type" "ssemov")
6896 (set_attr "mode" "TI,TI,V4SF,SF")])
6898 (define_insn_and_split "sse2_stored"
6899 [(set (match_operand:SI 0 "nonimmediate_operand" "=mx,r")
6901 (match_operand:V4SI 1 "register_operand" "x,Yi")
6902 (parallel [(const_int 0)])))]
6905 "&& reload_completed
6906 && (TARGET_INTER_UNIT_MOVES
6907 || MEM_P (operands [0])
6908 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
6909 [(set (match_dup 0) (match_dup 1))]
6911 operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));
6914 (define_insn_and_split "*vec_ext_v4si_mem"
6915 [(set (match_operand:SI 0 "register_operand" "=r")
6917 (match_operand:V4SI 1 "memory_operand" "o")
6918 (parallel [(match_operand 2 "const_0_to_3_operand" "")])))]
6924 int i = INTVAL (operands[2]);
6926 emit_move_insn (operands[0], adjust_address (operands[1], SImode, i*4));
6930 (define_expand "sse_storeq"
6931 [(set (match_operand:DI 0 "nonimmediate_operand" "")
6933 (match_operand:V2DI 1 "register_operand" "")
6934 (parallel [(const_int 0)])))]
6938 (define_insn "*sse2_storeq_rex64"
6939 [(set (match_operand:DI 0 "nonimmediate_operand" "=mx,*r,r")
6941 (match_operand:V2DI 1 "nonimmediate_operand" "x,Yi,o")
6942 (parallel [(const_int 0)])))]
6943 "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6947 %vmov{q}\t{%1, %0|%0, %1}"
6948 [(set_attr "type" "*,*,imov")
6949 (set_attr "prefix" "*,*,maybe_vex")
6950 (set_attr "mode" "*,*,DI")])
6952 (define_insn "*sse2_storeq"
6953 [(set (match_operand:DI 0 "nonimmediate_operand" "=mx")
6955 (match_operand:V2DI 1 "register_operand" "x")
6956 (parallel [(const_int 0)])))]
6961 [(set (match_operand:DI 0 "nonimmediate_operand" "")
6963 (match_operand:V2DI 1 "register_operand" "")
6964 (parallel [(const_int 0)])))]
6967 && (TARGET_INTER_UNIT_MOVES
6968 || MEM_P (operands [0])
6969 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
6970 [(set (match_dup 0) (match_dup 1))]
6972 operands[1] = gen_rtx_REG (DImode, REGNO (operands[1]));
6975 (define_insn "*vec_extractv2di_1_rex64_avx"
6976 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x,r")
6978 (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o,o")
6979 (parallel [(const_int 1)])))]
6982 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6984 vmovhps\t{%1, %0|%0, %1}
6985 vpsrldq\t{$8, %1, %0|%0, %1, 8}
6986 vmovq\t{%H1, %0|%0, %H1}
6987 vmov{q}\t{%H1, %0|%0, %H1}"
6988 [(set_attr "type" "ssemov,sseishft,ssemov,imov")
6989 (set_attr "memory" "*,none,*,*")
6990 (set_attr "prefix" "vex")
6991 (set_attr "mode" "V2SF,TI,TI,DI")])
6993 (define_insn "*vec_extractv2di_1_rex64"
6994 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x,r")
6996 (match_operand:V2DI 1 "nonimmediate_operand" "x,0,o,o")
6997 (parallel [(const_int 1)])))]
6998 "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7000 movhps\t{%1, %0|%0, %1}
7001 psrldq\t{$8, %0|%0, 8}
7002 movq\t{%H1, %0|%0, %H1}
7003 mov{q}\t{%H1, %0|%0, %H1}"
7004 [(set_attr "type" "ssemov,sseishft,ssemov,imov")
7005 (set_attr "memory" "*,none,*,*")
7006 (set_attr "mode" "V2SF,TI,TI,DI")])
7008 (define_insn "*vec_extractv2di_1_avx"
7009 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
7011 (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o")
7012 (parallel [(const_int 1)])))]
7015 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7017 vmovhps\t{%1, %0|%0, %1}
7018 vpsrldq\t{$8, %1, %0|%0, %1, 8}
7019 vmovq\t{%H1, %0|%0, %H1}"
7020 [(set_attr "type" "ssemov,sseishft,ssemov")
7021 (set_attr "memory" "*,none,*")
7022 (set_attr "prefix" "vex")
7023 (set_attr "mode" "V2SF,TI,TI")])
7025 (define_insn "*vec_extractv2di_1_sse2"
7026 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
7028 (match_operand:V2DI 1 "nonimmediate_operand" "x,0,o")
7029 (parallel [(const_int 1)])))]
7031 && TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7033 movhps\t{%1, %0|%0, %1}
7034 psrldq\t{$8, %0|%0, 8}
7035 movq\t{%H1, %0|%0, %H1}"
7036 [(set_attr "type" "ssemov,sseishft,ssemov")
7037 (set_attr "memory" "*,none,*")
7038 (set_attr "mode" "V2SF,TI,TI")])
7040 ;; Not sure this is ever used, but it doesn't hurt to have it. -aoliva
7041 (define_insn "*vec_extractv2di_1_sse"
7042 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
7044 (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o")
7045 (parallel [(const_int 1)])))]
7046 "!TARGET_SSE2 && TARGET_SSE
7047 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7049 movhps\t{%1, %0|%0, %1}
7050 movhlps\t{%1, %0|%0, %1}
7051 movlps\t{%H1, %0|%0, %H1}"
7052 [(set_attr "type" "ssemov")
7053 (set_attr "mode" "V2SF,V4SF,V2SF")])
7055 (define_insn "*vec_dupv4si"
7056 [(set (match_operand:V4SI 0 "register_operand" "=Y2,x")
7058 (match_operand:SI 1 "register_operand" " Y2,0")))]
7061 %vpshufd\t{$0, %1, %0|%0, %1, 0}
7062 shufps\t{$0, %0, %0|%0, %0, 0}"
7063 [(set_attr "type" "sselog1")
7064 (set_attr "prefix" "maybe_vex,orig")
7065 (set_attr "mode" "TI,V4SF")])
7067 (define_insn "*vec_dupv2di_avx"
7068 [(set (match_operand:V2DI 0 "register_operand" "=x")
7070 (match_operand:DI 1 "register_operand" "x")))]
7072 "vpunpcklqdq\t{%1, %1, %0|%0, %1, %1}"
7073 [(set_attr "type" "sselog1")
7074 (set_attr "prefix" "vex")
7075 (set_attr "mode" "TI")])
7077 (define_insn "*vec_dupv2di"
7078 [(set (match_operand:V2DI 0 "register_operand" "=Y2,x")
7080 (match_operand:DI 1 "register_operand" " 0 ,0")))]
7085 [(set_attr "type" "sselog1,ssemov")
7086 (set_attr "mode" "TI,V4SF")])
7088 (define_insn "*vec_concatv2si_avx"
7089 [(set (match_operand:V2SI 0 "register_operand" "=x,x,x ,*y ,*y")
7091 (match_operand:SI 1 "nonimmediate_operand" "x ,x,rm, 0 ,rm")
7092 (match_operand:SI 2 "vector_move_operand" "rm,x,C ,*ym,C")))]
7095 vpinsrd\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}
7096 vpunpckldq\t{%2, %1, %0|%0, %1, %2}
7097 vmovd\t{%1, %0|%0, %1}
7098 punpckldq\t{%2, %0|%0, %2}
7099 movd\t{%1, %0|%0, %1}"
7100 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
7101 (set (attr "prefix")
7102 (if_then_else (eq_attr "alternative" "3,4")
7103 (const_string "orig")
7104 (const_string "vex")))
7105 (set_attr "mode" "TI,TI,TI,DI,DI")])
7107 (define_insn "*vec_concatv2si_sse4_1"
7108 [(set (match_operand:V2SI 0 "register_operand" "=x,x,x ,*y ,*y")
7110 (match_operand:SI 1 "nonimmediate_operand" "0 ,0,rm, 0 ,rm")
7111 (match_operand:SI 2 "vector_move_operand" "rm,x,C ,*ym,C")))]
7114 pinsrd\t{$0x1, %2, %0|%0, %2, 0x1}
7115 punpckldq\t{%2, %0|%0, %2}
7116 movd\t{%1, %0|%0, %1}
7117 punpckldq\t{%2, %0|%0, %2}
7118 movd\t{%1, %0|%0, %1}"
7119 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
7120 (set_attr "prefix_extra" "1,*,*,*,*")
7121 (set_attr "mode" "TI,TI,TI,DI,DI")])
7123 ;; ??? In theory we can match memory for the MMX alternative, but allowing
7124 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
7125 ;; alternatives pretty much forces the MMX alternative to be chosen.
7126 (define_insn "*vec_concatv2si_sse2"
7127 [(set (match_operand:V2SI 0 "register_operand" "=x,x ,*y,*y")
7129 (match_operand:SI 1 "nonimmediate_operand" " 0,rm, 0,rm")
7130 (match_operand:SI 2 "reg_or_0_operand" " x,C ,*y, C")))]
7133 punpckldq\t{%2, %0|%0, %2}
7134 movd\t{%1, %0|%0, %1}
7135 punpckldq\t{%2, %0|%0, %2}
7136 movd\t{%1, %0|%0, %1}"
7137 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
7138 (set_attr "mode" "TI,TI,DI,DI")])
7140 (define_insn "*vec_concatv2si_sse"
7141 [(set (match_operand:V2SI 0 "register_operand" "=x,x,*y,*y")
7143 (match_operand:SI 1 "nonimmediate_operand" " 0,m, 0,*rm")
7144 (match_operand:SI 2 "reg_or_0_operand" " x,C,*y,C")))]
7147 unpcklps\t{%2, %0|%0, %2}
7148 movss\t{%1, %0|%0, %1}
7149 punpckldq\t{%2, %0|%0, %2}
7150 movd\t{%1, %0|%0, %1}"
7151 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
7152 (set_attr "mode" "V4SF,V4SF,DI,DI")])
7154 (define_insn "*vec_concatv4si_1_avx"
7155 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
7157 (match_operand:V2SI 1 "register_operand" " x,x")
7158 (match_operand:V2SI 2 "nonimmediate_operand" " x,m")))]
7161 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7162 vmovhps\t{%2, %1, %0|%0, %1, %2}"
7163 [(set_attr "type" "sselog,ssemov")
7164 (set_attr "prefix" "vex")
7165 (set_attr "mode" "TI,V2SF")])
7167 (define_insn "*vec_concatv4si_1"
7168 [(set (match_operand:V4SI 0 "register_operand" "=Y2,x,x")
7170 (match_operand:V2SI 1 "register_operand" " 0 ,0,0")
7171 (match_operand:V2SI 2 "nonimmediate_operand" " Y2,x,m")))]
7174 punpcklqdq\t{%2, %0|%0, %2}
7175 movlhps\t{%2, %0|%0, %2}
7176 movhps\t{%2, %0|%0, %2}"
7177 [(set_attr "type" "sselog,ssemov,ssemov")
7178 (set_attr "mode" "TI,V4SF,V2SF")])
7180 (define_insn "*vec_concatv2di_avx"
7181 [(set (match_operand:V2DI 0 "register_operand" "=x,?x,x,x")
7183 (match_operand:DI 1 "nonimmediate_operand" " m,*y,x,x")
7184 (match_operand:DI 2 "vector_move_operand" " C, C,x,m")))]
7185 "!TARGET_64BIT && TARGET_AVX"
7187 vmovq\t{%1, %0|%0, %1}
7188 movq2dq\t{%1, %0|%0, %1}
7189 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7190 vmovhps\t{%2, %1, %0|%0, %1, %2}"
7191 [(set_attr "type" "ssemov,ssemov,sselog,ssemov")
7192 (set (attr "prefix")
7193 (if_then_else (eq_attr "alternative" "1")
7194 (const_string "orig")
7195 (const_string "vex")))
7196 (set_attr "mode" "TI,TI,TI,V2SF")])
7198 (define_insn "vec_concatv2di"
7199 [(set (match_operand:V2DI 0 "register_operand" "=Y2 ,?Y2,Y2,x,x")
7201 (match_operand:DI 1 "nonimmediate_operand" " mY2,*y ,0 ,0,0")
7202 (match_operand:DI 2 "vector_move_operand" " C , C,Y2,x,m")))]
7203 "!TARGET_64BIT && TARGET_SSE"
7205 movq\t{%1, %0|%0, %1}
7206 movq2dq\t{%1, %0|%0, %1}
7207 punpcklqdq\t{%2, %0|%0, %2}
7208 movlhps\t{%2, %0|%0, %2}
7209 movhps\t{%2, %0|%0, %2}"
7210 [(set_attr "type" "ssemov,ssemov,sselog,ssemov,ssemov")
7211 (set_attr "mode" "TI,TI,TI,V4SF,V2SF")])
7213 (define_insn "*vec_concatv2di_rex64_avx"
7214 [(set (match_operand:V2DI 0 "register_operand" "=x,x,Yi,!x,x,x")
7216 (match_operand:DI 1 "nonimmediate_operand" " x,m,r ,*y,x,x")
7217 (match_operand:DI 2 "vector_move_operand" "rm,C,C ,C ,x,m")))]
7218 "TARGET_64BIT && TARGET_AVX"
7220 vpinsrq\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}
7221 vmovq\t{%1, %0|%0, %1}
7222 vmovq\t{%1, %0|%0, %1}
7223 movq2dq\t{%1, %0|%0, %1}
7224 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7225 vmovhps\t{%2, %1, %0|%0, %1, %2}"
7226 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,sselog,ssemov")
7227 (set (attr "prefix")
7228 (if_then_else (eq_attr "alternative" "3")
7229 (const_string "orig")
7230 (const_string "vex")))
7231 (set_attr "mode" "TI,TI,TI,TI,TI,V2SF")])
7233 (define_insn "*vec_concatv2di_rex64_sse4_1"
7234 [(set (match_operand:V2DI 0 "register_operand" "=x ,x ,Yi,!x,x,x,x")
7236 (match_operand:DI 1 "nonimmediate_operand" " 0 ,mx,r ,*y,0,0,0")
7237 (match_operand:DI 2 "vector_move_operand" " rm,C ,C ,C ,x,x,m")))]
7238 "TARGET_64BIT && TARGET_SSE4_1"
7240 pinsrq\t{$0x1, %2, %0|%0, %2, 0x1}
7241 movq\t{%1, %0|%0, %1}
7242 movq\t{%1, %0|%0, %1}
7243 movq2dq\t{%1, %0|%0, %1}
7244 punpcklqdq\t{%2, %0|%0, %2}
7245 movlhps\t{%2, %0|%0, %2}
7246 movhps\t{%2, %0|%0, %2}"
7247 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
7248 (set_attr "prefix_extra" "1,*,*,*,*,*,*")
7249 (set_attr "mode" "TI,TI,TI,TI,TI,V4SF,V2SF")])
7251 (define_insn "*vec_concatv2di_rex64_sse"
7252 [(set (match_operand:V2DI 0 "register_operand" "=Y2 ,Yi,!Y2,Y2,x,x")
7254 (match_operand:DI 1 "nonimmediate_operand" " mY2,r ,*y ,0 ,0,0")
7255 (match_operand:DI 2 "vector_move_operand" " C ,C ,C ,Y2,x,m")))]
7256 "TARGET_64BIT && TARGET_SSE"
7258 movq\t{%1, %0|%0, %1}
7259 movq\t{%1, %0|%0, %1}
7260 movq2dq\t{%1, %0|%0, %1}
7261 punpcklqdq\t{%2, %0|%0, %2}
7262 movlhps\t{%2, %0|%0, %2}
7263 movhps\t{%2, %0|%0, %2}"
7264 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
7265 (set_attr "mode" "TI,TI,TI,TI,V4SF,V2SF")])
7267 (define_expand "vec_unpacku_hi_v16qi"
7268 [(match_operand:V8HI 0 "register_operand" "")
7269 (match_operand:V16QI 1 "register_operand" "")]
7273 ix86_expand_sse4_unpack (operands, true, true);
7274 else if (TARGET_SSE5)
7275 ix86_expand_sse5_unpack (operands, true, true);
7277 ix86_expand_sse_unpack (operands, true, true);
7281 (define_expand "vec_unpacks_hi_v16qi"
7282 [(match_operand:V8HI 0 "register_operand" "")
7283 (match_operand:V16QI 1 "register_operand" "")]
7287 ix86_expand_sse4_unpack (operands, false, true);
7288 else if (TARGET_SSE5)
7289 ix86_expand_sse5_unpack (operands, false, true);
7291 ix86_expand_sse_unpack (operands, false, true);
7295 (define_expand "vec_unpacku_lo_v16qi"
7296 [(match_operand:V8HI 0 "register_operand" "")
7297 (match_operand:V16QI 1 "register_operand" "")]
7301 ix86_expand_sse4_unpack (operands, true, false);
7302 else if (TARGET_SSE5)
7303 ix86_expand_sse5_unpack (operands, true, false);
7305 ix86_expand_sse_unpack (operands, true, false);
7309 (define_expand "vec_unpacks_lo_v16qi"
7310 [(match_operand:V8HI 0 "register_operand" "")
7311 (match_operand:V16QI 1 "register_operand" "")]
7315 ix86_expand_sse4_unpack (operands, false, false);
7316 else if (TARGET_SSE5)
7317 ix86_expand_sse5_unpack (operands, false, false);
7319 ix86_expand_sse_unpack (operands, false, false);
7323 (define_expand "vec_unpacku_hi_v8hi"
7324 [(match_operand:V4SI 0 "register_operand" "")
7325 (match_operand:V8HI 1 "register_operand" "")]
7329 ix86_expand_sse4_unpack (operands, true, true);
7330 else if (TARGET_SSE5)
7331 ix86_expand_sse5_unpack (operands, true, true);
7333 ix86_expand_sse_unpack (operands, true, true);
7337 (define_expand "vec_unpacks_hi_v8hi"
7338 [(match_operand:V4SI 0 "register_operand" "")
7339 (match_operand:V8HI 1 "register_operand" "")]
7343 ix86_expand_sse4_unpack (operands, false, true);
7344 else if (TARGET_SSE5)
7345 ix86_expand_sse5_unpack (operands, false, true);
7347 ix86_expand_sse_unpack (operands, false, true);
7351 (define_expand "vec_unpacku_lo_v8hi"
7352 [(match_operand:V4SI 0 "register_operand" "")
7353 (match_operand:V8HI 1 "register_operand" "")]
7357 ix86_expand_sse4_unpack (operands, true, false);
7358 else if (TARGET_SSE5)
7359 ix86_expand_sse5_unpack (operands, true, false);
7361 ix86_expand_sse_unpack (operands, true, false);
7365 (define_expand "vec_unpacks_lo_v8hi"
7366 [(match_operand:V4SI 0 "register_operand" "")
7367 (match_operand:V8HI 1 "register_operand" "")]
7371 ix86_expand_sse4_unpack (operands, false, false);
7372 else if (TARGET_SSE5)
7373 ix86_expand_sse5_unpack (operands, false, false);
7375 ix86_expand_sse_unpack (operands, false, false);
7379 (define_expand "vec_unpacku_hi_v4si"
7380 [(match_operand:V2DI 0 "register_operand" "")
7381 (match_operand:V4SI 1 "register_operand" "")]
7385 ix86_expand_sse4_unpack (operands, true, true);
7386 else if (TARGET_SSE5)
7387 ix86_expand_sse5_unpack (operands, true, true);
7389 ix86_expand_sse_unpack (operands, true, true);
7393 (define_expand "vec_unpacks_hi_v4si"
7394 [(match_operand:V2DI 0 "register_operand" "")
7395 (match_operand:V4SI 1 "register_operand" "")]
7399 ix86_expand_sse4_unpack (operands, false, true);
7400 else if (TARGET_SSE5)
7401 ix86_expand_sse5_unpack (operands, false, true);
7403 ix86_expand_sse_unpack (operands, false, true);
7407 (define_expand "vec_unpacku_lo_v4si"
7408 [(match_operand:V2DI 0 "register_operand" "")
7409 (match_operand:V4SI 1 "register_operand" "")]
7413 ix86_expand_sse4_unpack (operands, true, false);
7414 else if (TARGET_SSE5)
7415 ix86_expand_sse5_unpack (operands, true, false);
7417 ix86_expand_sse_unpack (operands, true, false);
7421 (define_expand "vec_unpacks_lo_v4si"
7422 [(match_operand:V2DI 0 "register_operand" "")
7423 (match_operand:V4SI 1 "register_operand" "")]
7427 ix86_expand_sse4_unpack (operands, false, false);
7428 else if (TARGET_SSE5)
7429 ix86_expand_sse5_unpack (operands, false, false);
7431 ix86_expand_sse_unpack (operands, false, false);
7435 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7439 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7441 (define_expand "sse2_uavgv16qi3"
7442 [(set (match_operand:V16QI 0 "register_operand" "")
7448 (match_operand:V16QI 1 "nonimmediate_operand" ""))
7450 (match_operand:V16QI 2 "nonimmediate_operand" "")))
7451 (const_vector:V16QI [(const_int 1) (const_int 1)
7452 (const_int 1) (const_int 1)
7453 (const_int 1) (const_int 1)
7454 (const_int 1) (const_int 1)
7455 (const_int 1) (const_int 1)
7456 (const_int 1) (const_int 1)
7457 (const_int 1) (const_int 1)
7458 (const_int 1) (const_int 1)]))
7461 "ix86_fixup_binary_operands_no_copy (PLUS, V16QImode, operands);")
7463 (define_insn "*avx_uavgv16qi3"
7464 [(set (match_operand:V16QI 0 "register_operand" "=x")
7470 (match_operand:V16QI 1 "nonimmediate_operand" "%x"))
7472 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))
7473 (const_vector:V16QI [(const_int 1) (const_int 1)
7474 (const_int 1) (const_int 1)
7475 (const_int 1) (const_int 1)
7476 (const_int 1) (const_int 1)
7477 (const_int 1) (const_int 1)
7478 (const_int 1) (const_int 1)
7479 (const_int 1) (const_int 1)
7480 (const_int 1) (const_int 1)]))
7482 "TARGET_AVX && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
7483 "vpavgb\t{%2, %1, %0|%0, %1, %2}"
7484 [(set_attr "type" "sseiadd")
7485 (set_attr "prefix" "vex")
7486 (set_attr "mode" "TI")])
7488 (define_insn "*sse2_uavgv16qi3"
7489 [(set (match_operand:V16QI 0 "register_operand" "=x")
7495 (match_operand:V16QI 1 "nonimmediate_operand" "%0"))
7497 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))
7498 (const_vector:V16QI [(const_int 1) (const_int 1)
7499 (const_int 1) (const_int 1)
7500 (const_int 1) (const_int 1)
7501 (const_int 1) (const_int 1)
7502 (const_int 1) (const_int 1)
7503 (const_int 1) (const_int 1)
7504 (const_int 1) (const_int 1)
7505 (const_int 1) (const_int 1)]))
7507 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
7508 "pavgb\t{%2, %0|%0, %2}"
7509 [(set_attr "type" "sseiadd")
7510 (set_attr "prefix_data16" "1")
7511 (set_attr "mode" "TI")])
7513 (define_expand "sse2_uavgv8hi3"
7514 [(set (match_operand:V8HI 0 "register_operand" "")
7520 (match_operand:V8HI 1 "nonimmediate_operand" ""))
7522 (match_operand:V8HI 2 "nonimmediate_operand" "")))
7523 (const_vector:V8HI [(const_int 1) (const_int 1)
7524 (const_int 1) (const_int 1)
7525 (const_int 1) (const_int 1)
7526 (const_int 1) (const_int 1)]))
7529 "ix86_fixup_binary_operands_no_copy (PLUS, V8HImode, operands);")
7531 (define_insn "*avx_uavgv8hi3"
7532 [(set (match_operand:V8HI 0 "register_operand" "=x")
7538 (match_operand:V8HI 1 "nonimmediate_operand" "%x"))
7540 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
7541 (const_vector:V8HI [(const_int 1) (const_int 1)
7542 (const_int 1) (const_int 1)
7543 (const_int 1) (const_int 1)
7544 (const_int 1) (const_int 1)]))
7546 "TARGET_AVX && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
7547 "vpavgw\t{%2, %1, %0|%0, %1, %2}"
7548 [(set_attr "type" "sseiadd")
7549 (set_attr "prefix" "vex")
7550 (set_attr "mode" "TI")])
7552 (define_insn "*sse2_uavgv8hi3"
7553 [(set (match_operand:V8HI 0 "register_operand" "=x")
7559 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
7561 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
7562 (const_vector:V8HI [(const_int 1) (const_int 1)
7563 (const_int 1) (const_int 1)
7564 (const_int 1) (const_int 1)
7565 (const_int 1) (const_int 1)]))
7567 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
7568 "pavgw\t{%2, %0|%0, %2}"
7569 [(set_attr "type" "sseiadd")
7570 (set_attr "prefix_data16" "1")
7571 (set_attr "mode" "TI")])
7573 ;; The correct representation for this is absolutely enormous, and
7574 ;; surely not generally useful.
7575 (define_insn "*avx_psadbw"
7576 [(set (match_operand:V2DI 0 "register_operand" "=x")
7577 (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "x")
7578 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
7581 "vpsadbw\t{%2, %1, %0|%0, %1, %2}"
7582 [(set_attr "type" "sseiadd")
7583 (set_attr "prefix" "vex")
7584 (set_attr "mode" "TI")])
7586 (define_insn "sse2_psadbw"
7587 [(set (match_operand:V2DI 0 "register_operand" "=x")
7588 (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "0")
7589 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
7592 "psadbw\t{%2, %0|%0, %2}"
7593 [(set_attr "type" "sseiadd")
7594 (set_attr "prefix_data16" "1")
7595 (set_attr "mode" "TI")])
7597 (define_insn "avx_movmskp<avxmodesuffixf2c>256"
7598 [(set (match_operand:SI 0 "register_operand" "=r")
7600 [(match_operand:AVX256MODEF2P 1 "register_operand" "x")]
7602 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
7603 "vmovmskp<avxmodesuffixf2c>\t{%1, %0|%0, %1}"
7604 [(set_attr "type" "ssecvt")
7605 (set_attr "prefix" "vex")
7606 (set_attr "mode" "<MODE>")])
7608 (define_insn "<sse>_movmskp<ssemodesuffixf2c>"
7609 [(set (match_operand:SI 0 "register_operand" "=r")
7611 [(match_operand:SSEMODEF2P 1 "register_operand" "x")]
7613 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
7614 "%vmovmskp<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
7615 [(set_attr "type" "ssecvt")
7616 (set_attr "prefix" "maybe_vex")
7617 (set_attr "mode" "<MODE>")])
7619 (define_insn "sse2_pmovmskb"
7620 [(set (match_operand:SI 0 "register_operand" "=r")
7621 (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
7624 "%vpmovmskb\t{%1, %0|%0, %1}"
7625 [(set_attr "type" "ssecvt")
7626 (set_attr "prefix_data16" "1")
7627 (set_attr "prefix" "maybe_vex")
7628 (set_attr "mode" "SI")])
7630 (define_expand "sse2_maskmovdqu"
7631 [(set (match_operand:V16QI 0 "memory_operand" "")
7632 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "")
7633 (match_operand:V16QI 2 "register_operand" "")
7639 (define_insn "*sse2_maskmovdqu"
7640 [(set (mem:V16QI (match_operand:SI 0 "register_operand" "D"))
7641 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
7642 (match_operand:V16QI 2 "register_operand" "x")
7643 (mem:V16QI (match_dup 0))]
7645 "TARGET_SSE2 && !TARGET_64BIT"
7646 ;; @@@ check ordering of operands in intel/nonintel syntax
7647 "%vmaskmovdqu\t{%2, %1|%1, %2}"
7648 [(set_attr "type" "ssecvt")
7649 (set_attr "prefix_data16" "1")
7650 (set_attr "prefix" "maybe_vex")
7651 (set_attr "mode" "TI")])
7653 (define_insn "*sse2_maskmovdqu_rex64"
7654 [(set (mem:V16QI (match_operand:DI 0 "register_operand" "D"))
7655 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
7656 (match_operand:V16QI 2 "register_operand" "x")
7657 (mem:V16QI (match_dup 0))]
7659 "TARGET_SSE2 && TARGET_64BIT"
7660 ;; @@@ check ordering of operands in intel/nonintel syntax
7661 "%vmaskmovdqu\t{%2, %1|%1, %2}"
7662 [(set_attr "type" "ssecvt")
7663 (set_attr "prefix_data16" "1")
7664 (set_attr "prefix" "maybe_vex")
7665 (set_attr "mode" "TI")])
7667 (define_insn "sse_ldmxcsr"
7668 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
7672 [(set_attr "type" "sse")
7673 (set_attr "prefix" "maybe_vex")
7674 (set_attr "memory" "load")])
7676 (define_insn "sse_stmxcsr"
7677 [(set (match_operand:SI 0 "memory_operand" "=m")
7678 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
7681 [(set_attr "type" "sse")
7682 (set_attr "prefix" "maybe_vex")
7683 (set_attr "memory" "store")])
7685 (define_expand "sse_sfence"
7687 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
7688 "TARGET_SSE || TARGET_3DNOW_A"
7690 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
7691 MEM_VOLATILE_P (operands[0]) = 1;
7694 (define_insn "*sse_sfence"
7695 [(set (match_operand:BLK 0 "" "")
7696 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
7697 "TARGET_SSE || TARGET_3DNOW_A"
7699 [(set_attr "type" "sse")
7700 (set_attr "memory" "unknown")])
7702 (define_insn "sse2_clflush"
7703 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
7707 [(set_attr "type" "sse")
7708 (set_attr "memory" "unknown")])
7710 (define_expand "sse2_mfence"
7712 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
7715 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
7716 MEM_VOLATILE_P (operands[0]) = 1;
7719 (define_insn "*sse2_mfence"
7720 [(set (match_operand:BLK 0 "" "")
7721 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
7724 [(set_attr "type" "sse")
7725 (set_attr "memory" "unknown")])
7727 (define_expand "sse2_lfence"
7729 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
7732 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
7733 MEM_VOLATILE_P (operands[0]) = 1;
7736 (define_insn "*sse2_lfence"
7737 [(set (match_operand:BLK 0 "" "")
7738 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
7741 [(set_attr "type" "sse")
7742 (set_attr "memory" "unknown")])
7744 (define_insn "sse3_mwait"
7745 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
7746 (match_operand:SI 1 "register_operand" "c")]
7749 ;; 64bit version is "mwait %rax,%rcx". But only lower 32bits are used.
7750 ;; Since 32bit register operands are implicitly zero extended to 64bit,
7751 ;; we only need to set up 32bit registers.
7753 [(set_attr "length" "3")])
7755 (define_insn "sse3_monitor"
7756 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
7757 (match_operand:SI 1 "register_operand" "c")
7758 (match_operand:SI 2 "register_operand" "d")]
7760 "TARGET_SSE3 && !TARGET_64BIT"
7761 "monitor\t%0, %1, %2"
7762 [(set_attr "length" "3")])
7764 (define_insn "sse3_monitor64"
7765 [(unspec_volatile [(match_operand:DI 0 "register_operand" "a")
7766 (match_operand:SI 1 "register_operand" "c")
7767 (match_operand:SI 2 "register_operand" "d")]
7769 "TARGET_SSE3 && TARGET_64BIT"
7770 ;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in
7771 ;; RCX and RDX are used. Since 32bit register operands are implicitly
7772 ;; zero extended to 64bit, we only need to set up 32bit registers.
7774 [(set_attr "length" "3")])
7776 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7778 ;; SSSE3 instructions
7780 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7782 (define_insn "*avx_phaddwv8hi3"
7783 [(set (match_operand:V8HI 0 "register_operand" "=x")
7789 (match_operand:V8HI 1 "register_operand" "x")
7790 (parallel [(const_int 0)]))
7791 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
7793 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
7794 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
7797 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
7798 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
7800 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
7801 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
7806 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
7807 (parallel [(const_int 0)]))
7808 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
7810 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
7811 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
7814 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
7815 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
7817 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
7818 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
7820 "vphaddw\t{%2, %1, %0|%0, %1, %2}"
7821 [(set_attr "type" "sseiadd")
7822 (set_attr "prefix" "vex")
7823 (set_attr "mode" "TI")])
7825 (define_insn "ssse3_phaddwv8hi3"
7826 [(set (match_operand:V8HI 0 "register_operand" "=x")
7832 (match_operand:V8HI 1 "register_operand" "0")
7833 (parallel [(const_int 0)]))
7834 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
7836 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
7837 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
7840 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
7841 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
7843 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
7844 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
7849 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
7850 (parallel [(const_int 0)]))
7851 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
7853 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
7854 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
7857 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
7858 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
7860 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
7861 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
7863 "phaddw\t{%2, %0|%0, %2}"
7864 [(set_attr "type" "sseiadd")
7865 (set_attr "prefix_data16" "1")
7866 (set_attr "prefix_extra" "1")
7867 (set_attr "mode" "TI")])
7869 (define_insn "ssse3_phaddwv4hi3"
7870 [(set (match_operand:V4HI 0 "register_operand" "=y")
7875 (match_operand:V4HI 1 "register_operand" "0")
7876 (parallel [(const_int 0)]))
7877 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
7879 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
7880 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
7884 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
7885 (parallel [(const_int 0)]))
7886 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
7888 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
7889 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
7891 "phaddw\t{%2, %0|%0, %2}"
7892 [(set_attr "type" "sseiadd")
7893 (set_attr "prefix_extra" "1")
7894 (set_attr "mode" "DI")])
7896 (define_insn "*avx_phadddv4si3"
7897 [(set (match_operand:V4SI 0 "register_operand" "=x")
7902 (match_operand:V4SI 1 "register_operand" "x")
7903 (parallel [(const_int 0)]))
7904 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
7906 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
7907 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
7911 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
7912 (parallel [(const_int 0)]))
7913 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
7915 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
7916 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
7918 "vphaddd\t{%2, %1, %0|%0, %1, %2}"
7919 [(set_attr "type" "sseiadd")
7920 (set_attr "prefix" "vex")
7921 (set_attr "mode" "TI")])
7923 (define_insn "ssse3_phadddv4si3"
7924 [(set (match_operand:V4SI 0 "register_operand" "=x")
7929 (match_operand:V4SI 1 "register_operand" "0")
7930 (parallel [(const_int 0)]))
7931 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
7933 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
7934 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
7938 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
7939 (parallel [(const_int 0)]))
7940 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
7942 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
7943 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
7945 "phaddd\t{%2, %0|%0, %2}"
7946 [(set_attr "type" "sseiadd")
7947 (set_attr "prefix_data16" "1")
7948 (set_attr "prefix_extra" "1")
7949 (set_attr "mode" "TI")])
7951 (define_insn "ssse3_phadddv2si3"
7952 [(set (match_operand:V2SI 0 "register_operand" "=y")
7956 (match_operand:V2SI 1 "register_operand" "0")
7957 (parallel [(const_int 0)]))
7958 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
7961 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
7962 (parallel [(const_int 0)]))
7963 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
7965 "phaddd\t{%2, %0|%0, %2}"
7966 [(set_attr "type" "sseiadd")
7967 (set_attr "prefix_extra" "1")
7968 (set_attr "mode" "DI")])
7970 (define_insn "*avx_phaddswv8hi3"
7971 [(set (match_operand:V8HI 0 "register_operand" "=x")
7977 (match_operand:V8HI 1 "register_operand" "x")
7978 (parallel [(const_int 0)]))
7979 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
7981 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
7982 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
7985 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
7986 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
7988 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
7989 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
7994 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
7995 (parallel [(const_int 0)]))
7996 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
7998 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
7999 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8002 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8003 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8005 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8006 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8008 "vphaddsw\t{%2, %1, %0|%0, %1, %2}"
8009 [(set_attr "type" "sseiadd")
8010 (set_attr "prefix" "vex")
8011 (set_attr "mode" "TI")])
8013 (define_insn "ssse3_phaddswv8hi3"
8014 [(set (match_operand:V8HI 0 "register_operand" "=x")
8020 (match_operand:V8HI 1 "register_operand" "0")
8021 (parallel [(const_int 0)]))
8022 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8024 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8025 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8028 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8029 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8031 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8032 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8037 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8038 (parallel [(const_int 0)]))
8039 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8041 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8042 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8045 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8046 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8048 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8049 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8051 "phaddsw\t{%2, %0|%0, %2}"
8052 [(set_attr "type" "sseiadd")
8053 (set_attr "prefix_data16" "1")
8054 (set_attr "prefix_extra" "1")
8055 (set_attr "mode" "TI")])
8057 (define_insn "ssse3_phaddswv4hi3"
8058 [(set (match_operand:V4HI 0 "register_operand" "=y")
8063 (match_operand:V4HI 1 "register_operand" "0")
8064 (parallel [(const_int 0)]))
8065 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8067 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8068 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8072 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8073 (parallel [(const_int 0)]))
8074 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8076 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8077 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8079 "phaddsw\t{%2, %0|%0, %2}"
8080 [(set_attr "type" "sseiadd")
8081 (set_attr "prefix_extra" "1")
8082 (set_attr "mode" "DI")])
8084 (define_insn "*avx_phsubwv8hi3"
8085 [(set (match_operand:V8HI 0 "register_operand" "=x")
8091 (match_operand:V8HI 1 "register_operand" "x")
8092 (parallel [(const_int 0)]))
8093 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8095 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8096 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8099 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8100 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8102 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8103 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8108 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8109 (parallel [(const_int 0)]))
8110 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8112 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8113 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8116 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8117 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8119 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8120 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8122 "vphsubw\t{%2, %1, %0|%0, %1, %2}"
8123 [(set_attr "type" "sseiadd")
8124 (set_attr "prefix" "vex")
8125 (set_attr "mode" "TI")])
8127 (define_insn "ssse3_phsubwv8hi3"
8128 [(set (match_operand:V8HI 0 "register_operand" "=x")
8134 (match_operand:V8HI 1 "register_operand" "0")
8135 (parallel [(const_int 0)]))
8136 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8138 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8139 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8142 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8143 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8145 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8146 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8151 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8152 (parallel [(const_int 0)]))
8153 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8155 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8156 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8159 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8160 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8162 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8163 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8165 "phsubw\t{%2, %0|%0, %2}"
8166 [(set_attr "type" "sseiadd")
8167 (set_attr "prefix_data16" "1")
8168 (set_attr "prefix_extra" "1")
8169 (set_attr "mode" "TI")])
8171 (define_insn "ssse3_phsubwv4hi3"
8172 [(set (match_operand:V4HI 0 "register_operand" "=y")
8177 (match_operand:V4HI 1 "register_operand" "0")
8178 (parallel [(const_int 0)]))
8179 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8181 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8182 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8186 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8187 (parallel [(const_int 0)]))
8188 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8190 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8191 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8193 "phsubw\t{%2, %0|%0, %2}"
8194 [(set_attr "type" "sseiadd")
8195 (set_attr "prefix_extra" "1")
8196 (set_attr "mode" "DI")])
8198 (define_insn "*avx_phsubdv4si3"
8199 [(set (match_operand:V4SI 0 "register_operand" "=x")
8204 (match_operand:V4SI 1 "register_operand" "x")
8205 (parallel [(const_int 0)]))
8206 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8208 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8209 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8213 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
8214 (parallel [(const_int 0)]))
8215 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8217 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8218 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
8220 "vphsubd\t{%2, %1, %0|%0, %1, %2}"
8221 [(set_attr "type" "sseiadd")
8222 (set_attr "prefix" "vex")
8223 (set_attr "mode" "TI")])
8225 (define_insn "ssse3_phsubdv4si3"
8226 [(set (match_operand:V4SI 0 "register_operand" "=x")
8231 (match_operand:V4SI 1 "register_operand" "0")
8232 (parallel [(const_int 0)]))
8233 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8235 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8236 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8240 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
8241 (parallel [(const_int 0)]))
8242 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8244 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8245 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
8247 "phsubd\t{%2, %0|%0, %2}"
8248 [(set_attr "type" "sseiadd")
8249 (set_attr "prefix_data16" "1")
8250 (set_attr "prefix_extra" "1")
8251 (set_attr "mode" "TI")])
8253 (define_insn "ssse3_phsubdv2si3"
8254 [(set (match_operand:V2SI 0 "register_operand" "=y")
8258 (match_operand:V2SI 1 "register_operand" "0")
8259 (parallel [(const_int 0)]))
8260 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8263 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
8264 (parallel [(const_int 0)]))
8265 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
8267 "phsubd\t{%2, %0|%0, %2}"
8268 [(set_attr "type" "sseiadd")
8269 (set_attr "prefix_extra" "1")
8270 (set_attr "mode" "DI")])
8272 (define_insn "*avx_phsubswv8hi3"
8273 [(set (match_operand:V8HI 0 "register_operand" "=x")
8279 (match_operand:V8HI 1 "register_operand" "x")
8280 (parallel [(const_int 0)]))
8281 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8283 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8284 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8287 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8288 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8290 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8291 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8296 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8297 (parallel [(const_int 0)]))
8298 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8300 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8301 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8304 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8305 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8307 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8308 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8310 "vphsubsw\t{%2, %1, %0|%0, %1, %2}"
8311 [(set_attr "type" "sseiadd")
8312 (set_attr "prefix" "vex")
8313 (set_attr "mode" "TI")])
8315 (define_insn "ssse3_phsubswv8hi3"
8316 [(set (match_operand:V8HI 0 "register_operand" "=x")
8322 (match_operand:V8HI 1 "register_operand" "0")
8323 (parallel [(const_int 0)]))
8324 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8326 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8327 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8330 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8331 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8333 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8334 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8339 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8340 (parallel [(const_int 0)]))
8341 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8343 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8344 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8347 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8348 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8350 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8351 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8353 "phsubsw\t{%2, %0|%0, %2}"
8354 [(set_attr "type" "sseiadd")
8355 (set_attr "prefix_data16" "1")
8356 (set_attr "prefix_extra" "1")
8357 (set_attr "mode" "TI")])
8359 (define_insn "ssse3_phsubswv4hi3"
8360 [(set (match_operand:V4HI 0 "register_operand" "=y")
8365 (match_operand:V4HI 1 "register_operand" "0")
8366 (parallel [(const_int 0)]))
8367 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8369 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8370 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8374 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8375 (parallel [(const_int 0)]))
8376 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8378 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8379 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8381 "phsubsw\t{%2, %0|%0, %2}"
8382 [(set_attr "type" "sseiadd")
8383 (set_attr "prefix_extra" "1")
8384 (set_attr "mode" "DI")])
8386 (define_insn "*avx_pmaddubsw128"
8387 [(set (match_operand:V8HI 0 "register_operand" "=x")
8392 (match_operand:V16QI 1 "register_operand" "x")
8393 (parallel [(const_int 0)
8403 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
8404 (parallel [(const_int 0)
8414 (vec_select:V16QI (match_dup 1)
8415 (parallel [(const_int 1)
8424 (vec_select:V16QI (match_dup 2)
8425 (parallel [(const_int 1)
8432 (const_int 15)]))))))]
8434 "vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
8435 [(set_attr "type" "sseiadd")
8436 (set_attr "prefix" "vex")
8437 (set_attr "mode" "TI")])
8439 (define_insn "ssse3_pmaddubsw128"
8440 [(set (match_operand:V8HI 0 "register_operand" "=x")
8445 (match_operand:V16QI 1 "register_operand" "0")
8446 (parallel [(const_int 0)
8456 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
8457 (parallel [(const_int 0)
8467 (vec_select:V16QI (match_dup 1)
8468 (parallel [(const_int 1)
8477 (vec_select:V16QI (match_dup 2)
8478 (parallel [(const_int 1)
8485 (const_int 15)]))))))]
8487 "pmaddubsw\t{%2, %0|%0, %2}"
8488 [(set_attr "type" "sseiadd")
8489 (set_attr "prefix_data16" "1")
8490 (set_attr "prefix_extra" "1")
8491 (set_attr "mode" "TI")])
8493 (define_insn "ssse3_pmaddubsw"
8494 [(set (match_operand:V4HI 0 "register_operand" "=y")
8499 (match_operand:V8QI 1 "register_operand" "0")
8500 (parallel [(const_int 0)
8506 (match_operand:V8QI 2 "nonimmediate_operand" "ym")
8507 (parallel [(const_int 0)
8513 (vec_select:V8QI (match_dup 1)
8514 (parallel [(const_int 1)
8519 (vec_select:V8QI (match_dup 2)
8520 (parallel [(const_int 1)
8523 (const_int 7)]))))))]
8525 "pmaddubsw\t{%2, %0|%0, %2}"
8526 [(set_attr "type" "sseiadd")
8527 (set_attr "prefix_extra" "1")
8528 (set_attr "mode" "DI")])
8530 (define_expand "ssse3_pmulhrswv8hi3"
8531 [(set (match_operand:V8HI 0 "register_operand" "")
8538 (match_operand:V8HI 1 "nonimmediate_operand" ""))
8540 (match_operand:V8HI 2 "nonimmediate_operand" "")))
8542 (const_vector:V8HI [(const_int 1) (const_int 1)
8543 (const_int 1) (const_int 1)
8544 (const_int 1) (const_int 1)
8545 (const_int 1) (const_int 1)]))
8548 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
8550 (define_insn "*avx_pmulhrswv8hi3"
8551 [(set (match_operand:V8HI 0 "register_operand" "=x")
8558 (match_operand:V8HI 1 "nonimmediate_operand" "%x"))
8560 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
8562 (const_vector:V8HI [(const_int 1) (const_int 1)
8563 (const_int 1) (const_int 1)
8564 (const_int 1) (const_int 1)
8565 (const_int 1) (const_int 1)]))
8567 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
8568 "vpmulhrsw\t{%2, %1, %0|%0, %1, %2}"
8569 [(set_attr "type" "sseimul")
8570 (set_attr "prefix" "vex")
8571 (set_attr "mode" "TI")])
8573 (define_insn "*ssse3_pmulhrswv8hi3"
8574 [(set (match_operand:V8HI 0 "register_operand" "=x")
8581 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
8583 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
8585 (const_vector:V8HI [(const_int 1) (const_int 1)
8586 (const_int 1) (const_int 1)
8587 (const_int 1) (const_int 1)
8588 (const_int 1) (const_int 1)]))
8590 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
8591 "pmulhrsw\t{%2, %0|%0, %2}"
8592 [(set_attr "type" "sseimul")
8593 (set_attr "prefix_data16" "1")
8594 (set_attr "prefix_extra" "1")
8595 (set_attr "mode" "TI")])
8597 (define_expand "ssse3_pmulhrswv4hi3"
8598 [(set (match_operand:V4HI 0 "register_operand" "")
8605 (match_operand:V4HI 1 "nonimmediate_operand" ""))
8607 (match_operand:V4HI 2 "nonimmediate_operand" "")))
8609 (const_vector:V4HI [(const_int 1) (const_int 1)
8610 (const_int 1) (const_int 1)]))
8613 "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
8615 (define_insn "*ssse3_pmulhrswv4hi3"
8616 [(set (match_operand:V4HI 0 "register_operand" "=y")
8623 (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
8625 (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
8627 (const_vector:V4HI [(const_int 1) (const_int 1)
8628 (const_int 1) (const_int 1)]))
8630 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
8631 "pmulhrsw\t{%2, %0|%0, %2}"
8632 [(set_attr "type" "sseimul")
8633 (set_attr "prefix_extra" "1")
8634 (set_attr "mode" "DI")])
8636 (define_insn "*avx_pshufbv16qi3"
8637 [(set (match_operand:V16QI 0 "register_operand" "=x")
8638 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
8639 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
8642 "vpshufb\t{%2, %1, %0|%0, %1, %2}";
8643 [(set_attr "type" "sselog1")
8644 (set_attr "prefix" "vex")
8645 (set_attr "mode" "TI")])
8647 (define_insn "ssse3_pshufbv16qi3"
8648 [(set (match_operand:V16QI 0 "register_operand" "=x")
8649 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
8650 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
8653 "pshufb\t{%2, %0|%0, %2}";
8654 [(set_attr "type" "sselog1")
8655 (set_attr "prefix_data16" "1")
8656 (set_attr "prefix_extra" "1")
8657 (set_attr "mode" "TI")])
8659 (define_insn "ssse3_pshufbv8qi3"
8660 [(set (match_operand:V8QI 0 "register_operand" "=y")
8661 (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0")
8662 (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
8665 "pshufb\t{%2, %0|%0, %2}";
8666 [(set_attr "type" "sselog1")
8667 (set_attr "prefix_extra" "1")
8668 (set_attr "mode" "DI")])
8670 (define_insn "*avx_psign<mode>3"
8671 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
8673 [(match_operand:SSEMODE124 1 "register_operand" "x")
8674 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")]
8677 "vpsign<ssevecsize>\t{%2, %1, %0|%0, %1, %2}";
8678 [(set_attr "type" "sselog1")
8679 (set_attr "prefix" "vex")
8680 (set_attr "mode" "TI")])
8682 (define_insn "ssse3_psign<mode>3"
8683 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
8685 [(match_operand:SSEMODE124 1 "register_operand" "0")
8686 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")]
8689 "psign<ssevecsize>\t{%2, %0|%0, %2}";
8690 [(set_attr "type" "sselog1")
8691 (set_attr "prefix_data16" "1")
8692 (set_attr "prefix_extra" "1")
8693 (set_attr "mode" "TI")])
8695 (define_insn "ssse3_psign<mode>3"
8696 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
8698 [(match_operand:MMXMODEI 1 "register_operand" "0")
8699 (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
8702 "psign<mmxvecsize>\t{%2, %0|%0, %2}";
8703 [(set_attr "type" "sselog1")
8704 (set_attr "prefix_extra" "1")
8705 (set_attr "mode" "DI")])
8707 (define_insn "*avx_palignrti"
8708 [(set (match_operand:TI 0 "register_operand" "=x")
8709 (unspec:TI [(match_operand:TI 1 "register_operand" "x")
8710 (match_operand:TI 2 "nonimmediate_operand" "xm")
8711 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
8715 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
8716 return "vpalignr\t{%3, %2, %1, %0|%0, %1, %2, %3}";
8718 [(set_attr "type" "sseishft")
8719 (set_attr "prefix" "vex")
8720 (set_attr "mode" "TI")])
8722 (define_insn "ssse3_palignrti"
8723 [(set (match_operand:TI 0 "register_operand" "=x")
8724 (unspec:TI [(match_operand:TI 1 "register_operand" "0")
8725 (match_operand:TI 2 "nonimmediate_operand" "xm")
8726 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
8730 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
8731 return "palignr\t{%3, %2, %0|%0, %2, %3}";
8733 [(set_attr "type" "sseishft")
8734 (set_attr "prefix_data16" "1")
8735 (set_attr "prefix_extra" "1")
8736 (set_attr "mode" "TI")])
8738 (define_insn "ssse3_palignrdi"
8739 [(set (match_operand:DI 0 "register_operand" "=y")
8740 (unspec:DI [(match_operand:DI 1 "register_operand" "0")
8741 (match_operand:DI 2 "nonimmediate_operand" "ym")
8742 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
8746 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
8747 return "palignr\t{%3, %2, %0|%0, %2, %3}";
8749 [(set_attr "type" "sseishft")
8750 (set_attr "prefix_extra" "1")
8751 (set_attr "mode" "DI")])
8753 (define_insn "abs<mode>2"
8754 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
8755 (abs:SSEMODE124 (match_operand:SSEMODE124 1 "nonimmediate_operand" "xm")))]
8757 "%vpabs<ssevecsize>\t{%1, %0|%0, %1}"
8758 [(set_attr "type" "sselog1")
8759 (set_attr "prefix_data16" "1")
8760 (set_attr "prefix_extra" "1")
8761 (set_attr "prefix" "maybe_vex")
8762 (set_attr "mode" "TI")])
8764 (define_insn "abs<mode>2"
8765 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
8766 (abs:MMXMODEI (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
8768 "pabs<mmxvecsize>\t{%1, %0|%0, %1}";
8769 [(set_attr "type" "sselog1")
8770 (set_attr "prefix_extra" "1")
8771 (set_attr "mode" "DI")])
8773 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8775 ;; AMD SSE4A instructions
8777 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8779 (define_insn "sse4a_movnt<mode>"
8780 [(set (match_operand:MODEF 0 "memory_operand" "=m")
8782 [(match_operand:MODEF 1 "register_operand" "x")]
8785 "movnts<ssemodefsuffix>\t{%1, %0|%0, %1}"
8786 [(set_attr "type" "ssemov")
8787 (set_attr "mode" "<MODE>")])
8789 (define_insn "sse4a_vmmovnt<mode>"
8790 [(set (match_operand:<ssescalarmode> 0 "memory_operand" "=m")
8791 (unspec:<ssescalarmode>
8792 [(vec_select:<ssescalarmode>
8793 (match_operand:SSEMODEF2P 1 "register_operand" "x")
8794 (parallel [(const_int 0)]))]
8797 "movnts<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
8798 [(set_attr "type" "ssemov")
8799 (set_attr "mode" "<ssescalarmode>")])
8801 (define_insn "sse4a_extrqi"
8802 [(set (match_operand:V2DI 0 "register_operand" "=x")
8803 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
8804 (match_operand 2 "const_int_operand" "")
8805 (match_operand 3 "const_int_operand" "")]
8808 "extrq\t{%3, %2, %0|%0, %2, %3}"
8809 [(set_attr "type" "sse")
8810 (set_attr "prefix_data16" "1")
8811 (set_attr "mode" "TI")])
8813 (define_insn "sse4a_extrq"
8814 [(set (match_operand:V2DI 0 "register_operand" "=x")
8815 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
8816 (match_operand:V16QI 2 "register_operand" "x")]
8819 "extrq\t{%2, %0|%0, %2}"
8820 [(set_attr "type" "sse")
8821 (set_attr "prefix_data16" "1")
8822 (set_attr "mode" "TI")])
8824 (define_insn "sse4a_insertqi"
8825 [(set (match_operand:V2DI 0 "register_operand" "=x")
8826 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
8827 (match_operand:V2DI 2 "register_operand" "x")
8828 (match_operand 3 "const_int_operand" "")
8829 (match_operand 4 "const_int_operand" "")]
8832 "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
8833 [(set_attr "type" "sseins")
8834 (set_attr "prefix_rep" "1")
8835 (set_attr "mode" "TI")])
8837 (define_insn "sse4a_insertq"
8838 [(set (match_operand:V2DI 0 "register_operand" "=x")
8839 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
8840 (match_operand:V2DI 2 "register_operand" "x")]
8843 "insertq\t{%2, %0|%0, %2}"
8844 [(set_attr "type" "sseins")
8845 (set_attr "prefix_rep" "1")
8846 (set_attr "mode" "TI")])
8848 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8850 ;; Intel SSE4.1 instructions
8852 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8854 (define_insn "avx_blendp<avxmodesuffixf2c><avxmodesuffix>"
8855 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
8856 (vec_merge:AVXMODEF2P
8857 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
8858 (match_operand:AVXMODEF2P 1 "register_operand" "x")
8859 (match_operand:SI 3 "const_0_to_<blendbits>_operand" "n")))]
8861 "vblendp<avxmodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8862 [(set_attr "type" "ssemov")
8863 (set_attr "prefix" "vex")
8864 (set_attr "mode" "<avxvecmode>")])
8866 (define_insn "avx_blendvp<avxmodesuffixf2c><avxmodesuffix>"
8867 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
8869 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
8870 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
8871 (match_operand:AVXMODEF2P 3 "register_operand" "x")]
8874 "vblendvp<avxmodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8875 [(set_attr "type" "ssemov")
8876 (set_attr "prefix" "vex")
8877 (set_attr "mode" "<avxvecmode>")])
8879 (define_insn "sse4_1_blendp<ssemodesuffixf2c>"
8880 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
8881 (vec_merge:SSEMODEF2P
8882 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
8883 (match_operand:SSEMODEF2P 1 "register_operand" "0")
8884 (match_operand:SI 3 "const_0_to_<blendbits>_operand" "n")))]
8886 "blendp<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}"
8887 [(set_attr "type" "ssemov")
8888 (set_attr "prefix_extra" "1")
8889 (set_attr "mode" "<MODE>")])
8891 (define_insn "sse4_1_blendvp<ssemodesuffixf2c>"
8892 [(set (match_operand:SSEMODEF2P 0 "reg_not_xmm0_operand" "=x")
8894 [(match_operand:SSEMODEF2P 1 "reg_not_xmm0_operand" "0")
8895 (match_operand:SSEMODEF2P 2 "nonimm_not_xmm0_operand" "xm")
8896 (match_operand:SSEMODEF2P 3 "register_operand" "Yz")]
8899 "blendvp<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}"
8900 [(set_attr "type" "ssemov")
8901 (set_attr "prefix_extra" "1")
8902 (set_attr "mode" "<MODE>")])
8904 (define_insn "avx_dpp<avxmodesuffixf2c><avxmodesuffix>"
8905 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
8907 [(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
8908 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
8909 (match_operand:SI 3 "const_0_to_255_operand" "n")]
8912 "vdpp<avxmodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8913 [(set_attr "type" "ssemul")
8914 (set_attr "prefix" "vex")
8915 (set_attr "mode" "<avxvecmode>")])
8917 (define_insn "sse4_1_dpp<ssemodesuffixf2c>"
8918 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
8920 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
8921 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
8922 (match_operand:SI 3 "const_0_to_255_operand" "n")]
8925 "dpp<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}"
8926 [(set_attr "type" "ssemul")
8927 (set_attr "prefix_extra" "1")
8928 (set_attr "mode" "<MODE>")])
8930 (define_insn "sse4_1_movntdqa"
8931 [(set (match_operand:V2DI 0 "register_operand" "=x")
8932 (unspec:V2DI [(match_operand:V2DI 1 "memory_operand" "m")]
8935 "%vmovntdqa\t{%1, %0|%0, %1}"
8936 [(set_attr "type" "ssecvt")
8937 (set_attr "prefix_extra" "1")
8938 (set_attr "prefix" "maybe_vex")
8939 (set_attr "mode" "TI")])
8941 (define_insn "*avx_mpsadbw"
8942 [(set (match_operand:V16QI 0 "register_operand" "=x")
8943 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
8944 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
8945 (match_operand:SI 3 "const_0_to_255_operand" "n")]
8948 "vmpsadbw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8949 [(set_attr "type" "sselog1")
8950 (set_attr "prefix" "vex")
8951 (set_attr "mode" "TI")])
8953 (define_insn "sse4_1_mpsadbw"
8954 [(set (match_operand:V16QI 0 "register_operand" "=x")
8955 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
8956 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
8957 (match_operand:SI 3 "const_0_to_255_operand" "n")]
8960 "mpsadbw\t{%3, %2, %0|%0, %2, %3}"
8961 [(set_attr "type" "sselog1")
8962 (set_attr "prefix_extra" "1")
8963 (set_attr "mode" "TI")])
8965 (define_insn "*avx_packusdw"
8966 [(set (match_operand:V8HI 0 "register_operand" "=x")
8969 (match_operand:V4SI 1 "register_operand" "x"))
8971 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
8973 "vpackusdw\t{%2, %1, %0|%0, %1, %2}"
8974 [(set_attr "type" "sselog")
8975 (set_attr "prefix" "vex")
8976 (set_attr "mode" "TI")])
8978 (define_insn "sse4_1_packusdw"
8979 [(set (match_operand:V8HI 0 "register_operand" "=x")
8982 (match_operand:V4SI 1 "register_operand" "0"))
8984 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
8986 "packusdw\t{%2, %0|%0, %2}"
8987 [(set_attr "type" "sselog")
8988 (set_attr "prefix_extra" "1")
8989 (set_attr "mode" "TI")])
8991 (define_insn "*avx_pblendvb"
8992 [(set (match_operand:V16QI 0 "register_operand" "=x")
8993 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
8994 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
8995 (match_operand:V16QI 3 "register_operand" "x")]
8998 "vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8999 [(set_attr "type" "ssemov")
9000 (set_attr "prefix" "vex")
9001 (set_attr "mode" "TI")])
9003 (define_insn "sse4_1_pblendvb"
9004 [(set (match_operand:V16QI 0 "reg_not_xmm0_operand" "=x")
9005 (unspec:V16QI [(match_operand:V16QI 1 "reg_not_xmm0_operand" "0")
9006 (match_operand:V16QI 2 "nonimm_not_xmm0_operand" "xm")
9007 (match_operand:V16QI 3 "register_operand" "Yz")]
9010 "pblendvb\t{%3, %2, %0|%0, %2, %3}"
9011 [(set_attr "type" "ssemov")
9012 (set_attr "prefix_extra" "1")
9013 (set_attr "mode" "TI")])
9015 (define_insn "*avx_pblendw"
9016 [(set (match_operand:V8HI 0 "register_operand" "=x")
9018 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
9019 (match_operand:V8HI 1 "register_operand" "x")
9020 (match_operand:SI 3 "const_0_to_255_operand" "n")))]
9022 "vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9023 [(set_attr "type" "ssemov")
9024 (set_attr "prefix" "vex")
9025 (set_attr "mode" "TI")])
9027 (define_insn "sse4_1_pblendw"
9028 [(set (match_operand:V8HI 0 "register_operand" "=x")
9030 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
9031 (match_operand:V8HI 1 "register_operand" "0")
9032 (match_operand:SI 3 "const_0_to_255_operand" "n")))]
9034 "pblendw\t{%3, %2, %0|%0, %2, %3}"
9035 [(set_attr "type" "ssemov")
9036 (set_attr "prefix_extra" "1")
9037 (set_attr "mode" "TI")])
9039 (define_insn "sse4_1_phminposuw"
9040 [(set (match_operand:V8HI 0 "register_operand" "=x")
9041 (unspec:V8HI [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
9042 UNSPEC_PHMINPOSUW))]
9044 "%vphminposuw\t{%1, %0|%0, %1}"
9045 [(set_attr "type" "sselog1")
9046 (set_attr "prefix_extra" "1")
9047 (set_attr "prefix" "maybe_vex")
9048 (set_attr "mode" "TI")])
9050 (define_insn "sse4_1_extendv8qiv8hi2"
9051 [(set (match_operand:V8HI 0 "register_operand" "=x")
9054 (match_operand:V16QI 1 "register_operand" "x")
9055 (parallel [(const_int 0)
9064 "%vpmovsxbw\t{%1, %0|%0, %1}"
9065 [(set_attr "type" "ssemov")
9066 (set_attr "prefix_extra" "1")
9067 (set_attr "prefix" "maybe_vex")
9068 (set_attr "mode" "TI")])
9070 (define_insn "*sse4_1_extendv8qiv8hi2"
9071 [(set (match_operand:V8HI 0 "register_operand" "=x")
9074 (vec_duplicate:V16QI
9075 (match_operand:V8QI 1 "nonimmediate_operand" "xm"))
9076 (parallel [(const_int 0)
9085 "%vpmovsxbw\t{%1, %0|%0, %1}"
9086 [(set_attr "type" "ssemov")
9087 (set_attr "prefix_extra" "1")
9088 (set_attr "prefix" "maybe_vex")
9089 (set_attr "mode" "TI")])
9091 (define_insn "sse4_1_extendv4qiv4si2"
9092 [(set (match_operand:V4SI 0 "register_operand" "=x")
9095 (match_operand:V16QI 1 "register_operand" "x")
9096 (parallel [(const_int 0)
9101 "%vpmovsxbd\t{%1, %0|%0, %1}"
9102 [(set_attr "type" "ssemov")
9103 (set_attr "prefix_extra" "1")
9104 (set_attr "prefix" "maybe_vex")
9105 (set_attr "mode" "TI")])
9107 (define_insn "*sse4_1_extendv4qiv4si2"
9108 [(set (match_operand:V4SI 0 "register_operand" "=x")
9111 (vec_duplicate:V16QI
9112 (match_operand:V4QI 1 "nonimmediate_operand" "xm"))
9113 (parallel [(const_int 0)
9118 "%vpmovsxbd\t{%1, %0|%0, %1}"
9119 [(set_attr "type" "ssemov")
9120 (set_attr "prefix_extra" "1")
9121 (set_attr "prefix" "maybe_vex")
9122 (set_attr "mode" "TI")])
9124 (define_insn "sse4_1_extendv2qiv2di2"
9125 [(set (match_operand:V2DI 0 "register_operand" "=x")
9128 (match_operand:V16QI 1 "register_operand" "x")
9129 (parallel [(const_int 0)
9132 "%vpmovsxbq\t{%1, %0|%0, %1}"
9133 [(set_attr "type" "ssemov")
9134 (set_attr "prefix_extra" "1")
9135 (set_attr "prefix" "maybe_vex")
9136 (set_attr "mode" "TI")])
9138 (define_insn "*sse4_1_extendv2qiv2di2"
9139 [(set (match_operand:V2DI 0 "register_operand" "=x")
9142 (vec_duplicate:V16QI
9143 (match_operand:V2QI 1 "nonimmediate_operand" "xm"))
9144 (parallel [(const_int 0)
9147 "%vpmovsxbq\t{%1, %0|%0, %1}"
9148 [(set_attr "type" "ssemov")
9149 (set_attr "prefix_extra" "1")
9150 (set_attr "prefix" "maybe_vex")
9151 (set_attr "mode" "TI")])
9153 (define_insn "sse4_1_extendv4hiv4si2"
9154 [(set (match_operand:V4SI 0 "register_operand" "=x")
9157 (match_operand:V8HI 1 "register_operand" "x")
9158 (parallel [(const_int 0)
9163 "%vpmovsxwd\t{%1, %0|%0, %1}"
9164 [(set_attr "type" "ssemov")
9165 (set_attr "prefix_extra" "1")
9166 (set_attr "prefix" "maybe_vex")
9167 (set_attr "mode" "TI")])
9169 (define_insn "*sse4_1_extendv4hiv4si2"
9170 [(set (match_operand:V4SI 0 "register_operand" "=x")
9174 (match_operand:V2HI 1 "nonimmediate_operand" "xm"))
9175 (parallel [(const_int 0)
9180 "%vpmovsxwd\t{%1, %0|%0, %1}"
9181 [(set_attr "type" "ssemov")
9182 (set_attr "prefix_extra" "1")
9183 (set_attr "prefix" "maybe_vex")
9184 (set_attr "mode" "TI")])
9186 (define_insn "sse4_1_extendv2hiv2di2"
9187 [(set (match_operand:V2DI 0 "register_operand" "=x")
9190 (match_operand:V8HI 1 "register_operand" "x")
9191 (parallel [(const_int 0)
9194 "%vpmovsxwq\t{%1, %0|%0, %1}"
9195 [(set_attr "type" "ssemov")
9196 (set_attr "prefix_extra" "1")
9197 (set_attr "prefix" "maybe_vex")
9198 (set_attr "mode" "TI")])
9200 (define_insn "*sse4_1_extendv2hiv2di2"
9201 [(set (match_operand:V2DI 0 "register_operand" "=x")
9205 (match_operand:V8HI 1 "nonimmediate_operand" "xm"))
9206 (parallel [(const_int 0)
9209 "%vpmovsxwq\t{%1, %0|%0, %1}"
9210 [(set_attr "type" "ssemov")
9211 (set_attr "prefix_extra" "1")
9212 (set_attr "prefix" "maybe_vex")
9213 (set_attr "mode" "TI")])
9215 (define_insn "sse4_1_extendv2siv2di2"
9216 [(set (match_operand:V2DI 0 "register_operand" "=x")
9219 (match_operand:V4SI 1 "register_operand" "x")
9220 (parallel [(const_int 0)
9223 "%vpmovsxdq\t{%1, %0|%0, %1}"
9224 [(set_attr "type" "ssemov")
9225 (set_attr "prefix_extra" "1")
9226 (set_attr "prefix" "maybe_vex")
9227 (set_attr "mode" "TI")])
9229 (define_insn "*sse4_1_extendv2siv2di2"
9230 [(set (match_operand:V2DI 0 "register_operand" "=x")
9234 (match_operand:V2SI 1 "nonimmediate_operand" "xm"))
9235 (parallel [(const_int 0)
9238 "%vpmovsxdq\t{%1, %0|%0, %1}"
9239 [(set_attr "type" "ssemov")
9240 (set_attr "prefix_extra" "1")
9241 (set_attr "prefix" "maybe_vex")
9242 (set_attr "mode" "TI")])
9244 (define_insn "sse4_1_zero_extendv8qiv8hi2"
9245 [(set (match_operand:V8HI 0 "register_operand" "=x")
9248 (match_operand:V16QI 1 "register_operand" "x")
9249 (parallel [(const_int 0)
9258 "%vpmovzxbw\t{%1, %0|%0, %1}"
9259 [(set_attr "type" "ssemov")
9260 (set_attr "prefix_extra" "1")
9261 (set_attr "prefix" "maybe_vex")
9262 (set_attr "mode" "TI")])
9264 (define_insn "*sse4_1_zero_extendv8qiv8hi2"
9265 [(set (match_operand:V8HI 0 "register_operand" "=x")
9268 (vec_duplicate:V16QI
9269 (match_operand:V8QI 1 "nonimmediate_operand" "xm"))
9270 (parallel [(const_int 0)
9279 "%vpmovzxbw\t{%1, %0|%0, %1}"
9280 [(set_attr "type" "ssemov")
9281 (set_attr "prefix_extra" "1")
9282 (set_attr "prefix" "maybe_vex")
9283 (set_attr "mode" "TI")])
9285 (define_insn "sse4_1_zero_extendv4qiv4si2"
9286 [(set (match_operand:V4SI 0 "register_operand" "=x")
9289 (match_operand:V16QI 1 "register_operand" "x")
9290 (parallel [(const_int 0)
9295 "%vpmovzxbd\t{%1, %0|%0, %1}"
9296 [(set_attr "type" "ssemov")
9297 (set_attr "prefix_extra" "1")
9298 (set_attr "prefix" "maybe_vex")
9299 (set_attr "mode" "TI")])
9301 (define_insn "*sse4_1_zero_extendv4qiv4si2"
9302 [(set (match_operand:V4SI 0 "register_operand" "=x")
9305 (vec_duplicate:V16QI
9306 (match_operand:V4QI 1 "nonimmediate_operand" "xm"))
9307 (parallel [(const_int 0)
9312 "%vpmovzxbd\t{%1, %0|%0, %1}"
9313 [(set_attr "type" "ssemov")
9314 (set_attr "prefix_extra" "1")
9315 (set_attr "prefix" "maybe_vex")
9316 (set_attr "mode" "TI")])
9318 (define_insn "sse4_1_zero_extendv2qiv2di2"
9319 [(set (match_operand:V2DI 0 "register_operand" "=x")
9322 (match_operand:V16QI 1 "register_operand" "x")
9323 (parallel [(const_int 0)
9326 "%vpmovzxbq\t{%1, %0|%0, %1}"
9327 [(set_attr "type" "ssemov")
9328 (set_attr "prefix_extra" "1")
9329 (set_attr "prefix" "maybe_vex")
9330 (set_attr "mode" "TI")])
9332 (define_insn "*sse4_1_zero_extendv2qiv2di2"
9333 [(set (match_operand:V2DI 0 "register_operand" "=x")
9336 (vec_duplicate:V16QI
9337 (match_operand:V2QI 1 "nonimmediate_operand" "xm"))
9338 (parallel [(const_int 0)
9341 "%vpmovzxbq\t{%1, %0|%0, %1}"
9342 [(set_attr "type" "ssemov")
9343 (set_attr "prefix_extra" "1")
9344 (set_attr "prefix" "maybe_vex")
9345 (set_attr "mode" "TI")])
9347 (define_insn "sse4_1_zero_extendv4hiv4si2"
9348 [(set (match_operand:V4SI 0 "register_operand" "=x")
9351 (match_operand:V8HI 1 "register_operand" "x")
9352 (parallel [(const_int 0)
9357 "%vpmovzxwd\t{%1, %0|%0, %1}"
9358 [(set_attr "type" "ssemov")
9359 (set_attr "prefix_extra" "1")
9360 (set_attr "prefix" "maybe_vex")
9361 (set_attr "mode" "TI")])
9363 (define_insn "*sse4_1_zero_extendv4hiv4si2"
9364 [(set (match_operand:V4SI 0 "register_operand" "=x")
9368 (match_operand:V4HI 1 "nonimmediate_operand" "xm"))
9369 (parallel [(const_int 0)
9374 "%vpmovzxwd\t{%1, %0|%0, %1}"
9375 [(set_attr "type" "ssemov")
9376 (set_attr "prefix_extra" "1")
9377 (set_attr "prefix" "maybe_vex")
9378 (set_attr "mode" "TI")])
9380 (define_insn "sse4_1_zero_extendv2hiv2di2"
9381 [(set (match_operand:V2DI 0 "register_operand" "=x")
9384 (match_operand:V8HI 1 "register_operand" "x")
9385 (parallel [(const_int 0)
9388 "%vpmovzxwq\t{%1, %0|%0, %1}"
9389 [(set_attr "type" "ssemov")
9390 (set_attr "prefix_extra" "1")
9391 (set_attr "prefix" "maybe_vex")
9392 (set_attr "mode" "TI")])
9394 (define_insn "*sse4_1_zero_extendv2hiv2di2"
9395 [(set (match_operand:V2DI 0 "register_operand" "=x")
9399 (match_operand:V2HI 1 "nonimmediate_operand" "xm"))
9400 (parallel [(const_int 0)
9403 "%vpmovzxwq\t{%1, %0|%0, %1}"
9404 [(set_attr "type" "ssemov")
9405 (set_attr "prefix_extra" "1")
9406 (set_attr "prefix" "maybe_vex")
9407 (set_attr "mode" "TI")])
9409 (define_insn "sse4_1_zero_extendv2siv2di2"
9410 [(set (match_operand:V2DI 0 "register_operand" "=x")
9413 (match_operand:V4SI 1 "register_operand" "x")
9414 (parallel [(const_int 0)
9417 "%vpmovzxdq\t{%1, %0|%0, %1}"
9418 [(set_attr "type" "ssemov")
9419 (set_attr "prefix_extra" "1")
9420 (set_attr "prefix" "maybe_vex")
9421 (set_attr "mode" "TI")])
9423 (define_insn "*sse4_1_zero_extendv2siv2di2"
9424 [(set (match_operand:V2DI 0 "register_operand" "=x")
9428 (match_operand:V2SI 1 "nonimmediate_operand" "xm"))
9429 (parallel [(const_int 0)
9432 "%vpmovzxdq\t{%1, %0|%0, %1}"
9433 [(set_attr "type" "ssemov")
9434 (set_attr "prefix_extra" "1")
9435 (set_attr "prefix" "maybe_vex")
9436 (set_attr "mode" "TI")])
9438 ;; ptestps/ptestpd are very similar to comiss and ucomiss when
9439 ;; setting FLAGS_REG. But it is not a really compare instruction.
9440 (define_insn "avx_vtestp<avxmodesuffixf2c><avxmodesuffix>"
9441 [(set (reg:CC FLAGS_REG)
9442 (unspec:CC [(match_operand:AVXMODEF2P 0 "register_operand" "x")
9443 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "xm")]
9446 "vtestp<avxmodesuffixf2c>\t{%1, %0|%0, %1}"
9447 [(set_attr "type" "ssecomi")
9448 (set_attr "prefix" "vex")
9449 (set_attr "mode" "<MODE>")])
9451 ;; ptest is very similar to comiss and ucomiss when setting FLAGS_REG.
9452 ;; But it is not a really compare instruction.
9453 (define_insn "avx_ptest256"
9454 [(set (reg:CC FLAGS_REG)
9455 (unspec:CC [(match_operand:V4DI 0 "register_operand" "x")
9456 (match_operand:V4DI 1 "nonimmediate_operand" "xm")]
9459 "vptest\t{%1, %0|%0, %1}"
9460 [(set_attr "type" "ssecomi")
9461 (set_attr "prefix" "vex")
9462 (set_attr "mode" "OI")])
9464 (define_insn "sse4_1_ptest"
9465 [(set (reg:CC FLAGS_REG)
9466 (unspec:CC [(match_operand:V2DI 0 "register_operand" "x")
9467 (match_operand:V2DI 1 "nonimmediate_operand" "xm")]
9470 "%vptest\t{%1, %0|%0, %1}"
9471 [(set_attr "type" "ssecomi")
9472 (set_attr "prefix_extra" "1")
9473 (set_attr "prefix" "maybe_vex")
9474 (set_attr "mode" "TI")])
9476 (define_insn "avx_roundp<avxmodesuffixf2c>256"
9477 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "=x")
9478 (unspec:AVX256MODEF2P
9479 [(match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "xm")
9480 (match_operand:SI 2 "const_0_to_15_operand" "n")]
9483 "vroundp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
9484 [(set_attr "type" "ssecvt")
9485 (set_attr "prefix" "vex")
9486 (set_attr "mode" "<MODE>")])
9488 (define_insn "sse4_1_roundp<ssemodesuffixf2c>"
9489 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
9491 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm")
9492 (match_operand:SI 2 "const_0_to_15_operand" "n")]
9495 "%vroundp<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
9496 [(set_attr "type" "ssecvt")
9497 (set_attr "prefix_extra" "1")
9498 (set_attr "prefix" "maybe_vex")
9499 (set_attr "mode" "<MODE>")])
9501 (define_insn "*avx_rounds<ssemodesuffixf2c>"
9502 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
9503 (vec_merge:SSEMODEF2P
9505 [(match_operand:SSEMODEF2P 2 "register_operand" "x")
9506 (match_operand:SI 3 "const_0_to_15_operand" "n")]
9508 (match_operand:SSEMODEF2P 1 "register_operand" "x")
9511 "vrounds<ssemodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9512 [(set_attr "type" "ssecvt")
9513 (set_attr "prefix" "vex")
9514 (set_attr "mode" "<MODE>")])
9516 (define_insn "sse4_1_rounds<ssemodesuffixf2c>"
9517 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
9518 (vec_merge:SSEMODEF2P
9520 [(match_operand:SSEMODEF2P 2 "register_operand" "x")
9521 (match_operand:SI 3 "const_0_to_15_operand" "n")]
9523 (match_operand:SSEMODEF2P 1 "register_operand" "0")
9526 "rounds<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}"
9527 [(set_attr "type" "ssecvt")
9528 (set_attr "prefix_extra" "1")
9529 (set_attr "mode" "<MODE>")])
9531 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9533 ;; Intel SSE4.2 string/text processing instructions
9535 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9537 (define_insn_and_split "sse4_2_pcmpestr"
9538 [(set (match_operand:SI 0 "register_operand" "=c,c")
9540 [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
9541 (match_operand:SI 3 "register_operand" "a,a")
9542 (match_operand:V16QI 4 "nonimm_not_xmm0_operand" "x,m")
9543 (match_operand:SI 5 "register_operand" "d,d")
9544 (match_operand:SI 6 "const_0_to_255_operand" "n,n")]
9546 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
9554 (set (reg:CC FLAGS_REG)
9563 && !(reload_completed || reload_in_progress)"
9568 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
9569 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
9570 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
9573 emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
9574 operands[3], operands[4],
9575 operands[5], operands[6]));
9577 emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
9578 operands[3], operands[4],
9579 operands[5], operands[6]));
9580 if (flags && !(ecx || xmm0))
9581 emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
9582 operands[2], operands[3],
9583 operands[4], operands[5],
9587 [(set_attr "type" "sselog")
9588 (set_attr "prefix_data16" "1")
9589 (set_attr "prefix_extra" "1")
9590 (set_attr "memory" "none,load")
9591 (set_attr "mode" "TI")])
9593 (define_insn "sse4_2_pcmpestri"
9594 [(set (match_operand:SI 0 "register_operand" "=c,c")
9596 [(match_operand:V16QI 1 "register_operand" "x,x")
9597 (match_operand:SI 2 "register_operand" "a,a")
9598 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
9599 (match_operand:SI 4 "register_operand" "d,d")
9600 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
9602 (set (reg:CC FLAGS_REG)
9611 "%vpcmpestri\t{%5, %3, %1|%1, %3, %5}"
9612 [(set_attr "type" "sselog")
9613 (set_attr "prefix_data16" "1")
9614 (set_attr "prefix_extra" "1")
9615 (set_attr "prefix" "maybe_vex")
9616 (set_attr "memory" "none,load")
9617 (set_attr "mode" "TI")])
9619 (define_insn "sse4_2_pcmpestrm"
9620 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
9622 [(match_operand:V16QI 1 "register_operand" "x,x")
9623 (match_operand:SI 2 "register_operand" "a,a")
9624 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
9625 (match_operand:SI 4 "register_operand" "d,d")
9626 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
9628 (set (reg:CC FLAGS_REG)
9637 "%vpcmpestrm\t{%5, %3, %1|%1, %3, %5}"
9638 [(set_attr "type" "sselog")
9639 (set_attr "prefix_data16" "1")
9640 (set_attr "prefix_extra" "1")
9641 (set_attr "prefix" "maybe_vex")
9642 (set_attr "memory" "none,load")
9643 (set_attr "mode" "TI")])
9645 (define_insn "sse4_2_pcmpestr_cconly"
9646 [(set (reg:CC FLAGS_REG)
9648 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
9649 (match_operand:SI 3 "register_operand" "a,a,a,a")
9650 (match_operand:V16QI 4 "nonimmediate_operand" "x,m,x,m")
9651 (match_operand:SI 5 "register_operand" "d,d,d,d")
9652 (match_operand:SI 6 "const_0_to_255_operand" "n,n,n,n")]
9654 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
9655 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
9658 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
9659 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
9660 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}
9661 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}"
9662 [(set_attr "type" "sselog")
9663 (set_attr "prefix_data16" "1")
9664 (set_attr "prefix_extra" "1")
9665 (set_attr "memory" "none,load,none,load")
9666 (set_attr "prefix" "maybe_vex")
9667 (set_attr "mode" "TI")])
9669 (define_insn_and_split "sse4_2_pcmpistr"
9670 [(set (match_operand:SI 0 "register_operand" "=c,c")
9672 [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
9673 (match_operand:V16QI 3 "nonimm_not_xmm0_operand" "x,m")
9674 (match_operand:SI 4 "const_0_to_255_operand" "n,n")]
9676 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
9682 (set (reg:CC FLAGS_REG)
9689 && !(reload_completed || reload_in_progress)"
9694 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
9695 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
9696 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
9699 emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
9700 operands[3], operands[4]));
9702 emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
9703 operands[3], operands[4]));
9704 if (flags && !(ecx || xmm0))
9705 emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
9706 operands[2], operands[3],
9710 [(set_attr "type" "sselog")
9711 (set_attr "prefix_data16" "1")
9712 (set_attr "prefix_extra" "1")
9713 (set_attr "memory" "none,load")
9714 (set_attr "mode" "TI")])
9716 (define_insn "sse4_2_pcmpistri"
9717 [(set (match_operand:SI 0 "register_operand" "=c,c")
9719 [(match_operand:V16QI 1 "register_operand" "x,x")
9720 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
9721 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
9723 (set (reg:CC FLAGS_REG)
9730 "%vpcmpistri\t{%3, %2, %1|%1, %2, %3}"
9731 [(set_attr "type" "sselog")
9732 (set_attr "prefix_data16" "1")
9733 (set_attr "prefix_extra" "1")
9734 (set_attr "prefix" "maybe_vex")
9735 (set_attr "memory" "none,load")
9736 (set_attr "mode" "TI")])
9738 (define_insn "sse4_2_pcmpistrm"
9739 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
9741 [(match_operand:V16QI 1 "register_operand" "x,x")
9742 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
9743 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
9745 (set (reg:CC FLAGS_REG)
9752 "%vpcmpistrm\t{%3, %2, %1|%1, %2, %3}"
9753 [(set_attr "type" "sselog")
9754 (set_attr "prefix_data16" "1")
9755 (set_attr "prefix_extra" "1")
9756 (set_attr "prefix" "maybe_vex")
9757 (set_attr "memory" "none,load")
9758 (set_attr "mode" "TI")])
9760 (define_insn "sse4_2_pcmpistr_cconly"
9761 [(set (reg:CC FLAGS_REG)
9763 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
9764 (match_operand:V16QI 3 "nonimmediate_operand" "x,m,x,m")
9765 (match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")]
9767 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
9768 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
9771 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
9772 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
9773 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}
9774 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}"
9775 [(set_attr "type" "sselog")
9776 (set_attr "prefix_data16" "1")
9777 (set_attr "prefix_extra" "1")
9778 (set_attr "memory" "none,load,none,load")
9779 (set_attr "prefix" "maybe_vex")
9780 (set_attr "mode" "TI")])
9782 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9784 ;; SSE5 instructions
9786 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9788 ;; SSE5 parallel integer multiply/add instructions.
9789 ;; Note the instruction does not allow the value being added to be a memory
9790 ;; operation. However by pretending via the nonimmediate_operand predicate
9791 ;; that it does and splitting it later allows the following to be recognized:
9792 ;; a[i] = b[i] * c[i] + d[i];
9793 (define_insn "sse5_pmacsww"
9794 [(set (match_operand:V8HI 0 "register_operand" "=x,x,x")
9797 (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,xm")
9798 (match_operand:V8HI 2 "nonimmediate_operand" "x,xm,x"))
9799 (match_operand:V8HI 3 "register_operand" "0,0,0")))]
9800 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 2, true)"
9802 pmacsww\t{%3, %2, %1, %0|%0, %1, %2, %3}
9803 pmacsww\t{%3, %2, %1, %0|%0, %1, %2, %3}
9804 pmacsww\t{%3, %1, %2, %0|%0, %2, %1, %3}"
9805 [(set_attr "type" "ssemuladd")
9806 (set_attr "mode" "TI")])
9808 ;; Split pmacsww with two memory operands into a load and the pmacsww.
9810 [(set (match_operand:V8HI 0 "register_operand" "")
9812 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
9813 (match_operand:V8HI 2 "nonimmediate_operand" ""))
9814 (match_operand:V8HI 3 "nonimmediate_operand" "")))]
9816 && !ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)
9817 && ix86_sse5_valid_op_p (operands, insn, 4, false, 2, true)
9818 && !reg_mentioned_p (operands[0], operands[1])
9819 && !reg_mentioned_p (operands[0], operands[2])
9820 && !reg_mentioned_p (operands[0], operands[3])"
9823 ix86_expand_sse5_multiple_memory (operands, 4, V8HImode);
9824 emit_insn (gen_sse5_pmacsww (operands[0], operands[1], operands[2],
9829 (define_insn "sse5_pmacssww"
9830 [(set (match_operand:V8HI 0 "register_operand" "=x,x,x")
9832 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,m")
9833 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x"))
9834 (match_operand:V8HI 3 "register_operand" "0,0,0")))]
9835 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)"
9837 pmacssww\t{%3, %2, %1, %0|%0, %1, %2, %3}
9838 pmacssww\t{%3, %2, %1, %0|%0, %1, %2, %3}
9839 pmacssww\t{%3, %1, %2, %0|%0, %2, %1, %3}"
9840 [(set_attr "type" "ssemuladd")
9841 (set_attr "mode" "TI")])
9843 ;; Note the instruction does not allow the value being added to be a memory
9844 ;; operation. However by pretending via the nonimmediate_operand predicate
9845 ;; that it does and splitting it later allows the following to be recognized:
9846 ;; a[i] = b[i] * c[i] + d[i];
9847 (define_insn "sse5_pmacsdd"
9848 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
9851 (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
9852 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x"))
9853 (match_operand:V4SI 3 "register_operand" "0,0,0")))]
9854 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 2, true)"
9856 pmacsdd\t{%3, %2, %1, %0|%0, %1, %2, %3}
9857 pmacsdd\t{%3, %2, %1, %0|%0, %1, %2, %3}
9858 pmacsdd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
9859 [(set_attr "type" "ssemuladd")
9860 (set_attr "mode" "TI")])
9862 ;; Split pmacsdd with two memory operands into a load and the pmacsdd.
9864 [(set (match_operand:V4SI 0 "register_operand" "")
9866 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "")
9867 (match_operand:V4SI 2 "nonimmediate_operand" ""))
9868 (match_operand:V4SI 3 "nonimmediate_operand" "")))]
9870 && !ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)
9871 && ix86_sse5_valid_op_p (operands, insn, 4, false, 2, true)
9872 && !reg_mentioned_p (operands[0], operands[1])
9873 && !reg_mentioned_p (operands[0], operands[2])
9874 && !reg_mentioned_p (operands[0], operands[3])"
9877 ix86_expand_sse5_multiple_memory (operands, 4, V4SImode);
9878 emit_insn (gen_sse5_pmacsdd (operands[0], operands[1], operands[2],
9883 (define_insn "sse5_pmacssdd"
9884 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
9886 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
9887 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x"))
9888 (match_operand:V4SI 3 "register_operand" "0,0,0")))]
9889 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)"
9891 pmacssdd\t{%3, %2, %1, %0|%0, %1, %2, %3}
9892 pmacssdd\t{%3, %2, %1, %0|%0, %1, %2, %3}
9893 pmacssdd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
9894 [(set_attr "type" "ssemuladd")
9895 (set_attr "mode" "TI")])
9897 (define_insn "sse5_pmacssdql"
9898 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x")
9903 (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
9904 (parallel [(const_int 1)
9907 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")
9908 (parallel [(const_int 1)
9910 (match_operand:V2DI 3 "register_operand" "0,0,0")))]
9911 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)"
9913 pmacssdql\t{%3, %2, %1, %0|%0, %1, %2, %3}
9914 pmacssdql\t{%3, %2, %1, %0|%0, %1, %2, %3}
9915 pmacssdql\t{%3, %1, %2, %0|%0, %2, %1, %3}"
9916 [(set_attr "type" "ssemuladd")
9917 (set_attr "mode" "TI")])
9919 (define_insn "sse5_pmacssdqh"
9920 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x")
9925 (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
9926 (parallel [(const_int 0)
9930 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")
9931 (parallel [(const_int 0)
9933 (match_operand:V2DI 3 "register_operand" "0,0,0")))]
9934 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)"
9936 pmacssdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}
9937 pmacssdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}
9938 pmacssdqh\t{%3, %1, %2, %0|%0, %2, %1, %3}"
9939 [(set_attr "type" "ssemuladd")
9940 (set_attr "mode" "TI")])
9942 (define_insn "sse5_pmacsdql"
9943 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x")
9948 (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
9949 (parallel [(const_int 1)
9953 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")
9954 (parallel [(const_int 1)
9956 (match_operand:V2DI 3 "register_operand" "0,0,0")))]
9957 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)"
9959 pmacsdql\t{%3, %2, %1, %0|%0, %1, %2, %3}
9960 pmacsdql\t{%3, %2, %1, %0|%0, %1, %2, %3}
9961 pmacsdql\t{%3, %1, %2, %0|%0, %2, %1, %3}"
9962 [(set_attr "type" "ssemuladd")
9963 (set_attr "mode" "TI")])
9965 (define_insn_and_split "*sse5_pmacsdql_mem"
9966 [(set (match_operand:V2DI 0 "register_operand" "=&x,&x,&x")
9971 (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
9972 (parallel [(const_int 1)
9976 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")
9977 (parallel [(const_int 1)
9979 (match_operand:V2DI 3 "memory_operand" "m,m,m")))]
9980 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, -1, true)"
9982 "&& (reload_completed
9983 || (!reg_mentioned_p (operands[0], operands[1])
9984 && !reg_mentioned_p (operands[0], operands[2])))"
9993 (parallel [(const_int 1)
9998 (parallel [(const_int 1)
10002 ;; We don't have a straight 32-bit parallel multiply and extend on SSE5, so
10003 ;; fake it with a multiply/add. In general, we expect the define_split to
10004 ;; occur before register allocation, so we have to handle the corner case where
10005 ;; the target is the same as operands 1/2
10006 (define_insn_and_split "sse5_mulv2div2di3_low"
10007 [(set (match_operand:V2DI 0 "register_operand" "=&x")
10011 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10012 (parallel [(const_int 1)
10016 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10017 (parallel [(const_int 1)
10018 (const_int 3)])))))]
10021 "&& (reload_completed
10022 || (!reg_mentioned_p (operands[0], operands[1])
10023 && !reg_mentioned_p (operands[0], operands[2])))"
10024 [(set (match_dup 0)
10032 (parallel [(const_int 1)
10037 (parallel [(const_int 1)
10041 operands[3] = CONST0_RTX (V2DImode);
10043 [(set_attr "type" "ssemuladd")
10044 (set_attr "mode" "TI")])
10046 (define_insn "sse5_pmacsdqh"
10047 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x")
10052 (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
10053 (parallel [(const_int 0)
10057 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")
10058 (parallel [(const_int 0)
10060 (match_operand:V2DI 3 "register_operand" "0,0,0")))]
10061 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)"
10063 pmacsdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}
10064 pmacsdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}
10065 pmacsdqh\t{%3, %1, %2, %0|%0, %2, %1, %3}"
10066 [(set_attr "type" "ssemuladd")
10067 (set_attr "mode" "TI")])
10069 (define_insn_and_split "*sse5_pmacsdqh_mem"
10070 [(set (match_operand:V2DI 0 "register_operand" "=&x,&x,&x")
10075 (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
10076 (parallel [(const_int 0)
10080 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")
10081 (parallel [(const_int 0)
10083 (match_operand:V2DI 3 "memory_operand" "m,m,m")))]
10084 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, -1, true)"
10086 "&& (reload_completed
10087 || (!reg_mentioned_p (operands[0], operands[1])
10088 && !reg_mentioned_p (operands[0], operands[2])))"
10089 [(set (match_dup 0)
10097 (parallel [(const_int 0)
10102 (parallel [(const_int 0)
10106 ;; We don't have a straight 32-bit parallel multiply and extend on SSE5, so
10107 ;; fake it with a multiply/add. In general, we expect the define_split to
10108 ;; occur before register allocation, so we have to handle the corner case where
10109 ;; the target is the same as either operands[1] or operands[2]
10110 (define_insn_and_split "sse5_mulv2div2di3_high"
10111 [(set (match_operand:V2DI 0 "register_operand" "=&x")
10115 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10116 (parallel [(const_int 0)
10120 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10121 (parallel [(const_int 0)
10122 (const_int 2)])))))]
10125 "&& (reload_completed
10126 || (!reg_mentioned_p (operands[0], operands[1])
10127 && !reg_mentioned_p (operands[0], operands[2])))"
10128 [(set (match_dup 0)
10136 (parallel [(const_int 0)
10141 (parallel [(const_int 0)
10145 operands[3] = CONST0_RTX (V2DImode);
10147 [(set_attr "type" "ssemuladd")
10148 (set_attr "mode" "TI")])
10150 ;; SSE5 parallel integer multiply/add instructions for the intrinisics
10151 (define_insn "sse5_pmacsswd"
10152 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
10157 (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,m")
10158 (parallel [(const_int 1)
10164 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x")
10165 (parallel [(const_int 1)
10169 (match_operand:V4SI 3 "register_operand" "0,0,0")))]
10170 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)"
10172 pmacsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
10173 pmacsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
10174 pmacsswd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
10175 [(set_attr "type" "ssemuladd")
10176 (set_attr "mode" "TI")])
10178 (define_insn "sse5_pmacswd"
10179 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
10184 (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,m")
10185 (parallel [(const_int 1)
10191 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x")
10192 (parallel [(const_int 1)
10196 (match_operand:V4SI 3 "register_operand" "0,0,0")))]
10197 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)"
10199 pmacswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
10200 pmacswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
10201 pmacswd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
10202 [(set_attr "type" "ssemuladd")
10203 (set_attr "mode" "TI")])
10205 (define_insn "sse5_pmadcsswd"
10206 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
10212 (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,m")
10213 (parallel [(const_int 0)
10219 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x")
10220 (parallel [(const_int 0)
10228 (parallel [(const_int 1)
10235 (parallel [(const_int 1)
10238 (const_int 7)])))))
10239 (match_operand:V4SI 3 "register_operand" "0,0,0")))]
10240 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)"
10242 pmadcsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
10243 pmadcsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
10244 pmadcsswd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
10245 [(set_attr "type" "ssemuladd")
10246 (set_attr "mode" "TI")])
10248 (define_insn "sse5_pmadcswd"
10249 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
10255 (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,m")
10256 (parallel [(const_int 0)
10262 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x")
10263 (parallel [(const_int 0)
10271 (parallel [(const_int 1)
10278 (parallel [(const_int 1)
10281 (const_int 7)])))))
10282 (match_operand:V4SI 3 "register_operand" "0,0,0")))]
10283 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)"
10285 pmadcswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
10286 pmadcswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
10287 pmadcswd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
10288 [(set_attr "type" "ssemuladd")
10289 (set_attr "mode" "TI")])
10291 ;; SSE5 parallel XMM conditional moves
10292 (define_insn "sse5_pcmov_<mode>"
10293 [(set (match_operand:SSEMODE 0 "register_operand" "=x,x,x,x")
10294 (if_then_else:SSEMODE
10295 (match_operand:SSEMODE 3 "nonimmediate_operand" "0,0,xm,x")
10296 (match_operand:SSEMODE 1 "vector_move_operand" "x,xm,0,0")
10297 (match_operand:SSEMODE 2 "vector_move_operand" "xm,x,x,xm")))]
10298 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
10300 pcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}
10301 pcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}
10302 pcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}
10303 pcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10304 [(set_attr "type" "sse4arg")])
10306 ;; SSE5 horizontal add/subtract instructions
10307 (define_insn "sse5_phaddbw"
10308 [(set (match_operand:V8HI 0 "register_operand" "=x")
10312 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10313 (parallel [(const_int 0)
10324 (parallel [(const_int 1)
10331 (const_int 15)])))))]
10333 "phaddbw\t{%1, %0|%0, %1}"
10334 [(set_attr "type" "sseiadd1")])
10336 (define_insn "sse5_phaddbd"
10337 [(set (match_operand:V4SI 0 "register_operand" "=x")
10342 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10343 (parallel [(const_int 0)
10350 (parallel [(const_int 1)
10353 (const_int 13)]))))
10358 (parallel [(const_int 2)
10365 (parallel [(const_int 3)
10368 (const_int 15)]))))))]
10370 "phaddbd\t{%1, %0|%0, %1}"
10371 [(set_attr "type" "sseiadd1")])
10373 (define_insn "sse5_phaddbq"
10374 [(set (match_operand:V2DI 0 "register_operand" "=x")
10380 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10381 (parallel [(const_int 0)
10386 (parallel [(const_int 1)
10392 (parallel [(const_int 2)
10397 (parallel [(const_int 3)
10398 (const_int 7)])))))
10404 (parallel [(const_int 8)
10409 (parallel [(const_int 9)
10410 (const_int 13)]))))
10415 (parallel [(const_int 10)
10420 (parallel [(const_int 11)
10421 (const_int 15)])))))))]
10423 "phaddbq\t{%1, %0|%0, %1}"
10424 [(set_attr "type" "sseiadd1")])
10426 (define_insn "sse5_phaddwd"
10427 [(set (match_operand:V4SI 0 "register_operand" "=x")
10431 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10432 (parallel [(const_int 0)
10439 (parallel [(const_int 1)
10442 (const_int 7)])))))]
10444 "phaddwd\t{%1, %0|%0, %1}"
10445 [(set_attr "type" "sseiadd1")])
10447 (define_insn "sse5_phaddwq"
10448 [(set (match_operand:V2DI 0 "register_operand" "=x")
10453 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10454 (parallel [(const_int 0)
10459 (parallel [(const_int 1)
10465 (parallel [(const_int 2)
10470 (parallel [(const_int 3)
10471 (const_int 7)]))))))]
10473 "phaddwq\t{%1, %0|%0, %1}"
10474 [(set_attr "type" "sseiadd1")])
10476 (define_insn "sse5_phadddq"
10477 [(set (match_operand:V2DI 0 "register_operand" "=x")
10481 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
10482 (parallel [(const_int 0)
10487 (parallel [(const_int 1)
10488 (const_int 3)])))))]
10490 "phadddq\t{%1, %0|%0, %1}"
10491 [(set_attr "type" "sseiadd1")])
10493 (define_insn "sse5_phaddubw"
10494 [(set (match_operand:V8HI 0 "register_operand" "=x")
10498 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10499 (parallel [(const_int 0)
10510 (parallel [(const_int 1)
10517 (const_int 15)])))))]
10519 "phaddubw\t{%1, %0|%0, %1}"
10520 [(set_attr "type" "sseiadd1")])
10522 (define_insn "sse5_phaddubd"
10523 [(set (match_operand:V4SI 0 "register_operand" "=x")
10528 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10529 (parallel [(const_int 0)
10536 (parallel [(const_int 1)
10539 (const_int 13)]))))
10544 (parallel [(const_int 2)
10551 (parallel [(const_int 3)
10554 (const_int 15)]))))))]
10556 "phaddubd\t{%1, %0|%0, %1}"
10557 [(set_attr "type" "sseiadd1")])
10559 (define_insn "sse5_phaddubq"
10560 [(set (match_operand:V2DI 0 "register_operand" "=x")
10566 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10567 (parallel [(const_int 0)
10572 (parallel [(const_int 1)
10578 (parallel [(const_int 2)
10583 (parallel [(const_int 3)
10584 (const_int 7)])))))
10590 (parallel [(const_int 8)
10595 (parallel [(const_int 9)
10596 (const_int 13)]))))
10601 (parallel [(const_int 10)
10606 (parallel [(const_int 11)
10607 (const_int 15)])))))))]
10609 "phaddubq\t{%1, %0|%0, %1}"
10610 [(set_attr "type" "sseiadd1")])
10612 (define_insn "sse5_phadduwd"
10613 [(set (match_operand:V4SI 0 "register_operand" "=x")
10617 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10618 (parallel [(const_int 0)
10625 (parallel [(const_int 1)
10628 (const_int 7)])))))]
10630 "phadduwd\t{%1, %0|%0, %1}"
10631 [(set_attr "type" "sseiadd1")])
10633 (define_insn "sse5_phadduwq"
10634 [(set (match_operand:V2DI 0 "register_operand" "=x")
10639 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10640 (parallel [(const_int 0)
10645 (parallel [(const_int 1)
10651 (parallel [(const_int 2)
10656 (parallel [(const_int 3)
10657 (const_int 7)]))))))]
10659 "phadduwq\t{%1, %0|%0, %1}"
10660 [(set_attr "type" "sseiadd1")])
10662 (define_insn "sse5_phaddudq"
10663 [(set (match_operand:V2DI 0 "register_operand" "=x")
10667 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
10668 (parallel [(const_int 0)
10673 (parallel [(const_int 1)
10674 (const_int 3)])))))]
10676 "phaddudq\t{%1, %0|%0, %1}"
10677 [(set_attr "type" "sseiadd1")])
10679 (define_insn "sse5_phsubbw"
10680 [(set (match_operand:V8HI 0 "register_operand" "=x")
10684 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10685 (parallel [(const_int 0)
10696 (parallel [(const_int 1)
10703 (const_int 15)])))))]
10705 "phsubbw\t{%1, %0|%0, %1}"
10706 [(set_attr "type" "sseiadd1")])
10708 (define_insn "sse5_phsubwd"
10709 [(set (match_operand:V4SI 0 "register_operand" "=x")
10713 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10714 (parallel [(const_int 0)
10721 (parallel [(const_int 1)
10724 (const_int 7)])))))]
10726 "phsubwd\t{%1, %0|%0, %1}"
10727 [(set_attr "type" "sseiadd1")])
10729 (define_insn "sse5_phsubdq"
10730 [(set (match_operand:V2DI 0 "register_operand" "=x")
10734 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
10735 (parallel [(const_int 0)
10740 (parallel [(const_int 1)
10741 (const_int 3)])))))]
10743 "phsubdq\t{%1, %0|%0, %1}"
10744 [(set_attr "type" "sseiadd1")])
10746 ;; SSE5 permute instructions
10747 (define_insn "sse5_pperm"
10748 [(set (match_operand:V16QI 0 "register_operand" "=x,x,x,x")
10750 [(match_operand:V16QI 1 "nonimmediate_operand" "0,0,x,xm")
10751 (match_operand:V16QI 2 "nonimmediate_operand" "x,xm,xm,x")
10752 (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0")]
10753 UNSPEC_SSE5_PERMUTE))]
10754 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
10755 "pperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10756 [(set_attr "type" "sse4arg")
10757 (set_attr "mode" "TI")])
10759 ;; The following are for the various unpack insns which doesn't need the first
10760 ;; source operand, so we can just use the output operand for the first operand.
10761 ;; This allows either of the other two operands to be a memory operand. We
10762 ;; can't just use the first operand as an argument to the normal pperm because
10763 ;; then an output only argument, suddenly becomes an input operand.
10764 (define_insn "sse5_pperm_zero_v16qi_v8hi"
10765 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
10768 (match_operand:V16QI 1 "nonimmediate_operand" "xm,x")
10769 (match_operand 2 "" "")))) ;; parallel with const_int's
10770 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
10772 && (register_operand (operands[1], V16QImode)
10773 || register_operand (operands[2], V16QImode))"
10774 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
10775 [(set_attr "type" "sseadd")
10776 (set_attr "mode" "TI")])
10778 (define_insn "sse5_pperm_sign_v16qi_v8hi"
10779 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
10782 (match_operand:V16QI 1 "nonimmediate_operand" "xm,x")
10783 (match_operand 2 "" "")))) ;; parallel with const_int's
10784 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
10786 && (register_operand (operands[1], V16QImode)
10787 || register_operand (operands[2], V16QImode))"
10788 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
10789 [(set_attr "type" "sseadd")
10790 (set_attr "mode" "TI")])
10792 (define_insn "sse5_pperm_zero_v8hi_v4si"
10793 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
10796 (match_operand:V8HI 1 "nonimmediate_operand" "xm,x")
10797 (match_operand 2 "" "")))) ;; parallel with const_int's
10798 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
10800 && (register_operand (operands[1], V8HImode)
10801 || register_operand (operands[2], V16QImode))"
10802 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
10803 [(set_attr "type" "sseadd")
10804 (set_attr "mode" "TI")])
10806 (define_insn "sse5_pperm_sign_v8hi_v4si"
10807 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
10810 (match_operand:V8HI 1 "nonimmediate_operand" "xm,x")
10811 (match_operand 2 "" "")))) ;; parallel with const_int's
10812 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
10814 && (register_operand (operands[1], V8HImode)
10815 || register_operand (operands[2], V16QImode))"
10816 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
10817 [(set_attr "type" "sseadd")
10818 (set_attr "mode" "TI")])
10820 (define_insn "sse5_pperm_zero_v4si_v2di"
10821 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
10824 (match_operand:V4SI 1 "nonimmediate_operand" "xm,x")
10825 (match_operand 2 "" "")))) ;; parallel with const_int's
10826 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
10828 && (register_operand (operands[1], V4SImode)
10829 || register_operand (operands[2], V16QImode))"
10830 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
10831 [(set_attr "type" "sseadd")
10832 (set_attr "mode" "TI")])
10834 (define_insn "sse5_pperm_sign_v4si_v2di"
10835 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
10838 (match_operand:V4SI 1 "nonimmediate_operand" "xm,x")
10839 (match_operand 2 "" "")))) ;; parallel with const_int's
10840 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
10842 && (register_operand (operands[1], V4SImode)
10843 || register_operand (operands[2], V16QImode))"
10844 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
10845 [(set_attr "type" "sseadd")
10846 (set_attr "mode" "TI")])
10848 ;; SSE5 pack instructions that combine two vectors into a smaller vector
10849 (define_insn "sse5_pperm_pack_v2di_v4si"
10850 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x,x")
10853 (match_operand:V2DI 1 "nonimmediate_operand" "0,0,x,xm"))
10855 (match_operand:V2DI 2 "nonimmediate_operand" "x,xm,xm,x"))))
10856 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0"))]
10857 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
10858 "pperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10859 [(set_attr "type" "sse4arg")
10860 (set_attr "mode" "TI")])
10862 (define_insn "sse5_pperm_pack_v4si_v8hi"
10863 [(set (match_operand:V8HI 0 "register_operand" "=x,x,x,x")
10866 (match_operand:V4SI 1 "nonimmediate_operand" "0,0,x,xm"))
10868 (match_operand:V4SI 2 "nonimmediate_operand" "x,xm,xm,x"))))
10869 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0"))]
10870 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
10871 "pperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10872 [(set_attr "type" "sse4arg")
10873 (set_attr "mode" "TI")])
10875 (define_insn "sse5_pperm_pack_v8hi_v16qi"
10876 [(set (match_operand:V16QI 0 "register_operand" "=x,x,x,x")
10879 (match_operand:V8HI 1 "nonimmediate_operand" "0,0,x,xm"))
10881 (match_operand:V8HI 2 "nonimmediate_operand" "x,xm,xm,x"))))
10882 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0"))]
10883 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
10884 "pperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10885 [(set_attr "type" "sse4arg")
10886 (set_attr "mode" "TI")])
10888 ;; Floating point permutation (permps, permpd)
10889 (define_insn "sse5_perm<mode>"
10890 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
10892 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0,x,xm")
10893 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x")
10894 (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0")]
10895 UNSPEC_SSE5_PERMUTE))]
10896 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
10897 "perm<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10898 [(set_attr "type" "sse4arg")
10899 (set_attr "mode" "<MODE>")])
10901 ;; SSE5 packed rotate instructions
10902 (define_expand "rotl<mode>3"
10903 [(set (match_operand:SSEMODE1248 0 "register_operand" "")
10904 (rotate:SSEMODE1248
10905 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "")
10906 (match_operand:SI 2 "general_operand")))]
10909 /* If we were given a scalar, convert it to parallel */
10910 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
10912 rtvec vs = rtvec_alloc (<ssescalarnum>);
10913 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
10914 rtx reg = gen_reg_rtx (<MODE>mode);
10915 rtx op2 = operands[2];
10918 if (GET_MODE (op2) != <ssescalarmode>mode)
10920 op2 = gen_reg_rtx (<ssescalarmode>mode);
10921 convert_move (op2, operands[2], false);
10924 for (i = 0; i < <ssescalarnum>; i++)
10925 RTVEC_ELT (vs, i) = op2;
10927 emit_insn (gen_vec_init<mode> (reg, par));
10928 emit_insn (gen_sse5_vrotl<mode>3 (operands[0], operands[1], reg));
10933 (define_expand "rotr<mode>3"
10934 [(set (match_operand:SSEMODE1248 0 "register_operand" "")
10935 (rotatert:SSEMODE1248
10936 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "")
10937 (match_operand:SI 2 "general_operand")))]
10940 /* If we were given a scalar, convert it to parallel */
10941 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
10943 rtvec vs = rtvec_alloc (<ssescalarnum>);
10944 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
10945 rtx neg = gen_reg_rtx (<MODE>mode);
10946 rtx reg = gen_reg_rtx (<MODE>mode);
10947 rtx op2 = operands[2];
10950 if (GET_MODE (op2) != <ssescalarmode>mode)
10952 op2 = gen_reg_rtx (<ssescalarmode>mode);
10953 convert_move (op2, operands[2], false);
10956 for (i = 0; i < <ssescalarnum>; i++)
10957 RTVEC_ELT (vs, i) = op2;
10959 emit_insn (gen_vec_init<mode> (reg, par));
10960 emit_insn (gen_neg<mode>2 (neg, reg));
10961 emit_insn (gen_sse5_vrotl<mode>3 (operands[0], operands[1], neg));
10966 (define_insn "sse5_rotl<mode>3"
10967 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
10968 (rotate:SSEMODE1248
10969 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm")
10970 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
10972 "prot<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
10973 [(set_attr "type" "sseishft")
10974 (set_attr "mode" "TI")])
10976 (define_insn "sse5_rotr<mode>3"
10977 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
10978 (rotatert:SSEMODE1248
10979 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm")
10980 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
10983 operands[3] = GEN_INT ((<ssescalarnum> * 8) - INTVAL (operands[2]));
10984 return \"prot<ssevecsize>\t{%3, %1, %0|%0, %1, %3}\";
10986 [(set_attr "type" "sseishft")
10987 (set_attr "mode" "TI")])
10989 (define_expand "vrotr<mode>3"
10990 [(match_operand:SSEMODE1248 0 "register_operand" "")
10991 (match_operand:SSEMODE1248 1 "register_operand" "")
10992 (match_operand:SSEMODE1248 2 "register_operand" "")]
10995 rtx reg = gen_reg_rtx (<MODE>mode);
10996 emit_insn (gen_neg<mode>2 (reg, operands[2]));
10997 emit_insn (gen_sse5_vrotl<mode>3 (operands[0], operands[1], reg));
11001 (define_expand "vrotl<mode>3"
11002 [(match_operand:SSEMODE1248 0 "register_operand" "")
11003 (match_operand:SSEMODE1248 1 "register_operand" "")
11004 (match_operand:SSEMODE1248 2 "register_operand" "")]
11007 emit_insn (gen_sse5_vrotl<mode>3 (operands[0], operands[1], operands[2]));
11011 (define_insn "sse5_vrotl<mode>3"
11012 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
11013 (if_then_else:SSEMODE1248
11015 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm,x")
11017 (rotate:SSEMODE1248
11018 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "x,xm")
11020 (rotatert:SSEMODE1248
11022 (neg:SSEMODE1248 (match_dup 2)))))]
11023 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 3, true, 1, false)"
11024 "prot<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
11025 [(set_attr "type" "sseishft")
11026 (set_attr "mode" "TI")])
11028 ;; SSE5 packed shift instructions.
11029 ;; FIXME: add V2DI back in
11030 (define_expand "vlshr<mode>3"
11031 [(match_operand:SSEMODE124 0 "register_operand" "")
11032 (match_operand:SSEMODE124 1 "register_operand" "")
11033 (match_operand:SSEMODE124 2 "register_operand" "")]
11036 rtx neg = gen_reg_rtx (<MODE>mode);
11037 emit_insn (gen_neg<mode>2 (neg, operands[2]));
11038 emit_insn (gen_sse5_lshl<mode>3 (operands[0], operands[1], neg));
11042 (define_expand "vashr<mode>3"
11043 [(match_operand:SSEMODE124 0 "register_operand" "")
11044 (match_operand:SSEMODE124 1 "register_operand" "")
11045 (match_operand:SSEMODE124 2 "register_operand" "")]
11048 rtx neg = gen_reg_rtx (<MODE>mode);
11049 emit_insn (gen_neg<mode>2 (neg, operands[2]));
11050 emit_insn (gen_sse5_ashl<mode>3 (operands[0], operands[1], neg));
11054 (define_expand "vashl<mode>3"
11055 [(match_operand:SSEMODE124 0 "register_operand" "")
11056 (match_operand:SSEMODE124 1 "register_operand" "")
11057 (match_operand:SSEMODE124 2 "register_operand" "")]
11060 emit_insn (gen_sse5_ashl<mode>3 (operands[0], operands[1], operands[2]));
11064 (define_insn "sse5_ashl<mode>3"
11065 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
11066 (if_then_else:SSEMODE1248
11068 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm,x")
11070 (ashift:SSEMODE1248
11071 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "x,xm")
11073 (ashiftrt:SSEMODE1248
11075 (neg:SSEMODE1248 (match_dup 2)))))]
11076 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 3, true, 1, false)"
11077 "psha<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
11078 [(set_attr "type" "sseishft")
11079 (set_attr "mode" "TI")])
11081 (define_insn "sse5_lshl<mode>3"
11082 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
11083 (if_then_else:SSEMODE1248
11085 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm,x")
11087 (ashift:SSEMODE1248
11088 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "x,xm")
11090 (lshiftrt:SSEMODE1248
11092 (neg:SSEMODE1248 (match_dup 2)))))]
11093 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 3, true, 1, false)"
11094 "pshl<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
11095 [(set_attr "type" "sseishft")
11096 (set_attr "mode" "TI")])
11098 ;; SSE2 doesn't have some shift varients, so define versions for SSE5
11099 (define_expand "ashlv16qi3"
11100 [(match_operand:V16QI 0 "register_operand" "")
11101 (match_operand:V16QI 1 "register_operand" "")
11102 (match_operand:SI 2 "nonmemory_operand" "")]
11105 rtvec vs = rtvec_alloc (16);
11106 rtx par = gen_rtx_PARALLEL (V16QImode, vs);
11107 rtx reg = gen_reg_rtx (V16QImode);
11109 for (i = 0; i < 16; i++)
11110 RTVEC_ELT (vs, i) = operands[2];
11112 emit_insn (gen_vec_initv16qi (reg, par));
11113 emit_insn (gen_sse5_ashlv16qi3 (operands[0], operands[1], reg));
11117 (define_expand "lshlv16qi3"
11118 [(match_operand:V16QI 0 "register_operand" "")
11119 (match_operand:V16QI 1 "register_operand" "")
11120 (match_operand:SI 2 "nonmemory_operand" "")]
11123 rtvec vs = rtvec_alloc (16);
11124 rtx par = gen_rtx_PARALLEL (V16QImode, vs);
11125 rtx reg = gen_reg_rtx (V16QImode);
11127 for (i = 0; i < 16; i++)
11128 RTVEC_ELT (vs, i) = operands[2];
11130 emit_insn (gen_vec_initv16qi (reg, par));
11131 emit_insn (gen_sse5_lshlv16qi3 (operands[0], operands[1], reg));
11135 (define_expand "ashrv16qi3"
11136 [(match_operand:V16QI 0 "register_operand" "")
11137 (match_operand:V16QI 1 "register_operand" "")
11138 (match_operand:SI 2 "nonmemory_operand" "")]
11141 rtvec vs = rtvec_alloc (16);
11142 rtx par = gen_rtx_PARALLEL (V16QImode, vs);
11143 rtx reg = gen_reg_rtx (V16QImode);
11145 rtx ele = ((GET_CODE (operands[2]) == CONST_INT)
11146 ? GEN_INT (- INTVAL (operands[2]))
11149 for (i = 0; i < 16; i++)
11150 RTVEC_ELT (vs, i) = ele;
11152 emit_insn (gen_vec_initv16qi (reg, par));
11154 if (GET_CODE (operands[2]) != CONST_INT)
11156 rtx neg = gen_reg_rtx (V16QImode);
11157 emit_insn (gen_negv16qi2 (neg, reg));
11158 emit_insn (gen_sse5_ashlv16qi3 (operands[0], operands[1], neg));
11161 emit_insn (gen_sse5_ashlv16qi3 (operands[0], operands[1], reg));
11166 (define_expand "ashrv2di3"
11167 [(match_operand:V2DI 0 "register_operand" "")
11168 (match_operand:V2DI 1 "register_operand" "")
11169 (match_operand:DI 2 "nonmemory_operand" "")]
11172 rtvec vs = rtvec_alloc (2);
11173 rtx par = gen_rtx_PARALLEL (V2DImode, vs);
11174 rtx reg = gen_reg_rtx (V2DImode);
11177 if (GET_CODE (operands[2]) == CONST_INT)
11178 ele = GEN_INT (- INTVAL (operands[2]));
11179 else if (GET_MODE (operands[2]) != DImode)
11181 rtx move = gen_reg_rtx (DImode);
11182 ele = gen_reg_rtx (DImode);
11183 convert_move (move, operands[2], false);
11184 emit_insn (gen_negdi2 (ele, move));
11188 ele = gen_reg_rtx (DImode);
11189 emit_insn (gen_negdi2 (ele, operands[2]));
11192 RTVEC_ELT (vs, 0) = ele;
11193 RTVEC_ELT (vs, 1) = ele;
11194 emit_insn (gen_vec_initv2di (reg, par));
11195 emit_insn (gen_sse5_ashlv2di3 (operands[0], operands[1], reg));
11199 ;; SSE5 FRCZ support
11201 (define_insn "sse5_frcz<mode>2"
11202 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
11204 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm")]
11207 "frcz<ssemodesuffixf4>\t{%1, %0|%0, %1}"
11208 [(set_attr "type" "ssecvt1")
11209 (set_attr "prefix_extra" "1")
11210 (set_attr "mode" "<MODE>")])
11213 (define_insn "sse5_vmfrcz<mode>2"
11214 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
11215 (vec_merge:SSEMODEF2P
11217 [(match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
11219 (match_operand:SSEMODEF2P 1 "register_operand" "0")
11222 "frcz<ssemodesuffixf2s>\t{%2, %0|%0, %2}"
11223 [(set_attr "type" "ssecvt1")
11224 (set_attr "prefix_extra" "1")
11225 (set_attr "mode" "<MODE>")])
11227 (define_insn "sse5_cvtph2ps"
11228 [(set (match_operand:V4SF 0 "register_operand" "=x")
11229 (unspec:V4SF [(match_operand:V4HI 1 "nonimmediate_operand" "xm")]
11232 "cvtph2ps\t{%1, %0|%0, %1}"
11233 [(set_attr "type" "ssecvt")
11234 (set_attr "mode" "V4SF")])
11236 (define_insn "sse5_cvtps2ph"
11237 [(set (match_operand:V4HI 0 "nonimmediate_operand" "=xm")
11238 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")]
11241 "cvtps2ph\t{%1, %0|%0, %1}"
11242 [(set_attr "type" "ssecvt")
11243 (set_attr "mode" "V4SF")])
11245 ;; Scalar versions of the com instructions that use vector types that are
11246 ;; called from the intrinsics. Unlike the the other s{s,d} instructions, the
11247 ;; com instructions fill in 0's in the upper bits instead of leaving them
11248 ;; unmodified, so we use const_vector of 0 instead of match_dup.
11249 (define_expand "sse5_vmmaskcmp<mode>3"
11250 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
11251 (vec_merge:SSEMODEF2P
11252 (match_operator:SSEMODEF2P 1 "sse5_comparison_float_operator"
11253 [(match_operand:SSEMODEF2P 2 "register_operand" "")
11254 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "")])
11259 operands[4] = CONST0_RTX (<MODE>mode);
11262 (define_insn "*sse5_vmmaskcmp<mode>3"
11263 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
11264 (vec_merge:SSEMODEF2P
11265 (match_operator:SSEMODEF2P 1 "sse5_comparison_float_operator"
11266 [(match_operand:SSEMODEF2P 2 "register_operand" "x")
11267 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm")])
11268 (match_operand:SSEMODEF2P 4 "")
11271 "com%Y1<ssemodesuffixf2s>\t{%3, %2, %0|%0, %2, %3}"
11272 [(set_attr "type" "sse4arg")
11273 (set_attr "mode" "<ssescalarmode>")])
11275 ;; We don't have a comparison operator that always returns true/false, so
11276 ;; handle comfalse and comtrue specially.
11277 (define_insn "sse5_com_tf<mode>3"
11278 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
11280 [(match_operand:SSEMODEF2P 1 "register_operand" "x")
11281 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
11282 (match_operand:SI 3 "const_int_operand" "n")]
11283 UNSPEC_SSE5_TRUEFALSE))]
11286 const char *ret = NULL;
11288 switch (INTVAL (operands[3]))
11291 ret = \"comfalses<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}\";
11295 ret = \"comfalsep<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}\";
11299 ret = \"comfalses<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}\";
11303 ret = \"comfalsep<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}\";
11307 gcc_unreachable ();
11312 [(set_attr "type" "ssecmp")
11313 (set_attr "mode" "<MODE>")])
11315 (define_insn "sse5_maskcmp<mode>3"
11316 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
11317 (match_operator:SSEMODEF2P 1 "sse5_comparison_float_operator"
11318 [(match_operand:SSEMODEF2P 2 "register_operand" "x")
11319 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm")]))]
11321 "com%Y1<ssemodesuffixf4>\t{%3, %2, %0|%0, %2, %3}"
11322 [(set_attr "type" "ssecmp")
11323 (set_attr "mode" "<MODE>")])
11325 (define_insn "sse5_maskcmp<mode>3"
11326 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11327 (match_operator:SSEMODE1248 1 "ix86_comparison_int_operator"
11328 [(match_operand:SSEMODE1248 2 "register_operand" "x")
11329 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")]))]
11331 "pcom%Y1<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
11332 [(set_attr "type" "sse4arg")
11333 (set_attr "mode" "TI")])
11335 (define_insn "sse5_maskcmp_uns<mode>3"
11336 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11337 (match_operator:SSEMODE1248 1 "ix86_comparison_uns_operator"
11338 [(match_operand:SSEMODE1248 2 "register_operand" "x")
11339 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")]))]
11341 "pcom%Y1u<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
11342 [(set_attr "type" "ssecmp")
11343 (set_attr "mode" "TI")])
11345 ;; Version of pcom*u* that is called from the intrinsics that allows pcomequ*
11346 ;; and pcomneu* not to be converted to the signed ones in case somebody needs
11347 ;; the exact instruction generated for the intrinsic.
11348 (define_insn "sse5_maskcmp_uns2<mode>3"
11349 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11350 (unspec:SSEMODE1248
11351 [(match_operator:SSEMODE1248 1 "ix86_comparison_uns_operator"
11352 [(match_operand:SSEMODE1248 2 "register_operand" "x")
11353 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")])]
11354 UNSPEC_SSE5_UNSIGNED_CMP))]
11356 "pcom%Y1u<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
11357 [(set_attr "type" "ssecmp")
11358 (set_attr "mode" "TI")])
11360 ;; Pcomtrue and pcomfalse support. These are useless instructions, but are
11361 ;; being added here to be complete.
11362 (define_insn "sse5_pcom_tf<mode>3"
11363 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11364 (unspec:SSEMODE1248
11365 [(match_operand:SSEMODE1248 1 "register_operand" "x")
11366 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm")
11367 (match_operand:SI 3 "const_int_operand" "n")]
11368 UNSPEC_SSE5_TRUEFALSE))]
11371 return ((INTVAL (operands[3]) != 0)
11372 ? "pcomtrue<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
11373 : "pcomfalse<ssevecsize>\t{%2, %1, %0|%0, %1, %2}");
11375 [(set_attr "type" "ssecmp")
11376 (set_attr "mode" "TI")])
11378 (define_insn "*avx_aesenc"
11379 [(set (match_operand:V2DI 0 "register_operand" "=x")
11380 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
11381 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11383 "TARGET_AES && TARGET_AVX"
11384 "vaesenc\t{%2, %1, %0|%0, %1, %2}"
11385 [(set_attr "type" "sselog1")
11386 (set_attr "prefix" "vex")
11387 (set_attr "mode" "TI")])
11389 (define_insn "aesenc"
11390 [(set (match_operand:V2DI 0 "register_operand" "=x")
11391 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11392 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11395 "aesenc\t{%2, %0|%0, %2}"
11396 [(set_attr "type" "sselog1")
11397 (set_attr "prefix_extra" "1")
11398 (set_attr "mode" "TI")])
11400 (define_insn "*avx_aesenclast"
11401 [(set (match_operand:V2DI 0 "register_operand" "=x")
11402 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
11403 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11404 UNSPEC_AESENCLAST))]
11405 "TARGET_AES && TARGET_AVX"
11406 "vaesenclast\t{%2, %1, %0|%0, %1, %2}"
11407 [(set_attr "type" "sselog1")
11408 (set_attr "prefix" "vex")
11409 (set_attr "mode" "TI")])
11411 (define_insn "aesenclast"
11412 [(set (match_operand:V2DI 0 "register_operand" "=x")
11413 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11414 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11415 UNSPEC_AESENCLAST))]
11417 "aesenclast\t{%2, %0|%0, %2}"
11418 [(set_attr "type" "sselog1")
11419 (set_attr "prefix_extra" "1")
11420 (set_attr "mode" "TI")])
11422 (define_insn "*avx_aesdec"
11423 [(set (match_operand:V2DI 0 "register_operand" "=x")
11424 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
11425 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11427 "TARGET_AES && TARGET_AVX"
11428 "vaesdec\t{%2, %1, %0|%0, %1, %2}"
11429 [(set_attr "type" "sselog1")
11430 (set_attr "prefix" "vex")
11431 (set_attr "mode" "TI")])
11433 (define_insn "aesdec"
11434 [(set (match_operand:V2DI 0 "register_operand" "=x")
11435 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11436 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11439 "aesdec\t{%2, %0|%0, %2}"
11440 [(set_attr "type" "sselog1")
11441 (set_attr "prefix_extra" "1")
11442 (set_attr "mode" "TI")])
11444 (define_insn "*avx_aesdeclast"
11445 [(set (match_operand:V2DI 0 "register_operand" "=x")
11446 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
11447 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11448 UNSPEC_AESDECLAST))]
11449 "TARGET_AES && TARGET_AVX"
11450 "vaesdeclast\t{%2, %1, %0|%0, %1, %2}"
11451 [(set_attr "type" "sselog1")
11452 (set_attr "prefix" "vex")
11453 (set_attr "mode" "TI")])
11455 (define_insn "aesdeclast"
11456 [(set (match_operand:V2DI 0 "register_operand" "=x")
11457 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11458 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11459 UNSPEC_AESDECLAST))]
11461 "aesdeclast\t{%2, %0|%0, %2}"
11462 [(set_attr "type" "sselog1")
11463 (set_attr "prefix_extra" "1")
11464 (set_attr "mode" "TI")])
11466 (define_insn "aesimc"
11467 [(set (match_operand:V2DI 0 "register_operand" "=x")
11468 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")]
11471 "%vaesimc\t{%1, %0|%0, %1}"
11472 [(set_attr "type" "sselog1")
11473 (set_attr "prefix_extra" "1")
11474 (set_attr "prefix" "maybe_vex")
11475 (set_attr "mode" "TI")])
11477 (define_insn "aeskeygenassist"
11478 [(set (match_operand:V2DI 0 "register_operand" "=x")
11479 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")
11480 (match_operand:SI 2 "const_0_to_255_operand" "n")]
11481 UNSPEC_AESKEYGENASSIST))]
11483 "%vaeskeygenassist\t{%2, %1, %0|%0, %1, %2}"
11484 [(set_attr "type" "sselog1")
11485 (set_attr "prefix_extra" "1")
11486 (set_attr "prefix" "maybe_vex")
11487 (set_attr "mode" "TI")])
11489 (define_insn "pclmulqdq"
11490 [(set (match_operand:V2DI 0 "register_operand" "=x")
11491 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11492 (match_operand:V2DI 2 "nonimmediate_operand" "xm")
11493 (match_operand:SI 3 "const_0_to_255_operand" "n")]
11496 "pclmulqdq\t{%3, %2, %0|%0, %2, %3}"
11497 [(set_attr "type" "sselog1")
11498 (set_attr "prefix_extra" "1")
11499 (set_attr "mode" "TI")])
11501 (define_expand "avx_vzeroall"
11502 [(match_par_dup 0 [(const_int 0)])]
11505 int nregs = TARGET_64BIT ? 16 : 8;
11508 operands[0] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs + 1));
11510 XVECEXP (operands[0], 0, 0)
11511 = gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, const0_rtx),
11514 for (regno = 0; regno < nregs; regno++)
11515 XVECEXP (operands[0], 0, regno + 1)
11516 = gen_rtx_SET (VOIDmode,
11517 gen_rtx_REG (V8SImode, SSE_REGNO (regno)),
11518 CONST0_RTX (V8SImode));
11521 (define_insn "*avx_vzeroall"
11522 [(match_parallel 0 "vzeroall_operation"
11523 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROALL)
11524 (set (match_operand 1 "register_operand" "=x")
11525 (match_operand 2 "const0_operand" "X"))])]
11528 [(set_attr "type" "sse")
11529 (set_attr "memory" "none")
11530 (set_attr "prefix" "vex")
11531 (set_attr "mode" "OI")])
11533 ;; vzeroupper clobbers the upper 128bits of AVX registers.
11534 (define_insn "avx_vzeroupper"
11535 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)
11536 (clobber (reg:V8SI XMM0_REG))
11537 (clobber (reg:V8SI XMM1_REG))
11538 (clobber (reg:V8SI XMM2_REG))
11539 (clobber (reg:V8SI XMM3_REG))
11540 (clobber (reg:V8SI XMM4_REG))
11541 (clobber (reg:V8SI XMM5_REG))
11542 (clobber (reg:V8SI XMM6_REG))
11543 (clobber (reg:V8SI XMM7_REG))]
11544 "TARGET_AVX && !TARGET_64BIT"
11546 [(set_attr "type" "sse")
11547 (set_attr "memory" "none")
11548 (set_attr "prefix" "vex")
11549 (set_attr "mode" "OI")])
11551 (define_insn "avx_vzeroupper_rex64"
11552 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)
11553 (clobber (reg:V8SI XMM0_REG))
11554 (clobber (reg:V8SI XMM1_REG))
11555 (clobber (reg:V8SI XMM2_REG))
11556 (clobber (reg:V8SI XMM3_REG))
11557 (clobber (reg:V8SI XMM4_REG))
11558 (clobber (reg:V8SI XMM5_REG))
11559 (clobber (reg:V8SI XMM6_REG))
11560 (clobber (reg:V8SI XMM7_REG))
11561 (clobber (reg:V8SI XMM8_REG))
11562 (clobber (reg:V8SI XMM9_REG))
11563 (clobber (reg:V8SI XMM10_REG))
11564 (clobber (reg:V8SI XMM11_REG))
11565 (clobber (reg:V8SI XMM12_REG))
11566 (clobber (reg:V8SI XMM13_REG))
11567 (clobber (reg:V8SI XMM14_REG))
11568 (clobber (reg:V8SI XMM15_REG))]
11569 "TARGET_AVX && TARGET_64BIT"
11571 [(set_attr "type" "sse")
11572 (set_attr "memory" "none")
11573 (set_attr "prefix" "vex")
11574 (set_attr "mode" "OI")])
11576 (define_insn "avx_vpermil<mode>"
11577 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
11579 [(match_operand:AVXMODEF2P 1 "register_operand" "xm")
11580 (match_operand:SI 2 "const_0_to_<vpermilbits>_operand" "n")]
11583 "vpermilp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
11584 [(set_attr "type" "sselog")
11585 (set_attr "prefix" "vex")
11586 (set_attr "mode" "<MODE>")])
11588 (define_insn "avx_vpermilvar<mode>3"
11589 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
11591 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
11592 (match_operand:<avxpermvecmode> 2 "nonimmediate_operand" "xm")]
11595 "vpermilp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
11596 [(set_attr "type" "sselog")
11597 (set_attr "prefix" "vex")
11598 (set_attr "mode" "<MODE>")])
11600 (define_insn "avx_vpermil2<mode>3"
11601 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x,x")
11603 [(match_operand:AVXMODEF2P 1 "register_operand" "x,x")
11604 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "x,xm")
11605 (match_operand:<avxpermvecmode> 3 "nonimmediate_operand" "xm,x")
11606 (match_operand:SI 4 "const_0_to_3_operand" "n,n")]
11609 "vpermil2p<avxmodesuffixf2c>\t{%4, %3, %2, %1, %0|%0, %1, %2, %3, %4}"
11610 [(set_attr "type" "sselog")
11611 (set_attr "prefix" "vex")
11612 (set_attr "mode" "<MODE>")])
11614 (define_insn "avx_vperm2f128<mode>3"
11615 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
11616 (unspec:AVX256MODE2P
11617 [(match_operand:AVX256MODE2P 1 "register_operand" "x")
11618 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm")
11619 (match_operand:SI 3 "const_0_to_255_operand" "n")]
11620 UNSPEC_VPERMIL2F128))]
11622 "vperm2f128\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11623 [(set_attr "type" "sselog")
11624 (set_attr "prefix" "vex")
11625 (set_attr "mode" "V8SF")])
11627 (define_insn "avx_vbroadcasts<avxmodesuffixf2c><avxmodesuffix>"
11628 [(set (match_operand:AVXMODEF4P 0 "register_operand" "=x")
11629 (vec_concat:AVXMODEF4P
11630 (vec_concat:<avxhalfvecmode>
11631 (match_operand:<avxscalarmode> 1 "memory_operand" "m")
11633 (vec_concat:<avxhalfvecmode>
11637 "vbroadcasts<avxmodesuffixf2c>\t{%1, %0|%0, %1}"
11638 [(set_attr "type" "ssemov")
11639 (set_attr "prefix" "vex")
11640 (set_attr "mode" "<avxscalarmode>")])
11642 (define_insn "avx_vbroadcastss256"
11643 [(set (match_operand:V8SF 0 "register_operand" "=x")
11647 (match_operand:SF 1 "memory_operand" "m")
11660 "vbroadcastss\t{%1, %0|%0, %1}"
11661 [(set_attr "type" "ssemov")
11662 (set_attr "prefix" "vex")
11663 (set_attr "mode" "SF")])
11665 (define_insn "avx_vbroadcastf128_p<avxmodesuffixf2c>256"
11666 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "=x")
11667 (vec_concat:AVX256MODEF2P
11668 (match_operand:<avxhalfvecmode> 1 "memory_operand" "m")
11671 "vbroadcastf128\t{%1, %0|%0, %1}"
11672 [(set_attr "type" "ssemov")
11673 (set_attr "prefix" "vex")
11674 (set_attr "mode" "V4SF")])
11676 (define_expand "avx_vinsertf128<mode>"
11677 [(match_operand:AVX256MODE 0 "register_operand" "")
11678 (match_operand:AVX256MODE 1 "register_operand" "")
11679 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "")
11680 (match_operand:SI 3 "const_0_to_1_operand" "")]
11683 switch (INTVAL (operands[3]))
11686 emit_insn (gen_vec_set_lo_<mode> (operands[0], operands[1],
11690 emit_insn (gen_vec_set_hi_<mode> (operands[0], operands[1],
11694 gcc_unreachable ();
11699 (define_insn "vec_set_lo_<mode>"
11700 [(set (match_operand:AVX256MODE4P 0 "register_operand" "=x")
11701 (vec_concat:AVX256MODE4P
11702 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")
11703 (vec_select:<avxhalfvecmode>
11704 (match_operand:AVX256MODE4P 1 "register_operand" "x")
11705 (parallel [(const_int 2) (const_int 3)]))))]
11707 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
11708 [(set_attr "type" "sselog")
11709 (set_attr "prefix" "vex")
11710 (set_attr "mode" "V8SF")])
11712 (define_insn "vec_set_hi_<mode>"
11713 [(set (match_operand:AVX256MODE4P 0 "register_operand" "=x")
11714 (vec_concat:AVX256MODE4P
11715 (vec_select:<avxhalfvecmode>
11716 (match_operand:AVX256MODE4P 1 "register_operand" "x")
11717 (parallel [(const_int 0) (const_int 1)]))
11718 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")))]
11720 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
11721 [(set_attr "type" "sselog")
11722 (set_attr "prefix" "vex")
11723 (set_attr "mode" "V8SF")])
11725 (define_insn "vec_set_lo_<mode>"
11726 [(set (match_operand:AVX256MODE8P 0 "register_operand" "=x")
11727 (vec_concat:AVX256MODE8P
11728 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")
11729 (vec_select:<avxhalfvecmode>
11730 (match_operand:AVX256MODE8P 1 "register_operand" "x")
11731 (parallel [(const_int 4) (const_int 5)
11732 (const_int 6) (const_int 7)]))))]
11734 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
11735 [(set_attr "type" "sselog")
11736 (set_attr "prefix" "vex")
11737 (set_attr "mode" "V8SF")])
11739 (define_insn "vec_set_hi_<mode>"
11740 [(set (match_operand:AVX256MODE8P 0 "register_operand" "=x")
11741 (vec_concat:AVX256MODE8P
11742 (vec_select:<avxhalfvecmode>
11743 (match_operand:AVX256MODE8P 1 "register_operand" "x")
11744 (parallel [(const_int 0) (const_int 1)
11745 (const_int 2) (const_int 3)]))
11746 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")))]
11748 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
11749 [(set_attr "type" "sselog")
11750 (set_attr "prefix" "vex")
11751 (set_attr "mode" "V8SF")])
11753 (define_insn "vec_set_lo_v16hi"
11754 [(set (match_operand:V16HI 0 "register_operand" "=x")
11756 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
11758 (match_operand:V16HI 1 "register_operand" "x")
11759 (parallel [(const_int 8) (const_int 9)
11760 (const_int 10) (const_int 11)
11761 (const_int 12) (const_int 13)
11762 (const_int 14) (const_int 15)]))))]
11764 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
11765 [(set_attr "type" "sselog")
11766 (set_attr "prefix" "vex")
11767 (set_attr "mode" "V8SF")])
11769 (define_insn "vec_set_hi_v16hi"
11770 [(set (match_operand:V16HI 0 "register_operand" "=x")
11773 (match_operand:V16HI 1 "register_operand" "x")
11774 (parallel [(const_int 0) (const_int 1)
11775 (const_int 2) (const_int 3)
11776 (const_int 4) (const_int 5)
11777 (const_int 6) (const_int 7)]))
11778 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
11780 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
11781 [(set_attr "type" "sselog")
11782 (set_attr "prefix" "vex")
11783 (set_attr "mode" "V8SF")])
11785 (define_insn "vec_set_lo_v32qi"
11786 [(set (match_operand:V32QI 0 "register_operand" "=x")
11788 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
11790 (match_operand:V32QI 1 "register_operand" "x")
11791 (parallel [(const_int 16) (const_int 17)
11792 (const_int 18) (const_int 19)
11793 (const_int 20) (const_int 21)
11794 (const_int 22) (const_int 23)
11795 (const_int 24) (const_int 25)
11796 (const_int 26) (const_int 27)
11797 (const_int 28) (const_int 29)
11798 (const_int 30) (const_int 31)]))))]
11800 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
11801 [(set_attr "type" "sselog")
11802 (set_attr "prefix" "vex")
11803 (set_attr "mode" "V8SF")])
11805 (define_insn "vec_set_hi_v32qi"
11806 [(set (match_operand:V32QI 0 "register_operand" "=x")
11809 (match_operand:V32QI 1 "register_operand" "x")
11810 (parallel [(const_int 0) (const_int 1)
11811 (const_int 2) (const_int 3)
11812 (const_int 4) (const_int 5)
11813 (const_int 6) (const_int 7)
11814 (const_int 8) (const_int 9)
11815 (const_int 10) (const_int 11)
11816 (const_int 12) (const_int 13)
11817 (const_int 14) (const_int 15)]))
11818 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
11820 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
11821 [(set_attr "type" "sselog")
11822 (set_attr "prefix" "vex")
11823 (set_attr "mode" "V8SF")])
11825 (define_insn "avx_maskloadp<avxmodesuffixf2c><avxmodesuffix>"
11826 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
11828 [(match_operand:AVXMODEF2P 1 "memory_operand" "m")
11829 (match_operand:AVXMODEF2P 2 "register_operand" "x")
11833 "vmaskmovp<avxmodesuffixf2c>\t{%1, %2, %0|%0, %2, %1}"
11834 [(set_attr "type" "sselog1")
11835 (set_attr "prefix" "vex")
11836 (set_attr "mode" "<MODE>")])
11838 (define_insn "avx_maskstorep<avxmodesuffixf2c><avxmodesuffix>"
11839 [(set (match_operand:AVXMODEF2P 0 "memory_operand" "=m")
11841 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
11842 (match_operand:AVXMODEF2P 2 "register_operand" "x")
11844 UNSPEC_MASKSTORE))]
11846 "vmaskmovp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
11847 [(set_attr "type" "sselog1")
11848 (set_attr "prefix" "vex")
11849 (set_attr "mode" "<MODE>")])
11851 (define_insn "avx_<avxmodesuffixp><avxmodesuffix>_<avxmodesuffixp>"
11852 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x,x")
11853 (unspec:AVX256MODE2P
11854 [(match_operand:<avxhalfvecmode> 1 "nonimmediate_operand" "0,xm")]
11858 switch (which_alternative)
11863 switch (get_attr_mode (insn))
11866 return "vmovaps\t{%1, %x0|%x0, %1}";
11868 return "vmovapd\t{%1, %x0|%x0, %1}";
11870 return "vmovdqa\t{%1, %x0|%x0, %1}";
11877 gcc_unreachable ();
11879 [(set_attr "type" "ssemov")
11880 (set_attr "prefix" "vex")
11881 (set_attr "mode" "<avxvecmode>")
11882 (set (attr "length")
11883 (if_then_else (eq_attr "alternative" "0")
11885 (const_string "*")))])
11887 (define_insn "avx_<avxmodesuffixp>_<avxmodesuffixp><avxmodesuffix>"
11888 [(set (match_operand:<avxhalfvecmode> 0 "register_operand" "=x,x")
11889 (unspec:<avxhalfvecmode>
11890 [(match_operand:AVX256MODE2P 1 "nonimmediate_operand" "0,xm")]
11894 switch (which_alternative)
11899 switch (get_attr_mode (insn))
11902 return "vmovaps\t{%x1, %0|%0, %x1}";
11904 return "vmovapd\t{%x1, %0|%0, %x1}";
11906 return "vmovdqa\t{%x1, %0|%0, %x1}";
11913 gcc_unreachable ();
11915 [(set_attr "type" "ssemov")
11916 (set_attr "prefix" "vex")
11917 (set_attr "mode" "<avxvecmode>")
11918 (set (attr "length")
11919 (if_then_else (eq_attr "alternative" "0")
11921 (const_string "*")))])
11923 (define_expand "vec_init<mode>"
11924 [(match_operand:AVX256MODE 0 "register_operand" "")
11925 (match_operand 1 "" "")]
11928 ix86_expand_vector_init (false, operands[0], operands[1]);
11932 (define_insn "*vec_concat<mode>_avx"
11933 [(set (match_operand:AVX256MODE 0 "register_operand" "=x,x")
11934 (vec_concat:AVX256MODE
11935 (match_operand:<avxhalfvecmode> 1 "register_operand" "x,x")
11936 (match_operand:<avxhalfvecmode> 2 "vector_move_operand" "xm,C")))]
11939 switch (which_alternative)
11942 return "vinsertf128\t{$0x1, %2, %t1, %0|%0, %t1, %2, 0x1}";
11944 switch (get_attr_mode (insn))
11947 return "vmovaps\t{%1, %x0|%x0, %1}";
11949 return "vmovapd\t{%1, %x0|%x0, %1}";
11951 return "vmovdqa\t{%1, %x0|%x0, %1}";
11954 gcc_unreachable ();
11957 [(set_attr "type" "sselog,ssemov")
11958 (set_attr "prefix" "vex")
11959 (set_attr "mode" "<avxvecmode>")])