1 ;; GCC machine description for SSE instructions
2 ;; Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010
3 ;; Free Software Foundation, Inc.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify
8 ;; it under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
10 ;; any later version.
12 ;; GCC is distributed in the hope that it will be useful,
13 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ;; GNU General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
22 ;; 16 byte integral modes handled by SSE
;; (One mode per integer element width: QI, HI, SI and DI elements.)
23 (define_mode_iterator SSEMODEI [V16QI V8HI V4SI V2DI])
25 ;; All 16-byte vector modes handled by SSE
;; SSEMODE lists the four integer vector modes plus V4SF and V2DF;
;; SSEMODE16 is the same list with V1TI added.
26 (define_mode_iterator SSEMODE [V16QI V8HI V4SI V2DI V4SF V2DF])
27 (define_mode_iterator SSEMODE16 [V16QI V8HI V4SI V2DI V1TI V4SF V2DF])
29 ;; 32 byte integral vector modes handled by AVX
;; (One mode per integer element width: QI, HI, SI and DI elements.)
30 (define_mode_iterator AVX256MODEI [V32QI V16HI V8SI V4DI])
32 ;; All 32-byte vector modes handled by AVX
;; (The four 32-byte integer vector modes plus V8SF and V4DF.)
33 (define_mode_iterator AVX256MODE [V32QI V16HI V8SI V4DI V8SF V4DF])
35 ;; All QI vector modes handled by AVX
;; (The 32-byte and the 16-byte vector of QI elements.)
36 (define_mode_iterator AVXMODEQI [V32QI V16QI])
38 ;; All DI vector modes handled by AVX
;; (The 32-byte and the 16-byte vector of DI elements.)
39 (define_mode_iterator AVXMODEDI [V4DI V2DI])
41 ;; All vector modes handled by AVX
;; AVXMODE lists the 16-byte modes followed by the 32-byte modes;
;; AVXMODE16 is the same list with V1TI added.
42 (define_mode_iterator AVXMODE
43 [V16QI V8HI V4SI V2DI V4SF V2DF V32QI V16HI V8SI V4DI V8SF V4DF])
44 (define_mode_iterator AVXMODE16
45 [V16QI V8HI V4SI V2DI V1TI V4SF V2DF V32QI V16HI V8SI V4DI V8SF V4DF])
;; Subsets of the 16-byte modes used by individual patterns.  The digits in
;; an SSEMODE<digits> name are the element sizes, in bytes, of the integer
;; vector modes in the list (1 = QI, 2 = HI, 4 = SI, 8 = DI).
48 (define_mode_iterator SSEMODE12 [V16QI V8HI])
49 (define_mode_iterator SSEMODE24 [V8HI V4SI])
50 (define_mode_iterator SSEMODE14 [V16QI V4SI])
51 (define_mode_iterator SSEMODE124 [V16QI V8HI V4SI])
52 (define_mode_iterator SSEMODE248 [V8HI V4SI V2DI])
53 (define_mode_iterator SSEMODE1248 [V16QI V8HI V4SI V2DI])
;; Float-only lists: scalar plus 16-byte vector modes, the 32-byte float
;; vector modes, and the 16-byte float vector modes respectively.
54 (define_mode_iterator SSEMODEF4 [SF DF V4SF V2DF])
55 (define_mode_iterator FMA4MODEF4 [V8SF V4DF])
56 (define_mode_iterator SSEMODEF2P [V4SF V2DF])
;; Subsets of the AVX vector modes used by individual patterns.
;; The AVX256* lists contain 32-byte modes only.
58 (define_mode_iterator AVX256MODEF2P [V8SF V4DF])
59 (define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF])
60 (define_mode_iterator AVX256MODE24P [V8SI V8SF V4DI V4DF])
61 (define_mode_iterator AVX256MODE4P [V4DI V4DF])
62 (define_mode_iterator AVX256MODE8P [V8SI V8SF])
;; The AVXMODE* lists below mix 16-byte and 32-byte modes.
63 (define_mode_iterator AVXMODEF2P [V4SF V2DF V8SF V4DF])
64 (define_mode_iterator AVXMODEF4P [V4SF V4DF])
65 (define_mode_iterator AVXMODEFDP [V2DF V4DF])
66 (define_mode_iterator AVXMODEFSP [V4SF V8SF])
67 (define_mode_iterator AVXMODEDCVTDQ2PS [V4SF V8SF])
68 (define_mode_iterator AVXMODEDCVTPS2DQ [V4SI V8SI])
70 ;; Int-float size matches
;; (Each list pairs a float vector mode with the integer vector mode that
;; has the same number of elements.)
71 (define_mode_iterator SSEMODE4S [V4SF V4SI])
72 (define_mode_iterator SSEMODE2D [V2DF V2DI])
74 ;; Modes handled by integer vcond pattern
;; (V2DI takes part only when TARGET_SSE4_2 holds; the other three modes
;; are unconditional.)
75 (define_mode_iterator SSEMODE124C8 [V16QI V8HI V4SI
76 (V2DI "TARGET_SSE4_2")])
78 ;; Modes handled by vec_extract_even/odd pattern.
;; Every mode is paired with the target condition under which it is enabled:
;; V4SF needs only SSE, the other 16-byte modes need SSE2 and the 32-byte
;; float modes need AVX.  The list must open with "[" and close with "]".
79 (define_mode_iterator SSEMODE_EO
80 [(V4SF "TARGET_SSE")
81 (V2DF "TARGET_SSE2")
82 (V2DI "TARGET_SSE2") (V4SI "TARGET_SSE2")
83 (V8HI "TARGET_SSE2") (V16QI "TARGET_SSE2")
84 (V4DF "TARGET_AVX") (V8SF "TARGET_AVX")])
86 ;; Mapping from float mode to required SSE level
;; (Substituted into pattern names, e.g. "<sse>_movup<ssemodesuffixf2c>";
;; SF-based modes give "sse", DF-based modes give "sse2".)
87 (define_mode_attr sse [(SF "sse") (DF "sse2") (V4SF "sse") (V2DF "sse2")])
89 ;; Mapping from integer vector mode to mnemonic suffix
;; (b, w, d, q for QI, HI, SI and DI elements respectively.)
90 (define_mode_attr ssevecsize [(V16QI "b") (V8HI "w") (V4SI "d") (V2DI "q")])
92 ;; Mapping of the fma4 suffix
;; fma4modesuffixf4 gives the packed suffix for the 32-byte float modes.
93 (define_mode_attr fma4modesuffixf4 [(V8SF "ps") (V4DF "pd")])
;; ssemodesuffixf2s always gives a scalar suffix: vector modes map to the
;; same "ss"/"sd" as their element mode.
94 (define_mode_attr ssemodesuffixf2s [(SF "ss") (DF "sd")
95 (V4SF "ss") (V2DF "sd")])
97 ;; Mapping of the avx suffix
;; ssemodesuffixf4: scalar modes give "ss"/"sd", vector modes "ps"/"pd".
98 (define_mode_attr ssemodesuffixf4 [(SF "ss") (DF "sd")
99 (V4SF "ps") (V2DF "pd")])
;; ssemodesuffixf2c: bare precision letter, appended after a literal "p" or
;; "s" in output templates (e.g. "movup<ssemodesuffixf2c>").
101 (define_mode_attr ssemodesuffixf2c [(V4SF "s") (V2DF "d")])
;; ssescalarmodesuffix2s: defined only for the two 4-element modes.
103 (define_mode_attr ssescalarmodesuffix2s [(V4SF "ss") (V4SI "d")])
105 ;; Mapping of the max integer size for xop rotate immediate constraint
;; (Each value is the element width in bits minus one.)
106 (define_mode_attr sserotatemax [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")])
108 ;; Mapping of vector modes back to the scalar modes
;; (Covers the 16-byte modes only; the value is the element mode.)
109 (define_mode_attr ssescalarmode [(V4SF "SF") (V2DF "DF")
110 (V16QI "QI") (V8HI "HI")
111 (V4SI "SI") (V2DI "DI")])
113 ;; Mapping of vector modes to a vector mode of double size
;; (Same element mode, twice the element count; maps 16-byte modes to
;; 32-byte modes and 32-byte modes to 64-byte modes.)
114 (define_mode_attr ssedoublesizemode
115 [(V2DF "V4DF") (V2DI "V4DI") (V4SF "V8SF") (V4SI "V8SI")
116 (V8HI "V16HI") (V16QI "V32QI")
117 (V4DF "V8DF") (V8SF "V16SF")
118 (V4DI "V8DI") (V8SI "V16SI") (V16HI "V32HI") (V32QI "V64QI")])
120 ;; Number of scalar elements in each vector type
;; (First row: 16-byte modes; second row: 32-byte modes.)
121 (define_mode_attr ssescalarnum
122 [(V4SF "4") (V2DF "2") (V16QI "16") (V8HI "8") (V4SI "4") (V2DI "2")
123 (V8SF "8") (V4DF "4") (V32QI "32") (V16HI "16") (V8SI "8") (V4DI "4")])
;; Mode attributes used by the AVX patterns.
;; avxvecmode: value used for the insn "mode" attribute.  Integer vector
;; modes map to TI (16-byte) or OI (32-byte); float vector modes map to
;; themselves.
126 (define_mode_attr avxvecmode
127 [(V16QI "TI") (V8HI "TI") (V4SI "TI") (V2DI "TI") (V1TI "TI")
128 (V4SF "V4SF") (V8SF "V8SF") (V2DF "V2DF") (V4DF "V4DF")
129 (V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI")])
;; avxvecpsmode: single-precision float vector mode of the same total size
;; as each integer vector mode.
130 (define_mode_attr avxvecpsmode
131 [(V16QI "V4SF") (V8HI "V4SF") (V4SI "V4SF") (V2DI "V4SF")
132 (V32QI "V8SF") (V16HI "V8SF") (V8SI "V8SF") (V4DI "V8SF")])
;; avxhalfvecmode: vector mode with the same element mode and half the
;; element count.
133 (define_mode_attr avxhalfvecmode
134 [(V32QI "V16QI") (V16HI "V8HI") (V8SI "V4SI") (V4DI "V2DI")
135 (V8SF "V4SF") (V4DF "V2DF")
136 (V16QI "V8QI") (V8HI "V4HI") (V4SI "V2SI") (V4SF "V2SF")])
;; avxscalarmode: element mode, for both 16-byte and 32-byte vector modes.
137 (define_mode_attr avxscalarmode
138 [(V16QI "QI") (V8HI "HI") (V4SI "SI") (V2DI "DI") (V4SF "SF") (V2DF "DF")
139 (V32QI "QI") (V16HI "HI") (V8SI "SI") (V4DI "DI") (V8SF "SF") (V4DF "DF")])
;; avxcvtvecmode: swaps between the SF vector and the SI vector with the
;; same element count.
140 (define_mode_attr avxcvtvecmode
141 [(V4SF "V4SI") (V8SF "V8SI") (V4SI "V4SF") (V8SI "V8SF")])
;; avxpermvecmode: integer vector mode with the same element count as the
;; given float vector mode.
142 (define_mode_attr avxpermvecmode
143 [(V2DF "V2DI") (V4SF "V4SI") (V4DF "V4DI") (V8SF "V8SI")])
;; avxmodesuffixf2c: precision letter -- "s" for modes with SF/SI elements,
;; "d" for modes with DF/DI elements.
144 (define_mode_attr avxmodesuffixf2c
145 [(V4SF "s") (V2DF "d") (V8SI "s") (V8SF "s") (V4DI "d") (V4DF "d")])
;; avxmodesuffixp: packed-type suffix -- "ps"/"pd" for float vector modes,
;; "si" for SI vector modes.  V4DF completes the table and the attribute
;; list must be closed before the next definition starts.
146 (define_mode_attr avxmodesuffixp
147 [(V2DF "pd") (V4SI "si") (V4SF "ps") (V8SF "ps") (V8SI "si")
148 (V4DF "pd")])
;; avxmodesuffix: pattern-name suffix that is empty for the 16-byte modes
;; and "256" for the 32-byte modes (e.g. "avx_movdqu<avxmodesuffix>").
149 (define_mode_attr avxmodesuffix
150 [(V16QI "") (V32QI "256") (V4SI "") (V4SF "") (V2DF "")
151 (V8SI "256") (V8SF "256") (V4DF "256")])
153 ;; Mapping of immediate bits for blend instructions
;; (Each value is 2^N - 1 for an N-element vector, i.e. one bit per element.)
154 (define_mode_attr blendbits
155 [(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")])
157 ;; Mapping of immediate bits for pinsr instructions
;; (Each value is 2^(N-1) for an N-element vector.)
158 (define_mode_attr pinsrbits [(V16QI "32768") (V8HI "128") (V4SI "8")])
160 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
162 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
164 ;; Move patterns
166 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Move expander for the 32-byte AVX modes; hands both operands to
;; ix86_expand_vector_move.
;; NOTE(review): the enabling condition and the lines that open and close
;; the preparation statement (171-172 and 174-175 in the embedded numbering)
;; are absent from this listing -- confirm against the upstream file.
168 (define_expand "mov<mode>"
169 [(set (match_operand:AVX256MODE 0 "nonimmediate_operand" "")
170 (match_operand:AVX256MODE 1 "nonimmediate_operand" ""))]
173 ix86_expand_vector_move (<MODE>mode, operands);
;; AVX move insn for every mode in AVXMODE16.  Three alternatives:
;;   0: SSE constant (constraint C) -> register
;;   1: register or memory -> register
;;   2: register -> memory
;; The condition requires at least one operand to be a register.  The
;; constant alternative takes its opcode from standard_sse_constant_opcode;
;; otherwise the insn "mode" attribute (<avxvecmode>) selects between
;; vmovaps, vmovapd and vmovdqa.
;; NOTE(review): the case labels of both switch statements are absent from
;; this listing, so which mode value selects which mnemonic is not visible.
177 (define_insn "*avx_mov<mode>_internal"
178 [(set (match_operand:AVXMODE16 0 "nonimmediate_operand" "=x,x ,m")
179 (match_operand:AVXMODE16 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
181 && (register_operand (operands[0], <MODE>mode)
182 || register_operand (operands[1], <MODE>mode))"
184 switch (which_alternative)
187 return standard_sse_constant_opcode (insn, operands[1]);
190 switch (get_attr_mode (insn))
194 return "vmovaps\t{%1, %0|%0, %1}";
197 return "vmovapd\t{%1, %0|%0, %1}";
199 return "vmovdqa\t{%1, %0|%0, %1}";
205 [(set_attr "type" "sselog1,ssemov,ssemov")
206 (set_attr "prefix" "vex")
207 (set_attr "mode" "<avxvecmode>")])
209 ;; All of these patterns are enabled for SSE1 as well as SSE2.
210 ;; This is essential for maintaining stable calling conventions.
;; Move expander for the 16-byte modes in SSEMODE16; hands both operands to
;; ix86_expand_vector_move.
212 (define_expand "mov<mode>"
213 [(set (match_operand:SSEMODE16 0 "nonimmediate_operand" "")
214 (match_operand:SSEMODE16 1 "nonimmediate_operand" ""))]
217 ix86_expand_vector_move (<MODE>mode, operands);
;; Non-AVX move insn for every mode in SSEMODE16, with the same three
;; alternatives as the AVX variant (constant -> reg, reg/mem -> reg,
;; reg -> mem) and the same "one operand must be a register" condition.
;; The insn "mode" attribute is computed rather than fixed:
;;   - V4SF when optimizing the function for size, when TARGET_SSE2 is off,
;;     or for alternative 2 (the store) under TARGET_SSE_TYPELESS_STORES;
;;   - otherwise V4SF / V2DF for those two modes;
;;   - TI for everything else.
;; That attribute then selects movaps, movapd or movdqa in the output code.
221 (define_insn "*mov<mode>_internal"
222 [(set (match_operand:SSEMODE16 0 "nonimmediate_operand" "=x,x ,m")
223 (match_operand:SSEMODE16 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
225 && (register_operand (operands[0], <MODE>mode)
226 || register_operand (operands[1], <MODE>mode))"
228 switch (which_alternative)
231 return standard_sse_constant_opcode (insn, operands[1]);
234 switch (get_attr_mode (insn))
237 return "movaps\t{%1, %0|%0, %1}";
239 return "movapd\t{%1, %0|%0, %1}";
241 return "movdqa\t{%1, %0|%0, %1}";
247 [(set_attr "type" "sselog1,ssemov,ssemov")
249 (cond [(ior (ior (ne (symbol_ref "optimize_function_for_size_p (cfun)") (const_int 0))
250 (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
251 (and (eq_attr "alternative" "2")
252 (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
254 (const_string "V4SF")
255 (eq (const_string "<MODE>mode") (const_string "V4SFmode"))
256 (const_string "V4SF")
257 (eq (const_string "<MODE>mode") (const_string "V2DFmode"))
258 (const_string "V2DF")
260 (const_string "TI")))])
262 ;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
263 ;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
264 ;; from memory, we'd prefer to load the memory directly into the %xmm
265 ;; register. To facilitate this happy circumstance, this pattern won't
266 ;; split until after register allocation. If the 64-bit value didn't
267 ;; come from memory, this is the best we can do. This is much better
268 ;; than storing %edx:%eax into a stack temporary and loading an %xmm
;; from there.
;;
;; Enabled for 32-bit SSE2 targets with inter-unit moves.  Once reload has
;; completed the insn is split:
;;   - register source: two sse2_loadld insns load the SImode subreg at byte
;;     offset 0 into operand 0 and the one at byte offset 4 into the scratch
;;     operand 2, then vec_interleave_lowv4si writes the result to operand 0;
;;   - memory source: one vec_concatv2di of the DImode memory with
;;     const0_rtx, written to operand 0 viewed as V2DI.
271 (define_insn_and_split "movdi_to_sse"
273 [(set (match_operand:V4SI 0 "register_operand" "=?x,x")
274 (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
275 (clobber (match_scratch:V4SI 2 "=&x,X"))])]
276 "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES"
278 "&& reload_completed"
281 if (register_operand (operands[1], DImode))
283 /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
284 Assemble the 64-bit DImode value in an xmm register. */
285 emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
286 gen_rtx_SUBREG (SImode, operands[1], 0)));
287 emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
288 gen_rtx_SUBREG (SImode, operands[1], 4)));
289 emit_insn (gen_vec_interleave_lowv4si (operands[0], operands[0],
292 else if (memory_operand (operands[1], DImode))
293 emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]),
294 operands[1], const0_rtx));
;; Post-reload rewrite of a V4SF register load whose source satisfies
;; zero_extended_scalar_load_operand: operand 1 is narrowed to its SFmode
;; subreg at offset 0 and used inside a vec_duplicate, and operand 2 is set
;; to the V4SF zero vector.
;; NOTE(review): the line that opens this definition (299 in the embedded
;; numbering, presumably "(define_split") and parts of the replacement
;; pattern are absent from this listing -- confirm against the upstream file.
300 [(set (match_operand:V4SF 0 "register_operand" "")
301 (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
302 "TARGET_SSE && reload_completed"
305 (vec_duplicate:V4SF (match_dup 1))
309 operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
310 operands[2] = CONST0_RTX (V4SFmode);
;; Post-reload rewrite of a V2DF register load whose source satisfies
;; zero_extended_scalar_load_operand: the load becomes a vec_concat of the
;; DFmode subreg of operand 1 (offset 0) with a DFmode zero (operand 2).
;; NOTE(review): the line that opens this definition (313 in the embedded
;; numbering, presumably "(define_split") is absent from this listing.
314 [(set (match_operand:V2DF 0 "register_operand" "")
315 (match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))]
316 "TARGET_SSE2 && reload_completed"
317 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
319 operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
320 operands[2] = CONST0_RTX (DFmode);
;; Push expander for a register in one of the 32-byte AVX modes; hands the
;; operand to ix86_expand_push.
323 (define_expand "push<mode>1"
324 [(match_operand:AVX256MODE 0 "register_operand" "")]
327 ix86_expand_push (<MODE>mode, operands[0]);
;; Push expander for a register in one of the 16-byte SSEMODE16 modes; hands
;; the operand to ix86_expand_push.
331 (define_expand "push<mode>1"
332 [(match_operand:SSEMODE16 0 "register_operand" "")]
335 ix86_expand_push (<MODE>mode, operands[0]);
;; Misaligned-move expander for the 32-byte AVX modes; hands both operands
;; to ix86_expand_vector_move_misalign.
339 (define_expand "movmisalign<mode>"
340 [(set (match_operand:AVX256MODE 0 "nonimmediate_operand" "")
341 (match_operand:AVX256MODE 1 "nonimmediate_operand" ""))]
344 ix86_expand_vector_move_misalign (<MODE>mode, operands);
;; Misaligned-move expander for the 16-byte SSEMODE16 modes; hands both
;; operands to ix86_expand_vector_move_misalign.
348 (define_expand "movmisalign<mode>"
349 [(set (match_operand:SSEMODE16 0 "nonimmediate_operand" "")
350 (match_operand:SSEMODE16 1 "nonimmediate_operand" ""))]
353 ix86_expand_vector_move_misalign (<MODE>mode, operands);
;; AVX vmovups/vmovupd for the float vector modes in AVXMODEF2P.
;; Alternatives: register-or-memory -> register, register -> memory; the
;; condition rejects a memory-to-memory move.  Marked with "movu" = 1.
357 (define_insn "avx_movup<avxmodesuffixf2c><avxmodesuffix>"
358 [(set (match_operand:AVXMODEF2P 0 "nonimmediate_operand" "=x,m")
360 [(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "xm,x")]
362 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
363 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
364 "vmovup<avxmodesuffixf2c>\t{%1, %0|%0, %1}"
365 [(set_attr "type" "ssemov")
366 (set_attr "movu" "1")
367 (set_attr "prefix" "vex")
368 (set_attr "mode" "<MODE>")])
;; movq with a V2DI register destination and a register-or-memory V2DI
;; source; the visible part of the pattern selects element 0 of the source.
;; The template starts with "%v" and the prefix attribute is "maybe_vex".
;; NOTE(review): the rtx codes wrapping the source operand and the enabling
;; condition are absent from this listing.
370 (define_insn "sse2_movq128"
371 [(set (match_operand:V2DI 0 "register_operand" "=x")
374 (match_operand:V2DI 1 "nonimmediate_operand" "xm")
375 (parallel [(const_int 0)]))
378 "%vmovq\t{%1, %0|%0, %1}"
379 [(set_attr "type" "ssemov")
380 (set_attr "prefix" "maybe_vex")
381 (set_attr "mode" "TI")])
;; Non-AVX movups/movupd for V4SF and V2DF.  Alternatives: register-or-memory
;; -> register, register -> memory; the condition rejects a memory-to-memory
;; move.  Marked with "movu" = 1.
383 (define_insn "<sse>_movup<ssemodesuffixf2c>"
384 [(set (match_operand:SSEMODEF2P 0 "nonimmediate_operand" "=x,m")
386 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm,x")]
388 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
389 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
390 "movup<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
391 [(set_attr "type" "ssemov")
392 (set_attr "movu" "1")
393 (set_attr "mode" "<MODE>")])
;; AVX vmovdqu for the QI vector modes (V32QI and V16QI).  Alternatives:
;; register-or-memory -> register, register -> memory; requires TARGET_AVX
;; and rejects a memory-to-memory move.  Marked with "movu" = 1.
395 (define_insn "avx_movdqu<avxmodesuffix>"
396 [(set (match_operand:AVXMODEQI 0 "nonimmediate_operand" "=x,m")
398 [(match_operand:AVXMODEQI 1 "nonimmediate_operand" "xm,x")]
400 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
401 "vmovdqu\t{%1, %0|%0, %1}"
402 [(set_attr "type" "ssemov")
403 (set_attr "movu" "1")
404 (set_attr "prefix" "vex")
405 (set_attr "mode" "<avxvecmode>")])
;; SSE2 movdqu on V16QI.  Alternatives: register-or-memory -> register,
;; register -> memory; requires TARGET_SSE2 and rejects a memory-to-memory
;; move.  Marked with "movu" = 1 and "prefix_data16" = 1.
407 (define_insn "sse2_movdqu"
408 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
409 (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "xm,x")]
411 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
412 "movdqu\t{%1, %0|%0, %1}"
413 [(set_attr "type" "ssemov")
414 (set_attr "movu" "1")
415 (set_attr "prefix_data16" "1")
416 (set_attr "mode" "TI")])
;; AVX vmovntps/vmovntpd: stores a float vector register (AVXMODEF2P modes)
;; to memory.  Register source and memory destination only.
418 (define_insn "avx_movnt<mode>"
419 [(set (match_operand:AVXMODEF2P 0 "memory_operand" "=m")
421 [(match_operand:AVXMODEF2P 1 "register_operand" "x")]
423 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
424 "vmovntp<avxmodesuffixf2c>\t{%1, %0|%0, %1}"
425 [(set_attr "type" "ssemov")
426 (set_attr "prefix" "vex")
427 (set_attr "mode" "<MODE>")])
;; Non-AVX movntps/movntpd: stores a V4SF or V2DF register to memory.
;; Register source and memory destination only.
429 (define_insn "<sse>_movnt<mode>"
430 [(set (match_operand:SSEMODEF2P 0 "memory_operand" "=m")
432 [(match_operand:SSEMODEF2P 1 "register_operand" "x")]
434 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
435 "movntp<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
436 [(set_attr "type" "ssemov")
437 (set_attr "mode" "<MODE>")])
;; AVX vmovntdq: stores a DI vector register (V4DI or V2DI) to memory.
;; Note the "type" attribute here is "ssecvt", unlike the "ssemov" used by
;; the other movnt patterns in this file.
;; NOTE(review): the enabling condition line is absent from this listing.
439 (define_insn "avx_movnt<mode>"
440 [(set (match_operand:AVXMODEDI 0 "memory_operand" "=m")
442 [(match_operand:AVXMODEDI 1 "register_operand" "x")]
445 "vmovntdq\t{%1, %0|%0, %1}"
446 [(set_attr "type" "ssecvt")
447 (set_attr "prefix" "vex")
448 (set_attr "mode" "<avxvecmode>")])
;; movntdq: stores a V2DI register to memory.  Register source and memory
;; destination only; "prefix_data16" = 1.
;; NOTE(review): the enabling condition line is absent from this listing.
450 (define_insn "sse2_movntv2di"
451 [(set (match_operand:V2DI 0 "memory_operand" "=m")
452 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")]
455 "movntdq\t{%1, %0|%0, %1}"
456 [(set_attr "type" "ssemov")
457 (set_attr "prefix_data16" "1")
458 (set_attr "mode" "TI")])
;; movnti: stores an SImode general register (constraint "r") to memory.
;; "prefix_data16" is explicitly 0; the "mode" attribute is set to V2DF even
;; though the operands are SImode.
;; NOTE(review): the enabling condition line is absent from this listing.
460 (define_insn "sse2_movntsi"
461 [(set (match_operand:SI 0 "memory_operand" "=m")
462 (unspec:SI [(match_operand:SI 1 "register_operand" "r")]
465 "movnti\t{%1, %0|%0, %1}"
466 [(set_attr "type" "ssemov")
467 (set_attr "prefix_data16" "0")
468 (set_attr "mode" "V2DF")])
;; AVX vlddqu: loads a QI vector (V32QI or V16QI) from memory into a
;; register.  Memory source only; marked with "movu" = 1.
;; NOTE(review): the enabling condition line is absent from this listing.
470 (define_insn "avx_lddqu<avxmodesuffix>"
471 [(set (match_operand:AVXMODEQI 0 "register_operand" "=x")
473 [(match_operand:AVXMODEQI 1 "memory_operand" "m")]
476 "vlddqu\t{%1, %0|%0, %1}"
477 [(set_attr "type" "ssecvt")
478 (set_attr "movu" "1")
479 (set_attr "prefix" "vex")
480 (set_attr "mode" "<avxvecmode>")])
;; lddqu: loads a V16QI from memory into a register.  Memory source only;
;; marked with "movu" = 1, "prefix_data16" = 0 and "prefix_rep" = 1.
;; NOTE(review): the enabling condition line is absent from this listing.
482 (define_insn "sse3_lddqu"
483 [(set (match_operand:V16QI 0 "register_operand" "=x")
484 (unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "m")]
487 "lddqu\t{%1, %0|%0, %1}"
488 [(set_attr "type" "ssemov")
489 (set_attr "movu" "1")
490 (set_attr "prefix_data16" "0")
491 (set_attr "prefix_rep" "1")
492 (set_attr "mode" "TI")])
494 ; Expand patterns for non-temporal stores. At the moment, only those
495 ; that directly map to insns are defined; it would be possible to
496 ; define patterns for other modes that would expand to several insns.
;; storent for V4SF and V2DF: register operand 1 stored to memory operand 0.
;; NOTE(review): the unspec wrapper lines are absent from this listing.
498 (define_expand "storent<mode>"
499 [(set (match_operand:SSEMODEF2P 0 "memory_operand" "")
501 [(match_operand:SSEMODEF2P 1 "register_operand" "")]
503 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
;; storent for the modes in the MODEF iterator (defined outside this
;; listing): register operand 1 stored to memory operand 0.
;; NOTE(review): the unspec wrapper and the enabling condition are absent
;; from this listing.
506 (define_expand "storent<mode>"
507 [(set (match_operand:MODEF 0 "memory_operand" "")
509 [(match_operand:MODEF 1 "register_operand" "")]
;; storent for V2DI: register operand 1 stored to memory operand 0 through
;; an unspec, the same shape as the sse2_movntv2di insn.
;; NOTE(review): the unspec name and the enabling condition are absent from
;; this listing.
514 (define_expand "storentv2di"
515 [(set (match_operand:V2DI 0 "memory_operand" "")
516 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "")]
;; storent for SImode: register operand 1 stored to memory operand 0 through
;; an unspec, the same shape as the sse2_movntsi insn.
;; NOTE(review): the unspec name and the enabling condition are absent from
;; this listing.
521 (define_expand "storentsi"
522 [(set (match_operand:SI 0 "memory_operand" "")
523 (unspec:SI [(match_operand:SI 1 "register_operand" "")]
528 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
530 ;; Parallel floating point arithmetic
532 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Unary expander for V4SF and V2DF, named through the <code> iterator; all
;; work is delegated to ix86_expand_fp_absneg_operator and the expander then
;; finishes with DONE.
;; NOTE(review): the line naming the code iterator is absent from this
;; listing.
534 (define_expand "<code><mode>2"
535 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
537 (match_operand:SSEMODEF2P 1 "register_operand" "")))]
538 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
539 "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
;; Add/subtract expander for the 32-byte float modes (V8SF, V4DF).  Both
;; inputs may be memory on entry; ix86_fixup_binary_operands_no_copy
;; adjusts the operands before the pattern is emitted.
541 (define_expand "<plusminus_insn><mode>3"
542 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
543 (plusminus:AVX256MODEF2P
544 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
545 (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
546 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
547 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; AVX packed add/subtract for all four float vector modes.  Three-operand
;; form: the destination is independent of operand 1 (constraint "x", with
;; the <comm> modifier prepended), and operand 2 may be memory.  The
;; condition also checks ix86_binary_operator_ok.
549 (define_insn "*avx_<plusminus_insn><mode>3"
550 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
551 (plusminus:AVXMODEF2P
552 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "<comm>x")
553 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
554 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
555 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
556 "v<plusminus_mnemonic>p<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
557 [(set_attr "type" "sseadd")
558 (set_attr "prefix" "vex")
559 (set_attr "mode" "<avxvecmode>")])
;; Add/subtract expander for the 16-byte float modes (V4SF, V2DF).  Both
;; inputs may be memory on entry; ix86_fixup_binary_operands_no_copy
;; adjusts the operands before the pattern is emitted.
561 (define_expand "<plusminus_insn><mode>3"
562 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
563 (plusminus:SSEMODEF2P
564 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
565 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
566 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
567 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Non-AVX packed add/subtract for V4SF and V2DF.  Two-operand form:
;; operand 1 is tied to the destination (constraint "0", with the <comm>
;; modifier prepended) and the template names only operands 0 and 2.
569 (define_insn "*<plusminus_insn><mode>3"
570 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
571 (plusminus:SSEMODEF2P
572 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "<comm>0")
573 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
574 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
575 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
576 "<plusminus_mnemonic>p<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
577 [(set_attr "type" "sseadd")
578 (set_attr "mode" "<MODE>")])
;; AVX scalar add/subtract (v...ss / v...sd) expressed as a vec_merge over
;; the full-vector operation.  Three-operand form; the "mode" attribute is
;; the element mode.
;; NOTE(review): the remaining vec_merge operands are absent from this
;; listing.
580 (define_insn "*avx_vm<plusminus_insn><mode>3"
581 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
582 (vec_merge:SSEMODEF2P
583 (plusminus:SSEMODEF2P
584 (match_operand:SSEMODEF2P 1 "register_operand" "x")
585 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
588 "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
589 "v<plusminus_mnemonic>s<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
590 [(set_attr "type" "sseadd")
591 (set_attr "prefix" "vex")
592 (set_attr "mode" "<ssescalarmode>")])
;; Non-AVX scalar add/subtract (...ss / ...sd) expressed as a vec_merge over
;; the full-vector operation.  Operand 1 is tied to the destination; the
;; "mode" attribute is the element mode.
;; NOTE(review): the remaining vec_merge operands are absent from this
;; listing.
594 (define_insn "<sse>_vm<plusminus_insn><mode>3"
595 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
596 (vec_merge:SSEMODEF2P
597 (plusminus:SSEMODEF2P
598 (match_operand:SSEMODEF2P 1 "register_operand" "0")
599 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
602 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
603 "<plusminus_mnemonic>s<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
604 [(set_attr "type" "sseadd")
605 (set_attr "mode" "<ssescalarmode>")])
;; Multiply expander for the 32-byte float modes (V8SF, V4DF); operands are
;; adjusted by ix86_fixup_binary_operands_no_copy with code MULT.
607 (define_expand "mul<mode>3"
608 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
610 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
611 (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
612 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
613 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; AVX packed multiply (vmulps / vmulpd) for all four float vector modes.
;; Three-operand form; operand 1 carries the "%" commutative modifier and
;; operand 2 may be memory.
615 (define_insn "*avx_mul<mode>3"
616 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
618 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
619 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
620 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
621 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
622 "vmulp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
623 [(set_attr "type" "ssemul")
624 (set_attr "prefix" "vex")
625 (set_attr "mode" "<avxvecmode>")])
;; Multiply expander for the 16-byte float modes (V4SF, V2DF); operands are
;; adjusted by ix86_fixup_binary_operands_no_copy with code MULT.
627 (define_expand "mul<mode>3"
628 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
630 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
631 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
632 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
633 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; Non-AVX packed multiply (mulps / mulpd).  Two-operand form: operand 1 is
;; tied to the destination and carries the "%" commutative modifier.
635 (define_insn "*mul<mode>3"
636 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
638 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
639 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
640 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
641 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
642 "mulp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
643 [(set_attr "type" "ssemul")
644 (set_attr "mode" "<MODE>")])
;; AVX scalar multiply (vmulss / vmulsd) expressed as a vec_merge.
;; Three-operand form; the "mode" attribute is the element mode.
;; NOTE(review): this condition uses AVX_VEC_FLOAT_MODE_P whereas the
;; sibling *avx_vm add/sub and div patterns use AVX128_VEC_FLOAT_MODE_P --
;; confirm whether the difference is intended.
646 (define_insn "*avx_vmmul<mode>3"
647 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
648 (vec_merge:SSEMODEF2P
650 (match_operand:SSEMODEF2P 1 "register_operand" "x")
651 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
654 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
655 "vmuls<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
656 [(set_attr "type" "ssemul")
657 (set_attr "prefix" "vex")
658 (set_attr "mode" "<ssescalarmode>")])
;; Non-AVX scalar multiply (mulss / mulsd) expressed as a vec_merge.
;; Operand 1 is tied to the destination; the "mode" attribute is the
;; element mode.
660 (define_insn "<sse>_vmmul<mode>3"
661 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
662 (vec_merge:SSEMODEF2P
664 (match_operand:SSEMODEF2P 1 "register_operand" "0")
665 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
668 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
669 "muls<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
670 [(set_attr "type" "ssemul")
671 (set_attr "mode" "<ssescalarmode>")])
;; V8SF division expander.  After the usual operand fixup, the division is
;; emitted through ix86_emit_swdivsf instead when all of these hold:
;; TARGET_SSE_MATH, TARGET_RECIP, not optimizing the insn for size,
;; flag_finite_math_only, !flag_trapping_math and
;; flag_unsafe_math_optimizations.
;; NOTE(review): the enabling condition, braces and the statement following
;; the ix86_emit_swdivsf call are absent from this listing.
673 (define_expand "divv8sf3"
674 [(set (match_operand:V8SF 0 "register_operand" "")
675 (div:V8SF (match_operand:V8SF 1 "register_operand" "")
676 (match_operand:V8SF 2 "nonimmediate_operand" "")))]
679 ix86_fixup_binary_operands_no_copy (DIV, V8SFmode, operands);
681 if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_insn_for_size_p ()
682 && flag_finite_math_only && !flag_trapping_math
683 && flag_unsafe_math_optimizations)
685 ix86_emit_swdivsf (operands[0], operands[1],
686 operands[2], V8SFmode);
;; V4DF division expander; only performs the operand fixup (no reciprocal
;; shortcut, unlike the V8SF expander).
;; NOTE(review): the enabling condition line is absent from this listing.
691 (define_expand "divv4df3"
692 [(set (match_operand:V4DF 0 "register_operand" "")
693 (div:V4DF (match_operand:V4DF 1 "register_operand" "")
694 (match_operand:V4DF 2 "nonimmediate_operand" "")))]
696 "ix86_fixup_binary_operands_no_copy (DIV, V4DFmode, operands);")
;; AVX packed divide (vdivps / vdivpd) for all four float vector modes.
;; Three-operand form; operand 1 must be a register, operand 2 may be memory.
698 (define_insn "avx_div<mode>3"
699 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
701 (match_operand:AVXMODEF2P 1 "register_operand" "x")
702 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
703 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
704 "vdivp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
705 [(set_attr "type" "ssediv")
706 (set_attr "prefix" "vex")
707 (set_attr "mode" "<MODE>")])
;; V4SF division expander.  The division is emitted through
;; ix86_emit_swdivsf when all of these hold: TARGET_SSE_MATH, TARGET_RECIP,
;; optimizing the insn for speed, flag_finite_math_only,
;; !flag_trapping_math and flag_unsafe_math_optimizations.
;; NOTE(review): the enabling condition, braces and the statement following
;; the ix86_emit_swdivsf call are absent from this listing.
709 (define_expand "divv4sf3"
710 [(set (match_operand:V4SF 0 "register_operand" "")
711 (div:V4SF (match_operand:V4SF 1 "register_operand" "")
712 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
715 if (TARGET_SSE_MATH && TARGET_RECIP && optimize_insn_for_speed_p ()
716 && flag_finite_math_only && !flag_trapping_math
717 && flag_unsafe_math_optimizations)
719 ix86_emit_swdivsf (operands[0], operands[1],
720 operands[2], V4SFmode);
;; V2DF division expander: plain div pattern with a register dividend and a
;; register-or-memory divisor.
;; NOTE(review): the enabling condition and the closing lines are absent
;; from this listing.
725 (define_expand "divv2df3"
726 [(set (match_operand:V2DF 0 "register_operand" "")
727 (div:V2DF (match_operand:V2DF 1 "register_operand" "")
728 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
;; AVX packed divide restricted to the 16-byte float modes (V4SF, V2DF);
;; three-operand vdivps / vdivpd.
732 (define_insn "*avx_div<mode>3"
733 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
735 (match_operand:SSEMODEF2P 1 "register_operand" "x")
736 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
737 "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
738 "vdivp<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
739 [(set_attr "type" "ssediv")
740 (set_attr "prefix" "vex")
741 (set_attr "mode" "<MODE>")])
;; Non-AVX packed divide (divps / divpd).  Two-operand form with operand 1
;; tied to the destination.
743 (define_insn "<sse>_div<mode>3"
744 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
746 (match_operand:SSEMODEF2P 1 "register_operand" "0")
747 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
748 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
749 "divp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
750 [(set_attr "type" "ssediv")
751 (set_attr "mode" "<MODE>")])
;; AVX scalar divide (vdivss / vdivsd) expressed as a vec_merge.
;; Three-operand form; the "mode" attribute is the element mode.
753 (define_insn "*avx_vmdiv<mode>3"
754 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
755 (vec_merge:SSEMODEF2P
757 (match_operand:SSEMODEF2P 1 "register_operand" "x")
758 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
761 "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
762 "vdivs<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
763 [(set_attr "type" "ssediv")
764 (set_attr "prefix" "vex")
765 (set_attr "mode" "<ssescalarmode>")])
;; Non-AVX scalar divide (divss / divsd) expressed as a vec_merge.
;; Operand 1 is tied to the destination; the "mode" attribute is the
;; element mode.
767 (define_insn "<sse>_vmdiv<mode>3"
768 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
769 (vec_merge:SSEMODEF2P
771 (match_operand:SSEMODEF2P 1 "register_operand" "0")
772 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
775 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
776 "divs<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
777 [(set_attr "type" "ssediv")
778 (set_attr "mode" "<ssescalarmode>")])
;; AVX vrcpps on V8SF, modelled as UNSPEC_RCP of a register-or-memory source.
;; NOTE(review): the enabling condition line is absent from this listing.
780 (define_insn "avx_rcpv8sf2"
781 [(set (match_operand:V8SF 0 "register_operand" "=x")
783 [(match_operand:V8SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
785 "vrcpps\t{%1, %0|%0, %1}"
786 [(set_attr "type" "sse")
787 (set_attr "prefix" "vex")
788 (set_attr "mode" "V8SF")])
;; rcpps on V4SF, modelled as UNSPEC_RCP of a register-or-memory source.
;; The template starts with "%v" and the prefix attribute is "maybe_vex";
;; "atom_sse_attr" is "rcp".
790 (define_insn "sse_rcpv4sf2"
791 [(set (match_operand:V4SF 0 "register_operand" "=x")
793 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
795 "%vrcpps\t{%1, %0|%0, %1}"
796 [(set_attr "type" "sse")
797 (set_attr "atom_sse_attr" "rcp")
798 (set_attr "prefix" "maybe_vex")
799 (set_attr "mode" "V4SF")])
;; AVX vrcpss: operand 1 (register or memory) is the source of the unspec,
;; operand 2 is a separate register operand that appears as the middle
;; operand of the three-operand template.  "mode" attribute is SF.
;; NOTE(review): the vec_merge wrapper, unspec name and condition lines are
;; absent from this listing.
801 (define_insn "*avx_vmrcpv4sf2"
802 [(set (match_operand:V4SF 0 "register_operand" "=x")
804 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
806 (match_operand:V4SF 2 "register_operand" "x")
809 "vrcpss\t{%1, %2, %0|%0, %2, %1}"
810 [(set_attr "type" "sse")
811 (set_attr "prefix" "vex")
812 (set_attr "mode" "SF")])
;; Non-AVX rcpss: operand 1 (register or memory) is the source of the
;; unspec; operand 2 is tied to the destination and does not appear in the
;; template.  "mode" attribute is SF; "atom_sse_attr" is "rcp".
814 (define_insn "sse_vmrcpv4sf2"
815 [(set (match_operand:V4SF 0 "register_operand" "=x")
817 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
819 (match_operand:V4SF 2 "register_operand" "0")
822 "rcpss\t{%1, %0|%0, %1}"
823 [(set_attr "type" "sse")
824 (set_attr "atom_sse_attr" "rcp")
825 (set_attr "mode" "SF")])
;; V8SF square-root expander.  The result is emitted through
;; ix86_emit_swsqrtsf (last argument 0) when all of these hold:
;; TARGET_SSE_MATH, TARGET_RECIP, not optimizing the insn for size,
;; flag_finite_math_only, !flag_trapping_math and
;; flag_unsafe_math_optimizations.
;; NOTE(review): the enabling condition, braces and the statement following
;; the ix86_emit_swsqrtsf call are absent from this listing.
827 (define_expand "sqrtv8sf2"
828 [(set (match_operand:V8SF 0 "register_operand" "")
829 (sqrt:V8SF (match_operand:V8SF 1 "nonimmediate_operand" "")))]
832 if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_insn_for_size_p ()
833 && flag_finite_math_only && !flag_trapping_math
834 && flag_unsafe_math_optimizations)
836 ix86_emit_swsqrtsf (operands[0], operands[1], V8SFmode, 0);
;; AVX vsqrtps on V8SF; the source may be a register or memory.
;; NOTE(review): the enabling condition line is absent from this listing.
841 (define_insn "avx_sqrtv8sf2"
842 [(set (match_operand:V8SF 0 "register_operand" "=x")
843 (sqrt:V8SF (match_operand:V8SF 1 "nonimmediate_operand" "xm")))]
845 "vsqrtps\t{%1, %0|%0, %1}"
846 [(set_attr "type" "sse")
847 (set_attr "prefix" "vex")
848 (set_attr "mode" "V8SF")])
;; V4SF square-root expander.  The result is emitted through
;; ix86_emit_swsqrtsf (last argument 0) when all of these hold:
;; TARGET_SSE_MATH, TARGET_RECIP, optimizing the insn for speed,
;; flag_finite_math_only, !flag_trapping_math and
;; flag_unsafe_math_optimizations.
;; NOTE(review): the enabling condition, braces and the statement following
;; the ix86_emit_swsqrtsf call are absent from this listing.
850 (define_expand "sqrtv4sf2"
851 [(set (match_operand:V4SF 0 "register_operand" "")
852 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
855 if (TARGET_SSE_MATH && TARGET_RECIP && optimize_insn_for_speed_p ()
856 && flag_finite_math_only && !flag_trapping_math
857 && flag_unsafe_math_optimizations)
859 ix86_emit_swsqrtsf (operands[0], operands[1], V4SFmode, 0);
;; sqrtps on V4SF; the source may be a register or memory.  The template
;; starts with "%v" and the prefix attribute is "maybe_vex";
;; "atom_sse_attr" is "sqrt".
864 (define_insn "sse_sqrtv4sf2"
865 [(set (match_operand:V4SF 0 "register_operand" "=x")
866 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
868 "%vsqrtps\t{%1, %0|%0, %1}"
869 [(set_attr "type" "sse")
870 (set_attr "atom_sse_attr" "sqrt")
871 (set_attr "prefix" "maybe_vex")
872 (set_attr "mode" "V4SF")])
;; AVX vsqrtpd on V4DF; the source may be a register or memory.  This is a
;; named insn, so no separate expander is needed for this mode.
;; NOTE(review): the enabling condition line is absent from this listing.
874 (define_insn "sqrtv4df2"
875 [(set (match_operand:V4DF 0 "register_operand" "=x")
876 (sqrt:V4DF (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
878 "vsqrtpd\t{%1, %0|%0, %1}"
879 [(set_attr "type" "sse")
880 (set_attr "prefix" "vex")
881 (set_attr "mode" "V4DF")])
;; sqrtpd on V2DF; the source may be a register or memory.  The template
;; starts with "%v" and the prefix attribute is "maybe_vex".
;; NOTE(review): the enabling condition line is absent from this listing.
883 (define_insn "sqrtv2df2"
884 [(set (match_operand:V2DF 0 "register_operand" "=x")
885 (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
887 "%vsqrtpd\t{%1, %0|%0, %1}"
888 [(set_attr "type" "sse")
889 (set_attr "prefix" "maybe_vex")
890 (set_attr "mode" "V2DF")])
;; AVX scalar square root (vsqrtss / vsqrtsd) expressed as a vec_merge:
;; operand 1 is the value operated on, operand 2 is the other vec_merge
;; input and appears as the middle operand of the three-operand template.
;; NOTE(review): this condition uses AVX_VEC_FLOAT_MODE_P although the
;; pattern iterates over the 16-byte SSEMODEF2P modes only -- confirm
;; whether AVX128_VEC_FLOAT_MODE_P was intended.
892 (define_insn "*avx_vmsqrt<mode>2"
893 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
894 (vec_merge:SSEMODEF2P
896 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm"))
897 (match_operand:SSEMODEF2P 2 "register_operand" "x")
899 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
900 "vsqrts<ssemodesuffixf2c>\t{%1, %2, %0|%0, %2, %1}"
901 [(set_attr "type" "sse")
902 (set_attr "prefix" "vex")
903 (set_attr "mode" "<ssescalarmode>")])
;; Non-AVX scalar square root (sqrtss / sqrtsd) expressed as a vec_merge:
;; operand 1 is the value operated on; operand 2 is tied to the destination
;; and does not appear in the template.  "atom_sse_attr" is "sqrt".
905 (define_insn "<sse>_vmsqrt<mode>2"
906 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
907 (vec_merge:SSEMODEF2P
909 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm"))
910 (match_operand:SSEMODEF2P 2 "register_operand" "0")
912 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
913 "sqrts<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
914 [(set_attr "type" "sse")
915 (set_attr "atom_sse_attr" "sqrt")
916 (set_attr "mode" "<ssescalarmode>")])
;; V8SF reciprocal-square-root expander, enabled for TARGET_AVX with
;; TARGET_SSE_MATH.  Always emits the result through ix86_emit_swsqrtsf,
;; passing 1 as the last argument (the sqrt expanders pass 0).
918 (define_expand "rsqrtv8sf2"
919 [(set (match_operand:V8SF 0 "register_operand" "")
921 [(match_operand:V8SF 1 "nonimmediate_operand" "")] UNSPEC_RSQRT))]
922 "TARGET_AVX && TARGET_SSE_MATH"
924 ix86_emit_swsqrtsf (operands[0], operands[1], V8SFmode, 1);
;; AVX vrsqrtps on V8SF, modelled as UNSPEC_RSQRT of a register-or-memory
;; source.
;; NOTE(review): the enabling condition line is absent from this listing.
928 (define_insn "avx_rsqrtv8sf2"
929 [(set (match_operand:V8SF 0 "register_operand" "=x")
931 [(match_operand:V8SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
933 "vrsqrtps\t{%1, %0|%0, %1}"
934 [(set_attr "type" "sse")
935 (set_attr "prefix" "vex")
936 (set_attr "mode" "V8SF")])
;; V4SF reciprocal-square-root expander.  Emits the result through
;; ix86_emit_swsqrtsf, passing 1 as the last argument.
;; NOTE(review): the enabling condition line is absent from this listing.
938 (define_expand "rsqrtv4sf2"
939 [(set (match_operand:V4SF 0 "register_operand" "")
941 [(match_operand:V4SF 1 "nonimmediate_operand" "")] UNSPEC_RSQRT))]
944 ix86_emit_swsqrtsf (operands[0], operands[1], V4SFmode, 1);
;; rsqrtps on V4SF, modelled as UNSPEC_RSQRT of a register-or-memory source.
;; The template starts with "%v" and the prefix attribute is "maybe_vex".
948 (define_insn "sse_rsqrtv4sf2"
949 [(set (match_operand:V4SF 0 "register_operand" "=x")
951 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
953 "%vrsqrtps\t{%1, %0|%0, %1}"
954 [(set_attr "type" "sse")
955 (set_attr "prefix" "maybe_vex")
956 (set_attr "mode" "V4SF")])
;; AVX vrsqrtss: operand 1 (register or memory) is the source of the unspec,
;; operand 2 is a separate register operand that appears as the middle
;; operand of the three-operand template.  "mode" attribute is SF.
958 (define_insn "*avx_vmrsqrtv4sf2"
959 [(set (match_operand:V4SF 0 "register_operand" "=x")
961 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
963 (match_operand:V4SF 2 "register_operand" "x")
966 "vrsqrtss\t{%1, %2, %0|%0, %2, %1}"
967 [(set_attr "type" "sse")
968 (set_attr "prefix" "vex")
969 (set_attr "mode" "SF")])
;; Non-AVX rsqrtss: operand 1 (register or memory) is the source of the
;; unspec; operand 2 is tied to the destination and does not appear in the
;; template.  "mode" attribute is SF.
971 (define_insn "sse_vmrsqrtv4sf2"
972 [(set (match_operand:V4SF 0 "register_operand" "=x")
974 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
976 (match_operand:V4SF 2 "register_operand" "0")
979 "rsqrtss\t{%1, %0|%0, %1}"
980 [(set_attr "type" "sse")
981 (set_attr "mode" "SF")])
983 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
984 ;; isn't really correct, as those rtl operators aren't defined when
985 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
;; Max/min expander for the 32-byte float modes (V8SF, V4DF).  Unless
;; flag_finite_math_only is set, operand 1 is first forced into a register;
;; the usual binary-operand fixup follows in either case.
987 (define_expand "<code><mode>3"
988 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
989 (smaxmin:AVX256MODEF2P
990 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
991 (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
992 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
994 if (!flag_finite_math_only)
995 operands[1] = force_reg (<MODE>mode, operands[1]);
996 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
;; Expander for the same-named min/max pattern on SSEMODEF2P modes, gated
;; on SSE_VEC_FLOAT_MODE_P.  As in the 256-bit expander, operand 1 is
;; forced into a register unless flag_finite_math_only is set, then
;; ix86_fixup_binary_operands_no_copy adjusts the operands.
999 (define_expand "<code><mode>3"
1000 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1002 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
1003 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
1004 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1006 if (!flag_finite_math_only)
1007 operands[1] = force_reg (<MODE>mode, operands[1]);
1008 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
;; AVX packed min/max, only valid when flag_finite_math_only is set and
;; ix86_binary_operator_ok accepts the operands.  Operand 1 carries the
;; "%" commutative marker, so it may be exchanged with operand 2.
1011 (define_insn "*avx_<code><mode>3_finite"
1012 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1014 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
1015 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
1016 "AVX_VEC_FLOAT_MODE_P (<MODE>mode) && flag_finite_math_only
1017 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1018 "v<maxmin_float>p<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1019 [(set_attr "type" "sseadd")
1020 (set_attr "prefix" "vex")
1021 (set_attr "mode" "<MODE>")])
;; SSE packed min/max, only valid when flag_finite_math_only is set and
;; ix86_binary_operator_ok accepts the operands.  Operand 1 is tied to the
;; destination ("0") and marked commutative ("%").
1023 (define_insn "*<code><mode>3_finite"
1024 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1026 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
1027 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
1028 "SSE_VEC_FLOAT_MODE_P (<MODE>mode) && flag_finite_math_only
1029 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1030 "<maxmin_float>p<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1031 [(set_attr "type" "sseadd")
1032 (set_attr "mode" "<MODE>")])
;; AVX packed min/max without the flag_finite_math_only requirement.
;; Operand 1 must be a register and must NOT carry the "%" commutative
;; marker: when NaNs or signed zeros are possible the instruction's result
;; depends on operand order, so letting the register allocator exchange
;; operands 1 and 2 could change the result.  This mirrors the non-VEX
;; "*<code><mode>3" pattern, which uses register_operand "0" with no "%";
;; the commutative case is handled by "*avx_<code><mode>3_finite".
1034 (define_insn "*avx_<code><mode>3"
1035 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1037 (match_operand:AVXMODEF2P 1 "register_operand" "x")
1038 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
1039 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1040 "v<maxmin_float>p<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1041 [(set_attr "type" "sseadd")
1042 (set_attr "prefix" "vex")
1043 (set_attr "mode" "<avxvecmode>")])
;; SSE packed min/max without the flag_finite_math_only requirement.
;; Operand 1 is a register tied to the destination and has no "%" marker,
;; so operands 1 and 2 are not treated as interchangeable here.
1045 (define_insn "*<code><mode>3"
1046 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1048 (match_operand:SSEMODEF2P 1 "register_operand" "0")
1049 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
1050 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1051 "<maxmin_float>p<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1052 [(set_attr "type" "sseadd")
1053 (set_attr "mode" "<MODE>")])
;; AVX scalar min/max expressed as a vec_merge; emits the three-operand
;; v<maxmin_float>s<suffix> form and is gated on AVX128_VEC_FLOAT_MODE_P.
;; NOTE(review): the "type" attribute is "sse" here while the non-VEX
;; "<sse>_vm<code><mode>3" pattern uses "sseadd" -- confirm this is intended.
1055 (define_insn "*avx_vm<code><mode>3"
1056 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1057 (vec_merge:SSEMODEF2P
1059 (match_operand:SSEMODEF2P 1 "register_operand" "x")
1060 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
1063 "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
1064 "v<maxmin_float>s<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1065 [(set_attr "type" "sse")
1066 (set_attr "prefix" "vex")
1067 (set_attr "mode" "<ssescalarmode>")])
;; SSE scalar min/max expressed as a vec_merge; two-operand form with
;; operand 1 tied to the destination, gated on SSE_VEC_FLOAT_MODE_P.
1069 (define_insn "<sse>_vm<code><mode>3"
1070 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1071 (vec_merge:SSEMODEF2P
1073 (match_operand:SSEMODEF2P 1 "register_operand" "0")
1074 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
1077 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1078 "<maxmin_float>s<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1079 [(set_attr "type" "sseadd")
1080 (set_attr "mode" "<ssescalarmode>")])
1082 ;; These versions of the min/max patterns implement exactly the operations
1083 ;; min = (op1 < op2 ? op1 : op2)
1084 ;; max = (!(op1 < op2) ? op1 : op2)
1085 ;; Their operands are not commutative, and thus they may be used in the
1086 ;; presence of -0.0 and NaN.
;; AVX order-sensitive packed minimum: emits vminp<suffix> with operand 1
;; in a register and operand 2 in a register or memory; no "%" marker.
1088 (define_insn "*avx_ieee_smin<mode>3"
1089 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1091 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
1092 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")]
1094 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1095 "vminp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1096 [(set_attr "type" "sseadd")
1097 (set_attr "prefix" "vex")
1098 (set_attr "mode" "<avxvecmode>")])
;; AVX order-sensitive packed maximum: emits vmaxp<suffix> with operand 1
;; in a register and operand 2 in a register or memory; no "%" marker.
1100 (define_insn "*avx_ieee_smax<mode>3"
1101 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1103 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
1104 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")]
1106 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1107 "vmaxp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1108 [(set_attr "type" "sseadd")
1109 (set_attr "prefix" "vex")
1110 (set_attr "mode" "<avxvecmode>")])
;; SSE order-sensitive packed minimum: emits minp<suffix>; operand 1 is
;; tied to the destination and operand 2 may be a register or memory.
1112 (define_insn "*ieee_smin<mode>3"
1113 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1115 [(match_operand:SSEMODEF2P 1 "register_operand" "0")
1116 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
1118 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1119 "minp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1120 [(set_attr "type" "sseadd")
1121 (set_attr "mode" "<MODE>")])
;; SSE order-sensitive packed maximum: emits maxp<suffix>; operand 1 is
;; tied to the destination and operand 2 may be a register or memory.
1123 (define_insn "*ieee_smax<mode>3"
1124 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1126 [(match_operand:SSEMODEF2P 1 "register_operand" "0")
1127 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
1129 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1130 "maxp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1131 [(set_attr "type" "sseadd")
1132 (set_attr "mode" "<MODE>")])
;; V8SF add/subtract insn: emits the three-operand vaddsubps.  The visible
;; RTL reuses operands 1 and 2 (match_dup) in a minus expression.
1134 (define_insn "avx_addsubv8sf3"
1135 [(set (match_operand:V8SF 0 "register_operand" "=x")
1138 (match_operand:V8SF 1 "register_operand" "x")
1139 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
1140 (minus:V8SF (match_dup 1) (match_dup 2))
1143 "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1144 [(set_attr "type" "sseadd")
1145 (set_attr "prefix" "vex")
1146 (set_attr "mode" "V8SF")])
;; V4DF add/subtract insn: emits the three-operand vaddsubpd.  The visible
;; RTL reuses operands 1 and 2 (match_dup) in a minus expression.
1148 (define_insn "avx_addsubv4df3"
1149 [(set (match_operand:V4DF 0 "register_operand" "=x")
1152 (match_operand:V4DF 1 "register_operand" "x")
1153 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
1154 (minus:V4DF (match_dup 1) (match_dup 2))
1157 "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1158 [(set_attr "type" "sseadd")
1159 (set_attr "prefix" "vex")
1160 (set_attr "mode" "V4DF")])
;; V4SF add/subtract insn, VEX form: emits the three-operand vaddsubps
;; with operand 1 in a register and operand 2 in a register or memory.
1162 (define_insn "*avx_addsubv4sf3"
1163 [(set (match_operand:V4SF 0 "register_operand" "=x")
1166 (match_operand:V4SF 1 "register_operand" "x")
1167 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1168 (minus:V4SF (match_dup 1) (match_dup 2))
1171 "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1172 [(set_attr "type" "sseadd")
1173 (set_attr "prefix" "vex")
1174 (set_attr "mode" "V4SF")])
;; V4SF add/subtract insn, two-operand form: emits addsubps with operand 1
;; tied to the destination.  The prefix_rep attribute is set to "1".
1176 (define_insn "sse3_addsubv4sf3"
1177 [(set (match_operand:V4SF 0 "register_operand" "=x")
1180 (match_operand:V4SF 1 "register_operand" "0")
1181 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1182 (minus:V4SF (match_dup 1) (match_dup 2))
1185 "addsubps\t{%2, %0|%0, %2}"
1186 [(set_attr "type" "sseadd")
1187 (set_attr "prefix_rep" "1")
1188 (set_attr "mode" "V4SF")])
;; V2DF add/subtract insn, VEX form: emits the three-operand vaddsubpd
;; with operand 1 in a register and operand 2 in a register or memory.
1190 (define_insn "*avx_addsubv2df3"
1191 [(set (match_operand:V2DF 0 "register_operand" "=x")
1194 (match_operand:V2DF 1 "register_operand" "x")
1195 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1196 (minus:V2DF (match_dup 1) (match_dup 2))
1199 "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1200 [(set_attr "type" "sseadd")
1201 (set_attr "prefix" "vex")
1202 (set_attr "mode" "V2DF")])
;; V2DF add/subtract insn, two-operand form: emits addsubpd with operand 1
;; tied to the destination.  The atom_unit attribute is set to "complex".
1204 (define_insn "sse3_addsubv2df3"
1205 [(set (match_operand:V2DF 0 "register_operand" "=x")
1208 (match_operand:V2DF 1 "register_operand" "0")
1209 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1210 (minus:V2DF (match_dup 1) (match_dup 2))
1213 "addsubpd\t{%2, %0|%0, %2}"
1214 [(set_attr "type" "sseadd")
1215 (set_attr "atom_unit" "complex")
1216 (set_attr "mode" "V2DF")])
;; V4DF horizontal add/sub (plusminus iterator): emits vh<op>pd.  The
;; visible vec_selects pair elements (0,1) and (2,3) of operand 1, followed
;; by elements (0,1) and (2,3) of operand 2.
;; NOTE(review): as written, both operand-1 pairs precede both operand-2
;; pairs; confirm this ordering matches the per-128-bit-lane result layout
;; of the 256-bit instruction (compare the V8SF pattern, which interleaves
;; the operands per lane).
1218 (define_insn "avx_h<plusminus_insn>v4df3"
1219 [(set (match_operand:V4DF 0 "register_operand" "=x")
1224 (match_operand:V4DF 1 "register_operand" "x")
1225 (parallel [(const_int 0)]))
1226 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1228 (vec_select:DF (match_dup 1) (parallel [(const_int 2)]))
1229 (vec_select:DF (match_dup 1) (parallel [(const_int 3)]))))
1233 (match_operand:V4DF 2 "nonimmediate_operand" "xm")
1234 (parallel [(const_int 0)]))
1235 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))
1237 (vec_select:DF (match_dup 2) (parallel [(const_int 2)]))
1238 (vec_select:DF (match_dup 2) (parallel [(const_int 3)]))))))]
1240 "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
1241 [(set_attr "type" "sseadd")
1242 (set_attr "prefix" "vex")
1243 (set_attr "mode" "V4DF")])
;; V8SF horizontal add/sub (plusminus iterator): emits vh<op>ps.  The
;; visible vec_selects take elements 0-3 of operand 1, then 0-3 of
;; operand 2, then 4-7 of operand 1, then 4-7 of operand 2, always in
;; adjacent pairs.
1245 (define_insn "avx_h<plusminus_insn>v8sf3"
1246 [(set (match_operand:V8SF 0 "register_operand" "=x")
1252 (match_operand:V8SF 1 "register_operand" "x")
1253 (parallel [(const_int 0)]))
1254 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1256 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1257 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1261 (match_operand:V8SF 2 "nonimmediate_operand" "xm")
1262 (parallel [(const_int 0)]))
1263 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1265 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1266 (vec_select:SF (match_dup 2) (parallel [(const_int 3)])))))
1270 (vec_select:SF (match_dup 1) (parallel [(const_int 4)]))
1271 (vec_select:SF (match_dup 1) (parallel [(const_int 5)])))
1273 (vec_select:SF (match_dup 1) (parallel [(const_int 6)]))
1274 (vec_select:SF (match_dup 1) (parallel [(const_int 7)]))))
1277 (vec_select:SF (match_dup 2) (parallel [(const_int 4)]))
1278 (vec_select:SF (match_dup 2) (parallel [(const_int 5)])))
1280 (vec_select:SF (match_dup 2) (parallel [(const_int 6)]))
1281 (vec_select:SF (match_dup 2) (parallel [(const_int 7)])))))))]
1283 "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1284 [(set_attr "type" "sseadd")
1285 (set_attr "prefix" "vex")
1286 (set_attr "mode" "V8SF")])
;; V4SF horizontal add/sub, VEX form: emits the three-operand vh<op>ps.
;; The visible vec_selects pair elements (0,1) and (2,3) of operand 1,
;; then the same pairs of operand 2.
1288 (define_insn "*avx_h<plusminus_insn>v4sf3"
1289 [(set (match_operand:V4SF 0 "register_operand" "=x")
1294 (match_operand:V4SF 1 "register_operand" "x")
1295 (parallel [(const_int 0)]))
1296 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1298 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1299 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1303 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
1304 (parallel [(const_int 0)]))
1305 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1307 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1308 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
1310 "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1311 [(set_attr "type" "sseadd")
1312 (set_attr "prefix" "vex")
1313 (set_attr "mode" "V4SF")])
;; V4SF horizontal add/sub, two-operand form: emits h<op>ps with operand 1
;; tied to the destination.  Element pairing matches the VEX V4SF pattern:
;; (0,1) and (2,3) of operand 1, then of operand 2.
1315 (define_insn "sse3_h<plusminus_insn>v4sf3"
1316 [(set (match_operand:V4SF 0 "register_operand" "=x")
1321 (match_operand:V4SF 1 "register_operand" "0")
1322 (parallel [(const_int 0)]))
1323 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1325 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1326 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1330 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
1331 (parallel [(const_int 0)]))
1332 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1334 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1335 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
1337 "h<plusminus_mnemonic>ps\t{%2, %0|%0, %2}"
1338 [(set_attr "type" "sseadd")
1339 (set_attr "atom_unit" "complex")
1340 (set_attr "prefix_rep" "1")
1341 (set_attr "mode" "V4SF")])
;; V2DF horizontal add/sub, VEX form: emits the three-operand vh<op>pd.
;; The visible vec_selects pair elements (0,1) of operand 1, then (0,1) of
;; operand 2.
1343 (define_insn "*avx_h<plusminus_insn>v2df3"
1344 [(set (match_operand:V2DF 0 "register_operand" "=x")
1348 (match_operand:V2DF 1 "register_operand" "x")
1349 (parallel [(const_int 0)]))
1350 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1353 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
1354 (parallel [(const_int 0)]))
1355 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1357 "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
1358 [(set_attr "type" "sseadd")
1359 (set_attr "prefix" "vex")
1360 (set_attr "mode" "V2DF")])
;; V2DF horizontal add/sub, two-operand form: emits h<op>pd with operand 1
;; tied to the destination.  Pairs elements (0,1) of operand 1, then (0,1)
;; of operand 2.
1362 (define_insn "sse3_h<plusminus_insn>v2df3"
1363 [(set (match_operand:V2DF 0 "register_operand" "=x")
1367 (match_operand:V2DF 1 "register_operand" "0")
1368 (parallel [(const_int 0)]))
1369 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1372 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
1373 (parallel [(const_int 0)]))
1374 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1376 "h<plusminus_mnemonic>pd\t{%2, %0|%0, %2}"
1377 [(set_attr "type" "sseadd")
1378 (set_attr "mode" "V2DF")])
;; Expander named reduc_splus_v4sf (operand 0 = result, operand 1 = input).
;; Two strategies are visible: applying gen_sse3_haddv4sf3 twice through a
;; fresh temporary register, and a call to ix86_expand_reduc_v4sf with
;; gen_addv4sf3.  The condition selecting between them is not shown on
;; these lines.
1380 (define_expand "reduc_splus_v4sf"
1381 [(match_operand:V4SF 0 "register_operand" "")
1382 (match_operand:V4SF 1 "register_operand" "")]
1387 rtx tmp = gen_reg_rtx (V4SFmode);
1388 emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
1389 emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
1392 ix86_expand_reduc_v4sf (gen_addv4sf3, operands[0], operands[1]);
;; Expander named reduc_splus_v2df: emits a single gen_sse3_haddv2df3 with
;; operand 1 supplied as both sources and operand 0 as the destination.
1396 (define_expand "reduc_splus_v2df"
1397 [(match_operand:V2DF 0 "register_operand" "")
1398 (match_operand:V2DF 1 "register_operand" "")]
1401 emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
;; Expander named reduc_smax_v4sf: delegates to ix86_expand_reduc_v4sf,
;; passing gen_smaxv4sf3 as the combining generator.
1405 (define_expand "reduc_smax_v4sf"
1406 [(match_operand:V4SF 0 "register_operand" "")
1407 (match_operand:V4SF 1 "register_operand" "")]
1410 ix86_expand_reduc_v4sf (gen_smaxv4sf3, operands[0], operands[1]);
;; Expander named reduc_smin_v4sf: delegates to ix86_expand_reduc_v4sf,
;; passing gen_sminv4sf3 as the combining generator.
1414 (define_expand "reduc_smin_v4sf"
1415 [(match_operand:V4SF 0 "register_operand" "")
1416 (match_operand:V4SF 1 "register_operand" "")]
1419 ix86_expand_reduc_v4sf (gen_sminv4sf3, operands[0], operands[1]);
1423 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1425 ;; Parallel floating point comparisons
1427 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; AVX packed compare with an explicit predicate immediate: operand 3 must
;; satisfy const_0_to_31_operand and is printed as the first AT&T operand
;; of vcmpp<suffix>.
1429 (define_insn "avx_cmpp<avxmodesuffixf2c><mode>3"
1430 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1432 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
1433 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
1434 (match_operand:SI 3 "const_0_to_31_operand" "n")]
1437 "vcmpp<avxmodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1438 [(set_attr "type" "ssecmp")
1439 (set_attr "length_immediate" "1")
1440 (set_attr "prefix" "vex")
1441 (set_attr "mode" "<MODE>")])
;; AVX scalar compare with an explicit predicate immediate (operand 3,
;; const_0_to_31_operand), expressed as a vec_merge and emitted as
;; vcmps<suffix>.  The destination needs the "=x" output constraint like
;; every other insn in this group: a define_insn whose output operand has
;; an empty constraint string gives the register allocator no register
;; class and no output marker for operand 0.
1443 (define_insn "avx_cmps<ssemodesuffixf2c><mode>3"
1444 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1445 (vec_merge:SSEMODEF2P
1447 [(match_operand:SSEMODEF2P 1 "register_operand" "x")
1448 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
1449 (match_operand:SI 3 "const_0_to_31_operand" "n")]
1454 "vcmps<ssemodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1455 [(set_attr "type" "ssecmp")
1456 (set_attr "length_immediate" "1")
1457 (set_attr "prefix" "vex")
1458 (set_attr "mode" "<ssescalarmode>")])
1460 ;; We don't promote 128-bit vector compare intrinsics, but the
1461 ;; vectorizer may generate 256-bit vector compare instructions.
;; AVX packed mask compare: operand 3 is the comparison operator itself
;; (matched by avx_comparison_float_operator) and is printed through the
;; %D3 modifier inside the vcmp...p<suffix> mnemonic.
1462 (define_insn "*avx_maskcmp<mode>3"
1463 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1464 (match_operator:AVXMODEF2P 3 "avx_comparison_float_operator"
1465 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
1466 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")]))]
1467 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1468 "vcmp%D3p<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1469 [(set_attr "type" "ssecmp")
1470 (set_attr "prefix" "vex")
1471 (set_attr "length_immediate" "1")
1472 (set_attr "mode" "<avxvecmode>")])
;; SSE mask compare over the SSEMODEF4 modes: operand 3 is the comparison
;; operator (sse_comparison_operator), printed via %D3; operand 1 is tied
;; to the destination.  The visible part of the condition accepts either
;; SSE_FLOAT_MODE_P or SSE_VEC_FLOAT_MODE_P modes.
1474 (define_insn "<sse>_maskcmp<mode>3"
1475 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x")
1476 (match_operator:SSEMODEF4 3 "sse_comparison_operator"
1477 [(match_operand:SSEMODEF4 1 "register_operand" "0")
1478 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "xm")]))]
1480 && (SSE_FLOAT_MODE_P (<MODE>mode) || SSE_VEC_FLOAT_MODE_P (<MODE>mode))"
1481 "cmp%D3<ssemodesuffixf4>\t{%2, %0|%0, %2}"
1482 [(set_attr "type" "ssecmp")
1483 (set_attr "length_immediate" "1")
1484 (set_attr "mode" "<MODE>")])
;; SSE scalar mask compare expressed as a vec_merge: the comparison
;; operator (operand 3) is printed via %D3 in cmp...s<suffix>; operand 1 is
;; tied to the destination.
1486 (define_insn "<sse>_vmmaskcmp<mode>3"
1487 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1488 (vec_merge:SSEMODEF2P
1489 (match_operator:SSEMODEF2P 3 "sse_comparison_operator"
1490 [(match_operand:SSEMODEF2P 1 "register_operand" "0")
1491 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")])
1494 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1495 "cmp%D3s<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1496 [(set_attr "type" "ssecmp")
1497 (set_attr "length_immediate" "1")
1498 (set_attr "mode" "<ssescalarmode>")])
;; Scalar compare that sets FLAGS_REG in CCFP mode from element 0 of two
;; vector operands; emits comis<suffix> (optionally VEX, per "maybe_vex").
;; prefix_data16 depends on whether the mode attribute is DF.
1500 (define_insn "<sse>_comi"
1501 [(set (reg:CCFP FLAGS_REG)
1504 (match_operand:<ssevecmode> 0 "register_operand" "x")
1505 (parallel [(const_int 0)]))
1507 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
1508 (parallel [(const_int 0)]))))]
1509 "SSE_FLOAT_MODE_P (<MODE>mode)"
1510 "%vcomis<ssemodefsuffix>\t{%1, %0|%0, %1}"
1511 [(set_attr "type" "ssecomi")
1512 (set_attr "prefix" "maybe_vex")
1513 (set_attr "prefix_rep" "0")
1514 (set (attr "prefix_data16")
1515 (if_then_else (eq_attr "mode" "DF")
1517 (const_string "0")))
1518 (set_attr "mode" "<MODE>")])
;; Scalar compare that sets FLAGS_REG in CCFPU mode from element 0 of two
;; vector operands; emits ucomis<suffix> (optionally VEX, per "maybe_vex").
;; prefix_data16 depends on whether the mode attribute is DF.
1520 (define_insn "<sse>_ucomi"
1521 [(set (reg:CCFPU FLAGS_REG)
1524 (match_operand:<ssevecmode> 0 "register_operand" "x")
1525 (parallel [(const_int 0)]))
1527 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
1528 (parallel [(const_int 0)]))))]
1529 "SSE_FLOAT_MODE_P (<MODE>mode)"
1530 "%vucomis<ssemodefsuffix>\t{%1, %0|%0, %1}"
1531 [(set_attr "type" "ssecomi")
1532 (set_attr "prefix" "maybe_vex")
1533 (set_attr "prefix_rep" "0")
1534 (set (attr "prefix_data16")
1535 (if_then_else (eq_attr "mode" "DF")
1537 (const_string "0")))
1538 (set_attr "mode" "<MODE>")])
;; Vector conditional-select expander: operand 3 is the comparison
;; operator applied to operands 4 and 5, and operands 1 and 2 are the two
;; candidate values.  Enabled for SSE or AVX vector float modes; the work
;; is delegated to ix86_expand_fp_vcond, whose result is captured in "ok".
1540 (define_expand "vcond<mode>"
1541 [(set (match_operand:AVXMODEF2P 0 "register_operand" "")
1542 (if_then_else:AVXMODEF2P
1543 (match_operator 3 ""
1544 [(match_operand:AVXMODEF2P 4 "nonimmediate_operand" "")
1545 (match_operand:AVXMODEF2P 5 "nonimmediate_operand" "")])
1546 (match_operand:AVXMODEF2P 1 "general_operand" "")
1547 (match_operand:AVXMODEF2P 2 "general_operand" "")))]
1548 "(SSE_VEC_FLOAT_MODE_P (<MODE>mode)
1549 || AVX_VEC_FLOAT_MODE_P (<MODE>mode))"
1551 bool ok = ix86_expand_fp_vcond (operands);
1556 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1558 ;; Parallel floating point logical operations
1560 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; AVX packed and-not on float vector modes: emits the three-operand
;; vandnp<suffix> with operand 1 in a register and operand 2 in a register
;; or memory.
1562 (define_insn "avx_andnot<mode>3"
1563 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1566 (match_operand:AVXMODEF2P 1 "register_operand" "x"))
1567 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
1568 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1569 "vandnp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1570 [(set_attr "type" "sselog")
1571 (set_attr "prefix" "vex")
1572 (set_attr "mode" "<avxvecmode>")])
;; SSE packed and-not on float vector modes: emits the two-operand
;; andnp<suffix> with operand 1 tied to the destination.
1574 (define_insn "<sse>_andnot<mode>3"
1575 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1578 (match_operand:SSEMODEF2P 1 "register_operand" "0"))
1579 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
1580 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1581 "andnp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1582 [(set_attr "type" "sselog")
1583 (set_attr "mode" "<MODE>")])
;; Expander for the any_logic operators on 256-bit float vector modes; its
;; only preparation step is ix86_fixup_binary_operands_no_copy.
1585 (define_expand "<code><mode>3"
1586 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
1587 (any_logic:AVX256MODEF2P
1588 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
1589 (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
1590 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
1591 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; AVX packed logical insn for the any_logic operators: emits
;; v<logic>p<suffix>.  Operand 1 is marked commutative ("%") and the
;; pattern additionally requires ix86_binary_operator_ok.
1593 (define_insn "*avx_<code><mode>3"
1594 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1595 (any_logic:AVXMODEF2P
1596 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
1597 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
1598 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
1599 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1600 "v<logic>p<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1601 [(set_attr "type" "sselog")
1602 (set_attr "prefix" "vex")
1603 (set_attr "mode" "<avxvecmode>")])
;; Expander for the any_logic operators on SSEMODEF2P modes; its only
;; preparation step is ix86_fixup_binary_operands_no_copy.
1605 (define_expand "<code><mode>3"
1606 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1607 (any_logic:SSEMODEF2P
1608 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
1609 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
1610 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1611 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; SSE packed logical insn for the any_logic operators: emits
;; <logic>p<suffix>.  Operand 1 is tied to the destination and marked
;; commutative ("%0"); ix86_binary_operator_ok must also hold.
1613 (define_insn "*<code><mode>3"
1614 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1615 (any_logic:SSEMODEF2P
1616 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
1617 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
1618 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
1619 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1620 "<logic>p<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1621 [(set_attr "type" "sselog")
1622 (set_attr "mode" "<MODE>")])
;; Vector copysign expander.  Operand 3 is a sign-bit mask built by
;; ix86_build_signbit_mask; operands 4 and 5 are fresh temporaries.  The
;; visible RTL combines (not mask) with operand 1, (and mask operand 2),
;; and finally ORs temporaries 4 and 5 into operand 0.
1624 (define_expand "copysign<mode>3"
1627 (not:SSEMODEF2P (match_dup 3))
1628 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")))
1630 (and:SSEMODEF2P (match_dup 3)
1631 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))
1632 (set (match_operand:SSEMODEF2P 0 "register_operand" "")
1633 (ior:SSEMODEF2P (match_dup 4) (match_dup 5)))]
1634 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1636 operands[3] = ix86_build_signbit_mask (<ssescalarmode>mode, 1, 0);
1638 operands[4] = gen_reg_rtx (<MODE>mode);
1639 operands[5] = gen_reg_rtx (<MODE>mode);
1642 ;; Also define scalar versions. These are used for abs, neg, and
1643 ;; conditional move. Using subregs into vector modes causes register
1644 ;; allocation lossage. These patterns do not allow memory operands
1645 ;; because the native instructions read the full 128-bits.
;; Scalar-mode (MODEF) and-not, VEX form: all three operands are
;; registers; emits vandnp<suffix> and reports the vector mode
;; (<ssevecmode>) in the mode attribute.
1647 (define_insn "*avx_andnot<mode>3"
1648 [(set (match_operand:MODEF 0 "register_operand" "=x")
1651 (match_operand:MODEF 1 "register_operand" "x"))
1652 (match_operand:MODEF 2 "register_operand" "x")))]
1653 "AVX_FLOAT_MODE_P (<MODE>mode)"
1654 "vandnp<ssemodefsuffix>\t{%2, %1, %0|%0, %1, %2}"
1655 [(set_attr "type" "sselog")
1656 (set_attr "prefix" "vex")
1657 (set_attr "mode" "<ssevecmode>")])
;; Scalar-mode (MODEF) and-not, two-operand form: operand 1 is tied to the
;; destination and operand 2 must be a register; emits andnp<suffix>.
1659 (define_insn "*andnot<mode>3"
1660 [(set (match_operand:MODEF 0 "register_operand" "=x")
1663 (match_operand:MODEF 1 "register_operand" "0"))
1664 (match_operand:MODEF 2 "register_operand" "x")))]
1665 "SSE_FLOAT_MODE_P (<MODE>mode)"
1666 "andnp<ssemodefsuffix>\t{%2, %0|%0, %2}"
1667 [(set_attr "type" "sselog")
1668 (set_attr "mode" "<ssevecmode>")])
;; Scalar-mode (MODEF) logical insn, VEX form: register-only operands;
;; emits v<logic>p<suffix> and reports the vector mode in the mode
;; attribute.
1670 (define_insn "*avx_<code><mode>3"
1671 [(set (match_operand:MODEF 0 "register_operand" "=x")
1673 (match_operand:MODEF 1 "register_operand" "x")
1674 (match_operand:MODEF 2 "register_operand" "x")))]
1675 "AVX_FLOAT_MODE_P (<MODE>mode)"
1676 "v<logic>p<ssemodefsuffix>\t{%2, %1, %0|%0, %1, %2}"
1677 [(set_attr "type" "sselog")
1678 (set_attr "prefix" "vex")
1679 (set_attr "mode" "<ssevecmode>")])
;; Scalar-mode (MODEF) logical insn, two-operand form: operand 1 is tied
;; to the destination and operand 2 must be a register; emits
;; <logic>p<suffix>.
1681 (define_insn "*<code><mode>3"
1682 [(set (match_operand:MODEF 0 "register_operand" "=x")
1684 (match_operand:MODEF 1 "register_operand" "0")
1685 (match_operand:MODEF 2 "register_operand" "x")))]
1686 "SSE_FLOAT_MODE_P (<MODE>mode)"
1687 "<logic>p<ssemodefsuffix>\t{%2, %0|%0, %2}"
1688 [(set_attr "type" "sselog")
1689 (set_attr "mode" "<ssevecmode>")])
1691 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1693 ;; FMA4 floating point multiply/accumulate instructions. This
1694 ;; includes the scalar version of the instructions as well as the vector
1694 ;; versions.
1697 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1699 ;; In order to match (*a * *b) + *c, particularly when vectorizing, allow
1700 ;; combine to generate a multiply/add with two memory references. We then
1701 ;; split this insn, into loading up the destination register with one of the
1702 ;; memory operations. If we don't manage to split the insn, reload will
1703 ;; generate the appropriate moves. The reason this is needed, is that combine
1704 ;; has already folded one of the memory references into both the multiply and
1705 ;; add insns, and it can't generate a new pseudo. I.e.:
1706 ;; (set (reg1) (mem (addr1)))
1707 ;; (set (reg2) (mult (reg1) (mem (addr2))))
1708 ;; (set (reg3) (plus (reg2) (mem (addr3))))
;; FMA4 multiply-add on FMA4MODEF4 modes: emits vfmadd<suffix>.  Two
;; constraint alternatives: either operand 3 may be memory (operand 2 in a
;; register) or operand 2 is memory (operand 3 in a register).
1710 (define_insn "fma4_fmadd<mode>4256"
1711 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
1714 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x")
1715 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m"))
1716 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x")))]
1717 "TARGET_FMA4 && TARGET_FUSED_MADD"
1718 "vfmadd<fma4modesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1719 [(set_attr "type" "ssemuladd")
1720 (set_attr "mode" "<MODE>")])
1722 ;; Floating multiply and subtract.
;; FMA4 multiply-subtract on FMA4MODEF4 modes: emits vfmsub<suffix>, with
;; the same two register/memory alternatives as the fmadd pattern.
1723 (define_insn "fma4_fmsub<mode>4256"
1724 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
1727 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x")
1728 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m"))
1729 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x")))]
1730 "TARGET_FMA4 && TARGET_FUSED_MADD"
1731 "vfmsub<fma4modesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1732 [(set_attr "type" "ssemuladd")
1733 (set_attr "mode" "<MODE>")])
1735 ;; Floating point negative multiply and add.
1736 ;; Rewrite (- (a * b) + c) into the canonical form: c - (a * b).
;; FMA4 negated multiply-add on FMA4MODEF4 modes: emits vfnmadd<suffix>.
;; In the RTL, operand 3 appears first, followed by operands 1 and 2.
1737 (define_insn "fma4_fnmadd<mode>4256"
1738 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
1740 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x")
1742 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x")
1743 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m"))))]
1744 "TARGET_FMA4 && TARGET_FUSED_MADD"
1745 "vfnmadd<fma4modesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1746 [(set_attr "type" "ssemuladd")
1747 (set_attr "mode" "<MODE>")])
1749 ;; Floating point negative multiply and subtract.
;; FMA4 negated multiply-subtract on FMA4MODEF4 modes: emits
;; vfnmsub<suffix>, with the same two register/memory alternatives as the
;; other FMA4 patterns.
1750 (define_insn "fma4_fnmsub<mode>4256"
1751 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
1755 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x"))
1756 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m"))
1757 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x")))]
1758 "TARGET_FMA4 && TARGET_FUSED_MADD"
1759 "vfnmsub<fma4modesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1760 [(set_attr "type" "ssemuladd")
1761 (set_attr "mode" "<MODE>")])
;; FMA4 multiply-add on SSEMODEF4 modes: emits vfmadd<suffix>.  Two
;; constraint alternatives: either operand 3 may be memory (operand 2 in a
;; register) or operand 2 is memory (operand 3 in a register).
1763 (define_insn "fma4_fmadd<mode>4"
1764 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x")
1767 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%x,x")
1768 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,m"))
1769 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x")))]
1770 "TARGET_FMA4 && TARGET_FUSED_MADD"
1771 "vfmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1772 [(set_attr "type" "ssemuladd")
1773 (set_attr "mode" "<MODE>")])
1775 ;; For the scalar operations, use operand1 for the upper words that aren't
1776 ;; modified, so restrict the forms that are generated.
1777 ;; Scalar version of fmadd.
;; Scalar FMA4 multiply-add wrapped in a vec_merge on SSEMODEF2P modes;
;; emits vfmadd with the scalar suffix (<ssemodesuffixf2s>).
1778 (define_insn "fma4_vmfmadd<mode>4"
1779 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1780 (vec_merge:SSEMODEF2P
1783 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
1784 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
1785 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1788 "TARGET_FMA4 && TARGET_FUSED_MADD"
1789 "vfmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1790 [(set_attr "type" "ssemuladd")
1791 (set_attr "mode" "<MODE>")])
1793 ;; Floating multiply and subtract.
1794 ;; Allow two memory operands the same as fmadd.
;; FMA4 multiply-subtract on SSEMODEF4 modes: emits vfmsub<suffix>, with
;; one memory operand permitted in either operand 2 or operand 3.
1795 (define_insn "fma4_fmsub<mode>4"
1796 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x")
1799 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%x,x")
1800 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,m"))
1801 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x")))]
1802 "TARGET_FMA4 && TARGET_FUSED_MADD"
1803 "vfmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1804 [(set_attr "type" "ssemuladd")
1805 (set_attr "mode" "<MODE>")])
1807 ;; For the scalar operations, use operand1 for the upper words that aren't
1808 ;; modified, so restrict the forms that are generated.
1809 ;; Scalar version of fmsub.
;; Scalar FMA4 multiply-subtract wrapped in a vec_merge on SSEMODEF2P
;; modes; emits vfmsub with the scalar suffix (<ssemodesuffixf2s>).
1810 (define_insn "fma4_vmfmsub<mode>4"
1811 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1812 (vec_merge:SSEMODEF2P
1815 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
1816 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
1817 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1820 "TARGET_FMA4 && TARGET_FUSED_MADD"
1821 "vfmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1822 [(set_attr "type" "ssemuladd")
1823 (set_attr "mode" "<MODE>")])
1825 ;; Floating point negative multiply and add.
1826 ;; Rewrite (- (a * b) + c) into the canonical form: c - (a * b).
;; FMA4 negated multiply-add on SSEMODEF4 modes: emits vfnmadd<suffix>.
;; In the RTL, operand 3 appears first, followed by operands 1 and 2.
1827 (define_insn "fma4_fnmadd<mode>4"
1828 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x")
1830 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x")
1832 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%x,x")
1833 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,m"))))]
1834 "TARGET_FMA4 && TARGET_FUSED_MADD"
1835 "vfnmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1836 [(set_attr "type" "ssemuladd")
1837 (set_attr "mode" "<MODE>")])
1839 ;; For the scalar operations, use operand1 for the upper words that aren't
1840 ;; modified, so restrict the forms that are generated.
1841 ;; Scalar version of fnmadd.
;; Scalar FMA4 negated multiply-add wrapped in a vec_merge on SSEMODEF2P
;; modes; emits vfnmadd with the scalar suffix (<ssemodesuffixf2s>).
1842 (define_insn "fma4_vmfnmadd<mode>4"
1843 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1844 (vec_merge:SSEMODEF2P
1846 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")
1848 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
1849 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m")))
1852 "TARGET_FMA4 && TARGET_FUSED_MADD"
1853 "vfnmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1854 [(set_attr "type" "ssemuladd")
1855 (set_attr "mode" "<MODE>")])
1857 ;; Floating point negative multiply and subtract.
1858 ;; Rewrite (- (a * b) - c) into the canonical form: ((-a) * b) - c.
;; FMA4 negated multiply-subtract on SSEMODEF4 modes: emits
;; vfnmsub<suffix>, with one memory operand permitted in either operand 2
;; or operand 3.
1859 (define_insn "fma4_fnmsub<mode>4"
1860 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x")
1864 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%x,x"))
1865 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,m"))
1866 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x")))]
1867 "TARGET_FMA4 && TARGET_FUSED_MADD"
1868 "vfnmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1869 [(set_attr "type" "ssemuladd")
1870 (set_attr "mode" "<MODE>")])
1872 ;; For the scalar operations, use operand1 for the upper words that aren't
1873 ;; modified, so restrict the forms that are generated.
1874 ;; Scalar version of fnmsub.
;; Scalar FMA4 negated multiply-subtract wrapped in a vec_merge on
;; SSEMODEF2P modes; emits vfnmsub with the scalar suffix
;; (<ssemodesuffixf2s>).
1875 (define_insn "fma4_vmfnmsub<mode>4"
1876 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1877 (vec_merge:SSEMODEF2P
1881 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x"))
1882 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
1883 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1886 "TARGET_FMA4 && TARGET_FUSED_MADD"
1887 "vfnmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1888 [(set_attr "type" "ssemuladd")
1889 (set_attr "mode" "<MODE>")])
;; Intrinsic variant of the FMA4MODEF4 multiply-add: the operation is
;; tagged UNSPEC_FMA4_INTRINSIC and emits the same vfmadd<suffix> template.
1891 (define_insn "fma4i_fmadd<mode>4256"
1892 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
1896 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x")
1897 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m"))
1898 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x"))]
1899 UNSPEC_FMA4_INTRINSIC))]
1900 "TARGET_FMA4 && TARGET_FUSED_MADD"
1901 "vfmadd<fma4modesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1902 [(set_attr "type" "ssemuladd")
1903 (set_attr "mode" "<MODE>")])
;; Intrinsic variant of the FMA4MODEF4 multiply-subtract: tagged
;; UNSPEC_FMA4_INTRINSIC; emits vfmsub<suffix>.
1905 (define_insn "fma4i_fmsub<mode>4256"
1906 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
1910 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x")
1911 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m"))
1912 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x"))]
1913 UNSPEC_FMA4_INTRINSIC))]
1914 "TARGET_FMA4 && TARGET_FUSED_MADD"
1915 "vfmsub<fma4modesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1916 [(set_attr "type" "ssemuladd")
1917 (set_attr "mode" "<MODE>")])
;; Intrinsic form of FMA4 negated multiply-add for the FMA4MODEF4 modes,
;; wrapped in UNSPEC_FMA4_INTRINSIC; emits vfnmadd.  In the RTL, operand 3
;; is listed first, followed by the operand 1 / operand 2 pair; the assembler
;; template still prints the operands in the usual %1, %2, %3 order.
1919 (define_insn "fma4i_fnmadd<mode>4256"
1920 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
1923 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x")
1925 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x")
1926 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m")))]
1927 UNSPEC_FMA4_INTRINSIC))]
1928 "TARGET_FMA4 && TARGET_FUSED_MADD"
1929 "vfnmadd<fma4modesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1930 [(set_attr "type" "ssemuladd")
1931 (set_attr "mode" "<MODE>")])
;; Intrinsic form of FMA4 negated multiply-subtract for the FMA4MODEF4
;; modes, wrapped in UNSPEC_FMA4_INTRINSIC; emits vfnmsub.
;; Operands 2 and 3 are never both memory: (x, xm) or (m, x).
1933 (define_insn "fma4i_fnmsub<mode>4256"
1934 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
1939 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x"))
1940 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m"))
1941 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x"))]
1942 UNSPEC_FMA4_INTRINSIC))]
1943 "TARGET_FMA4 && TARGET_FUSED_MADD"
1944 "vfnmsub<fma4modesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1945 [(set_attr "type" "ssemuladd")
1946 (set_attr "mode" "<MODE>")])
;; Intrinsic form of FMA4 multiply-add for the SSEMODEF2P modes, wrapped in
;; UNSPEC_FMA4_INTRINSIC; emits vfmadd with the <ssemodesuffixf4> suffix.
;; Operands 2 and 3 are never both memory: (x, xm) or (m, x).
1948 (define_insn "fma4i_fmadd<mode>4"
1949 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1953 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
1954 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
1955 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))]
1956 UNSPEC_FMA4_INTRINSIC))]
1957 "TARGET_FMA4 && TARGET_FUSED_MADD"
1958 "vfmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1959 [(set_attr "type" "ssemuladd")
1960 (set_attr "mode" "<MODE>")])
;; Intrinsic form of FMA4 multiply-subtract for the SSEMODEF2P modes, wrapped
;; in UNSPEC_FMA4_INTRINSIC; emits vfmsub with the <ssemodesuffixf4> suffix.
;; Operands 2 and 3 are never both memory: (x, xm) or (m, x).
1962 (define_insn "fma4i_fmsub<mode>4"
1963 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1967 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
1968 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
1969 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))]
1970 UNSPEC_FMA4_INTRINSIC))]
1971 "TARGET_FMA4 && TARGET_FUSED_MADD"
1972 "vfmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1973 [(set_attr "type" "ssemuladd")
1974 (set_attr "mode" "<MODE>")])
;; Intrinsic form of FMA4 negated multiply-add for the SSEMODEF2P modes,
;; wrapped in UNSPEC_FMA4_INTRINSIC; emits vfnmadd.  In the RTL, operand 3 is
;; listed first, followed by the operand 1 / operand 2 pair; the assembler
;; template still prints the operands in the usual %1, %2, %3 order.
1976 (define_insn "fma4i_fnmadd<mode>4"
1977 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1980 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")
1982 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
1983 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m")))]
1984 UNSPEC_FMA4_INTRINSIC))]
1985 "TARGET_FMA4 && TARGET_FUSED_MADD"
1986 "vfnmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1987 [(set_attr "type" "ssemuladd")
1988 (set_attr "mode" "<MODE>")])
;; Intrinsic form of FMA4 negated multiply-subtract for the SSEMODEF2P modes,
;; wrapped in UNSPEC_FMA4_INTRINSIC; emits vfnmsub.
;; Operands 2 and 3 are never both memory: (x, xm) or (m, x).
1990 (define_insn "fma4i_fnmsub<mode>4"
1991 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1996 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x"))
1997 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
1998 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))]
1999 UNSPEC_FMA4_INTRINSIC))]
2000 "TARGET_FMA4 && TARGET_FUSED_MADD"
2001 "vfnmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2002 [(set_attr "type" "ssemuladd")
2003 (set_attr "mode" "<MODE>")])
2005 ;; For the scalar operations, use operand1 for the upper words that aren't
2006 ;; modified, so restrict the forms that are accepted.
;; Scalar intrinsic multiply-add: a vec_merge wrapped in
;; UNSPEC_FMA4_INTRINSIC that emits vfmadd with the scalar <ssemodesuffixf2s>
;; suffix; the "mode" attribute is the scalar element mode.
2007 (define_insn "fma4i_vmfmadd<mode>4"
2008 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2010 [(vec_merge:SSEMODEF2P
2013 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
2014 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
2015 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
2018 UNSPEC_FMA4_INTRINSIC))]
2019 "TARGET_FMA4 && TARGET_FUSED_MADD"
2020 "vfmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2021 [(set_attr "type" "ssemuladd")
2022 (set_attr "mode" "<ssescalarmode>")])
;; Scalar intrinsic multiply-subtract: a vec_merge wrapped in
;; UNSPEC_FMA4_INTRINSIC that emits vfmsub with the scalar <ssemodesuffixf2s>
;; suffix; the "mode" attribute is the scalar element mode.
2024 (define_insn "fma4i_vmfmsub<mode>4"
2025 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2027 [(vec_merge:SSEMODEF2P
2030 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
2031 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
2032 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
2035 UNSPEC_FMA4_INTRINSIC))]
2036 "TARGET_FMA4 && TARGET_FUSED_MADD"
2037 "vfmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2038 [(set_attr "type" "ssemuladd")
2039 (set_attr "mode" "<ssescalarmode>")])
;; Scalar intrinsic negated multiply-add: a vec_merge wrapped in
;; UNSPEC_FMA4_INTRINSIC that emits vfnmadd with the scalar
;; <ssemodesuffixf2s> suffix.  Operand 3 is listed first in the RTL, followed
;; by the operand 1 / operand 2 pair.
2041 (define_insn "fma4i_vmfnmadd<mode>4"
2042 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2044 [(vec_merge:SSEMODEF2P
2046 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")
2048 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
2049 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m")))
2052 UNSPEC_FMA4_INTRINSIC))]
2053 "TARGET_FMA4 && TARGET_FUSED_MADD"
2054 "vfnmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2055 [(set_attr "type" "ssemuladd")
2056 (set_attr "mode" "<ssescalarmode>")])
;; Scalar intrinsic negated multiply-subtract: a vec_merge wrapped in
;; UNSPEC_FMA4_INTRINSIC that emits vfnmsub with the scalar
;; <ssemodesuffixf2s> suffix; the "mode" attribute is the scalar element mode.
2058 (define_insn "fma4i_vmfnmsub<mode>4"
2059 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2061 [(vec_merge:SSEMODEF2P
2065 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x"))
2066 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
2067 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
2070 UNSPEC_FMA4_INTRINSIC))]
2071 "TARGET_FMA4 && TARGET_FUSED_MADD"
2072 "vfnmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2073 [(set_attr "type" "ssemuladd")
2074 (set_attr "mode" "<ssescalarmode>")])
2076 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2078 ;; FMA4 Parallel floating point multiply addsub and subadd operations.
2080 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; FMA4 multiply-addsub on V8SF operands; emits vfmaddsubps.
;; Operands 2 and 3 are never both memory: (x, xm) or (m, x).
2082 (define_insn "fma4_fmaddsubv8sf4"
2083 [(set (match_operand:V8SF 0 "register_operand" "=x,x")
2087 (match_operand:V8SF 1 "nonimmediate_operand" "%x,x")
2088 (match_operand:V8SF 2 "nonimmediate_operand" "x,m"))
2089 (match_operand:V8SF 3 "nonimmediate_operand" "xm,x"))
2096 "TARGET_FMA4 && TARGET_FUSED_MADD"
2097 "vfmaddsubps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2098 [(set_attr "type" "ssemuladd")
2099 (set_attr "mode" "V8SF")])
;; FMA4 multiply-addsub on V4DF operands; emits vfmaddsubpd.
;; Operands 2 and 3 are never both memory: (x, xm) or (m, x).
2101 (define_insn "fma4_fmaddsubv4df4"
2102 [(set (match_operand:V4DF 0 "register_operand" "=x,x")
2106 (match_operand:V4DF 1 "nonimmediate_operand" "%x,x")
2107 (match_operand:V4DF 2 "nonimmediate_operand" "x,m"))
2108 (match_operand:V4DF 3 "nonimmediate_operand" "xm,x"))
2115 "TARGET_FMA4 && TARGET_FUSED_MADD"
2116 "vfmaddsubpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2117 [(set_attr "type" "ssemuladd")
2118 (set_attr "mode" "V4DF")])
;; FMA4 multiply-addsub on V4SF operands; emits vfmaddsubps.
;; Operands 2 and 3 are never both memory: (x, xm) or (m, x).
2120 (define_insn "fma4_fmaddsubv4sf4"
2121 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2125 (match_operand:V4SF 1 "nonimmediate_operand" "%x,x")
2126 (match_operand:V4SF 2 "nonimmediate_operand" "x,m"))
2127 (match_operand:V4SF 3 "nonimmediate_operand" "xm,x"))
2134 "TARGET_FMA4 && TARGET_FUSED_MADD"
2135 "vfmaddsubps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2136 [(set_attr "type" "ssemuladd")
2137 (set_attr "mode" "V4SF")])
;; FMA4 multiply-addsub on V2DF operands; emits vfmaddsubpd.
;; Operands 2 and 3 are never both memory: (x, xm) or (m, x).
2139 (define_insn "fma4_fmaddsubv2df4"
2140 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2144 (match_operand:V2DF 1 "nonimmediate_operand" "%x,x")
2145 (match_operand:V2DF 2 "nonimmediate_operand" "x,m"))
2146 (match_operand:V2DF 3 "nonimmediate_operand" "xm,x"))
2153 "TARGET_FMA4 && TARGET_FUSED_MADD"
2154 "vfmaddsubpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2155 [(set_attr "type" "ssemuladd")
2156 (set_attr "mode" "V2DF")])
;; FMA4 multiply-subadd on V8SF operands; emits vfmsubaddps.
;; Operands 2 and 3 are never both memory: (x, xm) or (m, x).
2158 (define_insn "fma4_fmsubaddv8sf4"
2159 [(set (match_operand:V8SF 0 "register_operand" "=x,x")
2163 (match_operand:V8SF 1 "nonimmediate_operand" "%x,x")
2164 (match_operand:V8SF 2 "nonimmediate_operand" "x,m"))
2165 (match_operand:V8SF 3 "nonimmediate_operand" "xm,x"))
2172 "TARGET_FMA4 && TARGET_FUSED_MADD"
2173 "vfmsubaddps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2174 [(set_attr "type" "ssemuladd")
2175 (set_attr "mode" "V8SF")])
;; FMA4 multiply-subadd on V4DF operands; emits vfmsubaddpd.
;; Operands 2 and 3 are never both memory: (x, xm) or (m, x).
2177 (define_insn "fma4_fmsubaddv4df4"
2178 [(set (match_operand:V4DF 0 "register_operand" "=x,x")
2182 (match_operand:V4DF 1 "nonimmediate_operand" "%x,x")
2183 (match_operand:V4DF 2 "nonimmediate_operand" "x,m"))
2184 (match_operand:V4DF 3 "nonimmediate_operand" "xm,x"))
2191 "TARGET_FMA4 && TARGET_FUSED_MADD"
2192 "vfmsubaddpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2193 [(set_attr "type" "ssemuladd")
2194 (set_attr "mode" "V4DF")])
;; FMA4 multiply-subadd on V4SF operands; emits vfmsubaddps.
;; Operands 2 and 3 are never both memory: (x, xm) or (m, x).
2196 (define_insn "fma4_fmsubaddv4sf4"
2197 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2201 (match_operand:V4SF 1 "nonimmediate_operand" "%x,x")
2202 (match_operand:V4SF 2 "nonimmediate_operand" "x,m"))
2203 (match_operand:V4SF 3 "nonimmediate_operand" "xm,x"))
2210 "TARGET_FMA4 && TARGET_FUSED_MADD"
2211 "vfmsubaddps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2212 [(set_attr "type" "ssemuladd")
2213 (set_attr "mode" "V4SF")])
;; FMA4 multiply-subadd on V2DF operands; emits vfmsubaddpd.
;; Operands 2 and 3 are never both memory: (x, xm) or (m, x).
2215 (define_insn "fma4_fmsubaddv2df4"
2216 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2220 (match_operand:V2DF 1 "nonimmediate_operand" "%x,x")
2221 (match_operand:V2DF 2 "nonimmediate_operand" "x,m"))
2222 (match_operand:V2DF 3 "nonimmediate_operand" "xm,x"))
2229 "TARGET_FMA4 && TARGET_FUSED_MADD"
2230 "vfmsubaddpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2231 [(set_attr "type" "ssemuladd")
2232 (set_attr "mode" "V2DF")])
;; Intrinsic form of FMA4 multiply-addsub on V8SF, wrapped in
;; UNSPEC_FMA4_INTRINSIC; emits vfmaddsubps.
2234 (define_insn "fma4i_fmaddsubv8sf4"
2235 [(set (match_operand:V8SF 0 "register_operand" "=x,x")
2240 (match_operand:V8SF 1 "nonimmediate_operand" "%x,x")
2241 (match_operand:V8SF 2 "nonimmediate_operand" "x,m"))
2242 (match_operand:V8SF 3 "nonimmediate_operand" "xm,x"))
2249 UNSPEC_FMA4_INTRINSIC))]
2250 "TARGET_FMA4 && TARGET_FUSED_MADD"
2251 "vfmaddsubps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2252 [(set_attr "type" "ssemuladd")
2253 (set_attr "mode" "V8SF")])
;; Intrinsic form of FMA4 multiply-addsub on V4DF, wrapped in
;; UNSPEC_FMA4_INTRINSIC; emits vfmaddsubpd.
2255 (define_insn "fma4i_fmaddsubv4df4"
2256 [(set (match_operand:V4DF 0 "register_operand" "=x,x")
2261 (match_operand:V4DF 1 "nonimmediate_operand" "%x,x")
2262 (match_operand:V4DF 2 "nonimmediate_operand" "x,m"))
2263 (match_operand:V4DF 3 "nonimmediate_operand" "xm,x"))
2270 UNSPEC_FMA4_INTRINSIC))]
2271 "TARGET_FMA4 && TARGET_FUSED_MADD"
2272 "vfmaddsubpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2273 [(set_attr "type" "ssemuladd")
2274 (set_attr "mode" "V4DF")])
;; Intrinsic form of FMA4 multiply-addsub on V4SF, wrapped in
;; UNSPEC_FMA4_INTRINSIC; emits vfmaddsubps.
2276 (define_insn "fma4i_fmaddsubv4sf4"
2277 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2282 (match_operand:V4SF 1 "nonimmediate_operand" "%x,x")
2283 (match_operand:V4SF 2 "nonimmediate_operand" "x,m"))
2284 (match_operand:V4SF 3 "nonimmediate_operand" "xm,x"))
2291 UNSPEC_FMA4_INTRINSIC))]
2292 "TARGET_FMA4 && TARGET_FUSED_MADD"
2293 "vfmaddsubps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2294 [(set_attr "type" "ssemuladd")
2295 (set_attr "mode" "V4SF")])
;; Intrinsic form of FMA4 multiply-addsub on V2DF, wrapped in
;; UNSPEC_FMA4_INTRINSIC; emits vfmaddsubpd.
2297 (define_insn "fma4i_fmaddsubv2df4"
2298 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2303 (match_operand:V2DF 1 "nonimmediate_operand" "%x,x")
2304 (match_operand:V2DF 2 "nonimmediate_operand" "x,m"))
2305 (match_operand:V2DF 3 "nonimmediate_operand" "xm,x"))
2312 UNSPEC_FMA4_INTRINSIC))]
2313 "TARGET_FMA4 && TARGET_FUSED_MADD"
2314 "vfmaddsubpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2315 [(set_attr "type" "ssemuladd")
2316 (set_attr "mode" "V2DF")])
;; Intrinsic form of FMA4 multiply-subadd on V8SF, wrapped in
;; UNSPEC_FMA4_INTRINSIC; emits vfmsubaddps.
2318 (define_insn "fma4i_fmsubaddv8sf4"
2319 [(set (match_operand:V8SF 0 "register_operand" "=x,x")
2324 (match_operand:V8SF 1 "nonimmediate_operand" "%x,x")
2325 (match_operand:V8SF 2 "nonimmediate_operand" "x,m"))
2326 (match_operand:V8SF 3 "nonimmediate_operand" "xm,x"))
2333 UNSPEC_FMA4_INTRINSIC))]
2334 "TARGET_FMA4 && TARGET_FUSED_MADD"
2335 "vfmsubaddps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2336 [(set_attr "type" "ssemuladd")
2337 (set_attr "mode" "V8SF")])
;; Intrinsic form of FMA4 multiply-subadd on V4DF, wrapped in
;; UNSPEC_FMA4_INTRINSIC; emits vfmsubaddpd.
2339 (define_insn "fma4i_fmsubaddv4df4"
2340 [(set (match_operand:V4DF 0 "register_operand" "=x,x")
2345 (match_operand:V4DF 1 "nonimmediate_operand" "%x,x")
2346 (match_operand:V4DF 2 "nonimmediate_operand" "x,m"))
2347 (match_operand:V4DF 3 "nonimmediate_operand" "xm,x"))
2354 UNSPEC_FMA4_INTRINSIC))]
2355 "TARGET_FMA4 && TARGET_FUSED_MADD"
2356 "vfmsubaddpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2357 [(set_attr "type" "ssemuladd")
2358 (set_attr "mode" "V4DF")])
;; Intrinsic form of FMA4 multiply-subadd on V4SF, wrapped in
;; UNSPEC_FMA4_INTRINSIC; emits vfmsubaddps.
2360 (define_insn "fma4i_fmsubaddv4sf4"
2361 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2366 (match_operand:V4SF 1 "nonimmediate_operand" "%x,x")
2367 (match_operand:V4SF 2 "nonimmediate_operand" "x,m"))
2368 (match_operand:V4SF 3 "nonimmediate_operand" "xm,x"))
2375 UNSPEC_FMA4_INTRINSIC))]
2376 "TARGET_FMA4 && TARGET_FUSED_MADD"
2377 "vfmsubaddps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2378 [(set_attr "type" "ssemuladd")
2379 (set_attr "mode" "V4SF")])
;; Intrinsic form of FMA4 multiply-subadd on V2DF, wrapped in
;; UNSPEC_FMA4_INTRINSIC; emits vfmsubaddpd.
2381 (define_insn "fma4i_fmsubaddv2df4"
2382 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2387 (match_operand:V2DF 1 "nonimmediate_operand" "%x,x")
2388 (match_operand:V2DF 2 "nonimmediate_operand" "x,m"))
2389 (match_operand:V2DF 3 "nonimmediate_operand" "xm,x"))
2396 UNSPEC_FMA4_INTRINSIC))]
2397 "TARGET_FMA4 && TARGET_FUSED_MADD"
2398 "vfmsubaddpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2399 [(set_attr "type" "ssemuladd")
2400 (set_attr "mode" "V2DF")])
2402 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2404 ;; Parallel single-precision floating point conversion operations
2406 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2ps: converts the V2SI operand 2 (constraint "ym") to V2SF.
;; Operand 1 is tied to the destination register via the "0" constraint, so
;; the instruction is printed with only operands 0 and 2.
2408 (define_insn "sse_cvtpi2ps"
2409 [(set (match_operand:V4SF 0 "register_operand" "=x")
2412 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
2413 (match_operand:V4SF 1 "register_operand" "0")
2416 "cvtpi2ps\t{%2, %0|%0, %2}"
2417 [(set_attr "type" "ssecvt")
2418 (set_attr "mode" "V4SF")])
;; cvtps2pi: V4SF operand 1 goes through a V4SI unspec and elements 0 and 1
;; of that result form the V2SI destination (constraint "y", unit "mmx").
2420 (define_insn "sse_cvtps2pi"
2421 [(set (match_operand:V2SI 0 "register_operand" "=y")
2423 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
2425 (parallel [(const_int 0) (const_int 1)])))]
2427 "cvtps2pi\t{%1, %0|%0, %1}"
2428 [(set_attr "type" "ssecvt")
2429 (set_attr "unit" "mmx")
2430 (set_attr "mode" "DI")])
;; cvttps2pi: applies fix:V4SI to the V4SF operand 1 and keeps elements 0
;; and 1 as the V2SI destination (constraint "y", unit "mmx").
;; NOTE(review): "mode" is "SF" here but "DI" in sse_cvtps2pi -- confirm the
;; difference is intentional.
2432 (define_insn "sse_cvttps2pi"
2433 [(set (match_operand:V2SI 0 "register_operand" "=y")
2435 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
2436 (parallel [(const_int 0) (const_int 1)])))]
2438 "cvttps2pi\t{%1, %0|%0, %1}"
2439 [(set_attr "type" "ssecvt")
2440 (set_attr "unit" "mmx")
2441 (set_attr "prefix_rep" "0")
2442 (set_attr "mode" "SF")])
;; VEX three-operand form: converts the SI operand 2 (register or memory) to
;; SF; operand 1 is a separate V4SF source register, not tied to the
;; destination.
2444 (define_insn "*avx_cvtsi2ss"
2445 [(set (match_operand:V4SF 0 "register_operand" "=x")
2448 (float:SF (match_operand:SI 2 "nonimmediate_operand" "rm")))
2449 (match_operand:V4SF 1 "register_operand" "x")
2452 "vcvtsi2ss\t{%2, %1, %0|%0, %1, %2}"
2453 [(set_attr "type" "sseicvt")
2454 (set_attr "prefix" "vex")
2455 (set_attr "mode" "SF")])
;; cvtsi2ss: converts the SI operand 2 to SF; operand 1 is tied to the
;; destination.  The two alternatives separate the register ("r") and memory
;; ("m") sources so the decode attributes can differ per alternative.
2457 (define_insn "sse_cvtsi2ss"
2458 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2461 (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
2462 (match_operand:V4SF 1 "register_operand" "0,0")
2465 "cvtsi2ss\t{%2, %0|%0, %2}"
2466 [(set_attr "type" "sseicvt")
2467 (set_attr "athlon_decode" "vector,double")
2468 (set_attr "amdfam10_decode" "vector,double")
2469 (set_attr "mode" "SF")])
;; 64-bit VEX form: converts the DI operand 2 (register or memory) to SF.
;; Only available under TARGET_AVX && TARGET_64BIT.
2471 (define_insn "*avx_cvtsi2ssq"
2472 [(set (match_operand:V4SF 0 "register_operand" "=x")
2475 (float:SF (match_operand:DI 2 "nonimmediate_operand" "rm")))
2476 (match_operand:V4SF 1 "register_operand" "x")
2478 "TARGET_AVX && TARGET_64BIT"
2479 "vcvtsi2ssq\t{%2, %1, %0|%0, %1, %2}"
2480 [(set_attr "type" "sseicvt")
2481 (set_attr "length_vex" "4")
2482 (set_attr "prefix" "vex")
2483 (set_attr "mode" "SF")])
;; 64-bit cvtsi2ssq: converts the DI operand 2 to SF; operand 1 is tied to
;; the destination.  Only available under TARGET_SSE && TARGET_64BIT.
;; NOTE(review): the second alternative of operand 2 is "rm", which overlaps
;; the first ("r"); sse_cvtsi2ss uses "r,m" with the same per-alternative
;; decode attributes -- confirm whether "r,m" was intended here.
2485 (define_insn "sse_cvtsi2ssq"
2486 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2489 (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,rm")))
2490 (match_operand:V4SF 1 "register_operand" "0,0")
2492 "TARGET_SSE && TARGET_64BIT"
2493 "cvtsi2ssq\t{%2, %0|%0, %2}"
2494 [(set_attr "type" "sseicvt")
2495 (set_attr "prefix_rex" "1")
2496 (set_attr "athlon_decode" "vector,double")
2497 (set_attr "amdfam10_decode" "vector,double")
2498 (set_attr "mode" "SF")])
;; cvtss2si on a vector source: element 0 of the V4SF operand 1 is converted
;; to SI under UNSPEC_FIX_NOTRUNC.  The %v prefix and "maybe_vex" attribute
;; let the same pattern serve the VEX-encoded form.
2500 (define_insn "sse_cvtss2si"
2501 [(set (match_operand:SI 0 "register_operand" "=r,r")
2504 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2505 (parallel [(const_int 0)]))]
2506 UNSPEC_FIX_NOTRUNC))]
2508 "%vcvtss2si\t{%1, %0|%0, %1}"
2509 [(set_attr "type" "sseicvt")
2510 (set_attr "athlon_decode" "double,vector")
2511 (set_attr "prefix_rep" "1")
2512 (set_attr "prefix" "maybe_vex")
2513 (set_attr "mode" "SI")])
;; cvtss2si on a scalar source: the SF operand 1 (register or memory) is
;; converted to SI under UNSPEC_FIX_NOTRUNC.
2515 (define_insn "sse_cvtss2si_2"
2516 [(set (match_operand:SI 0 "register_operand" "=r,r")
2517 (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
2518 UNSPEC_FIX_NOTRUNC))]
2520 "%vcvtss2si\t{%1, %0|%0, %1}"
2521 [(set_attr "type" "sseicvt")
2522 (set_attr "athlon_decode" "double,vector")
2523 (set_attr "amdfam10_decode" "double,double")
2524 (set_attr "prefix_rep" "1")
2525 (set_attr "prefix" "maybe_vex")
2526 (set_attr "mode" "SI")])
;; 64-bit cvtss2si on a vector source: element 0 of the V4SF operand 1 is
;; converted to DI under UNSPEC_FIX_NOTRUNC.  The {q} in the template adds
;; the size suffix in the AT&T dialect only.
2528 (define_insn "sse_cvtss2siq"
2529 [(set (match_operand:DI 0 "register_operand" "=r,r")
2532 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2533 (parallel [(const_int 0)]))]
2534 UNSPEC_FIX_NOTRUNC))]
2535 "TARGET_SSE && TARGET_64BIT"
2536 "%vcvtss2si{q}\t{%1, %0|%0, %1}"
2537 [(set_attr "type" "sseicvt")
2538 (set_attr "athlon_decode" "double,vector")
2539 (set_attr "prefix_rep" "1")
2540 (set_attr "prefix" "maybe_vex")
2541 (set_attr "mode" "DI")])
;; 64-bit cvtss2si on a scalar source: the SF operand 1 (register or memory)
;; is converted to DI under UNSPEC_FIX_NOTRUNC.  Requires TARGET_SSE and
;; TARGET_64BIT.  The {q} in the template adds the size suffix in the AT&T
;; dialect only.
2543 (define_insn "sse_cvtss2siq_2"
2544 [(set (match_operand:DI 0 "register_operand" "=r,r")
2545 (unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
2546 UNSPEC_FIX_NOTRUNC))]
2547 "TARGET_SSE && TARGET_64BIT"
2548 "%vcvtss2si{q}\t{%1, %0|%0, %1}"
2549 [(set_attr "type" "sseicvt")
2550 (set_attr "athlon_decode" "double,vector")
2551 (set_attr "amdfam10_decode" "double,double")
2552 (set_attr "prefix_rep" "1")
2553 (set_attr "prefix" "maybe_vex")
2554 (set_attr "mode" "DI")])
;; cvttss2si: converts element 0 of the V4SF operand 1 to SI; the source may
;; be a register (alternative 0) or memory (alternative 1).
2556 (define_insn "sse_cvttss2si"
2557 [(set (match_operand:SI 0 "register_operand" "=r,r")
2560 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2561 (parallel [(const_int 0)]))))]
2563 "%vcvttss2si\t{%1, %0|%0, %1}"
2564 [(set_attr "type" "sseicvt")
2565 (set_attr "athlon_decode" "double,vector")
2566 (set_attr "amdfam10_decode" "double,double")
2567 (set_attr "prefix_rep" "1")
2568 (set_attr "prefix" "maybe_vex")
2569 (set_attr "mode" "SI")])
;; 64-bit cvttss2si: converts element 0 of the V4SF operand 1 to DI.
;; Requires TARGET_SSE and TARGET_64BIT; {q} adds the size suffix in the
;; AT&T dialect only.
2571 (define_insn "sse_cvttss2siq"
2572 [(set (match_operand:DI 0 "register_operand" "=r,r")
2575 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2576 (parallel [(const_int 0)]))))]
2577 "TARGET_SSE && TARGET_64BIT"
2578 "%vcvttss2si{q}\t{%1, %0|%0, %1}"
2579 [(set_attr "type" "sseicvt")
2580 (set_attr "athlon_decode" "double,vector")
2581 (set_attr "amdfam10_decode" "double,double")
2582 (set_attr "prefix_rep" "1")
2583 (set_attr "prefix" "maybe_vex")
2584 (set_attr "mode" "DI")])
;; VEX vcvtdq2ps over the AVXMODEDCVTDQ2PS modes: integer-to-float
;; conversion of operand 1, whose mode is the matching <avxcvtvecmode>.
2586 (define_insn "avx_cvtdq2ps<avxmodesuffix>"
2587 [(set (match_operand:AVXMODEDCVTDQ2PS 0 "register_operand" "=x")
2588 (float:AVXMODEDCVTDQ2PS
2589 (match_operand:<avxcvtvecmode> 1 "nonimmediate_operand" "xm")))]
2591 "vcvtdq2ps\t{%1, %0|%0, %1}"
2592 [(set_attr "type" "ssecvt")
2593 (set_attr "prefix" "vex")
2594 (set_attr "mode" "<avxvecmode>")])
;; cvtdq2ps: converts the V4SI operand 1 (register or memory) to V4SF.
2596 (define_insn "sse2_cvtdq2ps"
2597 [(set (match_operand:V4SF 0 "register_operand" "=x")
2598 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
2600 "cvtdq2ps\t{%1, %0|%0, %1}"
2601 [(set_attr "type" "ssecvt")
2602 (set_attr "mode" "V4SF")])
;; Expander for V4SI -> V4SF conversion named "cvtudq2ps".  The sequence is a
;; float conversion of operand 1, an "lt" compare of operand 5 against
;; operand 3, an "and" of operand 6 with operand 4, and finally
;; operand 0 = operand 5 + operand 7.
;; The preparation code sets operand 3 to an all-zero V4SF register, operand 4
;; to a V4SF register filled with 2**32 (built with real_ldexp from dconst1),
;; and operands 5..7 to fresh V4SF pseudo registers.
;; NOTE(review): presumably the intermediate results of the first three steps
;; land in operands 5, 6 and 7 respectively -- confirm against the full
;; pattern.
2604 (define_expand "sse2_cvtudq2ps"
2606 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "")))
2608 (lt:V4SF (match_dup 5) (match_dup 3)))
2610 (and:V4SF (match_dup 6) (match_dup 4)))
2611 (set (match_operand:V4SF 0 "register_operand" "")
2612 (plus:V4SF (match_dup 5) (match_dup 7)))]
2615 REAL_VALUE_TYPE TWO32r;
2619 real_ldexp (&TWO32r, &dconst1, 32);
2620 x = const_double_from_real_value (TWO32r, SFmode);
2622 operands[3] = force_reg (V4SFmode, CONST0_RTX (V4SFmode));
2623 operands[4] = force_reg (V4SFmode, ix86_build_const_vector (SFmode, 1, x));
2625 for (i = 5; i < 8; i++)
2626 operands[i] = gen_reg_rtx (V4SFmode);
;; VEX vcvtps2dq over the AVXMODEDCVTPS2DQ modes: operand 1 (mode
;; <avxcvtvecmode>) is converted under UNSPEC_FIX_NOTRUNC.
2629 (define_insn "avx_cvtps2dq<avxmodesuffix>"
2630 [(set (match_operand:AVXMODEDCVTPS2DQ 0 "register_operand" "=x")
2631 (unspec:AVXMODEDCVTPS2DQ
2632 [(match_operand:<avxcvtvecmode> 1 "nonimmediate_operand" "xm")]
2633 UNSPEC_FIX_NOTRUNC))]
2635 "vcvtps2dq\t{%1, %0|%0, %1}"
2636 [(set_attr "type" "ssecvt")
2637 (set_attr "prefix" "vex")
2638 (set_attr "mode" "<avxvecmode>")])
;; cvtps2dq: the V4SF operand 1 is converted to V4SI under
;; UNSPEC_FIX_NOTRUNC.
2640 (define_insn "sse2_cvtps2dq"
2641 [(set (match_operand:V4SI 0 "register_operand" "=x")
2642 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
2643 UNSPEC_FIX_NOTRUNC))]
2645 "cvtps2dq\t{%1, %0|%0, %1}"
2646 [(set_attr "type" "ssecvt")
2647 (set_attr "prefix_data16" "1")
2648 (set_attr "mode" "TI")])
;; VEX vcvttps2dq over the AVXMODEDCVTPS2DQ modes, expressed with the plain
;; "fix" RTL code on operand 1 (mode <avxcvtvecmode>).
2650 (define_insn "avx_cvttps2dq<avxmodesuffix>"
2651 [(set (match_operand:AVXMODEDCVTPS2DQ 0 "register_operand" "=x")
2652 (fix:AVXMODEDCVTPS2DQ
2653 (match_operand:<avxcvtvecmode> 1 "nonimmediate_operand" "xm")))]
2655 "vcvttps2dq\t{%1, %0|%0, %1}"
2656 [(set_attr "type" "ssecvt")
2657 (set_attr "prefix" "vex")
2658 (set_attr "mode" "<avxvecmode>")])
;; cvttps2dq: fix:V4SI of the V4SF operand 1.  The attributes record a rep
;; prefix and no data16 prefix for this encoding.
2660 (define_insn "sse2_cvttps2dq"
2661 [(set (match_operand:V4SI 0 "register_operand" "=x")
2662 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
2664 "cvttps2dq\t{%1, %0|%0, %1}"
2665 [(set_attr "type" "ssecvt")
2666 (set_attr "prefix_rep" "1")
2667 (set_attr "prefix_data16" "0")
2668 (set_attr "mode" "TI")])
2670 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2672 ;; Parallel double-precision floating point conversion operations
2674 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2pd: converts the V2SI operand 1 to V2DF.  Alternative 0 takes the
;; source from a "y" register and is tagged unit "mmx" with a data16 prefix;
;; alternative 1 takes it from memory and leaves both attributes at "*".
2676 (define_insn "sse2_cvtpi2pd"
2677 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2678 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
2680 "cvtpi2pd\t{%1, %0|%0, %1}"
2681 [(set_attr "type" "ssecvt")
2682 (set_attr "unit" "mmx,*")
2683 (set_attr "prefix_data16" "1,*")
2684 (set_attr "mode" "V2DF")])
;; cvtpd2pi: the V2DF operand 1 is converted to V2SI under
;; UNSPEC_FIX_NOTRUNC; the destination uses the "y" constraint (unit "mmx").
2686 (define_insn "sse2_cvtpd2pi"
2687 [(set (match_operand:V2SI 0 "register_operand" "=y")
2688 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2689 UNSPEC_FIX_NOTRUNC))]
2691 "cvtpd2pi\t{%1, %0|%0, %1}"
2692 [(set_attr "type" "ssecvt")
2693 (set_attr "unit" "mmx")
2694 (set_attr "prefix_data16" "1")
2695 (set_attr "mode" "DI")])
;; cvttpd2pi: fix:V2SI of the V2DF operand 1; the destination uses the "y"
;; constraint (unit "mmx").
;; NOTE(review): "mode" is "TI" here but "DI" in sse2_cvtpd2pi, which has the
;; same V2SI destination -- confirm the difference is intentional.
2697 (define_insn "sse2_cvttpd2pi"
2698 [(set (match_operand:V2SI 0 "register_operand" "=y")
2699 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
2701 "cvttpd2pi\t{%1, %0|%0, %1}"
2702 [(set_attr "type" "ssecvt")
2703 (set_attr "unit" "mmx")
2704 (set_attr "prefix_data16" "1")
2705 (set_attr "mode" "TI")])
;; VEX three-operand form: converts the SI operand 2 (register or memory) to
;; DF; operand 1 is a separate V2DF source register.
2707 (define_insn "*avx_cvtsi2sd"
2708 [(set (match_operand:V2DF 0 "register_operand" "=x")
2711 (float:DF (match_operand:SI 2 "nonimmediate_operand" "rm")))
2712 (match_operand:V2DF 1 "register_operand" "x")
2715 "vcvtsi2sd\t{%2, %1, %0|%0, %1, %2}"
2716 [(set_attr "type" "sseicvt")
2717 (set_attr "prefix" "vex")
2718 (set_attr "mode" "DF")])
;; cvtsi2sd: converts the SI operand 2 to DF; operand 1 is tied to the
;; destination.  Register and memory sources are separate alternatives so the
;; decode attributes can differ.
2720 (define_insn "sse2_cvtsi2sd"
2721 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2724 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
2725 (match_operand:V2DF 1 "register_operand" "0,0")
2728 "cvtsi2sd\t{%2, %0|%0, %2}"
2729 [(set_attr "type" "sseicvt")
2730 (set_attr "mode" "DF")
2731 (set_attr "athlon_decode" "double,direct")
2732 (set_attr "amdfam10_decode" "vector,double")])
;; 64-bit VEX form: converts the DI operand 2 (register or memory) to DF.
;; Only available under TARGET_AVX && TARGET_64BIT.
2734 (define_insn "*avx_cvtsi2sdq"
2735 [(set (match_operand:V2DF 0 "register_operand" "=x")
2738 (float:DF (match_operand:DI 2 "nonimmediate_operand" "rm")))
2739 (match_operand:V2DF 1 "register_operand" "x")
2741 "TARGET_AVX && TARGET_64BIT"
2742 "vcvtsi2sdq\t{%2, %1, %0|%0, %1, %2}"
2743 [(set_attr "type" "sseicvt")
2744 (set_attr "length_vex" "4")
2745 (set_attr "prefix" "vex")
2746 (set_attr "mode" "DF")])
;; 64-bit cvtsi2sdq: converts the DI operand 2 to DF; operand 1 is tied to
;; the destination.  Only available under TARGET_SSE2 && TARGET_64BIT.
2748 (define_insn "sse2_cvtsi2sdq"
2749 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2752 (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m")))
2753 (match_operand:V2DF 1 "register_operand" "0,0")
2755 "TARGET_SSE2 && TARGET_64BIT"
2756 "cvtsi2sdq\t{%2, %0|%0, %2}"
2757 [(set_attr "type" "sseicvt")
2758 (set_attr "prefix_rex" "1")
2759 (set_attr "mode" "DF")
2760 (set_attr "athlon_decode" "double,direct")
2761 (set_attr "amdfam10_decode" "vector,double")])
;; cvtsd2si on a vector source: element 0 of the V2DF operand 1 is converted
;; to SI under UNSPEC_FIX_NOTRUNC.  The %v prefix and "maybe_vex" attribute
;; let the same pattern serve the VEX-encoded form.
2763 (define_insn "sse2_cvtsd2si"
2764 [(set (match_operand:SI 0 "register_operand" "=r,r")
2767 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2768 (parallel [(const_int 0)]))]
2769 UNSPEC_FIX_NOTRUNC))]
2771 "%vcvtsd2si\t{%1, %0|%0, %1}"
2772 [(set_attr "type" "sseicvt")
2773 (set_attr "athlon_decode" "double,vector")
2774 (set_attr "prefix_rep" "1")
2775 (set_attr "prefix" "maybe_vex")
2776 (set_attr "mode" "SI")])
;; cvtsd2si on a scalar source: the DF operand 1 (register or memory) is
;; converted to SI under UNSPEC_FIX_NOTRUNC.
2778 (define_insn "sse2_cvtsd2si_2"
2779 [(set (match_operand:SI 0 "register_operand" "=r,r")
2780 (unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2781 UNSPEC_FIX_NOTRUNC))]
2783 "%vcvtsd2si\t{%1, %0|%0, %1}"
2784 [(set_attr "type" "sseicvt")
2785 (set_attr "athlon_decode" "double,vector")
2786 (set_attr "amdfam10_decode" "double,double")
2787 (set_attr "prefix_rep" "1")
2788 (set_attr "prefix" "maybe_vex")
2789 (set_attr "mode" "SI")])
;; 64-bit cvtsd2si on a vector source: element 0 of the V2DF operand 1 is
;; converted to DI under UNSPEC_FIX_NOTRUNC.  Requires TARGET_SSE2 and
;; TARGET_64BIT.
;; NOTE(review): the template spells the "q" suffix unconditionally, so it is
;; printed in both assembler dialects; sse_cvtss2siq uses "{q}" (AT&T only)
;; -- confirm whether "%vcvtsd2si{q}" was intended.
2791 (define_insn "sse2_cvtsd2siq"
2792 [(set (match_operand:DI 0 "register_operand" "=r,r")
2795 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2796 (parallel [(const_int 0)]))]
2797 UNSPEC_FIX_NOTRUNC))]
2798 "TARGET_SSE2 && TARGET_64BIT"
2799 "%vcvtsd2siq\t{%1, %0|%0, %1}"
2800 [(set_attr "type" "sseicvt")
2801 (set_attr "athlon_decode" "double,vector")
2802 (set_attr "prefix_rep" "1")
2803 (set_attr "prefix" "maybe_vex")
2804 (set_attr "mode" "DI")])
;; 64-bit cvtsd2si on a scalar source: the DF operand 1 (register or memory)
;; is converted to DI under UNSPEC_FIX_NOTRUNC.  Requires TARGET_SSE2 and
;; TARGET_64BIT.
;; The size suffix is written as {q} so that it is printed in the AT&T
;; dialect only, matching sse_cvtss2siq_2; in the Intel dialect the operand
;; size follows from the 64-bit destination register.  AT&T output is
;; unchanged.
2806 (define_insn "sse2_cvtsd2siq_2"
2807 [(set (match_operand:DI 0 "register_operand" "=r,r")
2808 (unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2809 UNSPEC_FIX_NOTRUNC))]
2810 "TARGET_SSE2 && TARGET_64BIT"
2811 "%vcvtsd2si{q}\t{%1, %0|%0, %1}"
2812 [(set_attr "type" "sseicvt")
2813 (set_attr "athlon_decode" "double,vector")
2814 (set_attr "amdfam10_decode" "double,double")
2815 (set_attr "prefix_rep" "1")
2816 (set_attr "prefix" "maybe_vex")
2817 (set_attr "mode" "DI")])
;; cvttsd2si: converts element 0 of the V2DF operand 1 to SI; the source may
;; be a register (alternative 0) or memory (alternative 1).
2819 (define_insn "sse2_cvttsd2si"
2820 [(set (match_operand:SI 0 "register_operand" "=r,r")
2823 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2824 (parallel [(const_int 0)]))))]
2826 "%vcvttsd2si\t{%1, %0|%0, %1}"
2827 [(set_attr "type" "sseicvt")
2828 (set_attr "prefix_rep" "1")
2829 (set_attr "prefix" "maybe_vex")
2830 (set_attr "mode" "SI")
2831 (set_attr "athlon_decode" "double,vector")
2832 (set_attr "amdfam10_decode" "double,double")])
;; 64-bit cvttsd2si: converts element 0 of the V2DF operand 1 to DI.
;; Requires TARGET_SSE2 and TARGET_64BIT.
;; NOTE(review): the template spells the "q" suffix unconditionally, whereas
;; sse_cvttss2siq uses "{q}" (AT&T only) -- confirm whether
;; "%vcvttsd2si{q}" was intended.
2834 (define_insn "sse2_cvttsd2siq"
2835 [(set (match_operand:DI 0 "register_operand" "=r,r")
2838 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2839 (parallel [(const_int 0)]))))]
2840 "TARGET_SSE2 && TARGET_64BIT"
2841 "%vcvttsd2siq\t{%1, %0|%0, %1}"
2842 [(set_attr "type" "sseicvt")
2843 (set_attr "prefix_rep" "1")
2844 (set_attr "prefix" "maybe_vex")
2845 (set_attr "mode" "DI")
2846 (set_attr "athlon_decode" "double,vector")
2847 (set_attr "amdfam10_decode" "double,double")])
;; VEX vcvtdq2pd: converts all four elements of the V4SI operand 1 to V4DF.
2849 (define_insn "avx_cvtdq2pd256"
2850 [(set (match_operand:V4DF 0 "register_operand" "=x")
2851 (float:V4DF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
2853 "vcvtdq2pd\t{%1, %0|%0, %1}"
2854 [(set_attr "type" "ssecvt")
2855 (set_attr "prefix" "vex")
2856 (set_attr "mode" "V4DF")])
;; cvtdq2pd: elements 0 and 1 of the V4SI operand 1 are selected and the
;; result is V2DF.  The %v prefix covers the VEX-encoded form as well.
2858 (define_insn "sse2_cvtdq2pd"
2859 [(set (match_operand:V2DF 0 "register_operand" "=x")
2862 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
2863 (parallel [(const_int 0) (const_int 1)]))))]
2865 "%vcvtdq2pd\t{%1, %0|%0, %1}"
2866 [(set_attr "type" "ssecvt")
2867 (set_attr "prefix" "maybe_vex")
2868 (set_attr "mode" "V2DF")])
;; VEX vcvtpd2dq on a 256-bit source: the V4DF operand 1 is converted to
;; V4SI under UNSPEC_FIX_NOTRUNC.  The {y} suffix is printed in the AT&T
;; dialect only.
2870 (define_insn "avx_cvtpd2dq256"
2871 [(set (match_operand:V4SI 0 "register_operand" "=x")
2872 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")]
2873 UNSPEC_FIX_NOTRUNC))]
2875 "vcvtpd2dq{y}\t{%1, %0|%0, %1}"
2876 [(set_attr "type" "ssecvt")
2877 (set_attr "prefix" "vex")
2878 (set_attr "mode" "OI")])
;; Expander for the V2DF -> V4SI conversion.  It supplies operand 2 as the
;; V2SI zero constant, which the matching "*sse2_cvtpd2dq" insn expects as a
;; const0_operand.
2880 (define_expand "sse2_cvtpd2dq"
2881 [(set (match_operand:V4SI 0 "register_operand" "")
2883 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "")]
2887 "operands[2] = CONST0_RTX (V2SImode);")
;; Matcher for the V2DF -> V4SI conversion: a V2SI unspec of operand 1
;; combined with a V2SI zero constant (operand 2).  The C output statement
;; picks "vcvtpd2dq{x}" when TARGET_AVX is set and plain "cvtpd2dq" otherwise.
2889 (define_insn "*sse2_cvtpd2dq"
2890 [(set (match_operand:V4SI 0 "register_operand" "=x")
2892 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2894 (match_operand:V2SI 2 "const0_operand" "")))]
2896 "* return TARGET_AVX ? \"vcvtpd2dq{x}\t{%1, %0|%0, %1}\"
2897 : \"cvtpd2dq\t{%1, %0|%0, %1}\";"
2898 [(set_attr "type" "ssecvt")
2899 (set_attr "prefix_rep" "1")
2900 (set_attr "prefix_data16" "0")
2901 (set_attr "prefix" "maybe_vex")
2902 (set_attr "mode" "TI")
2903 (set_attr "amdfam10_decode" "double")])
;; VEX vcvttpd2dq on a 256-bit source: fix:V4SI of the V4DF operand 1.
;; The {y} suffix is printed in the AT&T dialect only.
2905 (define_insn "avx_cvttpd2dq256"
2906 [(set (match_operand:V4SI 0 "register_operand" "=x")
2907 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
2909 "vcvttpd2dq{y}\t{%1, %0|%0, %1}"
2910 [(set_attr "type" "ssecvt")
2911 (set_attr "prefix" "vex")
2912 (set_attr "mode" "OI")])
;; Expander for the "fix" V2DF -> V4SI conversion.  It supplies operand 2 as
;; the V2SI zero constant, which the matching "*sse2_cvttpd2dq" insn expects
;; as a const0_operand.
2914 (define_expand "sse2_cvttpd2dq"
2915 [(set (match_operand:V4SI 0 "register_operand" "")
2917 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" ""))
2920 "operands[2] = CONST0_RTX (V2SImode);")
;; Matcher for the "fix" V2DF -> V4SI conversion: fix:V2SI of operand 1
;; combined with a V2SI zero constant (operand 2).  The C output statement
;; picks "vcvttpd2dq{x}" when TARGET_AVX is set and plain "cvttpd2dq"
;; otherwise.
2922 (define_insn "*sse2_cvttpd2dq"
2923 [(set (match_operand:V4SI 0 "register_operand" "=x")
2925 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2926 (match_operand:V2SI 2 "const0_operand" "")))]
2928 "* return TARGET_AVX ? \"vcvttpd2dq{x}\t{%1, %0|%0, %1}\"
2929 : \"cvttpd2dq\t{%1, %0|%0, %1}\";"
2930 [(set_attr "type" "ssecvt")
2931 (set_attr "prefix" "maybe_vex")
2932 (set_attr "mode" "TI")
2933 (set_attr "amdfam10_decode" "double")])
;; VEX three-operand vcvtsd2ss: float_truncate:V2SF of the V2DF operand 2
;; (register or memory); operand 1 is a separate V4SF source register.
2935 (define_insn "*avx_cvtsd2ss"
2936 [(set (match_operand:V4SF 0 "register_operand" "=x")
2939 (float_truncate:V2SF
2940 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))
2941 (match_operand:V4SF 1 "register_operand" "x")
2944 "vcvtsd2ss\t{%2, %1, %0|%0, %1, %2}"
2945 [(set_attr "type" "ssecvt")
2946 (set_attr "prefix" "vex")
2947 (set_attr "mode" "SF")])
;; cvtsd2ss: float_truncate:V2SF of the V2DF operand 2; operand 1 is tied to
;; the destination.  Register and memory sources are separate alternatives so
;; the decode attributes can differ.
2949 (define_insn "sse2_cvtsd2ss"
2950 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2953 (float_truncate:V2SF
2954 (match_operand:V2DF 2 "nonimmediate_operand" "x,m")))
2955 (match_operand:V4SF 1 "register_operand" "0,0")
2958 "cvtsd2ss\t{%2, %0|%0, %2}"
2959 [(set_attr "type" "ssecvt")
2960 (set_attr "athlon_decode" "vector,double")
2961 (set_attr "amdfam10_decode" "vector,double")
2962 (set_attr "mode" "SF")])
;; VEX three-operand vcvtss2sd: elements 0 and 1 of the V4SF operand 2 are
;; selected as the conversion source; operand 1 is a separate V2DF source
;; register.
2964 (define_insn "*avx_cvtss2sd"
2965 [(set (match_operand:V2DF 0 "register_operand" "=x")
2969 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
2970 (parallel [(const_int 0) (const_int 1)])))
2971 (match_operand:V2DF 1 "register_operand" "x")
2974 "vcvtss2sd\t{%2, %1, %0|%0, %1, %2}"
2975 [(set_attr "type" "ssecvt")
2976 (set_attr "prefix" "vex")
2977 (set_attr "mode" "DF")])
;; cvtss2sd: elements 0 and 1 of the V4SF operand 2 are selected as the
;; conversion source; operand 1 is tied to the destination.
2979 (define_insn "sse2_cvtss2sd"
2980 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2984 (match_operand:V4SF 2 "nonimmediate_operand" "x,m")
2985 (parallel [(const_int 0) (const_int 1)])))
2986 (match_operand:V2DF 1 "register_operand" "0,0")
2989 "cvtss2sd\t{%2, %0|%0, %2}"
2990 [(set_attr "type" "ssecvt")
2991 (set_attr "amdfam10_decode" "vector,double")
2992 (set_attr "mode" "DF")])
;; VEX vcvtpd2ps on a 256-bit source: float_truncate of the V4DF operand 1
;; to V4SF.  The {y} suffix is printed in the AT&T dialect only.
2994 (define_insn "avx_cvtpd2ps256"
2995 [(set (match_operand:V4SF 0 "register_operand" "=x")
2996 (float_truncate:V4SF
2997 (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
2999 "vcvtpd2ps{y}\t{%1, %0|%0, %1}"
3000 [(set_attr "type" "ssecvt")
3001 (set_attr "prefix" "vex")
3002 (set_attr "mode" "V4SF")])
;; Expander for the V2DF -> V4SF narrowing conversion.  It supplies operand 2
;; as the V2SF zero constant, which the matching "*sse2_cvtpd2ps" insn
;; expects as a const0_operand.
3004 (define_expand "sse2_cvtpd2ps"
3005 [(set (match_operand:V4SF 0 "register_operand" "")
3007 (float_truncate:V2SF
3008 (match_operand:V2DF 1 "nonimmediate_operand" ""))
3011 "operands[2] = CONST0_RTX (V2SFmode);")
;; Matcher for the V2DF -> V4SF narrowing conversion: float_truncate:V2SF of
;; operand 1 combined with a V2SF zero constant (operand 2).  The C output
;; statement picks "vcvtpd2ps{x}" when TARGET_AVX is set and plain "cvtpd2ps"
;; otherwise.
3013 (define_insn "*sse2_cvtpd2ps"
3014 [(set (match_operand:V4SF 0 "register_operand" "=x")
3016 (float_truncate:V2SF
3017 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
3018 (match_operand:V2SF 2 "const0_operand" "")))]
3020 "* return TARGET_AVX ? \"vcvtpd2ps{x}\t{%1, %0|%0, %1}\"
3021 : \"cvtpd2ps\t{%1, %0|%0, %1}\";"
3022 [(set_attr "type" "ssecvt")
3023 (set_attr "prefix_data16" "1")
3024 (set_attr "prefix" "maybe_vex")
3025 (set_attr "mode" "V4SF")
3026 (set_attr "amdfam10_decode" "double")])
;; 256-bit vcvtps2pd: V4SF source (register or memory) to a V4DF
;; register destination.
3028 (define_insn "avx_cvtps2pd256"
3029 [(set (match_operand:V4DF 0 "register_operand" "=x")
3031 (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
3033 "vcvtps2pd\t{%1, %0|%0, %1}"
3034 [(set_attr "type" "ssecvt")
3035 (set_attr "prefix" "vex")
3036 (set_attr "mode" "V4DF")])
;; cvtps2pd on elements 0 and 1 of a V4SF source, producing V2DF.  The
;; %v template prefix together with the "maybe_vex" prefix attribute lets
;; the same pattern serve both the legacy and the VEX encoding.
3038 (define_insn "sse2_cvtps2pd"
3039 [(set (match_operand:V2DF 0 "register_operand" "=x")
3042 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3043 (parallel [(const_int 0) (const_int 1)]))))]
3045 "%vcvtps2pd\t{%1, %0|%0, %1}"
3046 [(set_attr "type" "ssecvt")
3047 (set_attr "prefix" "maybe_vex")
3048 (set_attr "mode" "V2DF")
3049 (set_attr "prefix_data16" "0")
3050 (set_attr "amdfam10_decode" "direct")])
;; Expander: widen the high half of V4SF operand 1 into V2DF operand 0.
;; Two steps are visible: a shuffle whose selector starts at element 6,
;; then a conversion of elements 0 and 1 of the shuffled value.
;; operands[2] is a fresh V4SF pseudo allocated by the preparation code.
3052 (define_expand "vec_unpacks_hi_v4sf"
3057 (match_operand:V4SF 1 "nonimmediate_operand" ""))
3058 (parallel [(const_int 6)
3062 (set (match_operand:V2DF 0 "register_operand" "")
3066 (parallel [(const_int 0) (const_int 1)]))))]
3069 operands[2] = gen_reg_rtx (V4SFmode);
;; Expander: widen the low half of V4SF operand 1 into V2DF operand 0 by
;; operating on elements 0 and 1 of the source.
3072 (define_expand "vec_unpacks_lo_v4sf"
3073 [(set (match_operand:V2DF 0 "register_operand" "")
3076 (match_operand:V4SF 1 "nonimmediate_operand" "")
3077 (parallel [(const_int 0) (const_int 1)]))))]
;; Expander: V8HI -> V4SF for the high half, signed.  Unpacks operand 1
;; into a V4SI temporary with gen_vec_unpacks_hi_v8hi and then converts
;; that temporary to float with gen_sse2_cvtdq2ps.
3080 (define_expand "vec_unpacks_float_hi_v8hi"
3081 [(match_operand:V4SF 0 "register_operand" "")
3082 (match_operand:V8HI 1 "register_operand" "")]
3085 rtx tmp = gen_reg_rtx (V4SImode);
3087 emit_insn (gen_vec_unpacks_hi_v8hi (tmp, operands[1]));
3088 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; Expander: V8HI -> V4SF for the low half, signed.  Unpacks operand 1
;; into a V4SI temporary with gen_vec_unpacks_lo_v8hi and then converts
;; that temporary to float with gen_sse2_cvtdq2ps.
3092 (define_expand "vec_unpacks_float_lo_v8hi"
3093 [(match_operand:V4SF 0 "register_operand" "")
3094 (match_operand:V8HI 1 "register_operand" "")]
3097 rtx tmp = gen_reg_rtx (V4SImode);
3099 emit_insn (gen_vec_unpacks_lo_v8hi (tmp, operands[1]));
3100 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; Expander: V8HI -> V4SF for the high half, unsigned.  Unpacks operand 1
;; into a V4SI temporary with gen_vec_unpacku_hi_v8hi and then converts
;; that temporary to float with gen_sse2_cvtdq2ps.
3104 (define_expand "vec_unpacku_float_hi_v8hi"
3105 [(match_operand:V4SF 0 "register_operand" "")
3106 (match_operand:V8HI 1 "register_operand" "")]
3109 rtx tmp = gen_reg_rtx (V4SImode);
3111 emit_insn (gen_vec_unpacku_hi_v8hi (tmp, operands[1]));
3112 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; Expander: V8HI -> V4SF for the low half, unsigned.  Unpacks operand 1
;; into a V4SI temporary with gen_vec_unpacku_lo_v8hi and then converts
;; that temporary to float with gen_sse2_cvtdq2ps.
3116 (define_expand "vec_unpacku_float_lo_v8hi"
3117 [(match_operand:V4SF 0 "register_operand" "")
3118 (match_operand:V8HI 1 "register_operand" "")]
3121 rtx tmp = gen_reg_rtx (V4SImode);
3123 emit_insn (gen_vec_unpacku_lo_v8hi (tmp, operands[1]));
3124 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; Expander: high half of V4SI operand 1 -> V2DF operand 0.  A shuffle
;; whose selector starts at element 2 is followed by a conversion of
;; elements 0 and 1; operands[2] is a fresh V4SI pseudo allocated by the
;; preparation statement.
3128 (define_expand "vec_unpacks_float_hi_v4si"
3131 (match_operand:V4SI 1 "nonimmediate_operand" "")
3132 (parallel [(const_int 2)
3136 (set (match_operand:V2DF 0 "register_operand" "")
3140 (parallel [(const_int 0) (const_int 1)]))))]
3142 "operands[2] = gen_reg_rtx (V4SImode);")
;; Expander: low half of V4SI operand 1 -> V2DF operand 0, operating on
;; elements 0 and 1 of the source.
3144 (define_expand "vec_unpacks_float_lo_v4si"
3145 [(set (match_operand:V2DF 0 "register_operand" "")
3148 (match_operand:V4SI 1 "nonimmediate_operand" "")
3149 (parallel [(const_int 0) (const_int 1)]))))]
;; Expander: unsigned high half of V4SI operand 1 -> V2DF operand 0.
;; Visible steps: a shuffle starting at element 2, a conversion of
;; elements 0/1, a "less than" compare of operand 6 against operand 3,
;; an AND of operand 7 with operand 4, and a final add of operands 6 and 8.
;; The preparation code makes operand 3 a V2DF zero vector, operand 4 a
;; V2DF vector built from 2^32 (real_ldexp of 1.0 by 32), operand 5 a V4SI
;; temporary and operands 6..8 V2DF temporaries.
;; NOTE(review): presumably the compare result lands in operand 7 and the
;; AND result in operand 8 -- confirm against the full pattern.
3152 (define_expand "vec_unpacku_float_hi_v4si"
3155 (match_operand:V4SI 1 "nonimmediate_operand" "")
3156 (parallel [(const_int 2)
3164 (parallel [(const_int 0) (const_int 1)]))))
3166 (lt:V2DF (match_dup 6) (match_dup 3)))
3168 (and:V2DF (match_dup 7) (match_dup 4)))
3169 (set (match_operand:V2DF 0 "register_operand" "")
3170 (plus:V2DF (match_dup 6) (match_dup 8)))]
3173 REAL_VALUE_TYPE TWO32r;
3177 real_ldexp (&TWO32r, &dconst1, 32);
3178 x = const_double_from_real_value (TWO32r, DFmode);
3180 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
3181 operands[4] = force_reg (V2DFmode, ix86_build_const_vector (DFmode, 1, x));
3183 operands[5] = gen_reg_rtx (V4SImode);
3185 for (i = 6; i < 9; i++)
3186 operands[i] = gen_reg_rtx (V2DFmode);
;; Expander: unsigned low half of V4SI operand 1 -> V2DF operand 0.
;; Visible steps: a conversion of elements 0/1, a "less than" compare of
;; operand 5 against operand 3, an AND of operand 6 with operand 4, and a
;; final add of operands 5 and 7.  The preparation code makes operand 3 a
;; V2DF zero vector, operand 4 a V2DF vector built from 2^32 (real_ldexp
;; of 1.0 by 32) and operands 5..7 V2DF temporaries.
;; NOTE(review): presumably the compare result lands in operand 6 and the
;; AND result in operand 7 -- confirm against the full pattern.
3189 (define_expand "vec_unpacku_float_lo_v4si"
3193 (match_operand:V4SI 1 "nonimmediate_operand" "")
3194 (parallel [(const_int 0) (const_int 1)]))))
3196 (lt:V2DF (match_dup 5) (match_dup 3)))
3198 (and:V2DF (match_dup 6) (match_dup 4)))
3199 (set (match_operand:V2DF 0 "register_operand" "")
3200 (plus:V2DF (match_dup 5) (match_dup 7)))]
3203 REAL_VALUE_TYPE TWO32r;
3207 real_ldexp (&TWO32r, &dconst1, 32);
3208 x = const_double_from_real_value (TWO32r, DFmode);
3210 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
3211 operands[4] = force_reg (V2DFmode, ix86_build_const_vector (DFmode, 1, x));
3213 for (i = 5; i < 8; i++)
3214 operands[i] = gen_reg_rtx (V2DFmode);
;; Expander: pack two V2DF inputs into one V4SF.  Each input is converted
;; into its own V4SF temporary with gen_sse2_cvtpd2ps, and the two
;; temporaries are then combined into operand 0 with gen_sse_movlhps.
3217 (define_expand "vec_pack_trunc_v2df"
3218 [(match_operand:V4SF 0 "register_operand" "")
3219 (match_operand:V2DF 1 "nonimmediate_operand" "")
3220 (match_operand:V2DF 2 "nonimmediate_operand" "")]
3225 r1 = gen_reg_rtx (V4SFmode);
3226 r2 = gen_reg_rtx (V4SFmode);
3228 emit_insn (gen_sse2_cvtpd2ps (r1, operands[1]));
3229 emit_insn (gen_sse2_cvtpd2ps (r2, operands[2]));
3230 emit_insn (gen_sse_movlhps (operands[0], r1, r2));
;; Expander: pack two V2DF inputs into one V4SI using the truncating
;; conversion.  Each input goes through gen_sse2_cvttpd2dq into a V4SI
;; temporary; the temporaries and the destination are then viewed as
;; V2DI (gen_lowpart) and merged with gen_vec_interleave_lowv2di.
3234 (define_expand "vec_pack_sfix_trunc_v2df"
3235 [(match_operand:V4SI 0 "register_operand" "")
3236 (match_operand:V2DF 1 "nonimmediate_operand" "")
3237 (match_operand:V2DF 2 "nonimmediate_operand" "")]
3242 r1 = gen_reg_rtx (V4SImode);
3243 r2 = gen_reg_rtx (V4SImode);
3245 emit_insn (gen_sse2_cvttpd2dq (r1, operands[1]));
3246 emit_insn (gen_sse2_cvttpd2dq (r2, operands[2]));
3247 emit_insn (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]),
3248 gen_lowpart (V2DImode, r1),
3249 gen_lowpart (V2DImode, r2)));
;; Expander: pack two V2DF inputs into one V4SI using gen_sse2_cvtpd2dq
;; (the non-"tt" conversion, unlike vec_pack_sfix_trunc_v2df).  The two
;; V4SI temporaries and the destination are viewed as V2DI (gen_lowpart)
;; and merged with gen_vec_interleave_lowv2di.
3253 (define_expand "vec_pack_sfix_v2df"
3254 [(match_operand:V4SI 0 "register_operand" "")
3255 (match_operand:V2DF 1 "nonimmediate_operand" "")
3256 (match_operand:V2DF 2 "nonimmediate_operand" "")]
3261 r1 = gen_reg_rtx (V4SImode);
3262 r2 = gen_reg_rtx (V4SImode);
3264 emit_insn (gen_sse2_cvtpd2dq (r1, operands[1]));
3265 emit_insn (gen_sse2_cvtpd2dq (r2, operands[2]));
3266 emit_insn (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]),
3267 gen_lowpart (V2DImode, r1),
3268 gen_lowpart (V2DImode, r2)));
3272 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3274 ;; Parallel single-precision floating point element swizzling
3276 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander wrapper for the movhlps shuffle (selector starts at element
;; 6 of the operand-1/operand-2 pair).  All three operands may be memory
;; here; ix86_fixup_binary_operands is called to legitimize them before
;; the insn pattern is matched.
3278 (define_expand "sse_movhlps_exp"
3279 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3282 (match_operand:V4SF 1 "nonimmediate_operand" "")
3283 (match_operand:V4SF 2 "nonimmediate_operand" ""))
3284 (parallel [(const_int 6)
3289 "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
;; AVX movhlps family, three alternatives:
;;   0: register sources             -> vmovhlps
;;   1: offsettable memory operand 2 -> vmovlps using %H2
;;   2: memory destination tied to operand 1 -> two-operand vmovhps
;; The condition rejects the case where operands 1 and 2 are both memory.
3291 (define_insn "*avx_movhlps"
3292 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
3295 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,0")
3296 (match_operand:V4SF 2 "nonimmediate_operand" " x,o,x"))
3297 (parallel [(const_int 6)
3301 "TARGET_AVX && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3303 vmovhlps\t{%2, %1, %0|%0, %1, %2}
3304 vmovlps\t{%H2, %1, %0|%0, %1, %H2}
3305 vmovhps\t{%2, %0|%0, %2}"
3306 [(set_attr "type" "ssemov")
3307 (set_attr "prefix" "vex")
3308 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Legacy SSE movhlps family; operand 1 is tied to the destination in
;; every alternative:
;;   0: register operand 2            -> movhlps
;;   1: offsettable memory operand 2  -> movlps using %H2
;;   2: memory destination            -> movhps
;; The condition rejects the case where operands 1 and 2 are both memory.
3310 (define_insn "sse_movhlps"
3311 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
3314 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
3315 (match_operand:V4SF 2 "nonimmediate_operand" " x,o,x"))
3316 (parallel [(const_int 6)
3320 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3322 movhlps\t{%2, %0|%0, %2}
3323 movlps\t{%H2, %0|%0, %H2}
3324 movhps\t{%2, %0|%0, %2}"
3325 [(set_attr "type" "ssemov")
3326 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Expander wrapper for the movlhps shuffle (selector starts at element
;; 0).  Operands may all be memory here; ix86_fixup_binary_operands is
;; called to legitimize them before the insn pattern is matched.
3328 (define_expand "sse_movlhps_exp"
3329 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3332 (match_operand:V4SF 1 "nonimmediate_operand" "")
3333 (match_operand:V4SF 2 "nonimmediate_operand" ""))
3334 (parallel [(const_int 0)
3339 "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
;; AVX movlhps family, three alternatives:
;;   0: register sources  -> vmovlhps
;;   1: memory operand 2  -> vmovhps (three-operand load form)
;;   2: offsettable memory destination tied to operand 1
;;                        -> vmovlps store to %H0
;; Operand validity is checked with ix86_binary_operator_ok.
3341 (define_insn "*avx_movlhps"
3342 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
3345 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,0")
3346 (match_operand:V4SF 2 "nonimmediate_operand" " x,m,x"))
3347 (parallel [(const_int 0)
3351 "TARGET_AVX && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
3353 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3354 vmovhps\t{%2, %1, %0|%0, %1, %2}
3355 vmovlps\t{%2, %H0|%H0, %2}"
3356 [(set_attr "type" "ssemov")
3357 (set_attr "prefix" "vex")
3358 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Legacy SSE movlhps family; operand 1 is tied to the destination in
;; every alternative:
;;   0: register operand 2 -> movlhps
;;   1: memory operand 2   -> movhps
;;   2: offsettable memory destination -> movlps store to %H0
;; Operand validity is checked with ix86_binary_operator_ok.
3360 (define_insn "sse_movlhps"
3361 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
3364 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
3365 (match_operand:V4SF 2 "nonimmediate_operand" " x,m,x"))
3366 (parallel [(const_int 0)
3370 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
3372 movlhps\t{%2, %0|%0, %2}
3373 movhps\t{%2, %0|%0, %2}
3374 movlps\t{%2, %H0|%H0, %2}"
3375 [(set_attr "type" "ssemov")
3376 (set_attr "mode" "V4SF,V2SF,V2SF")])
3378 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; 256-bit vunpckhps.  The selector interleaves elements 2,3 and 6,7 of
;; operand 1 with the matching elements of operand 2 (indices 10,11 and
;; 14,15 of the concatenation), i.e. the upper pair of each group of four.
3379 (define_insn "avx_unpckhps256"
3380 [(set (match_operand:V8SF 0 "register_operand" "=x")
3383 (match_operand:V8SF 1 "register_operand" "x")
3384 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3385 (parallel [(const_int 2) (const_int 10)
3386 (const_int 3) (const_int 11)
3387 (const_int 6) (const_int 14)
3388 (const_int 7) (const_int 15)])))]
3390 "vunpckhps\t{%2, %1, %0|%0, %1, %2}"
3391 [(set_attr "type" "sselog")
3392 (set_attr "prefix" "vex")
3393 (set_attr "mode" "V8SF")])
;; 128-bit AVX vunpckhps: interleaves elements 2 and 3 of operand 1 with
;; elements 2 and 3 of operand 2 (indices 6 and 7 of the concatenation).
;; Three-operand form, so operand 1 need not match the destination.
3395 (define_insn "*avx_interleave_highv4sf"
3396 [(set (match_operand:V4SF 0 "register_operand" "=x")
3399 (match_operand:V4SF 1 "register_operand" "x")
3400 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
3401 (parallel [(const_int 2) (const_int 6)
3402 (const_int 3) (const_int 7)])))]
3404 "vunpckhps\t{%2, %1, %0|%0, %1, %2}"
3405 [(set_attr "type" "sselog")
3406 (set_attr "prefix" "vex")
3407 (set_attr "mode" "V4SF")])
;; Legacy unpckhps: same selector as *avx_interleave_highv4sf (2,6,3,7)
;; but in two-operand form, with operand 1 tied to the destination.
3409 (define_insn "vec_interleave_highv4sf"
3410 [(set (match_operand:V4SF 0 "register_operand" "=x")
3413 (match_operand:V4SF 1 "register_operand" "0")
3414 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
3415 (parallel [(const_int 2) (const_int 6)
3416 (const_int 3) (const_int 7)])))]
3418 "unpckhps\t{%2, %0|%0, %2}"
3419 [(set_attr "type" "sselog")
3420 (set_attr "mode" "V4SF")])
3422 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; 256-bit vunpcklps.  The selector interleaves elements 0,1 and 4,5 of
;; operand 1 with the matching elements of operand 2 (indices 8,9 and
;; 12,13 of the concatenation), i.e. the lower pair of each group of four.
3423 (define_insn "avx_unpcklps256"
3424 [(set (match_operand:V8SF 0 "register_operand" "=x")
3427 (match_operand:V8SF 1 "register_operand" "x")
3428 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3429 (parallel [(const_int 0) (const_int 8)
3430 (const_int 1) (const_int 9)
3431 (const_int 4) (const_int 12)
3432 (const_int 5) (const_int 13)])))]
3434 "vunpcklps\t{%2, %1, %0|%0, %1, %2}"
3435 [(set_attr "type" "sselog")
3436 (set_attr "prefix" "vex")
3437 (set_attr "mode" "V8SF")])
;; 128-bit AVX vunpcklps: interleaves elements 0 and 1 of operand 1 with
;; elements 0 and 1 of operand 2 (indices 4 and 5 of the concatenation).
;; Three-operand form, so operand 1 need not match the destination.
3439 (define_insn "*avx_interleave_lowv4sf"
3440 [(set (match_operand:V4SF 0 "register_operand" "=x")
3443 (match_operand:V4SF 1 "register_operand" "x")
3444 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
3445 (parallel [(const_int 0) (const_int 4)
3446 (const_int 1) (const_int 5)])))]
3448 "vunpcklps\t{%2, %1, %0|%0, %1, %2}"
3449 [(set_attr "type" "sselog")
3450 (set_attr "prefix" "vex")
3451 (set_attr "mode" "V4SF")])
;; Legacy unpcklps: same selector as *avx_interleave_lowv4sf (0,4,1,5)
;; but in two-operand form, with operand 1 tied to the destination.
3453 (define_insn "vec_interleave_lowv4sf"
3454 [(set (match_operand:V4SF 0 "register_operand" "=x")
3457 (match_operand:V4SF 1 "register_operand" "0")
3458 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
3459 (parallel [(const_int 0) (const_int 4)
3460 (const_int 1) (const_int 5)])))]
3462 "unpcklps\t{%2, %0|%0, %2}"
3463 [(set_attr "type" "sselog")
3464 (set_attr "mode" "V4SF")])
3466 ;; These are modeled with the same vec_concat as the others so that we
3467 ;; capture users of shufps that can use the new instructions
;; 256-bit vmovshdup: the selector duplicates each odd-indexed element
;; (1,1,3,3,5,5,7,7) of the V8SF source into the destination.
3468 (define_insn "avx_movshdup256"
3469 [(set (match_operand:V8SF 0 "register_operand" "=x")
3472 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
3474 (parallel [(const_int 1) (const_int 1)
3475 (const_int 3) (const_int 3)
3476 (const_int 5) (const_int 5)
3477 (const_int 7) (const_int 7)])))]
3479 "vmovshdup\t{%1, %0|%0, %1}"
3480 [(set_attr "type" "sse")
3481 (set_attr "prefix" "vex")
3482 (set_attr "mode" "V8SF")])
;; 128-bit movshdup on a V4SF source (selector begins with element 1).
;; The %v template prefix and "maybe_vex" attribute cover both the legacy
;; and the VEX encoding; the legacy form carries a rep prefix.
3484 (define_insn "sse3_movshdup"
3485 [(set (match_operand:V4SF 0 "register_operand" "=x")
3488 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3490 (parallel [(const_int 1)
3495 "%vmovshdup\t{%1, %0|%0, %1}"
3496 [(set_attr "type" "sse")
3497 (set_attr "prefix_rep" "1")
3498 (set_attr "prefix" "maybe_vex")
3499 (set_attr "mode" "V4SF")])
;; 256-bit vmovsldup: the selector duplicates each even-indexed element
;; (0,0,2,2,4,4,6,6) of the V8SF source into the destination.
3501 (define_insn "avx_movsldup256"
3502 [(set (match_operand:V8SF 0 "register_operand" "=x")
3505 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
3507 (parallel [(const_int 0) (const_int 0)
3508 (const_int 2) (const_int 2)
3509 (const_int 4) (const_int 4)
3510 (const_int 6) (const_int 6)])))]
3512 "vmovsldup\t{%1, %0|%0, %1}"
3513 [(set_attr "type" "sse")
3514 (set_attr "prefix" "vex")
3515 (set_attr "mode" "V8SF")])
;; 128-bit movsldup on a V4SF source (selector begins with element 0).
;; The %v template prefix and "maybe_vex" attribute cover both the legacy
;; and the VEX encoding; the legacy form carries a rep prefix.
3517 (define_insn "sse3_movsldup"
3518 [(set (match_operand:V4SF 0 "register_operand" "=x")
3521 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3523 (parallel [(const_int 0)
3528 "%vmovsldup\t{%1, %0|%0, %1}"
3529 [(set_attr "type" "sse")
3530 (set_attr "prefix_rep" "1")
3531 (set_attr "prefix" "maybe_vex")
3532 (set_attr "mode" "V4SF")])
;; Expander taking the raw 8-bit shuffle immediate (operand 3) and
;; turning it into the eight explicit element indices expected by
;; avx_shufps256_1.  Each 2-bit field of the mask is used twice: fields
;; 0 and 1 index operand 1 (base 0, then +4); fields 2 and 3 index
;; operand 2 (base +8, then +12).
3534 (define_expand "avx_shufps256"
3535 [(match_operand:V8SF 0 "register_operand" "")
3536 (match_operand:V8SF 1 "register_operand" "")
3537 (match_operand:V8SF 2 "nonimmediate_operand" "")
3538 (match_operand:SI 3 "const_int_operand" "")]
3541 int mask = INTVAL (operands[3]);
3542 emit_insn (gen_avx_shufps256_1 (operands[0], operands[1], operands[2],
3543 GEN_INT ((mask >> 0) & 3),
3544 GEN_INT ((mask >> 2) & 3),
3545 GEN_INT (((mask >> 4) & 3) + 8),
3546 GEN_INT (((mask >> 6) & 3) + 8),
3547 GEN_INT (((mask >> 0) & 3) + 4),
3548 GEN_INT (((mask >> 2) & 3) + 4),
3549 GEN_INT (((mask >> 4) & 3) + 12),
3550 GEN_INT (((mask >> 6) & 3) + 12)));
3554 ;; Each 2-bit field of the mask selects 2 elements, one in each 128-bit lane.
;; vshufps on V8SF with the selector given as eight explicit indices
;; (operands 3..10).  The condition requires operands 7..10 to equal
;; operands 3..6 plus 4, so the second four indices repeat the first
;; four.  The output code rebuilds the 8-bit immediate from operands
;; 3..6 (two bits each, after removing the +8 bias on operands 5 and 6)
;; and stores it back into operands[3] for printing.
3555 (define_insn "avx_shufps256_1"
3556 [(set (match_operand:V8SF 0 "register_operand" "=x")
3559 (match_operand:V8SF 1 "register_operand" "x")
3560 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3561 (parallel [(match_operand 3 "const_0_to_3_operand" "")
3562 (match_operand 4 "const_0_to_3_operand" "")
3563 (match_operand 5 "const_8_to_11_operand" "")
3564 (match_operand 6 "const_8_to_11_operand" "")
3565 (match_operand 7 "const_4_to_7_operand" "")
3566 (match_operand 8 "const_4_to_7_operand" "")
3567 (match_operand 9 "const_12_to_15_operand" "")
3568 (match_operand 10 "const_12_to_15_operand" "")])))]
3570 && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
3571 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
3572 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
3573 && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4))"
3576 mask = INTVAL (operands[3]);
3577 mask |= INTVAL (operands[4]) << 2;
3578 mask |= (INTVAL (operands[5]) - 8) << 4;
3579 mask |= (INTVAL (operands[6]) - 8) << 6;
3580 operands[3] = GEN_INT (mask);
3582 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3584 [(set_attr "type" "sselog")
3585 (set_attr "length_immediate" "1")
3586 (set_attr "prefix" "vex")
3587 (set_attr "mode" "V8SF")])
;; Expander taking the raw 8-bit shufps immediate (operand 3) and
;; splitting it into four explicit element indices for
;; sse_shufps_v4sf: fields 0 and 1 index operand 1 (0..3), fields 2 and
;; 3 index operand 2 and are biased by +4.
3589 (define_expand "sse_shufps"
3590 [(match_operand:V4SF 0 "register_operand" "")
3591 (match_operand:V4SF 1 "register_operand" "")
3592 (match_operand:V4SF 2 "nonimmediate_operand" "")
3593 (match_operand:SI 3 "const_int_operand" "")]
3596 int mask = INTVAL (operands[3]);
3597 emit_insn (gen_sse_shufps_v4sf (operands[0], operands[1], operands[2],
3598 GEN_INT ((mask >> 0) & 3),
3599 GEN_INT ((mask >> 2) & 3),
3600 GEN_INT (((mask >> 4) & 3) + 4),
3601 GEN_INT (((mask >> 6) & 3) + 4)));
;; AVX three-operand vshufps over the SSEMODE4S modes.  Operands 3..6
;; are explicit element indices into the operand-1/operand-2
;; concatenation; the output code packs them back into the 8-bit
;; immediate (removing the +4 bias on operands 5 and 6) and stores it in
;; operands[3] for printing.
3605 (define_insn "*avx_shufps_<mode>"
3606 [(set (match_operand:SSEMODE4S 0 "register_operand" "=x")
3607 (vec_select:SSEMODE4S
3608 (vec_concat:<ssedoublesizemode>
3609 (match_operand:SSEMODE4S 1 "register_operand" "x")
3610 (match_operand:SSEMODE4S 2 "nonimmediate_operand" "xm"))
3611 (parallel [(match_operand 3 "const_0_to_3_operand" "")
3612 (match_operand 4 "const_0_to_3_operand" "")
3613 (match_operand 5 "const_4_to_7_operand" "")
3614 (match_operand 6 "const_4_to_7_operand" "")])))]
3618 mask |= INTVAL (operands[3]) << 0;
3619 mask |= INTVAL (operands[4]) << 2;
3620 mask |= (INTVAL (operands[5]) - 4) << 4;
3621 mask |= (INTVAL (operands[6]) - 4) << 6;
3622 operands[3] = GEN_INT (mask);
3624 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3626 [(set_attr "type" "sselog")
3627 (set_attr "length_immediate" "1")
3628 (set_attr "prefix" "vex")
3629 (set_attr "mode" "V4SF")])
;; Legacy two-operand shufps over the SSEMODE4S modes; operand 1 is tied
;; to the destination.  Operands 3..6 are explicit element indices; the
;; output code packs them back into the 8-bit immediate (removing the +4
;; bias on operands 5 and 6) and stores it in operands[3] for printing.
3631 (define_insn "sse_shufps_<mode>"
3632 [(set (match_operand:SSEMODE4S 0 "register_operand" "=x")
3633 (vec_select:SSEMODE4S
3634 (vec_concat:<ssedoublesizemode>
3635 (match_operand:SSEMODE4S 1 "register_operand" "0")
3636 (match_operand:SSEMODE4S 2 "nonimmediate_operand" "xm"))
3637 (parallel [(match_operand 3 "const_0_to_3_operand" "")
3638 (match_operand 4 "const_0_to_3_operand" "")
3639 (match_operand 5 "const_4_to_7_operand" "")
3640 (match_operand 6 "const_4_to_7_operand" "")])))]
3644 mask |= INTVAL (operands[3]) << 0;
3645 mask |= INTVAL (operands[4]) << 2;
3646 mask |= (INTVAL (operands[5]) - 4) << 4;
3647 mask |= (INTVAL (operands[6]) - 4) << 6;
3648 operands[3] = GEN_INT (mask);
3650 return "shufps\t{%3, %2, %0|%0, %2, %3}";
3652 [(set_attr "type" "sselog")
3653 (set_attr "length_immediate" "1")
3654 (set_attr "mode" "V4SF")])
;; Extract elements 2 and 3 of a V4SF into a V2SF destination:
;;   0: register -> memory              (v)movhps store
;;   1: register -> register            (v)movhlps, destination printed as %d0
;;   2: offsettable memory -> register  (v)movlps from %H1
;; The %v prefix selects the VEX mnemonic when applicable ("maybe_vex").
3656 (define_insn "sse_storehps"
3657 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3659 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
3660 (parallel [(const_int 2) (const_int 3)])))]
3663 %vmovhps\t{%1, %0|%0, %1}
3664 %vmovhlps\t{%1, %d0|%d0, %1}
3665 %vmovlps\t{%H1, %d0|%d0, %H1}"
3666 [(set_attr "type" "ssemov")
3667 (set_attr "prefix" "maybe_vex")
3668 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander wrapper for loadhps: elements 0 and 1 of V4SF operand 1
;; combined with V2SF operand 2.  ix86_fixup_binary_operands is called
;; to legitimize the operands before the insn pattern is matched.
3670 (define_expand "sse_loadhps_exp"
3671 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3674 (match_operand:V4SF 1 "nonimmediate_operand" "")
3675 (parallel [(const_int 0) (const_int 1)]))
3676 (match_operand:V2SF 2 "nonimmediate_operand" "")))]
3678 "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
;; AVX loadhps: keep elements 0/1 of operand 1, take the rest from V2SF
;; operand 2.
;;   0: memory operand 2   -> vmovhps
;;   1: register operand 2 -> vmovlhps
;;   2: offsettable memory destination tied to operand 1
;;                         -> vmovlps store to %H0
3680 (define_insn "*avx_loadhps"
3681 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
3684 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,0")
3685 (parallel [(const_int 0) (const_int 1)]))
3686 (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
3689 vmovhps\t{%2, %1, %0|%0, %1, %2}
3690 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3691 vmovlps\t{%2, %H0|%H0, %2}"
3692 [(set_attr "type" "ssemov")
3693 (set_attr "prefix" "vex")
3694 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Legacy loadhps; operand 1 is tied to the destination throughout.
;;   0: memory operand 2   -> movhps
;;   1: register operand 2 -> movlhps
;;   2: offsettable memory destination -> movlps store to %H0
3696 (define_insn "sse_loadhps"
3697 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
3700 (match_operand:V4SF 1 "nonimmediate_operand" "0,0,0")
3701 (parallel [(const_int 0) (const_int 1)]))
3702 (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
3705 movhps\t{%2, %0|%0, %2}
3706 movlhps\t{%2, %0|%0, %2}
3707 movlps\t{%2, %H0|%H0, %2}"
3708 [(set_attr "type" "ssemov")
3709 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; AVX extraction of elements 0 and 1 of a V4SF into a V2SF destination:
;;   0: register -> memory    vmovlps store
;;   1: register -> register  vmovaps (full register copy)
;;   2: memory -> register    three-operand vmovlps using %0 as both the
;;                            destination and the first source
;; NOTE(review): alternative 1 is tagged mode V2DF here, while the
;; legacy sse_storelps tags its movaps alternative V4SF -- confirm
;; whether the difference is intended.
3711 (define_insn "*avx_storelps"
3712 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3714 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m")
3715 (parallel [(const_int 0) (const_int 1)])))]
3718 vmovlps\t{%1, %0|%0, %1}
3719 vmovaps\t{%1, %0|%0, %1}
3720 vmovlps\t{%1, %0, %0|%0, %0, %1}"
3721 [(set_attr "type" "ssemov")
3722 (set_attr "prefix" "vex")
3723 (set_attr "mode" "V2SF,V2DF,V2SF")])
;; Legacy extraction of elements 0 and 1 of a V4SF into a V2SF
;; destination:
;;   0: register -> memory    movlps store
;;   1: register -> register  movaps (full register copy)
;;   2: memory -> register    movlps load
3725 (define_insn "sse_storelps"
3726 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3728 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m")
3729 (parallel [(const_int 0) (const_int 1)])))]
3732 movlps\t{%1, %0|%0, %1}
3733 movaps\t{%1, %0|%0, %1}
3734 movlps\t{%1, %0|%0, %1}"
3735 [(set_attr "type" "ssemov")
3736 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander wrapper for loadlps: V2SF operand 2 combined with elements 2
;; and 3 of V4SF operand 1.  ix86_fixup_binary_operands is called to
;; legitimize the operands before the insn pattern is matched.
3738 (define_expand "sse_loadlps_exp"
3739 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3741 (match_operand:V2SF 2 "nonimmediate_operand" "")
3743 (match_operand:V4SF 1 "nonimmediate_operand" "")
3744 (parallel [(const_int 2) (const_int 3)]))))]
3746 "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
;; AVX loadlps: V2SF operand 2 supplies the low part, elements 2 and 3
;; of V4SF operand 1 the high part.
;;   0: all registers      -> vshufps with immediate 0xe4
;;   1: memory operand 2   -> three-operand vmovlps load
;;   2: memory destination tied to operand 1 -> vmovlps store
;; Alternative 0 must use the VEX mnemonic: the template has three
;; register operands and the insn is marked prefix "vex", which the
;; legacy two-operand shufps cannot encode.
3748 (define_insn "*avx_loadlps"
3749 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
3751 (match_operand:V2SF 2 "nonimmediate_operand" "x,m,x")
3753 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,0")
3754 (parallel [(const_int 2) (const_int 3)]))))]
3757 vshufps\t{$0xe4, %1, %2, %0|%0, %2, %1, 0xe4}
3758 vmovlps\t{%2, %1, %0|%0, %1, %2}
3759 vmovlps\t{%2, %0|%0, %2}"
3760 [(set_attr "type" "sselog,ssemov,ssemov")
3761 (set_attr "length_immediate" "1,*,*")
3762 (set_attr "prefix" "vex")
3763 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Legacy loadlps: V2SF operand 2 supplies the low part, elements 2 and
;; 3 of V4SF operand 1 the high part.
;;   0: operand 2 tied to the destination, operand 1 in a register
;;      -> shufps with immediate 0xe4
;;   1: memory operand 2, operand 1 tied to the destination -> movlps load
;;   2: memory destination tied to operand 1 -> movlps store
3765 (define_insn "sse_loadlps"
3766 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
3768 (match_operand:V2SF 2 "nonimmediate_operand" "0,m,x")
3770 (match_operand:V4SF 1 "nonimmediate_operand" "x,0,0")
3771 (parallel [(const_int 2) (const_int 3)]))))]
3774 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
3775 movlps\t{%2, %0|%0, %2}
3776 movlps\t{%2, %0|%0, %2}"
3777 [(set_attr "type" "sselog,ssemov,ssemov")
3778 (set_attr "length_immediate" "1,*,*")
3779 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; AVX register-to-register vmovss in three-operand form: operands 1 and
;; 2 are both V4SF registers and neither is tied to the destination.
3781 (define_insn "*avx_movss"
3782 [(set (match_operand:V4SF 0 "register_operand" "=x")
3784 (match_operand:V4SF 2 "register_operand" "x")
3785 (match_operand:V4SF 1 "register_operand" "x")
3788 "vmovss\t{%2, %1, %0|%0, %1, %2}"
3789 [(set_attr "type" "ssemov")
3790 (set_attr "prefix" "vex")
3791 (set_attr "mode" "SF")])
;; Legacy register-to-register movss: operand 1 is tied to the
;; destination and operand 2 is the V4SF register source.
3793 (define_insn "sse_movss"
3794 [(set (match_operand:V4SF 0 "register_operand" "=x")
3796 (match_operand:V4SF 2 "register_operand" "x")
3797 (match_operand:V4SF 1 "register_operand" "0")
3800 "movss\t{%2, %0|%0, %2}"
3801 [(set_attr "type" "ssemov")
3802 (set_attr "mode" "SF")])
;; Expander: broadcast SF operand 1 into every element of V4SF operand 0.
;; Operand 1 may arrive as memory; it is forced into a register for the
;; register-only *vec_dupv4sf pattern.  The mode passed to force_reg must
;; be the operand's own mode (SFmode) -- operand 1 is a scalar, not a
;; V4SF vector.
3804 (define_expand "vec_dupv4sf"
3805 [(set (match_operand:V4SF 0 "register_operand" "")
3807 (match_operand:SF 1 "nonimmediate_operand" "")))]
3811 operands[1] = force_reg (SFmode, operands[1]);
;; AVX broadcast of an SF scalar to V4SF:
;;   0: register source -> vshufps with immediate 0, source used twice
;;   1: memory source   -> vbroadcastss
3814 (define_insn "*vec_dupv4sf_avx"
3815 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3817 (match_operand:SF 1 "nonimmediate_operand" "x,m")))]
3820 vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0}
3821 vbroadcastss\t{%1, %0|%0, %1}"
3822 [(set_attr "type" "sselog1,ssemov")
3823 (set_attr "length_immediate" "1,0")
3824 (set_attr "prefix_extra" "0,1")
3825 (set_attr "prefix" "vex")
3826 (set_attr "mode" "V4SF")])
;; Legacy broadcast of an SF scalar to V4SF.  The source must be a
;; register tied to the destination; the broadcast is done in place with
;; shufps and immediate 0.
3828 (define_insn "*vec_dupv4sf"
3829 [(set (match_operand:V4SF 0 "register_operand" "=x")
3831 (match_operand:SF 1 "register_operand" "0")))]
3833 "shufps\t{$0, %0, %0|%0, %0, 0}"
3834 [(set_attr "type" "sselog1")
3835 (set_attr "length_immediate" "1")
3836 (set_attr "mode" "V4SF")])
;; AVX build of a V2SF from two SF values.  Alternatives:
;;   0: reg, reg       -> vunpcklps
;;   1: reg, mem       -> vinsertps with immediate 0x10
;;   2: mem, constant C -> vmovss load
;;   3: MMX register destination -> punpckldq
;;   4: MMX register destination, mem + constant C -> movd
;; The prefix attribute is "orig" for the two MMX alternatives (3 and 4)
;; and "vex" for the rest.
3838 (define_insn "*vec_concatv2sf_avx"
3839 [(set (match_operand:V2SF 0 "register_operand" "=x,x,x,*y ,*y")
3841 (match_operand:SF 1 "nonimmediate_operand" " x,x,m, x , m")
3842 (match_operand:SF 2 "vector_move_operand" " x,m,C,*ym, C")))]
3845 vunpcklps\t{%2, %1, %0|%0, %1, %2}
3846 vinsertps\t{$0x10, %2, %1, %0|%0, %1, %2, 0x10}
3847 vmovss\t{%1, %0|%0, %1}
3848 punpckldq\t{%2, %0|%0, %2}
3849 movd\t{%1, %0|%0, %1}"
3850 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
3851 (set_attr "length_immediate" "*,1,*,*,*")
3852 (set_attr "prefix_extra" "*,1,*,*,*")
3853 (set (attr "prefix")
3854 (if_then_else (eq_attr "alternative" "3,4")
3855 (const_string "orig")
3856 (const_string "vex")))
3857 (set_attr "mode" "V4SF,V4SF,SF,DI,DI")])
3859 ;; Although insertps takes register source, we prefer
3860 ;; unpcklps with register source since it is shorter.
;; SSE4.1 build of a V2SF from two SF values; operand 1 is tied to the
;; destination wherever it is a register.  Alternatives:
;;   0: register operand 2 -> unpcklps
;;   1: memory operand 2   -> insertps with immediate 0x10
;;   2: mem + constant C   -> movss load
;;   3: MMX register destination -> punpckldq
;;   4: MMX register destination, mem + constant C -> movd
3861 (define_insn "*vec_concatv2sf_sse4_1"
3862 [(set (match_operand:V2SF 0 "register_operand" "=x,x,x,*y ,*y")
3864 (match_operand:SF 1 "nonimmediate_operand" " 0,0,m, 0 , m")
3865 (match_operand:SF 2 "vector_move_operand" " x,m,C,*ym, C")))]
3868 unpcklps\t{%2, %0|%0, %2}
3869 insertps\t{$0x10, %2, %0|%0, %2, 0x10}
3870 movss\t{%1, %0|%0, %1}
3871 punpckldq\t{%2, %0|%0, %2}
3872 movd\t{%1, %0|%0, %1}"
3873 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
3874 (set_attr "prefix_data16" "*,1,*,*,*")
3875 (set_attr "prefix_extra" "*,1,*,*,*")
3876 (set_attr "length_immediate" "*,1,*,*,*")
3877 (set_attr "mode" "V4SF,V4SF,SF,DI,DI")])
3879 ;; ??? In theory we can match memory for the MMX alternative, but allowing
3880 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
3881 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Baseline SSE build of a V2SF from two SF values.  Operand 2 is limited
;; to a register or zero (reg_or_0_operand).  Alternatives:
;;   0: register operand 2, operand 1 tied -> unpcklps
;;   1: mem + constant C                   -> movss load
;;   2: MMX registers                      -> punpckldq
;;   3: MMX destination, mem + constant C  -> movd
3882 (define_insn "*vec_concatv2sf_sse"
3883 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
3885 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
3886 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
3889 unpcklps\t{%2, %0|%0, %2}
3890 movss\t{%1, %0|%0, %1}
3891 punpckldq\t{%2, %0|%0, %2}
3892 movd\t{%1, %0|%0, %1}"
3893 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
3894 (set_attr "mode" "V4SF,SF,DI,DI")])
;; AVX build of a V4SF from two V2SF halves:
;;   0: register operand 2 -> vmovlhps
;;   1: memory operand 2   -> vmovhps
3896 (define_insn "*vec_concatv4sf_avx"
3897 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3899 (match_operand:V2SF 1 "register_operand" " x,x")
3900 (match_operand:V2SF 2 "nonimmediate_operand" " x,m")))]
3903 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3904 vmovhps\t{%2, %1, %0|%0, %1, %2}"
3905 [(set_attr "type" "ssemov")
3906 (set_attr "prefix" "vex")
3907 (set_attr "mode" "V4SF,V2SF")])
;; Legacy build of a V4SF from two V2SF halves; operand 1 is tied to the
;; destination:
;;   0: register operand 2 -> movlhps
;;   1: memory operand 2   -> movhps
3909 (define_insn "*vec_concatv4sf_sse"
3910 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3912 (match_operand:V2SF 1 "register_operand" " 0,0")
3913 (match_operand:V2SF 2 "nonimmediate_operand" " x,m")))]
3916 movlhps\t{%2, %0|%0, %2}
3917 movhps\t{%2, %0|%0, %2}"
3918 [(set_attr "type" "ssemov")
3919 (set_attr "mode" "V4SF,V2SF")])
;; Expander for vector initialisation over all SSEMODE modes.  All work
;; is delegated to ix86_expand_vector_init, called with false as its
;; first argument, the destination and the initialiser operand.
3921 (define_expand "vec_init<mode>"
3922 [(match_operand:SSEMODE 0 "register_operand" "")
3923 (match_operand 1 "" "")]
3926 ix86_expand_vector_init (false, operands[0], operands[1]);
;; AVX insertion of a scalar (operand 2) into a SSEMODE4S vector via
;; vec_merge with operand 1.  Alternatives, by column:
;;   0: x scalar, operand 1 = C     -> vinsertps 0xe
;;   1: m scalar, operand 1 = C     -> vmov<ssescalarmodesuffix2s> load
;;   2: *r scalar, operand 1 = C    -> vmovd
;;   3: x scalar, x vector          -> vmovss (three-operand)
;;   4: *rm scalar, x vector        -> vpinsrd with immediate 0
;;   5: memory destination tied to operand 1; type and mode are "*" and
;;      no template line is visible for it here.
3930 (define_insn "*vec_set<mode>_0_avx"
3931 [(set (match_operand:SSEMODE4S 0 "nonimmediate_operand" "=x,x, x,x, x,m")
3932 (vec_merge:SSEMODE4S
3933 (vec_duplicate:SSEMODE4S
3934 (match_operand:<ssescalarmode> 2
3935 "general_operand" " x,m,*r,x,*rm,x*rfF"))
3936 (match_operand:SSEMODE4S 1 "vector_move_operand" " C,C, C,x, x,0")
3940 vinsertps\t{$0xe, %2, %2, %0|%0, %2, %2, 0xe}
3941 vmov<ssescalarmodesuffix2s>\t{%2, %0|%0, %2}
3942 vmovd\t{%2, %0|%0, %2}
3943 vmovss\t{%2, %1, %0|%0, %1, %2}
3944 vpinsrd\t{$0, %2, %1, %0|%0, %1, %2, 0}
3946 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,sselog,*")
3947 (set_attr "prefix_extra" "*,*,*,*,1,*")
3948 (set_attr "length_immediate" "*,*,*,*,1,*")
3949 (set_attr "prefix" "vex")
3950 (set_attr "mode" "SF,<ssescalarmode>,SI,SF,TI,*")])
;; SSE4.1 insertion of a scalar (operand 2) into a SSEMODE4S vector via
;; vec_merge with operand 1.  Alternatives, by column:
;;   0: x scalar, operand 1 = C     -> insertps 0xe
;;   1: m scalar, operand 1 = C     -> mov<ssescalarmodesuffix2s> load
;;   2: *r scalar, operand 1 = C    -> movd
;;   3: x scalar, vector tied       -> movss
;;   4: *rm scalar, vector tied     -> pinsrd with immediate 0
;;   5: memory destination tied to operand 1; type and mode are "*" and
;;      no template line is visible for it here.
3952 (define_insn "*vec_set<mode>_0_sse4_1"
3953 [(set (match_operand:SSEMODE4S 0 "nonimmediate_operand" "=x,x, x,x, x,m")
3954 (vec_merge:SSEMODE4S
3955 (vec_duplicate:SSEMODE4S
3956 (match_operand:<ssescalarmode> 2
3957 "general_operand" " x,m,*r,x,*rm,*rfF"))
3958 (match_operand:SSEMODE4S 1 "vector_move_operand" " C,C, C,0, 0,0")
3962 insertps\t{$0xe, %2, %0|%0, %2, 0xe}
3963 mov<ssescalarmodesuffix2s>\t{%2, %0|%0, %2}
3964 movd\t{%2, %0|%0, %2}
3965 movss\t{%2, %0|%0, %2}
3966 pinsrd\t{$0, %2, %0|%0, %2, 0}
3968 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,sselog,*")
3969 (set_attr "prefix_extra" "*,*,*,*,1,*")
3970 (set_attr "length_immediate" "*,*,*,*,1,*")
3971 (set_attr "mode" "SF,<ssescalarmode>,SI,SF,TI,*")])
;; SSE2 insertion of a scalar (operand 2) into a SSEMODE4S vector via
;; vec_merge with operand 1.  Alternatives, by column:
;;   0: m scalar, operand 1 = C  -> mov<ssescalarmodesuffix2s> load
;;   1: *r scalar, operand 1 = C -> movd
;;   2: x scalar, vector tied    -> movss
;;   3: memory destination tied to operand 1; mode is "*" and no
;;      template line is visible for it here.
3973 (define_insn "*vec_set<mode>_0_sse2"
3974 [(set (match_operand:SSEMODE4S 0 "nonimmediate_operand" "=x, x,x,m")
3975 (vec_merge:SSEMODE4S
3976 (vec_duplicate:SSEMODE4S
3977 (match_operand:<ssescalarmode> 2
3978 "general_operand" " m,*r,x,x*rfF"))
3979 (match_operand:SSEMODE4S 1 "vector_move_operand" " C, C,0,0")
3983 mov<ssescalarmodesuffix2s>\t{%2, %0|%0, %2}
3984 movd\t{%2, %0|%0, %2}
3985 movss\t{%2, %0|%0, %2}
3987 [(set_attr "type" "ssemov")
3988 (set_attr "mode" "<ssescalarmode>,SI,SF,*")])
;; Baseline SSE insertion of a scalar (operand 2) into a SSEMODE4S
;; vector via vec_merge with operand 1.  Alternatives, by column:
;;   0: m scalar, operand 1 = C -> movss load
;;   1: x scalar, vector tied   -> movss
;;   2: memory destination tied to operand 1; no template line is
;;      visible for it here.
3990 (define_insn "vec_set<mode>_0"
3991 [(set (match_operand:SSEMODE4S 0 "nonimmediate_operand" "=x,x,m")
3992 (vec_merge:SSEMODE4S
3993 (vec_duplicate:SSEMODE4S
3994 (match_operand:<ssescalarmode> 2
3995 "general_operand" " m,x,x*rfF"))
3996 (match_operand:SSEMODE4S 1 "vector_move_operand" " C,0,0")
4000 movss\t{%2, %0|%0, %2}
4001 movss\t{%2, %0|%0, %2}
4003 [(set_attr "type" "ssemov")
4004 (set_attr "mode" "SF")])
4006 ;; A subset is vec_setv4sf.
;; AVX insertion of SF operand 2 into V4SF operand 1 at the element
;; named by operand 3, a one-hot mask (const_pow2_1_to_8_operand).  The
;; output code converts that mask into the vinsertps immediate:
;; exact_log2 of the mask, shifted left by 4.
4007 (define_insn "*vec_setv4sf_avx"
4008 [(set (match_operand:V4SF 0 "register_operand" "=x")
4011 (match_operand:SF 2 "nonimmediate_operand" "xm"))
4012 (match_operand:V4SF 1 "register_operand" "x")
4013 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
4016 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
4017 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4019 [(set_attr "type" "sselog")
4020 (set_attr "prefix_extra" "1")
4021 (set_attr "length_immediate" "1")
4022 (set_attr "prefix" "vex")
4023 (set_attr "mode" "V4SF")])
;; SSE4.1 insertion of SF operand 2 into V4SF operand 1 (tied to the
;; destination) at the element named by operand 3, a one-hot mask
;; (const_pow2_1_to_8_operand).  The output code converts that mask into
;; the insertps immediate: exact_log2 of the mask, shifted left by 4.
4025 (define_insn "*vec_setv4sf_sse4_1"
4026 [(set (match_operand:V4SF 0 "register_operand" "=x")
4029 (match_operand:SF 2 "nonimmediate_operand" "xm"))
4030 (match_operand:V4SF 1 "register_operand" "0")
4031 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
4034 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
4035 return "insertps\t{%3, %2, %0|%0, %2, %3}";
4037 [(set_attr "type" "sselog")
4038 (set_attr "prefix_data16" "1")
4039 (set_attr "prefix_extra" "1")
4040 (set_attr "length_immediate" "1")
4041 (set_attr "mode" "V4SF")])
;; AVX vinsertps modelled as an unspec with a raw 8-bit immediate
;; (operand 3, 0..255) passed straight through to the instruction.
;; Operand 2 may be a register or memory; operand 1 is a register.
4043 (define_insn "*avx_insertps"
4044 [(set (match_operand:V4SF 0 "register_operand" "=x")
4045 (unspec:V4SF [(match_operand:V4SF 2 "nonimmediate_operand" "xm")
4046 (match_operand:V4SF 1 "register_operand" "x")
4047 (match_operand:SI 3 "const_0_to_255_operand" "n")]
4050 "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4051 [(set_attr "type" "sselog")
4052 (set_attr "prefix" "vex")
4053 (set_attr "prefix_extra" "1")
4054 (set_attr "length_immediate" "1")
4055 (set_attr "mode" "V4SF")])
;; SSE4.1 insertps modelled as an unspec with a raw 8-bit immediate
;; (operand 3, 0..255) passed straight through to the instruction.
;; Operand 1 is tied to the destination; operand 2 is restricted to a
;; register in this pattern.
4057 (define_insn "sse4_1_insertps"
4058 [(set (match_operand:V4SF 0 "register_operand" "=x")
4059 (unspec:V4SF [(match_operand:V4SF 2 "register_operand" "x")
4060 (match_operand:V4SF 1 "register_operand" "0")
4061 (match_operand:SI 3 "const_0_to_255_operand" "n")]
4064 "insertps\t{%3, %2, %0|%0, %2, %3}";
4065 [(set_attr "type" "sselog")
4066 (set_attr "prefix_data16" "1")
4067 (set_attr "prefix_extra" "1")
4068 (set_attr "length_immediate" "1")
4069 (set_attr "mode" "V4SF")])
;; Post-reload rewrite for a V4SF memory destination receiving an SF
;; non-memory value: the store is re-emitted as a plain SFmode move to
;; offset 0 of the memory operand (adjust_address).
;; NOTE(review): presumably this handles the memory alternative of the
;; vec_set<mode>_0 patterns -- confirm against the full pattern.
4072 [(set (match_operand:V4SF 0 "memory_operand" "")
4075 (match_operand:SF 1 "nonmemory_operand" ""))
4078 "TARGET_SSE && reload_completed"
4081 emit_move_insn (adjust_address (operands[0], SFmode, 0), operands[1]);
;; Expander for setting one element of a SSEMODE vector.  Operand 1 is
;; the scalar value and operand 2 the constant element index; all work
;; is delegated to ix86_expand_vector_set, called with false as its
;; first argument.
4085 (define_expand "vec_set<mode>"
4086 [(match_operand:SSEMODE 0 "register_operand" "")
4087 (match_operand:<ssescalarmode> 1 "register_operand" "")
4088 (match_operand 2 "const_int_operand" "")]
4091 ix86_expand_vector_set (false, operands[0], operands[1],
4092 INTVAL (operands[2]));
;; Extract element 0 of a V4SF as an SF value.  Destinations may be an
;; SSE register, memory, an x87 register ("f") or a general register
;; ("r"); memory-to-memory is excluded by the condition.  After reload
;; the insn is split into a plain SF move: operand 1 is re-expressed in
;; SFmode (via gen_rtx_REG on its register number, or gen_lowpart) and
;; moved into operand 0.
4096 (define_insn_and_split "*vec_extractv4sf_0"
4097 [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,f,r")
4099 (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m,m")
4100 (parallel [(const_int 0)])))]
4101 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4103 "&& reload_completed"
4106 rtx op1 = operands[1];
4108 op1 = gen_rtx_REG (SFmode, REGNO (op1));
4110 op1 = gen_lowpart (SFmode, op1);
4111 emit_move_insn (operands[0], op1);
;; Expander for vextractf128 over all AVX256MODE modes.  Operand 2 is a
;; constant 0 or 1; the switch dispatches to gen_vec_extract_lo_<mode> or
;; gen_vec_extract_hi_<mode> accordingly.
4115 (define_expand "avx_vextractf128<mode>"
4116 [(match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "")
4117 (match_operand:AVX256MODE 1 "register_operand" "")
4118 (match_operand:SI 2 "const_0_to_1_operand" "")]
4121 switch (INTVAL (operands[2]))
4124 emit_insn (gen_vec_extract_lo_<mode> (operands[0], operands[1]));
4127 emit_insn (gen_vec_extract_hi_<mode> (operands[0], operands[1]));
;; Low-half extraction for the AVX256MODE4P modes: selects elements 0
;; and 1 and emits vextractf128 with immediate 0.  The destination may
;; be a register or memory (second alternative is a store).
4135 (define_insn "vec_extract_lo_<mode>"
4136 [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
4137 (vec_select:<avxhalfvecmode>
4138 (match_operand:AVX256MODE4P 1 "register_operand" "x,x")
4139 (parallel [(const_int 0) (const_int 1)])))]
4141 "vextractf128\t{$0x0, %1, %0|%0, %1, 0x0}"
4142 [(set_attr "type" "sselog")
4143 (set_attr "prefix_extra" "1")
4144 (set_attr "length_immediate" "1")
4145 (set_attr "memory" "none,store")
4146 (set_attr "prefix" "vex")
4147 (set_attr "mode" "V8SF")])
;; High half (elements 2-3) of an AVX256MODE4P vector, to a register or to
;; memory; emitted as vextractf128 with immediate 1.
4149 (define_insn "vec_extract_hi_<mode>"
4150 [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
4151 (vec_select:<avxhalfvecmode>
4152 (match_operand:AVX256MODE4P 1 "register_operand" "x,x")
4153 (parallel [(const_int 2) (const_int 3)])))]
4155 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
4156 [(set_attr "type" "sselog")
4157 (set_attr "prefix_extra" "1")
4158 (set_attr "length_immediate" "1")
4159 (set_attr "memory" "none,store")
4160 (set_attr "prefix" "vex")
4161 (set_attr "mode" "V8SF")])
;; Low half (elements 0-3) of an AVX256MODE8P vector, to a register or to
;; memory.  The vextractf128 immediate selects the 128-bit half (0 = low,
;; 1 = high), so the low-half pattern must emit immediate 0; immediate 1
;; would return elements 4-7, i.e. what vec_extract_hi_<mode> describes.
4163 (define_insn "vec_extract_lo_<mode>"
4164 [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
4165 (vec_select:<avxhalfvecmode>
4166 (match_operand:AVX256MODE8P 1 "register_operand" "x,x")
4167 (parallel [(const_int 0) (const_int 1)
4168 (const_int 2) (const_int 3)])))]
4170 "vextractf128\t{$0x0, %1, %0|%0, %1, 0x0}"
4171 [(set_attr "type" "sselog")
4172 (set_attr "prefix_extra" "1")
4173 (set_attr "length_immediate" "1")
4174 (set_attr "memory" "none,store")
4175 (set_attr "prefix" "vex")
4176 (set_attr "mode" "V8SF")])
;; High half (elements 4-7) of an AVX256MODE8P vector, to a register or to
;; memory; emitted as vextractf128 with immediate 1.
4178 (define_insn "vec_extract_hi_<mode>"
4179 [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
4180 (vec_select:<avxhalfvecmode>
4181 (match_operand:AVX256MODE8P 1 "register_operand" "x,x")
4182 (parallel [(const_int 4) (const_int 5)
4183 (const_int 6) (const_int 7)])))]
4185 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
4186 [(set_attr "type" "sselog")
4187 (set_attr "prefix_extra" "1")
4188 (set_attr "length_immediate" "1")
4189 (set_attr "memory" "none,store")
4190 (set_attr "prefix" "vex")
4191 (set_attr "mode" "V8SF")])
;; Low half (elements 0-7) of a V16HI vector as V8HI, to a register or to
;; memory.  The vextractf128 immediate selects the 128-bit half (0 = low,
;; 1 = high), so this pattern must emit immediate 0.
4193 (define_insn "vec_extract_lo_v16hi"
4194 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
4196 (match_operand:V16HI 1 "register_operand" "x,x")
4197 (parallel [(const_int 0) (const_int 1)
4198 (const_int 2) (const_int 3)
4199 (const_int 4) (const_int 5)
4200 (const_int 6) (const_int 7)])))]
4202 "vextractf128\t{$0x0, %1, %0|%0, %1, 0x0}"
4203 [(set_attr "type" "sselog")
4204 (set_attr "prefix_extra" "1")
4205 (set_attr "length_immediate" "1")
4206 (set_attr "memory" "none,store")
4207 (set_attr "prefix" "vex")
4208 (set_attr "mode" "V8SF")])
;; High half (elements 8-15) of a V16HI vector as V8HI, to a register or to
;; memory; emitted as vextractf128 with immediate 1.
4210 (define_insn "vec_extract_hi_v16hi"
4211 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
4213 (match_operand:V16HI 1 "register_operand" "x,x")
4214 (parallel [(const_int 8) (const_int 9)
4215 (const_int 10) (const_int 11)
4216 (const_int 12) (const_int 13)
4217 (const_int 14) (const_int 15)])))]
4219 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
4220 [(set_attr "type" "sselog")
4221 (set_attr "prefix_extra" "1")
4222 (set_attr "length_immediate" "1")
4223 (set_attr "memory" "none,store")
4224 (set_attr "prefix" "vex")
4225 (set_attr "mode" "V8SF")])
;; Low half (elements 0-15) of a V32QI vector as V16QI, to a register or to
;; memory.  The vextractf128 immediate selects the 128-bit half (0 = low,
;; 1 = high), so this pattern must emit immediate 0.
4227 (define_insn "vec_extract_lo_v32qi"
4228 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
4230 (match_operand:V32QI 1 "register_operand" "x,x")
4231 (parallel [(const_int 0) (const_int 1)
4232 (const_int 2) (const_int 3)
4233 (const_int 4) (const_int 5)
4234 (const_int 6) (const_int 7)
4235 (const_int 8) (const_int 9)
4236 (const_int 10) (const_int 11)
4237 (const_int 12) (const_int 13)
4238 (const_int 14) (const_int 15)])))]
4240 "vextractf128\t{$0x0, %1, %0|%0, %1, 0x0}"
4241 [(set_attr "type" "sselog")
4242 (set_attr "prefix_extra" "1")
4243 (set_attr "length_immediate" "1")
4244 (set_attr "memory" "none,store")
4245 (set_attr "prefix" "vex")
4246 (set_attr "mode" "V8SF")])
;; High half (elements 16-31) of a V32QI vector as V16QI, to a register or
;; to memory; emitted as vextractf128 with immediate 1.
4248 (define_insn "vec_extract_hi_v32qi"
4249 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
4251 (match_operand:V32QI 1 "register_operand" "x,x")
4252 (parallel [(const_int 16) (const_int 17)
4253 (const_int 18) (const_int 19)
4254 (const_int 20) (const_int 21)
4255 (const_int 22) (const_int 23)
4256 (const_int 24) (const_int 25)
4257 (const_int 26) (const_int 27)
4258 (const_int 28) (const_int 29)
4259 (const_int 30) (const_int 31)])))]
4261 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
4262 [(set_attr "type" "sselog")
4263 (set_attr "prefix_extra" "1")
4264 (set_attr "length_immediate" "1")
4265 (set_attr "memory" "none,store")
4266 (set_attr "prefix" "vex")
4267 (set_attr "mode" "V8SF")])
;; Extract element operand 2 (0..3) of a V4SF register into a general
;; register or memory ("rm") with extractps.  The "%v" template prefix and
;; the maybe_vex attribute presumably let the same pattern emit the VEX form.
4269 (define_insn "*sse4_1_extractps"
4270 [(set (match_operand:SF 0 "nonimmediate_operand" "=rm")
4272 (match_operand:V4SF 1 "register_operand" "x")
4273 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
4275 "%vextractps\t{%2, %1, %0|%0, %1, %2}"
4276 [(set_attr "type" "sselog")
4277 (set_attr "prefix_data16" "1")
4278 (set_attr "prefix_extra" "1")
4279 (set_attr "length_immediate" "1")
4280 (set_attr "prefix" "maybe_vex")
4281 (set_attr "mode" "V4SF")])
;; Extract element operand 2 (0..3) from a V4SF in offsettable memory: the
;; split body loads the SFmode value at byte offset i*4 of operand 1.
;; NOTE(review): the insn and split conditions are not visible here.
4283 (define_insn_and_split "*vec_extract_v4sf_mem"
4284 [(set (match_operand:SF 0 "register_operand" "=x*rf")
4286 (match_operand:V4SF 1 "memory_operand" "o")
4287 (parallel [(match_operand 2 "const_0_to_3_operand" "n")])))]
4293 int i = INTVAL (operands[2]);
4295 emit_move_insn (operands[0], adjust_address (operands[1], SFmode, i*4));
;; Expander: operand 0 is the scalar destination, operand 1 the vector and
;; operand 2 a const_int; all work is delegated to ix86_expand_vector_extract.
;; NOTE(review): the meaning of the leading `false' argument is defined by
;; that helper, not here.
4299 (define_expand "vec_extract<mode>"
4300 [(match_operand:<ssescalarmode> 0 "register_operand" "")
4301 (match_operand:SSEMODE 1 "register_operand" "")
4302 (match_operand 2 "const_int_operand" "")]
4305 ix86_expand_vector_extract (false, operands[0], operands[1],
4306 INTVAL (operands[2]));
4310 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4312 ;; Parallel double-precision floating point element swizzling
4314 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4316 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; Selection indices are 1, 5, 3, 7; presumably they index the concatenation
;; of operands 1 and 2 (the vec_select/vec_concat lines are not visible).
4317 (define_insn "avx_unpckhpd256"
4318 [(set (match_operand:V4DF 0 "register_operand" "=x")
4321 (match_operand:V4DF 1 "register_operand" "x")
4322 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4323 (parallel [(const_int 1) (const_int 5)
4324 (const_int 3) (const_int 7)])))]
4326 "vunpckhpd\t{%2, %1, %0|%0, %1, %2}"
4327 [(set_attr "type" "sselog")
4328 (set_attr "prefix" "vex")
4329 (set_attr "mode" "V4DF")])
;; Expander for the V2DF high interleave.  When
;; ix86_vec_interleave_v2df_operator_ok rejects the operand combination,
;; operand 2 is forced into a register.
4331 (define_expand "vec_interleave_highv2df"
4332 [(set (match_operand:V2DF 0 "register_operand" "")
4335 (match_operand:V2DF 1 "nonimmediate_operand" "")
4336 (match_operand:V2DF 2 "nonimmediate_operand" ""))
4337 (parallel [(const_int 1)
4341 if (!ix86_vec_interleave_v2df_operator_ok (operands, 1))
4342 operands[2] = force_reg (V2DFmode, operands[2]);
;; AVX form of the V2DF high interleave.  Alternatives, in order:
;;   0: register operands, vunpckhpd;
;;   1: operand 2 matches memory operand 1, vmovddup of %H1;
;;   2: operand 1 in memory, operand 2 in a register, vmovlpd of %H1;
;;   3: memory destination matching operand 2, vmovhpd store of operand 1.
;; NOTE(review): %H1 presumably addresses the upper 8 bytes of operand 1.
4345 (define_insn "*avx_interleave_highv2df"
4346 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,m")
4349 (match_operand:V2DF 1 "nonimmediate_operand" " x,o,o,x")
4350 (match_operand:V2DF 2 "nonimmediate_operand" " x,1,x,0"))
4351 (parallel [(const_int 1)
4353 "TARGET_AVX && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
4355 vunpckhpd\t{%2, %1, %0|%0, %1, %2}
4356 vmovddup\t{%H1, %0|%0, %H1}
4357 vmovlpd\t{%H1, %2, %0|%0, %2, %H1}
4358 vmovhpd\t{%1, %0|%0, %1}"
4359 [(set_attr "type" "sselog,sselog,ssemov,ssemov")
4360 (set_attr "prefix" "vex")
4361 (set_attr "mode" "V2DF,V2DF,V1DF,V1DF")])
;; SSE3 two-operand form of the V2DF high interleave.  Alternatives, in order:
;;   0: operand 1 tied to the destination, unpckhpd;
;;   1: operand 2 matches memory operand 1, movddup of %H1;
;;   2: operand 2 tied to the destination, movlpd load of %H1;
;;   3: memory destination matching operand 2, movhpd store of operand 1.
4363 (define_insn "*sse3_interleave_highv2df"
4364 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,m")
4367 (match_operand:V2DF 1 "nonimmediate_operand" " 0,o,o,x")
4368 (match_operand:V2DF 2 "nonimmediate_operand" " x,1,0,0"))
4369 (parallel [(const_int 1)
4371 "TARGET_SSE3 && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
4373 unpckhpd\t{%2, %0|%0, %2}
4374 movddup\t{%H1, %0|%0, %H1}
4375 movlpd\t{%H1, %0|%0, %H1}
4376 movhpd\t{%1, %0|%0, %1}"
4377 [(set_attr "type" "sselog,sselog,ssemov,ssemov")
4378 (set_attr "prefix_data16" "*,*,1,1")
4379 (set_attr "mode" "V2DF,V2DF,V1DF,V1DF")])
;; SSE2 form of the V2DF high interleave: the same alternatives as the SSE3
;; pattern of this name minus the movddup one (unpckhpd, movlpd load of %H1,
;; movhpd store).
4381 (define_insn "*sse2_interleave_highv2df"
4382 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
4385 (match_operand:V2DF 1 "nonimmediate_operand" " 0,o,x")
4386 (match_operand:V2DF 2 "nonimmediate_operand" " x,0,0"))
4387 (parallel [(const_int 1)
4389 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
4391 unpckhpd\t{%2, %0|%0, %2}
4392 movlpd\t{%H1, %0|%0, %H1}
4393 movhpd\t{%1, %0|%0, %1}"
4394 [(set_attr "type" "sselog,ssemov,ssemov")
4395 (set_attr "prefix_data16" "*,1,1")
4396 (set_attr "mode" "V2DF,V1DF,V1DF")])
4398 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; Expander with a single visible input (operand 1) and selection indices
;; 0, 4, 2, 6.
;; NOTE(review): the second vec_concat input and the condition are not
;; visible; presumably operand 1 is used for both halves.
4399 (define_expand "avx_movddup256"
4400 [(set (match_operand:V4DF 0 "register_operand" "")
4403 (match_operand:V4DF 1 "nonimmediate_operand" "")
4405 (parallel [(const_int 0) (const_int 4)
4406 (const_int 2) (const_int 6)])))]
;; Named expander for the 256-bit low unpack: two V4DF inputs and selection
;; indices 0, 4, 2, 6.
;; NOTE(review): the condition and any preparation code are not visible.
4410 (define_expand "avx_unpcklpd256"
4411 [(set (match_operand:V4DF 0 "register_operand" "")
4414 (match_operand:V4DF 1 "register_operand" "")
4415 (match_operand:V4DF 2 "nonimmediate_operand" ""))
4416 (parallel [(const_int 0) (const_int 4)
4417 (const_int 2) (const_int 6)])))]
;; Matcher for the 256-bit low unpack.  Alternative 0 (operand 2 matches
;; operand 1) emits vmovddup; alternative 1 emits vunpcklpd.  The visible
;; part of the condition allows a memory operand 1 only when operands 1 and
;; 2 are the same rtx.
4421 (define_insn "*avx_unpcklpd256"
4422 [(set (match_operand:V4DF 0 "register_operand" "=x,x")
4425 (match_operand:V4DF 1 "nonimmediate_operand" "xm,x")
4426 (match_operand:V4DF 2 "nonimmediate_operand" " 1,xm"))
4427 (parallel [(const_int 0) (const_int 4)
4428 (const_int 2) (const_int 6)])))]
4430 && (!MEM_P (operands[1]) || rtx_equal_p (operands[1], operands[2]))"
4432 vmovddup\t{%1, %0|%0, %1}
4433 vunpcklpd\t{%2, %1, %0|%0, %1, %2}"
4434 [(set_attr "type" "sselog")
4435 (set_attr "prefix" "vex")
4436 (set_attr "mode" "V4DF")])
;; Expander for the V2DF low interleave.  When
;; ix86_vec_interleave_v2df_operator_ok rejects the operand combination,
;; operand 1 is forced into a register.
4438 (define_expand "vec_interleave_lowv2df"
4439 [(set (match_operand:V2DF 0 "register_operand" "")
4442 (match_operand:V2DF 1 "nonimmediate_operand" "")
4443 (match_operand:V2DF 2 "nonimmediate_operand" ""))
4444 (parallel [(const_int 0)
4448 if (!ix86_vec_interleave_v2df_operator_ok (operands, 0))
4449 operands[1] = force_reg (V2DFmode, operands[1]);
;; AVX form of the V2DF low interleave.  Alternatives, in order:
;;   0: register operands, vunpcklpd;
;;   1: operand 2 matches memory operand 1, vmovddup;
;;   2: operand 2 in memory, vmovhpd load;
;;   3: memory destination matching operand 1, vmovlpd store to %H0.
4452 (define_insn "*avx_interleave_lowv2df"
4453 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,o")
4456 (match_operand:V2DF 1 "nonimmediate_operand" " x,m,x,0")
4457 (match_operand:V2DF 2 "nonimmediate_operand" " x,1,m,x"))
4458 (parallel [(const_int 0)
4460 "TARGET_AVX && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
4462 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4463 vmovddup\t{%1, %0|%0, %1}
4464 vmovhpd\t{%2, %1, %0|%0, %1, %2}
4465 vmovlpd\t{%2, %H0|%H0, %2}"
4466 [(set_attr "type" "sselog,sselog,ssemov,ssemov")
4467 (set_attr "prefix" "vex")
4468 (set_attr "mode" "V2DF,V2DF,V1DF,V1DF")])
;; SSE3 two-operand form of the V2DF low interleave.  Alternatives, in order:
;;   0: unpcklpd;  1: movddup when operand 2 matches memory operand 1;
;;   2: movhpd load of operand 2;  3: movlpd store to %H0.
4470 (define_insn "*sse3_interleave_lowv2df"
4471 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,o")
4474 (match_operand:V2DF 1 "nonimmediate_operand" " 0,m,0,0")
4475 (match_operand:V2DF 2 "nonimmediate_operand" " x,1,m,x"))
4476 (parallel [(const_int 0)
4478 "TARGET_SSE3 && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
4480 unpcklpd\t{%2, %0|%0, %2}
4481 movddup\t{%1, %0|%0, %1}
4482 movhpd\t{%2, %0|%0, %2}
4483 movlpd\t{%2, %H0|%H0, %2}"
4484 [(set_attr "type" "sselog,sselog,ssemov,ssemov")
4485 (set_attr "prefix_data16" "*,*,1,1")
4486 (set_attr "mode" "V2DF,V2DF,V1DF,V1DF")])
;; SSE2 form of the V2DF low interleave; operand 1 is tied to the
;; destination in every alternative (unpcklpd, movhpd load, movlpd store
;; to %H0).
4488 (define_insn "*sse2_interleave_lowv2df"
4489 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o")
4492 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0")
4493 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x"))
4494 (parallel [(const_int 0)
4496 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
4498 unpcklpd\t{%2, %0|%0, %2}
4499 movhpd\t{%2, %0|%0, %2}
4500 movlpd\t{%2, %H0|%H0, %2}"
4501 [(set_attr "type" "sselog,ssemov,ssemov")
4502 (set_attr "prefix_data16" "*,1,1")
4503 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; Post-reload split (SSE3) with a memory destination and a register source:
;; the DFmode view of operand 1's register is stored to both halves of the
;; destination (byte offsets 0 and 8).
4506 [(set (match_operand:V2DF 0 "memory_operand" "")
4509 (match_operand:V2DF 1 "register_operand" "")
4511 (parallel [(const_int 0)
4513 "TARGET_SSE3 && reload_completed"
4516 rtx low = gen_rtx_REG (DFmode, REGNO (operands[1]));
4517 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
4518 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
;; Split (SSE3) for a select from a memory operand whose two indices satisfy
;; operand 3 == operand 2 + 2: it becomes a vec_duplicate of the DFmode
;; value at byte offset operand 2 * 8 of that memory operand.
4523 [(set (match_operand:V2DF 0 "register_operand" "")
4526 (match_operand:V2DF 1 "memory_operand" "")
4528 (parallel [(match_operand:SI 2 "const_0_to_1_operand" "")
4529 (match_operand:SI 3 "const_int_operand" "")])))]
4530 "TARGET_SSE3 && INTVAL (operands[2]) + 2 == INTVAL (operands[3])"
4531 [(set (match_dup 0) (vec_duplicate:V2DF (match_dup 1)))]
4533 operands[1] = adjust_address (operands[1], DFmode, INTVAL (operands[2]) * 8);
;; Expander: decodes the immediate in operand 3 into explicit element
;; selectors for avx_shufpd256_1.  Mask bit 1 picks 5 or 4, bit 2 picks 3 or
;; 2, bit 3 picks 7 or 6.
;; NOTE(review): the selector derived from mask bit 0 is on a line that is
;; not visible in this excerpt.
4536 (define_expand "avx_shufpd256"
4537 [(match_operand:V4DF 0 "register_operand" "")
4538 (match_operand:V4DF 1 "register_operand" "")
4539 (match_operand:V4DF 2 "nonimmediate_operand" "")
4540 (match_operand:SI 3 "const_int_operand" "")]
4543 int mask = INTVAL (operands[3]);
4544 emit_insn (gen_avx_shufpd256_1 (operands[0], operands[1], operands[2],
4546 GEN_INT (mask & 2 ? 5 : 4),
4547 GEN_INT (mask & 4 ? 3 : 2),
4548 GEN_INT (mask & 8 ? 7 : 6)));
;; 256-bit shufpd with explicit element selectors (operands 3-6).  The
;; output code folds them back into the 4-bit immediate:
;;   bit 0 = op3, bit 1 = op4 - 4, bit 2 = op5 - 2, bit 3 = op6 - 6,
;; and overwrites operands[3] with that immediate for the template.
4552 (define_insn "avx_shufpd256_1"
4553 [(set (match_operand:V4DF 0 "register_operand" "=x")
4556 (match_operand:V4DF 1 "register_operand" "x")
4557 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4558 (parallel [(match_operand 3 "const_0_to_1_operand" "")
4559 (match_operand 4 "const_4_to_5_operand" "")
4560 (match_operand 5 "const_2_to_3_operand" "")
4561 (match_operand 6 "const_6_to_7_operand" "")])))]
4565 mask = INTVAL (operands[3]);
4566 mask |= (INTVAL (operands[4]) - 4) << 1;
4567 mask |= (INTVAL (operands[5]) - 2) << 2;
4568 mask |= (INTVAL (operands[6]) - 6) << 3;
4569 operands[3] = GEN_INT (mask);
4571 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4573 [(set_attr "type" "sselog")
4574 (set_attr "length_immediate" "1")
4575 (set_attr "prefix" "vex")
4576 (set_attr "mode" "V4DF")])
;; Expander: decodes the immediate in operand 3 into element selectors for
;; sse2_shufpd_v2df.  Mask bit 1 picks 3 or 2.
;; NOTE(review): the selector derived from mask bit 0 is on a line that is
;; not visible in this excerpt.
4578 (define_expand "sse2_shufpd"
4579 [(match_operand:V2DF 0 "register_operand" "")
4580 (match_operand:V2DF 1 "register_operand" "")
4581 (match_operand:V2DF 2 "nonimmediate_operand" "")
4582 (match_operand:SI 3 "const_int_operand" "")]
4585 int mask = INTVAL (operands[3]);
4586 emit_insn (gen_sse2_shufpd_v2df (operands[0], operands[1], operands[2],
4588 GEN_INT (mask & 2 ? 3 : 2)));
;; Expander: forwards operands 0-2 to ix86_expand_vec_extract_even_odd with
;; a final argument of 0 (the odd variant passes 1).
4592 (define_expand "vec_extract_even<mode>"
4593 [(match_operand:SSEMODE_EO 0 "register_operand" "")
4594 (match_operand:SSEMODE_EO 1 "register_operand" "")
4595 (match_operand:SSEMODE_EO 2 "register_operand" "")]
4598 ix86_expand_vec_extract_even_odd (operands[0], operands[1], operands[2], 0);
;; Expander: forwards operands 0-2 to ix86_expand_vec_extract_even_odd with
;; a final argument of 1 (the even variant passes 0).
4602 (define_expand "vec_extract_odd<mode>"
4603 [(match_operand:SSEMODE_EO 0 "register_operand" "")
4604 (match_operand:SSEMODE_EO 1 "register_operand" "")
4605 (match_operand:SSEMODE_EO 2 "register_operand" "")]
4608 ix86_expand_vec_extract_even_odd (operands[0], operands[1], operands[2], 1);
4612 ;; punpcklqdq and punpckhqdq are shorter than shufpd.
;; AVX three-operand high interleave of two V2DI vectors (vpunpckhqdq).
;; NOTE(review): the insn condition is not visible in this excerpt.
4613 (define_insn "*avx_interleave_highv2di"
4614 [(set (match_operand:V2DI 0 "register_operand" "=x")
4617 (match_operand:V2DI 1 "register_operand" "x")
4618 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4619 (parallel [(const_int 1)
4622 "vpunpckhqdq\t{%2, %1, %0|%0, %1, %2}"
4623 [(set_attr "type" "sselog")
4624 (set_attr "prefix" "vex")
4625 (set_attr "mode" "TI")])
;; Two-operand high interleave of two V2DI vectors (punpckhqdq); operand 1
;; is tied to the destination.
;; NOTE(review): the insn condition is not visible in this excerpt.
4627 (define_insn "vec_interleave_highv2di"
4628 [(set (match_operand:V2DI 0 "register_operand" "=x")
4631 (match_operand:V2DI 1 "register_operand" "0")
4632 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4633 (parallel [(const_int 1)
4636 "punpckhqdq\t{%2, %0|%0, %2}"
4637 [(set_attr "type" "sselog")
4638 (set_attr "prefix_data16" "1")
4639 (set_attr "mode" "TI")])
;; AVX three-operand low interleave of two V2DI vectors (vpunpcklqdq).
;; NOTE(review): the insn condition is not visible in this excerpt.
4641 (define_insn "*avx_interleave_lowv2di"
4642 [(set (match_operand:V2DI 0 "register_operand" "=x")
4645 (match_operand:V2DI 1 "register_operand" "x")
4646 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4647 (parallel [(const_int 0)
4650 "vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}"
4651 [(set_attr "type" "sselog")
4652 (set_attr "prefix" "vex")
4653 (set_attr "mode" "TI")])
;; Two-operand low interleave of two V2DI vectors (punpcklqdq); operand 1 is
;; tied to the destination.
;; NOTE(review): the insn condition is not visible in this excerpt.
4655 (define_insn "vec_interleave_lowv2di"
4656 [(set (match_operand:V2DI 0 "register_operand" "=x")
4659 (match_operand:V2DI 1 "register_operand" "0")
4660 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4661 (parallel [(const_int 0)
4664 "punpcklqdq\t{%2, %0|%0, %2}"
4665 [(set_attr "type" "sselog")
4666 (set_attr "prefix_data16" "1")
4667 (set_attr "mode" "TI")])
;; AVX three-operand shufpd over the SSEMODE2D modes.  Operand 3 (0..1)
;; selects from operand 1 and operand 4 (2..3) from operand 2; the output
;; code folds them into the immediate: bit 0 = op3, bit 1 = op4 - 2.
4669 (define_insn "*avx_shufpd_<mode>"
4670 [(set (match_operand:SSEMODE2D 0 "register_operand" "=x")
4671 (vec_select:SSEMODE2D
4672 (vec_concat:<ssedoublesizemode>
4673 (match_operand:SSEMODE2D 1 "register_operand" "x")
4674 (match_operand:SSEMODE2D 2 "nonimmediate_operand" "xm"))
4675 (parallel [(match_operand 3 "const_0_to_1_operand" "")
4676 (match_operand 4 "const_2_to_3_operand" "")])))]
4680 mask = INTVAL (operands[3]);
4681 mask |= (INTVAL (operands[4]) - 2) << 1;
4682 operands[3] = GEN_INT (mask);
4684 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4686 [(set_attr "type" "sselog")
4687 (set_attr "length_immediate" "1")
4688 (set_attr "prefix" "vex")
4689 (set_attr "mode" "V2DF")])
;; Two-operand shufpd over the SSEMODE2D modes; operand 1 is tied to the
;; destination.  The output code folds the selectors into the immediate:
;; bit 0 = op3, bit 1 = op4 - 2.
4691 (define_insn "sse2_shufpd_<mode>"
4692 [(set (match_operand:SSEMODE2D 0 "register_operand" "=x")
4693 (vec_select:SSEMODE2D
4694 (vec_concat:<ssedoublesizemode>
4695 (match_operand:SSEMODE2D 1 "register_operand" "0")
4696 (match_operand:SSEMODE2D 2 "nonimmediate_operand" "xm"))
4697 (parallel [(match_operand 3 "const_0_to_1_operand" "")
4698 (match_operand 4 "const_2_to_3_operand" "")])))]
4702 mask = INTVAL (operands[3]);
4703 mask |= (INTVAL (operands[4]) - 2) << 1;
4704 operands[3] = GEN_INT (mask);
4706 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
4708 [(set_attr "type" "sselog")
4709 (set_attr "length_immediate" "1")
4710 (set_attr "mode" "V2DF")])
4712 ;; Avoid combining registers from different units in a single alternative,
4713 ;; see comment above inline_secondary_memory_needed function in i386.c
;; AVX extraction of element 1 of a V2DF as DF.  Alternative 0 stores with
;; vmovhpd, alternative 1 uses vunpckhpd between registers.
;; NOTE(review): the templates of the three alternatives that read
;; offsettable memory are not visible in this excerpt.
4714 (define_insn "*avx_storehpd"
4715 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
4717 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,o,o,o")
4718 (parallel [(const_int 1)])))]
4719 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4721 vmovhpd\t{%1, %0|%0, %1}
4722 vunpckhpd\t{%1, %1, %0|%0, %1, %1}
4726 [(set_attr "type" "ssemov,sselog1,ssemov,fmov,imov")
4727 (set_attr "prefix" "vex")
4728 (set_attr "mode" "V1DF,V2DF,DF,DF,DF")])
;; SSE2 extraction of element 1 of a V2DF as DF.  Alternative 0 stores with
;; movhpd; in alternative 1 operand 1 is tied to the destination.
;; NOTE(review): the templates of alternatives 1-4 are not visible here.
4730 (define_insn "sse2_storehpd"
4731 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
4733 (match_operand:V2DF 1 "nonimmediate_operand" " x,0,o,o,o")
4734 (parallel [(const_int 1)])))]
4735 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4737 movhpd\t{%1, %0|%0, %1}
4742 [(set_attr "type" "ssemov,sselog1,ssemov,fmov,imov")
4743 (set_attr "prefix_data16" "1,*,*,*,*")
4744 (set_attr "mode" "V1DF,V2DF,DF,DF,DF")])
;; Post-reload split: extracting element 1 of a V2DF in memory into a
;; register becomes a plain DFmode load from byte offset 8.
4747 [(set (match_operand:DF 0 "register_operand" "")
4749 (match_operand:V2DF 1 "memory_operand" "")
4750 (parallel [(const_int 1)])))]
4751 "TARGET_SSE2 && reload_completed"
4752 [(set (match_dup 0) (match_dup 1))]
4754 operands[1] = adjust_address (operands[1], DFmode, 8);
4757 ;; Avoid combining registers from different units in a single alternative,
4758 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Extraction of element 0 of a V2DF as DF.  Alternative 0 stores with
;; (v)movlpd.
;; NOTE(review): the templates of alternatives 1-4 are not visible here.
4759 (define_insn "sse2_storelpd"
4760 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
4762 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m,m,m")
4763 (parallel [(const_int 0)])))]
4764 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4766 %vmovlpd\t{%1, %0|%0, %1}
4771 [(set_attr "type" "ssemov,ssemov,ssemov,fmov,imov")
4772 (set_attr "prefix_data16" "1,*,*,*,*")
4773 (set_attr "prefix" "maybe_vex")
4774 (set_attr "mode" "V1DF,DF,DF,DF,DF")])
;; Post-reload split: extracting element 0 of a V2DF into a register becomes
;; a plain DFmode move of operand 1.
;; NOTE(review): the guards choosing between gen_rtx_REG and gen_lowpart are
;; not visible in this excerpt.
4777 [(set (match_operand:DF 0 "register_operand" "")
4779 (match_operand:V2DF 1 "nonimmediate_operand" "")
4780 (parallel [(const_int 0)])))]
4781 "TARGET_SSE2 && reload_completed"
4784 rtx op1 = operands[1];
4786 op1 = gen_rtx_REG (DFmode, REGNO (op1));
4788 op1 = gen_lowpart (DFmode, op1);
4789 emit_move_insn (operands[0], op1);
;; Expander wrapper for the loadhpd shape: keeps element 0 of operand 1 and
;; pairs it with DF operand 2.  Operands are first legitimized with
;; ix86_fixup_binary_operands.
4793 (define_expand "sse2_loadhpd_exp"
4794 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
4797 (match_operand:V2DF 1 "nonimmediate_operand" "")
4798 (parallel [(const_int 0)]))
4799 (match_operand:DF 2 "nonimmediate_operand" "")))]
4801 "ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);")
4803 ;; Avoid combining registers from different units in a single alternative,
4804 ;; see comment above inline_secondary_memory_needed function in i386.c
;; AVX loadhpd: keep element 0 of operand 1 and pair it with DF operand 2.
;; Alternative 0 loads operand 2 from memory with vmovhpd, alternative 1
;; combines two registers with vunpcklpd.
;; NOTE(review): the templates of the three memory-destination alternatives
;; are not visible in this excerpt.
4805 (define_insn "*avx_loadhpd"
4806 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o,o,o")
4809 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,0,0,0")
4810 (parallel [(const_int 0)]))
4811 (match_operand:DF 2 "nonimmediate_operand" " m,x,x,*f,r")))]
4812 "TARGET_AVX && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4814 vmovhpd\t{%2, %1, %0|%0, %1, %2}
4815 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4819 [(set_attr "type" "ssemov,sselog,ssemov,fmov,imov")
4820 (set_attr "prefix" "vex")
4821 (set_attr "mode" "V1DF,V2DF,DF,DF,DF")])
;; SSE2 loadhpd: result = { operand1[0], operand2 }.
;; Alternatives, in order:
;;   0: operand 2 in memory, movhpd load into the high half;
;;   1: both inputs in registers, unpcklpd;
;;   2-4: memory destination tied to operand 1 (NOTE(review): their
;;        templates are outside this excerpt).
;; There is deliberately no alternative with operand 2 tied to the
;; destination register: shufpd takes its low result element from the
;; destination, so it cannot place operand1[0] below operand 2 there.
4823 (define_insn "sse2_loadhpd"
4824 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o,o,o")
4827 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0,0,0")
4828 (parallel [(const_int 0)]))
4829 (match_operand:DF 2 "nonimmediate_operand" " m,x,x,*f,r")))]
4830 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4832 movhpd\t{%2, %0|%0, %2}
4833 unpcklpd\t{%2, %0|%0, %2}
4838 [(set_attr "type" "ssemov,sselog,ssemov,fmov,imov")
4839 (set_attr "prefix_data16" "1,*,*,*,*")
4841 (set_attr "mode" "V1DF,V2DF,DF,DF,DF")])
;; Post-reload split for a memory destination whose element 0 is kept
;; (vec_select of operand 0 itself) and whose high element is replaced by
;; register operand 1: a plain DFmode store at byte offset 8.
4844 [(set (match_operand:V2DF 0 "memory_operand" "")
4846 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
4847 (match_operand:DF 1 "register_operand" "")))]
4848 "TARGET_SSE2 && reload_completed"
4849 [(set (match_dup 0) (match_dup 1))]
4851 operands[0] = adjust_address (operands[0], DFmode, 8);
;; Expander wrapper for the loadlpd shape: pairs DF operand 2 with element 1
;; of operand 1.  Operands are first legitimized with
;; ix86_fixup_binary_operands.
4854 (define_expand "sse2_loadlpd_exp"
4855 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
4857 (match_operand:DF 2 "nonimmediate_operand" "")
4859 (match_operand:V2DF 1 "nonimmediate_operand" "")
4860 (parallel [(const_int 1)]))))]
4862 "ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);")
4864 ;; Avoid combining registers from different units in a single alternative,
4865 ;; see comment above inline_secondary_memory_needed function in i386.c
;; AVX loadlpd: pair DF operand 2 with element 1 of operand 1.
;; Alternatives 0-3 target a register: vmovsd load when operand 1 is the
;; "C" constant, vmovlpd load, vmovsd register merge, and vmovhpd from the
;; high half (%H1) of a memory operand 1.
;; NOTE(review): the templates of the three memory-destination alternatives
;; are not visible in this excerpt.
4866 (define_insn "*avx_loadlpd"
4867 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,m,m,m")
4869 (match_operand:DF 2 "nonimmediate_operand" " m,m,x,x,x,*f,r")
4871 (match_operand:V2DF 1 "vector_move_operand" " C,x,x,o,0,0,0")
4872 (parallel [(const_int 1)]))))]
4873 "TARGET_AVX && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4875 vmovsd\t{%2, %0|%0, %2}
4876 vmovlpd\t{%2, %1, %0|%0, %1, %2}
4877 vmovsd\t{%2, %1, %0|%0, %1, %2}
4878 vmovhpd\t{%H1, %2, %0|%0, %2, %H1}
4882 [(set_attr "type" "ssemov,ssemov,ssemov,ssemov,ssemov,fmov,imov")
4883 (set_attr "prefix" "vex")
4884 (set_attr "mode" "DF,V1DF,V1DF,V1DF,DF,DF,DF")])
;; SSE2 loadlpd: result = { operand2, operand1[1] }.
;; Alternatives 0-4 target a register:
;;   0: operand 1 is the "C" constant, movsd load of operand 2;
;;   1: movlpd load of operand 2 into the destination (= operand 1);
;;   2: movsd register merge into the destination (= operand 1);
;;   3: operand 2 tied to the destination, operand 1 in another register:
;;      shufpd $2 keeps the destination's low double and takes the high
;;      double from its source operand, which therefore must be operand 1;
;;   4: operand 2 tied to the destination, movhpd load of %H1.
;; NOTE(review): the templates of the three memory-destination alternatives
;; are not visible in this excerpt.
4886 (define_insn "sse2_loadlpd"
4887 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m,m,m")
4889 (match_operand:DF 2 "nonimmediate_operand" " m,m,x,0,0,x,*f,r")
4891 (match_operand:V2DF 1 "vector_move_operand" " C,0,0,x,o,0,0,0")
4892 (parallel [(const_int 1)]))))]
4893 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4895 movsd\t{%2, %0|%0, %2}
4896 movlpd\t{%2, %0|%0, %2}
4897 movsd\t{%2, %0|%0, %2}
4898 shufpd\t{$2, %1, %0|%0, %1, 2}
4899 movhpd\t{%H1, %0|%0, %H1}
4903 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov,fmov,imov")
4904 (set_attr "prefix_data16" "*,1,*,*,1,*,*,*")
4905 (set_attr "length_immediate" "*,*,*,1,*,*,*,*")
4906 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,DF,DF,DF")])
;; Post-reload split for a memory destination whose element 1 is kept
;; (vec_select of operand 0 itself) and whose element 0 is replaced by
;; register operand 1: a plain DFmode store to the low double, i.e. byte
;; offset 0.  Offset 8 would overwrite the element that must be preserved.
4909 [(set (match_operand:V2DF 0 "memory_operand" "")
4911 (match_operand:DF 1 "register_operand" "")
4912 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
4913 "TARGET_SSE2 && reload_completed"
4914 [(set (match_dup 0) (match_dup 1))]
4916 operands[0] = adjust_address (operands[0], DFmode, 0);
4919 ;; Not sure these two are ever used, but it doesn't hurt to have them.
;; SSE1-only (no SSE2) extraction of element 1 of a V2DF: movhps store,
;; movhlps between registers, or movlps load from the high half (%H1) of
;; offsettable memory.
4921 (define_insn "*vec_extractv2df_1_sse"
4922 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
4924 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
4925 (parallel [(const_int 1)])))]
4926 "!TARGET_SSE2 && TARGET_SSE
4927 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4929 movhps\t{%1, %0|%0, %1}
4930 movhlps\t{%1, %0|%0, %1}
4931 movlps\t{%H1, %0|%0, %H1}"
4932 [(set_attr "type" "ssemov")
4933 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; SSE1-only (no SSE2) extraction of element 0 of a V2DF: movlps store,
;; movaps between registers, or movlps load from memory.
4935 (define_insn "*vec_extractv2df_0_sse"
4936 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
4938 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
4939 (parallel [(const_int 0)])))]
4940 "!TARGET_SSE2 && TARGET_SSE
4941 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4943 movlps\t{%1, %0|%0, %1}
4944 movaps\t{%1, %0|%0, %1}
4945 movlps\t{%1, %0|%0, %1}"
4946 [(set_attr "type" "ssemov")
4947 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; AVX movsd-style combination of two V2DF operands.  Alternatives, in
;; order: vmovsd between registers, vmovlpd load of operand 2, vmovlpd store
;; of operand 2, vmovhps load from %H1, vmovhps store of operand 1 to %H0.
;; NOTE(review): the combining RTL code and the insn condition are not
;; visible in this excerpt.
4949 (define_insn "*avx_movsd"
4950 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m,x,o")
4952 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x,x,0")
4953 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,0,o,x")
4957 vmovsd\t{%2, %1, %0|%0, %1, %2}
4958 vmovlpd\t{%2, %1, %0|%0, %1, %2}
4959 vmovlpd\t{%2, %0|%0, %2}
4960 vmovhps\t{%H1, %2, %0|%0, %2, %H1}
4961 vmovhps\t{%1, %H0|%H0, %1}"
4962 [(set_attr "type" "ssemov,ssemov,ssemov,ssemov,ssemov")
4963 (set_attr "prefix" "vex")
4964 (set_attr "mode" "DF,V1DF,V1DF,V1DF,V1DF")])
;; SSE2 movsd-style combination of two V2DF operands.
;; NOTE(review): the combining RTL code and the insn condition are not
;; visible here; the alternatives below read as low double from operand 2,
;; high double from operand 1.
;; Alternatives, in order:
;;   0: movsd between registers (operand 1 tied to the destination);
;;   1: movlpd load of operand 2;
;;   2: movlpd store of operand 2 into the memory destination;
;;   3: operand 2 tied to the destination, operand 1 in another register:
;;      shufpd $2 keeps the destination's low double and takes the high
;;      double from its source operand, which therefore must be operand 1;
;;   4: movhps load from %H1;
;;   5: movhps store of operand 1 to %H0.
4966 (define_insn "sse2_movsd"
4967 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m,x,x,o")
4969 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x,0,0,0")
4970 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0,x,o,x")
4974 movsd\t{%2, %0|%0, %2}
4975 movlpd\t{%2, %0|%0, %2}
4976 movlpd\t{%2, %0|%0, %2}
4977 shufpd\t{$2, %1, %0|%0, %1, 2}
4978 movhps\t{%H1, %0|%0, %H1}
4979 movhps\t{%1, %H0|%H0, %1}"
4980 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
4981 (set_attr "prefix_data16" "*,1,1,*,*,*")
4982 (set_attr "length_immediate" "*,*,*,1,*,*")
4983 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,V1DF")])
;; Build a V2DF from a single DF operand (register or memory) with
;; (v)movddup.
;; NOTE(review): the RTL code wrapping operand 1 and the insn condition are
;; not visible in this excerpt.
4985 (define_insn "*vec_dupv2df_sse3"
4986 [(set (match_operand:V2DF 0 "register_operand" "=x")
4988 (match_operand:DF 1 "nonimmediate_operand" "xm")))]
4990 "%vmovddup\t{%1, %0|%0, %1}"
4991 [(set_attr "type" "sselog1")
4992 (set_attr "prefix" "maybe_vex")
4993 (set_attr "mode" "DF")])
;; Build a V2DF from a single DF register operand that is tied to the
;; destination ("0").
;; NOTE(review): the RTL code, condition and output template are not
;; visible in this excerpt.
4995 (define_insn "vec_dupv2df"
4996 [(set (match_operand:V2DF 0 "register_operand" "=x")
4998 (match_operand:DF 1 "register_operand" "0")))]
5001 [(set_attr "type" "sselog1")
5002 (set_attr "mode" "V2DF")])
;; V2DF built from DF operand 1 and emitted as (v)movddup.
;; NOTE(review): the second vec_concat input and the condition are not
;; visible; presumably the same operand is used for both halves.
5004 (define_insn "*vec_concatv2df_sse3"
5005 [(set (match_operand:V2DF 0 "register_operand" "=x")
5007 (match_operand:DF 1 "nonimmediate_operand" "xm")
5010 "%vmovddup\t{%1, %0|%0, %1}"
5011 [(set_attr "type" "sselog1")
5012 (set_attr "prefix" "maybe_vex")
5013 (set_attr "mode" "DF")])
;; AVX combination of two DF operands into a V2DF.  Alternatives, in order:
;; vunpcklpd of two registers, vmovhpd load of operand 2 from memory, and
;; vmovsd load of operand 1 when operand 2 is the "C" constant.
;; NOTE(review): the RTL code and insn condition are not visible here.
5015 (define_insn "*vec_concatv2df_avx"
5016 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
5018 (match_operand:DF 1 "nonimmediate_operand" " x,x,m")
5019 (match_operand:DF 2 "vector_move_operand" " x,m,C")))]
5022 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
5023 vmovhpd\t{%2, %1, %0|%0, %1, %2}
5024 vmovsd\t{%1, %0|%0, %1}"
5025 [(set_attr "type" "ssemov")
5026 (set_attr "prefix" "vex")
5027 (set_attr "mode" "DF,V1DF,DF")])
;; Non-AVX combination of two DF operands into a V2DF.  The first three
;; alternatives use the "Y2" register constraint (unpcklpd, movhpd, movsd);
;; the last two use plain "x" registers with movlhps / movhps.
;; NOTE(review): the RTL code and insn condition are not visible here.
5029 (define_insn "*vec_concatv2df"
5030 [(set (match_operand:V2DF 0 "register_operand" "=Y2,Y2,Y2,x,x")
5032 (match_operand:DF 1 "nonimmediate_operand" " 0 ,0 ,m ,0,0")
5033 (match_operand:DF 2 "vector_move_operand" " Y2,m ,C ,x,m")))]
5036 unpcklpd\t{%2, %0|%0, %2}
5037 movhpd\t{%2, %0|%0, %2}
5038 movsd\t{%1, %0|%0, %1}
5039 movlhps\t{%2, %0|%0, %2}
5040 movhps\t{%2, %0|%0, %2}"
5041 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,ssemov")
5042 (set_attr "prefix_data16" "*,1,*,*,*")
5043 (set_attr "mode" "V2DF,V1DF,DF,V4SF,V2SF")])
5045 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5047 ;; Parallel integral arithmetic
5049 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Integer vector negate expander: the preparation statement loads a zero
;; vector of the same mode into operands[2].
;; NOTE(review): the RTL code combining operand 2 with operand 1 is not
;; visible; presumably a subtraction from zero.
5051 (define_expand "neg<mode>2"
5052 [(set (match_operand:SSEMODEI 0 "register_operand" "")
5055 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")))]
5057 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
;; Named expander for integer vector add/subtract over SSEMODEI; operands
;; are legitimized with ix86_fixup_binary_operands_no_copy.
5059 (define_expand "<plusminus_insn><mode>3"
5060 [(set (match_operand:SSEMODEI 0 "register_operand" "")
5062 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
5063 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
5065 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; AVX three-operand integer vector add/subtract; the mnemonic is assembled
;; from "vp", the <plusminus_mnemonic> and the <ssevecsize> suffix.
5067 (define_insn "*avx_<plusminus_insn><mode>3"
5068 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
5070 (match_operand:SSEMODEI 1 "nonimmediate_operand" "<comm>x")
5071 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
5072 "TARGET_AVX && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5073 "vp<plusminus_mnemonic><ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5074 [(set_attr "type" "sseiadd")
5075 (set_attr "prefix" "vex")
5076 (set_attr "mode" "TI")])
;; SSE2 two-operand integer vector add/subtract; operand 1 is tied to the
;; destination.
5078 (define_insn "*<plusminus_insn><mode>3"
5079 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
5081 (match_operand:SSEMODEI 1 "nonimmediate_operand" "<comm>0")
5082 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
5083 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5084 "p<plusminus_mnemonic><ssevecsize>\t{%2, %0|%0, %2}"
5085 [(set_attr "type" "sseiadd")
5086 (set_attr "prefix_data16" "1")
5087 (set_attr "mode" "TI")])
;; Named expander for the sat_plusminus codes on the byte and word vector
;; modes (SSEMODE12); operands are legitimized with
;; ix86_fixup_binary_operands_no_copy.
5089 (define_expand "sse2_<plusminus_insn><mode>3"
5090 [(set (match_operand:SSEMODE12 0 "register_operand" "")
5091 (sat_plusminus:SSEMODE12
5092 (match_operand:SSEMODE12 1 "nonimmediate_operand" "")
5093 (match_operand:SSEMODE12 2 "nonimmediate_operand" "")))]
5095 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; AVX three-operand form of the sat_plusminus codes on SSEMODE12.
5097 (define_insn "*avx_<plusminus_insn><mode>3"
5098 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
5099 (sat_plusminus:SSEMODE12
5100 (match_operand:SSEMODE12 1 "nonimmediate_operand" "<comm>x")
5101 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
5102 "TARGET_AVX && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5103 "vp<plusminus_mnemonic><ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5104 [(set_attr "type" "sseiadd")
5105 (set_attr "prefix" "vex")
5106 (set_attr "mode" "TI")])
;; SSE2 two-operand form of the sat_plusminus codes on SSEMODE12; operand 1
;; is tied to the destination.
5108 (define_insn "*sse2_<plusminus_insn><mode>3"
5109 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
5110 (sat_plusminus:SSEMODE12
5111 (match_operand:SSEMODE12 1 "nonimmediate_operand" "<comm>0")
5112 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
5113 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5114 "p<plusminus_mnemonic><ssevecsize>\t{%2, %0|%0, %2}"
5115 [(set_attr "type" "sseiadd")
5116 (set_attr "prefix_data16" "1")
5117 (set_attr "mode" "TI")])
;; V16QI multiply, split while pseudos can still be created.  Six V16QI
;; temporaries are allocated: each input is interleaved with itself (high
;; and low halves) so every word holds a source byte in its low byte, the
;; halves are multiplied as V8HI, and the even bytes of the two products are
;; merged into operand 0.
;; NOTE(review): the first part of the condition and the declarations of
;; `t' and `i' are not visible in this excerpt.
5119 (define_insn_and_split "mulv16qi3"
5120 [(set (match_operand:V16QI 0 "register_operand" "")
5121 (mult:V16QI (match_operand:V16QI 1 "register_operand" "")
5122 (match_operand:V16QI 2 "register_operand" "")))]
5124 && can_create_pseudo_p ()"
5132 for (i = 0; i < 6; ++i)
5133 t[i] = gen_reg_rtx (V16QImode);
5135 /* Unpack data such that we've got a source byte in each low byte of
5136 each word. We don't care what goes into the high byte of each word.
5137 Rather than trying to get zero in there, most convenient is to let
5138 it be a copy of the low byte. */
5139 emit_insn (gen_vec_interleave_highv16qi (t[0], operands[1], operands[1]));
5140 emit_insn (gen_vec_interleave_highv16qi (t[1], operands[2], operands[2]));
5141 emit_insn (gen_vec_interleave_lowv16qi (t[2], operands[1], operands[1]));
5142 emit_insn (gen_vec_interleave_lowv16qi (t[3], operands[2], operands[2]));
5144 /* Multiply words. The end-of-line annotations here give a picture of what
5145 the output of that instruction looks like. Dot means don't care; the
5146 letters are the bytes of the result with A being the most significant. */
5147 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[4]), /* .A.B.C.D.E.F.G.H */
5148 gen_lowpart (V8HImode, t[0]),
5149 gen_lowpart (V8HImode, t[1])));
5150 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[5]), /* .I.J.K.L.M.N.O.P */
5151 gen_lowpart (V8HImode, t[2]),
5152 gen_lowpart (V8HImode, t[3])));
5154 /* Extract the even bytes and merge them back together. */
5155 ix86_expand_vec_extract_even_odd (operands[0], t[5], t[4], 0);
;; Expander for the V8HI multiply.  Its only preparation step is to pass
;; the operands through ix86_fixup_binary_operands_no_copy for MULT.
5159 (define_expand "mulv8hi3"
5160 [(set (match_operand:V8HI 0 "register_operand" "")
5161 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
5162 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
5164 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; AVX form of the V8HI multiply: VEX-encoded three-operand vpmullw.
;; Operands 1 and 2 are commutative ("%").
5166 (define_insn "*avx_mulv8hi3"
5167 [(set (match_operand:V8HI 0 "register_operand" "=x")
5168 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%x")
5169 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
5170 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5171 "vpmullw\t{%2, %1, %0|%0, %1, %2}"
5172 [(set_attr "type" "sseimul")
5173 (set_attr "prefix" "vex")
5174 (set_attr "mode" "TI")])
;; SSE2 form of the V8HI multiply: two-operand pmullw with operand 1 tied
;; to the destination.
5176 (define_insn "*mulv8hi3"
5177 [(set (match_operand:V8HI 0 "register_operand" "=x")
5178 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
5179 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
5180 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5181 "pmullw\t{%2, %0|%0, %2}"
5182 [(set_attr "type" "sseimul")
5183 (set_attr "prefix_data16" "1")
5184 (set_attr "mode" "TI")])
;; Expander for smulv8hi3_highpart on V8HI operands.  Preparation passes the
;; operands through ix86_fixup_binary_operands_no_copy for MULT; the
;; matching insns below emit (v)pmulhw.
5186 (define_expand "smulv8hi3_highpart"
5187 [(set (match_operand:V8HI 0 "register_operand" "")
5192 (match_operand:V8HI 1 "nonimmediate_operand" ""))
5194 (match_operand:V8HI 2 "nonimmediate_operand" "")))
5197 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; AVX form of smulv8hi3_highpart: VEX-encoded three-operand vpmulhw.
;; The pattern name carries the "smul" stem so that it lines up with
;; *smulv8hi3_highpart and *avx_umulv8hi3_highpart in RTL dumps; a name
;; starting with "*" generates no gen_ function, so nothing refers to it.
5199 (define_insn "*avx_smulv8hi3_highpart"
5200 [(set (match_operand:V8HI 0 "register_operand" "=x")
5205 (match_operand:V8HI 1 "nonimmediate_operand" "%x"))
5207 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
5209 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5210 "vpmulhw\t{%2, %1, %0|%0, %1, %2}"
5211 [(set_attr "type" "sseimul")
5212 (set_attr "prefix" "vex")
5213 (set_attr "mode" "TI")])
;; SSE2 form of smulv8hi3_highpart: two-operand pmulhw with operand 1 tied
;; to the destination.
5215 (define_insn "*smulv8hi3_highpart"
5216 [(set (match_operand:V8HI 0 "register_operand" "=x")
5221 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
5223 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
5225 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5226 "pmulhw\t{%2, %0|%0, %2}"
5227 [(set_attr "type" "sseimul")
5228 (set_attr "prefix_data16" "1")
5229 (set_attr "mode" "TI")])
;; Expander for umulv8hi3_highpart on V8HI operands.  Preparation passes the
;; operands through ix86_fixup_binary_operands_no_copy for MULT; the
;; matching insns below emit (v)pmulhuw.
5231 (define_expand "umulv8hi3_highpart"
5232 [(set (match_operand:V8HI 0 "register_operand" "")
5237 (match_operand:V8HI 1 "nonimmediate_operand" ""))
5239 (match_operand:V8HI 2 "nonimmediate_operand" "")))
5242 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; AVX form of umulv8hi3_highpart: VEX-encoded three-operand vpmulhuw.
5244 (define_insn "*avx_umulv8hi3_highpart"
5245 [(set (match_operand:V8HI 0 "register_operand" "=x")
5250 (match_operand:V8HI 1 "nonimmediate_operand" "%x"))
5252 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
5254 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5255 "vpmulhuw\t{%2, %1, %0|%0, %1, %2}"
5256 [(set_attr "type" "sseimul")
5257 (set_attr "prefix" "vex")
5258 (set_attr "mode" "TI")])
;; SSE2 form of umulv8hi3_highpart: two-operand pmulhuw with operand 1 tied
;; to the destination.
5260 (define_insn "*umulv8hi3_highpart"
5261 [(set (match_operand:V8HI 0 "register_operand" "=x")
5266 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
5268 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
5270 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5271 "pmulhuw\t{%2, %0|%0, %2}"
5272 [(set_attr "type" "sseimul")
5273 (set_attr "prefix_data16" "1")
5274 (set_attr "mode" "TI")])
;; Expander producing a V2DI result from two V4SI inputs, using elements 0
;; and 2 of each input.  Preparation passes the operands through
;; ix86_fixup_binary_operands_no_copy; the matching insns emit (v)pmuludq.
5276 (define_expand "sse2_umulv2siv2di3"
5277 [(set (match_operand:V2DI 0 "register_operand" "")
5281 (match_operand:V4SI 1 "nonimmediate_operand" "")
5282 (parallel [(const_int 0) (const_int 2)])))
5285 (match_operand:V4SI 2 "nonimmediate_operand" "")
5286 (parallel [(const_int 0) (const_int 2)])))))]
5288 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; AVX form of the elements-0-and-2 V4SI -> V2DI multiply: VEX-encoded
;; three-operand vpmuludq.
5290 (define_insn "*avx_umulv2siv2di3"
5291 [(set (match_operand:V2DI 0 "register_operand" "=x")
5295 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
5296 (parallel [(const_int 0) (const_int 2)])))
5299 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
5300 (parallel [(const_int 0) (const_int 2)])))))]
5301 "TARGET_AVX && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5302 "vpmuludq\t{%2, %1, %0|%0, %1, %2}"
5303 [(set_attr "type" "sseimul")
5304 (set_attr "prefix" "vex")
5305 (set_attr "mode" "TI")])
;; SSE2 form of the elements-0-and-2 V4SI -> V2DI multiply: two-operand
;; pmuludq with operand 1 tied to the destination.
5307 (define_insn "*sse2_umulv2siv2di3"
5308 [(set (match_operand:V2DI 0 "register_operand" "=x")
5312 (match_operand:V4SI 1 "nonimmediate_operand" "%0")
5313 (parallel [(const_int 0) (const_int 2)])))
5316 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
5317 (parallel [(const_int 0) (const_int 2)])))))]
5318 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5319 "pmuludq\t{%2, %0|%0, %2}"
5320 [(set_attr "type" "sseimul")
5321 (set_attr "prefix_data16" "1")
5322 (set_attr "mode" "TI")])
;; Expander producing a V2DI result from elements 0 and 2 of two V4SI
;; inputs; counterpart of sse2_umulv2siv2di3 whose matching insns emit
;; (v)pmuldq.  Preparation passes the operands through
;; ix86_fixup_binary_operands_no_copy.
5324 (define_expand "sse4_1_mulv2siv2di3"
5325 [(set (match_operand:V2DI 0 "register_operand" "")
5329 (match_operand:V4SI 1 "nonimmediate_operand" "")
5330 (parallel [(const_int 0) (const_int 2)])))
5333 (match_operand:V4SI 2 "nonimmediate_operand" "")
5334 (parallel [(const_int 0) (const_int 2)])))))]
5336 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; AVX form of sse4_1_mulv2siv2di3: VEX-encoded three-operand vpmuldq.
5338 (define_insn "*avx_mulv2siv2di3"
5339 [(set (match_operand:V2DI 0 "register_operand" "=x")
5343 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
5344 (parallel [(const_int 0) (const_int 2)])))
5347 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
5348 (parallel [(const_int 0) (const_int 2)])))))]
5349 "TARGET_AVX && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5350 "vpmuldq\t{%2, %1, %0|%0, %1, %2}"
5351 [(set_attr "type" "sseimul")
5352 (set_attr "prefix_extra" "1")
5353 (set_attr "prefix" "vex")
5354 (set_attr "mode" "TI")])
;; SSE4.1 form of sse4_1_mulv2siv2di3: two-operand pmuldq with operand 1
;; tied to the destination.
5356 (define_insn "*sse4_1_mulv2siv2di3"
5357 [(set (match_operand:V2DI 0 "register_operand" "=x")
5361 (match_operand:V4SI 1 "nonimmediate_operand" "%0")
5362 (parallel [(const_int 0) (const_int 2)])))
5365 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
5366 (parallel [(const_int 0) (const_int 2)])))))]
5367 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5368 "pmuldq\t{%2, %0|%0, %2}"
5369 [(set_attr "type" "sseimul")
5370 (set_attr "prefix_extra" "1")
5371 (set_attr "mode" "TI")])
;; Expander for pmaddwd: V4SI result from two V8HI inputs.  The pattern
;; takes V4HI selections of each input starting at element 0 and at
;; element 1; the second pair reuses operands 1 and 2 via match_dup.
;; Preparation passes the operands through
;; ix86_fixup_binary_operands_no_copy.
5373 (define_expand "sse2_pmaddwd"
5374 [(set (match_operand:V4SI 0 "register_operand" "")
5379 (match_operand:V8HI 1 "nonimmediate_operand" "")
5380 (parallel [(const_int 0)
5386 (match_operand:V8HI 2 "nonimmediate_operand" "")
5387 (parallel [(const_int 0)
5393 (vec_select:V4HI (match_dup 1)
5394 (parallel [(const_int 1)
5399 (vec_select:V4HI (match_dup 2)
5400 (parallel [(const_int 1)
5403 (const_int 7)]))))))]
5405 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; AVX form of pmaddwd: VEX-encoded three-operand vpmaddwd.
5407 (define_insn "*avx_pmaddwd"
5408 [(set (match_operand:V4SI 0 "register_operand" "=x")
5413 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
5414 (parallel [(const_int 0)
5420 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5421 (parallel [(const_int 0)
5427 (vec_select:V4HI (match_dup 1)
5428 (parallel [(const_int 1)
5433 (vec_select:V4HI (match_dup 2)
5434 (parallel [(const_int 1)
5437 (const_int 7)]))))))]
5438 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5439 "vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
5440 [(set_attr "type" "sseiadd")
5441 (set_attr "prefix" "vex")
5442 (set_attr "mode" "TI")])
;; SSE2 form of pmaddwd: two-operand, operand 1 tied to the destination.
;; Also sets the atom_unit attribute to "simul".
5444 (define_insn "*sse2_pmaddwd"
5445 [(set (match_operand:V4SI 0 "register_operand" "=x")
5450 (match_operand:V8HI 1 "nonimmediate_operand" "%0")
5451 (parallel [(const_int 0)
5457 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5458 (parallel [(const_int 0)
5464 (vec_select:V4HI (match_dup 1)
5465 (parallel [(const_int 1)
5470 (vec_select:V4HI (match_dup 2)
5471 (parallel [(const_int 1)
5474 (const_int 7)]))))))]
5475 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5476 "pmaddwd\t{%2, %0|%0, %2}"
5477 [(set_attr "type" "sseiadd")
5478 (set_attr "atom_unit" "simul")
5479 (set_attr "prefix_data16" "1")
5480 (set_attr "mode" "TI")])
;; Expander for the V4SI multiply.  The operand fix-up is applied only when
;; SSE4.1 or AVX is available, i.e. when one of the (v)pmulld insns below
;; can match; *sse2_mulv4si3 covers the remaining SSE2-only case.
5482 (define_expand "mulv4si3"
5483 [(set (match_operand:V4SI 0 "register_operand" "")
5484 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
5485 (match_operand:V4SI 2 "register_operand" "")))]
5488 if (TARGET_SSE4_1 || TARGET_AVX)
5489 ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);
;; AVX form of the V4SI multiply: VEX-encoded three-operand vpmulld.
5492 (define_insn "*avx_mulv4si3"
5493 [(set (match_operand:V4SI 0 "register_operand" "=x")
5494 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%x")
5495 (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
5496 "TARGET_AVX && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5497 "vpmulld\t{%2, %1, %0|%0, %1, %2}"
5498 [(set_attr "type" "sseimul")
5499 (set_attr "prefix_extra" "1")
5500 (set_attr "prefix" "vex")
5501 (set_attr "mode" "TI")])
;; SSE4.1 form of the V4SI multiply: two-operand pmulld with operand 1 tied
;; to the destination.
5503 (define_insn "*sse4_1_mulv4si3"
5504 [(set (match_operand:V4SI 0 "register_operand" "=x")
5505 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%0")
5506 (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
5507 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5508 "pmulld\t{%2, %0|%0, %2}"
5509 [(set_attr "type" "sseimul")
5510 (set_attr "prefix_extra" "1")
5511 (set_attr "mode" "TI")])
;; V4SI multiply for SSE2 targets without SSE4.1 or AVX, implemented as a
;; split while pseudos can still be created.  Elements 0/2 and (after a
;; whole-register right shift of both inputs) elements 1/3 are multiplied
;; with sse2_umulv2siv2di3; the results are shuffled down with pshufd and
;; interleaved into operand 0.
5513 (define_insn_and_split "*sse2_mulv4si3"
5514 [(set (match_operand:V4SI 0 "register_operand" "")
5515 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
5516 (match_operand:V4SI 2 "register_operand" "")))]
5517 "TARGET_SSE2 && !TARGET_SSE4_1 && !TARGET_AVX
5518 && can_create_pseudo_p ()"
5523 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
5529 t1 = gen_reg_rtx (V4SImode);
5530 t2 = gen_reg_rtx (V4SImode);
5531 t3 = gen_reg_rtx (V4SImode);
5532 t4 = gen_reg_rtx (V4SImode);
5533 t5 = gen_reg_rtx (V4SImode);
5534 t6 = gen_reg_rtx (V4SImode);
5535 thirtytwo = GEN_INT (32);
5537 /* Multiply elements 2 and 0. */
5538 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t1),
5541 /* Shift both input vectors down one element, so that elements 3
5542 and 1 are now in the slots for elements 2 and 0. For K8, at
5543 least, this is faster than using a shuffle. */
5544 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t2),
5545 gen_lowpart (V1TImode, op1),
5547 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t3),
5548 gen_lowpart (V1TImode, op2),
5550 /* Multiply elements 3 and 1. */
5551 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t4),
5554 /* Move the results in element 2 down to element 1; we don't care
5555 what goes in elements 2 and 3. */
5556 emit_insn (gen_sse2_pshufd_1 (t5, t1, const0_rtx, const2_rtx,
5557 const0_rtx, const0_rtx));
5558 emit_insn (gen_sse2_pshufd_1 (t6, t4, const0_rtx, const2_rtx,
5559 const0_rtx, const0_rtx));
5561 /* Merge the parts back together. */
5562 emit_insn (gen_vec_interleave_lowv4si (op0, t5, t6));
;; V2DI multiply, implemented as a split while pseudos can still be
;; created.  Two sequences are visible below: one built on the XOP helpers
;; xop_phadddq and xop_pmacsdql, and one built from three
;; sse2_umulv2siv2di3 partial products (low*low plus the two cross
;; products shifted left by 32) that are added together.
5566 (define_insn_and_split "mulv2di3"
5567 [(set (match_operand:V2DI 0 "register_operand" "")
5568 (mult:V2DI (match_operand:V2DI 1 "register_operand" "")
5569 (match_operand:V2DI 2 "register_operand" "")))]
5571 && can_create_pseudo_p ()"
5576 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
5585 /* op1: A,B,C,D, op2: E,F,G,H */
5586 op1 = gen_lowpart (V4SImode, op1);
5587 op2 = gen_lowpart (V4SImode, op2);
5589 t1 = gen_reg_rtx (V4SImode);
5590 t2 = gen_reg_rtx (V4SImode);
5591 t3 = gen_reg_rtx (V2DImode);
5592 t4 = gen_reg_rtx (V2DImode);
5595 emit_insn (gen_sse2_pshufd_1 (t1, op1,
5601 /* t2: (B*E),(A*F),(D*G),(C*H) */
5602 emit_insn (gen_mulv4si3 (t2, t1, op2));
5604 /* t3: (B*E)+(A*F), (D*G)+(C*H) */
5605 emit_insn (gen_xop_phadddq (t3, t2));
5607 /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
5608 emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));
5610 /* op0: (((B*E)+(A*F))<<32)+(B*F), (((D*G)+(C*H))<<32)+(D*H) */
5611 emit_insn (gen_xop_pmacsdql (op0, op1, op2, t4));
5615 t1 = gen_reg_rtx (V2DImode);
5616 t2 = gen_reg_rtx (V2DImode);
5617 t3 = gen_reg_rtx (V2DImode);
5618 t4 = gen_reg_rtx (V2DImode);
5619 t5 = gen_reg_rtx (V2DImode);
5620 t6 = gen_reg_rtx (V2DImode);
5621 thirtytwo = GEN_INT (32);
5623 /* Multiply low parts. */
5624 emit_insn (gen_sse2_umulv2siv2di3 (t1, gen_lowpart (V4SImode, op1),
5625 gen_lowpart (V4SImode, op2)));
5627 /* Shift input vectors right 32 bits so we can multiply high parts. */
5628 emit_insn (gen_lshrv2di3 (t2, op1, thirtytwo));
5629 emit_insn (gen_lshrv2di3 (t3, op2, thirtytwo));
5631 /* Multiply high parts by low parts. */
5632 emit_insn (gen_sse2_umulv2siv2di3 (t4, gen_lowpart (V4SImode, op1),
5633 gen_lowpart (V4SImode, t3)));
5634 emit_insn (gen_sse2_umulv2siv2di3 (t5, gen_lowpart (V4SImode, op2),
5635 gen_lowpart (V4SImode, t2)));
5637 /* Shift them back. */
5638 emit_insn (gen_ashlv2di3 (t4, t4, thirtytwo));
5639 emit_insn (gen_ashlv2di3 (t5, t5, thirtytwo));
5641 /* Add the three parts together. */
5642 emit_insn (gen_addv2di3 (t6, t1, t4));
5643 emit_insn (gen_addv2di3 (op0, t6, t5));
;; Widening multiply V8HI x V8HI -> V4SI, "hi" variant.  t1 receives the
;; mulv8hi3 product and t2 the smulv8hi3_highpart product; their high
;; halves are interleaved into operand 0 viewed as V8HI.
5648 (define_expand "vec_widen_smult_hi_v8hi"
5649 [(match_operand:V4SI 0 "register_operand" "")
5650 (match_operand:V8HI 1 "register_operand" "")
5651 (match_operand:V8HI 2 "register_operand" "")]
5654 rtx op1, op2, t1, t2, dest;
5658 t1 = gen_reg_rtx (V8HImode);
5659 t2 = gen_reg_rtx (V8HImode);
5660 dest = gen_lowpart (V8HImode, operands[0]);
5662 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5663 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
5664 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
;; Widening multiply V8HI x V8HI -> V4SI, "lo" variant.  Same as the "hi"
;; variant except that the low halves of the mulv8hi3 and
;; smulv8hi3_highpart products are interleaved.
5668 (define_expand "vec_widen_smult_lo_v8hi"
5669 [(match_operand:V4SI 0 "register_operand" "")
5670 (match_operand:V8HI 1 "register_operand" "")
5671 (match_operand:V8HI 2 "register_operand" "")]
5674 rtx op1, op2, t1, t2, dest;
5678 t1 = gen_reg_rtx (V8HImode);
5679 t2 = gen_reg_rtx (V8HImode);
5680 dest = gen_lowpart (V8HImode, operands[0]);
5682 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5683 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
5684 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
;; Widening multiply V8HI x V8HI -> V4SI, unsigned "hi" variant.  t1
;; receives the mulv8hi3 product and t2 the umulv8hi3_highpart product;
;; their high halves are interleaved into operand 0 viewed as V8HI.
5688 (define_expand "vec_widen_umult_hi_v8hi"
5689 [(match_operand:V4SI 0 "register_operand" "")
5690 (match_operand:V8HI 1 "register_operand" "")
5691 (match_operand:V8HI 2 "register_operand" "")]
5694 rtx op1, op2, t1, t2, dest;
5698 t1 = gen_reg_rtx (V8HImode);
5699 t2 = gen_reg_rtx (V8HImode);
5700 dest = gen_lowpart (V8HImode, operands[0]);
5702 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5703 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
5704 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
;; Widening multiply V8HI x V8HI -> V4SI, unsigned "lo" variant.  Same as
;; the unsigned "hi" variant except that the low halves of the two
;; products are interleaved.
5708 (define_expand "vec_widen_umult_lo_v8hi"
5709 [(match_operand:V4SI 0 "register_operand" "")
5710 (match_operand:V8HI 1 "register_operand" "")
5711 (match_operand:V8HI 2 "register_operand" "")]
5714 rtx op1, op2, t1, t2, dest;
5718 t1 = gen_reg_rtx (V8HImode);
5719 t2 = gen_reg_rtx (V8HImode);
5720 dest = gen_lowpart (V8HImode, operands[0]);
5722 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5723 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
5724 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
;; Widening multiply V4SI x V4SI -> V2DI, signed "hi" variant.  Both inputs
;; are rearranged with pshufd into fresh V4SI temporaries, which are then
;; multiplied by xop_mulv2div2di3_high.
5728 (define_expand "vec_widen_smult_hi_v4si"
5729 [(match_operand:V2DI 0 "register_operand" "")
5730 (match_operand:V4SI 1 "register_operand" "")
5731 (match_operand:V4SI 2 "register_operand" "")]
5736 t1 = gen_reg_rtx (V4SImode);
5737 t2 = gen_reg_rtx (V4SImode);
5739 emit_insn (gen_sse2_pshufd_1 (t1, operands[1],
5744 emit_insn (gen_sse2_pshufd_1 (t2, operands[2],
5749 emit_insn (gen_xop_mulv2div2di3_high (operands[0], t1, t2));
;; Widening multiply V4SI x V4SI -> V2DI, signed "lo" variant.  Both inputs
;; are rearranged with pshufd into fresh V4SI temporaries, which are then
;; multiplied by xop_mulv2div2di3_low.
5753 (define_expand "vec_widen_smult_lo_v4si"
5754 [(match_operand:V2DI 0 "register_operand" "")
5755 (match_operand:V4SI 1 "register_operand" "")
5756 (match_operand:V4SI 2 "register_operand" "")]
5761 t1 = gen_reg_rtx (V4SImode);
5762 t2 = gen_reg_rtx (V4SImode);
5764 emit_insn (gen_sse2_pshufd_1 (t1, operands[1],
5769 emit_insn (gen_sse2_pshufd_1 (t2, operands[2],
5774 emit_insn (gen_xop_mulv2div2di3_low (operands[0], t1, t2));
;; Widening multiply V4SI x V4SI -> V2DI, unsigned "hi" variant.  Each
;; input is high-interleaved with itself, which places its upper two
;; elements in slots 0 and 2 as read by sse2_umulv2siv2di3.
5778 (define_expand "vec_widen_umult_hi_v4si"
5779 [(match_operand:V2DI 0 "register_operand" "")
5780 (match_operand:V4SI 1 "register_operand" "")
5781 (match_operand:V4SI 2 "register_operand" "")]
5784 rtx op1, op2, t1, t2;
5788 t1 = gen_reg_rtx (V4SImode);
5789 t2 = gen_reg_rtx (V4SImode);
5791 emit_insn (gen_vec_interleave_highv4si (t1, op1, op1));
5792 emit_insn (gen_vec_interleave_highv4si (t2, op2, op2));
5793 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Widening multiply V4SI x V4SI -> V2DI, unsigned "lo" variant.  Each
;; input is low-interleaved with itself, which places its lower two
;; elements in slots 0 and 2 as read by sse2_umulv2siv2di3.
5797 (define_expand "vec_widen_umult_lo_v4si"
5798 [(match_operand:V2DI 0 "register_operand" "")
5799 (match_operand:V4SI 1 "register_operand" "")
5800 (match_operand:V4SI 2 "register_operand" "")]
5803 rtx op1, op2, t1, t2;
5807 t1 = gen_reg_rtx (V4SImode);
5808 t2 = gen_reg_rtx (V4SImode);
5810 emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1));
5811 emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2));
5812 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Dot-product expander on V8HI inputs with a V4SI accumulator:
;; operand 0 = operand 3 + pmaddwd (operand 1, operand 2).
5816 (define_expand "sdot_prodv8hi"
5817 [(match_operand:V4SI 0 "register_operand" "")
5818 (match_operand:V8HI 1 "register_operand" "")
5819 (match_operand:V8HI 2 "register_operand" "")
5820 (match_operand:V4SI 3 "register_operand" "")]
5823 rtx t = gen_reg_rtx (V4SImode);
5824 emit_insn (gen_sse2_pmaddwd (t, operands[1], operands[2]));
5825 emit_insn (gen_addv4si3 (operands[0], operands[3], t));
;; Dot-product expander on V4SI inputs with a V2DI accumulator.  t1 holds
;; sse2_umulv2siv2di3 of the unshifted inputs plus operand 3; t4 holds the
;; same multiply applied to whole-register right-shifted copies of the
;; inputs (t2, t3); operand 0 = t1 + t4.
5829 (define_expand "udot_prodv4si"
5830 [(match_operand:V2DI 0 "register_operand" "")
5831 (match_operand:V4SI 1 "register_operand" "")
5832 (match_operand:V4SI 2 "register_operand" "")
5833 (match_operand:V2DI 3 "register_operand" "")]
5838 t1 = gen_reg_rtx (V2DImode);
5839 emit_insn (gen_sse2_umulv2siv2di3 (t1, operands[1], operands[2]));
5840 emit_insn (gen_addv2di3 (t1, t1, operands[3]));
5842 t2 = gen_reg_rtx (V4SImode);
5843 t3 = gen_reg_rtx (V4SImode);
5844 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t2),
5845 gen_lowpart (V1TImode, operands[1]),
5847 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t3),
5848 gen_lowpart (V1TImode, operands[2]),
5851 t4 = gen_reg_rtx (V2DImode);
5852 emit_insn (gen_sse2_umulv2siv2di3 (t4, t2, t3));
5854 emit_insn (gen_addv2di3 (operands[0], t1, t4));
;; AVX form of the V8HI/V4SI ashr pattern: VEX-encoded three-operand
;; vpsra<ssevecsize>.  The SImode count (operand 2) may be an SSE register
;; or a constant ("xN"); length_immediate depends on whether it is a
;; const_int.
5858 (define_insn "*avx_ashr<mode>3"
5859 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
5861 (match_operand:SSEMODE24 1 "register_operand" "x")
5862 (match_operand:SI 2 "nonmemory_operand" "xN")))]
5864 "vpsra<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5865 [(set_attr "type" "sseishft")
5866 (set_attr "prefix" "vex")
5867 (set (attr "length_immediate")
5868 (if_then_else (match_operand 2 "const_int_operand" "")
5870 (const_string "0")))
5871 (set_attr "mode" "TI")])
;; Two-operand V8HI/V4SI ashr pattern: psra<ssevecsize> with operand 1 tied
;; to the destination.  The SImode count may be an SSE register or a
;; constant ("xN"); length_immediate depends on whether it is a const_int.
5873 (define_insn "ashr<mode>3"
5874 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
5876 (match_operand:SSEMODE24 1 "register_operand" "0")
5877 (match_operand:SI 2 "nonmemory_operand" "xN")))]
5879 "psra<ssevecsize>\t{%2, %0|%0, %2}"
5880 [(set_attr "type" "sseishft")
5881 (set_attr "prefix_data16" "1")
5882 (set (attr "length_immediate")
5883 (if_then_else (match_operand 2 "const_int_operand" "")
5885 (const_string "0")))
5886 (set_attr "mode" "TI")])
;; AVX whole-register (V1TI) right shift: vpsrldq.  Operand 2 must satisfy
;; const_0_to_255_mul_8_operand; the output code divides it by 8 before
;; printing it as the instruction's immediate.
5888 (define_insn "*avx_lshrv1ti3"
5889 [(set (match_operand:V1TI 0 "register_operand" "=x")
5891 (match_operand:V1TI 1 "register_operand" "x")
5892 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
5895 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
5896 return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
5898 [(set_attr "type" "sseishft")
5899 (set_attr "prefix" "vex")
5900 (set_attr "length_immediate" "1")
5901 (set_attr "mode" "TI")])
;; AVX form of the per-element logical right shift on V8HI/V4SI/V2DI:
;; VEX-encoded three-operand vpsrl<ssevecsize>.  The SImode count may be an
;; SSE register or a constant ("xN").
5903 (define_insn "*avx_lshr<mode>3"
5904 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
5905 (lshiftrt:SSEMODE248
5906 (match_operand:SSEMODE248 1 "register_operand" "x")
5907 (match_operand:SI 2 "nonmemory_operand" "xN")))]
5909 "vpsrl<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5910 [(set_attr "type" "sseishft")
5911 (set_attr "prefix" "vex")
5912 (set (attr "length_immediate")
5913 (if_then_else (match_operand 2 "const_int_operand" "")
5915 (const_string "0")))
5916 (set_attr "mode" "TI")])
;; SSE2 whole-register (V1TI) right shift: two-operand psrldq with operand
;; 1 tied to the destination.  Operand 2 must satisfy
;; const_0_to_255_mul_8_operand; the output code divides it by 8 before
;; printing it as the instruction's immediate.
5918 (define_insn "sse2_lshrv1ti3"
5919 [(set (match_operand:V1TI 0 "register_operand" "=x")
5921 (match_operand:V1TI 1 "register_operand" "0")
5922 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
5925 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
5926 return "psrldq\t{%2, %0|%0, %2}";
5928 [(set_attr "type" "sseishft")
5929 (set_attr "prefix_data16" "1")
5930 (set_attr "length_immediate" "1")
5931 (set_attr "mode" "TI")])
;; Two-operand per-element logical right shift on V8HI/V4SI/V2DI:
;; psrl<ssevecsize> with operand 1 tied to the destination.  The SImode
;; count may be an SSE register or a constant ("xN").
5933 (define_insn "lshr<mode>3"
5934 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
5935 (lshiftrt:SSEMODE248
5936 (match_operand:SSEMODE248 1 "register_operand" "0")
5937 (match_operand:SI 2 "nonmemory_operand" "xN")))]
5939 "psrl<ssevecsize>\t{%2, %0|%0, %2}"
5940 [(set_attr "type" "sseishft")
5941 (set_attr "prefix_data16" "1")
5942 (set (attr "length_immediate")
5943 (if_then_else (match_operand 2 "const_int_operand" "")
5945 (const_string "0")))
5946 (set_attr "mode" "TI")])
;; AVX whole-register (V1TI) left shift: vpslldq.  Operand 2 must satisfy
;; const_0_to_255_mul_8_operand; the output code divides it by 8 before
;; printing it as the instruction's immediate.
5948 (define_insn "*avx_ashlv1ti3"
5949 [(set (match_operand:V1TI 0 "register_operand" "=x")
5950 (ashift:V1TI (match_operand:V1TI 1 "register_operand" "x")
5951 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
5954 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
5955 return "vpslldq\t{%2, %1, %0|%0, %1, %2}";
5957 [(set_attr "type" "sseishft")
5958 (set_attr "prefix" "vex")
5959 (set_attr "length_immediate" "1")
5960 (set_attr "mode" "TI")])
;; AVX form of the per-element left shift on V8HI/V4SI/V2DI: VEX-encoded
;; three-operand vpsll<ssevecsize>.  The SImode count may be an SSE
;; register or a constant ("xN").
5962 (define_insn "*avx_ashl<mode>3"
5963 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
5965 (match_operand:SSEMODE248 1 "register_operand" "x")
5966 (match_operand:SI 2 "nonmemory_operand" "xN")))]
5968 "vpsll<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5969 [(set_attr "type" "sseishft")
5970 (set_attr "prefix" "vex")
5971 (set (attr "length_immediate")
5972 (if_then_else (match_operand 2 "const_int_operand" "")
5974 (const_string "0")))
5975 (set_attr "mode" "TI")])
;; SSE2 whole-register (V1TI) left shift: two-operand pslldq with operand 1
;; tied to the destination.  Operand 2 must satisfy
;; const_0_to_255_mul_8_operand; the output code divides it by 8 before
;; printing it as the instruction's immediate.
5977 (define_insn "sse2_ashlv1ti3"
5978 [(set (match_operand:V1TI 0 "register_operand" "=x")
5979 (ashift:V1TI (match_operand:V1TI 1 "register_operand" "0")
5980 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
5983 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
5984 return "pslldq\t{%2, %0|%0, %2}";
5986 [(set_attr "type" "sseishft")
5987 (set_attr "prefix_data16" "1")
5988 (set_attr "length_immediate" "1")
5989 (set_attr "mode" "TI")])
;; Two-operand per-element left shift on V8HI/V4SI/V2DI: psll<ssevecsize>
;; with operand 1 tied to the destination.  The SImode count may be an SSE
;; register or a constant ("xN").
5991 (define_insn "ashl<mode>3"
5992 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
5994 (match_operand:SSEMODE248 1 "register_operand" "0")
5995 (match_operand:SI 2 "nonmemory_operand" "xN")))]
5997 "psll<ssevecsize>\t{%2, %0|%0, %2}"
5998 [(set_attr "type" "sseishft")
5999 (set_attr "prefix_data16" "1")
6000 (set (attr "length_immediate")
6001 (if_then_else (match_operand 2 "const_int_operand" "")
6003 (const_string "0")))
6004 (set_attr "mode" "TI")])
;; vec_shl expander for the 16-byte integer vector modes (SSEMODEI).
;; Operands 0 and 1 are re-viewed as V1TImode; the count (operand 2) must
;; satisfy const_0_to_255_mul_8_operand.
6006 (define_expand "vec_shl_<mode>"
6007 [(set (match_operand:SSEMODEI 0 "register_operand" "")
6009 (match_operand:SSEMODEI 1 "register_operand" "")
6010 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
6013 operands[0] = gen_lowpart (V1TImode, operands[0]);
6014 operands[1] = gen_lowpart (V1TImode, operands[1]);
;; vec_shr expander for the 16-byte integer vector modes (SSEMODEI).
;; Operands 0 and 1 are re-viewed as V1TImode; the count (operand 2) must
;; satisfy const_0_to_255_mul_8_operand.
6017 (define_expand "vec_shr_<mode>"
6018 [(set (match_operand:SSEMODEI 0 "register_operand" "")
6020 (match_operand:SSEMODEI 1 "register_operand" "")
6021 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
6024 operands[0] = gen_lowpart (V1TImode, operands[0]);
6025 operands[1] = gen_lowpart (V1TImode, operands[1]);
;; AVX integer max/min on V16QI/V8HI/V4SI: VEX-encoded three-operand
;; vp<maxmin_int><ssevecsize>.  prefix_extra is computed from the
;; code/mode pair: the symbol_ref tests whether the mode differs from
;; V8HImode (for SMAX/SMIN) or V16QImode (for the other codes).
6028 (define_insn "*avx_<code><mode>3"
6029 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
6031 (match_operand:SSEMODE124 1 "nonimmediate_operand" "%x")
6032 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
6033 "TARGET_AVX && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6034 "vp<maxmin_int><ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
6035 [(set_attr "type" "sseiadd")
6036 (set (attr "prefix_extra")
6038 (ne (symbol_ref "<MODE>mode != ((<CODE> == SMAX || <CODE> == SMIN) ? V8HImode : V16QImode)")
6041 (const_string "0")))
6042 (set_attr "prefix" "vex")
6043 (set_attr "mode" "TI")])
;; Expander for the V16QI max/min codes handled by *<code>v16qi3 below.
;; Preparation passes the operands through
;; ix86_fixup_binary_operands_no_copy.
6045 (define_expand "<code>v16qi3"
6046 [(set (match_operand:V16QI 0 "register_operand" "")
6048 (match_operand:V16QI 1 "nonimmediate_operand" "")
6049 (match_operand:V16QI 2 "nonimmediate_operand" "")))]
6051 "ix86_fixup_binary_operands_no_copy (<CODE>, V16QImode, operands);")
;; SSE2 V16QI max/min: two-operand p<maxmin_int>b with operand 1 tied to
;; the destination.
6053 (define_insn "*<code>v16qi3"
6054 [(set (match_operand:V16QI 0 "register_operand" "=x")
6056 (match_operand:V16QI 1 "nonimmediate_operand" "%0")
6057 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
6058 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V16QImode, operands)"
6059 "p<maxmin_int>b\t{%2, %0|%0, %2}"
6060 [(set_attr "type" "sseiadd")
6061 (set_attr "prefix_data16" "1")
6062 (set_attr "mode" "TI")])
;; Expander for the V8HI max/min codes handled by *<code>v8hi3 below.
;; Preparation passes the operands through
;; ix86_fixup_binary_operands_no_copy.
6064 (define_expand "<code>v8hi3"
6065 [(set (match_operand:V8HI 0 "register_operand" "")
6067 (match_operand:V8HI 1 "nonimmediate_operand" "")
6068 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
6070 "ix86_fixup_binary_operands_no_copy (<CODE>, V8HImode, operands);")
;; SSE2 V8HI max/min: two-operand p<maxmin_int>w with operand 1 tied to the
;; destination.
6072 (define_insn "*<code>v8hi3"
6073 [(set (match_operand:V8HI 0 "register_operand" "=x")
6075 (match_operand:V8HI 1 "nonimmediate_operand" "%0")
6076 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
6077 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V8HImode, operands)"
6078 "p<maxmin_int>w\t{%2, %0|%0, %2}"
6079 [(set_attr "type" "sseiadd")
6080 (set_attr "prefix_data16" "1")
6081 (set_attr "mode" "TI")])
;; Expander for unsigned V8HI max.  Two paths are visible: one only fixes
;; up the operands for a directly matching insn; the other computes
;; umax (a, b) as ussub (a, b) + b, i.e. an unsigned saturating subtract
;; followed by an add.  A fresh temporary is used for the intermediate
;; when operand 0 is the same rtx as operand 2, so that operand 2 is not
;; clobbered before the add.
6083 (define_expand "umaxv8hi3"
6084 [(set (match_operand:V8HI 0 "register_operand" "")
6085 (umax:V8HI (match_operand:V8HI 1 "register_operand" "")
6086 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
6090 ix86_fixup_binary_operands_no_copy (UMAX, V8HImode, operands);
6093 rtx op0 = operands[0], op2 = operands[2], op3 = op0;
6094 if (rtx_equal_p (op3, op2))
6095 op3 = gen_reg_rtx (V8HImode);
6096 emit_insn (gen_sse2_ussubv8hi3 (op3, operands[1], op2));
6097 emit_insn (gen_addv8hi3 (op0, op3, op2));
;; Expander for signed max on V16QI/V4SI.  Two paths are visible: one only
;; fixes up the operands for a directly matching insn; the other builds a
;; vector conditional through ix86_expand_int_vcond with the comparison
;; operand 1 GT operand 2 and value operands (operand 1, operand 2).
6102 (define_expand "smax<mode>3"
6103 [(set (match_operand:SSEMODE14 0 "register_operand" "")
6104 (smax:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
6105 (match_operand:SSEMODE14 2 "register_operand" "")))]
6109 ix86_fixup_binary_operands_no_copy (SMAX, <MODE>mode, operands);
6115 xops[0] = operands[0];
6116 xops[1] = operands[1];
6117 xops[2] = operands[2];
6118 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
6119 xops[4] = operands[1];
6120 xops[5] = operands[2];
6121 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 max/min on V16QI/V4SI (SSEMODE14): two-operand
;; p<maxmin_int><ssevecsize> with operand 1 tied to the destination.
6127 (define_insn "*sse4_1_<code><mode>3"
6128 [(set (match_operand:SSEMODE14 0 "register_operand" "=x")
6130 (match_operand:SSEMODE14 1 "nonimmediate_operand" "%0")
6131 (match_operand:SSEMODE14 2 "nonimmediate_operand" "xm")))]
6132 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6133 "p<maxmin_int><ssevecsize>\t{%2, %0|%0, %2}"
6134 [(set_attr "type" "sseiadd")
6135 (set_attr "prefix_extra" "1")
6136 (set_attr "mode" "TI")])
;; Expander for signed V2DI max, built as a vector conditional through
;; ix86_expand_int_vcond: comparison operand 1 GT operand 2, value operands
;; (operand 1, operand 2).
6138 (define_expand "smaxv2di3"
6139 [(set (match_operand:V2DI 0 "register_operand" "")
6140 (smax:V2DI (match_operand:V2DI 1 "register_operand" "")
6141 (match_operand:V2DI 2 "register_operand" "")))]
6147 xops[0] = operands[0];
6148 xops[1] = operands[1];
6149 xops[2] = operands[2];
6150 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
6151 xops[4] = operands[1];
6152 xops[5] = operands[2];
6153 ok = ix86_expand_int_vcond (xops);
;; Expander for unsigned V4SI max.  Two paths are visible: one only fixes
;; up the operands for a directly matching insn; the other builds a vector
;; conditional through ix86_expand_int_vcond with the unsigned comparison
;; operand 1 GTU operand 2 and value operands (operand 1, operand 2).
6158 (define_expand "umaxv4si3"
6159 [(set (match_operand:V4SI 0 "register_operand" "")
6160 (umax:V4SI (match_operand:V4SI 1 "register_operand" "")
6161 (match_operand:V4SI 2 "register_operand" "")))]
6165 ix86_fixup_binary_operands_no_copy (UMAX, V4SImode, operands);
6171 xops[0] = operands[0];
6172 xops[1] = operands[1];
6173 xops[2] = operands[2];
6174 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
6175 xops[4] = operands[1];
6176 xops[5] = operands[2];
6177 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 max/min on V8HI/V4SI (SSEMODE24): two-operand
;; p<maxmin_int><ssevecsize> with operand 1 tied to the destination.
6183 (define_insn "*sse4_1_<code><mode>3"
6184 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
6186 (match_operand:SSEMODE24 1 "nonimmediate_operand" "%0")
6187 (match_operand:SSEMODE24 2 "nonimmediate_operand" "xm")))]
6188 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6189 "p<maxmin_int><ssevecsize>\t{%2, %0|%0, %2}"
6190 [(set_attr "type" "sseiadd")
6191 (set_attr "prefix_extra" "1")
6192 (set_attr "mode" "TI")])
;; Expander for unsigned V2DI max, built as a vector conditional through
;; ix86_expand_int_vcond: unsigned comparison operand 1 GTU operand 2,
;; value operands (operand 1, operand 2).
6194 (define_expand "umaxv2di3"
6195 [(set (match_operand:V2DI 0 "register_operand" "")
6196 (umax:V2DI (match_operand:V2DI 1 "register_operand" "")
6197 (match_operand:V2DI 2 "register_operand" "")))]
6203 xops[0] = operands[0];
6204 xops[1] = operands[1];
6205 xops[2] = operands[2];
6206 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
6207 xops[4] = operands[1];
6208 xops[5] = operands[2];
6209 ok = ix86_expand_int_vcond (xops);
;; Expander for signed min on V16QI/V4SI.  Two paths are visible: one only
;; fixes up the operands for a directly matching insn; the other builds a
;; vector conditional through ix86_expand_int_vcond.  It uses the same GT
;; comparison as smax<mode>3 but passes the value operands swapped
;; (operand 2, operand 1).
6214 (define_expand "smin<mode>3"
6215 [(set (match_operand:SSEMODE14 0 "register_operand" "")
6216 (smin:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
6217 (match_operand:SSEMODE14 2 "register_operand" "")))]
6221 ix86_fixup_binary_operands_no_copy (SMIN, <MODE>mode, operands);
6227 xops[0] = operands[0];
6228 xops[1] = operands[2];
6229 xops[2] = operands[1];
6230 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
6231 xops[4] = operands[1];
6232 xops[5] = operands[2];
6233 ok = ix86_expand_int_vcond (xops);
;; Expander for signed V2DI min, built as a vector conditional through
;; ix86_expand_int_vcond: comparison operand 1 GT operand 2, with the value
;; operands swapped relative to smaxv2di3 (operand 2, operand 1).
6239 (define_expand "sminv2di3"
6240 [(set (match_operand:V2DI 0 "register_operand" "")
6241 (smin:V2DI (match_operand:V2DI 1 "register_operand" "")
6242 (match_operand:V2DI 2 "register_operand" "")))]
6248 xops[0] = operands[0];
6249 xops[1] = operands[2];
6250 xops[2] = operands[1];
6251 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
6252 xops[4] = operands[1];
6253 xops[5] = operands[2];
6254 ok = ix86_expand_int_vcond (xops);
;; Expander for unsigned min on V8HI/V4SI.  Two paths are visible: one only
;; fixes up the operands for a directly matching insn; the other builds a
;; vector conditional through ix86_expand_int_vcond with the unsigned
;; comparison operand 1 GTU operand 2 and swapped value operands
;; (operand 2, operand 1).
6259 (define_expand "umin<mode>3"
6260 [(set (match_operand:SSEMODE24 0 "register_operand" "")
6261 (umin:SSEMODE24 (match_operand:SSEMODE24 1 "register_operand" "")
6262 (match_operand:SSEMODE24 2 "register_operand" "")))]
6266 ix86_fixup_binary_operands_no_copy (UMIN, <MODE>mode, operands);
6272 xops[0] = operands[0];
6273 xops[1] = operands[2];
6274 xops[2] = operands[1];
6275 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
6276 xops[4] = operands[1];
6277 xops[5] = operands[2];
6278 ok = ix86_expand_int_vcond (xops);
;; Expander for unsigned V2DI min, built as a vector conditional through
;; ix86_expand_int_vcond: unsigned comparison operand 1 GTU operand 2, with
;; the value operands swapped relative to umaxv2di3 (operand 2, operand 1).
6284 (define_expand "uminv2di3"
6285 [(set (match_operand:V2DI 0 "register_operand" "")
6286 (umin:V2DI (match_operand:V2DI 1 "register_operand" "")
6287 (match_operand:V2DI 2 "register_operand" "")))]
6293 xops[0] = operands[0];
6294 xops[1] = operands[2];
6295 xops[2] = operands[1];
6296 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
6297 xops[4] = operands[1];
6298 xops[5] = operands[2];
6299 ok = ix86_expand_int_vcond (xops);
6304 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6306 ;; Parallel integral comparisons
6308 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander for the element-wise equality compare on V16QI/V8HI/V4SI.
;; Enabled for SSE2 when XOP is not in use; preparation passes the operands
;; through ix86_fixup_binary_operands_no_copy for EQ.
6310 (define_expand "sse2_eq<mode>3"
6311 [(set (match_operand:SSEMODE124 0 "register_operand" "")
6313 (match_operand:SSEMODE124 1 "nonimmediate_operand" "")
6314 (match_operand:SSEMODE124 2 "nonimmediate_operand" "")))]
6315 "TARGET_SSE2 && !TARGET_XOP "
6316 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
;; AVX equality compare over the SSEMODE1248 modes: VEX-encoded
;; three-operand vpcmpeq<ssevecsize>.  prefix_extra is selected by whether
;; operand 0 has mode V2DI.
6318 (define_insn "*avx_eq<mode>3"
6319 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
6321 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "%x")
6322 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm")))]
6323 "TARGET_AVX && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
6324 "vpcmpeq<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
6325 [(set_attr "type" "ssecmp")
6326 (set (attr "prefix_extra")
6327 (if_then_else (match_operand:V2DI 0 "" "")
6329 (const_string "*")))
6330 (set_attr "prefix" "vex")
6331 (set_attr "mode" "TI")])
;; SSE2 (non-XOP) form for V16QI/V8HI/V4SI: emits the two-operand
;; pcmpeq<ssevecsize>.  Operand 1 is tied to the destination ("%0",
;; commutative) and operand 2 may be a register or memory.
6333 (define_insn "*sse2_eq<mode>3"
6334 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
6336 (match_operand:SSEMODE124 1 "nonimmediate_operand" "%0")
6337 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
6338 "TARGET_SSE2 && !TARGET_XOP
6339 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
6340 "pcmpeq<ssevecsize>\t{%2, %0|%0, %2}"
6341 [(set_attr "type" "ssecmp")
6342 (set_attr "prefix_data16" "1")
6343 (set_attr "mode" "TI")])
;; V2DI expander whose preparation step fixes up the two nonimmediate
;; inputs as an EQ binary operation in V2DImode.
;; NOTE(review): the enabling condition is not visible in this view.
6345 (define_expand "sse4_1_eqv2di3"
6346 [(set (match_operand:V2DI 0 "register_operand" "")
6348 (match_operand:V2DI 1 "nonimmediate_operand" "")
6349 (match_operand:V2DI 2 "nonimmediate_operand" "")))]
6351 "ix86_fixup_binary_operands_no_copy (EQ, V2DImode, operands);")
;; SSE4.1 V2DI form: emits the two-operand pcmpeqq with operand 1 tied to
;; the destination ("%0", commutative); operand 2 may be in memory.
6353 (define_insn "*sse4_1_eqv2di3"
6354 [(set (match_operand:V2DI 0 "register_operand" "=x")
6356 (match_operand:V2DI 1 "nonimmediate_operand" "%0")
6357 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
6358 "TARGET_SSE4_1 && ix86_binary_operator_ok (EQ, V2DImode, operands)"
6359 "pcmpeqq\t{%2, %0|%0, %2}"
6360 [(set_attr "type" "ssecmp")
6361 (set_attr "prefix_extra" "1")
6362 (set_attr "mode" "TI")])
;; AVX form for the SSEMODE1248 modes: emits the three-operand
;; vpcmpgt<ssevecsize>.  Operand 1 must be a register and has no "%"
;; marker, so the inputs are not treated as commutative; operand 2 may be
;; in memory.  prefix_extra is computed by testing operand 0 for V2DI.
;; NOTE(review): the enabling condition is not visible in this view.
6364 (define_insn "*avx_gt<mode>3"
6365 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
6367 (match_operand:SSEMODE1248 1 "register_operand" "x")
6368 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm")))]
6370 "vpcmpgt<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
6371 [(set_attr "type" "ssecmp")
6372 (set (attr "prefix_extra")
6373 (if_then_else (match_operand:V2DI 0 "" "")
6375 (const_string "*")))
6376 (set_attr "prefix" "vex")
6377 (set_attr "mode" "TI")])
;; SSE2 (non-XOP) form for V16QI/V8HI/V4SI: emits the two-operand
;; pcmpgt<ssevecsize>.  Operand 1 is a register tied to the destination
;; ("0", not commutative); operand 2 may be a register or memory.
6379 (define_insn "sse2_gt<mode>3"
6380 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
6382 (match_operand:SSEMODE124 1 "register_operand" "0")
6383 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
6384 "TARGET_SSE2 && !TARGET_XOP"
6385 "pcmpgt<ssevecsize>\t{%2, %0|%0, %2}"
6386 [(set_attr "type" "ssecmp")
6387 (set_attr "prefix_data16" "1")
6388 (set_attr "mode" "TI")])
;; V2DI form: emits the two-operand pcmpgtq with operand 1 tied to the
;; destination; operand 2 may be a register or memory.
;; NOTE(review): the enabling condition is not visible in this view.
6390 (define_insn "sse4_2_gtv2di3"
6391 [(set (match_operand:V2DI 0 "register_operand" "=x")
6393 (match_operand:V2DI 1 "register_operand" "0")
6394 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
6396 "pcmpgtq\t{%2, %0|%0, %2}"
6397 [(set_attr "type" "ssecmp")
6398 (set_attr "prefix_extra" "1")
6399 (set_attr "mode" "TI")])
;; Vector conditional-select expander over the SSEMODE124C8 modes.
;; Operand layout: 0 = destination, 3 = comparison operator applied to
;; operands 4 and 5, 1 and 2 = the two arms of the if_then_else.
;; The whole operand array is passed to ix86_expand_int_vcond and the
;; boolean result is kept in `ok'.
6401 (define_expand "vcond<mode>"
6402 [(set (match_operand:SSEMODE124C8 0 "register_operand" "")
6403 (if_then_else:SSEMODE124C8
6404 (match_operator 3 ""
6405 [(match_operand:SSEMODE124C8 4 "nonimmediate_operand" "")
6406 (match_operand:SSEMODE124C8 5 "nonimmediate_operand" "")])
6407 (match_operand:SSEMODE124C8 1 "general_operand" "")
6408 (match_operand:SSEMODE124C8 2 "general_operand" "")))]
6411 bool ok = ix86_expand_int_vcond (operands);
;; Counterpart of vcond<mode> under the "vcondu" name, with the same
;; operand layout (0 = destination, 3 = comparison of operands 4 and 5,
;; 1 and 2 = the two arms).  It also forwards the operand array to
;; ix86_expand_int_vcond and keeps the result in `ok'.
6416 (define_expand "vcondu<mode>"
6417 [(set (match_operand:SSEMODE124C8 0 "register_operand" "")
6418 (if_then_else:SSEMODE124C8
6419 (match_operator 3 ""
6420 [(match_operand:SSEMODE124C8 4 "nonimmediate_operand" "")
6421 (match_operand:SSEMODE124C8 5 "nonimmediate_operand" "")])
6422 (match_operand:SSEMODE124C8 1 "general_operand" "")
6423 (match_operand:SSEMODE124C8 2 "general_operand" "")))]
6426 bool ok = ix86_expand_int_vcond (operands);
6431 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6433 ;; Parallel bitwise logical operations
6435 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; One's complement for the 16-byte integer vector modes (SSEMODEI),
;; expressed as an XOR.  The preparation code builds a CONST_VECTOR with
;; GET_MODE_NUNITS elements, each constm1_rtx (all ones), forces it into
;; a register, and installs it as operands[2].
6437 (define_expand "one_cmpl<mode>2"
6438 [(set (match_operand:SSEMODEI 0 "register_operand" "")
6439 (xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
6443 int i, n = GET_MODE_NUNITS (<MODE>mode);
6444 rtvec v = rtvec_alloc (n);
6446 for (i = 0; i < n; ++i)
6447 RTVEC_ELT (v, i) = constm1_rtx;
6449 operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v));
;; 32-byte integer vector form (AVX256MODEI): operand 1 is the
;; complemented (NOT) input, operand 2 the other input, which may be in
;; memory.  Emits the three-operand vandnps; the insn mode attribute is
;; <avxvecpsmode>.
;; NOTE(review): the enabling condition is not visible in this view.
6452 (define_insn "*avx_andnot<mode>3"
6453 [(set (match_operand:AVX256MODEI 0 "register_operand" "=x")
6455 (not:AVX256MODEI (match_operand:AVX256MODEI 1 "register_operand" "x"))
6456 (match_operand:AVX256MODEI 2 "nonimmediate_operand" "xm")))]
6458 "vandnps\t{%2, %1, %0|%0, %1, %2}"
6459 [(set_attr "type" "sselog")
6460 (set_attr "prefix" "vex")
6461 (set_attr "mode" "<avxvecpsmode>")])
;; SSE-without-SSE2 form for the 16-byte integer vector modes: emits the
;; two-operand andnps (insn mode V4SF).  The complemented operand 1 is
;; tied to the destination; operand 2 may be a register or memory.
6463 (define_insn "*sse_andnot<mode>3"
6464 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
6466 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0"))
6467 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
6468 "(TARGET_SSE && !TARGET_SSE2)"
6469 "andnps\t{%2, %0|%0, %2}"
6470 [(set_attr "type" "sselog")
6471 (set_attr "mode" "V4SF")])
;; VEX-encoded form for the 16-byte integer vector modes (SSEMODEI):
;; emits the three-operand vpandn.  Operand 1 is the complemented
;; register input; operand 2 may be a register or memory.
;; NOTE(review): the enabling condition is not visible in this view.
6473 (define_insn "*avx_andnot<mode>3"
6474 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
6476 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "x"))
6477 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
6479 "vpandn\t{%2, %1, %0|%0, %1, %2}"
6480 [(set_attr "type" "sselog")
6481 (set_attr "prefix" "vex")
6482 (set_attr "mode" "TI")])
;; Named pattern for the 16-byte integer vector modes: emits the
;; two-operand pandn.  The complemented operand 1 is tied to the
;; destination; operand 2 may be a register or memory.
;; NOTE(review): the enabling condition is not visible in this view.
6484 (define_insn "sse2_andnot<mode>3"
6485 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
6487 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0"))
6488 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
6490 "pandn\t{%2, %0|%0, %2}"
6491 [(set_attr "type" "sselog")
6492 (set_attr "prefix_data16" "1")
6493 (set_attr "mode" "TI")])
;; TFmode variant: emits the two-operand pandn on TF values held in
;; SSE registers.  The complemented operand 1 is tied to the destination;
;; operand 2 may be a register or memory.
;; NOTE(review): the enabling condition is not visible in this view.
6495 (define_insn "*andnottf3"
6496 [(set (match_operand:TF 0 "register_operand" "=x")
6498 (not:TF (match_operand:TF 1 "register_operand" "0"))
6499 (match_operand:TF 2 "nonimmediate_operand" "xm")))]
6501 "pandn\t{%2, %0|%0, %2}"
6502 [(set_attr "type" "sselog")
6503 (set_attr "prefix_data16" "1")
6504 (set_attr "mode" "TI")])
;; Generic expander named by the <code> iterator for the 16-byte integer
;; vector modes.  Its only preparation step fixes up the two nonimmediate
;; inputs for the binary operation <CODE>.
;; NOTE(review): the enabling condition is not visible in this view.
6506 (define_expand "<code><mode>3"
6507 [(set (match_operand:SSEMODEI 0 "register_operand" "")
6509 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
6510 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
6512 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; 32-byte integer vector form of the any_logic operations: emits the
;; three-operand v<logic>ps with insn mode <avxvecpsmode>.  Operand 1 is
;; marked commutative ("%x"); operand 2 may be a register or memory.
;; NOTE(review): the first part of the condition is not visible here.
6514 (define_insn "*avx_<code><mode>3"
6515 [(set (match_operand:AVX256MODEI 0 "register_operand" "=x")
6516 (any_logic:AVX256MODEI
6517 (match_operand:AVX256MODEI 1 "nonimmediate_operand" "%x")
6518 (match_operand:AVX256MODEI 2 "nonimmediate_operand" "xm")))]
6520 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6521 "v<logic>ps\t{%2, %1, %0|%0, %1, %2}"
6522 [(set_attr "type" "sselog")
6523 (set_attr "prefix" "vex")
6524 (set_attr "mode" "<avxvecpsmode>")])
;; SSE-without-SSE2 form for the 16-byte integer vector modes: emits the
;; two-operand <logic>ps (insn mode V4SF).  Operand 1 is tied to the
;; destination and marked commutative ("%0").
6526 (define_insn "*sse_<code><mode>3"
6527 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
6529 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
6530 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
6531 "(TARGET_SSE && !TARGET_SSE2)
6532 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6533 "<logic>ps\t{%2, %0|%0, %2}"
6534 [(set_attr "type" "sselog")
6535 (set_attr "mode" "V4SF")])
;; VEX-encoded form for the 16-byte integer vector modes: emits the
;; three-operand vp<logic>.  Operand 1 is marked commutative ("%x");
;; operand 2 may be a register or memory.
;; NOTE(review): the first part of the condition is not visible here.
6537 (define_insn "*avx_<code><mode>3"
6538 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
6540 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%x")
6541 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
6543 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6544 "vp<logic>\t{%2, %1, %0|%0, %1, %2}"
6545 [(set_attr "type" "sselog")
6546 (set_attr "prefix" "vex")
6547 (set_attr "mode" "TI")])
;; SSE2 form for the 16-byte integer vector modes: emits the two-operand
;; p<logic>.  Operand 1 is tied to the destination and marked commutative
;; ("%0"); operand 2 may be a register or memory.
6549 (define_insn "*sse2_<code><mode>3"
6550 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
6552 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
6553 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
6554 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6555 "p<logic>\t{%2, %0|%0, %2}"
6556 [(set_attr "type" "sselog")
6557 (set_attr "prefix_data16" "1")
6558 (set_attr "mode" "TI")])
;; TFmode expander named by the <code> iterator.  Its only preparation
;; step fixes up the two nonimmediate inputs for <CODE> in TFmode.
;; NOTE(review): the enabling condition is not visible in this view.
6560 (define_expand "<code>tf3"
6561 [(set (match_operand:TF 0 "register_operand" "")
6563 (match_operand:TF 1 "nonimmediate_operand" "")
6564 (match_operand:TF 2 "nonimmediate_operand" "")))]
6566 "ix86_fixup_binary_operands_no_copy (<CODE>, TFmode, operands);")
;; SSE2 TFmode form: emits the two-operand p<logic> on TF values held in
;; SSE registers.  Operand 1 is tied to the destination and marked
;; commutative ("%0"); operand 2 may be a register or memory.
6568 (define_insn "*<code>tf3"
6569 [(set (match_operand:TF 0 "register_operand" "=x")
6571 (match_operand:TF 1 "nonimmediate_operand" "%0")
6572 (match_operand:TF 2 "nonimmediate_operand" "xm")))]
6573 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, TFmode, operands)"
6574 "p<logic>\t{%2, %0|%0, %2}"
6575 [(set_attr "type" "sselog")
6576 (set_attr "prefix_data16" "1")
6577 (set_attr "mode" "TI")])
6579 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6581 ;; Parallel integral element swizzling
6583 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Packs two V8HI inputs into one V16QI result.  Both inputs are
;; reinterpreted as V16QI with gen_lowpart and handed to
;; ix86_expand_vec_extract_even_odd with a final argument of 0.
;; NOTE(review): presumably 0 selects the even-numbered elements --
;; confirm against that helper's definition.
6585 (define_expand "vec_pack_trunc_v8hi"
6586 [(match_operand:V16QI 0 "register_operand" "")
6587 (match_operand:V8HI 1 "register_operand" "")
6588 (match_operand:V8HI 2 "register_operand" "")]
6591 rtx op1 = gen_lowpart (V16QImode, operands[1]);
6592 rtx op2 = gen_lowpart (V16QImode, operands[2]);
6593 ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
;; Packs two V4SI inputs into one V8HI result.  Both inputs are
;; reinterpreted as V8HI with gen_lowpart and handed to
;; ix86_expand_vec_extract_even_odd with a final argument of 0.
;; NOTE(review): presumably 0 selects the even-numbered elements --
;; confirm against that helper's definition.
6597 (define_expand "vec_pack_trunc_v4si"
6598 [(match_operand:V8HI 0 "register_operand" "")
6599 (match_operand:V4SI 1 "register_operand" "")
6600 (match_operand:V4SI 2 "register_operand" "")]
6603 rtx op1 = gen_lowpart (V8HImode, operands[1]);
6604 rtx op2 = gen_lowpart (V8HImode, operands[2]);
6605 ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
;; Packs two V2DI inputs into one V4SI result.  Both inputs are
;; reinterpreted as V4SI with gen_lowpart and handed to
;; ix86_expand_vec_extract_even_odd with a final argument of 0.
;; NOTE(review): presumably 0 selects the even-numbered elements --
;; confirm against that helper's definition.
6609 (define_expand "vec_pack_trunc_v2di"
6610 [(match_operand:V4SI 0 "register_operand" "")
6611 (match_operand:V2DI 1 "register_operand" "")
6612 (match_operand:V2DI 2 "register_operand" "")]
6615 rtx op1 = gen_lowpart (V4SImode, operands[1]);
6616 rtx op2 = gen_lowpart (V4SImode, operands[2]);
6617 ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
;; VEX form: two V8HI inputs produce a V16QI result via the
;; three-operand vpacksswb.  Operand 1 must be a register; operand 2 may
;; be a register or memory.
;; NOTE(review): the rtx codes wrapping the operands and the enabling
;; condition are not visible in this view.
6621 (define_insn "*avx_packsswb"
6622 [(set (match_operand:V16QI 0 "register_operand" "=x")
6625 (match_operand:V8HI 1 "register_operand" "x"))
6627 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
6629 "vpacksswb\t{%2, %1, %0|%0, %1, %2}"
6630 [(set_attr "type" "sselog")
6631 (set_attr "prefix" "vex")
6632 (set_attr "mode" "TI")])
;; Two V8HI inputs produce a V16QI result via the two-operand packsswb.
;; Operand 1 is tied to the destination; operand 2 may be a register or
;; memory.
;; NOTE(review): the rtx codes wrapping the operands and the enabling
;; condition are not visible in this view.
6634 (define_insn "sse2_packsswb"
6635 [(set (match_operand:V16QI 0 "register_operand" "=x")
6638 (match_operand:V8HI 1 "register_operand" "0"))
6640 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
6642 "packsswb\t{%2, %0|%0, %2}"
6643 [(set_attr "type" "sselog")
6644 (set_attr "prefix_data16" "1")
6645 (set_attr "mode" "TI")])
;; VEX form: two V4SI inputs produce a V8HI result via the three-operand
;; vpackssdw.  Operand 1 must be a register; operand 2 may be a register
;; or memory.
;; NOTE(review): the rtx codes wrapping the operands and the enabling
;; condition are not visible in this view.
6647 (define_insn "*avx_packssdw"
6648 [(set (match_operand:V8HI 0 "register_operand" "=x")
6651 (match_operand:V4SI 1 "register_operand" "x"))
6653 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
6655 "vpackssdw\t{%2, %1, %0|%0, %1, %2}"
6656 [(set_attr "type" "sselog")
6657 (set_attr "prefix" "vex")
6658 (set_attr "mode" "TI")])
;; Two V4SI inputs produce a V8HI result via the two-operand packssdw.
;; Operand 1 is tied to the destination; operand 2 may be a register or
;; memory.
;; NOTE(review): the rtx codes wrapping the operands and the enabling
;; condition are not visible in this view.
6660 (define_insn "sse2_packssdw"
6661 [(set (match_operand:V8HI 0 "register_operand" "=x")
6664 (match_operand:V4SI 1 "register_operand" "0"))
6666 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
6668 "packssdw\t{%2, %0|%0, %2}"
6669 [(set_attr "type" "sselog")
6670 (set_attr "prefix_data16" "1")
6671 (set_attr "mode" "TI")])
;; VEX form: two V8HI inputs produce a V16QI result via the
;; three-operand vpackuswb.  Operand 1 must be a register; operand 2 may
;; be a register or memory.
;; NOTE(review): the rtx codes wrapping the operands and the enabling
;; condition are not visible in this view.
6673 (define_insn "*avx_packuswb"
6674 [(set (match_operand:V16QI 0 "register_operand" "=x")
6677 (match_operand:V8HI 1 "register_operand" "x"))
6679 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
6681 "vpackuswb\t{%2, %1, %0|%0, %1, %2}"
6682 [(set_attr "type" "sselog")
6683 (set_attr "prefix" "vex")
6684 (set_attr "mode" "TI")])
;; Two V8HI inputs produce a V16QI result via the two-operand packuswb.
;; Operand 1 is tied to the destination; operand 2 may be a register or
;; memory.
;; NOTE(review): the rtx codes wrapping the operands and the enabling
;; condition are not visible in this view.
6686 (define_insn "sse2_packuswb"
6687 [(set (match_operand:V16QI 0 "register_operand" "=x")
6690 (match_operand:V8HI 1 "register_operand" "0"))
6692 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
6694 "packuswb\t{%2, %0|%0, %2}"
6695 [(set_attr "type" "sselog")
6696 (set_attr "prefix_data16" "1")
6697 (set_attr "mode" "TI")])
;; VEX form of the high byte interleave: emits the three-operand
;; vpunpckhbw.  The selection list alternates indices 8..15 with 24..31.
;; NOTE(review): presumably these index the concatenation of operands 1
;; and 2; the enclosing rtx lines and the condition are not visible here.
6699 (define_insn "*avx_interleave_highv16qi"
6700 [(set (match_operand:V16QI 0 "register_operand" "=x")
6703 (match_operand:V16QI 1 "register_operand" "x")
6704 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
6705 (parallel [(const_int 8) (const_int 24)
6706 (const_int 9) (const_int 25)
6707 (const_int 10) (const_int 26)
6708 (const_int 11) (const_int 27)
6709 (const_int 12) (const_int 28)
6710 (const_int 13) (const_int 29)
6711 (const_int 14) (const_int 30)
6712 (const_int 15) (const_int 31)])))]
6714 "vpunpckhbw\t{%2, %1, %0|%0, %1, %2}"
6715 [(set_attr "type" "sselog")
6716 (set_attr "prefix" "vex")
6717 (set_attr "mode" "TI")])
;; High byte interleave: emits the two-operand punpckhbw with operand 1
;; tied to the destination.  The selection list alternates indices 8..15
;; with 24..31.
;; NOTE(review): presumably these index the concatenation of operands 1
;; and 2; the enclosing rtx lines and the condition are not visible here.
6719 (define_insn "vec_interleave_highv16qi"
6720 [(set (match_operand:V16QI 0 "register_operand" "=x")
6723 (match_operand:V16QI 1 "register_operand" "0")
6724 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
6725 (parallel [(const_int 8) (const_int 24)
6726 (const_int 9) (const_int 25)
6727 (const_int 10) (const_int 26)
6728 (const_int 11) (const_int 27)
6729 (const_int 12) (const_int 28)
6730 (const_int 13) (const_int 29)
6731 (const_int 14) (const_int 30)
6732 (const_int 15) (const_int 31)])))]
6734 "punpckhbw\t{%2, %0|%0, %2}"
6735 [(set_attr "type" "sselog")
6736 (set_attr "prefix_data16" "1")
6737 (set_attr "mode" "TI")])
;; VEX form of the low byte interleave: emits the three-operand
;; vpunpcklbw.  The selection list alternates indices 0..7 with 16..23.
;; NOTE(review): presumably these index the concatenation of operands 1
;; and 2; the enclosing rtx lines and the condition are not visible here.
6739 (define_insn "*avx_interleave_lowv16qi"
6740 [(set (match_operand:V16QI 0 "register_operand" "=x")
6743 (match_operand:V16QI 1 "register_operand" "x")
6744 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
6745 (parallel [(const_int 0) (const_int 16)
6746 (const_int 1) (const_int 17)
6747 (const_int 2) (const_int 18)
6748 (const_int 3) (const_int 19)
6749 (const_int 4) (const_int 20)
6750 (const_int 5) (const_int 21)
6751 (const_int 6) (const_int 22)
6752 (const_int 7) (const_int 23)])))]
6754 "vpunpcklbw\t{%2, %1, %0|%0, %1, %2}"
6755 [(set_attr "type" "sselog")
6756 (set_attr "prefix" "vex")
6757 (set_attr "mode" "TI")])
;; Low byte interleave: emits the two-operand punpcklbw with operand 1
;; tied to the destination.  The selection list alternates indices 0..7
;; with 16..23.
;; NOTE(review): presumably these index the concatenation of operands 1
;; and 2; the enclosing rtx lines and the condition are not visible here.
6759 (define_insn "vec_interleave_lowv16qi"
6760 [(set (match_operand:V16QI 0 "register_operand" "=x")
6763 (match_operand:V16QI 1 "register_operand" "0")
6764 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
6765 (parallel [(const_int 0) (const_int 16)
6766 (const_int 1) (const_int 17)
6767 (const_int 2) (const_int 18)
6768 (const_int 3) (const_int 19)
6769 (const_int 4) (const_int 20)
6770 (const_int 5) (const_int 21)
6771 (const_int 6) (const_int 22)
6772 (const_int 7) (const_int 23)])))]
6774 "punpcklbw\t{%2, %0|%0, %2}"
6775 [(set_attr "type" "sselog")
6776 (set_attr "prefix_data16" "1")
6777 (set_attr "mode" "TI")])
;; VEX form of the high word interleave: emits the three-operand
;; vpunpckhwd.  The selection list alternates indices 4..7 with 12..15.
;; NOTE(review): presumably these index the concatenation of operands 1
;; and 2; the enclosing rtx lines and the condition are not visible here.
6779 (define_insn "*avx_interleave_highv8hi"
6780 [(set (match_operand:V8HI 0 "register_operand" "=x")
6783 (match_operand:V8HI 1 "register_operand" "x")
6784 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
6785 (parallel [(const_int 4) (const_int 12)
6786 (const_int 5) (const_int 13)
6787 (const_int 6) (const_int 14)
6788 (const_int 7) (const_int 15)])))]
6790 "vpunpckhwd\t{%2, %1, %0|%0, %1, %2}"
6791 [(set_attr "type" "sselog")
6792 (set_attr "prefix" "vex")
6793 (set_attr "mode" "TI")])
;; High word interleave: emits the two-operand punpckhwd with operand 1
;; tied to the destination.  The selection list alternates indices 4..7
;; with 12..15.
;; NOTE(review): presumably these index the concatenation of operands 1
;; and 2; the enclosing rtx lines and the condition are not visible here.
6795 (define_insn "vec_interleave_highv8hi"
6796 [(set (match_operand:V8HI 0 "register_operand" "=x")
6799 (match_operand:V8HI 1 "register_operand" "0")
6800 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
6801 (parallel [(const_int 4) (const_int 12)
6802 (const_int 5) (const_int 13)
6803 (const_int 6) (const_int 14)
6804 (const_int 7) (const_int 15)])))]
6806 "punpckhwd\t{%2, %0|%0, %2}"
6807 [(set_attr "type" "sselog")
6808 (set_attr "prefix_data16" "1")
6809 (set_attr "mode" "TI")])
;; VEX form of the low word interleave: emits the three-operand
;; vpunpcklwd.  The selection list alternates indices 0..3 with 8..11.
;; NOTE(review): presumably these index the concatenation of operands 1
;; and 2; the enclosing rtx lines and the condition are not visible here.
6811 (define_insn "*avx_interleave_lowv8hi"
6812 [(set (match_operand:V8HI 0 "register_operand" "=x")
6815 (match_operand:V8HI 1 "register_operand" "x")
6816 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
6817 (parallel [(const_int 0) (const_int 8)
6818 (const_int 1) (const_int 9)
6819 (const_int 2) (const_int 10)
6820 (const_int 3) (const_int 11)])))]
6822 "vpunpcklwd\t{%2, %1, %0|%0, %1, %2}"
6823 [(set_attr "type" "sselog")
6824 (set_attr "prefix" "vex")
6825 (set_attr "mode" "TI")])
;; Low word interleave: emits the two-operand punpcklwd with operand 1
;; tied to the destination.  The selection list alternates indices 0..3
;; with 8..11.
;; NOTE(review): presumably these index the concatenation of operands 1
;; and 2; the enclosing rtx lines and the condition are not visible here.
6827 (define_insn "vec_interleave_lowv8hi"
6828 [(set (match_operand:V8HI 0 "register_operand" "=x")
6831 (match_operand:V8HI 1 "register_operand" "0")
6832 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
6833 (parallel [(const_int 0) (const_int 8)
6834 (const_int 1) (const_int 9)
6835 (const_int 2) (const_int 10)
6836 (const_int 3) (const_int 11)])))]
6838 "punpcklwd\t{%2, %0|%0, %2}"
6839 [(set_attr "type" "sselog")
6840 (set_attr "prefix_data16" "1")
6841 (set_attr "mode" "TI")])
;; VEX form of the high doubleword interleave: emits the three-operand
;; vpunpckhdq.  The selection list is 2, 6, 3, 7.
;; NOTE(review): presumably these index the concatenation of operands 1
;; and 2; the enclosing rtx lines and the condition are not visible here.
6843 (define_insn "*avx_interleave_highv4si"
6844 [(set (match_operand:V4SI 0 "register_operand" "=x")
6847 (match_operand:V4SI 1 "register_operand" "x")
6848 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
6849 (parallel [(const_int 2) (const_int 6)
6850 (const_int 3) (const_int 7)])))]
6852 "vpunpckhdq\t{%2, %1, %0|%0, %1, %2}"
6853 [(set_attr "type" "sselog")
6854 (set_attr "prefix" "vex")
6855 (set_attr "mode" "TI")])
;; High doubleword interleave: emits the two-operand punpckhdq with
;; operand 1 tied to the destination.  The selection list is 2, 6, 3, 7.
;; NOTE(review): presumably these index the concatenation of operands 1
;; and 2; the enclosing rtx lines and the condition are not visible here.
6857 (define_insn "vec_interleave_highv4si"
6858 [(set (match_operand:V4SI 0 "register_operand" "=x")
6861 (match_operand:V4SI 1 "register_operand" "0")
6862 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
6863 (parallel [(const_int 2) (const_int 6)
6864 (const_int 3) (const_int 7)])))]
6866 "punpckhdq\t{%2, %0|%0, %2}"
6867 [(set_attr "type" "sselog")
6868 (set_attr "prefix_data16" "1")
6869 (set_attr "mode" "TI")])
;; VEX form of the low doubleword interleave: emits the three-operand
;; vpunpckldq.  The selection list is 0, 4, 1, 5.
;; NOTE(review): presumably these index the concatenation of operands 1
;; and 2; the enclosing rtx lines and the condition are not visible here.
6871 (define_insn "*avx_interleave_lowv4si"
6872 [(set (match_operand:V4SI 0 "register_operand" "=x")
6875 (match_operand:V4SI 1 "register_operand" "x")
6876 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
6877 (parallel [(const_int 0) (const_int 4)
6878 (const_int 1) (const_int 5)])))]
6880 "vpunpckldq\t{%2, %1, %0|%0, %1, %2}"
6881 [(set_attr "type" "sselog")
6882 (set_attr "prefix" "vex")
6883 (set_attr "mode" "TI")])
;; Low doubleword interleave: emits the two-operand punpckldq with
;; operand 1 tied to the destination.  The selection list is 0, 4, 1, 5.
;; NOTE(review): presumably these index the concatenation of operands 1
;; and 2; the enclosing rtx lines and the condition are not visible here.
6885 (define_insn "vec_interleave_lowv4si"
6886 [(set (match_operand:V4SI 0 "register_operand" "=x")
6889 (match_operand:V4SI 1 "register_operand" "0")
6890 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
6891 (parallel [(const_int 0) (const_int 4)
6892 (const_int 1) (const_int 5)])))]
6894 "punpckldq\t{%2, %0|%0, %2}"
6895 [(set_attr "type" "sselog")
6896 (set_attr "prefix_data16" "1")
6897 (set_attr "mode" "TI")])
;; VEX element insert for V16QI/V8HI/V4SI: a vec_merge of the duplicated
;; scalar operand 2 into vector operand 1, selected by the power-of-two
;; mask in operand 3.  The output code converts that mask to an element
;; index with exact_log2, then prints operand 2 as %2 when it is in
;; memory and as %k2 otherwise.  prefix_extra is computed by testing
;; operand 0 for V8HI.
;; NOTE(review): the enabling condition is not visible in this view.
6899 (define_insn "*avx_pinsr<ssevecsize>"
6900 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
6901 (vec_merge:SSEMODE124
6902 (vec_duplicate:SSEMODE124
6903 (match_operand:<avxscalarmode> 2 "nonimmediate_operand" "rm"))
6904 (match_operand:SSEMODE124 1 "register_operand" "x")
6905 (match_operand:SI 3 "const_pow2_1_to_<pinsrbits>_operand" "n")))]
6908 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6909 if (MEM_P (operands[2]))
6910 return "vpinsr<ssevecsize>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
6912 return "vpinsr<ssevecsize>\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
6914 [(set_attr "type" "sselog")
6915 (set (attr "prefix_extra")
6916 (if_then_else (match_operand:V8HI 0 "register_operand" "")
6918 (const_string "1")))
6919 (set_attr "length_immediate" "1")
6920 (set_attr "prefix" "vex")
6921 (set_attr "mode" "TI")])
;; Byte insert into V16QI: the duplicated QI operand 2 is merged into
;; operand 1 (tied to the destination) under the power-of-two mask in
;; operand 3.  The output code converts the mask to an element index with
;; exact_log2 and emits pinsrb, printing operand 2 as %2 for memory and
;; as %k2 otherwise.
;; NOTE(review): the enabling condition is not visible in this view.
6923 (define_insn "*sse4_1_pinsrb"
6924 [(set (match_operand:V16QI 0 "register_operand" "=x")
6926 (vec_duplicate:V16QI
6927 (match_operand:QI 2 "nonimmediate_operand" "rm"))
6928 (match_operand:V16QI 1 "register_operand" "0")
6929 (match_operand:SI 3 "const_pow2_1_to_32768_operand" "n")))]
6932 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6933 if (MEM_P (operands[2]))
6934 return "pinsrb\t{%3, %2, %0|%0, %2, %3}";
6936 return "pinsrb\t{%3, %k2, %0|%0, %k2, %3}";
6938 [(set_attr "type" "sselog")
6939 (set_attr "prefix_extra" "1")
6940 (set_attr "length_immediate" "1")
6941 (set_attr "mode" "TI")])
;; Word insert into V8HI: HI operand 2 is merged into operand 1 (tied to
;; the destination) under the power-of-two mask in operand 3.  The output
;; code converts the mask to an element index with exact_log2 and emits
;; pinsrw, printing operand 2 as %2 for memory and as %k2 otherwise.
;; NOTE(review): the enabling condition is not visible in this view.
6943 (define_insn "*sse2_pinsrw"
6944 [(set (match_operand:V8HI 0 "register_operand" "=x")
6947 (match_operand:HI 2 "nonimmediate_operand" "rm"))
6948 (match_operand:V8HI 1 "register_operand" "0")
6949 (match_operand:SI 3 "const_pow2_1_to_128_operand" "n")))]
6952 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6953 if (MEM_P (operands[2]))
6954 return "pinsrw\t{%3, %2, %0|%0, %2, %3}";
6956 return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}";
6958 [(set_attr "type" "sselog")
6959 (set_attr "prefix_data16" "1")
6960 (set_attr "length_immediate" "1")
6961 (set_attr "mode" "TI")])
6963 ;; It must come before sse2_loadld since it is preferred.
;; Doubleword insert into V4SI: SI operand 2 (register or memory) is
;; merged into operand 1 (tied to the destination) under the
;; power-of-two mask in operand 3, which the output code converts to an
;; element index with exact_log2 before emitting pinsrd.
;; NOTE(review): the enabling condition is not visible in this view.
6964 (define_insn "*sse4_1_pinsrd"
6965 [(set (match_operand:V4SI 0 "register_operand" "=x")
6968 (match_operand:SI 2 "nonimmediate_operand" "rm"))
6969 (match_operand:V4SI 1 "register_operand" "0")
6970 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
6973 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6974 return "pinsrd\t{%3, %2, %0|%0, %2, %3}";
6976 [(set_attr "type" "sselog")
6977 (set_attr "prefix_extra" "1")
6978 (set_attr "length_immediate" "1")
6979 (set_attr "mode" "TI")])
;; VEX quadword insert into V2DI, available only for AVX in 64-bit mode.
;; DI operand 2 is merged into register operand 1 under the power-of-two
;; mask in operand 3; the output code converts the mask to an element
;; index with exact_log2 and emits the three-operand vpinsrq.
6981 (define_insn "*avx_pinsrq"
6982 [(set (match_operand:V2DI 0 "register_operand" "=x")
6985 (match_operand:DI 2 "nonimmediate_operand" "rm"))
6986 (match_operand:V2DI 1 "register_operand" "x")
6987 (match_operand:SI 3 "const_pow2_1_to_2_operand" "n")))]
6988 "TARGET_AVX && TARGET_64BIT"
6990 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6991 return "vpinsrq\t{%3, %2, %1, %0|%0, %1, %2, %3}";
6993 [(set_attr "type" "sselog")
6994 (set_attr "prefix_extra" "1")
6995 (set_attr "length_immediate" "1")
6996 (set_attr "prefix" "vex")
6997 (set_attr "mode" "TI")])
;; Quadword insert into V2DI, available only for SSE4.1 in 64-bit mode.
;; DI operand 2 is merged into operand 1 (tied to the destination) under
;; the power-of-two mask in operand 3; the output code converts the mask
;; to an element index with exact_log2 and emits pinsrq.
6999 (define_insn "*sse4_1_pinsrq"
7000 [(set (match_operand:V2DI 0 "register_operand" "=x")
7003 (match_operand:DI 2 "nonimmediate_operand" "rm"))
7004 (match_operand:V2DI 1 "register_operand" "0")
7005 (match_operand:SI 3 "const_pow2_1_to_2_operand" "n")))]
7006 "TARGET_SSE4_1 && TARGET_64BIT"
7008 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
7009 return "pinsrq\t{%3, %2, %0|%0, %2, %3}";
7011 [(set_attr "type" "sselog")
7012 (set_attr "prefix_rex" "1")
7013 (set_attr "prefix_extra" "1")
7014 (set_attr "length_immediate" "1")
7015 (set_attr "mode" "TI")])
;; Byte extract from a V16QI register into an SI general register; the
;; element index is the constant operand 2 (0..15).  Emits %vpextrb with
;; prefix attribute maybe_vex.
;; NOTE(review): the rtx wrapping the selection (the result is SI, the
;; element QI) and the enabling condition are not visible in this view.
7017 (define_insn "*sse4_1_pextrb"
7018 [(set (match_operand:SI 0 "register_operand" "=r")
7021 (match_operand:V16QI 1 "register_operand" "x")
7022 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")]))))]
7024 "%vpextrb\t{%2, %1, %0|%0, %1, %2}"
7025 [(set_attr "type" "sselog")
7026 (set_attr "prefix_extra" "1")
7027 (set_attr "length_immediate" "1")
7028 (set_attr "prefix" "maybe_vex")
7029 (set_attr "mode" "TI")])
;; Byte extract from a V16QI register stored directly to a QI memory
;; destination; the element index is the constant operand 2 (0..15).
;; Emits %vpextrb with prefix attribute maybe_vex.
;; NOTE(review): the enabling condition is not visible in this view.
7031 (define_insn "*sse4_1_pextrb_memory"
7032 [(set (match_operand:QI 0 "memory_operand" "=m")
7034 (match_operand:V16QI 1 "register_operand" "x")
7035 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")])))]
7037 "%vpextrb\t{%2, %1, %0|%0, %1, %2}"
7038 [(set_attr "type" "sselog")
7039 (set_attr "prefix_extra" "1")
7040 (set_attr "length_immediate" "1")
7041 (set_attr "prefix" "maybe_vex")
7042 (set_attr "mode" "TI")])
;; Word extract from a V8HI register into an SI general register; the
;; element index is the constant operand 2 (0..7).  Emits %vpextrw with
;; prefix attribute maybe_vex.
;; NOTE(review): the rtx wrapping the selection (the result is SI, the
;; element HI) and the enabling condition are not visible in this view.
7044 (define_insn "*sse2_pextrw"
7045 [(set (match_operand:SI 0 "register_operand" "=r")
7048 (match_operand:V8HI 1 "register_operand" "x")
7049 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")]))))]
7051 "%vpextrw\t{%2, %1, %0|%0, %1, %2}"
7052 [(set_attr "type" "sselog")
7053 (set_attr "prefix_data16" "1")
7054 (set_attr "length_immediate" "1")
7055 (set_attr "prefix" "maybe_vex")
7056 (set_attr "mode" "TI")])
;; Word extract from a V8HI register stored directly to an HI memory
;; destination; the element index is the constant operand 2 (0..7).
;; Emits %vpextrw with prefix attribute maybe_vex.
;; NOTE(review): the enabling condition is not visible in this view.
7058 (define_insn "*sse4_1_pextrw_memory"
7059 [(set (match_operand:HI 0 "memory_operand" "=m")
7061 (match_operand:V8HI 1 "register_operand" "x")
7062 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")])))]
7064 "%vpextrw\t{%2, %1, %0|%0, %1, %2}"
7065 [(set_attr "type" "sselog")
7066 (set_attr "prefix_extra" "1")
7067 (set_attr "length_immediate" "1")
7068 (set_attr "prefix" "maybe_vex")
7069 (set_attr "mode" "TI")])
;; Doubleword extract from a V4SI register into an SI destination that
;; may be a general register or memory ("=rm"); the element index is the
;; constant operand 2 (0..3).  Emits %vpextrd with prefix attribute
;; maybe_vex.
;; NOTE(review): the enabling condition is not visible in this view.
7071 (define_insn "*sse4_1_pextrd"
7072 [(set (match_operand:SI 0 "nonimmediate_operand" "=rm")
7074 (match_operand:V4SI 1 "register_operand" "x")
7075 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
7077 "%vpextrd\t{%2, %1, %0|%0, %1, %2}"
7078 [(set_attr "type" "sselog")
7079 (set_attr "prefix_extra" "1")
7080 (set_attr "length_immediate" "1")
7081 (set_attr "prefix" "maybe_vex")
7082 (set_attr "mode" "TI")])
7084 ;; It must come before *vec_extractv2di_1_sse since it is preferred.
;; Quadword extract from a V2DI register into a DI destination that may
;; be a general register or memory; the element index is the constant
;; operand 2 (0..1).  Available only for SSE4.1 in 64-bit mode; emits
;; %vpextrq with prefix attribute maybe_vex.
7085 (define_insn "*sse4_1_pextrq"
7086 [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
7088 (match_operand:V2DI 1 "register_operand" "x")
7089 (parallel [(match_operand:SI 2 "const_0_to_1_operand" "n")])))]
7090 "TARGET_SSE4_1 && TARGET_64BIT"
7091 "%vpextrq\t{%2, %1, %0|%0, %1, %2}"
7092 [(set_attr "type" "sselog")
7093 (set_attr "prefix_rex" "1")
7094 (set_attr "prefix_extra" "1")
7095 (set_attr "length_immediate" "1")
7096 (set_attr "prefix" "maybe_vex")
7097 (set_attr "mode" "TI")])
;; Expander taking the shuffle control as a single integer (operand 2).
;; It splits that value into four 2-bit fields (bits 0-1, 2-3, 4-5, 6-7)
;; and passes them as separate selectors to gen_sse2_pshufd_1.
7099 (define_expand "sse2_pshufd"
7100 [(match_operand:V4SI 0 "register_operand" "")
7101 (match_operand:V4SI 1 "nonimmediate_operand" "")
7102 (match_operand:SI 2 "const_int_operand" "")]
7105 int mask = INTVAL (operands[2]);
7106 emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
7107 GEN_INT ((mask >> 0) & 3),
7108 GEN_INT ((mask >> 2) & 3),
7109 GEN_INT ((mask >> 4) & 3),
7110 GEN_INT ((mask >> 6) & 3)));
;; V4SI shuffle with four explicit element selectors (operands 2..5,
;; each 0..3).  The output code packs the selectors back into a single
;; immediate -- two bits each at shifts 0, 2, 4 and 6 -- stores it in
;; operands[2], and emits %vpshufd.
7114 (define_insn "sse2_pshufd_1"
7115 [(set (match_operand:V4SI 0 "register_operand" "=x")
7117 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
7118 (parallel [(match_operand 2 "const_0_to_3_operand" "")
7119 (match_operand 3 "const_0_to_3_operand" "")
7120 (match_operand 4 "const_0_to_3_operand" "")
7121 (match_operand 5 "const_0_to_3_operand" "")])))]
7125 mask |= INTVAL (operands[2]) << 0;
7126 mask |= INTVAL (operands[3]) << 2;
7127 mask |= INTVAL (operands[4]) << 4;
7128 mask |= INTVAL (operands[5]) << 6;
7129 operands[2] = GEN_INT (mask);
7131 return "%vpshufd\t{%2, %1, %0|%0, %1, %2}";
7133 [(set_attr "type" "sselog1")
7134 (set_attr "prefix_data16" "1")
7135 (set_attr "prefix" "maybe_vex")
7136 (set_attr "length_immediate" "1")
7137 (set_attr "mode" "TI")])
;; Expander taking the shuffle control as a single integer (operand 2).
;; It splits that value into four 2-bit fields (bits 0-1, 2-3, 4-5, 6-7)
;; and passes them as separate selectors to gen_sse2_pshuflw_1.
7139 (define_expand "sse2_pshuflw"
7140 [(match_operand:V8HI 0 "register_operand" "")
7141 (match_operand:V8HI 1 "nonimmediate_operand" "")
7142 (match_operand:SI 2 "const_int_operand" "")]
7145 int mask = INTVAL (operands[2]);
7146 emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
7147 GEN_INT ((mask >> 0) & 3),
7148 GEN_INT ((mask >> 2) & 3),
7149 GEN_INT ((mask >> 4) & 3),
7150 GEN_INT ((mask >> 6) & 3)));
;; V8HI shuffle whose first four selectors are operands 2..5 (each
;; 0..3).  The output code packs them into a single immediate -- two bits
;; each at shifts 0, 2, 4 and 6 -- stores it in operands[2], and emits
;; %vpshuflw.  The attributes set prefix_rep to 1 and prefix_data16 to 0.
;; NOTE(review): the remaining entries of the selection list are not
;; visible in this view.
7154 (define_insn "sse2_pshuflw_1"
7155 [(set (match_operand:V8HI 0 "register_operand" "=x")
7157 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7158 (parallel [(match_operand 2 "const_0_to_3_operand" "")
7159 (match_operand 3 "const_0_to_3_operand" "")
7160 (match_operand 4 "const_0_to_3_operand" "")
7161 (match_operand 5 "const_0_to_3_operand" "")
7169 mask |= INTVAL (operands[2]) << 0;
7170 mask |= INTVAL (operands[3]) << 2;
7171 mask |= INTVAL (operands[4]) << 4;
7172 mask |= INTVAL (operands[5]) << 6;
7173 operands[2] = GEN_INT (mask);
7175 return "%vpshuflw\t{%2, %1, %0|%0, %1, %2}";
7177 [(set_attr "type" "sselog")
7178 (set_attr "prefix_data16" "0")
7179 (set_attr "prefix_rep" "1")
7180 (set_attr "prefix" "maybe_vex")
7181 (set_attr "length_immediate" "1")
7182 (set_attr "mode" "TI")])
;; Expander taking the shuffle control as a single integer (operand 2).
;; It splits that value into four 2-bit fields and adds 4 to each, so the
;; selectors passed to gen_sse2_pshufhw_1 fall in the range 4..7.
7184 (define_expand "sse2_pshufhw"
7185 [(match_operand:V8HI 0 "register_operand" "")
7186 (match_operand:V8HI 1 "nonimmediate_operand" "")
7187 (match_operand:SI 2 "const_int_operand" "")]
7190 int mask = INTVAL (operands[2]);
7191 emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
7192 GEN_INT (((mask >> 0) & 3) + 4),
7193 GEN_INT (((mask >> 2) & 3) + 4),
7194 GEN_INT (((mask >> 4) & 3) + 4),
7195 GEN_INT (((mask >> 6) & 3) + 4)));
;; V8HI shuffle whose last four selectors are operands 2..5 (each 4..7);
;; the selection list starts with the constant 0.  The output code
;; subtracts 4 from each selector, packs the results into a single
;; immediate -- two bits each at shifts 0, 2, 4 and 6 -- stores it in
;; operands[2], and emits %vpshufhw.
;; NOTE(review): the selection-list entries between the leading 0 and
;; operand 2 are not visible in this view.
7199 (define_insn "sse2_pshufhw_1"
7200 [(set (match_operand:V8HI 0 "register_operand" "=x")
7202 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7203 (parallel [(const_int 0)
7207 (match_operand 2 "const_4_to_7_operand" "")
7208 (match_operand 3 "const_4_to_7_operand" "")
7209 (match_operand 4 "const_4_to_7_operand" "")
7210 (match_operand 5 "const_4_to_7_operand" "")])))]
7214 mask |= (INTVAL (operands[2]) - 4) << 0;
7215 mask |= (INTVAL (operands[3]) - 4) << 2;
7216 mask |= (INTVAL (operands[4]) - 4) << 4;
7217 mask |= (INTVAL (operands[5]) - 4) << 6;
7218 operands[2] = GEN_INT (mask);
7220 return "%vpshufhw\t{%2, %1, %0|%0, %1, %2}";
7222 [(set_attr "type" "sselog")
7223 (set_attr "prefix_rep" "1")
7224 (set_attr "prefix_data16" "0")
7225 (set_attr "prefix" "maybe_vex")
7226 (set_attr "length_immediate" "1")
7227 (set_attr "mode" "TI")])
;; Expander that places SI operand 1 into a V4SI destination.  The
;; preparation statement supplies operands[2] as the V4SImode zero
;; constant.
;; NOTE(review): the rtx combining operand 1 with operands[2] and the
;; enabling condition are not visible in this view.
7229 (define_expand "sse2_loadd"
7230 [(set (match_operand:V4SI 0 "register_operand" "")
7233 (match_operand:SI 1 "nonimmediate_operand" ""))
7237 "operands[2] = CONST0_RTX (V4SImode);")
;; VEX scalar-load pattern with three alternatives, keyed on where SI
;; operand 2 lives: memory and general register both use vmovd, while an
;; SSE register source uses the three-operand vmovss together with
;; register operand 1.  Operand 1 is a reg_or_0_operand and uses the "C"
;; constraint in the first two alternatives.
;; NOTE(review): the enclosing rtx and the condition are not visible here.
7239 (define_insn "*avx_loadld"
7240 [(set (match_operand:V4SI 0 "register_operand" "=x,Yi,x")
7243 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,x"))
7244 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,x")
7248 vmovd\t{%2, %0|%0, %2}
7249 vmovd\t{%2, %0|%0, %2}
7250 vmovss\t{%2, %1, %0|%0, %1, %2}"
7251 [(set_attr "type" "ssemov")
7252 (set_attr "prefix" "vex")
7253 (set_attr "mode" "TI,TI,V4SF")])
;; Scalar-load pattern with four alternatives, keyed on where SI operand
;; 2 lives and on the destination class: the first two (memory, general
;; register) use movd; the last two (memory, SSE register) use movss.  In
;; the final alternative operand 1 is tied to the destination.
;; NOTE(review): the enclosing rtx and the condition are not visible here.
7255 (define_insn "sse2_loadld"
7256 [(set (match_operand:V4SI 0 "register_operand" "=Y2,Yi,x,x")
7259 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x"))
7260 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,C,0")
7264 movd\t{%2, %0|%0, %2}
7265 movd\t{%2, %0|%0, %2}
7266 movss\t{%2, %0|%0, %2}
7267 movss\t{%2, %0|%0, %2}"
7268 [(set_attr "type" "ssemov")
7269 (set_attr "mode" "TI,TI,V4SF,SF")])
;; Extracts element 0 of a V4SI register into an SI destination.  The
;; split runs after reload when inter-unit moves are enabled, the
;; destination is memory, or the destination is not a general register.
;; It rewrites operand 1 as an SImode REG with the same register number,
;; turning the extract into a plain move of operand 1 into operand 0.
;; NOTE(review): the insn condition and output template are not visible.
7271 (define_insn_and_split "sse2_stored"
7272 [(set (match_operand:SI 0 "nonimmediate_operand" "=mx,r")
7274 (match_operand:V4SI 1 "register_operand" "x,Yi")
7275 (parallel [(const_int 0)])))]
7278 "&& reload_completed
7279 && (TARGET_INTER_UNIT_MOVES
7280 || MEM_P (operands [0])
7281 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
7282 [(set (match_dup 0) (match_dup 1))]
7284 operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));
;; Extracts element operand 2 (0..3) from a V4SI held in offsettable
;; memory ("o") into an SI register.  The split code replaces the extract
;; with a plain SImode move from the same address adjusted by i*4 bytes.
;; NOTE(review): the insn and split conditions are not visible here.
7287 (define_insn_and_split "*vec_ext_v4si_mem"
7288 [(set (match_operand:SI 0 "register_operand" "=r")
7290 (match_operand:V4SI 1 "memory_operand" "o")
7291 (parallel [(match_operand 2 "const_0_to_3_operand" "")])))]
7297 int i = INTVAL (operands[2]);
7299 emit_move_insn (operands[0], adjust_address (operands[1], SImode, i*4));
;; Expander that stores element 0 of a V2DI register into a DI
;; destination, which may be a register or memory.
;; NOTE(review): the enabling condition is not visible in this view.
7303 (define_expand "sse_storeq"
7304 [(set (match_operand:DI 0 "nonimmediate_operand" "")
7306 (match_operand:V2DI 1 "register_operand" "")
7307 (parallel [(const_int 0)])))]
;; 64-bit-only extract of element 0 of a V2DI into a DI destination,
;; rejected when both operands are memory.  There are three alternatives;
;; the third (general-register destination, offsettable-memory source) is
;; an integer move emitted as %vmov{q}.
;; NOTE(review): the templates of the first two alternatives are not
;; visible in this view.
7311 (define_insn "*sse2_storeq_rex64"
7312 [(set (match_operand:DI 0 "nonimmediate_operand" "=mx,*r,r")
7314 (match_operand:V2DI 1 "nonimmediate_operand" "x,Yi,o")
7315 (parallel [(const_int 0)])))]
7316 "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7320 %vmov{q}\t{%1, %0|%0, %1}"
7321 [(set_attr "type" "*,*,imov")
7322 (set_attr "prefix" "*,*,maybe_vex")
7323 (set_attr "mode" "*,*,DI")])
;; Extract of element 0 of a V2DI register into a DI destination in
;; memory or an SSE register ("=mx").  This is followed by a second
;; operand template for the same extract with unconstrained operands.
;; Its visible condition requires inter-unit moves, a memory destination,
;; or a destination that is not a general register.  Its replacement is a
;; plain move of operand 1, rewritten as a DImode REG, into operand 0.
;; NOTE(review): the line opening the second definition (presumably a
;; define_split), the insn condition and the insn template are not
;; visible in this view -- confirm against the full file.
7325 (define_insn "*sse2_storeq"
7326 [(set (match_operand:DI 0 "nonimmediate_operand" "=mx")
7328 (match_operand:V2DI 1 "register_operand" "x")
7329 (parallel [(const_int 0)])))]
7334 [(set (match_operand:DI 0 "nonimmediate_operand" "")
7336 (match_operand:V2DI 1 "register_operand" "")
7337 (parallel [(const_int 0)])))]
7340 && (TARGET_INTER_UNIT_MOVES
7341 || MEM_P (operands [0])
7342 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
7343 [(set (match_dup 0) (match_dup 1))]
7345 operands[1] = gen_rtx_REG (DImode, REGNO (operands[1]));
;; VEX extract of element 1 of a V2DI, rejected when both operands are
;; memory.  Four alternatives:
;;   register -> memory:   vmovhps
;;   register -> register: vpsrldq by 8 bytes
;;   memory   -> register: vmovq from %H1
;;   memory   -> GPR:      vmov{q} from %H1
;; NOTE(review): %H1 presumably addresses the upper half of the memory
;; operand; the first part of the condition is not visible in this view.
7348 (define_insn "*vec_extractv2di_1_rex64_avx"
7349 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x,r")
7351 (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o,o")
7352 (parallel [(const_int 1)])))]
7355 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7357 vmovhps\t{%1, %0|%0, %1}
7358 vpsrldq\t{$8, %1, %0|%0, %1, 8}
7359 vmovq\t{%H1, %0|%0, %H1}
7360 vmov{q}\t{%H1, %0|%0, %H1}"
7361 [(set_attr "type" "ssemov,sseishft,ssemov,imov")
7362 (set_attr "length_immediate" "*,1,*,*")
7363 (set_attr "memory" "*,none,*,*")
7364 (set_attr "prefix" "vex")
7365 (set_attr "mode" "V2SF,TI,TI,DI")])
;; "*vec_extractv2di_1_rex64": TARGET_64BIT form without a VEX prefix
;; attribute; sets DImode operand 0 from V2DI operand 1 with selector
;; index 1, rejecting memory-to-memory.  Alternative 1 ties operand 1 to
;; operand 0 (constraint "0") and its psrldq template names only %0.
;; Alternatives 2 and 3 read %H1 with movq / mov{q}.
7367 (define_insn "*vec_extractv2di_1_rex64"
7368 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x,r")
7370 (match_operand:V2DI 1 "nonimmediate_operand" "x,0,o,o")
7371 (parallel [(const_int 1)])))]
7372 "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7374 movhps\t{%1, %0|%0, %1}
7375 psrldq\t{$8, %0|%0, 8}
7376 movq\t{%H1, %0|%0, %H1}
7377 mov{q}\t{%H1, %0|%0, %H1}"
7378 [(set_attr "type" "ssemov,sseishft,ssemov,imov")
7379 (set_attr "length_immediate" "*,1,*,*")
7380 (set_attr "atom_unit" "*,sishuf,*,*")
7381 (set_attr "memory" "*,none,*,*")
7382 (set_attr "mode" "V2SF,TI,TI,DI")])
;; "*vec_extractv2di_1_avx": three-alternative, prefix "vex" pattern that
;; sets DImode operand 0 from V2DI operand 1 with selector index 1.
;; Templates: vmovhps, vpsrldq by 8 bytes, vmovq from %H1.  Unlike the
;; rex64 variant there is no alternative with an "r" destination.  The
;; visible condition line rejects memory-to-memory.
7384 (define_insn "*vec_extractv2di_1_avx"
7385 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
7387 (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o")
7388 (parallel [(const_int 1)])))]
7391 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7393 vmovhps\t{%1, %0|%0, %1}
7394 vpsrldq\t{$8, %1, %0|%0, %1, 8}
7395 vmovq\t{%H1, %0|%0, %H1}"
7396 [(set_attr "type" "ssemov,sseishft,ssemov")
7397 (set_attr "length_immediate" "*,1,*")
7398 (set_attr "memory" "*,none,*")
7399 (set_attr "prefix" "vex")
7400 (set_attr "mode" "V2SF,TI,TI")])
;; "*vec_extractv2di_1_sse2": sets DImode operand 0 from V2DI operand 1 with
;; selector index 1.  The visible part of the condition requires
;; TARGET_SSE2 and rejects memory-to-memory.  Templates: movhps, psrldq by
;; 8 bytes on %0 (operand 1 tied via constraint "0"), movq from %H1.
7402 (define_insn "*vec_extractv2di_1_sse2"
7403 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
7405 (match_operand:V2DI 1 "nonimmediate_operand" "x,0,o")
7406 (parallel [(const_int 1)])))]
7408 && TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7410 movhps\t{%1, %0|%0, %1}
7411 psrldq\t{$8, %0|%0, 8}
7412 movq\t{%H1, %0|%0, %H1}"
7413 [(set_attr "type" "ssemov,sseishft,ssemov")
7414 (set_attr "length_immediate" "*,1,*")
7415 (set_attr "atom_unit" "*,sishuf,*")
7416 (set_attr "memory" "*,none,*")
7417 (set_attr "mode" "V2SF,TI,TI")])
7419 ;; Not sure this is ever used, but it doesn't hurt to have it. -aoliva
;; "*vec_extractv2di_1_sse": variant enabled when TARGET_SSE is set but
;; TARGET_SSE2 is not, rejecting memory-to-memory.  Sets DImode operand 0
;; from V2DI operand 1 with selector index 1 using movhps, movhlps, or
;; movlps from %H1; all alternatives have type ssemov.
7420 (define_insn "*vec_extractv2di_1_sse"
7421 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
7423 (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o")
7424 (parallel [(const_int 1)])))]
7425 "!TARGET_SSE2 && TARGET_SSE
7426 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7428 movhps\t{%1, %0|%0, %1}
7429 movhlps\t{%1, %0|%0, %1}
7430 movlps\t{%H1, %0|%0, %H1}"
7431 [(set_attr "type" "ssemov")
7432 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; "*vec_dupv4si_avx": V4SI register operand 0 built from SImode operand 1,
;; prefix "vex".  Alternative 0 (constraint "x") emits vpshufd with
;; immediate 0; alternative 1 (constraint "m") emits vbroadcastss.
;; NOTE(review): operand 1 uses predicate register_operand although
;; alternative 1 has constraint "m" -- presumably memory is only reached
;; through reload; confirm whether nonimmediate_operand was intended.
7434 (define_insn "*vec_dupv4si_avx"
7435 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
7437 (match_operand:SI 1 "register_operand" "x,m")))]
7440 vpshufd\t{$0, %1, %0|%0, %1, 0}
7441 vbroadcastss\t{%1, %0|%0, %1}"
7442 [(set_attr "type" "sselog1,ssemov")
7443 (set_attr "length_immediate" "1,0")
7444 (set_attr "prefix_extra" "0,1")
7445 (set_attr "prefix" "vex")
7446 (set_attr "mode" "TI,V4SF")])
;; "*vec_dupv4si": V4SI register operand 0 built from SImode register
;; operand 1.  Alternative 0 (constraints "Y2"/"Y2") emits %vpshufd with
;; immediate 0; alternative 1 ties operand 1 to operand 0 and emits shufps
;; with immediate 0 on %0 alone.
7448 (define_insn "*vec_dupv4si"
7449 [(set (match_operand:V4SI 0 "register_operand" "=Y2,x")
7451 (match_operand:SI 1 "register_operand" " Y2,0")))]
7454 %vpshufd\t{$0, %1, %0|%0, %1, 0}
7455 shufps\t{$0, %0, %0|%0, %0, 0}"
7456 [(set_attr "type" "sselog1")
7457 (set_attr "length_immediate" "1")
7458 (set_attr "mode" "TI,V4SF")])
;; "*vec_dupv2di_avx": V2DI register operand 0 built from DImode operand 1,
;; prefix "vex".  Alternative 0 (constraint "x") emits vpunpcklqdq with %1
;; as both sources; alternative 1 (constraint "m") emits vmovddup.
7460 (define_insn "*vec_dupv2di_avx"
7461 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
7463 (match_operand:DI 1 "nonimmediate_operand" " x,m")))]
7466 vpunpcklqdq\t{%1, %1, %0|%0, %1, %1}
7467 vmovddup\t{%1, %0|%0, %1}"
7468 [(set_attr "type" "sselog1")
7469 (set_attr "prefix" "vex")
7470 (set_attr "mode" "TI,DF")])
;; "*vec_dupv2di_sse3": V2DI register operand 0 built from DImode operand 1.
;; Two alternatives (operand 1 tied to operand 0, or memory); only the
;; second template, movddup, is visible in this excerpt.  The enabling
;; condition is likewise not visible here.
7472 (define_insn "*vec_dupv2di_sse3"
7473 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
7475 (match_operand:DI 1 "nonimmediate_operand" " 0,m")))]
7479 movddup\t{%1, %0|%0, %1}"
7480 [(set_attr "type" "sselog1")
7481 (set_attr "mode" "TI,DF")])
;; "*vec_dupv2di": V2DI register operand 0 built from DImode register
;; operand 1, which is tied to operand 0 in both alternatives.  Attribute
;; types are sselog1 and ssemov, modes TI and V4SF.  The condition and the
;; assembler templates are not visible in this excerpt.
7483 (define_insn "*vec_dupv2di"
7484 [(set (match_operand:V2DI 0 "register_operand" "=Y2,x")
7486 (match_operand:DI 1 "register_operand" " 0 ,0")))]
7491 [(set_attr "type" "sselog1,ssemov")
7492 (set_attr "mode" "TI,V4SF")])
;; "*vec_concatv2si_avx": V2SI register operand 0 built from SImode
;; operands 1 and 2.  Five alternatives by template: vpinsrd with immediate
;; 0x1, vpunpckldq, vmovd of %1 (operand 2 constraint "C"), then two
;; alternatives with "*y" destinations using punpckldq and movd.  The
;; prefix attribute is "orig" for alternatives 3 and 4 and "vex" otherwise.
7494 (define_insn "*vec_concatv2si_avx"
7495 [(set (match_operand:V2SI 0 "register_operand" "=x,x,x ,*y ,*y")
7497 (match_operand:SI 1 "nonimmediate_operand" "x ,x,rm, 0 ,rm")
7498 (match_operand:SI 2 "vector_move_operand" "rm,x,C ,*ym,C")))]
7501 vpinsrd\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}
7502 vpunpckldq\t{%2, %1, %0|%0, %1, %2}
7503 vmovd\t{%1, %0|%0, %1}
7504 punpckldq\t{%2, %0|%0, %2}
7505 movd\t{%1, %0|%0, %1}"
7506 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
7507 (set_attr "prefix_extra" "1,*,*,*,*")
7508 (set_attr "length_immediate" "1,*,*,*,*")
7509 (set (attr "prefix")
7510 (if_then_else (eq_attr "alternative" "3,4")
7511 (const_string "orig")
7512 (const_string "vex")))
7513 (set_attr "mode" "TI,TI,TI,DI,DI")])
;; "*vec_concatv2si_sse4_1": V2SI register operand 0 built from SImode
;; operands 1 and 2 with two-operand templates.  Alternatives 0, 1 and 3
;; tie operand 1 to operand 0 and emit pinsrd (immediate 0x1) or
;; punpckldq with %2; alternatives 2 and 4 (operand 2 constraint "C")
;; emit movd of %1.  Alternatives 3 and 4 have "*y" destinations.
7515 (define_insn "*vec_concatv2si_sse4_1"
7516 [(set (match_operand:V2SI 0 "register_operand" "=x,x,x ,*y ,*y")
7518 (match_operand:SI 1 "nonimmediate_operand" "0 ,0,rm, 0 ,rm")
7519 (match_operand:SI 2 "vector_move_operand" "rm,x,C ,*ym,C")))]
7522 pinsrd\t{$0x1, %2, %0|%0, %2, 0x1}
7523 punpckldq\t{%2, %0|%0, %2}
7524 movd\t{%1, %0|%0, %1}
7525 punpckldq\t{%2, %0|%0, %2}
7526 movd\t{%1, %0|%0, %1}"
7527 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
7528 (set_attr "prefix_extra" "1,*,*,*,*")
7529 (set_attr "length_immediate" "1,*,*,*,*")
7530 (set_attr "mode" "TI,TI,TI,DI,DI")])
7532 ;; ??? In theory we can match memory for the MMX alternative, but allowing
7533 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
7534 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; "*vec_concatv2si_sse2": V2SI register operand 0 built from SImode
;; operand 1 and operand 2 (predicate reg_or_0_operand, so no memory for
;; operand 2).  Alternatives 0 and 2 tie operand 1 to operand 0 and emit
;; punpckldq with %2; alternatives 1 and 3 (operand 2 constraint "C") emit
;; movd of %1.  Alternatives 2 and 3 use "*y" registers.
7535 (define_insn "*vec_concatv2si_sse2"
7536 [(set (match_operand:V2SI 0 "register_operand" "=x,x ,*y,*y")
7538 (match_operand:SI 1 "nonimmediate_operand" " 0,rm, 0,rm")
7539 (match_operand:SI 2 "reg_or_0_operand" " x,C ,*y, C")))]
7542 punpckldq\t{%2, %0|%0, %2}
7543 movd\t{%1, %0|%0, %1}
7544 punpckldq\t{%2, %0|%0, %2}
7545 movd\t{%1, %0|%0, %1}"
7546 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
7547 (set_attr "mode" "TI,TI,DI,DI")])
;; "*vec_concatv2si_sse": V2SI register operand 0 built from SImode
;; operands 1 and 2.  The "x" alternatives use unpcklps with %2 and movss
;; from memory operand 1 (modes V4SF); the "*y" alternatives use punpckldq
;; and movd (modes DI).  The enabling condition is not visible here.
7549 (define_insn "*vec_concatv2si_sse"
7550 [(set (match_operand:V2SI 0 "register_operand" "=x,x,*y,*y")
7552 (match_operand:SI 1 "nonimmediate_operand" " 0,m, 0,*rm")
7553 (match_operand:SI 2 "reg_or_0_operand" " x,C,*y,C")))]
7556 unpcklps\t{%2, %0|%0, %2}
7557 movss\t{%1, %0|%0, %1}
7558 punpckldq\t{%2, %0|%0, %2}
7559 movd\t{%1, %0|%0, %1}"
7560 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
7561 (set_attr "mode" "V4SF,V4SF,DI,DI")])
;; "*vec_concatv4si_1_avx": V4SI register operand 0 built from V2SI
;; register operand 1 and V2SI operand 2, prefix "vex".  With operand 2 in
;; a register (constraint "x") the template is vpunpcklqdq; with operand 2
;; in memory (constraint "m") it is vmovhps.
7563 (define_insn "*vec_concatv4si_1_avx"
7564 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
7566 (match_operand:V2SI 1 "register_operand" " x,x")
7567 (match_operand:V2SI 2 "nonimmediate_operand" " x,m")))]
7570 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7571 vmovhps\t{%2, %1, %0|%0, %1, %2}"
7572 [(set_attr "type" "sselog,ssemov")
7573 (set_attr "prefix" "vex")
7574 (set_attr "mode" "TI,V2SF")])
;; "*vec_concatv4si_1": V4SI register operand 0 built from V2SI operand 1,
;; tied to operand 0 in every alternative, and V2SI operand 2.  Templates
;; by alternative: punpcklqdq (constraint "Y2"), movlhps (register "x"),
;; movhps (memory "m"); each names only %2 and %0.
7576 (define_insn "*vec_concatv4si_1"
7577 [(set (match_operand:V4SI 0 "register_operand" "=Y2,x,x")
7579 (match_operand:V2SI 1 "register_operand" " 0 ,0,0")
7580 (match_operand:V2SI 2 "nonimmediate_operand" " Y2,x,m")))]
7583 punpcklqdq\t{%2, %0|%0, %2}
7584 movlhps\t{%2, %0|%0, %2}
7585 movhps\t{%2, %0|%0, %2}"
7586 [(set_attr "type" "sselog,ssemov,ssemov")
7587 (set_attr "mode" "TI,V4SF,V2SF")])
;; "*vec_concatv2di_avx": enabled for !TARGET_64BIT && TARGET_AVX.  V2DI
;; register operand 0 built from DImode operands 1 and 2.  Alternatives 0
;; and 1 have operand 2 constraint "C" and just move %1 (vmovq from
;; memory, movq2dq from a "*y" register); alternatives 2 and 3 combine %1
;; and %2 with vpunpcklqdq or vmovhps.  Prefix is "orig" for alternative 1
;; (movq2dq) and "vex" for the rest.
7589 (define_insn "*vec_concatv2di_avx"
7590 [(set (match_operand:V2DI 0 "register_operand" "=x,?x,x,x")
7592 (match_operand:DI 1 "nonimmediate_operand" " m,*y,x,x")
7593 (match_operand:DI 2 "vector_move_operand" " C, C,x,m")))]
7594 "!TARGET_64BIT && TARGET_AVX"
7596 vmovq\t{%1, %0|%0, %1}
7597 movq2dq\t{%1, %0|%0, %1}
7598 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7599 vmovhps\t{%2, %1, %0|%0, %1, %2}"
7600 [(set_attr "type" "ssemov,ssemov,sselog,ssemov")
7601 (set (attr "prefix")
7602 (if_then_else (eq_attr "alternative" "1")
7603 (const_string "orig")
7604 (const_string "vex")))
7605 (set_attr "mode" "TI,TI,TI,V2SF")])
;; "vec_concatv2di": enabled for !TARGET_64BIT && TARGET_SSE.  V2DI register
;; operand 0 built from DImode operands 1 and 2.  Alternatives 0 and 1
;; have operand 2 constraint "C" and move %1 with movq or movq2dq;
;; alternatives 2-4 tie operand 1 to operand 0 and add %2 with punpcklqdq,
;; movlhps or movhps.
7607 (define_insn "vec_concatv2di"
7608 [(set (match_operand:V2DI 0 "register_operand" "=Y2 ,?Y2,Y2,x,x")
7610 (match_operand:DI 1 "nonimmediate_operand" " mY2,*y ,0 ,0,0")
7611 (match_operand:DI 2 "vector_move_operand" " C , C,Y2,x,m")))]
7612 "!TARGET_64BIT && TARGET_SSE"
7614 movq\t{%1, %0|%0, %1}
7615 movq2dq\t{%1, %0|%0, %1}
7616 punpcklqdq\t{%2, %0|%0, %2}
7617 movlhps\t{%2, %0|%0, %2}
7618 movhps\t{%2, %0|%0, %2}"
7619 [(set_attr "type" "ssemov,ssemov,sselog,ssemov,ssemov")
7620 (set_attr "mode" "TI,TI,TI,V4SF,V2SF")])
;; "*vec_concatv2di_rex64_avx": enabled for TARGET_64BIT && TARGET_AVX.
;; V2DI register operand 0 built from DImode operands 1 and 2.  Six
;; alternatives by template: vpinsrq with immediate 0x1; vmovq of %1
;; (twice, for operand 1 constraints "m" and "r"); movq2dq of a "*y"
;; register; vpunpcklqdq; vmovhps.  Alternatives 1-3 have operand 2
;; constraint "C".  Prefix is "orig" for alternative 3, "vex" otherwise.
7622 (define_insn "*vec_concatv2di_rex64_avx"
7623 [(set (match_operand:V2DI 0 "register_operand" "=x,x,Yi,!x,x,x")
7625 (match_operand:DI 1 "nonimmediate_operand" " x,m,r ,*y,x,x")
7626 (match_operand:DI 2 "vector_move_operand" "rm,C,C ,C ,x,m")))]
7627 "TARGET_64BIT && TARGET_AVX"
7629 vpinsrq\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}
7630 vmovq\t{%1, %0|%0, %1}
7631 vmovq\t{%1, %0|%0, %1}
7632 movq2dq\t{%1, %0|%0, %1}
7633 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7634 vmovhps\t{%2, %1, %0|%0, %1, %2}"
7635 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,sselog,ssemov")
7636 (set_attr "prefix_extra" "1,*,*,*,*,*")
7637 (set_attr "length_immediate" "1,*,*,*,*,*")
7638 (set (attr "prefix")
7639 (if_then_else (eq_attr "alternative" "3")
7640 (const_string "orig")
7641 (const_string "vex")))
7642 (set_attr "mode" "TI,TI,TI,TI,TI,V2SF")])
;; "*vec_concatv2di_rex64_sse4_1": enabled for TARGET_64BIT &&
;; TARGET_SSE4_1.  V2DI register operand 0 built from DImode operands 1
;; and 2 with two-operand templates.  Alternative 0 is pinsrq (immediate
;; 0x1); alternatives 1-3 have operand 2 constraint "C" and move %1 with
;; movq or movq2dq; alternatives 4-6 tie operand 1 to operand 0 and use
;; punpcklqdq, movlhps, movhps.  prefix_rex is forced to 1 for
;; alternatives 0 and 2.
7644 (define_insn "*vec_concatv2di_rex64_sse4_1"
7645 [(set (match_operand:V2DI 0 "register_operand" "=x ,x ,Yi,!x,x,x,x")
7647 (match_operand:DI 1 "nonimmediate_operand" " 0 ,mx,r ,*y,0,0,0")
7648 (match_operand:DI 2 "vector_move_operand" " rm,C ,C ,C ,x,x,m")))]
7649 "TARGET_64BIT && TARGET_SSE4_1"
7651 pinsrq\t{$0x1, %2, %0|%0, %2, 0x1}
7652 movq\t{%1, %0|%0, %1}
7653 movq\t{%1, %0|%0, %1}
7654 movq2dq\t{%1, %0|%0, %1}
7655 punpcklqdq\t{%2, %0|%0, %2}
7656 movlhps\t{%2, %0|%0, %2}
7657 movhps\t{%2, %0|%0, %2}"
7658 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
7659 (set_attr "prefix_rex" "1,*,1,*,*,*,*")
7660 (set_attr "prefix_extra" "1,*,*,*,*,*,*")
7661 (set_attr "length_immediate" "1,*,*,*,*,*,*")
7662 (set_attr "mode" "TI,TI,TI,TI,TI,V4SF,V2SF")])
;; "*vec_concatv2di_rex64_sse": enabled for TARGET_64BIT && TARGET_SSE.
;; V2DI register operand 0 built from DImode operands 1 and 2.
;; Alternatives 0-2 have operand 2 constraint "C" and move %1 with movq or
;; movq2dq; alternatives 3-5 tie operand 1 to operand 0 and use
;; punpcklqdq, movlhps, movhps.  prefix_rex is forced to 1 only for
;; alternative 1 (operand 1 constraint "r").
7664 (define_insn "*vec_concatv2di_rex64_sse"
7665 [(set (match_operand:V2DI 0 "register_operand" "=Y2 ,Yi,!Y2,Y2,x,x")
7667 (match_operand:DI 1 "nonimmediate_operand" " mY2,r ,*y ,0 ,0,0")
7668 (match_operand:DI 2 "vector_move_operand" " C ,C ,C ,Y2,x,m")))]
7669 "TARGET_64BIT && TARGET_SSE"
7671 movq\t{%1, %0|%0, %1}
7672 movq\t{%1, %0|%0, %1}
7673 movq2dq\t{%1, %0|%0, %1}
7674 punpcklqdq\t{%2, %0|%0, %2}
7675 movlhps\t{%2, %0|%0, %2}
7676 movhps\t{%2, %0|%0, %2}"
7677 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
7678 (set_attr "prefix_rex" "*,1,*,*,*,*")
7679 (set_attr "mode" "TI,TI,TI,TI,V4SF,V2SF")])
;; Expander "vec_unpacku_hi_v16qi": V16QI register operand 1 to V8HI
;; register operand 0.  Calls ix86_expand_sse4_unpack or
;; ix86_expand_sse_unpack with (operands, true, true); the test selecting
;; between them is not visible in this excerpt.
;; NOTE(review): the two booleans presumably mean "unsigned" and "high
;; half", matching "u" and "hi" in the name -- confirm against the helpers.
7681 (define_expand "vec_unpacku_hi_v16qi"
7682 [(match_operand:V8HI 0 "register_operand" "")
7683 (match_operand:V16QI 1 "register_operand" "")]
7687 ix86_expand_sse4_unpack (operands, true, true);
7689 ix86_expand_sse_unpack (operands, true, true);
;; Expander "vec_unpacks_hi_v16qi": V16QI register operand 1 to V8HI
;; register operand 0.  Calls ix86_expand_sse4_unpack or
;; ix86_expand_sse_unpack with (operands, false, true); the test selecting
;; between them is not visible in this excerpt.
;; NOTE(review): the two booleans presumably mean "unsigned" and "high
;; half", matching "s" and "hi" in the name -- confirm against the helpers.
7693 (define_expand "vec_unpacks_hi_v16qi"
7694 [(match_operand:V8HI 0 "register_operand" "")
7695 (match_operand:V16QI 1 "register_operand" "")]
7699 ix86_expand_sse4_unpack (operands, false, true);
7701 ix86_expand_sse_unpack (operands, false, true);
;; Expander "vec_unpacku_lo_v16qi": V16QI register operand 1 to V8HI
;; register operand 0.  Calls ix86_expand_sse4_unpack or
;; ix86_expand_sse_unpack with (operands, true, false); the test selecting
;; between them is not visible in this excerpt.
;; NOTE(review): the two booleans presumably mean "unsigned" and "high
;; half", matching "u" and "lo" in the name -- confirm against the helpers.
7705 (define_expand "vec_unpacku_lo_v16qi"
7706 [(match_operand:V8HI 0 "register_operand" "")
7707 (match_operand:V16QI 1 "register_operand" "")]
7711 ix86_expand_sse4_unpack (operands, true, false);
7713 ix86_expand_sse_unpack (operands, true, false);
;; Expander "vec_unpacks_lo_v16qi": V16QI register operand 1 to V8HI
;; register operand 0.  Calls ix86_expand_sse4_unpack or
;; ix86_expand_sse_unpack with (operands, false, false); the test
;; selecting between them is not visible in this excerpt.
;; NOTE(review): the two booleans presumably mean "unsigned" and "high
;; half", matching "s" and "lo" in the name -- confirm against the helpers.
7717 (define_expand "vec_unpacks_lo_v16qi"
7718 [(match_operand:V8HI 0 "register_operand" "")
7719 (match_operand:V16QI 1 "register_operand" "")]
7723 ix86_expand_sse4_unpack (operands, false, false);
7725 ix86_expand_sse_unpack (operands, false, false);
;; Expander "vec_unpacku_hi_v8hi": V8HI register operand 1 to V4SI register
;; operand 0.  Calls ix86_expand_sse4_unpack or ix86_expand_sse_unpack
;; with (operands, true, true); the test selecting between them is not
;; visible in this excerpt.
;; NOTE(review): the two booleans presumably mean "unsigned" and "high
;; half", matching "u" and "hi" in the name -- confirm against the helpers.
7729 (define_expand "vec_unpacku_hi_v8hi"
7730 [(match_operand:V4SI 0 "register_operand" "")
7731 (match_operand:V8HI 1 "register_operand" "")]
7735 ix86_expand_sse4_unpack (operands, true, true);
7737 ix86_expand_sse_unpack (operands, true, true);
;; Expander "vec_unpacks_hi_v8hi": V8HI register operand 1 to V4SI register
;; operand 0.  Calls ix86_expand_sse4_unpack or ix86_expand_sse_unpack
;; with (operands, false, true); the test selecting between them is not
;; visible in this excerpt.
;; NOTE(review): the two booleans presumably mean "unsigned" and "high
;; half", matching "s" and "hi" in the name -- confirm against the helpers.
7741 (define_expand "vec_unpacks_hi_v8hi"
7742 [(match_operand:V4SI 0 "register_operand" "")
7743 (match_operand:V8HI 1 "register_operand" "")]
7747 ix86_expand_sse4_unpack (operands, false, true);
7749 ix86_expand_sse_unpack (operands, false, true);
;; Expander "vec_unpacku_lo_v8hi": V8HI register operand 1 to V4SI register
;; operand 0.  Calls ix86_expand_sse4_unpack or ix86_expand_sse_unpack
;; with (operands, true, false); the test selecting between them is not
;; visible in this excerpt.
;; NOTE(review): the two booleans presumably mean "unsigned" and "high
;; half", matching "u" and "lo" in the name -- confirm against the helpers.
7753 (define_expand "vec_unpacku_lo_v8hi"
7754 [(match_operand:V4SI 0 "register_operand" "")
7755 (match_operand:V8HI 1 "register_operand" "")]
7759 ix86_expand_sse4_unpack (operands, true, false);
7761 ix86_expand_sse_unpack (operands, true, false);
;; Expander "vec_unpacks_lo_v8hi": V8HI register operand 1 to V4SI register
;; operand 0.  Calls ix86_expand_sse4_unpack or ix86_expand_sse_unpack
;; with (operands, false, false); the test selecting between them is not
;; visible in this excerpt.
;; NOTE(review): the two booleans presumably mean "unsigned" and "high
;; half", matching "s" and "lo" in the name -- confirm against the helpers.
7765 (define_expand "vec_unpacks_lo_v8hi"
7766 [(match_operand:V4SI 0 "register_operand" "")
7767 (match_operand:V8HI 1 "register_operand" "")]
7771 ix86_expand_sse4_unpack (operands, false, false);
7773 ix86_expand_sse_unpack (operands, false, false);
;; Expander "vec_unpacku_hi_v4si": V4SI register operand 1 to V2DI register
;; operand 0.  Calls ix86_expand_sse4_unpack or ix86_expand_sse_unpack
;; with (operands, true, true); the test selecting between them is not
;; visible in this excerpt.
;; NOTE(review): the two booleans presumably mean "unsigned" and "high
;; half", matching "u" and "hi" in the name -- confirm against the helpers.
7777 (define_expand "vec_unpacku_hi_v4si"
7778 [(match_operand:V2DI 0 "register_operand" "")
7779 (match_operand:V4SI 1 "register_operand" "")]
7783 ix86_expand_sse4_unpack (operands, true, true);
7785 ix86_expand_sse_unpack (operands, true, true);
;; Expander "vec_unpacks_hi_v4si": V4SI register operand 1 to V2DI register
;; operand 0.  Calls ix86_expand_sse4_unpack or ix86_expand_sse_unpack
;; with (operands, false, true); the test selecting between them is not
;; visible in this excerpt.
;; NOTE(review): the two booleans presumably mean "unsigned" and "high
;; half", matching "s" and "hi" in the name -- confirm against the helpers.
7789 (define_expand "vec_unpacks_hi_v4si"
7790 [(match_operand:V2DI 0 "register_operand" "")
7791 (match_operand:V4SI 1 "register_operand" "")]
7795 ix86_expand_sse4_unpack (operands, false, true);
7797 ix86_expand_sse_unpack (operands, false, true);
;; Expander "vec_unpacku_lo_v4si": V4SI register operand 1 to V2DI register
;; operand 0.  Calls ix86_expand_sse4_unpack or ix86_expand_sse_unpack
;; with (operands, true, false); the test selecting between them is not
;; visible in this excerpt.
;; NOTE(review): the two booleans presumably mean "unsigned" and "high
;; half", matching "u" and "lo" in the name -- confirm against the helpers.
7801 (define_expand "vec_unpacku_lo_v4si"
7802 [(match_operand:V2DI 0 "register_operand" "")
7803 (match_operand:V4SI 1 "register_operand" "")]
7807 ix86_expand_sse4_unpack (operands, true, false);
7809 ix86_expand_sse_unpack (operands, true, false);
;; Expander "vec_unpacks_lo_v4si": V4SI register operand 1 to V2DI register
;; operand 0.  Calls ix86_expand_sse4_unpack or ix86_expand_sse_unpack
;; with (operands, false, false); the test selecting between them is not
;; visible in this excerpt.
;; NOTE(review): the two booleans presumably mean "unsigned" and "high
;; half", matching "s" and "lo" in the name -- confirm against the helpers.
7813 (define_expand "vec_unpacks_lo_v4si"
7814 [(match_operand:V2DI 0 "register_operand" "")
7815 (match_operand:V4SI 1 "register_operand" "")]
7819 ix86_expand_sse4_unpack (operands, false, false);
7821 ix86_expand_sse_unpack (operands, false, false);
7825 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7829 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander "sse2_uavgv16qi3": V16QI register operand 0 computed from V16QI
;; operands 1 and 2 together with a V16QI constant vector of sixteen 1s.
;; The RTL operators wrapping these operands are on lines not visible in
;; this excerpt.  The preparation statement calls
;; ix86_fixup_binary_operands_no_copy (PLUS, V16QImode, operands).
7831 (define_expand "sse2_uavgv16qi3"
7832 [(set (match_operand:V16QI 0 "register_operand" "")
7838 (match_operand:V16QI 1 "nonimmediate_operand" ""))
7840 (match_operand:V16QI 2 "nonimmediate_operand" "")))
7841 (const_vector:V16QI [(const_int 1) (const_int 1)
7842 (const_int 1) (const_int 1)
7843 (const_int 1) (const_int 1)
7844 (const_int 1) (const_int 1)
7845 (const_int 1) (const_int 1)
7846 (const_int 1) (const_int 1)
7847 (const_int 1) (const_int 1)
7848 (const_int 1) (const_int 1)]))
7851 "ix86_fixup_binary_operands_no_copy (PLUS, V16QImode, operands);")
;; "*avx_uavgv16qi3": three-operand vpavgb, enabled for TARGET_AVX when
;; ix86_binary_operator_ok (PLUS, V16QImode, operands) holds.  Operand 1
;; is marked commutative with operand 2 ("%x"); operand 2 may be a
;; register or memory ("xm").  The pattern includes a V16QI constant
;; vector of sixteen 1s; the wrapping RTL operators are not visible here.
7853 (define_insn "*avx_uavgv16qi3"
7854 [(set (match_operand:V16QI 0 "register_operand" "=x")
7860 (match_operand:V16QI 1 "nonimmediate_operand" "%x"))
7862 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))
7863 (const_vector:V16QI [(const_int 1) (const_int 1)
7864 (const_int 1) (const_int 1)
7865 (const_int 1) (const_int 1)
7866 (const_int 1) (const_int 1)
7867 (const_int 1) (const_int 1)
7868 (const_int 1) (const_int 1)
7869 (const_int 1) (const_int 1)
7870 (const_int 1) (const_int 1)]))
7872 "TARGET_AVX && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
7873 "vpavgb\t{%2, %1, %0|%0, %1, %2}"
7874 [(set_attr "type" "sseiadd")
7875 (set_attr "prefix" "vex")
7876 (set_attr "mode" "TI")])
;; "*sse2_uavgv16qi3": two-operand pavgb, enabled for TARGET_SSE2 when
;; ix86_binary_operator_ok (PLUS, V16QImode, operands) holds.  Operand 1
;; is tied to operand 0 and marked commutative ("%0"); operand 2 may be a
;; register or memory ("xm").  The pattern includes a V16QI constant
;; vector of sixteen 1s; the wrapping RTL operators are not visible here.
7878 (define_insn "*sse2_uavgv16qi3"
7879 [(set (match_operand:V16QI 0 "register_operand" "=x")
7885 (match_operand:V16QI 1 "nonimmediate_operand" "%0"))
7887 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))
7888 (const_vector:V16QI [(const_int 1) (const_int 1)
7889 (const_int 1) (const_int 1)
7890 (const_int 1) (const_int 1)
7891 (const_int 1) (const_int 1)
7892 (const_int 1) (const_int 1)
7893 (const_int 1) (const_int 1)
7894 (const_int 1) (const_int 1)
7895 (const_int 1) (const_int 1)]))
7897 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
7898 "pavgb\t{%2, %0|%0, %2}"
7899 [(set_attr "type" "sseiadd")
7900 (set_attr "prefix_data16" "1")
7901 (set_attr "mode" "TI")])
;; Expander "sse2_uavgv8hi3": V8HI register operand 0 computed from V8HI
;; operands 1 and 2 together with a V8HI constant vector of eight 1s.  The
;; RTL operators wrapping these operands are on lines not visible in this
;; excerpt.  The preparation statement calls
;; ix86_fixup_binary_operands_no_copy (PLUS, V8HImode, operands).
7903 (define_expand "sse2_uavgv8hi3"
7904 [(set (match_operand:V8HI 0 "register_operand" "")
7910 (match_operand:V8HI 1 "nonimmediate_operand" ""))
7912 (match_operand:V8HI 2 "nonimmediate_operand" "")))
7913 (const_vector:V8HI [(const_int 1) (const_int 1)
7914 (const_int 1) (const_int 1)
7915 (const_int 1) (const_int 1)
7916 (const_int 1) (const_int 1)]))
7919 "ix86_fixup_binary_operands_no_copy (PLUS, V8HImode, operands);")
;; "*avx_uavgv8hi3": three-operand vpavgw, enabled for TARGET_AVX when
;; ix86_binary_operator_ok (PLUS, V8HImode, operands) holds.  Operand 1 is
;; marked commutative with operand 2 ("%x"); operand 2 may be a register
;; or memory ("xm").  The pattern includes a V8HI constant vector of eight
;; 1s; the wrapping RTL operators are not visible here.
7921 (define_insn "*avx_uavgv8hi3"
7922 [(set (match_operand:V8HI 0 "register_operand" "=x")
7928 (match_operand:V8HI 1 "nonimmediate_operand" "%x"))
7930 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
7931 (const_vector:V8HI [(const_int 1) (const_int 1)
7932 (const_int 1) (const_int 1)
7933 (const_int 1) (const_int 1)
7934 (const_int 1) (const_int 1)]))
7936 "TARGET_AVX && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
7937 "vpavgw\t{%2, %1, %0|%0, %1, %2}"
7938 [(set_attr "type" "sseiadd")
7939 (set_attr "prefix" "vex")
7940 (set_attr "mode" "TI")])
;; "*sse2_uavgv8hi3": two-operand pavgw, enabled for TARGET_SSE2 when
;; ix86_binary_operator_ok (PLUS, V8HImode, operands) holds.  Operand 1 is
;; tied to operand 0 and marked commutative ("%0"); operand 2 may be a
;; register or memory ("xm").  The pattern includes a V8HI constant vector
;; of eight 1s; the wrapping RTL operators are not visible here.
7942 (define_insn "*sse2_uavgv8hi3"
7943 [(set (match_operand:V8HI 0 "register_operand" "=x")
7949 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
7951 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
7952 (const_vector:V8HI [(const_int 1) (const_int 1)
7953 (const_int 1) (const_int 1)
7954 (const_int 1) (const_int 1)
7955 (const_int 1) (const_int 1)]))
7957 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
7958 "pavgw\t{%2, %0|%0, %2}"
7959 [(set_attr "type" "sseiadd")
7960 (set_attr "prefix_data16" "1")
7961 (set_attr "mode" "TI")])
7963 ;; The correct representation for this is absolutely enormous, and
7964 ;; surely not generally useful.
;; "*avx_psadbw": three-operand vpsadbw, prefix "vex".  Modelled as a V2DI
;; unspec over V16QI register operand 1 and V16QI register-or-memory
;; operand 2 rather than as explicit arithmetic RTL.  The unspec code and
;; the enabling condition are on lines not visible in this excerpt.
7965 (define_insn "*avx_psadbw"
7966 [(set (match_operand:V2DI 0 "register_operand" "=x")
7967 (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "x")
7968 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
7971 "vpsadbw\t{%2, %1, %0|%0, %1, %2}"
7972 [(set_attr "type" "sseiadd")
7973 (set_attr "prefix" "vex")
7974 (set_attr "mode" "TI")])
;; "sse2_psadbw": two-operand psadbw.  Modelled as a V2DI unspec over V16QI
;; operand 1, tied to operand 0, and V16QI register-or-memory operand 2.
;; The unspec code and the enabling condition are on lines not visible in
;; this excerpt.
7976 (define_insn "sse2_psadbw"
7977 [(set (match_operand:V2DI 0 "register_operand" "=x")
7978 (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "0")
7979 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
7982 "psadbw\t{%2, %0|%0, %2}"
7983 [(set_attr "type" "sseiadd")
7984 (set_attr "atom_unit" "simul")
7985 (set_attr "prefix_data16" "1")
7986 (set_attr "mode" "TI")])
;; "avx_movmskp<avxmodesuffixf2c>256": iterated over AVX256MODEF2P and
;; enabled when AVX256_VEC_FLOAT_MODE_P (<MODE>mode) holds.  Emits
;; vmovmskp<suffix> from SSE register operand 1 into SImode general
;; register operand 0 (constraint "=r"), prefix "vex".  The RTL wrapper
;; around operand 1 is on lines not visible in this excerpt.
7988 (define_insn "avx_movmskp<avxmodesuffixf2c>256"
7989 [(set (match_operand:SI 0 "register_operand" "=r")
7991 [(match_operand:AVX256MODEF2P 1 "register_operand" "x")]
7993 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
7994 "vmovmskp<avxmodesuffixf2c>\t{%1, %0|%0, %1}"
7995 [(set_attr "type" "ssecvt")
7996 (set_attr "prefix" "vex")
7997 (set_attr "mode" "<MODE>")])
;; "<sse>_movmskp<ssemodesuffixf2c>": iterated over SSEMODEF2P and enabled
;; when SSE_VEC_FLOAT_MODE_P (<MODE>mode) holds.  Emits %vmovmskp<suffix>
;; from SSE register operand 1 into SImode general register operand 0,
;; prefix "maybe_vex".  The RTL wrapper around operand 1 is on lines not
;; visible in this excerpt.
7999 (define_insn "<sse>_movmskp<ssemodesuffixf2c>"
8000 [(set (match_operand:SI 0 "register_operand" "=r")
8002 [(match_operand:SSEMODEF2P 1 "register_operand" "x")]
8004 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
8005 "%vmovmskp<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
8006 [(set_attr "type" "ssemov")
8007 (set_attr "prefix" "maybe_vex")
8008 (set_attr "mode" "<MODE>")])
;; "sse2_pmovmskb": emits %vpmovmskb from V16QI SSE register operand 1 into
;; SImode general register operand 0, modelled as an SImode unspec.  The
;; unspec code and the enabling condition are on lines not visible in this
;; excerpt.
8010 (define_insn "sse2_pmovmskb"
8011 [(set (match_operand:SI 0 "register_operand" "=r")
8012 (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
8015 "%vpmovmskb\t{%1, %0|%0, %1}"
8016 [(set_attr "type" "ssemov")
8017 (set_attr "prefix_data16" "1")
8018 (set_attr "prefix" "maybe_vex")
8019 (set_attr "mode" "SI")])
;; Expander "sse2_maskmovdqu": a set of V16QI memory operand 0 from a V16QI
;; unspec whose visible inputs are V16QI register operands 1 and 2.  The
;; rest of the unspec and the condition are on lines not visible in this
;; excerpt.
8021 (define_expand "sse2_maskmovdqu"
8022 [(set (match_operand:V16QI 0 "memory_operand" "")
8023 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "")
8024 (match_operand:V16QI 2 "register_operand" "")
;; "*sse2_maskmovdqu": 32-bit form (TARGET_SSE2 && !TARGET_64BIT).  The
;; destination is a V16QI MEM addressed by SImode register operand 0
;; (constraint "D"); the unspec reads V16QI registers 1 and 2 and the old
;; contents of that same memory.  The template names only %1 and %2, so
;; the address register is implicit; length_vex is therefore fixed at 3.
;; NOTE(review): the in-body comment says %rdi although this pattern uses
;; an SImode address in non-64-bit mode -- presumably %edi is meant.
8030 (define_insn "*sse2_maskmovdqu"
8031 [(set (mem:V16QI (match_operand:SI 0 "register_operand" "D"))
8032 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
8033 (match_operand:V16QI 2 "register_operand" "x")
8034 (mem:V16QI (match_dup 0))]
8036 "TARGET_SSE2 && !TARGET_64BIT"
8037 ;; @@@ check ordering of operands in intel/nonintel syntax
8038 "%vmaskmovdqu\t{%2, %1|%1, %2}"
8039 [(set_attr "type" "ssemov")
8040 (set_attr "prefix_data16" "1")
8041 ;; The implicit %rdi operand confuses default length_vex computation.
8042 (set_attr "length_vex" "3")
8043 (set_attr "prefix" "maybe_vex")
8044 (set_attr "mode" "TI")])
;; "*sse2_maskmovdqu_rex64": 64-bit form (TARGET_SSE2 && TARGET_64BIT).
;; The destination is a V16QI MEM addressed by DImode register operand 0
;; (constraint "D"); the unspec reads V16QI registers 1 and 2 and the old
;; contents of that same memory.  The template names only %1 and %2.
;; length_vex is computed by hand: 3 + 1 when operand 2's register number
;; is at or above FIRST_REX_SSE_REG, otherwise 2 + 1.
8046 (define_insn "*sse2_maskmovdqu_rex64"
8047 [(set (mem:V16QI (match_operand:DI 0 "register_operand" "D"))
8048 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
8049 (match_operand:V16QI 2 "register_operand" "x")
8050 (mem:V16QI (match_dup 0))]
8052 "TARGET_SSE2 && TARGET_64BIT"
8053 ;; @@@ check ordering of operands in intel/nonintel syntax
8054 "%vmaskmovdqu\t{%2, %1|%1, %2}"
8055 [(set_attr "type" "ssemov")
8056 (set_attr "prefix_data16" "1")
8057 ;; The implicit %rdi operand confuses default length_vex computation.
8058 (set (attr "length_vex")
8059 (symbol_ref ("REGNO (operands[2]) >= FIRST_REX_SSE_REG ? 3 + 1 : 2 + 1")))
8060 (set_attr "prefix" "maybe_vex")
8061 (set_attr "mode" "TI")])
;; "sse_ldmxcsr": an unspec_volatile taking SImode memory operand 0; the
;; insn is attributed as a memory load with atom_sse_attr "mxcsr" and
;; prefix "maybe_vex".  The unspec code, condition and assembler template
;; are on lines not visible in this excerpt.
8063 (define_insn "sse_ldmxcsr"
8064 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
8068 [(set_attr "type" "sse")
8069 (set_attr "atom_sse_attr" "mxcsr")
8070 (set_attr "prefix" "maybe_vex")
8071 (set_attr "memory" "load")])
;; "sse_stmxcsr": sets SImode memory operand 0 from the volatile unspec
;; UNSPECV_STMXCSR (dummy input const_int 0); attributed as a memory store
;; with atom_sse_attr "mxcsr" and prefix "maybe_vex".  The condition and
;; assembler template are on lines not visible in this excerpt.
8073 (define_insn "sse_stmxcsr"
8074 [(set (match_operand:SI 0 "memory_operand" "=m")
8075 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
8078 [(set_attr "type" "sse")
8079 (set_attr "atom_sse_attr" "mxcsr")
8080 (set_attr "prefix" "maybe_vex")
8081 (set_attr "memory" "store")])
;; Expander "sse_sfence", enabled for TARGET_SSE || TARGET_3DNOW_A.  The
;; pattern uses a BLKmode UNSPEC_SFENCE of operand 0.  The preparation
;; code creates operand 0 itself: a BLKmode MEM whose address is a Pmode
;; SCRATCH, then marks that MEM volatile.
8083 (define_expand "sse_sfence"
8085 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
8086 "TARGET_SSE || TARGET_3DNOW_A"
8088 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
8089 MEM_VOLATILE_P (operands[0]) = 1;
;; "*sse_sfence", enabled for TARGET_SSE || TARGET_3DNOW_A: BLKmode operand
;; 0 (no predicate, no constraint) is set to UNSPEC_SFENCE of itself.
;; Attributes: length_address 0, atom_sse_attr "fence", memory "unknown".
;; The assembler template is on a line not visible in this excerpt.
8092 (define_insn "*sse_sfence"
8093 [(set (match_operand:BLK 0 "" "")
8094 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
8095 "TARGET_SSE || TARGET_3DNOW_A"
8097 [(set_attr "type" "sse")
8098 (set_attr "length_address" "0")
8099 (set_attr "atom_sse_attr" "fence")
8100 (set_attr "memory" "unknown")])
;; "sse2_clflush": an unspec_volatile taking operand 0, an address
;; (predicate address_operand, constraint "p").  Attributes: atom_sse_attr
;; "fence", memory "unknown".  The unspec code, condition and assembler
;; template are on lines not visible in this excerpt.
8102 (define_insn "sse2_clflush"
8103 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
8107 [(set_attr "type" "sse")
8108 (set_attr "atom_sse_attr" "fence")
8109 (set_attr "memory" "unknown")])
;; Expander "sse2_mfence": the pattern uses a BLKmode UNSPEC_MFENCE of
;; operand 0.  The preparation code creates operand 0 itself: a BLKmode
;; MEM whose address is a Pmode SCRATCH, then marks that MEM volatile.
;; The condition is on a line not visible in this excerpt.
8111 (define_expand "sse2_mfence"
8113 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
8116 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
8117 MEM_VOLATILE_P (operands[0]) = 1;
;; "*sse2_mfence", enabled for TARGET_64BIT || TARGET_SSE2: BLKmode operand
;; 0 (no predicate, no constraint) is set to UNSPEC_MFENCE of itself.
;; Attributes: length_address 0, atom_sse_attr "fence", memory "unknown".
;; The assembler template is on a line not visible in this excerpt.
8120 (define_insn "*sse2_mfence"
8121 [(set (match_operand:BLK 0 "" "")
8122 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
8123 "TARGET_64BIT || TARGET_SSE2"
8125 [(set_attr "type" "sse")
8126 (set_attr "length_address" "0")
8127 (set_attr "atom_sse_attr" "fence")
8128 (set_attr "memory" "unknown")])
;; Expander "sse2_lfence": the pattern uses a BLKmode UNSPEC_LFENCE of
;; operand 0.  The preparation code creates operand 0 itself: a BLKmode
;; MEM whose address is a Pmode SCRATCH, then marks that MEM volatile.
;; The condition is on a line not visible in this excerpt.
8130 (define_expand "sse2_lfence"
8132 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
8135 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
8136 MEM_VOLATILE_P (operands[0]) = 1;
;; "*sse2_lfence": BLKmode operand 0 (no predicate, no constraint) is set
;; to UNSPEC_LFENCE of itself.  Attributes: length_address 0, memory
;; "unknown", and atom_sse_attr "lfence" (the sfence/mfence insns use
;; "fence" instead).  The condition and assembler template are on lines
;; not visible in this excerpt.
8139 (define_insn "*sse2_lfence"
8140 [(set (match_operand:BLK 0 "" "")
8141 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
8144 [(set_attr "type" "sse")
8145 (set_attr "length_address" "0")
8146 (set_attr "atom_sse_attr" "lfence")
8147 (set_attr "memory" "unknown")])
;; "sse3_mwait": an unspec_volatile taking two SImode register operands
;; pinned by constraints "a" (operand 0) and "c" (operand 1).  The insn
;; length is fixed at 3.  The unspec code, condition and assembler
;; template are on lines not visible in this excerpt.
8149 (define_insn "sse3_mwait"
8150 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
8151 (match_operand:SI 1 "register_operand" "c")]
8154 ;; 64bit version is "mwait %rax,%rcx". But only lower 32bits are used.
8155 ;; Since 32bit register operands are implicitly zero extended to 64bit,
8156 ;; we only need to set up 32bit registers.
8158 [(set_attr "length" "3")])
;; "sse3_monitor": 32-bit form (TARGET_SSE3 && !TARGET_64BIT).  An
;; unspec_volatile taking three SImode register operands pinned by
;; constraints "a", "c" and "d"; emits "monitor %0, %1, %2".  The insn
;; length is fixed at 3.  The unspec code is on a line not visible here.
8160 (define_insn "sse3_monitor"
8161 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
8162 (match_operand:SI 1 "register_operand" "c")
8163 (match_operand:SI 2 "register_operand" "d")]
8165 "TARGET_SSE3 && !TARGET_64BIT"
8166 "monitor\t%0, %1, %2"
8167 [(set_attr "length" "3")])
;; "sse3_monitor64": 64-bit form (TARGET_SSE3 && TARGET_64BIT).  Operand 0
;; is DImode (constraint "a") while operands 1 and 2 stay SImode
;; (constraints "c" and "d"), for the reason given in the comment inside
;; the pattern.  The insn length is fixed at 3.  The unspec code and the
;; assembler template are on lines not visible in this excerpt.
8169 (define_insn "sse3_monitor64"
8170 [(unspec_volatile [(match_operand:DI 0 "register_operand" "a")
8171 (match_operand:SI 1 "register_operand" "c")
8172 (match_operand:SI 2 "register_operand" "d")]
8174 "TARGET_SSE3 && TARGET_64BIT"
8175 ;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in
8176 ;; RCX and RDX are used. Since 32bit register operands are implicitly
8177 ;; zero extended to 64bit, we only need to set up 32bit registers.
8179 [(set_attr "length" "3")])
8181 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8183 ;; SSSE3 instructions
8185 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; "*avx_phaddwv8hi3": three-operand vphaddw, prefix "vex".  The pattern
;; selects HImode elements of V8HI register operand 1 in adjacent pairs
;; (0,1) (2,3) (4,5) (6,7), followed by the same pairs of V8HI
;; register-or-memory operand 2.  The RTL operators that combine each pair
;; and assemble the result, and the enabling condition, are on lines not
;; visible in this excerpt.
8187 (define_insn "*avx_phaddwv8hi3"
8188 [(set (match_operand:V8HI 0 "register_operand" "=x")
8194 (match_operand:V8HI 1 "register_operand" "x")
8195 (parallel [(const_int 0)]))
8196 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8198 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8199 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8202 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8203 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8205 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8206 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8211 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8212 (parallel [(const_int 0)]))
8213 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8215 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8216 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8219 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8220 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8222 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8223 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8225 "vphaddw\t{%2, %1, %0|%0, %1, %2}"
8226 [(set_attr "type" "sseiadd")
8227 (set_attr "prefix_extra" "1")
8228 (set_attr "prefix" "vex")
8229 (set_attr "mode" "TI")])
;; "ssse3_phaddwv8hi3": two-operand phaddw; operand 1 is tied to operand 0.
;; The pattern selects HImode elements of V8HI operand 1 in adjacent pairs
;; (0,1) (2,3) (4,5) (6,7), followed by the same pairs of V8HI
;; register-or-memory operand 2.  The RTL operators that combine each pair
;; and assemble the result, and the enabling condition, are on lines not
;; visible in this excerpt.
8231 (define_insn "ssse3_phaddwv8hi3"
8232 [(set (match_operand:V8HI 0 "register_operand" "=x")
8238 (match_operand:V8HI 1 "register_operand" "0")
8239 (parallel [(const_int 0)]))
8240 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8242 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8243 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8246 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8247 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8249 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8250 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8255 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8256 (parallel [(const_int 0)]))
8257 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8259 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8260 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8263 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8264 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8266 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8267 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8269 "phaddw\t{%2, %0|%0, %2}"
8270 [(set_attr "type" "sseiadd")
8271 (set_attr "atom_unit" "complex")
8272 (set_attr "prefix_data16" "1")
8273 (set_attr "prefix_extra" "1")
8274 (set_attr "mode" "TI")])
;; "ssse3_phaddwv4hi3": two-operand phaddw on V4HI operands with "y"
;; register constraints (mode attribute DI); operand 1 is tied to operand
;; 0.  The pattern selects HImode element pairs (0,1) (2,3) of operand 1,
;; then the same pairs of operand 2.  prefix_rex is computed from
;; x86_extended_reg_mentioned_p (insn).  The combining RTL operators and
;; the enabling condition are on lines not visible in this excerpt.
8276 (define_insn "ssse3_phaddwv4hi3"
8277 [(set (match_operand:V4HI 0 "register_operand" "=y")
8282 (match_operand:V4HI 1 "register_operand" "0")
8283 (parallel [(const_int 0)]))
8284 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8286 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8287 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8291 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8292 (parallel [(const_int 0)]))
8293 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8295 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8296 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8298 "phaddw\t{%2, %0|%0, %2}"
8299 [(set_attr "type" "sseiadd")
8300 (set_attr "atom_unit" "complex")
8301 (set_attr "prefix_extra" "1")
8302 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8303 (set_attr "mode" "DI")])
;; "*avx_phadddv4si3": three-operand vphaddd, prefix "vex".  The pattern
;; selects SImode element pairs (0,1) (2,3) of V4SI register operand 1,
;; then the same pairs of V4SI register-or-memory operand 2.  The RTL
;; operators that combine each pair and assemble the result, and the
;; enabling condition, are on lines not visible in this excerpt.
8305 (define_insn "*avx_phadddv4si3"
8306 [(set (match_operand:V4SI 0 "register_operand" "=x")
8311 (match_operand:V4SI 1 "register_operand" "x")
8312 (parallel [(const_int 0)]))
8313 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8315 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8316 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8320 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
8321 (parallel [(const_int 0)]))
8322 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8324 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8325 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
8327 "vphaddd\t{%2, %1, %0|%0, %1, %2}"
8328 [(set_attr "type" "sseiadd")
8329 (set_attr "prefix_extra" "1")
8330 (set_attr "prefix" "vex")
8331 (set_attr "mode" "TI")])
;; "ssse3_phadddv4si3": two-operand phaddd; operand 1 is tied to operand 0.
;; The pattern selects SImode element pairs (0,1) (2,3) of V4SI operand 1,
;; then the same pairs of V4SI register-or-memory operand 2.  The RTL
;; operators that combine each pair and assemble the result, and the
;; enabling condition, are on lines not visible in this excerpt.
8333 (define_insn "ssse3_phadddv4si3"
8334 [(set (match_operand:V4SI 0 "register_operand" "=x")
8339 (match_operand:V4SI 1 "register_operand" "0")
8340 (parallel [(const_int 0)]))
8341 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8343 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8344 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8348 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
8349 (parallel [(const_int 0)]))
8350 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8352 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8353 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
8355 "phaddd\t{%2, %0|%0, %2}"
8356 [(set_attr "type" "sseiadd")
8357 (set_attr "atom_unit" "complex")
8358 (set_attr "prefix_data16" "1")
8359 (set_attr "prefix_extra" "1")
8360 (set_attr "mode" "TI")])
;; "ssse3_phadddv2si3": two-operand phaddd on V2SI operands with "y"
;; register constraints (mode attribute DI); operand 1 is tied to operand
;; 0.  The pattern selects SImode elements 0 and 1 of operand 1, then
;; elements 0 and 1 of operand 2.  prefix_rex is computed from
;; x86_extended_reg_mentioned_p (insn).  The combining RTL operators and
;; the enabling condition are on lines not visible in this excerpt.
8362 (define_insn "ssse3_phadddv2si3"
8363 [(set (match_operand:V2SI 0 "register_operand" "=y")
8367 (match_operand:V2SI 1 "register_operand" "0")
8368 (parallel [(const_int 0)]))
8369 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8372 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
8373 (parallel [(const_int 0)]))
8374 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
8376 "phaddd\t{%2, %0|%0, %2}"
8377 [(set_attr "type" "sseiadd")
8378 (set_attr "atom_unit" "complex")
8379 (set_attr "prefix_extra" "1")
8380 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8381 (set_attr "mode" "DI")])
;; "*avx_phaddswv8hi3": three-operand vphaddsw, prefix "vex".  The pattern
;; selects HImode elements of V8HI register operand 1 in adjacent pairs
;; (0,1) (2,3) (4,5) (6,7), followed by the same pairs of V8HI
;; register-or-memory operand 2.  The RTL operators that combine each pair
;; and assemble the result, and the enabling condition, are on lines not
;; visible in this excerpt.
8383 (define_insn "*avx_phaddswv8hi3"
8384 [(set (match_operand:V8HI 0 "register_operand" "=x")
8390 (match_operand:V8HI 1 "register_operand" "x")
8391 (parallel [(const_int 0)]))
8392 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8394 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8395 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8398 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8399 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8401 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8402 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8407 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8408 (parallel [(const_int 0)]))
8409 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8411 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8412 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8415 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8416 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8418 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8419 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8421 "vphaddsw\t{%2, %1, %0|%0, %1, %2}"
8422 [(set_attr "type" "sseiadd")
8423 (set_attr "prefix_extra" "1")
8424 (set_attr "prefix" "vex")
8425 (set_attr "mode" "TI")])
;; "ssse3_phaddswv8hi3": two-operand phaddsw; operand 1 is tied to operand
;; 0.  The pattern selects HImode elements of V8HI operand 1 in adjacent
;; pairs (0,1) (2,3) (4,5) (6,7), followed by the same pairs of V8HI
;; register-or-memory operand 2.  The RTL operators that combine each pair
;; and assemble the result, and the enabling condition, are on lines not
;; visible in this excerpt.
8427 (define_insn "ssse3_phaddswv8hi3"
8428 [(set (match_operand:V8HI 0 "register_operand" "=x")
8434 (match_operand:V8HI 1 "register_operand" "0")
8435 (parallel [(const_int 0)]))
8436 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8438 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8439 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8442 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8443 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8445 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8446 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8451 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8452 (parallel [(const_int 0)]))
8453 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8455 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8456 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8459 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8460 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8462 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8463 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8465 "phaddsw\t{%2, %0|%0, %2}"
8466 [(set_attr "type" "sseiadd")
8467 (set_attr "atom_unit" "complex")
8468 (set_attr "prefix_data16" "1")
8469 (set_attr "prefix_extra" "1")
8470 (set_attr "mode" "TI")])
;; V4HI variant of phaddsw using the "y" register class (MMX registers in
;; the i386 constraint set).  Pairs HI elements (0,1) and (2,3) of operand 1
;; and then of operand 2; operand 1 is tied to the destination.
;; prefix_rex is not fixed: it is computed per insn by
;; x86_extended_reg_mentioned_p.
8472 (define_insn "ssse3_phaddswv4hi3"
8473 [(set (match_operand:V4HI 0 "register_operand" "=y")
8478 (match_operand:V4HI 1 "register_operand" "0")
8479 (parallel [(const_int 0)]))
8480 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8482 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8483 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8487 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8488 (parallel [(const_int 0)]))
8489 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8491 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8492 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8494 "phaddsw\t{%2, %0|%0, %2}"
8495 [(set_attr "type" "sseiadd")
8496 (set_attr "atom_unit" "complex")
8497 (set_attr "prefix_extra" "1")
8498 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8499 (set_attr "mode" "DI")])
;; AVX (VEX-encoded) V8HI pattern that emits vphsubw.  Adjacent HI element
;; pairs (0,1) (2,3) (4,5) (6,7) are taken from operand 1 and then from
;; operand 2.  Three-operand form: operand 1 is any SSE register, operand 2
;; a register or memory.
8501 (define_insn "*avx_phsubwv8hi3"
8502 [(set (match_operand:V8HI 0 "register_operand" "=x")
8508 (match_operand:V8HI 1 "register_operand" "x")
8509 (parallel [(const_int 0)]))
8510 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8512 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8513 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8516 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8517 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8519 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8520 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8525 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8526 (parallel [(const_int 0)]))
8527 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8529 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8530 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8533 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8534 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8536 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8537 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8539 "vphsubw\t{%2, %1, %0|%0, %1, %2}"
8540 [(set_attr "type" "sseiadd")
8541 (set_attr "prefix_extra" "1")
8542 (set_attr "prefix" "vex")
8543 (set_attr "mode" "TI")])
;; Legacy-encoded V8HI pattern that emits phsubw on SSE registers.
;; Element pairing matches the AVX variant; operand 1 is tied to the
;; destination register ("0"), so the template prints only operands 2 and 0.
8545 (define_insn "ssse3_phsubwv8hi3"
8546 [(set (match_operand:V8HI 0 "register_operand" "=x")
8552 (match_operand:V8HI 1 "register_operand" "0")
8553 (parallel [(const_int 0)]))
8554 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8556 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8557 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8560 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8561 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8563 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8564 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8569 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8570 (parallel [(const_int 0)]))
8571 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8573 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8574 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8577 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8578 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8580 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8581 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8583 "phsubw\t{%2, %0|%0, %2}"
8584 [(set_attr "type" "sseiadd")
8585 (set_attr "atom_unit" "complex")
8586 (set_attr "prefix_data16" "1")
8587 (set_attr "prefix_extra" "1")
8588 (set_attr "mode" "TI")])
;; V4HI variant of phsubw on the "y" register class.  Pairs HI elements
;; (0,1) and (2,3) of operand 1 and then of operand 2; operand 1 is tied to
;; the destination.  prefix_rex is computed per insn by
;; x86_extended_reg_mentioned_p.
8590 (define_insn "ssse3_phsubwv4hi3"
8591 [(set (match_operand:V4HI 0 "register_operand" "=y")
8596 (match_operand:V4HI 1 "register_operand" "0")
8597 (parallel [(const_int 0)]))
8598 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8600 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8601 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8605 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8606 (parallel [(const_int 0)]))
8607 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8609 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8610 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8612 "phsubw\t{%2, %0|%0, %2}"
8613 [(set_attr "type" "sseiadd")
8614 (set_attr "atom_unit" "complex")
8615 (set_attr "prefix_extra" "1")
8616 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8617 (set_attr "mode" "DI")])
;; AVX (VEX-encoded) V4SI pattern that emits vphsubd.  Pairs SI elements
;; (0,1) and (2,3) of operand 1 and then of operand 2.  Three-operand form:
;; operand 1 is any SSE register, operand 2 a register or memory.
8619 (define_insn "*avx_phsubdv4si3"
8620 [(set (match_operand:V4SI 0 "register_operand" "=x")
8625 (match_operand:V4SI 1 "register_operand" "x")
8626 (parallel [(const_int 0)]))
8627 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8629 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8630 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8634 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
8635 (parallel [(const_int 0)]))
8636 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8638 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8639 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
8641 "vphsubd\t{%2, %1, %0|%0, %1, %2}"
8642 [(set_attr "type" "sseiadd")
8643 (set_attr "prefix_extra" "1")
8644 (set_attr "prefix" "vex")
8645 (set_attr "mode" "TI")])
;; Legacy-encoded V4SI pattern that emits phsubd on SSE registers.  Pairs SI
;; elements (0,1) and (2,3) of each source; operand 1 is tied to the
;; destination register, so only operands 2 and 0 are printed.
8647 (define_insn "ssse3_phsubdv4si3"
8648 [(set (match_operand:V4SI 0 "register_operand" "=x")
8653 (match_operand:V4SI 1 "register_operand" "0")
8654 (parallel [(const_int 0)]))
8655 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8657 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8658 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8662 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
8663 (parallel [(const_int 0)]))
8664 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8666 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8667 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
8669 "phsubd\t{%2, %0|%0, %2}"
8670 [(set_attr "type" "sseiadd")
8671 (set_attr "atom_unit" "complex")
8672 (set_attr "prefix_data16" "1")
8673 (set_attr "prefix_extra" "1")
8674 (set_attr "mode" "TI")])
;; V2SI variant of phsubd on the "y" register class.  Uses the single SI
;; pair (0,1) of operand 1 and of operand 2; operand 1 is tied to the
;; destination.  prefix_rex is computed per insn by
;; x86_extended_reg_mentioned_p.
8676 (define_insn "ssse3_phsubdv2si3"
8677 [(set (match_operand:V2SI 0 "register_operand" "=y")
8681 (match_operand:V2SI 1 "register_operand" "0")
8682 (parallel [(const_int 0)]))
8683 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8686 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
8687 (parallel [(const_int 0)]))
8688 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
8690 "phsubd\t{%2, %0|%0, %2}"
8691 [(set_attr "type" "sseiadd")
8692 (set_attr "atom_unit" "complex")
8693 (set_attr "prefix_extra" "1")
8694 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8695 (set_attr "mode" "DI")])
;; AVX (VEX-encoded) V8HI pattern that emits vphsubsw.  Adjacent HI element
;; pairs (0,1) (2,3) (4,5) (6,7) are taken from operand 1 and then from
;; operand 2.  Three-operand form: operand 1 is any SSE register, operand 2
;; a register or memory.
8697 (define_insn "*avx_phsubswv8hi3"
8698 [(set (match_operand:V8HI 0 "register_operand" "=x")
8704 (match_operand:V8HI 1 "register_operand" "x")
8705 (parallel [(const_int 0)]))
8706 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8708 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8709 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8712 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8713 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8715 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8716 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8721 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8722 (parallel [(const_int 0)]))
8723 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8725 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8726 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8729 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8730 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8732 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8733 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8735 "vphsubsw\t{%2, %1, %0|%0, %1, %2}"
8736 [(set_attr "type" "sseiadd")
8737 (set_attr "prefix_extra" "1")
8738 (set_attr "prefix" "vex")
8739 (set_attr "mode" "TI")])
;; Legacy-encoded V8HI pattern that emits phsubsw on SSE registers.
;; Element pairing matches the AVX variant; operand 1 is tied to the
;; destination register ("0"), so the template prints only operands 2 and 0.
8741 (define_insn "ssse3_phsubswv8hi3"
8742 [(set (match_operand:V8HI 0 "register_operand" "=x")
8748 (match_operand:V8HI 1 "register_operand" "0")
8749 (parallel [(const_int 0)]))
8750 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8752 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8753 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8756 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8757 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8759 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8760 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8765 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8766 (parallel [(const_int 0)]))
8767 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8769 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8770 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8773 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8774 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8776 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8777 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8779 "phsubsw\t{%2, %0|%0, %2}"
8780 [(set_attr "type" "sseiadd")
8781 (set_attr "atom_unit" "complex")
8782 (set_attr "prefix_data16" "1")
8783 (set_attr "prefix_extra" "1")
8784 (set_attr "mode" "TI")])
;; V4HI variant of phsubsw on the "y" register class.  Pairs HI elements
;; (0,1) and (2,3) of operand 1 and then of operand 2; operand 1 is tied to
;; the destination.  prefix_rex is computed per insn by
;; x86_extended_reg_mentioned_p.
8786 (define_insn "ssse3_phsubswv4hi3"
8787 [(set (match_operand:V4HI 0 "register_operand" "=y")
8792 (match_operand:V4HI 1 "register_operand" "0")
8793 (parallel [(const_int 0)]))
8794 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8796 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8797 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8801 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8802 (parallel [(const_int 0)]))
8803 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8805 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8806 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8808 "phsubsw\t{%2, %0|%0, %2}"
8809 [(set_attr "type" "sseiadd")
8810 (set_attr "atom_unit" "complex")
8811 (set_attr "prefix_extra" "1")
8812 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8813 (set_attr "mode" "DI")])
;; AVX (VEX-encoded) pattern that emits vpmaddubsw: two V16QI sources
;; produce a V8HI result.  Each source is sampled twice, once by a selection
;; starting at byte index 0 and once by a selection starting at byte index 1.
;; Three-operand form; operand 2 may be memory.
8815 (define_insn "*avx_pmaddubsw128"
8816 [(set (match_operand:V8HI 0 "register_operand" "=x")
8821 (match_operand:V16QI 1 "register_operand" "x")
8822 (parallel [(const_int 0)
8832 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
8833 (parallel [(const_int 0)
8843 (vec_select:V16QI (match_dup 1)
8844 (parallel [(const_int 1)
8853 (vec_select:V16QI (match_dup 2)
8854 (parallel [(const_int 1)
8861 (const_int 15)]))))))]
8863 "vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
8864 [(set_attr "type" "sseiadd")
8865 (set_attr "prefix_extra" "1")
8866 (set_attr "prefix" "vex")
8867 (set_attr "mode" "TI")])
;; Legacy-encoded pattern that emits pmaddubsw on SSE registers: two V16QI
;; sources produce a V8HI result, using selections that start at byte
;; index 0 and at byte index 1 of each source.  Operand 1 is tied to the
;; destination register.
8869 (define_insn "ssse3_pmaddubsw128"
8870 [(set (match_operand:V8HI 0 "register_operand" "=x")
8875 (match_operand:V16QI 1 "register_operand" "0")
8876 (parallel [(const_int 0)
8886 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
8887 (parallel [(const_int 0)
8897 (vec_select:V16QI (match_dup 1)
8898 (parallel [(const_int 1)
8907 (vec_select:V16QI (match_dup 2)
8908 (parallel [(const_int 1)
8915 (const_int 15)]))))))]
8917 "pmaddubsw\t{%2, %0|%0, %2}"
8918 [(set_attr "type" "sseiadd")
8919 (set_attr "atom_unit" "simul")
8920 (set_attr "prefix_data16" "1")
8921 (set_attr "prefix_extra" "1")
8922 (set_attr "mode" "TI")])
;; 64-bit variant of pmaddubsw on the "y" register class: two V8QI sources
;; produce a V4HI result, using selections that start at byte index 0 and at
;; byte index 1 of each source.  Operand 1 is tied to the destination;
;; prefix_rex is computed per insn by x86_extended_reg_mentioned_p.
8924 (define_insn "ssse3_pmaddubsw"
8925 [(set (match_operand:V4HI 0 "register_operand" "=y")
8930 (match_operand:V8QI 1 "register_operand" "0")
8931 (parallel [(const_int 0)
8937 (match_operand:V8QI 2 "nonimmediate_operand" "ym")
8938 (parallel [(const_int 0)
8944 (vec_select:V8QI (match_dup 1)
8945 (parallel [(const_int 1)
8950 (vec_select:V8QI (match_dup 2)
8951 (parallel [(const_int 1)
8954 (const_int 7)]))))))]
8956 "pmaddubsw\t{%2, %0|%0, %2}"
8957 [(set_attr "type" "sseiadd")
8958 (set_attr "atom_unit" "simul")
8959 (set_attr "prefix_extra" "1")
8960 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8961 (set_attr "mode" "DI")])
;; Named expander for the V8HI pmulhrsw operation.  Both inputs are accepted
;; as nonimmediate operands with no constraints; the preparation statement
;; hands them to ix86_fixup_binary_operands_no_copy (with MULT as the
;; operation code) before the insn patterns below are matched.  The RTL
;; includes a V8HI constant vector of ones.
8963 (define_expand "ssse3_pmulhrswv8hi3"
8964 [(set (match_operand:V8HI 0 "register_operand" "")
8971 (match_operand:V8HI 1 "nonimmediate_operand" ""))
8973 (match_operand:V8HI 2 "nonimmediate_operand" "")))
8975 (const_vector:V8HI [(const_int 1) (const_int 1)
8976 (const_int 1) (const_int 1)
8977 (const_int 1) (const_int 1)
8978 (const_int 1) (const_int 1)]))
8981 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; AVX (VEX-encoded) insn for V8HI pmulhrsw; emits vpmulhrsw in
;; three-operand form.  Enabled under TARGET_AVX when
;; ix86_binary_operator_ok accepts the operands.  The "%" on operand 1
;; marks operands 1 and 2 as commutative for register allocation.
8983 (define_insn "*avx_pmulhrswv8hi3"
8984 [(set (match_operand:V8HI 0 "register_operand" "=x")
8991 (match_operand:V8HI 1 "nonimmediate_operand" "%x"))
8993 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
8995 (const_vector:V8HI [(const_int 1) (const_int 1)
8996 (const_int 1) (const_int 1)
8997 (const_int 1) (const_int 1)
8998 (const_int 1) (const_int 1)]))
9000 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
9001 "vpmulhrsw\t{%2, %1, %0|%0, %1, %2}"
9002 [(set_attr "type" "sseimul")
9003 (set_attr "prefix_extra" "1")
9004 (set_attr "prefix" "vex")
9005 (set_attr "mode" "TI")])
;; Legacy-encoded insn for V8HI pmulhrsw on SSE registers.  Enabled under
;; TARGET_SSSE3 when ix86_binary_operator_ok accepts the operands.
;; Operand 1 is tied to the destination ("0") and, via "%", commutative
;; with operand 2; the template prints only operands 2 and 0.
9007 (define_insn "*ssse3_pmulhrswv8hi3"
9008 [(set (match_operand:V8HI 0 "register_operand" "=x")
9015 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
9017 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
9019 (const_vector:V8HI [(const_int 1) (const_int 1)
9020 (const_int 1) (const_int 1)
9021 (const_int 1) (const_int 1)
9022 (const_int 1) (const_int 1)]))
9024 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
9025 "pmulhrsw\t{%2, %0|%0, %2}"
9026 [(set_attr "type" "sseimul")
9027 (set_attr "prefix_data16" "1")
9028 (set_attr "prefix_extra" "1")
9029 (set_attr "mode" "TI")])
;; Named expander for the V4HI pmulhrsw operation.  Inputs are accepted as
;; unconstrained nonimmediate operands and passed through
;; ix86_fixup_binary_operands_no_copy (operation code MULT) before
;; matching.  The RTL includes a V4HI constant vector of ones.
9031 (define_expand "ssse3_pmulhrswv4hi3"
9032 [(set (match_operand:V4HI 0 "register_operand" "")
9039 (match_operand:V4HI 1 "nonimmediate_operand" ""))
9041 (match_operand:V4HI 2 "nonimmediate_operand" "")))
9043 (const_vector:V4HI [(const_int 1) (const_int 1)
9044 (const_int 1) (const_int 1)]))
9047 "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
;; Insn for V4HI pmulhrsw on the "y" register class.  Enabled under
;; TARGET_SSSE3 when ix86_binary_operator_ok accepts the operands.
;; Operand 1 is tied to the destination and commutative with operand 2;
;; prefix_rex is computed per insn by x86_extended_reg_mentioned_p.
9049 (define_insn "*ssse3_pmulhrswv4hi3"
9050 [(set (match_operand:V4HI 0 "register_operand" "=y")
9057 (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
9059 (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
9061 (const_vector:V4HI [(const_int 1) (const_int 1)
9062 (const_int 1) (const_int 1)]))
9064 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
9065 "pmulhrsw\t{%2, %0|%0, %2}"
9066 [(set_attr "type" "sseimul")
9067 (set_attr "prefix_extra" "1")
9068 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9069 (set_attr "mode" "DI")])
;; AVX (VEX-encoded) V16QI pattern that emits vpshufb.  The operation is
;; modelled as an unspec over operand 1 (register) and operand 2 (register
;; or memory); three-operand output form.
;; The ";" after the template string starts a comment and has no effect.
9071 (define_insn "*avx_pshufbv16qi3"
9072 [(set (match_operand:V16QI 0 "register_operand" "=x")
9073 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
9074 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
9077 "vpshufb\t{%2, %1, %0|%0, %1, %2}";
9078 [(set_attr "type" "sselog1")
9079 (set_attr "prefix_extra" "1")
9080 (set_attr "prefix" "vex")
9081 (set_attr "mode" "TI")])
;; Legacy-encoded V16QI pattern that emits pshufb on SSE registers.
;; Modelled as an unspec over operands 1 and 2; operand 1 is tied to the
;; destination register, so only operands 2 and 0 are printed.
9083 (define_insn "ssse3_pshufbv16qi3"
9084 [(set (match_operand:V16QI 0 "register_operand" "=x")
9085 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
9086 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
9089 "pshufb\t{%2, %0|%0, %2}";
9090 [(set_attr "type" "sselog1")
9091 (set_attr "prefix_data16" "1")
9092 (set_attr "prefix_extra" "1")
9093 (set_attr "mode" "TI")])
;; V8QI variant of pshufb on the "y" register class, modelled as an unspec
;; over operands 1 and 2.  Operand 1 is tied to the destination; prefix_rex
;; is computed per insn by x86_extended_reg_mentioned_p.
9095 (define_insn "ssse3_pshufbv8qi3"
9096 [(set (match_operand:V8QI 0 "register_operand" "=y")
9097 (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0")
9098 (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
9101 "pshufb\t{%2, %0|%0, %2}";
9102 [(set_attr "type" "sselog1")
9103 (set_attr "prefix_extra" "1")
9104 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9105 (set_attr "mode" "DI")])
;; AVX (VEX-encoded) vpsign pattern, instantiated for every mode in
;; SSEMODE124 (V16QI, V8HI, V4SI).  The element-size suffix of the mnemonic
;; comes from the <ssevecsize> mode attribute.  Three-operand output form;
;; operand 2 may be memory.
9107 (define_insn "*avx_psign<mode>3"
9108 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
9110 [(match_operand:SSEMODE124 1 "register_operand" "x")
9111 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")]
9114 "vpsign<ssevecsize>\t{%2, %1, %0|%0, %1, %2}";
9115 [(set_attr "type" "sselog1")
9116 (set_attr "prefix_extra" "1")
9117 (set_attr "prefix" "vex")
9118 (set_attr "mode" "TI")])
;; Legacy-encoded psign pattern on SSE registers, instantiated for every
;; mode in SSEMODE124 (V16QI, V8HI, V4SI).  Operand 1 is tied to the
;; destination register; the mnemonic suffix comes from <ssevecsize>.
9120 (define_insn "ssse3_psign<mode>3"
9121 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
9123 [(match_operand:SSEMODE124 1 "register_operand" "0")
9124 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")]
9127 "psign<ssevecsize>\t{%2, %0|%0, %2}";
9128 [(set_attr "type" "sselog1")
9129 (set_attr "prefix_data16" "1")
9130 (set_attr "prefix_extra" "1")
9131 (set_attr "mode" "TI")])
;; psign pattern on the "y" register class, instantiated over the MMXMODEI
;; iterator.  It shares its name template with the SSE pattern above, but
;; the two expand to different names because the iterators differ.
;; Operand 1 is tied to the destination; the mnemonic suffix comes from
;; <mmxvecsize>; prefix_rex is computed per insn.
9133 (define_insn "ssse3_psign<mode>3"
9134 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
9136 [(match_operand:MMXMODEI 1 "register_operand" "0")
9137 (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
9140 "psign<mmxvecsize>\t{%2, %0|%0, %2}";
9141 [(set_attr "type" "sselog1")
9142 (set_attr "prefix_extra" "1")
9143 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9144 (set_attr "mode" "DI")])
;; AVX (VEX-encoded) TImode pattern that emits vpalignr, modelled as an
;; unspec.  Operand 3 must satisfy const_0_to_255_mul_8_operand; the output
;; code divides its value by 8 and replaces operands[3] with the result
;; before printing (presumably converting a bit count to the byte count the
;; instruction encodes -- confirm against the predicate definition).
9146 (define_insn "*avx_palignrti"
9147 [(set (match_operand:TI 0 "register_operand" "=x")
9148 (unspec:TI [(match_operand:TI 1 "register_operand" "x")
9149 (match_operand:TI 2 "nonimmediate_operand" "xm")
9150 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
9154 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
9155 return "vpalignr\t{%3, %2, %1, %0|%0, %1, %2, %3}";
9157 [(set_attr "type" "sseishft")
9158 (set_attr "prefix_extra" "1")
9159 (set_attr "length_immediate" "1")
9160 (set_attr "prefix" "vex")
9161 (set_attr "mode" "TI")])
;; Legacy-encoded TImode pattern that emits palignr on SSE registers,
;; modelled as an unspec with operand 1 tied to the destination.
;; Operand 3 must satisfy const_0_to_255_mul_8_operand; the output code
;; divides its value by 8 and replaces operands[3] before printing
;; (presumably bits to bytes -- confirm against the predicate definition).
9163 (define_insn "ssse3_palignrti"
9164 [(set (match_operand:TI 0 "register_operand" "=x")
9165 (unspec:TI [(match_operand:TI 1 "register_operand" "0")
9166 (match_operand:TI 2 "nonimmediate_operand" "xm")
9167 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
9171 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
9172 return "palignr\t{%3, %2, %0|%0, %2, %3}";
9174 [(set_attr "type" "sseishft")
9175 (set_attr "atom_unit" "sishuf")
9176 (set_attr "prefix_data16" "1")
9177 (set_attr "prefix_extra" "1")
9178 (set_attr "length_immediate" "1")
9179 (set_attr "mode" "TI")])
;; DImode variant of palignr on the "y" register class, modelled as an
;; unspec with operand 1 tied to the destination.  As in the TImode
;; patterns, the output code divides operand 3 by 8 before printing.
;; prefix_rex is computed per insn by x86_extended_reg_mentioned_p.
9181 (define_insn "ssse3_palignrdi"
9182 [(set (match_operand:DI 0 "register_operand" "=y")
9183 (unspec:DI [(match_operand:DI 1 "register_operand" "0")
9184 (match_operand:DI 2 "nonimmediate_operand" "ym")
9185 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
9189 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
9190 return "palignr\t{%3, %2, %0|%0, %2, %3}";
9192 [(set_attr "type" "sseishft")
9193 (set_attr "atom_unit" "sishuf")
9194 (set_attr "prefix_extra" "1")
9195 (set_attr "length_immediate" "1")
9196 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9197 (set_attr "mode" "DI")])
;; Vector absolute value for every mode in SSEMODE124 (V16QI, V8HI, V4SI),
;; expressed with the generic abs RTL code.  The source may be a register
;; or memory.  The template starts with "%v" and the prefix attribute is
;; "maybe_vex", so this single pattern is meant to cover both the legacy
;; and the VEX spelling of pabs (selection logic lives in the i386 output
;; code, not shown here).
9199 (define_insn "abs<mode>2"
9200 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
9201 (abs:SSEMODE124 (match_operand:SSEMODE124 1 "nonimmediate_operand" "xm")))]
9203 "%vpabs<ssevecsize>\t{%1, %0|%0, %1}"
9204 [(set_attr "type" "sselog1")
9205 (set_attr "prefix_data16" "1")
9206 (set_attr "prefix_extra" "1")
9207 (set_attr "prefix" "maybe_vex")
9208 (set_attr "mode" "TI")])
;; Vector absolute value over the MMXMODEI iterator on the "y" register
;; class, expressed with the generic abs RTL code.  Emits pabs with the
;; <mmxvecsize> suffix; prefix_rep is forced to 0 and prefix_rex is
;; computed per insn by x86_extended_reg_mentioned_p.
9210 (define_insn "abs<mode>2"
9211 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
9212 (abs:MMXMODEI (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
9214 "pabs<mmxvecsize>\t{%1, %0|%0, %1}";
9215 [(set_attr "type" "sselog1")
9216 (set_attr "prefix_rep" "0")
9217 (set_attr "prefix_extra" "1")
9218 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9219 (set_attr "mode" "DI")])
9221 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9223 ;; AMD SSE4A instructions
9225 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; SSE4A scalar store over the MODEF iterator: SSE register operand 1 is
;; written to memory operand 0 through an unspec.  Emits movnts with the
;; <ssemodefsuffix> suffix.
9227 (define_insn "sse4a_movnt<mode>"
9228 [(set (match_operand:MODEF 0 "memory_operand" "=m")
9230 [(match_operand:MODEF 1 "register_operand" "x")]
9233 "movnts<ssemodefsuffix>\t{%1, %0|%0, %1}"
9234 [(set_attr "type" "ssemov")
9235 (set_attr "mode" "<MODE>")])
;; Vector-source form of the SSE4A movnts store over SSEMODEF2P: element 0
;; of vector register operand 1 is selected and written, through an unspec,
;; to a memory operand of the matching scalar mode (<ssescalarmode>).
9237 (define_insn "sse4a_vmmovnt<mode>"
9238 [(set (match_operand:<ssescalarmode> 0 "memory_operand" "=m")
9239 (unspec:<ssescalarmode>
9240 [(vec_select:<ssescalarmode>
9241 (match_operand:SSEMODEF2P 1 "register_operand" "x")
9242 (parallel [(const_int 0)]))]
9245 "movnts<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
9246 [(set_attr "type" "ssemov")
9247 (set_attr "mode" "<ssescalarmode>")])
;; Immediate form of SSE4A extrq on a V2DI register, modelled as an unspec.
;; Operand 1 is tied to the destination.  Operands 2 and 3 are modeless
;; integer constants with empty constraints; both are printed as
;; immediates, which is why length_immediate is 2.
9249 (define_insn "sse4a_extrqi"
9250 [(set (match_operand:V2DI 0 "register_operand" "=x")
9251 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9252 (match_operand 2 "const_int_operand" "")
9253 (match_operand 3 "const_int_operand" "")]
9256 "extrq\t{%3, %2, %0|%0, %2, %3}"
9257 [(set_attr "type" "sse")
9258 (set_attr "prefix_data16" "1")
9259 (set_attr "length_immediate" "2")
9260 (set_attr "mode" "TI")])
;; Register form of SSE4A extrq, modelled as an unspec.  Operand 1 (V2DI) is
;; tied to the destination; operand 2 is a V16QI SSE register.
9262 (define_insn "sse4a_extrq"
9263 [(set (match_operand:V2DI 0 "register_operand" "=x")
9264 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9265 (match_operand:V16QI 2 "register_operand" "x")]
9268 "extrq\t{%2, %0|%0, %2}"
9269 [(set_attr "type" "sse")
9270 (set_attr "prefix_data16" "1")
9271 (set_attr "mode" "TI")])
;; Immediate form of SSE4A insertq on V2DI registers, modelled as an unspec.
;; Operand 1 is tied to the destination, operand 2 is a second SSE register,
;; and operands 3 and 4 are modeless integer constants printed as two
;; immediates (length_immediate 2).  The encoding attributes select a rep
;; prefix and no data16 prefix.
9273 (define_insn "sse4a_insertqi"
9274 [(set (match_operand:V2DI 0 "register_operand" "=x")
9275 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9276 (match_operand:V2DI 2 "register_operand" "x")
9277 (match_operand 3 "const_int_operand" "")
9278 (match_operand 4 "const_int_operand" "")]
9281 "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
9282 [(set_attr "type" "sseins")
9283 (set_attr "prefix_data16" "0")
9284 (set_attr "prefix_rep" "1")
9285 (set_attr "length_immediate" "2")
9286 (set_attr "mode" "TI")])
;; Register form of SSE4A insertq on V2DI registers, modelled as an unspec.
;; Operand 1 is tied to the destination; operand 2 is a second SSE
;; register.  The encoding attributes select a rep prefix and no data16
;; prefix.
9288 (define_insn "sse4a_insertq"
9289 [(set (match_operand:V2DI 0 "register_operand" "=x")
9290 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9291 (match_operand:V2DI 2 "register_operand" "x")]
9294 "insertq\t{%2, %0|%0, %2}"
9295 [(set_attr "type" "sseins")
9296 (set_attr "prefix_data16" "0")
9297 (set_attr "prefix_rep" "1")
9298 (set_attr "mode" "TI")])
9300 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9302 ;; Intel SSE4.1 instructions
9304 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; AVX immediate blend over the AVXMODEF2P iterator, expressed as a
;; vec_merge.  Note the operand order inside the vec_merge: operand 2 is
;; listed first and operand 1 second, with immediate operand 3 as the
;; selection mask.  The mask range is bounded by the per-mode
;; const_0_to_<blendbits>_operand predicate.
9306 (define_insn "avx_blendp<avxmodesuffixf2c><avxmodesuffix>"
9307 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
9308 (vec_merge:AVXMODEF2P
9309 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
9310 (match_operand:AVXMODEF2P 1 "register_operand" "x")
9311 (match_operand:SI 3 "const_0_to_<blendbits>_operand" "n")))]
9313 "vblendp<avxmodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9314 [(set_attr "type" "ssemov")
9315 (set_attr "prefix_extra" "1")
9316 (set_attr "length_immediate" "1")
9317 (set_attr "prefix" "vex")
9318 (set_attr "mode" "<avxvecmode>")])
;; AVX variable blend over the AVXMODEF2P iterator.  All three inputs are
;; vectors of the same mode: operand 1 (register), operand 2 (register or
;; memory) and operand 3 (register, the selector).  Emits the four-operand
;; vblendvp form; length_immediate is 1 even though operand 3 is a
;; register.
9320 (define_insn "avx_blendvp<avxmodesuffixf2c><avxmodesuffix>"
9321 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
9323 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
9324 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
9325 (match_operand:AVXMODEF2P 3 "register_operand" "x")]
9328 "vblendvp<avxmodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9329 [(set_attr "type" "ssemov")
9330 (set_attr "prefix_extra" "1")
9331 (set_attr "length_immediate" "1")
9332 (set_attr "prefix" "vex")
9333 (set_attr "mode" "<avxvecmode>")])
;; SSE4.1 immediate blend over the SSEMODEF2P iterator, expressed as a
;; vec_merge with operand 2 listed first, operand 1 second and immediate
;; operand 3 as the selection mask.  Operand 1 is tied to the destination,
;; so the template prints only operands 3, 2 and 0.
9335 (define_insn "sse4_1_blendp<ssemodesuffixf2c>"
9336 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
9337 (vec_merge:SSEMODEF2P
9338 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
9339 (match_operand:SSEMODEF2P 1 "register_operand" "0")
9340 (match_operand:SI 3 "const_0_to_<blendbits>_operand" "n")))]
9342 "blendp<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}"
9343 [(set_attr "type" "ssemov")
9344 (set_attr "prefix_data16" "1")
9345 (set_attr "prefix_extra" "1")
9346 (set_attr "length_immediate" "1")
9347 (set_attr "mode" "<MODE>")])
;; SSE4.1 variable blend over the SSEMODEF2P iterator.  The selector,
;; operand 3, is restricted by the "Yz" constraint, while the destination
;; and operands 1 and 2 use the *_not_xmm0 predicates; this keeps the data
;; operands out of the register reserved for the selector ("Yz" is xmm0 in
;; the i386 constraint definitions -- confirm in constraints.md).
;; Operand 1 is tied to the destination.
9349 (define_insn "sse4_1_blendvp<ssemodesuffixf2c>"
9350 [(set (match_operand:SSEMODEF2P 0 "reg_not_xmm0_operand" "=x")
9352 [(match_operand:SSEMODEF2P 1 "reg_not_xmm0_operand" "0")
9353 (match_operand:SSEMODEF2P 2 "nonimm_not_xmm0_operand" "xm")
9354 (match_operand:SSEMODEF2P 3 "register_operand" "Yz")]
9357 "blendvp<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}"
9358 [(set_attr "type" "ssemov")
9359 (set_attr "prefix_data16" "1")
9360 (set_attr "prefix_extra" "1")
9361 (set_attr "mode" "<MODE>")])
;; AVX vdpp pattern over the AVXMODEF2P iterator.  Operands 1 and 2 are the
;; vector inputs (marked commutative by the "%" on operand 1) and operand 3
;; is an immediate limited to 0..255 by its predicate.  Three-operand plus
;; immediate output form.
9363 (define_insn "avx_dpp<avxmodesuffixf2c><avxmodesuffix>"
9364 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
9366 [(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
9367 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
9368 (match_operand:SI 3 "const_0_to_255_operand" "n")]
9371 "vdpp<avxmodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9372 [(set_attr "type" "ssemul")
9373 (set_attr "prefix" "vex")
9374 (set_attr "prefix_extra" "1")
9375 (set_attr "length_immediate" "1")
9376 (set_attr "mode" "<avxvecmode>")])
;; SSE4.1 dpp pattern over the SSEMODEF2P iterator.  Operand 1 is tied to
;; the destination and commutative with operand 2 ("%0"); operand 3 is an
;; immediate limited to 0..255 by its predicate.
9378 (define_insn "sse4_1_dpp<ssemodesuffixf2c>"
9379 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
9381 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
9382 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
9383 (match_operand:SI 3 "const_0_to_255_operand" "n")]
9386 "dpp<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}"
9387 [(set_attr "type" "ssemul")
9388 (set_attr "prefix_data16" "1")
9389 (set_attr "prefix_extra" "1")
9390 (set_attr "length_immediate" "1")
9391 (set_attr "mode" "<MODE>")])
;; movntdqa load: V2DI memory operand 1 is read into SSE register operand 0
;; through an unspec.  The source must be memory.  The "%v" template prefix
;; together with prefix "maybe_vex" lets the one pattern serve both the
;; legacy and the VEX spelling.
9393 (define_insn "sse4_1_movntdqa"
9394 [(set (match_operand:V2DI 0 "register_operand" "=x")
9395 (unspec:V2DI [(match_operand:V2DI 1 "memory_operand" "m")]
9398 "%vmovntdqa\t{%1, %0|%0, %1}"
9399 [(set_attr "type" "ssemov")
9400 (set_attr "prefix_extra" "1")
9401 (set_attr "prefix" "maybe_vex")
9402 (set_attr "mode" "TI")])
;; AVX (VEX-encoded) V16QI pattern that emits vmpsadbw, modelled as an
;; unspec over operand 1 (register), operand 2 (register or memory) and
;; immediate operand 3, which its predicate limits to 0..255.
9404 (define_insn "*avx_mpsadbw"
9405 [(set (match_operand:V16QI 0 "register_operand" "=x")
9406 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
9407 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
9408 (match_operand:SI 3 "const_0_to_255_operand" "n")]
9411 "vmpsadbw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9412 [(set_attr "type" "sselog1")
9413 (set_attr "prefix" "vex")
9414 (set_attr "prefix_extra" "1")
9415 (set_attr "length_immediate" "1")
9416 (set_attr "mode" "TI")])
;; Legacy-encoded V16QI pattern that emits mpsadbw, modelled as an unspec.
;; Operand 1 is tied to the destination; operand 3 is an immediate limited
;; to 0..255 by its predicate.
9418 (define_insn "sse4_1_mpsadbw"
9419 [(set (match_operand:V16QI 0 "register_operand" "=x")
9420 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
9421 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
9422 (match_operand:SI 3 "const_0_to_255_operand" "n")]
9425 "mpsadbw\t{%3, %2, %0|%0, %2, %3}"
9426 [(set_attr "type" "sselog1")
9427 (set_attr "prefix_extra" "1")
9428 (set_attr "length_immediate" "1")
9429 (set_attr "mode" "TI")])
;; AVX (VEX-encoded) pattern that emits vpackusdw: two V4SI inputs
;; (operand 1 in a register, operand 2 in a register or memory) produce one
;; V8HI result.  Three-operand output form.
9431 (define_insn "*avx_packusdw"
9432 [(set (match_operand:V8HI 0 "register_operand" "=x")
9435 (match_operand:V4SI 1 "register_operand" "x"))
9437 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
9439 "vpackusdw\t{%2, %1, %0|%0, %1, %2}"
9440 [(set_attr "type" "sselog")
9441 (set_attr "prefix_extra" "1")
9442 (set_attr "prefix" "vex")
9443 (set_attr "mode" "TI")])
;; Legacy-encoded pattern that emits packusdw: two V4SI inputs produce one
;; V8HI result.  Operand 1 is tied to the destination register.
9445 (define_insn "sse4_1_packusdw"
9446 [(set (match_operand:V8HI 0 "register_operand" "=x")
9449 (match_operand:V4SI 1 "register_operand" "0"))
9451 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
9453 "packusdw\t{%2, %0|%0, %2}"
9454 [(set_attr "type" "sselog")
9455 (set_attr "prefix_extra" "1")
9456 (set_attr "mode" "TI")])
;; AVX (VEX-encoded) V16QI pattern that emits vpblendvb, modelled as an
;; unspec.  Operand 3, the selector, may be any SSE register here; it is
;; printed as an explicit fourth operand.  length_immediate is 1 even
;; though no operand is an immediate.
9458 (define_insn "*avx_pblendvb"
9459 [(set (match_operand:V16QI 0 "register_operand" "=x")
9460 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
9461 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
9462 (match_operand:V16QI 3 "register_operand" "x")]
9465 "vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9466 [(set_attr "type" "ssemov")
9467 (set_attr "prefix_extra" "1")
9468 (set_attr "length_immediate" "1")
9469 (set_attr "prefix" "vex")
9470 (set_attr "mode" "TI")])
;; Legacy-encoded V16QI pattern that emits pblendvb, modelled as an unspec.
;; The selector, operand 3, is restricted by the "Yz" constraint, and the
;; destination and operands 1 and 2 use the *_not_xmm0 predicates so they
;; cannot occupy the selector's register ("Yz" is xmm0 in the i386
;; constraint definitions -- confirm in constraints.md).  Operand 1 is tied
;; to the destination.
9472 (define_insn "sse4_1_pblendvb"
9473 [(set (match_operand:V16QI 0 "reg_not_xmm0_operand" "=x")
9474 (unspec:V16QI [(match_operand:V16QI 1 "reg_not_xmm0_operand" "0")
9475 (match_operand:V16QI 2 "nonimm_not_xmm0_operand" "xm")
9476 (match_operand:V16QI 3 "register_operand" "Yz")]
9479 "pblendvb\t{%3, %2, %0|%0, %2, %3}"
9480 [(set_attr "type" "ssemov")
9481 (set_attr "prefix_extra" "1")
9482 (set_attr "mode" "TI")])
;; AVX (VEX-encoded) V8HI pattern that emits vpblendw.  Operand 2 is listed
;; before operand 1 in the RTL, followed by immediate operand 3 (0..255) as
;; the selection mask.  Three-operand plus immediate output form.
9484 (define_insn "*avx_pblendw"
9485 [(set (match_operand:V8HI 0 "register_operand" "=x")
9487 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
9488 (match_operand:V8HI 1 "register_operand" "x")
9489 (match_operand:SI 3 "const_0_to_255_operand" "n")))]
9491 "vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9492 [(set_attr "type" "ssemov")
9493 (set_attr "prefix" "vex")
9494 (set_attr "prefix_extra" "1")
9495 (set_attr "length_immediate" "1")
9496 (set_attr "mode" "TI")])
;; Legacy-encoded V8HI pattern that emits pblendw.  Operand 2 is listed
;; before operand 1 in the RTL, followed by immediate operand 3 (0..255) as
;; the selection mask.  Operand 1 is tied to the destination register.
9498 (define_insn "sse4_1_pblendw"
9499 [(set (match_operand:V8HI 0 "register_operand" "=x")
9501 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
9502 (match_operand:V8HI 1 "register_operand" "0")
9503 (match_operand:SI 3 "const_0_to_255_operand" "n")))]
9505 "pblendw\t{%3, %2, %0|%0, %2, %3}"
9506 [(set_attr "type" "ssemov")
9507 (set_attr "prefix_extra" "1")
9508 (set_attr "length_immediate" "1")
9509 (set_attr "mode" "TI")])
;; phminposuw on V8HI, modelled as UNSPEC_PHMINPOSUW over a single source
;; that may be a register or memory.  The "%v" template prefix together
;; with prefix "maybe_vex" lets the one pattern serve both the legacy and
;; the VEX spelling.
9511 (define_insn "sse4_1_phminposuw"
9512 [(set (match_operand:V8HI 0 "register_operand" "=x")
9513 (unspec:V8HI [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
9514 UNSPEC_PHMINPOSUW))]
9516 "%vphminposuw\t{%1, %0|%0, %1}"
9517 [(set_attr "type" "sselog1")
9518 (set_attr "prefix_extra" "1")
9519 (set_attr "prefix" "maybe_vex")
9520 (set_attr "mode" "TI")])
;; pmovsxbw with a full-width register source: a V16QI register supplies
;; the bytes (selection starting at index 0) that become the V8HI result.
;; Register source only; the memory-source form is the starred pattern of
;; the same name.
9522 (define_insn "sse4_1_extendv8qiv8hi2"
9523 [(set (match_operand:V8HI 0 "register_operand" "=x")
9526 (match_operand:V16QI 1 "register_operand" "x")
9527 (parallel [(const_int 0)
9536 "%vpmovsxbw\t{%1, %0|%0, %1}"
9537 [(set_attr "type" "ssemov")
9538 (set_attr "prefix_extra" "1")
9539 (set_attr "prefix" "maybe_vex")
9540 (set_attr "mode" "TI")])
;; pmovsxbw with a narrow source: operand 1 is a V8QI register or memory
;; operand, wrapped in vec_duplicate:V16QI so the selection starting at
;; index 0 has the same shape as in the named register-only pattern.
9542 (define_insn "*sse4_1_extendv8qiv8hi2"
9543 [(set (match_operand:V8HI 0 "register_operand" "=x")
9546 (vec_duplicate:V16QI
9547 (match_operand:V8QI 1 "nonimmediate_operand" "xm"))
9548 (parallel [(const_int 0)
9557 "%vpmovsxbw\t{%1, %0|%0, %1}"
9558 [(set_attr "type" "ssemov")
9559 (set_attr "prefix_extra" "1")
9560 (set_attr "prefix" "maybe_vex")
9561 (set_attr "mode" "TI")])
;; pmovsxbd with a full-width register source: a V16QI register supplies
;; the bytes (selection starting at index 0) that become the V4SI result.
;; Register source only.
9563 (define_insn "sse4_1_extendv4qiv4si2"
9564 [(set (match_operand:V4SI 0 "register_operand" "=x")
9567 (match_operand:V16QI 1 "register_operand" "x")
9568 (parallel [(const_int 0)
9573 "%vpmovsxbd\t{%1, %0|%0, %1}"
9574 [(set_attr "type" "ssemov")
9575 (set_attr "prefix_extra" "1")
9576 (set_attr "prefix" "maybe_vex")
9577 (set_attr "mode" "TI")])
;; pmovsxbd with a narrow source: operand 1 is a V4QI register or memory
;; operand, wrapped in vec_duplicate:V16QI ahead of the selection starting
;; at index 0.
9579 (define_insn "*sse4_1_extendv4qiv4si2"
9580 [(set (match_operand:V4SI 0 "register_operand" "=x")
9583 (vec_duplicate:V16QI
9584 (match_operand:V4QI 1 "nonimmediate_operand" "xm"))
9585 (parallel [(const_int 0)
9590 "%vpmovsxbd\t{%1, %0|%0, %1}"
9591 [(set_attr "type" "ssemov")
9592 (set_attr "prefix_extra" "1")
9593 (set_attr "prefix" "maybe_vex")
9594 (set_attr "mode" "TI")])
;; PMOVSXBQ, register-source form: operand 1 is a full V16QI register, the
;; element selection starts at index 0, and the result is V2DI.
9596 (define_insn "sse4_1_extendv2qiv2di2"
9597 [(set (match_operand:V2DI 0 "register_operand" "=x")
9600 (match_operand:V16QI 1 "register_operand" "x")
9601 (parallel [(const_int 0)
9604 "%vpmovsxbq\t{%1, %0|%0, %1}"
9605 [(set_attr "type" "ssemov")
9606 (set_attr "prefix_extra" "1")
9607 (set_attr "prefix" "maybe_vex")
9608 (set_attr "mode" "TI")])
;; PMOVSXBQ, narrow-source form: operand 1 is a V2QI register or memory
;; value, wrapped in vec_duplicate:V16QI ahead of the element selection.
9610 (define_insn "*sse4_1_extendv2qiv2di2"
9611 [(set (match_operand:V2DI 0 "register_operand" "=x")
9614 (vec_duplicate:V16QI
9615 (match_operand:V2QI 1 "nonimmediate_operand" "xm"))
9616 (parallel [(const_int 0)
9619 "%vpmovsxbq\t{%1, %0|%0, %1}"
9620 [(set_attr "type" "ssemov")
9621 (set_attr "prefix_extra" "1")
9622 (set_attr "prefix" "maybe_vex")
9623 (set_attr "mode" "TI")])
;; PMOVSXWD, register-source form: operand 1 is a full V8HI register, the
;; element selection starts at index 0, and the result is V4SI.
9625 (define_insn "sse4_1_extendv4hiv4si2"
9626 [(set (match_operand:V4SI 0 "register_operand" "=x")
9629 (match_operand:V8HI 1 "register_operand" "x")
9630 (parallel [(const_int 0)
9635 "%vpmovsxwd\t{%1, %0|%0, %1}"
9636 [(set_attr "type" "ssemov")
9637 (set_attr "prefix_extra" "1")
9638 (set_attr "prefix" "maybe_vex")
9639 (set_attr "mode" "TI")])
;; PMOVSXWD, narrow-source form: operand 1 is a V4HI register or memory
;; value.  The mode is V4HI because pmovsxwd consumes four 16-bit elements
;; (64 bits) from its source; this also matches the mode used by
;; *sse4_1_zero_extendv4hiv4si2.
9641 (define_insn "*sse4_1_extendv4hiv4si2"
9642 [(set (match_operand:V4SI 0 "register_operand" "=x")
9646 (match_operand:V4HI 1 "nonimmediate_operand" "xm"))
9647 (parallel [(const_int 0)
9652 "%vpmovsxwd\t{%1, %0|%0, %1}"
9653 [(set_attr "type" "ssemov")
9654 (set_attr "prefix_extra" "1")
9655 (set_attr "prefix" "maybe_vex")
9656 (set_attr "mode" "TI")])
;; PMOVSXWQ, register-source form: operand 1 is a full V8HI register, the
;; element selection starts at index 0, and the result is V2DI.
9658 (define_insn "sse4_1_extendv2hiv2di2"
9659 [(set (match_operand:V2DI 0 "register_operand" "=x")
9662 (match_operand:V8HI 1 "register_operand" "x")
9663 (parallel [(const_int 0)
9666 "%vpmovsxwq\t{%1, %0|%0, %1}"
9667 [(set_attr "type" "ssemov")
9668 (set_attr "prefix_extra" "1")
9669 (set_attr "prefix" "maybe_vex")
9670 (set_attr "mode" "TI")])
;; PMOVSXWQ, narrow-source form: operand 1 is a V2HI register or memory
;; value.  The mode is V2HI because pmovsxwq consumes two 16-bit elements
;; (32 bits) from its source; this also matches the mode used by
;; *sse4_1_zero_extendv2hiv2di2.
9672 (define_insn "*sse4_1_extendv2hiv2di2"
9673 [(set (match_operand:V2DI 0 "register_operand" "=x")
9677 (match_operand:V2HI 1 "nonimmediate_operand" "xm"))
9678 (parallel [(const_int 0)
9681 "%vpmovsxwq\t{%1, %0|%0, %1}"
9682 [(set_attr "type" "ssemov")
9683 (set_attr "prefix_extra" "1")
9684 (set_attr "prefix" "maybe_vex")
9685 (set_attr "mode" "TI")])
;; PMOVSXDQ, register-source form: operand 1 is a full V4SI register, the
;; element selection starts at index 0, and the result is V2DI.
9687 (define_insn "sse4_1_extendv2siv2di2"
9688 [(set (match_operand:V2DI 0 "register_operand" "=x")
9691 (match_operand:V4SI 1 "register_operand" "x")
9692 (parallel [(const_int 0)
9695 "%vpmovsxdq\t{%1, %0|%0, %1}"
9696 [(set_attr "type" "ssemov")
9697 (set_attr "prefix_extra" "1")
9698 (set_attr "prefix" "maybe_vex")
9699 (set_attr "mode" "TI")])
;; PMOVSXDQ, narrow-source form: operand 1 is a V2SI register or memory
;; value; the result is V2DI.
9701 (define_insn "*sse4_1_extendv2siv2di2"
9702 [(set (match_operand:V2DI 0 "register_operand" "=x")
9706 (match_operand:V2SI 1 "nonimmediate_operand" "xm"))
9707 (parallel [(const_int 0)
9710 "%vpmovsxdq\t{%1, %0|%0, %1}"
9711 [(set_attr "type" "ssemov")
9712 (set_attr "prefix_extra" "1")
9713 (set_attr "prefix" "maybe_vex")
9714 (set_attr "mode" "TI")])
;; PMOVZXBW, register-source form: operand 1 is a full V16QI register, the
;; element selection starts at index 0, and the result is V8HI.
9716 (define_insn "sse4_1_zero_extendv8qiv8hi2"
9717 [(set (match_operand:V8HI 0 "register_operand" "=x")
9720 (match_operand:V16QI 1 "register_operand" "x")
9721 (parallel [(const_int 0)
9730 "%vpmovzxbw\t{%1, %0|%0, %1}"
9731 [(set_attr "type" "ssemov")
9732 (set_attr "prefix_extra" "1")
9733 (set_attr "prefix" "maybe_vex")
9734 (set_attr "mode" "TI")])
;; PMOVZXBW, narrow-source form: operand 1 is a V8QI register or memory
;; value, wrapped in vec_duplicate:V16QI ahead of the element selection.
9736 (define_insn "*sse4_1_zero_extendv8qiv8hi2"
9737 [(set (match_operand:V8HI 0 "register_operand" "=x")
9740 (vec_duplicate:V16QI
9741 (match_operand:V8QI 1 "nonimmediate_operand" "xm"))
9742 (parallel [(const_int 0)
9751 "%vpmovzxbw\t{%1, %0|%0, %1}"
9752 [(set_attr "type" "ssemov")
9753 (set_attr "prefix_extra" "1")
9754 (set_attr "prefix" "maybe_vex")
9755 (set_attr "mode" "TI")])
;; PMOVZXBD, register-source form: operand 1 is a full V16QI register, the
;; element selection starts at index 0, and the result is V4SI.
9757 (define_insn "sse4_1_zero_extendv4qiv4si2"
9758 [(set (match_operand:V4SI 0 "register_operand" "=x")
9761 (match_operand:V16QI 1 "register_operand" "x")
9762 (parallel [(const_int 0)
9767 "%vpmovzxbd\t{%1, %0|%0, %1}"
9768 [(set_attr "type" "ssemov")
9769 (set_attr "prefix_extra" "1")
9770 (set_attr "prefix" "maybe_vex")
9771 (set_attr "mode" "TI")])
;; PMOVZXBD, narrow-source form: operand 1 is a V4QI register or memory
;; value, wrapped in vec_duplicate:V16QI ahead of the element selection.
9773 (define_insn "*sse4_1_zero_extendv4qiv4si2"
9774 [(set (match_operand:V4SI 0 "register_operand" "=x")
9777 (vec_duplicate:V16QI
9778 (match_operand:V4QI 1 "nonimmediate_operand" "xm"))
9779 (parallel [(const_int 0)
9784 "%vpmovzxbd\t{%1, %0|%0, %1}"
9785 [(set_attr "type" "ssemov")
9786 (set_attr "prefix_extra" "1")
9787 (set_attr "prefix" "maybe_vex")
9788 (set_attr "mode" "TI")])
;; PMOVZXBQ, register-source form: operand 1 is a full V16QI register, the
;; element selection starts at index 0, and the result is V2DI.
9790 (define_insn "sse4_1_zero_extendv2qiv2di2"
9791 [(set (match_operand:V2DI 0 "register_operand" "=x")
9794 (match_operand:V16QI 1 "register_operand" "x")
9795 (parallel [(const_int 0)
9798 "%vpmovzxbq\t{%1, %0|%0, %1}"
9799 [(set_attr "type" "ssemov")
9800 (set_attr "prefix_extra" "1")
9801 (set_attr "prefix" "maybe_vex")
9802 (set_attr "mode" "TI")])
;; PMOVZXBQ, narrow-source form: operand 1 is a V2QI register or memory
;; value, wrapped in vec_duplicate:V16QI ahead of the element selection.
9804 (define_insn "*sse4_1_zero_extendv2qiv2di2"
9805 [(set (match_operand:V2DI 0 "register_operand" "=x")
9808 (vec_duplicate:V16QI
9809 (match_operand:V2QI 1 "nonimmediate_operand" "xm"))
9810 (parallel [(const_int 0)
9813 "%vpmovzxbq\t{%1, %0|%0, %1}"
9814 [(set_attr "type" "ssemov")
9815 (set_attr "prefix_extra" "1")
9816 (set_attr "prefix" "maybe_vex")
9817 (set_attr "mode" "TI")])
;; PMOVZXWD, register-source form: operand 1 is a full V8HI register, the
;; element selection starts at index 0, and the result is V4SI.
9819 (define_insn "sse4_1_zero_extendv4hiv4si2"
9820 [(set (match_operand:V4SI 0 "register_operand" "=x")
9823 (match_operand:V8HI 1 "register_operand" "x")
9824 (parallel [(const_int 0)
9829 "%vpmovzxwd\t{%1, %0|%0, %1}"
9830 [(set_attr "type" "ssemov")
9831 (set_attr "prefix_extra" "1")
9832 (set_attr "prefix" "maybe_vex")
9833 (set_attr "mode" "TI")])
;; PMOVZXWD, narrow-source form: operand 1 is a V4HI register or memory
;; value; the result is V4SI.
9835 (define_insn "*sse4_1_zero_extendv4hiv4si2"
9836 [(set (match_operand:V4SI 0 "register_operand" "=x")
9840 (match_operand:V4HI 1 "nonimmediate_operand" "xm"))
9841 (parallel [(const_int 0)
9846 "%vpmovzxwd\t{%1, %0|%0, %1}"
9847 [(set_attr "type" "ssemov")
9848 (set_attr "prefix_extra" "1")
9849 (set_attr "prefix" "maybe_vex")
9850 (set_attr "mode" "TI")])
;; PMOVZXWQ, register-source form: operand 1 is a full V8HI register, the
;; element selection starts at index 0, and the result is V2DI.
9852 (define_insn "sse4_1_zero_extendv2hiv2di2"
9853 [(set (match_operand:V2DI 0 "register_operand" "=x")
9856 (match_operand:V8HI 1 "register_operand" "x")
9857 (parallel [(const_int 0)
9860 "%vpmovzxwq\t{%1, %0|%0, %1}"
9861 [(set_attr "type" "ssemov")
9862 (set_attr "prefix_extra" "1")
9863 (set_attr "prefix" "maybe_vex")
9864 (set_attr "mode" "TI")])
;; PMOVZXWQ, narrow-source form: operand 1 is a V2HI register or memory
;; value; the result is V2DI.
9866 (define_insn "*sse4_1_zero_extendv2hiv2di2"
9867 [(set (match_operand:V2DI 0 "register_operand" "=x")
9871 (match_operand:V2HI 1 "nonimmediate_operand" "xm"))
9872 (parallel [(const_int 0)
9875 "%vpmovzxwq\t{%1, %0|%0, %1}"
9876 [(set_attr "type" "ssemov")
9877 (set_attr "prefix_extra" "1")
9878 (set_attr "prefix" "maybe_vex")
9879 (set_attr "mode" "TI")])
;; PMOVZXDQ, register-source form: operand 1 is a full V4SI register, the
;; element selection starts at index 0, and the result is V2DI.
9881 (define_insn "sse4_1_zero_extendv2siv2di2"
9882 [(set (match_operand:V2DI 0 "register_operand" "=x")
9885 (match_operand:V4SI 1 "register_operand" "x")
9886 (parallel [(const_int 0)
9889 "%vpmovzxdq\t{%1, %0|%0, %1}"
9890 [(set_attr "type" "ssemov")
9891 (set_attr "prefix_extra" "1")
9892 (set_attr "prefix" "maybe_vex")
9893 (set_attr "mode" "TI")])
;; PMOVZXDQ, narrow-source form: operand 1 is a V2SI register or memory
;; value; the result is V2DI.
9895 (define_insn "*sse4_1_zero_extendv2siv2di2"
9896 [(set (match_operand:V2DI 0 "register_operand" "=x")
9900 (match_operand:V2SI 1 "nonimmediate_operand" "xm"))
9901 (parallel [(const_int 0)
9904 "%vpmovzxdq\t{%1, %0|%0, %1}"
9905 [(set_attr "type" "ssemov")
9906 (set_attr "prefix_extra" "1")
9907 (set_attr "prefix" "maybe_vex")
9908 (set_attr "mode" "TI")])
9910 ;; vtestps/vtestpd are very similar to comiss and ucomiss when
9911 ;; setting FLAGS_REG.  But they are not really compare instructions.
;; The only output is FLAGS_REG (CC mode), set from an unspec of the two
;; inputs.  Operand 0 must be a register; operand 1 may be a register or
;; memory.  The pattern is instantiated for each mode in AVXMODEF2P.
9912 (define_insn "avx_vtestp<avxmodesuffixf2c><avxmodesuffix>"
9913 [(set (reg:CC FLAGS_REG)
9914 (unspec:CC [(match_operand:AVXMODEF2P 0 "register_operand" "x")
9915 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "xm")]
9918 "vtestp<avxmodesuffixf2c>\t{%1, %0|%0, %1}"
9919 [(set_attr "type" "ssecomi")
9920 (set_attr "prefix_extra" "1")
9921 (set_attr "prefix" "vex")
9922 (set_attr "mode" "<MODE>")])
9924 ;; ptest is very similar to comiss and ucomiss when setting FLAGS_REG.
9925 ;; But it is not really a compare instruction.
;; 256-bit form: both inputs are V4DI, the only output is FLAGS_REG (CC
;; mode).  Operand 0 must be a register; operand 1 may be a register or
;; memory.
9926 (define_insn "avx_ptest256"
9927 [(set (reg:CC FLAGS_REG)
9928 (unspec:CC [(match_operand:V4DI 0 "register_operand" "x")
9929 (match_operand:V4DI 1 "nonimmediate_operand" "xm")]
9932 "vptest\t{%1, %0|%0, %1}"
9933 [(set_attr "type" "ssecomi")
9934 (set_attr "prefix_extra" "1")
9935 (set_attr "prefix" "vex")
9936 (set_attr "mode" "OI")])
;; PTEST on V2DI inputs: the only output is FLAGS_REG (CC mode), set from
;; an unspec of the two operands.  Operand 0 must be a register; operand 1
;; may be a register or memory.
9938 (define_insn "sse4_1_ptest"
9939 [(set (reg:CC FLAGS_REG)
9940 (unspec:CC [(match_operand:V2DI 0 "register_operand" "x")
9941 (match_operand:V2DI 1 "nonimmediate_operand" "xm")]
9944 "%vptest\t{%1, %0|%0, %1}"
9945 [(set_attr "type" "ssecomi")
9946 (set_attr "prefix_extra" "1")
9947 (set_attr "prefix" "maybe_vex")
9948 (set_attr "mode" "TI")])
;; VROUNDPS/VROUNDPD, 256-bit: an unspec over operand 1 (register or
;; memory) and operand 2, an integer constant accepted by
;; const_0_to_15_operand.  Instantiated for each mode in AVX256MODEF2P.
9950 (define_insn "avx_roundp<avxmodesuffixf2c>256"
9951 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "=x")
9952 (unspec:AVX256MODEF2P
9953 [(match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "xm")
9954 (match_operand:SI 2 "const_0_to_15_operand" "n")]
9957 "vroundp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
9958 [(set_attr "type" "ssecvt")
9959 (set_attr "prefix_extra" "1")
9960 (set_attr "length_immediate" "1")
9961 (set_attr "prefix" "vex")
9962 (set_attr "mode" "<MODE>")])
;; ROUNDPS/ROUNDPD, 128-bit: operand 1 may be a register or memory and
;; operand 2 is an integer constant accepted by const_0_to_15_operand.
;; Instantiated for each mode in SSEMODEF2P.
9964 (define_insn "sse4_1_roundp<ssemodesuffixf2c>"
9965 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
9967 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm")
9968 (match_operand:SI 2 "const_0_to_15_operand" "n")]
9971 "%vroundp<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
9972 [(set_attr "type" "ssecvt")
9973 (set_attr "prefix_data16" "1")
9974 (set_attr "prefix_extra" "1")
9975 (set_attr "length_immediate" "1")
9976 (set_attr "prefix" "maybe_vex")
9977 (set_attr "mode" "<MODE>")])
;; VROUNDSS/VROUNDSD: vec_merge of a rounding of operand 2 with operand 1.
;; This is the VEX three-source form, so operand 1 is an independent
;; register rather than being tied to the destination.  Operand 3 is an
;; integer constant accepted by const_0_to_15_operand.
9979 (define_insn "*avx_rounds<ssemodesuffixf2c>"
9980 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
9981 (vec_merge:SSEMODEF2P
9983 [(match_operand:SSEMODEF2P 2 "register_operand" "x")
9984 (match_operand:SI 3 "const_0_to_15_operand" "n")]
9986 (match_operand:SSEMODEF2P 1 "register_operand" "x")
9989 "vrounds<ssemodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9990 [(set_attr "type" "ssecvt")
9991 (set_attr "prefix_extra" "1")
9992 (set_attr "length_immediate" "1")
9993 (set_attr "prefix" "vex")
9994 (set_attr "mode" "<MODE>")])
;; ROUNDSS/ROUNDSD: vec_merge of a rounding of operand 2 with operand 1.
;; Operand 1 is tied to the destination ("0"), so the template prints only
;; operands 0, 2 and 3.  Operand 3 is an integer constant accepted by
;; const_0_to_15_operand.
9996 (define_insn "sse4_1_rounds<ssemodesuffixf2c>"
9997 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
9998 (vec_merge:SSEMODEF2P
10000 [(match_operand:SSEMODEF2P 2 "register_operand" "x")
10001 (match_operand:SI 3 "const_0_to_15_operand" "n")]
10003 (match_operand:SSEMODEF2P 1 "register_operand" "0")
10006 "rounds<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}"
10007 [(set_attr "type" "ssecvt")
10008 (set_attr "prefix_data16" "1")
10009 (set_attr "prefix_extra" "1")
10010 (set_attr "length_immediate" "1")
10011 (set_attr "mode" "<MODE>")])
10013 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10015 ;; Intel SSE4.2 string/text processing instructions
10017 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Combined PCMPESTRI/PCMPESTRM pattern with three results: operand 0 (SI,
;; constraint "c"), operand 1 (V16QI, constraint "Yz") and FLAGS_REG.
;; Operands 2 and 4 are the V16QI inputs (operand 4 may be memory),
;; operands 3 and 5 are SI inputs constrained to "a" and "d", and operand 6
;; is an integer constant accepted by const_0_to_255_operand.
;; The split condition includes can_create_pseudo_p ().  The split code
;; uses REG_UNUSED notes on curr_insn to compute which of the three results
;; are live (ecx, xmm0, flags) and emits sse4_2_pcmpestri and/or
;; sse4_2_pcmpestrm for the register results; when flags are live but
;; neither register result is, it emits sse4_2_pcmpestr_cconly.
10019 (define_insn_and_split "sse4_2_pcmpestr"
10020 [(set (match_operand:SI 0 "register_operand" "=c,c")
10022 [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
10023 (match_operand:SI 3 "register_operand" "a,a")
10024 (match_operand:V16QI 4 "nonimm_not_xmm0_operand" "x,m")
10025 (match_operand:SI 5 "register_operand" "d,d")
10026 (match_operand:SI 6 "const_0_to_255_operand" "n,n")]
10028 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
10036 (set (reg:CC FLAGS_REG)
10045 && can_create_pseudo_p ()"
10050 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
10051 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
10052 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
10055 emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
10056 operands[3], operands[4],
10057 operands[5], operands[6]));
10059 emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
10060 operands[3], operands[4],
10061 operands[5], operands[6]));
10062 if (flags && !(ecx || xmm0))
10063 emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
10064 operands[2], operands[3],
10065 operands[4], operands[5],
10069 [(set_attr "type" "sselog")
10070 (set_attr "prefix_data16" "1")
10071 (set_attr "prefix_extra" "1")
10072 (set_attr "length_immediate" "1")
10073 (set_attr "memory" "none,load")
10074 (set_attr "mode" "TI")])
;; PCMPESTRI: writes operand 0 (SI, constraint "c") and FLAGS_REG.
;; Operands 2 and 4 (SI, constraints "a" and "d") are inputs that the
;; template does not print; only operands 1, 3 and the immediate 5 appear
;; in the assembly.  Alternative 1 takes operand 3 from memory, which the
;; "memory" attribute records as a load.
10076 (define_insn "sse4_2_pcmpestri"
10077 [(set (match_operand:SI 0 "register_operand" "=c,c")
10079 [(match_operand:V16QI 1 "register_operand" "x,x")
10080 (match_operand:SI 2 "register_operand" "a,a")
10081 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
10082 (match_operand:SI 4 "register_operand" "d,d")
10083 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
10085 (set (reg:CC FLAGS_REG)
10094 "%vpcmpestri\t{%5, %3, %1|%1, %3, %5}"
10095 [(set_attr "type" "sselog")
10096 (set_attr "prefix_data16" "1")
10097 (set_attr "prefix_extra" "1")
10098 (set_attr "prefix" "maybe_vex")
10099 (set_attr "length_immediate" "1")
10100 (set_attr "memory" "none,load")
10101 (set_attr "mode" "TI")])
;; PCMPESTRM: writes operand 0 (V16QI, constraint "Yz") and FLAGS_REG.
;; Operands 2 and 4 (SI, constraints "a" and "d") are inputs that the
;; template does not print; only operands 1, 3 and the immediate 5 appear
;; in the assembly.  Alternative 1 takes operand 3 from memory.
10103 (define_insn "sse4_2_pcmpestrm"
10104 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
10106 [(match_operand:V16QI 1 "register_operand" "x,x")
10107 (match_operand:SI 2 "register_operand" "a,a")
10108 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
10109 (match_operand:SI 4 "register_operand" "d,d")
10110 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
10112 (set (reg:CC FLAGS_REG)
10121 "%vpcmpestrm\t{%5, %3, %1|%1, %3, %5}"
10122 [(set_attr "type" "sselog")
10123 (set_attr "prefix_data16" "1")
10124 (set_attr "prefix_extra" "1")
10125 (set_attr "length_immediate" "1")
10126 (set_attr "prefix" "maybe_vex")
10127 (set_attr "memory" "none,load")
10128 (set_attr "mode" "TI")])
;; Flags-only form of PCMPESTRI/PCMPESTRM: the sole set is FLAGS_REG, and
;; the register result is only a clobbered scratch.  There are four
;; alternatives.  Alternatives 0 and 1 clobber a "Yz" V16QI scratch and
;; emit pcmpestrm; alternatives 2 and 3 clobber a "c" SI scratch and emit
;; pcmpestri.  The scratch not used by an alternative is "X".  Within each
;; pair the second alternative takes operand 4 from memory.
10130 (define_insn "sse4_2_pcmpestr_cconly"
10131 [(set (reg:CC FLAGS_REG)
10133 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
10134 (match_operand:SI 3 "register_operand" "a,a,a,a")
10135 (match_operand:V16QI 4 "nonimmediate_operand" "x,m,x,m")
10136 (match_operand:SI 5 "register_operand" "d,d,d,d")
10137 (match_operand:SI 6 "const_0_to_255_operand" "n,n,n,n")]
10139 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
10140 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
10143 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
10144 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
10145 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}
10146 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}"
10147 [(set_attr "type" "sselog")
10148 (set_attr "prefix_data16" "1")
10149 (set_attr "prefix_extra" "1")
10150 (set_attr "length_immediate" "1")
10151 (set_attr "memory" "none,load,none,load")
10152 (set_attr "prefix" "maybe_vex")
10153 (set_attr "mode" "TI")])
;; Combined PCMPISTRI/PCMPISTRM pattern with three results: operand 0 (SI,
;; constraint "c"), operand 1 (V16QI, constraint "Yz") and FLAGS_REG.
;; Operands 2 and 3 are the V16QI inputs (operand 3 may be memory) and
;; operand 4 is an integer constant accepted by const_0_to_255_operand.
;; The split condition includes can_create_pseudo_p ().  The split code
;; uses REG_UNUSED notes on curr_insn to compute which of the three results
;; are live (ecx, xmm0, flags) and emits sse4_2_pcmpistri and/or
;; sse4_2_pcmpistrm for the register results; when flags are live but
;; neither register result is, it emits sse4_2_pcmpistr_cconly.
10155 (define_insn_and_split "sse4_2_pcmpistr"
10156 [(set (match_operand:SI 0 "register_operand" "=c,c")
10158 [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
10159 (match_operand:V16QI 3 "nonimm_not_xmm0_operand" "x,m")
10160 (match_operand:SI 4 "const_0_to_255_operand" "n,n")]
10162 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
10168 (set (reg:CC FLAGS_REG)
10175 && can_create_pseudo_p ()"
10180 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
10181 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
10182 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
10185 emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
10186 operands[3], operands[4]));
10188 emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
10189 operands[3], operands[4]));
10190 if (flags && !(ecx || xmm0))
10191 emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
10192 operands[2], operands[3],
10196 [(set_attr "type" "sselog")
10197 (set_attr "prefix_data16" "1")
10198 (set_attr "prefix_extra" "1")
10199 (set_attr "length_immediate" "1")
10200 (set_attr "memory" "none,load")
10201 (set_attr "mode" "TI")])
;; PCMPISTRI: writes operand 0 (SI, constraint "c") and FLAGS_REG from two
;; V16QI inputs and an immediate accepted by const_0_to_255_operand.
;; Alternative 1 takes operand 2 from memory.
10203 (define_insn "sse4_2_pcmpistri"
10204 [(set (match_operand:SI 0 "register_operand" "=c,c")
10206 [(match_operand:V16QI 1 "register_operand" "x,x")
10207 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
10208 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
10210 (set (reg:CC FLAGS_REG)
10217 "%vpcmpistri\t{%3, %2, %1|%1, %2, %3}"
10218 [(set_attr "type" "sselog")
10219 (set_attr "prefix_data16" "1")
10220 (set_attr "prefix_extra" "1")
10221 (set_attr "length_immediate" "1")
10222 (set_attr "prefix" "maybe_vex")
10223 (set_attr "memory" "none,load")
10224 (set_attr "mode" "TI")])
;; PCMPISTRM: writes operand 0 (V16QI, constraint "Yz") and FLAGS_REG from
;; two V16QI inputs and an immediate accepted by const_0_to_255_operand.
;; Alternative 1 takes operand 2 from memory.
10226 (define_insn "sse4_2_pcmpistrm"
10227 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
10229 [(match_operand:V16QI 1 "register_operand" "x,x")
10230 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
10231 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
10233 (set (reg:CC FLAGS_REG)
10240 "%vpcmpistrm\t{%3, %2, %1|%1, %2, %3}"
10241 [(set_attr "type" "sselog")
10242 (set_attr "prefix_data16" "1")
10243 (set_attr "prefix_extra" "1")
10244 (set_attr "length_immediate" "1")
10245 (set_attr "prefix" "maybe_vex")
10246 (set_attr "memory" "none,load")
10247 (set_attr "mode" "TI")])
;; Flags-only form of PCMPISTRI/PCMPISTRM: the sole set is FLAGS_REG, and
;; the register result is only a clobbered scratch.  There are four
;; alternatives.  Alternatives 0 and 1 clobber a "Yz" V16QI scratch and
;; emit pcmpistrm; alternatives 2 and 3 clobber a "c" SI scratch and emit
;; pcmpistri.  The scratch not used by an alternative is "X".  Within each
;; pair the second alternative takes operand 3 from memory.
10249 (define_insn "sse4_2_pcmpistr_cconly"
10250 [(set (reg:CC FLAGS_REG)
10252 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
10253 (match_operand:V16QI 3 "nonimmediate_operand" "x,m,x,m")
10254 (match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")]
10256 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
10257 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
10260 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
10261 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
10262 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}
10263 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}"
10264 [(set_attr "type" "sselog")
10265 (set_attr "prefix_data16" "1")
10266 (set_attr "prefix_extra" "1")
10267 (set_attr "length_immediate" "1")
10268 (set_attr "memory" "none,load,none,load")
10269 (set_attr "prefix" "maybe_vex")
10270 (set_attr "mode" "TI")])
10272 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10274 ;; XOP instructions
10276 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10278 ;; XOP parallel integer multiply/add instructions.
10279 ;; Note that the XOP multiply/add instructions, which compute
10280 ;; a[i] = b[i] * c[i] + d[i];
10281 ;; do not allow the value being added to be a memory operand.
;; VPMACSWW on V8HI: operands 1 and 2 are the factors (commutative, "%";
;; only operand 2 may be memory) and operand 3 is the addend.
;; NOTE(review): operand 3's predicate (nonimmediate_operand) also admits
;; memory, but its only constraint alternative is "x", so a memory addend
;; can match the pattern and must then be reloaded into a register.
10282 (define_insn "xop_pmacsww"
10283 [(set (match_operand:V8HI 0 "register_operand" "=x")
10286 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10287 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
10288 (match_operand:V8HI 3 "nonimmediate_operand" "x")))]
10290 "vpmacsww\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10291 [(set_attr "type" "ssemuladd")
10292 (set_attr "mode" "TI")])
;; VPMACSSWW on V8HI: a mult:V8HI of operands 1 and 2 (commutative, "%";
;; only operand 2 may be memory) combined with addend operand 3, which is
;; constrained to a register.
10294 (define_insn "xop_pmacssww"
10295 [(set (match_operand:V8HI 0 "register_operand" "=x")
10297 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10298 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
10299 (match_operand:V8HI 3 "nonimmediate_operand" "x")))]
10301 "vpmacssww\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10302 [(set_attr "type" "ssemuladd")
10303 (set_attr "mode" "TI")])
;; VPMACSDD on V4SI: operands 1 and 2 are the factors (commutative, "%";
;; only operand 2 may be memory) and operand 3 is the addend, constrained
;; to a register.
10305 (define_insn "xop_pmacsdd"
10306 [(set (match_operand:V4SI 0 "register_operand" "=x")
10309 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10310 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
10311 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10313 "vpmacsdd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10314 [(set_attr "type" "ssemuladd")
10315 (set_attr "mode" "TI")])
;; VPMACSSDD on V4SI: a mult:V4SI of operands 1 and 2 (commutative, "%";
;; only operand 2 may be memory) combined with addend operand 3, which is
;; constrained to a register.
10317 (define_insn "xop_pmacssdd"
10318 [(set (match_operand:V4SI 0 "register_operand" "=x")
10320 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10321 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
10322 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10324 "vpmacssdd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10325 [(set_attr "type" "ssemuladd")
10326 (set_attr "mode" "TI")])
;; VPMACSSDQL: V4SI factors (operands 1 and 2, element selections starting
;; at index 1) with a V2DI addend (operand 3, register only); the result is
;; V2DI.  Only operand 2 may be memory.
10328 (define_insn "xop_pmacssdql"
10329 [(set (match_operand:V2DI 0 "register_operand" "=x")
10334 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10335 (parallel [(const_int 1)
10338 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10339 (parallel [(const_int 1)
10341 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
10343 "vpmacssdql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10344 [(set_attr "type" "ssemuladd")
10345 (set_attr "mode" "TI")])
;; VPMACSSDQH: V4SI factors (operands 1 and 2, element selections starting
;; at index 0) with a V2DI addend (operand 3, register only); the result is
;; V2DI.  Only operand 2 may be memory.
10347 (define_insn "xop_pmacssdqh"
10348 [(set (match_operand:V2DI 0 "register_operand" "=x")
10353 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10354 (parallel [(const_int 0)
10358 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10359 (parallel [(const_int 0)
10361 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
10363 "vpmacssdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10364 [(set_attr "type" "ssemuladd")
10365 (set_attr "mode" "TI")])
;; VPMACSDQL: V4SI factors (operands 1 and 2, element selections starting
;; at index 1) with a V2DI addend (operand 3, register only); the result is
;; V2DI.  Only operand 2 may be memory.
10367 (define_insn "xop_pmacsdql"
10368 [(set (match_operand:V2DI 0 "register_operand" "=x")
10373 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10374 (parallel [(const_int 1)
10378 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10379 (parallel [(const_int 1)
10381 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
10383 "vpmacsdql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10384 [(set_attr "type" "ssemuladd")
10385 (set_attr "mode" "TI")])
10387 ;; We don't have a straight 32-bit parallel multiply and extend on XOP, so
10388 ;; fake it with a multiply/add. In general, we expect the define_split to
10389 ;; occur before register allocation, so we have to handle the corner case where
10390 ;; the target is the same as operands 1/2
;; The destination is earlyclobber ("=&x"), and the factors are taken from
;; elements 1 and 3 of the V4SI operands.  The split sets operands[3] to
;; CONST0_RTX (V2DImode), which serves as the addend of the replacement
;; multiply/add.
;; NOTE(review): the split condition is "&& reload_completed", i.e. the
;; split runs after register allocation, which does not match the
;; "before register allocation" wording above -- confirm which is intended.
10391 (define_insn_and_split "xop_mulv2div2di3_low"
10392 [(set (match_operand:V2DI 0 "register_operand" "=&x")
10396 (match_operand:V4SI 1 "register_operand" "%x")
10397 (parallel [(const_int 1)
10401 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10402 (parallel [(const_int 1)
10403 (const_int 3)])))))]
10406 "&& reload_completed"
10407 [(set (match_dup 0)
10415 (parallel [(const_int 1)
10420 (parallel [(const_int 1)
10424 operands[3] = CONST0_RTX (V2DImode);
10426 [(set_attr "type" "ssemul")
10427 (set_attr "mode" "TI")])
;; VPMACSDQH: V4SI factors (operands 1 and 2, element selections starting
;; at index 0) with a V2DI addend (operand 3, register only); the result is
;; V2DI.  Only operand 2 may be memory.
10429 (define_insn "xop_pmacsdqh"
10430 [(set (match_operand:V2DI 0 "register_operand" "=x")
10435 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10436 (parallel [(const_int 0)
10440 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10441 (parallel [(const_int 0)
10443 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
10445 "vpmacsdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10446 [(set_attr "type" "ssemuladd")
10447 (set_attr "mode" "TI")])
10449 ;; We don't have a straight 32-bit parallel multiply and extend on XOP, so
10450 ;; fake it with a multiply/add. In general, we expect the define_split to
10451 ;; occur before register allocation, so we have to handle the corner case where
10452 ;; the target is the same as either operands[1] or operands[2]
;; The destination is earlyclobber ("=&x"), and the factors are taken from
;; elements 0 and 2 of the V4SI operands.  The split sets operands[3] to
;; CONST0_RTX (V2DImode), which serves as the addend of the replacement
;; multiply/add.
;; NOTE(review): the split condition is "&& reload_completed", i.e. the
;; split runs after register allocation, which does not match the
;; "before register allocation" wording above -- confirm which is intended.
10453 (define_insn_and_split "xop_mulv2div2di3_high"
10454 [(set (match_operand:V2DI 0 "register_operand" "=&x")
10458 (match_operand:V4SI 1 "register_operand" "%x")
10459 (parallel [(const_int 0)
10463 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10464 (parallel [(const_int 0)
10465 (const_int 2)])))))]
10468 "&& reload_completed"
10469 [(set (match_dup 0)
10477 (parallel [(const_int 0)
10482 (parallel [(const_int 0)
10486 operands[3] = CONST0_RTX (V2DImode);
10488 [(set_attr "type" "ssemul")
10489 (set_attr "mode" "TI")])
10491 ;; XOP parallel integer multiply/add instructions for the intrinsics
;; VPMACSSWD: V8HI factors (operands 1 and 2, element selections starting
;; at index 1) with a V4SI addend (operand 3, register only); the result is
;; V4SI.  Only operand 2 may be memory.
10492 (define_insn "xop_pmacsswd"
10493 [(set (match_operand:V4SI 0 "register_operand" "=x")
10498 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10499 (parallel [(const_int 1)
10505 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
10506 (parallel [(const_int 1)
10510 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10512 "vpmacsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10513 [(set_attr "type" "ssemuladd")
10514 (set_attr "mode" "TI")])
;; VPMACSWD: V8HI factors (operands 1 and 2, element selections starting
;; at index 1) with a V4SI addend (operand 3, register only); the result is
;; V4SI.  Only operand 2 may be memory.
10516 (define_insn "xop_pmacswd"
10517 [(set (match_operand:V4SI 0 "register_operand" "=x")
10522 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10523 (parallel [(const_int 1)
10529 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
10530 (parallel [(const_int 1)
10534 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10536 "vpmacswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10537 [(set_attr "type" "ssemuladd")
10538 (set_attr "mode" "TI")])
;; VPMADCSSWD: V8HI factors (operands 1 and 2) are used through element
;; selections that start at index 0 and at index 1 (the latter ending at
;; index 7); operand 3 is the V4SI addend (register only) and the result
;; is V4SI.  Only operand 2 may be memory.
10540 (define_insn "xop_pmadcsswd"
10541 [(set (match_operand:V4SI 0 "register_operand" "=x")
10547 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10548 (parallel [(const_int 0)
10554 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
10555 (parallel [(const_int 0)
10563 (parallel [(const_int 1)
10570 (parallel [(const_int 1)
10573 (const_int 7)])))))
10574 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10576 "vpmadcsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10577 [(set_attr "type" "ssemuladd")
10578 (set_attr "mode" "TI")])
;; VPMADCSWD: V8HI factors (operands 1 and 2) are used through element
;; selections that start at index 0 and at index 1 (the latter ending at
;; index 7); operand 3 is the V4SI addend (register only) and the result
;; is V4SI.  Only operand 2 may be memory.
10580 (define_insn "xop_pmadcswd"
10581 [(set (match_operand:V4SI 0 "register_operand" "=x")
10587 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10588 (parallel [(const_int 0)
10594 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
10595 (parallel [(const_int 0)
10603 (parallel [(const_int 1)
10610 (parallel [(const_int 1)
10613 (const_int 7)])))))
10614 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10616 "vpmadcswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10617 [(set_attr "type" "ssemuladd")
10618 (set_attr "mode" "TI")])
10620 ;; XOP parallel XMM conditional moves
;; VPCMOV over each mode in SSEMODE, written as an if_then_else with
;; operand 3 as the condition and operands 1 and 2 as the two arms.
;; The two alternatives allow at most one memory source: alternative 0
;; lets operand 2 be memory, alternative 1 lets operand 3 be memory.
10621 (define_insn "xop_pcmov_<mode>"
10622 [(set (match_operand:SSEMODE 0 "register_operand" "=x,x")
10623 (if_then_else:SSEMODE
10624 (match_operand:SSEMODE 3 "nonimmediate_operand" "x,m")
10625 (match_operand:SSEMODE 1 "vector_move_operand" "x,x")
10626 (match_operand:SSEMODE 2 "vector_move_operand" "xm,x")))]
10628 "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10629 [(set_attr "type" "sse4arg")])
;; 256-bit VPCMOV over each mode in AVX256MODE, written as an if_then_else
;; with operand 3 as the condition and operands 1 and 2 as the two arms.
;; The two alternatives allow at most one memory source: alternative 0
;; lets operand 2 be memory, alternative 1 lets operand 3 be memory.
10631 (define_insn "xop_pcmov_<mode>256"
10632 [(set (match_operand:AVX256MODE 0 "register_operand" "=x,x")
10633 (if_then_else:AVX256MODE
10634 (match_operand:AVX256MODE 3 "nonimmediate_operand" "x,m")
10635 (match_operand:AVX256MODE 1 "vector_move_operand" "x,x")
10636 (match_operand:AVX256MODE 2 "vector_move_operand" "xm,x")))]
10638 "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10639 [(set_attr "type" "sse4arg")])
10641 ;; XOP horizontal add/subtract instructions
;; VPHADDBW: single V16QI source (register or memory), V8HI result.  The
;; two element selections start at index 0 and at index 1; the latter
;; ends at index 15.
10642 (define_insn "xop_phaddbw"
10643 [(set (match_operand:V8HI 0 "register_operand" "=x")
10647 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10648 (parallel [(const_int 0)
10659 (parallel [(const_int 1)
10666 (const_int 15)])))))]
10668 "vphaddbw\t{%1, %0|%0, %1}"
10669 [(set_attr "type" "sseiadd1")])
;; VPHADDBD: single V16QI source (register or memory), V4SI result.  Four
;; element selections are combined, starting at indices 0, 1, 2 and 3.
10671 (define_insn "xop_phaddbd"
10672 [(set (match_operand:V4SI 0 "register_operand" "=x")
10677 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10678 (parallel [(const_int 0)
10685 (parallel [(const_int 1)
10688 (const_int 13)]))))
10693 (parallel [(const_int 2)
10700 (parallel [(const_int 3)
10703 (const_int 15)]))))))]
10705 "vphaddbd\t{%1, %0|%0, %1}"
10706 [(set_attr "type" "sseiadd1")])
;; VPHADDBQ: single V16QI source (register or memory), V2DI result.  Eight
;; element selections are combined, starting at indices 0-3 and 8-11.
10708 (define_insn "xop_phaddbq"
10709 [(set (match_operand:V2DI 0 "register_operand" "=x")
10715 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10716 (parallel [(const_int 0)
10721 (parallel [(const_int 1)
10727 (parallel [(const_int 2)
10732 (parallel [(const_int 3)
10733 (const_int 7)])))))
10739 (parallel [(const_int 8)
10744 (parallel [(const_int 9)
10745 (const_int 13)]))))
10750 (parallel [(const_int 10)
10755 (parallel [(const_int 11)
10756 (const_int 15)])))))))]
10758 "vphaddbq\t{%1, %0|%0, %1}"
10759 [(set_attr "type" "sseiadd1")])
;; VPHADDWD: single V8HI source (register or memory), V4SI result.  The
;; two element selections start at index 0 and at index 1; the latter
;; ends at index 7.
10761 (define_insn "xop_phaddwd"
10762 [(set (match_operand:V4SI 0 "register_operand" "=x")
10766 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10767 (parallel [(const_int 0)
10774 (parallel [(const_int 1)
10777 (const_int 7)])))))]
10779 "vphaddwd\t{%1, %0|%0, %1}"
10780 [(set_attr "type" "sseiadd1")])
;; VPHADDWQ: single V8HI source (register or memory), V2DI result.  Four
;; element selections are combined, starting at indices 0, 1, 2 and 3.
10782 (define_insn "xop_phaddwq"
10783 [(set (match_operand:V2DI 0 "register_operand" "=x")
10788 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10789 (parallel [(const_int 0)
10794 (parallel [(const_int 1)
10800 (parallel [(const_int 2)
10805 (parallel [(const_int 3)
10806 (const_int 7)]))))))]
10808 "vphaddwq\t{%1, %0|%0, %1}"
10809 [(set_attr "type" "sseiadd1")])
;; VPHADDDQ: single V4SI source (register or memory), V2DI result.  The
;; two element selections start at index 0 and at index 1; the latter
;; ends at index 3.
10811 (define_insn "xop_phadddq"
10812 [(set (match_operand:V2DI 0 "register_operand" "=x")
10816 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
10817 (parallel [(const_int 0)
10822 (parallel [(const_int 1)
10823 (const_int 3)])))))]
10825 "vphadddq\t{%1, %0|%0, %1}"
10826 [(set_attr "type" "sseiadd1")])
;; VPHADDUBW: single V16QI source (register or memory), V8HI result.  The
;; two element selections start at index 0 and at index 1; the latter
;; ends at index 15.
10828 (define_insn "xop_phaddubw"
10829 [(set (match_operand:V8HI 0 "register_operand" "=x")
10833 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10834 (parallel [(const_int 0)
10845 (parallel [(const_int 1)
10852 (const_int 15)])))))]
10854 "vphaddubw\t{%1, %0|%0, %1}"
10855 [(set_attr "type" "sseiadd1")])
;; VPHADDUBD: single V16QI source (register or memory), V4SI result.  Four
;; element selections are combined, starting at indices 0, 1, 2 and 3.
10857 (define_insn "xop_phaddubd"
10858 [(set (match_operand:V4SI 0 "register_operand" "=x")
10863 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10864 (parallel [(const_int 0)
10871 (parallel [(const_int 1)
10874 (const_int 13)]))))
10879 (parallel [(const_int 2)
10886 (parallel [(const_int 3)
10889 (const_int 15)]))))))]
10891 "vphaddubd\t{%1, %0|%0, %1}"
10892 [(set_attr "type" "sseiadd1")])
;; VPHADDUBQ: single V16QI source (register or memory), V2DI result.
;; Eight element selections are combined, starting at indices 0-3 and
;; 8-11.
10894 (define_insn "xop_phaddubq"
10895 [(set (match_operand:V2DI 0 "register_operand" "=x")
10901 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10902 (parallel [(const_int 0)
10907 (parallel [(const_int 1)
10913 (parallel [(const_int 2)
10918 (parallel [(const_int 3)
10919 (const_int 7)])))))
10925 (parallel [(const_int 8)
10930 (parallel [(const_int 9)
10931 (const_int 13)]))))
10936 (parallel [(const_int 10)
10941 (parallel [(const_int 11)
10942 (const_int 15)])))))))]
10944 "vphaddubq\t{%1, %0|%0, %1}"
10945 [(set_attr "type" "sseiadd1")])
;; VPHADDUWD: single V8HI source (register or memory), V4SI result.  The
;; two element selections start at index 0 and at index 1; the latter
;; ends at index 7.
10947 (define_insn "xop_phadduwd"
10948 [(set (match_operand:V4SI 0 "register_operand" "=x")
10952 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10953 (parallel [(const_int 0)
10960 (parallel [(const_int 1)
10963 (const_int 7)])))))]
10965 "vphadduwd\t{%1, %0|%0, %1}"
10966 [(set_attr "type" "sseiadd1")])
;; VPHADDUWQ: single V8HI source (register or memory), V2DI result.  Four
;; element selections are combined, starting at indices 0, 1, 2 and 3.
10968 (define_insn "xop_phadduwq"
10969 [(set (match_operand:V2DI 0 "register_operand" "=x")
10974 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10975 (parallel [(const_int 0)
10980 (parallel [(const_int 1)
10986 (parallel [(const_int 2)
10991 (parallel [(const_int 3)
10992 (const_int 7)]))))))]
10994 "vphadduwq\t{%1, %0|%0, %1}"
10995 [(set_attr "type" "sseiadd1")])
;; VPHADDUDQ: single V4SI source (register or memory), V2DI result.  The
;; two element selections start at index 0 and at index 1; the latter
;; ends at index 3.
10997 (define_insn "xop_phaddudq"
10998 [(set (match_operand:V2DI 0 "register_operand" "=x")
11002 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
11003 (parallel [(const_int 0)
11008 (parallel [(const_int 1)
11009 (const_int 3)])))))]
11011 "vphaddudq\t{%1, %0|%0, %1}"
11012 [(set_attr "type" "sseiadd1")])
;; VPHSUBBW: single V16QI source (register or memory), V8HI result.  The
;; two element selections start at index 0 and at index 1; the latter
;; ends at index 15.
11014 (define_insn "xop_phsubbw"
11015 [(set (match_operand:V8HI 0 "register_operand" "=x")
11019 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
11020 (parallel [(const_int 0)
11031 (parallel [(const_int 1)
11038 (const_int 15)])))))]
11040 "vphsubbw\t{%1, %0|%0, %1}"
11041 [(set_attr "type" "sseiadd1")])
;; VPHSUBWD: single V8HI source (register or memory), V4SI result.  The
;; two element selections start at index 0 and at index 1; the latter
;; ends at index 7.
11043 (define_insn "xop_phsubwd"
11044 [(set (match_operand:V4SI 0 "register_operand" "=x")
11048 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
11049 (parallel [(const_int 0)
11056 (parallel [(const_int 1)
11059 (const_int 7)])))))]
11061 "vphsubwd\t{%1, %0|%0, %1}"
11062 [(set_attr "type" "sseiadd1")])
;; VPHSUBDQ: single V4SI source (register or memory), V2DI result.  The
;; two element selections start at index 0 and at index 1; the latter
;; ends at index 3.
11064 (define_insn "xop_phsubdq"
11065 [(set (match_operand:V2DI 0 "register_operand" "=x")
11069 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
11070 (parallel [(const_int 0)
11075 (parallel [(const_int 1)
11076 (const_int 3)])))))]
11078 "vphsubdq\t{%1, %0|%0, %1}"
11079 [(set_attr "type" "sseiadd1")])
11081 ;; XOP permute instructions
;; Generic vpperm, modelled as an opaque UNSPEC_XOP_PERMUTE of three V16QI
;; inputs.  Operand 1 must be a register; the two alternatives let either
;; operand 3 or operand 2 come from memory, and the insn condition rejects
;; the case where both are memory operands.
11082 (define_insn "xop_pperm"
11083 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
11085 [(match_operand:V16QI 1 "register_operand" "x,x")
11086 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
11087 (match_operand:V16QI 3 "nonimmediate_operand" "xm,x")]
11088 UNSPEC_XOP_PERMUTE))]
11089 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
11090 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11091 [(set_attr "type" "sse4arg")
11092 (set_attr "mode" "TI")])
11094 ;; XOP pack instructions that combine two vectors into a smaller vector
;; vpperm used as a pack: two V2DI inputs (operands 1 and 2) produce one V4SI
;; result.  Operand 3 is a V16QI value that only appears in a (use ...), so
;; the RTL does not describe how it influences the result.
;; NOTE(review): operand 3 is presumably the byte-selector the caller has
;; prepared to implement the truncation -- confirm against the expander.
;; At most one of operands 2 and 3 may be in memory.
11095 (define_insn "xop_pperm_pack_v2di_v4si"
11096 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
11099 (match_operand:V2DI 1 "register_operand" "x,x"))
11101 (match_operand:V2DI 2 "nonimmediate_operand" "x,m"))))
11102 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
11103 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
11104 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11105 [(set_attr "type" "sse4arg")
11106 (set_attr "mode" "TI")])
;; vpperm used as a pack: two V4SI inputs (operands 1 and 2) produce one V8HI
;; result.  Operand 3 is a V16QI value that only appears in a (use ...).
;; NOTE(review): operand 3 is presumably the prepared byte-selector --
;; confirm against the expander.
;; At most one of operands 2 and 3 may be in memory.
11108 (define_insn "xop_pperm_pack_v4si_v8hi"
11109 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
11112 (match_operand:V4SI 1 "register_operand" "x,x"))
11114 (match_operand:V4SI 2 "nonimmediate_operand" "x,m"))))
11115 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
11116 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
11117 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11118 [(set_attr "type" "sse4arg")
11119 (set_attr "mode" "TI")])
;; vpperm used as a pack: two V8HI inputs (operands 1 and 2) produce one
;; V16QI result.  Operand 3 is a V16QI value that only appears in a (use ...).
;; NOTE(review): operand 3 is presumably the prepared byte-selector --
;; confirm against the expander.
;; At most one of operands 2 and 3 may be in memory.
11121 (define_insn "xop_pperm_pack_v8hi_v16qi"
11122 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
11125 (match_operand:V8HI 1 "register_operand" "x,x"))
11127 (match_operand:V8HI 2 "nonimmediate_operand" "x,m"))))
11128 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
11129 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
11130 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11131 [(set_attr "type" "sse4arg")
11132 (set_attr "mode" "TI")])
11134 ;; XOP packed rotate instructions
;; Rotate-left expander for SSEMODE1248 vectors with an SImode count.
;; A count accepted by const_0_to_<sserotatemax>_operand is left to the
;; (rotate ...) template.  Any other count is converted to the element mode
;; if its mode differs (convert_move with unsignedp == false), replicated
;; into all <ssescalarnum> elements with vec_init<mode>, and handed to the
;; per-element variable rotate xop_vrotl<mode>3.
11135 (define_expand "rotl<mode>3"
11136 [(set (match_operand:SSEMODE1248 0 "register_operand" "")
11137 (rotate:SSEMODE1248
11138 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "")
11139 (match_operand:SI 2 "general_operand")))]
11142 /* If we were given a scalar, convert it to parallel */
11143 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
11145 rtvec vs = rtvec_alloc (<ssescalarnum>);
11146 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
11147 rtx reg = gen_reg_rtx (<MODE>mode);
11148 rtx op2 = operands[2];
11151 if (GET_MODE (op2) != <ssescalarmode>mode)
11153 op2 = gen_reg_rtx (<ssescalarmode>mode);
11154 convert_move (op2, operands[2], false);
11157 for (i = 0; i < <ssescalarnum>; i++)
11158 RTVEC_ELT (vs, i) = op2;
11160 emit_insn (gen_vec_init<mode> (reg, par));
11161 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
;; Rotate-right expander for SSEMODE1248 vectors with an SImode count.
;; A count accepted by const_0_to_<sserotatemax>_operand is left to the
;; (rotatert ...) template.  Any other count is converted to the element mode
;; if needed, replicated into a vector with vec_init<mode>, negated with
;; neg<mode>2, and the negated vector is passed to xop_vrotl<mode>3 as the
;; per-element count.
11166 (define_expand "rotr<mode>3"
11167 [(set (match_operand:SSEMODE1248 0 "register_operand" "")
11168 (rotatert:SSEMODE1248
11169 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "")
11170 (match_operand:SI 2 "general_operand")))]
11173 /* If we were given a scalar, convert it to parallel */
11174 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
11176 rtvec vs = rtvec_alloc (<ssescalarnum>);
11177 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
11178 rtx neg = gen_reg_rtx (<MODE>mode);
11179 rtx reg = gen_reg_rtx (<MODE>mode);
11180 rtx op2 = operands[2];
11183 if (GET_MODE (op2) != <ssescalarmode>mode)
11185 op2 = gen_reg_rtx (<ssescalarmode>mode);
11186 convert_move (op2, operands[2], false);
11189 for (i = 0; i < <ssescalarnum>; i++)
11190 RTVEC_ELT (vs, i) = op2;
11192 emit_insn (gen_vec_init<mode> (reg, par));
11193 emit_insn (gen_neg<mode>2 (neg, reg));
11194 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], neg));
;; Rotate-left by an immediate: emits vprot<ssevecsize> with operand 2 as the
;; count.  Operand 2 must satisfy const_0_to_<sserotatemax>_operand; operand 1
;; may be a register or memory.  One immediate byte is accounted for in the
;; length attributes.
11199 (define_insn "xop_rotl<mode>3"
11200 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11201 (rotate:SSEMODE1248
11202 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm")
11203 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
11205 "vprot<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
11206 [(set_attr "type" "sseishft")
11207 (set_attr "length_immediate" "1")
11208 (set_attr "mode" "TI")])
;; Rotate-right by an immediate.  The pattern emits vprot<ssevecsize> (the
;; same mnemonic xop_rotl<mode>3 uses for rotate-left) with the complementary
;; count: element width in bits minus operand 2, stored in operands[3].
;; The element width is taken from GET_MODE_BITSIZE of the scalar mode.
;; <ssescalarnum> is the number of elements in the vector (it sizes the
;; vec_init vector in rotl<mode>3), so the former <ssescalarnum> * 8 matched
;; the element width only for V4SI and produced a wrong count for the other
;; modes (e.g. 16 - n instead of 64 - n for V2DI).
11210 (define_insn "xop_rotr<mode>3"
11211 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11212 (rotatert:SSEMODE1248
11213 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm")
11214 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
11217 operands[3] = GEN_INT (GET_MODE_BITSIZE (<ssescalarmode>mode) - INTVAL (operands[2]));
11218 return \"vprot<ssevecsize>\t{%3, %1, %0|%0, %1, %3}\";
11220 [(set_attr "type" "sseishft")
11221 (set_attr "length_immediate" "1")
11222 (set_attr "mode" "TI")])
;; Per-element variable rotate-right.  Negates the count vector (operand 2)
;; into a fresh register with neg<mode>2 and emits xop_vrotl<mode>3 with the
;; negated counts.
11224 (define_expand "vrotr<mode>3"
11225 [(match_operand:SSEMODE1248 0 "register_operand" "")
11226 (match_operand:SSEMODE1248 1 "register_operand" "")
11227 (match_operand:SSEMODE1248 2 "register_operand" "")]
11230 rtx reg = gen_reg_rtx (<MODE>mode);
11231 emit_insn (gen_neg<mode>2 (reg, operands[2]));
11232 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
;; Per-element variable rotate-left.  Forwards all three operands unchanged
;; to xop_vrotl<mode>3.
11236 (define_expand "vrotl<mode>3"
11237 [(match_operand:SSEMODE1248 0 "register_operand" "")
11238 (match_operand:SSEMODE1248 1 "register_operand" "")
11239 (match_operand:SSEMODE1248 2 "register_operand" "")]
11242 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], operands[2]));
;; Variable per-element rotate (vprot<ssevecsize> with a vector count).
;; The RTL is an if_then_else on the count (operand 2) whose arms are a
;; rotate and a rotatert by the negated count.
;; NOTE(review): the comparison that selects between the arms is not visible
;; in this excerpt; presumably non-negative counts rotate left -- confirm.
;; Either operand 1 or operand 2 may be in memory, but not both.
11246 (define_insn "xop_vrotl<mode>3"
11247 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
11248 (if_then_else:SSEMODE1248
11250 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "x,m")
11252 (rotate:SSEMODE1248
11253 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm,x")
11255 (rotatert:SSEMODE1248
11257 (neg:SSEMODE1248 (match_dup 2)))))]
11258 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11259 "vprot<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
11260 [(set_attr "type" "sseishft")
11261 (set_attr "prefix_data16" "0")
11262 (set_attr "prefix_extra" "2")
11263 (set_attr "mode" "TI")])
11265 ;; XOP packed shift instructions.
11266 ;; FIXME: add V2DI back in
;; Per-element variable logical shift right for SSEMODE124 vectors.
;; Negates the count vector (operand 2) with neg<mode>2 and emits
;; xop_lshl<mode>3 with the negated counts.
11267 (define_expand "vlshr<mode>3"
11268 [(match_operand:SSEMODE124 0 "register_operand" "")
11269 (match_operand:SSEMODE124 1 "register_operand" "")
11270 (match_operand:SSEMODE124 2 "register_operand" "")]
11273 rtx neg = gen_reg_rtx (<MODE>mode);
11274 emit_insn (gen_neg<mode>2 (neg, operands[2]));
11275 emit_insn (gen_xop_lshl<mode>3 (operands[0], operands[1], neg));
;; Per-element variable arithmetic shift right for SSEMODE124 vectors.
;; Negates the count vector (operand 2) with neg<mode>2 and emits
;; xop_ashl<mode>3 with the negated counts.
11279 (define_expand "vashr<mode>3"
11280 [(match_operand:SSEMODE124 0 "register_operand" "")
11281 (match_operand:SSEMODE124 1 "register_operand" "")
11282 (match_operand:SSEMODE124 2 "register_operand" "")]
11285 rtx neg = gen_reg_rtx (<MODE>mode);
11286 emit_insn (gen_neg<mode>2 (neg, operands[2]));
11287 emit_insn (gen_xop_ashl<mode>3 (operands[0], operands[1], neg));
;; Per-element variable shift left for SSEMODE124 vectors.  Forwards all
;; three operands unchanged to xop_ashl<mode>3.
11291 (define_expand "vashl<mode>3"
11292 [(match_operand:SSEMODE124 0 "register_operand" "")
11293 (match_operand:SSEMODE124 1 "register_operand" "")
11294 (match_operand:SSEMODE124 2 "register_operand" "")]
11297 emit_insn (gen_xop_ashl<mode>3 (operands[0], operands[1], operands[2]));
;; Variable per-element arithmetic shift (vpsha<ssevecsize>).  The RTL is an
;; if_then_else on the count (operand 2) whose arms are an ashift and an
;; ashiftrt by the negated count.
;; NOTE(review): the comparison that selects between the arms is not visible
;; in this excerpt; presumably non-negative counts shift left -- confirm.
;; Either operand 1 or operand 2 may be in memory, but not both.
11301 (define_insn "xop_ashl<mode>3"
11302 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
11303 (if_then_else:SSEMODE1248
11305 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "x,m")
11307 (ashift:SSEMODE1248
11308 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm,x")
11310 (ashiftrt:SSEMODE1248
11312 (neg:SSEMODE1248 (match_dup 2)))))]
11313 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11314 "vpsha<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
11315 [(set_attr "type" "sseishft")
11316 (set_attr "prefix_data16" "0")
11317 (set_attr "prefix_extra" "2")
11318 (set_attr "mode" "TI")])
;; Variable per-element logical shift (vpshl<ssevecsize>).  The RTL is an
;; if_then_else on the count (operand 2) whose arms are an ashift and an
;; lshiftrt by the negated count.
;; NOTE(review): the comparison that selects between the arms is not visible
;; in this excerpt; presumably non-negative counts shift left -- confirm.
;; Either operand 1 or operand 2 may be in memory, but not both.
11320 (define_insn "xop_lshl<mode>3"
11321 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
11322 (if_then_else:SSEMODE1248
11324 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "x,m")
11326 (ashift:SSEMODE1248
11327 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm,x")
11329 (lshiftrt:SSEMODE1248
11331 (neg:SSEMODE1248 (match_dup 2)))))]
11332 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11333 "vpshl<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
11334 [(set_attr "type" "sseishft")
11335 (set_attr "prefix_data16" "0")
11336 (set_attr "prefix_extra" "2")
11337 (set_attr "mode" "TI")])
11339 ;; SSE2 doesn't have some shift variants, so define versions for XOP
;; V16QI shift left by a scalar SImode count (register or constant).
;; The count is replicated into all 16 elements of a V16QI vector with
;; vec_initv16qi and the shift is done by xop_ashlv16qi3.
;; NOTE(review): operands[2] is placed in the element list as-is, without
;; the mode conversion rotl<mode>3 performs -- confirm vec_init accepts it.
11340 (define_expand "ashlv16qi3"
11341 [(match_operand:V16QI 0 "register_operand" "")
11342 (match_operand:V16QI 1 "register_operand" "")
11343 (match_operand:SI 2 "nonmemory_operand" "")]
11346 rtvec vs = rtvec_alloc (16);
11347 rtx par = gen_rtx_PARALLEL (V16QImode, vs);
11348 rtx reg = gen_reg_rtx (V16QImode);
11350 for (i = 0; i < 16; i++)
11351 RTVEC_ELT (vs, i) = operands[2];
11353 emit_insn (gen_vec_initv16qi (reg, par));
11354 emit_insn (gen_xop_ashlv16qi3 (operands[0], operands[1], reg));
;; V16QI logical shift by a scalar SImode count (register or constant).
;; The count is replicated into all 16 elements of a V16QI vector with
;; vec_initv16qi and the shift is done by xop_lshlv16qi3.
;; NOTE(review): operands[2] is placed in the element list as-is, without
;; the mode conversion rotl<mode>3 performs -- confirm vec_init accepts it.
11358 (define_expand "lshlv16qi3"
11359 [(match_operand:V16QI 0 "register_operand" "")
11360 (match_operand:V16QI 1 "register_operand" "")
11361 (match_operand:SI 2 "nonmemory_operand" "")]
11364 rtvec vs = rtvec_alloc (16);
11365 rtx par = gen_rtx_PARALLEL (V16QImode, vs);
11366 rtx reg = gen_reg_rtx (V16QImode);
11368 for (i = 0; i < 16; i++)
11369 RTVEC_ELT (vs, i) = operands[2];
11371 emit_insn (gen_vec_initv16qi (reg, par));
11372 emit_insn (gen_xop_lshlv16qi3 (operands[0], operands[1], reg));
;; V16QI arithmetic shift right by a scalar SImode count, implemented as an
;; xop_ashlv16qi3 with a negated count vector.
;; For a CONST_INT count the negation is folded into the replicated element
;; (GEN_INT (- INTVAL ...)).  For a non-constant count the replicated vector
;; is negated at run time with negv16qi2 before the shift.
;; NOTE(review): the non-constant arm of the `ele' initializer and the
;; else keyword before the final emit are not visible in this excerpt.
11376 (define_expand "ashrv16qi3"
11377 [(match_operand:V16QI 0 "register_operand" "")
11378 (match_operand:V16QI 1 "register_operand" "")
11379 (match_operand:SI 2 "nonmemory_operand" "")]
11382 rtvec vs = rtvec_alloc (16);
11383 rtx par = gen_rtx_PARALLEL (V16QImode, vs);
11384 rtx reg = gen_reg_rtx (V16QImode);
11386 rtx ele = ((CONST_INT_P (operands[2]))
11387 ? GEN_INT (- INTVAL (operands[2]))
11390 for (i = 0; i < 16; i++)
11391 RTVEC_ELT (vs, i) = ele;
11393 emit_insn (gen_vec_initv16qi (reg, par));
11395 if (!CONST_INT_P (operands[2]))
11397 rtx neg = gen_reg_rtx (V16QImode);
11398 emit_insn (gen_negv16qi2 (neg, reg));
11399 emit_insn (gen_xop_ashlv16qi3 (operands[0], operands[1], neg));
11402 emit_insn (gen_xop_ashlv16qi3 (operands[0], operands[1], reg));
;; V2DI arithmetic shift right by a scalar DImode count, implemented as an
;; xop_ashlv2di3 with a negated count in both elements.
;; The negated scalar `ele' is produced in one of three ways:
;;   - CONST_INT count: negated at expand time with GEN_INT;
;;   - count whose mode is not DImode: converted with convert_move
;;     (unsignedp == false), then negated with negdi2;
;;   - otherwise: negated directly with negdi2.
;; `ele' is then placed in both lanes via vec_initv2di.
11407 (define_expand "ashrv2di3"
11408 [(match_operand:V2DI 0 "register_operand" "")
11409 (match_operand:V2DI 1 "register_operand" "")
11410 (match_operand:DI 2 "nonmemory_operand" "")]
11413 rtvec vs = rtvec_alloc (2);
11414 rtx par = gen_rtx_PARALLEL (V2DImode, vs);
11415 rtx reg = gen_reg_rtx (V2DImode);
11418 if (CONST_INT_P (operands[2]))
11419 ele = GEN_INT (- INTVAL (operands[2]));
11420 else if (GET_MODE (operands[2]) != DImode)
11422 rtx move = gen_reg_rtx (DImode);
11423 ele = gen_reg_rtx (DImode);
11424 convert_move (move, operands[2], false);
11425 emit_insn (gen_negdi2 (ele, move));
11429 ele = gen_reg_rtx (DImode);
11430 emit_insn (gen_negdi2 (ele, operands[2]));
11433 RTVEC_ELT (vs, 0) = ele;
11434 RTVEC_ELT (vs, 1) = ele;
11435 emit_insn (gen_vec_initv2di (reg, par));
11436 emit_insn (gen_xop_ashlv2di3 (operands[0], operands[1], reg));
11440 ;; XOP FRCZ support
;; Packed vfrcz for SSEMODEF2P vectors: one source (operand 1, register or
;; memory), one register destination.  The operation is wrapped in an unspec
;; whose name is not visible in this excerpt.
11442 (define_insn "xop_frcz<mode>2"
11443 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
11445 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm")]
11448 "vfrcz<ssemodesuffixf4>\t{%1, %0|%0, %1}"
11449 [(set_attr "type" "ssecvt1")
11450 (set_attr "mode" "<MODE>")])
;; Scalar form of vfrcz (mnemonic suffix <ssemodesuffixf2s>), expressed as a
;; vec_merge of the operation on operand 2 with operand 1.  Operand 1 is tied
;; to the destination by the "0" constraint; only operand 2 appears in the
;; emitted instruction.
;; NOTE(review): the vec_merge mask is not visible in this excerpt;
;; presumably only the lowest element is replaced -- confirm.
11453 (define_insn "xop_vmfrcz<mode>2"
11454 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
11455 (vec_merge:SSEMODEF2P
11457 [(match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
11459 (match_operand:SSEMODEF2P 1 "register_operand" "0")
11462 "vfrcz<ssemodesuffixf2s>\t{%2, %0|%0, %2}"
11463 [(set_attr "type" "ssecvt1")
11464 (set_attr "mode" "<MODE>")])
;; Packed vfrcz for the FMA4MODEF4 vector modes: one source (operand 1,
;; register or memory), one register destination.
;; NOTE(review): the "256" in the name suggests these are the 256-bit modes;
;; the FMA4MODEF4 iterator definition is not visible here -- confirm.
11466 (define_insn "xop_frcz<mode>2256"
11467 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x")
11469 [(match_operand:FMA4MODEF4 1 "nonimmediate_operand" "xm")]
11472 "vfrcz<fma4modesuffixf4>\t{%1, %0|%0, %1}"
11473 [(set_attr "type" "ssecvt1")
11474 (set_attr "mode" "<MODE>")])
;; Integer vector compare producing a mask vector: emits
;; vpcom<cond><ssevecsize>, where the condition text comes from printing the
;; comparison operator (operand 1, matched by ix86_comparison_int_operator)
;; with the %Y modifier.  Operand 2 must be a register; operand 3 may be a
;; register or memory.
11476 (define_insn "xop_maskcmp<mode>3"
11477 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11478 (match_operator:SSEMODE1248 1 "ix86_comparison_int_operator"
11479 [(match_operand:SSEMODE1248 2 "register_operand" "x")
11480 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")]))]
11482 "vpcom%Y1<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
11483 [(set_attr "type" "sse4arg")
11484 (set_attr "prefix_data16" "0")
11485 (set_attr "prefix_rep" "0")
11486 (set_attr "prefix_extra" "2")
11487 (set_attr "length_immediate" "1")
11488 (set_attr "mode" "TI")])
;; Unsigned counterpart of xop_maskcmp<mode>3: the comparison operator is
;; matched by ix86_comparison_uns_operator and the mnemonic carries a "u"
;; before the element-size suffix (vpcom<cond>u<ssevecsize>).
11490 (define_insn "xop_maskcmp_uns<mode>3"
11491 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11492 (match_operator:SSEMODE1248 1 "ix86_comparison_uns_operator"
11493 [(match_operand:SSEMODE1248 2 "register_operand" "x")
11494 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")]))]
11496 "vpcom%Y1u<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
11497 [(set_attr "type" "ssecmp")
11498 (set_attr "prefix_data16" "0")
11499 (set_attr "prefix_rep" "0")
11500 (set_attr "prefix_extra" "2")
11501 (set_attr "length_immediate" "1")
11502 (set_attr "mode" "TI")])
11504 ;; Version of pcom*u* that is called from the intrinsics that allows pcomequ*
11505 ;; and pcomneu* not to be converted to the signed ones in case somebody needs
11506 ;; the exact instruction generated for the intrinsic.
;; Same instruction as xop_maskcmp_uns<mode>3, but the comparison is wrapped
;; in UNSPEC_XOP_UNSIGNED_CMP so that it is opaque to the RTL optimizers and
;; the unsigned mnemonic requested by the intrinsic is emitted unchanged.
11507 (define_insn "xop_maskcmp_uns2<mode>3"
11508 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11509 (unspec:SSEMODE1248
11510 [(match_operator:SSEMODE1248 1 "ix86_comparison_uns_operator"
11511 [(match_operand:SSEMODE1248 2 "register_operand" "x")
11512 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")])]
11513 UNSPEC_XOP_UNSIGNED_CMP))]
11515 "vpcom%Y1u<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
11516 [(set_attr "type" "ssecmp")
11517 (set_attr "prefix_data16" "0")
11518 (set_attr "prefix_extra" "2")
11519 (set_attr "length_immediate" "1")
11520 (set_attr "mode" "TI")])
11522 ;; Pcomtrue and pcomfalse support. These are useless instructions, but are
11523 ;; being added here to be complete.
;; vpcomtrue / vpcomfalse, modelled as UNSPEC_XOP_TRUEFALSE.  Operand 3 is a
;; constant selector: a non-zero value emits vpcomtrue<ssevecsize>, zero
;; emits vpcomfalse<ssevecsize>.  Operands 1 and 2 are the (register,
;; register-or-memory) inputs passed through to the instruction.
11524 (define_insn "xop_pcom_tf<mode>3"
11525 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11526 (unspec:SSEMODE1248
11527 [(match_operand:SSEMODE1248 1 "register_operand" "x")
11528 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm")
11529 (match_operand:SI 3 "const_int_operand" "n")]
11530 UNSPEC_XOP_TRUEFALSE))]
11533 return ((INTVAL (operands[3]) != 0)
11534 ? "vpcomtrue<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
11535 : "vpcomfalse<ssevecsize>\t{%2, %1, %0|%0, %1, %2}");
11537 [(set_attr "type" "ssecmp")
11538 (set_attr "prefix_data16" "0")
11539 (set_attr "prefix_extra" "2")
11540 (set_attr "length_immediate" "1")
11541 (set_attr "mode" "TI")])
;; vpermil2p<suffix>: takes two AVXMODEF2P data inputs (operands 1 and 2), a
;; selector vector in <avxpermvecmode> (operand 3, register or memory) and an
;; immediate restricted to 0..3 (operand 4).  The operation is wrapped in an
;; unspec whose name is not visible in this excerpt.
11543 (define_insn "xop_vpermil2<mode>3"
11544 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
11546 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
11547 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "%x")
11548 (match_operand:<avxpermvecmode> 3 "nonimmediate_operand" "xm")
11549 (match_operand:SI 4 "const_0_to_3_operand" "n")]
11552 "vpermil2p<avxmodesuffixf2c>\t{%4, %3, %2, %1, %0|%0, %1, %2, %3, %4}"
11553 [(set_attr "type" "sse4arg")
11554 (set_attr "length_immediate" "1")
11555 (set_attr "mode" "<MODE>")])
11557 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; AVX (VEX-encoded) form of AESENC: three-operand vaesenc, enabled when
;; both TARGET_AES and TARGET_AVX hold.  Operand 1 is a register that need
;; not match the destination; operand 2 may be a register or memory.
11558 (define_insn "*avx_aesenc"
11559 [(set (match_operand:V2DI 0 "register_operand" "=x")
11560 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
11561 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11563 "TARGET_AES && TARGET_AVX"
11564 "vaesenc\t{%2, %1, %0|%0, %1, %2}"
11565 [(set_attr "type" "sselog1")
11566 (set_attr "prefix_extra" "1")
11567 (set_attr "prefix" "vex")
11568 (set_attr "mode" "TI")])
;; Two-operand (non-VEX) aesenc.  Operand 1 is tied to the destination by the
;; "0" constraint, so only operands 0 and 2 appear in the emitted
;; instruction; operand 2 may be a register or memory.
11570 (define_insn "aesenc"
11571 [(set (match_operand:V2DI 0 "register_operand" "=x")
11572 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11573 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11576 "aesenc\t{%2, %0|%0, %2}"
11577 [(set_attr "type" "sselog1")
11578 (set_attr "prefix_extra" "1")
11579 (set_attr "mode" "TI")])
;; AVX (VEX-encoded) form of AESENCLAST (UNSPEC_AESENCLAST): three-operand
;; vaesenclast, enabled when both TARGET_AES and TARGET_AVX hold.
11581 (define_insn "*avx_aesenclast"
11582 [(set (match_operand:V2DI 0 "register_operand" "=x")
11583 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
11584 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11585 UNSPEC_AESENCLAST))]
11586 "TARGET_AES && TARGET_AVX"
11587 "vaesenclast\t{%2, %1, %0|%0, %1, %2}"
11588 [(set_attr "type" "sselog1")
11589 (set_attr "prefix_extra" "1")
11590 (set_attr "prefix" "vex")
11591 (set_attr "mode" "TI")])
;; Two-operand (non-VEX) aesenclast (UNSPEC_AESENCLAST).  Operand 1 is tied
;; to the destination by the "0" constraint; operand 2 may be a register or
;; memory.
11593 (define_insn "aesenclast"
11594 [(set (match_operand:V2DI 0 "register_operand" "=x")
11595 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11596 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11597 UNSPEC_AESENCLAST))]
11599 "aesenclast\t{%2, %0|%0, %2}"
11600 [(set_attr "type" "sselog1")
11601 (set_attr "prefix_extra" "1")
11602 (set_attr "mode" "TI")])
;; AVX (VEX-encoded) form of AESDEC: three-operand vaesdec, enabled when
;; both TARGET_AES and TARGET_AVX hold.  Operand 1 is a register that need
;; not match the destination; operand 2 may be a register or memory.
11604 (define_insn "*avx_aesdec"
11605 [(set (match_operand:V2DI 0 "register_operand" "=x")
11606 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
11607 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11609 "TARGET_AES && TARGET_AVX"
11610 "vaesdec\t{%2, %1, %0|%0, %1, %2}"
11611 [(set_attr "type" "sselog1")
11612 (set_attr "prefix_extra" "1")
11613 (set_attr "prefix" "vex")
11614 (set_attr "mode" "TI")])
;; Two-operand (non-VEX) aesdec.  Operand 1 is tied to the destination by the
;; "0" constraint, so only operands 0 and 2 appear in the emitted
;; instruction; operand 2 may be a register or memory.
11616 (define_insn "aesdec"
11617 [(set (match_operand:V2DI 0 "register_operand" "=x")
11618 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11619 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11622 "aesdec\t{%2, %0|%0, %2}"
11623 [(set_attr "type" "sselog1")
11624 (set_attr "prefix_extra" "1")
11625 (set_attr "mode" "TI")])
;; AVX (VEX-encoded) form of AESDECLAST (UNSPEC_AESDECLAST): three-operand
;; vaesdeclast, enabled when both TARGET_AES and TARGET_AVX hold.
11627 (define_insn "*avx_aesdeclast"
11628 [(set (match_operand:V2DI 0 "register_operand" "=x")
11629 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
11630 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11631 UNSPEC_AESDECLAST))]
11632 "TARGET_AES && TARGET_AVX"
11633 "vaesdeclast\t{%2, %1, %0|%0, %1, %2}"
11634 [(set_attr "type" "sselog1")
11635 (set_attr "prefix_extra" "1")
11636 (set_attr "prefix" "vex")
11637 (set_attr "mode" "TI")])
;; Two-operand (non-VEX) aesdeclast (UNSPEC_AESDECLAST).  Operand 1 is tied
;; to the destination by the "0" constraint; operand 2 may be a register or
;; memory.
11639 (define_insn "aesdeclast"
11640 [(set (match_operand:V2DI 0 "register_operand" "=x")
11641 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11642 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11643 UNSPEC_AESDECLAST))]
11645 "aesdeclast\t{%2, %0|%0, %2}"
11646 [(set_attr "type" "sselog1")
11647 (set_attr "prefix_extra" "1")
11648 (set_attr "mode" "TI")])
;; aesimc: single-source instruction (operand 1, register or memory).  One
;; pattern serves both encodings: the template starts with %v and the prefix
;; attribute is "maybe_vex".
11650 (define_insn "aesimc"
11651 [(set (match_operand:V2DI 0 "register_operand" "=x")
11652 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")]
11655 "%vaesimc\t{%1, %0|%0, %1}"
11656 [(set_attr "type" "sselog1")
11657 (set_attr "prefix_extra" "1")
11658 (set_attr "prefix" "maybe_vex")
11659 (set_attr "mode" "TI")])
;; aeskeygenassist (UNSPEC_AESKEYGENASSIST): one vector source (operand 1,
;; register or memory) plus an 8-bit immediate (operand 2, 0..255).  One
;; pattern serves both encodings via the %v template prefix and the
;; "maybe_vex" prefix attribute.
11661 (define_insn "aeskeygenassist"
11662 [(set (match_operand:V2DI 0 "register_operand" "=x")
11663 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")
11664 (match_operand:SI 2 "const_0_to_255_operand" "n")]
11665 UNSPEC_AESKEYGENASSIST))]
11667 "%vaeskeygenassist\t{%2, %1, %0|%0, %1, %2}"
11668 [(set_attr "type" "sselog1")
11669 (set_attr "prefix_extra" "1")
11670 (set_attr "length_immediate" "1")
11671 (set_attr "prefix" "maybe_vex")
11672 (set_attr "mode" "TI")])
;; AVX (VEX-encoded) form of PCLMULQDQ: vpclmulqdq with separate destination,
;; two V2DI sources and an 8-bit immediate (operand 3, 0..255).  Enabled when
;; both TARGET_PCLMUL and TARGET_AVX hold.
11674 (define_insn "*vpclmulqdq"
11675 [(set (match_operand:V2DI 0 "register_operand" "=x")
11676 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
11677 (match_operand:V2DI 2 "nonimmediate_operand" "xm")
11678 (match_operand:SI 3 "const_0_to_255_operand" "n")]
11680 "TARGET_PCLMUL && TARGET_AVX"
11681 "vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11682 [(set_attr "type" "sselog1")
11683 (set_attr "prefix_extra" "1")
11684 (set_attr "length_immediate" "1")
11685 (set_attr "prefix" "vex")
11686 (set_attr "mode" "TI")])
;; Two-operand (non-VEX) pclmulqdq.  Operand 1 is tied to the destination by
;; the "0" constraint; operand 2 may be a register or memory; operand 3 is an
;; 8-bit immediate (0..255).
11688 (define_insn "pclmulqdq"
11689 [(set (match_operand:V2DI 0 "register_operand" "=x")
11690 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11691 (match_operand:V2DI 2 "nonimmediate_operand" "xm")
11692 (match_operand:SI 3 "const_0_to_255_operand" "n")]
11695 "pclmulqdq\t{%3, %2, %0|%0, %2, %3}"
11696 [(set_attr "type" "sselog1")
11697 (set_attr "prefix_extra" "1")
11698 (set_attr "length_immediate" "1")
11699 (set_attr "mode" "TI")])
;; Expander for vzeroall.  Builds a PARALLEL of nregs + 1 elements, where
;; nregs is 16 when TARGET_64BIT and 8 otherwise:
;;   element 0      - an unspec_volatile marker (its UNSPECV code is on a
;;                    line not visible in this excerpt);
;;   elements 1..n  - one SET per SSE register, assigning CONST0_RTX to the
;;                    register viewed in V8SImode.
11701 (define_expand "avx_vzeroall"
11702 [(match_par_dup 0 [(const_int 0)])]
11705 int nregs = TARGET_64BIT ? 16 : 8;
11708 operands[0] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs + 1));
11710 XVECEXP (operands[0], 0, 0)
11711 = gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, const0_rtx),
11714 for (regno = 0; regno < nregs; regno++)
11715 XVECEXP (operands[0], 0, regno + 1)
11716 = gen_rtx_SET (VOIDmode,
11717 gen_rtx_REG (V8SImode, SSE_REGNO (regno)),
11718 CONST0_RTX (V8SImode));
;; Matcher for the PARALLEL built by the avx_vzeroall expander: the whole
;; parallel is validated by the vzeroall_operation predicate and its first
;; element must be the UNSPECV_VZEROALL marker.  The insn has no modrm byte
;; and touches no memory.  (Condition and output template lines are not
;; visible in this excerpt.)
11721 (define_insn "*avx_vzeroall"
11722 [(match_parallel 0 "vzeroall_operation"
11723 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROALL)])]
11726 [(set_attr "type" "sse")
11727 (set_attr "modrm" "0")
11728 (set_attr "memory" "none")
11729 (set_attr "prefix" "vex")
11730 (set_attr "mode" "OI")])
11732 ;; vzeroupper clobbers the upper 128bits of AVX registers.
;; Expander for vzeroupper.  Builds a PARALLEL of nregs + 1 elements, where
;; nregs is 16 when TARGET_64BIT and 8 otherwise:
;;   element 0      - the UNSPECV_VZEROUPPER unspec_volatile marker;
;;   elements 1..n  - one CLOBBER per SSE register, viewed in V8SImode.
11733 (define_expand "avx_vzeroupper"
11734 [(match_par_dup 0 [(const_int 0)])]
11737 int nregs = TARGET_64BIT ? 16 : 8;
11740 operands[0] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs + 1));
11742 XVECEXP (operands[0], 0, 0)
11743 = gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, const0_rtx),
11744 UNSPECV_VZEROUPPER);
11746 for (regno = 0; regno < nregs; regno++)
11747 XVECEXP (operands[0], 0, regno + 1)
11748 = gen_rtx_CLOBBER (VOIDmode,
11749 gen_rtx_REG (V8SImode, SSE_REGNO (regno)));
;; Matcher for the PARALLEL built by the avx_vzeroupper expander: the whole
;; parallel is validated by the vzeroupper_operation predicate and its first
;; element must be the UNSPECV_VZEROUPPER marker.  The insn has no modrm byte
;; and touches no memory.  (Condition and output template lines are not
;; visible in this excerpt.)
11752 (define_insn "*avx_vzeroupper"
11753 [(match_parallel 0 "vzeroupper_operation"
11754 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)])]
11757 [(set_attr "type" "sse")
11758 (set_attr "modrm" "0")
11759 (set_attr "memory" "none")
11760 (set_attr "prefix" "vex")
11761 (set_attr "mode" "OI")])
;; Broadcast a scalar into every element of an AVX256MODE24P vector.
;; Alternative 0 (memory source) emits vbroadcasts<suffix> directly.
;; Alternative 1 (register source, disparaged with "?") is split after
;; reload: the scalar is first duplicated in the half-width vector mode into
;; operands[2], which is the destination hard register viewed in
;; <avxhalfvecmode>, and that half is then concatenated with itself.
11763 (define_insn_and_split "vec_dup<mode>"
11764 [(set (match_operand:AVX256MODE24P 0 "register_operand" "=x,x")
11765 (vec_duplicate:AVX256MODE24P
11766 (match_operand:<avxscalarmode> 1 "nonimmediate_operand" "m,?x")))]
11769 vbroadcasts<avxmodesuffixf2c>\t{%1, %0|%0, %1}
11771 "&& reload_completed && REG_P (operands[1])"
11772 [(set (match_dup 2) (vec_duplicate:<avxhalfvecmode> (match_dup 1)))
11773 (set (match_dup 0) (vec_concat:AVX256MODE24P (match_dup 2) (match_dup 2)))]
11775 operands[2] = gen_rtx_REG (<avxhalfvecmode>mode, REGNO (operands[0]));
11777 [(set_attr "type" "ssemov")
11778 (set_attr "prefix_extra" "1")
11779 (set_attr "prefix" "vex")
11780 (set_attr "mode" "V8SF")])
;; Duplicate a half-width vector (operand 1) into both halves of a 256-bit
;; register.  Three alternatives, by where operand 1 lives:
;;   m   - memory: vbroadcastf128;
;;   0   - already in the destination register: vinsertf128 with immediate 1,
;;         inserting it into the destination itself;
;;   ?x  - another register: vperm2f128 with immediate 0 on %t1.
11782 (define_insn "avx_vbroadcastf128_<mode>"
11783 [(set (match_operand:AVX256MODE 0 "register_operand" "=x,x,x")
11784 (vec_concat:AVX256MODE
11785 (match_operand:<avxhalfvecmode> 1 "nonimmediate_operand" "m,0,?x")
11789 vbroadcastf128\t{%1, %0|%0, %1}
11790 vinsertf128\t{$1, %1, %0, %0|%0, %0, %1, 1}
11791 vperm2f128\t{$0, %t1, %t1, %0|%0, %t1, %t1, 0}"
11792 [(set_attr "type" "ssemov,sselog1,sselog1")
11793 (set_attr "prefix_extra" "1")
11794 (set_attr "length_immediate" "0,1,1")
11795 (set_attr "prefix" "vex")
11796 (set_attr "mode" "V4SF,V8SF,V8SF")])
11798 ;; Recognize broadcast as a vec_select as produced by builtin_vec_perm.
11799 ;; If it so happens that the input is in memory, use vbroadcast.
11800 ;; Otherwise use vpermilp (and in the case of 256-bit modes, vperm2f128).
;; V4SF broadcast expressed as a selection whose index list satisfies
;; avx_vbroadcast_operand; operand 3 is the first index, i.e. the element
;; being broadcast (elt).
;; Memory alternatives: the address is advanced by elt * 4 bytes to point at
;; the SFmode element and vbroadcastss is emitted.
;; Register alternative: vpermilps with immediate elt * 0x55, which repeats
;; the 2-bit element index in all four selector fields.
;; (The case labels of the switch are not visible in this excerpt.)
11801 (define_insn "*avx_vperm_broadcast_v4sf"
11802 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
11804 (match_operand:V4SF 1 "nonimmediate_operand" "m,o,x")
11805 (match_parallel 2 "avx_vbroadcast_operand"
11806 [(match_operand 3 "const_int_operand" "C,n,n")])))]
11809 int elt = INTVAL (operands[3]);
11810 switch (which_alternative)
11814 operands[1] = adjust_address_nv (operands[1], SFmode, elt * 4);
11815 return "vbroadcastss\t{%1, %0|%0, %1}";
11817 operands[2] = GEN_INT (elt * 0x55);
11818 return "vpermilps\t{%2, %1, %0|%0, %1, %2}";
11820 gcc_unreachable ();
11823 [(set_attr "type" "ssemov,ssemov,sselog1")
11824 (set_attr "prefix_extra" "1")
11825 (set_attr "length_immediate" "0,0,1")
11826 (set_attr "prefix" "vex")
11827 (set_attr "mode" "SF,SF,V4SF")])
;; 256-bit broadcast expressed as a vec_select whose index list satisfies
;; avx_vbroadcast_operand; operand 3 is the element being broadcast (elt).
;; Always split after reload.  Two paths are visible in the split code:
;;   - a two-instruction shuffle: avx_vpermil<mode> replicates the element
;;     within each 128-bit lane (mask 15 or 0 for V4DF, (elt & 3) * 0x55
;;     otherwise), then avx_vperm2f128<mode>3 copies the lane that holds the
;;     element into both lanes (mask (elt / (<ssescalarnum> / 2)) * 0x11);
;;   - an address adjustment that makes operand 1 refer to the single scalar
;;     element, so the replacement vec_duplicate pattern applies.
;; NOTE(review): the condition choosing between the paths is not visible;
;; presumably register source vs. memory source -- confirm.
11829 (define_insn_and_split "*avx_vperm_broadcast_<mode>"
11830 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "=x,x,x")
11831 (vec_select:AVX256MODEF2P
11832 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "m,o,?x")
11833 (match_parallel 2 "avx_vbroadcast_operand"
11834 [(match_operand 3 "const_int_operand" "C,n,n")])))]
11837 "&& reload_completed"
11838 [(set (match_dup 0) (vec_duplicate:AVX256MODEF2P (match_dup 1)))]
11840 rtx op0 = operands[0], op1 = operands[1];
11841 int elt = INTVAL (operands[3]);
11847 /* Shuffle element we care about into all elements of the 128-bit lane.
11848 The other lane gets shuffled too, but we don't care. */
11849 if (<MODE>mode == V4DFmode)
11850 mask = (elt & 1 ? 15 : 0);
11852 mask = (elt & 3) * 0x55;
11853 emit_insn (gen_avx_vpermil<mode> (op0, op1, GEN_INT (mask)));
11855 /* Shuffle the lane we care about into both lanes of the dest. */
11856 mask = (elt / (<ssescalarnum> / 2)) * 0x11;
11857 emit_insn (gen_avx_vperm2f128<mode>3 (op0, op0, op0, GEN_INT (mask)));
11861 operands[1] = adjust_address_nv (op1, <avxscalarmode>mode,
11862 elt * GET_MODE_SIZE (<avxscalarmode>mode));
;; vpermilpd-style expander (AVXMODEFDP modes): turns the immediate
;; (operand 2) into an explicit element index list for vec_select.
;; Each element consumes one mask bit: bits 0 and 1 pick index 0 or 1 for the
;; first two elements; for V4DF, bits 2 and 3 pick index 2 or 3 for the last
;; two elements.  The indices are wrapped in a PARALLEL.
;; NOTE(review): the left-hand side of the final assignment is not visible
;; in this excerpt; presumably it replaces operands[2] -- confirm.
11865 (define_expand "avx_vpermil<mode>"
11866 [(set (match_operand:AVXMODEFDP 0 "register_operand" "")
11867 (vec_select:AVXMODEFDP
11868 (match_operand:AVXMODEFDP 1 "nonimmediate_operand" "")
11869 (match_operand:SI 2 "const_0_to_255_operand" "")))]
11872 int mask = INTVAL (operands[2]);
11873 rtx perm[<ssescalarnum>];
11875 perm[0] = GEN_INT (mask & 1);
11876 perm[1] = GEN_INT ((mask >> 1) & 1);
11877 if (<MODE>mode == V4DFmode)
11879 perm[2] = GEN_INT (((mask >> 2) & 1) + 2);
11880 perm[3] = GEN_INT (((mask >> 3) & 1) + 2);
11884 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
;; vpermilps-style expander (AVXMODEFSP modes): turns the immediate
;; (operand 2) into an explicit element index list for vec_select.
;; The mask holds four 2-bit fields, one per element of the first four.
;; For V8SF the same four fields are reused for elements 4..7 with 4 added
;; to each index.  The indices are wrapped in a PARALLEL.
;; NOTE(review): the left-hand side of the final assignment is not visible
;; in this excerpt; presumably it replaces operands[2] -- confirm.
11887 (define_expand "avx_vpermil<mode>"
11888 [(set (match_operand:AVXMODEFSP 0 "register_operand" "")
11889 (vec_select:AVXMODEFSP
11890 (match_operand:AVXMODEFSP 1 "nonimmediate_operand" "")
11891 (match_operand:SI 2 "const_0_to_255_operand" "")))]
11894 int mask = INTVAL (operands[2]);
11895 rtx perm[<ssescalarnum>];
11897 perm[0] = GEN_INT (mask & 3);
11898 perm[1] = GEN_INT ((mask >> 2) & 3);
11899 perm[2] = GEN_INT ((mask >> 4) & 3);
11900 perm[3] = GEN_INT ((mask >> 6) & 3);
11901 if (<MODE>mode == V8SFmode)
11903 perm[4] = GEN_INT ((mask & 3) + 4);
11904 perm[5] = GEN_INT (((mask >> 2) & 3) + 4);
11905 perm[6] = GEN_INT (((mask >> 4) & 3) + 4);
11906 perm[7] = GEN_INT (((mask >> 6) & 3) + 4);
11910 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
;; Immediate-form vpermilp<suffix>, matched as a vec_select whose index list
;; satisfies avx_vpermilp_<mode>_operand.  At output time the immediate is
;; recovered as avx_vpermilp_parallel (...) - 1 and stored back into
;; operands[2] so the template can print it.
11913 (define_insn "*avx_vpermilp<mode>"
11914 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
11915 (vec_select:AVXMODEF2P
11916 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "xm")
11917 (match_parallel 2 "avx_vpermilp_<mode>_operand"
11918 [(match_operand 3 "const_int_operand" "")])))]
11921 int mask = avx_vpermilp_parallel (operands[2], <MODE>mode) - 1;
11922 operands[2] = GEN_INT (mask);
11923 return "vpermilp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}";
11925 [(set_attr "type" "sselog")
11926 (set_attr "prefix_extra" "1")
11927 (set_attr "length_immediate" "1")
11928 (set_attr "prefix" "vex")
11929 (set_attr "mode" "<MODE>")])
;; Variable-selector form of vpermilp<suffix>: operand 1 is the data register
;; and operand 2 is the selector vector in <avxpermvecmode> (register or
;; memory).  The operation is wrapped in an unspec whose name is not visible
;; in this excerpt.
11931 (define_insn "avx_vpermilvar<mode>3"
11932 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
11934 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
11935 (match_operand:<avxpermvecmode> 2 "nonimmediate_operand" "xm")]
11938 "vpermilp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
11939 [(set_attr "type" "sselog")
11940 (set_attr "prefix_extra" "1")
11941 (set_attr "prefix" "vex")
11942 (set_attr "mode" "<MODE>")])
;; Expander for vperm2f128.  The default template is the opaque
;; UNSPEC_VPERMIL2F128 form.  When neither bit 3 nor bit 7 of the immediate
;; is set (mask & 0x88 == 0), an explicit RTL form is built instead: a
;; vec_select from the vec_concat of operands 1 and 2, where
;;   - mask bits 0-1 choose which half-vector (of the four in the concat)
;;     supplies the low half of the result, and
;;   - mask bits 4-5 choose the half-vector for the high half.
;; (The lines that emit the built SET are not visible in this excerpt.)
11944 (define_expand "avx_vperm2f128<mode>3"
11945 [(set (match_operand:AVX256MODE2P 0 "register_operand" "")
11946 (unspec:AVX256MODE2P
11947 [(match_operand:AVX256MODE2P 1 "register_operand" "")
11948 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "")
11949 (match_operand:SI 3 "const_0_to_255_operand" "")]
11950 UNSPEC_VPERMIL2F128))]
11953 int mask = INTVAL (operands[3]);
11954 if ((mask & 0x88) == 0)
11956 rtx perm[<ssescalarnum>], t1, t2;
11957 int i, base, nelt = <ssescalarnum>, nelt2 = nelt / 2;
11959 base = (mask & 3) * nelt2;
11960 for (i = 0; i < nelt2; ++i)
11961 perm[i] = GEN_INT (base + i);
11963 base = ((mask >> 4) & 3) * nelt2;
11964 for (i = 0; i < nelt2; ++i)
11965 perm[i + nelt2] = GEN_INT (base + i);
11967 t2 = gen_rtx_VEC_CONCAT (<ssedoublesizemode>mode,
11968 operands[1], operands[2]);
11969 t1 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, perm));
11970 t2 = gen_rtx_VEC_SELECT (<MODE>mode, t2, t1);
11971 t2 = gen_rtx_SET (VOIDmode, operands[0], t2);
11977 ;; Note that bits 7 and 3 of the imm8 allow lanes to be zeroed, which
11978 ;; means that in order to represent this properly in rtl we'd have to
11979 ;; nest *another* vec_concat with a zero operand and do the select from
11980 ;; a 4x wide vector. That doesn't seem very nice.
;; Opaque form of vperm2f128 (UNSPEC_VPERMIL2F128) accepting any 8-bit
;; immediate (operand 3, 0..255), printed unchanged in the instruction.
11981 (define_insn "*avx_vperm2f128<mode>_full"
11982 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
11983 (unspec:AVX256MODE2P
11984 [(match_operand:AVX256MODE2P 1 "register_operand" "x")
11985 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm")
11986 (match_operand:SI 3 "const_0_to_255_operand" "n")]
11987 UNSPEC_VPERMIL2F128))]
11989 "vperm2f128\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11990 [(set_attr "type" "sselog")
11991 (set_attr "prefix_extra" "1")
11992 (set_attr "length_immediate" "1")
11993 (set_attr "prefix" "vex")
11994 (set_attr "mode" "V8SF")])
;; Explicit-RTL form of vperm2f128: a vec_select from the vec_concat of
;; operands 1 and 2 whose index list satisfies
;; avx_vperm2f128_<mode>_operand.  At output time the immediate is recovered
;; as avx_vperm2f128_parallel (...) - 1 and stored into operands[3] so the
;; template can print it.
11996 (define_insn "*avx_vperm2f128<mode>_nozero"
11997 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
11998 (vec_select:AVX256MODE2P
11999 (vec_concat:<ssedoublesizemode>
12000 (match_operand:AVX256MODE2P 1 "register_operand" "x")
12001 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm"))
12002 (match_parallel 3 "avx_vperm2f128_<mode>_operand"
12003 [(match_operand 4 "const_int_operand" "")])))]
12006 int mask = avx_vperm2f128_parallel (operands[3], <MODE>mode) - 1;
12007 operands[3] = GEN_INT (mask);
12008 return "vperm2f128\t{%3, %2, %1, %0|%0, %1, %2, %3}";
12010 [(set_attr "type" "sselog")
12011 (set_attr "prefix_extra" "1")
12012 (set_attr "length_immediate" "1")
12013 (set_attr "prefix" "vex")
12014 (set_attr "mode" "V8SF")])
;; Expander for vinsertf128: inserts the half-width vector (operand 2) into
;; the 256-bit vector (operand 1).  Operand 3 is restricted to 0 or 1 and is
;; dispatched through a switch to vec_set_lo_<mode> or vec_set_hi_<mode>.
;; NOTE(review): the case labels are not visible in this excerpt; presumably
;; 0 selects vec_set_lo and 1 selects vec_set_hi -- confirm.
12016 (define_expand "avx_vinsertf128<mode>"
12017 [(match_operand:AVX256MODE 0 "register_operand" "")
12018 (match_operand:AVX256MODE 1 "register_operand" "")
12019 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "")
12020 (match_operand:SI 3 "const_0_to_1_operand" "")]
12023 switch (INTVAL (operands[3]))
12026 emit_insn (gen_vec_set_lo_<mode> (operands[0], operands[1],
12030 emit_insn (gen_vec_set_hi_<mode> (operands[0], operands[1],
12034 gcc_unreachable ();
;; Replace the low half of a four-element 256-bit vector (AVX256MODE4P).
;; The result is the concatenation of operand 2 (new low half, register or
;; memory) with elements 2 and 3 of operand 1 (the preserved high half).
;; Emitted as vinsertf128 with immediate 0.
12039 (define_insn "vec_set_lo_<mode>"
12040 [(set (match_operand:AVX256MODE4P 0 "register_operand" "=x")
12041 (vec_concat:AVX256MODE4P
12042 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")
12043 (vec_select:<avxhalfvecmode>
12044 (match_operand:AVX256MODE4P 1 "register_operand" "x")
12045 (parallel [(const_int 2) (const_int 3)]))))]
12047 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
12048 [(set_attr "type" "sselog")
12049 (set_attr "prefix_extra" "1")
12050 (set_attr "length_immediate" "1")
12051 (set_attr "prefix" "vex")
12052 (set_attr "mode" "V8SF")])
;; Replace the high half of a four-element 256-bit vector (AVX256MODE4P).
;; The result is the concatenation of elements 0 and 1 of operand 1 (the
;; preserved low half) with operand 2 (new high half, register or memory).
;; Emitted as vinsertf128 with immediate 1.
12054 (define_insn "vec_set_hi_<mode>"
12055 [(set (match_operand:AVX256MODE4P 0 "register_operand" "=x")
12056 (vec_concat:AVX256MODE4P
12057 (vec_select:<avxhalfvecmode>
12058 (match_operand:AVX256MODE4P 1 "register_operand" "x")
12059 (parallel [(const_int 0) (const_int 1)]))
12060 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")))]
12062 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
12063 [(set_attr "type" "sselog")
12064 (set_attr "prefix_extra" "1")
12065 (set_attr "length_immediate" "1")
12066 (set_attr "prefix" "vex")
12067 (set_attr "mode" "V8SF")])
;; Low-half insert for the AVX256MODE8P modes.  Operand 0 is the
;; vec_concat of operand 2 (a half-width vector, register or memory) as
;; the first part and elements 4 through 7 selected from operand 1 as
;; the second part.  Output is vinsertf128 with immediate 0x0.  The
;; "mode" attribute is fixed to V8SF for every mode of the iterator.
12069 (define_insn "vec_set_lo_<mode>"
12070 [(set (match_operand:AVX256MODE8P 0 "register_operand" "=x")
12071 (vec_concat:AVX256MODE8P
12072 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")
12073 (vec_select:<avxhalfvecmode>
12074 (match_operand:AVX256MODE8P 1 "register_operand" "x")
12075 (parallel [(const_int 4) (const_int 5)
12076 (const_int 6) (const_int 7)]))))]
12078 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
12079 [(set_attr "type" "sselog")
12080 (set_attr "prefix_extra" "1")
12081 (set_attr "length_immediate" "1")
12082 (set_attr "prefix" "vex")
12083 (set_attr "mode" "V8SF")])
;; High-half insert for the AVX256MODE8P modes.  Operand 0 is the
;; vec_concat of elements 0 through 3 selected from operand 1 as the
;; first part and operand 2 (a half-width vector, register or memory) as
;; the second part.  Output is vinsertf128 with immediate 0x1.  The
;; "mode" attribute is fixed to V8SF for every mode of the iterator.
12085 (define_insn "vec_set_hi_<mode>"
12086 [(set (match_operand:AVX256MODE8P 0 "register_operand" "=x")
12087 (vec_concat:AVX256MODE8P
12088 (vec_select:<avxhalfvecmode>
12089 (match_operand:AVX256MODE8P 1 "register_operand" "x")
12090 (parallel [(const_int 0) (const_int 1)
12091 (const_int 2) (const_int 3)]))
12092 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")))]
12094 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
12095 [(set_attr "type" "sselog")
12096 (set_attr "prefix_extra" "1")
12097 (set_attr "length_immediate" "1")
12098 (set_attr "prefix" "vex")
12099 (set_attr "mode" "V8SF")])
;; V16HI form of the low-half insert.  Operand 0 and operand 1 are V16HI
;; registers and operand 2 is a V8HI register or memory operand.  The
;; index list that follows operand 1 names elements 8 through 15, which
;; presumably are the elements of operand 1 that are kept, by analogy
;; with the vec_set_lo_<mode> patterns.  Output is vinsertf128 with
;; immediate 0x0; the "mode" attribute is V8SF.
12101 (define_insn "vec_set_lo_v16hi"
12102 [(set (match_operand:V16HI 0 "register_operand" "=x")
12104 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
12106 (match_operand:V16HI 1 "register_operand" "x")
12107 (parallel [(const_int 8) (const_int 9)
12108 (const_int 10) (const_int 11)
12109 (const_int 12) (const_int 13)
12110 (const_int 14) (const_int 15)]))))]
12112 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
12113 [(set_attr "type" "sselog")
12114 (set_attr "prefix_extra" "1")
12115 (set_attr "length_immediate" "1")
12116 (set_attr "prefix" "vex")
12117 (set_attr "mode" "V8SF")])
;; V16HI form of the high-half insert.  Operand 0 and operand 1 are
;; V16HI registers and operand 2 is a V8HI register or memory operand.
;; The index list that follows operand 1 names elements 0 through 7,
;; which presumably are the elements of operand 1 that are kept, by
;; analogy with the vec_set_hi_<mode> patterns.  Output is vinsertf128
;; with immediate 0x1; the "mode" attribute is V8SF.
12119 (define_insn "vec_set_hi_v16hi"
12120 [(set (match_operand:V16HI 0 "register_operand" "=x")
12123 (match_operand:V16HI 1 "register_operand" "x")
12124 (parallel [(const_int 0) (const_int 1)
12125 (const_int 2) (const_int 3)
12126 (const_int 4) (const_int 5)
12127 (const_int 6) (const_int 7)]))
12128 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
12130 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
12131 [(set_attr "type" "sselog")
12132 (set_attr "prefix_extra" "1")
12133 (set_attr "length_immediate" "1")
12134 (set_attr "prefix" "vex")
12135 (set_attr "mode" "V8SF")])
;; V32QI form of the low-half insert.  Operand 0 and operand 1 are V32QI
;; registers and operand 2 is a V16QI register or memory operand.  The
;; index list that follows operand 1 names elements 16 through 31, which
;; presumably are the elements of operand 1 that are kept, by analogy
;; with the vec_set_lo_<mode> patterns.  Output is vinsertf128 with
;; immediate 0x0; the "mode" attribute is V8SF.
12137 (define_insn "vec_set_lo_v32qi"
12138 [(set (match_operand:V32QI 0 "register_operand" "=x")
12140 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
12142 (match_operand:V32QI 1 "register_operand" "x")
12143 (parallel [(const_int 16) (const_int 17)
12144 (const_int 18) (const_int 19)
12145 (const_int 20) (const_int 21)
12146 (const_int 22) (const_int 23)
12147 (const_int 24) (const_int 25)
12148 (const_int 26) (const_int 27)
12149 (const_int 28) (const_int 29)
12150 (const_int 30) (const_int 31)]))))]
12152 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
12153 [(set_attr "type" "sselog")
12154 (set_attr "prefix_extra" "1")
12155 (set_attr "length_immediate" "1")
12156 (set_attr "prefix" "vex")
12157 (set_attr "mode" "V8SF")])
;; V32QI form of the high-half insert.  Operand 0 and operand 1 are
;; V32QI registers and operand 2 is a V16QI register or memory operand.
;; The index list that follows operand 1 names elements 0 through 15,
;; which presumably are the elements of operand 1 that are kept, by
;; analogy with the vec_set_hi_<mode> patterns.  Output is vinsertf128
;; with immediate 0x1; the "mode" attribute is V8SF.
12159 (define_insn "vec_set_hi_v32qi"
12160 [(set (match_operand:V32QI 0 "register_operand" "=x")
12163 (match_operand:V32QI 1 "register_operand" "x")
12164 (parallel [(const_int 0) (const_int 1)
12165 (const_int 2) (const_int 3)
12166 (const_int 4) (const_int 5)
12167 (const_int 6) (const_int 7)
12168 (const_int 8) (const_int 9)
12169 (const_int 10) (const_int 11)
12170 (const_int 12) (const_int 13)
12171 (const_int 14) (const_int 15)]))
12172 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
12174 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
12175 [(set_attr "type" "sselog")
12176 (set_attr "prefix_extra" "1")
12177 (set_attr "length_immediate" "1")
12178 (set_attr "prefix" "vex")
12179 (set_attr "mode" "V8SF")])
;; Masked load for the AVXMODEF2P modes.  Operand 0 is the destination
;; register, operand 1 is the memory source and operand 2 is a second
;; register input.  Output is vmaskmovp<avxmodesuffixf2c> with AT&T
;; operand order %1, %2, %0.
;; NOTE(review): operand 2 is presumably the mask register, judging by
;; its position in the output template -- confirm against the
;; instruction reference.
12181 (define_insn "avx_maskloadp<avxmodesuffixf2c><avxmodesuffix>"
12182 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
12184 [(match_operand:AVXMODEF2P 1 "memory_operand" "m")
12185 (match_operand:AVXMODEF2P 2 "register_operand" "x")
12189 "vmaskmovp<avxmodesuffixf2c>\t{%1, %2, %0|%0, %2, %1}"
12190 [(set_attr "type" "sselog1")
12191 (set_attr "prefix_extra" "1")
12192 (set_attr "prefix" "vex")
12193 (set_attr "mode" "<MODE>")])
;; Masked store for the AVXMODEF2P modes, expressed with
;; UNSPEC_MASKSTORE.  Operand 0 is the memory destination; operands 1
;; and 2 are both register inputs.  Output is
;; vmaskmovp<avxmodesuffixf2c> with AT&T operand order %2, %1, %0.
;; NOTE(review): operand 1 is presumably the mask and operand 2 the
;; data, judging by their positions in the output template -- confirm
;; against the instruction reference.
12195 (define_insn "avx_maskstorep<avxmodesuffixf2c><avxmodesuffix>"
12196 [(set (match_operand:AVXMODEF2P 0 "memory_operand" "=m")
12198 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
12199 (match_operand:AVXMODEF2P 2 "register_operand" "x")
12201 UNSPEC_MASKSTORE))]
12203 "vmaskmovp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
12204 [(set_attr "type" "sselog1")
12205 (set_attr "prefix_extra" "1")
12206 (set_attr "prefix" "vex")
12207 (set_attr "mode" "<MODE>")])
;; Unspec-based insn that produces a full-width AVX256MODE2P register
;; (operand 0) from a half-width operand 1.  There are two alternatives:
;; in alternative 0 the input is tied to the output register
;; (constraint "0"), and in alternative 1 the input is any SSE register
;; or memory.  The output code switches on which_alternative and, per
;; the insn's "mode" attribute, returns a vmovaps, vmovapd or vmovdqa
;; that copies operand 1 into the %x form of operand 0.  The "length"
;; attribute is set specially for alternative 0 and left at the default
;; ("*") otherwise.
12209 (define_insn "avx_<avxmodesuffixp><avxmodesuffix>_<avxmodesuffixp>"
12210 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x,x")
12211 (unspec:AVX256MODE2P
12212 [(match_operand:<avxhalfvecmode> 1 "nonimmediate_operand" "0,xm")]
12216 switch (which_alternative)
12221 switch (get_attr_mode (insn))
12224 return "vmovaps\t{%1, %x0|%x0, %1}";
12226 return "vmovapd\t{%1, %x0|%x0, %1}";
12228 return "vmovdqa\t{%1, %x0|%x0, %1}";
12235 gcc_unreachable ();
12237 [(set_attr "type" "ssemov")
12238 (set_attr "prefix" "vex")
12239 (set_attr "mode" "<avxvecmode>")
12240 (set (attr "length")
12241 (if_then_else (eq_attr "alternative" "0")
12243 (const_string "*")))])
;; Unspec-based insn that produces a half-width register (operand 0)
;; from a full-width AVX256MODE2P operand 1.  There are two
;; alternatives: in alternative 0 the input is tied to the output
;; register (constraint "0"), and in alternative 1 the input is any SSE
;; register or memory.  The output code switches on which_alternative
;; and, per the insn's "mode" attribute, returns a vmovaps, vmovapd or
;; vmovdqa that copies the %x form of operand 1 into operand 0.  The
;; "length" attribute is set specially for alternative 0 and left at
;; the default ("*") otherwise.
12245 (define_insn "avx_<avxmodesuffixp>_<avxmodesuffixp><avxmodesuffix>"
12246 [(set (match_operand:<avxhalfvecmode> 0 "register_operand" "=x,x")
12247 (unspec:<avxhalfvecmode>
12248 [(match_operand:AVX256MODE2P 1 "nonimmediate_operand" "0,xm")]
12252 switch (which_alternative)
12257 switch (get_attr_mode (insn))
12260 return "vmovaps\t{%x1, %0|%0, %x1}";
12262 return "vmovapd\t{%x1, %0|%0, %x1}";
12264 return "vmovdqa\t{%x1, %0|%0, %x1}";
12271 gcc_unreachable ();
12273 [(set_attr "type" "ssemov")
12274 (set_attr "prefix" "vex")
12275 (set_attr "mode" "<avxvecmode>")
12276 (set (attr "length")
12277 (if_then_else (eq_attr "alternative" "0")
12279 (const_string "*")))])
;; Vector initialisation expander for the 32-byte AVX256MODE modes.
;; Operand 0 is the destination register; operand 1 has no mode or
;; predicate restriction.  All of the work is delegated to
;; ix86_expand_vector_init, called with `false' as its first argument.
12281 (define_expand "vec_init<mode>"
12282 [(match_operand:AVX256MODE 0 "register_operand" "")
12283 (match_operand 1 "" "")]
12286 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Concatenate two half-width vectors into a 32-byte AVX256MODE register.
;; Operand 1 (register) supplies the first part and operand 2 the second
;; part.  There are two alternatives:
;;   0: operand 2 is a register or memory ("xm"); type sselog, with
;;      prefix_extra and length_immediate set to 1.
;;   1: operand 2 satisfies constraint "C"; type ssemov.
;; The output code switches on which_alternative.  It returns either a
;; vinsertf128 with immediate 0x1 that uses the %t form of operand 1, or,
;; per the insn's "mode" attribute, a vmovaps, vmovapd or vmovdqa that
;; copies operand 1 into the %x form of operand 0.
;; NOTE(review): "C" is presumably the all-zeros constant constraint, so
;; that the plain move suffices when the second part is zero -- confirm.
12290 (define_insn "*vec_concat<mode>_avx"
12291 [(set (match_operand:AVX256MODE 0 "register_operand" "=x,x")
12292 (vec_concat:AVX256MODE
12293 (match_operand:<avxhalfvecmode> 1 "register_operand" "x,x")
12294 (match_operand:<avxhalfvecmode> 2 "vector_move_operand" "xm,C")))]
12297 switch (which_alternative)
12300 return "vinsertf128\t{$0x1, %2, %t1, %0|%0, %t1, %2, 0x1}";
12302 switch (get_attr_mode (insn))
12305 return "vmovaps\t{%1, %x0|%x0, %1}";
12307 return "vmovapd\t{%1, %x0|%x0, %1}";
12309 return "vmovdqa\t{%1, %x0|%x0, %1}";
12312 gcc_unreachable ();
12315 [(set_attr "type" "sselog,ssemov")
12316 (set_attr "prefix_extra" "1,*")
12317 (set_attr "length_immediate" "1,*")
12318 (set_attr "prefix" "vex")
12319 (set_attr "mode" "<avxvecmode>")])