1 ;; GCC machine description for SSE instructions
2 ;; Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010
3 ;; Free Software Foundation, Inc.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify
8 ;; it under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful,
13 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ;; GNU General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
22 ;; 16 byte integral modes handled by SSE
;; Mode iterators that enumerate complete sets of 16-byte (SSE) and
;; 32-byte (AVX) vector modes.  Patterns written with one of these names
;; are instantiated once per listed mode.
23 (define_mode_iterator SSEMODEI [V16QI V8HI V4SI V2DI])
25 ;; All 16-byte vector modes handled by SSE
26 (define_mode_iterator SSEMODE [V16QI V8HI V4SI V2DI V4SF V2DF])
;; Same list as SSEMODE with V1TI added.
27 (define_mode_iterator SSEMODE16 [V16QI V8HI V4SI V2DI V1TI V4SF V2DF])
29 ;; 32 byte integral vector modes handled by AVX
30 (define_mode_iterator AVX256MODEI [V32QI V16HI V8SI V4DI])
32 ;; All 32-byte vector modes handled by AVX
33 (define_mode_iterator AVX256MODE [V32QI V16HI V8SI V4DI V8SF V4DF])
35 ;; All QI vector modes handled by AVX
36 (define_mode_iterator AVXMODEQI [V32QI V16QI])
38 ;; All DI vector modes handled by AVX
39 (define_mode_iterator AVXMODEDI [V4DI V2DI])
41 ;; All vector modes handled by AVX
42 (define_mode_iterator AVXMODE
43 [V16QI V8HI V4SI V2DI V4SF V2DF V32QI V16HI V8SI V4DI V8SF V4DF])
;; Same list as AVXMODE with V1TI added.
44 (define_mode_iterator AVXMODE16
45 [V16QI V8HI V4SI V2DI V1TI V4SF V2DF V32QI V16HI V8SI V4DI V8SF V4DF])
;; Subsets of the 16-byte integer vector modes.  The digits in each
;; SSEMODE<digits> name match the element widths, in bytes, of the modes
;; listed (e.g. SSEMODE12 = 1-byte and 2-byte elements).
48 (define_mode_iterator SSEMODE12 [V16QI V8HI])
49 (define_mode_iterator SSEMODE24 [V8HI V4SI])
50 (define_mode_iterator SSEMODE14 [V16QI V4SI])
51 (define_mode_iterator SSEMODE124 [V16QI V8HI V4SI])
52 (define_mode_iterator SSEMODE248 [V8HI V4SI V2DI])
53 (define_mode_iterator SSEMODE1248 [V16QI V8HI V4SI V2DI])
;; Scalar and 16-byte vector float modes together.
54 (define_mode_iterator SSEMODEF4 [SF DF V4SF V2DF])
;; The two 32-byte float vector modes (name indicates FMA4 use).
55 (define_mode_iterator FMA4MODEF4 [V8SF V4DF])
;; The two 16-byte float vector modes.
56 (define_mode_iterator SSEMODEF2P [V4SF V2DF])
;; 32-byte subsets: float only, then mixed int/float lists grouped by
;; element count (8 elements, 4 elements, or both).
58 (define_mode_iterator AVX256MODEF2P [V8SF V4DF])
59 (define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF])
60 (define_mode_iterator AVX256MODE24P [V8SI V8SF V4DI V4DF])
61 (define_mode_iterator AVX256MODE4P [V4DI V4DF])
62 (define_mode_iterator AVX256MODE8P [V8SI V8SF])
;; Float vector modes spanning both 16-byte and 32-byte widths.
63 (define_mode_iterator AVXMODEF2P [V4SF V2DF V8SF V4DF])
64 (define_mode_iterator AVXMODEF4P [V4SF V4DF])
65 (define_mode_iterator AVXMODEFDP [V2DF V4DF])
66 (define_mode_iterator AVXMODEFSP [V4SF V8SF])
;; Mode lists named after the cvtdq2ps / cvtps2dq conversions: the float
;; side (V4SF V8SF) and the integer side (V4SI V8SI) respectively.
67 (define_mode_iterator AVXMODEDCVTDQ2PS [V4SF V8SF])
68 (define_mode_iterator AVXMODEDCVTPS2DQ [V4SI V8SI])
70 ;; Int-float size matches
;; Pairs of a float vector mode and the integer vector mode with the same
;; element size and count.
71 (define_mode_iterator SSEMODE4S [V4SF V4SI])
72 (define_mode_iterator SSEMODE2D [V2DF V2DI])
74 ;; Modes handled by integer vcond pattern
;; V2DI is only included when TARGET_SSE4_2 holds.
75 (define_mode_iterator SSEMODE124C8 [V16QI V8HI V4SI
76 (V2DI "TARGET_SSE4_2")])
78 ;; Modes handled by vec_extract_even/odd pattern.
;; Each visible entry carries its own enabling condition (TARGET_SSE2 for
;; the 16-byte modes shown, TARGET_AVX for the 32-byte ones).
79 (define_mode_iterator SSEMODE_EO
82 (V2DI "TARGET_SSE2") (V4SI "TARGET_SSE2")
83 (V8HI "TARGET_SSE2") (V16QI "TARGET_SSE2")
84 (V4DF "TARGET_AVX") (V8SF "TARGET_AVX")])
86 ;; Mapping from float mode to required SSE level
;; <sse> expands to "sse" for single-precision modes and "sse2" for
;; double-precision modes; it is used as a pattern-name prefix below.
87 (define_mode_attr sse [(SF "sse") (DF "sse2") (V4SF "sse") (V2DF "sse2")])
89 ;; Mapping from integer vector mode to mnemonic suffix
90 (define_mode_attr ssevecsize [(V16QI "b") (V8HI "w") (V4SI "d") (V2DI "q")])
92 ;; Mapping of the fma4 suffix
93 (define_mode_attr fma4modesuffixf4 [(V8SF "ps") (V4DF "pd")])
;; Scalar suffix ("ss"/"sd") for both the scalar and the 16-byte vector
;; float modes.
94 (define_mode_attr ssemodesuffixf2s [(SF "ss") (DF "sd")
95 (V4SF "ss") (V2DF "sd")])
97 ;; Mapping of the avx suffix
98 (define_mode_attr ssemodesuffixf4 [(SF "ss") (DF "sd")
99 (V4SF "ps") (V2DF "pd")])
;; Single-letter precision suffix: "s" for V4SF, "d" for V2DF.  Used
;; after "p" or "s" in the mnemonics of the patterns below.
101 (define_mode_attr ssemodesuffixf2c [(V4SF "s") (V2DF "d")])
;; Scalar suffix for 4-element modes: "ss" for V4SF, "d" for V4SI.
103 (define_mode_attr ssescalarmodesuffix2s [(V4SF "ss") (V4SI "d")])
105 ;; Mapping of the max integer size for xop rotate immediate constraint
106 (define_mode_attr sserotatemax [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")])
108 ;; Mapping of vector modes back to the scalar modes
109 (define_mode_attr ssescalarmode [(V4SF "SF") (V2DF "DF")
110 (V16QI "QI") (V8HI "HI")
111 (V4SI "SI") (V2DI "DI")])
113 ;; Mapping of vector modes to a vector mode of double size
114 (define_mode_attr ssedoublesizemode
115 [(V2DF "V4DF") (V2DI "V4DI") (V4SF "V8SF") (V4SI "V8SI")
116 (V8HI "V16HI") (V16QI "V32QI")
117 (V4DF "V8DF") (V8SF "V16SF")
118 (V4DI "V8DI") (V8SI "V16SI") (V16HI "V32HI") (V32QI "V64QI")])
120 ;; Number of scalar elements in each vector type
121 (define_mode_attr ssescalarnum
122 [(V4SF "4") (V2DF "2") (V16QI "16") (V8HI "8") (V4SI "4") (V2DI "2")
123 (V8SF "8") (V4DF "4") (V32QI "32") (V16HI "16") (V8SI "8") (V4DI "4")])
;; Value for the insn "mode" attribute: integer vector modes map to TI
;; (16-byte) or OI (32-byte); float vector modes map to themselves.
126 (define_mode_attr avxvecmode
127 [(V16QI "TI") (V8HI "TI") (V4SI "TI") (V2DI "TI") (V1TI "TI")
128 (V4SF "V4SF") (V8SF "V8SF") (V2DF "V2DF") (V4DF "V4DF")
129 (V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI")])
;; Integer vector mode -> single-precision float vector mode of the same
;; total size (V4SF for 16 bytes, V8SF for 32 bytes).
130 (define_mode_attr avxvecpsmode
131 [(V16QI "V4SF") (V8HI "V4SF") (V4SI "V4SF") (V2DI "V4SF")
132 (V32QI "V8SF") (V16HI "V8SF") (V8SI "V8SF") (V4DI "V8SF")])
;; Vector mode -> vector mode with the same element type and half as
;; many elements.
133 (define_mode_attr avxhalfvecmode
134 [(V32QI "V16QI") (V16HI "V8HI") (V8SI "V4SI") (V4DI "V2DI")
135 (V8SF "V4SF") (V4DF "V2DF")
136 (V16QI "V8QI") (V8HI "V4HI") (V4SI "V2SI") (V4SF "V2SF")])
;; Vector mode -> mode of a single element, for 16-byte and 32-byte
;; vectors alike.
137 (define_mode_attr avxscalarmode
138 [(V16QI "QI") (V8HI "HI") (V4SI "SI") (V2DI "DI") (V4SF "SF") (V2DF "DF")
139 (V32QI "QI") (V16HI "HI") (V8SI "SI") (V4DI "DI") (V8SF "SF") (V4DF "DF")])
;; Swaps between the SF and SI vector modes with the same element count.
140 (define_mode_attr avxcvtvecmode
141 [(V4SF "V4SI") (V8SF "V8SI") (V4SI "V4SF") (V8SI "V8SF")])
;; Float vector mode -> integer vector mode with the same element size
;; and count.
142 (define_mode_attr avxpermvecmode
143 [(V2DF "V2DI") (V4SF "V4SI") (V4DF "V4DI") (V8SF "V8SI")])
;; Single-letter suffix by element width: "s" for 4-byte elements, "d"
;; for 8-byte elements (integer modes V8SI/V4DI included).
144 (define_mode_attr avxmodesuffixf2c
145 [(V4SF "s") (V2DF "d") (V8SI "s") (V8SF "s") (V4DI "d") (V4DF "d")])
;; Two-letter suffix: "pd"/"ps" for float modes, "si" for SI vector modes.
146 (define_mode_attr avxmodesuffixp
147 [(V2DF "pd") (V4SI "si") (V4SF "ps") (V8SF "ps") (V8SI "si")
;; Pattern-name suffix: empty for 16-byte modes, "256" for 32-byte modes.
149 (define_mode_attr avxmodesuffix
150 [(V16QI "") (V32QI "256") (V4SI "") (V4SF "") (V2DF "")
151 (V8SI "256") (V8SF "256") (V4DF "256")])
153 ;; Mapping of immediate bits for blend instructions
;; Each value is 2^N - 1 for an N-element mode, i.e. one bit per element.
154 (define_mode_attr blendbits
155 [(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")])
157 ;; Mapping of immediate bits for pinsr instructions
;; Each value is 2^(N-1) for an N-element mode.
158 (define_mode_attr pinsrbits [(V16QI "32768") (V8HI "128") (V4SI "8")])
160 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
162 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
166 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Move expander for every 32-byte vector mode (AVX256MODE).  The visible
;; body passes the operands to ix86_expand_vector_move.
168 (define_expand "mov<mode>"
169 [(set (match_operand:AVX256MODE 0 "nonimmediate_operand" "")
170 (match_operand:AVX256MODE 1 "nonimmediate_operand" ""))]
173 ix86_expand_vector_move (<MODE>mode, operands);
;; VEX-encoded vector move for all AVXMODE16 modes.  Three alternatives:
;; constant ("C") -> register, register/memory -> register, and
;; register -> memory.  The condition requires at least one operand to be
;; a register.  The output code switches on which_alternative; the
;; visible returns are standard_sse_constant_opcode and, selected by the
;; insn's "mode" attribute, vmovaps / vmovapd / vmovdqa.
177 (define_insn "*avx_mov<mode>_internal"
178 [(set (match_operand:AVXMODE16 0 "nonimmediate_operand" "=x,x ,m")
179 (match_operand:AVXMODE16 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
181 && (register_operand (operands[0], <MODE>mode)
182 || register_operand (operands[1], <MODE>mode))"
184 switch (which_alternative)
187 return standard_sse_constant_opcode (insn, operands[1]);
190 switch (get_attr_mode (insn))
194 return "vmovaps\t{%1, %0|%0, %1}";
197 return "vmovapd\t{%1, %0|%0, %1}";
199 return "vmovdqa\t{%1, %0|%0, %1}";
205 [(set_attr "type" "sselog1,ssemov,ssemov")
206 (set_attr "prefix" "vex")
207 (set_attr "mode" "<avxvecmode>")])
209 ;; All of these patterns are enabled for SSE1 as well as SSE2.
210 ;; This is essential for maintaining stable calling conventions.
;; Move expander for every 16-byte vector mode in SSEMODE16.  The visible
;; body passes the operands to ix86_expand_vector_move.
212 (define_expand "mov<mode>"
213 [(set (match_operand:SSEMODE16 0 "nonimmediate_operand" "")
214 (match_operand:SSEMODE16 1 "nonimmediate_operand" ""))]
217 ix86_expand_vector_move (<MODE>mode, operands);
;; Non-VEX vector move for all SSEMODE16 modes, with the same three
;; alternatives as the AVX variant (constant -> reg, reg/mem -> reg,
;; reg -> mem) and the same "at least one register operand" condition.
;; The mnemonic (movaps / movapd / movdqa) follows the insn's "mode"
;; attribute, which the cond below computes:
;;   V4SF  when optimizing the function for size, when TARGET_SSE2 is
;;         off, or for alternative 2 together with the
;;         TARGET_SSE_TYPELESS_STORES test; also when <MODE> is V4SF;
;;   V2DF  when <MODE> is V2DF;
;;   TI    otherwise.
221 (define_insn "*mov<mode>_internal"
222 [(set (match_operand:SSEMODE16 0 "nonimmediate_operand" "=x,x ,m")
223 (match_operand:SSEMODE16 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
225 && (register_operand (operands[0], <MODE>mode)
226 || register_operand (operands[1], <MODE>mode))"
228 switch (which_alternative)
231 return standard_sse_constant_opcode (insn, operands[1]);
234 switch (get_attr_mode (insn))
237 return "movaps\t{%1, %0|%0, %1}";
239 return "movapd\t{%1, %0|%0, %1}";
241 return "movdqa\t{%1, %0|%0, %1}";
247 [(set_attr "type" "sselog1,ssemov,ssemov")
249 (cond [(ior (ior (ne (symbol_ref "optimize_function_for_size_p (cfun)") (const_int 0))
250 (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
251 (and (eq_attr "alternative" "2")
252 (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
254 (const_string "V4SF")
255 (eq (const_string "<MODE>mode") (const_string "V4SFmode"))
256 (const_string "V4SF")
257 (eq (const_string "<MODE>mode") (const_string "V2DFmode"))
258 (const_string "V2DF")
260 (const_string "TI")))])
262 ;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
263 ;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
264 ;; from memory, we'd prefer to load the memory directly into the %xmm
265 ;; register. To facilitate this happy circumstance, this pattern won't
266 ;; split until after register allocation. If the 64-bit value didn't
267 ;; come from memory, this is the best we can do. This is much better
268 ;; than storing %edx:%eax into a stack temporary and loading an %xmm register.
;; Operand 0 is the V4SI destination register, operand 1 the DImode
;; source (general register or memory), operand 2 a V4SI scratch that is
;; only needed for the register alternative.  Enabled for 32-bit targets
;; with SSE2 and inter-unit moves; split once reload has completed.
;;  - Register source: each 32-bit half (subreg offsets 0 and 4) is
;;    loaded with gen_sse2_loadld, the low half into operand 0 and the
;;    high half into the scratch, then gen_vec_interleave_lowv4si
;;    combines them.
;;  - Memory source: a single gen_vec_concatv2di of the memory operand
;;    with const0_rtx, written through the V2DI lowpart of operand 0.
271 (define_insn_and_split "movdi_to_sse"
273 [(set (match_operand:V4SI 0 "register_operand" "=?x,x")
274 (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
275 (clobber (match_scratch:V4SI 2 "=&x,X"))])]
276 "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES"
278 "&& reload_completed"
281 if (register_operand (operands[1], DImode))
283 /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
284 Assemble the 64-bit DImode value in an xmm register. */
285 emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
286 gen_rtx_SUBREG (SImode, operands[1], 0)));
287 emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
288 gen_rtx_SUBREG (SImode, operands[1], 4)));
289 emit_insn (gen_vec_interleave_lowv4si (operands[0], operands[0],
292 else if (memory_operand (operands[1], DImode))
293 emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]),
294 operands[1], const0_rtx));
;; Post-reload rewrite (condition: TARGET_SSE && reload_completed) of a
;; V4SF register load whose source satisfies
;; zero_extended_scalar_load_operand.  The preparation code narrows
;; operand 1 to SFmode with simplify_gen_subreg and sets operand 2 to the
;; V4SF zero vector; the replacement pattern uses a vec_duplicate of
;; operand 1.
300 [(set (match_operand:V4SF 0 "register_operand" "")
301 (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
302 "TARGET_SSE && reload_completed"
305 (vec_duplicate:V4SF (match_dup 1))
309 operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
310 operands[2] = CONST0_RTX (V4SFmode);
;; Post-reload rewrite (condition: TARGET_SSE2 && reload_completed) of a
;; V2DF register load whose source satisfies
;; zero_extended_scalar_load_operand.  Operand 1 is narrowed to DFmode
;; and operand 2 is set to DFmode zero; the result is the vec_concat of
;; the two, i.e. the scalar in the first half and zero in the second.
314 [(set (match_operand:V2DF 0 "register_operand" "")
315 (match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))]
316 "TARGET_SSE2 && reload_completed"
317 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
319 operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
320 operands[2] = CONST0_RTX (DFmode);
;; Push expander for a register in any 32-byte vector mode; the visible
;; body calls ix86_expand_push with the mode and the operand.
323 (define_expand "push<mode>1"
324 [(match_operand:AVX256MODE 0 "register_operand" "")]
327 ix86_expand_push (<MODE>mode, operands[0]);
;; Push expander for a register in any SSEMODE16 mode; the visible body
;; calls ix86_expand_push with the mode and the operand.
331 (define_expand "push<mode>1"
332 [(match_operand:SSEMODE16 0 "register_operand" "")]
335 ix86_expand_push (<MODE>mode, operands[0]);
;; Misaligned-move expander for the 32-byte vector modes; the visible
;; body passes the operands to ix86_expand_vector_move_misalign.
339 (define_expand "movmisalign<mode>"
340 [(set (match_operand:AVX256MODE 0 "nonimmediate_operand" "")
341 (match_operand:AVX256MODE 1 "nonimmediate_operand" ""))]
344 ix86_expand_vector_move_misalign (<MODE>mode, operands);
;; Misaligned-move expander for the SSEMODE16 modes; the visible body
;; passes the operands to ix86_expand_vector_move_misalign.
348 (define_expand "movmisalign<mode>"
349 [(set (match_operand:SSEMODE16 0 "nonimmediate_operand" "")
350 (match_operand:SSEMODE16 1 "nonimmediate_operand" ""))]
353 ix86_expand_vector_move_misalign (<MODE>mode, operands);
;; Named AVX pattern emitting vmovups / vmovupd for the AVXMODEF2P modes.
;; Two alternatives: register/memory -> register and register -> memory;
;; the condition additionally rejects memory-to-memory.  Sets the "movu"
;; attribute.
357 (define_insn "avx_movup<avxmodesuffixf2c><avxmodesuffix>"
358 [(set (match_operand:AVXMODEF2P 0 "nonimmediate_operand" "=x,m")
360 [(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "xm,x")]
362 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
363 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
364 "vmovup<avxmodesuffixf2c>\t{%1, %0|%0, %1}"
365 [(set_attr "type" "ssemov")
366 (set_attr "movu" "1")
367 (set_attr "prefix" "vex")
368 (set_attr "mode" "<MODE>")])
;; Emits movq (vmovq when VEX-encoded, via the %v prefix) from a V2DI
;; register or memory operand into a V2DI register.  The visible RTL
;; selects element 0 of operand 1.
370 (define_insn "sse2_movq128"
371 [(set (match_operand:V2DI 0 "register_operand" "=x")
374 (match_operand:V2DI 1 "nonimmediate_operand" "xm")
375 (parallel [(const_int 0)]))
378 "%vmovq\t{%1, %0|%0, %1}"
379 [(set_attr "type" "ssemov")
380 (set_attr "prefix" "maybe_vex")
381 (set_attr "mode" "TI")])
;; Non-VEX counterpart of avx_movup: emits movups / movupd for V4SF and
;; V2DF.  Alternatives are register/memory -> register and
;; register -> memory; memory-to-memory is rejected by the condition.
383 (define_insn "<sse>_movup<ssemodesuffixf2c>"
384 [(set (match_operand:SSEMODEF2P 0 "nonimmediate_operand" "=x,m")
386 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm,x")]
388 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
389 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
390 "movup<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
391 [(set_attr "type" "ssemov")
392 (set_attr "movu" "1")
393 (set_attr "mode" "<MODE>")])
;; Emits vmovdqu for the QI vector modes V32QI and V16QI (pattern names
;; avx_movdqu256 and avx_movdqu).  Requires TARGET_AVX and rejects
;; memory-to-memory.
395 (define_insn "avx_movdqu<avxmodesuffix>"
396 [(set (match_operand:AVXMODEQI 0 "nonimmediate_operand" "=x,m")
398 [(match_operand:AVXMODEQI 1 "nonimmediate_operand" "xm,x")]
400 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
401 "vmovdqu\t{%1, %0|%0, %1}"
402 [(set_attr "type" "ssemov")
403 (set_attr "movu" "1")
404 (set_attr "prefix" "vex")
405 (set_attr "mode" "<avxvecmode>")])
;; Emits movdqu on V16QI operands.  Requires TARGET_SSE2 and rejects
;; memory-to-memory.
407 (define_insn "sse2_movdqu"
408 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
409 (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "xm,x")]
411 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
412 "movdqu\t{%1, %0|%0, %1}"
413 [(set_attr "type" "ssemov")
414 (set_attr "movu" "1")
415 (set_attr "prefix_data16" "1")
416 (set_attr "mode" "TI")])
;; Emits vmovntps / vmovntpd: stores a float vector register (any
;; AVXMODEF2P mode) to memory.  Register source and memory destination
;; are the only operand forms accepted.
418 (define_insn "avx_movnt<mode>"
419 [(set (match_operand:AVXMODEF2P 0 "memory_operand" "=m")
421 [(match_operand:AVXMODEF2P 1 "register_operand" "x")]
423 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
424 "vmovntp<avxmodesuffixf2c>\t{%1, %0|%0, %1}"
425 [(set_attr "type" "ssemov")
426 (set_attr "prefix" "vex")
427 (set_attr "mode" "<MODE>")])
;; Non-VEX counterpart: emits movntps / movntpd, storing a V4SF or V2DF
;; register to memory.
429 (define_insn "<sse>_movnt<mode>"
430 [(set (match_operand:SSEMODEF2P 0 "memory_operand" "=m")
432 [(match_operand:SSEMODEF2P 1 "register_operand" "x")]
434 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
435 "movntp<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
436 [(set_attr "type" "ssemov")
437 (set_attr "mode" "<MODE>")])
;; Emits vmovntdq: stores a V4DI or V2DI register to memory.
;; NOTE(review): "type" is "ssecvt" here, whereas sse2_movntv2di and the
;; other movnt patterns in this file use "ssemov" -- confirm whether the
;; difference is intended.
439 (define_insn "avx_movnt<mode>"
440 [(set (match_operand:AVXMODEDI 0 "memory_operand" "=m")
442 [(match_operand:AVXMODEDI 1 "register_operand" "x")]
445 "vmovntdq\t{%1, %0|%0, %1}"
446 [(set_attr "type" "ssecvt")
447 (set_attr "prefix" "vex")
448 (set_attr "mode" "<avxvecmode>")])
;; Emits movntdq: stores a V2DI register to memory.
450 (define_insn "sse2_movntv2di"
451 [(set (match_operand:V2DI 0 "memory_operand" "=m")
452 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")]
455 "movntdq\t{%1, %0|%0, %1}"
456 [(set_attr "type" "ssemov")
457 (set_attr "prefix_data16" "1")
458 (set_attr "mode" "TI")])
;; Emits movnti: stores a general-purpose register ("r" constraint) to an
;; SImode memory location.
;; NOTE(review): the "mode" attribute is "V2DF" although both operands
;; are SImode -- confirm this is intentional.
460 (define_insn "sse2_movntsi"
461 [(set (match_operand:SI 0 "memory_operand" "=m")
462 (unspec:SI [(match_operand:SI 1 "register_operand" "r")]
465 "movnti\t{%1, %0|%0, %1}"
466 [(set_attr "type" "ssemov")
467 (set_attr "prefix_data16" "0")
468 (set_attr "mode" "V2DF")])
;; Emits vlddqu: loads a V32QI or V16QI value from memory into a
;; register (pattern names avx_lddqu256 and avx_lddqu).
;; NOTE(review): "type" is "ssecvt" here but "ssemov" in sse3_lddqu --
;; confirm whether the difference is intended.
470 (define_insn "avx_lddqu<avxmodesuffix>"
471 [(set (match_operand:AVXMODEQI 0 "register_operand" "=x")
473 [(match_operand:AVXMODEQI 1 "memory_operand" "m")]
476 "vlddqu\t{%1, %0|%0, %1}"
477 [(set_attr "type" "ssecvt")
478 (set_attr "movu" "1")
479 (set_attr "prefix" "vex")
480 (set_attr "mode" "<avxvecmode>")])
;; Emits lddqu: loads a V16QI value from memory into a register.  The
;; attributes mark the encoding as using a rep prefix and no data16
;; prefix.
482 (define_insn "sse3_lddqu"
483 [(set (match_operand:V16QI 0 "register_operand" "=x")
484 (unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "m")]
487 "lddqu\t{%1, %0|%0, %1}"
488 [(set_attr "type" "ssemov")
489 (set_attr "movu" "1")
490 (set_attr "prefix_data16" "0")
491 (set_attr "prefix_rep" "1")
492 (set_attr "mode" "TI")])
494 ; Expand patterns for non-temporal stores. At the moment, only those
495 ; that directly map to insns are defined; it would be possible to
496 ; define patterns for other modes that would expand to several insns.
;; storent expander for V4SF and V2DF: memory destination, register
;; source, enabled when SSE_VEC_FLOAT_MODE_P holds for the mode.
498 (define_expand "storent<mode>"
499 [(set (match_operand:SSEMODEF2P 0 "memory_operand" "")
501 [(match_operand:SSEMODEF2P 1 "register_operand" "")]
503 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
;; storent expander for the modes of the MODEF iterator (defined outside
;; this excerpt): memory destination, register source.
506 (define_expand "storent<mode>"
507 [(set (match_operand:MODEF 0 "memory_operand" "")
509 [(match_operand:MODEF 1 "register_operand" "")]
;; storent expander for V2DI: memory destination, register source; same
;; operand shape as the sse2_movntv2di insn.
514 (define_expand "storentv2di"
515 [(set (match_operand:V2DI 0 "memory_operand" "")
516 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "")]
;; storent expander for SImode: memory destination, register source;
;; same operand shape as the sse2_movntsi insn.
521 (define_expand "storentsi"
522 [(set (match_operand:SI 0 "memory_operand" "")
523 (unspec:SI [(match_operand:SI 1 "register_operand" "")]
528 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
530 ;; Parallel floating point arithmetic
532 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Unary expander on V4SF / V2DF registers.  All the work is delegated to
;; ix86_expand_fp_absneg_operator, after which expansion is DONE.
534 (define_expand "<code><mode>2"
535 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
537 (match_operand:SSEMODEF2P 1 "register_operand" "")))]
538 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
539 "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
;; Add/subtract expander for the 32-byte float modes (V8SF, V4DF).
;; Operands are adjusted with ix86_fixup_binary_operands_no_copy and the
;; pattern is then emitted as written.
541 (define_expand "<plusminus_insn><mode>3"
542 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
543 (plusminus:AVX256MODEF2P
544 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
545 (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
546 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
547 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Three-operand VEX packed add/subtract for all AVXMODEF2P modes:
;; operand 0 = operand 1 (register) op operand 2 (register or memory).
;; <comm> supplies the commutativity marker appropriate to the code.
549 (define_insn "*avx_<plusminus_insn><mode>3"
550 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
551 (plusminus:AVXMODEF2P
552 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "<comm>x")
553 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
554 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
555 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
556 "v<plusminus_mnemonic>p<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
557 [(set_attr "type" "sseadd")
558 (set_attr "prefix" "vex")
559 (set_attr "mode" "<avxvecmode>")])
;; Add/subtract expander for the 16-byte float modes (V4SF, V2DF).
;; Operands are adjusted with ix86_fixup_binary_operands_no_copy and the
;; pattern is then emitted as written.
561 (define_expand "<plusminus_insn><mode>3"
562 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
563 (plusminus:SSEMODEF2P
564 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
565 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
566 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
567 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Two-operand (non-VEX) packed add/subtract for V4SF / V2DF.  Operand 1
;; is tied to the destination ("0" constraint), so the template names
;; only %0 and %2.
569 (define_insn "*<plusminus_insn><mode>3"
570 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
571 (plusminus:SSEMODEF2P
572 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "<comm>0")
573 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
574 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
575 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
576 "<plusminus_mnemonic>p<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
577 [(set_attr "type" "sseadd")
578 (set_attr "mode" "<MODE>")])
;; VEX scalar add/subtract (v{add,sub}ss / v{add,sub}sd) expressed as a
;; vec_merge over the full V4SF / V2DF vector.  Three-operand form; the
;; "mode" attribute is the scalar element mode.
580 (define_insn "*avx_vm<plusminus_insn><mode>3"
581 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
582 (vec_merge:SSEMODEF2P
583 (plusminus:SSEMODEF2P
584 (match_operand:SSEMODEF2P 1 "register_operand" "x")
585 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
588 "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
589 "v<plusminus_mnemonic>s<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
590 [(set_attr "type" "sseadd")
591 (set_attr "prefix" "vex")
592 (set_attr "mode" "<ssescalarmode>")])
;; Non-VEX scalar add/subtract ({add,sub}ss / {add,sub}sd) expressed as a
;; vec_merge.  Operand 1 is tied to the destination, so the template
;; names only %0 and %2.
594 (define_insn "<sse>_vm<plusminus_insn><mode>3"
595 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
596 (vec_merge:SSEMODEF2P
597 (plusminus:SSEMODEF2P
598 (match_operand:SSEMODEF2P 1 "register_operand" "0")
599 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
602 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
603 "<plusminus_mnemonic>s<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
604 [(set_attr "type" "sseadd")
605 (set_attr "mode" "<ssescalarmode>")])
;; Multiply expander for the 32-byte float modes; operands are adjusted
;; with ix86_fixup_binary_operands_no_copy (MULT, ...).
607 (define_expand "mul<mode>3"
608 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
610 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
611 (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
612 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
613 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; Three-operand VEX packed multiply (vmulps / vmulpd) for all
;; AVXMODEF2P modes.  The "%" in operand 1's constraint marks operands 1
;; and 2 as commutative.
615 (define_insn "*avx_mul<mode>3"
616 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
618 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
619 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
620 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
621 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
622 "vmulp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
623 [(set_attr "type" "ssemul")
624 (set_attr "prefix" "vex")
625 (set_attr "mode" "<avxvecmode>")])
;; Multiply expander for V4SF / V2DF; operands are adjusted with
;; ix86_fixup_binary_operands_no_copy (MULT, ...).
627 (define_expand "mul<mode>3"
628 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
630 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
631 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
632 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
633 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; Two-operand (non-VEX) packed multiply (mulps / mulpd).  Operand 1 is
;; commutative with operand 2 and tied to the destination ("%0").
635 (define_insn "*mul<mode>3"
636 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
638 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
639 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
640 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
641 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
642 "mulp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
643 [(set_attr "type" "ssemul")
644 (set_attr "mode" "<MODE>")])
;; VEX scalar multiply (vmulss / vmulsd) expressed as a vec_merge over
;; V4SF / V2DF; three-operand form.
;; NOTE(review): the condition tests AVX_VEC_FLOAT_MODE_P, while the
;; sibling *avx_vm<plusminus_insn><mode>3 and *avx_vmdiv<mode>3 patterns
;; test AVX128_VEC_FLOAT_MODE_P.  The iterator already limits this
;; pattern to 16-byte modes; confirm whether the difference is intended.
646 (define_insn "*avx_vmmul<mode>3"
647 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
648 (vec_merge:SSEMODEF2P
650 (match_operand:SSEMODEF2P 1 "register_operand" "x")
651 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
654 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
655 "vmuls<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
656 [(set_attr "type" "ssemul")
657 (set_attr "prefix" "vex")
658 (set_attr "mode" "<ssescalarmode>")])
;; Non-VEX scalar multiply (mulss / mulsd) expressed as a vec_merge.
;; Operand 1 is tied to the destination.
660 (define_insn "<sse>_vmmul<mode>3"
661 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
662 (vec_merge:SSEMODEF2P
664 (match_operand:SSEMODEF2P 1 "register_operand" "0")
665 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
668 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
669 "muls<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
670 [(set_attr "type" "ssemul")
671 (set_attr "mode" "<ssescalarmode>")])
;; V8SF divide expander.  Operands are first adjusted with
;; ix86_fixup_binary_operands_no_copy.  When TARGET_SSE_MATH and
;; TARGET_RECIP are set, the insn is not being optimized for size, and
;; -ffinite-math-only, -fno-trapping-math and
;; -funsafe-math-optimizations are all in effect, the division is
;; emitted through ix86_emit_swdivsf instead.
673 (define_expand "divv8sf3"
674 [(set (match_operand:V8SF 0 "register_operand" "")
675 (div:V8SF (match_operand:V8SF 1 "register_operand" "")
676 (match_operand:V8SF 2 "nonimmediate_operand" "")))]
679 ix86_fixup_binary_operands_no_copy (DIV, V8SFmode, operands);
681 if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_insn_for_size_p ()
682 && flag_finite_math_only && !flag_trapping_math
683 && flag_unsafe_math_optimizations)
685 ix86_emit_swdivsf (operands[0], operands[1],
686 operands[2], V8SFmode);
;; V4DF divide expander; only adjusts the operands with
;; ix86_fixup_binary_operands_no_copy before emitting the pattern.
691 (define_expand "divv4df3"
692 [(set (match_operand:V4DF 0 "register_operand" "")
693 (div:V4DF (match_operand:V4DF 1 "register_operand" "")
694 (match_operand:V4DF 2 "nonimmediate_operand" "")))]
696 "ix86_fixup_binary_operands_no_copy (DIV, V4DFmode, operands);")
;; Named three-operand VEX packed divide (vdivps / vdivpd) for all
;; AVXMODEF2P modes: operand 0 = operand 1 (register) / operand 2
;; (register or memory).
698 (define_insn "avx_div<mode>3"
699 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
701 (match_operand:AVXMODEF2P 1 "register_operand" "x")
702 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
703 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
704 "vdivp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
705 [(set_attr "type" "ssediv")
706 (set_attr "prefix" "vex")
707 (set_attr "mode" "<MODE>")])
;; V4SF divide expander.  When TARGET_SSE_MATH and TARGET_RECIP are set,
;; the insn is being optimized for speed, and -ffinite-math-only,
;; -fno-trapping-math and -funsafe-math-optimizations are all in effect,
;; the division is emitted through ix86_emit_swdivsf.
709 (define_expand "divv4sf3"
710 [(set (match_operand:V4SF 0 "register_operand" "")
711 (div:V4SF (match_operand:V4SF 1 "register_operand" "")
712 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
715 if (TARGET_SSE_MATH && TARGET_RECIP && optimize_insn_for_speed_p ()
716 && flag_finite_math_only && !flag_trapping_math
717 && flag_unsafe_math_optimizations)
719 ix86_emit_swdivsf (operands[0], operands[1],
720 operands[2], V4SFmode);
;; V2DF divide expander: register dividend, register-or-memory divisor.
725 (define_expand "divv2df3"
726 [(set (match_operand:V2DF 0 "register_operand" "")
727 (div:V2DF (match_operand:V2DF 1 "register_operand" "")
728 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
;; Unnamed three-operand VEX packed divide for the 16-byte float modes
;; only (SSEMODEF2P iterator, AVX128_VEC_FLOAT_MODE_P condition).
732 (define_insn "*avx_div<mode>3"
733 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
735 (match_operand:SSEMODEF2P 1 "register_operand" "x")
736 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
737 "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
738 "vdivp<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
739 [(set_attr "type" "ssediv")
740 (set_attr "prefix" "vex")
741 (set_attr "mode" "<MODE>")])
;; Two-operand (non-VEX) packed divide (divps / divpd).  The dividend is
;; tied to the destination register.
743 (define_insn "<sse>_div<mode>3"
744 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
746 (match_operand:SSEMODEF2P 1 "register_operand" "0")
747 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
748 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
749 "divp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
750 [(set_attr "type" "ssediv")
751 (set_attr "mode" "<MODE>")])
;; VEX scalar divide (vdivss / vdivsd) expressed as a vec_merge over
;; V4SF / V2DF; three-operand form.
753 (define_insn "*avx_vmdiv<mode>3"
754 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
755 (vec_merge:SSEMODEF2P
757 (match_operand:SSEMODEF2P 1 "register_operand" "x")
758 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
761 "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
762 "vdivs<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
763 [(set_attr "type" "ssediv")
764 (set_attr "prefix" "vex")
765 (set_attr "mode" "<ssescalarmode>")])
;; Non-VEX scalar divide (divss / divsd) expressed as a vec_merge.  The
;; dividend is tied to the destination register.
767 (define_insn "<sse>_vmdiv<mode>3"
768 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
769 (vec_merge:SSEMODEF2P
771 (match_operand:SSEMODEF2P 1 "register_operand" "0")
772 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
775 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
776 "divs<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
777 [(set_attr "type" "ssediv")
778 (set_attr "mode" "<ssescalarmode>")])
;; Emits vrcpps on a V8SF register-or-memory source; the operation is
;; represented as UNSPEC_RCP.
780 (define_insn "avx_rcpv8sf2"
781 [(set (match_operand:V8SF 0 "register_operand" "=x")
783 [(match_operand:V8SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
785 "vrcpps\t{%1, %0|%0, %1}"
786 [(set_attr "type" "sse")
787 (set_attr "prefix" "vex")
788 (set_attr "mode" "V8SF")])
;; Emits rcpps (vrcpps when VEX-encoded, via the %v prefix) on a V4SF
;; register-or-memory source; the operation is represented as UNSPEC_RCP.
790 (define_insn "sse_rcpv4sf2"
791 [(set (match_operand:V4SF 0 "register_operand" "=x")
793 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
795 "%vrcpps\t{%1, %0|%0, %1}"
796 [(set_attr "type" "sse")
797 (set_attr "atom_sse_attr" "rcp")
798 (set_attr "prefix" "maybe_vex")
799 (set_attr "mode" "V4SF")])
;; VEX scalar reciprocal (vrcpss).  Operand 1 (register or memory) is the
;; value operated on; operand 2 is a separate register that appears as
;; the first source in the three-operand template.
801 (define_insn "*avx_vmrcpv4sf2"
802 [(set (match_operand:V4SF 0 "register_operand" "=x")
804 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
806 (match_operand:V4SF 2 "register_operand" "x")
809 "vrcpss\t{%1, %2, %0|%0, %2, %1}"
810 [(set_attr "type" "sse")
811 (set_attr "prefix" "vex")
812 (set_attr "mode" "SF")])
;; Non-VEX scalar reciprocal (rcpss).  Operand 2 is tied to the
;; destination, so the template names only %0 and %1.
814 (define_insn "sse_vmrcpv4sf2"
815 [(set (match_operand:V4SF 0 "register_operand" "=x")
817 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
819 (match_operand:V4SF 2 "register_operand" "0")
822 "rcpss\t{%1, %0|%0, %1}"
823 [(set_attr "type" "sse")
824 (set_attr "atom_sse_attr" "rcp")
825 (set_attr "mode" "SF")])
;; V8SF square-root expander.  When TARGET_SSE_MATH and TARGET_RECIP are
;; set, the insn is not being optimized for size, and -ffinite-math-only,
;; -fno-trapping-math and -funsafe-math-optimizations are all in effect,
;; the result is emitted through ix86_emit_swsqrtsf with a final
;; argument of 0.
827 (define_expand "sqrtv8sf2"
828 [(set (match_operand:V8SF 0 "register_operand" "")
829 (sqrt:V8SF (match_operand:V8SF 1 "nonimmediate_operand" "")))]
832 if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_insn_for_size_p ()
833 && flag_finite_math_only && !flag_trapping_math
834 && flag_unsafe_math_optimizations)
836 ix86_emit_swsqrtsf (operands[0], operands[1], V8SFmode, 0);
;; Emits vsqrtps on a V8SF register-or-memory source.
841 (define_insn "avx_sqrtv8sf2"
842 [(set (match_operand:V8SF 0 "register_operand" "=x")
843 (sqrt:V8SF (match_operand:V8SF 1 "nonimmediate_operand" "xm")))]
845 "vsqrtps\t{%1, %0|%0, %1}"
846 [(set_attr "type" "sse")
847 (set_attr "prefix" "vex")
848 (set_attr "mode" "V8SF")])
;; V4SF square-root expander.  When TARGET_SSE_MATH and TARGET_RECIP are
;; set, the insn is being optimized for speed, and -ffinite-math-only,
;; -fno-trapping-math and -funsafe-math-optimizations are all in effect,
;; the result is emitted through ix86_emit_swsqrtsf with a final
;; argument of 0.
850 (define_expand "sqrtv4sf2"
851 [(set (match_operand:V4SF 0 "register_operand" "")
852 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
855 if (TARGET_SSE_MATH && TARGET_RECIP && optimize_insn_for_speed_p ()
856 && flag_finite_math_only && !flag_trapping_math
857 && flag_unsafe_math_optimizations)
859 ix86_emit_swsqrtsf (operands[0], operands[1], V4SFmode, 0);
;; Emits sqrtps (vsqrtps when VEX-encoded, via the %v prefix) on a V4SF
;; register-or-memory source.
864 (define_insn "sse_sqrtv4sf2"
865 [(set (match_operand:V4SF 0 "register_operand" "=x")
866 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
868 "%vsqrtps\t{%1, %0|%0, %1}"
869 [(set_attr "type" "sse")
870 (set_attr "atom_sse_attr" "sqrt")
871 (set_attr "prefix" "maybe_vex")
872 (set_attr "mode" "V4SF")])
;; Emits vsqrtpd on a V4DF register-or-memory source.  The pattern is
;; defined directly under the standard name "sqrtv4df2", with no
;; separate expander.
874 (define_insn "sqrtv4df2"
875 [(set (match_operand:V4DF 0 "register_operand" "=x")
876 (sqrt:V4DF (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
878 "vsqrtpd\t{%1, %0|%0, %1}"
879 [(set_attr "type" "sse")
880 (set_attr "prefix" "vex")
881 (set_attr "mode" "V4DF")])
;; Emits sqrtpd (vsqrtpd when VEX-encoded, via the %v prefix) on a V2DF
;; register-or-memory source.
883 (define_insn "sqrtv2df2"
884 [(set (match_operand:V2DF 0 "register_operand" "=x")
885 (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
887 "%vsqrtpd\t{%1, %0|%0, %1}"
888 [(set_attr "type" "sse")
889 (set_attr "prefix" "maybe_vex")
890 (set_attr "mode" "V2DF")])
;; VEX scalar square root (vsqrtss / vsqrtsd) expressed as a vec_merge.
;; Operand 1 (register or memory) is the value operated on; operand 2 is
;; a separate register that appears as the first source in the template.
;; NOTE(review): the condition tests AVX_VEC_FLOAT_MODE_P, while the
;; sibling *avx_vmdiv<mode>3 pattern tests AVX128_VEC_FLOAT_MODE_P.  The
;; iterator already limits this pattern to 16-byte modes; confirm whether
;; the difference is intended.
892 (define_insn "*avx_vmsqrt<mode>2"
893 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
894 (vec_merge:SSEMODEF2P
896 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm"))
897 (match_operand:SSEMODEF2P 2 "register_operand" "x")
899 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
900 "vsqrts<ssemodesuffixf2c>\t{%1, %2, %0|%0, %2, %1}"
901 [(set_attr "type" "sse")
902 (set_attr "prefix" "vex")
903 (set_attr "mode" "<ssescalarmode>")])
;; Non-VEX scalar square root (sqrtss / sqrtsd) expressed as a vec_merge.
;; Operand 2 is tied to the destination, so the template names only %0
;; and %1.
905 (define_insn "<sse>_vmsqrt<mode>2"
906 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
907 (vec_merge:SSEMODEF2P
909 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm"))
910 (match_operand:SSEMODEF2P 2 "register_operand" "0")
912 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
913 "sqrts<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
914 [(set_attr "type" "sse")
915 (set_attr "atom_sse_attr" "sqrt")
916 (set_attr "mode" "<ssescalarmode>")])
;; V8SF reciprocal-square-root expander, enabled for TARGET_AVX with
;; TARGET_SSE_MATH.  The visible body calls ix86_emit_swsqrtsf with a
;; final argument of 1 (the sqrt expanders pass 0).
918 (define_expand "rsqrtv8sf2"
919 [(set (match_operand:V8SF 0 "register_operand" "")
921 [(match_operand:V8SF 1 "nonimmediate_operand" "")] UNSPEC_RSQRT))]
922 "TARGET_AVX && TARGET_SSE_MATH"
924 ix86_emit_swsqrtsf (operands[0], operands[1], V8SFmode, 1);
;; Emits vrsqrtps on a V8SF register-or-memory source; the operation is
;; represented as UNSPEC_RSQRT.
928 (define_insn "avx_rsqrtv8sf2"
929 [(set (match_operand:V8SF 0 "register_operand" "=x")
931 [(match_operand:V8SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
933 "vrsqrtps\t{%1, %0|%0, %1}"
934 [(set_attr "type" "sse")
935 (set_attr "prefix" "vex")
936 (set_attr "mode" "V8SF")])
;; V4SF reciprocal-square-root expander.  The visible body calls
;; ix86_emit_swsqrtsf with a final argument of 1 (the sqrt expanders
;; pass 0).
938 (define_expand "rsqrtv4sf2"
939 [(set (match_operand:V4SF 0 "register_operand" "")
941 [(match_operand:V4SF 1 "nonimmediate_operand" "")] UNSPEC_RSQRT))]
944 ix86_emit_swsqrtsf (operands[0], operands[1], V4SFmode, 1);
;; Emits rsqrtps (vrsqrtps when VEX-encoded, via the %v prefix) on a V4SF
;; register-or-memory source; the operation is represented as
;; UNSPEC_RSQRT.
948 (define_insn "sse_rsqrtv4sf2"
949 [(set (match_operand:V4SF 0 "register_operand" "=x")
951 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
953 "%vrsqrtps\t{%1, %0|%0, %1}"
954 [(set_attr "type" "sse")
955 (set_attr "prefix" "maybe_vex")
956 (set_attr "mode" "V4SF")])
;; VEX scalar reciprocal square root (vrsqrtss).  Operand 1 (register or
;; memory) is the value operated on; operand 2 is a separate register
;; that appears as the first source in the three-operand template.
958 (define_insn "*avx_vmrsqrtv4sf2"
959 [(set (match_operand:V4SF 0 "register_operand" "=x")
961 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
963 (match_operand:V4SF 2 "register_operand" "x")
966 "vrsqrtss\t{%1, %2, %0|%0, %2, %1}"
967 [(set_attr "type" "sse")
968 (set_attr "prefix" "vex")
969 (set_attr "mode" "SF")])
;; Non-VEX scalar reciprocal square root (rsqrtss).  Operand 2 is tied to
;; the destination, so the template names only %0 and %1.
971 (define_insn "sse_vmrsqrtv4sf2"
972 [(set (match_operand:V4SF 0 "register_operand" "=x")
974 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
976 (match_operand:V4SF 2 "register_operand" "0")
979 "rsqrtss\t{%1, %0|%0, %1}"
980 [(set_attr "type" "sse")
981 (set_attr "mode" "SF")])
983 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
984 ;; isn't really correct, as those rtl operators aren't defined when
985 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
;; smax/smin expander for the 32-byte float modes.  Unless
;; -ffinite-math-only is in effect, operand 1 is first forced into a
;; register; the operands are then adjusted with
;; ix86_fixup_binary_operands_no_copy.
987 (define_expand "<code><mode>3"
988 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
989 (smaxmin:AVX256MODEF2P
990 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
991 (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
992 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
994 if (!flag_finite_math_only)
995 operands[1] = force_reg (<MODE>mode, operands[1]);
996 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
;; Expander over the SSEMODEF2P modes, enabled when
;; SSE_VEC_FLOAT_MODE_P holds for the mode.  The preparation code is
;; the same as in the 256-bit expander: operand 1 is forced into a
;; register unless flag_finite_math_only is set, then the operands go
;; through ix86_fixup_binary_operands_no_copy.
;; NOTE(review): the RTL operator line is missing from this listing.
999 (define_expand "<code><mode>3"
1000 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1002 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
1003 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
1004 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1006 if (!flag_finite_math_only)
1007 operands[1] = force_reg (<MODE>mode, operands[1]);
1008 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
;; AVX three-operand packed min/max, finite-math variant.
;; Only matches when flag_finite_math_only is set and
;; ix86_binary_operator_ok accepts the operands; in that case operand 1
;; is marked commutative ("%x") so the operands may be swapped.
1011 (define_insn "*avx_<code><mode>3_finite"
1012 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1014 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
1015 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
1016 "AVX_VEC_FLOAT_MODE_P (<MODE>mode) && flag_finite_math_only
1017 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1018 "v<maxminfprefix>p<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1019 [(set_attr "type" "sseadd")
1020 (set_attr "prefix" "vex")
1021 (set_attr "mode" "<MODE>")])
;; Legacy SSE two-operand packed min/max, finite-math variant.
;; Operand 1 is tied to the destination and marked commutative ("%0");
;; the pattern requires flag_finite_math_only and
;; ix86_binary_operator_ok.
1023 (define_insn "*<code><mode>3_finite"
1024 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1026 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
1027 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
1028 "SSE_VEC_FLOAT_MODE_P (<MODE>mode) && flag_finite_math_only
1029 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1030 "<maxminfprefix>p<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1031 [(set_attr "type" "sseadd")
1032 (set_attr "mode" "<MODE>")])
;; AVX three-operand packed min/max for the general case, i.e. when the
;; "_finite" variant does not match.
;; Operand 1 must be a register and carries no "%" commutative marker:
;; with NaNs or signed zeros the hardware min/max result depends on
;; operand order, so the operands must never be swapped here.  This
;; agrees with the expander, which forces operand 1 into a register
;; when !flag_finite_math_only, and with the two-operand SSE pattern,
;; which uses a plain register_operand for operand 1.
1034 (define_insn "*avx_<code><mode>3"
1035 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1037 (match_operand:AVXMODEF2P 1 "register_operand" "x")
1038 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
1039 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1040 "v<maxminfprefix>p<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1041 [(set_attr "type" "sseadd")
1042 (set_attr "prefix" "vex")
1043 (set_attr "mode" "<avxvecmode>")])
;; Legacy SSE two-operand packed min/max for the general case.
;; Operand 1 is a register tied to the destination ("0") and is not
;; marked commutative, so operand order is preserved.
1045 (define_insn "*<code><mode>3"
1046 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1048 (match_operand:SSEMODEF2P 1 "register_operand" "0")
1049 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
1050 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1051 "<maxminfprefix>p<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1052 [(set_attr "type" "sseadd")
1053 (set_attr "mode" "<MODE>")])
;; Scalar min/max inside a vector register, AVX form: a vec_merge whose
;; visible inputs are operands 1 ("x") and 2 ("xm"); emits the
;; three-operand scalar mnemonic and reports the scalar mode
;; (<ssescalarmode>) in its "mode" attribute.
;; NOTE(review): the inner operator and the merge tail are missing from
;; this listing.
1055 (define_insn "*avx_vm<code><mode>3"
1056 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1057 (vec_merge:SSEMODEF2P
1059 (match_operand:SSEMODEF2P 1 "register_operand" "x")
1060 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
1063 "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
1064 "v<maxminfprefix>s<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1065 [(set_attr "type" "sse")
1066 (set_attr "prefix" "vex")
1067 (set_attr "mode" "<ssescalarmode>")])
;; Scalar min/max inside a vector register, legacy SSE form: same
;; vec_merge shape as the AVX pattern, but operand 1 is tied to the
;; destination ("0") and the template is the two-operand mnemonic.
;; NOTE(review): the inner operator and the merge tail are missing from
;; this listing.
1069 (define_insn "<sse>_vm<code><mode>3"
1070 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1071 (vec_merge:SSEMODEF2P
1073 (match_operand:SSEMODEF2P 1 "register_operand" "0")
1074 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
1077 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1078 "<maxminfprefix>s<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1079 [(set_attr "type" "sseadd")
1080 (set_attr "mode" "<ssescalarmode>")])
1082 ;; These versions of the min/max patterns implement exactly the operations
1083 ;; min = (op1 < op2 ? op1 : op2)
1084 ;; max = (!(op1 < op2) ? op1 : op2)
1085 ;; Their operands are not commutative, and thus they may be used in the
1086 ;; presence of -0.0 and NaN.
;; Order-preserving packed minimum, AVX form: emits "vminp<suffix>"
;; with operand 1 in a register and operand 2 in register or memory.
;; Neither operand carries a "%" marker, so they are never swapped.
;; NOTE(review): the operator line and its code are missing from this
;; listing; the bracketed operand vector suggests an unspec.
1088 (define_insn "*avx_ieee_smin<mode>3"
1089 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1091 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
1092 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")]
1094 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1095 "vminp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1096 [(set_attr "type" "sseadd")
1097 (set_attr "prefix" "vex")
1098 (set_attr "mode" "<avxvecmode>")])
;; Order-preserving packed maximum, AVX form: emits "vmaxp<suffix>"
;; with operand 1 in a register and operand 2 in register or memory.
;; Neither operand carries a "%" marker, so they are never swapped.
;; NOTE(review): the operator line and its code are missing from this
;; listing; the bracketed operand vector suggests an unspec.
1100 (define_insn "*avx_ieee_smax<mode>3"
1101 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1103 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
1104 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")]
1106 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1107 "vmaxp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1108 [(set_attr "type" "sseadd")
1109 (set_attr "prefix" "vex")
1110 (set_attr "mode" "<avxvecmode>")])
;; Order-preserving packed minimum, legacy SSE form: operand 1 is tied
;; to the destination ("0") and the template is the two-operand
;; "minp<suffix>".  No "%" marker, so operands are never swapped.
;; NOTE(review): the operator line and its code are missing from this
;; listing.
1112 (define_insn "*ieee_smin<mode>3"
1113 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1115 [(match_operand:SSEMODEF2P 1 "register_operand" "0")
1116 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
1118 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1119 "minp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1120 [(set_attr "type" "sseadd")
1121 (set_attr "mode" "<MODE>")])
;; Order-preserving packed maximum, legacy SSE form: operand 1 is tied
;; to the destination ("0") and the template is the two-operand
;; "maxp<suffix>".  No "%" marker, so operands are never swapped.
;; NOTE(review): the operator line and its code are missing from this
;; listing.
1123 (define_insn "*ieee_smax<mode>3"
1124 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1126 [(match_operand:SSEMODEF2P 1 "register_operand" "0")
1127 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
1129 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1130 "maxp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1131 [(set_attr "type" "sseadd")
1132 (set_attr "mode" "<MODE>")])
;; V8SF add/subtract insn emitting "vaddsubps".  The visible RTL
;; combines an expression over operands 1 and 2 with
;; (minus op1 op2) built from match_dup references to the same operands.
;; NOTE(review): the enclosing operator, the lane-selection constant
;; and the insn condition are missing from this listing.
1134 (define_insn "avx_addsubv8sf3"
1135 [(set (match_operand:V8SF 0 "register_operand" "=x")
1138 (match_operand:V8SF 1 "register_operand" "x")
1139 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
1140 (minus:V8SF (match_dup 1) (match_dup 2))
1143 "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1144 [(set_attr "type" "sseadd")
1145 (set_attr "prefix" "vex")
1146 (set_attr "mode" "V8SF")])
;; V4DF add/subtract insn emitting "vaddsubpd"; same shape as the V8SF
;; pattern, with (minus op1 op2) expressed through match_dup.
;; NOTE(review): the enclosing operator, the lane-selection constant
;; and the insn condition are missing from this listing.
1148 (define_insn "avx_addsubv4df3"
1149 [(set (match_operand:V4DF 0 "register_operand" "=x")
1152 (match_operand:V4DF 1 "register_operand" "x")
1153 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
1154 (minus:V4DF (match_dup 1) (match_dup 2))
1157 "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1158 [(set_attr "type" "sseadd")
1159 (set_attr "prefix" "vex")
1160 (set_attr "mode" "V4DF")])
;; V4SF add/subtract insn, AVX three-operand form ("vaddsubps" with a
;; destination separate from both sources).
;; NOTE(review): the enclosing operator, the lane-selection constant
;; and the insn condition are missing from this listing.
1162 (define_insn "*avx_addsubv4sf3"
1163 [(set (match_operand:V4SF 0 "register_operand" "=x")
1166 (match_operand:V4SF 1 "register_operand" "x")
1167 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1168 (minus:V4SF (match_dup 1) (match_dup 2))
1171 "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1172 [(set_attr "type" "sseadd")
1173 (set_attr "prefix" "vex")
1174 (set_attr "mode" "V4SF")])
;; V4SF add/subtract insn, two-operand form: operand 1 is tied to the
;; destination ("0") and the template is "addsubps".  The insn sets
;; the "prefix_rep" attribute to "1".
;; NOTE(review): the enclosing operator, the lane-selection constant
;; and the insn condition are missing from this listing.
1176 (define_insn "sse3_addsubv4sf3"
1177 [(set (match_operand:V4SF 0 "register_operand" "=x")
1180 (match_operand:V4SF 1 "register_operand" "0")
1181 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1182 (minus:V4SF (match_dup 1) (match_dup 2))
1185 "addsubps\t{%2, %0|%0, %2}"
1186 [(set_attr "type" "sseadd")
1187 (set_attr "prefix_rep" "1")
1188 (set_attr "mode" "V4SF")])
;; V2DF add/subtract insn, AVX three-operand form ("vaddsubpd" with a
;; destination separate from both sources).
;; NOTE(review): the enclosing operator, the lane-selection constant
;; and the insn condition are missing from this listing.
1190 (define_insn "*avx_addsubv2df3"
1191 [(set (match_operand:V2DF 0 "register_operand" "=x")
1194 (match_operand:V2DF 1 "register_operand" "x")
1195 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1196 (minus:V2DF (match_dup 1) (match_dup 2))
1199 "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1200 [(set_attr "type" "sseadd")
1201 (set_attr "prefix" "vex")
1202 (set_attr "mode" "V2DF")])
;; V2DF add/subtract insn, two-operand form: operand 1 is tied to the
;; destination ("0") and the template is "addsubpd".  The insn sets
;; the "atom_unit" attribute to "complex".
;; NOTE(review): the enclosing operator, the lane-selection constant
;; and the insn condition are missing from this listing.
1204 (define_insn "sse3_addsubv2df3"
1205 [(set (match_operand:V2DF 0 "register_operand" "=x")
1208 (match_operand:V2DF 1 "register_operand" "0")
1209 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1210 (minus:V2DF (match_dup 1) (match_dup 2))
1213 "addsubpd\t{%2, %0|%0, %2}"
1214 [(set_attr "type" "sseadd")
1215 (set_attr "atom_unit" "complex")
1216 (set_attr "mode" "V2DF")])
;; Horizontal add/subtract on V4DF, emitting "vh<add|sub>pd" (the
;; mnemonic comes from <plusminus_mnemonic>).  The RTL picks elements
;; 0..3 of operand 1 and of operand 2 one at a time with vec_select.
;; NOTE(review): the operator lines that combine the selected elements
;; (and the vec_concat structure around them) are missing from this
;; listing, as is the insn condition.
1218 (define_insn "avx_h<plusminus_insn>v4df3"
1219 [(set (match_operand:V4DF 0 "register_operand" "=x")
1224 (match_operand:V4DF 1 "register_operand" "x")
1225 (parallel [(const_int 0)]))
1226 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1228 (vec_select:DF (match_dup 1) (parallel [(const_int 2)]))
1229 (vec_select:DF (match_dup 1) (parallel [(const_int 3)]))))
1233 (match_operand:V4DF 2 "nonimmediate_operand" "xm")
1234 (parallel [(const_int 0)]))
1235 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))
1237 (vec_select:DF (match_dup 2) (parallel [(const_int 2)]))
1238 (vec_select:DF (match_dup 2) (parallel [(const_int 3)]))))))]
1240 "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
1241 [(set_attr "type" "sseadd")
1242 (set_attr "prefix" "vex")
1243 (set_attr "mode" "V4DF")])
;; Horizontal add/subtract on V8SF, emitting "vh<add|sub>ps".
;; The RTL selects SF elements individually: first elements 0..3 of
;; operand 1, then 0..3 of operand 2, then 4..7 of operand 1, then
;; 4..7 of operand 2 -- i.e. the two operands alternate per group of
;; four elements rather than operand 1 being consumed in full first.
;; NOTE(review): the operator lines that combine the selected elements
;; (and the vec_concat structure around them) are missing from this
;; listing, as is the insn condition.
1245 (define_insn "avx_h<plusminus_insn>v8sf3"
1246 [(set (match_operand:V8SF 0 "register_operand" "=x")
1252 (match_operand:V8SF 1 "register_operand" "x")
1253 (parallel [(const_int 0)]))
1254 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1256 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1257 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1261 (match_operand:V8SF 2 "nonimmediate_operand" "xm")
1262 (parallel [(const_int 0)]))
1263 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1265 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1266 (vec_select:SF (match_dup 2) (parallel [(const_int 3)])))))
1270 (vec_select:SF (match_dup 1) (parallel [(const_int 4)]))
1271 (vec_select:SF (match_dup 1) (parallel [(const_int 5)])))
1273 (vec_select:SF (match_dup 1) (parallel [(const_int 6)]))
1274 (vec_select:SF (match_dup 1) (parallel [(const_int 7)]))))
1277 (vec_select:SF (match_dup 2) (parallel [(const_int 4)]))
1278 (vec_select:SF (match_dup 2) (parallel [(const_int 5)])))
1280 (vec_select:SF (match_dup 2) (parallel [(const_int 6)]))
1281 (vec_select:SF (match_dup 2) (parallel [(const_int 7)])))))))]
1283 "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1284 [(set_attr "type" "sseadd")
1285 (set_attr "prefix" "vex")
1286 (set_attr "mode" "V8SF")])
;; Horizontal add/subtract on V4SF, AVX three-operand form
;; ("vh<add|sub>ps").  Elements 0..3 of operand 1 and then of operand 2
;; are selected individually with vec_select.
;; NOTE(review): the operator lines that combine the selected elements
;; and the insn condition are missing from this listing.
1288 (define_insn "*avx_h<plusminus_insn>v4sf3"
1289 [(set (match_operand:V4SF 0 "register_operand" "=x")
1294 (match_operand:V4SF 1 "register_operand" "x")
1295 (parallel [(const_int 0)]))
1296 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1298 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1299 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1303 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
1304 (parallel [(const_int 0)]))
1305 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1307 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1308 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
1310 "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1311 [(set_attr "type" "sseadd")
1312 (set_attr "prefix" "vex")
1313 (set_attr "mode" "V4SF")])
;; Horizontal add/subtract on V4SF, two-operand form: operand 1 is tied
;; to the destination ("0") and the template is "h<add|sub>ps".
;; This is the pattern behind the gen_sse3_haddv4sf3 generator name
;; (via the <plusminus_insn> substitution in the pattern name).
;; NOTE(review): the operator lines that combine the selected elements
;; and the insn condition are missing from this listing.
1315 (define_insn "sse3_h<plusminus_insn>v4sf3"
1316 [(set (match_operand:V4SF 0 "register_operand" "=x")
1321 (match_operand:V4SF 1 "register_operand" "0")
1322 (parallel [(const_int 0)]))
1323 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1325 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1326 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1330 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
1331 (parallel [(const_int 0)]))
1332 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1334 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1335 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
1337 "h<plusminus_mnemonic>ps\t{%2, %0|%0, %2}"
1338 [(set_attr "type" "sseadd")
1339 (set_attr "atom_unit" "complex")
1340 (set_attr "prefix_rep" "1")
1341 (set_attr "mode" "V4SF")])
;; Horizontal add/subtract on V2DF, AVX three-operand form
;; ("vh<add|sub>pd").  Elements 0 and 1 of operand 1, then of
;; operand 2, are selected individually with vec_select.
;; NOTE(review): the operator lines that combine the selected elements
;; and the insn condition are missing from this listing.
1343 (define_insn "*avx_h<plusminus_insn>v2df3"
1344 [(set (match_operand:V2DF 0 "register_operand" "=x")
1348 (match_operand:V2DF 1 "register_operand" "x")
1349 (parallel [(const_int 0)]))
1350 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1353 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
1354 (parallel [(const_int 0)]))
1355 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1357 "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
1358 [(set_attr "type" "sseadd")
1359 (set_attr "prefix" "vex")
1360 (set_attr "mode" "V2DF")])
;; Horizontal add/subtract on V2DF, two-operand form: operand 1 is tied
;; to the destination ("0") and the template is "h<add|sub>pd".
;; NOTE(review): the operator lines that combine the selected elements
;; and the insn condition are missing from this listing.
1362 (define_insn "sse3_h<plusminus_insn>v2df3"
1363 [(set (match_operand:V2DF 0 "register_operand" "=x")
1367 (match_operand:V2DF 1 "register_operand" "0")
1368 (parallel [(const_int 0)]))
1369 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1372 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
1373 (parallel [(const_int 0)]))
1374 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1376 "h<plusminus_mnemonic>pd\t{%2, %0|%0, %2}"
1377 [(set_attr "type" "sseadd")
1378 (set_attr "mode" "V2DF")])
;; Sum-reduction expander for V4SF (operand 0 = result, operand 1 =
;; input vector).  Two code paths are visible:
;;  - two back-to-back gen_sse3_haddv4sf3 calls through a fresh
;;    temporary, each feeding the same register as both sources;
;;  - ix86_expand_reduc_v4sf with gen_addv4sf3 as the combining insn.
;; NOTE(review): the condition of the expander and the test that
;; chooses between the two paths are missing from this listing.
1380 (define_expand "reduc_splus_v4sf"
1381 [(match_operand:V4SF 0 "register_operand" "")
1382 (match_operand:V4SF 1 "register_operand" "")]
1387 rtx tmp = gen_reg_rtx (V4SFmode);
1388 emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
1389 emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
1392 ix86_expand_reduc_v4sf (gen_addv4sf3, operands[0], operands[1]);
;; Sum-reduction expander for V2DF: a single gen_sse3_haddv2df3 with
;; operand 1 used as both sources and operand 0 as the destination.
;; NOTE(review): the enabling condition is not visible in this listing.
1396 (define_expand "reduc_splus_v2df"
1397 [(match_operand:V2DF 0 "register_operand" "")
1398 (match_operand:V2DF 1 "register_operand" "")]
1401 emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
;; Maximum-reduction expander for V4SF: delegates to
;; ix86_expand_reduc_v4sf with gen_smaxv4sf3 as the combining insn.
;; NOTE(review): the enabling condition is not visible in this listing.
1405 (define_expand "reduc_smax_v4sf"
1406 [(match_operand:V4SF 0 "register_operand" "")
1407 (match_operand:V4SF 1 "register_operand" "")]
1410 ix86_expand_reduc_v4sf (gen_smaxv4sf3, operands[0], operands[1]);
;; Minimum-reduction expander for V4SF: delegates to
;; ix86_expand_reduc_v4sf with gen_sminv4sf3 as the combining insn.
;; NOTE(review): the enabling condition is not visible in this listing.
1414 (define_expand "reduc_smin_v4sf"
1415 [(match_operand:V4SF 0 "register_operand" "")
1416 (match_operand:V4SF 1 "register_operand" "")]
1419 ix86_expand_reduc_v4sf (gen_sminv4sf3, operands[0], operands[1]);
1423 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1425 ;; Parallel floating point comparisons
1427 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Packed AVX compare with an immediate predicate: emits
;; "vcmpp<suffix>" where operand 3 (predicate const_0_to_31_operand,
;; constraint "n") selects the comparison and is printed as the
;; immediate; hence length_immediate "1".
;; NOTE(review): the operator line, its unspec code and the insn
;; condition are missing from this listing.
1429 (define_insn "avx_cmpp<avxmodesuffixf2c><mode>3"
1430 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1432 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
1433 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
1434 (match_operand:SI 3 "const_0_to_31_operand" "n")]
1437 "vcmpp<avxmodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1438 [(set_attr "type" "ssecmp")
1439 (set_attr "length_immediate" "1")
1440 (set_attr "prefix" "vex")
1441 (set_attr "mode" "<MODE>")])
;; Scalar AVX compare with an immediate predicate: emits
;; "vcmps<suffix>" where operand 3 (predicate const_0_to_31_operand,
;; constraint "n") selects the comparison; the result is merged into a
;; vector via vec_merge.
;; Operand 0 is the output of the insn and therefore carries the "=x"
;; constraint like every other insn in this group; an empty constraint
;; would give the register allocator neither a register class nor an
;; output marker for it.
;; NOTE(review): the unspec operator, the merge tail and the insn
;; condition are missing from this listing.
1443 (define_insn "avx_cmps<ssemodesuffixf2c><mode>3"
1444 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1445 (vec_merge:SSEMODEF2P
1447 [(match_operand:SSEMODEF2P 1 "register_operand" "x")
1448 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
1449 (match_operand:SI 3 "const_0_to_31_operand" "n")]
1454 "vcmps<ssemodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1455 [(set_attr "type" "ssecmp")
1456 (set_attr "length_immediate" "1")
1457 (set_attr "prefix" "vex")
1458 (set_attr "mode" "<ssescalarmode>")])
1460 ;; We don't promote 128-bit vector compare intrinsics, but the vectorizer
1461 ;; may generate 256-bit vector compare instructions.
;; AVX packed compare producing a mask.  The comparison itself is
;; operand 3, matched by the avx_comparison_float_operator predicate,
;; and is printed into the mnemonic through "%D3".
1462 (define_insn "*avx_maskcmp<mode>3"
1463 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1464 (match_operator:AVXMODEF2P 3 "avx_comparison_float_operator"
1465 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
1466 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")]))]
1467 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1468 "vcmp%D3p<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1469 [(set_attr "type" "ssecmp")
1470 (set_attr "prefix" "vex")
1471 (set_attr "length_immediate" "1")
1472 (set_attr "mode" "<avxvecmode>")])
;; Legacy SSE compare producing a mask, over the SSEMODEF4 modes.
;; The comparison is operand 3 (sse_comparison_operator) and is printed
;; through "%D3"; operand 1 is tied to the destination ("0").
;; NOTE(review): the first line of the insn condition is missing from
;; this listing -- the visible part begins with "&&".
1474 (define_insn "<sse>_maskcmp<mode>3"
1475 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x")
1476 (match_operator:SSEMODEF4 3 "sse_comparison_operator"
1477 [(match_operand:SSEMODEF4 1 "register_operand" "0")
1478 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "xm")]))]
1480 && (SSE_FLOAT_MODE_P (<MODE>mode) || SSE_VEC_FLOAT_MODE_P (<MODE>mode))"
1481 "cmp%D3<ssemodesuffixf4>\t{%2, %0|%0, %2}"
1482 [(set_attr "type" "ssecmp")
1483 (set_attr "length_immediate" "1")
1484 (set_attr "mode" "<MODE>")])
;; Scalar legacy SSE compare producing a mask, merged into a vector via
;; vec_merge.  The comparison is operand 3 (sse_comparison_operator),
;; printed through "%D3"; operand 1 is tied to the destination ("0").
;; NOTE(review): the tail of the vec_merge is missing from this listing.
1486 (define_insn "<sse>_vmmaskcmp<mode>3"
1487 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1488 (vec_merge:SSEMODEF2P
1489 (match_operator:SSEMODEF2P 3 "sse_comparison_operator"
1490 [(match_operand:SSEMODEF2P 1 "register_operand" "0")
1491 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")])
1494 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1495 "cmp%D3s<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1496 [(set_attr "type" "ssecmp")
1497 (set_attr "length_immediate" "1")
1498 (set_attr "mode" "<ssescalarmode>")])
;; Scalar compare that sets the flags register in CCFP mode.  Both
;; inputs are vector operands from which element 0 is selected
;; (parallel [(const_int 0)]).  "%v" plus prefix "maybe_vex" covers the
;; legacy and VEX spellings of comis<suffix>.
;; The prefix_data16 attribute is computed from the insn mode; the
;; visible arm yields "0" when the mode is not DF.
;; NOTE(review): the compare/vec_select operator lines and the DF arm
;; of the if_then_else are missing from this listing.
1500 (define_insn "<sse>_comi"
1501 [(set (reg:CCFP FLAGS_REG)
1504 (match_operand:<ssevecmode> 0 "register_operand" "x")
1505 (parallel [(const_int 0)]))
1507 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
1508 (parallel [(const_int 0)]))))]
1509 "SSE_FLOAT_MODE_P (<MODE>mode)"
1510 "%vcomis<ssemodefsuffix>\t{%1, %0|%0, %1}"
1511 [(set_attr "type" "ssecomi")
1512 (set_attr "prefix" "maybe_vex")
1513 (set_attr "prefix_rep" "0")
1514 (set (attr "prefix_data16")
1515 (if_then_else (eq_attr "mode" "DF")
1517 (const_string "0")))
1518 (set_attr "mode" "<MODE>")])
;; Same shape as the comi pattern, but the flags register is set in
;; CCFPU mode and the mnemonic is ucomis<suffix>.  Element 0 of each
;; vector operand is compared.
;; The prefix_data16 attribute is computed from the insn mode; the
;; visible arm yields "0" when the mode is not DF.
;; NOTE(review): the compare/vec_select operator lines and the DF arm
;; of the if_then_else are missing from this listing.
1520 (define_insn "<sse>_ucomi"
1521 [(set (reg:CCFPU FLAGS_REG)
1524 (match_operand:<ssevecmode> 0 "register_operand" "x")
1525 (parallel [(const_int 0)]))
1527 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
1528 (parallel [(const_int 0)]))))]
1529 "SSE_FLOAT_MODE_P (<MODE>mode)"
1530 "%vucomis<ssemodefsuffix>\t{%1, %0|%0, %1}"
1531 [(set_attr "type" "ssecomi")
1532 (set_attr "prefix" "maybe_vex")
1533 (set_attr "prefix_rep" "0")
1534 (set (attr "prefix_data16")
1535 (if_then_else (eq_attr "mode" "DF")
1537 (const_string "0")))
1538 (set_attr "mode" "<MODE>")])
;; Vector conditional-select expander over the SSEMODEF2P modes:
;; operand 0 = (operand 3 applied to operands 4 and 5) ? operand 1
;; : operand 2.  Operand 3 is matched with an empty predicate, so any
;; comparison code is accepted at this level.
;; All the work is delegated to ix86_expand_fp_vcond, whose boolean
;; result is captured in "ok".
;; NOTE(review): what is done with "ok" afterwards is not visible in
;; this listing.
1540 (define_expand "vcond<mode>"
1541 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1542 (if_then_else:SSEMODEF2P
1543 (match_operator 3 ""
1544 [(match_operand:SSEMODEF2P 4 "nonimmediate_operand" "")
1545 (match_operand:SSEMODEF2P 5 "nonimmediate_operand" "")])
1546 (match_operand:SSEMODEF2P 1 "general_operand" "")
1547 (match_operand:SSEMODEF2P 2 "general_operand" "")))]
1548 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1550 bool ok = ix86_expand_fp_vcond (operands);
1555 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1557 ;; Parallel floating point logical operations
1559 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Packed AVX and-not: emits the three-operand "vandnp<suffix>" with
;; operand 1 in a register and operand 2 in register or memory.
;; NOTE(review): the and/not operator lines are missing from this
;; listing; the closing parenthesis after operand 1 indicates it is the
;; operand wrapped by the inner operator.
1561 (define_insn "avx_andnot<mode>3"
1562 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1565 (match_operand:AVXMODEF2P 1 "register_operand" "x"))
1566 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
1567 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1568 "vandnp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1569 [(set_attr "type" "sselog")
1570 (set_attr "prefix" "vex")
1571 (set_attr "mode" "<avxvecmode>")])
;; Packed legacy SSE and-not: operand 1 is tied to the destination
;; ("0") and the template is the two-operand "andnp<suffix>".
;; NOTE(review): the and/not operator lines are missing from this
;; listing.
1573 (define_insn "<sse>_andnot<mode>3"
1574 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1577 (match_operand:SSEMODEF2P 1 "register_operand" "0"))
1578 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
1579 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1580 "andnp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1581 [(set_attr "type" "sselog")
1582 (set_attr "mode" "<MODE>")])
;; Expander for the any_logic operations over the AVX256MODEF2P modes.
;; The only preparation step is ix86_fixup_binary_operands_no_copy;
;; the RTL template is then emitted as written.
1584 (define_expand "<code><mode>3"
1585 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
1586 (any_logic:AVX256MODEF2P
1587 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
1588 (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
1589 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
1590 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Packed AVX logic insn (any_logic) in three-operand form.  Operand 1
;; is marked commutative ("%x"); the pattern additionally requires
;; ix86_binary_operator_ok to accept the operands.
1592 (define_insn "*avx_<code><mode>3"
1593 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1594 (any_logic:AVXMODEF2P
1595 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
1596 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
1597 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
1598 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1599 "v<logicprefix>p<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1600 [(set_attr "type" "sselog")
1601 (set_attr "prefix" "vex")
1602 (set_attr "mode" "<avxvecmode>")])
;; Expander for the any_logic operations over the SSEMODEF2P modes.
;; The only preparation step is ix86_fixup_binary_operands_no_copy;
;; the RTL template is then emitted as written.
1604 (define_expand "<code><mode>3"
1605 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1606 (any_logic:SSEMODEF2P
1607 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
1608 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
1609 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1610 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Packed legacy SSE logic insn (any_logic) in two-operand form.
;; Operand 1 is tied to the destination and marked commutative ("%0");
;; the pattern additionally requires ix86_binary_operator_ok.
1612 (define_insn "*<code><mode>3"
1613 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1614 (any_logic:SSEMODEF2P
1615 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
1616 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
1617 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
1618 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1619 "<logicprefix>p<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1620 [(set_attr "type" "sselog")
1621 (set_attr "mode" "<MODE>")])
;; copysign expander over the SSEMODEF2P modes.  The preparation code
;; builds a mask in operand 3 with ix86_build_signbit_mask (arguments:
;; the scalar mode, 1, 0) and allocates two fresh pseudos, operands 4
;; and 5.  The visible RTL then:
;;  - pairs (not mask) with operand 1,
;;  - computes (and mask operand 2),
;;  - stores (ior operand 4, operand 5) into operand 0.
;; NOTE(review): the set destinations of the first two steps and the
;; operator combining (not mask) with operand 1 are missing from this
;; listing; presumably they target operands 4 and 5 -- confirm against
;; the full file.
1623 (define_expand "copysign<mode>3"
1626 (not:SSEMODEF2P (match_dup 3))
1627 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")))
1629 (and:SSEMODEF2P (match_dup 3)
1630 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))
1631 (set (match_operand:SSEMODEF2P 0 "register_operand" "")
1632 (ior:SSEMODEF2P (match_dup 4) (match_dup 5)))]
1633 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1635 operands[3] = ix86_build_signbit_mask (<ssescalarmode>mode, 1, 0);
1637 operands[4] = gen_reg_rtx (<MODE>mode);
1638 operands[5] = gen_reg_rtx (<MODE>mode);
1641 ;; Also define scalar versions. These are used for abs, neg, and
1642 ;; conditional move. Using subregs into vector modes causes register
1643 ;; allocation lossage. These patterns do not allow memory operands
1644 ;; because the native instructions read the full 128-bits.
;; Scalar (MODEF) and-not, AVX form.  All three operands are
;; register-only; the packed "vandnp<suffix>" mnemonic is used and the
;; "mode" attribute reports the corresponding vector mode.
;; NOTE(review): the and/not operator lines are missing from this
;; listing.
1646 (define_insn "*avx_andnot<mode>3"
1647 [(set (match_operand:MODEF 0 "register_operand" "=x")
1650 (match_operand:MODEF 1 "register_operand" "x"))
1651 (match_operand:MODEF 2 "register_operand" "x")))]
1652 "AVX_FLOAT_MODE_P (<MODE>mode)"
1653 "vandnp<ssemodefsuffix>\t{%2, %1, %0|%0, %1, %2}"
1654 [(set_attr "type" "sselog")
1655 (set_attr "prefix" "vex")
1656 (set_attr "mode" "<ssevecmode>")])
;; Scalar (MODEF) and-not, legacy SSE form.  Register-only operands;
;; operand 1 is tied to the destination ("0") and the packed
;; "andnp<suffix>" mnemonic is used.
;; NOTE(review): the and/not operator lines are missing from this
;; listing.
1658 (define_insn "*andnot<mode>3"
1659 [(set (match_operand:MODEF 0 "register_operand" "=x")
1662 (match_operand:MODEF 1 "register_operand" "0"))
1663 (match_operand:MODEF 2 "register_operand" "x")))]
1664 "SSE_FLOAT_MODE_P (<MODE>mode)"
1665 "andnp<ssemodefsuffix>\t{%2, %0|%0, %2}"
1666 [(set_attr "type" "sselog")
1667 (set_attr "mode" "<ssevecmode>")])
;; Scalar (MODEF) logic insn, AVX form.  Register-only operands; the
;; packed "v<logic>p<suffix>" mnemonic is used and the "mode" attribute
;; reports the corresponding vector mode.
;; NOTE(review): the operator line is missing from this listing.
1669 (define_insn "*avx_<code><mode>3"
1670 [(set (match_operand:MODEF 0 "register_operand" "=x")
1672 (match_operand:MODEF 1 "register_operand" "x")
1673 (match_operand:MODEF 2 "register_operand" "x")))]
1674 "AVX_FLOAT_MODE_P (<MODE>mode)"
1675 "v<logicprefix>p<ssemodefsuffix>\t{%2, %1, %0|%0, %1, %2}"
1676 [(set_attr "type" "sselog")
1677 (set_attr "prefix" "vex")
1678 (set_attr "mode" "<ssevecmode>")])
;; Scalar (MODEF) logic insn, legacy SSE form.  Register-only operands;
;; operand 1 is tied to the destination ("0") and the packed
;; "<logic>p<suffix>" mnemonic is used.
;; NOTE(review): the operator line is missing from this listing.
1680 (define_insn "*<code><mode>3"
1681 [(set (match_operand:MODEF 0 "register_operand" "=x")
1683 (match_operand:MODEF 1 "register_operand" "0")
1684 (match_operand:MODEF 2 "register_operand" "x")))]
1685 "SSE_FLOAT_MODE_P (<MODE>mode)"
1686 "<logicprefix>p<ssemodefsuffix>\t{%2, %0|%0, %2}"
1687 [(set_attr "type" "sselog")
1688 (set_attr "mode" "<ssevecmode>")])
1690 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1692 ;; FMA4 floating point multiply/accumulate instructions. This
1693 ;; includes both the scalar and the vector versions of the instructions.
1696 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1698 ;; In order to match (*a * *b) + *c, particularly when vectorizing, allow
1699 ;; combine to generate a multiply/add with two memory references. We then
1700 ;; split this insn by first loading the destination register with one of the
1701 ;; memory operands. If we don't manage to split the insn, reload will
1702 ;; generate the appropriate moves. This is needed because combine
1703 ;; has already folded one of the memory references into both the multiply and
1704 ;; add insns, and it can't generate a new pseudo. I.e.:
1705 ;; (set (reg1) (mem (addr1)))
1706 ;; (set (reg2) (mult (reg1) (mem (addr2))))
1707 ;; (set (reg3) (plus (reg2) (mem (addr3))))
;; FMA4 multiply-add over the FMA4MODEF4 modes, emitting "vfmadd".
;; Two constraint alternatives: in the first, operand 2 is a register
;; and operand 3 may be register or memory; in the second, operand 2 is
;; memory and operand 3 is a register.  Operand 1 is commutative ("%").
;; NOTE(review): the plus/mult operator lines are missing from this
;; listing.
1709 (define_insn "fma4_fmadd<mode>4256"
1710 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
1713 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x")
1714 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m"))
1715 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x")))]
1716 "TARGET_FMA4 && TARGET_FUSED_MADD"
1717 "vfmadd<fma4modesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1718 [(set_attr "type" "ssemuladd")
1719 (set_attr "mode" "<MODE>")])
1721 ;; Floating multiply and subtract.
;; FMA4MODEF4 variant emitting "vfmsub".  Same two constraint
;; alternatives as the multiply-add pattern: (op2 register, op3
;; register-or-memory) or (op2 memory, op3 register).
;; NOTE(review): the minus/mult operator lines are missing from this
;; listing.
1722 (define_insn "fma4_fmsub<mode>4256"
1723 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
1726 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x")
1727 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m"))
1728 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x")))]
1729 "TARGET_FMA4 && TARGET_FUSED_MADD"
1730 "vfmsub<fma4modesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1731 [(set_attr "type" "ssemuladd")
1732 (set_attr "mode" "<MODE>")])
1734 ;; Floating point negative multiply and add.
1735 ;; Rewrite (- (a * b) + c) into the canonical form: c - (a * b).
;; FMA4MODEF4 variant emitting "vfnmadd".  In the RTL, operand 3 (the
;; addend) appears first and the product operands 1 and 2 follow,
;; matching the "c - (a * b)" canonical form.
;; NOTE(review): the minus/mult operator lines are missing from this
;; listing.
1736 (define_insn "fma4_fnmadd<mode>4256"
1737 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
1739 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x")
1741 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x")
1742 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m"))))]
1743 "TARGET_FMA4 && TARGET_FUSED_MADD"
1744 "vfnmadd<fma4modesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1745 [(set_attr "type" "ssemuladd")
1746 (set_attr "mode" "<MODE>")])
1748 ;; Floating point negative multiply and subtract.
;; FMA4MODEF4 variant emitting "vfnmsub".  The extra closing
;; parenthesis after operand 1 shows that operand 1 alone is wrapped by
;; an inner operator before being combined with operand 2.
;; NOTE(review): the minus/mult/neg operator lines are missing from
;; this listing.
1749 (define_insn "fma4_fnmsub<mode>4256"
1750 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
1754 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x"))
1755 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m"))
1756 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x")))]
1757 "TARGET_FMA4 && TARGET_FUSED_MADD"
1758 "vfnmsub<fma4modesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1759 [(set_attr "type" "ssemuladd")
1760 (set_attr "mode" "<MODE>")])
;; FMA4 multiply-add over the SSEMODEF4 modes, emitting "vfmadd".
;; Two constraint alternatives: (op2 register, op3 register-or-memory)
;; or (op2 memory, op3 register).  Operand 1 is commutative ("%").
;; NOTE(review): the plus/mult operator lines are missing from this
;; listing.
1762 (define_insn "fma4_fmadd<mode>4"
1763 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x")
1766 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%x,x")
1767 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,m"))
1768 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x")))]
1769 "TARGET_FMA4 && TARGET_FUSED_MADD"
1770 "vfmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1771 [(set_attr "type" "ssemuladd")
1772 (set_attr "mode" "<MODE>")])
1774 ;; For the scalar operations, use operand1 for the upper words that aren't
1775 ;; modified, so restrict the forms that are generated.
1776 ;; Scalar version of fmadd.
;; vec_merge form of multiply-add over SSEMODEF2P, using the
;; <ssemodesuffixf2s> suffix in the "vfmadd" mnemonic.  Constraint
;; alternatives are the same as for the packed pattern.
;; NOTE(review): the inner operator lines and the tail of the
;; vec_merge are missing from this listing.
1777 (define_insn "fma4_vmfmadd<mode>4"
1778 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1779 (vec_merge:SSEMODEF2P
1782 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
1783 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
1784 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1787 "TARGET_FMA4 && TARGET_FUSED_MADD"
1788 "vfmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1789 [(set_attr "type" "ssemuladd")
1790 (set_attr "mode" "<MODE>")])
1792 ;; Floating multiply and subtract.
1793 ;; Allow two memory operands the same as fmadd.
;; SSEMODEF4 variant emitting "vfmsub".  Two constraint alternatives:
;; (op2 register, op3 register-or-memory) or (op2 memory, op3
;; register).
;; NOTE(review): the minus/mult operator lines are missing from this
;; listing.
1794 (define_insn "fma4_fmsub<mode>4"
1795 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x")
1798 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%x,x")
1799 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,m"))
1800 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x")))]
1801 "TARGET_FMA4 && TARGET_FUSED_MADD"
1802 "vfmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1803 [(set_attr "type" "ssemuladd")
1804 (set_attr "mode" "<MODE>")])
1806 ;; For the scalar operations, use operand1 for the upper words that aren't
1807 ;; modified, so restrict the forms that are generated.
1808 ;; Scalar version of fmsub.
;; vec_merge form of multiply-subtract over SSEMODEF2P, using the
;; <ssemodesuffixf2s> suffix in the "vfmsub" mnemonic.
;; NOTE(review): the inner operator lines and the tail of the
;; vec_merge are missing from this listing.
1809 (define_insn "fma4_vmfmsub<mode>4"
1810 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1811 (vec_merge:SSEMODEF2P
1814 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
1815 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
1816 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1819 "TARGET_FMA4 && TARGET_FUSED_MADD"
1820 "vfmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1821 [(set_attr "type" "ssemuladd")
1822 (set_attr "mode" "<MODE>")])
1824 ;; Floating point negative multiply and add.
1825 ;; Rewrite (- (a * b) + c) into the canonical form: c - (a * b).
;; SSEMODEF4 variant emitting "vfnmadd".  Operand 3 (the addend)
;; appears first in the RTL, followed by the product operands 1 and 2,
;; matching the "c - (a * b)" canonical form.
;; NOTE(review): the minus/mult operator lines are missing from this
;; listing.
1826 (define_insn "fma4_fnmadd<mode>4"
1827 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x")
1829 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x")
1831 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%x,x")
1832 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,m"))))]
1833 "TARGET_FMA4 && TARGET_FUSED_MADD"
1834 "vfnmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1835 [(set_attr "type" "ssemuladd")
1836 (set_attr "mode" "<MODE>")])
1838 ;; For the scalar operations, use operand1 for the upper words that aren't
1839 ;; modified, so restrict the forms that are generated.
1840 ;; Scalar version of fnmadd.
;; vec_merge form of negated multiply-add over SSEMODEF2P, using the
;; <ssemodesuffixf2s> suffix in the "vfnmadd" mnemonic.  Operand 3
;; precedes the product operands in the RTL.
;; NOTE(review): the inner operator lines and the tail of the
;; vec_merge are missing from this listing.
1841 (define_insn "fma4_vmfnmadd<mode>4"
1842 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1843 (vec_merge:SSEMODEF2P
1845 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")
1847 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
1848 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m")))
1851 "TARGET_FMA4 && TARGET_FUSED_MADD"
1852 "vfnmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1853 [(set_attr "type" "ssemuladd")
1854 (set_attr "mode" "<MODE>")])
1856 ;; Floating point negative multiply and subtract.
1857 ;; Rewrite (- (a * b) - c) into the canonical form: ((-a) * b) - c.
;; SSEMODEF4 variant emitting "vfnmsub".  The extra closing
;; parenthesis after operand 1 shows that operand 1 alone is wrapped by
;; an inner operator, consistent with the "((-a) * b) - c" canonical
;; form.
;; NOTE(review): the minus/mult/neg operator lines are missing from
;; this listing.
1858 (define_insn "fma4_fnmsub<mode>4"
1859 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x")
1863 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%x,x"))
1864 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,m"))
1865 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x")))]
1866 "TARGET_FMA4 && TARGET_FUSED_MADD"
1867 "vfnmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1868 [(set_attr "type" "ssemuladd")
1869 (set_attr "mode" "<MODE>")])
1871 ;; For the scalar operations, use operand1 for the upper words that aren't
1872 ;; modified, so restrict the forms that are generated.
1873 ;; Scalar version of fnmsub.
;; vec_merge form of negated multiply-subtract over SSEMODEF2P, using
;; the <ssemodesuffixf2s> suffix in the "vfnmsub" mnemonic.
;; NOTE(review): the inner operator lines and the tail of the
;; vec_merge are missing from this listing.
1874 (define_insn "fma4_vmfnmsub<mode>4"
1875 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1876 (vec_merge:SSEMODEF2P
1880 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x"))
1881 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
1882 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1885 "TARGET_FMA4 && TARGET_FUSED_MADD"
1886 "vfnmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1887 [(set_attr "type" "ssemuladd")
1888 (set_attr "mode" "<MODE>")])
;; Intrinsic flavour of the FMA4MODEF4 multiply-add: the arithmetic
;; expression is wrapped in UNSPEC_FMA4_INTRINSIC.  Emits "vfmadd"
;; with the same two constraint alternatives as the plain pattern.
;; NOTE(review): the unspec/plus/mult operator lines are missing from
;; this listing.
1890 (define_insn "fma4i_fmadd<mode>4256"
1891 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
1895 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x")
1896 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m"))
1897 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x"))]
1898 UNSPEC_FMA4_INTRINSIC))]
1899 "TARGET_FMA4 && TARGET_FUSED_MADD"
1900 "vfmadd<fma4modesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1901 [(set_attr "type" "ssemuladd")
1902 (set_attr "mode" "<MODE>")])
;; Intrinsic flavour of the FMA4MODEF4 multiply-subtract: the
;; arithmetic expression is wrapped in UNSPEC_FMA4_INTRINSIC.  Emits
;; "vfmsub".
;; NOTE(review): the unspec/minus/mult operator lines are missing from
;; this listing.
1904 (define_insn "fma4i_fmsub<mode>4256"
1905 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
1909 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x")
1910 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m"))
1911 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x"))]
1912 UNSPEC_FMA4_INTRINSIC))]
1913 "TARGET_FMA4 && TARGET_FUSED_MADD"
1914 "vfmsub<fma4modesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1915 [(set_attr "type" "ssemuladd")
1916 (set_attr "mode" "<MODE>")])
;; Named insn fma4i_fnmadd<mode>4256 (iterated over FMA4MODEF4).  Tagged
;; with UNSPEC_FMA4_INTRINSIC; emits vfnmadd<fma4modesuffixf4> under
;; TARGET_FMA4 && TARGET_FUSED_MADD.
;; Operand 3 appears first in the RTL template, ahead of operands 1 and 2;
;; the assembler operand order in the output template is unchanged.
1918 (define_insn "fma4i_fnmadd<mode>4256"
1919 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
1922 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x")
1924 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x")
1925 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m")))]
1926 UNSPEC_FMA4_INTRINSIC))]
1927 "TARGET_FMA4 && TARGET_FUSED_MADD"
1928 "vfnmadd<fma4modesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1929 [(set_attr "type" "ssemuladd")
1930 (set_attr "mode" "<MODE>")])
;; Named insn fma4i_fnmsub<mode>4256 (iterated over FMA4MODEF4).  Tagged
;; with UNSPEC_FMA4_INTRINSIC; emits vfnmsub<fma4modesuffixf4> under
;; TARGET_FMA4 && TARGET_FUSED_MADD.
;; At most one of operands 2 and 3 may be memory ("x,m" / "xm,x").
1932 (define_insn "fma4i_fnmsub<mode>4256"
1933 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
1938 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x"))
1939 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m"))
1940 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x"))]
1941 UNSPEC_FMA4_INTRINSIC))]
1942 "TARGET_FMA4 && TARGET_FUSED_MADD"
1943 "vfnmsub<fma4modesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1944 [(set_attr "type" "ssemuladd")
1945 (set_attr "mode" "<MODE>")])
;; Named insn fma4i_fmadd<mode>4 (iterated over SSEMODEF2P).  Tagged with
;; UNSPEC_FMA4_INTRINSIC; emits vfmadd<ssemodesuffixf4> under
;; TARGET_FMA4 && TARGET_FUSED_MADD.
;; At most one of operands 2 and 3 may be memory ("x,m" / "xm,x").
1947 (define_insn "fma4i_fmadd<mode>4"
1948 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1952 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
1953 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
1954 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))]
1955 UNSPEC_FMA4_INTRINSIC))]
1956 "TARGET_FMA4 && TARGET_FUSED_MADD"
1957 "vfmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1958 [(set_attr "type" "ssemuladd")
1959 (set_attr "mode" "<MODE>")])
;; Named insn fma4i_fmsub<mode>4 (iterated over SSEMODEF2P).  Tagged with
;; UNSPEC_FMA4_INTRINSIC; emits vfmsub<ssemodesuffixf4> under
;; TARGET_FMA4 && TARGET_FUSED_MADD.
;; At most one of operands 2 and 3 may be memory ("x,m" / "xm,x").
1961 (define_insn "fma4i_fmsub<mode>4"
1962 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1966 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
1967 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
1968 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))]
1969 UNSPEC_FMA4_INTRINSIC))]
1970 "TARGET_FMA4 && TARGET_FUSED_MADD"
1971 "vfmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1972 [(set_attr "type" "ssemuladd")
1973 (set_attr "mode" "<MODE>")])
;; Named insn fma4i_fnmadd<mode>4 (iterated over SSEMODEF2P).  Tagged with
;; UNSPEC_FMA4_INTRINSIC; emits vfnmadd<ssemodesuffixf4> under
;; TARGET_FMA4 && TARGET_FUSED_MADD.
;; Operand 3 appears first in the RTL template, ahead of operands 1 and 2;
;; the assembler operand order in the output template is unchanged.
1975 (define_insn "fma4i_fnmadd<mode>4"
1976 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1979 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")
1981 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
1982 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m")))]
1983 UNSPEC_FMA4_INTRINSIC))]
1984 "TARGET_FMA4 && TARGET_FUSED_MADD"
1985 "vfnmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1986 [(set_attr "type" "ssemuladd")
1987 (set_attr "mode" "<MODE>")])
;; Named insn fma4i_fnmsub<mode>4 (iterated over SSEMODEF2P).  Tagged with
;; UNSPEC_FMA4_INTRINSIC; emits vfnmsub<ssemodesuffixf4> under
;; TARGET_FMA4 && TARGET_FUSED_MADD.
;; At most one of operands 2 and 3 may be memory ("x,m" / "xm,x").
1989 (define_insn "fma4i_fnmsub<mode>4"
1990 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1995 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x"))
1996 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
1997 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))]
1998 UNSPEC_FMA4_INTRINSIC))]
1999 "TARGET_FMA4 && TARGET_FUSED_MADD"
2000 "vfnmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2001 [(set_attr "type" "ssemuladd")
2002 (set_attr "mode" "<MODE>")])
2004 ;; For the scalar operations, use operand1 for the upper words that aren't
2005 ;; modified, so restrict the forms that are accepted.
;; Named insn fma4i_vmfmadd<mode>4 (iterated over SSEMODEF2P): vec_merge
;; form tagged with UNSPEC_FMA4_INTRINSIC.  Emits
;; vfmadd<ssemodesuffixf2s> under TARGET_FMA4 && TARGET_FUSED_MADD.
;; The "mode" attribute is the element mode, "<ssescalarmode>".
2006 (define_insn "fma4i_vmfmadd<mode>4"
2007 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2009 [(vec_merge:SSEMODEF2P
2012 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
2013 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
2014 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
2017 UNSPEC_FMA4_INTRINSIC))]
2018 "TARGET_FMA4 && TARGET_FUSED_MADD"
2019 "vfmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2020 [(set_attr "type" "ssemuladd")
2021 (set_attr "mode" "<ssescalarmode>")])
;; Named insn fma4i_vmfmsub<mode>4 (iterated over SSEMODEF2P): vec_merge
;; form tagged with UNSPEC_FMA4_INTRINSIC.  Emits
;; vfmsub<ssemodesuffixf2s> under TARGET_FMA4 && TARGET_FUSED_MADD.
;; The "mode" attribute is the element mode, "<ssescalarmode>".
2023 (define_insn "fma4i_vmfmsub<mode>4"
2024 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2026 [(vec_merge:SSEMODEF2P
2029 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
2030 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
2031 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
2034 UNSPEC_FMA4_INTRINSIC))]
2035 "TARGET_FMA4 && TARGET_FUSED_MADD"
2036 "vfmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2037 [(set_attr "type" "ssemuladd")
2038 (set_attr "mode" "<ssescalarmode>")])
;; Named insn fma4i_vmfnmadd<mode>4 (iterated over SSEMODEF2P): vec_merge
;; form tagged with UNSPEC_FMA4_INTRINSIC.  Emits
;; vfnmadd<ssemodesuffixf2s> under TARGET_FMA4 && TARGET_FUSED_MADD.
;; Operand 3 appears first in the RTL template; the "mode" attribute is
;; "<ssescalarmode>".
2040 (define_insn "fma4i_vmfnmadd<mode>4"
2041 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2043 [(vec_merge:SSEMODEF2P
2045 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")
2047 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
2048 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m")))
2051 UNSPEC_FMA4_INTRINSIC))]
2052 "TARGET_FMA4 && TARGET_FUSED_MADD"
2053 "vfnmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2054 [(set_attr "type" "ssemuladd")
2055 (set_attr "mode" "<ssescalarmode>")])
;; Named insn fma4i_vmfnmsub<mode>4 (iterated over SSEMODEF2P): vec_merge
;; form tagged with UNSPEC_FMA4_INTRINSIC.  Emits
;; vfnmsub<ssemodesuffixf2s> under TARGET_FMA4 && TARGET_FUSED_MADD.
;; The "mode" attribute is the element mode, "<ssescalarmode>".
2057 (define_insn "fma4i_vmfnmsub<mode>4"
2058 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2060 [(vec_merge:SSEMODEF2P
2064 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x"))
2065 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
2066 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
2069 UNSPEC_FMA4_INTRINSIC))]
2070 "TARGET_FMA4 && TARGET_FUSED_MADD"
2071 "vfnmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2072 [(set_attr "type" "ssemuladd")
2073 (set_attr "mode" "<ssescalarmode>")])
2075 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2077 ;; FMA4 Parallel floating point multiply addsub and subadd operations.
2079 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Named insn fma4_fmaddsubv8sf4: V8SF operands, emits vfmaddsubps under
;; TARGET_FMA4 && TARGET_FUSED_MADD.
;; At most one of operands 2 and 3 may be memory ("x,m" / "xm,x").
2081 (define_insn "fma4_fmaddsubv8sf4"
2082 [(set (match_operand:V8SF 0 "register_operand" "=x,x")
2086 (match_operand:V8SF 1 "nonimmediate_operand" "%x,x")
2087 (match_operand:V8SF 2 "nonimmediate_operand" "x,m"))
2088 (match_operand:V8SF 3 "nonimmediate_operand" "xm,x"))
2095 "TARGET_FMA4 && TARGET_FUSED_MADD"
2096 "vfmaddsubps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2097 [(set_attr "type" "ssemuladd")
2098 (set_attr "mode" "V8SF")])
;; Named insn fma4_fmaddsubv4df4: V4DF operands, emits vfmaddsubpd under
;; TARGET_FMA4 && TARGET_FUSED_MADD.
;; At most one of operands 2 and 3 may be memory ("x,m" / "xm,x").
2100 (define_insn "fma4_fmaddsubv4df4"
2101 [(set (match_operand:V4DF 0 "register_operand" "=x,x")
2105 (match_operand:V4DF 1 "nonimmediate_operand" "%x,x")
2106 (match_operand:V4DF 2 "nonimmediate_operand" "x,m"))
2107 (match_operand:V4DF 3 "nonimmediate_operand" "xm,x"))
2114 "TARGET_FMA4 && TARGET_FUSED_MADD"
2115 "vfmaddsubpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2116 [(set_attr "type" "ssemuladd")
2117 (set_attr "mode" "V4DF")])
;; Named insn fma4_fmaddsubv4sf4: V4SF operands, emits vfmaddsubps under
;; TARGET_FMA4 && TARGET_FUSED_MADD.
;; At most one of operands 2 and 3 may be memory ("x,m" / "xm,x").
2119 (define_insn "fma4_fmaddsubv4sf4"
2120 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2124 (match_operand:V4SF 1 "nonimmediate_operand" "%x,x")
2125 (match_operand:V4SF 2 "nonimmediate_operand" "x,m"))
2126 (match_operand:V4SF 3 "nonimmediate_operand" "xm,x"))
2133 "TARGET_FMA4 && TARGET_FUSED_MADD"
2134 "vfmaddsubps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2135 [(set_attr "type" "ssemuladd")
2136 (set_attr "mode" "V4SF")])
;; Named insn fma4_fmaddsubv2df4: V2DF operands, emits vfmaddsubpd under
;; TARGET_FMA4 && TARGET_FUSED_MADD.
;; At most one of operands 2 and 3 may be memory ("x,m" / "xm,x").
2138 (define_insn "fma4_fmaddsubv2df4"
2139 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2143 (match_operand:V2DF 1 "nonimmediate_operand" "%x,x")
2144 (match_operand:V2DF 2 "nonimmediate_operand" "x,m"))
2145 (match_operand:V2DF 3 "nonimmediate_operand" "xm,x"))
2152 "TARGET_FMA4 && TARGET_FUSED_MADD"
2153 "vfmaddsubpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2154 [(set_attr "type" "ssemuladd")
2155 (set_attr "mode" "V2DF")])
;; Named insn fma4_fmsubaddv8sf4: V8SF operands, emits vfmsubaddps under
;; TARGET_FMA4 && TARGET_FUSED_MADD.
;; At most one of operands 2 and 3 may be memory ("x,m" / "xm,x").
2157 (define_insn "fma4_fmsubaddv8sf4"
2158 [(set (match_operand:V8SF 0 "register_operand" "=x,x")
2162 (match_operand:V8SF 1 "nonimmediate_operand" "%x,x")
2163 (match_operand:V8SF 2 "nonimmediate_operand" "x,m"))
2164 (match_operand:V8SF 3 "nonimmediate_operand" "xm,x"))
2171 "TARGET_FMA4 && TARGET_FUSED_MADD"
2172 "vfmsubaddps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2173 [(set_attr "type" "ssemuladd")
2174 (set_attr "mode" "V8SF")])
;; Named insn fma4_fmsubaddv4df4: V4DF operands, emits vfmsubaddpd under
;; TARGET_FMA4 && TARGET_FUSED_MADD.
;; At most one of operands 2 and 3 may be memory ("x,m" / "xm,x").
2176 (define_insn "fma4_fmsubaddv4df4"
2177 [(set (match_operand:V4DF 0 "register_operand" "=x,x")
2181 (match_operand:V4DF 1 "nonimmediate_operand" "%x,x")
2182 (match_operand:V4DF 2 "nonimmediate_operand" "x,m"))
2183 (match_operand:V4DF 3 "nonimmediate_operand" "xm,x"))
2190 "TARGET_FMA4 && TARGET_FUSED_MADD"
2191 "vfmsubaddpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2192 [(set_attr "type" "ssemuladd")
2193 (set_attr "mode" "V4DF")])
;; Named insn fma4_fmsubaddv4sf4: V4SF operands, emits vfmsubaddps under
;; TARGET_FMA4 && TARGET_FUSED_MADD.
;; At most one of operands 2 and 3 may be memory ("x,m" / "xm,x").
2195 (define_insn "fma4_fmsubaddv4sf4"
2196 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2200 (match_operand:V4SF 1 "nonimmediate_operand" "%x,x")
2201 (match_operand:V4SF 2 "nonimmediate_operand" "x,m"))
2202 (match_operand:V4SF 3 "nonimmediate_operand" "xm,x"))
2209 "TARGET_FMA4 && TARGET_FUSED_MADD"
2210 "vfmsubaddps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2211 [(set_attr "type" "ssemuladd")
2212 (set_attr "mode" "V4SF")])
;; Named insn fma4_fmsubaddv2df4: V2DF operands, emits vfmsubaddpd under
;; TARGET_FMA4 && TARGET_FUSED_MADD.
;; At most one of operands 2 and 3 may be memory ("x,m" / "xm,x").
2214 (define_insn "fma4_fmsubaddv2df4"
2215 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2219 (match_operand:V2DF 1 "nonimmediate_operand" "%x,x")
2220 (match_operand:V2DF 2 "nonimmediate_operand" "x,m"))
2221 (match_operand:V2DF 3 "nonimmediate_operand" "xm,x"))
2228 "TARGET_FMA4 && TARGET_FUSED_MADD"
2229 "vfmsubaddpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2230 [(set_attr "type" "ssemuladd")
2231 (set_attr "mode" "V2DF")])
;; Named insn fma4i_fmaddsubv8sf4: V8SF pattern tagged with
;; UNSPEC_FMA4_INTRINSIC; emits vfmaddsubps under
;; TARGET_FMA4 && TARGET_FUSED_MADD.
;; At most one of operands 2 and 3 may be memory ("x,m" / "xm,x").
2233 (define_insn "fma4i_fmaddsubv8sf4"
2234 [(set (match_operand:V8SF 0 "register_operand" "=x,x")
2239 (match_operand:V8SF 1 "nonimmediate_operand" "%x,x")
2240 (match_operand:V8SF 2 "nonimmediate_operand" "x,m"))
2241 (match_operand:V8SF 3 "nonimmediate_operand" "xm,x"))
2248 UNSPEC_FMA4_INTRINSIC))]
2249 "TARGET_FMA4 && TARGET_FUSED_MADD"
2250 "vfmaddsubps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2251 [(set_attr "type" "ssemuladd")
2252 (set_attr "mode" "V8SF")])
;; Named insn fma4i_fmaddsubv4df4: V4DF pattern tagged with
;; UNSPEC_FMA4_INTRINSIC; emits vfmaddsubpd under
;; TARGET_FMA4 && TARGET_FUSED_MADD.
;; At most one of operands 2 and 3 may be memory ("x,m" / "xm,x").
2254 (define_insn "fma4i_fmaddsubv4df4"
2255 [(set (match_operand:V4DF 0 "register_operand" "=x,x")
2260 (match_operand:V4DF 1 "nonimmediate_operand" "%x,x")
2261 (match_operand:V4DF 2 "nonimmediate_operand" "x,m"))
2262 (match_operand:V4DF 3 "nonimmediate_operand" "xm,x"))
2269 UNSPEC_FMA4_INTRINSIC))]
2270 "TARGET_FMA4 && TARGET_FUSED_MADD"
2271 "vfmaddsubpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2272 [(set_attr "type" "ssemuladd")
2273 (set_attr "mode" "V4DF")])
;; Named insn fma4i_fmaddsubv4sf4: V4SF pattern tagged with
;; UNSPEC_FMA4_INTRINSIC; emits vfmaddsubps under
;; TARGET_FMA4 && TARGET_FUSED_MADD.
;; At most one of operands 2 and 3 may be memory ("x,m" / "xm,x").
2275 (define_insn "fma4i_fmaddsubv4sf4"
2276 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2281 (match_operand:V4SF 1 "nonimmediate_operand" "%x,x")
2282 (match_operand:V4SF 2 "nonimmediate_operand" "x,m"))
2283 (match_operand:V4SF 3 "nonimmediate_operand" "xm,x"))
2290 UNSPEC_FMA4_INTRINSIC))]
2291 "TARGET_FMA4 && TARGET_FUSED_MADD"
2292 "vfmaddsubps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2293 [(set_attr "type" "ssemuladd")
2294 (set_attr "mode" "V4SF")])
;; Named insn fma4i_fmaddsubv2df4: V2DF pattern tagged with
;; UNSPEC_FMA4_INTRINSIC; emits vfmaddsubpd under
;; TARGET_FMA4 && TARGET_FUSED_MADD.
;; At most one of operands 2 and 3 may be memory ("x,m" / "xm,x").
2296 (define_insn "fma4i_fmaddsubv2df4"
2297 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2302 (match_operand:V2DF 1 "nonimmediate_operand" "%x,x")
2303 (match_operand:V2DF 2 "nonimmediate_operand" "x,m"))
2304 (match_operand:V2DF 3 "nonimmediate_operand" "xm,x"))
2311 UNSPEC_FMA4_INTRINSIC))]
2312 "TARGET_FMA4 && TARGET_FUSED_MADD"
2313 "vfmaddsubpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2314 [(set_attr "type" "ssemuladd")
2315 (set_attr "mode" "V2DF")])
;; Named insn fma4i_fmsubaddv8sf4: V8SF pattern tagged with
;; UNSPEC_FMA4_INTRINSIC; emits vfmsubaddps under
;; TARGET_FMA4 && TARGET_FUSED_MADD.
;; At most one of operands 2 and 3 may be memory ("x,m" / "xm,x").
2317 (define_insn "fma4i_fmsubaddv8sf4"
2318 [(set (match_operand:V8SF 0 "register_operand" "=x,x")
2323 (match_operand:V8SF 1 "nonimmediate_operand" "%x,x")
2324 (match_operand:V8SF 2 "nonimmediate_operand" "x,m"))
2325 (match_operand:V8SF 3 "nonimmediate_operand" "xm,x"))
2332 UNSPEC_FMA4_INTRINSIC))]
2333 "TARGET_FMA4 && TARGET_FUSED_MADD"
2334 "vfmsubaddps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2335 [(set_attr "type" "ssemuladd")
2336 (set_attr "mode" "V8SF")])
;; Named insn fma4i_fmsubaddv4df4: V4DF pattern tagged with
;; UNSPEC_FMA4_INTRINSIC; emits vfmsubaddpd under
;; TARGET_FMA4 && TARGET_FUSED_MADD.
;; At most one of operands 2 and 3 may be memory ("x,m" / "xm,x").
2338 (define_insn "fma4i_fmsubaddv4df4"
2339 [(set (match_operand:V4DF 0 "register_operand" "=x,x")
2344 (match_operand:V4DF 1 "nonimmediate_operand" "%x,x")
2345 (match_operand:V4DF 2 "nonimmediate_operand" "x,m"))
2346 (match_operand:V4DF 3 "nonimmediate_operand" "xm,x"))
2353 UNSPEC_FMA4_INTRINSIC))]
2354 "TARGET_FMA4 && TARGET_FUSED_MADD"
2355 "vfmsubaddpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2356 [(set_attr "type" "ssemuladd")
2357 (set_attr "mode" "V4DF")])
;; Named insn fma4i_fmsubaddv4sf4: V4SF pattern tagged with
;; UNSPEC_FMA4_INTRINSIC; emits vfmsubaddps under
;; TARGET_FMA4 && TARGET_FUSED_MADD.
;; At most one of operands 2 and 3 may be memory ("x,m" / "xm,x").
2359 (define_insn "fma4i_fmsubaddv4sf4"
2360 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2365 (match_operand:V4SF 1 "nonimmediate_operand" "%x,x")
2366 (match_operand:V4SF 2 "nonimmediate_operand" "x,m"))
2367 (match_operand:V4SF 3 "nonimmediate_operand" "xm,x"))
2374 UNSPEC_FMA4_INTRINSIC))]
2375 "TARGET_FMA4 && TARGET_FUSED_MADD"
2376 "vfmsubaddps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2377 [(set_attr "type" "ssemuladd")
2378 (set_attr "mode" "V4SF")])
;; Named insn fma4i_fmsubaddv2df4: V2DF pattern tagged with
;; UNSPEC_FMA4_INTRINSIC; emits vfmsubaddpd under
;; TARGET_FMA4 && TARGET_FUSED_MADD.
;; At most one of operands 2 and 3 may be memory ("x,m" / "xm,x").
2380 (define_insn "fma4i_fmsubaddv2df4"
2381 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2386 (match_operand:V2DF 1 "nonimmediate_operand" "%x,x")
2387 (match_operand:V2DF 2 "nonimmediate_operand" "x,m"))
2388 (match_operand:V2DF 3 "nonimmediate_operand" "xm,x"))
2395 UNSPEC_FMA4_INTRINSIC))]
2396 "TARGET_FMA4 && TARGET_FUSED_MADD"
2397 "vfmsubaddpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2398 [(set_attr "type" "ssemuladd")
2399 (set_attr "mode" "V2DF")])
2401 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2403 ;; Parallel single-precision floating point conversion operations
2405 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Named insn sse_cvtpi2ps: float-converts V2SI operand 2 ("ym") to V2SF
;; and combines it with V4SF operand 1, which is tied to the destination
;; (constraint "0").  Emits cvtpi2ps with operand 2 as the source.
;; NOTE(review): the enable condition is not visible in this view; confirm.
2407 (define_insn "sse_cvtpi2ps"
2408 [(set (match_operand:V4SF 0 "register_operand" "=x")
2411 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
2412 (match_operand:V4SF 1 "register_operand" "0")
2415 "cvtpi2ps\t{%2, %0|%0, %2}"
2416 [(set_attr "type" "ssecvt")
2417 (set_attr "mode" "V4SF")])
;; Named insn sse_cvtps2pi: an unspec:V4SI conversion of V4SF operand 1,
;; from which elements 0 and 1 are selected into V2SI operand 0 ("=y").
;; Emits cvtps2pi; the "unit" attribute is "mmx".
;; NOTE(review): the unspec code and enable condition are not visible here.
2419 (define_insn "sse_cvtps2pi"
2420 [(set (match_operand:V2SI 0 "register_operand" "=y")
2422 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
2424 (parallel [(const_int 0) (const_int 1)])))]
2426 "cvtps2pi\t{%1, %0|%0, %1}"
2427 [(set_attr "type" "ssecvt")
2428 (set_attr "unit" "mmx")
2429 (set_attr "mode" "DI")])
;; Named insn sse_cvttps2pi: applies fix:V4SI to V4SF operand 1 and
;; selects elements 0 and 1 into V2SI operand 0 ("=y").  Emits cvttps2pi;
;; "unit" is "mmx" and "prefix_rep" is forced to "0".
;; NOTE(review): the enable condition is not visible in this view; confirm.
2431 (define_insn "sse_cvttps2pi"
2432 [(set (match_operand:V2SI 0 "register_operand" "=y")
2434 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
2435 (parallel [(const_int 0) (const_int 1)])))]
2437 "cvttps2pi\t{%1, %0|%0, %1}"
2438 [(set_attr "type" "ssecvt")
2439 (set_attr "unit" "mmx")
2440 (set_attr "prefix_rep" "0")
2441 (set_attr "mode" "SF")])
;; Unnamed-for-expanders insn *avx_cvtsi2ss: float:SF of SI operand 2
;; ("rm") combined with V4SF operand 1; three-operand vcvtsi2ss with the
;; "vex" prefix attribute, so operand 1 need not match the destination.
;; NOTE(review): the enable condition is not visible in this view; confirm.
2443 (define_insn "*avx_cvtsi2ss"
2444 [(set (match_operand:V4SF 0 "register_operand" "=x")
2447 (float:SF (match_operand:SI 2 "nonimmediate_operand" "rm")))
2448 (match_operand:V4SF 1 "register_operand" "x")
2451 "vcvtsi2ss\t{%2, %1, %0|%0, %1, %2}"
2452 [(set_attr "type" "sseicvt")
2453 (set_attr "prefix" "vex")
2454 (set_attr "mode" "SF")])
;; Named insn sse_cvtsi2ss: float:SF of SI operand 2 combined with V4SF
;; operand 1, which is tied to the destination ("0,0").  Emits two-operand
;; cvtsi2ss.  The two alternatives (operand 2 "r" vs "m") exist so the
;; athlon_decode / amdfam10_decode attributes can differ per alternative.
;; NOTE(review): the enable condition is not visible in this view; confirm.
2456 (define_insn "sse_cvtsi2ss"
2457 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2460 (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
2461 (match_operand:V4SF 1 "register_operand" "0,0")
2464 "cvtsi2ss\t{%2, %0|%0, %2}"
2465 [(set_attr "type" "sseicvt")
2466 (set_attr "athlon_decode" "vector,double")
2467 (set_attr "amdfam10_decode" "vector,double")
2468 (set_attr "mode" "SF")])
;; Insn *avx_cvtsi2ssq: DI-source variant, float:SF of DI operand 2
;; ("rm") combined with V4SF operand 1.  Enabled only when
;; TARGET_AVX && TARGET_64BIT; emits three-operand vcvtsi2ssq with
;; "length_vex" set to "4".
2470 (define_insn "*avx_cvtsi2ssq"
2471 [(set (match_operand:V4SF 0 "register_operand" "=x")
2474 (float:SF (match_operand:DI 2 "nonimmediate_operand" "rm")))
2475 (match_operand:V4SF 1 "register_operand" "x")
2477 "TARGET_AVX && TARGET_64BIT"
2478 "vcvtsi2ssq\t{%2, %1, %0|%0, %1, %2}"
2479 [(set_attr "type" "sseicvt")
2480 (set_attr "length_vex" "4")
2481 (set_attr "prefix" "vex")
2482 (set_attr "mode" "SF")])
;; Named insn sse_cvtsi2ssq: DI-source variant, float:SF of DI operand 2
;; combined with V4SF operand 1 (tied to the destination, "0,0").
;; Enabled only when TARGET_SSE && TARGET_64BIT; emits cvtsi2ssq with
;; "prefix_rex" set to "1".  Decode attributes differ per alternative.
2484 (define_insn "sse_cvtsi2ssq"
2485 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2488 (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,rm")))
2489 (match_operand:V4SF 1 "register_operand" "0,0")
2491 "TARGET_SSE && TARGET_64BIT"
2492 "cvtsi2ssq\t{%2, %0|%0, %2}"
2493 [(set_attr "type" "sseicvt")
2494 (set_attr "prefix_rex" "1")
2495 (set_attr "athlon_decode" "vector,double")
2496 (set_attr "amdfam10_decode" "vector,double")
2497 (set_attr "mode" "SF")])
;; Named insn sse_cvtss2si: converts element 0 of V4SF operand 1 to SI
;; operand 0 via UNSPEC_FIX_NOTRUNC.  The "%v" in the template together
;; with prefix "maybe_vex" lets one pattern emit either cvtss2si or the
;; v-prefixed form.
;; NOTE(review): the enable condition is not visible in this view; confirm.
2499 (define_insn "sse_cvtss2si"
2500 [(set (match_operand:SI 0 "register_operand" "=r,r")
2503 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2504 (parallel [(const_int 0)]))]
2505 UNSPEC_FIX_NOTRUNC))]
2507 "%vcvtss2si\t{%1, %0|%0, %1}"
2508 [(set_attr "type" "sseicvt")
2509 (set_attr "athlon_decode" "double,vector")
2510 (set_attr "prefix_rep" "1")
2511 (set_attr "prefix" "maybe_vex")
2512 (set_attr "mode" "SI")])
;; Named insn sse_cvtss2si_2: same instruction as sse_cvtss2si but the
;; source is a plain SF operand rather than an element of a V4SF vector.
;; SI result is produced via UNSPEC_FIX_NOTRUNC; emits %vcvtss2si.
;; NOTE(review): the enable condition is not visible in this view; confirm.
2514 (define_insn "sse_cvtss2si_2"
2515 [(set (match_operand:SI 0 "register_operand" "=r,r")
2516 (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
2517 UNSPEC_FIX_NOTRUNC))]
2519 "%vcvtss2si\t{%1, %0|%0, %1}"
2520 [(set_attr "type" "sseicvt")
2521 (set_attr "athlon_decode" "double,vector")
2522 (set_attr "amdfam10_decode" "double,double")
2523 (set_attr "prefix_rep" "1")
2524 (set_attr "prefix" "maybe_vex")
2525 (set_attr "mode" "SI")])
;; Named insn sse_cvtss2siq: DI-result variant; converts element 0 of
;; V4SF operand 1 via UNSPEC_FIX_NOTRUNC.  Enabled only when
;; TARGET_SSE && TARGET_64BIT.  The "{q}" in the template adds the q
;; suffix in the AT&T dialect only.
2527 (define_insn "sse_cvtss2siq"
2528 [(set (match_operand:DI 0 "register_operand" "=r,r")
2531 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2532 (parallel [(const_int 0)]))]
2533 UNSPEC_FIX_NOTRUNC))]
2534 "TARGET_SSE && TARGET_64BIT"
2535 "%vcvtss2si{q}\t{%1, %0|%0, %1}"
2536 [(set_attr "type" "sseicvt")
2537 (set_attr "athlon_decode" "double,vector")
2538 (set_attr "prefix_rep" "1")
2539 (set_attr "prefix" "maybe_vex")
2540 (set_attr "mode" "DI")])
;; Named insn sse_cvtss2siq_2: DI-result variant taking a plain SF source
;; operand, converted via UNSPEC_FIX_NOTRUNC.  Enabled only when
;; TARGET_SSE && TARGET_64BIT; emits %vcvtss2si with an AT&T-only q suffix.
2542 (define_insn "sse_cvtss2siq_2"
2543 [(set (match_operand:DI 0 "register_operand" "=r,r")
2544 (unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
2545 UNSPEC_FIX_NOTRUNC))]
2546 "TARGET_SSE && TARGET_64BIT"
2547 "%vcvtss2si{q}\t{%1, %0|%0, %1}"
2548 [(set_attr "type" "sseicvt")
2549 (set_attr "athlon_decode" "double,vector")
2550 (set_attr "amdfam10_decode" "double,double")
2551 (set_attr "prefix_rep" "1")
2552 (set_attr "prefix" "maybe_vex")
2553 (set_attr "mode" "DI")])
;; Named insn sse_cvttss2si: converts element 0 of V4SF operand 1 into SI
;; operand 0 and emits %vcvttss2si (no UNSPEC_FIX_NOTRUNC is involved in
;; the visible template, unlike sse_cvtss2si).
;; NOTE(review): the wrapping RTL operator and the enable condition are
;; not visible in this view; confirm against the full file.
2555 (define_insn "sse_cvttss2si"
2556 [(set (match_operand:SI 0 "register_operand" "=r,r")
2559 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2560 (parallel [(const_int 0)]))))]
2562 "%vcvttss2si\t{%1, %0|%0, %1}"
2563 [(set_attr "type" "sseicvt")
2564 (set_attr "athlon_decode" "double,vector")
2565 (set_attr "amdfam10_decode" "double,double")
2566 (set_attr "prefix_rep" "1")
2567 (set_attr "prefix" "maybe_vex")
2568 (set_attr "mode" "SI")])
;; Named insn sse_cvttss2siq: DI-result variant of sse_cvttss2si, reading
;; element 0 of V4SF operand 1.  Enabled only when
;; TARGET_SSE && TARGET_64BIT; emits %vcvttss2si with an AT&T-only q suffix.
2570 (define_insn "sse_cvttss2siq"
2571 [(set (match_operand:DI 0 "register_operand" "=r,r")
2574 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2575 (parallel [(const_int 0)]))))]
2576 "TARGET_SSE && TARGET_64BIT"
2577 "%vcvttss2si{q}\t{%1, %0|%0, %1}"
2578 [(set_attr "type" "sseicvt")
2579 (set_attr "athlon_decode" "double,vector")
2580 (set_attr "amdfam10_decode" "double,double")
2581 (set_attr "prefix_rep" "1")
2582 (set_attr "prefix" "maybe_vex")
2583 (set_attr "mode" "DI")])
;; Named insn avx_cvtdq2ps<avxmodesuffix> (iterated over
;; AVXMODEDCVTDQ2PS): float conversion of operand 1, whose mode is the
;; iterator's <avxcvtvecmode>, into operand 0.  Emits vcvtdq2ps.
;; NOTE(review): the enable condition is not visible in this view; confirm.
2585 (define_insn "avx_cvtdq2ps<avxmodesuffix>"
2586 [(set (match_operand:AVXMODEDCVTDQ2PS 0 "register_operand" "=x")
2587 (float:AVXMODEDCVTDQ2PS
2588 (match_operand:<avxcvtvecmode> 1 "nonimmediate_operand" "xm")))]
2590 "vcvtdq2ps\t{%1, %0|%0, %1}"
2591 [(set_attr "type" "ssecvt")
2592 (set_attr "prefix" "vex")
2593 (set_attr "mode" "<avxvecmode>")])
;; Named insn sse2_cvtdq2ps: float conversion of V4SI operand 1 (register
;; or memory, "xm") into V4SF operand 0.  Emits cvtdq2ps.
;; NOTE(review): the enable condition is not visible in this view; confirm.
2595 (define_insn "sse2_cvtdq2ps"
2596 [(set (match_operand:V4SF 0 "register_operand" "=x")
2597 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
2599 "cvtdq2ps\t{%1, %0|%0, %1}"
2600 [(set_attr "type" "ssecvt")
2601 (set_attr "mode" "V4SF")])
;; Expander sse2_cvtudq2ps.  The visible RTL chains four steps: a signed
;; float conversion of V4SI operand 1, an lt compare of temporary 5
;; against operand 3, an and of temporary 6 with operand 4, and a final
;; plus of temporaries 5 and 7 stored into operand 0.
;; The preparation code sets operand 3 to a zero V4SF register, operand 4
;; to a register holding a constant vector built from 2**32
;; (real_ldexp of dconst1 by 32), and operands 5..7 to fresh V4SF pseudos.
;; NOTE(review): presumably this adds 2**32 back to lanes whose signed
;; conversion came out negative; several lines of the pattern (set
;; destinations, condition, braces) are not visible here -- confirm.
2603 (define_expand "sse2_cvtudq2ps"
2605 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "")))
2607 (lt:V4SF (match_dup 5) (match_dup 3)))
2609 (and:V4SF (match_dup 6) (match_dup 4)))
2610 (set (match_operand:V4SF 0 "register_operand" "")
2611 (plus:V4SF (match_dup 5) (match_dup 7)))]
2614 REAL_VALUE_TYPE TWO32r;
2618 real_ldexp (&TWO32r, &dconst1, 32);
2619 x = const_double_from_real_value (TWO32r, SFmode);
2621 operands[3] = force_reg (V4SFmode, CONST0_RTX (V4SFmode));
2622 operands[4] = force_reg (V4SFmode, ix86_build_const_vector (SFmode, 1, x));
2624 for (i = 5; i < 8; i++)
2625 operands[i] = gen_reg_rtx (V4SFmode);
;; Named insn avx_cvtps2dq<avxmodesuffix> (iterated over
;; AVXMODEDCVTPS2DQ): converts operand 1 (mode <avxcvtvecmode>) via
;; UNSPEC_FIX_NOTRUNC.  Emits vcvtps2dq.
;; NOTE(review): the enable condition is not visible in this view; confirm.
2628 (define_insn "avx_cvtps2dq<avxmodesuffix>"
2629 [(set (match_operand:AVXMODEDCVTPS2DQ 0 "register_operand" "=x")
2630 (unspec:AVXMODEDCVTPS2DQ
2631 [(match_operand:<avxcvtvecmode> 1 "nonimmediate_operand" "xm")]
2632 UNSPEC_FIX_NOTRUNC))]
2634 "vcvtps2dq\t{%1, %0|%0, %1}"
2635 [(set_attr "type" "ssecvt")
2636 (set_attr "prefix" "vex")
2637 (set_attr "mode" "<avxvecmode>")])
;; Named insn sse2_cvtps2dq: converts V4SF operand 1 to V4SI operand 0
;; via UNSPEC_FIX_NOTRUNC.  Emits cvtps2dq with "prefix_data16" set to "1".
;; NOTE(review): the enable condition is not visible in this view; confirm.
2639 (define_insn "sse2_cvtps2dq"
2640 [(set (match_operand:V4SI 0 "register_operand" "=x")
2641 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
2642 UNSPEC_FIX_NOTRUNC))]
2644 "cvtps2dq\t{%1, %0|%0, %1}"
2645 [(set_attr "type" "ssecvt")
2646 (set_attr "prefix_data16" "1")
2647 (set_attr "mode" "TI")])
;; Named insn avx_cvttps2dq<avxmodesuffix> (iterated over
;; AVXMODEDCVTPS2DQ): applies the RTL fix operator to operand 1 (mode
;; <avxcvtvecmode>).  Emits vcvttps2dq.
;; NOTE(review): the enable condition is not visible in this view; confirm.
2649 (define_insn "avx_cvttps2dq<avxmodesuffix>"
2650 [(set (match_operand:AVXMODEDCVTPS2DQ 0 "register_operand" "=x")
2651 (fix:AVXMODEDCVTPS2DQ
2652 (match_operand:<avxcvtvecmode> 1 "nonimmediate_operand" "xm")))]
2654 "vcvttps2dq\t{%1, %0|%0, %1}"
2655 [(set_attr "type" "ssecvt")
2656 (set_attr "prefix" "vex")
2657 (set_attr "mode" "<avxvecmode>")])
;; Named insn sse2_cvttps2dq: applies fix:V4SI to V4SF operand 1.  Emits
;; cvttps2dq; "prefix_rep" is "1" and "prefix_data16" is forced to "0".
;; NOTE(review): the enable condition is not visible in this view; confirm.
2659 (define_insn "sse2_cvttps2dq"
2660 [(set (match_operand:V4SI 0 "register_operand" "=x")
2661 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
2663 "cvttps2dq\t{%1, %0|%0, %1}"
2664 [(set_attr "type" "ssecvt")
2665 (set_attr "prefix_rep" "1")
2666 (set_attr "prefix_data16" "0")
2667 (set_attr "mode" "TI")])
2669 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2671 ;; Parallel double-precision floating point conversion operations
2673 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Named insn sse2_cvtpi2pd: float conversion of V2SI operand 1 into V2DF
;; operand 0.  Emits cvtpi2pd.  The "unit" and "prefix_data16" attributes
;; are set only for the first alternative (operand 1 constraint "y");
;; "*" leaves the default for the memory alternative.
;; NOTE(review): the enable condition is not visible in this view; confirm.
2675 (define_insn "sse2_cvtpi2pd"
2676 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2677 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
2679 "cvtpi2pd\t{%1, %0|%0, %1}"
2680 [(set_attr "type" "ssecvt")
2681 (set_attr "unit" "mmx,*")
2682 (set_attr "prefix_data16" "1,*")
2683 (set_attr "mode" "V2DF")])
;; Named insn sse2_cvtpd2pi: converts V2DF operand 1 to V2SI operand 0
;; ("=y") via UNSPEC_FIX_NOTRUNC.  Emits cvtpd2pi; "unit" is "mmx".
;; NOTE(review): the enable condition is not visible in this view; confirm.
2685 (define_insn "sse2_cvtpd2pi"
2686 [(set (match_operand:V2SI 0 "register_operand" "=y")
2687 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2688 UNSPEC_FIX_NOTRUNC))]
2690 "cvtpd2pi\t{%1, %0|%0, %1}"
2691 [(set_attr "type" "ssecvt")
2692 (set_attr "unit" "mmx")
2693 (set_attr "prefix_data16" "1")
2694 (set_attr "mode" "DI")])
;; Named insn sse2_cvttpd2pi: applies fix:V2SI to V2DF operand 1, writing
;; V2SI operand 0 ("=y").  Emits cvttpd2pi; "unit" is "mmx".
;; NOTE(review): the enable condition is not visible in this view; confirm.
2696 (define_insn "sse2_cvttpd2pi"
2697 [(set (match_operand:V2SI 0 "register_operand" "=y")
2698 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
2700 "cvttpd2pi\t{%1, %0|%0, %1}"
2701 [(set_attr "type" "ssecvt")
2702 (set_attr "unit" "mmx")
2703 (set_attr "prefix_data16" "1")
2704 (set_attr "mode" "TI")])
;; Insn *avx_cvtsi2sd: float:DF of SI operand 2 ("rm") combined with V2DF
;; operand 1.  Emits three-operand vcvtsi2sd with prefix "vex", so
;; operand 1 is not tied to the destination.
;; NOTE(review): the enable condition is not visible in this view; confirm.
2706 (define_insn "*avx_cvtsi2sd"
2707 [(set (match_operand:V2DF 0 "register_operand" "=x")
2710 (float:DF (match_operand:SI 2 "nonimmediate_operand" "rm")))
2711 (match_operand:V2DF 1 "register_operand" "x")
2714 "vcvtsi2sd\t{%2, %1, %0|%0, %1, %2}"
2715 [(set_attr "type" "sseicvt")
2716 (set_attr "prefix" "vex")
2717 (set_attr "mode" "DF")])
;; Named insn sse2_cvtsi2sd: float:DF of SI operand 2 combined with V2DF
;; operand 1, which is tied to the destination ("0,0").  Emits two-operand
;; cvtsi2sd.  The register/memory alternatives carry different
;; athlon_decode and amdfam10_decode values.
;; NOTE(review): the enable condition is not visible in this view; confirm.
2719 (define_insn "sse2_cvtsi2sd"
2720 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2723 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
2724 (match_operand:V2DF 1 "register_operand" "0,0")
2727 "cvtsi2sd\t{%2, %0|%0, %2}"
2728 [(set_attr "type" "sseicvt")
2729 (set_attr "mode" "DF")
2730 (set_attr "athlon_decode" "double,direct")
2731 (set_attr "amdfam10_decode" "vector,double")])
;; Insn *avx_cvtsi2sdq: DI-source variant, float:DF of DI operand 2
;; ("rm") combined with V2DF operand 1.  Enabled only when
;; TARGET_AVX && TARGET_64BIT; emits three-operand vcvtsi2sdq with
;; "length_vex" set to "4".
2733 (define_insn "*avx_cvtsi2sdq"
2734 [(set (match_operand:V2DF 0 "register_operand" "=x")
2737 (float:DF (match_operand:DI 2 "nonimmediate_operand" "rm")))
2738 (match_operand:V2DF 1 "register_operand" "x")
2740 "TARGET_AVX && TARGET_64BIT"
2741 "vcvtsi2sdq\t{%2, %1, %0|%0, %1, %2}"
2742 [(set_attr "type" "sseicvt")
2743 (set_attr "length_vex" "4")
2744 (set_attr "prefix" "vex")
2745 (set_attr "mode" "DF")])
;; Named insn sse2_cvtsi2sdq: DI-source variant, float:DF of DI operand 2
;; combined with V2DF operand 1 (tied to the destination, "0,0").
;; Enabled only when TARGET_SSE2 && TARGET_64BIT; emits cvtsi2sdq with
;; "prefix_rex" set to "1".
2747 (define_insn "sse2_cvtsi2sdq"
2748 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2751 (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m")))
2752 (match_operand:V2DF 1 "register_operand" "0,0")
2754 "TARGET_SSE2 && TARGET_64BIT"
2755 "cvtsi2sdq\t{%2, %0|%0, %2}"
2756 [(set_attr "type" "sseicvt")
2757 (set_attr "prefix_rex" "1")
2758 (set_attr "mode" "DF")
2759 (set_attr "athlon_decode" "double,direct")
2760 (set_attr "amdfam10_decode" "vector,double")])
;; Named insn sse2_cvtsd2si: converts element 0 of V2DF operand 1 to SI
;; operand 0 via UNSPEC_FIX_NOTRUNC.  Emits %vcvtsd2si; prefix
;; "maybe_vex" pairs with the "%v" in the template.
;; NOTE(review): the enable condition is not visible in this view; confirm.
2762 (define_insn "sse2_cvtsd2si"
2763 [(set (match_operand:SI 0 "register_operand" "=r,r")
2766 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2767 (parallel [(const_int 0)]))]
2768 UNSPEC_FIX_NOTRUNC))]
2770 "%vcvtsd2si\t{%1, %0|%0, %1}"
2771 [(set_attr "type" "sseicvt")
2772 (set_attr "athlon_decode" "double,vector")
2773 (set_attr "prefix_rep" "1")
2774 (set_attr "prefix" "maybe_vex")
2775 (set_attr "mode" "SI")])
;; Named insn sse2_cvtsd2si_2: same instruction as sse2_cvtsd2si but the
;; source is a plain DF operand rather than an element of a V2DF vector.
;; SI result is produced via UNSPEC_FIX_NOTRUNC; emits %vcvtsd2si.
;; NOTE(review): the enable condition is not visible in this view; confirm.
2777 (define_insn "sse2_cvtsd2si_2"
2778 [(set (match_operand:SI 0 "register_operand" "=r,r")
2779 (unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2780 UNSPEC_FIX_NOTRUNC))]
2782 "%vcvtsd2si\t{%1, %0|%0, %1}"
2783 [(set_attr "type" "sseicvt")
2784 (set_attr "athlon_decode" "double,vector")
2785 (set_attr "amdfam10_decode" "double,double")
2786 (set_attr "prefix_rep" "1")
2787 (set_attr "prefix" "maybe_vex")
2788 (set_attr "mode" "SI")])
;; Named insn sse2_cvtsd2siq: DI-result variant; converts element 0 of
;; V2DF operand 1 via UNSPEC_FIX_NOTRUNC.  Enabled only when
;; TARGET_SSE2 && TARGET_64BIT; emits %vcvtsd2siq (the q suffix is
;; unconditional here, not wrapped in braces).
2790 (define_insn "sse2_cvtsd2siq"
2791 [(set (match_operand:DI 0 "register_operand" "=r,r")
2794 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2795 (parallel [(const_int 0)]))]
2796 UNSPEC_FIX_NOTRUNC))]
2797 "TARGET_SSE2 && TARGET_64BIT"
2798 "%vcvtsd2siq\t{%1, %0|%0, %1}"
2799 [(set_attr "type" "sseicvt")
2800 (set_attr "athlon_decode" "double,vector")
2801 (set_attr "prefix_rep" "1")
2802 (set_attr "prefix" "maybe_vex")
2803 (set_attr "mode" "DI")])
;; Named insn sse2_cvtsd2siq_2: DI-result variant taking a plain DF source
;; operand, converted via UNSPEC_FIX_NOTRUNC.  Enabled only when
;; TARGET_SSE2 && TARGET_64BIT; emits %vcvtsd2siq.
2805 (define_insn "sse2_cvtsd2siq_2"
2806 [(set (match_operand:DI 0 "register_operand" "=r,r")
2807 (unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2808 UNSPEC_FIX_NOTRUNC))]
2809 "TARGET_SSE2 && TARGET_64BIT"
2810 "%vcvtsd2siq\t{%1, %0|%0, %1}"
2811 [(set_attr "type" "sseicvt")
2812 (set_attr "athlon_decode" "double,vector")
2813 (set_attr "amdfam10_decode" "double,double")
2814 (set_attr "prefix_rep" "1")
2815 (set_attr "prefix" "maybe_vex")
2816 (set_attr "mode" "DI")])
;; Named insn sse2_cvttsd2si: converts element 0 of V2DF operand 1 into
;; SI operand 0 and emits %vcvttsd2si (no UNSPEC_FIX_NOTRUNC appears in
;; the visible template, unlike sse2_cvtsd2si).
;; NOTE(review): the wrapping RTL operator and the enable condition are
;; not visible in this view; confirm against the full file.
2818 (define_insn "sse2_cvttsd2si"
2819 [(set (match_operand:SI 0 "register_operand" "=r,r")
2822 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2823 (parallel [(const_int 0)]))))]
2825 "%vcvttsd2si\t{%1, %0|%0, %1}"
2826 [(set_attr "type" "sseicvt")
2827 (set_attr "prefix_rep" "1")
2828 (set_attr "prefix" "maybe_vex")
2829 (set_attr "mode" "SI")
2830 (set_attr "athlon_decode" "double,vector")
2831 (set_attr "amdfam10_decode" "double,double")])
;; Named insn sse2_cvttsd2siq: DI-result variant of sse2_cvttsd2si,
;; reading element 0 of V2DF operand 1.  Enabled only when
;; TARGET_SSE2 && TARGET_64BIT; emits %vcvttsd2siq.
2833 (define_insn "sse2_cvttsd2siq"
2834 [(set (match_operand:DI 0 "register_operand" "=r,r")
2837 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2838 (parallel [(const_int 0)]))))]
2839 "TARGET_SSE2 && TARGET_64BIT"
2840 "%vcvttsd2siq\t{%1, %0|%0, %1}"
2841 [(set_attr "type" "sseicvt")
2842 (set_attr "prefix_rep" "1")
2843 (set_attr "prefix" "maybe_vex")
2844 (set_attr "mode" "DI")
2845 (set_attr "athlon_decode" "double,vector")
2846 (set_attr "amdfam10_decode" "double,double")])
;; Named insn avx_cvtdq2pd256: float conversion of V4SI operand 1 into
;; V4DF operand 0.  Emits vcvtdq2pd with prefix "vex".
;; NOTE(review): the enable condition is not visible in this view; confirm.
2848 (define_insn "avx_cvtdq2pd256"
2849 [(set (match_operand:V4DF 0 "register_operand" "=x")
2850 (float:V4DF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
2852 "vcvtdq2pd\t{%1, %0|%0, %1}"
2853 [(set_attr "type" "ssecvt")
2854 (set_attr "prefix" "vex")
2855 (set_attr "mode" "V4DF")])
;; Named insn sse2_cvtdq2pd: produces V2DF operand 0 from elements 0 and
;; 1 of V4SI operand 1.  Emits %vcvtdq2pd (prefix "maybe_vex").
;; NOTE(review): the conversion operator wrapping the element selection
;; and the enable condition are not visible in this view; confirm.
2857 (define_insn "sse2_cvtdq2pd"
2858 [(set (match_operand:V2DF 0 "register_operand" "=x")
2861 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
2862 (parallel [(const_int 0) (const_int 1)]))))]
2864 "%vcvtdq2pd\t{%1, %0|%0, %1}"
2865 [(set_attr "type" "ssecvt")
2866 (set_attr "prefix" "maybe_vex")
2867 (set_attr "mode" "V2DF")])
;; Named insn avx_cvtpd2dq256: converts V4DF operand 1 to V4SI operand 0
;; via UNSPEC_FIX_NOTRUNC.  Emits vcvtpd2dq; the "{y}" adds a y suffix in
;; the AT&T dialect only.
;; NOTE(review): the enable condition is not visible in this view; confirm.
2869 (define_insn "avx_cvtpd2dq256"
2870 [(set (match_operand:V4SI 0 "register_operand" "=x")
2871 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")]
2872 UNSPEC_FIX_NOTRUNC))]
2874 "vcvtpd2dq{y}\t{%1, %0|%0, %1}"
2875 [(set_attr "type" "ssecvt")
2876 (set_attr "prefix" "vex")
2877 (set_attr "mode" "OI")])
;; Expander sse2_cvtpd2dq: V4SI operand 0 is built from an unspec:V2SI
;; conversion of V2DF operand 1.  The preparation statement supplies
;; operand 2 as the V2SI zero constant (CONST0_RTX), which the matching
;; insn pattern requires via const0_operand.
;; NOTE(review): the combining operator, unspec code and condition are
;; not visible in this view; confirm against the full file.
2879 (define_expand "sse2_cvtpd2dq"
2880 [(set (match_operand:V4SI 0 "register_operand" "")
2882 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "")]
2886 "operands[2] = CONST0_RTX (V2SImode);")
;; Insn *sse2_cvtpd2dq: matches an unspec:V2SI conversion of V2DF operand
;; 1 paired with operand 2, which must satisfy const0_operand.
;; The output is chosen by C code at emit time: vcvtpd2dq (with an
;; AT&T-only x suffix) when TARGET_AVX is set, otherwise cvtpd2dq.
;; NOTE(review): the enable condition is not visible in this view; confirm.
2888 (define_insn "*sse2_cvtpd2dq"
2889 [(set (match_operand:V4SI 0 "register_operand" "=x")
2891 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2893 (match_operand:V2SI 2 "const0_operand" "")))]
2895 "* return TARGET_AVX ? \"vcvtpd2dq{x}\t{%1, %0|%0, %1}\"
2896 : \"cvtpd2dq\t{%1, %0|%0, %1}\";"
2897 [(set_attr "type" "ssecvt")
2898 (set_attr "prefix_rep" "1")
2899 (set_attr "prefix_data16" "0")
2900 (set_attr "prefix" "maybe_vex")
2901 (set_attr "mode" "TI")
2902 (set_attr "amdfam10_decode" "double")])
;; Named insn avx_cvttpd2dq256: applies fix:V4SI to V4DF operand 1.
;; Emits vcvttpd2dq; the "{y}" adds a y suffix in the AT&T dialect only.
;; NOTE(review): the enable condition is not visible in this view; confirm.
2904 (define_insn "avx_cvttpd2dq256"
2905 [(set (match_operand:V4SI 0 "register_operand" "=x")
2906 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
2908 "vcvttpd2dq{y}\t{%1, %0|%0, %1}"
2909 [(set_attr "type" "ssecvt")
2910 (set_attr "prefix" "vex")
2911 (set_attr "mode" "OI")])
;; Expander sse2_cvttpd2dq: V4SI operand 0 is built from fix:V2SI of V2DF
;; operand 1.  The preparation statement supplies operand 2 as the V2SI
;; zero constant (CONST0_RTX), which the matching insn pattern requires
;; via const0_operand.
;; NOTE(review): the combining operator and condition are not visible in
;; this view; confirm against the full file.
2913 (define_expand "sse2_cvttpd2dq"
2914 [(set (match_operand:V4SI 0 "register_operand" "")
2916 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" ""))
2919 "operands[2] = CONST0_RTX (V2SImode);")
;; Insn *sse2_cvttpd2dq: matches fix:V2SI of V2DF operand 1 paired with
;; operand 2, which must satisfy const0_operand.
;; The output is chosen by C code at emit time: vcvttpd2dq (with an
;; AT&T-only x suffix) when TARGET_AVX is set, otherwise cvttpd2dq.
;; NOTE(review): the enable condition is not visible in this view; confirm.
2921 (define_insn "*sse2_cvttpd2dq"
2922 [(set (match_operand:V4SI 0 "register_operand" "=x")
2924 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2925 (match_operand:V2SI 2 "const0_operand" "")))]
2927 "* return TARGET_AVX ? \"vcvttpd2dq{x}\t{%1, %0|%0, %1}\"
2928 : \"cvttpd2dq\t{%1, %0|%0, %1}\";"
2929 [(set_attr "type" "ssecvt")
2930 (set_attr "prefix" "maybe_vex")
2931 (set_attr "mode" "TI")
2932 (set_attr "amdfam10_decode" "double")])
;; Insn *avx_cvtsd2ss: float_truncate:V2SF of V2DF operand 2 ("xm")
;; combined with V4SF operand 1.  Emits three-operand vcvtsd2ss with
;; prefix "vex", so operand 1 is not tied to the destination.
;; NOTE(review): the enable condition is not visible in this view; confirm.
2934 (define_insn "*avx_cvtsd2ss"
2935 [(set (match_operand:V4SF 0 "register_operand" "=x")
2938 (float_truncate:V2SF
2939 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))
2940 (match_operand:V4SF 1 "register_operand" "x")
2943 "vcvtsd2ss\t{%2, %1, %0|%0, %1, %2}"
2944 [(set_attr "type" "ssecvt")
2945 (set_attr "prefix" "vex")
2946 (set_attr "mode" "SF")])
;; Named insn sse2_cvtsd2ss: float_truncate:V2SF of V2DF operand 2
;; combined with V4SF operand 1, which is tied to the destination
;; ("0,0").  Emits two-operand cvtsd2ss; decode attributes differ
;; between the register and memory alternatives.
;; NOTE(review): the enable condition is not visible in this view; confirm.
2948 (define_insn "sse2_cvtsd2ss"
2949 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2952 (float_truncate:V2SF
2953 (match_operand:V2DF 2 "nonimmediate_operand" "x,m")))
2954 (match_operand:V4SF 1 "register_operand" "0,0")
2957 "cvtsd2ss\t{%2, %0|%0, %2}"
2958 [(set_attr "type" "ssecvt")
2959 (set_attr "athlon_decode" "vector,double")
2960 (set_attr "amdfam10_decode" "vector,double")
2961 (set_attr "mode" "SF")])
;; Insn *avx_cvtss2sd: uses elements 0 and 1 of V4SF operand 2 ("xm")
;; together with V2DF operand 1.  Emits three-operand vcvtss2sd with
;; prefix "vex".
;; NOTE(review): the conversion operator around the element selection and
;; the enable condition are not visible in this view; confirm.
2963 (define_insn "*avx_cvtss2sd"
2964 [(set (match_operand:V2DF 0 "register_operand" "=x")
2968 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
2969 (parallel [(const_int 0) (const_int 1)])))
2970 (match_operand:V2DF 1 "register_operand" "x")
2973 "vcvtss2sd\t{%2, %1, %0|%0, %1, %2}"
2974 [(set_attr "type" "ssecvt")
2975 (set_attr "prefix" "vex")
2976 (set_attr "mode" "DF")])
;; Named insn sse2_cvtss2sd: uses elements 0 and 1 of V4SF operand 2
;; together with V2DF operand 1, which is tied to the destination
;; ("0,0").  Emits two-operand cvtss2sd.
;; NOTE(review): the conversion operator around the element selection and
;; the enable condition are not visible in this view; confirm.
2978 (define_insn "sse2_cvtss2sd"
2979 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2983 (match_operand:V4SF 2 "nonimmediate_operand" "x,m")
2984 (parallel [(const_int 0) (const_int 1)])))
2985 (match_operand:V2DF 1 "register_operand" "0,0")
2988 "cvtss2sd\t{%2, %0|%0, %2}"
2989 [(set_attr "type" "ssecvt")
2990 (set_attr "amdfam10_decode" "vector,double")
2991 (set_attr "mode" "DF")])
;; Named insn avx_cvtpd2ps256: float_truncate of V4DF operand 1 into V4SF
;; operand 0.  Emits vcvtpd2ps; the "{y}" adds a y suffix in the AT&T
;; dialect only.
;; NOTE(review): the enable condition is not visible in this view; confirm.
2993 (define_insn "avx_cvtpd2ps256"
2994 [(set (match_operand:V4SF 0 "register_operand" "=x")
2995 (float_truncate:V4SF
2996 (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
2998 "vcvtpd2ps{y}\t{%1, %0|%0, %1}"
2999 [(set_attr "type" "ssecvt")
3000 (set_attr "prefix" "vex")
3001 (set_attr "mode" "V4SF")])
;; Expander sse2_cvtpd2ps: V4SF operand 0 is built from
;; float_truncate:V2SF of V2DF operand 1.  The preparation statement
;; supplies operand 2 as the V2SF zero constant (CONST0_RTX), which the
;; matching insn pattern requires via const0_operand.
;; NOTE(review): the combining operator and condition are not visible in
;; this view; confirm against the full file.
3003 (define_expand "sse2_cvtpd2ps"
3004 [(set (match_operand:V4SF 0 "register_operand" "")
3006 (float_truncate:V2SF
3007 (match_operand:V2DF 1 "nonimmediate_operand" ""))
3010 "operands[2] = CONST0_RTX (V2SFmode);")
;; Insn *sse2_cvtpd2ps: matches float_truncate:V2SF of V2DF operand 1
;; paired with operand 2, which must satisfy const0_operand.
;; The output is chosen by C code at emit time: vcvtpd2ps (with an
;; AT&T-only x suffix) when TARGET_AVX is set, otherwise cvtpd2ps.
;; NOTE(review): the enable condition is not visible in this view; confirm.
3012 (define_insn "*sse2_cvtpd2ps"
3013 [(set (match_operand:V4SF 0 "register_operand" "=x")
3015 (float_truncate:V2SF
3016 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
3017 (match_operand:V2SF 2 "const0_operand" "")))]
3019 "* return TARGET_AVX ? \"vcvtpd2ps{x}\t{%1, %0|%0, %1}\"
3020 : \"cvtpd2ps\t{%1, %0|%0, %1}\";"
3021 [(set_attr "type" "ssecvt")
3022 (set_attr "prefix_data16" "1")
3023 (set_attr "prefix" "maybe_vex")
3024 (set_attr "mode" "V4SF")
3025 (set_attr "amdfam10_decode" "double")])
;; Convert a V4SF operand (register or memory) to V4DF with vcvtps2pd.
3027 (define_insn "avx_cvtps2pd256"
3028 [(set (match_operand:V4DF 0 "register_operand" "=x")
3030 (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
3032 "vcvtps2pd\t{%1, %0|%0, %1}"
3033 [(set_attr "type" "ssecvt")
3034 (set_attr "prefix" "vex")
3035 (set_attr "mode" "V4DF")])
;; Convert elements 0 and 1 of a V4SF operand (register or memory) to
;; V2DF.  The template starts with "%v" and the prefix attribute is
;; "maybe_vex"; presumably "%v" yields the v-prefixed mnemonic when AVX
;; is enabled -- TODO confirm against the operand-printing code.
3037 (define_insn "sse2_cvtps2pd"
3038 [(set (match_operand:V2DF 0 "register_operand" "=x")
3041 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3042 (parallel [(const_int 0) (const_int 1)]))))]
3044 "%vcvtps2pd\t{%1, %0|%0, %1}"
3045 [(set_attr "type" "ssecvt")
3046 (set_attr "prefix" "maybe_vex")
3047 (set_attr "mode" "V2DF")
3048 (set_attr "prefix_data16" "0")
3049 (set_attr "amdfam10_decode" "direct")])
;; Expander vec_unpacks_hi_v4sf: V4SF operand 1 -> V2DF operand 0.
;; A first step selects elements starting at index 6 of a combined
;; vector; the final set takes elements 0 and 1 of the intermediate.
;; The preparation code allocates operand 2 as a fresh V4SF pseudo.
3051 (define_expand "vec_unpacks_hi_v4sf"
3056 (match_operand:V4SF 1 "nonimmediate_operand" ""))
3057 (parallel [(const_int 6)
3061 (set (match_operand:V2DF 0 "register_operand" "")
3065 (parallel [(const_int 0) (const_int 1)]))))]
3068 operands[2] = gen_reg_rtx (V4SFmode);
;; Expander vec_unpacks_lo_v4sf: produces V2DF operand 0 from elements
;; 0 and 1 of V4SF operand 1 (register or memory).
3071 (define_expand "vec_unpacks_lo_v4sf"
3072 [(set (match_operand:V2DF 0 "register_operand" "")
3075 (match_operand:V4SF 1 "nonimmediate_operand" "")
3076 (parallel [(const_int 0) (const_int 1)]))))]
;; Expander: V8HI operand 1 -> V4SF operand 0.  Unpacks through
;; gen_vec_unpacks_hi_v8hi into a temporary V4SI pseudo, then converts
;; that temporary with gen_sse2_cvtdq2ps.
3079 (define_expand "vec_unpacks_float_hi_v8hi"
3080 [(match_operand:V4SF 0 "register_operand" "")
3081 (match_operand:V8HI 1 "register_operand" "")]
3084 rtx tmp = gen_reg_rtx (V4SImode);
3086 emit_insn (gen_vec_unpacks_hi_v8hi (tmp, operands[1]));
3087 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; Expander: V8HI operand 1 -> V4SF operand 0.  Unpacks through
;; gen_vec_unpacks_lo_v8hi into a temporary V4SI pseudo, then converts
;; that temporary with gen_sse2_cvtdq2ps.
3091 (define_expand "vec_unpacks_float_lo_v8hi"
3092 [(match_operand:V4SF 0 "register_operand" "")
3093 (match_operand:V8HI 1 "register_operand" "")]
3096 rtx tmp = gen_reg_rtx (V4SImode);
3098 emit_insn (gen_vec_unpacks_lo_v8hi (tmp, operands[1]));
3099 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; Expander: V8HI operand 1 -> V4SF operand 0, using the "unpacku"
;; helper.  Unpacks through gen_vec_unpacku_hi_v8hi into a temporary
;; V4SI pseudo, then converts that temporary with gen_sse2_cvtdq2ps.
3103 (define_expand "vec_unpacku_float_hi_v8hi"
3104 [(match_operand:V4SF 0 "register_operand" "")
3105 (match_operand:V8HI 1 "register_operand" "")]
3108 rtx tmp = gen_reg_rtx (V4SImode);
3110 emit_insn (gen_vec_unpacku_hi_v8hi (tmp, operands[1]));
3111 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; Expander: V8HI operand 1 -> V4SF operand 0, using the "unpacku"
;; helper.  Unpacks through gen_vec_unpacku_lo_v8hi into a temporary
;; V4SI pseudo, then converts that temporary with gen_sse2_cvtdq2ps.
3115 (define_expand "vec_unpacku_float_lo_v8hi"
3116 [(match_operand:V4SF 0 "register_operand" "")
3117 (match_operand:V8HI 1 "register_operand" "")]
3120 rtx tmp = gen_reg_rtx (V4SImode);
3122 emit_insn (gen_vec_unpacku_lo_v8hi (tmp, operands[1]));
3123 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; Expander vec_unpacks_float_hi_v4si: V4SI operand 1 -> V2DF operand 0.
;; A first selection starting at element 2 of operand 1 feeds an
;; intermediate; the final set uses elements 0 and 1 of it.  Operand 2
;; is a scratch V4SI pseudo created by the preparation statement.
3127 (define_expand "vec_unpacks_float_hi_v4si"
3130 (match_operand:V4SI 1 "nonimmediate_operand" "")
3131 (parallel [(const_int 2)
3135 (set (match_operand:V2DF 0 "register_operand" "")
3139 (parallel [(const_int 0) (const_int 1)]))))]
3141 "operands[2] = gen_reg_rtx (V4SImode);")
;; Expander vec_unpacks_float_lo_v4si: produces V2DF operand 0 from
;; elements 0 and 1 of V4SI operand 1 (register or memory).
3143 (define_expand "vec_unpacks_float_lo_v4si"
3144 [(set (match_operand:V2DF 0 "register_operand" "")
3147 (match_operand:V4SI 1 "nonimmediate_operand" "")
3148 (parallel [(const_int 0) (const_int 1)]))))]
;; Expander vec_unpacku_float_hi_v4si: V4SI operand 1 -> V2DF operand 0.
;; Preparation code sets up:
;;   operand 3  - all-zero V2DF vector, forced into a register;
;;   operand 4  - V2DF vector built from 2^32 (real_ldexp of dconst1
;;                by 32), forced into a register;
;;   operand 5  - scratch V4SI pseudo;
;;   operands 6..8 - scratch V2DF pseudos.
;; The emitted sequence compares operand 6 with zero (lt) into
;; operand 7, ANDs that mask with the 2^32 vector into operand 8, and
;; stores operand 6 + operand 8 in operand 0.  Presumably this corrects
;; lanes that a signed conversion turned negative -- TODO confirm.
3151 (define_expand "vec_unpacku_float_hi_v4si"
3154 (match_operand:V4SI 1 "nonimmediate_operand" "")
3155 (parallel [(const_int 2)
3163 (parallel [(const_int 0) (const_int 1)]))))
3165 (lt:V2DF (match_dup 6) (match_dup 3)))
3167 (and:V2DF (match_dup 7) (match_dup 4)))
3168 (set (match_operand:V2DF 0 "register_operand" "")
3169 (plus:V2DF (match_dup 6) (match_dup 8)))]
3172 REAL_VALUE_TYPE TWO32r;
3176 real_ldexp (&TWO32r, &dconst1, 32);
3177 x = const_double_from_real_value (TWO32r, DFmode);
3179 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
3180 operands[4] = force_reg (V2DFmode, ix86_build_const_vector (DFmode, 1, x));
3182 operands[5] = gen_reg_rtx (V4SImode);
3184 for (i = 6; i < 9; i++)
3185 operands[i] = gen_reg_rtx (V2DFmode);
;; Expander vec_unpacku_float_lo_v4si: V4SI operand 1 -> V2DF operand 0,
;; starting from elements 0 and 1 of operand 1.
;; Preparation code sets up:
;;   operand 3  - all-zero V2DF vector, forced into a register;
;;   operand 4  - V2DF vector built from 2^32 (real_ldexp of dconst1
;;                by 32), forced into a register;
;;   operands 5..7 - scratch V2DF pseudos.
;; The emitted sequence compares operand 5 with zero (lt) into
;; operand 6, ANDs that mask with the 2^32 vector into operand 7, and
;; stores operand 5 + operand 7 in operand 0.
3188 (define_expand "vec_unpacku_float_lo_v4si"
3192 (match_operand:V4SI 1 "nonimmediate_operand" "")
3193 (parallel [(const_int 0) (const_int 1)]))))
3195 (lt:V2DF (match_dup 5) (match_dup 3)))
3197 (and:V2DF (match_dup 6) (match_dup 4)))
3198 (set (match_operand:V2DF 0 "register_operand" "")
3199 (plus:V2DF (match_dup 5) (match_dup 7)))]
3202 REAL_VALUE_TYPE TWO32r;
3206 real_ldexp (&TWO32r, &dconst1, 32);
3207 x = const_double_from_real_value (TWO32r, DFmode);
3209 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
3210 operands[4] = force_reg (V2DFmode, ix86_build_const_vector (DFmode, 1, x));
3212 for (i = 5; i < 8; i++)
3213 operands[i] = gen_reg_rtx (V2DFmode);
;; Expander: pack two V2DF operands (1 and 2) into V4SF operand 0.
;; Each input is converted with gen_sse2_cvtpd2ps into its own V4SF
;; temporary (r1, r2); gen_sse_movlhps then combines r1 and r2 into the
;; destination.
3216 (define_expand "vec_pack_trunc_v2df"
3217 [(match_operand:V4SF 0 "register_operand" "")
3218 (match_operand:V2DF 1 "nonimmediate_operand" "")
3219 (match_operand:V2DF 2 "nonimmediate_operand" "")]
3224 r1 = gen_reg_rtx (V4SFmode);
3225 r2 = gen_reg_rtx (V4SFmode);
3227 emit_insn (gen_sse2_cvtpd2ps (r1, operands[1]));
3228 emit_insn (gen_sse2_cvtpd2ps (r2, operands[2]));
3229 emit_insn (gen_sse_movlhps (operands[0], r1, r2));
;; Expander: pack two V2DF operands (1 and 2) into V4SI operand 0.
;; Each input is converted with gen_sse2_cvttpd2dq into a V4SI
;; temporary; the V2DI lowparts of the temporaries are then combined
;; into the V2DI lowpart of the destination with
;; gen_vec_interleave_lowv2di.
3233 (define_expand "vec_pack_sfix_trunc_v2df"
3234 [(match_operand:V4SI 0 "register_operand" "")
3235 (match_operand:V2DF 1 "nonimmediate_operand" "")
3236 (match_operand:V2DF 2 "nonimmediate_operand" "")]
3241 r1 = gen_reg_rtx (V4SImode);
3242 r2 = gen_reg_rtx (V4SImode);
3244 emit_insn (gen_sse2_cvttpd2dq (r1, operands[1]));
3245 emit_insn (gen_sse2_cvttpd2dq (r2, operands[2]));
3246 emit_insn (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]),
3247 gen_lowpart (V2DImode, r1),
3248 gen_lowpart (V2DImode, r2)));
;; Expander: pack two V2DF operands (1 and 2) into V4SI operand 0.
;; Same shape as vec_pack_sfix_trunc_v2df, but each input is converted
;; with gen_sse2_cvtpd2dq (not the cvttpd2dq variant) before the V2DI
;; lowparts are combined with gen_vec_interleave_lowv2di.
3252 (define_expand "vec_pack_sfix_v2df"
3253 [(match_operand:V4SI 0 "register_operand" "")
3254 (match_operand:V2DF 1 "nonimmediate_operand" "")
3255 (match_operand:V2DF 2 "nonimmediate_operand" "")]
3260 r1 = gen_reg_rtx (V4SImode);
3261 r2 = gen_reg_rtx (V4SImode);
3263 emit_insn (gen_sse2_cvtpd2dq (r1, operands[1]));
3264 emit_insn (gen_sse2_cvtpd2dq (r2, operands[2]));
3265 emit_insn (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]),
3266 gen_lowpart (V2DImode, r1),
3267 gen_lowpart (V2DImode, r2)));
3271 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3273 ;; Parallel single-precision floating point element swizzling
3275 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander for the V4SF movhlps operation.  All three operands may be
;; registers or memory; the selector starts at element 6 of the operand
;; pair.  The preparation statement passes the operands through
;; ix86_fixup_binary_operands.
3277 (define_expand "sse_movhlps_exp"
3278 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3281 (match_operand:V4SF 1 "nonimmediate_operand" "")
3282 (match_operand:V4SF 2 "nonimmediate_operand" ""))
3283 (parallel [(const_int 6)
3288 "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
;; AVX movhlps pattern; rejected when operands 1 and 2 are both memory.
;; Alternatives:
;;   0: register sources                  -> vmovhlps
;;   1: operand 2 in offsettable memory   -> vmovlps using %H2
;;   2: memory destination (op 1 tied)    -> vmovhps
3290 (define_insn "*avx_movhlps"
3291 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
3294 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,0")
3295 (match_operand:V4SF 2 "nonimmediate_operand" " x,o,x"))
3296 (parallel [(const_int 6)
3300 "TARGET_AVX && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3302 vmovhlps\t{%2, %1, %0|%0, %1, %2}
3303 vmovlps\t{%H2, %1, %0|%0, %1, %H2}
3304 vmovhps\t{%2, %0|%0, %2}"
3305 [(set_attr "type" "ssemov")
3306 (set_attr "prefix" "vex")
3307 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Non-AVX movhlps pattern; rejected when operands 1 and 2 are both
;; memory.  Operand 1 is tied to the destination in every alternative.
;; Alternatives:
;;   0: register source                   -> movhlps
;;   1: operand 2 in offsettable memory   -> movlps using %H2
;;   2: memory destination                -> movhps
3309 (define_insn "sse_movhlps"
3310 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
3313 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
3314 (match_operand:V4SF 2 "nonimmediate_operand" " x,o,x"))
3315 (parallel [(const_int 6)
3319 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3321 movhlps\t{%2, %0|%0, %2}
3322 movlps\t{%H2, %0|%0, %H2}
3323 movhps\t{%2, %0|%0, %2}"
3324 [(set_attr "type" "ssemov")
3325 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Expander for the V4SF movlhps operation.  All three operands may be
;; registers or memory; the selector starts at element 0 of the operand
;; pair.  The preparation statement passes the operands through
;; ix86_fixup_binary_operands.
3327 (define_expand "sse_movlhps_exp"
3328 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3331 (match_operand:V4SF 1 "nonimmediate_operand" "")
3332 (match_operand:V4SF 2 "nonimmediate_operand" ""))
3333 (parallel [(const_int 0)
3338 "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
;; AVX movlhps pattern, enabled when TARGET_AVX holds and
;; ix86_binary_operator_ok accepts the operands.
;; Alternatives:
;;   0: register sources                        -> vmovlhps
;;   1: operand 2 in memory                     -> vmovhps
;;   2: offsettable memory dest (op 1 tied)     -> vmovlps to %H0
3340 (define_insn "*avx_movlhps"
3341 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
3344 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,0")
3345 (match_operand:V4SF 2 "nonimmediate_operand" " x,m,x"))
3346 (parallel [(const_int 0)
3350 "TARGET_AVX && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
3352 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3353 vmovhps\t{%2, %1, %0|%0, %1, %2}
3354 vmovlps\t{%2, %H0|%H0, %2}"
3355 [(set_attr "type" "ssemov")
3356 (set_attr "prefix" "vex")
3357 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Non-AVX movlhps pattern, enabled when TARGET_SSE holds and
;; ix86_binary_operator_ok accepts the operands.  Operand 1 is tied to
;; the destination in every alternative.
;; Alternatives:
;;   0: register source             -> movlhps
;;   1: operand 2 in memory         -> movhps
;;   2: offsettable memory dest     -> movlps to %H0
3359 (define_insn "sse_movlhps"
3360 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
3363 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
3364 (match_operand:V4SF 2 "nonimmediate_operand" " x,m,x"))
3365 (parallel [(const_int 0)
3369 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
3371 movlhps\t{%2, %0|%0, %2}
3372 movhps\t{%2, %0|%0, %2}
3373 movlps\t{%2, %H0|%H0, %2}"
3374 [(set_attr "type" "ssemov")
3375 (set_attr "mode" "V4SF,V2SF,V2SF")])
3377 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; V8SF vunpckhps.  The selector over the (operand 1, operand 2) pair
;; is {2,10, 3,11, 6,14, 7,15}: elements 2-3 and 6-7 of each input,
;; interleaved.  Operand 2 may be a register or memory.
3378 (define_insn "avx_unpckhps256"
3379 [(set (match_operand:V8SF 0 "register_operand" "=x")
3382 (match_operand:V8SF 1 "register_operand" "x")
3383 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3384 (parallel [(const_int 2) (const_int 10)
3385 (const_int 3) (const_int 11)
3386 (const_int 6) (const_int 14)
3387 (const_int 7) (const_int 15)])))]
3389 "vunpckhps\t{%2, %1, %0|%0, %1, %2}"
3390 [(set_attr "type" "sselog")
3391 (set_attr "prefix" "vex")
3392 (set_attr "mode" "V8SF")])
;; AVX three-operand vunpckhps on V4SF.  The selector over the
;; (operand 1, operand 2) pair is {2,6, 3,7}: the upper two elements of
;; each input, interleaved.
3394 (define_insn "*avx_interleave_highv4sf"
3395 [(set (match_operand:V4SF 0 "register_operand" "=x")
3398 (match_operand:V4SF 1 "register_operand" "x")
3399 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
3400 (parallel [(const_int 2) (const_int 6)
3401 (const_int 3) (const_int 7)])))]
3403 "vunpckhps\t{%2, %1, %0|%0, %1, %2}"
3404 [(set_attr "type" "sselog")
3405 (set_attr "prefix" "vex")
3406 (set_attr "mode" "V4SF")])
;; Two-operand unpckhps on V4SF; operand 1 is tied to the destination.
;; The selector over the (operand 1, operand 2) pair is {2,6, 3,7}.
3408 (define_insn "vec_interleave_highv4sf"
3409 [(set (match_operand:V4SF 0 "register_operand" "=x")
3412 (match_operand:V4SF 1 "register_operand" "0")
3413 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
3414 (parallel [(const_int 2) (const_int 6)
3415 (const_int 3) (const_int 7)])))]
3417 "unpckhps\t{%2, %0|%0, %2}"
3418 [(set_attr "type" "sselog")
3419 (set_attr "mode" "V4SF")])
3421 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; V8SF vunpcklps.  The selector over the (operand 1, operand 2) pair
;; is {0,8, 1,9, 4,12, 5,13}: elements 0-1 and 4-5 of each input,
;; interleaved.  Operand 2 may be a register or memory.
3422 (define_insn "avx_unpcklps256"
3423 [(set (match_operand:V8SF 0 "register_operand" "=x")
3426 (match_operand:V8SF 1 "register_operand" "x")
3427 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3428 (parallel [(const_int 0) (const_int 8)
3429 (const_int 1) (const_int 9)
3430 (const_int 4) (const_int 12)
3431 (const_int 5) (const_int 13)])))]
3433 "vunpcklps\t{%2, %1, %0|%0, %1, %2}"
3434 [(set_attr "type" "sselog")
3435 (set_attr "prefix" "vex")
3436 (set_attr "mode" "V8SF")])
;; AVX three-operand vunpcklps on V4SF.  The selector over the
;; (operand 1, operand 2) pair is {0,4, 1,5}: the lower two elements of
;; each input, interleaved.
3438 (define_insn "*avx_interleave_lowv4sf"
3439 [(set (match_operand:V4SF 0 "register_operand" "=x")
3442 (match_operand:V4SF 1 "register_operand" "x")
3443 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
3444 (parallel [(const_int 0) (const_int 4)
3445 (const_int 1) (const_int 5)])))]
3447 "vunpcklps\t{%2, %1, %0|%0, %1, %2}"
3448 [(set_attr "type" "sselog")
3449 (set_attr "prefix" "vex")
3450 (set_attr "mode" "V4SF")])
;; Two-operand unpcklps on V4SF; operand 1 is tied to the destination.
;; The selector over the (operand 1, operand 2) pair is {0,4, 1,5}.
3452 (define_insn "vec_interleave_lowv4sf"
3453 [(set (match_operand:V4SF 0 "register_operand" "=x")
3456 (match_operand:V4SF 1 "register_operand" "0")
3457 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
3458 (parallel [(const_int 0) (const_int 4)
3459 (const_int 1) (const_int 5)])))]
3461 "unpcklps\t{%2, %0|%0, %2}"
3462 [(set_attr "type" "sselog")
3463 (set_attr "mode" "V4SF")])
3465 ;; These are modeled with the same vec_concat as the others so that we
3466 ;; capture users of shufps that can use the new instructions
;; V8SF vmovshdup: selector {1,1, 3,3, 5,5, 7,7}, i.e. every
;; odd-indexed element of operand 1 is duplicated into the even slot
;; below it.  Operand 1 may be a register or memory.
3467 (define_insn "avx_movshdup256"
3468 [(set (match_operand:V8SF 0 "register_operand" "=x")
3471 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
3473 (parallel [(const_int 1) (const_int 1)
3474 (const_int 3) (const_int 3)
3475 (const_int 5) (const_int 5)
3476 (const_int 7) (const_int 7)])))]
3478 "vmovshdup\t{%1, %0|%0, %1}"
3479 [(set_attr "type" "sse")
3480 (set_attr "prefix" "vex")
3481 (set_attr "mode" "V8SF")])
;; V4SF movshdup; the selector begins with element 1.  The template
;; uses "%v" with prefix "maybe_vex", and the insn is marked as
;; carrying a rep prefix (prefix_rep = 1).
3483 (define_insn "sse3_movshdup"
3484 [(set (match_operand:V4SF 0 "register_operand" "=x")
3487 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3489 (parallel [(const_int 1)
3494 "%vmovshdup\t{%1, %0|%0, %1}"
3495 [(set_attr "type" "sse")
3496 (set_attr "prefix_rep" "1")
3497 (set_attr "prefix" "maybe_vex")
3498 (set_attr "mode" "V4SF")])
;; V8SF vmovsldup: selector {0,0, 2,2, 4,4, 6,6}, i.e. every
;; even-indexed element of operand 1 is duplicated into the odd slot
;; above it.  Operand 1 may be a register or memory.
3500 (define_insn "avx_movsldup256"
3501 [(set (match_operand:V8SF 0 "register_operand" "=x")
3504 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
3506 (parallel [(const_int 0) (const_int 0)
3507 (const_int 2) (const_int 2)
3508 (const_int 4) (const_int 4)
3509 (const_int 6) (const_int 6)])))]
3511 "vmovsldup\t{%1, %0|%0, %1}"
3512 [(set_attr "type" "sse")
3513 (set_attr "prefix" "vex")
3514 (set_attr "mode" "V8SF")])
;; V4SF movsldup; the selector begins with element 0.  The template
;; uses "%v" with prefix "maybe_vex", and the insn is marked as
;; carrying a rep prefix (prefix_rep = 1).
3516 (define_insn "sse3_movsldup"
3517 [(set (match_operand:V4SF 0 "register_operand" "=x")
3520 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3522 (parallel [(const_int 0)
3527 "%vmovsldup\t{%1, %0|%0, %1}"
3528 [(set_attr "type" "sse")
3529 (set_attr "prefix_rep" "1")
3530 (set_attr "prefix" "maybe_vex")
3531 (set_attr "mode" "V4SF")])
;; Expander for the V8SF shuffle with an immediate mask (operand 3).
;; The mask is split into four 2-bit fields and turned into the eight
;; explicit element selectors taken by avx_shufps256_1:
;;   fields 0,1          -> selectors in 0..3   (and +4 for the repeat)
;;   fields 2,3  (+8)    -> selectors in 8..11  (and +12 for the repeat)
;; so the second group of four selectors always mirrors the first
;; group shifted by 4.
3533 (define_expand "avx_shufps256"
3534 [(match_operand:V8SF 0 "register_operand" "")
3535 (match_operand:V8SF 1 "register_operand" "")
3536 (match_operand:V8SF 2 "nonimmediate_operand" "")
3537 (match_operand:SI 3 "const_int_operand" "")]
3540 int mask = INTVAL (operands[3]);
3541 emit_insn (gen_avx_shufps256_1 (operands[0], operands[1], operands[2],
3542 GEN_INT ((mask >> 0) & 3),
3543 GEN_INT ((mask >> 2) & 3),
3544 GEN_INT (((mask >> 4) & 3) + 8),
3545 GEN_INT (((mask >> 6) & 3) + 8),
3546 GEN_INT (((mask >> 0) & 3) + 4),
3547 GEN_INT (((mask >> 2) & 3) + 4),
3548 GEN_INT (((mask >> 4) & 3) + 12),
3549 GEN_INT (((mask >> 6) & 3) + 12)));
3553 ;; One bit in mask selects 2 elements.
;; V8SF vshufps expressed with eight explicit selectors (operands
;; 3..10).  The insn condition requires selectors 7..10 to equal
;; selectors 3..6 plus 4.  The output code rebuilds the 8-bit immediate
;; from operands 3..6 (subtracting the +8 bias from operands 5 and 6),
;; stores it back in operand 3 and emits vshufps.
3554 (define_insn "avx_shufps256_1"
3555 [(set (match_operand:V8SF 0 "register_operand" "=x")
3558 (match_operand:V8SF 1 "register_operand" "x")
3559 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3560 (parallel [(match_operand 3 "const_0_to_3_operand" "")
3561 (match_operand 4 "const_0_to_3_operand" "")
3562 (match_operand 5 "const_8_to_11_operand" "")
3563 (match_operand 6 "const_8_to_11_operand" "")
3564 (match_operand 7 "const_4_to_7_operand" "")
3565 (match_operand 8 "const_4_to_7_operand" "")
3566 (match_operand 9 "const_12_to_15_operand" "")
3567 (match_operand 10 "const_12_to_15_operand" "")])))]
3569 && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
3570 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
3571 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
3572 && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4))"
3575 mask = INTVAL (operands[3]);
3576 mask |= INTVAL (operands[4]) << 2;
3577 mask |= (INTVAL (operands[5]) - 8) << 4;
3578 mask |= (INTVAL (operands[6]) - 8) << 6;
3579 operands[3] = GEN_INT (mask);
3581 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3583 [(set_attr "type" "sselog")
3584 (set_attr "length_immediate" "1")
3585 (set_attr "prefix" "vex")
3586 (set_attr "mode" "V8SF")])
;; Expander for the V4SF shuffle with an immediate mask (operand 3).
;; The mask is split into four 2-bit fields; the first two become
;; selectors in 0..3 and the last two are biased by 4 (range 4..7)
;; before being passed to gen_sse_shufps_v4sf.
3588 (define_expand "sse_shufps"
3589 [(match_operand:V4SF 0 "register_operand" "")
3590 (match_operand:V4SF 1 "register_operand" "")
3591 (match_operand:V4SF 2 "nonimmediate_operand" "")
3592 (match_operand:SI 3 "const_int_operand" "")]
3595 int mask = INTVAL (operands[3]);
3596 emit_insn (gen_sse_shufps_v4sf (operands[0], operands[1], operands[2],
3597 GEN_INT ((mask >> 0) & 3),
3598 GEN_INT ((mask >> 2) & 3),
3599 GEN_INT (((mask >> 4) & 3) + 4),
3600 GEN_INT (((mask >> 6) & 3) + 4)));
;; AVX three-operand vshufps for the SSEMODE4S modes, expressed as a
;; vec_select over the concatenation of operands 1 and 2 with four
;; explicit selectors (operands 3..6).  The output code folds the
;; selectors into one immediate (removing the +4 bias from operands 5
;; and 6), stores it in operand 3 and emits vshufps.
3604 (define_insn "*avx_shufps_<mode>"
3605 [(set (match_operand:SSEMODE4S 0 "register_operand" "=x")
3606 (vec_select:SSEMODE4S
3607 (vec_concat:<ssedoublesizemode>
3608 (match_operand:SSEMODE4S 1 "register_operand" "x")
3609 (match_operand:SSEMODE4S 2 "nonimmediate_operand" "xm"))
3610 (parallel [(match_operand 3 "const_0_to_3_operand" "")
3611 (match_operand 4 "const_0_to_3_operand" "")
3612 (match_operand 5 "const_4_to_7_operand" "")
3613 (match_operand 6 "const_4_to_7_operand" "")])))]
3617 mask |= INTVAL (operands[3]) << 0;
3618 mask |= INTVAL (operands[4]) << 2;
3619 mask |= (INTVAL (operands[5]) - 4) << 4;
3620 mask |= (INTVAL (operands[6]) - 4) << 6;
3621 operands[3] = GEN_INT (mask);
3623 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3625 [(set_attr "type" "sselog")
3626 (set_attr "length_immediate" "1")
3627 (set_attr "prefix" "vex")
3628 (set_attr "mode" "V4SF")])
;; Two-operand shufps for the SSEMODE4S modes; operand 1 is tied to the
;; destination.  Same RTL shape as the AVX variant: a vec_select over
;; the concatenation of operands 1 and 2 with four explicit selectors
;; (operands 3..6), which the output code folds into one immediate
;; stored in operand 3.
3630 (define_insn "sse_shufps_<mode>"
3631 [(set (match_operand:SSEMODE4S 0 "register_operand" "=x")
3632 (vec_select:SSEMODE4S
3633 (vec_concat:<ssedoublesizemode>
3634 (match_operand:SSEMODE4S 1 "register_operand" "0")
3635 (match_operand:SSEMODE4S 2 "nonimmediate_operand" "xm"))
3636 (parallel [(match_operand 3 "const_0_to_3_operand" "")
3637 (match_operand 4 "const_0_to_3_operand" "")
3638 (match_operand 5 "const_4_to_7_operand" "")
3639 (match_operand 6 "const_4_to_7_operand" "")])))]
3643 mask |= INTVAL (operands[3]) << 0;
3644 mask |= INTVAL (operands[4]) << 2;
3645 mask |= (INTVAL (operands[5]) - 4) << 4;
3646 mask |= (INTVAL (operands[6]) - 4) << 6;
3647 operands[3] = GEN_INT (mask);
3649 return "shufps\t{%3, %2, %0|%0, %2, %3}";
3651 [(set_attr "type" "sselog")
3652 (set_attr "length_immediate" "1")
3653 (set_attr "mode" "V4SF")])
;; Extract elements 2 and 3 of V4SF operand 1 into V2SF operand 0.
;; Alternatives:
;;   0: register -> memory              -> movhps
;;   1: register -> register            -> movhlps (destination via %d0)
;;   2: offsettable memory -> register  -> movlps from %H1
;; All templates carry "%v" and the prefix attribute is "maybe_vex".
3655 (define_insn "sse_storehps"
3656 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3658 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
3659 (parallel [(const_int 2) (const_int 3)])))]
3662 %vmovhps\t{%1, %0|%0, %1}
3663 %vmovhlps\t{%1, %d0|%d0, %1}
3664 %vmovlps\t{%H1, %d0|%d0, %H1}"
3665 [(set_attr "type" "ssemov")
3666 (set_attr "prefix" "maybe_vex")
3667 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander for "load high": combines elements 0 and 1 of V4SF
;; operand 1 with V2SF operand 2 into V4SF operand 0.  The preparation
;; statement passes the operands through ix86_fixup_binary_operands.
3669 (define_expand "sse_loadhps_exp"
3670 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3673 (match_operand:V4SF 1 "nonimmediate_operand" "")
3674 (parallel [(const_int 0) (const_int 1)]))
3675 (match_operand:V2SF 2 "nonimmediate_operand" "")))]
3677 "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
;; AVX "load high" pattern: elements 0 and 1 of operand 1 combined with
;; V2SF operand 2.
;; Alternatives:
;;   0: operand 2 in memory                  -> vmovhps
;;   1: operand 2 in a register              -> vmovlhps
;;   2: offsettable memory dest (op 1 tied)  -> vmovlps to %H0
3679 (define_insn "*avx_loadhps"
3680 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
3683 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,0")
3684 (parallel [(const_int 0) (const_int 1)]))
3685 (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
3688 vmovhps\t{%2, %1, %0|%0, %1, %2}
3689 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3690 vmovlps\t{%2, %H0|%H0, %2}"
3691 [(set_attr "type" "ssemov")
3692 (set_attr "prefix" "vex")
3693 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Non-AVX "load high" pattern: elements 0 and 1 of operand 1 (tied to
;; the destination in every alternative) combined with V2SF operand 2.
;; Alternatives:
;;   0: operand 2 in memory         -> movhps
;;   1: operand 2 in a register     -> movlhps
;;   2: offsettable memory dest     -> movlps to %H0
3695 (define_insn "sse_loadhps"
3696 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
3699 (match_operand:V4SF 1 "nonimmediate_operand" "0,0,0")
3700 (parallel [(const_int 0) (const_int 1)]))
3701 (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
3704 movhps\t{%2, %0|%0, %2}
3705 movlhps\t{%2, %0|%0, %2}
3706 movlps\t{%2, %H0|%H0, %2}"
3707 [(set_attr "type" "ssemov")
3708 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; AVX pattern extracting elements 0 and 1 of V4SF operand 1 into V2SF
;; operand 0.
;; Alternatives:
;;   0: register -> memory     -> vmovlps
;;   1: register -> register   -> vmovaps (whole register copy)
;;   2: memory   -> register   -> three-operand vmovlps with %0 as
;;                                both the destination and the second
;;                                source
;; NOTE(review): alternative 1 emits vmovaps but its "mode" attribute is
;; V2DF, whereas sse_storelps uses V4SF for the matching alternative --
;; confirm whether the difference is intentional.
3710 (define_insn "*avx_storelps"
3711 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3713 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m")
3714 (parallel [(const_int 0) (const_int 1)])))]
3717 vmovlps\t{%1, %0|%0, %1}
3718 vmovaps\t{%1, %0|%0, %1}
3719 vmovlps\t{%1, %0, %0|%0, %0, %1}"
3720 [(set_attr "type" "ssemov")
3721 (set_attr "prefix" "vex")
3722 (set_attr "mode" "V2SF,V2DF,V2SF")])
;; Non-AVX pattern extracting elements 0 and 1 of V4SF operand 1 into
;; V2SF operand 0.
;; Alternatives:
;;   0: register -> memory     -> movlps
;;   1: register -> register   -> movaps (whole register copy)
;;   2: memory   -> register   -> movlps
3724 (define_insn "sse_storelps"
3725 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3727 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m")
3728 (parallel [(const_int 0) (const_int 1)])))]
3731 movlps\t{%1, %0|%0, %1}
3732 movaps\t{%1, %0|%0, %1}
3733 movlps\t{%1, %0|%0, %1}"
3734 [(set_attr "type" "ssemov")
3735 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander for "load low": combines V2SF operand 2 with elements 2 and
;; 3 of V4SF operand 1 into V4SF operand 0.  The preparation statement
;; passes the operands through ix86_fixup_binary_operands.
3737 (define_expand "sse_loadlps_exp"
3738 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3740 (match_operand:V2SF 2 "nonimmediate_operand" "")
3742 (match_operand:V4SF 1 "nonimmediate_operand" "")
3743 (parallel [(const_int 2) (const_int 3)]))))]
3745 "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
;; AVX "load low" pattern: V2SF operand 2 combined with elements 2 and
;; 3 of V4SF operand 1.
;; Alternatives:
;;   0: all registers                  -> vshufps with immediate 0xe4
;;   1: operand 2 in memory            -> three-operand vmovlps
;;   2: memory destination (op 1 tied) -> vmovlps store
;; Alternative 0 must use the VEX mnemonic: its template has three
;; register operands plus the immediate and the insn is tagged
;; prefix "vex"; the legacy "shufps" mnemonic only exists in the
;; two-operand destructive form and would not assemble here.
3747 (define_insn "*avx_loadlps"
3748 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
3750 (match_operand:V2SF 2 "nonimmediate_operand" "x,m,x")
3752 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,0")
3753 (parallel [(const_int 2) (const_int 3)]))))]
3756 vshufps\t{$0xe4, %1, %2, %0|%0, %2, %1, 0xe4}
3757 vmovlps\t{%2, %1, %0|%0, %1, %2}
3758 vmovlps\t{%2, %0|%0, %2}"
3759 [(set_attr "type" "sselog,ssemov,ssemov")
3760 (set_attr "length_immediate" "1,*,*")
3761 (set_attr "prefix" "vex")
3762 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Non-AVX "load low" pattern: V2SF operand 2 combined with elements 2
;; and 3 of V4SF operand 1.
;; Alternatives:
;;   0: operand 2 tied to the destination, operand 1 in a register
;;      -> shufps with immediate 0xe4
;;   1: operand 2 in memory, operand 1 tied  -> movlps load
;;   2: memory destination, operand 1 tied   -> movlps store
3764 (define_insn "sse_loadlps"
3765 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
3767 (match_operand:V2SF 2 "nonimmediate_operand" "0,m,x")
3769 (match_operand:V4SF 1 "nonimmediate_operand" "x,0,0")
3770 (parallel [(const_int 2) (const_int 3)]))))]
3773 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
3774 movlps\t{%2, %0|%0, %2}
3775 movlps\t{%2, %0|%0, %2}"
3776 [(set_attr "type" "sselog,ssemov,ssemov")
3777 (set_attr "length_immediate" "1,*,*")
3778 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; AVX three-operand vmovss on V4SF registers: operands 1 and 2 are
;; independent register inputs and operand 0 is the destination.
3780 (define_insn "*avx_movss"
3781 [(set (match_operand:V4SF 0 "register_operand" "=x")
3783 (match_operand:V4SF 2 "register_operand" "x")
3784 (match_operand:V4SF 1 "register_operand" "x")
3787 "vmovss\t{%2, %1, %0|%0, %1, %2}"
3788 [(set_attr "type" "ssemov")
3789 (set_attr "prefix" "vex")
3790 (set_attr "mode" "SF")])
;; Two-operand movss on V4SF registers: operand 1 is tied to the
;; destination and operand 2 is the register source.
3792 (define_insn "sse_movss"
3793 [(set (match_operand:V4SF 0 "register_operand" "=x")
3795 (match_operand:V4SF 2 "register_operand" "x")
3796 (match_operand:V4SF 1 "register_operand" "0")
3799 "movss\t{%2, %0|%0, %2}"
3800 [(set_attr "type" "ssemov")
3801 (set_attr "mode" "SF")])
;; Expander broadcasting SF operand 1 into V4SF operand 0.
;; The preparation code forces operand 1 into a register.  Operand 1
;; is a scalar SF value, so the register must be requested in SFmode;
;; asking force_reg for V4SFmode would copy an SF memory operand into
;; a pseudo of the wrong mode.
3803 (define_expand "vec_dupv4sf"
3804 [(set (match_operand:V4SF 0 "register_operand" "")
3806 (match_operand:SF 1 "nonimmediate_operand" "")))]
3810 operands[1] = force_reg (SFmode, operands[1]);
;; AVX broadcast of SF operand 1 into V4SF operand 0.
;; Alternatives:
;;   0: register source -> vshufps of the source with itself, imm 0
;;   1: memory source   -> vbroadcastss
3813 (define_insn "*vec_dupv4sf_avx"
3814 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3816 (match_operand:SF 1 "nonimmediate_operand" "x,m")))]
3819 vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0}
3820 vbroadcastss\t{%1, %0|%0, %1}"
3821 [(set_attr "type" "sselog1,ssemov")
3822 (set_attr "length_immediate" "1,0")
3823 (set_attr "prefix_extra" "0,1")
3824 (set_attr "prefix" "vex")
3825 (set_attr "mode" "V4SF")])
;; Non-AVX broadcast of SF operand 1 into V4SF operand 0.  Operand 1
;; is tied to the destination, and the insn is a shufps of the
;; destination with itself using immediate 0.
3827 (define_insn "*vec_dupv4sf"
3828 [(set (match_operand:V4SF 0 "register_operand" "=x")
3830 (match_operand:SF 1 "register_operand" "0")))]
3832 "shufps\t{$0, %0, %0|%0, %0, 0}"
3833 [(set_attr "type" "sselog1")
3834 (set_attr "length_immediate" "1")
3835 (set_attr "mode" "V4SF")])
;; AVX pattern building a V2SF from two SF operands.
;; Alternatives:
;;   0: SSE regs                         -> vunpcklps
;;   1: operand 2 in memory              -> vinsertps with imm 0x10
;;   2: operand 1 in memory, op 2 zero   -> vmovss
;;   3: MMX destination                  -> punpckldq (legacy encoding)
;;   4: MMX destination, op 2 zero       -> movd      (legacy encoding)
;; Alternatives 3 and 4 keep prefix "orig"; the others use "vex".
;; Alternative 3 emits the destructive two-operand punpckldq, which
;; reads only %0 and %2, so operand 1 has to be tied to the destination
;; ("0"), exactly as in *vec_concatv2sf_sse4_1.  An untied "x" there
;; would leave the low element of the MMX destination undefined.
3837 (define_insn "*vec_concatv2sf_avx"
3838 [(set (match_operand:V2SF 0 "register_operand" "=x,x,x,*y ,*y")
3840 (match_operand:SF 1 "nonimmediate_operand" " x,x,m, 0 , m")
3841 (match_operand:SF 2 "vector_move_operand" " x,m,C,*ym, C")))]
3844 vunpcklps\t{%2, %1, %0|%0, %1, %2}
3845 vinsertps\t{$0x10, %2, %1, %0|%0, %1, %2, 0x10}
3846 vmovss\t{%1, %0|%0, %1}
3847 punpckldq\t{%2, %0|%0, %2}
3848 movd\t{%1, %0|%0, %1}"
3849 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
3850 (set_attr "length_immediate" "*,1,*,*,*")
3851 (set_attr "prefix_extra" "*,1,*,*,*")
3852 (set (attr "prefix")
3853 (if_then_else (eq_attr "alternative" "3,4")
3854 (const_string "orig")
3855 (const_string "vex")))
3856 (set_attr "mode" "V4SF,V4SF,SF,DI,DI")])
3858 ;; Although insertps takes register source, we prefer
3859 ;; unpcklps with register source since it is shorter.
;; SSE4.1 pattern building a V2SF from two SF operands.
;; Alternatives:
;;   0: SSE regs, op 1 tied              -> unpcklps
;;   1: operand 2 in memory, op 1 tied   -> insertps with imm 0x10
;;   2: operand 1 in memory, op 2 zero   -> movss
;;   3: MMX destination, op 1 tied       -> punpckldq
;;   4: MMX destination, op 2 zero       -> movd
3860 (define_insn "*vec_concatv2sf_sse4_1"
3861 [(set (match_operand:V2SF 0 "register_operand" "=x,x,x,*y ,*y")
3863 (match_operand:SF 1 "nonimmediate_operand" " 0,0,m, 0 , m")
3864 (match_operand:SF 2 "vector_move_operand" " x,m,C,*ym, C")))]
3867 unpcklps\t{%2, %0|%0, %2}
3868 insertps\t{$0x10, %2, %0|%0, %2, 0x10}
3869 movss\t{%1, %0|%0, %1}
3870 punpckldq\t{%2, %0|%0, %2}
3871 movd\t{%1, %0|%0, %1}"
3872 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
3873 (set_attr "prefix_data16" "*,1,*,*,*")
3874 (set_attr "prefix_extra" "*,1,*,*,*")
3875 (set_attr "length_immediate" "*,1,*,*,*")
3876 (set_attr "mode" "V4SF,V4SF,SF,DI,DI")])
3878 ;; ??? In theory we can match memory for the MMX alternative, but allowing
3879 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
3880 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Baseline SSE pattern building a V2SF from two SF operands; operand 2
;; must be a register or zero (reg_or_0_operand).
;; Alternatives:
;;   0: SSE regs, op 1 tied              -> unpcklps
;;   1: operand 1 in memory, op 2 zero   -> movss
;;   2: MMX regs, op 1 tied              -> punpckldq
;;   3: MMX destination, op 2 zero       -> movd
3881 (define_insn "*vec_concatv2sf_sse"
3882 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
3884 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
3885 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
3888 unpcklps\t{%2, %0|%0, %2}
3889 movss\t{%1, %0|%0, %1}
3890 punpckldq\t{%2, %0|%0, %2}
3891 movd\t{%1, %0|%0, %1}"
3892 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
3893 (set_attr "mode" "V4SF,SF,DI,DI")])
;; AVX pattern building a V4SF from two V2SF operands.
;; Alternatives:
;;   0: operand 2 in a register -> vmovlhps
;;   1: operand 2 in memory     -> vmovhps
3895 (define_insn "*vec_concatv4sf_avx"
3896 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3898 (match_operand:V2SF 1 "register_operand" " x,x")
3899 (match_operand:V2SF 2 "nonimmediate_operand" " x,m")))]
3902 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3903 vmovhps\t{%2, %1, %0|%0, %1, %2}"
3904 [(set_attr "type" "ssemov")
3905 (set_attr "prefix" "vex")
3906 (set_attr "mode" "V4SF,V2SF")])
;; Non-AVX pattern building a V4SF from two V2SF operands; operand 1 is
;; tied to the destination.
;; Alternatives:
;;   0: operand 2 in a register -> movlhps
;;   1: operand 2 in memory     -> movhps
3908 (define_insn "*vec_concatv4sf_sse"
3909 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3911 (match_operand:V2SF 1 "register_operand" " 0,0")
3912 (match_operand:V2SF 2 "nonimmediate_operand" " x,m")))]
3915 movlhps\t{%2, %0|%0, %2}
3916 movhps\t{%2, %0|%0, %2}"
3917 [(set_attr "type" "ssemov")
3918 (set_attr "mode" "V4SF,V2SF")])
;; Expander vec_init for every mode in SSEMODE: initialises vector
;; register operand 0 from operand 1 by calling
;; ix86_expand_vector_init with its first argument set to false.
3920 (define_expand "vec_init<mode>"
3921 [(match_operand:SSEMODE 0 "register_operand" "")
3922 (match_operand 1 "" "")]
3925 ix86_expand_vector_init (false, operands[0], operands[1]);
;; AVX pattern merging a duplicated scalar (operand 2) with vector
;; operand 1 for the SSEMODE4S modes.
;; Alternatives (destination / scalar / vector operand 1):
;;   0: x / x   / zero  -> vinsertps with imm 0xe
;;   1: x / m   / zero  -> vmov<ssescalarmodesuffix2s> load
;;   2: x / *r  / zero  -> vmovd
;;   3: x / x   / x     -> three-operand vmovss
;;   4: x / *rm / x     -> vpinsrd with imm 0
;;   5: m / x*rfF / tied to destination (no template line visible here;
;;      its type and mode attributes are "*")
3929 (define_insn "*vec_set<mode>_0_avx"
3930 [(set (match_operand:SSEMODE4S 0 "nonimmediate_operand" "=x,x, x,x, x,m")
3931 (vec_merge:SSEMODE4S
3932 (vec_duplicate:SSEMODE4S
3933 (match_operand:<ssescalarmode> 2
3934 "general_operand" " x,m,*r,x,*rm,x*rfF"))
3935 (match_operand:SSEMODE4S 1 "vector_move_operand" " C,C, C,x, x,0")
3939 vinsertps\t{$0xe, %2, %2, %0|%0, %2, %2, 0xe}
3940 vmov<ssescalarmodesuffix2s>\t{%2, %0|%0, %2}
3941 vmovd\t{%2, %0|%0, %2}
3942 vmovss\t{%2, %1, %0|%0, %1, %2}
3943 vpinsrd\t{$0, %2, %1, %0|%0, %1, %2, 0}
3945 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,sselog,*")
3946 (set_attr "prefix_extra" "*,*,*,*,1,*")
3947 (set_attr "length_immediate" "*,*,*,*,1,*")
3948 (set_attr "prefix" "vex")
3949 (set_attr "mode" "SF,<ssescalarmode>,SI,SF,TI,*")])
;; SSE4.1 pattern merging a duplicated scalar (operand 2) with vector
;; operand 1 for the SSEMODE4S modes.
;; Alternatives (destination / scalar / vector operand 1):
;;   0: x / x   / zero  -> insertps with imm 0xe
;;   1: x / m   / zero  -> mov<ssescalarmodesuffix2s> load
;;   2: x / *r  / zero  -> movd
;;   3: x / x   / tied  -> movss
;;   4: x / *rm / tied  -> pinsrd with imm 0
;;   5: m / *rfF / tied (no template line visible here; its type and
;;      mode attributes are "*")
3951 (define_insn "*vec_set<mode>_0_sse4_1"
3952 [(set (match_operand:SSEMODE4S 0 "nonimmediate_operand" "=x,x, x,x, x,m")
3953 (vec_merge:SSEMODE4S
3954 (vec_duplicate:SSEMODE4S
3955 (match_operand:<ssescalarmode> 2
3956 "general_operand" " x,m,*r,x,*rm,*rfF"))
3957 (match_operand:SSEMODE4S 1 "vector_move_operand" " C,C, C,0, 0,0")
3961 insertps\t{$0xe, %2, %0|%0, %2, 0xe}
3962 mov<ssescalarmodesuffix2s>\t{%2, %0|%0, %2}
3963 movd\t{%2, %0|%0, %2}
3964 movss\t{%2, %0|%0, %2}
3965 pinsrd\t{$0, %2, %0|%0, %2, 0}
3967 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,sselog,*")
3968 (set_attr "prefix_extra" "*,*,*,*,1,*")
3969 (set_attr "length_immediate" "*,*,*,*,1,*")
3970 (set_attr "mode" "SF,<ssescalarmode>,SI,SF,TI,*")])
;; SSE2 pattern merging a duplicated scalar (operand 2) with vector
;; operand 1 for the SSEMODE4S modes.
;; Alternatives (destination / scalar / vector operand 1):
;;   0: x / m  / zero  -> mov<ssescalarmodesuffix2s> load
;;   1: x / *r / zero  -> movd
;;   2: x / x  / tied  -> movss
;;   3: m / x*rfF / tied (no template line visible here; its mode
;;      attribute is "*")
3972 (define_insn "*vec_set<mode>_0_sse2"
3973 [(set (match_operand:SSEMODE4S 0 "nonimmediate_operand" "=x, x,x,m")
3974 (vec_merge:SSEMODE4S
3975 (vec_duplicate:SSEMODE4S
3976 (match_operand:<ssescalarmode> 2
3977 "general_operand" " m,*r,x,x*rfF"))
3978 (match_operand:SSEMODE4S 1 "vector_move_operand" " C, C,0,0")
3982 mov<ssescalarmodesuffix2s>\t{%2, %0|%0, %2}
3983 movd\t{%2, %0|%0, %2}
3984 movss\t{%2, %0|%0, %2}
3986 [(set_attr "type" "ssemov")
3987 (set_attr "mode" "<ssescalarmode>,SI,SF,*")])
;; Baseline pattern merging a duplicated scalar (operand 2) with vector
;; operand 1 for the SSEMODE4S modes.
;; Alternatives (destination / scalar / vector operand 1):
;;   0: x / m / zero  -> movss load
;;   1: x / x / tied  -> movss
;;   2: m / x*rfF / tied (no template line visible here)
3989 (define_insn "vec_set<mode>_0"
3990 [(set (match_operand:SSEMODE4S 0 "nonimmediate_operand" "=x,x,m")
3991 (vec_merge:SSEMODE4S
3992 (vec_duplicate:SSEMODE4S
3993 (match_operand:<ssescalarmode> 2
3994 "general_operand" " m,x,x*rfF"))
3995 (match_operand:SSEMODE4S 1 "vector_move_operand" " C,0,0")
3999 movss\t{%2, %0|%0, %2}
4000 movss\t{%2, %0|%0, %2}
4002 [(set_attr "type" "ssemov")
4003 (set_attr "mode" "SF")])
4005 ;; A subset is vec_setv4sf.
;; AVX insertion of SF operand 2 (register or memory) into V4SF
;; operand 1.  Operand 3 is a power-of-two element mask (1, 2, 4 or 8);
;; the output code converts it to an element index with exact_log2,
;; shifts the index left by 4 to form the vinsertps immediate, and
;; stores that back in operand 3.
4006 (define_insn "*vec_setv4sf_avx"
4007 [(set (match_operand:V4SF 0 "register_operand" "=x")
4010 (match_operand:SF 2 "nonimmediate_operand" "xm"))
4011 (match_operand:V4SF 1 "register_operand" "x")
4012 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
4015 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
4016 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4018 [(set_attr "type" "sselog")
4019 (set_attr "prefix_extra" "1")
4020 (set_attr "length_immediate" "1")
4021 (set_attr "prefix" "vex")
4022 (set_attr "mode" "V4SF")])
;; SSE4.1 insertion of SF operand 2 (register or memory) into V4SF
;; operand 1, which is tied to the destination.  Operand 3 is a
;; power-of-two element mask (1, 2, 4 or 8); the output code converts
;; it to an element index with exact_log2 and shifts it left by 4 to
;; form the insertps immediate.
4024 (define_insn "*vec_setv4sf_sse4_1"
4025 [(set (match_operand:V4SF 0 "register_operand" "=x")
4028 (match_operand:SF 2 "nonimmediate_operand" "xm"))
4029 (match_operand:V4SF 1 "register_operand" "0")
4030 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
4033 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
4034 return "insertps\t{%3, %2, %0|%0, %2, %3}";
4036 [(set_attr "type" "sselog")
4037 (set_attr "prefix_data16" "1")
4038 (set_attr "prefix_extra" "1")
4039 (set_attr "length_immediate" "1")
4040 (set_attr "mode" "V4SF")])
;; AVX vinsertps modelled as an unspec over V4SF operand 2 (register or
;; memory), V4SF register operand 1 and an 8-bit immediate (operand 3,
;; range 0..255) that is passed to the instruction unchanged.
4042 (define_insn "*avx_insertps"
4043 [(set (match_operand:V4SF 0 "register_operand" "=x")
4044 (unspec:V4SF [(match_operand:V4SF 2 "nonimmediate_operand" "xm")
4045 (match_operand:V4SF 1 "register_operand" "x")
4046 (match_operand:SI 3 "const_0_to_255_operand" "n")]
4049 "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4050 [(set_attr "type" "sselog")
4051 (set_attr "prefix" "vex")
4052 (set_attr "prefix_extra" "1")
4053 (set_attr "length_immediate" "1")
4054 (set_attr "mode" "V4SF")])
;; SSE4.1 insertps modelled as an unspec over V4SF register operand 2,
;; V4SF operand 1 (tied to the destination) and an 8-bit immediate
;; (operand 3, range 0..255) that is passed to the instruction
;; unchanged.  Unlike the AVX variant, operand 2 must be a register.
4056 (define_insn "sse4_1_insertps"
4057 [(set (match_operand:V4SF 0 "register_operand" "=x")
4058 (unspec:V4SF [(match_operand:V4SF 2 "register_operand" "x")
4059 (match_operand:V4SF 1 "register_operand" "0")
4060 (match_operand:SI 3 "const_0_to_255_operand" "n")]
4063 "insertps\t{%3, %2, %0|%0, %2, %3}";
4064 [(set_attr "type" "sselog")
4065 (set_attr "prefix_data16" "1")
4066 (set_attr "prefix_extra" "1")
4067 (set_attr "length_immediate" "1")
4068 (set_attr "mode" "V4SF")])
;; Post-reload split (condition: TARGET_SSE && reload_completed) for a
;; V4SF memory destination whose inserted SF value (operand 1) is not in
;; memory: the replacement is a plain SFmode move of operand 1 to
;; offset 0 of the destination.
4071 [(set (match_operand:V4SF 0 "memory_operand" "")
4074 (match_operand:SF 1 "nonmemory_operand" ""))
4077 "TARGET_SSE && reload_completed"
4080 emit_move_insn (adjust_address (operands[0], SFmode, 0), operands[1]);
;; Expander vec_set for every mode in SSEMODE: stores scalar register
;; operand 1 into the element of vector operand 0 given by the constant
;; operand 2, by calling ix86_expand_vector_set with its first argument
;; set to false.
4084 (define_expand "vec_set<mode>"
4085 [(match_operand:SSEMODE 0 "register_operand" "")
4086 (match_operand:<ssescalarmode> 1 "register_operand" "")
4087 (match_operand 2 "const_int_operand" "")]
4090 ix86_expand_vector_set (false, operands[0], operands[1],
4091 INTVAL (operands[2]));
;; Extract element 0 of V4SF operand 1 as an SF value.  The destination
;; may be an SSE register, memory, an x87 register or a general
;; register; the pattern is rejected when both operands are memory.
;; After reload the insn is split into a plain move: operand 1 is
;; re-expressed in SFmode (via gen_rtx_REG on its register number, or
;; via gen_lowpart) and moved into operand 0.
4095 (define_insn_and_split "*vec_extractv4sf_0"
4096 [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,f,r")
4098 (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m,m")
4099 (parallel [(const_int 0)])))]
4100 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4102 "&& reload_completed"
4105 rtx op1 = operands[1];
4107 op1 = gen_rtx_REG (SFmode, REGNO (op1));
4109 op1 = gen_lowpart (SFmode, op1);
4110 emit_move_insn (operands[0], op1);
;; Expander for vextractf128 on every AVX256MODE mode.  Operand 2 is a
;; constant 0 or 1; the switch on its value emits either
;; gen_vec_extract_lo_<mode> or gen_vec_extract_hi_<mode> with
;; operands 0 and 1.
4114 (define_expand "avx_vextractf128<mode>"
4115 [(match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "")
4116 (match_operand:AVX256MODE 1 "register_operand" "")
4117 (match_operand:SI 2 "const_0_to_1_operand" "")]
4120 switch (INTVAL (operands[2]))
4123 emit_insn (gen_vec_extract_lo_<mode> (operands[0], operands[1]));
4126 emit_insn (gen_vec_extract_hi_<mode> (operands[0], operands[1]));
;; Low-half extraction for the four-element 256-bit modes
;; (AVX256MODE4P): selects elements 0 and 1 and emits vextractf128 with
;; immediate 0.  The destination may be a register or memory (the
;; "memory" attribute is none/store accordingly).
4134 (define_insn "vec_extract_lo_<mode>"
4135 [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
4136 (vec_select:<avxhalfvecmode>
4137 (match_operand:AVX256MODE4P 1 "register_operand" "x,x")
4138 (parallel [(const_int 0) (const_int 1)])))]
4140 "vextractf128\t{$0x0, %1, %0|%0, %1, 0x0}"
4141 [(set_attr "type" "sselog")
4142 (set_attr "prefix_extra" "1")
4143 (set_attr "length_immediate" "1")
4144 (set_attr "memory" "none,store")
4145 (set_attr "prefix" "vex")
4146 (set_attr "mode" "V8SF")])
;; High half (elements 2 and 3) of an AVX256MODE4P register, written to an
;; SSE register or to memory with vextractf128 and immediate 1.
4148 (define_insn "vec_extract_hi_<mode>"
4149 [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
4150 (vec_select:<avxhalfvecmode>
4151 (match_operand:AVX256MODE4P 1 "register_operand" "x,x")
4152 (parallel [(const_int 2) (const_int 3)])))]
4154 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
4155 [(set_attr "type" "sselog")
4156 (set_attr "prefix_extra" "1")
4157 (set_attr "length_immediate" "1")
4158 (set_attr "memory" "none,store")
4159 (set_attr "prefix" "vex")
4160 (set_attr "mode" "V8SF")])
;; Low half (elements 0..3) of an AVX256MODE8P register, written to an SSE
;; register or to memory.  The vextractf128 immediate chooses the 128-bit
;; lane (0 = low, 1 = high), so the low-half pattern must print $0x0, the
;; same immediate the AVX256MODE4P vec_extract_lo_<mode> pattern uses.
4162 (define_insn "vec_extract_lo_<mode>"
4163 [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
4164 (vec_select:<avxhalfvecmode>
4165 (match_operand:AVX256MODE8P 1 "register_operand" "x,x")
4166 (parallel [(const_int 0) (const_int 1)
4167 (const_int 2) (const_int 3)])))]
4169 "vextractf128\t{$0x0, %1, %0|%0, %1, 0x0}"
4170 [(set_attr "type" "sselog")
4171 (set_attr "prefix_extra" "1")
4172 (set_attr "length_immediate" "1")
4173 (set_attr "memory" "none,store")
4174 (set_attr "prefix" "vex")
4175 (set_attr "mode" "V8SF")])
;; High half (elements 4..7) of an AVX256MODE8P register, written to an SSE
;; register or to memory with vextractf128 and immediate 1.
4177 (define_insn "vec_extract_hi_<mode>"
4178 [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
4179 (vec_select:<avxhalfvecmode>
4180 (match_operand:AVX256MODE8P 1 "register_operand" "x,x")
4181 (parallel [(const_int 4) (const_int 5)
4182 (const_int 6) (const_int 7)])))]
4184 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
4185 [(set_attr "type" "sselog")
4186 (set_attr "prefix_extra" "1")
4187 (set_attr "length_immediate" "1")
4188 (set_attr "memory" "none,store")
4189 (set_attr "prefix" "vex")
4190 (set_attr "mode" "V8SF")])
;; Low half (elements 0..7) of a V16HI register as V8HI, written to an SSE
;; register or to memory.  The vextractf128 immediate chooses the 128-bit
;; lane (0 = low, 1 = high), so the low-half pattern must print $0x0;
;; $0x1 would return the lane that vec_extract_hi_v16hi selects.
4192 (define_insn "vec_extract_lo_v16hi"
4193 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
4195 (match_operand:V16HI 1 "register_operand" "x,x")
4196 (parallel [(const_int 0) (const_int 1)
4197 (const_int 2) (const_int 3)
4198 (const_int 4) (const_int 5)
4199 (const_int 6) (const_int 7)])))]
4201 "vextractf128\t{$0x0, %1, %0|%0, %1, 0x0}"
4202 [(set_attr "type" "sselog")
4203 (set_attr "prefix_extra" "1")
4204 (set_attr "length_immediate" "1")
4205 (set_attr "memory" "none,store")
4206 (set_attr "prefix" "vex")
4207 (set_attr "mode" "V8SF")])
;; High half (elements 8..15) of a V16HI register as V8HI, written to an SSE
;; register or to memory with vextractf128 and immediate 1.
4209 (define_insn "vec_extract_hi_v16hi"
4210 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
4212 (match_operand:V16HI 1 "register_operand" "x,x")
4213 (parallel [(const_int 8) (const_int 9)
4214 (const_int 10) (const_int 11)
4215 (const_int 12) (const_int 13)
4216 (const_int 14) (const_int 15)])))]
4218 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
4219 [(set_attr "type" "sselog")
4220 (set_attr "prefix_extra" "1")
4221 (set_attr "length_immediate" "1")
4222 (set_attr "memory" "none,store")
4223 (set_attr "prefix" "vex")
4224 (set_attr "mode" "V8SF")])
;; Low half (elements 0..15) of a V32QI register as V16QI, written to an SSE
;; register or to memory.  The vextractf128 immediate chooses the 128-bit
;; lane (0 = low, 1 = high), so the low-half pattern must print $0x0;
;; $0x1 would return the lane that vec_extract_hi_v32qi selects.
4226 (define_insn "vec_extract_lo_v32qi"
4227 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
4229 (match_operand:V32QI 1 "register_operand" "x,x")
4230 (parallel [(const_int 0) (const_int 1)
4231 (const_int 2) (const_int 3)
4232 (const_int 4) (const_int 5)
4233 (const_int 6) (const_int 7)
4234 (const_int 8) (const_int 9)
4235 (const_int 10) (const_int 11)
4236 (const_int 12) (const_int 13)
4237 (const_int 14) (const_int 15)])))]
4239 "vextractf128\t{$0x0, %1, %0|%0, %1, 0x0}"
4240 [(set_attr "type" "sselog")
4241 (set_attr "prefix_extra" "1")
4242 (set_attr "length_immediate" "1")
4243 (set_attr "memory" "none,store")
4244 (set_attr "prefix" "vex")
4245 (set_attr "mode" "V8SF")])
;; High half (elements 16..31) of a V32QI register as V16QI, written to an
;; SSE register or to memory with vextractf128 and immediate 1.
4247 (define_insn "vec_extract_hi_v32qi"
4248 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
4250 (match_operand:V32QI 1 "register_operand" "x,x")
4251 (parallel [(const_int 16) (const_int 17)
4252 (const_int 18) (const_int 19)
4253 (const_int 20) (const_int 21)
4254 (const_int 22) (const_int 23)
4255 (const_int 24) (const_int 25)
4256 (const_int 26) (const_int 27)
4257 (const_int 28) (const_int 29)
4258 (const_int 30) (const_int 31)])))]
4260 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
4261 [(set_attr "type" "sselog")
4262 (set_attr "prefix_extra" "1")
4263 (set_attr "length_immediate" "1")
4264 (set_attr "memory" "none,store")
4265 (set_attr "prefix" "vex")
4266 (set_attr "mode" "V8SF")])
;; Extract the element of V4SF register operand 1 chosen by the constant
;; operand 2 (0..3) into operand 0 (constraint "rm").  The template is
;; written with the %v prefix and the insn carries prefix "maybe_vex".
4268 (define_insn "*sse4_1_extractps"
4269 [(set (match_operand:SF 0 "nonimmediate_operand" "=rm")
4271 (match_operand:V4SF 1 "register_operand" "x")
4272 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
4274 "%vextractps\t{%2, %1, %0|%0, %1, %2}"
4275 [(set_attr "type" "sselog")
4276 (set_attr "prefix_data16" "1")
4277 (set_attr "prefix_extra" "1")
4278 (set_attr "length_immediate" "1")
4279 (set_attr "prefix" "maybe_vex")
4280 (set_attr "mode" "V4SF")])
;; Element extraction from a V4SF in offsettable memory (constraint "o").
;; The split code turns it into an SFmode move from the same memory at byte
;; offset 4 * index, where index is the constant in operand 2 (0..3).
4282 (define_insn_and_split "*vec_extract_v4sf_mem"
4283 [(set (match_operand:SF 0 "register_operand" "=x*rf")
4285 (match_operand:V4SF 1 "memory_operand" "o")
4286 (parallel [(match_operand 2 "const_0_to_3_operand" "n")])))]
4292 int i = INTVAL (operands[2]);
4294 emit_move_insn (operands[0], adjust_address (operands[1], SFmode, i*4));
;; Expander for every SSEMODE vector mode.  Operand 0 is the scalar
;; destination register, operand 1 the vector register and operand 2 a
;; constant integer; the work is delegated to ix86_expand_vector_extract,
;; which receives INTVAL of operand 2.
4298 (define_expand "vec_extract<mode>"
4299 [(match_operand:<ssescalarmode> 0 "register_operand" "")
4300 (match_operand:SSEMODE 1 "register_operand" "")
4301 (match_operand 2 "const_int_operand" "")]
4304 ix86_expand_vector_extract (false, operands[0], operands[1],
4305 INTVAL (operands[2]));
4309 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4311 ;; Parallel double-precision floating point element swizzling
4313 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4315 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; 256-bit vunpckhpd: the selection vector is (1 5 3 7) over operands 1 and 2.
;; Operand 1 must be a register; operand 2 may be a register or memory.
4316 (define_insn "avx_unpckhpd256"
4317 [(set (match_operand:V4DF 0 "register_operand" "=x")
4320 (match_operand:V4DF 1 "register_operand" "x")
4321 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4322 (parallel [(const_int 1) (const_int 5)
4323 (const_int 3) (const_int 7)])))]
4325 "vunpckhpd\t{%2, %1, %0|%0, %1, %2}"
4326 [(set_attr "type" "sselog")
4327 (set_attr "prefix" "vex")
4328 (set_attr "mode" "V4DF")])
;; Expander for the V2DF high interleave.  If the operand combination is not
;; accepted by ix86_vec_interleave_v2df_operator_ok (operands, 1), operand 2
;; is forced into a register before the pattern is emitted.
4330 (define_expand "vec_interleave_highv2df"
4331 [(set (match_operand:V2DF 0 "register_operand" "")
4334 (match_operand:V2DF 1 "nonimmediate_operand" "")
4335 (match_operand:V2DF 2 "nonimmediate_operand" ""))
4336 (parallel [(const_int 1)
4340 if (!ix86_vec_interleave_v2df_operator_ok (operands, 1))
4341 operands[2] = force_reg (V2DFmode, operands[2]);
;; AVX form of the V2DF high interleave.  Four alternatives, in order:
;;   reg,reg -> vunpckhpd;  both inputs the same offsettable memory ->
;;   vmovddup of its high half (%H1);  memory operand 1 with register
;;   operand 2 -> vmovlpd loading %H1;  memory destination tied to
;;   operand 2 -> vmovhpd store of register operand 1.
4344 (define_insn "*avx_interleave_highv2df"
4345 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,m")
4348 (match_operand:V2DF 1 "nonimmediate_operand" " x,o,o,x")
4349 (match_operand:V2DF 2 "nonimmediate_operand" " x,1,x,0"))
4350 (parallel [(const_int 1)
4352 "TARGET_AVX && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
4354 vunpckhpd\t{%2, %1, %0|%0, %1, %2}
4355 vmovddup\t{%H1, %0|%0, %H1}
4356 vmovlpd\t{%H1, %2, %0|%0, %2, %H1}
4357 vmovhpd\t{%1, %0|%0, %1}"
4358 [(set_attr "type" "sselog,sselog,ssemov,ssemov")
4359 (set_attr "prefix" "vex")
4360 (set_attr "mode" "V2DF,V2DF,V1DF,V1DF")])
;; SSE3 (two-operand) form of the V2DF high interleave.  Alternatives, in
;; order: unpckhpd with operand 1 tied to the output; movddup of the high
;; half of a memory operand used for both inputs; movlpd loading %H1 into
;; the register tied to operand 2; movhpd store into the memory destination
;; tied to operand 2.
4362 (define_insn "*sse3_interleave_highv2df"
4363 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,m")
4366 (match_operand:V2DF 1 "nonimmediate_operand" " 0,o,o,x")
4367 (match_operand:V2DF 2 "nonimmediate_operand" " x,1,0,0"))
4368 (parallel [(const_int 1)
4370 "TARGET_SSE3 && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
4372 unpckhpd\t{%2, %0|%0, %2}
4373 movddup\t{%H1, %0|%0, %H1}
4374 movlpd\t{%H1, %0|%0, %H1}
4375 movhpd\t{%1, %0|%0, %1}"
4376 [(set_attr "type" "sselog,sselog,ssemov,ssemov")
4377 (set_attr "prefix_data16" "*,*,1,1")
4378 (set_attr "mode" "V2DF,V2DF,V1DF,V1DF")])
;; SSE2 form of the V2DF high interleave; same alternatives as the SSE3
;; pattern minus the movddup one: unpckhpd, movlpd load of %H1, movhpd store.
4380 (define_insn "*sse2_interleave_highv2df"
4381 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
4384 (match_operand:V2DF 1 "nonimmediate_operand" " 0,o,x")
4385 (match_operand:V2DF 2 "nonimmediate_operand" " x,0,0"))
4386 (parallel [(const_int 1)
4388 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
4390 unpckhpd\t{%2, %0|%0, %2}
4391 movlpd\t{%H1, %0|%0, %H1}
4392 movhpd\t{%1, %0|%0, %1}"
4393 [(set_attr "type" "sselog,ssemov,ssemov")
4394 (set_attr "prefix_data16" "*,1,1")
4395 (set_attr "mode" "V2DF,V1DF,V1DF")])
4397 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; Expander with a single V4DF input (operand 1, register or memory) and
;; the selection vector (0 4 2 6).
4398 (define_expand "avx_movddup256"
4399 [(set (match_operand:V4DF 0 "register_operand" "")
4402 (match_operand:V4DF 1 "nonimmediate_operand" "")
4404 (parallel [(const_int 0) (const_int 4)
4405 (const_int 2) (const_int 6)])))]
;; Expander with two V4DF inputs (register operand 1, register-or-memory
;; operand 2) and the selection vector (0 4 2 6).
4409 (define_expand "avx_unpcklpd256"
4410 [(set (match_operand:V4DF 0 "register_operand" "")
4413 (match_operand:V4DF 1 "register_operand" "")
4414 (match_operand:V4DF 2 "nonimmediate_operand" ""))
4415 (parallel [(const_int 0) (const_int 4)
4416 (const_int 2) (const_int 6)])))]
;; Insn with selection vector (0 4 2 6).  Alternative 0 has operand 2 tied
;; to operand 1 and prints vmovddup; alternative 1 prints vunpcklpd.  The
;; condition allows a memory operand 1 only when it is rtx_equal_p to
;; operand 2.
4420 (define_insn "*avx_unpcklpd256"
4421 [(set (match_operand:V4DF 0 "register_operand" "=x,x")
4424 (match_operand:V4DF 1 "nonimmediate_operand" "xm,x")
4425 (match_operand:V4DF 2 "nonimmediate_operand" " 1,xm"))
4426 (parallel [(const_int 0) (const_int 4)
4427 (const_int 2) (const_int 6)])))]
4429 && (!MEM_P (operands[1]) || rtx_equal_p (operands[1], operands[2]))"
4431 vmovddup\t{%1, %0|%0, %1}
4432 vunpcklpd\t{%2, %1, %0|%0, %1, %2}"
4433 [(set_attr "type" "sselog")
4434 (set_attr "prefix" "vex")
4435 (set_attr "mode" "V4DF")])
;; Expander for the V2DF low interleave.  If the operand combination is not
;; accepted by ix86_vec_interleave_v2df_operator_ok (operands, 0), operand 1
;; is forced into a register before the pattern is emitted.
4437 (define_expand "vec_interleave_lowv2df"
4438 [(set (match_operand:V2DF 0 "register_operand" "")
4441 (match_operand:V2DF 1 "nonimmediate_operand" "")
4442 (match_operand:V2DF 2 "nonimmediate_operand" ""))
4443 (parallel [(const_int 0)
4447 if (!ix86_vec_interleave_v2df_operator_ok (operands, 0))
4448 operands[1] = force_reg (V2DFmode, operands[1]);
;; AVX form of the V2DF low interleave.  Alternatives, in order:
;;   reg,reg -> vunpcklpd;  both inputs the same memory -> vmovddup;
;;   register operand 1 with memory operand 2 -> vmovhpd load;
;;   offsettable memory destination tied to operand 1 -> vmovlpd store of
;;   operand 2 into the destination's high half (%H0).
4451 (define_insn "*avx_interleave_lowv2df"
4452 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,o")
4455 (match_operand:V2DF 1 "nonimmediate_operand" " x,m,x,0")
4456 (match_operand:V2DF 2 "nonimmediate_operand" " x,1,m,x"))
4457 (parallel [(const_int 0)
4459 "TARGET_AVX && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
4461 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4462 vmovddup\t{%1, %0|%0, %1}
4463 vmovhpd\t{%2, %1, %0|%0, %1, %2}
4464 vmovlpd\t{%2, %H0|%H0, %2}"
4465 [(set_attr "type" "sselog,sselog,ssemov,ssemov")
4466 (set_attr "prefix" "vex")
4467 (set_attr "mode" "V2DF,V2DF,V1DF,V1DF")])
;; SSE3 (two-operand) form of the V2DF low interleave.  Alternatives, in
;; order: unpcklpd; movddup when both inputs are the same memory; movhpd
;; load of memory operand 2; movlpd store of operand 2 into the high half
;; (%H0) of an offsettable memory destination tied to operand 1.
4469 (define_insn "*sse3_interleave_lowv2df"
4470 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,o")
4473 (match_operand:V2DF 1 "nonimmediate_operand" " 0,m,0,0")
4474 (match_operand:V2DF 2 "nonimmediate_operand" " x,1,m,x"))
4475 (parallel [(const_int 0)
4477 "TARGET_SSE3 && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
4479 unpcklpd\t{%2, %0|%0, %2}
4480 movddup\t{%1, %0|%0, %1}
4481 movhpd\t{%2, %0|%0, %2}
4482 movlpd\t{%2, %H0|%H0, %2}"
4483 [(set_attr "type" "sselog,sselog,ssemov,ssemov")
4484 (set_attr "prefix_data16" "*,*,1,1")
4485 (set_attr "mode" "V2DF,V2DF,V1DF,V1DF")])
;; SSE2 form of the V2DF low interleave; operand 1 is always tied to the
;; output.  Alternatives: unpcklpd, movhpd load of memory operand 2, movlpd
;; store of operand 2 into the high half (%H0) of the memory destination.
4487 (define_insn "*sse2_interleave_lowv2df"
4488 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o")
4491 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0")
4492 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x"))
4493 (parallel [(const_int 0)
4495 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
4497 unpcklpd\t{%2, %0|%0, %2}
4498 movhpd\t{%2, %0|%0, %2}
4499 movlpd\t{%2, %H0|%H0, %2}"
4500 [(set_attr "type" "sselog,ssemov,ssemov")
4501 (set_attr "prefix_data16" "*,1,1")
4502 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; Split (TARGET_SSE3, after reload) for a V2DF memory destination fed from
;; register operand 1: the register's DFmode low part (same REGNO) is stored
;; twice, at byte offsets 0 and 8 of the destination.
4505 [(set (match_operand:V2DF 0 "memory_operand" "")
4508 (match_operand:V2DF 1 "register_operand" "")
4510 (parallel [(const_int 0)
4512 "TARGET_SSE3 && reload_completed"
4515 rtx low = gen_rtx_REG (DFmode, REGNO (operands[1]));
4516 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
4517 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
;; Split (TARGET_SSE3) for a selection from V2DF memory operand 1 whose two
;; indices satisfy operand 2 + 2 == operand 3: it is rewritten as a
;; vec_duplicate of the DFmode value at byte offset 8 * operand 2.
4522 [(set (match_operand:V2DF 0 "register_operand" "")
4525 (match_operand:V2DF 1 "memory_operand" "")
4527 (parallel [(match_operand:SI 2 "const_0_to_1_operand" "")
4528 (match_operand:SI 3 "const_int_operand" "")])))]
4529 "TARGET_SSE3 && INTVAL (operands[2]) + 2 == INTVAL (operands[3])"
4530 [(set (match_dup 0) (vec_duplicate:V2DF (match_dup 1)))]
4532 operands[1] = adjust_address (operands[1], DFmode, INTVAL (operands[2]) * 8);
;; Expander taking the shuffle control as a single integer (operand 3) and
;; emitting avx_shufpd256_1 with one selector per element.  Mask bit 1
;; chooses 5 over 4, bit 2 chooses 3 over 2, bit 3 chooses 7 over 6.
4535 (define_expand "avx_shufpd256"
4536 [(match_operand:V4DF 0 "register_operand" "")
4537 (match_operand:V4DF 1 "register_operand" "")
4538 (match_operand:V4DF 2 "nonimmediate_operand" "")
4539 (match_operand:SI 3 "const_int_operand" "")]
4542 int mask = INTVAL (operands[3]);
4543 emit_insn (gen_avx_shufpd256_1 (operands[0], operands[1], operands[2],
4545 GEN_INT (mask & 2 ? 5 : 4),
4546 GEN_INT (mask & 4 ? 3 : 2),
4547 GEN_INT (mask & 8 ? 7 : 6)));
;; vshufpd on V4DF with explicit per-element selectors (operands 3..6, with
;; ranges 0..1, 4..5, 2..3 and 6..7).  The output code folds them back into
;; the instruction immediate: bit 0 = op3, bit 1 = op4 - 4, bit 2 = op5 - 2,
;; bit 3 = op6 - 6, and overwrites operands[3] with it for printing as %3.
4551 (define_insn "avx_shufpd256_1"
4552 [(set (match_operand:V4DF 0 "register_operand" "=x")
4555 (match_operand:V4DF 1 "register_operand" "x")
4556 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4557 (parallel [(match_operand 3 "const_0_to_1_operand" "")
4558 (match_operand 4 "const_4_to_5_operand" "")
4559 (match_operand 5 "const_2_to_3_operand" "")
4560 (match_operand 6 "const_6_to_7_operand" "")])))]
4564 mask = INTVAL (operands[3]);
4565 mask |= (INTVAL (operands[4]) - 4) << 1;
4566 mask |= (INTVAL (operands[5]) - 2) << 2;
4567 mask |= (INTVAL (operands[6]) - 6) << 3;
4568 operands[3] = GEN_INT (mask);
4570 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4572 [(set_attr "type" "sselog")
4573 (set_attr "length_immediate" "1")
4574 (set_attr "prefix" "vex")
4575 (set_attr "mode" "V4DF")])
;; Expander taking the shuffle control as a single integer (operand 3) and
;; emitting sse2_shufpd_v2df with explicit selectors; mask bit 1 chooses
;; 3 over 2 for the last selector.
4577 (define_expand "sse2_shufpd"
4578 [(match_operand:V2DF 0 "register_operand" "")
4579 (match_operand:V2DF 1 "register_operand" "")
4580 (match_operand:V2DF 2 "nonimmediate_operand" "")
4581 (match_operand:SI 3 "const_int_operand" "")]
4584 int mask = INTVAL (operands[3]);
4585 emit_insn (gen_sse2_shufpd_v2df (operands[0], operands[1], operands[2],
4587 GEN_INT (mask & 2 ? 3 : 2)));
;; Expander for SSEMODE_EO modes: three register operands, handed to
;; ix86_expand_vec_extract_even_odd with a final argument of 0.
4591 (define_expand "vec_extract_even<mode>"
4592 [(match_operand:SSEMODE_EO 0 "register_operand" "")
4593 (match_operand:SSEMODE_EO 1 "register_operand" "")
4594 (match_operand:SSEMODE_EO 2 "register_operand" "")]
4597 ix86_expand_vec_extract_even_odd (operands[0], operands[1], operands[2], 0);
;; Expander for SSEMODE_EO modes: three register operands, handed to
;; ix86_expand_vec_extract_even_odd with a final argument of 1.
4601 (define_expand "vec_extract_odd<mode>"
4602 [(match_operand:SSEMODE_EO 0 "register_operand" "")
4603 (match_operand:SSEMODE_EO 1 "register_operand" "")
4604 (match_operand:SSEMODE_EO 2 "register_operand" "")]
4607 ix86_expand_vec_extract_even_odd (operands[0], operands[1], operands[2], 1);
4611 ;; punpcklqdq and punpckhqdq are shorter than shufpd.
;; Three-operand VEX form: vpunpckhqdq of register operand 1 and
;; register-or-memory operand 2 into register operand 0.
4612 (define_insn "*avx_interleave_highv2di"
4613 [(set (match_operand:V2DI 0 "register_operand" "=x")
4616 (match_operand:V2DI 1 "register_operand" "x")
4617 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4618 (parallel [(const_int 1)
4621 "vpunpckhqdq\t{%2, %1, %0|%0, %1, %2}"
4622 [(set_attr "type" "sselog")
4623 (set_attr "prefix" "vex")
4624 (set_attr "mode" "TI")])
;; Two-operand form: punpckhqdq with operand 1 tied to the output register
;; and operand 2 in a register or memory.
4626 (define_insn "vec_interleave_highv2di"
4627 [(set (match_operand:V2DI 0 "register_operand" "=x")
4630 (match_operand:V2DI 1 "register_operand" "0")
4631 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4632 (parallel [(const_int 1)
4635 "punpckhqdq\t{%2, %0|%0, %2}"
4636 [(set_attr "type" "sselog")
4637 (set_attr "prefix_data16" "1")
4638 (set_attr "mode" "TI")])
;; Three-operand VEX form: vpunpcklqdq of register operand 1 and
;; register-or-memory operand 2 into register operand 0.
4640 (define_insn "*avx_interleave_lowv2di"
4641 [(set (match_operand:V2DI 0 "register_operand" "=x")
4644 (match_operand:V2DI 1 "register_operand" "x")
4645 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4646 (parallel [(const_int 0)
4649 "vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}"
4650 [(set_attr "type" "sselog")
4651 (set_attr "prefix" "vex")
4652 (set_attr "mode" "TI")])
;; Two-operand form: punpcklqdq with operand 1 tied to the output register
;; and operand 2 in a register or memory.
4654 (define_insn "vec_interleave_lowv2di"
4655 [(set (match_operand:V2DI 0 "register_operand" "=x")
4658 (match_operand:V2DI 1 "register_operand" "0")
4659 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4660 (parallel [(const_int 0)
4663 "punpcklqdq\t{%2, %0|%0, %2}"
4664 [(set_attr "type" "sselog")
4665 (set_attr "prefix_data16" "1")
4666 (set_attr "mode" "TI")])
;; VEX shufpd on SSEMODE2D vectors: selects element op3 (0..1) and element
;; op4 (2..3) from the concatenation of operands 1 and 2.  The output code
;; rebuilds the immediate as op3 | ((op4 - 2) << 1) and stores it in
;; operands[3] so that it prints as %3.
4668 (define_insn "*avx_shufpd_<mode>"
4669 [(set (match_operand:SSEMODE2D 0 "register_operand" "=x")
4670 (vec_select:SSEMODE2D
4671 (vec_concat:<ssedoublesizemode>
4672 (match_operand:SSEMODE2D 1 "register_operand" "x")
4673 (match_operand:SSEMODE2D 2 "nonimmediate_operand" "xm"))
4674 (parallel [(match_operand 3 "const_0_to_1_operand" "")
4675 (match_operand 4 "const_2_to_3_operand" "")])))]
4679 mask = INTVAL (operands[3]);
4680 mask |= (INTVAL (operands[4]) - 2) << 1;
4681 operands[3] = GEN_INT (mask);
4683 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4685 [(set_attr "type" "sselog")
4686 (set_attr "length_immediate" "1")
4687 (set_attr "prefix" "vex")
4688 (set_attr "mode" "V2DF")])
;; Two-operand shufpd on SSEMODE2D vectors, operand 1 tied to the output.
;; Selects element op3 (0..1) and element op4 (2..3) from the concatenation
;; of operands 1 and 2; the output code rebuilds the immediate as
;; op3 | ((op4 - 2) << 1) and stores it in operands[3] for printing as %3.
4690 (define_insn "sse2_shufpd_<mode>"
4691 [(set (match_operand:SSEMODE2D 0 "register_operand" "=x")
4692 (vec_select:SSEMODE2D
4693 (vec_concat:<ssedoublesizemode>
4694 (match_operand:SSEMODE2D 1 "register_operand" "0")
4695 (match_operand:SSEMODE2D 2 "nonimmediate_operand" "xm"))
4696 (parallel [(match_operand 3 "const_0_to_1_operand" "")
4697 (match_operand 4 "const_2_to_3_operand" "")])))]
4701 mask = INTVAL (operands[3]);
4702 mask |= (INTVAL (operands[4]) - 2) << 1;
4703 operands[3] = GEN_INT (mask);
4705 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
4707 [(set_attr "type" "sselog")
4708 (set_attr "length_immediate" "1")
4709 (set_attr "mode" "V2DF")])
4711 ;; Avoid combining registers from different units in a single alternative,
4712 ;; see comment above inline_secondary_memory_needed function in i386.c
;; AVX: element 1 of a V2DF into a DF destination.  Alternative 0 stores
;; from a register with vmovhpd, alternative 1 is register-to-register via
;; vunpckhpd; the last three alternatives take operand 1 from offsettable
;; memory.  Memory-to-memory is rejected by the condition.
4713 (define_insn "*avx_storehpd"
4714 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
4716 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,o,o,o")
4717 (parallel [(const_int 1)])))]
4718 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4720 vmovhpd\t{%1, %0|%0, %1}
4721 vunpckhpd\t{%1, %1, %0|%0, %1, %1}
4725 [(set_attr "type" "ssemov,sselog1,ssemov,fmov,imov")
4726 (set_attr "prefix" "vex")
4727 (set_attr "mode" "V1DF,V2DF,DF,DF,DF")])
;; SSE2: element 1 of a V2DF into a DF destination.  Alternative 0 stores
;; from a register with movhpd; alternative 1 has operand 1 tied to the
;; output register; the last three alternatives take operand 1 from
;; offsettable memory.  Memory-to-memory is rejected by the condition.
4729 (define_insn "sse2_storehpd"
4730 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
4732 (match_operand:V2DF 1 "nonimmediate_operand" " x,0,o,o,o")
4733 (parallel [(const_int 1)])))]
4734 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4736 movhpd\t{%1, %0|%0, %1}
4741 [(set_attr "type" "ssemov,sselog1,ssemov,fmov,imov")
4742 (set_attr "prefix_data16" "1,*,*,*,*")
4743 (set_attr "mode" "V1DF,V2DF,DF,DF,DF")])
;; Split (TARGET_SSE2, after reload): element 1 of a V2DF in memory loaded
;; into a DF register becomes a plain DFmode move from byte offset 8.
4746 [(set (match_operand:DF 0 "register_operand" "")
4748 (match_operand:V2DF 1 "memory_operand" "")
4749 (parallel [(const_int 1)])))]
4750 "TARGET_SSE2 && reload_completed"
4751 [(set (match_dup 0) (match_dup 1))]
4753 operands[1] = adjust_address (operands[1], DFmode, 8);
4756 ;; Avoid combining registers from different units in a single alternative,
4757 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Element 0 of a V2DF into a DF destination.  Alternative 0 stores from a
;; register with [v]movlpd; the last three alternatives take operand 1 from
;; memory.  Memory-to-memory is rejected by the condition.
4758 (define_insn "sse2_storelpd"
4759 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
4761 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m,m,m")
4762 (parallel [(const_int 0)])))]
4763 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4765 %vmovlpd\t{%1, %0|%0, %1}
4770 [(set_attr "type" "ssemov,ssemov,ssemov,fmov,imov")
4771 (set_attr "prefix_data16" "1,*,*,*,*")
4772 (set_attr "prefix" "maybe_vex")
4773 (set_attr "mode" "V1DF,DF,DF,DF,DF")])
;; Split (TARGET_SSE2, after reload): element 0 of a V2DF loaded into a DF
;; register becomes a plain DFmode move from a DFmode view of operand 1
;; (a DFmode REG with the same REGNO, or gen_lowpart).
4776 [(set (match_operand:DF 0 "register_operand" "")
4778 (match_operand:V2DF 1 "nonimmediate_operand" "")
4779 (parallel [(const_int 0)])))]
4780 "TARGET_SSE2 && reload_completed"
4783 rtx op1 = operands[1];
4785 op1 = gen_rtx_REG (DFmode, REGNO (op1));
4787 op1 = gen_lowpart (DFmode, op1);
4788 emit_move_insn (operands[0], op1);
;; Expander: keeps element 0 of V2DF operand 1 and pairs it with DF operand 2.
;; Operands are first legitimized by ix86_fixup_binary_operands.
4792 (define_expand "sse2_loadhpd_exp"
4793 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
4796 (match_operand:V2DF 1 "nonimmediate_operand" "")
4797 (parallel [(const_int 0)]))
4798 (match_operand:DF 2 "nonimmediate_operand" "")))]
4800 "ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);")
4802 ;; Avoid combining registers from different units in a single alternative,
4803 ;; see comment above inline_secondary_memory_needed function in i386.c
;; AVX: keep element 0 of operand 1 and pair it with DF operand 2.
;; Alternative 0 loads operand 2 from memory with vmovhpd, alternative 1 is
;; register-to-register via vunpcklpd; the last three alternatives have an
;; offsettable memory destination tied to operand 1.
4804 (define_insn "*avx_loadhpd"
4805 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o,o,o")
4808 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,0,0,0")
4809 (parallel [(const_int 0)]))
4810 (match_operand:DF 2 "nonimmediate_operand" " m,x,x,*f,r")))]
4811 "TARGET_AVX && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4813 vmovhpd\t{%2, %1, %0|%0, %1, %2}
4814 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4818 [(set_attr "type" "ssemov,sselog,ssemov,fmov,imov")
4819 (set_attr "prefix" "vex")
4820 (set_attr "mode" "V1DF,V2DF,DF,DF,DF")])
;; SSE2: keep element 0 of operand 1 and pair it with DF operand 2.
;; Alternatives 0 and 1 have operand 1 tied to the output (movhpd load,
;; unpcklpd); alternative 2 has operand 2 tied to the output and uses
;; shufpd; the last three have an offsettable memory destination tied to
;; operand 1.
;; NOTE(review): in alternative 2, "shufpd $1, %1, %0" appears to yield
;; { old %0 high, %1 low } rather than { %1 low, %0 low } -- confirm
;; against the SHUFPD definition before relying on this alternative.
4822 (define_insn "sse2_loadhpd"
4823 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,o,o,o")
4826 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,x,0,0,0")
4827 (parallel [(const_int 0)]))
4828 (match_operand:DF 2 "nonimmediate_operand" " m,x,0,x,*f,r")))]
4829 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4831 movhpd\t{%2, %0|%0, %2}
4832 unpcklpd\t{%2, %0|%0, %2}
4833 shufpd\t{$1, %1, %0|%0, %1, 1}
4837 [(set_attr "type" "ssemov,sselog,sselog,ssemov,fmov,imov")
4838 (set_attr "prefix_data16" "1,*,*,*,*,*")
4839 (set_attr "length_immediate" "*,*,1,*,*,*")
4840 (set_attr "mode" "V1DF,V2DF,V2DF,DF,DF,DF")])
;; Split (TARGET_SSE2, after reload): a V2DF memory destination that keeps
;; its own element 0 and takes DF register operand 1 as the other element
;; becomes a plain DFmode store of operand 1 at byte offset 8.
4843 [(set (match_operand:V2DF 0 "memory_operand" "")
4845 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
4846 (match_operand:DF 1 "register_operand" "")))]
4847 "TARGET_SSE2 && reload_completed"
4848 [(set (match_dup 0) (match_dup 1))]
4850 operands[0] = adjust_address (operands[0], DFmode, 8);
;; Expander: pairs DF operand 2 with element 1 of V2DF operand 1.
;; Operands are first legitimized by ix86_fixup_binary_operands.
4853 (define_expand "sse2_loadlpd_exp"
4854 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
4856 (match_operand:DF 2 "nonimmediate_operand" "")
4858 (match_operand:V2DF 1 "nonimmediate_operand" "")
4859 (parallel [(const_int 1)]))))]
4861 "ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);")
4863 ;; Avoid combining registers from different units in a single alternative,
4864 ;; see comment above inline_secondary_memory_needed function in i386.c
;; AVX: pair DF operand 2 with element 1 of operand 1.  Register
;; alternatives, in order: vmovsd load when operand 1 is the constant "C";
;; vmovlpd load of memory operand 2; vmovsd register merge; vmovhpd loading
;; the high half (%H1) of a memory operand 1.  The last three alternatives
;; have a memory destination tied to operand 1.
4865 (define_insn "*avx_loadlpd"
4866 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,m,m,m")
4868 (match_operand:DF 2 "nonimmediate_operand" " m,m,x,x,x,*f,r")
4870 (match_operand:V2DF 1 "vector_move_operand" " C,x,x,o,0,0,0")
4871 (parallel [(const_int 1)]))))]
4872 "TARGET_AVX && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4874 vmovsd\t{%2, %0|%0, %2}
4875 vmovlpd\t{%2, %1, %0|%0, %1, %2}
4876 vmovsd\t{%2, %1, %0|%0, %1, %2}
4877 vmovhpd\t{%H1, %2, %0|%0, %2, %H1}
4881 [(set_attr "type" "ssemov,ssemov,ssemov,ssemov,ssemov,fmov,imov")
4882 (set_attr "prefix" "vex")
4883 (set_attr "mode" "DF,V1DF,V1DF,V1DF,DF,DF,DF")])
;; SSE2: pair DF operand 2 with element 1 of operand 1.  Register
;; alternatives, in order: movsd load when operand 1 is the constant "C";
;; movlpd load of memory operand 2; movsd register merge; shufpd; movhpd
;; loading the high half (%H1) of a memory operand 1.  The last three
;; alternatives have a memory destination tied to operand 1.
;;
;; In the shufpd alternative operand 2 is tied to the output register, so
;; the instruction's source must be operand 1: immediate 2 keeps the low
;; element of %0 and takes the high element from the source.  Using %2 as
;; the source would name the same register as %0 and leave operand 1 unread.
4885 (define_insn "sse2_loadlpd"
4886 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m,m,m")
4888 (match_operand:DF 2 "nonimmediate_operand" " m,m,x,0,0,x,*f,r")
4890 (match_operand:V2DF 1 "vector_move_operand" " C,0,0,x,o,0,0,0")
4891 (parallel [(const_int 1)]))))]
4892 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4894 movsd\t{%2, %0|%0, %2}
4895 movlpd\t{%2, %0|%0, %2}
4896 movsd\t{%2, %0|%0, %2}
4897 shufpd\t{$2, %1, %0|%0, %1, 2}
4898 movhpd\t{%H1, %0|%0, %H1}
4902 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov,fmov,imov")
4903 (set_attr "prefix_data16" "*,1,*,*,1,*,*,*")
4904 (set_attr "length_immediate" "*,*,*,1,*,*,*,*")
4905 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,DF,DF,DF")])
;; Split (TARGET_SSE2, after reload): a V2DF memory destination that takes
;; DF register operand 1 as its first element and keeps its own element 1
;; becomes a plain DFmode store of operand 1.  The first element lives at
;; byte offset 0; offset 8 is the preserved element 1 and must not be
;; overwritten.
4908 [(set (match_operand:V2DF 0 "memory_operand" "")
4910 (match_operand:DF 1 "register_operand" "")
4911 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
4912 "TARGET_SSE2 && reload_completed"
4913 [(set (match_dup 0) (match_dup 1))]
4915 operands[0] = adjust_address (operands[0], DFmode, 0);
4918 ;; Not sure these two are ever used, but it doesn't hurt to have them.
;; SSE without SSE2: element 1 of a V2DF into a DF destination.
;; Alternatives: movhps store to memory, movhlps register-to-register,
;; movlps load from the high half (%H1) of an offsettable memory operand.
4920 (define_insn "*vec_extractv2df_1_sse"
4921 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
4923 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
4924 (parallel [(const_int 1)])))]
4925 "!TARGET_SSE2 && TARGET_SSE
4926 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4928 movhps\t{%1, %0|%0, %1}
4929 movhlps\t{%1, %0|%0, %1}
4930 movlps\t{%H1, %0|%0, %H1}"
4931 [(set_attr "type" "ssemov")
4932 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; SSE without SSE2: element 0 of a V2DF into a DF destination.
;; Alternatives: movlps store to memory, movaps register copy, movlps load
;; from memory.
4934 (define_insn "*vec_extractv2df_0_sse"
4935 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
4937 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
4938 (parallel [(const_int 0)])))]
4939 "!TARGET_SSE2 && TARGET_SSE
4940 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4942 movlps\t{%1, %0|%0, %1}
4943 movaps\t{%1, %0|%0, %1}
4944 movlps\t{%1, %0|%0, %1}"
4945 [(set_attr "type" "ssemov")
4946 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; AVX movsd-style pattern on V2DF operands 2 and 1.  Alternatives, in
;; order: vmovsd register merge; vmovlpd load of memory operand 2; vmovlpd
;; store of operand 2 into a memory destination tied to operand 1; vmovhps
;; load of the high half (%H1) of memory operand 1; vmovhps store of
;; operand 1 into the high half (%H0) of a memory destination tied to
;; operand 2.
4948 (define_insn "*avx_movsd"
4949 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m,x,o")
4951 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x,x,0")
4952 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,0,o,x")
4956 vmovsd\t{%2, %1, %0|%0, %1, %2}
4957 vmovlpd\t{%2, %1, %0|%0, %1, %2}
4958 vmovlpd\t{%2, %0|%0, %2}
4959 vmovhps\t{%H1, %2, %0|%0, %2, %H1}
4960 vmovhps\t{%1, %H0|%H0, %1}"
4961 [(set_attr "type" "ssemov,ssemov,ssemov,ssemov,ssemov")
4962 (set_attr "prefix" "vex")
4963 (set_attr "mode" "DF,V1DF,V1DF,V1DF,V1DF")])
;; SSE2 movsd-style pattern on V2DF operands 2 and 1.  Alternatives, in
;; order: movsd register merge; movlpd load of memory operand 2; movlpd
;; store of operand 2 into a memory destination tied to operand 1; shufpd;
;; movhps load of the high half (%H1) of memory operand 1; movhps store of
;; operand 1 into the high half (%H0) of a memory destination.
;;
;; In the shufpd alternative operand 2 is tied to the output register, so
;; the instruction's source must be operand 1: immediate 2 keeps the low
;; element of %0 and takes the high element from the source.  Using %2 as
;; the source would name the same register as %0 and leave operand 1 unread.
4965 (define_insn "sse2_movsd"
4966 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m,x,x,o")
4968 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x,0,0,0")
4969 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0,x,o,x")
4973 movsd\t{%2, %0|%0, %2}
4974 movlpd\t{%2, %0|%0, %2}
4975 movlpd\t{%2, %0|%0, %2}
4976 shufpd\t{$2, %1, %0|%0, %1, 2}
4977 movhps\t{%H1, %0|%0, %H1}
4978 movhps\t{%1, %H0|%H0, %1}"
4979 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
4980 (set_attr "prefix_data16" "*,1,1,*,*,*")
4981 (set_attr "length_immediate" "*,*,*,1,*,*")
4982 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,V1DF")])
;; V2DF result built from a single DF operand (register or memory) with
;; [v]movddup.
4984 (define_insn "*vec_dupv2df_sse3"
4985 [(set (match_operand:V2DF 0 "register_operand" "=x")
4987 (match_operand:DF 1 "nonimmediate_operand" "xm")))]
4989 "%vmovddup\t{%1, %0|%0, %1}"
4990 [(set_attr "type" "sselog1")
4991 (set_attr "prefix" "maybe_vex")
4992 (set_attr "mode" "DF")])
;; V2DF result built from a single DF register operand that is tied to the
;; output register (constraint "0").
4994 (define_insn "vec_dupv2df"
4995 [(set (match_operand:V2DF 0 "register_operand" "=x")
4997 (match_operand:DF 1 "register_operand" "0")))]
5000 [(set_attr "type" "sselog1")
5001 (set_attr "mode" "V2DF")])
;; V2DF result built from DF operand 1 (register or memory) and printed as
;; [v]movddup of that single operand.
5003 (define_insn "*vec_concatv2df_sse3"
5004 [(set (match_operand:V2DF 0 "register_operand" "=x")
5006 (match_operand:DF 1 "nonimmediate_operand" "xm")
5009 "%vmovddup\t{%1, %0|%0, %1}"
5010 [(set_attr "type" "sselog1")
5011 (set_attr "prefix" "maybe_vex")
5012 (set_attr "mode" "DF")])
;; AVX: V2DF built from DF operands 1 and 2.  Alternatives: both in
;; registers -> vunpcklpd; operand 2 in memory -> vmovhpd; operand 1 in
;; memory with operand 2 the constant "C" -> vmovsd load of operand 1.
5014 (define_insn "*vec_concatv2df_avx"
5015 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
5017 (match_operand:DF 1 "nonimmediate_operand" " x,x,m")
5018 (match_operand:DF 2 "vector_move_operand" " x,m,C")))]
5021 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
5022 vmovhpd\t{%2, %1, %0|%0, %1, %2}
5023 vmovsd\t{%1, %0|%0, %1}"
5024 [(set_attr "type" "ssemov")
5025 (set_attr "prefix" "vex")
5026 (set_attr "mode" "DF,V1DF,DF")])
;; Non-AVX: V2DF built from DF operands 1 and 2.  The first three
;; alternatives use constraint Y2 (unpcklpd, movhpd, movsd load with
;; operand 2 the constant "C"); the last two use constraint x with
;; movlhps / movhps.
5028 (define_insn "*vec_concatv2df"
5029 [(set (match_operand:V2DF 0 "register_operand" "=Y2,Y2,Y2,x,x")
5031 (match_operand:DF 1 "nonimmediate_operand" " 0 ,0 ,m ,0,0")
5032 (match_operand:DF 2 "vector_move_operand" " Y2,m ,C ,x,m")))]
5035 unpcklpd\t{%2, %0|%0, %2}
5036 movhpd\t{%2, %0|%0, %2}
5037 movsd\t{%1, %0|%0, %1}
5038 movlhps\t{%2, %0|%0, %2}
5039 movhps\t{%2, %0|%0, %2}"
5040 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,ssemov")
5041 (set_attr "prefix_data16" "*,1,*,*,*")
5042 (set_attr "mode" "V2DF,V1DF,DF,V4SF,V2SF")])
5044 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5046 ;; Parallel integral arithmetic
5048 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Integer vector negation expander for SSEMODEI modes.  The preparation
;; statement sets operands[2] to a register holding the all-zero constant
;; of the vector mode.
5050 (define_expand "neg<mode>2"
5051 [(set (match_operand:SSEMODEI 0 "register_operand" "")
5054 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")))]
5056 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
;; Integer vector add/subtract expander for SSEMODEI modes; operands are
;; legitimized with ix86_fixup_binary_operands_no_copy.
5058 (define_expand "<plusminus_insn><mode>3"
5059 [(set (match_operand:SSEMODEI 0 "register_operand" "")
5061 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
5062 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
5064 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; AVX three-operand integer vector add/subtract for SSEMODEI modes; the
;; mnemonic is assembled from "vp", <plusminus_mnemonic> and <ssevecsize>.
5066 (define_insn "*avx_<plusminus_insn><mode>3"
5067 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
5069 (match_operand:SSEMODEI 1 "nonimmediate_operand" "<comm>x")
5070 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
5071 "TARGET_AVX && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5072 "vp<plusminus_mnemonic><ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5073 [(set_attr "type" "sseiadd")
5074 (set_attr "prefix" "vex")
5075 (set_attr "mode" "TI")])
;; SSE2 two-operand integer vector add/subtract for SSEMODEI modes; operand
;; 1 is tied to the output and the mnemonic is assembled from "p",
;; <plusminus_mnemonic> and <ssevecsize>.
5077 (define_insn "*<plusminus_insn><mode>3"
5078 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
5080 (match_operand:SSEMODEI 1 "nonimmediate_operand" "<comm>0")
5081 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
5082 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5083 "p<plusminus_mnemonic><ssevecsize>\t{%2, %0|%0, %2}"
5084 [(set_attr "type" "sseiadd")
5085 (set_attr "prefix_data16" "1")
5086 (set_attr "mode" "TI")])
;; Expander for the sat_plusminus operations on SSEMODE12 (V16QI, V8HI)
;; vectors; operands are legitimized with ix86_fixup_binary_operands_no_copy.
5088 (define_expand "sse2_<plusminus_insn><mode>3"
5089 [(set (match_operand:SSEMODE12 0 "register_operand" "")
5090 (sat_plusminus:SSEMODE12
5091 (match_operand:SSEMODE12 1 "nonimmediate_operand" "")
5092 (match_operand:SSEMODE12 2 "nonimmediate_operand" "")))]
5094 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; AVX three-operand form of the sat_plusminus operations on SSEMODE12
;; vectors.
5096 (define_insn "*avx_<plusminus_insn><mode>3"
5097 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
5098 (sat_plusminus:SSEMODE12
5099 (match_operand:SSEMODE12 1 "nonimmediate_operand" "<comm>x")
5100 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
5101 "TARGET_AVX && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5102 "vp<plusminus_mnemonic><ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5103 [(set_attr "type" "sseiadd")
5104 (set_attr "prefix" "vex")
5105 (set_attr "mode" "TI")])
;; SSE2 two-operand form of the sat_plusminus operations on SSEMODE12
;; vectors; operand 1 is tied to the output.
5107 (define_insn "*sse2_<plusminus_insn><mode>3"
5108 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
5109 (sat_plusminus:SSEMODE12
5110 (match_operand:SSEMODE12 1 "nonimmediate_operand" "<comm>0")
5111 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
5112 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5113 "p<plusminus_mnemonic><ssevecsize>\t{%2, %0|%0, %2}"
5114 [(set_attr "type" "sseiadd")
5115 (set_attr "prefix_data16" "1")
5116 (set_attr "mode" "TI")])
;; V16QI multiply, implemented as a split that runs while pseudos can still
;; be created.  The split uses six V16QI temporaries: it interleaves each
;; input with itself (high and low halves), multiplies the resulting words
;; with mulv8hi3, and finally gathers the even bytes of the two products
;; into operand 0 via ix86_expand_vec_extract_even_odd.
5118 (define_insn_and_split "mulv16qi3"
5119 [(set (match_operand:V16QI 0 "register_operand" "")
5120 (mult:V16QI (match_operand:V16QI 1 "register_operand" "")
5121 (match_operand:V16QI 2 "register_operand" "")))]
5123 && can_create_pseudo_p ()"
5131 for (i = 0; i < 6; ++i)
5132 t[i] = gen_reg_rtx (V16QImode);
5134 /* Unpack data such that we've got a source byte in each low byte of
5135 each word. We don't care what goes into the high byte of each word.
5136 Rather than trying to get zero in there, most convenient is to let
5137 it be a copy of the low byte. */
5138 emit_insn (gen_vec_interleave_highv16qi (t[0], operands[1], operands[1]));
5139 emit_insn (gen_vec_interleave_highv16qi (t[1], operands[2], operands[2]));
5140 emit_insn (gen_vec_interleave_lowv16qi (t[2], operands[1], operands[1]));
5141 emit_insn (gen_vec_interleave_lowv16qi (t[3], operands[2], operands[2]));
5143 /* Multiply words. The end-of-line annotations here give a picture of what
5144 the output of that instruction looks like. Dot means don't care; the
5145 letters are the bytes of the result with A being the most significant. */
5146 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[4]), /* .A.B.C.D.E.F.G.H */
5147 gen_lowpart (V8HImode, t[0]),
5148 gen_lowpart (V8HImode, t[1])));
5149 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[5]), /* .I.J.K.L.M.N.O.P */
5150 gen_lowpart (V8HImode, t[2]),
5151 gen_lowpart (V8HImode, t[3])));
5153 /* Extract the even bytes and merge them back together. */
5154 ix86_expand_vec_extract_even_odd (operands[0], t[5], t[4], 0);
;; Named expander for the V8HI multiply.  Both inputs may be register or
;; memory; ix86_fixup_binary_operands_no_copy is run on the operands for
;; MULT in V8HImode as the preparation statement.
5158 (define_expand "mulv8hi3"
5159 [(set (match_operand:V8HI 0 "register_operand" "")
5160 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
5161 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
5163 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; AVX form of the V8HI multiply: three-operand vpmullw.  Operand 1 is
;; marked commutative ("%") and may live in any SSE register.
5165 (define_insn "*avx_mulv8hi3"
5166 [(set (match_operand:V8HI 0 "register_operand" "=x")
5167 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%x")
5168 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
5169 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5170 "vpmullw\t{%2, %1, %0|%0, %1, %2}"
5171 [(set_attr "type" "sseimul")
5172 (set_attr "prefix" "vex")
5173 (set_attr "mode" "TI")])
;; SSE2 form of the V8HI multiply: two-operand pmullw with operand 1 tied
;; to the destination ("%0", commutative).
5175 (define_insn "*mulv8hi3"
5176 [(set (match_operand:V8HI 0 "register_operand" "=x")
5177 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
5178 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
5179 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5180 "pmullw\t{%2, %0|%0, %2}"
5181 [(set_attr "type" "sseimul")
5182 (set_attr "prefix_data16" "1")
5183 (set_attr "mode" "TI")])
;; Named expander smulv8hi3_highpart on V8HI operands.  The preparation
;; statement runs ix86_fixup_binary_operands_no_copy for MULT in V8HImode.
5185 (define_expand "smulv8hi3_highpart"
5186 [(set (match_operand:V8HI 0 "register_operand" "")
5191 (match_operand:V8HI 1 "nonimmediate_operand" ""))
5193 (match_operand:V8HI 2 "nonimmediate_operand" "")))
5196 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; AVX form of the smulv8hi3_highpart pattern: three-operand vpmulhw.
;; Operand 1 is commutative ("%") and may live in any SSE register.
;; The name carries the "smul" stem so that it lines up with
;; *smulv8hi3_highpart and *avx_umulv8hi3_highpart; being a "*" name it is
;; used only for identification and generates no gen_* function.
5198 (define_insn "*avx_smulv8hi3_highpart"
5199 [(set (match_operand:V8HI 0 "register_operand" "=x")
5204 (match_operand:V8HI 1 "nonimmediate_operand" "%x"))
5206 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
5208 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5209 "vpmulhw\t{%2, %1, %0|%0, %1, %2}"
5210 [(set_attr "type" "sseimul")
5211 (set_attr "prefix" "vex")
5212 (set_attr "mode" "TI")])
;; SSE2 form of the smulv8hi3_highpart pattern: two-operand pmulhw with
;; operand 1 tied to the destination ("%0", commutative).
5214 (define_insn "*smulv8hi3_highpart"
5215 [(set (match_operand:V8HI 0 "register_operand" "=x")
5220 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
5222 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
5224 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5225 "pmulhw\t{%2, %0|%0, %2}"
5226 [(set_attr "type" "sseimul")
5227 (set_attr "prefix_data16" "1")
5228 (set_attr "mode" "TI")])
;; Named expander umulv8hi3_highpart on V8HI operands.  The preparation
;; statement runs ix86_fixup_binary_operands_no_copy for MULT in V8HImode.
5230 (define_expand "umulv8hi3_highpart"
5231 [(set (match_operand:V8HI 0 "register_operand" "")
5236 (match_operand:V8HI 1 "nonimmediate_operand" ""))
5238 (match_operand:V8HI 2 "nonimmediate_operand" "")))
5241 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; AVX form of the umulv8hi3_highpart pattern: three-operand vpmulhuw.
;; Operand 1 is commutative ("%") and may live in any SSE register.
5243 (define_insn "*avx_umulv8hi3_highpart"
5244 [(set (match_operand:V8HI 0 "register_operand" "=x")
5249 (match_operand:V8HI 1 "nonimmediate_operand" "%x"))
5251 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
5253 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5254 "vpmulhuw\t{%2, %1, %0|%0, %1, %2}"
5255 [(set_attr "type" "sseimul")
5256 (set_attr "prefix" "vex")
5257 (set_attr "mode" "TI")])
;; SSE2 form of the umulv8hi3_highpart pattern: two-operand pmulhuw with
;; operand 1 tied to the destination ("%0", commutative).
5259 (define_insn "*umulv8hi3_highpart"
5260 [(set (match_operand:V8HI 0 "register_operand" "=x")
5265 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
5267 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
5269 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5270 "pmulhuw\t{%2, %0|%0, %2}"
5271 [(set_attr "type" "sseimul")
5272 (set_attr "prefix_data16" "1")
5273 (set_attr "mode" "TI")])
;; Named expander producing a V2DI result from elements 0 and 2 of two
;; V4SI inputs (see the parallel selectors).  The preparation statement
;; runs ix86_fixup_binary_operands_no_copy for MULT in V4SImode.
5275 (define_expand "sse2_umulv2siv2di3"
5276 [(set (match_operand:V2DI 0 "register_operand" "")
5280 (match_operand:V4SI 1 "nonimmediate_operand" "")
5281 (parallel [(const_int 0) (const_int 2)])))
5284 (match_operand:V4SI 2 "nonimmediate_operand" "")
5285 (parallel [(const_int 0) (const_int 2)])))))]
5287 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; AVX insn for the V4SI -> V2DI multiply of elements 0 and 2:
;; three-operand vpmuludq, operand 1 commutative.
5289 (define_insn "*avx_umulv2siv2di3"
5290 [(set (match_operand:V2DI 0 "register_operand" "=x")
5294 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
5295 (parallel [(const_int 0) (const_int 2)])))
5298 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
5299 (parallel [(const_int 0) (const_int 2)])))))]
5300 "TARGET_AVX && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5301 "vpmuludq\t{%2, %1, %0|%0, %1, %2}"
5302 [(set_attr "type" "sseimul")
5303 (set_attr "prefix" "vex")
5304 (set_attr "mode" "TI")])
;; SSE2 insn for the V4SI -> V2DI multiply of elements 0 and 2:
;; two-operand pmuludq with operand 1 tied to the destination.
5306 (define_insn "*sse2_umulv2siv2di3"
5307 [(set (match_operand:V2DI 0 "register_operand" "=x")
5311 (match_operand:V4SI 1 "nonimmediate_operand" "%0")
5312 (parallel [(const_int 0) (const_int 2)])))
5315 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
5316 (parallel [(const_int 0) (const_int 2)])))))]
5317 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5318 "pmuludq\t{%2, %0|%0, %2}"
5319 [(set_attr "type" "sseimul")
5320 (set_attr "prefix_data16" "1")
5321 (set_attr "mode" "TI")])
;; Named expander producing a V2DI result from elements 0 and 2 of two
;; V4SI inputs; counterpart of sse2_umulv2siv2di3 for the pmuldq insns.
;; The preparation statement runs ix86_fixup_binary_operands_no_copy for
;; MULT in V4SImode.
5323 (define_expand "sse4_1_mulv2siv2di3"
5324 [(set (match_operand:V2DI 0 "register_operand" "")
5328 (match_operand:V4SI 1 "nonimmediate_operand" "")
5329 (parallel [(const_int 0) (const_int 2)])))
5332 (match_operand:V4SI 2 "nonimmediate_operand" "")
5333 (parallel [(const_int 0) (const_int 2)])))))]
5335 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; AVX insn for the V4SI -> V2DI multiply of elements 0 and 2:
;; three-operand vpmuldq, operand 1 commutative.
5337 (define_insn "*avx_mulv2siv2di3"
5338 [(set (match_operand:V2DI 0 "register_operand" "=x")
5342 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
5343 (parallel [(const_int 0) (const_int 2)])))
5346 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
5347 (parallel [(const_int 0) (const_int 2)])))))]
5348 "TARGET_AVX && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5349 "vpmuldq\t{%2, %1, %0|%0, %1, %2}"
5350 [(set_attr "type" "sseimul")
5351 (set_attr "prefix_extra" "1")
5352 (set_attr "prefix" "vex")
5353 (set_attr "mode" "TI")])
;; SSE4.1 insn for the V4SI -> V2DI multiply of elements 0 and 2:
;; two-operand pmuldq with operand 1 tied to the destination.
5355 (define_insn "*sse4_1_mulv2siv2di3"
5356 [(set (match_operand:V2DI 0 "register_operand" "=x")
5360 (match_operand:V4SI 1 "nonimmediate_operand" "%0")
5361 (parallel [(const_int 0) (const_int 2)])))
5364 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
5365 (parallel [(const_int 0) (const_int 2)])))))]
5366 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5367 "pmuldq\t{%2, %0|%0, %2}"
5368 [(set_attr "type" "sseimul")
5369 (set_attr "prefix_extra" "1")
5370 (set_attr "mode" "TI")])
;; Named expander for the pmaddwd operation: V8HI inputs, V4SI result.
;; The template reuses operands 1 and 2 through match_dup with V4HI
;; vec_selects.  The preparation statement runs
;; ix86_fixup_binary_operands_no_copy for MULT in V8HImode.
5372 (define_expand "sse2_pmaddwd"
5373 [(set (match_operand:V4SI 0 "register_operand" "")
5378 (match_operand:V8HI 1 "nonimmediate_operand" "")
5379 (parallel [(const_int 0)
5385 (match_operand:V8HI 2 "nonimmediate_operand" "")
5386 (parallel [(const_int 0)
5392 (vec_select:V4HI (match_dup 1)
5393 (parallel [(const_int 1)
5398 (vec_select:V4HI (match_dup 2)
5399 (parallel [(const_int 1)
5402 (const_int 7)]))))))]
5404 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; AVX insn for the pmaddwd operation (V8HI inputs, V4SI result):
;; three-operand vpmaddwd, operand 1 commutative.
5406 (define_insn "*avx_pmaddwd"
5407 [(set (match_operand:V4SI 0 "register_operand" "=x")
5412 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
5413 (parallel [(const_int 0)
5419 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5420 (parallel [(const_int 0)
5426 (vec_select:V4HI (match_dup 1)
5427 (parallel [(const_int 1)
5432 (vec_select:V4HI (match_dup 2)
5433 (parallel [(const_int 1)
5436 (const_int 7)]))))))]
5437 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5438 "vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
5439 [(set_attr "type" "sseiadd")
5440 (set_attr "prefix" "vex")
5441 (set_attr "mode" "TI")])
;; SSE2 insn for the pmaddwd operation (V8HI inputs, V4SI result):
;; two-operand pmaddwd with operand 1 tied to the destination.  Also sets
;; the atom_unit attribute to "simul".
5443 (define_insn "*sse2_pmaddwd"
5444 [(set (match_operand:V4SI 0 "register_operand" "=x")
5449 (match_operand:V8HI 1 "nonimmediate_operand" "%0")
5450 (parallel [(const_int 0)
5456 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5457 (parallel [(const_int 0)
5463 (vec_select:V4HI (match_dup 1)
5464 (parallel [(const_int 1)
5469 (vec_select:V4HI (match_dup 2)
5470 (parallel [(const_int 1)
5473 (const_int 7)]))))))]
5474 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5475 "pmaddwd\t{%2, %0|%0, %2}"
5476 [(set_attr "type" "sseiadd")
5477 (set_attr "atom_unit" "simul")
5478 (set_attr "prefix_data16" "1")
5479 (set_attr "mode" "TI")])
;; Named expander for the V4SI multiply on register operands.  Only when
;; SSE4.1 or AVX is enabled does the preparation code run
;; ix86_fixup_binary_operands_no_copy on the operands.
5481 (define_expand "mulv4si3"
5482 [(set (match_operand:V4SI 0 "register_operand" "")
5483 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
5484 (match_operand:V4SI 2 "register_operand" "")))]
5487 if (TARGET_SSE4_1 || TARGET_AVX)
5488 ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);
;; AVX form of the V4SI multiply: three-operand vpmulld, operand 1
;; commutative.
5491 (define_insn "*avx_mulv4si3"
5492 [(set (match_operand:V4SI 0 "register_operand" "=x")
5493 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%x")
5494 (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
5495 "TARGET_AVX && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5496 "vpmulld\t{%2, %1, %0|%0, %1, %2}"
5497 [(set_attr "type" "sseimul")
5498 (set_attr "prefix_extra" "1")
5499 (set_attr "prefix" "vex")
5500 (set_attr "mode" "TI")])
;; SSE4.1 form of the V4SI multiply: two-operand pmulld with operand 1
;; tied to the destination.
5502 (define_insn "*sse4_1_mulv4si3"
5503 [(set (match_operand:V4SI 0 "register_operand" "=x")
5504 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%0")
5505 (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
5506 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5507 "pmulld\t{%2, %0|%0, %2}"
5508 [(set_attr "type" "sseimul")
5509 (set_attr "prefix_extra" "1")
5510 (set_attr "mode" "TI")])
;; V4SI multiply for plain SSE2 (neither SSE4.1 nor AVX), split while
;; pseudos can still be created.  Elements 0/2 are multiplied with
;; sse2_umulv2siv2di3, both inputs are shifted with sse2_lshrv1ti3 so that
;; elements 1/3 can be multiplied the same way, and the two partial
;; results are compacted with pshufd and interleaved into operand 0.
5512 (define_insn_and_split "*sse2_mulv4si3"
5513 [(set (match_operand:V4SI 0 "register_operand" "")
5514 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
5515 (match_operand:V4SI 2 "register_operand" "")))]
5516 "TARGET_SSE2 && !TARGET_SSE4_1 && !TARGET_AVX
5517 && can_create_pseudo_p ()"
5522 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
5528 t1 = gen_reg_rtx (V4SImode);
5529 t2 = gen_reg_rtx (V4SImode);
5530 t3 = gen_reg_rtx (V4SImode);
5531 t4 = gen_reg_rtx (V4SImode);
5532 t5 = gen_reg_rtx (V4SImode);
5533 t6 = gen_reg_rtx (V4SImode);
5534 thirtytwo = GEN_INT (32);
5536 /* Multiply elements 2 and 0. */
5537 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t1),
5540 /* Shift both input vectors down one element, so that elements 3
5541 and 1 are now in the slots for elements 2 and 0. For K8, at
5542 least, this is faster than using a shuffle. */
5543 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t2),
5544 gen_lowpart (V1TImode, op1),
5546 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t3),
5547 gen_lowpart (V1TImode, op2),
5549 /* Multiply elements 3 and 1. */
5550 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t4),
5553 /* Move the results in element 2 down to element 1; we don't care
5554 what goes in elements 2 and 3. */
5555 emit_insn (gen_sse2_pshufd_1 (t5, t1, const0_rtx, const2_rtx,
5556 const0_rtx, const0_rtx));
5557 emit_insn (gen_sse2_pshufd_1 (t6, t4, const0_rtx, const2_rtx,
5558 const0_rtx, const0_rtx));
5560 /* Merge the parts back together. */
5561 emit_insn (gen_vec_interleave_lowv4si (op0, t5, t6));
;; V2DI multiply, split while pseudos can still be created.  Two
;; expansions are visible: one built from XOP helpers (pshufd, mulv4si3,
;; xop_phadddq, shift, xop_pmacsdql) and one built from three
;; sse2_umulv2siv2di3 partial products that are shifted and summed.
5565 (define_insn_and_split "mulv2di3"
5566 [(set (match_operand:V2DI 0 "register_operand" "")
5567 (mult:V2DI (match_operand:V2DI 1 "register_operand" "")
5568 (match_operand:V2DI 2 "register_operand" "")))]
5570 && can_create_pseudo_p ()"
5575 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
5584 /* op1: A,B,C,D, op2: E,F,G,H */
5585 op1 = gen_lowpart (V4SImode, op1);
5586 op2 = gen_lowpart (V4SImode, op2);
5588 t1 = gen_reg_rtx (V4SImode);
5589 t2 = gen_reg_rtx (V4SImode);
5590 t3 = gen_reg_rtx (V2DImode);
5591 t4 = gen_reg_rtx (V2DImode);
5594 emit_insn (gen_sse2_pshufd_1 (t1, op1,
5600 /* t2: (B*E),(A*F),(D*G),(C*H) */
5601 emit_insn (gen_mulv4si3 (t2, t1, op2));
5603 /* t3: (B*E)+(A*F), (D*G)+(C*H) */
5604 emit_insn (gen_xop_phadddq (t3, t2));
5606 /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
5607 emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));
5609 /* op0: (((B*E)+(A*F))<<32)+(B*F), (((D*G)+(C*H))<<32)+(D*H) */
5610 emit_insn (gen_xop_pmacsdql (op0, op1, op2, t4));
5614 t1 = gen_reg_rtx (V2DImode);
5615 t2 = gen_reg_rtx (V2DImode);
5616 t3 = gen_reg_rtx (V2DImode);
5617 t4 = gen_reg_rtx (V2DImode);
5618 t5 = gen_reg_rtx (V2DImode);
5619 t6 = gen_reg_rtx (V2DImode);
5620 thirtytwo = GEN_INT (32);
5622 /* Multiply low parts. */
5623 emit_insn (gen_sse2_umulv2siv2di3 (t1, gen_lowpart (V4SImode, op1),
5624 gen_lowpart (V4SImode, op2)));
5626 /* Shift input vectors right 32 bits so we can multiply high parts. */
5627 emit_insn (gen_lshrv2di3 (t2, op1, thirtytwo));
5628 emit_insn (gen_lshrv2di3 (t3, op2, thirtytwo));
5630 /* Multiply high parts by low parts. */
5631 emit_insn (gen_sse2_umulv2siv2di3 (t4, gen_lowpart (V4SImode, op1),
5632 gen_lowpart (V4SImode, t3)));
5633 emit_insn (gen_sse2_umulv2siv2di3 (t5, gen_lowpart (V4SImode, op2),
5634 gen_lowpart (V4SImode, t2)));
5636 /* Shift them back. */
5637 emit_insn (gen_ashlv2di3 (t4, t4, thirtytwo));
5638 emit_insn (gen_ashlv2di3 (t5, t5, thirtytwo));
5640 /* Add the three parts together. */
5641 emit_insn (gen_addv2di3 (t6, t1, t4));
5642 emit_insn (gen_addv2di3 (op0, t6, t5));
;; Widening multiply expander, V8HI inputs to a V4SI result.  Computes
;; the mulv8hi3 product (t1) and the smulv8hi3_highpart product (t2),
;; then writes vec_interleave_highv8hi (t1, t2) into operand 0 viewed as
;; V8HI.
5647 (define_expand "vec_widen_smult_hi_v8hi"
5648 [(match_operand:V4SI 0 "register_operand" "")
5649 (match_operand:V8HI 1 "register_operand" "")
5650 (match_operand:V8HI 2 "register_operand" "")]
5653 rtx op1, op2, t1, t2, dest;
5657 t1 = gen_reg_rtx (V8HImode);
5658 t2 = gen_reg_rtx (V8HImode);
5659 dest = gen_lowpart (V8HImode, operands[0]);
5661 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5662 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
5663 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
;; Widening multiply expander, V8HI inputs to a V4SI result.  Computes
;; the mulv8hi3 product (t1) and the smulv8hi3_highpart product (t2),
;; then writes vec_interleave_lowv8hi (t1, t2) into operand 0 viewed as
;; V8HI.
5667 (define_expand "vec_widen_smult_lo_v8hi"
5668 [(match_operand:V4SI 0 "register_operand" "")
5669 (match_operand:V8HI 1 "register_operand" "")
5670 (match_operand:V8HI 2 "register_operand" "")]
5673 rtx op1, op2, t1, t2, dest;
5677 t1 = gen_reg_rtx (V8HImode);
5678 t2 = gen_reg_rtx (V8HImode);
5679 dest = gen_lowpart (V8HImode, operands[0]);
5681 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5682 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
5683 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
;; Widening multiply expander, V8HI inputs to a V4SI result.  Computes
;; the mulv8hi3 product (t1) and the umulv8hi3_highpart product (t2),
;; then writes vec_interleave_highv8hi (t1, t2) into operand 0 viewed as
;; V8HI.
5687 (define_expand "vec_widen_umult_hi_v8hi"
5688 [(match_operand:V4SI 0 "register_operand" "")
5689 (match_operand:V8HI 1 "register_operand" "")
5690 (match_operand:V8HI 2 "register_operand" "")]
5693 rtx op1, op2, t1, t2, dest;
5697 t1 = gen_reg_rtx (V8HImode);
5698 t2 = gen_reg_rtx (V8HImode);
5699 dest = gen_lowpart (V8HImode, operands[0]);
5701 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5702 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
5703 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
;; Widening multiply expander, V8HI inputs to a V4SI result.  Computes
;; the mulv8hi3 product (t1) and the umulv8hi3_highpart product (t2),
;; then writes vec_interleave_lowv8hi (t1, t2) into operand 0 viewed as
;; V8HI.
5707 (define_expand "vec_widen_umult_lo_v8hi"
5708 [(match_operand:V4SI 0 "register_operand" "")
5709 (match_operand:V8HI 1 "register_operand" "")
5710 (match_operand:V8HI 2 "register_operand" "")]
5713 rtx op1, op2, t1, t2, dest;
5717 t1 = gen_reg_rtx (V8HImode);
5718 t2 = gen_reg_rtx (V8HImode);
5719 dest = gen_lowpart (V8HImode, operands[0]);
5721 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5722 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
5723 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
;; Widening multiply expander, V4SI inputs to a V2DI result.  Both inputs
;; are first permuted with sse2_pshufd_1 into fresh V4SI temporaries,
;; which are then fed to xop_mulv2div2di3_high.
5727 (define_expand "vec_widen_smult_hi_v4si"
5728 [(match_operand:V2DI 0 "register_operand" "")
5729 (match_operand:V4SI 1 "register_operand" "")
5730 (match_operand:V4SI 2 "register_operand" "")]
5735 t1 = gen_reg_rtx (V4SImode);
5736 t2 = gen_reg_rtx (V4SImode);
5738 emit_insn (gen_sse2_pshufd_1 (t1, operands[1],
5743 emit_insn (gen_sse2_pshufd_1 (t2, operands[2],
5748 emit_insn (gen_xop_mulv2div2di3_high (operands[0], t1, t2));
;; Widening multiply expander, V4SI inputs to a V2DI result.  Both inputs
;; are first permuted with sse2_pshufd_1 into fresh V4SI temporaries,
;; which are then fed to xop_mulv2div2di3_low.
5752 (define_expand "vec_widen_smult_lo_v4si"
5753 [(match_operand:V2DI 0 "register_operand" "")
5754 (match_operand:V4SI 1 "register_operand" "")
5755 (match_operand:V4SI 2 "register_operand" "")]
5760 t1 = gen_reg_rtx (V4SImode);
5761 t2 = gen_reg_rtx (V4SImode);
5763 emit_insn (gen_sse2_pshufd_1 (t1, operands[1],
5768 emit_insn (gen_sse2_pshufd_1 (t2, operands[2],
5773 emit_insn (gen_xop_mulv2div2di3_low (operands[0], t1, t2));
;; Widening multiply expander, V4SI inputs to a V2DI result.  Each input
;; is interleaved with itself (vec_interleave_highv4si) so that the
;; elements of interest land where sse2_umulv2siv2di3 reads them; that
;; multiply writes operand 0.
5777 (define_expand "vec_widen_umult_hi_v4si"
5778 [(match_operand:V2DI 0 "register_operand" "")
5779 (match_operand:V4SI 1 "register_operand" "")
5780 (match_operand:V4SI 2 "register_operand" "")]
5783 rtx op1, op2, t1, t2;
5787 t1 = gen_reg_rtx (V4SImode);
5788 t2 = gen_reg_rtx (V4SImode);
5790 emit_insn (gen_vec_interleave_highv4si (t1, op1, op1));
5791 emit_insn (gen_vec_interleave_highv4si (t2, op2, op2));
5792 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Widening multiply expander, V4SI inputs to a V2DI result.  Each input
;; is interleaved with itself (vec_interleave_lowv4si) so that the
;; elements of interest land where sse2_umulv2siv2di3 reads them; that
;; multiply writes operand 0.
5796 (define_expand "vec_widen_umult_lo_v4si"
5797 [(match_operand:V2DI 0 "register_operand" "")
5798 (match_operand:V4SI 1 "register_operand" "")
5799 (match_operand:V4SI 2 "register_operand" "")]
5802 rtx op1, op2, t1, t2;
5806 t1 = gen_reg_rtx (V4SImode);
5807 t2 = gen_reg_rtx (V4SImode);
5809 emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1));
5810 emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2));
5811 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Dot-product expander: V8HI inputs (operands 1, 2), V4SI accumulator
;; (operand 3) and V4SI result.  Emits sse2_pmaddwd into a fresh
;; temporary, then adds the accumulator with addv4si3.
5815 (define_expand "sdot_prodv8hi"
5816 [(match_operand:V4SI 0 "register_operand" "")
5817 (match_operand:V8HI 1 "register_operand" "")
5818 (match_operand:V8HI 2 "register_operand" "")
5819 (match_operand:V4SI 3 "register_operand" "")]
5822 rtx t = gen_reg_rtx (V4SImode);
5823 emit_insn (gen_sse2_pmaddwd (t, operands[1], operands[2]));
5824 emit_insn (gen_addv4si3 (operands[0], operands[3], t));
;; Dot-product expander: V4SI inputs (operands 1, 2), V2DI accumulator
;; (operand 3) and V2DI result.  Steps: t1 = umulv2siv2di3 of the inputs
;; plus the accumulator; t2/t3 = inputs shifted by sse2_lshrv1ti3;
;; t4 = umulv2siv2di3 of the shifted inputs; result = t1 + t4.
5828 (define_expand "udot_prodv4si"
5829 [(match_operand:V2DI 0 "register_operand" "")
5830 (match_operand:V4SI 1 "register_operand" "")
5831 (match_operand:V4SI 2 "register_operand" "")
5832 (match_operand:V2DI 3 "register_operand" "")]
5837 t1 = gen_reg_rtx (V2DImode);
5838 emit_insn (gen_sse2_umulv2siv2di3 (t1, operands[1], operands[2]));
5839 emit_insn (gen_addv2di3 (t1, t1, operands[3]));
5841 t2 = gen_reg_rtx (V4SImode);
5842 t3 = gen_reg_rtx (V4SImode);
5843 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t2),
5844 gen_lowpart (V1TImode, operands[1]),
5846 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t3),
5847 gen_lowpart (V1TImode, operands[2]),
5850 t4 = gen_reg_rtx (V2DImode);
5851 emit_insn (gen_sse2_umulv2siv2di3 (t4, t2, t3));
5853 emit_insn (gen_addv2di3 (operands[0], t1, t4));
;; AVX insn for the ashr pattern on the SSEMODE24 modes (V8HI, V4SI):
;; three-operand vpsra<ssevecsize>.  The SImode shift count (operand 2)
;; uses constraints "xN"; length_immediate depends on whether operand 2
;; is a const_int.
5857 (define_insn "*avx_ashr<mode>3"
5858 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
5860 (match_operand:SSEMODE24 1 "register_operand" "x")
5861 (match_operand:SI 2 "nonmemory_operand" "xN")))]
5863 "vpsra<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5864 [(set_attr "type" "sseishft")
5865 (set_attr "prefix" "vex")
5866 (set (attr "length_immediate")
5867 (if_then_else (match_operand 2 "const_int_operand" "")
5869 (const_string "0")))
5870 (set_attr "mode" "TI")])
;; Non-AVX insn for the ashr pattern on the SSEMODE24 modes (V8HI, V4SI):
;; two-operand psra<ssevecsize> with operand 1 tied to the destination.
;; The SImode shift count (operand 2) uses constraints "xN";
;; length_immediate depends on whether operand 2 is a const_int.
5872 (define_insn "ashr<mode>3"
5873 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
5875 (match_operand:SSEMODE24 1 "register_operand" "0")
5876 (match_operand:SI 2 "nonmemory_operand" "xN")))]
5878 "psra<ssevecsize>\t{%2, %0|%0, %2}"
5879 [(set_attr "type" "sseishft")
5880 (set_attr "prefix_data16" "1")
5881 (set (attr "length_immediate")
5882 (if_then_else (match_operand 2 "const_int_operand" "")
5884 (const_string "0")))
5885 (set_attr "mode" "TI")])
;; AVX whole-register (V1TI) shift emitting vpsrldq.  Operand 2 must
;; satisfy const_0_to_255_mul_8_operand; the output code divides it by 8
;; before printing (presumably converting a bit count into the byte count
;; the instruction takes -- confirm against the predicate definition).
5887 (define_insn "*avx_lshrv1ti3"
5888 [(set (match_operand:V1TI 0 "register_operand" "=x")
5890 (match_operand:V1TI 1 "register_operand" "x")
5891 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
5894 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
5895 return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
5897 [(set_attr "type" "sseishft")
5898 (set_attr "prefix" "vex")
5899 (set_attr "length_immediate" "1")
5900 (set_attr "mode" "TI")])
;; AVX logical right shift (lshiftrt) on the SSEMODE248 modes (V8HI,
;; V4SI, V2DI): three-operand vpsrl<ssevecsize>.  The SImode shift count
;; (operand 2) uses constraints "xN"; length_immediate depends on whether
;; operand 2 is a const_int.
5902 (define_insn "*avx_lshr<mode>3"
5903 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
5904 (lshiftrt:SSEMODE248
5905 (match_operand:SSEMODE248 1 "register_operand" "x")
5906 (match_operand:SI 2 "nonmemory_operand" "xN")))]
5908 "vpsrl<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5909 [(set_attr "type" "sseishft")
5910 (set_attr "prefix" "vex")
5911 (set (attr "length_immediate")
5912 (if_then_else (match_operand 2 "const_int_operand" "")
5914 (const_string "0")))
5915 (set_attr "mode" "TI")])
;; SSE2 whole-register (V1TI) shift emitting psrldq, with operand 1 tied
;; to the destination.  Operand 2 must satisfy
;; const_0_to_255_mul_8_operand; the output code divides it by 8 before
;; printing (presumably bits to bytes -- confirm against the predicate).
5917 (define_insn "sse2_lshrv1ti3"
5918 [(set (match_operand:V1TI 0 "register_operand" "=x")
5920 (match_operand:V1TI 1 "register_operand" "0")
5921 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
5924 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
5925 return "psrldq\t{%2, %0|%0, %2}";
5927 [(set_attr "type" "sseishft")
5928 (set_attr "prefix_data16" "1")
5929 (set_attr "length_immediate" "1")
5930 (set_attr "mode" "TI")])
;; Non-AVX logical right shift (lshiftrt) on the SSEMODE248 modes (V8HI,
;; V4SI, V2DI): two-operand psrl<ssevecsize> with operand 1 tied to the
;; destination.  The SImode shift count (operand 2) uses constraints
;; "xN"; length_immediate depends on whether operand 2 is a const_int.
5932 (define_insn "lshr<mode>3"
5933 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
5934 (lshiftrt:SSEMODE248
5935 (match_operand:SSEMODE248 1 "register_operand" "0")
5936 (match_operand:SI 2 "nonmemory_operand" "xN")))]
5938 "psrl<ssevecsize>\t{%2, %0|%0, %2}"
5939 [(set_attr "type" "sseishft")
5940 (set_attr "prefix_data16" "1")
5941 (set (attr "length_immediate")
5942 (if_then_else (match_operand 2 "const_int_operand" "")
5944 (const_string "0")))
5945 (set_attr "mode" "TI")])
;; AVX whole-register (V1TI) left shift (ashift) emitting vpslldq.
;; Operand 2 must satisfy const_0_to_255_mul_8_operand; the output code
;; divides it by 8 before printing (presumably bits to bytes -- confirm
;; against the predicate definition).
5947 (define_insn "*avx_ashlv1ti3"
5948 [(set (match_operand:V1TI 0 "register_operand" "=x")
5949 (ashift:V1TI (match_operand:V1TI 1 "register_operand" "x")
5950 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
5953 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
5954 return "vpslldq\t{%2, %1, %0|%0, %1, %2}";
5956 [(set_attr "type" "sseishft")
5957 (set_attr "prefix" "vex")
5958 (set_attr "length_immediate" "1")
5959 (set_attr "mode" "TI")])
;; AVX insn for the ashl pattern on the SSEMODE248 modes (V8HI, V4SI,
;; V2DI): three-operand vpsll<ssevecsize>.  The SImode shift count
;; (operand 2) uses constraints "xN"; length_immediate depends on whether
;; operand 2 is a const_int.
5961 (define_insn "*avx_ashl<mode>3"
5962 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
5964 (match_operand:SSEMODE248 1 "register_operand" "x")
5965 (match_operand:SI 2 "nonmemory_operand" "xN")))]
5967 "vpsll<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5968 [(set_attr "type" "sseishft")
5969 (set_attr "prefix" "vex")
5970 (set (attr "length_immediate")
5971 (if_then_else (match_operand 2 "const_int_operand" "")
5973 (const_string "0")))
5974 (set_attr "mode" "TI")])
;; SSE2 whole-register (V1TI) left shift (ashift) emitting pslldq, with
;; operand 1 tied to the destination.  Operand 2 must satisfy
;; const_0_to_255_mul_8_operand; the output code divides it by 8 before
;; printing (presumably bits to bytes -- confirm against the predicate).
5976 (define_insn "sse2_ashlv1ti3"
5977 [(set (match_operand:V1TI 0 "register_operand" "=x")
5978 (ashift:V1TI (match_operand:V1TI 1 "register_operand" "0")
5979 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
5982 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
5983 return "pslldq\t{%2, %0|%0, %2}";
5985 [(set_attr "type" "sseishft")
5986 (set_attr "prefix_data16" "1")
5987 (set_attr "length_immediate" "1")
5988 (set_attr "mode" "TI")])
;; Non-AVX insn for the ashl pattern on the SSEMODE248 modes (V8HI, V4SI,
;; V2DI): two-operand psll<ssevecsize> with operand 1 tied to the
;; destination.  The SImode shift count (operand 2) uses constraints
;; "xN"; length_immediate depends on whether operand 2 is a const_int.
5990 (define_insn "ashl<mode>3"
5991 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
5993 (match_operand:SSEMODE248 1 "register_operand" "0")
5994 (match_operand:SI 2 "nonmemory_operand" "xN")))]
5996 "psll<ssevecsize>\t{%2, %0|%0, %2}"
5997 [(set_attr "type" "sseishft")
5998 (set_attr "prefix_data16" "1")
5999 (set (attr "length_immediate")
6000 (if_then_else (match_operand 2 "const_int_operand" "")
6002 (const_string "0")))
6003 (set_attr "mode" "TI")])
;; Whole-vector shift expander for every SSEMODEI mode.  The shift count
;; must satisfy const_0_to_255_mul_8_operand.  The preparation code
;; replaces operands 0 and 1 with their V1TImode lowparts.
6005 (define_expand "vec_shl_<mode>"
6006 [(set (match_operand:SSEMODEI 0 "register_operand" "")
6008 (match_operand:SSEMODEI 1 "register_operand" "")
6009 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
6012 operands[0] = gen_lowpart (V1TImode, operands[0]);
6013 operands[1] = gen_lowpart (V1TImode, operands[1]);
;; Whole-vector shift expander for every SSEMODEI mode.  The shift count
;; must satisfy const_0_to_255_mul_8_operand.  The preparation code
;; replaces operands 0 and 1 with their V1TImode lowparts.
6016 (define_expand "vec_shr_<mode>"
6017 [(set (match_operand:SSEMODEI 0 "register_operand" "")
6019 (match_operand:SSEMODEI 1 "register_operand" "")
6020 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
6023 operands[0] = gen_lowpart (V1TImode, operands[0]);
6024 operands[1] = gen_lowpart (V1TImode, operands[1]);
;; AVX integer max/min insn on the SSEMODE124 modes (V16QI, V8HI, V4SI):
;; three-operand vp<maxminiprefix><ssevecsize>.  The prefix_extra
;; attribute is computed from the mode and code: the symbol_ref tests
;; whether the mode differs from V8HImode (for SMAX/SMIN) or V16QImode
;; (for the other codes).
6027 (define_insn "*avx_<code><mode>3"
6028 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
6030 (match_operand:SSEMODE124 1 "nonimmediate_operand" "%x")
6031 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
6032 "TARGET_AVX && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6033 "vp<maxminiprefix><ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
6034 [(set_attr "type" "sseiadd")
6035 (set (attr "prefix_extra")
6037 (ne (symbol_ref "<MODE>mode != ((<CODE> == SMAX || <CODE> == SMIN) ? V8HImode : V16QImode)")
6040 (const_string "0")))
6041 (set_attr "prefix" "vex")
6042 (set_attr "mode" "TI")])
;; Named V16QI max/min expander, iterated over <code>.  The preparation
;; statement runs ix86_fixup_binary_operands_no_copy for that code in
;; V16QImode.
6044 (define_expand "<code>v16qi3"
6045 [(set (match_operand:V16QI 0 "register_operand" "")
6047 (match_operand:V16QI 1 "nonimmediate_operand" "")
6048 (match_operand:V16QI 2 "nonimmediate_operand" "")))]
6050 "ix86_fixup_binary_operands_no_copy (<CODE>, V16QImode, operands);")
;; SSE2 V16QI max/min insn: two-operand p<maxminiprefix>b with operand 1
;; tied to the destination ("%0", commutative).
6052 (define_insn "*<code>v16qi3"
6053 [(set (match_operand:V16QI 0 "register_operand" "=x")
6055 (match_operand:V16QI 1 "nonimmediate_operand" "%0")
6056 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
6057 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V16QImode, operands)"
6058 "p<maxminiprefix>b\t{%2, %0|%0, %2}"
6059 [(set_attr "type" "sseiadd")
6060 (set_attr "prefix_data16" "1")
6061 (set_attr "mode" "TI")])
;; Named V8HI max/min expander, iterated over <code>.  The preparation
;; statement runs ix86_fixup_binary_operands_no_copy for that code in
;; V8HImode.
6063 (define_expand "<code>v8hi3"
6064 [(set (match_operand:V8HI 0 "register_operand" "")
6066 (match_operand:V8HI 1 "nonimmediate_operand" "")
6067 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
6069 "ix86_fixup_binary_operands_no_copy (<CODE>, V8HImode, operands);")
;; SSE2 V8HI max/min insn: two-operand p<maxminiprefix>w with operand 1
;; tied to the destination ("%0", commutative).
6071 (define_insn "*<code>v8hi3"
6072 [(set (match_operand:V8HI 0 "register_operand" "=x")
6074 (match_operand:V8HI 1 "nonimmediate_operand" "%0")
6075 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
6076 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V8HImode, operands)"
6077 "p<maxminiprefix>w\t{%2, %0|%0, %2}"
6078 [(set_attr "type" "sseiadd")
6079 (set_attr "prefix_data16" "1")
6080 (set_attr "mode" "TI")])
;; Unsigned V8HI maximum expander.  One path only fixes up the operands
;; for UMAX.  The other computes op3 = sse2_ussubv8hi3 (op1, op2) followed
;; by op0 = op3 + op2; op3 is operand 0 itself unless operand 0 is
;; rtx_equal_p to operand 2, in which case a fresh register is used so
;; operand 2 is not overwritten before the addition.
6082 (define_expand "umaxv8hi3"
6083 [(set (match_operand:V8HI 0 "register_operand" "")
6084 (umax:V8HI (match_operand:V8HI 1 "register_operand" "")
6085 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
6089 ix86_fixup_binary_operands_no_copy (UMAX, V8HImode, operands);
6092 rtx op0 = operands[0], op2 = operands[2], op3 = op0;
6093 if (rtx_equal_p (op3, op2))
6094 op3 = gen_reg_rtx (V8HImode);
6095 emit_insn (gen_sse2_ussubv8hi3 (op3, operands[1], op2));
6096 emit_insn (gen_addv8hi3 (op0, op3, op2));
;; Signed maximum expander for the SSEMODE14 modes (V16QI, V4SI).  One
;; path only fixes up the operands for SMAX.  The other hands
;; ix86_expand_int_vcond the operand set {op0, op1, op2, GT (op1, op2),
;; op1, op2} -- presumably selecting xops[1] where the comparison holds
;; and xops[2] elsewhere; confirm against ix86_expand_int_vcond.
6101 (define_expand "smax<mode>3"
6102 [(set (match_operand:SSEMODE14 0 "register_operand" "")
6103 (smax:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
6104 (match_operand:SSEMODE14 2 "register_operand" "")))]
6108 ix86_fixup_binary_operands_no_copy (SMAX, <MODE>mode, operands);
6114 xops[0] = operands[0];
6115 xops[1] = operands[1];
6116 xops[2] = operands[2];
6117 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
6118 xops[4] = operands[1];
6119 xops[5] = operands[2];
6120 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 max/min insn on the SSEMODE14 modes (V16QI, V4SI): two-operand
;; p<maxminiprefix><ssevecsize> with operand 1 tied to the destination.
6126 (define_insn "*sse4_1_<code><mode>3"
6127 [(set (match_operand:SSEMODE14 0 "register_operand" "=x")
6129 (match_operand:SSEMODE14 1 "nonimmediate_operand" "%0")
6130 (match_operand:SSEMODE14 2 "nonimmediate_operand" "xm")))]
6131 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6132 "p<maxminiprefix><ssevecsize>\t{%2, %0|%0, %2}"
6133 [(set_attr "type" "sseiadd")
6134 (set_attr "prefix_extra" "1")
6135 (set_attr "mode" "TI")])
;; Signed V2DI maximum expander.  Hands ix86_expand_int_vcond the operand
;; set {op0, op1, op2, GT (op1, op2), op1, op2} -- presumably selecting
;; xops[1] where the comparison holds and xops[2] elsewhere; confirm
;; against ix86_expand_int_vcond.
6137 (define_expand "smaxv2di3"
6138 [(set (match_operand:V2DI 0 "register_operand" "")
6139 (smax:V2DI (match_operand:V2DI 1 "register_operand" "")
6140 (match_operand:V2DI 2 "register_operand" "")))]
6146 xops[0] = operands[0];
6147 xops[1] = operands[1];
6148 xops[2] = operands[2];
6149 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
6150 xops[4] = operands[1];
6151 xops[5] = operands[2];
6152 ok = ix86_expand_int_vcond (xops);
;; Unsigned V4SI maximum expander.  One path only fixes up the operands
;; for UMAX.  The other hands ix86_expand_int_vcond the operand set
;; {op0, op1, op2, GTU (op1, op2), op1, op2} -- presumably selecting
;; xops[1] where the comparison holds and xops[2] elsewhere; confirm
;; against ix86_expand_int_vcond.
6157 (define_expand "umaxv4si3"
6158 [(set (match_operand:V4SI 0 "register_operand" "")
6159 (umax:V4SI (match_operand:V4SI 1 "register_operand" "")
6160 (match_operand:V4SI 2 "register_operand" "")))]
6164 ix86_fixup_binary_operands_no_copy (UMAX, V4SImode, operands);
6170 xops[0] = operands[0];
6171 xops[1] = operands[1];
6172 xops[2] = operands[2];
6173 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
6174 xops[4] = operands[1];
6175 xops[5] = operands[2];
6176 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 max/min insn on the SSEMODE24 modes (V8HI, V4SI): two-operand
;; p<maxminiprefix><ssevecsize> with operand 1 tied to the destination.
6182 (define_insn "*sse4_1_<code><mode>3"
6183 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
6185 (match_operand:SSEMODE24 1 "nonimmediate_operand" "%0")
6186 (match_operand:SSEMODE24 2 "nonimmediate_operand" "xm")))]
6187 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6188 "p<maxminiprefix><ssevecsize>\t{%2, %0|%0, %2}"
6189 [(set_attr "type" "sseiadd")
6190 (set_attr "prefix_extra" "1")
6191 (set_attr "mode" "TI")])
;; Unsigned V2DI maximum expander.  Hands ix86_expand_int_vcond the
;; operand set {op0, op1, op2, GTU (op1, op2), op1, op2} -- presumably
;; selecting xops[1] where the comparison holds and xops[2] elsewhere;
;; confirm against ix86_expand_int_vcond.
6193 (define_expand "umaxv2di3"
6194 [(set (match_operand:V2DI 0 "register_operand" "")
6195 (umax:V2DI (match_operand:V2DI 1 "register_operand" "")
6196 (match_operand:V2DI 2 "register_operand" "")))]
6202 xops[0] = operands[0];
6203 xops[1] = operands[1];
6204 xops[2] = operands[2];
6205 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
6206 xops[4] = operands[1];
6207 xops[5] = operands[2];
6208 ok = ix86_expand_int_vcond (xops);
;; Signed minimum expander for the SSEMODE14 modes (V16QI, V4SI).  One
;; path only fixes up the operands for SMIN.  The other hands
;; ix86_expand_int_vcond the operand set {op0, op2, op1, GT (op1, op2),
;; op1, op2}: same comparison as the smax expanders but with the two
;; value operands swapped.
6213 (define_expand "smin<mode>3"
6214 [(set (match_operand:SSEMODE14 0 "register_operand" "")
6215 (smin:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
6216 (match_operand:SSEMODE14 2 "register_operand" "")))]
6220 ix86_fixup_binary_operands_no_copy (SMIN, <MODE>mode, operands);
6226 xops[0] = operands[0];
6227 xops[1] = operands[2];
6228 xops[2] = operands[1];
6229 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
6230 xops[4] = operands[1];
6231 xops[5] = operands[2];
6232 ok = ix86_expand_int_vcond (xops);
;; Signed V2DI minimum expander.  Hands ix86_expand_int_vcond the operand
;; set {op0, op2, op1, GT (op1, op2), op1, op2}: same comparison as
;; smaxv2di3 but with the two value operands swapped.
6238 (define_expand "sminv2di3"
6239 [(set (match_operand:V2DI 0 "register_operand" "")
6240 (smin:V2DI (match_operand:V2DI 1 "register_operand" "")
6241 (match_operand:V2DI 2 "register_operand" "")))]
6247 xops[0] = operands[0];
6248 xops[1] = operands[2];
6249 xops[2] = operands[1];
6250 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
6251 xops[4] = operands[1];
6252 xops[5] = operands[2];
6253 ok = ix86_expand_int_vcond (xops);
;; Unsigned minimum expander for the SSEMODE24 modes (V8HI, V4SI).  One
;; path only fixes up the operands for UMIN.  The other hands
;; ix86_expand_int_vcond the operand set {op0, op2, op1, GTU (op1, op2),
;; op1, op2}: the unsigned comparison with the two value operands
;; swapped relative to the umax expanders.
6258 (define_expand "umin<mode>3"
6259 [(set (match_operand:SSEMODE24 0 "register_operand" "")
6260 (umin:SSEMODE24 (match_operand:SSEMODE24 1 "register_operand" "")
6261 (match_operand:SSEMODE24 2 "register_operand" "")))]
6265 ix86_fixup_binary_operands_no_copy (UMIN, <MODE>mode, operands);
6271 xops[0] = operands[0];
6272 xops[1] = operands[2];
6273 xops[2] = operands[1];
6274 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
6275 xops[4] = operands[1];
6276 xops[5] = operands[2];
6277 ok = ix86_expand_int_vcond (xops);
;; Unsigned V2DI minimum expander.  Hands ix86_expand_int_vcond the
;; operand set {op0, op2, op1, GTU (op1, op2), op1, op2}: same comparison
;; as umaxv2di3 but with the two value operands swapped.
6283 (define_expand "uminv2di3"
6284 [(set (match_operand:V2DI 0 "register_operand" "")
6285 (umin:V2DI (match_operand:V2DI 1 "register_operand" "")
6286 (match_operand:V2DI 2 "register_operand" "")))]
6292 xops[0] = operands[0];
6293 xops[1] = operands[2];
6294 xops[2] = operands[1];
6295 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
6296 xops[4] = operands[1];
6297 xops[5] = operands[2];
6298 ok = ix86_expand_int_vcond (xops);
6303 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6305 ;; Parallel integral comparisons
6307 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Named equality-compare expander for the SSEMODE124 modes (V16QI, V8HI,
;; V4SI), available with SSE2 when XOP is not enabled.  The preparation
;; statement runs ix86_fixup_binary_operands_no_copy for EQ.
6309 (define_expand "sse2_eq<mode>3"
6310 [(set (match_operand:SSEMODE124 0 "register_operand" "")
6312 (match_operand:SSEMODE124 1 "nonimmediate_operand" "")
6313 (match_operand:SSEMODE124 2 "nonimmediate_operand" "")))]
6314 "TARGET_SSE2 && !TARGET_XOP "
6315 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
;; AVX equality-compare insn over the SSEMODE1248 modes: three-operand
;; vpcmpeq<ssevecsize>, operand 1 commutative.  prefix_extra is chosen by
;; testing whether operand 0 is V2DI; otherwise it is left at "*".
6317 (define_insn "*avx_eq<mode>3"
6318 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
6320 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "%x")
6321 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm")))]
6322 "TARGET_AVX && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
6323 "vpcmpeq<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
6324 [(set_attr "type" "ssecmp")
6325 (set (attr "prefix_extra")
6326 (if_then_else (match_operand:V2DI 0 "" "")
6328 (const_string "*")))
6329 (set_attr "prefix" "vex")
6330 (set_attr "mode" "TI")])
;; SSE2 two-operand equality compare (pcmpeq<ssevecsize>) for V16QI,
;; V8HI and V4SI.  Operand 1 is tied to the destination ("%0") and is
;; commutative; not used when XOP is enabled.
6332 (define_insn "*sse2_eq<mode>3"
6333 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
6335 (match_operand:SSEMODE124 1 "nonimmediate_operand" "%0")
6336 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
6337 "TARGET_SSE2 && !TARGET_XOP
6338 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
6339 "pcmpeq<ssevecsize>\t{%2, %0|%0, %2}"
6340 [(set_attr "type" "ssecmp")
6341 (set_attr "prefix_data16" "1")
6342 (set_attr "mode" "TI")])
;; Expander for V2DI element-wise equality; the preparation statement
;; only canonicalizes the operands for EQ.
6344 (define_expand "sse4_1_eqv2di3"
6345 [(set (match_operand:V2DI 0 "register_operand" "")
6347 (match_operand:V2DI 1 "nonimmediate_operand" "")
6348 (match_operand:V2DI 2 "nonimmediate_operand" "")))]
6350 "ix86_fixup_binary_operands_no_copy (EQ, V2DImode, operands);")
;; SSE4.1 two-operand 64-bit equality compare (pcmpeqq).  Operand 1 is
;; tied to the destination and is commutative.
6352 (define_insn "*sse4_1_eqv2di3"
6353 [(set (match_operand:V2DI 0 "register_operand" "=x")
6355 (match_operand:V2DI 1 "nonimmediate_operand" "%0")
6356 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
6357 "TARGET_SSE4_1 && ix86_binary_operator_ok (EQ, V2DImode, operands)"
6358 "pcmpeqq\t{%2, %0|%0, %2}"
6359 [(set_attr "type" "ssecmp")
6360 (set_attr "prefix_extra" "1")
6361 (set_attr "mode" "TI")])
;; AVX three-operand greater-than compare (vpcmpgt<ssevecsize>) over
;; the SSEMODE1248 modes.  Operand 1 must be a register and is not
;; commutative.  prefix_extra is chosen by testing operand 0 for V2DI.
6363 (define_insn "*avx_gt<mode>3"
6364 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
6366 (match_operand:SSEMODE1248 1 "register_operand" "x")
6367 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm")))]
6369 "vpcmpgt<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
6370 [(set_attr "type" "ssecmp")
6371 (set (attr "prefix_extra")
6372 (if_then_else (match_operand:V2DI 0 "" "")
6374 (const_string "*")))
6375 (set_attr "prefix" "vex")
6376 (set_attr "mode" "TI")])
;; SSE2 two-operand greater-than compare (pcmpgt<ssevecsize>) for
;; V16QI, V8HI and V4SI.  Operand 1 is tied to the destination; not
;; used when XOP is enabled.
6378 (define_insn "sse2_gt<mode>3"
6379 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
6381 (match_operand:SSEMODE124 1 "register_operand" "0")
6382 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
6383 "TARGET_SSE2 && !TARGET_XOP"
6384 "pcmpgt<ssevecsize>\t{%2, %0|%0, %2}"
6385 [(set_attr "type" "ssecmp")
6386 (set_attr "prefix_data16" "1")
6387 (set_attr "mode" "TI")])
;; Two-operand 64-bit greater-than compare (pcmpgtq) on V2DI.
;; Operand 1 is tied to the destination.
6389 (define_insn "sse4_2_gtv2di3"
6390 [(set (match_operand:V2DI 0 "register_operand" "=x")
6392 (match_operand:V2DI 1 "register_operand" "0")
6393 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
6395 "pcmpgtq\t{%2, %0|%0, %2}"
6396 [(set_attr "type" "ssecmp")
6397 (set_attr "prefix_extra" "1")
6398 (set_attr "mode" "TI")])
;; Vector conditional select over the SSEMODE124C8 modes:
;; operand 0 = (operand 4 <operator 3> operand 5) ? operand 1 : operand 2.
;; All the work is delegated to ix86_expand_int_vcond, whose boolean
;; result is captured in `ok'.
6400 (define_expand "vcond<mode>"
6401 [(set (match_operand:SSEMODE124C8 0 "register_operand" "")
6402 (if_then_else:SSEMODE124C8
6403 (match_operator 3 ""
6404 [(match_operand:SSEMODE124C8 4 "nonimmediate_operand" "")
6405 (match_operand:SSEMODE124C8 5 "nonimmediate_operand" "")])
6406 (match_operand:SSEMODE124C8 1 "general_operand" "")
6407 (match_operand:SSEMODE124C8 2 "general_operand" "")))]
6410 bool ok = ix86_expand_int_vcond (operands);
;; Counterpart of vcond<mode> under the "vcondu" name, with the same
;; operand layout: operand 0 = (operand 4 <operator 3> operand 5)
;; ? operand 1 : operand 2.  Also delegated to ix86_expand_int_vcond.
6415 (define_expand "vcondu<mode>"
6416 [(set (match_operand:SSEMODE124C8 0 "register_operand" "")
6417 (if_then_else:SSEMODE124C8
6418 (match_operator 3 ""
6419 [(match_operand:SSEMODE124C8 4 "nonimmediate_operand" "")
6420 (match_operand:SSEMODE124C8 5 "nonimmediate_operand" "")])
6421 (match_operand:SSEMODE124C8 1 "general_operand" "")
6422 (match_operand:SSEMODE124C8 2 "general_operand" "")))]
6425 bool ok = ix86_expand_int_vcond (operands);
6430 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6432 ;; Parallel bitwise logical operations
6434 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; One's complement for the 16-byte integer vector modes, expressed as
;; an XOR of operand 1 with an all-ones vector.  The preparation code
;; builds a CONST_VECTOR with constm1_rtx in each of the mode's
;; GET_MODE_NUNITS elements and forces it into a register as operand 2.
6436 (define_expand "one_cmpl<mode>2"
6437 [(set (match_operand:SSEMODEI 0 "register_operand" "")
6438 (xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
6442 int i, n = GET_MODE_NUNITS (<MODE>mode);
6443 rtvec v = rtvec_alloc (n);
6445 for (i = 0; i < n; ++i)
6446 RTVEC_ELT (v, i) = constm1_rtx;
6448 operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v));
;; 256-bit integer and-not: operand 1 is complemented (not:) before
;; being combined with operand 2.  Emitted as the packed-single form
;; vandnps with a three-operand VEX encoding.
6451 (define_insn "*avx_andnot<mode>3"
6452 [(set (match_operand:AVX256MODEI 0 "register_operand" "=x")
6454 (not:AVX256MODEI (match_operand:AVX256MODEI 1 "register_operand" "x"))
6455 (match_operand:AVX256MODEI 2 "nonimmediate_operand" "xm")))]
6457 "vandnps\t{%2, %1, %0|%0, %1, %2}"
6458 [(set_attr "type" "sselog")
6459 (set_attr "prefix" "vex")
6460 (set_attr "mode" "<avxvecpsmode>")])
;; 128-bit integer and-not for targets with SSE but without SSE2:
;; emitted as the packed-single instruction andnps.  Operand 1 (the
;; complemented input) is tied to the destination.
6462 (define_insn "*sse_andnot<mode>3"
6463 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
6465 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0"))
6466 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
6467 "(TARGET_SSE && !TARGET_SSE2)"
6468 "andnps\t{%2, %0|%0, %2}"
6469 [(set_attr "type" "sselog")
6470 (set_attr "mode" "V4SF")])
;; 128-bit integer and-not in three-operand VEX form (vpandn).
;; Operand 1 is the complemented input and need not match the
;; destination.
6472 (define_insn "*avx_andnot<mode>3"
6473 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
6475 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "x"))
6476 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
6478 "vpandn\t{%2, %1, %0|%0, %1, %2}"
6479 [(set_attr "type" "sselog")
6480 (set_attr "prefix" "vex")
6481 (set_attr "mode" "TI")])
;; 128-bit integer and-not in two-operand form (pandn).  Operand 1
;; (the complemented input) is tied to the destination.
6483 (define_insn "sse2_andnot<mode>3"
6484 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
6486 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0"))
6487 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
6489 "pandn\t{%2, %0|%0, %2}"
6490 [(set_attr "type" "sselog")
6491 (set_attr "prefix_data16" "1")
6492 (set_attr "mode" "TI")])
;; And-not on TFmode values held in SSE registers, emitted as pandn.
;; Operand 1 (the complemented input) is tied to the destination.
6494 (define_insn "*andnottf3"
6495 [(set (match_operand:TF 0 "register_operand" "=x")
6497 (not:TF (match_operand:TF 1 "register_operand" "0"))
6498 (match_operand:TF 2 "nonimmediate_operand" "xm")))]
6500 "pandn\t{%2, %0|%0, %2}"
6501 [(set_attr "type" "sselog")
6502 (set_attr "prefix_data16" "1")
6503 (set_attr "mode" "TI")])
;; Generic expander for the bitwise logical operations (one expansion
;; per <code>) on the 16-byte integer vector modes.  The preparation
;; statement only canonicalizes the operands for <CODE>.
6505 (define_expand "<code><mode>3"
6506 [(set (match_operand:SSEMODEI 0 "register_operand" "")
6508 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
6509 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
6511 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; 256-bit integer bitwise logic (any_logic) emitted as the
;; packed-single instruction v<logicprefix>ps in three-operand VEX
;; form.  Operand 1 is commutative.
6513 (define_insn "*avx_<code><mode>3"
6514 [(set (match_operand:AVX256MODEI 0 "register_operand" "=x")
6515 (any_logic:AVX256MODEI
6516 (match_operand:AVX256MODEI 1 "nonimmediate_operand" "%x")
6517 (match_operand:AVX256MODEI 2 "nonimmediate_operand" "xm")))]
6519 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6520 "v<logicprefix>ps\t{%2, %1, %0|%0, %1, %2}"
6521 [(set_attr "type" "sselog")
6522 (set_attr "prefix" "vex")
6523 (set_attr "mode" "<avxvecpsmode>")])
;; 128-bit integer bitwise logic for targets with SSE but without
;; SSE2: emitted as the packed-single instruction <logicprefix>ps.
;; Operand 1 is tied to the destination and is commutative.
6525 (define_insn "*sse_<code><mode>3"
6526 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
6528 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
6529 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
6530 "(TARGET_SSE && !TARGET_SSE2)
6531 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6532 "<logicprefix>ps\t{%2, %0|%0, %2}"
6533 [(set_attr "type" "sselog")
6534 (set_attr "mode" "V4SF")])
;; 128-bit integer bitwise logic in three-operand VEX form
;; (vp<logicprefix>).  Operand 1 is commutative and need not match the
;; destination.
6536 (define_insn "*avx_<code><mode>3"
6537 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
6539 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%x")
6540 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
6542 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6543 "vp<logicprefix>\t{%2, %1, %0|%0, %1, %2}"
6544 [(set_attr "type" "sselog")
6545 (set_attr "prefix" "vex")
6546 (set_attr "mode" "TI")])
;; SSE2 128-bit integer bitwise logic in two-operand form
;; (p<logicprefix>).  Operand 1 is tied to the destination and is
;; commutative.
6548 (define_insn "*sse2_<code><mode>3"
6549 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
6551 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
6552 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
6553 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6554 "p<logicprefix>\t{%2, %0|%0, %2}"
6555 [(set_attr "type" "sselog")
6556 (set_attr "prefix_data16" "1")
6557 (set_attr "mode" "TI")])
;; Expander for the bitwise logical operations on TFmode values.  The
;; preparation statement only canonicalizes the operands for <CODE>.
6559 (define_expand "<code>tf3"
6560 [(set (match_operand:TF 0 "register_operand" "")
6562 (match_operand:TF 1 "nonimmediate_operand" "")
6563 (match_operand:TF 2 "nonimmediate_operand" "")))]
6565 "ix86_fixup_binary_operands_no_copy (<CODE>, TFmode, operands);")
;; SSE2 bitwise logic on TFmode values, emitted as the integer
;; instruction p<logicprefix>.  Operand 1 is tied to the destination
;; and is commutative.
6567 (define_insn "*<code>tf3"
6568 [(set (match_operand:TF 0 "register_operand" "=x")
6570 (match_operand:TF 1 "nonimmediate_operand" "%0")
6571 (match_operand:TF 2 "nonimmediate_operand" "xm")))]
6572 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, TFmode, operands)"
6573 "p<logicprefix>\t{%2, %0|%0, %2}"
6574 [(set_attr "type" "sselog")
6575 (set_attr "prefix_data16" "1")
6576 (set_attr "mode" "TI")])
6578 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6580 ;; Parallel integral element swizzling
6582 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Pack two V8HI vectors into one V16QI result.  Both inputs are
;; reinterpreted as V16QI with gen_lowpart and passed to
;; ix86_expand_vec_extract_even_odd with a final argument of 0
;; (presumably selecting the even-numbered elements -- confirm against
;; that helper).
6584 (define_expand "vec_pack_trunc_v8hi"
6585 [(match_operand:V16QI 0 "register_operand" "")
6586 (match_operand:V8HI 1 "register_operand" "")
6587 (match_operand:V8HI 2 "register_operand" "")]
6590 rtx op1 = gen_lowpart (V16QImode, operands[1]);
6591 rtx op2 = gen_lowpart (V16QImode, operands[2]);
6592 ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
;; Pack two V4SI vectors into one V8HI result.  Both inputs are
;; reinterpreted as V8HI with gen_lowpart and passed to
;; ix86_expand_vec_extract_even_odd with a final argument of 0
;; (presumably selecting the even-numbered elements -- confirm against
;; that helper).
6596 (define_expand "vec_pack_trunc_v4si"
6597 [(match_operand:V8HI 0 "register_operand" "")
6598 (match_operand:V4SI 1 "register_operand" "")
6599 (match_operand:V4SI 2 "register_operand" "")]
6602 rtx op1 = gen_lowpart (V8HImode, operands[1]);
6603 rtx op2 = gen_lowpart (V8HImode, operands[2]);
6604 ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
;; Pack two V2DI vectors into one V4SI result.  Both inputs are
;; reinterpreted as V4SI with gen_lowpart and passed to
;; ix86_expand_vec_extract_even_odd with a final argument of 0
;; (presumably selecting the even-numbered elements -- confirm against
;; that helper).
6608 (define_expand "vec_pack_trunc_v2di"
6609 [(match_operand:V4SI 0 "register_operand" "")
6610 (match_operand:V2DI 1 "register_operand" "")
6611 (match_operand:V2DI 2 "register_operand" "")]
6614 rtx op1 = gen_lowpart (V4SImode, operands[1]);
6615 rtx op2 = gen_lowpart (V4SImode, operands[2]);
6616 ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
;; Three-operand VEX form of the V8HI -> V16QI pack (vpacksswb).
;; Operand 1 must be a register; operand 2 may be in memory.
6620 (define_insn "*avx_packsswb"
6621 [(set (match_operand:V16QI 0 "register_operand" "=x")
6624 (match_operand:V8HI 1 "register_operand" "x"))
6626 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
6628 "vpacksswb\t{%2, %1, %0|%0, %1, %2}"
6629 [(set_attr "type" "sselog")
6630 (set_attr "prefix" "vex")
6631 (set_attr "mode" "TI")])
;; Two-operand form of the V8HI -> V16QI pack (packsswb).  Operand 1
;; is tied to the destination.
6633 (define_insn "sse2_packsswb"
6634 [(set (match_operand:V16QI 0 "register_operand" "=x")
6637 (match_operand:V8HI 1 "register_operand" "0"))
6639 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
6641 "packsswb\t{%2, %0|%0, %2}"
6642 [(set_attr "type" "sselog")
6643 (set_attr "prefix_data16" "1")
6644 (set_attr "mode" "TI")])
;; Three-operand VEX form of the V4SI -> V8HI pack (vpackssdw).
;; Operand 1 must be a register; operand 2 may be in memory.
6646 (define_insn "*avx_packssdw"
6647 [(set (match_operand:V8HI 0 "register_operand" "=x")
6650 (match_operand:V4SI 1 "register_operand" "x"))
6652 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
6654 "vpackssdw\t{%2, %1, %0|%0, %1, %2}"
6655 [(set_attr "type" "sselog")
6656 (set_attr "prefix" "vex")
6657 (set_attr "mode" "TI")])
;; Two-operand form of the V4SI -> V8HI pack (packssdw).  Operand 1 is
;; tied to the destination.
6659 (define_insn "sse2_packssdw"
6660 [(set (match_operand:V8HI 0 "register_operand" "=x")
6663 (match_operand:V4SI 1 "register_operand" "0"))
6665 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
6667 "packssdw\t{%2, %0|%0, %2}"
6668 [(set_attr "type" "sselog")
6669 (set_attr "prefix_data16" "1")
6670 (set_attr "mode" "TI")])
;; Three-operand VEX form of the V8HI -> V16QI pack emitted as
;; vpackuswb.  Operand 1 must be a register; operand 2 may be in memory.
6672 (define_insn "*avx_packuswb"
6673 [(set (match_operand:V16QI 0 "register_operand" "=x")
6676 (match_operand:V8HI 1 "register_operand" "x"))
6678 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
6680 "vpackuswb\t{%2, %1, %0|%0, %1, %2}"
6681 [(set_attr "type" "sselog")
6682 (set_attr "prefix" "vex")
6683 (set_attr "mode" "TI")])
;; Two-operand form of the V8HI -> V16QI pack emitted as packuswb.
;; Operand 1 is tied to the destination.
6685 (define_insn "sse2_packuswb"
6686 [(set (match_operand:V16QI 0 "register_operand" "=x")
6689 (match_operand:V8HI 1 "register_operand" "0"))
6691 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
6693 "packuswb\t{%2, %0|%0, %2}"
6694 [(set_attr "type" "sselog")
6695 (set_attr "prefix_data16" "1")
6696 (set_attr "mode" "TI")])
;; VEX byte interleave of the high halves (vpunpckhbw).  The selector
;; alternates indexes 8..15 with indexes 24..31 of the combined
;; operand 1 / operand 2 input.
6698 (define_insn "*avx_interleave_highv16qi"
6699 [(set (match_operand:V16QI 0 "register_operand" "=x")
6702 (match_operand:V16QI 1 "register_operand" "x")
6703 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
6704 (parallel [(const_int 8) (const_int 24)
6705 (const_int 9) (const_int 25)
6706 (const_int 10) (const_int 26)
6707 (const_int 11) (const_int 27)
6708 (const_int 12) (const_int 28)
6709 (const_int 13) (const_int 29)
6710 (const_int 14) (const_int 30)
6711 (const_int 15) (const_int 31)])))]
6713 "vpunpckhbw\t{%2, %1, %0|%0, %1, %2}"
6714 [(set_attr "type" "sselog")
6715 (set_attr "prefix" "vex")
6716 (set_attr "mode" "TI")])
;; Byte interleave of the high halves in two-operand form (punpckhbw).
;; The selector alternates indexes 8..15 with indexes 24..31; operand 1
;; is tied to the destination.
6718 (define_insn "vec_interleave_highv16qi"
6719 [(set (match_operand:V16QI 0 "register_operand" "=x")
6722 (match_operand:V16QI 1 "register_operand" "0")
6723 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
6724 (parallel [(const_int 8) (const_int 24)
6725 (const_int 9) (const_int 25)
6726 (const_int 10) (const_int 26)
6727 (const_int 11) (const_int 27)
6728 (const_int 12) (const_int 28)
6729 (const_int 13) (const_int 29)
6730 (const_int 14) (const_int 30)
6731 (const_int 15) (const_int 31)])))]
6733 "punpckhbw\t{%2, %0|%0, %2}"
6734 [(set_attr "type" "sselog")
6735 (set_attr "prefix_data16" "1")
6736 (set_attr "mode" "TI")])
;; VEX byte interleave of the low halves (vpunpcklbw).  The selector
;; alternates indexes 0..7 with indexes 16..23 of the combined
;; operand 1 / operand 2 input.
6738 (define_insn "*avx_interleave_lowv16qi"
6739 [(set (match_operand:V16QI 0 "register_operand" "=x")
6742 (match_operand:V16QI 1 "register_operand" "x")
6743 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
6744 (parallel [(const_int 0) (const_int 16)
6745 (const_int 1) (const_int 17)
6746 (const_int 2) (const_int 18)
6747 (const_int 3) (const_int 19)
6748 (const_int 4) (const_int 20)
6749 (const_int 5) (const_int 21)
6750 (const_int 6) (const_int 22)
6751 (const_int 7) (const_int 23)])))]
6753 "vpunpcklbw\t{%2, %1, %0|%0, %1, %2}"
6754 [(set_attr "type" "sselog")
6755 (set_attr "prefix" "vex")
6756 (set_attr "mode" "TI")])
;; Byte interleave of the low halves in two-operand form (punpcklbw).
;; The selector alternates indexes 0..7 with indexes 16..23; operand 1
;; is tied to the destination.
6758 (define_insn "vec_interleave_lowv16qi"
6759 [(set (match_operand:V16QI 0 "register_operand" "=x")
6762 (match_operand:V16QI 1 "register_operand" "0")
6763 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
6764 (parallel [(const_int 0) (const_int 16)
6765 (const_int 1) (const_int 17)
6766 (const_int 2) (const_int 18)
6767 (const_int 3) (const_int 19)
6768 (const_int 4) (const_int 20)
6769 (const_int 5) (const_int 21)
6770 (const_int 6) (const_int 22)
6771 (const_int 7) (const_int 23)])))]
6773 "punpcklbw\t{%2, %0|%0, %2}"
6774 [(set_attr "type" "sselog")
6775 (set_attr "prefix_data16" "1")
6776 (set_attr "mode" "TI")])
;; VEX word interleave of the high halves (vpunpckhwd).  The selector
;; alternates indexes 4..7 with indexes 12..15.
6778 (define_insn "*avx_interleave_highv8hi"
6779 [(set (match_operand:V8HI 0 "register_operand" "=x")
6782 (match_operand:V8HI 1 "register_operand" "x")
6783 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
6784 (parallel [(const_int 4) (const_int 12)
6785 (const_int 5) (const_int 13)
6786 (const_int 6) (const_int 14)
6787 (const_int 7) (const_int 15)])))]
6789 "vpunpckhwd\t{%2, %1, %0|%0, %1, %2}"
6790 [(set_attr "type" "sselog")
6791 (set_attr "prefix" "vex")
6792 (set_attr "mode" "TI")])
;; Word interleave of the high halves in two-operand form (punpckhwd).
;; The selector alternates indexes 4..7 with indexes 12..15; operand 1
;; is tied to the destination.
6794 (define_insn "vec_interleave_highv8hi"
6795 [(set (match_operand:V8HI 0 "register_operand" "=x")
6798 (match_operand:V8HI 1 "register_operand" "0")
6799 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
6800 (parallel [(const_int 4) (const_int 12)
6801 (const_int 5) (const_int 13)
6802 (const_int 6) (const_int 14)
6803 (const_int 7) (const_int 15)])))]
6805 "punpckhwd\t{%2, %0|%0, %2}"
6806 [(set_attr "type" "sselog")
6807 (set_attr "prefix_data16" "1")
6808 (set_attr "mode" "TI")])
;; VEX word interleave of the low halves (vpunpcklwd).  The selector
;; alternates indexes 0..3 with indexes 8..11.
6810 (define_insn "*avx_interleave_lowv8hi"
6811 [(set (match_operand:V8HI 0 "register_operand" "=x")
6814 (match_operand:V8HI 1 "register_operand" "x")
6815 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
6816 (parallel [(const_int 0) (const_int 8)
6817 (const_int 1) (const_int 9)
6818 (const_int 2) (const_int 10)
6819 (const_int 3) (const_int 11)])))]
6821 "vpunpcklwd\t{%2, %1, %0|%0, %1, %2}"
6822 [(set_attr "type" "sselog")
6823 (set_attr "prefix" "vex")
6824 (set_attr "mode" "TI")])
;; Word interleave of the low halves in two-operand form (punpcklwd).
;; The selector alternates indexes 0..3 with indexes 8..11; operand 1
;; is tied to the destination.
6826 (define_insn "vec_interleave_lowv8hi"
6827 [(set (match_operand:V8HI 0 "register_operand" "=x")
6830 (match_operand:V8HI 1 "register_operand" "0")
6831 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
6832 (parallel [(const_int 0) (const_int 8)
6833 (const_int 1) (const_int 9)
6834 (const_int 2) (const_int 10)
6835 (const_int 3) (const_int 11)])))]
6837 "punpcklwd\t{%2, %0|%0, %2}"
6838 [(set_attr "type" "sselog")
6839 (set_attr "prefix_data16" "1")
6840 (set_attr "mode" "TI")])
;; VEX doubleword interleave of the high halves (vpunpckhdq).  The
;; selector is 2, 6, 3, 7.
6842 (define_insn "*avx_interleave_highv4si"
6843 [(set (match_operand:V4SI 0 "register_operand" "=x")
6846 (match_operand:V4SI 1 "register_operand" "x")
6847 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
6848 (parallel [(const_int 2) (const_int 6)
6849 (const_int 3) (const_int 7)])))]
6851 "vpunpckhdq\t{%2, %1, %0|%0, %1, %2}"
6852 [(set_attr "type" "sselog")
6853 (set_attr "prefix" "vex")
6854 (set_attr "mode" "TI")])
;; Doubleword interleave of the high halves in two-operand form
;; (punpckhdq).  The selector is 2, 6, 3, 7; operand 1 is tied to the
;; destination.
6856 (define_insn "vec_interleave_highv4si"
6857 [(set (match_operand:V4SI 0 "register_operand" "=x")
6860 (match_operand:V4SI 1 "register_operand" "0")
6861 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
6862 (parallel [(const_int 2) (const_int 6)
6863 (const_int 3) (const_int 7)])))]
6865 "punpckhdq\t{%2, %0|%0, %2}"
6866 [(set_attr "type" "sselog")
6867 (set_attr "prefix_data16" "1")
6868 (set_attr "mode" "TI")])
;; VEX doubleword interleave of the low halves (vpunpckldq).  The
;; selector is 0, 4, 1, 5.
6870 (define_insn "*avx_interleave_lowv4si"
6871 [(set (match_operand:V4SI 0 "register_operand" "=x")
6874 (match_operand:V4SI 1 "register_operand" "x")
6875 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
6876 (parallel [(const_int 0) (const_int 4)
6877 (const_int 1) (const_int 5)])))]
6879 "vpunpckldq\t{%2, %1, %0|%0, %1, %2}"
6880 [(set_attr "type" "sselog")
6881 (set_attr "prefix" "vex")
6882 (set_attr "mode" "TI")])
;; Doubleword interleave of the low halves in two-operand form
;; (punpckldq).  The selector is 0, 4, 1, 5; operand 1 is tied to the
;; destination.
6884 (define_insn "vec_interleave_lowv4si"
6885 [(set (match_operand:V4SI 0 "register_operand" "=x")
6888 (match_operand:V4SI 1 "register_operand" "0")
6889 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
6890 (parallel [(const_int 0) (const_int 4)
6891 (const_int 1) (const_int 5)])))]
6893 "punpckldq\t{%2, %0|%0, %2}"
6894 [(set_attr "type" "sselog")
6895 (set_attr "prefix_data16" "1")
6896 (set_attr "mode" "TI")])
;; VEX element insert (vpinsr<ssevecsize>) for V16QI, V8HI and V4SI.
;; Operand 3 is the vec_merge mask, constrained by its predicate to a
;; power of two; the output code turns it into the instruction
;; immediate with exact_log2.  A memory source is printed as %2, any
;; other source as %k2.  prefix_extra is chosen by testing operand 0
;; for V8HI.
6898 (define_insn "*avx_pinsr<ssevecsize>"
6899 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
6900 (vec_merge:SSEMODE124
6901 (vec_duplicate:SSEMODE124
6902 (match_operand:<avxscalarmode> 2 "nonimmediate_operand" "rm"))
6903 (match_operand:SSEMODE124 1 "register_operand" "x")
6904 (match_operand:SI 3 "const_pow2_1_to_<pinsrbits>_operand" "n")))]
6907 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6908 if (MEM_P (operands[2]))
6909 return "vpinsr<ssevecsize>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
6911 return "vpinsr<ssevecsize>\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
6913 [(set_attr "type" "sselog")
6914 (set (attr "prefix_extra")
6915 (if_then_else (match_operand:V8HI 0 "register_operand" "")
6917 (const_string "1")))
6918 (set_attr "length_immediate" "1")
6919 (set_attr "prefix" "vex")
6920 (set_attr "mode" "TI")])
;; Byte insert into V16QI (pinsrb), two-operand form with operand 1
;; tied to the destination.  Operand 3 is a power of two between 1 and
;; 32768 (per its predicate) and is converted to the instruction
;; immediate with exact_log2.  A memory source is printed as %2, any
;; other source as %k2.
6922 (define_insn "*sse4_1_pinsrb"
6923 [(set (match_operand:V16QI 0 "register_operand" "=x")
6925 (vec_duplicate:V16QI
6926 (match_operand:QI 2 "nonimmediate_operand" "rm"))
6927 (match_operand:V16QI 1 "register_operand" "0")
6928 (match_operand:SI 3 "const_pow2_1_to_32768_operand" "n")))]
6931 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6932 if (MEM_P (operands[2]))
6933 return "pinsrb\t{%3, %2, %0|%0, %2, %3}";
6935 return "pinsrb\t{%3, %k2, %0|%0, %k2, %3}";
6937 [(set_attr "type" "sselog")
6938 (set_attr "prefix_extra" "1")
6939 (set_attr "length_immediate" "1")
6940 (set_attr "mode" "TI")])
;; Word insert into V8HI (pinsrw), two-operand form with operand 1
;; tied to the destination.  Operand 3 is a power of two between 1 and
;; 128 (per its predicate) and is converted to the instruction
;; immediate with exact_log2.  A memory source is printed as %2, any
;; other source as %k2.
6942 (define_insn "*sse2_pinsrw"
6943 [(set (match_operand:V8HI 0 "register_operand" "=x")
6946 (match_operand:HI 2 "nonimmediate_operand" "rm"))
6947 (match_operand:V8HI 1 "register_operand" "0")
6948 (match_operand:SI 3 "const_pow2_1_to_128_operand" "n")))]
6951 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6952 if (MEM_P (operands[2]))
6953 return "pinsrw\t{%3, %2, %0|%0, %2, %3}";
6955 return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}";
6957 [(set_attr "type" "sselog")
6958 (set_attr "prefix_data16" "1")
6959 (set_attr "length_immediate" "1")
6960 (set_attr "mode" "TI")])
6962 ;; It must come before sse2_loadld since it is preferred.
;; Doubleword insert into V4SI (pinsrd), two-operand form with
;; operand 1 tied to the destination.  Operand 3 is a power of two
;; between 1 and 8 (per its predicate) and is converted to the
;; instruction immediate with exact_log2.
6963 (define_insn "*sse4_1_pinsrd"
6964 [(set (match_operand:V4SI 0 "register_operand" "=x")
6967 (match_operand:SI 2 "nonimmediate_operand" "rm"))
6968 (match_operand:V4SI 1 "register_operand" "0")
6969 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
6972 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6973 return "pinsrd\t{%3, %2, %0|%0, %2, %3}";
6975 [(set_attr "type" "sselog")
6976 (set_attr "prefix_extra" "1")
6977 (set_attr "length_immediate" "1")
6978 (set_attr "mode" "TI")])
;; VEX quadword insert into V2DI (vpinsrq); 64-bit targets with AVX
;; only.  Operand 3 is a power of two between 1 and 2 (per its
;; predicate) and is converted to the instruction immediate with
;; exact_log2.
6980 (define_insn "*avx_pinsrq"
6981 [(set (match_operand:V2DI 0 "register_operand" "=x")
6984 (match_operand:DI 2 "nonimmediate_operand" "rm"))
6985 (match_operand:V2DI 1 "register_operand" "x")
6986 (match_operand:SI 3 "const_pow2_1_to_2_operand" "n")))]
6987 "TARGET_AVX && TARGET_64BIT"
6989 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6990 return "vpinsrq\t{%3, %2, %1, %0|%0, %1, %2, %3}";
6992 [(set_attr "type" "sselog")
6993 (set_attr "prefix_extra" "1")
6994 (set_attr "length_immediate" "1")
6995 (set_attr "prefix" "vex")
6996 (set_attr "mode" "TI")])
;; Quadword insert into V2DI (pinsrq); 64-bit targets with SSE4.1
;; only, operand 1 tied to the destination.  Operand 3 is a power of
;; two between 1 and 2 (per its predicate) and is converted to the
;; instruction immediate with exact_log2.
6998 (define_insn "*sse4_1_pinsrq"
6999 [(set (match_operand:V2DI 0 "register_operand" "=x")
7002 (match_operand:DI 2 "nonimmediate_operand" "rm"))
7003 (match_operand:V2DI 1 "register_operand" "0")
7004 (match_operand:SI 3 "const_pow2_1_to_2_operand" "n")))]
7005 "TARGET_SSE4_1 && TARGET_64BIT"
7007 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
7008 return "pinsrq\t{%3, %2, %0|%0, %2, %3}";
7010 [(set_attr "type" "sselog")
7011 (set_attr "prefix_rex" "1")
7012 (set_attr "prefix_extra" "1")
7013 (set_attr "length_immediate" "1")
7014 (set_attr "mode" "TI")])
;; Byte extract from V16QI into an SImode general register
;; (%vpextrb).  Operand 2 is the element index, 0..15.
7016 (define_insn "*sse4_1_pextrb"
7017 [(set (match_operand:SI 0 "register_operand" "=r")
7020 (match_operand:V16QI 1 "register_operand" "x")
7021 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")]))))]
7023 "%vpextrb\t{%2, %1, %0|%0, %1, %2}"
7024 [(set_attr "type" "sselog")
7025 (set_attr "prefix_extra" "1")
7026 (set_attr "length_immediate" "1")
7027 (set_attr "prefix" "maybe_vex")
7028 (set_attr "mode" "TI")])
;; Byte extract from V16QI stored directly to a QImode memory
;; location (%vpextrb).  Operand 2 is the element index, 0..15.
7030 (define_insn "*sse4_1_pextrb_memory"
7031 [(set (match_operand:QI 0 "memory_operand" "=m")
7033 (match_operand:V16QI 1 "register_operand" "x")
7034 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")])))]
7036 "%vpextrb\t{%2, %1, %0|%0, %1, %2}"
7037 [(set_attr "type" "sselog")
7038 (set_attr "prefix_extra" "1")
7039 (set_attr "length_immediate" "1")
7040 (set_attr "prefix" "maybe_vex")
7041 (set_attr "mode" "TI")])
;; Word extract from V8HI into an SImode general register
;; (%vpextrw).  Operand 2 is the element index, 0..7.
7043 (define_insn "*sse2_pextrw"
7044 [(set (match_operand:SI 0 "register_operand" "=r")
7047 (match_operand:V8HI 1 "register_operand" "x")
7048 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")]))))]
7050 "%vpextrw\t{%2, %1, %0|%0, %1, %2}"
7051 [(set_attr "type" "sselog")
7052 (set_attr "prefix_data16" "1")
7053 (set_attr "length_immediate" "1")
7054 (set_attr "prefix" "maybe_vex")
7055 (set_attr "mode" "TI")])
;; Word extract from V8HI stored directly to an HImode memory
;; location (%vpextrw).  Operand 2 is the element index, 0..7.
7057 (define_insn "*sse4_1_pextrw_memory"
7058 [(set (match_operand:HI 0 "memory_operand" "=m")
7060 (match_operand:V8HI 1 "register_operand" "x")
7061 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")])))]
7063 "%vpextrw\t{%2, %1, %0|%0, %1, %2}"
7064 [(set_attr "type" "sselog")
7065 (set_attr "prefix_extra" "1")
7066 (set_attr "length_immediate" "1")
7067 (set_attr "prefix" "maybe_vex")
7068 (set_attr "mode" "TI")])
;; Doubleword extract from V4SI into an SImode register or memory
;; location (%vpextrd).  Operand 2 is the element index, 0..3.
7070 (define_insn "*sse4_1_pextrd"
7071 [(set (match_operand:SI 0 "nonimmediate_operand" "=rm")
7073 (match_operand:V4SI 1 "register_operand" "x")
7074 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
7076 "%vpextrd\t{%2, %1, %0|%0, %1, %2}"
7077 [(set_attr "type" "sselog")
7078 (set_attr "prefix_extra" "1")
7079 (set_attr "length_immediate" "1")
7080 (set_attr "prefix" "maybe_vex")
7081 (set_attr "mode" "TI")])
7083 ;; It must come before *vec_extractv2di_1_sse since it is preferred.
;; Quadword extract from V2DI into a DImode register or memory
;; location (%vpextrq); 64-bit targets with SSE4.1 only.  Operand 2 is
;; the element index, 0..1.
7084 (define_insn "*sse4_1_pextrq"
7085 [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
7087 (match_operand:V2DI 1 "register_operand" "x")
7088 (parallel [(match_operand:SI 2 "const_0_to_1_operand" "n")])))]
7089 "TARGET_SSE4_1 && TARGET_64BIT"
7090 "%vpextrq\t{%2, %1, %0|%0, %1, %2}"
7091 [(set_attr "type" "sselog")
7092 (set_attr "prefix_rex" "1")
7093 (set_attr "prefix_extra" "1")
7094 (set_attr "length_immediate" "1")
7095 (set_attr "prefix" "maybe_vex")
7096 (set_attr "mode" "TI")])
;; Expander taking the shuffle control as one integer constant
;; (operand 2).  It splits the constant into four 2-bit selectors
;; (bits 0-1, 2-3, 4-5, 6-7) and emits sse2_pshufd_1 with them.
7098 (define_expand "sse2_pshufd"
7099 [(match_operand:V4SI 0 "register_operand" "")
7100 (match_operand:V4SI 1 "nonimmediate_operand" "")
7101 (match_operand:SI 2 "const_int_operand" "")]
7104 int mask = INTVAL (operands[2]);
7105 emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
7106 GEN_INT ((mask >> 0) & 3),
7107 GEN_INT ((mask >> 2) & 3),
7108 GEN_INT ((mask >> 4) & 3),
7109 GEN_INT ((mask >> 6) & 3)));
;; V4SI shuffle with one selector operand per result element
;; (operands 2-5, each 0..3).  The output code packs the four
;; selectors back into a single immediate (2 bits each, operand 2 in
;; the low bits), stores it in operands[2] and emits %vpshufd.
7113 (define_insn "sse2_pshufd_1"
7114 [(set (match_operand:V4SI 0 "register_operand" "=x")
7116 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
7117 (parallel [(match_operand 2 "const_0_to_3_operand" "")
7118 (match_operand 3 "const_0_to_3_operand" "")
7119 (match_operand 4 "const_0_to_3_operand" "")
7120 (match_operand 5 "const_0_to_3_operand" "")])))]
7124 mask |= INTVAL (operands[2]) << 0;
7125 mask |= INTVAL (operands[3]) << 2;
7126 mask |= INTVAL (operands[4]) << 4;
7127 mask |= INTVAL (operands[5]) << 6;
7128 operands[2] = GEN_INT (mask);
7130 return "%vpshufd\t{%2, %1, %0|%0, %1, %2}";
7132 [(set_attr "type" "sselog1")
7133 (set_attr "prefix_data16" "1")
7134 (set_attr "prefix" "maybe_vex")
7135 (set_attr "length_immediate" "1")
7136 (set_attr "mode" "TI")])
;; Expander taking the shuffle control as one integer constant
;; (operand 2).  It splits the constant into four 2-bit selectors
;; (bits 0-1, 2-3, 4-5, 6-7) and emits sse2_pshuflw_1 with them.
7138 (define_expand "sse2_pshuflw"
7139 [(match_operand:V8HI 0 "register_operand" "")
7140 (match_operand:V8HI 1 "nonimmediate_operand" "")
7141 (match_operand:SI 2 "const_int_operand" "")]
7144 int mask = INTVAL (operands[2]);
7145 emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
7146 GEN_INT ((mask >> 0) & 3),
7147 GEN_INT ((mask >> 2) & 3),
7148 GEN_INT ((mask >> 4) & 3),
7149 GEN_INT ((mask >> 6) & 3)));
;; V8HI shuffle whose variable selectors (operands 2-5) are each
;; limited to 0..3.  The output code packs the four selectors back
;; into a single immediate (2 bits each, operand 2 in the low bits),
;; stores it in operands[2] and emits %vpshuflw.
7153 (define_insn "sse2_pshuflw_1"
7154 [(set (match_operand:V8HI 0 "register_operand" "=x")
7156 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7157 (parallel [(match_operand 2 "const_0_to_3_operand" "")
7158 (match_operand 3 "const_0_to_3_operand" "")
7159 (match_operand 4 "const_0_to_3_operand" "")
7160 (match_operand 5 "const_0_to_3_operand" "")
7168 mask |= INTVAL (operands[2]) << 0;
7169 mask |= INTVAL (operands[3]) << 2;
7170 mask |= INTVAL (operands[4]) << 4;
7171 mask |= INTVAL (operands[5]) << 6;
7172 operands[2] = GEN_INT (mask);
7174 return "%vpshuflw\t{%2, %1, %0|%0, %1, %2}";
7176 [(set_attr "type" "sselog")
7177 (set_attr "prefix_data16" "0")
7178 (set_attr "prefix_rep" "1")
7179 (set_attr "prefix" "maybe_vex")
7180 (set_attr "length_immediate" "1")
7181 (set_attr "mode" "TI")])
;; Expander taking the shuffle control as one integer constant
;; (operand 2).  It splits the constant into four 2-bit selectors and
;; adds 4 to each, so sse2_pshufhw_1 receives element indexes in the
;; range 4..7.
7183 (define_expand "sse2_pshufhw"
7184 [(match_operand:V8HI 0 "register_operand" "")
7185 (match_operand:V8HI 1 "nonimmediate_operand" "")
7186 (match_operand:SI 2 "const_int_operand" "")]
7189 int mask = INTVAL (operands[2]);
7190 emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
7191 GEN_INT (((mask >> 0) & 3) + 4),
7192 GEN_INT (((mask >> 2) & 3) + 4),
7193 GEN_INT (((mask >> 4) & 3) + 4),
7194 GEN_INT (((mask >> 6) & 3) + 4)));
;; V8HI shuffle whose variable selectors (operands 2-5) are each
;; limited to 4..7.  The output code subtracts 4 from each selector,
;; packs the results into a single immediate (2 bits each, operand 2
;; in the low bits), stores it in operands[2] and emits %vpshufhw.
7198 (define_insn "sse2_pshufhw_1"
7199 [(set (match_operand:V8HI 0 "register_operand" "=x")
7201 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7202 (parallel [(const_int 0)
7206 (match_operand 2 "const_4_to_7_operand" "")
7207 (match_operand 3 "const_4_to_7_operand" "")
7208 (match_operand 4 "const_4_to_7_operand" "")
7209 (match_operand 5 "const_4_to_7_operand" "")])))]
7213 mask |= (INTVAL (operands[2]) - 4) << 0;
7214 mask |= (INTVAL (operands[3]) - 4) << 2;
7215 mask |= (INTVAL (operands[4]) - 4) << 4;
7216 mask |= (INTVAL (operands[5]) - 4) << 6;
7217 operands[2] = GEN_INT (mask);
7219 return "%vpshufhw\t{%2, %1, %0|%0, %1, %2}";
7221 [(set_attr "type" "sselog")
7222 (set_attr "prefix_rep" "1")
7223 (set_attr "prefix_data16" "0")
7224 (set_attr "prefix" "maybe_vex")
7225 (set_attr "length_immediate" "1")
7226 (set_attr "mode" "TI")])
;; Expander that loads an SImode value (operand 1) into a V4SI
;; register.  The preparation statement supplies operand 2 as the
;; V4SI zero constant.
7228 (define_expand "sse2_loadd"
7229 [(set (match_operand:V4SI 0 "register_operand" "")
7232 (match_operand:SI 1 "nonimmediate_operand" ""))
7236 "operands[2] = CONST0_RTX (V4SImode);")
;; VEX forms of the SImode-into-V4SI load.  Three alternatives:
;; vmovd from memory and vmovd from a general register (both with
;; operand 1 constrained to "C"), and vmovss from an SSE register
;; with operand 1 supplied as a separate register source.
7238 (define_insn "*avx_loadld"
7239 [(set (match_operand:V4SI 0 "register_operand" "=x,Yi,x")
7242 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,x"))
7243 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,x")
7247 vmovd\t{%2, %0|%0, %2}
7248 vmovd\t{%2, %0|%0, %2}
7249 vmovss\t{%2, %1, %0|%0, %1, %2}"
7250 [(set_attr "type" "ssemov")
7251 (set_attr "prefix" "vex")
7252 (set_attr "mode" "TI,TI,V4SF")])
;; Non-VEX forms of the SImode-into-V4SI load.  Four alternatives:
;; movd from memory, movd from a general register, movss from memory
;; (all with operand 1 constrained to "C"), and movss from an SSE
;; register with operand 1 tied to the destination.
7254 (define_insn "sse2_loadld"
7255 [(set (match_operand:V4SI 0 "register_operand" "=Y2,Yi,x,x")
7258 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x"))
7259 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,C,0")
7263 movd\t{%2, %0|%0, %2}
7264 movd\t{%2, %0|%0, %2}
7265 movss\t{%2, %0|%0, %2}
7266 movss\t{%2, %0|%0, %2}"
7267 [(set_attr "type" "ssemov")
7268 (set_attr "mode" "TI,TI,V4SF,SF")])
;; Store element 0 of a V4SI register into an SImode destination.
;; After reload the insn is split into a plain SImode move from the
;; same hard register (operand 1 is rewritten with gen_rtx_REG),
;; provided inter-unit moves are allowed, the destination is memory,
;; or the destination is not a general register.
7270 (define_insn_and_split "sse2_stored"
7271 [(set (match_operand:SI 0 "nonimmediate_operand" "=mx,r")
7273 (match_operand:V4SI 1 "register_operand" "x,Yi")
7274 (parallel [(const_int 0)])))]
7277 "&& reload_completed
7278 && (TARGET_INTER_UNIT_MOVES
7279 || MEM_P (operands [0])
7280 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
7281 [(set (match_dup 0) (match_dup 1))]
7283 operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));
;; Extract element operands[2] (0..3) of a V4SI value that lives in
;; offsettable memory into a general register.  The split replaces
;; the extraction with an SImode move from the same address adjusted
;; by 4 bytes per element.
7286 (define_insn_and_split "*vec_ext_v4si_mem"
7287 [(set (match_operand:SI 0 "register_operand" "=r")
7289 (match_operand:V4SI 1 "memory_operand" "o")
7290 (parallel [(match_operand 2 "const_0_to_3_operand" "")])))]
7296 int i = INTVAL (operands[2]);
7298 emit_move_insn (operands[0], adjust_address (operands[1], SImode, i*4));
;; Expander that stores element 0 of a V2DI register into a DImode
;; register or memory destination.
7302 (define_expand "sse_storeq"
7303 [(set (match_operand:DI 0 "nonimmediate_operand" "")
7305 (match_operand:V2DI 1 "register_operand" "")
7306 (parallel [(const_int 0)])))]
;; 64-bit variant of the element-0 store from V2DI.  It rejects the
;; memory-to-memory combination.  Of the three alternatives only the
;; third (general register from offsettable memory, emitted as
;; %vmov{q}) carries explicit type, prefix and mode attributes; the
;; first two are marked "*".
7310 (define_insn "*sse2_storeq_rex64"
7311 [(set (match_operand:DI 0 "nonimmediate_operand" "=mx,*r,r")
7313 (match_operand:V2DI 1 "nonimmediate_operand" "x,Yi,o")
7314 (parallel [(const_int 0)])))]
7315 "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7319 %vmov{q}\t{%1, %0|%0, %1}"
7320 [(set_attr "type" "*,*,imov")
7321 (set_attr "prefix" "*,*,maybe_vex")
7322 (set_attr "mode" "*,*,DI")])
;; Element-0 store from a V2DI register into a DImode memory or SSE
;; register destination.  The same pattern is repeated below without
;; constraints and rewritten as a plain DImode move from the same hard
;; register (operand 1 is replaced with gen_rtx_REG), subject to the
;; condition that inter-unit moves are allowed, the destination is
;; memory, or the destination is not a general register.
7324 (define_insn "*sse2_storeq"
7325 [(set (match_operand:DI 0 "nonimmediate_operand" "=mx")
7327 (match_operand:V2DI 1 "register_operand" "x")
7328 (parallel [(const_int 0)])))]
7333 [(set (match_operand:DI 0 "nonimmediate_operand" "")
7335 (match_operand:V2DI 1 "register_operand" "")
7336 (parallel [(const_int 0)])))]
7339 && (TARGET_INTER_UNIT_MOVES
7340 || MEM_P (operands [0])
7341 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
7342 [(set (match_dup 0) (match_dup 1))]
7344 operands[1] = gen_rtx_REG (DImode, REGNO (operands[1]));
;; Extract element 1 of a V2DI value, VEX forms with a general-register
;; alternative.  Alternatives: vmovhps to memory; vpsrldq by 8 bytes
;; between SSE registers; vmovq into an SSE register from %H1; and
;; vmov{q} into a general register from %H1.  Memory-to-memory is
;; rejected.
;; NOTE(review): %H1 presumably addresses the upper 8 bytes of the
;; offsettable memory operand -- confirm against the i386 operand
;; modifiers.
7347 (define_insn "*vec_extractv2di_1_rex64_avx"
7348 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x,r")
7350 (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o,o")
7351 (parallel [(const_int 1)])))]
7354 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7356 vmovhps\t{%1, %0|%0, %1}
7357 vpsrldq\t{$8, %1, %0|%0, %1, 8}
7358 vmovq\t{%H1, %0|%0, %H1}
7359 vmov{q}\t{%H1, %0|%0, %H1}"
7360 [(set_attr "type" "ssemov,sseishft,ssemov,imov")
7361 (set_attr "length_immediate" "*,1,*,*")
7362 (set_attr "memory" "*,none,*,*")
7363 (set_attr "prefix" "vex")
7364 (set_attr "mode" "V2SF,TI,TI,DI")])
;; Insn *vec_extractv2di_1_rex64: TARGET_64BIT, non-VEX form of the V2DI
;; element-1 extraction into a DImode destination.
;; Four alternatives, in order: movhps to memory, psrldq by 8 in place
;; (operand 1 is tied to operand 0 by the 0 constraint), movq from %H1 into
;; an SSE register, and mov{q} from %H1 into a general register.
;; Both operands may not be memory at once.
7366 (define_insn "*vec_extractv2di_1_rex64"
7367 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x,r")
7369 (match_operand:V2DI 1 "nonimmediate_operand" "x,0,o,o")
7370 (parallel [(const_int 1)])))]
7371 "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7373 movhps\t{%1, %0|%0, %1}
7374 psrldq\t{$8, %0|%0, 8}
7375 movq\t{%H1, %0|%0, %H1}
7376 mov{q}\t{%H1, %0|%0, %H1}"
7377 [(set_attr "type" "ssemov,sseishft,ssemov,imov")
7378 (set_attr "length_immediate" "*,1,*,*")
7379 (set_attr "atom_unit" "*,sishuf,*,*")
7380 (set_attr "memory" "*,none,*,*")
7381 (set_attr "mode" "V2SF,TI,TI,DI")])
;; Insn *vec_extractv2di_1_avx: VEX-encoded V2DI element-1 extraction into a
;; DImode destination, without a general-register alternative.
;; Three alternatives, in order: vmovhps to memory, vpsrldq by 8 between SSE
;; registers, and vmovq from %H1 into an SSE register.
;; NOTE(review): only the trailing part of the condition (the check that not
;; both operands are memory) is visible in this extract.
7383 (define_insn "*vec_extractv2di_1_avx"
7384 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
7386 (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o")
7387 (parallel [(const_int 1)])))]
7390 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7392 vmovhps\t{%1, %0|%0, %1}
7393 vpsrldq\t{$8, %1, %0|%0, %1, 8}
7394 vmovq\t{%H1, %0|%0, %H1}"
7395 [(set_attr "type" "ssemov,sseishft,ssemov")
7396 (set_attr "length_immediate" "*,1,*")
7397 (set_attr "memory" "*,none,*")
7398 (set_attr "prefix" "vex")
7399 (set_attr "mode" "V2SF,TI,TI")])
;; Insn *vec_extractv2di_1_sse2: SSE2 (non-VEX) V2DI element-1 extraction
;; into a DImode destination.
;; Three alternatives, in order: movhps to memory, psrldq by 8 in place
;; (operand 1 tied to operand 0), and movq from %H1 into an SSE register.
;; NOTE(review): the first part of the condition is not visible in this
;; extract; the visible part requires TARGET_SSE2 and forbids two memory
;; operands.
7401 (define_insn "*vec_extractv2di_1_sse2"
7402 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
7404 (match_operand:V2DI 1 "nonimmediate_operand" "x,0,o")
7405 (parallel [(const_int 1)])))]
7407 && TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7409 movhps\t{%1, %0|%0, %1}
7410 psrldq\t{$8, %0|%0, 8}
7411 movq\t{%H1, %0|%0, %H1}"
7412 [(set_attr "type" "ssemov,sseishft,ssemov")
7413 (set_attr "length_immediate" "*,1,*")
7414 (set_attr "atom_unit" "*,sishuf,*")
7415 (set_attr "memory" "*,none,*")
7416 (set_attr "mode" "V2SF,TI,TI")])
;; Insn *vec_extractv2di_1_sse: V2DI element-1 extraction for targets with
;; SSE but without SSE2 (see the condition).  Uses only single-precision
;; move instructions: movhps to memory, movhlps between registers, and
;; movlps from %H1.  Both operands may not be memory at once.
7418 ;; Not sure this is ever used, but it doesn't hurt to have it. -aoliva
7419 (define_insn "*vec_extractv2di_1_sse"
7420 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
7422 (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o")
7423 (parallel [(const_int 1)])))]
7424 "!TARGET_SSE2 && TARGET_SSE
7425 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7427 movhps\t{%1, %0|%0, %1}
7428 movhlps\t{%1, %0|%0, %1}
7429 movlps\t{%H1, %0|%0, %H1}"
7430 [(set_attr "type" "ssemov")
7431 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Insn *vec_dupv4si_avx: builds a V4SI value in an SSE register from SImode
;; operand 1, VEX-encoded (prefix attribute is vex).
;; Two alternatives: vpshufd with immediate 0 when operand 1 is in an SSE
;; register, and vbroadcastss when operand 1 is in memory.
;; Operand 1 uses nonimmediate_operand so that the memory alternative (m) can
;; be matched directly; a register-only predicate would never accept a MEM
;; before register allocation.  This matches the predicate used by
;; *vec_dupv2di_avx in this file for the same x,m constraint shape.
;; NOTE(review): the RTL operator wrapping operand 1 and the condition are on
;; lines not visible in this extract.
7433 (define_insn "*vec_dupv4si_avx"
7434 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
7436 (match_operand:SI 1 "nonimmediate_operand" "x,m")))]
7439 vpshufd\t{$0, %1, %0|%0, %1, 0}
7440 vbroadcastss\t{%1, %0|%0, %1}"
7441 [(set_attr "type" "sselog1,ssemov")
7442 (set_attr "length_immediate" "1,0")
7443 (set_attr "prefix_extra" "0,1")
7444 (set_attr "prefix" "vex")
7445 (set_attr "mode" "TI,V4SF")])
;; Insn *vec_dupv4si: non-AVX-specific pattern producing a V4SI register from
;; SImode register operand 1.
;; Two alternatives: %vpshufd with immediate 0 (Y2 registers), and shufps
;; with immediate 0 operating in place (operand 1 tied to operand 0).
;; NOTE(review): the RTL operator wrapping operand 1 and the condition are
;; not visible in this extract.
7447 (define_insn "*vec_dupv4si"
7448 [(set (match_operand:V4SI 0 "register_operand" "=Y2,x")
7450 (match_operand:SI 1 "register_operand" " Y2,0")))]
7453 %vpshufd\t{$0, %1, %0|%0, %1, 0}
7454 shufps\t{$0, %0, %0|%0, %0, 0}"
7455 [(set_attr "type" "sselog1")
7456 (set_attr "length_immediate" "1")
7457 (set_attr "mode" "TI,V4SF")])
;; Insn *vec_dupv2di_avx: VEX-encoded pattern producing a V2DI register from
;; DImode operand 1 (register or memory).
;; Two alternatives: vpunpcklqdq of operand 1 with itself when it is in an
;; SSE register, and vmovddup when it is in memory.
;; NOTE(review): the RTL operator wrapping operand 1 and the condition are
;; not visible in this extract.
7459 (define_insn "*vec_dupv2di_avx"
7460 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
7462 (match_operand:DI 1 "nonimmediate_operand" " x,m")))]
7465 vpunpcklqdq\t{%1, %1, %0|%0, %1, %1}
7466 vmovddup\t{%1, %0|%0, %1}"
7467 [(set_attr "type" "sselog1")
7468 (set_attr "prefix" "vex")
7469 (set_attr "mode" "TI,DF")])
;; Insn *vec_dupv2di_sse3: pattern producing a V2DI register from DImode
;; operand 1, which is either tied to operand 0 or in memory.
;; Only the second output line (movddup from memory) is visible in this
;; extract; the first alternative's template and the condition are on
;; missing lines.
7471 (define_insn "*vec_dupv2di_sse3"
7472 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
7474 (match_operand:DI 1 "nonimmediate_operand" " 0,m")))]
7478 movddup\t{%1, %0|%0, %1}"
7479 [(set_attr "type" "sselog1")
7480 (set_attr "mode" "TI,DF")])
;; Insn *vec_dupv2di: pattern producing a V2DI register from DImode register
;; operand 1, which is tied to operand 0 in both alternatives.
;; NOTE(review): the condition and both output lines are not visible in this
;; extract; only the type (sselog1, ssemov) and mode (TI, V4SF) attributes
;; are.
7482 (define_insn "*vec_dupv2di"
7483 [(set (match_operand:V2DI 0 "register_operand" "=Y2,x")
7485 (match_operand:DI 1 "register_operand" " 0 ,0")))]
7490 [(set_attr "type" "sselog1,ssemov")
7491 (set_attr "mode" "TI,V4SF")])
;; Insn *vec_concatv2si_avx: forms a V2SI register from two SImode operands.
;; Five alternatives, in order: vpinsrd at index 1, vpunpckldq, vmovd (second
;; operand constrained to C), and two MMX-register (y) alternatives using the
;; legacy punpckldq and movd.
;; The prefix attribute is orig for the two MMX alternatives (3 and 4) and
;; vex for the others.
;; NOTE(review): the RTL operator combining operands 1 and 2 and the
;; condition are not visible in this extract.
7493 (define_insn "*vec_concatv2si_avx"
7494 [(set (match_operand:V2SI 0 "register_operand" "=x,x,x ,*y ,*y")
7496 (match_operand:SI 1 "nonimmediate_operand" "x ,x,rm, 0 ,rm")
7497 (match_operand:SI 2 "vector_move_operand" "rm,x,C ,*ym,C")))]
7500 vpinsrd\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}
7501 vpunpckldq\t{%2, %1, %0|%0, %1, %2}
7502 vmovd\t{%1, %0|%0, %1}
7503 punpckldq\t{%2, %0|%0, %2}
7504 movd\t{%1, %0|%0, %1}"
7505 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
7506 (set_attr "prefix_extra" "1,*,*,*,*")
7507 (set_attr "length_immediate" "1,*,*,*,*")
7508 (set (attr "prefix")
7509 (if_then_else (eq_attr "alternative" "3,4")
7510 (const_string "orig")
7511 (const_string "vex")))
7512 (set_attr "mode" "TI,TI,TI,DI,DI")])
;; Insn *vec_concatv2si_sse4_1: non-VEX counterpart forming a V2SI register
;; from two SImode operands.
;; Five alternatives, in order: pinsrd at index 1 (operand 1 tied to the
;; destination), punpckldq, movd (second operand constrained to C), then the
;; MMX-register forms of punpckldq and movd.
;; NOTE(review): the RTL operator combining operands 1 and 2 and the
;; condition are not visible in this extract.
7514 (define_insn "*vec_concatv2si_sse4_1"
7515 [(set (match_operand:V2SI 0 "register_operand" "=x,x,x ,*y ,*y")
7517 (match_operand:SI 1 "nonimmediate_operand" "0 ,0,rm, 0 ,rm")
7518 (match_operand:SI 2 "vector_move_operand" "rm,x,C ,*ym,C")))]
7521 pinsrd\t{$0x1, %2, %0|%0, %2, 0x1}
7522 punpckldq\t{%2, %0|%0, %2}
7523 movd\t{%1, %0|%0, %1}
7524 punpckldq\t{%2, %0|%0, %2}
7525 movd\t{%1, %0|%0, %1}"
7526 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
7527 (set_attr "prefix_extra" "1,*,*,*,*")
7528 (set_attr "length_immediate" "1,*,*,*,*")
7529 (set_attr "mode" "TI,TI,TI,DI,DI")])
;; Insn *vec_concatv2si_sse2: forms a V2SI register from two SImode operands
;; where operand 2 must be a register or zero (reg_or_0_operand).
;; Four alternatives, in order: punpckldq and movd on SSE registers, then
;; punpckldq and movd on MMX registers.
;; NOTE(review): the RTL operator combining the operands and the condition
;; are not visible in this extract.
7531 ;; ??? In theory we can match memory for the MMX alternative, but allowing
7532 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
7533 ;; alternatives pretty much forces the MMX alternative to be chosen.
7534 (define_insn "*vec_concatv2si_sse2"
7535 [(set (match_operand:V2SI 0 "register_operand" "=x,x ,*y,*y")
7537 (match_operand:SI 1 "nonimmediate_operand" " 0,rm, 0,rm")
7538 (match_operand:SI 2 "reg_or_0_operand" " x,C ,*y, C")))]
7541 punpckldq\t{%2, %0|%0, %2}
7542 movd\t{%1, %0|%0, %1}
7543 punpckldq\t{%2, %0|%0, %2}
7544 movd\t{%1, %0|%0, %1}"
7545 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
7546 (set_attr "mode" "TI,TI,DI,DI")])
;; Insn *vec_concatv2si_sse: forms a V2SI register from two SImode operands
;; using single-precision instructions for the SSE-register alternatives
;; (unpcklps, movss) and punpckldq / movd for the MMX-register alternatives.
;; NOTE(review): the RTL operator combining the operands and the condition
;; are not visible in this extract.
7548 (define_insn "*vec_concatv2si_sse"
7549 [(set (match_operand:V2SI 0 "register_operand" "=x,x,*y,*y")
7551 (match_operand:SI 1 "nonimmediate_operand" " 0,m, 0,*rm")
7552 (match_operand:SI 2 "reg_or_0_operand" " x,C,*y,C")))]
7555 unpcklps\t{%2, %0|%0, %2}
7556 movss\t{%1, %0|%0, %1}
7557 punpckldq\t{%2, %0|%0, %2}
7558 movd\t{%1, %0|%0, %1}"
7559 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
7560 (set_attr "mode" "V4SF,V4SF,DI,DI")])
;; Insn *vec_concatv4si_1_avx: VEX-encoded pattern forming a V4SI register
;; from two V2SI operands.
;; Two alternatives: vpunpcklqdq when operand 2 is an SSE register, and
;; vmovhps when operand 2 is in memory.
;; NOTE(review): the RTL operator combining the operands and the condition
;; are not visible in this extract.
7562 (define_insn "*vec_concatv4si_1_avx"
7563 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
7565 (match_operand:V2SI 1 "register_operand" " x,x")
7566 (match_operand:V2SI 2 "nonimmediate_operand" " x,m")))]
7569 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7570 vmovhps\t{%2, %1, %0|%0, %1, %2}"
7571 [(set_attr "type" "sselog,ssemov")
7572 (set_attr "prefix" "vex")
7573 (set_attr "mode" "TI,V2SF")])
;; Insn *vec_concatv4si_1: non-VEX pattern forming a V4SI register from two
;; V2SI operands; operand 1 is tied to the destination in every alternative.
;; Three alternatives, in order: punpcklqdq, movlhps (register operand 2),
;; and movhps (memory operand 2).
;; NOTE(review): the RTL operator combining the operands and the condition
;; are not visible in this extract.
7575 (define_insn "*vec_concatv4si_1"
7576 [(set (match_operand:V4SI 0 "register_operand" "=Y2,x,x")
7578 (match_operand:V2SI 1 "register_operand" " 0 ,0,0")
7579 (match_operand:V2SI 2 "nonimmediate_operand" " Y2,x,m")))]
7582 punpcklqdq\t{%2, %0|%0, %2}
7583 movlhps\t{%2, %0|%0, %2}
7584 movhps\t{%2, %0|%0, %2}"
7585 [(set_attr "type" "sselog,ssemov,ssemov")
7586 (set_attr "mode" "TI,V4SF,V2SF")])
;; Insn *vec_concatv2di_avx: forms a V2DI register from two DImode operands
;; on 32-bit AVX targets (condition: !TARGET_64BIT && TARGET_AVX).
;; Four alternatives, in order: vmovq from memory, movq2dq from an MMX
;; register, vpunpcklqdq of two SSE registers, and vmovhps with a memory
;; second operand.  The first two require operand 2 to satisfy constraint C.
;; The prefix attribute is orig for the movq2dq alternative (1) and vex for
;; the rest.
7588 (define_insn "*vec_concatv2di_avx"
7589 [(set (match_operand:V2DI 0 "register_operand" "=x,?x,x,x")
7591 (match_operand:DI 1 "nonimmediate_operand" " m,*y,x,x")
7592 (match_operand:DI 2 "vector_move_operand" " C, C,x,m")))]
7593 "!TARGET_64BIT && TARGET_AVX"
7595 vmovq\t{%1, %0|%0, %1}
7596 movq2dq\t{%1, %0|%0, %1}
7597 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7598 vmovhps\t{%2, %1, %0|%0, %1, %2}"
7599 [(set_attr "type" "ssemov,ssemov,sselog,ssemov")
7600 (set (attr "prefix")
7601 (if_then_else (eq_attr "alternative" "1")
7602 (const_string "orig")
7603 (const_string "vex")))
7604 (set_attr "mode" "TI,TI,TI,V2SF")])
;; Insn vec_concatv2di: forms a V2DI register from two DImode operands on
;; 32-bit SSE targets (condition: !TARGET_64BIT && TARGET_SSE).
;; Five alternatives, in order: movq, movq2dq from an MMX register,
;; punpcklqdq, movlhps, and movhps with a memory second operand.  The last
;; three tie operand 1 to the destination.
7606 (define_insn "vec_concatv2di"
7607 [(set (match_operand:V2DI 0 "register_operand" "=Y2 ,?Y2,Y2,x,x")
7609 (match_operand:DI 1 "nonimmediate_operand" " mY2,*y ,0 ,0,0")
7610 (match_operand:DI 2 "vector_move_operand" " C , C,Y2,x,m")))]
7611 "!TARGET_64BIT && TARGET_SSE"
7613 movq\t{%1, %0|%0, %1}
7614 movq2dq\t{%1, %0|%0, %1}
7615 punpcklqdq\t{%2, %0|%0, %2}
7616 movlhps\t{%2, %0|%0, %2}
7617 movhps\t{%2, %0|%0, %2}"
7618 [(set_attr "type" "ssemov,ssemov,sselog,ssemov,ssemov")
7619 (set_attr "mode" "TI,TI,TI,V4SF,V2SF")])
;; Insn *vec_concatv2di_rex64_avx: forms a V2DI register from two DImode
;; operands on 64-bit AVX targets (condition: TARGET_64BIT && TARGET_AVX).
;; Six alternatives, in order: vpinsrq at index 1, vmovq from an SSE register
;; or memory, vmovq from a general register, movq2dq from an MMX register,
;; vpunpcklqdq, and vmovhps with a memory second operand.
;; The prefix attribute is orig for the movq2dq alternative (3) and vex for
;; the rest.
7621 (define_insn "*vec_concatv2di_rex64_avx"
7622 [(set (match_operand:V2DI 0 "register_operand" "=x,x,Yi,!x,x,x")
7624 (match_operand:DI 1 "nonimmediate_operand" " x,m,r ,*y,x,x")
7625 (match_operand:DI 2 "vector_move_operand" "rm,C,C ,C ,x,m")))]
7626 "TARGET_64BIT && TARGET_AVX"
7628 vpinsrq\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}
7629 vmovq\t{%1, %0|%0, %1}
7630 vmovq\t{%1, %0|%0, %1}
7631 movq2dq\t{%1, %0|%0, %1}
7632 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7633 vmovhps\t{%2, %1, %0|%0, %1, %2}"
7634 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,sselog,ssemov")
7635 (set_attr "prefix_extra" "1,*,*,*,*,*")
7636 (set_attr "length_immediate" "1,*,*,*,*,*")
7637 (set (attr "prefix")
7638 (if_then_else (eq_attr "alternative" "3")
7639 (const_string "orig")
7640 (const_string "vex")))
7641 (set_attr "mode" "TI,TI,TI,TI,TI,V2SF")])
;; Insn *vec_concatv2di_rex64_sse4_1: forms a V2DI register from two DImode
;; operands on 64-bit SSE4.1 targets (TARGET_64BIT && TARGET_SSE4_1).
;; Seven alternatives, in order: pinsrq at index 1, movq from SSE
;; register/memory, movq from a general register, movq2dq from an MMX
;; register, punpcklqdq, movlhps, and movhps with a memory second operand.
;; prefix_rex is forced to 1 for the pinsrq and general-register movq
;; alternatives (0 and 2).
7643 (define_insn "*vec_concatv2di_rex64_sse4_1"
7644 [(set (match_operand:V2DI 0 "register_operand" "=x ,x ,Yi,!x,x,x,x")
7646 (match_operand:DI 1 "nonimmediate_operand" " 0 ,mx,r ,*y,0,0,0")
7647 (match_operand:DI 2 "vector_move_operand" " rm,C ,C ,C ,x,x,m")))]
7648 "TARGET_64BIT && TARGET_SSE4_1"
7650 pinsrq\t{$0x1, %2, %0|%0, %2, 0x1}
7651 movq\t{%1, %0|%0, %1}
7652 movq\t{%1, %0|%0, %1}
7653 movq2dq\t{%1, %0|%0, %1}
7654 punpcklqdq\t{%2, %0|%0, %2}
7655 movlhps\t{%2, %0|%0, %2}
7656 movhps\t{%2, %0|%0, %2}"
7657 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
7658 (set_attr "prefix_rex" "1,*,1,*,*,*,*")
7659 (set_attr "prefix_extra" "1,*,*,*,*,*,*")
7660 (set_attr "length_immediate" "1,*,*,*,*,*,*")
7661 (set_attr "mode" "TI,TI,TI,TI,TI,V4SF,V2SF")])
;; Insn *vec_concatv2di_rex64_sse: forms a V2DI register from two DImode
;; operands on 64-bit SSE targets (TARGET_64BIT && TARGET_SSE).
;; Six alternatives, in order: movq from SSE register/memory, movq from a
;; general register, movq2dq from an MMX register, punpcklqdq, movlhps, and
;; movhps with a memory second operand.
;; prefix_rex is forced to 1 for the general-register movq alternative (1).
7663 (define_insn "*vec_concatv2di_rex64_sse"
7664 [(set (match_operand:V2DI 0 "register_operand" "=Y2 ,Yi,!Y2,Y2,x,x")
7666 (match_operand:DI 1 "nonimmediate_operand" " mY2,r ,*y ,0 ,0,0")
7667 (match_operand:DI 2 "vector_move_operand" " C ,C ,C ,Y2,x,m")))]
7668 "TARGET_64BIT && TARGET_SSE"
7670 movq\t{%1, %0|%0, %1}
7671 movq\t{%1, %0|%0, %1}
7672 movq2dq\t{%1, %0|%0, %1}
7673 punpcklqdq\t{%2, %0|%0, %2}
7674 movlhps\t{%2, %0|%0, %2}
7675 movhps\t{%2, %0|%0, %2}"
7676 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
7677 (set_attr "prefix_rex" "*,1,*,*,*,*")
7678 (set_attr "mode" "TI,TI,TI,TI,V4SF,V2SF")])
;; Expander vec_unpacku_hi_v16qi: V16QI register operand 1 -> V8HI register
;; operand 0.  Delegates to ix86_expand_sse4_unpack or ix86_expand_sse_unpack
;; with flags (true, true).
;; NOTE(review): judging by the pattern name the flags presumably mean
;; unsigned and high half; the test choosing between the two helpers is on
;; lines not visible in this extract -- confirm against the helpers.
7680 (define_expand "vec_unpacku_hi_v16qi"
7681 [(match_operand:V8HI 0 "register_operand" "")
7682 (match_operand:V16QI 1 "register_operand" "")]
7686 ix86_expand_sse4_unpack (operands, true, true);
7688 ix86_expand_sse_unpack (operands, true, true);
;; Expander vec_unpacks_hi_v16qi: V16QI register operand 1 -> V8HI register
;; operand 0.  Delegates to ix86_expand_sse4_unpack or ix86_expand_sse_unpack
;; with flags (false, true).
;; NOTE(review): judging by the pattern name the flags presumably mean
;; signed and high half; the test choosing between the two helpers is on
;; lines not visible in this extract -- confirm against the helpers.
7692 (define_expand "vec_unpacks_hi_v16qi"
7693 [(match_operand:V8HI 0 "register_operand" "")
7694 (match_operand:V16QI 1 "register_operand" "")]
7698 ix86_expand_sse4_unpack (operands, false, true);
7700 ix86_expand_sse_unpack (operands, false, true);
;; Expander vec_unpacku_lo_v16qi: V16QI register operand 1 -> V8HI register
;; operand 0.  Delegates to ix86_expand_sse4_unpack or ix86_expand_sse_unpack
;; with flags (true, false).
;; NOTE(review): judging by the pattern name the flags presumably mean
;; unsigned and low half; the test choosing between the two helpers is on
;; lines not visible in this extract -- confirm against the helpers.
7704 (define_expand "vec_unpacku_lo_v16qi"
7705 [(match_operand:V8HI 0 "register_operand" "")
7706 (match_operand:V16QI 1 "register_operand" "")]
7710 ix86_expand_sse4_unpack (operands, true, false);
7712 ix86_expand_sse_unpack (operands, true, false);
;; Expander vec_unpacks_lo_v16qi: V16QI register operand 1 -> V8HI register
;; operand 0.  Delegates to ix86_expand_sse4_unpack or ix86_expand_sse_unpack
;; with flags (false, false).
;; NOTE(review): judging by the pattern name the flags presumably mean
;; signed and low half; the test choosing between the two helpers is on
;; lines not visible in this extract -- confirm against the helpers.
7716 (define_expand "vec_unpacks_lo_v16qi"
7717 [(match_operand:V8HI 0 "register_operand" "")
7718 (match_operand:V16QI 1 "register_operand" "")]
7722 ix86_expand_sse4_unpack (operands, false, false);
7724 ix86_expand_sse_unpack (operands, false, false);
;; Expander vec_unpacku_hi_v8hi: V8HI register operand 1 -> V4SI register
;; operand 0.  Delegates to ix86_expand_sse4_unpack or ix86_expand_sse_unpack
;; with flags (true, true).
;; NOTE(review): judging by the pattern name the flags presumably mean
;; unsigned and high half; the test choosing between the two helpers is on
;; lines not visible in this extract -- confirm against the helpers.
7728 (define_expand "vec_unpacku_hi_v8hi"
7729 [(match_operand:V4SI 0 "register_operand" "")
7730 (match_operand:V8HI 1 "register_operand" "")]
7734 ix86_expand_sse4_unpack (operands, true, true);
7736 ix86_expand_sse_unpack (operands, true, true);
;; Expander vec_unpacks_hi_v8hi: V8HI register operand 1 -> V4SI register
;; operand 0.  Delegates to ix86_expand_sse4_unpack or ix86_expand_sse_unpack
;; with flags (false, true).
;; NOTE(review): judging by the pattern name the flags presumably mean
;; signed and high half; the test choosing between the two helpers is on
;; lines not visible in this extract -- confirm against the helpers.
7740 (define_expand "vec_unpacks_hi_v8hi"
7741 [(match_operand:V4SI 0 "register_operand" "")
7742 (match_operand:V8HI 1 "register_operand" "")]
7746 ix86_expand_sse4_unpack (operands, false, true);
7748 ix86_expand_sse_unpack (operands, false, true);
;; Expander vec_unpacku_lo_v8hi: V8HI register operand 1 -> V4SI register
;; operand 0.  Delegates to ix86_expand_sse4_unpack or ix86_expand_sse_unpack
;; with flags (true, false).
;; NOTE(review): judging by the pattern name the flags presumably mean
;; unsigned and low half; the test choosing between the two helpers is on
;; lines not visible in this extract -- confirm against the helpers.
7752 (define_expand "vec_unpacku_lo_v8hi"
7753 [(match_operand:V4SI 0 "register_operand" "")
7754 (match_operand:V8HI 1 "register_operand" "")]
7758 ix86_expand_sse4_unpack (operands, true, false);
7760 ix86_expand_sse_unpack (operands, true, false);
;; Expander vec_unpacks_lo_v8hi: V8HI register operand 1 -> V4SI register
;; operand 0.  Delegates to ix86_expand_sse4_unpack or ix86_expand_sse_unpack
;; with flags (false, false).
;; NOTE(review): judging by the pattern name the flags presumably mean
;; signed and low half; the test choosing between the two helpers is on
;; lines not visible in this extract -- confirm against the helpers.
7764 (define_expand "vec_unpacks_lo_v8hi"
7765 [(match_operand:V4SI 0 "register_operand" "")
7766 (match_operand:V8HI 1 "register_operand" "")]
7770 ix86_expand_sse4_unpack (operands, false, false);
7772 ix86_expand_sse_unpack (operands, false, false);
;; Expander vec_unpacku_hi_v4si: V4SI register operand 1 -> V2DI register
;; operand 0.  Delegates to ix86_expand_sse4_unpack or ix86_expand_sse_unpack
;; with flags (true, true).
;; NOTE(review): judging by the pattern name the flags presumably mean
;; unsigned and high half; the test choosing between the two helpers is on
;; lines not visible in this extract -- confirm against the helpers.
7776 (define_expand "vec_unpacku_hi_v4si"
7777 [(match_operand:V2DI 0 "register_operand" "")
7778 (match_operand:V4SI 1 "register_operand" "")]
7782 ix86_expand_sse4_unpack (operands, true, true);
7784 ix86_expand_sse_unpack (operands, true, true);
;; Expander vec_unpacks_hi_v4si: V4SI register operand 1 -> V2DI register
;; operand 0.  Delegates to ix86_expand_sse4_unpack or ix86_expand_sse_unpack
;; with flags (false, true).
;; NOTE(review): judging by the pattern name the flags presumably mean
;; signed and high half; the test choosing between the two helpers is on
;; lines not visible in this extract -- confirm against the helpers.
7788 (define_expand "vec_unpacks_hi_v4si"
7789 [(match_operand:V2DI 0 "register_operand" "")
7790 (match_operand:V4SI 1 "register_operand" "")]
7794 ix86_expand_sse4_unpack (operands, false, true);
7796 ix86_expand_sse_unpack (operands, false, true);
;; Expander vec_unpacku_lo_v4si: V4SI register operand 1 -> V2DI register
;; operand 0.  Delegates to ix86_expand_sse4_unpack or ix86_expand_sse_unpack
;; with flags (true, false).
;; NOTE(review): judging by the pattern name the flags presumably mean
;; unsigned and low half; the test choosing between the two helpers is on
;; lines not visible in this extract -- confirm against the helpers.
7800 (define_expand "vec_unpacku_lo_v4si"
7801 [(match_operand:V2DI 0 "register_operand" "")
7802 (match_operand:V4SI 1 "register_operand" "")]
7806 ix86_expand_sse4_unpack (operands, true, false);
7808 ix86_expand_sse_unpack (operands, true, false);
;; Expander vec_unpacks_lo_v4si: V4SI register operand 1 -> V2DI register
;; operand 0.  Delegates to ix86_expand_sse4_unpack or ix86_expand_sse_unpack
;; with flags (false, false).
;; NOTE(review): judging by the pattern name the flags presumably mean
;; signed and low half; the test choosing between the two helpers is on
;; lines not visible in this extract -- confirm against the helpers.
7812 (define_expand "vec_unpacks_lo_v4si"
7813 [(match_operand:V2DI 0 "register_operand" "")
7814 (match_operand:V4SI 1 "register_operand" "")]
7818 ix86_expand_sse4_unpack (operands, false, false);
7820 ix86_expand_sse_unpack (operands, false, false);
7824 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7828 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander sse2_uavgv16qi3: V16QI register destination computed from V16QI
;; operands 1 and 2 and a constant vector of sixteen ones.
;; The preparation statement calls ix86_fixup_binary_operands_no_copy with
;; PLUS and V16QImode on the operands.
;; NOTE(review): the arithmetic RTL operators wrapping the operands and the
;; enabling condition are on lines not visible in this extract.
7830 (define_expand "sse2_uavgv16qi3"
7831 [(set (match_operand:V16QI 0 "register_operand" "")
7837 (match_operand:V16QI 1 "nonimmediate_operand" ""))
7839 (match_operand:V16QI 2 "nonimmediate_operand" "")))
7840 (const_vector:V16QI [(const_int 1) (const_int 1)
7841 (const_int 1) (const_int 1)
7842 (const_int 1) (const_int 1)
7843 (const_int 1) (const_int 1)
7844 (const_int 1) (const_int 1)
7845 (const_int 1) (const_int 1)
7846 (const_int 1) (const_int 1)
7847 (const_int 1) (const_int 1)]))
7850 "ix86_fixup_binary_operands_no_copy (PLUS, V16QImode, operands);")
;; Insn *avx_uavgv16qi3: VEX-encoded three-operand vpavgb on V16QI values.
;; Operand 1 is marked commutative (%) and must be an SSE register; operand 2
;; may be an SSE register or memory.  Enabled for TARGET_AVX when
;; ix86_binary_operator_ok accepts the operands for PLUS.
;; NOTE(review): the arithmetic RTL operators around the operands are on
;; lines not visible in this extract.
7852 (define_insn "*avx_uavgv16qi3"
7853 [(set (match_operand:V16QI 0 "register_operand" "=x")
7859 (match_operand:V16QI 1 "nonimmediate_operand" "%x"))
7861 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))
7862 (const_vector:V16QI [(const_int 1) (const_int 1)
7863 (const_int 1) (const_int 1)
7864 (const_int 1) (const_int 1)
7865 (const_int 1) (const_int 1)
7866 (const_int 1) (const_int 1)
7867 (const_int 1) (const_int 1)
7868 (const_int 1) (const_int 1)
7869 (const_int 1) (const_int 1)]))
7871 "TARGET_AVX && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
7872 "vpavgb\t{%2, %1, %0|%0, %1, %2}"
7873 [(set_attr "type" "sseiadd")
7874 (set_attr "prefix" "vex")
7875 (set_attr "mode" "TI")])
;; Insn *sse2_uavgv16qi3: two-operand pavgb on V16QI values.  Operand 1 is
;; marked commutative (%) and tied to the destination; operand 2 may be an
;; SSE register or memory.  Enabled for TARGET_SSE2 when
;; ix86_binary_operator_ok accepts the operands for PLUS.
;; NOTE(review): the arithmetic RTL operators around the operands are on
;; lines not visible in this extract.
7877 (define_insn "*sse2_uavgv16qi3"
7878 [(set (match_operand:V16QI 0 "register_operand" "=x")
7884 (match_operand:V16QI 1 "nonimmediate_operand" "%0"))
7886 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))
7887 (const_vector:V16QI [(const_int 1) (const_int 1)
7888 (const_int 1) (const_int 1)
7889 (const_int 1) (const_int 1)
7890 (const_int 1) (const_int 1)
7891 (const_int 1) (const_int 1)
7892 (const_int 1) (const_int 1)
7893 (const_int 1) (const_int 1)
7894 (const_int 1) (const_int 1)]))
7896 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
7897 "pavgb\t{%2, %0|%0, %2}"
7898 [(set_attr "type" "sseiadd")
7899 (set_attr "prefix_data16" "1")
7900 (set_attr "mode" "TI")])
;; Expander sse2_uavgv8hi3: V8HI register destination computed from V8HI
;; operands 1 and 2 and a constant vector of eight ones.
;; The preparation statement calls ix86_fixup_binary_operands_no_copy with
;; PLUS and V8HImode on the operands.
;; NOTE(review): the arithmetic RTL operators wrapping the operands and the
;; enabling condition are on lines not visible in this extract.
7902 (define_expand "sse2_uavgv8hi3"
7903 [(set (match_operand:V8HI 0 "register_operand" "")
7909 (match_operand:V8HI 1 "nonimmediate_operand" ""))
7911 (match_operand:V8HI 2 "nonimmediate_operand" "")))
7912 (const_vector:V8HI [(const_int 1) (const_int 1)
7913 (const_int 1) (const_int 1)
7914 (const_int 1) (const_int 1)
7915 (const_int 1) (const_int 1)]))
7918 "ix86_fixup_binary_operands_no_copy (PLUS, V8HImode, operands);")
;; Insn *avx_uavgv8hi3: VEX-encoded three-operand vpavgw on V8HI values.
;; Operand 1 is marked commutative (%) and must be an SSE register; operand 2
;; may be an SSE register or memory.  Enabled for TARGET_AVX when
;; ix86_binary_operator_ok accepts the operands for PLUS.
;; NOTE(review): the arithmetic RTL operators around the operands are on
;; lines not visible in this extract.
7920 (define_insn "*avx_uavgv8hi3"
7921 [(set (match_operand:V8HI 0 "register_operand" "=x")
7927 (match_operand:V8HI 1 "nonimmediate_operand" "%x"))
7929 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
7930 (const_vector:V8HI [(const_int 1) (const_int 1)
7931 (const_int 1) (const_int 1)
7932 (const_int 1) (const_int 1)
7933 (const_int 1) (const_int 1)]))
7935 "TARGET_AVX && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
7936 "vpavgw\t{%2, %1, %0|%0, %1, %2}"
7937 [(set_attr "type" "sseiadd")
7938 (set_attr "prefix" "vex")
7939 (set_attr "mode" "TI")])
;; Insn *sse2_uavgv8hi3: two-operand pavgw on V8HI values.  Operand 1 is
;; marked commutative (%) and tied to the destination; operand 2 may be an
;; SSE register or memory.  Enabled for TARGET_SSE2 when
;; ix86_binary_operator_ok accepts the operands for PLUS.
;; NOTE(review): the arithmetic RTL operators around the operands are on
;; lines not visible in this extract.
7941 (define_insn "*sse2_uavgv8hi3"
7942 [(set (match_operand:V8HI 0 "register_operand" "=x")
7948 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
7950 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
7951 (const_vector:V8HI [(const_int 1) (const_int 1)
7952 (const_int 1) (const_int 1)
7953 (const_int 1) (const_int 1)
7954 (const_int 1) (const_int 1)]))
7956 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
7957 "pavgw\t{%2, %0|%0, %2}"
7958 [(set_attr "type" "sseiadd")
7959 (set_attr "prefix_data16" "1")
7960 (set_attr "mode" "TI")])
;; Insn *avx_psadbw: VEX-encoded three-operand vpsadbw.  Modelled as an
;; unspec over two V16QI operands yielding a V2DI register; operand 2 may be
;; an SSE register or memory.
;; NOTE(review): the unspec code and the condition are on lines not visible
;; in this extract.
7962 ;; The correct representation for this is absolutely enormous, and
7963 ;; surely not generally useful.
7964 (define_insn "*avx_psadbw"
7965 [(set (match_operand:V2DI 0 "register_operand" "=x")
7966 (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "x")
7967 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
7970 "vpsadbw\t{%2, %1, %0|%0, %1, %2}"
7971 [(set_attr "type" "sseiadd")
7972 (set_attr "prefix" "vex")
7973 (set_attr "mode" "TI")])
;; Insn sse2_psadbw: two-operand psadbw.  Modelled as an unspec over two
;; V16QI operands yielding a V2DI register; operand 1 is tied to the
;; destination and operand 2 may be an SSE register or memory.
;; NOTE(review): the unspec code and the condition are on lines not visible
;; in this extract.
7975 (define_insn "sse2_psadbw"
7976 [(set (match_operand:V2DI 0 "register_operand" "=x")
7977 (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "0")
7978 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
7981 "psadbw\t{%2, %0|%0, %2}"
7982 [(set_attr "type" "sseiadd")
7983 (set_attr "atom_unit" "simul")
7984 (set_attr "prefix_data16" "1")
7985 (set_attr "mode" "TI")])
;; Insn avx_movmskp<avxmodesuffixf2c>256: vmovmskp{s,d} from a 256-bit
;; floating-point vector register (AVX256MODEF2P) into an SImode general
;; register.  Enabled when AVX256_VEC_FLOAT_MODE_P holds for the mode.
;; NOTE(review): the type attribute here is ssecvt, whereas the 128-bit
;; <sse>_movmskp pattern in this file uses ssemov -- confirm whether the
;; difference is intentional.  The unspec wrapper lines are not visible in
;; this extract.
7987 (define_insn "avx_movmskp<avxmodesuffixf2c>256"
7988 [(set (match_operand:SI 0 "register_operand" "=r")
7990 [(match_operand:AVX256MODEF2P 1 "register_operand" "x")]
7992 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
7993 "vmovmskp<avxmodesuffixf2c>\t{%1, %0|%0, %1}"
7994 [(set_attr "type" "ssecvt")
7995 (set_attr "prefix" "vex")
7996 (set_attr "mode" "<MODE>")])
;; Insn <sse>_movmskp<ssemodesuffixf2c>: %vmovmskp{s,d} from a 128-bit
;; floating-point vector register (SSEMODEF2P) into an SImode general
;; register.  Enabled when SSE_VEC_FLOAT_MODE_P holds for the mode; the
;; prefix attribute is maybe_vex.
;; NOTE(review): the unspec wrapper lines are not visible in this extract.
7998 (define_insn "<sse>_movmskp<ssemodesuffixf2c>"
7999 [(set (match_operand:SI 0 "register_operand" "=r")
8001 [(match_operand:SSEMODEF2P 1 "register_operand" "x")]
8003 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
8004 "%vmovmskp<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
8005 [(set_attr "type" "ssemov")
8006 (set_attr "prefix" "maybe_vex")
8007 (set_attr "mode" "<MODE>")])
;; Insn sse2_pmovmskb: %vpmovmskb from a V16QI SSE register into an SImode
;; general register, modelled as an unspec.
;; NOTE(review): the unspec code and the condition are on lines not visible
;; in this extract.
8009 (define_insn "sse2_pmovmskb"
8010 [(set (match_operand:SI 0 "register_operand" "=r")
8011 (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
8014 "%vpmovmskb\t{%1, %0|%0, %1}"
8015 [(set_attr "type" "ssemov")
8016 (set_attr "prefix_data16" "1")
8017 (set_attr "prefix" "maybe_vex")
8018 (set_attr "mode" "SI")])
;; Expander sse2_maskmovdqu: sets V16QI memory operand 0 from an unspec whose
;; visible inputs are V16QI register operands 1 and 2.
;; NOTE(review): the remaining unspec input, the unspec code and the
;; condition are on lines not visible in this extract.
8020 (define_expand "sse2_maskmovdqu"
8021 [(set (match_operand:V16QI 0 "memory_operand" "")
8022 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "")
8023 (match_operand:V16QI 2 "register_operand" "")
;; Insn *sse2_maskmovdqu: 32-bit form (TARGET_SSE2 && !TARGET_64BIT) of
;; %vmaskmovdqu.  The destination is V16QI memory addressed by SImode
;; register operand 0, which carries the D constraint; the unspec reads SSE
;; register operands 1 and 2 plus the previous contents of that memory
;; (match_dup 0).  Only operands 1 and 2 appear in the assembler template.
8029 (define_insn "*sse2_maskmovdqu"
8030 [(set (mem:V16QI (match_operand:SI 0 "register_operand" "D"))
8031 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
8032 (match_operand:V16QI 2 "register_operand" "x")
8033 (mem:V16QI (match_dup 0))]
8035 "TARGET_SSE2 && !TARGET_64BIT"
8036 ;; @@@ check ordering of operands in intel/nonintel syntax
8037 "%vmaskmovdqu\t{%2, %1|%1, %2}"
8038 [(set_attr "type" "ssemov")
8039 (set_attr "prefix_data16" "1")
8040 ;; The implicit %edi operand (operand 0, SImode in this 32-bit pattern)
;; confuses default length_vex computation.
8041 (set_attr "length_vex" "3")
8042 (set_attr "prefix" "maybe_vex")
8043 (set_attr "mode" "TI")])
;; Insn *sse2_maskmovdqu_rex64: 64-bit form (TARGET_SSE2 && TARGET_64BIT) of
;; %vmaskmovdqu.  The destination is V16QI memory addressed by DImode
;; register operand 0, which carries the D constraint; the unspec reads SSE
;; register operands 1 and 2 plus the previous contents of that memory.
;; length_vex is computed explicitly: 3 + 1 when operand 2 is an SSE register
;; at or above FIRST_REX_SSE_REG, otherwise 2 + 1.
8045 (define_insn "*sse2_maskmovdqu_rex64"
8046 [(set (mem:V16QI (match_operand:DI 0 "register_operand" "D"))
8047 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
8048 (match_operand:V16QI 2 "register_operand" "x")
8049 (mem:V16QI (match_dup 0))]
8051 "TARGET_SSE2 && TARGET_64BIT"
8052 ;; @@@ check ordering of operands in intel/nonintel syntax
8053 "%vmaskmovdqu\t{%2, %1|%1, %2}"
8054 [(set_attr "type" "ssemov")
8055 (set_attr "prefix_data16" "1")
8056 ;; The implicit %rdi operand confuses default length_vex computation.
8057 (set (attr "length_vex")
8058 (symbol_ref ("REGNO (operands[2]) >= FIRST_REX_SSE_REG ? 3 + 1 : 2 + 1")))
8059 (set_attr "prefix" "maybe_vex")
8060 (set_attr "mode" "TI")])
;; Insn sse_ldmxcsr: unspec_volatile taking SImode memory operand 0; the
;; memory attribute is load and atom_sse_attr is mxcsr.
;; NOTE(review): the unspec code, condition and assembler template are on
;; lines not visible in this extract.
8062 (define_insn "sse_ldmxcsr"
8063 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
8067 [(set_attr "type" "sse")
8068 (set_attr "atom_sse_attr" "mxcsr")
8069 (set_attr "prefix" "maybe_vex")
8070 (set_attr "memory" "load")])
;; Insn sse_stmxcsr: sets SImode memory operand 0 from an unspec_volatile
;; tagged UNSPECV_STMXCSR; the memory attribute is store and atom_sse_attr is
;; mxcsr.
;; NOTE(review): the condition and assembler template are on lines not
;; visible in this extract.
8072 (define_insn "sse_stmxcsr"
8073 [(set (match_operand:SI 0 "memory_operand" "=m")
8074 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
8077 [(set_attr "type" "sse")
8078 (set_attr "atom_sse_attr" "mxcsr")
8079 (set_attr "prefix" "maybe_vex")
8080 (set_attr "memory" "store")])
;; Expander sse_sfence (TARGET_SSE || TARGET_3DNOW_A): emits an
;; UNSPEC_SFENCE over operand 0.  The preparation code creates operand 0 as a
;; BLKmode MEM whose address is a Pmode SCRATCH and marks it volatile.
;; NOTE(review): the line carrying the destination of the set is not visible
;; in this extract.
8082 (define_expand "sse_sfence"
8084 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
8085 "TARGET_SSE || TARGET_3DNOW_A"
8087 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
8088 MEM_VOLATILE_P (operands[0]) = 1;
;; Insn *sse_sfence (TARGET_SSE || TARGET_3DNOW_A): sets BLKmode operand 0
;; from UNSPEC_SFENCE of itself.  Attributes: length_address 0,
;; atom_sse_attr fence, memory unknown.
;; NOTE(review): the assembler template is on a line not visible in this
;; extract.
8091 (define_insn "*sse_sfence"
8092 [(set (match_operand:BLK 0 "" "")
8093 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
8094 "TARGET_SSE || TARGET_3DNOW_A"
8096 [(set_attr "type" "sse")
8097 (set_attr "length_address" "0")
8098 (set_attr "atom_sse_attr" "fence")
8099 (set_attr "memory" "unknown")])
;; Insn sse2_clflush: unspec_volatile taking an address operand (predicate
;; address_operand, constraint p).  Attributes: atom_sse_attr fence, memory
;; unknown.
;; NOTE(review): the unspec code, condition and assembler template are on
;; lines not visible in this extract.
8101 (define_insn "sse2_clflush"
8102 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
8106 [(set_attr "type" "sse")
8107 (set_attr "atom_sse_attr" "fence")
8108 (set_attr "memory" "unknown")])
;; Expander sse2_mfence: emits an UNSPEC_MFENCE over operand 0.  The
;; preparation code creates operand 0 as a BLKmode MEM whose address is a
;; Pmode SCRATCH and marks it volatile.
;; NOTE(review): the destination line of the set and the condition are not
;; visible in this extract.
8110 (define_expand "sse2_mfence"
8112 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
8115 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
8116 MEM_VOLATILE_P (operands[0]) = 1;
;; Insn *sse2_mfence (TARGET_64BIT || TARGET_SSE2): sets BLKmode operand 0
;; from UNSPEC_MFENCE of itself.  Attributes: length_address 0,
;; atom_sse_attr fence, memory unknown.
;; NOTE(review): the assembler template is on a line not visible in this
;; extract.
8119 (define_insn "*sse2_mfence"
8120 [(set (match_operand:BLK 0 "" "")
8121 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
8122 "TARGET_64BIT || TARGET_SSE2"
8124 [(set_attr "type" "sse")
8125 (set_attr "length_address" "0")
8126 (set_attr "atom_sse_attr" "fence")
8127 (set_attr "memory" "unknown")])
;; Expander sse2_lfence: emits an UNSPEC_LFENCE over operand 0.  The
;; preparation code creates operand 0 as a BLKmode MEM whose address is a
;; Pmode SCRATCH and marks it volatile.
;; NOTE(review): the destination line of the set and the condition are not
;; visible in this extract.
8129 (define_expand "sse2_lfence"
8131 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
8134 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
8135 MEM_VOLATILE_P (operands[0]) = 1;
;; Insn *sse2_lfence: sets BLKmode operand 0 from UNSPEC_LFENCE of itself.
;; Attributes: length_address 0, atom_sse_attr lfence, memory unknown.
;; NOTE(review): the condition and assembler template are on lines not
;; visible in this extract.
8138 (define_insn "*sse2_lfence"
8139 [(set (match_operand:BLK 0 "" "")
8140 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
8143 [(set_attr "type" "sse")
8144 (set_attr "length_address" "0")
8145 (set_attr "atom_sse_attr" "lfence")
8146 (set_attr "memory" "unknown")])
;; Insn sse3_mwait: unspec_volatile over two SImode register operands pinned
;; by the a and c constraints.  The length attribute is fixed at 3.
;; NOTE(review): the unspec code, condition and assembler template are on
;; lines not visible in this extract.
8148 (define_insn "sse3_mwait"
8149 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
8150 (match_operand:SI 1 "register_operand" "c")]
8153 ;; 64bit version is "mwait %rax,%rcx". But only lower 32bits are used.
8154 ;; Since 32bit register operands are implicitly zero extended to 64bit,
8155 ;; we only need to set up 32bit registers.
8157 [(set_attr "length" "3")])
;; Insn sse3_monitor: 32-bit form (TARGET_SSE3 && !TARGET_64BIT) of monitor.
;; unspec_volatile over three SImode register operands pinned by the a, c and
;; d constraints, all printed explicitly in the template.  The length
;; attribute is fixed at 3.
;; NOTE(review): the unspec code is on a line not visible in this extract.
8159 (define_insn "sse3_monitor"
8160 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
8161 (match_operand:SI 1 "register_operand" "c")
8162 (match_operand:SI 2 "register_operand" "d")]
8164 "TARGET_SSE3 && !TARGET_64BIT"
8165 "monitor\t%0, %1, %2"
8166 [(set_attr "length" "3")])
;; Insn sse3_monitor64: 64-bit form (TARGET_SSE3 && TARGET_64BIT) of monitor.
;; Operand 0 is a DImode register pinned by the a constraint; operands 1 and
;; 2 stay SImode (constraints c and d) for the reason given in the comment
;; below.  The length attribute is fixed at 3.
;; NOTE(review): the unspec code and assembler template are on lines not
;; visible in this extract.
8168 (define_insn "sse3_monitor64"
8169 [(unspec_volatile [(match_operand:DI 0 "register_operand" "a")
8170 (match_operand:SI 1 "register_operand" "c")
8171 (match_operand:SI 2 "register_operand" "d")]
8173 "TARGET_SSE3 && TARGET_64BIT"
8174 ;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in
8175 ;; RCX and RDX are used. Since 32bit register operands are implicitly
8176 ;; zero extended to 64bit, we only need to set up 32bit registers.
8178 [(set_attr "length" "3")])
8180 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8182 ;; SSSE3 instructions
8184 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Insn *avx_phaddwv8hi3: VEX-encoded three-operand vphaddw on V8HI values.
;; The RTL selects the HImode elements of operand 1 (SSE register) and of
;; operand 2 (SSE register or memory) in adjacent pairs (0,1), (2,3), (4,5),
;; (6,7) via vec_select.
;; NOTE(review): the operators that combine each pair and assemble the
;; result, and the condition, are on lines not visible in this extract.
8186 (define_insn "*avx_phaddwv8hi3"
8187 [(set (match_operand:V8HI 0 "register_operand" "=x")
8193 (match_operand:V8HI 1 "register_operand" "x")
8194 (parallel [(const_int 0)]))
8195 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8197 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8198 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8201 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8202 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8204 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8205 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8210 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8211 (parallel [(const_int 0)]))
8212 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8214 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8215 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8218 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8219 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8221 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8222 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8224 "vphaddw\t{%2, %1, %0|%0, %1, %2}"
8225 [(set_attr "type" "sseiadd")
8226 (set_attr "prefix_extra" "1")
8227 (set_attr "prefix" "vex")
8228 (set_attr "mode" "TI")])
;; Insn ssse3_phaddwv8hi3: two-operand phaddw on V8HI values; operand 1 is
;; tied to the destination and operand 2 may be an SSE register or memory.
;; The RTL selects the HImode elements of each operand in adjacent pairs
;; (0,1), (2,3), (4,5), (6,7) via vec_select.
;; NOTE(review): the operators that combine each pair and assemble the
;; result, and the condition, are on lines not visible in this extract.
8230 (define_insn "ssse3_phaddwv8hi3"
8231 [(set (match_operand:V8HI 0 "register_operand" "=x")
8237 (match_operand:V8HI 1 "register_operand" "0")
8238 (parallel [(const_int 0)]))
8239 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8241 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8242 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8245 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8246 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8248 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8249 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8254 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8255 (parallel [(const_int 0)]))
8256 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8258 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8259 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8262 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8263 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8265 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8266 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8268 "phaddw\t{%2, %0|%0, %2}"
8269 [(set_attr "type" "sseiadd")
8270 (set_attr "atom_unit" "complex")
8271 (set_attr "prefix_data16" "1")
8272 (set_attr "prefix_extra" "1")
8273 (set_attr "mode" "TI")])
;; Insn ssse3_phaddwv4hi3: phaddw on V4HI values held in MMX registers
;; (constraint y); operand 1 is tied to the destination and operand 2 may be
;; an MMX register or memory.  HImode elements are selected in adjacent
;; pairs (0,1), (2,3) from each operand.  prefix_rex is computed with
;; x86_extended_reg_mentioned_p on the insn.
;; NOTE(review): the operators that combine each pair and assemble the
;; result, and the condition, are on lines not visible in this extract.
8275 (define_insn "ssse3_phaddwv4hi3"
8276 [(set (match_operand:V4HI 0 "register_operand" "=y")
8281 (match_operand:V4HI 1 "register_operand" "0")
8282 (parallel [(const_int 0)]))
8283 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8285 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8286 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8290 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8291 (parallel [(const_int 0)]))
8292 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8294 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8295 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8297 "phaddw\t{%2, %0|%0, %2}"
8298 [(set_attr "type" "sseiadd")
8299 (set_attr "atom_unit" "complex")
8300 (set_attr "prefix_extra" "1")
8301 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8302 (set_attr "mode" "DI")])
;; Insn *avx_phadddv4si3: VEX-encoded three-operand vphaddd on V4SI values.
;; SImode elements of operand 1 (SSE register) and operand 2 (SSE register
;; or memory) are selected in adjacent pairs (0,1), (2,3) via vec_select.
;; NOTE(review): the operators that combine each pair and assemble the
;; result, and the condition, are on lines not visible in this extract.
8304 (define_insn "*avx_phadddv4si3"
8305 [(set (match_operand:V4SI 0 "register_operand" "=x")
8310 (match_operand:V4SI 1 "register_operand" "x")
8311 (parallel [(const_int 0)]))
8312 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8314 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8315 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8319 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
8320 (parallel [(const_int 0)]))
8321 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8323 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8324 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
8326 "vphaddd\t{%2, %1, %0|%0, %1, %2}"
8327 [(set_attr "type" "sseiadd")
8328 (set_attr "prefix_extra" "1")
8329 (set_attr "prefix" "vex")
8330 (set_attr "mode" "TI")])
;; Insn ssse3_phadddv4si3: two-operand phaddd on V4SI values; operand 1 is
;; tied to the destination and operand 2 may be an SSE register or memory.
;; SImode elements are selected in adjacent pairs (0,1), (2,3) from each
;; operand via vec_select.
;; NOTE(review): the operators that combine each pair and assemble the
;; result, and the condition, are on lines not visible in this extract.
8332 (define_insn "ssse3_phadddv4si3"
8333 [(set (match_operand:V4SI 0 "register_operand" "=x")
8338 (match_operand:V4SI 1 "register_operand" "0")
8339 (parallel [(const_int 0)]))
8340 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8342 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8343 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8347 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
8348 (parallel [(const_int 0)]))
8349 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8351 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8352 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
8354 "phaddd\t{%2, %0|%0, %2}"
8355 [(set_attr "type" "sseiadd")
8356 (set_attr "atom_unit" "complex")
8357 (set_attr "prefix_data16" "1")
8358 (set_attr "prefix_extra" "1")
8359 (set_attr "mode" "TI")])
;; Insn ssse3_phadddv2si3: phaddd on V2SI values held in MMX registers
;; (constraint y); operand 1 is tied to the destination and operand 2 may be
;; an MMX register or memory.  Elements 0 and 1 of each operand are selected
;; via vec_select.  prefix_rex is computed with
;; x86_extended_reg_mentioned_p on the insn.
;; NOTE(review): the operators that combine the elements and assemble the
;; result, and the condition, are on lines not visible in this extract.
8361 (define_insn "ssse3_phadddv2si3"
8362 [(set (match_operand:V2SI 0 "register_operand" "=y")
8366 (match_operand:V2SI 1 "register_operand" "0")
8367 (parallel [(const_int 0)]))
8368 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8371 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
8372 (parallel [(const_int 0)]))
8373 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
8375 "phaddd\t{%2, %0|%0, %2}"
8376 [(set_attr "type" "sseiadd")
8377 (set_attr "atom_unit" "complex")
8378 (set_attr "prefix_extra" "1")
8379 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8380 (set_attr "mode" "DI")])
;; Insn *avx_phaddswv8hi3: VEX-encoded three-operand vphaddsw on V8HI values.
;; HImode elements of operand 1 (SSE register) and operand 2 (SSE register
;; or memory) are selected in adjacent pairs (0,1), (2,3), (4,5), (6,7) via
;; vec_select.
;; NOTE(review): the operators that combine each pair and assemble the
;; result, and the condition, are on lines not visible in this extract.
8382 (define_insn "*avx_phaddswv8hi3"
8383 [(set (match_operand:V8HI 0 "register_operand" "=x")
8389 (match_operand:V8HI 1 "register_operand" "x")
8390 (parallel [(const_int 0)]))
8391 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8393 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8394 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8397 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8398 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8400 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8401 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8406 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8407 (parallel [(const_int 0)]))
8408 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8410 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8411 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8414 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8415 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8417 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8418 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8420 "vphaddsw\t{%2, %1, %0|%0, %1, %2}"
8421 [(set_attr "type" "sseiadd")
8422 (set_attr "prefix_extra" "1")
8423 (set_attr "prefix" "vex")
8424 (set_attr "mode" "TI")])
;; Insn ssse3_phaddswv8hi3: two-operand phaddsw on V8HI values; operand 1 is
;; tied to the destination and operand 2 may be an SSE register or memory.
;; HImode elements are selected in adjacent pairs (0,1), (2,3), (4,5), (6,7)
;; from each operand via vec_select.
;; NOTE(review): the operators that combine each pair and assemble the
;; result, and the condition, are on lines not visible in this extract.
8426 (define_insn "ssse3_phaddswv8hi3"
8427 [(set (match_operand:V8HI 0 "register_operand" "=x")
8433 (match_operand:V8HI 1 "register_operand" "0")
8434 (parallel [(const_int 0)]))
8435 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8437 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8438 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8441 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8442 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8444 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8445 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8450 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8451 (parallel [(const_int 0)]))
8452 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8454 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8455 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8458 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8459 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8461 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8462 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8464 "phaddsw\t{%2, %0|%0, %2}"
8465 [(set_attr "type" "sseiadd")
8466 (set_attr "atom_unit" "complex")
8467 (set_attr "prefix_data16" "1")
8468 (set_attr "prefix_extra" "1")
8469 (set_attr "mode" "TI")])
;; 64-bit (V4HI, "y"-constrained, mode DI) variant of the phaddsw pattern.
;; Selects HI elements 0..3 of operand 1 and of operand 2 in adjacent pairs;
;; operand 1 is tied to the output ("0").  The pair-combining operator is on
;; lines not visible in this listing.
;; prefix_rex is computed per insn by x86_extended_reg_mentioned_p.
8471 (define_insn "ssse3_phaddswv4hi3"
8472 [(set (match_operand:V4HI 0 "register_operand" "=y")
8477 (match_operand:V4HI 1 "register_operand" "0")
8478 (parallel [(const_int 0)]))
8479 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8481 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8482 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8486 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8487 (parallel [(const_int 0)]))
8488 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8490 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8491 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8493 "phaddsw\t{%2, %0|%0, %2}"
8494 [(set_attr "type" "sseiadd")
8495 (set_attr "atom_unit" "complex")
8496 (set_attr "prefix_extra" "1")
8497 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8498 (set_attr "mode" "DI")])
;; AVX (VEX-encoded) V8HI horizontal pattern that emits vphsubw.
;; Selects HI elements 0..7 of operand 1, then of operand 2, in adjacent
;; pairs.  The operator applied to each pair is on lines missing from this
;; listing -- presumably a subtraction, judging by the mnemonic; confirm.
;; Non-destructive three-operand form; operand 2 may be memory ("xm").
8500 (define_insn "*avx_phsubwv8hi3"
8501 [(set (match_operand:V8HI 0 "register_operand" "=x")
8507 (match_operand:V8HI 1 "register_operand" "x")
8508 (parallel [(const_int 0)]))
8509 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8511 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8512 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8515 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8516 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8518 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8519 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8524 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8525 (parallel [(const_int 0)]))
8526 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8528 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8529 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8532 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8533 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8535 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8536 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8538 "vphsubw\t{%2, %1, %0|%0, %1, %2}"
8539 [(set_attr "type" "sseiadd")
8540 (set_attr "prefix_extra" "1")
8541 (set_attr "prefix" "vex")
8542 (set_attr "mode" "TI")])
;; SSSE3 (legacy-encoded) V8HI horizontal pattern that emits phsubw.
;; Pairwise selection of HI elements 0..7 from each operand; operand 1 is
;; tied to the output ("0"), so only %2 and %0 are printed.
;; The pair-combining operator is on lines not visible in this listing.
8544 (define_insn "ssse3_phsubwv8hi3"
8545 [(set (match_operand:V8HI 0 "register_operand" "=x")
8551 (match_operand:V8HI 1 "register_operand" "0")
8552 (parallel [(const_int 0)]))
8553 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8555 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8556 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8559 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8560 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8562 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8563 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8568 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8569 (parallel [(const_int 0)]))
8570 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8572 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8573 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8576 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8577 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8579 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8580 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8582 "phsubw\t{%2, %0|%0, %2}"
8583 [(set_attr "type" "sseiadd")
8584 (set_attr "atom_unit" "complex")
8585 (set_attr "prefix_data16" "1")
8586 (set_attr "prefix_extra" "1")
8587 (set_attr "mode" "TI")])
;; 64-bit (V4HI, "y"-constrained, mode DI) variant of the phsubw pattern.
;; Pairwise selection of HI elements 0..3 from each operand; operand 1 is
;; tied to the output ("0").  The pair-combining operator is on lines not
;; visible in this listing.
;; prefix_rex is computed per insn by x86_extended_reg_mentioned_p.
8589 (define_insn "ssse3_phsubwv4hi3"
8590 [(set (match_operand:V4HI 0 "register_operand" "=y")
8595 (match_operand:V4HI 1 "register_operand" "0")
8596 (parallel [(const_int 0)]))
8597 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8599 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8600 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8604 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8605 (parallel [(const_int 0)]))
8606 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8608 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8609 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8611 "phsubw\t{%2, %0|%0, %2}"
8612 [(set_attr "type" "sseiadd")
8613 (set_attr "atom_unit" "complex")
8614 (set_attr "prefix_extra" "1")
8615 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8616 (set_attr "mode" "DI")])
;; AVX (VEX-encoded) V4SI horizontal pattern that emits vphsubd.
;; Selects SI elements 0..3 of operand 1, then of operand 2, in adjacent
;; pairs.  The operator applied to each pair is on lines missing from this
;; listing -- presumably a subtraction, judging by the mnemonic; confirm.
;; Non-destructive three-operand form; operand 2 may be memory ("xm").
8618 (define_insn "*avx_phsubdv4si3"
8619 [(set (match_operand:V4SI 0 "register_operand" "=x")
8624 (match_operand:V4SI 1 "register_operand" "x")
8625 (parallel [(const_int 0)]))
8626 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8628 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8629 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8633 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
8634 (parallel [(const_int 0)]))
8635 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8637 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8638 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
8640 "vphsubd\t{%2, %1, %0|%0, %1, %2}"
8641 [(set_attr "type" "sseiadd")
8642 (set_attr "prefix_extra" "1")
8643 (set_attr "prefix" "vex")
8644 (set_attr "mode" "TI")])
;; SSSE3 (legacy-encoded) V4SI horizontal pattern that emits phsubd.
;; Pairwise selection of SI elements 0..3 from each operand; operand 1 is
;; tied to the output ("0"), so only %2 and %0 are printed.
;; The pair-combining operator is on lines not visible in this listing.
8646 (define_insn "ssse3_phsubdv4si3"
8647 [(set (match_operand:V4SI 0 "register_operand" "=x")
8652 (match_operand:V4SI 1 "register_operand" "0")
8653 (parallel [(const_int 0)]))
8654 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8656 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8657 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8661 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
8662 (parallel [(const_int 0)]))
8663 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8665 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8666 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
8668 "phsubd\t{%2, %0|%0, %2}"
8669 [(set_attr "type" "sseiadd")
8670 (set_attr "atom_unit" "complex")
8671 (set_attr "prefix_data16" "1")
8672 (set_attr "prefix_extra" "1")
8673 (set_attr "mode" "TI")])
;; 64-bit (V2SI, "y"-constrained, mode DI) variant of the phsubd pattern.
;; Selects SI elements 0 and 1 of operand 1 and of operand 2; operand 1 is
;; tied to the output ("0").  The pair-combining operator is on lines not
;; visible in this listing.
;; prefix_rex is computed per insn by x86_extended_reg_mentioned_p.
8675 (define_insn "ssse3_phsubdv2si3"
8676 [(set (match_operand:V2SI 0 "register_operand" "=y")
8680 (match_operand:V2SI 1 "register_operand" "0")
8681 (parallel [(const_int 0)]))
8682 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8685 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
8686 (parallel [(const_int 0)]))
8687 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
8689 "phsubd\t{%2, %0|%0, %2}"
8690 [(set_attr "type" "sseiadd")
8691 (set_attr "atom_unit" "complex")
8692 (set_attr "prefix_extra" "1")
8693 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8694 (set_attr "mode" "DI")])
;; AVX (VEX-encoded) V8HI horizontal pattern that emits vphsubsw.
;; Selects HI elements 0..7 of operand 1, then of operand 2, in adjacent
;; pairs.  The operator applied to each pair is on lines missing from this
;; listing -- presumably a signed-saturating subtraction, judging by the
;; mnemonic; confirm against the full pattern.
;; Non-destructive three-operand form; operand 2 may be memory ("xm").
8696 (define_insn "*avx_phsubswv8hi3"
8697 [(set (match_operand:V8HI 0 "register_operand" "=x")
8703 (match_operand:V8HI 1 "register_operand" "x")
8704 (parallel [(const_int 0)]))
8705 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8707 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8708 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8711 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8712 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8714 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8715 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8720 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8721 (parallel [(const_int 0)]))
8722 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8724 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8725 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8728 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8729 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8731 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8732 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8734 "vphsubsw\t{%2, %1, %0|%0, %1, %2}"
8735 [(set_attr "type" "sseiadd")
8736 (set_attr "prefix_extra" "1")
8737 (set_attr "prefix" "vex")
8738 (set_attr "mode" "TI")])
;; SSSE3 (legacy-encoded) V8HI horizontal pattern that emits phsubsw.
;; Pairwise selection of HI elements 0..7 from each operand; operand 1 is
;; tied to the output ("0"), so only %2 and %0 are printed.
;; The pair-combining operator is on lines not visible in this listing.
8740 (define_insn "ssse3_phsubswv8hi3"
8741 [(set (match_operand:V8HI 0 "register_operand" "=x")
8747 (match_operand:V8HI 1 "register_operand" "0")
8748 (parallel [(const_int 0)]))
8749 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8751 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8752 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8755 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8756 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8758 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8759 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8764 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8765 (parallel [(const_int 0)]))
8766 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8768 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8769 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8772 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8773 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8775 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8776 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8778 "phsubsw\t{%2, %0|%0, %2}"
8779 [(set_attr "type" "sseiadd")
8780 (set_attr "atom_unit" "complex")
8781 (set_attr "prefix_data16" "1")
8782 (set_attr "prefix_extra" "1")
8783 (set_attr "mode" "TI")])
;; 64-bit (V4HI, "y"-constrained, mode DI) variant of the phsubsw pattern.
;; Pairwise selection of HI elements 0..3 from each operand; operand 1 is
;; tied to the output ("0").  The pair-combining operator is on lines not
;; visible in this listing.
;; prefix_rex is computed per insn by x86_extended_reg_mentioned_p.
8785 (define_insn "ssse3_phsubswv4hi3"
8786 [(set (match_operand:V4HI 0 "register_operand" "=y")
8791 (match_operand:V4HI 1 "register_operand" "0")
8792 (parallel [(const_int 0)]))
8793 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8795 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8796 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8800 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8801 (parallel [(const_int 0)]))
8802 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8804 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8805 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8807 "phsubsw\t{%2, %0|%0, %2}"
8808 [(set_attr "type" "sseiadd")
8809 (set_attr "atom_unit" "complex")
8810 (set_attr "prefix_extra" "1")
8811 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8812 (set_attr "mode" "DI")])
;; AVX (VEX-encoded) pattern that emits vpmaddubsw: two V16QI inputs, one
;; V8HI result.
;; Only fragments of the RTL are visible in this listing: each operand is
;; selected twice, once with an index list starting at 0 and once with a
;; list starting at 1 (the last one ending at 15).  The remaining indices
;; and the extend/multiply/add operators are on missing lines -- consult the
;; full pattern before relying on the exact semantics.
8814 (define_insn "*avx_pmaddubsw128"
8815 [(set (match_operand:V8HI 0 "register_operand" "=x")
8820 (match_operand:V16QI 1 "register_operand" "x")
8821 (parallel [(const_int 0)
8831 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
8832 (parallel [(const_int 0)
8842 (vec_select:V16QI (match_dup 1)
8843 (parallel [(const_int 1)
8852 (vec_select:V16QI (match_dup 2)
8853 (parallel [(const_int 1)
8860 (const_int 15)]))))))]
8862 "vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
8863 [(set_attr "type" "sseiadd")
8864 (set_attr "prefix_extra" "1")
8865 (set_attr "prefix" "vex")
8866 (set_attr "mode" "TI")])
;; SSSE3 (legacy-encoded) pattern that emits pmaddubsw: two V16QI inputs,
;; one V8HI result; operand 1 is tied to the output ("0").
;; Only fragments of the RTL are visible in this listing (index lists that
;; start at 0 and at 1, the last ending at 15); the operators combining the
;; selected elements are on missing lines.
8868 (define_insn "ssse3_pmaddubsw128"
8869 [(set (match_operand:V8HI 0 "register_operand" "=x")
8874 (match_operand:V16QI 1 "register_operand" "0")
8875 (parallel [(const_int 0)
8885 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
8886 (parallel [(const_int 0)
8896 (vec_select:V16QI (match_dup 1)
8897 (parallel [(const_int 1)
8906 (vec_select:V16QI (match_dup 2)
8907 (parallel [(const_int 1)
8914 (const_int 15)]))))))]
8916 "pmaddubsw\t{%2, %0|%0, %2}"
8917 [(set_attr "type" "sseiadd")
8918 (set_attr "atom_unit" "simul")
8919 (set_attr "prefix_data16" "1")
8920 (set_attr "prefix_extra" "1")
8921 (set_attr "mode" "TI")])
;; 64-bit ("y"-constrained, mode DI) pmaddubsw pattern: two V8QI inputs, one
;; V4HI result; operand 1 is tied to the output ("0").
;; Only fragments of the RTL are visible in this listing (index lists that
;; start at 0 and at 1, the last ending at 7); the operators combining the
;; selected elements are on missing lines.
;; prefix_rex is computed per insn by x86_extended_reg_mentioned_p.
8923 (define_insn "ssse3_pmaddubsw"
8924 [(set (match_operand:V4HI 0 "register_operand" "=y")
8929 (match_operand:V8QI 1 "register_operand" "0")
8930 (parallel [(const_int 0)
8936 (match_operand:V8QI 2 "nonimmediate_operand" "ym")
8937 (parallel [(const_int 0)
8943 (vec_select:V8QI (match_dup 1)
8944 (parallel [(const_int 1)
8949 (vec_select:V8QI (match_dup 2)
8950 (parallel [(const_int 1)
8953 (const_int 7)]))))))]
8955 "pmaddubsw\t{%2, %0|%0, %2}"
8956 [(set_attr "type" "sseiadd")
8957 (set_attr "atom_unit" "simul")
8958 (set_attr "prefix_extra" "1")
8959 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8960 (set_attr "mode" "DI")])
;; Named expander for the V8HI pmulhrsw operation.
;; Both inputs are accepted as nonimmediate operands; the preparation
;; statement calls ix86_fixup_binary_operands_no_copy (MULT, ...) on them
;; before the insn is generated.
;; The visible RTL includes a V8HI constant vector of eight 1s; the
;; surrounding arithmetic operators are on lines missing from this listing.
8962 (define_expand "ssse3_pmulhrswv8hi3"
8963 [(set (match_operand:V8HI 0 "register_operand" "")
8970 (match_operand:V8HI 1 "nonimmediate_operand" ""))
8972 (match_operand:V8HI 2 "nonimmediate_operand" "")))
8974 (const_vector:V8HI [(const_int 1) (const_int 1)
8975 (const_int 1) (const_int 1)
8976 (const_int 1) (const_int 1)
8977 (const_int 1) (const_int 1)]))
8980 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; AVX matcher for the V8HI pmulhrsw RTL; emits three-operand vpmulhrsw.
;; Enabled when TARGET_AVX holds and ix86_binary_operator_ok accepts the
;; operands for MULT.  The "%" in operand 1's constraint marks operands 1
;; and 2 as commutative.
;; The arithmetic operators around the operands and the constant vector of
;; 1s are on lines missing from this listing.
8982 (define_insn "*avx_pmulhrswv8hi3"
8983 [(set (match_operand:V8HI 0 "register_operand" "=x")
8990 (match_operand:V8HI 1 "nonimmediate_operand" "%x"))
8992 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
8994 (const_vector:V8HI [(const_int 1) (const_int 1)
8995 (const_int 1) (const_int 1)
8996 (const_int 1) (const_int 1)
8997 (const_int 1) (const_int 1)]))
8999 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
9000 "vpmulhrsw\t{%2, %1, %0|%0, %1, %2}"
9001 [(set_attr "type" "sseimul")
9002 (set_attr "prefix_extra" "1")
9003 (set_attr "prefix" "vex")
9004 (set_attr "mode" "TI")])
;; SSSE3 matcher for the V8HI pmulhrsw RTL; emits two-operand pmulhrsw.
;; Enabled when TARGET_SSSE3 holds and ix86_binary_operator_ok accepts the
;; operands for MULT.  Operand 1 is tied to the output and commutative with
;; operand 2 ("%0").
;; The arithmetic operators around the operands and the constant vector of
;; 1s are on lines missing from this listing.
9006 (define_insn "*ssse3_pmulhrswv8hi3"
9007 [(set (match_operand:V8HI 0 "register_operand" "=x")
9014 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
9016 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
9018 (const_vector:V8HI [(const_int 1) (const_int 1)
9019 (const_int 1) (const_int 1)
9020 (const_int 1) (const_int 1)
9021 (const_int 1) (const_int 1)]))
9023 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
9024 "pmulhrsw\t{%2, %0|%0, %2}"
9025 [(set_attr "type" "sseimul")
9026 (set_attr "prefix_data16" "1")
9027 (set_attr "prefix_extra" "1")
9028 (set_attr "mode" "TI")])
;; Named expander for the V4HI (64-bit) pmulhrsw operation.
;; The preparation statement calls ix86_fixup_binary_operands_no_copy
;; (MULT, V4HImode, operands) before the insn is generated.
;; The visible RTL includes a V4HI constant vector of four 1s; the
;; surrounding arithmetic operators are on lines missing from this listing.
9030 (define_expand "ssse3_pmulhrswv4hi3"
9031 [(set (match_operand:V4HI 0 "register_operand" "")
9038 (match_operand:V4HI 1 "nonimmediate_operand" ""))
9040 (match_operand:V4HI 2 "nonimmediate_operand" "")))
9042 (const_vector:V4HI [(const_int 1) (const_int 1)
9043 (const_int 1) (const_int 1)]))
9046 "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
;; SSSE3 matcher for the V4HI pmulhrsw RTL on "y"-constrained (64-bit)
;; registers; emits two-operand pmulhrsw.
;; Enabled when TARGET_SSSE3 holds and ix86_binary_operator_ok accepts the
;; operands for MULT.  Operand 1 is tied to the output and commutative with
;; operand 2 ("%0").
;; prefix_rex is computed per insn by x86_extended_reg_mentioned_p.
9048 (define_insn "*ssse3_pmulhrswv4hi3"
9049 [(set (match_operand:V4HI 0 "register_operand" "=y")
9056 (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
9058 (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
9060 (const_vector:V4HI [(const_int 1) (const_int 1)
9061 (const_int 1) (const_int 1)]))
9063 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
9064 "pmulhrsw\t{%2, %0|%0, %2}"
9065 [(set_attr "type" "sseimul")
9066 (set_attr "prefix_extra" "1")
9067 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9068 (set_attr "mode" "DI")])
;; AVX (VEX-encoded) V16QI pattern that emits three-operand vpshufb.
;; Modelled as an unspec over operands 1 and 2; the unspec code and the
;; insn condition are on lines missing from this listing.
;; The ';' after the template string opens an empty comment and has no
;; effect on the pattern.
9070 (define_insn "*avx_pshufbv16qi3"
9071 [(set (match_operand:V16QI 0 "register_operand" "=x")
9072 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
9073 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
9076 "vpshufb\t{%2, %1, %0|%0, %1, %2}";
9077 [(set_attr "type" "sselog1")
9078 (set_attr "prefix_extra" "1")
9079 (set_attr "prefix" "vex")
9080 (set_attr "mode" "TI")])
;; SSSE3 (legacy-encoded) V16QI pattern that emits two-operand pshufb;
;; operand 1 is tied to the output ("0").
;; Modelled as an unspec over operands 1 and 2; the unspec code and the
;; insn condition are on lines missing from this listing.
;; The ';' after the template string opens an empty comment and has no
;; effect on the pattern.
9082 (define_insn "ssse3_pshufbv16qi3"
9083 [(set (match_operand:V16QI 0 "register_operand" "=x")
9084 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
9085 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
9088 "pshufb\t{%2, %0|%0, %2}";
9089 [(set_attr "type" "sselog1")
9090 (set_attr "prefix_data16" "1")
9091 (set_attr "prefix_extra" "1")
9092 (set_attr "mode" "TI")])
;; 64-bit (V8QI, "y"-constrained, mode DI) pshufb pattern; operand 1 is
;; tied to the output ("0").
;; Modelled as an unspec over operands 1 and 2; the unspec code and the
;; insn condition are on lines missing from this listing.
;; prefix_rex is computed per insn by x86_extended_reg_mentioned_p.
;; The ';' after the template string opens an empty comment and has no
;; effect on the pattern.
9094 (define_insn "ssse3_pshufbv8qi3"
9095 [(set (match_operand:V8QI 0 "register_operand" "=y")
9096 (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0")
9097 (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
9100 "pshufb\t{%2, %0|%0, %2}";
9101 [(set_attr "type" "sselog1")
9102 (set_attr "prefix_extra" "1")
9103 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9104 (set_attr "mode" "DI")])
;; AVX (VEX-encoded) vpsign pattern, instantiated for every mode in
;; SSEMODE124 (V16QI, V8HI, V4SI); <ssevecsize> supplies the mnemonic's
;; element-size suffix.
;; The operator wrapping operands 1 and 2 (and its code) and the insn
;; condition are on lines missing from this listing.
;; The ';' after the template string opens an empty comment and has no
;; effect on the pattern.
9106 (define_insn "*avx_psign<mode>3"
9107 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
9109 [(match_operand:SSEMODE124 1 "register_operand" "x")
9110 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")]
9113 "vpsign<ssevecsize>\t{%2, %1, %0|%0, %1, %2}";
9114 [(set_attr "type" "sselog1")
9115 (set_attr "prefix_extra" "1")
9116 (set_attr "prefix" "vex")
9117 (set_attr "mode" "TI")])
;; SSSE3 (legacy-encoded) psign pattern for the 128-bit modes in SSEMODE124
;; (V16QI, V8HI, V4SI); operand 1 is tied to the output ("0").
;; Shares its name template with the MMXMODEI variant that follows; the two
;; iterate over different mode lists.
;; The operator wrapping operands 1 and 2 and the insn condition are on
;; lines missing from this listing.
9119 (define_insn "ssse3_psign<mode>3"
9120 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
9122 [(match_operand:SSEMODE124 1 "register_operand" "0")
9123 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")]
9126 "psign<ssevecsize>\t{%2, %0|%0, %2}";
9127 [(set_attr "type" "sselog1")
9128 (set_attr "prefix_data16" "1")
9129 (set_attr "prefix_extra" "1")
9130 (set_attr "mode" "TI")])
;; psign pattern for the modes in MMXMODEI on "y"-constrained registers
;; (mode DI); <mmxvecsize> supplies the element-size suffix and operand 1 is
;; tied to the output ("0").
;; MMXMODEI is defined outside this listing -- presumably the 64-bit integer
;; vector modes; confirm against its definition.
;; The operator wrapping operands 1 and 2 and the insn condition are on
;; lines missing from this listing.
;; prefix_rex is computed per insn by x86_extended_reg_mentioned_p.
9132 (define_insn "ssse3_psign<mode>3"
9133 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
9135 [(match_operand:MMXMODEI 1 "register_operand" "0")
9136 (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
9139 "psign<mmxvecsize>\t{%2, %0|%0, %2}";
9140 [(set_attr "type" "sselog1")
9141 (set_attr "prefix_extra" "1")
9142 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9143 (set_attr "mode" "DI")])
;; AVX (VEX-encoded) TImode pattern that emits four-operand vpalignr.
;; Operand 3 must satisfy const_0_to_255_mul_8_operand; the output code
;; divides it by 8 before printing, so the RTL carries eight times the
;; value the instruction encodes (presumably a bit count converted to a
;; byte count -- confirm against the predicate).
;; The unspec code, insn condition and the braces around the C output
;; block are on lines missing from this listing.
9145 (define_insn "*avx_palignrti"
9146 [(set (match_operand:TI 0 "register_operand" "=x")
9147 (unspec:TI [(match_operand:TI 1 "register_operand" "x")
9148 (match_operand:TI 2 "nonimmediate_operand" "xm")
9149 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
9153 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
9154 return "vpalignr\t{%3, %2, %1, %0|%0, %1, %2, %3}";
9156 [(set_attr "type" "sseishft")
9157 (set_attr "prefix_extra" "1")
9158 (set_attr "length_immediate" "1")
9159 (set_attr "prefix" "vex")
9160 (set_attr "mode" "TI")])
;; SSSE3 (legacy-encoded) TImode pattern that emits palignr; operand 1 is
;; tied to the output ("0").
;; Operand 3 must satisfy const_0_to_255_mul_8_operand; the output code
;; divides it by 8 before printing the immediate.
;; The unspec code, insn condition and the braces around the C output
;; block are on lines missing from this listing.
9162 (define_insn "ssse3_palignrti"
9163 [(set (match_operand:TI 0 "register_operand" "=x")
9164 (unspec:TI [(match_operand:TI 1 "register_operand" "0")
9165 (match_operand:TI 2 "nonimmediate_operand" "xm")
9166 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
9170 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
9171 return "palignr\t{%3, %2, %0|%0, %2, %3}";
9173 [(set_attr "type" "sseishft")
9174 (set_attr "atom_unit" "sishuf")
9175 (set_attr "prefix_data16" "1")
9176 (set_attr "prefix_extra" "1")
9177 (set_attr "length_immediate" "1")
9178 (set_attr "mode" "TI")])
;; 64-bit (DImode, "y"-constrained) palignr pattern; operand 1 is tied to
;; the output ("0").
;; Operand 3 must satisfy const_0_to_255_mul_8_operand; the output code
;; divides it by 8 before printing the immediate.
;; prefix_rex is computed per insn by x86_extended_reg_mentioned_p.
;; The unspec code, insn condition and the braces around the C output
;; block are on lines missing from this listing.
9180 (define_insn "ssse3_palignrdi"
9181 [(set (match_operand:DI 0 "register_operand" "=y")
9182 (unspec:DI [(match_operand:DI 1 "register_operand" "0")
9183 (match_operand:DI 2 "nonimmediate_operand" "ym")
9184 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
9188 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
9189 return "palignr\t{%3, %2, %0|%0, %2, %3}";
9191 [(set_attr "type" "sseishft")
9192 (set_attr "atom_unit" "sishuf")
9193 (set_attr "prefix_extra" "1")
9194 (set_attr "length_immediate" "1")
9195 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9196 (set_attr "mode" "DI")])
;; Standard-named element-wise absolute value for the SSEMODE124 modes
;; (V16QI, V8HI, V4SI); emits pabs<ssevecsize> from a register or memory
;; source.
;; One pattern serves both encodings: the template starts with "%v" and the
;; prefix attribute is "maybe_vex" (the "v" is presumably emitted only when
;; AVX is enabled -- confirm in the i386 operand-printing code).
;; The insn condition is on a line missing from this listing.
9198 (define_insn "abs<mode>2"
9199 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
9200 (abs:SSEMODE124 (match_operand:SSEMODE124 1 "nonimmediate_operand" "xm")))]
9202 "%vpabs<ssevecsize>\t{%1, %0|%0, %1}"
9203 [(set_attr "type" "sselog1")
9204 (set_attr "prefix_data16" "1")
9205 (set_attr "prefix_extra" "1")
9206 (set_attr "prefix" "maybe_vex")
9207 (set_attr "mode" "TI")])
;; Standard-named element-wise absolute value for the MMXMODEI modes on
;; "y"-constrained registers (mode DI); emits pabs<mmxvecsize>.
;; prefix_rep is explicitly forced to "0" here and prefix_rex is computed
;; per insn by x86_extended_reg_mentioned_p.
;; The insn condition is on a line missing from this listing.
;; The ';' after the template string opens an empty comment and has no
;; effect on the pattern.
9209 (define_insn "abs<mode>2"
9210 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
9211 (abs:MMXMODEI (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
9213 "pabs<mmxvecsize>\t{%1, %0|%0, %1}";
9214 [(set_attr "type" "sselog1")
9215 (set_attr "prefix_rep" "0")
9216 (set_attr "prefix_extra" "1")
9217 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9218 (set_attr "mode" "DI")])
9220 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9222 ;; AMD SSE4A instructions
9224 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; SSE4A scalar store pattern: writes a MODEF value held in an SSE register
;; ("x") to memory using movnts<ssemodefsuffix>.
;; The wrapping operator (and its code) and the insn condition are on lines
;; missing from this listing.
9226 (define_insn "sse4a_movnt<mode>"
9227 [(set (match_operand:MODEF 0 "memory_operand" "=m")
9229 [(match_operand:MODEF 1 "register_operand" "x")]
9232 "movnts<ssemodefsuffix>\t{%1, %0|%0, %1}"
9233 [(set_attr "type" "ssemov")
9234 (set_attr "mode" "<MODE>")])
;; SSE4A store pattern taking a whole SSEMODEF2P vector register: element 0
;; is selected (vec_select with index 0) and stored to a scalar-mode memory
;; operand using movnts<ssemodesuffixf2c>.
;; The unspec code and the insn condition are on lines missing from this
;; listing.
9236 (define_insn "sse4a_vmmovnt<mode>"
9237 [(set (match_operand:<ssescalarmode> 0 "memory_operand" "=m")
9238 (unspec:<ssescalarmode>
9239 [(vec_select:<ssescalarmode>
9240 (match_operand:SSEMODEF2P 1 "register_operand" "x")
9241 (parallel [(const_int 0)]))]
9244 "movnts<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
9245 [(set_attr "type" "ssemov")
9246 (set_attr "mode" "<ssescalarmode>")])
;; SSE4A extrq, immediate form: operand 1 is tied to the output ("0") and
;; operands 2 and 3 are integer constants printed as two immediates
;; (length_immediate is 2).
;; The unspec code and the insn condition are on lines missing from this
;; listing.
9248 (define_insn "sse4a_extrqi"
9249 [(set (match_operand:V2DI 0 "register_operand" "=x")
9250 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9251 (match_operand 2 "const_int_operand" "")
9252 (match_operand 3 "const_int_operand" "")]
9255 "extrq\t{%3, %2, %0|%0, %2, %3}"
9256 [(set_attr "type" "sse")
9257 (set_attr "prefix_data16" "1")
9258 (set_attr "length_immediate" "2")
9259 (set_attr "mode" "TI")])
;; SSE4A extrq, register form: operand 1 is tied to the output ("0") and
;; the control value comes from a V16QI register (operand 2).
;; The unspec code and the insn condition are on lines missing from this
;; listing.
9261 (define_insn "sse4a_extrq"
9262 [(set (match_operand:V2DI 0 "register_operand" "=x")
9263 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9264 (match_operand:V16QI 2 "register_operand" "x")]
9267 "extrq\t{%2, %0|%0, %2}"
9268 [(set_attr "type" "sse")
9269 (set_attr "prefix_data16" "1")
9270 (set_attr "mode" "TI")])
;; SSE4A insertq, immediate form: operand 1 is tied to the output ("0"),
;; operand 2 is a V2DI register, and operands 3 and 4 are integer constants
;; printed as two immediates (length_immediate is 2).
;; prefix_data16 is forced to "0" and prefix_rep to "1" for this encoding.
;; The unspec code and the insn condition are on lines missing from this
;; listing.
9272 (define_insn "sse4a_insertqi"
9273 [(set (match_operand:V2DI 0 "register_operand" "=x")
9274 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9275 (match_operand:V2DI 2 "register_operand" "x")
9276 (match_operand 3 "const_int_operand" "")
9277 (match_operand 4 "const_int_operand" "")]
9280 "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
9281 [(set_attr "type" "sseins")
9282 (set_attr "prefix_data16" "0")
9283 (set_attr "prefix_rep" "1")
9284 (set_attr "length_immediate" "2")
9285 (set_attr "mode" "TI")])
;; SSE4A insertq, register form: operand 1 is tied to the output ("0") and
;; operand 2 is a V2DI register.
;; prefix_data16 is forced to "0" and prefix_rep to "1" for this encoding.
;; The unspec code and the insn condition are on lines missing from this
;; listing.
9287 (define_insn "sse4a_insertq"
9288 [(set (match_operand:V2DI 0 "register_operand" "=x")
9289 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9290 (match_operand:V2DI 2 "register_operand" "x")]
9293 "insertq\t{%2, %0|%0, %2}"
9294 [(set_attr "type" "sseins")
9295 (set_attr "prefix_data16" "0")
9296 (set_attr "prefix_rep" "1")
9297 (set_attr "mode" "TI")])
9299 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9301 ;; Intel SSE4.1 instructions
9303 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; AVX immediate blend over the AVXMODEF2P modes; emits four-operand
;; vblendp<suffix>.
;; Expressed as a vec_merge whose first input is operand 2 and second input
;; is operand 1, with operand 3 as the constant selection mask (restricted
;; by const_0_to_<blendbits>_operand).
;; The insn condition is on a line missing from this listing.
9305 (define_insn "avx_blendp<avxmodesuffixf2c><avxmodesuffix>"
9306 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
9307 (vec_merge:AVXMODEF2P
9308 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
9309 (match_operand:AVXMODEF2P 1 "register_operand" "x")
9310 (match_operand:SI 3 "const_0_to_<blendbits>_operand" "n")))]
9312 "vblendp<avxmodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9313 [(set_attr "type" "ssemov")
9314 (set_attr "prefix_extra" "1")
9315 (set_attr "length_immediate" "1")
9316 (set_attr "prefix" "vex")
9317 (set_attr "mode" "<avxvecmode>")])
;; AVX variable blend over the AVXMODEF2P modes; emits four-operand
;; vblendvp<suffix> with the selector in a register (operand 3).
;; length_immediate is "1" although no operand is a constant -- presumably
;; because the encoding carries the fourth register in an immediate byte;
;; confirm against the instruction reference.
;; The wrapping operator (and its code) and the insn condition are on lines
;; missing from this listing.
9319 (define_insn "avx_blendvp<avxmodesuffixf2c><avxmodesuffix>"
9320 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
9322 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
9323 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
9324 (match_operand:AVXMODEF2P 3 "register_operand" "x")]
9327 "vblendvp<avxmodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9328 [(set_attr "type" "ssemov")
9329 (set_attr "prefix_extra" "1")
9330 (set_attr "length_immediate" "1")
9331 (set_attr "prefix" "vex")
9332 (set_attr "mode" "<avxvecmode>")])
;; SSE4.1 immediate blend over the SSEMODEF2P modes; emits blendp<suffix>.
;; Expressed as a vec_merge whose first input is operand 2 and second input
;; is operand 1 (tied to the output via "0"), with operand 3 as the constant
;; selection mask (restricted by const_0_to_<blendbits>_operand).
;; The insn condition is on a line missing from this listing.
9334 (define_insn "sse4_1_blendp<ssemodesuffixf2c>"
9335 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
9336 (vec_merge:SSEMODEF2P
9337 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
9338 (match_operand:SSEMODEF2P 1 "register_operand" "0")
9339 (match_operand:SI 3 "const_0_to_<blendbits>_operand" "n")))]
9341 "blendp<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}"
9342 [(set_attr "type" "ssemov")
9343 (set_attr "prefix_data16" "1")
9344 (set_attr "prefix_extra" "1")
9345 (set_attr "length_immediate" "1")
9346 (set_attr "mode" "<MODE>")])
;; SSE4.1 variable blend over the SSEMODEF2P modes; emits blendvp<suffix>.
;; The selector (operand 3) uses the "Yz" constraint while operands 0-2 use
;; the *_not_xmm0 predicates -- i.e. the selector is kept apart from the
;; data operands (presumably pinned to xmm0; confirm against the i386
;; constraint definitions).  Operand 1 is tied to the output ("0").
;; The wrapping operator (and its code) and the insn condition are on lines
;; missing from this listing.
9348 (define_insn "sse4_1_blendvp<ssemodesuffixf2c>"
9349 [(set (match_operand:SSEMODEF2P 0 "reg_not_xmm0_operand" "=x")
9351 [(match_operand:SSEMODEF2P 1 "reg_not_xmm0_operand" "0")
9352 (match_operand:SSEMODEF2P 2 "nonimm_not_xmm0_operand" "xm")
9353 (match_operand:SSEMODEF2P 3 "register_operand" "Yz")]
9356 "blendvp<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}"
9357 [(set_attr "type" "ssemov")
9358 (set_attr "prefix_data16" "1")
9359 (set_attr "prefix_extra" "1")
9360 (set_attr "mode" "<MODE>")])
;; AVX pattern over the AVXMODEF2P modes that emits four-operand
;; vdpp<suffix> with an 8-bit immediate (operand 3, 0..255).
;; Operands 1 and 2 are marked commutative by the "%" in operand 1's
;; constraint.
;; The wrapping operator (and its code) and the insn condition are on lines
;; missing from this listing.
9362 (define_insn "avx_dpp<avxmodesuffixf2c><avxmodesuffix>"
9363 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
9365 [(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
9366 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
9367 (match_operand:SI 3 "const_0_to_255_operand" "n")]
9370 "vdpp<avxmodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9371 [(set_attr "type" "ssemul")
9372 (set_attr "prefix" "vex")
9373 (set_attr "prefix_extra" "1")
9374 (set_attr "length_immediate" "1")
9375 (set_attr "mode" "<avxvecmode>")])
;; SSE4.1 pattern over the SSEMODEF2P modes that emits dpp<suffix> with an
;; 8-bit immediate (operand 3, 0..255).
;; Operand 1 is tied to the output and commutative with operand 2 ("%0").
;; The wrapping operator (and its code) and the insn condition are on lines
;; missing from this listing.
9377 (define_insn "sse4_1_dpp<ssemodesuffixf2c>"
9378 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
9380 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
9381 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
9382 (match_operand:SI 3 "const_0_to_255_operand" "n")]
9385 "dpp<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}"
9386 [(set_attr "type" "ssemul")
9387 (set_attr "prefix_data16" "1")
9388 (set_attr "prefix_extra" "1")
9389 (set_attr "length_immediate" "1")
9390 (set_attr "mode" "<MODE>")])
;; SSE4.1 load pattern: reads a V2DI value from memory (operand 1 must be a
;; memory operand) into an SSE register using movntdqa.
;; One pattern serves both encodings ("%v" template prefix together with
;; prefix "maybe_vex").
;; The unspec code and the insn condition are on lines missing from this
;; listing.
9392 (define_insn "sse4_1_movntdqa"
9393 [(set (match_operand:V2DI 0 "register_operand" "=x")
9394 (unspec:V2DI [(match_operand:V2DI 1 "memory_operand" "m")]
9397 "%vmovntdqa\t{%1, %0|%0, %1}"
9398 [(set_attr "type" "ssemov")
9399 (set_attr "prefix_extra" "1")
9400 (set_attr "prefix" "maybe_vex")
9401 (set_attr "mode" "TI")])
;; AVX (VEX-encoded) V16QI pattern that emits four-operand vmpsadbw with an
;; 8-bit immediate (operand 3, 0..255).
;; The unspec code and the insn condition are on lines missing from this
;; listing.
9403 (define_insn "*avx_mpsadbw"
9404 [(set (match_operand:V16QI 0 "register_operand" "=x")
9405 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
9406 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
9407 (match_operand:SI 3 "const_0_to_255_operand" "n")]
9410 "vmpsadbw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9411 [(set_attr "type" "sselog1")
9412 (set_attr "prefix" "vex")
9413 (set_attr "prefix_extra" "1")
9414 (set_attr "length_immediate" "1")
9415 (set_attr "mode" "TI")])
;; SSE4.1 (legacy-encoded) V16QI pattern that emits mpsadbw with an 8-bit
;; immediate (operand 3, 0..255); operand 1 is tied to the output ("0").
;; The unspec code and the insn condition are on lines missing from this
;; listing.
9417 (define_insn "sse4_1_mpsadbw"
9418 [(set (match_operand:V16QI 0 "register_operand" "=x")
9419 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
9420 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
9421 (match_operand:SI 3 "const_0_to_255_operand" "n")]
9424 "mpsadbw\t{%3, %2, %0|%0, %2, %3}"
9425 [(set_attr "type" "sselog1")
9426 (set_attr "prefix_extra" "1")
9427 (set_attr "length_immediate" "1")
9428 (set_attr "mode" "TI")])
;; AVX (VEX-encoded) pattern that emits three-operand vpackusdw: two V4SI
;; inputs produce one V8HI result.
;; The operators applied to each input and the one joining the two halves
;; are on lines missing from this listing (presumably an unsigned-saturating
;; truncation and a concatenation, going by the mnemonic -- confirm).
9430 (define_insn "*avx_packusdw"
9431 [(set (match_operand:V8HI 0 "register_operand" "=x")
9434 (match_operand:V4SI 1 "register_operand" "x"))
9436 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
9438 "vpackusdw\t{%2, %1, %0|%0, %1, %2}"
9439 [(set_attr "type" "sselog")
9440 (set_attr "prefix_extra" "1")
9441 (set_attr "prefix" "vex")
9442 (set_attr "mode" "TI")])
;; SSE4.1 (legacy-encoded) pattern that emits packusdw: two V4SI inputs
;; produce one V8HI result; operand 1 is tied to the output ("0").
;; The operators applied to each input and the one joining the two halves
;; are on lines missing from this listing.
9444 (define_insn "sse4_1_packusdw"
9445 [(set (match_operand:V8HI 0 "register_operand" "=x")
9448 (match_operand:V4SI 1 "register_operand" "0"))
9450 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
9452 "packusdw\t{%2, %0|%0, %2}"
9453 [(set_attr "type" "sselog")
9454 (set_attr "prefix_extra" "1")
9455 (set_attr "mode" "TI")])
;; AVX (VEX-encoded) V16QI variable blend that emits four-operand vpblendvb
;; with the selector in a register (operand 3).
;; length_immediate is "1" although no operand is a constant -- presumably
;; because the encoding carries the fourth register in an immediate byte;
;; confirm against the instruction reference.
;; The unspec code and the insn condition are on lines missing from this
;; listing.
9457 (define_insn "*avx_pblendvb"
9458 [(set (match_operand:V16QI 0 "register_operand" "=x")
9459 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
9460 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
9461 (match_operand:V16QI 3 "register_operand" "x")]
9464 "vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9465 [(set_attr "type" "ssemov")
9466 (set_attr "prefix_extra" "1")
9467 (set_attr "length_immediate" "1")
9468 (set_attr "prefix" "vex")
9469 (set_attr "mode" "TI")])
;; SSE4.1 (legacy-encoded) V16QI variable blend that emits pblendvb.
;; The selector (operand 3) uses the "Yz" constraint while operands 0-2 use
;; the *_not_xmm0 predicates (selector presumably pinned to xmm0; confirm
;; against the i386 constraint definitions).  Operand 1 is tied to the
;; output ("0").
;; The unspec code and the insn condition are on lines missing from this
;; listing.
9471 (define_insn "sse4_1_pblendvb"
9472 [(set (match_operand:V16QI 0 "reg_not_xmm0_operand" "=x")
9473 (unspec:V16QI [(match_operand:V16QI 1 "reg_not_xmm0_operand" "0")
9474 (match_operand:V16QI 2 "nonimm_not_xmm0_operand" "xm")
9475 (match_operand:V16QI 3 "register_operand" "Yz")]
9478 "pblendvb\t{%3, %2, %0|%0, %2, %3}"
9479 [(set_attr "type" "ssemov")
9480 (set_attr "prefix_extra" "1")
9481 (set_attr "mode" "TI")])
;; AVX (VEX-encoded) V8HI immediate blend that emits four-operand vpblendw.
;; Operand 2 is listed before operand 1 in the RTL and operand 3 is an
;; 8-bit constant mask (0..255).  The operator that wraps them is on a line
;; missing from this listing (presumably vec_merge, as in the blendp
;; patterns -- confirm), as is the insn condition.
9483 (define_insn "*avx_pblendw"
9484 [(set (match_operand:V8HI 0 "register_operand" "=x")
9486 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
9487 (match_operand:V8HI 1 "register_operand" "x")
9488 (match_operand:SI 3 "const_0_to_255_operand" "n")))]
9490 "vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9491 [(set_attr "type" "ssemov")
9492 (set_attr "prefix" "vex")
9493 (set_attr "prefix_extra" "1")
9494 (set_attr "length_immediate" "1")
9495 (set_attr "mode" "TI")])
;; SSE4.1 (legacy-encoded) V8HI immediate blend that emits pblendw.
;; Operand 2 is listed before operand 1 (tied to the output via "0") in the
;; RTL and operand 3 is an 8-bit constant mask (0..255).  The wrapping
;; operator and the insn condition are on lines missing from this listing.
9497 (define_insn "sse4_1_pblendw"
9498 [(set (match_operand:V8HI 0 "register_operand" "=x")
9500 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
9501 (match_operand:V8HI 1 "register_operand" "0")
9502 (match_operand:SI 3 "const_0_to_255_operand" "n")))]
9504 "pblendw\t{%3, %2, %0|%0, %2, %3}"
9505 [(set_attr "type" "ssemov")
9506 (set_attr "prefix_extra" "1")
9507 (set_attr "length_immediate" "1")
9508 (set_attr "mode" "TI")])
;; SSE4.1 V8HI pattern that emits phminposuw from a register or memory
;; source; modelled as UNSPEC_PHMINPOSUW over operand 1.
;; One pattern serves both encodings ("%v" template prefix together with
;; prefix "maybe_vex").
;; The insn condition is on a line missing from this listing.
9510 (define_insn "sse4_1_phminposuw"
9511 [(set (match_operand:V8HI 0 "register_operand" "=x")
9512 (unspec:V8HI [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
9513 UNSPEC_PHMINPOSUW))]
9515 "%vphminposuw\t{%1, %0|%0, %1}"
9516 [(set_attr "type" "sselog1")
9517 (set_attr "prefix_extra" "1")
9518 (set_attr "prefix" "maybe_vex")
9519 (set_attr "mode" "TI")])
;; Named pattern that emits pmovsxbw (V8HI result) from a full V16QI
;; register source; a selection starting at element 0 is applied to it.
;; The extension operator, the rest of the element list and the insn
;; condition are on lines missing from this listing.
;; "%v" plus prefix "maybe_vex": one pattern for both encodings.
9521 (define_insn "sse4_1_extendv8qiv8hi2"
9522 [(set (match_operand:V8HI 0 "register_operand" "=x")
9525 (match_operand:V16QI 1 "register_operand" "x")
9526 (parallel [(const_int 0)
9535 "%vpmovsxbw\t{%1, %0|%0, %1}"
9536 [(set_attr "type" "ssemov")
9537 (set_attr "prefix_extra" "1")
9538 (set_attr "prefix" "maybe_vex")
9539 (set_attr "mode" "TI")])
;; Anonymous companion of sse4_1_extendv8qiv8hi2 that also accepts a memory
;; source: operand 1 is a V8QI register-or-memory value wrapped in
;; vec_duplicate:V16QI before the element selection.  Emits pmovsxbw.
;; The extension operator, the rest of the element list and the insn
;; condition are on lines missing from this listing.
9541 (define_insn "*sse4_1_extendv8qiv8hi2"
9542 [(set (match_operand:V8HI 0 "register_operand" "=x")
9545 (vec_duplicate:V16QI
9546 (match_operand:V8QI 1 "nonimmediate_operand" "xm"))
9547 (parallel [(const_int 0)
9556 "%vpmovsxbw\t{%1, %0|%0, %1}"
9557 [(set_attr "type" "ssemov")
9558 (set_attr "prefix_extra" "1")
9559 (set_attr "prefix" "maybe_vex")
9560 (set_attr "mode" "TI")])
;; Named pattern that emits pmovsxbd (V4SI result) from a full V16QI
;; register source; a selection starting at element 0 is applied to it.
;; The extension operator, the rest of the element list and the insn
;; condition are on lines missing from this listing.
;; "%v" plus prefix "maybe_vex": one pattern for both encodings.
9562 (define_insn "sse4_1_extendv4qiv4si2"
9563 [(set (match_operand:V4SI 0 "register_operand" "=x")
9566 (match_operand:V16QI 1 "register_operand" "x")
9567 (parallel [(const_int 0)
9572 "%vpmovsxbd\t{%1, %0|%0, %1}"
9573 [(set_attr "type" "ssemov")
9574 (set_attr "prefix_extra" "1")
9575 (set_attr "prefix" "maybe_vex")
9576 (set_attr "mode" "TI")])
;; Anonymous companion of sse4_1_extendv4qiv4si2 that also accepts a memory
;; source: operand 1 is a V4QI register-or-memory value wrapped in
;; vec_duplicate:V16QI before the element selection.  Emits pmovsxbd.
;; The extension operator, the rest of the element list and the insn
;; condition are on lines missing from this listing.
9578 (define_insn "*sse4_1_extendv4qiv4si2"
9579 [(set (match_operand:V4SI 0 "register_operand" "=x")
9582 (vec_duplicate:V16QI
9583 (match_operand:V4QI 1 "nonimmediate_operand" "xm"))
9584 (parallel [(const_int 0)
9589 "%vpmovsxbd\t{%1, %0|%0, %1}"
9590 [(set_attr "type" "ssemov")
9591 (set_attr "prefix_extra" "1")
9592 (set_attr "prefix" "maybe_vex")
9593 (set_attr "mode" "TI")])
;; Register form of [v]pmovsxbq: the source is a V16QI register and the
;; destination a V2DI register.
9595 (define_insn "sse4_1_extendv2qiv2di2"
9596 [(set (match_operand:V2DI 0 "register_operand" "=x")
9599 (match_operand:V16QI 1 "register_operand" "x")
9600 (parallel [(const_int 0)
9603 "%vpmovsxbq\t{%1, %0|%0, %1}"
9604 [(set_attr "type" "ssemov")
9605 (set_attr "prefix_extra" "1")
9606 (set_attr "prefix" "maybe_vex")
9607 (set_attr "mode" "TI")])
;; Unnamed variant of the [v]pmovsxbq pattern whose source is a V2QI
;; register-or-memory operand wrapped in vec_duplicate:V16QI.
9609 (define_insn "*sse4_1_extendv2qiv2di2"
9610 [(set (match_operand:V2DI 0 "register_operand" "=x")
9613 (vec_duplicate:V16QI
9614 (match_operand:V2QI 1 "nonimmediate_operand" "xm"))
9615 (parallel [(const_int 0)
9618 "%vpmovsxbq\t{%1, %0|%0, %1}"
9619 [(set_attr "type" "ssemov")
9620 (set_attr "prefix_extra" "1")
9621 (set_attr "prefix" "maybe_vex")
9622 (set_attr "mode" "TI")])
;; Register form of [v]pmovsxwd: the source is a V8HI register and the
;; destination a V4SI register.
9624 (define_insn "sse4_1_extendv4hiv4si2"
9625 [(set (match_operand:V4SI 0 "register_operand" "=x")
9628 (match_operand:V8HI 1 "register_operand" "x")
9629 (parallel [(const_int 0)
9634 "%vpmovsxwd\t{%1, %0|%0, %1}"
9635 [(set_attr "type" "ssemov")
9636 (set_attr "prefix_extra" "1")
9637 (set_attr "prefix" "maybe_vex")
9638 (set_attr "mode" "TI")])
;; Unnamed variant of the [v]pmovsxwd pattern whose source is a
;; register-or-memory operand.  The instruction widens four 16-bit
;; elements, so the source operand is V4HI, in agreement with the pattern
;; name and with *sse4_1_zero_extendv4hiv4si2; the former V2HI mode
;; described only two elements.
9640 (define_insn "*sse4_1_extendv4hiv4si2"
9641 [(set (match_operand:V4SI 0 "register_operand" "=x")
9645 (match_operand:V4HI 1 "nonimmediate_operand" "xm"))
9646 (parallel [(const_int 0)
9651 "%vpmovsxwd\t{%1, %0|%0, %1}"
9652 [(set_attr "type" "ssemov")
9653 (set_attr "prefix_extra" "1")
9654 (set_attr "prefix" "maybe_vex")
9655 (set_attr "mode" "TI")])
;; Register form of [v]pmovsxwq: the source is a V8HI register and the
;; destination a V2DI register.
9657 (define_insn "sse4_1_extendv2hiv2di2"
9658 [(set (match_operand:V2DI 0 "register_operand" "=x")
9661 (match_operand:V8HI 1 "register_operand" "x")
9662 (parallel [(const_int 0)
9665 "%vpmovsxwq\t{%1, %0|%0, %1}"
9666 [(set_attr "type" "ssemov")
9667 (set_attr "prefix_extra" "1")
9668 (set_attr "prefix" "maybe_vex")
9669 (set_attr "mode" "TI")])
;; Unnamed variant of the [v]pmovsxwq pattern whose source is a
;; register-or-memory operand.  The instruction widens two 16-bit
;; elements, so the source operand is V2HI, in agreement with the pattern
;; name and with *sse4_1_zero_extendv2hiv2di2; the former V8HI mode
;; described a full 128-bit source.
9671 (define_insn "*sse4_1_extendv2hiv2di2"
9672 [(set (match_operand:V2DI 0 "register_operand" "=x")
9676 (match_operand:V2HI 1 "nonimmediate_operand" "xm"))
9677 (parallel [(const_int 0)
9680 "%vpmovsxwq\t{%1, %0|%0, %1}"
9681 [(set_attr "type" "ssemov")
9682 (set_attr "prefix_extra" "1")
9683 (set_attr "prefix" "maybe_vex")
9684 (set_attr "mode" "TI")])
;; Register form of [v]pmovsxdq: the source is a V4SI register and the
;; destination a V2DI register.
9686 (define_insn "sse4_1_extendv2siv2di2"
9687 [(set (match_operand:V2DI 0 "register_operand" "=x")
9690 (match_operand:V4SI 1 "register_operand" "x")
9691 (parallel [(const_int 0)
9694 "%vpmovsxdq\t{%1, %0|%0, %1}"
9695 [(set_attr "type" "ssemov")
9696 (set_attr "prefix_extra" "1")
9697 (set_attr "prefix" "maybe_vex")
9698 (set_attr "mode" "TI")])
;; Unnamed variant of the [v]pmovsxdq pattern whose source is a V2SI
;; register-or-memory operand.
9700 (define_insn "*sse4_1_extendv2siv2di2"
9701 [(set (match_operand:V2DI 0 "register_operand" "=x")
9705 (match_operand:V2SI 1 "nonimmediate_operand" "xm"))
9706 (parallel [(const_int 0)
9709 "%vpmovsxdq\t{%1, %0|%0, %1}"
9710 [(set_attr "type" "ssemov")
9711 (set_attr "prefix_extra" "1")
9712 (set_attr "prefix" "maybe_vex")
9713 (set_attr "mode" "TI")])
;; Register form of [v]pmovzxbw: the source is a V16QI register and the
;; destination a V8HI register.
9715 (define_insn "sse4_1_zero_extendv8qiv8hi2"
9716 [(set (match_operand:V8HI 0 "register_operand" "=x")
9719 (match_operand:V16QI 1 "register_operand" "x")
9720 (parallel [(const_int 0)
9729 "%vpmovzxbw\t{%1, %0|%0, %1}"
9730 [(set_attr "type" "ssemov")
9731 (set_attr "prefix_extra" "1")
9732 (set_attr "prefix" "maybe_vex")
9733 (set_attr "mode" "TI")])
;; Unnamed variant of the [v]pmovzxbw pattern whose source is a V8QI
;; register-or-memory operand wrapped in vec_duplicate:V16QI.
9735 (define_insn "*sse4_1_zero_extendv8qiv8hi2"
9736 [(set (match_operand:V8HI 0 "register_operand" "=x")
9739 (vec_duplicate:V16QI
9740 (match_operand:V8QI 1 "nonimmediate_operand" "xm"))
9741 (parallel [(const_int 0)
9750 "%vpmovzxbw\t{%1, %0|%0, %1}"
9751 [(set_attr "type" "ssemov")
9752 (set_attr "prefix_extra" "1")
9753 (set_attr "prefix" "maybe_vex")
9754 (set_attr "mode" "TI")])
;; Register form of [v]pmovzxbd: the source is a V16QI register and the
;; destination a V4SI register.
9756 (define_insn "sse4_1_zero_extendv4qiv4si2"
9757 [(set (match_operand:V4SI 0 "register_operand" "=x")
9760 (match_operand:V16QI 1 "register_operand" "x")
9761 (parallel [(const_int 0)
9766 "%vpmovzxbd\t{%1, %0|%0, %1}"
9767 [(set_attr "type" "ssemov")
9768 (set_attr "prefix_extra" "1")
9769 (set_attr "prefix" "maybe_vex")
9770 (set_attr "mode" "TI")])
;; Unnamed variant of the [v]pmovzxbd pattern whose source is a V4QI
;; register-or-memory operand wrapped in vec_duplicate:V16QI.
9772 (define_insn "*sse4_1_zero_extendv4qiv4si2"
9773 [(set (match_operand:V4SI 0 "register_operand" "=x")
9776 (vec_duplicate:V16QI
9777 (match_operand:V4QI 1 "nonimmediate_operand" "xm"))
9778 (parallel [(const_int 0)
9783 "%vpmovzxbd\t{%1, %0|%0, %1}"
9784 [(set_attr "type" "ssemov")
9785 (set_attr "prefix_extra" "1")
9786 (set_attr "prefix" "maybe_vex")
9787 (set_attr "mode" "TI")])
;; Register form of [v]pmovzxbq: the source is a V16QI register and the
;; destination a V2DI register.
9789 (define_insn "sse4_1_zero_extendv2qiv2di2"
9790 [(set (match_operand:V2DI 0 "register_operand" "=x")
9793 (match_operand:V16QI 1 "register_operand" "x")
9794 (parallel [(const_int 0)
9797 "%vpmovzxbq\t{%1, %0|%0, %1}"
9798 [(set_attr "type" "ssemov")
9799 (set_attr "prefix_extra" "1")
9800 (set_attr "prefix" "maybe_vex")
9801 (set_attr "mode" "TI")])
;; Unnamed variant of the [v]pmovzxbq pattern whose source is a V2QI
;; register-or-memory operand wrapped in vec_duplicate:V16QI.
9803 (define_insn "*sse4_1_zero_extendv2qiv2di2"
9804 [(set (match_operand:V2DI 0 "register_operand" "=x")
9807 (vec_duplicate:V16QI
9808 (match_operand:V2QI 1 "nonimmediate_operand" "xm"))
9809 (parallel [(const_int 0)
9812 "%vpmovzxbq\t{%1, %0|%0, %1}"
9813 [(set_attr "type" "ssemov")
9814 (set_attr "prefix_extra" "1")
9815 (set_attr "prefix" "maybe_vex")
9816 (set_attr "mode" "TI")])
;; Register form of [v]pmovzxwd: the source is a V8HI register and the
;; destination a V4SI register.
9818 (define_insn "sse4_1_zero_extendv4hiv4si2"
9819 [(set (match_operand:V4SI 0 "register_operand" "=x")
9822 (match_operand:V8HI 1 "register_operand" "x")
9823 (parallel [(const_int 0)
9828 "%vpmovzxwd\t{%1, %0|%0, %1}"
9829 [(set_attr "type" "ssemov")
9830 (set_attr "prefix_extra" "1")
9831 (set_attr "prefix" "maybe_vex")
9832 (set_attr "mode" "TI")])
;; Unnamed variant of the [v]pmovzxwd pattern whose source is a V4HI
;; register-or-memory operand.
9834 (define_insn "*sse4_1_zero_extendv4hiv4si2"
9835 [(set (match_operand:V4SI 0 "register_operand" "=x")
9839 (match_operand:V4HI 1 "nonimmediate_operand" "xm"))
9840 (parallel [(const_int 0)
9845 "%vpmovzxwd\t{%1, %0|%0, %1}"
9846 [(set_attr "type" "ssemov")
9847 (set_attr "prefix_extra" "1")
9848 (set_attr "prefix" "maybe_vex")
9849 (set_attr "mode" "TI")])
;; Register form of [v]pmovzxwq: the source is a V8HI register and the
;; destination a V2DI register.
9851 (define_insn "sse4_1_zero_extendv2hiv2di2"
9852 [(set (match_operand:V2DI 0 "register_operand" "=x")
9855 (match_operand:V8HI 1 "register_operand" "x")
9856 (parallel [(const_int 0)
9859 "%vpmovzxwq\t{%1, %0|%0, %1}"
9860 [(set_attr "type" "ssemov")
9861 (set_attr "prefix_extra" "1")
9862 (set_attr "prefix" "maybe_vex")
9863 (set_attr "mode" "TI")])
;; Unnamed variant of the [v]pmovzxwq pattern whose source is a V2HI
;; register-or-memory operand.
9865 (define_insn "*sse4_1_zero_extendv2hiv2di2"
9866 [(set (match_operand:V2DI 0 "register_operand" "=x")
9870 (match_operand:V2HI 1 "nonimmediate_operand" "xm"))
9871 (parallel [(const_int 0)
9874 "%vpmovzxwq\t{%1, %0|%0, %1}"
9875 [(set_attr "type" "ssemov")
9876 (set_attr "prefix_extra" "1")
9877 (set_attr "prefix" "maybe_vex")
9878 (set_attr "mode" "TI")])
;; Register form of [v]pmovzxdq: the source is a V4SI register and the
;; destination a V2DI register.
9880 (define_insn "sse4_1_zero_extendv2siv2di2"
9881 [(set (match_operand:V2DI 0 "register_operand" "=x")
9884 (match_operand:V4SI 1 "register_operand" "x")
9885 (parallel [(const_int 0)
9888 "%vpmovzxdq\t{%1, %0|%0, %1}"
9889 [(set_attr "type" "ssemov")
9890 (set_attr "prefix_extra" "1")
9891 (set_attr "prefix" "maybe_vex")
9892 (set_attr "mode" "TI")])
;; Unnamed variant of the [v]pmovzxdq pattern whose source is a V2SI
;; register-or-memory operand.
9894 (define_insn "*sse4_1_zero_extendv2siv2di2"
9895 [(set (match_operand:V2DI 0 "register_operand" "=x")
9899 (match_operand:V2SI 1 "nonimmediate_operand" "xm"))
9900 (parallel [(const_int 0)
9903 "%vpmovzxdq\t{%1, %0|%0, %1}"
9904 [(set_attr "type" "ssemov")
9905 (set_attr "prefix_extra" "1")
9906 (set_attr "prefix" "maybe_vex")
9907 (set_attr "mode" "TI")])
9909 ;; vtestps/vtestpd are very similar to comiss and ucomiss when
9910 ;; setting FLAGS_REG. But they are not really compare instructions.
;; Emits vtestp<suffix> for each mode of the AVXMODEF2P iterator.  The
;; only result is FLAGS_REG (CC mode), computed by an unspec over a
;; register operand 0 and a register-or-memory operand 1.
9911 (define_insn "avx_vtestp<avxmodesuffixf2c><avxmodesuffix>"
9912 [(set (reg:CC FLAGS_REG)
9913 (unspec:CC [(match_operand:AVXMODEF2P 0 "register_operand" "x")
9914 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "xm")]
9917 "vtestp<avxmodesuffixf2c>\t{%1, %0|%0, %1}"
9918 [(set_attr "type" "ssecomi")
9919 (set_attr "prefix_extra" "1")
9920 (set_attr "prefix" "vex")
9921 (set_attr "mode" "<MODE>")])
9923 ;; ptest is very similar to comiss and ucomiss when setting FLAGS_REG.
9924 ;; But it is not really a compare instruction.
;; 256-bit vptest on V4DI operands.  The only result is FLAGS_REG (CC
;; mode); operand 0 must be a register, operand 1 may be memory.
9925 (define_insn "avx_ptest256"
9926 [(set (reg:CC FLAGS_REG)
9927 (unspec:CC [(match_operand:V4DI 0 "register_operand" "x")
9928 (match_operand:V4DI 1 "nonimmediate_operand" "xm")]
9931 "vptest\t{%1, %0|%0, %1}"
9932 [(set_attr "type" "ssecomi")
9933 (set_attr "prefix_extra" "1")
9934 (set_attr "prefix" "vex")
9935 (set_attr "mode" "OI")])
;; 128-bit [v]ptest on V2DI operands.  The only result is FLAGS_REG (CC
;; mode); operand 0 must be a register, operand 1 may be memory.
9937 (define_insn "sse4_1_ptest"
9938 [(set (reg:CC FLAGS_REG)
9939 (unspec:CC [(match_operand:V2DI 0 "register_operand" "x")
9940 (match_operand:V2DI 1 "nonimmediate_operand" "xm")]
9943 "%vptest\t{%1, %0|%0, %1}"
9944 [(set_attr "type" "ssecomi")
9945 (set_attr "prefix_extra" "1")
9946 (set_attr "prefix" "maybe_vex")
9947 (set_attr "mode" "TI")])
;; 256-bit packed rounding (vroundp<suffix>) over the AVX256MODEF2P
;; iterator.  Operand 1 is the register-or-memory source and operand 2 an
;; immediate accepted by const_0_to_15_operand.
9949 (define_insn "avx_roundp<avxmodesuffixf2c>256"
9950 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "=x")
9951 (unspec:AVX256MODEF2P
9952 [(match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "xm")
9953 (match_operand:SI 2 "const_0_to_15_operand" "n")]
9956 "vroundp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
9957 [(set_attr "type" "ssecvt")
9958 (set_attr "prefix_extra" "1")
9959 (set_attr "length_immediate" "1")
9960 (set_attr "prefix" "vex")
9961 (set_attr "mode" "<MODE>")])
;; 128-bit packed rounding ([v]roundp<suffix>) over the SSEMODEF2P
;; iterator.  Operand 1 is the register-or-memory source and operand 2 an
;; immediate accepted by const_0_to_15_operand.
9963 (define_insn "sse4_1_roundp<ssemodesuffixf2c>"
9964 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
9966 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm")
9967 (match_operand:SI 2 "const_0_to_15_operand" "n")]
9970 "%vroundp<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
9971 [(set_attr "type" "ssecvt")
9972 (set_attr "prefix_data16" "1")
9973 (set_attr "prefix_extra" "1")
9974 (set_attr "length_immediate" "1")
9975 (set_attr "prefix" "maybe_vex")
9976 (set_attr "mode" "<MODE>")])
;; AVX scalar rounding (vrounds<suffix>), expressed as a vec_merge of the
;; rounded operand 2 with operand 1.  This is the non-destructive form:
;; operand 1 is an independent register and appears as a separate source
;; in the output template.  Operand 3 is the rounding immediate.
9978 (define_insn "*avx_rounds<ssemodesuffixf2c>"
9979 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
9980 (vec_merge:SSEMODEF2P
9982 [(match_operand:SSEMODEF2P 2 "register_operand" "x")
9983 (match_operand:SI 3 "const_0_to_15_operand" "n")]
9985 (match_operand:SSEMODEF2P 1 "register_operand" "x")
9988 "vrounds<ssemodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9989 [(set_attr "type" "ssecvt")
9990 (set_attr "prefix_extra" "1")
9991 (set_attr "length_immediate" "1")
9992 (set_attr "prefix" "vex")
9993 (set_attr "mode" "<MODE>")])
;; SSE4.1 scalar rounding (rounds<suffix>), expressed as a vec_merge of
;; the rounded operand 2 with operand 1.  Operand 1 is tied to the
;; destination (constraint "0"), so only operands 0, 2 and 3 appear in
;; the output template.  Operand 3 is the rounding immediate.
9995 (define_insn "sse4_1_rounds<ssemodesuffixf2c>"
9996 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
9997 (vec_merge:SSEMODEF2P
9999 [(match_operand:SSEMODEF2P 2 "register_operand" "x")
10000 (match_operand:SI 3 "const_0_to_15_operand" "n")]
10002 (match_operand:SSEMODEF2P 1 "register_operand" "0")
10005 "rounds<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}"
10006 [(set_attr "type" "ssecvt")
10007 (set_attr "prefix_data16" "1")
10008 (set_attr "prefix_extra" "1")
10009 (set_attr "length_immediate" "1")
10010 (set_attr "mode" "<MODE>")])
10012 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10014 ;; Intel SSE4.2 string/text processing instructions
10016 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Combined pcmpestr pattern producing three results at once: an SI index
;; (operand 0, constraint "c"), a V16QI mask (operand 1, constraint "Yz")
;; and FLAGS_REG.  It is split while pseudos can still be created.  The
;; split code checks which results carry a REG_UNUSED note on curr_insn
;; and can emit pcmpestri (for operand 0), pcmpestrm (for operand 1), and
;; the flags-only pcmpestr_cconly form when the flags are live but
;; neither of the other two results is.
;; NOTE(review): the conditions guarding the pcmpestri/pcmpestrm
;; emissions are not visible in this excerpt -- presumably `ecx' and
;; `xmm0' respectively; confirm against the full source.
10018 (define_insn_and_split "sse4_2_pcmpestr"
10019 [(set (match_operand:SI 0 "register_operand" "=c,c")
10021 [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
10022 (match_operand:SI 3 "register_operand" "a,a")
10023 (match_operand:V16QI 4 "nonimm_not_xmm0_operand" "x,m")
10024 (match_operand:SI 5 "register_operand" "d,d")
10025 (match_operand:SI 6 "const_0_to_255_operand" "n,n")]
10027 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
10035 (set (reg:CC FLAGS_REG)
10044 && can_create_pseudo_p ()"
10049 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
10050 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
10051 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
10054 emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
10055 operands[3], operands[4],
10056 operands[5], operands[6]));
10058 emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
10059 operands[3], operands[4],
10060 operands[5], operands[6]));
10061 if (flags && !(ecx || xmm0))
10062 emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
10063 operands[2], operands[3],
10064 operands[4], operands[5],
10068 [(set_attr "type" "sselog")
10069 (set_attr "prefix_data16" "1")
10070 (set_attr "prefix_extra" "1")
10071 (set_attr "length_immediate" "1")
10072 (set_attr "memory" "none,load")
10073 (set_attr "mode" "TI")])
;; [v]pcmpestri: explicit-length string compare returning an SI index in
;; operand 0 (constraint "c") and also setting FLAGS_REG.  The two SI
;; length operands use constraints "a" and "d"; the second string
;; (operand 3) may be a register or memory, giving the two alternatives.
10075 (define_insn "sse4_2_pcmpestri"
10076 [(set (match_operand:SI 0 "register_operand" "=c,c")
10078 [(match_operand:V16QI 1 "register_operand" "x,x")
10079 (match_operand:SI 2 "register_operand" "a,a")
10080 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
10081 (match_operand:SI 4 "register_operand" "d,d")
10082 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
10084 (set (reg:CC FLAGS_REG)
10093 "%vpcmpestri\t{%5, %3, %1|%1, %3, %5}"
10094 [(set_attr "type" "sselog")
10095 (set_attr "prefix_data16" "1")
10096 (set_attr "prefix_extra" "1")
10097 (set_attr "prefix" "maybe_vex")
10098 (set_attr "length_immediate" "1")
10099 (set_attr "memory" "none,load")
10100 (set_attr "mode" "TI")])
;; [v]pcmpestrm: explicit-length string compare returning a V16QI mask in
;; operand 0 (constraint "Yz") and also setting FLAGS_REG.  The two SI
;; length operands use constraints "a" and "d"; the second string
;; (operand 3) may be a register or memory, giving the two alternatives.
10102 (define_insn "sse4_2_pcmpestrm"
10103 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
10105 [(match_operand:V16QI 1 "register_operand" "x,x")
10106 (match_operand:SI 2 "register_operand" "a,a")
10107 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
10108 (match_operand:SI 4 "register_operand" "d,d")
10109 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
10111 (set (reg:CC FLAGS_REG)
10120 "%vpcmpestrm\t{%5, %3, %1|%1, %3, %5}"
10121 [(set_attr "type" "sselog")
10122 (set_attr "prefix_data16" "1")
10123 (set_attr "prefix_extra" "1")
10124 (set_attr "length_immediate" "1")
10125 (set_attr "prefix" "maybe_vex")
10126 (set_attr "memory" "none,load")
10127 (set_attr "mode" "TI")])
;; Flags-only form of pcmpestr: FLAGS_REG is the sole result, while
;; operands 0 and 1 are scratch clobbers.  There are four alternatives:
;; the first two clobber the "Yz" scratch and emit [v]pcmpestrm, the last
;; two clobber the "c" scratch and emit [v]pcmpestri; within each pair
;; operand 4 is a register or memory (see the "memory" attribute).
10129 (define_insn "sse4_2_pcmpestr_cconly"
10130 [(set (reg:CC FLAGS_REG)
10132 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
10133 (match_operand:SI 3 "register_operand" "a,a,a,a")
10134 (match_operand:V16QI 4 "nonimmediate_operand" "x,m,x,m")
10135 (match_operand:SI 5 "register_operand" "d,d,d,d")
10136 (match_operand:SI 6 "const_0_to_255_operand" "n,n,n,n")]
10138 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
10139 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
10142 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
10143 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
10144 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}
10145 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}"
10146 [(set_attr "type" "sselog")
10147 (set_attr "prefix_data16" "1")
10148 (set_attr "prefix_extra" "1")
10149 (set_attr "length_immediate" "1")
10150 (set_attr "memory" "none,load,none,load")
10151 (set_attr "prefix" "maybe_vex")
10152 (set_attr "mode" "TI")])
;; Combined pcmpistr pattern producing three results at once: an SI index
;; (operand 0, constraint "c"), a V16QI mask (operand 1, constraint "Yz")
;; and FLAGS_REG.  It is split while pseudos can still be created.  The
;; split code checks which results carry a REG_UNUSED note on curr_insn
;; and can emit pcmpistri (for operand 0), pcmpistrm (for operand 1), and
;; the flags-only pcmpistr_cconly form when the flags are live but
;; neither of the other two results is.
;; NOTE(review): the conditions guarding the pcmpistri/pcmpistrm
;; emissions are not visible in this excerpt -- presumably `ecx' and
;; `xmm0' respectively; confirm against the full source.
10154 (define_insn_and_split "sse4_2_pcmpistr"
10155 [(set (match_operand:SI 0 "register_operand" "=c,c")
10157 [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
10158 (match_operand:V16QI 3 "nonimm_not_xmm0_operand" "x,m")
10159 (match_operand:SI 4 "const_0_to_255_operand" "n,n")]
10161 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
10167 (set (reg:CC FLAGS_REG)
10174 && can_create_pseudo_p ()"
10179 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
10180 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
10181 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
10184 emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
10185 operands[3], operands[4]));
10187 emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
10188 operands[3], operands[4]));
10189 if (flags && !(ecx || xmm0))
10190 emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
10191 operands[2], operands[3],
10195 [(set_attr "type" "sselog")
10196 (set_attr "prefix_data16" "1")
10197 (set_attr "prefix_extra" "1")
10198 (set_attr "length_immediate" "1")
10199 (set_attr "memory" "none,load")
10200 (set_attr "mode" "TI")])
;; [v]pcmpistri: string compare returning an SI index in operand 0
;; (constraint "c") and also setting FLAGS_REG.  Unlike the pcmpestr
;; patterns there are no SI length operands.  Operand 2 may be a register
;; or memory, giving the two alternatives.
10202 (define_insn "sse4_2_pcmpistri"
10203 [(set (match_operand:SI 0 "register_operand" "=c,c")
10205 [(match_operand:V16QI 1 "register_operand" "x,x")
10206 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
10207 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
10209 (set (reg:CC FLAGS_REG)
10216 "%vpcmpistri\t{%3, %2, %1|%1, %2, %3}"
10217 [(set_attr "type" "sselog")
10218 (set_attr "prefix_data16" "1")
10219 (set_attr "prefix_extra" "1")
10220 (set_attr "length_immediate" "1")
10221 (set_attr "prefix" "maybe_vex")
10222 (set_attr "memory" "none,load")
10223 (set_attr "mode" "TI")])
;; [v]pcmpistrm: string compare returning a V16QI mask in operand 0
;; (constraint "Yz") and also setting FLAGS_REG.  Operand 2 may be a
;; register or memory, giving the two alternatives.
10225 (define_insn "sse4_2_pcmpistrm"
10226 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
10228 [(match_operand:V16QI 1 "register_operand" "x,x")
10229 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
10230 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
10232 (set (reg:CC FLAGS_REG)
10239 "%vpcmpistrm\t{%3, %2, %1|%1, %2, %3}"
10240 [(set_attr "type" "sselog")
10241 (set_attr "prefix_data16" "1")
10242 (set_attr "prefix_extra" "1")
10243 (set_attr "length_immediate" "1")
10244 (set_attr "prefix" "maybe_vex")
10245 (set_attr "memory" "none,load")
10246 (set_attr "mode" "TI")])
;; Flags-only form of pcmpistr: FLAGS_REG is the sole result, while
;; operands 0 and 1 are scratch clobbers.  There are four alternatives:
;; the first two clobber the "Yz" scratch and emit [v]pcmpistrm, the last
;; two clobber the "c" scratch and emit [v]pcmpistri; within each pair
;; operand 3 is a register or memory (see the "memory" attribute).
10248 (define_insn "sse4_2_pcmpistr_cconly"
10249 [(set (reg:CC FLAGS_REG)
10251 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
10252 (match_operand:V16QI 3 "nonimmediate_operand" "x,m,x,m")
10253 (match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")]
10255 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
10256 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
10259 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
10260 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
10261 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}
10262 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}"
10263 [(set_attr "type" "sselog")
10264 (set_attr "prefix_data16" "1")
10265 (set_attr "prefix_extra" "1")
10266 (set_attr "length_immediate" "1")
10267 (set_attr "memory" "none,load,none,load")
10268 (set_attr "prefix" "maybe_vex")
10269 (set_attr "mode" "TI")])
10271 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10273 ;; XOP instructions
10275 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10277 ;; XOP parallel integer multiply/add instructions.
10278 ;; Note that the XOP multiply/add instructions
10279 ;; a[i] = b[i] * c[i] + d[i];
10280 ;; do not allow the value being added (d) to be a memory operand.
;; vpmacsww on V8HI operands.  Operand 2 may be memory ("xm"); the addend
;; (operand 3) and operand 1 only have the register constraint "x".
10281 (define_insn "xop_pmacsww"
10282 [(set (match_operand:V8HI 0 "register_operand" "=x")
10285 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10286 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
10287 (match_operand:V8HI 3 "nonimmediate_operand" "x")))]
10289 "vpmacsww\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10290 [(set_attr "type" "ssemuladd")
10291 (set_attr "mode" "TI")])
;; vpmacssww on V8HI operands: a V8HI multiply of operands 1 and 2
;; combined with operand 3.  Operand 2 may be memory ("xm"); operands 1
;; and 3 only have the register constraint "x".
10293 (define_insn "xop_pmacssww"
10294 [(set (match_operand:V8HI 0 "register_operand" "=x")
10296 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10297 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
10298 (match_operand:V8HI 3 "nonimmediate_operand" "x")))]
10300 "vpmacssww\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10301 [(set_attr "type" "ssemuladd")
10302 (set_attr "mode" "TI")])
;; vpmacsdd on V4SI operands.  Operand 2 may be memory ("xm"); the addend
;; (operand 3) and operand 1 only have the register constraint "x".
10304 (define_insn "xop_pmacsdd"
10305 [(set (match_operand:V4SI 0 "register_operand" "=x")
10308 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10309 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
10310 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10312 "vpmacsdd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10313 [(set_attr "type" "ssemuladd")
10314 (set_attr "mode" "TI")])
;; vpmacssdd on V4SI operands: a V4SI multiply of operands 1 and 2
;; combined with operand 3.  Operand 2 may be memory ("xm"); operands 1
;; and 3 only have the register constraint "x".
10316 (define_insn "xop_pmacssdd"
10317 [(set (match_operand:V4SI 0 "register_operand" "=x")
10319 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10320 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
10321 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10323 "vpmacssdd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10324 [(set_attr "type" "ssemuladd")
10325 (set_attr "mode" "TI")])
;; vpmacssdql: V4SI sources (operands 1 and 2), V2DI addend (operand 3)
;; and V2DI result.  The element selections on both sources start at
;; element 1.
10327 (define_insn "xop_pmacssdql"
10328 [(set (match_operand:V2DI 0 "register_operand" "=x")
10333 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10334 (parallel [(const_int 1)
10337 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10338 (parallel [(const_int 1)
10340 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
10342 "vpmacssdql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10343 [(set_attr "type" "ssemuladd")
10344 (set_attr "mode" "TI")])
;; vpmacssdqh: V4SI sources (operands 1 and 2), V2DI addend (operand 3)
;; and V2DI result.  The element selections on both sources start at
;; element 0.
10346 (define_insn "xop_pmacssdqh"
10347 [(set (match_operand:V2DI 0 "register_operand" "=x")
10352 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10353 (parallel [(const_int 0)
10357 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10358 (parallel [(const_int 0)
10360 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
10362 "vpmacssdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10363 [(set_attr "type" "ssemuladd")
10364 (set_attr "mode" "TI")])
;; vpmacsdql: V4SI sources (operands 1 and 2), V2DI addend (operand 3)
;; and V2DI result.  The element selections on both sources start at
;; element 1.
10366 (define_insn "xop_pmacsdql"
10367 [(set (match_operand:V2DI 0 "register_operand" "=x")
10372 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10373 (parallel [(const_int 1)
10377 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10378 (parallel [(const_int 1)
10380 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
10382 "vpmacsdql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10383 [(set_attr "type" "ssemuladd")
10384 (set_attr "mode" "TI")])
10386 ;; We don't have a straight 32-bit parallel multiply and extend on XOP, so
10387 ;; fake it with a multiply/add. In general, we expect the define_split to
10388 ;; occur before register allocation, so we have to handle the corner case where
10389 ;; the target is the same as operands 1/2
;; Widening multiply of elements 1 and 3 of two V4SI operands into a V2DI
;; result.  The destination is early-clobber ("=&x").  The pattern is
;; split once reload has completed, and the split preparation code sets
;; operands[3] to a V2DI zero vector (CONST0_RTX).
10390 (define_insn_and_split "xop_mulv2div2di3_low"
10391 [(set (match_operand:V2DI 0 "register_operand" "=&x")
10395 (match_operand:V4SI 1 "register_operand" "%x")
10396 (parallel [(const_int 1)
10400 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10401 (parallel [(const_int 1)
10402 (const_int 3)])))))]
10405 "&& reload_completed"
10406 [(set (match_dup 0)
10414 (parallel [(const_int 1)
10419 (parallel [(const_int 1)
10423 operands[3] = CONST0_RTX (V2DImode);
10425 [(set_attr "type" "ssemuladd")
10426 (set_attr "mode" "TI")])
;; vpmacsdqh: V4SI sources (operands 1 and 2), V2DI addend (operand 3)
;; and V2DI result.  The element selections on both sources start at
;; element 0.
10428 (define_insn "xop_pmacsdqh"
10429 [(set (match_operand:V2DI 0 "register_operand" "=x")
10434 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10435 (parallel [(const_int 0)
10439 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10440 (parallel [(const_int 0)
10442 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
10444 "vpmacsdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10445 [(set_attr "type" "ssemuladd")
10446 (set_attr "mode" "TI")])
10448 ;; We don't have a straight 32-bit parallel multiply and extend on XOP, so
10449 ;; fake it with a multiply/add. In general, we expect the define_split to
10450 ;; occur before register allocation, so we have to handle the corner case where
10451 ;; the target is the same as either operands[1] or operands[2]
;; Widening multiply of elements 0 and 2 of two V4SI operands into a V2DI
;; result.  The destination is early-clobber ("=&x").  The pattern is
;; split once reload has completed, and the split preparation code sets
;; operands[3] to a V2DI zero vector (CONST0_RTX).
10452 (define_insn_and_split "xop_mulv2div2di3_high"
10453 [(set (match_operand:V2DI 0 "register_operand" "=&x")
10457 (match_operand:V4SI 1 "register_operand" "%x")
10458 (parallel [(const_int 0)
10462 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10463 (parallel [(const_int 0)
10464 (const_int 2)])))))]
10467 "&& reload_completed"
10468 [(set (match_dup 0)
10476 (parallel [(const_int 0)
10481 (parallel [(const_int 0)
10485 operands[3] = CONST0_RTX (V2DImode);
10487 [(set_attr "type" "ssemuladd")
10488 (set_attr "mode" "TI")])
10490 ;; XOP parallel integer multiply/add instructions for the intrinsics
;; vpmacsswd: V8HI sources (operands 1 and 2), V4SI addend (operand 3)
;; and V4SI result.  The element selections on both sources start at
;; element 1.
10491 (define_insn "xop_pmacsswd"
10492 [(set (match_operand:V4SI 0 "register_operand" "=x")
10497 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10498 (parallel [(const_int 1)
10504 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
10505 (parallel [(const_int 1)
10509 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10511 "vpmacsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10512 [(set_attr "type" "ssemuladd")
10513 (set_attr "mode" "TI")])
;; vpmacswd: V8HI sources (operands 1 and 2), V4SI addend (operand 3)
;; and V4SI result.  The element selections on both sources start at
;; element 1.
10515 (define_insn "xop_pmacswd"
10516 [(set (match_operand:V4SI 0 "register_operand" "=x")
10521 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10522 (parallel [(const_int 1)
10528 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
10529 (parallel [(const_int 1)
10533 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10535 "vpmacswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10536 [(set_attr "type" "ssemuladd")
10537 (set_attr "mode" "TI")])
;; vpmadcsswd: V8HI sources (operands 1 and 2), V4SI addend (operand 3)
;; and V4SI result.  The pattern uses two groups of element selections,
;; one starting at element 0 and one starting at element 1.
10539 (define_insn "xop_pmadcsswd"
10540 [(set (match_operand:V4SI 0 "register_operand" "=x")
10546 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10547 (parallel [(const_int 0)
10553 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
10554 (parallel [(const_int 0)
10562 (parallel [(const_int 1)
10569 (parallel [(const_int 1)
10572 (const_int 7)])))))
10573 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10575 "vpmadcsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10576 [(set_attr "type" "ssemuladd")
10577 (set_attr "mode" "TI")])
;; vpmadcswd: V8HI sources (operands 1 and 2), V4SI addend (operand 3)
;; and V4SI result.  The pattern uses two groups of element selections,
;; one starting at element 0 and one starting at element 1.
10579 (define_insn "xop_pmadcswd"
10580 [(set (match_operand:V4SI 0 "register_operand" "=x")
10586 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10587 (parallel [(const_int 0)
10593 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
10594 (parallel [(const_int 0)
10602 (parallel [(const_int 1)
10609 (parallel [(const_int 1)
10612 (const_int 7)])))))
10613 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10615 "vpmadcswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10616 [(set_attr "type" "ssemuladd")
10617 (set_attr "mode" "TI")])
10619 ;; XOP parallel XMM conditional moves
;; 128-bit vpcmov for every mode in SSEMODE, expressed as an if_then_else
;; with operand 3 as the condition and operands 1/2 as the two arms.  The
;; two alternatives allow either operand 2 or operand 3 (but not both) to
;; be in memory.
10620 (define_insn "xop_pcmov_<mode>"
10621 [(set (match_operand:SSEMODE 0 "register_operand" "=x,x")
10622 (if_then_else:SSEMODE
10623 (match_operand:SSEMODE 3 "nonimmediate_operand" "x,m")
10624 (match_operand:SSEMODE 1 "vector_move_operand" "x,x")
10625 (match_operand:SSEMODE 2 "vector_move_operand" "xm,x")))]
10627 "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10628 [(set_attr "type" "sse4arg")])
;; 256-bit vpcmov for every mode in AVX256MODE, expressed as an
;; if_then_else with operand 3 as the condition and operands 1/2 as the
;; two arms.  The two alternatives allow either operand 2 or operand 3
;; (but not both) to be in memory.
10630 (define_insn "xop_pcmov_<mode>256"
10631 [(set (match_operand:AVX256MODE 0 "register_operand" "=x,x")
10632 (if_then_else:AVX256MODE
10633 (match_operand:AVX256MODE 3 "nonimmediate_operand" "x,m")
10634 (match_operand:AVX256MODE 1 "vector_move_operand" "x,x")
10635 (match_operand:AVX256MODE 2 "vector_move_operand" "xm,x")))]
10637 "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10638 [(set_attr "type" "sse4arg")])
10640 ;; XOP horizontal add/subtract instructions
;; vphaddbw: V16QI register-or-memory source, V8HI register result.  The
;; two element selections start at elements 0 and 1 respectively.
10641 (define_insn "xop_phaddbw"
10642 [(set (match_operand:V8HI 0 "register_operand" "=x")
10646 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10647 (parallel [(const_int 0)
10658 (parallel [(const_int 1)
10665 (const_int 15)])))))]
10667 "vphaddbw\t{%1, %0|%0, %1}"
10668 [(set_attr "type" "sseiadd1")])
;; vphaddbd: V16QI register-or-memory source, V4SI register result.  Four
;; element selections are combined, starting at elements 0, 1, 2 and 3.
10670 (define_insn "xop_phaddbd"
10671 [(set (match_operand:V4SI 0 "register_operand" "=x")
10676 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10677 (parallel [(const_int 0)
10684 (parallel [(const_int 1)
10687 (const_int 13)]))))
10692 (parallel [(const_int 2)
10699 (parallel [(const_int 3)
10702 (const_int 15)]))))))]
10704 "vphaddbd\t{%1, %0|%0, %1}"
10705 [(set_attr "type" "sseiadd1")])
;; vphaddbq: V16QI register-or-memory source, V2DI register result.
;; Eight element selections are combined; the visible ones start at
;; elements 0-3 and 8-11.
10707 (define_insn "xop_phaddbq"
10708 [(set (match_operand:V2DI 0 "register_operand" "=x")
10714 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10715 (parallel [(const_int 0)
10720 (parallel [(const_int 1)
10726 (parallel [(const_int 2)
10731 (parallel [(const_int 3)
10732 (const_int 7)])))))
10738 (parallel [(const_int 8)
10743 (parallel [(const_int 9)
10744 (const_int 13)]))))
10749 (parallel [(const_int 10)
10754 (parallel [(const_int 11)
10755 (const_int 15)])))))))]
10757 "vphaddbq\t{%1, %0|%0, %1}"
10758 [(set_attr "type" "sseiadd1")])
;; vphaddwd: V8HI register-or-memory source, V4SI register result.  The
;; two element selections start at elements 0 and 1 respectively.
10760 (define_insn "xop_phaddwd"
10761 [(set (match_operand:V4SI 0 "register_operand" "=x")
10765 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10766 (parallel [(const_int 0)
10773 (parallel [(const_int 1)
10776 (const_int 7)])))))]
10778 "vphaddwd\t{%1, %0|%0, %1}"
10779 [(set_attr "type" "sseiadd1")])
;; vphaddwq: V8HI register-or-memory source, V2DI register result.  Four
;; element selections are combined, starting at elements 0, 1, 2 and 3.
10781 (define_insn "xop_phaddwq"
10782 [(set (match_operand:V2DI 0 "register_operand" "=x")
10787 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10788 (parallel [(const_int 0)
10793 (parallel [(const_int 1)
10799 (parallel [(const_int 2)
10804 (parallel [(const_int 3)
10805 (const_int 7)]))))))]
10807 "vphaddwq\t{%1, %0|%0, %1}"
10808 [(set_attr "type" "sseiadd1")])
;; vphadddq: V4SI register-or-memory source, V2DI register result.  The
;; two element selections start at elements 0 and 1 respectively.
10810 (define_insn "xop_phadddq"
10811 [(set (match_operand:V2DI 0 "register_operand" "=x")
10815 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
10816 (parallel [(const_int 0)
10821 (parallel [(const_int 1)
10822 (const_int 3)])))))]
10824 "vphadddq\t{%1, %0|%0, %1}"
10825 [(set_attr "type" "sseiadd1")])
;; vphaddubw: V16QI register-or-memory source, V8HI register result.  The
;; two element selections start at elements 0 and 1 respectively.
10827 (define_insn "xop_phaddubw"
10828 [(set (match_operand:V8HI 0 "register_operand" "=x")
10832 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10833 (parallel [(const_int 0)
10844 (parallel [(const_int 1)
10851 (const_int 15)])))))]
10853 "vphaddubw\t{%1, %0|%0, %1}"
10854 [(set_attr "type" "sseiadd1")])
;; vphaddubd: V16QI register-or-memory source, V4SI register result.
;; Four element selections are combined, starting at elements 0, 1, 2
;; and 3.
10856 (define_insn "xop_phaddubd"
10857 [(set (match_operand:V4SI 0 "register_operand" "=x")
10862 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10863 (parallel [(const_int 0)
10870 (parallel [(const_int 1)
10873 (const_int 13)]))))
10878 (parallel [(const_int 2)
10885 (parallel [(const_int 3)
10888 (const_int 15)]))))))]
10890 "vphaddubd\t{%1, %0|%0, %1}"
10891 [(set_attr "type" "sseiadd1")])
;; vphaddubq: V16QI register-or-memory source, V2DI register result.
;; Eight element selections are combined; the visible ones start at
;; elements 0-3 and 8-11.
10893 (define_insn "xop_phaddubq"
10894 [(set (match_operand:V2DI 0 "register_operand" "=x")
10900 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10901 (parallel [(const_int 0)
10906 (parallel [(const_int 1)
10912 (parallel [(const_int 2)
10917 (parallel [(const_int 3)
10918 (const_int 7)])))))
10924 (parallel [(const_int 8)
10929 (parallel [(const_int 9)
10930 (const_int 13)]))))
10935 (parallel [(const_int 10)
10940 (parallel [(const_int 11)
10941 (const_int 15)])))))))]
10943 "vphaddubq\t{%1, %0|%0, %1}"
10944 [(set_attr "type" "sseiadd1")])
;; vphadduwd: V8HI register-or-memory source, V4SI register result.  The
;; two element selections start at elements 0 and 1 respectively.
10946 (define_insn "xop_phadduwd"
10947 [(set (match_operand:V4SI 0 "register_operand" "=x")
10951 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10952 (parallel [(const_int 0)
10959 (parallel [(const_int 1)
10962 (const_int 7)])))))]
10964 "vphadduwd\t{%1, %0|%0, %1}"
10965 [(set_attr "type" "sseiadd1")])
;; vphadduwq: V8HI register-or-memory source, V2DI register result.  Four
;; element selections are combined, starting at elements 0, 1, 2 and 3.
10967 (define_insn "xop_phadduwq"
10968 [(set (match_operand:V2DI 0 "register_operand" "=x")
10973 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10974 (parallel [(const_int 0)
10979 (parallel [(const_int 1)
10985 (parallel [(const_int 2)
10990 (parallel [(const_int 3)
10991 (const_int 7)]))))))]
10993 "vphadduwq\t{%1, %0|%0, %1}"
10994 [(set_attr "type" "sseiadd1")])
;; vphaddudq: V4SI register-or-memory source, V2DI register result.  The
;; two element selections start at elements 0 and 1 respectively.
10996 (define_insn "xop_phaddudq"
10997 [(set (match_operand:V2DI 0 "register_operand" "=x")
11001 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
11002 (parallel [(const_int 0)
11007 (parallel [(const_int 1)
11008 (const_int 3)])))))]
11010 "vphaddudq\t{%1, %0|%0, %1}"
11011 [(set_attr "type" "sseiadd1")])
;; vphsubbw: V16QI register-or-memory source, V8HI register result.  The
;; two element selections start at elements 0 and 1 respectively.
11013 (define_insn "xop_phsubbw"
11014 [(set (match_operand:V8HI 0 "register_operand" "=x")
11018 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
11019 (parallel [(const_int 0)
11030 (parallel [(const_int 1)
11037 (const_int 15)])))))]
11039 "vphsubbw\t{%1, %0|%0, %1}"
11040 [(set_attr "type" "sseiadd1")])
;; vphsubwd: V8HI register-or-memory source, V4SI register result.  The
;; two element selections start at elements 0 and 1 respectively.
11042 (define_insn "xop_phsubwd"
11043 [(set (match_operand:V4SI 0 "register_operand" "=x")
11047 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
11048 (parallel [(const_int 0)
11055 (parallel [(const_int 1)
11058 (const_int 7)])))))]
11060 "vphsubwd\t{%1, %0|%0, %1}"
11061 [(set_attr "type" "sseiadd1")])
;; vphsubdq: V4SI register-or-memory source, V2DI register result.  The
;; two element selections start at elements 0 and 1 respectively.
11063 (define_insn "xop_phsubdq"
11064 [(set (match_operand:V2DI 0 "register_operand" "=x")
11068 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
11069 (parallel [(const_int 0)
11074 (parallel [(const_int 1)
11075 (const_int 3)])))))]
11077 "vphsubdq\t{%1, %0|%0, %1}"
11078 [(set_attr "type" "sseiadd1")])
11080 ;; XOP permute instructions
;; vpperm on three V16QI inputs, represented as UNSPEC_XOP_PERMUTE.
;; Operand 1 must be a register; the two alternatives let either operand 2
;; or operand 3 (but, per the insn condition, not both) live in memory.
11081 (define_insn "xop_pperm"
11082 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
11084 [(match_operand:V16QI 1 "register_operand" "x,x")
11085 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
11086 (match_operand:V16QI 3 "nonimmediate_operand" "xm,x")]
11087 UNSPEC_XOP_PERMUTE))]
11088 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
11089 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11090 [(set_attr "type" "sse4arg")
11091 (set_attr "mode" "TI")])
11093 ;; XOP pack instructions that combine two vectors into a smaller vector
;; Pack two V2DI inputs (operands 1 and 2) into one V4SI result using
;; vpperm.  Operand 3 is the V16QI selector, attached with a `use'.
;; At most one of operands 2 and 3 may be in memory.
11094 (define_insn "xop_pperm_pack_v2di_v4si"
11095 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
11098 (match_operand:V2DI 1 "register_operand" "x,x"))
11100 (match_operand:V2DI 2 "nonimmediate_operand" "x,m"))))
11101 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
11102 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
11103 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11104 [(set_attr "type" "sse4arg")
11105 (set_attr "mode" "TI")])
;; Pack two V4SI inputs (operands 1 and 2) into one V8HI result using
;; vpperm.  Operand 3 is the V16QI selector, attached with a `use'.
;; At most one of operands 2 and 3 may be in memory.
11107 (define_insn "xop_pperm_pack_v4si_v8hi"
11108 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
11111 (match_operand:V4SI 1 "register_operand" "x,x"))
11113 (match_operand:V4SI 2 "nonimmediate_operand" "x,m"))))
11114 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
11115 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
11116 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11117 [(set_attr "type" "sse4arg")
11118 (set_attr "mode" "TI")])
;; Pack two V8HI inputs (operands 1 and 2) into one V16QI result using
;; vpperm.  Operand 3 is the V16QI selector, attached with a `use'.
;; At most one of operands 2 and 3 may be in memory.
11120 (define_insn "xop_pperm_pack_v8hi_v16qi"
11121 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
11124 (match_operand:V8HI 1 "register_operand" "x,x"))
11126 (match_operand:V8HI 2 "nonimmediate_operand" "x,m"))))
11127 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
11128 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
11129 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11130 [(set_attr "type" "sse4arg")
11131 (set_attr "mode" "TI")])
11133 ;; XOP packed rotate instructions
;; Vector rotate-left by an SImode count (operand 2).
;; When the count does not satisfy const_0_to_<sserotatemax>_operand, it is
;; converted to <ssescalarmode> if its mode differs, replicated into all
;; <ssescalarnum> slots of a PARALLEL, loaded into a fresh vector register
;; with vec_init<mode>, and the rotate is emitted via xop_vrotl<mode>3.
11134 (define_expand "rotl<mode>3"
11135 [(set (match_operand:SSEMODE1248 0 "register_operand" "")
11136 (rotate:SSEMODE1248
11137 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "")
11138 (match_operand:SI 2 "general_operand")))]
11141 /* If we were given a scalar, convert it to parallel */
11142 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
11144 rtvec vs = rtvec_alloc (<ssescalarnum>);
11145 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
11146 rtx reg = gen_reg_rtx (<MODE>mode);
11147 rtx op2 = operands[2];
11150 if (GET_MODE (op2) != <ssescalarmode>mode)
11152 op2 = gen_reg_rtx (<ssescalarmode>mode);
11153 convert_move (op2, operands[2], false);
11156 for (i = 0; i < <ssescalarnum>; i++)
11157 RTVEC_ELT (vs, i) = op2;
11159 emit_insn (gen_vec_init<mode> (reg, par));
11160 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
;; Vector rotate-right by an SImode count (operand 2).
;; When the count does not satisfy const_0_to_<sserotatemax>_operand, the
;; count is broadcast into a vector register as in rotl<mode>3, that vector
;; is negated with neg<mode>2, and the negated counts are handed to
;; xop_vrotl<mode>3.
11165 (define_expand "rotr<mode>3"
11166 [(set (match_operand:SSEMODE1248 0 "register_operand" "")
11167 (rotatert:SSEMODE1248
11168 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "")
11169 (match_operand:SI 2 "general_operand")))]
11172 /* If we were given a scalar, convert it to parallel */
11173 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
11175 rtvec vs = rtvec_alloc (<ssescalarnum>);
11176 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
11177 rtx neg = gen_reg_rtx (<MODE>mode);
11178 rtx reg = gen_reg_rtx (<MODE>mode);
11179 rtx op2 = operands[2];
11182 if (GET_MODE (op2) != <ssescalarmode>mode)
11184 op2 = gen_reg_rtx (<ssescalarmode>mode);
11185 convert_move (op2, operands[2], false);
11188 for (i = 0; i < <ssescalarnum>; i++)
11189 RTVEC_ELT (vs, i) = op2;
11191 emit_insn (gen_vec_init<mode> (reg, par));
11192 emit_insn (gen_neg<mode>2 (neg, reg));
11193 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], neg));
;; Rotate-left of every element by an immediate count.  Operand 2 must
;; satisfy const_0_to_<sserotatemax>_operand and is printed directly as the
;; immediate of vprot<ssevecsize>.
11198 (define_insn "xop_rotl<mode>3"
11199 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11200 (rotate:SSEMODE1248
11201 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm")
11202 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
11204 "vprot<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
11205 [(set_attr "type" "sseishft")
11206 (set_attr "length_immediate" "1")
11207 (set_attr "mode" "TI")])
;; Rotate-right of every element by an immediate count, emitted as a
;; vprot<ssevecsize> rotate-left by the complementary amount.
;; The complement is taken against the bit width of one element
;; (GET_MODE_BITSIZE of <ssescalarmode>).  Using <ssescalarnum> * 8 here is
;; wrong in general: element count times 8 matches the element width only
;; for V4SI, and for V2DI it gives 16 instead of 64.
11209 (define_insn "xop_rotr<mode>3"
11210 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11211 (rotatert:SSEMODE1248
11212 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm")
11213 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
11216 operands[3] = GEN_INT (GET_MODE_BITSIZE (<ssescalarmode>mode) - INTVAL (operands[2]));
11217 return \"vprot<ssevecsize>\t{%3, %1, %0|%0, %1, %3}\";
11219 [(set_attr "type" "sseishft")
11220 (set_attr "length_immediate" "1")
11221 (set_attr "mode" "TI")])
;; Rotate-right with a per-element count vector (operand 2).  The counts are
;; negated into a fresh register with neg<mode>2 and passed to
;; xop_vrotl<mode>3.
11223 (define_expand "vrotr<mode>3"
11224 [(match_operand:SSEMODE1248 0 "register_operand" "")
11225 (match_operand:SSEMODE1248 1 "register_operand" "")
11226 (match_operand:SSEMODE1248 2 "register_operand" "")]
11229 rtx reg = gen_reg_rtx (<MODE>mode);
11230 emit_insn (gen_neg<mode>2 (reg, operands[2]));
11231 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
;; Rotate-left with a per-element count vector (operand 2); forwards all
;; three operands unchanged to xop_vrotl<mode>3.
11235 (define_expand "vrotl<mode>3"
11236 [(match_operand:SSEMODE1248 0 "register_operand" "")
11237 (match_operand:SSEMODE1248 1 "register_operand" "")
11238 (match_operand:SSEMODE1248 2 "register_operand" "")]
11241 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], operands[2]));
;; Variable per-element rotate, emitted as vprot<ssevecsize>.  The RTL is an
;; if_then_else over operand 2 whose arms are a rotate of operand 1 and a
;; rotatert by the negated operand 2.  Either operand 1 or operand 2 may be
;; in memory, but the insn condition rejects both being memory.
11245 (define_insn "xop_vrotl<mode>3"
11246 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
11247 (if_then_else:SSEMODE1248
11249 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "x,m")
11251 (rotate:SSEMODE1248
11252 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm,x")
11254 (rotatert:SSEMODE1248
11256 (neg:SSEMODE1248 (match_dup 2)))))]
11257 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11258 "vprot<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
11259 [(set_attr "type" "sseishft")
11260 (set_attr "prefix_data16" "0")
11261 (set_attr "prefix_extra" "2")
11262 (set_attr "mode" "TI")])
11264 ;; XOP packed shift instructions.
11265 ;; FIXME: add V2DI back in
;; Logical shift right with a per-element count vector (SSEMODE124 only).
;; The counts are negated with neg<mode>2 and passed to xop_lshl<mode>3.
11266 (define_expand "vlshr<mode>3"
11267 [(match_operand:SSEMODE124 0 "register_operand" "")
11268 (match_operand:SSEMODE124 1 "register_operand" "")
11269 (match_operand:SSEMODE124 2 "register_operand" "")]
11272 rtx neg = gen_reg_rtx (<MODE>mode);
11273 emit_insn (gen_neg<mode>2 (neg, operands[2]));
11274 emit_insn (gen_xop_lshl<mode>3 (operands[0], operands[1], neg));
;; Arithmetic shift right with a per-element count vector (SSEMODE124 only).
;; The counts are negated with neg<mode>2 and passed to xop_ashl<mode>3.
11278 (define_expand "vashr<mode>3"
11279 [(match_operand:SSEMODE124 0 "register_operand" "")
11280 (match_operand:SSEMODE124 1 "register_operand" "")
11281 (match_operand:SSEMODE124 2 "register_operand" "")]
11284 rtx neg = gen_reg_rtx (<MODE>mode);
11285 emit_insn (gen_neg<mode>2 (neg, operands[2]));
11286 emit_insn (gen_xop_ashl<mode>3 (operands[0], operands[1], neg));
;; Shift left with a per-element count vector (SSEMODE124 only); forwards
;; all three operands unchanged to xop_ashl<mode>3.
11290 (define_expand "vashl<mode>3"
11291 [(match_operand:SSEMODE124 0 "register_operand" "")
11292 (match_operand:SSEMODE124 1 "register_operand" "")
11293 (match_operand:SSEMODE124 2 "register_operand" "")]
11296 emit_insn (gen_xop_ashl<mode>3 (operands[0], operands[1], operands[2]));
;; Variable per-element arithmetic shift, emitted as vpsha<ssevecsize>.
;; The RTL is an if_then_else over operand 2 whose arms are an ashift of
;; operand 1 and an ashiftrt by the negated operand 2.  The insn condition
;; rejects operands 1 and 2 both being memory.
11300 (define_insn "xop_ashl<mode>3"
11301 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
11302 (if_then_else:SSEMODE1248
11304 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "x,m")
11306 (ashift:SSEMODE1248
11307 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm,x")
11309 (ashiftrt:SSEMODE1248
11311 (neg:SSEMODE1248 (match_dup 2)))))]
11312 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11313 "vpsha<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
11314 [(set_attr "type" "sseishft")
11315 (set_attr "prefix_data16" "0")
11316 (set_attr "prefix_extra" "2")
11317 (set_attr "mode" "TI")])
;; Variable per-element logical shift, emitted as vpshl<ssevecsize>.
;; The RTL is an if_then_else over operand 2 whose arms are an ashift of
;; operand 1 and an lshiftrt by the negated operand 2.  The insn condition
;; rejects operands 1 and 2 both being memory.
11319 (define_insn "xop_lshl<mode>3"
11320 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
11321 (if_then_else:SSEMODE1248
11323 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "x,m")
11325 (ashift:SSEMODE1248
11326 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm,x")
11328 (lshiftrt:SSEMODE1248
11330 (neg:SSEMODE1248 (match_dup 2)))))]
11331 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11332 "vpshl<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
11333 [(set_attr "type" "sseishft")
11334 (set_attr "prefix_data16" "0")
11335 (set_attr "prefix_extra" "2")
11336 (set_attr "mode" "TI")])
11338 ;; SSE2 doesn't have some shift variants, so define versions for XOP
;; V16QI shift left by a scalar count (operand 2, SImode, non-memory).
;; The count is placed in all 16 slots of a PARALLEL, materialised in a
;; V16QI register with vec_initv16qi, and used as the per-element count of
;; xop_ashlv16qi3.
11339 (define_expand "ashlv16qi3"
11340 [(match_operand:V16QI 0 "register_operand" "")
11341 (match_operand:V16QI 1 "register_operand" "")
11342 (match_operand:SI 2 "nonmemory_operand" "")]
11345 rtvec vs = rtvec_alloc (16);
11346 rtx par = gen_rtx_PARALLEL (V16QImode, vs);
11347 rtx reg = gen_reg_rtx (V16QImode);
11349 for (i = 0; i < 16; i++)
11350 RTVEC_ELT (vs, i) = operands[2];
11352 emit_insn (gen_vec_initv16qi (reg, par));
11353 emit_insn (gen_xop_ashlv16qi3 (operands[0], operands[1], reg));
;; Same construction as ashlv16qi3 (broadcast the scalar count into a V16QI
;; register via vec_initv16qi), but the shift is emitted through
;; xop_lshlv16qi3.
11357 (define_expand "lshlv16qi3"
11358 [(match_operand:V16QI 0 "register_operand" "")
11359 (match_operand:V16QI 1 "register_operand" "")
11360 (match_operand:SI 2 "nonmemory_operand" "")]
11363 rtvec vs = rtvec_alloc (16);
11364 rtx par = gen_rtx_PARALLEL (V16QImode, vs);
11365 rtx reg = gen_reg_rtx (V16QImode);
11367 for (i = 0; i < 16; i++)
11368 RTVEC_ELT (vs, i) = operands[2];
11370 emit_insn (gen_vec_initv16qi (reg, par));
11371 emit_insn (gen_xop_lshlv16qi3 (operands[0], operands[1], reg));
;; V16QI arithmetic shift right by a scalar count, implemented with
;; xop_ashlv16qi3 and a negated count.
;; A CONST_INT count is negated at expand time before being broadcast.
;; A non-constant count is broadcast first and the resulting vector is
;; negated with negv16qi2.
11375 (define_expand "ashrv16qi3"
11376 [(match_operand:V16QI 0 "register_operand" "")
11377 (match_operand:V16QI 1 "register_operand" "")
11378 (match_operand:SI 2 "nonmemory_operand" "")]
11381 rtvec vs = rtvec_alloc (16);
11382 rtx par = gen_rtx_PARALLEL (V16QImode, vs);
11383 rtx reg = gen_reg_rtx (V16QImode);
11385 rtx ele = ((CONST_INT_P (operands[2]))
11386 ? GEN_INT (- INTVAL (operands[2]))
11389 for (i = 0; i < 16; i++)
11390 RTVEC_ELT (vs, i) = ele;
11392 emit_insn (gen_vec_initv16qi (reg, par));
11394 if (!CONST_INT_P (operands[2]))
11396 rtx neg = gen_reg_rtx (V16QImode);
11397 emit_insn (gen_negv16qi2 (neg, reg));
11398 emit_insn (gen_xop_ashlv16qi3 (operands[0], operands[1], neg));
11401 emit_insn (gen_xop_ashlv16qi3 (operands[0], operands[1], reg));
;; V2DI arithmetic shift right by a scalar DImode count, implemented with
;; xop_ashlv2di3 and a negated count.
;; A CONST_INT count is negated with GEN_INT.  Any other count is negated
;; into a fresh DImode register with negdi2, after a convert_move when its
;; mode is not DImode.  The negated count fills both elements of the vector
;; built by vec_initv2di.
11406 (define_expand "ashrv2di3"
11407 [(match_operand:V2DI 0 "register_operand" "")
11408 (match_operand:V2DI 1 "register_operand" "")
11409 (match_operand:DI 2 "nonmemory_operand" "")]
11412 rtvec vs = rtvec_alloc (2);
11413 rtx par = gen_rtx_PARALLEL (V2DImode, vs);
11414 rtx reg = gen_reg_rtx (V2DImode);
11417 if (CONST_INT_P (operands[2]))
11418 ele = GEN_INT (- INTVAL (operands[2]));
11419 else if (GET_MODE (operands[2]) != DImode)
11421 rtx move = gen_reg_rtx (DImode);
11422 ele = gen_reg_rtx (DImode);
11423 convert_move (move, operands[2], false);
11424 emit_insn (gen_negdi2 (ele, move));
11428 ele = gen_reg_rtx (DImode);
11429 emit_insn (gen_negdi2 (ele, operands[2]));
11432 RTVEC_ELT (vs, 0) = ele;
11433 RTVEC_ELT (vs, 1) = ele;
11434 emit_insn (gen_vec_initv2di (reg, par));
11435 emit_insn (gen_xop_ashlv2di3 (operands[0], operands[1], reg));
11439 ;; XOP FRCZ support
;; vfrcz<ssemodesuffixf4> over the SSEMODEF2P modes: operand 1 (register or
;; memory) is the source, operand 0 the register result.
11441 (define_insn "xop_frcz<mode>2"
11442 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
11444 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm")]
11447 "vfrcz<ssemodesuffixf4>\t{%1, %0|%0, %1}"
11448 [(set_attr "type" "ssecvt1")
11449 (set_attr "mode" "<MODE>")])
;; vec_merge form emitting vfrcz<ssemodesuffixf2s>.  Operand 2 (register or
;; memory) is the source printed in the template; operand 1 is tied to the
;; destination by the "0" constraint and supplies the other vec_merge arm.
11452 (define_insn "xop_vmfrcz<mode>2"
11453 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
11454 (vec_merge:SSEMODEF2P
11456 [(match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
11458 (match_operand:SSEMODEF2P 1 "register_operand" "0")
11461 "vfrcz<ssemodesuffixf2s>\t{%2, %0|%0, %2}"
11462 [(set_attr "type" "ssecvt1")
11463 (set_attr "mode" "<MODE>")])
;; Counterpart of xop_frcz<mode>2 for the FMA4MODEF4 iterator; emits
;; vfrcz<fma4modesuffixf4> with operand 1 as source and operand 0 as result.
11465 (define_insn "xop_frcz<mode>2256"
11466 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x")
11468 [(match_operand:FMA4MODEF4 1 "nonimmediate_operand" "xm")]
11471 "vfrcz<fma4modesuffixf4>\t{%1, %0|%0, %1}"
11472 [(set_attr "type" "ssecvt1")
11473 (set_attr "mode" "<MODE>")])
;; Integer vector compare producing a vector result.  Operator 1 must
;; satisfy ix86_comparison_int_operator and is printed through %Y1 as part
;; of the vpcom mnemonic.  Operand 2 is a register; operand 3 may be memory.
11475 (define_insn "xop_maskcmp<mode>3"
11476 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11477 (match_operator:SSEMODE1248 1 "ix86_comparison_int_operator"
11478 [(match_operand:SSEMODE1248 2 "register_operand" "x")
11479 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")]))]
11481 "vpcom%Y1<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
11482 [(set_attr "type" "sse4arg")
11483 (set_attr "prefix_data16" "0")
11484 (set_attr "prefix_rep" "0")
11485 (set_attr "prefix_extra" "2")
11486 (set_attr "length_immediate" "1")
11487 (set_attr "mode" "TI")])
;; Variant of xop_maskcmp<mode>3 for operators accepted by
;; ix86_comparison_uns_operator; the mnemonic carries a `u' after the
;; condition printed by %Y1.
11489 (define_insn "xop_maskcmp_uns<mode>3"
11490 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11491 (match_operator:SSEMODE1248 1 "ix86_comparison_uns_operator"
11492 [(match_operand:SSEMODE1248 2 "register_operand" "x")
11493 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")]))]
11495 "vpcom%Y1u<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
11496 [(set_attr "type" "ssecmp")
11497 (set_attr "prefix_data16" "0")
11498 (set_attr "prefix_rep" "0")
11499 (set_attr "prefix_extra" "2")
11500 (set_attr "length_immediate" "1")
11501 (set_attr "mode" "TI")])
11503 ;; Version of pcom*u* that is called from the intrinsics that allows pcomequ*
11504 ;; and pcomneu* not to be converted to the signed ones in case somebody needs
11505 ;; the exact instruction generated for the intrinsic.
;; Same operands and output template as xop_maskcmp_uns<mode>3, but the
;; comparison is wrapped in UNSPEC_XOP_UNSIGNED_CMP.
11506 (define_insn "xop_maskcmp_uns2<mode>3"
11507 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11508 (unspec:SSEMODE1248
11509 [(match_operator:SSEMODE1248 1 "ix86_comparison_uns_operator"
11510 [(match_operand:SSEMODE1248 2 "register_operand" "x")
11511 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")])]
11512 UNSPEC_XOP_UNSIGNED_CMP))]
11514 "vpcom%Y1u<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
11515 [(set_attr "type" "ssecmp")
11516 (set_attr "prefix_data16" "0")
11517 (set_attr "prefix_extra" "2")
11518 (set_attr "length_immediate" "1")
11519 (set_attr "mode" "TI")])
11521 ;; Pcomtrue and pcomfalse support. These are useless instructions, but are
11522 ;; being added here to be complete.
;; Represented as UNSPEC_XOP_TRUEFALSE.  Operand 3 is a constant selector:
;; a nonzero value emits vpcomtrue<ssevecsize>, zero emits
;; vpcomfalse<ssevecsize>.
11523 (define_insn "xop_pcom_tf<mode>3"
11524 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11525 (unspec:SSEMODE1248
11526 [(match_operand:SSEMODE1248 1 "register_operand" "x")
11527 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm")
11528 (match_operand:SI 3 "const_int_operand" "n")]
11529 UNSPEC_XOP_TRUEFALSE))]
11532 return ((INTVAL (operands[3]) != 0)
11533 ? "vpcomtrue<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
11534 : "vpcomfalse<ssevecsize>\t{%2, %1, %0|%0, %1, %2}");
11536 [(set_attr "type" "ssecmp")
11537 (set_attr "prefix_data16" "0")
11538 (set_attr "prefix_extra" "2")
11539 (set_attr "length_immediate" "1")
11540 (set_attr "mode" "TI")])
11542 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; VEX-encoded, three-operand vaesenc; enabled under TARGET_AES && TARGET_AVX.
;; Operand 1 is a register source separate from the destination; operand 2
;; may be memory.
11543 (define_insn "*avx_aesenc"
11544 [(set (match_operand:V2DI 0 "register_operand" "=x")
11545 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
11546 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11548 "TARGET_AES && TARGET_AVX"
11549 "vaesenc\t{%2, %1, %0|%0, %1, %2}"
11550 [(set_attr "type" "sselog1")
11551 (set_attr "prefix_extra" "1")
11552 (set_attr "prefix" "vex")
11553 (set_attr "mode" "TI")])
;; Two-operand aesenc: operand 1 is tied to the destination ("0"
;; constraint) and operand 2 may be a register or memory.
11555 (define_insn "aesenc"
11556 [(set (match_operand:V2DI 0 "register_operand" "=x")
11557 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11558 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11561 "aesenc\t{%2, %0|%0, %2}"
11562 [(set_attr "type" "sselog1")
11563 (set_attr "prefix_extra" "1")
11564 (set_attr "mode" "TI")])
;; VEX-encoded, three-operand vaesenclast (UNSPEC_AESENCLAST); enabled under
;; TARGET_AES && TARGET_AVX.
11566 (define_insn "*avx_aesenclast"
11567 [(set (match_operand:V2DI 0 "register_operand" "=x")
11568 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
11569 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11570 UNSPEC_AESENCLAST))]
11571 "TARGET_AES && TARGET_AVX"
11572 "vaesenclast\t{%2, %1, %0|%0, %1, %2}"
11573 [(set_attr "type" "sselog1")
11574 (set_attr "prefix_extra" "1")
11575 (set_attr "prefix" "vex")
11576 (set_attr "mode" "TI")])
;; Two-operand aesenclast (UNSPEC_AESENCLAST): operand 1 is tied to the
;; destination and operand 2 may be a register or memory.
11578 (define_insn "aesenclast"
11579 [(set (match_operand:V2DI 0 "register_operand" "=x")
11580 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11581 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11582 UNSPEC_AESENCLAST))]
11584 "aesenclast\t{%2, %0|%0, %2}"
11585 [(set_attr "type" "sselog1")
11586 (set_attr "prefix_extra" "1")
11587 (set_attr "mode" "TI")])
;; VEX-encoded, three-operand vaesdec; enabled under TARGET_AES && TARGET_AVX.
11589 (define_insn "*avx_aesdec"
11590 [(set (match_operand:V2DI 0 "register_operand" "=x")
11591 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
11592 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11594 "TARGET_AES && TARGET_AVX"
11595 "vaesdec\t{%2, %1, %0|%0, %1, %2}"
11596 [(set_attr "type" "sselog1")
11597 (set_attr "prefix_extra" "1")
11598 (set_attr "prefix" "vex")
11599 (set_attr "mode" "TI")])
;; Two-operand aesdec: operand 1 is tied to the destination and operand 2
;; may be a register or memory.
11601 (define_insn "aesdec"
11602 [(set (match_operand:V2DI 0 "register_operand" "=x")
11603 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11604 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11607 "aesdec\t{%2, %0|%0, %2}"
11608 [(set_attr "type" "sselog1")
11609 (set_attr "prefix_extra" "1")
11610 (set_attr "mode" "TI")])
;; VEX-encoded, three-operand vaesdeclast (UNSPEC_AESDECLAST); enabled under
;; TARGET_AES && TARGET_AVX.
11612 (define_insn "*avx_aesdeclast"
11613 [(set (match_operand:V2DI 0 "register_operand" "=x")
11614 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
11615 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11616 UNSPEC_AESDECLAST))]
11617 "TARGET_AES && TARGET_AVX"
11618 "vaesdeclast\t{%2, %1, %0|%0, %1, %2}"
11619 [(set_attr "type" "sselog1")
11620 (set_attr "prefix_extra" "1")
11621 (set_attr "prefix" "vex")
11622 (set_attr "mode" "TI")])
;; Two-operand aesdeclast (UNSPEC_AESDECLAST): operand 1 is tied to the
;; destination and operand 2 may be a register or memory.
11624 (define_insn "aesdeclast"
11625 [(set (match_operand:V2DI 0 "register_operand" "=x")
11626 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11627 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11628 UNSPEC_AESDECLAST))]
11630 "aesdeclast\t{%2, %0|%0, %2}"
11631 [(set_attr "type" "sselog1")
11632 (set_attr "prefix_extra" "1")
11633 (set_attr "mode" "TI")])
;; aesimc with a single source (operand 1, register or memory).  One pattern
;; serves both encodings: the template begins with %v and the prefix
;; attribute is maybe_vex.
11635 (define_insn "aesimc"
11636 [(set (match_operand:V2DI 0 "register_operand" "=x")
11637 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")]
11640 "%vaesimc\t{%1, %0|%0, %1}"
11641 [(set_attr "type" "sselog1")
11642 (set_attr "prefix_extra" "1")
11643 (set_attr "prefix" "maybe_vex")
11644 (set_attr "mode" "TI")])
;; aeskeygenassist (UNSPEC_AESKEYGENASSIST): operand 1 is the source
;; (register or memory) and operand 2 an 8-bit immediate (0..255).
;; Uses the %v template prefix with a maybe_vex prefix attribute.
11646 (define_insn "aeskeygenassist"
11647 [(set (match_operand:V2DI 0 "register_operand" "=x")
11648 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")
11649 (match_operand:SI 2 "const_0_to_255_operand" "n")]
11650 UNSPEC_AESKEYGENASSIST))]
11652 "%vaeskeygenassist\t{%2, %1, %0|%0, %1, %2}"
11653 [(set_attr "type" "sselog1")
11654 (set_attr "prefix_extra" "1")
11655 (set_attr "length_immediate" "1")
11656 (set_attr "prefix" "maybe_vex")
11657 (set_attr "mode" "TI")])
;; VEX-encoded vpclmulqdq; enabled under TARGET_PCLMUL && TARGET_AVX.
;; Operands 1 and 2 are the V2DI sources (operand 2 may be memory) and
;; operand 3 is an 8-bit immediate (0..255).
11659 (define_insn "*vpclmulqdq"
11660 [(set (match_operand:V2DI 0 "register_operand" "=x")
11661 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
11662 (match_operand:V2DI 2 "nonimmediate_operand" "xm")
11663 (match_operand:SI 3 "const_0_to_255_operand" "n")]
11665 "TARGET_PCLMUL && TARGET_AVX"
11666 "vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11667 [(set_attr "type" "sselog1")
11668 (set_attr "prefix_extra" "1")
11669 (set_attr "length_immediate" "1")
11670 (set_attr "prefix" "vex")
11671 (set_attr "mode" "TI")])
;; Two-operand pclmulqdq: operand 1 is tied to the destination, operand 2
;; may be memory, and operand 3 is an 8-bit immediate (0..255).
11673 (define_insn "pclmulqdq"
11674 [(set (match_operand:V2DI 0 "register_operand" "=x")
11675 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11676 (match_operand:V2DI 2 "nonimmediate_operand" "xm")
11677 (match_operand:SI 3 "const_0_to_255_operand" "n")]
11680 "pclmulqdq\t{%3, %2, %0|%0, %2, %3}"
11681 [(set_attr "type" "sselog1")
11682 (set_attr "prefix_extra" "1")
11683 (set_attr "length_immediate" "1")
11684 (set_attr "mode" "TI")])
;; Builds the PARALLEL for vzeroall by hand.  Element 0 is an
;; UNSPEC_VOLATILE; it is followed by one SET per SSE register (16 when
;; TARGET_64BIT, otherwise 8) that stores CONST0_RTX into the register
;; viewed in V8SImode.
11686 (define_expand "avx_vzeroall"
11687 [(match_par_dup 0 [(const_int 0)])]
11690 int nregs = TARGET_64BIT ? 16 : 8;
11693 operands[0] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs + 1));
11695 XVECEXP (operands[0], 0, 0)
11696 = gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, const0_rtx),
11699 for (regno = 0; regno < nregs; regno++)
11700 XVECEXP (operands[0], 0, regno + 1)
11701 = gen_rtx_SET (VOIDmode,
11702 gen_rtx_REG (V8SImode, SSE_REGNO (regno)),
11703 CONST0_RTX (V8SImode));
;; Matches a PARALLEL accepted by the vzeroall_operation predicate whose
;; first element is the UNSPECV_VZEROALL unspec_volatile.  The attributes
;; record no modrm byte and no memory access.
11706 (define_insn "*avx_vzeroall"
11707 [(match_parallel 0 "vzeroall_operation"
11708 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROALL)])]
11711 [(set_attr "type" "sse")
11712 (set_attr "modrm" "0")
11713 (set_attr "memory" "none")
11714 (set_attr "prefix" "vex")
11715 (set_attr "mode" "OI")])
11717 ;; vzeroupper clobbers the upper 128bits of AVX registers.
;; Builds the PARALLEL for vzeroupper by hand.  Element 0 is the
;; UNSPECV_VZEROUPPER unspec_volatile; it is followed by one CLOBBER per SSE
;; register (16 when TARGET_64BIT, otherwise 8), each viewed in V8SImode.
11718 (define_expand "avx_vzeroupper"
11719 [(match_par_dup 0 [(const_int 0)])]
11722 int nregs = TARGET_64BIT ? 16 : 8;
11725 operands[0] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs + 1));
11727 XVECEXP (operands[0], 0, 0)
11728 = gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, const0_rtx),
11729 UNSPECV_VZEROUPPER);
11731 for (regno = 0; regno < nregs; regno++)
11732 XVECEXP (operands[0], 0, regno + 1)
11733 = gen_rtx_CLOBBER (VOIDmode,
11734 gen_rtx_REG (V8SImode, SSE_REGNO (regno)));
;; Matches a PARALLEL accepted by the vzeroupper_operation predicate whose
;; first element is the UNSPECV_VZEROUPPER unspec_volatile.  The attributes
;; record no modrm byte and no memory access.
11737 (define_insn "*avx_vzeroupper"
11738 [(match_parallel 0 "vzeroupper_operation"
11739 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)])]
11742 [(set_attr "type" "sse")
11743 (set_attr "modrm" "0")
11744 (set_attr "memory" "none")
11745 (set_attr "prefix" "vex")
11746 (set_attr "mode" "OI")])
;; Broadcast a scalar into every element of a 256-bit vector.
;; A memory source uses vbroadcasts<avxmodesuffixf2c>.  A register source is
;; split after reload into a vec_duplicate into the half-width view of the
;; destination register, followed by a vec_concat of that half with itself.
11748 (define_insn_and_split "vec_dup<mode>"
11749 [(set (match_operand:AVX256MODE24P 0 "register_operand" "=x,x")
11750 (vec_duplicate:AVX256MODE24P
11751 (match_operand:<avxscalarmode> 1 "nonimmediate_operand" "m,?x")))]
11754 vbroadcasts<avxmodesuffixf2c>\t{%1, %0|%0, %1}
11756 "&& reload_completed && REG_P (operands[1])"
11757 [(set (match_dup 2) (vec_duplicate:<avxhalfvecmode> (match_dup 1)))
11758 (set (match_dup 0) (vec_concat:AVX256MODE24P (match_dup 2) (match_dup 2)))]
11760 operands[2] = gen_rtx_REG (<avxhalfvecmode>mode, REGNO (operands[0]));
11762 [(set_attr "type" "ssemov")
11763 (set_attr "prefix_extra" "1")
11764 (set_attr "prefix" "vex")
11765 (set_attr "mode" "V8SF")])
;; Duplicate a half-width vector (operand 1) into a 256-bit result.
;; Three alternatives, in order: a memory source uses vbroadcastf128; a
;; source tied to the destination uses vinsertf128 with immediate 1; any
;; other register source uses vperm2f128 with immediate 0.
11767 (define_insn "avx_vbroadcastf128_<mode>"
11768 [(set (match_operand:AVX256MODE 0 "register_operand" "=x,x,x")
11769 (vec_concat:AVX256MODE
11770 (match_operand:<avxhalfvecmode> 1 "nonimmediate_operand" "m,0,?x")
11774 vbroadcastf128\t{%1, %0|%0, %1}
11775 vinsertf128\t{$1, %1, %0, %0|%0, %0, %1, 1}
11776 vperm2f128\t{$0, %t1, %t1, %0|%0, %t1, %t1, 0}"
11777 [(set_attr "type" "ssemov,sselog1,sselog1")
11778 (set_attr "prefix_extra" "1")
11779 (set_attr "length_immediate" "0,1,1")
11780 (set_attr "prefix" "vex")
11781 (set_attr "mode" "V4SF,V8SF,V8SF")])
11783 ;; Recognize broadcast as a vec_select as produced by builtin_vec_perm.
11784 ;; If it so happens that the input is in memory, use vbroadcast.
11785 ;; Otherwise use vpermilp (and in the case of 256-bit modes, vperm2f128).
;; V4SF broadcast expressed as a selection from operand 1; operand 3 is the
;; element index that the avx_vbroadcast_operand parallel repeats.
;; For a memory source the address is advanced by elt * 4 bytes and
;; vbroadcastss loads that SFmode element.  Otherwise vpermilps is used
;; with immediate elt * 0x55, i.e. the index in all four 2-bit fields.
11786 (define_insn "*avx_vperm_broadcast_v4sf"
11787 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
11789 (match_operand:V4SF 1 "nonimmediate_operand" "m,o,x")
11790 (match_parallel 2 "avx_vbroadcast_operand"
11791 [(match_operand 3 "const_int_operand" "C,n,n")])))]
11794 int elt = INTVAL (operands[3]);
11795 switch (which_alternative)
11799 operands[1] = adjust_address_nv (operands[1], SFmode, elt * 4);
11800 return "vbroadcastss\t{%1, %0|%0, %1}";
11802 operands[2] = GEN_INT (elt * 0x55);
11803 return "vpermilps\t{%2, %1, %0|%0, %1, %2}";
11805 gcc_unreachable ();
11808 [(set_attr "type" "ssemov,ssemov,sselog1")
11809 (set_attr "prefix_extra" "1")
11810 (set_attr "length_immediate" "0,0,1")
11811 (set_attr "prefix" "vex")
11812 (set_attr "mode" "SF,SF,V4SF")])
;; 256-bit broadcast expressed as a vec_select; always split after reload.
;; The split code shown has two paths.  One emits avx_vpermil<mode> to
;; replicate the element within its 128-bit lane, then
;; avx_vperm2f128<mode>3 to copy that lane to both halves.  The other
;; narrows operand 1 with adjust_address_nv to the selected scalar so that
;; the vec_duplicate replacement pattern applies.
;; NOTE(review): the test choosing between the two paths is not visible
;; here; presumably register vs. memory source -- confirm.
11814 (define_insn_and_split "*avx_vperm_broadcast_<mode>"
11815 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "=x,x,x")
11816 (vec_select:AVX256MODEF2P
11817 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "m,o,?x")
11818 (match_parallel 2 "avx_vbroadcast_operand"
11819 [(match_operand 3 "const_int_operand" "C,n,n")])))]
11822 "&& reload_completed"
11823 [(set (match_dup 0) (vec_duplicate:AVX256MODEF2P (match_dup 1)))]
11825 rtx op0 = operands[0], op1 = operands[1];
11826 int elt = INTVAL (operands[3]);
11832 /* Shuffle element we care about into all elements of the 128-bit lane.
11833 The other lane gets shuffled too, but we don't care. */
11834 if (<MODE>mode == V4DFmode)
11835 mask = (elt & 1 ? 15 : 0);
11837 mask = (elt & 3) * 0x55;
11838 emit_insn (gen_avx_vpermil<mode> (op0, op1, GEN_INT (mask)));
11840 /* Shuffle the lane we care about into both lanes of the dest. */
11841 mask = (elt / (<ssescalarnum> / 2)) * 0x11;
11842 emit_insn (gen_avx_vperm2f128<mode>3 (op0, op0, op0, GEN_INT (mask)));
11846 operands[1] = adjust_address_nv (op1, <avxscalarmode>mode,
11847 elt * GET_MODE_SIZE (<avxscalarmode>mode));
;; vpermil expander for the AVXMODEFDP modes.  The immediate (operand 2) is
;; decoded one bit per element into explicit selector indices: bits 0 and 1
;; give elements 0 and 1, and for V4DF bits 2 and 3 give elements 2 and 3
;; with 2 added.  The indices are collected into a PARALLEL for the
;; vec_select.
11850 (define_expand "avx_vpermil<mode>"
11851 [(set (match_operand:AVXMODEFDP 0 "register_operand" "")
11852 (vec_select:AVXMODEFDP
11853 (match_operand:AVXMODEFDP 1 "nonimmediate_operand" "")
11854 (match_operand:SI 2 "const_0_to_255_operand" "")))]
11857 int mask = INTVAL (operands[2]);
11858 rtx perm[<ssescalarnum>];
11860 perm[0] = GEN_INT (mask & 1);
11861 perm[1] = GEN_INT ((mask >> 1) & 1);
11862 if (<MODE>mode == V4DFmode)
11864 perm[2] = GEN_INT (((mask >> 2) & 1) + 2);
11865 perm[3] = GEN_INT (((mask >> 3) & 1) + 2);
11869 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
;; vpermil expander for the AVXMODEFSP modes.  The immediate (operand 2) is
;; decoded as four 2-bit fields giving selector indices 0..3.  For V8SF the
;; same four fields are reused for elements 4..7 with 4 added.  The indices
;; are collected into a PARALLEL for the vec_select.
11872 (define_expand "avx_vpermil<mode>"
11873 [(set (match_operand:AVXMODEFSP 0 "register_operand" "")
11874 (vec_select:AVXMODEFSP
11875 (match_operand:AVXMODEFSP 1 "nonimmediate_operand" "")
11876 (match_operand:SI 2 "const_0_to_255_operand" "")))]
11879 int mask = INTVAL (operands[2]);
11880 rtx perm[<ssescalarnum>];
11882 perm[0] = GEN_INT (mask & 3);
11883 perm[1] = GEN_INT ((mask >> 2) & 3);
11884 perm[2] = GEN_INT ((mask >> 4) & 3);
11885 perm[3] = GEN_INT ((mask >> 6) & 3);
11886 if (<MODE>mode == V8SFmode)
11888 perm[4] = GEN_INT ((mask & 3) + 4);
11889 perm[5] = GEN_INT (((mask >> 2) & 3) + 4);
11890 perm[6] = GEN_INT (((mask >> 4) & 3) + 4);
11891 perm[7] = GEN_INT (((mask >> 6) & 3) + 4);
11895 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
;; Matches a vec_select whose selector parallel satisfies
;; avx_vpermilp_<mode>_operand.  At output time the immediate is recovered
;; as avx_vpermilp_parallel (...) - 1, stored back into operands[2], and
;; printed as the vpermilp<avxmodesuffixf2c> immediate.
11898 (define_insn "*avx_vpermilp<mode>"
11899 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
11900 (vec_select:AVXMODEF2P
11901 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "xm")
11902 (match_parallel 2 "avx_vpermilp_<mode>_operand"
11903 [(match_operand 3 "const_int_operand" "")])))]
11906 int mask = avx_vpermilp_parallel (operands[2], <MODE>mode) - 1;
11907 operands[2] = GEN_INT (mask);
11908 return "vpermilp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}";
11910 [(set_attr "type" "sselog")
11911 (set_attr "prefix_extra" "1")
11912 (set_attr "length_immediate" "1")
11913 (set_attr "prefix" "vex")
11914 (set_attr "mode" "<MODE>")])
;; vpermilp with a variable control: operand 1 is the data register and
;; operand 2 a control vector of mode <avxpermvecmode> (register or memory).
11916 (define_insn "avx_vpermilvar<mode>3"
11917 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
11919 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
11920 (match_operand:<avxpermvecmode> 2 "nonimmediate_operand" "xm")]
11923 "vpermilp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
11924 [(set_attr "type" "sselog")
11925 (set_attr "prefix_extra" "1")
11926 (set_attr "prefix" "vex")
11927 (set_attr "mode" "<MODE>")])
;; vperm2f128 expander.  Operands 1 and 2 are the two 256-bit sources and
;; operand 3 is the 8-bit immediate, so the control mask must be read from
;; operands[3]; operands[2] is a vector, not a CONST_INT.
;; When neither zeroing bit is set ((mask & 0x88) == 0), the operation is
;; rewritten as a vec_select over the vec_concat of operands 1 and 2.
;; Bits 0-1 of the mask pick the 128-bit lane for the low half of the result
;; and bits 4-5 the lane for the high half.  Otherwise the
;; UNSPEC_VPERMIL2F128 form from the template is kept.
11929 (define_expand "avx_vperm2f128<mode>3"
11930 [(set (match_operand:AVX256MODE2P 0 "register_operand" "")
11931 (unspec:AVX256MODE2P
11932 [(match_operand:AVX256MODE2P 1 "register_operand" "")
11933 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "")
11934 (match_operand:SI 3 "const_0_to_255_operand" "")]
11935 UNSPEC_VPERMIL2F128))]
11938 int mask = INTVAL (operands[3]);
11939 if ((mask & 0x88) == 0)
11941 rtx perm[<ssescalarnum>], t1, t2;
11942 int i, base, nelt = <ssescalarnum>, nelt2 = nelt / 2;
11944 base = (mask & 3) * nelt2;
11945 for (i = 0; i < nelt2; ++i)
11946 perm[i] = GEN_INT (base + i);
11948 base = ((mask >> 4) & 3) * nelt2;
11949 for (i = 0; i < nelt2; ++i)
11950 perm[i + nelt2] = GEN_INT (base + i);
11952 t2 = gen_rtx_VEC_CONCAT (<ssedoublesizemode>mode,
11953 operands[1], operands[2]);
11954 t1 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, perm));
11955 t2 = gen_rtx_VEC_SELECT (<MODE>mode, t2, t1);
11956 t2 = gen_rtx_SET (VOIDmode, operands[0], t2);
11962 ;; Note that bits 7 and 3 of the imm8 allow lanes to be zeroed, which
11963 ;; means that in order to represent this properly in rtl we'd have to
11964 ;; nest *another* vec_concat with a zero operand and do the select from
11965 ;; a 4x wide vector. That doesn't seem very nice.
;; UNSPEC_VPERMIL2F128 form of vperm2f128, accepting any 8-bit immediate
;; (operand 3), which is printed unchanged.
11966 (define_insn "*avx_vperm2f128<mode>_full"
11967 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
11968 (unspec:AVX256MODE2P
11969 [(match_operand:AVX256MODE2P 1 "register_operand" "x")
11970 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm")
11971 (match_operand:SI 3 "const_0_to_255_operand" "n")]
11972 UNSPEC_VPERMIL2F128))]
11974 "vperm2f128\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11975 [(set_attr "type" "sselog")
11976 (set_attr "prefix_extra" "1")
11977 (set_attr "length_immediate" "1")
11978 (set_attr "prefix" "vex")
11979 (set_attr "mode" "V8SF")])
;; vperm2f128 expressed as a vec_select from the vec_concat of operands 1
;; and 2, with a selector parallel satisfying
;; avx_vperm2f128_<mode>_operand.  At output time the immediate is recovered
;; as avx_vperm2f128_parallel (...) - 1 and stored into operands[3].
11981 (define_insn "*avx_vperm2f128<mode>_nozero"
11982 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
11983 (vec_select:AVX256MODE2P
11984 (vec_concat:<ssedoublesizemode>
11985 (match_operand:AVX256MODE2P 1 "register_operand" "x")
11986 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm"))
11987 (match_parallel 3 "avx_vperm2f128_<mode>_operand"
11988 [(match_operand 4 "const_int_operand" "")])))]
11991 int mask = avx_vperm2f128_parallel (operands[3], <MODE>mode) - 1;
11992 operands[3] = GEN_INT (mask);
11993 return "vperm2f128\t{%3, %2, %1, %0|%0, %1, %2, %3}";
11995 [(set_attr "type" "sselog")
11996 (set_attr "prefix_extra" "1")
11997 (set_attr "length_immediate" "1")
11998 (set_attr "prefix" "vex")
11999 (set_attr "mode" "V8SF")])
;; Insert a half-width vector (operand 2) into a 256-bit vector (operand 1).
;; Operand 3 is a 0/1 constant; the switch on it emits either
;; vec_set_lo_<mode> or vec_set_hi_<mode>.
12001 (define_expand "avx_vinsertf128<mode>"
12002 [(match_operand:AVX256MODE 0 "register_operand" "")
12003 (match_operand:AVX256MODE 1 "register_operand" "")
12004 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "")
12005 (match_operand:SI 3 "const_0_to_1_operand" "")]
12008 switch (INTVAL (operands[3]))
12011 emit_insn (gen_vec_set_lo_<mode> (operands[0], operands[1],
12015 emit_insn (gen_vec_set_hi_<mode> (operands[0], operands[1],
12019 gcc_unreachable ();
;; Four-element 256-bit modes: replace the low half.  The result is the
;; vec_concat of operand 2 with elements 2..3 of operand 1, emitted as
;; vinsertf128 with immediate 0.
12024 (define_insn "vec_set_lo_<mode>"
12025 [(set (match_operand:AVX256MODE4P 0 "register_operand" "=x")
12026 (vec_concat:AVX256MODE4P
12027 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")
12028 (vec_select:<avxhalfvecmode>
12029 (match_operand:AVX256MODE4P 1 "register_operand" "x")
12030 (parallel [(const_int 2) (const_int 3)]))))]
12032 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
12033 [(set_attr "type" "sselog")
12034 (set_attr "prefix_extra" "1")
12035 (set_attr "length_immediate" "1")
12036 (set_attr "prefix" "vex")
12037 (set_attr "mode" "V8SF")])
;; Four-element 256-bit modes: replace the high half.  The result is the
;; vec_concat of elements 0..1 of operand 1 with operand 2, emitted as
;; vinsertf128 with immediate 1.
12039 (define_insn "vec_set_hi_<mode>"
12040 [(set (match_operand:AVX256MODE4P 0 "register_operand" "=x")
12041 (vec_concat:AVX256MODE4P
12042 (vec_select:<avxhalfvecmode>
12043 (match_operand:AVX256MODE4P 1 "register_operand" "x")
12044 (parallel [(const_int 0) (const_int 1)]))
12045 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")))]
12047 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
12048 [(set_attr "type" "sselog")
12049 (set_attr "prefix_extra" "1")
12050 (set_attr "length_immediate" "1")
12051 (set_attr "prefix" "vex")
12052 (set_attr "mode" "V8SF")])
;; Eight-element 256-bit modes: replace the low half.  The result is the
;; vec_concat of operand 2 with elements 4..7 of operand 1, emitted as
;; vinsertf128 with immediate 0.
12054 (define_insn "vec_set_lo_<mode>"
12055 [(set (match_operand:AVX256MODE8P 0 "register_operand" "=x")
12056 (vec_concat:AVX256MODE8P
12057 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")
12058 (vec_select:<avxhalfvecmode>
12059 (match_operand:AVX256MODE8P 1 "register_operand" "x")
12060 (parallel [(const_int 4) (const_int 5)
12061 (const_int 6) (const_int 7)]))))]
12063 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
12064 [(set_attr "type" "sselog")
12065 (set_attr "prefix_extra" "1")
12066 (set_attr "length_immediate" "1")
12067 (set_attr "prefix" "vex")
12068 (set_attr "mode" "V8SF")])
;; Replace the high half of a 256-bit vector, for the modes iterated by
;; AVX256MODE8P.
;;   operand 0: destination register
;;   operand 1: source register whose elements 0 to 3 are kept
;;   operand 2: new half (<avxhalfvecmode>), register or memory
;; The result is the concatenation of the selected elements of operand 1
;; followed by operand 2.  Printed as vinsertf128 with immediate 1.
12070 (define_insn "vec_set_hi_<mode>"
12071 [(set (match_operand:AVX256MODE8P 0 "register_operand" "=x")
12072 (vec_concat:AVX256MODE8P
12073 (vec_select:<avxhalfvecmode>
12074 (match_operand:AVX256MODE8P 1 "register_operand" "x")
;; Keep the lower four elements of operand 1.
12075 (parallel [(const_int 0) (const_int 1)
12076 (const_int 2) (const_int 3)]))
12077 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")))]
12079 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
;; The mode attribute is V8SF for every mode of the iterator.
12080 [(set_attr "type" "sselog")
12081 (set_attr "prefix_extra" "1")
12082 (set_attr "length_immediate" "1")
12083 (set_attr "prefix" "vex")
12084 (set_attr "mode" "V8SF")])
;; V16HI variant of the set-low-half pattern.
;;   operand 0: V16HI destination register
;;   operand 1: V16HI source register; the index list names elements 8 to 15
;;   operand 2: V8HI value, register or memory
;; Printed as vinsertf128 with immediate 0.
;; NOTE(review): presumably operand 2 becomes the low half and elements
;; 8 to 15 of operand 1 are kept as the high half, as in the iterator-based
;; vec_set_lo patterns -- confirm against the full RTL template.
12086 (define_insn "vec_set_lo_v16hi"
12087 [(set (match_operand:V16HI 0 "register_operand" "=x")
12089 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
12091 (match_operand:V16HI 1 "register_operand" "x")
;; Indices of the upper eight elements of operand 1.
12092 (parallel [(const_int 8) (const_int 9)
12093 (const_int 10) (const_int 11)
12094 (const_int 12) (const_int 13)
12095 (const_int 14) (const_int 15)]))))]
12097 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
;; The mode attribute is V8SF even though the operands are integer vectors.
12098 [(set_attr "type" "sselog")
12099 (set_attr "prefix_extra" "1")
12100 (set_attr "length_immediate" "1")
12101 (set_attr "prefix" "vex")
12102 (set_attr "mode" "V8SF")])
;; V16HI variant of the set-high-half pattern.
;;   operand 0: V16HI destination register
;;   operand 1: V16HI source register; the index list names elements 0 to 7
;;   operand 2: V8HI value, register or memory
;; Printed as vinsertf128 with immediate 1.
;; NOTE(review): presumably elements 0 to 7 of operand 1 are kept as the low
;; half and operand 2 becomes the high half, as in the iterator-based
;; vec_set_hi patterns -- confirm against the full RTL template.
12104 (define_insn "vec_set_hi_v16hi"
12105 [(set (match_operand:V16HI 0 "register_operand" "=x")
12108 (match_operand:V16HI 1 "register_operand" "x")
;; Indices of the lower eight elements of operand 1.
12109 (parallel [(const_int 0) (const_int 1)
12110 (const_int 2) (const_int 3)
12111 (const_int 4) (const_int 5)
12112 (const_int 6) (const_int 7)]))
12113 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
12115 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
;; The mode attribute is V8SF even though the operands are integer vectors.
12116 [(set_attr "type" "sselog")
12117 (set_attr "prefix_extra" "1")
12118 (set_attr "length_immediate" "1")
12119 (set_attr "prefix" "vex")
12120 (set_attr "mode" "V8SF")])
;; V32QI variant of the set-low-half pattern.
;;   operand 0: V32QI destination register
;;   operand 1: V32QI source register; the index list names elements 16 to 31
;;   operand 2: V16QI value, register or memory
;; Printed as vinsertf128 with immediate 0.
;; NOTE(review): presumably operand 2 becomes the low half and elements
;; 16 to 31 of operand 1 are kept as the high half, as in the iterator-based
;; vec_set_lo patterns -- confirm against the full RTL template.
12122 (define_insn "vec_set_lo_v32qi"
12123 [(set (match_operand:V32QI 0 "register_operand" "=x")
12125 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
12127 (match_operand:V32QI 1 "register_operand" "x")
;; Indices of the upper sixteen elements of operand 1.
12128 (parallel [(const_int 16) (const_int 17)
12129 (const_int 18) (const_int 19)
12130 (const_int 20) (const_int 21)
12131 (const_int 22) (const_int 23)
12132 (const_int 24) (const_int 25)
12133 (const_int 26) (const_int 27)
12134 (const_int 28) (const_int 29)
12135 (const_int 30) (const_int 31)]))))]
12137 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
;; The mode attribute is V8SF even though the operands are integer vectors.
12138 [(set_attr "type" "sselog")
12139 (set_attr "prefix_extra" "1")
12140 (set_attr "length_immediate" "1")
12141 (set_attr "prefix" "vex")
12142 (set_attr "mode" "V8SF")])
;; V32QI variant of the set-high-half pattern.
;;   operand 0: V32QI destination register
;;   operand 1: V32QI source register; the index list names elements 0 to 15
;;   operand 2: V16QI value, register or memory
;; Printed as vinsertf128 with immediate 1.
;; NOTE(review): presumably elements 0 to 15 of operand 1 are kept as the low
;; half and operand 2 becomes the high half, as in the iterator-based
;; vec_set_hi patterns -- confirm against the full RTL template.
12144 (define_insn "vec_set_hi_v32qi"
12145 [(set (match_operand:V32QI 0 "register_operand" "=x")
12148 (match_operand:V32QI 1 "register_operand" "x")
;; Indices of the lower sixteen elements of operand 1.
12149 (parallel [(const_int 0) (const_int 1)
12150 (const_int 2) (const_int 3)
12151 (const_int 4) (const_int 5)
12152 (const_int 6) (const_int 7)
12153 (const_int 8) (const_int 9)
12154 (const_int 10) (const_int 11)
12155 (const_int 12) (const_int 13)
12156 (const_int 14) (const_int 15)]))
12157 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
12159 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
;; The mode attribute is V8SF even though the operands are integer vectors.
12160 [(set_attr "type" "sselog")
12161 (set_attr "prefix_extra" "1")
12162 (set_attr "length_immediate" "1")
12163 (set_attr "prefix" "vex")
12164 (set_attr "mode" "V8SF")])
;; Masked vector load, one insn per mode of AVXMODEF2P, printed as
;; vmaskmovp<avxmodesuffixf2c>.
;;   operand 0: destination register
;;   operand 1: memory source
;;   operand 2: register operand
;; NOTE(review): going by the vmaskmovp operand order (memory, mask,
;; destination in AT&T syntax) operand 2 is presumably the mask -- confirm.
;; Note that the maskstore pattern numbers its mask and data operands
;; differently in its output template.
12166 (define_insn "avx_maskloadp<avxmodesuffixf2c><avxmodesuffix>"
12167 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
12169 [(match_operand:AVXMODEF2P 1 "memory_operand" "m")
12170 (match_operand:AVXMODEF2P 2 "register_operand" "x")
12174 "vmaskmovp<avxmodesuffixf2c>\t{%1, %2, %0|%0, %2, %1}"
;; The mode attribute follows the iterated mode; no length_immediate is set.
12175 [(set_attr "type" "sselog1")
12176 (set_attr "prefix_extra" "1")
12177 (set_attr "prefix" "vex")
12178 (set_attr "mode" "<MODE>")])
;; Masked vector store, one insn per mode of AVXMODEF2P, expressed with
;; UNSPEC_MASKSTORE and printed as vmaskmovp<avxmodesuffixf2c>.
;;   operand 0: memory destination
;;   operand 1: register operand
;;   operand 2: register operand
;; The template prints operand 2 first and operand 1 second in AT&T syntax.
;; NOTE(review): going by the vmaskmovp store operand order (data, mask,
;; memory in AT&T syntax) operand 1 is presumably the mask and operand 2 the
;; data to store -- confirm.
12180 (define_insn "avx_maskstorep<avxmodesuffixf2c><avxmodesuffix>"
12181 [(set (match_operand:AVXMODEF2P 0 "memory_operand" "=m")
12183 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
12184 (match_operand:AVXMODEF2P 2 "register_operand" "x")
12186 UNSPEC_MASKSTORE))]
12188 "vmaskmovp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
;; The mode attribute follows the iterated mode; no length_immediate is set.
12189 [(set_attr "type" "sselog1")
12190 (set_attr "prefix_extra" "1")
12191 (set_attr "prefix" "vex")
12192 (set_attr "mode" "<MODE>")])
;; Produce a 256-bit value (modes of AVX256MODE2P) from a half-width operand
;; (<avxhalfvecmode>) through an unspec.
;;   operand 0: 256-bit destination register
;;   operand 1: half-width source; tied to operand 0 in alternative 0,
;;              register or memory (xm) in alternative 1
;; The C output code switches on which_alternative and then on the mode
;; attribute of the insn, returning vmovaps, vmovapd or vmovdqa from
;; operand 1 into the %x0 form of operand 0; other values reach
;; gcc_unreachable.
;; NOTE(review): %x0 is presumably the 128-bit register name of operand 0,
;; and the tied alternative presumably emits nothing -- confirm.
12194 (define_insn "avx_<avxmodesuffixp><avxmodesuffix>_<avxmodesuffixp>"
12195 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x,x")
12196 (unspec:AVX256MODE2P
12197 [(match_operand:<avxhalfvecmode> 1 "nonimmediate_operand" "0,xm")]
12201 switch (which_alternative)
12206 switch (get_attr_mode (insn))
12209 return "vmovaps\t{%1, %x0|%x0, %1}";
12211 return "vmovapd\t{%1, %x0|%x0, %1}";
12213 return "vmovdqa\t{%1, %x0|%x0, %1}";
12220 gcc_unreachable ();
12222 [(set_attr "type" "ssemov")
12223 (set_attr "prefix" "vex")
12224 (set_attr "mode" "<avxvecmode>")
;; Length is overridden for the tied alternative 0; the other alternative
;; keeps the default value (the star).
12225 (set (attr "length")
12226 (if_then_else (eq_attr "alternative" "0")
12228 (const_string "*")))])
;; Produce a half-width value (<avxhalfvecmode>) from a 256-bit operand
;; (modes of AVX256MODE2P) through an unspec.
;;   operand 0: half-width destination register
;;   operand 1: 256-bit source; tied to operand 0 in alternative 0,
;;              register or memory (xm) in alternative 1
;; The C output code switches on which_alternative and then on the mode
;; attribute of the insn, returning vmovaps, vmovapd or vmovdqa from the
;; %x1 form of operand 1 into operand 0; other values reach gcc_unreachable.
;; NOTE(review): %x1 is presumably the 128-bit register name of operand 1,
;; and the tied alternative presumably emits nothing -- confirm.
12230 (define_insn "avx_<avxmodesuffixp>_<avxmodesuffixp><avxmodesuffix>"
12231 [(set (match_operand:<avxhalfvecmode> 0 "register_operand" "=x,x")
12232 (unspec:<avxhalfvecmode>
12233 [(match_operand:AVX256MODE2P 1 "nonimmediate_operand" "0,xm")]
12237 switch (which_alternative)
12242 switch (get_attr_mode (insn))
12245 return "vmovaps\t{%x1, %0|%0, %x1}";
12247 return "vmovapd\t{%x1, %0|%0, %x1}";
12249 return "vmovdqa\t{%x1, %0|%0, %x1}";
12256 gcc_unreachable ();
12258 [(set_attr "type" "ssemov")
12259 (set_attr "prefix" "vex")
12260 (set_attr "mode" "<avxvecmode>")
;; Length is overridden for the tied alternative 0; the other alternative
;; keeps the default value (the star).
12261 (set (attr "length")
12262 (if_then_else (eq_attr "alternative" "0")
12264 (const_string "*")))])
;; Vector initialization expander for every 256-bit mode in AVX256MODE
;; (V32QI V16HI V8SI V4DI V8SF V4DF).
;;   operand 0: destination register
;;   operand 1: initializer; no mode, predicate or constraint is imposed
;; All work is delegated to ix86_expand_vector_init, called with false as
;; its first argument followed by operands 0 and 1.
12266 (define_expand "vec_init<mode>"
12267 [(match_operand:AVX256MODE 0 "register_operand" "")
12268 (match_operand 1 "" "")]
12271 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Build a 256-bit vector (modes of AVX256MODE) as the concatenation of two
;; half-width operands.
;;   operand 0: 256-bit destination register
;;   operand 1: first half, always a register
;;   operand 2: second half; register or memory (xm) in alternative 0,
;;              constraint C in alternative 1
;; Per the attribute lists, alternative 0 is the sselog form carrying an
;; immediate, i.e. the vinsertf128 with immediate 1 that inserts operand 2
;; into the %t1 form of operand 1.  Alternative 1 is the ssemov form: a
;; vmovaps, vmovapd or vmovdqa, chosen by the mode attribute, that copies
;; operand 1 into the %x0 form of operand 0.
;; NOTE(review): constraint C is presumably the all-zero vector constant, with
;; the 128-bit VEX move clearing the upper half of the destination -- confirm.
;; NOTE(review): %t1 and %x0 presumably print the 256-bit and 128-bit register
;; names respectively -- confirm against the i386 operand modifiers.
12275 (define_insn "*vec_concat<mode>_avx"
12276 [(set (match_operand:AVX256MODE 0 "register_operand" "=x,x")
12277 (vec_concat:AVX256MODE
12278 (match_operand:<avxhalfvecmode> 1 "register_operand" "x,x")
12279 (match_operand:<avxhalfvecmode> 2 "vector_move_operand" "xm,C")))]
12282 switch (which_alternative)
12285 return "vinsertf128\t{$0x1, %2, %t1, %0|%0, %t1, %2, 0x1}";
12287 switch (get_attr_mode (insn))
12290 return "vmovaps\t{%1, %x0|%x0, %1}";
12292 return "vmovapd\t{%1, %x0|%x0, %1}";
12294 return "vmovdqa\t{%1, %x0|%x0, %1}";
12297 gcc_unreachable ();
12300 [(set_attr "type" "sselog,ssemov")
;; prefix_extra and length_immediate apply only to the vinsertf128
;; alternative; the move alternative keeps the defaults.
12301 (set_attr "prefix_extra" "1,*")
12302 (set_attr "length_immediate" "1,*")
12303 (set_attr "prefix" "vex")
12304 (set_attr "mode" "<avxvecmode>")])