1 ;; GCC machine description for SSE instructions
2 ;; Copyright (C) 2005, 2006, 2007, 2008
3 ;; Free Software Foundation, Inc.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify
8 ;; it under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
;; any later version.
12 ;; GCC is distributed in the hope that it will be useful,
13 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ;; GNU General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
22 ;; 16 byte integral modes handled by SSE, minus TImode, which gets
23 ;; special-cased for TARGET_64BIT.
24 (define_mode_iterator SSEMODEI [V16QI V8HI V4SI V2DI])
26 ;; All 16-byte vector modes handled by SSE
27 (define_mode_iterator SSEMODE [V16QI V8HI V4SI V2DI V4SF V2DF])
;; Subsets of the integer vector modes.  The digits in each iterator name
;; match the element sizes in bytes of the listed modes
;; (1 = QI, 2 = HI, 4 = SI, 8 = DI).
30 (define_mode_iterator SSEMODE12 [V16QI V8HI])
31 (define_mode_iterator SSEMODE24 [V8HI V4SI])
32 (define_mode_iterator SSEMODE14 [V16QI V4SI])
33 (define_mode_iterator SSEMODE124 [V16QI V8HI V4SI])
34 (define_mode_iterator SSEMODE248 [V8HI V4SI V2DI])
35 (define_mode_iterator SSEMODE1248 [V16QI V8HI V4SI V2DI])
;; Floating-point iterators: SSEMODEF4 covers the scalar and packed float
;; modes, SSEMODEF2P only the two packed ones.
36 (define_mode_iterator SSEMODEF4 [SF DF V4SF V2DF])
37 (define_mode_iterator SSEMODEF2P [V4SF V2DF])
39 ;; Mapping from integer vector mode to mnemonic suffix
40 (define_mode_attr ssevecsize [(V16QI "b") (V8HI "w") (V4SI "d") (V2DI "q")])
42 ;; Mapping of the sse5 suffix
;; ssemodesuffixf4 gives each float mode its own suffix (scalar "ss"/"sd",
;; packed "ps"/"pd").
43 (define_mode_attr ssemodesuffixf4 [(SF "ss") (DF "sd") (V4SF "ps") (V2DF "pd")])
;; ssemodesuffixf2s maps the packed modes to the scalar suffix of their
;; element type (V4SF -> "ss", V2DF -> "sd").
44 (define_mode_attr ssemodesuffixf2s [(SF "ss") (DF "sd") (V4SF "ss") (V2DF "sd")])
;; ssemodesuffixf2c is the single precision letter for the packed float modes.
45 (define_mode_attr ssemodesuffixf2c [(V4SF "s") (V2DF "d")])
47 ;; Mapping of the max integer size for sse5 rotate immediate constraint
;; Each value is the element width in bits minus one.
48 (define_mode_attr sserotatemax [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")])
50 ;; Mapping of vector modes back to the scalar modes
51 (define_mode_attr ssescalarmode [(V4SF "SF") (V2DF "DF")])
53 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
55 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
59 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
61 ;; All of these patterns are enabled for SSE1 as well as SSE2.
62 ;; This is essential for maintaining stable calling conventions.
;; Move expander for the SSEMODEI integer vector modes.  The preparation
;; code passes the mode and both operands to ix86_expand_vector_move.
64 (define_expand "mov<mode>"
65 [(set (match_operand:SSEMODEI 0 "nonimmediate_operand" "")
66 (match_operand:SSEMODEI 1 "nonimmediate_operand" ""))]
69 ix86_expand_vector_move (<MODE>mode, operands);
;; Move insn for the SSEMODEI modes.  Three alternatives: an SSE constant
;; (constraint "C") into a register, register-or-memory into a register,
;; and register into memory.  The condition requires at least one operand
;; to be a register.  The output code switches on which_alternative and
;; returns either the result of standard_sse_constant_opcode or a
;; movaps/movdqa template; movaps is used when the insn's "mode" attribute
;; is V4SF.  That attribute is V4SF when optimizing for size, when SSE2 is
;; not available, or for alternative 2 (the store) when
;; TARGET_SSE_TYPELESS_STORES is set; otherwise it is TI.
73 (define_insn "*mov<mode>_internal"
74 [(set (match_operand:SSEMODEI 0 "nonimmediate_operand" "=x,x ,m")
75 (match_operand:SSEMODEI 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
77 && (register_operand (operands[0], <MODE>mode)
78 || register_operand (operands[1], <MODE>mode))"
80 switch (which_alternative)
83 return standard_sse_constant_opcode (insn, operands[1]);
86 if (get_attr_mode (insn) == MODE_V4SF)
87 return "movaps\t{%1, %0|%0, %1}";
89 return "movdqa\t{%1, %0|%0, %1}";
94 [(set_attr "type" "sselog1,ssemov,ssemov")
97 (ior (ior (ne (symbol_ref "optimize_size") (const_int 0))
98 (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
99 (and (eq_attr "alternative" "2")
100 (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
102 (const_string "V4SF")
103 (const_string "TI")))])
105 ;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
106 ;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
107 ;; from memory, we'd prefer to load the memory directly into the %xmm
108 ;; register. To facilitate this happy circumstance, this pattern won't
109 ;; split until after register allocation. If the 64-bit value didn't
110 ;; come from memory, this is the best we can do. This is much better
111 ;; than storing %edx:%eax into a stack temporary and loading an %xmm
;; register from there.
;; Enabled only for 32-bit targets with SSE2 and TARGET_INTER_UNIT_MOVES;
;; the split condition adds reload_completed.
;; Register source (alternative 0): each 32-bit half of operand 1 is loaded
;; with sse2_loadld, the high half (subreg offset 4) into the early-clobber
;; scratch operand 2, and the two are then interleaved with punpckldq.
;; Memory source (alternative 1): a single vec_concatv2di of the DImode
;; memory with zero, written through the V2DI lowpart of operand 0; the
;; scratch is unused ("X").
114 (define_insn_and_split "movdi_to_sse"
116 [(set (match_operand:V4SI 0 "register_operand" "=?x,x")
117 (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
118 (clobber (match_scratch:V4SI 2 "=&x,X"))])]
119 "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES"
121 "&& reload_completed"
124 if (register_operand (operands[1], DImode))
126 /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
127 Assemble the 64-bit DImode value in an xmm register. */
128 emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
129 gen_rtx_SUBREG (SImode, operands[1], 0)));
130 emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
131 gen_rtx_SUBREG (SImode, operands[1], 4)));
132 emit_insn (gen_sse2_punpckldq (operands[0], operands[0], operands[2]));
134 else if (memory_operand (operands[1], DImode))
135 emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]), operands[1], const0_rtx));
;; V4SF move expander; delegates to ix86_expand_vector_move with V4SFmode.
140 (define_expand "movv4sf"
141 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
142 (match_operand:V4SF 1 "nonimmediate_operand" ""))]
145 ix86_expand_vector_move (V4SFmode, operands);
;; V4SF move insn.  Alternatives: SSE constant into register, register or
;; memory into register, register into memory; at least one operand must
;; be a register.  The output code returns either the result of
;; standard_sse_constant_opcode or the movaps template.  The mode
;; attribute is always V4SF.
149 (define_insn "*movv4sf_internal"
150 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
151 (match_operand:V4SF 1 "nonimmediate_or_sse_const_operand" "C,xm,x"))]
153 && (register_operand (operands[0], V4SFmode)
154 || register_operand (operands[1], V4SFmode))"
156 switch (which_alternative)
159 return standard_sse_constant_opcode (insn, operands[1]);
162 return "movaps\t{%1, %0|%0, %1}";
167 [(set_attr "type" "sselog1,ssemov,ssemov")
168 (set_attr "mode" "V4SF")])
;; After reload, rewrite a V4SF register load whose source satisfies
;; zero_extended_scalar_load_operand.  Operand 1 is replaced by its SFmode
;; subreg at offset 0 and operand 2 is set to the V4SF zero constant; the
;; replacement pattern uses a vec_duplicate of that scalar.
171 [(set (match_operand:V4SF 0 "register_operand" "")
172 (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
173 "TARGET_SSE && reload_completed"
176 (vec_duplicate:V4SF (match_dup 1))
180 operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
181 operands[2] = CONST0_RTX (V4SFmode);
;; V2DF move expander; delegates to ix86_expand_vector_move with V2DFmode.
184 (define_expand "movv2df"
185 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
186 (match_operand:V2DF 1 "nonimmediate_operand" ""))]
189 ix86_expand_vector_move (V2DFmode, operands);
;; V2DF move insn with the same three alternatives as the other vector
;; moves (constant, load/reg-reg, store); at least one operand must be a
;; register.  The output code returns the result of
;; standard_sse_constant_opcode, or movaps when the insn's "mode"
;; attribute is V4SF, or movapd.  The mode attribute is V4SF when
;; optimizing for size, without SSE2, or for the store alternative under
;; TARGET_SSE_TYPELESS_STORES; otherwise V2DF.
193 (define_insn "*movv2df_internal"
194 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
195 (match_operand:V2DF 1 "nonimmediate_or_sse_const_operand" "C,xm,x"))]
197 && (register_operand (operands[0], V2DFmode)
198 || register_operand (operands[1], V2DFmode))"
200 switch (which_alternative)
203 return standard_sse_constant_opcode (insn, operands[1]);
206 if (get_attr_mode (insn) == MODE_V4SF)
207 return "movaps\t{%1, %0|%0, %1}";
209 return "movapd\t{%1, %0|%0, %1}";
214 [(set_attr "type" "sselog1,ssemov,ssemov")
217 (ior (ior (ne (symbol_ref "optimize_size") (const_int 0))
218 (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
219 (and (eq_attr "alternative" "2")
220 (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
222 (const_string "V4SF")
223 (const_string "V2DF")))])
;; After reload (SSE2 only), rewrite a V2DF register load whose source
;; satisfies zero_extended_scalar_load_operand as a vec_concat of the
;; DFmode scalar (subreg of operand 1 at offset 0) with a DFmode zero.
226 [(set (match_operand:V2DF 0 "register_operand" "")
227 (match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))]
228 "TARGET_SSE2 && reload_completed"
229 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
231 operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
232 operands[2] = CONST0_RTX (DFmode);
;; Push expander for every SSEMODE vector mode; takes a register operand
;; and hands it to ix86_expand_push.
235 (define_expand "push<mode>1"
236 [(match_operand:SSEMODE 0 "register_operand" "")]
239 ix86_expand_push (<MODE>mode, operands[0]);
;; Misaligned move expander for every SSEMODE vector mode; delegates to
;; ix86_expand_vector_move_misalign.
243 (define_expand "movmisalign<mode>"
244 [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "")
245 (match_operand:SSEMODE 1 "nonimmediate_operand" ""))]
248 ix86_expand_vector_move_misalign (<MODE>mode, operands);
;; V4SF move through an unspec, emitted as movups.  Two alternatives:
;; register-or-memory into a register, and register into memory; the
;; condition rejects memory-to-memory.  The "mode" attribute is V4SF to
;; match the single-precision operands and the movups mnemonic (V2DF is
;; the mode of the double-precision sse2_movupd pattern).
252 (define_insn "sse_movups"
253 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
254 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,x")]
256 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
257 "movups\t{%1, %0|%0, %1}"
258 [(set_attr "type" "ssemov")
259 (set_attr "mode" "V4SF")])
;; V2DF move through an unspec, emitted as movupd (SSE2).  Load/reg-reg
;; and store alternatives; memory-to-memory is rejected by the condition.
261 (define_insn "sse2_movupd"
262 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,m")
263 (unspec:V2DF [(match_operand:V2DF 1 "nonimmediate_operand" "xm,x")]
265 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
266 "movupd\t{%1, %0|%0, %1}"
267 [(set_attr "type" "ssemov")
268 (set_attr "mode" "V2DF")])
;; V16QI move through an unspec, emitted as movdqu (SSE2).  Load/reg-reg
;; and store alternatives; memory-to-memory is rejected by the condition.
;; The insn sets prefix_data16 explicitly and has mode TI.
270 (define_insn "sse2_movdqu"
271 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
272 (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "xm,x")]
274 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
275 "movdqu\t{%1, %0|%0, %1}"
276 [(set_attr "type" "ssemov")
277 (set_attr "prefix_data16" "1")
278 (set_attr "mode" "TI")])
;; Store of a V4SF register to memory through an unspec, emitted as movntps.
280 (define_insn "sse_movntv4sf"
281 [(set (match_operand:V4SF 0 "memory_operand" "=m")
282 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "x")]
285 "movntps\t{%1, %0|%0, %1}"
286 [(set_attr "type" "ssemov")
287 (set_attr "mode" "V4SF")])
;; Store of a V2DF register to memory through an unspec, emitted as movntpd.
;; NOTE(review): type is "ssecvt" here but "ssemov" on sse_movntv4sf --
;; confirm whether the difference is intentional.
289 (define_insn "sse2_movntv2df"
290 [(set (match_operand:V2DF 0 "memory_operand" "=m")
291 (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "x")]
294 "movntpd\t{%1, %0|%0, %1}"
295 [(set_attr "type" "ssecvt")
296 (set_attr "mode" "V2DF")])
;; Store of a V2DI register to memory through an unspec, emitted as
;; movntdq; sets prefix_data16 explicitly and has mode TI.
298 (define_insn "sse2_movntv2di"
299 [(set (match_operand:V2DI 0 "memory_operand" "=m")
300 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")]
303 "movntdq\t{%1, %0|%0, %1}"
304 [(set_attr "type" "ssecvt")
305 (set_attr "prefix_data16" "1")
306 (set_attr "mode" "TI")])
;; Store of an SImode general register ("r") to memory through an unspec,
;; emitted as movnti.  Both operands are SImode and no vector register is
;; involved, so the "mode" attribute is SI rather than a packed
;; floating-point mode.
308 (define_insn "sse2_movntsi"
309 [(set (match_operand:SI 0 "memory_operand" "=m")
310 (unspec:SI [(match_operand:SI 1 "register_operand" "r")]
313 "movnti\t{%1, %0|%0, %1}"
314 [(set_attr "type" "ssecvt")
315 (set_attr "mode" "SI")])
;; Load of a V16QI value from memory into a register through an unspec,
;; emitted as lddqu; sets prefix_rep and has mode TI.
317 (define_insn "sse3_lddqu"
318 [(set (match_operand:V16QI 0 "register_operand" "=x")
319 (unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "m")]
322 "lddqu\t{%1, %0|%0, %1}"
323 [(set_attr "type" "ssecvt")
324 (set_attr "prefix_rep" "1")
325 (set_attr "mode" "TI")])
327 ; Expand patterns for non-temporal stores. At the moment, only those
328 ; that directly map to insns are defined; it would be possible to
329 ; define patterns for other modes that would expand to several insns.
;; Each storent expander below has the same shape: a memory destination
;; set from an unspec wrapping a register source of the same mode.
;; V4SF variant.
331 (define_expand "storentv4sf"
332 [(set (match_operand:V4SF 0 "memory_operand" "")
333 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")]
;; V2DF variant.
338 (define_expand "storentv2df"
339 [(set (match_operand:V2DF 0 "memory_operand" "")
340 (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "")]
;; V2DI variant.
345 (define_expand "storentv2di"
346 [(set (match_operand:V2DI 0 "memory_operand" "")
347 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "")]
;; SI variant.
352 (define_expand "storentsi"
353 [(set (match_operand:SI 0 "memory_operand" "")
354 (unspec:SI [(match_operand:SI 1 "register_operand" "")]
;; Scalar DF variant.
359 (define_expand "storentdf"
360 [(set (match_operand:DF 0 "memory_operand" "")
361 (unspec:DF [(match_operand:DF 1 "register_operand" "")]
;; Scalar SF variant.
366 (define_expand "storentsf"
367 [(set (match_operand:SF 0 "memory_operand" "")
368 (unspec:SF [(match_operand:SF 1 "register_operand" "")]
373 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
375 ;; Parallel single-precision floating point arithmetic
377 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; V4SF negation expander; always expands through
;; ix86_expand_fp_absneg_operator with code NEG.
379 (define_expand "negv4sf2"
380 [(set (match_operand:V4SF 0 "register_operand" "")
381 (neg:V4SF (match_operand:V4SF 1 "register_operand" "")))]
383 "ix86_expand_fp_absneg_operator (NEG, V4SFmode, operands); DONE;")
;; V4SF absolute-value expander; always expands through
;; ix86_expand_fp_absneg_operator with code ABS.
385 (define_expand "absv4sf2"
386 [(set (match_operand:V4SF 0 "register_operand" "")
387 (abs:V4SF (match_operand:V4SF 1 "register_operand" "")))]
389 "ix86_expand_fp_absneg_operator (ABS, V4SFmode, operands); DONE;")
;; V4SF addition expander.  Both inputs may be registers or memory; the
;; preparation code calls ix86_fixup_binary_operands_no_copy for PLUS and
;; then falls through to the pattern (no DONE).
391 (define_expand "addv4sf3"
392 [(set (match_operand:V4SF 0 "register_operand" "")
393 (plus:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
394 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
396 "ix86_fixup_binary_operands_no_copy (PLUS, V4SFmode, operands);")
;; Packed single-precision add (addps).  Operand 1 is tied to the
;; destination and marked commutative ("%0"); operand 2 may be a register
;; or memory.  Valid only when ix86_binary_operator_ok accepts the operands.
398 (define_insn "*addv4sf3"
399 [(set (match_operand:V4SF 0 "register_operand" "=x")
400 (plus:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
401 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
402 "TARGET_SSE && ix86_binary_operator_ok (PLUS, V4SFmode, operands)"
403 "addps\t{%2, %0|%0, %2}"
404 [(set_attr "type" "sseadd")
405 (set_attr "mode" "V4SF")])
;; Scalar-instruction form of the V4SF add: emits addss and carries mode
;; SF.  Operand 1 must be a register tied to the destination (not
;; commutative); operand 2 may be a register or memory.
407 (define_insn "sse_vmaddv4sf3"
408 [(set (match_operand:V4SF 0 "register_operand" "=x")
410 (plus:V4SF (match_operand:V4SF 1 "register_operand" "0")
411 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
414 "TARGET_SSE && ix86_binary_operator_ok (PLUS, V4SFmode, operands)"
415 "addss\t{%2, %0|%0, %2}"
416 [(set_attr "type" "sseadd")
417 (set_attr "mode" "SF")])
;; V4SF subtraction expander.  Operand 1 must already be a register;
;; operands are normalised with ix86_fixup_binary_operands_no_copy (MINUS).
419 (define_expand "subv4sf3"
420 [(set (match_operand:V4SF 0 "register_operand" "")
421 (minus:V4SF (match_operand:V4SF 1 "register_operand" "")
422 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
424 "ix86_fixup_binary_operands_no_copy (MINUS, V4SFmode, operands);")
;; Packed single-precision subtract (subps).  Operand 1 is a register
;; tied to the destination; operand 2 may be a register or memory.
426 (define_insn "*subv4sf3"
427 [(set (match_operand:V4SF 0 "register_operand" "=x")
428 (minus:V4SF (match_operand:V4SF 1 "register_operand" "0")
429 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
431 "subps\t{%2, %0|%0, %2}"
432 [(set_attr "type" "sseadd")
433 (set_attr "mode" "V4SF")])
;; Scalar-instruction form of the V4SF subtract: emits subss, mode SF.
;; Operand 1 is tied to the destination.
435 (define_insn "sse_vmsubv4sf3"
436 [(set (match_operand:V4SF 0 "register_operand" "=x")
438 (minus:V4SF (match_operand:V4SF 1 "register_operand" "0")
439 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
443 "subss\t{%2, %0|%0, %2}"
444 [(set_attr "type" "sseadd")
445 (set_attr "mode" "SF")])
;; V4SF multiplication expander; operands are normalised with
;; ix86_fixup_binary_operands_no_copy (MULT).
447 (define_expand "mulv4sf3"
448 [(set (match_operand:V4SF 0 "register_operand" "")
449 (mult:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
450 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
452 "ix86_fixup_binary_operands_no_copy (MULT, V4SFmode, operands);")
;; Packed single-precision multiply (mulps).  Operand 1 is tied to the
;; destination and commutative ("%0"); gated on ix86_binary_operator_ok.
454 (define_insn "*mulv4sf3"
455 [(set (match_operand:V4SF 0 "register_operand" "=x")
456 (mult:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
457 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
458 "TARGET_SSE && ix86_binary_operator_ok (MULT, V4SFmode, operands)"
459 "mulps\t{%2, %0|%0, %2}"
460 [(set_attr "type" "ssemul")
461 (set_attr "mode" "V4SF")])
;; Scalar-instruction form of the V4SF multiply: emits mulss, mode SF.
;; Operand 1 is a register tied to the destination (not commutative).
463 (define_insn "sse_vmmulv4sf3"
464 [(set (match_operand:V4SF 0 "register_operand" "=x")
466 (mult:V4SF (match_operand:V4SF 1 "register_operand" "0")
467 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
470 "TARGET_SSE && ix86_binary_operator_ok (MULT, V4SFmode, operands)"
471 "mulss\t{%2, %0|%0, %2}"
472 [(set_attr "type" "ssemul")
473 (set_attr "mode" "SF")])
;; V4SF division expander.  Operands are first normalised with
;; ix86_fixup_binary_operands_no_copy (DIV).  When SSE math and
;; TARGET_RECIP are enabled, the function is not optimized for size, and
;; finite-math-only, no-trapping-math and unsafe-math-optimizations are
;; all in effect, the division is emitted through ix86_emit_swdivsf.
475 (define_expand "divv4sf3"
476 [(set (match_operand:V4SF 0 "register_operand" "")
477 (div:V4SF (match_operand:V4SF 1 "register_operand" "")
478 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
481 ix86_fixup_binary_operands_no_copy (DIV, V4SFmode, operands);
483 if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_size
484 && flag_finite_math_only && !flag_trapping_math
485 && flag_unsafe_math_optimizations)
487 ix86_emit_swdivsf (operands[0], operands[1],
488 operands[2], V4SFmode);
;; Packed single-precision divide (divps).  Operand 1 is a register tied
;; to the destination; operand 2 may be a register or memory.
493 (define_insn "sse_divv4sf3"
494 [(set (match_operand:V4SF 0 "register_operand" "=x")
495 (div:V4SF (match_operand:V4SF 1 "register_operand" "0")
496 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
498 "divps\t{%2, %0|%0, %2}"
499 [(set_attr "type" "ssediv")
500 (set_attr "mode" "V4SF")])
;; Scalar-instruction form of the V4SF divide: emits divss, mode SF.
;; Operand 1 is tied to the destination.
502 (define_insn "sse_vmdivv4sf3"
503 [(set (match_operand:V4SF 0 "register_operand" "=x")
505 (div:V4SF (match_operand:V4SF 1 "register_operand" "0")
506 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
510 "divss\t{%2, %0|%0, %2}"
511 [(set_attr "type" "ssediv")
512 (set_attr "mode" "SF")])
;; Packed reciprocal (rcpps), represented as UNSPEC_RCP of a register or
;; memory V4SF operand.
514 (define_insn "sse_rcpv4sf2"
515 [(set (match_operand:V4SF 0 "register_operand" "=x")
517 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
519 "rcpps\t{%1, %0|%0, %1}"
520 [(set_attr "type" "sse")
521 (set_attr "mode" "V4SF")])
;; Scalar-instruction reciprocal: emits rcpss on operand 1, mode SF.
;; Operand 2 is a register tied to the destination.
523 (define_insn "sse_vmrcpv4sf2"
524 [(set (match_operand:V4SF 0 "register_operand" "=x")
526 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
528 (match_operand:V4SF 2 "register_operand" "0")
531 "rcpss\t{%1, %0|%0, %1}"
532 [(set_attr "type" "sse")
533 (set_attr "mode" "SF")])
;; V4SF reciprocal-square-root expander (UNSPEC_RSQRT).  The preparation
;; code calls ix86_emit_swsqrtsf with a final argument of 1.
;; NOTE(review): that argument presumably selects the reciprocal form
;; (sqrtv4sf2 passes 0) -- confirm against ix86_emit_swsqrtsf.
535 (define_expand "rsqrtv4sf2"
536 [(set (match_operand:V4SF 0 "register_operand" "")
538 [(match_operand:V4SF 1 "nonimmediate_operand" "")] UNSPEC_RSQRT))]
541 ix86_emit_swsqrtsf (operands[0], operands[1], V4SFmode, 1);
;; Packed reciprocal square root (rsqrtps), represented as UNSPEC_RSQRT of
;; a register or memory V4SF operand.
545 (define_insn "sse_rsqrtv4sf2"
546 [(set (match_operand:V4SF 0 "register_operand" "=x")
548 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
550 "rsqrtps\t{%1, %0|%0, %1}"
551 [(set_attr "type" "sse")
552 (set_attr "mode" "V4SF")])
;; Scalar-instruction reciprocal square root: emits rsqrtss on operand 1,
;; mode SF.  Operand 2 is a register tied to the destination.
554 (define_insn "sse_vmrsqrtv4sf2"
555 [(set (match_operand:V4SF 0 "register_operand" "=x")
557 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
559 (match_operand:V4SF 2 "register_operand" "0")
562 "rsqrtss\t{%1, %0|%0, %1}"
563 [(set_attr "type" "sse")
564 (set_attr "mode" "SF")])
;; V4SF square-root expander.  When SSE math and TARGET_RECIP are
;; enabled, the function is not optimized for size, and finite-math-only,
;; no-trapping-math and unsafe-math-optimizations are all in effect, the
;; square root is emitted through ix86_emit_swsqrtsf (final argument 0).
566 (define_expand "sqrtv4sf2"
567 [(set (match_operand:V4SF 0 "register_operand" "")
568 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
571 if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_size
572 && flag_finite_math_only && !flag_trapping_math
573 && flag_unsafe_math_optimizations)
575 ix86_emit_swsqrtsf (operands[0], operands[1], V4SFmode, 0);
;; Packed single-precision square root (sqrtps) of a register or memory
;; operand.
580 (define_insn "sse_sqrtv4sf2"
581 [(set (match_operand:V4SF 0 "register_operand" "=x")
582 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
584 "sqrtps\t{%1, %0|%0, %1}"
585 [(set_attr "type" "sse")
586 (set_attr "mode" "V4SF")])
;; Scalar-instruction square root: emits sqrtss on operand 1, mode SF.
;; Operand 2 is a register tied to the destination.
588 (define_insn "sse_vmsqrtv4sf2"
589 [(set (match_operand:V4SF 0 "register_operand" "=x")
591 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
592 (match_operand:V4SF 2 "register_operand" "0")
595 "sqrtss\t{%1, %0|%0, %1}"
596 [(set_attr "type" "sse")
597 (set_attr "mode" "SF")])
599 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
600 ;; isn't really correct, as those rtl operators aren't defined when
601 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
;; V4SF maximum expander.  Unless flag_finite_math_only is set, operand 1
;; is forced into a register before the usual binary-operand fixup, so
;; the non-commutative insn form can match.
603 (define_expand "smaxv4sf3"
604 [(set (match_operand:V4SF 0 "register_operand" "")
605 (smax:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
606 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
609 if (!flag_finite_math_only)
610 operands[1] = force_reg (V4SFmode, operands[1]);
611 ix86_fixup_binary_operands_no_copy (SMAX, V4SFmode, operands);
;; maxps form used only under flag_finite_math_only: operand 1 is marked
;; commutative ("%0") and may be memory before constraint matching.
614 (define_insn "*smaxv4sf3_finite"
615 [(set (match_operand:V4SF 0 "register_operand" "=x")
616 (smax:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
617 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
618 "TARGET_SSE && flag_finite_math_only
619 && ix86_binary_operator_ok (SMAX, V4SFmode, operands)"
620 "maxps\t{%2, %0|%0, %2}"
621 [(set_attr "type" "sse")
622 (set_attr "mode" "V4SF")])
;; maxps form with a fixed operand order: operand 1 is a register tied to
;; the destination and is not marked commutative.
624 (define_insn "*smaxv4sf3"
625 [(set (match_operand:V4SF 0 "register_operand" "=x")
626 (smax:V4SF (match_operand:V4SF 1 "register_operand" "0")
627 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
629 "maxps\t{%2, %0|%0, %2}"
630 [(set_attr "type" "sse")
631 (set_attr "mode" "V4SF")])
;; Scalar-instruction form of the V4SF maximum: emits maxss, mode SF.
;; Operand 1 is tied to the destination.
633 (define_insn "sse_vmsmaxv4sf3"
634 [(set (match_operand:V4SF 0 "register_operand" "=x")
636 (smax:V4SF (match_operand:V4SF 1 "register_operand" "0")
637 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
641 "maxss\t{%2, %0|%0, %2}"
642 [(set_attr "type" "sse")
643 (set_attr "mode" "SF")])
;; V4SF minimum expander.  Unless flag_finite_math_only is set, operand 1
;; is forced into a register before the usual binary-operand fixup, so
;; the non-commutative insn form can match.
645 (define_expand "sminv4sf3"
646 [(set (match_operand:V4SF 0 "register_operand" "")
647 (smin:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
648 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
651 if (!flag_finite_math_only)
652 operands[1] = force_reg (V4SFmode, operands[1]);
653 ix86_fixup_binary_operands_no_copy (SMIN, V4SFmode, operands);
;; minps form used only under flag_finite_math_only: operand 1 is marked
;; commutative ("%0") and may be memory before constraint matching.
656 (define_insn "*sminv4sf3_finite"
657 [(set (match_operand:V4SF 0 "register_operand" "=x")
658 (smin:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
659 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
660 "TARGET_SSE && flag_finite_math_only
661 && ix86_binary_operator_ok (SMIN, V4SFmode, operands)"
662 "minps\t{%2, %0|%0, %2}"
663 [(set_attr "type" "sse")
664 (set_attr "mode" "V4SF")])
;; minps form with a fixed operand order: operand 1 is a register tied to
;; the destination and is not marked commutative.
666 (define_insn "*sminv4sf3"
667 [(set (match_operand:V4SF 0 "register_operand" "=x")
668 (smin:V4SF (match_operand:V4SF 1 "register_operand" "0")
669 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
671 "minps\t{%2, %0|%0, %2}"
672 [(set_attr "type" "sse")
673 (set_attr "mode" "V4SF")])
;; Scalar-instruction form of the V4SF minimum: emits minss, mode SF.
;; Operand 1 is tied to the destination.
675 (define_insn "sse_vmsminv4sf3"
676 [(set (match_operand:V4SF 0 "register_operand" "=x")
678 (smin:V4SF (match_operand:V4SF 1 "register_operand" "0")
679 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
683 "minss\t{%2, %0|%0, %2}"
684 [(set_attr "type" "sse")
685 (set_attr "mode" "SF")])
687 ;; These versions of the min/max patterns implement exactly the operations
688 ;; min = (op1 < op2 ? op1 : op2)
689 ;; max = (!(op1 < op2) ? op1 : op2)
690 ;; Their operands are not commutative, and thus they may be used in the
691 ;; presence of -0.0 and NaN.
;; Order-sensitive V4SF minimum expressed as an unspec and emitted as
;; minps.  Operand 1 is tied to the destination with no commutative marker.
693 (define_insn "*ieee_sminv4sf3"
694 [(set (match_operand:V4SF 0 "register_operand" "=x")
695 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0")
696 (match_operand:V4SF 2 "nonimmediate_operand" "xm")]
699 "minps\t{%2, %0|%0, %2}"
700 [(set_attr "type" "sseadd")
701 (set_attr "mode" "V4SF")])
;; Order-sensitive V4SF maximum expressed as an unspec and emitted as
;; maxps.  Operand 1 is tied to the destination with no commutative marker.
703 (define_insn "*ieee_smaxv4sf3"
704 [(set (match_operand:V4SF 0 "register_operand" "=x")
705 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0")
706 (match_operand:V4SF 2 "nonimmediate_operand" "xm")]
709 "maxps\t{%2, %0|%0, %2}"
710 [(set_attr "type" "sseadd")
711 (set_attr "mode" "V4SF")])
;; Order-sensitive V2DF minimum expressed as an unspec and emitted as
;; minpd.  Operand 1 is tied to the destination with no commutative marker.
713 (define_insn "*ieee_sminv2df3"
714 [(set (match_operand:V2DF 0 "register_operand" "=x")
715 (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "0")
716 (match_operand:V2DF 2 "nonimmediate_operand" "xm")]
719 "minpd\t{%2, %0|%0, %2}"
720 [(set_attr "type" "sseadd")
721 (set_attr "mode" "V2DF")])
;; Order-sensitive V2DF maximum expressed as an unspec and emitted as
;; maxpd.  Operand 1 is tied to the destination with no commutative marker.
723 (define_insn "*ieee_smaxv2df3"
724 [(set (match_operand:V2DF 0 "register_operand" "=x")
725 (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "0")
726 (match_operand:V2DF 2 "nonimmediate_operand" "xm")]
729 "maxpd\t{%2, %0|%0, %2}"
730 [(set_attr "type" "sseadd")
731 (set_attr "mode" "V2DF")])
;; addsubps.  The pattern combines a two-operand V4SF expression on
;; operands 1 and 2 with the V4SF difference of the same two operands
;; (match_dup).  Operand 1 is tied to the destination.
733 (define_insn "sse3_addsubv4sf3"
734 [(set (match_operand:V4SF 0 "register_operand" "=x")
737 (match_operand:V4SF 1 "register_operand" "0")
738 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
739 (minus:V4SF (match_dup 1) (match_dup 2))
742 "addsubps\t{%2, %0|%0, %2}"
743 [(set_attr "type" "sseadd")
744 (set_attr "prefix_rep" "1")
745 (set_attr "mode" "V4SF")])
;; haddps.  The pattern is built from SFmode element selections taken in
;; adjacent pairs: elements (0,1) and (2,3) of operand 1 first, then the
;; same pairs of operand 2.  Operand 1 is tied to the destination.
747 (define_insn "sse3_haddv4sf3"
748 [(set (match_operand:V4SF 0 "register_operand" "=x")
753 (match_operand:V4SF 1 "register_operand" "0")
754 (parallel [(const_int 0)]))
755 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
757 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
758 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
762 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
763 (parallel [(const_int 0)]))
764 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
766 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
767 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
769 "haddps\t{%2, %0|%0, %2}"
770 [(set_attr "type" "sseadd")
771 (set_attr "prefix_rep" "1")
772 (set_attr "mode" "V4SF")])
;; hsubps.  Same element-pair layout as sse3_haddv4sf3: elements (0,1) and
;; (2,3) of operand 1, then of operand 2.  Operand 1 is tied to the
;; destination.
774 (define_insn "sse3_hsubv4sf3"
775 [(set (match_operand:V4SF 0 "register_operand" "=x")
780 (match_operand:V4SF 1 "register_operand" "0")
781 (parallel [(const_int 0)]))
782 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
784 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
785 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
789 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
790 (parallel [(const_int 0)]))
791 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
793 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
794 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
796 "hsubps\t{%2, %0|%0, %2}"
797 [(set_attr "type" "sseadd")
798 (set_attr "prefix_rep" "1")
799 (set_attr "mode" "V4SF")])
;; Sum reduction of a V4SF register.  Two code paths are present: one
;; applies sse3_haddv4sf3 twice through a fresh temporary, the other calls
;; ix86_expand_reduc_v4sf with gen_addv4sf3.
;; NOTE(review): the choice between them is presumably an SSE3
;; availability test -- confirm.
801 (define_expand "reduc_splus_v4sf"
802 [(match_operand:V4SF 0 "register_operand" "")
803 (match_operand:V4SF 1 "register_operand" "")]
808 rtx tmp = gen_reg_rtx (V4SFmode);
809 emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
810 emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
813 ix86_expand_reduc_v4sf (gen_addv4sf3, operands[0], operands[1]);
;; Maximum reduction of a V4SF register, expanded by
;; ix86_expand_reduc_v4sf using gen_smaxv4sf3 as the combining step.
817 (define_expand "reduc_smax_v4sf"
818 [(match_operand:V4SF 0 "register_operand" "")
819 (match_operand:V4SF 1 "register_operand" "")]
822 ix86_expand_reduc_v4sf (gen_smaxv4sf3, operands[0], operands[1]);
;; Minimum reduction of a V4SF register, expanded by
;; ix86_expand_reduc_v4sf using gen_sminv4sf3 as the combining step.
826 (define_expand "reduc_smin_v4sf"
827 [(match_operand:V4SF 0 "register_operand" "")
828 (match_operand:V4SF 1 "register_operand" "")]
831 ix86_expand_reduc_v4sf (gen_sminv4sf3, operands[0], operands[1]);
835 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
837 ;; Parallel single-precision floating point comparisons
839 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Packed compare producing a V4SF result.  Operator 3 must satisfy
;; sse_comparison_operator and is printed via %D3 inside the cmp..ps
;; mnemonic.  Disabled when TARGET_SSE5 is set.
841 (define_insn "sse_maskcmpv4sf3"
842 [(set (match_operand:V4SF 0 "register_operand" "=x")
843 (match_operator:V4SF 3 "sse_comparison_operator"
844 [(match_operand:V4SF 1 "register_operand" "0")
845 (match_operand:V4SF 2 "nonimmediate_operand" "xm")]))]
846 "TARGET_SSE && !TARGET_SSE5"
847 "cmp%D3ps\t{%2, %0|%0, %2}"
848 [(set_attr "type" "ssecmp")
849 (set_attr "mode" "V4SF")])
;; Scalar SFmode compare producing an SF result; emits cmp..ss with the
;; comparison printed via %D3.  Disabled when TARGET_SSE5 is set.
851 (define_insn "sse_maskcmpsf3"
852 [(set (match_operand:SF 0 "register_operand" "=x")
853 (match_operator:SF 3 "sse_comparison_operator"
854 [(match_operand:SF 1 "register_operand" "0")
855 (match_operand:SF 2 "nonimmediate_operand" "xm")]))]
856 "TARGET_SSE && !TARGET_SSE5"
857 "cmp%D3ss\t{%2, %0|%0, %2}"
858 [(set_attr "type" "ssecmp")
859 (set_attr "mode" "SF")])
;; Scalar-instruction compare on V4SF operands: emits cmp..ss, mode SF.
;; Unlike the other compare patterns, operand 2 must be a register
;; (constraint "x" only, no memory alternative).
861 (define_insn "sse_vmmaskcmpv4sf3"
862 [(set (match_operand:V4SF 0 "register_operand" "=x")
864 (match_operator:V4SF 3 "sse_comparison_operator"
865 [(match_operand:V4SF 1 "register_operand" "0")
866 (match_operand:V4SF 2 "register_operand" "x")])
869 "TARGET_SSE && !TARGET_SSE5"
870 "cmp%D3ss\t{%2, %0|%0, %2}"
871 [(set_attr "type" "ssecmp")
872 (set_attr "mode" "SF")])
;; comiss: sets the flags register in CCFP mode from element 0 of each
;; V4SF operand.  Operand 0 must be a register; operand 1 may be memory.
874 (define_insn "sse_comi"
875 [(set (reg:CCFP FLAGS_REG)
878 (match_operand:V4SF 0 "register_operand" "x")
879 (parallel [(const_int 0)]))
881 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
882 (parallel [(const_int 0)]))))]
884 "comiss\t{%1, %0|%0, %1}"
885 [(set_attr "type" "ssecomi")
886 (set_attr "mode" "SF")])
;; ucomiss: sets the flags register in CCFPU mode from element 0 of each
;; V4SF operand.  Operand 0 must be a register; operand 1 may be memory.
888 (define_insn "sse_ucomi"
889 [(set (reg:CCFPU FLAGS_REG)
892 (match_operand:V4SF 0 "register_operand" "x")
893 (parallel [(const_int 0)]))
895 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
896 (parallel [(const_int 0)]))))]
898 "ucomiss\t{%1, %0|%0, %1}"
899 [(set_attr "type" "ssecomi")
900 (set_attr "mode" "SF")])
;; V4SF vector conditional expander.  Operands 4 and 5 are the values
;; being compared, operands 1 and 2 the two general-operand inputs of the
;; result.  The preparation code tests the return value of
;; ix86_expand_fp_vcond (operands).
902 (define_expand "vcondv4sf"
903 [(set (match_operand:V4SF 0 "register_operand" "")
906 [(match_operand:V4SF 4 "nonimmediate_operand" "")
907 (match_operand:V4SF 5 "nonimmediate_operand" "")])
908 (match_operand:V4SF 1 "general_operand" "")
909 (match_operand:V4SF 2 "general_operand" "")))]
912 if (ix86_expand_fp_vcond (operands))
918 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
920 ;; Parallel single-precision floating point logical operations
922 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; V4SF bitwise-AND expander; operands are normalised with
;; ix86_fixup_binary_operands_no_copy (AND).
924 (define_expand "andv4sf3"
925 [(set (match_operand:V4SF 0 "register_operand" "")
926 (and:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
927 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
929 "ix86_fixup_binary_operands_no_copy (AND, V4SFmode, operands);")
;; Packed bitwise AND (andps).  Operand 1 is tied to the destination and
;; commutative ("%0"); gated on ix86_binary_operator_ok.
931 (define_insn "*andv4sf3"
932 [(set (match_operand:V4SF 0 "register_operand" "=x")
933 (and:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
934 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
935 "TARGET_SSE && ix86_binary_operator_ok (AND, V4SFmode, operands)"
936 "andps\t{%2, %0|%0, %2}"
937 [(set_attr "type" "sselog")
938 (set_attr "mode" "V4SF")])
;; andnps: (NOT operand 1) AND operand 2.  The complemented operand is the
;; register tied to the destination.
940 (define_insn "sse_nandv4sf3"
941 [(set (match_operand:V4SF 0 "register_operand" "=x")
942 (and:V4SF (not:V4SF (match_operand:V4SF 1 "register_operand" "0"))
943 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
945 "andnps\t{%2, %0|%0, %2}"
946 [(set_attr "type" "sselog")
947 (set_attr "mode" "V4SF")])
;; V4SF bitwise-OR expander; operands are normalised with
;; ix86_fixup_binary_operands_no_copy (IOR).
949 (define_expand "iorv4sf3"
950 [(set (match_operand:V4SF 0 "register_operand" "")
951 (ior:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
952 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
954 "ix86_fixup_binary_operands_no_copy (IOR, V4SFmode, operands);")
;; Packed bitwise OR (orps).  Operand 1 is tied to the destination and
;; commutative ("%0"); gated on ix86_binary_operator_ok.
956 (define_insn "*iorv4sf3"
957 [(set (match_operand:V4SF 0 "register_operand" "=x")
958 (ior:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
959 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
960 "TARGET_SSE && ix86_binary_operator_ok (IOR, V4SFmode, operands)"
961 "orps\t{%2, %0|%0, %2}"
962 [(set_attr "type" "sselog")
963 (set_attr "mode" "V4SF")])
;; V4SF bitwise-XOR expander; operands are normalised with
;; ix86_fixup_binary_operands_no_copy (XOR).
965 (define_expand "xorv4sf3"
966 [(set (match_operand:V4SF 0 "register_operand" "")
967 (xor:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
968 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
970 "ix86_fixup_binary_operands_no_copy (XOR, V4SFmode, operands);")
;; Packed bitwise XOR (xorps).  Operand 1 is tied to the destination and
;; commutative ("%0"); gated on ix86_binary_operator_ok.
972 (define_insn "*xorv4sf3"
973 [(set (match_operand:V4SF 0 "register_operand" "=x")
974 (xor:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
975 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
976 "TARGET_SSE && ix86_binary_operator_ok (XOR, V4SFmode, operands)"
977 "xorps\t{%2, %0|%0, %2}"
978 [(set_attr "type" "sselog")
979 (set_attr "mode" "V4SF")])
981 ;; Also define scalar versions. These are used for abs, neg, and
982 ;; conditional move. Using subregs into vector modes causes register
983 ;; allocation lossage. These patterns do not allow memory operands
984 ;; because the native instructions read the full 128-bits.
;; SFmode bitwise AND carried out with the full-width andps instruction
;; (mode attribute V4SF).  Both inputs must be registers.
986 (define_insn "*andsf3"
987 [(set (match_operand:SF 0 "register_operand" "=x")
988 (and:SF (match_operand:SF 1 "register_operand" "0")
989 (match_operand:SF 2 "register_operand" "x")))]
991 "andps\t{%2, %0|%0, %2}"
992 [(set_attr "type" "sselog")
993 (set_attr "mode" "V4SF")])
;; SFmode (NOT operand 1) AND operand 2, carried out with andnps (mode
;; attribute V4SF).  Both inputs must be registers.
995 (define_insn "*nandsf3"
996 [(set (match_operand:SF 0 "register_operand" "=x")
997 (and:SF (not:SF (match_operand:SF 1 "register_operand" "0"))
998 (match_operand:SF 2 "register_operand" "x")))]
1000 "andnps\t{%2, %0|%0, %2}"
1001 [(set_attr "type" "sselog")
1002 (set_attr "mode" "V4SF")])
;; SFmode bitwise OR carried out with orps (mode attribute V4SF).  Both
;; inputs must be registers.
1004 (define_insn "*iorsf3"
1005 [(set (match_operand:SF 0 "register_operand" "=x")
1006 (ior:SF (match_operand:SF 1 "register_operand" "0")
1007 (match_operand:SF 2 "register_operand" "x")))]
1009 "orps\t{%2, %0|%0, %2}"
1010 [(set_attr "type" "sselog")
1011 (set_attr "mode" "V4SF")])
;; SFmode bitwise XOR carried out with xorps (mode attribute V4SF).  Both
;; inputs must be registers.
1013 (define_insn "*xorsf3"
1014 [(set (match_operand:SF 0 "register_operand" "=x")
1015 (xor:SF (match_operand:SF 1 "register_operand" "0")
1016 (match_operand:SF 2 "register_operand" "x")))]
1018 "xorps\t{%2, %0|%0, %2}"
1019 [(set_attr "type" "sselog")
1020 (set_attr "mode" "V4SF")])
1022 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1024 ;; Parallel single-precision floating point conversion operations
1026 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2ps: converts a V2SI operand (constraint "ym") to V2SF.  Operand 1
;; is the V4SF register tied to the destination.
1028 (define_insn "sse_cvtpi2ps"
1029 [(set (match_operand:V4SF 0 "register_operand" "=x")
1032 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
1033 (match_operand:V4SF 1 "register_operand" "0")
1036 "cvtpi2ps\t{%2, %0|%0, %2}"
1037 [(set_attr "type" "ssecvt")
1038 (set_attr "mode" "V4SF")])
;; cvtps2pi: converts a V4SF operand through an unspec and keeps elements
;; 0 and 1, yielding a V2SI result in a "y"-constraint register.  Tagged
;; with unit "mmx" and mode DI.
1040 (define_insn "sse_cvtps2pi"
1041 [(set (match_operand:V2SI 0 "register_operand" "=y")
1043 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
1045 (parallel [(const_int 0) (const_int 1)])))]
1047 "cvtps2pi\t{%1, %0|%0, %1}"
1048 [(set_attr "type" "ssecvt")
1049 (set_attr "unit" "mmx")
1050 (set_attr "mode" "DI")])
;; cvttps2pi: `fix' of V4SF operand 1 to V4SI, of which indices 0 and 1 are
;; kept, written to a V2SI "y" register.  The "unit" attribute is mmx.
;; NOTE(review): the "mode" attribute here is SF while sse_cvtps2pi uses DI;
;; confirm whether the difference is intentional.
;; NOTE(review): lines 1054 and 1057 are missing from this listing.
1052 (define_insn "sse_cvttps2pi"
1053 [(set (match_operand:V2SI 0 "register_operand" "=y")
1055 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
1056 (parallel [(const_int 0) (const_int 1)])))]
1058 "cvttps2pi\t{%1, %0|%0, %1}"
1059 [(set_attr "type" "ssecvt")
1060 (set_attr "unit" "mmx")
1061 (set_attr "mode" "SF")])
;; cvtsi2ss: converts SImode operand 2 to SF with `float'.  Alternative 0
;; takes operand 2 in a general register ("r"), alternative 1 from memory
;; ("m"); V4SF operand 1 is tied to the destination in both.
;; NOTE(review): lines 1065-1066 and 1069-1070 are not visible in this
;; listing, so the merge with operand 1 and the insn condition cannot be
;; confirmed from here.
1063 (define_insn "sse_cvtsi2ss"
1064 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1067 (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
1068 (match_operand:V4SF 1 "register_operand" "0,0")
1071 "cvtsi2ss\t{%2, %0|%0, %2}"
1072 [(set_attr "type" "sseicvt")
1073 (set_attr "athlon_decode" "vector,double")
1074 (set_attr "amdfam10_decode" "vector,double")
1075 (set_attr "mode" "SF")])
;; cvtsi2ssq: DImode counterpart of sse_cvtsi2ss; converts DImode operand 2
;; to SF with `float'.  Enabled only for TARGET_SSE && TARGET_64BIT.
;; NOTE(review): the second alternative's constraint is "rm" here but "m" in
;; sse_cvtsi2ss -- confirm the overlap with alternative 0 ("r") is intended.
;; NOTE(review): lines 1079-1080 and 1083 are missing from this listing.
1077 (define_insn "sse_cvtsi2ssq"
1078 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1081 (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,rm")))
1082 (match_operand:V4SF 1 "register_operand" "0,0")
1084 "TARGET_SSE && TARGET_64BIT"
1085 "cvtsi2ssq\t{%2, %0|%0, %2}"
1086 [(set_attr "type" "sseicvt")
1087 (set_attr "athlon_decode" "vector,double")
1088 (set_attr "amdfam10_decode" "vector,double")
1089 (set_attr "mode" "SF")])
;; cvtss2si: SImode result from index 0 of V4SF operand 1 (register or
;; memory), wrapped in UNSPEC_FIX_NOTRUNC.
;; NOTE(review): unlike sse_cvtss2si_2, no "amdfam10_decode" attribute is
;; set here -- confirm whether that is intentional.
;; NOTE(review): lines 1093-1094 and 1098 are missing from this listing.
1091 (define_insn "sse_cvtss2si"
1092 [(set (match_operand:SI 0 "register_operand" "=r,r")
1095 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1096 (parallel [(const_int 0)]))]
1097 UNSPEC_FIX_NOTRUNC))]
1099 "cvtss2si\t{%1, %0|%0, %1}"
1100 [(set_attr "type" "sseicvt")
1101 (set_attr "athlon_decode" "double,vector")
1102 (set_attr "prefix_rep" "1")
1103 (set_attr "mode" "SI")])
;; Variant of sse_cvtss2si whose input is a plain SFmode operand (register
;; or memory) rather than an element of a V4SF vector; same cvtss2si
;; template and UNSPEC_FIX_NOTRUNC wrapper.
;; NOTE(review): line 1109 (presumably the insn condition) is not visible.
1105 (define_insn "sse_cvtss2si_2"
1106 [(set (match_operand:SI 0 "register_operand" "=r,r")
1107 (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
1108 UNSPEC_FIX_NOTRUNC))]
1110 "cvtss2si\t{%1, %0|%0, %1}"
1111 [(set_attr "type" "sseicvt")
1112 (set_attr "athlon_decode" "double,vector")
1113 (set_attr "amdfam10_decode" "double,double")
1114 (set_attr "prefix_rep" "1")
1115 (set_attr "mode" "SI")])
;; cvtss2siq: DImode result from index 0 of V4SF operand 1, wrapped in
;; UNSPEC_FIX_NOTRUNC.  Enabled only for TARGET_SSE && TARGET_64BIT.
;; NOTE(review): unlike sse_cvtss2siq_2, no "amdfam10_decode" attribute is
;; set here -- confirm whether that is intentional.
;; NOTE(review): lines 1119-1120 are missing from this listing.
1117 (define_insn "sse_cvtss2siq"
1118 [(set (match_operand:DI 0 "register_operand" "=r,r")
1121 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1122 (parallel [(const_int 0)]))]
1123 UNSPEC_FIX_NOTRUNC))]
1124 "TARGET_SSE && TARGET_64BIT"
1125 "cvtss2siq\t{%1, %0|%0, %1}"
1126 [(set_attr "type" "sseicvt")
1127 (set_attr "athlon_decode" "double,vector")
1128 (set_attr "prefix_rep" "1")
1129 (set_attr "mode" "DI")])
;; Variant of sse_cvtss2siq whose input is a plain SFmode operand (register
;; or memory); DImode result via UNSPEC_FIX_NOTRUNC, emitted as cvtss2siq.
;; Enabled only for TARGET_SSE && TARGET_64BIT.
1131 (define_insn "sse_cvtss2siq_2"
1132 [(set (match_operand:DI 0 "register_operand" "=r,r")
1133 (unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
1134 UNSPEC_FIX_NOTRUNC))]
1135 "TARGET_SSE && TARGET_64BIT"
1136 "cvtss2siq\t{%1, %0|%0, %1}"
1137 [(set_attr "type" "sseicvt")
1138 (set_attr "athlon_decode" "double,vector")
1139 (set_attr "amdfam10_decode" "double,double")
1140 (set_attr "prefix_rep" "1")
1141 (set_attr "mode" "DI")])
;; cvttss2si: SImode result derived from index 0 of V4SF operand 1 (register
;; or memory).
;; NOTE(review): lines 1145-1146 and 1149 are missing from this listing, so
;; the operators wrapping operand 1 and the insn condition are not visible.
1143 (define_insn "sse_cvttss2si"
1144 [(set (match_operand:SI 0 "register_operand" "=r,r")
1147 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1148 (parallel [(const_int 0)]))))]
1150 "cvttss2si\t{%1, %0|%0, %1}"
1151 [(set_attr "type" "sseicvt")
1152 (set_attr "athlon_decode" "double,vector")
1153 (set_attr "amdfam10_decode" "double,double")
1154 (set_attr "prefix_rep" "1")
1155 (set_attr "mode" "SI")])
;; cvttss2siq: DImode counterpart of sse_cvttss2si, operating on index 0 of
;; V4SF operand 1.  Enabled only for TARGET_SSE && TARGET_64BIT.
;; NOTE(review): lines 1159-1160 are missing from this listing, so the
;; operators wrapping operand 1 are not visible.
1157 (define_insn "sse_cvttss2siq"
1158 [(set (match_operand:DI 0 "register_operand" "=r,r")
1161 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1162 (parallel [(const_int 0)]))))]
1163 "TARGET_SSE && TARGET_64BIT"
1164 "cvttss2siq\t{%1, %0|%0, %1}"
1165 [(set_attr "type" "sseicvt")
1166 (set_attr "athlon_decode" "double,vector")
1167 (set_attr "amdfam10_decode" "double,double")
1168 (set_attr "prefix_rep" "1")
1169 (set_attr "mode" "DI")])
;; cvtdq2ps: element-wise `float' conversion of V4SI operand 1 (register or
;; memory) to V4SF.
;; NOTE(review): line 1174 (presumably the insn condition) is not visible.
1171 (define_insn "sse2_cvtdq2ps"
1172 [(set (match_operand:V4SF 0 "register_operand" "=x")
1173 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
1175 "cvtdq2ps\t{%1, %0|%0, %1}"
1176 [(set_attr "type" "ssecvt")
1177 (set_attr "mode" "V4SF")])
;; cvtps2dq: V4SF operand 1 (register or memory) to V4SI, modelled as
;; UNSPEC_FIX_NOTRUNC rather than `fix'.
;; NOTE(review): line 1183 (presumably the insn condition) is not visible.
1179 (define_insn "sse2_cvtps2dq"
1180 [(set (match_operand:V4SI 0 "register_operand" "=x")
1181 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
1182 UNSPEC_FIX_NOTRUNC))]
1184 "cvtps2dq\t{%1, %0|%0, %1}"
1185 [(set_attr "type" "ssecvt")
1186 (set_attr "prefix_data16" "1")
1187 (set_attr "mode" "TI")])
;; cvttps2dq: element-wise `fix' conversion of V4SF operand 1 (register or
;; memory) to V4SI.
;; NOTE(review): line 1192 (presumably the insn condition) is not visible.
1189 (define_insn "sse2_cvttps2dq"
1190 [(set (match_operand:V4SI 0 "register_operand" "=x")
1191 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
1193 "cvttps2dq\t{%1, %0|%0, %1}"
1194 [(set_attr "type" "ssecvt")
1195 (set_attr "prefix_rep" "1")
1196 (set_attr "mode" "TI")])
1198 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1200 ;; Parallel single-precision floating point element swizzling
1202 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; movhlps and its memory forms.  Operand 1 is tied to the destination in
;; all three alternatives:
;;   0: reg <- reg            movhlps
;;   1: reg <- offsettable mem (%H2 addresses the high half)  movlps
;;   2: mem <- reg            movhps
;; The condition rejects the case where operands 1 and 2 are both memory.
;; NOTE(review): lines 1206-1207, 1211-1213 and 1215 are missing from this
;; listing; only the first selector index (6) is visible.
1204 (define_insn "sse_movhlps"
1205 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
1208 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
1209 (match_operand:V4SF 2 "nonimmediate_operand" " x,o,x"))
1210 (parallel [(const_int 6)
1214 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
1216 movhlps\t{%2, %0|%0, %2}
1217 movlps\t{%H2, %0|%0, %H2}
1218 movhps\t{%2, %0|%0, %2}"
1219 [(set_attr "type" "ssemov")
1220 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; movlhps and its memory forms.  Operand 1 is tied to the destination in
;; all three alternatives:
;;   0: reg <- reg            movlhps
;;   1: reg <- mem            movhps
;;   2: offsettable mem <- reg (%H0 addresses the high half)  movlps
;; Operand validity is delegated to ix86_binary_operator_ok.
;; NOTE(review): lines 1224-1225, 1229-1231 and 1233 are missing from this
;; listing; only the first selector index (0) is visible.
1222 (define_insn "sse_movlhps"
1223 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
1226 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
1227 (match_operand:V4SF 2 "nonimmediate_operand" " x,m,x"))
1228 (parallel [(const_int 0)
1232 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
1234 movlhps\t{%2, %0|%0, %2}
1235 movhps\t{%2, %0|%0, %2}
1236 movlps\t{%2, %H0|%H0, %2}"
1237 [(set_attr "type" "ssemov")
1238 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; unpckhps: picks indices 2, 6, 3, 7 (in that order) from operands 1 and 2.
;; Operand 1 is tied to the destination; operand 2 may be memory.
;; NOTE(review): lines 1242-1243 and 1248 are missing from this listing
;; (presumably the select/concat operators and the insn condition).
1240 (define_insn "sse_unpckhps"
1241 [(set (match_operand:V4SF 0 "register_operand" "=x")
1244 (match_operand:V4SF 1 "register_operand" "0")
1245 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1246 (parallel [(const_int 2) (const_int 6)
1247 (const_int 3) (const_int 7)])))]
1249 "unpckhps\t{%2, %0|%0, %2}"
1250 [(set_attr "type" "sselog")
1251 (set_attr "mode" "V4SF")])
;; unpcklps: picks indices 0, 4, 1, 5 (in that order) from operands 1 and 2.
;; Operand 1 is tied to the destination; operand 2 may be memory.
;; NOTE(review): lines 1255-1256 and 1261 are missing from this listing
;; (presumably the select/concat operators and the insn condition).
1253 (define_insn "sse_unpcklps"
1254 [(set (match_operand:V4SF 0 "register_operand" "=x")
1257 (match_operand:V4SF 1 "register_operand" "0")
1258 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1259 (parallel [(const_int 0) (const_int 4)
1260 (const_int 1) (const_int 5)])))]
1262 "unpcklps\t{%2, %0|%0, %2}"
1263 [(set_attr "type" "sselog")
1264 (set_attr "mode" "V4SF")])
1266 ;; These are modeled with the same vec_concat as the others so that we
1267 ;; capture users of shufps that can use the new instructions
;; movshdup: single-input shuffle of V4SF operand 1 (register or memory);
;; the first selector index is 1.
;; NOTE(review): lines 1270-1271, 1273 and 1275-1278 are missing from this
;; listing, so the remaining selector indices and the insn condition cannot
;; be confirmed from here.
1268 (define_insn "sse3_movshdup"
1269 [(set (match_operand:V4SF 0 "register_operand" "=x")
1272 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
1274 (parallel [(const_int 1)
1279 "movshdup\t{%1, %0|%0, %1}"
1280 [(set_attr "type" "sse")
1281 (set_attr "prefix_rep" "1")
1282 (set_attr "mode" "V4SF")])
;; movsldup: single-input shuffle of V4SF operand 1 (register or memory);
;; the first selector index is 0.
;; NOTE(review): lines 1286-1287, 1289 and 1291-1294 are missing from this
;; listing, so the remaining selector indices and the insn condition cannot
;; be confirmed from here.
1284 (define_insn "sse3_movsldup"
1285 [(set (match_operand:V4SF 0 "register_operand" "=x")
1288 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
1290 (parallel [(const_int 0)
1295 "movsldup\t{%1, %0|%0, %1}"
1296 [(set_attr "type" "sse")
1297 (set_attr "prefix_rep" "1")
1298 (set_attr "mode" "V4SF")])
;; Expander for shufps with an immediate mask in operand 3.  The mask is
;; split into four 2-bit fields (bits 0-1, 2-3, 4-5, 6-7); the two upper
;; fields are biased by 4.  The four values are passed to sse_shufps_1 as
;; explicit element selectors.
;; NOTE(review): lines 1305-1306 and 1313-1315 are missing from this listing
;; (presumably the condition, braces and DONE).
1300 (define_expand "sse_shufps"
1301 [(match_operand:V4SF 0 "register_operand" "")
1302 (match_operand:V4SF 1 "register_operand" "")
1303 (match_operand:V4SF 2 "nonimmediate_operand" "")
1304 (match_operand:SI 3 "const_int_operand" "")]
1307 int mask = INTVAL (operands[3]);
1308 emit_insn (gen_sse_shufps_1 (operands[0], operands[1], operands[2],
1309 GEN_INT ((mask >> 0) & 3),
1310 GEN_INT ((mask >> 2) & 3),
1311 GEN_INT (((mask >> 4) & 3) + 4),
1312 GEN_INT (((mask >> 6) & 3) + 4)));
;; shufps with explicit selectors: operands 3 and 4 are in 0..3, operands 5
;; and 6 in 4..7.  The output code packs them back into one immediate
;; (2 bits each, after subtracting 4 from operands 5 and 6), stores it in
;; operands[3] and prints shufps.  This is the inverse of the decomposition
;; done by the sse_shufps expander.
;; NOTE(review): lines 1318-1319 and 1326-1328 are missing from this
;; listing, including the declaration/initialisation of `mask'.
1316 (define_insn "sse_shufps_1"
1317 [(set (match_operand:V4SF 0 "register_operand" "=x")
1320 (match_operand:V4SF 1 "register_operand" "0")
1321 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1322 (parallel [(match_operand 3 "const_0_to_3_operand" "")
1323 (match_operand 4 "const_0_to_3_operand" "")
1324 (match_operand 5 "const_4_to_7_operand" "")
1325 (match_operand 6 "const_4_to_7_operand" "")])))]
1329 mask |= INTVAL (operands[3]) << 0;
1330 mask |= INTVAL (operands[4]) << 2;
1331 mask |= (INTVAL (operands[5]) - 4) << 4;
1332 mask |= (INTVAL (operands[6]) - 4) << 6;
1333 operands[3] = GEN_INT (mask);
1335 return "shufps\t{%3, %2, %0|%0, %2, %3}";
1337 [(set_attr "type" "sselog")
1338 (set_attr "mode" "V4SF")])
;; Extracts indices 2 and 3 of V4SF operand 1 into a V2SF destination:
;;   0: mem <- reg            movhps
;;   1: reg <- reg            movhlps
;;   2: reg <- offsettable mem (%H1 addresses the high half)  movlps
;; NOTE(review): lines 1342 and 1345-1346 are missing from this listing
;; (presumably the selecting operator and the insn condition).
1340 (define_insn "sse_storehps"
1341 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
1343 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
1344 (parallel [(const_int 2) (const_int 3)])))]
1347 movhps\t{%1, %0|%0, %1}
1348 movhlps\t{%1, %0|%0, %1}
1349 movlps\t{%H1, %0|%0, %H1}"
1350 [(set_attr "type" "ssemov")
1351 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Combines indices 0 and 1 of V4SF operand 1 (tied to the destination) with
;; V2SF operand 2:
;;   0: reg, operand 2 in memory       movhps
;;   1: reg, operand 2 in a register   movlhps
;;   2: offsettable mem destination (%H0 addresses the high half)  movlps
;; NOTE(review): lines 1355-1356 and 1360-1361 are missing from this listing
;; (presumably the concat/select operators and the insn condition).
1353 (define_insn "sse_loadhps"
1354 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
1357 (match_operand:V4SF 1 "nonimmediate_operand" "0,0,0")
1358 (parallel [(const_int 0) (const_int 1)]))
1359 (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
1362 movhps\t{%2, %0|%0, %2}
1363 movlhps\t{%2, %0|%0, %2}
1364 movlps\t{%2, %H0|%H0, %2}"
1365 [(set_attr "type" "ssemov")
1366 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Extracts indices 0 and 1 of V4SF operand 1 into a V2SF destination:
;;   0: mem <- reg   movlps
;;   1: reg <- reg   movaps (whole-register copy)
;;   2: reg <- mem   movlps
;; NOTE(review): lines 1370 and 1373-1374 are missing from this listing
;; (presumably the selecting operator and the insn condition).
1368 (define_insn "sse_storelps"
1369 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
1371 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m")
1372 (parallel [(const_int 0) (const_int 1)])))]
1375 movlps\t{%1, %0|%0, %1}
1376 movaps\t{%1, %0|%0, %1}
1377 movlps\t{%1, %0|%0, %1}"
1378 [(set_attr "type" "ssemov")
1379 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Combines V2SF operand 2 with indices 2 and 3 of V4SF operand 1:
;;   0: operand 2 tied to the destination, operand 1 in a register
;;      -> shufps with immediate 0xe4
;;   1: operand 1 tied, operand 2 in memory       -> movlps
;;   2: memory destination, operand 2 in register -> movlps
;; NOTE(review): lines 1383, 1385 and 1388-1389 are missing from this
;; listing (presumably the concat/select operators and the insn condition).
1381 (define_insn "sse_loadlps"
1382 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
1384 (match_operand:V2SF 2 "nonimmediate_operand" "0,m,x")
1386 (match_operand:V4SF 1 "nonimmediate_operand" "x,0,0")
1387 (parallel [(const_int 2) (const_int 3)]))))]
1390 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
1391 movlps\t{%2, %0|%0, %2}
1392 movlps\t{%2, %0|%0, %2}"
1393 [(set_attr "type" "sselog,ssemov,ssemov")
1394 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; movss between registers: operand 2 is the source printed in the template,
;; operand 1 is tied to the destination.
;; NOTE(review): lines 1398 and 1401-1402 are missing from this listing, so
;; the merge operator/mask and the insn condition are not visible.
1396 (define_insn "sse_movss"
1397 [(set (match_operand:V4SF 0 "register_operand" "=x")
1399 (match_operand:V4SF 2 "register_operand" "x")
1400 (match_operand:V4SF 1 "register_operand" "0")
1403 "movss\t{%2, %0|%0, %2}"
1404 [(set_attr "type" "ssemov")
1405 (set_attr "mode" "SF")])
;; Builds a V4SF from SFmode operand 1, which is tied to the destination;
;; emitted as shufps with immediate 0 using %0 as both sources.
;; NOTE(review): lines 1409 and 1411 are missing from this listing
;; (presumably the vec_duplicate operator and the insn condition).
1407 (define_insn "*vec_dupv4sf"
1408 [(set (match_operand:V4SF 0 "register_operand" "=x")
1410 (match_operand:SF 1 "register_operand" "0")))]
1412 "shufps\t{$0, %0, %0|%0, %0, 0}"
1413 [(set_attr "type" "sselog1")
1414 (set_attr "mode" "V4SF")])
1416 ;; ??? In theory we can match memory for the MMX alternative, but allowing
1417 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
1418 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Forms a V2SF from two SFmode operands, with SSE ("x") and MMX ("*y")
;; alternatives:
;;   0: x,  operand 1 tied, operand 2 in x     unpcklps
;;   1: x,  operand 1 in memory, operand 2 "C" movss
;;   2: *y, operand 1 tied, operand 2 in *y    punpckldq
;;   3: *y, operand 1 in memory, operand 2 "C" movd
;; NOTE(review): lines 1421 and 1424-1425 are missing from this listing
;; (presumably the vec_concat operator and the insn condition).
1419 (define_insn "*sse_concatv2sf"
1420 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
1422 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
1423 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
1426 unpcklps\t{%2, %0|%0, %2}
1427 movss\t{%1, %0|%0, %1}
1428 punpckldq\t{%2, %0|%0, %2}
1429 movd\t{%1, %0|%0, %1}"
1430 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
1431 (set_attr "mode" "V4SF,SF,DI,DI")])
;; Forms a V4SF from two V2SF operands; operand 1 is tied to the
;; destination.  Operand 2 in a register uses movlhps, in memory movhps.
;; NOTE(review): lines 1435 and 1438-1439 are missing from this listing
;; (presumably the vec_concat operator and the insn condition).
1433 (define_insn "*sse_concatv4sf"
1434 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1436 (match_operand:V2SF 1 "register_operand" " 0,0")
1437 (match_operand:V2SF 2 "nonimmediate_operand" " x,m")))]
1440 movlhps\t{%2, %0|%0, %2}
1441 movhps\t{%2, %0|%0, %2}"
1442 [(set_attr "type" "ssemov")
1443 (set_attr "mode" "V4SF,V2SF")])
;; Expander for V4SF vector initialisation; all work is delegated to
;; ix86_expand_vector_init with `false' as its first argument.
;; NOTE(review): lines 1448-1449 and 1451-1453 are missing from this listing
;; (presumably the condition, braces and DONE).
1445 (define_expand "vec_initv4sf"
1446 [(match_operand:V4SF 0 "register_operand" "")
1447 (match_operand 1 "" "")]
1450 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Sets a V4SF from SFmode operand 2 and vector operand 1; four constraint
;; alternatives:
;;   0: x  <- x,  operand 1 tied     movss
;;   1: x  <- m,  operand 1 "C"      movss
;;   2: Y2 <- *r, operand 1 "C"      movd
;;   3: m  <- x*rfF, operand 1 tied  (template not visible)
;; NOTE(review): lines 1456-1457, 1460-1462 and 1466 are missing from this
;; listing; line 1466 presumably holds the template for alternative 3.
1454 (define_insn "vec_setv4sf_0"
1455 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,Y2,m")
1458 (match_operand:SF 2 "general_operand" " x,m,*r,x*rfF"))
1459 (match_operand:V4SF 1 "vector_move_operand" " 0,C,C ,0")
1463 movss\t{%2, %0|%0, %2}
1464 movss\t{%2, %0|%0, %2}
1465 movd\t{%2, %0|%0, %2}
1467 [(set_attr "type" "ssemov")
1468 (set_attr "mode" "SF")])
1470 ;; A subset is vec_setv4sf.
;; insertps form of vec_set: operand 3 is a power-of-two constant accepted
;; by const_pow2_1_to_8_operand.  The output code rewrites it to
;; exact_log2 (operand 3) << 4 before printing, which becomes the insertps
;; immediate.
;; NOTE(review): lines 1473-1474 and 1478-1479 are missing from this listing
;; (presumably the merge/duplicate operators and the insn condition).
1471 (define_insn "*vec_setv4sf_sse4_1"
1472 [(set (match_operand:V4SF 0 "register_operand" "=x")
1475 (match_operand:SF 2 "nonimmediate_operand" "xm"))
1476 (match_operand:V4SF 1 "register_operand" "0")
1477 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
1480 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
1481 return "insertps\t{%3, %2, %0|%0, %2, %3}";
1483 [(set_attr "type" "sselog")
1484 (set_attr "prefix_extra" "1")
1485 (set_attr "mode" "V4SF")])
;; insertps as an unspec: register operand 2 is the source, operand 1 is
;; tied to the destination, and operand 3 is an immediate in 0..255 that is
;; passed through to the instruction unchanged.
;; NOTE(review): the output template line ends with a stray `;' after the
;; closing quote.  In this file `;' starts a comment so it is harmless, but
;; it looks unintentional.
;; NOTE(review): lines 1492-1493 are missing from this listing (presumably
;; the unspec name and the insn condition).
1487 (define_insn "sse4_1_insertps"
1488 [(set (match_operand:V4SF 0 "register_operand" "=x")
1489 (unspec:V4SF [(match_operand:V4SF 2 "register_operand" "x")
1490 (match_operand:V4SF 1 "register_operand" "0")
1491 (match_operand:SI 3 "const_0_to_255_operand" "n")]
1494 "insertps\t{%3, %2, %0|%0, %2, %3}";
1495 [(set_attr "type" "sselog")
1496 (set_attr "prefix_extra" "1")
1497 (set_attr "mode" "V4SF")])
;; Post-reload split (condition: TARGET_SSE && reload_completed) for a V4SF
;; memory destination and a non-memory SFmode operand 1: it becomes a plain
;; SFmode move to offset 0 of the destination.
;; NOTE(review): the header at line 1499 (presumably `(define_split') and
;; lines 1501-1502, 1504-1505, 1507-1508 and 1510-1512 are missing from this
;; listing.
1500 [(set (match_operand:V4SF 0 "memory_operand" "")
1503 (match_operand:SF 1 "nonmemory_operand" ""))
1506 "TARGET_SSE && reload_completed"
1509 emit_move_insn (adjust_address (operands[0], SFmode, 0), operands[1]);
;; Expander that sets one element of V4SF operand 0 to SFmode operand 1;
;; operand 2 is the constant element index.  All work is delegated to
;; ix86_expand_vector_set with `false' as its first argument.
;; NOTE(review): lines 1517-1518 and 1521-1523 are missing from this listing
;; (presumably the condition, braces and DONE).
1513 (define_expand "vec_setv4sf"
1514 [(match_operand:V4SF 0 "register_operand" "")
1515 (match_operand:SF 1 "register_operand" "")
1516 (match_operand 2 "const_int_operand" "")]
1519 ix86_expand_vector_set (false, operands[0], operands[1],
1520 INTVAL (operands[2]));
;; Extracts index 0 of V4SF operand 1 into an SFmode destination; the insn
;; condition rejects memory-to-memory.  After reload it is split into a
;; plain SFmode move whose source is either a re-moded register
;; (gen_rtx_REG with the same REGNO) or gen_lowpart of operand 1.
;; NOTE(review): lines 1526, 1530, 1532-1533, 1535, 1537 and 1540-1542 are
;; missing from this listing, including the test that chooses between the
;; two `op1' assignments.
1524 (define_insn_and_split "*vec_extractv4sf_0"
1525 [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,fr")
1527 (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m")
1528 (parallel [(const_int 0)])))]
1529 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
1531 "&& reload_completed"
1534 rtx op1 = operands[1];
1536 op1 = gen_rtx_REG (SFmode, REGNO (op1));
1538 op1 = gen_lowpart (SFmode, op1);
1539 emit_move_insn (operands[0], op1);
;; extractps: extracts the element of V4SF register operand 1 selected by
;; constant operand 2 (0..3) into an SFmode destination with constraint "rm".
;; NOTE(review): lines 1545 and 1548 are missing from this listing
;; (presumably the selecting operator and the insn condition).
1543 (define_insn "*sse4_1_extractps"
1544 [(set (match_operand:SF 0 "nonimmediate_operand" "=rm")
1546 (match_operand:V4SF 1 "register_operand" "x")
1547 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
1549 "extractps\t{%2, %1, %0|%0, %1, %2}"
1550 [(set_attr "type" "sselog")
1551 (set_attr "prefix_extra" "1")
1552 (set_attr "mode" "V4SF")])
;; Extracts element `operand 2' (0..3) from a V4SF in offsettable memory.
;; The split turns it into an SFmode load from the vector's address plus
;; 4 * index bytes.
;; NOTE(review): lines 1556, 1559-1563, 1565 and 1567-1569 are missing from
;; this listing, including the insn and split conditions.
1554 (define_insn_and_split "*vec_extract_v4sf_mem"
1555 [(set (match_operand:SF 0 "register_operand" "=x*rf")
1557 (match_operand:V4SF 1 "memory_operand" "o")
1558 (parallel [(match_operand 2 "const_0_to_3_operand" "n")])))]
1564 int i = INTVAL (operands[2]);
1566 emit_move_insn (operands[0], adjust_address (operands[1], SFmode, i*4));
;; Expander that extracts one element of V4SF operand 1 into SFmode
;; operand 0; operand 2 is the constant element index.  All work is
;; delegated to ix86_expand_vector_extract with `false' as first argument.
;; NOTE(review): lines 1574-1575 and 1578-1580 are missing from this listing
;; (presumably the condition, braces and DONE).
1570 (define_expand "vec_extractv4sf"
1571 [(match_operand:SF 0 "register_operand" "")
1572 (match_operand:V4SF 1 "register_operand" "")
1573 (match_operand 2 "const_int_operand" "")]
1576 ix86_expand_vector_extract (false, operands[0], operands[1],
1577 INTVAL (operands[2]));
1581 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1583 ;; SSE5 floating point multiply/accumulate instructions.  This includes the
1584 ;; scalar version of the instructions as well as the vector version.
1586 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1588 ;; In order to match (*a * *b) + *c, particularly when vectorizing, allow
1589 ;; combine to generate a multiply/add with two memory references. We then
1590 ;; split this insn into a load of the destination register from one of the
1591 ;; memory operands. If we don't manage to split the insn, reload will
1592 ;; generate the appropriate moves. This is needed because combine
1593 ;; has already folded one of the memory references into both the multiply and
1594 ;; add insns, and it can't generate a new pseudo. I.e.:
1595 ;; (set (reg1) (mem (addr1)))
1596 ;; (set (reg2) (mult (reg1) (mem (addr2))))
1597 ;; (set (reg3) (plus (reg2) (mem (addr3))))
;; SSE5 fmadd over SSEMODEF4 (SF, DF, V4SF, V2DF); the mnemonic suffix comes
;; from ssemodesuffixf4 (ss/sd/ps/pd).  Operands 1 and 2 are the commutative
;; ("%") multiplicands and operand 3 the addend; each alternative ties the
;; destination to operand 1 or operand 3.  Requires TARGET_SSE5 and
;; TARGET_FUSED_MADD, and ix86_sse5_valid_op_p is called with 2 as its last
;; argument.
;; NOTE(review): lines 1601-1602 (presumably the plus/mult operators) are
;; missing from this listing.
1599 (define_insn "sse5_fmadd<mode>4"
1600 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x,x,x")
1603 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%0,0,x,xm")
1604 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm,xm,x"))
1605 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x,0,0")))]
1606 "TARGET_SSE5 && TARGET_FUSED_MADD
1607 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)"
1608 "fmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1609 [(set_attr "type" "ssemuladd")
1610 (set_attr "mode" "<MODE>")])
1612 ;; Split fmadd with two memory operands into a load and the fmadd.
;; Split pattern for fmadd.  The visible condition requires that the
;; operands fail ix86_sse5_valid_op_p with last argument 1, pass it with
;; last argument 2, and that operand 0 is not mentioned in operands 1-3.
;; The body calls ix86_expand_sse5_multiple_memory and then re-emits
;; sse5_fmadd<mode>4 with the same operand array.
;; NOTE(review): the header at line 1613 (presumably `(define_split'), the
;; first condition line (1620) and lines 1615-1616, 1626-1627 and 1631-1633
;; are missing from this listing.
1614 [(set (match_operand:SSEMODEF4 0 "register_operand" "")
1617 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "")
1618 (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))
1619 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")))]
1621 && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1)
1622 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)
1623 && !reg_mentioned_p (operands[0], operands[1])
1624 && !reg_mentioned_p (operands[0], operands[2])
1625 && !reg_mentioned_p (operands[0], operands[3])"
1628 ix86_expand_sse5_multiple_memory (operands, 4, <MODE>mode);
1629 emit_insn (gen_sse5_fmadd<mode>4 (operands[0], operands[1],
1630 operands[2], operands[3]));
1634 ;; For the scalar operations, use operand1 for the upper words that aren't
1635 ;; modified, so restrict the forms that are generated.
1636 ;; Scalar version of fmadd
;; vec_merge form of fmadd over SSEMODEF2P (V4SF, V2DF); the mnemonic
;; suffix comes from ssemodesuffixf2s (ss/sd).  Operand 1 is tied to the
;; destination in both alternatives.  Requires TARGET_SSE5 and
;; TARGET_FUSED_MADD, and ix86_sse5_valid_op_p is called with 1 as its last
;; argument.
;; NOTE(review): lines 1640-1641 and 1645-1646 are missing from this listing
;; (presumably the plus/mult operators and the vec_merge tail).
1637 (define_insn "sse5_vmfmadd<mode>4"
1638 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1639 (vec_merge:SSEMODEF2P
1642 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0")
1643 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
1644 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1647 "TARGET_SSE5 && TARGET_FUSED_MADD
1648 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1649 "fmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1650 [(set_attr "type" "ssemuladd")
1651 (set_attr "mode" "<MODE>")])
1653 ;; Floating multiply and subtract
1654 ;; Allow two memory operands the same as fmadd
;; SSE5 fmsub over SSEMODEF4 (SF, DF, V4SF, V2DF); same operand layout and
;; constraints as sse5_fmadd<mode>4, with operand 3 as the value combined
;; with the product.  Requires TARGET_SSE5 and TARGET_FUSED_MADD, and
;; ix86_sse5_valid_op_p is called with 2 as its last argument.
;; NOTE(review): lines 1657-1658 (presumably the minus/mult operators) are
;; missing from this listing.
1655 (define_insn "sse5_fmsub<mode>4"
1656 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x,x,x")
1659 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%0,0,x,xm")
1660 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm,xm,x"))
1661 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x,0,0")))]
1662 "TARGET_SSE5 && TARGET_FUSED_MADD
1663 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)"
1664 "fmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1665 [(set_attr "type" "ssemuladd")
1666 (set_attr "mode" "<MODE>")])
1668 ;; Split fmsub with two memory operands into a load and the fmsub.
;; Split pattern for fmsub.  The visible condition requires that the
;; operands fail ix86_sse5_valid_op_p with last argument 1, pass it with
;; last argument 2, and that operand 0 is not mentioned in operands 1-3.
;; The body calls ix86_expand_sse5_multiple_memory and then re-emits
;; sse5_fmsub<mode>4 with the same operand array.
;; NOTE(review): the header at line 1669 (presumably `(define_split'), the
;; first condition line (1676) and lines 1671-1672, 1682-1683 and 1687-1689
;; are missing from this listing.
1670 [(set (match_operand:SSEMODEF4 0 "register_operand" "")
1673 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "")
1674 (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))
1675 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")))]
1677 && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1)
1678 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)
1679 && !reg_mentioned_p (operands[0], operands[1])
1680 && !reg_mentioned_p (operands[0], operands[2])
1681 && !reg_mentioned_p (operands[0], operands[3])"
1684 ix86_expand_sse5_multiple_memory (operands, 4, <MODE>mode);
1685 emit_insn (gen_sse5_fmsub<mode>4 (operands[0], operands[1],
1686 operands[2], operands[3]));
1690 ;; For the scalar operations, use operand1 for the upper words that aren't
1691 ;; modified, so restrict the forms that are generated.
1692 ;; Scalar version of fmsub
;; vec_merge form of fmsub over SSEMODEF2P (V4SF, V2DF); the mnemonic
;; suffix comes from ssemodesuffixf2s (ss/sd).  Operand 1 is tied to the
;; destination in both alternatives.  Requires TARGET_SSE5 and
;; TARGET_FUSED_MADD, and ix86_sse5_valid_op_p is called with 1 as its last
;; argument.
;; NOTE(review): lines 1696-1697 and 1701-1702 are missing from this listing
;; (presumably the minus/mult operators and the vec_merge tail).
1693 (define_insn "sse5_vmfmsub<mode>4"
1694 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1695 (vec_merge:SSEMODEF2P
1698 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0")
1699 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
1700 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1703 "TARGET_SSE5 && TARGET_FUSED_MADD
1704 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1705 "fmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1706 [(set_attr "type" "ssemuladd")
1707 (set_attr "mode" "<MODE>")])
1709 ;; Floating point negative multiply and add
1710 ;; Rewrite (- (a * b) + c) into the canonical form: c - (a * b)
1711 ;; Note operands are out of order to simplify call to ix86_sse5_valid_op_p
1712 ;; Allow two memory operands to help in optimizing.
;; SSE5 fnmadd over SSEMODEF4 (SF, DF, V4SF, V2DF).  Operand 3 appears first
;; in the RTL, followed by the commutative ("%") pair, operands 1 and 2; the
;; template still prints the operands in numeric order.  Requires
;; TARGET_SSE5 and TARGET_FUSED_MADD, and ix86_sse5_valid_op_p is called
;; with 2 as its last argument.
;; NOTE(review): lines 1715 and 1717 (presumably the minus/mult operators)
;; are missing from this listing.
1713 (define_insn "sse5_fnmadd<mode>4"
1714 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x,x,x")
1716 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x,0,0")
1718 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%0,0,x,xm")
1719 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm,xm,x"))))]
1720 "TARGET_SSE5 && TARGET_FUSED_MADD
1721 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)"
1722 "fnmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1723 [(set_attr "type" "ssemuladd")
1724 (set_attr "mode" "<MODE>")])
1726 ;; Split fnmadd with two memory operands into a load and the fnmadd.
;; Split pattern for fnmadd.  The visible condition requires that the
;; operands fail ix86_sse5_valid_op_p with last argument 1, pass it with
;; last argument 2, and that operand 0 is not mentioned in operands 1-3.
;; The body calls ix86_expand_sse5_multiple_memory and then re-emits
;; sse5_fnmadd<mode>4 with the same operand array.
;; NOTE(review): the header at line 1727 (presumably `(define_split'), the
;; first condition line (1734) and lines 1729, 1731, 1740-1741 and 1745-1747
;; are missing from this listing.
1728 [(set (match_operand:SSEMODEF4 0 "register_operand" "")
1730 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")
1732 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "")
1733 (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))))]
1735 && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1)
1736 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)
1737 && !reg_mentioned_p (operands[0], operands[1])
1738 && !reg_mentioned_p (operands[0], operands[2])
1739 && !reg_mentioned_p (operands[0], operands[3])"
1742 ix86_expand_sse5_multiple_memory (operands, 4, <MODE>mode);
1743 emit_insn (gen_sse5_fnmadd<mode>4 (operands[0], operands[1],
1744 operands[2], operands[3]));
1748 ;; For the scalar operations, use operand1 for the upper words that aren't
1749 ;; modified, so restrict the forms that are generated.
1750 ;; Scalar version of fnmadd
;; vec_merge form of fnmadd over SSEMODEF2P (V4SF, V2DF); the mnemonic
;; suffix comes from ssemodesuffixf2s (ss/sd).  Operand 3 appears first in
;; the RTL; operand 1 is tied to the destination in both alternatives.
;; Requires TARGET_SSE5 and TARGET_FUSED_MADD, and ix86_sse5_valid_op_p is
;; called with 1 as its last argument.
;; NOTE(review): lines 1754, 1756 and 1759-1760 are missing from this
;; listing (presumably the minus/mult operators and the vec_merge tail).
1751 (define_insn "sse5_vmfnmadd<mode>4"
1752 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1753 (vec_merge:SSEMODEF2P
1755 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")
1757 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0")
1758 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm")))
1761 "TARGET_SSE5 && TARGET_FUSED_MADD
1762 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1763 "fnmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1764 [(set_attr "type" "ssemuladd")
1765 (set_attr "mode" "<MODE>")])
1767 ;; Floating point negative multiply and subtract
1768 ;; Rewrite (- (a * b) - c) into the canonical form: ((-a) * b) - c
1769 ;; Allow 2 memory operands to help with optimization
;; SSE5 fnmsub over SSEMODEF4 (SF, DF, V4SF, V2DF).  Only two alternatives
;; are provided, both tying operand 1 to the destination; operand 1 is not
;; marked commutative here, unlike in fmadd/fmsub/fnmadd.  Requires
;; TARGET_SSE5 and TARGET_FUSED_MADD, and ix86_sse5_valid_op_p is called
;; with 2 as its last argument.
;; NOTE(review): lines 1772-1774 are missing from this listing (presumably
;; the minus/mult/neg operators; operand 1's line closes one extra paren).
1770 (define_insn "sse5_fnmsub<mode>4"
1771 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x")
1775 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "0,0"))
1776 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm"))
1777 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x")))]
1778 "TARGET_SSE5 && TARGET_FUSED_MADD
1779 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)"
1780 "fnmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1781 [(set_attr "type" "ssemuladd")
1782 (set_attr "mode" "<MODE>")])
1784 ;; Split fnmsub with two memory operands into a load and the fnmsub.
;; Split pattern for fnmsub.  The visible condition requires that the
;; operands fail ix86_sse5_valid_op_p with last argument 1, pass it with
;; last argument 2, and that operand 0 is not mentioned in operands 1-3.
;; The body calls ix86_expand_sse5_multiple_memory and then re-emits
;; sse5_fnmsub<mode>4 with the same operand array.
;; NOTE(review): the header at line 1785 (presumably `(define_split'), the
;; first condition line (1793) and lines 1787-1789, 1799-1800 and 1804-1806
;; are missing from this listing.
1786 [(set (match_operand:SSEMODEF4 0 "register_operand" "")
1790 (match_operand:SSEMODEF4 1 "nonimmediate_operand" ""))
1791 (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))
1792 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")))]
1794 && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1)
1795 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)
1796 && !reg_mentioned_p (operands[0], operands[1])
1797 && !reg_mentioned_p (operands[0], operands[2])
1798 && !reg_mentioned_p (operands[0], operands[3])"
1801 ix86_expand_sse5_multiple_memory (operands, 4, <MODE>mode);
1802 emit_insn (gen_sse5_fnmsub<mode>4 (operands[0], operands[1],
1803 operands[2], operands[3]));
1807 ;; For the scalar operations, use operand1 for the upper words that aren't
1808 ;; modified, so restrict the forms that are generated.
1809 ;; Scalar version of fnmsub
;; vec_merge form of fnmsub over SSEMODEF2P (V4SF, V2DF); the mnemonic
;; suffix comes from ssemodesuffixf2s (ss/sd).  Operand 1 is tied to the
;; destination in both alternatives.  Requires TARGET_SSE5 and
;; TARGET_FUSED_MADD.
;; NOTE(review): ix86_sse5_valid_op_p is called with 2 as its last argument
;; here, whereas sse5_vmfmadd, sse5_vmfmsub and sse5_vmfnmadd pass 1 --
;; confirm whether the difference is intentional.
;; NOTE(review): lines 1813-1815 and 1819-1820 are missing from this
;; listing.
1810 (define_insn "sse5_vmfnmsub<mode>4"
1811 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1812 (vec_merge:SSEMODEF2P
1816 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0"))
1817 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
1818 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1821 "TARGET_SSE5 && TARGET_FUSED_MADD
1822 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)"
1823 "fnmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1824 [(set_attr "type" "ssemuladd")
1825 (set_attr "mode" "<MODE>")])
1827 ;; The same instructions using an UNSPEC to allow the intrinsic to be used
1828 ;; even if the user used -mno-fused-madd
1829 ;; Parallel instructions. During instruction generation, just default
1830 ;; to registers, and let combine later build the appropriate instruction.
;; Intrinsic expander for fmadd over SSEMODEF2P (V4SF, V2DF) with
;; register-only operands.  The pattern is wrapped in
;; UNSPEC_SSE5_INTRINSIC; when TARGET_FUSED_MADD is set the preparation code
;; emits sse5_fmadd<mode>4 instead.
;; NOTE(review): lines 1833-1835, 1840-1841, 1844 and 1847-1850 are missing
;; from this listing (operators, condition, braces and presumably DONE).
1831 (define_expand "sse5i_fmadd<mode>4"
1832 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1836 (match_operand:SSEMODEF2P 1 "register_operand" "")
1837 (match_operand:SSEMODEF2P 2 "register_operand" ""))
1838 (match_operand:SSEMODEF2P 3 "register_operand" ""))]
1839 UNSPEC_SSE5_INTRINSIC))]
1842 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1843 if (TARGET_FUSED_MADD)
1845 emit_insn (gen_sse5_fmadd<mode>4 (operands[0], operands[1],
1846 operands[2], operands[3]));
;; UNSPEC_SSE5_INTRINSIC form of fmadd over SSEMODEF2P (V4SF, V2DF), with
;; the same four constraint alternatives as sse5_fmadd<mode>4.  The
;; condition needs only TARGET_SSE5 (not TARGET_FUSED_MADD), and
;; ix86_sse5_valid_op_p is called with 1 as its last argument.
;; NOTE(review): lines 1853-1855 (presumably the unspec/plus/mult operators)
;; are missing from this listing.
1851 (define_insn "*sse5i_fmadd<mode>4"
1852 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
1856 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0,0,x,xm")
1857 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x"))
1858 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0"))]
1859 UNSPEC_SSE5_INTRINSIC))]
1860 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1861 "fmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1862 [(set_attr "type" "ssemuladd")
1863 (set_attr "mode" "<MODE>")])
;; Intrinsic expander for fmsub over SSEMODEF2P (V4SF, V2DF) with
;; register-only operands.  The pattern is wrapped in
;; UNSPEC_SSE5_INTRINSIC; when TARGET_FUSED_MADD is set the preparation code
;; emits sse5_fmsub<mode>4 instead.
;; NOTE(review): lines 1867-1869, 1874-1875, 1878 and 1881-1884 are missing
;; from this listing (operators, condition, braces and presumably DONE).
1865 (define_expand "sse5i_fmsub<mode>4"
1866 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1870 (match_operand:SSEMODEF2P 1 "register_operand" "")
1871 (match_operand:SSEMODEF2P 2 "register_operand" ""))
1872 (match_operand:SSEMODEF2P 3 "register_operand" ""))]
1873 UNSPEC_SSE5_INTRINSIC))]
1876 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1877 if (TARGET_FUSED_MADD)
1879 emit_insn (gen_sse5_fmsub<mode>4 (operands[0], operands[1],
1880 operands[2], operands[3]));
;; UNSPEC_SSE5_INTRINSIC form of fmsub over SSEMODEF2P (V4SF, V2DF).  The
;; condition needs only TARGET_SSE5, and ix86_sse5_valid_op_p is called with
;; 1 as its last argument.
;; NOTE(review): operand 1 uses the predicate register_operand although its
;; last alternative has constraint "xm"; *sse5i_fmadd<mode>4 uses
;; nonimmediate_operand for the same operand -- confirm which is intended.
;; NOTE(review): lines 1887-1889 are missing from this listing.
1885 (define_insn "*sse5i_fmsub<mode>4"
1886 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
1890 (match_operand:SSEMODEF2P 1 "register_operand" "%0,0,x,xm")
1891 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x"))
1892 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0"))]
1893 UNSPEC_SSE5_INTRINSIC))]
1894 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1895 "fmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1896 [(set_attr "type" "ssemuladd")
1897 (set_attr "mode" "<MODE>")])
1899 ;; Rewrite (- (a * b) + c) into the canonical form: c - (a * b)
1900 ;; Note operands are out of order to simplify call to ix86_sse5_valid_op_p
;; Intrinsic expander for fnmadd over SSEMODEF2P (V4SF, V2DF) with
;; register-only operands; operand 3 appears first in the RTL.  The pattern
;; is wrapped in UNSPEC_SSE5_INTRINSIC; when TARGET_FUSED_MADD is set the
;; preparation code emits sse5_fnmadd<mode>4 instead.
;; NOTE(review): lines 1903-1904, 1906, 1910-1911, 1914 and 1917-1920 are
;; missing from this listing (operators, condition, braces and presumably
;; DONE).
1901 (define_expand "sse5i_fnmadd<mode>4"
1902 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1905 (match_operand:SSEMODEF2P 3 "register_operand" "")
1907 (match_operand:SSEMODEF2P 1 "register_operand" "")
1908 (match_operand:SSEMODEF2P 2 "register_operand" "")))]
1909 UNSPEC_SSE5_INTRINSIC))]
1912 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1913 if (TARGET_FUSED_MADD)
1915 emit_insn (gen_sse5_fnmadd<mode>4 (operands[0], operands[1],
1916 operands[2], operands[3]));
;; UNSPEC_SSE5_INTRINSIC form of fnmadd over SSEMODEF2P (V4SF, V2DF);
;; operand 3 appears first in the RTL, followed by the commutative pair,
;; operands 1 and 2.  The condition needs only TARGET_SSE5, and
;; ix86_sse5_valid_op_p is called with 1 as its last argument.
;; NOTE(review): lines 1923-1924 and 1926 are missing from this listing.
1921 (define_insn "*sse5i_fnmadd<mode>4"
1922 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
1925 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0")
1927 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0,0,x,xm")
1928 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x")))]
1929 UNSPEC_SSE5_INTRINSIC))]
1930 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1931 "fnmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1932 [(set_attr "type" "ssemuladd")
1933 (set_attr "mode" "<MODE>")])
1935 ;; Rewrite (- (a * b) - c) into the canonical form: ((-a) * b) - c
;; Intrinsic expander for fnmsub over SSEMODEF2P (V4SF, V2DF) with
;; register-only operands.  The pattern is wrapped in
;; UNSPEC_SSE5_INTRINSIC; when TARGET_FUSED_MADD is set the preparation code
;; emits sse5_fnmsub<mode>4 instead.
;; NOTE(review): lines 1938-1941, 1946-1947, 1950 and 1953-1956 are missing
;; from this listing (operators, condition, braces and presumably DONE).
1936 (define_expand "sse5i_fnmsub<mode>4"
1937 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1942 (match_operand:SSEMODEF2P 1 "register_operand" ""))
1943 (match_operand:SSEMODEF2P 2 "register_operand" ""))
1944 (match_operand:SSEMODEF2P 3 "register_operand" ""))]
1945 UNSPEC_SSE5_INTRINSIC))]
1948 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1949 if (TARGET_FUSED_MADD)
1951 emit_insn (gen_sse5_fnmsub<mode>4 (operands[0], operands[1],
1952 operands[2], operands[3]));
;; UNSPEC_SSE5_INTRINSIC form of fnmsub over SSEMODEF2P (V4SF, V2DF).  The
;; condition needs only TARGET_SSE5, and ix86_sse5_valid_op_p is called with
;; 1 as its last argument.
;; NOTE(review): this pattern has four alternatives with a commutative
;; ("%") operand 1, while sse5_fnmsub<mode>4 has two alternatives and no
;; "%" -- confirm the asymmetry is intended.
;; NOTE(review): lines 1959-1962 are missing from this listing.
1957 (define_insn "*sse5i_fnmsub<mode>4"
1958 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
1963 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0,0,x,xm"))
1964 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x"))
1965 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0"))]
1966 UNSPEC_SSE5_INTRINSIC))]
1967 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1968 "fnmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1969 [(set_attr "type" "ssemuladd")
1970 (set_attr "mode" "<MODE>")])
1972 ;; Scalar instructions
;; Intrinsic expander for the vec_merge form of fmadd over SSEMODEF2P, with
;; register-only operands.  The pattern is wrapped in
;; UNSPEC_SSE5_INTRINSIC; when TARGET_FUSED_MADD is set the preparation code
;; emits sse5_vmfmadd<mode>4 instead.
;; NOTE(review): lines 1975, 1977-1978, 1982-1983, 1985-1986, 1989 and
;; 1992-1995 are missing from this listing.
1973 (define_expand "sse5i_vmfmadd<mode>4"
1974 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1976 [(vec_merge:SSEMODEF2P
1979 (match_operand:SSEMODEF2P 1 "register_operand" "")
1980 (match_operand:SSEMODEF2P 2 "register_operand" ""))
1981 (match_operand:SSEMODEF2P 3 "register_operand" ""))
1984 UNSPEC_SSE5_INTRINSIC))]
1987 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1988 if (TARGET_FUSED_MADD)
1990 emit_insn (gen_sse5_vmfmadd<mode>4 (operands[0], operands[1],
1991 operands[2], operands[3]));
1996 ;; For the scalar operations, use operand1 for the upper words that aren't
1997 ;; modified, so restrict the forms that are accepted.
;; UNSPEC_SSE5_INTRINSIC vec_merge form of fmadd over SSEMODEF2P; operand 1
;; is tied to the destination in both alternatives.  The condition needs
;; only TARGET_SSE5, and the "mode" attribute is <ssescalarmode>.
;; NOTE(review): operand 1 uses register_operand here, while
;; *sse5i_vmfmsub<mode>4 uses nonimmediate_operand with the same "0,0"
;; constraint -- confirm which is intended.
;; NOTE(review): lines 2000, 2002-2003 and 2007-2008 are missing from this
;; listing.
1998 (define_insn "*sse5i_vmfmadd<mode>4"
1999 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2001 [(vec_merge:SSEMODEF2P
2004 (match_operand:SSEMODEF2P 1 "register_operand" "0,0")
2005 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
2006 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
2009 UNSPEC_SSE5_INTRINSIC))]
2010 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
2011 "fmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2012 [(set_attr "type" "ssemuladd")
2013 (set_attr "mode" "<ssescalarmode>")])
;; Intrinsic expander for the vec_merge form of fmsub over SSEMODEF2P, with
;; register-only operands.  The pattern is wrapped in
;; UNSPEC_SSE5_INTRINSIC; when TARGET_FUSED_MADD is set the preparation code
;; emits sse5_vmfmsub<mode>4 instead.
;; NOTE(review): lines 2017, 2019-2020, 2024-2025, 2027-2028, 2031 and
;; 2034-2037 are missing from this listing.
2015 (define_expand "sse5i_vmfmsub<mode>4"
2016 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
2018 [(vec_merge:SSEMODEF2P
2021 (match_operand:SSEMODEF2P 1 "register_operand" "")
2022 (match_operand:SSEMODEF2P 2 "register_operand" ""))
2023 (match_operand:SSEMODEF2P 3 "register_operand" ""))
2026 UNSPEC_SSE5_INTRINSIC))]
2029 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
2030 if (TARGET_FUSED_MADD)
2032 emit_insn (gen_sse5_vmfmsub<mode>4 (operands[0], operands[1],
2033 operands[2], operands[3]));
;; UNSPEC_SSE5_INTRINSIC vec_merge form of fmsub over SSEMODEF2P; operand 1
;; is tied to the destination in both alternatives.  The condition needs
;; only TARGET_SSE5; ix86_sse5_valid_op_p is called with 1 as its last
;; argument, and the "mode" attribute is <ssescalarmode>.
;; NOTE(review): lines 2040, 2042-2043 and 2047-2048 are missing from this
;; listing.
2038 (define_insn "*sse5i_vmfmsub<mode>4"
2039 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2041 [(vec_merge:SSEMODEF2P
2044 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0")
2045 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
2046 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
2049 UNSPEC_SSE5_INTRINSIC))]
2050 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
2051 "fmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2052 [(set_attr "type" "ssemuladd")
2053 (set_attr "mode" "<ssescalarmode>")])
2055 ;; Note operands are out of order to simplify call to ix86_sse5_valid_op_p
;; Intrinsic expander for the vec_merge form of fnmadd over SSEMODEF2P, with
;; register-only operands; operand 3 appears first in the RTL.  The pattern
;; is wrapped in UNSPEC_SSE5_INTRINSIC; when TARGET_FUSED_MADD is set the
;; preparation code emits sse5_vmfnmadd<mode>4 instead.
;; NOTE(review): lines 2058, 2060, 2062, 2065-2066, 2068-2069, 2072 and
;; 2075-2078 are missing from this listing.
2056 (define_expand "sse5i_vmfnmadd<mode>4"
2057 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
2059 [(vec_merge:SSEMODEF2P
2061 (match_operand:SSEMODEF2P 3 "register_operand" "")
2063 (match_operand:SSEMODEF2P 1 "register_operand" "")
2064 (match_operand:SSEMODEF2P 2 "register_operand" "")))
2067 UNSPEC_SSE5_INTRINSIC))]
2070 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
2071 if (TARGET_FUSED_MADD)
2073 emit_insn (gen_sse5_vmfnmadd<mode>4 (operands[0], operands[1],
2074 operands[2], operands[3]));
;; Intrinsic (UNSPEC_SSE5_INTRINSIC) scalar fnmadd insn.  Operand 3 is
;; listed first in the RTL; operand 1 is tied to the destination, and
;; either operand 2 or operand 3 (one per alternative) may be in memory.
2079 (define_insn "*sse5i_vmfnmadd<mode>4"
2080 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2082 [(vec_merge:SSEMODEF2P
2084 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")
2086 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0")
2087 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm")))
2090 UNSPEC_SSE5_INTRINSIC))]
2091 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
2092 "fnmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2093 [(set_attr "type" "ssemuladd")
2094 (set_attr "mode" "<ssescalarmode>")])
;; Expander for the intrinsic scalar fnmsub form.  All operands must be
;; registers.  When TARGET_FUSED_MADD is set, the non-UNSPEC insn
;; (gen_sse5_vmfnmsub<mode>4) is emitted instead of the UNSPEC pattern.
2096 (define_expand "sse5i_vmfnmsub<mode>4"
2097 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
2099 [(vec_merge:SSEMODEF2P
2103 (match_operand:SSEMODEF2P 1 "register_operand" ""))
2104 (match_operand:SSEMODEF2P 2 "register_operand" ""))
2105 (match_operand:SSEMODEF2P 3 "register_operand" ""))
2108 UNSPEC_SSE5_INTRINSIC))]
2111 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
2112 if (TARGET_FUSED_MADD)
2114 emit_insn (gen_sse5_vmfnmsub<mode>4 (operands[0], operands[1],
2115 operands[2], operands[3]));
;; Intrinsic (UNSPEC_SSE5_INTRINSIC) scalar fnmsub insn.  Operand 1 is tied
;; to the destination in both alternatives; either operand 2 or operand 3
;; (one per alternative) may be in memory.
2120 (define_insn "*sse5i_vmfnmsub<mode>4"
2121 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2123 [(vec_merge:SSEMODEF2P
2127 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0"))
2128 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
2129 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
2132 UNSPEC_SSE5_INTRINSIC))]
2133 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
2134 "fnmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2135 [(set_attr "type" "ssemuladd")
2136 (set_attr "mode" "<ssescalarmode>")])
2138 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2140 ;; Parallel double-precision floating point arithmetic
2142 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; V2DF negation.  Expansion is delegated entirely to
;; ix86_expand_fp_absneg_operator with code NEG.
2144 (define_expand "negv2df2"
2145 [(set (match_operand:V2DF 0 "register_operand" "")
2146 (neg:V2DF (match_operand:V2DF 1 "register_operand" "")))]
2148 "ix86_expand_fp_absneg_operator (NEG, V2DFmode, operands); DONE;")
;; V2DF absolute value.  Expansion is delegated entirely to
;; ix86_expand_fp_absneg_operator with code ABS.
2150 (define_expand "absv2df2"
2151 [(set (match_operand:V2DF 0 "register_operand" "")
2152 (abs:V2DF (match_operand:V2DF 1 "register_operand" "")))]
2154 "ix86_expand_fp_absneg_operator (ABS, V2DFmode, operands); DONE;")
;; V2DF addition expander.  Both sources may be registers or memory; the
;; preparation statement runs ix86_fixup_binary_operands_no_copy on them.
2156 (define_expand "addv2df3"
2157 [(set (match_operand:V2DF 0 "register_operand" "")
2158 (plus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
2159 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
2161 "ix86_fixup_binary_operands_no_copy (PLUS, V2DFmode, operands);")
;; V2DF addition insn (addpd).  Operand 1 is marked commutative ("%") and
;; tied to the destination; operand 2 may be a register or memory.
2163 (define_insn "*addv2df3"
2164 [(set (match_operand:V2DF 0 "register_operand" "=x")
2165 (plus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
2166 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
2167 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V2DFmode, operands)"
2168 "addpd\t{%2, %0|%0, %2}"
2169 [(set_attr "type" "sseadd")
2170 (set_attr "mode" "V2DF")])
;; Scalar double add insn (addsd, attribute mode DF) on V2DF operands.
;; Operand 1 must be a register tied to the destination; operand 2 may be
;; a register or memory.  The operand check is given V2DFmode so that it
;; agrees with the mode of this pattern's operands, as sse2_vmmulv2df3 does.
2172 (define_insn "sse2_vmaddv2df3"
2173 [(set (match_operand:V2DF 0 "register_operand" "=x")
2175 (plus:V2DF (match_operand:V2DF 1 "register_operand" "0")
2176 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
2179 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V2DFmode, operands)"
2180 "addsd\t{%2, %0|%0, %2}"
2181 [(set_attr "type" "sseadd")
2182 (set_attr "mode" "DF")])
;; V2DF subtraction expander.  Both sources may be registers or memory; the
;; preparation statement runs ix86_fixup_binary_operands_no_copy on them.
2184 (define_expand "subv2df3"
2185 [(set (match_operand:V2DF 0 "register_operand" "")
2186 (minus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
2187 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
2189 "ix86_fixup_binary_operands_no_copy (MINUS, V2DFmode, operands);")
;; V2DF subtraction insn (subpd).  Operand 1 must be a register tied to the
;; destination (no commutative marker); operand 2 may be register or memory.
2191 (define_insn "*subv2df3"
2192 [(set (match_operand:V2DF 0 "register_operand" "=x")
2193 (minus:V2DF (match_operand:V2DF 1 "register_operand" "0")
2194 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
2196 "subpd\t{%2, %0|%0, %2}"
2197 [(set_attr "type" "sseadd")
2198 (set_attr "mode" "V2DF")])
;; Scalar double subtract insn (subsd, attribute mode DF) on V2DF operands.
;; Operand 1 is tied to the destination; operand 2 may be register or memory.
2200 (define_insn "sse2_vmsubv2df3"
2201 [(set (match_operand:V2DF 0 "register_operand" "=x")
2203 (minus:V2DF (match_operand:V2DF 1 "register_operand" "0")
2204 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
2208 "subsd\t{%2, %0|%0, %2}"
2209 [(set_attr "type" "sseadd")
2210 (set_attr "mode" "DF")])
;; V2DF multiplication expander.  Both sources may be registers or memory;
;; the preparation statement runs ix86_fixup_binary_operands_no_copy.
2212 (define_expand "mulv2df3"
2213 [(set (match_operand:V2DF 0 "register_operand" "")
2214 (mult:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
2215 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
2217 "ix86_fixup_binary_operands_no_copy (MULT, V2DFmode, operands);")
;; V2DF multiplication insn (mulpd).  Operand 1 is marked commutative ("%")
;; and tied to the destination; operand 2 may be a register or memory.
2219 (define_insn "*mulv2df3"
2220 [(set (match_operand:V2DF 0 "register_operand" "=x")
2221 (mult:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
2222 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
2223 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V2DFmode, operands)"
2224 "mulpd\t{%2, %0|%0, %2}"
2225 [(set_attr "type" "ssemul")
2226 (set_attr "mode" "V2DF")])
;; Scalar double multiply insn (mulsd, attribute mode DF) on V2DF operands.
;; Operand 1 is tied to the destination; operand 2 may be register or memory.
2228 (define_insn "sse2_vmmulv2df3"
2229 [(set (match_operand:V2DF 0 "register_operand" "=x")
2231 (mult:V2DF (match_operand:V2DF 1 "register_operand" "0")
2232 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
2235 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V2DFmode, operands)"
2236 "mulsd\t{%2, %0|%0, %2}"
2237 [(set_attr "type" "ssemul")
2238 (set_attr "mode" "DF")])
;; V2DF division expander.  The dividend (operand 1) must be a register;
;; the divisor may be a register or memory.  The preparation statement runs
;; ix86_fixup_binary_operands_no_copy on the operands.
2240 (define_expand "divv2df3"
2241 [(set (match_operand:V2DF 0 "register_operand" "")
2242 (div:V2DF (match_operand:V2DF 1 "register_operand" "")
2243 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
2245 "ix86_fixup_binary_operands_no_copy (DIV, V2DFmode, operands);")
;; V2DF division insn (divpd).  Operand 1 must be a register tied to the
;; destination; operand 2 may be a register or memory.
2247 (define_insn "*divv2df3"
2248 [(set (match_operand:V2DF 0 "register_operand" "=x")
2249 (div:V2DF (match_operand:V2DF 1 "register_operand" "0")
2250 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
2252 "divpd\t{%2, %0|%0, %2}"
2253 [(set_attr "type" "ssediv")
2254 (set_attr "mode" "V2DF")])
;; Scalar double divide insn (divsd, attribute mode DF) on V2DF operands.
;; Operand 1 is tied to the destination; operand 2 may be register or memory.
2256 (define_insn "sse2_vmdivv2df3"
2257 [(set (match_operand:V2DF 0 "register_operand" "=x")
2259 (div:V2DF (match_operand:V2DF 1 "register_operand" "0")
2260 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
2264 "divsd\t{%2, %0|%0, %2}"
2265 [(set_attr "type" "ssediv")
2266 (set_attr "mode" "DF")])
;; V2DF square root insn (sqrtpd).  The source may be a register or memory
;; and is not tied to the destination.
2268 (define_insn "sqrtv2df2"
2269 [(set (match_operand:V2DF 0 "register_operand" "=x")
2270 (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
2272 "sqrtpd\t{%1, %0|%0, %1}"
2273 [(set_attr "type" "sse")
2274 (set_attr "mode" "V2DF")])
;; Scalar double square root insn (sqrtsd, attribute mode DF).  Operand 1 is
;; the value whose root is taken; operand 2 is a register tied to the
;; destination.
2276 (define_insn "sse2_vmsqrtv2df2"
2277 [(set (match_operand:V2DF 0 "register_operand" "=x")
2279 (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2280 (match_operand:V2DF 2 "register_operand" "0")
2283 "sqrtsd\t{%1, %0|%0, %1}"
2284 [(set_attr "type" "sse")
2285 (set_attr "mode" "DF")])
2287 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
2288 ;; isn't really correct, as those rtl operators aren't defined when
2289 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
;; V2DF maximum expander.  Unless flag_finite_math_only is set, operand 1 is
;; first forced into a register; the operands are then passed through
;; ix86_fixup_binary_operands_no_copy.
2291 (define_expand "smaxv2df3"
2292 [(set (match_operand:V2DF 0 "register_operand" "")
2293 (smax:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
2294 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
2297 if (!flag_finite_math_only)
2298 operands[1] = force_reg (V2DFmode, operands[1]);
2299 ix86_fixup_binary_operands_no_copy (SMAX, V2DFmode, operands);
;; V2DF maximum insn (maxpd) used only when flag_finite_math_only is set.
;; In this variant operand 1 is marked commutative ("%0").
2302 (define_insn "*smaxv2df3_finite"
2303 [(set (match_operand:V2DF 0 "register_operand" "=x")
2304 (smax:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
2305 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
2306 "TARGET_SSE2 && flag_finite_math_only
2307 && ix86_binary_operator_ok (SMAX, V2DFmode, operands)"
2308 "maxpd\t{%2, %0|%0, %2}"
2309 [(set_attr "type" "sseadd")
2310 (set_attr "mode" "V2DF")])
;; V2DF maximum insn (maxpd) without the commutative marker: operand 1 must
;; be a register tied to the destination, so operand order is preserved.
2312 (define_insn "*smaxv2df3"
2313 [(set (match_operand:V2DF 0 "register_operand" "=x")
2314 (smax:V2DF (match_operand:V2DF 1 "register_operand" "0")
2315 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
2317 "maxpd\t{%2, %0|%0, %2}"
2318 [(set_attr "type" "sseadd")
2319 (set_attr "mode" "V2DF")])
;; Scalar double maximum insn (maxsd, attribute mode DF) on V2DF operands.
;; Operand 1 is tied to the destination; operand 2 may be register or memory.
2321 (define_insn "sse2_vmsmaxv2df3"
2322 [(set (match_operand:V2DF 0 "register_operand" "=x")
2324 (smax:V2DF (match_operand:V2DF 1 "register_operand" "0")
2325 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
2329 "maxsd\t{%2, %0|%0, %2}"
2330 [(set_attr "type" "sseadd")
2331 (set_attr "mode" "DF")])
;; V2DF minimum expander.  Unless flag_finite_math_only is set, operand 1 is
;; first forced into a register; the operands are then passed through
;; ix86_fixup_binary_operands_no_copy.
2333 (define_expand "sminv2df3"
2334 [(set (match_operand:V2DF 0 "register_operand" "")
2335 (smin:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
2336 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
2339 if (!flag_finite_math_only)
2340 operands[1] = force_reg (V2DFmode, operands[1]);
2341 ix86_fixup_binary_operands_no_copy (SMIN, V2DFmode, operands);
;; V2DF minimum insn (minpd) used only when flag_finite_math_only is set.
;; In this variant operand 1 is marked commutative ("%0").
2344 (define_insn "*sminv2df3_finite"
2345 [(set (match_operand:V2DF 0 "register_operand" "=x")
2346 (smin:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
2347 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
2348 "TARGET_SSE2 && flag_finite_math_only
2349 && ix86_binary_operator_ok (SMIN, V2DFmode, operands)"
2350 "minpd\t{%2, %0|%0, %2}"
2351 [(set_attr "type" "sseadd")
2352 (set_attr "mode" "V2DF")])
;; V2DF minimum insn (minpd) without the commutative marker: operand 1 must
;; be a register tied to the destination, so operand order is preserved.
2354 (define_insn "*sminv2df3"
2355 [(set (match_operand:V2DF 0 "register_operand" "=x")
2356 (smin:V2DF (match_operand:V2DF 1 "register_operand" "0")
2357 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
2359 "minpd\t{%2, %0|%0, %2}"
2360 [(set_attr "type" "sseadd")
2361 (set_attr "mode" "V2DF")])
;; Scalar double minimum insn (minsd, attribute mode DF) on V2DF operands.
;; Operand 1 is tied to the destination; operand 2 may be register or memory.
2363 (define_insn "sse2_vmsminv2df3"
2364 [(set (match_operand:V2DF 0 "register_operand" "=x")
2366 (smin:V2DF (match_operand:V2DF 1 "register_operand" "0")
2367 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
2371 "minsd\t{%2, %0|%0, %2}"
2372 [(set_attr "type" "sseadd")
2373 (set_attr "mode" "DF")])
;; addsubpd insn.  The RTL combines an operation on operands 1 and 2 with a
;; minus:V2DF of the same two operands (match_dup 1, match_dup 2).
;; Operand 1 is tied to the destination.
2375 (define_insn "sse3_addsubv2df3"
2376 [(set (match_operand:V2DF 0 "register_operand" "=x")
2379 (match_operand:V2DF 1 "register_operand" "0")
2380 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
2381 (minus:V2DF (match_dup 1) (match_dup 2))
2384 "addsubpd\t{%2, %0|%0, %2}"
2385 [(set_attr "type" "sseadd")
2386 (set_attr "mode" "V2DF")])
;; haddpd insn.  The RTL pairs element 0 with element 1 of operand 1, and
;; element 0 with element 1 of operand 2.  Operand 1 is tied to the
;; destination; operand 2 may be a register or memory.
2388 (define_insn "sse3_haddv2df3"
2389 [(set (match_operand:V2DF 0 "register_operand" "=x")
2393 (match_operand:V2DF 1 "register_operand" "0")
2394 (parallel [(const_int 0)]))
2395 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
2398 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
2399 (parallel [(const_int 0)]))
2400 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
2402 "haddpd\t{%2, %0|%0, %2}"
2403 [(set_attr "type" "sseadd")
2404 (set_attr "mode" "V2DF")])
;; hsubpd insn.  The RTL pairs element 0 with element 1 of operand 1, and
;; element 0 with element 1 of operand 2.  Operand 1 is tied to the
;; destination; operand 2 may be a register or memory.
2406 (define_insn "sse3_hsubv2df3"
2407 [(set (match_operand:V2DF 0 "register_operand" "=x")
2411 (match_operand:V2DF 1 "register_operand" "0")
2412 (parallel [(const_int 0)]))
2413 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
2416 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
2417 (parallel [(const_int 0)]))
2418 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
2420 "hsubpd\t{%2, %0|%0, %2}"
2421 [(set_attr "type" "sseadd")
2422 (set_attr "mode" "V2DF")])
;; V2DF sum reduction.  Implemented by emitting sse3_haddv2df3 with
;; operand 1 supplied as both source operands.
2424 (define_expand "reduc_splus_v2df"
2425 [(match_operand:V2DF 0 "register_operand" "")
2426 (match_operand:V2DF 1 "register_operand" "")]
2429 emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
2433 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2435 ;; Parallel double-precision floating point comparisons
2437 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; V2DF mask-producing comparison (cmp<cond>pd).  Operator 3 must satisfy
;; sse_comparison_operator and selects the condition via %D3.  Disabled
;; when TARGET_SSE5 is set.
2439 (define_insn "sse2_maskcmpv2df3"
2440 [(set (match_operand:V2DF 0 "register_operand" "=x")
2441 (match_operator:V2DF 3 "sse_comparison_operator"
2442 [(match_operand:V2DF 1 "register_operand" "0")
2443 (match_operand:V2DF 2 "nonimmediate_operand" "xm")]))]
2444 "TARGET_SSE2 && !TARGET_SSE5"
2445 "cmp%D3pd\t{%2, %0|%0, %2}"
2446 [(set_attr "type" "ssecmp")
2447 (set_attr "mode" "V2DF")])
;; DF (scalar-mode operands) mask-producing comparison (cmp<cond>sd).
;; Operator 3 must satisfy sse_comparison_operator.  Disabled when
;; TARGET_SSE5 is set.
2449 (define_insn "sse2_maskcmpdf3"
2450 [(set (match_operand:DF 0 "register_operand" "=x")
2451 (match_operator:DF 3 "sse_comparison_operator"
2452 [(match_operand:DF 1 "register_operand" "0")
2453 (match_operand:DF 2 "nonimmediate_operand" "xm")]))]
2454 "TARGET_SSE2 && !TARGET_SSE5"
2455 "cmp%D3sd\t{%2, %0|%0, %2}"
2456 [(set_attr "type" "ssecmp")
2457 (set_attr "mode" "DF")])
;; Scalar comparison (cmp<cond>sd, attribute mode DF) expressed on V2DF
;; operands.  Operator 3 must satisfy sse_comparison_operator.  Disabled
;; when TARGET_SSE5 is set.
2459 (define_insn "sse2_vmmaskcmpv2df3"
2460 [(set (match_operand:V2DF 0 "register_operand" "=x")
2462 (match_operator:V2DF 3 "sse_comparison_operator"
2463 [(match_operand:V2DF 1 "register_operand" "0")
2464 (match_operand:V2DF 2 "nonimmediate_operand" "xm")])
2467 "TARGET_SSE2 && !TARGET_SSE5"
2468 "cmp%D3sd\t{%2, %0|%0, %2}"
2469 [(set_attr "type" "ssecmp")
2470 (set_attr "mode" "DF")])
;; comisd insn.  Sets FLAGS_REG in CCFP mode from element 0 of operand 0
;; and element 0 of operand 1; operand 1 may be a register or memory.
2472 (define_insn "sse2_comi"
2473 [(set (reg:CCFP FLAGS_REG)
2476 (match_operand:V2DF 0 "register_operand" "x")
2477 (parallel [(const_int 0)]))
2479 (match_operand:V2DF 1 "nonimmediate_operand" "xm")
2480 (parallel [(const_int 0)]))))]
2482 "comisd\t{%1, %0|%0, %1}"
2483 [(set_attr "type" "ssecomi")
2484 (set_attr "mode" "DF")])
;; ucomisd insn.  Sets FLAGS_REG in CCFPU mode from element 0 of operand 0
;; and element 0 of operand 1; operand 1 may be a register or memory.
2486 (define_insn "sse2_ucomi"
2487 [(set (reg:CCFPU FLAGS_REG)
2490 (match_operand:V2DF 0 "register_operand" "x")
2491 (parallel [(const_int 0)]))
2493 (match_operand:V2DF 1 "nonimmediate_operand" "xm")
2494 (parallel [(const_int 0)]))))]
2496 "ucomisd\t{%1, %0|%0, %1}"
2497 [(set_attr "type" "ssecomi")
2498 (set_attr "mode" "DF")])
;; V2DF vector conditional expander.  Operator 3 compares operands 4 and 5;
;; operands 1 and 2 are the two value operands.  The expansion itself is
;; attempted by ix86_expand_fp_vcond.
2500 (define_expand "vcondv2df"
2501 [(set (match_operand:V2DF 0 "register_operand" "")
2503 (match_operator 3 ""
2504 [(match_operand:V2DF 4 "nonimmediate_operand" "")
2505 (match_operand:V2DF 5 "nonimmediate_operand" "")])
2506 (match_operand:V2DF 1 "general_operand" "")
2507 (match_operand:V2DF 2 "general_operand" "")))]
2510 if (ix86_expand_fp_vcond (operands))
2516 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2518 ;; Parallel double-precision floating point logical operations
2520 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; V2DF bitwise AND expander.  Both sources may be registers or memory; the
;; preparation statement runs ix86_fixup_binary_operands_no_copy on them.
2522 (define_expand "andv2df3"
2523 [(set (match_operand:V2DF 0 "register_operand" "")
2524 (and:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
2525 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
2527 "ix86_fixup_binary_operands_no_copy (AND, V2DFmode, operands);")
;; V2DF bitwise AND insn (andpd).  Operand 1 is marked commutative ("%") and
;; tied to the destination; operand 2 may be a register or memory.
2529 (define_insn "*andv2df3"
2530 [(set (match_operand:V2DF 0 "register_operand" "=x")
2531 (and:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
2532 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
2533 "TARGET_SSE2 && ix86_binary_operator_ok (AND, V2DFmode, operands)"
2534 "andpd\t{%2, %0|%0, %2}"
2535 [(set_attr "type" "sselog")
2536 (set_attr "mode" "V2DF")])
;; V2DF and-not insn (andnpd): computes (NOT operand 1) AND operand 2.
;; Operand 1 is the complemented one and is tied to the destination.
2538 (define_insn "sse2_nandv2df3"
2539 [(set (match_operand:V2DF 0 "register_operand" "=x")
2540 (and:V2DF (not:V2DF (match_operand:V2DF 1 "register_operand" "0"))
2541 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
2543 "andnpd\t{%2, %0|%0, %2}"
2544 [(set_attr "type" "sselog")
2545 (set_attr "mode" "V2DF")])
;; V2DF bitwise OR expander.  Both sources may be registers or memory; the
;; preparation statement runs ix86_fixup_binary_operands_no_copy on them.
2547 (define_expand "iorv2df3"
2548 [(set (match_operand:V2DF 0 "register_operand" "")
2549 (ior:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
2550 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
2552 "ix86_fixup_binary_operands_no_copy (IOR, V2DFmode, operands);")
;; V2DF bitwise OR insn (orpd).  Operand 1 is marked commutative ("%") and
;; tied to the destination; operand 2 may be a register or memory.
2554 (define_insn "*iorv2df3"
2555 [(set (match_operand:V2DF 0 "register_operand" "=x")
2556 (ior:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
2557 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
2558 "TARGET_SSE2 && ix86_binary_operator_ok (IOR, V2DFmode, operands)"
2559 "orpd\t{%2, %0|%0, %2}"
2560 [(set_attr "type" "sselog")
2561 (set_attr "mode" "V2DF")])
;; V2DF bitwise XOR expander.  Both sources may be registers or memory; the
;; preparation statement runs ix86_fixup_binary_operands_no_copy on them.
2563 (define_expand "xorv2df3"
2564 [(set (match_operand:V2DF 0 "register_operand" "")
2565 (xor:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
2566 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
2568 "ix86_fixup_binary_operands_no_copy (XOR, V2DFmode, operands);")
;; V2DF bitwise XOR insn (xorpd).  Operand 1 is marked commutative ("%") and
;; tied to the destination; operand 2 may be a register or memory.
2570 (define_insn "*xorv2df3"
2571 [(set (match_operand:V2DF 0 "register_operand" "=x")
2572 (xor:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
2573 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
2574 "TARGET_SSE2 && ix86_binary_operator_ok (XOR, V2DFmode, operands)"
2575 "xorpd\t{%2, %0|%0, %2}"
2576 [(set_attr "type" "sselog")
2577 (set_attr "mode" "V2DF")])
2579 ;; Also define scalar versions. These are used for abs, neg, and
2580 ;; conditional move. Using subregs into vector modes causes register
2581 ;; allocation lossage. These patterns do not allow memory operands
2582 ;; because the native instructions read the full 128-bits.
;; DF bitwise AND using andpd.  Register operands only; the insn is
;; attributed mode V2DF although the RTL operands are DF.
2584 (define_insn "*anddf3"
2585 [(set (match_operand:DF 0 "register_operand" "=x")
2586 (and:DF (match_operand:DF 1 "register_operand" "0")
2587 (match_operand:DF 2 "register_operand" "x")))]
2589 "andpd\t{%2, %0|%0, %2}"
2590 [(set_attr "type" "sselog")
2591 (set_attr "mode" "V2DF")])
;; DF and-not using andnpd: (NOT operand 1) AND operand 2.  Register
;; operands only; the insn is attributed mode V2DF.
2593 (define_insn "*nanddf3"
2594 [(set (match_operand:DF 0 "register_operand" "=x")
2595 (and:DF (not:DF (match_operand:DF 1 "register_operand" "0"))
2596 (match_operand:DF 2 "register_operand" "x")))]
2598 "andnpd\t{%2, %0|%0, %2}"
2599 [(set_attr "type" "sselog")
2600 (set_attr "mode" "V2DF")])
;; DF bitwise OR using orpd.  Register operands only; the insn is
;; attributed mode V2DF although the RTL operands are DF.
2602 (define_insn "*iordf3"
2603 [(set (match_operand:DF 0 "register_operand" "=x")
2604 (ior:DF (match_operand:DF 1 "register_operand" "0")
2605 (match_operand:DF 2 "register_operand" "x")))]
2607 "orpd\t{%2, %0|%0, %2}"
2608 [(set_attr "type" "sselog")
2609 (set_attr "mode" "V2DF")])
;; DF bitwise XOR using xorpd.  Register operands only; the insn is
;; attributed mode V2DF although the RTL operands are DF.
2611 (define_insn "*xordf3"
2612 [(set (match_operand:DF 0 "register_operand" "=x")
2613 (xor:DF (match_operand:DF 1 "register_operand" "0")
2614 (match_operand:DF 2 "register_operand" "x")))]
2616 "xorpd\t{%2, %0|%0, %2}"
2617 [(set_attr "type" "sselog")
2618 (set_attr "mode" "V2DF")])
2620 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2622 ;; Parallel double-precision floating point conversion operations
2624 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2pd: convert a V2SI source to V2DF.  The source is either a "y"
;; register (first alternative, unit "mmx") or memory (second alternative).
2626 (define_insn "sse2_cvtpi2pd"
2627 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2628 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
2630 "cvtpi2pd\t{%1, %0|%0, %1}"
2631 [(set_attr "type" "ssecvt")
2632 (set_attr "unit" "mmx,*")
2633 (set_attr "mode" "V2DF")])
;; cvtpd2pi: convert V2DF to V2SI in a "y" register.  Modelled as
;; UNSPEC_FIX_NOTRUNC rather than the fix RTL code.
2635 (define_insn "sse2_cvtpd2pi"
2636 [(set (match_operand:V2SI 0 "register_operand" "=y")
2637 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2638 UNSPEC_FIX_NOTRUNC))]
2640 "cvtpd2pi\t{%1, %0|%0, %1}"
2641 [(set_attr "type" "ssecvt")
2642 (set_attr "unit" "mmx")
2643 (set_attr "prefix_data16" "1")
2644 (set_attr "mode" "DI")])
;; cvttpd2pi: convert V2DF to V2SI in a "y" register using the fix RTL code.
;; NOTE(review): mode attribute is "TI" here but "DI" on sse2_cvtpd2pi --
;; confirm whether the difference is intentional.
2646 (define_insn "sse2_cvttpd2pi"
2647 [(set (match_operand:V2SI 0 "register_operand" "=y")
2648 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
2650 "cvttpd2pi\t{%1, %0|%0, %1}"
2651 [(set_attr "type" "ssecvt")
2652 (set_attr "unit" "mmx")
2653 (set_attr "prefix_data16" "1")
2654 (set_attr "mode" "TI")])
;; cvtsi2sd: convert an SI source (general register or memory) to DF.
;; Operand 1 is a V2DF register tied to the destination.
2656 (define_insn "sse2_cvtsi2sd"
2657 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2660 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
2661 (match_operand:V2DF 1 "register_operand" "0,0")
2664 "cvtsi2sd\t{%2, %0|%0, %2}"
2665 [(set_attr "type" "sseicvt")
2666 (set_attr "mode" "DF")
2667 (set_attr "athlon_decode" "double,direct")
2668 (set_attr "amdfam10_decode" "vector,double")])
;; cvtsi2sdq: convert a DI source (general register or memory) to DF.
;; Requires TARGET_64BIT.  Operand 1 is a V2DF register tied to the
;; destination.
2670 (define_insn "sse2_cvtsi2sdq"
2671 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2674 (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m")))
2675 (match_operand:V2DF 1 "register_operand" "0,0")
2677 "TARGET_SSE2 && TARGET_64BIT"
2678 "cvtsi2sdq\t{%2, %0|%0, %2}"
2679 [(set_attr "type" "sseicvt")
2680 (set_attr "mode" "DF")
2681 (set_attr "athlon_decode" "double,direct")
2682 (set_attr "amdfam10_decode" "vector,double")])
;; cvtsd2si: convert element 0 of a V2DF source to SI, modelled as
;; UNSPEC_FIX_NOTRUNC.  The source may be a register or memory.
2684 (define_insn "sse2_cvtsd2si"
2685 [(set (match_operand:SI 0 "register_operand" "=r,r")
2688 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2689 (parallel [(const_int 0)]))]
2690 UNSPEC_FIX_NOTRUNC))]
2692 "cvtsd2si\t{%1, %0|%0, %1}"
2693 [(set_attr "type" "sseicvt")
2694 (set_attr "athlon_decode" "double,vector")
2695 (set_attr "prefix_rep" "1")
2696 (set_attr "mode" "SI")])
;; cvtsd2si variant taking a DF source directly (register or memory)
;; rather than a V2DF element; modelled as UNSPEC_FIX_NOTRUNC.
2698 (define_insn "sse2_cvtsd2si_2"
2699 [(set (match_operand:SI 0 "register_operand" "=r,r")
2700 (unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2701 UNSPEC_FIX_NOTRUNC))]
2703 "cvtsd2si\t{%1, %0|%0, %1}"
2704 [(set_attr "type" "sseicvt")
2705 (set_attr "athlon_decode" "double,vector")
2706 (set_attr "amdfam10_decode" "double,double")
2707 (set_attr "prefix_rep" "1")
2708 (set_attr "mode" "SI")])
;; cvtsd2siq: convert element 0 of a V2DF source to DI, modelled as
;; UNSPEC_FIX_NOTRUNC.  Requires TARGET_64BIT.
2710 (define_insn "sse2_cvtsd2siq"
2711 [(set (match_operand:DI 0 "register_operand" "=r,r")
2714 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2715 (parallel [(const_int 0)]))]
2716 UNSPEC_FIX_NOTRUNC))]
2717 "TARGET_SSE2 && TARGET_64BIT"
2718 "cvtsd2siq\t{%1, %0|%0, %1}"
2719 [(set_attr "type" "sseicvt")
2720 (set_attr "athlon_decode" "double,vector")
2721 (set_attr "prefix_rep" "1")
2722 (set_attr "mode" "DI")])
;; cvtsd2siq variant taking a DF source directly (register or memory);
;; modelled as UNSPEC_FIX_NOTRUNC.  Requires TARGET_64BIT.
2724 (define_insn "sse2_cvtsd2siq_2"
2725 [(set (match_operand:DI 0 "register_operand" "=r,r")
2726 (unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2727 UNSPEC_FIX_NOTRUNC))]
2728 "TARGET_SSE2 && TARGET_64BIT"
2729 "cvtsd2siq\t{%1, %0|%0, %1}"
2730 [(set_attr "type" "sseicvt")
2731 (set_attr "athlon_decode" "double,vector")
2732 (set_attr "amdfam10_decode" "double,double")
2733 (set_attr "prefix_rep" "1")
2734 (set_attr "mode" "DI")])
;; cvttsd2si: convert element 0 of a V2DF source (register or memory) to
;; an SI general register.
2736 (define_insn "sse2_cvttsd2si"
2737 [(set (match_operand:SI 0 "register_operand" "=r,r")
2740 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2741 (parallel [(const_int 0)]))))]
2743 "cvttsd2si\t{%1, %0|%0, %1}"
2744 [(set_attr "type" "sseicvt")
2745 (set_attr "prefix_rep" "1")
2746 (set_attr "mode" "SI")
2747 (set_attr "athlon_decode" "double,vector")
2748 (set_attr "amdfam10_decode" "double,double")])
;; cvttsd2siq: convert element 0 of a V2DF source (register or memory) to
;; a DI general register.  Requires TARGET_64BIT.
2750 (define_insn "sse2_cvttsd2siq"
2751 [(set (match_operand:DI 0 "register_operand" "=r,r")
2754 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2755 (parallel [(const_int 0)]))))]
2756 "TARGET_SSE2 && TARGET_64BIT"
2757 "cvttsd2siq\t{%1, %0|%0, %1}"
2758 [(set_attr "type" "sseicvt")
2759 (set_attr "prefix_rep" "1")
2760 (set_attr "mode" "DI")
2761 (set_attr "athlon_decode" "double,vector")
2762 (set_attr "amdfam10_decode" "double,double")])
;; cvtdq2pd: produce a V2DF from elements 0 and 1 of a V4SI source
;; (register or memory).
2764 (define_insn "sse2_cvtdq2pd"
2765 [(set (match_operand:V2DF 0 "register_operand" "=x")
2768 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
2769 (parallel [(const_int 0) (const_int 1)]))))]
2771 "cvtdq2pd\t{%1, %0|%0, %1}"
2772 [(set_attr "type" "ssecvt")
2773 (set_attr "mode" "V2DF")])
;; Expander for cvtpd2dq (V2DF -> V4SI destination).  It supplies operand 2
;; itself as the V2SI zero constant.
2775 (define_expand "sse2_cvtpd2dq"
2776 [(set (match_operand:V4SI 0 "register_operand" "")
2778 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "")]
2782 "operands[2] = CONST0_RTX (V2SImode);")
;; cvtpd2dq insn.  The V2SI conversion result is combined with operand 2,
;; which must be a zero constant (const0_operand), to form the V4SI result.
2784 (define_insn "*sse2_cvtpd2dq"
2785 [(set (match_operand:V4SI 0 "register_operand" "=x")
2787 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2789 (match_operand:V2SI 2 "const0_operand" "")))]
2791 "cvtpd2dq\t{%1, %0|%0, %1}"
2792 [(set_attr "type" "ssecvt")
2793 (set_attr "prefix_rep" "1")
2794 (set_attr "mode" "TI")
2795 (set_attr "amdfam10_decode" "double")])
;; Expander for cvttpd2dq (V2DF -> V4SI destination via fix:V2SI).  It
;; supplies operand 2 itself as the V2SI zero constant.
2797 (define_expand "sse2_cvttpd2dq"
2798 [(set (match_operand:V4SI 0 "register_operand" "")
2800 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" ""))
2803 "operands[2] = CONST0_RTX (V2SImode);")
;; cvttpd2dq insn.  The fix:V2SI result is combined with operand 2, which
;; must be a zero constant (const0_operand), to form the V4SI result.
2805 (define_insn "*sse2_cvttpd2dq"
2806 [(set (match_operand:V4SI 0 "register_operand" "=x")
2808 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2809 (match_operand:V2SI 2 "const0_operand" "")))]
2811 "cvttpd2dq\t{%1, %0|%0, %1}"
2812 [(set_attr "type" "ssecvt")
2813 (set_attr "prefix_rep" "1")
2814 (set_attr "mode" "TI")
2815 (set_attr "amdfam10_decode" "double")])
;; cvtsd2ss insn (attribute mode SF).  Operand 2 is the V2DF source
;; (register or memory); operand 1 is a V4SF register tied to the
;; destination.
2817 (define_insn "sse2_cvtsd2ss"
2818 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2821 (float_truncate:V2SF
2822 (match_operand:V2DF 2 "nonimmediate_operand" "x,m")))
2823 (match_operand:V4SF 1 "register_operand" "0,0")
2826 "cvtsd2ss\t{%2, %0|%0, %2}"
2827 [(set_attr "type" "ssecvt")
2828 (set_attr "athlon_decode" "vector,double")
2829 (set_attr "amdfam10_decode" "vector,double")
2830 (set_attr "mode" "SF")])
;; cvtss2sd insn (attribute mode DF).  Operand 2 is the V4SF source, of
;; which elements 0 and 1 are selected in the RTL; operand 1 is a V2DF
;; register tied to the destination.
2832 (define_insn "sse2_cvtss2sd"
2833 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2837 (match_operand:V4SF 2 "nonimmediate_operand" "x,m")
2838 (parallel [(const_int 0) (const_int 1)])))
2839 (match_operand:V2DF 1 "register_operand" "0,0")
2842 "cvtss2sd\t{%2, %0|%0, %2}"
2843 [(set_attr "type" "ssecvt")
2844 (set_attr "amdfam10_decode" "vector,double")
2845 (set_attr "mode" "DF")])
;; Expander for cvtpd2ps (V2DF -> V4SF destination via float_truncate:V2SF).
;; It supplies operand 2 itself as the V2SF zero constant.
2847 (define_expand "sse2_cvtpd2ps"
2848 [(set (match_operand:V4SF 0 "register_operand" "")
2850 (float_truncate:V2SF
2851 (match_operand:V2DF 1 "nonimmediate_operand" ""))
2854 "operands[2] = CONST0_RTX (V2SFmode);")
;; cvtpd2ps insn.  The float_truncate:V2SF result is combined with operand
;; 2, which must be a zero constant (const0_operand), to form the V4SF
;; result.
2856 (define_insn "*sse2_cvtpd2ps"
2857 [(set (match_operand:V4SF 0 "register_operand" "=x")
2859 (float_truncate:V2SF
2860 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2861 (match_operand:V2SF 2 "const0_operand" "")))]
2863 "cvtpd2ps\t{%1, %0|%0, %1}"
2864 [(set_attr "type" "ssecvt")
2865 (set_attr "prefix_data16" "1")
2866 (set_attr "mode" "V4SF")
2867 (set_attr "amdfam10_decode" "double")])
;; cvtps2pd: produce a V2DF from elements 0 and 1 of a V4SF source
;; (register or memory).
2869 (define_insn "sse2_cvtps2pd"
2870 [(set (match_operand:V2DF 0 "register_operand" "=x")
2873 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
2874 (parallel [(const_int 0) (const_int 1)]))))]
2876 "cvtps2pd\t{%1, %0|%0, %1}"
2877 [(set_attr "type" "ssecvt")
2878 (set_attr "mode" "V2DF")
2879 (set_attr "amdfam10_decode" "direct")])
;; Expander producing a V2DF (operand 0) from a V4SF source (operand 1).
;; A fresh V4SF pseudo is allocated as operand 2 for the intermediate
;; step; the final step selects elements 0 and 1.
2881 (define_expand "vec_unpacks_hi_v4sf"
2886 (match_operand:V4SF 1 "nonimmediate_operand" ""))
2887 (parallel [(const_int 6)
2891 (set (match_operand:V2DF 0 "register_operand" "")
2895 (parallel [(const_int 0) (const_int 1)]))))]
2898 operands[2] = gen_reg_rtx (V4SFmode);
;; Expander producing a V2DF from elements 0 and 1 of a V4SF source.
2901 (define_expand "vec_unpacks_lo_v4sf"
2902 [(set (match_operand:V2DF 0 "register_operand" "")
2905 (match_operand:V4SF 1 "nonimmediate_operand" "")
2906 (parallel [(const_int 0) (const_int 1)]))))]
;; V8HI -> V4SF: first vec_unpacks_hi_v8hi into a V4SI temporary, then
;; sse2_cvtdq2ps from that temporary into operand 0.
2909 (define_expand "vec_unpacks_float_hi_v8hi"
2910 [(match_operand:V4SF 0 "register_operand" "")
2911 (match_operand:V8HI 1 "register_operand" "")]
2914 rtx tmp = gen_reg_rtx (V4SImode);
2916 emit_insn (gen_vec_unpacks_hi_v8hi (tmp, operands[1]));
2917 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; V8HI -> V4SF: first vec_unpacks_lo_v8hi into a V4SI temporary, then
;; sse2_cvtdq2ps from that temporary into operand 0.
2921 (define_expand "vec_unpacks_float_lo_v8hi"
2922 [(match_operand:V4SF 0 "register_operand" "")
2923 (match_operand:V8HI 1 "register_operand" "")]
2926 rtx tmp = gen_reg_rtx (V4SImode);
2928 emit_insn (gen_vec_unpacks_lo_v8hi (tmp, operands[1]));
2929 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; V8HI -> V4SF: first vec_unpacku_hi_v8hi into a V4SI temporary, then
;; sse2_cvtdq2ps from that temporary into operand 0.
2933 (define_expand "vec_unpacku_float_hi_v8hi"
2934 [(match_operand:V4SF 0 "register_operand" "")
2935 (match_operand:V8HI 1 "register_operand" "")]
2938 rtx tmp = gen_reg_rtx (V4SImode);
2940 emit_insn (gen_vec_unpacku_hi_v8hi (tmp, operands[1]));
2941 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; V8HI -> V4SF: first vec_unpacku_lo_v8hi into a V4SI temporary, then
;; sse2_cvtdq2ps from that temporary into operand 0.
2945 (define_expand "vec_unpacku_float_lo_v8hi"
2946 [(match_operand:V4SF 0 "register_operand" "")
2947 (match_operand:V8HI 1 "register_operand" "")]
2950 rtx tmp = gen_reg_rtx (V4SImode);
2952 emit_insn (gen_vec_unpacku_lo_v8hi (tmp, operands[1]));
2953 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; Expander producing a V2DF (operand 0) from a V4SI source (operand 1).
;; A fresh V4SI pseudo is allocated as operand 2 for the intermediate
;; step; the final step selects elements 0 and 1.
2957 (define_expand "vec_unpacks_float_hi_v4si"
2960 (match_operand:V4SI 1 "nonimmediate_operand" "")
2961 (parallel [(const_int 2)
2965 (set (match_operand:V2DF 0 "register_operand" "")
2969 (parallel [(const_int 0) (const_int 1)]))))]
2972 operands[2] = gen_reg_rtx (V4SImode);
;; Expander producing a V2DF from elements 0 and 1 of a V4SI source.
2975 (define_expand "vec_unpacks_float_lo_v4si"
2976 [(set (match_operand:V2DF 0 "register_operand" "")
2979 (match_operand:V4SI 1 "nonimmediate_operand" "")
2980 (parallel [(const_int 0) (const_int 1)]))))]
;; Pack two V2DF inputs into one V4SF: each input is converted with
;; sse2_cvtpd2ps into its own V4SF temporary, and the two temporaries are
;; then combined into operand 0 with sse_movlhps.
2983 (define_expand "vec_pack_trunc_v2df"
2984 [(match_operand:V4SF 0 "register_operand" "")
2985 (match_operand:V2DF 1 "nonimmediate_operand" "")
2986 (match_operand:V2DF 2 "nonimmediate_operand" "")]
2991 r1 = gen_reg_rtx (V4SFmode);
2992 r2 = gen_reg_rtx (V4SFmode);
2994 emit_insn (gen_sse2_cvtpd2ps (r1, operands[1]));
2995 emit_insn (gen_sse2_cvtpd2ps (r2, operands[2]));
2996 emit_insn (gen_sse_movlhps (operands[0], r1, r2));
;; Pack two V2DF inputs into one V4SI: each input is converted with
;; sse2_cvttpd2dq into its own V4SI temporary, and the two temporaries are
;; combined with sse2_punpcklqdq, all viewed as V2DI via gen_lowpart.
3000 (define_expand "vec_pack_sfix_trunc_v2df"
3001 [(match_operand:V4SI 0 "register_operand" "")
3002 (match_operand:V2DF 1 "nonimmediate_operand" "")
3003 (match_operand:V2DF 2 "nonimmediate_operand" "")]
3008 r1 = gen_reg_rtx (V4SImode);
3009 r2 = gen_reg_rtx (V4SImode);
3011 emit_insn (gen_sse2_cvttpd2dq (r1, operands[1]));
3012 emit_insn (gen_sse2_cvttpd2dq (r2, operands[2]));
3013 emit_insn (gen_sse2_punpcklqdq (gen_lowpart (V2DImode, operands[0]),
3014 gen_lowpart (V2DImode, r1),
3015 gen_lowpart (V2DImode, r2)));
;; Pack two V2DF inputs into one V4SI: each input is converted with
;; sse2_cvtpd2dq into its own V4SI temporary, and the two temporaries are
;; combined with sse2_punpcklqdq, all viewed as V2DI via gen_lowpart.
3019 (define_expand "vec_pack_sfix_v2df"
3020 [(match_operand:V4SI 0 "register_operand" "")
3021 (match_operand:V2DF 1 "nonimmediate_operand" "")
3022 (match_operand:V2DF 2 "nonimmediate_operand" "")]
3027 r1 = gen_reg_rtx (V4SImode);
3028 r2 = gen_reg_rtx (V4SImode);
3030 emit_insn (gen_sse2_cvtpd2dq (r1, operands[1]));
3031 emit_insn (gen_sse2_cvtpd2dq (r2, operands[2]));
3032 emit_insn (gen_sse2_punpcklqdq (gen_lowpart (V2DImode, operands[0]),
3033 gen_lowpart (V2DImode, r1),
3034 gen_lowpart (V2DImode, r2)));
3039 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3041 ;; Parallel double-precision floating point element swizzling
3043 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; unpckhpd pattern with three alternatives: register form (unpckhpd), a
;; load of the high half of an offsettable memory operand 1 (movlpd with
;; %H1), and a store to memory (movhpd).  Operands 1 and 2 may not both be
;; memory.
3045 (define_insn "sse2_unpckhpd"
3046 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
3049 (match_operand:V2DF 1 "nonimmediate_operand" " 0,o,x")
3050 (match_operand:V2DF 2 "nonimmediate_operand" " x,0,0"))
3051 (parallel [(const_int 1)
3053 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3055 unpckhpd\t{%2, %0|%0, %2}
3056 movlpd\t{%H1, %0|%0, %H1}
3057 movhpd\t{%1, %0|%0, %1}"
3058 [(set_attr "type" "sselog,ssemov,ssemov")
3059 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; movddup pattern.  First alternative: register destination from a
;; register or memory source.  Second alternative: offsettable memory
;; destination from a register source (type ssemov).  Operands 0 and 1 may
;; not both be memory.
3061 (define_insn "*sse3_movddup"
3062 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,o")
3065 (match_operand:V2DF 1 "nonimmediate_operand" "xm,x")
3067 (parallel [(const_int 0)
3069 "TARGET_SSE3 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
3071 movddup\t{%1, %0|%0, %1}
3073 [(set_attr "type" "sselog1,ssemov")
3074 (set_attr "mode" "V2DF")])
;; After reload, with a V2DF memory destination and register source: the
;; DFmode low part of the source register is stored twice, at byte offsets
;; 0 and 8 of the destination.
3077 [(set (match_operand:V2DF 0 "memory_operand" "")
3080 (match_operand:V2DF 1 "register_operand" "")
3082 (parallel [(const_int 0)
3084 "TARGET_SSE3 && reload_completed"
3087 rtx low = gen_rtx_REG (DFmode, REGNO (operands[1]));
3088 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
3089 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
;; unpcklpd pattern with three alternatives: register form (unpcklpd), a
;; load from memory operand 2 (movhpd), and a store of operand 2 to the high
;; half of an offsettable memory destination (movlpd with %H0).  Operand 1
;; is tied to the destination in every alternative.
3093 (define_insn "sse2_unpcklpd"
3094 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o")
3097 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0")
3098 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x"))
3099 (parallel [(const_int 0)
3101 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3103 unpcklpd\t{%2, %0|%0, %2}
3104 movhpd\t{%2, %0|%0, %2}
3105 movlpd\t{%2, %H0|%H0, %2}"
3106 [(set_attr "type" "sselog,ssemov,ssemov")
3107 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; Expander for shufpd taking the immediate mask as operand 3.  The mask is
;; decoded into separate selector operands for sse2_shufpd_1; bit 1 of the
;; mask chooses selector value 3 (set) or 2 (clear).
3109 (define_expand "sse2_shufpd"
3110 [(match_operand:V2DF 0 "register_operand" "")
3111 (match_operand:V2DF 1 "register_operand" "")
3112 (match_operand:V2DF 2 "nonimmediate_operand" "")
3113 (match_operand:SI 3 "const_int_operand" "")]
3116 int mask = INTVAL (operands[3]);
3117 emit_insn (gen_sse2_shufpd_1 (operands[0], operands[1], operands[2],
3119 GEN_INT (mask & 2 ? 3 : 2)));
;; shufpd with the selection spelled out as two index operands: operand 3
;; must satisfy const_0_to_1_operand and operand 4 const_2_to_3_operand.
;; The output code folds them back into the instruction immediate
;; (bit 0 = operand 3, bit 1 = operand 4 - 2), overwrites operands[3]
;; with that value, and returns the shufpd template.
3123 (define_insn "sse2_shufpd_1"
3124 [(set (match_operand:V2DF 0 "register_operand" "=x")
3127 (match_operand:V2DF 1 "register_operand" "0")
3128 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
3129 (parallel [(match_operand 3 "const_0_to_1_operand" "")
3130 (match_operand 4 "const_2_to_3_operand" "")])))]
3134 mask = INTVAL (operands[3]);
3135 mask |= (INTVAL (operands[4]) - 2) << 1;
3136 operands[3] = GEN_INT (mask);
3138 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
3140 [(set_attr "type" "sselog")
3141 (set_attr "mode" "V2DF")])
;; Extract element 1 of V2DF operand 1 into DF operand 0.
;;   0: m    <- x   movhpd
;;   1: x    <- 0   (template not visible in this excerpt; type sselog1)
;;   2: x*fr <- o   (template not visible in this excerpt)
;; Operands 0 and 1 may not both be memory.
3143 (define_insn "sse2_storehpd"
3144 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x*fr")
3146 (match_operand:V2DF 1 "nonimmediate_operand" " x,0,o")
3147 (parallel [(const_int 1)])))]
3148 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
3150 movhpd\t{%1, %0|%0, %1}
3153 [(set_attr "type" "ssemov,sselog1,ssemov")
3154 (set_attr "mode" "V1DF,V2DF,DF")])
;; Post-reload split: taking element 1 of a V2DF that lives in memory
;; into a register becomes a plain DFmode move from byte offset 8 of that
;; memory.
3157 [(set (match_operand:DF 0 "register_operand" "")
3159 (match_operand:V2DF 1 "memory_operand" "")
3160 (parallel [(const_int 1)])))]
3161 "TARGET_SSE2 && reload_completed"
3162 [(set (match_dup 0) (match_dup 1))]
3164 operands[1] = adjust_address (operands[1], DFmode, 8);
;; Extract element 0 of V2DF operand 1 into DF operand 0.
;;   0: m    <- x   movlpd
;;   1: x    <- x   (template not visible in this excerpt)
;;   2: x*fr <- m   (template not visible in this excerpt)
;; Operands 0 and 1 may not both be memory.
3167 (define_insn "sse2_storelpd"
3168 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x*fr")
3170 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m")
3171 (parallel [(const_int 0)])))]
3172 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
3174 movlpd\t{%1, %0|%0, %1}
3177 [(set_attr "type" "ssemov")
3178 (set_attr "mode" "V1DF,DF,DF")])
;; Post-reload split for taking element 0 of a V2DF into a register:
;; operand 1 is reduced to a DFmode value -- either a DFmode REG with the
;; same register number or gen_lowpart of it (the test choosing between
;; the two is on lines not visible in this excerpt) -- and moved into
;; operand 0.
3181 [(set (match_operand:DF 0 "register_operand" "")
3183 (match_operand:V2DF 1 "nonimmediate_operand" "")
3184 (parallel [(const_int 0)])))]
3185 "TARGET_SSE2 && reload_completed"
3188 rtx op1 = operands[1];
3190 op1 = gen_rtx_REG (DFmode, REGNO (op1));
3192 op1 = gen_lowpart (DFmode, op1);
3193 emit_move_insn (operands[0], op1);
;; Replace the high element of a V2DF: the result pairs element 0 of V2DF
;; operand 1 with DF operand 2 as the new high element.
;;   0: x <- 0,m      movhpd
;;   1: x <- 0,x      unpcklpd
;;   2: o <- 0,x*fr   store form; its template is not visible in this
;;                    excerpt
;; There used to be an "x <- x,0" alternative emitting
;; "shufpd $1, %1, %0".  With operand 2 tied to the destination that
;; instruction yields { high half of the destination, low half of
;; operand 1 }, which is not { operand 1 element 0, operand 2 }; no shufpd
;; immediate can produce the required value with that tying, so the
;; alternative is dropped and the register allocator must tie operand 1
;; to the destination instead.
3197 (define_insn "sse2_loadhpd"
3198 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o")
3201 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0")
3202 (parallel [(const_int 0)]))
3203 (match_operand:DF 2 "nonimmediate_operand" " m,x,x*fr")))]
3204 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3206 movhpd\t{%2, %0|%0, %2}
3207 unpcklpd\t{%2, %0|%0, %2}
3210 [(set_attr "type" "ssemov,sselog,other")
3211 (set_attr "mode" "V1DF,V2DF,DF")])
;; Post-reload split for a memory destination whose own element 0 is
;; kept: only the new high element has to be written, so the insn becomes
;; a DFmode store of register operand 1 at byte offset 8.
3214 [(set (match_operand:V2DF 0 "memory_operand" "")
3216 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
3217 (match_operand:DF 1 "register_operand" "")))]
3218 "TARGET_SSE2 && reload_completed"
3219 [(set (match_dup 0) (match_dup 1))]
3221 operands[0] = adjust_address (operands[0], DFmode, 8);
;; Replace the low element of a V2DF: the result pairs DF operand 2 (new
;; low element) with element 1 of operand 1.
;;   0: x <- m,C      movsd   (operand 1 is the constant "C")
;;   1: x <- m,0      movlpd
;;   2: x <- x,0      movsd
;;   3: x <- 0,x      shufpd $2 taking the high half from operand 1
;;   4: x <- 0,o      movhpd from the %H1 form of operand 1
;;   5: m <- x*fr,0   store form; its template is not visible in this
;;                    excerpt
;; In alternative 3 operand 2 is tied to the destination, so the shufpd
;; source must be operand 1; naming %2 there would shuffle the
;; destination with itself and never read operand 1.
3224 (define_insn "sse2_loadlpd"
3225 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m")
3227 (match_operand:DF 2 "nonimmediate_operand" " m,m,x,0,0,x*fr")
3229 (match_operand:V2DF 1 "vector_move_operand" " C,0,0,x,o,0")
3230 (parallel [(const_int 1)]))))]
3231 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3233 movsd\t{%2, %0|%0, %2}
3234 movlpd\t{%2, %0|%0, %2}
3235 movsd\t{%2, %0|%0, %2}
3236 shufpd\t{$2, %1, %0|%0, %1, 2}
3237 movhpd\t{%H1, %0|%0, %H1}
3239 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,other")
3240 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,DF")])
;; Post-reload split for a memory destination whose own element 1 is
;; kept: only the new low element has to be written, so the insn becomes
;; a DFmode store of register operand 1 at byte offset 0.  (Offset 8
;; would overwrite the element that the pattern says is preserved; that
;; offset belongs to the split that replaces the high element.)
3243 [(set (match_operand:V2DF 0 "memory_operand" "")
3245 (match_operand:DF 1 "register_operand" "")
3246 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
3247 "TARGET_SSE2 && reload_completed"
3248 [(set (match_dup 0) (match_dup 1))]
3250 operands[0] = adjust_address (operands[0], DFmode, 0);
3253 ;; Not sure these two are ever used, but it doesn't hurt to have them.
;; Extract element 1 of a V2DF into a DF when only SSE1 is available
;; (!TARGET_SSE2 && TARGET_SSE), using single-precision move forms:
;;   0: m <- x   movhps
;;   1: x <- x   movhlps
;;   2: x <- o   movlps from the %H1 form of the memory operand
3255 (define_insn "*vec_extractv2df_1_sse"
3256 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
3258 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
3259 (parallel [(const_int 1)])))]
3260 "!TARGET_SSE2 && TARGET_SSE
3261 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
3263 movhps\t{%1, %0|%0, %1}
3264 movhlps\t{%1, %0|%0, %1}
3265 movlps\t{%H1, %0|%0, %H1}"
3266 [(set_attr "type" "ssemov")
3267 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Extract element 0 of a V2DF into a DF when only SSE1 is available
;; (!TARGET_SSE2 && TARGET_SSE):
;;   0: m <- x   movlps
;;   1: x <- x   movaps (whole-register copy)
;;   2: x <- m   movlps
3269 (define_insn "*vec_extractv2df_0_sse"
3270 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
3272 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
3273 (parallel [(const_int 0)])))]
3274 "!TARGET_SSE2 && TARGET_SSE
3275 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
3277 movlps\t{%1, %0|%0, %1}
3278 movaps\t{%1, %0|%0, %1}
3279 movlps\t{%1, %0|%0, %1}"
3280 [(set_attr "type" "ssemov")
3281 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; movsd-style merge of two V2DF operands (operand 2 is listed first in
;; the RTL, operand 1 second; the combining code and its mask are on
;; lines not visible in this excerpt).
;;   0: x <- 2:x 1:0   movsd
;;   1: x <- 2:m 1:0   movlpd
;;   2: m <- 2:x 1:0   movlpd
;;   3: x <- 2:0 1:x   shufpd $2 taking the high half from operand 1
;;   4: x <- 2:0 1:o   movhps from the %H1 form of operand 1
;;   5: o <- 2:0 1:x   movhps of operand 1 to the %H0 form of the
;;                     destination
;; In alternative 3 operand 2 is tied to the destination, so the shufpd
;; source must be operand 1; naming %2 there would shuffle the
;; destination with itself and never read operand 1.
3283 (define_insn "sse2_movsd"
3284 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m,x,x,o")
3286 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x,0,0,0")
3287 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0,x,o,x")
3291 movsd\t{%2, %0|%0, %2}
3292 movlpd\t{%2, %0|%0, %2}
3293 movlpd\t{%2, %0|%0, %2}
3294 shufpd\t{$2, %1, %0|%0, %1, 2}
3295 movhps\t{%H1, %0|%0, %H1}
3296 movhps\t{%1, %H0|%H0, %1}"
3297 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
3298 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,V1DF")])
;; Builds a V2DF from a single DF register-or-memory operand with
;; movddup.  The enabling condition is on a line not visible in this
;; excerpt.
3300 (define_insn "*vec_dupv2df_sse3"
3301 [(set (match_operand:V2DF 0 "register_operand" "=x")
3303 (match_operand:DF 1 "nonimmediate_operand" "xm")))]
3305 "movddup\t{%1, %0|%0, %1}"
3306 [(set_attr "type" "sselog1")
3307 (set_attr "mode" "DF")])
;; Builds a V2DF from a DF register operand that is tied to the
;; destination ("0").  The condition and output template are on lines not
;; visible in this excerpt.
3309 (define_insn "vec_dupv2df"
3310 [(set (match_operand:V2DF 0 "register_operand" "=x")
3312 (match_operand:DF 1 "register_operand" "0")))]
3315 [(set_attr "type" "sselog1")
3316 (set_attr "mode" "V2DF")])
;; Builds a V2DF from DF operand 1 with movddup.  The second half of the
;; RTL and the enabling condition are on lines not visible in this
;; excerpt (presumably a match_dup of operand 1 -- confirm).
3318 (define_insn "*vec_concatv2df_sse3"
3319 [(set (match_operand:V2DF 0 "register_operand" "=x")
3321 (match_operand:DF 1 "nonimmediate_operand" "xm")
3324 "movddup\t{%1, %0|%0, %1}"
3325 [(set_attr "type" "sselog1")
3326 (set_attr "mode" "DF")])
;; Builds a V2DF from two DF operands (operand 1 first, operand 2 second).
;;   0: Y2 <- 0,Y2   unpcklpd
;;   1: Y2 <- 0,m    movhpd
;;   2: Y2 <- m,C    movsd of operand 1 (operand 2 is the constant "C")
;;   3: x  <- 0,x    movlhps
;;   4: x  <- 0,m    movhps
;; The enabling condition is on a line not visible in this excerpt.
3328 (define_insn "*vec_concatv2df"
3329 [(set (match_operand:V2DF 0 "register_operand" "=Y2,Y2,Y2,x,x")
3331 (match_operand:DF 1 "nonimmediate_operand" " 0 ,0 ,m ,0,0")
3332 (match_operand:DF 2 "vector_move_operand" " Y2,m ,C ,x,m")))]
3335 unpcklpd\t{%2, %0|%0, %2}
3336 movhpd\t{%2, %0|%0, %2}
3337 movsd\t{%1, %0|%0, %1}
3338 movlhps\t{%2, %0|%0, %2}
3339 movhps\t{%2, %0|%0, %2}"
3340 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,ssemov")
3341 (set_attr "mode" "V2DF,V1DF,DF,V4SF,V2SF")])
;; Element-insert expander for V2DF: forwards the vector (operand 0), the
;; DF value (operand 1) and the constant element index (operand 2) to
;; ix86_expand_vector_set, passing false as its first argument.
3343 (define_expand "vec_setv2df"
3344 [(match_operand:V2DF 0 "register_operand" "")
3345 (match_operand:DF 1 "register_operand" "")
3346 (match_operand 2 "const_int_operand" "")]
3349 ix86_expand_vector_set (false, operands[0], operands[1],
3350 INTVAL (operands[2]));
;; Element-extract expander for V2DF: forwards the DF destination
;; (operand 0), the vector (operand 1) and the constant element index
;; (operand 2) to ix86_expand_vector_extract, passing false as its first
;; argument.
3354 (define_expand "vec_extractv2df"
3355 [(match_operand:DF 0 "register_operand" "")
3356 (match_operand:V2DF 1 "register_operand" "")
3357 (match_operand 2 "const_int_operand" "")]
3360 ix86_expand_vector_extract (false, operands[0], operands[1],
3361 INTVAL (operands[2]));
;; Vector-initialisation expander for V2DF: hands the destination
;; (operand 0) and the unconstrained initialiser (operand 1) to
;; ix86_expand_vector_init, passing false as its first argument.
3365 (define_expand "vec_initv2df"
3366 [(match_operand:V2DF 0 "register_operand" "")
3367 (match_operand 1 "" "")]
3370 ix86_expand_vector_init (false, operands[0], operands[1]);
3374 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3376 ;; Parallel integral arithmetic
3378 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Integer vector negation expander over SSEMODEI.  The preparation
;; statement creates operands[2] as a register holding the all-zero
;; vector of the mode; the RTL that consumes it is on lines not visible
;; in this excerpt (presumably zero minus operand 1 -- confirm).
3380 (define_expand "neg<mode>2"
3381 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3384 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")))]
3386 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
;; Integer vector addition expander over SSEMODEI (V16QI, V8HI, V4SI,
;; V2DI).  Both sources may be register or memory; the preparation
;; statement runs ix86_fixup_binary_operands_no_copy for PLUS on them.
3388 (define_expand "add<mode>3"
3389 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3390 (plus:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3391 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
3393 "ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);")
;; Insn behind add<mode>3: emits padd<ssevecsize>, i.e. paddb, paddw,
;; paddd or paddq according to the ssevecsize mode attribute.  Operand 1
;; is commutative ("%") and tied to the destination; operand 2 may be a
;; register or memory.
3395 (define_insn "*add<mode>3"
3396 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3398 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
3399 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3400 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
3401 "padd<ssevecsize>\t{%2, %0|%0, %2}"
3402 [(set_attr "type" "sseiadd")
3403 (set_attr "prefix_data16" "1")
3404 (set_attr "mode" "TI")])
;; Addition for the SSEMODE12 modes (V16QI, V8HI) emitting
;; padds<ssevecsize> (paddsb / paddsw).  The condition validates the
;; operands as an SS_PLUS binary operation; operand 1 is commutative and
;; tied to the destination.
3406 (define_insn "sse2_ssadd<mode>3"
3407 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
3409 (match_operand:SSEMODE12 1 "nonimmediate_operand" "%0")
3410 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
3411 "TARGET_SSE2 && ix86_binary_operator_ok (SS_PLUS, <MODE>mode, operands)"
3412 "padds<ssevecsize>\t{%2, %0|%0, %2}"
3413 [(set_attr "type" "sseiadd")
3414 (set_attr "prefix_data16" "1")
3415 (set_attr "mode" "TI")])
;; Addition for the SSEMODE12 modes (V16QI, V8HI) emitting
;; paddus<ssevecsize> (paddusb / paddusw).  The condition validates the
;; operands as a US_PLUS binary operation; operand 1 is commutative and
;; tied to the destination.
3417 (define_insn "sse2_usadd<mode>3"
3418 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
3420 (match_operand:SSEMODE12 1 "nonimmediate_operand" "%0")
3421 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
3422 "TARGET_SSE2 && ix86_binary_operator_ok (US_PLUS, <MODE>mode, operands)"
3423 "paddus<ssevecsize>\t{%2, %0|%0, %2}"
3424 [(set_attr "type" "sseiadd")
3425 (set_attr "prefix_data16" "1")
3426 (set_attr "mode" "TI")])
;; Integer vector subtraction expander over SSEMODEI.  The minuend
;; (operand 1) must be a register, the subtrahend (operand 2) may be
;; register or memory; the preparation statement runs
;; ix86_fixup_binary_operands_no_copy for MINUS.
3428 (define_expand "sub<mode>3"
3429 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3430 (minus:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "")
3431 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
3433 "ix86_fixup_binary_operands_no_copy (MINUS, <MODE>mode, operands);")
;; Insn behind sub<mode>3: emits psub<ssevecsize> (psubb / psubw / psubd /
;; psubq).  Operand 1 is tied to the destination and, unlike the add
;; pattern, carries no commutative marker.  The enabling condition is on
;; a line not visible in this excerpt.
3435 (define_insn "*sub<mode>3"
3436 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3438 (match_operand:SSEMODEI 1 "register_operand" "0")
3439 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3441 "psub<ssevecsize>\t{%2, %0|%0, %2}"
3442 [(set_attr "type" "sseiadd")
3443 (set_attr "prefix_data16" "1")
3444 (set_attr "mode" "TI")])
;; Subtraction for the SSEMODE12 modes (V16QI, V8HI) emitting
;; psubs<ssevecsize> (psubsb / psubsw).  Operand 1 is tied to the
;; destination.  The RTL code and enabling condition are on lines not
;; visible in this excerpt.
3446 (define_insn "sse2_sssub<mode>3"
3447 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
3449 (match_operand:SSEMODE12 1 "register_operand" "0")
3450 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
3452 "psubs<ssevecsize>\t{%2, %0|%0, %2}"
3453 [(set_attr "type" "sseiadd")
3454 (set_attr "prefix_data16" "1")
3455 (set_attr "mode" "TI")])
;; Subtraction for the SSEMODE12 modes (V16QI, V8HI) emitting
;; psubus<ssevecsize> (psubusb / psubusw).  Operand 1 is tied to the
;; destination.  The RTL code and enabling condition are on lines not
;; visible in this excerpt.
3457 (define_insn "sse2_ussub<mode>3"
3458 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
3460 (match_operand:SSEMODE12 1 "register_operand" "0")
3461 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
3463 "psubus<ssevecsize>\t{%2, %0|%0, %2}"
3464 [(set_attr "type" "sseiadd")
3465 (set_attr "prefix_data16" "1")
3466 (set_attr "mode" "TI")])
;; V16QI multiply.  There is no constraint-bearing alternative; the insn
;; only exists before reload (its condition rejects reload_in_progress
;; and reload_completed) and is split into word multiplies.  Two code
;; paths are visible in the body (the test selecting between them is on a
;; line not visible in this excerpt):
;;  - SSE5: unpack high and low bytes of both inputs into six V8HI
;;    temporaries with ix86_expand_sse5_unpack, multiply with mulv8hi3,
;;    and repack with ix86_expand_sse5_pack.
;;  - otherwise: twelve V16QI temporaries; widen with punpck[hl]bw of each
;;    input with itself, multiply as V8HI, then run a ladder of
;;    punpck[hl]bw steps that gathers the low byte of every product.
3468 (define_insn_and_split "mulv16qi3"
3469 [(set (match_operand:V16QI 0 "register_operand" "")
3470 (mult:V16QI (match_operand:V16QI 1 "register_operand" "")
3471 (match_operand:V16QI 2 "register_operand" "")))]
3473 && !(reload_completed || reload_in_progress)"
3478 rtx t[12], op0, op[3];
3483 /* On SSE5, we can take advantage of the pperm instruction to pack and
3484 unpack the bytes. Unpack data such that we've got a source byte in
3485 each low byte of each word. We don't care what goes into the high
3486 byte, so put 0 there. */
3487 for (i = 0; i < 6; ++i)
3488 t[i] = gen_reg_rtx (V8HImode);
3490 for (i = 0; i < 2; i++)
3493 op[1] = operands[i+1];
3494 ix86_expand_sse5_unpack (op, true, true); /* high bytes */
3497 ix86_expand_sse5_unpack (op, true, false); /* low bytes */
3500 /* Multiply words. */
3501 emit_insn (gen_mulv8hi3 (t[4], t[0], t[1])); /* high bytes */
3502 emit_insn (gen_mulv8hi3 (t[5], t[2], t[3])); /* low bytes */
3504 /* Pack the low byte of each word back into a single xmm */
3505 op[0] = operands[0];
3508 ix86_expand_sse5_pack (op);
3512 for (i = 0; i < 12; ++i)
3513 t[i] = gen_reg_rtx (V16QImode);
3515 /* Unpack data such that we've got a source byte in each low byte of
3516 each word. We don't care what goes into the high byte of each word.
3517 Rather than trying to get zero in there, most convenient is to let
3518 it be a copy of the low byte. */
3519 emit_insn (gen_sse2_punpckhbw (t[0], operands[1], operands[1]));
3520 emit_insn (gen_sse2_punpckhbw (t[1], operands[2], operands[2]));
3521 emit_insn (gen_sse2_punpcklbw (t[2], operands[1], operands[1]));
3522 emit_insn (gen_sse2_punpcklbw (t[3], operands[2], operands[2]));
3524 /* Multiply words. The end-of-line annotations here give a picture of what
3525 the output of that instruction looks like. Dot means don't care; the
3526 letters are the bytes of the result with A being the most significant. */
3527 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[4]), /* .A.B.C.D.E.F.G.H */
3528 gen_lowpart (V8HImode, t[0]),
3529 gen_lowpart (V8HImode, t[1])));
3530 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[5]), /* .I.J.K.L.M.N.O.P */
3531 gen_lowpart (V8HImode, t[2]),
3532 gen_lowpart (V8HImode, t[3])));
3534 /* Extract the relevant bytes and merge them back together. */
3535 emit_insn (gen_sse2_punpckhbw (t[6], t[5], t[4])); /* ..AI..BJ..CK..DL */
3536 emit_insn (gen_sse2_punpcklbw (t[7], t[5], t[4])); /* ..EM..FN..GO..HP */
3537 emit_insn (gen_sse2_punpckhbw (t[8], t[7], t[6])); /* ....AEIM....BFJN */
3538 emit_insn (gen_sse2_punpcklbw (t[9], t[7], t[6])); /* ....CGKO....DHLP */
3539 emit_insn (gen_sse2_punpckhbw (t[10], t[9], t[8])); /* ........ACEGIKMO */
3540 emit_insn (gen_sse2_punpcklbw (t[11], t[9], t[8])); /* ........BDFHJLNP */
3543 emit_insn (gen_sse2_punpcklbw (op0, t[11], t[10])); /* ABCDEFGHIJKLMNOP */
;; V8HI multiply expander.  Both sources may be register or memory; the
;; preparation statement runs ix86_fixup_binary_operands_no_copy for
;; MULT on them.
3547 (define_expand "mulv8hi3"
3548 [(set (match_operand:V8HI 0 "register_operand" "")
3549 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
3550 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
3552 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; Insn behind mulv8hi3: emits pmullw.  Operand 1 is commutative and tied
;; to the destination; operand 2 may be a register or memory.
3554 (define_insn "*mulv8hi3"
3555 [(set (match_operand:V8HI 0 "register_operand" "=x")
3556 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
3557 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
3558 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
3559 "pmullw\t{%2, %0|%0, %2}"
3560 [(set_attr "type" "sseimul")
3561 (set_attr "prefix_data16" "1")
3562 (set_attr "mode" "TI")])
;; Expander for the signed high-part V8HI multiply.  Most of the RTL
;; template is on lines not visible in this excerpt; the preparation
;; statement runs ix86_fixup_binary_operands_no_copy for MULT.
3564 (define_expand "smulv8hi3_highpart"
3565 [(set (match_operand:V8HI 0 "register_operand" "")
3570 (match_operand:V8HI 1 "nonimmediate_operand" ""))
3572 (match_operand:V8HI 2 "nonimmediate_operand" "")))
3575 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; Insn behind smulv8hi3_highpart: emits pmulhw.  Operand 1 is
;; commutative and tied to the destination; operand 2 may be a register
;; or memory.
3577 (define_insn "*smulv8hi3_highpart"
3578 [(set (match_operand:V8HI 0 "register_operand" "=x")
3583 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
3585 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
3587 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
3588 "pmulhw\t{%2, %0|%0, %2}"
3589 [(set_attr "type" "sseimul")
3590 (set_attr "prefix_data16" "1")
3591 (set_attr "mode" "TI")])
;; Expander for the unsigned high-part V8HI multiply.  Most of the RTL
;; template is on lines not visible in this excerpt; the preparation
;; statement runs ix86_fixup_binary_operands_no_copy for MULT.
3593 (define_expand "umulv8hi3_highpart"
3594 [(set (match_operand:V8HI 0 "register_operand" "")
3599 (match_operand:V8HI 1 "nonimmediate_operand" ""))
3601 (match_operand:V8HI 2 "nonimmediate_operand" "")))
3604 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; Insn behind umulv8hi3_highpart: emits pmulhuw.  Operand 1 is
;; commutative and tied to the destination; operand 2 may be a register
;; or memory.
3606 (define_insn "*umulv8hi3_highpart"
3607 [(set (match_operand:V8HI 0 "register_operand" "=x")
3612 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
3614 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
3616 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
3617 "pmulhuw\t{%2, %0|%0, %2}"
3618 [(set_attr "type" "sseimul")
3619 (set_attr "prefix_data16" "1")
3620 (set_attr "mode" "TI")])
;; pmuludq: produces a V2DI from elements 0 and 2 of each V4SI input (the
;; two visible selectors are both [0 2]).  Operand 1 is commutative and
;; tied to the destination.  The extension and multiply RTL codes are on
;; lines not visible in this excerpt.
3622 (define_insn "sse2_umulv2siv2di3"
3623 [(set (match_operand:V2DI 0 "register_operand" "=x")
3627 (match_operand:V4SI 1 "nonimmediate_operand" "%0")
3628 (parallel [(const_int 0) (const_int 2)])))
3631 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
3632 (parallel [(const_int 0) (const_int 2)])))))]
3633 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
3634 "pmuludq\t{%2, %0|%0, %2}"
3635 [(set_attr "type" "sseimul")
3636 (set_attr "prefix_data16" "1")
3637 (set_attr "mode" "TI")])
;; pmuldq (TARGET_SSE4_1): same operand shape as sse2_umulv2siv2di3 --
;; a V2DI result from elements 0 and 2 of each V4SI input -- but emitting
;; pmuldq.  The extension and multiply RTL codes are on lines not visible
;; in this excerpt.
3639 (define_insn "sse4_1_mulv2siv2di3"
3640 [(set (match_operand:V2DI 0 "register_operand" "=x")
3644 (match_operand:V4SI 1 "nonimmediate_operand" "%0")
3645 (parallel [(const_int 0) (const_int 2)])))
3648 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
3649 (parallel [(const_int 0) (const_int 2)])))))]
3650 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
3651 "pmuldq\t{%2, %0|%0, %2}"
3652 [(set_attr "type" "sseimul")
3653 (set_attr "prefix_extra" "1")
3654 (set_attr "mode" "TI")])
;; pmaddwd: V4SI result from two V8HI inputs.  The RTL takes V4HI
;; selections of both operands twice -- one pair of selectors starting at
;; index 0 and one starting at index 1 and ending at index 7 (the middle
;; indices and the arithmetic codes are on lines not visible in this
;; excerpt).  Operand 1 is commutative and tied to the destination.
3656 (define_insn "sse2_pmaddwd"
3657 [(set (match_operand:V4SI 0 "register_operand" "=x")
3662 (match_operand:V8HI 1 "nonimmediate_operand" "%0")
3663 (parallel [(const_int 0)
3669 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
3670 (parallel [(const_int 0)
3676 (vec_select:V4HI (match_dup 1)
3677 (parallel [(const_int 1)
3682 (vec_select:V4HI (match_dup 2)
3683 (parallel [(const_int 1)
3686 (const_int 7)]))))))]
3687 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
3688 "pmaddwd\t{%2, %0|%0, %2}"
3689 [(set_attr "type" "sseiadd")
3690 (set_attr "prefix_data16" "1")
3691 (set_attr "mode" "TI")])
;; V4SI multiply expander with register-only sources.  The operand fixup
;; for MULT is applied only when TARGET_SSE4_1 or TARGET_SSE5 is set; the
;; resulting RTL is then matched by one of the *_mulv4si3 patterns that
;; follow.
3693 (define_expand "mulv4si3"
3694 [(set (match_operand:V4SI 0 "register_operand" "")
3695 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
3696 (match_operand:V4SI 2 "register_operand" "")))]
3699 if (TARGET_SSE4_1 || TARGET_SSE5)
3700 ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);
;; SSE4.1 V4SI multiply: emits pmulld.  Operand 1 is commutative and tied
;; to the destination; operand 2 may be a register or memory.
3703 (define_insn "*sse4_1_mulv4si3"
3704 [(set (match_operand:V4SI 0 "register_operand" "=x")
3705 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%0")
3706 (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
3707 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
3708 "pmulld\t{%2, %0|%0, %2}"
3709 [(set_attr "type" "sseimul")
3710 (set_attr "prefix_extra" "1")
3711 (set_attr "mode" "TI")])
3713 ;; We don't have a straight 32-bit parallel multiply on SSE5, so fake it with a
3714 ;; multiply/add. In general, we expect the define_split to occur before
3715 ;; register allocation, so we have to handle the corner case where the target
3716 ;; is used as the base or index register in operands 1/2.
;; The destination is early-clobber ("=&x").  The split fires either
;; after reload or, before reload, only when operand 0 is not mentioned
;; in operand 1 or operand 2.  The replacement is a multiply-add whose
;; addend, operands[3], is set to the V4SI zero constant.
3717 (define_insn_and_split "*sse5_mulv4si3"
3718 [(set (match_operand:V4SI 0 "register_operand" "=&x")
3719 (mult:V4SI (match_operand:V4SI 1 "register_operand" "%x")
3720 (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
3723 "&& (reload_completed
3724 || (!reg_mentioned_p (operands[0], operands[1])
3725 && !reg_mentioned_p (operands[0], operands[2])))"
3729 (plus:V4SI (mult:V4SI (match_dup 1)
3733 operands[3] = CONST0_RTX (V4SImode);
3735 [(set_attr "type" "ssemuladd")
3736 (set_attr "mode" "TI")])
;; V4SI multiply for plain SSE2 (neither SSE4.1 nor SSE5), split before
;; reload.  As the in-body comments describe: multiply elements 0 and 2
;; with the pmuludq pattern, shift both inputs right as TImode so that
;; elements 1 and 3 land in slots 0 and 2, multiply again, move each
;; product's element 2 down to element 1 with pshufd, and merge the two
;; halves with punpckldq.  Several argument lines (including the shift
;; amounts) are not visible in this excerpt.
3738 (define_insn_and_split "*sse2_mulv4si3"
3739 [(set (match_operand:V4SI 0 "register_operand" "")
3740 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
3741 (match_operand:V4SI 2 "register_operand" "")))]
3742 "TARGET_SSE2 && !TARGET_SSE4_1 && !TARGET_SSE5
3743 && !(reload_completed || reload_in_progress)"
3748 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
3754 t1 = gen_reg_rtx (V4SImode);
3755 t2 = gen_reg_rtx (V4SImode);
3756 t3 = gen_reg_rtx (V4SImode);
3757 t4 = gen_reg_rtx (V4SImode);
3758 t5 = gen_reg_rtx (V4SImode);
3759 t6 = gen_reg_rtx (V4SImode);
3760 thirtytwo = GEN_INT (32);
3762 /* Multiply elements 2 and 0. */
3763 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t1),
3766 /* Shift both input vectors down one element, so that elements 3
3767 and 1 are now in the slots for elements 2 and 0. For K8, at
3768 least, this is faster than using a shuffle. */
3769 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2),
3770 gen_lowpart (TImode, op1),
3772 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3),
3773 gen_lowpart (TImode, op2),
3775 /* Multiply elements 3 and 1. */
3776 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t4),
3779 /* Move the results in element 2 down to element 1; we don't care
3780 what goes in elements 2 and 3. */
3781 emit_insn (gen_sse2_pshufd_1 (t5, t1, const0_rtx, const2_rtx,
3782 const0_rtx, const0_rtx));
3783 emit_insn (gen_sse2_pshufd_1 (t6, t4, const0_rtx, const2_rtx,
3784 const0_rtx, const0_rtx));
3786 /* Merge the parts back together. */
3787 emit_insn (gen_sse2_punpckldq (op0, t5, t6));
;; V2DI multiply, split before reload into 32x32->64 pieces built on the
;; pmuludq pattern:
;;   t1 = lo(op1) * lo(op2)
;;   t2 = op1 >> 32,  t3 = op2 >> 32          (logical right shifts)
;;   t4 = lo(op1) * lo(t3),  t5 = lo(op2) * lo(t2)
;;   t4 <<= 32,  t5 <<= 32
;;   op0 = t1 + t4 + t5
;; op0/op1/op2 are assigned on lines not visible in this excerpt
;; (presumably from operands[0..2] -- confirm).
3791 (define_insn_and_split "mulv2di3"
3792 [(set (match_operand:V2DI 0 "register_operand" "")
3793 (mult:V2DI (match_operand:V2DI 1 "register_operand" "")
3794 (match_operand:V2DI 2 "register_operand" "")))]
3796 && !(reload_completed || reload_in_progress)"
3801 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
3807 t1 = gen_reg_rtx (V2DImode);
3808 t2 = gen_reg_rtx (V2DImode);
3809 t3 = gen_reg_rtx (V2DImode);
3810 t4 = gen_reg_rtx (V2DImode);
3811 t5 = gen_reg_rtx (V2DImode);
3812 t6 = gen_reg_rtx (V2DImode);
3813 thirtytwo = GEN_INT (32);
3815 /* Multiply low parts. */
3816 emit_insn (gen_sse2_umulv2siv2di3 (t1, gen_lowpart (V4SImode, op1),
3817 gen_lowpart (V4SImode, op2)));
3819 /* Shift input vectors right 32 bits so we can multiply high parts. */
3820 emit_insn (gen_lshrv2di3 (t2, op1, thirtytwo));
3821 emit_insn (gen_lshrv2di3 (t3, op2, thirtytwo));
3823 /* Multiply high parts by low parts. */
3824 emit_insn (gen_sse2_umulv2siv2di3 (t4, gen_lowpart (V4SImode, op1),
3825 gen_lowpart (V4SImode, t3)));
3826 emit_insn (gen_sse2_umulv2siv2di3 (t5, gen_lowpart (V4SImode, op2),
3827 gen_lowpart (V4SImode, t2)));
3829 /* Shift them back. */
3830 emit_insn (gen_ashlv2di3 (t4, t4, thirtytwo));
3831 emit_insn (gen_ashlv2di3 (t5, t5, thirtytwo));
3833 /* Add the three parts together. */
3834 emit_insn (gen_addv2di3 (t6, t1, t4));
3835 emit_insn (gen_addv2di3 (op0, t6, t5));
;; Signed widening multiply, high half, V8HI -> V4SI.  t1 receives the
;; mulv8hi3 product and t2 the smulv8hi3_highpart product of the same
;; inputs; gen_vec_interleave_highv8hi of t1 and t2 is written to the
;; destination viewed as V8HI.  op1/op2 are assigned on lines not visible
;; in this excerpt (presumably operands[1] and operands[2] -- confirm).
3839 (define_expand "vec_widen_smult_hi_v8hi"
3840 [(match_operand:V4SI 0 "register_operand" "")
3841 (match_operand:V8HI 1 "register_operand" "")
3842 (match_operand:V8HI 2 "register_operand" "")]
3845 rtx op1, op2, t1, t2, dest;
3849 t1 = gen_reg_rtx (V8HImode);
3850 t2 = gen_reg_rtx (V8HImode);
3851 dest = gen_lowpart (V8HImode, operands[0]);
3853 emit_insn (gen_mulv8hi3 (t1, op1, op2));
3854 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
3855 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
;; Signed widening multiply, low half, V8HI -> V4SI.  t1 receives the
;; mulv8hi3 product and t2 the smulv8hi3_highpart product of the same
;; inputs; gen_vec_interleave_lowv8hi of t1 and t2 is written to the
;; destination viewed as V8HI.  op1/op2 are assigned on lines not visible
;; in this excerpt (presumably operands[1] and operands[2] -- confirm).
3859 (define_expand "vec_widen_smult_lo_v8hi"
3860 [(match_operand:V4SI 0 "register_operand" "")
3861 (match_operand:V8HI 1 "register_operand" "")
3862 (match_operand:V8HI 2 "register_operand" "")]
3865 rtx op1, op2, t1, t2, dest;
3869 t1 = gen_reg_rtx (V8HImode);
3870 t2 = gen_reg_rtx (V8HImode);
3871 dest = gen_lowpart (V8HImode, operands[0]);
3873 emit_insn (gen_mulv8hi3 (t1, op1, op2));
3874 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
3875 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
;; Unsigned widening multiply, high half, V8HI -> V4SI.  t1 receives the
;; mulv8hi3 product and t2 the umulv8hi3_highpart product of the same
;; inputs; gen_vec_interleave_highv8hi of t1 and t2 is written to the
;; destination viewed as V8HI.  op1/op2 are assigned on lines not visible
;; in this excerpt (presumably operands[1] and operands[2] -- confirm).
3879 (define_expand "vec_widen_umult_hi_v8hi"
3880 [(match_operand:V4SI 0 "register_operand" "")
3881 (match_operand:V8HI 1 "register_operand" "")
3882 (match_operand:V8HI 2 "register_operand" "")]
3885 rtx op1, op2, t1, t2, dest;
3889 t1 = gen_reg_rtx (V8HImode);
3890 t2 = gen_reg_rtx (V8HImode);
3891 dest = gen_lowpart (V8HImode, operands[0]);
3893 emit_insn (gen_mulv8hi3 (t1, op1, op2));
3894 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
3895 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
;; Unsigned widening multiply, low half, V8HI -> V4SI.  t1 receives the
;; mulv8hi3 product and t2 the umulv8hi3_highpart product of the same
;; inputs; gen_vec_interleave_lowv8hi of t1 and t2 is written to the
;; destination viewed as V8HI.  op1/op2 are assigned on lines not visible
;; in this excerpt (presumably operands[1] and operands[2] -- confirm).
3899 (define_expand "vec_widen_umult_lo_v8hi"
3900 [(match_operand:V4SI 0 "register_operand" "")
3901 (match_operand:V8HI 1 "register_operand" "")
3902 (match_operand:V8HI 2 "register_operand" "")]
3905 rtx op1, op2, t1, t2, dest;
3909 t1 = gen_reg_rtx (V8HImode);
3910 t2 = gen_reg_rtx (V8HImode);
3911 dest = gen_lowpart (V8HImode, operands[0]);
3913 emit_insn (gen_mulv8hi3 (t1, op1, op2));
3914 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
3915 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
;; Widening multiply, high half, V4SI -> V2DI (signed-named expander).
;; Each input is interleaved with itself (gen_vec_interleave_highv4si)
;; into a temporary, and the two temporaries are multiplied with
;; gen_sse2_umulv2siv2di3 directly into operand 0.
;; NOTE(review): the multiply used here is the same pmuludq-based pattern
;; the umult expander of this shape uses; confirm that it is intended for
;; this signed-named expander.
3919 (define_expand "vec_widen_smult_hi_v4si"
3920 [(match_operand:V2DI 0 "register_operand" "")
3921 (match_operand:V4SI 1 "register_operand" "")
3922 (match_operand:V4SI 2 "register_operand" "")]
3925 rtx op1, op2, t1, t2;
3929 t1 = gen_reg_rtx (V4SImode);
3930 t2 = gen_reg_rtx (V4SImode);
3932 emit_insn (gen_vec_interleave_highv4si (t1, op1, op1));
3933 emit_insn (gen_vec_interleave_highv4si (t2, op2, op2));
3934 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Widening multiply, low half, V4SI -> V2DI (signed-named expander).
;; Each input is interleaved with itself (gen_vec_interleave_lowv4si)
;; into a temporary, and the two temporaries are multiplied with
;; gen_sse2_umulv2siv2di3 directly into operand 0.
;; NOTE(review): the multiply used here is the same pmuludq-based pattern
;; the umult expander of this shape uses; confirm that it is intended for
;; this signed-named expander.
3938 (define_expand "vec_widen_smult_lo_v4si"
3939 [(match_operand:V2DI 0 "register_operand" "")
3940 (match_operand:V4SI 1 "register_operand" "")
3941 (match_operand:V4SI 2 "register_operand" "")]
3944 rtx op1, op2, t1, t2;
3948 t1 = gen_reg_rtx (V4SImode);
3949 t2 = gen_reg_rtx (V4SImode);
3951 emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1));
3952 emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2));
3953 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Unsigned widening multiply, high half, V4SI -> V2DI.  Each input is
;; interleaved with itself (gen_vec_interleave_highv4si) into a
;; temporary, and the two temporaries are multiplied with
;; gen_sse2_umulv2siv2di3 (pmuludq) directly into operand 0.
3957 (define_expand "vec_widen_umult_hi_v4si"
3958 [(match_operand:V2DI 0 "register_operand" "")
3959 (match_operand:V4SI 1 "register_operand" "")
3960 (match_operand:V4SI 2 "register_operand" "")]
3963 rtx op1, op2, t1, t2;
3967 t1 = gen_reg_rtx (V4SImode);
3968 t2 = gen_reg_rtx (V4SImode);
3970 emit_insn (gen_vec_interleave_highv4si (t1, op1, op1));
3971 emit_insn (gen_vec_interleave_highv4si (t2, op2, op2));
3972 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Unsigned widening multiply, low half, V4SI -> V2DI.  Each input is
;; interleaved with itself (gen_vec_interleave_lowv4si) into a temporary,
;; and the two temporaries are multiplied with gen_sse2_umulv2siv2di3
;; (pmuludq) directly into operand 0.
3976 (define_expand "vec_widen_umult_lo_v4si"
3977 [(match_operand:V2DI 0 "register_operand" "")
3978 (match_operand:V4SI 1 "register_operand" "")
3979 (match_operand:V4SI 2 "register_operand" "")]
3982 rtx op1, op2, t1, t2;
3986 t1 = gen_reg_rtx (V4SImode);
3987 t2 = gen_reg_rtx (V4SImode);
3989 emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1));
3990 emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2));
3991 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Dot-product expander for V8HI inputs with a V4SI accumulator
;; (operand 3): a fresh V4SI temporary receives gen_sse2_pmaddwd of
;; operands 1 and 2, and operand 0 is set to operand 3 plus that
;; temporary.
3995 (define_expand "sdot_prodv8hi"
3996 [(match_operand:V4SI 0 "register_operand" "")
3997 (match_operand:V8HI 1 "register_operand" "")
3998 (match_operand:V8HI 2 "register_operand" "")
3999 (match_operand:V4SI 3 "register_operand" "")]
4002 rtx t = gen_reg_rtx (V4SImode);
4003 emit_insn (gen_sse2_pmaddwd (t, operands[1], operands[2]));
4004 emit_insn (gen_addv4si3 (operands[0], operands[3], t));
;; Dot-product expander for V4SI inputs with a V2DI accumulator
;; (operand 3):
;;   t1 = pmuludq-pattern product of operands 1 and 2, plus operand 3
;;   t2, t3 = operands 1 and 2 shifted right as TImode values (the shift
;;            amount argument is on a line not visible in this excerpt)
;;   t4 = pmuludq-pattern product of t2 and t3
;;   operand 0 = t1 + t4
4008 (define_expand "udot_prodv4si"
4009 [(match_operand:V2DI 0 "register_operand" "")
4010 (match_operand:V4SI 1 "register_operand" "")
4011 (match_operand:V4SI 2 "register_operand" "")
4012 (match_operand:V2DI 3 "register_operand" "")]
4017 t1 = gen_reg_rtx (V2DImode);
4018 emit_insn (gen_sse2_umulv2siv2di3 (t1, operands[1], operands[2]));
4019 emit_insn (gen_addv2di3 (t1, t1, operands[3]));
4021 t2 = gen_reg_rtx (V4SImode);
4022 t3 = gen_reg_rtx (V4SImode);
4023 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2),
4024 gen_lowpart (TImode, operands[1]),
4026 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3),
4027 gen_lowpart (TImode, operands[2]),
4030 t4 = gen_reg_rtx (V2DImode);
4031 emit_insn (gen_sse2_umulv2siv2di3 (t4, t2, t3));
4033 emit_insn (gen_addv2di3 (operands[0], t1, t4));
;; Shift for the SSEMODE24 modes (V8HI, V4SI) emitting psra<ssevecsize>
;; (psraw / psrad).  The value operand is tied to the destination; the
;; SImode count is either an xmm register ("x") or a constant accepted by
;; the "N" constraint.  The RTL code and enabling condition are on lines
;; not visible in this excerpt.
4037 (define_insn "ashr<mode>3"
4038 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
4040 (match_operand:SSEMODE24 1 "register_operand" "0")
4041 (match_operand:SI 2 "nonmemory_operand" "xN")))]
4043 "psra<ssevecsize>\t{%2, %0|%0, %2}"
4044 [(set_attr "type" "sseishft")
4045 (set_attr "prefix_data16" "1")
4046 (set_attr "mode" "TI")])
;; Logical right shift (lshiftrt) for the SSEMODE248 modes (V8HI, V4SI,
;; V2DI) emitting psrl<ssevecsize> (psrlw / psrld / psrlq).  The value
;; operand is tied to the destination; the SImode count is either an xmm
;; register ("x") or a constant accepted by the "N" constraint.  The
;; enabling condition is on a line not visible in this excerpt.
4048 (define_insn "lshr<mode>3"
4049 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
4050 (lshiftrt:SSEMODE248
4051 (match_operand:SSEMODE248 1 "register_operand" "0")
4052 (match_operand:SI 2 "nonmemory_operand" "xN")))]
4054 "psrl<ssevecsize>\t{%2, %0|%0, %2}"
4055 [(set_attr "type" "sseishft")
4056 (set_attr "prefix_data16" "1")
4057 (set_attr "mode" "TI")])
;; Shift for the SSEMODE248 modes (V8HI, V4SI, V2DI) emitting
;; psll<ssevecsize> (psllw / pslld / psllq).  The value operand is tied
;; to the destination; the SImode count is either an xmm register ("x")
;; or a constant accepted by the "N" constraint.  The RTL code and
;; enabling condition are on lines not visible in this excerpt.
4059 (define_insn "ashl<mode>3"
4060 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
4062 (match_operand:SSEMODE248 1 "register_operand" "0")
4063 (match_operand:SI 2 "nonmemory_operand" "xN")))]
4065 "psll<ssevecsize>\t{%2, %0|%0, %2}"
4066 [(set_attr "type" "sseishft")
4067 (set_attr "prefix_data16" "1")
4068 (set_attr "mode" "TI")])
;; Whole-vector left shift expander over SSEMODEI.  The shift is
;; expressed as a TImode ashift; operands 0 and 1 are replaced by their
;; TImode lowparts, and the count must satisfy
;; const_0_to_255_mul_8_operand.
4070 (define_expand "vec_shl_<mode>"
4071 [(set (match_operand:SSEMODEI 0 "register_operand" "")
4072 (ashift:TI (match_operand:SSEMODEI 1 "register_operand" "")
4073 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
4076 operands[0] = gen_lowpart (TImode, operands[0]);
4077 operands[1] = gen_lowpart (TImode, operands[1]);
;; Whole-vector right shift expander over SSEMODEI.  The shift is
;; expressed as a TImode lshiftrt; operands 0 and 1 are replaced by their
;; TImode lowparts, and the count must satisfy
;; const_0_to_255_mul_8_operand.
4080 (define_expand "vec_shr_<mode>"
4081 [(set (match_operand:SSEMODEI 0 "register_operand" "")
4082 (lshiftrt:TI (match_operand:SSEMODEI 1 "register_operand" "")
4083 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
4086 operands[0] = gen_lowpart (TImode, operands[0]);
4087 operands[1] = gen_lowpart (TImode, operands[1]);
;; Unsigned maximum expander for V16QI.  Both sources may be register or
;; memory; the preparation statement runs
;; ix86_fixup_binary_operands_no_copy for UMAX.
4090 (define_expand "umaxv16qi3"
4091 [(set (match_operand:V16QI 0 "register_operand" "")
4092 (umax:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "")
4093 (match_operand:V16QI 2 "nonimmediate_operand" "")))]
4095 "ix86_fixup_binary_operands_no_copy (UMAX, V16QImode, operands);")
;; Insn behind umaxv16qi3: emits pmaxub.  Operand 1 is commutative and
;; tied to the destination; operand 2 may be a register or memory.
4097 (define_insn "*umaxv16qi3"
4098 [(set (match_operand:V16QI 0 "register_operand" "=x")
4099 (umax:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "%0")
4100 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
4101 "TARGET_SSE2 && ix86_binary_operator_ok (UMAX, V16QImode, operands)"
4102 "pmaxub\t{%2, %0|%0, %2}"
4103 [(set_attr "type" "sseiadd")
4104 (set_attr "prefix_data16" "1")
4105 (set_attr "mode" "TI")])
;; Signed maximum expander for V8HI.  Both sources may be register or
;; memory; the preparation statement runs
;; ix86_fixup_binary_operands_no_copy for SMAX.
4107 (define_expand "smaxv8hi3"
4108 [(set (match_operand:V8HI 0 "register_operand" "")
4109 (smax:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
4110 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
4112 "ix86_fixup_binary_operands_no_copy (SMAX, V8HImode, operands);")
;; Insn behind smaxv8hi3: emits pmaxsw.  Operand 1 is commutative and
;; tied to the destination; operand 2 may be a register or memory.
4114 (define_insn "*smaxv8hi3"
4115 [(set (match_operand:V8HI 0 "register_operand" "=x")
4116 (smax:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
4117 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
4118 "TARGET_SSE2 && ix86_binary_operator_ok (SMAX, V8HImode, operands)"
4119 "pmaxsw\t{%2, %0|%0, %2}"
4120 [(set_attr "type" "sseiadd")
4121 (set_attr "prefix_data16" "1")
4122 (set_attr "mode" "TI")])
;; Unsigned maximum expander for V8HI.  Two paths are visible (the test
;; selecting between them is on a line not visible in this excerpt):
;;  - run ix86_fixup_binary_operands_no_copy for UMAX and let the RTL
;;    template be emitted;
;;  - otherwise compute it by hand: op3 = ussub (operand 1, operand 2),
;;    then operand 0 = op3 + operand 2.  op3 normally aliases operand 0,
;;    but a fresh register is used when operand 0 equals operand 2 so the
;;    subtraction does not overwrite a value still needed by the add.
4124 (define_expand "umaxv8hi3"
4125 [(set (match_operand:V8HI 0 "register_operand" "")
4126 (umax:V8HI (match_operand:V8HI 1 "register_operand" "")
4127 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
4131 ix86_fixup_binary_operands_no_copy (UMAX, V8HImode, operands);
4134 rtx op0 = operands[0], op2 = operands[2], op3 = op0;
4135 if (rtx_equal_p (op3, op2))
4136 op3 = gen_reg_rtx (V8HImode);
4137 emit_insn (gen_sse2_ussubv8hi3 (op3, operands[1], op2));
4138 emit_insn (gen_addv8hi3 (op0, op3, op2));
;; Signed maximum expander for the SSEMODE14 modes (V16QI, V4SI).  Two
;; paths are visible (the test selecting between them is on a line not
;; visible in this excerpt):
;;  - run ix86_fixup_binary_operands_no_copy for SMAX and let the RTL
;;    template be emitted;
;;  - otherwise build a vector conditional for ix86_expand_int_vcond:
;;    destination = operand 0, the two values = operands 1 and 2, the
;;    comparison = (operand 1 GT operand 2) on those same operands.
4143 (define_expand "smax<mode>3"
4144 [(set (match_operand:SSEMODE14 0 "register_operand" "")
4145 (smax:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
4146 (match_operand:SSEMODE14 2 "register_operand" "")))]
4150 ix86_fixup_binary_operands_no_copy (SMAX, <MODE>mode, operands);
4156 xops[0] = operands[0];
4157 xops[1] = operands[1];
4158 xops[2] = operands[2];
4159 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
4160 xops[4] = operands[1];
4161 xops[5] = operands[2];
4162 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 signed maximum for the SSEMODE14 modes (V16QI, V4SI): emits
;; pmaxs<ssevecsize> (pmaxsb / pmaxsd).  Operand 1 is commutative and
;; tied to the destination.
4168 (define_insn "*sse4_1_smax<mode>3"
4169 [(set (match_operand:SSEMODE14 0 "register_operand" "=x")
4171 (match_operand:SSEMODE14 1 "nonimmediate_operand" "%0")
4172 (match_operand:SSEMODE14 2 "nonimmediate_operand" "xm")))]
4173 "TARGET_SSE4_1 && ix86_binary_operator_ok (SMAX, <MODE>mode, operands)"
4174 "pmaxs<ssevecsize>\t{%2, %0|%0, %2}"
4175 [(set_attr "type" "sseiadd")
4176 (set_attr "prefix_extra" "1")
4177 (set_attr "mode" "TI")])
;; Unsigned maximum expander for V4SI.  Two paths are visible (the test
;; selecting between them is on a line not visible in this excerpt):
;;  - run ix86_fixup_binary_operands_no_copy for UMAX and let the RTL
;;    template be emitted;
;;  - otherwise build a vector conditional for ix86_expand_int_vcond:
;;    destination = operand 0, the two values = operands 1 and 2, the
;;    comparison = (operand 1 GTU operand 2) on those same operands.
4179 (define_expand "umaxv4si3"
4180 [(set (match_operand:V4SI 0 "register_operand" "")
4181 (umax:V4SI (match_operand:V4SI 1 "register_operand" "")
4182 (match_operand:V4SI 2 "register_operand" "")))]
4186 ix86_fixup_binary_operands_no_copy (UMAX, V4SImode, operands);
4192 xops[0] = operands[0];
4193 xops[1] = operands[1];
4194 xops[2] = operands[2];
4195 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
4196 xops[4] = operands[1];
4197 xops[5] = operands[2];
4198 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 unsigned maximum for the SSEMODE24 modes (V8HI, V4SI): emits
;; pmaxu<ssevecsize> (pmaxuw / pmaxud).  Operand 1 is commutative and
;; tied to the destination.
4204 (define_insn "*sse4_1_umax<mode>3"
4205 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
4207 (match_operand:SSEMODE24 1 "nonimmediate_operand" "%0")
4208 (match_operand:SSEMODE24 2 "nonimmediate_operand" "xm")))]
4209 "TARGET_SSE4_1 && ix86_binary_operator_ok (UMAX, <MODE>mode, operands)"
4210 "pmaxu<ssevecsize>\t{%2, %0|%0, %2}"
4211 [(set_attr "type" "sseiadd")
4212 (set_attr "prefix_extra" "1")
4213 (set_attr "mode" "TI")])
;; Expander for unsigned byte minimum on V16QI.  Its preparation
;; statement only calls ix86_fixup_binary_operands_no_copy for UMIN.
4215 (define_expand "uminv16qi3"
4216 [(set (match_operand:V16QI 0 "register_operand" "")
4217 (umin:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "")
4218 (match_operand:V16QI 2 "nonimmediate_operand" "")))]
4220 "ix86_fixup_binary_operands_no_copy (UMIN, V16QImode, operands);")
;; SSE2 unsigned byte minimum: emits pminub.  Operand 1 is tied to the
;; destination and marked commutative ("%0").
4222 (define_insn "*uminv16qi3"
4223 [(set (match_operand:V16QI 0 "register_operand" "=x")
4224 (umin:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "%0")
4225 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
4226 "TARGET_SSE2 && ix86_binary_operator_ok (UMIN, V16QImode, operands)"
4227 "pminub\t{%2, %0|%0, %2}"
4228 [(set_attr "type" "sseiadd")
4229 (set_attr "prefix_data16" "1")
4230 (set_attr "mode" "TI")])
;; Expander for signed halfword minimum on V8HI.  Its preparation
;; statement only calls ix86_fixup_binary_operands_no_copy for SMIN.
4232 (define_expand "sminv8hi3"
4233 [(set (match_operand:V8HI 0 "register_operand" "")
4234 (smin:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
4235 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
4237 "ix86_fixup_binary_operands_no_copy (SMIN, V8HImode, operands);")
;; SSE2 signed halfword minimum: emits pminsw.  Operand 1 is tied to the
;; destination and marked commutative ("%0").
4239 (define_insn "*sminv8hi3"
4240 [(set (match_operand:V8HI 0 "register_operand" "=x")
4241 (smin:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
4242 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
4243 "TARGET_SSE2 && ix86_binary_operator_ok (SMIN, V8HImode, operands)"
4244 "pminsw\t{%2, %0|%0, %2}"
4245 [(set_attr "type" "sseiadd")
4246 (set_attr "prefix_data16" "1")
4247 (set_attr "mode" "TI")])
;; Expander for element-wise signed minimum on the SSEMODE14 modes
;; (V16QI, V4SI).  The visible preparation code calls
;; ix86_fixup_binary_operands_no_copy for SMIN and also fills a
;; vcond-style operand array with the select arms swapped, i.e.
;; dest = (op1 > op2) ? op2 : op1, for ix86_expand_int_vcond.
4249 (define_expand "smin<mode>3"
4250 [(set (match_operand:SSEMODE14 0 "register_operand" "")
4251 (smin:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
4252 (match_operand:SSEMODE14 2 "register_operand" "")))]
4256 ix86_fixup_binary_operands_no_copy (SMIN, <MODE>mode, operands);
4262 xops[0] = operands[0];
4263 xops[1] = operands[2];
4264 xops[2] = operands[1];
4265 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
4266 xops[4] = operands[1];
4267 xops[5] = operands[2];
4268 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 signed minimum for V16QI / V4SI: emits pminsb / pminsd.
;; Operand 1 is tied to the destination and marked commutative ("%0").
4274 (define_insn "*sse4_1_smin<mode>3"
4275 [(set (match_operand:SSEMODE14 0 "register_operand" "=x")
4277 (match_operand:SSEMODE14 1 "nonimmediate_operand" "%0")
4278 (match_operand:SSEMODE14 2 "nonimmediate_operand" "xm")))]
4279 "TARGET_SSE4_1 && ix86_binary_operator_ok (SMIN, <MODE>mode, operands)"
4280 "pmins<ssevecsize>\t{%2, %0|%0, %2}"
4281 [(set_attr "type" "sseiadd")
4282 (set_attr "prefix_extra" "1")
4283 (set_attr "mode" "TI")])
;; Expander for element-wise unsigned minimum on the SSEMODE24 modes
;; (V8HI, V4SI).  The visible preparation code calls
;; ix86_fixup_binary_operands_no_copy for UMIN and also fills a
;; vcond-style operand array with the select arms swapped, i.e.
;; dest = (op1 >u op2) ? op2 : op1, for ix86_expand_int_vcond.
4285 (define_expand "umin<mode>3"
4286 [(set (match_operand:SSEMODE24 0 "register_operand" "")
4287 (umin:SSEMODE24 (match_operand:SSEMODE24 1 "register_operand" "")
4288 (match_operand:SSEMODE24 2 "register_operand" "")))]
4292 ix86_fixup_binary_operands_no_copy (UMIN, <MODE>mode, operands);
4298 xops[0] = operands[0];
4299 xops[1] = operands[2];
4300 xops[2] = operands[1];
4301 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
4302 xops[4] = operands[1];
4303 xops[5] = operands[2];
4304 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 unsigned minimum for V8HI / V4SI: emits pminuw / pminud.
;; Operand 1 is tied to the destination and marked commutative ("%0").
4310 (define_insn "*sse4_1_umin<mode>3"
4311 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
4313 (match_operand:SSEMODE24 1 "nonimmediate_operand" "%0")
4314 (match_operand:SSEMODE24 2 "nonimmediate_operand" "xm")))]
4315 "TARGET_SSE4_1 && ix86_binary_operator_ok (UMIN, <MODE>mode, operands)"
4316 "pminu<ssevecsize>\t{%2, %0|%0, %2}"
4317 [(set_attr "type" "sseiadd")
4318 (set_attr "prefix_extra" "1")
4319 (set_attr "mode" "TI")])
4321 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4323 ;; Parallel integral comparisons
4325 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Element-wise equality compare for V16QI / V8HI / V4SI: emits
;; pcmpeqb / pcmpeqw / pcmpeqd.  Enabled for SSE2 when SSE5 is not
;; selected.  Operand 1 is tied to the destination and commutative.
4327 (define_insn "sse2_eq<mode>3"
4328 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
4330 (match_operand:SSEMODE124 1 "nonimmediate_operand" "%0")
4331 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
4332 "TARGET_SSE2 && !TARGET_SSE5
4333 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
4334 "pcmpeq<ssevecsize>\t{%2, %0|%0, %2}"
4335 [(set_attr "type" "ssecmp")
4336 (set_attr "prefix_data16" "1")
4337 (set_attr "mode" "TI")])
;; SSE4.1 equality compare on V2DI: emits pcmpeqq.  Operand 1 is tied to
;; the destination and marked commutative ("%0").
4339 (define_insn "sse4_1_eqv2di3"
4340 [(set (match_operand:V2DI 0 "register_operand" "=x")
4342 (match_operand:V2DI 1 "nonimmediate_operand" "%0")
4343 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
4344 "TARGET_SSE4_1 && ix86_binary_operator_ok (EQ, V2DImode, operands)"
4345 "pcmpeqq\t{%2, %0|%0, %2}"
4346 [(set_attr "type" "ssecmp")
4347 (set_attr "prefix_extra" "1")
4348 (set_attr "mode" "TI")])
;; Element-wise greater-than compare for V16QI / V8HI / V4SI: emits
;; pcmpgtb / pcmpgtw / pcmpgtd.  Enabled for SSE2 when SSE5 is not
;; selected.  Operand 1 must be a register tied to the destination; the
;; operands are not marked commutative.
4350 (define_insn "sse2_gt<mode>3"
4351 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
4353 (match_operand:SSEMODE124 1 "register_operand" "0")
4354 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
4355 "TARGET_SSE2 && !TARGET_SSE5"
4356 "pcmpgt<ssevecsize>\t{%2, %0|%0, %2}"
4357 [(set_attr "type" "ssecmp")
4358 (set_attr "prefix_data16" "1")
4359 (set_attr "mode" "TI")])
;; Greater-than compare on V2DI: emits pcmpgtq.  The comparison is not
;; commutative and operand 1 is tied to the destination by the "0"
;; constraint, so its predicate is register_operand, matching
;; sse2_gt<mode>3.  prefix_extra is set as on sse4_1_eqv2di3 (pcmpeqq),
;; since pcmpgtq likewise needs an extra opcode byte.
4361 (define_insn "sse4_2_gtv2di3"
4362 [(set (match_operand:V2DI 0 "register_operand" "=x")
4364 (match_operand:V2DI 1 "register_operand" "0")
4365 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
4367 "pcmpgtq\t{%2, %0|%0, %2}"
4368 [(set_attr "type" "ssecmp")
(set_attr "prefix_extra" "1")
4369 (set_attr "mode" "TI")])
;; Vector conditional select on the integer SSE modes:
;; operand 0 = (operand 4 <operator 3> operand 5) ? operand 1 : operand 2.
;; The visible body hands the operand array to ix86_expand_int_vcond and
;; tests its return value.
4371 (define_expand "vcond<mode>"
4372 [(set (match_operand:SSEMODEI 0 "register_operand" "")
4373 (if_then_else:SSEMODEI
4374 (match_operator 3 ""
4375 [(match_operand:SSEMODEI 4 "nonimmediate_operand" "")
4376 (match_operand:SSEMODEI 5 "nonimmediate_operand" "")])
4377 (match_operand:SSEMODEI 1 "general_operand" "")
4378 (match_operand:SSEMODEI 2 "general_operand" "")))]
4381 if (ix86_expand_int_vcond (operands))
;; Vector conditional select on the integer SSE modes under the vcondu
;; name; the operand layout is the same as vcond<mode>:
;; operand 0 = (operand 4 <operator 3> operand 5) ? operand 1 : operand 2.
;; The visible body hands the operand array to ix86_expand_int_vcond and
;; tests its return value.
4387 (define_expand "vcondu<mode>"
4388 [(set (match_operand:SSEMODEI 0 "register_operand" "")
4389 (if_then_else:SSEMODEI
4390 (match_operator 3 ""
4391 [(match_operand:SSEMODEI 4 "nonimmediate_operand" "")
4392 (match_operand:SSEMODEI 5 "nonimmediate_operand" "")])
4393 (match_operand:SSEMODEI 1 "general_operand" "")
4394 (match_operand:SSEMODEI 2 "general_operand" "")))]
4397 if (ix86_expand_int_vcond (operands))
4403 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4405 ;; Parallel bitwise logical operations
4407 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; One's complement on the integer SSE modes, expressed as an XOR.
;; The preparation code builds a constant vector whose every element is
;; constm1_rtx (all ones), forces it into a register and stores it in
;; operands[2].
4409 (define_expand "one_cmpl<mode>2"
4410 [(set (match_operand:SSEMODEI 0 "register_operand" "")
4411 (xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
4415 int i, n = GET_MODE_NUNITS (<MODE>mode);
4416 rtvec v = rtvec_alloc (n);
4418 for (i = 0; i < n; ++i)
4419 RTVEC_ELT (v, i) = constm1_rtx;
4421 operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v));
;; Expander for bitwise AND on the integer SSE modes.  Its preparation
;; statement only calls ix86_fixup_binary_operands_no_copy for AND.
4424 (define_expand "and<mode>3"
4425 [(set (match_operand:SSEMODEI 0 "register_operand" "")
4426 (and:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
4427 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
4429 "ix86_fixup_binary_operands_no_copy (AND, <MODE>mode, operands);")
;; Bitwise AND of integer vectors when SSE is available but SSE2 is not:
;; emits andps and is attributed as V4SF mode.
4431 (define_insn "*sse_and<mode>3"
4432 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
4434 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
4435 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
4436 "(TARGET_SSE && !TARGET_SSE2)
4437 && ix86_binary_operator_ok (AND, <MODE>mode, operands)"
4438 "andps\t{%2, %0|%0, %2}"
4439 [(set_attr "type" "sselog")
4440 (set_attr "mode" "V4SF")])
;; SSE2 bitwise AND of integer vectors: emits pand.  Operand 1 is tied
;; to the destination and marked commutative ("%0").
4442 (define_insn "*sse2_and<mode>3"
4443 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
4445 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
4446 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
4447 "TARGET_SSE2 && ix86_binary_operator_ok (AND, <MODE>mode, operands)"
4448 "pand\t{%2, %0|%0, %2}"
4449 [(set_attr "type" "sselog")
4450 (set_attr "prefix_data16" "1")
4451 (set_attr "mode" "TI")])
;; AND-NOT of integer vectors when SSE is available but SSE2 is not:
;; operand 1 (tied to the destination) appears complemented in the
;; pattern; emits andnps and is attributed as V4SF mode.
4453 (define_insn "*sse_nand<mode>3"
4454 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
4456 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0"))
4457 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
4458 "(TARGET_SSE && !TARGET_SSE2)"
4459 "andnps\t{%2, %0|%0, %2}"
4460 [(set_attr "type" "sselog")
4461 (set_attr "mode" "V4SF")])
;; AND-NOT of integer vectors: operand 1 (tied to the destination)
;; appears complemented in the pattern; emits pandn.
4463 (define_insn "sse2_nand<mode>3"
4464 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
4466 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0"))
4467 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
4469 "pandn\t{%2, %0|%0, %2}"
4470 [(set_attr "type" "sselog")
4471 (set_attr "prefix_data16" "1")
4472 (set_attr "mode" "TI")])
;; Expander for bitwise AND on TFmode values.  Its preparation statement
;; only calls ix86_fixup_binary_operands_no_copy for AND.
4474 (define_expand "andtf3"
4475 [(set (match_operand:TF 0 "register_operand" "")
4476 (and:TF (match_operand:TF 1 "nonimmediate_operand" "")
4477 (match_operand:TF 2 "nonimmediate_operand" "")))]
4479 "ix86_fixup_binary_operands_no_copy (AND, TFmode, operands);")
;; Bitwise AND on TFmode in an SSE register: emits pand.  Enabled only
;; under TARGET_64BIT.
4481 (define_insn "*andtf3"
4482 [(set (match_operand:TF 0 "register_operand" "=x")
4484 (match_operand:TF 1 "nonimmediate_operand" "%0")
4485 (match_operand:TF 2 "nonimmediate_operand" "xm")))]
4486 "TARGET_64BIT && ix86_binary_operator_ok (AND, TFmode, operands)"
4487 "pand\t{%2, %0|%0, %2}"
4488 [(set_attr "type" "sselog")
4489 (set_attr "prefix_data16" "1")
4490 (set_attr "mode" "TI")])
;; AND-NOT on TFmode in an SSE register: operand 1 (tied to the
;; destination) appears complemented in the pattern; emits pandn.
4492 (define_insn "*nandtf3"
4493 [(set (match_operand:TF 0 "register_operand" "=x")
4495 (not:TF (match_operand:TF 1 "register_operand" "0"))
4496 (match_operand:TF 2 "nonimmediate_operand" "xm")))]
4498 "pandn\t{%2, %0|%0, %2}"
4499 [(set_attr "type" "sselog")
4500 (set_attr "prefix_data16" "1")
4501 (set_attr "mode" "TI")])
;; Expander for bitwise inclusive OR on the integer SSE modes.  Its
;; preparation statement only calls ix86_fixup_binary_operands_no_copy
;; for IOR.
4503 (define_expand "ior<mode>3"
4504 [(set (match_operand:SSEMODEI 0 "register_operand" "")
4505 (ior:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
4506 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
4508 "ix86_fixup_binary_operands_no_copy (IOR, <MODE>mode, operands);")
;; Bitwise OR of integer vectors when SSE is available but SSE2 is not:
;; emits orps and is attributed as V4SF mode.
4510 (define_insn "*sse_ior<mode>3"
4511 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
4513 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
4514 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
4515 "(TARGET_SSE && !TARGET_SSE2)
4516 && ix86_binary_operator_ok (IOR, <MODE>mode, operands)"
4517 "orps\t{%2, %0|%0, %2}"
4518 [(set_attr "type" "sselog")
4519 (set_attr "mode" "V4SF")])
;; SSE2 bitwise OR of integer vectors: emits por.  Operand 1 is tied to
;; the destination and marked commutative ("%0").
4521 (define_insn "*sse2_ior<mode>3"
4522 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
4524 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
4525 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
4526 "TARGET_SSE2 && ix86_binary_operator_ok (IOR, <MODE>mode, operands)"
4527 "por\t{%2, %0|%0, %2}"
4528 [(set_attr "type" "sselog")
4529 (set_attr "prefix_data16" "1")
4530 (set_attr "mode" "TI")])
;; Expander for bitwise inclusive OR on TFmode values.  Its preparation
;; statement only calls ix86_fixup_binary_operands_no_copy for IOR.
4532 (define_expand "iortf3"
4533 [(set (match_operand:TF 0 "register_operand" "")
4534 (ior:TF (match_operand:TF 1 "nonimmediate_operand" "")
4535 (match_operand:TF 2 "nonimmediate_operand" "")))]
4537 "ix86_fixup_binary_operands_no_copy (IOR, TFmode, operands);")
;; Bitwise OR on TFmode in an SSE register: emits por.  Enabled only
;; under TARGET_64BIT.
4539 (define_insn "*iortf3"
4540 [(set (match_operand:TF 0 "register_operand" "=x")
4542 (match_operand:TF 1 "nonimmediate_operand" "%0")
4543 (match_operand:TF 2 "nonimmediate_operand" "xm")))]
4544 "TARGET_64BIT && ix86_binary_operator_ok (IOR, TFmode, operands)"
4545 "por\t{%2, %0|%0, %2}"
4546 [(set_attr "type" "sselog")
4547 (set_attr "prefix_data16" "1")
4548 (set_attr "mode" "TI")])
;; Expander for bitwise exclusive OR on the integer SSE modes.  Its
;; preparation statement only calls ix86_fixup_binary_operands_no_copy
;; for XOR.
4550 (define_expand "xor<mode>3"
4551 [(set (match_operand:SSEMODEI 0 "register_operand" "")
4552 (xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
4553 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
4555 "ix86_fixup_binary_operands_no_copy (XOR, <MODE>mode, operands);")
;; Bitwise XOR of integer vectors when SSE is available but SSE2 is not:
;; emits xorps and is attributed as V4SF mode.
4557 (define_insn "*sse_xor<mode>3"
4558 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
4560 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
4561 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
4562 "(TARGET_SSE && !TARGET_SSE2)
4563 && ix86_binary_operator_ok (XOR, <MODE>mode, operands)"
4564 "xorps\t{%2, %0|%0, %2}"
4565 [(set_attr "type" "sselog")
4566 (set_attr "mode" "V4SF")])
;; SSE2 bitwise XOR of integer vectors: emits pxor.  Operand 1 is tied
;; to the destination and marked commutative ("%0").
4568 (define_insn "*sse2_xor<mode>3"
4569 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
4571 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
4572 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
4573 "TARGET_SSE2 && ix86_binary_operator_ok (XOR, <MODE>mode, operands)"
4574 "pxor\t{%2, %0|%0, %2}"
4575 [(set_attr "type" "sselog")
4576 (set_attr "prefix_data16" "1")
4577 (set_attr "mode" "TI")])
;; Expander for bitwise exclusive OR on TFmode values.  Its preparation
;; statement only calls ix86_fixup_binary_operands_no_copy for XOR.
4579 (define_expand "xortf3"
4580 [(set (match_operand:TF 0 "register_operand" "")
4581 (xor:TF (match_operand:TF 1 "nonimmediate_operand" "")
4582 (match_operand:TF 2 "nonimmediate_operand" "")))]
4584 "ix86_fixup_binary_operands_no_copy (XOR, TFmode, operands);")
;; Bitwise XOR on TFmode in an SSE register: emits pxor.  Enabled only
;; under TARGET_64BIT.
4586 (define_insn "*xortf3"
4587 [(set (match_operand:TF 0 "register_operand" "=x")
4589 (match_operand:TF 1 "nonimmediate_operand" "%0")
4590 (match_operand:TF 2 "nonimmediate_operand" "xm")))]
4591 "TARGET_64BIT && ix86_binary_operator_ok (XOR, TFmode, operands)"
4592 "pxor\t{%2, %0|%0, %2}"
4593 [(set_attr "type" "sselog")
4594 (set_attr "prefix_data16" "1")
4595 (set_attr "mode" "TI")])
4597 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4599 ;; Parallel integral element swizzling
4601 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4604 ;; op1 = abcdefghijklmnop
4605 ;; op2 = qrstuvwxyz012345
4606 ;; h1 = aqbrcsdteufvgwhx
4607 ;; l1 = iyjzk0l1m2n3o4p5
4608 ;; h2 = aiqybjrzcks0dlt1
4609 ;; l2 = emu2fnv3gow4hpx5
4610 ;; h3 = aeimquy2bfjnrvz3
4611 ;; l3 = cgkosw04dhlptx15
4612 ;; result = bdfhjlnprtvxz135
;; Packs two V8HI inputs into one V16QI result.  Both inputs are viewed
;; as V16QI via gen_lowpart and run through three rounds of high/low
;; byte interleaves into fresh pseudos, followed by a final low
;; interleave into operand 0 (see the element trace above).
4613 (define_expand "vec_pack_trunc_v8hi"
4614 [(match_operand:V16QI 0 "register_operand" "")
4615 (match_operand:V8HI 1 "register_operand" "")
4616 (match_operand:V8HI 2 "register_operand" "")]
4619 rtx op1, op2, h1, l1, h2, l2, h3, l3;
4621 op1 = gen_lowpart (V16QImode, operands[1]);
4622 op2 = gen_lowpart (V16QImode, operands[2]);
4623 h1 = gen_reg_rtx (V16QImode);
4624 l1 = gen_reg_rtx (V16QImode);
4625 h2 = gen_reg_rtx (V16QImode);
4626 l2 = gen_reg_rtx (V16QImode);
4627 h3 = gen_reg_rtx (V16QImode);
4628 l3 = gen_reg_rtx (V16QImode);
4630 emit_insn (gen_vec_interleave_highv16qi (h1, op1, op2));
4631 emit_insn (gen_vec_interleave_lowv16qi (l1, op1, op2));
4632 emit_insn (gen_vec_interleave_highv16qi (h2, l1, h1));
4633 emit_insn (gen_vec_interleave_lowv16qi (l2, l1, h1));
4634 emit_insn (gen_vec_interleave_highv16qi (h3, l2, h2));
4635 emit_insn (gen_vec_interleave_lowv16qi (l3, l2, h2));
4636 emit_insn (gen_vec_interleave_lowv16qi (operands[0], l3, h3));
4647 ;; result = bdfhjlnp
;; Packs two V4SI inputs into one V8HI result.  Both inputs are viewed
;; as V8HI via gen_lowpart and run through two rounds of high/low
;; halfword interleaves into fresh pseudos, followed by a final low
;; interleave into operand 0.
4648 (define_expand "vec_pack_trunc_v4si"
4649 [(match_operand:V8HI 0 "register_operand" "")
4650 (match_operand:V4SI 1 "register_operand" "")
4651 (match_operand:V4SI 2 "register_operand" "")]
4654 rtx op1, op2, h1, l1, h2, l2;
4656 op1 = gen_lowpart (V8HImode, operands[1]);
4657 op2 = gen_lowpart (V8HImode, operands[2]);
4658 h1 = gen_reg_rtx (V8HImode);
4659 l1 = gen_reg_rtx (V8HImode);
4660 h2 = gen_reg_rtx (V8HImode);
4661 l2 = gen_reg_rtx (V8HImode);
4663 emit_insn (gen_vec_interleave_highv8hi (h1, op1, op2));
4664 emit_insn (gen_vec_interleave_lowv8hi (l1, op1, op2));
4665 emit_insn (gen_vec_interleave_highv8hi (h2, l1, h1));
4666 emit_insn (gen_vec_interleave_lowv8hi (l2, l1, h1));
4667 emit_insn (gen_vec_interleave_lowv8hi (operands[0], l2, h2));
;; Packs two V2DI inputs into one V4SI result.  Both inputs are viewed
;; as V4SI via gen_lowpart, interleaved high and low into fresh pseudos,
;; and those two results are low-interleaved into operand 0.
4677 (define_expand "vec_pack_trunc_v2di"
4678 [(match_operand:V4SI 0 "register_operand" "")
4679 (match_operand:V2DI 1 "register_operand" "")
4680 (match_operand:V2DI 2 "register_operand" "")]
4683 rtx op1, op2, h1, l1;
4685 op1 = gen_lowpart (V4SImode, operands[1]);
4686 op2 = gen_lowpart (V4SImode, operands[2]);
4687 h1 = gen_reg_rtx (V4SImode);
4688 l1 = gen_reg_rtx (V4SImode);
4690 emit_insn (gen_vec_interleave_highv4si (h1, op1, op2));
4691 emit_insn (gen_vec_interleave_lowv4si (l1, op1, op2));
4692 emit_insn (gen_vec_interleave_lowv4si (operands[0], l1, h1));
;; High byte interleave of two V16QI operands.  The selector alternates
;; indices 8-15 with 24-31; expansion emits sse2_punpckhbw.
4696 (define_expand "vec_interleave_highv16qi"
4697 [(set (match_operand:V16QI 0 "register_operand" "")
4700 (match_operand:V16QI 1 "register_operand" "")
4701 (match_operand:V16QI 2 "nonimmediate_operand" ""))
4702 (parallel [(const_int 8) (const_int 24)
4703 (const_int 9) (const_int 25)
4704 (const_int 10) (const_int 26)
4705 (const_int 11) (const_int 27)
4706 (const_int 12) (const_int 28)
4707 (const_int 13) (const_int 29)
4708 (const_int 14) (const_int 30)
4709 (const_int 15) (const_int 31)])))]
4712 emit_insn (gen_sse2_punpckhbw (operands[0], operands[1], operands[2]));
;; Low byte interleave of two V16QI operands.  The selector alternates
;; indices 0-7 with 16-23; expansion emits sse2_punpcklbw.
4716 (define_expand "vec_interleave_lowv16qi"
4717 [(set (match_operand:V16QI 0 "register_operand" "")
4720 (match_operand:V16QI 1 "register_operand" "")
4721 (match_operand:V16QI 2 "nonimmediate_operand" ""))
4722 (parallel [(const_int 0) (const_int 16)
4723 (const_int 1) (const_int 17)
4724 (const_int 2) (const_int 18)
4725 (const_int 3) (const_int 19)
4726 (const_int 4) (const_int 20)
4727 (const_int 5) (const_int 21)
4728 (const_int 6) (const_int 22)
4729 (const_int 7) (const_int 23)])))]
4732 emit_insn (gen_sse2_punpcklbw (operands[0], operands[1], operands[2]));
;; High halfword interleave of two V8HI operands.  The selector
;; alternates indices 4-7 with 12-15; expansion emits sse2_punpckhwd.
;; Operand 0 carries an empty constraint string like every other
;; vec_interleave_* expander here.
4736 (define_expand "vec_interleave_highv8hi"
4737 [(set (match_operand:V8HI 0 "register_operand" "")
4740 (match_operand:V8HI 1 "register_operand" "")
4741 (match_operand:V8HI 2 "nonimmediate_operand" ""))
4742 (parallel [(const_int 4) (const_int 12)
4743 (const_int 5) (const_int 13)
4744 (const_int 6) (const_int 14)
4745 (const_int 7) (const_int 15)])))]
4748 emit_insn (gen_sse2_punpckhwd (operands[0], operands[1], operands[2]));
;; Low halfword interleave of two V8HI operands.  The selector
;; alternates indices 0-3 with 8-11; expansion emits sse2_punpcklwd.
4752 (define_expand "vec_interleave_lowv8hi"
4753 [(set (match_operand:V8HI 0 "register_operand" "")
4756 (match_operand:V8HI 1 "register_operand" "")
4757 (match_operand:V8HI 2 "nonimmediate_operand" ""))
4758 (parallel [(const_int 0) (const_int 8)
4759 (const_int 1) (const_int 9)
4760 (const_int 2) (const_int 10)
4761 (const_int 3) (const_int 11)])))]
4764 emit_insn (gen_sse2_punpcklwd (operands[0], operands[1], operands[2]));
;; High doubleword interleave of two V4SI operands.  The selector is
;; 2, 6, 3, 7; expansion emits sse2_punpckhdq.
4768 (define_expand "vec_interleave_highv4si"
4769 [(set (match_operand:V4SI 0 "register_operand" "")
4772 (match_operand:V4SI 1 "register_operand" "")
4773 (match_operand:V4SI 2 "nonimmediate_operand" ""))
4774 (parallel [(const_int 2) (const_int 6)
4775 (const_int 3) (const_int 7)])))]
4778 emit_insn (gen_sse2_punpckhdq (operands[0], operands[1], operands[2]));
;; Low doubleword interleave of two V4SI operands.  The selector is
;; 0, 4, 1, 5; expansion emits sse2_punpckldq.
4782 (define_expand "vec_interleave_lowv4si"
4783 [(set (match_operand:V4SI 0 "register_operand" "")
4786 (match_operand:V4SI 1 "register_operand" "")
4787 (match_operand:V4SI 2 "nonimmediate_operand" ""))
4788 (parallel [(const_int 0) (const_int 4)
4789 (const_int 1) (const_int 5)])))]
4792 emit_insn (gen_sse2_punpckldq (operands[0], operands[1], operands[2]));
;; High quadword interleave of two V2DI operands; expansion emits
;; sse2_punpckhqdq.
4796 (define_expand "vec_interleave_highv2di"
4797 [(set (match_operand:V2DI 0 "register_operand" "")
4800 (match_operand:V2DI 1 "register_operand" "")
4801 (match_operand:V2DI 2 "nonimmediate_operand" ""))
4802 (parallel [(const_int 1)
4806 emit_insn (gen_sse2_punpckhqdq (operands[0], operands[1], operands[2]));
;; Low quadword interleave of two V2DI operands; expansion emits
;; sse2_punpcklqdq.
4810 (define_expand "vec_interleave_lowv2di"
4811 [(set (match_operand:V2DI 0 "register_operand" "")
4814 (match_operand:V2DI 1 "register_operand" "")
4815 (match_operand:V2DI 2 "nonimmediate_operand" ""))
4816 (parallel [(const_int 0)
4820 emit_insn (gen_sse2_punpcklqdq (operands[0], operands[1], operands[2]));
;; Emits packsswb: combines two V8HI sources into a V16QI destination.
;; Operand 1 is a register tied to the destination; operand 2 may be a
;; register or memory.
4824 (define_insn "sse2_packsswb"
4825 [(set (match_operand:V16QI 0 "register_operand" "=x")
4828 (match_operand:V8HI 1 "register_operand" "0"))
4830 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
4832 "packsswb\t{%2, %0|%0, %2}"
4833 [(set_attr "type" "sselog")
4834 (set_attr "prefix_data16" "1")
4835 (set_attr "mode" "TI")])
;; Emits packssdw: combines two V4SI sources into a V8HI destination.
;; Operand 1 is a register tied to the destination; operand 2 may be a
;; register or memory.
4837 (define_insn "sse2_packssdw"
4838 [(set (match_operand:V8HI 0 "register_operand" "=x")
4841 (match_operand:V4SI 1 "register_operand" "0"))
4843 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
4845 "packssdw\t{%2, %0|%0, %2}"
4846 [(set_attr "type" "sselog")
4847 (set_attr "prefix_data16" "1")
4848 (set_attr "mode" "TI")])
;; Emits packuswb: combines two V8HI sources into a V16QI destination.
;; Operand 1 is a register tied to the destination; operand 2 may be a
;; register or memory.
4850 (define_insn "sse2_packuswb"
4851 [(set (match_operand:V16QI 0 "register_operand" "=x")
4854 (match_operand:V8HI 1 "register_operand" "0"))
4856 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
4858 "packuswb\t{%2, %0|%0, %2}"
4859 [(set_attr "type" "sselog")
4860 (set_attr "prefix_data16" "1")
4861 (set_attr "mode" "TI")])
;; Emits punpckhbw on V16QI.  The selector alternates indices 8-15 with
;; 24-31.  Operand 1 is a register tied to the destination; operand 2
;; may be a register or memory.
4863 (define_insn "sse2_punpckhbw"
4864 [(set (match_operand:V16QI 0 "register_operand" "=x")
4867 (match_operand:V16QI 1 "register_operand" "0")
4868 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
4869 (parallel [(const_int 8) (const_int 24)
4870 (const_int 9) (const_int 25)
4871 (const_int 10) (const_int 26)
4872 (const_int 11) (const_int 27)
4873 (const_int 12) (const_int 28)
4874 (const_int 13) (const_int 29)
4875 (const_int 14) (const_int 30)
4876 (const_int 15) (const_int 31)])))]
4878 "punpckhbw\t{%2, %0|%0, %2}"
4879 [(set_attr "type" "sselog")
4880 (set_attr "prefix_data16" "1")
4881 (set_attr "mode" "TI")])
;; Emits punpcklbw on V16QI.  The selector alternates indices 0-7 with
;; 16-23.  Operand 1 is a register tied to the destination; operand 2
;; may be a register or memory.
4883 (define_insn "sse2_punpcklbw"
4884 [(set (match_operand:V16QI 0 "register_operand" "=x")
4887 (match_operand:V16QI 1 "register_operand" "0")
4888 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
4889 (parallel [(const_int 0) (const_int 16)
4890 (const_int 1) (const_int 17)
4891 (const_int 2) (const_int 18)
4892 (const_int 3) (const_int 19)
4893 (const_int 4) (const_int 20)
4894 (const_int 5) (const_int 21)
4895 (const_int 6) (const_int 22)
4896 (const_int 7) (const_int 23)])))]
4898 "punpcklbw\t{%2, %0|%0, %2}"
4899 [(set_attr "type" "sselog")
4900 (set_attr "prefix_data16" "1")
4901 (set_attr "mode" "TI")])
;; Emits punpckhwd on V8HI.  The selector alternates indices 4-7 with
;; 12-15.  Operand 1 is a register tied to the destination; operand 2
;; may be a register or memory.
4903 (define_insn "sse2_punpckhwd"
4904 [(set (match_operand:V8HI 0 "register_operand" "=x")
4907 (match_operand:V8HI 1 "register_operand" "0")
4908 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
4909 (parallel [(const_int 4) (const_int 12)
4910 (const_int 5) (const_int 13)
4911 (const_int 6) (const_int 14)
4912 (const_int 7) (const_int 15)])))]
4914 "punpckhwd\t{%2, %0|%0, %2}"
4915 [(set_attr "type" "sselog")
4916 (set_attr "prefix_data16" "1")
4917 (set_attr "mode" "TI")])
;; Emits punpcklwd on V8HI.  The selector alternates indices 0-3 with
;; 8-11.  Operand 1 is a register tied to the destination; operand 2
;; may be a register or memory.
4919 (define_insn "sse2_punpcklwd"
4920 [(set (match_operand:V8HI 0 "register_operand" "=x")
4923 (match_operand:V8HI 1 "register_operand" "0")
4924 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
4925 (parallel [(const_int 0) (const_int 8)
4926 (const_int 1) (const_int 9)
4927 (const_int 2) (const_int 10)
4928 (const_int 3) (const_int 11)])))]
4930 "punpcklwd\t{%2, %0|%0, %2}"
4931 [(set_attr "type" "sselog")
4932 (set_attr "prefix_data16" "1")
4933 (set_attr "mode" "TI")])
;; Emits punpckhdq on V4SI with selector 2, 6, 3, 7.  Operand 1 is a
;; register tied to the destination; operand 2 may be a register or
;; memory.
4935 (define_insn "sse2_punpckhdq"
4936 [(set (match_operand:V4SI 0 "register_operand" "=x")
4939 (match_operand:V4SI 1 "register_operand" "0")
4940 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
4941 (parallel [(const_int 2) (const_int 6)
4942 (const_int 3) (const_int 7)])))]
4944 "punpckhdq\t{%2, %0|%0, %2}"
4945 [(set_attr "type" "sselog")
4946 (set_attr "prefix_data16" "1")
4947 (set_attr "mode" "TI")])
;; Emits punpckldq on V4SI with selector 0, 4, 1, 5.  Operand 1 is a
;; register tied to the destination; operand 2 may be a register or
;; memory.
4949 (define_insn "sse2_punpckldq"
4950 [(set (match_operand:V4SI 0 "register_operand" "=x")
4953 (match_operand:V4SI 1 "register_operand" "0")
4954 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
4955 (parallel [(const_int 0) (const_int 4)
4956 (const_int 1) (const_int 5)])))]
4958 "punpckldq\t{%2, %0|%0, %2}"
4959 [(set_attr "type" "sselog")
4960 (set_attr "prefix_data16" "1")
4961 (set_attr "mode" "TI")])
;; Emits punpckhqdq on V2DI.  Operand 1 is a register tied to the
;; destination; operand 2 may be a register or memory.
4963 (define_insn "sse2_punpckhqdq"
4964 [(set (match_operand:V2DI 0 "register_operand" "=x")
4967 (match_operand:V2DI 1 "register_operand" "0")
4968 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4969 (parallel [(const_int 1)
4972 "punpckhqdq\t{%2, %0|%0, %2}"
4973 [(set_attr "type" "sselog")
4974 (set_attr "prefix_data16" "1")
4975 (set_attr "mode" "TI")])
;; Emits punpcklqdq on V2DI.  Operand 1 is a register tied to the
;; destination; operand 2 may be a register or memory.
4977 (define_insn "sse2_punpcklqdq"
4978 [(set (match_operand:V2DI 0 "register_operand" "=x")
4981 (match_operand:V2DI 1 "register_operand" "0")
4982 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4983 (parallel [(const_int 0)
4986 "punpcklqdq\t{%2, %0|%0, %2}"
4987 [(set_attr "type" "sselog")
4988 (set_attr "prefix_data16" "1")
4989 (set_attr "mode" "TI")])
;; Inserts a QImode value (operand 2, register or memory) into a V16QI
;; register with pinsrb.  Operand 3 arrives as a power-of-two constant
;; in the range 1..32768; the output code converts it to an element
;; index with exact_log2 before printing the immediate.
4991 (define_insn "*sse4_1_pinsrb"
4992 [(set (match_operand:V16QI 0 "register_operand" "=x")
4994 (vec_duplicate:V16QI
4995 (match_operand:QI 2 "nonimmediate_operand" "rm"))
4996 (match_operand:V16QI 1 "register_operand" "0")
4997 (match_operand:SI 3 "const_pow2_1_to_32768_operand" "n")))]
5000 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
5001 return "pinsrb\t{%3, %k2, %0|%0, %k2, %3}";
5003 [(set_attr "type" "sselog")
5004 (set_attr "prefix_extra" "1")
5005 (set_attr "mode" "TI")])
;; Inserts an HImode value (operand 2, register or memory) into a V8HI
;; register with pinsrw.  Operand 3 arrives as a power-of-two constant
;; in the range 1..128; the output code converts it to an element index
;; with exact_log2 before printing the immediate.
5007 (define_insn "*sse2_pinsrw"
5008 [(set (match_operand:V8HI 0 "register_operand" "=x")
5011 (match_operand:HI 2 "nonimmediate_operand" "rm"))
5012 (match_operand:V8HI 1 "register_operand" "0")
5013 (match_operand:SI 3 "const_pow2_1_to_128_operand" "n")))]
5016 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
5017 return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}";
5019 [(set_attr "type" "sselog")
5020 (set_attr "prefix_data16" "1")
5021 (set_attr "mode" "TI")])
5023 ;; It must come before sse2_loadld since it is preferred.
;; Inserts an SImode value (operand 2, register or memory) into a V4SI
;; register with pinsrd.  Operand 3 arrives as a power-of-two constant
;; in the range 1..8; the output code converts it to an element index
;; with exact_log2 before printing the immediate.
5024 (define_insn "*sse4_1_pinsrd"
5025 [(set (match_operand:V4SI 0 "register_operand" "=x")
5028 (match_operand:SI 2 "nonimmediate_operand" "rm"))
5029 (match_operand:V4SI 1 "register_operand" "0")
5030 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
5033 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
5034 return "pinsrd\t{%3, %2, %0|%0, %2, %3}";
5036 [(set_attr "type" "sselog")
5037 (set_attr "prefix_extra" "1")
5038 (set_attr "mode" "TI")])
;; Inserts a DImode value (operand 2, register or memory) into a V2DI
;; register with pinsrq.  Operand 3 arrives as a power-of-two constant
;; in the range 1..2; the output code converts it to an element index
;; with exact_log2 before printing the immediate.
5040 (define_insn "*sse4_1_pinsrq"
5041 [(set (match_operand:V2DI 0 "register_operand" "=x")
5044 (match_operand:DI 2 "nonimmediate_operand" "rm"))
5045 (match_operand:V2DI 1 "register_operand" "0")
5046 (match_operand:SI 3 "const_pow2_1_to_2_operand" "n")))]
5049 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
5050 return "pinsrq\t{%3, %2, %0|%0, %2, %3}";
5052 [(set_attr "type" "sselog")
5053 (set_attr "prefix_extra" "1")
5054 (set_attr "mode" "TI")])
;; Extracts the byte selected by operand 2 (0-15) from a V16QI register
;; into an SImode register destination with pextrb.
5056 (define_insn "*sse4_1_pextrb"
5057 [(set (match_operand:SI 0 "register_operand" "=r")
5060 (match_operand:V16QI 1 "register_operand" "x")
5061 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")]))))]
5063 "pextrb\t{%2, %1, %0|%0, %1, %2}"
5064 [(set_attr "type" "sselog")
5065 (set_attr "prefix_extra" "1")
5066 (set_attr "mode" "TI")])
;; Extracts the byte selected by operand 2 (0-15) from a V16QI register
;; and stores it to a QImode memory destination with pextrb.
5068 (define_insn "*sse4_1_pextrb_memory"
5069 [(set (match_operand:QI 0 "memory_operand" "=m")
5071 (match_operand:V16QI 1 "register_operand" "x")
5072 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")])))]
5074 "pextrb\t{%2, %1, %0|%0, %1, %2}"
5075 [(set_attr "type" "sselog")
5076 (set_attr "prefix_extra" "1")
5077 (set_attr "mode" "TI")])
;; Extracts the halfword selected by operand 2 (0-7) from a V8HI
;; register into an SImode register destination with pextrw.
5079 (define_insn "*sse2_pextrw"
5080 [(set (match_operand:SI 0 "register_operand" "=r")
5083 (match_operand:V8HI 1 "register_operand" "x")
5084 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")]))))]
5086 "pextrw\t{%2, %1, %0|%0, %1, %2}"
5087 [(set_attr "type" "sselog")
5088 (set_attr "prefix_data16" "1")
5089 (set_attr "mode" "TI")])
;; Extracts the halfword selected by operand 2 (0-7) from a V8HI
;; register and stores it to an HImode memory destination with pextrw.
5091 (define_insn "*sse4_1_pextrw_memory"
5092 [(set (match_operand:HI 0 "memory_operand" "=m")
5094 (match_operand:V8HI 1 "register_operand" "x")
5095 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")])))]
5097 "pextrw\t{%2, %1, %0|%0, %1, %2}"
5098 [(set_attr "type" "sselog")
5099 (set_attr "prefix_extra" "1")
5100 (set_attr "mode" "TI")])
;; Extracts the doubleword selected by operand 2 (0-3) from a V4SI
;; register into an SImode register or memory destination with pextrd.
5102 (define_insn "*sse4_1_pextrd"
5103 [(set (match_operand:SI 0 "nonimmediate_operand" "=rm")
5105 (match_operand:V4SI 1 "register_operand" "x")
5106 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
5108 "pextrd\t{%2, %1, %0|%0, %1, %2}"
5109 [(set_attr "type" "sselog")
5110 (set_attr "prefix_extra" "1")
5111 (set_attr "mode" "TI")])
5113 ;; It must come before *vec_extractv2di_1_sse since it is preferred.
;; Extracts the quadword selected by operand 2 (0-1) from a V2DI
;; register into a DImode register or memory destination with pextrq.
;; Requires both TARGET_SSE4_1 and TARGET_64BIT.
5114 (define_insn "*sse4_1_pextrq"
5115 [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
5117 (match_operand:V2DI 1 "register_operand" "x")
5118 (parallel [(match_operand:SI 2 "const_0_to_1_operand" "n")])))]
5119 "TARGET_SSE4_1 && TARGET_64BIT"
5120 "pextrq\t{%2, %1, %0|%0, %1, %2}"
5121 [(set_attr "type" "sselog")
5122 (set_attr "prefix_extra" "1")
5123 (set_attr "mode" "TI")])
;; Expander taking a single integer shuffle mask (operand 2).  It splits
;; the mask into four 2-bit selectors (bits 0-1, 2-3, 4-5, 6-7) and
;; passes them as separate operands to sse2_pshufd_1.
5125 (define_expand "sse2_pshufd"
5126 [(match_operand:V4SI 0 "register_operand" "")
5127 (match_operand:V4SI 1 "nonimmediate_operand" "")
5128 (match_operand:SI 2 "const_int_operand" "")]
5131 int mask = INTVAL (operands[2]);
5132 emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
5133 GEN_INT ((mask >> 0) & 3),
5134 GEN_INT ((mask >> 2) & 3),
5135 GEN_INT ((mask >> 4) & 3),
5136 GEN_INT ((mask >> 6) & 3)));
;; pshufd on V4SI with the four element selectors (each 0-3) held as
;; separate operands 2-5.  The output code packs them back into one
;; immediate, two bits per selector, and overwrites operands[2] with it
;; before printing.
5140 (define_insn "sse2_pshufd_1"
5141 [(set (match_operand:V4SI 0 "register_operand" "=x")
5143 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
5144 (parallel [(match_operand 2 "const_0_to_3_operand" "")
5145 (match_operand 3 "const_0_to_3_operand" "")
5146 (match_operand 4 "const_0_to_3_operand" "")
5147 (match_operand 5 "const_0_to_3_operand" "")])))]
5151 mask |= INTVAL (operands[2]) << 0;
5152 mask |= INTVAL (operands[3]) << 2;
5153 mask |= INTVAL (operands[4]) << 4;
5154 mask |= INTVAL (operands[5]) << 6;
5155 operands[2] = GEN_INT (mask);
5157 return "pshufd\t{%2, %1, %0|%0, %1, %2}";
5159 [(set_attr "type" "sselog1")
5160 (set_attr "prefix_data16" "1")
5161 (set_attr "mode" "TI")])
;; Expander taking a single integer shuffle mask (operand 2).  It splits
;; the mask into four 2-bit selectors (bits 0-1, 2-3, 4-5, 6-7) and
;; passes them as separate operands to sse2_pshuflw_1.
5163 (define_expand "sse2_pshuflw"
5164 [(match_operand:V8HI 0 "register_operand" "")
5165 (match_operand:V8HI 1 "nonimmediate_operand" "")
5166 (match_operand:SI 2 "const_int_operand" "")]
5169 int mask = INTVAL (operands[2]);
5170 emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
5171 GEN_INT ((mask >> 0) & 3),
5172 GEN_INT ((mask >> 2) & 3),
5173 GEN_INT ((mask >> 4) & 3),
5174 GEN_INT ((mask >> 6) & 3)));
;; pshuflw on V8HI with four element selectors (each 0-3) held as
;; separate operands 2-5.  The output code packs them back into one
;; immediate, two bits per selector, and overwrites operands[2] with it
;; before printing.
5178 (define_insn "sse2_pshuflw_1"
5179 [(set (match_operand:V8HI 0 "register_operand" "=x")
5181 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
5182 (parallel [(match_operand 2 "const_0_to_3_operand" "")
5183 (match_operand 3 "const_0_to_3_operand" "")
5184 (match_operand 4 "const_0_to_3_operand" "")
5185 (match_operand 5 "const_0_to_3_operand" "")
5193 mask |= INTVAL (operands[2]) << 0;
5194 mask |= INTVAL (operands[3]) << 2;
5195 mask |= INTVAL (operands[4]) << 4;
5196 mask |= INTVAL (operands[5]) << 6;
5197 operands[2] = GEN_INT (mask);
5199 return "pshuflw\t{%2, %1, %0|%0, %1, %2}";
5201 [(set_attr "type" "sselog")
5202 (set_attr "prefix_rep" "1")
5203 (set_attr "mode" "TI")])
;; Expander taking a single integer shuffle mask (operand 2).  It splits
;; the mask into four 2-bit selectors and adds 4 to each, giving indices
;; in the range 4-7, before passing them to sse2_pshufhw_1.
5205 (define_expand "sse2_pshufhw"
5206 [(match_operand:V8HI 0 "register_operand" "")
5207 (match_operand:V8HI 1 "nonimmediate_operand" "")
5208 (match_operand:SI 2 "const_int_operand" "")]
5211 int mask = INTVAL (operands[2]);
5212 emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
5213 GEN_INT (((mask >> 0) & 3) + 4),
5214 GEN_INT (((mask >> 2) & 3) + 4),
5215 GEN_INT (((mask >> 4) & 3) + 4),
5216 GEN_INT (((mask >> 6) & 3) + 4)));
;; pshufhw on V8HI with four element selectors (each 4-7) held as
;; separate operands 2-5.  The output code subtracts 4 from each, packs
;; them into one immediate, two bits per selector, and overwrites
;; operands[2] with it before printing.
5220 (define_insn "sse2_pshufhw_1"
5221 [(set (match_operand:V8HI 0 "register_operand" "=x")
5223 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
5224 (parallel [(const_int 0)
5228 (match_operand 2 "const_4_to_7_operand" "")
5229 (match_operand 3 "const_4_to_7_operand" "")
5230 (match_operand 4 "const_4_to_7_operand" "")
5231 (match_operand 5 "const_4_to_7_operand" "")])))]
5235 mask |= (INTVAL (operands[2]) - 4) << 0;
5236 mask |= (INTVAL (operands[3]) - 4) << 2;
5237 mask |= (INTVAL (operands[4]) - 4) << 4;
5238 mask |= (INTVAL (operands[5]) - 4) << 6;
5239 operands[2] = GEN_INT (mask);
5241 return "pshufhw\t{%2, %1, %0|%0, %1, %2}";
5243 [(set_attr "type" "sselog")
5244 (set_attr "prefix_rep" "1")
5245 (set_attr "mode" "TI")])
;; Expander producing a V4SI register (operand 0) from an SI source
;; (operand 1).  The preparation statement sets operands[2] to the V4SImode
;; zero constant.
5247 (define_expand "sse2_loadd"
5248 [(set (match_operand:V4SI 0 "register_operand" "")
5251 (match_operand:SI 1 "nonimmediate_operand" ""))
5255 "operands[2] = CONST0_RTX (V4SImode);")
;; Insn with four alternatives: the first two print movd and the last two
;; print movss, each moving SI operand 2 into V4SI operand 0.  Operand 1
;; (reg_or_0_operand) uses constraint "C" in the first three alternatives and
;; is tied to operand 0 in the fourth.
5257 (define_insn "sse2_loadld"
5258 [(set (match_operand:V4SI 0 "register_operand" "=Y2,Yi,x,x")
5261 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x"))
5262 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,C,0")
5266 movd\t{%2, %0|%0, %2}
5267 movd\t{%2, %0|%0, %2}
5268 movss\t{%2, %0|%0, %2}
5269 movss\t{%2, %0|%0, %2}"
5270 [(set_attr "type" "ssemov")
5271 (set_attr "mode" "TI,TI,V4SF,SF")])
;; Extract element 0 of a V4SI register into an SI destination.  Once reload
;; has completed, and provided inter-unit moves are enabled or the
;; destination is memory or not a general register, the pattern is split
;; into a plain SI move whose source is operand 1 re-expressed as an SImode
;; register with the same register number.
5273 (define_insn_and_split "sse2_stored"
5274 [(set (match_operand:SI 0 "nonimmediate_operand" "=mx,r")
5276 (match_operand:V4SI 1 "register_operand" "x,Yi")
5277 (parallel [(const_int 0)])))]
5280 "&& reload_completed
5281 && (TARGET_INTER_UNIT_MOVES
5282 || MEM_P (operands [0])
5283 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
5284 [(set (match_dup 0) (match_dup 1))]
5286 operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));
;; Extract element N (operand 2, accepted by const_0_to_3_operand) of a V4SI
;; memory operand into an SI register.  The split replaces it with an SImode
;; move from the same memory, re-addressed at byte offset N*4.
5289 (define_insn_and_split "*vec_ext_v4si_mem"
5290 [(set (match_operand:SI 0 "register_operand" "=r")
5292 (match_operand:V4SI 1 "memory_operand" "o")
5293 (parallel [(match_operand 2 "const_0_to_3_operand" "")])))]
5299 int i = INTVAL (operands[2]);
5301 emit_move_insn (operands[0], adjust_address (operands[1], SImode, i*4));
;; Expander: set a DI destination (register or memory) from element 0 of a
;; V2DI register.
5305 (define_expand "sse_storeq"
5306 [(set (match_operand:DI 0 "nonimmediate_operand" "")
5308 (match_operand:V2DI 1 "register_operand" "")
5309 (parallel [(const_int 0)])))]
;; 64-bit-only insn for the element-0 extract of a V2DI operand into DI.
;; Memory-to-memory operand pairs are rejected by the condition.  Of the
;; three alternatives, the third is typed "imov" and prints mov{q}.
5313 (define_insn "*sse2_storeq_rex64"
5314 [(set (match_operand:DI 0 "nonimmediate_operand" "=mx,r,r")
5316 (match_operand:V2DI 1 "nonimmediate_operand" "x,Yi,o")
5317 (parallel [(const_int 0)])))]
5318 "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
5322 mov{q}\t{%1, %0|%0, %1}"
5323 [(set_attr "type" "*,*,imov")
5324 (set_attr "mode" "*,*,DI")])
;; Element-0 extract of a V2DI register into a DI destination, followed by
;; the matching split.  The split rewrites the extract as a plain DI move
;; whose source is operand 1 re-expressed as a DImode register with the same
;; register number; it applies when inter-unit moves are enabled or the
;; destination is memory or not a general register.
5326 (define_insn "*sse2_storeq"
5327 [(set (match_operand:DI 0 "nonimmediate_operand" "=mx")
5329 (match_operand:V2DI 1 "register_operand" "x")
5330 (parallel [(const_int 0)])))]
5335 [(set (match_operand:DI 0 "nonimmediate_operand" "")
5337 (match_operand:V2DI 1 "register_operand" "")
5338 (parallel [(const_int 0)])))]
5341 && (TARGET_INTER_UNIT_MOVES
5342 || MEM_P (operands [0])
5343 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
5344 [(set (match_dup 0) (match_dup 1))]
5346 operands[1] = gen_rtx_REG (DImode, REGNO (operands[1]));
;; 64-bit-only extract of element 1 of a V2DI operand into DI.  Alternatives:
;; movhps to memory; psrldq by 8 on the destination (operand 1 tied to
;; operand 0); movq or mov{q} from the memory operand printed with the %H
;; modifier.  Memory-to-memory operand pairs are rejected.
5349 (define_insn "*vec_extractv2di_1_rex64"
5350 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x,r")
5352 (match_operand:V2DI 1 "nonimmediate_operand" "x,0,o,o")
5353 (parallel [(const_int 1)])))]
5354 "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
5356 movhps\t{%1, %0|%0, %1}
5357 psrldq\t{$8, %0|%0, 8}
5358 movq\t{%H1, %0|%0, %H1}
5359 mov{q}\t{%H1, %0|%0, %H1}"
5360 [(set_attr "type" "ssemov,sseishft,ssemov,imov")
5361 (set_attr "memory" "*,none,*,*")
5362 (set_attr "mode" "V2SF,TI,TI,DI")])
;; SSE2 extract of element 1 of a V2DI operand into DI.  Alternatives:
;; movhps to memory; psrldq by 8 on the destination (operand 1 tied to
;; operand 0); movq from the memory operand printed with the %H modifier.
;; Memory-to-memory operand pairs are rejected.
5364 (define_insn "*vec_extractv2di_1_sse2"
5365 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
5367 (match_operand:V2DI 1 "nonimmediate_operand" "x,0,o")
5368 (parallel [(const_int 1)])))]
5370 && TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
5372 movhps\t{%1, %0|%0, %1}
5373 psrldq\t{$8, %0|%0, 8}
5374 movq\t{%H1, %0|%0, %H1}"
5375 [(set_attr "type" "ssemov,sseishft,ssemov")
5376 (set_attr "memory" "*,none,*")
5377 (set_attr "mode" "V2SF,TI,TI")])
;; Extract of element 1 of a V2DI operand for SSE without SSE2.
;; Alternatives: movhps to memory, movhlps register to register, and movlps
;; from the memory operand printed with the %H modifier.
5379 ;; Not sure this is ever used, but it doesn't hurt to have it. -aoliva
5380 (define_insn "*vec_extractv2di_1_sse"
5381 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
5383 (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o")
5384 (parallel [(const_int 1)])))]
5385 "!TARGET_SSE2 && TARGET_SSE
5386 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
5388 movhps\t{%1, %0|%0, %1}
5389 movhlps\t{%1, %0|%0, %1}
5390 movlps\t{%H1, %0|%0, %H1}"
5391 [(set_attr "type" "ssemov")
5392 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Insn producing a V4SI register from an SI register.  Two alternatives:
;; pshufd with immediate 0 reading operand 1, or shufps with immediate 0
;; applied to the destination itself (operand 1 tied to operand 0).
5394 (define_insn "*vec_dupv4si"
5395 [(set (match_operand:V4SI 0 "register_operand" "=Y2,x")
5397 (match_operand:SI 1 "register_operand" " Y2,0")))]
5400 pshufd\t{$0, %1, %0|%0, %1, 0}
5401 shufps\t{$0, %0, %0|%0, %0, 0}"
5402 [(set_attr "type" "sselog1")
5403 (set_attr "mode" "TI,V4SF")])
;; Insn producing a V2DI register from a DI register.  Operand 1 is tied to
;; operand 0 in both alternatives, which are typed sselog1 and ssemov.
;; NOTE(review): the output templates are not visible in this excerpt.
5405 (define_insn "*vec_dupv2di"
5406 [(set (match_operand:V2DI 0 "register_operand" "=Y2,x")
5408 (match_operand:DI 1 "register_operand" " 0 ,0")))]
5413 [(set_attr "type" "sselog1,ssemov")
5414 (set_attr "mode" "TI,V4SF")])
;; Insn building a V2SI register from two SI operands.  Four alternatives,
;; two for constraint Y2 and two for *y: punpckldq with operand 2 when
;; operand 1 is tied to the destination, or movd of operand 1 when operand 2
;; uses constraint "C".
5416 ;; ??? In theory we can match memory for the MMX alternative, but allowing
5417 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
5418 ;; alternatives pretty much forces the MMX alternative to be chosen.
5419 (define_insn "*sse2_concatv2si"
5420 [(set (match_operand:V2SI 0 "register_operand" "=Y2, Y2,*y,*y")
5422 (match_operand:SI 1 "nonimmediate_operand" " 0 ,rm , 0,rm")
5423 (match_operand:SI 2 "reg_or_0_operand" " Y2,C ,*y, C")))]
5426 punpckldq\t{%2, %0|%0, %2}
5427 movd\t{%1, %0|%0, %1}
5428 punpckldq\t{%2, %0|%0, %2}
5429 movd\t{%1, %0|%0, %1}"
5430 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
5431 (set_attr "mode" "TI,TI,DI,DI")])
;; Insn building a V2SI register from two SI operands, with unpcklps and
;; movss for the "x" alternatives and punpckldq and movd for the "*y"
;; alternatives.
5433 (define_insn "*sse1_concatv2si"
5434 [(set (match_operand:V2SI 0 "register_operand" "=x,x,*y,*y")
5436 (match_operand:SI 1 "nonimmediate_operand" " 0,m, 0,*rm")
5437 (match_operand:SI 2 "reg_or_0_operand" " x,C,*y,C")))]
5440 unpcklps\t{%2, %0|%0, %2}
5441 movss\t{%1, %0|%0, %1}
5442 punpckldq\t{%2, %0|%0, %2}
5443 movd\t{%1, %0|%0, %1}"
5444 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
5445 (set_attr "mode" "V4SF,V4SF,DI,DI")])
;; Insn building a V4SI register from two V2SI operands.  Operand 1 is tied
;; to the destination in every alternative; operand 2 is merged in with
;; punpcklqdq, movlhps, or (from memory) movhps.
5447 (define_insn "*vec_concatv4si_1"
5448 [(set (match_operand:V4SI 0 "register_operand" "=Y2,x,x")
5450 (match_operand:V2SI 1 "register_operand" " 0 ,0,0")
5451 (match_operand:V2SI 2 "nonimmediate_operand" " Y2,x,m")))]
5454 punpcklqdq\t{%2, %0|%0, %2}
5455 movlhps\t{%2, %0|%0, %2}
5456 movhps\t{%2, %0|%0, %2}"
5457 [(set_attr "type" "sselog,ssemov,ssemov")
5458 (set_attr "mode" "TI,V4SF,V2SF")])
;; Insn building a V2DI register from two DI operands, enabled for SSE when
;; not targeting 64-bit.  Six alternatives: movq and movq2dq when operand 2
;; uses constraint "C"; punpcklqdq, movlhps and movhps when operand 1 is tied
;; to the destination; movlps when operand 2 is tied to the destination.
5460 (define_insn "vec_concatv2di"
5461 [(set (match_operand:V2DI 0 "register_operand" "=Y2,?Y2,Y2,x,x,x")
5463 (match_operand:DI 1 "nonimmediate_operand" " m,*y ,0 ,0,0,m")
5464 (match_operand:DI 2 "vector_move_operand" " C, C,Y2,x,m,0")))]
5465 "!TARGET_64BIT && TARGET_SSE"
5467 movq\t{%1, %0|%0, %1}
5468 movq2dq\t{%1, %0|%0, %1}
5469 punpcklqdq\t{%2, %0|%0, %2}
5470 movlhps\t{%2, %0|%0, %2}
5471 movhps\t{%2, %0|%0, %2}
5472 movlps\t{%1, %0|%0, %1}"
5473 [(set_attr "type" "ssemov,ssemov,sselog,ssemov,ssemov,ssemov")
5474 (set_attr "mode" "TI,TI,TI,V4SF,V2SF,V2SF")])
;; Insn building a V2DI register from two DI operands, with seven
;; alternatives.  Compared with vec_concatv2di it adds a second movq
;; alternative whose source uses constraint "r" and whose destination uses
;; constraint "Yi".
5476 (define_insn "*vec_concatv2di_rex"
5477 [(set (match_operand:V2DI 0 "register_operand" "=Y2,Yi,!Y2,Y2,x,x,x")
5479 (match_operand:DI 1 "nonimmediate_operand" " m,r ,*y ,0 ,0,0,m")
5480 (match_operand:DI 2 "vector_move_operand" " C,C ,C ,Y2,x,m,0")))]
5483 movq\t{%1, %0|%0, %1}
5484 movq\t{%1, %0|%0, %1}
5485 movq2dq\t{%1, %0|%0, %1}
5486 punpcklqdq\t{%2, %0|%0, %2}
5487 movlhps\t{%2, %0|%0, %2}
5488 movhps\t{%2, %0|%0, %2}
5489 movlps\t{%1, %0|%0, %1}"
5490 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov,ssemov")
5491 (set_attr "mode" "TI,TI,TI,TI,V4SF,V2SF,V2SF")])
;; Expander for setting one element of a V2DI register: forwards the vector,
;; the DI value and the constant index to ix86_expand_vector_set.
5493 (define_expand "vec_setv2di"
5494 [(match_operand:V2DI 0 "register_operand" "")
5495 (match_operand:DI 1 "register_operand" "")
5496 (match_operand 2 "const_int_operand" "")]
5499 ix86_expand_vector_set (false, operands[0], operands[1],
5500 INTVAL (operands[2]));
;; Expander for reading one element of a V2DI register: forwards the DI
;; destination, the vector and the constant index to
;; ix86_expand_vector_extract.
5504 (define_expand "vec_extractv2di"
5505 [(match_operand:DI 0 "register_operand" "")
5506 (match_operand:V2DI 1 "register_operand" "")
5507 (match_operand 2 "const_int_operand" "")]
5510 ix86_expand_vector_extract (false, operands[0], operands[1],
5511 INTVAL (operands[2]));
;; Expander for initialising a V2DI register: forwards the destination and
;; operand 1 (no predicate, no mode) to ix86_expand_vector_init.
5515 (define_expand "vec_initv2di"
5516 [(match_operand:V2DI 0 "register_operand" "")
5517 (match_operand 1 "" "")]
5520 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Expander for setting one element of a V4SI register: forwards the vector,
;; the SI value and the constant index to ix86_expand_vector_set.
5524 (define_expand "vec_setv4si"
5525 [(match_operand:V4SI 0 "register_operand" "")
5526 (match_operand:SI 1 "register_operand" "")
5527 (match_operand 2 "const_int_operand" "")]
5530 ix86_expand_vector_set (false, operands[0], operands[1],
5531 INTVAL (operands[2]));
;; Expander for reading one element of a V4SI register: forwards the SI
;; destination, the vector and the constant index to
;; ix86_expand_vector_extract.
5535 (define_expand "vec_extractv4si"
5536 [(match_operand:SI 0 "register_operand" "")
5537 (match_operand:V4SI 1 "register_operand" "")
5538 (match_operand 2 "const_int_operand" "")]
5541 ix86_expand_vector_extract (false, operands[0], operands[1],
5542 INTVAL (operands[2]));
;; Expander for initialising a V4SI register: forwards the destination and
;; operand 1 (no predicate, no mode) to ix86_expand_vector_init.
5546 (define_expand "vec_initv4si"
5547 [(match_operand:V4SI 0 "register_operand" "")
5548 (match_operand 1 "" "")]
5551 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Expander for setting one element of a V8HI register: forwards the vector,
;; the HI value and the constant index to ix86_expand_vector_set.
5555 (define_expand "vec_setv8hi"
5556 [(match_operand:V8HI 0 "register_operand" "")
5557 (match_operand:HI 1 "register_operand" "")
5558 (match_operand 2 "const_int_operand" "")]
5561 ix86_expand_vector_set (false, operands[0], operands[1],
5562 INTVAL (operands[2]));
;; Expander for reading one element of a V8HI register: forwards the HI
;; destination, the vector and the constant index to
;; ix86_expand_vector_extract.
5566 (define_expand "vec_extractv8hi"
5567 [(match_operand:HI 0 "register_operand" "")
5568 (match_operand:V8HI 1 "register_operand" "")
5569 (match_operand 2 "const_int_operand" "")]
5572 ix86_expand_vector_extract (false, operands[0], operands[1],
5573 INTVAL (operands[2]));
;; Expander for initialising a V8HI register: forwards the destination and
;; operand 1 (no predicate, no mode) to ix86_expand_vector_init.
5577 (define_expand "vec_initv8hi"
5578 [(match_operand:V8HI 0 "register_operand" "")
5579 (match_operand 1 "" "")]
5582 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Expander for setting one element of a V16QI register: forwards the
;; vector, the QI value and the constant index to ix86_expand_vector_set.
5586 (define_expand "vec_setv16qi"
5587 [(match_operand:V16QI 0 "register_operand" "")
5588 (match_operand:QI 1 "register_operand" "")
5589 (match_operand 2 "const_int_operand" "")]
5592 ix86_expand_vector_set (false, operands[0], operands[1],
5593 INTVAL (operands[2]));
;; Expander for reading one element of a V16QI register: forwards the QI
;; destination, the vector and the constant index to
;; ix86_expand_vector_extract.
5597 (define_expand "vec_extractv16qi"
5598 [(match_operand:QI 0 "register_operand" "")
5599 (match_operand:V16QI 1 "register_operand" "")
5600 (match_operand 2 "const_int_operand" "")]
5603 ix86_expand_vector_extract (false, operands[0], operands[1],
5604 INTVAL (operands[2]));
;; Expander for initialising a V16QI register: forwards the destination and
;; operand 1 (no predicate, no mode) to ix86_expand_vector_init.
5608 (define_expand "vec_initv16qi"
5609 [(match_operand:V16QI 0 "register_operand" "")
5610 (match_operand 1 "" "")]
5613 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Expander with a V16QI input (operand 1) and a V8HI output (operand 0).
;; Calls one of three ix86_expand_*_unpack helpers (SSE4, SSE5 or plain
;; SSE), each with (operands, true, true).
;; NOTE(review): the flags presumably mean unsigned / high half -- confirm.
5617 (define_expand "vec_unpacku_hi_v16qi"
5618 [(match_operand:V8HI 0 "register_operand" "")
5619 (match_operand:V16QI 1 "register_operand" "")]
5623 ix86_expand_sse4_unpack (operands, true, true);
5624 else if (TARGET_SSE5)
5625 ix86_expand_sse5_unpack (operands, true, true);
5627 ix86_expand_sse_unpack (operands, true, true);
;; Expander with a V16QI input (operand 1) and a V8HI output (operand 0).
;; Calls one of three ix86_expand_*_unpack helpers (SSE4, SSE5 or plain
;; SSE), each with (operands, false, true).
;; NOTE(review): the flags presumably mean unsigned / high half -- confirm.
5631 (define_expand "vec_unpacks_hi_v16qi"
5632 [(match_operand:V8HI 0 "register_operand" "")
5633 (match_operand:V16QI 1 "register_operand" "")]
5637 ix86_expand_sse4_unpack (operands, false, true);
5638 else if (TARGET_SSE5)
5639 ix86_expand_sse5_unpack (operands, false, true);
5641 ix86_expand_sse_unpack (operands, false, true);
;; Expander with a V16QI input (operand 1) and a V8HI output (operand 0).
;; Calls one of three ix86_expand_*_unpack helpers (SSE4, SSE5 or plain
;; SSE), each with (operands, true, false).
;; NOTE(review): the flags presumably mean unsigned / high half -- confirm.
5645 (define_expand "vec_unpacku_lo_v16qi"
5646 [(match_operand:V8HI 0 "register_operand" "")
5647 (match_operand:V16QI 1 "register_operand" "")]
5651 ix86_expand_sse4_unpack (operands, true, false);
5652 else if (TARGET_SSE5)
5653 ix86_expand_sse5_unpack (operands, true, false);
5655 ix86_expand_sse_unpack (operands, true, false);
;; Expander with a V16QI input (operand 1) and a V8HI output (operand 0).
;; Calls one of three ix86_expand_*_unpack helpers (SSE4, SSE5 or plain
;; SSE), each with (operands, false, false).
;; NOTE(review): the flags presumably mean unsigned / high half -- confirm.
5659 (define_expand "vec_unpacks_lo_v16qi"
5660 [(match_operand:V8HI 0 "register_operand" "")
5661 (match_operand:V16QI 1 "register_operand" "")]
5665 ix86_expand_sse4_unpack (operands, false, false);
5666 else if (TARGET_SSE5)
5667 ix86_expand_sse5_unpack (operands, false, false);
5669 ix86_expand_sse_unpack (operands, false, false);
;; Expander with a V8HI input (operand 1) and a V4SI output (operand 0).
;; Calls one of three ix86_expand_*_unpack helpers (SSE4, SSE5 or plain
;; SSE), each with (operands, true, true).
;; NOTE(review): the flags presumably mean unsigned / high half -- confirm.
5673 (define_expand "vec_unpacku_hi_v8hi"
5674 [(match_operand:V4SI 0 "register_operand" "")
5675 (match_operand:V8HI 1 "register_operand" "")]
5679 ix86_expand_sse4_unpack (operands, true, true);
5680 else if (TARGET_SSE5)
5681 ix86_expand_sse5_unpack (operands, true, true);
5683 ix86_expand_sse_unpack (operands, true, true);
;; Expander with a V8HI input (operand 1) and a V4SI output (operand 0).
;; Calls one of three ix86_expand_*_unpack helpers (SSE4, SSE5 or plain
;; SSE), each with (operands, false, true).
;; NOTE(review): the flags presumably mean unsigned / high half -- confirm.
5687 (define_expand "vec_unpacks_hi_v8hi"
5688 [(match_operand:V4SI 0 "register_operand" "")
5689 (match_operand:V8HI 1 "register_operand" "")]
5693 ix86_expand_sse4_unpack (operands, false, true);
5694 else if (TARGET_SSE5)
5695 ix86_expand_sse5_unpack (operands, false, true);
5697 ix86_expand_sse_unpack (operands, false, true);
;; Expander with a V8HI input (operand 1) and a V4SI output (operand 0).
;; Calls one of three ix86_expand_*_unpack helpers (SSE4, SSE5 or plain
;; SSE), each with (operands, true, false).
;; NOTE(review): the flags presumably mean unsigned / high half -- confirm.
5701 (define_expand "vec_unpacku_lo_v8hi"
5702 [(match_operand:V4SI 0 "register_operand" "")
5703 (match_operand:V8HI 1 "register_operand" "")]
5707 ix86_expand_sse4_unpack (operands, true, false);
5708 else if (TARGET_SSE5)
5709 ix86_expand_sse5_unpack (operands, true, false);
5711 ix86_expand_sse_unpack (operands, true, false);
;; Expander with a V8HI input (operand 1) and a V4SI output (operand 0).
;; Calls one of three ix86_expand_*_unpack helpers (SSE4, SSE5 or plain
;; SSE), each with (operands, false, false).
;; NOTE(review): the flags presumably mean unsigned / high half -- confirm.
5715 (define_expand "vec_unpacks_lo_v8hi"
5716 [(match_operand:V4SI 0 "register_operand" "")
5717 (match_operand:V8HI 1 "register_operand" "")]
5721 ix86_expand_sse4_unpack (operands, false, false);
5722 else if (TARGET_SSE5)
5723 ix86_expand_sse5_unpack (operands, false, false);
5725 ix86_expand_sse_unpack (operands, false, false);
;; Expander with a V4SI input (operand 1) and a V2DI output (operand 0).
;; Calls one of three ix86_expand_*_unpack helpers (SSE4, SSE5 or plain
;; SSE), each with (operands, true, true).
;; NOTE(review): the flags presumably mean unsigned / high half -- confirm.
5729 (define_expand "vec_unpacku_hi_v4si"
5730 [(match_operand:V2DI 0 "register_operand" "")
5731 (match_operand:V4SI 1 "register_operand" "")]
5735 ix86_expand_sse4_unpack (operands, true, true);
5736 else if (TARGET_SSE5)
5737 ix86_expand_sse5_unpack (operands, true, true);
5739 ix86_expand_sse_unpack (operands, true, true);
;; Expander with a V4SI input (operand 1) and a V2DI output (operand 0).
;; Calls one of three ix86_expand_*_unpack helpers (SSE4, SSE5 or plain
;; SSE), each with (operands, false, true).
;; NOTE(review): the flags presumably mean unsigned / high half -- confirm.
5743 (define_expand "vec_unpacks_hi_v4si"
5744 [(match_operand:V2DI 0 "register_operand" "")
5745 (match_operand:V4SI 1 "register_operand" "")]
5749 ix86_expand_sse4_unpack (operands, false, true);
5750 else if (TARGET_SSE5)
5751 ix86_expand_sse5_unpack (operands, false, true);
5753 ix86_expand_sse_unpack (operands, false, true);
;; Expander with a V4SI input (operand 1) and a V2DI output (operand 0).
;; Calls one of three ix86_expand_*_unpack helpers (SSE4, SSE5 or plain
;; SSE), each with (operands, true, false).
;; NOTE(review): the flags presumably mean unsigned / high half -- confirm.
5757 (define_expand "vec_unpacku_lo_v4si"
5758 [(match_operand:V2DI 0 "register_operand" "")
5759 (match_operand:V4SI 1 "register_operand" "")]
5763 ix86_expand_sse4_unpack (operands, true, false);
5764 else if (TARGET_SSE5)
5765 ix86_expand_sse5_unpack (operands, true, false);
5767 ix86_expand_sse_unpack (operands, true, false);
;; Expander with a V4SI input (operand 1) and a V2DI output (operand 0).
;; Calls one of three ix86_expand_*_unpack helpers (SSE4, SSE5 or plain
;; SSE), each with (operands, false, false).
;; NOTE(review): the flags presumably mean unsigned / high half -- confirm.
5771 (define_expand "vec_unpacks_lo_v4si"
5772 [(match_operand:V2DI 0 "register_operand" "")
5773 (match_operand:V4SI 1 "register_operand" "")]
5777 ix86_expand_sse4_unpack (operands, false, false);
5778 else if (TARGET_SSE5)
5779 ix86_expand_sse5_unpack (operands, false, false);
5781 ix86_expand_sse_unpack (operands, false, false);
5785 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5789 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Insn emitting pavgb on V16QI operands.  Operands 1 and 2 are marked
;; commutative ("%"), and the RTL includes a V16QI constant vector of sixteen
;; ones.  Enabled for SSE2 when ix86_binary_operator_ok accepts the operands
;; as a PLUS.
5791 (define_insn "sse2_uavgv16qi3"
5792 [(set (match_operand:V16QI 0 "register_operand" "=x")
5798 (match_operand:V16QI 1 "nonimmediate_operand" "%0"))
5800 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))
5801 (const_vector:V16QI [(const_int 1) (const_int 1)
5802 (const_int 1) (const_int 1)
5803 (const_int 1) (const_int 1)
5804 (const_int 1) (const_int 1)
5805 (const_int 1) (const_int 1)
5806 (const_int 1) (const_int 1)
5807 (const_int 1) (const_int 1)
5808 (const_int 1) (const_int 1)]))
5810 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
5811 "pavgb\t{%2, %0|%0, %2}"
5812 [(set_attr "type" "sseiadd")
5813 (set_attr "prefix_data16" "1")
5814 (set_attr "mode" "TI")])
;; Insn emitting pavgw on V8HI operands.  Operands 1 and 2 are marked
;; commutative ("%"), and the RTL includes a V8HI constant vector of eight
;; ones.  Enabled for SSE2 when ix86_binary_operator_ok accepts the operands
;; as a PLUS.
5816 (define_insn "sse2_uavgv8hi3"
5817 [(set (match_operand:V8HI 0 "register_operand" "=x")
5823 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
5825 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
5826 (const_vector:V8HI [(const_int 1) (const_int 1)
5827 (const_int 1) (const_int 1)
5828 (const_int 1) (const_int 1)
5829 (const_int 1) (const_int 1)]))
5831 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
5832 "pavgw\t{%2, %0|%0, %2}"
5833 [(set_attr "type" "sseiadd")
5834 (set_attr "prefix_data16" "1")
5835 (set_attr "mode" "TI")])
;; Insn emitting psadbw.  The operation is modelled as an opaque unspec over
;; two V16QI inputs yielding V2DI; operand 1 is tied to the destination.
5837 ;; The correct representation for this is absolutely enormous, and
5838 ;; surely not generally useful.
5839 (define_insn "sse2_psadbw"
5840 [(set (match_operand:V2DI 0 "register_operand" "=x")
5841 (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "0")
5842 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
5845 "psadbw\t{%2, %0|%0, %2}"
5846 [(set_attr "type" "sseiadd")
5847 (set_attr "prefix_data16" "1")
5848 (set_attr "mode" "TI")])
;; Insn emitting movmskps: an SI result in an "r" register computed, via an
;; unspec, from a V4SF register.
5850 (define_insn "sse_movmskps"
5851 [(set (match_operand:SI 0 "register_operand" "=r")
5852 (unspec:SI [(match_operand:V4SF 1 "register_operand" "x")]
5855 "movmskps\t{%1, %0|%0, %1}"
5856 [(set_attr "type" "ssecvt")
5857 (set_attr "mode" "V4SF")])
;; Insn emitting movmskpd: an SI result in an "r" register computed, via an
;; unspec, from a V2DF register.
5859 (define_insn "sse2_movmskpd"
5860 [(set (match_operand:SI 0 "register_operand" "=r")
5861 (unspec:SI [(match_operand:V2DF 1 "register_operand" "x")]
5864 "movmskpd\t{%1, %0|%0, %1}"
5865 [(set_attr "type" "ssecvt")
5866 (set_attr "mode" "V2DF")])
;; Insn emitting pmovmskb: an SI result in an "r" register computed, via an
;; unspec, from a V16QI register.
5868 (define_insn "sse2_pmovmskb"
5869 [(set (match_operand:SI 0 "register_operand" "=r")
5870 (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
5873 "pmovmskb\t{%1, %0|%0, %1}"
5874 [(set_attr "type" "ssecvt")
5875 (set_attr "prefix_data16" "1")
5876 (set_attr "mode" "SI")])
;; Expander: a V16QI memory destination (operand 0) set from an unspec whose
;; visible inputs are V16QI registers 1 and 2.
5878 (define_expand "sse2_maskmovdqu"
5879 [(set (match_operand:V16QI 0 "memory_operand" "")
5880 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "")
5881 (match_operand:V16QI 2 "register_operand" "")
;; 32-bit maskmovdqu.  The destination is V16QI memory addressed by an SImode
;; register with constraint "D"; the same memory also appears as an unspec
;; input via match_dup, alongside V16QI registers 1 and 2.
5887 (define_insn "*sse2_maskmovdqu"
5888 [(set (mem:V16QI (match_operand:SI 0 "register_operand" "D"))
5889 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
5890 (match_operand:V16QI 2 "register_operand" "x")
5891 (mem:V16QI (match_dup 0))]
5893 "TARGET_SSE2 && !TARGET_64BIT"
5894 ;; @@@ check ordering of operands in intel/nonintel syntax
5895 "maskmovdqu\t{%2, %1|%1, %2}"
5896 [(set_attr "type" "ssecvt")
5897 (set_attr "prefix_data16" "1")
5898 (set_attr "mode" "TI")])
;; 64-bit maskmovdqu.  Same shape as the 32-bit pattern, except that the
;; address register (operand 0, constraint "D") is DImode.
5900 (define_insn "*sse2_maskmovdqu_rex64"
5901 [(set (mem:V16QI (match_operand:DI 0 "register_operand" "D"))
5902 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
5903 (match_operand:V16QI 2 "register_operand" "x")
5904 (mem:V16QI (match_dup 0))]
5906 "TARGET_SSE2 && TARGET_64BIT"
5907 ;; @@@ check ordering of operands in intel/nonintel syntax
5908 "maskmovdqu\t{%2, %1|%1, %2}"
5909 [(set_attr "type" "ssecvt")
5910 (set_attr "prefix_data16" "1")
5911 (set_attr "mode" "TI")])
;; Insn modelled as an unspec_volatile over an SI memory operand; its
;; "memory" attribute is "load".
;; NOTE(review): the output template is not visible in this excerpt.
5913 (define_insn "sse_ldmxcsr"
5914 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
5918 [(set_attr "type" "sse")
5919 (set_attr "memory" "load")])
;; Insn setting an SI memory operand from the volatile unspec
;; UNSPECV_STMXCSR; its "memory" attribute is "store".
;; NOTE(review): the output template is not visible in this excerpt.
5921 (define_insn "sse_stmxcsr"
5922 [(set (match_operand:SI 0 "memory_operand" "=m")
5923 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
5926 [(set_attr "type" "sse")
5927 (set_attr "memory" "store")])
;; Expander for the store fence, enabled for SSE or 3DNow!-A.  Operand 0 is
;; created here as a volatile BLKmode MEM whose address is a Pmode scratch,
;; and it is used as the input of the UNSPEC_SFENCE unspec.
5929 (define_expand "sse_sfence"
5931 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
5932 "TARGET_SSE || TARGET_3DNOW_A"
5934 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
5935 MEM_VOLATILE_P (operands[0]) = 1;
;; Insn matching a BLK operand set from UNSPEC_SFENCE of itself, enabled for
;; SSE or 3DNow!-A.  Its "memory" attribute is "unknown".
5938 (define_insn "*sse_sfence"
5939 [(set (match_operand:BLK 0 "" "")
5940 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
5941 "TARGET_SSE || TARGET_3DNOW_A"
5943 [(set_attr "type" "sse")
5944 (set_attr "memory" "unknown")])
;; Insn modelled as an unspec_volatile over an address operand (constraint
;; "p"); its "memory" attribute is "unknown".
;; NOTE(review): the output template is not visible in this excerpt.
5946 (define_insn "sse2_clflush"
5947 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
5951 [(set_attr "type" "sse")
5952 (set_attr "memory" "unknown")])
;; Expander for the memory fence.  Operand 0 is created here as a volatile
;; BLKmode MEM whose address is a Pmode scratch, and it is used as the input
;; of the UNSPEC_MFENCE unspec.
5954 (define_expand "sse2_mfence"
5956 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
5959 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
5960 MEM_VOLATILE_P (operands[0]) = 1;
;; Insn matching a BLK operand set from UNSPEC_MFENCE of itself.  Its
;; "memory" attribute is "unknown".
5963 (define_insn "*sse2_mfence"
5964 [(set (match_operand:BLK 0 "" "")
5965 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
5968 [(set_attr "type" "sse")
5969 (set_attr "memory" "unknown")])
;; Expander for the load fence.  Operand 0 is created here as a volatile
;; BLKmode MEM whose address is a Pmode scratch, and it is used as the input
;; of the UNSPEC_LFENCE unspec.
5971 (define_expand "sse2_lfence"
5973 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
5976 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
5977 MEM_VOLATILE_P (operands[0]) = 1;
;; Insn matching a BLK operand set from UNSPEC_LFENCE of itself.  Its
;; "memory" attribute is "unknown".
5980 (define_insn "*sse2_lfence"
5981 [(set (match_operand:BLK 0 "" "")
5982 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
5985 [(set_attr "type" "sse")
5986 (set_attr "memory" "unknown")])
;; Insn modelled as an unspec_volatile over two SI registers with
;; constraints "a" and "c"; its length attribute is 3.
5988 (define_insn "sse3_mwait"
5989 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
5990 (match_operand:SI 1 "register_operand" "c")]
5993 ;; The 64-bit version is "mwait %rax,%rcx", but only the lower 32 bits are
5994 ;; used.  Since 32-bit register operands are implicitly zero-extended to
5995 ;; 64 bits, we only need to set up 32-bit registers.
5997 [(set_attr "length" "3")])
;; 32-bit monitor insn, enabled for SSE3 when not targeting 64-bit.  It is an
;; unspec_volatile over three SI registers with constraints "a", "c" and
;; "d"; its length attribute is 3.
5999 (define_insn "sse3_monitor"
6000 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
6001 (match_operand:SI 1 "register_operand" "c")
6002 (match_operand:SI 2 "register_operand" "d")]
6004 "TARGET_SSE3 && !TARGET_64BIT"
6005 "monitor\t%0, %1, %2"
6006 [(set_attr "length" "3")])
;; 64-bit monitor insn, enabled for SSE3 when targeting 64-bit.  Operand 0
;; (constraint "a") is DImode; operands 1 and 2 (constraints "c" and "d")
;; stay SImode for the reason given in the comment below.
6008 (define_insn "sse3_monitor64"
6009 [(unspec_volatile [(match_operand:DI 0 "register_operand" "a")
6010 (match_operand:SI 1 "register_operand" "c")
6011 (match_operand:SI 2 "register_operand" "d")]
6013 "TARGET_SSE3 && TARGET_64BIT"
6014 ;; The 64-bit version is "monitor %rax,%rcx,%rdx", but only the lower 32
6015 ;; bits of RCX and RDX are used.  Since 32-bit register operands are
6016 ;; implicitly zero-extended to 64 bits, we only need to set up 32-bit registers.
6018 [(set_attr "length" "3")])
;; Insn emitting phaddw on V8HI operands with constraint "x".  Operand 1 is
;; tied to the destination; operand 2 may be a register or memory.  The
;; visible RTL selects elements 0 through 7 of each input one at a time.
6021 (define_insn "ssse3_phaddwv8hi3"
6022 [(set (match_operand:V8HI 0 "register_operand" "=x")
6028 (match_operand:V8HI 1 "register_operand" "0")
6029 (parallel [(const_int 0)]))
6030 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
6032 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
6033 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
6036 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
6037 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
6039 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
6040 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
6045 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
6046 (parallel [(const_int 0)]))
6047 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
6049 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
6050 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
6053 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
6054 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
6056 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
6057 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
6059 "phaddw\t{%2, %0|%0, %2}"
6060 [(set_attr "type" "sseiadd")
6061 (set_attr "prefix_data16" "1")
6062 (set_attr "prefix_extra" "1")
6063 (set_attr "mode" "TI")])
;; Insn emitting phaddw on V4HI operands with constraint "y".  Operand 1 is
;; tied to the destination; operand 2 may be a register or memory.  The
;; visible RTL selects elements 0 through 3 of each input one at a time.
6065 (define_insn "ssse3_phaddwv4hi3"
6066 [(set (match_operand:V4HI 0 "register_operand" "=y")
6071 (match_operand:V4HI 1 "register_operand" "0")
6072 (parallel [(const_int 0)]))
6073 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
6075 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
6076 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
6080 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
6081 (parallel [(const_int 0)]))
6082 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
6084 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
6085 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
6087 "phaddw\t{%2, %0|%0, %2}"
6088 [(set_attr "type" "sseiadd")
6089 (set_attr "prefix_extra" "1")
6090 (set_attr "mode" "DI")])
;; Insn emitting phaddd on V4SI operands with constraint "x".  Operand 1 is
;; tied to the destination; operand 2 may be a register or memory.  The
;; visible RTL selects elements 0 through 3 of each input one at a time.
6092 (define_insn "ssse3_phadddv4si3"
6093 [(set (match_operand:V4SI 0 "register_operand" "=x")
6098 (match_operand:V4SI 1 "register_operand" "0")
6099 (parallel [(const_int 0)]))
6100 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
6102 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
6103 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
6107 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
6108 (parallel [(const_int 0)]))
6109 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
6111 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
6112 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
6114 "phaddd\t{%2, %0|%0, %2}"
6115 [(set_attr "type" "sseiadd")
6116 (set_attr "prefix_data16" "1")
6117 (set_attr "prefix_extra" "1")
6118 (set_attr "mode" "TI")])
;; Insn emitting phaddd on V2SI operands with constraint "y".  Operand 1 is
;; tied to the destination; operand 2 may be a register or memory.  The
;; visible RTL selects elements 0 and 1 of each input.
6120 (define_insn "ssse3_phadddv2si3"
6121 [(set (match_operand:V2SI 0 "register_operand" "=y")
6125 (match_operand:V2SI 1 "register_operand" "0")
6126 (parallel [(const_int 0)]))
6127 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
6130 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
6131 (parallel [(const_int 0)]))
6132 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
6134 "phaddd\t{%2, %0|%0, %2}"
6135 [(set_attr "type" "sseiadd")
6136 (set_attr "prefix_extra" "1")
6137 (set_attr "mode" "DI")])
;; Insn emitting phaddsw on V8HI operands with constraint "x".  Operand 1 is
;; tied to the destination; operand 2 may be a register or memory.  The
;; visible RTL selects elements 0 through 7 of each input one at a time.
6139 (define_insn "ssse3_phaddswv8hi3"
6140 [(set (match_operand:V8HI 0 "register_operand" "=x")
6146 (match_operand:V8HI 1 "register_operand" "0")
6147 (parallel [(const_int 0)]))
6148 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
6150 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
6151 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
6154 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
6155 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
6157 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
6158 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
6163 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
6164 (parallel [(const_int 0)]))
6165 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
6167 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
6168 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
6171 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
6172 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
6174 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
6175 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
6177 "phaddsw\t{%2, %0|%0, %2}"
6178 [(set_attr "type" "sseiadd")
6179 (set_attr "prefix_data16" "1")
6180 (set_attr "prefix_extra" "1")
6181 (set_attr "mode" "TI")])
;; Insn emitting phaddsw on V4HI operands with constraint "y".  Operand 1 is
;; tied to the destination; operand 2 may be a register or memory.  The
;; visible RTL selects elements 0 through 3 of each input one at a time.
6183 (define_insn "ssse3_phaddswv4hi3"
6184 [(set (match_operand:V4HI 0 "register_operand" "=y")
6189 (match_operand:V4HI 1 "register_operand" "0")
6190 (parallel [(const_int 0)]))
6191 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
6193 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
6194 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
6198 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
6199 (parallel [(const_int 0)]))
6200 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
6202 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
6203 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
6205 "phaddsw\t{%2, %0|%0, %2}"
6206 [(set_attr "type" "sseiadd")
6207 (set_attr "prefix_extra" "1")
6208 (set_attr "mode" "DI")])
;; Insn emitting phsubw on V8HI operands with constraint "x".  Operand 1 is
;; tied to the destination; operand 2 may be a register or memory.  The
;; visible RTL selects elements 0 through 7 of each input one at a time.
6210 (define_insn "ssse3_phsubwv8hi3"
6211 [(set (match_operand:V8HI 0 "register_operand" "=x")
6217 (match_operand:V8HI 1 "register_operand" "0")
6218 (parallel [(const_int 0)]))
6219 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
6221 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
6222 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
6225 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
6226 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
6228 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
6229 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
6234 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
6235 (parallel [(const_int 0)]))
6236 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
6238 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
6239 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
6242 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
6243 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
6245 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
6246 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
6248 "phsubw\t{%2, %0|%0, %2}"
6249 [(set_attr "type" "sseiadd")
6250 (set_attr "prefix_data16" "1")
6251 (set_attr "prefix_extra" "1")
6252 (set_attr "mode" "TI")])
;; Insn emitting phsubw on V4HI operands with constraint "y".  Operand 1 is
;; tied to the destination; operand 2 may be a register or memory.  The
;; visible RTL selects elements 0 through 3 of each input one at a time.
6254 (define_insn "ssse3_phsubwv4hi3"
6255 [(set (match_operand:V4HI 0 "register_operand" "=y")
6260 (match_operand:V4HI 1 "register_operand" "0")
6261 (parallel [(const_int 0)]))
6262 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
6264 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
6265 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
6269 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
6270 (parallel [(const_int 0)]))
6271 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
6273 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
6274 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
6276 "phsubw\t{%2, %0|%0, %2}"
6277 [(set_attr "type" "sseiadd")
6278 (set_attr "prefix_extra" "1")
6279 (set_attr "mode" "DI")])
;; Insn emitting phsubd on V4SI operands with constraint "x".  Operand 1 is
;; tied to the destination; operand 2 may be a register or memory.  The
;; visible RTL selects elements 0 through 3 of each input one at a time.
6281 (define_insn "ssse3_phsubdv4si3"
6282 [(set (match_operand:V4SI 0 "register_operand" "=x")
6287 (match_operand:V4SI 1 "register_operand" "0")
6288 (parallel [(const_int 0)]))
6289 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
6291 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
6292 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
6296 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
6297 (parallel [(const_int 0)]))
6298 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
6300 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
6301 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
6303 "phsubd\t{%2, %0|%0, %2}"
6304 [(set_attr "type" "sseiadd")
6305 (set_attr "prefix_data16" "1")
6306 (set_attr "prefix_extra" "1")
6307 (set_attr "mode" "TI")])
;; Insn emitting phsubd on V2SI operands with constraint "y".  Operand 1 is
;; tied to the destination; operand 2 may be a register or memory.  The
;; visible RTL selects elements 0 and 1 of each input.
6309 (define_insn "ssse3_phsubdv2si3"
6310 [(set (match_operand:V2SI 0 "register_operand" "=y")
6314 (match_operand:V2SI 1 "register_operand" "0")
6315 (parallel [(const_int 0)]))
6316 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
6319 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
6320 (parallel [(const_int 0)]))
6321 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
6323 "phsubd\t{%2, %0|%0, %2}"
6324 [(set_attr "type" "sseiadd")
6325 (set_attr "prefix_extra" "1")
6326 (set_attr "mode" "DI")])
;; Insn emitting phsubsw on V8HI operands with constraint "x".  Operand 1 is
;; tied to the destination; operand 2 may be a register or memory.  The
;; visible RTL selects elements 0 through 7 of each input one at a time.
6328 (define_insn "ssse3_phsubswv8hi3"
6329 [(set (match_operand:V8HI 0 "register_operand" "=x")
6335 (match_operand:V8HI 1 "register_operand" "0")
6336 (parallel [(const_int 0)]))
6337 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
6339 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
6340 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
6343 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
6344 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
6346 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
6347 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
6352 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
6353 (parallel [(const_int 0)]))
6354 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
6356 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
6357 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
6360 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
6361 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
6363 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
6364 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
6366 "phsubsw\t{%2, %0|%0, %2}"
6367 [(set_attr "type" "sseiadd")
6368 (set_attr "prefix_data16" "1")
6369 (set_attr "prefix_extra" "1")
6370 (set_attr "mode" "TI")])
;; Insn emitting phsubsw on V4HI operands with constraint "y".  Operand 1 is
;; tied to the destination; operand 2 may be a register or memory.  The
;; visible RTL selects elements 0 through 3 of each input one at a time.
6372 (define_insn "ssse3_phsubswv4hi3"
6373 [(set (match_operand:V4HI 0 "register_operand" "=y")
6378 (match_operand:V4HI 1 "register_operand" "0")
6379 (parallel [(const_int 0)]))
6380 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
6382 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
6383 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
6387 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
6388 (parallel [(const_int 0)]))
6389 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
6391 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
6392 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
6394 "phsubsw\t{%2, %0|%0, %2}"
6395 [(set_attr "type" "sseiadd")
6396 (set_attr "prefix_extra" "1")
6397 (set_attr "mode" "DI")])
;; Insn emitting pmaddubsw with V16QI inputs and a V8HI result.  Operand 1 is
;; tied to the destination.  The RTL selects two element subsets from each
;; input: one whose index list starts at 0 and one whose list starts at 1.
;; NOTE(review): operand 1 uses predicate nonimmediate_operand with the
;; register-only constraint "0" -- confirm a memory operand 1 is intended to
;; be accepted by the predicate.
6399 (define_insn "ssse3_pmaddubswv8hi3"
6400 [(set (match_operand:V8HI 0 "register_operand" "=x")
6405 (match_operand:V16QI 1 "nonimmediate_operand" "0")
6406 (parallel [(const_int 0)
6416 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
6417 (parallel [(const_int 0)
6427 (vec_select:V16QI (match_dup 1)
6428 (parallel [(const_int 1)
6437 (vec_select:V16QI (match_dup 2)
6438 (parallel [(const_int 1)
6445 (const_int 15)]))))))]
6447 "pmaddubsw\t{%2, %0|%0, %2}"
6448 [(set_attr "type" "sseiadd")
6449 (set_attr "prefix_data16" "1")
6450 (set_attr "prefix_extra" "1")
6451 (set_attr "mode" "TI")])
;; ssse3_pmaddubswv4hi3: 64-bit ("y" register class) form of pmaddubsw,
;; producing a V4HI result from two V8QI inputs.  Operand 1 is tied to
;; the destination; operand 2 may be a register or memory ("ym").
6453 (define_insn "ssse3_pmaddubswv4hi3"
6454 [(set (match_operand:V4HI 0 "register_operand" "=y")
6459 (match_operand:V8QI 1 "nonimmediate_operand" "0")
6460 (parallel [(const_int 0)
6466 (match_operand:V8QI 2 "nonimmediate_operand" "ym")
6467 (parallel [(const_int 0)
6473 (vec_select:V8QI (match_dup 1)
6474 (parallel [(const_int 1)
6479 (vec_select:V8QI (match_dup 2)
6480 (parallel [(const_int 1)
6483 (const_int 7)]))))))]
6485 "pmaddubsw\t{%2, %0|%0, %2}"
6486 [(set_attr "type" "sseiadd")
6487 (set_attr "prefix_extra" "1")
6488 (set_attr "mode" "DI")])
;; ssse3_pmulhrswv8hi3: emits pmulhrsw on V8HI operands.  Operand 1 is
;; marked commutative and tied to the destination ("%0").  Enabled when
;; TARGET_SSSE3 holds and ix86_binary_operator_ok accepts the operands.
;; NOTE(review): the const_vector of ones is presumably the rounding
;; increment of the multiply -- confirm against the full pattern.
6490 (define_insn "ssse3_pmulhrswv8hi3"
6491 [(set (match_operand:V8HI 0 "register_operand" "=x")
6498 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
6500 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
6502 (const_vector:V8HI [(const_int 1) (const_int 1)
6503 (const_int 1) (const_int 1)
6504 (const_int 1) (const_int 1)
6505 (const_int 1) (const_int 1)]))
6507 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
6508 "pmulhrsw\t{%2, %0|%0, %2}"
6509 [(set_attr "type" "sseimul")
6510 (set_attr "prefix_data16" "1")
6511 (set_attr "prefix_extra" "1")
6512 (set_attr "mode" "TI")])
;; ssse3_pmulhrswv4hi3: V4HI ("y" register class) form of pmulhrsw.
;; Operand 1 is commutative and tied to the destination ("%0").  Enabled
;; when TARGET_SSSE3 holds and ix86_binary_operator_ok accepts the operands.
6514 (define_insn "ssse3_pmulhrswv4hi3"
6515 [(set (match_operand:V4HI 0 "register_operand" "=y")
6522 (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
6524 (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
6526 (const_vector:V4HI [(const_int 1) (const_int 1)
6527 (const_int 1) (const_int 1)]))
6529 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
6530 "pmulhrsw\t{%2, %0|%0, %2}"
6531 [(set_attr "type" "sseimul")
6532 (set_attr "prefix_extra" "1")
6533 (set_attr "mode" "DI")])
;; ssse3_pshufbv16qi3: emits pshufb on V16QI operands, modelled as an
;; unspec.  Operand 1 is tied to the destination; operand 2 may be a
;; register or memory ("xm").
;; NOTE(review): the ';' after the output template starts an empty
;; comment and has no effect on the pattern.
6535 (define_insn "ssse3_pshufbv16qi3"
6536 [(set (match_operand:V16QI 0 "register_operand" "=x")
6537 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
6538 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
6541 "pshufb\t{%2, %0|%0, %2}";
6542 [(set_attr "type" "sselog1")
6543 (set_attr "prefix_data16" "1")
6544 (set_attr "prefix_extra" "1")
6545 (set_attr "mode" "TI")])
;; ssse3_pshufbv8qi3: V8QI ("y" register class) form of pshufb, modelled
;; as an unspec.  Operand 1 is tied to the destination; operand 2 may be
;; a register or memory ("ym").
6547 (define_insn "ssse3_pshufbv8qi3"
6548 [(set (match_operand:V8QI 0 "register_operand" "=y")
6549 (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0")
6550 (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
6553 "pshufb\t{%2, %0|%0, %2}";
6554 [(set_attr "type" "sselog1")
6555 (set_attr "prefix_extra" "1")
6556 (set_attr "mode" "DI")])
;; ssse3_psign<mode>3 (SSE): one pattern per mode in SSEMODE124
;; (V16QI, V8HI, V4SI).  The mnemonic suffix comes from <ssevecsize>
;; (b/w/d).  Operand 1 is tied to the destination.
6558 (define_insn "ssse3_psign<mode>3"
6559 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
6560 (unspec:SSEMODE124 [(match_operand:SSEMODE124 1 "register_operand" "0")
6561 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")]
6564 "psign<ssevecsize>\t{%2, %0|%0, %2}";
6565 [(set_attr "type" "sselog1")
6566 (set_attr "prefix_data16" "1")
6567 (set_attr "prefix_extra" "1")
6568 (set_attr "mode" "TI")])
;; ssse3_psign<mode>3 (MMX): same unspec shape as the SSE pattern, but
;; iterating over MMXMODEI with "y" register constraints and taking the
;; mnemonic suffix from <mmxvecsize> (both defined outside this section).
6570 (define_insn "ssse3_psign<mode>3"
6571 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
6572 (unspec:MMXMODEI [(match_operand:MMXMODEI 1 "register_operand" "0")
6573 (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
6576 "psign<mmxvecsize>\t{%2, %0|%0, %2}";
6577 [(set_attr "type" "sselog1")
6578 (set_attr "prefix_extra" "1")
6579 (set_attr "mode" "DI")])
;; ssse3_palignrti: emits palignr on TImode operands.  Operand 3 must
;; satisfy const_0_to_255_mul_8_operand; the output code divides it by 8
;; before printing, so the emitted immediate is presumably a byte count
;; derived from a bit count -- confirm against the predicate definition.
6581 (define_insn "ssse3_palignrti"
6582 [(set (match_operand:TI 0 "register_operand" "=x")
6583 (unspec:TI [(match_operand:TI 1 "register_operand" "0")
6584 (match_operand:TI 2 "nonimmediate_operand" "xm")
6585 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
6589 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
6590 return "palignr\t{%3, %2, %0|%0, %2, %3}";
6592 [(set_attr "type" "sseishft")
6593 (set_attr "prefix_data16" "1")
6594 (set_attr "prefix_extra" "1")
6595 (set_attr "mode" "TI")])
;; ssse3_palignrdi: DImode ("y" register class) form of palignr.  As in
;; the TImode pattern, operand 3 is divided by 8 before being printed as
;; the immediate.
6597 (define_insn "ssse3_palignrdi"
6598 [(set (match_operand:DI 0 "register_operand" "=y")
6599 (unspec:DI [(match_operand:DI 1 "register_operand" "0")
6600 (match_operand:DI 2 "nonimmediate_operand" "ym")
6601 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
6605 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
6606 return "palignr\t{%3, %2, %0|%0, %2, %3}";
6608 [(set_attr "type" "sseishft")
6609 (set_attr "prefix_extra" "1")
6610 (set_attr "mode" "DI")])
;; abs<mode>2 (SSE): vector absolute value for each mode in SSEMODE124
;; (V16QI, V8HI, V4SI), emitted as pabs<ssevecsize> (pabsb/pabsw/pabsd).
;; The source may be a register or memory ("xm"); it is not tied to the
;; destination.
6612 (define_insn "abs<mode>2"
6613 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
6614 (abs:SSEMODE124 (match_operand:SSEMODE124 1 "nonimmediate_operand" "xm")))]
6616 "pabs<ssevecsize>\t{%1, %0|%0, %1}";
6617 [(set_attr "type" "sselog1")
6618 (set_attr "prefix_data16" "1")
6619 (set_attr "prefix_extra" "1")
6620 (set_attr "mode" "TI")])
;; abs<mode>2 (MMX): vector absolute value over the MMXMODEI modes with
;; "y" register constraints, emitted as pabs<mmxvecsize>.
6622 (define_insn "abs<mode>2"
6623 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
6624 (abs:MMXMODEI (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
6626 "pabs<mmxvecsize>\t{%1, %0|%0, %1}";
6627 [(set_attr "type" "sselog1")
6628 (set_attr "prefix_extra" "1")
6629 (set_attr "mode" "DI")])
6631 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6633 ;; AMD SSE4A instructions
6635 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; sse4a_vmmovntv2df: stores element 0 of a V2DF register (operand 1)
;; to a DF memory destination using movntsd.
6637 (define_insn "sse4a_vmmovntv2df"
6638 [(set (match_operand:DF 0 "memory_operand" "=m")
6639 (unspec:DF [(vec_select:DF
6640 (match_operand:V2DF 1 "register_operand" "x")
6641 (parallel [(const_int 0)]))]
6644 "movntsd\t{%1, %0|%0, %1}"
6645 [(set_attr "type" "ssemov")
6646 (set_attr "mode" "DF")])
;; sse4a_movntdf: stores a scalar DF register (operand 1) to a DF memory
;; destination using movntsd.
6648 (define_insn "sse4a_movntdf"
6649 [(set (match_operand:DF 0 "memory_operand" "=m")
6650 (unspec:DF [(match_operand:DF 1 "register_operand" "x")]
6653 "movntsd\t{%1, %0|%0, %1}"
6654 [(set_attr "type" "ssemov")
6655 (set_attr "mode" "DF")])
;; sse4a_vmmovntv4sf: stores element 0 of a V4SF register (operand 1)
;; to an SF memory destination using movntss.
6657 (define_insn "sse4a_vmmovntv4sf"
6658 [(set (match_operand:SF 0 "memory_operand" "=m")
6659 (unspec:SF [(vec_select:SF
6660 (match_operand:V4SF 1 "register_operand" "x")
6661 (parallel [(const_int 0)]))]
6664 "movntss\t{%1, %0|%0, %1}"
6665 [(set_attr "type" "ssemov")
6666 (set_attr "mode" "SF")])
;; sse4a_movntsf: stores a scalar SF register (operand 1) to an SF memory
;; destination using movntss.
6668 (define_insn "sse4a_movntsf"
6669 [(set (match_operand:SF 0 "memory_operand" "=m")
6670 (unspec:SF [(match_operand:SF 1 "register_operand" "x")]
6673 "movntss\t{%1, %0|%0, %1}"
6674 [(set_attr "type" "ssemov")
6675 (set_attr "mode" "SF")])
;; sse4a_extrqi: immediate form of extrq.  Operand 1 is tied to the
;; destination; operands 2 and 3 are const_int operands (no mode or
;; constraint given) printed as the two immediates.
6677 (define_insn "sse4a_extrqi"
6678 [(set (match_operand:V2DI 0 "register_operand" "=x")
6679 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6680 (match_operand 2 "const_int_operand" "")
6681 (match_operand 3 "const_int_operand" "")]
6684 "extrq\t{%3, %2, %0|%0, %2, %3}"
6685 [(set_attr "type" "sse")
6686 (set_attr "prefix_data16" "1")
6687 (set_attr "mode" "TI")])
;; sse4a_extrq: register form of extrq.  Operand 1 (V2DI) is tied to the
;; destination; operand 2 is a V16QI register.
6689 (define_insn "sse4a_extrq"
6690 [(set (match_operand:V2DI 0 "register_operand" "=x")
6691 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6692 (match_operand:V16QI 2 "register_operand" "x")]
6695 "extrq\t{%2, %0|%0, %2}"
6696 [(set_attr "type" "sse")
6697 (set_attr "prefix_data16" "1")
6698 (set_attr "mode" "TI")])
;; sse4a_insertqi: immediate form of insertq.  Operand 1 is tied to the
;; destination, operand 2 is a V2DI register, and operands 3 and 4 are
;; const_int operands printed as the two immediates.
6700 (define_insn "sse4a_insertqi"
6701 [(set (match_operand:V2DI 0 "register_operand" "=x")
6702 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6703 (match_operand:V2DI 2 "register_operand" "x")
6704 (match_operand 3 "const_int_operand" "")
6705 (match_operand 4 "const_int_operand" "")]
6708 "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
6709 [(set_attr "type" "sseins")
6710 (set_attr "prefix_rep" "1")
6711 (set_attr "mode" "TI")])
;; sse4a_insertq: register form of insertq.  Operand 1 is tied to the
;; destination; operand 2 is a V2DI register.
6713 (define_insn "sse4a_insertq"
6714 [(set (match_operand:V2DI 0 "register_operand" "=x")
6715 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6716 (match_operand:V2DI 2 "register_operand" "x")]
6719 "insertq\t{%2, %0|%0, %2}"
6720 [(set_attr "type" "sseins")
6721 (set_attr "prefix_rep" "1")
6722 (set_attr "mode" "TI")])
6724 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6726 ;; Intel SSE4.1 instructions
6728 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; sse4_1_blendpd: emits blendpd on V2DF operands.  Operand 2 (register
;; or memory) is listed before operand 1, which is tied to the
;; destination; operand 3 is an immediate accepted by
;; const_0_to_3_operand.
6730 (define_insn "sse4_1_blendpd"
6731 [(set (match_operand:V2DF 0 "register_operand" "=x")
6733 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
6734 (match_operand:V2DF 1 "register_operand" "0")
6735 (match_operand:SI 3 "const_0_to_3_operand" "n")))]
6737 "blendpd\t{%3, %2, %0|%0, %2, %3}"
6738 [(set_attr "type" "ssemov")
6739 (set_attr "prefix_extra" "1")
6740 (set_attr "mode" "V2DF")])
;; sse4_1_blendps: emits blendps on V4SF operands.  Operand 1 is tied to
;; the destination; operand 3 is an immediate accepted by
;; const_0_to_15_operand.
6742 (define_insn "sse4_1_blendps"
6743 [(set (match_operand:V4SF 0 "register_operand" "=x")
6745 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
6746 (match_operand:V4SF 1 "register_operand" "0")
6747 (match_operand:SI 3 "const_0_to_15_operand" "n")))]
6749 "blendps\t{%3, %2, %0|%0, %2, %3}"
6750 [(set_attr "type" "ssemov")
6751 (set_attr "prefix_extra" "1")
6752 (set_attr "mode" "V4SF")])
;; sse4_1_blendvpd: emits blendvpd on V2DF operands.  Operand 3 uses the
;; "Yz" constraint, while operands 0..2 use the *_not_xmm0 predicates so
;; they stay distinct from it.
6754 (define_insn "sse4_1_blendvpd"
6755 [(set (match_operand:V2DF 0 "reg_not_xmm0_operand" "=x")
6756 (unspec:V2DF [(match_operand:V2DF 1 "reg_not_xmm0_operand" "0")
6757 (match_operand:V2DF 2 "nonimm_not_xmm0_operand" "xm")
6758 (match_operand:V2DF 3 "register_operand" "Yz")]
6761 "blendvpd\t{%3, %2, %0|%0, %2, %3}"
6762 [(set_attr "type" "ssemov")
6763 (set_attr "prefix_extra" "1")
6764 (set_attr "mode" "V2DF")])
;; sse4_1_blendvps: V4SF counterpart of the blendvpd pattern.  Operand 3
;; uses the "Yz" constraint; operands 0..2 use *_not_xmm0 predicates.
6766 (define_insn "sse4_1_blendvps"
6767 [(set (match_operand:V4SF 0 "reg_not_xmm0_operand" "=x")
6768 (unspec:V4SF [(match_operand:V4SF 1 "reg_not_xmm0_operand" "0")
6769 (match_operand:V4SF 2 "nonimm_not_xmm0_operand" "xm")
6770 (match_operand:V4SF 3 "register_operand" "Yz")]
6773 "blendvps\t{%3, %2, %0|%0, %2, %3}"
6774 [(set_attr "type" "ssemov")
6775 (set_attr "prefix_extra" "1")
6776 (set_attr "mode" "V4SF")])
;; sse4_1_dppd: emits dppd on V2DF operands, modelled as an unspec.
;; Operand 1 is marked commutative and tied to the destination ("%0");
;; operand 3 is an immediate in 0..255.
6778 (define_insn "sse4_1_dppd"
6779 [(set (match_operand:V2DF 0 "register_operand" "=x")
6780 (unspec:V2DF [(match_operand:V2DF 1 "nonimmediate_operand" "%0")
6781 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
6782 (match_operand:SI 3 "const_0_to_255_operand" "n")]
6785 "dppd\t{%3, %2, %0|%0, %2, %3}"
6786 [(set_attr "type" "ssemul")
6787 (set_attr "prefix_extra" "1")
6788 (set_attr "mode" "V2DF")])
;; sse4_1_dpps: V4SF counterpart of the dppd pattern.  Operand 1 is
;; commutative and tied to the destination ("%0"); operand 3 is an
;; immediate in 0..255.
6790 (define_insn "sse4_1_dpps"
6791 [(set (match_operand:V4SF 0 "register_operand" "=x")
6792 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "%0")
6793 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
6794 (match_operand:SI 3 "const_0_to_255_operand" "n")]
6797 "dpps\t{%3, %2, %0|%0, %2, %3}"
6798 [(set_attr "type" "ssemul")
6799 (set_attr "prefix_extra" "1")
6800 (set_attr "mode" "V4SF")])
;; sse4_1_movntdqa: loads a V2DI value from a memory operand ("m") into
;; an SSE register using movntdqa, modelled as an unspec.
6802 (define_insn "sse4_1_movntdqa"
6803 [(set (match_operand:V2DI 0 "register_operand" "=x")
6804 (unspec:V2DI [(match_operand:V2DI 1 "memory_operand" "m")]
6807 "movntdqa\t{%1, %0|%0, %1}"
6808 [(set_attr "type" "ssecvt")
6809 (set_attr "prefix_extra" "1")
6810 (set_attr "mode" "TI")])
;; sse4_1_mpsadbw: emits mpsadbw on V16QI operands, modelled as an
;; unspec.  Operand 1 is tied to the destination; operand 3 is an
;; immediate in 0..255.
6812 (define_insn "sse4_1_mpsadbw"
6813 [(set (match_operand:V16QI 0 "register_operand" "=x")
6814 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
6815 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
6816 (match_operand:SI 3 "const_0_to_255_operand" "n")]
6819 "mpsadbw\t{%3, %2, %0|%0, %2, %3}"
6820 [(set_attr "type" "sselog1")
6821 (set_attr "prefix_extra" "1")
6822 (set_attr "mode" "TI")])
;; sse4_1_packusdw: emits packusdw, producing a V8HI result from two
;; V4SI inputs.  Operand 1 is tied to the destination; operand 2 may be
;; a register or memory ("xm").
6824 (define_insn "sse4_1_packusdw"
6825 [(set (match_operand:V8HI 0 "register_operand" "=x")
6828 (match_operand:V4SI 1 "register_operand" "0"))
6830 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
6832 "packusdw\t{%2, %0|%0, %2}"
6833 [(set_attr "type" "sselog")
6834 (set_attr "prefix_extra" "1")
6835 (set_attr "mode" "TI")])
;; sse4_1_pblendvb: emits pblendvb on V16QI operands.  Operand 3 uses
;; the "Yz" constraint; operands 0..2 use *_not_xmm0 predicates so they
;; stay distinct from it.
6837 (define_insn "sse4_1_pblendvb"
6838 [(set (match_operand:V16QI 0 "reg_not_xmm0_operand" "=x")
6839 (unspec:V16QI [(match_operand:V16QI 1 "reg_not_xmm0_operand" "0")
6840 (match_operand:V16QI 2 "nonimm_not_xmm0_operand" "xm")
6841 (match_operand:V16QI 3 "register_operand" "Yz")]
6844 "pblendvb\t{%3, %2, %0|%0, %2, %3}"
6845 [(set_attr "type" "ssemov")
6846 (set_attr "prefix_extra" "1")
6847 (set_attr "mode" "TI")])
;; sse4_1_pblendw: emits pblendw on V8HI operands.  Operand 2 (register
;; or memory) is listed before operand 1, which is tied to the
;; destination; operand 3 is an immediate in 0..255.
6849 (define_insn "sse4_1_pblendw"
6850 [(set (match_operand:V8HI 0 "register_operand" "=x")
6852 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
6853 (match_operand:V8HI 1 "register_operand" "0")
6854 (match_operand:SI 3 "const_0_to_255_operand" "n")))]
6856 "pblendw\t{%3, %2, %0|%0, %2, %3}"
6857 [(set_attr "type" "ssemov")
6858 (set_attr "prefix_extra" "1")
6859 (set_attr "mode" "TI")])
;; sse4_1_phminposuw: emits phminposuw on a V8HI source (register or
;; memory), modelled as UNSPEC_PHMINPOSUW.  The source is not tied to
;; the destination.
6861 (define_insn "sse4_1_phminposuw"
6862 [(set (match_operand:V8HI 0 "register_operand" "=x")
6863 (unspec:V8HI [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
6864 UNSPEC_PHMINPOSUW))]
6866 "phminposuw\t{%1, %0|%0, %1}"
6867 [(set_attr "type" "sselog1")
6868 (set_attr "prefix_extra" "1")
6869 (set_attr "mode" "TI")])
;; sse4_1_extendv8qiv8hi2: named, register-only form of pmovsxbw.  The
;; source is a full V16QI register from which elements are selected
;; starting at index 0.
6871 (define_insn "sse4_1_extendv8qiv8hi2"
6872 [(set (match_operand:V8HI 0 "register_operand" "=x")
6875 (match_operand:V16QI 1 "register_operand" "x")
6876 (parallel [(const_int 0)
6885 "pmovsxbw\t{%1, %0|%0, %1}"
6886 [(set_attr "type" "ssemov")
6887 (set_attr "prefix_extra" "1")
6888 (set_attr "mode" "TI")])
;; *sse4_1_extendv8qiv8hi2: unnamed ('*') pmovsxbw variant whose source
;; is a V8QI register or memory operand ("xm") wrapped in
;; vec_duplicate:V16QI.
6890 (define_insn "*sse4_1_extendv8qiv8hi2"
6891 [(set (match_operand:V8HI 0 "register_operand" "=x")
6894 (vec_duplicate:V16QI
6895 (match_operand:V8QI 1 "nonimmediate_operand" "xm"))
6896 (parallel [(const_int 0)
6905 "pmovsxbw\t{%1, %0|%0, %1}"
6906 [(set_attr "type" "ssemov")
6907 (set_attr "prefix_extra" "1")
6908 (set_attr "mode" "TI")])
;; sse4_1_extendv4qiv4si2: named, register-only form of pmovsxbd.  The
;; source is a full V16QI register; the result is V4SI.
6910 (define_insn "sse4_1_extendv4qiv4si2"
6911 [(set (match_operand:V4SI 0 "register_operand" "=x")
6914 (match_operand:V16QI 1 "register_operand" "x")
6915 (parallel [(const_int 0)
6920 "pmovsxbd\t{%1, %0|%0, %1}"
6921 [(set_attr "type" "ssemov")
6922 (set_attr "prefix_extra" "1")
6923 (set_attr "mode" "TI")])
;; *sse4_1_extendv4qiv4si2: unnamed ('*') pmovsxbd variant whose source
;; is a V4QI register or memory operand ("xm") wrapped in
;; vec_duplicate:V16QI.
6925 (define_insn "*sse4_1_extendv4qiv4si2"
6926 [(set (match_operand:V4SI 0 "register_operand" "=x")
6929 (vec_duplicate:V16QI
6930 (match_operand:V4QI 1 "nonimmediate_operand" "xm"))
6931 (parallel [(const_int 0)
6936 "pmovsxbd\t{%1, %0|%0, %1}"
6937 [(set_attr "type" "ssemov")
6938 (set_attr "prefix_extra" "1")
6939 (set_attr "mode" "TI")])
;; sse4_1_extendv2qiv2di2: named, register-only form of pmovsxbq.  The
;; source is a full V16QI register; the result is V2DI.
6941 (define_insn "sse4_1_extendv2qiv2di2"
6942 [(set (match_operand:V2DI 0 "register_operand" "=x")
6945 (match_operand:V16QI 1 "register_operand" "x")
6946 (parallel [(const_int 0)
6949 "pmovsxbq\t{%1, %0|%0, %1}"
6950 [(set_attr "type" "ssemov")
6951 (set_attr "prefix_extra" "1")
6952 (set_attr "mode" "TI")])
;; *sse4_1_extendv2qiv2di2: unnamed ('*') pmovsxbq variant whose source
;; is a V2QI register or memory operand ("xm") wrapped in
;; vec_duplicate:V16QI.
6954 (define_insn "*sse4_1_extendv2qiv2di2"
6955 [(set (match_operand:V2DI 0 "register_operand" "=x")
6958 (vec_duplicate:V16QI
6959 (match_operand:V2QI 1 "nonimmediate_operand" "xm"))
6960 (parallel [(const_int 0)
6963 "pmovsxbq\t{%1, %0|%0, %1}"
6964 [(set_attr "type" "ssemov")
6965 (set_attr "prefix_extra" "1")
6966 (set_attr "mode" "TI")])
;; sse4_1_extendv4hiv4si2: named, register-only form of pmovsxwd.  The
;; source is a full V8HI register; the result is V4SI.
6968 (define_insn "sse4_1_extendv4hiv4si2"
6969 [(set (match_operand:V4SI 0 "register_operand" "=x")
6972 (match_operand:V8HI 1 "register_operand" "x")
6973 (parallel [(const_int 0)
6978 "pmovsxwd\t{%1, %0|%0, %1}"
6979 [(set_attr "type" "ssemov")
6980 (set_attr "prefix_extra" "1")
6981 (set_attr "mode" "TI")])
;; *sse4_1_extendv4hiv4si2: unnamed ('*') pmovsxwd variant whose source
;; may be a register or memory ("xm").  pmovsxwd consumes four 16-bit
;; elements (64 bits), so the source operand is V4HI; this matches the
;; operand mode used by *sse4_1_zero_extendv4hiv4si2 (pmovzxwd).  A
;; narrower V2HI memory operand would under-describe the bytes read.
6983 (define_insn "*sse4_1_extendv4hiv4si2"
6984 [(set (match_operand:V4SI 0 "register_operand" "=x")
6988 (match_operand:V4HI 1 "nonimmediate_operand" "xm"))
6989 (parallel [(const_int 0)
6994 "pmovsxwd\t{%1, %0|%0, %1}"
6995 [(set_attr "type" "ssemov")
6996 (set_attr "prefix_extra" "1")
6997 (set_attr "mode" "TI")])
;; sse4_1_extendv2hiv2di2: named, register-only form of pmovsxwq.  The
;; source is a full V8HI register; the result is V2DI.
6999 (define_insn "sse4_1_extendv2hiv2di2"
7000 [(set (match_operand:V2DI 0 "register_operand" "=x")
7003 (match_operand:V8HI 1 "register_operand" "x")
7004 (parallel [(const_int 0)
7007 "pmovsxwq\t{%1, %0|%0, %1}"
7008 [(set_attr "type" "ssemov")
7009 (set_attr "prefix_extra" "1")
7010 (set_attr "mode" "TI")])
;; *sse4_1_extendv2hiv2di2: unnamed ('*') pmovsxwq variant whose source
;; may be a register or memory ("xm").  pmovsxwq consumes two 16-bit
;; elements (32 bits), so the source operand is V2HI; this matches the
;; operand mode used by *sse4_1_zero_extendv2hiv2di2 (pmovzxwq) and keeps
;; the pattern distinct from the full-register V8HI form.
7012 (define_insn "*sse4_1_extendv2hiv2di2"
7013 [(set (match_operand:V2DI 0 "register_operand" "=x")
7017 (match_operand:V2HI 1 "nonimmediate_operand" "xm"))
7018 (parallel [(const_int 0)
7021 "pmovsxwq\t{%1, %0|%0, %1}"
7022 [(set_attr "type" "ssemov")
7023 (set_attr "prefix_extra" "1")
7024 (set_attr "mode" "TI")])
;; sse4_1_extendv2siv2di2: named, register-only form of pmovsxdq.  The
;; source is a full V4SI register; the result is V2DI.
7026 (define_insn "sse4_1_extendv2siv2di2"
7027 [(set (match_operand:V2DI 0 "register_operand" "=x")
7030 (match_operand:V4SI 1 "register_operand" "x")
7031 (parallel [(const_int 0)
7034 "pmovsxdq\t{%1, %0|%0, %1}"
7035 [(set_attr "type" "ssemov")
7036 (set_attr "prefix_extra" "1")
7037 (set_attr "mode" "TI")])
;; *sse4_1_extendv2siv2di2: unnamed ('*') pmovsxdq variant whose source
;; is a V2SI register or memory operand ("xm").
7039 (define_insn "*sse4_1_extendv2siv2di2"
7040 [(set (match_operand:V2DI 0 "register_operand" "=x")
7044 (match_operand:V2SI 1 "nonimmediate_operand" "xm"))
7045 (parallel [(const_int 0)
7048 "pmovsxdq\t{%1, %0|%0, %1}"
7049 [(set_attr "type" "ssemov")
7050 (set_attr "prefix_extra" "1")
7051 (set_attr "mode" "TI")])
;; sse4_1_zero_extendv8qiv8hi2: named, register-only form of pmovzxbw.
;; The source is a full V16QI register; the result is V8HI.
7053 (define_insn "sse4_1_zero_extendv8qiv8hi2"
7054 [(set (match_operand:V8HI 0 "register_operand" "=x")
7057 (match_operand:V16QI 1 "register_operand" "x")
7058 (parallel [(const_int 0)
7067 "pmovzxbw\t{%1, %0|%0, %1}"
7068 [(set_attr "type" "ssemov")
7069 (set_attr "prefix_extra" "1")
7070 (set_attr "mode" "TI")])
;; *sse4_1_zero_extendv8qiv8hi2: unnamed ('*') pmovzxbw variant whose
;; source is a V8QI register or memory operand ("xm") wrapped in
;; vec_duplicate:V16QI.
7072 (define_insn "*sse4_1_zero_extendv8qiv8hi2"
7073 [(set (match_operand:V8HI 0 "register_operand" "=x")
7076 (vec_duplicate:V16QI
7077 (match_operand:V8QI 1 "nonimmediate_operand" "xm"))
7078 (parallel [(const_int 0)
7087 "pmovzxbw\t{%1, %0|%0, %1}"
7088 [(set_attr "type" "ssemov")
7089 (set_attr "prefix_extra" "1")
7090 (set_attr "mode" "TI")])
;; sse4_1_zero_extendv4qiv4si2: named, register-only form of pmovzxbd.
;; The source is a full V16QI register; the result is V4SI.
7092 (define_insn "sse4_1_zero_extendv4qiv4si2"
7093 [(set (match_operand:V4SI 0 "register_operand" "=x")
7096 (match_operand:V16QI 1 "register_operand" "x")
7097 (parallel [(const_int 0)
7102 "pmovzxbd\t{%1, %0|%0, %1}"
7103 [(set_attr "type" "ssemov")
7104 (set_attr "prefix_extra" "1")
7105 (set_attr "mode" "TI")])
;; *sse4_1_zero_extendv4qiv4si2: unnamed ('*') pmovzxbd variant whose
;; source is a V4QI register or memory operand ("xm") wrapped in
;; vec_duplicate:V16QI.
7107 (define_insn "*sse4_1_zero_extendv4qiv4si2"
7108 [(set (match_operand:V4SI 0 "register_operand" "=x")
7111 (vec_duplicate:V16QI
7112 (match_operand:V4QI 1 "nonimmediate_operand" "xm"))
7113 (parallel [(const_int 0)
7118 "pmovzxbd\t{%1, %0|%0, %1}"
7119 [(set_attr "type" "ssemov")
7120 (set_attr "prefix_extra" "1")
7121 (set_attr "mode" "TI")])
;; sse4_1_zero_extendv2qiv2di2: named, register-only form of pmovzxbq.
;; The source is a full V16QI register; the result is V2DI.
7123 (define_insn "sse4_1_zero_extendv2qiv2di2"
7124 [(set (match_operand:V2DI 0 "register_operand" "=x")
7127 (match_operand:V16QI 1 "register_operand" "x")
7128 (parallel [(const_int 0)
7131 "pmovzxbq\t{%1, %0|%0, %1}"
7132 [(set_attr "type" "ssemov")
7133 (set_attr "prefix_extra" "1")
7134 (set_attr "mode" "TI")])
;; *sse4_1_zero_extendv2qiv2di2: unnamed ('*') pmovzxbq variant whose
;; source is a V2QI register or memory operand ("xm") wrapped in
;; vec_duplicate:V16QI.
7136 (define_insn "*sse4_1_zero_extendv2qiv2di2"
7137 [(set (match_operand:V2DI 0 "register_operand" "=x")
7140 (vec_duplicate:V16QI
7141 (match_operand:V2QI 1 "nonimmediate_operand" "xm"))
7142 (parallel [(const_int 0)
7145 "pmovzxbq\t{%1, %0|%0, %1}"
7146 [(set_attr "type" "ssemov")
7147 (set_attr "prefix_extra" "1")
7148 (set_attr "mode" "TI")])
;; sse4_1_zero_extendv4hiv4si2: named, register-only form of pmovzxwd.
;; The source is a full V8HI register; the result is V4SI.
7150 (define_insn "sse4_1_zero_extendv4hiv4si2"
7151 [(set (match_operand:V4SI 0 "register_operand" "=x")
7154 (match_operand:V8HI 1 "register_operand" "x")
7155 (parallel [(const_int 0)
7160 "pmovzxwd\t{%1, %0|%0, %1}"
7161 [(set_attr "type" "ssemov")
7162 (set_attr "prefix_extra" "1")
7163 (set_attr "mode" "TI")])
;; *sse4_1_zero_extendv4hiv4si2: unnamed ('*') pmovzxwd variant whose
;; source is a V4HI register or memory operand ("xm").
7165 (define_insn "*sse4_1_zero_extendv4hiv4si2"
7166 [(set (match_operand:V4SI 0 "register_operand" "=x")
7170 (match_operand:V4HI 1 "nonimmediate_operand" "xm"))
7171 (parallel [(const_int 0)
7176 "pmovzxwd\t{%1, %0|%0, %1}"
7177 [(set_attr "type" "ssemov")
7178 (set_attr "prefix_extra" "1")
7179 (set_attr "mode" "TI")])
;; sse4_1_zero_extendv2hiv2di2: named, register-only form of pmovzxwq.
;; The source is a full V8HI register; the result is V2DI.
7181 (define_insn "sse4_1_zero_extendv2hiv2di2"
7182 [(set (match_operand:V2DI 0 "register_operand" "=x")
7185 (match_operand:V8HI 1 "register_operand" "x")
7186 (parallel [(const_int 0)
7189 "pmovzxwq\t{%1, %0|%0, %1}"
7190 [(set_attr "type" "ssemov")
7191 (set_attr "prefix_extra" "1")
7192 (set_attr "mode" "TI")])
;; *sse4_1_zero_extendv2hiv2di2: unnamed ('*') pmovzxwq variant whose
;; source is a V2HI register or memory operand ("xm").
7194 (define_insn "*sse4_1_zero_extendv2hiv2di2"
7195 [(set (match_operand:V2DI 0 "register_operand" "=x")
7199 (match_operand:V2HI 1 "nonimmediate_operand" "xm"))
7200 (parallel [(const_int 0)
7203 "pmovzxwq\t{%1, %0|%0, %1}"
7204 [(set_attr "type" "ssemov")
7205 (set_attr "prefix_extra" "1")
7206 (set_attr "mode" "TI")])
;; sse4_1_zero_extendv2siv2di2: named, register-only form of pmovzxdq.
;; The source is a full V4SI register; the result is V2DI.
7208 (define_insn "sse4_1_zero_extendv2siv2di2"
7209 [(set (match_operand:V2DI 0 "register_operand" "=x")
7212 (match_operand:V4SI 1 "register_operand" "x")
7213 (parallel [(const_int 0)
7216 "pmovzxdq\t{%1, %0|%0, %1}"
7217 [(set_attr "type" "ssemov")
7218 (set_attr "prefix_extra" "1")
7219 (set_attr "mode" "TI")])
;; *sse4_1_zero_extendv2siv2di2: unnamed ('*') pmovzxdq variant whose
;; source is a V2SI register or memory operand ("xm").
7221 (define_insn "*sse4_1_zero_extendv2siv2di2"
7222 [(set (match_operand:V2DI 0 "register_operand" "=x")
7226 (match_operand:V2SI 1 "nonimmediate_operand" "xm"))
7227 (parallel [(const_int 0)
7230 "pmovzxdq\t{%1, %0|%0, %1}"
7231 [(set_attr "type" "ssemov")
7232 (set_attr "prefix_extra" "1")
7233 (set_attr "mode" "TI")])
7235 ;; ptest is very similar to comiss and ucomiss when setting FLAGS_REG.
7236 ;; But it is not really a compare instruction.
;; sse4_1_ptest: sets FLAGS_REG (CC mode) from an unspec of two V2DI
;; operands and emits ptest.  Operand 0 must be a register; operand 1
;; may be a register or memory ("xm").  No vector result is produced.
7237 (define_insn "sse4_1_ptest"
7238 [(set (reg:CC FLAGS_REG)
7239 (unspec:CC [(match_operand:V2DI 0 "register_operand" "x")
7240 (match_operand:V2DI 1 "nonimmediate_operand" "xm")]
7243 "ptest\t{%1, %0|%0, %1}"
7244 [(set_attr "type" "ssecomi")
7245 (set_attr "prefix_extra" "1")
7246 (set_attr "mode" "TI")])
;; sse4_1_roundpd: emits roundpd on a V2DF source (register or memory).
;; Operand 2 is an immediate accepted by const_0_to_15_operand.  The
;; source is not tied to the destination.
7248 (define_insn "sse4_1_roundpd"
7249 [(set (match_operand:V2DF 0 "register_operand" "=x")
7250 (unspec:V2DF [(match_operand:V2DF 1 "nonimmediate_operand" "xm")
7251 (match_operand:SI 2 "const_0_to_15_operand" "n")]
7254 "roundpd\t{%2, %1, %0|%0, %1, %2}"
7255 [(set_attr "type" "ssecvt")
7256 (set_attr "prefix_extra" "1")
7257 (set_attr "mode" "V2DF")])
;; sse4_1_roundps: V4SF counterpart of the roundpd pattern.  Operand 2
;; is an immediate accepted by const_0_to_15_operand.
7259 (define_insn "sse4_1_roundps"
7260 [(set (match_operand:V4SF 0 "register_operand" "=x")
7261 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")
7262 (match_operand:SI 2 "const_0_to_15_operand" "n")]
7265 "roundps\t{%2, %1, %0|%0, %1, %2}"
7266 [(set_attr "type" "ssecvt")
7267 (set_attr "prefix_extra" "1")
7268 (set_attr "mode" "V4SF")])
;; sse4_1_roundsd: emits roundsd.  Operand 2 (V2DF register) and the
;; immediate operand 3 form the unspec; operand 1 is tied to the
;; destination ("0").
7270 (define_insn "sse4_1_roundsd"
7271 [(set (match_operand:V2DF 0 "register_operand" "=x")
7273 (unspec:V2DF [(match_operand:V2DF 2 "register_operand" "x")
7274 (match_operand:SI 3 "const_0_to_15_operand" "n")]
7276 (match_operand:V2DF 1 "register_operand" "0")
7279 "roundsd\t{%3, %2, %0|%0, %2, %3}"
7280 [(set_attr "type" "ssecvt")
7281 (set_attr "prefix_extra" "1")
7282 (set_attr "mode" "V2DF")])
;; sse4_1_roundss: V4SF counterpart of the roundsd pattern.  Operand 2
;; and the immediate operand 3 form the unspec; operand 1 is tied to the
;; destination ("0").
7284 (define_insn "sse4_1_roundss"
7285 [(set (match_operand:V4SF 0 "register_operand" "=x")
7287 (unspec:V4SF [(match_operand:V4SF 2 "register_operand" "x")
7288 (match_operand:SI 3 "const_0_to_15_operand" "n")]
7290 (match_operand:V4SF 1 "register_operand" "0")
7293 "roundss\t{%3, %2, %0|%0, %2, %3}"
7294 [(set_attr "type" "ssecvt")
7295 (set_attr "prefix_extra" "1")
7296 (set_attr "mode" "V4SF")])
7298 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7300 ;; Intel SSE4.2 string/text processing instructions
7302 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; sse4_2_pcmpestr: combined pattern with three results -- operand 0
;; (SI, constraint "c"), operand 1 (V16QI, constraint "Yz") and
;; FLAGS_REG.  It is only valid before reload
;; (!(reload_completed || reload_in_progress)).  The split code looks
;; for REG_UNUSED notes on operand 0, operand 1 and FLAGS_REG and emits
;; the pcmpestri, pcmpestrm and/or flags-only (cconly) patterns; the
;; cconly form is chosen when only the flags are used
;; (flags && !(ecx || xmm0)).
7304 (define_insn_and_split "sse4_2_pcmpestr"
7305 [(set (match_operand:SI 0 "register_operand" "=c,c")
7307 [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
7308 (match_operand:SI 3 "register_operand" "a,a")
7309 (match_operand:V16QI 4 "nonimm_not_xmm0_operand" "x,m")
7310 (match_operand:SI 5 "register_operand" "d,d")
7311 (match_operand:SI 6 "const_0_to_255_operand" "n,n")]
7313 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
7321 (set (reg:CC FLAGS_REG)
7330 && !(reload_completed || reload_in_progress)"
7335 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
7336 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
7337 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
7340 emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
7341 operands[3], operands[4],
7342 operands[5], operands[6]));
7344 emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
7345 operands[3], operands[4],
7346 operands[5], operands[6]));
7347 if (flags && !(ecx || xmm0))
7348 emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
7349 operands[2], operands[3],
7350 operands[4], operands[5],
7354 [(set_attr "type" "sselog")
7355 (set_attr "prefix_data16" "1")
7356 (set_attr "prefix_extra" "1")
7357 (set_attr "memory" "none,load")
7358 (set_attr "mode" "TI")])
;; sse4_2_pcmpestri: emits pcmpestri.  The SI result is constrained to
;; "c"; the two SI inputs are constrained to "a" and "d".  FLAGS_REG is
;; also set.  Two alternatives: operand 3 in a register, or in memory
;; (the "memory" attribute is none/load accordingly).
7360 (define_insn "sse4_2_pcmpestri"
7361 [(set (match_operand:SI 0 "register_operand" "=c,c")
7363 [(match_operand:V16QI 1 "register_operand" "x,x")
7364 (match_operand:SI 2 "register_operand" "a,a")
7365 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
7366 (match_operand:SI 4 "register_operand" "d,d")
7367 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
7369 (set (reg:CC FLAGS_REG)
7378 "pcmpestri\t{%5, %3, %1|%1, %3, %5}"
7379 [(set_attr "type" "sselog")
7380 (set_attr "prefix_data16" "1")
7381 (set_attr "prefix_extra" "1")
7382 (set_attr "memory" "none,load")
7383 (set_attr "mode" "TI")])
;; sse4_2_pcmpestrm: emits pcmpestrm.  The V16QI result is constrained
;; to "Yz"; the two SI inputs are constrained to "a" and "d".  FLAGS_REG
;; is also set.  Two alternatives: operand 3 in a register, or in memory.
7385 (define_insn "sse4_2_pcmpestrm"
7386 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
7388 [(match_operand:V16QI 1 "register_operand" "x,x")
7389 (match_operand:SI 2 "register_operand" "a,a")
7390 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
7391 (match_operand:SI 4 "register_operand" "d,d")
7392 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
7394 (set (reg:CC FLAGS_REG)
7403 "pcmpestrm\t{%5, %3, %1|%1, %3, %5}"
7404 [(set_attr "type" "sselog")
7405 (set_attr "prefix_data16" "1")
7406 (set_attr "prefix_extra" "1")
7407 (set_attr "memory" "none,load")
7408 (set_attr "mode" "TI")])
;; sse4_2_pcmpestr_cconly: flags-only form; the only set is FLAGS_REG,
;; and operands 0 and 1 are clobbered scratches.  Four alternatives: the
;; first two emit pcmpestrm with the V16QI scratch in "Yz" (SI scratch
;; unused, "X"); the last two emit pcmpestri with the SI scratch in "c"
;; (V16QI scratch unused).  Within each pair, operand 4 is a register
;; and then memory.
7410 (define_insn "sse4_2_pcmpestr_cconly"
7411 [(set (reg:CC FLAGS_REG)
7413 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
7414 (match_operand:SI 3 "register_operand" "a,a,a,a")
7415 (match_operand:V16QI 4 "nonimmediate_operand" "x,m,x,m")
7416 (match_operand:SI 5 "register_operand" "d,d,d,d")
7417 (match_operand:SI 6 "const_0_to_255_operand" "n,n,n,n")]
7419 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
7420 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
7423 pcmpestrm\t{%6, %4, %2|%2, %4, %6}
7424 pcmpestrm\t{%6, %4, %2|%2, %4, %6}
7425 pcmpestri\t{%6, %4, %2|%2, %4, %6}
7426 pcmpestri\t{%6, %4, %2|%2, %4, %6}"
7427 [(set_attr "type" "sselog")
7428 (set_attr "prefix_data16" "1")
7429 (set_attr "prefix_extra" "1")
7430 (set_attr "memory" "none,load,none,load")
7431 (set_attr "mode" "TI")])
;; sse4_2_pcmpistr: combined pattern with three results -- operand 0
;; (SI, constraint "c"), operand 1 (V16QI, constraint "Yz") and
;; FLAGS_REG.  It is only valid before reload.  The split code looks for
;; REG_UNUSED notes on operand 0, operand 1 and FLAGS_REG and emits the
;; pcmpistri, pcmpistrm and/or flags-only (cconly) patterns; the cconly
;; form is chosen when only the flags are used
;; (flags && !(ecx || xmm0)).
7433 (define_insn_and_split "sse4_2_pcmpistr"
7434 [(set (match_operand:SI 0 "register_operand" "=c,c")
7436 [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
7437 (match_operand:V16QI 3 "nonimm_not_xmm0_operand" "x,m")
7438 (match_operand:SI 4 "const_0_to_255_operand" "n,n")]
7440 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
7446 (set (reg:CC FLAGS_REG)
7453 && !(reload_completed || reload_in_progress)"
7458 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
7459 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
7460 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
7463 emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
7464 operands[3], operands[4]));
7466 emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
7467 operands[3], operands[4]));
7468 if (flags && !(ecx || xmm0))
7469 emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
7470 operands[2], operands[3],
7474 [(set_attr "type" "sselog")
7475 (set_attr "prefix_data16" "1")
7476 (set_attr "prefix_extra" "1")
7477 (set_attr "memory" "none,load")
7478 (set_attr "mode" "TI")])
;; sse4_2_pcmpistri: emits pcmpistri.  The SI result is constrained to
;; "c" and FLAGS_REG is also set.  Two alternatives: operand 2 in a
;; register, or in memory (the "memory" attribute is none/load).
7480 (define_insn "sse4_2_pcmpistri"
7481 [(set (match_operand:SI 0 "register_operand" "=c,c")
7483 [(match_operand:V16QI 1 "register_operand" "x,x")
7484 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
7485 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
7487 (set (reg:CC FLAGS_REG)
7494 "pcmpistri\t{%3, %2, %1|%1, %2, %3}"
7495 [(set_attr "type" "sselog")
7496 (set_attr "prefix_data16" "1")
7497 (set_attr "prefix_extra" "1")
7498 (set_attr "memory" "none,load")
7499 (set_attr "mode" "TI")])
;; sse4_2_pcmpistrm: emits pcmpistrm.  The V16QI result is constrained
;; to "Yz" and FLAGS_REG is also set.  Two alternatives: operand 2 in a
;; register, or in memory.
7501 (define_insn "sse4_2_pcmpistrm"
7502 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
7504 [(match_operand:V16QI 1 "register_operand" "x,x")
7505 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
7506 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
7508 (set (reg:CC FLAGS_REG)
7515 "pcmpistrm\t{%3, %2, %1|%1, %2, %3}"
7516 [(set_attr "type" "sselog")
7517 (set_attr "prefix_data16" "1")
7518 (set_attr "prefix_extra" "1")
7519 (set_attr "memory" "none,load")
7520 (set_attr "mode" "TI")])
;; sse4_2_pcmpistr_cconly: flags-only form; the only set is FLAGS_REG,
;; and operands 0 and 1 are clobbered scratches.  Four alternatives: the
;; first two emit pcmpistrm with the V16QI scratch in "Yz"; the last two
;; emit pcmpistri with the SI scratch in "c".  Within each pair,
;; operand 3 is a register and then memory.
7522 (define_insn "sse4_2_pcmpistr_cconly"
7523 [(set (reg:CC FLAGS_REG)
7525 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
7526 (match_operand:V16QI 3 "nonimmediate_operand" "x,m,x,m")
7527 (match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")]
7529 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
7530 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
7533 pcmpistrm\t{%4, %3, %2|%2, %3, %4}
7534 pcmpistrm\t{%4, %3, %2|%2, %3, %4}
7535 pcmpistri\t{%4, %3, %2|%2, %3, %4}
7536 pcmpistri\t{%4, %3, %2|%2, %3, %4}"
7537 [(set_attr "type" "sselog")
7538 (set_attr "prefix_data16" "1")
7539 (set_attr "prefix_extra" "1")
7540 (set_attr "memory" "none,load,none,load")
7541 (set_attr "mode" "TI")])
7543 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7545 ;; SSE5 instructions
7547 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7549 ;; SSE5 parallel integer multiply/add instructions.
7550 ;; Note the instruction does not allow the value being added to be a memory
7551 ;; operand.  However, pretending via the nonimmediate_operand predicate
7552 ;; that it does, and splitting it later, allows the following to be recognized:
7553 ;; a[i] = b[i] * c[i] + d[i];
;; sse5_pmacsww: emits pmacsww on V8HI operands.  Operand 3 is tied to
;; the destination in every alternative.  Enabled when TARGET_SSE5 holds
;; and ix86_sse5_valid_op_p (operands, insn, 4, false, 2) accepts the
;; operands.  In the third alternative operand 1 is the memory operand,
;; so the template prints %1 and %2 in swapped positions.
7554 (define_insn "sse5_pmacsww"
7555 [(set (match_operand:V8HI 0 "register_operand" "=x,x,x")
7558 (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,m")
7559 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x"))
7560 (match_operand:V8HI 3 "nonimmediate_operand" "0,0,0")))]
7561 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 2)"
7563 pmacsww\t{%3, %2, %1, %0|%0, %1, %2, %3}
7564 pmacsww\t{%3, %2, %1, %0|%0, %1, %2, %3}
7565 pmacsww\t{%3, %1, %2, %0|%0, %2, %1, %3}"
7566 [(set_attr "type" "ssemuladd")
7567 (set_attr "mode" "TI")])
7569 ;; Split pmacsww with two memory operands into a load and the pmacsww.
;; Split body: applies when the operands are rejected by
;; ix86_sse5_valid_op_p with a final argument of 1 but accepted with 2,
;; and operand 0 is not mentioned in operands 1..3.  It calls
;; ix86_expand_sse5_multiple_memory on the operands and then emits
;; sse5_pmacsww.
7571 [(set (match_operand:V8HI 0 "register_operand" "")
7573 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
7574 (match_operand:V8HI 2 "nonimmediate_operand" ""))
7575 (match_operand:V8HI 3 "nonimmediate_operand" "")))]
7577 && !ix86_sse5_valid_op_p (operands, insn, 4, false, 1)
7578 && ix86_sse5_valid_op_p (operands, insn, 4, false, 2)
7579 && !reg_mentioned_p (operands[0], operands[1])
7580 && !reg_mentioned_p (operands[0], operands[2])
7581 && !reg_mentioned_p (operands[0], operands[3])"
7584 ix86_expand_sse5_multiple_memory (operands, 4, V8HImode);
7585 emit_insn (gen_sse5_pmacsww (operands[0], operands[1], operands[2],
;; sse5_pmacssww: emits pmacssww on V8HI operands.  Operand 3 is tied to
;; the destination in every alternative.  Enabled when TARGET_SSE5 holds
;; and ix86_sse5_valid_op_p (operands, insn, 4, false, 1) accepts the
;; operands.  The third alternative prints %1 and %2 in swapped
;; positions because operand 1 is the memory operand there.
7590 (define_insn "sse5_pmacssww"
7591 [(set (match_operand:V8HI 0 "register_operand" "=x,x,x")
7593 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,m")
7594 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x"))
7595 (match_operand:V8HI 3 "nonimmediate_operand" "0,0,0")))]
7596 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)"
7598 pmacssww\t{%3, %2, %1, %0|%0, %1, %2, %3}
7599 pmacssww\t{%3, %2, %1, %0|%0, %1, %2, %3}
7600 pmacssww\t{%3, %1, %2, %0|%0, %2, %1, %3}"
7601 [(set_attr "type" "ssemuladd")
7602 (set_attr "mode" "TI")])
7604 ;; Note the instruction does not allow the value being added to be a memory
7605 ;; operand.  However, pretending via the nonimmediate_operand predicate
7606 ;; that it does, and splitting it later, allows the following to be recognized:
7607 ;; a[i] = b[i] * c[i] + d[i];
;; sse5_pmacsdd: emits pmacsdd on V4SI operands.  Operand 3 is tied to
;; the destination in every alternative.  Enabled when TARGET_SSE5 holds
;; and ix86_sse5_valid_op_p (operands, insn, 4, false, 2) accepts the
;; operands.  The third alternative prints %1 and %2 in swapped
;; positions because operand 1 is the memory operand there.
7608 (define_insn "sse5_pmacsdd"
7609 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
7612 (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
7613 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x"))
7614 (match_operand:V4SI 3 "nonimmediate_operand" "0,0,0")))]
7615 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 2)"
7617 pmacsdd\t{%3, %2, %1, %0|%0, %1, %2, %3}
7618 pmacsdd\t{%3, %2, %1, %0|%0, %1, %2, %3}
7619 pmacsdd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
7620 [(set_attr "type" "ssemuladd")
7621 (set_attr "mode" "TI")])
7623 ;; Split pmacsdd with two memory operands into a load and the pmacsdd.
;; The condition applies when the operands are rejected by
;; ix86_sse5_valid_op_p with a last argument of 1 but accepted with 2, and
;; the destination register is not mentioned in any of the three sources.
;; ix86_expand_sse5_multiple_memory is called on the operands before
;; gen_sse5_pmacsdd is emitted.
;; NOTE(review): the last argument presumably caps the number of memory
;; operands -- confirm against ix86_sse5_valid_op_p.
7625 [(set (match_operand:V4SI 0 "register_operand" "")
7627 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "")
7628 (match_operand:V4SI 2 "nonimmediate_operand" ""))
7629 (match_operand:V4SI 3 "nonimmediate_operand" "")))]
7631 && !ix86_sse5_valid_op_p (operands, insn, 4, false, 1)
7632 && ix86_sse5_valid_op_p (operands, insn, 4, false, 2)
7633 && !reg_mentioned_p (operands[0], operands[1])
7634 && !reg_mentioned_p (operands[0], operands[2])
7635 && !reg_mentioned_p (operands[0], operands[3])"
7638 ix86_expand_sse5_multiple_memory (operands, 4, V4SImode);
7639 emit_insn (gen_sse5_pmacsdd (operands[0], operands[1], operands[2],
;; V4SI multiply-accumulate emitted as pmacssdd.  Either multiplicand may
;; be in memory (the third template swaps %1 and %2); the addend, operand 3,
;; is tied to the output register in every alternative.
7644 (define_insn "sse5_pmacssdd"
7645 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
7647 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
7648 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x"))
7649 (match_operand:V4SI 3 "nonimmediate_operand" "0,0,0")))]
7650 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)"
7652 pmacssdd\t{%3, %2, %1, %0|%0, %1, %2, %3}
7653 pmacssdd\t{%3, %2, %1, %0|%0, %1, %2, %3}
7654 pmacssdd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
7655 [(set_attr "type" "ssemuladd")
7656 (set_attr "mode" "TI")])
;; pmacssdql: V4SI multiplicands (operands 1 and 2), V2DI addend and result.
;; The lane selectors visible here start at element 1.  The addend must be
;; a register and is tied to the output.
7658 (define_insn "sse5_pmacssdql"
7659 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x")
7664 (match_operand:V4SI 1 "nonimmediate_operand" "x,x,m")
7665 (parallel [(const_int 1)
7668 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")
7669 (parallel [(const_int 1)
7671 (match_operand:V2DI 3 "register_operand" "0,0,0")))]
7672 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)"
7674 pmacssdql\t{%3, %2, %1, %0|%0, %1, %2, %3}
7675 pmacssdql\t{%3, %2, %1, %0|%0, %1, %2, %3}
7676 pmacssdql\t{%3, %1, %2, %0|%0, %2, %1, %3}"
7677 [(set_attr "type" "ssemuladd")
7678 (set_attr "mode" "TI")])
;; pmacssdqh: V4SI multiplicands (operands 1 and 2), V2DI addend and result.
;; The lane selectors visible here start at element 0.  The addend must be
;; a register and is tied to the output.
7680 (define_insn "sse5_pmacssdqh"
7681 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x")
7686 (match_operand:V4SI 1 "nonimmediate_operand" "x,x,m")
7687 (parallel [(const_int 0)
7691 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")
7692 (parallel [(const_int 0)
7694 (match_operand:V2DI 3 "register_operand" "0,0,0")))]
7695 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)"
7697 pmacssdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}
7698 pmacssdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}
7699 pmacssdqh\t{%3, %1, %2, %0|%0, %2, %1, %3}"
7700 [(set_attr "type" "ssemuladd")
7701 (set_attr "mode" "TI")])
;; pmacsdql: V4SI multiplicands (operands 1 and 2), V2DI addend and result.
;; The lane selectors visible here start at element 1.  The addend must be
;; a register and is tied to the output.
7703 (define_insn "sse5_pmacsdql"
7704 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x")
7709 (match_operand:V4SI 1 "nonimmediate_operand" "x,x,m")
7710 (parallel [(const_int 1)
7714 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")
7715 (parallel [(const_int 1)
7717 (match_operand:V2DI 3 "register_operand" "0,0,0")))]
7718 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)"
7720 pmacsdql\t{%3, %2, %1, %0|%0, %1, %2, %3}
7721 pmacsdql\t{%3, %2, %1, %0|%0, %1, %2, %3}
7722 pmacsdql\t{%3, %1, %2, %0|%0, %2, %1, %3}"
7723 [(set_attr "type" "ssemuladd")
7724 (set_attr "mode" "TI")])
;; pmacsdqh: V4SI multiplicands (operands 1 and 2), V2DI addend and result.
;; The lane selectors visible here start at element 0.  The addend must be
;; a register and is tied to the output.
7726 (define_insn "sse5_pmacsdqh"
7727 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x")
7732 (match_operand:V4SI 1 "nonimmediate_operand" "x,x,m")
7733 (parallel [(const_int 0)
7737 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")
7738 (parallel [(const_int 0)
7740 (match_operand:V2DI 3 "register_operand" "0,0,0")))]
7741 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)"
7743 pmacsdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}
7744 pmacsdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}
7745 pmacsdqh\t{%3, %1, %2, %0|%0, %2, %1, %3}"
7746 [(set_attr "type" "ssemuladd")
7747 (set_attr "mode" "TI")])
7749 ;; SSE5 parallel integer multiply/add instructions for the intrinsics
;; pmacsswd: V8HI multiplicands (operands 1 and 2), V4SI addend and result.
;; The lane selectors visible here start at element 1.  The addend must be
;; a register and is tied to the output.
7750 (define_insn "sse5_pmacsswd"
7751 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
7756 (match_operand:V8HI 1 "nonimmediate_operand" "x,x,m")
7757 (parallel [(const_int 1)
7763 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x")
7764 (parallel [(const_int 1)
7768 (match_operand:V4SI 3 "register_operand" "0,0,0")))]
7769 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)"
7771 pmacsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
7772 pmacsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
7773 pmacsswd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
7774 [(set_attr "type" "ssemuladd")
7775 (set_attr "mode" "TI")])
;; pmacswd: V8HI multiplicands (operands 1 and 2), V4SI addend and result.
;; The lane selectors visible here start at element 1.  The addend must be
;; a register and is tied to the output.
7777 (define_insn "sse5_pmacswd"
7778 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
7783 (match_operand:V8HI 1 "nonimmediate_operand" "x,x,m")
7784 (parallel [(const_int 1)
7790 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x")
7791 (parallel [(const_int 1)
7795 (match_operand:V4SI 3 "register_operand" "0,0,0")))]
7796 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)"
7798 pmacswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
7799 pmacswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
7800 pmacswd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
7801 [(set_attr "type" "ssemuladd")
7802 (set_attr "mode" "TI")])
;; pmadcsswd: V8HI multiplicands (operands 1 and 2), V4SI addend and result.
;; Two groups of lane selectors are visible, one starting at element 0 and
;; one starting at element 1.  The addend must be a register and is tied to
;; the output.
7804 (define_insn "sse5_pmadcsswd"
7805 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
7811 (match_operand:V8HI 1 "nonimmediate_operand" "x,x,m")
7812 (parallel [(const_int 0)
7818 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x")
7819 (parallel [(const_int 0)
7827 (parallel [(const_int 1)
7834 (parallel [(const_int 1)
7838 (match_operand:V4SI 3 "register_operand" "0,0,0")))]
7839 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)"
7841 pmadcsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
7842 pmadcsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
7843 pmadcsswd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
7844 [(set_attr "type" "ssemuladd")
7845 (set_attr "mode" "TI")])
;; pmadcswd: V8HI multiplicands (operands 1 and 2), V4SI addend and result.
;; Two groups of lane selectors are visible, one starting at element 0 and
;; one starting at element 1.  The addend must be a register and is tied to
;; the output.
7847 (define_insn "sse5_pmadcswd"
7848 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
7854 (match_operand:V8HI 1 "nonimmediate_operand" "x,x,m")
7855 (parallel [(const_int 0)
7861 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x")
7862 (parallel [(const_int 0)
7870 (parallel [(const_int 1)
7877 (parallel [(const_int 1)
7881 (match_operand:V4SI 3 "register_operand" "0,0,0")))]
7882 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)"
7884 pmadcswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
7885 pmadcswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
7886 pmadcswd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
7887 [(set_attr "type" "ssemuladd")
7888 (set_attr "mode" "TI")])
7890 ;; SSE5 parallel XMM conditional moves
;; Conditional move over whole XMM vectors: the result takes operand 1
;; where selector operand 3 is set and operand 2 where it is clear.
;; Alternatives 0-3 emit pcmov with one operand tied to the output.
;; Alternatives 4 and 5 cover one arm being the zero vector (constraint "C")
;; with the selector tied to the output register:
;;   operand 1 is zero:  result = ~selector & operand 2  -> andnps %2
;;   operand 2 is zero:  result =  selector & operand 1  -> andps  %1
;; (andnps complements its destination before the AND, so the mnemonic has
;; to follow which arm is zero.)
7891 (define_insn "sse5_pcmov_<mode>"
7892 [(set (match_operand:SSEMODE 0 "register_operand" "=x,x,x,x,x,x")
7893 (if_then_else:SSEMODE
7894 (match_operand:SSEMODE 3 "nonimmediate_operand" "0,0,xm,x,0,0")
7895 (match_operand:SSEMODE 1 "vector_move_operand" "x,xm,0,0,C,x")
7896 (match_operand:SSEMODE 2 "vector_move_operand" "xm,x,x,xm,x,C")))]
7897 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
7899 pcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}
7900 pcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}
7901 pcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}
7902 pcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}
7903 andnps\t{%2, %0|%0, %2}
7904 andps\t{%1, %0|%0, %1}"
7905 [(set_attr "type" "sse4arg")])
7907 ;; SSE5 horizontal add/subtract instructions
;; phaddbw: single V16QI source (register or memory), V8HI register result.
7908 (define_insn "sse5_phaddbw"
7909 [(set (match_operand:V8HI 0 "register_operand" "=x")
7913 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
7914 (parallel [(const_int 0)
7925 (parallel [(const_int 1)
7932 (const_int 15)])))))]
7934 "phaddbw\t{%1, %0|%0, %1}"
7935 [(set_attr "type" "sseiadd1")])
;; phaddbd: single V16QI source (register or memory), V4SI register result.
7937 (define_insn "sse5_phaddbd"
7938 [(set (match_operand:V4SI 0 "register_operand" "=x")
7943 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
7944 (parallel [(const_int 0)
7951 (parallel [(const_int 1)
7959 (parallel [(const_int 2)
7966 (parallel [(const_int 3)
7969 (const_int 15)]))))))]
7971 "phaddbd\t{%1, %0|%0, %1}"
7972 [(set_attr "type" "sseiadd1")])
;; phaddbq: single V16QI source (register or memory), V2DI register result.
7974 (define_insn "sse5_phaddbq"
7975 [(set (match_operand:V2DI 0 "register_operand" "=x")
7981 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
7982 (parallel [(const_int 0)
7987 (parallel [(const_int 1)
7993 (parallel [(const_int 2)
7998 (parallel [(const_int 3)
8005 (parallel [(const_int 8)
8010 (parallel [(const_int 9)
8016 (parallel [(const_int 10)
8021 (parallel [(const_int 11)
8022 (const_int 15)])))))))]
8024 "phaddbq\t{%1, %0|%0, %1}"
8025 [(set_attr "type" "sseiadd1")])
;; phaddwd: single V8HI source (register or memory), V4SI register result.
8027 (define_insn "sse5_phaddwd"
8028 [(set (match_operand:V4SI 0 "register_operand" "=x")
8032 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
8033 (parallel [(const_int 0)
8040 (parallel [(const_int 1)
8043 (const_int 7)])))))]
8045 "phaddwd\t{%1, %0|%0, %1}"
8046 [(set_attr "type" "sseiadd1")])
;; phaddwq: single V8HI source (register or memory), V2DI register result.
8048 (define_insn "sse5_phaddwq"
8049 [(set (match_operand:V2DI 0 "register_operand" "=x")
8054 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
8055 (parallel [(const_int 0)
8060 (parallel [(const_int 1)
8066 (parallel [(const_int 2)
8071 (parallel [(const_int 3)
8072 (const_int 7)]))))))]
8074 "phaddwq\t{%1, %0|%0, %1}"
8075 [(set_attr "type" "sseiadd1")])
;; phadddq: single V4SI source (register or memory), V2DI register result.
8077 (define_insn "sse5_phadddq"
8078 [(set (match_operand:V2DI 0 "register_operand" "=x")
8082 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
8083 (parallel [(const_int 0)
8088 (parallel [(const_int 1)
8089 (const_int 3)])))))]
8091 "phadddq\t{%1, %0|%0, %1}"
8092 [(set_attr "type" "sseiadd1")])
;; phaddubw: single V16QI source (register or memory), V8HI register result.
8094 (define_insn "sse5_phaddubw"
8095 [(set (match_operand:V8HI 0 "register_operand" "=x")
8099 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
8100 (parallel [(const_int 0)
8111 (parallel [(const_int 1)
8118 (const_int 15)])))))]
8120 "phaddubw\t{%1, %0|%0, %1}"
8121 [(set_attr "type" "sseiadd1")])
;; phaddubd: single V16QI source (register or memory), V4SI register result.
8123 (define_insn "sse5_phaddubd"
8124 [(set (match_operand:V4SI 0 "register_operand" "=x")
8129 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
8130 (parallel [(const_int 0)
8137 (parallel [(const_int 1)
8145 (parallel [(const_int 2)
8152 (parallel [(const_int 3)
8155 (const_int 15)]))))))]
8157 "phaddubd\t{%1, %0|%0, %1}"
8158 [(set_attr "type" "sseiadd1")])
;; phaddubq: single V16QI source (register or memory), V2DI register result.
8160 (define_insn "sse5_phaddubq"
8161 [(set (match_operand:V2DI 0 "register_operand" "=x")
8167 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
8168 (parallel [(const_int 0)
8173 (parallel [(const_int 1)
8179 (parallel [(const_int 2)
8184 (parallel [(const_int 3)
8191 (parallel [(const_int 8)
8196 (parallel [(const_int 9)
8202 (parallel [(const_int 10)
8207 (parallel [(const_int 11)
8208 (const_int 15)])))))))]
8210 "phaddubq\t{%1, %0|%0, %1}"
8211 [(set_attr "type" "sseiadd1")])
;; phadduwd: single V8HI source (register or memory), V4SI register result.
8213 (define_insn "sse5_phadduwd"
8214 [(set (match_operand:V4SI 0 "register_operand" "=x")
8218 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
8219 (parallel [(const_int 0)
8226 (parallel [(const_int 1)
8229 (const_int 7)])))))]
8231 "phadduwd\t{%1, %0|%0, %1}"
8232 [(set_attr "type" "sseiadd1")])
;; phadduwq: single V8HI source (register or memory), V2DI register result.
8234 (define_insn "sse5_phadduwq"
8235 [(set (match_operand:V2DI 0 "register_operand" "=x")
8240 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
8241 (parallel [(const_int 0)
8246 (parallel [(const_int 1)
8252 (parallel [(const_int 2)
8257 (parallel [(const_int 3)
8258 (const_int 7)]))))))]
8260 "phadduwq\t{%1, %0|%0, %1}"
8261 [(set_attr "type" "sseiadd1")])
;; phaddudq: single V4SI source (register or memory), V2DI register result.
8263 (define_insn "sse5_phaddudq"
8264 [(set (match_operand:V2DI 0 "register_operand" "=x")
8268 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
8269 (parallel [(const_int 0)
8274 (parallel [(const_int 1)
8275 (const_int 3)])))))]
8277 "phaddudq\t{%1, %0|%0, %1}"
8278 [(set_attr "type" "sseiadd1")])
;; phsubbw: single V16QI source (register or memory), V8HI register result.
8280 (define_insn "sse5_phsubbw"
8281 [(set (match_operand:V8HI 0 "register_operand" "=x")
8285 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
8286 (parallel [(const_int 0)
8297 (parallel [(const_int 1)
8304 (const_int 15)])))))]
8306 "phsubbw\t{%1, %0|%0, %1}"
8307 [(set_attr "type" "sseiadd1")])
;; phsubwd: single V8HI source (register or memory), V4SI register result.
8309 (define_insn "sse5_phsubwd"
8310 [(set (match_operand:V4SI 0 "register_operand" "=x")
8314 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
8315 (parallel [(const_int 0)
8322 (parallel [(const_int 1)
8325 (const_int 7)])))))]
8327 "phsubwd\t{%1, %0|%0, %1}"
8328 [(set_attr "type" "sseiadd1")])
;; phsubdq: single V4SI source (register or memory), V2DI register result.
8330 (define_insn "sse5_phsubdq"
8331 [(set (match_operand:V2DI 0 "register_operand" "=x")
8335 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
8336 (parallel [(const_int 0)
8341 (parallel [(const_int 1)
8342 (const_int 3)])))))]
8344 "phsubdq\t{%1, %0|%0, %1}"
8345 [(set_attr "type" "sseiadd1")])
8347 ;; SSE5 permute instructions
;; Generic pperm, modelled as UNSPEC_SSE5_PERMUTE over three V16QI inputs.
;; In each of the four alternatives either operand 1 or operand 3 is tied
;; to the output register and at most one remaining operand may be memory.
8348 (define_insn "sse5_pperm"
8349 [(set (match_operand:V16QI 0 "register_operand" "=x,x,x,x")
8350 (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "0,0,x,xm")
8351 (match_operand:V16QI 2 "nonimmediate_operand" "x,xm,xm,x")
8352 (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0")]
8353 UNSPEC_SSE5_PERMUTE))]
8354 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
8355 "pperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8356 [(set_attr "type" "sse4arg")
8357 (set_attr "mode" "TI")])
8359 ;; The following are for the various unpack insns which don't need the first
8360 ;; source operand, so we can just use the output operand for the first operand.
8361 ;; This allows either of the other two operands to be a memory operand. We
8362 ;; can't just use the first operand as an argument to the normal pperm because
8363 ;; then an output only argument, suddenly becomes an input operand.
;; pperm-based unpack from V16QI to V8HI.  Operand 2 is the parallel of
;; const_int lane selectors and can never be a register, so the "at least
;; one register" check is made on operand 1 and on the V16QI control vector
;; in operand 3 -- the two operands whose alternatives allow memory.
8364 (define_insn "sse5_pperm_zero_v16qi_v8hi"
8365 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
8368 (match_operand:V16QI 1 "nonimmediate_operand" "xm,x")
8369 (match_operand 2 "" "")))) ;; parallel with const_int's
8370 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
8372 && (register_operand (operands[1], V16QImode)
8373 || register_operand (operands[3], V16QImode))"
8374 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
8375 [(set_attr "type" "sseadd")
8376 (set_attr "mode" "TI")])
;; pperm-based unpack from V16QI to V8HI.  Operand 2 is the parallel of
;; const_int lane selectors and can never be a register, so the "at least
;; one register" check is made on operand 1 and on the V16QI control vector
;; in operand 3 -- the two operands whose alternatives allow memory.
8378 (define_insn "sse5_pperm_sign_v16qi_v8hi"
8379 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
8382 (match_operand:V16QI 1 "nonimmediate_operand" "xm,x")
8383 (match_operand 2 "" "")))) ;; parallel with const_int's
8384 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
8386 && (register_operand (operands[1], V16QImode)
8387 || register_operand (operands[3], V16QImode))"
8388 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
8389 [(set_attr "type" "sseadd")
8390 (set_attr "mode" "TI")])
;; pperm-based unpack from V8HI to V4SI.  Operand 2 is the parallel of
;; const_int lane selectors and can never be a register, so the "at least
;; one register" check is made on operand 1 and on the V16QI control vector
;; in operand 3 -- the two operands whose alternatives allow memory.
8392 (define_insn "sse5_pperm_zero_v8hi_v4si"
8393 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
8396 (match_operand:V8HI 1 "nonimmediate_operand" "xm,x")
8397 (match_operand 2 "" "")))) ;; parallel with const_int's
8398 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
8400 && (register_operand (operands[1], V8HImode)
8401 || register_operand (operands[3], V16QImode))"
8402 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
8403 [(set_attr "type" "sseadd")
8404 (set_attr "mode" "TI")])
;; pperm-based unpack from V8HI to V4SI.  Operand 2 is the parallel of
;; const_int lane selectors and can never be a register, so the "at least
;; one register" check is made on operand 1 and on the V16QI control vector
;; in operand 3 -- the two operands whose alternatives allow memory.
8406 (define_insn "sse5_pperm_sign_v8hi_v4si"
8407 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
8410 (match_operand:V8HI 1 "nonimmediate_operand" "xm,x")
8411 (match_operand 2 "" "")))) ;; parallel with const_int's
8412 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
8414 && (register_operand (operands[1], V8HImode)
8415 || register_operand (operands[3], V16QImode))"
8416 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
8417 [(set_attr "type" "sseadd")
8418 (set_attr "mode" "TI")])
;; pperm-based unpack from V4SI to V2DI.  Operand 2 is the parallel of
;; const_int lane selectors and can never be a register, so the "at least
;; one register" check is made on operand 1 and on the V16QI control vector
;; in operand 3 -- the two operands whose alternatives allow memory.
8420 (define_insn "sse5_pperm_zero_v4si_v2di"
8421 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
8424 (match_operand:V4SI 1 "nonimmediate_operand" "xm,x")
8425 (match_operand 2 "" "")))) ;; parallel with const_int's
8426 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
8428 && (register_operand (operands[1], V4SImode)
8429 || register_operand (operands[3], V16QImode))"
8430 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
8431 [(set_attr "type" "sseadd")
8432 (set_attr "mode" "TI")])
;; pperm-based unpack from V4SI to V2DI.  Operand 2 is the parallel of
;; const_int lane selectors and can never be a register, so the "at least
;; one register" check is made on operand 1 and on the V16QI control vector
;; in operand 3 -- the two operands whose alternatives allow memory.
8434 (define_insn "sse5_pperm_sign_v4si_v2di"
8435 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
8438 (match_operand:V4SI 1 "nonimmediate_operand" "xm,x")
8439 (match_operand 2 "" "")))) ;; parallel with const_int's
8440 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
8442 && (register_operand (operands[1], V4SImode)
8443 || register_operand (operands[3], V16QImode))"
8444 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
8445 [(set_attr "type" "sseadd")
8446 (set_attr "mode" "TI")])
8448 ;; SSE5 pack instructions that combine two vectors into a smaller vector
;; pperm-based pack of two V2DI inputs (operands 1 and 2) into one V4SI
;; register; operand 3 is the V16QI control vector, referenced through a
;; use.  Either operand 1 or operand 3 is tied to the output register.
8449 (define_insn "sse5_pperm_pack_v2di_v4si"
8450 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x,x")
8453 (match_operand:V2DI 1 "nonimmediate_operand" "0,0,x,xm"))
8455 (match_operand:V2DI 2 "nonimmediate_operand" "x,xm,xm,x"))))
8456 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0"))]
8457 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
8458 "pperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8459 [(set_attr "type" "sse4arg")
8460 (set_attr "mode" "TI")])
;; pperm-based pack of two V4SI inputs (operands 1 and 2) into one V8HI
;; register; operand 3 is the V16QI control vector, referenced through a
;; use.  Either operand 1 or operand 3 is tied to the output register.
8462 (define_insn "sse5_pperm_pack_v4si_v8hi"
8463 [(set (match_operand:V8HI 0 "register_operand" "=x,x,x,x")
8466 (match_operand:V4SI 1 "nonimmediate_operand" "0,0,x,xm"))
8468 (match_operand:V4SI 2 "nonimmediate_operand" "x,xm,xm,x"))))
8469 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0"))]
8470 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
8471 "pperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8472 [(set_attr "type" "sse4arg")
8473 (set_attr "mode" "TI")])
;; pperm-based pack of two V8HI inputs (operands 1 and 2) into one V16QI
;; register; operand 3 is the V16QI control vector, referenced through a
;; use.  Either operand 1 or operand 3 is tied to the output register.
8475 (define_insn "sse5_pperm_pack_v8hi_v16qi"
8476 [(set (match_operand:V16QI 0 "register_operand" "=x,x,x,x")
8479 (match_operand:V8HI 1 "nonimmediate_operand" "0,0,x,xm"))
8481 (match_operand:V8HI 2 "nonimmediate_operand" "x,xm,xm,x"))))
8482 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0"))]
8483 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
8484 "pperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8485 [(set_attr "type" "sse4arg")
8486 (set_attr "mode" "TI")])
8488 ;; Floating point permutation (permps, permpd)
;; Iterated over SSEMODEF2P (V4SF, V2DF); the mnemonic suffix comes from
;; ssemodesuffixf4, giving permps / permpd.  Operands 1 and 2 are the data
;; vectors and operand 3 is a V16QI control vector; the operation is
;; modelled as UNSPEC_SSE5_PERMUTE.
8489 (define_insn "sse5_perm<mode>"
8490 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
8492 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0,x,xm")
8493 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x")
8494 (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0")]
8495 UNSPEC_SSE5_PERMUTE))]
8496 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
8497 "perm<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8498 [(set_attr "type" "sse4arg")
8499 (set_attr "mode" "<MODE>")])
8501 ;; SSE5 packed rotate instructions
;; Rotate by an immediate count.  Operand 2 is an SImode constant whose
;; predicate is built from sserotatemax (7, 15, 31 or 63 depending on the
;; element width); the mnemonic is prot plus the ssevecsize suffix.
8502 (define_insn "rotl<mode>3"
8503 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
8505 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm")
8506 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
8508 "prot<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
8509 [(set_attr "type" "sseishft")
8510 (set_attr "mode" "TI")])
;; Rotate with a per-element count vector in operand 2.  The two
;; alternatives allow either the count or the data operand to be in memory.
8512 (define_insn "sse5_rotl<mode>3"
8513 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
8515 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "x,xm")
8516 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm,x")))]
8517 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 3, true, 1)"
8518 "prot<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
8519 [(set_attr "type" "sseishft")
8520 (set_attr "mode" "TI")])
8522 ;; SSE5 packed shift instructions. Note negative values for the shift amount
8523 ;; convert this into a right shift instead of left shift. For now, model this
8524 ;; with an UNSPEC instead of using ashift/lshift since the rest of the x86 does
8525 ;; not have the concept of negating the shift amount. Also, there is no LSHIFT
;; psha with a per-element count vector in operand 2, modelled as
;; UNSPEC_SSE5_ASHIFT.  The two alternatives allow either the count or the
;; data operand to be in memory.
8526 (define_insn "sse5_ashl<mode>3"
8527 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
8529 [(match_operand:SSEMODE1248 1 "nonimmediate_operand" "x,xm")
8530 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm,x")]
8531 UNSPEC_SSE5_ASHIFT))]
8532 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 3, true, 1)"
8533 "psha<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
8534 [(set_attr "type" "sseishft")
8535 (set_attr "mode" "TI")])
;; pshl with a per-element count vector in operand 2, modelled as
;; UNSPEC_SSE5_LSHIFT.  The two alternatives allow either the count or the
;; data operand to be in memory.
8537 (define_insn "sse5_lshl<mode>3"
8538 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
8540 [(match_operand:SSEMODE1248 1 "nonimmediate_operand" "x,xm")
8541 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm,x")]
8542 UNSPEC_SSE5_LSHIFT))]
8543 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 3, true, 1)"
8544 "pshl<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
8545 [(set_attr "type" "sseishft")
8546 (set_attr "mode" "TI")])
8548 ;; SSE5 FRCZ support
;; Packed frcz over SSEMODEF2P (V4SF, V2DF).  The mnemonic suffix must come
;; from the ssemodesuffixf4 mode attribute (ps / pd); an attribute name that
;; is not defined is left unexpanded in the emitted assembly.
8550 (define_insn "sse5_frcz<mode>2"
8551 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
8553 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm")]
8556 "frcz<ssemodesuffixf4>\t{%1, %0|%0, %1}"
8557 [(set_attr "type" "ssecvt1")
8558 (set_attr "prefix_extra" "1")
8559 (set_attr "mode" "<MODE>")])
;; Scalar frcz expressed on vector modes: the result is a vec_merge with
;; operand 1, which is tied to the output register.  The mnemonic suffix
;; must come from the ssemodesuffixf2s mode attribute (ss / sd); an
;; attribute name that is not defined is left unexpanded in the emitted
;; assembly.
8562 (define_insn "sse5_vmfrcz<mode>2"
8563 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
8564 (vec_merge:SSEMODEF2P
8566 [(match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
8568 (match_operand:SSEMODEF2P 1 "register_operand" "0")
8571 "frcz<ssemodesuffixf2s>\t{%2, %0|%0, %2}"
8572 [(set_attr "type" "ssecvt1")
8573 (set_attr "prefix_extra" "1")
8574 (set_attr "mode" "<MODE>")])
;; cvtph2ps: V4HI source (register or memory) to a V4SF register result,
;; modelled as an unspec.
8576 (define_insn "sse5_cvtph2ps"
8577 [(set (match_operand:V4SF 0 "register_operand" "=x")
8578 (unspec:V4SF [(match_operand:V4HI 1 "nonimmediate_operand" "xm")]
8581 "cvtph2ps\t{%1, %0|%0, %1}"
8582 [(set_attr "type" "ssecvt")
8583 (set_attr "mode" "V4SF")])
;; cvtps2ph: V4SF register source to a V4HI destination that may be a
;; register or memory, modelled as an unspec.
8585 (define_insn "sse5_cvtps2ph"
8586 [(set (match_operand:V4HI 0 "nonimmediate_operand" "=xm")
8587 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")]
8590 "cvtps2ph\t{%1, %0|%0, %1}"
8591 [(set_attr "type" "ssecvt")
8592 (set_attr "mode" "V4SF")])
8594 ;; Scalar versions of the com instructions that use vector types that are
8595 ;; called from the intrinsics. Unlike the other s{s,d} instructions, the
8596 ;; com instructions fill in 0's in the upper bits instead of leaving them
8597 ;; unmodified, so we use const_vector of 0 instead of match_dup.
;; Expander for the scalar com comparison on vector modes.  Operand 1 is
;; the comparison operator applied to operands 2 and 3; the preparation
;; code sets operand 4 to the zero constant of the vector mode.
8598 (define_expand "sse5_vmmaskcmp<mode>3"
8599 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
8600 (vec_merge:SSEMODEF2P
8601 (match_operator:SSEMODEF2P 1 "sse5_comparison_float_operator"
8602 [(match_operand:SSEMODEF2P 2 "register_operand" "")
8603 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "")])
8608 operands[4] = CONST0_RTX (<MODE>mode);
;; Matching insn for the scalar com comparison.  The condition suffix is
;; printed from operator 1 via %Y1 and the scalar suffix from
;; ssemodesuffixf2s; operand 4 is the other arm of the vec_merge.
8611 (define_insn "*sse5_vmmaskcmp<mode>3"
8612 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
8613 (vec_merge:SSEMODEF2P
8614 (match_operator:SSEMODEF2P 1 "sse5_comparison_float_operator"
8615 [(match_operand:SSEMODEF2P 2 "register_operand" "x")
8616 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm")])
8617 (match_operand:SSEMODEF2P 4 "")
8620 "com%Y1<ssemodesuffixf2s>\t{%3, %2, %0|%0, %2, %3}"
8621 [(set_attr "type" "sse4arg")
8622 (set_attr "mode" "<ssescalarmode>")])
8624 ;; We don't have a comparison operator that always returns true/false, so
8625 ;; handle comfalse and comtrue specially.
;; Always-false / always-true floating point compares, selected by the
;; constant in operand 3.  Four templates are chosen by the switch: the
;; scalar and packed comfalse forms followed by the scalar and packed
;; comtrue forms.
;; NOTE(review): the last two templates previously repeated the comfalse
;; mnemonics; confirm that the third and fourth cases are the comtrue ones.
8626 (define_insn "sse5_com_tf<mode>3"
8627 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
8629 [(match_operand:SSEMODEF2P 1 "register_operand" "x")
8630 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
8631 (match_operand:SI 3 "const_int_operand" "n")]
8632 UNSPEC_SSE5_TRUEFALSE))]
8635 const char *ret = NULL;
8637 switch (INTVAL (operands[3]))
8640 ret = \"comfalses<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}\";
8644 ret = \"comfalsep<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}\";
8648 ret = \"comtrues<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}\";
8652 ret = \"comtruep<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}\";
8661 [(set_attr "type" "ssecmp")
8662 (set_attr "mode" "<MODE>")])
;; Packed floating point com compare over SSEMODEF2P.  The condition suffix
;; is printed from operator 1 via %Y1 and the ps / pd suffix comes from
;; ssemodesuffixf4.
8664 (define_insn "sse5_maskcmp<mode>3"
8665 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
8666 (match_operator:SSEMODEF2P 1 "sse5_comparison_float_operator"
8667 [(match_operand:SSEMODEF2P 2 "register_operand" "x")
8668 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm")]))]
8670 "com%Y1<ssemodesuffixf4>\t{%3, %2, %0|%0, %2, %3}"
8671 [(set_attr "type" "ssecmp")
8672 (set_attr "mode" "<MODE>")])
;; Integer pcom compare over SSEMODE1248, using ix86_comparison_int_operator.
;; It shares its name template with the floating point pattern but iterates
;; over a different mode list.
8674 (define_insn "sse5_maskcmp<mode>3"
8675 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
8676 (match_operator:SSEMODE1248 1 "ix86_comparison_int_operator"
8677 [(match_operand:SSEMODE1248 2 "register_operand" "x")
8678 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")]))]
8680 "pcom%Y1<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
8681 [(set_attr "type" "sse4arg")
8682 (set_attr "mode" "TI")])
;; Unsigned integer pcom compare: the same operand layout as the signed
;; pattern, but using ix86_comparison_uns_operator and a "u" in the
;; mnemonic before the element-size suffix.
8684 (define_insn "sse5_maskcmp_uns<mode>3"
8685 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
8686 (match_operator:SSEMODE1248 1 "ix86_comparison_uns_operator"
8687 [(match_operand:SSEMODE1248 2 "register_operand" "x")
8688 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")]))]
8690 "pcom%Y1u<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
8691 [(set_attr "type" "ssecmp")
8692 (set_attr "mode" "TI")])
8694 ;; Version of pcom*u* that is called from the intrinsics that allows pcomequ*
8695 ;; and pcomneu* not to be converted to the signed ones in case somebody needs
8696 ;; the exact instruction generated for the intrinsic.
;; Same assembly output as sse5_maskcmp_uns<mode>3, but the comparison is
;; wrapped in UNSPEC_SSE5_UNSIGNED_CMP.
8697 (define_insn "sse5_maskcmp_uns2<mode>3"
8698 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
8700 [(match_operator:SSEMODE1248 1 "ix86_comparison_uns_operator"
8701 [(match_operand:SSEMODE1248 2 "register_operand" "x")
8702 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")])]
8703 UNSPEC_SSE5_UNSIGNED_CMP))]
8705 "pcom%Y1u<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
8706 [(set_attr "type" "ssecmp")
8707 (set_attr "mode" "TI")])
8709 ;; Pcomtrue and pcomfalse support. These are useless instructions, but are
8710 ;; being added here to be complete.
;; Operand 3 is a constant selector: a nonzero value emits pcomtrue and
;; zero emits pcomfalse, each with the ssevecsize element suffix.
8711 (define_insn "sse5_pcom_tf<mode>3"
8712 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
8713 (unspec:SSEMODE1248 [(match_operand:SSEMODE1248 1 "register_operand" "x")
8714 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm")
8715 (match_operand:SI 3 "const_int_operand" "n")]
8716 UNSPEC_SSE5_TRUEFALSE))]
8719 return ((INTVAL (operands[3]) != 0)
8720 ? "pcomtrue<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
8721 : "pcomfalse<ssevecsize>\t{%2, %1, %0|%0, %1, %2}");
8723 [(set_attr "type" "ssecmp")
8724 (set_attr "mode" "TI")])