1 ;; GCC machine description for SSE instructions
2 ;; Copyright (C) 2005, 2006, 2007
3 ;; Free Software Foundation, Inc.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify
8 ;; it under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful,
13 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ;; GNU General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
22 ;; 16 byte integral modes handled by SSE, minus TImode, which gets
23 ;; special-cased for TARGET_64BIT.
24 (define_mode_iterator SSEMODEI [V16QI V8HI V4SI V2DI])
26 ;; All 16-byte vector modes handled by SSE
27 (define_mode_iterator SSEMODE [V16QI V8HI V4SI V2DI V4SF V2DF])
30 (define_mode_iterator SSEMODE12 [V16QI V8HI])
31 (define_mode_iterator SSEMODE24 [V8HI V4SI])
32 (define_mode_iterator SSEMODE14 [V16QI V4SI])
33 (define_mode_iterator SSEMODE124 [V16QI V8HI V4SI])
34 (define_mode_iterator SSEMODE248 [V8HI V4SI V2DI])
35 (define_mode_iterator SSEMODE1248 [V16QI V8HI V4SI V2DI])
36 (define_mode_iterator SSEMODEF4 [SF DF V4SF V2DF])
37 (define_mode_iterator SSEMODEF2P [V4SF V2DF])
39 ;; Mapping from integer vector mode to mnemonic suffix
40 (define_mode_attr ssevecsize [(V16QI "b") (V8HI "w") (V4SI "d") (V2DI "q")])
42 ;; Mapping of the sse5 suffix
43 (define_mode_attr ssemodesuffixf4 [(SF "ss") (DF "sd") (V4SF "ps") (V2DF "pd")])
44 (define_mode_attr ssemodesuffixf2s [(SF "ss") (DF "sd") (V4SF "ss") (V2DF "sd")])
45 (define_mode_attr ssemodesuffixf2c [(V4SF "s") (V2DF "d")])
47 ;; Mapping of the max integer size for sse5 rotate immediate constraint
48 (define_mode_attr sserotatemax [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")])
50 ;; Mapping of vector modes back to the scalar modes
51 (define_mode_attr ssescalarmode [(V4SF "SF") (V2DF "DF")])
53 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
55 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
59 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
61 ;; All of these patterns are enabled for SSE1 as well as SSE2.
62 ;; This is essential for maintaining stable calling conventions.
;; Move expander for the 16-byte integer vector modes in SSEMODEI.
;; Both operands are accepted as nonimmediate_operand; the expansion
;; itself is delegated to ix86_expand_vector_move.
64 (define_expand "mov<mode>"
65 [(set (match_operand:SSEMODEI 0 "nonimmediate_operand" "")
66 (match_operand:SSEMODEI 1 "nonimmediate_operand" ""))]
69 ix86_expand_vector_move (<MODE>mode, operands);
;; Matcher for 16-byte integer vector moves. At least one of the two
;; operands must be a register. Per the constraints, alternative 0
;; loads an SSE constant ("C") into a register, alternative 1 moves a
;; register or memory source into a register, and alternative 2 stores
;; a register to memory. The "mode" attribute is V4SF (so movaps is
;; emitted instead of movdqa) when optimizing for size, when SSE2 is
;; not enabled, or for the store alternative when
;; TARGET_SSE_TYPELESS_STORES is set; otherwise it is TI.
73 (define_insn "*mov<mode>_internal"
74 [(set (match_operand:SSEMODEI 0 "nonimmediate_operand" "=x,x ,m")
75 (match_operand:SSEMODEI 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
77 && (register_operand (operands[0], <MODE>mode)
78 || register_operand (operands[1], <MODE>mode))"
80 switch (which_alternative)
83 return standard_sse_constant_opcode (insn, operands[1]);
86 if (get_attr_mode (insn) == MODE_V4SF)
87 return "movaps\t{%1, %0|%0, %1}";
89 return "movdqa\t{%1, %0|%0, %1}";
94 [(set_attr "type" "sselog1,ssemov,ssemov")
97 (ior (ior (ne (symbol_ref "optimize_size") (const_int 0))
98 (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
99 (and (eq_attr "alternative" "2")
100 (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
102 (const_string "V4SF")
103 (const_string "TI")))])
105 ;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
106 ;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
107 ;; from memory, we'd prefer to load the memory directly into the %xmm
108 ;; register. To facilitate this happy circumstance, this pattern won't
109 ;; split until after register allocation. If the 64-bit value didn't
110 ;; come from memory, this is the best we can do. This is much better
111 ;; than storing %edx:%eax into a stack temporary and loading an %xmm
114 (define_insn_and_split "movdi_to_sse"
116 [(set (match_operand:V4SI 0 "register_operand" "=?x,x")
117 (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
118 (clobber (match_scratch:V4SI 2 "=&x,X"))])]
119 "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES"
121 "&& reload_completed"
124 if (register_operand (operands[1], DImode))
126 /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
127 Assemble the 64-bit DImode value in an xmm register. */
128 emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
129 gen_rtx_SUBREG (SImode, operands[1], 0)));
130 emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
131 gen_rtx_SUBREG (SImode, operands[1], 4)));
/* operands[0] now holds the SImode word at byte offset 0 and the
   scratch operands[2] the word at byte offset 4; interleave them so
   that operands[0] contains the full 64-bit value. */
132 emit_insn (gen_sse2_punpckldq (operands[0], operands[0], operands[2]));
/* The DImode value lives in memory: concatenate it with zero directly
   into the V2DImode view of the destination, so the scratch register
   is not needed on this path. */
134 else if (memory_operand (operands[1], DImode))
135 emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]), operands[1], const0_rtx));
140 (define_expand "movv4sf"
141 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
142 (match_operand:V4SF 1 "nonimmediate_operand" ""))]
145 ix86_expand_vector_move (V4SFmode, operands);
149 (define_insn "*movv4sf_internal"
150 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
151 (match_operand:V4SF 1 "nonimmediate_or_sse_const_operand" "C,xm,x"))]
153 && (register_operand (operands[0], V4SFmode)
154 || register_operand (operands[1], V4SFmode))"
156 switch (which_alternative)
159 return standard_sse_constant_opcode (insn, operands[1]);
162 return "movaps\t{%1, %0|%0, %1}";
167 [(set_attr "type" "sselog1,ssemov,ssemov")
168 (set_attr "mode" "V4SF")])
171 [(set (match_operand:V4SF 0 "register_operand" "")
172 (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
173 "TARGET_SSE && reload_completed"
176 (vec_duplicate:V4SF (match_dup 1))
180 operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
181 operands[2] = CONST0_RTX (V4SFmode);
184 (define_expand "movv2df"
185 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
186 (match_operand:V2DF 1 "nonimmediate_operand" ""))]
189 ix86_expand_vector_move (V2DFmode, operands);
;; Matcher for V2DF moves. At least one of the two operands must be a
;; register. Per the constraints, alternative 0 loads an SSE constant
;; ("C") into a register, alternative 1 moves a register or memory
;; source into a register, and alternative 2 stores a register to
;; memory. The "mode" attribute is V4SF (so movaps is emitted instead
;; of movapd) when optimizing for size, when SSE2 is not enabled, or
;; for the store alternative when TARGET_SSE_TYPELESS_STORES is set;
;; otherwise it is V2DF.
193 (define_insn "*movv2df_internal"
194 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
195 (match_operand:V2DF 1 "nonimmediate_or_sse_const_operand" "C,xm,x"))]
197 && (register_operand (operands[0], V2DFmode)
198 || register_operand (operands[1], V2DFmode))"
200 switch (which_alternative)
203 return standard_sse_constant_opcode (insn, operands[1]);
206 if (get_attr_mode (insn) == MODE_V4SF)
207 return "movaps\t{%1, %0|%0, %1}";
209 return "movapd\t{%1, %0|%0, %1}";
214 [(set_attr "type" "sselog1,ssemov,ssemov")
217 (ior (ior (ne (symbol_ref "optimize_size") (const_int 0))
218 (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
219 (and (eq_attr "alternative" "2")
220 (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
222 (const_string "V4SF")
223 (const_string "V2DF")))])
226 [(set (match_operand:V2DF 0 "register_operand" "")
227 (match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))]
228 "TARGET_SSE2 && reload_completed"
229 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
231 operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
232 operands[2] = CONST0_RTX (DFmode);
235 (define_expand "push<mode>1"
236 [(match_operand:SSEMODE 0 "register_operand" "")]
239 ix86_expand_push (<MODE>mode, operands[0]);
243 (define_expand "movmisalign<mode>"
244 [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "")
245 (match_operand:SSEMODE 1 "nonimmediate_operand" ""))]
248 ix86_expand_vector_move_misalign (<MODE>mode, operands);
;; movups on a V4SF vector: register/memory load (alternative 0) or
;; register store (alternative 1); memory-to-memory is rejected by the
;; insn condition. The "mode" attribute is V4SF to match the operand
;; mode and the single-precision mnemonic (it previously said V2DF,
;; which belongs to the movupd pattern).
252 (define_insn "sse_movups"
253 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
254 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,x")]
256 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
257 "movups\t{%1, %0|%0, %1}"
258 [(set_attr "type" "ssemov")
259 (set_attr "mode" "V4SF")])
261 (define_insn "sse2_movupd"
262 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,m")
263 (unspec:V2DF [(match_operand:V2DF 1 "nonimmediate_operand" "xm,x")]
265 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
266 "movupd\t{%1, %0|%0, %1}"
267 [(set_attr "type" "ssemov")
268 (set_attr "mode" "V2DF")])
270 (define_insn "sse2_movdqu"
271 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
272 (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "xm,x")]
274 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
275 "movdqu\t{%1, %0|%0, %1}"
276 [(set_attr "type" "ssemov")
277 (set_attr "prefix_data16" "1")
278 (set_attr "mode" "TI")])
280 (define_insn "sse_movntv4sf"
281 [(set (match_operand:V4SF 0 "memory_operand" "=m")
282 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "x")]
285 "movntps\t{%1, %0|%0, %1}"
286 [(set_attr "type" "ssemov")
287 (set_attr "mode" "V4SF")])
289 (define_insn "sse2_movntv2df"
290 [(set (match_operand:V2DF 0 "memory_operand" "=m")
291 (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "x")]
294 "movntpd\t{%1, %0|%0, %1}"
295 [(set_attr "type" "ssecvt")
296 (set_attr "mode" "V2DF")])
298 (define_insn "sse2_movntv2di"
299 [(set (match_operand:V2DI 0 "memory_operand" "=m")
300 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")]
303 "movntdq\t{%1, %0|%0, %1}"
304 [(set_attr "type" "ssecvt")
305 (set_attr "prefix_data16" "1")
306 (set_attr "mode" "TI")])
;; movnti: store an SImode general register ("r") to memory. The
;; "mode" attribute is SI to match the operands; it previously said
;; V2DF, which does not describe a 32-bit integer store.
308 (define_insn "sse2_movntsi"
309 [(set (match_operand:SI 0 "memory_operand" "=m")
310 (unspec:SI [(match_operand:SI 1 "register_operand" "r")]
313 "movnti\t{%1, %0|%0, %1}"
314 [(set_attr "type" "ssecvt")
315 (set_attr "mode" "SI")])
317 (define_insn "sse3_lddqu"
318 [(set (match_operand:V16QI 0 "register_operand" "=x")
319 (unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "m")]
322 "lddqu\t{%1, %0|%0, %1}"
323 [(set_attr "type" "ssecvt")
324 (set_attr "prefix_rep" "1")
325 (set_attr "mode" "TI")])
327 ; Expand patterns for non-temporal stores. At the moment, only those
328 ; that directly map to insns are defined; it would be possible to
329 ; define patterns for other modes that would expand to several insns.
331 (define_expand "storentv4sf"
332 [(set (match_operand:V4SF 0 "memory_operand" "")
333 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")]
338 (define_expand "storentv2df"
339 [(set (match_operand:V2DF 0 "memory_operand" "")
340 (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "")]
345 (define_expand "storentv2di"
346 [(set (match_operand:V2DI 0 "memory_operand" "")
347 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "")]
352 (define_expand "storentsi"
353 [(set (match_operand:SI 0 "memory_operand" "")
354 (unspec:SI [(match_operand:SI 1 "register_operand" "")]
359 (define_expand "storentdf"
360 [(set (match_operand:DF 0 "memory_operand" "")
361 (unspec:DF [(match_operand:DF 1 "register_operand" "")]
366 (define_expand "storentsf"
367 [(set (match_operand:SF 0 "memory_operand" "")
368 (unspec:SF [(match_operand:SF 1 "register_operand" "")]
373 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
375 ;; Parallel single-precision floating point arithmetic
377 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
379 (define_expand "negv4sf2"
380 [(set (match_operand:V4SF 0 "register_operand" "")
381 (neg:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
383 "ix86_expand_fp_absneg_operator (NEG, V4SFmode, operands); DONE;")
385 (define_expand "absv4sf2"
386 [(set (match_operand:V4SF 0 "register_operand" "")
387 (abs:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
389 "ix86_expand_fp_absneg_operator (ABS, V4SFmode, operands); DONE;")
391 (define_expand "addv4sf3"
392 [(set (match_operand:V4SF 0 "register_operand" "")
393 (plus:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
394 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
396 "ix86_fixup_binary_operands_no_copy (PLUS, V4SFmode, operands);")
398 (define_insn "*addv4sf3"
399 [(set (match_operand:V4SF 0 "register_operand" "=x")
400 (plus:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
401 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
402 "TARGET_SSE && ix86_binary_operator_ok (PLUS, V4SFmode, operands)"
403 "addps\t{%2, %0|%0, %2}"
404 [(set_attr "type" "sseadd")
405 (set_attr "mode" "V4SF")])
407 (define_insn "sse_vmaddv4sf3"
408 [(set (match_operand:V4SF 0 "register_operand" "=x")
410 (plus:V4SF (match_operand:V4SF 1 "register_operand" "0")
411 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
414 "TARGET_SSE && ix86_binary_operator_ok (PLUS, V4SFmode, operands)"
415 "addss\t{%2, %0|%0, %2}"
416 [(set_attr "type" "sseadd")
417 (set_attr "mode" "SF")])
419 (define_expand "subv4sf3"
420 [(set (match_operand:V4SF 0 "register_operand" "")
421 (minus:V4SF (match_operand:V4SF 1 "register_operand" "")
422 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
424 "ix86_fixup_binary_operands_no_copy (MINUS, V4SFmode, operands);")
426 (define_insn "*subv4sf3"
427 [(set (match_operand:V4SF 0 "register_operand" "=x")
428 (minus:V4SF (match_operand:V4SF 1 "register_operand" "0")
429 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
431 "subps\t{%2, %0|%0, %2}"
432 [(set_attr "type" "sseadd")
433 (set_attr "mode" "V4SF")])
435 (define_insn "sse_vmsubv4sf3"
436 [(set (match_operand:V4SF 0 "register_operand" "=x")
438 (minus:V4SF (match_operand:V4SF 1 "register_operand" "0")
439 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
443 "subss\t{%2, %0|%0, %2}"
444 [(set_attr "type" "sseadd")
445 (set_attr "mode" "SF")])
447 (define_expand "mulv4sf3"
448 [(set (match_operand:V4SF 0 "register_operand" "")
449 (mult:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
450 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
452 "ix86_fixup_binary_operands_no_copy (MULT, V4SFmode, operands);")
454 (define_insn "*mulv4sf3"
455 [(set (match_operand:V4SF 0 "register_operand" "=x")
456 (mult:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
457 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
458 "TARGET_SSE && ix86_binary_operator_ok (MULT, V4SFmode, operands)"
459 "mulps\t{%2, %0|%0, %2}"
460 [(set_attr "type" "ssemul")
461 (set_attr "mode" "V4SF")])
463 (define_insn "sse_vmmulv4sf3"
464 [(set (match_operand:V4SF 0 "register_operand" "=x")
466 (mult:V4SF (match_operand:V4SF 1 "register_operand" "0")
467 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
470 "TARGET_SSE && ix86_binary_operator_ok (MULT, V4SFmode, operands)"
471 "mulss\t{%2, %0|%0, %2}"
472 [(set_attr "type" "ssemul")
473 (set_attr "mode" "SF")])
475 (define_expand "divv4sf3"
476 [(set (match_operand:V4SF 0 "register_operand" "")
477 (div:V4SF (match_operand:V4SF 1 "register_operand" "")
478 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
481 ix86_fixup_binary_operands_no_copy (DIV, V4SFmode, operands);
483 if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_size
484 && flag_finite_math_only && !flag_trapping_math
485 && flag_unsafe_math_optimizations)
487 ix86_emit_swdivsf (operands[0], operands[1],
488 operands[2], V4SFmode);
493 (define_insn "*divv4sf3"
494 [(set (match_operand:V4SF 0 "register_operand" "=x")
495 (div:V4SF (match_operand:V4SF 1 "register_operand" "0")
496 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
498 "divps\t{%2, %0|%0, %2}"
499 [(set_attr "type" "ssediv")
500 (set_attr "mode" "V4SF")])
502 (define_insn "sse_vmdivv4sf3"
503 [(set (match_operand:V4SF 0 "register_operand" "=x")
505 (div:V4SF (match_operand:V4SF 1 "register_operand" "0")
506 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
510 "divss\t{%2, %0|%0, %2}"
511 [(set_attr "type" "ssediv")
512 (set_attr "mode" "SF")])
514 (define_insn "sse_rcpv4sf2"
515 [(set (match_operand:V4SF 0 "register_operand" "=x")
517 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
519 "rcpps\t{%1, %0|%0, %1}"
520 [(set_attr "type" "sse")
521 (set_attr "mode" "V4SF")])
523 (define_insn "sse_vmrcpv4sf2"
524 [(set (match_operand:V4SF 0 "register_operand" "=x")
526 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
528 (match_operand:V4SF 2 "register_operand" "0")
531 "rcpss\t{%1, %0|%0, %1}"
532 [(set_attr "type" "sse")
533 (set_attr "mode" "SF")])
535 (define_insn "*sse_rsqrtv4sf2"
536 [(set (match_operand:V4SF 0 "register_operand" "=x")
538 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
540 "rsqrtps\t{%1, %0|%0, %1}"
541 [(set_attr "type" "sse")
542 (set_attr "mode" "V4SF")])
544 (define_expand "sse_rsqrtv4sf2"
545 [(set (match_operand:V4SF 0 "register_operand" "")
547 [(match_operand:V4SF 1 "nonimmediate_operand" "")] UNSPEC_RSQRT))]
550 if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_size
551 && flag_finite_math_only && !flag_trapping_math
552 && flag_unsafe_math_optimizations)
554 ix86_emit_swsqrtsf (operands[0], operands[1], V4SFmode, 1);
559 (define_insn "sse_vmrsqrtv4sf2"
560 [(set (match_operand:V4SF 0 "register_operand" "=x")
562 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
564 (match_operand:V4SF 2 "register_operand" "0")
567 "rsqrtss\t{%1, %0|%0, %1}"
568 [(set_attr "type" "sse")
569 (set_attr "mode" "SF")])
571 (define_insn "*sqrtv4sf2"
572 [(set (match_operand:V4SF 0 "register_operand" "=x")
573 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
575 "sqrtps\t{%1, %0|%0, %1}"
576 [(set_attr "type" "sse")
577 (set_attr "mode" "V4SF")])
;; Expander for V4SF square root. When the fast-math conditions tested
;; below all hold (TARGET_SSE_MATH, TARGET_RECIP, not optimizing for
;; size, finite-math-only, non-trapping, unsafe math optimizations) it
;; calls ix86_emit_swsqrtsf with a final argument of 0. Operand
;; constraints are empty, as in the other expanders in this file; the
;; stray "=" on operand 0 has been removed.
579 (define_expand "sqrtv4sf2"
580 [(set (match_operand:V4SF 0 "register_operand" "")
581 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
584 if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_size
585 && flag_finite_math_only && !flag_trapping_math
586 && flag_unsafe_math_optimizations)
588 ix86_emit_swsqrtsf (operands[0], operands[1], V4SFmode, 0);
593 (define_insn "sse_vmsqrtv4sf2"
594 [(set (match_operand:V4SF 0 "register_operand" "=x")
596 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
597 (match_operand:V4SF 2 "register_operand" "0")
600 "sqrtss\t{%1, %0|%0, %1}"
601 [(set_attr "type" "sse")
602 (set_attr "mode" "SF")])
604 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
605 ;; isn't really correct, as those rtl operators aren't defined when
606 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
608 (define_expand "smaxv4sf3"
609 [(set (match_operand:V4SF 0 "register_operand" "")
610 (smax:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
611 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
614 if (!flag_finite_math_only)
615 operands[1] = force_reg (V4SFmode, operands[1]);
616 ix86_fixup_binary_operands_no_copy (SMAX, V4SFmode, operands);
619 (define_insn "*smaxv4sf3_finite"
620 [(set (match_operand:V4SF 0 "register_operand" "=x")
621 (smax:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
622 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
623 "TARGET_SSE && flag_finite_math_only
624 && ix86_binary_operator_ok (SMAX, V4SFmode, operands)"
625 "maxps\t{%2, %0|%0, %2}"
626 [(set_attr "type" "sse")
627 (set_attr "mode" "V4SF")])
629 (define_insn "*smaxv4sf3"
630 [(set (match_operand:V4SF 0 "register_operand" "=x")
631 (smax:V4SF (match_operand:V4SF 1 "register_operand" "0")
632 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
634 "maxps\t{%2, %0|%0, %2}"
635 [(set_attr "type" "sse")
636 (set_attr "mode" "V4SF")])
638 (define_insn "sse_vmsmaxv4sf3"
639 [(set (match_operand:V4SF 0 "register_operand" "=x")
641 (smax:V4SF (match_operand:V4SF 1 "register_operand" "0")
642 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
646 "maxss\t{%2, %0|%0, %2}"
647 [(set_attr "type" "sse")
648 (set_attr "mode" "SF")])
650 (define_expand "sminv4sf3"
651 [(set (match_operand:V4SF 0 "register_operand" "")
652 (smin:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
653 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
656 if (!flag_finite_math_only)
657 operands[1] = force_reg (V4SFmode, operands[1]);
658 ix86_fixup_binary_operands_no_copy (SMIN, V4SFmode, operands);
661 (define_insn "*sminv4sf3_finite"
662 [(set (match_operand:V4SF 0 "register_operand" "=x")
663 (smin:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
664 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
665 "TARGET_SSE && flag_finite_math_only
666 && ix86_binary_operator_ok (SMIN, V4SFmode, operands)"
667 "minps\t{%2, %0|%0, %2}"
668 [(set_attr "type" "sse")
669 (set_attr "mode" "V4SF")])
671 (define_insn "*sminv4sf3"
672 [(set (match_operand:V4SF 0 "register_operand" "=x")
673 (smin:V4SF (match_operand:V4SF 1 "register_operand" "0")
674 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
676 "minps\t{%2, %0|%0, %2}"
677 [(set_attr "type" "sse")
678 (set_attr "mode" "V4SF")])
680 (define_insn "sse_vmsminv4sf3"
681 [(set (match_operand:V4SF 0 "register_operand" "=x")
683 (smin:V4SF (match_operand:V4SF 1 "register_operand" "0")
684 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
688 "minss\t{%2, %0|%0, %2}"
689 [(set_attr "type" "sse")
690 (set_attr "mode" "SF")])
692 ;; These versions of the min/max patterns implement exactly the operations
693 ;; min = (op1 < op2 ? op1 : op2)
694 ;; max = (!(op1 < op2) ? op1 : op2)
695 ;; Their operands are not commutative, and thus they may be used in the
696 ;; presence of -0.0 and NaN.
698 (define_insn "*ieee_sminv4sf3"
699 [(set (match_operand:V4SF 0 "register_operand" "=x")
700 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0")
701 (match_operand:V4SF 2 "nonimmediate_operand" "xm")]
704 "minps\t{%2, %0|%0, %2}"
705 [(set_attr "type" "sseadd")
706 (set_attr "mode" "V4SF")])
708 (define_insn "*ieee_smaxv4sf3"
709 [(set (match_operand:V4SF 0 "register_operand" "=x")
710 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0")
711 (match_operand:V4SF 2 "nonimmediate_operand" "xm")]
714 "maxps\t{%2, %0|%0, %2}"
715 [(set_attr "type" "sseadd")
716 (set_attr "mode" "V4SF")])
718 (define_insn "*ieee_sminv2df3"
719 [(set (match_operand:V2DF 0 "register_operand" "=x")
720 (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "0")
721 (match_operand:V2DF 2 "nonimmediate_operand" "xm")]
724 "minpd\t{%2, %0|%0, %2}"
725 [(set_attr "type" "sseadd")
726 (set_attr "mode" "V2DF")])
728 (define_insn "*ieee_smaxv2df3"
729 [(set (match_operand:V2DF 0 "register_operand" "=x")
730 (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "0")
731 (match_operand:V2DF 2 "nonimmediate_operand" "xm")]
734 "maxpd\t{%2, %0|%0, %2}"
735 [(set_attr "type" "sseadd")
736 (set_attr "mode" "V2DF")])
738 (define_insn "sse3_addsubv4sf3"
739 [(set (match_operand:V4SF 0 "register_operand" "=x")
742 (match_operand:V4SF 1 "register_operand" "0")
743 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
744 (minus:V4SF (match_dup 1) (match_dup 2))
747 "addsubps\t{%2, %0|%0, %2}"
748 [(set_attr "type" "sseadd")
749 (set_attr "prefix_rep" "1")
750 (set_attr "mode" "V4SF")])
752 (define_insn "sse3_haddv4sf3"
753 [(set (match_operand:V4SF 0 "register_operand" "=x")
758 (match_operand:V4SF 1 "register_operand" "0")
759 (parallel [(const_int 0)]))
760 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
762 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
763 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
767 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
768 (parallel [(const_int 0)]))
769 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
771 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
772 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
774 "haddps\t{%2, %0|%0, %2}"
775 [(set_attr "type" "sseadd")
776 (set_attr "prefix_rep" "1")
777 (set_attr "mode" "V4SF")])
779 (define_insn "sse3_hsubv4sf3"
780 [(set (match_operand:V4SF 0 "register_operand" "=x")
785 (match_operand:V4SF 1 "register_operand" "0")
786 (parallel [(const_int 0)]))
787 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
789 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
790 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
794 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
795 (parallel [(const_int 0)]))
796 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
798 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
799 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
801 "hsubps\t{%2, %0|%0, %2}"
802 [(set_attr "type" "sseadd")
803 (set_attr "prefix_rep" "1")
804 (set_attr "mode" "V4SF")])
;; Sum-reduction expander for V4SF (operand 1 is the input vector,
;; operand 0 the result). One path applies the SSE3 horizontal add
;; twice: first operand 1 with itself into a fresh temporary, then the
;; temporary with itself into operand 0. The other path passes
;; gen_addv4sf3 to ix86_expand_reduc_v4sf.
;; NOTE(review): the condition selecting between the two paths is not
;; visible here -- presumably TARGET_SSE3; confirm.
806 (define_expand "reduc_splus_v4sf"
807 [(match_operand:V4SF 0 "register_operand" "")
808 (match_operand:V4SF 1 "register_operand" "")]
813 rtx tmp = gen_reg_rtx (V4SFmode);
814 emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
815 emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
818 ix86_expand_reduc_v4sf (gen_addv4sf3, operands[0], operands[1]);
822 (define_expand "reduc_smax_v4sf"
823 [(match_operand:V4SF 0 "register_operand" "")
824 (match_operand:V4SF 1 "register_operand" "")]
827 ix86_expand_reduc_v4sf (gen_smaxv4sf3, operands[0], operands[1]);
831 (define_expand "reduc_smin_v4sf"
832 [(match_operand:V4SF 0 "register_operand" "")
833 (match_operand:V4SF 1 "register_operand" "")]
836 ix86_expand_reduc_v4sf (gen_sminv4sf3, operands[0], operands[1]);
840 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
842 ;; Parallel single-precision floating point comparisons
844 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
846 (define_insn "sse_maskcmpv4sf3"
847 [(set (match_operand:V4SF 0 "register_operand" "=x")
848 (match_operator:V4SF 3 "sse_comparison_operator"
849 [(match_operand:V4SF 1 "register_operand" "0")
850 (match_operand:V4SF 2 "nonimmediate_operand" "xm")]))]
851 "TARGET_SSE && !TARGET_SSE5"
852 "cmp%D3ps\t{%2, %0|%0, %2}"
853 [(set_attr "type" "ssecmp")
854 (set_attr "mode" "V4SF")])
856 (define_insn "sse_maskcmpsf3"
857 [(set (match_operand:SF 0 "register_operand" "=x")
858 (match_operator:SF 3 "sse_comparison_operator"
859 [(match_operand:SF 1 "register_operand" "0")
860 (match_operand:SF 2 "nonimmediate_operand" "xm")]))]
861 "TARGET_SSE && !TARGET_SSE5"
862 "cmp%D3ss\t{%2, %0|%0, %2}"
863 [(set_attr "type" "ssecmp")
864 (set_attr "mode" "SF")])
866 (define_insn "sse_vmmaskcmpv4sf3"
867 [(set (match_operand:V4SF 0 "register_operand" "=x")
869 (match_operator:V4SF 3 "sse_comparison_operator"
870 [(match_operand:V4SF 1 "register_operand" "0")
871 (match_operand:V4SF 2 "register_operand" "x")])
874 "TARGET_SSE && !TARGET_SSE5"
875 "cmp%D3ss\t{%2, %0|%0, %2}"
876 [(set_attr "type" "ssecmp")
877 (set_attr "mode" "SF")])
879 (define_insn "sse_comi"
880 [(set (reg:CCFP FLAGS_REG)
883 (match_operand:V4SF 0 "register_operand" "x")
884 (parallel [(const_int 0)]))
886 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
887 (parallel [(const_int 0)]))))]
889 "comiss\t{%1, %0|%0, %1}"
890 [(set_attr "type" "ssecomi")
891 (set_attr "mode" "SF")])
893 (define_insn "sse_ucomi"
894 [(set (reg:CCFPU FLAGS_REG)
897 (match_operand:V4SF 0 "register_operand" "x")
898 (parallel [(const_int 0)]))
900 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
901 (parallel [(const_int 0)]))))]
903 "ucomiss\t{%1, %0|%0, %1}"
904 [(set_attr "type" "ssecomi")
905 (set_attr "mode" "SF")])
907 (define_expand "vcondv4sf"
908 [(set (match_operand:V4SF 0 "register_operand" "")
911 [(match_operand:V4SF 4 "nonimmediate_operand" "")
912 (match_operand:V4SF 5 "nonimmediate_operand" "")])
913 (match_operand:V4SF 1 "general_operand" "")
914 (match_operand:V4SF 2 "general_operand" "")))]
917 if (ix86_expand_fp_vcond (operands))
923 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
925 ;; Parallel single-precision floating point logical operations
927 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
929 (define_expand "andv4sf3"
930 [(set (match_operand:V4SF 0 "register_operand" "")
931 (and:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
932 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
934 "ix86_fixup_binary_operands_no_copy (AND, V4SFmode, operands);")
936 (define_insn "*andv4sf3"
937 [(set (match_operand:V4SF 0 "register_operand" "=x")
938 (and:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
939 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
940 "TARGET_SSE && ix86_binary_operator_ok (AND, V4SFmode, operands)"
941 "andps\t{%2, %0|%0, %2}"
942 [(set_attr "type" "sselog")
943 (set_attr "mode" "V4SF")])
945 (define_insn "sse_nandv4sf3"
946 [(set (match_operand:V4SF 0 "register_operand" "=x")
947 (and:V4SF (not:V4SF (match_operand:V4SF 1 "register_operand" "0"))
948 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
950 "andnps\t{%2, %0|%0, %2}"
951 [(set_attr "type" "sselog")
952 (set_attr "mode" "V4SF")])
954 (define_expand "iorv4sf3"
955 [(set (match_operand:V4SF 0 "register_operand" "")
956 (ior:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
957 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
959 "ix86_fixup_binary_operands_no_copy (IOR, V4SFmode, operands);")
961 (define_insn "*iorv4sf3"
962 [(set (match_operand:V4SF 0 "register_operand" "=x")
963 (ior:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
964 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
965 "TARGET_SSE && ix86_binary_operator_ok (IOR, V4SFmode, operands)"
966 "orps\t{%2, %0|%0, %2}"
967 [(set_attr "type" "sselog")
968 (set_attr "mode" "V4SF")])
970 (define_expand "xorv4sf3"
971 [(set (match_operand:V4SF 0 "register_operand" "")
972 (xor:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
973 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
975 "ix86_fixup_binary_operands_no_copy (XOR, V4SFmode, operands);")
977 (define_insn "*xorv4sf3"
978 [(set (match_operand:V4SF 0 "register_operand" "=x")
979 (xor:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
980 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
981 "TARGET_SSE && ix86_binary_operator_ok (XOR, V4SFmode, operands)"
982 "xorps\t{%2, %0|%0, %2}"
983 [(set_attr "type" "sselog")
984 (set_attr "mode" "V4SF")])
986 ;; Also define scalar versions. These are used for abs, neg, and
987 ;; conditional move. Using subregs into vector modes causes register
988 ;; allocation lossage. These patterns do not allow memory operands
989 ;; because the native instructions read the full 128-bits.
991 (define_insn "*andsf3"
992 [(set (match_operand:SF 0 "register_operand" "=x")
993 (and:SF (match_operand:SF 1 "register_operand" "0")
994 (match_operand:SF 2 "register_operand" "x")))]
996 "andps\t{%2, %0|%0, %2}"
997 [(set_attr "type" "sselog")
998 (set_attr "mode" "V4SF")])
1000 (define_insn "*nandsf3"
1001 [(set (match_operand:SF 0 "register_operand" "=x")
1002 (and:SF (not:SF (match_operand:SF 1 "register_operand" "0"))
1003 (match_operand:SF 2 "register_operand" "x")))]
1005 "andnps\t{%2, %0|%0, %2}"
1006 [(set_attr "type" "sselog")
1007 (set_attr "mode" "V4SF")])
1009 (define_insn "*iorsf3"
1010 [(set (match_operand:SF 0 "register_operand" "=x")
1011 (ior:SF (match_operand:SF 1 "register_operand" "0")
1012 (match_operand:SF 2 "register_operand" "x")))]
1014 "orps\t{%2, %0|%0, %2}"
1015 [(set_attr "type" "sselog")
1016 (set_attr "mode" "V4SF")])
1018 (define_insn "*xorsf3"
1019 [(set (match_operand:SF 0 "register_operand" "=x")
1020 (xor:SF (match_operand:SF 1 "register_operand" "0")
1021 (match_operand:SF 2 "register_operand" "x")))]
1023 "xorps\t{%2, %0|%0, %2}"
1024 [(set_attr "type" "sselog")
1025 (set_attr "mode" "V4SF")])
1027 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1029 ;; Parallel single-precision floating point conversion operations
1031 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1033 (define_insn "sse_cvtpi2ps"
1034 [(set (match_operand:V4SF 0 "register_operand" "=x")
1037 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
1038 (match_operand:V4SF 1 "register_operand" "0")
1041 "cvtpi2ps\t{%2, %0|%0, %2}"
1042 [(set_attr "type" "ssecvt")
1043 (set_attr "mode" "V4SF")])
;; Convert the V4SF operand 1 (SSE register or memory) to integers through
;; an unspec and keep elements 0 and 1 as the V2SI result, which lives in
;; an MMX register ("y").  Emitted as cvtps2pi; unit attribute is mmx.
1045 (define_insn "sse_cvtps2pi"
1046 [(set (match_operand:V2SI 0 "register_operand" "=y")
1048 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
1050 (parallel [(const_int 0) (const_int 1)])))]
1052 "cvtps2pi\t{%1, %0|%0, %1}"
1053 [(set_attr "type" "ssecvt")
1054 (set_attr "unit" "mmx")
1055 (set_attr "mode" "DI")])
;; Same shape as sse_cvtps2pi but expressed with the RTL fix operator
;; instead of an unspec: elements 0 and 1 of the converted V4SF operand 1
;; form the V2SI result in an MMX register.  Emitted as cvttps2pi.
;; NOTE(review): the mode attribute is "SF" here while sse_cvtps2pi uses
;; "DI" -- confirm which one is intended.
1057 (define_insn "sse_cvttps2pi"
1058 [(set (match_operand:V2SI 0 "register_operand" "=y")
1060 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
1061 (parallel [(const_int 0) (const_int 1)])))]
1063 "cvttps2pi\t{%1, %0|%0, %1}"
1064 [(set_attr "type" "ssecvt")
1065 (set_attr "unit" "mmx")
1066 (set_attr "mode" "SF")])
;; Convert the SImode operand 2 to SF with cvtsi2ss; operand 1 is a V4SF
;; register tied to the destination.  Alternative 0 takes operand 2 from a
;; general register, alternative 1 from memory; the athlon_decode and
;; amdfam10_decode attributes are given per alternative in that order.
1068 (define_insn "sse_cvtsi2ss"
1069 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1072 (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
1073 (match_operand:V4SF 1 "register_operand" "0,0")
1076 "cvtsi2ss\t{%2, %0|%0, %2}"
1077 [(set_attr "type" "sseicvt")
1078 (set_attr "athlon_decode" "vector,double")
1079 (set_attr "amdfam10_decode" "vector,double")
1080 (set_attr "mode" "SF")])
;; 64-bit counterpart of sse_cvtsi2ss: convert the DImode operand 2 to SF
;; with cvtsi2ssq; operand 1 is a V4SF register tied to the destination.
;; Only enabled when TARGET_SSE && TARGET_64BIT.
;; Alternative 0 takes operand 2 from a general register and alternative 1
;; from memory, so the per-alternative athlon_decode / amdfam10_decode
;; values ("vector,double") map to register vs. memory exactly as they do
;; in sse_cvtsi2ss.
1082 (define_insn "sse_cvtsi2ssq"
1083 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1086 (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,m")))
1087 (match_operand:V4SF 1 "register_operand" "0,0")
1089 "TARGET_SSE && TARGET_64BIT"
1090 "cvtsi2ssq\t{%2, %0|%0, %2}"
1091 [(set_attr "type" "sseicvt")
1092 (set_attr "athlon_decode" "vector,double")
1093 (set_attr "amdfam10_decode" "vector,double")
1094 (set_attr "mode" "SF")])
;; Convert element 0 of the V4SF operand 1 to an SImode general register
;; using UNSPEC_FIX_NOTRUNC; emitted as cvtss2si.  Alternative 0 reads an
;; SSE register, alternative 1 reads memory.
;; NOTE(review): unlike sse_cvtss2si_2 this pattern carries no
;; amdfam10_decode attribute -- confirm whether that is intentional.
1096 (define_insn "sse_cvtss2si"
1097 [(set (match_operand:SI 0 "register_operand" "=r,r")
1100 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1101 (parallel [(const_int 0)]))]
1102 UNSPEC_FIX_NOTRUNC))]
1104 "cvtss2si\t{%1, %0|%0, %1}"
1105 [(set_attr "type" "sseicvt")
1106 (set_attr "athlon_decode" "double,vector")
1107 (set_attr "prefix_rep" "1")
1108 (set_attr "mode" "SI")])
;; Variant of sse_cvtss2si whose input is a plain SFmode operand (SSE
;; register or memory) rather than an element selected from a V4SF.
;; Uses UNSPEC_FIX_NOTRUNC and emits cvtss2si.
1110 (define_insn "sse_cvtss2si_2"
1111 [(set (match_operand:SI 0 "register_operand" "=r,r")
1112 (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
1113 UNSPEC_FIX_NOTRUNC))]
1115 "cvtss2si\t{%1, %0|%0, %1}"
1116 [(set_attr "type" "sseicvt")
1117 (set_attr "athlon_decode" "double,vector")
1118 (set_attr "amdfam10_decode" "double,double")
1119 (set_attr "prefix_rep" "1")
1120 (set_attr "mode" "SI")])
;; DImode-result counterpart of sse_cvtss2si: element 0 of the V4SF
;; operand 1 is converted via UNSPEC_FIX_NOTRUNC and emitted as cvtss2siq.
;; Requires TARGET_SSE && TARGET_64BIT.
;; NOTE(review): no amdfam10_decode attribute here, unlike
;; sse_cvtss2siq_2 -- confirm whether that is intentional.
1122 (define_insn "sse_cvtss2siq"
1123 [(set (match_operand:DI 0 "register_operand" "=r,r")
1126 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1127 (parallel [(const_int 0)]))]
1128 UNSPEC_FIX_NOTRUNC))]
1129 "TARGET_SSE && TARGET_64BIT"
1130 "cvtss2siq\t{%1, %0|%0, %1}"
1131 [(set_attr "type" "sseicvt")
1132 (set_attr "athlon_decode" "double,vector")
1133 (set_attr "prefix_rep" "1")
1134 (set_attr "mode" "DI")])
;; Variant of sse_cvtss2siq whose input is a plain SFmode operand (SSE
;; register or memory).  Uses UNSPEC_FIX_NOTRUNC, emits cvtss2siq, and
;; requires TARGET_SSE && TARGET_64BIT.
1136 (define_insn "sse_cvtss2siq_2"
1137 [(set (match_operand:DI 0 "register_operand" "=r,r")
1138 (unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
1139 UNSPEC_FIX_NOTRUNC))]
1140 "TARGET_SSE && TARGET_64BIT"
1141 "cvtss2siq\t{%1, %0|%0, %1}"
1142 [(set_attr "type" "sseicvt")
1143 (set_attr "athlon_decode" "double,vector")
1144 (set_attr "amdfam10_decode" "double,double")
1145 (set_attr "prefix_rep" "1")
1146 (set_attr "mode" "DI")])
;; Convert element 0 of the V4SF operand 1 (SSE register or memory) to an
;; SImode general register; emitted as cvttss2si.
1148 (define_insn "sse_cvttss2si"
1149 [(set (match_operand:SI 0 "register_operand" "=r,r")
1152 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1153 (parallel [(const_int 0)]))))]
1155 "cvttss2si\t{%1, %0|%0, %1}"
1156 [(set_attr "type" "sseicvt")
1157 (set_attr "athlon_decode" "double,vector")
1158 (set_attr "amdfam10_decode" "double,double")
1159 (set_attr "prefix_rep" "1")
1160 (set_attr "mode" "SI")])
;; DImode-result counterpart of sse_cvttss2si: element 0 of the V4SF
;; operand 1 is converted and emitted as cvttss2siq.  Requires
;; TARGET_SSE && TARGET_64BIT.
1162 (define_insn "sse_cvttss2siq"
1163 [(set (match_operand:DI 0 "register_operand" "=r,r")
1166 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1167 (parallel [(const_int 0)]))))]
1168 "TARGET_SSE && TARGET_64BIT"
1169 "cvttss2siq\t{%1, %0|%0, %1}"
1170 [(set_attr "type" "sseicvt")
1171 (set_attr "athlon_decode" "double,vector")
1172 (set_attr "amdfam10_decode" "double,double")
1173 (set_attr "prefix_rep" "1")
1174 (set_attr "mode" "DI")])
;; Element-wise integer-to-float conversion: V4SI operand 1 (SSE register
;; or memory) to a V4SF register, emitted as cvtdq2ps.
1176 (define_insn "sse2_cvtdq2ps"
1177 [(set (match_operand:V4SF 0 "register_operand" "=x")
1178 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
1180 "cvtdq2ps\t{%1, %0|%0, %1}"
1181 [(set_attr "type" "ssecvt")
1182 (set_attr "mode" "V4SF")])
;; V4SF to V4SI conversion modelled with UNSPEC_FIX_NOTRUNC and emitted as
;; cvtps2dq.  Operand 1 may be an SSE register or memory.
1184 (define_insn "sse2_cvtps2dq"
1185 [(set (match_operand:V4SI 0 "register_operand" "=x")
1186 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
1187 UNSPEC_FIX_NOTRUNC))]
1189 "cvtps2dq\t{%1, %0|%0, %1}"
1190 [(set_attr "type" "ssecvt")
1191 (set_attr "prefix_data16" "1")
1192 (set_attr "mode" "TI")])
;; V4SF to V4SI conversion expressed with the RTL fix operator and emitted
;; as cvttps2dq.  Operand 1 may be an SSE register or memory.
1194 (define_insn "sse2_cvttps2dq"
1195 [(set (match_operand:V4SI 0 "register_operand" "=x")
1196 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
1198 "cvttps2dq\t{%1, %0|%0, %1}"
1199 [(set_attr "type" "ssecvt")
1200 (set_attr "prefix_rep" "1")
1201 (set_attr "mode" "TI")])
1203 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1205 ;; Parallel single-precision floating point element swizzling
1207 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Three alternatives, with operand 1 always tied to the destination:
;;   0: register <- register, emitted as movhlps;
;;   1: register <- offsettable memory, emitted as movlps using %H2;
;;   2: memory   <- register, emitted as movhps.
;; The condition rejects the case where operands 1 and 2 are both memory.
1209 (define_insn "sse_movhlps"
1210 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
1213 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
1214 (match_operand:V4SF 2 "nonimmediate_operand" " x,o,x"))
1215 (parallel [(const_int 6)
1219 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
1221 movhlps\t{%2, %0|%0, %2}
1222 movlps\t{%H2, %0|%0, %H2}
1223 movhps\t{%2, %0|%0, %2}"
1224 [(set_attr "type" "ssemov")
1225 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Three alternatives, with operand 1 always tied to the destination:
;;   0: register <- register, emitted as movlhps;
;;   1: register <- memory, emitted as movhps;
;;   2: offsettable memory <- register, emitted as movlps on %H0.
;; Operand validity is delegated to ix86_binary_operator_ok.
1227 (define_insn "sse_movlhps"
1228 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
1231 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
1232 (match_operand:V4SF 2 "nonimmediate_operand" " x,m,x"))
1233 (parallel [(const_int 0)
1237 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
1239 movlhps\t{%2, %0|%0, %2}
1240 movhps\t{%2, %0|%0, %2}
1241 movlps\t{%2, %H0|%H0, %2}"
1242 [(set_attr "type" "ssemov")
1243 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Interleave selection over operands 1 and 2 picking indices 2, 6, 3, 7
;; (in that order), emitted as unpckhps.  Operand 1 is tied to the
;; destination; operand 2 may be an SSE register or memory.
1245 (define_insn "sse_unpckhps"
1246 [(set (match_operand:V4SF 0 "register_operand" "=x")
1249 (match_operand:V4SF 1 "register_operand" "0")
1250 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1251 (parallel [(const_int 2) (const_int 6)
1252 (const_int 3) (const_int 7)])))]
1254 "unpckhps\t{%2, %0|%0, %2}"
1255 [(set_attr "type" "sselog")
1256 (set_attr "mode" "V4SF")])
;; Interleave selection over operands 1 and 2 picking indices 0, 4, 1, 5
;; (in that order), emitted as unpcklps.  Operand 1 is tied to the
;; destination; operand 2 may be an SSE register or memory.
1258 (define_insn "sse_unpcklps"
1259 [(set (match_operand:V4SF 0 "register_operand" "=x")
1262 (match_operand:V4SF 1 "register_operand" "0")
1263 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1264 (parallel [(const_int 0) (const_int 4)
1265 (const_int 1) (const_int 5)])))]
1267 "unpcklps\t{%2, %0|%0, %2}"
1268 [(set_attr "type" "sselog")
1269 (set_attr "mode" "V4SF")])
1271 ;; These are modeled with the same vec_concat as the others so that we
1272 ;; capture users of shufps that can use the new instructions
;; Single-source shuffle of the V4SF operand 1 (SSE register or memory)
;; whose selection starts at index 1; emitted as movshdup.
1273 (define_insn "sse3_movshdup"
1274 [(set (match_operand:V4SF 0 "register_operand" "=x")
1277 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
1279 (parallel [(const_int 1)
1284 "movshdup\t{%1, %0|%0, %1}"
1285 [(set_attr "type" "sse")
1286 (set_attr "prefix_rep" "1")
1287 (set_attr "mode" "V4SF")])
;; Single-source shuffle of the V4SF operand 1 (SSE register or memory)
;; whose selection starts at index 0; emitted as movsldup.
1289 (define_insn "sse3_movsldup"
1290 [(set (match_operand:V4SF 0 "register_operand" "=x")
1293 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
1295 (parallel [(const_int 0)
1300 "movsldup\t{%1, %0|%0, %1}"
1301 [(set_attr "type" "sse")
1302 (set_attr "prefix_rep" "1")
1303 (set_attr "mode" "V4SF")])
;; Expander taking the shufps immediate as operand 3.  The 8-bit mask is
;; split into four 2-bit selectors; the upper two get 4 added to them
;; before all four are handed to sse_shufps_1, whose predicates expect
;; the first two in 0..3 and the last two in 4..7.
1305 (define_expand "sse_shufps"
1306 [(match_operand:V4SF 0 "register_operand" "")
1307 (match_operand:V4SF 1 "register_operand" "")
1308 (match_operand:V4SF 2 "nonimmediate_operand" "")
1309 (match_operand:SI 3 "const_int_operand" "")]
1312 int mask = INTVAL (operands[3]);
1313 emit_insn (gen_sse_shufps_1 (operands[0], operands[1], operands[2],
1314 GEN_INT ((mask >> 0) & 3),
1315 GEN_INT ((mask >> 2) & 3),
1316 GEN_INT (((mask >> 4) & 3) + 4),
1317 GEN_INT (((mask >> 6) & 3) + 4)));
;; shufps with the selection spelled out as four index operands: operands
;; 3 and 4 must be in 0..3, operands 5 and 6 in 4..7.  The output code
;; packs them back into one immediate (two bits each, subtracting 4 from
;; operands 5 and 6), stores it in operands[3] and prints shufps.
1321 (define_insn "sse_shufps_1"
1322 [(set (match_operand:V4SF 0 "register_operand" "=x")
1325 (match_operand:V4SF 1 "register_operand" "0")
1326 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1327 (parallel [(match_operand 3 "const_0_to_3_operand" "")
1328 (match_operand 4 "const_0_to_3_operand" "")
1329 (match_operand 5 "const_4_to_7_operand" "")
1330 (match_operand 6 "const_4_to_7_operand" "")])))]
1334 mask |= INTVAL (operands[3]) << 0;
1335 mask |= INTVAL (operands[4]) << 2;
1336 mask |= (INTVAL (operands[5]) - 4) << 4;
1337 mask |= (INTVAL (operands[6]) - 4) << 6;
1338 operands[3] = GEN_INT (mask);
1340 return "shufps\t{%3, %2, %0|%0, %2, %3}";
1342 [(set_attr "type" "sselog")
1343 (set_attr "mode" "V4SF")])
;; Extract elements 2 and 3 of the V4SF operand 1 as a V2SF value.
;;   0: memory   <- register, emitted as movhps;
;;   1: register <- register, emitted as movhlps;
;;   2: register <- offsettable memory, emitted as movlps using %H1.
1345 (define_insn "sse_storehps"
1346 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
1348 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
1349 (parallel [(const_int 2) (const_int 3)])))]
1352 movhps\t{%1, %0|%0, %1}
1353 movhlps\t{%1, %0|%0, %1}
1354 movlps\t{%H1, %0|%0, %H1}"
1355 [(set_attr "type" "ssemov")
1356 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Combine elements 0 and 1 of the V4SF operand 1 (tied to the
;; destination) with the V2SF operand 2.
;;   0: register <- memory, emitted as movhps;
;;   1: register <- register, emitted as movlhps;
;;   2: offsettable memory <- register, emitted as movlps on %H0.
1358 (define_insn "sse_loadhps"
1359 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
1362 (match_operand:V4SF 1 "nonimmediate_operand" "0,0,0")
1363 (parallel [(const_int 0) (const_int 1)]))
1364 (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
1367 movhps\t{%2, %0|%0, %2}
1368 movlhps\t{%2, %0|%0, %2}
1369 movlps\t{%2, %H0|%H0, %2}"
1370 [(set_attr "type" "ssemov")
1371 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Extract elements 0 and 1 of the V4SF operand 1 as a V2SF value.
;;   0: memory   <- register, emitted as movlps;
;;   1: register <- register, emitted as a full movaps;
;;   2: register <- memory, emitted as movlps.
1373 (define_insn "sse_storelps"
1374 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
1376 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m")
1377 (parallel [(const_int 0) (const_int 1)])))]
1380 movlps\t{%1, %0|%0, %1}
1381 movaps\t{%1, %0|%0, %1}
1382 movlps\t{%1, %0|%0, %1}"
1383 [(set_attr "type" "ssemov")
1384 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Combine the V2SF operand 2 with elements 2 and 3 of the V4SF operand 1.
;;   0: operand 2 is tied to the destination and operand 1 is a register;
;;      emitted as shufps with immediate 0xe4;
;;   1: register <- memory, operand 1 tied, emitted as movlps;
;;   2: memory <- register, operand 1 tied, emitted as movlps.
1386 (define_insn "sse_loadlps"
1387 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
1389 (match_operand:V2SF 2 "nonimmediate_operand" "0,m,x")
1391 (match_operand:V4SF 1 "nonimmediate_operand" "x,0,0")
1392 (parallel [(const_int 2) (const_int 3)]))))]
1395 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
1396 movlps\t{%2, %0|%0, %2}
1397 movlps\t{%2, %0|%0, %2}"
1398 [(set_attr "type" "sselog,ssemov,ssemov")
1399 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Register-to-register movss: operand 2 is the source register and
;; operand 1 is tied to the V4SF destination.
1401 (define_insn "sse_movss"
1402 [(set (match_operand:V4SF 0 "register_operand" "=x")
1404 (match_operand:V4SF 2 "register_operand" "x")
1405 (match_operand:V4SF 1 "register_operand" "0")
1408 "movss\t{%2, %0|%0, %2}"
1409 [(set_attr "type" "ssemov")
1410 (set_attr "mode" "SF")])
;; Build a V4SF from the SFmode operand 1, which is tied to the
;; destination register; emitted as shufps of the register with itself
;; using immediate 0.
1412 (define_insn "*vec_dupv4sf"
1413 [(set (match_operand:V4SF 0 "register_operand" "=x")
1415 (match_operand:SF 1 "register_operand" "0")))]
1417 "shufps\t{$0, %0, %0|%0, %0, 0}"
1418 [(set_attr "type" "sselog1")
1419 (set_attr "mode" "V4SF")])
1421 ;; ??? In theory we can match memory for the MMX alternative, but allowing
1422 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
1423 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Form a V2SF from two SFmode operands.  Alternatives 0 and 1 target SSE
;; registers (unpcklps with a register operand 2, or movss from memory when
;; operand 2 matches constraint "C"); alternatives 2 and 3 are the
;; corresponding MMX forms (punpckldq / movd) and are marked with "*".
1424 (define_insn "*sse_concatv2sf"
1425 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
1427 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
1428 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
1431 unpcklps\t{%2, %0|%0, %2}
1432 movss\t{%1, %0|%0, %1}
1433 punpckldq\t{%2, %0|%0, %2}
1434 movd\t{%1, %0|%0, %1}"
1435 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
1436 (set_attr "mode" "V4SF,SF,DI,DI")])
;; Form a V4SF from two V2SF operands.  Operand 1 is tied to the
;; destination; operand 2 comes from a register (movlhps) or from memory
;; (movhps).
1438 (define_insn "*sse_concatv4sf"
1439 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1441 (match_operand:V2SF 1 "register_operand" " 0,0")
1442 (match_operand:V2SF 2 "nonimmediate_operand" " x,m")))]
1445 movlhps\t{%2, %0|%0, %2}
1446 movhps\t{%2, %0|%0, %2}"
1447 [(set_attr "type" "ssemov")
1448 (set_attr "mode" "V4SF,V2SF")])
;; Expander that initialises the V4SF register operand 0 from operand 1
;; by calling ix86_expand_vector_init with false as its first argument.
1450 (define_expand "vec_initv4sf"
1451 [(match_operand:V4SF 0 "register_operand" "")
1452 (match_operand 1 "" "")]
1455 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Insert the SFmode operand 2 into a V4SF destination.  Operand 1 is a
;; vector_move_operand that is either tied to the destination ("0") or
;; matches constraint "C", depending on the alternative.  The visible
;; templates are movss (SSE register or memory source) and movd (general
;; register source).
1459 (define_insn "vec_setv4sf_0"
1460 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,Yt,m")
1463 (match_operand:SF 2 "general_operand" " x,m,*r,x*rfF"))
1464 (match_operand:V4SF 1 "vector_move_operand" " 0,C,C ,0")
1468 movss\t{%2, %0|%0, %2}
1469 movss\t{%2, %0|%0, %2}
1470 movd\t{%2, %0|%0, %2}
1472 [(set_attr "type" "ssemov")
1473 (set_attr "mode" "SF")])
1475 ;; A subset is vec_setv4sf.
;; Insert the SFmode operand 2 (SSE register or memory) into the V4SF
;; operand 1, which is tied to the destination.  Operand 3 is a power of
;; two in 1..8; the output code rewrites it to log2(operand 3) shifted
;; left by 4 and uses that as the insertps immediate.
1476 (define_insn "*vec_setv4sf_sse4_1"
1477 [(set (match_operand:V4SF 0 "register_operand" "=x")
1480 (match_operand:SF 2 "nonimmediate_operand" "xm"))
1481 (match_operand:V4SF 1 "register_operand" "0")
1482 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
1485 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
1486 return "insertps\t{%3, %2, %0|%0, %2, %3}";
1488 [(set_attr "type" "sselog")
1489 (set_attr "prefix_extra" "1")
1490 (set_attr "mode" "V4SF")])
;; insertps with the immediate supplied directly: operand 2 is the source
;; V4SF register, operand 1 is tied to the destination, and operand 3 is
;; an immediate in 0..255 that is printed unchanged.
;; The output template is a plain string; nothing follows it on its line
;; (a ";" there would only start a machine-description comment).
1492 (define_insn "sse4_1_insertps"
1493 [(set (match_operand:V4SF 0 "register_operand" "=x")
1494 (unspec:V4SF [(match_operand:V4SF 2 "register_operand" "x")
1495 (match_operand:V4SF 1 "register_operand" "0")
1496 (match_operand:SI 3 "const_0_to_255_operand" "n")]
1499 "insertps\t{%3, %2, %0|%0, %2, %3}"
1500 [(set_attr "type" "sselog")
1501 (set_attr "prefix_extra" "1")
1502 (set_attr "mode" "V4SF")])
;; Post-reload split: when the V4SF destination is in memory and the
;; SFmode operand 1 is not, store operand 1 with an ordinary SFmode move
;; to byte offset 0 of the destination (adjust_address ... SFmode, 0).
1505 [(set (match_operand:V4SF 0 "memory_operand" "")
1508 (match_operand:SF 1 "nonmemory_operand" ""))
1511 "TARGET_SSE && reload_completed"
1514 emit_move_insn (adjust_address (operands[0], SFmode, 0), operands[1]);
;; Expander that sets one element of the V4SF register operand 0 to the
;; SFmode register operand 1.  The element index is the constant operand 2;
;; the work is delegated to ix86_expand_vector_set.
1518 (define_expand "vec_setv4sf"
1519 [(match_operand:V4SF 0 "register_operand" "")
1520 (match_operand:SF 1 "register_operand" "")
1521 (match_operand 2 "const_int_operand" "")]
1524 ix86_expand_vector_set (false, operands[0], operands[1],
1525 INTVAL (operands[2]));
;; Extract element 0 of the V4SF operand 1 as an SFmode value.  The insn
;; is rejected when both operands are memory.  After reload it is split
;; into a plain SFmode move whose source is operand 1 reinterpreted in
;; SFmode (via gen_rtx_REG on the same register number, or gen_lowpart).
1529 (define_insn_and_split "*vec_extractv4sf_0"
1530 [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,fr")
1532 (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m")
1533 (parallel [(const_int 0)])))]
1534 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
1536 "&& reload_completed"
1539 rtx op1 = operands[1];
1541 op1 = gen_rtx_REG (SFmode, REGNO (op1));
1543 op1 = gen_lowpart (SFmode, op1);
1544 emit_move_insn (operands[0], op1);
;; Extract the element of the V4SF register operand 1 selected by the
;; constant operand 2 (0..3) into a general register or memory; emitted
;; as extractps.
1548 (define_insn "*sse4_1_extractps"
1549 [(set (match_operand:SF 0 "nonimmediate_operand" "=rm")
1551 (match_operand:V4SF 1 "register_operand" "x")
1552 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
1554 "extractps\t{%2, %1, %0|%0, %1, %2}"
1555 [(set_attr "type" "sselog")
1556 (set_attr "prefix_extra" "1")
1557 (set_attr "mode" "V4SF")])
;; Extract element i (operand 2, 0..3) from a V4SF held in offsettable
;; memory.  The split replaces it with an SFmode load from byte offset
;; i*4 of operand 1.
1559 (define_insn_and_split "*vec_extract_v4sf_mem"
1560 [(set (match_operand:SF 0 "register_operand" "=x*rf")
1562 (match_operand:V4SF 1 "memory_operand" "o")
1563 (parallel [(match_operand 2 "const_0_to_3_operand" "n")])))]
1569 int i = INTVAL (operands[2]);
1571 emit_move_insn (operands[0], adjust_address (operands[1], SFmode, i*4));
;; Expander that extracts the element with constant index operand 2 from
;; the V4SF register operand 1 into the SFmode register operand 0 by
;; calling ix86_expand_vector_extract.
1575 (define_expand "vec_extractv4sf"
1576 [(match_operand:SF 0 "register_operand" "")
1577 (match_operand:V4SF 1 "register_operand" "")
1578 (match_operand 2 "const_int_operand" "")]
1581 ix86_expand_vector_extract (false, operands[0], operands[1],
1582 INTVAL (operands[2]));
1586 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1588 ;; SSE5 floating point multiply/accumulate instructions.  This includes the
1589 ;; scalar versions of the instructions as well as the vector versions.
1591 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1593 ;; In order to match (*a * *b) + *c, particularly when vectorizing, allow
1594 ;; combine to generate a multiply/add with two memory references. We then
1595 ;; split this insn, into loading up the destination register with one of the
1596 ;; memory operations. If we don't manage to split the insn, reload will
1597 ;; generate the appropriate moves. The reason this is needed, is that combine
1598 ;; has already folded one of the memory references into both the multiply and
1599 ;; add insns, and it can't generate a new pseudo. I.e.:
1600 ;; (set (reg1) (mem (addr1)))
1601 ;; (set (reg2) (mult (reg1) (mem (addr2))))
1602 ;; (set (reg3) (plus (reg2) (mem (addr3))))
;; Fused multiply-add over the SSEMODEF4 modes (SF, DF, V4SF, V2DF).
;; The destination is tied to operand 1 in the first two alternatives and
;; to operand 3 in the last two.  Enabled only with TARGET_SSE5 and
;; TARGET_FUSED_MADD; ix86_sse5_valid_op_p is called with a final argument
;; of 2 (presumably the number of memory operands tolerated -- see the
;; comment block above about two memory references).
1604 (define_insn "sse5_fmadd<mode>4"
1605 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x,x,x")
1608 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%0,0,x,xm")
1609 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm,xm,x"))
1610 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x,0,0")))]
1611 "TARGET_SSE5 && TARGET_FUSED_MADD
1612 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)"
1613 "fmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1614 [(set_attr "type" "ssemuladd")
1615 (set_attr "mode" "<MODE>")])
1617 ;; Split fmadd with two memory operands into a load and the fmadd.
;; The split fires only when the operands fail ix86_sse5_valid_op_p with a
;; final argument of 1 but pass it with 2, and the destination register is
;; not mentioned in any of the three source operands.  It first calls
;; ix86_expand_sse5_multiple_memory and then re-emits sse5_fmadd<mode>4.
1619 [(set (match_operand:SSEMODEF4 0 "register_operand" "")
1622 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "")
1623 (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))
1624 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")))]
1626 && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1)
1627 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)
1628 && !reg_mentioned_p (operands[0], operands[1])
1629 && !reg_mentioned_p (operands[0], operands[2])
1630 && !reg_mentioned_p (operands[0], operands[3])"
1633 ix86_expand_sse5_multiple_memory (operands, 4, <MODE>mode);
1634 emit_insn (gen_sse5_fmadd<mode>4 (operands[0], operands[1],
1635 operands[2], operands[3]));
1639 ;; For the scalar operations, use operand1 for the upper words that aren't
1640 ;; modified, so restrict the forms that are generated.
1641 ;; Scalar version of fmadd
;; Modelled as a vec_merge over the vector modes V4SF/V2DF; operand 1 is
;; tied to the destination in both alternatives.  The mnemonic suffix comes
;; from ssemodesuffixf2s (ss for V4SF, sd for V2DF), and
;; ix86_sse5_valid_op_p is called with a final argument of 1.
1642 (define_insn "sse5_vmfmadd<mode>4"
1643 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1644 (vec_merge:SSEMODEF2P
1647 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0")
1648 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
1649 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1652 "TARGET_SSE5 && TARGET_FUSED_MADD
1653 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1654 "fmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1655 [(set_attr "type" "ssemuladd")
1656 (set_attr "mode" "<MODE>")])
1658 ;; Floating multiply and subtract
1659 ;; Allow two memory operands the same as fmadd
;; Same operand layout and alternatives as sse5_fmadd<mode>4: the
;; destination is tied to operand 1 in the first two alternatives and to
;; operand 3 in the last two.  Emitted as fmsub with the ssemodesuffixf4
;; suffix.
1660 (define_insn "sse5_fmsub<mode>4"
1661 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x,x,x")
1664 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%0,0,x,xm")
1665 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm,xm,x"))
1666 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x,0,0")))]
1667 "TARGET_SSE5 && TARGET_FUSED_MADD
1668 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)"
1669 "fmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1670 [(set_attr "type" "ssemuladd")
1671 (set_attr "mode" "<MODE>")])
1673 ;; Split fmsub with two memory operands into a load and the fmsub.
;; The split fires only when the operands fail ix86_sse5_valid_op_p with a
;; final argument of 1 but pass it with 2, and the destination register is
;; not mentioned in any of the three source operands.  It first calls
;; ix86_expand_sse5_multiple_memory and then re-emits sse5_fmsub<mode>4.
1675 [(set (match_operand:SSEMODEF4 0 "register_operand" "")
1678 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "")
1679 (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))
1680 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")))]
1682 && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1)
1683 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)
1684 && !reg_mentioned_p (operands[0], operands[1])
1685 && !reg_mentioned_p (operands[0], operands[2])
1686 && !reg_mentioned_p (operands[0], operands[3])"
1689 ix86_expand_sse5_multiple_memory (operands, 4, <MODE>mode);
1690 emit_insn (gen_sse5_fmsub<mode>4 (operands[0], operands[1],
1691 operands[2], operands[3]));
1695 ;; For the scalar operations, use operand1 for the upper words that aren't
1696 ;; modified, so restrict the forms that are generated.
1697 ;; Scalar version of fmsub
;; Modelled as a vec_merge over V4SF/V2DF with operand 1 tied to the
;; destination in both alternatives.  Emitted as fmsub with the
;; ssemodesuffixf2s suffix (ss/sd); ix86_sse5_valid_op_p is called with a
;; final argument of 1.
1698 (define_insn "sse5_vmfmsub<mode>4"
1699 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1700 (vec_merge:SSEMODEF2P
1703 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0")
1704 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
1705 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1708 "TARGET_SSE5 && TARGET_FUSED_MADD
1709 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1710 "fmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1711 [(set_attr "type" "ssemuladd")
1712 (set_attr "mode" "<MODE>")])
1714 ;; Floating point negative multiply and add
1715 ;; Rewrite (- (a * b) + c) into the canonical form: c - (a * b)
1716 ;; Note operands are out of order to simplify call to ix86_sse5_valid_op_p
1717 ;; Allow two memory operands to help in optimizing.
;; Operand 3 (the addend c) appears first in the RTL, followed by the
;; multiplicands 1 and 2; the constraint columns still follow the fmadd
;; layout (destination tied to operand 1 in the first two alternatives and
;; to operand 3 in the last two).
1718 (define_insn "sse5_fnmadd<mode>4"
1719 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x,x,x")
1721 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x,0,0")
1723 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%0,0,x,xm")
1724 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm,xm,x"))))]
1725 "TARGET_SSE5 && TARGET_FUSED_MADD
1726 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)"
1727 "fnmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1728 [(set_attr "type" "ssemuladd")
1729 (set_attr "mode" "<MODE>")])
1731 ;; Split fnmadd with two memory operands into a load and the fnmadd.
;; The split fires only when the operands fail ix86_sse5_valid_op_p with a
;; final argument of 1 but pass it with 2, and the destination register is
;; not mentioned in any of the three source operands.  It first calls
;; ix86_expand_sse5_multiple_memory and then re-emits sse5_fnmadd<mode>4.
1733 [(set (match_operand:SSEMODEF4 0 "register_operand" "")
1735 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")
1737 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "")
1738 (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))))]
1740 && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1)
1741 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)
1742 && !reg_mentioned_p (operands[0], operands[1])
1743 && !reg_mentioned_p (operands[0], operands[2])
1744 && !reg_mentioned_p (operands[0], operands[3])"
1747 ix86_expand_sse5_multiple_memory (operands, 4, <MODE>mode);
1748 emit_insn (gen_sse5_fnmadd<mode>4 (operands[0], operands[1],
1749 operands[2], operands[3]));
1753 ;; For the scalar operations, use operand1 for the upper words that aren't
1754 ;; modified, so restrict the forms that are generated.
1755 ;; Scalar version of fnmadd
;; Modelled as a vec_merge over V4SF/V2DF.  Operand 3 appears first in the
;; RTL, then operands 1 and 2; operand 1 is tied to the destination in both
;; alternatives.  Emitted as fnmadd with the ssemodesuffixf2s suffix.
1756 (define_insn "sse5_vmfnmadd<mode>4"
1757 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1758 (vec_merge:SSEMODEF2P
1760 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")
1762 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0")
1763 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm")))
1766 "TARGET_SSE5 && TARGET_FUSED_MADD
1767 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1768 "fnmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1769 [(set_attr "type" "ssemuladd")
1770 (set_attr "mode" "<MODE>")])
1772 ;; Floating point negative multiply and subtract
1773 ;; Rewrite (- (a * b) - c) into the canonical form: ((-a) * b) - c
1774 ;; Allow 2 memory operands to help with optimization
;; Unlike the fmadd/fmsub/fnmadd patterns this one has only two
;; alternatives, and operand 1 is tied to the destination in both of them.
;; Emitted as fnmsub with the ssemodesuffixf4 suffix.
1775 (define_insn "sse5_fnmsub<mode>4"
1776 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x")
1780 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "0,0"))
1781 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm"))
1782 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x")))]
1783 "TARGET_SSE5 && TARGET_FUSED_MADD
1784 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)"
1785 "fnmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1786 [(set_attr "type" "ssemuladd")
1787 (set_attr "mode" "<MODE>")])
1789 ;; Split fnmsub with two memory operands into a load and the fnmsub.
;; The split fires only when the operands fail ix86_sse5_valid_op_p with a
;; final argument of 1 but pass it with 2, and the destination register is
;; not mentioned in any of the three source operands.  It first calls
;; ix86_expand_sse5_multiple_memory and then re-emits sse5_fnmsub<mode>4.
1791 [(set (match_operand:SSEMODEF4 0 "register_operand" "")
1795 (match_operand:SSEMODEF4 1 "nonimmediate_operand" ""))
1796 (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))
1797 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")))]
1799 && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1)
1800 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)
1801 && !reg_mentioned_p (operands[0], operands[1])
1802 && !reg_mentioned_p (operands[0], operands[2])
1803 && !reg_mentioned_p (operands[0], operands[3])"
1806 ix86_expand_sse5_multiple_memory (operands, 4, <MODE>mode);
1807 emit_insn (gen_sse5_fnmsub<mode>4 (operands[0], operands[1],
1808 operands[2], operands[3]));
1812 ;; For the scalar operations, use operand1 for the upper words that aren't
1813 ;; modified, so restrict the forms that are generated.
1814 ;; Scalar version of fnmsub
;; Modelled as a vec_merge over V4SF/V2DF with operand 1 tied to the
;; destination in both alternatives.  Emitted as fnmsub with the
;; ssemodesuffixf2s suffix (ss/sd).
;; ix86_sse5_valid_op_p is called with a final argument of 1, matching the
;; other scalar patterns (sse5_vmfmadd, sse5_vmfmsub, sse5_vmfnmadd): the
;; constraints only ever admit one memory operand and there is no
;; define_split to load a second one for the scalar forms.
1815 (define_insn "sse5_vmfnmsub<mode>4"
1816 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1817 (vec_merge:SSEMODEF2P
1821 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0"))
1822 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
1823 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1826 "TARGET_SSE5 && TARGET_FUSED_MADD
1827 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1828 "fnmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1829 [(set_attr "type" "ssemuladd")
1830 (set_attr "mode" "<MODE>")])
1832 ;; The same instructions using an UNSPEC to allow the intrinsic to be used
1833 ;; even if the user used -mno-fused-madd
1834 ;; Parallel instructions. During instruction generation, just default
1835 ;; to registers, and let combine later build the appropriate instruction.
;; Expander for the intrinsic form of fmadd: all operands are registers
;; and the pattern is wrapped in UNSPEC_SSE5_INTRINSIC.  When
;; TARGET_FUSED_MADD is set, the plain sse5_fmadd<mode>4 insn is emitted
;; with the same four operands.
1836 (define_expand "sse5i_fmadd<mode>4"
1837 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1841 (match_operand:SSEMODEF2P 1 "register_operand" "")
1842 (match_operand:SSEMODEF2P 2 "register_operand" ""))
1843 (match_operand:SSEMODEF2P 3 "register_operand" ""))]
1844 UNSPEC_SSE5_INTRINSIC))]
1847 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1848 if (TARGET_FUSED_MADD)
1850 emit_insn (gen_sse5_fmadd<mode>4 (operands[0], operands[1],
1851 operands[2], operands[3]));
;; Insn matching the UNSPEC_SSE5_INTRINSIC form of fmadd for V4SF/V2DF.
;; Its condition does not test TARGET_FUSED_MADD, and
;; ix86_sse5_valid_op_p is called with a final argument of 1.  The
;; constraint layout mirrors sse5_fmadd<mode>4.
1856 (define_insn "*sse5i_fmadd<mode>4"
1857 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
1861 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0,0,x,xm")
1862 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x"))
1863 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0"))]
1864 UNSPEC_SSE5_INTRINSIC))]
1865 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1866 "fmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1867 [(set_attr "type" "ssemuladd")
1868 (set_attr "mode" "<MODE>")])
;; Expander for the intrinsic form of fmsub: all operands are registers
;; and the pattern is wrapped in UNSPEC_SSE5_INTRINSIC.  When
;; TARGET_FUSED_MADD is set, the plain sse5_fmsub<mode>4 insn is emitted
;; with the same four operands.
1870 (define_expand "sse5i_fmsub<mode>4"
1871 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1875 (match_operand:SSEMODEF2P 1 "register_operand" "")
1876 (match_operand:SSEMODEF2P 2 "register_operand" ""))
1877 (match_operand:SSEMODEF2P 3 "register_operand" ""))]
1878 UNSPEC_SSE5_INTRINSIC))]
1881 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1882 if (TARGET_FUSED_MADD)
1884 emit_insn (gen_sse5_fmsub<mode>4 (operands[0], operands[1],
1885 operands[2], operands[3]));
;; Insn matching the UNSPEC_SSE5_INTRINSIC form of fmsub for V4SF/V2DF.
;; Its condition does not test TARGET_FUSED_MADD, and
;; ix86_sse5_valid_op_p is called with a final argument of 1.
;; Operand 1 uses nonimmediate_operand, like operands 2 and 3 and like the
;; sibling *sse5i_fmadd / *sse5i_fnmadd / *sse5i_fnmsub patterns: its
;; fourth constraint alternative is "xm", and it is marked commutative
;; ("%") with operand 2, so the predicate has to admit memory as well.
1890 (define_insn "*sse5i_fmsub<mode>4"
1891 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
1895 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0,0,x,xm")
1896 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x"))
1897 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0"))]
1898 UNSPEC_SSE5_INTRINSIC))]
1899 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1900 "fmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1901 [(set_attr "type" "ssemuladd")
1902 (set_attr "mode" "<MODE>")])
1904 ;; Rewrite (- (a * b) + c) into the canonical form: c - (a * b)
1905 ;; Note operands are out of order to simplify call to ix86_sse5_valid_op_p
;; Expander for the intrinsic form of fnmadd: all operands are registers
;; and the pattern is wrapped in UNSPEC_SSE5_INTRINSIC.  When
;; TARGET_FUSED_MADD is set, the plain sse5_fnmadd<mode>4 insn is emitted
;; with the same four operands.
1906 (define_expand "sse5i_fnmadd<mode>4"
1907 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1910 (match_operand:SSEMODEF2P 3 "register_operand" "")
1912 (match_operand:SSEMODEF2P 1 "register_operand" "")
1913 (match_operand:SSEMODEF2P 2 "register_operand" "")))]
1914 UNSPEC_SSE5_INTRINSIC))]
1917 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1918 if (TARGET_FUSED_MADD)
1920 emit_insn (gen_sse5_fnmadd<mode>4 (operands[0], operands[1],
1921 operands[2], operands[3]));
;; Insn matching the UNSPEC_SSE5_INTRINSIC form of fnmadd for V4SF/V2DF.
;; Operand 3 appears first in the RTL, then operands 1 and 2.  The
;; condition does not test TARGET_FUSED_MADD, and ix86_sse5_valid_op_p is
;; called with a final argument of 1.
1926 (define_insn "*sse5i_fnmadd<mode>4"
1927 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
1930 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0")
1932 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0,0,x,xm")
1933 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x")))]
1934 UNSPEC_SSE5_INTRINSIC))]
1935 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1936 "fnmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1937 [(set_attr "type" "ssemuladd")
1938 (set_attr "mode" "<MODE>")])
1940 ;; Rewrite (- (a * b) - c) into the canonical form: ((-a) * b) - c
;; Expander for the intrinsic form of fnmsub: all operands are registers
;; and the pattern is wrapped in UNSPEC_SSE5_INTRINSIC.  When
;; TARGET_FUSED_MADD is set, the plain sse5_fnmsub<mode>4 insn is emitted
;; with the same four operands.
1941 (define_expand "sse5i_fnmsub<mode>4"
1942 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1947 (match_operand:SSEMODEF2P 1 "register_operand" ""))
1948 (match_operand:SSEMODEF2P 2 "register_operand" ""))
1949 (match_operand:SSEMODEF2P 3 "register_operand" ""))]
1950 UNSPEC_SSE5_INTRINSIC))]
1953 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1954 if (TARGET_FUSED_MADD)
1956 emit_insn (gen_sse5_fnmsub<mode>4 (operands[0], operands[1],
1957 operands[2], operands[3]));
;; Insn matching the UNSPEC_SSE5_INTRINSIC form of fnmsub for V4SF/V2DF.
;; It has four alternatives laid out like *sse5i_fmadd<mode>4, whereas
;; sse5_fnmsub<mode>4 has only two.  The condition does not test
;; TARGET_FUSED_MADD, and ix86_sse5_valid_op_p is called with a final
;; argument of 1.
1962 (define_insn "*sse5i_fnmsub<mode>4"
1963 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
1968 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0,0,x,xm"))
1969 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x"))
1970 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0"))]
1971 UNSPEC_SSE5_INTRINSIC))]
1972 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1973 "fnmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1974 [(set_attr "type" "ssemuladd")
1975 (set_attr "mode" "<MODE>")])
1977 ;; Scalar instructions
;; Expander for the intrinsic form of the scalar fmadd: a vec_merge
;; wrapped in UNSPEC_SSE5_INTRINSIC with register operands only.  When
;; TARGET_FUSED_MADD is set, the plain sse5_vmfmadd<mode>4 insn is emitted
;; with the same four operands.
1978 (define_expand "sse5i_vmfmadd<mode>4"
1979 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1981 [(vec_merge:SSEMODEF2P
1984 (match_operand:SSEMODEF2P 1 "register_operand" "")
1985 (match_operand:SSEMODEF2P 2 "register_operand" ""))
1986 (match_operand:SSEMODEF2P 3 "register_operand" ""))
1989 UNSPEC_SSE5_INTRINSIC))]
1992 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1993 if (TARGET_FUSED_MADD)
1995 emit_insn (gen_sse5_vmfmadd<mode>4 (operands[0], operands[1],
1996 operands[2], operands[3]));
2001 ;; For the scalar operations, use operand1 for the upper words that aren't
2002 ;; modified, so restrict the forms that are accepted.
;; Insn matching the UNSPEC_SSE5_INTRINSIC form of the scalar fmadd.
;; Operand 1 is a register tied to the destination in both alternatives.
;; The mode attribute is <ssescalarmode>, whereas sse5_vmfmadd<mode>4 uses
;; <MODE>.
2003 (define_insn "*sse5i_vmfmadd<mode>4"
2004 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2006 [(vec_merge:SSEMODEF2P
2009 (match_operand:SSEMODEF2P 1 "register_operand" "0,0")
2010 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
2011 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
2014 UNSPEC_SSE5_INTRINSIC))]
2015 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
2016 "fmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2017 [(set_attr "type" "ssemuladd")
2018 (set_attr "mode" "<ssescalarmode>")])
;; Expander for the intrinsic form of the scalar fmsub: a vec_merge
;; wrapped in UNSPEC_SSE5_INTRINSIC with register operands only.  When
;; TARGET_FUSED_MADD is set, the plain sse5_vmfmsub<mode>4 insn is emitted
;; with the same four operands.
2020 (define_expand "sse5i_vmfmsub<mode>4"
2021 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
2023 [(vec_merge:SSEMODEF2P
2026 (match_operand:SSEMODEF2P 1 "register_operand" "")
2027 (match_operand:SSEMODEF2P 2 "register_operand" ""))
2028 (match_operand:SSEMODEF2P 3 "register_operand" ""))
2031 UNSPEC_SSE5_INTRINSIC))]
2034 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
2035 if (TARGET_FUSED_MADD)
2037 emit_insn (gen_sse5_vmfmsub<mode>4 (operands[0], operands[1],
2038 operands[2], operands[3]));
;; Insn matching the UNSPEC_SSE5_INTRINSIC form of the scalar fmsub.
;; Operand 1 is tied to the destination in both alternatives.  Emitted as
;; fmsub with the ssemodesuffixf2s suffix; the mode attribute is
;; <ssescalarmode>.
2043 (define_insn "*sse5i_vmfmsub<mode>4"
2044 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2046 [(vec_merge:SSEMODEF2P
2049 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0")
2050 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
2051 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
2054 UNSPEC_SSE5_INTRINSIC))]
2055 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
2056 "fmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2057 [(set_attr "type" "ssemuladd")
2058 (set_attr "mode" "<ssescalarmode>")])
2060 ;; Note: the operands are out of order to simplify the call to ix86_sse5_valid_op_p.
;; Expander for the UNSPEC_SSE5_INTRINSIC fnmadd form; every operand must be
;; a register.  Operand 3 appears in the template before operands 1 and 2.
;; When TARGET_FUSED_MADD is set, the preparation code emits
;; gen_sse5_vmfnmadd<mode>4 instead of the UNSPEC pattern.
2061 (define_expand "sse5i_vmfnmadd<mode>4"
2062 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
2064 [(vec_merge:SSEMODEF2P
2066 (match_operand:SSEMODEF2P 3 "register_operand" "")
2068 (match_operand:SSEMODEF2P 1 "register_operand" "")
2069 (match_operand:SSEMODEF2P 2 "register_operand" "")))
2072 UNSPEC_SSE5_INTRINSIC))]
2075 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
2076 if (TARGET_FUSED_MADD)
2078 emit_insn (gen_sse5_vmfnmadd<mode>4 (operands[0], operands[1],
2079 operands[2], operands[3]));
;; UNSPEC_SSE5_INTRINSIC form of the fnmadd insn.  Operand 3 is listed first
;; in the template; operand 1 is tied to the destination, and in each
;; alternative at most one of operands 2 and 3 may be memory.  Emits fnmadd
;; with the <ssemodesuffixf2s> suffix.
2084 (define_insn "*sse5i_vmfnmadd<mode>4"
2085 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2087 [(vec_merge:SSEMODEF2P
2089 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")
2091 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0")
2092 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm")))
2095 UNSPEC_SSE5_INTRINSIC))]
2096 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
2097 "fnmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2098 [(set_attr "type" "ssemuladd")
2099 (set_attr "mode" "<ssescalarmode>")])
;; Expander for the UNSPEC_SSE5_INTRINSIC fnmsub form; every operand must be
;; a register.  When TARGET_FUSED_MADD is set, the preparation code emits
;; gen_sse5_vmfnmsub<mode>4 instead of the UNSPEC pattern.
2101 (define_expand "sse5i_vmfnmsub<mode>4"
2102 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
2104 [(vec_merge:SSEMODEF2P
2108 (match_operand:SSEMODEF2P 1 "register_operand" ""))
2109 (match_operand:SSEMODEF2P 2 "register_operand" ""))
2110 (match_operand:SSEMODEF2P 3 "register_operand" ""))
2113 UNSPEC_SSE5_INTRINSIC))]
2116 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
2117 if (TARGET_FUSED_MADD)
2119 emit_insn (gen_sse5_vmfnmsub<mode>4 (operands[0], operands[1],
2120 operands[2], operands[3]));
;; UNSPEC_SSE5_INTRINSIC form of the fnmsub insn.  Operand 1 is tied to the
;; destination; in each alternative at most one of operands 2 and 3 may be
;; memory.  Emits fnmsub with the <ssemodesuffixf2s> suffix.
2125 (define_insn "*sse5i_vmfnmsub<mode>4"
2126 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2128 [(vec_merge:SSEMODEF2P
2132 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0"))
2133 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
2134 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
2137 UNSPEC_SSE5_INTRINSIC))]
2138 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
2139 "fnmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2140 [(set_attr "type" "ssemuladd")
2141 (set_attr "mode" "<ssescalarmode>")])
2143 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2145 ;; Parallel double-precision floating point arithmetic
2147 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; V2DF negation expander.  All work is delegated to
;; ix86_expand_fp_absneg_operator (NEG, ...); DONE suppresses the template.
2149 (define_expand "negv2df2"
2150 [(set (match_operand:V2DF 0 "register_operand" "")
2151 (neg:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")))]
2153 "ix86_expand_fp_absneg_operator (NEG, V2DFmode, operands); DONE;")
;; V2DF absolute-value expander.  All work is delegated to
;; ix86_expand_fp_absneg_operator (ABS, ...); DONE suppresses the template.
2155 (define_expand "absv2df2"
2156 [(set (match_operand:V2DF 0 "register_operand" "")
2157 (abs:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")))]
2159 "ix86_expand_fp_absneg_operator (ABS, V2DFmode, operands); DONE;")
;; V2DF addition expander.  Both inputs may be register or memory; the
;; preparation statement calls ix86_fixup_binary_operands_no_copy for PLUS.
2161 (define_expand "addv2df3"
2162 [(set (match_operand:V2DF 0 "register_operand" "")
2163 (plus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
2164 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
2166 "ix86_fixup_binary_operands_no_copy (PLUS, V2DFmode, operands);")
;; V2DF add, emitted as addpd.  Operand 1 is marked commutative (%) and tied
;; to the destination; operand 2 may be a register or memory.  Requires
;; TARGET_SSE2 and a passing ix86_binary_operator_ok check.
2168 (define_insn "*addv2df3"
2169 [(set (match_operand:V2DF 0 "register_operand" "=x")
2170 (plus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
2171 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
2172 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V2DFmode, operands)"
2173 "addpd\t{%2, %0|%0, %2}"
2174 [(set_attr "type" "sseadd")
2175 (set_attr "mode" "V2DF")])
;; Add on V2DF operands emitted as addsd (mode attribute DF).  Operand 1 is
;; tied to the destination; operand 2 may be a register or memory.  The
;; operator check is passed V2DFmode so that it agrees with the operand
;; modes and with the other V2DF patterns in this section.
2177 (define_insn "sse2_vmaddv2df3"
2178 [(set (match_operand:V2DF 0 "register_operand" "=x")
2180 (plus:V2DF (match_operand:V2DF 1 "register_operand" "0")
2181 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
2184 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V2DFmode, operands)"
2185 "addsd\t{%2, %0|%0, %2}"
2186 [(set_attr "type" "sseadd")
2187 (set_attr "mode" "DF")])
;; V2DF subtraction expander.  Both inputs may be register or memory; the
;; preparation statement calls ix86_fixup_binary_operands_no_copy for MINUS.
2189 (define_expand "subv2df3"
2190 [(set (match_operand:V2DF 0 "register_operand" "")
2191 (minus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
2192 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
2194 "ix86_fixup_binary_operands_no_copy (MINUS, V2DFmode, operands);")
;; V2DF subtract, emitted as subpd.  Operand 1 must be a register tied to
;; the destination (no commutative marker); operand 2 may be memory.
2196 (define_insn "*subv2df3"
2197 [(set (match_operand:V2DF 0 "register_operand" "=x")
2198 (minus:V2DF (match_operand:V2DF 1 "register_operand" "0")
2199 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
2201 "subpd\t{%2, %0|%0, %2}"
2202 [(set_attr "type" "sseadd")
2203 (set_attr "mode" "V2DF")])
;; Subtract on V2DF operands emitted as subsd (mode attribute DF).  Operand 1
;; is tied to the destination; operand 2 may be a register or memory.
2205 (define_insn "sse2_vmsubv2df3"
2206 [(set (match_operand:V2DF 0 "register_operand" "=x")
2208 (minus:V2DF (match_operand:V2DF 1 "register_operand" "0")
2209 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
2213 "subsd\t{%2, %0|%0, %2}"
2214 [(set_attr "type" "sseadd")
2215 (set_attr "mode" "DF")])
;; V2DF multiplication expander.  Both inputs may be register or memory; the
;; preparation statement calls ix86_fixup_binary_operands_no_copy for MULT.
2217 (define_expand "mulv2df3"
2218 [(set (match_operand:V2DF 0 "register_operand" "")
2219 (mult:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
2220 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
2222 "ix86_fixup_binary_operands_no_copy (MULT, V2DFmode, operands);")
;; V2DF multiply, emitted as mulpd.  Operand 1 is marked commutative (%) and
;; tied to the destination; operand 2 may be a register or memory.  Requires
;; TARGET_SSE2 and a passing ix86_binary_operator_ok check.
2224 (define_insn "*mulv2df3"
2225 [(set (match_operand:V2DF 0 "register_operand" "=x")
2226 (mult:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
2227 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
2228 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V2DFmode, operands)"
2229 "mulpd\t{%2, %0|%0, %2}"
2230 [(set_attr "type" "ssemul")
2231 (set_attr "mode" "V2DF")])
;; Multiply on V2DF operands emitted as mulsd (mode attribute DF).  Operand 1
;; is tied to the destination; operand 2 may be a register or memory.
2233 (define_insn "sse2_vmmulv2df3"
2234 [(set (match_operand:V2DF 0 "register_operand" "=x")
2236 (mult:V2DF (match_operand:V2DF 1 "register_operand" "0")
2237 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
2240 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V2DFmode, operands)"
2241 "mulsd\t{%2, %0|%0, %2}"
2242 [(set_attr "type" "ssemul")
2243 (set_attr "mode" "DF")])
;; V2DF division expander.  The dividend (operand 1) must be a register; the
;; divisor may be memory.  The preparation statement calls
;; ix86_fixup_binary_operands_no_copy for DIV.
2245 (define_expand "divv2df3"
2246 [(set (match_operand:V2DF 0 "register_operand" "")
2247 (div:V2DF (match_operand:V2DF 1 "register_operand" "")
2248 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
2250 "ix86_fixup_binary_operands_no_copy (DIV, V2DFmode, operands);")
;; V2DF divide, emitted as divpd.  Operand 1 must be a register tied to the
;; destination; operand 2 may be a register or memory.
2252 (define_insn "*divv2df3"
2253 [(set (match_operand:V2DF 0 "register_operand" "=x")
2254 (div:V2DF (match_operand:V2DF 1 "register_operand" "0")
2255 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
2257 "divpd\t{%2, %0|%0, %2}"
2258 [(set_attr "type" "ssediv")
2259 (set_attr "mode" "V2DF")])
;; Divide on V2DF operands emitted as divsd (mode attribute DF).  Operand 1
;; is tied to the destination; operand 2 may be a register or memory.
2261 (define_insn "sse2_vmdivv2df3"
2262 [(set (match_operand:V2DF 0 "register_operand" "=x")
2264 (div:V2DF (match_operand:V2DF 1 "register_operand" "0")
2265 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
2269 "divsd\t{%2, %0|%0, %2}"
2270 [(set_attr "type" "ssediv")
2271 (set_attr "mode" "DF")])
;; V2DF square root, emitted as sqrtpd.  The source may be a register or
;; memory and is not tied to the destination.
2273 (define_insn "sqrtv2df2"
2274 [(set (match_operand:V2DF 0 "register_operand" "=x")
2275 (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
2277 "sqrtpd\t{%1, %0|%0, %1}"
2278 [(set_attr "type" "sse")
2279 (set_attr "mode" "V2DF")])
;; Square root on V2DF operands emitted as sqrtsd (mode attribute DF).
;; Operand 1 is the sqrt input (register or memory); operand 2 is a separate
;; register tied to the destination.
2281 (define_insn "sse2_vmsqrtv2df2"
2282 [(set (match_operand:V2DF 0 "register_operand" "=x")
2284 (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2285 (match_operand:V2DF 2 "register_operand" "0")
2288 "sqrtsd\t{%1, %0|%0, %1}"
2289 [(set_attr "type" "sse")
2290 (set_attr "mode" "DF")])
2292 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
2293 ;; isn't really correct, as those rtl operators aren't defined when
2294 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
;; V2DF signed-max expander.  Unless flag_finite_math_only is set, operand 1
;; is first forced into a register; then
;; ix86_fixup_binary_operands_no_copy is called for SMAX.
2296 (define_expand "smaxv2df3"
2297 [(set (match_operand:V2DF 0 "register_operand" "")
2298 (smax:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
2299 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
2302 if (!flag_finite_math_only)
2303 operands[1] = force_reg (V2DFmode, operands[1]);
2304 ix86_fixup_binary_operands_no_copy (SMAX, V2DFmode, operands);
;; V2DF max for flag_finite_math_only, emitted as maxpd.  Operand 1 is
;; marked commutative (%) and tied to the destination.
2307 (define_insn "*smaxv2df3_finite"
2308 [(set (match_operand:V2DF 0 "register_operand" "=x")
2309 (smax:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
2310 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
2311 "TARGET_SSE2 && flag_finite_math_only
2312 && ix86_binary_operator_ok (SMAX, V2DFmode, operands)"
2313 "maxpd\t{%2, %0|%0, %2}"
2314 [(set_attr "type" "sseadd")
2315 (set_attr "mode" "V2DF")])
;; V2DF max without the commutative marker, emitted as maxpd.  Operand 1
;; must be a register tied to the destination; operand 2 may be memory.
2317 (define_insn "*smaxv2df3"
2318 [(set (match_operand:V2DF 0 "register_operand" "=x")
2319 (smax:V2DF (match_operand:V2DF 1 "register_operand" "0")
2320 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
2322 "maxpd\t{%2, %0|%0, %2}"
2323 [(set_attr "type" "sseadd")
2324 (set_attr "mode" "V2DF")])
;; Max on V2DF operands emitted as maxsd (mode attribute DF).  Operand 1 is
;; tied to the destination; operand 2 may be a register or memory.
2326 (define_insn "sse2_vmsmaxv2df3"
2327 [(set (match_operand:V2DF 0 "register_operand" "=x")
2329 (smax:V2DF (match_operand:V2DF 1 "register_operand" "0")
2330 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
2334 "maxsd\t{%2, %0|%0, %2}"
2335 [(set_attr "type" "sseadd")
2336 (set_attr "mode" "DF")])
;; V2DF signed-min expander.  Unless flag_finite_math_only is set, operand 1
;; is first forced into a register; then
;; ix86_fixup_binary_operands_no_copy is called for SMIN.
2338 (define_expand "sminv2df3"
2339 [(set (match_operand:V2DF 0 "register_operand" "")
2340 (smin:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
2341 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
2344 if (!flag_finite_math_only)
2345 operands[1] = force_reg (V2DFmode, operands[1]);
2346 ix86_fixup_binary_operands_no_copy (SMIN, V2DFmode, operands);
;; V2DF min for flag_finite_math_only, emitted as minpd.  Operand 1 is
;; marked commutative (%) and tied to the destination.
2349 (define_insn "*sminv2df3_finite"
2350 [(set (match_operand:V2DF 0 "register_operand" "=x")
2351 (smin:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
2352 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
2353 "TARGET_SSE2 && flag_finite_math_only
2354 && ix86_binary_operator_ok (SMIN, V2DFmode, operands)"
2355 "minpd\t{%2, %0|%0, %2}"
2356 [(set_attr "type" "sseadd")
2357 (set_attr "mode" "V2DF")])
;; V2DF min without the commutative marker, emitted as minpd.  Operand 1
;; must be a register tied to the destination; operand 2 may be memory.
2359 (define_insn "*sminv2df3"
2360 [(set (match_operand:V2DF 0 "register_operand" "=x")
2361 (smin:V2DF (match_operand:V2DF 1 "register_operand" "0")
2362 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
2364 "minpd\t{%2, %0|%0, %2}"
2365 [(set_attr "type" "sseadd")
2366 (set_attr "mode" "V2DF")])
;; Min on V2DF operands emitted as minsd (mode attribute DF).  Operand 1 is
;; tied to the destination; operand 2 may be a register or memory.
2368 (define_insn "sse2_vmsminv2df3"
2369 [(set (match_operand:V2DF 0 "register_operand" "=x")
2371 (smin:V2DF (match_operand:V2DF 1 "register_operand" "0")
2372 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
2376 "minsd\t{%2, %0|%0, %2}"
2377 [(set_attr "type" "sseadd")
2378 (set_attr "mode" "DF")])
;; Emits addsubpd.  The template reuses operands 1 and 2 (match_dup) in a
;; minus:V2DF term; operand 1 is tied to the destination and operand 2 may
;; be a register or memory.
2380 (define_insn "sse3_addsubv2df3"
2381 [(set (match_operand:V2DF 0 "register_operand" "=x")
2384 (match_operand:V2DF 1 "register_operand" "0")
2385 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
2386 (minus:V2DF (match_dup 1) (match_dup 2))
2389 "addsubpd\t{%2, %0|%0, %2}"
2390 [(set_attr "type" "sseadd")
2391 (set_attr "mode" "V2DF")])
;; Emits haddpd.  The template pairs element 0 with element 1 of operand 1,
;; and likewise for operand 2.  Operand 1 is tied to the destination;
;; operand 2 may be a register or memory.
2393 (define_insn "sse3_haddv2df3"
2394 [(set (match_operand:V2DF 0 "register_operand" "=x")
2398 (match_operand:V2DF 1 "register_operand" "0")
2399 (parallel [(const_int 0)]))
2400 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
2403 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
2404 (parallel [(const_int 0)]))
2405 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
2407 "haddpd\t{%2, %0|%0, %2}"
2408 [(set_attr "type" "sseadd")
2409 (set_attr "mode" "V2DF")])
;; Emits hsubpd.  The template pairs element 0 with element 1 of operand 1,
;; and likewise for operand 2.  Operand 1 is tied to the destination;
;; operand 2 may be a register or memory.
2411 (define_insn "sse3_hsubv2df3"
2412 [(set (match_operand:V2DF 0 "register_operand" "=x")
2416 (match_operand:V2DF 1 "register_operand" "0")
2417 (parallel [(const_int 0)]))
2418 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
2421 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
2422 (parallel [(const_int 0)]))
2423 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
2425 "hsubpd\t{%2, %0|%0, %2}"
2426 [(set_attr "type" "sseadd")
2427 (set_attr "mode" "V2DF")])
;; V2DF sum-reduction expander: emits sse3_haddv2df3 with operand 1 supplied
;; as both inputs, writing the result to operand 0.
2429 (define_expand "reduc_splus_v2df"
2430 [(match_operand:V2DF 0 "register_operand" "")
2431 (match_operand:V2DF 1 "register_operand" "")]
2434 emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
2438 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2440 ;; Parallel double-precision floating point comparisons
2442 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; V2DF comparison producing a V2DF result, emitted as cmp<cond>pd where the
;; condition text comes from operator 3 via %D3.  Disabled when TARGET_SSE5
;; is set.  Operand 1 is tied to the destination.
2444 (define_insn "sse2_maskcmpv2df3"
2445 [(set (match_operand:V2DF 0 "register_operand" "=x")
2446 (match_operator:V2DF 3 "sse_comparison_operator"
2447 [(match_operand:V2DF 1 "register_operand" "0")
2448 (match_operand:V2DF 2 "nonimmediate_operand" "xm")]))]
2449 "TARGET_SSE2 && !TARGET_SSE5"
2450 "cmp%D3pd\t{%2, %0|%0, %2}"
2451 [(set_attr "type" "ssecmp")
2452 (set_attr "mode" "V2DF")])
;; DF comparison producing a DF result, emitted as cmp<cond>sd where the
;; condition text comes from operator 3 via %D3.  Disabled when TARGET_SSE5
;; is set.  Operand 1 is tied to the destination.
2454 (define_insn "sse2_maskcmpdf3"
2455 [(set (match_operand:DF 0 "register_operand" "=x")
2456 (match_operator:DF 3 "sse_comparison_operator"
2457 [(match_operand:DF 1 "register_operand" "0")
2458 (match_operand:DF 2 "nonimmediate_operand" "xm")]))]
2459 "TARGET_SSE2 && !TARGET_SSE5"
2460 "cmp%D3sd\t{%2, %0|%0, %2}"
2461 [(set_attr "type" "ssecmp")
2462 (set_attr "mode" "DF")])
;; Comparison on V2DF operands emitted as cmp<cond>sd (mode attribute DF).
;; Disabled when TARGET_SSE5 is set.  Operand 1 is tied to the destination.
2464 (define_insn "sse2_vmmaskcmpv2df3"
2465 [(set (match_operand:V2DF 0 "register_operand" "=x")
2467 (match_operator:V2DF 3 "sse_comparison_operator"
2468 [(match_operand:V2DF 1 "register_operand" "0")
2469 (match_operand:V2DF 2 "nonimmediate_operand" "xm")])
2472 "TARGET_SSE2 && !TARGET_SSE5"
2473 "cmp%D3sd\t{%2, %0|%0, %2}"
2474 [(set_attr "type" "ssecmp")
2475 (set_attr "mode" "DF")])
;; Sets FLAGS_REG in CCFP mode from V2DF operands 0 and 1 (element index 0
;; appears in each selector); emits comisd.  Operand 1 may be memory.
2477 (define_insn "sse2_comi"
2478 [(set (reg:CCFP FLAGS_REG)
2481 (match_operand:V2DF 0 "register_operand" "x")
2482 (parallel [(const_int 0)]))
2484 (match_operand:V2DF 1 "nonimmediate_operand" "xm")
2485 (parallel [(const_int 0)]))))]
2487 "comisd\t{%1, %0|%0, %1}"
2488 [(set_attr "type" "ssecomi")
2489 (set_attr "mode" "DF")])
;; Sets FLAGS_REG in CCFPU mode from V2DF operands 0 and 1 (element index 0
;; appears in each selector); emits ucomisd.  Operand 1 may be memory.
2491 (define_insn "sse2_ucomi"
2492 [(set (reg:CCFPU FLAGS_REG)
2495 (match_operand:V2DF 0 "register_operand" "x")
2496 (parallel [(const_int 0)]))
2498 (match_operand:V2DF 1 "nonimmediate_operand" "xm")
2499 (parallel [(const_int 0)]))))]
2501 "ucomisd\t{%1, %0|%0, %1}"
2502 [(set_attr "type" "ssecomi")
2503 (set_attr "mode" "DF")])
;; V2DF conditional-select expander.  Operator 3 compares operands 4 and 5;
;; operands 1 and 2 are the two general_operand value inputs.  The
;; preparation code branches on the result of ix86_expand_fp_vcond
;; (operands); what each branch does is not visible in this listing.
2505 (define_expand "vcondv2df"
2506 [(set (match_operand:V2DF 0 "register_operand" "")
2508 (match_operator 3 ""
2509 [(match_operand:V2DF 4 "nonimmediate_operand" "")
2510 (match_operand:V2DF 5 "nonimmediate_operand" "")])
2511 (match_operand:V2DF 1 "general_operand" "")
2512 (match_operand:V2DF 2 "general_operand" "")))]
2515 if (ix86_expand_fp_vcond (operands))
2521 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2523 ;; Parallel double-precision floating point logical operations
2525 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; V2DF bitwise-AND expander.  Both inputs may be register or memory; the
;; preparation statement calls ix86_fixup_binary_operands_no_copy for AND.
2527 (define_expand "andv2df3"
2528 [(set (match_operand:V2DF 0 "register_operand" "")
2529 (and:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
2530 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
2532 "ix86_fixup_binary_operands_no_copy (AND, V2DFmode, operands);")
;; V2DF bitwise AND, emitted as andpd.  Operand 1 is marked commutative (%)
;; and tied to the destination; operand 2 may be a register or memory.
2534 (define_insn "*andv2df3"
2535 [(set (match_operand:V2DF 0 "register_operand" "=x")
2536 (and:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
2537 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
2538 "TARGET_SSE2 && ix86_binary_operator_ok (AND, V2DFmode, operands)"
2539 "andpd\t{%2, %0|%0, %2}"
2540 [(set_attr "type" "sselog")
2541 (set_attr "mode" "V2DF")])
;; V2DF and-not: ANDs the complement of operand 1 with operand 2; emitted as
;; andnpd.  Operand 1 is a register tied to the destination.
2543 (define_insn "sse2_nandv2df3"
2544 [(set (match_operand:V2DF 0 "register_operand" "=x")
2545 (and:V2DF (not:V2DF (match_operand:V2DF 1 "register_operand" "0"))
2546 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
2548 "andnpd\t{%2, %0|%0, %2}"
2549 [(set_attr "type" "sselog")
2550 (set_attr "mode" "V2DF")])
;; V2DF bitwise-OR expander.  Both inputs may be register or memory; the
;; preparation statement calls ix86_fixup_binary_operands_no_copy for IOR.
2552 (define_expand "iorv2df3"
2553 [(set (match_operand:V2DF 0 "register_operand" "")
2554 (ior:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
2555 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
2557 "ix86_fixup_binary_operands_no_copy (IOR, V2DFmode, operands);")
;; V2DF bitwise OR, emitted as orpd.  Operand 1 is marked commutative (%)
;; and tied to the destination; operand 2 may be a register or memory.
2559 (define_insn "*iorv2df3"
2560 [(set (match_operand:V2DF 0 "register_operand" "=x")
2561 (ior:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
2562 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
2563 "TARGET_SSE2 && ix86_binary_operator_ok (IOR, V2DFmode, operands)"
2564 "orpd\t{%2, %0|%0, %2}"
2565 [(set_attr "type" "sselog")
2566 (set_attr "mode" "V2DF")])
;; V2DF bitwise-XOR expander.  Both inputs may be register or memory; the
;; preparation statement calls ix86_fixup_binary_operands_no_copy for XOR.
2568 (define_expand "xorv2df3"
2569 [(set (match_operand:V2DF 0 "register_operand" "")
2570 (xor:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
2571 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
2573 "ix86_fixup_binary_operands_no_copy (XOR, V2DFmode, operands);")
;; V2DF bitwise XOR, emitted as xorpd.  Operand 1 is marked commutative (%)
;; and tied to the destination; operand 2 may be a register or memory.
2575 (define_insn "*xorv2df3"
2576 [(set (match_operand:V2DF 0 "register_operand" "=x")
2577 (xor:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
2578 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
2579 "TARGET_SSE2 && ix86_binary_operator_ok (XOR, V2DFmode, operands)"
2580 "xorpd\t{%2, %0|%0, %2}"
2581 [(set_attr "type" "sselog")
2582 (set_attr "mode" "V2DF")])
2584 ;; Also define scalar versions. These are used for abs, neg, and
2585 ;; conditional move. Using subregs into vector modes causes register
2586 ;; allocation lossage. These patterns do not allow memory operands
2587 ;; because the native instructions read the full 128 bits.
;; DF-mode bitwise AND using the packed andpd instruction (mode attribute
;; V2DF).  Register operands only; operand 1 is tied to the destination.
2589 (define_insn "*anddf3"
2590 [(set (match_operand:DF 0 "register_operand" "=x")
2591 (and:DF (match_operand:DF 1 "register_operand" "0")
2592 (match_operand:DF 2 "register_operand" "x")))]
2594 "andpd\t{%2, %0|%0, %2}"
2595 [(set_attr "type" "sselog")
2596 (set_attr "mode" "V2DF")])
;; DF-mode and-not (complement of operand 1 ANDed with operand 2) using the
;; packed andnpd instruction.  Register operands only; operand 1 is tied to
;; the destination.
2598 (define_insn "*nanddf3"
2599 [(set (match_operand:DF 0 "register_operand" "=x")
2600 (and:DF (not:DF (match_operand:DF 1 "register_operand" "0"))
2601 (match_operand:DF 2 "register_operand" "x")))]
2603 "andnpd\t{%2, %0|%0, %2}"
2604 [(set_attr "type" "sselog")
2605 (set_attr "mode" "V2DF")])
;; DF-mode bitwise OR using the packed orpd instruction (mode attribute
;; V2DF).  Register operands only; operand 1 is tied to the destination.
2607 (define_insn "*iordf3"
2608 [(set (match_operand:DF 0 "register_operand" "=x")
2609 (ior:DF (match_operand:DF 1 "register_operand" "0")
2610 (match_operand:DF 2 "register_operand" "x")))]
2612 "orpd\t{%2, %0|%0, %2}"
2613 [(set_attr "type" "sselog")
2614 (set_attr "mode" "V2DF")])
;; DF-mode bitwise XOR using the packed xorpd instruction (mode attribute
;; V2DF).  Register operands only; operand 1 is tied to the destination.
2616 (define_insn "*xordf3"
2617 [(set (match_operand:DF 0 "register_operand" "=x")
2618 (xor:DF (match_operand:DF 1 "register_operand" "0")
2619 (match_operand:DF 2 "register_operand" "x")))]
2621 "xorpd\t{%2, %0|%0, %2}"
2622 [(set_attr "type" "sselog")
2623 (set_attr "mode" "V2DF")])
2625 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2627 ;; Parallel double-precision floating point conversion operations
2629 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; V2SI -> V2DF integer-to-float conversion, emitted as cvtpi2pd.  The
;; source is a "y" register (alternative 0, unit mmx) or memory.
2631 (define_insn "sse2_cvtpi2pd"
2632 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2633 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
2635 "cvtpi2pd\t{%1, %0|%0, %1}"
2636 [(set_attr "type" "ssecvt")
2637 (set_attr "unit" "mmx,*")
2638 (set_attr "mode" "V2DF")])
;; V2DF -> V2SI conversion expressed as UNSPEC_FIX_NOTRUNC, emitted as
;; cvtpd2pi.  The destination is a "y" register; the source may be memory.
2640 (define_insn "sse2_cvtpd2pi"
2641 [(set (match_operand:V2SI 0 "register_operand" "=y")
2642 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2643 UNSPEC_FIX_NOTRUNC))]
2645 "cvtpd2pi\t{%1, %0|%0, %1}"
2646 [(set_attr "type" "ssecvt")
2647 (set_attr "unit" "mmx")
2648 (set_attr "prefix_data16" "1")
2649 (set_attr "mode" "DI")])
;; V2DF -> V2SI conversion expressed with the fix operator, emitted as
;; cvttpd2pi.  The destination is a "y" register; the source may be memory.
;; NOTE(review): mode attribute is "TI" here but "DI" on sse2_cvtpd2pi --
;; confirm the difference is intended.
2651 (define_insn "sse2_cvttpd2pi"
2652 [(set (match_operand:V2SI 0 "register_operand" "=y")
2653 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
2655 "cvttpd2pi\t{%1, %0|%0, %1}"
2656 [(set_attr "type" "ssecvt")
2657 (set_attr "unit" "mmx")
2658 (set_attr "prefix_data16" "1")
2659 (set_attr "mode" "TI")])
;; Converts SI operand 2 (general register or memory) to DF; V2DF operand 1
;; is tied to the destination.  Emits cvtsi2sd.
2661 (define_insn "sse2_cvtsi2sd"
2662 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2665 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
2666 (match_operand:V2DF 1 "register_operand" "0,0")
2669 "cvtsi2sd\t{%2, %0|%0, %2}"
2670 [(set_attr "type" "sseicvt")
2671 (set_attr "mode" "DF")
2672 (set_attr "athlon_decode" "double,direct")
2673 (set_attr "amdfam10_decode" "vector,double")])
;; 64-bit-only variant: converts DI operand 2 (general register or memory)
;; to DF; V2DF operand 1 is tied to the destination.  Emits cvtsi2sdq.
2675 (define_insn "sse2_cvtsi2sdq"
2676 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2679 (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m")))
2680 (match_operand:V2DF 1 "register_operand" "0,0")
2682 "TARGET_SSE2 && TARGET_64BIT"
2683 "cvtsi2sdq\t{%2, %0|%0, %2}"
2684 [(set_attr "type" "sseicvt")
2685 (set_attr "mode" "DF")
2686 (set_attr "athlon_decode" "double,direct")
2687 (set_attr "amdfam10_decode" "vector,double")])
;; V2DF (element index 0) -> SI conversion expressed as UNSPEC_FIX_NOTRUNC;
;; emits cvtsd2si.  The source is an SSE register or memory.
2689 (define_insn "sse2_cvtsd2si"
2690 [(set (match_operand:SI 0 "register_operand" "=r,r")
2693 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2694 (parallel [(const_int 0)]))]
2695 UNSPEC_FIX_NOTRUNC))]
2697 "cvtsd2si\t{%1, %0|%0, %1}"
2698 [(set_attr "type" "sseicvt")
2699 (set_attr "athlon_decode" "double,vector")
2700 (set_attr "prefix_rep" "1")
2701 (set_attr "mode" "SI")])
;; Same cvtsd2si instruction, but taking a plain DF source operand instead
;; of a V2DF one; also expressed as UNSPEC_FIX_NOTRUNC.
2703 (define_insn "sse2_cvtsd2si_2"
2704 [(set (match_operand:SI 0 "register_operand" "=r,r")
2705 (unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2706 UNSPEC_FIX_NOTRUNC))]
2708 "cvtsd2si\t{%1, %0|%0, %1}"
2709 [(set_attr "type" "sseicvt")
2710 (set_attr "athlon_decode" "double,vector")
2711 (set_attr "amdfam10_decode" "double,double")
2712 (set_attr "prefix_rep" "1")
2713 (set_attr "mode" "SI")])
;; 64-bit-only V2DF (element index 0) -> DI conversion expressed as
;; UNSPEC_FIX_NOTRUNC; emits cvtsd2siq.
2715 (define_insn "sse2_cvtsd2siq"
2716 [(set (match_operand:DI 0 "register_operand" "=r,r")
2719 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2720 (parallel [(const_int 0)]))]
2721 UNSPEC_FIX_NOTRUNC))]
2722 "TARGET_SSE2 && TARGET_64BIT"
2723 "cvtsd2siq\t{%1, %0|%0, %1}"
2724 [(set_attr "type" "sseicvt")
2725 (set_attr "athlon_decode" "double,vector")
2726 (set_attr "prefix_rep" "1")
2727 (set_attr "mode" "DI")])
;; 64-bit-only cvtsd2siq taking a plain DF source operand instead of a V2DF
;; one; also expressed as UNSPEC_FIX_NOTRUNC.
2729 (define_insn "sse2_cvtsd2siq_2"
2730 [(set (match_operand:DI 0 "register_operand" "=r,r")
2731 (unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2732 UNSPEC_FIX_NOTRUNC))]
2733 "TARGET_SSE2 && TARGET_64BIT"
2734 "cvtsd2siq\t{%1, %0|%0, %1}"
2735 [(set_attr "type" "sseicvt")
2736 (set_attr "athlon_decode" "double,vector")
2737 (set_attr "amdfam10_decode" "double,double")
2738 (set_attr "prefix_rep" "1")
2739 (set_attr "mode" "DI")])
;; V2DF (element index 0) -> SI conversion; emits cvttsd2si.  The source is
;; an SSE register or memory.
2741 (define_insn "sse2_cvttsd2si"
2742 [(set (match_operand:SI 0 "register_operand" "=r,r")
2745 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2746 (parallel [(const_int 0)]))))]
2748 "cvttsd2si\t{%1, %0|%0, %1}"
2749 [(set_attr "type" "sseicvt")
2750 (set_attr "prefix_rep" "1")
2751 (set_attr "mode" "SI")
2752 (set_attr "athlon_decode" "double,vector")
2753 (set_attr "amdfam10_decode" "double,double")])
;; 64-bit-only V2DF (element index 0) -> DI conversion; emits cvttsd2siq.
2755 (define_insn "sse2_cvttsd2siq"
2756 [(set (match_operand:DI 0 "register_operand" "=r,r")
2759 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2760 (parallel [(const_int 0)]))))]
2761 "TARGET_SSE2 && TARGET_64BIT"
2762 "cvttsd2siq\t{%1, %0|%0, %1}"
2763 [(set_attr "type" "sseicvt")
2764 (set_attr "prefix_rep" "1")
2765 (set_attr "mode" "DI")
2766 (set_attr "athlon_decode" "double,vector")
2767 (set_attr "amdfam10_decode" "double,double")])
;; Converts elements 0 and 1 of V4SI operand 1 into a V2DF result; emits
;; cvtdq2pd.  The source may be a register or memory.
2769 (define_insn "sse2_cvtdq2pd"
2770 [(set (match_operand:V2DF 0 "register_operand" "=x")
2773 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
2774 (parallel [(const_int 0) (const_int 1)]))))]
2776 "cvtdq2pd\t{%1, %0|%0, %1}"
2777 [(set_attr "type" "ssecvt")
2778 (set_attr "mode" "V2DF")])
;; Expander for the V2DF -> V4SI conversion.  The preparation statement
;; supplies operands[2] as the V2SI zero constant.
2780 (define_expand "sse2_cvtpd2dq"
2781 [(set (match_operand:V4SI 0 "register_operand" "")
2783 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "")]
2787 "operands[2] = CONST0_RTX (V2SImode);")
;; V2DF -> V4SI conversion insn; emits cvtpd2dq.  Operand 2 must satisfy
;; const0_operand in V2SI mode.
2789 (define_insn "*sse2_cvtpd2dq"
2790 [(set (match_operand:V4SI 0 "register_operand" "=x")
2792 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2794 (match_operand:V2SI 2 "const0_operand" "")))]
2796 "cvtpd2dq\t{%1, %0|%0, %1}"
2797 [(set_attr "type" "ssecvt")
2798 (set_attr "prefix_rep" "1")
2799 (set_attr "mode" "TI")
2800 (set_attr "amdfam10_decode" "double")])
;; Expander for the V2DF -> V4SI conversion using the fix operator.  The
;; preparation statement supplies operands[2] as the V2SI zero constant.
2802 (define_expand "sse2_cvttpd2dq"
2803 [(set (match_operand:V4SI 0 "register_operand" "")
2805 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" ""))
2808 "operands[2] = CONST0_RTX (V2SImode);")
;; V2DF -> V4SI conversion insn using the fix operator; emits cvttpd2dq.
;; Operand 2 must satisfy const0_operand in V2SI mode.
2810 (define_insn "*sse2_cvttpd2dq"
2811 [(set (match_operand:V4SI 0 "register_operand" "=x")
2813 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2814 (match_operand:V2SI 2 "const0_operand" "")))]
2816 "cvttpd2dq\t{%1, %0|%0, %1}"
2817 [(set_attr "type" "ssecvt")
2818 (set_attr "prefix_rep" "1")
2819 (set_attr "mode" "TI")
2820 (set_attr "amdfam10_decode" "double")])
;; float_truncate of V2DF operand 2 (register or memory) into a V4SF
;; destination; V4SF operand 1 is tied to the destination.  Emits cvtsd2ss.
2822 (define_insn "sse2_cvtsd2ss"
2823 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2826 (float_truncate:V2SF
2827 (match_operand:V2DF 2 "nonimmediate_operand" "x,m")))
2828 (match_operand:V4SF 1 "register_operand" "0,0")
2831 "cvtsd2ss\t{%2, %0|%0, %2}"
2832 [(set_attr "type" "ssecvt")
2833 (set_attr "athlon_decode" "vector,double")
2834 (set_attr "amdfam10_decode" "vector,double")
2835 (set_attr "mode" "SF")])
;; Converts from V4SF operand 2 (selector lists elements 0 and 1) into a
;; V2DF destination; V2DF operand 1 is tied to the destination.  Emits
;; cvtss2sd.
2837 (define_insn "sse2_cvtss2sd"
2838 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2842 (match_operand:V4SF 2 "nonimmediate_operand" "x,m")
2843 (parallel [(const_int 0) (const_int 1)])))
2844 (match_operand:V2DF 1 "register_operand" "0,0")
2847 "cvtss2sd\t{%2, %0|%0, %2}"
2848 [(set_attr "type" "ssecvt")
2849 (set_attr "amdfam10_decode" "vector,double")
2850 (set_attr "mode" "DF")])
;; Expander for the V2DF -> V4SF float_truncate conversion.  The preparation
;; statement supplies operands[2] as the V2SF zero constant.
2852 (define_expand "sse2_cvtpd2ps"
2853 [(set (match_operand:V4SF 0 "register_operand" "")
2855 (float_truncate:V2SF
2856 (match_operand:V2DF 1 "nonimmediate_operand" ""))
2859 "operands[2] = CONST0_RTX (V2SFmode);")
;; V2DF -> V4SF float_truncate insn; emits cvtpd2ps.  Operand 2 must satisfy
;; const0_operand in V2SF mode.
2861 (define_insn "*sse2_cvtpd2ps"
2862 [(set (match_operand:V4SF 0 "register_operand" "=x")
2864 (float_truncate:V2SF
2865 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2866 (match_operand:V2SF 2 "const0_operand" "")))]
2868 "cvtpd2ps\t{%1, %0|%0, %1}"
2869 [(set_attr "type" "ssecvt")
2870 (set_attr "prefix_data16" "1")
2871 (set_attr "mode" "V4SF")
2872 (set_attr "amdfam10_decode" "double")])
;; Converts elements 0 and 1 of V4SF operand 1 into a V2DF result; emits
;; cvtps2pd.  The source may be a register or memory.
2874 (define_insn "sse2_cvtps2pd"
2875 [(set (match_operand:V2DF 0 "register_operand" "=x")
2878 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
2879 (parallel [(const_int 0) (const_int 1)]))))]
2881 "cvtps2pd\t{%1, %0|%0, %1}"
2882 [(set_attr "type" "ssecvt")
2883 (set_attr "mode" "V2DF")
2884 (set_attr "amdfam10_decode" "direct")])
;; Two-step expander from V4SF operand 1 to V2DF operand 0.  The preparation
;; code allocates a fresh V4SF pseudo as operands[2]; the final step's
;; selector lists elements 0 and 1.
2886 (define_expand "vec_unpacks_hi_v4sf"
2891 (match_operand:V4SF 1 "nonimmediate_operand" ""))
2892 (parallel [(const_int 6)
2896 (set (match_operand:V2DF 0 "register_operand" "")
2900 (parallel [(const_int 0) (const_int 1)]))))]
2903 operands[2] = gen_reg_rtx (V4SFmode);
;; Expander producing V2DF operand 0 from elements 0 and 1 of V4SF
;; operand 1.
2906 (define_expand "vec_unpacks_lo_v4sf"
2907 [(set (match_operand:V2DF 0 "register_operand" "")
2910 (match_operand:V4SF 1 "nonimmediate_operand" "")
2911 (parallel [(const_int 0) (const_int 1)]))))]
;; V8HI -> V4SF expander: unpacks operand 1 into a V4SI temporary with
;; gen_vec_unpacks_hi_v8hi, then converts it with gen_sse2_cvtdq2ps.
2914 (define_expand "vec_unpacks_float_hi_v8hi"
2915 [(match_operand:V4SF 0 "register_operand" "")
2916 (match_operand:V8HI 1 "register_operand" "")]
2919 rtx tmp = gen_reg_rtx (V4SImode);
2921 emit_insn (gen_vec_unpacks_hi_v8hi (tmp, operands[1]));
2922 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; V8HI -> V4SF expander: unpacks operand 1 into a V4SI temporary with
;; gen_vec_unpacks_lo_v8hi, then converts it with gen_sse2_cvtdq2ps.
2926 (define_expand "vec_unpacks_float_lo_v8hi"
2927 [(match_operand:V4SF 0 "register_operand" "")
2928 (match_operand:V8HI 1 "register_operand" "")]
2931 rtx tmp = gen_reg_rtx (V4SImode);
2933 emit_insn (gen_vec_unpacks_lo_v8hi (tmp, operands[1]));
2934 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; V8HI -> V4SF expander: unpacks operand 1 into a V4SI temporary with
;; gen_vec_unpacku_hi_v8hi, then converts it with gen_sse2_cvtdq2ps.
2938 (define_expand "vec_unpacku_float_hi_v8hi"
2939 [(match_operand:V4SF 0 "register_operand" "")
2940 (match_operand:V8HI 1 "register_operand" "")]
2943 rtx tmp = gen_reg_rtx (V4SImode);
2945 emit_insn (gen_vec_unpacku_hi_v8hi (tmp, operands[1]));
2946 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; V8HI -> V4SF expander: unpacks operand 1 into a V4SI temporary with
;; gen_vec_unpacku_lo_v8hi, then converts it with gen_sse2_cvtdq2ps.
2950 (define_expand "vec_unpacku_float_lo_v8hi"
2951 [(match_operand:V4SF 0 "register_operand" "")
2952 (match_operand:V8HI 1 "register_operand" "")]
2955 rtx tmp = gen_reg_rtx (V4SImode);
2957 emit_insn (gen_vec_unpacku_lo_v8hi (tmp, operands[1]));
2958 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; Two-step expander from V4SI operand 1 to V2DF operand 0.  The preparation
;; code allocates a fresh V4SI pseudo as operands[2]; the final step's
;; selector lists elements 0 and 1.
2962 (define_expand "vec_unpacks_float_hi_v4si"
2965 (match_operand:V4SI 1 "nonimmediate_operand" "")
2966 (parallel [(const_int 2)
2970 (set (match_operand:V2DF 0 "register_operand" "")
2974 (parallel [(const_int 0) (const_int 1)]))))]
2977 operands[2] = gen_reg_rtx (V4SImode);
;; Expander producing V2DF operand 0 from elements 0 and 1 of V4SI
;; operand 1.
2980 (define_expand "vec_unpacks_float_lo_v4si"
2981 [(set (match_operand:V2DF 0 "register_operand" "")
2984 (match_operand:V4SI 1 "nonimmediate_operand" "")
2985 (parallel [(const_int 0) (const_int 1)]))))]
;; Packs two V2DF inputs into one V4SF result: each input is converted with
;; gen_sse2_cvtpd2ps into its own V4SF temporary (r1, r2), and the two
;; temporaries are combined into operand 0 with gen_sse_movlhps.
2988 (define_expand "vec_pack_trunc_v2df"
2989 [(match_operand:V4SF 0 "register_operand" "")
2990 (match_operand:V2DF 1 "nonimmediate_operand" "")
2991 (match_operand:V2DF 2 "nonimmediate_operand" "")]
2996 r1 = gen_reg_rtx (V4SFmode);
2997 r2 = gen_reg_rtx (V4SFmode);
2999 emit_insn (gen_sse2_cvtpd2ps (r1, operands[1]));
3000 emit_insn (gen_sse2_cvtpd2ps (r2, operands[2]));
3001 emit_insn (gen_sse_movlhps (operands[0], r1, r2));
;; Packs two V2DF inputs into one V4SI result: each input is converted with
;; gen_sse2_cvttpd2dq into its own V4SI temporary, and the temporaries are
;; combined with gen_sse2_punpcklqdq, all three viewed as V2DI through
;; gen_lowpart.
3005 (define_expand "vec_pack_sfix_trunc_v2df"
3006 [(match_operand:V4SI 0 "register_operand" "")
3007 (match_operand:V2DF 1 "nonimmediate_operand" "")
3008 (match_operand:V2DF 2 "nonimmediate_operand" "")]
3013 r1 = gen_reg_rtx (V4SImode);
3014 r2 = gen_reg_rtx (V4SImode);
3016 emit_insn (gen_sse2_cvttpd2dq (r1, operands[1]));
3017 emit_insn (gen_sse2_cvttpd2dq (r2, operands[2]));
3018 emit_insn (gen_sse2_punpcklqdq (gen_lowpart (V2DImode, operands[0]),
3019 gen_lowpart (V2DImode, r1),
3020 gen_lowpart (V2DImode, r2)));
;; Packs two V2DF inputs into one V4SI result: each input is converted with
;; gen_sse2_cvtpd2dq into its own V4SI temporary, and the temporaries are
;; combined with gen_sse2_punpcklqdq, all three viewed as V2DI through
;; gen_lowpart.
3024 (define_expand "vec_pack_sfix_v2df"
3025 [(match_operand:V4SI 0 "register_operand" "")
3026 (match_operand:V2DF 1 "nonimmediate_operand" "")
3027 (match_operand:V2DF 2 "nonimmediate_operand" "")]
3032 r1 = gen_reg_rtx (V4SImode);
3033 r2 = gen_reg_rtx (V4SImode);
3035 emit_insn (gen_sse2_cvtpd2dq (r1, operands[1]));
3036 emit_insn (gen_sse2_cvtpd2dq (r2, operands[2]));
3037 emit_insn (gen_sse2_punpcklqdq (gen_lowpart (V2DImode, operands[0]),
3038 gen_lowpart (V2DImode, r1),
3039 gen_lowpart (V2DImode, r2)));
3044 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3046 ;; Parallel double-precision floating point element swizzling
3048 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Three-alternative V2DF pattern.  Alternative 0 emits unpckhpd on
;; registers; alternative 1 emits movlpd from the %H1 form of an offsettable
;; memory operand 1; alternative 2 emits movhpd storing register operand 1
;; to a memory destination.  Rejected when operands 1 and 2 are both memory.
3050 (define_insn "sse2_unpckhpd"
3051 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
3054 (match_operand:V2DF 1 "nonimmediate_operand" " 0,o,x")
3055 (match_operand:V2DF 2 "nonimmediate_operand" " x,0,0"))
3056 (parallel [(const_int 1)
3058 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3060 unpckhpd\t{%2, %0|%0, %2}
3061 movlpd\t{%H1, %0|%0, %H1}
3062 movhpd\t{%1, %0|%0, %1}"
3063 [(set_attr "type" "sselog,ssemov,ssemov")
3064 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; Two-alternative V2DF pattern whose selector starts at element 0.
;; Alternative 0 (register destination) emits movddup; the output template
;; of alternative 1 (offsettable-memory destination) is not visible in this
;; listing.  Rejected when operands 0 and 1 are both memory.
3066 (define_insn "*sse3_movddup"
3067 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,o")
3070 (match_operand:V2DF 1 "nonimmediate_operand" "xm,x")
3072 (parallel [(const_int 0)
3074 "TARGET_SSE3 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
3076 movddup\t{%1, %0|%0, %1}
3078 [(set_attr "type" "sselog1,ssemov")
3079 (set_attr "mode" "V2DF")])
;; NOTE(review): the opening line of this definition is not visible in this
;; listing; the reload_completed condition suggests a post-reload split --
;; confirm.  For a memory destination and register source, the code takes
;; the register as a DF value and stores it at byte offsets 0 and 8 of the
;; destination.
3082 [(set (match_operand:V2DF 0 "memory_operand" "")
3085 (match_operand:V2DF 1 "register_operand" "")
3087 (parallel [(const_int 0)
3089 "TARGET_SSE3 && reload_completed"
3092 rtx low = gen_rtx_REG (DFmode, REGNO (operands[1]));
3093 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
3094 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
;; Three-alternative V2DF pattern with operand 1 always tied to the
;; destination.  Alternative 0 emits unpcklpd on registers; alternative 1
;; emits movhpd loading from memory operand 2; alternative 2 emits movlpd
;; storing register operand 2 to the %H0 form of an offsettable memory
;; destination.  Rejected when operands 1 and 2 are both memory.
3098 (define_insn "sse2_unpcklpd"
3099 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o")
3102 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0")
3103 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x"))
3104 (parallel [(const_int 0)
3106 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3108 unpcklpd\t{%2, %0|%0, %2}
3109 movhpd\t{%2, %0|%0, %2}
3110 movlpd\t{%2, %H0|%H0, %2}"
3111 [(set_attr "type" "sselog,ssemov,ssemov")
3112 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; Expander taking the shuffle immediate as constant operand 3.  It reads
;; the mask with INTVAL and forwards to gen_sse2_shufpd_1; the last
;; argument is 3 when mask bit 1 is set and 2 otherwise.
3114 (define_expand "sse2_shufpd"
3115 [(match_operand:V2DF 0 "register_operand" "")
3116 (match_operand:V2DF 1 "register_operand" "")
3117 (match_operand:V2DF 2 "nonimmediate_operand" "")
3118 (match_operand:SI 3 "const_int_operand" "")]
3121 int mask = INTVAL (operands[3]);
3122 emit_insn (gen_sse2_shufpd_1 (operands[0], operands[1], operands[2],
3124 GEN_INT (mask & 2 ? 3 : 2)));
;; shufpd with explicit element selectors: operand 3 must satisfy
;; const_0_to_1_operand and operand 4 const_2_to_3_operand.  The output
;; code folds them back into the instruction immediate (operand 3 in
;; bit 0, operand 4 minus 2 in bit 1) and overwrites operands[3] with that
;; immediate before printing the template.
3128 (define_insn "sse2_shufpd_1"
3129 [(set (match_operand:V2DF 0 "register_operand" "=x")
3132 (match_operand:V2DF 1 "register_operand" "0")
3133 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
3134 (parallel [(match_operand 3 "const_0_to_1_operand" "")
3135 (match_operand 4 "const_2_to_3_operand" "")])))]
3139 mask = INTVAL (operands[3]);
3140 mask |= (INTVAL (operands[4]) - 2) << 1;
3141 operands[3] = GEN_INT (mask);
3143 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
3145 [(set_attr "type" "sselog")
3146 (set_attr "mode" "V2DF")])
;; Extract element 1 of a V2DF into a DF destination.  Alternative 0
;; stores a register source to memory with movhpd; alternative 1 (source
;; tied to the destination register) is typed sselog1 and alternative 2
;; (offsettable memory source) ssemov.  Memory-to-memory is rejected.
3148 (define_insn "sse2_storehpd"
3149 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x*fr")
3151 (match_operand:V2DF 1 "nonimmediate_operand" " x,0,o")
3152 (parallel [(const_int 1)])))]
3153 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
3155 movhpd\t{%1, %0|%0, %1}
3158 [(set_attr "type" "ssemov,sselog1,ssemov")
3159 (set_attr "mode" "V1DF,V2DF,DF")])
;; Post-reload split: extracting element 1 of a V2DF memory operand into a
;; DF register becomes a plain DFmode load from byte offset 8 of that
;; memory operand.
3162 [(set (match_operand:DF 0 "register_operand" "")
3164 (match_operand:V2DF 1 "memory_operand" "")
3165 (parallel [(const_int 1)])))]
3166 "TARGET_SSE2 && reload_completed"
3167 [(set (match_dup 0) (match_dup 1))]
3169 operands[1] = adjust_address (operands[1], DFmode, 8);
;; Extract element 0 of a V2DF into a DF destination.  Alternative 0
;; stores a register source to memory with movlpd; memory-to-memory is
;; rejected.  The single ssemov type value applies to every alternative,
;; while the mode attribute is given per alternative.
3172 (define_insn "sse2_storelpd"
3173 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x*fr")
3175 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m")
3176 (parallel [(const_int 0)])))]
3177 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
3179 movlpd\t{%1, %0|%0, %1}
3182 [(set_attr "type" "ssemov")
3183 (set_attr "mode" "V1DF,DF,DF")])
;; Post-reload split: extracting element 0 of a V2DF operand into a DF
;; register becomes an ordinary DFmode move.  Operand 1 is re-expressed in
;; DFmode (gen_rtx_REG on its register number, or gen_lowpart) and moved
;; into operand 0 with emit_move_insn.
3186 [(set (match_operand:DF 0 "register_operand" "")
3188 (match_operand:V2DF 1 "nonimmediate_operand" "")
3189 (parallel [(const_int 0)])))]
3190 "TARGET_SSE2 && reload_completed"
3193 rtx op1 = operands[1];
3195 op1 = gen_rtx_REG (DFmode, REGNO (op1));
3197 op1 = gen_lowpart (DFmode, op1);
3198 emit_move_insn (operands[0], op1);
;; Combine element 0 of V2DF operand 1 with DF operand 2 into a V2DF.
;; Alternatives 0-2 emit movhpd (memory operand 2), unpcklpd (register
;; operand 2) and shufpd respectively; alternative 3 (offsettable memory
;; destination) is typed other.  Operands 1 and 2 may not both be MEMs.
;; NOTE(review): alternative 2 ties DF operand 2 to the destination and
;; prints shufpd $1, %1, %0.  Confirm against the shufpd immediate
;; encoding that this yields { op1[0], op2 } and does not read the upper
;; half of operand 2's register, which the DF operand does not define.
3202 (define_insn "sse2_loadhpd"
3203 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,o")
3206 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,x,0")
3207 (parallel [(const_int 0)]))
3208 (match_operand:DF 2 "nonimmediate_operand" " m,x,0,x*fr")))]
3209 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3211 movhpd\t{%2, %0|%0, %2}
3212 unpcklpd\t{%2, %0|%0, %2}
3213 shufpd\t{$1, %1, %0|%0, %1, 1}
3215 [(set_attr "type" "ssemov,sselog,sselog,other")
3216 (set_attr "mode" "V1DF,V2DF,V2DF,DF")])
;; Post-reload split: when the V2DF destination is in memory and element 0
;; is taken from that same location (match_dup 0), only DF register
;; operand 1 has to be written, at byte offset 8 of the destination.
3219 [(set (match_operand:V2DF 0 "memory_operand" "")
3221 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
3222 (match_operand:DF 1 "register_operand" "")))]
3223 "TARGET_SSE2 && reload_completed"
3224 [(set (match_dup 0) (match_dup 1))]
3226 operands[0] = adjust_address (operands[0], DFmode, 8);
;; Combine DF operand 2 with element 1 of V2DF operand 1 into a V2DF.
;; Operands 1 and 2 may not both be MEMs.  Per-alternative behaviour:
;;   0  operand 2 in memory, operand 1 constraint C      movsd
;;   1  operand 2 in memory, operand 1 tied to dest      movlpd
;;   2  operand 2 in a register, operand 1 tied to dest  movsd
;;   3  operand 2 tied to dest, operand 1 in a register  shufpd $2
;;   4  operand 2 tied to dest, operand 1 offsettable    movhpd from %H1
;;   5  memory destination, typed other
;; In alternative 3 the shufpd source has to be operand 1: operand 2 is
;; the destination register itself there, so printing %2 would shuffle the
;; register with itself and never read operand 1.
3229 (define_insn "sse2_loadlpd"
3230 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m")
3232 (match_operand:DF 2 "nonimmediate_operand" " m,m,x,0,0,x*fr")
3234 (match_operand:V2DF 1 "vector_move_operand" " C,0,0,x,o,0")
3235 (parallel [(const_int 1)]))))]
3236 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3238 movsd\t{%2, %0|%0, %2}
3239 movlpd\t{%2, %0|%0, %2}
3240 movsd\t{%2, %0|%0, %2}
3241 shufpd\t{$2, %1, %0|%0, %1, 2}
3242 movhpd\t{%H1, %0|%0, %H1}
3244 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,other")
3245 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,DF")])
;; Post-reload split: when the V2DF destination is in memory and element 1
;; is taken from that same location (match_dup 0), only DF register
;; operand 1 has to be written.  Operand 1 is listed before the retained
;; element 1, so it supplies the low element and is stored at byte offset 0
;; (offset 8 would overwrite the element that is supposed to be kept).
3248 [(set (match_operand:V2DF 0 "memory_operand" "")
3250 (match_operand:DF 1 "register_operand" "")
3251 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
3252 "TARGET_SSE2 && reload_completed"
3253 [(set (match_dup 0) (match_dup 1))]
3255 operands[0] = adjust_address (operands[0], DFmode, 0);
3258 ;; Not sure these two are ever used, but it doesn't hurt to have
;; SSE-without-SSE2 extraction of element 1 of a V2DF into a DF.
;; Alternative 0 stores to memory with movhps, alternative 1 moves between
;; registers with movhlps, alternative 2 loads the %H1 form of an
;; offsettable memory source with movlps.  Memory-to-memory is rejected.
3260 (define_insn "*vec_extractv2df_1_sse"
3261 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
3263 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
3264 (parallel [(const_int 1)])))]
3265 "!TARGET_SSE2 && TARGET_SSE
3266 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
3268 movhps\t{%1, %0|%0, %1}
3269 movhlps\t{%1, %0|%0, %1}
3270 movlps\t{%H1, %0|%0, %H1}"
3271 [(set_attr "type" "ssemov")
3272 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; SSE-without-SSE2 extraction of element 0 of a V2DF into a DF.
;; Alternative 0 stores to memory with movlps, alternative 1 copies the
;; whole register with movaps, alternative 2 loads from memory with
;; movlps.  Memory-to-memory is rejected.
3274 (define_insn "*vec_extractv2df_0_sse"
3275 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
3277 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
3278 (parallel [(const_int 0)])))]
3279 "!TARGET_SSE2 && TARGET_SSE
3280 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
3282 movlps\t{%1, %0|%0, %1}
3283 movaps\t{%1, %0|%0, %1}
3284 movlps\t{%1, %0|%0, %1}"
3285 [(set_attr "type" "ssemov")
3286 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Merge of two V2DF operands.  Per-alternative behaviour:
;;   0  operand 2 register, operand 1 tied to dest      movsd
;;   1  operand 2 memory, operand 1 tied to dest        movlpd (load)
;;   2  memory dest, operand 2 register                 movlpd (store)
;;   3  operand 2 tied to dest, operand 1 register      shufpd $2
;;   4  operand 2 tied to dest, operand 1 offsettable   movhps from %H1
;;   5  offsettable memory dest, operand 1 register     movhps to %H0
;; In alternative 3 the shufpd source has to be operand 1: operand 2 is
;; the destination register itself there, so printing %2 would shuffle the
;; register with itself and never read operand 1.
3288 (define_insn "sse2_movsd"
3289 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m,x,x,o")
3291 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x,0,0,0")
3292 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0,x,o,x")
3296 movsd\t{%2, %0|%0, %2}
3297 movlpd\t{%2, %0|%0, %2}
3298 movlpd\t{%2, %0|%0, %2}
3299 shufpd\t{$2, %1, %0|%0, %1, 2}
3300 movhps\t{%H1, %0|%0, %H1}
3301 movhps\t{%1, %H0|%H0, %1}"
3302 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
3303 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,V1DF")])
;; Fill a V2DF register from DF operand 1 (register or memory) with a
;; single movddup.
3305 (define_insn "*vec_dupv2df_sse3"
3306 [(set (match_operand:V2DF 0 "register_operand" "=x")
3308 (match_operand:DF 1 "nonimmediate_operand" "xm")))]
3310 "movddup\t{%1, %0|%0, %1}"
3311 [(set_attr "type" "sselog1")
3312 (set_attr "mode" "DF")])
;; Fill a V2DF register from DF operand 1, which must already live in the
;; destination register (constraint 0).  Typed sselog1 in V2DF mode.
3314 (define_insn "vec_dupv2df"
3315 [(set (match_operand:V2DF 0 "register_operand" "=x")
3317 (match_operand:DF 1 "register_operand" "0")))]
3320 [(set_attr "type" "sselog1")
3321 (set_attr "mode" "V2DF")])
;; V2DF construction that is emitted as a single movddup from DF operand 1
;; (register or memory) into an SSE register.
3323 (define_insn "*vec_concatv2df_sse3"
3324 [(set (match_operand:V2DF 0 "register_operand" "=x")
3326 (match_operand:DF 1 "nonimmediate_operand" "xm")
3329 "movddup\t{%1, %0|%0, %1}"
3330 [(set_attr "type" "sselog1")
3331 (set_attr "mode" "DF")])
;; Build a V2DF from DF operands 1 and 2.  Alternatives 0-2 use the Yt
;; register class: unpcklpd (register operand 2), movhpd (memory
;; operand 2), and movsd from memory operand 1 when operand 2 matches
;; constraint C.  Alternatives 3-4 use plain x registers and emit movlhps
;; (register operand 2) or movhps (memory operand 2).
3333 (define_insn "*vec_concatv2df"
3334 [(set (match_operand:V2DF 0 "register_operand" "=Yt,Yt,Yt,x,x")
3336 (match_operand:DF 1 "nonimmediate_operand" " 0 ,0 ,m ,0,0")
3337 (match_operand:DF 2 "vector_move_operand" " Yt,m ,C ,x,m")))]
3340 unpcklpd\t{%2, %0|%0, %2}
3341 movhpd\t{%2, %0|%0, %2}
3342 movsd\t{%1, %0|%0, %1}
3343 movlhps\t{%2, %0|%0, %2}
3344 movhps\t{%2, %0|%0, %2}"
3345 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,ssemov")
3346 (set_attr "mode" "V2DF,V1DF,DF,V4SF,V2SF")])
;; Standard-name expander: set one element of V2DF operand 0 to DF
;; operand 1.  Operand 2 is the constant element index; the work is
;; delegated to ix86_expand_vector_set with its first argument false.
3348 (define_expand "vec_setv2df"
3349 [(match_operand:V2DF 0 "register_operand" "")
3350 (match_operand:DF 1 "register_operand" "")
3351 (match_operand 2 "const_int_operand" "")]
3354 ix86_expand_vector_set (false, operands[0], operands[1],
3355 INTVAL (operands[2]));
;; Standard-name expander: extract one element of V2DF operand 1 into DF
;; operand 0.  Operand 2 is the constant element index; the work is
;; delegated to ix86_expand_vector_extract with its first argument false.
3359 (define_expand "vec_extractv2df"
3360 [(match_operand:DF 0 "register_operand" "")
3361 (match_operand:V2DF 1 "register_operand" "")
3362 (match_operand 2 "const_int_operand" "")]
3365 ix86_expand_vector_extract (false, operands[0], operands[1],
3366 INTVAL (operands[2]));
;; Standard-name expander: initialise V2DF operand 0 from operand 1 (no
;; predicate or mode is imposed on it here) by calling
;; ix86_expand_vector_init with its first argument false.
3370 (define_expand "vec_initv2df"
3371 [(match_operand:V2DF 0 "register_operand" "")
3372 (match_operand 1 "" "")]
3375 ix86_expand_vector_init (false, operands[0], operands[1]);
3379 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3381 ;; Parallel integral arithmetic
3383 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Integer vector negate expander for every SSEMODEI mode.  The
;; preparation statement forces the all-zero vector of the mode into a
;; register and installs it as operands[2] -- presumably the minuend that
;; operand 1 is subtracted from; confirm against the full pattern.
3385 (define_expand "neg<mode>2"
3386 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3389 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")))]
3391 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
;; Integer vector add expander for every SSEMODEI mode.  Both inputs may
;; be registers or memory; ix86_fixup_binary_operands_no_copy is called
;; for PLUS to adjust the operands before the pattern is emitted.
3393 (define_expand "add<mode>3"
3394 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3395 (plus:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3396 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
3398 "ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);")
;; Integer vector add: emits padd with the element-size suffix selected by
;; the ssevecsize attribute.  Operand 1 is commutative (%) and tied to the
;; destination; operand 2 may be a register or memory.  Requires SSE2 and
;; ix86_binary_operator_ok for PLUS.
3400 (define_insn "*add<mode>3"
3401 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3403 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
3404 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3405 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
3406 "padd<ssevecsize>\t{%2, %0|%0, %2}"
3407 [(set_attr "type" "sseiadd")
3408 (set_attr "prefix_data16" "1")
3409 (set_attr "mode" "TI")])
;; Signed saturating add for the SSEMODE12 modes (V16QI, V8HI): emits
;; padds with the element-size suffix.  Operand 1 is commutative and tied
;; to the destination.  Requires SSE2 and ix86_binary_operator_ok for
;; SS_PLUS.
3411 (define_insn "sse2_ssadd<mode>3"
3412 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
3414 (match_operand:SSEMODE12 1 "nonimmediate_operand" "%0")
3415 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
3416 "TARGET_SSE2 && ix86_binary_operator_ok (SS_PLUS, <MODE>mode, operands)"
3417 "padds<ssevecsize>\t{%2, %0|%0, %2}"
3418 [(set_attr "type" "sseiadd")
3419 (set_attr "prefix_data16" "1")
3420 (set_attr "mode" "TI")])
;; Unsigned saturating add for the SSEMODE12 modes (V16QI, V8HI): emits
;; paddus with the element-size suffix.  Operand 1 is commutative and tied
;; to the destination.  Requires SSE2 and ix86_binary_operator_ok for
;; US_PLUS.
3422 (define_insn "sse2_usadd<mode>3"
3423 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
3425 (match_operand:SSEMODE12 1 "nonimmediate_operand" "%0")
3426 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
3427 "TARGET_SSE2 && ix86_binary_operator_ok (US_PLUS, <MODE>mode, operands)"
3428 "paddus<ssevecsize>\t{%2, %0|%0, %2}"
3429 [(set_attr "type" "sseiadd")
3430 (set_attr "prefix_data16" "1")
3431 (set_attr "mode" "TI")])
;; Integer vector subtract expander for every SSEMODEI mode.  Operand 1
;; (the minuend) must be a register; operand 2 may be memory.
;; ix86_fixup_binary_operands_no_copy is called for MINUS.
3433 (define_expand "sub<mode>3"
3434 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3435 (minus:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "")
3436 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
3438 "ix86_fixup_binary_operands_no_copy (MINUS, <MODE>mode, operands);")
;; Integer vector subtract: emits psub with the element-size suffix.
;; Operand 1 is a register tied to the destination (no commutative
;; marker); operand 2 may be a register or memory.
3440 (define_insn "*sub<mode>3"
3441 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3443 (match_operand:SSEMODEI 1 "register_operand" "0")
3444 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3446 "psub<ssevecsize>\t{%2, %0|%0, %2}"
3447 [(set_attr "type" "sseiadd")
3448 (set_attr "prefix_data16" "1")
3449 (set_attr "mode" "TI")])
;; Signed saturating subtract for the SSEMODE12 modes (V16QI, V8HI):
;; emits psubs with the element-size suffix.  Operand 1 is a register tied
;; to the destination; operand 2 may be a register or memory.
3451 (define_insn "sse2_sssub<mode>3"
3452 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
3454 (match_operand:SSEMODE12 1 "register_operand" "0")
3455 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
3457 "psubs<ssevecsize>\t{%2, %0|%0, %2}"
3458 [(set_attr "type" "sseiadd")
3459 (set_attr "prefix_data16" "1")
3460 (set_attr "mode" "TI")])
;; Unsigned saturating subtract for the SSEMODE12 modes (V16QI, V8HI):
;; emits psubus with the element-size suffix.  Operand 1 is a register
;; tied to the destination; operand 2 may be a register or memory.
3462 (define_insn "sse2_ussub<mode>3"
3463 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
3465 (match_operand:SSEMODE12 1 "register_operand" "0")
3466 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
3468 "psubus<ssevecsize>\t{%2, %0|%0, %2}"
3469 [(set_attr "type" "sseiadd")
3470 (set_attr "prefix_data16" "1")
3471 (set_attr "mode" "TI")])
;; V16QI multiply, implemented as a split that only applies while reload
;; has neither started nor completed.  Two strategies appear in the body:
;;  - SSE5: unpack both inputs to V8HI with ix86_expand_sse5_unpack (high
;;    and low bytes separately), multiply each half with mulv8hi3, then
;;    repack with ix86_expand_sse5_pack.
;;  - Otherwise: widen with punpckhbw/punpcklbw of each input with itself,
;;    multiply the halves as V8HI, and gather the low byte of every word
;;    through three rounds of punpckhbw/punpcklbw followed by a final
;;    punpcklbw into op0.
3473 (define_insn_and_split "mulv16qi3"
3474 [(set (match_operand:V16QI 0 "register_operand" "")
3475 (mult:V16QI (match_operand:V16QI 1 "register_operand" "")
3476 (match_operand:V16QI 2 "register_operand" "")))]
3478 && !(reload_completed || reload_in_progress)"
3483 rtx t[12], op0, op[3];
3488 /* On SSE5, we can take advantage of the pperm instruction to pack and
3489 unpack the bytes. Unpack data such that we've got a source byte in
3490 each low byte of each word. We don't care what goes into the high
3491 byte, so put 0 there. */
3492 for (i = 0; i < 6; ++i)
3493 t[i] = gen_reg_rtx (V8HImode);
3495 for (i = 0; i < 2; i++)
3498 op[1] = operands[i+1];
3499 ix86_expand_sse5_unpack (op, true, true); /* high bytes */
3502 ix86_expand_sse5_unpack (op, true, false); /* low bytes */
3505 /* Multiply words. */
3506 emit_insn (gen_mulv8hi3 (t[4], t[0], t[1])); /* high bytes */
3507 emit_insn (gen_mulv8hi3 (t[5], t[2], t[3])); /* low bytes */
3509 /* Pack the low byte of each word back into a single xmm */
3510 op[0] = operands[0];
3513 ix86_expand_sse5_pack (op);
3517 for (i = 0; i < 12; ++i)
3518 t[i] = gen_reg_rtx (V16QImode);
3520 /* Unpack data such that we've got a source byte in each low byte of
3521 each word. We don't care what goes into the high byte of each word.
3522 Rather than trying to get zero in there, most convenient is to let
3523 it be a copy of the low byte. */
3524 emit_insn (gen_sse2_punpckhbw (t[0], operands[1], operands[1]));
3525 emit_insn (gen_sse2_punpckhbw (t[1], operands[2], operands[2]));
3526 emit_insn (gen_sse2_punpcklbw (t[2], operands[1], operands[1]));
3527 emit_insn (gen_sse2_punpcklbw (t[3], operands[2], operands[2]));
3529 /* Multiply words. The end-of-line annotations here give a picture of what
3530 the output of that instruction looks like. Dot means don't care; the
3531 letters are the bytes of the result with A being the most significant. */
3532 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[4]), /* .A.B.C.D.E.F.G.H */
3533 gen_lowpart (V8HImode, t[0]),
3534 gen_lowpart (V8HImode, t[1])));
3535 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[5]), /* .I.J.K.L.M.N.O.P */
3536 gen_lowpart (V8HImode, t[2]),
3537 gen_lowpart (V8HImode, t[3])));
3539 /* Extract the relevant bytes and merge them back together. */
3540 emit_insn (gen_sse2_punpckhbw (t[6], t[5], t[4])); /* ..AI..BJ..CK..DL */
3541 emit_insn (gen_sse2_punpcklbw (t[7], t[5], t[4])); /* ..EM..FN..GO..HP */
3542 emit_insn (gen_sse2_punpckhbw (t[8], t[7], t[6])); /* ....AEIM....BFJN */
3543 emit_insn (gen_sse2_punpcklbw (t[9], t[7], t[6])); /* ....CGKO....DHLP */
3544 emit_insn (gen_sse2_punpckhbw (t[10], t[9], t[8])); /* ........ACEGIKMO */
3545 emit_insn (gen_sse2_punpcklbw (t[11], t[9], t[8])); /* ........BDFHJLNP */
3548 emit_insn (gen_sse2_punpcklbw (op0, t[11], t[10])); /* ABCDEFGHIJKLMNOP */
;; V8HI multiply expander.  Both inputs may be registers or memory;
;; ix86_fixup_binary_operands_no_copy is called for MULT to adjust them.
3552 (define_expand "mulv8hi3"
3553 [(set (match_operand:V8HI 0 "register_operand" "")
3554 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
3555 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
3557 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; V8HI multiply emitted as pmullw.  Operand 1 is commutative and tied to
;; the destination; operand 2 may be a register or memory.  Requires SSE2
;; and ix86_binary_operator_ok for MULT.
3559 (define_insn "*mulv8hi3"
3560 [(set (match_operand:V8HI 0 "register_operand" "=x")
3561 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
3562 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
3563 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
3564 "pmullw\t{%2, %0|%0, %2}"
3565 [(set_attr "type" "sseimul")
3566 (set_attr "prefix_data16" "1")
3567 (set_attr "mode" "TI")])
;; Expander for the signed high-part V8HI multiply.  Both inputs may be
;; registers or memory; ix86_fixup_binary_operands_no_copy is called for
;; MULT to adjust them.
3569 (define_expand "smulv8hi3_highpart"
3570 [(set (match_operand:V8HI 0 "register_operand" "")
3575 (match_operand:V8HI 1 "nonimmediate_operand" ""))
3577 (match_operand:V8HI 2 "nonimmediate_operand" "")))
3580 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; Signed high-part V8HI multiply emitted as pmulhw.  Operand 1 is
;; commutative and tied to the destination; operand 2 may be a register or
;; memory.  Requires SSE2 and ix86_binary_operator_ok for MULT.
3582 (define_insn "*smulv8hi3_highpart"
3583 [(set (match_operand:V8HI 0 "register_operand" "=x")
3588 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
3590 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
3592 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
3593 "pmulhw\t{%2, %0|%0, %2}"
3594 [(set_attr "type" "sseimul")
3595 (set_attr "prefix_data16" "1")
3596 (set_attr "mode" "TI")])
;; Expander for the unsigned high-part V8HI multiply.  Both inputs may be
;; registers or memory; ix86_fixup_binary_operands_no_copy is called for
;; MULT to adjust them.
3598 (define_expand "umulv8hi3_highpart"
3599 [(set (match_operand:V8HI 0 "register_operand" "")
3604 (match_operand:V8HI 1 "nonimmediate_operand" ""))
3606 (match_operand:V8HI 2 "nonimmediate_operand" "")))
3609 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; Unsigned high-part V8HI multiply emitted as pmulhuw.  Operand 1 is
;; commutative and tied to the destination; operand 2 may be a register or
;; memory.  Requires SSE2 and ix86_binary_operator_ok for MULT.
3611 (define_insn "*umulv8hi3_highpart"
3612 [(set (match_operand:V8HI 0 "register_operand" "=x")
3617 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
3619 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
3621 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
3622 "pmulhuw\t{%2, %0|%0, %2}"
3623 [(set_attr "type" "sseimul")
3624 (set_attr "prefix_data16" "1")
3625 (set_attr "mode" "TI")])
;; Widening V4SI -> V2DI multiply emitted as pmuludq.  Elements 0 and 2 of
;; each V4SI input are selected as the multiplicands.  Operand 1 is
;; commutative and tied to the destination; operand 2 may be a register or
;; memory.  Requires SSE2 and ix86_binary_operator_ok for MULT.
3627 (define_insn "sse2_umulv2siv2di3"
3628 [(set (match_operand:V2DI 0 "register_operand" "=x")
3632 (match_operand:V4SI 1 "nonimmediate_operand" "%0")
3633 (parallel [(const_int 0) (const_int 2)])))
3636 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
3637 (parallel [(const_int 0) (const_int 2)])))))]
3638 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
3639 "pmuludq\t{%2, %0|%0, %2}"
3640 [(set_attr "type" "sseimul")
3641 (set_attr "prefix_data16" "1")
3642 (set_attr "mode" "TI")])
;; SSE4.1 widening V4SI -> V2DI multiply emitted as pmuldq.  Elements 0
;; and 2 of each V4SI input are selected as the multiplicands.  Operand 1
;; is commutative and tied to the destination; operand 2 may be a register
;; or memory.  Requires SSE4.1 and ix86_binary_operator_ok for MULT.
3644 (define_insn "sse4_1_mulv2siv2di3"
3645 [(set (match_operand:V2DI 0 "register_operand" "=x")
3649 (match_operand:V4SI 1 "nonimmediate_operand" "%0")
3650 (parallel [(const_int 0) (const_int 2)])))
3653 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
3654 (parallel [(const_int 0) (const_int 2)])))))]
3655 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
3656 "pmuldq\t{%2, %0|%0, %2}"
3657 [(set_attr "type" "sseimul")
3658 (set_attr "prefix_extra" "1")
3659 (set_attr "mode" "TI")])
;; pmaddwd: V8HI inputs, V4SI result.  The pattern takes V4HI selections
;; from operands 1 and 2 twice (one selector list starting at element 0,
;; the other starting at element 1 and ending at element 7).  Operand 1 is
;; commutative and tied to the destination; operand 2 may be a register or
;; memory.  Requires SSE2 and ix86_binary_operator_ok for MULT.
3661 (define_insn "sse2_pmaddwd"
3662 [(set (match_operand:V4SI 0 "register_operand" "=x")
3667 (match_operand:V8HI 1 "nonimmediate_operand" "%0")
3668 (parallel [(const_int 0)
3674 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
3675 (parallel [(const_int 0)
3681 (vec_select:V4HI (match_dup 1)
3682 (parallel [(const_int 1)
3687 (vec_select:V4HI (match_dup 2)
3688 (parallel [(const_int 1)
3691 (const_int 7)]))))))]
3692 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
3693 "pmaddwd\t{%2, %0|%0, %2}"
3694 [(set_attr "type" "sseiadd")
3695 (set_attr "prefix_data16" "1")
3696 (set_attr "mode" "TI")])
;; V4SI multiply expander with register inputs.  Only when SSE4.1 or SSE5
;; is available does it call ix86_fixup_binary_operands_no_copy for MULT;
;; otherwise the operands are left as given.
3698 (define_expand "mulv4si3"
3699 [(set (match_operand:V4SI 0 "register_operand" "")
3700 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
3701 (match_operand:V4SI 2 "register_operand" "")))]
3704 if (TARGET_SSE4_1 || TARGET_SSE5)
3705 ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);
;; SSE4.1 V4SI multiply emitted as pmulld.  Operand 1 is commutative and
;; tied to the destination; operand 2 may be a register or memory.
;; Requires SSE4.1 and ix86_binary_operator_ok for MULT.
3708 (define_insn "*sse4_1_mulv4si3"
3709 [(set (match_operand:V4SI 0 "register_operand" "=x")
3710 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%0")
3711 (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
3712 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
3713 "pmulld\t{%2, %0|%0, %2}"
3714 [(set_attr "type" "sseimul")
3715 (set_attr "prefix_extra" "1")
3716 (set_attr "mode" "TI")])
3718 ;; We don't have a straight 32-bit parallel multiply on SSE5, so fake it with a
3719 ;; multiply/add. In general, we expect the define_split to occur before
3720 ;; register allocation, so we have to handle the corner case where the target
3721 ;; is used as the base or index register in operands 1/2.
;; The destination is early-clobber (=&x).  The split fires once reload
;; has completed, or earlier when operand 0 is mentioned in neither
;; operand 1 nor operand 2.  It rewrites the multiply as a multiply-add
;; whose extra operand, operands[3], is the V4SI zero vector.
3722 (define_insn_and_split "*sse5_mulv4si3"
3723 [(set (match_operand:V4SI 0 "register_operand" "=&x")
3724 (mult:V4SI (match_operand:V4SI 1 "register_operand" "%x")
3725 (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
3728 "&& (reload_completed
3729 || (!reg_mentioned_p (operands[0], operands[1])
3730 && !reg_mentioned_p (operands[0], operands[2])))"
3734 (plus:V4SI (mult:V4SI (match_dup 1)
3738 operands[3] = CONST0_RTX (V4SImode);
3740 [(set_attr "type" "ssemuladd")
3741 (set_attr "mode" "TI")])
;; V4SI multiply for SSE2 targets without SSE4.1 or SSE5, implemented as a
;; split that applies only while reload has neither started nor completed.
;; It multiplies the even elements with sse2_umulv2siv2di3, shifts both
;; inputs as TImode values (sse2_lshrti3) to bring the odd elements into
;; the even slots, multiplies again, compacts each product vector with
;; sse2_pshufd_1 (selectors 0, 2, 0, 0) and interleaves the two with
;; punpckldq into op0.
3743 (define_insn_and_split "*sse2_mulv4si3"
3744 [(set (match_operand:V4SI 0 "register_operand" "")
3745 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
3746 (match_operand:V4SI 2 "register_operand" "")))]
3747 "TARGET_SSE2 && !TARGET_SSE4_1 && !TARGET_SSE5
3748 && !(reload_completed || reload_in_progress)"
3753 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
3759 t1 = gen_reg_rtx (V4SImode);
3760 t2 = gen_reg_rtx (V4SImode);
3761 t3 = gen_reg_rtx (V4SImode);
3762 t4 = gen_reg_rtx (V4SImode);
3763 t5 = gen_reg_rtx (V4SImode);
3764 t6 = gen_reg_rtx (V4SImode);
3765 thirtytwo = GEN_INT (32);
3767 /* Multiply elements 2 and 0. */
3768 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t1),
3771 /* Shift both input vectors down one element, so that elements 3
3772 and 1 are now in the slots for elements 2 and 0. For K8, at
3773 least, this is faster than using a shuffle. */
3774 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2),
3775 gen_lowpart (TImode, op1),
3777 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3),
3778 gen_lowpart (TImode, op2),
3780 /* Multiply elements 3 and 1. */
3781 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t4),
3784 /* Move the results in element 2 down to element 1; we don't care
3785 what goes in elements 2 and 3. */
3786 emit_insn (gen_sse2_pshufd_1 (t5, t1, const0_rtx, const2_rtx,
3787 const0_rtx, const0_rtx));
3788 emit_insn (gen_sse2_pshufd_1 (t6, t4, const0_rtx, const2_rtx,
3789 const0_rtx, const0_rtx));
3791 /* Merge the parts back together. */
3792 emit_insn (gen_sse2_punpckldq (op0, t5, t6));
;; V2DI multiply built from 32x32->64 unsigned multiplies, implemented as
;; a split that applies only while reload has neither started nor
;; completed.  With each 64-bit element written as hi:lo, the result is
;; lo1*lo2 + ((lo1*hi2) << 32) + ((lo2*hi1) << 32): t1 holds the low
;; product, t2/t3 the inputs shifted right by 32, t4/t5 the cross products
;; shifted back up, and the three terms are summed into op0.
3796 (define_insn_and_split "mulv2di3"
3797 [(set (match_operand:V2DI 0 "register_operand" "")
3798 (mult:V2DI (match_operand:V2DI 1 "register_operand" "")
3799 (match_operand:V2DI 2 "register_operand" "")))]
3801 && !(reload_completed || reload_in_progress)"
3806 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
3812 t1 = gen_reg_rtx (V2DImode);
3813 t2 = gen_reg_rtx (V2DImode);
3814 t3 = gen_reg_rtx (V2DImode);
3815 t4 = gen_reg_rtx (V2DImode);
3816 t5 = gen_reg_rtx (V2DImode);
3817 t6 = gen_reg_rtx (V2DImode);
3818 thirtytwo = GEN_INT (32);
3820 /* Multiply low parts. */
3821 emit_insn (gen_sse2_umulv2siv2di3 (t1, gen_lowpart (V4SImode, op1),
3822 gen_lowpart (V4SImode, op2)));
3824 /* Shift input vectors right 32 bits so we can multiply high parts. */
3825 emit_insn (gen_lshrv2di3 (t2, op1, thirtytwo));
3826 emit_insn (gen_lshrv2di3 (t3, op2, thirtytwo));
3828 /* Multiply high parts by low parts. */
3829 emit_insn (gen_sse2_umulv2siv2di3 (t4, gen_lowpart (V4SImode, op1),
3830 gen_lowpart (V4SImode, t3)));
3831 emit_insn (gen_sse2_umulv2siv2di3 (t5, gen_lowpart (V4SImode, op2),
3832 gen_lowpart (V4SImode, t2)));
3834 /* Shift them back. */
3835 emit_insn (gen_ashlv2di3 (t4, t4, thirtytwo));
3836 emit_insn (gen_ashlv2di3 (t5, t5, thirtytwo));
3838 /* Add the three parts together. */
3839 emit_insn (gen_addv2di3 (t6, t1, t4));
3840 emit_insn (gen_addv2di3 (op0, t6, t5));
;; Signed widening multiply, high half: V8HI x V8HI -> V4SI.  Computes the
;; low 16 bits of every product with mulv8hi3 and the signed high 16 bits
;; with smulv8hi3_highpart, then interleaves the high halves of the two
;; result vectors into the V8HI view of operand 0.
3844 (define_expand "vec_widen_smult_hi_v8hi"
3845 [(match_operand:V4SI 0 "register_operand" "")
3846 (match_operand:V8HI 1 "register_operand" "")
3847 (match_operand:V8HI 2 "register_operand" "")]
3850 rtx op1, op2, t1, t2, dest;
3854 t1 = gen_reg_rtx (V8HImode);
3855 t2 = gen_reg_rtx (V8HImode);
3856 dest = gen_lowpart (V8HImode, operands[0]);
3858 emit_insn (gen_mulv8hi3 (t1, op1, op2));
3859 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
3860 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
;; Signed widening multiply, low half: V8HI x V8HI -> V4SI.  Computes the
;; low 16 bits of every product with mulv8hi3 and the signed high 16 bits
;; with smulv8hi3_highpart, then interleaves the low halves of the two
;; result vectors into the V8HI view of operand 0.
3864 (define_expand "vec_widen_smult_lo_v8hi"
3865 [(match_operand:V4SI 0 "register_operand" "")
3866 (match_operand:V8HI 1 "register_operand" "")
3867 (match_operand:V8HI 2 "register_operand" "")]
3870 rtx op1, op2, t1, t2, dest;
3874 t1 = gen_reg_rtx (V8HImode);
3875 t2 = gen_reg_rtx (V8HImode);
3876 dest = gen_lowpart (V8HImode, operands[0]);
3878 emit_insn (gen_mulv8hi3 (t1, op1, op2));
3879 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
3880 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
;; Unsigned widening multiply, high half: V8HI x V8HI -> V4SI.  Computes
;; the low 16 bits of every product with mulv8hi3 and the unsigned high 16
;; bits with umulv8hi3_highpart, then interleaves the high halves of the
;; two result vectors into the V8HI view of operand 0.
3884 (define_expand "vec_widen_umult_hi_v8hi"
3885 [(match_operand:V4SI 0 "register_operand" "")
3886 (match_operand:V8HI 1 "register_operand" "")
3887 (match_operand:V8HI 2 "register_operand" "")]
3890 rtx op1, op2, t1, t2, dest;
3894 t1 = gen_reg_rtx (V8HImode);
3895 t2 = gen_reg_rtx (V8HImode);
3896 dest = gen_lowpart (V8HImode, operands[0]);
3898 emit_insn (gen_mulv8hi3 (t1, op1, op2));
3899 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
3900 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
;; Unsigned widening multiply, low half: V8HI x V8HI -> V4SI.  Computes
;; the low 16 bits of every product with mulv8hi3 and the unsigned high 16
;; bits with umulv8hi3_highpart, then interleaves the low halves of the
;; two result vectors into the V8HI view of operand 0.
3904 (define_expand "vec_widen_umult_lo_v8hi"
3905 [(match_operand:V4SI 0 "register_operand" "")
3906 (match_operand:V8HI 1 "register_operand" "")
3907 (match_operand:V8HI 2 "register_operand" "")]
3910 rtx op1, op2, t1, t2, dest;
3914 t1 = gen_reg_rtx (V8HImode);
3915 t2 = gen_reg_rtx (V8HImode);
3916 dest = gen_lowpart (V8HImode, operands[0]);
3918 emit_insn (gen_mulv8hi3 (t1, op1, op2));
3919 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
3920 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
;; Widening multiply, high half: V4SI x V4SI -> V2DI.  Each input is
;; interleaved with itself (high half) so the wanted elements land in the
;; slots the widening multiply reads, then the two are multiplied.
;; NOTE(review): this signed (smult) expander finishes with
;; gen_sse2_umulv2siv2di3, the same pmuludq-based multiply that
;; vec_widen_umult_hi_v4si uses; confirm the result is correct for
;; negative inputs.
3924 (define_expand "vec_widen_smult_hi_v4si"
3925 [(match_operand:V2DI 0 "register_operand" "")
3926 (match_operand:V4SI 1 "register_operand" "")
3927 (match_operand:V4SI 2 "register_operand" "")]
3930 rtx op1, op2, t1, t2;
3934 t1 = gen_reg_rtx (V4SImode);
3935 t2 = gen_reg_rtx (V4SImode);
3937 emit_insn (gen_vec_interleave_highv4si (t1, op1, op1));
3938 emit_insn (gen_vec_interleave_highv4si (t2, op2, op2));
3939 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Widening multiply, low half: V4SI x V4SI -> V2DI.  Each input is
;; interleaved with itself (low half) so the wanted elements land in the
;; slots the widening multiply reads, then the two are multiplied.
;; NOTE(review): this signed (smult) expander finishes with
;; gen_sse2_umulv2siv2di3, the same pmuludq-based multiply that
;; vec_widen_umult_lo_v4si uses; confirm the result is correct for
;; negative inputs.
3943 (define_expand "vec_widen_smult_lo_v4si"
3944 [(match_operand:V2DI 0 "register_operand" "")
3945 (match_operand:V4SI 1 "register_operand" "")
3946 (match_operand:V4SI 2 "register_operand" "")]
3949 rtx op1, op2, t1, t2;
3953 t1 = gen_reg_rtx (V4SImode);
3954 t2 = gen_reg_rtx (V4SImode);
3956 emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1));
3957 emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2));
3958 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Unsigned widening multiply, high half: V4SI x V4SI -> V2DI.  Each input
;; is interleaved with itself (high half) so the wanted elements land in
;; the slots sse2_umulv2siv2di3 reads, then the two are multiplied into
;; operand 0.
3962 (define_expand "vec_widen_umult_hi_v4si"
3963 [(match_operand:V2DI 0 "register_operand" "")
3964 (match_operand:V4SI 1 "register_operand" "")
3965 (match_operand:V4SI 2 "register_operand" "")]
3968 rtx op1, op2, t1, t2;
3972 t1 = gen_reg_rtx (V4SImode);
3973 t2 = gen_reg_rtx (V4SImode);
3975 emit_insn (gen_vec_interleave_highv4si (t1, op1, op1));
3976 emit_insn (gen_vec_interleave_highv4si (t2, op2, op2));
3977 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Unsigned widening multiply, low half: V4SI x V4SI -> V2DI.  Each input
;; is interleaved with itself (low half) so the wanted elements land in
;; the slots sse2_umulv2siv2di3 reads, then the two are multiplied into
;; operand 0.
3981 (define_expand "vec_widen_umult_lo_v4si"
3982 [(match_operand:V2DI 0 "register_operand" "")
3983 (match_operand:V4SI 1 "register_operand" "")
3984 (match_operand:V4SI 2 "register_operand" "")]
3987 rtx op1, op2, t1, t2;
3991 t1 = gen_reg_rtx (V4SImode);
3992 t2 = gen_reg_rtx (V4SImode);
3994 emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1));
3995 emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2));
3996 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Signed dot-product step: V8HI operands 1 and 2 are combined with
;; pmaddwd into a fresh V4SI temporary, which is then added to the V4SI
;; accumulator in operand 3; the sum is written to operand 0.
4000 (define_expand "sdot_prodv8hi"
4001 [(match_operand:V4SI 0 "register_operand" "")
4002 (match_operand:V8HI 1 "register_operand" "")
4003 (match_operand:V8HI 2 "register_operand" "")
4004 (match_operand:V4SI 3 "register_operand" "")]
4007 rtx t = gen_reg_rtx (V4SImode);
4008 emit_insn (gen_sse2_pmaddwd (t, operands[1], operands[2]));
4009 emit_insn (gen_addv4si3 (operands[0], operands[3], t));
;; Unsigned dot-product step: V4SI x V4SI accumulated into V2DI.
;; t1 = widening product of operands 1 and 2 (sse2_umulv2siv2di3) plus the
;; accumulator in operand 3.  Both inputs are then shifted right as TImode
;; values (sse2_lshrti3) into t2/t3 and multiplied again into t4, and
;; operand 0 receives t1 + t4.
4013 (define_expand "udot_prodv4si"
4014 [(match_operand:V2DI 0 "register_operand" "")
4015 (match_operand:V4SI 1 "register_operand" "")
4016 (match_operand:V4SI 2 "register_operand" "")
4017 (match_operand:V2DI 3 "register_operand" "")]
4022 t1 = gen_reg_rtx (V2DImode);
4023 emit_insn (gen_sse2_umulv2siv2di3 (t1, operands[1], operands[2]));
4024 emit_insn (gen_addv2di3 (t1, t1, operands[3]));
4026 t2 = gen_reg_rtx (V4SImode);
4027 t3 = gen_reg_rtx (V4SImode);
4028 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2),
4029 gen_lowpart (TImode, operands[1]),
4031 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3),
4032 gen_lowpart (TImode, operands[2]),
4035 t4 = gen_reg_rtx (V2DImode);
4036 emit_insn (gen_sse2_umulv2siv2di3 (t4, t2, t3));
4038 emit_insn (gen_addv2di3 (operands[0], t1, t4));
;; Per-element shift for the SSEMODE24 modes (V8HI, V4SI), emitted as psra
;; with the element-size suffix.  Operand 1 is tied to the destination;
;; the SI shift count in operand 2 uses constraint xN (nonmemory).
4042 (define_insn "ashr<mode>3"
4043 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
4045 (match_operand:SSEMODE24 1 "register_operand" "0")
4046 (match_operand:SI 2 "nonmemory_operand" "xN")))]
4048 "psra<ssevecsize>\t{%2, %0|%0, %2}"
4049 [(set_attr "type" "sseishft")
4050 (set_attr "prefix_data16" "1")
4051 (set_attr "mode" "TI")])
;; Per-element logical right shift for the SSEMODE248 modes (V8HI, V4SI,
;; V2DI), emitted as psrl with the element-size suffix.  Operand 1 is tied
;; to the destination; the SI shift count in operand 2 uses constraint xN
;; (nonmemory).
4053 (define_insn "lshr<mode>3"
4054 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
4055 (lshiftrt:SSEMODE248
4056 (match_operand:SSEMODE248 1 "register_operand" "0")
4057 (match_operand:SI 2 "nonmemory_operand" "xN")))]
4059 "psrl<ssevecsize>\t{%2, %0|%0, %2}"
4060 [(set_attr "type" "sseishft")
4061 (set_attr "prefix_data16" "1")
4062 (set_attr "mode" "TI")])
;; Per-element shift for the SSEMODE248 modes (V8HI, V4SI, V2DI), emitted
;; as psll with the element-size suffix.  Operand 1 is tied to the
;; destination; the SI shift count in operand 2 uses constraint xN
;; (nonmemory).
4064 (define_insn "ashl<mode>3"
4065 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
4067 (match_operand:SSEMODE248 1 "register_operand" "0")
4068 (match_operand:SI 2 "nonmemory_operand" "xN")))]
4070 "psll<ssevecsize>\t{%2, %0|%0, %2}"
4071 [(set_attr "type" "sseishft")
4072 (set_attr "prefix_data16" "1")
4073 (set_attr "mode" "TI")])
;; Whole-register left shift for every SSEMODEI mode, expressed as a
;; TImode ashift.  The shift count must satisfy
;; const_0_to_255_mul_8_operand; operands 0 and 1 are replaced by their
;; TImode lowparts before the pattern is emitted.
4075 (define_expand "vec_shl_<mode>"
4076 [(set (match_operand:SSEMODEI 0 "register_operand" "")
4077 (ashift:TI (match_operand:SSEMODEI 1 "register_operand" "")
4078 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
4081 operands[0] = gen_lowpart (TImode, operands[0]);
4082 operands[1] = gen_lowpart (TImode, operands[1]);
;; Whole-register logical right shift for every SSEMODEI mode, expressed
;; as a TImode lshiftrt.  The shift count must satisfy
;; const_0_to_255_mul_8_operand; operands 0 and 1 are replaced by their
;; TImode lowparts before the pattern is emitted.
4085 (define_expand "vec_shr_<mode>"
4086 [(set (match_operand:SSEMODEI 0 "register_operand" "")
4087 (lshiftrt:TI (match_operand:SSEMODEI 1 "register_operand" "")
4088 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
4091 operands[0] = gen_lowpart (TImode, operands[0]);
4092 operands[1] = gen_lowpart (TImode, operands[1]);
;; Unsigned V16QI maximum expander.  Both inputs may be registers or
;; memory; ix86_fixup_binary_operands_no_copy is called for UMAX.
4095 (define_expand "umaxv16qi3"
4096 [(set (match_operand:V16QI 0 "register_operand" "")
4097 (umax:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "")
4098 (match_operand:V16QI 2 "nonimmediate_operand" "")))]
4100 "ix86_fixup_binary_operands_no_copy (UMAX, V16QImode, operands);")
;; Unsigned V16QI maximum emitted as pmaxub.  Operand 1 is commutative and
;; tied to the destination; operand 2 may be a register or memory.
;; Requires SSE2 and ix86_binary_operator_ok for UMAX.
4102 (define_insn "*umaxv16qi3"
4103 [(set (match_operand:V16QI 0 "register_operand" "=x")
4104 (umax:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "%0")
4105 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
4106 "TARGET_SSE2 && ix86_binary_operator_ok (UMAX, V16QImode, operands)"
4107 "pmaxub\t{%2, %0|%0, %2}"
4108 [(set_attr "type" "sseiadd")
4109 (set_attr "prefix_data16" "1")
4110 (set_attr "mode" "TI")])
;; Signed V8HI maximum expander.  Both inputs may be registers or memory;
;; ix86_fixup_binary_operands_no_copy is called for SMAX.
4112 (define_expand "smaxv8hi3"
4113 [(set (match_operand:V8HI 0 "register_operand" "")
4114 (smax:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
4115 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
4117 "ix86_fixup_binary_operands_no_copy (SMAX, V8HImode, operands);")
;; Signed V8HI maximum emitted as pmaxsw.  Operand 1 is commutative and
;; tied to the destination; operand 2 may be a register or memory.
;; Requires SSE2 and ix86_binary_operator_ok for SMAX.
4119 (define_insn "*smaxv8hi3"
4120 [(set (match_operand:V8HI 0 "register_operand" "=x")
4121 (smax:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
4122 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
4123 "TARGET_SSE2 && ix86_binary_operator_ok (SMAX, V8HImode, operands)"
4124 "pmaxsw\t{%2, %0|%0, %2}"
4125 [(set_attr "type" "sseiadd")
4126 (set_attr "prefix_data16" "1")
4127 (set_attr "mode" "TI")])
;; Unsigned V8HI maximum expander.  One path only calls
;; ix86_fixup_binary_operands_no_copy for UMAX.  The other synthesises the
;; maximum as (op1 -us op2) + op2: an unsigned saturating subtract
;; (sse2_ussubv8hi3) followed by addv8hi3.  The intermediate goes in op0
;; unless op0 is rtx_equal_p to op2, in which case a fresh register is
;; used so op2 is still intact for the add.
4129 (define_expand "umaxv8hi3"
4130 [(set (match_operand:V8HI 0 "register_operand" "")
4131 (umax:V8HI (match_operand:V8HI 1 "register_operand" "")
4132 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
4136 ix86_fixup_binary_operands_no_copy (UMAX, V8HImode, operands);
4139 rtx op0 = operands[0], op2 = operands[2], op3 = op0;
4140 if (rtx_equal_p (op3, op2))
4141 op3 = gen_reg_rtx (V8HImode);
4142 emit_insn (gen_sse2_ussubv8hi3 (op3, operands[1], op2));
4143 emit_insn (gen_addv8hi3 (op0, op3, op2));
;; Signed maximum expander for the SSEMODE14 modes (V16QI, V4SI).  One
;; path only calls ix86_fixup_binary_operands_no_copy for SMAX.  The other
;; builds a six-entry operand array -- destination, the two values to
;; choose between (op1, op2), a GT comparison rtx, and the comparison
;; operands (op1, op2) -- and hands it to ix86_expand_int_vcond.
4148 (define_expand "smax<mode>3"
4149 [(set (match_operand:SSEMODE14 0 "register_operand" "")
4150 (smax:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
4151 (match_operand:SSEMODE14 2 "register_operand" "")))]
4155 ix86_fixup_binary_operands_no_copy (SMAX, <MODE>mode, operands);
4161 xops[0] = operands[0];
4162 xops[1] = operands[1];
4163 xops[2] = operands[2];
4164 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
4165 xops[4] = operands[1];
4166 xops[5] = operands[2];
4167 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 signed maximum for the SSEMODE14 modes (V16QI, V4SI), emitted as
;; pmaxs with the element-size suffix.  Operand 1 is commutative and tied
;; to the destination; operand 2 may be a register or memory.  Requires
;; SSE4.1 and ix86_binary_operator_ok for SMAX.
4173 (define_insn "*sse4_1_smax<mode>3"
4174 [(set (match_operand:SSEMODE14 0 "register_operand" "=x")
4176 (match_operand:SSEMODE14 1 "nonimmediate_operand" "%0")
4177 (match_operand:SSEMODE14 2 "nonimmediate_operand" "xm")))]
4178 "TARGET_SSE4_1 && ix86_binary_operator_ok (SMAX, <MODE>mode, operands)"
4179 "pmaxs<ssevecsize>\t{%2, %0|%0, %2}"
4180 [(set_attr "type" "sseiadd")
4181 (set_attr "prefix_extra" "1")
4182 (set_attr "mode" "TI")])
;; Unsigned V4SI maximum expander.  One path only calls
;; ix86_fixup_binary_operands_no_copy for UMAX.  The other builds a
;; six-entry operand array -- destination, the two values to choose
;; between (op1, op2), a GTU comparison rtx, and the comparison operands
;; (op1, op2) -- and hands it to ix86_expand_int_vcond.
4184 (define_expand "umaxv4si3"
4185 [(set (match_operand:V4SI 0 "register_operand" "")
4186 (umax:V4SI (match_operand:V4SI 1 "register_operand" "")
4187 (match_operand:V4SI 2 "register_operand" "")))]
4191 ix86_fixup_binary_operands_no_copy (UMAX, V4SImode, operands);
4197 xops[0] = operands[0];
4198 xops[1] = operands[1];
4199 xops[2] = operands[2];
4200 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
4201 xops[4] = operands[1];
4202 xops[5] = operands[2];
4203 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 unsigned maximum for the SSEMODE24 modes (V8HI, V4SI).  Emits
;; pmaxu<ssevecsize> when TARGET_SSE4_1 holds and ix86_binary_operator_ok
;; accepts the operands.  Operand 1 is tied to the destination ("%0",
;; commutative with operand 2).
4209 (define_insn "*sse4_1_umax<mode>3"
4210 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
4212 (match_operand:SSEMODE24 1 "nonimmediate_operand" "%0")
4213 (match_operand:SSEMODE24 2 "nonimmediate_operand" "xm")))]
4214 "TARGET_SSE4_1 && ix86_binary_operator_ok (UMAX, <MODE>mode, operands)"
4215 "pmaxu<ssevecsize>\t{%2, %0|%0, %2}"
4216 [(set_attr "type" "sseiadd")
4217 (set_attr "prefix_extra" "1")
4218 (set_attr "mode" "TI")])
;; Expander for the unsigned V16QI minimum.  Its only preparation step is
;; ix86_fixup_binary_operands_no_copy (UMIN, V16QImode, operands); the
;; pattern itself is then emitted as written.
4220 (define_expand "uminv16qi3"
4221 [(set (match_operand:V16QI 0 "register_operand" "")
4222 (umin:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "")
4223 (match_operand:V16QI 2 "nonimmediate_operand" "")))]
4225 "ix86_fixup_binary_operands_no_copy (UMIN, V16QImode, operands);")
;; SSE2 unsigned byte minimum: emits pminub when TARGET_SSE2 holds and
;; ix86_binary_operator_ok accepts the operands.  Operand 1 is tied to the
;; destination ("%0", commutative with operand 2).
4227 (define_insn "*uminv16qi3"
4228 [(set (match_operand:V16QI 0 "register_operand" "=x")
4229 (umin:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "%0")
4230 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
4231 "TARGET_SSE2 && ix86_binary_operator_ok (UMIN, V16QImode, operands)"
4232 "pminub\t{%2, %0|%0, %2}"
4233 [(set_attr "type" "sseiadd")
4234 (set_attr "prefix_data16" "1")
4235 (set_attr "mode" "TI")])
;; Expander for the signed V8HI minimum.  Its only preparation step is
;; ix86_fixup_binary_operands_no_copy (SMIN, V8HImode, operands).
4237 (define_expand "sminv8hi3"
4238 [(set (match_operand:V8HI 0 "register_operand" "")
4239 (smin:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
4240 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
4242 "ix86_fixup_binary_operands_no_copy (SMIN, V8HImode, operands);")
;; SSE2 signed word minimum: emits pminsw when TARGET_SSE2 holds and
;; ix86_binary_operator_ok accepts the operands.  Operand 1 is tied to the
;; destination ("%0", commutative with operand 2).
4244 (define_insn "*sminv8hi3"
4245 [(set (match_operand:V8HI 0 "register_operand" "=x")
4246 (smin:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
4247 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
4248 "TARGET_SSE2 && ix86_binary_operator_ok (SMIN, V8HImode, operands)"
4249 "pminsw\t{%2, %0|%0, %2}"
4250 [(set_attr "type" "sseiadd")
4251 (set_attr "prefix_data16" "1")
4252 (set_attr "mode" "TI")])
;; Expander for the signed minimum over the SSEMODE14 modes (V16QI, V4SI).
;; One visible path calls ix86_fixup_binary_operands_no_copy.  The other
;; passes ix86_expand_int_vcond an xops array in the vcond operand order.
;; Note the swapped value slots relative to smax: xops[1] is operand 2 and
;; xops[2] is operand 1, so with the GT comparison the result is operand 2
;; where op1 > op2 and operand 1 otherwise.
4254 (define_expand "smin<mode>3"
4255 [(set (match_operand:SSEMODE14 0 "register_operand" "")
4256 (smin:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
4257 (match_operand:SSEMODE14 2 "register_operand" "")))]
4261 ix86_fixup_binary_operands_no_copy (SMIN, <MODE>mode, operands);
4267 xops[0] = operands[0];
4268 xops[1] = operands[2];
4269 xops[2] = operands[1];
4270 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
4271 xops[4] = operands[1];
4272 xops[5] = operands[2];
4273 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 signed minimum for the SSEMODE14 modes (V16QI, V4SI).  Emits
;; pmins<ssevecsize> when TARGET_SSE4_1 holds and ix86_binary_operator_ok
;; accepts the operands.  Operand 1 is tied to the destination ("%0",
;; commutative with operand 2).
4279 (define_insn "*sse4_1_smin<mode>3"
4280 [(set (match_operand:SSEMODE14 0 "register_operand" "=x")
4282 (match_operand:SSEMODE14 1 "nonimmediate_operand" "%0")
4283 (match_operand:SSEMODE14 2 "nonimmediate_operand" "xm")))]
4284 "TARGET_SSE4_1 && ix86_binary_operator_ok (SMIN, <MODE>mode, operands)"
4285 "pmins<ssevecsize>\t{%2, %0|%0, %2}"
4286 [(set_attr "type" "sseiadd")
4287 (set_attr "prefix_extra" "1")
4288 (set_attr "mode" "TI")])
;; Expander for the unsigned minimum over the SSEMODE24 modes (V8HI, V4SI).
;; One visible path calls ix86_fixup_binary_operands_no_copy.  The other
;; passes ix86_expand_int_vcond an xops array in the vcond operand order
;; with the value slots swapped (xops[1] = operand 2, xops[2] = operand 1)
;; and a GTU comparison: operand 2 where op1 >u op2, operand 1 otherwise.
4290 (define_expand "umin<mode>3"
4291 [(set (match_operand:SSEMODE24 0 "register_operand" "")
4292 (umin:SSEMODE24 (match_operand:SSEMODE24 1 "register_operand" "")
4293 (match_operand:SSEMODE24 2 "register_operand" "")))]
4297 ix86_fixup_binary_operands_no_copy (UMIN, <MODE>mode, operands);
4303 xops[0] = operands[0];
4304 xops[1] = operands[2];
4305 xops[2] = operands[1];
4306 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
4307 xops[4] = operands[1];
4308 xops[5] = operands[2];
4309 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 unsigned minimum for the SSEMODE24 modes (V8HI, V4SI).  Emits
;; pminu<ssevecsize> when TARGET_SSE4_1 holds and ix86_binary_operator_ok
;; accepts the operands.  Operand 1 is tied to the destination ("%0",
;; commutative with operand 2).
4315 (define_insn "*sse4_1_umin<mode>3"
4316 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
4318 (match_operand:SSEMODE24 1 "nonimmediate_operand" "%0")
4319 (match_operand:SSEMODE24 2 "nonimmediate_operand" "xm")))]
4320 "TARGET_SSE4_1 && ix86_binary_operator_ok (UMIN, <MODE>mode, operands)"
4321 "pminu<ssevecsize>\t{%2, %0|%0, %2}"
4322 [(set_attr "type" "sseiadd")
4323 (set_attr "prefix_extra" "1")
4324 (set_attr "mode" "TI")])
4326 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4328 ;; Parallel integral comparisons
4330 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Element-wise equality compare for the SSEMODE124 modes (V16QI, V8HI,
;; V4SI).  Emits pcmpeq<ssevecsize>.  Enabled for TARGET_SSE2 without
;; TARGET_SSE5, and only when ix86_binary_operator_ok accepts the operands.
;; Operand 1 is tied to the destination ("%0", commutative with operand 2).
4332 (define_insn "sse2_eq<mode>3"
4333 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
4335 (match_operand:SSEMODE124 1 "nonimmediate_operand" "%0")
4336 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
4337 "TARGET_SSE2 && !TARGET_SSE5
4338 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
4339 "pcmpeq<ssevecsize>\t{%2, %0|%0, %2}"
4340 [(set_attr "type" "ssecmp")
4341 (set_attr "prefix_data16" "1")
4342 (set_attr "mode" "TI")])
;; V2DI equality compare: emits pcmpeqq when TARGET_SSE4_1 holds and
;; ix86_binary_operator_ok accepts the operands.  Operand 1 is tied to the
;; destination ("%0", commutative with operand 2).
4344 (define_insn "sse4_1_eqv2di3"
4345 [(set (match_operand:V2DI 0 "register_operand" "=x")
4347 (match_operand:V2DI 1 "nonimmediate_operand" "%0")
4348 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
4349 "TARGET_SSE4_1 && ix86_binary_operator_ok (EQ, V2DImode, operands)"
4350 "pcmpeqq\t{%2, %0|%0, %2}"
4351 [(set_attr "type" "ssecmp")
4352 (set_attr "prefix_extra" "1")
4353 (set_attr "mode" "TI")])
;; Element-wise greater-than compare for the SSEMODE124 modes (V16QI,
;; V8HI, V4SI).  Emits pcmpgt<ssevecsize>.  Enabled for TARGET_SSE2 without
;; TARGET_SSE5.  The operation is not commutative, so operand 1 is a
;; register tied to the destination ("0", no "%" marker).
4355 (define_insn "sse2_gt<mode>3"
4356 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
4358 (match_operand:SSEMODE124 1 "register_operand" "0")
4359 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
4360 "TARGET_SSE2 && !TARGET_SSE5"
4361 "pcmpgt<ssevecsize>\t{%2, %0|%0, %2}"
4362 [(set_attr "type" "ssecmp")
4363 (set_attr "prefix_data16" "1")
4364 (set_attr "mode" "TI")])
;; V2DI greater-than compare; emits pcmpgtq.
;; Operand 1 carries the matching constraint "0" with no "%" commutativity
;; marker, so it must end up in the destination register.  Its predicate is
;; therefore register_operand, as in sse2_gt<mode>3; a looser
;; nonimmediate_operand predicate would admit memory operands that the
;; constraint can never satisfy without a reload copy.
4366 (define_insn "sse4_2_gtv2di3"
4367 [(set (match_operand:V2DI 0 "register_operand" "=x")
4369 (match_operand:V2DI 1 "register_operand" "0")
4370 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
4372 "pcmpgtq\t{%2, %0|%0, %2}"
4373 [(set_attr "type" "ssecmp")
4374 (set_attr "mode" "TI")])
;; Vector conditional-select expander for the SSEMODEI modes.
;; Operand layout: 0 = destination, 1 = value chosen when the comparison
;; holds, 2 = value chosen otherwise, 3 = comparison operator applied to
;; operands 4 and 5.  The work is delegated to ix86_expand_int_vcond, whose
;; return value is tested.
4376 (define_expand "vcond<mode>"
4377 [(set (match_operand:SSEMODEI 0 "register_operand" "")
4378 (if_then_else:SSEMODEI
4379 (match_operator 3 ""
4380 [(match_operand:SSEMODEI 4 "nonimmediate_operand" "")
4381 (match_operand:SSEMODEI 5 "nonimmediate_operand" "")])
4382 (match_operand:SSEMODEI 1 "general_operand" "")
4383 (match_operand:SSEMODEI 2 "general_operand" "")))]
4386 if (ix86_expand_int_vcond (operands))
;; Counterpart of vcond<mode> under the vcondu name, with the same operand
;; layout (0 = destination, 1/2 = selected values, 3 = comparison operator
;; on operands 4 and 5) and the same delegation to ix86_expand_int_vcond.
;; NOTE(review): presumably the unsigned-comparison variant -- the visible
;; lines do not restrict operator 3; confirm against the expander contract.
4392 (define_expand "vcondu<mode>"
4393 [(set (match_operand:SSEMODEI 0 "register_operand" "")
4394 (if_then_else:SSEMODEI
4395 (match_operator 3 ""
4396 [(match_operand:SSEMODEI 4 "nonimmediate_operand" "")
4397 (match_operand:SSEMODEI 5 "nonimmediate_operand" "")])
4398 (match_operand:SSEMODEI 1 "general_operand" "")
4399 (match_operand:SSEMODEI 2 "general_operand" "")))]
4402 if (ix86_expand_int_vcond (operands))
4408 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4410 ;; Parallel bitwise logical operations
4412 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; One's-complement expander for the SSEMODEI modes, expressed as an xor.
;; The preparation code builds a CONST_VECTOR with constm1_rtx in each of
;; the GET_MODE_NUNITS elements, forces it into a register and stores it
;; in operands[2] for use as the xor's second input.
4414 (define_expand "one_cmpl<mode>2"
4415 [(set (match_operand:SSEMODEI 0 "register_operand" "")
4416 (xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
4420 int i, n = GET_MODE_NUNITS (<MODE>mode);
4421 rtvec v = rtvec_alloc (n);
4423 for (i = 0; i < n; ++i)
4424 RTVEC_ELT (v, i) = constm1_rtx;
4426 operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v));
;; Bitwise AND expander for the SSEMODEI modes.  Its only preparation step
;; is ix86_fixup_binary_operands_no_copy (AND, <MODE>mode, operands).
4429 (define_expand "and<mode>3"
4430 [(set (match_operand:SSEMODEI 0 "register_operand" "")
4431 (and:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
4432 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
4434 "ix86_fixup_binary_operands_no_copy (AND, <MODE>mode, operands);")
;; Integer-vector AND for SSE without SSE2: emits andps (insn mode V4SF).
;; Requires ix86_binary_operator_ok; operand 1 is tied to the destination
;; ("%0", commutative with operand 2).
4436 (define_insn "*sse_and<mode>3"
4437 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
4439 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
4440 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
4441 "(TARGET_SSE && !TARGET_SSE2)
4442 && ix86_binary_operator_ok (AND, <MODE>mode, operands)"
4443 "andps\t{%2, %0|%0, %2}"
4444 [(set_attr "type" "sselog")
4445 (set_attr "mode" "V4SF")])
;; Integer-vector AND for SSE2: emits pand when ix86_binary_operator_ok
;; accepts the operands.  Operand 1 is tied to the destination ("%0",
;; commutative with operand 2).
4447 (define_insn "*sse2_and<mode>3"
4448 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
4450 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
4451 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
4452 "TARGET_SSE2 && ix86_binary_operator_ok (AND, <MODE>mode, operands)"
4453 "pand\t{%2, %0|%0, %2}"
4454 [(set_attr "type" "sselog")
4455 (set_attr "prefix_data16" "1")
4456 (set_attr "mode" "TI")])
;; And-not for SSE without SSE2: operand 1 appears under a `not' and is
;; tied to the destination; operand 2 may be a register or memory.
;; Emits andnps (insn mode V4SF).
4458 (define_insn "*sse_nand<mode>3"
4459 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
4461 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0"))
4462 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
4463 "(TARGET_SSE && !TARGET_SSE2)"
4464 "andnps\t{%2, %0|%0, %2}"
4465 [(set_attr "type" "sselog")
4466 (set_attr "mode" "V4SF")])
;; And-not on the SSEMODEI modes via pandn: operand 1 appears under a
;; `not' and is tied to the destination; operand 2 may be a register or
;; memory.
4468 (define_insn "sse2_nand<mode>3"
4469 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
4471 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0"))
4472 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
4474 "pandn\t{%2, %0|%0, %2}"
4475 [(set_attr "type" "sselog")
4476 (set_attr "prefix_data16" "1")
4477 (set_attr "mode" "TI")])
;; Bitwise AND expander for TFmode.  Its only preparation step is
;; ix86_fixup_binary_operands_no_copy (AND, TFmode, operands).
4479 (define_expand "andtf3"
4480 [(set (match_operand:TF 0 "register_operand" "")
4481 (and:TF (match_operand:TF 1 "nonimmediate_operand" "")
4482 (match_operand:TF 2 "nonimmediate_operand" "")))]
4484 "ix86_fixup_binary_operands_no_copy (AND, TFmode, operands);")
;; TFmode AND implemented with pand.  Enabled for TARGET_64BIT when
;; ix86_binary_operator_ok accepts the operands; operand 1 is tied to the
;; destination ("%0", commutative with operand 2).
4486 (define_insn "*andtf3"
4487 [(set (match_operand:TF 0 "register_operand" "=x")
4489 (match_operand:TF 1 "nonimmediate_operand" "%0")
4490 (match_operand:TF 2 "nonimmediate_operand" "xm")))]
4491 "TARGET_64BIT && ix86_binary_operator_ok (AND, TFmode, operands)"
4492 "pand\t{%2, %0|%0, %2}"
4493 [(set_attr "type" "sselog")
4494 (set_attr "prefix_data16" "1")
4495 (set_attr "mode" "TI")])
;; TFmode and-not implemented with pandn: operand 1 appears under a `not'
;; and is tied to the destination; operand 2 may be a register or memory.
4497 (define_insn "*nandtf3"
4498 [(set (match_operand:TF 0 "register_operand" "=x")
4500 (not:TF (match_operand:TF 1 "register_operand" "0"))
4501 (match_operand:TF 2 "nonimmediate_operand" "xm")))]
4503 "pandn\t{%2, %0|%0, %2}"
4504 [(set_attr "type" "sselog")
4505 (set_attr "prefix_data16" "1")
4506 (set_attr "mode" "TI")])
;; Bitwise inclusive-OR expander for the SSEMODEI modes.  Its only
;; preparation step is ix86_fixup_binary_operands_no_copy (IOR, ...).
4508 (define_expand "ior<mode>3"
4509 [(set (match_operand:SSEMODEI 0 "register_operand" "")
4510 (ior:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
4511 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
4513 "ix86_fixup_binary_operands_no_copy (IOR, <MODE>mode, operands);")
;; Integer-vector OR for SSE without SSE2: emits orps (insn mode V4SF).
;; Requires ix86_binary_operator_ok; operand 1 is tied to the destination
;; ("%0", commutative with operand 2).
4515 (define_insn "*sse_ior<mode>3"
4516 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
4518 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
4519 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
4520 "(TARGET_SSE && !TARGET_SSE2)
4521 && ix86_binary_operator_ok (IOR, <MODE>mode, operands)"
4522 "orps\t{%2, %0|%0, %2}"
4523 [(set_attr "type" "sselog")
4524 (set_attr "mode" "V4SF")])
;; Integer-vector OR for SSE2: emits por when ix86_binary_operator_ok
;; accepts the operands.  Operand 1 is tied to the destination ("%0",
;; commutative with operand 2).
4526 (define_insn "*sse2_ior<mode>3"
4527 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
4529 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
4530 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
4531 "TARGET_SSE2 && ix86_binary_operator_ok (IOR, <MODE>mode, operands)"
4532 "por\t{%2, %0|%0, %2}"
4533 [(set_attr "type" "sselog")
4534 (set_attr "prefix_data16" "1")
4535 (set_attr "mode" "TI")])
;; Bitwise inclusive-OR expander for TFmode.  Its only preparation step is
;; ix86_fixup_binary_operands_no_copy (IOR, TFmode, operands).
4537 (define_expand "iortf3"
4538 [(set (match_operand:TF 0 "register_operand" "")
4539 (ior:TF (match_operand:TF 1 "nonimmediate_operand" "")
4540 (match_operand:TF 2 "nonimmediate_operand" "")))]
4542 "ix86_fixup_binary_operands_no_copy (IOR, TFmode, operands);")
;; TFmode OR implemented with por.  Enabled for TARGET_64BIT when
;; ix86_binary_operator_ok accepts the operands; operand 1 is tied to the
;; destination ("%0", commutative with operand 2).
4544 (define_insn "*iortf3"
4545 [(set (match_operand:TF 0 "register_operand" "=x")
4547 (match_operand:TF 1 "nonimmediate_operand" "%0")
4548 (match_operand:TF 2 "nonimmediate_operand" "xm")))]
4549 "TARGET_64BIT && ix86_binary_operator_ok (IOR, TFmode, operands)"
4550 "por\t{%2, %0|%0, %2}"
4551 [(set_attr "type" "sselog")
4552 (set_attr "prefix_data16" "1")
4553 (set_attr "mode" "TI")])
;; Bitwise exclusive-OR expander for the SSEMODEI modes.  Its only
;; preparation step is ix86_fixup_binary_operands_no_copy (XOR, ...).
4555 (define_expand "xor<mode>3"
4556 [(set (match_operand:SSEMODEI 0 "register_operand" "")
4557 (xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
4558 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
4560 "ix86_fixup_binary_operands_no_copy (XOR, <MODE>mode, operands);")
;; Integer-vector XOR for SSE without SSE2: emits xorps (insn mode V4SF).
;; Requires ix86_binary_operator_ok; operand 1 is tied to the destination
;; ("%0", commutative with operand 2).
4562 (define_insn "*sse_xor<mode>3"
4563 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
4565 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
4566 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
4567 "(TARGET_SSE && !TARGET_SSE2)
4568 && ix86_binary_operator_ok (XOR, <MODE>mode, operands)"
4569 "xorps\t{%2, %0|%0, %2}"
4570 [(set_attr "type" "sselog")
4571 (set_attr "mode" "V4SF")])
;; Integer-vector XOR for SSE2: emits pxor when ix86_binary_operator_ok
;; accepts the operands.  Operand 1 is tied to the destination ("%0",
;; commutative with operand 2).
4573 (define_insn "*sse2_xor<mode>3"
4574 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
4576 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
4577 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
4578 "TARGET_SSE2 && ix86_binary_operator_ok (XOR, <MODE>mode, operands)"
4579 "pxor\t{%2, %0|%0, %2}"
4580 [(set_attr "type" "sselog")
4581 (set_attr "prefix_data16" "1")
4582 (set_attr "mode" "TI")])
;; Bitwise exclusive-OR expander for TFmode.  Its only preparation step is
;; ix86_fixup_binary_operands_no_copy (XOR, TFmode, operands).
4584 (define_expand "xortf3"
4585 [(set (match_operand:TF 0 "register_operand" "")
4586 (xor:TF (match_operand:TF 1 "nonimmediate_operand" "")
4587 (match_operand:TF 2 "nonimmediate_operand" "")))]
4589 "ix86_fixup_binary_operands_no_copy (XOR, TFmode, operands);")
;; TFmode XOR implemented with pxor.  Enabled for TARGET_64BIT when
;; ix86_binary_operator_ok accepts the operands; operand 1 is tied to the
;; destination ("%0", commutative with operand 2).
4591 (define_insn "*xortf3"
4592 [(set (match_operand:TF 0 "register_operand" "=x")
4594 (match_operand:TF 1 "nonimmediate_operand" "%0")
4595 (match_operand:TF 2 "nonimmediate_operand" "xm")))]
4596 "TARGET_64BIT && ix86_binary_operator_ok (XOR, TFmode, operands)"
4597 "pxor\t{%2, %0|%0, %2}"
4598 [(set_attr "type" "sselog")
4599 (set_attr "prefix_data16" "1")
4600 (set_attr "mode" "TI")])
4602 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4604 ;; Parallel integral element swizzling
4606 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4609 ;; op1 = abcdefghijklmnop
4610 ;; op2 = qrstuvwxyz012345
4611 ;; h1 = aqbrcsdteufvgwhx
4612 ;; l1 = iyjzk0l1m2n3o4p5
4613 ;; h2 = aiqybjrzcks0dlt1
4614 ;; l2 = emu2fnv3gow4hpx5
4615 ;; h3 = aeimquy2bfjnrvz3
4616 ;; l3 = cgkosw04dhlptx15
4617 ;; result = bdfhjlnprtvxz135
;; Expander that packs two V8HI inputs (operands 1 and 2) into the V16QI
;; operand 0.  Both inputs are reinterpreted as V16QI with gen_lowpart and
;; pushed through seven byte-interleave steps: three high/low rounds on
;; the temporaries h1/l1, h2/l2, h3/l3, then a final low interleave of
;; l3 and h3 that writes operand 0.
4618 (define_expand "vec_pack_trunc_v8hi"
4619 [(match_operand:V16QI 0 "register_operand" "")
4620 (match_operand:V8HI 1 "register_operand" "")
4621 (match_operand:V8HI 2 "register_operand" "")]
4624 rtx op1, op2, h1, l1, h2, l2, h3, l3;
4626 op1 = gen_lowpart (V16QImode, operands[1]);
4627 op2 = gen_lowpart (V16QImode, operands[2]);
4628 h1 = gen_reg_rtx (V16QImode);
4629 l1 = gen_reg_rtx (V16QImode);
4630 h2 = gen_reg_rtx (V16QImode);
4631 l2 = gen_reg_rtx (V16QImode);
4632 h3 = gen_reg_rtx (V16QImode);
4633 l3 = gen_reg_rtx (V16QImode);
4635 emit_insn (gen_vec_interleave_highv16qi (h1, op1, op2));
4636 emit_insn (gen_vec_interleave_lowv16qi (l1, op1, op2));
4637 emit_insn (gen_vec_interleave_highv16qi (h2, l1, h1));
4638 emit_insn (gen_vec_interleave_lowv16qi (l2, l1, h1));
4639 emit_insn (gen_vec_interleave_highv16qi (h3, l2, h2));
4640 emit_insn (gen_vec_interleave_lowv16qi (l3, l2, h2));
4641 emit_insn (gen_vec_interleave_lowv16qi (operands[0], l3, h3));
4652 ;; result = bdfhjlnp
;; Expander that packs two V4SI inputs (operands 1 and 2) into the V8HI
;; operand 0.  Both inputs are reinterpreted as V8HI with gen_lowpart and
;; pushed through five word-interleave steps: two high/low rounds on the
;; temporaries h1/l1 and h2/l2, then a final low interleave of l2 and h2
;; that writes operand 0.
4653 (define_expand "vec_pack_trunc_v4si"
4654 [(match_operand:V8HI 0 "register_operand" "")
4655 (match_operand:V4SI 1 "register_operand" "")
4656 (match_operand:V4SI 2 "register_operand" "")]
4659 rtx op1, op2, h1, l1, h2, l2;
4661 op1 = gen_lowpart (V8HImode, operands[1]);
4662 op2 = gen_lowpart (V8HImode, operands[2]);
4663 h1 = gen_reg_rtx (V8HImode);
4664 l1 = gen_reg_rtx (V8HImode);
4665 h2 = gen_reg_rtx (V8HImode);
4666 l2 = gen_reg_rtx (V8HImode);
4668 emit_insn (gen_vec_interleave_highv8hi (h1, op1, op2));
4669 emit_insn (gen_vec_interleave_lowv8hi (l1, op1, op2));
4670 emit_insn (gen_vec_interleave_highv8hi (h2, l1, h1));
4671 emit_insn (gen_vec_interleave_lowv8hi (l2, l1, h1));
4672 emit_insn (gen_vec_interleave_lowv8hi (operands[0], l2, h2));
;; Expander that packs two V2DI inputs (operands 1 and 2) into the V4SI
;; operand 0.  Both inputs are reinterpreted as V4SI with gen_lowpart;
;; one high and one low doubleword interleave fill h1 and l1, and a final
;; low interleave of l1 and h1 writes operand 0.
4682 (define_expand "vec_pack_trunc_v2di"
4683 [(match_operand:V4SI 0 "register_operand" "")
4684 (match_operand:V2DI 1 "register_operand" "")
4685 (match_operand:V2DI 2 "register_operand" "")]
4688 rtx op1, op2, h1, l1;
4690 op1 = gen_lowpart (V4SImode, operands[1]);
4691 op2 = gen_lowpart (V4SImode, operands[2]);
4692 h1 = gen_reg_rtx (V4SImode);
4693 l1 = gen_reg_rtx (V4SImode);
4695 emit_insn (gen_vec_interleave_highv4si (h1, op1, op2));
4696 emit_insn (gen_vec_interleave_lowv4si (l1, op1, op2));
4697 emit_insn (gen_vec_interleave_lowv4si (operands[0], l1, h1));
;; Named expander for the high-half byte interleave of two V16QI inputs.
;; The selection list alternates indices 8..15 (operand 1) with 24..31
;; (operand 2).  Expansion forwards to gen_sse2_punpckhbw.
4701 (define_expand "vec_interleave_highv16qi"
4702 [(set (match_operand:V16QI 0 "register_operand" "")
4705 (match_operand:V16QI 1 "register_operand" "")
4706 (match_operand:V16QI 2 "nonimmediate_operand" ""))
4707 (parallel [(const_int 8) (const_int 24)
4708 (const_int 9) (const_int 25)
4709 (const_int 10) (const_int 26)
4710 (const_int 11) (const_int 27)
4711 (const_int 12) (const_int 28)
4712 (const_int 13) (const_int 29)
4713 (const_int 14) (const_int 30)
4714 (const_int 15) (const_int 31)])))]
4717 emit_insn (gen_sse2_punpckhbw (operands[0], operands[1], operands[2]));
;; Named expander for the low-half byte interleave of two V16QI inputs.
;; The selection list alternates indices 0..7 (operand 1) with 16..23
;; (operand 2).  Expansion forwards to gen_sse2_punpcklbw.
4721 (define_expand "vec_interleave_lowv16qi"
4722 [(set (match_operand:V16QI 0 "register_operand" "")
4725 (match_operand:V16QI 1 "register_operand" "")
4726 (match_operand:V16QI 2 "nonimmediate_operand" ""))
4727 (parallel [(const_int 0) (const_int 16)
4728 (const_int 1) (const_int 17)
4729 (const_int 2) (const_int 18)
4730 (const_int 3) (const_int 19)
4731 (const_int 4) (const_int 20)
4732 (const_int 5) (const_int 21)
4733 (const_int 6) (const_int 22)
4734 (const_int 7) (const_int 23)])))]
4737 emit_insn (gen_sse2_punpcklbw (operands[0], operands[1], operands[2]));
;; Named expander for the high-half word interleave of two V8HI inputs.
;; The selection list alternates indices 4..7 (operand 1) with 12..15
;; (operand 2).  Expansion forwards to gen_sse2_punpckhwd.
;; Constraint strings carry no meaning in a define_expand, so operand 0
;; uses the empty string like every other expander operand in this file.
4741 (define_expand "vec_interleave_highv8hi"
4742 [(set (match_operand:V8HI 0 "register_operand" "")
4745 (match_operand:V8HI 1 "register_operand" "")
4746 (match_operand:V8HI 2 "nonimmediate_operand" ""))
4747 (parallel [(const_int 4) (const_int 12)
4748 (const_int 5) (const_int 13)
4749 (const_int 6) (const_int 14)
4750 (const_int 7) (const_int 15)])))]
4753 emit_insn (gen_sse2_punpckhwd (operands[0], operands[1], operands[2]));
;; Named expander for the low-half word interleave of two V8HI inputs.
;; The selection list alternates indices 0..3 (operand 1) with 8..11
;; (operand 2).  Expansion forwards to gen_sse2_punpcklwd.
4757 (define_expand "vec_interleave_lowv8hi"
4758 [(set (match_operand:V8HI 0 "register_operand" "")
4761 (match_operand:V8HI 1 "register_operand" "")
4762 (match_operand:V8HI 2 "nonimmediate_operand" ""))
4763 (parallel [(const_int 0) (const_int 8)
4764 (const_int 1) (const_int 9)
4765 (const_int 2) (const_int 10)
4766 (const_int 3) (const_int 11)])))]
4769 emit_insn (gen_sse2_punpcklwd (operands[0], operands[1], operands[2]));
;; Named expander for the high-half doubleword interleave of two V4SI
;; inputs (selection indices 2, 6, 3, 7).  Expansion forwards to
;; gen_sse2_punpckhdq.
4773 (define_expand "vec_interleave_highv4si"
4774 [(set (match_operand:V4SI 0 "register_operand" "")
4777 (match_operand:V4SI 1 "register_operand" "")
4778 (match_operand:V4SI 2 "nonimmediate_operand" ""))
4779 (parallel [(const_int 2) (const_int 6)
4780 (const_int 3) (const_int 7)])))]
4783 emit_insn (gen_sse2_punpckhdq (operands[0], operands[1], operands[2]));
;; Named expander for the low-half doubleword interleave of two V4SI
;; inputs (selection indices 0, 4, 1, 5).  Expansion forwards to
;; gen_sse2_punpckldq.
4787 (define_expand "vec_interleave_lowv4si"
4788 [(set (match_operand:V4SI 0 "register_operand" "")
4791 (match_operand:V4SI 1 "register_operand" "")
4792 (match_operand:V4SI 2 "nonimmediate_operand" ""))
4793 (parallel [(const_int 0) (const_int 4)
4794 (const_int 1) (const_int 5)])))]
4797 emit_insn (gen_sse2_punpckldq (operands[0], operands[1], operands[2]));
;; Named expander for the high-half quadword interleave of two V2DI
;; inputs; the visible selection list starts at index 1.  Expansion
;; forwards to gen_sse2_punpckhqdq.
4801 (define_expand "vec_interleave_highv2di"
4802 [(set (match_operand:V2DI 0 "register_operand" "")
4805 (match_operand:V2DI 1 "register_operand" "")
4806 (match_operand:V2DI 2 "nonimmediate_operand" ""))
4807 (parallel [(const_int 1)
4811 emit_insn (gen_sse2_punpckhqdq (operands[0], operands[1], operands[2]));
;; Named expander for the low-half quadword interleave of two V2DI
;; inputs; the visible selection list starts at index 0.  Expansion
;; forwards to gen_sse2_punpcklqdq.
4815 (define_expand "vec_interleave_lowv2di"
4816 [(set (match_operand:V2DI 0 "register_operand" "")
4819 (match_operand:V2DI 1 "register_operand" "")
4820 (match_operand:V2DI 2 "nonimmediate_operand" ""))
4821 (parallel [(const_int 0)
4825 emit_insn (gen_sse2_punpcklqdq (operands[0], operands[1], operands[2]));
;; Packs two V8HI inputs into a V16QI result with packsswb.  Operand 1 is
;; a register tied to the destination; operand 2 may be a register or
;; memory.
4829 (define_insn "sse2_packsswb"
4830 [(set (match_operand:V16QI 0 "register_operand" "=x")
4833 (match_operand:V8HI 1 "register_operand" "0"))
4835 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
4837 "packsswb\t{%2, %0|%0, %2}"
4838 [(set_attr "type" "sselog")
4839 (set_attr "prefix_data16" "1")
4840 (set_attr "mode" "TI")])
;; Packs two V4SI inputs into a V8HI result with packssdw.  Operand 1 is
;; a register tied to the destination; operand 2 may be a register or
;; memory.
4842 (define_insn "sse2_packssdw"
4843 [(set (match_operand:V8HI 0 "register_operand" "=x")
4846 (match_operand:V4SI 1 "register_operand" "0"))
4848 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
4850 "packssdw\t{%2, %0|%0, %2}"
4851 [(set_attr "type" "sselog")
4852 (set_attr "prefix_data16" "1")
4853 (set_attr "mode" "TI")])
;; Packs two V8HI inputs into a V16QI result with packuswb.  Operand 1 is
;; a register tied to the destination; operand 2 may be a register or
;; memory.
4855 (define_insn "sse2_packuswb"
4856 [(set (match_operand:V16QI 0 "register_operand" "=x")
4859 (match_operand:V8HI 1 "register_operand" "0"))
4861 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
4863 "packuswb\t{%2, %0|%0, %2}"
4864 [(set_attr "type" "sselog")
4865 (set_attr "prefix_data16" "1")
4866 (set_attr "mode" "TI")])
;; High-half byte interleave of two V16QI inputs: the selection list
;; alternates indices 8..15 (operand 1) with 24..31 (operand 2).
;; Emits punpckhbw; operand 1 is tied to the destination.
4868 (define_insn "sse2_punpckhbw"
4869 [(set (match_operand:V16QI 0 "register_operand" "=x")
4872 (match_operand:V16QI 1 "register_operand" "0")
4873 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
4874 (parallel [(const_int 8) (const_int 24)
4875 (const_int 9) (const_int 25)
4876 (const_int 10) (const_int 26)
4877 (const_int 11) (const_int 27)
4878 (const_int 12) (const_int 28)
4879 (const_int 13) (const_int 29)
4880 (const_int 14) (const_int 30)
4881 (const_int 15) (const_int 31)])))]
4883 "punpckhbw\t{%2, %0|%0, %2}"
4884 [(set_attr "type" "sselog")
4885 (set_attr "prefix_data16" "1")
4886 (set_attr "mode" "TI")])
;; Low-half byte interleave of two V16QI inputs: the selection list
;; alternates indices 0..7 (operand 1) with 16..23 (operand 2).
;; Emits punpcklbw; operand 1 is tied to the destination.
4888 (define_insn "sse2_punpcklbw"
4889 [(set (match_operand:V16QI 0 "register_operand" "=x")
4892 (match_operand:V16QI 1 "register_operand" "0")
4893 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
4894 (parallel [(const_int 0) (const_int 16)
4895 (const_int 1) (const_int 17)
4896 (const_int 2) (const_int 18)
4897 (const_int 3) (const_int 19)
4898 (const_int 4) (const_int 20)
4899 (const_int 5) (const_int 21)
4900 (const_int 6) (const_int 22)
4901 (const_int 7) (const_int 23)])))]
4903 "punpcklbw\t{%2, %0|%0, %2}"
4904 [(set_attr "type" "sselog")
4905 (set_attr "prefix_data16" "1")
4906 (set_attr "mode" "TI")])
;; High-half word interleave of two V8HI inputs: the selection list
;; alternates indices 4..7 (operand 1) with 12..15 (operand 2).
;; Emits punpckhwd; operand 1 is tied to the destination.
4908 (define_insn "sse2_punpckhwd"
4909 [(set (match_operand:V8HI 0 "register_operand" "=x")
4912 (match_operand:V8HI 1 "register_operand" "0")
4913 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
4914 (parallel [(const_int 4) (const_int 12)
4915 (const_int 5) (const_int 13)
4916 (const_int 6) (const_int 14)
4917 (const_int 7) (const_int 15)])))]
4919 "punpckhwd\t{%2, %0|%0, %2}"
4920 [(set_attr "type" "sselog")
4921 (set_attr "prefix_data16" "1")
4922 (set_attr "mode" "TI")])
;; Low-half word interleave of two V8HI inputs: the selection list
;; alternates indices 0..3 (operand 1) with 8..11 (operand 2).
;; Emits punpcklwd; operand 1 is tied to the destination.
4924 (define_insn "sse2_punpcklwd"
4925 [(set (match_operand:V8HI 0 "register_operand" "=x")
4928 (match_operand:V8HI 1 "register_operand" "0")
4929 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
4930 (parallel [(const_int 0) (const_int 8)
4931 (const_int 1) (const_int 9)
4932 (const_int 2) (const_int 10)
4933 (const_int 3) (const_int 11)])))]
4935 "punpcklwd\t{%2, %0|%0, %2}"
4936 [(set_attr "type" "sselog")
4937 (set_attr "prefix_data16" "1")
4938 (set_attr "mode" "TI")])
;; High-half doubleword interleave of two V4SI inputs (selection indices
;; 2, 6, 3, 7).  Emits punpckhdq; operand 1 is tied to the destination.
4940 (define_insn "sse2_punpckhdq"
4941 [(set (match_operand:V4SI 0 "register_operand" "=x")
4944 (match_operand:V4SI 1 "register_operand" "0")
4945 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
4946 (parallel [(const_int 2) (const_int 6)
4947 (const_int 3) (const_int 7)])))]
4949 "punpckhdq\t{%2, %0|%0, %2}"
4950 [(set_attr "type" "sselog")
4951 (set_attr "prefix_data16" "1")
4952 (set_attr "mode" "TI")])
;; Low-half doubleword interleave of two V4SI inputs (selection indices
;; 0, 4, 1, 5).  Emits punpckldq; operand 1 is tied to the destination.
4954 (define_insn "sse2_punpckldq"
4955 [(set (match_operand:V4SI 0 "register_operand" "=x")
4958 (match_operand:V4SI 1 "register_operand" "0")
4959 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
4960 (parallel [(const_int 0) (const_int 4)
4961 (const_int 1) (const_int 5)])))]
4963 "punpckldq\t{%2, %0|%0, %2}"
4964 [(set_attr "type" "sselog")
4965 (set_attr "prefix_data16" "1")
4966 (set_attr "mode" "TI")])
;; High-half quadword interleave of two V2DI inputs; the visible
;; selection list starts at index 1.  Emits punpckhqdq; operand 1 is tied
;; to the destination.
4968 (define_insn "sse2_punpckhqdq"
4969 [(set (match_operand:V2DI 0 "register_operand" "=x")
4972 (match_operand:V2DI 1 "register_operand" "0")
4973 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4974 (parallel [(const_int 1)
4977 "punpckhqdq\t{%2, %0|%0, %2}"
4978 [(set_attr "type" "sselog")
4979 (set_attr "prefix_data16" "1")
4980 (set_attr "mode" "TI")])
;; Low-half quadword interleave of two V2DI inputs; the visible selection
;; list starts at index 0.  Emits punpcklqdq; operand 1 is tied to the
;; destination.
4982 (define_insn "sse2_punpcklqdq"
4983 [(set (match_operand:V2DI 0 "register_operand" "=x")
4986 (match_operand:V2DI 1 "register_operand" "0")
4987 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4988 (parallel [(const_int 0)
4991 "punpcklqdq\t{%2, %0|%0, %2}"
4992 [(set_attr "type" "sselog")
4993 (set_attr "prefix_data16" "1")
4994 (set_attr "mode" "TI")])
;; Inserts the QI operand 2 (register or memory) into one element of the
;; V16QI operand 1, which is tied to the destination.  Operand 3 arrives
;; as a power of two (const_pow2_1_to_32768_operand); the output code
;; converts it with exact_log2 into the immediate that pinsrb is given.
4996 (define_insn "*sse4_1_pinsrb"
4997 [(set (match_operand:V16QI 0 "register_operand" "=x")
4999 (vec_duplicate:V16QI
5000 (match_operand:QI 2 "nonimmediate_operand" "rm"))
5001 (match_operand:V16QI 1 "register_operand" "0")
5002 (match_operand:SI 3 "const_pow2_1_to_32768_operand" "n")))]
5005 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
5006 return "pinsrb\t{%3, %k2, %0|%0, %k2, %3}";
5008 [(set_attr "type" "sselog")
5009 (set_attr "prefix_extra" "1")
5010 (set_attr "mode" "TI")])
;; Inserts the HI operand 2 (register or memory) into one element of the
;; V8HI operand 1, which is tied to the destination.  Operand 3 arrives
;; as a power of two (const_pow2_1_to_128_operand); the output code
;; converts it with exact_log2 into the immediate that pinsrw is given.
5012 (define_insn "*sse2_pinsrw"
5013 [(set (match_operand:V8HI 0 "register_operand" "=x")
5016 (match_operand:HI 2 "nonimmediate_operand" "rm"))
5017 (match_operand:V8HI 1 "register_operand" "0")
5018 (match_operand:SI 3 "const_pow2_1_to_128_operand" "n")))]
5021 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
5022 return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}";
5024 [(set_attr "type" "sselog")
5025 (set_attr "prefix_data16" "1")
5026 (set_attr "mode" "TI")])
5028 ;; It must come before sse2_loadld since it is preferred.
;; Inserts the SI operand 2 (register or memory) into one element of the
;; V4SI operand 1, which is tied to the destination.  Operand 3 arrives
;; as a power of two (const_pow2_1_to_8_operand); the output code converts
;; it with exact_log2 into the immediate that pinsrd is given.
5029 (define_insn "*sse4_1_pinsrd"
5030 [(set (match_operand:V4SI 0 "register_operand" "=x")
5033 (match_operand:SI 2 "nonimmediate_operand" "rm"))
5034 (match_operand:V4SI 1 "register_operand" "0")
5035 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
5038 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
5039 return "pinsrd\t{%3, %2, %0|%0, %2, %3}";
5041 [(set_attr "type" "sselog")
5042 (set_attr "prefix_extra" "1")
5043 (set_attr "mode" "TI")])
;; Inserts the DI operand 2 (register or memory) into one element of the
;; V2DI operand 1, which is tied to the destination.  Operand 3 arrives
;; as a power of two (const_pow2_1_to_2_operand); the output code converts
;; it with exact_log2 into the immediate that pinsrq is given.
5045 (define_insn "*sse4_1_pinsrq"
5046 [(set (match_operand:V2DI 0 "register_operand" "=x")
5049 (match_operand:DI 2 "nonimmediate_operand" "rm"))
5050 (match_operand:V2DI 1 "register_operand" "0")
5051 (match_operand:SI 3 "const_pow2_1_to_2_operand" "n")))]
5054 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
5055 return "pinsrq\t{%3, %2, %0|%0, %2, %3}";
5057 [(set_attr "type" "sselog")
5058 (set_attr "prefix_extra" "1")
5059 (set_attr "mode" "TI")])
;; Extracts the V16QI element selected by the constant operand 2 (0..15)
;; into an SI general register using pextrb.
5061 (define_insn "*sse4_1_pextrb"
5062 [(set (match_operand:SI 0 "register_operand" "=r")
5065 (match_operand:V16QI 1 "register_operand" "x")
5066 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")]))))]
5068 "pextrb\t{%2, %1, %0|%0, %1, %2}"
5069 [(set_attr "type" "sselog")
5070 (set_attr "prefix_extra" "1")
5071 (set_attr "mode" "TI")])
;; Memory-destination form of pextrb: stores the V16QI element selected
;; by the constant operand 2 (0..15) into a QI memory operand.
5073 (define_insn "*sse4_1_pextrb_memory"
5074 [(set (match_operand:QI 0 "memory_operand" "=m")
5076 (match_operand:V16QI 1 "register_operand" "x")
5077 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")])))]
5079 "pextrb\t{%2, %1, %0|%0, %1, %2}"
5080 [(set_attr "type" "sselog")
5081 (set_attr "prefix_extra" "1")
5082 (set_attr "mode" "TI")])
;; Extracts the V8HI element selected by the constant operand 2 (0..7)
;; into an SI general register using pextrw.
5084 (define_insn "*sse2_pextrw"
5085 [(set (match_operand:SI 0 "register_operand" "=r")
5088 (match_operand:V8HI 1 "register_operand" "x")
5089 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")]))))]
5091 "pextrw\t{%2, %1, %0|%0, %1, %2}"
5092 [(set_attr "type" "sselog")
5093 (set_attr "prefix_data16" "1")
5094 (set_attr "mode" "TI")])
;; Memory-destination form of pextrw: stores the V8HI element selected by
;; the constant operand 2 (0..7) into an HI memory operand.
5096 (define_insn "*sse4_1_pextrw_memory"
5097 [(set (match_operand:HI 0 "memory_operand" "=m")
5099 (match_operand:V8HI 1 "register_operand" "x")
5100 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")])))]
5102 "pextrw\t{%2, %1, %0|%0, %1, %2}"
5103 [(set_attr "type" "sselog")
5104 (set_attr "prefix_extra" "1")
5105 (set_attr "mode" "TI")])
;; Extracts the V4SI element selected by the constant operand 2 (0..3)
;; into an SI destination, register or memory ("=rm"), using pextrd.
5107 (define_insn "*sse4_1_pextrd"
5108 [(set (match_operand:SI 0 "nonimmediate_operand" "=rm")
5110 (match_operand:V4SI 1 "register_operand" "x")
5111 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
5113 "pextrd\t{%2, %1, %0|%0, %1, %2}"
5114 [(set_attr "type" "sselog")
5115 (set_attr "prefix_extra" "1")
5116 (set_attr "mode" "TI")])
5118 ;; It must come before *vec_extractv2di_1_sse since it is preferred.
;; Extracts the V2DI element selected by the constant operand 2 (0..1)
;; into a DI destination, register or memory ("=rm"), using pextrq.
;; Enabled only when both TARGET_SSE4_1 and TARGET_64BIT hold.
5119 (define_insn "*sse4_1_pextrq"
5120 [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
5122 (match_operand:V2DI 1 "register_operand" "x")
5123 (parallel [(match_operand:SI 2 "const_0_to_1_operand" "n")])))]
5124 "TARGET_SSE4_1 && TARGET_64BIT"
5125 "pextrq\t{%2, %1, %0|%0, %1, %2}"
5126 [(set_attr "type" "sselog")
5127 (set_attr "prefix_extra" "1")
5128 (set_attr "mode" "TI")])
;; Expander taking a single integer shuffle mask in operand 2.  The mask
;; is split into four 2-bit fields (bits 0-1, 2-3, 4-5, 6-7), each passed
;; as a separate selector constant to gen_sse2_pshufd_1.
5130 (define_expand "sse2_pshufd"
5131 [(match_operand:V4SI 0 "register_operand" "")
5132 (match_operand:V4SI 1 "nonimmediate_operand" "")
5133 (match_operand:SI 2 "const_int_operand" "")]
5136 int mask = INTVAL (operands[2]);
5137 emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
5138 GEN_INT ((mask >> 0) & 3),
5139 GEN_INT ((mask >> 2) & 3),
5140 GEN_INT ((mask >> 4) & 3),
5141 GEN_INT ((mask >> 6) & 3)));
;; V4SI shuffle with four explicit selectors (operands 2-5, each 0..3).
;; The output code packs the selectors back into one immediate, two bits
;; each at shifts 0, 2, 4 and 6, overwrites operands[2] with it and emits
;; pshufd.
5145 (define_insn "sse2_pshufd_1"
5146 [(set (match_operand:V4SI 0 "register_operand" "=x")
5148 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
5149 (parallel [(match_operand 2 "const_0_to_3_operand" "")
5150 (match_operand 3 "const_0_to_3_operand" "")
5151 (match_operand 4 "const_0_to_3_operand" "")
5152 (match_operand 5 "const_0_to_3_operand" "")])))]
5156 mask |= INTVAL (operands[2]) << 0;
5157 mask |= INTVAL (operands[3]) << 2;
5158 mask |= INTVAL (operands[4]) << 4;
5159 mask |= INTVAL (operands[5]) << 6;
5160 operands[2] = GEN_INT (mask);
5162 return "pshufd\t{%2, %1, %0|%0, %1, %2}";
5164 [(set_attr "type" "sselog1")
5165 (set_attr "prefix_data16" "1")
5166 (set_attr "mode" "TI")])
;; Expander taking a single integer shuffle mask in operand 2.  The mask
;; is split into four 2-bit fields (bits 0-1, 2-3, 4-5, 6-7), each passed
;; as a separate selector constant to gen_sse2_pshuflw_1.
5168 (define_expand "sse2_pshuflw"
5169 [(match_operand:V8HI 0 "register_operand" "")
5170 (match_operand:V8HI 1 "nonimmediate_operand" "")
5171 (match_operand:SI 2 "const_int_operand" "")]
5174 int mask = INTVAL (operands[2]);
5175 emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
5176 GEN_INT ((mask >> 0) & 3),
5177 GEN_INT ((mask >> 2) & 3),
5178 GEN_INT ((mask >> 4) & 3),
5179 GEN_INT ((mask >> 6) & 3)));
;; V8HI shuffle whose selection list begins with four variable selectors
;; (operands 2-5, each 0..3).  The output code packs them into one
;; immediate, two bits each at shifts 0, 2, 4 and 6, overwrites
;; operands[2] with it and emits pshuflw.
5183 (define_insn "sse2_pshuflw_1"
5184 [(set (match_operand:V8HI 0 "register_operand" "=x")
5186 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
5187 (parallel [(match_operand 2 "const_0_to_3_operand" "")
5188 (match_operand 3 "const_0_to_3_operand" "")
5189 (match_operand 4 "const_0_to_3_operand" "")
5190 (match_operand 5 "const_0_to_3_operand" "")
5198 mask |= INTVAL (operands[2]) << 0;
5199 mask |= INTVAL (operands[3]) << 2;
5200 mask |= INTVAL (operands[4]) << 4;
5201 mask |= INTVAL (operands[5]) << 6;
5202 operands[2] = GEN_INT (mask);
5204 return "pshuflw\t{%2, %1, %0|%0, %1, %2}";
5206 [(set_attr "type" "sselog")
5207 (set_attr "prefix_rep" "1")
5208 (set_attr "mode" "TI")])
;; Expander taking a single integer shuffle mask in operand 2.  The mask
;; is split into four 2-bit fields and 4 is added to each, so the
;; selectors passed to gen_sse2_pshufhw_1 lie in the range 4..7.
5210 (define_expand "sse2_pshufhw"
5211 [(match_operand:V8HI 0 "register_operand" "")
5212 (match_operand:V8HI 1 "nonimmediate_operand" "")
5213 (match_operand:SI 2 "const_int_operand" "")]
5216 int mask = INTVAL (operands[2]);
5217 emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
5218 GEN_INT (((mask >> 0) & 3) + 4),
5219 GEN_INT (((mask >> 2) & 3) + 4),
5220 GEN_INT (((mask >> 4) & 3) + 4),
5221 GEN_INT (((mask >> 6) & 3) + 4)));
;; V8HI shuffle whose selection list ends with four variable selectors
;; (operands 2-5, each 4..7).  The output code subtracts 4 from each
;; selector, packs the results two bits each at shifts 0, 2, 4 and 6,
;; overwrites operands[2] with that immediate and emits pshufhw.
5225 (define_insn "sse2_pshufhw_1"
5226 [(set (match_operand:V8HI 0 "register_operand" "=x")
5228 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
5229 (parallel [(const_int 0)
5233 (match_operand 2 "const_4_to_7_operand" "")
5234 (match_operand 3 "const_4_to_7_operand" "")
5235 (match_operand 4 "const_4_to_7_operand" "")
5236 (match_operand 5 "const_4_to_7_operand" "")])))]
5240 mask |= (INTVAL (operands[2]) - 4) << 0;
5241 mask |= (INTVAL (operands[3]) - 4) << 2;
5242 mask |= (INTVAL (operands[4]) - 4) << 4;
5243 mask |= (INTVAL (operands[5]) - 4) << 6;
5244 operands[2] = GEN_INT (mask);
5246 return "pshufhw\t{%2, %1, %0|%0, %1, %2}";
5248 [(set_attr "type" "sselog")
5249 (set_attr "prefix_rep" "1")
5250 (set_attr "mode" "TI")])
;; Expander with a V4SI register destination (operand 0) and an SI
;; source (operand 1).  The preparation statement supplies an all-zero
;; V4SI constant as operand 2.
5252 (define_expand "sse2_loadd"
5253 [(set (match_operand:V4SI 0 "register_operand" "")
5256 (match_operand:SI 1 "nonimmediate_operand" ""))
5260 "operands[2] = CONST0_RTX (V4SImode);")
;; Combine an SI scalar (operand 2) with a V4SI that is either constant
;; zero or tied to the destination (operand 1).  Four alternatives:
;;   - MOVD from memory, operand 1 zero;
;;   - MOVD from a general register, operand 1 zero;
;;   - MOVSS from memory, operand 1 zero;
;;   - MOVSS from another SSE register, operand 1 tied to the output.
5262 (define_insn "sse2_loadld"
5263 [(set (match_operand:V4SI 0 "register_operand" "=Yt,Yi,x,x")
5266 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x"))
5267 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,C,0")
5271 movd\t{%2, %0|%0, %2}
5272 movd\t{%2, %0|%0, %2}
5273 movss\t{%2, %0|%0, %2}
5274 movss\t{%2, %0|%0, %2}"
5275 [(set_attr "type" "ssemov")
5276 (set_attr "mode" "TI,TI,V4SF,SF")])
;; Store element 0 of a V4SI register as an SI value.  After reload,
;; and provided the destination is memory, is not a general register,
;; or inter-unit moves are enabled, the insn is split into a plain SI
;; move with operand 1 rewritten as the same hard register in SImode.
5278 (define_insn_and_split "sse2_stored"
5279 [(set (match_operand:SI 0 "nonimmediate_operand" "=mx,r")
5281 (match_operand:V4SI 1 "register_operand" "x,Yi")
5282 (parallel [(const_int 0)])))]
5285 "&& reload_completed
5286 && (TARGET_INTER_UNIT_MOVES
5287 || MEM_P (operands [0])
5288 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
5289 [(set (match_dup 0) (match_dup 1))]
5291 operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));
;; Extract element N (operand 2, constrained to 0..3) of a V4SI that
;; lives in offsettable memory into a general register.  The split code
;; emits an ordinary SI move from the vector's address adjusted by
;; N*4 bytes.
5294 (define_insn_and_split "*vec_ext_v4si_mem"
5295 [(set (match_operand:SI 0 "register_operand" "=r")
5297 (match_operand:V4SI 1 "memory_operand" "o")
5298 (parallel [(match_operand 2 "const_0_to_3_operand" "")])))]
5304 int i = INTVAL (operands[2]);
5306 emit_move_insn (operands[0], adjust_address (operands[1], SImode, i*4));
;; Expander: store element 0 of a V2DI register (operand 1) into a DI
;; register or memory destination (operand 0).
5310 (define_expand "sse_storeq"
5311 [(set (match_operand:DI 0 "nonimmediate_operand" "")
5313 (match_operand:V2DI 1 "register_operand" "")
5314 (parallel [(const_int 0)])))]
;; 64-bit variant of the element-0 DI store from a V2DI.  There are
;; three alternatives; the third reads the value from offsettable
;; memory into a general register with an integer MOV (type imov,
;; mode DI).  The condition rejects memory-to-memory combinations.
5318 (define_insn "*sse2_storeq_rex64"
5319 [(set (match_operand:DI 0 "nonimmediate_operand" "=mx,r,r")
5321 (match_operand:V2DI 1 "nonimmediate_operand" "x,Yi,o")
5322 (parallel [(const_int 0)])))]
5323 "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
5327 mov{q}\t{%1, %0|%0, %1}"
5328 [(set_attr "type" "*,*,imov")
5329 (set_attr "mode" "*,*,DI")])
;; Element-0 DI store from an SSE register holding a V2DI into memory
;; or another SSE register.  The pattern that follows rewrites such a
;; store as a plain DI move when the destination is memory, is not a
;; general register, or inter-unit moves are enabled; operand 1 is then
;; replaced by the same hard register viewed in DImode.
5331 (define_insn "*sse2_storeq"
5332 [(set (match_operand:DI 0 "nonimmediate_operand" "=mx")
5334 (match_operand:V2DI 1 "register_operand" "x")
5335 (parallel [(const_int 0)])))]
5340 [(set (match_operand:DI 0 "nonimmediate_operand" "")
5342 (match_operand:V2DI 1 "register_operand" "")
5343 (parallel [(const_int 0)])))]
5346 && (TARGET_INTER_UNIT_MOVES
5347 || MEM_P (operands [0])
5348 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
5349 [(set (match_dup 0) (match_dup 1))]
5351 operands[1] = gen_rtx_REG (DImode, REGNO (operands[1]));
;; Extract element 1 of a V2DI on 64-bit targets.  Alternatives:
;;   - MOVHPS from an SSE register to memory;
;;   - PSRLDQ by 8 bytes in place (source tied to the destination);
;;   - MOVQ from an offsettable memory source into an SSE register;
;;   - integer MOV from an offsettable memory source into a general
;;     register.
;; The two memory-source forms print the operand with the %H modifier
;; (presumably addressing its upper 8 bytes -- confirm against the
;; i386 operand-printing code).  Memory-to-memory is rejected.
5354 (define_insn "*vec_extractv2di_1_rex64"
5355 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x,r")
5357 (match_operand:V2DI 1 "nonimmediate_operand" "x,0,o,o")
5358 (parallel [(const_int 1)])))]
5359 "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
5361 movhps\t{%1, %0|%0, %1}
5362 psrldq\t{$8, %0|%0, 8}
5363 movq\t{%H1, %0|%0, %H1}
5364 mov{q}\t{%H1, %0|%0, %H1}"
5365 [(set_attr "type" "ssemov,sseishft,ssemov,imov")
5366 (set_attr "memory" "*,none,*,*")
5367 (set_attr "mode" "V2SF,TI,TI,DI")])
;; Extract element 1 of a V2DI when SSE2 is available.  Alternatives:
;; MOVHPS to memory, PSRLDQ by 8 bytes in place (source tied to the
;; destination), or MOVQ from an offsettable memory source printed with
;; the %H modifier.  Memory-to-memory is rejected by the condition.
5369 (define_insn "*vec_extractv2di_1_sse2"
5370 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
5372 (match_operand:V2DI 1 "nonimmediate_operand" "x,0,o")
5373 (parallel [(const_int 1)])))]
5375 && TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
5377 movhps\t{%1, %0|%0, %1}
5378 psrldq\t{$8, %0|%0, 8}
5379 movq\t{%H1, %0|%0, %H1}"
5380 [(set_attr "type" "ssemov,sseishft,ssemov")
5381 (set_attr "memory" "*,none,*")
5382 (set_attr "mode" "V2SF,TI,TI")])
5384 ;; Not sure this is ever used, but it doesn't hurt to have it. -aoliva
;; Extract element 1 of a V2DI using only SSE1 instructions (enabled
;; when SSE is available but SSE2 is not).  Alternatives: MOVHPS to
;; memory, MOVHLPS between SSE registers, or MOVLPS from an offsettable
;; memory source printed with the %H modifier.
5385 (define_insn "*vec_extractv2di_1_sse"
5386 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
5388 (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o")
5389 (parallel [(const_int 1)])))]
5390 "!TARGET_SSE2 && TARGET_SSE
5391 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
5393 movhps\t{%1, %0|%0, %1}
5394 movhlps\t{%1, %0|%0, %1}
5395 movlps\t{%H1, %0|%0, %H1}"
5396 [(set_attr "type" "ssemov")
5397 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Duplicate an SI value held in an SSE register across a V4SI.
;; Alternative 0 uses PSHUFD with selector 0; alternative 1 ties the
;; source to the destination and uses SHUFPS with selector 0 on that
;; single register.
5399 (define_insn "*vec_dupv4si"
5400 [(set (match_operand:V4SI 0 "register_operand" "=Yt,x")
5402 (match_operand:SI 1 "register_operand" " Yt,0")))]
5405 pshufd\t{$0, %1, %0|%0, %1, 0}
5406 shufps\t{$0, %0, %0|%0, %0, 0}"
5407 [(set_attr "type" "sselog1")
5408 (set_attr "mode" "TI,V4SF")])
;; V2DI duplicate pattern.  In both alternatives the DI source register
;; is tied to the destination; the alternatives differ in the register
;; class of the destination (Yt vs. x) and in their type/mode
;; attributes (sselog1/TI vs. ssemov/V4SF).
5410 (define_insn "*vec_dupv2di"
5411 [(set (match_operand:V2DI 0 "register_operand" "=Yt,x")
5413 (match_operand:DI 1 "register_operand" " 0 ,0")))]
5418 [(set_attr "type" "sselog1,ssemov")
5419 (set_attr "mode" "TI,V4SF")])
5421 ;; ??? In theory we can match memory for the MMX alternative, but allowing
5422 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
5423 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Build a V2SI from two SI values.  When operand 2 is a register the
;; first element is tied to the destination and PUNPCKLDQ brings in the
;; second; when operand 2 is constant zero a MOVD of operand 1
;; suffices.  Each form has two alternatives, one typed as SSE
;; (sselog/ssemov) and one typed as MMX (mmxcvt/mmxmov).
5424 (define_insn "*sse2_concatv2si"
5425 [(set (match_operand:V2SI 0 "register_operand" "=Yt, Yt,*y,*y")
5427 (match_operand:SI 1 "nonimmediate_operand" " 0 ,rm , 0,rm")
5428 (match_operand:SI 2 "reg_or_0_operand" " Yt,C ,*y, C")))]
5431 punpckldq\t{%2, %0|%0, %2}
5432 movd\t{%1, %0|%0, %1}
5433 punpckldq\t{%2, %0|%0, %2}
5434 movd\t{%1, %0|%0, %1}"
5435 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
5436 (set_attr "mode" "TI,TI,DI,DI")])
;; Counterpart of the V2SI concatenation whose first two alternatives
;; use the single-precision instructions UNPCKLPS and MOVSS (the MOVSS
;; form only from memory); the last two alternatives use PUNPCKLDQ and
;; MOVD and are typed as MMX.
5438 (define_insn "*sse1_concatv2si"
5439 [(set (match_operand:V2SI 0 "register_operand" "=x,x,*y,*y")
5441 (match_operand:SI 1 "nonimmediate_operand" " 0,m, 0,*rm")
5442 (match_operand:SI 2 "reg_or_0_operand" " x,C,*y,C")))]
5445 unpcklps\t{%2, %0|%0, %2}
5446 movss\t{%1, %0|%0, %1}
5447 punpckldq\t{%2, %0|%0, %2}
5448 movd\t{%1, %0|%0, %1}"
5449 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
5450 (set_attr "mode" "V4SF,V4SF,DI,DI")])
;; Build a V4SI from two V2SI halves.  Operand 1 is always tied to the
;; destination; operand 2 is brought in with PUNPCKLQDQ or MOVLHPS from
;; a register, or with MOVHPS from memory.
5452 (define_insn "*vec_concatv4si_1"
5453 [(set (match_operand:V4SI 0 "register_operand" "=Yt,x,x")
5455 (match_operand:V2SI 1 "register_operand" " 0 ,0,0")
5456 (match_operand:V2SI 2 "nonimmediate_operand" " Yt,x,m")))]
5459 punpcklqdq\t{%2, %0|%0, %2}
5460 movlhps\t{%2, %0|%0, %2}
5461 movhps\t{%2, %0|%0, %2}"
5462 [(set_attr "type" "sselog,ssemov,ssemov")
5463 (set_attr "mode" "TI,V4SF,V2SF")])
;; Build a V2DI from two DI values on 32-bit targets with SSE.
;; Alternatives, in order:
;;   - MOVQ from memory, operand 2 zero;
;;   - MOVQ2DQ from a 'y'-class register, operand 2 zero;
;;   - PUNPCKLQDQ / MOVLHPS with operand 1 tied and operand 2 in a
;;     register;
;;   - MOVHPS with operand 1 tied and operand 2 in memory;
;;   - MOVLPS with operand 2 tied and operand 1 in memory.
5465 (define_insn "vec_concatv2di"
5466 [(set (match_operand:V2DI 0 "register_operand" "=Yt,?Yt,Yt,x,x,x")
5468 (match_operand:DI 1 "nonimmediate_operand" " m,*y ,0 ,0,0,m")
5469 (match_operand:DI 2 "vector_move_operand" " C, C,Yt,x,m,0")))]
5470 "!TARGET_64BIT && TARGET_SSE"
5472 movq\t{%1, %0|%0, %1}
5473 movq2dq\t{%1, %0|%0, %1}
5474 punpcklqdq\t{%2, %0|%0, %2}
5475 movlhps\t{%2, %0|%0, %2}
5476 movhps\t{%2, %0|%0, %2}
5477 movlps\t{%1, %0|%0, %1}"
5478 [(set_attr "type" "ssemov,ssemov,sselog,ssemov,ssemov,ssemov")
5479 (set_attr "mode" "TI,TI,TI,V4SF,V2SF,V2SF")])
;; V2DI concatenation with one more alternative than vec_concatv2di:
;; the second alternative is a MOVQ whose source is a general register
;; (constraint "r") with operand 2 zero.  The remaining alternatives
;; use the same instructions as vec_concatv2di, in the same order.
5481 (define_insn "*vec_concatv2di_rex"
5482 [(set (match_operand:V2DI 0 "register_operand" "=Yt,Yi,!Yt,Yt,x,x,x")
5484 (match_operand:DI 1 "nonimmediate_operand" " m,r ,*y ,0 ,0,0,m")
5485 (match_operand:DI 2 "vector_move_operand" " C,C ,C ,Yt,x,m,0")))]
5488 movq\t{%1, %0|%0, %1}
5489 movq\t{%1, %0|%0, %1}
5490 movq2dq\t{%1, %0|%0, %1}
5491 punpcklqdq\t{%2, %0|%0, %2}
5492 movlhps\t{%2, %0|%0, %2}
5493 movhps\t{%2, %0|%0, %2}
5494 movlps\t{%1, %0|%0, %1}"
5495 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov,ssemov")
5496 (set_attr "mode" "TI,TI,TI,TI,V4SF,V2SF,V2SF")])
;; Expander: set one element of a V2DI register (operand 0) to the DI
;; value in operand 1.  Operand 2 is the constant element index.  All
;; work is delegated to ix86_expand_vector_set, called with `false' as
;; its first argument.
5498 (define_expand "vec_setv2di"
5499 [(match_operand:V2DI 0 "register_operand" "")
5500 (match_operand:DI 1 "register_operand" "")
5501 (match_operand 2 "const_int_operand" "")]
5504 ix86_expand_vector_set (false, operands[0], operands[1],
5505 INTVAL (operands[2]));
;; Expander: extract one element of a V2DI register (operand 1) into a
;; DI register (operand 0).  Operand 2 is the constant element index.
;; All work is delegated to ix86_expand_vector_extract.
5509 (define_expand "vec_extractv2di"
5510 [(match_operand:DI 0 "register_operand" "")
5511 (match_operand:V2DI 1 "register_operand" "")
5512 (match_operand 2 "const_int_operand" "")]
5515 ix86_expand_vector_extract (false, operands[0], operands[1],
5516 INTVAL (operands[2]));
;; Expander: initialise a V2DI register (operand 0) from operand 1,
;; which carries no predicate or mode here.  All work is delegated to
;; ix86_expand_vector_init.
5520 (define_expand "vec_initv2di"
5521 [(match_operand:V2DI 0 "register_operand" "")
5522 (match_operand 1 "" "")]
5525 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Expander: set one element of a V4SI register (operand 0) to the SI
;; value in operand 1.  Operand 2 is the constant element index.  All
;; work is delegated to ix86_expand_vector_set.
5529 (define_expand "vec_setv4si"
5530 [(match_operand:V4SI 0 "register_operand" "")
5531 (match_operand:SI 1 "register_operand" "")
5532 (match_operand 2 "const_int_operand" "")]
5535 ix86_expand_vector_set (false, operands[0], operands[1],
5536 INTVAL (operands[2]));
;; Expander: extract one element of a V4SI register (operand 1) into an
;; SI register (operand 0).  Operand 2 is the constant element index.
;; All work is delegated to ix86_expand_vector_extract.
5540 (define_expand "vec_extractv4si"
5541 [(match_operand:SI 0 "register_operand" "")
5542 (match_operand:V4SI 1 "register_operand" "")
5543 (match_operand 2 "const_int_operand" "")]
5546 ix86_expand_vector_extract (false, operands[0], operands[1],
5547 INTVAL (operands[2]));
;; Expander: initialise a V4SI register (operand 0) from operand 1,
;; which carries no predicate or mode here.  All work is delegated to
;; ix86_expand_vector_init.
5551 (define_expand "vec_initv4si"
5552 [(match_operand:V4SI 0 "register_operand" "")
5553 (match_operand 1 "" "")]
5556 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Expander: set one element of a V8HI register (operand 0) to the HI
;; value in operand 1.  Operand 2 is the constant element index.  All
;; work is delegated to ix86_expand_vector_set.
5560 (define_expand "vec_setv8hi"
5561 [(match_operand:V8HI 0 "register_operand" "")
5562 (match_operand:HI 1 "register_operand" "")
5563 (match_operand 2 "const_int_operand" "")]
5566 ix86_expand_vector_set (false, operands[0], operands[1],
5567 INTVAL (operands[2]));
;; Expander: extract one element of a V8HI register (operand 1) into an
;; HI register (operand 0).  Operand 2 is the constant element index.
;; All work is delegated to ix86_expand_vector_extract.
5571 (define_expand "vec_extractv8hi"
5572 [(match_operand:HI 0 "register_operand" "")
5573 (match_operand:V8HI 1 "register_operand" "")
5574 (match_operand 2 "const_int_operand" "")]
5577 ix86_expand_vector_extract (false, operands[0], operands[1],
5578 INTVAL (operands[2]));
;; Expander: initialise a V8HI register (operand 0) from operand 1,
;; which carries no predicate or mode here.  All work is delegated to
;; ix86_expand_vector_init.
5582 (define_expand "vec_initv8hi"
5583 [(match_operand:V8HI 0 "register_operand" "")
5584 (match_operand 1 "" "")]
5587 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Expander: set one element of a V16QI register (operand 0) to the QI
;; value in operand 1.  Operand 2 is the constant element index.  All
;; work is delegated to ix86_expand_vector_set.
5591 (define_expand "vec_setv16qi"
5592 [(match_operand:V16QI 0 "register_operand" "")
5593 (match_operand:QI 1 "register_operand" "")
5594 (match_operand 2 "const_int_operand" "")]
5597 ix86_expand_vector_set (false, operands[0], operands[1],
5598 INTVAL (operands[2]));
;; Expander: extract one element of a V16QI register (operand 1) into a
;; QI register (operand 0).  Operand 2 is the constant element index.
;; All work is delegated to ix86_expand_vector_extract.
5602 (define_expand "vec_extractv16qi"
5603 [(match_operand:QI 0 "register_operand" "")
5604 (match_operand:V16QI 1 "register_operand" "")
5605 (match_operand 2 "const_int_operand" "")]
5608 ix86_expand_vector_extract (false, operands[0], operands[1],
5609 INTVAL (operands[2]));
;; Expander: initialise a V16QI register (operand 0) from operand 1,
;; which carries no predicate or mode here.  All work is delegated to
;; ix86_expand_vector_init.
5613 (define_expand "vec_initv16qi"
5614 [(match_operand:V16QI 0 "register_operand" "")
5615 (match_operand 1 "" "")]
5618 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Unpack expander from V16QI (operand 1) to V8HI (operand 0).  It
;; dispatches to one of ix86_expand_sse4_unpack,
;; ix86_expand_sse5_unpack (under TARGET_SSE5) or
;; ix86_expand_sse_unpack, always with the flags (true, true).
;; NOTE(review): going by the pattern name the flags presumably mean
;; "unsigned" and "high half" -- confirm against the helpers.
5622 (define_expand "vec_unpacku_hi_v16qi"
5623 [(match_operand:V8HI 0 "register_operand" "")
5624 (match_operand:V16QI 1 "register_operand" "")]
5628 ix86_expand_sse4_unpack (operands, true, true);
5629 else if (TARGET_SSE5)
5630 ix86_expand_sse5_unpack (operands, true, true);
5632 ix86_expand_sse_unpack (operands, true, true);
;; Unpack expander from V16QI (operand 1) to V8HI (operand 0).  It
;; dispatches to one of ix86_expand_sse4_unpack,
;; ix86_expand_sse5_unpack (under TARGET_SSE5) or
;; ix86_expand_sse_unpack, always with the flags (false, true).
;; NOTE(review): going by the pattern name the flags presumably mean
;; "signed" and "high half" -- confirm against the helpers.
5636 (define_expand "vec_unpacks_hi_v16qi"
5637 [(match_operand:V8HI 0 "register_operand" "")
5638 (match_operand:V16QI 1 "register_operand" "")]
5642 ix86_expand_sse4_unpack (operands, false, true);
5643 else if (TARGET_SSE5)
5644 ix86_expand_sse5_unpack (operands, false, true);
5646 ix86_expand_sse_unpack (operands, false, true);
;; Unpack expander from V16QI (operand 1) to V8HI (operand 0).  It
;; dispatches to one of ix86_expand_sse4_unpack,
;; ix86_expand_sse5_unpack (under TARGET_SSE5) or
;; ix86_expand_sse_unpack, always with the flags (true, false).
;; NOTE(review): going by the pattern name the flags presumably mean
;; "unsigned" and "low half" -- confirm against the helpers.
5650 (define_expand "vec_unpacku_lo_v16qi"
5651 [(match_operand:V8HI 0 "register_operand" "")
5652 (match_operand:V16QI 1 "register_operand" "")]
5656 ix86_expand_sse4_unpack (operands, true, false);
5657 else if (TARGET_SSE5)
5658 ix86_expand_sse5_unpack (operands, true, false);
5660 ix86_expand_sse_unpack (operands, true, false);
;; Unpack expander from V16QI (operand 1) to V8HI (operand 0).  It
;; dispatches to one of ix86_expand_sse4_unpack,
;; ix86_expand_sse5_unpack (under TARGET_SSE5) or
;; ix86_expand_sse_unpack, always with the flags (false, false).
;; NOTE(review): going by the pattern name the flags presumably mean
;; "signed" and "low half" -- confirm against the helpers.
5664 (define_expand "vec_unpacks_lo_v16qi"
5665 [(match_operand:V8HI 0 "register_operand" "")
5666 (match_operand:V16QI 1 "register_operand" "")]
5670 ix86_expand_sse4_unpack (operands, false, false);
5671 else if (TARGET_SSE5)
5672 ix86_expand_sse5_unpack (operands, false, false);
5674 ix86_expand_sse_unpack (operands, false, false);
;; Unpack expander from V8HI (operand 1) to V4SI (operand 0).  It
;; dispatches to one of ix86_expand_sse4_unpack,
;; ix86_expand_sse5_unpack (under TARGET_SSE5) or
;; ix86_expand_sse_unpack, always with the flags (true, true).
;; NOTE(review): going by the pattern name the flags presumably mean
;; "unsigned" and "high half" -- confirm against the helpers.
5678 (define_expand "vec_unpacku_hi_v8hi"
5679 [(match_operand:V4SI 0 "register_operand" "")
5680 (match_operand:V8HI 1 "register_operand" "")]
5684 ix86_expand_sse4_unpack (operands, true, true);
5685 else if (TARGET_SSE5)
5686 ix86_expand_sse5_unpack (operands, true, true);
5688 ix86_expand_sse_unpack (operands, true, true);
;; Unpack expander from V8HI (operand 1) to V4SI (operand 0).  It
;; dispatches to one of ix86_expand_sse4_unpack,
;; ix86_expand_sse5_unpack (under TARGET_SSE5) or
;; ix86_expand_sse_unpack, always with the flags (false, true).
;; NOTE(review): going by the pattern name the flags presumably mean
;; "signed" and "high half" -- confirm against the helpers.
5692 (define_expand "vec_unpacks_hi_v8hi"
5693 [(match_operand:V4SI 0 "register_operand" "")
5694 (match_operand:V8HI 1 "register_operand" "")]
5698 ix86_expand_sse4_unpack (operands, false, true);
5699 else if (TARGET_SSE5)
5700 ix86_expand_sse5_unpack (operands, false, true);
5702 ix86_expand_sse_unpack (operands, false, true);
;; Unpack expander from V8HI (operand 1) to V4SI (operand 0).  It
;; dispatches to one of ix86_expand_sse4_unpack,
;; ix86_expand_sse5_unpack (under TARGET_SSE5) or
;; ix86_expand_sse_unpack, always with the flags (true, false).
;; NOTE(review): going by the pattern name the flags presumably mean
;; "unsigned" and "low half" -- confirm against the helpers.
5706 (define_expand "vec_unpacku_lo_v8hi"
5707 [(match_operand:V4SI 0 "register_operand" "")
5708 (match_operand:V8HI 1 "register_operand" "")]
5712 ix86_expand_sse4_unpack (operands, true, false);
5713 else if (TARGET_SSE5)
5714 ix86_expand_sse5_unpack (operands, true, false);
5716 ix86_expand_sse_unpack (operands, true, false);
;; Unpack expander from V8HI (operand 1) to V4SI (operand 0).  It
;; dispatches to one of ix86_expand_sse4_unpack,
;; ix86_expand_sse5_unpack (under TARGET_SSE5) or
;; ix86_expand_sse_unpack, always with the flags (false, false).
;; NOTE(review): going by the pattern name the flags presumably mean
;; "signed" and "low half" -- confirm against the helpers.
5720 (define_expand "vec_unpacks_lo_v8hi"
5721 [(match_operand:V4SI 0 "register_operand" "")
5722 (match_operand:V8HI 1 "register_operand" "")]
5726 ix86_expand_sse4_unpack (operands, false, false);
5727 else if (TARGET_SSE5)
5728 ix86_expand_sse5_unpack (operands, false, false);
5730 ix86_expand_sse_unpack (operands, false, false);
;; Unpack expander from V4SI (operand 1) to V2DI (operand 0).  It
;; dispatches to one of ix86_expand_sse4_unpack,
;; ix86_expand_sse5_unpack (under TARGET_SSE5) or
;; ix86_expand_sse_unpack, always with the flags (true, true).
;; NOTE(review): going by the pattern name the flags presumably mean
;; "unsigned" and "high half" -- confirm against the helpers.
5734 (define_expand "vec_unpacku_hi_v4si"
5735 [(match_operand:V2DI 0 "register_operand" "")
5736 (match_operand:V4SI 1 "register_operand" "")]
5740 ix86_expand_sse4_unpack (operands, true, true);
5741 else if (TARGET_SSE5)
5742 ix86_expand_sse5_unpack (operands, true, true);
5744 ix86_expand_sse_unpack (operands, true, true);
;; Unpack expander from V4SI (operand 1) to V2DI (operand 0).  It
;; dispatches to one of ix86_expand_sse4_unpack,
;; ix86_expand_sse5_unpack (under TARGET_SSE5) or
;; ix86_expand_sse_unpack, always with the flags (false, true).
;; NOTE(review): going by the pattern name the flags presumably mean
;; "signed" and "high half" -- confirm against the helpers.
5748 (define_expand "vec_unpacks_hi_v4si"
5749 [(match_operand:V2DI 0 "register_operand" "")
5750 (match_operand:V4SI 1 "register_operand" "")]
5754 ix86_expand_sse4_unpack (operands, false, true);
5755 else if (TARGET_SSE5)
5756 ix86_expand_sse5_unpack (operands, false, true);
5758 ix86_expand_sse_unpack (operands, false, true);
;; Unpack expander from V4SI (operand 1) to V2DI (operand 0).  It
;; dispatches to one of ix86_expand_sse4_unpack,
;; ix86_expand_sse5_unpack (under TARGET_SSE5) or
;; ix86_expand_sse_unpack, always with the flags (true, false).
;; NOTE(review): going by the pattern name the flags presumably mean
;; "unsigned" and "low half" -- confirm against the helpers.
5762 (define_expand "vec_unpacku_lo_v4si"
5763 [(match_operand:V2DI 0 "register_operand" "")
5764 (match_operand:V4SI 1 "register_operand" "")]
5768 ix86_expand_sse4_unpack (operands, true, false);
5769 else if (TARGET_SSE5)
5770 ix86_expand_sse5_unpack (operands, true, false);
5772 ix86_expand_sse_unpack (operands, true, false);
;; Unpack expander from V4SI (operand 1) to V2DI (operand 0).  It
;; dispatches to one of ix86_expand_sse4_unpack,
;; ix86_expand_sse5_unpack (under TARGET_SSE5) or
;; ix86_expand_sse_unpack, always with the flags (false, false).
;; NOTE(review): going by the pattern name the flags presumably mean
;; "signed" and "low half" -- confirm against the helpers.
5776 (define_expand "vec_unpacks_lo_v4si"
5777 [(match_operand:V2DI 0 "register_operand" "")
5778 (match_operand:V4SI 1 "register_operand" "")]
5782 ix86_expand_sse4_unpack (operands, false, false);
5783 else if (TARGET_SSE5)
5784 ix86_expand_sse5_unpack (operands, false, false);
5786 ix86_expand_sse_unpack (operands, false, false);
5790 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5794 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; PAVGB on V16QI operands (requires SSE2).  Operand 1 is marked
;; commutative and tied to the destination ("%0"); operand 2 may be a
;; register or memory.  The RTL includes a V16QI vector of ones;
;; NOTE(review): presumably the rounding increment of the average --
;; confirm against the full pattern.
5796 (define_insn "sse2_uavgv16qi3"
5797 [(set (match_operand:V16QI 0 "register_operand" "=x")
5803 (match_operand:V16QI 1 "nonimmediate_operand" "%0"))
5805 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))
5806 (const_vector:V16QI [(const_int 1) (const_int 1)
5807 (const_int 1) (const_int 1)
5808 (const_int 1) (const_int 1)
5809 (const_int 1) (const_int 1)
5810 (const_int 1) (const_int 1)
5811 (const_int 1) (const_int 1)
5812 (const_int 1) (const_int 1)
5813 (const_int 1) (const_int 1)]))
5815 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
5816 "pavgb\t{%2, %0|%0, %2}"
5817 [(set_attr "type" "sseiadd")
5818 (set_attr "prefix_data16" "1")
5819 (set_attr "mode" "TI")])
;; PAVGW on V8HI operands (requires SSE2).  Operand 1 is marked
;; commutative and tied to the destination ("%0"); operand 2 may be a
;; register or memory.  The RTL includes a V8HI vector of ones;
;; NOTE(review): presumably the rounding increment of the average --
;; confirm against the full pattern.
5821 (define_insn "sse2_uavgv8hi3"
5822 [(set (match_operand:V8HI 0 "register_operand" "=x")
5828 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
5830 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
5831 (const_vector:V8HI [(const_int 1) (const_int 1)
5832 (const_int 1) (const_int 1)
5833 (const_int 1) (const_int 1)
5834 (const_int 1) (const_int 1)]))
5836 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
5837 "pavgw\t{%2, %0|%0, %2}"
5838 [(set_attr "type" "sseiadd")
5839 (set_attr "prefix_data16" "1")
5840 (set_attr "mode" "TI")])
5842 ;; The correct representation for this is absolutely enormous, and
5843 ;; surely not generally useful.
;; PSADBW modelled as an opaque unspec over two V16QI inputs that
;; yields a V2DI.  Operand 1 is tied to the destination; operand 2 may
;; be a register or memory.
5844 (define_insn "sse2_psadbw"
5845 [(set (match_operand:V2DI 0 "register_operand" "=x")
5846 (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "0")
5847 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
5850 "psadbw\t{%2, %0|%0, %2}"
5851 [(set_attr "type" "sseiadd")
5852 (set_attr "prefix_data16" "1")
5853 (set_attr "mode" "TI")])
;; MOVMSKPS: an unspec over a V4SF held in an SSE register, producing
;; an SI result in a general register.
5855 (define_insn "sse_movmskps"
5856 [(set (match_operand:SI 0 "register_operand" "=r")
5857 (unspec:SI [(match_operand:V4SF 1 "register_operand" "x")]
5860 "movmskps\t{%1, %0|%0, %1}"
5861 [(set_attr "type" "ssecvt")
5862 (set_attr "mode" "V4SF")])
;; MOVMSKPD: an unspec over a V2DF held in an SSE register, producing
;; an SI result in a general register.
5864 (define_insn "sse2_movmskpd"
5865 [(set (match_operand:SI 0 "register_operand" "=r")
5866 (unspec:SI [(match_operand:V2DF 1 "register_operand" "x")]
5869 "movmskpd\t{%1, %0|%0, %1}"
5870 [(set_attr "type" "ssecvt")
5871 (set_attr "mode" "V2DF")])
;; PMOVMSKB: an unspec over a V16QI held in an SSE register, producing
;; an SI result in a general register.
5873 (define_insn "sse2_pmovmskb"
5874 [(set (match_operand:SI 0 "register_operand" "=r")
5875 (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
5878 "pmovmskb\t{%1, %0|%0, %1}"
5879 [(set_attr "type" "ssecvt")
5880 (set_attr "prefix_data16" "1")
5881 (set_attr "mode" "SI")])
;; Expander for MASKMOVDQU: a V16QI memory destination (operand 0) set
;; from an unspec whose inputs include two V16QI registers (operands 1
;; and 2).
5883 (define_expand "sse2_maskmovdqu"
5884 [(set (match_operand:V16QI 0 "memory_operand" "")
5885 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "")
5886 (match_operand:V16QI 2 "register_operand" "")
;; MASKMOVDQU for 32-bit targets.  The destination address is an SI
;; register pinned by constraint "D"; the unspec takes the two V16QI
;; registers and also the prior contents of the destination memory
;; (the match_dup of operand 0).  Only operands 1 and 2 appear in the
;; printed instruction.
5892 (define_insn "*sse2_maskmovdqu"
5893 [(set (mem:V16QI (match_operand:SI 0 "register_operand" "D"))
5894 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
5895 (match_operand:V16QI 2 "register_operand" "x")
5896 (mem:V16QI (match_dup 0))]
5898 "TARGET_SSE2 && !TARGET_64BIT"
5899 ;; @@@ check ordering of operands in intel/nonintel syntax
5900 "maskmovdqu\t{%2, %1|%1, %2}"
5901 [(set_attr "type" "ssecvt")
5902 (set_attr "prefix_data16" "1")
5903 (set_attr "mode" "TI")])
;; MASKMOVDQU for 64-bit targets.  Identical to the 32-bit form except
;; that the destination address (operand 0, constraint "D") is a DI
;; register.  Only operands 1 and 2 appear in the printed instruction.
5905 (define_insn "*sse2_maskmovdqu_rex64"
5906 [(set (mem:V16QI (match_operand:DI 0 "register_operand" "D"))
5907 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
5908 (match_operand:V16QI 2 "register_operand" "x")
5909 (mem:V16QI (match_dup 0))]
5911 "TARGET_SSE2 && TARGET_64BIT"
5912 ;; @@@ check ordering of operands in intel/nonintel syntax
5913 "maskmovdqu\t{%2, %1|%1, %2}"
5914 [(set_attr "type" "ssecvt")
5915 (set_attr "prefix_data16" "1")
5916 (set_attr "mode" "TI")])
;; LDMXCSR: a volatile unspec taking an SI memory operand; the "memory"
;; attribute marks the insn as a load.
5918 (define_insn "sse_ldmxcsr"
5919 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
5923 [(set_attr "type" "sse")
5924 (set_attr "memory" "load")])
;; STMXCSR: sets an SI memory operand from the volatile unspec
;; UNSPECV_STMXCSR; the "memory" attribute marks the insn as a store.
5926 (define_insn "sse_stmxcsr"
5927 [(set (match_operand:SI 0 "memory_operand" "=m")
5928 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
5931 [(set_attr "type" "sse")
5932 (set_attr "memory" "store")])
;; Expander for SFENCE (available with SSE or 3DNow!-A).  Operand 0 is
;; synthesised in the preparation code as a volatile BLKmode MEM whose
;; address is a Pmode scratch; the UNSPEC_SFENCE takes that MEM as its
;; input.
5934 (define_expand "sse_sfence"
5936 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
5937 "TARGET_SSE || TARGET_3DNOW_A"
5939 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
5940 MEM_VOLATILE_P (operands[0]) = 1;
;; Matching insn for the SFENCE expander: a BLKmode operand set from
;; an UNSPEC_SFENCE of itself.  Its memory effect is declared
;; "unknown".
5943 (define_insn "*sse_sfence"
5944 [(set (match_operand:BLK 0 "" "")
5945 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
5946 "TARGET_SSE || TARGET_3DNOW_A"
5948 [(set_attr "type" "sse")
5949 (set_attr "memory" "unknown")])
;; CLFLUSH: a volatile unspec over an address operand (constraint "p").
;; Its memory effect is declared "unknown".
5951 (define_insn "sse2_clflush"
5952 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
5956 [(set_attr "type" "sse")
5957 (set_attr "memory" "unknown")])
;; Expander for MFENCE.  Operand 0 is synthesised in the preparation
;; code as a volatile BLKmode MEM whose address is a Pmode scratch; the
;; UNSPEC_MFENCE takes that MEM as its input.
5959 (define_expand "sse2_mfence"
5961 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
5964 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
5965 MEM_VOLATILE_P (operands[0]) = 1;
;; Matching insn for the MFENCE expander: a BLKmode operand set from
;; an UNSPEC_MFENCE of itself.  Its memory effect is declared
;; "unknown".
5968 (define_insn "*sse2_mfence"
5969 [(set (match_operand:BLK 0 "" "")
5970 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
5973 [(set_attr "type" "sse")
5974 (set_attr "memory" "unknown")])
;; Expander for LFENCE.  Operand 0 is synthesised in the preparation
;; code as a volatile BLKmode MEM whose address is a Pmode scratch; the
;; UNSPEC_LFENCE takes that MEM as its input.
5976 (define_expand "sse2_lfence"
5978 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
5981 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
5982 MEM_VOLATILE_P (operands[0]) = 1;
;; Matching insn for the LFENCE expander: a BLKmode operand set from
;; an UNSPEC_LFENCE of itself.  Its memory effect is declared
;; "unknown".
5985 (define_insn "*sse2_lfence"
5986 [(set (match_operand:BLK 0 "" "")
5987 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
5990 [(set_attr "type" "sse")
5991 (set_attr "memory" "unknown")])
;; MWAIT: a volatile unspec over two SI registers pinned by constraints
;; "a" and "c".  The same SI-mode pattern serves 64-bit targets, for
;; the reason given in the comment inside the pattern.  The encoded
;; length is fixed at 3 bytes.
5993 (define_insn "sse3_mwait"
5994 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
5995 (match_operand:SI 1 "register_operand" "c")]
5998 ;; 64bit version is "mwait %rax,%rcx". But only lower 32bits are used.
5999 ;; Since 32bit register operands are implicitly zero extended to 64bit,
6000 ;; we only need to set up 32bit registers.
6002 [(set_attr "length" "3")])
;; MONITOR for 32-bit targets: a volatile unspec over three SI
;; registers pinned by constraints "a", "c" and "d".  The encoded
;; length is fixed at 3 bytes.
6004 (define_insn "sse3_monitor"
6005 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
6006 (match_operand:SI 1 "register_operand" "c")
6007 (match_operand:SI 2 "register_operand" "d")]
6009 "TARGET_SSE3 && !TARGET_64BIT"
6010 "monitor\t%0, %1, %2"
6011 [(set_attr "length" "3")])
;; MONITOR for 64-bit targets.  Operand 0 (constraint "a") is DImode;
;; operands 1 and 2 (constraints "c" and "d") stay SImode, for the
;; reason given in the comment inside the pattern.  The encoded length
;; is fixed at 3 bytes.
6013 (define_insn "sse3_monitor64"
6014 [(unspec_volatile [(match_operand:DI 0 "register_operand" "a")
6015 (match_operand:SI 1 "register_operand" "c")
6016 (match_operand:SI 2 "register_operand" "d")]
6018 "TARGET_SSE3 && TARGET_64BIT"
6019 ;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in
6020 ;; RCX and RDX are used. Since 32bit register operands are implicitly
6021 ;; zero extended to 64bit, we only need to set up 32bit registers.
6023 [(set_attr "length" "3")])
;; PHADDW, V8HI form in SSE registers.  The RTL walks adjacent element
;; pairs (0,1), (2,3), (4,5), (6,7) of operand 1 and then the same
;; pairs of operand 2.  Operand 1 is tied to the destination; operand 2
;; may be a register or memory.
6026 (define_insn "ssse3_phaddwv8hi3"
6027 [(set (match_operand:V8HI 0 "register_operand" "=x")
6033 (match_operand:V8HI 1 "register_operand" "0")
6034 (parallel [(const_int 0)]))
6035 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
6037 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
6038 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
6041 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
6042 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
6044 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
6045 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
6050 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
6051 (parallel [(const_int 0)]))
6052 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
6054 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
6055 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
6058 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
6059 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
6061 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
6062 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
6064 "phaddw\t{%2, %0|%0, %2}"
6065 [(set_attr "type" "sseiadd")
6066 (set_attr "prefix_data16" "1")
6067 (set_attr "prefix_extra" "1")
6068 (set_attr "mode" "TI")])
;; PHADDW, V4HI form in 'y'-class registers (mode attribute DI).  The
;; RTL walks adjacent element pairs (0,1), (2,3) of operand 1 and then
;; of operand 2.  Operand 1 is tied to the destination; operand 2 may
;; be a register or memory.
6070 (define_insn "ssse3_phaddwv4hi3"
6071 [(set (match_operand:V4HI 0 "register_operand" "=y")
6076 (match_operand:V4HI 1 "register_operand" "0")
6077 (parallel [(const_int 0)]))
6078 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
6080 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
6081 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
6085 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
6086 (parallel [(const_int 0)]))
6087 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
6089 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
6090 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
6092 "phaddw\t{%2, %0|%0, %2}"
6093 [(set_attr "type" "sseiadd")
6094 (set_attr "prefix_extra" "1")
6095 (set_attr "mode" "DI")])
;; PHADDD, V4SI form in SSE registers.  The RTL walks adjacent element
;; pairs (0,1), (2,3) of operand 1 and then of operand 2.  Operand 1 is
;; tied to the destination; operand 2 may be a register or memory.
6097 (define_insn "ssse3_phadddv4si3"
6098 [(set (match_operand:V4SI 0 "register_operand" "=x")
6103 (match_operand:V4SI 1 "register_operand" "0")
6104 (parallel [(const_int 0)]))
6105 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
6107 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
6108 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
6112 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
6113 (parallel [(const_int 0)]))
6114 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
6116 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
6117 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
6119 "phaddd\t{%2, %0|%0, %2}"
6120 [(set_attr "type" "sseiadd")
6121 (set_attr "prefix_data16" "1")
6122 (set_attr "prefix_extra" "1")
6123 (set_attr "mode" "TI")])
;; PHADDD, V2SI form in 'y'-class registers (mode attribute DI).  The
;; RTL pairs elements 0 and 1 of operand 1 and then of operand 2.
;; Operand 1 is tied to the destination; operand 2 may be a register or
;; memory.
6125 (define_insn "ssse3_phadddv2si3"
6126 [(set (match_operand:V2SI 0 "register_operand" "=y")
6130 (match_operand:V2SI 1 "register_operand" "0")
6131 (parallel [(const_int 0)]))
6132 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
6135 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
6136 (parallel [(const_int 0)]))
6137 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
6139 "phaddd\t{%2, %0|%0, %2}"
6140 [(set_attr "type" "sseiadd")
6141 (set_attr "prefix_extra" "1")
6142 (set_attr "mode" "DI")])
;; PHADDSW, V8HI form in SSE registers.  The RTL walks adjacent element
;; pairs (0,1), (2,3), (4,5), (6,7) of operand 1 and then the same
;; pairs of operand 2.  Operand 1 is tied to the destination; operand 2
;; may be a register or memory.
6144 (define_insn "ssse3_phaddswv8hi3"
6145 [(set (match_operand:V8HI 0 "register_operand" "=x")
6151 (match_operand:V8HI 1 "register_operand" "0")
6152 (parallel [(const_int 0)]))
6153 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
6155 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
6156 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
6159 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
6160 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
6162 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
6163 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
6168 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
6169 (parallel [(const_int 0)]))
6170 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
6172 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
6173 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
6176 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
6177 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
6179 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
6180 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
6182 "phaddsw\t{%2, %0|%0, %2}"
6183 [(set_attr "type" "sseiadd")
6184 (set_attr "prefix_data16" "1")
6185 (set_attr "prefix_extra" "1")
6186 (set_attr "mode" "TI")])
;; PHADDSW, V4HI form in 'y'-class registers (mode attribute DI).  The
;; RTL walks adjacent element pairs (0,1), (2,3) of operand 1 and then
;; of operand 2.  Operand 1 is tied to the destination; operand 2 may
;; be a register or memory.
6188 (define_insn "ssse3_phaddswv4hi3"
6189 [(set (match_operand:V4HI 0 "register_operand" "=y")
6194 (match_operand:V4HI 1 "register_operand" "0")
6195 (parallel [(const_int 0)]))
6196 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
6198 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
6199 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
6203 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
6204 (parallel [(const_int 0)]))
6205 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
6207 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
6208 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
6210 "phaddsw\t{%2, %0|%0, %2}"
6211 [(set_attr "type" "sseiadd")
6212 (set_attr "prefix_extra" "1")
6213 (set_attr "mode" "DI")])
;; PHSUBW, V8HI form in SSE registers.  The RTL walks adjacent element
;; pairs (0,1), (2,3), (4,5), (6,7) of operand 1 and then the same
;; pairs of operand 2.  Operand 1 is tied to the destination; operand 2
;; may be a register or memory.
6215 (define_insn "ssse3_phsubwv8hi3"
6216 [(set (match_operand:V8HI 0 "register_operand" "=x")
6222 (match_operand:V8HI 1 "register_operand" "0")
6223 (parallel [(const_int 0)]))
6224 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
6226 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
6227 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
6230 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
6231 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
6233 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
6234 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
6239 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
6240 (parallel [(const_int 0)]))
6241 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
6243 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
6244 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
6247 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
6248 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
6250 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
6251 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
6253 "phsubw\t{%2, %0|%0, %2}"
6254 [(set_attr "type" "sseiadd")
6255 (set_attr "prefix_data16" "1")
6256 (set_attr "prefix_extra" "1")
6257 (set_attr "mode" "TI")])
;; PHSUBW, V4HI form in 'y'-class registers (mode attribute DI).  The
;; RTL walks adjacent element pairs (0,1), (2,3) of operand 1 and then
;; of operand 2.  Operand 1 is tied to the destination; operand 2 may
;; be a register or memory.
6259 (define_insn "ssse3_phsubwv4hi3"
6260 [(set (match_operand:V4HI 0 "register_operand" "=y")
6265 (match_operand:V4HI 1 "register_operand" "0")
6266 (parallel [(const_int 0)]))
6267 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
6269 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
6270 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
6274 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
6275 (parallel [(const_int 0)]))
6276 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
6278 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
6279 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
6281 "phsubw\t{%2, %0|%0, %2}"
6282 [(set_attr "type" "sseiadd")
6283 (set_attr "prefix_extra" "1")
6284 (set_attr "mode" "DI")])
;; PHSUBD, V4SI form in SSE registers.  The RTL walks adjacent element
;; pairs (0,1), (2,3) of operand 1 and then of operand 2.  Operand 1 is
;; tied to the destination; operand 2 may be a register or memory.
6286 (define_insn "ssse3_phsubdv4si3"
6287 [(set (match_operand:V4SI 0 "register_operand" "=x")
6292 (match_operand:V4SI 1 "register_operand" "0")
6293 (parallel [(const_int 0)]))
6294 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
6296 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
6297 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
6301 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
6302 (parallel [(const_int 0)]))
6303 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
6305 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
6306 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
6308 "phsubd\t{%2, %0|%0, %2}"
6309 [(set_attr "type" "sseiadd")
6310 (set_attr "prefix_data16" "1")
6311 (set_attr "prefix_extra" "1")
6312 (set_attr "mode" "TI")])
;; PHSUBD, V2SI form in 'y'-class registers (mode attribute DI).  The
;; RTL pairs elements 0 and 1 of operand 1 and then of operand 2.
;; Operand 1 is tied to the destination; operand 2 may be a register or
;; memory.
6314 (define_insn "ssse3_phsubdv2si3"
6315 [(set (match_operand:V2SI 0 "register_operand" "=y")
6319 (match_operand:V2SI 1 "register_operand" "0")
6320 (parallel [(const_int 0)]))
6321 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
6324 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
6325 (parallel [(const_int 0)]))
6326 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
6328 "phsubd\t{%2, %0|%0, %2}"
6329 [(set_attr "type" "sseiadd")
6330 (set_attr "prefix_extra" "1")
6331 (set_attr "mode" "DI")])
;; PHSUBSW, V8HI form in SSE registers.  The RTL walks adjacent element
;; pairs (0,1), (2,3), (4,5), (6,7) of operand 1 and then the same
;; pairs of operand 2.  Operand 1 is tied to the destination; operand 2
;; may be a register or memory.
6333 (define_insn "ssse3_phsubswv8hi3"
6334 [(set (match_operand:V8HI 0 "register_operand" "=x")
6340 (match_operand:V8HI 1 "register_operand" "0")
6341 (parallel [(const_int 0)]))
6342 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
6344 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
6345 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
6348 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
6349 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
6351 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
6352 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
6357 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
6358 (parallel [(const_int 0)]))
6359 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
6361 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
6362 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
6365 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
6366 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
6368 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
6369 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
6371 "phsubsw\t{%2, %0|%0, %2}"
6372 [(set_attr "type" "sseiadd")
6373 (set_attr "prefix_data16" "1")
6374 (set_attr "prefix_extra" "1")
6375 (set_attr "mode" "TI")])
;; PHSUBSW, V4HI form in 'y'-class registers (mode attribute DI).  The
;; RTL walks adjacent element pairs (0,1), (2,3) of operand 1 and then
;; of operand 2.  Operand 1 is tied to the destination; operand 2 may
;; be a register or memory.
6377 (define_insn "ssse3_phsubswv4hi3"
6378 [(set (match_operand:V4HI 0 "register_operand" "=y")
6383 (match_operand:V4HI 1 "register_operand" "0")
6384 (parallel [(const_int 0)]))
6385 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
6387 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
6388 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
6392 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
6393 (parallel [(const_int 0)]))
6394 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
6396 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
6397 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
6399 "phsubsw\t{%2, %0|%0, %2}"
6400 [(set_attr "type" "sseiadd")
6401 (set_attr "prefix_extra" "1")
6402 (set_attr "mode" "DI")])
;; SSE form of pmaddubsw: V16QI inputs, V8HI result.  Operand 1 is tied to
;; the destination and is deliberately NOT marked commutative: the
;; instruction treats the bytes of its destination operand as unsigned and
;; the bytes of its source operand as signed, so exchanging the two inputs
;; would change the computed value.  Operand 2 may be a register or memory.
6404 (define_insn "ssse3_pmaddubswv8hi3"
6405 [(set (match_operand:V8HI 0 "register_operand" "=x")
6410 (match_operand:V16QI 1 "register_operand" "0")
6411 (parallel [(const_int 0)
6421 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
6422 (parallel [(const_int 0)
6432 (vec_select:V16QI (match_dup 1)
6433 (parallel [(const_int 1)
6442 (vec_select:V16QI (match_dup 2)
6443 (parallel [(const_int 1)
6450 (const_int 15)]))))))]
6452 "pmaddubsw\t{%2, %0|%0, %2}"
6453 [(set_attr "type" "sseiadd")
6454 (set_attr "prefix_data16" "1")
6455 (set_attr "prefix_extra" "1")
6456 (set_attr "mode" "TI")])
;; MMX form of pmaddubsw: V8QI inputs, V4HI result in an MMX register.
;; Operand 1 is tied to the destination and is deliberately NOT marked
;; commutative: the instruction treats the bytes of its destination operand
;; as unsigned and the bytes of its source operand as signed, so exchanging
;; the two inputs would change the computed value.
6458 (define_insn "ssse3_pmaddubswv4hi3"
6459 [(set (match_operand:V4HI 0 "register_operand" "=y")
6464 (match_operand:V8QI 1 "register_operand" "0")
6465 (parallel [(const_int 0)
6471 (match_operand:V8QI 2 "nonimmediate_operand" "ym")
6472 (parallel [(const_int 0)
6478 (vec_select:V8QI (match_dup 1)
6479 (parallel [(const_int 1)
6484 (vec_select:V8QI (match_dup 2)
6485 (parallel [(const_int 1)
6488 (const_int 7)]))))))]
6490 "pmaddubsw\t{%2, %0|%0, %2}"
6491 [(set_attr "type" "sseiadd")
6492 (set_attr "prefix_extra" "1")
6493 (set_attr "mode" "DI")])
;; SSE form of pmulhrsw on V8HI.  Operands 1 and 2 are commutative ("%0"),
;; and the insn is valid only when TARGET_SSSE3 holds and
;; ix86_binary_operator_ok accepts the operands as a MULT.  The pattern
;; contains an all-ones V8HI constant vector.
6495 (define_insn "ssse3_pmulhrswv8hi3"
6496 [(set (match_operand:V8HI 0 "register_operand" "=x")
6503 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
6505 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
6507 (const_vector:V8HI [(const_int 1) (const_int 1)
6508 (const_int 1) (const_int 1)
6509 (const_int 1) (const_int 1)
6510 (const_int 1) (const_int 1)]))
6512 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
6513 "pmulhrsw\t{%2, %0|%0, %2}"
6514 [(set_attr "type" "sseimul")
6515 (set_attr "prefix_data16" "1")
6516 (set_attr "prefix_extra" "1")
6517 (set_attr "mode" "TI")])
;; MMX form of pmulhrsw on V4HI.  Operands 1 and 2 are commutative ("%0"),
;; and the insn is valid only when TARGET_SSSE3 holds and
;; ix86_binary_operator_ok accepts the operands as a MULT.  The pattern
;; contains an all-ones V4HI constant vector.
6519 (define_insn "ssse3_pmulhrswv4hi3"
6520 [(set (match_operand:V4HI 0 "register_operand" "=y")
6527 (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
6529 (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
6531 (const_vector:V4HI [(const_int 1) (const_int 1)
6532 (const_int 1) (const_int 1)]))
6534 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
6535 "pmulhrsw\t{%2, %0|%0, %2}"
6536 [(set_attr "type" "sseimul")
6537 (set_attr "prefix_extra" "1")
6538 (set_attr "mode" "DI")])
;; SSE form of pshufb on V16QI, expressed as an unspec.  Operand 1 is tied
;; to the destination; operand 2 may be an SSE register or memory.
6540 (define_insn "ssse3_pshufbv16qi3"
6541 [(set (match_operand:V16QI 0 "register_operand" "=x")
6542 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
6543 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
6546 "pshufb\t{%2, %0|%0, %2}"
6547 [(set_attr "type" "sselog1")
6548 (set_attr "prefix_data16" "1")
6549 (set_attr "prefix_extra" "1")
6550 (set_attr "mode" "TI")])
;; MMX form of pshufb on V8QI, expressed as an unspec.  Operand 1 is tied
;; to the destination; operand 2 may be an MMX register or memory.
6552 (define_insn "ssse3_pshufbv8qi3"
6553 [(set (match_operand:V8QI 0 "register_operand" "=y")
6554 (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0")
6555 (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
6558 "pshufb\t{%2, %0|%0, %2}"
6559 [(set_attr "type" "sselog1")
6560 (set_attr "prefix_extra" "1")
6561 (set_attr "mode" "DI")])
;; SSE forms of psignb/psignw/psignd, generated over the SSEMODE124 iterator
;; (V16QI, V8HI, V4SI); the mnemonic suffix comes from <ssevecsize>.
;; Operand 1 is tied to the destination; operand 2 may be register or memory.
6563 (define_insn "ssse3_psign<mode>3"
6564 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
6565 (unspec:SSEMODE124 [(match_operand:SSEMODE124 1 "register_operand" "0")
6566 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")]
6569 "psign<ssevecsize>\t{%2, %0|%0, %2}"
6570 [(set_attr "type" "sselog1")
6571 (set_attr "prefix_data16" "1")
6572 (set_attr "prefix_extra" "1")
6573 (set_attr "mode" "TI")])
;; MMX forms of psign, generated over the MMXMODEI iterator; the mnemonic
;; suffix comes from <mmxvecsize>.  Operand 1 is tied to the destination;
;; operand 2 may be an MMX register or memory.
6575 (define_insn "ssse3_psign<mode>3"
6576 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
6577 (unspec:MMXMODEI [(match_operand:MMXMODEI 1 "register_operand" "0")
6578 (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
6581 "psign<mmxvecsize>\t{%2, %0|%0, %2}"
6582 [(set_attr "type" "sselog1")
6583 (set_attr "prefix_extra" "1")
6584 (set_attr "mode" "DI")])
;; palignr on a TI-mode value held in an SSE register.  Operand 3 is divided
;; by 8 immediately before output, so the immediate carried in the RTL is
;; eight times the value printed in the assembly (the predicate name suggests
;; a multiple of 8 in 0..255 -- confirm in predicates.md).
6586 (define_insn "ssse3_palignrti"
6587 [(set (match_operand:TI 0 "register_operand" "=x")
6588 (unspec:TI [(match_operand:TI 1 "register_operand" "0")
6589 (match_operand:TI 2 "nonimmediate_operand" "xm")
6590 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
6594 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
6595 return "palignr\t{%3, %2, %0|%0, %2, %3}";
6597 [(set_attr "type" "sseishft")
6598 (set_attr "prefix_data16" "1")
6599 (set_attr "prefix_extra" "1")
6600 (set_attr "mode" "TI")])
;; palignr on a DI-mode value held in an MMX register.  Operand 3 is divided
;; by 8 immediately before output, so the immediate carried in the RTL is
;; eight times the value printed in the assembly (the predicate name suggests
;; a multiple of 8 in 0..255 -- confirm in predicates.md).
6602 (define_insn "ssse3_palignrdi"
6603 [(set (match_operand:DI 0 "register_operand" "=y")
6604 (unspec:DI [(match_operand:DI 1 "register_operand" "0")
6605 (match_operand:DI 2 "nonimmediate_operand" "ym")
6606 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
6610 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
6611 return "palignr\t{%3, %2, %0|%0, %2, %3}";
6613 [(set_attr "type" "sseishft")
6614 (set_attr "prefix_extra" "1")
6615 (set_attr "mode" "DI")])
;; Vector absolute value for the SSEMODE124 modes (V16QI, V8HI, V4SI),
;; emitted as pabsb/pabsw/pabsd via <ssevecsize>.  The source may be an SSE
;; register or memory and is not tied to the destination.
6617 (define_insn "abs<mode>2"
6618 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
6619 (abs:SSEMODE124 (match_operand:SSEMODE124 1 "nonimmediate_operand" "xm")))]
6621 "pabs<ssevecsize>\t{%1, %0|%0, %1}"
6622 [(set_attr "type" "sselog1")
6623 (set_attr "prefix_data16" "1")
6624 (set_attr "prefix_extra" "1")
6625 (set_attr "mode" "TI")])
;; Vector absolute value for the MMXMODEI modes, emitted as pabs with the
;; suffix taken from <mmxvecsize>.  The source may be an MMX register or
;; memory and is not tied to the destination.
6627 (define_insn "abs<mode>2"
6628 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
6629 (abs:MMXMODEI (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
6631 "pabs<mmxvecsize>\t{%1, %0|%0, %1}"
6632 [(set_attr "type" "sselog1")
6633 (set_attr "prefix_extra" "1")
6634 (set_attr "mode" "DI")])
6636 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6638 ;; AMD SSE4A instructions
6640 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; movntsd taking a V2DF register: element 0 of operand 1 is selected and
;; stored to the DFmode memory destination (operand 0 must be memory).
6642 (define_insn "sse4a_vmmovntv2df"
6643 [(set (match_operand:DF 0 "memory_operand" "=m")
6644 (unspec:DF [(vec_select:DF
6645 (match_operand:V2DF 1 "register_operand" "x")
6646 (parallel [(const_int 0)]))]
6649 "movntsd\t{%1, %0|%0, %1}"
6650 [(set_attr "type" "ssemov")
6651 (set_attr "mode" "DF")])
;; movntsd taking a scalar DFmode value in an SSE register and storing it to
;; the DFmode memory destination (operand 0 must be memory).
6653 (define_insn "sse4a_movntdf"
6654 [(set (match_operand:DF 0 "memory_operand" "=m")
6655 (unspec:DF [(match_operand:DF 1 "register_operand" "x")]
6658 "movntsd\t{%1, %0|%0, %1}"
6659 [(set_attr "type" "ssemov")
6660 (set_attr "mode" "DF")])
;; movntss taking a V4SF register: element 0 of operand 1 is selected and
;; stored to the SFmode memory destination (operand 0 must be memory).
6662 (define_insn "sse4a_vmmovntv4sf"
6663 [(set (match_operand:SF 0 "memory_operand" "=m")
6664 (unspec:SF [(vec_select:SF
6665 (match_operand:V4SF 1 "register_operand" "x")
6666 (parallel [(const_int 0)]))]
6669 "movntss\t{%1, %0|%0, %1}"
6670 [(set_attr "type" "ssemov")
6671 (set_attr "mode" "SF")])
;; movntss taking a scalar SFmode value in an SSE register and storing it to
;; the SFmode memory destination (operand 0 must be memory).
6673 (define_insn "sse4a_movntsf"
6674 [(set (match_operand:SF 0 "memory_operand" "=m")
6675 (unspec:SF [(match_operand:SF 1 "register_operand" "x")]
6678 "movntss\t{%1, %0|%0, %1}"
6679 [(set_attr "type" "ssemov")
6680 (set_attr "mode" "SF")])
;; Immediate form of extrq on V2DI.  Operand 1 is tied to the destination;
;; operands 2 and 3 are mode-less integer constants (const_int_operand with
;; empty constraints) printed as the two immediates of the instruction.
6682 (define_insn "sse4a_extrqi"
6683 [(set (match_operand:V2DI 0 "register_operand" "=x")
6684 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6685 (match_operand 2 "const_int_operand" "")
6686 (match_operand 3 "const_int_operand" "")]
6689 "extrq\t{%3, %2, %0|%0, %2, %3}"
6690 [(set_attr "type" "sse")
6691 (set_attr "prefix_data16" "1")
6692 (set_attr "mode" "TI")])
;; Register form of extrq on V2DI.  Operand 1 is tied to the destination;
;; operand 2 is a V16QI value that must live in an SSE register.
6694 (define_insn "sse4a_extrq"
6695 [(set (match_operand:V2DI 0 "register_operand" "=x")
6696 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6697 (match_operand:V16QI 2 "register_operand" "x")]
6700 "extrq\t{%2, %0|%0, %2}"
6701 [(set_attr "type" "sse")
6702 (set_attr "prefix_data16" "1")
6703 (set_attr "mode" "TI")])
;; Immediate form of insertq on V2DI.  Operand 1 is tied to the destination,
;; operand 2 is an SSE register, and operands 3 and 4 are mode-less integer
;; constants printed as the two immediates of the instruction.
6705 (define_insn "sse4a_insertqi"
6706 [(set (match_operand:V2DI 0 "register_operand" "=x")
6707 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6708 (match_operand:V2DI 2 "register_operand" "x")
6709 (match_operand 3 "const_int_operand" "")
6710 (match_operand 4 "const_int_operand" "")]
6713 "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
6714 [(set_attr "type" "sseins")
6715 (set_attr "prefix_rep" "1")
6716 (set_attr "mode" "TI")])
;; Register form of insertq on V2DI.  Operand 1 is tied to the destination;
;; operand 2 must be an SSE register (no memory alternative).
6718 (define_insn "sse4a_insertq"
6719 [(set (match_operand:V2DI 0 "register_operand" "=x")
6720 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6721 (match_operand:V2DI 2 "register_operand" "x")]
6724 "insertq\t{%2, %0|%0, %2}"
6725 [(set_attr "type" "sseins")
6726 (set_attr "prefix_rep" "1")
6727 (set_attr "mode" "TI")])
6729 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6731 ;; Intel SSE4.1 instructions
6733 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; blendpd on V2DF with an immediate selector.  Note the RTL lists operand 2
;; (register or memory) before operand 1 (tied to the destination); operand 3
;; is the immediate, restricted by const_0_to_3_operand.
6735 (define_insn "sse4_1_blendpd"
6736 [(set (match_operand:V2DF 0 "register_operand" "=x")
6738 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
6739 (match_operand:V2DF 1 "register_operand" "0")
6740 (match_operand:SI 3 "const_0_to_3_operand" "n")))]
6742 "blendpd\t{%3, %2, %0|%0, %2, %3}"
6743 [(set_attr "type" "ssemov")
6744 (set_attr "prefix_extra" "1")
6745 (set_attr "mode" "V2DF")])
;; blendps on V4SF with an immediate selector.  Note the RTL lists operand 2
;; (register or memory) before operand 1 (tied to the destination); operand 3
;; is the immediate, restricted by const_0_to_15_operand.
6747 (define_insn "sse4_1_blendps"
6748 [(set (match_operand:V4SF 0 "register_operand" "=x")
6750 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
6751 (match_operand:V4SF 1 "register_operand" "0")
6752 (match_operand:SI 3 "const_0_to_15_operand" "n")))]
6754 "blendps\t{%3, %2, %0|%0, %2, %3}"
6755 [(set_attr "type" "ssemov")
6756 (set_attr "prefix_extra" "1")
6757 (set_attr "mode" "V4SF")])
;; Variable blendvpd on V2DF, expressed as an unspec.  Operand 3 (the
;; selector) uses the "Y0" constraint while operands 0-2 use *_not_xmm0
;; predicates, keeping the selector register distinct from the data operands.
;; NOTE(review): "Y0" presumably denotes %xmm0 -- confirm in constraints.md.
6759 (define_insn "sse4_1_blendvpd"
6760 [(set (match_operand:V2DF 0 "reg_not_xmm0_operand" "=x")
6761 (unspec:V2DF [(match_operand:V2DF 1 "reg_not_xmm0_operand" "0")
6762 (match_operand:V2DF 2 "nonimm_not_xmm0_operand" "xm")
6763 (match_operand:V2DF 3 "register_operand" "Y0")]
6766 "blendvpd\t{%3, %2, %0|%0, %2, %3}"
6767 [(set_attr "type" "ssemov")
6768 (set_attr "prefix_extra" "1")
6769 (set_attr "mode" "V2DF")])
;; Variable blendvps on V4SF, expressed as an unspec.  Operand 3 (the
;; selector) uses the "Y0" constraint while operands 0-2 use *_not_xmm0
;; predicates, keeping the selector register distinct from the data operands.
;; NOTE(review): "Y0" presumably denotes %xmm0 -- confirm in constraints.md.
6771 (define_insn "sse4_1_blendvps"
6772 [(set (match_operand:V4SF 0 "reg_not_xmm0_operand" "=x")
6773 (unspec:V4SF [(match_operand:V4SF 1 "reg_not_xmm0_operand" "0")
6774 (match_operand:V4SF 2 "nonimm_not_xmm0_operand" "xm")
6775 (match_operand:V4SF 3 "register_operand" "Y0")]
6778 "blendvps\t{%3, %2, %0|%0, %2, %3}"
6779 [(set_attr "type" "ssemov")
6780 (set_attr "prefix_extra" "1")
6781 (set_attr "mode" "V4SF")])
;; dppd on V2DF, expressed as an unspec.  Operands 1 and 2 are marked
;; commutative ("%0"); operand 3 is an immediate restricted by
;; const_0_to_255_operand.
6783 (define_insn "sse4_1_dppd"
6784 [(set (match_operand:V2DF 0 "register_operand" "=x")
6785 (unspec:V2DF [(match_operand:V2DF 1 "nonimmediate_operand" "%0")
6786 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
6787 (match_operand:SI 3 "const_0_to_255_operand" "n")]
6790 "dppd\t{%3, %2, %0|%0, %2, %3}"
6791 [(set_attr "type" "ssemul")
6792 (set_attr "prefix_extra" "1")
6793 (set_attr "mode" "V2DF")])
;; dpps on V4SF, expressed as an unspec.  Operands 1 and 2 are marked
;; commutative ("%0"); operand 3 is an immediate restricted by
;; const_0_to_255_operand.
6795 (define_insn "sse4_1_dpps"
6796 [(set (match_operand:V4SF 0 "register_operand" "=x")
6797 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "%0")
6798 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
6799 (match_operand:SI 3 "const_0_to_255_operand" "n")]
6802 "dpps\t{%3, %2, %0|%0, %2, %3}"
6803 [(set_attr "type" "ssemul")
6804 (set_attr "prefix_extra" "1")
6805 (set_attr "mode" "V4SF")])
;; movntdqa: loads a V2DI value from memory (operand 1 must be memory) into
;; an SSE register, expressed as an unspec.
6807 (define_insn "sse4_1_movntdqa"
6808 [(set (match_operand:V2DI 0 "register_operand" "=x")
6809 (unspec:V2DI [(match_operand:V2DI 1 "memory_operand" "m")]
6812 "movntdqa\t{%1, %0|%0, %1}"
6813 [(set_attr "type" "ssecvt")
6814 (set_attr "prefix_extra" "1")
6815 (set_attr "mode" "TI")])
;; mpsadbw on V16QI, expressed as an unspec.  Operand 1 is tied to the
;; destination, operand 2 may be register or memory, and operand 3 is an
;; immediate restricted by const_0_to_255_operand.
6817 (define_insn "sse4_1_mpsadbw"
6818 [(set (match_operand:V16QI 0 "register_operand" "=x")
6819 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
6820 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
6821 (match_operand:SI 3 "const_0_to_255_operand" "n")]
6824 "mpsadbw\t{%3, %2, %0|%0, %2, %3}"
6825 [(set_attr "type" "sselog1")
6826 (set_attr "prefix_extra" "1")
6827 (set_attr "mode" "TI")])
;; packusdw: combines two V4SI inputs into one V8HI result.  Operand 1 is
;; tied to the destination; operand 2 may be an SSE register or memory.
6829 (define_insn "sse4_1_packusdw"
6830 [(set (match_operand:V8HI 0 "register_operand" "=x")
6833 (match_operand:V4SI 1 "register_operand" "0"))
6835 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
6837 "packusdw\t{%2, %0|%0, %2}"
6838 [(set_attr "type" "sselog")
6839 (set_attr "prefix_extra" "1")
6840 (set_attr "mode" "TI")])
;; Variable pblendvb on V16QI, expressed as an unspec.  Operand 3 (the
;; selector) uses the "Y0" constraint while operands 0-2 use *_not_xmm0
;; predicates, keeping the selector register distinct from the data operands.
;; NOTE(review): "Y0" presumably denotes %xmm0 -- confirm in constraints.md.
6842 (define_insn "sse4_1_pblendvb"
6843 [(set (match_operand:V16QI 0 "reg_not_xmm0_operand" "=x")
6844 (unspec:V16QI [(match_operand:V16QI 1 "reg_not_xmm0_operand" "0")
6845 (match_operand:V16QI 2 "nonimm_not_xmm0_operand" "xm")
6846 (match_operand:V16QI 3 "register_operand" "Y0")]
6849 "pblendvb\t{%3, %2, %0|%0, %2, %3}"
6850 [(set_attr "type" "ssemov")
6851 (set_attr "prefix_extra" "1")
6852 (set_attr "mode" "TI")])
;; pblendw on V8HI with an immediate selector.  Note the RTL lists operand 2
;; (register or memory) before operand 1 (tied to the destination); operand 3
;; is the immediate, restricted by const_0_to_255_operand.
6854 (define_insn "sse4_1_pblendw"
6855 [(set (match_operand:V8HI 0 "register_operand" "=x")
6857 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
6858 (match_operand:V8HI 1 "register_operand" "0")
6859 (match_operand:SI 3 "const_0_to_255_operand" "n")))]
6861 "pblendw\t{%3, %2, %0|%0, %2, %3}"
6862 [(set_attr "type" "ssemov")
6863 (set_attr "prefix_extra" "1")
6864 (set_attr "mode" "TI")])
;; phminposuw on V8HI, expressed as UNSPEC_PHMINPOSUW.  Single input that
;; may be an SSE register or memory and is not tied to the destination.
6866 (define_insn "sse4_1_phminposuw"
6867 [(set (match_operand:V8HI 0 "register_operand" "=x")
6868 (unspec:V8HI [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
6869 UNSPEC_PHMINPOSUW))]
6871 "phminposuw\t{%1, %0|%0, %1}"
6872 [(set_attr "type" "ssemov")
6873 (set_attr "prefix_extra" "1")
6874 (set_attr "mode" "TI")])
;; pmovsxbw, register-source form: the source is a full V16QI SSE register
;; and the element selection in the pattern starts at index 0.
6876 (define_insn "sse4_1_extendv8qiv8hi2"
6877 [(set (match_operand:V8HI 0 "register_operand" "=x")
6880 (match_operand:V16QI 1 "register_operand" "x")
6881 (parallel [(const_int 0)
6890 "pmovsxbw\t{%1, %0|%0, %1}"
6891 [(set_attr "type" "ssemov")
6892 (set_attr "prefix_extra" "1")
6893 (set_attr "mode" "TI")])
;; Unnamed pmovsxbw variant: the source is a V8QI register or memory operand
;; wrapped in vec_duplicate:V16QI.
6895 (define_insn "*sse4_1_extendv8qiv8hi2"
6896 [(set (match_operand:V8HI 0 "register_operand" "=x")
6899 (vec_duplicate:V16QI
6900 (match_operand:V8QI 1 "nonimmediate_operand" "xm"))
6901 (parallel [(const_int 0)
6910 "pmovsxbw\t{%1, %0|%0, %1}"
6911 [(set_attr "type" "ssemov")
6912 (set_attr "prefix_extra" "1")
6913 (set_attr "mode" "TI")])
;; pmovsxbd, register-source form: the source is a full V16QI SSE register
;; and the element selection in the pattern starts at index 0.
6915 (define_insn "sse4_1_extendv4qiv4si2"
6916 [(set (match_operand:V4SI 0 "register_operand" "=x")
6919 (match_operand:V16QI 1 "register_operand" "x")
6920 (parallel [(const_int 0)
6925 "pmovsxbd\t{%1, %0|%0, %1}"
6926 [(set_attr "type" "ssemov")
6927 (set_attr "prefix_extra" "1")
6928 (set_attr "mode" "TI")])
;; Unnamed pmovsxbd variant: the source is a V4QI register or memory operand
;; wrapped in vec_duplicate:V16QI.
6930 (define_insn "*sse4_1_extendv4qiv4si2"
6931 [(set (match_operand:V4SI 0 "register_operand" "=x")
6934 (vec_duplicate:V16QI
6935 (match_operand:V4QI 1 "nonimmediate_operand" "xm"))
6936 (parallel [(const_int 0)
6941 "pmovsxbd\t{%1, %0|%0, %1}"
6942 [(set_attr "type" "ssemov")
6943 (set_attr "prefix_extra" "1")
6944 (set_attr "mode" "TI")])
;; pmovsxbq, register-source form: the source is a full V16QI SSE register
;; and the element selection in the pattern starts at index 0.
6946 (define_insn "sse4_1_extendv2qiv2di2"
6947 [(set (match_operand:V2DI 0 "register_operand" "=x")
6950 (match_operand:V16QI 1 "register_operand" "x")
6951 (parallel [(const_int 0)
6954 "pmovsxbq\t{%1, %0|%0, %1}"
6955 [(set_attr "type" "ssemov")
6956 (set_attr "prefix_extra" "1")
6957 (set_attr "mode" "TI")])
;; Unnamed pmovsxbq variant: the source is a V2QI register or memory operand
;; wrapped in vec_duplicate:V16QI.
6959 (define_insn "*sse4_1_extendv2qiv2di2"
6960 [(set (match_operand:V2DI 0 "register_operand" "=x")
6963 (vec_duplicate:V16QI
6964 (match_operand:V2QI 1 "nonimmediate_operand" "xm"))
6965 (parallel [(const_int 0)
6968 "pmovsxbq\t{%1, %0|%0, %1}"
6969 [(set_attr "type" "ssemov")
6970 (set_attr "prefix_extra" "1")
6971 (set_attr "mode" "TI")])
;; pmovsxwd, register-source form: the source is a full V8HI SSE register
;; and the element selection in the pattern starts at index 0.
6973 (define_insn "sse4_1_extendv4hiv4si2"
6974 [(set (match_operand:V4SI 0 "register_operand" "=x")
6977 (match_operand:V8HI 1 "register_operand" "x")
6978 (parallel [(const_int 0)
6983 "pmovsxwd\t{%1, %0|%0, %1}"
6984 [(set_attr "type" "ssemov")
6985 (set_attr "prefix_extra" "1")
6986 (set_attr "mode" "TI")])
;; Unnamed pmovsxwd variant whose source is a V4HI register or memory
;; operand: four HImode elements, one per element of the V4SI result, the
;; same source mode the zero-extending *sse4_1_zero_extendv4hiv4si2 uses.
6988 (define_insn "*sse4_1_extendv4hiv4si2"
6989 [(set (match_operand:V4SI 0 "register_operand" "=x")
6993 (match_operand:V4HI 1 "nonimmediate_operand" "xm"))
6994 (parallel [(const_int 0)
6999 "pmovsxwd\t{%1, %0|%0, %1}"
7000 [(set_attr "type" "ssemov")
7001 (set_attr "prefix_extra" "1")
7002 (set_attr "mode" "TI")])
;; pmovsxwq, register-source form: the source is a full V8HI SSE register
;; and the element selection in the pattern starts at index 0.
7004 (define_insn "sse4_1_extendv2hiv2di2"
7005 [(set (match_operand:V2DI 0 "register_operand" "=x")
7008 (match_operand:V8HI 1 "register_operand" "x")
7009 (parallel [(const_int 0)
7012 "pmovsxwq\t{%1, %0|%0, %1}"
7013 [(set_attr "type" "ssemov")
7014 (set_attr "prefix_extra" "1")
7015 (set_attr "mode" "TI")])
;; Unnamed pmovsxwq variant whose source is a V2HI register or memory
;; operand: two HImode elements, one per element of the V2DI result, the
;; same source mode the zero-extending *sse4_1_zero_extendv2hiv2di2 uses.
7017 (define_insn "*sse4_1_extendv2hiv2di2"
7018 [(set (match_operand:V2DI 0 "register_operand" "=x")
7022 (match_operand:V2HI 1 "nonimmediate_operand" "xm"))
7023 (parallel [(const_int 0)
7026 "pmovsxwq\t{%1, %0|%0, %1}"
7027 [(set_attr "type" "ssemov")
7028 (set_attr "prefix_extra" "1")
7029 (set_attr "mode" "TI")])
;; pmovsxdq, register-source form: the source is a full V4SI SSE register
;; and the element selection in the pattern starts at index 0.
7031 (define_insn "sse4_1_extendv2siv2di2"
7032 [(set (match_operand:V2DI 0 "register_operand" "=x")
7035 (match_operand:V4SI 1 "register_operand" "x")
7036 (parallel [(const_int 0)
7039 "pmovsxdq\t{%1, %0|%0, %1}"
7040 [(set_attr "type" "ssemov")
7041 (set_attr "prefix_extra" "1")
7042 (set_attr "mode" "TI")])
;; Unnamed pmovsxdq variant: the source is a V2SI register or memory operand.
7044 (define_insn "*sse4_1_extendv2siv2di2"
7045 [(set (match_operand:V2DI 0 "register_operand" "=x")
7049 (match_operand:V2SI 1 "nonimmediate_operand" "xm"))
7050 (parallel [(const_int 0)
7053 "pmovsxdq\t{%1, %0|%0, %1}"
7054 [(set_attr "type" "ssemov")
7055 (set_attr "prefix_extra" "1")
7056 (set_attr "mode" "TI")])
;; pmovzxbw, register-source form: the source is a full V16QI SSE register
;; and the element selection in the pattern starts at index 0.
7058 (define_insn "sse4_1_zero_extendv8qiv8hi2"
7059 [(set (match_operand:V8HI 0 "register_operand" "=x")
7062 (match_operand:V16QI 1 "register_operand" "x")
7063 (parallel [(const_int 0)
7072 "pmovzxbw\t{%1, %0|%0, %1}"
7073 [(set_attr "type" "ssemov")
7074 (set_attr "prefix_extra" "1")
7075 (set_attr "mode" "TI")])
;; Unnamed pmovzxbw variant: the source is a V8QI register or memory operand
;; wrapped in vec_duplicate:V16QI.
7077 (define_insn "*sse4_1_zero_extendv8qiv8hi2"
7078 [(set (match_operand:V8HI 0 "register_operand" "=x")
7081 (vec_duplicate:V16QI
7082 (match_operand:V8QI 1 "nonimmediate_operand" "xm"))
7083 (parallel [(const_int 0)
7092 "pmovzxbw\t{%1, %0|%0, %1}"
7093 [(set_attr "type" "ssemov")
7094 (set_attr "prefix_extra" "1")
7095 (set_attr "mode" "TI")])
;; pmovzxbd, register-source form: the source is a full V16QI SSE register
;; and the element selection in the pattern starts at index 0.
7097 (define_insn "sse4_1_zero_extendv4qiv4si2"
7098 [(set (match_operand:V4SI 0 "register_operand" "=x")
7101 (match_operand:V16QI 1 "register_operand" "x")
7102 (parallel [(const_int 0)
7107 "pmovzxbd\t{%1, %0|%0, %1}"
7108 [(set_attr "type" "ssemov")
7109 (set_attr "prefix_extra" "1")
7110 (set_attr "mode" "TI")])
;; Unnamed pmovzxbd variant: the source is a V4QI register or memory operand
;; wrapped in vec_duplicate:V16QI.
7112 (define_insn "*sse4_1_zero_extendv4qiv4si2"
7113 [(set (match_operand:V4SI 0 "register_operand" "=x")
7116 (vec_duplicate:V16QI
7117 (match_operand:V4QI 1 "nonimmediate_operand" "xm"))
7118 (parallel [(const_int 0)
7123 "pmovzxbd\t{%1, %0|%0, %1}"
7124 [(set_attr "type" "ssemov")
7125 (set_attr "prefix_extra" "1")
7126 (set_attr "mode" "TI")])
;; pmovzxbq, register-source form: the source is a full V16QI SSE register
;; and the element selection in the pattern starts at index 0.
7128 (define_insn "sse4_1_zero_extendv2qiv2di2"
7129 [(set (match_operand:V2DI 0 "register_operand" "=x")
7132 (match_operand:V16QI 1 "register_operand" "x")
7133 (parallel [(const_int 0)
7136 "pmovzxbq\t{%1, %0|%0, %1}"
7137 [(set_attr "type" "ssemov")
7138 (set_attr "prefix_extra" "1")
7139 (set_attr "mode" "TI")])
;; Unnamed pmovzxbq variant: the source is a V2QI register or memory operand
;; wrapped in vec_duplicate:V16QI.
7141 (define_insn "*sse4_1_zero_extendv2qiv2di2"
7142 [(set (match_operand:V2DI 0 "register_operand" "=x")
7145 (vec_duplicate:V16QI
7146 (match_operand:V2QI 1 "nonimmediate_operand" "xm"))
7147 (parallel [(const_int 0)
7150 "pmovzxbq\t{%1, %0|%0, %1}"
7151 [(set_attr "type" "ssemov")
7152 (set_attr "prefix_extra" "1")
7153 (set_attr "mode" "TI")])
;; pmovzxwd, register-source form: the source is a full V8HI SSE register
;; and the element selection in the pattern starts at index 0.
7155 (define_insn "sse4_1_zero_extendv4hiv4si2"
7156 [(set (match_operand:V4SI 0 "register_operand" "=x")
7159 (match_operand:V8HI 1 "register_operand" "x")
7160 (parallel [(const_int 0)
7165 "pmovzxwd\t{%1, %0|%0, %1}"
7166 [(set_attr "type" "ssemov")
7167 (set_attr "prefix_extra" "1")
7168 (set_attr "mode" "TI")])
;; Unnamed pmovzxwd variant: the source is a V4HI register or memory operand.
7170 (define_insn "*sse4_1_zero_extendv4hiv4si2"
7171 [(set (match_operand:V4SI 0 "register_operand" "=x")
7175 (match_operand:V4HI 1 "nonimmediate_operand" "xm"))
7176 (parallel [(const_int 0)
7181 "pmovzxwd\t{%1, %0|%0, %1}"
7182 [(set_attr "type" "ssemov")
7183 (set_attr "prefix_extra" "1")
7184 (set_attr "mode" "TI")])
;; pmovzxwq, register-source form: the source is a full V8HI SSE register
;; and the element selection in the pattern starts at index 0.
7186 (define_insn "sse4_1_zero_extendv2hiv2di2"
7187 [(set (match_operand:V2DI 0 "register_operand" "=x")
7190 (match_operand:V8HI 1 "register_operand" "x")
7191 (parallel [(const_int 0)
7194 "pmovzxwq\t{%1, %0|%0, %1}"
7195 [(set_attr "type" "ssemov")
7196 (set_attr "prefix_extra" "1")
7197 (set_attr "mode" "TI")])
;; Unnamed pmovzxwq variant: the source is a V2HI register or memory operand.
7199 (define_insn "*sse4_1_zero_extendv2hiv2di2"
7200 [(set (match_operand:V2DI 0 "register_operand" "=x")
7204 (match_operand:V2HI 1 "nonimmediate_operand" "xm"))
7205 (parallel [(const_int 0)
7208 "pmovzxwq\t{%1, %0|%0, %1}"
7209 [(set_attr "type" "ssemov")
7210 (set_attr "prefix_extra" "1")
7211 (set_attr "mode" "TI")])
;; pmovzxdq, register-source form: the source is a full V4SI SSE register
;; and the element selection in the pattern starts at index 0.
7213 (define_insn "sse4_1_zero_extendv2siv2di2"
7214 [(set (match_operand:V2DI 0 "register_operand" "=x")
7217 (match_operand:V4SI 1 "register_operand" "x")
7218 (parallel [(const_int 0)
7221 "pmovzxdq\t{%1, %0|%0, %1}"
7222 [(set_attr "type" "ssemov")
7223 (set_attr "prefix_extra" "1")
7224 (set_attr "mode" "TI")])
;; Unnamed pmovzxdq variant: the source is a V2SI register or memory operand.
7226 (define_insn "*sse4_1_zero_extendv2siv2di2"
7227 [(set (match_operand:V2DI 0 "register_operand" "=x")
7231 (match_operand:V2SI 1 "nonimmediate_operand" "xm"))
7232 (parallel [(const_int 0)
7235 "pmovzxdq\t{%1, %0|%0, %1}"
7236 [(set_attr "type" "ssemov")
7237 (set_attr "prefix_extra" "1")
7238 (set_attr "mode" "TI")])
7240 ;; ptest is very similar to comiss and ucomiss when setting FLAGS_REG.
7241 ;; But it is not really a compare instruction.
;; The only result of this insn is FLAGS_REG (in CC mode); both V2DI operands
;; are inputs.  Operand 0 must be an SSE register, operand 1 may be memory.
7242 (define_insn "sse4_1_ptest"
7243 [(set (reg:CC FLAGS_REG)
7244 (unspec:CC [(match_operand:V2DI 0 "register_operand" "x")
7245 (match_operand:V2DI 1 "nonimmediate_operand" "xm")]
7248 "ptest\t{%1, %0|%0, %1}"
7249 [(set_attr "type" "ssecomi")
7250 (set_attr "prefix_extra" "1")
7251 (set_attr "mode" "TI")])
;; roundpd on V2DF, expressed as an unspec.  Operand 1 may be register or
;; memory and is not tied to the destination; operand 2 is an immediate
;; restricted by const_0_to_15_operand.
7253 (define_insn "sse4_1_roundpd"
7254 [(set (match_operand:V2DF 0 "register_operand" "=x")
7255 (unspec:V2DF [(match_operand:V2DF 1 "nonimmediate_operand" "xm")
7256 (match_operand:SI 2 "const_0_to_15_operand" "n")]
7259 "roundpd\t{%2, %1, %0|%0, %1, %2}"
7260 [(set_attr "type" "ssecvt")
7261 (set_attr "prefix_extra" "1")
7262 (set_attr "mode" "V2DF")])
;; roundps on V4SF, expressed as an unspec.  Operand 1 may be register or
;; memory and is not tied to the destination; operand 2 is an immediate
;; restricted by const_0_to_15_operand.
7264 (define_insn "sse4_1_roundps"
7265 [(set (match_operand:V4SF 0 "register_operand" "=x")
7266 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")
7267 (match_operand:SI 2 "const_0_to_15_operand" "n")]
7270 "roundps\t{%2, %1, %0|%0, %1, %2}"
7271 [(set_attr "type" "ssecvt")
7272 (set_attr "prefix_extra" "1")
7273 (set_attr "mode" "V4SF")])
;; roundsd on V2DF.  Operand 2 (SSE register) and the immediate operand 3
;; feed the unspec; operand 1 is a separate input tied to the destination.
7275 (define_insn "sse4_1_roundsd"
7276 [(set (match_operand:V2DF 0 "register_operand" "=x")
7278 (unspec:V2DF [(match_operand:V2DF 2 "register_operand" "x")
7279 (match_operand:SI 3 "const_0_to_15_operand" "n")]
7281 (match_operand:V2DF 1 "register_operand" "0")
7284 "roundsd\t{%3, %2, %0|%0, %2, %3}"
7285 [(set_attr "type" "ssecvt")
7286 (set_attr "prefix_extra" "1")
7287 (set_attr "mode" "V2DF")])
;; roundss on V4SF.  Operand 2 (SSE register) and the immediate operand 3
;; feed the unspec; operand 1 is a separate input tied to the destination.
7289 (define_insn "sse4_1_roundss"
7290 [(set (match_operand:V4SF 0 "register_operand" "=x")
7292 (unspec:V4SF [(match_operand:V4SF 2 "register_operand" "x")
7293 (match_operand:SI 3 "const_0_to_15_operand" "n")]
7295 (match_operand:V4SF 1 "register_operand" "0")
7298 "roundss\t{%3, %2, %0|%0, %2, %3}"
7299 [(set_attr "type" "ssecvt")
7300 (set_attr "prefix_extra" "1")
7301 (set_attr "mode" "V4SF")])
7303 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7305 ;; Intel SSE4.2 string/text processing instructions
7307 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Combined pcmpestr pattern with three results: an SImode value (operand 0,
;; "c" constraint), a V16QI value (operand 1, "Y0" constraint) and FLAGS_REG.
;; Inputs are operands 2-6.  It is split only before reload; the split code
;; reads the REG_UNUSED notes on the current insn to learn which of the three
;; results are live (locals ecx, xmm0, flags) and emits the pcmpestri,
;; pcmpestrm and/or flags-only insn accordingly.  The flags-only insn is
;; emitted when flags are live but neither register result is.
7309 (define_insn_and_split "sse4_2_pcmpestr"
7310 [(set (match_operand:SI 0 "register_operand" "=c,c")
7312 [(match_operand:V16QI 2 "register_operand" "x,x")
7313 (match_operand:SI 3 "register_operand" "a,a")
7314 (match_operand:V16QI 4 "nonimmediate_operand" "x,m")
7315 (match_operand:SI 5 "register_operand" "d,d")
7316 (match_operand:SI 6 "const_0_to_255_operand" "n,n")]
7318 (set (match_operand:V16QI 1 "register_operand" "=Y0,Y0")
7326 (set (reg:CC FLAGS_REG)
7335 && !(reload_completed || reload_in_progress)"
7340 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
7341 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
7342 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
7345 emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
7346 operands[3], operands[4],
7347 operands[5], operands[6]));
7349 emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
7350 operands[3], operands[4],
7351 operands[5], operands[6]));
7352 if (flags && !(ecx || xmm0))
7353 emit_insn (gen_sse4_2_pcmpestr_cconly (operands[2], operands[3],
7354 operands[4], operands[5],
7358 [(set_attr "type" "sselog")
7359 (set_attr "prefix_data16" "1")
7360 (set_attr "prefix_extra" "1")
7361 (set_attr "memory" "none,load")
7362 (set_attr "mode" "TI")])
;; pcmpestri: SImode result in operand 0 ("c" constraint), plus FLAGS_REG.
;; Operands 2 and 4 use the "a" and "d" constraints and are not printed in
;; the assembly template.  Two alternatives: operand 3 in a register or in
;; memory (reflected in the "memory" attribute).
7364 (define_insn "sse4_2_pcmpestri"
7365 [(set (match_operand:SI 0 "register_operand" "=c,c")
7367 [(match_operand:V16QI 1 "register_operand" "x,x")
7368 (match_operand:SI 2 "register_operand" "a,a")
7369 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
7370 (match_operand:SI 4 "register_operand" "d,d")
7371 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
7373 (set (reg:CC FLAGS_REG)
7382 "pcmpestri\t{%5, %3, %1|%1, %3, %5}"
7383 [(set_attr "type" "sselog")
7384 (set_attr "prefix_data16" "1")
7385 (set_attr "prefix_extra" "1")
7386 (set_attr "memory" "none,load")
7387 (set_attr "mode" "TI")])
;; pcmpestrm: V16QI result in operand 0 ("Y0" constraint), plus FLAGS_REG.
;; Operands 2 and 4 use the "a" and "d" constraints and are not printed in
;; the assembly template.  Two alternatives: operand 3 in a register or in
;; memory (reflected in the "memory" attribute).
7389 (define_insn "sse4_2_pcmpestrm"
7390 [(set (match_operand:V16QI 0 "register_operand" "=Y0,Y0")
7392 [(match_operand:V16QI 1 "register_operand" "x,x")
7393 (match_operand:SI 2 "register_operand" "a,a")
7394 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
7395 (match_operand:SI 4 "register_operand" "d,d")
7396 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
7398 (set (reg:CC FLAGS_REG)
7407 "pcmpestrm\t{%5, %3, %1|%1, %3, %5}"
7408 [(set_attr "type" "sselog")
7409 (set_attr "prefix_data16" "1")
7410 (set_attr "prefix_extra" "1")
7411 (set_attr "memory" "none,load")
7412 (set_attr "mode" "TI")])
;; Flags-only pcmpestr: FLAGS_REG is the sole result; the register results
;; are modelled as clobbered scratches.  Four alternatives: the first two
;; emit pcmpestrm and clobber a V16QI scratch ("Y0"), the last two emit
;; pcmpestri and clobber an SImode scratch ("c"); within each pair operand 2
;; is a register or memory.
7414 (define_insn "sse4_2_pcmpestr_cconly"
7415 [(set (reg:CC FLAGS_REG)
7417 [(match_operand:V16QI 0 "register_operand" "x,x,x,x")
7418 (match_operand:SI 1 "register_operand" "a,a,a,a")
7419 (match_operand:V16QI 2 "nonimmediate_operand" "x,m,x,m")
7420 (match_operand:SI 3 "register_operand" "d,d,d,d")
7421 (match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")]
7423 (clobber (match_scratch:V16QI 5 "=Y0,Y0,X,X"))
7424 (clobber (match_scratch:SI 6 "= X, X,c,c"))]
7427 pcmpestrm\t{%4, %2, %0|%0, %2, %4}
7428 pcmpestrm\t{%4, %2, %0|%0, %2, %4}
7429 pcmpestri\t{%4, %2, %0|%0, %2, %4}
7430 pcmpestri\t{%4, %2, %0|%0, %2, %4}"
7431 [(set_attr "type" "sselog")
7432 (set_attr "prefix_data16" "1")
7433 (set_attr "prefix_extra" "1")
7434 (set_attr "memory" "none,load,none,load")
7435 (set_attr "mode" "TI")])
;; Combined pcmpistr pattern with three results: an SImode value (operand 0,
;; "c" constraint), a V16QI value (operand 1, "Y0" constraint) and FLAGS_REG.
;; Inputs are operands 2-4.  It is split only before reload; the split code
;; reads the REG_UNUSED notes on the current insn to learn which of the three
;; results are live (locals ecx, xmm0, flags) and emits the pcmpistri,
;; pcmpistrm and/or flags-only insn accordingly.  The flags-only insn is
;; emitted when flags are live but neither register result is.
7437 (define_insn_and_split "sse4_2_pcmpistr"
7438 [(set (match_operand:SI 0 "register_operand" "=c,c")
7440 [(match_operand:V16QI 2 "register_operand" "x,x")
7441 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
7442 (match_operand:SI 4 "const_0_to_255_operand" "n,n")]
7444 (set (match_operand:V16QI 1 "register_operand" "=Y0,Y0")
7450 (set (reg:CC FLAGS_REG)
7457 && !(reload_completed || reload_in_progress)"
7462 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
7463 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
7464 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
7467 emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
7468 operands[3], operands[4]));
7470 emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
7471 operands[3], operands[4]));
7472 if (flags && !(ecx || xmm0))
7473 emit_insn (gen_sse4_2_pcmpistr_cconly (operands[2], operands[3],
7477 [(set_attr "type" "sselog")
7478 (set_attr "prefix_data16" "1")
7479 (set_attr "prefix_extra" "1")
7480 (set_attr "memory" "none,load")
7481 (set_attr "mode" "TI")])
;; pcmpistri: SImode result in operand 0 ("c" constraint), plus FLAGS_REG.
;; Two alternatives: operand 2 in a register or in memory (reflected in the
;; "memory" attribute); operand 3 is the immediate.
7483 (define_insn "sse4_2_pcmpistri"
7484 [(set (match_operand:SI 0 "register_operand" "=c,c")
7486 [(match_operand:V16QI 1 "register_operand" "x,x")
7487 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
7488 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
7490 (set (reg:CC FLAGS_REG)
7497 "pcmpistri\t{%3, %2, %1|%1, %2, %3}"
7498 [(set_attr "type" "sselog")
7499 (set_attr "prefix_data16" "1")
7500 (set_attr "prefix_extra" "1")
7501 (set_attr "memory" "none,load")
7502 (set_attr "mode" "TI")])
;; pcmpistrm: V16QI result in operand 0 ("Y0" constraint), plus FLAGS_REG.
;; Two alternatives: operand 2 in a register or in memory (reflected in the
;; "memory" attribute); operand 3 is the immediate.
7504 (define_insn "sse4_2_pcmpistrm"
7505 [(set (match_operand:V16QI 0 "register_operand" "=Y0,Y0")
7507 [(match_operand:V16QI 1 "register_operand" "x,x")
7508 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
7509 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
7511 (set (reg:CC FLAGS_REG)
7518 "pcmpistrm\t{%3, %2, %1|%1, %2, %3}"
7519 [(set_attr "type" "sselog")
7520 (set_attr "prefix_data16" "1")
7521 (set_attr "prefix_extra" "1")
7522 (set_attr "memory" "none,load")
7523 (set_attr "mode" "TI")])
;; Flags-only pcmpistr: FLAGS_REG is the sole result; the register results
;; are modelled as clobbered scratches.  Four alternatives: the first two
;; emit pcmpistrm and clobber a V16QI scratch ("Y0"), the last two emit
;; pcmpistri and clobber an SImode scratch ("c"); within each pair operand 1
;; is a register or memory.
7525 (define_insn "sse4_2_pcmpistr_cconly"
7526 [(set (reg:CC FLAGS_REG)
7528 [(match_operand:V16QI 0 "register_operand" "x,x,x,x")
7529 (match_operand:V16QI 1 "nonimmediate_operand" "x,m,x,m")
7530 (match_operand:SI 2 "const_0_to_255_operand" "n,n,n,n")]
7532 (clobber (match_scratch:V16QI 3 "=Y0,Y0,X,X"))
7533 (clobber (match_scratch:SI 4 "= X, X,c,c"))]
7536 pcmpistrm\t{%2, %1, %0|%0, %1, %2}
7537 pcmpistrm\t{%2, %1, %0|%0, %1, %2}
7538 pcmpistri\t{%2, %1, %0|%0, %1, %2}
7539 pcmpistri\t{%2, %1, %0|%0, %1, %2}"
7540 [(set_attr "type" "sselog")
7541 (set_attr "prefix_data16" "1")
7542 (set_attr "prefix_extra" "1")
7543 (set_attr "memory" "none,load,none,load")
7544 (set_attr "mode" "TI")])
7546 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7548 ;; SSE5 instructions
7550 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7552 ;; SSE5 parallel integer multiply/add instructions.
7553 ;; Note the instruction does not allow the value being added to be a memory
7554 ;; operand.  However, pretending via the nonimmediate_operand predicate
7555 ;; that it does, and splitting it later, allows the following to be recognized:
7556 ;; a[i] = b[i] * c[i] + d[i];
;; pmacsww on V8HI with three alternatives.  Operand 3 is always tied to the
;; destination; a memory operand is accepted in operand 2 (alternative 2) or
;; operand 1 (alternative 3), and the third template prints %1 and %2 in
;; swapped order.  Validity is checked by ix86_sse5_valid_op_p with a final
;; argument of 2.
7557 (define_insn "sse5_pmacsww"
7558 [(set (match_operand:V8HI 0 "register_operand" "=x,x,x")
7561 (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,m")
7562 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x"))
7563 (match_operand:V8HI 3 "nonimmediate_operand" "0,0,0")))]
7564 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 2)"
7566 pmacsww\t{%3, %2, %1, %0|%0, %1, %2, %3}
7567 pmacsww\t{%3, %2, %1, %0|%0, %1, %2, %3}
7568 pmacsww\t{%3, %1, %2, %0|%0, %2, %1, %3}"
7569 [(set_attr "type" "ssemuladd")
7570 (set_attr "mode" "TI")])
7572 ;; Split pmacsww with two memory operands into a load and the pmacsww.
;; Applies when ix86_sse5_valid_op_p rejects the operands with a final
;; argument of 1 but accepts them with 2, and the destination register is
;; not mentioned in any input.  The replacement code calls
;; ix86_expand_sse5_multiple_memory on the operands and then emits
;; gen_sse5_pmacsww.
7574 [(set (match_operand:V8HI 0 "register_operand" "")
7576 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
7577 (match_operand:V8HI 2 "nonimmediate_operand" ""))
7578 (match_operand:V8HI 3 "nonimmediate_operand" "")))]
7580 && !ix86_sse5_valid_op_p (operands, insn, 4, false, 1)
7581 && ix86_sse5_valid_op_p (operands, insn, 4, false, 2)
7582 && !reg_mentioned_p (operands[0], operands[1])
7583 && !reg_mentioned_p (operands[0], operands[2])
7584 && !reg_mentioned_p (operands[0], operands[3])"
7587 ix86_expand_sse5_multiple_memory (operands, 4, V8HImode);
7588 emit_insn (gen_sse5_pmacsww (operands[0], operands[1], operands[2],
;; pmacssww on V8HI with three alternatives.  Operand 3 is always tied to the
;; destination; a memory operand is accepted in operand 2 (alternative 2) or
;; operand 1 (alternative 3), and the third template prints %1 and %2 in
;; swapped order.  Validity is checked by ix86_sse5_valid_op_p with a final
;; argument of 1.
7593 (define_insn "sse5_pmacssww"
7594 [(set (match_operand:V8HI 0 "register_operand" "=x,x,x")
7596 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,m")
7597 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x"))
7598 (match_operand:V8HI 3 "nonimmediate_operand" "0,0,0")))]
7599 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)"
7601 pmacssww\t{%3, %2, %1, %0|%0, %1, %2, %3}
7602 pmacssww\t{%3, %2, %1, %0|%0, %1, %2, %3}
7603 pmacssww\t{%3, %1, %2, %0|%0, %2, %1, %3}"
7604 [(set_attr "type" "ssemuladd")
7605 (set_attr "mode" "TI")])
7607 ;; Note the instruction does not allow the value being added to be a memory
7608 ;; operand.  However, pretending via the nonimmediate_operand predicate
7609 ;; that it does, and splitting it later, allows the following to be recognized:
7610 ;; a[i] = b[i] * c[i] + d[i];
;; pmacsdd on V4SI with three alternatives.  Operand 3 is always tied to the
;; destination; a memory operand is accepted in operand 2 (alternative 2) or
;; operand 1 (alternative 3), and the third template prints %1 and %2 in
;; swapped order.  Validity is checked by ix86_sse5_valid_op_p with a final
;; argument of 2.
7611 (define_insn "sse5_pmacsdd"
7612 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
7615 (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
7616 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x"))
7617 (match_operand:V4SI 3 "nonimmediate_operand" "0,0,0")))]
7618 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 2)"
7620 pmacsdd\t{%3, %2, %1, %0|%0, %1, %2, %3}
7621 pmacsdd\t{%3, %2, %1, %0|%0, %1, %2, %3}
7622 pmacsdd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
7623 [(set_attr "type" "ssemuladd")
7624 (set_attr "mode" "TI")])
7626 ;; Split pmacsdd with two memory operands into a load and the pmacsdd.
7628 [(set (match_operand:V4SI 0 "register_operand" "")
7630 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "")
7631 (match_operand:V4SI 2 "nonimmediate_operand" ""))
7632 (match_operand:V4SI 3 "nonimmediate_operand" "")))]
7634 && !ix86_sse5_valid_op_p (operands, insn, 4, false, 1)
7635 && ix86_sse5_valid_op_p (operands, insn, 4, false, 2)
7636 && !reg_mentioned_p (operands[0], operands[1])
7637 && !reg_mentioned_p (operands[0], operands[2])
7638 && !reg_mentioned_p (operands[0], operands[3])"
7641 ix86_expand_sse5_multiple_memory (operands, 4, V4SImode);
7642 emit_insn (gen_sse5_pmacsdd (operands[0], operands[1], operands[2],
7647 (define_insn "sse5_pmacssdd"
7648 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
7650 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
7651 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x"))
7652 (match_operand:V4SI 3 "nonimmediate_operand" "0,0,0")))]
7653 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)"
7655 pmacssdd\t{%3, %2, %1, %0|%0, %1, %2, %3}
7656 pmacssdd\t{%3, %2, %1, %0|%0, %1, %2, %3}
7657 pmacssdd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
7658 [(set_attr "type" "ssemuladd")
7659 (set_attr "mode" "TI")])
7661 (define_insn "sse5_pmacssdql"
7662 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x")
7667 (match_operand:V4SI 1 "nonimmediate_operand" "x,x,m")
7668 (parallel [(const_int 1)
7671 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")
7672 (parallel [(const_int 1)
7674 (match_operand:V2DI 3 "register_operand" "0,0,0")))]
7675 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)"
7677 pmacssdql\t{%3, %2, %1, %0|%0, %1, %2, %3}
7678 pmacssdql\t{%3, %2, %1, %0|%0, %1, %2, %3}
7679 pmacssdql\t{%3, %1, %2, %0|%0, %2, %1, %3}"
7680 [(set_attr "type" "ssemuladd")
7681 (set_attr "mode" "TI")])
7683 (define_insn "sse5_pmacssdqh"
7684 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x")
7689 (match_operand:V4SI 1 "nonimmediate_operand" "x,x,m")
7690 (parallel [(const_int 0)
7694 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")
7695 (parallel [(const_int 0)
7697 (match_operand:V2DI 3 "register_operand" "0,0,0")))]
7698 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)"
7700 pmacssdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}
7701 pmacssdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}
7702 pmacssdqh\t{%3, %1, %2, %0|%0, %2, %1, %3}"
7703 [(set_attr "type" "ssemuladd")
7704 (set_attr "mode" "TI")])
7706 (define_insn "sse5_pmacsdql"
7707 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x")
7712 (match_operand:V4SI 1 "nonimmediate_operand" "x,x,m")
7713 (parallel [(const_int 1)
7717 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")
7718 (parallel [(const_int 1)
7720 (match_operand:V2DI 3 "register_operand" "0,0,0")))]
7721 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)"
7723 pmacsdql\t{%3, %2, %1, %0|%0, %1, %2, %3}
7724 pmacsdql\t{%3, %2, %1, %0|%0, %1, %2, %3}
7725 pmacsdql\t{%3, %1, %2, %0|%0, %2, %1, %3}"
7726 [(set_attr "type" "ssemuladd")
7727 (set_attr "mode" "TI")])
7729 (define_insn "sse5_pmacsdqh"
7730 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x")
7735 (match_operand:V4SI 1 "nonimmediate_operand" "x,x,m")
7736 (parallel [(const_int 0)
7740 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")
7741 (parallel [(const_int 0)
7743 (match_operand:V2DI 3 "register_operand" "0,0,0")))]
7744 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)"
7746 pmacsdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}
7747 pmacsdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}
7748 pmacsdqh\t{%3, %1, %2, %0|%0, %2, %1, %3}"
7749 [(set_attr "type" "ssemuladd")
7750 (set_attr "mode" "TI")])
7752 ;; SSE5 parallel integer multiply/add instructions for the intrinsics
7753 (define_insn "sse5_pmacsswd"
7754 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
7759 (match_operand:V8HI 1 "nonimmediate_operand" "x,x,m")
7760 (parallel [(const_int 1)
7766 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x")
7767 (parallel [(const_int 1)
7771 (match_operand:V4SI 3 "register_operand" "0,0,0")))]
7772 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)"
7774 pmacsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
7775 pmacsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
7776 pmacsswd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
7777 [(set_attr "type" "ssemuladd")
7778 (set_attr "mode" "TI")])
7780 (define_insn "sse5_pmacswd"
7781 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
7786 (match_operand:V8HI 1 "nonimmediate_operand" "x,x,m")
7787 (parallel [(const_int 1)
7793 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x")
7794 (parallel [(const_int 1)
7798 (match_operand:V4SI 3 "register_operand" "0,0,0")))]
7799 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)"
7801 pmacswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
7802 pmacswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
7803 pmacswd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
7804 [(set_attr "type" "ssemuladd")
7805 (set_attr "mode" "TI")])
7807 (define_insn "sse5_pmadcsswd"
7808 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
7814 (match_operand:V8HI 1 "nonimmediate_operand" "x,x,m")
7815 (parallel [(const_int 0)
7821 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x")
7822 (parallel [(const_int 0)
7830 (parallel [(const_int 1)
7837 (parallel [(const_int 1)
7841 (match_operand:V4SI 3 "register_operand" "0,0,0")))]
7842 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)"
7844 pmadcsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
7845 pmadcsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
7846 pmadcsswd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
7847 [(set_attr "type" "ssemuladd")
7848 (set_attr "mode" "TI")])
7850 (define_insn "sse5_pmadcswd"
7851 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
7857 (match_operand:V8HI 1 "nonimmediate_operand" "x,x,m")
7858 (parallel [(const_int 0)
7864 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x")
7865 (parallel [(const_int 0)
7873 (parallel [(const_int 1)
7880 (parallel [(const_int 1)
7884 (match_operand:V4SI 3 "register_operand" "0,0,0")))]
7885 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)"
7887 pmadcswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
7888 pmadcswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
7889 pmadcswd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
7890 [(set_attr "type" "ssemuladd")
7891 (set_attr "mode" "TI")])
7893 ;; SSE5 parallel XMM conditional moves
;; Operand 3 is the selector of the if_then_else: the result takes operand 1
;; where the selector is set and operand 2 elsewhere.  The first four
;; alternatives emit pcmov.  The last two cover a constant-zero arm
;; (constraint C) with a plain SSE logical instruction applied to the
;; selector, which those alternatives tie to the destination ("0"):
;;   operand 1 is zero:  result = ~%3 & %2  ->  andnps %2, %0
;;   operand 2 is zero:  result =  %3 & %1  ->  andps  %1, %0
;; (andnps complements its destination before the AND.)
;; In the Intel-syntax half of every template the destination comes first,
;; i.e. the operands are the exact reverse of the AT&T half.
7894 (define_insn "sse5_pcmov_<mode>"
7895 [(set (match_operand:SSEMODE 0 "register_operand" "=x,x,x,x,x,x")
7896 (if_then_else:SSEMODE
7897 (match_operand:SSEMODE 3 "nonimmediate_operand" "0,0,xm,xm,0,0")
7898 (match_operand:SSEMODE 1 "vector_move_operand" "x,xm,0,x,C,x")
7899 (match_operand:SSEMODE 2 "vector_move_operand" "xm,x,x,0,x,C")))]
7900 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
7902 pcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}
7903 pcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}
7904 pcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}
7905 pcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}
7906 andnps\t{%2, %0|%0, %2}
7907 andps\t{%1, %0|%0, %1}"
7908 [(set_attr "type" "sse4arg")])
7910 ;; SSE5 horizontal add/subtract instructions
7911 (define_insn "sse5_phaddbw"
7912 [(set (match_operand:V8HI 0 "register_operand" "=x")
7916 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
7917 (parallel [(const_int 0)
7928 (parallel [(const_int 1)
7935 (const_int 15)])))))]
7937 "phaddbw\t{%1, %0|%0, %1}"
7938 [(set_attr "type" "sseiadd1")])
7940 (define_insn "sse5_phaddbd"
7941 [(set (match_operand:V4SI 0 "register_operand" "=x")
7946 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
7947 (parallel [(const_int 0)
7954 (parallel [(const_int 1)
7962 (parallel [(const_int 2)
7969 (parallel [(const_int 3)
7972 (const_int 15)]))))))]
7974 "phaddbd\t{%1, %0|%0, %1}"
7975 [(set_attr "type" "sseiadd1")])
7977 (define_insn "sse5_phaddbq"
7978 [(set (match_operand:V2DI 0 "register_operand" "=x")
7984 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
7985 (parallel [(const_int 0)
7990 (parallel [(const_int 1)
7996 (parallel [(const_int 2)
8001 (parallel [(const_int 3)
8008 (parallel [(const_int 8)
8013 (parallel [(const_int 9)
8019 (parallel [(const_int 10)
8024 (parallel [(const_int 11)
8025 (const_int 15)])))))))]
8027 "phaddbq\t{%1, %0|%0, %1}"
8028 [(set_attr "type" "sseiadd1")])
8030 (define_insn "sse5_phaddwd"
8031 [(set (match_operand:V4SI 0 "register_operand" "=x")
8035 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
8036 (parallel [(const_int 0)
8043 (parallel [(const_int 1)
8046 (const_int 7)])))))]
8048 "phaddwd\t{%1, %0|%0, %1}"
8049 [(set_attr "type" "sseiadd1")])
8051 (define_insn "sse5_phaddwq"
8052 [(set (match_operand:V2DI 0 "register_operand" "=x")
8057 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
8058 (parallel [(const_int 0)
8063 (parallel [(const_int 1)
8069 (parallel [(const_int 2)
8074 (parallel [(const_int 3)
8075 (const_int 7)]))))))]
8077 "phaddwq\t{%1, %0|%0, %1}"
8078 [(set_attr "type" "sseiadd1")])
8080 (define_insn "sse5_phadddq"
8081 [(set (match_operand:V2DI 0 "register_operand" "=x")
8085 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
8086 (parallel [(const_int 0)
8091 (parallel [(const_int 1)
8092 (const_int 3)])))))]
8094 "phadddq\t{%1, %0|%0, %1}"
8095 [(set_attr "type" "sseiadd1")])
8097 (define_insn "sse5_phaddubw"
8098 [(set (match_operand:V8HI 0 "register_operand" "=x")
8102 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
8103 (parallel [(const_int 0)
8114 (parallel [(const_int 1)
8121 (const_int 15)])))))]
8123 "phaddubw\t{%1, %0|%0, %1}"
8124 [(set_attr "type" "sseiadd1")])
8126 (define_insn "sse5_phaddubd"
8127 [(set (match_operand:V4SI 0 "register_operand" "=x")
8132 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
8133 (parallel [(const_int 0)
8140 (parallel [(const_int 1)
8148 (parallel [(const_int 2)
8155 (parallel [(const_int 3)
8158 (const_int 15)]))))))]
8160 "phaddubd\t{%1, %0|%0, %1}"
8161 [(set_attr "type" "sseiadd1")])
8163 (define_insn "sse5_phaddubq"
8164 [(set (match_operand:V2DI 0 "register_operand" "=x")
8170 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
8171 (parallel [(const_int 0)
8176 (parallel [(const_int 1)
8182 (parallel [(const_int 2)
8187 (parallel [(const_int 3)
8194 (parallel [(const_int 8)
8199 (parallel [(const_int 9)
8205 (parallel [(const_int 10)
8210 (parallel [(const_int 11)
8211 (const_int 15)])))))))]
8213 "phaddubq\t{%1, %0|%0, %1}"
8214 [(set_attr "type" "sseiadd1")])
8216 (define_insn "sse5_phadduwd"
8217 [(set (match_operand:V4SI 0 "register_operand" "=x")
8221 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
8222 (parallel [(const_int 0)
8229 (parallel [(const_int 1)
8232 (const_int 7)])))))]
8234 "phadduwd\t{%1, %0|%0, %1}"
8235 [(set_attr "type" "sseiadd1")])
8237 (define_insn "sse5_phadduwq"
8238 [(set (match_operand:V2DI 0 "register_operand" "=x")
8243 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
8244 (parallel [(const_int 0)
8249 (parallel [(const_int 1)
8255 (parallel [(const_int 2)
8260 (parallel [(const_int 3)
8261 (const_int 7)]))))))]
8263 "phadduwq\t{%1, %0|%0, %1}"
8264 [(set_attr "type" "sseiadd1")])
8266 (define_insn "sse5_phaddudq"
8267 [(set (match_operand:V2DI 0 "register_operand" "=x")
8271 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
8272 (parallel [(const_int 0)
8277 (parallel [(const_int 1)
8278 (const_int 3)])))))]
8280 "phaddudq\t{%1, %0|%0, %1}"
8281 [(set_attr "type" "sseiadd1")])
8283 (define_insn "sse5_phsubbw"
8284 [(set (match_operand:V8HI 0 "register_operand" "=x")
8288 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
8289 (parallel [(const_int 0)
8300 (parallel [(const_int 1)
8307 (const_int 15)])))))]
8309 "phsubbw\t{%1, %0|%0, %1}"
8310 [(set_attr "type" "sseiadd1")])
8312 (define_insn "sse5_phsubwd"
8313 [(set (match_operand:V4SI 0 "register_operand" "=x")
8317 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
8318 (parallel [(const_int 0)
8325 (parallel [(const_int 1)
8328 (const_int 7)])))))]
8330 "phsubwd\t{%1, %0|%0, %1}"
8331 [(set_attr "type" "sseiadd1")])
8333 (define_insn "sse5_phsubdq"
8334 [(set (match_operand:V2DI 0 "register_operand" "=x")
8338 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
8339 (parallel [(const_int 0)
8344 (parallel [(const_int 1)
8345 (const_int 3)])))))]
8347 "phsubdq\t{%1, %0|%0, %1}"
8348 [(set_attr "type" "sseiadd1")])
8350 ;; SSE5 permute instructions
;; Byte permute: the V16QI result is an unspec (UNSPEC_SSE5_PERMUTE) of three
;; V16QI inputs.  Each of the four constraint alternatives ties exactly one
;; input to the destination ("0") and lets exactly one other input be a
;; register or memory ("xm"); the remaining input must be a register.
;; NOTE(review): ix86_sse5_valid_op_p is defined elsewhere; presumably it
;; re-checks the operand/memory combination -- confirm against i386.c.
8351 (define_insn "sse5_pperm"
8352 [(set (match_operand:V16QI 0 "register_operand" "=x,x,x,x")
8353 (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "0,0,xm,xm")
8354 (match_operand:V16QI 2 "nonimmediate_operand" "x,xm,0,x")
8355 (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,x,0")]
8356 UNSPEC_SSE5_PERMUTE))]
8357 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
8358 "pperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8359 [(set_attr "type" "sse4arg")
8360 (set_attr "mode" "TI")])
8362 ;; The following are for the various unpack insns which don't need the first
8363 ;; source operand, so we can just use the output operand for the first operand.
8364 ;; This allows either of the other two operands to be a memory operand. We
8365 ;; can't just use the first operand as an argument to the normal pperm because
8366 ;; then an output-only argument suddenly becomes an input operand.
;; Zero-extending unpack of V16QI bytes into V8HI via pperm.  The output
;; register doubles as the unused first pperm source (%0 appears twice in the
;; template).  Operand 2 is the vec_select parallel of const_ints and can
;; never be a register, so the "at least one register" test is applied to
;; operand 1 and the V16QI selector in operand 3, matching the "xm,x" /
;; "x,xm" constraint pairing of those two operands.
8367 (define_insn "sse5_pperm_zero_v16qi_v8hi"
8368 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
8371 (match_operand:V16QI 1 "nonimmediate_operand" "xm,x")
8372 (match_operand 2 "" "")))) ;; parallel with const_int's
8373 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
8375 && (register_operand (operands[1], V16QImode)
8376 || register_operand (operands[3], V16QImode))"
8377 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
8378 [(set_attr "type" "sseadd")
8379 (set_attr "mode" "TI")])
;; Sign-extending unpack of V16QI bytes into V8HI via pperm.  The output
;; register doubles as the unused first pperm source (%0 appears twice in the
;; template).  Operand 2 is the vec_select parallel of const_ints and can
;; never be a register, so the "at least one register" test is applied to
;; operand 1 and the V16QI selector in operand 3, matching the "xm,x" /
;; "x,xm" constraint pairing of those two operands.
8381 (define_insn "sse5_pperm_sign_v16qi_v8hi"
8382 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
8385 (match_operand:V16QI 1 "nonimmediate_operand" "xm,x")
8386 (match_operand 2 "" "")))) ;; parallel with const_int's
8387 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
8389 && (register_operand (operands[1], V16QImode)
8390 || register_operand (operands[3], V16QImode))"
8391 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
8392 [(set_attr "type" "sseadd")
8393 (set_attr "mode" "TI")])
;; Zero-extending unpack of V8HI words into V4SI via pperm.  The output
;; register doubles as the unused first pperm source (%0 appears twice in the
;; template).  Operand 2 is the vec_select parallel of const_ints and can
;; never be a register, so the "at least one register" test is applied to
;; operand 1 and the V16QI selector in operand 3, matching the "xm,x" /
;; "x,xm" constraint pairing of those two operands.
8395 (define_insn "sse5_pperm_zero_v8hi_v4si"
8396 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
8399 (match_operand:V8HI 1 "nonimmediate_operand" "xm,x")
8400 (match_operand 2 "" "")))) ;; parallel with const_int's
8401 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
8403 && (register_operand (operands[1], V8HImode)
8404 || register_operand (operands[3], V16QImode))"
8405 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
8406 [(set_attr "type" "sseadd")
8407 (set_attr "mode" "TI")])
;; Sign-extending unpack of V8HI words into V4SI via pperm.  The output
;; register doubles as the unused first pperm source (%0 appears twice in the
;; template).  Operand 2 is the vec_select parallel of const_ints and can
;; never be a register, so the "at least one register" test is applied to
;; operand 1 and the V16QI selector in operand 3, matching the "xm,x" /
;; "x,xm" constraint pairing of those two operands.
8409 (define_insn "sse5_pperm_sign_v8hi_v4si"
8410 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
8413 (match_operand:V8HI 1 "nonimmediate_operand" "xm,x")
8414 (match_operand 2 "" "")))) ;; parallel with const_int's
8415 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
8417 && (register_operand (operands[1], V8HImode)
8418 || register_operand (operands[3], V16QImode))"
8419 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
8420 [(set_attr "type" "sseadd")
8421 (set_attr "mode" "TI")])
;; Zero-extending unpack of V4SI dwords into V2DI via pperm.  The output
;; register doubles as the unused first pperm source (%0 appears twice in the
;; template).  Operand 2 is the vec_select parallel of const_ints and can
;; never be a register, so the "at least one register" test is applied to
;; operand 1 and the V16QI selector in operand 3, matching the "xm,x" /
;; "x,xm" constraint pairing of those two operands.
8423 (define_insn "sse5_pperm_zero_v4si_v2di"
8424 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
8427 (match_operand:V4SI 1 "nonimmediate_operand" "xm,x")
8428 (match_operand 2 "" "")))) ;; parallel with const_int's
8429 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
8431 && (register_operand (operands[1], V4SImode)
8432 || register_operand (operands[3], V16QImode))"
8433 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
8434 [(set_attr "type" "sseadd")
8435 (set_attr "mode" "TI")])
;; Sign-extending unpack of V4SI dwords into V2DI via pperm.  The output
;; register doubles as the unused first pperm source (%0 appears twice in the
;; template).  Operand 2 is the vec_select parallel of const_ints and can
;; never be a register, so the "at least one register" test is applied to
;; operand 1 and the V16QI selector in operand 3, matching the "xm,x" /
;; "x,xm" constraint pairing of those two operands.
8437 (define_insn "sse5_pperm_sign_v4si_v2di"
8438 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
8441 (match_operand:V4SI 1 "nonimmediate_operand" "xm,x")
8442 (match_operand 2 "" "")))) ;; parallel with const_int's
8443 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
8445 && (register_operand (operands[1], V4SImode)
8446 || register_operand (operands[3], V16QImode))"
8447 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
8448 [(set_attr "type" "sseadd")
8449 (set_attr "mode" "TI")])
8451 ;; SSE5 pack instructions that combine two vectors into a smaller vector
8452 (define_insn "sse5_pperm_pack_v2di_v4si"
8453 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x,x")
8456 (match_operand:V2DI 1 "nonimmediate_operand" "0,0,xm,xm"))
8458 (match_operand:V2DI 2 "nonimmediate_operand" "x,xm,0,x"))))
8459 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,x,0"))]
8460 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
8461 "pperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8462 [(set_attr "type" "sse4arg")
8463 (set_attr "mode" "TI")])
8465 (define_insn "sse5_pperm_pack_v4si_v8hi"
8466 [(set (match_operand:V8HI 0 "register_operand" "=x,x,x,x")
8469 (match_operand:V4SI 1 "nonimmediate_operand" "0,0,xm,xm"))
8471 (match_operand:V4SI 2 "nonimmediate_operand" "x,xm,0,x"))))
8472 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,x,0"))]
8473 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
8474 "pperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8475 [(set_attr "type" "sse4arg")
8476 (set_attr "mode" "TI")])
8478 (define_insn "sse5_pperm_pack_v8hi_v16qi"
8479 [(set (match_operand:V16QI 0 "register_operand" "=x,x,x,x")
8482 (match_operand:V8HI 1 "nonimmediate_operand" "0,0,xm,xm"))
8484 (match_operand:V8HI 2 "nonimmediate_operand" "x,xm,0,x"))))
8485 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,x,0"))]
8486 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
8487 "pperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8488 [(set_attr "type" "sse4arg")
8489 (set_attr "mode" "TI")])
8491 ;; Floating point permutation (permps, permpd)
8492 (define_insn "sse5_perm<mode>"
8493 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
8495 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0,xm,xm")
8496 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,0,x")
8497 (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,x,0")]
8498 UNSPEC_SSE5_PERMUTE))]
8499 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
8500 "perm<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8501 [(set_attr "type" "sse4arg")
8502 (set_attr "mode" "<MODE>")])
8504 ;; SSE5 packed rotate instructions
8505 (define_insn "rotl<mode>3"
8506 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
8508 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm")
8509 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
8511 "prot<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
8512 [(set_attr "type" "sseishft")
8513 (set_attr "mode" "TI")])
8515 (define_insn "sse5_rotl<mode>3"
8516 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
8518 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "x,xm")
8519 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm,x")))]
8520 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 3, true, 1)"
8521 "prot<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
8522 [(set_attr "type" "sseishft")
8523 (set_attr "mode" "TI")])
8525 ;; SSE5 packed shift instructions. Note negative values for the shift amount
8526 ;; convert this into a right shift instead of left shift. For now, model this
8527 ;; with an UNSPEC instead of using ashift/lshift since the rest of the x86 does
8528 ;; not have the concept of negating the shift amount. Also, there is no LSHIFT
;; rtx code, so the logical shift is modeled with an UNSPEC as well.
8529 (define_insn "sse5_ashl<mode>3"
8530 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
8532 [(match_operand:SSEMODE1248 1 "nonimmediate_operand" "x,xm")
8533 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm,x")]
8534 UNSPEC_SSE5_ASHIFT))]
8535 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 3, true, 1)"
8536 "psha<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
8537 [(set_attr "type" "sseishft")
8538 (set_attr "mode" "TI")])
8540 (define_insn "sse5_lshl<mode>3"
8541 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
8543 [(match_operand:SSEMODE1248 1 "nonimmediate_operand" "x,xm")
8544 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm,x")]
8545 UNSPEC_SSE5_LSHIFT))]
8546 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 3, true, 1)"
8547 "pshl<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
8548 [(set_attr "type" "sseishft")
8549 (set_attr "mode" "TI")])
8551 ;; SSE5 FRCZ support
;; Packed FRCZ on V4SF/V2DF: operand 1 (register or memory) is the source,
;; operand 0 the destination register.  The mnemonic suffix comes from the
;; ssemodesuffixf4 mode attribute (V4SF -> "ps", V2DF -> "pd"); an attribute
;; name that is not defined would be left unsubstituted in the emitted
;; assembly.
8553 (define_insn "sse5_frcz<mode>2"
8554 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
8556 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm")]
8559 "frcz<ssemodesuffixf4>\t{%1, %0|%0, %1}"
8560 [(set_attr "type" "ssecvt1")
8561 (set_attr "prefix_extra" "1")
8562 (set_attr "mode" "<MODE>")])
;; Scalar (vec_merge) form of FRCZ: operand 2 (register or memory) is the
;; source, operand 1 is tied to the destination ("0").  The mnemonic suffix
;; comes from the ssemodesuffixf2s mode attribute (V4SF -> "ss",
;; V2DF -> "sd"); an attribute name that is not defined would be left
;; unsubstituted in the emitted assembly.
8565 (define_insn "sse5_vmfrcz<mode>2"
8566 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
8567 (vec_merge:SSEMODEF2P
8569 [(match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
8571 (match_operand:SSEMODEF2P 1 "register_operand" "0")
8574 "frcz<ssemodesuffixf2s>\t{%2, %0|%0, %2}"
8575 [(set_attr "type" "ssecvt1")
8576 (set_attr "prefix_extra" "1")
8577 (set_attr "mode" "<MODE>")])
8579 (define_insn "sse5_cvtph2ps"
8580 [(set (match_operand:V4SF 0 "register_operand" "=x")
8581 (unspec:V4SF [(match_operand:V4HI 1 "nonimmediate_operand" "xm")]
8584 "cvtph2ps\t{%1, %0|%0, %1}"
8585 [(set_attr "type" "ssecvt")
8586 (set_attr "mode" "V4SF")])
8588 (define_insn "sse5_cvtps2ph"
8589 [(set (match_operand:V4HI 0 "nonimmediate_operand" "=xm")
8590 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")]
8593 "cvtps2ph\t{%1, %0|%0, %1}"
8594 [(set_attr "type" "ssecvt")
8595 (set_attr "mode" "V4SF")])
8597 ;; Scalar versions of the com instructions that use vector types that are
8598 ;; called from the intrinsics. Unlike the other s{s,d} instructions, the
8599 ;; com instructions fill in 0's in the upper bits instead of leaving them
8600 ;; unmodified, so we use const_vector of 0 instead of match_dup.
8601 (define_expand "sse5_vmmaskcmp<mode>3"
8602 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
8603 (vec_merge:SSEMODEF2P
8604 (match_operator:SSEMODEF2P 1 "sse5_comparison_float_operator"
8605 [(match_operand:SSEMODEF2P 2 "register_operand" "")
8606 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "")])
8611 operands[4] = CONST0_RTX (<MODE>mode);
8614 (define_insn "*sse5_vmmaskcmp<mode>3"
8615 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
8616 (vec_merge:SSEMODEF2P
8617 (match_operator:SSEMODEF2P 1 "sse5_comparison_float_operator"
8618 [(match_operand:SSEMODEF2P 2 "register_operand" "x")
8619 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm")])
8620 (match_operand:SSEMODEF2P 4 "")
8623 "com%Y1<ssemodesuffixf2s>\t{%3, %2, %0|%0, %2, %3}"
8624 [(set_attr "type" "sse4arg")
8625 (set_attr "mode" "<ssescalarmode>")])
8627 ;; We don't have a comparison operator that always returns true/false, so
8628 ;; handle comfalse and comtrue specially.
;; Operand 3 is a const_int that selects which of the four mnemonics is
;; emitted: comfalse or comtrue, each in its "s" and "p" form, with the
;; final s/d letter supplied by ssemodesuffixf2c.  The two comtrue arms must
;; emit comtrue*, not a second copy of the comfalse* strings.
8629 (define_insn "sse5_com_tf<mode>3"
8630 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
8632 [(match_operand:SSEMODEF2P 1 "register_operand" "x")
8633 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
8634 (match_operand:SI 3 "const_int_operand" "n")]
8635 UNSPEC_SSE5_TRUEFALSE))]
8638 const char *ret = NULL;
8640 switch (INTVAL (operands[3]))
8643 ret = \"comfalses<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}\";
8647 ret = \"comfalsep<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}\";
8651 ret = \"comtrues<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}\";
8655 ret = \"comtruep<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}\";
8664 [(set_attr "type" "ssecmp")
8665 (set_attr "mode" "<MODE>")])
8667 (define_insn "sse5_maskcmp<mode>3"
8668 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
8669 (match_operator:SSEMODEF2P 1 "sse5_comparison_float_operator"
8670 [(match_operand:SSEMODEF2P 2 "register_operand" "x")
8671 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm")]))]
8673 "com%Y1<ssemodesuffixf4>\t{%3, %2, %0|%0, %2, %3}"
8674 [(set_attr "type" "ssecmp")
8675 (set_attr "mode" "<MODE>")])
8677 (define_insn "sse5_maskcmp<mode>3"
8678 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
8679 (match_operator:SSEMODE1248 1 "ix86_comparison_int_operator"
8680 [(match_operand:SSEMODE1248 2 "register_operand" "x")
8681 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")]))]
8683 "pcom%Y1<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
8684 [(set_attr "type" "sse4arg")
8685 (set_attr "mode" "TI")])
8687 (define_insn "sse5_maskcmp_uns<mode>3"
8688 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
8689 (match_operator:SSEMODE1248 1 "ix86_comparison_uns_operator"
8690 [(match_operand:SSEMODE1248 2 "register_operand" "x")
8691 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")]))]
8693 "pcom%Y1u<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
8694 [(set_attr "type" "ssecmp")
8695 (set_attr "mode" "TI")])
8697 ;; Version of pcom*u* that is called from the intrinsics that allows pcomequ*
8698 ;; and pcomneu* not to be converted to the signed ones in case somebody needs
8699 ;; the exact instruction generated for the intrinsic.
8700 (define_insn "sse5_maskcmp_uns2<mode>3"
8701 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
8703 [(match_operator:SSEMODE1248 1 "ix86_comparison_uns_operator"
8704 [(match_operand:SSEMODE1248 2 "register_operand" "x")
8705 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")])]
8706 UNSPEC_SSE5_UNSIGNED_CMP))]
8708 "pcom%Y1u<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
8709 [(set_attr "type" "ssecmp")
8710 (set_attr "mode" "TI")])
8712 ;; Pcomtrue and pcomfalse support. These are useless instructions, but are
8713 ;; being added here to be complete.
8714 (define_insn "sse5_pcom_tf<mode>3"
8715 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
8716 (unspec:SSEMODE1248 [(match_operand:SSEMODE1248 1 "register_operand" "x")
8717 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm")
8718 (match_operand:SI 3 "const_int_operand" "n")]
8719 UNSPEC_SSE5_TRUEFALSE))]
8722 return ((INTVAL (operands[3]) != 0)
8723 ? "pcomtrue<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
8724 : "pcomfalse<ssevecsize>\t{%2, %1, %0|%0, %1, %2}");
8726 [(set_attr "type" "ssecmp")
8727 (set_attr "mode" "TI")])