1 ;; GCC machine description for SSE instructions
3 ;; Free Software Foundation, Inc.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify
8 ;; it under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 2, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful,
13 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ;; GNU General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING. If not, write to
19 ;; the Free Software Foundation, 59 Temple Place - Suite 330,
20 ;; Boston, MA 02111-1307, USA.
23 ;; 16 byte integral modes handled by SSE, minus TImode, which gets
24 ;; special-cased for TARGET_64BIT.
25 (define_mode_macro SSEMODEI [V16QI V8HI V4SI V2DI])
27 ;; All 16-byte vector modes handled by SSE
28 (define_mode_macro SSEMODE [V16QI V8HI V4SI V2DI V4SF V2DF])
;; Subsets of the integer vector modes.  The digits in each name appear to
;; list the element widths in bytes (1 = QI, 2 = HI, 4 = SI, 8 = DI).
31 (define_mode_macro SSEMODE12 [V16QI V8HI])
32 (define_mode_macro SSEMODE24 [V8HI V4SI])
33 (define_mode_macro SSEMODE124 [V16QI V8HI V4SI])
34 (define_mode_macro SSEMODE248 [V8HI V4SI V2DI])
36 ;; Mapping from integer vector mode to mnemonic suffix
37 (define_mode_attr ssevecsize [(V16QI "b") (V8HI "w") (V4SI "d") (V2DI "q")])
39 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
41 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
45 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
47 ;; All of these patterns are enabled for SSE1 as well as SSE2.
48 ;; This is essential for maintaining stable calling conventions.
;; Move expander for every SSEMODEI integer vector mode; the actual work is
;; delegated to ix86_expand_vector_move.
50 (define_expand "mov<mode>"
51 [(set (match_operand:SSEMODEI 0 "nonimmediate_operand" "")
52 (match_operand:SSEMODEI 1 "nonimmediate_operand" ""))]
55 ix86_expand_vector_move (<MODE>mode, operands);
;; Move insn for the SSEMODEI integer vector modes; memory-to-memory moves
;; are rejected by the condition.  The output code returns xorps/pxor or
;; movaps/movdqa, taking the "ps" form whenever the insn's "mode" attribute
;; is V4SF.  That attribute is computed by the cond at the end from
;; TARGET_SSE2, optimize_size and TARGET_SSE_TYPELESS_STORES, with TI as
;; the final default.
59 (define_insn "*mov<mode>_internal"
60 [(set (match_operand:SSEMODEI 0 "nonimmediate_operand" "=x,x ,m")
61 (match_operand:SSEMODEI 1 "vector_move_operand" "C ,xm,x"))]
62 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
64 switch (which_alternative)
67 if (get_attr_mode (insn) == MODE_V4SF)
68 return "xorps\t%0, %0";
70 return "pxor\t%0, %0";
73 if (get_attr_mode (insn) == MODE_V4SF)
74 return "movaps\t{%1, %0|%0, %1}";
76 return "movdqa\t{%1, %0|%0, %1}";
81 [(set_attr "type" "sselog1,ssemov,ssemov")
83 (cond [(eq (symbol_ref "TARGET_SSE2") (const_int 0))
86 (eq_attr "alternative" "0,1")
88 (ne (symbol_ref "optimize_size")
92 (eq_attr "alternative" "2")
94 (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
96 (ne (symbol_ref "optimize_size")
100 (const_string "TI")))])
;; Move expander for V4SF; delegates to ix86_expand_vector_move.
102 (define_expand "movv4sf"
103 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
104 (match_operand:V4SF 1 "nonimmediate_operand" ""))]
107 ix86_expand_vector_move (V4SFmode, operands);
;; V4SF move insn with three alternatives: a "C" constant into a register,
;; register-or-memory into a register, and register into memory.  The
;; "mode" attribute is always V4SF.
111 (define_insn "*movv4sf_internal"
112 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
113 (match_operand:V4SF 1 "vector_move_operand" "C,xm,x"))]
117 movaps\t{%1, %0|%0, %1}
118 movaps\t{%1, %0|%0, %1}"
119 [(set_attr "type" "sselog1,ssemov,ssemov")
120 (set_attr "mode" "V4SF")])
;; After reload, rewrite a V4SF register load whose source satisfies
;; zero_extended_scalar_load_operand: operand 1 is narrowed to its SFmode
;; subreg at offset 0 and operand 2 is set to the V4SF zero vector for use
;; in the replacement rtl.
123 [(set (match_operand:V4SF 0 "register_operand" "")
124 (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
125 "TARGET_SSE && reload_completed"
128 (vec_duplicate:V4SF (match_dup 1))
132 operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
133 operands[2] = CONST0_RTX (V4SFmode);
;; Move expander for V2DF; delegates to ix86_expand_vector_move.
136 (define_expand "movv2df"
137 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
138 (match_operand:V2DF 1 "nonimmediate_operand" ""))]
141 ix86_expand_vector_move (V2DFmode, operands);
;; V2DF move insn; memory-to-memory moves are rejected by the condition.
;; The output code returns xorps/xorpd or movaps/movapd, using the "ps"
;; form whenever the insn's "mode" attribute is V4SF.  The attribute is
;; V4SF when TARGET_SSE2 is off, otherwise it is chosen per alternative
;; from optimize_size and TARGET_SSE_TYPELESS_STORES, defaulting to V2DF.
145 (define_insn "*movv2df_internal"
146 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
147 (match_operand:V2DF 1 "vector_move_operand" "C,xm,x"))]
148 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
150 switch (which_alternative)
153 if (get_attr_mode (insn) == MODE_V4SF)
154 return "xorps\t%0, %0";
156 return "xorpd\t%0, %0";
159 if (get_attr_mode (insn) == MODE_V4SF)
160 return "movaps\t{%1, %0|%0, %1}";
162 return "movapd\t{%1, %0|%0, %1}";
167 [(set_attr "type" "sselog1,ssemov,ssemov")
169 (cond [(eq (symbol_ref "TARGET_SSE2") (const_int 0))
170 (const_string "V4SF")
171 (eq_attr "alternative" "0,1")
173 (ne (symbol_ref "optimize_size")
175 (const_string "V4SF")
176 (const_string "V2DF"))
177 (eq_attr "alternative" "2")
179 (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
181 (ne (symbol_ref "optimize_size")
183 (const_string "V4SF")
184 (const_string "V2DF"))]
185 (const_string "V2DF")))])
;; After reload (SSE2 only), rewrite a V2DF register load whose source
;; satisfies zero_extended_scalar_load_operand as a vec_concat of the
;; DFmode scalar (subreg of operand 1 at offset 0) with a DFmode zero.
188 [(set (match_operand:V2DF 0 "register_operand" "")
189 (match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))]
190 "TARGET_SSE2 && reload_completed"
191 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
193 operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
194 operands[2] = CONST0_RTX (DFmode);
;; Push expander for a register in any SSEMODE vector mode; delegates to
;; ix86_expand_push.
197 (define_expand "push<mode>1"
198 [(match_operand:SSEMODE 0 "register_operand" "")]
201 ix86_expand_push (<MODE>mode, operands[0]);
;; Move expander for any SSEMODE vector mode that delegates to
;; ix86_expand_vector_move_misalign.
205 (define_expand "movmisalign<mode>"
206 [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "")
207 (match_operand:SSEMODE 1 "nonimmediate_operand" ""))]
210 ix86_expand_vector_move_misalign (<MODE>mode, operands);
;; V4SF move wrapped in an unspec and emitted as movups, either into a
;; register or into memory; memory-to-memory is rejected by the condition.
;; The "mode" attribute is V4SF, matching the operand mode and the "ps"
;; mnemonic (it used to read V2DF, which belongs to sse2_movupd and
;; mis-describes this insn to attribute-driven code such as length and
;; scheduling computations).
214 (define_insn "sse_movups"
215 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
216 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,x")]
218 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
219 "movups\t{%1, %0|%0, %1}"
220 [(set_attr "type" "ssemov")
221 (set_attr "mode" "V4SF")])
;; V2DF move wrapped in an unspec and emitted as movupd (SSE2 only), either
;; into a register or into memory; memory-to-memory is rejected.
223 (define_insn "sse2_movupd"
224 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,m")
225 (unspec:V2DF [(match_operand:V2DF 1 "nonimmediate_operand" "xm,x")]
227 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
228 "movupd\t{%1, %0|%0, %1}"
229 [(set_attr "type" "ssemov")
230 (set_attr "mode" "V2DF")])
;; V16QI move wrapped in an unspec and emitted as movdqu (SSE2 only),
;; either into a register or into memory; memory-to-memory is rejected.
232 (define_insn "sse2_movdqu"
233 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
234 (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "xm,x")]
236 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
237 "movdqu\t{%1, %0|%0, %1}"
238 [(set_attr "type" "ssemov")
239 (set_attr "mode" "TI")])
;; Store of a V4SF SSE register to memory, wrapped in an unspec and emitted
;; as movntps.
241 (define_insn "sse_movntv4sf"
242 [(set (match_operand:V4SF 0 "memory_operand" "=m")
243 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "x")]
246 "movntps\t{%1, %0|%0, %1}"
247 [(set_attr "type" "ssemov")
248 (set_attr "mode" "V4SF")])
;; Store of a V2DF SSE register to memory, wrapped in an unspec and emitted
;; as movntpd.
250 (define_insn "sse2_movntv2df"
251 [(set (match_operand:V2DF 0 "memory_operand" "=m")
252 (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "x")]
255 "movntpd\t{%1, %0|%0, %1}"
256 [(set_attr "type" "ssecvt")
257 (set_attr "mode" "V2DF")])
;; Store of a V2DI SSE register to memory, wrapped in an unspec and emitted
;; as movntdq.
259 (define_insn "sse2_movntv2di"
260 [(set (match_operand:V2DI 0 "memory_operand" "=m")
261 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")]
264 "movntdq\t{%1, %0|%0, %1}"
265 [(set_attr "type" "ssecvt")
266 (set_attr "mode" "TI")])
;; Store of an SImode general register ("r") to memory, wrapped in an
;; unspec and emitted as movnti.  Both operands are SImode, so the "mode"
;; attribute is SI (it used to read V2DF, which does not describe an
;; integer store and mis-describes the insn to attribute-driven code such
;; as length and scheduling computations).
268 (define_insn "sse2_movntsi"
269 [(set (match_operand:SI 0 "memory_operand" "=m")
270 (unspec:SI [(match_operand:SI 1 "register_operand" "r")]
273 "movnti\t{%1, %0|%0, %1}"
274 [(set_attr "type" "ssecvt")
275 (set_attr "mode" "SI")])
;; Load of a V16QI memory operand into an SSE register, wrapped in an
;; unspec and emitted as lddqu.
277 (define_insn "sse3_lddqu"
278 [(set (match_operand:V16QI 0 "register_operand" "=x")
279 (unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "m")]
282 "lddqu\t{%1, %0|%0, %1}"
283 [(set_attr "type" "ssecvt")
284 (set_attr "mode" "TI")])
286 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
288 ;; Parallel single-precision floating point arithmetic
290 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; V4SF negation expander; hands the operands to
;; ix86_expand_fp_absneg_operator with code NEG and then finishes (DONE).
292 (define_expand "negv4sf2"
293 [(set (match_operand:V4SF 0 "register_operand" "")
294 (neg:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
296 "ix86_expand_fp_absneg_operator (NEG, V4SFmode, operands); DONE;")
;; V4SF absolute-value expander; hands the operands to
;; ix86_expand_fp_absneg_operator with code ABS and then finishes (DONE).
298 (define_expand "absv4sf2"
299 [(set (match_operand:V4SF 0 "register_operand" "")
300 (abs:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
302 "ix86_expand_fp_absneg_operator (ABS, V4SFmode, operands); DONE;")
;; V4SF addition expander; runs ix86_fixup_binary_operands_no_copy on the
;; operands before the plus pattern is emitted.
304 (define_expand "addv4sf3"
305 [(set (match_operand:V4SF 0 "register_operand" "")
306 (plus:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
307 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
309 "ix86_fixup_binary_operands_no_copy (PLUS, V4SFmode, operands);")
;; V4SF addition emitted as addps.  Operand 1 is tied to the output
;; register and marked commutative with operand 2 ("%0"); the insn is only
;; valid when ix86_binary_operator_ok accepts the operands.
311 (define_insn "*addv4sf3"
312 [(set (match_operand:V4SF 0 "register_operand" "=x")
313 (plus:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
314 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
315 "TARGET_SSE && ix86_binary_operator_ok (PLUS, V4SFmode, operands)"
316 "addps\t{%2, %0|%0, %2}"
317 [(set_attr "type" "sseadd")
318 (set_attr "mode" "V4SF")])
;; Intrinsic pattern built around a V4SF plus and emitted as addss, with
;; "mode" attribute SF.  Operand 1 is tied to the output register.
320 (define_insn "sse_vmaddv4sf3"
321 [(set (match_operand:V4SF 0 "register_operand" "=x")
323 (plus:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
324 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
327 "TARGET_SSE && ix86_binary_operator_ok (PLUS, V4SFmode, operands)"
328 "addss\t{%2, %0|%0, %2}"
329 [(set_attr "type" "sseadd")
330 (set_attr "mode" "SF")])
;; V4SF subtraction expander; operand 1 must be a register.  Runs
;; ix86_fixup_binary_operands_no_copy on the operands.
332 (define_expand "subv4sf3"
333 [(set (match_operand:V4SF 0 "register_operand" "")
334 (minus:V4SF (match_operand:V4SF 1 "register_operand" "")
335 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
337 "ix86_fixup_binary_operands_no_copy (MINUS, V4SFmode, operands);")
;; V4SF subtraction emitted as subps.  Operand 1 is a register tied to the
;; output; operand 2 may be a register or memory.
339 (define_insn "*subv4sf3"
340 [(set (match_operand:V4SF 0 "register_operand" "=x")
341 (minus:V4SF (match_operand:V4SF 1 "register_operand" "0")
342 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
344 "subps\t{%2, %0|%0, %2}"
345 [(set_attr "type" "sseadd")
346 (set_attr "mode" "V4SF")])
;; Intrinsic pattern built around a V4SF minus and emitted as subss, with
;; "mode" attribute SF.  Operand 1 is tied to the output register.
348 (define_insn "sse_vmsubv4sf3"
349 [(set (match_operand:V4SF 0 "register_operand" "=x")
351 (minus:V4SF (match_operand:V4SF 1 "register_operand" "0")
352 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
356 "subss\t{%2, %0|%0, %2}"
357 [(set_attr "type" "sseadd")
358 (set_attr "mode" "SF")])
;; V4SF multiplication expander; runs ix86_fixup_binary_operands_no_copy
;; on the operands before the mult pattern is emitted.
360 (define_expand "mulv4sf3"
361 [(set (match_operand:V4SF 0 "register_operand" "")
362 (mult:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
363 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
365 "ix86_fixup_binary_operands_no_copy (MULT, V4SFmode, operands);")
;; V4SF multiplication emitted as mulps.  Operand 1 is tied to the output
;; register and marked commutative with operand 2 ("%0"); validity is
;; gated by ix86_binary_operator_ok.
367 (define_insn "*mulv4sf3"
368 [(set (match_operand:V4SF 0 "register_operand" "=x")
369 (mult:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
370 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
371 "TARGET_SSE && ix86_binary_operator_ok (MULT, V4SFmode, operands)"
372 "mulps\t{%2, %0|%0, %2}"
373 [(set_attr "type" "ssemul")
374 (set_attr "mode" "V4SF")])
;; Intrinsic pattern built around a V4SF mult and emitted as mulss, with
;; "mode" attribute SF.  Operand 1 is tied to the output register.
376 (define_insn "sse_vmmulv4sf3"
377 [(set (match_operand:V4SF 0 "register_operand" "=x")
379 (mult:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
380 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
383 "TARGET_SSE && ix86_binary_operator_ok (MULT, V4SFmode, operands)"
384 "mulss\t{%2, %0|%0, %2}"
385 [(set_attr "type" "ssemul")
386 (set_attr "mode" "SF")])
;; V4SF division expander; operand 1 must be a register.  Runs
;; ix86_fixup_binary_operands_no_copy on the operands.
388 (define_expand "divv4sf3"
389 [(set (match_operand:V4SF 0 "register_operand" "")
390 (div:V4SF (match_operand:V4SF 1 "register_operand" "")
391 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
393 "ix86_fixup_binary_operands_no_copy (DIV, V4SFmode, operands);")
;; V4SF division emitted as divps.  Operand 1 is a register tied to the
;; output; operand 2 may be a register or memory.
395 (define_insn "*divv4sf3"
396 [(set (match_operand:V4SF 0 "register_operand" "=x")
397 (div:V4SF (match_operand:V4SF 1 "register_operand" "0")
398 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
400 "divps\t{%2, %0|%0, %2}"
401 [(set_attr "type" "ssediv")
402 (set_attr "mode" "V4SF")])
;; Intrinsic pattern built around a V4SF div and emitted as divss, with
;; "mode" attribute SF.  Operand 1 is tied to the output register.
404 (define_insn "sse_vmdivv4sf3"
405 [(set (match_operand:V4SF 0 "register_operand" "=x")
407 (div:V4SF (match_operand:V4SF 1 "register_operand" "0")
408 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
412 "divss\t{%2, %0|%0, %2}"
413 [(set_attr "type" "ssediv")
414 (set_attr "mode" "SF")])
;; UNSPEC_RCP applied to a V4SF register-or-memory operand, emitted as
;; rcpps.
416 (define_insn "sse_rcpv4sf2"
417 [(set (match_operand:V4SF 0 "register_operand" "=x")
419 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
421 "rcpps\t{%1, %0|%0, %1}"
422 [(set_attr "type" "sse")
423 (set_attr "mode" "V4SF")])
;; Intrinsic pattern emitted as rcpss with "mode" attribute SF.  Operand 1
;; feeds the unspec; operand 2 is a register tied to the output.
425 (define_insn "sse_vmrcpv4sf2"
426 [(set (match_operand:V4SF 0 "register_operand" "=x")
428 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
430 (match_operand:V4SF 2 "register_operand" "0")
433 "rcpss\t{%1, %0|%0, %1}"
434 [(set_attr "type" "sse")
435 (set_attr "mode" "SF")])
;; UNSPEC_RSQRT applied to a V4SF register-or-memory operand, emitted as
;; rsqrtps.
437 (define_insn "sse_rsqrtv4sf2"
438 [(set (match_operand:V4SF 0 "register_operand" "=x")
440 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
442 "rsqrtps\t{%1, %0|%0, %1}"
443 [(set_attr "type" "sse")
444 (set_attr "mode" "V4SF")])
;; Intrinsic pattern emitted as rsqrtss with "mode" attribute SF.
;; Operand 1 feeds the unspec; operand 2 is a register tied to the output.
446 (define_insn "sse_vmrsqrtv4sf2"
447 [(set (match_operand:V4SF 0 "register_operand" "=x")
449 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
451 (match_operand:V4SF 2 "register_operand" "0")
454 "rsqrtss\t{%1, %0|%0, %1}"
455 [(set_attr "type" "sse")
456 (set_attr "mode" "SF")])
;; V4SF square root of a register-or-memory operand, emitted as sqrtps.
458 (define_insn "sqrtv4sf2"
459 [(set (match_operand:V4SF 0 "register_operand" "=x")
460 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
462 "sqrtps\t{%1, %0|%0, %1}"
463 [(set_attr "type" "sse")
464 (set_attr "mode" "V4SF")])
;; Intrinsic pattern emitted as sqrtss with "mode" attribute SF.  Operand 1
;; feeds the sqrt; operand 2 is a register tied to the output.
466 (define_insn "sse_vmsqrtv4sf2"
467 [(set (match_operand:V4SF 0 "register_operand" "=x")
469 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
470 (match_operand:V4SF 2 "register_operand" "0")
473 "sqrtss\t{%1, %0|%0, %1}"
474 [(set_attr "type" "sse")
475 (set_attr "mode" "SF")])
477 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
478 ;; isn't really correct, as those rtl operators aren't defined when
479 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
;; V4SF smax expander.  Unless flag_finite_math_only is set, operand 1 is
;; first forced into a register; then ix86_fixup_binary_operands_no_copy
;; is run on the operands.
481 (define_expand "smaxv4sf3"
482 [(set (match_operand:V4SF 0 "register_operand" "")
483 (smax:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
484 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
487 if (!flag_finite_math_only)
488 operands[1] = force_reg (V4SFmode, operands[1]);
489 ix86_fixup_binary_operands_no_copy (SMAX, V4SFmode, operands);
;; V4SF smax emitted as maxps, available only with flag_finite_math_only.
;; In this variant operand 1 is marked commutative with operand 2 ("%0").
492 (define_insn "*smaxv4sf3_finite"
493 [(set (match_operand:V4SF 0 "register_operand" "=x")
494 (smax:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
495 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
496 "TARGET_SSE && flag_finite_math_only
497 && ix86_binary_operator_ok (SMAX, V4SFmode, operands)"
498 "maxps\t{%2, %0|%0, %2}"
499 [(set_attr "type" "sse")
500 (set_attr "mode" "V4SF")])
;; V4SF smax emitted as maxps, with a fixed operand order: operand 1 must
;; be a register tied to the output and carries no commutative marker.
502 (define_insn "*smaxv4sf3"
503 [(set (match_operand:V4SF 0 "register_operand" "=x")
504 (smax:V4SF (match_operand:V4SF 1 "register_operand" "0")
505 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
507 "maxps\t{%2, %0|%0, %2}"
508 [(set_attr "type" "sse")
509 (set_attr "mode" "V4SF")])
;; Pattern built around a V4SF smax and emitted as maxss ("mode" SF),
;; available only with flag_finite_math_only; operand 1 is marked
;; commutative with operand 2 ("%0").
511 (define_insn "*sse_vmsmaxv4sf3_finite"
512 [(set (match_operand:V4SF 0 "register_operand" "=x")
514 (smax:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
515 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
518 "TARGET_SSE && flag_finite_math_only
519 && ix86_binary_operator_ok (SMAX, V4SFmode, operands)"
520 "maxss\t{%2, %0|%0, %2}"
521 [(set_attr "type" "sse")
522 (set_attr "mode" "SF")])
;; Intrinsic pattern built around a V4SF smax and emitted as maxss
;; ("mode" SF); operand 1 must be a register tied to the output.
524 (define_insn "sse_vmsmaxv4sf3"
525 [(set (match_operand:V4SF 0 "register_operand" "=x")
527 (smax:V4SF (match_operand:V4SF 1 "register_operand" "0")
528 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
532 "maxss\t{%2, %0|%0, %2}"
533 [(set_attr "type" "sse")
534 (set_attr "mode" "SF")])
;; V4SF smin expander.  Unless flag_finite_math_only is set, operand 1 is
;; first forced into a register; then ix86_fixup_binary_operands_no_copy
;; is run on the operands.
536 (define_expand "sminv4sf3"
537 [(set (match_operand:V4SF 0 "register_operand" "")
538 (smin:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
539 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
542 if (!flag_finite_math_only)
543 operands[1] = force_reg (V4SFmode, operands[1]);
544 ix86_fixup_binary_operands_no_copy (SMIN, V4SFmode, operands);
;; V4SF smin emitted as minps, available only with flag_finite_math_only.
;; In this variant operand 1 is marked commutative with operand 2 ("%0").
547 (define_insn "*sminv4sf3_finite"
548 [(set (match_operand:V4SF 0 "register_operand" "=x")
549 (smin:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
550 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
551 "TARGET_SSE && flag_finite_math_only
552 && ix86_binary_operator_ok (SMIN, V4SFmode, operands)"
553 "minps\t{%2, %0|%0, %2}"
554 [(set_attr "type" "sse")
555 (set_attr "mode" "V4SF")])
;; V4SF smin emitted as minps, with a fixed operand order: operand 1 must
;; be a register tied to the output and carries no commutative marker.
557 (define_insn "*sminv4sf3"
558 [(set (match_operand:V4SF 0 "register_operand" "=x")
559 (smin:V4SF (match_operand:V4SF 1 "register_operand" "0")
560 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
562 "minps\t{%2, %0|%0, %2}"
563 [(set_attr "type" "sse")
564 (set_attr "mode" "V4SF")])
;; Pattern built around a V4SF smin and emitted as minss ("mode" SF),
;; available only with flag_finite_math_only; operand 1 is marked
;; commutative with operand 2 ("%0").
566 (define_insn "*sse_vmsminv4sf3_finite"
567 [(set (match_operand:V4SF 0 "register_operand" "=x")
569 (smin:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
570 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
573 "TARGET_SSE && flag_finite_math_only
574 && ix86_binary_operator_ok (SMIN, V4SFmode, operands)"
575 "minss\t{%2, %0|%0, %2}"
576 [(set_attr "type" "sse")
577 (set_attr "mode" "SF")])
;; Intrinsic pattern built around a V4SF smin and emitted as minss
;; ("mode" SF); operand 1 must be a register tied to the output.
579 (define_insn "sse_vmsminv4sf3"
580 [(set (match_operand:V4SF 0 "register_operand" "=x")
582 (smin:V4SF (match_operand:V4SF 1 "register_operand" "0")
583 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
587 "minss\t{%2, %0|%0, %2}"
588 [(set_attr "type" "sseadd")
589 (set_attr "mode" "SF")])
;; SSE3 pattern emitted as addsubps.  The rtl combines a minus of operands
;; 1 and 2 with another operation on the same two operands; operand 1 is a
;; register tied to the output.
591 (define_insn "sse3_addsubv4sf3"
592 [(set (match_operand:V4SF 0 "register_operand" "=x")
595 (match_operand:V4SF 1 "register_operand" "0")
596 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
597 (minus:V4SF (match_dup 1) (match_dup 2))
600 "addsubps\t{%2, %0|%0, %2}"
601 [(set_attr "type" "sseadd")
602 (set_attr "mode" "V4SF")])
;; SSE3 pattern emitted as haddps.  The rtl picks elements 0..3 of operand
;; 1 and then elements 0..3 of operand 2 individually with vec_select;
;; operand 1 is a register tied to the output.
604 (define_insn "sse3_haddv4sf3"
605 [(set (match_operand:V4SF 0 "register_operand" "=x")
610 (match_operand:V4SF 1 "register_operand" "0")
611 (parallel [(const_int 0)]))
612 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
614 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
615 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
619 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
620 (parallel [(const_int 0)]))
621 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
623 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
624 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
626 "haddps\t{%2, %0|%0, %2}"
627 [(set_attr "type" "sseadd")
628 (set_attr "mode" "V4SF")])
;; SSE3 pattern emitted as hsubps.  The rtl picks elements 0..3 of operand
;; 1 and then elements 0..3 of operand 2 individually with vec_select;
;; operand 1 is a register tied to the output.
630 (define_insn "sse3_hsubv4sf3"
631 [(set (match_operand:V4SF 0 "register_operand" "=x")
636 (match_operand:V4SF 1 "register_operand" "0")
637 (parallel [(const_int 0)]))
638 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
640 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
641 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
645 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
646 (parallel [(const_int 0)]))
647 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
649 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
650 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
652 "hsubps\t{%2, %0|%0, %2}"
653 [(set_attr "type" "sseadd")
654 (set_attr "mode" "V4SF")])
656 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
658 ;; Parallel single-precision floating point comparisons
660 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; V4SF comparison whose rtx code is matched by operator 3
;; (sse_comparison_operator).  The template builds the mnemonic as
;; cmp<predicate>ps, with the predicate printed from operator 3 via %D3.
662 (define_insn "sse_maskcmpv4sf3"
663 [(set (match_operand:V4SF 0 "register_operand" "=x")
664 (match_operator:V4SF 3 "sse_comparison_operator"
665 [(match_operand:V4SF 1 "register_operand" "0")
666 (match_operand:V4SF 2 "nonimmediate_operand" "xm")]))]
668 "cmp%D3ps\t{%2, %0|%0, %2}"
669 [(set_attr "type" "ssecmp")
670 (set_attr "mode" "V4SF")])
;; Intrinsic comparison pattern emitted as cmp<predicate>ss ("mode" SF),
;; with the predicate printed from operator 3 via %D3.  Unlike
;; sse_maskcmpv4sf3, operand 2 must be a register here.
672 (define_insn "sse_vmmaskcmpv4sf3"
673 [(set (match_operand:V4SF 0 "register_operand" "=x")
675 (match_operator:V4SF 3 "sse_comparison_operator"
676 [(match_operand:V4SF 1 "register_operand" "0")
677 (match_operand:V4SF 2 "register_operand" "x")])
681 "cmp%D3ss\t{%2, %0|%0, %2}"
682 [(set_attr "type" "ssecmp")
683 (set_attr "mode" "SF")])
;; Sets FLAGS_REG in CCFP mode from element 0 of operand 0 and element 0 of
;; operand 1; emitted as comiss.
685 (define_insn "sse_comi"
686 [(set (reg:CCFP FLAGS_REG)
689 (match_operand:V4SF 0 "register_operand" "x")
690 (parallel [(const_int 0)]))
692 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
693 (parallel [(const_int 0)]))))]
695 "comiss\t{%1, %0|%0, %1}"
696 [(set_attr "type" "ssecomi")
697 (set_attr "mode" "SF")])
;; Sets FLAGS_REG in CCFPU mode from element 0 of operand 0 and element 0
;; of operand 1; emitted as ucomiss.
699 (define_insn "sse_ucomi"
700 [(set (reg:CCFPU FLAGS_REG)
703 (match_operand:V4SF 0 "register_operand" "x")
704 (parallel [(const_int 0)]))
706 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
707 (parallel [(const_int 0)]))))]
709 "ucomiss\t{%1, %0|%0, %1}"
710 [(set_attr "type" "ssecomi")
711 (set_attr "mode" "SF")])
713 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
715 ;; Parallel single-precision floating point logical operations
717 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; V4SF bitwise-and expander; runs ix86_fixup_binary_operands_no_copy on
;; the operands before the and pattern is emitted.
719 (define_expand "andv4sf3"
720 [(set (match_operand:V4SF 0 "register_operand" "")
721 (and:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
722 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
724 "ix86_fixup_binary_operands_no_copy (AND, V4SFmode, operands);")
;; V4SF bitwise and emitted as andps.  Operand 1 is tied to the output and
;; marked commutative with operand 2 ("%0"); validity is gated by
;; ix86_binary_operator_ok.
726 (define_insn "*andv4sf3"
727 [(set (match_operand:V4SF 0 "register_operand" "=x")
728 (and:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
729 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
730 "TARGET_SSE && ix86_binary_operator_ok (AND, V4SFmode, operands)"
731 "andps\t{%2, %0|%0, %2}"
732 [(set_attr "type" "sselog")
733 (set_attr "mode" "V4SF")])
;; (and (not op1) op2) on V4SF, emitted as andnps.  The complemented
;; operand 1 is the register tied to the output.
735 (define_insn "sse_nandv4sf3"
736 [(set (match_operand:V4SF 0 "register_operand" "=x")
737 (and:V4SF (not:V4SF (match_operand:V4SF 1 "register_operand" "0"))
738 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
740 "andnps\t{%2, %0|%0, %2}"
741 [(set_attr "type" "sselog")
742 (set_attr "mode" "V4SF")])
;; V4SF bitwise-or expander; runs ix86_fixup_binary_operands_no_copy on
;; the operands before the ior pattern is emitted.
744 (define_expand "iorv4sf3"
745 [(set (match_operand:V4SF 0 "register_operand" "")
746 (ior:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
747 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
749 "ix86_fixup_binary_operands_no_copy (IOR, V4SFmode, operands);")
;; V4SF bitwise or emitted as orps.  Operand 1 is tied to the output and
;; marked commutative with operand 2 ("%0"); validity is gated by
;; ix86_binary_operator_ok.
751 (define_insn "*iorv4sf3"
752 [(set (match_operand:V4SF 0 "register_operand" "=x")
753 (ior:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
754 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
755 "TARGET_SSE && ix86_binary_operator_ok (IOR, V4SFmode, operands)"
756 "orps\t{%2, %0|%0, %2}"
757 [(set_attr "type" "sselog")
758 (set_attr "mode" "V4SF")])
;; V4SF bitwise-xor expander; runs ix86_fixup_binary_operands_no_copy on
;; the operands before the xor pattern is emitted.
760 (define_expand "xorv4sf3"
761 [(set (match_operand:V4SF 0 "register_operand" "")
762 (xor:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
763 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
765 "ix86_fixup_binary_operands_no_copy (XOR, V4SFmode, operands);")
;; V4SF bitwise xor emitted as xorps.  Operand 1 is tied to the output and
;; marked commutative with operand 2 ("%0"); validity is gated by
;; ix86_binary_operator_ok.
767 (define_insn "*xorv4sf3"
768 [(set (match_operand:V4SF 0 "register_operand" "=x")
769 (xor:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
770 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
771 "TARGET_SSE && ix86_binary_operator_ok (XOR, V4SFmode, operands)"
772 "xorps\t{%2, %0|%0, %2}"
773 [(set_attr "type" "sselog")
774 (set_attr "mode" "V4SF")])
776 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
778 ;; Parallel single-precision floating point conversion operations
780 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Converts a V2SI operand held in an MMX register ("y") or memory to V2SF
;; and combines it with operand 1, a V4SF register tied to the output;
;; emitted as cvtpi2ps.
782 (define_insn "sse_cvtpi2ps"
783 [(set (match_operand:V4SF 0 "register_operand" "=x")
786 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
787 (match_operand:V4SF 1 "register_operand" "0")
790 "cvtpi2ps\t{%2, %0|%0, %2}"
791 [(set_attr "type" "ssecvt")
792 (set_attr "mode" "V4SF")])
;; Produces a V2SI result in an MMX register ("y") from elements 0 and 1 of
;; a V4SI unspec applied to a V4SF operand; emitted as cvtps2pi.
794 (define_insn "sse_cvtps2pi"
795 [(set (match_operand:V2SI 0 "register_operand" "=y")
797 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
799 (parallel [(const_int 0) (const_int 1)])))]
801 "cvtps2pi\t{%1, %0|%0, %1}"
802 [(set_attr "type" "ssecvt")
803 (set_attr "mode" "DI")])
;; Produces a V2SI result in an MMX register ("y") from elements 0 and 1 of
;; (fix:V4SI op1), where op1 is a V4SF operand; emitted as cvttps2pi.
;; NOTE(review): the "mode" attribute is SF here while sse_cvtps2pi uses
;; DI; confirm whether the difference is intended.
805 (define_insn "sse_cvttps2pi"
806 [(set (match_operand:V2SI 0 "register_operand" "=y")
808 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
809 (parallel [(const_int 0) (const_int 1)])))]
811 "cvttps2pi\t{%1, %0|%0, %1}"
812 [(set_attr "type" "ssecvt")
813 (set_attr "mode" "SF")])
;; Converts an SImode operand (register or memory alternative) to SF and
;; combines it with operand 1, a V4SF register tied to the output; emitted
;; as cvtsi2ss.  athlon_decode differs per alternative.
815 (define_insn "sse_cvtsi2ss"
816 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
819 (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
820 (match_operand:V4SF 1 "register_operand" "0,0")
823 "cvtsi2ss\t{%2, %0|%0, %2}"
824 [(set_attr "type" "sseicvt")
825 (set_attr "athlon_decode" "vector,double")
826 (set_attr "mode" "SF")])
;; 64-bit-only variant of sse_cvtsi2ss: converts a DImode operand to SF and
;; combines it with operand 1, a V4SF register tied to the output; emitted
;; as cvtsi2ssq.
828 (define_insn "sse_cvtsi2ssq"
829 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
832 (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,rm")))
833 (match_operand:V4SF 1 "register_operand" "0,0")
835 "TARGET_SSE && TARGET_64BIT"
836 "cvtsi2ssq\t{%2, %0|%0, %2}"
837 [(set_attr "type" "sseicvt")
838 (set_attr "athlon_decode" "vector,double")
839 (set_attr "mode" "SF")])
;; SImode result of UNSPEC_FIX_NOTRUNC applied to element 0 of a V4SF
;; operand (SSE register or memory alternative); emitted as cvtss2si.
841 (define_insn "sse_cvtss2si"
842 [(set (match_operand:SI 0 "register_operand" "=r,r")
845 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
846 (parallel [(const_int 0)]))]
847 UNSPEC_FIX_NOTRUNC))]
849 "cvtss2si\t{%1, %0|%0, %1}"
850 [(set_attr "type" "sseicvt")
851 (set_attr "athlon_decode" "double,vector")
852 (set_attr "mode" "SI")])
;; 64-bit-only variant of sse_cvtss2si: DImode result of
;; UNSPEC_FIX_NOTRUNC applied to element 0 of a V4SF operand; emitted as
;; cvtss2siq.
854 (define_insn "sse_cvtss2siq"
855 [(set (match_operand:DI 0 "register_operand" "=r,r")
858 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
859 (parallel [(const_int 0)]))]
860 UNSPEC_FIX_NOTRUNC))]
861 "TARGET_SSE && TARGET_64BIT"
862 "cvtss2siq\t{%1, %0|%0, %1}"
863 [(set_attr "type" "sseicvt")
864 (set_attr "athlon_decode" "double,vector")
865 (set_attr "mode" "DI")])
;; SImode result computed from element 0 of a V4SF operand (SSE register
;; or memory alternative); emitted as cvttss2si.
867 (define_insn "sse_cvttss2si"
868 [(set (match_operand:SI 0 "register_operand" "=r,r")
871 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
872 (parallel [(const_int 0)]))))]
874 "cvttss2si\t{%1, %0|%0, %1}"
875 [(set_attr "type" "sseicvt")
876 (set_attr "athlon_decode" "double,vector")
877 (set_attr "mode" "SI")])
;; 64-bit-only variant of sse_cvttss2si: DImode result computed from
;; element 0 of a V4SF operand; emitted as cvttss2siq.
879 (define_insn "sse_cvttss2siq"
880 [(set (match_operand:DI 0 "register_operand" "=r,r")
883 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
884 (parallel [(const_int 0)]))))]
885 "TARGET_SSE && TARGET_64BIT"
886 "cvttss2siq\t{%1, %0|%0, %1}"
887 [(set_attr "type" "sseicvt")
888 (set_attr "athlon_decode" "double,vector")
889 (set_attr "mode" "DI")])
;; Converts a V4SI register-or-memory operand to V4SF; emitted as
;; cvtdq2ps.  The "mode" attribute is V4SF, matching the result mode and
;; the "ps" mnemonic (it used to read V2DF, which mis-describes the insn
;; to attribute-driven code such as length and scheduling computations).
891 (define_insn "sse2_cvtdq2ps"
892 [(set (match_operand:V4SF 0 "register_operand" "=x")
893 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
895 "cvtdq2ps\t{%1, %0|%0, %1}"
896 [(set_attr "type" "ssecvt")
897 (set_attr "mode" "V4SF")])
;; V4SI result of UNSPEC_FIX_NOTRUNC applied to a V4SF register-or-memory
;; operand; emitted as cvtps2dq.
899 (define_insn "sse2_cvtps2dq"
900 [(set (match_operand:V4SI 0 "register_operand" "=x")
901 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
902 UNSPEC_FIX_NOTRUNC))]
904 "cvtps2dq\t{%1, %0|%0, %1}"
905 [(set_attr "type" "ssecvt")
906 (set_attr "mode" "TI")])
;; (fix:V4SI op1) of a V4SF register-or-memory operand; emitted as
;; cvttps2dq.
908 (define_insn "sse2_cvttps2dq"
909 [(set (match_operand:V4SI 0 "register_operand" "=x")
910 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
912 "cvttps2dq\t{%1, %0|%0, %1}"
913 [(set_attr "type" "ssecvt")
914 (set_attr "mode" "TI")])
916 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
918 ;; Parallel single-precision floating point element swizzling
920 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Element selection over operands 1 and 2 with three alternatives, which
;; emit movhlps, movlps (operand 1 printed with the %H modifier) and
;; movhps respectively; "mode" is V4SF, V2SF, V2SF.  The insn is rejected
;; when operands 1 and 2 are both memory.
922 (define_insn "sse_movhlps"
923 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
926 (match_operand:V4SF 1 "nonimmediate_operand" " 0,o,x")
927 (match_operand:V4SF 2 "nonimmediate_operand" " x,0,0"))
928 (parallel [(const_int 4)
932 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
934 movhlps\t{%2, %0|%0, %2}
935 movlps\t{%H1, %0|%0, %H1}
936 movhps\t{%1, %0|%0, %1}"
937 [(set_attr "type" "ssemov")
938 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Element selection over operands 1 and 2 with three alternatives, which
;; emit movlhps, movhps and movlps (operand 0 printed with the %H
;; modifier) respectively; "mode" is V4SF, V2SF, V2SF.  Operand 1 is tied
;; to the output in every alternative; validity is gated by
;; ix86_binary_operator_ok called with code UNKNOWN.
940 (define_insn "sse_movlhps"
941 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
944 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
945 (match_operand:V4SF 2 "nonimmediate_operand" " x,m,x"))
946 (parallel [(const_int 0)
950 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
952 movlhps\t{%2, %0|%0, %2}
953 movhps\t{%2, %0|%0, %2}
954 movlps\t{%2, %H0|%H0, %2}"
955 [(set_attr "type" "ssemov")
956 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Selects elements 2, 6, 3, 7 from operands 1 and 2 taken together;
;; emitted as unpckhps.  Operand 1 is a register tied to the output.
958 (define_insn "sse_unpckhps"
959 [(set (match_operand:V4SF 0 "register_operand" "=x")
962 (match_operand:V4SF 1 "register_operand" "0")
963 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
964 (parallel [(const_int 2) (const_int 6)
965 (const_int 3) (const_int 7)])))]
967 "unpckhps\t{%2, %0|%0, %2}"
968 [(set_attr "type" "sselog")
969 (set_attr "mode" "V4SF")])
;; Selects elements 0, 4, 1, 5 from operands 1 and 2 taken together;
;; emitted as unpcklps.  Operand 1 is a register tied to the output.
971 (define_insn "sse_unpcklps"
972 [(set (match_operand:V4SF 0 "register_operand" "=x")
975 (match_operand:V4SF 1 "register_operand" "0")
976 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
977 (parallel [(const_int 0) (const_int 4)
978 (const_int 1) (const_int 5)])))]
980 "unpcklps\t{%2, %0|%0, %2}"
981 [(set_attr "type" "sselog")
982 (set_attr "mode" "V4SF")])
984 ;; These are modeled with the same vec_concat as the others so that we
985 ;; capture users of shufps that can use the new instructions
;; SSE3 element selection on a single V4SF register-or-memory operand,
;; with a selector list beginning at element 1; emitted as movshdup.
986 (define_insn "sse3_movshdup"
987 [(set (match_operand:V4SF 0 "register_operand" "=x")
990 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
992 (parallel [(const_int 1)
997 "movshdup\t{%1, %0|%0, %1}"
998 [(set_attr "type" "sse")
999 (set_attr "mode" "V4SF")])
;; SSE3 element selection on a single V4SF register-or-memory operand,
;; with a selector list beginning at element 0; emitted as movsldup.
1001 (define_insn "sse3_movsldup"
1002 [(set (match_operand:V4SF 0 "register_operand" "=x")
1005 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
1007 (parallel [(const_int 0)
1012 "movsldup\t{%1, %0|%0, %1}"
1013 [(set_attr "type" "sse")
1014 (set_attr "mode" "V4SF")])
;; Expander taking the shuffle immediate as operand 3.  The immediate is
;; split into four 2-bit fields that are passed to gen_sse_shufps_1 as
;; separate selectors; the two upper fields are biased by 4, matching the
;; const_4_to_7_operand predicates of sse_shufps_1.
1016 (define_expand "sse_shufps"
1017 [(match_operand:V4SF 0 "register_operand" "")
1018 (match_operand:V4SF 1 "register_operand" "")
1019 (match_operand:V4SF 2 "nonimmediate_operand" "")
1020 (match_operand:SI 3 "const_int_operand" "")]
1023 int mask = INTVAL (operands[3]);
1024 emit_insn (gen_sse_shufps_1 (operands[0], operands[1], operands[2],
1025 GEN_INT ((mask >> 0) & 3),
1026 GEN_INT ((mask >> 2) & 3),
1027 GEN_INT (((mask >> 4) & 3) + 4),
1028 GEN_INT (((mask >> 6) & 3) + 4)));
;; Shuffle insn with four explicit selectors: operands 3 and 4 are in
;; 0..3, operands 5 and 6 in 4..7.  The output code packs them back into
;; one immediate (2 bits each, after removing the bias of 4 from operands
;; 5 and 6), stores it in operands[3] and emits shufps.
1032 (define_insn "sse_shufps_1"
1033 [(set (match_operand:V4SF 0 "register_operand" "=x")
1036 (match_operand:V4SF 1 "register_operand" "0")
1037 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1038 (parallel [(match_operand 3 "const_0_to_3_operand" "")
1039 (match_operand 4 "const_0_to_3_operand" "")
1040 (match_operand 5 "const_4_to_7_operand" "")
1041 (match_operand 6 "const_4_to_7_operand" "")])))]
1045 mask |= INTVAL (operands[3]) << 0;
1046 mask |= INTVAL (operands[4]) << 2;
1047 mask |= (INTVAL (operands[5]) - 4) << 4;
1048 mask |= (INTVAL (operands[6]) - 4) << 6;
1049 operands[3] = GEN_INT (mask);
1051 return "shufps\t{%3, %2, %0|%0, %2, %3}";
1053 [(set_attr "type" "sselog")
1054 (set_attr "mode" "V4SF")])
;; Extracts elements 2 and 3 of a V4SF operand as V2SF.  The three
;; alternatives emit movhps (to memory), movhlps (register to register)
;; and movlps from an offsettable memory source printed with %H1.
1056 (define_insn "sse_storehps"
1057 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
1059 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
1060 (parallel [(const_int 2) (const_int 3)])))]
1063 movhps\t{%1, %0|%0, %1}
1064 movhlps\t{%1, %0|%0, %1}
1065 movlps\t{%H1, %0|%0, %H1}"
1066 [(set_attr "type" "ssemov")
1067 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Combines elements 0 and 1 of operand 1 (tied to the output) with the
;; V2SF operand 2.  The three alternatives emit movhps (memory source),
;; movlhps (register source) and movlps into %H0 (memory destination).
1069 (define_insn "sse_loadhps"
1070 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
1073 (match_operand:V4SF 1 "nonimmediate_operand" "0,0,0")
1074 (parallel [(const_int 0) (const_int 1)]))
1075 (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
1078 movhps\t{%2, %0|%0, %2}
1079 movlhps\t{%2, %0|%0, %2}
1080 movlps\t{%2, %H0|%H0, %2}"
1081 [(set_attr "type" "ssemov")
1082 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Extracts elements 0 and 1 of a V4SF operand as V2SF.  The three
;; alternatives emit movlps (to memory), movaps (register to register)
;; and movlps (from memory).
1084 (define_insn "sse_storelps"
1085 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
1087 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m")
1088 (parallel [(const_int 0) (const_int 1)])))]
1091 movlps\t{%1, %0|%0, %1}
1092 movaps\t{%1, %0|%0, %1}
1093 movlps\t{%1, %0|%0, %1}"
1094 [(set_attr "type" "ssemov")
1095 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Combine the V2SF operand 2 with elements 2 and 3 of operand 1.
;; Alternative 0 has operand 2 tied to the destination and pulls the upper
;; elements from operand 1 with shufps $0xe4; alternatives 1 and 2 have
;; operand 1 tied to the destination and use movlps (load / store form).
1097 (define_insn "sse_loadlps"
1098 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
1100 (match_operand:V2SF 2 "nonimmediate_operand" "0,m,x")
1102 (match_operand:V4SF 1 "nonimmediate_operand" "x,0,0")
1103 (parallel [(const_int 2) (const_int 3)]))))]
1106 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
1107 movlps\t{%2, %0|%0, %2}
1108 movlps\t{%2, %0|%0, %2}"
1109 [(set_attr "type" "sselog,ssemov,ssemov")
1110 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Register-only movss: operand 1 is tied to the destination and operand 2
;; is the xmm register printed as the source of the instruction.
1112 (define_insn "sse_movss"
1113 [(set (match_operand:V4SF 0 "register_operand" "=x")
1115 (match_operand:V4SF 2 "register_operand" "x")
1116 (match_operand:V4SF 1 "register_operand" "0")
1119 "movss\t{%2, %0|%0, %2}"
1120 [(set_attr "type" "ssemov")
1121 (set_attr "mode" "SF")])
;; V4SF result from a single SF input that is tied to the destination;
;; emitted as shufps with immediate 0 of the destination with itself.
1123 (define_insn "*vec_dupv4sf"
1124 [(set (match_operand:V4SF 0 "register_operand" "=x")
1126 (match_operand:SF 1 "register_operand" "0")))]
1128 "shufps\t{$0, %0, %0|%0, %0, 0}"
1129 [(set_attr "type" "sselog1")
1130 (set_attr "mode" "V4SF")])
1132 ;; ??? In theory we can match memory for the MMX alternative, but allowing
1133 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
1134 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Build a V2SF from two SF operands.  Alternatives 0-1 target SSE
;; registers (unpcklps, or movss from memory when operand 2 satisfies
;; constraint C); alternatives 2-3 target MMX registers (punpckldq, movd).
1135 (define_insn "*sse_concatv2sf"
1136 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
1138 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
1139 (match_operand:SF 2 "vector_move_operand" " x,C,*y, C")))]
1142 unpcklps\t{%2, %0|%0, %2}
1143 movss\t{%1, %0|%0, %1}
1144 punpckldq\t{%2, %0|%0, %2}
1145 movd\t{%1, %0|%0, %1}"
1146 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
1147 (set_attr "mode" "V4SF,SF,DI,DI")])
;; Build a V4SF from two V2SF operands.  Operand 1 is tied to the
;; destination; operand 2 comes from a register (movlhps) or memory (movhps).
1149 (define_insn "*sse_concatv4sf"
1150 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1152 (match_operand:V2SF 1 "register_operand" " 0,0")
1153 (match_operand:V2SF 2 "nonimmediate_operand" " x,m")))]
1156 movlhps\t{%2, %0|%0, %2}
1157 movhps\t{%2, %0|%0, %2}"
1158 [(set_attr "type" "ssemov")
1159 (set_attr "mode" "V4SF,V2SF")])
;; Expander for initializing the V4SF register operand 0 from operand 1.
;; All work is delegated to ix86_expand_vector_init, called with false as
;; its first argument.
1161 (define_expand "vec_initv4sf"
1162 [(match_operand:V4SF 0 "register_operand" "")
1163 (match_operand 1 "" "")]
1166 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Insert the SF operand 2 into a V4SF whose other source is operand 1
;; (either tied to the destination or matching constraint C).  The visible
;; templates are movss for xmm/memory sources and movd for a general
;; register source; the template of the last (memory destination)
;; alternative is not visible in this chunk.
1170 (define_insn "*vec_setv4sf_0"
1171 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,Y ,m")
1174 (match_operand:SF 2 "general_operand" " x,m,*r,x*rfF"))
1175 (match_operand:V4SF 1 "vector_move_operand" " 0,C,C ,0")
1179 movss\t{%2, %0|%0, %2}
1180 movss\t{%2, %0|%0, %2}
1181 movd\t{%2, %0|%0, %2}
1183 [(set_attr "type" "ssemov")
1184 (set_attr "mode" "SF")])
;; NOTE(review): the header of this form is not visible in this chunk;
;; presumably a split.  It applies after reload to a V4SF memory
;; destination and emits a plain move of operand 1 into the SFmode slot at
;; offset 0 of that memory.
1187 [(set (match_operand:V4SF 0 "memory_operand" "")
1190 (match_operand:SF 1 "nonmemory_operand" ""))
1193 "TARGET_SSE && reload_completed"
1196 emit_move_insn (adjust_address (operands[0], SFmode, 0), operands[1]);
;; Expander taking a V4SF register (operand 0), an SF register (operand 1)
;; and a constant integer (operand 2, presumably the element index).
;; Delegates to ix86_expand_vector_set with false as its first argument.
1200 (define_expand "vec_setv4sf"
1201 [(match_operand:V4SF 0 "register_operand" "")
1202 (match_operand:SF 1 "register_operand" "")
1203 (match_operand 2 "const_int_operand" "")]
1206 ix86_expand_vector_set (false, operands[0], operands[1],
1207 INTVAL (operands[2]));
;; Extract element 0 of a V4SF into an SF destination.  Not matched when
;; both operands are memory.  After reload it is split into a plain move:
;; operand 1 is re-expressed in SFmode, either as the same hard register
;; number (gen_rtx_REG) or through gen_lowpart.
1211 (define_insn_and_split "*vec_extractv4sf_0"
1212 [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,fr")
1214 (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m")
1215 (parallel [(const_int 0)])))]
1216 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
1218 "&& reload_completed"
1221 rtx op1 = operands[1];
1223 op1 = gen_rtx_REG (SFmode, REGNO (op1));
1225 op1 = gen_lowpart (SFmode, op1);
1226 emit_move_insn (operands[0], op1);
;; Expander taking an SF register (operand 0), a V4SF register (operand 1)
;; and a constant integer (operand 2, presumably the element index).
;; Delegates to ix86_expand_vector_extract with false as its first argument.
1230 (define_expand "vec_extractv4sf"
1231 [(match_operand:SF 0 "register_operand" "")
1232 (match_operand:V4SF 1 "register_operand" "")
1233 (match_operand 2 "const_int_operand" "")]
1236 ix86_expand_vector_extract (false, operands[0], operands[1],
1237 INTVAL (operands[2]));
1241 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1243 ;; Parallel double-precision floating point arithmetic
1245 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; V2DF negation.  The expansion is produced entirely by
;; ix86_expand_fp_absneg_operator (NEG, ...); DONE suppresses the template.
1247 (define_expand "negv2df2"
1248 [(set (match_operand:V2DF 0 "register_operand" "")
1249 (neg:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")))]
1251 "ix86_expand_fp_absneg_operator (NEG, V2DFmode, operands); DONE;")
;; V2DF absolute value.  The expansion is produced entirely by
;; ix86_expand_fp_absneg_operator (ABS, ...); DONE suppresses the template.
1253 (define_expand "absv2df2"
1254 [(set (match_operand:V2DF 0 "register_operand" "")
1255 (abs:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")))]
1257 "ix86_expand_fp_absneg_operator (ABS, V2DFmode, operands); DONE;")
;; V2DF addition expander.  Operands are first adjusted by
;; ix86_fixup_binary_operands_no_copy; the (plus ...) template is then
;; emitted as written.
1259 (define_expand "addv2df3"
1260 [(set (match_operand:V2DF 0 "register_operand" "")
1261 (plus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1262 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1264 "ix86_fixup_binary_operands_no_copy (PLUS, V2DFmode, operands);")
;; Packed double add (addpd).  Operands 1 and 2 are commutative ("%0");
;; requires SSE2 and operands accepted by ix86_binary_operator_ok.
1266 (define_insn "*addv2df3"
1267 [(set (match_operand:V2DF 0 "register_operand" "=x")
1268 (plus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1269 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1270 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V2DFmode, operands)"
1271 "addpd\t{%2, %0|%0, %2}"
1272 [(set_attr "type" "sseadd")
1273 (set_attr "mode" "V2DF")])
;; Scalar double add (addsd) on V2DF operands.  Operands 1 and 2 are
;; commutative ("%0"); requires SSE2 and operands accepted by
;; ix86_binary_operator_ok, checked in V2DFmode -- the mode of the
;; operands -- like the other V2DF arithmetic patterns.
1275 (define_insn "sse2_vmaddv2df3"
1276 [(set (match_operand:V2DF 0 "register_operand" "=x")
1278 (plus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1279 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1282 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V2DFmode, operands)"
1283 "addsd\t{%2, %0|%0, %2}"
1284 [(set_attr "type" "sseadd")
1285 (set_attr "mode" "DF")])
;; V2DF subtraction expander.  Operands are first adjusted by
;; ix86_fixup_binary_operands_no_copy.
1287 (define_expand "subv2df3"
1288 [(set (match_operand:V2DF 0 "register_operand" "")
1289 (minus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1290 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1292 "ix86_fixup_binary_operands_no_copy (MINUS, V2DFmode, operands);")
;; Packed double subtract (subpd).  Operand 1 must be a register tied to
;; the destination; operand 2 may be a register or memory.
1294 (define_insn "*subv2df3"
1295 [(set (match_operand:V2DF 0 "register_operand" "=x")
1296 (minus:V2DF (match_operand:V2DF 1 "register_operand" "0")
1297 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1299 "subpd\t{%2, %0|%0, %2}"
1300 [(set_attr "type" "sseadd")
1301 (set_attr "mode" "V2DF")])
;; Scalar double subtract (subsd) on V2DF operands.  Operand 1 is tied to
;; the destination; operand 2 may be a register or memory.
1303 (define_insn "sse2_vmsubv2df3"
1304 [(set (match_operand:V2DF 0 "register_operand" "=x")
1306 (minus:V2DF (match_operand:V2DF 1 "register_operand" "0")
1307 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1311 "subsd\t{%2, %0|%0, %2}"
1312 [(set_attr "type" "sseadd")
1313 (set_attr "mode" "DF")])
;; V2DF multiplication expander.  Operands are first adjusted by
;; ix86_fixup_binary_operands_no_copy.
1315 (define_expand "mulv2df3"
1316 [(set (match_operand:V2DF 0 "register_operand" "")
1317 (mult:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1318 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1320 "ix86_fixup_binary_operands_no_copy (MULT, V2DFmode, operands);")
;; Packed double multiply (mulpd).  Operands 1 and 2 are commutative
;; ("%0"); requires SSE2 and operands accepted by ix86_binary_operator_ok.
1322 (define_insn "*mulv2df3"
1323 [(set (match_operand:V2DF 0 "register_operand" "=x")
1324 (mult:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1325 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1326 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V2DFmode, operands)"
1327 "mulpd\t{%2, %0|%0, %2}"
1328 [(set_attr "type" "ssemul")
1329 (set_attr "mode" "V2DF")])
;; Scalar double multiply (mulsd) on V2DF operands.  Operands 1 and 2 are
;; commutative ("%0"); requires SSE2 and operands accepted by
;; ix86_binary_operator_ok.
1331 (define_insn "sse2_vmmulv2df3"
1332 [(set (match_operand:V2DF 0 "register_operand" "=x")
1334 (mult:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1335 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1338 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V2DFmode, operands)"
1339 "mulsd\t{%2, %0|%0, %2}"
1340 [(set_attr "type" "ssemul")
1341 (set_attr "mode" "DF")])
;; V2DF division expander.  Operand 1 must already be a register; operands
;; are then adjusted by ix86_fixup_binary_operands_no_copy.
1343 (define_expand "divv2df3"
1344 [(set (match_operand:V2DF 0 "register_operand" "")
1345 (div:V2DF (match_operand:V2DF 1 "register_operand" "")
1346 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1348 "ix86_fixup_binary_operands_no_copy (DIV, V2DFmode, operands);")
;; Packed double divide (divpd).  Operand 1 is tied to the destination;
;; operand 2 may be a register or memory.
1350 (define_insn "*divv2df3"
1351 [(set (match_operand:V2DF 0 "register_operand" "=x")
1352 (div:V2DF (match_operand:V2DF 1 "register_operand" "0")
1353 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1355 "divpd\t{%2, %0|%0, %2}"
1356 [(set_attr "type" "ssediv")
1357 (set_attr "mode" "V2DF")])
;; Scalar double divide (divsd) on V2DF operands.  Operand 1 is tied to
;; the destination; operand 2 may be a register or memory.
1359 (define_insn "sse2_vmdivv2df3"
1360 [(set (match_operand:V2DF 0 "register_operand" "=x")
1362 (div:V2DF (match_operand:V2DF 1 "register_operand" "0")
1363 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1367 "divsd\t{%2, %0|%0, %2}"
1368 [(set_attr "type" "ssediv")
1369 (set_attr "mode" "DF")])
;; Packed double square root (sqrtpd).  The source may be a register or
;; memory and is not tied to the destination.
1371 (define_insn "sqrtv2df2"
1372 [(set (match_operand:V2DF 0 "register_operand" "=x")
1373 (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
1375 "sqrtpd\t{%1, %0|%0, %1}"
1376 [(set_attr "type" "sse")
1377 (set_attr "mode" "V2DF")])
;; Scalar double square root (sqrtsd).  Operand 1 is the value whose
;; square root is taken and may be a register or memory, so its predicate
;; is nonimmediate_operand to agree with the "xm" constraint.  Operand 2
;; is tied to the destination.  sqrtsd is a double-precision operation,
;; hence mode attribute DF as on the other scalar V2DF patterns.
1379 (define_insn "sse2_vmsqrtv2df2"
1380 [(set (match_operand:V2DF 0 "register_operand" "=x")
1382 (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
1383 (match_operand:V2DF 2 "register_operand" "0")
1386 "sqrtsd\t{%1, %0|%0, %1}"
1387 [(set_attr "type" "sse")
1388 (set_attr "mode" "DF")])
1390 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
1391 ;; isn't really correct, as those rtl operators aren't defined when
1392 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
;; V2DF maximum expander.  Unless flag_finite_math_only is set, operand 1
;; is forced into a register before the usual
;; ix86_fixup_binary_operands_no_copy adjustment.
1394 (define_expand "smaxv2df3"
1395 [(set (match_operand:V2DF 0 "register_operand" "")
1396 (smax:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1397 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1400 if (!flag_finite_math_only)
1401 operands[1] = force_reg (V2DFmode, operands[1]);
1402 ix86_fixup_binary_operands_no_copy (SMAX, V2DFmode, operands);
;; Packed double maximum (maxpd), commutative form ("%0").  Only enabled
;; with SSE2 and flag_finite_math_only, and when the operands are accepted
;; by ix86_binary_operator_ok.
1405 (define_insn "*smaxv2df3_finite"
1406 [(set (match_operand:V2DF 0 "register_operand" "=x")
1407 (smax:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1408 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1409 "TARGET_SSE2 && flag_finite_math_only
1410 && ix86_binary_operator_ok (SMAX, V2DFmode, operands)"
1411 "maxpd\t{%2, %0|%0, %2}"
1412 [(set_attr "type" "sseadd")
1413 (set_attr "mode" "V2DF")])
;; Packed double maximum (maxpd), non-commutative form: operand 1 must be
;; a register tied to the destination.
1415 (define_insn "*smaxv2df3"
1416 [(set (match_operand:V2DF 0 "register_operand" "=x")
1417 (smax:V2DF (match_operand:V2DF 1 "register_operand" "0")
1418 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1420 "maxpd\t{%2, %0|%0, %2}"
1421 [(set_attr "type" "sseadd")
1422 (set_attr "mode" "V2DF")])
;; Scalar double maximum (maxsd), commutative form ("%0").  Only enabled
;; with SSE2 and flag_finite_math_only, and when the operands are accepted
;; by ix86_binary_operator_ok.
1424 (define_insn "*sse2_vmsmaxv2df3_finite"
1425 [(set (match_operand:V2DF 0 "register_operand" "=x")
1427 (smax:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1428 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1431 "TARGET_SSE2 && flag_finite_math_only
1432 && ix86_binary_operator_ok (SMAX, V2DFmode, operands)"
1433 "maxsd\t{%2, %0|%0, %2}"
1434 [(set_attr "type" "sseadd")
1435 (set_attr "mode" "DF")])
;; Scalar double maximum (maxsd), non-commutative form: operand 1 must be
;; a register tied to the destination.
1437 (define_insn "sse2_vmsmaxv2df3"
1438 [(set (match_operand:V2DF 0 "register_operand" "=x")
1440 (smax:V2DF (match_operand:V2DF 1 "register_operand" "0")
1441 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1445 "maxsd\t{%2, %0|%0, %2}"
1446 [(set_attr "type" "sseadd")
1447 (set_attr "mode" "DF")])
;; V2DF minimum expander.  Unless flag_finite_math_only is set, operand 1
;; is forced into a register before the usual
;; ix86_fixup_binary_operands_no_copy adjustment.
1449 (define_expand "sminv2df3"
1450 [(set (match_operand:V2DF 0 "register_operand" "")
1451 (smin:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1452 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1455 if (!flag_finite_math_only)
1456 operands[1] = force_reg (V2DFmode, operands[1]);
1457 ix86_fixup_binary_operands_no_copy (SMIN, V2DFmode, operands);
;; Packed double minimum (minpd), commutative form ("%0").  Only enabled
;; with SSE2 and flag_finite_math_only, and when the operands are accepted
;; by ix86_binary_operator_ok.
1460 (define_insn "*sminv2df3_finite"
1461 [(set (match_operand:V2DF 0 "register_operand" "=x")
1462 (smin:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1463 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1464 "TARGET_SSE2 && flag_finite_math_only
1465 && ix86_binary_operator_ok (SMIN, V2DFmode, operands)"
1466 "minpd\t{%2, %0|%0, %2}"
1467 [(set_attr "type" "sseadd")
1468 (set_attr "mode" "V2DF")])
;; Packed double minimum (minpd), non-commutative form: operand 1 must be
;; a register tied to the destination.
1470 (define_insn "*sminv2df3"
1471 [(set (match_operand:V2DF 0 "register_operand" "=x")
1472 (smin:V2DF (match_operand:V2DF 1 "register_operand" "0")
1473 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1475 "minpd\t{%2, %0|%0, %2}"
1476 [(set_attr "type" "sseadd")
1477 (set_attr "mode" "V2DF")])
;; Scalar double minimum (minsd), commutative form ("%0").  Only enabled
;; with SSE2 and flag_finite_math_only, and when the operands are accepted
;; by ix86_binary_operator_ok.
1479 (define_insn "*sse2_vmsminv2df3_finite"
1480 [(set (match_operand:V2DF 0 "register_operand" "=x")
1482 (smin:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1483 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1486 "TARGET_SSE2 && flag_finite_math_only
1487 && ix86_binary_operator_ok (SMIN, V2DFmode, operands)"
1488 "minsd\t{%2, %0|%0, %2}"
1489 [(set_attr "type" "sseadd")
1490 (set_attr "mode" "DF")])
;; Scalar double minimum (minsd), non-commutative form: operand 1 must be
;; a register tied to the destination.
1492 (define_insn "sse2_vmsminv2df3"
1493 [(set (match_operand:V2DF 0 "register_operand" "=x")
1495 (smin:V2DF (match_operand:V2DF 1 "register_operand" "0")
1496 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1500 "minsd\t{%2, %0|%0, %2}"
1501 [(set_attr "type" "sseadd")
1502 (set_attr "mode" "DF")])
;; addsubpd: the pattern involves both an operation on operands 1 and 2
;; (first arm, operator line not visible here) and their difference
;; (minus with match_dup).  Operand 1 is tied to the destination.
1504 (define_insn "sse3_addsubv2df3"
1505 [(set (match_operand:V2DF 0 "register_operand" "=x")
1508 (match_operand:V2DF 1 "register_operand" "0")
1509 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1510 (minus:V2DF (match_dup 1) (match_dup 2))
1513 "addsubpd\t{%2, %0|%0, %2}"
1514 [(set_attr "type" "sseadd")
1515 (set_attr "mode" "V2DF")])
;; haddpd: the pattern pairs element 0 with element 1 of operand 1, and
;; element 0 with element 1 of operand 2.  Operand 1 is tied to the
;; destination; operand 2 may be a register or memory.
1517 (define_insn "sse3_haddv2df3"
1518 [(set (match_operand:V2DF 0 "register_operand" "=x")
1522 (match_operand:V2DF 1 "register_operand" "0")
1523 (parallel [(const_int 0)]))
1524 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1527 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
1528 (parallel [(const_int 0)]))
1529 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1531 "haddpd\t{%2, %0|%0, %2}"
1532 [(set_attr "type" "sseadd")
1533 (set_attr "mode" "V2DF")])
;; hsubpd: the pattern pairs element 0 with element 1 of operand 1, and
;; element 0 with element 1 of operand 2.  Operand 1 is tied to the
;; destination; operand 2 may be a register or memory.
1535 (define_insn "sse3_hsubv2df3"
1536 [(set (match_operand:V2DF 0 "register_operand" "=x")
1540 (match_operand:V2DF 1 "register_operand" "0")
1541 (parallel [(const_int 0)]))
1542 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1545 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
1546 (parallel [(const_int 0)]))
1547 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1549 "hsubpd\t{%2, %0|%0, %2}"
1550 [(set_attr "type" "sseadd")
1551 (set_attr "mode" "V2DF")])
1553 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1555 ;; Parallel double-precision floating point comparisons
1557 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Packed double compare producing a V2DF result.  Operand 3 is the
;; comparison operator (any sse_comparison_operator); %D3 prints its
;; condition name into the cmp..pd mnemonic.
1559 (define_insn "sse2_maskcmpv2df3"
1560 [(set (match_operand:V2DF 0 "register_operand" "=x")
1561 (match_operator:V2DF 3 "sse_comparison_operator"
1562 [(match_operand:V2DF 1 "register_operand" "0")
1563 (match_operand:V2DF 2 "nonimmediate_operand" "xm")]))]
1565 "cmp%D3pd\t{%2, %0|%0, %2}"
1566 [(set_attr "type" "ssecmp")
1567 (set_attr "mode" "V2DF")])
;; Scalar double compare on V2DF operands.  Operand 3 is the comparison
;; operator (any sse_comparison_operator); %D3 prints its condition name
;; into the cmp..sd mnemonic.
1569 (define_insn "sse2_vmmaskcmpv2df3"
1570 [(set (match_operand:V2DF 0 "register_operand" "=x")
1572 (match_operator:V2DF 3 "sse_comparison_operator"
1573 [(match_operand:V2DF 1 "register_operand" "0")
1574 (match_operand:V2DF 2 "nonimmediate_operand" "xm")])
1578 "cmp%D3sd\t{%2, %0|%0, %2}"
1579 [(set_attr "type" "ssecmp")
1580 (set_attr "mode" "DF")])
;; comisd: sets FLAGS_REG (in CCFP mode) from element 0 of operand 0 and
;; element 0 of operand 1.  Operand 1 may be a register or memory.
1582 (define_insn "sse2_comi"
1583 [(set (reg:CCFP FLAGS_REG)
1586 (match_operand:V2DF 0 "register_operand" "x")
1587 (parallel [(const_int 0)]))
1589 (match_operand:V2DF 1 "nonimmediate_operand" "xm")
1590 (parallel [(const_int 0)]))))]
1592 "comisd\t{%1, %0|%0, %1}"
1593 [(set_attr "type" "ssecomi")
1594 (set_attr "mode" "DF")])
;; ucomisd: sets FLAGS_REG (in CCFPU mode) from element 0 of operand 0 and
;; element 0 of operand 1.  Operand 1 may be a register or memory.
1596 (define_insn "sse2_ucomi"
1597 [(set (reg:CCFPU FLAGS_REG)
1600 (match_operand:V2DF 0 "register_operand" "x")
1601 (parallel [(const_int 0)]))
1603 (match_operand:V2DF 1 "nonimmediate_operand" "xm")
1604 (parallel [(const_int 0)]))))]
1606 "ucomisd\t{%1, %0|%0, %1}"
1607 [(set_attr "type" "ssecomi")
1608 (set_attr "mode" "DF")])
1610 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1612 ;; Parallel double-precision floating point logical operations
1614 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; V2DF bitwise AND expander.  Operands are first adjusted by
;; ix86_fixup_binary_operands_no_copy.
1616 (define_expand "andv2df3"
1617 [(set (match_operand:V2DF 0 "register_operand" "")
1618 (and:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1619 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1621 "ix86_fixup_binary_operands_no_copy (AND, V2DFmode, operands);")
;; Packed double bitwise AND (andpd).  Operands 1 and 2 are commutative
;; ("%0"); requires SSE2 and operands accepted by ix86_binary_operator_ok,
;; checked in V2DFmode -- the mode of the operands -- like the ior/xor
;; V2DF patterns.
1623 (define_insn "*andv2df3"
1624 [(set (match_operand:V2DF 0 "register_operand" "=x")
1625 (and:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1626 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1627 "TARGET_SSE2 && ix86_binary_operator_ok (AND, V2DFmode, operands)"
1628 "andpd\t{%2, %0|%0, %2}"
1629 [(set_attr "type" "sselog")
1630 (set_attr "mode" "V2DF")])
;; andnpd: (NOT operand 1) AND operand 2.  Operand 1 is the complemented
;; input and is tied to the destination.
1632 (define_insn "sse2_nandv2df3"
1633 [(set (match_operand:V2DF 0 "register_operand" "=x")
1634 (and:V2DF (not:V2DF (match_operand:V2DF 1 "register_operand" "0"))
1635 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1637 "andnpd\t{%2, %0|%0, %2}"
1638 [(set_attr "type" "sselog")
1639 (set_attr "mode" "V2DF")])
;; V2DF bitwise OR expander.  Operands are first adjusted by
;; ix86_fixup_binary_operands_no_copy.
1641 (define_expand "iorv2df3"
1642 [(set (match_operand:V2DF 0 "register_operand" "")
1643 (ior:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1644 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1646 "ix86_fixup_binary_operands_no_copy (IOR, V2DFmode, operands);")
;; Packed double bitwise OR (orpd).  Operands 1 and 2 are commutative
;; ("%0"); requires SSE2 and operands accepted by ix86_binary_operator_ok.
1648 (define_insn "*iorv2df3"
1649 [(set (match_operand:V2DF 0 "register_operand" "=x")
1650 (ior:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1651 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1652 "TARGET_SSE2 && ix86_binary_operator_ok (IOR, V2DFmode, operands)"
1653 "orpd\t{%2, %0|%0, %2}"
1654 [(set_attr "type" "sselog")
1655 (set_attr "mode" "V2DF")])
;; V2DF bitwise XOR expander.  Operands are first adjusted by
;; ix86_fixup_binary_operands_no_copy.
1657 (define_expand "xorv2df3"
1658 [(set (match_operand:V2DF 0 "register_operand" "")
1659 (xor:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1660 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1662 "ix86_fixup_binary_operands_no_copy (XOR, V2DFmode, operands);")
;; Packed double bitwise XOR (xorpd).  Operands 1 and 2 are commutative
;; ("%0"); requires SSE2 and operands accepted by ix86_binary_operator_ok.
1664 (define_insn "*xorv2df3"
1665 [(set (match_operand:V2DF 0 "register_operand" "=x")
1666 (xor:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1667 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1668 "TARGET_SSE2 && ix86_binary_operator_ok (XOR, V2DFmode, operands)"
1669 "xorpd\t{%2, %0|%0, %2}"
1670 [(set_attr "type" "sselog")
1671 (set_attr "mode" "V2DF")])
1673 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1675 ;; Parallel double-precision floating point conversion operations
1677 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2pd: convert a V2SI in an MMX register or memory ("ym") to a V2DF
;; in an xmm register.
1679 (define_insn "sse2_cvtpi2pd"
1680 [(set (match_operand:V2DF 0 "register_operand" "=x")
1681 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "ym")))]
1683 "cvtpi2pd\t{%1, %0|%0, %1}"
1684 [(set_attr "type" "ssecvt")
1685 (set_attr "mode" "V2DF")])
;; cvtpd2pi: convert a V2DF to a V2SI in an MMX register.  Modelled as
;; UNSPEC_FIX_NOTRUNC rather than as a plain (fix ...).
1687 (define_insn "sse2_cvtpd2pi"
1688 [(set (match_operand:V2SI 0 "register_operand" "=y")
1689 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
1690 UNSPEC_FIX_NOTRUNC))]
1692 "cvtpd2pi\t{%1, %0|%0, %1}"
1693 [(set_attr "type" "ssecvt")
1694 (set_attr "mode" "DI")])
;; cvttpd2pi: convert a V2DF to a V2SI in an MMX register, expressed as
;; (fix ...).
;; NOTE(review): the mode attribute here is TI while sse2_cvtpd2pi, which
;; has the same operand shapes, uses DI -- confirm which is intended.
1696 (define_insn "sse2_cvttpd2pi"
1697 [(set (match_operand:V2SI 0 "register_operand" "=y")
1698 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
1700 "cvttpd2pi\t{%1, %0|%0, %1}"
1701 [(set_attr "type" "ssecvt")
1702 (set_attr "mode" "TI")])
;; cvtsi2sd: convert the SI operand 2 (general register or memory) to DF.
;; Operand 1 is a V2DF tied to the destination.  athlon_decode differs
;; between the register and memory alternatives.
1704 (define_insn "sse2_cvtsi2sd"
1705 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1708 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
1709 (match_operand:V2DF 1 "register_operand" "0,0")
1712 "cvtsi2sd\t{%2, %0|%0, %2}"
1713 [(set_attr "type" "sseicvt")
1714 (set_attr "mode" "DF")
1715 (set_attr "athlon_decode" "double,direct")])
;; cvtsi2sdq: convert the DI operand 2 (general register or memory) to DF.
;; Operand 1 is a V2DF tied to the destination.  Only available when both
;; TARGET_SSE2 and TARGET_64BIT hold.
1717 (define_insn "sse2_cvtsi2sdq"
1718 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1721 (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m")))
1722 (match_operand:V2DF 1 "register_operand" "0,0")
1724 "TARGET_SSE2 && TARGET_64BIT"
1725 "cvtsi2sdq\t{%2, %0|%0, %2}"
1726 [(set_attr "type" "sseicvt")
1727 (set_attr "mode" "DF")
1728 (set_attr "athlon_decode" "double,direct")])
;; cvtsd2si: convert element 0 of a V2DF (xmm register or memory) to an SI
;; general register.  Modelled as UNSPEC_FIX_NOTRUNC.
1730 (define_insn "sse2_cvtsd2si"
1731 [(set (match_operand:SI 0 "register_operand" "=r,r")
1734 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
1735 (parallel [(const_int 0)]))]
1736 UNSPEC_FIX_NOTRUNC))]
1738 "cvtsd2si\t{%1, %0|%0, %1}"
1739 [(set_attr "type" "sseicvt")
1740 (set_attr "athlon_decode" "double,vector")
1741 (set_attr "mode" "SI")])
;; cvtsd2siq: convert element 0 of a V2DF (xmm register or memory) to a DI
;; general register.  Modelled as UNSPEC_FIX_NOTRUNC; only available when
;; both TARGET_SSE2 and TARGET_64BIT hold.
1743 (define_insn "sse2_cvtsd2siq"
1744 [(set (match_operand:DI 0 "register_operand" "=r,r")
1747 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
1748 (parallel [(const_int 0)]))]
1749 UNSPEC_FIX_NOTRUNC))]
1750 "TARGET_SSE2 && TARGET_64BIT"
1751 "cvtsd2siq\t{%1, %0|%0, %1}"
1752 [(set_attr "type" "sseicvt")
1753 (set_attr "athlon_decode" "double,vector")
1754 (set_attr "mode" "DI")])
;; cvttsd2si: convert element 0 of a V2DF (xmm register or memory) to an
;; SI general register.
1756 (define_insn "sse2_cvttsd2si"
1757 [(set (match_operand:SI 0 "register_operand" "=r,r")
1760 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
1761 (parallel [(const_int 0)]))))]
1763 "cvttsd2si\t{%1, %0|%0, %1}"
1764 [(set_attr "type" "sseicvt")
1765 (set_attr "mode" "SI")
1766 (set_attr "athlon_decode" "double,vector")])
;; cvttsd2siq: convert element 0 of a V2DF (xmm register or memory) to a
;; DI general register.  Only available when both TARGET_SSE2 and
;; TARGET_64BIT hold.
1768 (define_insn "sse2_cvttsd2siq"
1769 [(set (match_operand:DI 0 "register_operand" "=r,r")
1772 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
1773 (parallel [(const_int 0)]))))]
1774 "TARGET_SSE2 && TARGET_64BIT"
1775 "cvttsd2siq\t{%1, %0|%0, %1}"
1776 [(set_attr "type" "sseicvt")
1777 (set_attr "mode" "DI")
1778 (set_attr "athlon_decode" "double,vector")])
;; cvtdq2pd: produce a V2DF from elements 0 and 1 of a V4SI operand
;; (xmm register or memory).
1780 (define_insn "sse2_cvtdq2pd"
1781 [(set (match_operand:V2DF 0 "register_operand" "=x")
1784 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
1785 (parallel [(const_int 0) (const_int 1)]))))]
1787 "cvtdq2pd\t{%1, %0|%0, %1}"
1788 [(set_attr "type" "ssecvt")
1789 (set_attr "mode" "V2DF")])
;; Expander for the V2DF -> V4SI conversion.  The preparation statement
;; supplies operand 2 as the V2SI zero constant, which the matching insn
;; pattern requires as a const0_operand.
1791 (define_expand "sse2_cvtpd2dq"
1792 [(set (match_operand:V4SI 0 "register_operand" "")
1794 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "")]
1798 "operands[2] = CONST0_RTX (V2SImode);")
;; cvtpd2dq: V4SI result built from a V2SI conversion of operand 1 (an
;; unspec) together with operand 2, which must be a V2SI zero constant.
1800 (define_insn "*sse2_cvtpd2dq"
1801 [(set (match_operand:V4SI 0 "register_operand" "=x")
1803 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
1805 (match_operand:V2SI 2 "const0_operand" "")))]
1807 "cvtpd2dq\t{%1, %0|%0, %1}"
1808 [(set_attr "type" "ssecvt")
1809 (set_attr "mode" "TI")])
;; Expander for the (fix ...) V2DF -> V4SI conversion.  The preparation
;; statement supplies operand 2 as the V2SI zero constant, which the
;; matching insn pattern requires as a const0_operand.
1811 (define_expand "sse2_cvttpd2dq"
1812 [(set (match_operand:V4SI 0 "register_operand" "")
1814 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" ""))
1817 "operands[2] = CONST0_RTX (V2SImode);")
;; cvttpd2dq: V4SI result built from (fix:V2SI operand 1) together with
;; operand 2, which must be a V2SI zero constant.
1819 (define_insn "*sse2_cvttpd2dq"
1820 [(set (match_operand:V4SI 0 "register_operand" "=x")
1822 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
1823 (match_operand:V2SI 2 "const0_operand" "")))]
1825 "cvttpd2dq\t{%1, %0|%0, %1}"
1826 [(set_attr "type" "ssecvt")
1827 (set_attr "mode" "TI")])
;; cvtsd2ss: float_truncate of the V2DF operand 2 (xmm register or
;; memory).  Operand 1 is a V4SF tied to the destination.
1829 (define_insn "sse2_cvtsd2ss"
1830 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1833 (float_truncate:V2SF
1834 (match_operand:V2DF 2 "nonimmediate_operand" "x,m")))
1835 (match_operand:V4SF 1 "register_operand" "0,0")
1838 "cvtsd2ss\t{%2, %0|%0, %2}"
1839 [(set_attr "type" "ssecvt")
1840 (set_attr "athlon_decode" "vector,double")
1841 (set_attr "mode" "SF")])
;; cvtss2sd: uses elements 0 and 1 of the V4SF operand 2 (xmm register or
;; memory).  Operand 1 is a V2DF tied to the destination.
1843 (define_insn "sse2_cvtss2sd"
1844 [(set (match_operand:V2DF 0 "register_operand" "=x")
1848 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
1849 (parallel [(const_int 0) (const_int 1)])))
1850 (match_operand:V2DF 1 "register_operand" "0")
1853 "cvtss2sd\t{%2, %0|%0, %2}"
1854 [(set_attr "type" "ssecvt")
1855 (set_attr "mode" "DF")])
;; Expander for the V2DF -> V4SF conversion.  The preparation statement
;; supplies operand 2 as the V2SF zero constant, which the matching insn
;; pattern requires as a const0_operand.  Constraints carry no meaning in
;; an expander, so operand 1 uses an empty constraint string like the
;; other expanders in this file.
1857 (define_expand "sse2_cvtpd2ps"
1858 [(set (match_operand:V4SF 0 "register_operand" "")
1860 (float_truncate:V2SF
1861 (match_operand:V2DF 1 "nonimmediate_operand" ""))
1864 "operands[2] = CONST0_RTX (V2SFmode);")
;; cvtpd2ps: V4SF result built from (float_truncate:V2SF operand 1)
;; together with operand 2, which must be a V2SF zero constant.
1866 (define_insn "*sse2_cvtpd2ps"
1867 [(set (match_operand:V4SF 0 "register_operand" "=x")
1869 (float_truncate:V2SF
1870 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
1871 (match_operand:V2SF 2 "const0_operand" "")))]
1873 "cvtpd2ps\t{%1, %0|%0, %1}"
1874 [(set_attr "type" "ssecvt")
1875 (set_attr "mode" "V4SF")])
;; cvtps2pd: produce a V2DF from elements 0 and 1 of a V4SF operand
;; (xmm register or memory).
1877 (define_insn "sse2_cvtps2pd"
1878 [(set (match_operand:V2DF 0 "register_operand" "=x")
1881 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
1882 (parallel [(const_int 0) (const_int 1)]))))]
1884 "cvtps2pd\t{%1, %0|%0, %1}"
1885 [(set_attr "type" "ssecvt")
1886 (set_attr "mode" "V2DF")])
1888 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1890 ;; Parallel double-precision floating point element swizzling
1892 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Select from V2DF operands 1 and 2 (first selected index is 1; the
;; second is on a line not visible here).  Not matched when both inputs
;; are memory.  Alternatives: unpckhpd register form, movlpd from %H1 of
;; an offsettable memory operand 1, and movhpd store to a memory
;; destination.
1894 (define_insn "sse2_unpckhpd"
1895 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
1898 (match_operand:V2DF 1 "nonimmediate_operand" " 0,o,x")
1899 (match_operand:V2DF 2 "nonimmediate_operand" " x,0,0"))
1900 (parallel [(const_int 1)
1902 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
1904 unpckhpd\t{%2, %0|%0, %2}
1905 movlpd\t{%H1, %0|%0, %H1}
1906 movhpd\t{%1, %0|%0, %1}"
1907 [(set_attr "type" "sselog,ssemov,ssemov")
1908 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; movddup: V2DF result selected from operand 1.  Alternative 0 targets an
;; xmm register from a register or memory source; alternative 1 has an
;; offsettable memory destination and a register source (its template is
;; not visible in this chunk).  The pattern has only operands 0 and 1, so
;; the memory-to-memory guard must test those two; operands[2] is never
;; set by this pattern.
1910 (define_insn "*sse3_movddup"
1911 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,o")
1914 (match_operand:V2DF 1 "nonimmediate_operand" "xm,x")
1916 (parallel [(const_int 0)
1918 "TARGET_SSE3 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
1920 movddup\t{%1, %0|%0, %1}
1922 [(set_attr "type" "sselog,ssemov")
1923 (set_attr "mode" "V2DF")])
;; NOTE(review): the header of this form is not visible in this chunk;
;; presumably a split.  After reload, with a V2DF memory destination and a
;; register source, it views the source register in DFmode and emits two
;; DFmode stores of that value, at offsets 0 and 8 of the destination.
1926 [(set (match_operand:V2DF 0 "memory_operand" "")
1929 (match_operand:V2DF 1 "register_operand" "")
1931 (parallel [(const_int 0)
1933 "TARGET_SSE3 && reload_completed"
1936 rtx low = gen_rtx_REG (DFmode, REGNO (operands[1]));
1937 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
1938 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
;; Select from V2DF operands 1 and 2 (first selected index is 0; the
;; second is on a line not visible here).  Operand 1 is always tied to the
;; destination.  Alternatives: unpcklpd register form, movhpd load from
;; memory, and movlpd store into %H0 of an offsettable memory destination.
1942 (define_insn "sse2_unpcklpd"
1943 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o")
1946 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0")
1947 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x"))
1948 (parallel [(const_int 0)
1950 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
1952 unpcklpd\t{%2, %0|%0, %2}
1953 movhpd\t{%2, %0|%0, %2}
1954 movlpd\t{%2, %H0|%H0, %2}"
1955 [(set_attr "type" "sselog,ssemov,ssemov")
1956 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; Expander taking the shufpd selector as one immediate (operand 3).  It
;; emits sse2_shufpd_1 with the selector decomposed into element indices;
;; the visible argument maps bit 1 of the mask to index 3 or 2.
1958 (define_expand "sse2_shufpd"
1959 [(match_operand:V2DF 0 "register_operand" "")
1960 (match_operand:V2DF 1 "register_operand" "")
1961 (match_operand:V2DF 2 "nonimmediate_operand" "")
1962 (match_operand:SI 3 "const_int_operand" "")]
1965 int mask = INTVAL (operands[3]);
1966 emit_insn (gen_sse2_shufpd_1 (operands[0], operands[1], operands[2],
1968 GEN_INT (mask & 2 ? 3 : 2)));
;; shufpd with the selector given as two element indices (operands 3 and
;; 4).  The output code rebuilds the immediate: operand 3 supplies bit 0
;; and operand 4, biased by 2, supplies bit 1.  The result is stored in
;; operands[3] so the template can print it as %3.
1972 (define_insn "sse2_shufpd_1"
1973 [(set (match_operand:V2DF 0 "register_operand" "=x")
1976 (match_operand:V2DF 1 "register_operand" "0")
1977 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1978 (parallel [(match_operand 3 "const_0_to_1_operand" "")
1979 (match_operand 4 "const_2_to_3_operand" "")])))]
1983 mask = INTVAL (operands[3]);
1984 mask |= (INTVAL (operands[4]) - 2) << 1;
1985 operands[3] = GEN_INT (mask);
1987 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
1989 [(set_attr "type" "sselog")
1990 (set_attr "mode" "V2DF")])
;; Extract element 1 of a V2DF into a DF destination.  Not matched when
;; both operands are memory.  Alternative 0 is a movhpd store; the
;; templates of the other two alternatives are not visible in this chunk.
1992 (define_insn "sse2_storehpd"
1993 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x*fr")
1995 (match_operand:V2DF 1 "nonimmediate_operand" " x,0,o")
1996 (parallel [(const_int 1)])))]
1997 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
1999 movhpd\t{%1, %0|%0, %1}
2002 [(set_attr "type" "ssemov,sselog1,ssemov")
2003 (set_attr "mode" "V1DF,V2DF,DF")])
;; NOTE(review): the header of this form is not visible in this chunk;
;; presumably a split.  After reload, extracting element 1 of a V2DF in
;; memory into a DF register becomes a plain move from the DFmode slot at
;; offset 8 of that memory.
2006 [(set (match_operand:DF 0 "register_operand" "")
2008 (match_operand:V2DF 1 "memory_operand" "")
2009 (parallel [(const_int 1)])))]
2010 "TARGET_SSE2 && reload_completed"
2011 [(set (match_dup 0) (match_dup 1))]
2013 operands[1] = adjust_address (operands[1], DFmode, 8);
;; Extract element 0 of a V2DF into a DF destination.  Not matched when
;; both operands are memory.  Alternative 0 is a movlpd store; the
;; templates of the other two alternatives are not visible in this chunk.
2016 (define_insn "sse2_storelpd"
2017 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x*fr")
2019 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m")
2020 (parallel [(const_int 0)])))]
2021 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2023 movlpd\t{%1, %0|%0, %1}
2026 [(set_attr "type" "ssemov")
2027 (set_attr "mode" "V1DF,DF,DF")])
;; NOTE(review): the header of this form is not visible in this chunk;
;; presumably a split.  After reload, extracting element 0 of a V2DF into
;; a DF register becomes a plain move: operand 1 is re-expressed in DFmode,
;; either as the same hard register number (gen_rtx_REG) or through
;; gen_lowpart.
2030 [(set (match_operand:DF 0 "register_operand" "")
2032 (match_operand:V2DF 1 "nonimmediate_operand" "")
2033 (parallel [(const_int 0)])))]
2034 "TARGET_SSE2 && reload_completed"
2037 rtx op1 = operands[1];
2039 op1 = gen_rtx_REG (DFmode, REGNO (op1));
2041 op1 = gen_lowpart (DFmode, op1);
2042 emit_move_insn (operands[0], op1);
;; Combine element 0 of the V2DF operand 1 with the DF operand 2.  Not
;; matched when both inputs are memory.  Alternatives: movhpd load,
;; unpcklpd, shufpd $1, and a memory-destination alternative whose
;; template is not visible in this chunk.
;; NOTE(review): in alternative 2 operand 2 is tied to the destination and
;; the template is shufpd $1 with %1 as source; confirm that this really
;; leaves element 0 of operand 1 in the low half of the result.
2046 (define_insn "sse2_loadhpd"
2047 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,o")
2050 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,x,0")
2051 (parallel [(const_int 0)]))
2052 (match_operand:DF 2 "nonimmediate_operand" " m,x,0,x*fr")))]
2053 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2055 movhpd\t{%2, %0|%0, %2}
2056 unpcklpd\t{%2, %0|%0, %2}
2057 shufpd\t{$1, %1, %0|%0, %1, 1}
2059 [(set_attr "type" "ssemov,sselog,sselog,other")
2060 (set_attr "mode" "V1DF,V2DF,V2DF,DF")])
;; NOTE(review): the header of this form is not visible in this chunk;
;; presumably a split.  After reload, when the V2DF destination is memory
;; and its own element 0 is kept, only the new DF value is stored: the
;; destination is narrowed to the DFmode slot at offset 8.
2063 [(set (match_operand:V2DF 0 "memory_operand" "")
2065 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
2066 (match_operand:DF 1 "register_operand" "")))]
2067 "TARGET_SSE2 && reload_completed"
2068 [(set (match_dup 0) (match_dup 1))]
2070 operands[0] = adjust_address (operands[0], DFmode, 8);
;; Combine the DF operand 2 with element 1 of the V2DF operand 1.  Not
;; matched when both inputs are memory.  Alternatives 0-2 load operand 2
;; with movsd/movlpd; alternatives 3 and 4 have operand 2 tied to the
;; destination and bring in the upper element of operand 1 (shufpd $2
;; from a register, movhpd from %H1 of offsettable memory); the template
;; of the memory-destination alternative is not visible in this chunk.
;; In alternative 3 the shufpd source must be %1: operand 2 is the
;; destination itself, so naming %2 would never read operand 1.
2073 (define_insn "sse2_loadlpd"
2074 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m")
2076 (match_operand:DF 2 "nonimmediate_operand" " m,m,x,0,0,x*fr")
2078 (match_operand:V2DF 1 "vector_move_operand" " C,0,0,x,o,0")
2079 (parallel [(const_int 1)]))))]
2080 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2082 movsd\t{%2, %0|%0, %2}
2083 movlpd\t{%2, %0|%0, %2}
2084 movsd\t{%2, %0|%0, %2}
2085 shufpd\t{$2, %1, %0|%0, %1, 2}
2086 movhpd\t{%H1, %0|%0, %H1}
2088 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,other")
2089 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,DF")])
;; NOTE(review): the header of this form is not visible in this chunk;
;; presumably a split.  After reload, when the V2DF destination is memory
;; and its own element 1 is kept, the pattern becomes a single DFmode
;; store of operand 1 into the destination narrowed by adjust_address.
;; NOTE(review): the offset used is 8, the same as in the split that
;; replaces the other element -- confirm this is the intended slot.
2092 [(set (match_operand:V2DF 0 "memory_operand" "")
2094 (match_operand:DF 1 "register_operand" "")
2095 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
2096 "TARGET_SSE2 && reload_completed"
2097 [(set (match_dup 0) (match_dup 1))]
2099 operands[0] = adjust_address (operands[0], DFmode, 8);
;; Merge V2DF operands 2 and 1.  Alternatives 0-2 have operand 1 tied to
;; the destination and move operand 2 in with movsd/movlpd; alternatives
;; 3-5 have operand 2 tied to the destination and bring in the upper part
;; of operand 1 with shufpd $2 or movhps.
;; In alternative 3 the shufpd source must be %1: operand 2 is the
;; destination itself, so naming %2 would never read operand 1.
;; Every template closes its {att|intel} dialect group with "}".
2102 (define_insn "sse2_movsd"
2103 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m,x,x,o")
2105 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x,0,0,0")
2106 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0,x,o,x")
2110 movsd\t{%2, %0|%0, %2}
2111 movlpd\t{%2, %0|%0, %2}
2112 movlpd\t{%2, %0|%0, %2}
2113 shufpd\t{$2, %1, %0|%0, %1, 2}
2114 movhps\t{%H1, %0|%0, %H1}
2115 movhps\t{%1, %H0|%H0, %1}"
2116 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
2117 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,V1DF")])
;; V2DF result from a single DF input (xmm register or memory), emitted
;; as movddup.
2119 (define_insn "*vec_dupv2df_sse3"
2120 [(set (match_operand:V2DF 0 "register_operand" "=x")
2122 (match_operand:DF 1 "nonimmediate_operand" "xm")))]
2124 "movddup\t{%1, %0|%0, %1}"
2125 [(set_attr "type" "sselog1")
2126 (set_attr "mode" "DF")])
;; V2DF result from a single DF register input tied to the destination.
;; NOTE(review): the output template is on a line not visible in this
;; chunk, and the mode attribute is V4SF on a V2DF pattern -- confirm
;; that is intended.
2128 (define_insn "*vec_dupv2df"
2129 [(set (match_operand:V2DF 0 "register_operand" "=x")
2131 (match_operand:DF 1 "register_operand" "0")))]
2134 [(set_attr "type" "sselog1")
2135 (set_attr "mode" "V4SF")])
;; V2DF built from the DF operand 1 (xmm register or memory), emitted as
;; movddup.  The rest of the pattern is on lines not visible in this chunk.
2137 (define_insn "*vec_concatv2df_sse3"
2138 [(set (match_operand:V2DF 0 "register_operand" "=x")
2140 (match_operand:DF 1 "nonimmediate_operand" "xm")
2143 "movddup\t{%1, %0|%0, %1}"
2144 [(set_attr "type" "sselog1")
2145 (set_attr "mode" "DF")])
;; Build a V2DF from two DF operands.  Alternatives 0-2 use constraint Y
;; registers (unpcklpd, movhpd from memory, or movsd from memory when
;; operand 2 satisfies constraint C); alternatives 3-4 use constraint x
;; registers with the single-precision moves movlhps / movhps.
2147 (define_insn "*vec_concatv2df"
2148 [(set (match_operand:V2DF 0 "register_operand" "=Y,Y,Y,x,x")
2150 (match_operand:DF 1 "nonimmediate_operand" " 0,0,m,0,0")
2151 (match_operand:DF 2 "vector_move_operand" " Y,m,C,x,m")))]
2154 unpcklpd\t{%2, %0|%0, %2}
2155 movhpd\t{%2, %0|%0, %2}
2156 movsd\t{%1, %0|%0, %1}
2157 movlhps\t{%2, %0|%0, %2}
2158 movhps\t{%2, %0|%0, %2}"
2159 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,ssemov")
2160 (set_attr "mode" "V2DF,V1DF,DF,V4SF,V2SF")])
;; Expander: operand 0 is the V2DF register, operand 1 a DF register and
;; operand 2 a constant integer.  All work is delegated to
;; ix86_expand_vector_set, called with `false' and INTVAL (operands[2]).
2162 (define_expand "vec_setv2df"
2163 [(match_operand:V2DF 0 "register_operand" "")
2164 (match_operand:DF 1 "register_operand" "")
2165 (match_operand 2 "const_int_operand" "")]
2168 ix86_expand_vector_set (false, operands[0], operands[1],
2169 INTVAL (operands[2]));
;; Expander: operand 0 is the DF result register, operand 1 the V2DF source
;; register and operand 2 a constant integer.  Delegates to
;; ix86_expand_vector_extract with `false' and INTVAL (operands[2]).
2173 (define_expand "vec_extractv2df"
2174 [(match_operand:DF 0 "register_operand" "")
2175 (match_operand:V2DF 1 "register_operand" "")
2176 (match_operand 2 "const_int_operand" "")]
2179 ix86_expand_vector_extract (false, operands[0], operands[1],
2180 INTVAL (operands[2]));
;; Expander: initializes V2DF register operand 0 from operand 1 (no
;; predicate) by calling ix86_expand_vector_init with `false'.
2184 (define_expand "vec_initv2df"
2185 [(match_operand:V2DF 0 "register_operand" "")
2186 (match_operand 1 "" "")]
2189 ix86_expand_vector_init (false, operands[0], operands[1]);
2193 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2195 ;; Parallel integral arithmetic
2197 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander over the SSEMODEI modes (V16QI, V8HI, V4SI, V2DI).  The
;; preparation statement creates operand 2 as the mode's zero constant
;; (CONST0_RTX) forced into a register.
2199 (define_expand "neg<mode>2"
2200 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2203 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")))]
2205 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
;; Expander for element-wise integer addition over the SSEMODEI modes.  Both
;; inputs may be register or memory; the preparation statement passes the
;; operands to ix86_fixup_binary_operands_no_copy with code PLUS.
2207 (define_expand "add<mode>3"
2208 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2209 (plus:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
2210 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
2212 "ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);")
;; Insn for the SSEMODEI addition: emits padd<ssevecsize>, where the
;; ssevecsize attribute supplies the b/w/d/q suffix for the mode.  Requires
;; TARGET_SSE2 and ix86_binary_operator_ok (PLUS, ...).  Operand 1 is
;; commutative and tied to the output ("%0"); operand 2 is register or memory.
2214 (define_insn "*add<mode>3"
2215 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
2217 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
2218 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
2219 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
2220 "padd<ssevecsize>\t{%2, %0|%0, %2}"
2221 [(set_attr "type" "sseiadd")
2222 (set_attr "mode" "TI")])
;; Insn over SSEMODE12 (V16QI, V8HI): emits padds<ssevecsize>.  Requires
;; TARGET_SSE2 and ix86_binary_operator_ok (SS_PLUS, ...).  Operand 1 is
;; commutative and tied to the output; operand 2 is register or memory.
2224 (define_insn "sse2_ssadd<mode>3"
2225 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2227 (match_operand:SSEMODE12 1 "nonimmediate_operand" "%0")
2228 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2229 "TARGET_SSE2 && ix86_binary_operator_ok (SS_PLUS, <MODE>mode, operands)"
2230 "padds<ssevecsize>\t{%2, %0|%0, %2}"
2231 [(set_attr "type" "sseiadd")
2232 (set_attr "mode" "TI")])
;; Insn over SSEMODE12 (V16QI, V8HI): emits paddus<ssevecsize>.  Requires
;; TARGET_SSE2 and ix86_binary_operator_ok (US_PLUS, ...).  Operand 1 is
;; commutative and tied to the output; operand 2 is register or memory.
2234 (define_insn "sse2_usadd<mode>3"
2235 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2237 (match_operand:SSEMODE12 1 "nonimmediate_operand" "%0")
2238 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2239 "TARGET_SSE2 && ix86_binary_operator_ok (US_PLUS, <MODE>mode, operands)"
2240 "paddus<ssevecsize>\t{%2, %0|%0, %2}"
2241 [(set_attr "type" "sseiadd")
2242 (set_attr "mode" "TI")])
;; Expander for element-wise integer subtraction over the SSEMODEI modes.
;; Operand 1 (the minuend) must be a register; operand 2 may be register or
;; memory.  The preparation statement calls
;; ix86_fixup_binary_operands_no_copy with code MINUS.
2244 (define_expand "sub<mode>3"
2245 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2246 (minus:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "")
2247 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
2249 "ix86_fixup_binary_operands_no_copy (MINUS, <MODE>mode, operands);")
;; Insn over SSEMODEI: emits psub<ssevecsize>.  Operand 1 is a register tied
;; to the output ("0", not commutative); operand 2 is register or memory.
2251 (define_insn "*sub<mode>3"
2252 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
2254 (match_operand:SSEMODEI 1 "register_operand" "0")
2255 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
2257 "psub<ssevecsize>\t{%2, %0|%0, %2}"
2258 [(set_attr "type" "sseiadd")
2259 (set_attr "mode" "TI")])
;; Insn over SSEMODE12 (V16QI, V8HI): emits psubs<ssevecsize>.  Operand 1 is
;; a register tied to the output; operand 2 is register or memory.
2261 (define_insn "sse2_sssub<mode>3"
2262 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2264 (match_operand:SSEMODE12 1 "register_operand" "0")
2265 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2267 "psubs<ssevecsize>\t{%2, %0|%0, %2}"
2268 [(set_attr "type" "sseiadd")
2269 (set_attr "mode" "TI")])
;; Insn over SSEMODE12 (V16QI, V8HI): emits psubus<ssevecsize>.  Operand 1 is
;; a register tied to the output; operand 2 is register or memory.
2271 (define_insn "sse2_ussub<mode>3"
2272 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2274 (match_operand:SSEMODE12 1 "register_operand" "0")
2275 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2277 "psubus<ssevecsize>\t{%2, %0|%0, %2}"
2278 [(set_attr "type" "sseiadd")
2279 (set_attr "mode" "TI")])
;; Expander for V8HI element-wise multiplication.  Both inputs may be
;; register or memory; the preparation statement calls
;; ix86_fixup_binary_operands_no_copy with code MULT.
2281 (define_expand "mulv8hi3"
2282 [(set (match_operand:V8HI 0 "register_operand" "")
2283 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
2284 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
2286 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; Insn for the V8HI multiply: emits pmullw.  Requires TARGET_SSE2 and
;; ix86_binary_operator_ok (MULT, ...).  Operand 1 is commutative and tied to
;; the output; operand 2 is register or memory.
2288 (define_insn "*mulv8hi3"
2289 [(set (match_operand:V8HI 0 "register_operand" "=x")
2290 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
2291 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
2292 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
2293 "pmullw\t{%2, %0|%0, %2}"
2294 [(set_attr "type" "sseimul")
2295 (set_attr "mode" "TI")])
;; Insn on V8HI operands: emits pmulhw.  Requires TARGET_SSE2 and
;; ix86_binary_operator_ok (MULT, ...).  Operand 1 is commutative and tied to
;; the output; operand 2 is register or memory.
2297 (define_insn "sse2_smulv8hi3_highpart"
2298 [(set (match_operand:V8HI 0 "register_operand" "=x")
2303 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
2305 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
2307 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
2308 "pmulhw\t{%2, %0|%0, %2}"
2309 [(set_attr "type" "sseimul")
2310 (set_attr "mode" "TI")])
;; Insn on V8HI operands: emits pmulhuw.  Requires TARGET_SSE2 and
;; ix86_binary_operator_ok (MULT, ...).  Operand 1 is commutative and tied to
;; the output; operand 2 is register or memory.
2312 (define_insn "sse2_umulv8hi3_highpart"
2313 [(set (match_operand:V8HI 0 "register_operand" "=x")
2318 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
2320 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
2322 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
2323 "pmulhuw\t{%2, %0|%0, %2}"
2324 [(set_attr "type" "sseimul")
2325 (set_attr "mode" "TI")])
;; Insn producing a V2DI result from elements 0 and 2 of two V4SI inputs:
;; emits pmuludq.  Operand 1 is commutative and tied to the output; operand 2
;; is register or memory.  The insn condition checks the operands with
;; ix86_binary_operator_ok using V4SImode, the mode of the two inputs
;; (it previously named V8HImode, which matches neither operand).
2327 (define_insn "sse2_umulv2siv2di3"
2328 [(set (match_operand:V2DI 0 "register_operand" "=x")
2332 (match_operand:V4SI 1 "nonimmediate_operand" "%0")
2333 (parallel [(const_int 0) (const_int 2)])))
2336 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
2337 (parallel [(const_int 0) (const_int 2)])))))]
2338 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
2339 "pmuludq\t{%2, %0|%0, %2}"
2340 [(set_attr "type" "sseimul")
2341 (set_attr "mode" "TI")])
;; Insn producing a V4SI result from two V8HI inputs: emits pmaddwd.  The RTL
;; uses V4HI vec_selects of operands 1 and 2 (referenced again via
;; match_dup).  Operand 1 is commutative and tied to the output; operand 2 is
;; register or memory.
2343 (define_insn "sse2_pmaddwd"
2344 [(set (match_operand:V4SI 0 "register_operand" "=x")
2349 (match_operand:V8HI 1 "nonimmediate_operand" "%0")
2350 (parallel [(const_int 0)
2356 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
2357 (parallel [(const_int 0)
2363 (vec_select:V4HI (match_dup 1)
2364 (parallel [(const_int 1)
2369 (vec_select:V4HI (match_dup 2)
2370 (parallel [(const_int 1)
2373 (const_int 7)]))))))]
2375 "pmaddwd\t{%2, %0|%0, %2}"
2376 [(set_attr "type" "sseiadd")
2377 (set_attr "mode" "TI")])
;; Insn over SSEMODE24 (V8HI, V4SI): emits psra<ssevecsize>.  Operand 1 is a
;; register tied to the output; the SImode count (operand 2) is a
;; non-memory operand with constraints "xi".
2379 (define_insn "ashr<mode>3"
2380 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
2382 (match_operand:SSEMODE24 1 "register_operand" "0")
2383 (match_operand:SI 2 "nonmemory_operand" "xi")))]
2385 "psra<ssevecsize>\t{%2, %0|%0, %2}"
2386 [(set_attr "type" "sseishft")
2387 (set_attr "mode" "TI")])
;; Logical right shift (lshiftrt) over SSEMODE248 (V8HI, V4SI, V2DI): emits
;; psrl<ssevecsize>.  Operand 1 is a register tied to the output; the SImode
;; count (operand 2) is a non-memory operand with constraints "xi".
2389 (define_insn "lshr<mode>3"
2390 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
2391 (lshiftrt:SSEMODE248
2392 (match_operand:SSEMODE248 1 "register_operand" "0")
2393 (match_operand:SI 2 "nonmemory_operand" "xi")))]
2395 "psrl<ssevecsize>\t{%2, %0|%0, %2}"
2396 [(set_attr "type" "sseishft")
2397 (set_attr "mode" "TI")])
;; Insn over SSEMODE248 (V8HI, V4SI, V2DI): emits psll<ssevecsize>.  Operand 1
;; is a register tied to the output; the SImode count (operand 2) is a
;; non-memory operand with constraints "xi".
2399 (define_insn "ashl<mode>3"
2400 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
2402 (match_operand:SSEMODE248 1 "register_operand" "0")
2403 (match_operand:SI 2 "nonmemory_operand" "xi")))]
2405 "psll<ssevecsize>\t{%2, %0|%0, %2}"
2406 [(set_attr "type" "sseishft")
2407 (set_attr "mode" "TI")])
;; Left shift (ashift) of a whole TImode register.  Operand 2 must satisfy
;; const_0_to_255_mul_8_operand; the output code divides it by 8 before
;; printing and emits pslldq.
2409 (define_insn "sse2_ashlti3"
2410 [(set (match_operand:TI 0 "register_operand" "=x")
2411 (ashift:TI (match_operand:TI 1 "register_operand" "0")
2412 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
2415 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
2416 return "pslldq\t{%2, %0|%0, %2}";
2418 [(set_attr "type" "sseishft")
2419 (set_attr "mode" "TI")])
;; Logical right shift (lshiftrt) of a whole TImode register.  Operand 2 must
;; satisfy const_0_to_255_mul_8_operand; the output code divides it by 8
;; before printing and emits psrldq.
2421 (define_insn "sse2_lshrti3"
2422 [(set (match_operand:TI 0 "register_operand" "=x")
2423 (lshiftrt:TI (match_operand:TI 1 "register_operand" "0")
2424 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
2427 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
2428 return "psrldq\t{%2, %0|%0, %2}";
2430 [(set_attr "type" "sseishft")
2431 (set_attr "mode" "TI")])
;; Expander for V16QI unsigned maximum (umax).  Both inputs may be register
;; or memory; the preparation statement calls
;; ix86_fixup_binary_operands_no_copy with code UMAX.
2433 (define_expand "umaxv16qi3"
2434 [(set (match_operand:V16QI 0 "register_operand" "")
2435 (umax:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "")
2436 (match_operand:V16QI 2 "nonimmediate_operand" "")))]
2438 "ix86_fixup_binary_operands_no_copy (UMAX, V16QImode, operands);")
;; Insn for V16QI umax: emits pmaxub.  Requires TARGET_SSE2 and
;; ix86_binary_operator_ok (UMAX, ...).  Operand 1 is commutative and tied to
;; the output; operand 2 is register or memory.
2440 (define_insn "*umaxv16qi3"
2441 [(set (match_operand:V16QI 0 "register_operand" "=x")
2442 (umax:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "%0")
2443 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
2444 "TARGET_SSE2 && ix86_binary_operator_ok (UMAX, V16QImode, operands)"
2445 "pmaxub\t{%2, %0|%0, %2}"
2446 [(set_attr "type" "sseiadd")
2447 (set_attr "mode" "TI")])
;; Expander for V8HI signed maximum (smax).  Both inputs may be register or
;; memory; the preparation statement calls
;; ix86_fixup_binary_operands_no_copy with code SMAX.
2449 (define_expand "smaxv8hi3"
2450 [(set (match_operand:V8HI 0 "register_operand" "")
2451 (smax:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
2452 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
2454 "ix86_fixup_binary_operands_no_copy (SMAX, V8HImode, operands);")
;; Insn for V8HI smax: emits pmaxsw.  Requires TARGET_SSE2 and
;; ix86_binary_operator_ok (SMAX, ...).  Operand 1 is commutative and tied to
;; the output; operand 2 is register or memory.
2456 (define_insn "*smaxv8hi3"
2457 [(set (match_operand:V8HI 0 "register_operand" "=x")
2458 (smax:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
2459 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
2460 "TARGET_SSE2 && ix86_binary_operator_ok (SMAX, V8HImode, operands)"
2461 "pmaxsw\t{%2, %0|%0, %2}"
2462 [(set_attr "type" "sseiadd")
2463 (set_attr "mode" "TI")])
;; Expander for V16QI unsigned minimum (umin).  Both inputs may be register
;; or memory.  The preparation statement calls
;; ix86_fixup_binary_operands_no_copy with code UMIN, matching the rtx code
;; of this pattern and the UMIN used by the "*uminv16qi3" insn condition
;; (the call previously passed UMAX).
2465 (define_expand "uminv16qi3"
2466 [(set (match_operand:V16QI 0 "register_operand" "")
2467 (umin:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "")
2468 (match_operand:V16QI 2 "nonimmediate_operand" "")))]
2470 "ix86_fixup_binary_operands_no_copy (UMIN, V16QImode, operands);")
;; Insn for V16QI umin: emits pminub.  Requires TARGET_SSE2 and
;; ix86_binary_operator_ok (UMIN, ...).  Operand 1 is commutative and tied to
;; the output; operand 2 is register or memory.
2472 (define_insn "*uminv16qi3"
2473 [(set (match_operand:V16QI 0 "register_operand" "=x")
2474 (umin:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "%0")
2475 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
2476 "TARGET_SSE2 && ix86_binary_operator_ok (UMIN, V16QImode, operands)"
2477 "pminub\t{%2, %0|%0, %2}"
2478 [(set_attr "type" "sseiadd")
2479 (set_attr "mode" "TI")])
;; Expander for V8HI signed minimum (smin).  Both inputs may be register or
;; memory; the preparation statement calls
;; ix86_fixup_binary_operands_no_copy with code SMIN.
2481 (define_expand "sminv8hi3"
2482 [(set (match_operand:V8HI 0 "register_operand" "")
2483 (smin:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
2484 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
2486 "ix86_fixup_binary_operands_no_copy (SMIN, V8HImode, operands);")
;; Insn for V8HI smin: emits pminsw.  Requires TARGET_SSE2 and
;; ix86_binary_operator_ok (SMIN, ...).  Operand 1 is commutative and tied to
;; the output; operand 2 is register or memory.
2488 (define_insn "*sminv8hi3"
2489 [(set (match_operand:V8HI 0 "register_operand" "=x")
2490 (smin:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
2491 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
2492 "TARGET_SSE2 && ix86_binary_operator_ok (SMIN, V8HImode, operands)"
2493 "pminsw\t{%2, %0|%0, %2}"
2494 [(set_attr "type" "sseiadd")
2495 (set_attr "mode" "TI")])
2497 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2499 ;; Parallel integral comparisons
2501 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Insn over SSEMODE124 (V16QI, V8HI, V4SI): emits pcmpeq<ssevecsize>.
;; Requires TARGET_SSE2 and ix86_binary_operator_ok (EQ, ...).  Operand 1 is
;; commutative and tied to the output; operand 2 is register or memory.
2503 (define_insn "sse2_eq<mode>3"
2504 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
2506 (match_operand:SSEMODE124 1 "nonimmediate_operand" "%0")
2507 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
2508 "TARGET_SSE2 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
2509 "pcmpeq<ssevecsize>\t{%2, %0|%0, %2}"
2510 [(set_attr "type" "ssecmp")
2511 (set_attr "mode" "TI")])
;; Insn over SSEMODE124 (V16QI, V8HI, V4SI): emits pcmpgt<ssevecsize>.
;; Operand 1 is a register tied to the output ("0", not commutative);
;; operand 2 is register or memory.
2513 (define_insn "sse2_gt<mode>3"
2514 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
2516 (match_operand:SSEMODE124 1 "register_operand" "0")
2517 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
2519 "pcmpgt<ssevecsize>\t{%2, %0|%0, %2}"
2520 [(set_attr "type" "ssecmp")
2521 (set_attr "mode" "TI")])
2523 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2525 ;; Parallel integral logical operations
2527 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander over SSEMODEI, expressed as an xor with operand 1.  The
;; preparation code allocates an rtvec with GET_MODE_NUNITS elements, fills
;; every element with constm1_rtx, wraps it in a CONST_VECTOR of the mode and
;; forces that constant into a register as operand 2.
2529 (define_expand "one_cmpl<mode>2"
2530 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2531 (xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
2535 int i, n = GET_MODE_NUNITS (<MODE>mode);
2536 rtvec v = rtvec_alloc (n);
2538 for (i = 0; i < n; ++i)
2539 RTVEC_ELT (v, i) = constm1_rtx;
2541 operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v));
;; Expander for bitwise AND over the SSEMODEI modes.  Both inputs may be
;; register or memory; the preparation statement calls
;; ix86_fixup_binary_operands_no_copy with code AND.
2544 (define_expand "and<mode>3"
2545 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2546 (and:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
2547 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
2549 "ix86_fixup_binary_operands_no_copy (AND, <MODE>mode, operands);")
;; Insn over SSEMODEI: emits pand (no element-size suffix).  Requires
;; TARGET_SSE2 and ix86_binary_operator_ok (AND, ...).  Operand 1 is
;; commutative and tied to the output; operand 2 is register or memory.
2551 (define_insn "*and<mode>3"
2552 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
2554 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
2555 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
2556 "TARGET_SSE2 && ix86_binary_operator_ok (AND, <MODE>mode, operands)"
2557 "pand\t{%2, %0|%0, %2}"
2558 [(set_attr "type" "sselog")
2559 (set_attr "mode" "TI")])
;; Insn over SSEMODEI: emits pandn.  The first input appears under `not' and
;; is a register tied to the output; operand 2 is register or memory.
2561 (define_insn "sse2_nand<mode>3"
2562 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
2564 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0"))
2565 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
2567 "pandn\t{%2, %0|%0, %2}"
2568 [(set_attr "type" "sselog")
2569 (set_attr "mode" "TI")])
;; Expander for bitwise inclusive OR over the SSEMODEI modes.  Both inputs
;; may be register or memory; the preparation statement calls
;; ix86_fixup_binary_operands_no_copy with code IOR.
2571 (define_expand "ior<mode>3"
2572 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2573 (ior:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
2574 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
2576 "ix86_fixup_binary_operands_no_copy (IOR, <MODE>mode, operands);")
;; Insn over SSEMODEI: emits por.  Requires TARGET_SSE2 and
;; ix86_binary_operator_ok (IOR, ...).  Operand 1 is commutative and tied to
;; the output; operand 2 is register or memory.
2578 (define_insn "*ior<mode>3"
2579 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
2581 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
2582 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
2583 "TARGET_SSE2 && ix86_binary_operator_ok (IOR, <MODE>mode, operands)"
2584 "por\t{%2, %0|%0, %2}"
2585 [(set_attr "type" "sselog")
2586 (set_attr "mode" "TI")])
;; Expander for bitwise exclusive OR over the SSEMODEI modes.  Both inputs
;; may be register or memory; the preparation statement calls
;; ix86_fixup_binary_operands_no_copy with code XOR.
2588 (define_expand "xor<mode>3"
2589 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2590 (xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
2591 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
2593 "ix86_fixup_binary_operands_no_copy (XOR, <MODE>mode, operands);")
;; Insn over SSEMODEI: emits pxor.  Requires TARGET_SSE2 and
;; ix86_binary_operator_ok (XOR, ...).  Operand 1 is commutative and tied to
;; the output; operand 2 is register or memory.
2595 (define_insn "*xor<mode>3"
2596 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
2598 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
2599 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
2600 "TARGET_SSE2 && ix86_binary_operator_ok (XOR, <MODE>mode, operands)"
2601 "pxor\t{%2, %0|%0, %2}"
2602 [(set_attr "type" "sselog")
2603 (set_attr "mode" "TI")])
2605 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2607 ;; Parallel integral element swizzling
2609 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Insn producing V16QI from two V8HI inputs: emits packsswb.  Operand 1 is a
;; register tied to the output; operand 2 is register or memory.
2611 (define_insn "sse2_packsswb"
2612 [(set (match_operand:V16QI 0 "register_operand" "=x")
2615 (match_operand:V8HI 1 "register_operand" "0"))
2617 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
2619 "packsswb\t{%2, %0|%0, %2}"
2620 [(set_attr "type" "sselog")
2621 (set_attr "mode" "TI")])
;; Insn producing V8HI from two V4SI inputs: emits packssdw.  Operand 1 is a
;; register tied to the output; operand 2 is register or memory.
2623 (define_insn "sse2_packssdw"
2624 [(set (match_operand:V8HI 0 "register_operand" "=x")
2627 (match_operand:V4SI 1 "register_operand" "0"))
2629 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
2631 "packssdw\t{%2, %0|%0, %2}"
2632 [(set_attr "type" "sselog")
2633 (set_attr "mode" "TI")])
;; Insn producing V16QI from two V8HI inputs: emits packuswb.  Operand 1 is a
;; register tied to the output; operand 2 is register or memory.
2635 (define_insn "sse2_packuswb"
2636 [(set (match_operand:V16QI 0 "register_operand" "=x")
2639 (match_operand:V8HI 1 "register_operand" "0"))
2641 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
2643 "packuswb\t{%2, %0|%0, %2}"
2644 [(set_attr "type" "sselog")
2645 (set_attr "mode" "TI")])
;; Emits punpckhbw.  The selection list pairs indices 8..15 with 24..31
;; (8,24, 9,25, ... 15,31) over operands 1 and 2.  Operand 1 is a register
;; tied to the output; operand 2 is register or memory.
2647 (define_insn "sse2_punpckhbw"
2648 [(set (match_operand:V16QI 0 "register_operand" "=x")
2651 (match_operand:V16QI 1 "register_operand" "0")
2652 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
2653 (parallel [(const_int 8) (const_int 24)
2654 (const_int 9) (const_int 25)
2655 (const_int 10) (const_int 26)
2656 (const_int 11) (const_int 27)
2657 (const_int 12) (const_int 28)
2658 (const_int 13) (const_int 29)
2659 (const_int 14) (const_int 30)
2660 (const_int 15) (const_int 31)])))]
2662 "punpckhbw\t{%2, %0|%0, %2}"
2663 [(set_attr "type" "sselog")
2664 (set_attr "mode" "TI")])
;; Emits punpcklbw.  The selection list pairs indices 0..7 with 16..23
;; (0,16, 1,17, ... 7,23) over operands 1 and 2.  Operand 1 is a register
;; tied to the output; operand 2 is register or memory.
2666 (define_insn "sse2_punpcklbw"
2667 [(set (match_operand:V16QI 0 "register_operand" "=x")
2670 (match_operand:V16QI 1 "register_operand" "0")
2671 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
2672 (parallel [(const_int 0) (const_int 16)
2673 (const_int 1) (const_int 17)
2674 (const_int 2) (const_int 18)
2675 (const_int 3) (const_int 19)
2676 (const_int 4) (const_int 20)
2677 (const_int 5) (const_int 21)
2678 (const_int 6) (const_int 22)
2679 (const_int 7) (const_int 23)])))]
2681 "punpcklbw\t{%2, %0|%0, %2}"
2682 [(set_attr "type" "sselog")
2683 (set_attr "mode" "TI")])
;; Emits punpckhwd.  The selection list pairs indices 4..7 with 12..15 over
;; V8HI operands 1 and 2.  Operand 1 is a register tied to the output;
;; operand 2 is register or memory.
2685 (define_insn "sse2_punpckhwd"
2686 [(set (match_operand:V8HI 0 "register_operand" "=x")
2689 (match_operand:V8HI 1 "register_operand" "0")
2690 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
2691 (parallel [(const_int 4) (const_int 12)
2692 (const_int 5) (const_int 13)
2693 (const_int 6) (const_int 14)
2694 (const_int 7) (const_int 15)])))]
2696 "punpckhwd\t{%2, %0|%0, %2}"
2697 [(set_attr "type" "sselog")
2698 (set_attr "mode" "TI")])
;; Emits punpcklwd.  The selection list pairs indices 0..3 with 8..11 over
;; V8HI operands 1 and 2.  Operand 1 is a register tied to the output;
;; operand 2 is register or memory.
2700 (define_insn "sse2_punpcklwd"
2701 [(set (match_operand:V8HI 0 "register_operand" "=x")
2704 (match_operand:V8HI 1 "register_operand" "0")
2705 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
2706 (parallel [(const_int 0) (const_int 8)
2707 (const_int 1) (const_int 9)
2708 (const_int 2) (const_int 10)
2709 (const_int 3) (const_int 11)])))]
2711 "punpcklwd\t{%2, %0|%0, %2}"
2712 [(set_attr "type" "sselog")
2713 (set_attr "mode" "TI")])
;; Emits punpckhdq.  The selection list is 2,6,3,7 over V4SI operands 1 and
;; 2.  Operand 1 is a register tied to the output; operand 2 is register or
;; memory.
2715 (define_insn "sse2_punpckhdq"
2716 [(set (match_operand:V4SI 0 "register_operand" "=x")
2719 (match_operand:V4SI 1 "register_operand" "0")
2720 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
2721 (parallel [(const_int 2) (const_int 6)
2722 (const_int 3) (const_int 7)])))]
2724 "punpckhdq\t{%2, %0|%0, %2}"
2725 [(set_attr "type" "sselog")
2726 (set_attr "mode" "TI")])
;; Emits punpckldq.  The selection list is 0,4,1,5 over V4SI operands 1 and
;; 2.  Operand 1 is a register tied to the output; operand 2 is register or
;; memory.
2728 (define_insn "sse2_punpckldq"
2729 [(set (match_operand:V4SI 0 "register_operand" "=x")
2732 (match_operand:V4SI 1 "register_operand" "0")
2733 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
2734 (parallel [(const_int 0) (const_int 4)
2735 (const_int 1) (const_int 5)])))]
2737 "punpckldq\t{%2, %0|%0, %2}"
2738 [(set_attr "type" "sselog")
2739 (set_attr "mode" "TI")])
;; Emits punpckhqdq on V2DI operands; the selection list starts at index 1.
;; Operand 1 is a register tied to the output; operand 2 is register or
;; memory.
2741 (define_insn "sse2_punpckhqdq"
2742 [(set (match_operand:V2DI 0 "register_operand" "=x")
2745 (match_operand:V2DI 1 "register_operand" "0")
2746 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
2747 (parallel [(const_int 1)
2750 "punpckhqdq\t{%2, %0|%0, %2}"
2751 [(set_attr "type" "sselog")
2752 (set_attr "mode" "TI")])
;; Emits punpcklqdq on V2DI operands; the selection list starts at index 0.
;; Operand 1 is a register tied to the output; operand 2 is register or
;; memory.
2754 (define_insn "sse2_punpcklqdq"
2755 [(set (match_operand:V2DI 0 "register_operand" "=x")
2758 (match_operand:V2DI 1 "register_operand" "0")
2759 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
2760 (parallel [(const_int 0)
2763 "punpcklqdq\t{%2, %0|%0, %2}"
2764 [(set_attr "type" "sselog")
2765 (set_attr "mode" "TI")])
;; Expander taking an SImode value (operand 2), the V8HI register to insert
;; into (operand 1) and an element index 0..7 (operand 3).  The preparation
;; code narrows operand 2 to its HImode low part and rewrites the index as
;; the one-bit mask 1 << index.
2767 (define_expand "sse2_pinsrw"
2768 [(set (match_operand:V8HI 0 "register_operand" "")
2771 (match_operand:SI 2 "nonimmediate_operand" ""))
2772 (match_operand:V8HI 1 "register_operand" "")
2773 (match_operand:SI 3 "const_0_to_7_operand" "")))]
2776 operands[2] = gen_lowpart (HImode, operands[2]);
2777 operands[3] = GEN_INT ((1 << INTVAL (operands[3])));
;; Insn form of pinsrw.  Operand 3 is a power-of-two mask
;; (const_pow2_1_to_128_operand); the output code converts it back to an
;; element index with exact_log2 before printing.  Operand 2 is an HImode
;; register or memory ("rm"), printed with the %k modifier; operand 1 is tied
;; to the output.
2780 (define_insn "*sse2_pinsrw"
2781 [(set (match_operand:V8HI 0 "register_operand" "=x")
2784 (match_operand:HI 2 "nonimmediate_operand" "rm"))
2785 (match_operand:V8HI 1 "register_operand" "0")
2786 (match_operand:SI 3 "const_pow2_1_to_128_operand" "n")))]
2789 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
2790 return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}";
2792 [(set_attr "type" "sselog")
2793 (set_attr "mode" "TI")])
;; Emits pextrw: the SImode destination is constrained to "=r", the source
;; is a V8HI register ("x") and the element index (operand 2) is a constant
;; in 0..7.
2795 (define_insn "sse2_pextrw"
2796 [(set (match_operand:SI 0 "register_operand" "=r")
2799 (match_operand:V8HI 1 "register_operand" "x")
2800 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")]))))]
2802 "pextrw\t{%2, %1, %0|%0, %1, %2}"
2803 [(set_attr "type" "sselog")
2804 (set_attr "mode" "TI")])
;; Expander taking the shuffle control as one integer (operand 2).  It splits
;; the mask into four 2-bit selectors (bits 0-1, 2-3, 4-5, 6-7) and emits
;; sse2_pshufd_1 with those as separate constant operands.
2806 (define_expand "sse2_pshufd"
2807 [(match_operand:V4SI 0 "register_operand" "")
2808 (match_operand:V4SI 1 "nonimmediate_operand" "")
2809 (match_operand:SI 2 "const_int_operand" "")]
2812 int mask = INTVAL (operands[2]);
2813 emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
2814 GEN_INT ((mask >> 0) & 3),
2815 GEN_INT ((mask >> 2) & 3),
2816 GEN_INT ((mask >> 4) & 3),
2817 GEN_INT ((mask >> 6) & 3)));
;; Insn form of pshufd with the four element selectors as operands 2..5
;; (each const_0_to_3_operand).  The output code packs them back into one
;; immediate (shifts of 0, 2, 4, 6), stores it in operands[2] and prints
;; pshufd.  The source (operand 1) may be register or memory.
2821 (define_insn "sse2_pshufd_1"
2822 [(set (match_operand:V4SI 0 "register_operand" "=x")
2824 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
2825 (parallel [(match_operand 2 "const_0_to_3_operand" "")
2826 (match_operand 3 "const_0_to_3_operand" "")
2827 (match_operand 4 "const_0_to_3_operand" "")
2828 (match_operand 5 "const_0_to_3_operand" "")])))]
2832 mask |= INTVAL (operands[2]) << 0;
2833 mask |= INTVAL (operands[3]) << 2;
2834 mask |= INTVAL (operands[4]) << 4;
2835 mask |= INTVAL (operands[5]) << 6;
2836 operands[2] = GEN_INT (mask);
2838 return "pshufd\t{%2, %1, %0|%0, %1, %2}";
2840 [(set_attr "type" "sselog1")
2841 (set_attr "mode" "TI")])
;; Expander taking the shuffle control as one integer (operand 2).  It splits
;; the mask into four 2-bit selectors and emits sse2_pshuflw_1 with those as
;; separate constant operands.
2843 (define_expand "sse2_pshuflw"
2844 [(match_operand:V8HI 0 "register_operand" "")
2845 (match_operand:V8HI 1 "nonimmediate_operand" "")
2846 (match_operand:SI 2 "const_int_operand" "")]
2849 int mask = INTVAL (operands[2]);
2850 emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
2851 GEN_INT ((mask >> 0) & 3),
2852 GEN_INT ((mask >> 2) & 3),
2853 GEN_INT ((mask >> 4) & 3),
2854 GEN_INT ((mask >> 6) & 3)));
;; Insn form of pshuflw with four selectors as operands 2..5 (each
;; const_0_to_3_operand).  The output code packs them into one immediate
;; (shifts of 0, 2, 4, 6), stores it in operands[2] and prints pshuflw.  The
;; source (operand 1) may be register or memory.
2858 (define_insn "sse2_pshuflw_1"
2859 [(set (match_operand:V8HI 0 "register_operand" "=x")
2861 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
2862 (parallel [(match_operand 2 "const_0_to_3_operand" "")
2863 (match_operand 3 "const_0_to_3_operand" "")
2864 (match_operand 4 "const_0_to_3_operand" "")
2865 (match_operand 5 "const_0_to_3_operand" "")
2873 mask |= INTVAL (operands[2]) << 0;
2874 mask |= INTVAL (operands[3]) << 2;
2875 mask |= INTVAL (operands[4]) << 4;
2876 mask |= INTVAL (operands[5]) << 6;
2877 operands[2] = GEN_INT (mask);
2879 return "pshuflw\t{%2, %1, %0|%0, %1, %2}";
2881 [(set_attr "type" "sselog")
2882 (set_attr "mode" "TI")])
;; Expander taking the shuffle control as one integer (operand 2).  It splits
;; the mask into four 2-bit selectors, adds 4 to each (giving values 4..7)
;; and emits sse2_pshufhw_1 with those as separate constant operands.
2884 (define_expand "sse2_pshufhw"
2885 [(match_operand:V8HI 0 "register_operand" "")
2886 (match_operand:V8HI 1 "nonimmediate_operand" "")
2887 (match_operand:SI 2 "const_int_operand" "")]
2890 int mask = INTVAL (operands[2]);
2891 emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
2892 GEN_INT (((mask >> 0) & 3) + 4),
2893 GEN_INT (((mask >> 2) & 3) + 4),
2894 GEN_INT (((mask >> 4) & 3) + 4),
2895 GEN_INT (((mask >> 6) & 3) + 4)));
;; Insn form of pshufhw with four selectors as operands 2..5 (each
;; const_4_to_7_operand).  The output code subtracts 4 from each, packs them
;; into one immediate (shifts of 0, 2, 4, 6), stores it in operands[2] and
;; prints pshufhw.  The source (operand 1) may be register or memory.
2899 (define_insn "sse2_pshufhw_1"
2900 [(set (match_operand:V8HI 0 "register_operand" "=x")
2902 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
2903 (parallel [(const_int 0)
2907 (match_operand 2 "const_4_to_7_operand" "")
2908 (match_operand 3 "const_4_to_7_operand" "")
2909 (match_operand 4 "const_4_to_7_operand" "")
2910 (match_operand 5 "const_4_to_7_operand" "")])))]
2914 mask |= (INTVAL (operands[2]) - 4) << 0;
2915 mask |= (INTVAL (operands[3]) - 4) << 2;
2916 mask |= (INTVAL (operands[4]) - 4) << 4;
2917 mask |= (INTVAL (operands[5]) - 4) << 6;
2918 operands[2] = GEN_INT (mask);
2920 return "pshufhw\t{%2, %1, %0|%0, %1, %2}";
2922 [(set_attr "type" "sselog")
2923 (set_attr "mode" "TI")])
;; Expander producing a V4SI register from an SImode register-or-memory
;; source (operand 1).  The preparation statement sets operand 2 to the
;; V4SImode zero constant.
2925 (define_expand "sse2_loadd"
2926 [(set (match_operand:V4SI 0 "register_operand" "")
2929 (match_operand:SI 1 "nonimmediate_operand" ""))
2933 "operands[2] = CONST0_RTX (V4SImode);")
;; Insn with three alternatives, in order: movd (source "mr", operand 1 "C"),
;; movss (source "m", operand 1 "C") and movss (source "x", operand 1 tied to
;; the output).  Operand 2 is the SImode source; operand 1 is a V4SI
;; vector_move_operand.
2935 (define_insn "sse2_loadld"
2936 [(set (match_operand:V4SI 0 "register_operand" "=Y,x,x")
2939 (match_operand:SI 2 "nonimmediate_operand" "mr,m,x"))
2940 (match_operand:V4SI 1 "vector_move_operand" " C,C,0")
2944 movd\t{%2, %0|%0, %2}
2945 movss\t{%2, %0|%0, %2}
2946 movss\t{%2, %0|%0, %2}"
2947 [(set_attr "type" "ssemov")
2948 (set_attr "mode" "TI,V4SF,SF")])
2950 ;; ??? The hardware supports more, but TARGET_INTER_UNIT_MOVES must
2951 ;; be taken into account, and movdi isn't fully populated even without.
;; Insn-and-split that stores element 0 of a V4SI register into an SImode
;; destination ("=mx").  After reload it is split into a plain SImode set
;; whose source is the same hard register re-created in SImode
;; (gen_rtx_REG (SImode, REGNO (operands[1]))).
2952 (define_insn_and_split "sse2_stored"
2953 [(set (match_operand:SI 0 "nonimmediate_operand" "=mx")
2955 (match_operand:V4SI 1 "register_operand" "x")
2956 (parallel [(const_int 0)])))]
2959 "&& reload_completed"
2960 [(set (match_dup 0) (match_dup 1))]
2962 operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));
;; Expander that stores element 0 of a V2DI register (operand 1) into a
;; DImode register-or-memory destination (operand 0).
2965 (define_expand "sse_storeq"
2966 [(set (match_operand:DI 0 "nonimmediate_operand" "")
2968 (match_operand:V2DI 1 "register_operand" "")
2969 (parallel [(const_int 0)])))]
2973 ;; ??? The hardware supports more, but TARGET_INTER_UNIT_MOVES must
2974 ;; be taken into account, and movdi isn't fully populated even without.
;; Insn matching the store of element 0 of a V2DI register ("x") into a
;; DImode destination constrained to "=mx".
2975 (define_insn "*sse2_storeq"
2976 [(set (match_operand:DI 0 "nonimmediate_operand" "=mx")
2978 (match_operand:V2DI 1 "register_operand" "x")
2979 (parallel [(const_int 0)])))]
;; Post-reload split (condition: TARGET_SSE && reload_completed) of the
;; element-0 V2DI store: it becomes a plain DImode set whose source is the
;; same hard register re-created in DImode
;; (gen_rtx_REG (DImode, REGNO (operands[1]))).
2984 [(set (match_operand:DI 0 "nonimmediate_operand" "")
2986 (match_operand:V2DI 1 "register_operand" "")
2987 (parallel [(const_int 0)])))]
2988 "TARGET_SSE && reload_completed"
2989 [(set (match_dup 0) (match_dup 1))]
2991 operands[1] = gen_rtx_REG (DImode, REGNO (operands[1]));
;; V4SI pattern with an SImode register source and two alternatives:
;; pshufd $0 from operand 1 (constraints "=Y" / "Y"), or shufps $0 of the
;; destination with itself when operand 1 is tied to the output.
2994 (define_insn "*vec_dupv4si"
2995 [(set (match_operand:V4SI 0 "register_operand" "=Y,x")
2997 (match_operand:SI 1 "register_operand" " Y,0")))]
3000 pshufd\t{$0, %1, %0|%0, %1, 0}
3001 shufps\t{$0, %0, %0|%0, %0, 0}"
3002 [(set_attr "type" "sselog1")
3003 (set_attr "mode" "TI,V4SF")])
;; V2DI pattern with a DImode register source tied to the output in both
;; alternatives ("=Y" and "=x").  Attributes per alternative: type
;; sselog1 / ssemov, mode TI / V4SF.
3005 (define_insn "*vec_dupv2di"
3006 [(set (match_operand:V2DI 0 "register_operand" "=Y,x")
3008 (match_operand:DI 1 "register_operand" " 0,0")))]
3013 [(set_attr "type" "sselog1,ssemov")
3014 (set_attr "mode" "TI,V4SF")])
3016 ;; ??? In theory we can match memory for the MMX alternative, but allowing
3017 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
3018 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Builds a V2SI register from SImode operands 1 and 2.  Four alternatives:
;; two with "Y" destinations (punpckldq, movd) and two with "*y"
;; destinations (punpckldq, movd).  The movd alternatives take operand 1 from
;; "rm" and require operand 2 to match "C".
3019 (define_insn "*sse2_concatv2si"
3020 [(set (match_operand:V2SI 0 "register_operand" "=Y, Y,*y,*y")
3022 (match_operand:SI 1 "nonimmediate_operand" " 0,rm, 0,rm")
3023 (match_operand:SI 2 "reg_or_0_operand" " Y, C,*y, C")))]
3026 punpckldq\t{%2, %0|%0, %2}
3027 movd\t{%1, %0|%0, %1}
3028 punpckldq\t{%2, %0|%0, %2}
3029 movd\t{%1, %0|%0, %1}"
3030 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
3031 (set_attr "mode" "TI,TI,DI,DI")])
;; Builds a V2SI register from SImode operands 1 and 2.  Four alternatives,
;; in order: unpcklps and movss with "x" destinations, punpckldq and movd
;; with "*y" destinations.  The movss alternative loads operand 1 from
;; memory; the movd alternative accepts "*rm".
3033 (define_insn "*sse1_concatv2si"
3034 [(set (match_operand:V2SI 0 "register_operand" "=x,x,*y,*y")
3036 (match_operand:SI 1 "nonimmediate_operand" " 0,m, 0,*rm")
3037 (match_operand:SI 2 "reg_or_0_operand" " x,C,*y,C")))]
3040 unpcklps\t{%2, %0|%0, %2}
3041 movss\t{%1, %0|%0, %1}
3042 punpckldq\t{%2, %0|%0, %2}
3043 movd\t{%1, %0|%0, %1}"
3044 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
3045 (set_attr "mode" "V4SF,V4SF,DI,DI")])
;; Builds a V4SI register from two V2SI operands; operand 1 is tied to the
;; output in every alternative.  Alternatives, in order: punpcklqdq (operand
;; 2 "Y"), movlhps (operand 2 "x"), movhps (operand 2 in memory).
3047 (define_insn "*vec_concatv4si_1"
3048 [(set (match_operand:V4SI 0 "register_operand" "=Y,x,x")
3050 (match_operand:V2SI 1 "register_operand" " 0,0,0")
3051 (match_operand:V2SI 2 "nonimmediate_operand" " Y,x,m")))]
3054 punpcklqdq\t{%2, %0|%0, %2}
3055 movlhps\t{%2, %0|%0, %2}
3056 movhps\t{%2, %0|%0, %2}"
3057 [(set_attr "type" "sselog,ssemov,ssemov")
3058 (set_attr "mode" "TI,V4SF,V2SF")])
;; Builds a V2DI register from DImode operands 1 and 2.  Six alternatives, in
;; order: movq (operand 1 from memory, operand 2 "C"), movq2dq (operand 1
;; "*y", operand 2 "C"), punpcklqdq, movlhps, movhps (operand 2 in memory),
;; and movlps (operand 1 from memory, operand 2 tied to the output).
3060 (define_insn "*vec_concatv2di"
3061 [(set (match_operand:V2DI 0 "register_operand" "=Y,?Y,Y,x,x,x")
3063 (match_operand:DI 1 "nonimmediate_operand" " m,*y,0,0,0,m")
3064 (match_operand:DI 2 "vector_move_operand" " C, C,Y,x,m,0")))]
3067 movq\t{%1, %0|%0, %1}
3068 movq2dq\t{%1, %0|%0, %1}
3069 punpcklqdq\t{%2, %0|%0, %2}
3070 movlhps\t{%2, %0|%0, %2}
3071 movhps\t{%2, %0|%0, %2}
3072 movlps\t{%1, %0|%0, %1}"
3073 [(set_attr "type" "ssemov,ssemov,sselog,ssemov,ssemov,ssemov")
3074 (set_attr "mode" "TI,TI,TI,V4SF,V2SF,V2SF")])
;; Expander: operand 0 is the V2DI register, operand 1 a DI register and
;; operand 2 a constant integer.  Delegates to ix86_expand_vector_set with
;; `false' and INTVAL (operands[2]).
3076 (define_expand "vec_setv2di"
3077 [(match_operand:V2DI 0 "register_operand" "")
3078 (match_operand:DI 1 "register_operand" "")
3079 (match_operand 2 "const_int_operand" "")]
3082 ix86_expand_vector_set (false, operands[0], operands[1],
3083 INTVAL (operands[2]));
;; Expander: operand 0 is the DI result register, operand 1 the V2DI source
;; register and operand 2 a constant integer.  Delegates to
;; ix86_expand_vector_extract with `false' and INTVAL (operands[2]).
3087 (define_expand "vec_extractv2di"
3088 [(match_operand:DI 0 "register_operand" "")
3089 (match_operand:V2DI 1 "register_operand" "")
3090 (match_operand 2 "const_int_operand" "")]
3093 ix86_expand_vector_extract (false, operands[0], operands[1],
3094 INTVAL (operands[2]));
;; Expander: initializes V2DI register operand 0 from operand 1 (no
;; predicate) by calling ix86_expand_vector_init with `false'.
3098 (define_expand "vec_initv2di"
3099 [(match_operand:V2DI 0 "register_operand" "")
3100 (match_operand 1 "" "")]
3103 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Expander: operand 0 is the V4SI register, operand 1 an SI register and
;; operand 2 a constant integer.  Delegates to ix86_expand_vector_set with
;; `false' and INTVAL (operands[2]).
3107 (define_expand "vec_setv4si"
3108 [(match_operand:V4SI 0 "register_operand" "")
3109 (match_operand:SI 1 "register_operand" "")
3110 (match_operand 2 "const_int_operand" "")]
3113 ix86_expand_vector_set (false, operands[0], operands[1],
3114 INTVAL (operands[2]));
;; Expander: operand 0 is the SI result register, operand 1 the V4SI source
;; register and operand 2 a constant integer.  Delegates to
;; ix86_expand_vector_extract with `false' and INTVAL (operands[2]).
3118 (define_expand "vec_extractv4si"
3119 [(match_operand:SI 0 "register_operand" "")
3120 (match_operand:V4SI 1 "register_operand" "")
3121 (match_operand 2 "const_int_operand" "")]
3124 ix86_expand_vector_extract (false, operands[0], operands[1],
3125 INTVAL (operands[2]));
;; Expander: initializes V4SI register operand 0 from operand 1 (no
;; predicate) by calling ix86_expand_vector_init with `false'.
3129 (define_expand "vec_initv4si"
3130 [(match_operand:V4SI 0 "register_operand" "")
3131 (match_operand 1 "" "")]
3134 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Expander: operand 0 is the V8HI register, operand 1 an HI register and
;; operand 2 a constant integer.  Delegates to ix86_expand_vector_set with
;; `false' and INTVAL (operands[2]).
3138 (define_expand "vec_setv8hi"
3139 [(match_operand:V8HI 0 "register_operand" "")
3140 (match_operand:HI 1 "register_operand" "")
3141 (match_operand 2 "const_int_operand" "")]
3144 ix86_expand_vector_set (false, operands[0], operands[1],
3145 INTVAL (operands[2]));
;; Expander: operand 0 is the HI result register, operand 1 the V8HI source
;; register and operand 2 a constant integer.  Delegates to
;; ix86_expand_vector_extract with `false' and INTVAL (operands[2]).
3149 (define_expand "vec_extractv8hi"
3150 [(match_operand:HI 0 "register_operand" "")
3151 (match_operand:V8HI 1 "register_operand" "")
3152 (match_operand 2 "const_int_operand" "")]
3155 ix86_expand_vector_extract (false, operands[0], operands[1],
3156 INTVAL (operands[2]));
;; Expander: initializes V8HI register operand 0 from operand 1 (no
;; predicate) by calling ix86_expand_vector_init with `false'.
3160 (define_expand "vec_initv8hi"
3161 [(match_operand:V8HI 0 "register_operand" "")
3162 (match_operand 1 "" "")]
3165 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Expander: operand 0 is the V16QI register, operand 1 a QI register and
;; operand 2 a constant integer.  Delegates to ix86_expand_vector_set with
;; `false' and INTVAL (operands[2]).
3169 (define_expand "vec_setv16qi"
3170 [(match_operand:V16QI 0 "register_operand" "")
3171 (match_operand:QI 1 "register_operand" "")
3172 (match_operand 2 "const_int_operand" "")]
3175 ix86_expand_vector_set (false, operands[0], operands[1],
3176 INTVAL (operands[2]));
;; Expander: operand 0 is the QI result register, operand 1 the V16QI source
;; register and operand 2 a constant integer.  Delegates to
;; ix86_expand_vector_extract with `false' and INTVAL (operands[2]).
3180 (define_expand "vec_extractv16qi"
3181 [(match_operand:QI 0 "register_operand" "")
3182 (match_operand:V16QI 1 "register_operand" "")
3183 (match_operand 2 "const_int_operand" "")]
3186 ix86_expand_vector_extract (false, operands[0], operands[1],
3187 INTVAL (operands[2]));
;; Expander: initializes V16QI register operand 0 from operand 1 (no
;; predicate) by calling ix86_expand_vector_init with `false'.
3191 (define_expand "vec_initv16qi"
3192 [(match_operand:V16QI 0 "register_operand" "")
3193 (match_operand 1 "" "")]
3196 ix86_expand_vector_init (false, operands[0], operands[1]);
3200 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3204 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Emits pavgb on V16QI operands.  The RTL includes an all-ones V16QI
;; const_vector term.  Requires TARGET_SSE2 and ix86_binary_operator_ok
;; (PLUS, ...).  Operand 1 is commutative and tied to the output; operand 2
;; is register or memory.
3206 (define_insn "sse2_uavgv16qi3"
3207 [(set (match_operand:V16QI 0 "register_operand" "=x")
3213 (match_operand:V16QI 1 "nonimmediate_operand" "%0"))
3215 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))
3216 (const_vector:V16QI [(const_int 1) (const_int 1)
3217 (const_int 1) (const_int 1)
3218 (const_int 1) (const_int 1)
3219 (const_int 1) (const_int 1)
3220 (const_int 1) (const_int 1)
3221 (const_int 1) (const_int 1)
3222 (const_int 1) (const_int 1)
3223 (const_int 1) (const_int 1)]))
3225 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
3226 "pavgb\t{%2, %0|%0, %2}"
3227 [(set_attr "type" "sseiadd")
3228 (set_attr "mode" "TI")])
;; Emits pavgw on V8HI operands.  The RTL includes an all-ones V8HI
;; const_vector term.  Requires TARGET_SSE2 and ix86_binary_operator_ok
;; (PLUS, ...).  Operand 1 is commutative and tied to the output; operand 2
;; is register or memory.
3230 (define_insn "sse2_uavgv8hi3"
3231 [(set (match_operand:V8HI 0 "register_operand" "=x")
3237 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
3239 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
3240 (const_vector:V8HI [(const_int 1) (const_int 1)
3241 (const_int 1) (const_int 1)
3242 (const_int 1) (const_int 1)
3243 (const_int 1) (const_int 1)]))
3245 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
3246 "pavgw\t{%2, %0|%0, %2}"
3247 [(set_attr "type" "sseiadd")
3248 (set_attr "mode" "TI")])
;; Insn "sse2_psadbw": emits PSADBW.  The operation is modelled as an
;; opaque V2DI unspec over the two V16QI inputs instead of being spelled
;; out in RTL; the original remark below gives the reason.
;;   operand 0: V2DI register, output ("=x").
;;   operand 1: V16QI register tied to the output ("0").
;;   operand 2: V16QI nonimmediate_operand ("xm").
;; NOTE(review): the unspec name and the insn condition are on lines not
;; visible in this excerpt.
3250 ;; The correct representation for this is absolutely enormous, and
3251 ;; surely not generally useful.
3252 (define_insn "sse2_psadbw"
3253 [(set (match_operand:V2DI 0 "register_operand" "=x")
3254 (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "0")
3255 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
3258 "psadbw\t{%2, %0|%0, %2}"
3259 [(set_attr "type" "sseiadd")
3260 (set_attr "mode" "TI")])
;; Insn "sse_movmskps": emits MOVMSKPS.
;;   operand 0: SI register, output, constraint "=r".
;;   operand 1: V4SF register, constraint "x".
;; The result is expressed as an SI unspec of operand 1.
;; NOTE(review): the unspec name and the insn condition are on lines not
;; visible in this excerpt.
3262 (define_insn "sse_movmskps"
3263 [(set (match_operand:SI 0 "register_operand" "=r")
3264 (unspec:SI [(match_operand:V4SF 1 "register_operand" "x")]
3267 "movmskps\t{%1, %0|%0, %1}"
3268 [(set_attr "type" "ssecvt")
3269 (set_attr "mode" "V4SF")])
;; Insn "sse2_movmskpd": emits MOVMSKPD.
;;   operand 0: SI register, output, constraint "=r".
;;   operand 1: V2DF register, constraint "x".
;; The result is expressed as an SI unspec of operand 1.
;; NOTE(review): the unspec name and the insn condition are on lines not
;; visible in this excerpt.
3271 (define_insn "sse2_movmskpd"
3272 [(set (match_operand:SI 0 "register_operand" "=r")
3273 (unspec:SI [(match_operand:V2DF 1 "register_operand" "x")]
3276 "movmskpd\t{%1, %0|%0, %1}"
3277 [(set_attr "type" "ssecvt")
3278 (set_attr "mode" "V2DF")])
;; Insn "sse2_pmovmskb": emits PMOVMSKB.
;;   operand 0: SI register, output, constraint "=r".
;;   operand 1: V16QI register, constraint "x".
;; The result is expressed as an SI unspec of operand 1.
;; NOTE(review): the "mode" attribute below is V2DF although the input is
;; an integer V16QI vector, while the other integer patterns nearby
;; (pavgb, psadbw, maskmovdqu) use TI.  This may be a copy from
;; sse2_movmskpd or may be deliberate for attribute-derived encoding
;; length -- confirm against the attribute definitions before changing it.
;; NOTE(review): the unspec name and the insn condition are on lines not
;; visible in this excerpt.
3280 (define_insn "sse2_pmovmskb"
3281 [(set (match_operand:SI 0 "register_operand" "=r")
3282 (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
3285 "pmovmskb\t{%1, %0|%0, %1}"
3286 [(set_attr "type" "ssecvt")
3287 (set_attr "mode" "V2DF")])
;; Expander "sse2_maskmovdqu": the named entry point for the masked store.
;;   operand 0: V16QI memory_operand (the destination of the set).
;;   operand 1: V16QI register_operand.
;;   operand 2: V16QI register_operand.
;; The stored value is a V16QI unspec whose first two inputs are
;; operands 1 and 2.
;; NOTE(review): the remaining unspec inputs, the unspec name, the
;; condition and any preparation code are on lines not visible in this
;; excerpt.  The two unnamed insns "*sse2_maskmovdqu" and
;; "*sse2_maskmovdqu_rex64" in this file have the same overall shape and
;; are presumably what this expands to -- confirm.
3289 (define_expand "sse2_maskmovdqu"
3290 [(set (match_operand:V16QI 0 "memory_operand" "")
3291 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
3292 (match_operand:V16QI 2 "register_operand" "x")
;; Insn "*sse2_maskmovdqu": MASKMOVDQU when TARGET_64BIT is off.
;;   operand 0: SI register holding the destination address, constraint
;;              "D"; the destination is (mem:V16QI operand 0).
;;   operand 1: V16QI register ("x").
;;   operand 2: V16QI register ("x").
;; The stored value is a V16QI unspec of operands 1, 2 and the previous
;; contents of the same memory (match_dup 0), so the pattern both reads
;; and writes the destination.
;; The output template names only operands 1 and 2; the address register
;; never appears in it, so it must be implied by the instruction itself
;; (hence the single-register "D" constraint).
;; The "@@@" remark below is the original author's open question about
;; operand order in the two assembler dialects; it is kept as-is.
;; NOTE(review): the unspec name is on a line not visible in this excerpt.
3298 (define_insn "*sse2_maskmovdqu"
3299 [(set (mem:V16QI (match_operand:SI 0 "register_operand" "D"))
3300 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
3301 (match_operand:V16QI 2 "register_operand" "x")
3302 (mem:V16QI (match_dup 0))]
3304 "TARGET_SSE2 && !TARGET_64BIT"
3305 ;; @@@ check ordering of operands in intel/nonintel syntax
3306 "maskmovdqu\t{%2, %1|%1, %2}"
3307 [(set_attr "type" "ssecvt")
3308 (set_attr "mode" "TI")])
;; Insn "*sse2_maskmovdqu_rex64": MASKMOVDQU when TARGET_64BIT is on.
;; Identical to "*sse2_maskmovdqu" except that the address register
;; (operand 0, constraint "D") is matched in DI mode instead of SI.
;;   operand 1: V16QI register ("x").
;;   operand 2: V16QI register ("x").
;; The stored value is a V16QI unspec of operands 1, 2 and the previous
;; contents of the same memory (match_dup 0).
;; The output template names only operands 1 and 2; the address register
;; never appears in it.
;; The "@@@" remark below is the original author's open question about
;; operand order in the two assembler dialects; it is kept as-is.
;; NOTE(review): the unspec name is on a line not visible in this excerpt.
3310 (define_insn "*sse2_maskmovdqu_rex64"
3311 [(set (mem:V16QI (match_operand:DI 0 "register_operand" "D"))
3312 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
3313 (match_operand:V16QI 2 "register_operand" "x")
3314 (mem:V16QI (match_dup 0))]
3316 "TARGET_SSE2 && TARGET_64BIT"
3317 ;; @@@ check ordering of operands in intel/nonintel syntax
3318 "maskmovdqu\t{%2, %1|%1, %2}"
3319 [(set_attr "type" "ssecvt")
3320 (set_attr "mode" "TI")])
;; Insn "sse_ldmxcsr".
;;   operand 0: SI memory_operand, constraint "m" (input).
;; The pattern is a bare unspec_volatile of that operand with no set
;; destination, and the "memory" attribute is "load".
;; NOTE(review): the unspec_volatile name, the insn condition and the
;; output template are on lines not visible in this excerpt.
3322 (define_insn "sse_ldmxcsr"
3323 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
3327 [(set_attr "type" "sse")
3328 (set_attr "memory" "load")])
;; Insn "sse_stmxcsr".
;;   operand 0: SI memory_operand, output, constraint "=m".
;; The stored value is an SI unspec_volatile (UNSPECV_STMXCSR) whose only
;; input is the placeholder (const_int 0), i.e. it depends on no RTL
;; operand.  The "memory" attribute is "store".
;; NOTE(review): the insn condition and the output template are on lines
;; not visible in this excerpt.
3330 (define_insn "sse_stmxcsr"
3331 [(set (match_operand:SI 0 "memory_operand" "=m")
3332 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
3335 [(set_attr "type" "sse")
3336 (set_attr "memory" "store")])
;; Expander "sse_sfence", enabled for TARGET_SSE or TARGET_3DNOW_A.
;; The preparation code builds operand 0 itself: a BLKmode MEM whose
;; address is a Pmode SCRATCH, marked volatile.  That MEM is then used as
;; the input of the BLK unspec UNSPEC_SFENCE via (match_dup 0).
;; NOTE(review): the first line of the RTL template (the destination of
;; the set) is not visible in this excerpt; the matching insn
;; "*sse_sfence" sets a BLK operand 0 from the same unspec.
3338 (define_expand "sse_sfence"
3340 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
3341 "TARGET_SSE || TARGET_3DNOW_A"
3343 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
3344 MEM_VOLATILE_P (operands[0]) = 1;
;; Insn "*sse_sfence", enabled for TARGET_SSE or TARGET_3DNOW_A.
;;   operand 0: BLK operand matched with an empty predicate and no
;;              constraint.
;; The pattern sets operand 0 from a BLK unspec (UNSPEC_SFENCE) of
;; operand 0 itself, and the "memory" attribute is "unknown".
;; NOTE(review): the output template is on a line not visible in this
;; excerpt.
3347 (define_insn "*sse_sfence"
3348 [(set (match_operand:BLK 0 "" "")
3349 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
3350 "TARGET_SSE || TARGET_3DNOW_A"
3352 [(set_attr "type" "sse")
3353 (set_attr "memory" "unknown")])
;; Insn "sse2_clflush".
;;   operand 0: address_operand with constraint "p"; no mode is given on
;;              the match_operand.
;; The pattern is a bare unspec_volatile of that address with no set
;; destination, and the "memory" attribute is "unknown".
;; NOTE(review): the unspec_volatile name, the insn condition and the
;; output template are on lines not visible in this excerpt.
3355 (define_insn "sse2_clflush"
3356 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
3360 [(set_attr "type" "sse")
3361 (set_attr "memory" "unknown")])
;; Expander "sse2_mfence".
;; The preparation code builds operand 0 itself: a BLKmode MEM whose
;; address is a Pmode SCRATCH, marked volatile.  That MEM is then used as
;; the input of the BLK unspec UNSPEC_MFENCE via (match_dup 0).
;; NOTE(review): the first line of the RTL template (the destination of
;; the set) and the enabling condition are not visible in this excerpt;
;; the matching insn "*sse2_mfence" sets a BLK operand 0 from the same
;; unspec.
3363 (define_expand "sse2_mfence"
3365 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
3368 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
3369 MEM_VOLATILE_P (operands[0]) = 1;
;; Insn "*sse2_mfence".
;;   operand 0: BLK operand matched with an empty predicate and no
;;              constraint.
;; The pattern sets operand 0 from a BLK unspec (UNSPEC_MFENCE) of
;; operand 0 itself, and the "memory" attribute is "unknown".
;; NOTE(review): the insn condition and the output template are on lines
;; not visible in this excerpt.
3372 (define_insn "*sse2_mfence"
3373 [(set (match_operand:BLK 0 "" "")
3374 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
3377 [(set_attr "type" "sse")
3378 (set_attr "memory" "unknown")])
;; Expander "sse2_lfence".
;; The preparation code builds operand 0 itself: a BLKmode MEM whose
;; address is a Pmode SCRATCH, marked volatile.  That MEM is then used as
;; the input of the BLK unspec UNSPEC_LFENCE via (match_dup 0).
;; NOTE(review): the first line of the RTL template (the destination of
;; the set) and the enabling condition are not visible in this excerpt;
;; the matching insn "*sse2_lfence" sets a BLK operand 0 from the same
;; unspec.
3380 (define_expand "sse2_lfence"
3382 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
3385 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
3386 MEM_VOLATILE_P (operands[0]) = 1;
;; Insn "*sse2_lfence".
;;   operand 0: BLK operand matched with an empty predicate and no
;;              constraint.
;; The pattern sets operand 0 from a BLK unspec (UNSPEC_LFENCE) of
;; operand 0 itself, and the "memory" attribute is "unknown".
;; NOTE(review): the insn condition and the output template are on lines
;; not visible in this excerpt.
3389 (define_insn "*sse2_lfence"
3390 [(set (match_operand:BLK 0 "" "")
3391 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
3394 [(set_attr "type" "sse")
3395 (set_attr "memory" "unknown")])
;; Insn "sse3_mwait".
;;   operand 0: SI register, constraint "a".
;;   operand 1: SI register, constraint "c".
;; The pattern is a bare unspec_volatile of the two operands with no set
;; destination.  The "length" attribute is given explicitly as 3.
;; NOTE(review): the unspec_volatile name, the insn condition and the
;; output template are on lines not visible in this excerpt.
3397 (define_insn "sse3_mwait"
3398 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
3399 (match_operand:SI 1 "register_operand" "c")]
3403 [(set_attr "length" "3")])
;; Insn "sse3_monitor": emits MONITOR with all three operands written
;; out explicitly in the template.
;;   operand 0: SI register, constraint "a".
;;   operand 1: SI register, constraint "c".
;;   operand 2: SI register, constraint "d".
;; The pattern is a bare unspec_volatile of the three operands with no set
;; destination.  The "length" attribute is given explicitly as 3.
;; NOTE(review): all operands are fixed to SI mode; if operand 0 carries
;; an address, confirm this is adequate when TARGET_64BIT is on.
;; NOTE(review): the unspec_volatile name and the insn condition are on
;; lines not visible in this excerpt.
3405 (define_insn "sse3_monitor"
3406 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
3407 (match_operand:SI 1 "register_operand" "c")
3408 (match_operand:SI 2 "register_operand" "d")]
3411 "monitor\t%0, %1, %2"
3412 [(set_attr "length" "3")])