1 ;; GCC machine description for SSE instructions
2 ;; Copyright (C) 2005, 2006
3 ;; Free Software Foundation, Inc.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify
8 ;; it under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 2, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful,
13 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ;; GNU General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING. If not, write to
19 ;; the Free Software Foundation, 51 Franklin Street, Fifth Floor,
20 ;; Boston, MA 02110-1301, USA.
23 ;; 16 byte integral modes handled by SSE, minus TImode, which gets
24 ;; special-cased for TARGET_64BIT.
25 (define_mode_macro SSEMODEI [V16QI V8HI V4SI V2DI])
27 ;; All 16-byte vector modes handled by SSE
28 (define_mode_macro SSEMODE [V16QI V8HI V4SI V2DI V4SF V2DF])
;; Subsets of the integer vector modes.  The digits in each macro name match
;; the element widths, in bytes, of the modes it lists (1 = QI, 2 = HI,
;; 4 = SI, 8 = DI).
31 (define_mode_macro SSEMODE12 [V16QI V8HI])
32 (define_mode_macro SSEMODE24 [V8HI V4SI])
33 (define_mode_macro SSEMODE14 [V16QI V4SI])
34 (define_mode_macro SSEMODE124 [V16QI V8HI V4SI])
35 (define_mode_macro SSEMODE248 [V8HI V4SI V2DI])
37 ;; Mapping from integer vector mode to mnemonic suffix
38 (define_mode_attr ssevecsize [(V16QI "b") (V8HI "w") (V4SI "d") (V2DI "q")])
40 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
42 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
46 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
48 ;; All of these patterns are enabled for SSE1 as well as SSE2.
49 ;; This is essential for maintaining stable calling conventions.
;; Move expander instantiated for every mode in SSEMODEI; the actual work is
;; delegated to ix86_expand_vector_move.
51 (define_expand "mov<mode>"
52 [(set (match_operand:SSEMODEI 0 "nonimmediate_operand" "")
53 (match_operand:SSEMODEI 1 "nonimmediate_operand" ""))]
56 ix86_expand_vector_move (<MODE>mode, operands);
;; Move insn for the SSEMODEI integer vector modes.  The condition rejects
;; memory-to-memory moves.  Three alternatives: constant ("C") into an SSE
;; register, register/memory into a register, and register into memory.
;; The output code emits the single-precision mnemonics (xorps, movaps) when
;; the insn's mode attribute is V4SF and the integer ones (pxor, movdqa)
;; otherwise.  The mode attribute is computed from optimize_size,
;; TARGET_SSE2 and, for alternative 2, TARGET_SSE_TYPELESS_STORES.
60 (define_insn "*mov<mode>_internal"
61 [(set (match_operand:SSEMODEI 0 "nonimmediate_operand" "=x,x ,m")
62 (match_operand:SSEMODEI 1 "vector_move_operand" "C ,xm,x"))]
63 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
65 switch (which_alternative)
68 if (get_attr_mode (insn) == MODE_V4SF)
69 return "xorps\t%0, %0";
71 return "pxor\t%0, %0";
74 if (get_attr_mode (insn) == MODE_V4SF)
75 return "movaps\t{%1, %0|%0, %1}";
77 return "movdqa\t{%1, %0|%0, %1}";
82 [(set_attr "type" "sselog1,ssemov,ssemov")
85 (ior (ior (ne (symbol_ref "optimize_size") (const_int 0))
86 (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
87 (and (eq_attr "alternative" "2")
88 (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
91 (const_string "TI")))])
;; V4SF move expander; delegates to ix86_expand_vector_move.
93 (define_expand "movv4sf"
94 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
95 (match_operand:V4SF 1 "nonimmediate_operand" ""))]
98 ix86_expand_vector_move (V4SFmode, operands);
;; V4SF move insn with three alternatives: constant ("C") into a register,
;; register/memory into a register, and register into memory.  The two
;; non-constant alternatives are emitted as movaps.
102 (define_insn "*movv4sf_internal"
103 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
104 (match_operand:V4SF 1 "vector_move_operand" "C,xm,x"))]
108 movaps\t{%1, %0|%0, %1}
109 movaps\t{%1, %0|%0, %1}"
110 [(set_attr "type" "sselog1,ssemov,ssemov")
111 (set_attr "mode" "V4SF")])
114 [(set (match_operand:V4SF 0 "register_operand" "")
115 (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
116 "TARGET_SSE && reload_completed"
119 (vec_duplicate:V4SF (match_dup 1))
123 operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
124 operands[2] = CONST0_RTX (V4SFmode);
;; V2DF move expander; delegates to ix86_expand_vector_move.
127 (define_expand "movv2df"
128 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
129 (match_operand:V2DF 1 "nonimmediate_operand" ""))]
132 ix86_expand_vector_move (V2DFmode, operands);
;; V2DF move insn.  Note the condition is TARGET_SSE, not TARGET_SSE2, and it
;; rejects memory-to-memory moves.  Alternatives: constant ("C") into a
;; register, register/memory into a register, register into memory.
;; The output code uses xorps/movaps when the insn's mode attribute is V4SF
;; and xorpd/movapd otherwise.  The mode attribute evaluates to either V4SF
;; or V2DF based on optimize_size, TARGET_SSE2 and, for alternative 2,
;; TARGET_SSE_TYPELESS_STORES.
136 (define_insn "*movv2df_internal"
137 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
138 (match_operand:V2DF 1 "vector_move_operand" "C,xm,x"))]
139 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
141 switch (which_alternative)
144 if (get_attr_mode (insn) == MODE_V4SF)
145 return "xorps\t%0, %0";
147 return "xorpd\t%0, %0";
150 if (get_attr_mode (insn) == MODE_V4SF)
151 return "movaps\t{%1, %0|%0, %1}";
153 return "movapd\t{%1, %0|%0, %1}";
158 [(set_attr "type" "sselog1,ssemov,ssemov")
161 (ior (ior (ne (symbol_ref "optimize_size") (const_int 0))
162 (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
163 (and (eq_attr "alternative" "2")
164 (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
166 (const_string "V4SF")
167 (const_string "V2DF")))])
170 [(set (match_operand:V2DF 0 "register_operand" "")
171 (match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))]
172 "TARGET_SSE2 && reload_completed"
173 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
175 operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
176 operands[2] = CONST0_RTX (DFmode);
;; Push expander for every mode in SSEMODE; takes a register operand and
;; delegates to ix86_expand_push.
179 (define_expand "push<mode>1"
180 [(match_operand:SSEMODE 0 "register_operand" "")]
183 ix86_expand_push (<MODE>mode, operands[0]);
;; Misaligned-move expander for every mode in SSEMODE; delegates to
;; ix86_expand_vector_move_misalign.
187 (define_expand "movmisalign<mode>"
188 [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "")
189 (match_operand:SSEMODE 1 "nonimmediate_operand" ""))]
192 ix86_expand_vector_move_misalign (<MODE>mode, operands);
;; movups: 128-bit move of four packed single-precision values, modeled as
;; an unspec so it is kept distinct from the ordinary V4SF move.
;; Alternative 0 moves register/memory into an SSE register; alternative 1
;; stores an SSE register to memory.  Memory-to-memory is rejected.
;; The mode attribute is V4SF to agree with the operand modes and with the
;; other V4SF move patterns; it was previously "V2DF", which describes a
;; packed-double insn (NOTE(review): the backend is understood to derive
;; prefix/length information from this attribute -- confirm in i386.md).
196 (define_insn "sse_movups"
197 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
198 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,x")]
200 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
201 "movups\t{%1, %0|%0, %1}"
202 [(set_attr "type" "ssemov")
203 (set_attr "mode" "V4SF")])
;; movupd: V2DF move modeled as an unspec.  Alternative 0 moves
;; register/memory into an SSE register; alternative 1 stores a register to
;; memory.  Requires TARGET_SSE2; memory-to-memory is rejected.
205 (define_insn "sse2_movupd"
206 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,m")
207 (unspec:V2DF [(match_operand:V2DF 1 "nonimmediate_operand" "xm,x")]
209 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
210 "movupd\t{%1, %0|%0, %1}"
211 [(set_attr "type" "ssemov")
212 (set_attr "mode" "V2DF")])
;; movdqu: V16QI move modeled as an unspec.  Alternative 0 moves
;; register/memory into an SSE register; alternative 1 stores a register to
;; memory.  Requires TARGET_SSE2; memory-to-memory is rejected.
214 (define_insn "sse2_movdqu"
215 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
216 (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "xm,x")]
218 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
219 "movdqu\t{%1, %0|%0, %1}"
220 [(set_attr "type" "ssemov")
221 (set_attr "mode" "TI")])
;; movntps: store of a V4SF SSE register ("x") to memory, modeled as an
;; unspec.
223 (define_insn "sse_movntv4sf"
224 [(set (match_operand:V4SF 0 "memory_operand" "=m")
225 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "x")]
228 "movntps\t{%1, %0|%0, %1}"
229 [(set_attr "type" "ssemov")
230 (set_attr "mode" "V4SF")])
;; movntpd: store of a V2DF SSE register ("x") to memory, modeled as an
;; unspec.
;; NOTE(review): the type is "ssecvt" here while sse_movntv4sf uses
;; "ssemov" -- confirm this is intended.
232 (define_insn "sse2_movntv2df"
233 [(set (match_operand:V2DF 0 "memory_operand" "=m")
234 (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "x")]
237 "movntpd\t{%1, %0|%0, %1}"
238 [(set_attr "type" "ssecvt")
239 (set_attr "mode" "V2DF")])
;; movntdq: store of a V2DI SSE register ("x") to memory, modeled as an
;; unspec.
;; NOTE(review): the type is "ssecvt" here while sse_movntv4sf uses
;; "ssemov" -- confirm this is intended.
241 (define_insn "sse2_movntv2di"
242 [(set (match_operand:V2DI 0 "memory_operand" "=m")
243 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")]
246 "movntdq\t{%1, %0|%0, %1}"
247 [(set_attr "type" "ssecvt")
248 (set_attr "mode" "TI")])
;; movnti: store of an SImode general register ("r") to memory, modeled as
;; an unspec.
;; The mode attribute is SI to agree with the operand modes; it was
;; previously "V2DF", which describes a packed-double SSE insn and does not
;; fit an integer-register store (NOTE(review): the backend is understood to
;; derive prefix/length information from this attribute -- confirm in
;; i386.md).
;; NOTE(review): the type is "ssecvt" while sse_movntv4sf uses "ssemov";
;; left unchanged here -- confirm whether that is intended.
250 (define_insn "sse2_movntsi"
251 [(set (match_operand:SI 0 "memory_operand" "=m")
252 (unspec:SI [(match_operand:SI 1 "register_operand" "r")]
255 "movnti\t{%1, %0|%0, %1}"
256 [(set_attr "type" "ssecvt")
257 (set_attr "mode" "SI")])
;; lddqu: load of a V16QI value from memory ("m") into an SSE register,
;; modeled as an unspec.
259 (define_insn "sse3_lddqu"
260 [(set (match_operand:V16QI 0 "register_operand" "=x")
261 (unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "m")]
264 "lddqu\t{%1, %0|%0, %1}"
265 [(set_attr "type" "ssecvt")
266 (set_attr "mode" "TI")])
268 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
270 ;; Parallel single-precision floating point arithmetic
272 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; V4SF negation expander; delegates to ix86_expand_fp_absneg_operator with
;; code NEG and then finishes (DONE).
274 (define_expand "negv4sf2"
275 [(set (match_operand:V4SF 0 "register_operand" "")
276 (neg:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
278 "ix86_expand_fp_absneg_operator (NEG, V4SFmode, operands); DONE;")
;; V4SF absolute-value expander; delegates to ix86_expand_fp_absneg_operator
;; with code ABS and then finishes (DONE).
280 (define_expand "absv4sf2"
281 [(set (match_operand:V4SF 0 "register_operand" "")
282 (abs:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
284 "ix86_expand_fp_absneg_operator (ABS, V4SFmode, operands); DONE;")
;; V4SF addition expander.  Both inputs may be register or memory; the
;; operands are adjusted by ix86_fixup_binary_operands_no_copy before the
;; template is emitted.
286 (define_expand "addv4sf3"
287 [(set (match_operand:V4SF 0 "register_operand" "")
288 (plus:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
289 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
291 "ix86_fixup_binary_operands_no_copy (PLUS, V4SFmode, operands);")
;; addps.  Operand 1 is tied to the destination and marked commutative
;; ("%0"); operand 2 may be an SSE register or memory ("xm").  Enabled when
;; TARGET_SSE holds and ix86_binary_operator_ok accepts the operands.
293 (define_insn "*addv4sf3"
294 [(set (match_operand:V4SF 0 "register_operand" "=x")
295 (plus:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
296 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
297 "TARGET_SSE && ix86_binary_operator_ok (PLUS, V4SFmode, operands)"
298 "addps\t{%2, %0|%0, %2}"
299 [(set_attr "type" "sseadd")
300 (set_attr "mode" "V4SF")])
;; addss on V4SF operands (mode attribute SF).  Operand 1 is tied to the
;; destination and marked commutative ("%0"); operand 2 may be an SSE
;; register or memory.
302 (define_insn "sse_vmaddv4sf3"
303 [(set (match_operand:V4SF 0 "register_operand" "=x")
305 (plus:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
306 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
309 "TARGET_SSE && ix86_binary_operator_ok (PLUS, V4SFmode, operands)"
310 "addss\t{%2, %0|%0, %2}"
311 [(set_attr "type" "sseadd")
312 (set_attr "mode" "SF")])
;; V4SF subtraction expander.  Operand 1 must already be a register; the
;; operands are adjusted by ix86_fixup_binary_operands_no_copy.
314 (define_expand "subv4sf3"
315 [(set (match_operand:V4SF 0 "register_operand" "")
316 (minus:V4SF (match_operand:V4SF 1 "register_operand" "")
317 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
319 "ix86_fixup_binary_operands_no_copy (MINUS, V4SFmode, operands);")
;; subps.  Operand 1 is a register tied to the destination ("0", with no
;; commutative marker); operand 2 may be an SSE register or memory.
321 (define_insn "*subv4sf3"
322 [(set (match_operand:V4SF 0 "register_operand" "=x")
323 (minus:V4SF (match_operand:V4SF 1 "register_operand" "0")
324 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
326 "subps\t{%2, %0|%0, %2}"
327 [(set_attr "type" "sseadd")
328 (set_attr "mode" "V4SF")])
;; subss on V4SF operands (mode attribute SF).  Operand 1 is a register tied
;; to the destination; operand 2 may be an SSE register or memory.
330 (define_insn "sse_vmsubv4sf3"
331 [(set (match_operand:V4SF 0 "register_operand" "=x")
333 (minus:V4SF (match_operand:V4SF 1 "register_operand" "0")
334 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
338 "subss\t{%2, %0|%0, %2}"
339 [(set_attr "type" "sseadd")
340 (set_attr "mode" "SF")])
;; V4SF multiplication expander.  Both inputs may be register or memory;
;; the operands are adjusted by ix86_fixup_binary_operands_no_copy.
342 (define_expand "mulv4sf3"
343 [(set (match_operand:V4SF 0 "register_operand" "")
344 (mult:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
345 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
347 "ix86_fixup_binary_operands_no_copy (MULT, V4SFmode, operands);")
;; mulps.  Operand 1 is tied to the destination and marked commutative
;; ("%0"); operand 2 may be an SSE register or memory.  Enabled when
;; TARGET_SSE holds and ix86_binary_operator_ok accepts the operands.
349 (define_insn "*mulv4sf3"
350 [(set (match_operand:V4SF 0 "register_operand" "=x")
351 (mult:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
352 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
353 "TARGET_SSE && ix86_binary_operator_ok (MULT, V4SFmode, operands)"
354 "mulps\t{%2, %0|%0, %2}"
355 [(set_attr "type" "ssemul")
356 (set_attr "mode" "V4SF")])
;; mulss on V4SF operands (mode attribute SF).  Operand 1 is tied to the
;; destination and marked commutative ("%0"); operand 2 may be an SSE
;; register or memory.
358 (define_insn "sse_vmmulv4sf3"
359 [(set (match_operand:V4SF 0 "register_operand" "=x")
361 (mult:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
362 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
365 "TARGET_SSE && ix86_binary_operator_ok (MULT, V4SFmode, operands)"
366 "mulss\t{%2, %0|%0, %2}"
367 [(set_attr "type" "ssemul")
368 (set_attr "mode" "SF")])
;; V4SF division expander.  Operand 1 must already be a register; the
;; operands are adjusted by ix86_fixup_binary_operands_no_copy.
370 (define_expand "divv4sf3"
371 [(set (match_operand:V4SF 0 "register_operand" "")
372 (div:V4SF (match_operand:V4SF 1 "register_operand" "")
373 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
375 "ix86_fixup_binary_operands_no_copy (DIV, V4SFmode, operands);")
;; divps.  Operand 1 is a register tied to the destination ("0", with no
;; commutative marker); operand 2 may be an SSE register or memory.
377 (define_insn "*divv4sf3"
378 [(set (match_operand:V4SF 0 "register_operand" "=x")
379 (div:V4SF (match_operand:V4SF 1 "register_operand" "0")
380 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
382 "divps\t{%2, %0|%0, %2}"
383 [(set_attr "type" "ssediv")
384 (set_attr "mode" "V4SF")])
;; divss on V4SF operands (mode attribute SF).  Operand 1 is a register tied
;; to the destination; operand 2 may be an SSE register or memory.
386 (define_insn "sse_vmdivv4sf3"
387 [(set (match_operand:V4SF 0 "register_operand" "=x")
389 (div:V4SF (match_operand:V4SF 1 "register_operand" "0")
390 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
394 "divss\t{%2, %0|%0, %2}"
395 [(set_attr "type" "ssediv")
396 (set_attr "mode" "SF")])
;; rcpps, represented as UNSPEC_RCP of operand 1 (SSE register or memory).
;; The destination is not tied to the input.
398 (define_insn "sse_rcpv4sf2"
399 [(set (match_operand:V4SF 0 "register_operand" "=x")
401 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
403 "rcpps\t{%1, %0|%0, %1}"
404 [(set_attr "type" "sse")
405 (set_attr "mode" "V4SF")])
;; rcpss (mode attribute SF).  Operand 1 is the unspec input (register or
;; memory); operand 2 is a register tied to the destination ("0").
407 (define_insn "sse_vmrcpv4sf2"
408 [(set (match_operand:V4SF 0 "register_operand" "=x")
410 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
412 (match_operand:V4SF 2 "register_operand" "0")
415 "rcpss\t{%1, %0|%0, %1}"
416 [(set_attr "type" "sse")
417 (set_attr "mode" "SF")])
;; rsqrtps, represented as UNSPEC_RSQRT of operand 1 (SSE register or
;; memory).  The destination is not tied to the input.
419 (define_insn "sse_rsqrtv4sf2"
420 [(set (match_operand:V4SF 0 "register_operand" "=x")
422 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
424 "rsqrtps\t{%1, %0|%0, %1}"
425 [(set_attr "type" "sse")
426 (set_attr "mode" "V4SF")])
;; rsqrtss (mode attribute SF).  Operand 1 is the unspec input (register or
;; memory); operand 2 is a register tied to the destination ("0").
428 (define_insn "sse_vmrsqrtv4sf2"
429 [(set (match_operand:V4SF 0 "register_operand" "=x")
431 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
433 (match_operand:V4SF 2 "register_operand" "0")
436 "rsqrtss\t{%1, %0|%0, %1}"
437 [(set_attr "type" "sse")
438 (set_attr "mode" "SF")])
;; sqrtps: V4SF square root of operand 1 (SSE register or memory).  The
;; destination is not tied to the input.
440 (define_insn "sqrtv4sf2"
441 [(set (match_operand:V4SF 0 "register_operand" "=x")
442 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
444 "sqrtps\t{%1, %0|%0, %1}"
445 [(set_attr "type" "sse")
446 (set_attr "mode" "V4SF")])
;; sqrtss (mode attribute SF).  Operand 1 is the sqrt input (register or
;; memory); operand 2 is a register tied to the destination ("0").
448 (define_insn "sse_vmsqrtv4sf2"
449 [(set (match_operand:V4SF 0 "register_operand" "=x")
451 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
452 (match_operand:V4SF 2 "register_operand" "0")
455 "sqrtss\t{%1, %0|%0, %1}"
456 [(set_attr "type" "sse")
457 (set_attr "mode" "SF")])
459 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
460 ;; isn't really correct, as those rtl operators aren't defined when
461 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
;; V4SF signed-maximum expander.  When flag_finite_math_only is off,
;; operand 1 is first forced into a register; the operands are then adjusted
;; by ix86_fixup_binary_operands_no_copy.
463 (define_expand "smaxv4sf3"
464 [(set (match_operand:V4SF 0 "register_operand" "")
465 (smax:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
466 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
469 if (!flag_finite_math_only)
470 operands[1] = force_reg (V4SFmode, operands[1]);
471 ix86_fixup_binary_operands_no_copy (SMAX, V4SFmode, operands);
;; maxps, commutative form.  Only enabled under flag_finite_math_only (plus
;; TARGET_SSE and ix86_binary_operator_ok); operand 1 carries the "%0"
;; commutative constraint.
474 (define_insn "*smaxv4sf3_finite"
475 [(set (match_operand:V4SF 0 "register_operand" "=x")
476 (smax:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
477 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
478 "TARGET_SSE && flag_finite_math_only
479 && ix86_binary_operator_ok (SMAX, V4SFmode, operands)"
480 "maxps\t{%2, %0|%0, %2}"
481 [(set_attr "type" "sse")
482 (set_attr "mode" "V4SF")])
;; maxps, non-commutative form: operand 1 is a register tied to the
;; destination ("0"); operand 2 may be an SSE register or memory.
484 (define_insn "*smaxv4sf3"
485 [(set (match_operand:V4SF 0 "register_operand" "=x")
486 (smax:V4SF (match_operand:V4SF 1 "register_operand" "0")
487 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
489 "maxps\t{%2, %0|%0, %2}"
490 [(set_attr "type" "sse")
491 (set_attr "mode" "V4SF")])
;; maxss, commutative form (mode attribute SF).  Only enabled under
;; flag_finite_math_only (plus TARGET_SSE and ix86_binary_operator_ok).
493 (define_insn "*sse_vmsmaxv4sf3_finite"
494 [(set (match_operand:V4SF 0 "register_operand" "=x")
496 (smax:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
497 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
500 "TARGET_SSE && flag_finite_math_only
501 && ix86_binary_operator_ok (SMAX, V4SFmode, operands)"
502 "maxss\t{%2, %0|%0, %2}"
503 [(set_attr "type" "sse")
504 (set_attr "mode" "SF")])
;; maxss, non-commutative form (mode attribute SF): operand 1 is a register
;; tied to the destination; operand 2 may be an SSE register or memory.
506 (define_insn "sse_vmsmaxv4sf3"
507 [(set (match_operand:V4SF 0 "register_operand" "=x")
509 (smax:V4SF (match_operand:V4SF 1 "register_operand" "0")
510 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
514 "maxss\t{%2, %0|%0, %2}"
515 [(set_attr "type" "sse")
516 (set_attr "mode" "SF")])
;; V4SF signed-minimum expander.  When flag_finite_math_only is off,
;; operand 1 is first forced into a register; the operands are then adjusted
;; by ix86_fixup_binary_operands_no_copy.
518 (define_expand "sminv4sf3"
519 [(set (match_operand:V4SF 0 "register_operand" "")
520 (smin:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
521 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
524 if (!flag_finite_math_only)
525 operands[1] = force_reg (V4SFmode, operands[1]);
526 ix86_fixup_binary_operands_no_copy (SMIN, V4SFmode, operands);
;; minps, commutative form.  Only enabled under flag_finite_math_only (plus
;; TARGET_SSE and ix86_binary_operator_ok); operand 1 carries the "%0"
;; commutative constraint.
529 (define_insn "*sminv4sf3_finite"
530 [(set (match_operand:V4SF 0 "register_operand" "=x")
531 (smin:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
532 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
533 "TARGET_SSE && flag_finite_math_only
534 && ix86_binary_operator_ok (SMIN, V4SFmode, operands)"
535 "minps\t{%2, %0|%0, %2}"
536 [(set_attr "type" "sse")
537 (set_attr "mode" "V4SF")])
;; minps, non-commutative form: operand 1 is a register tied to the
;; destination ("0"); operand 2 may be an SSE register or memory.
539 (define_insn "*sminv4sf3"
540 [(set (match_operand:V4SF 0 "register_operand" "=x")
541 (smin:V4SF (match_operand:V4SF 1 "register_operand" "0")
542 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
544 "minps\t{%2, %0|%0, %2}"
545 [(set_attr "type" "sse")
546 (set_attr "mode" "V4SF")])
;; minss, commutative form (mode attribute SF).  Only enabled under
;; flag_finite_math_only (plus TARGET_SSE and ix86_binary_operator_ok).
548 (define_insn "*sse_vmsminv4sf3_finite"
549 [(set (match_operand:V4SF 0 "register_operand" "=x")
551 (smin:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
552 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
555 "TARGET_SSE && flag_finite_math_only
556 && ix86_binary_operator_ok (SMIN, V4SFmode, operands)"
557 "minss\t{%2, %0|%0, %2}"
558 [(set_attr "type" "sse")
559 (set_attr "mode" "SF")])
;; minss, non-commutative form (mode attribute SF): operand 1 is a register
;; tied to the destination; operand 2 may be an SSE register or memory.
561 (define_insn "sse_vmsminv4sf3"
562 [(set (match_operand:V4SF 0 "register_operand" "=x")
564 (smin:V4SF (match_operand:V4SF 1 "register_operand" "0")
565 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
569 "minss\t{%2, %0|%0, %2}"
570 [(set_attr "type" "sse")
571 (set_attr "mode" "SF")])
573 ;; These versions of the min/max patterns implement exactly the operations
574 ;; min = (op1 < op2 ? op1 : op2)
575 ;; max = (!(op1 < op2) ? op1 : op2)
576 ;; Their operands are not commutative, and thus they may be used in the
577 ;; presence of -0.0 and NaN.
;; minps expressed as an unspec of (operand 1, operand 2) rather than smin,
;; so operand order is preserved.  Operand 1 is tied to the destination.
579 (define_insn "*ieee_sminv4sf3"
580 [(set (match_operand:V4SF 0 "register_operand" "=x")
581 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0")
582 (match_operand:V4SF 2 "nonimmediate_operand" "xm")]
585 "minps\t{%2, %0|%0, %2}"
586 [(set_attr "type" "sseadd")
587 (set_attr "mode" "V4SF")])
;; maxps expressed as an unspec of (operand 1, operand 2) rather than smax,
;; so operand order is preserved.  Operand 1 is tied to the destination.
589 (define_insn "*ieee_smaxv4sf3"
590 [(set (match_operand:V4SF 0 "register_operand" "=x")
591 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0")
592 (match_operand:V4SF 2 "nonimmediate_operand" "xm")]
595 "maxps\t{%2, %0|%0, %2}"
596 [(set_attr "type" "sseadd")
597 (set_attr "mode" "V4SF")])
;; minpd expressed as an unspec of (operand 1, operand 2) rather than smin,
;; so operand order is preserved.  Operand 1 is tied to the destination.
599 (define_insn "*ieee_sminv2df3"
600 [(set (match_operand:V2DF 0 "register_operand" "=x")
601 (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "0")
602 (match_operand:V2DF 2 "nonimmediate_operand" "xm")]
605 "minpd\t{%2, %0|%0, %2}"
606 [(set_attr "type" "sseadd")
607 (set_attr "mode" "V2DF")])
;; maxpd expressed as an unspec of (operand 1, operand 2) rather than smax,
;; so operand order is preserved.  Operand 1 is tied to the destination.
609 (define_insn "*ieee_smaxv2df3"
610 [(set (match_operand:V2DF 0 "register_operand" "=x")
611 (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "0")
612 (match_operand:V2DF 2 "nonimmediate_operand" "xm")]
615 "maxpd\t{%2, %0|%0, %2}"
616 [(set_attr "type" "sseadd")
617 (set_attr "mode" "V2DF")])
;; addsubps.  The RTL combines an operation on operands 1 and 2 with
;; (minus op1 op2) on the same operands (match_dup).  Operand 1 is tied to
;; the destination; operand 2 may be an SSE register or memory.
619 (define_insn "sse3_addsubv4sf3"
620 [(set (match_operand:V4SF 0 "register_operand" "=x")
623 (match_operand:V4SF 1 "register_operand" "0")
624 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
625 (minus:V4SF (match_dup 1) (match_dup 2))
628 "addsubps\t{%2, %0|%0, %2}"
629 [(set_attr "type" "sseadd")
630 (set_attr "mode" "V4SF")])
;; haddps.  The RTL refers to elements 0-3 of operand 1 and then elements
;; 0-3 of operand 2 through SFmode vec_selects.  Operand 1 is tied to the
;; destination; operand 2 may be an SSE register or memory.
632 (define_insn "sse3_haddv4sf3"
633 [(set (match_operand:V4SF 0 "register_operand" "=x")
638 (match_operand:V4SF 1 "register_operand" "0")
639 (parallel [(const_int 0)]))
640 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
642 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
643 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
647 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
648 (parallel [(const_int 0)]))
649 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
651 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
652 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
654 "haddps\t{%2, %0|%0, %2}"
655 [(set_attr "type" "sseadd")
656 (set_attr "mode" "V4SF")])
;; hsubps.  The RTL refers to elements 0-3 of operand 1 and then elements
;; 0-3 of operand 2 through SFmode vec_selects.  Operand 1 is tied to the
;; destination; operand 2 may be an SSE register or memory.
658 (define_insn "sse3_hsubv4sf3"
659 [(set (match_operand:V4SF 0 "register_operand" "=x")
664 (match_operand:V4SF 1 "register_operand" "0")
665 (parallel [(const_int 0)]))
666 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
668 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
669 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
673 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
674 (parallel [(const_int 0)]))
675 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
677 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
678 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
680 "hsubps\t{%2, %0|%0, %2}"
681 [(set_attr "type" "sseadd")
682 (set_attr "mode" "V4SF")])
;; Sum reduction of a V4SF register into operand 0.  One path applies
;; sse3_haddv4sf3 twice through a fresh temporary register; the other calls
;; ix86_expand_reduc_v4sf with gen_addv4sf3 (presumably the choice depends
;; on SSE3 availability -- confirm).
684 (define_expand "reduc_splus_v4sf"
685 [(match_operand:V4SF 0 "register_operand" "")
686 (match_operand:V4SF 1 "register_operand" "")]
691 rtx tmp = gen_reg_rtx (V4SFmode);
692 emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
693 emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
696 ix86_expand_reduc_v4sf (gen_addv4sf3, operands[0], operands[1]);
;; Maximum reduction of a V4SF register; delegates to
;; ix86_expand_reduc_v4sf with gen_smaxv4sf3 as the combining operation.
700 (define_expand "reduc_smax_v4sf"
701 [(match_operand:V4SF 0 "register_operand" "")
702 (match_operand:V4SF 1 "register_operand" "")]
705 ix86_expand_reduc_v4sf (gen_smaxv4sf3, operands[0], operands[1]);
;; Minimum reduction of a V4SF register; delegates to
;; ix86_expand_reduc_v4sf with gen_sminv4sf3 as the combining operation.
709 (define_expand "reduc_smin_v4sf"
710 [(match_operand:V4SF 0 "register_operand" "")
711 (match_operand:V4SF 1 "register_operand" "")]
714 ix86_expand_reduc_v4sf (gen_sminv4sf3, operands[0], operands[1]);
718 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
720 ;; Parallel single-precision floating point comparisons
722 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Packed compare (cmp<cond>ps).  Operand 3 is the comparison operator,
;; accepted by sse_comparison_operator and printed via %D3.  Operand 1 is
;; tied to the destination; operand 2 may be an SSE register or memory.
724 (define_insn "sse_maskcmpv4sf3"
725 [(set (match_operand:V4SF 0 "register_operand" "=x")
726 (match_operator:V4SF 3 "sse_comparison_operator"
727 [(match_operand:V4SF 1 "register_operand" "0")
728 (match_operand:V4SF 2 "nonimmediate_operand" "xm")]))]
730 "cmp%D3ps\t{%2, %0|%0, %2}"
731 [(set_attr "type" "ssecmp")
732 (set_attr "mode" "V4SF")])
;; Scalar compare (cmp<cond>ss, mode attribute SF).  Operand 3 is the
;; comparison operator, printed via %D3.  Operand 1 is tied to the
;; destination; operand 2 must be an SSE register (no memory alternative).
734 (define_insn "sse_vmmaskcmpv4sf3"
735 [(set (match_operand:V4SF 0 "register_operand" "=x")
737 (match_operator:V4SF 3 "sse_comparison_operator"
738 [(match_operand:V4SF 1 "register_operand" "0")
739 (match_operand:V4SF 2 "register_operand" "x")])
743 "cmp%D3ss\t{%2, %0|%0, %2}"
744 [(set_attr "type" "ssecmp")
745 (set_attr "mode" "SF")])
;; comiss: sets the flags register (CCFP mode) from element 0 of operand 0
;; (SSE register) and element 0 of operand 1 (SSE register or memory).
747 (define_insn "sse_comi"
748 [(set (reg:CCFP FLAGS_REG)
751 (match_operand:V4SF 0 "register_operand" "x")
752 (parallel [(const_int 0)]))
754 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
755 (parallel [(const_int 0)]))))]
757 "comiss\t{%1, %0|%0, %1}"
758 [(set_attr "type" "ssecomi")
759 (set_attr "mode" "SF")])
;; ucomiss: sets the flags register (CCFPU mode) from element 0 of operand 0
;; (SSE register) and element 0 of operand 1 (SSE register or memory).
761 (define_insn "sse_ucomi"
762 [(set (reg:CCFPU FLAGS_REG)
765 (match_operand:V4SF 0 "register_operand" "x")
766 (parallel [(const_int 0)]))
768 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
769 (parallel [(const_int 0)]))))]
771 "ucomiss\t{%1, %0|%0, %1}"
772 [(set_attr "type" "ssecomi")
773 (set_attr "mode" "SF")])
;; V4SF vector conditional expander.  Operands 4 and 5 are the compared
;; values and operands 1 and 2 the two selectable values; expansion is
;; attempted through ix86_expand_fp_vcond, whose result is tested.
775 (define_expand "vcondv4sf"
776 [(set (match_operand:V4SF 0 "register_operand" "")
779 [(match_operand:V4SF 4 "nonimmediate_operand" "")
780 (match_operand:V4SF 5 "nonimmediate_operand" "")])
781 (match_operand:V4SF 1 "general_operand" "")
782 (match_operand:V4SF 2 "general_operand" "")))]
785 if (ix86_expand_fp_vcond (operands))
791 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
793 ;; Parallel single-precision floating point logical operations
795 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; V4SF bitwise-AND expander; the operands are adjusted by
;; ix86_fixup_binary_operands_no_copy.
797 (define_expand "andv4sf3"
798 [(set (match_operand:V4SF 0 "register_operand" "")
799 (and:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
800 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
802 "ix86_fixup_binary_operands_no_copy (AND, V4SFmode, operands);")
;; andps.  Operand 1 is tied to the destination and marked commutative
;; ("%0"); operand 2 may be an SSE register or memory.  Enabled when
;; TARGET_SSE holds and ix86_binary_operator_ok accepts the operands.
804 (define_insn "*andv4sf3"
805 [(set (match_operand:V4SF 0 "register_operand" "=x")
806 (and:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
807 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
808 "TARGET_SSE && ix86_binary_operator_ok (AND, V4SFmode, operands)"
809 "andps\t{%2, %0|%0, %2}"
810 [(set_attr "type" "sselog")
811 (set_attr "mode" "V4SF")])
;; andnps: (NOT operand 1) AND operand 2.  Operand 1 is a register tied to
;; the destination; operand 2 may be an SSE register or memory.
813 (define_insn "sse_nandv4sf3"
814 [(set (match_operand:V4SF 0 "register_operand" "=x")
815 (and:V4SF (not:V4SF (match_operand:V4SF 1 "register_operand" "0"))
816 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
818 "andnps\t{%2, %0|%0, %2}"
819 [(set_attr "type" "sselog")
820 (set_attr "mode" "V4SF")])
;; V4SF bitwise-OR expander; the operands are adjusted by
;; ix86_fixup_binary_operands_no_copy.
822 (define_expand "iorv4sf3"
823 [(set (match_operand:V4SF 0 "register_operand" "")
824 (ior:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
825 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
827 "ix86_fixup_binary_operands_no_copy (IOR, V4SFmode, operands);")
;; orps.  Operand 1 is tied to the destination and marked commutative
;; ("%0"); operand 2 may be an SSE register or memory.  Enabled when
;; TARGET_SSE holds and ix86_binary_operator_ok accepts the operands.
829 (define_insn "*iorv4sf3"
830 [(set (match_operand:V4SF 0 "register_operand" "=x")
831 (ior:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
832 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
833 "TARGET_SSE && ix86_binary_operator_ok (IOR, V4SFmode, operands)"
834 "orps\t{%2, %0|%0, %2}"
835 [(set_attr "type" "sselog")
836 (set_attr "mode" "V4SF")])
;; V4SF bitwise-XOR expander; the operands are adjusted by
;; ix86_fixup_binary_operands_no_copy.
838 (define_expand "xorv4sf3"
839 [(set (match_operand:V4SF 0 "register_operand" "")
840 (xor:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
841 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
843 "ix86_fixup_binary_operands_no_copy (XOR, V4SFmode, operands);")
;; xorps.  Operand 1 is tied to the destination and marked commutative
;; ("%0"); operand 2 may be an SSE register or memory.  Enabled when
;; TARGET_SSE holds and ix86_binary_operator_ok accepts the operands.
845 (define_insn "*xorv4sf3"
846 [(set (match_operand:V4SF 0 "register_operand" "=x")
847 (xor:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
848 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
849 "TARGET_SSE && ix86_binary_operator_ok (XOR, V4SFmode, operands)"
850 "xorps\t{%2, %0|%0, %2}"
851 [(set_attr "type" "sselog")
852 (set_attr "mode" "V4SF")])
854 ;; Also define scalar versions. These are used for abs, neg, and
855 ;; conditional move. Using subregs into vector modes causes register
856 ;; allocation lossage. These patterns do not allow memory operands
857 ;; because the native instructions read the full 128 bits.
;; Bitwise AND on SFmode registers, emitted as the full-width andps (hence
;; mode attribute V4SF).  Both inputs are registers; operand 1 is tied to
;; the destination.
859 (define_insn "*andsf3"
860 [(set (match_operand:SF 0 "register_operand" "=x")
861 (and:SF (match_operand:SF 1 "register_operand" "0")
862 (match_operand:SF 2 "register_operand" "x")))]
864 "andps\t{%2, %0|%0, %2}"
865 [(set_attr "type" "sselog")
866 (set_attr "mode" "V4SF")])
;; (NOT operand 1) AND operand 2 on SFmode registers, emitted as the
;; full-width andnps (hence mode attribute V4SF).  Operand 1 is tied to the
;; destination.
868 (define_insn "*nandsf3"
869 [(set (match_operand:SF 0 "register_operand" "=x")
870 (and:SF (not:SF (match_operand:SF 1 "register_operand" "0"))
871 (match_operand:SF 2 "register_operand" "x")))]
873 "andnps\t{%2, %0|%0, %2}"
874 [(set_attr "type" "sselog")
875 (set_attr "mode" "V4SF")])
;; Bitwise OR on SFmode registers, emitted as the full-width orps (hence
;; mode attribute V4SF).  Both inputs are registers; operand 1 is tied to
;; the destination.
877 (define_insn "*iorsf3"
878 [(set (match_operand:SF 0 "register_operand" "=x")
879 (ior:SF (match_operand:SF 1 "register_operand" "0")
880 (match_operand:SF 2 "register_operand" "x")))]
882 "orps\t{%2, %0|%0, %2}"
883 [(set_attr "type" "sselog")
884 (set_attr "mode" "V4SF")])
;; Bitwise XOR on SFmode registers, emitted as the full-width xorps (hence
;; mode attribute V4SF).  Both inputs are registers; operand 1 is tied to
;; the destination.
886 (define_insn "*xorsf3"
887 [(set (match_operand:SF 0 "register_operand" "=x")
888 (xor:SF (match_operand:SF 1 "register_operand" "0")
889 (match_operand:SF 2 "register_operand" "x")))]
891 "xorps\t{%2, %0|%0, %2}"
892 [(set_attr "type" "sselog")
893 (set_attr "mode" "V4SF")])
895 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
897 ;; Parallel single-precision floating point conversion operations
899 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2ps: converts the V2SI operand 2 ("ym": MMX register or memory) to
;; V2SF.  Operand 1 is a V4SF register tied to the destination.
901 (define_insn "sse_cvtpi2ps"
902 [(set (match_operand:V4SF 0 "register_operand" "=x")
905 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
906 (match_operand:V4SF 1 "register_operand" "0")
909 "cvtpi2ps\t{%2, %0|%0, %2}"
910 [(set_attr "type" "ssecvt")
911 (set_attr "mode" "V4SF")])
;; cvtps2pi: V4SF operand 1 (SSE register or memory) converted through a
;; V4SI unspec, of which elements 0 and 1 are selected into a V2SI
;; destination with constraint "y".  Unit attribute is mmx.
913 (define_insn "sse_cvtps2pi"
914 [(set (match_operand:V2SI 0 "register_operand" "=y")
916 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
918 (parallel [(const_int 0) (const_int 1)])))]
920 "cvtps2pi\t{%1, %0|%0, %1}"
921 [(set_attr "type" "ssecvt")
922 (set_attr "unit" "mmx")
923 (set_attr "mode" "DI")])
;; cvttps2pi: V4SF operand 1 (SSE register or memory) converted with
;; fix:V4SI, of which elements 0 and 1 are selected into a V2SI destination
;; with constraint "y".  Unit attribute is mmx.
;; NOTE(review): the mode attribute is "SF" here while sse_cvtps2pi, which
;; has the same operand shapes, uses "DI" -- confirm which is intended.
925 (define_insn "sse_cvttps2pi"
926 [(set (match_operand:V2SI 0 "register_operand" "=y")
928 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
929 (parallel [(const_int 0) (const_int 1)])))]
931 "cvttps2pi\t{%1, %0|%0, %1}"
932 [(set_attr "type" "ssecvt")
933 (set_attr "unit" "mmx")
934 (set_attr "mode" "SF")])
;; cvtsi2ss: converts the SImode operand 2 to SF.  Two alternatives: source
;; in a general register ("r") or in memory ("m"), with athlon_decode
;; "vector" and "double" respectively.  Operand 1 is tied to the
;; destination.
936 (define_insn "sse_cvtsi2ss"
937 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
940 (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
941 (match_operand:V4SF 1 "register_operand" "0,0")
944 "cvtsi2ss\t{%2, %0|%0, %2}"
945 [(set_attr "type" "sseicvt")
946 (set_attr "athlon_decode" "vector,double")
947 (set_attr "mode" "SF")])
;; cvtsi2ssq: converts the DImode operand 2 to SF; requires TARGET_SSE and
;; TARGET_64BIT.  Operand 1 is tied to the destination.
;; NOTE(review): the second alternative's source constraint is "rm" whereas
;; sse_cvtsi2ss uses "m" for the alternative tagged athlon_decode "double"
;; -- confirm whether the register case is meant to be allowed there.
949 (define_insn "sse_cvtsi2ssq"
950 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
953 (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,rm")))
954 (match_operand:V4SF 1 "register_operand" "0,0")
956 "TARGET_SSE && TARGET_64BIT"
957 "cvtsi2ssq\t{%2, %0|%0, %2}"
958 [(set_attr "type" "sseicvt")
959 (set_attr "athlon_decode" "vector,double")
960 (set_attr "mode" "SF")])
;; cvtss2si: element 0 of the V4SF operand 1 converted to an SImode general
;; register, represented with UNSPEC_FIX_NOTRUNC.  Alternatives take the
;; source from an SSE register or from memory.
962 (define_insn "sse_cvtss2si"
963 [(set (match_operand:SI 0 "register_operand" "=r,r")
966 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
967 (parallel [(const_int 0)]))]
968 UNSPEC_FIX_NOTRUNC))]
970 "cvtss2si\t{%1, %0|%0, %1}"
971 [(set_attr "type" "sseicvt")
972 (set_attr "athlon_decode" "double,vector")
973 (set_attr "mode" "SI")])
;; cvtss2siq: element 0 of the V4SF operand 1 converted to a DImode general
;; register, represented with UNSPEC_FIX_NOTRUNC.  Requires TARGET_SSE and
;; TARGET_64BIT.
975 (define_insn "sse_cvtss2siq"
976 [(set (match_operand:DI 0 "register_operand" "=r,r")
979 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
980 (parallel [(const_int 0)]))]
981 UNSPEC_FIX_NOTRUNC))]
982 "TARGET_SSE && TARGET_64BIT"
983 "cvtss2siq\t{%1, %0|%0, %1}"
984 [(set_attr "type" "sseicvt")
985 (set_attr "athlon_decode" "double,vector")
986 (set_attr "mode" "DI")])
;; cvttss2si: element 0 of the V4SF operand 1 converted to an SImode general
;; register.  Alternatives take the source from an SSE register or from
;; memory.
988 (define_insn "sse_cvttss2si"
989 [(set (match_operand:SI 0 "register_operand" "=r,r")
992 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
993 (parallel [(const_int 0)]))))]
995 "cvttss2si\t{%1, %0|%0, %1}"
996 [(set_attr "type" "sseicvt")
997 (set_attr "athlon_decode" "double,vector")
998 (set_attr "mode" "SI")])
;; cvttss2siq: element 0 of the V4SF operand 1 converted to a DImode general
;; register.  Requires TARGET_SSE and TARGET_64BIT.
1000 (define_insn "sse_cvttss2siq"
1001 [(set (match_operand:DI 0 "register_operand" "=r,r")
1004 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1005 (parallel [(const_int 0)]))))]
1006 "TARGET_SSE && TARGET_64BIT"
1007 "cvttss2siq\t{%1, %0|%0, %1}"
1008 [(set_attr "type" "sseicvt")
1009 (set_attr "athlon_decode" "double,vector")
1010 (set_attr "mode" "DI")])
;; cvtdq2ps: converts the V4SI operand 1 (SSE register or memory) to V4SF
;; with the float rtx code.  The destination is not tied to the input.
;; The mode attribute is V4SF to agree with the result mode; it was
;; previously "V2DF", which describes a packed-double insn
;; (NOTE(review): the backend is understood to derive prefix/length
;; information from this attribute -- confirm in i386.md).
1012 (define_insn "sse2_cvtdq2ps"
1013 [(set (match_operand:V4SF 0 "register_operand" "=x")
1014 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
1016 "cvtdq2ps\t{%1, %0|%0, %1}"
1017 [(set_attr "type" "ssecvt")
1018 (set_attr "mode" "V4SF")])
;; cvtps2dq: V4SF operand 1 (SSE register or memory) converted to V4SI,
;; represented with UNSPEC_FIX_NOTRUNC.
1020 (define_insn "sse2_cvtps2dq"
1021 [(set (match_operand:V4SI 0 "register_operand" "=x")
1022 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
1023 UNSPEC_FIX_NOTRUNC))]
1025 "cvtps2dq\t{%1, %0|%0, %1}"
1026 [(set_attr "type" "ssecvt")
1027 (set_attr "mode" "TI")])
;; cvttps2dq: V4SF operand 1 (SSE register or memory) converted to V4SI with
;; the fix rtx code.
1029 (define_insn "sse2_cvttps2dq"
1030 [(set (match_operand:V4SI 0 "register_operand" "=x")
1031 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
1033 "cvttps2dq\t{%1, %0|%0, %1}"
1034 [(set_attr "type" "ssecvt")
1035 (set_attr "mode" "TI")])
1037 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1039 ;; Parallel single-precision floating point element swizzling
1041 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Three alternatives sharing one RTL shape, with operand 1 tied to the
;; destination in each:
;;   0: register source      -> movhlps
;;   1: offsettable memory   -> movlps, reading from %H2
;;   2: memory destination   -> movhps store of a register
;; Operands 1 and 2 may not both be memory.
1043 (define_insn "sse_movhlps"
1044 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
1047 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
1048 (match_operand:V4SF 2 "nonimmediate_operand" " x,o,x"))
1049 (parallel [(const_int 6)
1053 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
1055 movhlps\t{%2, %0|%0, %2}
1056 movlps\t{%H2, %0|%0, %H2}
1057 movhps\t{%2, %0|%0, %2}"
1058 [(set_attr "type" "ssemov")
1059 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Three alternatives sharing one RTL shape, with operand 1 tied to the
;; destination in each:
;;   0: register source               -> movlhps
;;   1: memory source                 -> movhps
;;   2: offsettable memory destination -> movlps store to %H0
;; Enabled when ix86_binary_operator_ok accepts the operands.
1061 (define_insn "sse_movlhps"
1062 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
1065 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
1066 (match_operand:V4SF 2 "nonimmediate_operand" " x,m,x"))
1067 (parallel [(const_int 0)
1071 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
1073 movlhps\t{%2, %0|%0, %2}
1074 movhps\t{%2, %0|%0, %2}
1075 movlps\t{%2, %H0|%H0, %2}"
1076 [(set_attr "type" "ssemov")
1077 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; unpckhps: the result takes elements with selection indices 2, 6, 3, 7
;; over operands 1 and 2; operand 1 is tied to the destination.
1079 (define_insn "sse_unpckhps"
1080 [(set (match_operand:V4SF 0 "register_operand" "=x")
1083 (match_operand:V4SF 1 "register_operand" "0")
1084 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1085 (parallel [(const_int 2) (const_int 6)
1086 (const_int 3) (const_int 7)])))]
1088 "unpckhps\t{%2, %0|%0, %2}"
1089 [(set_attr "type" "sselog")
1090 (set_attr "mode" "V4SF")])
;; unpcklps: the result takes elements with selection indices 0, 4, 1, 5
;; over operands 1 and 2; operand 1 is tied to the destination.
1092 (define_insn "sse_unpcklps"
1093 [(set (match_operand:V4SF 0 "register_operand" "=x")
1096 (match_operand:V4SF 1 "register_operand" "0")
1097 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1098 (parallel [(const_int 0) (const_int 4)
1099 (const_int 1) (const_int 5)])))]
1101 "unpcklps\t{%2, %0|%0, %2}"
1102 [(set_attr "type" "sselog")
1103 (set_attr "mode" "V4SF")])
1105 ;; These are modeled with the same vec_concat as the others so that we
1106 ;; capture users of shufps that can use the new instructions
;; movshdup: single-source shuffle of V4SF operand 1 (register or memory);
;; the selection list starts at index 1.
1107 (define_insn "sse3_movshdup"
1108 [(set (match_operand:V4SF 0 "register_operand" "=x")
1111 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
1113 (parallel [(const_int 1)
1118 "movshdup\t{%1, %0|%0, %1}"
1119 [(set_attr "type" "sse")
1120 (set_attr "mode" "V4SF")])
;; movsldup: single-source shuffle of V4SF operand 1 (register or memory);
;; the selection list starts at index 0.
1122 (define_insn "sse3_movsldup"
1123 [(set (match_operand:V4SF 0 "register_operand" "=x")
1126 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
1128 (parallel [(const_int 0)
1133 "movsldup\t{%1, %0|%0, %1}"
1134 [(set_attr "type" "sse")
1135 (set_attr "mode" "V4SF")])
;; Expander for shufps taking the immediate as const_int operand 3.  It
;; splits the mask into four 2-bit selectors, adds 4 to the upper two, and
;; hands them to gen_sse_shufps_1 as separate operands.
1137 (define_expand "sse_shufps"
1138 [(match_operand:V4SF 0 "register_operand" "")
1139 (match_operand:V4SF 1 "register_operand" "")
1140 (match_operand:V4SF 2 "nonimmediate_operand" "")
1141 (match_operand:SI 3 "const_int_operand" "")]
1144 int mask = INTVAL (operands[3]);
1145 emit_insn (gen_sse_shufps_1 (operands[0], operands[1], operands[2],
1146 GEN_INT ((mask >> 0) & 3),
1147 GEN_INT ((mask >> 2) & 3),
1148 GEN_INT (((mask >> 4) & 3) + 4),
1149 GEN_INT (((mask >> 6) & 3) + 4)));
;; shufps with the four element selectors as separate operands: 3 and 4
;; must be in 0..3, 5 and 6 in 4..7.  The output code packs them back into
;; one immediate (subtracting 4 from operands 5 and 6), stores it in
;; operands[3], and emits shufps.
1153 (define_insn "sse_shufps_1"
1154 [(set (match_operand:V4SF 0 "register_operand" "=x")
1157 (match_operand:V4SF 1 "register_operand" "0")
1158 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1159 (parallel [(match_operand 3 "const_0_to_3_operand" "")
1160 (match_operand 4 "const_0_to_3_operand" "")
1161 (match_operand 5 "const_4_to_7_operand" "")
1162 (match_operand 6 "const_4_to_7_operand" "")])))]
1166 mask |= INTVAL (operands[3]) << 0;
1167 mask |= INTVAL (operands[4]) << 2;
1168 mask |= (INTVAL (operands[5]) - 4) << 4;
1169 mask |= (INTVAL (operands[6]) - 4) << 6;
1170 operands[3] = GEN_INT (mask);
1172 return "shufps\t{%3, %2, %0|%0, %2, %3}";
1174 [(set_attr "type" "sselog")
1175 (set_attr "mode" "V4SF")])
;; Extracts elements 2 and 3 of V4SF operand 1 into V2SF operand 0.
;; Alternatives: movhps to memory, movhlps between registers, and movlps
;; from the high half (%H1) of an offsettable memory source.
1177 (define_insn "sse_storehps"
1178 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
1180 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
1181 (parallel [(const_int 2) (const_int 3)])))]
1184 movhps\t{%1, %0|%0, %1}
1185 movhlps\t{%1, %0|%0, %1}
1186 movlps\t{%H1, %0|%0, %H1}"
1187 [(set_attr "type" "ssemov")
1188 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Combines elements 0 and 1 of operand 1 (tied to the destination) with
;; V2SF operand 2.  Alternatives: movhps from memory, movlhps from a
;; register, and movlps storing to the high half (%H0) of an offsettable
;; memory destination.
1190 (define_insn "sse_loadhps"
1191 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
1194 (match_operand:V4SF 1 "nonimmediate_operand" "0,0,0")
1195 (parallel [(const_int 0) (const_int 1)]))
1196 (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
1199 movhps\t{%2, %0|%0, %2}
1200 movlhps\t{%2, %0|%0, %2}
1201 movlps\t{%2, %H0|%H0, %2}"
1202 [(set_attr "type" "ssemov")
1203 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Extracts elements 0 and 1 of V4SF operand 1 into V2SF operand 0.
;; Alternatives: movlps to memory, movaps between registers, and movlps
;; from memory.
1205 (define_insn "sse_storelps"
1206 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
1208 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m")
1209 (parallel [(const_int 0) (const_int 1)])))]
1212 movlps\t{%1, %0|%0, %1}
1213 movaps\t{%1, %0|%0, %1}
1214 movlps\t{%1, %0|%0, %1}"
1215 [(set_attr "type" "ssemov")
1216 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Combines V2SF operand 2 with elements 2 and 3 of V4SF operand 1.
;; Alternative 0 has operand 2 tied to the destination and uses shufps
;; with immediate 0xe4; the other two tie operand 1 to the destination and
;; use movlps (load from memory, or store to a memory destination).
1218 (define_insn "sse_loadlps"
1219 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
1221 (match_operand:V2SF 2 "nonimmediate_operand" "0,m,x")
1223 (match_operand:V4SF 1 "nonimmediate_operand" "x,0,0")
1224 (parallel [(const_int 2) (const_int 3)]))))]
1227 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
1228 movlps\t{%2, %0|%0, %2}
1229 movlps\t{%2, %0|%0, %2}"
1230 [(set_attr "type" "sselog,ssemov,ssemov")
1231 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Register-to-register movss: operand 2 is the source named in the
;; template, operand 1 is tied to the destination.
1233 (define_insn "sse_movss"
1234 [(set (match_operand:V4SF 0 "register_operand" "=x")
1236 (match_operand:V4SF 2 "register_operand" "x")
1237 (match_operand:V4SF 1 "register_operand" "0")
1240 "movss\t{%2, %0|%0, %2}"
1241 [(set_attr "type" "ssemov")
1242 (set_attr "mode" "SF")])
;; Builds a V4SF from SF operand 1, which is tied to the destination
;; register; emitted as shufps with immediate 0 using %0 as both sources.
1244 (define_insn "*vec_dupv4sf"
1245 [(set (match_operand:V4SF 0 "register_operand" "=x")
1247 (match_operand:SF 1 "register_operand" "0")))]
1249 "shufps\t{$0, %0, %0|%0, %0, 0}"
1250 [(set_attr "type" "sselog1")
1251 (set_attr "mode" "V4SF")])
1253 ;; ??? In theory we can match memory for the MMX alternative, but allowing
1254 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
1255 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Forms a V2SF from SF operands 1 and 2.  Two alternatives target `x'
;; registers (unpcklps, movss) and two target `y' registers (punpckldq,
;; movd).  The load alternatives (movss, movd) take operand 1 from memory
;; and require operand 2 to satisfy the `C' constraint.
1256 (define_insn "*sse_concatv2sf"
1257 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
1259 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
1260 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
1263 unpcklps\t{%2, %0|%0, %2}
1264 movss\t{%1, %0|%0, %1}
1265 punpckldq\t{%2, %0|%0, %2}
1266 movd\t{%1, %0|%0, %1}"
1267 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
1268 (set_attr "mode" "V4SF,SF,DI,DI")])
;; Forms a V4SF from two V2SF operands; operand 1 is tied to the
;; destination.  Uses movlhps when operand 2 is a register and movhps when
;; it is in memory.
1270 (define_insn "*sse_concatv4sf"
1271 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1273 (match_operand:V2SF 1 "register_operand" " 0,0")
1274 (match_operand:V2SF 2 "nonimmediate_operand" " x,m")))]
1277 movlhps\t{%2, %0|%0, %2}
1278 movhps\t{%2, %0|%0, %2}"
1279 [(set_attr "type" "ssemov")
1280 (set_attr "mode" "V4SF,V2SF")])
;; V4SF vector-initialisation expander: forwards operands 0 and 1 to
;; ix86_expand_vector_init with `false' as the first argument.
1282 (define_expand "vec_initv4sf"
1283 [(match_operand:V4SF 0 "register_operand" "")
1284 (match_operand 1 "" "")]
1287 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Inserts SF operand 2 into a V4SF whose other input is operand 1 (either
;; tied to the destination or matching the `C' constraint).  The visible
;; templates are movss, movss and movd; the constraint strings list a
;; fourth alternative with a memory destination.
1291 (define_insn "*vec_setv4sf_0"
1292 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,Y ,m")
1295 (match_operand:SF 2 "general_operand" " x,m,*r,x*rfF"))
1296 (match_operand:V4SF 1 "vector_move_operand" " 0,C,C ,0")
1300 movss\t{%2, %0|%0, %2}
1301 movss\t{%2, %0|%0, %2}
1302 movd\t{%2, %0|%0, %2}
1304 [(set_attr "type" "ssemov")
1305 (set_attr "mode" "SF")])
;; Post-reload split (TARGET_SSE && reload_completed) for a V4SF memory
;; destination: the non-memory SF operand 1 is stored with a plain move
;; into the SFmode slot at byte offset 0 of operand 0.
1308 [(set (match_operand:V4SF 0 "memory_operand" "")
1311 (match_operand:SF 1 "nonmemory_operand" ""))
1314 "TARGET_SSE && reload_completed"
1317 emit_move_insn (adjust_address (operands[0], SFmode, 0), operands[1]);
;; V4SF element-set expander: forwards the vector (operand 0), the SF
;; value (operand 1) and the constant index (operand 2) to
;; ix86_expand_vector_set with `false' as the first argument.
1321 (define_expand "vec_setv4sf"
1322 [(match_operand:V4SF 0 "register_operand" "")
1323 (match_operand:SF 1 "register_operand" "")
1324 (match_operand 2 "const_int_operand" "")]
1327 ix86_expand_vector_set (false, operands[0], operands[1],
1328 INTVAL (operands[2]));
;; Extracts element 0 of V4SF operand 1 as an SF value.  The insn condition
;; rejects memory-to-memory.  After reload it is split into an ordinary
;; move: operand 1 is re-expressed in SFmode, either as a REG with the same
;; register number or through gen_lowpart.
1332 (define_insn_and_split "*vec_extractv4sf_0"
1333 [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,fr")
1335 (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m")
1336 (parallel [(const_int 0)])))]
1337 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
1339 "&& reload_completed"
1342 rtx op1 = operands[1];
1344 op1 = gen_rtx_REG (SFmode, REGNO (op1));
1346 op1 = gen_lowpart (SFmode, op1);
1347 emit_move_insn (operands[0], op1);
;; V4SF element-extract expander: forwards the SF destination (operand 0),
;; the vector (operand 1) and the constant index (operand 2) to
;; ix86_expand_vector_extract with `false' as the first argument.
1351 (define_expand "vec_extractv4sf"
1352 [(match_operand:SF 0 "register_operand" "")
1353 (match_operand:V4SF 1 "register_operand" "")
1354 (match_operand 2 "const_int_operand" "")]
1357 ix86_expand_vector_extract (false, operands[0], operands[1],
1358 INTVAL (operands[2]));
1362 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1364 ;; Parallel double-precision floating point arithmetic
1366 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; V2DF negation expander: all work is delegated to
;; ix86_expand_fp_absneg_operator (NEG, ...), after which it is DONE.
1368 (define_expand "negv2df2"
1369 [(set (match_operand:V2DF 0 "register_operand" "")
1370 (neg:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")))]
1372 "ix86_expand_fp_absneg_operator (NEG, V2DFmode, operands); DONE;")
;; V2DF absolute-value expander: all work is delegated to
;; ix86_expand_fp_absneg_operator (ABS, ...), after which it is DONE.
1374 (define_expand "absv2df2"
1375 [(set (match_operand:V2DF 0 "register_operand" "")
1376 (abs:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")))]
1378 "ix86_expand_fp_absneg_operator (ABS, V2DFmode, operands); DONE;")
;; V2DF addition expander: runs ix86_fixup_binary_operands_no_copy on the
;; operands before the (plus ...) template is emitted.
1380 (define_expand "addv2df3"
1381 [(set (match_operand:V2DF 0 "register_operand" "")
1382 (plus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1383 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1385 "ix86_fixup_binary_operands_no_copy (PLUS, V2DFmode, operands);")
;; addpd: commutative (`%') V2DF add with operand 1 tied to the
;; destination; requires TARGET_SSE2 and ix86_binary_operator_ok.
1387 (define_insn "*addv2df3"
1388 [(set (match_operand:V2DF 0 "register_operand" "=x")
1389 (plus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1390 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1391 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V2DFmode, operands)"
1392 "addpd\t{%2, %0|%0, %2}"
1393 [(set_attr "type" "sseadd")
1394 (set_attr "mode" "V2DF")])
;; addsd on V2DF operands: commutative (`%') add with operand 1 tied to the
;; destination; the "mode" attribute is DF.
;; The operand check passes V2DFmode, the mode of this pattern's operands
;; and the mode used by the other V2DF patterns in this file (it previously
;; passed V4SFmode).
1396 (define_insn "sse2_vmaddv2df3"
1397 [(set (match_operand:V2DF 0 "register_operand" "=x")
1399 (plus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1400 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1403 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V2DFmode, operands)"
1404 "addsd\t{%2, %0|%0, %2}"
1405 [(set_attr "type" "sseadd")
1406 (set_attr "mode" "DF")])
;; V2DF subtraction expander: runs ix86_fixup_binary_operands_no_copy on
;; the operands before the (minus ...) template is emitted.
1408 (define_expand "subv2df3"
1409 [(set (match_operand:V2DF 0 "register_operand" "")
1410 (minus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1411 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1413 "ix86_fixup_binary_operands_no_copy (MINUS, V2DFmode, operands);")
;; subpd: V2DF subtract; operand 1 must be a register tied to the
;; destination (no `%', the operation is not commutative).
1415 (define_insn "*subv2df3"
1416 [(set (match_operand:V2DF 0 "register_operand" "=x")
1417 (minus:V2DF (match_operand:V2DF 1 "register_operand" "0")
1418 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1420 "subpd\t{%2, %0|%0, %2}"
1421 [(set_attr "type" "sseadd")
1422 (set_attr "mode" "V2DF")])
;; subsd on V2DF operands; operand 1 is a register tied to the
;; destination and the "mode" attribute is DF.
1424 (define_insn "sse2_vmsubv2df3"
1425 [(set (match_operand:V2DF 0 "register_operand" "=x")
1427 (minus:V2DF (match_operand:V2DF 1 "register_operand" "0")
1428 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1432 "subsd\t{%2, %0|%0, %2}"
1433 [(set_attr "type" "sseadd")
1434 (set_attr "mode" "DF")])
;; V2DF multiplication expander: runs ix86_fixup_binary_operands_no_copy
;; on the operands before the (mult ...) template is emitted.
1436 (define_expand "mulv2df3"
1437 [(set (match_operand:V2DF 0 "register_operand" "")
1438 (mult:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1439 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1441 "ix86_fixup_binary_operands_no_copy (MULT, V2DFmode, operands);")
;; mulpd: commutative (`%') V2DF multiply with operand 1 tied to the
;; destination; requires TARGET_SSE2 and ix86_binary_operator_ok.
1443 (define_insn "*mulv2df3"
1444 [(set (match_operand:V2DF 0 "register_operand" "=x")
1445 (mult:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1446 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1447 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V2DFmode, operands)"
1448 "mulpd\t{%2, %0|%0, %2}"
1449 [(set_attr "type" "ssemul")
1450 (set_attr "mode" "V2DF")])
;; mulsd on V2DF operands: commutative (`%') multiply with operand 1 tied
;; to the destination; the "mode" attribute is DF.
1452 (define_insn "sse2_vmmulv2df3"
1453 [(set (match_operand:V2DF 0 "register_operand" "=x")
1455 (mult:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1456 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1459 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V2DFmode, operands)"
1460 "mulsd\t{%2, %0|%0, %2}"
1461 [(set_attr "type" "ssemul")
1462 (set_attr "mode" "DF")])
;; V2DF division expander: runs ix86_fixup_binary_operands_no_copy on the
;; operands before the (div ...) template is emitted.  Operand 1 must
;; already be a register.
1464 (define_expand "divv2df3"
1465 [(set (match_operand:V2DF 0 "register_operand" "")
1466 (div:V2DF (match_operand:V2DF 1 "register_operand" "")
1467 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1469 "ix86_fixup_binary_operands_no_copy (DIV, V2DFmode, operands);")
;; divpd: V2DF divide; operand 1 is a register tied to the destination.
1471 (define_insn "*divv2df3"
1472 [(set (match_operand:V2DF 0 "register_operand" "=x")
1473 (div:V2DF (match_operand:V2DF 1 "register_operand" "0")
1474 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1476 "divpd\t{%2, %0|%0, %2}"
1477 [(set_attr "type" "ssediv")
1478 (set_attr "mode" "V2DF")])
;; divsd on V2DF operands; operand 1 is a register tied to the
;; destination and the "mode" attribute is DF.
1480 (define_insn "sse2_vmdivv2df3"
1481 [(set (match_operand:V2DF 0 "register_operand" "=x")
1483 (div:V2DF (match_operand:V2DF 1 "register_operand" "0")
1484 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1488 "divsd\t{%2, %0|%0, %2}"
1489 [(set_attr "type" "ssediv")
1490 (set_attr "mode" "DF")])
;; sqrtpd: V2DF square root of operand 1 (register or memory).
1492 (define_insn "sqrtv2df2"
1493 [(set (match_operand:V2DF 0 "register_operand" "=x")
1494 (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
1496 "sqrtpd\t{%1, %0|%0, %1}"
1497 [(set_attr "type" "sse")
1498 (set_attr "mode" "V2DF")])
;; sqrtsd on V2DF operands: operand 1 is the sqrt source, operand 2 is
;; tied to the destination; the "mode" attribute is DF.
;; Operand 1 uses nonimmediate_operand so that the `m' half of its "xm"
;; constraint is reachable; with register_operand the predicate rejected
;; memory before the constraint was ever consulted.  This matches how the
;; other scalar V2DF patterns in this file accept their "xm" source.
1500 (define_insn "sse2_vmsqrtv2df2"
1501 [(set (match_operand:V2DF 0 "register_operand" "=x")
1503 (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
1504 (match_operand:V2DF 2 "register_operand" "0")
1507 "sqrtsd\t{%1, %0|%0, %1}"
1508 [(set_attr "type" "sse")
1509 (set_attr "mode" "DF")])
1511 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
1512 ;; isn't really correct, as those rtl operators aren't defined when
1513 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
;; V2DF signed-max expander.  When flag_finite_math_only is off, operand 1
;; is first forced into a register; then the operands go through
;; ix86_fixup_binary_operands_no_copy.
1515 (define_expand "smaxv2df3"
1516 [(set (match_operand:V2DF 0 "register_operand" "")
1517 (smax:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1518 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1521 if (!flag_finite_math_only)
1522 operands[1] = force_reg (V2DFmode, operands[1]);
1523 ix86_fixup_binary_operands_no_copy (SMAX, V2DFmode, operands);
;; maxpd, commutative form: only matches under flag_finite_math_only (plus
;; TARGET_SSE2 and ix86_binary_operator_ok), where operand 1 may be
;; swapped with operand 2 (`%').
1526 (define_insn "*smaxv2df3_finite"
1527 [(set (match_operand:V2DF 0 "register_operand" "=x")
1528 (smax:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1529 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1530 "TARGET_SSE2 && flag_finite_math_only
1531 && ix86_binary_operator_ok (SMAX, V2DFmode, operands)"
1532 "maxpd\t{%2, %0|%0, %2}"
1533 [(set_attr "type" "sseadd")
1534 (set_attr "mode" "V2DF")])
;; maxpd, non-commutative form: operand 1 is a register tied to the
;; destination and the operand order is fixed (no `%').
1536 (define_insn "*smaxv2df3"
1537 [(set (match_operand:V2DF 0 "register_operand" "=x")
1538 (smax:V2DF (match_operand:V2DF 1 "register_operand" "0")
1539 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1541 "maxpd\t{%2, %0|%0, %2}"
1542 [(set_attr "type" "sseadd")
1543 (set_attr "mode" "V2DF")])
;; maxsd, commutative form: only matches under flag_finite_math_only (plus
;; TARGET_SSE2 and ix86_binary_operator_ok); "mode" attribute is DF.
1545 (define_insn "*sse2_vmsmaxv2df3_finite"
1546 [(set (match_operand:V2DF 0 "register_operand" "=x")
1548 (smax:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1549 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1552 "TARGET_SSE2 && flag_finite_math_only
1553 && ix86_binary_operator_ok (SMAX, V2DFmode, operands)"
1554 "maxsd\t{%2, %0|%0, %2}"
1555 [(set_attr "type" "sseadd")
1556 (set_attr "mode" "DF")])
;; maxsd, non-commutative form: operand 1 is a register tied to the
;; destination; "mode" attribute is DF.
1558 (define_insn "sse2_vmsmaxv2df3"
1559 [(set (match_operand:V2DF 0 "register_operand" "=x")
1561 (smax:V2DF (match_operand:V2DF 1 "register_operand" "0")
1562 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1566 "maxsd\t{%2, %0|%0, %2}"
1567 [(set_attr "type" "sseadd")
1568 (set_attr "mode" "DF")])
;; V2DF signed-min expander.  When flag_finite_math_only is off, operand 1
;; is first forced into a register; then the operands go through
;; ix86_fixup_binary_operands_no_copy.
1570 (define_expand "sminv2df3"
1571 [(set (match_operand:V2DF 0 "register_operand" "")
1572 (smin:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1573 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1576 if (!flag_finite_math_only)
1577 operands[1] = force_reg (V2DFmode, operands[1]);
1578 ix86_fixup_binary_operands_no_copy (SMIN, V2DFmode, operands);
;; minpd, commutative form: only matches under flag_finite_math_only (plus
;; TARGET_SSE2 and ix86_binary_operator_ok), where operand 1 may be
;; swapped with operand 2 (`%').
1581 (define_insn "*sminv2df3_finite"
1582 [(set (match_operand:V2DF 0 "register_operand" "=x")
1583 (smin:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1584 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1585 "TARGET_SSE2 && flag_finite_math_only
1586 && ix86_binary_operator_ok (SMIN, V2DFmode, operands)"
1587 "minpd\t{%2, %0|%0, %2}"
1588 [(set_attr "type" "sseadd")
1589 (set_attr "mode" "V2DF")])
;; minpd, non-commutative form: operand 1 is a register tied to the
;; destination and the operand order is fixed (no `%').
1591 (define_insn "*sminv2df3"
1592 [(set (match_operand:V2DF 0 "register_operand" "=x")
1593 (smin:V2DF (match_operand:V2DF 1 "register_operand" "0")
1594 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1596 "minpd\t{%2, %0|%0, %2}"
1597 [(set_attr "type" "sseadd")
1598 (set_attr "mode" "V2DF")])
;; minsd, commutative form: only matches under flag_finite_math_only (plus
;; TARGET_SSE2 and ix86_binary_operator_ok); "mode" attribute is DF.
1600 (define_insn "*sse2_vmsminv2df3_finite"
1601 [(set (match_operand:V2DF 0 "register_operand" "=x")
1603 (smin:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1604 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1607 "TARGET_SSE2 && flag_finite_math_only
1608 && ix86_binary_operator_ok (SMIN, V2DFmode, operands)"
1609 "minsd\t{%2, %0|%0, %2}"
1610 [(set_attr "type" "sseadd")
1611 (set_attr "mode" "DF")])
;; minsd, non-commutative form: operand 1 is a register tied to the
;; destination; "mode" attribute is DF.
1613 (define_insn "sse2_vmsminv2df3"
1614 [(set (match_operand:V2DF 0 "register_operand" "=x")
1616 (smin:V2DF (match_operand:V2DF 1 "register_operand" "0")
1617 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1621 "minsd\t{%2, %0|%0, %2}"
1622 [(set_attr "type" "sseadd")
1623 (set_attr "mode" "DF")])
;; addsubpd: the RTL contains a (minus ...) of operands 1 and 2 via
;; match_dup alongside the first expression over the same operands;
;; operand 1 is tied to the destination.
1625 (define_insn "sse3_addsubv2df3"
1626 [(set (match_operand:V2DF 0 "register_operand" "=x")
1629 (match_operand:V2DF 1 "register_operand" "0")
1630 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1631 (minus:V2DF (match_dup 1) (match_dup 2))
1634 "addsubpd\t{%2, %0|%0, %2}"
1635 [(set_attr "type" "sseadd")
1636 (set_attr "mode" "V2DF")])
;; haddpd: for each of operands 1 and 2, DF elements 0 and 1 are selected
;; and combined; operand 1 is tied to the destination.
1638 (define_insn "sse3_haddv2df3"
1639 [(set (match_operand:V2DF 0 "register_operand" "=x")
1643 (match_operand:V2DF 1 "register_operand" "0")
1644 (parallel [(const_int 0)]))
1645 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1648 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
1649 (parallel [(const_int 0)]))
1650 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1652 "haddpd\t{%2, %0|%0, %2}"
1653 [(set_attr "type" "sseadd")
1654 (set_attr "mode" "V2DF")])
;; hsubpd: for each of operands 1 and 2, DF elements 0 and 1 are selected
;; and combined; operand 1 is tied to the destination.
1656 (define_insn "sse3_hsubv2df3"
1657 [(set (match_operand:V2DF 0 "register_operand" "=x")
1661 (match_operand:V2DF 1 "register_operand" "0")
1662 (parallel [(const_int 0)]))
1663 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1666 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
1667 (parallel [(const_int 0)]))
1668 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1670 "hsubpd\t{%2, %0|%0, %2}"
1671 [(set_attr "type" "sseadd")
1672 (set_attr "mode" "V2DF")])
;; V2DF sum-reduction expander: emits sse3_haddv2df3 with operand 1
;; supplied as both source operands.
1674 (define_expand "reduc_splus_v2df"
1675 [(match_operand:V2DF 0 "register_operand" "")
1676 (match_operand:V2DF 1 "register_operand" "")]
1679 emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
1683 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1685 ;; Parallel double-precision floating point comparisons
1687 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cmp<cond>pd: V2DF comparison whose operator (operand 3) must satisfy
;; sse_comparison_operator; %D3 prints the condition into the mnemonic.
;; Operand 1 is tied to the destination.
1689 (define_insn "sse2_maskcmpv2df3"
1690 [(set (match_operand:V2DF 0 "register_operand" "=x")
1691 (match_operator:V2DF 3 "sse_comparison_operator"
1692 [(match_operand:V2DF 1 "register_operand" "0")
1693 (match_operand:V2DF 2 "nonimmediate_operand" "xm")]))]
1695 "cmp%D3pd\t{%2, %0|%0, %2}"
1696 [(set_attr "type" "ssecmp")
1697 (set_attr "mode" "V2DF")])
;; cmp<cond>sd: same comparison operator scheme as the packed form
;; (operand 3, printed with %D3), with the "mode" attribute set to DF.
1699 (define_insn "sse2_vmmaskcmpv2df3"
1700 [(set (match_operand:V2DF 0 "register_operand" "=x")
1702 (match_operator:V2DF 3 "sse_comparison_operator"
1703 [(match_operand:V2DF 1 "register_operand" "0")
1704 (match_operand:V2DF 2 "nonimmediate_operand" "xm")])
1708 "cmp%D3sd\t{%2, %0|%0, %2}"
1709 [(set_attr "type" "ssecmp")
1710 (set_attr "mode" "DF")])
;; comisd: sets FLAGS_REG in CCFP mode from element 0 of V2DF operand 0
;; and element 0 of V2DF operand 1 (register or memory).
1712 (define_insn "sse2_comi"
1713 [(set (reg:CCFP FLAGS_REG)
1716 (match_operand:V2DF 0 "register_operand" "x")
1717 (parallel [(const_int 0)]))
1719 (match_operand:V2DF 1 "nonimmediate_operand" "xm")
1720 (parallel [(const_int 0)]))))]
1722 "comisd\t{%1, %0|%0, %1}"
1723 [(set_attr "type" "ssecomi")
1724 (set_attr "mode" "DF")])
;; ucomisd: sets FLAGS_REG in CCFPU mode from element 0 of V2DF operand 0
;; and element 0 of V2DF operand 1 (register or memory).
1726 (define_insn "sse2_ucomi"
1727 [(set (reg:CCFPU FLAGS_REG)
1730 (match_operand:V2DF 0 "register_operand" "x")
1731 (parallel [(const_int 0)]))
1733 (match_operand:V2DF 1 "nonimmediate_operand" "xm")
1734 (parallel [(const_int 0)]))))]
1736 "ucomisd\t{%1, %0|%0, %1}"
1737 [(set_attr "type" "ssecomi")
1738 (set_attr "mode" "DF")])
;; V2DF vector-conditional expander: operator 3 compares operands 4 and 5,
;; operands 1 and 2 are the two value inputs.  The preparation code tests
;; the return value of ix86_expand_fp_vcond (operands).
1740 (define_expand "vcondv2df"
1741 [(set (match_operand:V2DF 0 "register_operand" "")
1743 (match_operator 3 ""
1744 [(match_operand:V2DF 4 "nonimmediate_operand" "")
1745 (match_operand:V2DF 5 "nonimmediate_operand" "")])
1746 (match_operand:V2DF 1 "general_operand" "")
1747 (match_operand:V2DF 2 "general_operand" "")))]
1750 if (ix86_expand_fp_vcond (operands))
1756 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1758 ;; Parallel double-precision floating point logical operations
1760 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; V2DF bitwise-AND expander: runs ix86_fixup_binary_operands_no_copy on
;; the operands before the (and ...) template is emitted.
1762 (define_expand "andv2df3"
1763 [(set (match_operand:V2DF 0 "register_operand" "")
1764 (and:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1765 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1767 "ix86_fixup_binary_operands_no_copy (AND, V2DFmode, operands);")
;; andpd: commutative (`%') V2DF AND with operand 1 tied to the
;; destination; requires TARGET_SSE2 and ix86_binary_operator_ok.
1769 (define_insn "*andv2df3"
1770 [(set (match_operand:V2DF 0 "register_operand" "=x")
1771 (and:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1772 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1773 "TARGET_SSE2 && ix86_binary_operator_ok (AND, V2DFmode, operands)"
1774 "andpd\t{%2, %0|%0, %2}"
1775 [(set_attr "type" "sselog")
1776 (set_attr "mode" "V2DF")])
;; andnpd: AND of the complement of operand 1 (a register tied to the
;; destination) with operand 2.
1778 (define_insn "sse2_nandv2df3"
1779 [(set (match_operand:V2DF 0 "register_operand" "=x")
1780 (and:V2DF (not:V2DF (match_operand:V2DF 1 "register_operand" "0"))
1781 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1783 "andnpd\t{%2, %0|%0, %2}"
1784 [(set_attr "type" "sselog")
1785 (set_attr "mode" "V2DF")])
;; V2DF bitwise-OR expander: runs ix86_fixup_binary_operands_no_copy on
;; the operands before the (ior ...) template is emitted.
1787 (define_expand "iorv2df3"
1788 [(set (match_operand:V2DF 0 "register_operand" "")
1789 (ior:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1790 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1792 "ix86_fixup_binary_operands_no_copy (IOR, V2DFmode, operands);")
;; orpd: commutative (`%') V2DF OR with operand 1 tied to the
;; destination; requires TARGET_SSE2 and ix86_binary_operator_ok.
1794 (define_insn "*iorv2df3"
1795 [(set (match_operand:V2DF 0 "register_operand" "=x")
1796 (ior:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1797 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1798 "TARGET_SSE2 && ix86_binary_operator_ok (IOR, V2DFmode, operands)"
1799 "orpd\t{%2, %0|%0, %2}"
1800 [(set_attr "type" "sselog")
1801 (set_attr "mode" "V2DF")])
;; V2DF bitwise-XOR expander: runs ix86_fixup_binary_operands_no_copy on
;; the operands before the (xor ...) template is emitted.
1803 (define_expand "xorv2df3"
1804 [(set (match_operand:V2DF 0 "register_operand" "")
1805 (xor:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1806 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1808 "ix86_fixup_binary_operands_no_copy (XOR, V2DFmode, operands);")
;; xorpd: commutative (`%') V2DF XOR with operand 1 tied to the
;; destination; requires TARGET_SSE2 and ix86_binary_operator_ok.
1810 (define_insn "*xorv2df3"
1811 [(set (match_operand:V2DF 0 "register_operand" "=x")
1812 (xor:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1813 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1814 "TARGET_SSE2 && ix86_binary_operator_ok (XOR, V2DFmode, operands)"
1815 "xorpd\t{%2, %0|%0, %2}"
1816 [(set_attr "type" "sselog")
1817 (set_attr "mode" "V2DF")])
1819 ;; Also define scalar versions. These are used for abs, neg, and
1820 ;; conditional move. Using subregs into vector modes causes register
1821 ;; allocation lossage. These patterns do not allow memory operands
1822 ;; because the native instructions read the full 128-bits.
;; Scalar DF AND on registers only, emitted as the packed andpd (hence the
;; V2DF "mode" attribute).
1824 (define_insn "*anddf3"
1825 [(set (match_operand:DF 0 "register_operand" "=x")
1826 (and:DF (match_operand:DF 1 "register_operand" "0")
1827 (match_operand:DF 2 "register_operand" "x")))]
1829 "andpd\t{%2, %0|%0, %2}"
1830 [(set_attr "type" "sselog")
1831 (set_attr "mode" "V2DF")])
;; Scalar DF AND-NOT on registers only (complement of operand 1 ANDed with
;; operand 2), emitted as the packed andnpd.
1833 (define_insn "*nanddf3"
1834 [(set (match_operand:DF 0 "register_operand" "=x")
1835 (and:DF (not:DF (match_operand:DF 1 "register_operand" "0"))
1836 (match_operand:DF 2 "register_operand" "x")))]
1838 "andnpd\t{%2, %0|%0, %2}"
1839 [(set_attr "type" "sselog")
1840 (set_attr "mode" "V2DF")])
;; Scalar DF OR on registers only, emitted as the packed orpd (hence the
;; V2DF "mode" attribute).
1842 (define_insn "*iordf3"
1843 [(set (match_operand:DF 0 "register_operand" "=x")
1844 (ior:DF (match_operand:DF 1 "register_operand" "0")
1845 (match_operand:DF 2 "register_operand" "x")))]
1847 "orpd\t{%2, %0|%0, %2}"
1848 [(set_attr "type" "sselog")
1849 (set_attr "mode" "V2DF")])
;; Scalar DF XOR on registers only, emitted as the packed xorpd (hence the
;; V2DF "mode" attribute).
1851 (define_insn "*xordf3"
1852 [(set (match_operand:DF 0 "register_operand" "=x")
1853 (xor:DF (match_operand:DF 1 "register_operand" "0")
1854 (match_operand:DF 2 "register_operand" "x")))]
1856 "xorpd\t{%2, %0|%0, %2}"
1857 [(set_attr "type" "sselog")
1858 (set_attr "mode" "V2DF")])
1860 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1862 ;; Parallel double-precision floating point conversion operations
1864 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2pd: V2SI operand 1 (a `y' register or memory) converted to V2DF
;; with the RTL `float' operator.  The "unit" attribute is mmx only for
;; the register alternative.
1866 (define_insn "sse2_cvtpi2pd"
1867 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1868 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
1870 "cvtpi2pd\t{%1, %0|%0, %1}"
1871 [(set_attr "type" "ssecvt")
1872 (set_attr "unit" "mmx,*")
1873 (set_attr "mode" "V2DF")])
;; cvtpd2pi: V2DF operand 1 to a V2SI result in a `y' register, modeled as
;; an UNSPEC_FIX_NOTRUNC unspec.
1875 (define_insn "sse2_cvtpd2pi"
1876 [(set (match_operand:V2SI 0 "register_operand" "=y")
1877 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
1878 UNSPEC_FIX_NOTRUNC))]
1880 "cvtpd2pi\t{%1, %0|%0, %1}"
1881 [(set_attr "type" "ssecvt")
1882 (set_attr "unit" "mmx")
1883 (set_attr "mode" "DI")])
;; cvttpd2pi: V2DF operand 1 to a V2SI result in a `y' register, expressed
;; with the RTL `fix' operator.
;; NOTE(review): "mode" is TI here but DI on sse2_cvtpd2pi, which has the
;; same operand modes -- confirm the difference is intentional.
1885 (define_insn "sse2_cvttpd2pi"
1886 [(set (match_operand:V2SI 0 "register_operand" "=y")
1887 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
1889 "cvttpd2pi\t{%1, %0|%0, %1}"
1890 [(set_attr "type" "ssecvt")
1891 (set_attr "unit" "mmx")
1892 (set_attr "mode" "TI")])
;; cvtsi2sd: SI operand 2 (general register or memory) converted with
;; float:DF; V2DF operand 1 is tied to the destination.
1894 (define_insn "sse2_cvtsi2sd"
1895 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1898 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
1899 (match_operand:V2DF 1 "register_operand" "0,0")
1902 "cvtsi2sd\t{%2, %0|%0, %2}"
1903 [(set_attr "type" "sseicvt")
1904 (set_attr "mode" "DF")
1905 (set_attr "athlon_decode" "double,direct")])
;; cvtsi2sdq: DI operand 2 (general register or memory) converted with
;; float:DF; V2DF operand 1 is tied to the destination.  Requires
;; TARGET_SSE2 and TARGET_64BIT.
1907 (define_insn "sse2_cvtsi2sdq"
1908 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1911 (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m")))
1912 (match_operand:V2DF 1 "register_operand" "0,0")
1914 "TARGET_SSE2 && TARGET_64BIT"
1915 "cvtsi2sdq\t{%2, %0|%0, %2}"
1916 [(set_attr "type" "sseicvt")
1917 (set_attr "mode" "DF")
1918 (set_attr "athlon_decode" "double,direct")])
;; cvtsd2si: SI general-register result from element 0 of V2DF operand 1,
;; modeled as an UNSPEC_FIX_NOTRUNC unspec.
1920 (define_insn "sse2_cvtsd2si"
1921 [(set (match_operand:SI 0 "register_operand" "=r,r")
1924 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
1925 (parallel [(const_int 0)]))]
1926 UNSPEC_FIX_NOTRUNC))]
1928 "cvtsd2si\t{%1, %0|%0, %1}"
1929 [(set_attr "type" "sseicvt")
1930 (set_attr "athlon_decode" "double,vector")
1931 (set_attr "mode" "SI")])
;; cvtsd2siq: DI general-register result from element 0 of V2DF operand 1,
;; modeled as an UNSPEC_FIX_NOTRUNC unspec.  Requires TARGET_SSE2 and
;; TARGET_64BIT.
1933 (define_insn "sse2_cvtsd2siq"
1934 [(set (match_operand:DI 0 "register_operand" "=r,r")
1937 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
1938 (parallel [(const_int 0)]))]
1939 UNSPEC_FIX_NOTRUNC))]
1940 "TARGET_SSE2 && TARGET_64BIT"
1941 "cvtsd2siq\t{%1, %0|%0, %1}"
1942 [(set_attr "type" "sseicvt")
1943 (set_attr "athlon_decode" "double,vector")
1944 (set_attr "mode" "DI")])
;; cvttsd2si: SI general-register result from element 0 of V2DF operand 1
;; (register or memory alternative).
1946 (define_insn "sse2_cvttsd2si"
1947 [(set (match_operand:SI 0 "register_operand" "=r,r")
1950 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
1951 (parallel [(const_int 0)]))))]
1953 "cvttsd2si\t{%1, %0|%0, %1}"
1954 [(set_attr "type" "sseicvt")
1955 (set_attr "mode" "SI")
1956 (set_attr "athlon_decode" "double,vector")])
;; cvttsd2siq: DI general-register result from element 0 of V2DF operand 1
;; (register or memory alternative).  Requires TARGET_SSE2 and
;; TARGET_64BIT.
1958 (define_insn "sse2_cvttsd2siq"
1959 [(set (match_operand:DI 0 "register_operand" "=r,r")
1962 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
1963 (parallel [(const_int 0)]))))]
1964 "TARGET_SSE2 && TARGET_64BIT"
1965 "cvttsd2siq\t{%1, %0|%0, %1}"
1966 [(set_attr "type" "sseicvt")
1967 (set_attr "mode" "DI")
1968 (set_attr "athlon_decode" "double,vector")])
;; cvtdq2pd: V2DF result computed from elements 0 and 1 of V4SI operand 1
;; (register or memory).
1970 (define_insn "sse2_cvtdq2pd"
1971 [(set (match_operand:V2DF 0 "register_operand" "=x")
1974 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
1975 (parallel [(const_int 0) (const_int 1)]))))]
1977 "cvtdq2pd\t{%1, %0|%0, %1}"
1978 [(set_attr "type" "ssecvt")
1979 (set_attr "mode" "V2DF")])
;; Expander for cvtpd2dq: the V2SI unspec of operand 1 forms part of a
;; V4SI result; the preparation statement sets operands[2] to the V2SI
;; zero constant.
1981 (define_expand "sse2_cvtpd2dq"
1982 [(set (match_operand:V4SI 0 "register_operand" "")
1984 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "")]
1988 "operands[2] = CONST0_RTX (V2SImode);")
;; cvtpd2dq: V4SI result built from the V2SI unspec of V2DF operand 1
;; together with operand 2, which must satisfy const0_operand.
1990 (define_insn "*sse2_cvtpd2dq"
1991 [(set (match_operand:V4SI 0 "register_operand" "=x")
1993 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
1995 (match_operand:V2SI 2 "const0_operand" "")))]
1997 "cvtpd2dq\t{%1, %0|%0, %1}"
1998 [(set_attr "type" "ssecvt")
1999 (set_attr "mode" "TI")])
;; Expander for cvttpd2dq: the fix:V2SI of operand 1 forms part of a V4SI
;; result; the preparation statement sets operands[2] to the V2SI zero
;; constant.
2001 (define_expand "sse2_cvttpd2dq"
2002 [(set (match_operand:V4SI 0 "register_operand" "")
2004 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" ""))
2007 "operands[2] = CONST0_RTX (V2SImode);")
;; cvttpd2dq: V4SI result built from fix:V2SI of V2DF operand 1 together
;; with operand 2, which must satisfy const0_operand.
2009 (define_insn "*sse2_cvttpd2dq"
2010 [(set (match_operand:V4SI 0 "register_operand" "=x")
2012 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2013 (match_operand:V2SI 2 "const0_operand" "")))]
2015 "cvttpd2dq\t{%1, %0|%0, %1}"
2016 [(set_attr "type" "ssecvt")
2017 (set_attr "mode" "TI")])
;; cvtsd2ss: float_truncate:V2SF of V2DF operand 2 (register or memory);
;; V4SF operand 1 is tied to the destination.
2019 (define_insn "sse2_cvtsd2ss"
2020 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2023 (float_truncate:V2SF
2024 (match_operand:V2DF 2 "nonimmediate_operand" "x,m")))
2025 (match_operand:V4SF 1 "register_operand" "0,0")
2028 "cvtsd2ss\t{%2, %0|%0, %2}"
2029 [(set_attr "type" "ssecvt")
2030 (set_attr "athlon_decode" "vector,double")
2031 (set_attr "mode" "SF")])
;; cvtss2sd: uses elements 0 and 1 of V4SF operand 2 (register or memory);
;; V2DF operand 1 is tied to the destination.
2033 (define_insn "sse2_cvtss2sd"
2034 [(set (match_operand:V2DF 0 "register_operand" "=x")
2038 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
2039 (parallel [(const_int 0) (const_int 1)])))
2040 (match_operand:V2DF 1 "register_operand" "0")
2043 "cvtss2sd\t{%2, %0|%0, %2}"
2044 [(set_attr "type" "ssecvt")
2045 (set_attr "mode" "DF")])
;; Expander for cvtpd2ps: float_truncate:V2SF of operand 1 forms part of a
;; V4SF result; the preparation statement sets operands[2] to the V2SF
;; zero constant.
;; Operand 1 carries an empty constraint string, like every other
;; define_expand operand in this file (it previously had "xm"); the
;; matching define_insn keeps the real "xm" constraint.
2047 (define_expand "sse2_cvtpd2ps"
2048 [(set (match_operand:V4SF 0 "register_operand" "")
2050 (float_truncate:V2SF
2051 (match_operand:V2DF 1 "nonimmediate_operand" ""))
2054 "operands[2] = CONST0_RTX (V2SFmode);")
;; cvtpd2ps: V4SF result built from float_truncate:V2SF of V2DF operand 1
;; together with operand 2, which must satisfy const0_operand.
2056 (define_insn "*sse2_cvtpd2ps"
2057 [(set (match_operand:V4SF 0 "register_operand" "=x")
2059 (float_truncate:V2SF
2060 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2061 (match_operand:V2SF 2 "const0_operand" "")))]
2063 "cvtpd2ps\t{%1, %0|%0, %1}"
2064 [(set_attr "type" "ssecvt")
2065 (set_attr "mode" "V4SF")])
;; cvtps2pd: V2DF result computed from elements 0 and 1 of V4SF operand 1
;; (register or memory).
2067 (define_insn "sse2_cvtps2pd"
2068 [(set (match_operand:V2DF 0 "register_operand" "=x")
2071 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
2072 (parallel [(const_int 0) (const_int 1)]))))]
2074 "cvtps2pd\t{%1, %0|%0, %1}"
2075 [(set_attr "type" "ssecvt")
2076 (set_attr "mode" "V2DF")])
2078 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2080 ;; Parallel double-precision floating point element swizzling
2082 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; unpckhpd and its memory forms.  Alternatives: unpckhpd with operand 1
;; tied to the destination, movlpd from the high half (%H1) of offsettable
;; memory operand 1, and movhpd storing operand 1 to a memory destination.
;; The condition rejects operands 1 and 2 both being memory.
2084 (define_insn "sse2_unpckhpd"
2085 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
2088 (match_operand:V2DF 1 "nonimmediate_operand" " 0,o,x")
2089 (match_operand:V2DF 2 "nonimmediate_operand" " x,0,0"))
2090 (parallel [(const_int 1)
2092 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2094 unpckhpd\t{%2, %0|%0, %2}
2095 movlpd\t{%H1, %0|%0, %H1}
2096 movhpd\t{%1, %0|%0, %1}"
2097 [(set_attr "type" "sselog,ssemov,ssemov")
2098 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; movddup: single-source V2DF shuffle of operand 1.  Alternative 0 loads
;; into an `x' register from a register or memory; alternative 1 has an
;; offsettable memory destination and a register source.
;; The condition rejects memory-to-memory by testing operands 0 and 1, the
;; only operands this pattern declares (it previously tested operands[2],
;; which this pattern does not define).
2100 (define_insn "*sse3_movddup"
2101 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,o")
2104 (match_operand:V2DF 1 "nonimmediate_operand" "xm,x")
2106 (parallel [(const_int 0)
2108 "TARGET_SSE3 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2110 movddup\t{%1, %0|%0, %1}
2112 [(set_attr "type" "sselog,ssemov")
2113 (set_attr "mode" "V2DF")])
;; Post-reload split (TARGET_SSE3 && reload_completed) for a V2DF memory
;; destination with a register source: the DFmode view of the source
;; register is stored twice, at byte offsets 0 and 8 of operand 0.
2116 [(set (match_operand:V2DF 0 "memory_operand" "")
2119 (match_operand:V2DF 1 "register_operand" "")
2121 (parallel [(const_int 0)
2123 "TARGET_SSE3 && reload_completed"
2126 rtx low = gen_rtx_REG (DFmode, REGNO (operands[1]));
2127 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
2128 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
;; Unpack-low of two V2DF operands (selector starts at element 0).  Operand 1
;; is always tied to the destination.  Alternatives, in constraint order:
;;   0: operand 2 in a register          -> unpcklpd
;;   1: operand 2 in memory              -> movhpd load
;;   2: offsettable-memory destination   -> movlpd store of register operand 2
;;      into the high half of the destination (%H0)
;; The insn condition rejects operands 1 and 2 both being memory.
2132 (define_insn "sse2_unpcklpd"
2133 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o")
2136 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0")
2137 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x"))
2138 (parallel [(const_int 0)
2140 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2142 unpcklpd\t{%2, %0|%0, %2}
2143 movhpd\t{%2, %0|%0, %2}
2144 movlpd\t{%2, %H0|%H0, %2}"
2145 [(set_attr "type" "sselog,ssemov,ssemov")
2146 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; Expander for shufpd taking the raw immediate mask as SI operand 3.  It
;; decodes the mask into explicit element selectors and emits sse2_shufpd_1;
;; bit 1 of the mask chooses selector 3 or 2 for the last argument.
2148 (define_expand "sse2_shufpd"
2149 [(match_operand:V2DF 0 "register_operand" "")
2150 (match_operand:V2DF 1 "register_operand" "")
2151 (match_operand:V2DF 2 "nonimmediate_operand" "")
2152 (match_operand:SI 3 "const_int_operand" "")]
2155 int mask = INTVAL (operands[3]);
2156 emit_insn (gen_sse2_shufpd_1 (operands[0], operands[1], operands[2],
2158 GEN_INT (mask & 2 ? 3 : 2)));
;; shufpd with explicit element selectors.  Operand 3 must satisfy
;; const_0_to_1_operand and operand 4 const_2_to_3_operand.  The output code
;; folds both back into the instruction immediate: bit 0 is operand 3 and
;; bit 1 is (operand 4 - 2).  Operand 3 is overwritten with that immediate
;; before the template is returned.
2162 (define_insn "sse2_shufpd_1"
2163 [(set (match_operand:V2DF 0 "register_operand" "=x")
2166 (match_operand:V2DF 1 "register_operand" "0")
2167 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
2168 (parallel [(match_operand 3 "const_0_to_1_operand" "")
2169 (match_operand 4 "const_2_to_3_operand" "")])))]
2173 mask = INTVAL (operands[3]);
2174 mask |= (INTVAL (operands[4]) - 2) << 1;
2175 operands[3] = GEN_INT (mask);
2177 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
2179 [(set_attr "type" "sselog")
2180 (set_attr "mode" "V2DF")])
;; Extract element 1 of V2DF operand 1 into DF operand 0.  Alternative 0
;; stores it to memory with movhpd.  The other two alternatives cover a
;; register tied to the source and an offsettable-memory source.  Destination
;; and source may not both be memory.
2182 (define_insn "sse2_storehpd"
2183 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x*fr")
2185 (match_operand:V2DF 1 "nonimmediate_operand" " x,0,o")
2186 (parallel [(const_int 1)])))]
2187 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2189 movhpd\t{%1, %0|%0, %1}
2192 [(set_attr "type" "ssemov,sselog1,ssemov")
2193 (set_attr "mode" "V1DF,V2DF,DF")])
;; Post-reload split: extracting element 1 from a V2DF memory operand into a
;; register becomes a plain DF move from byte offset 8 of that memory.
2196 [(set (match_operand:DF 0 "register_operand" "")
2198 (match_operand:V2DF 1 "memory_operand" "")
2199 (parallel [(const_int 1)])))]
2200 "TARGET_SSE2 && reload_completed"
2201 [(set (match_dup 0) (match_dup 1))]
2203 operands[1] = adjust_address (operands[1], DFmode, 8);
;; Extract element 0 of V2DF operand 1 into DF operand 0.  Alternative 0
;; stores it to memory with movlpd.  The other two alternatives take a
;; register or a memory source.  Destination and source may not both be
;; memory.
2206 (define_insn "sse2_storelpd"
2207 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x*fr")
2209 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m")
2210 (parallel [(const_int 0)])))]
2211 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2213 movlpd\t{%1, %0|%0, %1}
2216 [(set_attr "type" "ssemov")
2217 (set_attr "mode" "V1DF,DF,DF")])
;; Post-reload split: extracting element 0 into a register becomes a DF move
;; from a DFmode view of operand 1.  That view is built either with
;; gen_rtx_REG on the same register number or with gen_lowpart.
2220 [(set (match_operand:DF 0 "register_operand" "")
2222 (match_operand:V2DF 1 "nonimmediate_operand" "")
2223 (parallel [(const_int 0)])))]
2224 "TARGET_SSE2 && reload_completed"
2227 rtx op1 = operands[1];
2229 op1 = gen_rtx_REG (DFmode, REGNO (op1));
2231 op1 = gen_lowpart (DFmode, op1);
2232 emit_move_insn (operands[0], op1);
;; Build a V2DF from element 0 of V2DF operand 1 and DF operand 2.
;; Alternatives, in constraint order:
;;   0: operand 2 in memory                         -> movhpd load
;;   1: operand 2 in a register                     -> unpcklpd
;;   2: operand 2 tied to the destination, operand 1 in a register
;;                                                  -> shufpd with immediate 1
;;   3: offsettable-memory destination tied to operand 1 (type "other")
;; The insn condition rejects operands 1 and 2 both being memory.
2236 (define_insn "sse2_loadhpd"
2237 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,o")
2240 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,x,0")
2241 (parallel [(const_int 0)]))
2242 (match_operand:DF 2 "nonimmediate_operand" " m,x,0,x*fr")))]
2243 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2245 movhpd\t{%2, %0|%0, %2}
2246 unpcklpd\t{%2, %0|%0, %2}
2247 shufpd\t{$1, %1, %0|%0, %1, 1}
2249 [(set_attr "type" "ssemov,sselog,sselog,other")
2250 (set_attr "mode" "V1DF,V2DF,V2DF,DF")])
;; Post-reload split for the memory-destination form: element 0 of the
;; destination is re-selected from the destination itself (i.e. kept), so
;; only DF register operand 1 needs to be stored, at byte offset 8.
2253 [(set (match_operand:V2DF 0 "memory_operand" "")
2255 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
2256 (match_operand:DF 1 "register_operand" "")))]
2257 "TARGET_SSE2 && reload_completed"
2258 [(set (match_dup 0) (match_dup 1))]
2260 operands[0] = adjust_address (operands[0], DFmode, 8);
;; Build a V2DF from DF operand 2 and element 1 of V2DF operand 1.
;; Alternatives, in constraint order:
;;   0: operand 1 matches constraint C, operand 2 in memory   -> movsd load
;;   1: operand 1 tied to destination, operand 2 in memory    -> movlpd load
;;   2: operand 1 tied to destination, operand 2 in register  -> movsd
;;   3: operand 2 tied to destination, operand 1 in register  -> shufpd $2
;;   4: operand 2 tied to destination, operand 1 in offsettable memory
;;                                        -> movhpd from its high half (%H1)
;;   5: memory destination tied to operand 1 (type "other")
;; The insn condition rejects operands 1 and 2 both being memory.
2263 (define_insn "sse2_loadlpd"
2264 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m")
2266 (match_operand:DF 2 "nonimmediate_operand" " m,m,x,0,0,x*fr")
2268 (match_operand:V2DF 1 "vector_move_operand" " C,0,0,x,o,0")
2269 (parallel [(const_int 1)]))))]
2270 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2272 movsd\t{%2, %0|%0, %2}
2273 movlpd\t{%2, %0|%0, %2}
2274 movsd\t{%2, %0|%0, %2}
2275 shufpd\t{$2, %2, %0|%0, %2, 2}
2276 movhpd\t{%H1, %0|%0, %H1}
2278 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,other")
2279 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,DF")])
;; Post-reload split for the memory-destination form of sse2_loadlpd.
;; Element 1 of the destination is re-selected from the destination itself
;; (i.e. kept), and DF register operand 1 supplies the other element, which
;; is element 0.  The store therefore has to go to byte offset 0 of the V2DF
;; memory; offset 8 would overwrite the element that is supposed to be
;; preserved and leave element 0 stale.
2282 [(set (match_operand:V2DF 0 "memory_operand" "")
2284 (match_operand:DF 1 "register_operand" "")
2285 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
2286 "TARGET_SSE2 && reload_completed"
2287 [(set (match_dup 0) (match_dup 1))]
2289 operands[0] = adjust_address (operands[0], DFmode, 0);
;; Combines V2DF operands 2 and 1 into V2DF operand 0.  There are six
;; alternatives: alternatives 0-2 tie operand 1 to the destination and move
;; operand 2 with movsd/movlpd (register, memory source, memory destination);
;; alternatives 3-5 tie operand 2 to the destination and bring in operand 1
;; with shufpd $2 or movhps (register, offsettable-memory source via %H1,
;; offsettable-memory destination via %H0).
2292 (define_insn "sse2_movsd"
2293 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m,x,x,o")
2295 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x,0,0,0")
2296 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0,x,o,x")
2300 movsd\t{%2, %0|%0, %2}
2301 movlpd\t{%2, %0|%0, %2}
2302 movlpd\t{%2, %0|%0, %2}
2303 shufpd\t{$2, %2, %0|%0, %2, 2}
2304 movhps\t{%H1, %0|%0, %H1}
2305 movhps\t{%1, %H0|%H0, %1}"
2306 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
2307 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,V1DF")])
;; SSE3 V2DF duplicate: DF operand 1 (register or memory) is turned into a
;; V2DF register with movddup.
2309 (define_insn "*vec_dupv2df_sse3"
2310 [(set (match_operand:V2DF 0 "register_operand" "=x")
2312 (match_operand:DF 1 "nonimmediate_operand" "xm")))]
2314 "movddup\t{%1, %0|%0, %1}"
2315 [(set_attr "type" "sselog1")
2316 (set_attr "mode" "DF")])
;; V2DF duplicate variant whose DF source operand 1 must be a register tied
;; to the destination (constraint "0").
2318 (define_insn "*vec_dupv2df"
2319 [(set (match_operand:V2DF 0 "register_operand" "=x")
2321 (match_operand:DF 1 "register_operand" "0")))]
2324 [(set_attr "type" "sselog1")
2325 (set_attr "mode" "V4SF")])
;; SSE3 V2DF concat variant that is implemented with a single movddup from
;; DF operand 1 (register or memory) into a V2DF register.
2327 (define_insn "*vec_concatv2df_sse3"
2328 [(set (match_operand:V2DF 0 "register_operand" "=x")
2330 (match_operand:DF 1 "nonimmediate_operand" "xm")
2333 "movddup\t{%1, %0|%0, %1}"
2334 [(set_attr "type" "sselog1")
2335 (set_attr "mode" "DF")])
;; Generic V2DF build from DF operands 1 and 2.  Alternatives, in constraint
;; order:
;;   0: Y registers, operand 1 tied to destination   -> unpcklpd
;;   1: operand 2 in memory                          -> movhpd load
;;   2: operand 1 in memory, operand 2 matches C     -> movsd load
;;   3: x registers, operand 1 tied to destination   -> movlhps
;;   4: x register, operand 2 in memory              -> movhps load
2337 (define_insn "*vec_concatv2df"
2338 [(set (match_operand:V2DF 0 "register_operand" "=Y,Y,Y,x,x")
2340 (match_operand:DF 1 "nonimmediate_operand" " 0,0,m,0,0")
2341 (match_operand:DF 2 "vector_move_operand" " Y,m,C,x,m")))]
2344 unpcklpd\t{%2, %0|%0, %2}
2345 movhpd\t{%2, %0|%0, %2}
2346 movsd\t{%1, %0|%0, %1}
2347 movlhps\t{%2, %0|%0, %2}
2348 movhps\t{%2, %0|%0, %2}"
2349 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,ssemov")
2350 (set_attr "mode" "V2DF,V1DF,DF,V4SF,V2SF")])
;; Element insert for V2DF: hands the vector (operand 0), the DF value
;; (operand 1) and the constant element index (operand 2) to
;; ix86_expand_vector_set, passing false as its first argument.
2352 (define_expand "vec_setv2df"
2353 [(match_operand:V2DF 0 "register_operand" "")
2354 (match_operand:DF 1 "register_operand" "")
2355 (match_operand 2 "const_int_operand" "")]
2358 ix86_expand_vector_set (false, operands[0], operands[1],
2359 INTVAL (operands[2]));
;; Element extract for V2DF: hands the DF destination (operand 0), the
;; vector (operand 1) and the constant element index (operand 2) to
;; ix86_expand_vector_extract, passing false as its first argument.
2363 (define_expand "vec_extractv2df"
2364 [(match_operand:DF 0 "register_operand" "")
2365 (match_operand:V2DF 1 "register_operand" "")
2366 (match_operand 2 "const_int_operand" "")]
2369 ix86_expand_vector_extract (false, operands[0], operands[1],
2370 INTVAL (operands[2]));
;; V2DF initialisation: forwards the destination register (operand 0) and
;; the unconstrained initialiser (operand 1) to ix86_expand_vector_init,
;; passing false as its first argument.
2374 (define_expand "vec_initv2df"
2375 [(match_operand:V2DF 0 "register_operand" "")
2376 (match_operand 1 "" "")]
2379 ix86_expand_vector_init (false, operands[0], operands[1]);
2383 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2385 ;; Parallel integral arithmetic
2387 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Integer vector negation for every SSEMODEI mode.  The preparation
;; statement creates operand 2 as a register holding the all-zero vector of
;; the mode, for use alongside operand 1 in the expansion.
2389 (define_expand "neg<mode>2"
2390 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2393 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")))]
2395 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
;; Integer vector addition for every SSEMODEI mode.  Operands are first
;; legitimised with ix86_fixup_binary_operands_no_copy for PLUS.
2397 (define_expand "add<mode>3"
2398 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2399 (plus:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
2400 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
2402 "ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);")
;; Matching insn for integer vector addition: padd{b,w,d,q} chosen through
;; the ssevecsize attribute.  Operand 1 is marked commutative ("%") and tied
;; to the destination; operand 2 may be a register or memory.
2404 (define_insn "*add<mode>3"
2405 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
2407 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
2408 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
2409 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
2410 "padd<ssevecsize>\t{%2, %0|%0, %2}"
2411 [(set_attr "type" "sseiadd")
2412 (set_attr "mode" "TI")])
;; Signed saturating add (SS_PLUS) for the SSEMODE12 modes: padds{b,w}.
;; Operand 1 is commutative and tied to the destination.
2414 (define_insn "sse2_ssadd<mode>3"
2415 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2417 (match_operand:SSEMODE12 1 "nonimmediate_operand" "%0")
2418 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2419 "TARGET_SSE2 && ix86_binary_operator_ok (SS_PLUS, <MODE>mode, operands)"
2420 "padds<ssevecsize>\t{%2, %0|%0, %2}"
2421 [(set_attr "type" "sseiadd")
2422 (set_attr "mode" "TI")])
;; Unsigned saturating add (US_PLUS) for the SSEMODE12 modes: paddus{b,w}.
;; Operand 1 is commutative and tied to the destination.
2424 (define_insn "sse2_usadd<mode>3"
2425 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2427 (match_operand:SSEMODE12 1 "nonimmediate_operand" "%0")
2428 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2429 "TARGET_SSE2 && ix86_binary_operator_ok (US_PLUS, <MODE>mode, operands)"
2430 "paddus<ssevecsize>\t{%2, %0|%0, %2}"
2431 [(set_attr "type" "sseiadd")
2432 (set_attr "mode" "TI")])
;; Integer vector subtraction for every SSEMODEI mode.  Operand 1 must be a
;; register (subtraction is not commutative); operands are legitimised with
;; ix86_fixup_binary_operands_no_copy for MINUS.
2434 (define_expand "sub<mode>3"
2435 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2436 (minus:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "")
2437 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
2439 "ix86_fixup_binary_operands_no_copy (MINUS, <MODE>mode, operands);")
;; Matching insn for integer vector subtraction: psub{b,w,d,q}.  Operand 1
;; is a register tied to the destination; operand 2 may be register or
;; memory.
2441 (define_insn "*sub<mode>3"
2442 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
2444 (match_operand:SSEMODEI 1 "register_operand" "0")
2445 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
2447 "psub<ssevecsize>\t{%2, %0|%0, %2}"
2448 [(set_attr "type" "sseiadd")
2449 (set_attr "mode" "TI")])
;; psubs{b,w} for the SSEMODE12 modes (the "s" form of psub, as the pattern
;; name sssub indicates).  Operand 1 is a register tied to the destination.
2451 (define_insn "sse2_sssub<mode>3"
2452 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2454 (match_operand:SSEMODE12 1 "register_operand" "0")
2455 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2457 "psubs<ssevecsize>\t{%2, %0|%0, %2}"
2458 [(set_attr "type" "sseiadd")
2459 (set_attr "mode" "TI")])
;; psubus{b,w} for the SSEMODE12 modes (the "us" form of psub, as the pattern
;; name ussub indicates).  Operand 1 is a register tied to the destination.
2461 (define_insn "sse2_ussub<mode>3"
2462 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2464 (match_operand:SSEMODE12 1 "register_operand" "0")
2465 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2467 "psubus<ssevecsize>\t{%2, %0|%0, %2}"
2468 [(set_attr "type" "sseiadd")
2469 (set_attr "mode" "TI")])
;; V16QI multiply, synthesised because there is no byte multiply insn used
;; here.  Strategy, as implemented below:
;;   1. allocate twelve V16QI temporaries t[0..11];
;;   2. widen both inputs with punpck{h,l}bw so each source byte sits in the
;;      low byte of a word (the high byte is a don't-care copy);
;;   3. multiply the word vectors with mulv8hi3;
;;   4. interleave the products back together with three rounds of
;;      punpck{h,l}bw plus a final punpcklbw into the destination.
2471 (define_expand "mulv16qi3"
2472 [(set (match_operand:V16QI 0 "register_operand" "")
2473 (mult:V16QI (match_operand:V16QI 1 "register_operand" "")
2474 (match_operand:V16QI 2 "register_operand" "")))]
2480 for (i = 0; i < 12; ++i)
2481 t[i] = gen_reg_rtx (V16QImode);
2483 /* Unpack data such that we've got a source byte in each low byte of
2484 each word. We don't care what goes into the high byte of each word.
2485 Rather than trying to get zero in there, most convenient is to let
2486 it be a copy of the low byte. */
2487 emit_insn (gen_sse2_punpckhbw (t[0], operands[1], operands[1]));
2488 emit_insn (gen_sse2_punpckhbw (t[1], operands[2], operands[2]));
2489 emit_insn (gen_sse2_punpcklbw (t[2], operands[1], operands[1]));
2490 emit_insn (gen_sse2_punpcklbw (t[3], operands[2], operands[2]));
2492 /* Multiply words. The end-of-line annotations here give a picture of what
2493 the output of that instruction looks like. Dot means don't care; the
2494 letters are the bytes of the result with A being the most significant. */
2495 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[4]), /* .A.B.C.D.E.F.G.H */
2496 gen_lowpart (V8HImode, t[0]),
2497 gen_lowpart (V8HImode, t[1])));
2498 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[5]), /* .I.J.K.L.M.N.O.P */
2499 gen_lowpart (V8HImode, t[2]),
2500 gen_lowpart (V8HImode, t[3])));
2502 /* Extract the relevant bytes and merge them back together. */
2503 emit_insn (gen_sse2_punpckhbw (t[6], t[5], t[4])); /* ..AI..BJ..CK..DL */
2504 emit_insn (gen_sse2_punpcklbw (t[7], t[5], t[4])); /* ..EM..FN..GO..HP */
2505 emit_insn (gen_sse2_punpckhbw (t[8], t[7], t[6])); /* ....AEIM....BFJN */
2506 emit_insn (gen_sse2_punpcklbw (t[9], t[7], t[6])); /* ....CGKO....DHLP */
2507 emit_insn (gen_sse2_punpckhbw (t[10], t[9], t[8])); /* ........ACEGIKMO */
2508 emit_insn (gen_sse2_punpcklbw (t[11], t[9], t[8])); /* ........BDFHJLNP */
2511 emit_insn (gen_sse2_punpcklbw (op0, t[11], t[10])); /* ABCDEFGHIJKLMNOP */
;; V8HI multiply expander.  Operands are legitimised with
;; ix86_fixup_binary_operands_no_copy for MULT before the insn is matched.
2515 (define_expand "mulv8hi3"
2516 [(set (match_operand:V8HI 0 "register_operand" "")
2517 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
2518 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
2520 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; Matching insn for the V8HI multiply: pmullw.  Operand 1 is commutative
;; and tied to the destination; operand 2 may be register or memory.
2522 (define_insn "*mulv8hi3"
2523 [(set (match_operand:V8HI 0 "register_operand" "=x")
2524 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
2525 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
2526 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
2527 "pmullw\t{%2, %0|%0, %2}"
2528 [(set_attr "type" "sseimul")
2529 (set_attr "mode" "TI")])
;; Signed high-part multiply of two V8HI operands: pmulhw.  Operand 1 is
;; commutative and tied to the destination.
2531 (define_insn "sse2_smulv8hi3_highpart"
2532 [(set (match_operand:V8HI 0 "register_operand" "=x")
2537 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
2539 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
2541 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
2542 "pmulhw\t{%2, %0|%0, %2}"
2543 [(set_attr "type" "sseimul")
2544 (set_attr "mode" "TI")])
;; Unsigned high-part multiply of two V8HI operands: pmulhuw.  Operand 1 is
;; commutative and tied to the destination.
2546 (define_insn "sse2_umulv8hi3_highpart"
2547 [(set (match_operand:V8HI 0 "register_operand" "=x")
2552 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
2554 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
2556 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
2557 "pmulhuw\t{%2, %0|%0, %2}"
2558 [(set_attr "type" "sseimul")
2559 (set_attr "mode" "TI")])
;; Widening unsigned multiply: elements 0 and 2 of V4SI operands 1 and 2 are
;; multiplied into a V2DI result with pmuludq.  Operand 1 is commutative and
;; tied to the destination; operand 2 may be register or memory.
;; The operand check is made with V4SImode, the actual mode of operands 1
;; and 2, rather than the V8HImode left over from the patterns above.
2561 (define_insn "sse2_umulv2siv2di3"
2562 [(set (match_operand:V2DI 0 "register_operand" "=x")
2566 (match_operand:V4SI 1 "nonimmediate_operand" "%0")
2567 (parallel [(const_int 0) (const_int 2)])))
2570 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
2571 (parallel [(const_int 0) (const_int 2)])))))]
2572 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
2573 "pmuludq\t{%2, %0|%0, %2}"
2574 [(set_attr "type" "sseimul")
2575 (set_attr "mode" "TI")])
;; pmaddwd: V8HI operands 1 and 2 are each split into two V4HI selections,
;; one whose selector starts at element 0 and one whose selector starts at
;; element 1 and ends at element 7, and combined into a V4SI register.
;; Operand 1 is commutative and tied to the destination.
2577 (define_insn "sse2_pmaddwd"
2578 [(set (match_operand:V4SI 0 "register_operand" "=x")
2583 (match_operand:V8HI 1 "nonimmediate_operand" "%0")
2584 (parallel [(const_int 0)
2590 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
2591 (parallel [(const_int 0)
2597 (vec_select:V4HI (match_dup 1)
2598 (parallel [(const_int 1)
2603 (vec_select:V4HI (match_dup 2)
2604 (parallel [(const_int 1)
2607 (const_int 7)]))))))]
2609 "pmaddwd\t{%2, %0|%0, %2}"
2610 [(set_attr "type" "sseiadd")
2611 (set_attr "mode" "TI")])
;; V4SI multiply synthesised from two widening multiplies
;; (sse2_umulv2siv2di3):
;;   t1 = products of elements 0 and 2;
;;   t2, t3 = inputs shifted right by 32 bits as whole TImode values, which
;;            moves elements 1 and 3 into the slots of elements 0 and 2;
;;   t4 = products of (original) elements 1 and 3;
;;   t5, t6 = pshufd of t1 / t4 that brings element 2 down to element 1;
;;   result = punpckldq (t5, t6).
2613 (define_expand "mulv4si3"
2614 [(set (match_operand:V4SI 0 "register_operand" "")
2615 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
2616 (match_operand:V4SI 2 "register_operand" "")))]
2619 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
2625 t1 = gen_reg_rtx (V4SImode);
2626 t2 = gen_reg_rtx (V4SImode);
2627 t3 = gen_reg_rtx (V4SImode);
2628 t4 = gen_reg_rtx (V4SImode);
2629 t5 = gen_reg_rtx (V4SImode);
2630 t6 = gen_reg_rtx (V4SImode);
2631 thirtytwo = GEN_INT (32);
2633 /* Multiply elements 2 and 0. */
2634 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t1), op1, op2));
2636 /* Shift both input vectors down one element, so that elements 3 and 1
2637 are now in the slots for elements 2 and 0. For K8, at least, this is
2638 faster than using a shuffle. */
2639 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2),
2640 gen_lowpart (TImode, op1), thirtytwo));
2641 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3),
2642 gen_lowpart (TImode, op2), thirtytwo));
2644 /* Multiply elements 3 and 1. */
2645 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t4), t2, t3));
2647 /* Move the results in element 2 down to element 1; we don't care what
2648 goes in elements 2 and 3. */
2649 emit_insn (gen_sse2_pshufd_1 (t5, t1, const0_rtx, const2_rtx,
2650 const0_rtx, const0_rtx));
2651 emit_insn (gen_sse2_pshufd_1 (t6, t4, const0_rtx, const2_rtx,
2652 const0_rtx, const0_rtx));
2654 /* Merge the parts back together. */
2655 emit_insn (gen_sse2_punpckldq (op0, t5, t6));
;; V2DI multiply synthesised from 32-bit pieces using the widening multiply
;; sse2_umulv2siv2di3:
;;   t1 = low(op1) * low(op2);
;;   t2, t3 = op1, op2 logically shifted right by 32 (their high halves);
;;   t4 = low(op1) * high(op2),  t5 = low(op2) * high(op1);
;;   t4, t5 are shifted left by 32 and added to t1 to form the result.
2659 (define_expand "mulv2di3"
2660 [(set (match_operand:V2DI 0 "register_operand" "")
2661 (mult:V2DI (match_operand:V2DI 1 "register_operand" "")
2662 (match_operand:V2DI 2 "register_operand" "")))]
2665 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
2671 t1 = gen_reg_rtx (V2DImode);
2672 t2 = gen_reg_rtx (V2DImode);
2673 t3 = gen_reg_rtx (V2DImode);
2674 t4 = gen_reg_rtx (V2DImode);
2675 t5 = gen_reg_rtx (V2DImode);
2676 t6 = gen_reg_rtx (V2DImode);
2677 thirtytwo = GEN_INT (32);
2679 /* Multiply low parts. */
2680 emit_insn (gen_sse2_umulv2siv2di3 (t1, gen_lowpart (V4SImode, op1),
2681 gen_lowpart (V4SImode, op2)));
2683 /* Shift input vectors right 32 bits so we can multiply high parts. */
2684 emit_insn (gen_lshrv2di3 (t2, op1, thirtytwo));
2685 emit_insn (gen_lshrv2di3 (t3, op2, thirtytwo));
2687 /* Multiply high parts by low parts. */
2688 emit_insn (gen_sse2_umulv2siv2di3 (t4, gen_lowpart (V4SImode, op1),
2689 gen_lowpart (V4SImode, t3)));
2690 emit_insn (gen_sse2_umulv2siv2di3 (t5, gen_lowpart (V4SImode, op2),
2691 gen_lowpart (V4SImode, t2)));
2693 /* Shift them back. */
2694 emit_insn (gen_ashlv2di3 (t4, t4, thirtytwo));
2695 emit_insn (gen_ashlv2di3 (t5, t5, thirtytwo));
2697 /* Add the three parts together. */
2698 emit_insn (gen_addv2di3 (t6, t1, t4));
2699 emit_insn (gen_addv2di3 (op0, t6, t5));
;; Signed dot product of V8HI operands 1 and 2 accumulated into V4SI:
;; a fresh V4SI temporary receives pmaddwd (op1, op2), then operand 0 is set
;; to operand 3 plus that temporary via addv4si3.
2703 (define_expand "sdot_prodv8hi"
2704 [(match_operand:V4SI 0 "register_operand" "")
2705 (match_operand:V8HI 1 "nonimmediate_operand" "")
2706 (match_operand:V8HI 2 "nonimmediate_operand" "")
2707 (match_operand:V4SI 3 "register_operand" "")]
2710 rtx t = gen_reg_rtx (V4SImode);
2711 emit_insn (gen_sse2_pmaddwd (t, operands[1], operands[2]));
2712 emit_insn (gen_addv4si3 (operands[0], operands[3], t));
;; Unsigned dot product of V4SI operands 1 and 2 accumulated into V2DI:
;;   t1 = widening multiply of op1 and op2, plus the accumulator operand 3;
;;   t2, t3 = op1, op2 shifted right as whole TImode values;
;;   t4 = widening multiply of t2 and t3;
;;   operand 0 = t1 + t4.
2716 (define_expand "udot_prodv4si"
2717 [(match_operand:V2DI 0 "register_operand" "")
2718 (match_operand:V4SI 1 "register_operand" "")
2719 (match_operand:V4SI 2 "register_operand" "")
2720 (match_operand:V2DI 3 "register_operand" "")]
2725 t1 = gen_reg_rtx (V2DImode);
2726 emit_insn (gen_sse2_umulv2siv2di3 (t1, operands[1], operands[2]));
2727 emit_insn (gen_addv2di3 (t1, t1, operands[3]));
2729 t2 = gen_reg_rtx (V4SImode);
2730 t3 = gen_reg_rtx (V4SImode);
2731 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2),
2732 gen_lowpart (TImode, operands[1]),
2734 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3),
2735 gen_lowpart (TImode, operands[2]),
2738 t4 = gen_reg_rtx (V2DImode);
2739 emit_insn (gen_sse2_umulv2siv2di3 (t4, t2, t3));
2741 emit_insn (gen_addv2di3 (operands[0], t1, t4));
;; Per-element shift for the SSEMODE24 modes emitted as psra{w,d}.  The SI
;; shift count (operand 2) may be a register or an immediate.
2745 (define_insn "ashr<mode>3"
2746 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
2748 (match_operand:SSEMODE24 1 "register_operand" "0")
2749 (match_operand:SI 2 "nonmemory_operand" "xi")))]
2751 "psra<ssevecsize>\t{%2, %0|%0, %2}"
2752 [(set_attr "type" "sseishft")
2753 (set_attr "mode" "TI")])
;; Per-element logical right shift for the SSEMODE248 modes: psrl{w,d,q}.
;; The SI shift count (operand 2) may be a register or an immediate.
2755 (define_insn "lshr<mode>3"
2756 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
2757 (lshiftrt:SSEMODE248
2758 (match_operand:SSEMODE248 1 "register_operand" "0")
2759 (match_operand:SI 2 "nonmemory_operand" "xi")))]
2761 "psrl<ssevecsize>\t{%2, %0|%0, %2}"
2762 [(set_attr "type" "sseishft")
2763 (set_attr "mode" "TI")])
;; Per-element shift for the SSEMODE248 modes emitted as psll{w,d,q}.  The SI
;; shift count (operand 2) may be a register or an immediate.
2765 (define_insn "ashl<mode>3"
2766 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
2768 (match_operand:SSEMODE248 1 "register_operand" "0")
2769 (match_operand:SI 2 "nonmemory_operand" "xi")))]
2771 "psll<ssevecsize>\t{%2, %0|%0, %2}"
2772 [(set_attr "type" "sseishft")
2773 (set_attr "mode" "TI")])
;; Whole-register (TImode) left shift.  The RTL shift count is expressed in
;; bits and must satisfy const_0_to_255_mul_8_operand; the output code
;; divides it by 8 to get the byte count that pslldq expects.
2775 (define_insn "sse2_ashlti3"
2776 [(set (match_operand:TI 0 "register_operand" "=x")
2777 (ashift:TI (match_operand:TI 1 "register_operand" "0")
2778 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
2781 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
2782 return "pslldq\t{%2, %0|%0, %2}";
2784 [(set_attr "type" "sseishft")
2785 (set_attr "mode" "TI")])
;; Whole-vector left shift for every SSEMODEI mode, expressed as a TImode
;; ashift.  The preparation code tests operand 2 against
;; const_0_to_255_mul_8_operand and re-views operands 0 and 1 in TImode.
2787 (define_expand "vec_shl_<mode>"
2788 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2789 (ashift:TI (match_operand:SSEMODEI 1 "register_operand" "")
2790 (match_operand:SI 2 "general_operand" "")))]
2793 if (!const_0_to_255_mul_8_operand (operands[2], SImode))
2795 operands[0] = gen_lowpart (TImode, operands[0]);
2796 operands[1] = gen_lowpart (TImode, operands[1]);
;; Whole-register (TImode) logical right shift.  The RTL shift count is
;; expressed in bits and must satisfy const_0_to_255_mul_8_operand; the
;; output code divides it by 8 to get the byte count that psrldq expects.
2799 (define_insn "sse2_lshrti3"
2800 [(set (match_operand:TI 0 "register_operand" "=x")
2801 (lshiftrt:TI (match_operand:TI 1 "register_operand" "0")
2802 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
2805 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
2806 return "psrldq\t{%2, %0|%0, %2}";
2808 [(set_attr "type" "sseishft")
2809 (set_attr "mode" "TI")])
;; Whole-vector right shift for every SSEMODEI mode, expressed as a TImode
;; lshiftrt.  The preparation code tests operand 2 against
;; const_0_to_255_mul_8_operand and re-views operands 0 and 1 in TImode.
2811 (define_expand "vec_shr_<mode>"
2812 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2813 (lshiftrt:TI (match_operand:SSEMODEI 1 "register_operand" "")
2814 (match_operand:SI 2 "general_operand" "")))]
2817 if (!const_0_to_255_mul_8_operand (operands[2], SImode))
2819 operands[0] = gen_lowpart (TImode, operands[0]);
2820 operands[1] = gen_lowpart (TImode, operands[1]);
;; Unsigned byte maximum expander.  Operands are legitimised with
;; ix86_fixup_binary_operands_no_copy for UMAX.
2823 (define_expand "umaxv16qi3"
2824 [(set (match_operand:V16QI 0 "register_operand" "")
2825 (umax:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "")
2826 (match_operand:V16QI 2 "nonimmediate_operand" "")))]
2828 "ix86_fixup_binary_operands_no_copy (UMAX, V16QImode, operands);")
;; Matching insn for unsigned byte maximum: pmaxub.  Operand 1 is
;; commutative and tied to the destination.
2830 (define_insn "*umaxv16qi3"
2831 [(set (match_operand:V16QI 0 "register_operand" "=x")
2832 (umax:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "%0")
2833 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
2834 "TARGET_SSE2 && ix86_binary_operator_ok (UMAX, V16QImode, operands)"
2835 "pmaxub\t{%2, %0|%0, %2}"
2836 [(set_attr "type" "sseiadd")
2837 (set_attr "mode" "TI")])
;; Signed word maximum expander.  Operands are legitimised with
;; ix86_fixup_binary_operands_no_copy for SMAX.
2839 (define_expand "smaxv8hi3"
2840 [(set (match_operand:V8HI 0 "register_operand" "")
2841 (smax:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
2842 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
2844 "ix86_fixup_binary_operands_no_copy (SMAX, V8HImode, operands);")
;; Matching insn for signed word maximum: pmaxsw.  Operand 1 is commutative
;; and tied to the destination.
2846 (define_insn "*smaxv8hi3"
2847 [(set (match_operand:V8HI 0 "register_operand" "=x")
2848 (smax:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
2849 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
2850 "TARGET_SSE2 && ix86_binary_operator_ok (SMAX, V8HImode, operands)"
2851 "pmaxsw\t{%2, %0|%0, %2}"
2852 [(set_attr "type" "sseiadd")
2853 (set_attr "mode" "TI")])
;; Unsigned word maximum built from two insns:
;;   operand 0 = us_minus (operand 1, operand 2)
;;   result    = operand 0 + operand 2
;; The preparation code remembers the real destination in operand 3 and, when
;; the destination is the same rtx as operand 2, uses a fresh V8HI register
;; for the intermediate so that operand 2 is still intact for the addition.
2855 (define_expand "umaxv8hi3"
2856 [(set (match_operand:V8HI 0 "register_operand" "=x")
2857 (us_minus:V8HI (match_operand:V8HI 1 "register_operand" "0")
2858 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
2860 (plus:V8HI (match_dup 0) (match_dup 2)))]
2863 operands[3] = operands[0];
2864 if (rtx_equal_p (operands[0], operands[2]))
2865 operands[0] = gen_reg_rtx (V8HImode);
;; Signed maximum for the SSEMODE14 modes, expanded through
;; ix86_expand_int_vcond.  The xops array is filled as: destination,
;; operand 1, operand 2, the comparison GT (operand 1, operand 2), and the
;; two compared operands again.
2868 (define_expand "smax<mode>3"
2869 [(set (match_operand:SSEMODE14 0 "register_operand" "")
2870 (smax:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
2871 (match_operand:SSEMODE14 2 "register_operand" "")))]
2877 xops[0] = operands[0];
2878 xops[1] = operands[1];
2879 xops[2] = operands[2];
2880 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
2881 xops[4] = operands[1];
2882 xops[5] = operands[2];
2883 ok = ix86_expand_int_vcond (xops);
;; Unsigned V4SI maximum, expanded through ix86_expand_int_vcond.  The xops
;; array is filled as: destination, operand 1, operand 2, the comparison
;; GTU (operand 1, operand 2), and the two compared operands again.
2888 (define_expand "umaxv4si3"
2889 [(set (match_operand:V4SI 0 "register_operand" "")
2890 (umax:V4SI (match_operand:V4SI 1 "register_operand" "")
2891 (match_operand:V4SI 2 "register_operand" "")))]
2897 xops[0] = operands[0];
2898 xops[1] = operands[1];
2899 xops[2] = operands[2];
2900 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
2901 xops[4] = operands[1];
2902 xops[5] = operands[2];
2903 ok = ix86_expand_int_vcond (xops);
;; Unsigned byte minimum expander.  Operands are legitimised with
;; ix86_fixup_binary_operands_no_copy using UMIN, the rtx code this pattern
;; actually generates and the one its matching insn *uminv16qi3 checks with
;; ix86_binary_operator_ok.
2908 (define_expand "uminv16qi3"
2909 [(set (match_operand:V16QI 0 "register_operand" "")
2910 (umin:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "")
2911 (match_operand:V16QI 2 "nonimmediate_operand" "")))]
2913 "ix86_fixup_binary_operands_no_copy (UMIN, V16QImode, operands);")
;; Matching insn for unsigned byte minimum: pminub.  Operand 1 is
;; commutative and tied to the destination.
2915 (define_insn "*uminv16qi3"
2916 [(set (match_operand:V16QI 0 "register_operand" "=x")
2917 (umin:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "%0")
2918 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
2919 "TARGET_SSE2 && ix86_binary_operator_ok (UMIN, V16QImode, operands)"
2920 "pminub\t{%2, %0|%0, %2}"
2921 [(set_attr "type" "sseiadd")
2922 (set_attr "mode" "TI")])
;; Signed word minimum expander.  Operands are legitimised with
;; ix86_fixup_binary_operands_no_copy for SMIN.
2924 (define_expand "sminv8hi3"
2925 [(set (match_operand:V8HI 0 "register_operand" "")
2926 (smin:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
2927 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
2929 "ix86_fixup_binary_operands_no_copy (SMIN, V8HImode, operands);")
;; Matching insn for signed word minimum: pminsw.  Operand 1 is commutative
;; and tied to the destination.
2931 (define_insn "*sminv8hi3"
2932 [(set (match_operand:V8HI 0 "register_operand" "=x")
2933 (smin:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
2934 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
2935 "TARGET_SSE2 && ix86_binary_operator_ok (SMIN, V8HImode, operands)"
2936 "pminsw\t{%2, %0|%0, %2}"
2937 [(set_attr "type" "sseiadd")
2938 (set_attr "mode" "TI")])
;; Signed minimum for the SSEMODE14 modes, expanded through
;; ix86_expand_int_vcond.  Same comparison as smax<mode>3, GT (operand 1,
;; operand 2), but xops[1] and xops[2] are swapped (operand 2 first), which
;; turns the selection into a minimum.
2940 (define_expand "smin<mode>3"
2941 [(set (match_operand:SSEMODE14 0 "register_operand" "")
2942 (smin:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
2943 (match_operand:SSEMODE14 2 "register_operand" "")))]
2949 xops[0] = operands[0];
2950 xops[1] = operands[2];
2951 xops[2] = operands[1];
2952 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
2953 xops[4] = operands[1];
2954 xops[5] = operands[2];
2955 ok = ix86_expand_int_vcond (xops);
;; Unsigned minimum for the SSEMODE24 modes, expanded through
;; ix86_expand_int_vcond.  The comparison is GTU (operand 1, operand 2) and
;; xops[1] / xops[2] are operand 2 and operand 1 respectively, so the smaller
;; value is selected.
2960 (define_expand "umin<mode>3"
2961 [(set (match_operand:SSEMODE24 0 "register_operand" "")
2962 (umin:SSEMODE24 (match_operand:SSEMODE24 1 "register_operand" "")
2963 (match_operand:SSEMODE24 2 "register_operand" "")))]
2969 xops[0] = operands[0];
2970 xops[1] = operands[2];
2971 xops[2] = operands[1];
2972 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
2973 xops[4] = operands[1];
2974 xops[5] = operands[2];
2975 ok = ix86_expand_int_vcond (xops);
2980 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2982 ;; Parallel integral comparisons
2984 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Element-wise equality compare for the SSEMODE124 modes: pcmpeq{b,w,d}.
;; Operand 1 is commutative and tied to the destination.
2986 (define_insn "sse2_eq<mode>3"
2987 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
2989 (match_operand:SSEMODE124 1 "nonimmediate_operand" "%0")
2990 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
2991 "TARGET_SSE2 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
2992 "pcmpeq<ssevecsize>\t{%2, %0|%0, %2}"
2993 [(set_attr "type" "ssecmp")
2994 (set_attr "mode" "TI")])
;; Element-wise greater-than compare for the SSEMODE124 modes:
;; pcmpgt{b,w,d}.  Not commutative: operand 1 must be a register tied to the
;; destination.
2996 (define_insn "sse2_gt<mode>3"
2997 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
2999 (match_operand:SSEMODE124 1 "register_operand" "0")
3000 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
3002 "pcmpgt<ssevecsize>\t{%2, %0|%0, %2}"
3003 [(set_attr "type" "ssecmp")
3004 (set_attr "mode" "TI")])
;; Vector conditional select for the SSEMODE124 modes.  Operand 3 is the
;; comparison operator applied to operands 4 and 5; operands 1 and 2 are the
;; values chosen when it is true / false.  All the work is delegated to
;; ix86_expand_int_vcond, whose return value decides the expander's outcome.
3006 (define_expand "vcond<mode>"
3007 [(set (match_operand:SSEMODE124 0 "register_operand" "")
3008 (if_then_else:SSEMODE124
3009 (match_operator 3 ""
3010 [(match_operand:SSEMODE124 4 "nonimmediate_operand" "")
3011 (match_operand:SSEMODE124 5 "nonimmediate_operand" "")])
3012 (match_operand:SSEMODE124 1 "general_operand" "")
3013 (match_operand:SSEMODE124 2 "general_operand" "")))]
3016 if (ix86_expand_int_vcond (operands))
;; Vector conditional select entry point named vcondu for the SSEMODE124
;; modes.  Same operand layout as vcond<mode> (operator 3 on operands 4 and
;; 5, selecting operand 1 or 2) and the same delegation to
;; ix86_expand_int_vcond.
3022 (define_expand "vcondu<mode>"
3023 [(set (match_operand:SSEMODE124 0 "register_operand" "")
3024 (if_then_else:SSEMODE124
3025 (match_operator 3 ""
3026 [(match_operand:SSEMODE124 4 "nonimmediate_operand" "")
3027 (match_operand:SSEMODE124 5 "nonimmediate_operand" "")])
3028 (match_operand:SSEMODE124 1 "general_operand" "")
3029 (match_operand:SSEMODE124 2 "general_operand" "")))]
3032 if (ix86_expand_int_vcond (operands))
3038 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3040 ;; Parallel integral logical operations
3042 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Bitwise complement for every SSEMODEI mode, implemented as an xor of
;; operand 1 with an all-ones vector.  The preparation code builds that
;; constant element by element from constm1_rtx (one entry per vector
;; element) and forces it into a register as operand 2.
3044 (define_expand "one_cmpl<mode>2"
3045 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3046 (xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3050 int i, n = GET_MODE_NUNITS (<MODE>mode);
3051 rtvec v = rtvec_alloc (n);
3053 for (i = 0; i < n; ++i)
3054 RTVEC_ELT (v, i) = constm1_rtx;
3056 operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v));
;; Bitwise AND expander for every SSEMODEI mode.  Operands are legitimised
;; with ix86_fixup_binary_operands_no_copy for AND.
3059 (define_expand "and<mode>3"
3060 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3061 (and:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3062 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
3064 "ix86_fixup_binary_operands_no_copy (AND, <MODE>mode, operands);")
;; Matching insn for bitwise AND: pand (mode-independent mnemonic).
;; Operand 1 is commutative and tied to the destination.
3066 (define_insn "*and<mode>3"
3067 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3069 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
3070 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3071 "TARGET_SSE2 && ix86_binary_operator_ok (AND, <MODE>mode, operands)"
3072 "pand\t{%2, %0|%0, %2}"
3073 [(set_attr "type" "sselog")
3074 (set_attr "mode" "TI")])
;; pandn: the complement of register operand 1 (tied to the destination) is
;; combined with operand 2.  Note that it is operand 1, not the result, that
;; is inverted.
3076 (define_insn "sse2_nand<mode>3"
3077 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3079 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0"))
3080 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3082 "pandn\t{%2, %0|%0, %2}"
3083 [(set_attr "type" "sselog")
3084 (set_attr "mode" "TI")])
;; Bitwise inclusive-OR expander for every SSEMODEI mode.  Operands are
;; legitimised with ix86_fixup_binary_operands_no_copy for IOR.
3086 (define_expand "ior<mode>3"
3087 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3088 (ior:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3089 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
3091 "ix86_fixup_binary_operands_no_copy (IOR, <MODE>mode, operands);")
;; Matching insn for bitwise inclusive OR: por.  Operand 1 is commutative
;; and tied to the destination.
3093 (define_insn "*ior<mode>3"
3094 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3096 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
3097 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3098 "TARGET_SSE2 && ix86_binary_operator_ok (IOR, <MODE>mode, operands)"
3099 "por\t{%2, %0|%0, %2}"
3100 [(set_attr "type" "sselog")
3101 (set_attr "mode" "TI")])
;; Bitwise exclusive-OR expander for every SSEMODEI mode.  Operands are
;; legitimised with ix86_fixup_binary_operands_no_copy for XOR.
3103 (define_expand "xor<mode>3"
3104 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3105 (xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3106 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
3108 "ix86_fixup_binary_operands_no_copy (XOR, <MODE>mode, operands);")
;; Matching insn for bitwise exclusive OR: pxor.  Operand 1 is commutative
;; and tied to the destination.
3110 (define_insn "*xor<mode>3"
3111 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3113 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
3114 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3115 "TARGET_SSE2 && ix86_binary_operator_ok (XOR, <MODE>mode, operands)"
3116 "pxor\t{%2, %0|%0, %2}"
3117 [(set_attr "type" "sselog")
3118 (set_attr "mode" "TI")])
3120 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3122 ;; Parallel integral element swizzling
3124 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; packsswb: narrows V8HI operands 1 and 2 into one V16QI register.
;; Operand 1 is a register tied to the destination; operand 2 may be
;; register or memory.
3126 (define_insn "sse2_packsswb"
3127 [(set (match_operand:V16QI 0 "register_operand" "=x")
3130 (match_operand:V8HI 1 "register_operand" "0"))
3132 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
3134 "packsswb\t{%2, %0|%0, %2}"
3135 [(set_attr "type" "sselog")
3136 (set_attr "mode" "TI")])
;; packssdw: narrows V4SI operands 1 and 2 into one V8HI register.
;; Operand 1 is a register tied to the destination; operand 2 may be
;; register or memory.
3138 (define_insn "sse2_packssdw"
3139 [(set (match_operand:V8HI 0 "register_operand" "=x")
3142 (match_operand:V4SI 1 "register_operand" "0"))
3144 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
3146 "packssdw\t{%2, %0|%0, %2}"
3147 [(set_attr "type" "sselog")
3148 (set_attr "mode" "TI")])
;; packuswb: narrows V8HI operands 1 and 2 into one V16QI register.
;; Operand 1 is a register tied to the destination; operand 2 may be
;; register or memory.
3150 (define_insn "sse2_packuswb"
3151 [(set (match_operand:V16QI 0 "register_operand" "=x")
3154 (match_operand:V8HI 1 "register_operand" "0"))
3156 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
3158 "packuswb\t{%2, %0|%0, %2}"
3159 [(set_attr "type" "sselog")
3160 (set_attr "mode" "TI")])
;; punpckhbw: interleaves the high eight bytes of V16QI operands 1 and 2.
;; In the selector, indices 8..15 refer to operand 1 and 24..31 to the
;; corresponding bytes of operand 2 (indices continue past the first
;; operand's 16 elements), taken alternately.
3162 (define_insn "sse2_punpckhbw"
3163 [(set (match_operand:V16QI 0 "register_operand" "=x")
3166 (match_operand:V16QI 1 "register_operand" "0")
3167 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
3168 (parallel [(const_int 8) (const_int 24)
3169 (const_int 9) (const_int 25)
3170 (const_int 10) (const_int 26)
3171 (const_int 11) (const_int 27)
3172 (const_int 12) (const_int 28)
3173 (const_int 13) (const_int 29)
3174 (const_int 14) (const_int 30)
3175 (const_int 15) (const_int 31)])))]
3177 "punpckhbw\t{%2, %0|%0, %2}"
3178 [(set_attr "type" "sselog")
3179 (set_attr "mode" "TI")])
;; Intrinsic pattern printing punpcklbw on V16QI operands.  The selector
;; alternates indices 0..7 with 16..23; presumably these index the
;; concatenation of operands 1 and 2 (the wrapping RTL is not visible here),
;; i.e. an interleave of the two low halves.
3181 (define_insn "sse2_punpcklbw"
3182 [(set (match_operand:V16QI 0 "register_operand" "=x")
3185 (match_operand:V16QI 1 "register_operand" "0")
3186 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
3187 (parallel [(const_int 0) (const_int 16)
3188 (const_int 1) (const_int 17)
3189 (const_int 2) (const_int 18)
3190 (const_int 3) (const_int 19)
3191 (const_int 4) (const_int 20)
3192 (const_int 5) (const_int 21)
3193 (const_int 6) (const_int 22)
3194 (const_int 7) (const_int 23)])))]
3196 "punpcklbw\t{%2, %0|%0, %2}"
3197 [(set_attr "type" "sselog")
3198 (set_attr "mode" "TI")])
;; Intrinsic pattern printing punpckhwd on V8HI operands.  The selector
;; alternates indices 4..7 with 12..15; presumably these index the
;; concatenation of operands 1 and 2 (the wrapping RTL is not visible here).
3200 (define_insn "sse2_punpckhwd"
3201 [(set (match_operand:V8HI 0 "register_operand" "=x")
3204 (match_operand:V8HI 1 "register_operand" "0")
3205 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
3206 (parallel [(const_int 4) (const_int 12)
3207 (const_int 5) (const_int 13)
3208 (const_int 6) (const_int 14)
3209 (const_int 7) (const_int 15)])))]
3211 "punpckhwd\t{%2, %0|%0, %2}"
3212 [(set_attr "type" "sselog")
3213 (set_attr "mode" "TI")])
;; Intrinsic pattern printing punpcklwd on V8HI operands.  The selector
;; alternates indices 0..3 with 8..11; presumably these index the
;; concatenation of operands 1 and 2 (the wrapping RTL is not visible here).
3215 (define_insn "sse2_punpcklwd"
3216 [(set (match_operand:V8HI 0 "register_operand" "=x")
3219 (match_operand:V8HI 1 "register_operand" "0")
3220 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
3221 (parallel [(const_int 0) (const_int 8)
3222 (const_int 1) (const_int 9)
3223 (const_int 2) (const_int 10)
3224 (const_int 3) (const_int 11)])))]
3226 "punpcklwd\t{%2, %0|%0, %2}"
3227 [(set_attr "type" "sselog")
3228 (set_attr "mode" "TI")])
;; Intrinsic pattern printing punpckhdq on V4SI operands.  The selector is
;; 2, 6, 3, 7; presumably these index the concatenation of operands 1 and 2
;; (the wrapping RTL is not visible here).
3230 (define_insn "sse2_punpckhdq"
3231 [(set (match_operand:V4SI 0 "register_operand" "=x")
3234 (match_operand:V4SI 1 "register_operand" "0")
3235 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
3236 (parallel [(const_int 2) (const_int 6)
3237 (const_int 3) (const_int 7)])))]
3239 "punpckhdq\t{%2, %0|%0, %2}"
3240 [(set_attr "type" "sselog")
3241 (set_attr "mode" "TI")])
;; Intrinsic pattern printing punpckldq on V4SI operands.  The selector is
;; 0, 4, 1, 5; presumably these index the concatenation of operands 1 and 2
;; (the wrapping RTL is not visible here).
3243 (define_insn "sse2_punpckldq"
3244 [(set (match_operand:V4SI 0 "register_operand" "=x")
3247 (match_operand:V4SI 1 "register_operand" "0")
3248 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
3249 (parallel [(const_int 0) (const_int 4)
3250 (const_int 1) (const_int 5)])))]
3252 "punpckldq\t{%2, %0|%0, %2}"
3253 [(set_attr "type" "sselog")
3254 (set_attr "mode" "TI")])
;; Intrinsic pattern printing punpckhqdq on V2DI operands; operand 1 is tied
;; to the output, operand 2 may be a register or memory.
;; NOTE(review): only the first selector index (1) is visible in this
;; excerpt; the rest of the selector and the condition are not shown.
3256 (define_insn "sse2_punpckhqdq"
3257 [(set (match_operand:V2DI 0 "register_operand" "=x")
3260 (match_operand:V2DI 1 "register_operand" "0")
3261 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
3262 (parallel [(const_int 1)
3265 "punpckhqdq\t{%2, %0|%0, %2}"
3266 [(set_attr "type" "sselog")
3267 (set_attr "mode" "TI")])
;; Intrinsic pattern printing punpcklqdq on V2DI operands; operand 1 is tied
;; to the output, operand 2 may be a register or memory.
;; NOTE(review): only the first selector index (0) is visible in this
;; excerpt; the rest of the selector and the condition are not shown.
3269 (define_insn "sse2_punpcklqdq"
3270 [(set (match_operand:V2DI 0 "register_operand" "=x")
3273 (match_operand:V2DI 1 "register_operand" "0")
3274 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
3275 (parallel [(const_int 0)
3278 "punpcklqdq\t{%2, %0|%0, %2}"
3279 [(set_attr "type" "sselog")
3280 (set_attr "mode" "TI")])
;; Intrinsic-facing expander for pinsrw.  Takes the value as an SImode
;; operand and the element number as a constant in 0..7, then rewrites them
;; for the matching insn: operand 2 becomes its HImode low part and operand 3
;; becomes the one-hot mask (1 << index).
3282 (define_expand "sse2_pinsrw"
3283 [(set (match_operand:V8HI 0 "register_operand" "")
3286 (match_operand:SI 2 "nonimmediate_operand" ""))
3287 (match_operand:V8HI 1 "register_operand" "")
3288 (match_operand:SI 3 "const_0_to_7_operand" "")))]
3291 operands[2] = gen_lowpart (HImode, operands[2]);
3292 operands[3] = GEN_INT ((1 << INTVAL (operands[3])));
;; Matching insn for pinsrw.  Operand 3 is a power of two in 1..128
;; (const_pow2_1_to_128_operand); the output code converts it back to an
;; element number with exact_log2 before printing.  Operand 2 (HImode,
;; general register or memory) is printed with the %k modifier.
3295 (define_insn "*sse2_pinsrw"
3296 [(set (match_operand:V8HI 0 "register_operand" "=x")
3299 (match_operand:HI 2 "nonimmediate_operand" "rm"))
3300 (match_operand:V8HI 1 "register_operand" "0")
3301 (match_operand:SI 3 "const_pow2_1_to_128_operand" "n")))]
3304 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
3305 return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}";
3307 [(set_attr "type" "sselog")
3308 (set_attr "mode" "TI")])
;; Intrinsic pattern printing pextrw: selects the element of V8HI operand 1
;; named by constant operand 2 (0..7) into SImode general register operand 0.
;; NOTE(review): the extension wrapper and the condition are not visible here.
3310 (define_insn "sse2_pextrw"
3311 [(set (match_operand:SI 0 "register_operand" "=r")
3314 (match_operand:V8HI 1 "register_operand" "x")
3315 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")]))))]
3317 "pextrw\t{%2, %1, %0|%0, %1, %2}"
3318 [(set_attr "type" "sselog")
3319 (set_attr "mode" "TI")])
;; Intrinsic-facing expander for pshufd.  Splits the constant in operand 2
;; into four 2-bit selectors (bits 0-1, 2-3, 4-5, 6-7) and passes them as
;; separate constants to gen_sse2_pshufd_1.
3321 (define_expand "sse2_pshufd"
3322 [(match_operand:V4SI 0 "register_operand" "")
3323 (match_operand:V4SI 1 "nonimmediate_operand" "")
3324 (match_operand:SI 2 "const_int_operand" "")]
3327 int mask = INTVAL (operands[2]);
3328 emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
3329 GEN_INT ((mask >> 0) & 3),
3330 GEN_INT ((mask >> 2) & 3),
3331 GEN_INT ((mask >> 4) & 3),
3332 GEN_INT ((mask >> 6) & 3)));
;; Matching insn for pshufd with one selector operand (each 0..3) per
;; result element.  The output code ORs the four selectors into `mask' at
;; bit offsets 0, 2, 4 and 6, stores the result in operands[2], and prints
;; it as the instruction's immediate.
3336 (define_insn "sse2_pshufd_1"
3337 [(set (match_operand:V4SI 0 "register_operand" "=x")
3339 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
3340 (parallel [(match_operand 2 "const_0_to_3_operand" "")
3341 (match_operand 3 "const_0_to_3_operand" "")
3342 (match_operand 4 "const_0_to_3_operand" "")
3343 (match_operand 5 "const_0_to_3_operand" "")])))]
3347 mask |= INTVAL (operands[2]) << 0;
3348 mask |= INTVAL (operands[3]) << 2;
3349 mask |= INTVAL (operands[4]) << 4;
3350 mask |= INTVAL (operands[5]) << 6;
3351 operands[2] = GEN_INT (mask);
3353 return "pshufd\t{%2, %1, %0|%0, %1, %2}";
3355 [(set_attr "type" "sselog1")
3356 (set_attr "mode" "TI")])
;; Intrinsic-facing expander for pshuflw.  Splits the constant in operand 2
;; into four 2-bit selectors (bits 0-1, 2-3, 4-5, 6-7) and passes them as
;; separate constants to gen_sse2_pshuflw_1.
3358 (define_expand "sse2_pshuflw"
3359 [(match_operand:V8HI 0 "register_operand" "")
3360 (match_operand:V8HI 1 "nonimmediate_operand" "")
3361 (match_operand:SI 2 "const_int_operand" "")]
3364 int mask = INTVAL (operands[2]);
3365 emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
3366 GEN_INT ((mask >> 0) & 3),
3367 GEN_INT ((mask >> 2) & 3),
3368 GEN_INT ((mask >> 4) & 3),
3369 GEN_INT ((mask >> 6) & 3)));
;; Matching insn for pshuflw with four selector operands, each 0..3.  The
;; output code ORs them into `mask' at bit offsets 0, 2, 4 and 6, stores the
;; result in operands[2], and prints it as the instruction's immediate.
;; NOTE(review): the remainder of the selector list is not visible here.
3373 (define_insn "sse2_pshuflw_1"
3374 [(set (match_operand:V8HI 0 "register_operand" "=x")
3376 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
3377 (parallel [(match_operand 2 "const_0_to_3_operand" "")
3378 (match_operand 3 "const_0_to_3_operand" "")
3379 (match_operand 4 "const_0_to_3_operand" "")
3380 (match_operand 5 "const_0_to_3_operand" "")
3388 mask |= INTVAL (operands[2]) << 0;
3389 mask |= INTVAL (operands[3]) << 2;
3390 mask |= INTVAL (operands[4]) << 4;
3391 mask |= INTVAL (operands[5]) << 6;
3392 operands[2] = GEN_INT (mask);
3394 return "pshuflw\t{%2, %1, %0|%0, %1, %2}";
3396 [(set_attr "type" "sselog")
3397 (set_attr "mode" "TI")])
;; Intrinsic-facing expander for pshufhw.  Splits the constant in operand 2
;; into four 2-bit selectors and adds 4 to each, so the values passed to
;; gen_sse2_pshufhw_1 lie in 4..7.
3399 (define_expand "sse2_pshufhw"
3400 [(match_operand:V8HI 0 "register_operand" "")
3401 (match_operand:V8HI 1 "nonimmediate_operand" "")
3402 (match_operand:SI 2 "const_int_operand" "")]
3405 int mask = INTVAL (operands[2]);
3406 emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
3407 GEN_INT (((mask >> 0) & 3) + 4),
3408 GEN_INT (((mask >> 2) & 3) + 4),
3409 GEN_INT (((mask >> 4) & 3) + 4),
3410 GEN_INT (((mask >> 6) & 3) + 4)));
;; Matching insn for pshufhw.  The four selector operands are element
;; numbers in 4..7; the output code subtracts 4 from each, ORs them into
;; `mask' at bit offsets 0, 2, 4 and 6, stores the result in operands[2],
;; and prints it as the instruction's immediate.
;; NOTE(review): part of the fixed selector prefix is not visible here.
3414 (define_insn "sse2_pshufhw_1"
3415 [(set (match_operand:V8HI 0 "register_operand" "=x")
3417 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
3418 (parallel [(const_int 0)
3422 (match_operand 2 "const_4_to_7_operand" "")
3423 (match_operand 3 "const_4_to_7_operand" "")
3424 (match_operand 4 "const_4_to_7_operand" "")
3425 (match_operand 5 "const_4_to_7_operand" "")])))]
3429 mask |= (INTVAL (operands[2]) - 4) << 0;
3430 mask |= (INTVAL (operands[3]) - 4) << 2;
3431 mask |= (INTVAL (operands[4]) - 4) << 4;
3432 mask |= (INTVAL (operands[5]) - 4) << 6;
3433 operands[2] = GEN_INT (mask);
3435 return "pshufhw\t{%2, %1, %0|%0, %1, %2}";
3437 [(set_attr "type" "sselog")
3438 (set_attr "mode" "TI")])
;; Expander taking an SImode value (operand 1) and producing a V4SI register
;; (operand 0).  The preparation statement sets operand 2 to the V4SI zero
;; constant.  NOTE(review): most of the RTL template is not visible here.
3440 (define_expand "sse2_loadd"
3441 [(set (match_operand:V4SI 0 "register_operand" "")
3444 (match_operand:SI 1 "nonimmediate_operand" ""))
3448 "operands[2] = CONST0_RTX (V4SImode);")
;; Places SImode operand 2 into a V4SI register.  Three alternatives:
;;   0: "Y" destination, memory or general register source, operand 1
;;      matching "C" -- movd.
;;   1: "x" destination, memory source, operand 1 matching "C" -- movss.
;;   2: "x" destination tied to operand 1, SSE register source -- movss.
;; NOTE(review): the wrapping RTL and the condition are not visible here.
3450 (define_insn "sse2_loadld"
3451 [(set (match_operand:V4SI 0 "register_operand" "=Y,x,x")
3454 (match_operand:SI 2 "nonimmediate_operand" "mr,m,x"))
3455 (match_operand:V4SI 1 "reg_or_0_operand" " C,C,0")
3459 movd\t{%2, %0|%0, %2}
3460 movss\t{%2, %0|%0, %2}
3461 movss\t{%2, %0|%0, %2}"
3462 [(set_attr "type" "ssemov")
3463 (set_attr "mode" "TI,V4SF,SF")])
;; Stores element 0 of V4SI register operand 1 to an SImode destination
;; (memory or SSE register).  After reload the pattern is split into a plain
;; move: operand 1 is replaced by an SImode register with the same register
;; number, and the new insn is (set operand0 operand1).
3465 ;; ??? The hardware supports more, but TARGET_INTER_UNIT_MOVES must
3466 ;; be taken into account, and movdi isn't fully populated even without.
3467 (define_insn_and_split "sse2_stored"
3468 [(set (match_operand:SI 0 "nonimmediate_operand" "=mx")
3470 (match_operand:V4SI 1 "register_operand" "x")
3471 (parallel [(const_int 0)])))]
3474 "&& reload_completed"
3475 [(set (match_dup 0) (match_dup 1))]
3477 operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));
;; Expander for storing element 0 of V2DI register operand 1 to a DImode
;; destination.  NOTE(review): the condition and any preparation code are
;; not visible in this excerpt.
3480 (define_expand "sse_storeq"
3481 [(set (match_operand:DI 0 "nonimmediate_operand" "")
3483 (match_operand:V2DI 1 "register_operand" "")
3484 (parallel [(const_int 0)])))]
;; Insn matching a store of element 0 of V2DI register operand 1 to a DImode
;; destination that is memory or an SSE register ("=mx").
;; NOTE(review): the condition and output template are not visible here.
3488 ;; ??? The hardware supports more, but TARGET_INTER_UNIT_MOVES must
3489 ;; be taken into account, and movdi isn't fully populated even without.
3490 (define_insn "*sse2_storeq"
3491 [(set (match_operand:DI 0 "nonimmediate_operand" "=mx")
3493 (match_operand:V2DI 1 "register_operand" "x")
3494 (parallel [(const_int 0)])))]
;; Post-reload rewrite of the element-0 V2DI store into a plain DImode move:
;; operand 1 is replaced by a DImode register with the same register number.
;; NOTE(review): the opening keyword of this form is not visible in this
;; excerpt; its shape (condition followed by a replacement set) suggests a
;; define_split -- confirm.
3499 [(set (match_operand:DI 0 "nonimmediate_operand" "")
3501 (match_operand:V2DI 1 "register_operand" "")
3502 (parallel [(const_int 0)])))]
3503 "TARGET_SSE && reload_completed"
3504 [(set (match_dup 0) (match_dup 1))]
3506 operands[1] = gen_rtx_REG (DImode, REGNO (operands[1]));
;; Extract element 1 (the upper quadword) of a V2DI value into a DImode
;; destination when SSE2 is available.  Alternatives:
;;   0: memory destination, register source -- movhps store.
;;   1: destination register tied to the source -- shift the whole register
;;      right by bytes so the upper quadword lands in the low half.  The
;;      count must be 8 (the size of one DI element); a count of 4 would
;;      leave bits 32..95 of the source in the low quadword.
;;   2: offsettable memory source -- movq from operand 1 printed with the
;;      H modifier, matching the upper-half access of the other alternatives.
3509 (define_insn "*vec_extractv2di_1_sse2"
3510 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
3512 (match_operand:V2DI 1 "nonimmediate_operand" "x,0,o")
3513 (parallel [(const_int 1)])))]
3514 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
3516 movhps\t{%1, %0|%0, %1}
3517 psrldq\t{$8, %0|%0, 8}
3518 movq\t{%H1, %0|%0, %H1}"
3519 [(set_attr "type" "ssemov,sseishft,ssemov")
3520 (set_attr "mode" "V2SF,TI,TI")])
;; SSE1-only counterpart (condition: TARGET_SSE without TARGET_SSE2) for
;; extracting element 1 of a V2DI value into a DImode destination.
;; Alternatives:
;;   0: memory destination, register source -- movhps.
;;   1: register destination, register source -- movhlps.
;;   2: register destination, offsettable memory source -- movlps from
;;      operand 1 printed with the H modifier.
3522 ;; Not sure this is ever used, but it doesn't hurt to have it. -aoliva
3523 (define_insn "*vec_extractv2di_1_sse"
3524 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
3526 (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o")
3527 (parallel [(const_int 1)])))]
3528 "!TARGET_SSE2 && TARGET_SSE
3529 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
3531 movhps\t{%1, %0|%0, %1}
3532 movhlps\t{%1, %0|%0, %1}
3533 movlps\t{%H1, %0|%0, %H1}"
3534 [(set_attr "type" "ssemov")
3535 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Builds a V4SI value from SImode register operand 1 using a shuffle with
;; immediate 0.  Alternative 0 ("Y" registers) prints pshufd from operand 1;
;; alternative 1 (input tied to the output) prints shufps of operand 0 with
;; itself.  NOTE(review): the wrapping RTL and condition are not visible here.
3537 (define_insn "*vec_dupv4si"
3538 [(set (match_operand:V4SI 0 "register_operand" "=Y,x")
3540 (match_operand:SI 1 "register_operand" " Y,0")))]
3543 pshufd\t{$0, %1, %0|%0, %1, 0}
3544 shufps\t{$0, %0, %0|%0, %0, 0}"
3545 [(set_attr "type" "sselog1")
3546 (set_attr "mode" "TI,V4SF")])
;; Builds a V2DI value from DImode register operand 1, which is tied to the
;; output in both alternatives ("Y" and "x" destinations).
;; NOTE(review): the wrapping RTL, condition and output templates are not
;; visible in this excerpt.
3548 (define_insn "*vec_dupv2di"
3549 [(set (match_operand:V2DI 0 "register_operand" "=Y,x")
3551 (match_operand:DI 1 "register_operand" " 0,0")))]
3556 [(set_attr "type" "sselog1,ssemov")
3557 (set_attr "mode" "TI,V4SF")])
;; Builds a V2SI value from two SImode operands.  Alternatives:
;;   0: "Y" register, operand 1 tied to output, operand 2 in "Y" -- punpckldq.
;;   1: "Y" register, operand 1 from general register/memory, operand 2
;;      matching "C" -- movd.
;;   2: "y" register, operand 1 tied to output, operand 2 in "y" -- punpckldq.
;;   3: "y" register, operand 1 from general register/memory, operand 2
;;      matching "C" -- movd.
3559 ;; ??? In theory we can match memory for the MMX alternative, but allowing
3560 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
3561 ;; alternatives pretty much forces the MMX alternative to be chosen.
3562 (define_insn "*sse2_concatv2si"
3563 [(set (match_operand:V2SI 0 "register_operand" "=Y, Y,*y,*y")
3565 (match_operand:SI 1 "nonimmediate_operand" " 0,rm, 0,rm")
3566 (match_operand:SI 2 "reg_or_0_operand" " Y, C,*y, C")))]
3569 punpckldq\t{%2, %0|%0, %2}
3570 movd\t{%1, %0|%0, %1}
3571 punpckldq\t{%2, %0|%0, %2}
3572 movd\t{%1, %0|%0, %1}"
3573 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
3574 (set_attr "mode" "TI,TI,DI,DI")])
;; Variant of the V2SI concat using unpcklps/movss for the "x" register
;; alternatives and punpckldq/movd for the "y" register alternatives.
;; In alternatives 0 and 2 operand 1 is tied to the output; in 1 and 3 it is
;; loaded from memory (or a general register in alternative 3) with operand 2
;; matching "C".  NOTE(review): the condition is not visible in this excerpt.
3576 (define_insn "*sse1_concatv2si"
3577 [(set (match_operand:V2SI 0 "register_operand" "=x,x,*y,*y")
3579 (match_operand:SI 1 "nonimmediate_operand" " 0,m, 0,*rm")
3580 (match_operand:SI 2 "reg_or_0_operand" " x,C,*y,C")))]
3583 unpcklps\t{%2, %0|%0, %2}
3584 movss\t{%1, %0|%0, %1}
3585 punpckldq\t{%2, %0|%0, %2}
3586 movd\t{%1, %0|%0, %1}"
3587 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
3588 (set_attr "mode" "V4SF,V4SF,DI,DI")])
;; Builds a V4SI value from two V2SI operands; operand 1 is tied to the
;; output in every alternative.  Operand 2 in a "Y" register prints
;; punpcklqdq, in an "x" register movlhps, and in memory movhps.
;; NOTE(review): the wrapping RTL and condition are not visible here.
3590 (define_insn "*vec_concatv4si_1"
3591 [(set (match_operand:V4SI 0 "register_operand" "=Y,x,x")
3593 (match_operand:V2SI 1 "register_operand" " 0,0,0")
3594 (match_operand:V2SI 2 "nonimmediate_operand" " Y,x,m")))]
3597 punpcklqdq\t{%2, %0|%0, %2}
3598 movlhps\t{%2, %0|%0, %2}
3599 movhps\t{%2, %0|%0, %2}"
3600 [(set_attr "type" "sselog,ssemov,ssemov")
3601 (set_attr "mode" "TI,V4SF,V2SF")])
;; Builds a V2DI value from two DImode operands.  Six alternatives:
;;   0: operand 1 in memory, operand 2 matching "C" -- movq.
;;   1: operand 1 in a "y" register, operand 2 matching "C" -- movq2dq.
;;   2: operand 1 tied to output, operand 2 in a "Y" register -- punpcklqdq.
;;   3: operand 1 tied to output, operand 2 in an "x" register -- movlhps.
;;   4: operand 1 tied to output, operand 2 in memory -- movhps.
;;   5: operand 2 tied to output, operand 1 in memory -- movlps.
;; NOTE(review): the wrapping RTL and condition are not visible here.
3603 (define_insn "*vec_concatv2di"
3604 [(set (match_operand:V2DI 0 "register_operand" "=Y,?Y,Y,x,x,x")
3606 (match_operand:DI 1 "nonimmediate_operand" " m,*y,0,0,0,m")
3607 (match_operand:DI 2 "vector_move_operand" " C, C,Y,x,m,0")))]
3610 movq\t{%1, %0|%0, %1}
3611 movq2dq\t{%1, %0|%0, %1}
3612 punpcklqdq\t{%2, %0|%0, %2}
3613 movlhps\t{%2, %0|%0, %2}
3614 movhps\t{%2, %0|%0, %2}
3615 movlps\t{%1, %0|%0, %1}"
3616 [(set_attr "type" "ssemov,ssemov,sselog,ssemov,ssemov,ssemov")
3617 (set_attr "mode" "TI,TI,TI,V4SF,V2SF,V2SF")])
;; Expander for setting one DImode element of a V2DI register.  Forwards to
;; ix86_expand_vector_set with false as the first argument, the vector
;; (operand 0), the new value (operand 1) and the constant index (operand 2).
3619 (define_expand "vec_setv2di"
3620 [(match_operand:V2DI 0 "register_operand" "")
3621 (match_operand:DI 1 "register_operand" "")
3622 (match_operand 2 "const_int_operand" "")]
3625 ix86_expand_vector_set (false, operands[0], operands[1],
3626 INTVAL (operands[2]));
;; Expander for reading one DImode element of a V2DI register.  Forwards to
;; ix86_expand_vector_extract with false as the first argument, the
;; destination (operand 0), the vector (operand 1) and the constant index
;; (operand 2).
3630 (define_expand "vec_extractv2di"
3631 [(match_operand:DI 0 "register_operand" "")
3632 (match_operand:V2DI 1 "register_operand" "")
3633 (match_operand 2 "const_int_operand" "")]
3636 ix86_expand_vector_extract (false, operands[0], operands[1],
3637 INTVAL (operands[2]));
;; Expander for initializing a V2DI register from operand 1 (no predicate
;; or mode constraint).  Forwards to ix86_expand_vector_init with false as
;; the first argument.
3641 (define_expand "vec_initv2di"
3642 [(match_operand:V2DI 0 "register_operand" "")
3643 (match_operand 1 "" "")]
3646 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Expander for setting one SImode element of a V4SI register.  Forwards to
;; ix86_expand_vector_set with false as the first argument, the vector
;; (operand 0), the new value (operand 1) and the constant index (operand 2).
3650 (define_expand "vec_setv4si"
3651 [(match_operand:V4SI 0 "register_operand" "")
3652 (match_operand:SI 1 "register_operand" "")
3653 (match_operand 2 "const_int_operand" "")]
3656 ix86_expand_vector_set (false, operands[0], operands[1],
3657 INTVAL (operands[2]));
;; Expander for reading one SImode element of a V4SI register.  Forwards to
;; ix86_expand_vector_extract with false as the first argument, the
;; destination (operand 0), the vector (operand 1) and the constant index
;; (operand 2).
3661 (define_expand "vec_extractv4si"
3662 [(match_operand:SI 0 "register_operand" "")
3663 (match_operand:V4SI 1 "register_operand" "")
3664 (match_operand 2 "const_int_operand" "")]
3667 ix86_expand_vector_extract (false, operands[0], operands[1],
3668 INTVAL (operands[2]));
;; Expander for initializing a V4SI register from operand 1 (no predicate
;; or mode constraint).  Forwards to ix86_expand_vector_init with false as
;; the first argument.
3672 (define_expand "vec_initv4si"
3673 [(match_operand:V4SI 0 "register_operand" "")
3674 (match_operand 1 "" "")]
3677 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Expander for setting one HImode element of a V8HI register.  Forwards to
;; ix86_expand_vector_set with false as the first argument, the vector
;; (operand 0), the new value (operand 1) and the constant index (operand 2).
3681 (define_expand "vec_setv8hi"
3682 [(match_operand:V8HI 0 "register_operand" "")
3683 (match_operand:HI 1 "register_operand" "")
3684 (match_operand 2 "const_int_operand" "")]
3687 ix86_expand_vector_set (false, operands[0], operands[1],
3688 INTVAL (operands[2]));
;; Expander for reading one HImode element of a V8HI register.  Forwards to
;; ix86_expand_vector_extract with false as the first argument, the
;; destination (operand 0), the vector (operand 1) and the constant index
;; (operand 2).
3692 (define_expand "vec_extractv8hi"
3693 [(match_operand:HI 0 "register_operand" "")
3694 (match_operand:V8HI 1 "register_operand" "")
3695 (match_operand 2 "const_int_operand" "")]
3698 ix86_expand_vector_extract (false, operands[0], operands[1],
3699 INTVAL (operands[2]));
;; Expander for initializing a V8HI register from operand 1 (no predicate
;; or mode constraint).  Forwards to ix86_expand_vector_init with false as
;; the first argument.
3703 (define_expand "vec_initv8hi"
3704 [(match_operand:V8HI 0 "register_operand" "")
3705 (match_operand 1 "" "")]
3708 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Expander for setting one QImode element of a V16QI register.  Forwards to
;; ix86_expand_vector_set with false as the first argument, the vector
;; (operand 0), the new value (operand 1) and the constant index (operand 2).
3712 (define_expand "vec_setv16qi"
3713 [(match_operand:V16QI 0 "register_operand" "")
3714 (match_operand:QI 1 "register_operand" "")
3715 (match_operand 2 "const_int_operand" "")]
3718 ix86_expand_vector_set (false, operands[0], operands[1],
3719 INTVAL (operands[2]));
;; Expander for reading one QImode element of a V16QI register.  Forwards to
;; ix86_expand_vector_extract with false as the first argument, the
;; destination (operand 0), the vector (operand 1) and the constant index
;; (operand 2).
3723 (define_expand "vec_extractv16qi"
3724 [(match_operand:QI 0 "register_operand" "")
3725 (match_operand:V16QI 1 "register_operand" "")
3726 (match_operand 2 "const_int_operand" "")]
3729 ix86_expand_vector_extract (false, operands[0], operands[1],
3730 INTVAL (operands[2]));
;; Expander for initializing a V16QI register from operand 1 (no predicate
;; or mode constraint).  Forwards to ix86_expand_vector_init with false as
;; the first argument.
3734 (define_expand "vec_initv16qi"
3735 [(match_operand:V16QI 0 "register_operand" "")
3736 (match_operand 1 "" "")]
3739 ix86_expand_vector_init (false, operands[0], operands[1]);
3743 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3747 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Intrinsic pattern printing pavgb on V16QI operands.  Operand 1 is tied to
;; the output and marked commutative; operand 2 may be a register or memory.
;; The RTL includes a V16QI constant vector of ones -- presumably the
;; rounding term of the average; the arithmetic wrappers are not visible in
;; this excerpt.  Requires TARGET_SSE2 and ix86_binary_operator_ok for PLUS.
3749 (define_insn "sse2_uavgv16qi3"
3750 [(set (match_operand:V16QI 0 "register_operand" "=x")
3756 (match_operand:V16QI 1 "nonimmediate_operand" "%0"))
3758 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))
3759 (const_vector:V16QI [(const_int 1) (const_int 1)
3760 (const_int 1) (const_int 1)
3761 (const_int 1) (const_int 1)
3762 (const_int 1) (const_int 1)
3763 (const_int 1) (const_int 1)
3764 (const_int 1) (const_int 1)
3765 (const_int 1) (const_int 1)
3766 (const_int 1) (const_int 1)]))
3768 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
3769 "pavgb\t{%2, %0|%0, %2}"
3770 [(set_attr "type" "sseiadd")
3771 (set_attr "mode" "TI")])
;; Intrinsic pattern printing pavgw on V8HI operands.  Operand 1 is tied to
;; the output and marked commutative; operand 2 may be a register or memory.
;; The RTL includes a V8HI constant vector of ones -- presumably the
;; rounding term of the average; the arithmetic wrappers are not visible in
;; this excerpt.  Requires TARGET_SSE2 and ix86_binary_operator_ok for PLUS.
3773 (define_insn "sse2_uavgv8hi3"
3774 [(set (match_operand:V8HI 0 "register_operand" "=x")
3780 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
3782 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
3783 (const_vector:V8HI [(const_int 1) (const_int 1)
3784 (const_int 1) (const_int 1)
3785 (const_int 1) (const_int 1)
3786 (const_int 1) (const_int 1)]))
3788 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
3789 "pavgw\t{%2, %0|%0, %2}"
3790 [(set_attr "type" "sseiadd")
3791 (set_attr "mode" "TI")])
;; Intrinsic pattern printing psadbw.  Modelled as an unspec over the two
;; V16QI inputs (operand 1 tied to the output, operand 2 register or memory)
;; producing a V2DI result, rather than as explicit RTL arithmetic.
3793 ;; The correct representation for this is absolutely enormous, and
3794 ;; surely not generally useful.
3795 (define_insn "sse2_psadbw"
3796 [(set (match_operand:V2DI 0 "register_operand" "=x")
3797 (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "0")
3798 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
3801 "psadbw\t{%2, %0|%0, %2}"
3802 [(set_attr "type" "sseiadd")
3803 (set_attr "mode" "TI")])
;; Intrinsic pattern printing movmskps: an unspec of V4SF register operand 1
;; yielding an SImode value in general register operand 0.
;; NOTE(review): the unspec code and condition are not visible here.
3805 (define_insn "sse_movmskps"
3806 [(set (match_operand:SI 0 "register_operand" "=r")
3807 (unspec:SI [(match_operand:V4SF 1 "register_operand" "x")]
3810 "movmskps\t{%1, %0|%0, %1}"
3811 [(set_attr "type" "ssecvt")
3812 (set_attr "mode" "V4SF")])
;; Intrinsic pattern printing movmskpd: an unspec of V2DF register operand 1
;; yielding an SImode value in general register operand 0.
;; NOTE(review): the unspec code and condition are not visible here.
3814 (define_insn "sse2_movmskpd"
3815 [(set (match_operand:SI 0 "register_operand" "=r")
3816 (unspec:SI [(match_operand:V2DF 1 "register_operand" "x")]
3819 "movmskpd\t{%1, %0|%0, %1}"
3820 [(set_attr "type" "ssecvt")
3821 (set_attr "mode" "V2DF")])
;; Intrinsic pattern printing pmovmskb: an unspec of V16QI register operand 1
;; yielding an SImode value in general register operand 0.
;; NOTE(review): the "mode" attribute is V2DF although the operand is an
;; integer vector -- confirm this is intentional before changing it, since
;; other attributes may be derived from it.
3823 (define_insn "sse2_pmovmskb"
3824 [(set (match_operand:SI 0 "register_operand" "=r")
3825 (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
3828 "pmovmskb\t{%1, %0|%0, %1}"
3829 [(set_attr "type" "ssecvt")
3830 (set_attr "mode" "V2DF")])
;; Intrinsic-facing expander for maskmovdqu: a V16QI memory destination set
;; from an unspec over two V16QI register operands.
;; NOTE(review): the rest of the unspec, the condition and any preparation
;; code are not visible in this excerpt.
3832 (define_expand "sse2_maskmovdqu"
3833 [(set (match_operand:V16QI 0 "memory_operand" "")
3834 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
3835 (match_operand:V16QI 2 "register_operand" "x")
;; 32-bit form of maskmovdqu (TARGET_SSE2 && !TARGET_64BIT).  The destination
;; is V16QI memory addressed by SImode register operand 0, constrained to
;; "D"; the same memory also appears as an input of the unspec.  Only
;; operands 1 and 2 are printed; the address register is implicit in the
;; template.
3841 (define_insn "*sse2_maskmovdqu"
3842 [(set (mem:V16QI (match_operand:SI 0 "register_operand" "D"))
3843 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
3844 (match_operand:V16QI 2 "register_operand" "x")
3845 (mem:V16QI (match_dup 0))]
3847 "TARGET_SSE2 && !TARGET_64BIT"
3848 ;; @@@ check ordering of operands in intel/nonintel syntax
3849 "maskmovdqu\t{%2, %1|%1, %2}"
3850 [(set_attr "type" "ssecvt")
3851 (set_attr "mode" "TI")])
;; 64-bit form of maskmovdqu (TARGET_SSE2 && TARGET_64BIT).  Identical to the
;; 32-bit pattern except that the address register operand 0 is DImode.
3853 (define_insn "*sse2_maskmovdqu_rex64"
3854 [(set (mem:V16QI (match_operand:DI 0 "register_operand" "D"))
3855 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
3856 (match_operand:V16QI 2 "register_operand" "x")
3857 (mem:V16QI (match_dup 0))]
3859 "TARGET_SSE2 && TARGET_64BIT"
3860 ;; @@@ check ordering of operands in intel/nonintel syntax
3861 "maskmovdqu\t{%2, %1|%1, %2}"
3862 [(set_attr "type" "ssecvt")
3863 (set_attr "mode" "TI")])
;; Volatile unspec taking an SImode memory operand; the "memory" attribute
;; marks it as a load.  NOTE(review): the unspec code, condition and output
;; template are not visible in this excerpt.
3865 (define_insn "sse_ldmxcsr"
3866 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
3870 [(set_attr "type" "sse")
3871 (set_attr "memory" "load")])
;; Sets an SImode memory operand from the volatile unspec UNSPECV_STMXCSR;
;; the "memory" attribute marks it as a store.  NOTE(review): the condition
;; and output template are not visible in this excerpt.
3873 (define_insn "sse_stmxcsr"
3874 [(set (match_operand:SI 0 "memory_operand" "=m")
3875 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
3878 [(set_attr "type" "sse")
3879 (set_attr "memory" "store")])
;; Expander for sfence, enabled for TARGET_SSE or TARGET_3DNOW_A.  The
;; preparation code makes operand 0 a volatile BLKmode MEM whose address is
;; a Pmode scratch; that MEM is the input of the UNSPEC_SFENCE.
3881 (define_expand "sse_sfence"
3883 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
3884 "TARGET_SSE || TARGET_3DNOW_A"
3886 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
3887 MEM_VOLATILE_P (operands[0]) = 1;
;; Matching insn for the sfence expander: a BLKmode operand set from an
;; UNSPEC_SFENCE of itself, with the "memory" attribute "unknown".
;; NOTE(review): the output template is not visible in this excerpt.
3890 (define_insn "*sse_sfence"
3891 [(set (match_operand:BLK 0 "" "")
3892 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
3893 "TARGET_SSE || TARGET_3DNOW_A"
3895 [(set_attr "type" "sse")
3896 (set_attr "memory" "unknown")])
;; Volatile unspec taking an address operand (constraint "p"), with the
;; "memory" attribute "unknown".  NOTE(review): the unspec code, condition
;; and output template are not visible in this excerpt.
3898 (define_insn "sse2_clflush"
3899 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
3903 [(set_attr "type" "sse")
3904 (set_attr "memory" "unknown")])
;; Expander for mfence.  The preparation code makes operand 0 a volatile
;; BLKmode MEM whose address is a Pmode scratch; that MEM is the input of
;; the UNSPEC_MFENCE.  NOTE(review): the condition is not visible here.
3906 (define_expand "sse2_mfence"
3908 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
3911 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
3912 MEM_VOLATILE_P (operands[0]) = 1;
;; Matching insn for the mfence expander: a BLKmode operand set from an
;; UNSPEC_MFENCE of itself, with the "memory" attribute "unknown".
;; NOTE(review): the condition and output template are not visible here.
3915 (define_insn "*sse2_mfence"
3916 [(set (match_operand:BLK 0 "" "")
3917 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
3920 [(set_attr "type" "sse")
3921 (set_attr "memory" "unknown")])
;; Expander for lfence.  The preparation code makes operand 0 a volatile
;; BLKmode MEM whose address is a Pmode scratch; that MEM is the input of
;; the UNSPEC_LFENCE.  NOTE(review): the condition is not visible here.
3923 (define_expand "sse2_lfence"
3925 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
3928 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
3929 MEM_VOLATILE_P (operands[0]) = 1;
;; Matching insn for the lfence expander: a BLKmode operand set from an
;; UNSPEC_LFENCE of itself, with the "memory" attribute "unknown".
;; NOTE(review): the condition and output template are not visible here.
3932 (define_insn "*sse2_lfence"
3933 [(set (match_operand:BLK 0 "" "")
3934 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
3937 [(set_attr "type" "sse")
3938 (set_attr "memory" "unknown")])
;; Volatile unspec over two SImode register operands constrained to "a" and
;; "c"; the insn length is fixed at 3.  NOTE(review): the unspec code,
;; condition and output template are not visible in this excerpt.
3940 (define_insn "sse3_mwait"
3941 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
3942 (match_operand:SI 1 "register_operand" "c")]
3946 [(set_attr "length" "3")])
;; Volatile unspec over three SImode register operands constrained to "a",
;; "c" and "d", printed as "monitor %0, %1, %2"; the insn length is fixed
;; at 3.  NOTE(review): the unspec code and condition are not visible here.
3948 (define_insn "sse3_monitor"
3949 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
3950 (match_operand:SI 1 "register_operand" "c")
3951 (match_operand:SI 2 "register_operand" "d")]
3954 "monitor\t%0, %1, %2"
3955 [(set_attr "length" "3")])