1 ;; GCC machine description for SSE instructions
3 ;; Free Software Foundation, Inc.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify
8 ;; it under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 2, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful,
13 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ;; GNU General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING. If not, write to
19 ;; the Free Software Foundation, 51 Franklin Street, Fifth Floor,
20 ;; Boston, MA 02110-1301, USA.
23 ;; 16 byte integral modes handled by SSE, minus TImode, which gets
24 ;; special-cased for TARGET_64BIT.
;; SSEMODEI iterates over the four 16-byte integer vector modes.
25 (define_mode_macro SSEMODEI [V16QI V8HI V4SI V2DI])
27 ;; All 16-byte vector modes handled by SSE
28 (define_mode_macro SSEMODE [V16QI V8HI V4SI V2DI V4SF V2DF])
;; Subsets of the integer vector modes.  The digits in each macro name
;; appear to name the element sizes in bytes that the macro covers
;; (1 = QI, 2 = HI, 4 = SI, 8 = DI).
31 (define_mode_macro SSEMODE12 [V16QI V8HI])
32 (define_mode_macro SSEMODE24 [V8HI V4SI])
33 (define_mode_macro SSEMODE14 [V16QI V4SI])
34 (define_mode_macro SSEMODE124 [V16QI V8HI V4SI])
35 (define_mode_macro SSEMODE248 [V8HI V4SI V2DI])
37 ;; Mapping from integer vector mode to mnemonic suffix
;; (b = byte, w = word, d = doubleword, q = quadword elements).
38 (define_mode_attr ssevecsize [(V16QI "b") (V8HI "w") (V4SI "d") (V2DI "q")])
40 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
42 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
46 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
48 ;; All of these patterns are enabled for SSE1 as well as SSE2.
49 ;; This is essential for maintaining stable calling conventions.
;; Move expander instantiated for every integer vector mode in SSEMODEI.
;; The visible preparation code passes the operands to
;; ix86_expand_vector_move.
51 (define_expand "mov<mode>"
52 [(set (match_operand:SSEMODEI 0 "nonimmediate_operand" "")
53 (match_operand:SSEMODEI 1 "nonimmediate_operand" ""))]
56 ix86_expand_vector_move (<MODE>mode, operands);
;; Integer vector move insn for the SSEMODEI modes.  Three alternatives:
;;   0: constant ("C") -> SSE register, emitted as a register clear
;;   1: SSE register or memory -> SSE register
;;   2: SSE register -> memory
;; The condition rejects memory-to-memory moves.  The output code picks the
;; single-precision mnemonic (xorps / movaps) when the insn's "mode"
;; attribute is V4SF and the integer mnemonic (pxor / movdqa) otherwise.
;; The "mode" attribute expression tests optimize_size, TARGET_SSE2 and,
;; for alternative 2 only, TARGET_SSE_TYPELESS_STORES; the visible fallback
;; value is TI.
60 (define_insn "*mov<mode>_internal"
61 [(set (match_operand:SSEMODEI 0 "nonimmediate_operand" "=x,x ,m")
62 (match_operand:SSEMODEI 1 "vector_move_operand" "C ,xm,x"))]
63 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
65 switch (which_alternative)
68 if (get_attr_mode (insn) == MODE_V4SF)
69 return "xorps\t%0, %0";
71 return "pxor\t%0, %0";
74 if (get_attr_mode (insn) == MODE_V4SF)
75 return "movaps\t{%1, %0|%0, %1}";
77 return "movdqa\t{%1, %0|%0, %1}";
82 [(set_attr "type" "sselog1,ssemov,ssemov")
85 (ior (ior (ne (symbol_ref "optimize_size") (const_int 0))
86 (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
87 (and (eq_attr "alternative" "2")
88 (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
91 (const_string "TI")))])
;; Move expander for V4SF.  The visible preparation code passes the
;; operands to ix86_expand_vector_move.
93 (define_expand "movv4sf"
94 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
95 (match_operand:V4SF 1 "nonimmediate_operand" ""))]
98 ix86_expand_vector_move (V4SFmode, operands);
;; V4SF move insn.  Three alternatives: constant ("C") -> register,
;; register/memory -> register, register -> memory.  The visible move
;; templates use movaps; the first alternative is typed sselog1 and the
;; other two ssemov.  The "mode" attribute is always V4SF.
102 (define_insn "*movv4sf_internal"
103 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
104 (match_operand:V4SF 1 "vector_move_operand" "C,xm,x"))]
108 movaps\t{%1, %0|%0, %1}
109 movaps\t{%1, %0|%0, %1}"
110 [(set_attr "type" "sselog1,ssemov,ssemov")
111 (set_attr "mode" "V4SF")])
;; Rewrite, enabled only after reload (reload_completed), of a V4SF register
;; set from a zero_extended_scalar_load_operand.  The preparation code
;; narrows operand 1 to its SFmode subreg at offset 0 and sets operand 2 to
;; the V4SF zero constant; the replacement pattern uses a vec_duplicate of
;; operand 1.
114 [(set (match_operand:V4SF 0 "register_operand" "")
115 (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
116 "TARGET_SSE && reload_completed"
119 (vec_duplicate:V4SF (match_dup 1))
123 operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
124 operands[2] = CONST0_RTX (V4SFmode);
;; Move expander for V2DF.  The visible preparation code passes the
;; operands to ix86_expand_vector_move.
127 (define_expand "movv2df"
128 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
129 (match_operand:V2DF 1 "nonimmediate_operand" ""))]
132 ix86_expand_vector_move (V2DFmode, operands);
;; V2DF move insn.  Three alternatives: constant ("C") -> register,
;; register/memory -> register, register -> memory; the condition rejects
;; memory-to-memory moves.  The output code uses the single-precision
;; mnemonic (xorps / movaps) when the insn's "mode" attribute is V4SF and
;; the double-precision one (xorpd / movapd) otherwise.  The "mode"
;; attribute is V4SF when optimize_size is set, when TARGET_SSE2 is off, or
;; for alternative 2 under the TARGET_SSE_TYPELESS_STORES test; otherwise
;; it is V2DF.
136 (define_insn "*movv2df_internal"
137 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
138 (match_operand:V2DF 1 "vector_move_operand" "C,xm,x"))]
139 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
141 switch (which_alternative)
144 if (get_attr_mode (insn) == MODE_V4SF)
145 return "xorps\t%0, %0";
147 return "xorpd\t%0, %0";
150 if (get_attr_mode (insn) == MODE_V4SF)
151 return "movaps\t{%1, %0|%0, %1}";
153 return "movapd\t{%1, %0|%0, %1}";
158 [(set_attr "type" "sselog1,ssemov,ssemov")
161 (ior (ior (ne (symbol_ref "optimize_size") (const_int 0))
162 (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
163 (and (eq_attr "alternative" "2")
164 (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
166 (const_string "V4SF")
167 (const_string "V2DF")))])
;; Rewrite, enabled only after reload (reload_completed) and with
;; TARGET_SSE2, of a V2DF register set from a
;; zero_extended_scalar_load_operand.  Operand 1 is narrowed to its DFmode
;; subreg at offset 0 and operand 2 becomes the DFmode zero constant; the
;; replacement is a vec_concat of the two.
170 [(set (match_operand:V2DF 0 "register_operand" "")
171 (match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))]
172 "TARGET_SSE2 && reload_completed"
173 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
175 operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
176 operands[2] = CONST0_RTX (DFmode);
;; Push expander for every mode in SSEMODE; takes a single register
;; operand and passes it to ix86_expand_push.
179 (define_expand "push<mode>1"
180 [(match_operand:SSEMODE 0 "register_operand" "")]
183 ix86_expand_push (<MODE>mode, operands[0]);
;; Misaligned move expander for every mode in SSEMODE.  The visible
;; preparation code passes the operands to
;; ix86_expand_vector_move_misalign.
187 (define_expand "movmisalign<mode>"
188 [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "")
189 (match_operand:SSEMODE 1 "nonimmediate_operand" ""))]
192 ix86_expand_vector_move_misalign (<MODE>mode, operands);
;; Unaligned packed single-precision move (movups), expressed as an unspec
;; of the V4SF source.  Alternatives: register/memory -> register and
;; register -> memory; the condition rejects memory-to-memory.
;; The "mode" attribute is V4SF so that it agrees with the operand mode and
;; with the other V4SF patterns in this file.  It previously read V2DF,
;; which describes the double-precision form (movupd) and can skew
;; attribute-derived properties -- presumably length/prefix estimates;
;; confirm against the attribute definitions in i386.md.
196 (define_insn "sse_movups"
197 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
198 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,x")]
200 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
201 "movups\t{%1, %0|%0, %1}"
202 [(set_attr "type" "ssemov")
203 (set_attr "mode" "V4SF")])
;; Unaligned packed double-precision move (movupd), expressed as an unspec
;; of the V2DF source.  Alternatives: register/memory -> register and
;; register -> memory; requires TARGET_SSE2 and rejects memory-to-memory.
205 (define_insn "sse2_movupd"
206 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,m")
207 (unspec:V2DF [(match_operand:V2DF 1 "nonimmediate_operand" "xm,x")]
209 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
210 "movupd\t{%1, %0|%0, %1}"
211 [(set_attr "type" "ssemov")
212 (set_attr "mode" "V2DF")])
;; Unaligned 16-byte integer move (movdqu) on V16QI, expressed as an
;; unspec of the source.  Alternatives: register/memory -> register and
;; register -> memory; requires TARGET_SSE2 and rejects memory-to-memory.
214 (define_insn "sse2_movdqu"
215 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
216 (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "xm,x")]
218 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
219 "movdqu\t{%1, %0|%0, %1}"
220 [(set_attr "type" "ssemov")
221 (set_attr "mode" "TI")])
;; Store of a V4SF SSE register to memory using movntps, expressed as an
;; unspec of the source register.
223 (define_insn "sse_movntv4sf"
224 [(set (match_operand:V4SF 0 "memory_operand" "=m")
225 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "x")]
228 "movntps\t{%1, %0|%0, %1}"
229 [(set_attr "type" "ssemov")
230 (set_attr "mode" "V4SF")])
;; Store of a V2DF SSE register to memory using movntpd, expressed as an
;; unspec of the source register.
;; NOTE(review): typed "ssecvt" here, whereas sse_movntv4sf uses "ssemov" --
;; confirm whether the difference is intentional.
232 (define_insn "sse2_movntv2df"
233 [(set (match_operand:V2DF 0 "memory_operand" "=m")
234 (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "x")]
237 "movntpd\t{%1, %0|%0, %1}"
238 [(set_attr "type" "ssecvt")
239 (set_attr "mode" "V2DF")])
;; Store of a V2DI SSE register to memory using movntdq, expressed as an
;; unspec of the source register.
241 (define_insn "sse2_movntv2di"
242 [(set (match_operand:V2DI 0 "memory_operand" "=m")
243 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")]
246 "movntdq\t{%1, %0|%0, %1}"
247 [(set_attr "type" "ssecvt")
248 (set_attr "mode" "TI")])
;; Store of an SImode general register ("r") to memory using movnti,
;; expressed as an unspec of the source register.
;; The "mode" attribute is SI to match the SImode operands.  It previously
;; read V2DF, which does not describe this general-register store and can
;; skew attribute-derived properties -- presumably length/prefix estimates;
;; confirm against the attribute definitions in i386.md.
250 (define_insn "sse2_movntsi"
251 [(set (match_operand:SI 0 "memory_operand" "=m")
252 (unspec:SI [(match_operand:SI 1 "register_operand" "r")]
255 "movnti\t{%1, %0|%0, %1}"
256 [(set_attr "type" "ssecvt")
257 (set_attr "mode" "SI")])
;; Load of a V16QI value from memory into an SSE register using lddqu,
;; expressed as an unspec of the memory operand.
259 (define_insn "sse3_lddqu"
260 [(set (match_operand:V16QI 0 "register_operand" "=x")
261 (unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "m")]
264 "lddqu\t{%1, %0|%0, %1}"
265 [(set_attr "type" "ssecvt")
266 (set_attr "mode" "TI")])
268 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
270 ;; Parallel single-precision floating point arithmetic
272 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; V4SF negation expander.  All work is delegated to
;; ix86_expand_fp_absneg_operator with code NEG; the expander then finishes
;; with DONE, so the pattern above is not emitted directly.
274 (define_expand "negv4sf2"
275 [(set (match_operand:V4SF 0 "register_operand" "")
276 (neg:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
278 "ix86_expand_fp_absneg_operator (NEG, V4SFmode, operands); DONE;")
;; V4SF absolute-value expander.  All work is delegated to
;; ix86_expand_fp_absneg_operator with code ABS; the expander then finishes
;; with DONE, so the pattern above is not emitted directly.
280 (define_expand "absv4sf2"
281 [(set (match_operand:V4SF 0 "register_operand" "")
282 (abs:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
284 "ix86_expand_fp_absneg_operator (ABS, V4SFmode, operands); DONE;")
;; V4SF addition expander.  Both inputs may be register or memory; the
;; preparation statement calls ix86_fixup_binary_operands_no_copy for PLUS
;; before the pattern is emitted.
286 (define_expand "addv4sf3"
287 [(set (match_operand:V4SF 0 "register_operand" "")
288 (plus:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
289 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
291 "ix86_fixup_binary_operands_no_copy (PLUS, V4SFmode, operands);")
;; Packed single-precision add (addps).  Operand 1 is tied to the
;; destination ("0") and marked commutative with operand 2 ("%"); operand 2
;; may be an SSE register or memory.  Valid only when
;; ix86_binary_operator_ok accepts the operands.
293 (define_insn "*addv4sf3"
294 [(set (match_operand:V4SF 0 "register_operand" "=x")
295 (plus:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
296 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
297 "TARGET_SSE && ix86_binary_operator_ok (PLUS, V4SFmode, operands)"
298 "addps\t{%2, %0|%0, %2}"
299 [(set_attr "type" "sseadd")
300 (set_attr "mode" "V4SF")])
;; Scalar single-precision add on V4SF operands (addss); the "mode"
;; attribute is SF.  Operand 1 is tied to the destination and marked
;; commutative; valid only when ix86_binary_operator_ok accepts the
;; operands.
302 (define_insn "sse_vmaddv4sf3"
303 [(set (match_operand:V4SF 0 "register_operand" "=x")
305 (plus:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
306 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
309 "TARGET_SSE && ix86_binary_operator_ok (PLUS, V4SFmode, operands)"
310 "addss\t{%2, %0|%0, %2}"
311 [(set_attr "type" "sseadd")
312 (set_attr "mode" "SF")])
;; V4SF subtraction expander.  Operand 1 must be a register and operand 2
;; may be register or memory; the preparation statement calls
;; ix86_fixup_binary_operands_no_copy for MINUS.
314 (define_expand "subv4sf3"
315 [(set (match_operand:V4SF 0 "register_operand" "")
316 (minus:V4SF (match_operand:V4SF 1 "register_operand" "")
317 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
319 "ix86_fixup_binary_operands_no_copy (MINUS, V4SFmode, operands);")
;; Packed single-precision subtract (subps).  Operand 1 is a register tied
;; to the destination ("0"), with no commutative marker; operand 2 may be an
;; SSE register or memory.
321 (define_insn "*subv4sf3"
322 [(set (match_operand:V4SF 0 "register_operand" "=x")
323 (minus:V4SF (match_operand:V4SF 1 "register_operand" "0")
324 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
326 "subps\t{%2, %0|%0, %2}"
327 [(set_attr "type" "sseadd")
328 (set_attr "mode" "V4SF")])
;; Scalar single-precision subtract on V4SF operands (subss); the "mode"
;; attribute is SF.  Operand 1 is a register tied to the destination.
330 (define_insn "sse_vmsubv4sf3"
331 [(set (match_operand:V4SF 0 "register_operand" "=x")
333 (minus:V4SF (match_operand:V4SF 1 "register_operand" "0")
334 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
338 "subss\t{%2, %0|%0, %2}"
339 [(set_attr "type" "sseadd")
340 (set_attr "mode" "SF")])
;; V4SF multiplication expander.  Both inputs may be register or memory;
;; the preparation statement calls ix86_fixup_binary_operands_no_copy for
;; MULT.
342 (define_expand "mulv4sf3"
343 [(set (match_operand:V4SF 0 "register_operand" "")
344 (mult:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
345 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
347 "ix86_fixup_binary_operands_no_copy (MULT, V4SFmode, operands);")
;; Packed single-precision multiply (mulps).  Operand 1 is tied to the
;; destination and marked commutative ("%0"); operand 2 may be an SSE
;; register or memory.  Valid only when ix86_binary_operator_ok accepts the
;; operands.
349 (define_insn "*mulv4sf3"
350 [(set (match_operand:V4SF 0 "register_operand" "=x")
351 (mult:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
352 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
353 "TARGET_SSE && ix86_binary_operator_ok (MULT, V4SFmode, operands)"
354 "mulps\t{%2, %0|%0, %2}"
355 [(set_attr "type" "ssemul")
356 (set_attr "mode" "V4SF")])
;; Scalar single-precision multiply on V4SF operands (mulss); the "mode"
;; attribute is SF.  Operand 1 is tied to the destination and marked
;; commutative; valid only when ix86_binary_operator_ok accepts the
;; operands.
358 (define_insn "sse_vmmulv4sf3"
359 [(set (match_operand:V4SF 0 "register_operand" "=x")
361 (mult:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
362 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
365 "TARGET_SSE && ix86_binary_operator_ok (MULT, V4SFmode, operands)"
366 "mulss\t{%2, %0|%0, %2}"
367 [(set_attr "type" "ssemul")
368 (set_attr "mode" "SF")])
;; V4SF division expander.  Operand 1 must be a register and operand 2 may
;; be register or memory; the preparation statement calls
;; ix86_fixup_binary_operands_no_copy for DIV.
370 (define_expand "divv4sf3"
371 [(set (match_operand:V4SF 0 "register_operand" "")
372 (div:V4SF (match_operand:V4SF 1 "register_operand" "")
373 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
375 "ix86_fixup_binary_operands_no_copy (DIV, V4SFmode, operands);")
;; Packed single-precision divide (divps).  Operand 1 is a register tied to
;; the destination ("0"); operand 2 may be an SSE register or memory.
377 (define_insn "*divv4sf3"
378 [(set (match_operand:V4SF 0 "register_operand" "=x")
379 (div:V4SF (match_operand:V4SF 1 "register_operand" "0")
380 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
382 "divps\t{%2, %0|%0, %2}"
383 [(set_attr "type" "ssediv")
384 (set_attr "mode" "V4SF")])
;; Scalar single-precision divide on V4SF operands (divss); the "mode"
;; attribute is SF.  Operand 1 is a register tied to the destination.
386 (define_insn "sse_vmdivv4sf3"
387 [(set (match_operand:V4SF 0 "register_operand" "=x")
389 (div:V4SF (match_operand:V4SF 1 "register_operand" "0")
390 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
394 "divss\t{%2, %0|%0, %2}"
395 [(set_attr "type" "ssediv")
396 (set_attr "mode" "SF")])
;; Packed rcpps on a V4SF register-or-memory source, represented with
;; UNSPEC_RCP.  The destination is not tied to the source.
398 (define_insn "sse_rcpv4sf2"
399 [(set (match_operand:V4SF 0 "register_operand" "=x")
401 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
403 "rcpps\t{%1, %0|%0, %1}"
404 [(set_attr "type" "sse")
405 (set_attr "mode" "V4SF")])
;; Scalar rcpss on V4SF operands; the "mode" attribute is SF.  Operand 1 is
;; the register-or-memory source of the unspec; operand 2 is a register tied
;; to the destination ("0").
407 (define_insn "sse_vmrcpv4sf2"
408 [(set (match_operand:V4SF 0 "register_operand" "=x")
410 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
412 (match_operand:V4SF 2 "register_operand" "0")
415 "rcpss\t{%1, %0|%0, %1}"
416 [(set_attr "type" "sse")
417 (set_attr "mode" "SF")])
;; Packed rsqrtps on a V4SF register-or-memory source, represented with
;; UNSPEC_RSQRT.  The destination is not tied to the source.
419 (define_insn "sse_rsqrtv4sf2"
420 [(set (match_operand:V4SF 0 "register_operand" "=x")
422 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
424 "rsqrtps\t{%1, %0|%0, %1}"
425 [(set_attr "type" "sse")
426 (set_attr "mode" "V4SF")])
;; Scalar rsqrtss on V4SF operands; the "mode" attribute is SF.  Operand 1
;; is the register-or-memory source of the unspec; operand 2 is a register
;; tied to the destination ("0").
428 (define_insn "sse_vmrsqrtv4sf2"
429 [(set (match_operand:V4SF 0 "register_operand" "=x")
431 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
433 (match_operand:V4SF 2 "register_operand" "0")
436 "rsqrtss\t{%1, %0|%0, %1}"
437 [(set_attr "type" "sse")
438 (set_attr "mode" "SF")])
;; Packed single-precision square root (sqrtps) of a V4SF register-or-memory
;; source.  The destination is not tied to the source.
440 (define_insn "sqrtv4sf2"
441 [(set (match_operand:V4SF 0 "register_operand" "=x")
442 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
444 "sqrtps\t{%1, %0|%0, %1}"
445 [(set_attr "type" "sse")
446 (set_attr "mode" "V4SF")])
;; Scalar square root (sqrtss) on V4SF operands; the "mode" attribute is SF.
;; Operand 1 is the register-or-memory source of the sqrt; operand 2 is a
;; register tied to the destination ("0").
448 (define_insn "sse_vmsqrtv4sf2"
449 [(set (match_operand:V4SF 0 "register_operand" "=x")
451 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
452 (match_operand:V4SF 2 "register_operand" "0")
455 "sqrtss\t{%1, %0|%0, %1}"
456 [(set_attr "type" "sse")
457 (set_attr "mode" "SF")])
459 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
460 ;; isn't really correct, as those rtl operators aren't defined when
461 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
;; V4SF signed-maximum expander.  When flag_finite_math_only is off,
;; operand 1 is first forced into a register; the operands are then passed
;; to ix86_fixup_binary_operands_no_copy for SMAX.
463 (define_expand "smaxv4sf3"
464 [(set (match_operand:V4SF 0 "register_operand" "")
465 (smax:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
466 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
469 if (!flag_finite_math_only)
470 operands[1] = force_reg (V4SFmode, operands[1]);
471 ix86_fixup_binary_operands_no_copy (SMAX, V4SFmode, operands);
;; Packed maxps, enabled only under flag_finite_math_only.  In this variant
;; operand 1 is marked commutative with operand 2 ("%0") and may be register
;; or memory as far as the predicate is concerned; ix86_binary_operator_ok
;; must accept the operands.
474 (define_insn "*smaxv4sf3_finite"
475 [(set (match_operand:V4SF 0 "register_operand" "=x")
476 (smax:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
477 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
478 "TARGET_SSE && flag_finite_math_only
479 && ix86_binary_operator_ok (SMAX, V4SFmode, operands)"
480 "maxps\t{%2, %0|%0, %2}"
481 [(set_attr "type" "sse")
482 (set_attr "mode" "V4SF")])
;; Packed maxps with a fixed operand order: operand 1 must be a register
;; tied to the destination ("0") and carries no commutative marker.
484 (define_insn "*smaxv4sf3"
485 [(set (match_operand:V4SF 0 "register_operand" "=x")
486 (smax:V4SF (match_operand:V4SF 1 "register_operand" "0")
487 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
489 "maxps\t{%2, %0|%0, %2}"
490 [(set_attr "type" "sse")
491 (set_attr "mode" "V4SF")])
;; Scalar maxss on V4SF operands, enabled only under flag_finite_math_only;
;; the "mode" attribute is SF.  Operand 1 is marked commutative ("%0") and
;; ix86_binary_operator_ok must accept the operands.
493 (define_insn "*sse_vmsmaxv4sf3_finite"
494 [(set (match_operand:V4SF 0 "register_operand" "=x")
496 (smax:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
497 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
500 "TARGET_SSE && flag_finite_math_only
501 && ix86_binary_operator_ok (SMAX, V4SFmode, operands)"
502 "maxss\t{%2, %0|%0, %2}"
503 [(set_attr "type" "sse")
504 (set_attr "mode" "SF")])
;; Scalar maxss on V4SF operands with a fixed operand order; the "mode"
;; attribute is SF.  Operand 1 must be a register tied to the destination.
506 (define_insn "sse_vmsmaxv4sf3"
507 [(set (match_operand:V4SF 0 "register_operand" "=x")
509 (smax:V4SF (match_operand:V4SF 1 "register_operand" "0")
510 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
514 "maxss\t{%2, %0|%0, %2}"
515 [(set_attr "type" "sse")
516 (set_attr "mode" "SF")])
;; V4SF signed-minimum expander.  When flag_finite_math_only is off,
;; operand 1 is first forced into a register; the operands are then passed
;; to ix86_fixup_binary_operands_no_copy for SMIN.
518 (define_expand "sminv4sf3"
519 [(set (match_operand:V4SF 0 "register_operand" "")
520 (smin:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
521 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
524 if (!flag_finite_math_only)
525 operands[1] = force_reg (V4SFmode, operands[1]);
526 ix86_fixup_binary_operands_no_copy (SMIN, V4SFmode, operands);
;; Packed minps, enabled only under flag_finite_math_only.  Operand 1 is
;; marked commutative with operand 2 ("%0"); ix86_binary_operator_ok must
;; accept the operands.
529 (define_insn "*sminv4sf3_finite"
530 [(set (match_operand:V4SF 0 "register_operand" "=x")
531 (smin:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
532 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
533 "TARGET_SSE && flag_finite_math_only
534 && ix86_binary_operator_ok (SMIN, V4SFmode, operands)"
535 "minps\t{%2, %0|%0, %2}"
536 [(set_attr "type" "sse")
537 (set_attr "mode" "V4SF")])
;; Packed minps with a fixed operand order: operand 1 must be a register
;; tied to the destination ("0") and carries no commutative marker.
539 (define_insn "*sminv4sf3"
540 [(set (match_operand:V4SF 0 "register_operand" "=x")
541 (smin:V4SF (match_operand:V4SF 1 "register_operand" "0")
542 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
544 "minps\t{%2, %0|%0, %2}"
545 [(set_attr "type" "sse")
546 (set_attr "mode" "V4SF")])
;; Scalar minss on V4SF operands, enabled only under flag_finite_math_only;
;; the "mode" attribute is SF.  Operand 1 is marked commutative ("%0") and
;; ix86_binary_operator_ok must accept the operands.
548 (define_insn "*sse_vmsminv4sf3_finite"
549 [(set (match_operand:V4SF 0 "register_operand" "=x")
551 (smin:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
552 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
555 "TARGET_SSE && flag_finite_math_only
556 && ix86_binary_operator_ok (SMIN, V4SFmode, operands)"
557 "minss\t{%2, %0|%0, %2}"
558 [(set_attr "type" "sse")
559 (set_attr "mode" "SF")])
;; Scalar minss on V4SF operands with a fixed operand order; the "mode"
;; attribute is SF.  Operand 1 must be a register tied to the destination.
561 (define_insn "sse_vmsminv4sf3"
562 [(set (match_operand:V4SF 0 "register_operand" "=x")
564 (smin:V4SF (match_operand:V4SF 1 "register_operand" "0")
565 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
569 "minss\t{%2, %0|%0, %2}"
570 [(set_attr "type" "sse")
571 (set_attr "mode" "SF")])
573 ;; These versions of the min/max patterns implement exactly the operations
574 ;; min = (op1 < op2 ? op1 : op2)
575 ;; max = (!(op1 < op2) ? op1 : op2)
576 ;; Their operands are not commutative, and thus they may be used in the
577 ;; presence of -0.0 and NaN.
;; Packed minps modelled as an unspec of (operand 1, operand 2) rather than
;; as smin, so the operand order is fixed.  Operand 1 is a register tied to
;; the destination; operand 2 may be an SSE register or memory.
579 (define_insn "*ieee_sminv4sf3"
580 [(set (match_operand:V4SF 0 "register_operand" "=x")
581 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0")
582 (match_operand:V4SF 2 "nonimmediate_operand" "xm")]
585 "minps\t{%2, %0|%0, %2}"
586 [(set_attr "type" "sseadd")
587 (set_attr "mode" "V4SF")])
;; Packed maxps modelled as an unspec of (operand 1, operand 2) rather than
;; as smax, so the operand order is fixed.  Operand 1 is a register tied to
;; the destination; operand 2 may be an SSE register or memory.
589 (define_insn "*ieee_smaxv4sf3"
590 [(set (match_operand:V4SF 0 "register_operand" "=x")
591 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0")
592 (match_operand:V4SF 2 "nonimmediate_operand" "xm")]
595 "maxps\t{%2, %0|%0, %2}"
596 [(set_attr "type" "sseadd")
597 (set_attr "mode" "V4SF")])
;; Packed minpd modelled as an unspec of (operand 1, operand 2) rather than
;; as smin, so the operand order is fixed.  Operand 1 is a register tied to
;; the destination; operand 2 may be an SSE register or memory.
599 (define_insn "*ieee_sminv2df3"
600 [(set (match_operand:V2DF 0 "register_operand" "=x")
601 (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "0")
602 (match_operand:V2DF 2 "nonimmediate_operand" "xm")]
605 "minpd\t{%2, %0|%0, %2}"
606 [(set_attr "type" "sseadd")
607 (set_attr "mode" "V2DF")])
;; Packed maxpd modelled as an unspec of (operand 1, operand 2) rather than
;; as smax, so the operand order is fixed.  Operand 1 is a register tied to
;; the destination; operand 2 may be an SSE register or memory.
609 (define_insn "*ieee_smaxv2df3"
610 [(set (match_operand:V2DF 0 "register_operand" "=x")
611 (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "0")
612 (match_operand:V2DF 2 "nonimmediate_operand" "xm")]
615 "maxpd\t{%2, %0|%0, %2}"
616 [(set_attr "type" "sseadd")
617 (set_attr "mode" "V2DF")])
;; addsubps on V4SF operands.  The visible part of the pattern includes the
;; difference (minus) of operands 1 and 2 reused via match_dup.  Operand 1
;; is a register tied to the destination; operand 2 may be register or
;; memory.
619 (define_insn "sse3_addsubv4sf3"
620 [(set (match_operand:V4SF 0 "register_operand" "=x")
623 (match_operand:V4SF 1 "register_operand" "0")
624 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
625 (minus:V4SF (match_dup 1) (match_dup 2))
628 "addsubps\t{%2, %0|%0, %2}"
629 [(set_attr "type" "sseadd")
630 (set_attr "mode" "V4SF")])
;; haddps on V4SF operands.  The pattern selects the SF elements 0..3 of
;; operand 1 and then elements 0..3 of operand 2 individually (vec_select
;; with a one-element parallel).  Operand 1 is a register tied to the
;; destination; operand 2 may be register or memory.
632 (define_insn "sse3_haddv4sf3"
633 [(set (match_operand:V4SF 0 "register_operand" "=x")
638 (match_operand:V4SF 1 "register_operand" "0")
639 (parallel [(const_int 0)]))
640 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
642 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
643 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
647 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
648 (parallel [(const_int 0)]))
649 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
651 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
652 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
654 "haddps\t{%2, %0|%0, %2}"
655 [(set_attr "type" "sseadd")
656 (set_attr "mode" "V4SF")])
;; hsubps on V4SF operands.  The pattern selects the SF elements 0..3 of
;; operand 1 and then elements 0..3 of operand 2 individually (vec_select
;; with a one-element parallel).  Operand 1 is a register tied to the
;; destination; operand 2 may be register or memory.
658 (define_insn "sse3_hsubv4sf3"
659 [(set (match_operand:V4SF 0 "register_operand" "=x")
664 (match_operand:V4SF 1 "register_operand" "0")
665 (parallel [(const_int 0)]))
666 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
668 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
669 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
673 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
674 (parallel [(const_int 0)]))
675 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
677 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
678 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
680 "hsubps\t{%2, %0|%0, %2}"
681 [(set_attr "type" "sseadd")
682 (set_attr "mode" "V4SF")])
;; Sum-reduction expander for V4SF.  Two strategies are visible: one applies
;; gen_sse3_haddv4sf3 twice through a fresh temporary register (operand 1
;; with itself, then the temporary with itself into operand 0); the other
;; calls ix86_expand_reduc_v4sf with gen_addv4sf3.  The test that selects
;; between them is not visible in this view.
684 (define_expand "reduc_splus_v4sf"
685 [(match_operand:V4SF 0 "register_operand" "")
686 (match_operand:V4SF 1 "register_operand" "")]
691 rtx tmp = gen_reg_rtx (V4SFmode);
692 emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
693 emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
696 ix86_expand_reduc_v4sf (gen_addv4sf3, operands[0], operands[1]);
;; Maximum-reduction expander for V4SF: calls ix86_expand_reduc_v4sf with
;; gen_smaxv4sf3 as the combining operation.
700 (define_expand "reduc_smax_v4sf"
701 [(match_operand:V4SF 0 "register_operand" "")
702 (match_operand:V4SF 1 "register_operand" "")]
705 ix86_expand_reduc_v4sf (gen_smaxv4sf3, operands[0], operands[1]);
;; Minimum-reduction expander for V4SF: calls ix86_expand_reduc_v4sf with
;; gen_sminv4sf3 as the combining operation.
709 (define_expand "reduc_smin_v4sf"
710 [(match_operand:V4SF 0 "register_operand" "")
711 (match_operand:V4SF 1 "register_operand" "")]
714 ix86_expand_reduc_v4sf (gen_sminv4sf3, operands[0], operands[1]);
718 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
720 ;; Parallel single-precision floating point comparisons
722 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Packed single-precision compare (cmp<cond>ps).  Operand 3 is the
;; comparison operator, accepted by sse_comparison_operator and printed into
;; the mnemonic through %D3.  Operand 1 is a register tied to the
;; destination; operand 2 may be an SSE register or memory.
724 (define_insn "sse_maskcmpv4sf3"
725 [(set (match_operand:V4SF 0 "register_operand" "=x")
726 (match_operator:V4SF 3 "sse_comparison_operator"
727 [(match_operand:V4SF 1 "register_operand" "0")
728 (match_operand:V4SF 2 "nonimmediate_operand" "xm")]))]
730 "cmp%D3ps\t{%2, %0|%0, %2}"
731 [(set_attr "type" "ssecmp")
732 (set_attr "mode" "V4SF")])
;; Scalar single-precision compare (cmp<cond>ss) on V4SF operands; the
;; "mode" attribute is SF.  Operand 3 is the comparison operator printed
;; through %D3.  Operand 1 is tied to the destination; operand 2 must be an
;; SSE register here (no memory alternative).
734 (define_insn "sse_vmmaskcmpv4sf3"
735 [(set (match_operand:V4SF 0 "register_operand" "=x")
737 (match_operator:V4SF 3 "sse_comparison_operator"
738 [(match_operand:V4SF 1 "register_operand" "0")
739 (match_operand:V4SF 2 "register_operand" "x")])
743 "cmp%D3ss\t{%2, %0|%0, %2}"
744 [(set_attr "type" "ssecmp")
745 (set_attr "mode" "SF")])
;; comiss: sets FLAGS_REG in CCFP mode from element 0 of operand 0 (an SSE
;; register) and element 0 of operand 1 (register or memory).
747 (define_insn "sse_comi"
748 [(set (reg:CCFP FLAGS_REG)
751 (match_operand:V4SF 0 "register_operand" "x")
752 (parallel [(const_int 0)]))
754 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
755 (parallel [(const_int 0)]))))]
757 "comiss\t{%1, %0|%0, %1}"
758 [(set_attr "type" "ssecomi")
759 (set_attr "mode" "SF")])
;; ucomiss: sets FLAGS_REG in CCFPU mode from element 0 of operand 0 (an
;; SSE register) and element 0 of operand 1 (register or memory).
761 (define_insn "sse_ucomi"
762 [(set (reg:CCFPU FLAGS_REG)
765 (match_operand:V4SF 0 "register_operand" "x")
766 (parallel [(const_int 0)]))
768 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
769 (parallel [(const_int 0)]))))]
771 "ucomiss\t{%1, %0|%0, %1}"
772 [(set_attr "type" "ssecomi")
773 (set_attr "mode" "SF")])
;; Vector conditional-select expander for V4SF.  Operands 4 and 5 are the
;; values being compared and operands 1 and 2 are the two general_operand
;; inputs of the select.  Expansion is attempted through
;; ix86_expand_fp_vcond; what follows its result test is not visible in
;; this view.
775 (define_expand "vcondv4sf"
776 [(set (match_operand:V4SF 0 "register_operand" "")
779 [(match_operand:V4SF 4 "nonimmediate_operand" "")
780 (match_operand:V4SF 5 "nonimmediate_operand" "")])
781 (match_operand:V4SF 1 "general_operand" "")
782 (match_operand:V4SF 2 "general_operand" "")))]
785 if (ix86_expand_fp_vcond (operands))
791 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
793 ;; Parallel single-precision floating point logical operations
795 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; V4SF bitwise-AND expander.  Both inputs may be register or memory; the
;; preparation statement calls ix86_fixup_binary_operands_no_copy for AND.
797 (define_expand "andv4sf3"
798 [(set (match_operand:V4SF 0 "register_operand" "")
799 (and:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
800 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
802 "ix86_fixup_binary_operands_no_copy (AND, V4SFmode, operands);")
;; Packed bitwise AND (andps).  Operand 1 is tied to the destination and
;; marked commutative ("%0"); operand 2 may be an SSE register or memory.
;; Valid only when ix86_binary_operator_ok accepts the operands.
804 (define_insn "*andv4sf3"
805 [(set (match_operand:V4SF 0 "register_operand" "=x")
806 (and:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
807 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
808 "TARGET_SSE && ix86_binary_operator_ok (AND, V4SFmode, operands)"
809 "andps\t{%2, %0|%0, %2}"
810 [(set_attr "type" "sselog")
811 (set_attr "mode" "V4SF")])
;; Packed and-not (andnps): computes (NOT operand 1) AND operand 2.
;; Operand 1 is a register tied to the destination; operand 2 may be an SSE
;; register or memory.
813 (define_insn "sse_nandv4sf3"
814 [(set (match_operand:V4SF 0 "register_operand" "=x")
815 (and:V4SF (not:V4SF (match_operand:V4SF 1 "register_operand" "0"))
816 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
818 "andnps\t{%2, %0|%0, %2}"
819 [(set_attr "type" "sselog")
820 (set_attr "mode" "V4SF")])
;; V4SF bitwise-OR expander.  Both inputs may be register or memory; the
;; preparation statement calls ix86_fixup_binary_operands_no_copy for IOR.
822 (define_expand "iorv4sf3"
823 [(set (match_operand:V4SF 0 "register_operand" "")
824 (ior:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
825 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
827 "ix86_fixup_binary_operands_no_copy (IOR, V4SFmode, operands);")
;; Packed bitwise OR (orps).  Operand 1 is tied to the destination and
;; marked commutative ("%0"); operand 2 may be an SSE register or memory.
;; Valid only when ix86_binary_operator_ok accepts the operands.
829 (define_insn "*iorv4sf3"
830 [(set (match_operand:V4SF 0 "register_operand" "=x")
831 (ior:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
832 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
833 "TARGET_SSE && ix86_binary_operator_ok (IOR, V4SFmode, operands)"
834 "orps\t{%2, %0|%0, %2}"
835 [(set_attr "type" "sselog")
836 (set_attr "mode" "V4SF")])
;; V4SF bitwise-XOR expander.  Both inputs may be register or memory; the
;; preparation statement calls ix86_fixup_binary_operands_no_copy for XOR.
838 (define_expand "xorv4sf3"
839 [(set (match_operand:V4SF 0 "register_operand" "")
840 (xor:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
841 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
843 "ix86_fixup_binary_operands_no_copy (XOR, V4SFmode, operands);")
;; Packed bitwise XOR (xorps).  Operand 1 is tied to the destination and
;; marked commutative ("%0"); operand 2 may be an SSE register or memory.
;; Valid only when ix86_binary_operator_ok accepts the operands.
845 (define_insn "*xorv4sf3"
846 [(set (match_operand:V4SF 0 "register_operand" "=x")
847 (xor:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
848 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
849 "TARGET_SSE && ix86_binary_operator_ok (XOR, V4SFmode, operands)"
850 "xorps\t{%2, %0|%0, %2}"
851 [(set_attr "type" "sselog")
852 (set_attr "mode" "V4SF")])
854 ;; Also define scalar versions. These are used for abs, neg, and
855 ;; conditional move. Using subregs into vector modes causes register
856 ;; allocation lossage. These patterns do not allow memory operands
857 ;; because the native instructions read the full 128 bits.
;; Bitwise AND of two SFmode values held in SSE registers, emitted as the
;; packed andps instruction (hence "mode" V4SF).  Both inputs must be
;; registers; operand 1 is tied to the destination.
859 (define_insn "*andsf3"
860 [(set (match_operand:SF 0 "register_operand" "=x")
861 (and:SF (match_operand:SF 1 "register_operand" "0")
862 (match_operand:SF 2 "register_operand" "x")))]
864 "andps\t{%2, %0|%0, %2}"
865 [(set_attr "type" "sselog")
866 (set_attr "mode" "V4SF")])
;; (NOT operand 1) AND operand 2 on SFmode values held in SSE registers,
;; emitted as the packed andnps instruction (hence "mode" V4SF).  Both
;; inputs must be registers; operand 1 is tied to the destination.
868 (define_insn "*nandsf3"
869 [(set (match_operand:SF 0 "register_operand" "=x")
870 (and:SF (not:SF (match_operand:SF 1 "register_operand" "0"))
871 (match_operand:SF 2 "register_operand" "x")))]
873 "andnps\t{%2, %0|%0, %2}"
874 [(set_attr "type" "sselog")
875 (set_attr "mode" "V4SF")])
;; Bitwise OR of two SFmode values held in SSE registers, emitted as the
;; packed orps instruction (hence "mode" V4SF).  Both inputs must be
;; registers; operand 1 is tied to the destination.
877 (define_insn "*iorsf3"
878 [(set (match_operand:SF 0 "register_operand" "=x")
879 (ior:SF (match_operand:SF 1 "register_operand" "0")
880 (match_operand:SF 2 "register_operand" "x")))]
882 "orps\t{%2, %0|%0, %2}"
883 [(set_attr "type" "sselog")
884 (set_attr "mode" "V4SF")])
;; Bitwise XOR of two SFmode values held in SSE registers, emitted as the
;; packed xorps instruction (hence "mode" V4SF).  Both inputs must be
;; registers; operand 1 is tied to the destination.
886 (define_insn "*xorsf3"
887 [(set (match_operand:SF 0 "register_operand" "=x")
888 (xor:SF (match_operand:SF 1 "register_operand" "0")
889 (match_operand:SF 2 "register_operand" "x")))]
891 "xorps\t{%2, %0|%0, %2}"
892 [(set_attr "type" "sselog")
893 (set_attr "mode" "V4SF")])
895 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
897 ;; Parallel single-precision floating point conversion operations
899 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2ps: converts a V2SI value (operand 2, MMX register "y" or memory)
;; to V2SF with float.  Operand 1 is a V4SF register tied to the
;; destination.
901 (define_insn "sse_cvtpi2ps"
902 [(set (match_operand:V4SF 0 "register_operand" "=x")
905 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
906 (match_operand:V4SF 1 "register_operand" "0")
909 "cvtpi2ps\t{%2, %0|%0, %2}"
910 [(set_attr "type" "ssecvt")
911 (set_attr "mode" "V4SF")])
;; cvtps2pi: V4SF source (SSE register or memory) converted through a V4SI
;; unspec, of which elements 0 and 1 are selected into a V2SI MMX register
;; ("y").  The "unit" attribute is mmx.
913 (define_insn "sse_cvtps2pi"
914 [(set (match_operand:V2SI 0 "register_operand" "=y")
916 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
918 (parallel [(const_int 0) (const_int 1)])))]
920 "cvtps2pi\t{%1, %0|%0, %1}"
921 [(set_attr "type" "ssecvt")
922 (set_attr "unit" "mmx")
923 (set_attr "mode" "DI")])
;; cvttps2pi: V4SF source (SSE register or memory) converted with fix to
;; V4SI, of which elements 0 and 1 are selected into a V2SI MMX register
;; ("y").  The "unit" attribute is mmx.
;; NOTE(review): "mode" is SF here but DI on the otherwise parallel
;; sse_cvtps2pi pattern -- confirm which value is intended.
925 (define_insn "sse_cvttps2pi"
926 [(set (match_operand:V2SI 0 "register_operand" "=y")
928 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
929 (parallel [(const_int 0) (const_int 1)])))]
931 "cvttps2pi\t{%1, %0|%0, %1}"
932 [(set_attr "type" "ssecvt")
933 (set_attr "unit" "mmx")
934 (set_attr "mode" "SF")])
;; cvtsi2ss: converts an SImode integer (operand 2; alternative 0 takes a
;; general register, alternative 1 takes memory) to SF with float.
;; Operand 1 is a V4SF register tied to the destination.  The athlon_decode
;; attribute differs per alternative (vector for register, double for
;; memory).
936 (define_insn "sse_cvtsi2ss"
937 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
940 (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
941 (match_operand:V4SF 1 "register_operand" "0,0")
944 "cvtsi2ss\t{%2, %0|%0, %2}"
945 [(set_attr "type" "sseicvt")
946 (set_attr "athlon_decode" "vector,double")
947 (set_attr "mode" "SF")])
;; cvtsi2ssq: 64-bit-only (TARGET_64BIT) variant converting a DImode integer
;; (operand 2) to SF with float.  Operand 1 is a V4SF register tied to the
;; destination.  Alternative 0 takes a general register; alternative 1
;; accepts register or memory ("rm").
949 (define_insn "sse_cvtsi2ssq"
950 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
953 (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,rm")))
954 (match_operand:V4SF 1 "register_operand" "0,0")
956 "TARGET_SSE && TARGET_64BIT"
957 "cvtsi2ssq\t{%2, %0|%0, %2}"
958 [(set_attr "type" "sseicvt")
959 (set_attr "athlon_decode" "vector,double")
960 (set_attr "mode" "SF")])
;; cvtss2si: converts element 0 of a V4SF operand (SSE register or memory)
;; to an SImode general register; represented with UNSPEC_FIX_NOTRUNC.
962 (define_insn "sse_cvtss2si"
963 [(set (match_operand:SI 0 "register_operand" "=r,r")
966 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
967 (parallel [(const_int 0)]))]
968 UNSPEC_FIX_NOTRUNC))]
970 "cvtss2si\t{%1, %0|%0, %1}"
971 [(set_attr "type" "sseicvt")
972 (set_attr "athlon_decode" "double,vector")
973 (set_attr "mode" "SI")])
;; cvtss2siq: 64-bit-only (TARGET_64BIT) variant converting element 0 of a
;; V4SF operand (SSE register or memory) to a DImode general register;
;; represented with UNSPEC_FIX_NOTRUNC.
975 (define_insn "sse_cvtss2siq"
976 [(set (match_operand:DI 0 "register_operand" "=r,r")
979 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
980 (parallel [(const_int 0)]))]
981 UNSPEC_FIX_NOTRUNC))]
982 "TARGET_SSE && TARGET_64BIT"
983 "cvtss2siq\t{%1, %0|%0, %1}"
984 [(set_attr "type" "sseicvt")
985 (set_attr "athlon_decode" "double,vector")
986 (set_attr "mode" "DI")])
;; cvttss2si: converts element 0 of a V4SF operand (SSE register or memory)
;; to an SImode general register.
988 (define_insn "sse_cvttss2si"
989 [(set (match_operand:SI 0 "register_operand" "=r,r")
992 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
993 (parallel [(const_int 0)]))))]
995 "cvttss2si\t{%1, %0|%0, %1}"
996 [(set_attr "type" "sseicvt")
997 (set_attr "athlon_decode" "double,vector")
998 (set_attr "mode" "SI")])
;; cvttss2siq: 64-bit-only (TARGET_64BIT) variant converting element 0 of a
;; V4SF operand (SSE register or memory) to a DImode general register.
1000 (define_insn "sse_cvttss2siq"
1001 [(set (match_operand:DI 0 "register_operand" "=r,r")
1004 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1005 (parallel [(const_int 0)]))))]
1006 "TARGET_SSE && TARGET_64BIT"
1007 "cvttss2siq\t{%1, %0|%0, %1}"
1008 [(set_attr "type" "sseicvt")
1009 (set_attr "athlon_decode" "double,vector")
1010 (set_attr "mode" "DI")])
;; cvtdq2ps: converts a V4SI operand (SSE register or memory) to V4SF with
;; float.
;; The "mode" attribute is V4SF to match the single-precision result.  It
;; previously read V2DF, which describes a double-precision operation and
;; can skew attribute-derived properties -- presumably length/prefix
;; estimates; confirm against the attribute definitions in i386.md.
1012 (define_insn "sse2_cvtdq2ps"
1013 [(set (match_operand:V4SF 0 "register_operand" "=x")
1014 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
1016 "cvtdq2ps\t{%1, %0|%0, %1}"
1017 [(set_attr "type" "ssecvt")
1018 (set_attr "mode" "V4SF")])
;; cvtps2dq: converts a V4SF operand (SSE register or memory) to V4SI;
;; represented with UNSPEC_FIX_NOTRUNC.
1020 (define_insn "sse2_cvtps2dq"
1021 [(set (match_operand:V4SI 0 "register_operand" "=x")
1022 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
1023 UNSPEC_FIX_NOTRUNC))]
1025 "cvtps2dq\t{%1, %0|%0, %1}"
1026 [(set_attr "type" "ssecvt")
1027 (set_attr "mode" "TI")])
;; cvttps2dq: converts a V4SF operand (SSE register or memory) to V4SI,
;; represented with fix.
1029 (define_insn "sse2_cvttps2dq"
1030 [(set (match_operand:V4SI 0 "register_operand" "=x")
1031 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
1033 "cvttps2dq\t{%1, %0|%0, %1}"
1034 [(set_attr "type" "ssecvt")
1035 (set_attr "mode" "TI")])
1037 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1039 ;; Parallel single-precision floating point element swizzling
1041 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Element selection from operands 1 and 2; the visible selection list
;; starts at index 6.  The condition rejects the case where operands 1 and
;; 2 are both memory.  Three alternatives, each with its own template:
;;   0: register destination, operand 1 tied to it, operand 2 in a
;;      register                                      -> movhlps
;;   1: register destination, operand 1 in offsettable memory ("o"),
;;      operand 2 tied to the destination             -> movlps from %H1
;;   2: memory destination, operand 1 in a register, operand 2 tied to
;;      the destination                               -> movhps
;; The "mode" attribute is V4SF for alternative 0 and V2SF for the others.
1043 (define_insn "sse_movhlps"
1044 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
1047 (match_operand:V4SF 1 "nonimmediate_operand" " 0,o,x")
1048 (match_operand:V4SF 2 "nonimmediate_operand" " x,0,0"))
1049 (parallel [(const_int 6)
1053 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
1055 movhlps\t{%2, %0|%0, %2}
1056 movlps\t{%H1, %0|%0, %H1}
1057 movhps\t{%1, %0|%0, %1}"
1058 [(set_attr "type" "ssemov")
1059 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Element selection from operands 1 and 2; the visible selection list
;; starts at index 0.  Operand 1 is tied to the destination in every
;; alternative, and ix86_binary_operator_ok must accept the operands.
;; Three alternatives, each with its own template:
;;   0: register destination, operand 2 in a register -> movlhps
;;   1: register destination, operand 2 in memory     -> movhps
;;   2: offsettable-memory destination ("o"), operand 2 in a register
;;                                                    -> movlps to %H0
;; The "mode" attribute is V4SF for alternative 0 and V2SF for the others.
1061 (define_insn "sse_movlhps"
1062 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
1065 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
1066 (match_operand:V4SF 2 "nonimmediate_operand" " x,m,x"))
1067 (parallel [(const_int 0)
1071 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
1073 movlhps\t{%2, %0|%0, %2}
1074 movhps\t{%2, %0|%0, %2}
1075 movlps\t{%2, %H0|%H0, %2}"
1076 [(set_attr "type" "ssemov")
1077 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; unpckhps: picks elements 2, 6, 3, 7 from operands 1 and 2.
;; Operand 1 is tied to the destination; operand 2 may be memory.
1079 (define_insn "vec_interleave_highv4sf"
1080 [(set (match_operand:V4SF 0 "register_operand" "=x")
1083 (match_operand:V4SF 1 "register_operand" "0")
1084 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1085 (parallel [(const_int 2) (const_int 6)
1086 (const_int 3) (const_int 7)])))]
1088 "unpckhps\t{%2, %0|%0, %2}"
1089 [(set_attr "type" "sselog")
1090 (set_attr "mode" "V4SF")])
;; unpcklps: picks elements 0, 4, 1, 5 from operands 1 and 2.
;; Operand 1 is tied to the destination; operand 2 may be memory.
1092 (define_insn "vec_interleave_lowv4sf"
1093 [(set (match_operand:V4SF 0 "register_operand" "=x")
1096 (match_operand:V4SF 1 "register_operand" "0")
1097 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1098 (parallel [(const_int 0) (const_int 4)
1099 (const_int 1) (const_int 5)])))]
1101 "unpcklps\t{%2, %0|%0, %2}"
1102 [(set_attr "type" "sselog")
1103 (set_attr "mode" "V4SF")])
1105 ;; These are modeled with the same vec_concat as the others so that we
1106 ;; capture users of shufps that can use the new instructions
;; movshdup with a register or memory source; the selector list starts
;; at element 1.
1107 (define_insn "sse3_movshdup"
1108 [(set (match_operand:V4SF 0 "register_operand" "=x")
1111 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
1113 (parallel [(const_int 1)
1118 "movshdup\t{%1, %0|%0, %1}"
1119 [(set_attr "type" "sse")
1120 (set_attr "mode" "V4SF")])
;; movsldup with a register or memory source; the selector list starts
;; at element 0.
1122 (define_insn "sse3_movsldup"
1123 [(set (match_operand:V4SF 0 "register_operand" "=x")
1126 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
1128 (parallel [(const_int 0)
1133 "movsldup\t{%1, %0|%0, %1}"
1134 [(set_attr "type" "sse")
1135 (set_attr "mode" "V4SF")])
;; Expander for shufps with an immediate selector (operand 3).  The
;; immediate is split into four 2-bit fields (shifts 0, 2, 4, 6); the two
;; upper fields are biased by 4 before being passed to sse_shufps_1,
;; which accepts 0..3 for its first two selectors and 4..7 for the rest.
1137 (define_expand "sse_shufps"
1138 [(match_operand:V4SF 0 "register_operand" "")
1139 (match_operand:V4SF 1 "register_operand" "")
1140 (match_operand:V4SF 2 "nonimmediate_operand" "")
1141 (match_operand:SI 3 "const_int_operand" "")]
1144 int mask = INTVAL (operands[3]);
1145 emit_insn (gen_sse_shufps_1 (operands[0], operands[1], operands[2],
1146 GEN_INT ((mask >> 0) & 3),
1147 GEN_INT ((mask >> 2) & 3),
1148 GEN_INT (((mask >> 4) & 3) + 4),
1149 GEN_INT (((mask >> 6) & 3) + 4)));
;; shufps with explicit element selectors.  Operands 3 and 4 are in 0..3,
;; operands 5 and 6 in 4..7.  The output code rebuilds the immediate:
;; each selector occupies two bits (shifts 0, 2, 4, 6), with 4 subtracted
;; from operands 5 and 6, and the result replaces operands[3] for printing.
1153 (define_insn "sse_shufps_1"
1154 [(set (match_operand:V4SF 0 "register_operand" "=x")
1157 (match_operand:V4SF 1 "register_operand" "0")
1158 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1159 (parallel [(match_operand 3 "const_0_to_3_operand" "")
1160 (match_operand 4 "const_0_to_3_operand" "")
1161 (match_operand 5 "const_4_to_7_operand" "")
1162 (match_operand 6 "const_4_to_7_operand" "")])))]
1166 mask |= INTVAL (operands[3]) << 0;
1167 mask |= INTVAL (operands[4]) << 2;
1168 mask |= (INTVAL (operands[5]) - 4) << 4;
1169 mask |= (INTVAL (operands[6]) - 4) << 6;
1170 operands[3] = GEN_INT (mask);
1172 return "shufps\t{%3, %2, %0|%0, %2, %3}";
1174 [(set_attr "type" "sselog")
1175 (set_attr "mode" "V4SF")])
;; Extracts elements 2 and 3 of V4SF operand 1 into a V2SF destination.
;; Alternatives: movhps to memory; movhlps between registers; movlps
;; reading offsettable memory through the %H1 modifier.
1177 (define_insn "sse_storehps"
1178 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
1180 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
1181 (parallel [(const_int 2) (const_int 3)])))]
1184 movhps\t{%1, %0|%0, %1}
1185 movhlps\t{%1, %0|%0, %1}
1186 movlps\t{%H1, %0|%0, %H1}"
1187 [(set_attr "type" "ssemov")
1188 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Combines elements 0 and 1 of operand 1 (tied to the destination) with
;; V2SF operand 2.  Alternatives: movhps from memory; movlhps from a
;; register; movlps writing through %H0 when the destination is
;; offsettable memory.
1190 (define_insn "sse_loadhps"
1191 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
1194 (match_operand:V4SF 1 "nonimmediate_operand" "0,0,0")
1195 (parallel [(const_int 0) (const_int 1)]))
1196 (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
1199 movhps\t{%2, %0|%0, %2}
1200 movlhps\t{%2, %0|%0, %2}
1201 movlps\t{%2, %H0|%H0, %2}"
1202 [(set_attr "type" "ssemov")
1203 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Extracts elements 0 and 1 of V4SF operand 1 into a V2SF destination.
;; Alternatives: movlps to memory; movaps between registers; movlps
;; from memory.
1205 (define_insn "sse_storelps"
1206 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
1208 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m")
1209 (parallel [(const_int 0) (const_int 1)])))]
1212 movlps\t{%1, %0|%0, %1}
1213 movaps\t{%1, %0|%0, %1}
1214 movlps\t{%1, %0|%0, %1}"
1215 [(set_attr "type" "ssemov")
1216 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Combines V2SF operand 2 with elements 2 and 3 of V4SF operand 1.
;; Alternatives: shufps with immediate 0xe4 when operand 2 is tied to the
;; destination; movlps from memory; movlps to a memory destination.
1218 (define_insn "sse_loadlps"
1219 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
1221 (match_operand:V2SF 2 "nonimmediate_operand" "0,m,x")
1223 (match_operand:V4SF 1 "nonimmediate_operand" "x,0,0")
1224 (parallel [(const_int 2) (const_int 3)]))))]
1227 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
1228 movlps\t{%2, %0|%0, %2}
1229 movlps\t{%2, %0|%0, %2}"
1230 [(set_attr "type" "sselog,ssemov,ssemov")
1231 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; movss between SSE registers: operand 2 is the source register and
;; operand 1 is tied to the destination.
1233 (define_insn "sse_movss"
1234 [(set (match_operand:V4SF 0 "register_operand" "=x")
1236 (match_operand:V4SF 2 "register_operand" "x")
1237 (match_operand:V4SF 1 "register_operand" "0")
1240 "movss\t{%2, %0|%0, %2}"
1241 [(set_attr "type" "ssemov")
1242 (set_attr "mode" "SF")])
;; Emits shufps with immediate 0 using the destination register as both
;; sources; the SF input (operand 1) is tied to the destination.
1244 (define_insn "*vec_dupv4sf"
1245 [(set (match_operand:V4SF 0 "register_operand" "=x")
1247 (match_operand:SF 1 "register_operand" "0")))]
1249 "shufps\t{$0, %0, %0|%0, %0, 0}"
1250 [(set_attr "type" "sselog1")
1251 (set_attr "mode" "V4SF")])
1253 ;; ??? In theory we can match memory for the MMX alternative, but allowing
1254 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
1255 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Builds a V2SF from two SF operands; operand 2 may be a register or the
;; constant accepted by "C".  Four alternatives: unpcklps and movss on
;; SSE registers, punpckldq and movd on MMX registers (the "*y" ones).
1256 (define_insn "*sse_concatv2sf"
1257 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
1259 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
1260 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
1263 unpcklps\t{%2, %0|%0, %2}
1264 movss\t{%1, %0|%0, %1}
1265 punpckldq\t{%2, %0|%0, %2}
1266 movd\t{%1, %0|%0, %1}"
1267 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
1268 (set_attr "mode" "V4SF,SF,DI,DI")])
;; Builds a V4SF from two V2SF operands; operand 1 is tied to the
;; destination.  movlhps when operand 2 is a register, movhps when it is
;; memory.
1270 (define_insn "*sse_concatv4sf"
1271 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1273 (match_operand:V2SF 1 "register_operand" " 0,0")
1274 (match_operand:V2SF 2 "nonimmediate_operand" " x,m")))]
1277 movlhps\t{%2, %0|%0, %2}
1278 movhps\t{%2, %0|%0, %2}"
1279 [(set_attr "type" "ssemov")
1280 (set_attr "mode" "V4SF,V2SF")])
;; V4SF vector initialization; delegates to ix86_expand_vector_init with
;; a first argument of false.
1282 (define_expand "vec_initv4sf"
1283 [(match_operand:V4SF 0 "register_operand" "")
1284 (match_operand 1 "" "")]
1287 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Inserts SF operand 2 into V4SF operand 1, which is either tied to the
;; destination or the "C" constant.  Four constraint alternatives; the
;; first three emit movss, movss and movd.
1291 (define_insn "*vec_setv4sf_0"
1292 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,Y ,m")
1295 (match_operand:SF 2 "general_operand" " x,m,*r,x*rfF"))
1296 (match_operand:V4SF 1 "vector_move_operand" " 0,C,C ,0")
1300 movss\t{%2, %0|%0, %2}
1301 movss\t{%2, %0|%0, %2}
1302 movd\t{%2, %0|%0, %2}
1304 [(set_attr "type" "ssemov")
1305 (set_attr "mode" "SF")])
;; After reload, a store of SF operand 1 into a V4SF memory destination
;; becomes a plain SFmode move to offset 0 of that memory.
1308 [(set (match_operand:V4SF 0 "memory_operand" "")
1311 (match_operand:SF 1 "nonmemory_operand" ""))
1314 "TARGET_SSE && reload_completed"
1317 emit_move_insn (adjust_address (operands[0], SFmode, 0), operands[1]);
;; Sets one element of a V4SF register: operand 1 is the SF value and
;; operand 2 the constant element index, forwarded to
;; ix86_expand_vector_set.
1321 (define_expand "vec_setv4sf"
1322 [(match_operand:V4SF 0 "register_operand" "")
1323 (match_operand:SF 1 "register_operand" "")
1324 (match_operand 2 "const_int_operand" "")]
1327 ix86_expand_vector_set (false, operands[0], operands[1],
1328 INTVAL (operands[2]));
;; Extracts element 0 of a V4SF operand as SF.  Not matched when both
;; operands are memory.  After reload it is split into a plain move whose
;; source is operand 1 re-expressed in SFmode (gen_rtx_REG on the same
;; register number, or gen_lowpart).
1332 (define_insn_and_split "*vec_extractv4sf_0"
1333 [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,fr")
1335 (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m")
1336 (parallel [(const_int 0)])))]
1337 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
1339 "&& reload_completed"
1342 rtx op1 = operands[1];
1344 op1 = gen_rtx_REG (SFmode, REGNO (op1));
1346 op1 = gen_lowpart (SFmode, op1);
1347 emit_move_insn (operands[0], op1);
;; Extracts one SF element from a V4SF register; operand 2 is the
;; constant element index, forwarded to ix86_expand_vector_extract.
1351 (define_expand "vec_extractv4sf"
1352 [(match_operand:SF 0 "register_operand" "")
1353 (match_operand:V4SF 1 "register_operand" "")
1354 (match_operand 2 "const_int_operand" "")]
1357 ix86_expand_vector_extract (false, operands[0], operands[1],
1358 INTVAL (operands[2]));
1362 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1364 ;; Parallel double-precision floating point arithmetic
1366 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; V2DF negation; expansion is done entirely by
;; ix86_expand_fp_absneg_operator, then DONE.
1368 (define_expand "negv2df2"
1369 [(set (match_operand:V2DF 0 "register_operand" "")
1370 (neg:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")))]
1372 "ix86_expand_fp_absneg_operator (NEG, V2DFmode, operands); DONE;")
;; V2DF absolute value; expansion is done entirely by
;; ix86_expand_fp_absneg_operator, then DONE.
1374 (define_expand "absv2df2"
1375 [(set (match_operand:V2DF 0 "register_operand" "")
1376 (abs:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")))]
1378 "ix86_expand_fp_absneg_operator (ABS, V2DFmode, operands); DONE;")
;; V2DF addition expander; operands are adjusted by
;; ix86_fixup_binary_operands_no_copy before the pattern is emitted.
1380 (define_expand "addv2df3"
1381 [(set (match_operand:V2DF 0 "register_operand" "")
1382 (plus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1383 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1385 "ix86_fixup_binary_operands_no_copy (PLUS, V2DFmode, operands);")
;; addpd.  Operand 1 is tied to the destination and marked commutative
;; ("%0"); operand 2 may be a register or memory.
1387 (define_insn "*addv2df3"
1388 [(set (match_operand:V2DF 0 "register_operand" "=x")
1389 (plus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1390 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1391 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V2DFmode, operands)"
1392 "addpd\t{%2, %0|%0, %2}"
1393 [(set_attr "type" "sseadd")
1394 (set_attr "mode" "V2DF")])
;; addsd on V2DF operands (mode attribute DF).  Operand 1 is tied to the
;; destination and marked commutative; operand 2 may be memory.
;; The operand check is passed V2DFmode, matching the operands' mode and
;; the other V2DF patterns (it previously named V4SFmode).
1396 (define_insn "sse2_vmaddv2df3"
1397 [(set (match_operand:V2DF 0 "register_operand" "=x")
1399 (plus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1400 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1403 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V2DFmode, operands)"
1404 "addsd\t{%2, %0|%0, %2}"
1405 [(set_attr "type" "sseadd")
1406 (set_attr "mode" "DF")])
;; V2DF subtraction expander; operands are adjusted by
;; ix86_fixup_binary_operands_no_copy before the pattern is emitted.
1408 (define_expand "subv2df3"
1409 [(set (match_operand:V2DF 0 "register_operand" "")
1410 (minus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1411 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1413 "ix86_fixup_binary_operands_no_copy (MINUS, V2DFmode, operands);")
;; subpd.  Operand 1 must be a register tied to the destination (no
;; commutative marker); operand 2 may be memory.
1415 (define_insn "*subv2df3"
1416 [(set (match_operand:V2DF 0 "register_operand" "=x")
1417 (minus:V2DF (match_operand:V2DF 1 "register_operand" "0")
1418 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1420 "subpd\t{%2, %0|%0, %2}"
1421 [(set_attr "type" "sseadd")
1422 (set_attr "mode" "V2DF")])
;; subsd on V2DF operands (mode attribute DF); operand 1 is a register
;; tied to the destination, operand 2 may be memory.
1424 (define_insn "sse2_vmsubv2df3"
1425 [(set (match_operand:V2DF 0 "register_operand" "=x")
1427 (minus:V2DF (match_operand:V2DF 1 "register_operand" "0")
1428 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1432 "subsd\t{%2, %0|%0, %2}"
1433 [(set_attr "type" "sseadd")
1434 (set_attr "mode" "DF")])
;; V2DF multiplication expander; operands are adjusted by
;; ix86_fixup_binary_operands_no_copy before the pattern is emitted.
1436 (define_expand "mulv2df3"
1437 [(set (match_operand:V2DF 0 "register_operand" "")
1438 (mult:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1439 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1441 "ix86_fixup_binary_operands_no_copy (MULT, V2DFmode, operands);")
;; mulpd.  Operand 1 is tied to the destination and marked commutative
;; ("%0"); operand 2 may be a register or memory.
1443 (define_insn "*mulv2df3"
1444 [(set (match_operand:V2DF 0 "register_operand" "=x")
1445 (mult:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1446 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1447 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V2DFmode, operands)"
1448 "mulpd\t{%2, %0|%0, %2}"
1449 [(set_attr "type" "ssemul")
1450 (set_attr "mode" "V2DF")])
;; mulsd on V2DF operands (mode attribute DF); operand 1 is tied to the
;; destination and marked commutative, operand 2 may be memory.
1452 (define_insn "sse2_vmmulv2df3"
1453 [(set (match_operand:V2DF 0 "register_operand" "=x")
1455 (mult:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1456 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1459 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V2DFmode, operands)"
1460 "mulsd\t{%2, %0|%0, %2}"
1461 [(set_attr "type" "ssemul")
1462 (set_attr "mode" "DF")])
;; V2DF division expander.  Unlike the add/sub/mul expanders, operand 1
;; must already be a register.
1464 (define_expand "divv2df3"
1465 [(set (match_operand:V2DF 0 "register_operand" "")
1466 (div:V2DF (match_operand:V2DF 1 "register_operand" "")
1467 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1469 "ix86_fixup_binary_operands_no_copy (DIV, V2DFmode, operands);")
;; divpd.  Operand 1 is a register tied to the destination; operand 2
;; may be memory.
1471 (define_insn "*divv2df3"
1472 [(set (match_operand:V2DF 0 "register_operand" "=x")
1473 (div:V2DF (match_operand:V2DF 1 "register_operand" "0")
1474 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1476 "divpd\t{%2, %0|%0, %2}"
1477 [(set_attr "type" "ssediv")
1478 (set_attr "mode" "V2DF")])
;; divsd on V2DF operands (mode attribute DF); operand 1 is a register
;; tied to the destination, operand 2 may be memory.
1480 (define_insn "sse2_vmdivv2df3"
1481 [(set (match_operand:V2DF 0 "register_operand" "=x")
1483 (div:V2DF (match_operand:V2DF 1 "register_operand" "0")
1484 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1488 "divsd\t{%2, %0|%0, %2}"
1489 [(set_attr "type" "ssediv")
1490 (set_attr "mode" "DF")])
;; sqrtpd with a register or memory source; the destination is not tied
;; to the source.
1492 (define_insn "sqrtv2df2"
1493 [(set (match_operand:V2DF 0 "register_operand" "=x")
1494 (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
1496 "sqrtpd\t{%1, %0|%0, %1}"
1497 [(set_attr "type" "sse")
1498 (set_attr "mode" "V2DF")])
;; sqrtsd on V2DF operands (mode attribute DF).  Operand 1 is the sqrt
;; input; operand 2 is tied to the destination.
;; Operand 1 uses nonimmediate_operand so that the predicate agrees with
;; its "xm" constraint, as in sqrtv2df2 (it was register_operand, which
;; contradicted the memory alternative).
1500 (define_insn "sse2_vmsqrtv2df2"
1501 [(set (match_operand:V2DF 0 "register_operand" "=x")
1503 (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
1504 (match_operand:V2DF 2 "register_operand" "0")
1507 "sqrtsd\t{%1, %0|%0, %1}"
1508 [(set_attr "type" "sse")
1509 (set_attr "mode" "DF")])
1511 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
1512 ;; isn't really correct, as those rtl operators aren't defined when
1513 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
;; V2DF signed-max expander.  When flag_finite_math_only is off, operand 1
;; is forced into a register first; then the usual binary-operand fixup
;; runs.
1515 (define_expand "smaxv2df3"
1516 [(set (match_operand:V2DF 0 "register_operand" "")
1517 (smax:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1518 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1521 if (!flag_finite_math_only)
1522 operands[1] = force_reg (V2DFmode, operands[1]);
1523 ix86_fixup_binary_operands_no_copy (SMAX, V2DFmode, operands);
;; maxpd, matched only under flag_finite_math_only; here operand 1 is
;; marked commutative ("%0").
1526 (define_insn "*smaxv2df3_finite"
1527 [(set (match_operand:V2DF 0 "register_operand" "=x")
1528 (smax:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1529 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1530 "TARGET_SSE2 && flag_finite_math_only
1531 && ix86_binary_operator_ok (SMAX, V2DFmode, operands)"
1532 "maxpd\t{%2, %0|%0, %2}"
1533 [(set_attr "type" "sseadd")
1534 (set_attr "mode" "V2DF")])
;; maxpd without the commutative marker: operand 1 must be a register
;; tied to the destination, so operand order is preserved.
1536 (define_insn "*smaxv2df3"
1537 [(set (match_operand:V2DF 0 "register_operand" "=x")
1538 (smax:V2DF (match_operand:V2DF 1 "register_operand" "0")
1539 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1541 "maxpd\t{%2, %0|%0, %2}"
1542 [(set_attr "type" "sseadd")
1543 (set_attr "mode" "V2DF")])
;; maxsd on V2DF operands (mode attribute DF), matched only under
;; flag_finite_math_only; operand 1 is marked commutative.
1545 (define_insn "*sse2_vmsmaxv2df3_finite"
1546 [(set (match_operand:V2DF 0 "register_operand" "=x")
1548 (smax:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1549 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1552 "TARGET_SSE2 && flag_finite_math_only
1553 && ix86_binary_operator_ok (SMAX, V2DFmode, operands)"
1554 "maxsd\t{%2, %0|%0, %2}"
1555 [(set_attr "type" "sseadd")
1556 (set_attr "mode" "DF")])
;; maxsd on V2DF operands (mode attribute DF) without the commutative
;; marker; operand 1 is a register tied to the destination.
1558 (define_insn "sse2_vmsmaxv2df3"
1559 [(set (match_operand:V2DF 0 "register_operand" "=x")
1561 (smax:V2DF (match_operand:V2DF 1 "register_operand" "0")
1562 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1566 "maxsd\t{%2, %0|%0, %2}"
1567 [(set_attr "type" "sseadd")
1568 (set_attr "mode" "DF")])
;; V2DF signed-min expander.  When flag_finite_math_only is off, operand 1
;; is forced into a register first; then the usual binary-operand fixup
;; runs.
1570 (define_expand "sminv2df3"
1571 [(set (match_operand:V2DF 0 "register_operand" "")
1572 (smin:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1573 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1576 if (!flag_finite_math_only)
1577 operands[1] = force_reg (V2DFmode, operands[1]);
1578 ix86_fixup_binary_operands_no_copy (SMIN, V2DFmode, operands);
;; minpd, matched only under flag_finite_math_only; here operand 1 is
;; marked commutative ("%0").
1581 (define_insn "*sminv2df3_finite"
1582 [(set (match_operand:V2DF 0 "register_operand" "=x")
1583 (smin:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1584 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1585 "TARGET_SSE2 && flag_finite_math_only
1586 && ix86_binary_operator_ok (SMIN, V2DFmode, operands)"
1587 "minpd\t{%2, %0|%0, %2}"
1588 [(set_attr "type" "sseadd")
1589 (set_attr "mode" "V2DF")])
;; minpd without the commutative marker: operand 1 must be a register
;; tied to the destination, so operand order is preserved.
1591 (define_insn "*sminv2df3"
1592 [(set (match_operand:V2DF 0 "register_operand" "=x")
1593 (smin:V2DF (match_operand:V2DF 1 "register_operand" "0")
1594 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1596 "minpd\t{%2, %0|%0, %2}"
1597 [(set_attr "type" "sseadd")
1598 (set_attr "mode" "V2DF")])
;; minsd on V2DF operands (mode attribute DF), matched only under
;; flag_finite_math_only; operand 1 is marked commutative.
1600 (define_insn "*sse2_vmsminv2df3_finite"
1601 [(set (match_operand:V2DF 0 "register_operand" "=x")
1603 (smin:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1604 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1607 "TARGET_SSE2 && flag_finite_math_only
1608 && ix86_binary_operator_ok (SMIN, V2DFmode, operands)"
1609 "minsd\t{%2, %0|%0, %2}"
1610 [(set_attr "type" "sseadd")
1611 (set_attr "mode" "DF")])
;; minsd on V2DF operands (mode attribute DF) without the commutative
;; marker; operand 1 is a register tied to the destination.
1613 (define_insn "sse2_vmsminv2df3"
1614 [(set (match_operand:V2DF 0 "register_operand" "=x")
1616 (smin:V2DF (match_operand:V2DF 1 "register_operand" "0")
1617 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1621 "minsd\t{%2, %0|%0, %2}"
1622 [(set_attr "type" "sseadd")
1623 (set_attr "mode" "DF")])
;; addsubpd.  The pattern pairs an expression on operands 1 and 2 with a
;; minus of the same two operands (match_dup); operand 1 is tied to the
;; destination.
1625 (define_insn "sse3_addsubv2df3"
1626 [(set (match_operand:V2DF 0 "register_operand" "=x")
1629 (match_operand:V2DF 1 "register_operand" "0")
1630 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1631 (minus:V2DF (match_dup 1) (match_dup 2))
1634 "addsubpd\t{%2, %0|%0, %2}"
1635 [(set_attr "type" "sseadd")
1636 (set_attr "mode" "V2DF")])
;; haddpd.  Each of operands 1 and 2 contributes its element 0 and its
;; element 1 (via vec_select:DF); operand 1 is tied to the destination.
1638 (define_insn "sse3_haddv2df3"
1639 [(set (match_operand:V2DF 0 "register_operand" "=x")
1643 (match_operand:V2DF 1 "register_operand" "0")
1644 (parallel [(const_int 0)]))
1645 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1648 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
1649 (parallel [(const_int 0)]))
1650 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1652 "haddpd\t{%2, %0|%0, %2}"
1653 [(set_attr "type" "sseadd")
1654 (set_attr "mode" "V2DF")])
;; hsubpd.  Each of operands 1 and 2 contributes its element 0 and its
;; element 1 (via vec_select:DF); operand 1 is tied to the destination.
1656 (define_insn "sse3_hsubv2df3"
1657 [(set (match_operand:V2DF 0 "register_operand" "=x")
1661 (match_operand:V2DF 1 "register_operand" "0")
1662 (parallel [(const_int 0)]))
1663 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1666 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
1667 (parallel [(const_int 0)]))
1668 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1670 "hsubpd\t{%2, %0|%0, %2}"
1671 [(set_attr "type" "sseadd")
1672 (set_attr "mode" "V2DF")])
;; Sum reduction of a V2DF register: emits sse3_haddv2df3 with operand 1
;; supplied as both inputs.
1674 (define_expand "reduc_splus_v2df"
1675 [(match_operand:V2DF 0 "register_operand" "")
1676 (match_operand:V2DF 1 "register_operand" "")]
1679 emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
1683 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1685 ;; Parallel double-precision floating point comparisons
1687 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Packed compare: the comparison operator is operand 3 and is printed
;; into the mnemonic through %D3 (cmp..pd).  Operand 1 is tied to the
;; destination.
1689 (define_insn "sse2_maskcmpv2df3"
1690 [(set (match_operand:V2DF 0 "register_operand" "=x")
1691 (match_operator:V2DF 3 "sse_comparison_operator"
1692 [(match_operand:V2DF 1 "register_operand" "0")
1693 (match_operand:V2DF 2 "nonimmediate_operand" "xm")]))]
1695 "cmp%D3pd\t{%2, %0|%0, %2}"
1696 [(set_attr "type" "ssecmp")
1697 (set_attr "mode" "V2DF")])
;; Scalar form of the compare above: same operator operand 3, printed
;; through %D3 into a cmp..sd mnemonic; mode attribute DF.
1699 (define_insn "sse2_vmmaskcmpv2df3"
1700 [(set (match_operand:V2DF 0 "register_operand" "=x")
1702 (match_operator:V2DF 3 "sse_comparison_operator"
1703 [(match_operand:V2DF 1 "register_operand" "0")
1704 (match_operand:V2DF 2 "nonimmediate_operand" "xm")])
1708 "cmp%D3sd\t{%2, %0|%0, %2}"
1709 [(set_attr "type" "ssecmp")
1710 (set_attr "mode" "DF")])
;; comisd: sets FLAGS_REG in CCFP mode from element 0 of operand 0 and
;; element 0 of operand 1 (register or memory).
1712 (define_insn "sse2_comi"
1713 [(set (reg:CCFP FLAGS_REG)
1716 (match_operand:V2DF 0 "register_operand" "x")
1717 (parallel [(const_int 0)]))
1719 (match_operand:V2DF 1 "nonimmediate_operand" "xm")
1720 (parallel [(const_int 0)]))))]
1722 "comisd\t{%1, %0|%0, %1}"
1723 [(set_attr "type" "ssecomi")
1724 (set_attr "mode" "DF")])
;; ucomisd: sets FLAGS_REG in CCFPU mode from element 0 of operand 0 and
;; element 0 of operand 1 (register or memory).
1726 (define_insn "sse2_ucomi"
1727 [(set (reg:CCFPU FLAGS_REG)
1730 (match_operand:V2DF 0 "register_operand" "x")
1731 (parallel [(const_int 0)]))
1733 (match_operand:V2DF 1 "nonimmediate_operand" "xm")
1734 (parallel [(const_int 0)]))))]
1736 "ucomisd\t{%1, %0|%0, %1}"
1737 [(set_attr "type" "ssecomi")
1738 (set_attr "mode" "DF")])
;; V2DF vector conditional: operator 3 compares operands 4 and 5, and
;; operands 1 and 2 are the two values selected between.  Expansion is
;; attempted through ix86_expand_fp_vcond.
1740 (define_expand "vcondv2df"
1741 [(set (match_operand:V2DF 0 "register_operand" "")
1743 (match_operator 3 ""
1744 [(match_operand:V2DF 4 "nonimmediate_operand" "")
1745 (match_operand:V2DF 5 "nonimmediate_operand" "")])
1746 (match_operand:V2DF 1 "general_operand" "")
1747 (match_operand:V2DF 2 "general_operand" "")))]
1750 if (ix86_expand_fp_vcond (operands))
1756 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1758 ;; Parallel double-precision floating point logical operations
1760 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; V2DF bitwise AND expander; operands are adjusted by
;; ix86_fixup_binary_operands_no_copy before the pattern is emitted.
1762 (define_expand "andv2df3"
1763 [(set (match_operand:V2DF 0 "register_operand" "")
1764 (and:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1765 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1767 "ix86_fixup_binary_operands_no_copy (AND, V2DFmode, operands);")
;; andpd.  Operand 1 is tied to the destination and marked commutative;
;; operand 2 may be a register or memory.
1769 (define_insn "*andv2df3"
1770 [(set (match_operand:V2DF 0 "register_operand" "=x")
1771 (and:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1772 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1773 "TARGET_SSE2 && ix86_binary_operator_ok (AND, V2DFmode, operands)"
1774 "andpd\t{%2, %0|%0, %2}"
1775 [(set_attr "type" "sselog")
1776 (set_attr "mode" "V2DF")])
;; andnpd: (NOT operand 1) AND operand 2.  The complemented operand is
;; the one tied to the destination, so the pattern is not commutative.
1778 (define_insn "sse2_nandv2df3"
1779 [(set (match_operand:V2DF 0 "register_operand" "=x")
1780 (and:V2DF (not:V2DF (match_operand:V2DF 1 "register_operand" "0"))
1781 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1783 "andnpd\t{%2, %0|%0, %2}"
1784 [(set_attr "type" "sselog")
1785 (set_attr "mode" "V2DF")])
;; V2DF bitwise OR expander; operands are adjusted by
;; ix86_fixup_binary_operands_no_copy before the pattern is emitted.
1787 (define_expand "iorv2df3"
1788 [(set (match_operand:V2DF 0 "register_operand" "")
1789 (ior:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1790 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1792 "ix86_fixup_binary_operands_no_copy (IOR, V2DFmode, operands);")
;; orpd.  Operand 1 is tied to the destination and marked commutative;
;; operand 2 may be a register or memory.
1794 (define_insn "*iorv2df3"
1795 [(set (match_operand:V2DF 0 "register_operand" "=x")
1796 (ior:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1797 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1798 "TARGET_SSE2 && ix86_binary_operator_ok (IOR, V2DFmode, operands)"
1799 "orpd\t{%2, %0|%0, %2}"
1800 [(set_attr "type" "sselog")
1801 (set_attr "mode" "V2DF")])
;; V2DF bitwise XOR expander; operands are adjusted by
;; ix86_fixup_binary_operands_no_copy before the pattern is emitted.
1803 (define_expand "xorv2df3"
1804 [(set (match_operand:V2DF 0 "register_operand" "")
1805 (xor:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1806 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1808 "ix86_fixup_binary_operands_no_copy (XOR, V2DFmode, operands);")
;; xorpd.  Operand 1 is tied to the destination and marked commutative;
;; operand 2 may be a register or memory.
1810 (define_insn "*xorv2df3"
1811 [(set (match_operand:V2DF 0 "register_operand" "=x")
1812 (xor:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1813 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1814 "TARGET_SSE2 && ix86_binary_operator_ok (XOR, V2DFmode, operands)"
1815 "xorpd\t{%2, %0|%0, %2}"
1816 [(set_attr "type" "sselog")
1817 (set_attr "mode" "V2DF")])
1819 ;; Also define scalar versions. These are used for abs, neg, and
1820 ;; conditional move. Using subregs into vector modes causes register
1821 ;; allocation lossage. These patterns do not allow memory operands
1822 ;; because the native instructions read the full 128-bits.
;; Scalar DF AND implemented with the packed andpd; both inputs must be
;; registers (no memory alternative).
1824 (define_insn "*anddf3"
1825 [(set (match_operand:DF 0 "register_operand" "=x")
1826 (and:DF (match_operand:DF 1 "register_operand" "0")
1827 (match_operand:DF 2 "register_operand" "x")))]
1829 "andpd\t{%2, %0|%0, %2}"
1830 [(set_attr "type" "sselog")
1831 (set_attr "mode" "V2DF")])
;; Scalar DF (NOT operand 1) AND operand 2, implemented with the packed
;; andnpd; both inputs must be registers.
1833 (define_insn "*nanddf3"
1834 [(set (match_operand:DF 0 "register_operand" "=x")
1835 (and:DF (not:DF (match_operand:DF 1 "register_operand" "0"))
1836 (match_operand:DF 2 "register_operand" "x")))]
1838 "andnpd\t{%2, %0|%0, %2}"
1839 [(set_attr "type" "sselog")
1840 (set_attr "mode" "V2DF")])
;; Scalar DF OR implemented with the packed orpd; both inputs must be
;; registers.
1842 (define_insn "*iordf3"
1843 [(set (match_operand:DF 0 "register_operand" "=x")
1844 (ior:DF (match_operand:DF 1 "register_operand" "0")
1845 (match_operand:DF 2 "register_operand" "x")))]
1847 "orpd\t{%2, %0|%0, %2}"
1848 [(set_attr "type" "sselog")
1849 (set_attr "mode" "V2DF")])
;; Scalar DF XOR implemented with the packed xorpd; both inputs must be
;; registers.
1851 (define_insn "*xordf3"
1852 [(set (match_operand:DF 0 "register_operand" "=x")
1853 (xor:DF (match_operand:DF 1 "register_operand" "0")
1854 (match_operand:DF 2 "register_operand" "x")))]
1856 "xorpd\t{%2, %0|%0, %2}"
1857 [(set_attr "type" "sselog")
1858 (set_attr "mode" "V2DF")])
1860 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1862 ;; Parallel double-precision floating point conversion operations
1864 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2pd: V2SI to V2DF.  The source is an MMX register ("y") or
;; memory; the "unit" attribute is mmx only for the register alternative.
1866 (define_insn "sse2_cvtpi2pd"
1867 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1868 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
1870 "cvtpi2pd\t{%1, %0|%0, %1}"
1871 [(set_attr "type" "ssecvt")
1872 (set_attr "unit" "mmx,*")
1873 (set_attr "mode" "V2DF")])
;; cvtpd2pi: V2DF (register or memory) to V2SI in an MMX register,
;; modeled as UNSPEC_FIX_NOTRUNC.
1875 (define_insn "sse2_cvtpd2pi"
1876 [(set (match_operand:V2SI 0 "register_operand" "=y")
1877 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
1878 UNSPEC_FIX_NOTRUNC))]
1880 "cvtpd2pi\t{%1, %0|%0, %1}"
1881 [(set_attr "type" "ssecvt")
1882 (set_attr "unit" "mmx")
1883 (set_attr "mode" "DI")])
;; cvttpd2pi: V2DF (register or memory) to V2SI in an MMX register,
;; modeled with the generic fix rtx.
;; NOTE(review): "mode" is TI here but DI in sse2_cvtpd2pi -- confirm
;; which is intended.
1885 (define_insn "sse2_cvttpd2pi"
1886 [(set (match_operand:V2SI 0 "register_operand" "=y")
1887 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
1889 "cvttpd2pi\t{%1, %0|%0, %1}"
1890 [(set_attr "type" "ssecvt")
1891 (set_attr "unit" "mmx")
1892 (set_attr "mode" "TI")])
;; cvtsi2sd: converts SImode operand 2 (general register or memory) to
;; DF; V2DF operand 1 is tied to the destination.
1894 (define_insn "sse2_cvtsi2sd"
1895 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1898 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
1899 (match_operand:V2DF 1 "register_operand" "0,0")
1902 "cvtsi2sd\t{%2, %0|%0, %2}"
1903 [(set_attr "type" "sseicvt")
1904 (set_attr "mode" "DF")
1905 (set_attr "athlon_decode" "double,direct")])
;; cvtsi2sdq: DImode variant of sse2_cvtsi2sd; needs TARGET_SSE2 and
;; TARGET_64BIT.
1907 (define_insn "sse2_cvtsi2sdq"
1908 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1911 (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m")))
1912 (match_operand:V2DF 1 "register_operand" "0,0")
1914 "TARGET_SSE2 && TARGET_64BIT"
1915 "cvtsi2sdq\t{%2, %0|%0, %2}"
1916 [(set_attr "type" "sseicvt")
1917 (set_attr "mode" "DF")
1918 (set_attr "athlon_decode" "double,direct")])
;; cvtsd2si: element 0 of V2DF operand 1 (register or memory) to an
;; SImode general register, modeled as UNSPEC_FIX_NOTRUNC.
1920 (define_insn "sse2_cvtsd2si"
1921 [(set (match_operand:SI 0 "register_operand" "=r,r")
1924 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
1925 (parallel [(const_int 0)]))]
1926 UNSPEC_FIX_NOTRUNC))]
1928 "cvtsd2si\t{%1, %0|%0, %1}"
1929 [(set_attr "type" "sseicvt")
1930 (set_attr "athlon_decode" "double,vector")
1931 (set_attr "mode" "SI")])
;; cvtsd2siq: DImode variant of sse2_cvtsd2si; needs TARGET_SSE2 and
;; TARGET_64BIT.
1933 (define_insn "sse2_cvtsd2siq"
1934 [(set (match_operand:DI 0 "register_operand" "=r,r")
1937 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
1938 (parallel [(const_int 0)]))]
1939 UNSPEC_FIX_NOTRUNC))]
1940 "TARGET_SSE2 && TARGET_64BIT"
1941 "cvtsd2siq\t{%1, %0|%0, %1}"
1942 [(set_attr "type" "sseicvt")
1943 (set_attr "athlon_decode" "double,vector")
1944 (set_attr "mode" "DI")])
;; cvttsd2si: element 0 of V2DF operand 1 (register or memory) to an
;; SImode general register.
1946 (define_insn "sse2_cvttsd2si"
1947 [(set (match_operand:SI 0 "register_operand" "=r,r")
1950 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
1951 (parallel [(const_int 0)]))))]
1953 "cvttsd2si\t{%1, %0|%0, %1}"
1954 [(set_attr "type" "sseicvt")
1955 (set_attr "mode" "SI")
1956 (set_attr "athlon_decode" "double,vector")])
;; cvttsd2siq: DImode variant of sse2_cvttsd2si; needs TARGET_SSE2 and
;; TARGET_64BIT.
1958 (define_insn "sse2_cvttsd2siq"
1959 [(set (match_operand:DI 0 "register_operand" "=r,r")
1962 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
1963 (parallel [(const_int 0)]))))]
1964 "TARGET_SSE2 && TARGET_64BIT"
1965 "cvttsd2siq\t{%1, %0|%0, %1}"
1966 [(set_attr "type" "sseicvt")
1967 (set_attr "mode" "DI")
1968 (set_attr "athlon_decode" "double,vector")])
;; cvtdq2pd: uses elements 0 and 1 of V4SI operand 1 (register or
;; memory) to produce a V2DF result.
1970 (define_insn "sse2_cvtdq2pd"
1971 [(set (match_operand:V2DF 0 "register_operand" "=x")
1974 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
1975 (parallel [(const_int 0) (const_int 1)]))))]
1977 "cvtdq2pd\t{%1, %0|%0, %1}"
1978 [(set_attr "type" "ssecvt")
1979 (set_attr "mode" "V2DF")])
;; Expander for cvtpd2dq: supplies operands[2] as the V2SImode zero
;; constant, which the matching insn *sse2_cvtpd2dq requires.
1981 (define_expand "sse2_cvtpd2dq"
1982 [(set (match_operand:V4SI 0 "register_operand" "")
1984 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "")]
1988 "operands[2] = CONST0_RTX (V2SImode);")
;; cvtpd2dq: V2DF source converted to V2SI (unspec) and paired with a
;; constant-zero V2SI operand 2 to form the V4SI destination.
1990 (define_insn "*sse2_cvtpd2dq"
1991 [(set (match_operand:V4SI 0 "register_operand" "=x")
1993 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
1995 (match_operand:V2SI 2 "const0_operand" "")))]
1997 "cvtpd2dq\t{%1, %0|%0, %1}"
1998 [(set_attr "type" "ssecvt")
1999 (set_attr "mode" "TI")])
;; Expander for cvttpd2dq: supplies operands[2] as the V2SImode zero
;; constant, which the matching insn *sse2_cvttpd2dq requires.
2001 (define_expand "sse2_cvttpd2dq"
2002 [(set (match_operand:V4SI 0 "register_operand" "")
2004 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" ""))
2007 "operands[2] = CONST0_RTX (V2SImode);")
;; cvttpd2dq: V2DF source converted with fix:V2SI and paired with a
;; constant-zero V2SI operand 2 to form the V4SI destination.
2009 (define_insn "*sse2_cvttpd2dq"
2010 [(set (match_operand:V4SI 0 "register_operand" "=x")
2012 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2013 (match_operand:V2SI 2 "const0_operand" "")))]
2015 "cvttpd2dq\t{%1, %0|%0, %1}"
2016 [(set_attr "type" "ssecvt")
2017 (set_attr "mode" "TI")])
;; cvtsd2ss: float_truncate of V2DF operand 2 (register or memory);
;; V4SF operand 1 is tied to the destination.
2019 (define_insn "sse2_cvtsd2ss"
2020 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2023 (float_truncate:V2SF
2024 (match_operand:V2DF 2 "nonimmediate_operand" "x,m")))
2025 (match_operand:V4SF 1 "register_operand" "0,0")
2028 "cvtsd2ss\t{%2, %0|%0, %2}"
2029 [(set_attr "type" "ssecvt")
2030 (set_attr "athlon_decode" "vector,double")
2031 (set_attr "mode" "SF")])
;; cvtss2sd: selects elements 0 and 1 of V4SF operand 2 (register or
;; memory); V2DF operand 1 is tied to the destination.
2033 (define_insn "sse2_cvtss2sd"
2034 [(set (match_operand:V2DF 0 "register_operand" "=x")
2038 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
2039 (parallel [(const_int 0) (const_int 1)])))
2040 (match_operand:V2DF 1 "register_operand" "0")
2043 "cvtss2sd\t{%2, %0|%0, %2}"
2044 [(set_attr "type" "ssecvt")
2045 (set_attr "mode" "DF")])
;; Expander for cvtpd2ps: supplies operands[2] as the V2SFmode zero
;; constant, which the matching insn *sse2_cvtpd2ps requires.
2047 (define_expand "sse2_cvtpd2ps"
2048 [(set (match_operand:V4SF 0 "register_operand" "")
2050 (float_truncate:V2SF
2051 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2054 "operands[2] = CONST0_RTX (V2SFmode);")
;; cvtpd2ps: V2DF source truncated to V2SF and paired with a
;; constant-zero V2SF operand 2 to form the V4SF destination.
2056 (define_insn "*sse2_cvtpd2ps"
2057 [(set (match_operand:V4SF 0 "register_operand" "=x")
2059 (float_truncate:V2SF
2060 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2061 (match_operand:V2SF 2 "const0_operand" "")))]
2063 "cvtpd2ps\t{%1, %0|%0, %1}"
2064 [(set_attr "type" "ssecvt")
2065 (set_attr "mode" "V4SF")])
;; cvtps2pd: uses elements 0 and 1 of V4SF operand 1 (register or
;; memory) to produce a V2DF result.
2067 (define_insn "sse2_cvtps2pd"
2068 [(set (match_operand:V2DF 0 "register_operand" "=x")
2071 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
2072 (parallel [(const_int 0) (const_int 1)]))))]
2074 "cvtps2pd\t{%1, %0|%0, %1}"
2075 [(set_attr "type" "ssecvt")
2076 (set_attr "mode" "V2DF")])
2078 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2080 ;; Parallel double-precision floating point element swizzling
2082 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; V2DF high interleave expander.  If both inputs are memory, operand 1
;; is forced into a register, since the matching insn rejects two memory
;; inputs.
2084 (define_expand "vec_interleave_highv2df"
2085 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
2088 (match_operand:V2DF 1 "nonimmediate_operand" "")
2089 (match_operand:V2DF 2 "nonimmediate_operand" ""))
2090 (parallel [(const_int 1)
2094 if (MEM_P (operands[1]) && MEM_P (operands[2]))
2095 operands[1] = force_reg (V2DFmode, operands[1]);
;; Three alternatives: register unpckhpd; movlpd reading operand 1
;; through the %H1 modifier when it is offsettable memory; movhpd when
;; the destination is memory.  Rejected when operands 1 and 2 are both
;; memory.
2098 (define_insn "*sse2_unpckhpd"
2099 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
2102 (match_operand:V2DF 1 "nonimmediate_operand" " 0,o,x")
2103 (match_operand:V2DF 2 "nonimmediate_operand" " x,0,0"))
2104 (parallel [(const_int 1)
2106 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2108 unpckhpd\t{%2, %0|%0, %2}
2109 movlpd\t{%H1, %0|%0, %H1}
2110 movhpd\t{%1, %0|%0, %1}"
2111 [(set_attr "type" "sselog,ssemov,ssemov")
2112 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; movddup.  Two alternatives: a register destination with a register or
;; memory source, or an offsettable-memory destination with a register
;; source.
;; The condition rejects the memory-to-memory case, so it must test
;; operands 0 and 1, the only match_operands in this pattern.  It
;; previously tested operands[2], which this pattern never sets.
2114 (define_insn "*sse3_movddup"
2115 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,o")
2118 (match_operand:V2DF 1 "nonimmediate_operand" "xm,x")
2120 (parallel [(const_int 0)
2122 "TARGET_SSE3 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2124 movddup\t{%1, %0|%0, %1}
2126 [(set_attr "type" "sselog,ssemov")
2127 (set_attr "mode" "V2DF")])
;; After reload (TARGET_SSE3), a V2DF memory destination fed from a
;; register is written as two DFmode stores of the same value: the source
;; register viewed as DFmode goes to byte offsets 0 and 8 of operand 0.
2130 [(set (match_operand:V2DF 0 "memory_operand" "")
2133 (match_operand:V2DF 1 "register_operand" "")
2135 (parallel [(const_int 0)
2137 "TARGET_SSE3 && reload_completed"
2140 rtx low = gen_rtx_REG (DFmode, REGNO (operands[1]));
2141 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
2142 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
;; Expander for the low-element interleave of two V2DF operands.  When
;; both inputs are in memory, operand 1 is forced into a register first.
2146 (define_expand "vec_interleave_lowv2df"
2147 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
2150 (match_operand:V2DF 1 "nonimmediate_operand" "")
2151 (match_operand:V2DF 2 "nonimmediate_operand" ""))
2152 (parallel [(const_int 0)
2156 if (MEM_P (operands[1]) && MEM_P (operands[2]))
2157 operands[1] = force_reg (V2DFmode, operands[1]);
;; Matching insn for the V2DF low interleave.  Operand 1 is tied to the
;; destination in every alternative:
;;   0: register operand 2                     -> unpcklpd
;;   1: memory operand 2                       -> movhpd load
;;   2: offsettable memory destination, reg 2  -> movlpd store to %H0
2160 (define_insn "*sse2_unpcklpd"
2161 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o")
2164 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0")
2165 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x"))
2166 (parallel [(const_int 0)
2168 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2170 unpcklpd\t{%2, %0|%0, %2}
2171 movhpd\t{%2, %0|%0, %2}
2172 movlpd\t{%2, %H0|%H0, %2}"
2173 [(set_attr "type" "sselog,ssemov,ssemov")
2174 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; Expander taking the shufpd immediate as const_int operand 3.  It emits
;; sse2_shufpd_1 with selector operands derived from the mask bits; bit 1
;; of the mask picks 3 versus 2 for the last selector.
2176 (define_expand "sse2_shufpd"
2177 [(match_operand:V2DF 0 "register_operand" "")
2178 (match_operand:V2DF 1 "register_operand" "")
2179 (match_operand:V2DF 2 "nonimmediate_operand" "")
2180 (match_operand:SI 3 "const_int_operand" "")]
2183 int mask = INTVAL (operands[3]);
2184 emit_insn (gen_sse2_shufpd_1 (operands[0], operands[1], operands[2],
2186 GEN_INT (mask & 2 ? 3 : 2)));
;; shufpd with explicit element selectors: operand 3 must satisfy
;; const_0_to_1_operand and operand 4 const_2_to_3_operand.  The output
;; code folds them back into the instruction immediate as
;; operand3 | ((operand4 - 2) << 1) and prints it through operand 3.
2190 (define_insn "sse2_shufpd_1"
2191 [(set (match_operand:V2DF 0 "register_operand" "=x")
2194 (match_operand:V2DF 1 "register_operand" "0")
2195 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
2196 (parallel [(match_operand 3 "const_0_to_1_operand" "")
2197 (match_operand 4 "const_2_to_3_operand" "")])))]
2201 mask = INTVAL (operands[3]);
2202 mask |= (INTVAL (operands[4]) - 2) << 1;
2203 operands[3] = GEN_INT (mask);
2205 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
2207 [(set_attr "type" "sselog")
2208 (set_attr "mode" "V2DF")])
;; Extract element 1 of V2DF operand 1 into a DF destination.  The
;; condition forbids a memory-to-memory form.  Alternative 0 (memory
;; destination, register source) is a movhpd store.
2210 (define_insn "sse2_storehpd"
2211 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x*fr")
2213 (match_operand:V2DF 1 "nonimmediate_operand" " x,0,o")
2214 (parallel [(const_int 1)])))]
2215 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2217 movhpd\t{%1, %0|%0, %1}
2220 [(set_attr "type" "ssemov,sselog1,ssemov")
2221 (set_attr "mode" "V1DF,V2DF,DF")])
;; After reload, extracting element 1 from a V2DF in memory into a
;; register becomes a plain DFmode move from byte offset 8 of that memory.
2224 [(set (match_operand:DF 0 "register_operand" "")
2226 (match_operand:V2DF 1 "memory_operand" "")
2227 (parallel [(const_int 1)])))]
2228 "TARGET_SSE2 && reload_completed"
2229 [(set (match_dup 0) (match_dup 1))]
2231 operands[1] = adjust_address (operands[1], DFmode, 8);
;; Extract element 0 of V2DF operand 1 into a DF destination.  The
;; condition forbids a memory-to-memory form.  Alternative 0 (memory
;; destination, register source) is a movlpd store.
2234 (define_insn "sse2_storelpd"
2235 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x*fr")
2237 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m")
2238 (parallel [(const_int 0)])))]
2239 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2241 movlpd\t{%1, %0|%0, %1}
2244 [(set_attr "type" "ssemov")
2245 (set_attr "mode" "V1DF,DF,DF")])
;; After reload, extracting element 0 into a register becomes an ordinary
;; DFmode move.  The source is re-expressed in DFmode either as a REG with
;; the same register number or through gen_lowpart.
2248 [(set (match_operand:DF 0 "register_operand" "")
2250 (match_operand:V2DF 1 "nonimmediate_operand" "")
2251 (parallel [(const_int 0)])))]
2252 "TARGET_SSE2 && reload_completed"
2255 rtx op1 = operands[1];
2257 op1 = gen_rtx_REG (DFmode, REGNO (op1));
2259 op1 = gen_lowpart (DFmode, op1);
2260 emit_move_insn (operands[0], op1);
;; Build a V2DF from element 0 of operand 1 and DF operand 2.  The
;; condition rejects operands 1 and 2 both being memory.  Visible
;; alternatives:
;;   0: operand 2 in memory, operand 1 tied    -> movhpd load
;;   1: operand 2 in a register, operand 1 tied -> unpcklpd
;;   2: operand 2 tied to the destination       -> shufpd $1 with operand 1
;; Alternative 3 has an offsettable memory destination.
2264 (define_insn "sse2_loadhpd"
2265 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,o")
2268 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,x,0")
2269 (parallel [(const_int 0)]))
2270 (match_operand:DF 2 "nonimmediate_operand" " m,x,0,x*fr")))]
2271 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2273 movhpd\t{%2, %0|%0, %2}
2274 unpcklpd\t{%2, %0|%0, %2}
2275 shufpd\t{$1, %1, %0|%0, %1, 1}
2277 [(set_attr "type" "ssemov,sselog,sselog,other")
2278 (set_attr "mode" "V1DF,V2DF,V2DF,DF")])
;; After reload, the in-memory form that keeps element 0 of operand 0 and
;; supplies DF register operand 1 as the other element reduces to a single
;; DFmode store of operand 1 at byte offset 8 of the destination.
2281 [(set (match_operand:V2DF 0 "memory_operand" "")
2283 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
2284 (match_operand:DF 1 "register_operand" "")))]
2285 "TARGET_SSE2 && reload_completed"
2286 [(set (match_dup 0) (match_dup 1))]
2288 operands[0] = adjust_address (operands[0], DFmode, 8);
;; Build a V2DF from DF operand 2 and element 1 of operand 1.  Operand 1
;; may also be the zero vector (constraint C, alternative 0).  The
;; condition rejects operands 1 and 2 both being memory.  Visible
;; alternatives:
;;   0: operand 2 from memory, operand 1 zero    -> movsd load
;;   1: operand 2 from memory, operand 1 tied    -> movlpd load
;;   2: operand 2 in a register, operand 1 tied  -> movsd
;;   3: operand 2 tied, operand 1 in a register  -> shufpd $2
;;   4: operand 2 tied, operand 1 in memory      -> movhpd from %H1
;; Alternative 5 has a memory destination.
2291 (define_insn "sse2_loadlpd"
2292 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m")
2294 (match_operand:DF 2 "nonimmediate_operand" " m,m,x,0,0,x*fr")
2296 (match_operand:V2DF 1 "vector_move_operand" " C,0,0,x,o,0")
2297 (parallel [(const_int 1)]))))]
2298 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2300 movsd\t{%2, %0|%0, %2}
2301 movlpd\t{%2, %0|%0, %2}
2302 movsd\t{%2, %0|%0, %2}
2303 shufpd\t{$2, %2, %0|%0, %2, 2}
2304 movhpd\t{%H1, %0|%0, %H1}
2306 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,other")
2307 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,DF")])
;; After reload, the in-memory form that supplies DF register operand 1 as
;; the first element and keeps element 1 of operand 0 reduces to a single
;; DFmode store.  The new value is element 0 of the result, so the store
;; must go to byte offset 0.  Storing at offset 8 would overwrite the
;; element this pattern preserves and leave element 0 stale.
2310 [(set (match_operand:V2DF 0 "memory_operand" "")
2312 (match_operand:DF 1 "register_operand" "")
2313 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
2314 "TARGET_SSE2 && reload_completed"
2315 [(set (match_dup 0) (match_dup 1))]
2317 operands[0] = adjust_address (operands[0], DFmode, 0);
;; Combine V2DF operands 2 and 1 into operand 0.  Six alternatives, by
;; where each operand lives:
;;   0: op2 register, op1 tied              -> movsd
;;   1: op2 memory,   op1 tied              -> movlpd load
;;   2: memory destination, op2 register    -> movlpd store
;;   3: op2 tied, op1 register              -> shufpd $2
;;   4: op2 tied, op1 offsettable memory    -> movhps from %H1
;;   5: offsettable memory destination      -> movhps store to %H0
2320 (define_insn "sse2_movsd"
2321 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m,x,x,o")
2323 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x,0,0,0")
2324 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0,x,o,x")
2328 movsd\t{%2, %0|%0, %2}
2329 movlpd\t{%2, %0|%0, %2}
2330 movlpd\t{%2, %0|%0, %2}
2331 shufpd\t{$2, %2, %0|%0, %2, 2}
2332 movhps\t{%H1, %0|%0, %H1}
2333 movhps\t{%1, %H0|%H0, %1}"
2334 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
2335 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,V1DF")])
;; V2DF result produced from a single DF operand (register or memory)
;; with movddup.
2337 (define_insn "*vec_dupv2df_sse3"
2338 [(set (match_operand:V2DF 0 "register_operand" "=x")
2340 (match_operand:DF 1 "nonimmediate_operand" "xm")))]
2342 "movddup\t{%1, %0|%0, %1}"
2343 [(set_attr "type" "sselog1")
2344 (set_attr "mode" "DF")])
;; Register-only variant of the V2DF duplicate: the DF source must be a
;; register and is tied to the destination (constraint "0").
2346 (define_insn "*vec_dupv2df"
2347 [(set (match_operand:V2DF 0 "register_operand" "=x")
2349 (match_operand:DF 1 "register_operand" "0")))]
2352 [(set_attr "type" "sselog1")
2353 (set_attr "mode" "V4SF")])
;; V2DF concat pattern whose output is a single movddup from DF operand 1
;; (register or memory).
2355 (define_insn "*vec_concatv2df_sse3"
2356 [(set (match_operand:V2DF 0 "register_operand" "=x")
2358 (match_operand:DF 1 "nonimmediate_operand" "xm")
2361 "movddup\t{%1, %0|%0, %1}"
2362 [(set_attr "type" "sselog1")
2363 (set_attr "mode" "DF")])
;; Build a V2DF from DF operands 1 and 2.  Five alternatives:
;;   0: Y-class regs, op1 tied, op2 register -> unpcklpd
;;   1: Y-class regs, op1 tied, op2 memory   -> movhpd load
;;   2: op1 memory, op2 zero (constraint C)  -> movsd load of op1
;;   3: x-class regs, op1 tied, op2 register -> movlhps
;;   4: x-class regs, op1 tied, op2 memory   -> movhps load
2365 (define_insn "*vec_concatv2df"
2366 [(set (match_operand:V2DF 0 "register_operand" "=Y,Y,Y,x,x")
2368 (match_operand:DF 1 "nonimmediate_operand" " 0,0,m,0,0")
2369 (match_operand:DF 2 "vector_move_operand" " Y,m,C,x,m")))]
2372 unpcklpd\t{%2, %0|%0, %2}
2373 movhpd\t{%2, %0|%0, %2}
2374 movsd\t{%1, %0|%0, %1}
2375 movlhps\t{%2, %0|%0, %2}
2376 movhps\t{%2, %0|%0, %2}"
2377 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,ssemov")
2378 (set_attr "mode" "V2DF,V1DF,DF,V4SF,V2SF")])
;; Set one element of V2DF register operand 0 to DF operand 1; operand 2
;; is the constant element index.  All work is delegated to
;; ix86_expand_vector_set, called with false as its first argument.
2380 (define_expand "vec_setv2df"
2381 [(match_operand:V2DF 0 "register_operand" "")
2382 (match_operand:DF 1 "register_operand" "")
2383 (match_operand 2 "const_int_operand" "")]
2386 ix86_expand_vector_set (false, operands[0], operands[1],
2387 INTVAL (operands[2]));
;; Extract one element of V2DF operand 1 into DF operand 0; operand 2 is
;; the constant element index.  All work is delegated to
;; ix86_expand_vector_extract, called with false as its first argument.
2391 (define_expand "vec_extractv2df"
2392 [(match_operand:DF 0 "register_operand" "")
2393 (match_operand:V2DF 1 "register_operand" "")
2394 (match_operand 2 "const_int_operand" "")]
2397 ix86_expand_vector_extract (false, operands[0], operands[1],
2398 INTVAL (operands[2]));
;; Initialise V2DF register operand 0 from operand 1 (no predicate or
;; mode is imposed on it here).  All work is delegated to
;; ix86_expand_vector_init, called with false as its first argument.
2402 (define_expand "vec_initv2df"
2403 [(match_operand:V2DF 0 "register_operand" "")
2404 (match_operand 1 "" "")]
2407 ix86_expand_vector_init (false, operands[0], operands[1]);
2411 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2413 ;; Parallel integral arithmetic
2415 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Integer vector negate for every SSEMODEI mode.  The preparation
;; statement sets operand 2 to a register holding the zero vector of the
;; same mode, for use by the pattern.
2417 (define_expand "neg<mode>2"
2418 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2421 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")))]
2423 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
;; Integer vector add expander for every SSEMODEI mode.  The operands are
;; passed through ix86_fixup_binary_operands_no_copy (PLUS) before the
;; pattern is emitted.
2425 (define_expand "add<mode>3"
2426 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2427 (plus:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
2428 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
2430 "ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);")
;; Matching insn for the integer vector add: padd with the b/w/d/q suffix
;; taken from ssevecsize.  Operand 1 is commutative with operand 2 ("%")
;; and tied to the destination; operand 2 may be a register or memory.
2432 (define_insn "*add<mode>3"
2433 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
2435 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
2436 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
2437 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
2438 "padd<ssevecsize>\t{%2, %0|%0, %2}"
2439 [(set_attr "type" "sseiadd")
2440 (set_attr "mode" "TI")])
;; Signed saturating add (SS_PLUS) for V16QI and V8HI: paddsb / paddsw.
;; Operand 1 is commutative with operand 2 and tied to the destination.
2442 (define_insn "sse2_ssadd<mode>3"
2443 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2445 (match_operand:SSEMODE12 1 "nonimmediate_operand" "%0")
2446 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2447 "TARGET_SSE2 && ix86_binary_operator_ok (SS_PLUS, <MODE>mode, operands)"
2448 "padds<ssevecsize>\t{%2, %0|%0, %2}"
2449 [(set_attr "type" "sseiadd")
2450 (set_attr "mode" "TI")])
;; Unsigned saturating add (US_PLUS) for V16QI and V8HI: paddusb /
;; paddusw.  Operand 1 is commutative with operand 2 and tied to the
;; destination.
2452 (define_insn "sse2_usadd<mode>3"
2453 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2455 (match_operand:SSEMODE12 1 "nonimmediate_operand" "%0")
2456 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2457 "TARGET_SSE2 && ix86_binary_operator_ok (US_PLUS, <MODE>mode, operands)"
2458 "paddus<ssevecsize>\t{%2, %0|%0, %2}"
2459 [(set_attr "type" "sseiadd")
2460 (set_attr "mode" "TI")])
;; Integer vector subtract expander for every SSEMODEI mode.  Operand 1
;; must be a register; operand 2 may be a register or memory.  The
;; operands go through ix86_fixup_binary_operands_no_copy (MINUS).
2462 (define_expand "sub<mode>3"
2463 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2464 (minus:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "")
2465 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
2467 "ix86_fixup_binary_operands_no_copy (MINUS, <MODE>mode, operands);")
;; Matching insn for the integer vector subtract: psub with the b/w/d/q
;; suffix from ssevecsize.  Operand 1 is tied to the destination and is
;; not marked commutative.
2469 (define_insn "*sub<mode>3"
2470 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
2472 (match_operand:SSEMODEI 1 "register_operand" "0")
2473 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
2475 "psub<ssevecsize>\t{%2, %0|%0, %2}"
2476 [(set_attr "type" "sseiadd")
2477 (set_attr "mode" "TI")])
;; psubsb / psubsw for V16QI and V8HI.  Operand 1 is tied to the
;; destination; operand 2 may be a register or memory.
2479 (define_insn "sse2_sssub<mode>3"
2480 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2482 (match_operand:SSEMODE12 1 "register_operand" "0")
2483 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2485 "psubs<ssevecsize>\t{%2, %0|%0, %2}"
2486 [(set_attr "type" "sseiadd")
2487 (set_attr "mode" "TI")])
;; psubusb / psubusw for V16QI and V8HI.  Operand 1 is tied to the
;; destination; operand 2 may be a register or memory.
2489 (define_insn "sse2_ussub<mode>3"
2490 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2492 (match_operand:SSEMODE12 1 "register_operand" "0")
2493 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2495 "psubus<ssevecsize>\t{%2, %0|%0, %2}"
2496 [(set_attr "type" "sseiadd")
2497 (set_attr "mode" "TI")])
;; V16QI multiply synthesised from word multiplies, using twelve V16QI
;; temporaries t[0..11]:
;;   1. interleave each input with itself (high and low halves) so every
;;      word carries a source byte in its low byte;
;;   2. multiply the two pairs as V8HI with mulv8hi3 into t[4] and t[5];
;;   3. gather the wanted product bytes with a chain of high/low byte
;;      interleaves, finishing in the destination.
2499 (define_expand "mulv16qi3"
2500 [(set (match_operand:V16QI 0 "register_operand" "")
2501 (mult:V16QI (match_operand:V16QI 1 "register_operand" "")
2502 (match_operand:V16QI 2 "register_operand" "")))]
2508 for (i = 0; i < 12; ++i)
2509 t[i] = gen_reg_rtx (V16QImode);
2511 /* Unpack data such that we've got a source byte in each low byte of
2512 each word. We don't care what goes into the high byte of each word.
2513 Rather than trying to get zero in there, most convenient is to let
2514 it be a copy of the low byte. */
2515 emit_insn (gen_vec_interleave_highv16qi (t[0], operands[1], operands[1]));
2516 emit_insn (gen_vec_interleave_highv16qi (t[1], operands[2], operands[2]));
2517 emit_insn (gen_vec_interleave_lowv16qi (t[2], operands[1], operands[1]));
2518 emit_insn (gen_vec_interleave_lowv16qi (t[3], operands[2], operands[2]));
2520 /* Multiply words. The end-of-line annotations here give a picture of what
2521 the output of that instruction looks like. Dot means don't care; the
2522 letters are the bytes of the result with A being the most significant. */
2523 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[4]), /* .A.B.C.D.E.F.G.H */
2524 gen_lowpart (V8HImode, t[0]),
2525 gen_lowpart (V8HImode, t[1])));
2526 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[5]), /* .I.J.K.L.M.N.O.P */
2527 gen_lowpart (V8HImode, t[2]),
2528 gen_lowpart (V8HImode, t[3])));
2530 /* Extract the relevant bytes and merge them back together. */
2531 emit_insn (gen_vec_interleave_highv16qi (t[6], t[5], t[4])); /* ..AI..BJ..CK..DL */
2532 emit_insn (gen_vec_interleave_lowv16qi (t[7], t[5], t[4])); /* ..EM..FN..GO..HP */
2533 emit_insn (gen_vec_interleave_highv16qi (t[8], t[7], t[6])); /* ....AEIM....BFJN */
2534 emit_insn (gen_vec_interleave_lowv16qi (t[9], t[7], t[6])); /* ....CGKO....DHLP */
2535 emit_insn (gen_vec_interleave_highv16qi (t[10], t[9], t[8])); /* ........ACEGIKMO */
2536 emit_insn (gen_vec_interleave_lowv16qi (t[11], t[9], t[8])); /* ........BDFHJLNP */
2539 emit_insn (gen_vec_interleave_lowv16qi (op0, t[11], t[10])); /* ABCDEFGHIJKLMNOP */
;; V8HI multiply expander.  The operands are passed through
;; ix86_fixup_binary_operands_no_copy (MULT) before the pattern is emitted.
2543 (define_expand "mulv8hi3"
2544 [(set (match_operand:V8HI 0 "register_operand" "")
2545 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
2546 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
2548 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; Matching insn for the V8HI multiply: pmullw.  Operand 1 is commutative
;; with operand 2 and tied to the destination.
2550 (define_insn "*mulv8hi3"
2551 [(set (match_operand:V8HI 0 "register_operand" "=x")
2552 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
2553 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
2554 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
2555 "pmullw\t{%2, %0|%0, %2}"
2556 [(set_attr "type" "sseimul")
2557 (set_attr "mode" "TI")])
;; Expander for the signed V8HI high-part multiply.  The operands are
;; passed through ix86_fixup_binary_operands_no_copy (MULT).
2559 (define_expand "smulv8hi3_highpart"
2560 [(set (match_operand:V8HI 0 "register_operand" "")
2565 (match_operand:V8HI 1 "nonimmediate_operand" ""))
2567 (match_operand:V8HI 2 "nonimmediate_operand" "")))
2570 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; Matching insn for the signed V8HI high-part multiply: pmulhw.
;; Operand 1 is commutative with operand 2 and tied to the destination.
2572 (define_insn "*smulv8hi3_highpart"
2573 [(set (match_operand:V8HI 0 "register_operand" "=x")
2578 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
2580 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
2582 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
2583 "pmulhw\t{%2, %0|%0, %2}"
2584 [(set_attr "type" "sseimul")
2585 (set_attr "mode" "TI")])
;; Expander for the unsigned V8HI high-part multiply.  The operands are
;; passed through ix86_fixup_binary_operands_no_copy (MULT).
2587 (define_expand "umulv8hi3_highpart"
2588 [(set (match_operand:V8HI 0 "register_operand" "")
2593 (match_operand:V8HI 1 "nonimmediate_operand" ""))
2595 (match_operand:V8HI 2 "nonimmediate_operand" "")))
2598 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; Matching insn for the unsigned V8HI high-part multiply: pmulhuw.
;; Operand 1 is commutative with operand 2 and tied to the destination.
2600 (define_insn "*umulv8hi3_highpart"
2601 [(set (match_operand:V8HI 0 "register_operand" "=x")
2606 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
2608 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
2610 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
2611 "pmulhuw\t{%2, %0|%0, %2}"
2612 [(set_attr "type" "sseimul")
2613 (set_attr "mode" "TI")])
;; pmuludq: multiply elements 0 and 2 of the two V4SI inputs, giving a
;; V2DI result.  Operand 1 is commutative with operand 2 and tied to the
;; destination.  The operands are V4SI, so that is the mode handed to
;; ix86_binary_operator_ok (it was V8HImode, which does not match either
;; input).
2615 (define_insn "sse2_umulv2siv2di3"
2616 [(set (match_operand:V2DI 0 "register_operand" "=x")
2620 (match_operand:V4SI 1 "nonimmediate_operand" "%0")
2621 (parallel [(const_int 0) (const_int 2)])))
2624 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
2625 (parallel [(const_int 0) (const_int 2)])))))]
2626 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
2627 "pmuludq\t{%2, %0|%0, %2}"
2628 [(set_attr "type" "sseimul")
2629 (set_attr "mode" "TI")])
;; pmaddwd: V4SI result from two V8HI inputs.  Each input is read twice
;; through V4HI selections, one beginning at element 0 and one beginning
;; at element 1 (ending at element 7).  Operand 1 is commutative with
;; operand 2 and tied to the destination.
2631 (define_insn "sse2_pmaddwd"
2632 [(set (match_operand:V4SI 0 "register_operand" "=x")
2637 (match_operand:V8HI 1 "nonimmediate_operand" "%0")
2638 (parallel [(const_int 0)
2644 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
2645 (parallel [(const_int 0)
2651 (vec_select:V4HI (match_dup 1)
2652 (parallel [(const_int 1)
2657 (vec_select:V4HI (match_dup 2)
2658 (parallel [(const_int 1)
2661 (const_int 7)]))))))]
2663 "pmaddwd\t{%2, %0|%0, %2}"
2664 [(set_attr "type" "sseiadd")
2665 (set_attr "mode" "TI")])
;; V4SI multiply synthesised from two pmuludq multiplies:
;;   t1 = products of elements 0 and 2 of the inputs;
;;   t2/t3 = the inputs shifted right by 32 bits as TImode values, which
;;           moves elements 1 and 3 into the slots pmuludq reads;
;;   t4 = products of (original) elements 1 and 3;
;;   t5/t6 = pshufd of t1/t4 with selectors (0, 2, 0, 0);
;;   result = low interleave of t5 and t6.
2667 (define_expand "mulv4si3"
2668 [(set (match_operand:V4SI 0 "register_operand" "")
2669 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
2670 (match_operand:V4SI 2 "register_operand" "")))]
2673 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
2679 t1 = gen_reg_rtx (V4SImode);
2680 t2 = gen_reg_rtx (V4SImode);
2681 t3 = gen_reg_rtx (V4SImode);
2682 t4 = gen_reg_rtx (V4SImode);
2683 t5 = gen_reg_rtx (V4SImode);
2684 t6 = gen_reg_rtx (V4SImode);
2685 thirtytwo = GEN_INT (32);
2687 /* Multiply elements 2 and 0. */
2688 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t1), op1, op2));
2690 /* Shift both input vectors down one element, so that elements 3 and 1
2691 are now in the slots for elements 2 and 0. For K8, at least, this is
2692 faster than using a shuffle. */
2693 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2),
2694 gen_lowpart (TImode, op1), thirtytwo));
2695 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3),
2696 gen_lowpart (TImode, op2), thirtytwo));
2698 /* Multiply elements 3 and 1. */
2699 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t4), t2, t3));
2701 /* Move the results in element 2 down to element 1; we don't care what
2702 goes in elements 2 and 3. */
2703 emit_insn (gen_sse2_pshufd_1 (t5, t1, const0_rtx, const2_rtx,
2704 const0_rtx, const0_rtx));
2705 emit_insn (gen_sse2_pshufd_1 (t6, t4, const0_rtx, const2_rtx,
2706 const0_rtx, const0_rtx));
2708 /* Merge the parts back together. */
2709 emit_insn (gen_vec_interleave_lowv4si (op0, t5, t6));
;; V2DI multiply synthesised from three pmuludq multiplies.  With each
;; 64-bit element split into 32-bit halves:
;;   t1 = lo(op1) * lo(op2)
;;   t4 = lo(op1) * hi(op2),  t5 = lo(op2) * hi(op1)
;; where the high halves come from logical right shifts by 32 (t2, t3).
;; t4 and t5 are shifted left by 32 and all three parts are added.
2713 (define_expand "mulv2di3"
2714 [(set (match_operand:V2DI 0 "register_operand" "")
2715 (mult:V2DI (match_operand:V2DI 1 "register_operand" "")
2716 (match_operand:V2DI 2 "register_operand" "")))]
2719 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
2725 t1 = gen_reg_rtx (V2DImode);
2726 t2 = gen_reg_rtx (V2DImode);
2727 t3 = gen_reg_rtx (V2DImode);
2728 t4 = gen_reg_rtx (V2DImode);
2729 t5 = gen_reg_rtx (V2DImode);
2730 t6 = gen_reg_rtx (V2DImode);
2731 thirtytwo = GEN_INT (32);
2733 /* Multiply low parts. */
2734 emit_insn (gen_sse2_umulv2siv2di3 (t1, gen_lowpart (V4SImode, op1),
2735 gen_lowpart (V4SImode, op2)));
2737 /* Shift input vectors right 32 bits so we can multiply high parts. */
2738 emit_insn (gen_lshrv2di3 (t2, op1, thirtytwo));
2739 emit_insn (gen_lshrv2di3 (t3, op2, thirtytwo));
2741 /* Multiply high parts by low parts. */
2742 emit_insn (gen_sse2_umulv2siv2di3 (t4, gen_lowpart (V4SImode, op1),
2743 gen_lowpart (V4SImode, t3)));
2744 emit_insn (gen_sse2_umulv2siv2di3 (t5, gen_lowpart (V4SImode, op2),
2745 gen_lowpart (V4SImode, t2)));
2747 /* Shift them back. */
2748 emit_insn (gen_ashlv2di3 (t4, t4, thirtytwo));
2749 emit_insn (gen_ashlv2di3 (t5, t5, thirtytwo));
2751 /* Add the three parts together. */
2752 emit_insn (gen_addv2di3 (t6, t1, t4));
2753 emit_insn (gen_addv2di3 (op0, t6, t5));
;; Signed widening multiply, high half: V8HI x V8HI -> V4SI.  t1 is the
;; mulv8hi3 product, t2 the signed high-part product; the result is their
;; high V8HI interleave, written through a V8HI view of operand 0.
2757 (define_expand "vec_widen_smult_hi_v8hi"
2758 [(match_operand:V4SI 0 "register_operand" "")
2759 (match_operand:V8HI 1 "register_operand" "")
2760 (match_operand:V8HI 2 "register_operand" "")]
2763 rtx op1, op2, t1, t2, dest;
2767 t1 = gen_reg_rtx (V8HImode);
2768 t2 = gen_reg_rtx (V8HImode);
2769 dest = gen_lowpart (V8HImode, operands[0]);
2771 emit_insn (gen_mulv8hi3 (t1, op1, op2));
2772 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
2773 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
;; Signed widening multiply, low half: V8HI x V8HI -> V4SI.  t1 is the
;; mulv8hi3 product, t2 the signed high-part product; the result is their
;; low V8HI interleave, written through a V8HI view of operand 0.
2777 (define_expand "vec_widen_smult_lo_v8hi"
2778 [(match_operand:V4SI 0 "register_operand" "")
2779 (match_operand:V8HI 1 "register_operand" "")
2780 (match_operand:V8HI 2 "register_operand" "")]
2783 rtx op1, op2, t1, t2, dest;
2787 t1 = gen_reg_rtx (V8HImode);
2788 t2 = gen_reg_rtx (V8HImode);
2789 dest = gen_lowpart (V8HImode, operands[0]);
2791 emit_insn (gen_mulv8hi3 (t1, op1, op2));
2792 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
2793 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
;; Unsigned widening multiply, high half: V8HI x V8HI -> V4SI.  t1 is the
;; mulv8hi3 product, t2 the unsigned high-part product; the result is
;; their high V8HI interleave, written through a V8HI view of operand 0.
2797 (define_expand "vec_widen_umult_hi_v8hi"
2798 [(match_operand:V4SI 0 "register_operand" "")
2799 (match_operand:V8HI 1 "register_operand" "")
2800 (match_operand:V8HI 2 "register_operand" "")]
2803 rtx op1, op2, t1, t2, dest;
2807 t1 = gen_reg_rtx (V8HImode);
2808 t2 = gen_reg_rtx (V8HImode);
2809 dest = gen_lowpart (V8HImode, operands[0]);
2811 emit_insn (gen_mulv8hi3 (t1, op1, op2));
2812 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
2813 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
;; Unsigned widening multiply, low half: V8HI x V8HI -> V4SI.  t1 is the
;; mulv8hi3 product, t2 the unsigned high-part product; the result is
;; their low V8HI interleave, written through a V8HI view of operand 0.
2817 (define_expand "vec_widen_umult_lo_v8hi"
2818 [(match_operand:V4SI 0 "register_operand" "")
2819 (match_operand:V8HI 1 "register_operand" "")
2820 (match_operand:V8HI 2 "register_operand" "")]
2823 rtx op1, op2, t1, t2, dest;
2827 t1 = gen_reg_rtx (V8HImode);
2828 t2 = gen_reg_rtx (V8HImode);
2829 dest = gen_lowpart (V8HImode, operands[0]);
2831 emit_insn (gen_mulv8hi3 (t1, op1, op2));
2832 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
2833 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
;; Widening multiply, high half: V4SI x V4SI -> V2DI.  Each input is
;; high-interleaved with itself so its upper two elements land in the
;; slots read by sse2_umulv2siv2di3, which then forms the products.
;; NOTE(review): the final multiply is sse2_umulv2siv2di3 (pmuludq, an
;; unsigned multiply) although this is the signed pattern -- confirm that
;; inputs with the sign bit set produce the intended result.
2837 (define_expand "vec_widen_smult_hi_v4si"
2838 [(match_operand:V2DI 0 "register_operand" "")
2839 (match_operand:V4SI 1 "register_operand" "")
2840 (match_operand:V4SI 2 "register_operand" "")]
2843 rtx op1, op2, t1, t2;
2847 t1 = gen_reg_rtx (V4SImode);
2848 t2 = gen_reg_rtx (V4SImode);
2850 emit_insn (gen_vec_interleave_highv4si (t1, op1, op1));
2851 emit_insn (gen_vec_interleave_highv4si (t2, op2, op2));
2852 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Widening multiply, low half: V4SI x V4SI -> V2DI.  Each input is
;; low-interleaved with itself so its lower two elements land in the
;; slots read by sse2_umulv2siv2di3, which then forms the products.
;; NOTE(review): the final multiply is sse2_umulv2siv2di3 (pmuludq, an
;; unsigned multiply) although this is the signed pattern -- confirm that
;; inputs with the sign bit set produce the intended result.
2856 (define_expand "vec_widen_smult_lo_v4si"
2857 [(match_operand:V2DI 0 "register_operand" "")
2858 (match_operand:V4SI 1 "register_operand" "")
2859 (match_operand:V4SI 2 "register_operand" "")]
2862 rtx op1, op2, t1, t2;
2866 t1 = gen_reg_rtx (V4SImode);
2867 t2 = gen_reg_rtx (V4SImode);
2869 emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1));
2870 emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2));
2871 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Unsigned widening multiply, high half: V4SI x V4SI -> V2DI.  Each
;; input is high-interleaved with itself so its upper two elements land
;; in the slots read by sse2_umulv2siv2di3 (pmuludq).
2875 (define_expand "vec_widen_umult_hi_v4si"
2876 [(match_operand:V2DI 0 "register_operand" "")
2877 (match_operand:V4SI 1 "register_operand" "")
2878 (match_operand:V4SI 2 "register_operand" "")]
2881 rtx op1, op2, t1, t2;
2885 t1 = gen_reg_rtx (V4SImode);
2886 t2 = gen_reg_rtx (V4SImode);
2888 emit_insn (gen_vec_interleave_highv4si (t1, op1, op1));
2889 emit_insn (gen_vec_interleave_highv4si (t2, op2, op2));
2890 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Unsigned widening multiply, low half: V4SI x V4SI -> V2DI.  Each input
;; is low-interleaved with itself so its lower two elements land in the
;; slots read by sse2_umulv2siv2di3 (pmuludq).
2894 (define_expand "vec_widen_umult_lo_v4si"
2895 [(match_operand:V2DI 0 "register_operand" "")
2896 (match_operand:V4SI 1 "register_operand" "")
2897 (match_operand:V4SI 2 "register_operand" "")]
2900 rtx op1, op2, t1, t2;
2904 t1 = gen_reg_rtx (V4SImode);
2905 t2 = gen_reg_rtx (V4SImode);
2907 emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1));
2908 emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2));
2909 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Signed dot-product step: operand 0 = operand 3 + pmaddwd (operand 1,
;; operand 2).  The pmaddwd result goes through a fresh V4SI temporary.
2913 (define_expand "sdot_prodv8hi"
2914 [(match_operand:V4SI 0 "register_operand" "")
2915 (match_operand:V8HI 1 "nonimmediate_operand" "")
2916 (match_operand:V8HI 2 "nonimmediate_operand" "")
2917 (match_operand:V4SI 3 "register_operand" "")]
2920 rtx t = gen_reg_rtx (V4SImode);
2921 emit_insn (gen_sse2_pmaddwd (t, operands[1], operands[2]));
2922 emit_insn (gen_addv4si3 (operands[0], operands[3], t));
;; Unsigned dot-product step producing V2DI from V4SI inputs:
;;   t1 = pmuludq (operand 1, operand 2) + operand 3;
;;   t2/t3 = the inputs shifted right as TImode values (sse2_lshrti3);
;;   t4 = pmuludq (t2, t3);
;;   operand 0 = t1 + t4.
2926 (define_expand "udot_prodv4si"
2927 [(match_operand:V2DI 0 "register_operand" "")
2928 (match_operand:V4SI 1 "register_operand" "")
2929 (match_operand:V4SI 2 "register_operand" "")
2930 (match_operand:V2DI 3 "register_operand" "")]
2935 t1 = gen_reg_rtx (V2DImode);
2936 emit_insn (gen_sse2_umulv2siv2di3 (t1, operands[1], operands[2]));
2937 emit_insn (gen_addv2di3 (t1, t1, operands[3]));
2939 t2 = gen_reg_rtx (V4SImode);
2940 t3 = gen_reg_rtx (V4SImode);
2941 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2),
2942 gen_lowpart (TImode, operands[1]),
2944 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3),
2945 gen_lowpart (TImode, operands[2]),
2948 t4 = gen_reg_rtx (V2DImode);
2949 emit_insn (gen_sse2_umulv2siv2di3 (t4, t2, t3));
2951 emit_insn (gen_addv2di3 (operands[0], t1, t4));
;; psraw / psrad for V8HI and V4SI.  Operand 1 is tied to the
;; destination; the SImode count in operand 2 may be an SSE register or
;; an immediate.
2955 (define_insn "ashr<mode>3"
2956 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
2958 (match_operand:SSEMODE24 1 "register_operand" "0")
2959 (match_operand:SI 2 "nonmemory_operand" "xi")))]
2961 "psra<ssevecsize>\t{%2, %0|%0, %2}"
2962 [(set_attr "type" "sseishft")
2963 (set_attr "mode" "TI")])
;; Logical right shift (psrlw / psrld / psrlq) for V8HI, V4SI and V2DI.
;; Operand 1 is tied to the destination; the SImode count in operand 2
;; may be an SSE register or an immediate.
2965 (define_insn "lshr<mode>3"
2966 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
2967 (lshiftrt:SSEMODE248
2968 (match_operand:SSEMODE248 1 "register_operand" "0")
2969 (match_operand:SI 2 "nonmemory_operand" "xi")))]
2971 "psrl<ssevecsize>\t{%2, %0|%0, %2}"
2972 [(set_attr "type" "sseishft")
2973 (set_attr "mode" "TI")])
;; psllw / pslld / psllq for V8HI, V4SI and V2DI.  Operand 1 is tied to
;; the destination; the SImode count in operand 2 may be an SSE register
;; or an immediate.
2975 (define_insn "ashl<mode>3"
2976 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
2978 (match_operand:SSEMODE248 1 "register_operand" "0")
2979 (match_operand:SI 2 "nonmemory_operand" "xi")))]
2981 "psll<ssevecsize>\t{%2, %0|%0, %2}"
2982 [(set_attr "type" "sseishft")
2983 (set_attr "mode" "TI")])
;; TImode left shift of the whole register (pslldq).  Operand 2 must
;; satisfy const_0_to_255_mul_8_operand; the output code divides it by 8
;; before printing it as the instruction operand.
2985 (define_insn "sse2_ashlti3"
2986 [(set (match_operand:TI 0 "register_operand" "=x")
2987 (ashift:TI (match_operand:TI 1 "register_operand" "0")
2988 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
2991 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
2992 return "pslldq\t{%2, %0|%0, %2}";
2994 [(set_attr "type" "sseishft")
2995 (set_attr "mode" "TI")])
;; Whole-vector left shift for every SSEMODEI mode, expressed as a TImode
;; ashift.  The count is checked with const_0_to_255_mul_8_operand, and
;; operands 0 and 1 are recast to TImode with gen_lowpart.
2997 (define_expand "vec_shl_<mode>"
2998 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2999 (ashift:TI (match_operand:SSEMODEI 1 "register_operand" "")
3000 (match_operand:SI 2 "general_operand" "")))]
3003 if (!const_0_to_255_mul_8_operand (operands[2], SImode))
3005 operands[0] = gen_lowpart (TImode, operands[0]);
3006 operands[1] = gen_lowpart (TImode, operands[1]);
;; TImode logical right shift of the whole register (psrldq).  Operand 2
;; must satisfy const_0_to_255_mul_8_operand; the output code divides it
;; by 8 before printing it as the instruction operand.
3009 (define_insn "sse2_lshrti3"
3010 [(set (match_operand:TI 0 "register_operand" "=x")
3011 (lshiftrt:TI (match_operand:TI 1 "register_operand" "0")
3012 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
3015 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
3016 return "psrldq\t{%2, %0|%0, %2}";
3018 [(set_attr "type" "sseishft")
3019 (set_attr "mode" "TI")])
;; Whole-vector right shift for every SSEMODEI mode, expressed as a
;; TImode lshiftrt.  The count is checked with
;; const_0_to_255_mul_8_operand, and operands 0 and 1 are recast to
;; TImode with gen_lowpart.
3021 (define_expand "vec_shr_<mode>"
3022 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3023 (lshiftrt:TI (match_operand:SSEMODEI 1 "register_operand" "")
3024 (match_operand:SI 2 "general_operand" "")))]
3027 if (!const_0_to_255_mul_8_operand (operands[2], SImode))
3029 operands[0] = gen_lowpart (TImode, operands[0]);
3030 operands[1] = gen_lowpart (TImode, operands[1]);
;; Unsigned V16QI maximum expander.  The operands are passed through
;; ix86_fixup_binary_operands_no_copy (UMAX) before the pattern is emitted.
3033 (define_expand "umaxv16qi3"
3034 [(set (match_operand:V16QI 0 "register_operand" "")
3035 (umax:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "")
3036 (match_operand:V16QI 2 "nonimmediate_operand" "")))]
3038 "ix86_fixup_binary_operands_no_copy (UMAX, V16QImode, operands);")
;; Matching insn for the unsigned V16QI maximum: pmaxub.  Operand 1 is
;; commutative with operand 2 and tied to the destination.
3040 (define_insn "*umaxv16qi3"
3041 [(set (match_operand:V16QI 0 "register_operand" "=x")
3042 (umax:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "%0")
3043 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
3044 "TARGET_SSE2 && ix86_binary_operator_ok (UMAX, V16QImode, operands)"
3045 "pmaxub\t{%2, %0|%0, %2}"
3046 [(set_attr "type" "sseiadd")
3047 (set_attr "mode" "TI")])
;; Signed V8HI maximum expander.  The operands are passed through
;; ix86_fixup_binary_operands_no_copy (SMAX) before the pattern is emitted.
3049 (define_expand "smaxv8hi3"
3050 [(set (match_operand:V8HI 0 "register_operand" "")
3051 (smax:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
3052 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
3054 "ix86_fixup_binary_operands_no_copy (SMAX, V8HImode, operands);")
;; Matching insn for the signed V8HI maximum: pmaxsw.  Operand 1 is
;; commutative with operand 2 and tied to the destination.
3056 (define_insn "*smaxv8hi3"
3057 [(set (match_operand:V8HI 0 "register_operand" "=x")
3058 (smax:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
3059 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
3060 "TARGET_SSE2 && ix86_binary_operator_ok (SMAX, V8HImode, operands)"
3061 "pmaxsw\t{%2, %0|%0, %2}"
3062 [(set_attr "type" "sseiadd")
3063 (set_attr "mode" "TI")])
;; Unsigned V8HI maximum built from two operations: an unsigned
;; saturating subtract (us_minus) of operand 2 from operand 1 into
;; operand 0, followed by an add of operand 0 and operand 2.
;; operands[3] remembers the original destination.  If that destination
;; is rtx_equal_p to operand 2, the intermediate result goes to a fresh
;; pseudo instead.
3065 (define_expand "umaxv8hi3"
3066 [(set (match_operand:V8HI 0 "register_operand" "=x")
3067 (us_minus:V8HI (match_operand:V8HI 1 "register_operand" "0")
3068 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
3070 (plus:V8HI (match_dup 0) (match_dup 2)))]
3073 operands[3] = operands[0];
3074 if (rtx_equal_p (operands[0], operands[2]))
3075 operands[0] = gen_reg_rtx (V8HImode);
;; Signed maximum for V16QI and V4SI, expanded as a vector conditional:
;; ix86_expand_int_vcond is called with destination operand 0, data
;; operands (operand 1, operand 2) and the comparison
;; GT (operand 1, operand 2).
3078 (define_expand "smax<mode>3"
3079 [(set (match_operand:SSEMODE14 0 "register_operand" "")
3080 (smax:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
3081 (match_operand:SSEMODE14 2 "register_operand" "")))]
3087 xops[0] = operands[0];
3088 xops[1] = operands[1];
3089 xops[2] = operands[2];
3090 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
3091 xops[4] = operands[1];
3092 xops[5] = operands[2];
3093 ok = ix86_expand_int_vcond (xops);
;; Unsigned V4SI maximum, expanded as a vector conditional:
;; ix86_expand_int_vcond is called with destination operand 0, data
;; operands (operand 1, operand 2) and the comparison
;; GTU (operand 1, operand 2).
3098 (define_expand "umaxv4si3"
3099 [(set (match_operand:V4SI 0 "register_operand" "")
3100 (umax:V4SI (match_operand:V4SI 1 "register_operand" "")
3101 (match_operand:V4SI 2 "register_operand" "")))]
3107 xops[0] = operands[0];
3108 xops[1] = operands[1];
3109 xops[2] = operands[2];
3110 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
3111 xops[4] = operands[1];
3112 xops[5] = operands[2];
3113 ok = ix86_expand_int_vcond (xops);
;; Unsigned V16QI minimum expander.  The operands are passed through
;; ix86_fixup_binary_operands_no_copy before the pattern is emitted.  The
;; rtx code handed to the fixup is UMIN, matching the umin pattern here
;; and the ix86_binary_operator_ok (UMIN, ...) test in *uminv16qi3 (it
;; previously said UMAX).
3118 (define_expand "uminv16qi3"
3119 [(set (match_operand:V16QI 0 "register_operand" "")
3120 (umin:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "")
3121 (match_operand:V16QI 2 "nonimmediate_operand" "")))]
3123 "ix86_fixup_binary_operands_no_copy (UMIN, V16QImode, operands);")
;; Matching insn for the unsigned V16QI minimum: pminub.  Operand 1 is
;; commutative with operand 2 and tied to the destination.
3125 (define_insn "*uminv16qi3"
3126 [(set (match_operand:V16QI 0 "register_operand" "=x")
3127 (umin:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "%0")
3128 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
3129 "TARGET_SSE2 && ix86_binary_operator_ok (UMIN, V16QImode, operands)"
3130 "pminub\t{%2, %0|%0, %2}"
3131 [(set_attr "type" "sseiadd")
3132 (set_attr "mode" "TI")])
;; Signed V8HI minimum expander.  The operands are passed through
;; ix86_fixup_binary_operands_no_copy (SMIN) before the pattern is emitted.
3134 (define_expand "sminv8hi3"
3135 [(set (match_operand:V8HI 0 "register_operand" "")
3136 (smin:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
3137 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
3139 "ix86_fixup_binary_operands_no_copy (SMIN, V8HImode, operands);")
;; Matching insn for the signed V8HI minimum: pminsw.  Operand 1 is
;; commutative with operand 2 and tied to the destination.
3141 (define_insn "*sminv8hi3"
3142 [(set (match_operand:V8HI 0 "register_operand" "=x")
3143 (smin:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
3144 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
3145 "TARGET_SSE2 && ix86_binary_operator_ok (SMIN, V8HImode, operands)"
3146 "pminsw\t{%2, %0|%0, %2}"
3147 [(set_attr "type" "sseiadd")
3148 (set_attr "mode" "TI")])
;; Signed minimum for V16QI and V4SI, expanded as a vector conditional.
;; Same comparison as the smax expander, GT (operand 1, operand 2), but
;; the two data operands are passed in swapped order (operand 2, then
;; operand 1).
3150 (define_expand "smin<mode>3"
3151 [(set (match_operand:SSEMODE14 0 "register_operand" "")
3152 (smin:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
3153 (match_operand:SSEMODE14 2 "register_operand" "")))]
3159 xops[0] = operands[0];
3160 xops[1] = operands[2];
3161 xops[2] = operands[1];
3162 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
3163 xops[4] = operands[1];
3164 xops[5] = operands[2];
3165 ok = ix86_expand_int_vcond (xops);
;; Unsigned minimum for V8HI and V4SI, expanded as a vector conditional.
;; The comparison is GTU (operand 1, operand 2) and the two data operands
;; are passed in swapped order (operand 2, then operand 1).
3170 (define_expand "umin<mode>3"
3171 [(set (match_operand:SSEMODE24 0 "register_operand" "")
3172 (umin:SSEMODE24 (match_operand:SSEMODE24 1 "register_operand" "")
3173 (match_operand:SSEMODE24 2 "register_operand" "")))]
3179 xops[0] = operands[0];
3180 xops[1] = operands[2];
3181 xops[2] = operands[1];
3182 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
3183 xops[4] = operands[1];
3184 xops[5] = operands[2];
3185 ok = ix86_expand_int_vcond (xops);
3190 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3192 ;; Parallel integral comparisons
3194 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Element-wise equality compare for V16QI, V8HI and V4SI: pcmpeqb /
;; pcmpeqw / pcmpeqd.  Operand 1 is commutative with operand 2 and tied
;; to the destination.
3196 (define_insn "sse2_eq<mode>3"
3197 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
3199 (match_operand:SSEMODE124 1 "nonimmediate_operand" "%0")
3200 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
3201 "TARGET_SSE2 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
3202 "pcmpeq<ssevecsize>\t{%2, %0|%0, %2}"
3203 [(set_attr "type" "ssecmp")
3204 (set_attr "mode" "TI")])
;; Element-wise compare for the SSEMODE124 modes (V16QI, V8HI, V4SI), emitted
;; as pcmpgt with the b/w/d suffix supplied by <ssevecsize>.
;; Unlike sse2_eq<mode>3, operand 1 must be a register tied to the
;; destination ("0") and is not marked commutative.
3206 (define_insn "sse2_gt<mode>3"
3207 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
3209 (match_operand:SSEMODE124 1 "register_operand" "0")
3210 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
3212 "pcmpgt<ssevecsize>\t{%2, %0|%0, %2}"
3213 [(set_attr "type" "ssecmp")
3214 (set_attr "mode" "TI")])
;; Vector conditional-select expander for the SSEMODE124 modes.
;; Operand 3 is the comparison operator (any operator: empty predicate)
;; applied to operands 4 and 5; operands 1 and 2 are the two if_then_else
;; arms; operand 0 receives the result.  The whole operand array is passed
;; to ix86_expand_int_vcond, whose return value is tested by the C body.
3216 (define_expand "vcond<mode>"
3217 [(set (match_operand:SSEMODE124 0 "register_operand" "")
3218 (if_then_else:SSEMODE124
3219 (match_operator 3 ""
3220 [(match_operand:SSEMODE124 4 "nonimmediate_operand" "")
3221 (match_operand:SSEMODE124 5 "nonimmediate_operand" "")])
3222 (match_operand:SSEMODE124 1 "general_operand" "")
3223 (match_operand:SSEMODE124 2 "general_operand" "")))]
3226 if (ix86_expand_int_vcond (operands))
;; Counterpart of vcond<mode> under the "vcondu" name, for the SSEMODE124
;; modes.  The operand layout is identical: operand 3 is the comparison
;; operator applied to operands 4 and 5, operands 1 and 2 are the two
;; if_then_else arms, operand 0 is the destination.  The operand array is
;; passed to the same ix86_expand_int_vcond helper.
3232 (define_expand "vcondu<mode>"
3233 [(set (match_operand:SSEMODE124 0 "register_operand" "")
3234 (if_then_else:SSEMODE124
3235 (match_operator 3 ""
3236 [(match_operand:SSEMODE124 4 "nonimmediate_operand" "")
3237 (match_operand:SSEMODE124 5 "nonimmediate_operand" "")])
3238 (match_operand:SSEMODE124 1 "general_operand" "")
3239 (match_operand:SSEMODE124 2 "general_operand" "")))]
3242 if (ix86_expand_int_vcond (operands))
3248 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3250 ;; Parallel integral logical operations
3252 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; One's complement for the SSEMODEI integer vector modes, expressed as an
;; XOR of operand 1 with a constant.  The C body builds a CONST_VECTOR with
;; GET_MODE_NUNITS (<MODE>mode) elements, each constm1_rtx (all bits set),
;; forces it into a register and stores it in operands[2].
3254 (define_expand "one_cmpl<mode>2"
3255 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3256 (xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3260 int i, n = GET_MODE_NUNITS (<MODE>mode);
3261 rtvec v = rtvec_alloc (n);
3263 for (i = 0; i < n; ++i)
3264 RTVEC_ELT (v, i) = constm1_rtx;
3266 operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v));
;; Bitwise AND expander for the SSEMODEI integer vector modes.  Both inputs
;; may be registers or memory; ix86_fixup_binary_operands_no_copy is called
;; on the operands before the pattern is emitted.
3269 (define_expand "and<mode>3"
3270 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3271 (and:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3272 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
3274 "ix86_fixup_binary_operands_no_copy (AND, <MODE>mode, operands);")
;; Bitwise AND insn for the SSEMODEI modes, emitted as pand regardless of
;; element size.  Operand 1 is tied to the destination and commutative
;; ("%0"); operand 2 may be an SSE register or memory.  Requires TARGET_SSE2
;; and an operand mix accepted by ix86_binary_operator_ok.
3276 (define_insn "*and<mode>3"
3277 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3279 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
3280 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3281 "TARGET_SSE2 && ix86_binary_operator_ok (AND, <MODE>mode, operands)"
3282 "pand\t{%2, %0|%0, %2}"
3283 [(set_attr "type" "sselog")
3284 (set_attr "mode" "TI")])
;; pandn for the SSEMODEI modes.  In the RTL it is operand 1 (a register
;; tied to the destination) that is complemented with "not" before being
;; combined with operand 2, which may be an SSE register or memory.
3286 (define_insn "sse2_nand<mode>3"
3287 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3289 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0"))
3290 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3292 "pandn\t{%2, %0|%0, %2}"
3293 [(set_attr "type" "sselog")
3294 (set_attr "mode" "TI")])
;; Bitwise inclusive-OR expander for the SSEMODEI integer vector modes.  Both
;; inputs may be registers or memory; ix86_fixup_binary_operands_no_copy is
;; called on the operands before the pattern is emitted.
3296 (define_expand "ior<mode>3"
3297 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3298 (ior:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3299 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
3301 "ix86_fixup_binary_operands_no_copy (IOR, <MODE>mode, operands);")
;; Bitwise inclusive-OR insn for the SSEMODEI modes, emitted as por
;; regardless of element size.  Operand 1 is tied to the destination and
;; commutative ("%0"); operand 2 may be an SSE register or memory.  Requires
;; TARGET_SSE2 and an operand mix accepted by ix86_binary_operator_ok.
3303 (define_insn "*ior<mode>3"
3304 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3306 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
3307 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3308 "TARGET_SSE2 && ix86_binary_operator_ok (IOR, <MODE>mode, operands)"
3309 "por\t{%2, %0|%0, %2}"
3310 [(set_attr "type" "sselog")
3311 (set_attr "mode" "TI")])
;; Bitwise exclusive-OR expander for the SSEMODEI integer vector modes.  Both
;; inputs may be registers or memory; ix86_fixup_binary_operands_no_copy is
;; called on the operands before the pattern is emitted.
3313 (define_expand "xor<mode>3"
3314 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3315 (xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3316 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
3318 "ix86_fixup_binary_operands_no_copy (XOR, <MODE>mode, operands);")
;; Bitwise exclusive-OR insn for the SSEMODEI modes, emitted as pxor
;; regardless of element size.  Operand 1 is tied to the destination and
;; commutative ("%0"); operand 2 may be an SSE register or memory.  Requires
;; TARGET_SSE2 and an operand mix accepted by ix86_binary_operator_ok.
3320 (define_insn "*xor<mode>3"
3321 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3323 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
3324 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3325 "TARGET_SSE2 && ix86_binary_operator_ok (XOR, <MODE>mode, operands)"
3326 "pxor\t{%2, %0|%0, %2}"
3327 [(set_attr "type" "sselog")
3328 (set_attr "mode" "TI")])
3330 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3332 ;; Parallel integral element swizzling
3334 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Packs two V8HI inputs into one V16QI result using packsswb.
;; Operand 1 is a register tied to the destination; operand 2 may be an SSE
;; register or memory.
3336 (define_insn "vec_pack_ssat_v8hi"
3337 [(set (match_operand:V16QI 0 "register_operand" "=x")
3340 (match_operand:V8HI 1 "register_operand" "0"))
3342 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
3344 "packsswb\t{%2, %0|%0, %2}"
3345 [(set_attr "type" "sselog")
3346 (set_attr "mode" "TI")])
;; Packs two V4SI inputs into one V8HI result using packssdw.
;; Operand 1 is a register tied to the destination; operand 2 may be an SSE
;; register or memory.
3348 (define_insn "vec_pack_ssat_v4si"
3349 [(set (match_operand:V8HI 0 "register_operand" "=x")
3352 (match_operand:V4SI 1 "register_operand" "0"))
3354 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
3356 "packssdw\t{%2, %0|%0, %2}"
3357 [(set_attr "type" "sselog")
3358 (set_attr "mode" "TI")])
;; Packs two V8HI inputs into one V16QI result using packuswb.
;; Operand 1 is a register tied to the destination; operand 2 may be an SSE
;; register or memory.
3360 (define_insn "vec_pack_usat_v8hi"
3361 [(set (match_operand:V16QI 0 "register_operand" "=x")
3364 (match_operand:V8HI 1 "register_operand" "0"))
3366 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
3368 "packuswb\t{%2, %0|%0, %2}"
3369 [(set_attr "type" "sselog")
3370 (set_attr "mode" "TI")])
3373 ;; op1 = abcdefghijklmnop
3374 ;; op2 = qrstuvwxyz012345
3375 ;; h1 = aqbrcsdteufvgwhx
3376 ;; l1 = iyjzk0l1m2n3o4p5
3377 ;; h2 = aiqybjrzcks0dlt1
3378 ;; l2 = emu2fnv3gow4hpx5
3379 ;; h3 = aeimquy2bfjnrvz3
3380 ;; l3 = cgkosw04dhlptx15
3381 ;; result = bdfhjlnprtvxz135
;; Expander producing a V16QI result from two V8HI inputs using only byte
;; interleaves.  Both inputs are re-viewed as V16QI with gen_lowpart, six
;; fresh V16QI temporaries (h1..h3, l1..l3) are allocated, and three rounds
;; of high/low interleaves are emitted, followed by a final low interleave
;; of l3 and h3 into operands[0].  The letter diagram preceding this
;; definition traces the element order after each step.
3382 (define_expand "vec_pack_mod_v8hi"
3383 [(match_operand:V16QI 0 "register_operand" "")
3384 (match_operand:V8HI 1 "register_operand" "")
3385 (match_operand:V8HI 2 "register_operand" "")]
3388 rtx op1, op2, h1, l1, h2, l2, h3, l3;
3390 op1 = gen_lowpart (V16QImode, operands[1]);
3391 op2 = gen_lowpart (V16QImode, operands[2]);
3392 h1 = gen_reg_rtx (V16QImode);
3393 l1 = gen_reg_rtx (V16QImode);
3394 h2 = gen_reg_rtx (V16QImode);
3395 l2 = gen_reg_rtx (V16QImode);
3396 h3 = gen_reg_rtx (V16QImode);
3397 l3 = gen_reg_rtx (V16QImode);
3399 emit_insn (gen_vec_interleave_highv16qi (h1, op1, op2));
3400 emit_insn (gen_vec_interleave_lowv16qi (l1, op1, op2));
3401 emit_insn (gen_vec_interleave_highv16qi (h2, l1, h1));
3402 emit_insn (gen_vec_interleave_lowv16qi (l2, l1, h1));
3403 emit_insn (gen_vec_interleave_highv16qi (h3, l2, h2));
3404 emit_insn (gen_vec_interleave_lowv16qi (l3, l2, h2));
3405 emit_insn (gen_vec_interleave_lowv16qi (operands[0], l3, h3));
3416 ;; result = bdfhjlnp
;; Expander producing a V8HI result from two V4SI inputs using only word
;; interleaves.  Both inputs are re-viewed as V8HI with gen_lowpart, four
;; fresh V8HI temporaries (h1, l1, h2, l2) are allocated, and two rounds of
;; high/low interleaves are emitted, followed by a final low interleave of
;; l2 and h2 into operands[0].
3417 (define_expand "vec_pack_mod_v4si"
3418 [(match_operand:V8HI 0 "register_operand" "")
3419 (match_operand:V4SI 1 "register_operand" "")
3420 (match_operand:V4SI 2 "register_operand" "")]
3423 rtx op1, op2, h1, l1, h2, l2;
3425 op1 = gen_lowpart (V8HImode, operands[1]);
3426 op2 = gen_lowpart (V8HImode, operands[2]);
3427 h1 = gen_reg_rtx (V8HImode);
3428 l1 = gen_reg_rtx (V8HImode);
3429 h2 = gen_reg_rtx (V8HImode);
3430 l2 = gen_reg_rtx (V8HImode);
3432 emit_insn (gen_vec_interleave_highv8hi (h1, op1, op2));
3433 emit_insn (gen_vec_interleave_lowv8hi (l1, op1, op2));
3434 emit_insn (gen_vec_interleave_highv8hi (h2, l1, h1));
3435 emit_insn (gen_vec_interleave_lowv8hi (l2, l1, h1));
3436 emit_insn (gen_vec_interleave_lowv8hi (operands[0], l2, h2));
;; Expander producing a V4SI result from two V2DI inputs using only dword
;; interleaves.  Both inputs are re-viewed as V4SI with gen_lowpart, two
;; fresh V4SI temporaries (h1, l1) receive the high and low interleaves, and
;; a final low interleave of l1 and h1 is written to operands[0].
3446 (define_expand "vec_pack_mod_v2di"
3447 [(match_operand:V4SI 0 "register_operand" "")
3448 (match_operand:V2DI 1 "register_operand" "")
3449 (match_operand:V2DI 2 "register_operand" "")]
3452 rtx op1, op2, h1, l1;
3454 op1 = gen_lowpart (V4SImode, operands[1]);
3455 op2 = gen_lowpart (V4SImode, operands[2]);
3456 h1 = gen_reg_rtx (V4SImode);
3457 l1 = gen_reg_rtx (V4SImode);
3459 emit_insn (gen_vec_interleave_highv4si (h1, op1, op2));
3460 emit_insn (gen_vec_interleave_lowv4si (l1, op1, op2));
3461 emit_insn (gen_vec_interleave_lowv4si (operands[0], l1, h1));
;; Byte interleave emitted as punpckhbw.  The selector list alternates
;; indices 8..15 with indices 24..31.
;; NOTE(review): indices >= 16 presumably address operand 2 through a
;; concatenation of operands 1 and 2 -- confirm against the full pattern.
;; Operand 1 is a register tied to the destination; operand 2 may be memory.
3465 (define_insn "vec_interleave_highv16qi"
3466 [(set (match_operand:V16QI 0 "register_operand" "=x")
3469 (match_operand:V16QI 1 "register_operand" "0")
3470 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
3471 (parallel [(const_int 8) (const_int 24)
3472 (const_int 9) (const_int 25)
3473 (const_int 10) (const_int 26)
3474 (const_int 11) (const_int 27)
3475 (const_int 12) (const_int 28)
3476 (const_int 13) (const_int 29)
3477 (const_int 14) (const_int 30)
3478 (const_int 15) (const_int 31)])))]
3480 "punpckhbw\t{%2, %0|%0, %2}"
3481 [(set_attr "type" "sselog")
3482 (set_attr "mode" "TI")])
;; Byte interleave emitted as punpcklbw.  The selector list alternates
;; indices 0..7 with indices 16..23.
;; NOTE(review): indices >= 16 presumably address operand 2 through a
;; concatenation of operands 1 and 2 -- confirm against the full pattern.
;; Operand 1 is a register tied to the destination; operand 2 may be memory.
3484 (define_insn "vec_interleave_lowv16qi"
3485 [(set (match_operand:V16QI 0 "register_operand" "=x")
3488 (match_operand:V16QI 1 "register_operand" "0")
3489 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
3490 (parallel [(const_int 0) (const_int 16)
3491 (const_int 1) (const_int 17)
3492 (const_int 2) (const_int 18)
3493 (const_int 3) (const_int 19)
3494 (const_int 4) (const_int 20)
3495 (const_int 5) (const_int 21)
3496 (const_int 6) (const_int 22)
3497 (const_int 7) (const_int 23)])))]
3499 "punpcklbw\t{%2, %0|%0, %2}"
3500 [(set_attr "type" "sselog")
3501 (set_attr "mode" "TI")])
;; Word interleave emitted as punpckhwd.  The selector list alternates
;; indices 4..7 with indices 12..15.
;; NOTE(review): indices >= 8 presumably address operand 2 through a
;; concatenation of operands 1 and 2 -- confirm against the full pattern.
;; Operand 1 is a register tied to the destination; operand 2 may be memory.
3503 (define_insn "vec_interleave_highv8hi"
3504 [(set (match_operand:V8HI 0 "register_operand" "=x")
3507 (match_operand:V8HI 1 "register_operand" "0")
3508 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
3509 (parallel [(const_int 4) (const_int 12)
3510 (const_int 5) (const_int 13)
3511 (const_int 6) (const_int 14)
3512 (const_int 7) (const_int 15)])))]
3514 "punpckhwd\t{%2, %0|%0, %2}"
3515 [(set_attr "type" "sselog")
3516 (set_attr "mode" "TI")])
;; Word interleave emitted as punpcklwd.  The selector list alternates
;; indices 0..3 with indices 8..11.
;; NOTE(review): indices >= 8 presumably address operand 2 through a
;; concatenation of operands 1 and 2 -- confirm against the full pattern.
;; Operand 1 is a register tied to the destination; operand 2 may be memory.
3518 (define_insn "vec_interleave_lowv8hi"
3519 [(set (match_operand:V8HI 0 "register_operand" "=x")
3522 (match_operand:V8HI 1 "register_operand" "0")
3523 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
3524 (parallel [(const_int 0) (const_int 8)
3525 (const_int 1) (const_int 9)
3526 (const_int 2) (const_int 10)
3527 (const_int 3) (const_int 11)])))]
3529 "punpcklwd\t{%2, %0|%0, %2}"
3530 [(set_attr "type" "sselog")
3531 (set_attr "mode" "TI")])
;; Dword interleave emitted as punpckhdq.  The selector list is 2, 6, 3, 7.
;; NOTE(review): indices >= 4 presumably address operand 2 through a
;; concatenation of operands 1 and 2 -- confirm against the full pattern.
;; Operand 1 is a register tied to the destination; operand 2 may be memory.
3533 (define_insn "vec_interleave_highv4si"
3534 [(set (match_operand:V4SI 0 "register_operand" "=x")
3537 (match_operand:V4SI 1 "register_operand" "0")
3538 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
3539 (parallel [(const_int 2) (const_int 6)
3540 (const_int 3) (const_int 7)])))]
3542 "punpckhdq\t{%2, %0|%0, %2}"
3543 [(set_attr "type" "sselog")
3544 (set_attr "mode" "TI")])
;; Dword interleave emitted as punpckldq.  The selector list is 0, 4, 1, 5.
;; NOTE(review): indices >= 4 presumably address operand 2 through a
;; concatenation of operands 1 and 2 -- confirm against the full pattern.
;; Operand 1 is a register tied to the destination; operand 2 may be memory.
3546 (define_insn "vec_interleave_lowv4si"
3547 [(set (match_operand:V4SI 0 "register_operand" "=x")
3550 (match_operand:V4SI 1 "register_operand" "0")
3551 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
3552 (parallel [(const_int 0) (const_int 4)
3553 (const_int 1) (const_int 5)])))]
3555 "punpckldq\t{%2, %0|%0, %2}"
3556 [(set_attr "type" "sselog")
3557 (set_attr "mode" "TI")])
;; Qword interleave emitted as punpckhqdq; the selector list begins with
;; index 1.  Operand 1 is a register tied to the destination; operand 2 may
;; be an SSE register or memory.
3559 (define_insn "vec_interleave_highv2di"
3560 [(set (match_operand:V2DI 0 "register_operand" "=x")
3563 (match_operand:V2DI 1 "register_operand" "0")
3564 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
3565 (parallel [(const_int 1)
3568 "punpckhqdq\t{%2, %0|%0, %2}"
3569 [(set_attr "type" "sselog")
3570 (set_attr "mode" "TI")])
;; Qword interleave emitted as punpcklqdq; the selector list begins with
;; index 0.  Operand 1 is a register tied to the destination; operand 2 may
;; be an SSE register or memory.
3572 (define_insn "vec_interleave_lowv2di"
3573 [(set (match_operand:V2DI 0 "register_operand" "=x")
3576 (match_operand:V2DI 1 "register_operand" "0")
3577 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
3578 (parallel [(const_int 0)
3581 "punpcklqdq\t{%2, %0|%0, %2}"
3582 [(set_attr "type" "sselog")
3583 (set_attr "mode" "TI")])
;; Expander for inserting a scalar into a V8HI vector.  It accepts the value
;; as SImode (operand 2) and the element index as a constant in 0..7
;; (operand 3), then rewrites them for the matching insn: operand 2 is
;; narrowed to HImode with gen_lowpart, and the index n becomes the one-hot
;; mask 1 << n.
3585 (define_expand "sse2_pinsrw"
3586 [(set (match_operand:V8HI 0 "register_operand" "")
3589 (match_operand:SI 2 "nonimmediate_operand" ""))
3590 (match_operand:V8HI 1 "register_operand" "")
3591 (match_operand:SI 3 "const_0_to_7_operand" "")))]
3594 operands[2] = gen_lowpart (HImode, operands[2]);
3595 operands[3] = GEN_INT ((1 << INTVAL (operands[3])));
;; pinsrw insn.  Operand 3 is a power-of-two mask in 1..128; before printing
;; it is converted back to a bit index with exact_log2.  Operand 2 (HImode,
;; general register or memory) is printed with the %k modifier; operand 1 is
;; the source vector tied to the destination.
3598 (define_insn "*sse2_pinsrw"
3599 [(set (match_operand:V8HI 0 "register_operand" "=x")
3602 (match_operand:HI 2 "nonimmediate_operand" "rm"))
3603 (match_operand:V8HI 1 "register_operand" "0")
3604 (match_operand:SI 3 "const_pow2_1_to_128_operand" "n")))]
3607 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
3608 return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}";
3610 [(set_attr "type" "sselog")
3611 (set_attr "mode" "TI")])
;; pextrw insn: selects the V8HI element of operand 1 named by the constant
;; index in operand 2 (0..7) and places the result in an SImode general
;; register.
3613 (define_insn "sse2_pextrw"
3614 [(set (match_operand:SI 0 "register_operand" "=r")
3617 (match_operand:V8HI 1 "register_operand" "x")
3618 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")]))))]
3620 "pextrw\t{%2, %1, %0|%0, %1, %2}"
3621 [(set_attr "type" "sselog")
3622 (set_attr "mode" "TI")])
;; Expander taking the pshufd control byte as a single constant (operand 2).
;; The byte is split into four 2-bit selectors (bits 1:0, 3:2, 5:4, 7:6),
;; which are passed as separate operands to sse2_pshufd_1.
3624 (define_expand "sse2_pshufd"
3625 [(match_operand:V4SI 0 "register_operand" "")
3626 (match_operand:V4SI 1 "nonimmediate_operand" "")
3627 (match_operand:SI 2 "const_int_operand" "")]
3630 int mask = INTVAL (operands[2]);
3631 emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
3632 GEN_INT ((mask >> 0) & 3),
3633 GEN_INT ((mask >> 2) & 3),
3634 GEN_INT ((mask >> 4) & 3),
3635 GEN_INT ((mask >> 6) & 3)));
;; pshufd insn with the four element selectors (each 0..3) held as separate
;; operands 2..5.  The output code packs them back into one control byte,
;; two bits per selector starting at bit 0, and overwrites operands[2] with
;; it so the template can print it as the immediate.
3639 (define_insn "sse2_pshufd_1"
3640 [(set (match_operand:V4SI 0 "register_operand" "=x")
3642 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
3643 (parallel [(match_operand 2 "const_0_to_3_operand" "")
3644 (match_operand 3 "const_0_to_3_operand" "")
3645 (match_operand 4 "const_0_to_3_operand" "")
3646 (match_operand 5 "const_0_to_3_operand" "")])))]
3650 mask |= INTVAL (operands[2]) << 0;
3651 mask |= INTVAL (operands[3]) << 2;
3652 mask |= INTVAL (operands[4]) << 4;
3653 mask |= INTVAL (operands[5]) << 6;
3654 operands[2] = GEN_INT (mask);
3656 return "pshufd\t{%2, %1, %0|%0, %1, %2}";
3658 [(set_attr "type" "sselog1")
3659 (set_attr "mode" "TI")])
;; Expander taking the pshuflw control byte as a single constant (operand 2).
;; The byte is split into four 2-bit selectors (bits 1:0, 3:2, 5:4, 7:6),
;; which are passed as separate operands to sse2_pshuflw_1.
3661 (define_expand "sse2_pshuflw"
3662 [(match_operand:V8HI 0 "register_operand" "")
3663 (match_operand:V8HI 1 "nonimmediate_operand" "")
3664 (match_operand:SI 2 "const_int_operand" "")]
3667 int mask = INTVAL (operands[2]);
3668 emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
3669 GEN_INT ((mask >> 0) & 3),
3670 GEN_INT ((mask >> 2) & 3),
3671 GEN_INT ((mask >> 4) & 3),
3672 GEN_INT ((mask >> 6) & 3)));
;; pshuflw insn with four element selectors (each 0..3) held as separate
;; operands 2..5 at the start of the selector list.  The output code packs
;; them back into one control byte, two bits per selector starting at bit 0,
;; and overwrites operands[2] with it so the template can print it.
3676 (define_insn "sse2_pshuflw_1"
3677 [(set (match_operand:V8HI 0 "register_operand" "=x")
3679 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
3680 (parallel [(match_operand 2 "const_0_to_3_operand" "")
3681 (match_operand 3 "const_0_to_3_operand" "")
3682 (match_operand 4 "const_0_to_3_operand" "")
3683 (match_operand 5 "const_0_to_3_operand" "")
3691 mask |= INTVAL (operands[2]) << 0;
3692 mask |= INTVAL (operands[3]) << 2;
3693 mask |= INTVAL (operands[4]) << 4;
3694 mask |= INTVAL (operands[5]) << 6;
3695 operands[2] = GEN_INT (mask);
3697 return "pshuflw\t{%2, %1, %0|%0, %1, %2}";
3699 [(set_attr "type" "sselog")
3700 (set_attr "mode" "TI")])
;; Expander taking the pshufhw control byte as a single constant (operand 2).
;; The byte is split into four 2-bit selectors, and 4 is added to each so
;; that the values passed to sse2_pshufhw_1 lie in 4..7.
3702 (define_expand "sse2_pshufhw"
3703 [(match_operand:V8HI 0 "register_operand" "")
3704 (match_operand:V8HI 1 "nonimmediate_operand" "")
3705 (match_operand:SI 2 "const_int_operand" "")]
3708 int mask = INTVAL (operands[2]);
3709 emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
3710 GEN_INT (((mask >> 0) & 3) + 4),
3711 GEN_INT (((mask >> 2) & 3) + 4),
3712 GEN_INT (((mask >> 4) & 3) + 4),
3713 GEN_INT (((mask >> 6) & 3) + 4)));
;; pshufhw insn.  The selector list starts with a fixed index 0 and ends
;; with four variable selectors (operands 2..5), each in 4..7.  The output
;; code subtracts 4 from each selector, packs the results into one control
;; byte (two bits each, starting at bit 0) and overwrites operands[2] with
;; it so the template can print it.
3717 (define_insn "sse2_pshufhw_1"
3718 [(set (match_operand:V8HI 0 "register_operand" "=x")
3720 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
3721 (parallel [(const_int 0)
3725 (match_operand 2 "const_4_to_7_operand" "")
3726 (match_operand 3 "const_4_to_7_operand" "")
3727 (match_operand 4 "const_4_to_7_operand" "")
3728 (match_operand 5 "const_4_to_7_operand" "")])))]
3732 mask |= (INTVAL (operands[2]) - 4) << 0;
3733 mask |= (INTVAL (operands[3]) - 4) << 2;
3734 mask |= (INTVAL (operands[4]) - 4) << 4;
3735 mask |= (INTVAL (operands[5]) - 4) << 6;
3736 operands[2] = GEN_INT (mask);
3738 return "pshufhw\t{%2, %1, %0|%0, %1, %2}";
3740 [(set_attr "type" "sselog")
3741 (set_attr "mode" "TI")])
;; Expander that builds a V4SI value from an SImode scalar (operand 1,
;; register or memory).  The preparation statement sets operands[2] to the
;; V4SI zero constant, CONST0_RTX (V4SImode).
3743 (define_expand "sse2_loadd"
3744 [(set (match_operand:V4SI 0 "register_operand" "")
3747 (match_operand:SI 1 "nonimmediate_operand" ""))
3751 "operands[2] = CONST0_RTX (V4SImode);")
;; Places an SImode scalar (operand 2) into a V4SI register whose other
;; input (operand 1) is either zero or the destination itself.
;; Three alternatives, in order:
;;   movd  -- "Y" destination, scalar from memory or a general register,
;;            operand 1 is the zero constant ("C");
;;   movss -- "x" destination, scalar from memory, operand 1 is zero;
;;   movss -- "x" destination, scalar in an SSE register, operand 1 tied to
;;            the destination.
3753 (define_insn "sse2_loadld"
3754 [(set (match_operand:V4SI 0 "register_operand" "=Y,x,x")
3757 (match_operand:SI 2 "nonimmediate_operand" "mr,m,x"))
3758 (match_operand:V4SI 1 "reg_or_0_operand" " C,C,0")
3762 movd\t{%2, %0|%0, %2}
3763 movss\t{%2, %0|%0, %2}
3764 movss\t{%2, %0|%0, %2}"
3765 [(set_attr "type" "ssemov")
3766 (set_attr "mode" "TI,V4SF,SF")])
3768 ;; ??? The hardware supports more, but TARGET_INTER_UNIT_MOVES must
3769 ;; be taken into account, and movdi isn't fully populated even without.
;; Stores element 0 of a V4SI register into an SImode destination (memory or
;; SSE register).  Once reload has completed, the insn is split into a plain
;; SImode set whose source is the same hard register as operand 1,
;; re-expressed in SImode via gen_rtx_REG.
3770 (define_insn_and_split "sse2_stored"
3771 [(set (match_operand:SI 0 "nonimmediate_operand" "=mx")
3773 (match_operand:V4SI 1 "register_operand" "x")
3774 (parallel [(const_int 0)])))]
3777 "&& reload_completed"
3778 [(set (match_dup 0) (match_dup 1))]
3780 operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));
;; Expander for storing element 0 of a V2DI register (operand 1) into a
;; DImode destination (operand 0, register or memory).
3783 (define_expand "sse_storeq"
3784 [(set (match_operand:DI 0 "nonimmediate_operand" "")
3786 (match_operand:V2DI 1 "register_operand" "")
3787 (parallel [(const_int 0)])))]
3791 ;; ??? The hardware supports more, but TARGET_INTER_UNIT_MOVES must
3792 ;; be taken into account, and movdi isn't fully populated even without.
;; Stores element 0 of a V2DI register into a DImode destination (memory or
;; SSE register).  A second copy of the same RTL follows, guarded by
;; "TARGET_SSE && reload_completed"; it rewrites the operation as a plain
;; DImode set whose source is the same hard register as operand 1,
;; re-expressed in DImode via gen_rtx_REG.
3793 (define_insn "*sse2_storeq"
3794 [(set (match_operand:DI 0 "nonimmediate_operand" "=mx")
3796 (match_operand:V2DI 1 "register_operand" "x")
3797 (parallel [(const_int 0)])))]
3802 [(set (match_operand:DI 0 "nonimmediate_operand" "")
3804 (match_operand:V2DI 1 "register_operand" "")
3805 (parallel [(const_int 0)])))]
3806 "TARGET_SSE && reload_completed"
3807 [(set (match_dup 0) (match_dup 1))]
3809 operands[1] = gen_rtx_REG (DImode, REGNO (operands[1]));
;; Builds a V4SI value from one SImode register operand.  Two alternatives:
;;   pshufd $0 -- "Y" destination and "Y" source register;
;;   shufps $0 -- "x" destination with the source tied to it (the template
;;                uses %0 for both inputs).
3812 (define_insn "*vec_dupv4si"
3813 [(set (match_operand:V4SI 0 "register_operand" "=Y,x")
3815 (match_operand:SI 1 "register_operand" " Y,0")))]
3818 pshufd\t{$0, %1, %0|%0, %1, 0}
3819 shufps\t{$0, %0, %0|%0, %0, 0}"
3820 [(set_attr "type" "sselog1")
3821 (set_attr "mode" "TI,V4SF")])
;; Builds a V2DI value from one DImode register operand.  Two alternatives
;; ("Y" and "x" destinations); in both, the source is tied to the
;; destination.  The alternatives are typed sselog1 and ssemov, with modes
;; TI and V4SF respectively.
3823 (define_insn "*vec_dupv2di"
3824 [(set (match_operand:V2DI 0 "register_operand" "=Y,x")
3826 (match_operand:DI 1 "register_operand" " 0,0")))]
3831 [(set_attr "type" "sselog1,ssemov")
3832 (set_attr "mode" "TI,V4SF")])
3834 ;; ??? In theory we can match memory for the MMX alternative, but allowing
3835 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
3836 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Forms a V2SI value from two SImode operands.  Four alternatives:
;;   punpckldq -- "Y" destination, operand 1 tied to it, operand 2 in "Y";
;;   movd      -- "Y" destination, operand 1 from register/memory, operand 2
;;                is the zero constant ("C");
;;   punpckldq -- "y" destination, operand 1 tied to it, operand 2 in "y";
;;   movd      -- "y" destination, operand 1 from register/memory, operand 2
;;                zero.
;; The "y" alternatives carry the "*" modifier on their register class.
3837 (define_insn "*sse2_concatv2si"
3838 [(set (match_operand:V2SI 0 "register_operand" "=Y, Y,*y,*y")
3840 (match_operand:SI 1 "nonimmediate_operand" " 0,rm, 0,rm")
3841 (match_operand:SI 2 "reg_or_0_operand" " Y, C,*y, C")))]
3844 punpckldq\t{%2, %0|%0, %2}
3845 movd\t{%1, %0|%0, %1}
3846 punpckldq\t{%2, %0|%0, %2}
3847 movd\t{%1, %0|%0, %1}"
3848 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
3849 (set_attr "mode" "TI,TI,DI,DI")])
;; Forms a V2SI value from two SImode operands, using "x" rather than "Y"
;; for the SSE alternatives.  Four alternatives:
;;   unpcklps  -- "x" destination, operand 1 tied to it, operand 2 in "x";
;;   movss     -- "x" destination, operand 1 from memory, operand 2 is the
;;                zero constant ("C");
;;   punpckldq -- "y" destination, operand 1 tied to it, operand 2 in "y";
;;   movd      -- "y" destination, operand 1 from register/memory, operand 2
;;                zero.
3851 (define_insn "*sse1_concatv2si"
3852 [(set (match_operand:V2SI 0 "register_operand" "=x,x,*y,*y")
3854 (match_operand:SI 1 "nonimmediate_operand" " 0,m, 0,*rm")
3855 (match_operand:SI 2 "reg_or_0_operand" " x,C,*y,C")))]
3858 unpcklps\t{%2, %0|%0, %2}
3859 movss\t{%1, %0|%0, %1}
3860 punpckldq\t{%2, %0|%0, %2}
3861 movd\t{%1, %0|%0, %1}"
3862 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
3863 (set_attr "mode" "V4SF,V4SF,DI,DI")])
;; Forms a V4SI value from two V2SI operands; operand 1 is always tied to
;; the destination.  Three alternatives, chosen by where operand 2 lives:
;;   punpcklqdq -- operand 2 in a "Y" register;
;;   movlhps    -- operand 2 in an "x" register;
;;   movhps     -- operand 2 in memory.
3865 (define_insn "*vec_concatv4si_1"
3866 [(set (match_operand:V4SI 0 "register_operand" "=Y,x,x")
3868 (match_operand:V2SI 1 "register_operand" " 0,0,0")
3869 (match_operand:V2SI 2 "nonimmediate_operand" " Y,x,m")))]
3872 punpcklqdq\t{%2, %0|%0, %2}
3873 movlhps\t{%2, %0|%0, %2}
3874 movhps\t{%2, %0|%0, %2}"
3875 [(set_attr "type" "sselog,ssemov,ssemov")
3876 (set_attr "mode" "TI,V4SF,V2SF")])
;; Forms a V2DI value from two DImode operands.  Six alternatives:
;;   movq       -- operand 1 from memory, operand 2 is the zero constant;
;;   movq2dq    -- operand 1 in a "y" register, operand 2 zero (this
;;                 alternative is marked "?" on the destination);
;;   punpcklqdq -- operand 1 tied to the destination, operand 2 in "Y";
;;   movlhps    -- operand 1 tied to the destination, operand 2 in "x";
;;   movhps     -- operand 1 tied to the destination, operand 2 in memory;
;;   movlps     -- operand 1 from memory, operand 2 tied to the destination.
3878 (define_insn "*vec_concatv2di"
3879 [(set (match_operand:V2DI 0 "register_operand" "=Y,?Y,Y,x,x,x")
3881 (match_operand:DI 1 "nonimmediate_operand" " m,*y,0,0,0,m")
3882 (match_operand:DI 2 "vector_move_operand" " C, C,Y,x,m,0")))]
3885 movq\t{%1, %0|%0, %1}
3886 movq2dq\t{%1, %0|%0, %1}
3887 punpcklqdq\t{%2, %0|%0, %2}
3888 movlhps\t{%2, %0|%0, %2}
3889 movhps\t{%2, %0|%0, %2}
3890 movlps\t{%1, %0|%0, %1}"
3891 [(set_attr "type" "ssemov,ssemov,sselog,ssemov,ssemov,ssemov")
3892 (set_attr "mode" "TI,TI,TI,V4SF,V2SF,V2SF")])
;; vec_set expander for V2DI: operand 0 is the vector, operand 1 the DImode
;; register value, operand 2 a constant element index.  All work is done by
;; ix86_expand_vector_set, called with false as its first argument.
3894 (define_expand "vec_setv2di"
3895 [(match_operand:V2DI 0 "register_operand" "")
3896 (match_operand:DI 1 "register_operand" "")
3897 (match_operand 2 "const_int_operand" "")]
3900 ix86_expand_vector_set (false, operands[0], operands[1],
3901 INTVAL (operands[2]));
;; vec_extract expander for V2DI: operand 0 is the DImode destination,
;; operand 1 the vector, operand 2 a constant element index.  All work is
;; done by ix86_expand_vector_extract, called with false as its first
;; argument.
3905 (define_expand "vec_extractv2di"
3906 [(match_operand:DI 0 "register_operand" "")
3907 (match_operand:V2DI 1 "register_operand" "")
3908 (match_operand 2 "const_int_operand" "")]
3911 ix86_expand_vector_extract (false, operands[0], operands[1],
3912 INTVAL (operands[2]));
;; vec_init expander for V2DI: operand 0 is the destination vector and
;; operand 1 (no mode or predicate) the initializer.  All work is done by
;; ix86_expand_vector_init, called with false as its first argument.
3916 (define_expand "vec_initv2di"
3917 [(match_operand:V2DI 0 "register_operand" "")
3918 (match_operand 1 "" "")]
3921 ix86_expand_vector_init (false, operands[0], operands[1]);
;; vec_set expander for V4SI: operand 0 is the vector, operand 1 the SImode
;; register value, operand 2 a constant element index.  All work is done by
;; ix86_expand_vector_set, called with false as its first argument.
3925 (define_expand "vec_setv4si"
3926 [(match_operand:V4SI 0 "register_operand" "")
3927 (match_operand:SI 1 "register_operand" "")
3928 (match_operand 2 "const_int_operand" "")]
3931 ix86_expand_vector_set (false, operands[0], operands[1],
3932 INTVAL (operands[2]));
;; vec_extract expander for V4SI: operand 0 is the SImode destination,
;; operand 1 the vector, operand 2 a constant element index.  All work is
;; done by ix86_expand_vector_extract, called with false as its first
;; argument.
3936 (define_expand "vec_extractv4si"
3937 [(match_operand:SI 0 "register_operand" "")
3938 (match_operand:V4SI 1 "register_operand" "")
3939 (match_operand 2 "const_int_operand" "")]
3942 ix86_expand_vector_extract (false, operands[0], operands[1],
3943 INTVAL (operands[2]));
;; vec_init expander for V4SI: operand 0 is the destination vector and
;; operand 1 (no mode or predicate) the initializer.  All work is done by
;; ix86_expand_vector_init, called with false as its first argument.
3947 (define_expand "vec_initv4si"
3948 [(match_operand:V4SI 0 "register_operand" "")
3949 (match_operand 1 "" "")]
3952 ix86_expand_vector_init (false, operands[0], operands[1]);
;; vec_set expander for V8HI: operand 0 is the vector, operand 1 the HImode
;; register value, operand 2 a constant element index.  All work is done by
;; ix86_expand_vector_set, called with false as its first argument.
3956 (define_expand "vec_setv8hi"
3957 [(match_operand:V8HI 0 "register_operand" "")
3958 (match_operand:HI 1 "register_operand" "")
3959 (match_operand 2 "const_int_operand" "")]
3962 ix86_expand_vector_set (false, operands[0], operands[1],
3963 INTVAL (operands[2]));
;; vec_extract expander for V8HI: operand 0 is the HImode destination,
;; operand 1 the vector, operand 2 a constant element index.  All work is
;; done by ix86_expand_vector_extract, called with false as its first
;; argument.
3967 (define_expand "vec_extractv8hi"
3968 [(match_operand:HI 0 "register_operand" "")
3969 (match_operand:V8HI 1 "register_operand" "")
3970 (match_operand 2 "const_int_operand" "")]
3973 ix86_expand_vector_extract (false, operands[0], operands[1],
3974 INTVAL (operands[2]));
;; vec_init expander for V8HI: operand 0 is the destination vector and
;; operand 1 (no mode or predicate) the initializer.  All work is done by
;; ix86_expand_vector_init, called with false as its first argument.
3978 (define_expand "vec_initv8hi"
3979 [(match_operand:V8HI 0 "register_operand" "")
3980 (match_operand 1 "" "")]
3983 ix86_expand_vector_init (false, operands[0], operands[1]);
;; vec_set expander for V16QI: operand 0 is the vector, operand 1 the QImode
;; register value, operand 2 a constant element index.  All work is done by
;; ix86_expand_vector_set, called with false as its first argument.
3987 (define_expand "vec_setv16qi"
3988 [(match_operand:V16QI 0 "register_operand" "")
3989 (match_operand:QI 1 "register_operand" "")
3990 (match_operand 2 "const_int_operand" "")]
3993 ix86_expand_vector_set (false, operands[0], operands[1],
3994 INTVAL (operands[2]));
;; vec_extract expander for V16QI: operand 0 is the QImode destination,
;; operand 1 the vector, operand 2 a constant element index.  All work is
;; done by ix86_expand_vector_extract, called with false as its first
;; argument.
3998 (define_expand "vec_extractv16qi"
3999 [(match_operand:QI 0 "register_operand" "")
4000 (match_operand:V16QI 1 "register_operand" "")
4001 (match_operand 2 "const_int_operand" "")]
4004 ix86_expand_vector_extract (false, operands[0], operands[1],
4005 INTVAL (operands[2]));
;; vec_init expander for V16QI: operand 0 is the destination vector and
;; operand 1 (no mode or predicate) the initializer.  All work is done by
;; ix86_expand_vector_init, called with false as its first argument.
4009 (define_expand "vec_initv16qi"
4010 [(match_operand:V16QI 0 "register_operand" "")
4011 (match_operand 1 "" "")]
4014 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Widening unpack V16QI -> V8HI, delegated to ix86_expand_sse_unpack with
;; flags (true, true).  Among the vec_unpack* expanders in this file the
;; first flag is true exactly for the "unpacku" names and the second exactly
;; for the "_hi_" names.
4018 (define_expand "vec_unpacku_hi_v16qi"
4019 [(match_operand:V8HI 0 "register_operand" "")
4020 (match_operand:V16QI 1 "register_operand" "")]
4023 ix86_expand_sse_unpack (operands, true, true);
;; Widening unpack V16QI -> V8HI, delegated to ix86_expand_sse_unpack with
;; flags (false, true).  Among the vec_unpack* expanders in this file the
;; first flag is true exactly for the "unpacku" names and the second exactly
;; for the "_hi_" names.
4027 (define_expand "vec_unpacks_hi_v16qi"
4028 [(match_operand:V8HI 0 "register_operand" "")
4029 (match_operand:V16QI 1 "register_operand" "")]
4032 ix86_expand_sse_unpack (operands, false, true);
;; Widening unpack V16QI -> V8HI, delegated to ix86_expand_sse_unpack with
;; flags (true, false).  Among the vec_unpack* expanders in this file the
;; first flag is true exactly for the "unpacku" names and the second exactly
;; for the "_hi_" names.
4036 (define_expand "vec_unpacku_lo_v16qi"
4037 [(match_operand:V8HI 0 "register_operand" "")
4038 (match_operand:V16QI 1 "register_operand" "")]
4041 ix86_expand_sse_unpack (operands, true, false);
;; Widening unpack V16QI -> V8HI, delegated to ix86_expand_sse_unpack with
;; flags (false, false).  Among the vec_unpack* expanders in this file the
;; first flag is true exactly for the "unpacku" names and the second exactly
;; for the "_hi_" names.
4045 (define_expand "vec_unpacks_lo_v16qi"
4046 [(match_operand:V8HI 0 "register_operand" "")
4047 (match_operand:V16QI 1 "register_operand" "")]
4050 ix86_expand_sse_unpack (operands, false, false);
;; Widening unpack V8HI -> V4SI, delegated to ix86_expand_sse_unpack with
;; flags (true, true).  Among the vec_unpack* expanders in this file the
;; first flag is true exactly for the "unpacku" names and the second exactly
;; for the "_hi_" names.
4054 (define_expand "vec_unpacku_hi_v8hi"
4055 [(match_operand:V4SI 0 "register_operand" "")
4056 (match_operand:V8HI 1 "register_operand" "")]
4059 ix86_expand_sse_unpack (operands, true, true);
;; Widening unpack V8HI -> V4SI, delegated to ix86_expand_sse_unpack with
;; flags (false, true).  Among the vec_unpack* expanders in this file the
;; first flag is true exactly for the "unpacku" names and the second exactly
;; for the "_hi_" names.
4063 (define_expand "vec_unpacks_hi_v8hi"
4064 [(match_operand:V4SI 0 "register_operand" "")
4065 (match_operand:V8HI 1 "register_operand" "")]
4068 ix86_expand_sse_unpack (operands, false, true);
;; Widening unpack V8HI -> V4SI, delegated to ix86_expand_sse_unpack with
;; flags (true, false).  Among the vec_unpack* expanders in this file the
;; first flag is true exactly for the "unpacku" names and the second exactly
;; for the "_hi_" names.
4072 (define_expand "vec_unpacku_lo_v8hi"
4073 [(match_operand:V4SI 0 "register_operand" "")
4074 (match_operand:V8HI 1 "register_operand" "")]
4077 ix86_expand_sse_unpack (operands, true, false);
;; Widening unpack V8HI -> V4SI, delegated to ix86_expand_sse_unpack with
;; flags (false, false).  Among the vec_unpack* expanders in this file the
;; first flag is true exactly for the "unpacku" names and the second exactly
;; for the "_hi_" names.
4081 (define_expand "vec_unpacks_lo_v8hi"
4082 [(match_operand:V4SI 0 "register_operand" "")
4083 (match_operand:V8HI 1 "register_operand" "")]
4086 ix86_expand_sse_unpack (operands, false, false);
;; Widening unpack V4SI -> V2DI, delegated to ix86_expand_sse_unpack with
;; flags (true, true).  Among the vec_unpack* expanders in this file the
;; first flag is true exactly for the "unpacku" names and the second exactly
;; for the "_hi_" names.
4090 (define_expand "vec_unpacku_hi_v4si"
4091 [(match_operand:V2DI 0 "register_operand" "")
4092 (match_operand:V4SI 1 "register_operand" "")]
4095 ix86_expand_sse_unpack (operands, true, true);
;; Widening unpack V4SI -> V2DI, delegated to ix86_expand_sse_unpack with
;; flags (false, true).  Among the vec_unpack* expanders in this file the
;; first flag is true exactly for the "unpacku" names and the second exactly
;; for the "_hi_" names.
4099 (define_expand "vec_unpacks_hi_v4si"
4100 [(match_operand:V2DI 0 "register_operand" "")
4101 (match_operand:V4SI 1 "register_operand" "")]
4104 ix86_expand_sse_unpack (operands, false, true);
;; Widening unpack V4SI -> V2DI, delegated to ix86_expand_sse_unpack with
;; flags (true, false).  Among the vec_unpack* expanders in this file the
;; first flag is true exactly for the "unpacku" names and the second exactly
;; for the "_hi_" names.
4108 (define_expand "vec_unpacku_lo_v4si"
4109 [(match_operand:V2DI 0 "register_operand" "")
4110 (match_operand:V4SI 1 "register_operand" "")]
4113 ix86_expand_sse_unpack (operands, true, false);
;; Widening unpack V4SI -> V2DI, delegated to ix86_expand_sse_unpack with
;; flags (false, false).  Among the vec_unpack* expanders in this file the
;; first flag is true exactly for the "unpacku" names and the second exactly
;; for the "_hi_" names.
4117 (define_expand "vec_unpacks_lo_v4si"
4118 [(match_operand:V2DI 0 "register_operand" "")
4119 (match_operand:V4SI 1 "register_operand" "")]
4122 ix86_expand_sse_unpack (operands, false, false);
4126 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4130 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; pavgb insn on V16QI operands.  Operand 1 is tied to the destination and
;; commutative ("%0"); operand 2 may be an SSE register or memory.  The RTL
;; includes a V16QI constant vector of sixteen 1s.
;; NOTE(review): the constant is presumably the rounding term of the
;; average -- confirm against the full pattern.
;; Requires TARGET_SSE2 and an operand mix accepted by
;; ix86_binary_operator_ok for PLUS.
4132 (define_insn "sse2_uavgv16qi3"
4133 [(set (match_operand:V16QI 0 "register_operand" "=x")
4139 (match_operand:V16QI 1 "nonimmediate_operand" "%0"))
4141 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))
4142 (const_vector:V16QI [(const_int 1) (const_int 1)
4143 (const_int 1) (const_int 1)
4144 (const_int 1) (const_int 1)
4145 (const_int 1) (const_int 1)
4146 (const_int 1) (const_int 1)
4147 (const_int 1) (const_int 1)
4148 (const_int 1) (const_int 1)
4149 (const_int 1) (const_int 1)]))
4151 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
4152 "pavgb\t{%2, %0|%0, %2}"
4153 [(set_attr "type" "sseiadd")
4154 (set_attr "mode" "TI")])
;; pavgw insn on V8HI operands.  Operand 1 is tied to the destination and
;; commutative ("%0"); operand 2 may be an SSE register or memory.  The RTL
;; includes a V8HI constant vector of eight 1s.
;; NOTE(review): the constant is presumably the rounding term of the
;; average -- confirm against the full pattern.
;; Requires TARGET_SSE2 and an operand mix accepted by
;; ix86_binary_operator_ok for PLUS.
4156 (define_insn "sse2_uavgv8hi3"
4157 [(set (match_operand:V8HI 0 "register_operand" "=x")
4163 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
4165 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
4166 (const_vector:V8HI [(const_int 1) (const_int 1)
4167 (const_int 1) (const_int 1)
4168 (const_int 1) (const_int 1)
4169 (const_int 1) (const_int 1)]))
4171 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
4172 "pavgw\t{%2, %0|%0, %2}"
4173 [(set_attr "type" "sseiadd")
4174 (set_attr "mode" "TI")])
4176 ;; The correct representation for this is absolutely enormous, and
4177 ;; surely not generally useful.
;; psadbw insn, modelled as an opaque unspec: two V16QI inputs produce a
;; V2DI result.  Operand 1 is a register tied to the destination; operand 2
;; may be an SSE register or memory.
4178 (define_insn "sse2_psadbw"
4179 [(set (match_operand:V2DI 0 "register_operand" "=x")
4180 (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "0")
4181 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
4184 "psadbw\t{%2, %0|%0, %2}"
4185 [(set_attr "type" "sseiadd")
4186 (set_attr "mode" "TI")])
4188 ;; ??? The test case that we've been testing against for autovect-branch
4189 ;; sums into an SImode value, which means we need to produce SImode
4190 ;; results here. Since the psadbw max values are 0x7f8, we could claim
4191 ;; the result is anything between V8HImode and V2DImode and still get
4192 ;; correct results. But we don't have naming conventions to produce all
4193 ;; three possibilities, so we just use the one we need for the benchmark.
4195 ;; Worse, the pattern being matched generically expects signed data,
4196 ;; whereas this instruction operates on unsigned data.
4198 ;(define_expand "sadv16qi"
4199 ; [(match_operand:V4SI 0 "register_operand" "")
4200 ; (match_operand:V16QI 1 "register_operand" "")
4201 ; (match_operand:V16QI 2 "nonimmediate_operand" "")
4202 ; (match_operand:V4SI 3 "register_operand" "")]
4205 ; rtx t = gen_reg_rtx (V2DImode);
4206 ; emit_insn (gen_sse2_psadbw (t, operands[1], operands[2]));
4207 ; emit_insn (gen_addv4si3 (operands[0], operands[3],
4208 ; gen_lowpart (V4SImode, t)));
;; movmskps insn, modelled as an unspec: a V4SF SSE register input produces
;; an SImode result in a general register.
4212 (define_insn "sse_movmskps"
4213 [(set (match_operand:SI 0 "register_operand" "=r")
4214 (unspec:SI [(match_operand:V4SF 1 "register_operand" "x")]
4217 "movmskps\t{%1, %0|%0, %1}"
4218 [(set_attr "type" "ssecvt")
4219 (set_attr "mode" "V4SF")])
;; movmskpd insn, modelled as an unspec: a V2DF SSE register input produces
;; an SImode result in a general register.
4221 (define_insn "sse2_movmskpd"
4222 [(set (match_operand:SI 0 "register_operand" "=r")
4223 (unspec:SI [(match_operand:V2DF 1 "register_operand" "x")]
4226 "movmskpd\t{%1, %0|%0, %1}"
4227 [(set_attr "type" "ssecvt")
4228 (set_attr "mode" "V2DF")])
;; pmovmskb insn, modelled as an unspec: a V16QI SSE register input produces
;; an SImode result in a general register.
;; NOTE(review): the "mode" attribute is "V2DF" although the input is an
;; integer V16QI vector; the other integer insns in this section use "TI".
;; This looks copied from sse2_movmskpd -- confirm whether it is intended.
4230 (define_insn "sse2_pmovmskb"
4231 [(set (match_operand:SI 0 "register_operand" "=r")
4232 (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
4235 "pmovmskb\t{%1, %0|%0, %1}"
4236 [(set_attr "type" "ssecvt")
4237 (set_attr "mode" "V2DF")])
;; Expander for maskmovdqu: the destination (operand 0) is a V16QI memory
;; operand and the stored value is an unspec over V16QI register operands 1
;; and 2.  The *sse2_maskmovdqu and *sse2_maskmovdqu_rex64 insns match it for
;; 32-bit and 64-bit targets respectively.
4239 (define_expand "sse2_maskmovdqu"
4240 [(set (match_operand:V16QI 0 "memory_operand" "")
4241 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
4242 (match_operand:V16QI 2 "register_operand" "x")
;; maskmovdqu for non-64-bit targets (TARGET_SSE2 && !TARGET_64BIT).
;; The store address is an SImode register constrained to "D"; it is not
;; printed in the template, which names only operands 1 and 2.  The unspec
;; also reads the old contents of the same memory location (match_dup 0).
4248 (define_insn "*sse2_maskmovdqu"
4249 [(set (mem:V16QI (match_operand:SI 0 "register_operand" "D"))
4250 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
4251 (match_operand:V16QI 2 "register_operand" "x")
4252 (mem:V16QI (match_dup 0))]
4254 "TARGET_SSE2 && !TARGET_64BIT"
4255 ;; @@@ check ordering of operands in intel/nonintel syntax
4256 "maskmovdqu\t{%2, %1|%1, %2}"
4257 [(set_attr "type" "ssecvt")
4258 (set_attr "mode" "TI")])
;; maskmovdqu for 64-bit targets (TARGET_SSE2 && TARGET_64BIT).  Identical
;; to *sse2_maskmovdqu except that the address register (operand 0,
;; constraint "D") is DImode.  The address is not printed in the template,
;; which names only operands 1 and 2.
4260 (define_insn "*sse2_maskmovdqu_rex64"
4261 [(set (mem:V16QI (match_operand:DI 0 "register_operand" "D"))
4262 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
4263 (match_operand:V16QI 2 "register_operand" "x")
4264 (mem:V16QI (match_dup 0))]
4266 "TARGET_SSE2 && TARGET_64BIT"
4267 ;; @@@ check ordering of operands in intel/nonintel syntax
4268 "maskmovdqu\t{%2, %1|%1, %2}"
4269 [(set_attr "type" "ssecvt")
4270 (set_attr "mode" "TI")])
;; ldmxcsr insn, modelled as an unspec_volatile over one SImode memory
;; operand.  Its "memory" attribute is "load".
4272 (define_insn "sse_ldmxcsr"
4273 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
4277 [(set_attr "type" "sse")
4278 (set_attr "memory" "load")])
;; stmxcsr insn: sets an SImode memory operand from an unspec_volatile
;; (UNSPECV_STMXCSR) that has only a dummy const_int 0 input.  Its "memory"
;; attribute is "store".
4280 (define_insn "sse_stmxcsr"
4281 [(set (match_operand:SI 0 "memory_operand" "=m")
4282 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
4285 [(set_attr "type" "sse")
4286 (set_attr "memory" "store")])
;; Expander for sfence, enabled for TARGET_SSE or TARGET_3DNOW_A.  The fence
;; is represented as an UNSPEC_SFENCE over a BLKmode memory reference; the C
;; body creates that MEM with a scratch address and marks it volatile.
4288 (define_expand "sse_sfence"
4290 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
4291 "TARGET_SSE || TARGET_3DNOW_A"
4293 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
4294 MEM_VOLATILE_P (operands[0]) = 1;
;; Insn matching the RTL produced by the sse_sfence expander: a BLKmode
;; operand (no predicate or constraint) set to UNSPEC_SFENCE of itself.
;; Enabled for TARGET_SSE or TARGET_3DNOW_A; "memory" attribute is "unknown".
4297 (define_insn "*sse_sfence"
4298 [(set (match_operand:BLK 0 "" "")
4299 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
4300 "TARGET_SSE || TARGET_3DNOW_A"
4302 [(set_attr "type" "sse")
4303 (set_attr "memory" "unknown")])
;; clflush insn, modelled as an unspec_volatile over one address operand
;; (predicate address_operand, constraint "p").  "memory" attribute is
;; "unknown".
4305 (define_insn "sse2_clflush"
4306 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
4310 [(set_attr "type" "sse")
4311 (set_attr "memory" "unknown")])
;; Expander for mfence.  The fence is represented as an UNSPEC_MFENCE over a
;; BLKmode memory reference; the C body creates that MEM with a scratch
;; address and marks it volatile.
4313 (define_expand "sse2_mfence"
4315 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
4318 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
4319 MEM_VOLATILE_P (operands[0]) = 1;
;; Insn matching the RTL produced by the sse2_mfence expander: a BLKmode
;; operand (no predicate or constraint) set to UNSPEC_MFENCE of itself.
;; "memory" attribute is "unknown".
4322 (define_insn "*sse2_mfence"
4323 [(set (match_operand:BLK 0 "" "")
4324 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
4327 [(set_attr "type" "sse")
4328 (set_attr "memory" "unknown")])
;; Expander for lfence.  The fence is represented as an UNSPEC_LFENCE over a
;; BLKmode memory reference; the C body creates that MEM with a scratch
;; address and marks it volatile.
4330 (define_expand "sse2_lfence"
4332 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
4335 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
4336 MEM_VOLATILE_P (operands[0]) = 1;
;; Insn matching the RTL produced by the sse2_lfence expander: a BLKmode
;; operand (no predicate or constraint) set to UNSPEC_LFENCE of itself.
;; "memory" attribute is "unknown".
4339 (define_insn "*sse2_lfence"
4340 [(set (match_operand:BLK 0 "" "")
4341 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
4344 [(set_attr "type" "sse")
4345 (set_attr "memory" "unknown")])
;; mwait insn, modelled as an unspec_volatile over two SImode register
;; operands constrained to "a" and "c".  The insn length is given
;; explicitly as 3.
4347 (define_insn "sse3_mwait"
4348 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
4349 (match_operand:SI 1 "register_operand" "c")]
4353 [(set_attr "length" "3")])
;; monitor insn, modelled as an unspec_volatile over three SImode register
;; operands constrained to "a", "c" and "d".  The template prints all three
;; operands explicitly; the insn length is given explicitly as 3.
4355 (define_insn "sse3_monitor"
4356 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
4357 (match_operand:SI 1 "register_operand" "c")
4358 (match_operand:SI 2 "register_operand" "d")]
4361 "monitor\t%0, %1, %2"
4362 [(set_attr "length" "3")])