1 ;; GCC machine description for SSE instructions
2 ;; Copyright (C) 2005, 2006, 2007, 2008
3 ;; Free Software Foundation, Inc.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify
8 ;; it under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful,
13 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ;; GNU General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
22 ;; Mode iterator over the 16-byte integer vector modes handled by SSE
23 ;; (V16QI, V8HI, V4SI, V2DI).  TImode is deliberately left out; it gets
;; special-cased for TARGET_64BIT.
24 (define_mode_iterator SSEMODEI [V16QI V8HI V4SI V2DI])
26 ;; All 16-byte vector modes handled by SSE: the four integer vector
;; modes plus the two floating-point vector modes (V4SF, V2DF).
27 (define_mode_iterator SSEMODE [V16QI V8HI V4SI V2DI V4SF V2DF])
;; Subset iterators used by patterns that exist only for some modes.
;; For the integer iterators, the digits in the name match the element
;; sizes in bytes of the listed modes (1 = QI, 2 = HI, 4 = SI, 8 = DI).
30 (define_mode_iterator SSEMODE12 [V16QI V8HI])
31 (define_mode_iterator SSEMODE24 [V8HI V4SI])
32 (define_mode_iterator SSEMODE14 [V16QI V4SI])
33 (define_mode_iterator SSEMODE124 [V16QI V8HI V4SI])
34 (define_mode_iterator SSEMODE248 [V8HI V4SI V2DI])
35 (define_mode_iterator SSEMODE1248 [V16QI V8HI V4SI V2DI])
;; Floating-point iterators: SSEMODEF4 covers the scalar modes (SF, DF)
;; together with the vector modes (V4SF, V2DF); SSEMODEF2P covers only
;; the two vector modes.
36 (define_mode_iterator SSEMODEF4 [SF DF V4SF V2DF])
37 (define_mode_iterator SSEMODEF2P [V4SF V2DF])
39 ;; Mapping from float mode to required SSE level.  Single-precision
;; modes (SF, V4SF) map to "sse" and double-precision modes (DF, V2DF)
;; map to "sse2".  Patterns in this file use it as the <sse> prefix of
;; their names.
40 (define_mode_attr sse [(SF "sse") (DF "sse2") (V4SF "sse") (V2DF "sse2")])
42 ;; Mapping from integer vector mode to mnemonic suffix: one letter per
;; element width (presumably byte, word, doubleword, quadword).
43 (define_mode_attr ssevecsize [(V16QI "b") (V8HI "w") (V4SI "d") (V2DI "q")])
45 ;; Mapping from float mode to the mnemonic suffix used by the SSE5 patterns.
;; ssemodesuffixf4 gives the full two-letter suffix: scalar modes get
;; "ss"/"sd" and vector modes get "ps"/"pd".
46 (define_mode_attr ssemodesuffixf4 [(SF "ss") (DF "sd")
47 (V4SF "ps") (V2DF "pd")])
;; ssemodesuffixf2s always gives the scalar suffix ("ss"/"sd"), even for
;; the vector modes; the scalar "vm" fmadd/fmsub templates use it.
48 (define_mode_attr ssemodesuffixf2s [(SF "ss") (DF "sd")
49 (V4SF "ss") (V2DF "sd")])
;; ssemodesuffixf2c gives only the final precision letter ("s" or "d");
;; templates write the leading "p" or "s" themselves.
50 (define_mode_attr ssemodesuffixf2c [(V4SF "s") (V2DF "d")])
52 ;; Mapping of the max integer size for sse5 rotate immediate constraint.
;; Each value is the element width in bits minus one
;; (8-bit QI -> 7, 16-bit HI -> 15, 32-bit SI -> 31, 64-bit DI -> 63).
53 (define_mode_attr sserotatemax [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")])
55 ;; Mapping of vector modes back to the scalar modes of their elements.
;; The scalar "vm" patterns in this file use it for their "mode"
;; attribute.
56 (define_mode_attr ssescalarmode [(V4SF "SF") (V2DF "DF")
57 (V16QI "QI") (V8HI "HI")
58 (V4SI "SI") (V2DI "DI")])
60 ;; Number of scalar elements in each vector type, as a decimal string
;; (the count already encoded in the mode name, e.g. V4SF -> "4").
61 (define_mode_attr ssescalarnum [(V4SF "4") (V2DF "2")
62 (V16QI "16") (V8HI "8")
63 (V4SI "4") (V2DI "2")])
65 ;; Mapping of immediate bits for blend instructions.
;; NOTE(review): the values look like an all-ones mask with one bit per
;; element (15 = 4 bits for V4SF, 3 = 2 bits for V2DF) -- confirm against
;; the blend patterns, which are outside this chunk.
66 (define_mode_attr blendbits [(V4SF "15") (V2DF "3")])
68 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
70 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
74 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
76 ;; All of these patterns are enabled for SSE1 as well as SSE2.
77 ;; This is essential for maintaining stable calling conventions.
79 (define_expand "mov<mode>"
80 [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "")
81 (match_operand:SSEMODE 1 "nonimmediate_operand" ""))]
84 ix86_expand_vector_move (<MODE>mode, operands);
88 (define_insn "*mov<mode>_internal"
89 [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "=x,x ,m")
90 (match_operand:SSEMODE 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
92 && (register_operand (operands[0], <MODE>mode)
93 || register_operand (operands[1], <MODE>mode))"
95 switch (which_alternative)
98 return standard_sse_constant_opcode (insn, operands[1]);
101 switch (get_attr_mode (insn))
104 return "movaps\t{%1, %0|%0, %1}";
106 return "movapd\t{%1, %0|%0, %1}";
108 return "movdqa\t{%1, %0|%0, %1}";
114 [(set_attr "type" "sselog1,ssemov,ssemov")
116 (cond [(ior (ior (ne (symbol_ref "optimize_size") (const_int 0))
117 (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
118 (and (eq_attr "alternative" "2")
119 (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
121 (const_string "V4SF")
122 (eq (const_string "<MODE>mode") (const_string "V4SFmode"))
123 (const_string "V4SF")
124 (eq (const_string "<MODE>mode") (const_string "V2DFmode"))
125 (const_string "V2DF")
127 (const_string "TI")))])
129 ;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
130 ;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
131 ;; from memory, we'd prefer to load the memory directly into the %xmm
132 ;; register. To facilitate this happy circumstance, this pattern won't
133 ;; split until after register allocation. If the 64-bit value didn't
134 ;; come from memory, this is the best we can do. This is much better
135 ;; than storing %edx:%eax into a stack temporary and loading an %xmm register from there.
138 (define_insn_and_split "movdi_to_sse"
140 [(set (match_operand:V4SI 0 "register_operand" "=?x,x")
141 (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
142 (clobber (match_scratch:V4SI 2 "=&x,X"))])]
143 "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES"
145 "&& reload_completed"
148 if (register_operand (operands[1], DImode))
150 /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
151 Assemble the 64-bit DImode value in an xmm register. */
152 emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
153 gen_rtx_SUBREG (SImode, operands[1], 0)));
154 emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
155 gen_rtx_SUBREG (SImode, operands[1], 4)));
156 emit_insn (gen_sse2_punpckldq (operands[0], operands[0], operands[2]));
158 else if (memory_operand (operands[1], DImode))
159 emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]), operands[1], const0_rtx));
165 [(set (match_operand:V4SF 0 "register_operand" "")
166 (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
167 "TARGET_SSE && reload_completed"
170 (vec_duplicate:V4SF (match_dup 1))
174 operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
175 operands[2] = CONST0_RTX (V4SFmode);
179 [(set (match_operand:V2DF 0 "register_operand" "")
180 (match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))]
181 "TARGET_SSE2 && reload_completed"
182 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
184 operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
185 operands[2] = CONST0_RTX (DFmode);
188 (define_expand "push<mode>1"
189 [(match_operand:SSEMODE 0 "register_operand" "")]
192 ix86_expand_push (<MODE>mode, operands[0]);
196 (define_expand "movmisalign<mode>"
197 [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "")
198 (match_operand:SSEMODE 1 "nonimmediate_operand" ""))]
201 ix86_expand_vector_move_misalign (<MODE>mode, operands);
205 (define_insn "<sse>_movup<ssemodesuffixf2c>"
206 [(set (match_operand:SSEMODEF2P 0 "nonimmediate_operand" "=x,m")
208 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm,x")]
210 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
211 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
212 "movup<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
213 [(set_attr "type" "ssemov")
214 (set_attr "mode" "<MODE>")])
216 (define_insn "sse2_movdqu"
217 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
218 (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "xm,x")]
220 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
221 "movdqu\t{%1, %0|%0, %1}"
222 [(set_attr "type" "ssemov")
223 (set_attr "prefix_data16" "1")
224 (set_attr "mode" "TI")])
226 (define_insn "<sse>_movnt<mode>"
227 [(set (match_operand:SSEMODEF2P 0 "memory_operand" "=m")
229 [(match_operand:SSEMODEF2P 1 "register_operand" "x")]
231 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
232 "movntp<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
233 [(set_attr "type" "ssemov")
234 (set_attr "mode" "<MODE>")])
236 (define_insn "sse2_movntv2di"
237 [(set (match_operand:V2DI 0 "memory_operand" "=m")
238 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")]
241 "movntdq\t{%1, %0|%0, %1}"
242 [(set_attr "type" "ssecvt")
243 (set_attr "prefix_data16" "1")
244 (set_attr "mode" "TI")])
246 (define_insn "sse2_movntsi"
247 [(set (match_operand:SI 0 "memory_operand" "=m")
248 (unspec:SI [(match_operand:SI 1 "register_operand" "r")]
251 "movnti\t{%1, %0|%0, %1}"
252 [(set_attr "type" "ssecvt")
253 (set_attr "mode" "V2DF")])
255 (define_insn "sse3_lddqu"
256 [(set (match_operand:V16QI 0 "register_operand" "=x")
257 (unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "m")]
260 "lddqu\t{%1, %0|%0, %1}"
261 [(set_attr "type" "ssecvt")
262 (set_attr "prefix_rep" "1")
263 (set_attr "mode" "TI")])
265 ; Expand patterns for non-temporal stores. At the moment, only those
266 ; that directly map to insns are defined; it would be possible to
267 ; define patterns for other modes that would expand to several insns.
269 (define_expand "storent<mode>"
270 [(set (match_operand:SSEMODEF2P 0 "memory_operand" "")
272 [(match_operand:SSEMODEF2P 1 "register_operand" "")]
274 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
277 (define_expand "storent<mode>"
278 [(set (match_operand:MODEF 0 "memory_operand" "")
280 [(match_operand:MODEF 1 "register_operand" "")]
285 (define_expand "storentv2di"
286 [(set (match_operand:V2DI 0 "memory_operand" "")
287 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "")]
292 (define_expand "storentsi"
293 [(set (match_operand:SI 0 "memory_operand" "")
294 (unspec:SI [(match_operand:SI 1 "register_operand" "")]
299 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
301 ;; Parallel floating point arithmetic
303 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
305 (define_expand "<code><mode>2"
306 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
308 (match_operand:SSEMODEF2P 1 "register_operand" "")))]
309 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
310 "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
;; Expander for the packed floating-point operations generated by the
;; plusminus code iterator (defined outside this chunk), instantiated for
;; each mode in SSEMODEF2P (V4SF, V2DF).
;;   operand 0: destination, must be a register
;;   operands 1, 2: sources, register or memory
;; Enabled when SSE_VEC_FLOAT_MODE_P holds for the mode.  The preparation
;; statement passes the operands to ix86_fixup_binary_operands_no_copy
;; (defined elsewhere; presumably adjusts them into a form the matching
;; insn accepts) and then lets the RTL template above be emitted.
312 (define_expand "<plusminus_insn><mode>3"
313 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
314 (plusminus:SSEMODEF2P
315 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
316 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
317 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
318 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Insn matching the packed floating-point plusminus operations for the
;; SSEMODEF2P modes.
;;   operand 0: destination, SSE register ("x")
;;   operand 1: tied to operand 0 by the "0" matching constraint; the
;;              <comm> prefix comes from an attribute defined outside this
;;              chunk (presumably "%" when the code is commutative)
;;   operand 2: SSE register or memory ("xm")
;; The insn is valid only when SSE_VEC_FLOAT_MODE_P holds and
;; ix86_binary_operator_ok accepts the operands.
320 (define_insn "*<plusminus_insn><mode>3"
321 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
322 (plusminus:SSEMODEF2P
323 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "<comm>0")
324 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
325 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
326 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
;; Two-operand output: only %2 and %0 are printed because operand 1 shares
;; operand 0's register.  The {a|b} braces select between the two
;; assembler dialects (AT&T order first, Intel order second).
327 "<plusminus_mnemonic>p<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
328 [(set_attr "type" "sseadd")
329 (set_attr "mode" "<MODE>")])
331 (define_insn "<sse>_vm<plusminus_insn><mode>3"
332 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
333 (vec_merge:SSEMODEF2P
334 (plusminus:SSEMODEF2P
335 (match_operand:SSEMODEF2P 1 "register_operand" "0")
336 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
339 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
340 "<plusminus_mnemonic>s<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
341 [(set_attr "type" "sseadd")
342 (set_attr "mode" "<ssescalarmode>")])
344 (define_expand "mul<mode>3"
345 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
347 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
348 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
349 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
350 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
352 (define_insn "*mul<mode>3"
353 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
355 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
356 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
357 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
358 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
359 "mulp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
360 [(set_attr "type" "ssemul")
361 (set_attr "mode" "<MODE>")])
363 (define_insn "<sse>_vmmul<mode>3"
364 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
365 (vec_merge:SSEMODEF2P
367 (match_operand:SSEMODEF2P 1 "register_operand" "0")
368 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
371 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
372 "muls<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
373 [(set_attr "type" "ssemul")
374 (set_attr "mode" "<ssescalarmode>")])
376 (define_expand "divv4sf3"
377 [(set (match_operand:V4SF 0 "register_operand" "")
378 (div:V4SF (match_operand:V4SF 1 "register_operand" "")
379 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
382 if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_size
383 && flag_finite_math_only && !flag_trapping_math
384 && flag_unsafe_math_optimizations)
386 ix86_emit_swdivsf (operands[0], operands[1],
387 operands[2], V4SFmode);
392 (define_expand "divv2df3"
393 [(set (match_operand:V2DF 0 "register_operand" "")
394 (div:V2DF (match_operand:V2DF 1 "register_operand" "")
395 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
399 (define_insn "<sse>_div<mode>3"
400 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
402 (match_operand:SSEMODEF2P 1 "register_operand" "0")
403 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
404 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
405 "divp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
406 [(set_attr "type" "ssediv")
407 (set_attr "mode" "<MODE>")])
409 (define_insn "<sse>_vmdiv<mode>3"
410 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
411 (vec_merge:SSEMODEF2P
413 (match_operand:SSEMODEF2P 1 "register_operand" "0")
414 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
417 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
418 "divs<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
419 [(set_attr "type" "ssediv")
420 (set_attr "mode" "<ssescalarmode>")])
422 (define_insn "sse_rcpv4sf2"
423 [(set (match_operand:V4SF 0 "register_operand" "=x")
425 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
427 "rcpps\t{%1, %0|%0, %1}"
428 [(set_attr "type" "sse")
429 (set_attr "mode" "V4SF")])
431 (define_insn "sse_vmrcpv4sf2"
432 [(set (match_operand:V4SF 0 "register_operand" "=x")
434 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
436 (match_operand:V4SF 2 "register_operand" "0")
439 "rcpss\t{%1, %0|%0, %1}"
440 [(set_attr "type" "sse")
441 (set_attr "mode" "SF")])
443 (define_expand "sqrtv4sf2"
444 [(set (match_operand:V4SF 0 "register_operand" "")
445 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
448 if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_size
449 && flag_finite_math_only && !flag_trapping_math
450 && flag_unsafe_math_optimizations)
452 ix86_emit_swsqrtsf (operands[0], operands[1], V4SFmode, 0);
457 (define_insn "sse_sqrtv4sf2"
458 [(set (match_operand:V4SF 0 "register_operand" "=x")
459 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
461 "sqrtps\t{%1, %0|%0, %1}"
462 [(set_attr "type" "sse")
463 (set_attr "mode" "V4SF")])
465 (define_insn "sqrtv2df2"
466 [(set (match_operand:V2DF 0 "register_operand" "=x")
467 (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
469 "sqrtpd\t{%1, %0|%0, %1}"
470 [(set_attr "type" "sse")
471 (set_attr "mode" "V2DF")])
473 (define_insn "<sse>_vmsqrt<mode>2"
474 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
475 (vec_merge:SSEMODEF2P
477 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm"))
478 (match_operand:SSEMODEF2P 2 "register_operand" "0")
480 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
481 "sqrts<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
482 [(set_attr "type" "sse")
483 (set_attr "mode" "<ssescalarmode>")])
485 (define_expand "rsqrtv4sf2"
486 [(set (match_operand:V4SF 0 "register_operand" "")
488 [(match_operand:V4SF 1 "nonimmediate_operand" "")] UNSPEC_RSQRT))]
491 ix86_emit_swsqrtsf (operands[0], operands[1], V4SFmode, 1);
495 (define_insn "sse_rsqrtv4sf2"
496 [(set (match_operand:V4SF 0 "register_operand" "=x")
498 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
500 "rsqrtps\t{%1, %0|%0, %1}"
501 [(set_attr "type" "sse")
502 (set_attr "mode" "V4SF")])
504 (define_insn "sse_vmrsqrtv4sf2"
505 [(set (match_operand:V4SF 0 "register_operand" "=x")
507 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
509 (match_operand:V4SF 2 "register_operand" "0")
512 "rsqrtss\t{%1, %0|%0, %1}"
513 [(set_attr "type" "sse")
514 (set_attr "mode" "SF")])
516 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
517 ;; isn't really correct, as those rtl operators aren't defined when
518 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
520 (define_expand "<code><mode>3"
521 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
523 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
524 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
525 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
527 if (!flag_finite_math_only)
528 operands[1] = force_reg (<MODE>mode, operands[1]);
529 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
532 (define_insn "*<code><mode>3_finite"
533 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
535 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
536 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
537 "SSE_VEC_FLOAT_MODE_P (<MODE>mode) && flag_finite_math_only
538 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
539 "<maxminfprefix>p<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
540 [(set_attr "type" "sseadd")
541 (set_attr "mode" "<MODE>")])
543 (define_insn "*<code><mode>3"
544 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
546 (match_operand:SSEMODEF2P 1 "register_operand" "0")
547 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
548 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
549 "<maxminfprefix>p<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
550 [(set_attr "type" "sseadd")
551 (set_attr "mode" "<MODE>")])
553 (define_insn "<sse>_vm<code><mode>3"
554 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
555 (vec_merge:SSEMODEF2P
557 (match_operand:SSEMODEF2P 1 "register_operand" "0")
558 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
561 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
562 "<maxminfprefix>s<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
563 [(set_attr "type" "sse")
564 (set_attr "mode" "<ssescalarmode>")])
566 ;; These versions of the min/max patterns implement exactly the operations
567 ;; min = (op1 < op2 ? op1 : op2)
568 ;; max = (!(op1 < op2) ? op1 : op2)
569 ;; Their operands are not commutative, and thus they may be used in the
570 ;; presence of -0.0 and NaN.
572 (define_insn "*ieee_smin<mode>3"
573 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
575 [(match_operand:SSEMODEF2P 1 "register_operand" "0")
576 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
578 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
579 "minp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
580 [(set_attr "type" "sseadd")
581 (set_attr "mode" "<MODE>")])
583 (define_insn "*ieee_smax<mode>3"
584 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
586 [(match_operand:SSEMODEF2P 1 "register_operand" "0")
587 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
589 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
590 "maxp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
591 [(set_attr "type" "sseadd")
592 (set_attr "mode" "<MODE>")])
594 (define_insn "sse3_addsubv4sf3"
595 [(set (match_operand:V4SF 0 "register_operand" "=x")
598 (match_operand:V4SF 1 "register_operand" "0")
599 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
600 (minus:V4SF (match_dup 1) (match_dup 2))
603 "addsubps\t{%2, %0|%0, %2}"
604 [(set_attr "type" "sseadd")
605 (set_attr "prefix_rep" "1")
606 (set_attr "mode" "V4SF")])
608 (define_insn "sse3_addsubv2df3"
609 [(set (match_operand:V2DF 0 "register_operand" "=x")
612 (match_operand:V2DF 1 "register_operand" "0")
613 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
614 (minus:V2DF (match_dup 1) (match_dup 2))
617 "addsubpd\t{%2, %0|%0, %2}"
618 [(set_attr "type" "sseadd")
619 (set_attr "mode" "V2DF")])
621 (define_insn "sse3_h<plusminus_insn>v4sf3"
622 [(set (match_operand:V4SF 0 "register_operand" "=x")
627 (match_operand:V4SF 1 "register_operand" "0")
628 (parallel [(const_int 0)]))
629 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
631 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
632 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
636 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
637 (parallel [(const_int 0)]))
638 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
640 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
641 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
643 "h<plusminus_mnemonic>ps\t{%2, %0|%0, %2}"
644 [(set_attr "type" "sseadd")
645 (set_attr "prefix_rep" "1")
646 (set_attr "mode" "V4SF")])
648 (define_insn "sse3_h<plusminus_insn>v2df3"
649 [(set (match_operand:V2DF 0 "register_operand" "=x")
653 (match_operand:V2DF 1 "register_operand" "0")
654 (parallel [(const_int 0)]))
655 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
658 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
659 (parallel [(const_int 0)]))
660 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
662 "h<plusminus_mnemonic>pd\t{%2, %0|%0, %2}"
663 [(set_attr "type" "sseadd")
664 (set_attr "mode" "V2DF")])
666 (define_expand "reduc_splus_v4sf"
667 [(match_operand:V4SF 0 "register_operand" "")
668 (match_operand:V4SF 1 "register_operand" "")]
673 rtx tmp = gen_reg_rtx (V4SFmode);
674 emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
675 emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
678 ix86_expand_reduc_v4sf (gen_addv4sf3, operands[0], operands[1]);
682 (define_expand "reduc_splus_v2df"
683 [(match_operand:V2DF 0 "register_operand" "")
684 (match_operand:V2DF 1 "register_operand" "")]
687 emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
691 (define_expand "reduc_smax_v4sf"
692 [(match_operand:V4SF 0 "register_operand" "")
693 (match_operand:V4SF 1 "register_operand" "")]
696 ix86_expand_reduc_v4sf (gen_smaxv4sf3, operands[0], operands[1]);
700 (define_expand "reduc_smin_v4sf"
701 [(match_operand:V4SF 0 "register_operand" "")
702 (match_operand:V4SF 1 "register_operand" "")]
705 ix86_expand_reduc_v4sf (gen_sminv4sf3, operands[0], operands[1]);
709 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
711 ;; Parallel floating point comparisons
713 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
715 (define_insn "<sse>_maskcmp<mode>3"
716 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x")
717 (match_operator:SSEMODEF4 3 "sse_comparison_operator"
718 [(match_operand:SSEMODEF4 1 "register_operand" "0")
719 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "xm")]))]
720 "(SSE_FLOAT_MODE_P (<MODE>mode) || SSE_VEC_FLOAT_MODE_P (<MODE>mode))
722 "cmp%D3<ssemodesuffixf4>\t{%2, %0|%0, %2}"
723 [(set_attr "type" "ssecmp")
724 (set_attr "mode" "<MODE>")])
726 (define_insn "<sse>_vmmaskcmp<mode>3"
727 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
728 (vec_merge:SSEMODEF2P
729 (match_operator:SSEMODEF2P 3 "sse_comparison_operator"
730 [(match_operand:SSEMODEF2P 1 "register_operand" "0")
731 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")])
734 "SSE_VEC_FLOAT_MODE_P (<MODE>mode) && !TARGET_SSE5"
735 "cmp%D3s<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
736 [(set_attr "type" "ssecmp")
737 (set_attr "mode" "<ssescalarmode>")])
739 (define_insn "<sse>_comi"
740 [(set (reg:CCFP FLAGS_REG)
743 (match_operand:<ssevecmode> 0 "register_operand" "x")
744 (parallel [(const_int 0)]))
746 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
747 (parallel [(const_int 0)]))))]
748 "SSE_FLOAT_MODE_P (<MODE>mode)"
749 "comis<ssemodefsuffix>\t{%1, %0|%0, %1}"
750 [(set_attr "type" "ssecomi")
751 (set_attr "mode" "<MODE>")])
753 (define_insn "<sse>_ucomi"
754 [(set (reg:CCFPU FLAGS_REG)
757 (match_operand:<ssevecmode> 0 "register_operand" "x")
758 (parallel [(const_int 0)]))
760 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
761 (parallel [(const_int 0)]))))]
762 "SSE_FLOAT_MODE_P (<MODE>mode)"
763 "ucomis<ssemodefsuffix>\t{%1, %0|%0, %1}"
764 [(set_attr "type" "ssecomi")
765 (set_attr "mode" "<MODE>")])
767 (define_expand "vcond<mode>"
768 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
769 (if_then_else:SSEMODEF2P
771 [(match_operand:SSEMODEF2P 4 "nonimmediate_operand" "")
772 (match_operand:SSEMODEF2P 5 "nonimmediate_operand" "")])
773 (match_operand:SSEMODEF2P 1 "general_operand" "")
774 (match_operand:SSEMODEF2P 2 "general_operand" "")))]
775 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
777 if (ix86_expand_fp_vcond (operands))
783 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
785 ;; Parallel floating point logical operations
787 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
789 (define_insn "<sse>_nand<mode>3"
790 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
793 (match_operand:SSEMODEF2P 1 "register_operand" "0"))
794 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
795 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
796 "andnp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
797 [(set_attr "type" "sselog")
798 (set_attr "mode" "<MODE>")])
800 (define_expand "<code><mode>3"
801 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
803 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
804 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
805 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
806 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
808 (define_insn "*<code><mode>3"
809 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
811 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
812 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
813 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
814 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
815 "<plogicprefix>p<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
816 [(set_attr "type" "sselog")
817 (set_attr "mode" "<MODE>")])
819 ;; Also define scalar versions. These are used for abs, neg, and
820 ;; conditional move. Using subregs into vector modes causes register
821 ;; allocation lossage. These patterns do not allow memory operands
822 ;; because the native instructions read the full 128-bits.
824 (define_insn "*nand<mode>3"
825 [(set (match_operand:MODEF 0 "register_operand" "=x")
828 (match_operand:MODEF 1 "register_operand" "0"))
829 (match_operand:MODEF 2 "register_operand" "x")))]
830 "SSE_FLOAT_MODE_P (<MODE>mode)"
831 "andnp<ssemodefsuffix>\t{%2, %0|%0, %2}"
832 [(set_attr "type" "sselog")
833 (set_attr "mode" "<ssevecmode>")])
835 (define_insn "*<code><mode>3"
836 [(set (match_operand:MODEF 0 "register_operand" "=x")
838 (match_operand:MODEF 1 "register_operand" "0")
839 (match_operand:MODEF 2 "register_operand" "x")))]
840 "SSE_FLOAT_MODE_P (<MODE>mode)"
841 "<plogicprefix>p<ssemodefsuffix>\t{%2, %0|%0, %2}"
842 [(set_attr "type" "sselog")
843 (set_attr "mode" "<ssevecmode>")])
845 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
847 ;; SSE5 floating point multiply/accumulate instructions.  This includes the
848 ;; scalar versions of the instructions as well as the vector versions.
850 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
852 ;; In order to match (*a * *b) + *c, particularly when vectorizing, allow
853 ;; combine to generate a multiply/add with two memory references. We then
854 ;; split this insn, into loading up the destination register with one of the
855 ;; memory operations. If we don't manage to split the insn, reload will
856 ;; generate the appropriate moves. The reason this is needed, is that combine
857 ;; has already folded one of the memory references into both the multiply and
858 ;; add insns, and it can't generate a new pseudo. I.e.:
859 ;; (set (reg1) (mem (addr1)))
860 ;; (set (reg2) (mult (reg1) (mem (addr2))))
861 ;; (set (reg3) (plus (reg2) (mem (addr3))))
863 (define_insn "sse5_fmadd<mode>4"
864 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x,x,x")
867 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%0,0,x,xm")
868 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm,xm,x"))
869 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x,0,0")))]
870 "TARGET_SSE5 && TARGET_FUSED_MADD
871 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)"
872 "fmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
873 [(set_attr "type" "ssemuladd")
874 (set_attr "mode" "<MODE>")])
876 ;; Split fmadd with two memory operands into a load and the fmadd.
878 [(set (match_operand:SSEMODEF4 0 "register_operand" "")
881 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "")
882 (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))
883 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")))]
885 && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1)
886 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)
887 && !reg_mentioned_p (operands[0], operands[1])
888 && !reg_mentioned_p (operands[0], operands[2])
889 && !reg_mentioned_p (operands[0], operands[3])"
892 ix86_expand_sse5_multiple_memory (operands, 4, <MODE>mode);
893 emit_insn (gen_sse5_fmadd<mode>4 (operands[0], operands[1],
894 operands[2], operands[3]));
898 ;; For the scalar operations, use operand1 for the upper words that aren't
899 ;; modified, so restrict the forms that are generated.
900 ;; Scalar version of fmadd
901 (define_insn "sse5_vmfmadd<mode>4"
902 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
903 (vec_merge:SSEMODEF2P
906 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0")
907 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
908 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
911 "TARGET_SSE5 && TARGET_FUSED_MADD
912 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
913 "fmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
914 [(set_attr "type" "ssemuladd")
915 (set_attr "mode" "<MODE>")])
917 ;; Floating multiply and subtract
918 ;; Allow two memory operands the same as fmadd
919 (define_insn "sse5_fmsub<mode>4"
920 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x,x,x")
923 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%0,0,x,xm")
924 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm,xm,x"))
925 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x,0,0")))]
926 "TARGET_SSE5 && TARGET_FUSED_MADD
927 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)"
928 "fmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
929 [(set_attr "type" "ssemuladd")
930 (set_attr "mode" "<MODE>")])
932 ;; Split fmsub with two memory operands into a load and the fmsub.
934 [(set (match_operand:SSEMODEF4 0 "register_operand" "")
937 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "")
938 (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))
939 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")))]
941 && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1)
942 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)
943 && !reg_mentioned_p (operands[0], operands[1])
944 && !reg_mentioned_p (operands[0], operands[2])
945 && !reg_mentioned_p (operands[0], operands[3])"
948 ix86_expand_sse5_multiple_memory (operands, 4, <MODE>mode);
949 emit_insn (gen_sse5_fmsub<mode>4 (operands[0], operands[1],
950 operands[2], operands[3]));
954 ;; For the scalar operations, use operand1 for the upper words that aren't
955 ;; modified, so restrict the forms that are generated.
956 ;; Scalar version of fmsub
957 (define_insn "sse5_vmfmsub<mode>4"
958 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
959 (vec_merge:SSEMODEF2P
962 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0")
963 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
964 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
967 "TARGET_SSE5 && TARGET_FUSED_MADD
968 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
969 "fmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
970 [(set_attr "type" "ssemuladd")
971 (set_attr "mode" "<MODE>")])
973 ;; Floating point negative multiply and add
974 ;; Rewrite (- (a * b) + c) into the canonical form: c - (a * b)
975 ;; Note operands are out of order to simplify call to ix86_sse5_valid_op_p
976 ;; Allow two memory operands to help in optimizing.
;; Operand 3 is written first in the RTL (presumably the "c" term of the
;; canonical form above); operands 1 and 2 are marked commutative ("%").
;; The condition requires TARGET_SSE5 and TARGET_FUSED_MADD.
977 (define_insn "sse5_fnmadd<mode>4"
978 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x,x,x")
980 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x,0,0")
982 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%0,0,x,xm")
983 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm,xm,x"))))]
984 "TARGET_SSE5 && TARGET_FUSED_MADD
985 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)"
986 "fnmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
987 [(set_attr "type" "ssemuladd")
988 (set_attr "mode" "<MODE>")])
990 ;; Split fnmadd with two memory operands into a load and the fnmadd.
;; Applies when ix86_sse5_valid_op_p rejects the operands with a final
;; argument of 1 but accepts them with 2, and operand 0 is not mentioned by
;; any input.  The preparation code calls ix86_expand_sse5_multiple_memory
;; and then emits sse5_fnmadd<mode>4.
992 [(set (match_operand:SSEMODEF4 0 "register_operand" "")
994 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")
996 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "")
997 (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))))]
999 && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1)
1000 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)
1001 && !reg_mentioned_p (operands[0], operands[1])
1002 && !reg_mentioned_p (operands[0], operands[2])
1003 && !reg_mentioned_p (operands[0], operands[3])"
1006 ix86_expand_sse5_multiple_memory (operands, 4, <MODE>mode);
1007 emit_insn (gen_sse5_fnmadd<mode>4 (operands[0], operands[1],
1008 operands[2], operands[3]));
1012 ;; For the scalar operations, use operand1 for the upper words that aren't
1013 ;; modified, so restrict the forms that are generated.
1014 ;; Scalar version of fnmadd
;; Operand 1 is tied to the destination ("0") in both alternatives; operand 3
;; is written first in the RTL, as in sse5_fnmadd<mode>4.  The constraints
;; let either operand 2 or operand 3 be in memory, never both.
1015 (define_insn "sse5_vmfnmadd<mode>4"
1016 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1017 (vec_merge:SSEMODEF2P
1019 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")
1021 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0")
1022 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm")))
1025 "TARGET_SSE5 && TARGET_FUSED_MADD
1026 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1027 "fnmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1028 [(set_attr "type" "ssemuladd")
1029 (set_attr "mode" "<MODE>")])
1031 ;; Floating point negative multiply and subtract
1032 ;; Rewrite (- (a * b) - c) into the canonical form: ((-a) * b) - c
1033 ;; Allow 2 memory operands to help with optimization
;; Only two alternatives are provided: operand 1 is always tied to the
;; destination ("0") and is not marked commutative, unlike the fmsub and
;; fnmadd patterns.
1034 (define_insn "sse5_fnmsub<mode>4"
1035 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x")
1039 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "0,0"))
1040 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm"))
1041 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x")))]
1042 "TARGET_SSE5 && TARGET_FUSED_MADD
1043 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)"
1044 "fnmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1045 [(set_attr "type" "ssemuladd")
1046 (set_attr "mode" "<MODE>")])
1048 ;; Split fnmsub with two memory operands into a load and the fnmsub.
;; Applies when ix86_sse5_valid_op_p rejects the operands with a final
;; argument of 1 but accepts them with 2, and operand 0 is not mentioned by
;; any input.  The preparation code calls ix86_expand_sse5_multiple_memory
;; and then emits sse5_fnmsub<mode>4.
1050 [(set (match_operand:SSEMODEF4 0 "register_operand" "")
1054 (match_operand:SSEMODEF4 1 "nonimmediate_operand" ""))
1055 (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))
1056 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")))]
1058 && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1)
1059 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)
1060 && !reg_mentioned_p (operands[0], operands[1])
1061 && !reg_mentioned_p (operands[0], operands[2])
1062 && !reg_mentioned_p (operands[0], operands[3])"
1065 ix86_expand_sse5_multiple_memory (operands, 4, <MODE>mode);
1066 emit_insn (gen_sse5_fnmsub<mode>4 (operands[0], operands[1],
1067 operands[2], operands[3]));
1071 ;; For the scalar operations, use operand1 for the upper words that aren't
1072 ;; modified, so restrict the forms that are generated.
1073 ;; Scalar version of fnmsub
;; Operand 1 is tied to the destination ("0") in both alternatives.
;; NOTE(review): the final argument to ix86_sse5_valid_op_p is 2 here, while
;; sse5_vmfmsub<mode>4 and sse5_vmfnmadd<mode>4 pass 1 and the constraints
;; allow at most one memory operand per alternative -- confirm that 2 is
;; intended.
1074 (define_insn "sse5_vmfnmsub<mode>4"
1075 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1076 (vec_merge:SSEMODEF2P
1080 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0"))
1081 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
1082 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1085 "TARGET_SSE5 && TARGET_FUSED_MADD
1086 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)"
1087 "fnmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1088 [(set_attr "type" "ssemuladd")
1089 (set_attr "mode" "<MODE>")])
1091 ;; The same instructions using an UNSPEC to allow the intrinsic to be used
1092 ;; even if the user used -mno-fused-madd
1093 ;; Parallel instructions. During instruction generation, just default
1094 ;; to registers, and let combine later build the appropriate instruction.
;; Every operand of this expander is a register_operand.  When
;; TARGET_FUSED_MADD is set, the preparation code emits sse5_fmadd<mode>4
;; in place of the UNSPEC_SSE5_INTRINSIC form.
1095 (define_expand "sse5i_fmadd<mode>4"
1096 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1100 (match_operand:SSEMODEF2P 1 "register_operand" "")
1101 (match_operand:SSEMODEF2P 2 "register_operand" ""))
1102 (match_operand:SSEMODEF2P 3 "register_operand" ""))]
1103 UNSPEC_SSE5_INTRINSIC))]
1106 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1107 if (TARGET_FUSED_MADD)
1109 emit_insn (gen_sse5_fmadd<mode>4 (operands[0], operands[1],
1110 operands[2], operands[3]));
;; Insn matching the UNSPEC_SSE5_INTRINSIC form of fmadd.  The condition
;; needs only TARGET_SSE5 (no TARGET_FUSED_MADD test) and passes 1 as the
;; final argument to ix86_sse5_valid_op_p.  Operands 1 and 2 are marked
;; commutative ("%").
1115 (define_insn "*sse5i_fmadd<mode>4"
1116 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
1120 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0,0,x,xm")
1121 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x"))
1122 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0"))]
1123 UNSPEC_SSE5_INTRINSIC))]
1124 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1125 "fmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1126 [(set_attr "type" "ssemuladd")
1127 (set_attr "mode" "<MODE>")])
;; Intrinsic expander for fmsub.  Every operand is a register_operand.  When
;; TARGET_FUSED_MADD is set, the preparation code emits sse5_fmsub<mode>4
;; in place of the UNSPEC_SSE5_INTRINSIC form.
1129 (define_expand "sse5i_fmsub<mode>4"
1130 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1134 (match_operand:SSEMODEF2P 1 "register_operand" "")
1135 (match_operand:SSEMODEF2P 2 "register_operand" ""))
1136 (match_operand:SSEMODEF2P 3 "register_operand" ""))]
1137 UNSPEC_SSE5_INTRINSIC))]
1140 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1141 if (TARGET_FUSED_MADD)
1143 emit_insn (gen_sse5_fmsub<mode>4 (operands[0], operands[1],
1144 operands[2], operands[3]));
;; Insn matching the UNSPEC_SSE5_INTRINSIC form of fmsub.  The condition
;; needs only TARGET_SSE5 (no TARGET_FUSED_MADD test) and passes 1 as the
;; final argument to ix86_sse5_valid_op_p.
;; Operand 1 uses nonimmediate_operand so that its predicate agrees with the
;; "xm" constraint alternative, with the commutative ("%") partner operand 2,
;; and with the *sse5i_fmadd, *sse5i_fnmadd and *sse5i_fnmsub patterns.
1149 (define_insn "*sse5i_fmsub<mode>4"
1150 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
1154 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0,0,x,xm")
1155 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x"))
1156 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0"))]
1157 UNSPEC_SSE5_INTRINSIC))]
1158 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1159 "fmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1160 [(set_attr "type" "ssemuladd")
1161 (set_attr "mode" "<MODE>")])
1163 ;; Rewrite (- (a * b) + c) into the canonical form: c - (a * b)
1164 ;; Note operands are out of order to simplify call to ix86_sse5_valid_op_p
;; Intrinsic expander for fnmadd.  Every operand is a register_operand.  When
;; TARGET_FUSED_MADD is set, the preparation code emits sse5_fnmadd<mode>4
;; in place of the UNSPEC_SSE5_INTRINSIC form.
1165 (define_expand "sse5i_fnmadd<mode>4"
1166 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1169 (match_operand:SSEMODEF2P 3 "register_operand" "")
1171 (match_operand:SSEMODEF2P 1 "register_operand" "")
1172 (match_operand:SSEMODEF2P 2 "register_operand" "")))]
1173 UNSPEC_SSE5_INTRINSIC))]
1176 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1177 if (TARGET_FUSED_MADD)
1179 emit_insn (gen_sse5_fnmadd<mode>4 (operands[0], operands[1],
1180 operands[2], operands[3]));
;; Insn matching the UNSPEC_SSE5_INTRINSIC form of fnmadd.  Operand 3 is
;; written first in the RTL; operands 1 and 2 are marked commutative ("%").
;; The condition needs only TARGET_SSE5 (no TARGET_FUSED_MADD test).
1185 (define_insn "*sse5i_fnmadd<mode>4"
1186 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
1189 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0")
1191 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0,0,x,xm")
1192 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x")))]
1193 UNSPEC_SSE5_INTRINSIC))]
1194 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1195 "fnmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1196 [(set_attr "type" "ssemuladd")
1197 (set_attr "mode" "<MODE>")])
1199 ;; Rewrite (- (a * b) - c) into the canonical form: ((-a) * b) - c
;; Intrinsic expander for fnmsub.  Every operand is a register_operand.  When
;; TARGET_FUSED_MADD is set, the preparation code emits sse5_fnmsub<mode>4
;; in place of the UNSPEC_SSE5_INTRINSIC form.
1200 (define_expand "sse5i_fnmsub<mode>4"
1201 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1206 (match_operand:SSEMODEF2P 1 "register_operand" ""))
1207 (match_operand:SSEMODEF2P 2 "register_operand" ""))
1208 (match_operand:SSEMODEF2P 3 "register_operand" ""))]
1209 UNSPEC_SSE5_INTRINSIC))]
1212 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1213 if (TARGET_FUSED_MADD)
1215 emit_insn (gen_sse5_fnmsub<mode>4 (operands[0], operands[1],
1216 operands[2], operands[3]));
;; Insn matching the UNSPEC_SSE5_INTRINSIC form of fnmsub.  Unlike
;; sse5_fnmsub<mode>4 it offers four alternatives and marks operand 1
;; commutative ("%").  The condition needs only TARGET_SSE5.
1221 (define_insn "*sse5i_fnmsub<mode>4"
1222 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
1227 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0,0,x,xm"))
1228 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x"))
1229 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0"))]
1230 UNSPEC_SSE5_INTRINSIC))]
1231 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1232 "fnmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1233 [(set_attr "type" "ssemuladd")
1234 (set_attr "mode" "<MODE>")])
1236 ;; Scalar instructions
;; Intrinsic expander for the scalar (vec_merge) fmadd.  Every operand is a
;; register_operand.  When TARGET_FUSED_MADD is set, the preparation code
;; emits sse5_vmfmadd<mode>4 in place of the UNSPEC_SSE5_INTRINSIC form.
1237 (define_expand "sse5i_vmfmadd<mode>4"
1238 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1240 [(vec_merge:SSEMODEF2P
1243 (match_operand:SSEMODEF2P 1 "register_operand" "")
1244 (match_operand:SSEMODEF2P 2 "register_operand" ""))
1245 (match_operand:SSEMODEF2P 3 "register_operand" ""))
1248 UNSPEC_SSE5_INTRINSIC))]
1251 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1252 if (TARGET_FUSED_MADD)
1254 emit_insn (gen_sse5_vmfmadd<mode>4 (operands[0], operands[1],
1255 operands[2], operands[3]));
1260 ;; For the scalar operations, use operand1 for the upper words that aren't
1261 ;; modified, so restrict the forms that are accepted.
;; Operand 1 is a register_operand tied to the destination ("0"); the
;; constraints let either operand 2 or operand 3 be in memory, never both.
;; The "mode" attribute is the scalar mode, <ssescalarmode>.
1262 (define_insn "*sse5i_vmfmadd<mode>4"
1263 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1265 [(vec_merge:SSEMODEF2P
1268 (match_operand:SSEMODEF2P 1 "register_operand" "0,0")
1269 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
1270 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1273 UNSPEC_SSE5_INTRINSIC))]
1274 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1275 "fmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1276 [(set_attr "type" "ssemuladd")
1277 (set_attr "mode" "<ssescalarmode>")])
;; Intrinsic expander for the scalar (vec_merge) fmsub.  Every operand is a
;; register_operand.  When TARGET_FUSED_MADD is set, the preparation code
;; emits sse5_vmfmsub<mode>4 in place of the UNSPEC_SSE5_INTRINSIC form.
1279 (define_expand "sse5i_vmfmsub<mode>4"
1280 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1282 [(vec_merge:SSEMODEF2P
1285 (match_operand:SSEMODEF2P 1 "register_operand" "")
1286 (match_operand:SSEMODEF2P 2 "register_operand" ""))
1287 (match_operand:SSEMODEF2P 3 "register_operand" ""))
1290 UNSPEC_SSE5_INTRINSIC))]
1293 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1294 if (TARGET_FUSED_MADD)
1296 emit_insn (gen_sse5_vmfmsub<mode>4 (operands[0], operands[1],
1297 operands[2], operands[3]));
;; Insn matching the UNSPEC_SSE5_INTRINSIC form of the scalar fmsub.
;; Operand 1 is tied to the destination ("0"); the constraints let either
;; operand 2 or operand 3 be in memory, never both.
1302 (define_insn "*sse5i_vmfmsub<mode>4"
1303 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1305 [(vec_merge:SSEMODEF2P
1308 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0")
1309 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
1310 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1313 UNSPEC_SSE5_INTRINSIC))]
1314 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1315 "fmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1316 [(set_attr "type" "ssemuladd")
1317 (set_attr "mode" "<ssescalarmode>")])
1319 ;; Note operands are out of order to simplify call to ix86_sse5_valid_op_p
;; Intrinsic expander for the scalar (vec_merge) fnmadd.  Every operand is a
;; register_operand.  When TARGET_FUSED_MADD is set, the preparation code
;; emits sse5_vmfnmadd<mode>4 in place of the UNSPEC_SSE5_INTRINSIC form.
1320 (define_expand "sse5i_vmfnmadd<mode>4"
1321 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1323 [(vec_merge:SSEMODEF2P
1325 (match_operand:SSEMODEF2P 3 "register_operand" "")
1327 (match_operand:SSEMODEF2P 1 "register_operand" "")
1328 (match_operand:SSEMODEF2P 2 "register_operand" "")))
1331 UNSPEC_SSE5_INTRINSIC))]
1334 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1335 if (TARGET_FUSED_MADD)
1337 emit_insn (gen_sse5_vmfnmadd<mode>4 (operands[0], operands[1],
1338 operands[2], operands[3]));
;; Insn matching the UNSPEC_SSE5_INTRINSIC form of the scalar fnmadd.
;; Operand 3 is written first in the RTL; operand 1 is tied to the
;; destination ("0").  Either operand 2 or operand 3 may be in memory.
1343 (define_insn "*sse5i_vmfnmadd<mode>4"
1344 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1346 [(vec_merge:SSEMODEF2P
1348 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")
1350 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0")
1351 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm")))
1354 UNSPEC_SSE5_INTRINSIC))]
1355 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1356 "fnmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1357 [(set_attr "type" "ssemuladd")
1358 (set_attr "mode" "<ssescalarmode>")])
;; Intrinsic expander for the scalar (vec_merge) fnmsub.  Every operand is a
;; register_operand.  When TARGET_FUSED_MADD is set, the preparation code
;; emits sse5_vmfnmsub<mode>4 in place of the UNSPEC_SSE5_INTRINSIC form.
1360 (define_expand "sse5i_vmfnmsub<mode>4"
1361 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1363 [(vec_merge:SSEMODEF2P
1367 (match_operand:SSEMODEF2P 1 "register_operand" ""))
1368 (match_operand:SSEMODEF2P 2 "register_operand" ""))
1369 (match_operand:SSEMODEF2P 3 "register_operand" ""))
1372 UNSPEC_SSE5_INTRINSIC))]
1375 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1376 if (TARGET_FUSED_MADD)
1378 emit_insn (gen_sse5_vmfnmsub<mode>4 (operands[0], operands[1],
1379 operands[2], operands[3]));
;; Insn matching the UNSPEC_SSE5_INTRINSIC form of the scalar fnmsub.
;; Operand 1 is tied to the destination ("0"); the constraints let either
;; operand 2 or operand 3 be in memory, never both.
1384 (define_insn "*sse5i_vmfnmsub<mode>4"
1385 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1387 [(vec_merge:SSEMODEF2P
1391 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0"))
1392 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
1393 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1396 UNSPEC_SSE5_INTRINSIC))]
1397 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
1398 "fnmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1399 [(set_attr "type" "ssemuladd")
1400 (set_attr "mode" "<ssescalarmode>")])
1402 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1404 ;; Parallel single-precision floating point conversion operations
1406 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2ps: convert the V2SI operand 2 ("ym": MMX register or memory) to
;; V2SF.  Operand 1 is tied to the V4SF destination ("0").
1408 (define_insn "sse_cvtpi2ps"
1409 [(set (match_operand:V4SF 0 "register_operand" "=x")
1412 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
1413 (match_operand:V4SF 1 "register_operand" "0")
1416 "cvtpi2ps\t{%2, %0|%0, %2}"
1417 [(set_attr "type" "ssecvt")
1418 (set_attr "mode" "V4SF")])
;; cvtps2pi: integer conversion of V4SF operand 1, expressed as an unspec;
;; elements 0 and 1 of the V4SI value form the V2SI result in an MMX
;; register ("=y").
1420 (define_insn "sse_cvtps2pi"
1421 [(set (match_operand:V2SI 0 "register_operand" "=y")
1423 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
1425 (parallel [(const_int 0) (const_int 1)])))]
1427 "cvtps2pi\t{%1, %0|%0, %1}"
1428 [(set_attr "type" "ssecvt")
1429 (set_attr "unit" "mmx")
1430 (set_attr "mode" "DI")])
;; cvttps2pi: truncating (fix) conversion of V4SF operand 1; elements 0 and 1
;; of the V4SI value form the V2SI result in an MMX register ("=y").
;; NOTE(review): "mode" is "SF" here but "DI" in sse_cvtps2pi -- confirm.
1432 (define_insn "sse_cvttps2pi"
1433 [(set (match_operand:V2SI 0 "register_operand" "=y")
1435 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
1436 (parallel [(const_int 0) (const_int 1)])))]
1438 "cvttps2pi\t{%1, %0|%0, %1}"
1439 [(set_attr "type" "ssecvt")
1440 (set_attr "unit" "mmx")
1441 (set_attr "mode" "SF")])
;; cvtsi2ss: convert SI operand 2 (general register or memory) to SF.
;; Operand 1 is tied to the V4SF destination ("0").  The decode attributes
;; are given per alternative.
1443 (define_insn "sse_cvtsi2ss"
1444 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1447 (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
1448 (match_operand:V4SF 1 "register_operand" "0,0")
1451 "cvtsi2ss\t{%2, %0|%0, %2}"
1452 [(set_attr "type" "sseicvt")
1453 (set_attr "athlon_decode" "vector,double")
1454 (set_attr "amdfam10_decode" "vector,double")
1455 (set_attr "mode" "SF")])
;; cvtsi2ssq: 64-bit variant of sse_cvtsi2ss taking a DI operand 2; enabled
;; only for TARGET_SSE && TARGET_64BIT.
;; NOTE(review): the second alternative of operand 2 is "rm" here but "m" in
;; sse_cvtsi2ss -- confirm whether the extra "r" is intended.
1457 (define_insn "sse_cvtsi2ssq"
1458 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1461 (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,rm")))
1462 (match_operand:V4SF 1 "register_operand" "0,0")
1464 "TARGET_SSE && TARGET_64BIT"
1465 "cvtsi2ssq\t{%2, %0|%0, %2}"
1466 [(set_attr "type" "sseicvt")
1467 (set_attr "athlon_decode" "vector,double")
1468 (set_attr "amdfam10_decode" "vector,double")
1469 (set_attr "mode" "SF")])
;; cvtss2si: UNSPEC_FIX_NOTRUNC conversion of element 0 of V4SF operand 1 to
;; an SI general register.
;; NOTE(review): no "amdfam10_decode" attribute is set here, unlike
;; sse_cvtss2si_2 -- confirm whether that is deliberate.
1471 (define_insn "sse_cvtss2si"
1472 [(set (match_operand:SI 0 "register_operand" "=r,r")
1475 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1476 (parallel [(const_int 0)]))]
1477 UNSPEC_FIX_NOTRUNC))]
1479 "cvtss2si\t{%1, %0|%0, %1}"
1480 [(set_attr "type" "sseicvt")
1481 (set_attr "athlon_decode" "double,vector")
1482 (set_attr "prefix_rep" "1")
1483 (set_attr "mode" "SI")])
;; Variant of sse_cvtss2si whose input is a plain SF operand rather than an
;; element selected from a V4SF vector; emits the same cvtss2si template.
1485 (define_insn "sse_cvtss2si_2"
1486 [(set (match_operand:SI 0 "register_operand" "=r,r")
1487 (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
1488 UNSPEC_FIX_NOTRUNC))]
1490 "cvtss2si\t{%1, %0|%0, %1}"
1491 [(set_attr "type" "sseicvt")
1492 (set_attr "athlon_decode" "double,vector")
1493 (set_attr "amdfam10_decode" "double,double")
1494 (set_attr "prefix_rep" "1")
1495 (set_attr "mode" "SI")])
;; cvtss2siq: UNSPEC_FIX_NOTRUNC conversion of element 0 of V4SF operand 1 to
;; a DI general register; enabled only for TARGET_SSE && TARGET_64BIT.
;; NOTE(review): no "amdfam10_decode" attribute is set here, unlike
;; sse_cvtss2siq_2 -- confirm whether that is deliberate.
1497 (define_insn "sse_cvtss2siq"
1498 [(set (match_operand:DI 0 "register_operand" "=r,r")
1501 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1502 (parallel [(const_int 0)]))]
1503 UNSPEC_FIX_NOTRUNC))]
1504 "TARGET_SSE && TARGET_64BIT"
1505 "cvtss2siq\t{%1, %0|%0, %1}"
1506 [(set_attr "type" "sseicvt")
1507 (set_attr "athlon_decode" "double,vector")
1508 (set_attr "prefix_rep" "1")
1509 (set_attr "mode" "DI")])
;; Variant of sse_cvtss2siq whose input is a plain SF operand rather than an
;; element selected from a V4SF vector; enabled only for
;; TARGET_SSE && TARGET_64BIT.
1511 (define_insn "sse_cvtss2siq_2"
1512 [(set (match_operand:DI 0 "register_operand" "=r,r")
1513 (unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
1514 UNSPEC_FIX_NOTRUNC))]
1515 "TARGET_SSE && TARGET_64BIT"
1516 "cvtss2siq\t{%1, %0|%0, %1}"
1517 [(set_attr "type" "sseicvt")
1518 (set_attr "athlon_decode" "double,vector")
1519 (set_attr "amdfam10_decode" "double,double")
1520 (set_attr "prefix_rep" "1")
1521 (set_attr "mode" "DI")])
;; cvttss2si: converts element 0 of V4SF operand 1 (SSE register or memory)
;; to an SI general register.
1523 (define_insn "sse_cvttss2si"
1524 [(set (match_operand:SI 0 "register_operand" "=r,r")
1527 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1528 (parallel [(const_int 0)]))))]
1530 "cvttss2si\t{%1, %0|%0, %1}"
1531 [(set_attr "type" "sseicvt")
1532 (set_attr "athlon_decode" "double,vector")
1533 (set_attr "amdfam10_decode" "double,double")
1534 (set_attr "prefix_rep" "1")
1535 (set_attr "mode" "SI")])
;; cvttss2siq: converts element 0 of V4SF operand 1 to a DI general register;
;; enabled only for TARGET_SSE && TARGET_64BIT.
1537 (define_insn "sse_cvttss2siq"
1538 [(set (match_operand:DI 0 "register_operand" "=r,r")
1541 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1542 (parallel [(const_int 0)]))))]
1543 "TARGET_SSE && TARGET_64BIT"
1544 "cvttss2siq\t{%1, %0|%0, %1}"
1545 [(set_attr "type" "sseicvt")
1546 (set_attr "athlon_decode" "double,vector")
1547 (set_attr "amdfam10_decode" "double,double")
1548 (set_attr "prefix_rep" "1")
1549 (set_attr "mode" "DI")])
;; cvtdq2ps: convert all four SI elements of V4SI operand 1 (SSE register or
;; memory) to V4SF.
1551 (define_insn "sse2_cvtdq2ps"
1552 [(set (match_operand:V4SF 0 "register_operand" "=x")
1553 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
1555 "cvtdq2ps\t{%1, %0|%0, %1}"
1556 [(set_attr "type" "ssecvt")
1557 (set_attr "mode" "V4SF")])
;; cvtps2dq: UNSPEC_FIX_NOTRUNC conversion of V4SF operand 1 to V4SI.
1559 (define_insn "sse2_cvtps2dq"
1560 [(set (match_operand:V4SI 0 "register_operand" "=x")
1561 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
1562 UNSPEC_FIX_NOTRUNC))]
1564 "cvtps2dq\t{%1, %0|%0, %1}"
1565 [(set_attr "type" "ssecvt")
1566 (set_attr "prefix_data16" "1")
1567 (set_attr "mode" "TI")])
;; cvttps2dq: truncating (fix) conversion of V4SF operand 1 to V4SI.
1569 (define_insn "sse2_cvttps2dq"
1570 [(set (match_operand:V4SI 0 "register_operand" "=x")
1571 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
1573 "cvttps2dq\t{%1, %0|%0, %1}"
1574 [(set_attr "type" "ssecvt")
1575 (set_attr "prefix_rep" "1")
1576 (set_attr "mode" "TI")])
1578 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1580 ;; Parallel double-precision floating point conversion operations
1582 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2pd: convert V2SI operand 1 to V2DF.  The first alternative takes an
;; MMX register ("y") and is tagged unit "mmx"; the second takes memory.
1584 (define_insn "sse2_cvtpi2pd"
1585 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1586 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
1588 "cvtpi2pd\t{%1, %0|%0, %1}"
1589 [(set_attr "type" "ssecvt")
1590 (set_attr "unit" "mmx,*")
1591 (set_attr "mode" "V2DF")])
;; cvtpd2pi: UNSPEC_FIX_NOTRUNC conversion of V2DF operand 1 to a V2SI value
;; in an MMX register ("=y").
1593 (define_insn "sse2_cvtpd2pi"
1594 [(set (match_operand:V2SI 0 "register_operand" "=y")
1595 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
1596 UNSPEC_FIX_NOTRUNC))]
1598 "cvtpd2pi\t{%1, %0|%0, %1}"
1599 [(set_attr "type" "ssecvt")
1600 (set_attr "unit" "mmx")
1601 (set_attr "prefix_data16" "1")
1602 (set_attr "mode" "DI")])
;; cvttpd2pi: truncating (fix) conversion of V2DF operand 1 to a V2SI value
;; in an MMX register ("=y").
;; NOTE(review): "mode" is "TI" here but "DI" in sse2_cvtpd2pi -- confirm.
1604 (define_insn "sse2_cvttpd2pi"
1605 [(set (match_operand:V2SI 0 "register_operand" "=y")
1606 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
1608 "cvttpd2pi\t{%1, %0|%0, %1}"
1609 [(set_attr "type" "ssecvt")
1610 (set_attr "unit" "mmx")
1611 (set_attr "prefix_data16" "1")
1612 (set_attr "mode" "TI")])
;; cvtsi2sd: convert SI operand 2 (general register or memory) to DF.
;; Operand 1 is tied to the V2DF destination ("0").
1614 (define_insn "sse2_cvtsi2sd"
1615 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1618 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
1619 (match_operand:V2DF 1 "register_operand" "0,0")
1622 "cvtsi2sd\t{%2, %0|%0, %2}"
1623 [(set_attr "type" "sseicvt")
1624 (set_attr "mode" "DF")
1625 (set_attr "athlon_decode" "double,direct")
1626 (set_attr "amdfam10_decode" "vector,double")])
;; cvtsi2sdq: 64-bit variant of sse2_cvtsi2sd taking a DI operand 2; enabled
;; only for TARGET_SSE2 && TARGET_64BIT.
1628 (define_insn "sse2_cvtsi2sdq"
1629 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1632 (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m")))
1633 (match_operand:V2DF 1 "register_operand" "0,0")
1635 "TARGET_SSE2 && TARGET_64BIT"
1636 "cvtsi2sdq\t{%2, %0|%0, %2}"
1637 [(set_attr "type" "sseicvt")
1638 (set_attr "mode" "DF")
1639 (set_attr "athlon_decode" "double,direct")
1640 (set_attr "amdfam10_decode" "vector,double")])
;; cvtsd2si: UNSPEC_FIX_NOTRUNC conversion of element 0 of V2DF operand 1 to
;; an SI general register.
;; NOTE(review): no "amdfam10_decode" attribute is set here, unlike
;; sse2_cvtsd2si_2 -- confirm whether that is deliberate.
1642 (define_insn "sse2_cvtsd2si"
1643 [(set (match_operand:SI 0 "register_operand" "=r,r")
1646 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
1647 (parallel [(const_int 0)]))]
1648 UNSPEC_FIX_NOTRUNC))]
1650 "cvtsd2si\t{%1, %0|%0, %1}"
1651 [(set_attr "type" "sseicvt")
1652 (set_attr "athlon_decode" "double,vector")
1653 (set_attr "prefix_rep" "1")
1654 (set_attr "mode" "SI")])
;; Variant of sse2_cvtsd2si whose input is a plain DF operand rather than an
;; element selected from a V2DF vector; emits the same cvtsd2si template.
1656 (define_insn "sse2_cvtsd2si_2"
1657 [(set (match_operand:SI 0 "register_operand" "=r,r")
1658 (unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
1659 UNSPEC_FIX_NOTRUNC))]
1661 "cvtsd2si\t{%1, %0|%0, %1}"
1662 [(set_attr "type" "sseicvt")
1663 (set_attr "athlon_decode" "double,vector")
1664 (set_attr "amdfam10_decode" "double,double")
1665 (set_attr "prefix_rep" "1")
1666 (set_attr "mode" "SI")])
;; cvtsd2siq: UNSPEC_FIX_NOTRUNC conversion of element 0 of V2DF operand 1 to
;; a DI general register; enabled only for TARGET_SSE2 && TARGET_64BIT.
;; NOTE(review): no "amdfam10_decode" attribute is set here, unlike
;; sse2_cvtsd2siq_2 -- confirm whether that is deliberate.
1668 (define_insn "sse2_cvtsd2siq"
1669 [(set (match_operand:DI 0 "register_operand" "=r,r")
1672 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
1673 (parallel [(const_int 0)]))]
1674 UNSPEC_FIX_NOTRUNC))]
1675 "TARGET_SSE2 && TARGET_64BIT"
1676 "cvtsd2siq\t{%1, %0|%0, %1}"
1677 [(set_attr "type" "sseicvt")
1678 (set_attr "athlon_decode" "double,vector")
1679 (set_attr "prefix_rep" "1")
1680 (set_attr "mode" "DI")])
;; Variant of sse2_cvtsd2siq whose input is a plain DF operand rather than an
;; element selected from a V2DF vector; enabled only for
;; TARGET_SSE2 && TARGET_64BIT.
1682 (define_insn "sse2_cvtsd2siq_2"
1683 [(set (match_operand:DI 0 "register_operand" "=r,r")
1684 (unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
1685 UNSPEC_FIX_NOTRUNC))]
1686 "TARGET_SSE2 && TARGET_64BIT"
1687 "cvtsd2siq\t{%1, %0|%0, %1}"
1688 [(set_attr "type" "sseicvt")
1689 (set_attr "athlon_decode" "double,vector")
1690 (set_attr "amdfam10_decode" "double,double")
1691 (set_attr "prefix_rep" "1")
1692 (set_attr "mode" "DI")])
;; cvttsd2si: converts element 0 of V2DF operand 1 (SSE register or memory)
;; to an SI general register.
1694 (define_insn "sse2_cvttsd2si"
1695 [(set (match_operand:SI 0 "register_operand" "=r,r")
1698 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
1699 (parallel [(const_int 0)]))))]
1701 "cvttsd2si\t{%1, %0|%0, %1}"
1702 [(set_attr "type" "sseicvt")
1703 (set_attr "prefix_rep" "1")
1704 (set_attr "mode" "SI")
1705 (set_attr "athlon_decode" "double,vector")
1706 (set_attr "amdfam10_decode" "double,double")])
;; cvttsd2siq: converts element 0 of V2DF operand 1 to a DI general register;
;; enabled only for TARGET_SSE2 && TARGET_64BIT.
1708 (define_insn "sse2_cvttsd2siq"
1709 [(set (match_operand:DI 0 "register_operand" "=r,r")
1712 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
1713 (parallel [(const_int 0)]))))]
1714 "TARGET_SSE2 && TARGET_64BIT"
1715 "cvttsd2siq\t{%1, %0|%0, %1}"
1716 [(set_attr "type" "sseicvt")
1717 (set_attr "prefix_rep" "1")
1718 (set_attr "mode" "DI")
1719 (set_attr "athlon_decode" "double,vector")
1720 (set_attr "amdfam10_decode" "double,double")])
;; cvtdq2pd: produces a V2DF result from elements 0 and 1 of V4SI operand 1
;; (SSE register or memory).
1722 (define_insn "sse2_cvtdq2pd"
1723 [(set (match_operand:V2DF 0 "register_operand" "=x")
1726 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
1727 (parallel [(const_int 0) (const_int 1)]))))]
1729 "cvtdq2pd\t{%1, %0|%0, %1}"
1730 [(set_attr "type" "ssecvt")
1731 (set_attr "mode" "V2DF")])
;; Expander for cvtpd2dq.  The preparation statement sets operands[2] to a
;; V2SImode zero constant, which *sse2_cvtpd2dq matches with const0_operand.
1733 (define_expand "sse2_cvtpd2dq"
1734 [(set (match_operand:V4SI 0 "register_operand" "")
1736 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "")]
1740 "operands[2] = CONST0_RTX (V2SImode);")
;; cvtpd2dq insn: V2DF operand 1 is converted through an unspec to V2SI;
;; operand 2 must be a V2SI zero constant (const0_operand).  The destination
;; is a V4SI register.
1742 (define_insn "*sse2_cvtpd2dq"
1743 [(set (match_operand:V4SI 0 "register_operand" "=x")
1745 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
1747 (match_operand:V2SI 2 "const0_operand" "")))]
1749 "cvtpd2dq\t{%1, %0|%0, %1}"
1750 [(set_attr "type" "ssecvt")
1751 (set_attr "prefix_rep" "1")
1752 (set_attr "mode" "TI")
1753 (set_attr "amdfam10_decode" "double")])
;; Expander for cvttpd2dq.  The preparation statement sets operands[2] to a
;; V2SImode zero constant, which *sse2_cvttpd2dq matches with const0_operand.
1755 (define_expand "sse2_cvttpd2dq"
1756 [(set (match_operand:V4SI 0 "register_operand" "")
1758 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" ""))
1761 "operands[2] = CONST0_RTX (V2SImode);")
;; cvttpd2dq insn: truncating (fix) conversion of V2DF operand 1 to V2SI;
;; operand 2 must be a V2SI zero constant (const0_operand).  The destination
;; is a V4SI register.
1763 (define_insn "*sse2_cvttpd2dq"
1764 [(set (match_operand:V4SI 0 "register_operand" "=x")
1766 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
1767 (match_operand:V2SI 2 "const0_operand" "")))]
1769 "cvttpd2dq\t{%1, %0|%0, %1}"
1770 [(set_attr "type" "ssecvt")
1771 (set_attr "prefix_rep" "1")
1772 (set_attr "mode" "TI")
1773 (set_attr "amdfam10_decode" "double")])
;; cvtsd2ss: float_truncate of V2DF operand 2 (SSE register or memory) to
;; V2SF.  Operand 1 is tied to the V4SF destination ("0").
1775 (define_insn "sse2_cvtsd2ss"
1776 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1779 (float_truncate:V2SF
1780 (match_operand:V2DF 2 "nonimmediate_operand" "x,m")))
1781 (match_operand:V4SF 1 "register_operand" "0,0")
1784 "cvtsd2ss\t{%2, %0|%0, %2}"
1785 [(set_attr "type" "ssecvt")
1786 (set_attr "athlon_decode" "vector,double")
1787 (set_attr "amdfam10_decode" "vector,double")
1788 (set_attr "mode" "SF")])
;; cvtss2sd: uses elements 0 and 1 of V4SF operand 2 (SSE register or
;; memory).  Operand 1 is tied to the V2DF destination ("0").
1790 (define_insn "sse2_cvtss2sd"
1791 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1795 (match_operand:V4SF 2 "nonimmediate_operand" "x,m")
1796 (parallel [(const_int 0) (const_int 1)])))
1797 (match_operand:V2DF 1 "register_operand" "0,0")
1800 "cvtss2sd\t{%2, %0|%0, %2}"
1801 [(set_attr "type" "ssecvt")
1802 (set_attr "amdfam10_decode" "vector,double")
1803 (set_attr "mode" "DF")])
;; Expander for cvtpd2ps.  The preparation statement sets operands[2] to a
;; V2SFmode zero constant, which *sse2_cvtpd2ps matches with const0_operand.
1805 (define_expand "sse2_cvtpd2ps"
1806 [(set (match_operand:V4SF 0 "register_operand" "")
1808 (float_truncate:V2SF
1809 (match_operand:V2DF 1 "nonimmediate_operand" ""))
1812 "operands[2] = CONST0_RTX (V2SFmode);")
;; cvtpd2ps insn: float_truncate of V2DF operand 1 to V2SF; operand 2 must be
;; a V2SF zero constant (const0_operand).  The destination is a V4SF
;; register.
1814 (define_insn "*sse2_cvtpd2ps"
1815 [(set (match_operand:V4SF 0 "register_operand" "=x")
1817 (float_truncate:V2SF
1818 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
1819 (match_operand:V2SF 2 "const0_operand" "")))]
1821 "cvtpd2ps\t{%1, %0|%0, %1}"
1822 [(set_attr "type" "ssecvt")
1823 (set_attr "prefix_data16" "1")
1824 (set_attr "mode" "V4SF")
1825 (set_attr "amdfam10_decode" "double")])
;; cvtps2pd: produces a V2DF result from elements 0 and 1 of V4SF operand 1
;; (SSE register or memory).
1827 (define_insn "sse2_cvtps2pd"
1828 [(set (match_operand:V2DF 0 "register_operand" "=x")
1831 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
1832 (parallel [(const_int 0) (const_int 1)]))))]
1834 "cvtps2pd\t{%1, %0|%0, %1}"
1835 [(set_attr "type" "ssecvt")
1836 (set_attr "mode" "V2DF")
1837 (set_attr "amdfam10_decode" "direct")])
;; Expander from V4SF operand 1 to V2DF operand 0.  The preparation code
;; allocates a fresh V4SF pseudo as operands[2]; the set of operand 0 then
;; selects elements 0 and 1 (presumably of that temporary -- confirm).
1839 (define_expand "vec_unpacks_hi_v4sf"
1844 (match_operand:V4SF 1 "nonimmediate_operand" ""))
1845 (parallel [(const_int 6)
1849 (set (match_operand:V2DF 0 "register_operand" "")
1853 (parallel [(const_int 0) (const_int 1)]))))]
1856 operands[2] = gen_reg_rtx (V4SFmode);
;; Expander from V4SF operand 1 to V2DF operand 0 using elements 0 and 1 of
;; the input; same operand shape as sse2_cvtps2pd.
1859 (define_expand "vec_unpacks_lo_v4sf"
1860 [(set (match_operand:V2DF 0 "register_operand" "")
1863 (match_operand:V4SF 1 "nonimmediate_operand" "")
1864 (parallel [(const_int 0) (const_int 1)]))))]
;; V8HI -> V4SF: unpack via vec_unpacks_hi_v8hi into a V4SI temporary, then
;; convert the temporary to V4SF with sse2_cvtdq2ps.
1867 (define_expand "vec_unpacks_float_hi_v8hi"
1868 [(match_operand:V4SF 0 "register_operand" "")
1869 (match_operand:V8HI 1 "register_operand" "")]
1872 rtx tmp = gen_reg_rtx (V4SImode);
1874 emit_insn (gen_vec_unpacks_hi_v8hi (tmp, operands[1]));
1875 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; V8HI -> V4SF: unpack via vec_unpacks_lo_v8hi into a V4SI temporary, then
;; convert the temporary to V4SF with sse2_cvtdq2ps.
1879 (define_expand "vec_unpacks_float_lo_v8hi"
1880 [(match_operand:V4SF 0 "register_operand" "")
1881 (match_operand:V8HI 1 "register_operand" "")]
1884 rtx tmp = gen_reg_rtx (V4SImode);
1886 emit_insn (gen_vec_unpacks_lo_v8hi (tmp, operands[1]));
1887 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; V8HI -> V4SF: unpack via vec_unpacku_hi_v8hi into a V4SI temporary, then
;; convert the temporary to V4SF with sse2_cvtdq2ps.
1891 (define_expand "vec_unpacku_float_hi_v8hi"
1892 [(match_operand:V4SF 0 "register_operand" "")
1893 (match_operand:V8HI 1 "register_operand" "")]
1896 rtx tmp = gen_reg_rtx (V4SImode);
1898 emit_insn (gen_vec_unpacku_hi_v8hi (tmp, operands[1]));
1899 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; V8HI -> V4SF: unpack via vec_unpacku_lo_v8hi into a V4SI temporary, then
;; convert the temporary to V4SF with sse2_cvtdq2ps.
1903 (define_expand "vec_unpacku_float_lo_v8hi"
1904 [(match_operand:V4SF 0 "register_operand" "")
1905 (match_operand:V8HI 1 "register_operand" "")]
1908 rtx tmp = gen_reg_rtx (V4SImode);
1910 emit_insn (gen_vec_unpacku_lo_v8hi (tmp, operands[1]));
1911 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; Expander from V4SI operand 1 to V2DF operand 0.  The preparation code
;; allocates a fresh V4SI pseudo as operands[2]; the set of operand 0 then
;; selects elements 0 and 1 (presumably of that temporary -- confirm).
1915 (define_expand "vec_unpacks_float_hi_v4si"
1918 (match_operand:V4SI 1 "nonimmediate_operand" "")
1919 (parallel [(const_int 2)
1923 (set (match_operand:V2DF 0 "register_operand" "")
1927 (parallel [(const_int 0) (const_int 1)]))))]
1930 operands[2] = gen_reg_rtx (V4SImode);
;; Expander from V4SI operand 1 to V2DF operand 0 using elements 0 and 1 of
;; the input; same operand shape as sse2_cvtdq2pd.
1933 (define_expand "vec_unpacks_float_lo_v4si"
1934 [(set (match_operand:V2DF 0 "register_operand" "")
1937 (match_operand:V4SI 1 "nonimmediate_operand" "")
1938 (parallel [(const_int 0) (const_int 1)]))))]
;; Pack two V2DF inputs into one V4SF result: each input is converted with
;; sse2_cvtpd2ps into its own V4SF temporary (r1, r2), and sse_movlhps then
;; combines r1 and r2 into operand 0.
1941 (define_expand "vec_pack_trunc_v2df"
1942 [(match_operand:V4SF 0 "register_operand" "")
1943 (match_operand:V2DF 1 "nonimmediate_operand" "")
1944 (match_operand:V2DF 2 "nonimmediate_operand" "")]
1949 r1 = gen_reg_rtx (V4SFmode);
1950 r2 = gen_reg_rtx (V4SFmode);
1952 emit_insn (gen_sse2_cvtpd2ps (r1, operands[1]));
1953 emit_insn (gen_sse2_cvtpd2ps (r2, operands[2]));
1954 emit_insn (gen_sse_movlhps (operands[0], r1, r2));
;; Pack two V2DF inputs into one V4SI result with truncating conversion:
;; each input goes through sse2_cvttpd2dq into a V4SI temporary (r1, r2), and
;; sse2_punpcklqdq then combines them, with all three operands viewed as
;; V2DI through gen_lowpart.
1958 (define_expand "vec_pack_sfix_trunc_v2df"
1959 [(match_operand:V4SI 0 "register_operand" "")
1960 (match_operand:V2DF 1 "nonimmediate_operand" "")
1961 (match_operand:V2DF 2 "nonimmediate_operand" "")]
1966 r1 = gen_reg_rtx (V4SImode);
1967 r2 = gen_reg_rtx (V4SImode);
1969 emit_insn (gen_sse2_cvttpd2dq (r1, operands[1]));
1970 emit_insn (gen_sse2_cvttpd2dq (r2, operands[2]));
1971 emit_insn (gen_sse2_punpcklqdq (gen_lowpart (V2DImode, operands[0]),
1972 gen_lowpart (V2DImode, r1),
1973 gen_lowpart (V2DImode, r2)));
;; Same shape as vec_pack_sfix_trunc_v2df, but each input is converted with
;; sse2_cvtpd2dq (the non-truncating pattern) before sse2_punpcklqdq combines
;; the two V4SI temporaries through V2DI lowparts.
1977 (define_expand "vec_pack_sfix_v2df"
1978 [(match_operand:V4SI 0 "register_operand" "")
1979 (match_operand:V2DF 1 "nonimmediate_operand" "")
1980 (match_operand:V2DF 2 "nonimmediate_operand" "")]
1985 r1 = gen_reg_rtx (V4SImode);
1986 r2 = gen_reg_rtx (V4SImode);
1988 emit_insn (gen_sse2_cvtpd2dq (r1, operands[1]));
1989 emit_insn (gen_sse2_cvtpd2dq (r2, operands[2]));
1990 emit_insn (gen_sse2_punpcklqdq (gen_lowpart (V2DImode, operands[0]),
1991 gen_lowpart (V2DImode, r1),
1992 gen_lowpart (V2DImode, r2)));
1996 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1998 ;; Parallel single-precision floating point element swizzling
2000 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander form of movhlps.  All three operands are nonimmediate_operand;
;; the preparation statement passes them through ix86_fixup_binary_operands
;; (with code UNKNOWN and mode V4SFmode).
2002 (define_expand "sse_movhlps_exp"
2003 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
2006 (match_operand:V4SF 1 "nonimmediate_operand" "")
2007 (match_operand:V4SF 2 "nonimmediate_operand" ""))
2008 (parallel [(const_int 6)
2013 "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
;; Three alternatives, one output template each:
;;   reg <- reg      : movhlps
;;   reg <- mem ("o"): movlps, reading the %H2 part of operand 2
;;   mem <- reg      : movhps
;; Operand 1 is tied to the destination in every alternative.  The condition
;; rejects the case where operands 1 and 2 are both in memory.
2015 (define_insn "sse_movhlps"
2016 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
2019 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
2020 (match_operand:V4SF 2 "nonimmediate_operand" " x,o,x"))
2021 (parallel [(const_int 6)
2025 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2027 movhlps\t{%2, %0|%0, %2}
2028 movlps\t{%H2, %0|%0, %H2}
2029 movhps\t{%2, %0|%0, %2}"
2030 [(set_attr "type" "ssemov")
2031 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Expander form of movlhps.  All three operands are nonimmediate_operand;
;; the preparation statement passes them through ix86_fixup_binary_operands
;; (with code UNKNOWN and mode V4SFmode).
2033 (define_expand "sse_movlhps_exp"
2034 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
2037 (match_operand:V4SF 1 "nonimmediate_operand" "")
2038 (match_operand:V4SF 2 "nonimmediate_operand" ""))
2039 (parallel [(const_int 0)
2044 "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
;; Three alternatives with operand 1 always tied to the output:
;;   0: register source                       -> movlhps
;;   1: memory source                         -> movhps
;;   2: offsettable memory dest, register src -> movlps into the %H0 form
;; Operand validity is checked with ix86_binary_operator_ok.
2046 (define_insn "sse_movlhps"
2047 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
2050 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
2051 (match_operand:V4SF 2 "nonimmediate_operand" " x,m,x"))
2052 (parallel [(const_int 0)
2056 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
2058 movlhps\t{%2, %0|%0, %2}
2059 movhps\t{%2, %0|%0, %2}
2060 movlps\t{%2, %H0|%H0, %2}"
2061 [(set_attr "type" "ssemov")
2062 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; unpckhps: the selection indices are 2, 6, 3, 7 over operands 1 and 2.
;; Operand 1 is tied to the output register; operand 2 may be a register
;; or memory.
2064 (define_insn "sse_unpckhps"
2065 [(set (match_operand:V4SF 0 "register_operand" "=x")
2068 (match_operand:V4SF 1 "register_operand" "0")
2069 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
2070 (parallel [(const_int 2) (const_int 6)
2071 (const_int 3) (const_int 7)])))]
2073 "unpckhps\t{%2, %0|%0, %2}"
2074 [(set_attr "type" "sselog")
2075 (set_attr "mode" "V4SF")])
;; unpcklps: the selection indices are 0, 4, 1, 5 over operands 1 and 2.
;; Operand 1 is tied to the output register; operand 2 may be a register
;; or memory.
2077 (define_insn "sse_unpcklps"
2078 [(set (match_operand:V4SF 0 "register_operand" "=x")
2081 (match_operand:V4SF 1 "register_operand" "0")
2082 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
2083 (parallel [(const_int 0) (const_int 4)
2084 (const_int 1) (const_int 5)])))]
2086 "unpcklps\t{%2, %0|%0, %2}"
2087 [(set_attr "type" "sselog")
2088 (set_attr "mode" "V4SF")])
;; movshdup from a register or memory V4SF source into a register; the
;; visible selection list starts at index 1.  prefix_rep is set to 1.
2090 ;; These are modeled with the same vec_concat as the others so that we
2091 ;; capture users of shufps that can use the new instructions
2092 (define_insn "sse3_movshdup"
2093 [(set (match_operand:V4SF 0 "register_operand" "=x")
2096 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
2098 (parallel [(const_int 1)
2103 "movshdup\t{%1, %0|%0, %1}"
2104 [(set_attr "type" "sse")
2105 (set_attr "prefix_rep" "1")
2106 (set_attr "mode" "V4SF")])
;; movsldup from a register or memory V4SF source into a register; the
;; visible selection list starts at index 0.  prefix_rep is set to 1.
2108 (define_insn "sse3_movsldup"
2109 [(set (match_operand:V4SF 0 "register_operand" "=x")
2112 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
2114 (parallel [(const_int 0)
2119 "movsldup\t{%1, %0|%0, %1}"
2120 [(set_attr "type" "sse")
2121 (set_attr "prefix_rep" "1")
2122 (set_attr "mode" "V4SF")])
;; Expander taking the shufps immediate as operand 3.  It splits bits 0-7
;; of the immediate into four 2-bit selectors and hands them to
;; sse_shufps_1; the two upper selectors are biased by 4 so that they fall
;; in the 4..7 range that sse_shufps_1 requires for its operands 5 and 6.
2124 (define_expand "sse_shufps"
2125 [(match_operand:V4SF 0 "register_operand" "")
2126 (match_operand:V4SF 1 "register_operand" "")
2127 (match_operand:V4SF 2 "nonimmediate_operand" "")
2128 (match_operand:SI 3 "const_int_operand" "")]
2131 int mask = INTVAL (operands[3]);
2132 emit_insn (gen_sse_shufps_1 (operands[0], operands[1], operands[2],
2133 GEN_INT ((mask >> 0) & 3),
2134 GEN_INT ((mask >> 2) & 3),
2135 GEN_INT (((mask >> 4) & 3) + 4),
2136 GEN_INT (((mask >> 6) & 3) + 4)));
;; shufps with the selection spelled out as four separate constants.
;; Operands 3 and 4 use the const_0_to_3 predicate, operands 5 and 6 the
;; const_4_to_7 predicate.  The output code rebuilds the instruction
;; immediate: operands 3 and 4 go to bit positions 0 and 2, operands 5 and
;; 6 have the bias of 4 removed and go to bit positions 4 and 6.  The
;; result overwrites operands[3], which the template then prints.
2140 (define_insn "sse_shufps_1"
2141 [(set (match_operand:V4SF 0 "register_operand" "=x")
2144 (match_operand:V4SF 1 "register_operand" "0")
2145 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
2146 (parallel [(match_operand 3 "const_0_to_3_operand" "")
2147 (match_operand 4 "const_0_to_3_operand" "")
2148 (match_operand 5 "const_4_to_7_operand" "")
2149 (match_operand 6 "const_4_to_7_operand" "")])))]
2153 mask |= INTVAL (operands[3]) << 0;
2154 mask |= INTVAL (operands[4]) << 2;
2155 mask |= (INTVAL (operands[5]) - 4) << 4;
2156 mask |= (INTVAL (operands[6]) - 4) << 6;
2157 operands[3] = GEN_INT (mask);
2159 return "shufps\t{%3, %2, %0|%0, %2, %3}";
2161 [(set_attr "type" "sselog")
2162 (set_attr "mode" "V4SF")])
;; Extract elements 2 and 3 of V4SF operand 1 as a V2SF value.
;;   0: register -> memory              movhps
;;   1: register -> register            movhlps
;;   2: offsettable memory -> register  movlps reading the %H1 form
2164 (define_insn "sse_storehps"
2165 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
2167 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
2168 (parallel [(const_int 2) (const_int 3)])))]
2171 movhps\t{%1, %0|%0, %1}
2172 movhlps\t{%1, %0|%0, %1}
2173 movlps\t{%H1, %0|%0, %H1}"
2174 [(set_attr "type" "ssemov")
2175 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander counterpart of sse_loadhps: it accepts any nonimmediate
;; operands and runs ix86_fixup_binary_operands on them as its preparation
;; statement.
2177 (define_expand "sse_loadhps_exp"
2178 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
2181 (match_operand:V4SF 1 "nonimmediate_operand" "")
2182 (parallel [(const_int 0) (const_int 1)]))
2183 (match_operand:V2SF 2 "nonimmediate_operand" "")))]
2185 "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
;; Pairs elements 0 and 1 of operand 1 (tied to the output) with V2SF
;; operand 2.
;;   0: operand 2 in memory                   -> movhps
;;   1: operand 2 in a register               -> movlhps
;;   2: offsettable memory dest, register src -> movlps into the %H0 form
2187 (define_insn "sse_loadhps"
2188 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
2191 (match_operand:V4SF 1 "nonimmediate_operand" "0,0,0")
2192 (parallel [(const_int 0) (const_int 1)]))
2193 (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
2196 movhps\t{%2, %0|%0, %2}
2197 movlhps\t{%2, %0|%0, %2}
2198 movlps\t{%2, %H0|%H0, %2}"
2199 [(set_attr "type" "ssemov")
2200 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Extract elements 0 and 1 of V4SF operand 1 as a V2SF value.
;;   0: register -> memory    movlps
;;   1: register -> register  movaps (whole-register copy)
;;   2: memory -> register    movlps
2202 (define_insn "sse_storelps"
2203 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
2205 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m")
2206 (parallel [(const_int 0) (const_int 1)])))]
2209 movlps\t{%1, %0|%0, %1}
2210 movaps\t{%1, %0|%0, %1}
2211 movlps\t{%1, %0|%0, %1}"
2212 [(set_attr "type" "ssemov")
2213 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander counterpart of sse_loadlps: it accepts any nonimmediate
;; operands and runs ix86_fixup_binary_operands on them as its preparation
;; statement.
2215 (define_expand "sse_loadlps_exp"
2216 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
2218 (match_operand:V2SF 2 "nonimmediate_operand" "")
2220 (match_operand:V4SF 1 "nonimmediate_operand" "")
2221 (parallel [(const_int 2) (const_int 3)]))))]
2223 "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
;; Pairs V2SF operand 2 with elements 2 and 3 of V4SF operand 1.
;;   0: operand 2 tied to the output, operand 1 in a register -> shufps
;;      with immediate 0xe4 and operand 1 as the source
;;   1: operand 1 tied to the output, operand 2 in memory     -> movlps
;;   2: memory destination, operand 2 in a register           -> movlps
2225 (define_insn "sse_loadlps"
2226 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
2228 (match_operand:V2SF 2 "nonimmediate_operand" "0,m,x")
2230 (match_operand:V4SF 1 "nonimmediate_operand" "x,0,0")
2231 (parallel [(const_int 2) (const_int 3)]))))]
2234 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
2235 movlps\t{%2, %0|%0, %2}
2236 movlps\t{%2, %0|%0, %2}"
2237 [(set_attr "type" "sselog,ssemov,ssemov")
2238 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Register-only movss: operand 1 is tied to the output and operand 2 is
;; the source named in the instruction.  The insn mode attribute is SF.
2240 (define_insn "sse_movss"
2241 [(set (match_operand:V4SF 0 "register_operand" "=x")
2243 (match_operand:V4SF 2 "register_operand" "x")
2244 (match_operand:V4SF 1 "register_operand" "0")
2247 "movss\t{%2, %0|%0, %2}"
2248 [(set_attr "type" "ssemov")
2249 (set_attr "mode" "SF")])
;; SF operand 1 is tied to the output register; the insn emits shufps with
;; immediate 0 using operand 0 as both source and destination.
2251 (define_insn "*vec_dupv4sf"
2252 [(set (match_operand:V4SF 0 "register_operand" "=x")
2254 (match_operand:SF 1 "register_operand" "0")))]
2256 "shufps\t{$0, %0, %0|%0, %0, 0}"
2257 [(set_attr "type" "sselog1")
2258 (set_attr "mode" "V4SF")])
;; Build a V2SF from two SF operands.  Alternatives, in order:
;;   0: unpcklps with a register operand 2
;;   1: insertps with immediate 0x10 and a memory operand 2
;;   2: movss loading operand 1 from memory (operand 2 matches constraint C)
;;   3: punpckldq on MMX registers (*y)
;;   4: movd loading operand 1 from memory into an MMX register
;; Only the insertps alternative sets prefix_extra.
2260 ;; Although insertps takes register source, we prefer
2261 ;; unpcklps with register source since it is shorter.
2262 (define_insn "*vec_concatv2sf_sse4_1"
2263 [(set (match_operand:V2SF 0 "register_operand" "=x,x,x,*y ,*y")
2265 (match_operand:SF 1 "nonimmediate_operand" " 0,0,m, 0 , m")
2266 (match_operand:SF 2 "vector_move_operand" " x,m,C,*ym, C")))]
2269 unpcklps\t{%2, %0|%0, %2}
2270 insertps\t{$0x10, %2, %0|%0, %2, 0x10}
2271 movss\t{%1, %0|%0, %1}
2272 punpckldq\t{%2, %0|%0, %2}
2273 movd\t{%1, %0|%0, %1}"
2274 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
2275 (set_attr "prefix_extra" "*,1,*,*,*")
2276 (set_attr "mode" "V4SF,V4SF,SF,DI,DI")])
;; Build a V2SF from two SF operands without insertps.  Alternatives:
;;   0: unpcklps with a register operand 2
;;   1: movss loading operand 1 from memory (operand 2 matches constraint C)
;;   2: punpckldq on MMX registers (*y)
;;   3: movd loading operand 1 from memory into an MMX register
2278 ;; ??? In theory we can match memory for the MMX alternative, but allowing
2279 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
2280 ;; alternatives pretty much forces the MMX alternative to be chosen.
2281 (define_insn "*vec_concatv2sf_sse"
2282 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
2284 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
2285 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
2288 unpcklps\t{%2, %0|%0, %2}
2289 movss\t{%1, %0|%0, %1}
2290 punpckldq\t{%2, %0|%0, %2}
2291 movd\t{%1, %0|%0, %1}"
2292 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
2293 (set_attr "mode" "V4SF,SF,DI,DI")])
;; Build a V4SF from two V2SF operands; operand 1 is tied to the output.
;; A register operand 2 uses movlhps, a memory operand 2 uses movhps.
2295 (define_insn "*vec_concatv4sf_sse"
2296 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2298 (match_operand:V2SF 1 "register_operand" " 0,0")
2299 (match_operand:V2SF 2 "nonimmediate_operand" " x,m")))]
2302 movlhps\t{%2, %0|%0, %2}
2303 movhps\t{%2, %0|%0, %2}"
2304 [(set_attr "type" "ssemov")
2305 (set_attr "mode" "V4SF,V2SF")])
;; vec_init expander, instantiated for every mode in SSEMODE.  All work is
;; delegated to ix86_expand_vector_init, called with false as its first
;; argument, operand 0 as the target and operand 1 as the initializer.
2307 (define_expand "vec_init<mode>"
2308 [(match_operand:SSEMODE 0 "register_operand" "")
2309 (match_operand 1 "" "")]
2312 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Insert SF operand 2 into V4SF operand 1.  Visible alternatives:
;;   0: register source, operand 1 tied to the output -> movss
;;   1: memory source, operand 1 matching constraint C -> movss
;;   2: general-register source (Y2 destination)       -> movd
;; NOTE(review): the fourth alternative (memory destination) has no
;; template line in this listing; its output is not documented here.
2316 (define_insn "vec_setv4sf_0"
2317 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,Y2,m")
2320 (match_operand:SF 2 "general_operand" " x,m,*r,x*rfF"))
2321 (match_operand:V4SF 1 "vector_move_operand" " 0,C,C ,0")
2325 movss\t{%2, %0|%0, %2}
2326 movss\t{%2, %0|%0, %2}
2327 movd\t{%2, %0|%0, %2}
2329 [(set_attr "type" "ssemov")
2330 (set_attr "mode" "SF")])
;; insertps form of vec_set.  Operand 3 uses the const_pow2_1_to_8
;; predicate; the output code replaces it with exact_log2 of its value
;; shifted left by 4, and that becomes the insertps immediate.
2332 ;; A subset is vec_setv4sf.
2333 (define_insn "*vec_setv4sf_sse4_1"
2334 [(set (match_operand:V4SF 0 "register_operand" "=x")
2337 (match_operand:SF 2 "nonimmediate_operand" "xm"))
2338 (match_operand:V4SF 1 "register_operand" "0")
2339 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
2342 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
2343 return "insertps\t{%3, %2, %0|%0, %2, %3}";
2345 [(set_attr "type" "sselog")
2346 (set_attr "prefix_extra" "1")
2347 (set_attr "mode" "V4SF")])
;; insertps modeled as an unspec over register operands 2 and 1 (operand 1
;; tied to the output) plus immediate operand 3, which uses the
;; const_0_to_255 predicate and is printed unchanged.
;; NOTE(review): the ';' after the output template starts a comment in the
;; .md syntax, so it should be harmless - confirm and consider removing it.
2349 (define_insn "sse4_1_insertps"
2350 [(set (match_operand:V4SF 0 "register_operand" "=x")
2351 (unspec:V4SF [(match_operand:V4SF 2 "register_operand" "x")
2352 (match_operand:V4SF 1 "register_operand" "0")
2353 (match_operand:SI 3 "const_0_to_255_operand" "n")]
2356 "insertps\t{%3, %2, %0|%0, %2, %3}";
2357 [(set_attr "type" "sselog")
2358 (set_attr "prefix_extra" "1")
2359 (set_attr "mode" "V4SF")])
;; Split active after reload (TARGET_SSE && reload_completed) for a V4SF
;; memory destination: SF operand 1 is stored with an ordinary move into
;; operand 0 re-addressed as SFmode at byte offset 0.
2362 [(set (match_operand:V4SF 0 "memory_operand" "")
2365 (match_operand:SF 1 "nonmemory_operand" ""))
2368 "TARGET_SSE && reload_completed"
2371 emit_move_insn (adjust_address (operands[0], SFmode, 0), operands[1]);
;; vec_set expander, instantiated for every mode in SSEMODE.  All work is
;; delegated to ix86_expand_vector_set, called with false as its first
;; argument, the vector (operand 0), the scalar (operand 1) and the integer
;; value of constant operand 2.
2375 (define_expand "vec_set<mode>"
2376 [(match_operand:SSEMODE 0 "register_operand" "")
2377 (match_operand:<ssescalarmode> 1 "register_operand" "")
2378 (match_operand 2 "const_int_operand" "")]
2381 ix86_expand_vector_set (false, operands[0], operands[1],
2382 INTVAL (operands[2]));
;; Extract element 0 of V4SF operand 1 as an SF value; memory-to-memory is
;; rejected by the insn condition.  After reload the insn is split into a
;; plain move of an SFmode view of operand 1.  That view is built either
;; with gen_rtx_REG on the same register number or with gen_lowpart.
;; NOTE(review): the test choosing between the two is not visible in this
;; listing.
2386 (define_insn_and_split "*vec_extractv4sf_0"
2387 [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,fr")
2389 (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m")
2390 (parallel [(const_int 0)])))]
2391 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2393 "&& reload_completed"
2396 rtx op1 = operands[1];
2398 op1 = gen_rtx_REG (SFmode, REGNO (op1));
2400 op1 = gen_lowpart (SFmode, op1);
2401 emit_move_insn (operands[0], op1);
;; extractps: moves the element of register operand 1 selected by constant
;; operand 2 (const_0_to_3 predicate) into a general register or memory
;; (constraint "rm").
2405 (define_insn "*sse4_1_extractps"
2406 [(set (match_operand:SF 0 "nonimmediate_operand" "=rm")
2408 (match_operand:V4SF 1 "register_operand" "x")
2409 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
2411 "extractps\t{%2, %1, %0|%0, %1, %2}"
2412 [(set_attr "type" "sselog")
2413 (set_attr "prefix_extra" "1")
2414 (set_attr "mode" "V4SF")])
;; Extract an element from a V4SF held in offsettable memory.  The split
;; code reads the element index i from operand 2 and emits a plain SFmode
;; move from operand 1 re-addressed at byte offset i*4.
2416 (define_insn_and_split "*vec_extract_v4sf_mem"
2417 [(set (match_operand:SF 0 "register_operand" "=x*rf")
2419 (match_operand:V4SF 1 "memory_operand" "o")
2420 (parallel [(match_operand 2 "const_0_to_3_operand" "n")])))]
2426 int i = INTVAL (operands[2]);
2428 emit_move_insn (operands[0], adjust_address (operands[1], SFmode, i*4));
;; vec_extract expander, instantiated for every mode in SSEMODE.  All work
;; is delegated to ix86_expand_vector_extract, called with false as its
;; first argument, the scalar target (operand 0), the vector (operand 1)
;; and the integer value of constant operand 2.
2432 (define_expand "vec_extract<mode>"
2433 [(match_operand:<ssescalarmode> 0 "register_operand" "")
2434 (match_operand:SSEMODE 1 "register_operand" "")
2435 (match_operand 2 "const_int_operand" "")]
2438 ix86_expand_vector_extract (false, operands[0], operands[1],
2439 INTVAL (operands[2]));
2443 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2445 ;; Parallel double-precision floating point element swizzling
2447 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander counterpart of sse2_unpckhpd: it accepts any nonimmediate V2DF
;; operands and runs ix86_fixup_binary_operands on them as its preparation
;; statement.
2449 (define_expand "sse2_unpckhpd_exp"
2450 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
2453 (match_operand:V2DF 1 "nonimmediate_operand" "")
2454 (match_operand:V2DF 2 "nonimmediate_operand" ""))
2455 (parallel [(const_int 1)
2458 "ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);")
;; Three alternatives:
;;   0: operand 1 tied to the output, register operand 2 -> unpckhpd
;;   1: operand 2 tied to the output, operand 1 in offsettable memory
;;      -> movlpd reading the %H1 form of operand 1
;;   2: memory destination tied to operand 2, register operand 1 -> movhpd
;; The insn condition rejects operands 1 and 2 both being memory.
2460 (define_insn "sse2_unpckhpd"
2461 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
2464 (match_operand:V2DF 1 "nonimmediate_operand" " 0,o,x")
2465 (match_operand:V2DF 2 "nonimmediate_operand" " x,0,0"))
2466 (parallel [(const_int 1)
2468 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2470 unpckhpd\t{%2, %0|%0, %2}
2471 movlpd\t{%H1, %0|%0, %H1}
2472 movhpd\t{%1, %0|%0, %1}"
2473 [(set_attr "type" "sselog,ssemov,ssemov")
2474 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; movddup pattern.  Alternative 0 (register destination, register or
;; memory source) emits movddup.
;; NOTE(review): alternative 1 (offsettable memory destination, register
;; source) has no template line in this listing; the split that follows
;; this insn handles a memory destination after reload.
2476 (define_insn "*sse3_movddup"
2477 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,o")
2480 (match_operand:V2DF 1 "nonimmediate_operand" "xm,x")
2482 (parallel [(const_int 0)
2484 "TARGET_SSE3 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2486 movddup\t{%1, %0|%0, %1}
2488 [(set_attr "type" "sselog1,ssemov")
2489 (set_attr "mode" "V2DF")])
;; Split active after reload (TARGET_SSE3 && reload_completed) for a memory
;; destination and register source: the source register is viewed as DFmode
;; and stored twice, at byte offsets 0 and 8 of operand 0.
2492 [(set (match_operand:V2DF 0 "memory_operand" "")
2495 (match_operand:V2DF 1 "register_operand" "")
2497 (parallel [(const_int 0)
2499 "TARGET_SSE3 && reload_completed"
2502 rtx low = gen_rtx_REG (DFmode, REGNO (operands[1]));
2503 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
2504 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
;; Expander counterpart of sse2_unpcklpd: it accepts any nonimmediate V2DF
;; operands and runs ix86_fixup_binary_operands on them as its preparation
;; statement.
2508 (define_expand "sse2_unpcklpd_exp"
2509 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
2512 (match_operand:V2DF 1 "nonimmediate_operand" "")
2513 (match_operand:V2DF 2 "nonimmediate_operand" ""))
2514 (parallel [(const_int 0)
2517 "ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);")
;; Three alternatives with operand 1 always tied to the output:
;;   0: register operand 2                    -> unpcklpd
;;   1: memory operand 2                      -> movhpd
;;   2: offsettable memory dest, register src -> movlpd into the %H0 form
;; The insn condition rejects operands 1 and 2 both being memory.
2519 (define_insn "sse2_unpcklpd"
2520 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o")
2523 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0")
2524 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x"))
2525 (parallel [(const_int 0)
2527 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2529 unpcklpd\t{%2, %0|%0, %2}
2530 movhpd\t{%2, %0|%0, %2}
2531 movlpd\t{%2, %H0|%H0, %2}"
2532 [(set_attr "type" "sselog,ssemov,ssemov")
2533 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; Expander taking the shufpd immediate as operand 3 and forwarding to
;; sse2_shufpd_1.  The last selector passed is 3 when bit 1 of the
;; immediate is set and 2 otherwise.
;; NOTE(review): the argument line carrying the first selector is not
;; visible in this listing.
2535 (define_expand "sse2_shufpd"
2536 [(match_operand:V2DF 0 "register_operand" "")
2537 (match_operand:V2DF 1 "register_operand" "")
2538 (match_operand:V2DF 2 "nonimmediate_operand" "")
2539 (match_operand:SI 3 "const_int_operand" "")]
2542 int mask = INTVAL (operands[3]);
2543 emit_insn (gen_sse2_shufpd_1 (operands[0], operands[1], operands[2],
2545 GEN_INT (mask & 2 ? 3 : 2)));
;; shufpd with the selection spelled out as two constants: operand 3 uses
;; the const_0_to_1 predicate and operand 4 the const_2_to_3 predicate.
;; The output code rebuilds the instruction immediate as operand 3 in bit 0
;; and (operand 4 - 2) in bit 1, and stores it back into operands[3] for
;; the template to print.
2549 (define_insn "sse2_shufpd_1"
2550 [(set (match_operand:V2DF 0 "register_operand" "=x")
2553 (match_operand:V2DF 1 "register_operand" "0")
2554 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
2555 (parallel [(match_operand 3 "const_0_to_1_operand" "")
2556 (match_operand 4 "const_2_to_3_operand" "")])))]
2560 mask = INTVAL (operands[3]);
2561 mask |= (INTVAL (operands[4]) - 2) << 1;
2562 operands[3] = GEN_INT (mask);
2564 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
2566 [(set_attr "type" "sselog")
2567 (set_attr "mode" "V2DF")])
;; Extract element 1 of V2DF operand 1 as a DF value; memory-to-memory is
;; rejected by the insn condition.  Alternative 0 (register to memory)
;; emits movhpd.
;; NOTE(review): the templates for alternatives 1 (operand 1 tied to the
;; output) and 2 (offsettable memory source) are not visible in this
;; listing.
2569 (define_insn "sse2_storehpd"
2570 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x*fr")
2572 (match_operand:V2DF 1 "nonimmediate_operand" " x,0,o")
2573 (parallel [(const_int 1)])))]
2574 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2576 movhpd\t{%1, %0|%0, %1}
2579 [(set_attr "type" "ssemov,sselog1,ssemov")
2580 (set_attr "mode" "V1DF,V2DF,DF")])
;; Split active after reload (TARGET_SSE2 && reload_completed): extracting
;; element 1 from a V2DF in memory becomes a plain DFmode load from byte
;; offset 8 of that memory operand.
2583 [(set (match_operand:DF 0 "register_operand" "")
2585 (match_operand:V2DF 1 "memory_operand" "")
2586 (parallel [(const_int 1)])))]
2587 "TARGET_SSE2 && reload_completed"
2588 [(set (match_dup 0) (match_dup 1))]
2590 operands[1] = adjust_address (operands[1], DFmode, 8);
;; Extract element 0 of V2DF operand 1 as a DF value; memory-to-memory is
;; rejected by the insn condition.  Alternative 0 (register to memory)
;; emits movlpd.
;; NOTE(review): the templates for alternatives 1 and 2 are not visible in
;; this listing.
2593 (define_insn "sse2_storelpd"
2594 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x*fr")
2596 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m")
2597 (parallel [(const_int 0)])))]
2598 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2600 movlpd\t{%1, %0|%0, %1}
2603 [(set_attr "type" "ssemov")
2604 (set_attr "mode" "V1DF,DF,DF")])
;; Split active after reload (TARGET_SSE2 && reload_completed): extracting
;; element 0 into a register becomes a plain move of a DFmode view of
;; operand 1.  That view is built either with gen_rtx_REG on the same
;; register number or with gen_lowpart.
;; NOTE(review): the test choosing between the two is not visible in this
;; listing.
2607 [(set (match_operand:DF 0 "register_operand" "")
2609 (match_operand:V2DF 1 "nonimmediate_operand" "")
2610 (parallel [(const_int 0)])))]
2611 "TARGET_SSE2 && reload_completed"
2614 rtx op1 = operands[1];
2616 op1 = gen_rtx_REG (DFmode, REGNO (op1));
2618 op1 = gen_lowpart (DFmode, op1);
2619 emit_move_insn (operands[0], op1);
;; Expander counterpart of sse2_loadhpd: it accepts any nonimmediate
;; operands and runs ix86_fixup_binary_operands on them as its preparation
;; statement.
2623 (define_expand "sse2_loadhpd_exp"
2624 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
2627 (match_operand:V2DF 1 "nonimmediate_operand" "")
2628 (parallel [(const_int 0)]))
2629 (match_operand:DF 2 "nonimmediate_operand" "")))]
2631 "ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);")
;; Replace the high element of a V2DF: the result pairs element 0 of
;; operand 1 with DF operand 2.  Operand 1 is tied to the output in every
;; alternative:
;;   0: operand 2 in memory    -> movhpd
;;   1: operand 2 in a register -> unpcklpd
;;   2: memory destination     -> template not visible in this listing
;; The insn condition rejects operands 1 and 2 both being memory.
;;
;; The former alternative with operand 2 tied to the output and operand 1
;; in another register emitted "shufpd $1, %1, %0".  With that immediate
;; shufpd leaves {old high half of %0, low half of %1} in %0, whereas the
;; pattern requires {low half of %1, operand 2}.  shufpd always takes its
;; low result from the destination register, so no immediate can produce
;; the required value; the alternative is removed and the register
;; allocator falls back to the unpcklpd alternative.
2633 (define_insn "sse2_loadhpd"
2634 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o")
2637 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0")
2638 (parallel [(const_int 0)]))
2639 (match_operand:DF 2 "nonimmediate_operand" " m,x,x*fr")))]
2640 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2642 movhpd\t{%2, %0|%0, %2}
2643 unpcklpd\t{%2, %0|%0, %2}
2646 [(set_attr "type" "ssemov,sselog,other")
2647 (set_attr "mode" "V1DF,V2DF,DF")])
;; Split active after reload (TARGET_SSE2 && reload_completed) for a memory
;; destination whose element 0 is kept: the DF register operand 1 is
;; stored with a plain move to byte offset 8 of operand 0.
2650 [(set (match_operand:V2DF 0 "memory_operand" "")
2652 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
2653 (match_operand:DF 1 "register_operand" "")))]
2654 "TARGET_SSE2 && reload_completed"
2655 [(set (match_dup 0) (match_dup 1))]
2657 operands[0] = adjust_address (operands[0], DFmode, 8);
;; Expander counterpart of sse2_loadlpd: it accepts any nonimmediate
;; operands and runs ix86_fixup_binary_operands on them as its preparation
;; statement.
2660 (define_expand "sse2_loadlpd_exp"
2661 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
2663 (match_operand:DF 2 "nonimmediate_operand" "")
2665 (match_operand:V2DF 1 "nonimmediate_operand" "")
2666 (parallel [(const_int 1)]))))]
2668 "ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);")
;; Replace the low element of a V2DF: the result pairs DF operand 2 with
;; element 1 of operand 1.  Alternatives, in order:
;;   0: operand 2 in memory, operand 1 matching constraint C -> movsd
;;   1: operand 2 in memory, operand 1 tied to the output    -> movlpd
;;   2: operand 2 in a register, operand 1 tied to the output -> movsd
;;   3: operand 2 tied to the output, operand 1 in a register -> shufpd $2
;;   4: operand 2 tied to the output, operand 1 in offsettable memory
;;      -> movhpd reading the %H1 form of operand 1
;;   5: memory destination -> template not visible in this listing
;; The insn condition rejects operands 1 and 2 both being memory.
;;
;; In alternative 3 the shufpd source must be operand 1: immediate 2 keeps
;; the low half of the destination (operand 2) and takes the high half from
;; the source.  Naming %2 there, which is the same register as %0, made the
;; shuffle an identity and never fetched operand 1's high element.
2670 (define_insn "sse2_loadlpd"
2671 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m")
2673 (match_operand:DF 2 "nonimmediate_operand" " m,m,x,0,0,x*fr")
2675 (match_operand:V2DF 1 "vector_move_operand" " C,0,0,x,o,0")
2676 (parallel [(const_int 1)]))))]
2677 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2679 movsd\t{%2, %0|%0, %2}
2680 movlpd\t{%2, %0|%0, %2}
2681 movsd\t{%2, %0|%0, %2}
2682 shufpd\t{$2, %1, %0|%0, %1, 2}
2683 movhpd\t{%H1, %0|%0, %H1}
2685 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,other")
2686 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,DF")])
;; Split active after reload (TARGET_SSE2 && reload_completed) for a memory
;; destination whose element 1 is kept: DF register operand 1 becomes the
;; new element 0, so it is stored with a plain move at byte offset 0 of
;; operand 0.  Offset 8 would overwrite element 1, the element this pattern
;; preserves; offset 8 is correct only for the loadhpd split, which keeps
;; element 0.
2689 [(set (match_operand:V2DF 0 "memory_operand" "")
2691 (match_operand:DF 1 "register_operand" "")
2692 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
2693 "TARGET_SSE2 && reload_completed"
2694 [(set (match_dup 0) (match_dup 1))]
2696 operands[0] = adjust_address (operands[0], DFmode, 0);
;; Extract element 1 of a V2DF on targets with SSE but without SSE2, using
;; single-precision move instructions:
;;   0: register -> memory              movhps
;;   1: register -> register            movhlps
;;   2: offsettable memory -> register  movlps reading the %H1 form
;; Memory-to-memory is rejected by the insn condition.
2699 ;; Not sure these two are ever used, but it doesn't hurt to have
2701 (define_insn "*vec_extractv2df_1_sse"
2702 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
2704 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
2705 (parallel [(const_int 1)])))]
2706 "!TARGET_SSE2 && TARGET_SSE
2707 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2709 movhps\t{%1, %0|%0, %1}
2710 movhlps\t{%1, %0|%0, %1}
2711 movlps\t{%H1, %0|%0, %H1}"
2712 [(set_attr "type" "ssemov")
2713 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Extract element 0 of a V2DF on targets with SSE but without SSE2, using
;; single-precision move instructions:
;;   0: register -> memory    movlps
;;   1: register -> register  movaps (whole-register copy)
;;   2: memory -> register    movlps
;; Memory-to-memory is rejected by the insn condition.
2715 (define_insn "*vec_extractv2df_0_sse"
2716 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
2718 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
2719 (parallel [(const_int 0)])))]
2720 "!TARGET_SSE2 && TARGET_SSE
2721 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2723 movlps\t{%1, %0|%0, %1}
2724 movaps\t{%1, %0|%0, %1}
2725 movlps\t{%1, %0|%0, %1}"
2726 [(set_attr "type" "ssemov")
2727 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Combine V2DF operands 2 and 1.  Alternatives, in order:
;;   0: operand 1 tied to the output, register operand 2    -> movsd
;;   1: operand 1 tied to the output, memory operand 2      -> movlpd
;;   2: memory destination tied to operand 1, register op 2 -> movlpd
;;   3: operand 2 tied to the output, register operand 1    -> shufpd $2
;;   4: operand 2 tied to the output, operand 1 in offsettable memory
;;      -> movhps reading the %H1 form of operand 1
;;   5: offsettable memory destination tied to operand 2, register
;;      operand 1 -> movhps into the %H0 form
;;
;; In alternative 3 the shufpd source must be operand 1: immediate 2 keeps
;; the low half of the destination (operand 2) and takes the high half from
;; the source.  Naming %2 there, which is the same register as %0, made the
;; shuffle an identity and never fetched operand 1's high element.
2729 (define_insn "sse2_movsd"
2730 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m,x,x,o")
2732 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x,0,0,0")
2733 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0,x,o,x")
2737 movsd\t{%2, %0|%0, %2}
2738 movlpd\t{%2, %0|%0, %2}
2739 movlpd\t{%2, %0|%0, %2}
2740 shufpd\t{$2, %1, %0|%0, %1, 2}
2741 movhps\t{%H1, %0|%0, %H1}
2742 movhps\t{%1, %H0|%H0, %1}"
2743 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
2744 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,V1DF")])
;; movddup of a DF operand held in a register or memory into a V2DF
;; register.
2746 (define_insn "*vec_dupv2df_sse3"
2747 [(set (match_operand:V2DF 0 "register_operand" "=x")
2749 (match_operand:DF 1 "nonimmediate_operand" "xm")))]
2751 "movddup\t{%1, %0|%0, %1}"
2752 [(set_attr "type" "sselog1")
2753 (set_attr "mode" "DF")])
;; Named V2DF pattern whose DF register operand 1 is tied to the output.
;; NOTE(review): the condition and output template lines are not visible in
;; this listing.
2755 (define_insn "vec_dupv2df"
2756 [(set (match_operand:V2DF 0 "register_operand" "=x")
2758 (match_operand:DF 1 "register_operand" "0")))]
2761 [(set_attr "type" "sselog1")
2762 (set_attr "mode" "V2DF")])
;; movddup of DF operand 1 (register or memory) into a V2DF register.
;; NOTE(review): the second element of the concatenation is not visible in
;; this listing.
2764 (define_insn "*vec_concatv2df_sse3"
2765 [(set (match_operand:V2DF 0 "register_operand" "=x")
2767 (match_operand:DF 1 "nonimmediate_operand" "xm")
2770 "movddup\t{%1, %0|%0, %1}"
2771 [(set_attr "type" "sselog1")
2772 (set_attr "mode" "DF")])
;; Build a V2DF from two DF operands.  Alternatives, in order:
;;   0: Y2 registers, register operand 2         -> unpcklpd
;;   1: Y2 register, memory operand 2            -> movhpd
;;   2: operand 1 in memory, operand 2 matches C -> movsd
;;   3: x registers, register operand 2          -> movlhps
;;   4: x register, memory operand 2             -> movhps
2774 (define_insn "*vec_concatv2df"
2775 [(set (match_operand:V2DF 0 "register_operand" "=Y2,Y2,Y2,x,x")
2777 (match_operand:DF 1 "nonimmediate_operand" " 0 ,0 ,m ,0,0")
2778 (match_operand:DF 2 "vector_move_operand" " Y2,m ,C ,x,m")))]
2781 unpcklpd\t{%2, %0|%0, %2}
2782 movhpd\t{%2, %0|%0, %2}
2783 movsd\t{%1, %0|%0, %1}
2784 movlhps\t{%2, %0|%0, %2}
2785 movhps\t{%2, %0|%0, %2}"
2786 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,ssemov")
2787 (set_attr "mode" "V2DF,V1DF,DF,V4SF,V2SF")])
2789 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2791 ;; Parallel integral arithmetic
2793 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Negation expander for the SSEMODEI integer vector modes.  Its
;; preparation statement sets operands[2] to a register holding the zero
;; vector of the mode.
;; NOTE(review): the RTL line that consumes operand 2 is not visible in
;; this listing.
2795 (define_expand "neg<mode>2"
2796 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2799 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")))]
2801 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
;; Expander for the plus/minus patterns over the SSEMODEI modes; operands
;; are prepared with ix86_fixup_binary_operands_no_copy using the pattern's
;; <CODE>.
2803 (define_expand "<plusminus_insn><mode>3"
2804 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2806 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
2807 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
2809 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Matching insn for the plus/minus expander.  The mnemonic is assembled
;; as "p" + <plusminus_mnemonic> + <ssevecsize>; operand 1 is tied to the
;; output (with the <comm> modifier) and operand 2 may be a register or
;; memory.
2811 (define_insn "*<plusminus_insn><mode>3"
2812 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
2814 (match_operand:SSEMODEI 1 "nonimmediate_operand" "<comm>0")
2815 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
2816 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
2817 "p<plusminus_mnemonic><ssevecsize>\t{%2, %0|%0, %2}"
2818 [(set_attr "type" "sseiadd")
2819 (set_attr "prefix_data16" "1")
2820 (set_attr "mode" "TI")])
;; Expander for the sat_plusminus codes over the SSEMODE12 modes (V16QI
;; and V8HI); operands are prepared with
;; ix86_fixup_binary_operands_no_copy.
2822 (define_expand "sse2_<plusminus_insn><mode>3"
2823 [(set (match_operand:SSEMODE12 0 "register_operand" "")
2824 (sat_plusminus:SSEMODE12
2825 (match_operand:SSEMODE12 1 "nonimmediate_operand" "")
2826 (match_operand:SSEMODE12 2 "nonimmediate_operand" "")))]
2828 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Matching insn for the sat_plusminus expander.  The mnemonic is
;; assembled as "p" + <plusminus_mnemonic> + <ssevecsize>; operand 1 is
;; tied to the output (with the <comm> modifier) and operand 2 may be a
;; register or memory.
2830 (define_insn "*sse2_<plusminus_insn><mode>3"
2831 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2832 (sat_plusminus:SSEMODE12
2833 (match_operand:SSEMODE12 1 "nonimmediate_operand" "<comm>0")
2834 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2835 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
2836 "p<plusminus_mnemonic><ssevecsize>\t{%2, %0|%0, %2}"
2837 [(set_attr "type" "sseiadd")
2838 (set_attr "prefix_data16" "1")
2839 (set_attr "mode" "TI")])
;; V16QI multiply, split before reload into word multiplies.  Two
;; expansion sequences are visible:
;;  - an SSE5 sequence that widens the high and low bytes of each input
;;    with ix86_expand_sse5_unpack, multiplies the halves with mulv8hi3 and
;;    narrows the result with ix86_expand_sse5_pack;
;;  - a generic sequence that duplicates bytes with punpckhbw/punpcklbw,
;;    multiplies the halves as V8HI, and then merges the wanted bytes back
;;    with three rounds of punpckhbw/punpcklbw plus a final punpcklbw.
;; NOTE(review): the test selecting between the two sequences and the
;; definition of op0 are not visible in this listing.
2841 (define_insn_and_split "mulv16qi3"
2842 [(set (match_operand:V16QI 0 "register_operand" "")
2843 (mult:V16QI (match_operand:V16QI 1 "register_operand" "")
2844 (match_operand:V16QI 2 "register_operand" "")))]
2846 && !(reload_completed || reload_in_progress)"
2851 rtx t[12], op0, op[3];
2856 /* On SSE5, we can take advantage of the pperm instruction to pack and
2857 unpack the bytes. Unpack data such that we've got a source byte in
2858 each low byte of each word. We don't care what goes into the high
2859 byte, so put 0 there. */
2860 for (i = 0; i < 6; ++i)
2861 t[i] = gen_reg_rtx (V8HImode);
2863 for (i = 0; i < 2; i++)
2866 op[1] = operands[i+1];
2867 ix86_expand_sse5_unpack (op, true, true); /* high bytes */
2870 ix86_expand_sse5_unpack (op, true, false); /* low bytes */
2873 /* Multiply words. */
2874 emit_insn (gen_mulv8hi3 (t[4], t[0], t[1])); /* high bytes */
2875 emit_insn (gen_mulv8hi3 (t[5], t[2], t[3])); /* low bytes */
2877 /* Pack the low byte of each word back into a single xmm */
2878 op[0] = operands[0];
2881 ix86_expand_sse5_pack (op);
2885 for (i = 0; i < 12; ++i)
2886 t[i] = gen_reg_rtx (V16QImode);
2888 /* Unpack data such that we've got a source byte in each low byte of
2889 each word. We don't care what goes into the high byte of each word.
2890 Rather than trying to get zero in there, most convenient is to let
2891 it be a copy of the low byte. */
2892 emit_insn (gen_sse2_punpckhbw (t[0], operands[1], operands[1]));
2893 emit_insn (gen_sse2_punpckhbw (t[1], operands[2], operands[2]));
2894 emit_insn (gen_sse2_punpcklbw (t[2], operands[1], operands[1]));
2895 emit_insn (gen_sse2_punpcklbw (t[3], operands[2], operands[2]));
2897 /* Multiply words. The end-of-line annotations here give a picture of what
2898 the output of that instruction looks like. Dot means don't care; the
2899 letters are the bytes of the result with A being the most significant. */
2900 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[4]), /* .A.B.C.D.E.F.G.H */
2901 gen_lowpart (V8HImode, t[0]),
2902 gen_lowpart (V8HImode, t[1])));
2903 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[5]), /* .I.J.K.L.M.N.O.P */
2904 gen_lowpart (V8HImode, t[2]),
2905 gen_lowpart (V8HImode, t[3])));
2907 /* Extract the relevant bytes and merge them back together. */
2908 emit_insn (gen_sse2_punpckhbw (t[6], t[5], t[4])); /* ..AI..BJ..CK..DL */
2909 emit_insn (gen_sse2_punpcklbw (t[7], t[5], t[4])); /* ..EM..FN..GO..HP */
2910 emit_insn (gen_sse2_punpckhbw (t[8], t[7], t[6])); /* ....AEIM....BFJN */
2911 emit_insn (gen_sse2_punpcklbw (t[9], t[7], t[6])); /* ....CGKO....DHLP */
2912 emit_insn (gen_sse2_punpckhbw (t[10], t[9], t[8])); /* ........ACEGIKMO */
2913 emit_insn (gen_sse2_punpcklbw (t[11], t[9], t[8])); /* ........BDFHJLNP */
2916 emit_insn (gen_sse2_punpcklbw (op0, t[11], t[10])); /* ABCDEFGHIJKLMNOP */
;; V8HI multiply expander; operands are prepared with
;; ix86_fixup_binary_operands_no_copy for MULT.
2920 (define_expand "mulv8hi3"
2921 [(set (match_operand:V8HI 0 "register_operand" "")
2922 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
2923 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
2925 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; pmullw: V8HI multiply.  Operand 1 is tied to the output and marked
;; commutative ("%0"); operand 2 may be a register or memory.
2927 (define_insn "*mulv8hi3"
2928 [(set (match_operand:V8HI 0 "register_operand" "=x")
2929 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
2930 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
2931 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
2932 "pmullw\t{%2, %0|%0, %2}"
2933 [(set_attr "type" "sseimul")
2934 (set_attr "prefix_data16" "1")
2935 (set_attr "mode" "TI")])
;; Expander for the signed V8HI high-part multiply (matched by
;; *smulv8hi3_highpart); operands are prepared with
;; ix86_fixup_binary_operands_no_copy for MULT.
;; NOTE(review): the RTL lines wrapping the operands are not visible in
;; this listing.
2937 (define_expand "smulv8hi3_highpart"
2938 [(set (match_operand:V8HI 0 "register_operand" "")
2943 (match_operand:V8HI 1 "nonimmediate_operand" ""))
2945 (match_operand:V8HI 2 "nonimmediate_operand" "")))
2948 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; pmulhw.  Operand 1 is tied to the output and marked commutative ("%0");
;; operand 2 may be a register or memory.
2950 (define_insn "*smulv8hi3_highpart"
2951 [(set (match_operand:V8HI 0 "register_operand" "=x")
2956 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
2958 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
2960 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
2961 "pmulhw\t{%2, %0|%0, %2}"
2962 [(set_attr "type" "sseimul")
2963 (set_attr "prefix_data16" "1")
2964 (set_attr "mode" "TI")])
;; Expander for the unsigned V8HI high-part multiply (matched by
;; *umulv8hi3_highpart); operands are prepared with
;; ix86_fixup_binary_operands_no_copy for MULT.
;; NOTE(review): the RTL lines wrapping the operands are not visible in
;; this listing.
2966 (define_expand "umulv8hi3_highpart"
2967 [(set (match_operand:V8HI 0 "register_operand" "")
2972 (match_operand:V8HI 1 "nonimmediate_operand" ""))
2974 (match_operand:V8HI 2 "nonimmediate_operand" "")))
2977 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; pmulhuw.  Operand 1 is tied to the output and marked commutative
;; ("%0"); operand 2 may be a register or memory.
2979 (define_insn "*umulv8hi3_highpart"
2980 [(set (match_operand:V8HI 0 "register_operand" "=x")
2985 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
2987 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
2989 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
2990 "pmulhuw\t{%2, %0|%0, %2}"
2991 [(set_attr "type" "sseimul")
2992 (set_attr "prefix_data16" "1")
2993 (set_attr "mode" "TI")])
;; Expander for the widening multiply that takes elements 0 and 2 of each
;; V4SI input and produces a V2DI result (matched by *sse2_umulv2siv2di3);
;; operands are prepared with ix86_fixup_binary_operands_no_copy for MULT.
2995 (define_expand "sse2_umulv2siv2di3"
2996 [(set (match_operand:V2DI 0 "register_operand" "")
3000 (match_operand:V4SI 1 "nonimmediate_operand" "")
3001 (parallel [(const_int 0) (const_int 2)])))
3004 (match_operand:V4SI 2 "nonimmediate_operand" "")
3005 (parallel [(const_int 0) (const_int 2)])))))]
3007 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; pmuludq on elements 0 and 2 of each V4SI input, giving a V2DI result.
;; Operand 1 is tied to the output and marked commutative ("%0"); operand 2
;; may be a register or memory.
3009 (define_insn "*sse2_umulv2siv2di3"
3010 [(set (match_operand:V2DI 0 "register_operand" "=x")
3014 (match_operand:V4SI 1 "nonimmediate_operand" "%0")
3015 (parallel [(const_int 0) (const_int 2)])))
3018 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
3019 (parallel [(const_int 0) (const_int 2)])))))]
3020 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
3021 "pmuludq\t{%2, %0|%0, %2}"
3022 [(set_attr "type" "sseimul")
3023 (set_attr "prefix_data16" "1")
3024 (set_attr "mode" "TI")])
;; Expander for the SSE4.1 widening multiply that takes elements 0 and 2 of
;; each V4SI input and produces a V2DI result (matched by
;; *sse4_1_mulv2siv2di3); operands are prepared with
;; ix86_fixup_binary_operands_no_copy for MULT.
3026 (define_expand "sse4_1_mulv2siv2di3"
3027 [(set (match_operand:V2DI 0 "register_operand" "")
3031 (match_operand:V4SI 1 "nonimmediate_operand" "")
3032 (parallel [(const_int 0) (const_int 2)])))
3035 (match_operand:V4SI 2 "nonimmediate_operand" "")
3036 (parallel [(const_int 0) (const_int 2)])))))]
3038 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; pmuldq on elements 0 and 2 of each V4SI input, giving a V2DI result.
;; Operand 1 is tied to the output and marked commutative ("%0"); operand 2
;; may be a register or memory.  Requires TARGET_SSE4_1.
3040 (define_insn "*sse4_1_mulv2siv2di3"
3041 [(set (match_operand:V2DI 0 "register_operand" "=x")
3045 (match_operand:V4SI 1 "nonimmediate_operand" "%0")
3046 (parallel [(const_int 0) (const_int 2)])))
3049 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
3050 (parallel [(const_int 0) (const_int 2)])))))]
3051 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
3052 "pmuldq\t{%2, %0|%0, %2}"
3053 [(set_attr "type" "sseimul")
3054 (set_attr "prefix_extra" "1")
3055 (set_attr "mode" "TI")])
;; Expander for pmaddwd (matched by *sse2_pmaddwd).  Each V8HI input is
;; referenced twice: once through a selection list starting at element 0
;; and once, via match_dup, through a V4HI selection starting at element 1
;; and ending at element 7.  Operands are prepared with
;; ix86_fixup_binary_operands_no_copy for MULT.
;; NOTE(review): the full selection lists and the arithmetic wrapping them
;; are not visible in this listing.
3057 (define_expand "sse2_pmaddwd"
3058 [(set (match_operand:V4SI 0 "register_operand" "")
3063 (match_operand:V8HI 1 "nonimmediate_operand" "")
3064 (parallel [(const_int 0)
3070 (match_operand:V8HI 2 "nonimmediate_operand" "")
3071 (parallel [(const_int 0)
3077 (vec_select:V4HI (match_dup 1)
3078 (parallel [(const_int 1)
3083 (vec_select:V4HI (match_dup 2)
3084 (parallel [(const_int 1)
3087 (const_int 7)]))))))]
3089 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; pmaddwd producing a V4SI result from two V8HI inputs.  Operand 1 is tied
;; to the output and marked commutative ("%0"); operand 2 may be a register
;; or memory.  Each input is referenced through a selection starting at
;; element 0 and, via match_dup, a V4HI selection starting at element 1.
;; NOTE(review): the full selection lists and the arithmetic wrapping them
;; are not visible in this listing.
3091 (define_insn "*sse2_pmaddwd"
3092 [(set (match_operand:V4SI 0 "register_operand" "=x")
3097 (match_operand:V8HI 1 "nonimmediate_operand" "%0")
3098 (parallel [(const_int 0)
3104 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
3105 (parallel [(const_int 0)
3111 (vec_select:V4HI (match_dup 1)
3112 (parallel [(const_int 1)
3117 (vec_select:V4HI (match_dup 2)
3118 (parallel [(const_int 1)
3121 (const_int 7)]))))))]
3122 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
3123 "pmaddwd\t{%2, %0|%0, %2}"
3124 [(set_attr "type" "sseiadd")
3125 (set_attr "prefix_data16" "1")
3126 (set_attr "mode" "TI")])
;; V4SI multiply expander over register operands.  When TARGET_SSE4_1 or
;; TARGET_SSE5 is set, operands are additionally prepared with
;; ix86_fixup_binary_operands_no_copy for MULT.
3128 (define_expand "mulv4si3"
3129 [(set (match_operand:V4SI 0 "register_operand" "")
3130 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
3131 (match_operand:V4SI 2 "register_operand" "")))]
3134 if (TARGET_SSE4_1 || TARGET_SSE5)
3135 ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);
;; pmulld: V4SI multiply for TARGET_SSE4_1.  Operand 1 is tied to the
;; output and marked commutative ("%0"); operand 2 may be a register or
;; memory.
3138 (define_insn "*sse4_1_mulv4si3"
3139 [(set (match_operand:V4SI 0 "register_operand" "=x")
3140 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%0")
3141 (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
3142 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
3143 "pmulld\t{%2, %0|%0, %2}"
3144 [(set_attr "type" "sseimul")
3145 (set_attr "prefix_extra" "1")
3146 (set_attr "mode" "TI")])
;; The output is earlyclobbered ("=&x").  The split fires once reload has
;; completed, or earlier when operand 0 is not mentioned in operand 1 or
;; operand 2.  The replacement is a plus of a mult, and the preparation
;; code sets operands[3] to the V4SI zero vector.
;; NOTE(review): operands[3] is presumably the addend of that plus; the
;; line using it is not visible in this listing.
3148 ;; We don't have a straight 32-bit parallel multiply on SSE5, so fake it with a
3149 ;; multiply/add. In general, we expect the define_split to occur before
3150 ;; register allocation, so we have to handle the corner case where the target
3151 ;; is the same as one of the inputs.
3152 (define_insn_and_split "*sse5_mulv4si3"
3153 [(set (match_operand:V4SI 0 "register_operand" "=&x")
3154 (mult:V4SI (match_operand:V4SI 1 "register_operand" "%x")
3155 (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
3158 "&& (reload_completed
3159 || (!reg_mentioned_p (operands[0], operands[1])
3160 && !reg_mentioned_p (operands[0], operands[2])))"
3164 (plus:V4SI (mult:V4SI (match_dup 1)
3168 operands[3] = CONST0_RTX (V4SImode);
3170 [(set_attr "type" "ssemuladd")
3171 (set_attr "mode" "TI")])
;; V4SI multiply for SSE2 targets that have neither SSE4.1 nor SSE5.  The
;; condition excludes reload; the expansion allocates fresh pseudo registers
;; t1..t6.  The product is assembled from two sse2_umulv2siv2di3 multiplies,
;; one on the inputs as given and one on copies moved down by a TImode
;; right shift, followed by two pshufd shuffles and a punpckldq merge.
3173 (define_insn_and_split "*sse2_mulv4si3"
3174 [(set (match_operand:V4SI 0 "register_operand" "")
3175 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
3176 (match_operand:V4SI 2 "register_operand" "")))]
3177 "TARGET_SSE2 && !TARGET_SSE4_1 && !TARGET_SSE5
3178 && !(reload_completed || reload_in_progress)"
3183 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
3189 t1 = gen_reg_rtx (V4SImode);
3190 t2 = gen_reg_rtx (V4SImode);
3191 t3 = gen_reg_rtx (V4SImode);
3192 t4 = gen_reg_rtx (V4SImode);
3193 t5 = gen_reg_rtx (V4SImode);
3194 t6 = gen_reg_rtx (V4SImode);
3195 thirtytwo = GEN_INT (32);
3197 /* Multiply elements 2 and 0. */
3198 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t1),
3201 /* Shift both input vectors down one element, so that elements 3
3202 and 1 are now in the slots for elements 2 and 0. For K8, at
3203 least, this is faster than using a shuffle. */
3204 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2),
3205 gen_lowpart (TImode, op1),
3207 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3),
3208 gen_lowpart (TImode, op2),
3210 /* Multiply elements 3 and 1. */
3211 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t4),
3214 /* Move the results in element 2 down to element 1; we don't care
3215 what goes in elements 2 and 3. */
3216 emit_insn (gen_sse2_pshufd_1 (t5, t1, const0_rtx, const2_rtx,
3217 const0_rtx, const0_rtx));
3218 emit_insn (gen_sse2_pshufd_1 (t6, t4, const0_rtx, const2_rtx,
3219 const0_rtx, const0_rtx));
3221 /* Merge the parts back together. */
3222 emit_insn (gen_sse2_punpckldq (op0, t5, t6));
;; V2DI multiply; the condition excludes reload.  Two expansion sequences
;; are present in the body: one built from the SSE5 pmacsdd / phadddq /
;; pmacsdql generators, and one that combines three sse2_umulv2siv2di3
;; partial products (low*low and the two high*low cross terms) with 32-bit
;; shifts and adds.
3226 (define_insn_and_split "mulv2di3"
3227 [(set (match_operand:V2DI 0 "register_operand" "")
3228 (mult:V2DI (match_operand:V2DI 1 "register_operand" "")
3229 (match_operand:V2DI 2 "register_operand" "")))]
3231 && !(reload_completed || reload_in_progress)"
3236 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
3241 /* op1: A,B,C,D, op2: E,F,G,H */
3243 op1 = gen_lowpart (V4SImode, operands[1]);
3244 op2 = gen_lowpart (V4SImode, operands[2]);
3245 t1 = gen_reg_rtx (V4SImode);
3246 t2 = gen_reg_rtx (V4SImode);
3247 t3 = gen_reg_rtx (V4SImode);
3248 t4 = gen_reg_rtx (V2DImode);
3249 t5 = gen_reg_rtx (V2DImode);
3252 emit_insn (gen_sse2_pshufd_1 (t1, op1,
3259 emit_move_insn (t2, CONST0_RTX (V4SImode));
3261 /* t3: (B*E),(A*F),(D*G),(C*H) */
3262 emit_insn (gen_sse5_pmacsdd (t3, t1, op2, t2));
3264 /* t4: (B*E)+(A*F), (D*G)+(C*H) */
3265 emit_insn (gen_sse5_phadddq (t4, t3));
3267 /* t5: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
3268 emit_insn (gen_ashlv2di3 (t5, t4, GEN_INT (32)));
3270 /* op0: (((B*E)+(A*F))<<32)+(B*F), (((D*G)+(C*H))<<32)+(D*H) */
3271 emit_insn (gen_sse5_pmacsdql (op0, op1, op2, t5));
3278 t1 = gen_reg_rtx (V2DImode);
3279 t2 = gen_reg_rtx (V2DImode);
3280 t3 = gen_reg_rtx (V2DImode);
3281 t4 = gen_reg_rtx (V2DImode);
3282 t5 = gen_reg_rtx (V2DImode);
3283 t6 = gen_reg_rtx (V2DImode);
3284 thirtytwo = GEN_INT (32);
3286 /* Multiply low parts. */
3287 emit_insn (gen_sse2_umulv2siv2di3 (t1, gen_lowpart (V4SImode, op1),
3288 gen_lowpart (V4SImode, op2)));
3290 /* Shift input vectors right 32 bits so we can multiply high parts. */
3291 emit_insn (gen_lshrv2di3 (t2, op1, thirtytwo));
3292 emit_insn (gen_lshrv2di3 (t3, op2, thirtytwo));
3294 /* Multiply high parts by low parts. */
3295 emit_insn (gen_sse2_umulv2siv2di3 (t4, gen_lowpart (V4SImode, op1),
3296 gen_lowpart (V4SImode, t3)));
3297 emit_insn (gen_sse2_umulv2siv2di3 (t5, gen_lowpart (V4SImode, op2),
3298 gen_lowpart (V4SImode, t2)));
3300 /* Shift them back. */
3301 emit_insn (gen_ashlv2di3 (t4, t4, thirtytwo));
3302 emit_insn (gen_ashlv2di3 (t5, t5, thirtytwo));
3304 /* Add the three parts together. */
3305 emit_insn (gen_addv2di3 (t6, t1, t4));
3306 emit_insn (gen_addv2di3 (op0, t6, t5));
;; Widening signed V8HI multiply, high half.  mulv8hi3 produces the V8HI
;; product (t1) and smulv8hi3_highpart the signed high part (t2); the two
;; are combined with vec_interleave_highv8hi into operand 0 viewed as V8HI.
3310 (define_expand "vec_widen_smult_hi_v8hi"
3311 [(match_operand:V4SI 0 "register_operand" "")
3312 (match_operand:V8HI 1 "register_operand" "")
3313 (match_operand:V8HI 2 "register_operand" "")]
3316 rtx op1, op2, t1, t2, dest;
3320 t1 = gen_reg_rtx (V8HImode);
3321 t2 = gen_reg_rtx (V8HImode);
3322 dest = gen_lowpart (V8HImode, operands[0]);
3324 emit_insn (gen_mulv8hi3 (t1, op1, op2));
3325 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
3326 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
;; Widening signed V8HI multiply, low half.  mulv8hi3 produces the V8HI
;; product (t1) and smulv8hi3_highpart the signed high part (t2); the two
;; are combined with vec_interleave_lowv8hi into operand 0 viewed as V8HI.
3330 (define_expand "vec_widen_smult_lo_v8hi"
3331 [(match_operand:V4SI 0 "register_operand" "")
3332 (match_operand:V8HI 1 "register_operand" "")
3333 (match_operand:V8HI 2 "register_operand" "")]
3336 rtx op1, op2, t1, t2, dest;
3340 t1 = gen_reg_rtx (V8HImode);
3341 t2 = gen_reg_rtx (V8HImode);
3342 dest = gen_lowpart (V8HImode, operands[0]);
3344 emit_insn (gen_mulv8hi3 (t1, op1, op2));
3345 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
3346 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
;; Widening unsigned V8HI multiply, high half.  mulv8hi3 produces the V8HI
;; product (t1) and umulv8hi3_highpart the unsigned high part (t2); the two
;; are combined with vec_interleave_highv8hi into operand 0 viewed as V8HI.
3350 (define_expand "vec_widen_umult_hi_v8hi"
3351 [(match_operand:V4SI 0 "register_operand" "")
3352 (match_operand:V8HI 1 "register_operand" "")
3353 (match_operand:V8HI 2 "register_operand" "")]
3356 rtx op1, op2, t1, t2, dest;
3360 t1 = gen_reg_rtx (V8HImode);
3361 t2 = gen_reg_rtx (V8HImode);
3362 dest = gen_lowpart (V8HImode, operands[0]);
3364 emit_insn (gen_mulv8hi3 (t1, op1, op2));
3365 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
3366 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
;; Widening unsigned V8HI multiply, low half.  mulv8hi3 produces the V8HI
;; product (t1) and umulv8hi3_highpart the unsigned high part (t2); the two
;; are combined with vec_interleave_lowv8hi into operand 0 viewed as V8HI.
3370 (define_expand "vec_widen_umult_lo_v8hi"
3371 [(match_operand:V4SI 0 "register_operand" "")
3372 (match_operand:V8HI 1 "register_operand" "")
3373 (match_operand:V8HI 2 "register_operand" "")]
3376 rtx op1, op2, t1, t2, dest;
3380 t1 = gen_reg_rtx (V8HImode);
3381 t2 = gen_reg_rtx (V8HImode);
3382 dest = gen_lowpart (V8HImode, operands[0]);
3384 emit_insn (gen_mulv8hi3 (t1, op1, op2));
3385 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
3386 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
;; Widening signed V4SI multiply, high half.  Both inputs are rearranged
;; with sse2_pshufd_1 into fresh V4SI temporaries, which are then multiplied
;; by sse5_mulv2div2di3_high into the V2DI destination.
3390 (define_expand "vec_widen_smult_hi_v4si"
3391 [(match_operand:V2DI 0 "register_operand" "")
3392 (match_operand:V4SI 1 "register_operand" "")
3393 (match_operand:V4SI 2 "register_operand" "")]
3398 t1 = gen_reg_rtx (V4SImode);
3399 t2 = gen_reg_rtx (V4SImode);
3401 emit_insn (gen_sse2_pshufd_1 (t1, operands[1],
3406 emit_insn (gen_sse2_pshufd_1 (t2, operands[2],
3411 emit_insn (gen_sse5_mulv2div2di3_high (operands[0], t1, t2));
;; Widening signed V4SI multiply, low half.  Both inputs are rearranged
;; with sse2_pshufd_1 into fresh V4SI temporaries, which are then multiplied
;; by sse5_mulv2div2di3_low into the V2DI destination.
3415 (define_expand "vec_widen_smult_lo_v4si"
3416 [(match_operand:V2DI 0 "register_operand" "")
3417 (match_operand:V4SI 1 "register_operand" "")
3418 (match_operand:V4SI 2 "register_operand" "")]
3423 t1 = gen_reg_rtx (V4SImode);
3424 t2 = gen_reg_rtx (V4SImode);
3426 emit_insn (gen_sse2_pshufd_1 (t1, operands[1],
3431 emit_insn (gen_sse2_pshufd_1 (t2, operands[2],
3436 emit_insn (gen_sse5_mulv2div2di3_low (operands[0], t1, t2));
;; Widening unsigned V4SI multiply, high half.  Each input is interleaved
;; with itself by vec_interleave_highv4si, and the two results are
;; multiplied with sse2_umulv2siv2di3 into the V2DI destination.
3441 (define_expand "vec_widen_umult_hi_v4si"
3442 [(match_operand:V2DI 0 "register_operand" "")
3443 (match_operand:V4SI 1 "register_operand" "")
3444 (match_operand:V4SI 2 "register_operand" "")]
3447 rtx op1, op2, t1, t2;
3451 t1 = gen_reg_rtx (V4SImode);
3452 t2 = gen_reg_rtx (V4SImode);
3454 emit_insn (gen_vec_interleave_highv4si (t1, op1, op1));
3455 emit_insn (gen_vec_interleave_highv4si (t2, op2, op2));
3456 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Widening unsigned V4SI multiply, low half.  Each input is interleaved
;; with itself by vec_interleave_lowv4si, and the two results are
;; multiplied with sse2_umulv2siv2di3 into the V2DI destination.
3460 (define_expand "vec_widen_umult_lo_v4si"
3461 [(match_operand:V2DI 0 "register_operand" "")
3462 (match_operand:V4SI 1 "register_operand" "")
3463 (match_operand:V4SI 2 "register_operand" "")]
3466 rtx op1, op2, t1, t2;
3470 t1 = gen_reg_rtx (V4SImode);
3471 t2 = gen_reg_rtx (V4SImode);
3473 emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1));
3474 emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2));
3475 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Signed dot product on V8HI inputs with a V4SI accumulator:
;;   operand 0 = operand 3 + pmaddwd (operand 1, operand 2)
;; The pmaddwd result is held in a fresh V4SI temporary.
3479 (define_expand "sdot_prodv8hi"
3480 [(match_operand:V4SI 0 "register_operand" "")
3481 (match_operand:V8HI 1 "register_operand" "")
3482 (match_operand:V8HI 2 "register_operand" "")
3483 (match_operand:V4SI 3 "register_operand" "")]
3486 rtx t = gen_reg_rtx (V4SImode);
3487 emit_insn (gen_sse2_pmaddwd (t, operands[1], operands[2]));
3488 emit_insn (gen_addv4si3 (operands[0], operands[3], t));
;; Unsigned dot product on V4SI inputs with a V2DI accumulator (operand 3).
;; With umul standing for sse2_umulv2siv2di3:
;;   t1     = umul (op1, op2) + op3
;;   t2, t3 = op1, op2 after an sse2_lshrti3 shift in TImode
;;   t4     = umul (t2, t3)
;;   op0    = t1 + t4
3492 (define_expand "udot_prodv4si"
3493 [(match_operand:V2DI 0 "register_operand" "")
3494 (match_operand:V4SI 1 "register_operand" "")
3495 (match_operand:V4SI 2 "register_operand" "")
3496 (match_operand:V2DI 3 "register_operand" "")]
3501 t1 = gen_reg_rtx (V2DImode);
3502 emit_insn (gen_sse2_umulv2siv2di3 (t1, operands[1], operands[2]));
3503 emit_insn (gen_addv2di3 (t1, t1, operands[3]));
3505 t2 = gen_reg_rtx (V4SImode);
3506 t3 = gen_reg_rtx (V4SImode);
3507 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2),
3508 gen_lowpart (TImode, operands[1]),
3510 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3),
3511 gen_lowpart (TImode, operands[2]),
3514 t4 = gen_reg_rtx (V2DImode);
3515 emit_insn (gen_sse2_umulv2siv2di3 (t4, t2, t3));
3517 emit_insn (gen_addv2di3 (operands[0], t1, t4));
;; Arithmetic right shift for V8HI and V4SI (SSEMODE24), emitted as
;; psraw / psrad.  Operand 1 is tied to the destination; the SImode count
;; in operand 2 is an SSE register ("x") or a constant matching "N".
3521 (define_insn "ashr<mode>3"
3522 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
3524 (match_operand:SSEMODE24 1 "register_operand" "0")
3525 (match_operand:SI 2 "nonmemory_operand" "xN")))]
3527 "psra<ssevecsize>\t{%2, %0|%0, %2}"
3528 [(set_attr "type" "sseishft")
3529 (set_attr "prefix_data16" "1")
3530 (set_attr "mode" "TI")])
;; Logical right shift for V8HI, V4SI and V2DI (SSEMODE248), emitted as
;; psrlw / psrld / psrlq.  Operand 1 is tied to the destination; the SImode
;; count in operand 2 is an SSE register ("x") or a constant matching "N".
3532 (define_insn "lshr<mode>3"
3533 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
3534 (lshiftrt:SSEMODE248
3535 (match_operand:SSEMODE248 1 "register_operand" "0")
3536 (match_operand:SI 2 "nonmemory_operand" "xN")))]
3538 "psrl<ssevecsize>\t{%2, %0|%0, %2}"
3539 [(set_attr "type" "sseishft")
3540 (set_attr "prefix_data16" "1")
3541 (set_attr "mode" "TI")])
;; Left shift for V8HI, V4SI and V2DI (SSEMODE248), emitted as
;; psllw / pslld / psllq.  Operand 1 is tied to the destination; the SImode
;; count in operand 2 is an SSE register ("x") or a constant matching "N".
3543 (define_insn "ashl<mode>3"
3544 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
3546 (match_operand:SSEMODE248 1 "register_operand" "0")
3547 (match_operand:SI 2 "nonmemory_operand" "xN")))]
3549 "psll<ssevecsize>\t{%2, %0|%0, %2}"
3550 [(set_attr "type" "sseishft")
3551 (set_attr "prefix_data16" "1")
3552 (set_attr "mode" "TI")])
;; Whole-vector left shift for any SSEMODEI mode.  The shift is expressed
;; in TImode, so operands 0 and 1 are re-viewed as TImode with gen_lowpart;
;; the count must satisfy const_0_to_255_mul_8_operand.
3554 (define_expand "vec_shl_<mode>"
3555 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3556 (ashift:TI (match_operand:SSEMODEI 1 "register_operand" "")
3557 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
3560 operands[0] = gen_lowpart (TImode, operands[0]);
3561 operands[1] = gen_lowpart (TImode, operands[1]);
;; Whole-vector logical right shift for any SSEMODEI mode.  The shift is
;; expressed in TImode, so operands 0 and 1 are re-viewed as TImode with
;; gen_lowpart; the count must satisfy const_0_to_255_mul_8_operand.
3564 (define_expand "vec_shr_<mode>"
3565 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3566 (lshiftrt:TI (match_operand:SSEMODEI 1 "register_operand" "")
3567 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
3570 operands[0] = gen_lowpart (TImode, operands[0]);
3571 operands[1] = gen_lowpart (TImode, operands[1]);
;; Expander for the <code> operations on V16QI.  Its preparation statement
;; only canonicalises the operands via ix86_fixup_binary_operands_no_copy.
3574 (define_expand "<code>v16qi3"
3575 [(set (match_operand:V16QI 0 "register_operand" "")
3577 (match_operand:V16QI 1 "nonimmediate_operand" "")
3578 (match_operand:V16QI 2 "nonimmediate_operand" "")))]
3580 "ix86_fixup_binary_operands_no_copy (<CODE>, V16QImode, operands);")
;; Matching insn for the V16QI <code> operations, emitted as
;; p<maxminiprefix>b.  Requires TARGET_SSE2 and operands accepted by
;; ix86_binary_operator_ok.  Operand 1 is commutative and tied to the
;; destination; operand 2 may be a register or memory.
3582 (define_insn "*<code>v16qi3"
3583 [(set (match_operand:V16QI 0 "register_operand" "=x")
3585 (match_operand:V16QI 1 "nonimmediate_operand" "%0")
3586 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
3587 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V16QImode, operands)"
3588 "p<maxminiprefix>b\t{%2, %0|%0, %2}"
3589 [(set_attr "type" "sseiadd")
3590 (set_attr "prefix_data16" "1")
3591 (set_attr "mode" "TI")])
;; Expander for the <code> operations on V8HI.  Its preparation statement
;; only canonicalises the operands via ix86_fixup_binary_operands_no_copy.
3593 (define_expand "<code>v8hi3"
3594 [(set (match_operand:V8HI 0 "register_operand" "")
3596 (match_operand:V8HI 1 "nonimmediate_operand" "")
3597 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
3599 "ix86_fixup_binary_operands_no_copy (<CODE>, V8HImode, operands);")
;; Matching insn for the V8HI <code> operations, emitted as
;; p<maxminiprefix>w.  Requires TARGET_SSE2 and operands accepted by
;; ix86_binary_operator_ok.  Operand 1 is commutative and tied to the
;; destination; operand 2 may be a register or memory.
3601 (define_insn "*<code>v8hi3"
3602 [(set (match_operand:V8HI 0 "register_operand" "=x")
3604 (match_operand:V8HI 1 "nonimmediate_operand" "%0")
3605 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
3606 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V8HImode, operands)"
3607 "p<maxminiprefix>w\t{%2, %0|%0, %2}"
3608 [(set_attr "type" "sseiadd")
3609 (set_attr "prefix_data16" "1")
3610 (set_attr "mode" "TI")])
;; Unsigned maximum on V8HI.  The body contains a direct path that only
;; fixes up the operands, and a fallback that emits sse2_ussubv8hi3
;; followed by addv8hi3, i.e. (op1 - op2) + op2, with the subtraction done
;; by the "ussub" pattern (presumably unsigned-saturating -- confirm
;; against that pattern).  A fresh temporary replaces operand 0 as the
;; intermediate when operand 0 and operand 2 are the same rtx, so operand 2
;; is still intact when the add reads it.
3612 (define_expand "umaxv8hi3"
3613 [(set (match_operand:V8HI 0 "register_operand" "")
3614 (umax:V8HI (match_operand:V8HI 1 "register_operand" "")
3615 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
3619 ix86_fixup_binary_operands_no_copy (UMAX, V8HImode, operands);
3622 rtx op0 = operands[0], op2 = operands[2], op3 = op0;
3623 if (rtx_equal_p (op3, op2))
3624 op3 = gen_reg_rtx (V8HImode);
3625 emit_insn (gen_sse2_ussubv8hi3 (op3, operands[1], op2));
3626 emit_insn (gen_addv8hi3 (op0, op3, op2));
;; Signed maximum for V16QI and V4SI (SSEMODE14).  Besides the direct
;; operand fixup, the body fills a vcond-style operand array
;;   {dest, value-if-true, value-if-false, comparison, cmp-lhs, cmp-rhs}
;; encoding "op1 > op2 ? op1 : op2" and hands it to ix86_expand_int_vcond.
3631 (define_expand "smax<mode>3"
3632 [(set (match_operand:SSEMODE14 0 "register_operand" "")
3633 (smax:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
3634 (match_operand:SSEMODE14 2 "register_operand" "")))]
3638 ix86_fixup_binary_operands_no_copy (SMAX, <MODE>mode, operands);
3644 xops[0] = operands[0];
3645 xops[1] = operands[1];
3646 xops[2] = operands[2];
3647 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
3648 xops[4] = operands[1];
3649 xops[5] = operands[2];
3650 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 insn for the <code> operations on V16QI and V4SI (SSEMODE14),
;; emitted as p<maxminiprefix>b / p<maxminiprefix>d.  Operand 1 is
;; commutative and tied to the destination; operand 2 may be a register or
;; memory.
3656 (define_insn "*sse4_1_<code><mode>3"
3657 [(set (match_operand:SSEMODE14 0 "register_operand" "=x")
3659 (match_operand:SSEMODE14 1 "nonimmediate_operand" "%0")
3660 (match_operand:SSEMODE14 2 "nonimmediate_operand" "xm")))]
3661 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
3662 "p<maxminiprefix><ssevecsize>\t{%2, %0|%0, %2}"
3663 [(set_attr "type" "sseiadd")
3664 (set_attr "prefix_extra" "1")
3665 (set_attr "mode" "TI")])
;; Unsigned maximum on V4SI.  Besides the direct operand fixup, the body
;; fills a vcond-style operand array
;;   {dest, value-if-true, value-if-false, comparison, cmp-lhs, cmp-rhs}
;; encoding "op1 >u op2 ? op1 : op2" (GTU) and hands it to
;; ix86_expand_int_vcond.
3667 (define_expand "umaxv4si3"
3668 [(set (match_operand:V4SI 0 "register_operand" "")
3669 (umax:V4SI (match_operand:V4SI 1 "register_operand" "")
3670 (match_operand:V4SI 2 "register_operand" "")))]
3674 ix86_fixup_binary_operands_no_copy (UMAX, V4SImode, operands);
3680 xops[0] = operands[0];
3681 xops[1] = operands[1];
3682 xops[2] = operands[2];
3683 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
3684 xops[4] = operands[1];
3685 xops[5] = operands[2];
3686 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 insn for the <code> operations on V8HI and V4SI (SSEMODE24),
;; emitted as p<maxminiprefix>w / p<maxminiprefix>d.  Operand 1 is
;; commutative and tied to the destination; operand 2 may be a register or
;; memory.
3692 (define_insn "*sse4_1_<code><mode>3"
3693 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
3695 (match_operand:SSEMODE24 1 "nonimmediate_operand" "%0")
3696 (match_operand:SSEMODE24 2 "nonimmediate_operand" "xm")))]
3697 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
3698 "p<maxminiprefix><ssevecsize>\t{%2, %0|%0, %2}"
3699 [(set_attr "type" "sseiadd")
3700 (set_attr "prefix_extra" "1")
3701 (set_attr "mode" "TI")])
;; Signed minimum for V16QI and V4SI (SSEMODE14).  Besides the direct
;; operand fixup, the body fills a vcond-style operand array
;;   {dest, value-if-true, value-if-false, comparison, cmp-lhs, cmp-rhs}
;; encoding "op1 > op2 ? op2 : op1" (note the swapped value slots) and
;; hands it to ix86_expand_int_vcond.
3703 (define_expand "smin<mode>3"
3704 [(set (match_operand:SSEMODE14 0 "register_operand" "")
3705 (smin:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
3706 (match_operand:SSEMODE14 2 "register_operand" "")))]
3710 ix86_fixup_binary_operands_no_copy (SMIN, <MODE>mode, operands);
3716 xops[0] = operands[0];
3717 xops[1] = operands[2];
3718 xops[2] = operands[1];
3719 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
3720 xops[4] = operands[1];
3721 xops[5] = operands[2];
3722 ok = ix86_expand_int_vcond (xops);
;; Unsigned minimum for V8HI and V4SI (SSEMODE24).  Besides the direct
;; operand fixup, the body fills a vcond-style operand array
;;   {dest, value-if-true, value-if-false, comparison, cmp-lhs, cmp-rhs}
;; encoding "op1 >u op2 ? op2 : op1" (GTU, swapped value slots) and hands
;; it to ix86_expand_int_vcond.
3728 (define_expand "umin<mode>3"
3729 [(set (match_operand:SSEMODE24 0 "register_operand" "")
3730 (umin:SSEMODE24 (match_operand:SSEMODE24 1 "register_operand" "")
3731 (match_operand:SSEMODE24 2 "register_operand" "")))]
3735 ix86_fixup_binary_operands_no_copy (UMIN, <MODE>mode, operands);
3741 xops[0] = operands[0];
3742 xops[1] = operands[2];
3743 xops[2] = operands[1];
3744 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
3745 xops[4] = operands[1];
3746 xops[5] = operands[2];
3747 ok = ix86_expand_int_vcond (xops);
3753 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3755 ;; Parallel integral comparisons
3757 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander for element-wise equality on V16QI, V8HI and V4SI (SSEMODE124).
;; Available with SSE2 when SSE5 is not enabled; it only canonicalises the
;; operands via ix86_fixup_binary_operands_no_copy.
3759 (define_expand "sse2_eq<mode>3"
3760 [(set (match_operand:SSEMODE124 0 "register_operand" "")
3762 (match_operand:SSEMODE124 1 "nonimmediate_operand" "")
3763 (match_operand:SSEMODE124 2 "nonimmediate_operand" "")))]
3764 "TARGET_SSE2 && !TARGET_SSE5"
3765 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
;; Element-wise equality compare for V16QI, V8HI and V4SI, emitted as
;; pcmpeqb / pcmpeqw / pcmpeqd.  Requires SSE2 without SSE5.  Operand 1 is
;; commutative and tied to the destination; operand 2 may be a register or
;; memory.
3767 (define_insn "*sse2_eq<mode>3"
3768 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
3770 (match_operand:SSEMODE124 1 "nonimmediate_operand" "%0")
3771 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
3772 "TARGET_SSE2 && !TARGET_SSE5
3773 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
3774 "pcmpeq<ssevecsize>\t{%2, %0|%0, %2}"
3775 [(set_attr "type" "ssecmp")
3776 (set_attr "prefix_data16" "1")
3777 (set_attr "mode" "TI")])
;; Expander for element-wise equality on V2DI.  Its preparation statement
;; only canonicalises the operands via ix86_fixup_binary_operands_no_copy.
3779 (define_expand "sse4_1_eqv2di3"
3780 [(set (match_operand:V2DI 0 "register_operand" "")
3782 (match_operand:V2DI 1 "nonimmediate_operand" "")
3783 (match_operand:V2DI 2 "nonimmediate_operand" "")))]
3785 "ix86_fixup_binary_operands_no_copy (EQ, V2DImode, operands);")
;; Element-wise equality compare for V2DI, emitted as PCMPEQQ.  Requires
;; TARGET_SSE4_1.  Operand 1 is commutative and tied to the destination;
;; operand 2 may be a register or memory.
3787 (define_insn "*sse4_1_eqv2di3"
3788 [(set (match_operand:V2DI 0 "register_operand" "=x")
3790 (match_operand:V2DI 1 "nonimmediate_operand" "%0")
3791 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
3792 "TARGET_SSE4_1 && ix86_binary_operator_ok (EQ, V2DImode, operands)"
3793 "pcmpeqq\t{%2, %0|%0, %2}"
3794 [(set_attr "type" "ssecmp")
3795 (set_attr "prefix_extra" "1")
3796 (set_attr "mode" "TI")])
;; Element-wise greater-than compare for V16QI, V8HI and V4SI, emitted as
;; pcmpgtb / pcmpgtw / pcmpgtd.  Requires SSE2 without SSE5.  Operand 1
;; must be a register tied to the destination (no commutative marker);
;; operand 2 may be a register or memory.
3798 (define_insn "sse2_gt<mode>3"
3799 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
3801 (match_operand:SSEMODE124 1 "register_operand" "0")
3802 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
3803 "TARGET_SSE2 && !TARGET_SSE5"
3804 "pcmpgt<ssevecsize>\t{%2, %0|%0, %2}"
3805 [(set_attr "type" "ssecmp")
3806 (set_attr "prefix_data16" "1")
3807 (set_attr "mode" "TI")])
;; Element-wise greater-than compare for V2DI, emitted as PCMPGTQ.
;; Operand 1 must be a register tied to the destination; operand 2 may be
;; a register or memory.
3809 (define_insn "sse4_2_gtv2di3"
3810 [(set (match_operand:V2DI 0 "register_operand" "=x")
3812 (match_operand:V2DI 1 "register_operand" "0")
3813 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
3815 "pcmpgtq\t{%2, %0|%0, %2}"
3816 [(set_attr "type" "ssecmp")
3817 (set_attr "mode" "TI")])
;; Vector conditional select for SSEMODEI modes:
;;   operand 0 = (operand 4 <operator 3> operand 5) ? operand 1 : operand 2
;; The work is delegated to ix86_expand_int_vcond, whose return value is
;; tested by the body.
3819 (define_expand "vcond<mode>"
3820 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3821 (if_then_else:SSEMODEI
3822 (match_operator 3 ""
3823 [(match_operand:SSEMODEI 4 "nonimmediate_operand" "")
3824 (match_operand:SSEMODEI 5 "nonimmediate_operand" "")])
3825 (match_operand:SSEMODEI 1 "general_operand" "")
3826 (match_operand:SSEMODEI 2 "general_operand" "")))]
3829 if (ix86_expand_int_vcond (operands))
;; Vector conditional select under the "vcondu" name for SSEMODEI modes:
;;   operand 0 = (operand 4 <operator 3> operand 5) ? operand 1 : operand 2
;; The work is delegated to ix86_expand_int_vcond, whose return value is
;; tested by the body.
3835 (define_expand "vcondu<mode>"
3836 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3837 (if_then_else:SSEMODEI
3838 (match_operator 3 ""
3839 [(match_operand:SSEMODEI 4 "nonimmediate_operand" "")
3840 (match_operand:SSEMODEI 5 "nonimmediate_operand" "")])
3841 (match_operand:SSEMODEI 1 "general_operand" "")
3842 (match_operand:SSEMODEI 2 "general_operand" "")))]
3845 if (ix86_expand_int_vcond (operands))
3851 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3853 ;; Parallel bitwise logical operations
3855 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; One's complement for SSEMODEI modes, expressed as an XOR.  The
;; preparation code builds a constant vector with every element set to -1,
;; forces it into a register and stores it in operands[2].
3857 (define_expand "one_cmpl<mode>2"
3858 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3859 (xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3863 int i, n = GET_MODE_NUNITS (<MODE>mode);
3864 rtvec v = rtvec_alloc (n);
3866 for (i = 0; i < n; ++i)
3867 RTVEC_ELT (v, i) = constm1_rtx;
3869 operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v));
;; Combines the complement of operand 1 with operand 2 for SSEMODEI modes
;; on targets that have SSE but not SSE2; emitted as ANDNPS.  Operand 1 is
;; tied to the destination; operand 2 may be a register or memory.
3872 (define_insn "*sse_nand<mode>3"
3873 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3875 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0"))
3876 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3877 "(TARGET_SSE && !TARGET_SSE2)"
3878 "andnps\t{%2, %0|%0, %2}"
3879 [(set_attr "type" "sselog")
3880 (set_attr "mode" "V4SF")])
;; Combines the complement of operand 1 with operand 2 for SSEMODEI modes;
;; emitted as PANDN.  Operand 1 is tied to the destination; operand 2 may
;; be a register or memory.
3882 (define_insn "sse2_nand<mode>3"
3883 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3885 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0"))
3886 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3888 "pandn\t{%2, %0|%0, %2}"
3889 [(set_attr "type" "sselog")
3890 (set_attr "prefix_data16" "1")
3891 (set_attr "mode" "TI")])
;; TFmode variant: combines the complement of operand 1 with operand 2;
;; emitted as PANDN.  Operand 1 is tied to the destination; operand 2 may
;; be a register or memory.
3893 (define_insn "*nandtf3"
3894 [(set (match_operand:TF 0 "register_operand" "=x")
3896 (not:TF (match_operand:TF 1 "register_operand" "0"))
3897 (match_operand:TF 2 "nonimmediate_operand" "xm")))]
3899 "pandn\t{%2, %0|%0, %2}"
3900 [(set_attr "type" "sselog")
3901 (set_attr "prefix_data16" "1")
3902 (set_attr "mode" "TI")])
;; Expander for the <code> operations on SSEMODEI modes.  Its preparation
;; statement only canonicalises the operands via
;; ix86_fixup_binary_operands_no_copy.
3904 (define_expand "<code><mode>3"
3905 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3907 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3908 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
3910 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; <code> operations on SSEMODEI modes for targets that have SSE but not
;; SSE2; emitted as <plogicprefix>ps.  Operand 1 is commutative and tied to
;; the destination; operand 2 may be a register or memory.
3912 (define_insn "*sse_<code><mode>3"
3913 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3915 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
3916 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3917 "(TARGET_SSE && !TARGET_SSE2)
3918 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
3919 "<plogicprefix>ps\t{%2, %0|%0, %2}"
3920 [(set_attr "type" "sselog")
3921 (set_attr "mode" "V4SF")])
;; <code> operations on SSEMODEI modes for SSE2 targets; emitted as
;; p<plogicprefix>.  Operand 1 is commutative and tied to the destination;
;; operand 2 may be a register or memory.
3923 (define_insn "*sse2_<code><mode>3"
3924 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3926 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
3927 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3928 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
3929 "p<plogicprefix>\t{%2, %0|%0, %2}"
3930 [(set_attr "type" "sselog")
3931 (set_attr "prefix_data16" "1")
3932 (set_attr "mode" "TI")])
;; Expander for the <code> operations on TFmode.  Its preparation statement
;; only canonicalises the operands via ix86_fixup_binary_operands_no_copy.
3934 (define_expand "<code>tf3"
3935 [(set (match_operand:TF 0 "register_operand" "")
3937 (match_operand:TF 1 "nonimmediate_operand" "")
3938 (match_operand:TF 2 "nonimmediate_operand" "")))]
3940 "ix86_fixup_binary_operands_no_copy (<CODE>, TFmode, operands);")
;; <code> operations on TFmode for SSE2 targets; emitted as
;; p<plogicprefix>.  Operand 1 is commutative and tied to the destination;
;; operand 2 may be a register or memory.
3942 (define_insn "*<code>tf3"
3943 [(set (match_operand:TF 0 "register_operand" "=x")
3945 (match_operand:TF 1 "nonimmediate_operand" "%0")
3946 (match_operand:TF 2 "nonimmediate_operand" "xm")))]
3947 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, TFmode, operands)"
3948 "p<plogicprefix>\t{%2, %0|%0, %2}"
3949 [(set_attr "type" "sselog")
3950 (set_attr "prefix_data16" "1")
3951 (set_attr "mode" "TI")])
3953 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3955 ;; Parallel integral element swizzling
3957 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Truncating pack of two V8HI vectors into one V16QI vector.  One path
;; defers to ix86_expand_sse5_pack; the other views the inputs as V16QI and
;; applies the seven-step interleave sequence traced byte by byte in the
;; table below.
3960 ;; op1 = abcdefghijklmnop
3961 ;; op2 = qrstuvwxyz012345
3962 ;; h1 = aqbrcsdteufvgwhx
3963 ;; l1 = iyjzk0l1m2n3o4p5
3964 ;; h2 = aiqybjrzcks0dlt1
3965 ;; l2 = emu2fnv3gow4hpx5
3966 ;; h3 = aeimquy2bfjnrvz3
3967 ;; l3 = cgkosw04dhlptx15
3968 ;; result = bdfhjlnprtvxz135
3969 (define_expand "vec_pack_trunc_v8hi"
3970 [(match_operand:V16QI 0 "register_operand" "")
3971 (match_operand:V8HI 1 "register_operand" "")
3972 (match_operand:V8HI 2 "register_operand" "")]
3975 rtx op1, op2, h1, l1, h2, l2, h3, l3;
3979 ix86_expand_sse5_pack (operands);
3983 op1 = gen_lowpart (V16QImode, operands[1]);
3984 op2 = gen_lowpart (V16QImode, operands[2]);
3985 h1 = gen_reg_rtx (V16QImode);
3986 l1 = gen_reg_rtx (V16QImode);
3987 h2 = gen_reg_rtx (V16QImode);
3988 l2 = gen_reg_rtx (V16QImode);
3989 h3 = gen_reg_rtx (V16QImode);
3990 l3 = gen_reg_rtx (V16QImode);
3992 emit_insn (gen_vec_interleave_highv16qi (h1, op1, op2));
3993 emit_insn (gen_vec_interleave_lowv16qi (l1, op1, op2));
3994 emit_insn (gen_vec_interleave_highv16qi (h2, l1, h1));
3995 emit_insn (gen_vec_interleave_lowv16qi (l2, l1, h1));
3996 emit_insn (gen_vec_interleave_highv16qi (h3, l2, h2));
3997 emit_insn (gen_vec_interleave_lowv16qi (l3, l2, h2));
3998 emit_insn (gen_vec_interleave_lowv16qi (operands[0], l3, h3));
;; Truncating pack of two V4SI vectors into one V8HI vector.  One path
;; defers to ix86_expand_sse5_pack; the other views the inputs as V8HI and
;; applies five interleave steps (high/low, high/low, then a final low).
4009 ;; result = bdfhjlnp
4010 (define_expand "vec_pack_trunc_v4si"
4011 [(match_operand:V8HI 0 "register_operand" "")
4012 (match_operand:V4SI 1 "register_operand" "")
4013 (match_operand:V4SI 2 "register_operand" "")]
4016 rtx op1, op2, h1, l1, h2, l2;
4020 ix86_expand_sse5_pack (operands);
4024 op1 = gen_lowpart (V8HImode, operands[1]);
4025 op2 = gen_lowpart (V8HImode, operands[2]);
4026 h1 = gen_reg_rtx (V8HImode);
4027 l1 = gen_reg_rtx (V8HImode);
4028 h2 = gen_reg_rtx (V8HImode);
4029 l2 = gen_reg_rtx (V8HImode);
4031 emit_insn (gen_vec_interleave_highv8hi (h1, op1, op2));
4032 emit_insn (gen_vec_interleave_lowv8hi (l1, op1, op2));
4033 emit_insn (gen_vec_interleave_highv8hi (h2, l1, h1));
4034 emit_insn (gen_vec_interleave_lowv8hi (l2, l1, h1));
4035 emit_insn (gen_vec_interleave_lowv8hi (operands[0], l2, h2));
;; Truncating pack of two V2DI vectors into one V4SI vector.  One path
;; defers to ix86_expand_sse5_pack; the other views the inputs as V4SI and
;; applies three interleave steps (high, low, then a final low).
4045 (define_expand "vec_pack_trunc_v2di"
4046 [(match_operand:V4SI 0 "register_operand" "")
4047 (match_operand:V2DI 1 "register_operand" "")
4048 (match_operand:V2DI 2 "register_operand" "")]
4051 rtx op1, op2, h1, l1;
4055 ix86_expand_sse5_pack (operands);
4059 op1 = gen_lowpart (V4SImode, operands[1]);
4060 op2 = gen_lowpart (V4SImode, operands[2]);
4061 h1 = gen_reg_rtx (V4SImode);
4062 l1 = gen_reg_rtx (V4SImode);
4064 emit_insn (gen_vec_interleave_highv4si (h1, op1, op2));
4065 emit_insn (gen_vec_interleave_lowv4si (l1, op1, op2));
4066 emit_insn (gen_vec_interleave_lowv4si (operands[0], l1, h1));
;; Expander for interleaving the upper halves of two V16QI vectors.  The
;; selector alternates indices 8..15 with 24..31, and the body emits
;; sse2_punpckhbw on the three operands.
4070 (define_expand "vec_interleave_highv16qi"
4071 [(set (match_operand:V16QI 0 "register_operand" "")
4074 (match_operand:V16QI 1 "register_operand" "")
4075 (match_operand:V16QI 2 "nonimmediate_operand" ""))
4076 (parallel [(const_int 8) (const_int 24)
4077 (const_int 9) (const_int 25)
4078 (const_int 10) (const_int 26)
4079 (const_int 11) (const_int 27)
4080 (const_int 12) (const_int 28)
4081 (const_int 13) (const_int 29)
4082 (const_int 14) (const_int 30)
4083 (const_int 15) (const_int 31)])))]
4086 emit_insn (gen_sse2_punpckhbw (operands[0], operands[1], operands[2]));
;; Expander for interleaving the lower halves of two V16QI vectors.  The
;; selector alternates indices 0..7 with 16..23, and the body emits
;; sse2_punpcklbw on the three operands.
4090 (define_expand "vec_interleave_lowv16qi"
4091 [(set (match_operand:V16QI 0 "register_operand" "")
4094 (match_operand:V16QI 1 "register_operand" "")
4095 (match_operand:V16QI 2 "nonimmediate_operand" ""))
4096 (parallel [(const_int 0) (const_int 16)
4097 (const_int 1) (const_int 17)
4098 (const_int 2) (const_int 18)
4099 (const_int 3) (const_int 19)
4100 (const_int 4) (const_int 20)
4101 (const_int 5) (const_int 21)
4102 (const_int 6) (const_int 22)
4103 (const_int 7) (const_int 23)])))]
4106 emit_insn (gen_sse2_punpcklbw (operands[0], operands[1], operands[2]));
;; Expander for interleaving the upper halves of two V8HI vectors.  The
;; selector alternates indices 4..7 with 12..15, and the body emits
;; sse2_punpckhwd on the three operands.  Operand 0 carries an empty
;; constraint string like the other vec_interleave expanders in this file;
;; a lone "=" names no register class and has no use in an expander.
4110 (define_expand "vec_interleave_highv8hi"
4111 [(set (match_operand:V8HI 0 "register_operand" "")
4114 (match_operand:V8HI 1 "register_operand" "")
4115 (match_operand:V8HI 2 "nonimmediate_operand" ""))
4116 (parallel [(const_int 4) (const_int 12)
4117 (const_int 5) (const_int 13)
4118 (const_int 6) (const_int 14)
4119 (const_int 7) (const_int 15)])))]
4122 emit_insn (gen_sse2_punpckhwd (operands[0], operands[1], operands[2]));
;; Expander for interleaving the lower halves of two V8HI vectors.  The
;; selector alternates indices 0..3 with 8..11, and the body emits
;; sse2_punpcklwd on the three operands.
4126 (define_expand "vec_interleave_lowv8hi"
4127 [(set (match_operand:V8HI 0 "register_operand" "")
4130 (match_operand:V8HI 1 "register_operand" "")
4131 (match_operand:V8HI 2 "nonimmediate_operand" ""))
4132 (parallel [(const_int 0) (const_int 8)
4133 (const_int 1) (const_int 9)
4134 (const_int 2) (const_int 10)
4135 (const_int 3) (const_int 11)])))]
4138 emit_insn (gen_sse2_punpcklwd (operands[0], operands[1], operands[2]));
;; Expander for interleaving the upper halves of two V4SI vectors.  The
;; selector is [2 6 3 7], and the body emits sse2_punpckhdq on the three
;; operands.
4142 (define_expand "vec_interleave_highv4si"
4143 [(set (match_operand:V4SI 0 "register_operand" "")
4146 (match_operand:V4SI 1 "register_operand" "")
4147 (match_operand:V4SI 2 "nonimmediate_operand" ""))
4148 (parallel [(const_int 2) (const_int 6)
4149 (const_int 3) (const_int 7)])))]
4152 emit_insn (gen_sse2_punpckhdq (operands[0], operands[1], operands[2]));
;; Expander for interleaving the lower halves of two V4SI vectors.  The
;; selector is [0 4 1 5], and the body emits sse2_punpckldq on the three
;; operands.
4156 (define_expand "vec_interleave_lowv4si"
4157 [(set (match_operand:V4SI 0 "register_operand" "")
4160 (match_operand:V4SI 1 "register_operand" "")
4161 (match_operand:V4SI 2 "nonimmediate_operand" ""))
4162 (parallel [(const_int 0) (const_int 4)
4163 (const_int 1) (const_int 5)])))]
4166 emit_insn (gen_sse2_punpckldq (operands[0], operands[1], operands[2]));
;; Expander for the high interleave of two V2DI vectors; the selector
;; starts at index 1 and the body emits sse2_punpckhqdq on the three
;; operands.
4170 (define_expand "vec_interleave_highv2di"
4171 [(set (match_operand:V2DI 0 "register_operand" "")
4174 (match_operand:V2DI 1 "register_operand" "")
4175 (match_operand:V2DI 2 "nonimmediate_operand" ""))
4176 (parallel [(const_int 1)
4180 emit_insn (gen_sse2_punpckhqdq (operands[0], operands[1], operands[2]));
;; Expander for the low interleave of two V2DI vectors; the selector
;; starts at index 0 and the body emits sse2_punpcklqdq on the three
;; operands.
4184 (define_expand "vec_interleave_lowv2di"
4185 [(set (match_operand:V2DI 0 "register_operand" "")
4188 (match_operand:V2DI 1 "register_operand" "")
4189 (match_operand:V2DI 2 "nonimmediate_operand" ""))
4190 (parallel [(const_int 0)
4194 emit_insn (gen_sse2_punpcklqdq (operands[0], operands[1], operands[2]));
;; Packs two V8HI operands into one V16QI result with PACKSSWB.  Operand 1
;; is tied to the destination; operand 2 may be a register or memory.
4198 (define_insn "sse2_packsswb"
4199 [(set (match_operand:V16QI 0 "register_operand" "=x")
4202 (match_operand:V8HI 1 "register_operand" "0"))
4204 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
4206 "packsswb\t{%2, %0|%0, %2}"
4207 [(set_attr "type" "sselog")
4208 (set_attr "prefix_data16" "1")
4209 (set_attr "mode" "TI")])
;; Packs two V4SI operands into one V8HI result with PACKSSDW.  Operand 1
;; is tied to the destination; operand 2 may be a register or memory.
4211 (define_insn "sse2_packssdw"
4212 [(set (match_operand:V8HI 0 "register_operand" "=x")
4215 (match_operand:V4SI 1 "register_operand" "0"))
4217 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
4219 "packssdw\t{%2, %0|%0, %2}"
4220 [(set_attr "type" "sselog")
4221 (set_attr "prefix_data16" "1")
4222 (set_attr "mode" "TI")])
;; Packs two V8HI operands into one V16QI result with PACKUSWB.  Operand 1
;; is tied to the destination; operand 2 may be a register or memory.
4224 (define_insn "sse2_packuswb"
4225 [(set (match_operand:V16QI 0 "register_operand" "=x")
4228 (match_operand:V8HI 1 "register_operand" "0"))
4230 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
4232 "packuswb\t{%2, %0|%0, %2}"
4233 [(set_attr "type" "sselog")
4234 (set_attr "prefix_data16" "1")
4235 (set_attr "mode" "TI")])
;; PUNPCKHBW: interleaves the upper halves of two V16QI operands.  The
;; selector alternates indices 8..15 with 24..31.  Operand 1 is tied to the
;; destination; operand 2 may be a register or memory.
4237 (define_insn "sse2_punpckhbw"
4238 [(set (match_operand:V16QI 0 "register_operand" "=x")
4241 (match_operand:V16QI 1 "register_operand" "0")
4242 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
4243 (parallel [(const_int 8) (const_int 24)
4244 (const_int 9) (const_int 25)
4245 (const_int 10) (const_int 26)
4246 (const_int 11) (const_int 27)
4247 (const_int 12) (const_int 28)
4248 (const_int 13) (const_int 29)
4249 (const_int 14) (const_int 30)
4250 (const_int 15) (const_int 31)])))]
4252 "punpckhbw\t{%2, %0|%0, %2}"
4253 [(set_attr "type" "sselog")
4254 (set_attr "prefix_data16" "1")
4255 (set_attr "mode" "TI")])
;; PUNPCKLBW: interleaves the lower halves of two V16QI operands.  The
;; selector alternates indices 0..7 with 16..23.  Operand 1 is tied to the
;; destination; operand 2 may be a register or memory.
4257 (define_insn "sse2_punpcklbw"
4258 [(set (match_operand:V16QI 0 "register_operand" "=x")
4261 (match_operand:V16QI 1 "register_operand" "0")
4262 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
4263 (parallel [(const_int 0) (const_int 16)
4264 (const_int 1) (const_int 17)
4265 (const_int 2) (const_int 18)
4266 (const_int 3) (const_int 19)
4267 (const_int 4) (const_int 20)
4268 (const_int 5) (const_int 21)
4269 (const_int 6) (const_int 22)
4270 (const_int 7) (const_int 23)])))]
4272 "punpcklbw\t{%2, %0|%0, %2}"
4273 [(set_attr "type" "sselog")
4274 (set_attr "prefix_data16" "1")
4275 (set_attr "mode" "TI")])
;; PUNPCKHWD: interleaves the upper halves of two V8HI operands.  The
;; selector alternates indices 4..7 with 12..15.  Operand 1 is tied to the
;; destination; operand 2 may be a register or memory.
4277 (define_insn "sse2_punpckhwd"
4278 [(set (match_operand:V8HI 0 "register_operand" "=x")
4281 (match_operand:V8HI 1 "register_operand" "0")
4282 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
4283 (parallel [(const_int 4) (const_int 12)
4284 (const_int 5) (const_int 13)
4285 (const_int 6) (const_int 14)
4286 (const_int 7) (const_int 15)])))]
4288 "punpckhwd\t{%2, %0|%0, %2}"
4289 [(set_attr "type" "sselog")
4290 (set_attr "prefix_data16" "1")
4291 (set_attr "mode" "TI")])
;; PUNPCKLWD: interleaves the lower halves of two V8HI operands.  The
;; selector alternates indices 0..3 with 8..11.  Operand 1 is tied to the
;; destination; operand 2 may be a register or memory.
4293 (define_insn "sse2_punpcklwd"
4294 [(set (match_operand:V8HI 0 "register_operand" "=x")
4297 (match_operand:V8HI 1 "register_operand" "0")
4298 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
4299 (parallel [(const_int 0) (const_int 8)
4300 (const_int 1) (const_int 9)
4301 (const_int 2) (const_int 10)
4302 (const_int 3) (const_int 11)])))]
4304 "punpcklwd\t{%2, %0|%0, %2}"
4305 [(set_attr "type" "sselog")
4306 (set_attr "prefix_data16" "1")
4307 (set_attr "mode" "TI")])
;; sse2_punpckhdq: V4SI pattern that emits punpckhdq.  Operand 1 is tied
;; to the destination; operand 2 may be an SSE register or memory.  The
;; selector is (2, 6, 3, 7).
;; NOTE(review): the lines wrapping the two operands and the insn
;; condition are not visible in this view -- confirm against the full file.
4309 (define_insn "sse2_punpckhdq"
4310 [(set (match_operand:V4SI 0 "register_operand" "=x")
4313 (match_operand:V4SI 1 "register_operand" "0")
4314 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
4315 (parallel [(const_int 2) (const_int 6)
4316 (const_int 3) (const_int 7)])))]
4318 "punpckhdq\t{%2, %0|%0, %2}"
4319 [(set_attr "type" "sselog")
4320 (set_attr "prefix_data16" "1")
4321 (set_attr "mode" "TI")])
;; sse2_punpckldq: V4SI pattern that emits punpckldq.  Operand 1 is tied
;; to the destination; operand 2 may be an SSE register or memory.  The
;; selector is (0, 4, 1, 5).
;; NOTE(review): the lines wrapping the two operands and the insn
;; condition are not visible in this view -- confirm against the full file.
4323 (define_insn "sse2_punpckldq"
4324 [(set (match_operand:V4SI 0 "register_operand" "=x")
4327 (match_operand:V4SI 1 "register_operand" "0")
4328 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
4329 (parallel [(const_int 0) (const_int 4)
4330 (const_int 1) (const_int 5)])))]
4332 "punpckldq\t{%2, %0|%0, %2}"
4333 [(set_attr "type" "sselog")
4334 (set_attr "prefix_data16" "1")
4335 (set_attr "mode" "TI")])
;; sse2_punpckhqdq: V2DI pattern that emits punpckhqdq.  Operand 1 is tied
;; to the destination; operand 2 may be an SSE register or memory.
;; NOTE(review): only the first selector index (1) is visible; the rest of
;; the selector, the wrapping RTL and the insn condition are on lines not
;; shown in this view -- confirm against the full file.
4337 (define_insn "sse2_punpckhqdq"
4338 [(set (match_operand:V2DI 0 "register_operand" "=x")
4341 (match_operand:V2DI 1 "register_operand" "0")
4342 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4343 (parallel [(const_int 1)
4346 "punpckhqdq\t{%2, %0|%0, %2}"
4347 [(set_attr "type" "sselog")
4348 (set_attr "prefix_data16" "1")
4349 (set_attr "mode" "TI")])
;; sse2_punpcklqdq: V2DI pattern that emits punpcklqdq.  Operand 1 is tied
;; to the destination; operand 2 may be an SSE register or memory.
;; NOTE(review): only the first selector index (0) is visible; the rest of
;; the selector, the wrapping RTL and the insn condition are on lines not
;; shown in this view -- confirm against the full file.
4351 (define_insn "sse2_punpcklqdq"
4352 [(set (match_operand:V2DI 0 "register_operand" "=x")
4355 (match_operand:V2DI 1 "register_operand" "0")
4356 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4357 (parallel [(const_int 0)
4360 "punpcklqdq\t{%2, %0|%0, %2}"
4361 [(set_attr "type" "sselog")
4362 (set_attr "prefix_data16" "1")
4363 (set_attr "mode" "TI")])
;; *sse4_1_pinsrb: inserts the QI value of operand 2 (general register or
;; memory), duplicated across a V16QI, into operand 1, which is tied to
;; the destination.  Operand 3 is matched by const_pow2_1_to_32768_operand;
;; the output code replaces it with exact_log2 of its value before
;; printing, so the assembler sees that logarithm as the immediate.
;; Operand 2 is printed with the %k modifier.
;; NOTE(review): the RTL operator combining the three operands and the
;; insn condition are on lines not visible in this view.
4365 (define_insn "*sse4_1_pinsrb"
4366 [(set (match_operand:V16QI 0 "register_operand" "=x")
4368 (vec_duplicate:V16QI
4369 (match_operand:QI 2 "nonimmediate_operand" "rm"))
4370 (match_operand:V16QI 1 "register_operand" "0")
4371 (match_operand:SI 3 "const_pow2_1_to_32768_operand" "n")))]
4374 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
4375 return "pinsrb\t{%3, %k2, %0|%0, %k2, %3}";
4377 [(set_attr "type" "sselog")
4378 (set_attr "prefix_extra" "1")
4379 (set_attr "mode" "TI")])
;; *sse2_pinsrw: inserts the HI value of operand 2 (general register or
;; memory) into the V8HI operand 1, which is tied to the destination.
;; Operand 3 is matched by const_pow2_1_to_128_operand; the output code
;; replaces it with exact_log2 of its value before printing pinsrw.
;; NOTE(review): the RTL operators wrapping the operands and the insn
;; condition are on lines not visible in this view.
4381 (define_insn "*sse2_pinsrw"
4382 [(set (match_operand:V8HI 0 "register_operand" "=x")
4385 (match_operand:HI 2 "nonimmediate_operand" "rm"))
4386 (match_operand:V8HI 1 "register_operand" "0")
4387 (match_operand:SI 3 "const_pow2_1_to_128_operand" "n")))]
4390 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
4391 return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}";
4393 [(set_attr "type" "sselog")
4394 (set_attr "prefix_data16" "1")
4395 (set_attr "mode" "TI")])
;; *sse4_1_pinsrd: inserts the SI value of operand 2 (general register or
;; memory) into the V4SI operand 1, which is tied to the destination.
;; Operand 3 is matched by const_pow2_1_to_8_operand; the output code
;; replaces it with exact_log2 of its value before printing pinsrd.
;; NOTE(review): the RTL operators wrapping the operands and the insn
;; condition are on lines not visible in this view.
4397 ;; It must come before sse2_loadld since it is preferred.
4398 (define_insn "*sse4_1_pinsrd"
4399 [(set (match_operand:V4SI 0 "register_operand" "=x")
4402 (match_operand:SI 2 "nonimmediate_operand" "rm"))
4403 (match_operand:V4SI 1 "register_operand" "0")
4404 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
4407 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
4408 return "pinsrd\t{%3, %2, %0|%0, %2, %3}";
4410 [(set_attr "type" "sselog")
4411 (set_attr "prefix_extra" "1")
4412 (set_attr "mode" "TI")])
;; *sse4_1_pinsrq: inserts the DI value of operand 2 (general register or
;; memory) into the V2DI operand 1, which is tied to the destination.
;; Enabled only when both TARGET_SSE4_1 and TARGET_64BIT hold.  Operand 3
;; is matched by const_pow2_1_to_2_operand; the output code replaces it
;; with exact_log2 of its value before printing pinsrq.
;; NOTE(review): the RTL operators wrapping the operands are on lines not
;; visible in this view.
4414 (define_insn "*sse4_1_pinsrq"
4415 [(set (match_operand:V2DI 0 "register_operand" "=x")
4418 (match_operand:DI 2 "nonimmediate_operand" "rm"))
4419 (match_operand:V2DI 1 "register_operand" "0")
4420 (match_operand:SI 3 "const_pow2_1_to_2_operand" "n")))]
4421 "TARGET_SSE4_1 && TARGET_64BIT"
4423 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
4424 return "pinsrq\t{%3, %2, %0|%0, %2, %3}";
4426 [(set_attr "type" "sselog")
4427 (set_attr "prefix_extra" "1")
4428 (set_attr "mode" "TI")])
;; *sse4_1_pextrb: emits pextrb, taking the element of V16QI operand 1
;; chosen by the constant operand 2 (predicate const_0_to_15_operand) and
;; writing an SImode general register.
;; NOTE(review): the RTL that wraps the selection (and so how the byte is
;; widened to SImode) and the insn condition are on lines not visible in
;; this view -- confirm against the full file.
4430 (define_insn "*sse4_1_pextrb"
4431 [(set (match_operand:SI 0 "register_operand" "=r")
4434 (match_operand:V16QI 1 "register_operand" "x")
4435 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")]))))]
4437 "pextrb\t{%2, %1, %0|%0, %1, %2}"
4438 [(set_attr "type" "sselog")
4439 (set_attr "prefix_extra" "1")
4440 (set_attr "mode" "TI")])
;; *sse4_1_pextrb_memory: emits pextrb with a QImode memory destination.
;; The element of V16QI operand 1 is chosen by the constant operand 2
;; (predicate const_0_to_15_operand).
;; NOTE(review): the selecting RTL operator and the insn condition are on
;; lines not visible in this view.
4442 (define_insn "*sse4_1_pextrb_memory"
4443 [(set (match_operand:QI 0 "memory_operand" "=m")
4445 (match_operand:V16QI 1 "register_operand" "x")
4446 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")])))]
4448 "pextrb\t{%2, %1, %0|%0, %1, %2}"
4449 [(set_attr "type" "sselog")
4450 (set_attr "prefix_extra" "1")
4451 (set_attr "mode" "TI")])
;; *sse2_pextrw: emits pextrw, taking the element of V8HI operand 1 chosen
;; by the constant operand 2 (predicate const_0_to_7_operand) and writing
;; an SImode general register.
;; NOTE(review): the RTL that wraps the selection (and so how the word is
;; widened to SImode) and the insn condition are on lines not visible in
;; this view -- confirm against the full file.
4453 (define_insn "*sse2_pextrw"
4454 [(set (match_operand:SI 0 "register_operand" "=r")
4457 (match_operand:V8HI 1 "register_operand" "x")
4458 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")]))))]
4460 "pextrw\t{%2, %1, %0|%0, %1, %2}"
4461 [(set_attr "type" "sselog")
4462 (set_attr "prefix_data16" "1")
4463 (set_attr "mode" "TI")])
;; *sse4_1_pextrw_memory: emits pextrw with an HImode memory destination.
;; The element of V8HI operand 1 is chosen by the constant operand 2
;; (predicate const_0_to_7_operand).
;; NOTE(review): the selecting RTL operator and the insn condition are on
;; lines not visible in this view.
4465 (define_insn "*sse4_1_pextrw_memory"
4466 [(set (match_operand:HI 0 "memory_operand" "=m")
4468 (match_operand:V8HI 1 "register_operand" "x")
4469 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")])))]
4471 "pextrw\t{%2, %1, %0|%0, %1, %2}"
4472 [(set_attr "type" "sselog")
4473 (set_attr "prefix_extra" "1")
4474 (set_attr "mode" "TI")])
;; *sse4_1_pextrd: emits pextrd.  The destination is an SImode general
;; register or memory ("=rm"); the element of V4SI operand 1 is chosen by
;; the constant operand 2 (predicate const_0_to_3_operand).
;; NOTE(review): the selecting RTL operator and the insn condition are on
;; lines not visible in this view.
4476 (define_insn "*sse4_1_pextrd"
4477 [(set (match_operand:SI 0 "nonimmediate_operand" "=rm")
4479 (match_operand:V4SI 1 "register_operand" "x")
4480 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
4482 "pextrd\t{%2, %1, %0|%0, %1, %2}"
4483 [(set_attr "type" "sselog")
4484 (set_attr "prefix_extra" "1")
4485 (set_attr "mode" "TI")])
;; *sse4_1_pextrq: emits pextrq.  The destination is a DImode general
;; register or memory ("=rm"); the element of V2DI operand 1 is chosen by
;; the constant operand 2 (predicate const_0_to_1_operand).  Enabled only
;; when both TARGET_SSE4_1 and TARGET_64BIT hold.
;; NOTE(review): the selecting RTL operator is on a line not visible in
;; this view.
4487 ;; It must come before *vec_extractv2di_1_sse since it is preferred.
4488 (define_insn "*sse4_1_pextrq"
4489 [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
4491 (match_operand:V2DI 1 "register_operand" "x")
4492 (parallel [(match_operand:SI 2 "const_0_to_1_operand" "n")])))]
4493 "TARGET_SSE4_1 && TARGET_64BIT"
4494 "pextrq\t{%2, %1, %0|%0, %1, %2}"
4495 [(set_attr "type" "sselog")
4496 (set_attr "prefix_extra" "1")
4497 (set_attr "mode" "TI")])
;; sse2_pshufd: expander taking a V4SI destination (operand 0), a V4SI
;; source in a register or memory (operand 1) and a constant integer mask
;; (operand 2).  The preparation code splits the mask into four 2-bit
;; fields (bits 0-1, 2-3, 4-5, 6-7) and passes them as separate constants
;; to gen_sse2_pshufd_1.
;; NOTE(review): the condition string and the closing lines of the
;; preparation code are not visible in this view.
4499 (define_expand "sse2_pshufd"
4500 [(match_operand:V4SI 0 "register_operand" "")
4501 (match_operand:V4SI 1 "nonimmediate_operand" "")
4502 (match_operand:SI 2 "const_int_operand" "")]
4505 int mask = INTVAL (operands[2]);
4506 emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
4507 GEN_INT ((mask >> 0) & 3),
4508 GEN_INT ((mask >> 2) & 3),
4509 GEN_INT ((mask >> 4) & 3),
4510 GEN_INT ((mask >> 6) & 3)));
;; sse2_pshufd_1: emits pshufd.  Operands 2..5 are four constant selectors
;; (each matched by const_0_to_3_operand) applied to the V4SI operand 1.
;; The output code packs them back into a single immediate -- operand 2 in
;; bits 0-1, operand 3 in bits 2-3, operand 4 in bits 4-5, operand 5 in
;; bits 6-7 -- and stores the result in operands[2] for printing.
;; NOTE(review): the selecting RTL operator, the insn condition and the
;; declaration/initialisation of `mask' are on lines not visible here.
4514 (define_insn "sse2_pshufd_1"
4515 [(set (match_operand:V4SI 0 "register_operand" "=x")
4517 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
4518 (parallel [(match_operand 2 "const_0_to_3_operand" "")
4519 (match_operand 3 "const_0_to_3_operand" "")
4520 (match_operand 4 "const_0_to_3_operand" "")
4521 (match_operand 5 "const_0_to_3_operand" "")])))]
4525 mask |= INTVAL (operands[2]) << 0;
4526 mask |= INTVAL (operands[3]) << 2;
4527 mask |= INTVAL (operands[4]) << 4;
4528 mask |= INTVAL (operands[5]) << 6;
4529 operands[2] = GEN_INT (mask);
4531 return "pshufd\t{%2, %1, %0|%0, %1, %2}";
4533 [(set_attr "type" "sselog1")
4534 (set_attr "prefix_data16" "1")
4535 (set_attr "mode" "TI")])
;; sse2_pshuflw: expander taking a V8HI destination (operand 0), a V8HI
;; source in a register or memory (operand 1) and a constant integer mask
;; (operand 2).  The preparation code splits the mask into four 2-bit
;; fields and passes them as separate constants to gen_sse2_pshuflw_1.
;; NOTE(review): the condition string and the closing lines of the
;; preparation code are not visible in this view.
4537 (define_expand "sse2_pshuflw"
4538 [(match_operand:V8HI 0 "register_operand" "")
4539 (match_operand:V8HI 1 "nonimmediate_operand" "")
4540 (match_operand:SI 2 "const_int_operand" "")]
4543 int mask = INTVAL (operands[2]);
4544 emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
4545 GEN_INT ((mask >> 0) & 3),
4546 GEN_INT ((mask >> 2) & 3),
4547 GEN_INT ((mask >> 4) & 3),
4548 GEN_INT ((mask >> 6) & 3)));
;; sse2_pshuflw_1: emits pshuflw.  Operands 2..5 are four constant
;; selectors (each matched by const_0_to_3_operand) that start the
;; selector list applied to the V8HI operand 1.  The output code packs
;; them into one immediate (2 bits each, operand 2 lowest) and stores it
;; in operands[2] for printing.
;; NOTE(review): the remainder of the selector list, the selecting RTL
;; operator, the insn condition and the declaration of `mask' are on
;; lines not visible in this view.
4552 (define_insn "sse2_pshuflw_1"
4553 [(set (match_operand:V8HI 0 "register_operand" "=x")
4555 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
4556 (parallel [(match_operand 2 "const_0_to_3_operand" "")
4557 (match_operand 3 "const_0_to_3_operand" "")
4558 (match_operand 4 "const_0_to_3_operand" "")
4559 (match_operand 5 "const_0_to_3_operand" "")
4567 mask |= INTVAL (operands[2]) << 0;
4568 mask |= INTVAL (operands[3]) << 2;
4569 mask |= INTVAL (operands[4]) << 4;
4570 mask |= INTVAL (operands[5]) << 6;
4571 operands[2] = GEN_INT (mask);
4573 return "pshuflw\t{%2, %1, %0|%0, %1, %2}";
4575 [(set_attr "type" "sselog")
4576 (set_attr "prefix_rep" "1")
4577 (set_attr "mode" "TI")])
;; sse2_pshufhw: expander taking a V8HI destination (operand 0), a V8HI
;; source in a register or memory (operand 1) and a constant integer mask
;; (operand 2).  The preparation code splits the mask into four 2-bit
;; fields, adds 4 to each so the values fall in 4..7, and passes them to
;; gen_sse2_pshufhw_1.
;; NOTE(review): the condition string and the closing lines of the
;; preparation code are not visible in this view.
4579 (define_expand "sse2_pshufhw"
4580 [(match_operand:V8HI 0 "register_operand" "")
4581 (match_operand:V8HI 1 "nonimmediate_operand" "")
4582 (match_operand:SI 2 "const_int_operand" "")]
4585 int mask = INTVAL (operands[2]);
4586 emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
4587 GEN_INT (((mask >> 0) & 3) + 4),
4588 GEN_INT (((mask >> 2) & 3) + 4),
4589 GEN_INT (((mask >> 4) & 3) + 4),
4590 GEN_INT (((mask >> 6) & 3) + 4)));
;; sse2_pshufhw_1: emits pshufhw.  The selector list starts with the fixed
;; index 0 and ends with four constant operands 2..5, each matched by
;; const_4_to_7_operand.  The output code subtracts 4 from each of those
;; operands, packs the results into one immediate (2 bits each, operand 2
;; lowest) and stores it in operands[2] for printing.
;; NOTE(review): part of the selector list, the selecting RTL operator,
;; the insn condition and the declaration of `mask' are on lines not
;; visible in this view.
4594 (define_insn "sse2_pshufhw_1"
4595 [(set (match_operand:V8HI 0 "register_operand" "=x")
4597 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
4598 (parallel [(const_int 0)
4602 (match_operand 2 "const_4_to_7_operand" "")
4603 (match_operand 3 "const_4_to_7_operand" "")
4604 (match_operand 4 "const_4_to_7_operand" "")
4605 (match_operand 5 "const_4_to_7_operand" "")])))]
4609 mask |= (INTVAL (operands[2]) - 4) << 0;
4610 mask |= (INTVAL (operands[3]) - 4) << 2;
4611 mask |= (INTVAL (operands[4]) - 4) << 4;
4612 mask |= (INTVAL (operands[5]) - 4) << 6;
4613 operands[2] = GEN_INT (mask);
4615 return "pshufhw\t{%2, %1, %0|%0, %1, %2}";
4617 [(set_attr "type" "sselog")
4618 (set_attr "prefix_rep" "1")
4619 (set_attr "mode" "TI")])
;; sse2_loadd: expander with a V4SI register destination (operand 0) and
;; an SI source in a register or memory (operand 1).  The preparation
;; statement sets operands[2] to the all-zero V4SI constant.
;; NOTE(review): most of the RTL template and the condition are on lines
;; not visible in this view, so how operand 2 is used is not shown here.
4621 (define_expand "sse2_loadd"
4622 [(set (match_operand:V4SI 0 "register_operand" "")
4625 (match_operand:SI 1 "nonimmediate_operand" ""))
4629 "operands[2] = CONST0_RTX (V4SImode);")
;; sse2_loadld: places the SI operand 2 into a V4SI register, with operand
;; 1 supplying either constant zero (constraint "C") or the tied
;; destination.  Four alternatives, in order:
;;   0: movd from memory          (destination class Y2, mode TI)
;;   1: movd from a general reg   (destination class Yi, mode TI)
;;   2: movss from memory         (mode V4SF)
;;   3: movss from an SSE reg, operand 1 tied to the destination (mode SF)
;; NOTE(review): the RTL operators combining the operands, the tail of the
;; pattern and the insn condition are on lines not visible in this view.
4631 (define_insn "sse2_loadld"
4632 [(set (match_operand:V4SI 0 "register_operand" "=Y2,Yi,x,x")
4635 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x"))
4636 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,C,0")
4640 movd\t{%2, %0|%0, %2}
4641 movd\t{%2, %0|%0, %2}
4642 movss\t{%2, %0|%0, %2}
4643 movss\t{%2, %0|%0, %2}"
4644 [(set_attr "type" "ssemov")
4645 (set_attr "mode" "TI,TI,V4SF,SF")])
;; sse2_stored: insn-and-split that stores element 0 of V4SI operand 1 to
;; an SI destination (memory or SSE register in alternative 0, general
;; register in alternative 1).  The split applies after reload when
;; TARGET_INTER_UNIT_MOVES holds, or the destination is memory, or the
;; destination is not a general register.  It rewrites the insn as a plain
;; (set (match_dup 0) (match_dup 1)) after replacing operand 1 with an
;; SImode REG that has the same register number.
;; NOTE(review): the selecting RTL operator, the insn condition and the
;; output template are on lines not visible in this view.
4647 (define_insn_and_split "sse2_stored"
4648 [(set (match_operand:SI 0 "nonimmediate_operand" "=mx,r")
4650 (match_operand:V4SI 1 "register_operand" "x,Yi")
4651 (parallel [(const_int 0)])))]
4654 "&& reload_completed
4655 && (TARGET_INTER_UNIT_MOVES
4656 || MEM_P (operands [0])
4657 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
4658 [(set (match_dup 0) (match_dup 1))]
4660 operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));
;; *vec_ext_v4si_mem: insn-and-split for extracting a constant-indexed
;; element (operand 2, predicate const_0_to_3_operand) from a V4SI in
;; offsettable memory (constraint "o") into an SI general register.  The
;; split code emits an ordinary move from the SImode slot at byte offset
;; 4 * index within operand 1, via adjust_address.
;; NOTE(review): the selecting RTL operator, the conditions and the output
;; template are on lines not visible in this view.
4663 (define_insn_and_split "*vec_ext_v4si_mem"
4664 [(set (match_operand:SI 0 "register_operand" "=r")
4666 (match_operand:V4SI 1 "memory_operand" "o")
4667 (parallel [(match_operand 2 "const_0_to_3_operand" "")])))]
4673 int i = INTVAL (operands[2]);
4675 emit_move_insn (operands[0], adjust_address (operands[1], SImode, i*4));
;; sse_storeq: expander whose template sets a DI destination (register or
;; memory) from V2DI register operand 1 with the single selector index 0.
;; NOTE(review): the selecting RTL operator, the condition and the
;; preparation code are on lines not visible in this view.
4679 (define_expand "sse_storeq"
4680 [(set (match_operand:DI 0 "nonimmediate_operand" "")
4682 (match_operand:V2DI 1 "register_operand" "")
4683 (parallel [(const_int 0)])))]
;; *sse2_storeq_rex64: 64-bit-only pattern for element 0 of a V2DI into a
;; DI destination.  The condition rejects the case where both operands are
;; memory.  Three alternatives: SSE reg -> memory or SSE reg; Yi-class reg
;; -> general reg; offsettable memory -> general reg.  Only the third
;; alternative's template (mov{q}) is visible here; it is typed imov with
;; mode DI, while the first two have "*" for both attributes.
;; NOTE(review): the selecting RTL operator and the first two output
;; templates are on lines not visible in this view.
4687 (define_insn "*sse2_storeq_rex64"
4688 [(set (match_operand:DI 0 "nonimmediate_operand" "=mx,r,r")
4690 (match_operand:V2DI 1 "nonimmediate_operand" "x,Yi,o")
4691 (parallel [(const_int 0)])))]
4692 "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4696 mov{q}\t{%1, %0|%0, %1}"
4697 [(set_attr "type" "*,*,imov")
4698 (set_attr "mode" "*,*,DI")])
;; *sse2_storeq: pattern for element 0 of V2DI register operand 1 into a
;; DI destination that is memory or an SSE register ("=mx").
;; NOTE(review): the selecting RTL operator, the insn condition and the
;; output template are on lines not visible in this view.
;;
;; The text from embedded line 4709 onward repeats the same operands with
;; empty constraints, then gives a condition involving
;; TARGET_INTER_UNIT_MOVES / MEM_P / GENERAL_REGNO_P, a replacement
;; (set (match_dup 0) (match_dup 1)), and code that rewrites operand 1 as
;; a DImode REG with the same register number.  This reads like a separate
;; split definition whose header line is not visible here -- TODO confirm
;; against the full file.
4700 (define_insn "*sse2_storeq"
4701 [(set (match_operand:DI 0 "nonimmediate_operand" "=mx")
4703 (match_operand:V2DI 1 "register_operand" "x")
4704 (parallel [(const_int 0)])))]
4709 [(set (match_operand:DI 0 "nonimmediate_operand" "")
4711 (match_operand:V2DI 1 "register_operand" "")
4712 (parallel [(const_int 0)])))]
4715 && (TARGET_INTER_UNIT_MOVES
4716 || MEM_P (operands [0])
4717 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
4718 [(set (match_dup 0) (match_dup 1))]
4720 operands[1] = gen_rtx_REG (DImode, REGNO (operands[1]));
;; *vec_extractv2di_1_rex64: 64-bit-only pattern for selector index 1 of a
;; V2DI into a DI destination; both operands being memory is rejected.
;; Alternatives, in order:
;;   0: movhps, SSE reg -> memory
;;   1: psrldq by 8 on the destination, source tied to the destination
;;   2: movq from %H1 of an offsettable memory operand into an SSE reg
;;   3: mov{q} from %H1 of an offsettable memory operand into a general reg
;; NOTE(review): the selecting RTL operator and the "@" template opener
;; are on lines not visible in this view.
4723 (define_insn "*vec_extractv2di_1_rex64"
4724 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x,r")
4726 (match_operand:V2DI 1 "nonimmediate_operand" "x,0,o,o")
4727 (parallel [(const_int 1)])))]
4728 "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4730 movhps\t{%1, %0|%0, %1}
4731 psrldq\t{$8, %0|%0, 8}
4732 movq\t{%H1, %0|%0, %H1}
4733 mov{q}\t{%H1, %0|%0, %H1}"
4734 [(set_attr "type" "ssemov,sseishft,ssemov,imov")
4735 (set_attr "memory" "*,none,*,*")
4736 (set_attr "mode" "V2SF,TI,TI,DI")])
;; *vec_extractv2di_1_sse2: pattern for selector index 1 of a V2DI into a
;; DI destination; the visible part of the condition requires TARGET_SSE2
;; and rejects both operands being memory.  Alternatives, in order:
;;   0: movhps, SSE reg -> memory
;;   1: psrldq by 8 on the destination, source tied to the destination
;;   2: movq from %H1 of an offsettable memory operand into an SSE reg
;; NOTE(review): the first line of the condition, the selecting RTL
;; operator and the "@" template opener are not visible in this view.
4738 (define_insn "*vec_extractv2di_1_sse2"
4739 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
4741 (match_operand:V2DI 1 "nonimmediate_operand" "x,0,o")
4742 (parallel [(const_int 1)])))]
4744 && TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4746 movhps\t{%1, %0|%0, %1}
4747 psrldq\t{$8, %0|%0, 8}
4748 movq\t{%H1, %0|%0, %H1}"
4749 [(set_attr "type" "ssemov,sseishft,ssemov")
4750 (set_attr "memory" "*,none,*")
4751 (set_attr "mode" "V2SF,TI,TI")])
;; *vec_extractv2di_1_sse: SSE-without-SSE2 pattern for selector index 1
;; of a V2DI into a DI destination; both operands being memory is
;; rejected.  Alternatives, in order:
;;   0: movhps, SSE reg -> memory
;;   1: movhlps, SSE reg -> SSE reg
;;   2: movlps from %H1 of an offsettable memory operand into an SSE reg
;; NOTE(review): the selecting RTL operator and the "@" template opener
;; are on lines not visible in this view.
4753 ;; Not sure this is ever used, but it doesn't hurt to have it. -aoliva
4754 (define_insn "*vec_extractv2di_1_sse"
4755 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
4757 (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o")
4758 (parallel [(const_int 1)])))]
4759 "!TARGET_SSE2 && TARGET_SSE
4760 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4762 movhps\t{%1, %0|%0, %1}
4763 movhlps\t{%1, %0|%0, %1}
4764 movlps\t{%H1, %0|%0, %H1}"
4765 [(set_attr "type" "ssemov")
4766 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; *vec_dupv4si: builds a V4SI from the SI register operand 1.  Two
;; alternatives:
;;   0: pshufd with immediate 0, source and destination in class Y2
;;   1: shufps with immediate 0 on the destination itself, source tied to
;;      the destination (mode V4SF)
;; NOTE(review): the RTL operator around operand 1, the insn condition and
;; the "@" template opener are on lines not visible in this view.
4768 (define_insn "*vec_dupv4si"
4769 [(set (match_operand:V4SI 0 "register_operand" "=Y2,x")
4771 (match_operand:SI 1 "register_operand" " Y2,0")))]
4774 pshufd\t{$0, %1, %0|%0, %1, 0}
4775 shufps\t{$0, %0, %0|%0, %0, 0}"
4776 [(set_attr "type" "sselog1")
4777 (set_attr "mode" "TI,V4SF")])
;; *vec_dupv2di: builds a V2DI from the DI register operand 1, which is
;; tied to the destination in both alternatives.  Alternative 0 is typed
;; sselog1 with mode TI, alternative 1 is typed ssemov with mode V4SF.
;; NOTE(review): the RTL operator around operand 1, the insn condition and
;; both output templates are on lines not visible in this view.
4779 (define_insn "*vec_dupv2di"
4780 [(set (match_operand:V2DI 0 "register_operand" "=Y2,x")
4782 (match_operand:DI 1 "register_operand" " 0 ,0")))]
4787 [(set_attr "type" "sselog1,ssemov")
4788 (set_attr "mode" "TI,V4SF")])
;; *vec_concatv2si_sse4_1: forms a V2SI from the two SI operands 1 and 2.
;; Alternatives, in order:
;;   0: pinsrd with immediate 1, operand 2 from a general reg or memory
;;   1: punpckldq with operand 2 in an SSE reg
;;   2: movd of operand 1 when operand 2 is constant zero ("C")
;;   3: punpckldq on MMX registers (operand 2 may be memory)
;;   4: movd into an MMX register when operand 2 is constant zero
;; Only alternative 0 sets prefix_extra.
;; NOTE(review): the RTL operator joining the operands, the insn condition
;; and the "@" template opener are on lines not visible in this view.
4790 (define_insn "*vec_concatv2si_sse4_1"
4791 [(set (match_operand:V2SI 0 "register_operand" "=x,x,x ,*y ,*y")
4793 (match_operand:SI 1 "nonimmediate_operand" "0 ,0,rm, 0 ,rm")
4794 (match_operand:SI 2 "vector_move_operand" "rm,x,C ,*ym,C")))]
4797 pinsrd\t{$0x1, %2, %0|%0, %2, 0x1}
4798 punpckldq\t{%2, %0|%0, %2}
4799 movd\t{%1, %0|%0, %1}
4800 punpckldq\t{%2, %0|%0, %2}
4801 movd\t{%1, %0|%0, %1}"
4802 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
4803 (set_attr "prefix_extra" "1,*,*,*,*")
4804 (set_attr "mode" "TI,TI,TI,DI,DI")])
;; *vec_concatv2si_sse2: forms a V2SI from the two SI operands 1 and 2,
;; where operand 2 is a register or constant zero.  Alternatives, in order:
;;   0: punpckldq on SSE registers, operand 1 tied to the destination
;;   1: movd of operand 1 (general reg or memory) when operand 2 is zero
;;   2: punpckldq on MMX registers, operand 1 tied to the destination
;;   3: movd into an MMX register when operand 2 is zero
;; NOTE(review): the RTL operator joining the operands, the insn condition
;; and the "@" template opener are on lines not visible in this view.
4806 ;; ??? In theory we can match memory for the MMX alternative, but allowing
4807 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
4808 ;; alternatives pretty much forces the MMX alternative to be chosen.
4809 (define_insn "*vec_concatv2si_sse2"
4810 [(set (match_operand:V2SI 0 "register_operand" "=x,x ,*y,*y")
4812 (match_operand:SI 1 "nonimmediate_operand" " 0,rm, 0,rm")
4813 (match_operand:SI 2 "reg_or_0_operand" " x,C ,*y, C")))]
4816 punpckldq\t{%2, %0|%0, %2}
4817 movd\t{%1, %0|%0, %1}
4818 punpckldq\t{%2, %0|%0, %2}
4819 movd\t{%1, %0|%0, %1}"
4820 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
4821 (set_attr "mode" "TI,TI,DI,DI")])
;; *vec_concatv2si_sse: forms a V2SI from the two SI operands 1 and 2,
;; where operand 2 is a register or constant zero.  Alternatives, in order:
;;   0: unpcklps on SSE registers, operand 1 tied to the destination
;;   1: movss of operand 1 from memory when operand 2 is zero
;;   2: punpckldq on MMX registers, operand 1 tied to the destination
;;   3: movd into an MMX register when operand 2 is zero
;; The two SSE alternatives use mode V4SF.
;; NOTE(review): the RTL operator joining the operands, the insn condition
;; and the "@" template opener are on lines not visible in this view.
4823 (define_insn "*vec_concatv2si_sse"
4824 [(set (match_operand:V2SI 0 "register_operand" "=x,x,*y,*y")
4826 (match_operand:SI 1 "nonimmediate_operand" " 0,m, 0,*rm")
4827 (match_operand:SI 2 "reg_or_0_operand" " x,C,*y,C")))]
4830 unpcklps\t{%2, %0|%0, %2}
4831 movss\t{%1, %0|%0, %1}
4832 punpckldq\t{%2, %0|%0, %2}
4833 movd\t{%1, %0|%0, %1}"
4834 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
4835 (set_attr "mode" "V4SF,V4SF,DI,DI")])
;; *vec_concatv4si_1: forms a V4SI from two V2SI operands; operand 1 is
;; tied to the destination in every alternative.  Alternatives, in order:
;;   0: punpcklqdq, operand 2 in a Y2-class register
;;   1: movlhps, operand 2 in an SSE register
;;   2: movhps, operand 2 in memory
;; NOTE(review): the RTL operator joining the operands, the insn condition
;; and the "@" template opener are on lines not visible in this view.
4837 (define_insn "*vec_concatv4si_1"
4838 [(set (match_operand:V4SI 0 "register_operand" "=Y2,x,x")
4840 (match_operand:V2SI 1 "register_operand" " 0 ,0,0")
4841 (match_operand:V2SI 2 "nonimmediate_operand" " Y2,x,m")))]
4844 punpcklqdq\t{%2, %0|%0, %2}
4845 movlhps\t{%2, %0|%0, %2}
4846 movhps\t{%2, %0|%0, %2}"
4847 [(set_attr "type" "sselog,ssemov,ssemov")
4848 (set_attr "mode" "TI,V4SF,V2SF")])
;; vec_concatv2di: 32-bit-only (!TARGET_64BIT && TARGET_SSE) pattern that
;; forms a V2DI from the two DI operands 1 and 2.  Alternatives, in order:
;;   0: movq from memory, operand 2 constant zero
;;   1: movq2dq from an MMX register, operand 2 constant zero
;;   2: punpcklqdq, operand 1 tied to the destination, operand 2 in Y2
;;   3: movlhps, operand 1 tied, operand 2 in an SSE register
;;   4: movhps, operand 1 tied, operand 2 in memory
;;   5: movlps of operand 1 from memory, operand 2 tied to the destination
;; NOTE(review): the RTL operator joining the operands and the "@"
;; template opener are on lines not visible in this view.
4850 (define_insn "vec_concatv2di"
4851 [(set (match_operand:V2DI 0 "register_operand" "=Y2,?Y2,Y2,x,x,x")
4853 (match_operand:DI 1 "nonimmediate_operand" " m,*y ,0 ,0,0,m")
4854 (match_operand:DI 2 "vector_move_operand" " C, C,Y2,x,m,0")))]
4855 "!TARGET_64BIT && TARGET_SSE"
4857 movq\t{%1, %0|%0, %1}
4858 movq2dq\t{%1, %0|%0, %1}
4859 punpcklqdq\t{%2, %0|%0, %2}
4860 movlhps\t{%2, %0|%0, %2}
4861 movhps\t{%2, %0|%0, %2}
4862 movlps\t{%1, %0|%0, %1}"
4863 [(set_attr "type" "ssemov,ssemov,sselog,ssemov,ssemov,ssemov")
4864 (set_attr "mode" "TI,TI,TI,V4SF,V2SF,V2SF")])
;; *vec_concatv2di_rex64_sse4_1: 64-bit SSE4.1 pattern that forms a V2DI
;; from the two DI operands 1 and 2.  Alternatives, in order:
;;   0: pinsrq with immediate 1, operand 1 tied, operand 2 in reg/memory
;;   1: movq from memory, operand 2 constant zero
;;   2: movq from a general register (destination class Yi), operand 2 zero
;;   3: movq2dq from an MMX register, operand 2 zero
;;   4: punpcklqdq, operand 1 tied, operand 2 in an SSE register
;;   5: movlhps, operand 1 tied, operand 2 in an SSE register
;;   6: movhps, operand 1 tied, operand 2 in memory
;;   7: movlps of operand 1 from memory, operand 2 tied to the destination
;; Only alternative 0 sets prefix_extra.
;; NOTE(review): the RTL operator joining the operands and the "@"
;; template opener are on lines not visible in this view.
4866 (define_insn "*vec_concatv2di_rex64_sse4_1"
4867 [(set (match_operand:V2DI 0 "register_operand" "=x,x,Yi,!x,x,x,x,x")
4869 (match_operand:DI 1 "nonimmediate_operand" " 0,m,r ,*y,0,0,0,m")
4870 (match_operand:DI 2 "vector_move_operand" "rm,C,C ,C ,x,x,m,0")))]
4871 "TARGET_64BIT && TARGET_SSE4_1"
4873 pinsrq\t{$0x1, %2, %0|%0, %2, 0x1}
4874 movq\t{%1, %0|%0, %1}
4875 movq\t{%1, %0|%0, %1}
4876 movq2dq\t{%1, %0|%0, %1}
4877 punpcklqdq\t{%2, %0|%0, %2}
4878 movlhps\t{%2, %0|%0, %2}
4879 movhps\t{%2, %0|%0, %2}
4880 movlps\t{%1, %0|%0, %1}"
4881 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,sselog,ssemov,ssemov,ssemov")
4882 (set_attr "prefix_extra" "1,*,*,*,*,*,*,*")
4883 (set_attr "mode" "TI,TI,TI,TI,TI,V4SF,V2SF,V2SF")])
;; *vec_concatv2di_rex64_sse: 64-bit SSE pattern that forms a V2DI from
;; the two DI operands 1 and 2.  Alternatives, in order:
;;   0: movq from memory, operand 2 constant zero
;;   1: movq from a general register (destination class Yi), operand 2 zero
;;   2: movq2dq from an MMX register, operand 2 zero
;;   3: punpcklqdq, operand 1 tied, operand 2 in a Y2-class register
;;   4: movlhps, operand 1 tied, operand 2 in an SSE register
;;   5: movhps, operand 1 tied, operand 2 in memory
;;   6: movlps of operand 1 from memory, operand 2 tied to the destination
;; NOTE(review): the RTL operator joining the operands and the "@"
;; template opener are on lines not visible in this view.
4885 (define_insn "*vec_concatv2di_rex64_sse"
4886 [(set (match_operand:V2DI 0 "register_operand" "=Y2,Yi,!Y2,Y2,x,x,x")
4888 (match_operand:DI 1 "nonimmediate_operand" " m,r ,*y ,0 ,0,0,m")
4889 (match_operand:DI 2 "vector_move_operand" " C,C ,C ,Y2,x,m,0")))]
4890 "TARGET_64BIT && TARGET_SSE"
4892 movq\t{%1, %0|%0, %1}
4893 movq\t{%1, %0|%0, %1}
4894 movq2dq\t{%1, %0|%0, %1}
4895 punpcklqdq\t{%2, %0|%0, %2}
4896 movlhps\t{%2, %0|%0, %2}
4897 movhps\t{%2, %0|%0, %2}
4898 movlps\t{%1, %0|%0, %1}"
4899 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov,ssemov")
4900 (set_attr "mode" "TI,TI,TI,TI,V4SF,V2SF,V2SF")])
;; vec_unpacku_hi_v16qi: expander from a V16QI register (operand 1) to a
;; V8HI register (operand 0).  It dispatches to one of three helpers --
;; ix86_expand_sse4_unpack, ix86_expand_sse5_unpack (under TARGET_SSE5) or
;; ix86_expand_sse_unpack -- always passing (operands, true, true).
;; NOTE(review): presumably the flags mean (unsigned, high half), matching
;; the "u"/"hi" in the name; verify against the helper definitions.  The
;; expander condition, the first guard and the final else are on lines not
;; visible in this view.
4902 (define_expand "vec_unpacku_hi_v16qi"
4903 [(match_operand:V8HI 0 "register_operand" "")
4904 (match_operand:V16QI 1 "register_operand" "")]
4908 ix86_expand_sse4_unpack (operands, true, true);
4909 else if (TARGET_SSE5)
4910 ix86_expand_sse5_unpack (operands, true, true);
4912 ix86_expand_sse_unpack (operands, true, true);
;; vec_unpacks_hi_v16qi: expander from a V16QI register (operand 1) to a
;; V8HI register (operand 0).  It dispatches to ix86_expand_sse4_unpack,
;; ix86_expand_sse5_unpack (under TARGET_SSE5) or ix86_expand_sse_unpack,
;; always passing (operands, false, true).
;; NOTE(review): presumably the flags mean (unsigned, high half), matching
;; the "s"/"hi" in the name; verify against the helper definitions.  The
;; expander condition, the first guard and the final else are on lines not
;; visible in this view.
4916 (define_expand "vec_unpacks_hi_v16qi"
4917 [(match_operand:V8HI 0 "register_operand" "")
4918 (match_operand:V16QI 1 "register_operand" "")]
4922 ix86_expand_sse4_unpack (operands, false, true);
4923 else if (TARGET_SSE5)
4924 ix86_expand_sse5_unpack (operands, false, true);
4926 ix86_expand_sse_unpack (operands, false, true);
;; vec_unpacku_lo_v16qi: expander from a V16QI register (operand 1) to a
;; V8HI register (operand 0).  It dispatches to ix86_expand_sse4_unpack,
;; ix86_expand_sse5_unpack (under TARGET_SSE5) or ix86_expand_sse_unpack,
;; always passing (operands, true, false).
;; NOTE(review): presumably the flags mean (unsigned, high half), matching
;; the "u"/"lo" in the name; verify against the helper definitions.  The
;; expander condition, the first guard and the final else are on lines not
;; visible in this view.
4930 (define_expand "vec_unpacku_lo_v16qi"
4931 [(match_operand:V8HI 0 "register_operand" "")
4932 (match_operand:V16QI 1 "register_operand" "")]
4936 ix86_expand_sse4_unpack (operands, true, false);
4937 else if (TARGET_SSE5)
4938 ix86_expand_sse5_unpack (operands, true, false);
4940 ix86_expand_sse_unpack (operands, true, false);
;; vec_unpacks_lo_v16qi: expander from a V16QI register (operand 1) to a
;; V8HI register (operand 0).  It dispatches to ix86_expand_sse4_unpack,
;; ix86_expand_sse5_unpack (under TARGET_SSE5) or ix86_expand_sse_unpack,
;; always passing (operands, false, false).
;; NOTE(review): presumably the flags mean (unsigned, high half), matching
;; the "s"/"lo" in the name; verify against the helper definitions.  The
;; expander condition, the first guard and the final else are on lines not
;; visible in this view.
4944 (define_expand "vec_unpacks_lo_v16qi"
4945 [(match_operand:V8HI 0 "register_operand" "")
4946 (match_operand:V16QI 1 "register_operand" "")]
4950 ix86_expand_sse4_unpack (operands, false, false);
4951 else if (TARGET_SSE5)
4952 ix86_expand_sse5_unpack (operands, false, false);
4954 ix86_expand_sse_unpack (operands, false, false);
;; vec_unpacku_hi_v8hi: expander from a V8HI register (operand 1) to a
;; V4SI register (operand 0).  It dispatches to ix86_expand_sse4_unpack,
;; ix86_expand_sse5_unpack (under TARGET_SSE5) or ix86_expand_sse_unpack,
;; always passing (operands, true, true).
;; NOTE(review): presumably the flags mean (unsigned, high half); verify
;; against the helper definitions.  The expander condition, the first
;; guard and the final else are on lines not visible in this view.
4958 (define_expand "vec_unpacku_hi_v8hi"
4959 [(match_operand:V4SI 0 "register_operand" "")
4960 (match_operand:V8HI 1 "register_operand" "")]
4964 ix86_expand_sse4_unpack (operands, true, true);
4965 else if (TARGET_SSE5)
4966 ix86_expand_sse5_unpack (operands, true, true);
4968 ix86_expand_sse_unpack (operands, true, true);
;; vec_unpacks_hi_v8hi: expander from a V8HI register (operand 1) to a
;; V4SI register (operand 0).  It dispatches to ix86_expand_sse4_unpack,
;; ix86_expand_sse5_unpack (under TARGET_SSE5) or ix86_expand_sse_unpack,
;; always passing (operands, false, true).
;; NOTE(review): presumably the flags mean (unsigned, high half); verify
;; against the helper definitions.  The expander condition, the first
;; guard and the final else are on lines not visible in this view.
4972 (define_expand "vec_unpacks_hi_v8hi"
4973 [(match_operand:V4SI 0 "register_operand" "")
4974 (match_operand:V8HI 1 "register_operand" "")]
4978 ix86_expand_sse4_unpack (operands, false, true);
4979 else if (TARGET_SSE5)
4980 ix86_expand_sse5_unpack (operands, false, true);
4982 ix86_expand_sse_unpack (operands, false, true);
;; vec_unpacku_lo_v8hi: expander from a V8HI register (operand 1) to a
;; V4SI register (operand 0).  It dispatches to ix86_expand_sse4_unpack,
;; ix86_expand_sse5_unpack (under TARGET_SSE5) or ix86_expand_sse_unpack,
;; always passing (operands, true, false).
;; NOTE(review): presumably the flags mean (unsigned, high half); verify
;; against the helper definitions.  The expander condition, the first
;; guard and the final else are on lines not visible in this view.
4986 (define_expand "vec_unpacku_lo_v8hi"
4987 [(match_operand:V4SI 0 "register_operand" "")
4988 (match_operand:V8HI 1 "register_operand" "")]
4992 ix86_expand_sse4_unpack (operands, true, false);
4993 else if (TARGET_SSE5)
4994 ix86_expand_sse5_unpack (operands, true, false);
4996 ix86_expand_sse_unpack (operands, true, false);
;; vec_unpacks_lo_v8hi: expander from a V8HI register (operand 1) to a
;; V4SI register (operand 0).  It dispatches to ix86_expand_sse4_unpack,
;; ix86_expand_sse5_unpack (under TARGET_SSE5) or ix86_expand_sse_unpack,
;; always passing (operands, false, false).
;; NOTE(review): presumably the flags mean (unsigned, high half); verify
;; against the helper definitions.  The expander condition, the first
;; guard and the final else are on lines not visible in this view.
5000 (define_expand "vec_unpacks_lo_v8hi"
5001 [(match_operand:V4SI 0 "register_operand" "")
5002 (match_operand:V8HI 1 "register_operand" "")]
5006 ix86_expand_sse4_unpack (operands, false, false);
5007 else if (TARGET_SSE5)
5008 ix86_expand_sse5_unpack (operands, false, false);
5010 ix86_expand_sse_unpack (operands, false, false);
;; vec_unpacku_hi_v4si: expander from a V4SI register (operand 1) to a
;; V2DI register (operand 0).  It dispatches to ix86_expand_sse4_unpack,
;; ix86_expand_sse5_unpack (under TARGET_SSE5) or ix86_expand_sse_unpack,
;; always passing (operands, true, true).
;; NOTE(review): presumably the flags mean (unsigned, high half); verify
;; against the helper definitions.  The expander condition, the first
;; guard and the final else are on lines not visible in this view.
5014 (define_expand "vec_unpacku_hi_v4si"
5015 [(match_operand:V2DI 0 "register_operand" "")
5016 (match_operand:V4SI 1 "register_operand" "")]
5020 ix86_expand_sse4_unpack (operands, true, true);
5021 else if (TARGET_SSE5)
5022 ix86_expand_sse5_unpack (operands, true, true);
5024 ix86_expand_sse_unpack (operands, true, true);
;; vec_unpacks_hi_v4si: expander from a V4SI register (operand 1) to a
;; V2DI register (operand 0).  It dispatches to ix86_expand_sse4_unpack,
;; ix86_expand_sse5_unpack (under TARGET_SSE5) or ix86_expand_sse_unpack,
;; always passing (operands, false, true).
;; NOTE(review): presumably the flags mean (unsigned, high half); verify
;; against the helper definitions.  The expander condition, the first
;; guard and the final else are on lines not visible in this view.
5028 (define_expand "vec_unpacks_hi_v4si"
5029 [(match_operand:V2DI 0 "register_operand" "")
5030 (match_operand:V4SI 1 "register_operand" "")]
5034 ix86_expand_sse4_unpack (operands, false, true);
5035 else if (TARGET_SSE5)
5036 ix86_expand_sse5_unpack (operands, false, true);
5038 ix86_expand_sse_unpack (operands, false, true);
;; vec_unpacku_lo_v4si: expander from a V4SI register (operand 1) to a
;; V2DI register (operand 0).  It dispatches to ix86_expand_sse4_unpack,
;; ix86_expand_sse5_unpack (under TARGET_SSE5) or ix86_expand_sse_unpack,
;; always passing (operands, true, false).
;; NOTE(review): presumably the flags mean (unsigned, high half); verify
;; against the helper definitions.  The expander condition, the first
;; guard and the final else are on lines not visible in this view.
5042 (define_expand "vec_unpacku_lo_v4si"
5043 [(match_operand:V2DI 0 "register_operand" "")
5044 (match_operand:V4SI 1 "register_operand" "")]
5048 ix86_expand_sse4_unpack (operands, true, false);
5049 else if (TARGET_SSE5)
5050 ix86_expand_sse5_unpack (operands, true, false);
5052 ix86_expand_sse_unpack (operands, true, false);
;; vec_unpacks_lo_v4si: expander from a V4SI register (operand 1) to a
;; V2DI register (operand 0).  It dispatches to ix86_expand_sse4_unpack,
;; ix86_expand_sse5_unpack (under TARGET_SSE5) or ix86_expand_sse_unpack,
;; always passing (operands, false, false).
;; NOTE(review): presumably the flags mean (unsigned, high half); verify
;; against the helper definitions.  The expander condition, the first
;; guard and the final else are on lines not visible in this view.
5056 (define_expand "vec_unpacks_lo_v4si"
5057 [(match_operand:V2DI 0 "register_operand" "")
5058 (match_operand:V4SI 1 "register_operand" "")]
5062 ix86_expand_sse4_unpack (operands, false, false);
5063 else if (TARGET_SSE5)
5064 ix86_expand_sse5_unpack (operands, false, false);
5066 ix86_expand_sse_unpack (operands, false, false);
5070 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5074 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; sse2_uavgv16qi3: expander over V16QI operands 1 and 2 (register or
;; memory) with a V16QI register destination.  The template includes a
;; constant vector of sixteen 1s.  The preparation statement calls
;; ix86_fixup_binary_operands_no_copy with PLUS and V16QImode.
;; NOTE(review): the RTL operators that combine the operands with the
;; constant vector, and the expander condition, are on lines not visible
;; in this view.
5076 (define_expand "sse2_uavgv16qi3"
5077 [(set (match_operand:V16QI 0 "register_operand" "")
5083 (match_operand:V16QI 1 "nonimmediate_operand" ""))
5085 (match_operand:V16QI 2 "nonimmediate_operand" "")))
5086 (const_vector:V16QI [(const_int 1) (const_int 1)
5087 (const_int 1) (const_int 1)
5088 (const_int 1) (const_int 1)
5089 (const_int 1) (const_int 1)
5090 (const_int 1) (const_int 1)
5091 (const_int 1) (const_int 1)
5092 (const_int 1) (const_int 1)
5093 (const_int 1) (const_int 1)]))
5096 "ix86_fixup_binary_operands_no_copy (PLUS, V16QImode, operands);")
;; *sse2_uavgv16qi3: emits pavgb.  Operand 1 is tied to the destination
;; and marked commutative with operand 2 ("%0"); operand 2 may be an SSE
;; register or memory.  The template includes a constant vector of sixteen
;; 1s.  Enabled when TARGET_SSE2 holds and ix86_binary_operator_ok accepts
;; the operands for PLUS in V16QImode.
;; NOTE(review): the RTL operators that combine the operands with the
;; constant vector are on lines not visible in this view.
5098 (define_insn "*sse2_uavgv16qi3"
5099 [(set (match_operand:V16QI 0 "register_operand" "=x")
5105 (match_operand:V16QI 1 "nonimmediate_operand" "%0"))
5107 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))
5108 (const_vector:V16QI [(const_int 1) (const_int 1)
5109 (const_int 1) (const_int 1)
5110 (const_int 1) (const_int 1)
5111 (const_int 1) (const_int 1)
5112 (const_int 1) (const_int 1)
5113 (const_int 1) (const_int 1)
5114 (const_int 1) (const_int 1)
5115 (const_int 1) (const_int 1)]))
5117 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
5118 "pavgb\t{%2, %0|%0, %2}"
5119 [(set_attr "type" "sseiadd")
5120 (set_attr "prefix_data16" "1")
5121 (set_attr "mode" "TI")])
;; sse2_uavgv8hi3: expander over V8HI operands 1 and 2 (register or
;; memory) with a V8HI register destination.  The template includes a
;; constant vector of eight 1s.  The preparation statement calls
;; ix86_fixup_binary_operands_no_copy with PLUS and V8HImode.
;; NOTE(review): the RTL operators that combine the operands with the
;; constant vector, and the expander condition, are on lines not visible
;; in this view.
5123 (define_expand "sse2_uavgv8hi3"
5124 [(set (match_operand:V8HI 0 "register_operand" "")
5130 (match_operand:V8HI 1 "nonimmediate_operand" ""))
5132 (match_operand:V8HI 2 "nonimmediate_operand" "")))
5133 (const_vector:V8HI [(const_int 1) (const_int 1)
5134 (const_int 1) (const_int 1)
5135 (const_int 1) (const_int 1)
5136 (const_int 1) (const_int 1)]))
5139 "ix86_fixup_binary_operands_no_copy (PLUS, V8HImode, operands);")
;; *sse2_uavgv8hi3: emits pavgw.  Operand 1 is tied to the destination and
;; marked commutative with operand 2 ("%0"); operand 2 may be an SSE
;; register or memory.  The template includes a constant vector of eight
;; 1s.  Enabled when TARGET_SSE2 holds and ix86_binary_operator_ok accepts
;; the operands for PLUS in V8HImode.
;; NOTE(review): the RTL operators that combine the operands with the
;; constant vector are on lines not visible in this view.
5141 (define_insn "*sse2_uavgv8hi3"
5142 [(set (match_operand:V8HI 0 "register_operand" "=x")
5148 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
5150 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
5151 (const_vector:V8HI [(const_int 1) (const_int 1)
5152 (const_int 1) (const_int 1)
5153 (const_int 1) (const_int 1)
5154 (const_int 1) (const_int 1)]))
5156 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
5157 "pavgw\t{%2, %0|%0, %2}"
5158 [(set_attr "type" "sseiadd")
5159 (set_attr "prefix_data16" "1")
5160 (set_attr "mode" "TI")])
;; sse2_psadbw: emits psadbw.  The operation is modelled as an opaque
;; V2DI unspec over two V16QI inputs rather than as explicit RTL; operand
;; 1 is tied to the destination and operand 2 may be an SSE register or
;; memory.
;; NOTE(review): the unspec code and the insn condition are on lines not
;; visible in this view.
5162 ;; The correct representation for this is absolutely enormous, and
5163 ;; surely not generally useful.
5164 (define_insn "sse2_psadbw"
5165 [(set (match_operand:V2DI 0 "register_operand" "=x")
5166 (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "0")
5167 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
5170 "psadbw\t{%2, %0|%0, %2}"
5171 [(set_attr "type" "sseiadd")
5172 (set_attr "prefix_data16" "1")
5173 (set_attr "mode" "TI")])
;; <sse>_movmskp<ssemodesuffixf2c>: iterated over SSEMODEF2P (V4SF and
;; V2DF), giving one pattern per mode.  <sse> expands to "sse" or "sse2"
;; and <ssemodesuffixf2c> to "s" or "d", so the emitted mnemonics are
;; movmskps and movmskpd.  The source is an SSE register and the result
;; goes to an SImode general register.  Enabled when
;; SSE_VEC_FLOAT_MODE_P accepts the iterated mode.
;; NOTE(review): the RTL operator wrapping operand 1 is on lines not
;; visible in this view.
5175 (define_insn "<sse>_movmskp<ssemodesuffixf2c>"
5176 [(set (match_operand:SI 0 "register_operand" "=r")
5178 [(match_operand:SSEMODEF2P 1 "register_operand" "x")]
5180 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
5181 "movmskp<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
5182 [(set_attr "type" "ssecvt")
5183 (set_attr "mode" "<MODE>")])
;; sse2_pmovmskb: emits pmovmskb.  Modelled as an SImode unspec over the
;; V16QI SSE register operand 1, with the result in a general register.
;; NOTE(review): the unspec code and the insn condition are on lines not
;; visible in this view.
5185 (define_insn "sse2_pmovmskb"
5186 [(set (match_operand:SI 0 "register_operand" "=r")
5187 (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
5190 "pmovmskb\t{%1, %0|%0, %1}"
5191 [(set_attr "type" "ssecvt")
5192 (set_attr "prefix_data16" "1")
5193 (set_attr "mode" "SI")])
;; sse2_maskmovdqu: expander whose template sets a V16QI memory operand 0
;; from a V16QI unspec that takes register operands 1 and 2.
;; NOTE(review): the rest of the unspec operand list, the unspec code and
;; the expander condition are on lines not visible in this view.
5195 (define_expand "sse2_maskmovdqu"
5196 [(set (match_operand:V16QI 0 "memory_operand" "")
5197 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "")
5198 (match_operand:V16QI 2 "register_operand" "")
;; *sse2_maskmovdqu: 32-bit form (TARGET_SSE2 && !TARGET_64BIT); emits
;; maskmovdqu.  The store address is the SImode register operand 0, which
;; the "D" constraint pins to a specific register.  The unspec takes SSE
;; register operands 1 and 2 plus the previous contents of the same memory
;; location (mem of match_dup 0).
;; NOTE(review): the unspec code is on a line not visible in this view.
5204 (define_insn "*sse2_maskmovdqu"
5205 [(set (mem:V16QI (match_operand:SI 0 "register_operand" "D"))
5206 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
5207 (match_operand:V16QI 2 "register_operand" "x")
5208 (mem:V16QI (match_dup 0))]
5210 "TARGET_SSE2 && !TARGET_64BIT"
5211 ;; @@@ check ordering of operands in intel/nonintel syntax
5212 "maskmovdqu\t{%2, %1|%1, %2}"
5213 [(set_attr "type" "ssecvt")
5214 (set_attr "prefix_data16" "1")
5215 (set_attr "mode" "TI")])
;; *sse2_maskmovdqu_rex64: 64-bit form (TARGET_SSE2 && TARGET_64BIT);
;; emits maskmovdqu.  Same shape as the 32-bit pattern except that the
;; address register operand 0 is DImode; it is still pinned by the "D"
;; constraint.  The unspec takes SSE register operands 1 and 2 plus the
;; previous contents of the same memory location (mem of match_dup 0).
;; NOTE(review): the unspec code is on a line not visible in this view.
5217 (define_insn "*sse2_maskmovdqu_rex64"
5218 [(set (mem:V16QI (match_operand:DI 0 "register_operand" "D"))
5219 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
5220 (match_operand:V16QI 2 "register_operand" "x")
5221 (mem:V16QI (match_dup 0))]
5223 "TARGET_SSE2 && TARGET_64BIT"
5224 ;; @@@ check ordering of operands in intel/nonintel syntax
5225 "maskmovdqu\t{%2, %1|%1, %2}"
5226 [(set_attr "type" "ssecvt")
5227 (set_attr "prefix_data16" "1")
5228 (set_attr "mode" "TI")])
;; sse_ldmxcsr: modelled as an unspec_volatile whose only input is the SI
;; memory operand 0; the "memory" attribute is "load".
;; NOTE(review): the unspec code, the insn condition and the output
;; template are on lines not visible in this view.
5230 (define_insn "sse_ldmxcsr"
5231 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
5235 [(set_attr "type" "sse")
5236 (set_attr "memory" "load")])
;; sse_stmxcsr: sets the SI memory operand 0 from an unspec_volatile with
;; code UNSPECV_STMXCSR and a dummy (const_int 0) input; the "memory"
;; attribute is "store".
;; NOTE(review): the insn condition and the output template are on lines
;; not visible in this view.
5238 (define_insn "sse_stmxcsr"
5239 [(set (match_operand:SI 0 "memory_operand" "=m")
5240 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
5243 [(set_attr "type" "sse")
5244 (set_attr "memory" "store")])
;; sse_sfence: expander available under TARGET_SSE or TARGET_3DNOW_A.  The
;; preparation code creates operand 0 as a BLKmode MEM whose address is a
;; Pmode SCRATCH and marks it volatile; the template then uses it as the
;; input of a BLK unspec with code UNSPEC_SFENCE.
;; NOTE(review): the first line of the template (the set destination) and
;; the braces around the preparation code are not visible in this view.
5246 (define_expand "sse_sfence"
5248 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
5249 "TARGET_SSE || TARGET_3DNOW_A"
5251 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
5252 MEM_VOLATILE_P (operands[0]) = 1;
;; *sse_sfence: matching insn for the sfence expander.  It sets a BLK
;; operand 0 (no predicate, no constraint) from a BLK unspec of the same
;; operand with code UNSPEC_SFENCE, under TARGET_SSE or TARGET_3DNOW_A.
;; The "memory" attribute is "unknown".
;; NOTE(review): the output template is on a line not visible in this view.
5255 (define_insn "*sse_sfence"
5256 [(set (match_operand:BLK 0 "" "")
5257 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
5258 "TARGET_SSE || TARGET_3DNOW_A"
5260 [(set_attr "type" "sse")
5261 (set_attr "memory" "unknown")])
;; sse2_clflush: modelled as an unspec_volatile whose only input is the
;; address operand 0 (predicate address_operand, constraint "p"); the
;; "memory" attribute is "unknown".
;; NOTE(review): the unspec code, the insn condition and the output
;; template are on lines not visible in this view.
5263 (define_insn "sse2_clflush"
5264 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
5268 [(set_attr "type" "sse")
5269 (set_attr "memory" "unknown")])
;; sse2_mfence: expander whose preparation code creates operand 0 as a
;; BLKmode MEM with a Pmode SCRATCH address and marks it volatile; the
;; template uses it as the input of a BLK unspec with code UNSPEC_MFENCE.
;; NOTE(review): the first line of the template, the expander condition
;; and the braces around the preparation code are not visible in this view.
5271 (define_expand "sse2_mfence"
5273 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
5276 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
5277 MEM_VOLATILE_P (operands[0]) = 1;
;; *sse2_mfence: matching insn for the mfence expander.  It sets a BLK
;; operand 0 (no predicate, no constraint) from a BLK unspec of the same
;; operand with code UNSPEC_MFENCE.  The "memory" attribute is "unknown".
;; NOTE(review): the insn condition and the output template are on lines
;; not visible in this view.
5280 (define_insn "*sse2_mfence"
5281 [(set (match_operand:BLK 0 "" "")
5282 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
5285 [(set_attr "type" "sse")
5286 (set_attr "memory" "unknown")])
;; sse2_lfence: expander whose preparation code creates operand 0 as a
;; BLKmode MEM with a Pmode SCRATCH address and marks it volatile; the
;; template uses it as the input of a BLK unspec with code UNSPEC_LFENCE.
;; NOTE(review): the first line of the template, the expander condition
;; and the braces around the preparation code are not visible in this view.
5288 (define_expand "sse2_lfence"
5290 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
5293 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
5294 MEM_VOLATILE_P (operands[0]) = 1;
;; *sse2_lfence: matching insn for the lfence expander.  It sets a BLK
;; operand 0 (no predicate, no constraint) from a BLK unspec of the same
;; operand with code UNSPEC_LFENCE.  The "memory" attribute is "unknown".
;; NOTE(review): the insn condition and the output template are on lines
;; not visible in this view.
5297 (define_insn "*sse2_lfence"
5298 [(set (match_operand:BLK 0 "" "")
5299 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
5302 [(set_attr "type" "sse")
5303 (set_attr "memory" "unknown")])
;; sse3_mwait: modelled as an unspec_volatile over two SImode register
;; operands pinned by the "a" and "c" constraints.  The insn length is
;; given explicitly as 3 bytes.
;; NOTE(review): the unspec code, the insn condition and the output
;; template are on lines not visible in this view.
5305 (define_insn "sse3_mwait"
5306 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
5307 (match_operand:SI 1 "register_operand" "c")]
5310 ;; 64bit version is "mwait %rax,%rcx". But only lower 32bits are used.
5311 ;; Since 32bit register operands are implicitly zero extended to 64bit,
5312 ;; we only need to set up 32bit registers.
5314 [(set_attr "length" "3")])
;; sse3_monitor: 32-bit form (TARGET_SSE3 && !TARGET_64BIT); emits
;; "monitor %0, %1, %2".  Modelled as an unspec_volatile over three SImode
;; register operands pinned by the "a", "c" and "d" constraints.  The insn
;; length is given explicitly as 3 bytes.
;; NOTE(review): the unspec code is on a line not visible in this view.
5316 (define_insn "sse3_monitor"
5317 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
5318 (match_operand:SI 1 "register_operand" "c")
5319 (match_operand:SI 2 "register_operand" "d")]
5321 "TARGET_SSE3 && !TARGET_64BIT"
5322 "monitor\t%0, %1, %2"
5323 [(set_attr "length" "3")])
;; sse3_monitor64: 64-bit form (TARGET_SSE3 && TARGET_64BIT).  Same shape
;; as the 32-bit pattern, except that operand 0 (constraint "a") is
;; DImode; operands 1 and 2 stay SImode.  The insn length is given
;; explicitly as 3 bytes.
;; NOTE(review): the unspec code and the output template are on lines not
;; visible in this view.
5325 (define_insn "sse3_monitor64"
5326 [(unspec_volatile [(match_operand:DI 0 "register_operand" "a")
5327 (match_operand:SI 1 "register_operand" "c")
5328 (match_operand:SI 2 "register_operand" "d")]
5330 "TARGET_SSE3 && TARGET_64BIT"
5331 ;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in
5332 ;; RCX and RDX are used. Since 32bit register operands are implicitly
5333 ;; zero extended to 64bit, we only need to set up 32bit registers.
5335 [(set_attr "length" "3")])
5337 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5339 ;; SSSE3 instructions
5341 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; ssse3_phaddwv8hi3: emits phaddw on SSE registers.  The pattern selects
;; HI elements 0..7 of operand 1 (tied to the destination) and then HI
;; elements 0..7 of operand 2 (SSE register or memory), one vec_select per
;; element.
;; NOTE(review): the operators that combine and concatenate the selected
;; elements, and the insn condition, are on lines not visible in this
;; view -- presumably adjacent pairs are summed; confirm in the full file.
5343 (define_insn "ssse3_phaddwv8hi3"
5344 [(set (match_operand:V8HI 0 "register_operand" "=x")
5350 (match_operand:V8HI 1 "register_operand" "0")
5351 (parallel [(const_int 0)]))
5352 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5354 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5355 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5358 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
5359 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
5361 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
5362 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
5367 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5368 (parallel [(const_int 0)]))
5369 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5371 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5372 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
5375 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
5376 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
5378 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
5379 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
5381 "phaddw\t{%2, %0|%0, %2}"
5382 [(set_attr "type" "sseiadd")
5383 (set_attr "prefix_data16" "1")
5384 (set_attr "prefix_extra" "1")
5385 (set_attr "mode" "TI")])
;; ssse3_phaddwv4hi3: emits phaddw on MMX registers (constraint "y").  The
;; pattern selects HI elements 0..3 of operand 1 (tied to the destination)
;; and then HI elements 0..3 of operand 2 (MMX register or memory), one
;; vec_select per element.
;; NOTE(review): the operators that combine and concatenate the selected
;; elements, and the insn condition, are on lines not visible in this
;; view -- presumably adjacent pairs are summed; confirm in the full file.
5387 (define_insn "ssse3_phaddwv4hi3"
5388 [(set (match_operand:V4HI 0 "register_operand" "=y")
5393 (match_operand:V4HI 1 "register_operand" "0")
5394 (parallel [(const_int 0)]))
5395 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5397 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5398 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5402 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
5403 (parallel [(const_int 0)]))
5404 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5406 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5407 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
5409 "phaddw\t{%2, %0|%0, %2}"
5410 [(set_attr "type" "sseiadd")
5411 (set_attr "prefix_extra" "1")
5412 (set_attr "mode" "DI")])
;; ssse3_phadddv4si3: emits phaddd on SSE registers.  The pattern selects
;; SI elements 0..3 of operand 1 (tied to the destination) and then SI
;; elements 0..3 of operand 2 (SSE register or memory), one vec_select per
;; element.
;; NOTE(review): the operators that combine and concatenate the selected
;; elements, and the insn condition, are on lines not visible in this
;; view -- presumably adjacent pairs are summed; confirm in the full file.
5414 (define_insn "ssse3_phadddv4si3"
5415 [(set (match_operand:V4SI 0 "register_operand" "=x")
5420 (match_operand:V4SI 1 "register_operand" "0")
5421 (parallel [(const_int 0)]))
5422 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
5424 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
5425 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
5429 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
5430 (parallel [(const_int 0)]))
5431 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
5433 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
5434 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
5436 "phaddd\t{%2, %0|%0, %2}"
5437 [(set_attr "type" "sseiadd")
5438 (set_attr "prefix_data16" "1")
5439 (set_attr "prefix_extra" "1")
5440 (set_attr "mode" "TI")])
;; phaddd, 64-bit form: V2SI operands in "y"-constrained registers.
;; Each source contributes its element pair 0/1; operand 1 is tied to the
;; destination and operand 2 may be a register or memory.
5442 (define_insn "ssse3_phadddv2si3"
5443 [(set (match_operand:V2SI 0 "register_operand" "=y")
5447 (match_operand:V2SI 1 "register_operand" "0")
5448 (parallel [(const_int 0)]))
5449 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
5452 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
5453 (parallel [(const_int 0)]))
5454 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
5456 "phaddd\t{%2, %0|%0, %2}"
5457 [(set_attr "type" "sseiadd")
5458 (set_attr "prefix_extra" "1")
5459 (set_attr "mode" "DI")])
;; phaddsw, 128-bit form: V8HI operands in "x"-constrained registers.
;; Operand 1 is tied to the destination; operand 2 may be a register or
;; memory.  The RTL selects HI element pairs 0/1 .. 6/7 of each source.
;; NOTE(review): the "s" in the mnemonic presumably denotes the saturating
;; variant -- confirm against the arithmetic codes of this pattern.
5461 (define_insn "ssse3_phaddswv8hi3"
5462 [(set (match_operand:V8HI 0 "register_operand" "=x")
5468 (match_operand:V8HI 1 "register_operand" "0")
5469 (parallel [(const_int 0)]))
5470 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5472 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5473 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5476 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
5477 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
5479 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
5480 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
5485 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5486 (parallel [(const_int 0)]))
5487 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5489 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5490 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
5493 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
5494 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
5496 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
5497 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
5499 "phaddsw\t{%2, %0|%0, %2}"
5500 [(set_attr "type" "sseiadd")
5501 (set_attr "prefix_data16" "1")
5502 (set_attr "prefix_extra" "1")
5503 (set_attr "mode" "TI")])
;; phaddsw, 64-bit form: V4HI operands in "y"-constrained registers.
;; Operand 1 is tied to the destination; operand 2 may be a register or
;; memory.  The RTL selects HI element pairs 0/1 and 2/3 of each source.
5505 (define_insn "ssse3_phaddswv4hi3"
5506 [(set (match_operand:V4HI 0 "register_operand" "=y")
5511 (match_operand:V4HI 1 "register_operand" "0")
5512 (parallel [(const_int 0)]))
5513 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5515 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5516 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5520 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
5521 (parallel [(const_int 0)]))
5522 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5524 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5525 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
5527 "phaddsw\t{%2, %0|%0, %2}"
5528 [(set_attr "type" "sseiadd")
5529 (set_attr "prefix_extra" "1")
5530 (set_attr "mode" "DI")])
;; phsubw, 128-bit form: V8HI operands in "x"-constrained registers.
;; Operand 1 is tied to the destination; operand 2 may be a register or
;; memory.  The RTL selects HI element pairs 0/1 .. 6/7 of each source.
5532 (define_insn "ssse3_phsubwv8hi3"
5533 [(set (match_operand:V8HI 0 "register_operand" "=x")
5539 (match_operand:V8HI 1 "register_operand" "0")
5540 (parallel [(const_int 0)]))
5541 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5543 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5544 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5547 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
5548 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
5550 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
5551 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
5556 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5557 (parallel [(const_int 0)]))
5558 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5560 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5561 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
5564 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
5565 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
5567 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
5568 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
5570 "phsubw\t{%2, %0|%0, %2}"
5571 [(set_attr "type" "sseiadd")
5572 (set_attr "prefix_data16" "1")
5573 (set_attr "prefix_extra" "1")
5574 (set_attr "mode" "TI")])
;; phsubw, 64-bit form: V4HI operands in "y"-constrained registers.
;; Operand 1 is tied to the destination; operand 2 may be a register or
;; memory.  The RTL selects HI element pairs 0/1 and 2/3 of each source.
5576 (define_insn "ssse3_phsubwv4hi3"
5577 [(set (match_operand:V4HI 0 "register_operand" "=y")
5582 (match_operand:V4HI 1 "register_operand" "0")
5583 (parallel [(const_int 0)]))
5584 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5586 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5587 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5591 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
5592 (parallel [(const_int 0)]))
5593 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5595 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5596 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
5598 "phsubw\t{%2, %0|%0, %2}"
5599 [(set_attr "type" "sseiadd")
5600 (set_attr "prefix_extra" "1")
5601 (set_attr "mode" "DI")])
;; phsubd, 128-bit form: V4SI operands in "x"-constrained registers.
;; Operand 1 is tied to the destination; operand 2 may be a register or
;; memory.  The RTL selects SI element pairs 0/1 and 2/3 of each source.
5603 (define_insn "ssse3_phsubdv4si3"
5604 [(set (match_operand:V4SI 0 "register_operand" "=x")
5609 (match_operand:V4SI 1 "register_operand" "0")
5610 (parallel [(const_int 0)]))
5611 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
5613 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
5614 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
5618 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
5619 (parallel [(const_int 0)]))
5620 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
5622 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
5623 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
5625 "phsubd\t{%2, %0|%0, %2}"
5626 [(set_attr "type" "sseiadd")
5627 (set_attr "prefix_data16" "1")
5628 (set_attr "prefix_extra" "1")
5629 (set_attr "mode" "TI")])
;; phsubd, 64-bit form: V2SI operands in "y"-constrained registers.
;; Each source contributes its element pair 0/1; operand 1 is tied to the
;; destination and operand 2 may be a register or memory.
5631 (define_insn "ssse3_phsubdv2si3"
5632 [(set (match_operand:V2SI 0 "register_operand" "=y")
5636 (match_operand:V2SI 1 "register_operand" "0")
5637 (parallel [(const_int 0)]))
5638 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
5641 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
5642 (parallel [(const_int 0)]))
5643 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
5645 "phsubd\t{%2, %0|%0, %2}"
5646 [(set_attr "type" "sseiadd")
5647 (set_attr "prefix_extra" "1")
5648 (set_attr "mode" "DI")])
;; phsubsw, 128-bit form: V8HI operands in "x"-constrained registers.
;; Operand 1 is tied to the destination; operand 2 may be a register or
;; memory.  The RTL selects HI element pairs 0/1 .. 6/7 of each source.
;; NOTE(review): the "s" in the mnemonic presumably denotes the saturating
;; variant -- confirm against the arithmetic codes of this pattern.
5650 (define_insn "ssse3_phsubswv8hi3"
5651 [(set (match_operand:V8HI 0 "register_operand" "=x")
5657 (match_operand:V8HI 1 "register_operand" "0")
5658 (parallel [(const_int 0)]))
5659 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5661 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5662 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5665 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
5666 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
5668 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
5669 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
5674 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5675 (parallel [(const_int 0)]))
5676 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5678 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5679 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
5682 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
5683 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
5685 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
5686 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
5688 "phsubsw\t{%2, %0|%0, %2}"
5689 [(set_attr "type" "sseiadd")
5690 (set_attr "prefix_data16" "1")
5691 (set_attr "prefix_extra" "1")
5692 (set_attr "mode" "TI")])
;; phsubsw, 64-bit form: V4HI operands in "y"-constrained registers.
;; Operand 1 is tied to the destination; operand 2 may be a register or
;; memory.  The RTL selects HI element pairs 0/1 and 2/3 of each source.
5694 (define_insn "ssse3_phsubswv4hi3"
5695 [(set (match_operand:V4HI 0 "register_operand" "=y")
5700 (match_operand:V4HI 1 "register_operand" "0")
5701 (parallel [(const_int 0)]))
5702 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5704 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5705 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5709 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
5710 (parallel [(const_int 0)]))
5711 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5713 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5714 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
5716 "phsubsw\t{%2, %0|%0, %2}"
5717 [(set_attr "type" "sseiadd")
5718 (set_attr "prefix_extra" "1")
5719 (set_attr "mode" "DI")])
;; pmaddubsw, 128-bit form: two V16QI sources produce a V8HI result.
;; Each source is used twice: once through a selection that starts at
;; element 0 and once (via match_dup) through one that starts at element 1.
;; Operand 1 is tied to the destination; operand 2 may be register or memory.
5721 (define_insn "ssse3_pmaddubsw128"
5722 [(set (match_operand:V8HI 0 "register_operand" "=x")
5727 (match_operand:V16QI 1 "register_operand" "0")
5728 (parallel [(const_int 0)
5738 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
5739 (parallel [(const_int 0)
5749 (vec_select:V16QI (match_dup 1)
5750 (parallel [(const_int 1)
5759 (vec_select:V16QI (match_dup 2)
5760 (parallel [(const_int 1)
5767 (const_int 15)]))))))]
5769 "pmaddubsw\t{%2, %0|%0, %2}"
5770 [(set_attr "type" "sseiadd")
5771 (set_attr "prefix_data16" "1")
5772 (set_attr "prefix_extra" "1")
5773 (set_attr "mode" "TI")])
;; pmaddubsw, 64-bit form: two V8QI sources ("y" registers; operand 2 may
;; be memory) produce a V4HI result.  Same shape as the 128-bit pattern:
;; selections starting at element 0 and at element 1 of each source.
5775 (define_insn "ssse3_pmaddubsw"
5776 [(set (match_operand:V4HI 0 "register_operand" "=y")
5781 (match_operand:V8QI 1 "register_operand" "0")
5782 (parallel [(const_int 0)
5788 (match_operand:V8QI 2 "nonimmediate_operand" "ym")
5789 (parallel [(const_int 0)
5795 (vec_select:V8QI (match_dup 1)
5796 (parallel [(const_int 1)
5801 (vec_select:V8QI (match_dup 2)
5802 (parallel [(const_int 1)
5805 (const_int 7)]))))))]
5807 "pmaddubsw\t{%2, %0|%0, %2}"
5808 [(set_attr "type" "sseiadd")
5809 (set_attr "prefix_extra" "1")
5810 (set_attr "mode" "DI")])
;; Expander for the V8HI pmulhrsw pattern.  Both inputs are accepted as
;; nonimmediate operands; the preparation statement calls
;; ix86_fixup_binary_operands_no_copy (MULT, ...) on them before the RTL
;; is generated.  The template includes a V8HI constant vector of ones.
5812 (define_expand "ssse3_pmulhrswv8hi3"
5813 [(set (match_operand:V8HI 0 "register_operand" "")
5820 (match_operand:V8HI 1 "nonimmediate_operand" ""))
5822 (match_operand:V8HI 2 "nonimmediate_operand" "")))
5824 (const_vector:V8HI [(const_int 1) (const_int 1)
5825 (const_int 1) (const_int 1)
5826 (const_int 1) (const_int 1)
5827 (const_int 1) (const_int 1)]))
5830 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; Matcher for the V8HI pmulhrsw RTL produced by the expander of the same
;; name.  Operand 1 carries the "%" commutative marker and is tied to the
;; destination; the insn is only valid when TARGET_SSSE3 holds and
;; ix86_binary_operator_ok accepts the operands.
5832 (define_insn "*ssse3_pmulhrswv8hi3"
5833 [(set (match_operand:V8HI 0 "register_operand" "=x")
5840 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
5842 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
5844 (const_vector:V8HI [(const_int 1) (const_int 1)
5845 (const_int 1) (const_int 1)
5846 (const_int 1) (const_int 1)
5847 (const_int 1) (const_int 1)]))
5849 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5850 "pmulhrsw\t{%2, %0|%0, %2}"
5851 [(set_attr "type" "sseimul")
5852 (set_attr "prefix_data16" "1")
5853 (set_attr "prefix_extra" "1")
5854 (set_attr "mode" "TI")])
;; Expander for the V4HI pmulhrsw pattern.  The preparation statement
;; calls ix86_fixup_binary_operands_no_copy (MULT, ...) on the operands;
;; the template includes a V4HI constant vector of ones.
5856 (define_expand "ssse3_pmulhrswv4hi3"
5857 [(set (match_operand:V4HI 0 "register_operand" "")
5864 (match_operand:V4HI 1 "nonimmediate_operand" ""))
5866 (match_operand:V4HI 2 "nonimmediate_operand" "")))
5868 (const_vector:V4HI [(const_int 1) (const_int 1)
5869 (const_int 1) (const_int 1)]))
5872 "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
;; Matcher for the V4HI pmulhrsw RTL ("y" registers).  Operand 1 carries
;; the "%" commutative marker and is tied to the destination; valid only
;; when TARGET_SSSE3 holds and ix86_binary_operator_ok accepts the operands.
5874 (define_insn "*ssse3_pmulhrswv4hi3"
5875 [(set (match_operand:V4HI 0 "register_operand" "=y")
5882 (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
5884 (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
5886 (const_vector:V4HI [(const_int 1) (const_int 1)
5887 (const_int 1) (const_int 1)]))
5889 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
5890 "pmulhrsw\t{%2, %0|%0, %2}"
5891 [(set_attr "type" "sseimul")
5892 (set_attr "prefix_extra" "1")
5893 (set_attr "mode" "DI")])
;; pshufb, 128-bit form, modelled as an unspec over two V16QI inputs.
;; Operand 1 is tied to the destination ("0"); operand 2 may be an "x"
;; register or memory.
5895 (define_insn "ssse3_pshufbv16qi3"
5896 [(set (match_operand:V16QI 0 "register_operand" "=x")
5897 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
5898 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
5901 "pshufb\t{%2, %0|%0, %2}"
5902 [(set_attr "type" "sselog1")
5903 (set_attr "prefix_data16" "1")
5904 (set_attr "prefix_extra" "1")
5905 (set_attr "mode" "TI")])
;; pshufb, 64-bit form, modelled as an unspec over two V8QI inputs.
;; Operand 1 is tied to the destination ("0"); operand 2 may be a "y"
;; register or memory.
5907 (define_insn "ssse3_pshufbv8qi3"
5908 [(set (match_operand:V8QI 0 "register_operand" "=y")
5909 (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0")
5910 (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
5913 "pshufb\t{%2, %0|%0, %2}"
5914 [(set_attr "type" "sselog1")
5915 (set_attr "prefix_extra" "1")
5916 (set_attr "mode" "DI")])
;; psignb/psignw/psignd, 128-bit forms, iterated over SSEMODE124
;; (V16QI, V8HI, V4SI); <ssevecsize> supplies the b/w/d mnemonic suffix.
;; Operand 1 is tied to the destination; operand 2 may be register or memory.
5918 (define_insn "ssse3_psign<mode>3"
5919 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
5921 [(match_operand:SSEMODE124 1 "register_operand" "0")
5922 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")]
5925 "psign<ssevecsize>\t{%2, %0|%0, %2}"
5926 [(set_attr "type" "sselog1")
5927 (set_attr "prefix_data16" "1")
5928 (set_attr "prefix_extra" "1")
5929 (set_attr "mode" "TI")])
;; psign, 64-bit forms, iterated over MMXMODEI; <mmxvecsize> supplies the
;; mnemonic suffix.  Operand 1 is tied to the destination; operand 2 may
;; be a "y" register or memory.
5931 (define_insn "ssse3_psign<mode>3"
5932 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
5934 [(match_operand:MMXMODEI 1 "register_operand" "0")
5935 (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
5938 "psign<mmxvecsize>\t{%2, %0|%0, %2}"
5939 [(set_attr "type" "sselog1")
5940 (set_attr "prefix_extra" "1")
5941 (set_attr "mode" "DI")])
;; palignr, 128-bit form, modelled as an unspec over two TI inputs and an
;; immediate.  Operand 3 must satisfy const_0_to_255_mul_8_operand; the
;; output code divides it by 8 before printing it as the instruction's
;; immediate.
5943 (define_insn "ssse3_palignrti"
5944 [(set (match_operand:TI 0 "register_operand" "=x")
5945 (unspec:TI [(match_operand:TI 1 "register_operand" "0")
5946 (match_operand:TI 2 "nonimmediate_operand" "xm")
5947 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
5951 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
5952 return "palignr\t{%3, %2, %0|%0, %2, %3}";
5954 [(set_attr "type" "sseishft")
5955 (set_attr "prefix_data16" "1")
5956 (set_attr "prefix_extra" "1")
5957 (set_attr "mode" "TI")])
;; palignr, 64-bit form ("y" registers, DI inputs).  As in the TI pattern,
;; operand 3 must satisfy const_0_to_255_mul_8_operand and is divided by 8
;; before being printed as the immediate.
5959 (define_insn "ssse3_palignrdi"
5960 [(set (match_operand:DI 0 "register_operand" "=y")
5961 (unspec:DI [(match_operand:DI 1 "register_operand" "0")
5962 (match_operand:DI 2 "nonimmediate_operand" "ym")
5963 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
5967 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
5968 return "palignr\t{%3, %2, %0|%0, %2, %3}";
5970 [(set_attr "type" "sseishft")
5971 (set_attr "prefix_extra" "1")
5972 (set_attr "mode" "DI")])
;; Vector absolute value (RTL code "abs") for the SSEMODE124 modes
;; (V16QI, V8HI, V4SI), emitted as pabsb/pabsw/pabsd via <ssevecsize>.
;; The source may be an "x" register or memory; the destination is not tied.
5974 (define_insn "abs<mode>2"
5975 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
5976 (abs:SSEMODE124 (match_operand:SSEMODE124 1 "nonimmediate_operand" "xm")))]
5978 "pabs<ssevecsize>\t{%1, %0|%0, %1}"
5979 [(set_attr "type" "sselog1")
5980 (set_attr "prefix_data16" "1")
5981 (set_attr "prefix_extra" "1")
5982 (set_attr "mode" "TI")])
;; Vector absolute value (RTL code "abs") for the MMXMODEI modes, emitted
;; as pabs<mmxvecsize>.  The source may be a "y" register or memory.
5984 (define_insn "abs<mode>2"
5985 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
5986 (abs:MMXMODEI (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
5988 "pabs<mmxvecsize>\t{%1, %0|%0, %1}"
5989 [(set_attr "type" "sselog1")
5990 (set_attr "prefix_extra" "1")
5991 (set_attr "mode" "DI")])
5993 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5995 ;; AMD SSE4A instructions
5997 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; SSE4A movntss/movntsd: store of a scalar MODEF value from an "x"
;; register to memory, modelled as an unspec.  <ssemodefsuffix> supplies
;; the final letter of the mnemonic.
5999 (define_insn "sse4a_movnt<mode>"
6000 [(set (match_operand:MODEF 0 "memory_operand" "=m")
6002 [(match_operand:MODEF 1 "register_operand" "x")]
6005 "movnts<ssemodefsuffix>\t{%1, %0|%0, %1}"
6006 [(set_attr "type" "ssemov")
6007 (set_attr "mode" "<MODE>")])
;; SSE4A movntss/movntsd taking a whole vector register: element 0 of the
;; SSEMODEF2P (V4SF/V2DF) operand is selected and stored to a scalar-mode
;; memory destination.  <ssemodesuffixf2c> supplies "s" or "d".
6009 (define_insn "sse4a_vmmovnt<mode>"
6010 [(set (match_operand:<ssescalarmode> 0 "memory_operand" "=m")
6011 (unspec:<ssescalarmode>
6012 [(vec_select:<ssescalarmode>
6013 (match_operand:SSEMODEF2P 1 "register_operand" "x")
6014 (parallel [(const_int 0)]))]
6017 "movnts<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
6018 [(set_attr "type" "ssemov")
6019 (set_attr "mode" "<ssescalarmode>")])
;; SSE4A extrq, immediate form: V2DI operand 1 is tied to the destination
;; and operands 2 and 3 are integer constants printed as the two
;; immediates of the instruction.
6021 (define_insn "sse4a_extrqi"
6022 [(set (match_operand:V2DI 0 "register_operand" "=x")
6023 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6024 (match_operand 2 "const_int_operand" "")
6025 (match_operand 3 "const_int_operand" "")]
6028 "extrq\t{%3, %2, %0|%0, %2, %3}"
6029 [(set_attr "type" "sse")
6030 (set_attr "prefix_data16" "1")
6031 (set_attr "mode" "TI")])
;; SSE4A extrq, register form: V2DI operand 1 is tied to the destination
;; and the second input is a V16QI value in an "x" register.
6033 (define_insn "sse4a_extrq"
6034 [(set (match_operand:V2DI 0 "register_operand" "=x")
6035 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6036 (match_operand:V16QI 2 "register_operand" "x")]
6039 "extrq\t{%2, %0|%0, %2}"
6040 [(set_attr "type" "sse")
6041 (set_attr "prefix_data16" "1")
6042 (set_attr "mode" "TI")])
;; SSE4A insertq, immediate form: V2DI operand 1 is tied to the
;; destination, operand 2 is a second V2DI register, and operands 3 and 4
;; are integer constants printed as the two immediates.
6044 (define_insn "sse4a_insertqi"
6045 [(set (match_operand:V2DI 0 "register_operand" "=x")
6046 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6047 (match_operand:V2DI 2 "register_operand" "x")
6048 (match_operand 3 "const_int_operand" "")
6049 (match_operand 4 "const_int_operand" "")]
6052 "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
6053 [(set_attr "type" "sseins")
6054 (set_attr "prefix_rep" "1")
6055 (set_attr "mode" "TI")])
;; SSE4A insertq, register form: V2DI operand 1 is tied to the destination
;; and operand 2 is a second V2DI value in an "x" register.
6057 (define_insn "sse4a_insertq"
6058 [(set (match_operand:V2DI 0 "register_operand" "=x")
6059 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6060 (match_operand:V2DI 2 "register_operand" "x")]
6063 "insertq\t{%2, %0|%0, %2}"
6064 [(set_attr "type" "sseins")
6065 (set_attr "prefix_rep" "1")
6066 (set_attr "mode" "TI")])
6068 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6070 ;; Intel SSE4.1 instructions
6072 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; SSE4.1 blendps/blendpd expressed as a vec_merge of operand 2 (register
;; or memory) into operand 1 (tied to the destination), with operand 3 as
;; the constant selector.  The selector's predicate name is built from the
;; <blendbits> mode attribute.
6074 (define_insn "sse4_1_blendp<ssemodesuffixf2c>"
6075 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
6076 (vec_merge:SSEMODEF2P
6077 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
6078 (match_operand:SSEMODEF2P 1 "register_operand" "0")
6079 (match_operand:SI 3 "const_0_to_<blendbits>_operand" "n")))]
6081 "blendp<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}"
6082 [(set_attr "type" "ssemov")
6083 (set_attr "prefix_extra" "1")
6084 (set_attr "mode" "<MODE>")])
;; SSE4.1 blendvps/blendvpd.  Operand 3 (the selector) uses the "Yz"
;; constraint, while the destination and both data inputs use the
;; *_not_xmm0_operand predicates so that they stay out of that register.
6086 (define_insn "sse4_1_blendvp<ssemodesuffixf2c>"
6087 [(set (match_operand:SSEMODEF2P 0 "reg_not_xmm0_operand" "=x")
6089 [(match_operand:SSEMODEF2P 1 "reg_not_xmm0_operand" "0")
6090 (match_operand:SSEMODEF2P 2 "nonimm_not_xmm0_operand" "xm")
6091 (match_operand:SSEMODEF2P 3 "register_operand" "Yz")]
6094 "blendvp<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}"
6095 [(set_attr "type" "ssemov")
6096 (set_attr "prefix_extra" "1")
6097 (set_attr "mode" "<MODE>")])
;; SSE4.1 dpps/dppd.  Operand 1 is marked commutative ("%") and tied to
;; the destination; operand 2 may be register or memory; operand 3 is an
;; immediate in the range accepted by const_0_to_255_operand.
6099 (define_insn "sse4_1_dpp<ssemodesuffixf2c>"
6100 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
6102 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
6103 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
6104 (match_operand:SI 3 "const_0_to_255_operand" "n")]
6107 "dpp<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}"
6108 [(set_attr "type" "ssemul")
6109 (set_attr "prefix_extra" "1")
6110 (set_attr "mode" "<MODE>")])
;; SSE4.1 movntdqa: load of a V2DI value from memory (operand 1 must be a
;; memory operand) into an "x" register, modelled as an unspec.
6112 (define_insn "sse4_1_movntdqa"
6113 [(set (match_operand:V2DI 0 "register_operand" "=x")
6114 (unspec:V2DI [(match_operand:V2DI 1 "memory_operand" "m")]
6117 "movntdqa\t{%1, %0|%0, %1}"
6118 [(set_attr "type" "ssecvt")
6119 (set_attr "prefix_extra" "1")
6120 (set_attr "mode" "TI")])
;; SSE4.1 mpsadbw, modelled as an unspec over two V16QI inputs and an
;; immediate accepted by const_0_to_255_operand.  Operand 1 is tied to the
;; destination; operand 2 may be register or memory.
6122 (define_insn "sse4_1_mpsadbw"
6123 [(set (match_operand:V16QI 0 "register_operand" "=x")
6124 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
6125 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
6126 (match_operand:SI 3 "const_0_to_255_operand" "n")]
6129 "mpsadbw\t{%3, %2, %0|%0, %2, %3}"
6130 [(set_attr "type" "sselog1")
6131 (set_attr "prefix_extra" "1")
6132 (set_attr "mode" "TI")])
;; SSE4.1 packusdw: two V4SI inputs produce one V8HI result.  Operand 1 is
;; tied to the destination; operand 2 may be register or memory.
6134 (define_insn "sse4_1_packusdw"
6135 [(set (match_operand:V8HI 0 "register_operand" "=x")
6138 (match_operand:V4SI 1 "register_operand" "0"))
6140 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
6142 "packusdw\t{%2, %0|%0, %2}"
6143 [(set_attr "type" "sselog")
6144 (set_attr "prefix_extra" "1")
6145 (set_attr "mode" "TI")])
;; SSE4.1 pblendvb.  Operand 3 (the selector) uses the "Yz" constraint,
;; while the destination and both data inputs use the *_not_xmm0_operand
;; predicates so that they stay out of that register.
6147 (define_insn "sse4_1_pblendvb"
6148 [(set (match_operand:V16QI 0 "reg_not_xmm0_operand" "=x")
6149 (unspec:V16QI [(match_operand:V16QI 1 "reg_not_xmm0_operand" "0")
6150 (match_operand:V16QI 2 "nonimm_not_xmm0_operand" "xm")
6151 (match_operand:V16QI 3 "register_operand" "Yz")]
6154 "pblendvb\t{%3, %2, %0|%0, %2, %3}"
6155 [(set_attr "type" "ssemov")
6156 (set_attr "prefix_extra" "1")
6157 (set_attr "mode" "TI")])
;; SSE4.1 pblendw on V8HI: operand 2 (register or memory) is listed before
;; operand 1 (tied to the destination), followed by the immediate selector
;; accepted by const_0_to_255_operand.
6159 (define_insn "sse4_1_pblendw"
6160 [(set (match_operand:V8HI 0 "register_operand" "=x")
6162 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
6163 (match_operand:V8HI 1 "register_operand" "0")
6164 (match_operand:SI 3 "const_0_to_255_operand" "n")))]
6166 "pblendw\t{%3, %2, %0|%0, %2, %3}"
6167 [(set_attr "type" "ssemov")
6168 (set_attr "prefix_extra" "1")
6169 (set_attr "mode" "TI")])
;; SSE4.1 phminposuw, modelled as UNSPEC_PHMINPOSUW over a single V8HI
;; input that may be a register or memory; the destination is not tied.
6171 (define_insn "sse4_1_phminposuw"
6172 [(set (match_operand:V8HI 0 "register_operand" "=x")
6173 (unspec:V8HI [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
6174 UNSPEC_PHMINPOSUW))]
6176 "phminposuw\t{%1, %0|%0, %1}"
6177 [(set_attr "type" "sselog1")
6178 (set_attr "prefix_extra" "1")
6179 (set_attr "mode" "TI")])
;; pmovsxbw, register-source form: the input is a full V16QI value in an
;; "x" register, from which elements starting at index 0 are selected to
;; form the V8HI result.
6181 (define_insn "sse4_1_extendv8qiv8hi2"
6182 [(set (match_operand:V8HI 0 "register_operand" "=x")
6185 (match_operand:V16QI 1 "register_operand" "x")
6186 (parallel [(const_int 0)
6195 "pmovsxbw\t{%1, %0|%0, %1}"
6196 [(set_attr "type" "ssemov")
6197 (set_attr "prefix_extra" "1")
6198 (set_attr "mode" "TI")])
;; Unnamed pmovsxbw variant whose input is a V8QI value (register or
;; memory) wrapped in vec_duplicate:V16QI, allowing a narrow memory
;; operand to match.
6200 (define_insn "*sse4_1_extendv8qiv8hi2"
6201 [(set (match_operand:V8HI 0 "register_operand" "=x")
6204 (vec_duplicate:V16QI
6205 (match_operand:V8QI 1 "nonimmediate_operand" "xm"))
6206 (parallel [(const_int 0)
6215 "pmovsxbw\t{%1, %0|%0, %1}"
6216 [(set_attr "type" "ssemov")
6217 (set_attr "prefix_extra" "1")
6218 (set_attr "mode" "TI")])
;; pmovsxbd, register-source form: V16QI input in an "x" register,
;; V4SI result; the element selection starts at index 0.
6220 (define_insn "sse4_1_extendv4qiv4si2"
6221 [(set (match_operand:V4SI 0 "register_operand" "=x")
6224 (match_operand:V16QI 1 "register_operand" "x")
6225 (parallel [(const_int 0)
6230 "pmovsxbd\t{%1, %0|%0, %1}"
6231 [(set_attr "type" "ssemov")
6232 (set_attr "prefix_extra" "1")
6233 (set_attr "mode" "TI")])
;; Unnamed pmovsxbd variant whose input is a V4QI value (register or
;; memory) wrapped in vec_duplicate:V16QI.
6235 (define_insn "*sse4_1_extendv4qiv4si2"
6236 [(set (match_operand:V4SI 0 "register_operand" "=x")
6239 (vec_duplicate:V16QI
6240 (match_operand:V4QI 1 "nonimmediate_operand" "xm"))
6241 (parallel [(const_int 0)
6246 "pmovsxbd\t{%1, %0|%0, %1}"
6247 [(set_attr "type" "ssemov")
6248 (set_attr "prefix_extra" "1")
6249 (set_attr "mode" "TI")])
;; pmovsxbq, register-source form: V16QI input in an "x" register,
;; V2DI result; the element selection starts at index 0.
6251 (define_insn "sse4_1_extendv2qiv2di2"
6252 [(set (match_operand:V2DI 0 "register_operand" "=x")
6255 (match_operand:V16QI 1 "register_operand" "x")
6256 (parallel [(const_int 0)
6259 "pmovsxbq\t{%1, %0|%0, %1}"
6260 [(set_attr "type" "ssemov")
6261 (set_attr "prefix_extra" "1")
6262 (set_attr "mode" "TI")])
;; Unnamed pmovsxbq variant whose input is a V2QI value (register or
;; memory) wrapped in vec_duplicate:V16QI.
6264 (define_insn "*sse4_1_extendv2qiv2di2"
6265 [(set (match_operand:V2DI 0 "register_operand" "=x")
6268 (vec_duplicate:V16QI
6269 (match_operand:V2QI 1 "nonimmediate_operand" "xm"))
6270 (parallel [(const_int 0)
6273 "pmovsxbq\t{%1, %0|%0, %1}"
6274 [(set_attr "type" "ssemov")
6275 (set_attr "prefix_extra" "1")
6276 (set_attr "mode" "TI")])
;; pmovsxwd, register-source form: V8HI input in an "x" register,
;; V4SI result; the element selection starts at index 0.
6278 (define_insn "sse4_1_extendv4hiv4si2"
6279 [(set (match_operand:V4SI 0 "register_operand" "=x")
6282 (match_operand:V8HI 1 "register_operand" "x")
6283 (parallel [(const_int 0)
6288 "pmovsxwd\t{%1, %0|%0, %1}"
6289 [(set_attr "type" "ssemov")
6290 (set_attr "prefix_extra" "1")
6291 (set_attr "mode" "TI")])
;; Unnamed pmovsxwd variant for a narrow register-or-memory source.
;; The source holds the four HI elements that are extended, so its mode is
;; V4HI, matching the pattern name and *sse4_1_zero_extendv4hiv4si2.
6293 (define_insn "*sse4_1_extendv4hiv4si2"
6294 [(set (match_operand:V4SI 0 "register_operand" "=x")
6298 (match_operand:V4HI 1 "nonimmediate_operand" "xm"))
6299 (parallel [(const_int 0)
6304 "pmovsxwd\t{%1, %0|%0, %1}"
6305 [(set_attr "type" "ssemov")
6306 (set_attr "prefix_extra" "1")
6307 (set_attr "mode" "TI")])
;; pmovsxwq, register-source form: V8HI input in an "x" register,
;; V2DI result; the element selection starts at index 0.
6309 (define_insn "sse4_1_extendv2hiv2di2"
6310 [(set (match_operand:V2DI 0 "register_operand" "=x")
6313 (match_operand:V8HI 1 "register_operand" "x")
6314 (parallel [(const_int 0)
6317 "pmovsxwq\t{%1, %0|%0, %1}"
6318 [(set_attr "type" "ssemov")
6319 (set_attr "prefix_extra" "1")
6320 (set_attr "mode" "TI")])
;; Unnamed pmovsxwq variant for a narrow register-or-memory source.
;; The source holds the two HI elements that are extended, so its mode is
;; V2HI, matching the pattern name and *sse4_1_zero_extendv2hiv2di2.
6322 (define_insn "*sse4_1_extendv2hiv2di2"
6323 [(set (match_operand:V2DI 0 "register_operand" "=x")
6327 (match_operand:V2HI 1 "nonimmediate_operand" "xm"))
6328 (parallel [(const_int 0)
6331 "pmovsxwq\t{%1, %0|%0, %1}"
6332 [(set_attr "type" "ssemov")
6333 (set_attr "prefix_extra" "1")
6334 (set_attr "mode" "TI")])
;; pmovsxdq, register-source form: V4SI input in an "x" register,
;; V2DI result; the element selection starts at index 0.
6336 (define_insn "sse4_1_extendv2siv2di2"
6337 [(set (match_operand:V2DI 0 "register_operand" "=x")
6340 (match_operand:V4SI 1 "register_operand" "x")
6341 (parallel [(const_int 0)
6344 "pmovsxdq\t{%1, %0|%0, %1}"
6345 [(set_attr "type" "ssemov")
6346 (set_attr "prefix_extra" "1")
6347 (set_attr "mode" "TI")])
;; Unnamed pmovsxdq variant whose input is a V2SI value that may live in
;; a register or in memory.
6349 (define_insn "*sse4_1_extendv2siv2di2"
6350 [(set (match_operand:V2DI 0 "register_operand" "=x")
6354 (match_operand:V2SI 1 "nonimmediate_operand" "xm"))
6355 (parallel [(const_int 0)
6358 "pmovsxdq\t{%1, %0|%0, %1}"
6359 [(set_attr "type" "ssemov")
6360 (set_attr "prefix_extra" "1")
6361 (set_attr "mode" "TI")])
;; pmovzxbw, register-source form: V16QI input in an "x" register,
;; V8HI result; the element selection starts at index 0.
6363 (define_insn "sse4_1_zero_extendv8qiv8hi2"
6364 [(set (match_operand:V8HI 0 "register_operand" "=x")
6367 (match_operand:V16QI 1 "register_operand" "x")
6368 (parallel [(const_int 0)
6377 "pmovzxbw\t{%1, %0|%0, %1}"
6378 [(set_attr "type" "ssemov")
6379 (set_attr "prefix_extra" "1")
6380 (set_attr "mode" "TI")])
;; Unnamed pmovzxbw variant whose input is a V8QI value (register or
;; memory) wrapped in vec_duplicate:V16QI.
6382 (define_insn "*sse4_1_zero_extendv8qiv8hi2"
6383 [(set (match_operand:V8HI 0 "register_operand" "=x")
6386 (vec_duplicate:V16QI
6387 (match_operand:V8QI 1 "nonimmediate_operand" "xm"))
6388 (parallel [(const_int 0)
6397 "pmovzxbw\t{%1, %0|%0, %1}"
6398 [(set_attr "type" "ssemov")
6399 (set_attr "prefix_extra" "1")
6400 (set_attr "mode" "TI")])
;; pmovzxbd, register-source form: V16QI input in an "x" register,
;; V4SI result; the element selection starts at index 0.
6402 (define_insn "sse4_1_zero_extendv4qiv4si2"
6403 [(set (match_operand:V4SI 0 "register_operand" "=x")
6406 (match_operand:V16QI 1 "register_operand" "x")
6407 (parallel [(const_int 0)
6412 "pmovzxbd\t{%1, %0|%0, %1}"
6413 [(set_attr "type" "ssemov")
6414 (set_attr "prefix_extra" "1")
6415 (set_attr "mode" "TI")])
;; Unnamed pmovzxbd variant whose input is a V4QI value (register or
;; memory) wrapped in vec_duplicate:V16QI.
6417 (define_insn "*sse4_1_zero_extendv4qiv4si2"
6418 [(set (match_operand:V4SI 0 "register_operand" "=x")
6421 (vec_duplicate:V16QI
6422 (match_operand:V4QI 1 "nonimmediate_operand" "xm"))
6423 (parallel [(const_int 0)
6428 "pmovzxbd\t{%1, %0|%0, %1}"
6429 [(set_attr "type" "ssemov")
6430 (set_attr "prefix_extra" "1")
6431 (set_attr "mode" "TI")])
;; pmovzxbq, register-source form: V16QI input in an "x" register,
;; V2DI result; the element selection starts at index 0.
6433 (define_insn "sse4_1_zero_extendv2qiv2di2"
6434 [(set (match_operand:V2DI 0 "register_operand" "=x")
6437 (match_operand:V16QI 1 "register_operand" "x")
6438 (parallel [(const_int 0)
6441 "pmovzxbq\t{%1, %0|%0, %1}"
6442 [(set_attr "type" "ssemov")
6443 (set_attr "prefix_extra" "1")
6444 (set_attr "mode" "TI")])
;; Unnamed pmovzxbq variant whose input is a V2QI value (register or
;; memory) wrapped in vec_duplicate:V16QI.
6446 (define_insn "*sse4_1_zero_extendv2qiv2di2"
6447 [(set (match_operand:V2DI 0 "register_operand" "=x")
6450 (vec_duplicate:V16QI
6451 (match_operand:V2QI 1 "nonimmediate_operand" "xm"))
6452 (parallel [(const_int 0)
6455 "pmovzxbq\t{%1, %0|%0, %1}"
6456 [(set_attr "type" "ssemov")
6457 (set_attr "prefix_extra" "1")
6458 (set_attr "mode" "TI")])
;; pmovzxwd, register-source form: V8HI input in an "x" register,
;; V4SI result; the element selection starts at index 0.
6460 (define_insn "sse4_1_zero_extendv4hiv4si2"
6461 [(set (match_operand:V4SI 0 "register_operand" "=x")
6464 (match_operand:V8HI 1 "register_operand" "x")
6465 (parallel [(const_int 0)
6470 "pmovzxwd\t{%1, %0|%0, %1}"
6471 [(set_attr "type" "ssemov")
6472 (set_attr "prefix_extra" "1")
6473 (set_attr "mode" "TI")])
;; Unnamed pmovzxwd variant whose input is a V4HI value that may live in
;; a register or in memory.
6475 (define_insn "*sse4_1_zero_extendv4hiv4si2"
6476 [(set (match_operand:V4SI 0 "register_operand" "=x")
6480 (match_operand:V4HI 1 "nonimmediate_operand" "xm"))
6481 (parallel [(const_int 0)
6486 "pmovzxwd\t{%1, %0|%0, %1}"
6487 [(set_attr "type" "ssemov")
6488 (set_attr "prefix_extra" "1")
6489 (set_attr "mode" "TI")])
;; pmovzxwq, register-source form: V8HI input in an "x" register,
;; V2DI result; the element selection starts at index 0.
6491 (define_insn "sse4_1_zero_extendv2hiv2di2"
6492 [(set (match_operand:V2DI 0 "register_operand" "=x")
6495 (match_operand:V8HI 1 "register_operand" "x")
6496 (parallel [(const_int 0)
6499 "pmovzxwq\t{%1, %0|%0, %1}"
6500 [(set_attr "type" "ssemov")
6501 (set_attr "prefix_extra" "1")
6502 (set_attr "mode" "TI")])
;; Unnamed pmovzxwq variant whose input is a V2HI value that may live in
;; a register or in memory.
6504 (define_insn "*sse4_1_zero_extendv2hiv2di2"
6505 [(set (match_operand:V2DI 0 "register_operand" "=x")
6509 (match_operand:V2HI 1 "nonimmediate_operand" "xm"))
6510 (parallel [(const_int 0)
6513 "pmovzxwq\t{%1, %0|%0, %1}"
6514 [(set_attr "type" "ssemov")
6515 (set_attr "prefix_extra" "1")
6516 (set_attr "mode" "TI")])
;; pmovzxdq, register-source form: V4SI input in an "x" register,
;; V2DI result; the element selection starts at index 0.
6518 (define_insn "sse4_1_zero_extendv2siv2di2"
6519 [(set (match_operand:V2DI 0 "register_operand" "=x")
6522 (match_operand:V4SI 1 "register_operand" "x")
6523 (parallel [(const_int 0)
6526 "pmovzxdq\t{%1, %0|%0, %1}"
6527 [(set_attr "type" "ssemov")
6528 (set_attr "prefix_extra" "1")
6529 (set_attr "mode" "TI")])
;; Unnamed pmovzxdq variant whose input is a V2SI value that may live in
;; a register or in memory.
6531 (define_insn "*sse4_1_zero_extendv2siv2di2"
6532 [(set (match_operand:V2DI 0 "register_operand" "=x")
6536 (match_operand:V2SI 1 "nonimmediate_operand" "xm"))
6537 (parallel [(const_int 0)
6540 "pmovzxdq\t{%1, %0|%0, %1}"
6541 [(set_attr "type" "ssemov")
6542 (set_attr "prefix_extra" "1")
6543 (set_attr "mode" "TI")])
6545 ;; ptest is very similar to comiss and ucomiss when setting FLAGS_REG.
6546 ;; But it is not really a compare instruction.
;; Sets FLAGS_REG (mode CC) from an unspec over two V2DI inputs; operand 0
;; must be an "x" register, operand 1 may be register or memory.  No
;; vector result is produced.
6547 (define_insn "sse4_1_ptest"
6548 [(set (reg:CC FLAGS_REG)
6549 (unspec:CC [(match_operand:V2DI 0 "register_operand" "x")
6550 (match_operand:V2DI 1 "nonimmediate_operand" "xm")]
6553 "ptest\t{%1, %0|%0, %1}"
6554 [(set_attr "type" "ssecomi")
6555 (set_attr "prefix_extra" "1")
6556 (set_attr "mode" "TI")])
;; SSE4.1 roundps/roundpd: packed rounding of operand 1 (register or
;; memory) under control of the immediate operand 2, which must satisfy
;; const_0_to_15_operand.  The destination is not tied to the source.
6558 (define_insn "sse4_1_roundp<ssemodesuffixf2c>"
6559 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
6561 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm")
6562 (match_operand:SI 2 "const_0_to_15_operand" "n")]
6565 "roundp<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
6566 [(set_attr "type" "ssecvt")
6567 (set_attr "prefix_extra" "1")
6568 (set_attr "mode" "<MODE>")])
;; SSE4.1 roundss/roundsd, written as a vec_merge: the rounded value of
;; operand 2 (immediate control in operand 3) is merged with operand 1,
;; which is tied to the destination.
6570 (define_insn "sse4_1_rounds<ssemodesuffixf2c>"
6571 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
6572 (vec_merge:SSEMODEF2P
6574 [(match_operand:SSEMODEF2P 2 "register_operand" "x")
6575 (match_operand:SI 3 "const_0_to_15_operand" "n")]
6577 (match_operand:SSEMODEF2P 1 "register_operand" "0")
6580 "rounds<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}"
6581 [(set_attr "type" "ssecvt")
6582 (set_attr "prefix_extra" "1")
6583 (set_attr "mode" "<MODE>")])
6585 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6587 ;; Intel SSE4.2 string/text processing instructions
6589 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Combined pcmpestr pattern that sets an SI result ("c"), a V16QI result
;; ("Yz") and FLAGS_REG at once.  It is only valid before reload (see the
;; reload_completed/reload_in_progress test) and is always split.
;; The split code reads the REG_UNUSED notes on the insn to learn which of
;; the three results are live, then emits pcmpestri and/or pcmpestrm; when
;; only the flags are live it emits the _cconly form instead.
6591 (define_insn_and_split "sse4_2_pcmpestr"
6592 [(set (match_operand:SI 0 "register_operand" "=c,c")
6594 [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
6595 (match_operand:SI 3 "register_operand" "a,a")
6596 (match_operand:V16QI 4 "nonimm_not_xmm0_operand" "x,m")
6597 (match_operand:SI 5 "register_operand" "d,d")
6598 (match_operand:SI 6 "const_0_to_255_operand" "n,n")]
6600 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
6608 (set (reg:CC FLAGS_REG)
6617 && !(reload_completed || reload_in_progress)"
6622 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
6623 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
6624 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
6627 emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
6628 operands[3], operands[4],
6629 operands[5], operands[6]));
6631 emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
6632 operands[3], operands[4],
6633 operands[5], operands[6]));
6634 if (flags && !(ecx || xmm0))
6635 emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
6636 operands[2], operands[3],
6637 operands[4], operands[5],
6641 [(set_attr "type" "sselog")
6642 (set_attr "prefix_data16" "1")
6643 (set_attr "prefix_extra" "1")
6644 (set_attr "memory" "none,load")
6645 (set_attr "mode" "TI")])
;; pcmpestri: SI result in the "c" register plus FLAGS_REG.  The two
;; V16QI inputs are operands 1 and 3 (operand 3 may be memory in the
;; second alternative); their SI companions use the "a" and "d"
;; constraints; operand 5 is the immediate control byte.
6647 (define_insn "sse4_2_pcmpestri"
6648 [(set (match_operand:SI 0 "register_operand" "=c,c")
6650 [(match_operand:V16QI 1 "register_operand" "x,x")
6651 (match_operand:SI 2 "register_operand" "a,a")
6652 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
6653 (match_operand:SI 4 "register_operand" "d,d")
6654 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
6656 (set (reg:CC FLAGS_REG)
6665 "pcmpestri\t{%5, %3, %1|%1, %3, %5}"
6666 [(set_attr "type" "sselog")
6667 (set_attr "prefix_data16" "1")
6668 (set_attr "prefix_extra" "1")
6669 (set_attr "memory" "none,load")
6670 (set_attr "mode" "TI")])
;; pcmpestrm: sets V16QI operand 0 (constraint "Yz") and FLAGS_REG.
;; The inputs mirror sse4_2_pcmpestri: V16QI operands 1 and 3 (operand 3 may
;; be memory), SI operands 2 and 4 in "a" and "d", and an immediate operand 5
;; in the range 0..255.
6672 (define_insn "sse4_2_pcmpestrm"
6673 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
6675 [(match_operand:V16QI 1 "register_operand" "x,x")
6676 (match_operand:SI 2 "register_operand" "a,a")
6677 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
6678 (match_operand:SI 4 "register_operand" "d,d")
6679 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
6681 (set (reg:CC FLAGS_REG)
6690 "pcmpestrm\t{%5, %3, %1|%1, %3, %5}"
6691 [(set_attr "type" "sselog")
6692 (set_attr "prefix_data16" "1")
6693 (set_attr "prefix_extra" "1")
6694 (set_attr "memory" "none,load")
6695 (set_attr "mode" "TI")])
;; Flags-only pcmpestr: FLAGS_REG is the only value set; operands 0 and 1 are
;; scratch clobbers.  Alternatives 0-1 clobber the "Yz" V16QI scratch and emit
;; pcmpestrm; alternatives 2-3 clobber the "c" SI scratch and emit pcmpestri.
;; Within each pair the second alternative takes operand 4 from memory.
6697 (define_insn "sse4_2_pcmpestr_cconly"
6698 [(set (reg:CC FLAGS_REG)
6700 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
6701 (match_operand:SI 3 "register_operand" "a,a,a,a")
6702 (match_operand:V16QI 4 "nonimmediate_operand" "x,m,x,m")
6703 (match_operand:SI 5 "register_operand" "d,d,d,d")
6704 (match_operand:SI 6 "const_0_to_255_operand" "n,n,n,n")]
6706 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
6707 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
6710 pcmpestrm\t{%6, %4, %2|%2, %4, %6}
6711 pcmpestrm\t{%6, %4, %2|%2, %4, %6}
6712 pcmpestri\t{%6, %4, %2|%2, %4, %6}
6713 pcmpestri\t{%6, %4, %2|%2, %4, %6}"
6714 [(set_attr "type" "sselog")
6715 (set_attr "prefix_data16" "1")
6716 (set_attr "prefix_extra" "1")
6717 (set_attr "memory" "none,load,none,load")
6718 (set_attr "mode" "TI")])
;; Combined pcmpistr pattern.  It sets operand 0 (SI, constraint "c"),
;; operand 1 (V16QI, constraint "Yz") and FLAGS_REG, and is split only while
;; neither reload_completed nor reload_in_progress is set.
;; The splitter reads the REG_UNUSED notes on curr_insn to decide which of
;; pcmpistri, pcmpistrm and the flags-only pattern to emit.
;; NOTE(review): the guards in front of the first two emit_insn calls are not
;; visible in this excerpt -- presumably `if (ecx)' and `if (xmm0)'; confirm.
6720 (define_insn_and_split "sse4_2_pcmpistr"
6721 [(set (match_operand:SI 0 "register_operand" "=c,c")
6723 [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
6724 (match_operand:V16QI 3 "nonimm_not_xmm0_operand" "x,m")
6725 (match_operand:SI 4 "const_0_to_255_operand" "n,n")]
6727 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
6733 (set (reg:CC FLAGS_REG)
6740 && !(reload_completed || reload_in_progress)"
6745 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
6746 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
6747 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
6750 emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
6751 operands[3], operands[4]));
6753 emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
6754 operands[3], operands[4]));
6755 if (flags && !(ecx || xmm0))
6756 emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
6757 operands[2], operands[3],
6761 [(set_attr "type" "sselog")
6762 (set_attr "prefix_data16" "1")
6763 (set_attr "prefix_extra" "1")
6764 (set_attr "memory" "none,load")
6765 (set_attr "mode" "TI")])
;; pcmpistri: sets SI operand 0 (constraint "c") and FLAGS_REG from V16QI
;; operands 1 and 2 (operand 2 may be memory) and an immediate operand 3 in
;; the range 0..255.
6767 (define_insn "sse4_2_pcmpistri"
6768 [(set (match_operand:SI 0 "register_operand" "=c,c")
6770 [(match_operand:V16QI 1 "register_operand" "x,x")
6771 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
6772 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
6774 (set (reg:CC FLAGS_REG)
6781 "pcmpistri\t{%3, %2, %1|%1, %2, %3}"
6782 [(set_attr "type" "sselog")
6783 (set_attr "prefix_data16" "1")
6784 (set_attr "prefix_extra" "1")
6785 (set_attr "memory" "none,load")
6786 (set_attr "mode" "TI")])
;; pcmpistrm: sets V16QI operand 0 (constraint "Yz") and FLAGS_REG from V16QI
;; operands 1 and 2 (operand 2 may be memory) and an immediate operand 3 in
;; the range 0..255.
6788 (define_insn "sse4_2_pcmpistrm"
6789 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
6791 [(match_operand:V16QI 1 "register_operand" "x,x")
6792 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
6793 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
6795 (set (reg:CC FLAGS_REG)
6802 "pcmpistrm\t{%3, %2, %1|%1, %2, %3}"
6803 [(set_attr "type" "sselog")
6804 (set_attr "prefix_data16" "1")
6805 (set_attr "prefix_extra" "1")
6806 (set_attr "memory" "none,load")
6807 (set_attr "mode" "TI")])
;; Flags-only pcmpistr: FLAGS_REG is the only value set; operands 0 and 1 are
;; scratch clobbers.  Alternatives 0-1 clobber the "Yz" V16QI scratch and emit
;; pcmpistrm; alternatives 2-3 clobber the "c" SI scratch and emit pcmpistri.
;; Within each pair the second alternative takes operand 3 from memory.
6809 (define_insn "sse4_2_pcmpistr_cconly"
6810 [(set (reg:CC FLAGS_REG)
6812 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
6813 (match_operand:V16QI 3 "nonimmediate_operand" "x,m,x,m")
6814 (match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")]
6816 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
6817 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
6820 pcmpistrm\t{%4, %3, %2|%2, %3, %4}
6821 pcmpistrm\t{%4, %3, %2|%2, %3, %4}
6822 pcmpistri\t{%4, %3, %2|%2, %3, %4}
6823 pcmpistri\t{%4, %3, %2|%2, %3, %4}"
6824 [(set_attr "type" "sselog")
6825 (set_attr "prefix_data16" "1")
6826 (set_attr "prefix_extra" "1")
6827 (set_attr "memory" "none,load,none,load")
6828 (set_attr "mode" "TI")])
6830 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6832 ;; SSE5 instructions
6834 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6836 ;; SSE5 parallel integer multiply/add instructions.
6837 ;; Note the instruction does not allow the value being added to be a memory
6838 ;; operation. However by pretending via the nonimmediate_operand predicate
6839 ;; that it does and splitting it later allows the following to be recognized:
6840 ;; a[i] = b[i] * c[i] + d[i];
;; pmacsww on V8HI.  Operand 3 is tied to the destination by the "0"
;; constraint in every alternative even though its predicate is
;; nonimmediate_operand.  Operand 1 is marked commutative ("%"); the third
;; alternative takes operand 1 from memory and swaps %1/%2 in the template.
;; NOTE(review): the insn condition passes 2 as the last argument of
;; ix86_sse5_valid_op_p, whereas sse5_pmacssww passes 1 -- presumably the
;; number of memory operands tolerated; confirm against that helper.
6841 (define_insn "sse5_pmacsww"
6842 [(set (match_operand:V8HI 0 "register_operand" "=x,x,x")
6845 (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,m")
6846 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x"))
6847 (match_operand:V8HI 3 "nonimmediate_operand" "0,0,0")))]
6848 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 2)"
6850 pmacsww\t{%3, %2, %1, %0|%0, %1, %2, %3}
6851 pmacsww\t{%3, %2, %1, %0|%0, %1, %2, %3}
6852 pmacsww\t{%3, %1, %2, %0|%0, %2, %1, %3}"
6853 [(set_attr "type" "ssemuladd")
6854 (set_attr "mode" "TI")])
6856 ;; Split pmacsww with two memory operands into a load and the pmacsww.
;; The split applies when the operands fail ix86_sse5_valid_op_p with a last
;; argument of 1 but pass it with 2, and operand 0 is not mentioned in
;; operands 1, 2 or 3.  ix86_expand_sse5_multiple_memory is called on the
;; operands first, then sse5_pmacsww is re-emitted.
6858 [(set (match_operand:V8HI 0 "register_operand" "")
6860 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
6861 (match_operand:V8HI 2 "nonimmediate_operand" ""))
6862 (match_operand:V8HI 3 "nonimmediate_operand" "")))]
6864 && !ix86_sse5_valid_op_p (operands, insn, 4, false, 1)
6865 && ix86_sse5_valid_op_p (operands, insn, 4, false, 2)
6866 && !reg_mentioned_p (operands[0], operands[1])
6867 && !reg_mentioned_p (operands[0], operands[2])
6868 && !reg_mentioned_p (operands[0], operands[3])"
6871 ix86_expand_sse5_multiple_memory (operands, 4, V8HImode);
6872 emit_insn (gen_sse5_pmacsww (operands[0], operands[1], operands[2],
;; pmacssww on V8HI: a V8HI multiply of operands 1 and 2 combined with
;; operand 3, which is tied to the destination by the "0" constraint.
;; The third alternative takes operand 1 from memory and swaps %1/%2 in the
;; output template.
6877 (define_insn "sse5_pmacssww"
6878 [(set (match_operand:V8HI 0 "register_operand" "=x,x,x")
6880 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,m")
6881 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x"))
6882 (match_operand:V8HI 3 "nonimmediate_operand" "0,0,0")))]
6883 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)"
6885 pmacssww\t{%3, %2, %1, %0|%0, %1, %2, %3}
6886 pmacssww\t{%3, %2, %1, %0|%0, %1, %2, %3}
6887 pmacssww\t{%3, %1, %2, %0|%0, %2, %1, %3}"
6888 [(set_attr "type" "ssemuladd")
6889 (set_attr "mode" "TI")])
6891 ;; Note the instruction does not allow the value being added to be a memory
6892 ;; operation. However by pretending via the nonimmediate_operand predicate
6893 ;; that it does and splitting it later allows the following to be recognized:
6894 ;; a[i] = b[i] * c[i] + d[i];
;; pmacsdd on V4SI.  Operand 3 is tied to the destination by the "0"
;; constraint in every alternative even though its predicate is
;; nonimmediate_operand.  Operand 1 is marked commutative ("%"); the third
;; alternative takes operand 1 from memory and swaps %1/%2 in the template.
;; NOTE(review): the insn condition passes 2 as the last argument of
;; ix86_sse5_valid_op_p, whereas sse5_pmacssdd passes 1 -- presumably the
;; number of memory operands tolerated; confirm against that helper.
6895 (define_insn "sse5_pmacsdd"
6896 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
6899 (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
6900 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x"))
6901 (match_operand:V4SI 3 "nonimmediate_operand" "0,0,0")))]
6902 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 2)"
6904 pmacsdd\t{%3, %2, %1, %0|%0, %1, %2, %3}
6905 pmacsdd\t{%3, %2, %1, %0|%0, %1, %2, %3}
6906 pmacsdd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
6907 [(set_attr "type" "ssemuladd")
6908 (set_attr "mode" "TI")])
6910 ;; Split pmacsdd with two memory operands into a load and the pmacsdd.
;; The split applies when the operands fail ix86_sse5_valid_op_p with a last
;; argument of 1 but pass it with 2, and operand 0 is not mentioned in
;; operands 1, 2 or 3.  ix86_expand_sse5_multiple_memory is called on the
;; operands first, then sse5_pmacsdd is re-emitted.
6912 [(set (match_operand:V4SI 0 "register_operand" "")
6914 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "")
6915 (match_operand:V4SI 2 "nonimmediate_operand" ""))
6916 (match_operand:V4SI 3 "nonimmediate_operand" "")))]
6918 && !ix86_sse5_valid_op_p (operands, insn, 4, false, 1)
6919 && ix86_sse5_valid_op_p (operands, insn, 4, false, 2)
6920 && !reg_mentioned_p (operands[0], operands[1])
6921 && !reg_mentioned_p (operands[0], operands[2])
6922 && !reg_mentioned_p (operands[0], operands[3])"
6925 ix86_expand_sse5_multiple_memory (operands, 4, V4SImode);
6926 emit_insn (gen_sse5_pmacsdd (operands[0], operands[1], operands[2],
;; pmacssdd on V4SI: a V4SI multiply of operands 1 and 2 combined with
;; operand 3, which is tied to the destination by the "0" constraint.
;; The third alternative takes operand 1 from memory and swaps %1/%2 in the
;; output template.
6931 (define_insn "sse5_pmacssdd"
6932 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
6934 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
6935 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x"))
6936 (match_operand:V4SI 3 "nonimmediate_operand" "0,0,0")))]
6937 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)"
6939 pmacssdd\t{%3, %2, %1, %0|%0, %1, %2, %3}
6940 pmacssdd\t{%3, %2, %1, %0|%0, %1, %2, %3}
6941 pmacssdd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
6942 [(set_attr "type" "ssemuladd")
6943 (set_attr "mode" "TI")])
;; pmacssdql: V4SI operands 1 and 2 feed a V2DI result; V2DI operand 3 must be
;; a register tied to the destination ("0").  The visible selection vectors on
;; both inputs start at element 1.  The third alternative takes operand 1 from
;; memory and swaps %1/%2 in the output template.
6945 (define_insn "sse5_pmacssdql"
6946 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x")
6951 (match_operand:V4SI 1 "nonimmediate_operand" "x,x,m")
6952 (parallel [(const_int 1)
6955 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")
6956 (parallel [(const_int 1)
6958 (match_operand:V2DI 3 "register_operand" "0,0,0")))]
6959 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)"
6961 pmacssdql\t{%3, %2, %1, %0|%0, %1, %2, %3}
6962 pmacssdql\t{%3, %2, %1, %0|%0, %1, %2, %3}
6963 pmacssdql\t{%3, %1, %2, %0|%0, %2, %1, %3}"
6964 [(set_attr "type" "ssemuladd")
6965 (set_attr "mode" "TI")])
;; pmacssdqh: V4SI operands 1 and 2 feed a V2DI result; V2DI operand 3 must be
;; a register tied to the destination ("0").  The visible selection vectors on
;; both inputs start at element 0.  The third alternative takes operand 1 from
;; memory and swaps %1/%2 in the output template.
6967 (define_insn "sse5_pmacssdqh"
6968 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x")
6973 (match_operand:V4SI 1 "nonimmediate_operand" "x,x,m")
6974 (parallel [(const_int 0)
6978 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")
6979 (parallel [(const_int 0)
6981 (match_operand:V2DI 3 "register_operand" "0,0,0")))]
6982 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)"
6984 pmacssdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}
6985 pmacssdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}
6986 pmacssdqh\t{%3, %1, %2, %0|%0, %2, %1, %3}"
6987 [(set_attr "type" "ssemuladd")
6988 (set_attr "mode" "TI")])
;; pmacsdql: V4SI operands 1 and 2 feed a V2DI result; V2DI operand 3 must be
;; a register tied to the destination ("0").  The visible selection vectors on
;; both inputs start at element 1.  The third alternative takes operand 1 from
;; memory and swaps %1/%2 in the output template.
6990 (define_insn "sse5_pmacsdql"
6991 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x")
6996 (match_operand:V4SI 1 "nonimmediate_operand" "x,x,m")
6997 (parallel [(const_int 1)
7001 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")
7002 (parallel [(const_int 1)
7004 (match_operand:V2DI 3 "register_operand" "0,0,0")))]
7005 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)"
7007 pmacsdql\t{%3, %2, %1, %0|%0, %1, %2, %3}
7008 pmacsdql\t{%3, %2, %1, %0|%0, %1, %2, %3}
7009 pmacsdql\t{%3, %1, %2, %0|%0, %2, %1, %3}"
7010 [(set_attr "type" "ssemuladd")
7011 (set_attr "mode" "TI")])
;; Variant of the pmacsdql pattern whose V2DI operand 3 is a memory operand.
;; The destination is earlyclobber ("=&x").  The split fires after reload, or
;; earlier when operand 0 is not mentioned in operands 1 and 2.
;; NOTE(review): the replacement pattern is only partly visible in this
;; excerpt, so what the split emits cannot be confirmed from here.
7013 (define_insn_and_split "*sse5_pmacsdql_mem"
7014 [(set (match_operand:V2DI 0 "register_operand" "=&x,&x,&x")
7019 (match_operand:V4SI 1 "nonimmediate_operand" "x,x,m")
7020 (parallel [(const_int 1)
7024 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")
7025 (parallel [(const_int 1)
7027 (match_operand:V2DI 3 "memory_operand" "m,m,m")))]
7028 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, -1)"
7030 "&& (reload_completed
7031 || (!reg_mentioned_p (operands[0], operands[1])
7032 && !reg_mentioned_p (operands[0], operands[2])))"
7041 (parallel [(const_int 1)
7046 (parallel [(const_int 1)
7050 ;; We don't have a straight 32-bit parallel multiply and extend on SSE5, so
7051 ;; fake it with a multiply/add. In general, we expect the define_split to
7052 ;; occur before register allocation, so we have to handle the corner case where
7053 ;; the target is the same as operands 1/2
;; Widening multiply of elements 1 and 3 of V4SI operands 1 and 2 into V2DI
;; operand 0 (earlyclobber).  The split fires after reload, or earlier when
;; operand 0 is not mentioned in operands 1 and 2, and sets operands[3] to the
;; V2DI zero constant for use by the replacement pattern.
7054 (define_insn_and_split "sse5_mulv2div2di3_low"
7055 [(set (match_operand:V2DI 0 "register_operand" "=&x")
7059 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
7060 (parallel [(const_int 1)
7064 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
7065 (parallel [(const_int 1)
7066 (const_int 3)])))))]
7069 "&& (reload_completed
7070 || (!reg_mentioned_p (operands[0], operands[1])
7071 && !reg_mentioned_p (operands[0], operands[2])))"
7080 (parallel [(const_int 1)
7085 (parallel [(const_int 1)
7089 operands[3] = CONST0_RTX (V2DImode);
7091 [(set_attr "type" "ssemuladd")
7092 (set_attr "mode" "TI")])
;; pmacsdqh: V4SI operands 1 and 2 feed a V2DI result; V2DI operand 3 must be
;; a register tied to the destination ("0").  The visible selection vectors on
;; both inputs start at element 0.  The third alternative takes operand 1 from
;; memory and swaps %1/%2 in the output template.
7094 (define_insn "sse5_pmacsdqh"
7095 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x")
7100 (match_operand:V4SI 1 "nonimmediate_operand" "x,x,m")
7101 (parallel [(const_int 0)
7105 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")
7106 (parallel [(const_int 0)
7108 (match_operand:V2DI 3 "register_operand" "0,0,0")))]
7109 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)"
7111 pmacsdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}
7112 pmacsdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}
7113 pmacsdqh\t{%3, %1, %2, %0|%0, %2, %1, %3}"
7114 [(set_attr "type" "ssemuladd")
7115 (set_attr "mode" "TI")])
;; Variant of the pmacsdqh pattern whose V2DI operand 3 is a memory operand.
;; The destination is earlyclobber ("=&x").  The split fires after reload, or
;; earlier when operand 0 is not mentioned in operands 1 and 2.
;; NOTE(review): the replacement pattern is only partly visible in this
;; excerpt, so what the split emits cannot be confirmed from here.
7117 (define_insn_and_split "*sse5_pmacsdqh_mem"
7118 [(set (match_operand:V2DI 0 "register_operand" "=&x,&x,&x")
7123 (match_operand:V4SI 1 "nonimmediate_operand" "x,x,m")
7124 (parallel [(const_int 0)
7128 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")
7129 (parallel [(const_int 0)
7131 (match_operand:V2DI 3 "memory_operand" "m,m,m")))]
7132 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, -1)"
7134 "&& (reload_completed
7135 || (!reg_mentioned_p (operands[0], operands[1])
7136 && !reg_mentioned_p (operands[0], operands[2])))"
7145 (parallel [(const_int 0)
7150 (parallel [(const_int 0)
7154 ;; We don't have a straight 32-bit parallel multiply and extend on SSE5, so
7155 ;; fake it with a multiply/add. In general, we expect the define_split to
7156 ;; occur before register allocation, so we have to handle the corner case where
7157 ;; the target is the same as either operands[1] or operands[2]
;; Widening multiply of elements 0 and 2 of V4SI operands 1 and 2 into V2DI
;; operand 0 (earlyclobber).  The split fires after reload, or earlier when
;; operand 0 is not mentioned in operands 1 and 2, and sets operands[3] to the
;; V2DI zero constant for use by the replacement pattern.
7158 (define_insn_and_split "sse5_mulv2div2di3_high"
7159 [(set (match_operand:V2DI 0 "register_operand" "=&x")
7163 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
7164 (parallel [(const_int 0)
7168 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
7169 (parallel [(const_int 0)
7170 (const_int 2)])))))]
7173 "&& (reload_completed
7174 || (!reg_mentioned_p (operands[0], operands[1])
7175 && !reg_mentioned_p (operands[0], operands[2])))"
7184 (parallel [(const_int 0)
7189 (parallel [(const_int 0)
7193 operands[3] = CONST0_RTX (V2DImode);
7195 [(set_attr "type" "ssemuladd")
7196 (set_attr "mode" "TI")])
7198 ;; SSE5 parallel integer multiply/add instructions for the intrinsics
;; pmacsswd: V8HI operands 1 and 2 feed a V4SI result; V4SI operand 3 must be
;; a register tied to the destination ("0").  The visible selection vectors on
;; both inputs start at element 1.  The third alternative takes operand 1 from
;; memory and swaps %1/%2 in the output template.
7199 (define_insn "sse5_pmacsswd"
7200 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
7205 (match_operand:V8HI 1 "nonimmediate_operand" "x,x,m")
7206 (parallel [(const_int 1)
7212 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x")
7213 (parallel [(const_int 1)
7217 (match_operand:V4SI 3 "register_operand" "0,0,0")))]
7218 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)"
7220 pmacsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
7221 pmacsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
7222 pmacsswd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
7223 [(set_attr "type" "ssemuladd")
7224 (set_attr "mode" "TI")])
;; pmacswd: V8HI operands 1 and 2 feed a V4SI result; V4SI operand 3 must be
;; a register tied to the destination ("0").  The visible selection vectors on
;; both inputs start at element 1.  The third alternative takes operand 1 from
;; memory and swaps %1/%2 in the output template.
7226 (define_insn "sse5_pmacswd"
7227 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
7232 (match_operand:V8HI 1 "nonimmediate_operand" "x,x,m")
7233 (parallel [(const_int 1)
7239 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x")
7240 (parallel [(const_int 1)
7244 (match_operand:V4SI 3 "register_operand" "0,0,0")))]
7245 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)"
7247 pmacswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
7248 pmacswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
7249 pmacswd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
7250 [(set_attr "type" "ssemuladd")
7251 (set_attr "mode" "TI")])
;; pmadcsswd: V8HI operands 1 and 2 feed a V4SI result; V4SI operand 3 must be
;; a register tied to the destination ("0").  The pattern uses two pairs of
;; selection vectors, one pair starting at element 0 and one at element 1.
;; The third alternative takes operand 1 from memory and swaps %1/%2 in the
;; output template.
7253 (define_insn "sse5_pmadcsswd"
7254 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
7260 (match_operand:V8HI 1 "nonimmediate_operand" "x,x,m")
7261 (parallel [(const_int 0)
7267 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x")
7268 (parallel [(const_int 0)
7276 (parallel [(const_int 1)
7283 (parallel [(const_int 1)
7287 (match_operand:V4SI 3 "register_operand" "0,0,0")))]
7288 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)"
7290 pmadcsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
7291 pmadcsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
7292 pmadcsswd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
7293 [(set_attr "type" "ssemuladd")
7294 (set_attr "mode" "TI")])
;; pmadcswd: V8HI operands 1 and 2 feed a V4SI result; V4SI operand 3 must be
;; a register tied to the destination ("0").  The pattern uses two pairs of
;; selection vectors, one pair starting at element 0 and one at element 1.
;; The third alternative takes operand 1 from memory and swaps %1/%2 in the
;; output template.
7296 (define_insn "sse5_pmadcswd"
7297 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
7303 (match_operand:V8HI 1 "nonimmediate_operand" "x,x,m")
7304 (parallel [(const_int 0)
7310 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x")
7311 (parallel [(const_int 0)
7319 (parallel [(const_int 1)
7326 (parallel [(const_int 1)
7330 (match_operand:V4SI 3 "register_operand" "0,0,0")))]
7331 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)"
7333 pmadcswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
7334 pmadcswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
7335 pmadcswd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
7336 [(set_attr "type" "ssemuladd")
7337 (set_attr "mode" "TI")])
7339 ;; SSE5 parallel XMM conditional moves
;; pcmov for every SSEMODE vector mode, expressed as an if_then_else with
;; operand 3 as the condition, operand 1 as the "then" arm and operand 2 as
;; the "else" arm.  In each alternative one of operands 1 and 3 is tied to the
;; destination ("0").  All four alternatives emit the same template, and no
;; "mode" attribute is set.
7340 (define_insn "sse5_pcmov_<mode>"
7341 [(set (match_operand:SSEMODE 0 "register_operand" "=x,x,x,x")
7342 (if_then_else:SSEMODE
7343 (match_operand:SSEMODE 3 "nonimmediate_operand" "0,0,xm,x")
7344 (match_operand:SSEMODE 1 "vector_move_operand" "x,xm,0,0")
7345 (match_operand:SSEMODE 2 "vector_move_operand" "xm,x,x,xm")))]
7346 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
7348 pcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}
7349 pcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}
7350 pcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}
7351 pcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
7352 [(set_attr "type" "sse4arg")])
7354 ;; SSE5 horizontal add/subtract instructions
;; phaddbw: single-source horizontal add from V16QI operand 1 (register or
;; memory) into V8HI operand 0.  The two selection vectors start at elements
;; 0 and 1.
7355 (define_insn "sse5_phaddbw"
7356 [(set (match_operand:V8HI 0 "register_operand" "=x")
7360 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
7361 (parallel [(const_int 0)
7372 (parallel [(const_int 1)
7379 (const_int 15)])))))]
7381 "phaddbw\t{%1, %0|%0, %1}"
7382 [(set_attr "type" "sseiadd1")])
;; phaddbd: single-source horizontal add from V16QI operand 1 (register or
;; memory) into V4SI operand 0.  The four selection vectors start at elements
;; 0, 1, 2 and 3.
7384 (define_insn "sse5_phaddbd"
7385 [(set (match_operand:V4SI 0 "register_operand" "=x")
7390 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
7391 (parallel [(const_int 0)
7398 (parallel [(const_int 1)
7406 (parallel [(const_int 2)
7413 (parallel [(const_int 3)
7416 (const_int 15)]))))))]
7418 "phaddbd\t{%1, %0|%0, %1}"
7419 [(set_attr "type" "sseiadd1")])
;; phaddbq: single-source horizontal add from V16QI operand 1 (register or
;; memory) into V2DI operand 0.  The eight selection vectors start at elements
;; 0, 1, 2, 3 and 8, 9, 10, 11.
7421 (define_insn "sse5_phaddbq"
7422 [(set (match_operand:V2DI 0 "register_operand" "=x")
7428 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
7429 (parallel [(const_int 0)
7434 (parallel [(const_int 1)
7440 (parallel [(const_int 2)
7445 (parallel [(const_int 3)
7452 (parallel [(const_int 8)
7457 (parallel [(const_int 9)
7463 (parallel [(const_int 10)
7468 (parallel [(const_int 11)
7469 (const_int 15)])))))))]
7471 "phaddbq\t{%1, %0|%0, %1}"
7472 [(set_attr "type" "sseiadd1")])
;; phaddwd: single-source horizontal add from V8HI operand 1 (register or
;; memory) into V4SI operand 0.  The two selection vectors start at elements
;; 0 and 1.
7474 (define_insn "sse5_phaddwd"
7475 [(set (match_operand:V4SI 0 "register_operand" "=x")
7479 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7480 (parallel [(const_int 0)
7487 (parallel [(const_int 1)
7490 (const_int 7)])))))]
7492 "phaddwd\t{%1, %0|%0, %1}"
7493 [(set_attr "type" "sseiadd1")])
;; phaddwq: single-source horizontal add from V8HI operand 1 (register or
;; memory) into V2DI operand 0.  The four selection vectors start at elements
;; 0, 1, 2 and 3.
7495 (define_insn "sse5_phaddwq"
7496 [(set (match_operand:V2DI 0 "register_operand" "=x")
7501 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7502 (parallel [(const_int 0)
7507 (parallel [(const_int 1)
7513 (parallel [(const_int 2)
7518 (parallel [(const_int 3)
7519 (const_int 7)]))))))]
7521 "phaddwq\t{%1, %0|%0, %1}"
7522 [(set_attr "type" "sseiadd1")])
;; phadddq: single-source horizontal add from V4SI operand 1 (register or
;; memory) into V2DI operand 0.  The two selection vectors start at elements
;; 0 and 1.
7524 (define_insn "sse5_phadddq"
7525 [(set (match_operand:V2DI 0 "register_operand" "=x")
7529 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
7530 (parallel [(const_int 0)
7535 (parallel [(const_int 1)
7536 (const_int 3)])))))]
7538 "phadddq\t{%1, %0|%0, %1}"
7539 [(set_attr "type" "sseiadd1")])
;; phaddubw: single-source horizontal add from V16QI operand 1 (register or
;; memory) into V8HI operand 0.  The two selection vectors start at elements
;; 0 and 1.
;; NOTE(review): presumably the unsigned counterpart of sse5_phaddbw; the
;; extension codes are not visible in this excerpt -- confirm.
7541 (define_insn "sse5_phaddubw"
7542 [(set (match_operand:V8HI 0 "register_operand" "=x")
7546 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
7547 (parallel [(const_int 0)
7558 (parallel [(const_int 1)
7565 (const_int 15)])))))]
7567 "phaddubw\t{%1, %0|%0, %1}"
7568 [(set_attr "type" "sseiadd1")])
;; phaddubd: single-source horizontal add from V16QI operand 1 (register or
;; memory) into V4SI operand 0.  The four selection vectors start at elements
;; 0, 1, 2 and 3.
;; NOTE(review): presumably the unsigned counterpart of sse5_phaddbd; the
;; extension codes are not visible in this excerpt -- confirm.
7570 (define_insn "sse5_phaddubd"
7571 [(set (match_operand:V4SI 0 "register_operand" "=x")
7576 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
7577 (parallel [(const_int 0)
7584 (parallel [(const_int 1)
7592 (parallel [(const_int 2)
7599 (parallel [(const_int 3)
7602 (const_int 15)]))))))]
7604 "phaddubd\t{%1, %0|%0, %1}"
7605 [(set_attr "type" "sseiadd1")])
;; phaddubq: single-source horizontal add from V16QI operand 1 (register or
;; memory) into V2DI operand 0.  The eight selection vectors start at elements
;; 0, 1, 2, 3 and 8, 9, 10, 11.
;; NOTE(review): presumably the unsigned counterpart of sse5_phaddbq; the
;; extension codes are not visible in this excerpt -- confirm.
7607 (define_insn "sse5_phaddubq"
7608 [(set (match_operand:V2DI 0 "register_operand" "=x")
7614 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
7615 (parallel [(const_int 0)
7620 (parallel [(const_int 1)
7626 (parallel [(const_int 2)
7631 (parallel [(const_int 3)
7638 (parallel [(const_int 8)
7643 (parallel [(const_int 9)
7649 (parallel [(const_int 10)
7654 (parallel [(const_int 11)
7655 (const_int 15)])))))))]
7657 "phaddubq\t{%1, %0|%0, %1}"
7658 [(set_attr "type" "sseiadd1")])
;; phadduwd: single-source horizontal add from V8HI operand 1 (register or
;; memory) into V4SI operand 0.  The two selection vectors start at elements
;; 0 and 1.
;; NOTE(review): presumably the unsigned counterpart of sse5_phaddwd; the
;; extension codes are not visible in this excerpt -- confirm.
7660 (define_insn "sse5_phadduwd"
7661 [(set (match_operand:V4SI 0 "register_operand" "=x")
7665 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7666 (parallel [(const_int 0)
7673 (parallel [(const_int 1)
7676 (const_int 7)])))))]
7678 "phadduwd\t{%1, %0|%0, %1}"
7679 [(set_attr "type" "sseiadd1")])
;; phadduwq: single-source horizontal add from V8HI operand 1 (register or
;; memory) into V2DI operand 0.  The four selection vectors start at elements
;; 0, 1, 2 and 3.
;; NOTE(review): presumably the unsigned counterpart of sse5_phaddwq; the
;; extension codes are not visible in this excerpt -- confirm.
7681 (define_insn "sse5_phadduwq"
7682 [(set (match_operand:V2DI 0 "register_operand" "=x")
7687 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7688 (parallel [(const_int 0)
7693 (parallel [(const_int 1)
7699 (parallel [(const_int 2)
7704 (parallel [(const_int 3)
7705 (const_int 7)]))))))]
7707 "phadduwq\t{%1, %0|%0, %1}"
7708 [(set_attr "type" "sseiadd1")])
;; phaddudq: single-source horizontal add from V4SI operand 1 (register or
;; memory) into V2DI operand 0.  The two selection vectors start at elements
;; 0 and 1.
;; NOTE(review): presumably the unsigned counterpart of sse5_phadddq; the
;; extension codes are not visible in this excerpt -- confirm.
7710 (define_insn "sse5_phaddudq"
7711 [(set (match_operand:V2DI 0 "register_operand" "=x")
7715 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
7716 (parallel [(const_int 0)
7721 (parallel [(const_int 1)
7722 (const_int 3)])))))]
7724 "phaddudq\t{%1, %0|%0, %1}"
7725 [(set_attr "type" "sseiadd1")])
;; phsubbw: single-source horizontal subtract from V16QI operand 1 (register
;; or memory) into V8HI operand 0.  The two selection vectors start at
;; elements 0 and 1.
7727 (define_insn "sse5_phsubbw"
7728 [(set (match_operand:V8HI 0 "register_operand" "=x")
7732 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
7733 (parallel [(const_int 0)
7744 (parallel [(const_int 1)
7751 (const_int 15)])))))]
7753 "phsubbw\t{%1, %0|%0, %1}"
7754 [(set_attr "type" "sseiadd1")])
;; phsubwd: single-source horizontal subtract from V8HI operand 1 (register
;; or memory) into V4SI operand 0.  The two selection vectors start at
;; elements 0 and 1.
7756 (define_insn "sse5_phsubwd"
7757 [(set (match_operand:V4SI 0 "register_operand" "=x")
7761 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7762 (parallel [(const_int 0)
7769 (parallel [(const_int 1)
7772 (const_int 7)])))))]
7774 "phsubwd\t{%1, %0|%0, %1}"
7775 [(set_attr "type" "sseiadd1")])
;; phsubdq: single-source horizontal subtract from V4SI operand 1 (register
;; or memory) into V2DI operand 0.  The two selection vectors start at
;; elements 0 and 1.
7777 (define_insn "sse5_phsubdq"
7778 [(set (match_operand:V2DI 0 "register_operand" "=x")
7782 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
7783 (parallel [(const_int 0)
7788 (parallel [(const_int 1)
7789 (const_int 3)])))))]
7791 "phsubdq\t{%1, %0|%0, %1}"
7792 [(set_attr "type" "sseiadd1")])
7794 ;; SSE5 permute instructions
;; pperm as an UNSPEC_SSE5_PERMUTE of three V16QI inputs.  In each of the four
;; alternatives either operand 1 or operand 3 is tied to the destination
;; ("0"), and at most one of the other operands may be memory.
7795 (define_insn "sse5_pperm"
7796 [(set (match_operand:V16QI 0 "register_operand" "=x,x,x,x")
7798 [(match_operand:V16QI 1 "nonimmediate_operand" "0,0,x,xm")
7799 (match_operand:V16QI 2 "nonimmediate_operand" "x,xm,xm,x")
7800 (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0")]
7801 UNSPEC_SSE5_PERMUTE))]
7802 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
7803 "pperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
7804 [(set_attr "type" "sse4arg")
7805 (set_attr "mode" "TI")])
7807 ;; The following are for the various unpack insns, which don't need the first
7808 ;; source operand, so we can just use the output operand for the first operand.
7809 ;; This allows either of the other two operands to be a memory operand. We
7810 ;; can't just use the first operand as an argument to the normal pperm because
7811 ;; then an output-only argument suddenly becomes an input operand.
;; Unpack of V16QI operand 1 into V8HI operand 0 through pperm (the "zero"
;; flavour, per the pattern name).  Operand 2 is the parallel of const_ints
;; that selects the source elements; V16QI operand 3 is only a `use'.  The
;; destination doubles as the first pperm source (%0 appears twice).
;; Either operand 1 or operand 3 may be memory, but not both, so the condition
;; must test operands 1 and 3.  Operand 2 is a modeless parallel and can never
;; satisfy register_operand in V16QImode; testing it made the memory form of
;; operand 1 (first alternative) unreachable.
7812 (define_insn "sse5_pperm_zero_v16qi_v8hi"
7813 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
7816 (match_operand:V16QI 1 "nonimmediate_operand" "xm,x")
7817 (match_operand 2 "" "")))) ;; parallel with const_int's
7818 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
7820 && (register_operand (operands[1], V16QImode)
7821 || register_operand (operands[3], V16QImode))"
7822 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
7823 [(set_attr "type" "sseadd")
7824 (set_attr "mode" "TI")])
;; Unpack of V16QI operand 1 into V8HI operand 0 through pperm (the "sign"
;; flavour, per the pattern name).  Operand 2 is the parallel of const_ints
;; that selects the source elements; V16QI operand 3 is only a `use'.  The
;; destination doubles as the first pperm source (%0 appears twice).
;; Either operand 1 or operand 3 may be memory, but not both, so the condition
;; must test operands 1 and 3.  Operand 2 is a modeless parallel and can never
;; satisfy register_operand in V16QImode; testing it made the memory form of
;; operand 1 (first alternative) unreachable.
7826 (define_insn "sse5_pperm_sign_v16qi_v8hi"
7827 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
7830 (match_operand:V16QI 1 "nonimmediate_operand" "xm,x")
7831 (match_operand 2 "" "")))) ;; parallel with const_int's
7832 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
7834 && (register_operand (operands[1], V16QImode)
7835 || register_operand (operands[3], V16QImode))"
7836 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
7837 [(set_attr "type" "sseadd")
7838 (set_attr "mode" "TI")])
;; Unpack of V8HI operand 1 into V4SI operand 0 through pperm (the "zero"
;; flavour, per the pattern name).  Operand 2 is the parallel of const_ints
;; that selects the source elements; V16QI operand 3 is only a `use'.  The
;; destination doubles as the first pperm source (%0 appears twice).
;; Either operand 1 or operand 3 may be memory, but not both, so the condition
;; must test operands 1 and 3.  Operand 2 is a modeless parallel and can never
;; satisfy register_operand in V16QImode; testing it made the memory form of
;; operand 1 (first alternative) unreachable.
7840 (define_insn "sse5_pperm_zero_v8hi_v4si"
7841 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
7844 (match_operand:V8HI 1 "nonimmediate_operand" "xm,x")
7845 (match_operand 2 "" "")))) ;; parallel with const_int's
7846 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
7848 && (register_operand (operands[1], V8HImode)
7849 || register_operand (operands[3], V16QImode))"
7850 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
7851 [(set_attr "type" "sseadd")
7852 (set_attr "mode" "TI")])
;; Unpack of V8HI operand 1 into V4SI operand 0 through pperm (the "sign"
;; flavour, per the pattern name).  Operand 2 is the parallel of const_ints
;; that selects the source elements; V16QI operand 3 is only a `use'.  The
;; destination doubles as the first pperm source (%0 appears twice).
;; Either operand 1 or operand 3 may be memory, but not both, so the condition
;; must test operands 1 and 3.  Operand 2 is a modeless parallel and can never
;; satisfy register_operand in V16QImode; testing it made the memory form of
;; operand 1 (first alternative) unreachable.
7854 (define_insn "sse5_pperm_sign_v8hi_v4si"
7855 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
7858 (match_operand:V8HI 1 "nonimmediate_operand" "xm,x")
7859 (match_operand 2 "" "")))) ;; parallel with const_int's
7860 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
7862 && (register_operand (operands[1], V8HImode)
7863 || register_operand (operands[3], V16QImode))"
7864 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
7865 [(set_attr "type" "sseadd")
7866 (set_attr "mode" "TI")])
;; Unpack of V4SI operand 1 into V2DI operand 0 through pperm (the "zero"
;; flavour, per the pattern name).  Operand 2 is the parallel of const_ints
;; that selects the source elements; V16QI operand 3 is only a `use'.  The
;; destination doubles as the first pperm source (%0 appears twice).
;; Either operand 1 or operand 3 may be memory, but not both, so the condition
;; must test operands 1 and 3.  Operand 2 is a modeless parallel and can never
;; satisfy register_operand in V16QImode; testing it made the memory form of
;; operand 1 (first alternative) unreachable.
7868 (define_insn "sse5_pperm_zero_v4si_v2di"
7869 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
7872 (match_operand:V4SI 1 "nonimmediate_operand" "xm,x")
7873 (match_operand 2 "" "")))) ;; parallel with const_int's
7874 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
7876 && (register_operand (operands[1], V4SImode)
7877 || register_operand (operands[3], V16QImode))"
7878 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
7879 [(set_attr "type" "sseadd")
7880 (set_attr "mode" "TI")])
;; Unpack of V4SI operand 1 into V2DI operand 0 through pperm (the "sign"
;; flavour, per the pattern name).  Operand 2 is the parallel of const_ints
;; that selects the source elements; V16QI operand 3 is only a `use'.  The
;; destination doubles as the first pperm source (%0 appears twice).
;; Either operand 1 or operand 3 may be memory, but not both, so the condition
;; must test operands 1 and 3.  Operand 2 is a modeless parallel and can never
;; satisfy register_operand in V16QImode; testing it made the memory form of
;; operand 1 (first alternative) unreachable.
7882 (define_insn "sse5_pperm_sign_v4si_v2di"
7883 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
7886 (match_operand:V4SI 1 "nonimmediate_operand" "xm,x")
7887 (match_operand 2 "" "")))) ;; parallel with const_int's
7888 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
7890 && (register_operand (operands[1], V4SImode)
7891 || register_operand (operands[3], V16QImode))"
7892 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
7893 [(set_attr "type" "sseadd")
7894 (set_attr "mode" "TI")])
7896 ;; SSE5 pack instructions that combine two vectors into a smaller vector
;; Pack of two V2DI inputs (operands 1 and 2) into V4SI operand 0 through
;; pperm; V16QI operand 3 is only a `use'.  In each alternative either operand
;; 1 or operand 3 is tied to the destination ("0").
7897 (define_insn "sse5_pperm_pack_v2di_v4si"
7898 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x,x")
7901 (match_operand:V2DI 1 "nonimmediate_operand" "0,0,x,xm"))
7903 (match_operand:V2DI 2 "nonimmediate_operand" "x,xm,xm,x"))))
7904 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0"))]
7905 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
7906 "pperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
7907 [(set_attr "type" "sse4arg")
7908 (set_attr "mode" "TI")])
;; Pack of two V4SI inputs (operands 1 and 2) into V8HI operand 0 through
;; pperm; V16QI operand 3 is only a `use'.  In each alternative either operand
;; 1 or operand 3 is tied to the destination ("0").
7910 (define_insn "sse5_pperm_pack_v4si_v8hi"
7911 [(set (match_operand:V8HI 0 "register_operand" "=x,x,x,x")
7914 (match_operand:V4SI 1 "nonimmediate_operand" "0,0,x,xm"))
7916 (match_operand:V4SI 2 "nonimmediate_operand" "x,xm,xm,x"))))
7917 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0"))]
7918 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
7919 "pperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
7920 [(set_attr "type" "sse4arg")
7921 (set_attr "mode" "TI")])
;; Pack of two V8HI inputs (operands 1 and 2) into V16QI operand 0 through
;; pperm; V16QI operand 3 is only a `use'.  In each alternative either operand
;; 1 or operand 3 is tied to the destination ("0").
7923 (define_insn "sse5_pperm_pack_v8hi_v16qi"
7924 [(set (match_operand:V16QI 0 "register_operand" "=x,x,x,x")
7927 (match_operand:V8HI 1 "nonimmediate_operand" "0,0,x,xm"))
7929 (match_operand:V8HI 2 "nonimmediate_operand" "x,xm,xm,x"))))
7930 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0"))]
7931 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
7932 "pperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
7933 [(set_attr "type" "sse4arg")
7934 (set_attr "mode" "TI")])
7936 ;; Floating point permutation (permps, permpd)
;; Floating-point permute for the SSEMODEF2P modes (V4SF and V2DF), emitted as
;; permps or permpd through the <ssemodesuffixf4> suffix.  Operands 1 and 2
;; are vectors of the iterated mode and operand 3 is V16QI; in each
;; alternative either operand 1 or operand 3 is tied to the destination ("0").
7937 (define_insn "sse5_perm<mode>"
7938 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
7940 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0,x,xm")
7941 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x")
7942 (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0")]
7943 UNSPEC_SSE5_PERMUTE))]
7944 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
7945 "perm<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
7946 [(set_attr "type" "sse4arg")
7947 (set_attr "mode" "<MODE>")])
7949 ;; SSE5 packed rotate instructions
;; Rotate-left expander for the SSEMODE1248 integer vector modes with an SI
;; count.  When the count does not satisfy const_0_to_<sserotatemax>_operand,
;; it is converted to the element mode if needed, replicated into all
;; <ssescalarnum> elements of a fresh vector register with vec_init, and the
;; rotate is emitted through sse5_vrotl<mode>3.
;; NOTE(review): the braces and the statement ending the non-constant path
;; are not visible in this excerpt; presumably DONE -- confirm.
7950 (define_expand "rotl<mode>3"
7951 [(set (match_operand:SSEMODE1248 0 "register_operand" "")
7953 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "")
7954 (match_operand:SI 2 "general_operand")))]
7957 /* If we were given a scalar, convert it to parallel */
7958 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
7960 rtvec vs = rtvec_alloc (<ssescalarnum>);
7961 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
7962 rtx reg = gen_reg_rtx (<MODE>mode);
7963 rtx op2 = operands[2];
7966 if (GET_MODE (op2) != <ssescalarmode>mode)
7968 op2 = gen_reg_rtx (<ssescalarmode>mode);
7969 convert_move (op2, operands[2], false);
7972 for (i = 0; i < <ssescalarnum>; i++)
7973 RTVEC_ELT (vs, i) = op2;
7975 emit_insn (gen_vec_init<mode> (reg, par));
7976 emit_insn (gen_sse5_vrotl<mode>3 (operands[0], operands[1], reg));
;; Rotate-right expander for the SSEMODE1248 integer vector modes with an SI
;; count.  When the count does not satisfy const_0_to_<sserotatemax>_operand,
;; it is converted to the element mode if needed and replicated into all
;; <ssescalarnum> elements of a vector register with vec_init.  That vector
;; is then negated and the rotate is emitted through sse5_vrotl<mode>3.
;; NOTE(review): the braces and the statement ending the non-constant path
;; are not visible in this excerpt; presumably DONE -- confirm.
7981 (define_expand "rotr<mode>3"
7982 [(set (match_operand:SSEMODE1248 0 "register_operand" "")
7983 (rotatert:SSEMODE1248
7984 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "")
7985 (match_operand:SI 2 "general_operand")))]
7988 /* If we were given a scalar, convert it to parallel */
7989 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
7991 rtvec vs = rtvec_alloc (<ssescalarnum>);
7992 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
7993 rtx neg = gen_reg_rtx (<MODE>mode);
7994 rtx reg = gen_reg_rtx (<MODE>mode);
7995 rtx op2 = operands[2];
7998 if (GET_MODE (op2) != <ssescalarmode>mode)
8000 op2 = gen_reg_rtx (<ssescalarmode>mode);
8001 convert_move (op2, operands[2], false);
8004 for (i = 0; i < <ssescalarnum>; i++)
8005 RTVEC_ELT (vs, i) = op2;
8007 emit_insn (gen_vec_init<mode> (reg, par));
8008 emit_insn (gen_neg<mode>2 (neg, reg));
8009 emit_insn (gen_sse5_vrotl<mode>3 (operands[0], operands[1], neg));
;; Rotate left by an immediate: operand 1 (register or memory) is rotated by
;; the constant operand 2, which must satisfy
;; const_0_to_<sserotatemax>_operand.  The instruction is emitted as
;; prot<ssevecsize>, i.e. protb, protw, protd or protq.
8014 (define_insn "sse5_rotl<mode>3"
8015 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
8017 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm")
8018 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
8020 "prot<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
8021 [(set_attr "type" "sseishft")
8022 (set_attr "mode" "TI")])
;; Rotate right by an immediate.  The instruction is emitted with the same
;; prot<ssevecsize> mnemonic as sse5_rotl<mode>3, so the count is rewritten as
;; (element width in bits - count) and passed through operands[3].
;; The width must come from the element mode.  <ssescalarnum> is the number
;; of elements (it sizes the rtvec in the rotl/rotr expanders), so the old
;; <ssescalarnum> * 8 gave 128/64/32/16 for the b/w/d/q forms instead of
;; 8/16/32/64 and was correct only for V4SI.
8024 (define_insn "sse5_rotr<mode>3"
8025 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
8026 (rotatert:SSEMODE1248
8027 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm")
8028 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
8031 operands[3] = GEN_INT (GET_MODE_BITSIZE (<ssescalarmode>mode) - INTVAL (operands[2]));
8032 return \"prot<ssevecsize>\t{%3, %1, %0|%0, %1, %3}\";
8034 [(set_attr "type" "sseishft")
8035 (set_attr "mode" "TI")])
;; Variable rotate-right expander: negates the per-element counts in operand 2
;; into a fresh register and emits sse5_vrotl<mode>3 with the negated counts.
8037 (define_expand "vrotr<mode>3"
8038 [(match_operand:SSEMODE1248 0 "register_operand" "")
8039 (match_operand:SSEMODE1248 1 "register_operand" "")
8040 (match_operand:SSEMODE1248 2 "register_operand" "")]
8043 rtx reg = gen_reg_rtx (<MODE>mode);
8044 emit_insn (gen_neg<mode>2 (reg, operands[2]));
8045 emit_insn (gen_sse5_vrotl<mode>3 (operands[0], operands[1], reg));
;; Expander for a rotate left where operand 2 is a vector of counts.
;; All three operands are forwarded unchanged to sse5_vrotl<mode>3.
8049 (define_expand "vrotl<mode>3"
8050 [(match_operand:SSEMODE1248 0 "register_operand" "")
8051 (match_operand:SSEMODE1248 1 "register_operand" "")
8052 (match_operand:SSEMODE1248 2 "register_operand" "")]
8055 emit_insn (gen_sse5_vrotl<mode>3 (operands[0], operands[1], operands[2]));
;; Variable rotate: emits prot<ssevecsize> with a vector of counts in
;; operand 2.  The RTL is an if_then_else keyed on operand 2; the arm
;; visible here is a rotatert by the negated count.  The two constraint
;; alternatives let either operand 2 or operand 1 be a memory reference,
;; but not both in the same alternative, and the insn condition also
;; consults ix86_sse5_valid_op_p.
8059 (define_insn "sse5_vrotl<mode>3"
8060 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
8061 (if_then_else:SSEMODE1248
8063 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm,x")
8066 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "x,xm")
8068 (rotatert:SSEMODE1248
8070 (neg:SSEMODE1248 (match_dup 2)))))]
8071 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 3, true, 1)"
8072 "prot<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
8073 [(set_attr "type" "sseishft")
8074 (set_attr "mode" "TI")])
8076 ;; SSE5 packed shift instructions.
8077 ;; FIXME: add V2DI back in
;; Expander for a logical shift right by a vector of counts, for the
;; SSEMODE124 modes only.  The counts are negated with neg<mode>2 and the
;; negated vector is passed to sse5_lshl<mode>3.
8078 (define_expand "vlshr<mode>3"
8079 [(match_operand:SSEMODE124 0 "register_operand" "")
8080 (match_operand:SSEMODE124 1 "register_operand" "")
8081 (match_operand:SSEMODE124 2 "register_operand" "")]
8084 rtx neg = gen_reg_rtx (<MODE>mode);
8085 emit_insn (gen_neg<mode>2 (neg, operands[2]));
8086 emit_insn (gen_sse5_lshl<mode>3 (operands[0], operands[1], neg));
;; Expander for an arithmetic shift right by a vector of counts, for the
;; SSEMODE124 modes only.  The counts are negated with neg<mode>2 and the
;; negated vector is passed to sse5_ashl<mode>3.
8090 (define_expand "vashr<mode>3"
8091 [(match_operand:SSEMODE124 0 "register_operand" "")
8092 (match_operand:SSEMODE124 1 "register_operand" "")
8093 (match_operand:SSEMODE124 2 "register_operand" "")]
8096 rtx neg = gen_reg_rtx (<MODE>mode);
8097 emit_insn (gen_neg<mode>2 (neg, operands[2]));
8098 emit_insn (gen_sse5_ashl<mode>3 (operands[0], operands[1], neg));
;; Expander for a shift left by a vector of counts, for the SSEMODE124
;; modes only.  All three operands are forwarded unchanged to
;; sse5_ashl<mode>3.
8102 (define_expand "vashl<mode>3"
8103 [(match_operand:SSEMODE124 0 "register_operand" "")
8104 (match_operand:SSEMODE124 1 "register_operand" "")
8105 (match_operand:SSEMODE124 2 "register_operand" "")]
8108 emit_insn (gen_sse5_ashl<mode>3 (operands[0], operands[1], operands[2]));
;; Variable arithmetic shift: emits psha<ssevecsize> with a vector of
;; counts in operand 2.  The RTL is an if_then_else keyed on operand 2;
;; the arm visible here is an ashiftrt by the negated count.  Either
;; operand 2 or operand 1 may be a memory reference, but not both in the
;; same alternative; ix86_sse5_valid_op_p is part of the insn condition.
8112 (define_insn "sse5_ashl<mode>3"
8113 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
8114 (if_then_else:SSEMODE1248
8116 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm,x")
8119 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "x,xm")
8121 (ashiftrt:SSEMODE1248
8123 (neg:SSEMODE1248 (match_dup 2)))))]
8124 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 3, true, 1)"
8125 "psha<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
8126 [(set_attr "type" "sseishft")
8127 (set_attr "mode" "TI")])
;; Variable logical shift: emits pshl<ssevecsize> with a vector of counts
;; in operand 2.  The RTL is an if_then_else keyed on operand 2; the arm
;; visible here is an lshiftrt by the negated count.  Either operand 2 or
;; operand 1 may be a memory reference, but not both in the same
;; alternative; ix86_sse5_valid_op_p is part of the insn condition.
8129 (define_insn "sse5_lshl<mode>3"
8130 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
8131 (if_then_else:SSEMODE1248
8133 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm,x")
8136 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "x,xm")
8138 (lshiftrt:SSEMODE1248
8140 (neg:SSEMODE1248 (match_dup 2)))))]
8141 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 3, true, 1)"
8142 "pshl<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
8143 [(set_attr "type" "sseishft")
8144 (set_attr "mode" "TI")])
8146 ;; SSE2 doesn't have some shift variants, so define versions for SSE5
;; Expander for a V16QI shift left by a scalar SImode count.  Operand 2 is
;; placed in all 16 slots of a PARALLEL, a V16QI register is initialised
;; from it with vec_initv16qi, and that vector of counts is passed to
;; sse5_ashlv16qi3.
8147 (define_expand "ashlv16qi3"
8148 [(match_operand:V16QI 0 "register_operand" "")
8149 (match_operand:V16QI 1 "register_operand" "")
8150 (match_operand:SI 2 "nonmemory_operand" "")]
8153 rtvec vs = rtvec_alloc (16);
8154 rtx par = gen_rtx_PARALLEL (V16QImode, vs);
8155 rtx reg = gen_reg_rtx (V16QImode);
8157 for (i = 0; i < 16; i++)
8158 RTVEC_ELT (vs, i) = operands[2];
8160 emit_insn (gen_vec_initv16qi (reg, par));
8161 emit_insn (gen_sse5_ashlv16qi3 (operands[0], operands[1], reg));
;; Expander for a V16QI logical shift by a scalar SImode count.  Operand 2
;; is placed in all 16 slots of a PARALLEL, a V16QI register is initialised
;; from it with vec_initv16qi, and that vector of counts is passed to
;; sse5_lshlv16qi3.
8165 (define_expand "lshlv16qi3"
8166 [(match_operand:V16QI 0 "register_operand" "")
8167 (match_operand:V16QI 1 "register_operand" "")
8168 (match_operand:SI 2 "nonmemory_operand" "")]
8171 rtvec vs = rtvec_alloc (16);
8172 rtx par = gen_rtx_PARALLEL (V16QImode, vs);
8173 rtx reg = gen_reg_rtx (V16QImode);
8175 for (i = 0; i < 16; i++)
8176 RTVEC_ELT (vs, i) = operands[2];
8178 emit_insn (gen_vec_initv16qi (reg, par));
8179 emit_insn (gen_sse5_lshlv16qi3 (operands[0], operands[1], reg));
;; Expander for a V16QI arithmetic shift right by a scalar SImode count,
;; implemented through sse5_ashlv16qi3 with negated counts.  For a
;; CONST_INT count the negated constant is used as the element value, so
;; the vector built by vec_initv16qi is passed on directly.  For any other
;; count the vector built by vec_initv16qi is negated with negv16qi2 before
;; being passed on.
8183 (define_expand "ashrv16qi3"
8184 [(match_operand:V16QI 0 "register_operand" "")
8185 (match_operand:V16QI 1 "register_operand" "")
8186 (match_operand:SI 2 "nonmemory_operand" "")]
8189 rtvec vs = rtvec_alloc (16);
8190 rtx par = gen_rtx_PARALLEL (V16QImode, vs);
8191 rtx reg = gen_reg_rtx (V16QImode);
8193 rtx ele = ((GET_CODE (operands[2]) == CONST_INT)
8194 ? GEN_INT (- INTVAL (operands[2]))
8197 for (i = 0; i < 16; i++)
8198 RTVEC_ELT (vs, i) = ele;
8200 emit_insn (gen_vec_initv16qi (reg, par));
8202 if (GET_CODE (operands[2]) != CONST_INT)
8204 rtx neg = gen_reg_rtx (V16QImode);
8205 emit_insn (gen_negv16qi2 (neg, reg));
8206 emit_insn (gen_sse5_ashlv16qi3 (operands[0], operands[1], neg));
8209 emit_insn (gen_sse5_ashlv16qi3 (operands[0], operands[1], reg));
;; Expander for a V2DI arithmetic shift right by a scalar DImode count,
;; implemented through sse5_ashlv2di3 with a negated count.  The negated
;; count is obtained in one of three ways: a CONST_INT is negated at
;; expand time; a count whose mode is not DImode is first moved into a
;; DImode register with convert_move and then negated with negdi2; any
;; other count is negated with negdi2 directly.  The result is placed in
;; both elements of the count vector built with vec_initv2di.
8214 (define_expand "ashrv2di3"
8215 [(match_operand:V2DI 0 "register_operand" "")
8216 (match_operand:V2DI 1 "register_operand" "")
8217 (match_operand:DI 2 "nonmemory_operand" "")]
8220 rtvec vs = rtvec_alloc (2);
8221 rtx par = gen_rtx_PARALLEL (V2DImode, vs);
8222 rtx reg = gen_reg_rtx (V2DImode);
8225 if (GET_CODE (operands[2]) == CONST_INT)
8226 ele = GEN_INT (- INTVAL (operands[2]));
8227 else if (GET_MODE (operands[2]) != DImode)
8229 rtx move = gen_reg_rtx (DImode);
8230 ele = gen_reg_rtx (DImode);
8231 convert_move (move, operands[2], false);
8232 emit_insn (gen_negdi2 (ele, move));
8236 ele = gen_reg_rtx (DImode);
8237 emit_insn (gen_negdi2 (ele, operands[2]));
8240 RTVEC_ELT (vs, 0) = ele;
8241 RTVEC_ELT (vs, 1) = ele;
8242 emit_insn (gen_vec_initv2di (reg, par));
8243 emit_insn (gen_sse5_ashlv2di3 (operands[0], operands[1], reg));
8247 ;; SSE5 FRCZ support
;; Packed frcz for the SSEMODEF2P modes (V4SF, V2DF): emits
;; frcz<ssemodesuffixf4> from a register or memory operand 1 into a
;; register destination.
8249 (define_insn "sse5_frcz<mode>2"
8250 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
8252 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm")]
8255 "frcz<ssemodesuffixf4>\t{%1, %0|%0, %1}"
8256 [(set_attr "type" "ssecvt1")
8257 (set_attr "prefix_extra" "1")
8258 (set_attr "mode" "<MODE>")])
;; Scalar frcz expressed as a vec_merge on vector modes: emits
;; frcz<ssemodesuffixf2s> (the ss/sd suffix) from operand 2, which may be a
;; register or memory.  Operand 1 is tied to the destination register by
;; its "0" constraint.
8261 (define_insn "sse5_vmfrcz<mode>2"
8262 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
8263 (vec_merge:SSEMODEF2P
8265 [(match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
8267 (match_operand:SSEMODEF2P 1 "register_operand" "0")
8270 "frcz<ssemodesuffixf2s>\t{%2, %0|%0, %2}"
8271 [(set_attr "type" "ssecvt1")
8272 (set_attr "prefix_extra" "1")
8273 (set_attr "mode" "<MODE>")])
;; Emits cvtph2ps: a V4HI source (register or memory) is converted into a
;; V4SF register.  The operation is modelled as an unspec.
8275 (define_insn "sse5_cvtph2ps"
8276 [(set (match_operand:V4SF 0 "register_operand" "=x")
8277 (unspec:V4SF [(match_operand:V4HI 1 "nonimmediate_operand" "xm")]
8280 "cvtph2ps\t{%1, %0|%0, %1}"
8281 [(set_attr "type" "ssecvt")
8282 (set_attr "mode" "V4SF")])
;; Emits cvtps2ph: a V4SF register source is converted into a V4HI
;; destination, which may be a register or memory.  The operation is
;; modelled as an unspec.
8284 (define_insn "sse5_cvtps2ph"
8285 [(set (match_operand:V4HI 0 "nonimmediate_operand" "=xm")
8286 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")]
8289 "cvtps2ph\t{%1, %0|%0, %1}"
8290 [(set_attr "type" "ssecvt")
8291 (set_attr "mode" "V4SF")])
8293 ;; Scalar versions of the com instructions that use vector types that are
8294 ;; called from the intrinsics. Unlike the other s{s,d} instructions, the
8295 ;; com instructions fill in 0's in the upper bits instead of leaving them
8296 ;; unmodified, so we use const_vector of 0 instead of match_dup.
;; Expander for the scalar com comparison on vector modes.  The comparison
;; (operator 1 applied to operands 2 and 3) sits inside a vec_merge, and
;; the preparation code sets operands[4] to the all-zero constant of the
;; vector mode.
8297 (define_expand "sse5_vmmaskcmp<mode>3"
8298 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
8299 (vec_merge:SSEMODEF2P
8300 (match_operator:SSEMODEF2P 1 "sse5_comparison_float_operator"
8301 [(match_operand:SSEMODEF2P 2 "register_operand" "")
8302 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "")])
8307 operands[4] = CONST0_RTX (<MODE>mode);
;; Matching insn for the scalar com comparison: emits
;; com%Y1<ssemodesuffixf2s>, where %Y1 prints the condition taken from
;; operator 1.  Operand 2 must be a register, operand 3 may be a register
;; or memory, and operand 4 is matched with an empty predicate.
8310 (define_insn "*sse5_vmmaskcmp<mode>3"
8311 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
8312 (vec_merge:SSEMODEF2P
8313 (match_operator:SSEMODEF2P 1 "sse5_comparison_float_operator"
8314 [(match_operand:SSEMODEF2P 2 "register_operand" "x")
8315 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm")])
8316 (match_operand:SSEMODEF2P 4 "")
8319 "com%Y1<ssemodesuffixf2s>\t{%3, %2, %0|%0, %2, %3}"
8320 [(set_attr "type" "sse4arg")
8321 (set_attr "mode" "<ssescalarmode>")])
8323 ;; We don't have a comparison operator that always returns true/false, so
8324 ;; handle comfalse and comtrue specially.
;; comfalse/comtrue for the float vector modes, modelled as an unspec
;; (UNSPEC_SSE5_TRUEFALSE) because no RTL comparison code always yields
;; true or always yields false.  The constant in operand 3 selects one of
;; four output templates: scalar or packed, each in a false and a true
;; flavour.  The last two templates must use the comtrue mnemonics;
;; repeating the comfalse templates there would make the always-true
;; forms impossible to emit.
8325 (define_insn "sse5_com_tf<mode>3"
8326 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
8328 [(match_operand:SSEMODEF2P 1 "register_operand" "x")
8329 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
8330 (match_operand:SI 3 "const_int_operand" "n")]
8331 UNSPEC_SSE5_TRUEFALSE))]
8334 const char *ret = NULL;
8336 switch (INTVAL (operands[3]))
8339 ret = \"comfalses<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}\";
8343 ret = \"comfalsep<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}\";
8347 ret = \"comtrues<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}\";
8351 ret = \"comtruep<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}\";
8360 [(set_attr "type" "ssecmp")
8361 (set_attr "mode" "<MODE>")])
;; Packed floating-point com comparison for V4SF/V2DF: emits
;; com%Y1<ssemodesuffixf4>, with the condition printed from operator 1.
;; Operand 2 must be a register; operand 3 may be a register or memory.
8363 (define_insn "sse5_maskcmp<mode>3"
8364 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
8365 (match_operator:SSEMODEF2P 1 "sse5_comparison_float_operator"
8366 [(match_operand:SSEMODEF2P 2 "register_operand" "x")
8367 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm")]))]
8369 "com%Y1<ssemodesuffixf4>\t{%3, %2, %0|%0, %2, %3}"
8370 [(set_attr "type" "ssecmp")
8371 (set_attr "mode" "<MODE>")])
;; Packed integer pcom comparison for the SSEMODE1248 modes, using the
;; operators accepted by ix86_comparison_int_operator: emits
;; pcom%Y1<ssevecsize>.  The pattern shares its name template with the
;; floating-point pattern but iterates over integer vector modes.
8373 (define_insn "sse5_maskcmp<mode>3"
8374 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
8375 (match_operator:SSEMODE1248 1 "ix86_comparison_int_operator"
8376 [(match_operand:SSEMODE1248 2 "register_operand" "x")
8377 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")]))]
8379 "pcom%Y1<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
8380 [(set_attr "type" "sse4arg")
8381 (set_attr "mode" "TI")])
;; Packed integer pcom comparison using the operators accepted by
;; ix86_comparison_uns_operator: emits pcom%Y1u<ssevecsize> (note the
;; "u" between the condition and the element-size suffix).
8383 (define_insn "sse5_maskcmp_uns<mode>3"
8384 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
8385 (match_operator:SSEMODE1248 1 "ix86_comparison_uns_operator"
8386 [(match_operand:SSEMODE1248 2 "register_operand" "x")
8387 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")]))]
8389 "pcom%Y1u<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
8390 [(set_attr "type" "ssecmp")
8391 (set_attr "mode" "TI")])
8393 ;; Version of pcom*u* that is called from the intrinsics that allows pcomequ*
8394 ;; and pcomneu* not to be converted to the signed ones in case somebody needs
8395 ;; the exact instruction generated for the intrinsic.
;; Same output template as sse5_maskcmp_uns<mode>3, but here the
;; comparison operator is wrapped in UNSPEC_SSE5_UNSIGNED_CMP.
8396 (define_insn "sse5_maskcmp_uns2<mode>3"
8397 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
8399 [(match_operator:SSEMODE1248 1 "ix86_comparison_uns_operator"
8400 [(match_operand:SSEMODE1248 2 "register_operand" "x")
8401 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")])]
8402 UNSPEC_SSE5_UNSIGNED_CMP))]
8404 "pcom%Y1u<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
8405 [(set_attr "type" "ssecmp")
8406 (set_attr "mode" "TI")])
8408 ;; Pcomtrue and pcomfalse support. These are useless instructions, but are
8409 ;; being added here to be complete.
;; Integer pcomtrue/pcomfalse, modelled as an unspec
;; (UNSPEC_SSE5_TRUEFALSE).  A nonzero constant in operand 3 selects
;; pcomtrue<ssevecsize>; zero selects pcomfalse<ssevecsize>.
8410 (define_insn "sse5_pcom_tf<mode>3"
8411 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
8413 [(match_operand:SSEMODE1248 1 "register_operand" "x")
8414 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm")
8415 (match_operand:SI 3 "const_int_operand" "n")]
8416 UNSPEC_SSE5_TRUEFALSE))]
8419 return ((INTVAL (operands[3]) != 0)
8420 ? "pcomtrue<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
8421 : "pcomfalse<ssevecsize>\t{%2, %1, %0|%0, %1, %2}");
8423 [(set_attr "type" "ssecmp")
8424 (set_attr "mode" "TI")])
;; Emits aesenc on V2DI operands.  Operand 1 is tied to the destination
;; register by its "0" constraint; operand 2 may be a register or memory.
;; The operation is modelled as an unspec.
8426 (define_insn "aesenc"
8427 [(set (match_operand:V2DI 0 "register_operand" "=x")
8428 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
8429 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
8432 "aesenc\t{%2, %0|%0, %2}"
8433 [(set_attr "type" "sselog1")
8434 (set_attr "prefix_extra" "1")
8435 (set_attr "mode" "TI")])
;; Emits aesenclast on V2DI operands (UNSPEC_AESENCLAST).  Operand 1 is
;; tied to the destination register by its "0" constraint; operand 2 may
;; be a register or memory.
8437 (define_insn "aesenclast"
8438 [(set (match_operand:V2DI 0 "register_operand" "=x")
8439 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
8440 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
8441 UNSPEC_AESENCLAST))]
8443 "aesenclast\t{%2, %0|%0, %2}"
8444 [(set_attr "type" "sselog1")
8445 (set_attr "prefix_extra" "1")
8446 (set_attr "mode" "TI")])
;; Emits aesdec on V2DI operands.  Operand 1 is tied to the destination
;; register by its "0" constraint; operand 2 may be a register or memory.
;; The operation is modelled as an unspec.
8448 (define_insn "aesdec"
8449 [(set (match_operand:V2DI 0 "register_operand" "=x")
8450 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
8451 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
8454 "aesdec\t{%2, %0|%0, %2}"
8455 [(set_attr "type" "sselog1")
8456 (set_attr "prefix_extra" "1")
8457 (set_attr "mode" "TI")])
;; Emits aesdeclast on V2DI operands (UNSPEC_AESDECLAST).  Operand 1 is
;; tied to the destination register by its "0" constraint; operand 2 may
;; be a register or memory.
8459 (define_insn "aesdeclast"
8460 [(set (match_operand:V2DI 0 "register_operand" "=x")
8461 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
8462 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
8463 UNSPEC_AESDECLAST))]
8465 "aesdeclast\t{%2, %0|%0, %2}"
8466 [(set_attr "type" "sselog1")
8467 (set_attr "prefix_extra" "1")
8468 (set_attr "mode" "TI")])
;; Emits aesimc: a single V2DI source (register or memory) into a V2DI
;; register.  The operation is modelled as an unspec.
8470 (define_insn "aesimc"
8471 [(set (match_operand:V2DI 0 "register_operand" "=x")
8472 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")]
8475 "aesimc\t{%1, %0|%0, %1}"
8476 [(set_attr "type" "sselog1")
8477 (set_attr "prefix_extra" "1")
8478 (set_attr "mode" "TI")])
;; Emits aeskeygenassist (UNSPEC_AESKEYGENASSIST): a V2DI source (register
;; or memory) plus an immediate in operand 2 that must satisfy
;; const_0_to_255_operand.
8480 (define_insn "aeskeygenassist"
8481 [(set (match_operand:V2DI 0 "register_operand" "=x")
8482 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")
8483 (match_operand:SI 2 "const_0_to_255_operand" "n")]
8484 UNSPEC_AESKEYGENASSIST))]
8486 "aeskeygenassist\t{%2, %1, %0|%0, %1, %2}"
8487 [(set_attr "type" "sselog1")
8488 (set_attr "prefix_extra" "1")
8489 (set_attr "mode" "TI")])
;; Emits pclmulqdq on V2DI operands.  Operand 1 is tied to the destination
;; register by its "0" constraint, operand 2 may be a register or memory,
;; and operand 3 is an immediate that must satisfy const_0_to_255_operand.
;; The operation is modelled as an unspec.
8491 (define_insn "pclmulqdq"
8492 [(set (match_operand:V2DI 0 "register_operand" "=x")
8493 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
8494 (match_operand:V2DI 2 "nonimmediate_operand" "xm")
8495 (match_operand:SI 3 "const_0_to_255_operand" "n")]
8498 "pclmulqdq\t{%3, %2, %0|%0, %2, %3}"
8499 [(set_attr "type" "sselog1")
8500 (set_attr "prefix_extra" "1")
8501 (set_attr "mode" "TI")])