1 ;; GCC machine description for SSE instructions
2 ;; Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2011
3 ;; Free Software Foundation, Inc.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify
8 ;; it under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
;; any later version.
12 ;; GCC is distributed in the hope that it will be useful,
13 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ;; GNU General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
21 ;; All vector modes including V?TImode, used in move patterns.
;; The first mode of each row is enabled only when TARGET_AVX holds; the
;; second mode is unconditional.  Compared with iterator V, this list adds
;; the V2TI/V1TI row and puts no TARGET_SSE2 condition on V2DF.
22 (define_mode_iterator V16
23 [(V32QI "TARGET_AVX") V16QI
24 (V16HI "TARGET_AVX") V8HI
25 (V8SI "TARGET_AVX") V4SI
26 (V4DI "TARGET_AVX") V2DI
27 (V2TI "TARGET_AVX") V1TI
28 (V8SF "TARGET_AVX") V4SF
29 (V4DF "TARGET_AVX") V2DF])
;; Vector modes without the V2TI/V1TI row that V16 carries.  The first mode
;; of each row requires TARGET_AVX, V2DF requires TARGET_SSE2, and the
;; remaining modes are unconditional.
32 (define_mode_iterator V
33 [(V32QI "TARGET_AVX") V16QI
34 (V16HI "TARGET_AVX") V8HI
35 (V8SI "TARGET_AVX") V4SI
36 (V4DI "TARGET_AVX") V2DI
37 (V8SF "TARGET_AVX") V4SF
38 (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
40 ;; All 128bit vector modes
;; Integer and float modes together; only V2DF carries a condition
;; (TARGET_SSE2).
41 (define_mode_iterator V_128
42 [V16QI V8HI V4SI V2DI V4SF (V2DF "TARGET_SSE2")])
44 ;; All 256bit vector modes
;; Integer and float modes together; no mode carries its own enabling
;; condition in this list.
45 (define_mode_iterator V_256
46 [V32QI V16HI V8SI V4DI V8SF V4DF])
48 ;; All vector float modes
;; V8SF and V4DF require TARGET_AVX, V2DF requires TARGET_SSE2, and V4SF
;; is unconditional.
49 (define_mode_iterator VF
50 [(V8SF "TARGET_AVX") V4SF
51 (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
53 ;; All SFmode vector float modes
;; V8SF is enabled only under TARGET_AVX; V4SF is unconditional.
54 (define_mode_iterator VF1
55 [(V8SF "TARGET_AVX") V4SF])
57 ;; All DFmode vector float modes
;; V4DF is enabled only under TARGET_AVX.  V2DF has no condition here,
;; unlike in VF and VF_128 where it requires TARGET_SSE2.
58 (define_mode_iterator VF2
59 [(V4DF "TARGET_AVX") V2DF])
61 ;; All 128bit vector float modes
;; V2DF requires TARGET_SSE2; V4SF is unconditional.
62 (define_mode_iterator VF_128
63 [V4SF (V2DF "TARGET_SSE2")])
65 ;; All 256bit vector float modes
66 (define_mode_iterator VF_256
69 ;; All vector integer modes
;; The first mode of each row requires TARGET_AVX; the second is
;; unconditional.  VI_AVX2 lists the same modes gated on TARGET_AVX2.
70 (define_mode_iterator VI
71 [(V32QI "TARGET_AVX") V16QI
72 (V16HI "TARGET_AVX") V8HI
73 (V8SI "TARGET_AVX") V4SI
74 (V4DI "TARGET_AVX") V2DI])
;; Same mode list as VI, but the first mode of each row is gated on
;; TARGET_AVX2 instead of TARGET_AVX.
76 (define_mode_iterator VI_AVX2
77 [(V32QI "TARGET_AVX2") V16QI
78 (V16HI "TARGET_AVX2") V8HI
79 (V8SI "TARGET_AVX2") V4SI
80 (V4DI "TARGET_AVX2") V2DI])
82 ;; All QImode vector integer modes
;; V32QI is enabled only under TARGET_AVX; V16QI is unconditional.
83 (define_mode_iterator VI1
84 [(V32QI "TARGET_AVX") V16QI])
86 ;; All DImode vector integer modes
;; V4DI is enabled only under TARGET_AVX; V2DI is unconditional.
87 (define_mode_iterator VI8
88 [(V4DI "TARGET_AVX") V2DI])
;; One iterator per integer element mode (QI, HI, SI, DI in that order).
;; Each pairs the two vector modes of that element mode, with the first
;; one gated on TARGET_AVX2 -- VI1 and VI8 gate the same modes on
;; TARGET_AVX instead.
90 (define_mode_iterator VI1_AVX2
91 [(V32QI "TARGET_AVX2") V16QI])
93 (define_mode_iterator VI2_AVX2
94 [(V16HI "TARGET_AVX2") V8HI])
96 (define_mode_iterator VI4_AVX2
97 [(V8SI "TARGET_AVX2") V4SI])
99 (define_mode_iterator VI8_AVX2
100 [(V4DI "TARGET_AVX2") V2DI])
102 ;; ??? We should probably use TImode instead.
;; V2TI is enabled only under TARGET_AVX2; V1TI is unconditional.
103 (define_mode_iterator VIMAX_AVX2
104 [(V2TI "TARGET_AVX2") V1TI])
106 ;; ??? This should probably be dropped in favor of VIMAX_AVX2.
;; Differs from VIMAX_AVX2 only in listing TI where that iterator lists
;; V1TI; V2TI is gated on TARGET_AVX2 in both.
107 (define_mode_iterator SSESCALARMODE
108 [(V2TI "TARGET_AVX2") TI])
;; QI- and HI-element integer vector modes.  The first mode of each row
;; requires TARGET_AVX2; the second is unconditional.
110 (define_mode_iterator VI12_AVX2
111 [(V32QI "TARGET_AVX2") V16QI
112 (V16HI "TARGET_AVX2") V8HI])
;; HI- and SI-element integer vector modes.  The first mode of each row
;; requires TARGET_AVX2; the second is unconditional.
114 (define_mode_iterator VI24_AVX2
115 [(V16HI "TARGET_AVX2") V8HI
116 (V8SI "TARGET_AVX2") V4SI])
;; QI-, HI- and SI-element integer vector modes.  The first mode of each
;; row requires TARGET_AVX2; the second is unconditional.
118 (define_mode_iterator VI124_AVX2
119 [(V32QI "TARGET_AVX2") V16QI
120 (V16HI "TARGET_AVX2") V8HI
121 (V8SI "TARGET_AVX2") V4SI])
;; HI-, SI- and DI-element integer vector modes.  The first mode of each
;; row requires TARGET_AVX2; the second is unconditional.
123 (define_mode_iterator VI248_AVX2
124 [(V16HI "TARGET_AVX2") V8HI
125 (V8SI "TARGET_AVX2") V4SI
126 (V4DI "TARGET_AVX2") V2DI])
;; SI- and DI-element integer vector modes.  Despite the _AVX2 suffix, no
;; mode carries a condition in this list.
;; NOTE(review): presumably the patterns using this iterator test
;; TARGET_AVX2 themselves -- confirm against its users.
128 (define_mode_iterator VI48_AVX2
129 [V8SI V4SI V4DI V2DI])
131 (define_mode_iterator VI4SD_AVX2
134 (define_mode_iterator V48_AVX2
137 (V4SI "TARGET_AVX2") (V2DI "TARGET_AVX2")
138 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")])
;; Maps each integer vector mode to the string "sse2" or "avx2".  The modes
;; mapped to "avx2" are the ones that VI_AVX2 and VIMAX_AVX2 gate on
;; TARGET_AVX2.
;; NOTE(review): presumably substituted into pattern names as <sse2_avx2>
;; -- confirm against the patterns that use it.
140 (define_mode_attr sse2_avx2
141 [(V16QI "sse2") (V32QI "avx2")
142 (V8HI "sse2") (V16HI "avx2")
143 (V4SI "sse2") (V8SI "avx2")
144 (V2DI "sse2") (V4DI "avx2")
145 (V1TI "sse2") (V2TI "avx2")])
;; Maps each integer vector mode to the string "ssse3" or "avx2".  The last
;; row is keyed on TI rather than V1TI (sse2_avx2 uses V1TI), matching the
;; TI/V2TI pair that SSESCALARMODE iterates over.
147 (define_mode_attr ssse3_avx2
148 [(V16QI "ssse3") (V32QI "avx2")
149 (V8HI "ssse3") (V16HI "avx2")
150 (V4SI "ssse3") (V8SI "avx2")
151 (V2DI "ssse3") (V4DI "avx2")
152 (TI "ssse3") (V2TI "avx2")])
;; Maps each integer vector mode to the string "sse4_1" or "avx2".  There
;; is no TI-based row here, unlike sse2_avx2 and ssse3_avx2.
154 (define_mode_attr sse4_1_avx2
155 [(V16QI "sse4_1") (V32QI "avx2")
156 (V8HI "sse4_1") (V16HI "avx2")
157 (V4SI "sse4_1") (V8SI "avx2")
158 (V2DI "sse4_1") (V4DI "avx2")])
;; Maps float vector modes to "avx" and SI-/DI-element integer vector
;; modes to "avx2".  The string depends only on float-versus-integer, not
;; on the vector width.
160 (define_mode_attr avx_avx2
161 [(V4SF "avx") (V2DF "avx")
162 (V8SF "avx") (V4DF "avx")
163 (V4SI "avx2") (V2DI "avx2")
164 (V8SI "avx2") (V4DI "avx2")])
;; Maps each integer vector mode to the string "vec" or "avx2".  The modes
;; mapped to "avx2" are the ones that VI_AVX2 gates on TARGET_AVX2.
166 (define_mode_attr vec_avx2
167 [(V16QI "vec") (V32QI "avx2")
168 (V8HI "vec") (V16HI "avx2")
169 (V4SI "vec") (V8SI "avx2")
170 (V2DI "vec") (V4DI "avx2")])
172 ;; Mapping of logic-shift operators
;; Code iterator over lshiftrt and ashift.  The two code attributes below
;; are keyed on the same two codes.
173 (define_code_iterator lshift [lshiftrt ashift])
175 ;; Base name for define_insn
;; NOTE(review): "srl"/"sll" read like shift-mnemonic fragments, while
;; "lshr"/"lshl" below read like pattern-name fragments, so this comment
;; and the "insn mnemonic" comment may be attached to the wrong
;; attributes -- confirm against the patterns that use <lshift_insn> and
;; <lshift>.
176 (define_code_attr lshift_insn [(lshiftrt "srl") (ashift "sll")])
178 ;; Base name for insn mnemonic
179 (define_code_attr lshift [(lshiftrt "lshr") (ashift "lshl")])
;; Maps a HI-element vector mode to the mode name with the same element
;; count and SI elements (V16HI -> V16SI, V8HI -> V8SI).
181 (define_mode_attr ssedoublemode
182 [(V16HI "V16SI") (V8HI "V8SI")])
;; Maps a DI-element vector mode to the QI-element vector mode of the same
;; width: V4DI and V32QI are both listed in V_256, V2DI and V16QI both in
;; V_128.
184 (define_mode_attr ssebytemode
185 [(V4DI "V32QI") (V2DI "V16QI")])
187 ;; All 128bit vector integer modes
;; The integer subset of V_128; no mode is conditional.
188 (define_mode_iterator VI_128 [V16QI V8HI V4SI V2DI])
190 ;; All 256bit vector integer modes
;; The integer subset of V_256; no mode is conditional.
191 (define_mode_iterator VI_256 [V32QI V16HI V8SI V4DI])
193 ;; Random 128bit vector integer mode combinations
;; The digits in each name select the element modes covered: 1 = QI,
;; 2 = HI, 4 = SI, 8 = DI.  None of the listed modes is conditional.
194 (define_mode_iterator VI12_128 [V16QI V8HI])
195 (define_mode_iterator VI14_128 [V16QI V4SI])
196 (define_mode_iterator VI124_128 [V16QI V8HI V4SI])
197 (define_mode_iterator VI24_128 [V8HI V4SI])
198 (define_mode_iterator VI248_128 [V8HI V4SI V2DI])
200 ;; Random 256bit vector integer mode combinations
;; The digits in each name select the element modes covered: 1 = QI,
;; 2 = HI, 4 = SI, 8 = DI.  None of the listed modes is conditional.
201 (define_mode_iterator VI124_256 [V32QI V16HI V8SI])
202 (define_mode_iterator VI248_256 [V16HI V8SI V4DI])
204 ;; Int-float size matches
;; Each iterator holds one integer vector mode and the float vector mode
;; with the same element count (SI with SF, DI with DF), in the 128-bit or
;; 256-bit width given by the name suffix.  No mode is conditional.
205 (define_mode_iterator VI4F_128 [V4SI V4SF])
206 (define_mode_iterator VI8F_128 [V2DI V2DF])
207 (define_mode_iterator VI4F_256 [V8SI V8SF])
208 (define_mode_iterator VI8F_256 [V4DI V4DF])
210 ;; Mapping from float mode to required SSE level
;; Scalar SF/DF are included alongside the vector modes.  SF and V4SF map
;; to "sse", DF and V2DF to "sse2", and both 256-bit modes to "avx".
;; Substituted as <sse> in pattern names in this file, e.g.
;; "<sse>_movnt<mode>".
211 (define_mode_attr sse
212 [(SF "sse") (DF "sse2")
213 (V4SF "sse") (V2DF "sse2")
214 (V8SF "avx") (V4DF "avx")])
;; Name fragment for QI- and DI-element integer vector modes: "sse2" for
;; the 128-bit mode and "avx" for the 256-bit one.  Substituted as <sse2>
;; in pattern names in this file, e.g. "<sse2>_movdqu<avxsizesuffix>".
216 (define_mode_attr sse2
217 [(V16QI "sse2") (V32QI "avx")
218 (V2DI "sse2") (V4DI "avx")])
;; Name fragment for the QI-element vector modes: "sse3" for V16QI and
;; "avx" for V32QI.  Substituted as <sse3> in the pattern name
;; "<sse3>_lddqu<avxsizesuffix>" in this file.
220 (define_mode_attr sse3
221 [(V16QI "sse3") (V32QI "avx")])
;; Name fragment for float vector modes: "sse4_1" for the 128-bit modes
;; and "avx" for the 256-bit ones.
223 (define_mode_attr sse4_1
224 [(V4SF "sse4_1") (V2DF "sse4_1")
225 (V8SF "avx") (V4DF "avx")])
;; Size suffix: "256" for every mode listed in V_256 and the empty string
;; for every mode listed in V_128.  Substituted as <avxsizesuffix> in
;; pattern names in this file, e.g. "<sse>_movu<ssemodesuffix><avxsizesuffix>".
227 (define_mode_attr avxsizesuffix
228 [(V32QI "256") (V16HI "256") (V8SI "256") (V4DI "256")
229 (V16QI "") (V8HI "") (V4SI "") (V2DI "")
230 (V8SF "256") (V4DF "256")
231 (V4SF "") (V2DF "")])
233 ;; SSE instruction mode
234 (define_mode_attr sseinsnmode
235 [(V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI") (V2TI "OI")
236 (V16QI "TI") (V8HI "TI") (V4SI "TI") (V2DI "TI") (V1TI "TI")
237 (V8SF "V8SF") (V4DF "V4DF")
238 (V4SF "V4SF") (V2DF "V2DF")
241 ;; Mapping of vector float modes to an integer mode of the same size
242 (define_mode_attr sseintvecmode
243 [(V8SF "V8SI") (V4DF "V4DI")
244 (V4SF "V4SI") (V2DF "V2DI")
245 (V4DF "V4DI") (V8SF "V8SI")
246 (V8SI "V8SI") (V4DI "V4DI")
247 (V4SI "V4SI") (V2DI "V2DI")
248 (V16HI "V16HI") (V8HI "V8HI")
249 (V32QI "V32QI") (V16QI "V16QI")
252 ;; Mapping of vector modes to a vector mode of double size
;; The element mode is kept and the element count is doubled, so the
;; 256-bit keys map to mode names such as V64QI and V16SF.
253 (define_mode_attr ssedoublevecmode
254 [(V32QI "V64QI") (V16HI "V32HI") (V8SI "V16SI") (V4DI "V8DI")
255 (V16QI "V32QI") (V8HI "V16HI") (V4SI "V8SI") (V2DI "V4DI")
256 (V8SF "V16SF") (V4DF "V8DF")
257 (V4SF "V8SF") (V2DF "V4DF")])
259 ;; Mapping of vector modes to a vector mode of half size
260 (define_mode_attr ssehalfvecmode
261 [(V32QI "V16QI") (V16HI "V8HI") (V8SI "V4SI") (V4DI "V2DI")
262 (V16QI "V8QI") (V8HI "V4HI") (V4SI "V2SI")
263 (V8SF "V4SF") (V4DF "V2DF")
266 ;; Mapping of vector modes back to the scalar modes
;; That is, each vector mode maps to the name of its element mode.  Used
;; in this file as the "mode" attribute of the scalar (vm*) patterns, e.g.
;; (set_attr "mode" "<ssescalarmode>").
267 (define_mode_attr ssescalarmode
268 [(V32QI "QI") (V16HI "HI") (V8SI "SI") (V4DI "DI")
269 (V16QI "QI") (V8HI "HI") (V4SI "SI") (V2DI "DI")
270 (V8SF "SF") (V4DF "DF")
271 (V4SF "SF") (V2DF "DF")])
273 ;; Number of scalar elements in each vector type
;; The value is the count that leads the mode name, given as a string.
274 (define_mode_attr ssescalarnum
275 [(V32QI "32") (V16HI "16") (V8SI "8") (V4DI "4")
276 (V16QI "16") (V8HI "8") (V4SI "4") (V2DI "2")
277 (V8SF "8") (V4DF "4")
278 (V4SF "4") (V2DF "2")])
280 ;; SSE scalar suffix for vector modes
281 (define_mode_attr ssescalarmodesuffix
283 (V8SF "ss") (V4DF "sd")
284 (V4SF "ss") (V2DF "sd")
285 (V8SI "ss") (V4DI "sd")
288 ;; Pack/unpack vector modes
;; sseunpackmode maps an integer vector mode to the mode with the next
;; wider element mode and half as many elements (e.g. V16QI -> V8HI).
;; ssepackmode holds exactly the inverse pairs.
289 (define_mode_attr sseunpackmode
290 [(V16QI "V8HI") (V8HI "V4SI") (V4SI "V2DI")
291 (V32QI "V16HI") (V16HI "V8SI") (V8SI "V4DI")])
;; Maps an integer vector mode to the mode with the next narrower element
;; mode and twice as many elements (e.g. V8HI -> V16QI).  The pairs are
;; exactly the inverse of those in sseunpackmode.
293 (define_mode_attr ssepackmode
294 [(V8HI "V16QI") (V4SI "V8HI") (V2DI "V4SI")
295 (V16HI "V32QI") (V8SI "V16HI") (V4DI "V8SI")])
297 ;; Mapping of the max integer size for xop rotate immediate constraint
;; Only the four 128-bit integer modes have entries.  The values are
;; 7, 15, 31 and 63 for QI, HI, SI and DI elements.
;; NOTE(review): presumably the element width in bits minus one --
;; confirm against the rotate patterns that use <sserotatemax>.
298 (define_mode_attr sserotatemax
299 [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")])
301 ;; Mapping of mode to cast intrinsic name
;; Only V8SI, V8SF and V4DF have entries -- the same three modes that
;; AVX256MODE2P iterates over.
302 (define_mode_attr castmode [(V8SI "si") (V8SF "ps") (V4DF "pd")])
304 ;; Instruction suffix for sign and zero extensions.
;; sign_extend yields "sx" and zero_extend yields "zx".
305 (define_code_attr extsuffix [(sign_extend "sx") (zero_extend "zx")])
307 ;; i128 for integer vectors and TARGET_AVX2, f128 otherwise.
;; The float modes map to the literal "f128"; the integer modes map to
;; "%~128".
;; NOTE(review): "%~" is presumably an output-template escape that prints
;; "i" or "f" depending on TARGET_AVX2, which would implement the rule
;; stated above -- confirm in the target's operand-printing code.
308 (define_mode_attr i128
309 [(V8SF "f128") (V4DF "f128") (V32QI "%~128") (V16HI "%~128")
310 (V8SI "%~128") (V4DI "%~128")])
;; Three 256-bit modes (all are listed in V_256); the same set that the
;; castmode attribute has entries for.  No mode is conditional.
313 (define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF])
;; An iterator and a mode attribute sharing one name.  The iterator covers
;; the DI/DF-element modes of both widths plus V4SF and V4SI.  The
;; attribute maps a mode to the DI-element integer vector mode of the same
;; width (V2DI for 128-bit keys, V4DI for 256-bit keys).  The attribute
;; also has entries for V8SI and V8SF, which the iterator does not list.
315 (define_mode_iterator AVXMODE48P_DI
316 [V2DI V2DF V4DI V4DF V4SF V4SI])
317 (define_mode_attr AVXMODE48P_DI
318 [(V2DI "V2DI") (V2DF "V2DI")
319 (V4DI "V4DI") (V4DF "V4DI")
320 (V4SI "V2DI") (V4SF "V2DI")
321 (V8SI "V4DI") (V8SF "V4DI")])
322 (define_mode_attr gthrfirstp
323 [(V2DI "p") (V2DF "")
326 (V8SI "p") (V8SF "")])
;; Per-mode suffix string chosen by element mode: "q" for DI, "pd" for DF,
;; "d" for SI and "ps" for SF, independent of vector width.
;; NOTE(review): judging by the name, presumably the trailing part of a
;; gather mnemonic -- confirm against the patterns that use <gthrlastp>.
327 (define_mode_attr gthrlastp
328 [(V2DI "q") (V2DF "pd")
329 (V4DI "q") (V4DF "pd")
330 (V4SI "d") (V4SF "ps")
331 (V8SI "d") (V8SF "ps")])
;; Scalar SF/DF plus the 128-bit and 256-bit float vector modes.  No mode
;; carries a condition in this list.
333 (define_mode_iterator FMAMODE [SF DF V4SF V2DF V8SF V4DF])
335 ;; Mapping of immediate bits for blend instructions
;; Each value equals 2^N - 1, where N is the element count that
;; ssescalarnum gives for the mode (8 -> 255, 4 -> 15, 2 -> 3).
336 (define_mode_attr blendbits
337 [(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")])
339 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
341 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Move patterns
345 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
347 ;; All of these patterns are enabled for SSE1 as well as SSE2.
348 ;; This is essential for maintaining stable calling conventions.
350 (define_expand "mov<mode>"
351 [(set (match_operand:V16 0 "nonimmediate_operand" "")
352 (match_operand:V16 1 "nonimmediate_operand" ""))]
355 ix86_expand_vector_move (<MODE>mode, operands);
359 (define_insn "*mov<mode>_internal"
360 [(set (match_operand:V16 0 "nonimmediate_operand" "=x,x ,m")
361 (match_operand:V16 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
363 && (register_operand (operands[0], <MODE>mode)
364 || register_operand (operands[1], <MODE>mode))"
366 switch (which_alternative)
369 return standard_sse_constant_opcode (insn, operands[1]);
372 switch (get_attr_mode (insn))
377 && (misaligned_operand (operands[0], <MODE>mode)
378 || misaligned_operand (operands[1], <MODE>mode)))
379 return "vmovups\t{%1, %0|%0, %1}";
381 return "%vmovaps\t{%1, %0|%0, %1}";
386 && (misaligned_operand (operands[0], <MODE>mode)
387 || misaligned_operand (operands[1], <MODE>mode)))
388 return "vmovupd\t{%1, %0|%0, %1}";
389 else if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
390 return "%vmovaps\t{%1, %0|%0, %1}";
392 return "%vmovapd\t{%1, %0|%0, %1}";
397 && (misaligned_operand (operands[0], <MODE>mode)
398 || misaligned_operand (operands[1], <MODE>mode)))
399 return "vmovdqu\t{%1, %0|%0, %1}";
400 else if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
401 return "%vmovaps\t{%1, %0|%0, %1}";
403 return "%vmovdqa\t{%1, %0|%0, %1}";
412 [(set_attr "type" "sselog1,ssemov,ssemov")
413 (set_attr "prefix" "maybe_vex")
415 (cond [(match_test "TARGET_AVX")
416 (const_string "<sseinsnmode>")
417 (ior (ior (match_test "optimize_function_for_size_p (cfun)")
418 (not (match_test "TARGET_SSE2")))
419 (and (eq_attr "alternative" "2")
420 (match_test "TARGET_SSE_TYPELESS_STORES")))
421 (const_string "V4SF")
422 (eq (const_string "<MODE>mode") (const_string "V4SFmode"))
423 (const_string "V4SF")
424 (eq (const_string "<MODE>mode") (const_string "V2DFmode"))
425 (const_string "V2DF")
427 (const_string "TI")))])
429 (define_insn "sse2_movq128"
430 [(set (match_operand:V2DI 0 "register_operand" "=x")
433 (match_operand:V2DI 1 "nonimmediate_operand" "xm")
434 (parallel [(const_int 0)]))
437 "%vmovq\t{%1, %0|%0, %1}"
438 [(set_attr "type" "ssemov")
439 (set_attr "prefix" "maybe_vex")
440 (set_attr "mode" "TI")])
442 ;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
443 ;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
444 ;; from memory, we'd prefer to load the memory directly into the %xmm
445 ;; register. To facilitate this happy circumstance, this pattern won't
446 ;; split until after register allocation. If the 64-bit value didn't
447 ;; come from memory, this is the best we can do. This is much better
448 ;; than storing %edx:%eax into a stack temporary and loading an %xmm
;; register from there.
451 (define_insn_and_split "movdi_to_sse"
453 [(set (match_operand:V4SI 0 "register_operand" "=?x,x")
454 (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
455 (clobber (match_scratch:V4SI 2 "=&x,X"))])]
456 "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES"
458 "&& reload_completed"
461 if (register_operand (operands[1], DImode))
463 /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
464 Assemble the 64-bit DImode value in an xmm register. */
465 emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
466 gen_rtx_SUBREG (SImode, operands[1], 0)));
467 emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
468 gen_rtx_SUBREG (SImode, operands[1], 4)));
469 emit_insn (gen_vec_interleave_lowv4si (operands[0], operands[0],
472 else if (memory_operand (operands[1], DImode))
473 emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]),
474 operands[1], const0_rtx));
480 [(set (match_operand:V4SF 0 "register_operand" "")
481 (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
482 "TARGET_SSE && reload_completed"
485 (vec_duplicate:V4SF (match_dup 1))
489 operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
490 operands[2] = CONST0_RTX (V4SFmode);
494 [(set (match_operand:V2DF 0 "register_operand" "")
495 (match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))]
496 "TARGET_SSE2 && reload_completed"
497 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
499 operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
500 operands[2] = CONST0_RTX (DFmode);
503 (define_expand "push<mode>1"
504 [(match_operand:V16 0 "register_operand" "")]
507 ix86_expand_push (<MODE>mode, operands[0]);
511 (define_expand "movmisalign<mode>"
512 [(set (match_operand:V16 0 "nonimmediate_operand" "")
513 (match_operand:V16 1 "nonimmediate_operand" ""))]
516 ix86_expand_vector_move_misalign (<MODE>mode, operands);
520 (define_expand "<sse>_movu<ssemodesuffix><avxsizesuffix>"
521 [(set (match_operand:VF 0 "nonimmediate_operand" "")
523 [(match_operand:VF 1 "nonimmediate_operand" "")]
527 if (MEM_P (operands[0]) && MEM_P (operands[1]))
528 operands[1] = force_reg (<MODE>mode, operands[1]);
531 (define_insn "*<sse>_movu<ssemodesuffix><avxsizesuffix>"
532 [(set (match_operand:VF 0 "nonimmediate_operand" "=x,m")
534 [(match_operand:VF 1 "nonimmediate_operand" "xm,x")]
536 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
537 "%vmovu<ssemodesuffix>\t{%1, %0|%0, %1}"
538 [(set_attr "type" "ssemov")
539 (set_attr "movu" "1")
540 (set_attr "prefix" "maybe_vex")
541 (set_attr "mode" "<MODE>")])
543 (define_expand "<sse2>_movdqu<avxsizesuffix>"
544 [(set (match_operand:VI1 0 "nonimmediate_operand" "")
545 (unspec:VI1 [(match_operand:VI1 1 "nonimmediate_operand" "")]
549 if (MEM_P (operands[0]) && MEM_P (operands[1]))
550 operands[1] = force_reg (<MODE>mode, operands[1]);
553 (define_insn "*<sse2>_movdqu<avxsizesuffix>"
554 [(set (match_operand:VI1 0 "nonimmediate_operand" "=x,m")
555 (unspec:VI1 [(match_operand:VI1 1 "nonimmediate_operand" "xm,x")]
557 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
558 "%vmovdqu\t{%1, %0|%0, %1}"
559 [(set_attr "type" "ssemov")
560 (set_attr "movu" "1")
561 (set (attr "prefix_data16")
563 (match_test "TARGET_AVX")
566 (set_attr "prefix" "maybe_vex")
567 (set_attr "mode" "<sseinsnmode>")])
569 (define_insn "<sse3>_lddqu<avxsizesuffix>"
570 [(set (match_operand:VI1 0 "register_operand" "=x")
571 (unspec:VI1 [(match_operand:VI1 1 "memory_operand" "m")]
574 "%vlddqu\t{%1, %0|%0, %1}"
575 [(set_attr "type" "ssemov")
576 (set_attr "movu" "1")
577 (set (attr "prefix_data16")
579 (match_test "TARGET_AVX")
582 (set (attr "prefix_rep")
584 (match_test "TARGET_AVX")
587 (set_attr "prefix" "maybe_vex")
588 (set_attr "mode" "<sseinsnmode>")])
590 (define_insn "sse2_movntsi"
591 [(set (match_operand:SI 0 "memory_operand" "=m")
592 (unspec:SI [(match_operand:SI 1 "register_operand" "r")]
595 "movnti\t{%1, %0|%0, %1}"
596 [(set_attr "type" "ssemov")
597 (set_attr "prefix_data16" "0")
598 (set_attr "mode" "V2DF")])
600 (define_insn "<sse>_movnt<mode>"
601 [(set (match_operand:VF 0 "memory_operand" "=m")
602 (unspec:VF [(match_operand:VF 1 "register_operand" "x")]
605 "%vmovnt<ssemodesuffix>\t{%1, %0|%0, %1}"
606 [(set_attr "type" "ssemov")
607 (set_attr "prefix" "maybe_vex")
608 (set_attr "mode" "<MODE>")])
610 (define_insn "<sse2>_movnt<mode>"
611 [(set (match_operand:VI8 0 "memory_operand" "=m")
612 (unspec:VI8 [(match_operand:VI8 1 "register_operand" "x")]
615 "%vmovntdq\t{%1, %0|%0, %1}"
616 [(set_attr "type" "ssecvt")
617 (set (attr "prefix_data16")
619 (match_test "TARGET_AVX")
622 (set_attr "prefix" "maybe_vex")
623 (set_attr "mode" "<sseinsnmode>")])
625 ; Expand patterns for non-temporal stores. At the moment, only those
626 ; that directly map to insns are defined; it would be possible to
627 ; define patterns for other modes that would expand to several insns.
629 ;; Modes handled by storent patterns.
630 (define_mode_iterator STORENT_MODE
631 [(SI "TARGET_SSE2") (SF "TARGET_SSE4A") (DF "TARGET_SSE4A")
633 (V8SF "TARGET_AVX") V4SF
634 (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
636 (define_expand "storent<mode>"
637 [(set (match_operand:STORENT_MODE 0 "memory_operand" "")
639 [(match_operand:STORENT_MODE 1 "register_operand" "")]
643 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
645 ;; Parallel floating point arithmetic
647 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
649 (define_expand "<code><mode>2"
650 [(set (match_operand:VF 0 "register_operand" "")
652 (match_operand:VF 1 "register_operand" "")))]
654 "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
656 (define_insn_and_split "*absneg<mode>2"
657 [(set (match_operand:VF 0 "register_operand" "=x,x,x,x")
658 (match_operator:VF 3 "absneg_operator"
659 [(match_operand:VF 1 "nonimmediate_operand" "0, xm,x, m")]))
660 (use (match_operand:VF 2 "nonimmediate_operand" "xm,0, xm,x"))]
663 "&& reload_completed"
666 enum rtx_code absneg_op;
672 if (MEM_P (operands[1]))
673 op1 = operands[2], op2 = operands[1];
675 op1 = operands[1], op2 = operands[2];
680 if (rtx_equal_p (operands[0], operands[1]))
686 absneg_op = GET_CODE (operands[3]) == NEG ? XOR : AND;
687 t = gen_rtx_fmt_ee (absneg_op, <MODE>mode, op1, op2);
688 t = gen_rtx_SET (VOIDmode, operands[0], t);
692 [(set_attr "isa" "noavx,noavx,avx,avx")])
694 (define_expand "<plusminus_insn><mode>3"
695 [(set (match_operand:VF 0 "register_operand" "")
697 (match_operand:VF 1 "nonimmediate_operand" "")
698 (match_operand:VF 2 "nonimmediate_operand" "")))]
700 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
702 (define_insn "*<plusminus_insn><mode>3"
703 [(set (match_operand:VF 0 "register_operand" "=x,x")
705 (match_operand:VF 1 "nonimmediate_operand" "<comm>0,x")
706 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
707 "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
709 <plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
710 v<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
711 [(set_attr "isa" "noavx,avx")
712 (set_attr "type" "sseadd")
713 (set_attr "prefix" "orig,vex")
714 (set_attr "mode" "<MODE>")])
716 (define_insn "<sse>_vm<plusminus_insn><mode>3"
717 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
720 (match_operand:VF_128 1 "register_operand" "0,x")
721 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
726 <plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %2}
727 v<plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
728 [(set_attr "isa" "noavx,avx")
729 (set_attr "type" "sseadd")
730 (set_attr "prefix" "orig,vex")
731 (set_attr "mode" "<ssescalarmode>")])
733 (define_expand "mul<mode>3"
734 [(set (match_operand:VF 0 "register_operand" "")
736 (match_operand:VF 1 "nonimmediate_operand" "")
737 (match_operand:VF 2 "nonimmediate_operand" "")))]
739 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
741 (define_insn "*mul<mode>3"
742 [(set (match_operand:VF 0 "register_operand" "=x,x")
744 (match_operand:VF 1 "nonimmediate_operand" "%0,x")
745 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
746 "TARGET_SSE && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
748 mul<ssemodesuffix>\t{%2, %0|%0, %2}
749 vmul<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
750 [(set_attr "isa" "noavx,avx")
751 (set_attr "type" "ssemul")
752 (set_attr "prefix" "orig,vex")
753 (set_attr "mode" "<MODE>")])
755 (define_insn "<sse>_vmmul<mode>3"
756 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
759 (match_operand:VF_128 1 "register_operand" "0,x")
760 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
765 mul<ssescalarmodesuffix>\t{%2, %0|%0, %2}
766 vmul<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
767 [(set_attr "isa" "noavx,avx")
768 (set_attr "type" "ssemul")
769 (set_attr "prefix" "orig,vex")
770 (set_attr "mode" "<ssescalarmode>")])
772 (define_expand "div<mode>3"
773 [(set (match_operand:VF2 0 "register_operand" "")
774 (div:VF2 (match_operand:VF2 1 "register_operand" "")
775 (match_operand:VF2 2 "nonimmediate_operand" "")))]
777 "ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);")
779 (define_expand "div<mode>3"
780 [(set (match_operand:VF1 0 "register_operand" "")
781 (div:VF1 (match_operand:VF1 1 "register_operand" "")
782 (match_operand:VF1 2 "nonimmediate_operand" "")))]
785 ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);
788 && TARGET_RECIP_VEC_DIV
789 && !optimize_insn_for_size_p ()
790 && flag_finite_math_only && !flag_trapping_math
791 && flag_unsafe_math_optimizations)
793 ix86_emit_swdivsf (operands[0], operands[1], operands[2], <MODE>mode);
798 (define_insn "<sse>_div<mode>3"
799 [(set (match_operand:VF 0 "register_operand" "=x,x")
801 (match_operand:VF 1 "register_operand" "0,x")
802 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
805 div<ssemodesuffix>\t{%2, %0|%0, %2}
806 vdiv<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
807 [(set_attr "isa" "noavx,avx")
808 (set_attr "type" "ssediv")
809 (set_attr "prefix" "orig,vex")
810 (set_attr "mode" "<MODE>")])
812 (define_insn "<sse>_vmdiv<mode>3"
813 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
816 (match_operand:VF_128 1 "register_operand" "0,x")
817 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
822 div<ssescalarmodesuffix>\t{%2, %0|%0, %2}
823 vdiv<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
824 [(set_attr "isa" "noavx,avx")
825 (set_attr "type" "ssediv")
826 (set_attr "prefix" "orig,vex")
827 (set_attr "mode" "<ssescalarmode>")])
829 (define_insn "<sse>_rcp<mode>2"
830 [(set (match_operand:VF1 0 "register_operand" "=x")
832 [(match_operand:VF1 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
834 "%vrcpps\t{%1, %0|%0, %1}"
835 [(set_attr "type" "sse")
836 (set_attr "atom_sse_attr" "rcp")
837 (set_attr "prefix" "maybe_vex")
838 (set_attr "mode" "<MODE>")])
840 (define_insn "sse_vmrcpv4sf2"
841 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
843 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
845 (match_operand:V4SF 2 "register_operand" "0,x")
849 rcpss\t{%1, %0|%0, %1}
850 vrcpss\t{%1, %2, %0|%0, %2, %1}"
851 [(set_attr "isa" "noavx,avx")
852 (set_attr "type" "sse")
853 (set_attr "atom_sse_attr" "rcp")
854 (set_attr "prefix" "orig,vex")
855 (set_attr "mode" "SF")])
857 (define_expand "sqrt<mode>2"
858 [(set (match_operand:VF2 0 "register_operand" "")
859 (sqrt:VF2 (match_operand:VF2 1 "nonimmediate_operand" "")))]
862 (define_expand "sqrt<mode>2"
863 [(set (match_operand:VF1 0 "register_operand" "")
864 (sqrt:VF1 (match_operand:VF1 1 "nonimmediate_operand" "")))]
868 && TARGET_RECIP_VEC_SQRT
869 && !optimize_insn_for_size_p ()
870 && flag_finite_math_only && !flag_trapping_math
871 && flag_unsafe_math_optimizations)
873 ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, false);
878 (define_insn "<sse>_sqrt<mode>2"
879 [(set (match_operand:VF 0 "register_operand" "=x")
880 (sqrt:VF (match_operand:VF 1 "nonimmediate_operand" "xm")))]
882 "%vsqrt<ssemodesuffix>\t{%1, %0|%0, %1}"
883 [(set_attr "type" "sse")
884 (set_attr "atom_sse_attr" "sqrt")
885 (set_attr "prefix" "maybe_vex")
886 (set_attr "mode" "<MODE>")])
888 (define_insn "<sse>_vmsqrt<mode>2"
889 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
892 (match_operand:VF_128 1 "nonimmediate_operand" "xm,xm"))
893 (match_operand:VF_128 2 "register_operand" "0,x")
897 sqrt<ssescalarmodesuffix>\t{%1, %0|%0, %1}
898 vsqrt<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %1}"
899 [(set_attr "isa" "noavx,avx")
900 (set_attr "type" "sse")
901 (set_attr "atom_sse_attr" "sqrt")
902 (set_attr "prefix" "orig,vex")
903 (set_attr "mode" "<ssescalarmode>")])
905 (define_expand "rsqrt<mode>2"
906 [(set (match_operand:VF1 0 "register_operand" "")
908 [(match_operand:VF1 1 "nonimmediate_operand" "")] UNSPEC_RSQRT))]
911 ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, true);
915 (define_insn "<sse>_rsqrt<mode>2"
916 [(set (match_operand:VF1 0 "register_operand" "=x")
918 [(match_operand:VF1 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
920 "%vrsqrtps\t{%1, %0|%0, %1}"
921 [(set_attr "type" "sse")
922 (set_attr "prefix" "maybe_vex")
923 (set_attr "mode" "<MODE>")])
925 (define_insn "sse_vmrsqrtv4sf2"
926 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
928 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
930 (match_operand:V4SF 2 "register_operand" "0,x")
934 rsqrtss\t{%1, %0|%0, %1}
935 vrsqrtss\t{%1, %2, %0|%0, %2, %1}"
936 [(set_attr "isa" "noavx,avx")
937 (set_attr "type" "sse")
938 (set_attr "prefix" "orig,vex")
939 (set_attr "mode" "SF")])
941 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
942 ;; isn't really correct, as those rtl operators aren't defined when
943 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
945 (define_expand "<code><mode>3"
946 [(set (match_operand:VF 0 "register_operand" "")
948 (match_operand:VF 1 "nonimmediate_operand" "")
949 (match_operand:VF 2 "nonimmediate_operand" "")))]
952 if (!flag_finite_math_only)
953 operands[1] = force_reg (<MODE>mode, operands[1]);
954 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
957 (define_insn "*<code><mode>3_finite"
958 [(set (match_operand:VF 0 "register_operand" "=x,x")
960 (match_operand:VF 1 "nonimmediate_operand" "%0,x")
961 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
962 "TARGET_SSE && flag_finite_math_only
963 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
965 <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
966 v<maxmin_float><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
967 [(set_attr "isa" "noavx,avx")
968 (set_attr "type" "sseadd")
969 (set_attr "prefix" "orig,vex")
970 (set_attr "mode" "<MODE>")])
972 (define_insn "*<code><mode>3"
973 [(set (match_operand:VF 0 "register_operand" "=x,x")
975 (match_operand:VF 1 "register_operand" "0,x")
976 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
977 "TARGET_SSE && !flag_finite_math_only"
979 <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
980 v<maxmin_float><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
981 [(set_attr "isa" "noavx,avx")
982 (set_attr "type" "sseadd")
983 (set_attr "prefix" "orig,vex")
984 (set_attr "mode" "<MODE>")])
986 (define_insn "<sse>_vm<code><mode>3"
987 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
990 (match_operand:VF_128 1 "register_operand" "0,x")
991 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
996 <maxmin_float><ssescalarmodesuffix>\t{%2, %0|%0, %2}
997 v<maxmin_float><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
998 [(set_attr "isa" "noavx,avx")
999 (set_attr "type" "sse")
1000 (set_attr "prefix" "orig,vex")
1001 (set_attr "mode" "<ssescalarmode>")])
1003 ;; These versions of the min/max patterns implement exactly the operations
1004 ;; min = (op1 < op2 ? op1 : op2)
1005 ;; max = (!(op1 < op2) ? op1 : op2)
1006 ;; Their operands are not commutative, and thus they may be used in the
1007 ;; presence of -0.0 and NaN.
1009 (define_insn "*ieee_smin<mode>3"
1010 [(set (match_operand:VF 0 "register_operand" "=x,x")
1012 [(match_operand:VF 1 "register_operand" "0,x")
1013 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]
1017 min<ssemodesuffix>\t{%2, %0|%0, %2}
1018 vmin<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1019 [(set_attr "isa" "noavx,avx")
1020 (set_attr "type" "sseadd")
1021 (set_attr "prefix" "orig,vex")
1022 (set_attr "mode" "<MODE>")])
1024 (define_insn "*ieee_smax<mode>3"
1025 [(set (match_operand:VF 0 "register_operand" "=x,x")
1027 [(match_operand:VF 1 "register_operand" "0,x")
1028 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]
1032 max<ssemodesuffix>\t{%2, %0|%0, %2}
1033 vmax<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1034 [(set_attr "isa" "noavx,avx")
1035 (set_attr "type" "sseadd")
1036 (set_attr "prefix" "orig,vex")
1037 (set_attr "mode" "<MODE>")])
1039 (define_insn "avx_addsubv4df3"
1040 [(set (match_operand:V4DF 0 "register_operand" "=x")
1043 (match_operand:V4DF 1 "register_operand" "x")
1044 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
1045 (minus:V4DF (match_dup 1) (match_dup 2))
1048 "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1049 [(set_attr "type" "sseadd")
1050 (set_attr "prefix" "vex")
1051 (set_attr "mode" "V4DF")])
1053 (define_insn "sse3_addsubv2df3"
1054 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1057 (match_operand:V2DF 1 "register_operand" "0,x")
1058 (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm"))
1059 (minus:V2DF (match_dup 1) (match_dup 2))
1063 addsubpd\t{%2, %0|%0, %2}
1064 vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1065 [(set_attr "isa" "noavx,avx")
1066 (set_attr "type" "sseadd")
1067 (set_attr "atom_unit" "complex")
1068 (set_attr "prefix" "orig,vex")
1069 (set_attr "mode" "V2DF")])
1071 (define_insn "avx_addsubv8sf3"
1072 [(set (match_operand:V8SF 0 "register_operand" "=x")
1075 (match_operand:V8SF 1 "register_operand" "x")
1076 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
1077 (minus:V8SF (match_dup 1) (match_dup 2))
1080 "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1081 [(set_attr "type" "sseadd")
1082 (set_attr "prefix" "vex")
1083 (set_attr "mode" "V8SF")])
1085 (define_insn "sse3_addsubv4sf3"
1086 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1089 (match_operand:V4SF 1 "register_operand" "0,x")
1090 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
1091 (minus:V4SF (match_dup 1) (match_dup 2))
1095 addsubps\t{%2, %0|%0, %2}
1096 vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1097 [(set_attr "isa" "noavx,avx")
1098 (set_attr "type" "sseadd")
1099 (set_attr "prefix" "orig,vex")
1100 (set_attr "prefix_rep" "1,*")
1101 (set_attr "mode" "V4SF")])
;; Horizontal vh<plusminus_mnemonic>pd on V4DF operands.  The RTL selects
;; DF elements 0/1 and 2/3 of operand 1 and of operand 2; operand 2 may
;; be a register or memory.  VEX three-operand form only.
1103 (define_insn "avx_h<plusminus_insn>v4df3"
1104 [(set (match_operand:V4DF 0 "register_operand" "=x")
1109 (match_operand:V4DF 1 "register_operand" "x")
1110 (parallel [(const_int 0)]))
1111 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1113 (vec_select:DF (match_dup 1) (parallel [(const_int 2)]))
1114 (vec_select:DF (match_dup 1) (parallel [(const_int 3)]))))
1118 (match_operand:V4DF 2 "nonimmediate_operand" "xm")
1119 (parallel [(const_int 0)]))
1120 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))
1122 (vec_select:DF (match_dup 2) (parallel [(const_int 2)]))
1123 (vec_select:DF (match_dup 2) (parallel [(const_int 3)]))))))]
1125 "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
1126 [(set_attr "type" "sseadd")
1127 (set_attr "prefix" "vex")
1128 (set_attr "mode" "V4DF")])
;; Horizontal h<plusminus_mnemonic>pd on V2DF operands.  The RTL selects
;; DF elements 0 and 1 of operand 1 and of operand 2.  Alternative 0
;; (isa "noavx") ties operand 1 to the destination; alternative 1
;; (isa "avx") is the three-operand VEX form.
1130 (define_insn "sse3_h<plusminus_insn>v2df3"
1131 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1135 (match_operand:V2DF 1 "register_operand" "0,x")
1136 (parallel [(const_int 0)]))
1137 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1140 (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm")
1141 (parallel [(const_int 0)]))
1142 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1145 h<plusminus_mnemonic>pd\t{%2, %0|%0, %2}
1146 vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
1147 [(set_attr "isa" "noavx,avx")
1148 (set_attr "type" "sseadd")
1149 (set_attr "prefix" "orig,vex")
1150 (set_attr "mode" "V2DF")])
;; Horizontal vh<plusminus_mnemonic>ps on V8SF operands.  The RTL selects
;; SF elements 0..3 of operand 1 and operand 2 first, then elements 4..7
;; of operand 1 and operand 2.  VEX three-operand form only.
1152 (define_insn "avx_h<plusminus_insn>v8sf3"
1153 [(set (match_operand:V8SF 0 "register_operand" "=x")
1159 (match_operand:V8SF 1 "register_operand" "x")
1160 (parallel [(const_int 0)]))
1161 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1163 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1164 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1168 (match_operand:V8SF 2 "nonimmediate_operand" "xm")
1169 (parallel [(const_int 0)]))
1170 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1172 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1173 (vec_select:SF (match_dup 2) (parallel [(const_int 3)])))))
1177 (vec_select:SF (match_dup 1) (parallel [(const_int 4)]))
1178 (vec_select:SF (match_dup 1) (parallel [(const_int 5)])))
1180 (vec_select:SF (match_dup 1) (parallel [(const_int 6)]))
1181 (vec_select:SF (match_dup 1) (parallel [(const_int 7)]))))
1184 (vec_select:SF (match_dup 2) (parallel [(const_int 4)]))
1185 (vec_select:SF (match_dup 2) (parallel [(const_int 5)])))
1187 (vec_select:SF (match_dup 2) (parallel [(const_int 6)]))
1188 (vec_select:SF (match_dup 2) (parallel [(const_int 7)])))))))]
1190 "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1191 [(set_attr "type" "sseadd")
1192 (set_attr "prefix" "vex")
1193 (set_attr "mode" "V8SF")])
;; Horizontal h<plusminus_mnemonic>ps on V4SF operands.  The RTL selects
;; SF elements 0..3 of operand 1 and of operand 2.  Alternative 0
;; (isa "noavx") ties operand 1 to the destination and sets prefix_rep
;; to 1; alternative 1 (isa "avx") is the three-operand VEX form.
1195 (define_insn "sse3_h<plusminus_insn>v4sf3"
1196 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1201 (match_operand:V4SF 1 "register_operand" "0,x")
1202 (parallel [(const_int 0)]))
1203 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1205 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1206 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1210 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm")
1211 (parallel [(const_int 0)]))
1212 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1214 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1215 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
1218 h<plusminus_mnemonic>ps\t{%2, %0|%0, %2}
1219 vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1220 [(set_attr "isa" "noavx,avx")
1221 (set_attr "type" "sseadd")
1222 (set_attr "atom_unit" "complex")
1223 (set_attr "prefix" "orig,vex")
1224 (set_attr "prefix_rep" "1,*")
1225 (set_attr "mode" "V4SF")])
;; Expander for the V4DF plus-reduction.  It emits three insns:
;;   1. a horizontal add of operand 1 with itself into TMP,
;;   2. a vperm2f128 of TMP with itself and immediate 1 into TMP2
;;      (presumably swapping the 128-bit halves -- confirm against the
;;      vperm2f128 pattern),
;;   3. a V4DF add of TMP and TMP2 into operand 0.
1227 (define_expand "reduc_splus_v4df"
1228 [(match_operand:V4DF 0 "register_operand" "")
1229 (match_operand:V4DF 1 "register_operand" "")]
1232 rtx tmp = gen_reg_rtx (V4DFmode);
1233 rtx tmp2 = gen_reg_rtx (V4DFmode);
1234 emit_insn (gen_avx_haddv4df3 (tmp, operands[1], operands[1]));
1235 emit_insn (gen_avx_vperm2f128v4df3 (tmp2, tmp, tmp, GEN_INT (1)));
1236 emit_insn (gen_addv4df3 (operands[0], tmp, tmp2));
;; Expander for the V2DF plus-reduction: a single horizontal add of
;; operand 1 with itself, written directly to operand 0.
1240 (define_expand "reduc_splus_v2df"
1241 [(match_operand:V2DF 0 "register_operand" "")
1242 (match_operand:V2DF 1 "register_operand" "")]
1245 emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
;; Expander for the V8SF plus-reduction.  Two successive horizontal adds
;; (operand 1 with itself into TMP, then TMP with itself into TMP2) are
;; followed by a vperm2f128 of TMP2 with immediate 1, whose result reuses
;; TMP, and a final V8SF add of TMP and TMP2 into operand 0.
1249 (define_expand "reduc_splus_v8sf"
1250 [(match_operand:V8SF 0 "register_operand" "")
1251 (match_operand:V8SF 1 "register_operand" "")]
1254 rtx tmp = gen_reg_rtx (V8SFmode);
1255 rtx tmp2 = gen_reg_rtx (V8SFmode);
1256 emit_insn (gen_avx_haddv8sf3 (tmp, operands[1], operands[1]));
1257 emit_insn (gen_avx_haddv8sf3 (tmp2, tmp, tmp));
1258 emit_insn (gen_avx_vperm2f128v8sf3 (tmp, tmp2, tmp2, GEN_INT (1)));
1259 emit_insn (gen_addv8sf3 (operands[0], tmp, tmp2));
;; Expander for the V4SF plus-reduction.  Two strategies appear in the
;; body: two chained horizontal adds (operand 1 with itself into TMP,
;; then TMP with itself into operand 0), or a call to ix86_expand_reduc
;; with gen_addv4sf3.
;; NOTE(review): the test that chooses between the two strategies is not
;; visible in this excerpt -- confirm against the full file.
1263 (define_expand "reduc_splus_v4sf"
1264 [(match_operand:V4SF 0 "register_operand" "")
1265 (match_operand:V4SF 1 "register_operand" "")]
1270 rtx tmp = gen_reg_rtx (V4SFmode);
1271 emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
1272 emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
1275 ix86_expand_reduc (gen_addv4sf3, operands[0], operands[1]);
1279 ;; Modes handled by reduc_sm{in,ax}* patterns.
;; V32QI, V16HI, V8SI and V4DI are enabled under TARGET_AVX2, V8SF and
;; V4DF under TARGET_AVX, and V4SF under TARGET_SSE.
1280 (define_mode_iterator REDUC_SMINMAX_MODE
1281 [(V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
1282 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
1283 (V8SF "TARGET_AVX") (V4DF "TARGET_AVX")
1284 (V4SF "TARGET_SSE")])
;; Reduction expander over the smaxmin codes for every mode in
;; REDUC_SMINMAX_MODE.  The work is delegated to ix86_expand_reduc, which
;; receives the matching gen_<code><mode>3 generator together with the
;; destination (operand 0) and the source vector (operand 1).
1286 (define_expand "reduc_<code>_<mode>"
1287 [(smaxmin:REDUC_SMINMAX_MODE
1288 (match_operand:REDUC_SMINMAX_MODE 0 "register_operand" "")
1289 (match_operand:REDUC_SMINMAX_MODE 1 "register_operand" ""))]
1292 ix86_expand_reduc (gen_<code><mode>3, operands[0], operands[1]);
;; Reduction expander for the VI_256 modes.  The work is delegated to
;; ix86_expand_reduc with the matching gen_<code><mode>3 generator.
;; NOTE(review): the RTL code iterator and the enabling condition are not
;; visible in this excerpt -- confirm against the full file.
1296 (define_expand "reduc_<code>_<mode>"
1298 (match_operand:VI_256 0 "register_operand" "")
1299 (match_operand:VI_256 1 "register_operand" ""))]
1302 ix86_expand_reduc (gen_<code><mode>3, operands[0], operands[1]);
;; Expander for the V8HI reduction named reduc_umin; delegates to
;; ix86_expand_reduc with gen_uminv8hi3 as the combining generator.
1306 (define_expand "reduc_umin_v8hi"
1308 (match_operand:V8HI 0 "register_operand" "")
1309 (match_operand:V8HI 1 "register_operand" ""))]
1312 ix86_expand_reduc (gen_uminv8hi3, operands[0], operands[1]);
1316 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1318 ;; Parallel floating point comparisons
1320 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Packed vcmp<ssemodesuffix> over the VF modes with the comparison
;; predicate passed as an immediate (operand 3, predicate
;; const_0_to_31_operand).  Operand 2 may be a register or memory.
;; VEX-encoded, one immediate byte.
1322 (define_insn "avx_cmp<mode>3"
1323 [(set (match_operand:VF 0 "register_operand" "=x")
1325 [(match_operand:VF 1 "register_operand" "x")
1326 (match_operand:VF 2 "nonimmediate_operand" "xm")
1327 (match_operand:SI 3 "const_0_to_31_operand" "n")]
1330 "vcmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1331 [(set_attr "type" "ssecmp")
1332 (set_attr "length_immediate" "1")
1333 (set_attr "prefix" "vex")
1334 (set_attr "mode" "<MODE>")])
;; Scalar counterpart of avx_cmp<mode>3 for the VF_128 modes: emits
;; vcmp<ssescalarmodesuffix> with the predicate immediate in operand 3.
;; The "mode" attribute is the scalar mode <ssescalarmode>.
1336 (define_insn "avx_vmcmp<mode>3"
1337 [(set (match_operand:VF_128 0 "register_operand" "=x")
1340 [(match_operand:VF_128 1 "register_operand" "x")
1341 (match_operand:VF_128 2 "nonimmediate_operand" "xm")
1342 (match_operand:SI 3 "const_0_to_31_operand" "n")]
1347 "vcmp<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1348 [(set_attr "type" "ssecmp")
1349 (set_attr "length_immediate" "1")
1350 (set_attr "prefix" "vex")
1351 (set_attr "mode" "<ssescalarmode>")])
;; Commutative form of the packed mask compare.  Operand 1 carries the
;; '%' constraint modifier, and the insn condition requires the
;; comparison code in operand 3 to be of class RTX_COMM_COMPARE.  The
;; comparison is printed into the mnemonic through %D3.
1353 (define_insn "*<sse>_maskcmp<mode>3_comm"
1354 [(set (match_operand:VF 0 "register_operand" "=x,x")
1355 (match_operator:VF 3 "sse_comparison_operator"
1356 [(match_operand:VF 1 "register_operand" "%0,x")
1357 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]))]
1359 && GET_RTX_CLASS (GET_CODE (operands[3])) == RTX_COMM_COMPARE"
1361 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
1362 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1363 [(set_attr "isa" "noavx,avx")
1364 (set_attr "type" "ssecmp")
1365 (set_attr "length_immediate" "1")
1366 (set_attr "prefix" "orig,vex")
1367 (set_attr "mode" "<MODE>")])
;; Packed mask compare over the VF modes.  The comparison is any operator
;; accepted by sse_comparison_operator (operand 3) and is printed into
;; the mnemonic through %D3.  Alternative 0 (isa "noavx") ties operand 1
;; to the destination; alternative 1 (isa "avx") is the VEX form.
1369 (define_insn "<sse>_maskcmp<mode>3"
1370 [(set (match_operand:VF 0 "register_operand" "=x,x")
1371 (match_operator:VF 3 "sse_comparison_operator"
1372 [(match_operand:VF 1 "register_operand" "0,x")
1373 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]))]
1376 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
1377 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1378 [(set_attr "isa" "noavx,avx")
1379 (set_attr "type" "ssecmp")
1380 (set_attr "length_immediate" "1")
1381 (set_attr "prefix" "orig,vex")
1382 (set_attr "mode" "<MODE>")])
;; Scalar mask compare for the VF_128 modes, emitted as
;; cmp%D3<ssescalarmodesuffix> (noavx) or its VEX form (avx).  Note that
;; length_immediate is "1" only for the noavx alternative and "*" for the
;; avx one, unlike the packed pattern.
1384 (define_insn "<sse>_vmmaskcmp<mode>3"
1385 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1387 (match_operator:VF_128 3 "sse_comparison_operator"
1388 [(match_operand:VF_128 1 "register_operand" "0,x")
1389 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm")])
1394 cmp%D3<ssescalarmodesuffix>\t{%2, %0|%0, %2}
1395 vcmp%D3<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1396 [(set_attr "isa" "noavx,avx")
1397 (set_attr "type" "ssecmp")
1398 (set_attr "length_immediate" "1,*")
1399 (set_attr "prefix" "orig,vex")
1400 (set_attr "mode" "<ssescalarmode>")])
;; comi compare: sets FLAGS_REG in CCFP mode from element 0 of operand 0
;; (register) and element 0 of operand 1 (register or memory).  Enabled
;; when SSE_FLOAT_MODE_P holds for the scalar mode.  The prefix_data16
;; attribute depends on whether the mode is DF.
1402 (define_insn "<sse>_comi"
1403 [(set (reg:CCFP FLAGS_REG)
1406 (match_operand:<ssevecmode> 0 "register_operand" "x")
1407 (parallel [(const_int 0)]))
1409 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
1410 (parallel [(const_int 0)]))))]
1411 "SSE_FLOAT_MODE_P (<MODE>mode)"
1412 "%vcomi<ssemodesuffix>\t{%1, %0|%0, %1}"
1413 [(set_attr "type" "ssecomi")
1414 (set_attr "prefix" "maybe_vex")
1415 (set_attr "prefix_rep" "0")
1416 (set (attr "prefix_data16")
1417 (if_then_else (eq_attr "mode" "DF")
1419 (const_string "0")))
1420 (set_attr "mode" "<MODE>")])
;; ucomi compare: same operand shape as <sse>_comi, but FLAGS_REG is set
;; in CCFPU mode and the mnemonic is %vucomi<ssemodesuffix>.
1422 (define_insn "<sse>_ucomi"
1423 [(set (reg:CCFPU FLAGS_REG)
1426 (match_operand:<ssevecmode> 0 "register_operand" "x")
1427 (parallel [(const_int 0)]))
1429 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
1430 (parallel [(const_int 0)]))))]
1431 "SSE_FLOAT_MODE_P (<MODE>mode)"
1432 "%vucomi<ssemodesuffix>\t{%1, %0|%0, %1}"
1433 [(set_attr "type" "ssecomi")
1434 (set_attr "prefix" "maybe_vex")
1435 (set_attr "prefix_rep" "0")
1436 (set (attr "prefix_data16")
1437 (if_then_else (eq_attr "mode" "DF")
1439 (const_string "0")))
1440 (set_attr "mode" "<MODE>")])
;; Vector conditional expander: data operands 0/1/2 are in a V_256 mode,
;; the compared operands 4/5 in a VF_256 mode, and operand 3 is the
;; comparison operator.  The condition requires both modes to have the
;; same number of units.  Expansion is delegated to ix86_expand_fp_vcond,
;; whose boolean result is stored in OK.
1442 (define_expand "vcond<V_256:mode><VF_256:mode>"
1443 [(set (match_operand:V_256 0 "register_operand" "")
1445 (match_operator 3 ""
1446 [(match_operand:VF_256 4 "nonimmediate_operand" "")
1447 (match_operand:VF_256 5 "nonimmediate_operand" "")])
1448 (match_operand:V_256 1 "general_operand" "")
1449 (match_operand:V_256 2 "general_operand" "")))]
1451 && (GET_MODE_NUNITS (<V_256:MODE>mode)
1452 == GET_MODE_NUNITS (<VF_256:MODE>mode))"
1454 bool ok = ix86_expand_fp_vcond (operands);
;; 128-bit counterpart of the vector conditional expander: data operands
;; in a V_128 mode, compared operands in a VF_128 mode, both required to
;; have the same number of units.  Expansion is delegated to
;; ix86_expand_fp_vcond.
1459 (define_expand "vcond<V_128:mode><VF_128:mode>"
1460 [(set (match_operand:V_128 0 "register_operand" "")
1462 (match_operator 3 ""
1463 [(match_operand:VF_128 4 "nonimmediate_operand" "")
1464 (match_operand:VF_128 5 "nonimmediate_operand" "")])
1465 (match_operand:V_128 1 "general_operand" "")
1466 (match_operand:V_128 2 "general_operand" "")))]
1468 && (GET_MODE_NUNITS (<V_128:MODE>mode)
1469 == GET_MODE_NUNITS (<VF_128:MODE>mode))"
1471 bool ok = ix86_expand_fp_vcond (operands);
1476 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1478 ;; Parallel floating point logical operations
1480 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Packed and-not over the VF modes.  The assembler template is built at
;; output time in a static buffer: the mnemonic suffix is "ps" when
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL is set and <ssemodesuffix>
;; otherwise, and which_alternative picks between the two-operand andn
;; form and the three-operand vandn form.
1482 (define_insn "<sse>_andnot<mode>3"
1483 [(set (match_operand:VF 0 "register_operand" "=x,x")
1486 (match_operand:VF 1 "register_operand" "0,x"))
1487 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
1490 static char buf[32];
1493 = TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL ? "ps" : "<ssemodesuffix>";
1495 switch (which_alternative)
1498 insn = "andn%s\t{%%2, %%0|%%0, %%2}";
1501 insn = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1507 snprintf (buf, sizeof (buf), insn, suffix);
1510 [(set_attr "isa" "noavx,avx")
1511 (set_attr "type" "sselog")
1512 (set_attr "prefix" "orig,vex")
1513 (set_attr "mode" "<MODE>")])
;; Expander for the packed logical operations over the VF modes.  Both
;; sources may be registers or memory; the preparation statement calls
;; ix86_fixup_binary_operands_no_copy on the operands for the given code
;; and mode.
1515 (define_expand "<code><mode>3"
1516 [(set (match_operand:VF 0 "register_operand" "")
1518 (match_operand:VF 1 "nonimmediate_operand" "")
1519 (match_operand:VF 2 "nonimmediate_operand" "")))]
1521 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Packed logical operation over the VF modes, commutative in operands 1
;; and 2 ('%' modifier).  Enabled under TARGET_SSE when
;; ix86_binary_operator_ok accepts the operands.  The template is built
;; at output time: suffix "ps" when TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL,
;; else <ssemodesuffix>; which_alternative selects the <logic> or
;; v<logic> form.
1523 (define_insn "*<code><mode>3"
1524 [(set (match_operand:VF 0 "register_operand" "=x,x")
1526 (match_operand:VF 1 "nonimmediate_operand" "%0,x")
1527 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
1528 "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1530 static char buf[32];
1533 = TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL ? "ps" : "<ssemodesuffix>";
1535 switch (which_alternative)
1538 insn = "<logic>%s\t{%%2, %%0|%%0, %%2}";
1541 insn = "v<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1547 snprintf (buf, sizeof (buf), insn, suffix);
1550 [(set_attr "isa" "noavx,avx")
1551 (set_attr "type" "sselog")
1552 (set_attr "prefix" "orig,vex")
1553 (set_attr "mode" "<MODE>")])
;; copysign expander over the VF modes.  Operand 3 is the mask returned
;; by ix86_build_signbit_mask; operand 1 is combined with the complement
;; of that mask, operand 2 is ANDed with the mask, and the two fresh
;; pseudos (operands 4 and 5) are ORed into operand 0.
1555 (define_expand "copysign<mode>3"
1558 (not:VF (match_dup 3))
1559 (match_operand:VF 1 "nonimmediate_operand" "")))
1561 (and:VF (match_dup 3)
1562 (match_operand:VF 2 "nonimmediate_operand" "")))
1563 (set (match_operand:VF 0 "register_operand" "")
1564 (ior:VF (match_dup 4) (match_dup 5)))]
1567 operands[3] = ix86_build_signbit_mask (<MODE>mode, 1, 0);
1569 operands[4] = gen_reg_rtx (<MODE>mode);
1570 operands[5] = gen_reg_rtx (<MODE>mode);
1573 ;; Also define scalar versions. These are used for abs, neg, and
1574 ;; conditional move. Using subregs into vector modes causes register
1575 ;; allocation lossage. These patterns do not allow memory operands
1576 ;; because the native instructions read the full 128 bits.
;; Scalar (MODEF) and-not implemented with the packed instruction; all
;; operands must be registers.  The template is built at output time:
;; suffix "ps" when TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL, otherwise
;; <ssevecmodesuffix>; which_alternative selects andn or vandn.  The
;; "mode" attribute is the vector mode <ssevecmode>.
1578 (define_insn "*andnot<mode>3"
1579 [(set (match_operand:MODEF 0 "register_operand" "=x,x")
1582 (match_operand:MODEF 1 "register_operand" "0,x"))
1583 (match_operand:MODEF 2 "register_operand" "x,x")))]
1584 "SSE_FLOAT_MODE_P (<MODE>mode)"
1586 static char buf[32];
1589 = TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL ? "ps" : "<ssevecmodesuffix>";
1591 switch (which_alternative)
1594 insn = "andn%s\t{%%2, %%0|%%0, %%2}";
1597 insn = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1603 snprintf (buf, sizeof (buf), insn, suffix);
1606 [(set_attr "isa" "noavx,avx")
1607 (set_attr "type" "sselog")
1608 (set_attr "prefix" "orig,vex")
1609 (set_attr "mode" "<ssevecmode>")])
;; Scalar (MODEF) logical operation implemented with the packed
;; instruction; register operands only, commutative in operands 1 and 2.
;; The template is built at output time exactly as for the scalar
;; and-not pattern, using <logic> / v<logic> as the mnemonic stem.
1611 (define_insn "*<code><mode>3"
1612 [(set (match_operand:MODEF 0 "register_operand" "=x,x")
1614 (match_operand:MODEF 1 "register_operand" "%0,x")
1615 (match_operand:MODEF 2 "register_operand" "x,x")))]
1616 "SSE_FLOAT_MODE_P (<MODE>mode)"
1618 static char buf[32];
1621 = TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL ? "ps" : "<ssevecmodesuffix>";
1623 switch (which_alternative)
1626 insn = "<logic>%s\t{%%2, %%0|%%0, %%2}";
1629 insn = "v<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1635 snprintf (buf, sizeof (buf), insn, suffix);
1638 [(set_attr "isa" "noavx,avx")
1639 (set_attr "type" "sselog")
1640 (set_attr "prefix" "orig,vex")
1641 (set_attr "mode" "<ssevecmode>")])
1643 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1645 ;; FMA4 floating point multiply/accumulate instructions. This
1646 ;; includes the scalar versions of the instructions as well as the vector versions.
1649 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1651 ;; In order to match (*a * *b) + *c, particularly when vectorizing, allow
1652 ;; combine to generate a multiply/add with two memory references. We then
1653 ;; split this insn, into loading up the destination register with one of the
1654 ;; memory operations. If we don't manage to split the insn, reload will
1655 ;; generate the appropriate moves. The reason this is needed is that combine
1656 ;; has already folded one of the memory references into both the multiply and
1657 ;; add insns, and it can't generate a new pseudo. I.e.:
1658 ;; (set (reg1) (mem (addr1)))
1659 ;; (set (reg2) (mult (reg1) (mem (addr2))))
1660 ;; (set (reg3) (plus (reg2) (mem (addr3))))
1662 ;; ??? This is historic, pre-dating the gimple fma transformation.
1663 ;; We could now properly represent that only one memory operand is
1664 ;; allowed and not be penalized during optimization.
1666 ;; Intrinsic FMA operations.
1668 ;; The standard names for fma are only available with SSE math enabled.
;; Standard-name fused multiply-add expander over FMAMODE.  All three
;; sources may be registers or memory.  Enabled when either TARGET_FMA or
;; TARGET_FMA4 is set and TARGET_SSE_MATH holds.
1669 (define_expand "fma<mode>4"
1670 [(set (match_operand:FMAMODE 0 "register_operand")
1672 (match_operand:FMAMODE 1 "nonimmediate_operand")
1673 (match_operand:FMAMODE 2 "nonimmediate_operand")
1674 (match_operand:FMAMODE 3 "nonimmediate_operand")))]
1675 "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH")
;; Standard-name fused multiply-subtract expander: same shape as
;; fma<mode>4 but with operand 3 wrapped in neg.
1677 (define_expand "fms<mode>4"
1678 [(set (match_operand:FMAMODE 0 "register_operand")
1680 (match_operand:FMAMODE 1 "nonimmediate_operand")
1681 (match_operand:FMAMODE 2 "nonimmediate_operand")
1682 (neg:FMAMODE (match_operand:FMAMODE 3 "nonimmediate_operand"))))]
1683 "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH")
;; Standard-name negated fused multiply-add expander: same shape as
;; fma<mode>4 but with operand 1 wrapped in neg.
1685 (define_expand "fnma<mode>4"
1686 [(set (match_operand:FMAMODE 0 "register_operand")
1688 (neg:FMAMODE (match_operand:FMAMODE 1 "nonimmediate_operand"))
1689 (match_operand:FMAMODE 2 "nonimmediate_operand")
1690 (match_operand:FMAMODE 3 "nonimmediate_operand")))]
1691 "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH")
;; Standard-name negated fused multiply-subtract expander: both operand 1
;; and operand 3 are wrapped in neg.
1693 (define_expand "fnms<mode>4"
1694 [(set (match_operand:FMAMODE 0 "register_operand")
1696 (neg:FMAMODE (match_operand:FMAMODE 1 "nonimmediate_operand"))
1697 (match_operand:FMAMODE 2 "nonimmediate_operand")
1698 (neg:FMAMODE (match_operand:FMAMODE 3 "nonimmediate_operand"))))]
1699 "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH")
1701 ;; The builtins for fma4intrin.h do not require SSE math to be enabled.
;; Intrinsic fused multiply-add expander over FMAMODE.  Unlike the
;; standard-name fma<mode>4 expander, its condition is only
;; TARGET_FMA || TARGET_FMA4, with no TARGET_SSE_MATH requirement.
1702 (define_expand "fma4i_fmadd_<mode>"
1703 [(set (match_operand:FMAMODE 0 "register_operand")
1705 (match_operand:FMAMODE 1 "nonimmediate_operand")
1706 (match_operand:FMAMODE 2 "nonimmediate_operand")
1707 (match_operand:FMAMODE 3 "nonimmediate_operand")))]
1708 "TARGET_FMA || TARGET_FMA4")
;; Four-operand vfmadd<ssemodesuffix> over FMAMODE.  Operand 1 is always
;; a register and is commutative with operand 2 ('%').  The two
;; alternatives allow a memory reference in operand 3 (alternative 0) or
;; in operand 2 (alternative 1), never in both.
1710 (define_insn "*fma4i_fmadd_<mode>"
1711 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
1713 (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x")
1714 (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
1715 (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x")))]
1717 "vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1718 [(set_attr "type" "ssemuladd")
1719 (set_attr "mode" "<MODE>")])
;; Four-operand vfmsub<ssemodesuffix> over FMAMODE.  Same constraint
;; layout as *fma4i_fmadd_<mode>: at most one of operands 2 and 3 may be
;; in memory.
1721 (define_insn "*fma4i_fmsub_<mode>"
1722 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
1724 (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x")
1725 (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
1727 (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x"))))]
1729 "vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1730 [(set_attr "type" "ssemuladd")
1731 (set_attr "mode" "<MODE>")])
;; Four-operand vfnmadd<ssemodesuffix> over FMAMODE.  Same constraint
;; layout as *fma4i_fmadd_<mode>: at most one of operands 2 and 3 may be
;; in memory.
1733 (define_insn "*fma4i_fnmadd_<mode>"
1734 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
1737 (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x"))
1738 (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
1739 (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x")))]
1741 "vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1742 [(set_attr "type" "ssemuladd")
1743 (set_attr "mode" "<MODE>")])
;; Four-operand vfnmsub<ssemodesuffix> over FMAMODE.  Same constraint
;; layout as *fma4i_fmadd_<mode>: at most one of operands 2 and 3 may be
;; in memory.
1745 (define_insn "*fma4i_fnmsub_<mode>"
1746 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
1749 (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x"))
1750 (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
1752 (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x"))))]
1754 "vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1755 [(set_attr "type" "ssemuladd")
1756 (set_attr "mode" "<MODE>")])
1758 ;; Scalar versions of the above. Unlike ADDSS et al, these write the
1759 ;; entire destination register, with the high-order elements zeroed.
;; Expander for the scalar FMA4 multiply-add over the VF_128 modes.  The
;; preparation code sets operand 4 to the zero constant of the vector
;; mode (CONST0_RTX).
1761 (define_expand "fma4i_vmfmadd_<mode>"
1762 [(set (match_operand:VF_128 0 "register_operand")
1765 (match_operand:VF_128 1 "nonimmediate_operand")
1766 (match_operand:VF_128 2 "nonimmediate_operand")
1767 (match_operand:VF_128 3 "nonimmediate_operand"))
1772 operands[4] = CONST0_RTX (<MODE>mode);
;; Expander for the scalar "fmai" multiply-add over the VF_128 modes,
;; taking three register-or-memory sources.
;; NOTE(review): the remainder of the RTL template and the enabling
;; condition are not visible in this excerpt -- confirm against the full
;; file.
1775 (define_expand "fmai_vmfmadd_<mode>"
1776 [(set (match_operand:VF_128 0 "register_operand")
1779 (match_operand:VF_128 1 "nonimmediate_operand")
1780 (match_operand:VF_128 2 "nonimmediate_operand")
1781 (match_operand:VF_128 3 "nonimmediate_operand"))
;; Scalar three-operand fused multiply-add over the VF_128 modes.  The
;; alternative determines the encoding form:
;;   alt 0: operand 1 tied to dest, operand 2 reg/mem  -> vfmadd132
;;   alt 1: operand 1 tied to dest, operand 3 reg/mem  -> vfmadd213
;;   alt 2: operand 3 tied to dest, operand 2 reg/mem  -> vfmadd231
1786 (define_insn "*fmai_fmadd_<mode>"
1787 [(set (match_operand:VF_128 0 "register_operand" "=x,x,x")
1790 (match_operand:VF_128 1 "nonimmediate_operand" "%0, 0,x")
1791 (match_operand:VF_128 2 "nonimmediate_operand" "xm, x,xm")
1792 (match_operand:VF_128 3 "nonimmediate_operand" " x,xm,0"))
1797 vfmadd132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2}
1798 vfmadd213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
1799 vfmadd231<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1800 [(set_attr "type" "ssemuladd")
1801 (set_attr "mode" "<MODE>")])
;; Scalar three-operand fused multiply-subtract over the VF_128 modes.
;; The three alternatives map to the vfmsub132 / 213 / 231 forms
;; according to which source is tied to the destination and which may be
;; in memory.
1803 (define_insn "*fmai_fmsub_<mode>"
1804 [(set (match_operand:VF_128 0 "register_operand" "=x,x,x")
1807 (match_operand:VF_128 1 "nonimmediate_operand" "%0, 0,x")
1808 (match_operand:VF_128 2 "nonimmediate_operand" "xm, x,xm")
1810 (match_operand:VF_128 3 "nonimmediate_operand" " x,xm,0")))
1815 vfmsub132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2}
1816 vfmsub213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
1817 vfmsub231<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1818 [(set_attr "type" "ssemuladd")
1819 (set_attr "mode" "<MODE>")])
;; Scalar three-operand negated fused multiply-add over the VF_128 modes.
;; The three alternatives map to the vfnmadd132 / 213 / 231 forms
;; according to which source is tied to the destination and which may be
;; in memory.
1821 (define_insn "*fmai_fnmadd_<mode>"
1822 [(set (match_operand:VF_128 0 "register_operand" "=x,x,x")
1826 (match_operand:VF_128 1 "nonimmediate_operand" "%0, 0,x"))
1827 (match_operand:VF_128 2 "nonimmediate_operand" "xm, x,xm")
1828 (match_operand:VF_128 3 "nonimmediate_operand" " x,xm,0"))
1833 vfnmadd132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2}
1834 vfnmadd213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
1835 vfnmadd231<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1836 [(set_attr "type" "ssemuladd")
1837 (set_attr "mode" "<MODE>")])
;; Scalar three-operand negated fused multiply-subtract over the VF_128
;; modes.  The three alternatives map to the vfnmsub132 / 213 / 231 forms
;; according to which source is tied to the destination and which may be
;; in memory.
1839 (define_insn "*fmai_fnmsub_<mode>"
1840 [(set (match_operand:VF_128 0 "register_operand" "=x,x,x")
1844 (match_operand:VF_128 1 "nonimmediate_operand" "%0, 0,x"))
1845 (match_operand:VF_128 2 "nonimmediate_operand" "xm, x,xm")
1847 (match_operand:VF_128 3 "nonimmediate_operand" " x,xm,0")))
1852 vfnmsub132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2}
1853 vfnmsub213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
1854 vfnmsub231<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1855 [(set_attr "type" "ssemuladd")
1856 (set_attr "mode" "<MODE>")])
;; Scalar four-operand vfmadd<ssescalarmodesuffix> over the VF_128 modes.
;; Operand 4 must satisfy const0_operand.  At most one of operands 2 and
;; 3 may be in memory.
1858 (define_insn "*fma4i_vmfmadd_<mode>"
1859 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1862 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
1863 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
1864 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
1865 (match_operand:VF_128 4 "const0_operand" "")
1868 "vfmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1869 [(set_attr "type" "ssemuladd")
1870 (set_attr "mode" "<MODE>")])
;; Scalar four-operand vfmsub<ssescalarmodesuffix> over the VF_128 modes.
;; Operand 4 must satisfy const0_operand.  At most one of operands 2 and
;; 3 may be in memory.
1872 (define_insn "*fma4i_vmfmsub_<mode>"
1873 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1876 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
1877 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
1879 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
1880 (match_operand:VF_128 4 "const0_operand" "")
1883 "vfmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1884 [(set_attr "type" "ssemuladd")
1885 (set_attr "mode" "<MODE>")])
;; Scalar four-operand vfnmadd<ssescalarmodesuffix> over the VF_128
;; modes.  Operand 4 must satisfy const0_operand.  At most one of
;; operands 2 and 3 may be in memory.
1887 (define_insn "*fma4i_vmfnmadd_<mode>"
1888 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1892 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
1893 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
1894 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
1895 (match_operand:VF_128 4 "const0_operand" "")
1898 "vfnmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1899 [(set_attr "type" "ssemuladd")
1900 (set_attr "mode" "<MODE>")])
;; Scalar four-operand vfnmsub<ssescalarmodesuffix> over the VF_128
;; modes.  Operand 4 must satisfy const0_operand.  At most one of
;; operands 2 and 3 may be in memory.
1902 (define_insn "*fma4i_vmfnmsub_<mode>"
1903 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1907 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
1908 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
1910 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
1911 (match_operand:VF_128 4 "const0_operand" "")
1914 "vfnmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1915 [(set_attr "type" "ssemuladd")
1916 (set_attr "mode" "<MODE>")])
1918 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1920 ;; FMA4 Parallel floating point multiply addsub and subadd operations.
1922 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1924 ;; It would be possible to represent these without the UNSPEC as
;;
;; (vec_merge
1927 ;; (fma op1 op2 op3)
1928 ;; (fma op1 op2 (neg op3))
;; (merge-const))
;;
1931 ;; But this doesn't seem useful in practice.
;; Expander for the packed multiply add/sub operation over the VF modes.
;; The three sources may be registers or memory.  Enabled when either
;; TARGET_FMA or TARGET_FMA4 is set.
1933 (define_expand "fmaddsub_<mode>"
1934 [(set (match_operand:VF 0 "register_operand")
1936 [(match_operand:VF 1 "nonimmediate_operand")
1937 (match_operand:VF 2 "nonimmediate_operand")
1938 (match_operand:VF 3 "nonimmediate_operand")]
1940 "TARGET_FMA || TARGET_FMA4")
;; Four-operand vfmaddsub<ssemodesuffix> over the VF modes.  Operand 1 is
;; always a register; at most one of operands 2 and 3 may be in memory.
1942 (define_insn "*fma4_fmaddsub_<mode>"
1943 [(set (match_operand:VF 0 "register_operand" "=x,x")
1945 [(match_operand:VF 1 "nonimmediate_operand" "%x,x")
1946 (match_operand:VF 2 "nonimmediate_operand" " x,m")
1947 (match_operand:VF 3 "nonimmediate_operand" "xm,x")]
1950 "vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1951 [(set_attr "type" "ssemuladd")
1952 (set_attr "mode" "<MODE>")])
;; Four-operand vfmsubadd<ssemodesuffix> over the VF modes.  Operand 1 is
;; always a register; at most one of operands 2 and 3 may be in memory.
1954 (define_insn "*fma4_fmsubadd_<mode>"
1955 [(set (match_operand:VF 0 "register_operand" "=x,x")
1957 [(match_operand:VF 1 "nonimmediate_operand" "%x,x")
1958 (match_operand:VF 2 "nonimmediate_operand" " x,m")
1960 (match_operand:VF 3 "nonimmediate_operand" "xm,x"))]
1963 "vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1964 [(set_attr "type" "ssemuladd")
1965 (set_attr "mode" "<MODE>")])
1967 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1969 ;; FMA3 floating point multiply/accumulate instructions.
1971 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Three-operand fused multiply-add over FMAMODE.  The alternative
;; determines the encoding form:
;;   alt 0: operand 1 tied to dest, operand 2 reg/mem  -> vfmadd132
;;   alt 1: operand 1 tied to dest, operand 3 reg/mem  -> vfmadd213
;;   alt 2: operand 3 tied to dest, operand 2 reg/mem  -> vfmadd231
1973 (define_insn "*fma_fmadd_<mode>"
1974 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
1976 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x")
1977 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
1978 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0")))]
1981 vfmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
1982 vfmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
1983 vfmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1984 [(set_attr "type" "ssemuladd")
1985 (set_attr "mode" "<MODE>")])
;; Three-operand fused multiply-subtract over FMAMODE.  The three
;; alternatives map to the vfmsub132 / 213 / 231 forms according to which
;; source is tied to the destination and which may be in memory.
1987 (define_insn "*fma_fmsub_<mode>"
1988 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
1990 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x")
1991 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
1993 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0"))))]
1996 vfmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
1997 vfmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
1998 vfmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1999 [(set_attr "type" "ssemuladd")
2000 (set_attr "mode" "<MODE>")])
;; Three-operand negated fused multiply-add over FMAMODE.  The three
;; alternatives map to the vfnmadd132 / 213 / 231 forms according to
;; which source is tied to the destination and which may be in memory.
2002 (define_insn "*fma_fnmadd_<mode>"
2003 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
2006 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x"))
2007 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
2008 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0")))]
2011 vfnmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2012 vfnmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2013 vfnmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2014 [(set_attr "type" "ssemuladd")
2015 (set_attr "mode" "<MODE>")])
;; Three-operand negated fused multiply-subtract over FMAMODE.  The three
;; alternatives map to the vfnmsub132 / 213 / 231 forms according to
;; which source is tied to the destination and which may be in memory.
2017 (define_insn "*fma_fnmsub_<mode>"
2018 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
2021 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x"))
2022 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
2024 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0"))))]
2027 vfnmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2028 vfnmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2029 vfnmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2030 [(set_attr "type" "ssemuladd")
2031 (set_attr "mode" "<MODE>")])
;; Three-operand packed multiply add/sub over the VF modes.  The three
;; alternatives map to the vfmaddsub132 / 213 / 231 forms according to
;; which source is tied to the destination and which may be in memory.
2033 (define_insn "*fma_fmaddsub_<mode>"
2034 [(set (match_operand:VF 0 "register_operand" "=x,x,x")
2036 [(match_operand:VF 1 "nonimmediate_operand" "%0, 0,x")
2037 (match_operand:VF 2 "nonimmediate_operand" "xm, x,xm")
2038 (match_operand:VF 3 "nonimmediate_operand" " x,xm,0")]
2042 vfmaddsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2043 vfmaddsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2044 vfmaddsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2045 [(set_attr "type" "ssemuladd")
2046 (set_attr "mode" "<MODE>")])
;; Three-operand packed multiply sub/add over the VF modes.  The three
;; alternatives map to the vfmsubadd132 / 213 / 231 forms according to
;; which source is tied to the destination and which may be in memory.
2048 (define_insn "*fma_fmsubadd_<mode>"
2049 [(set (match_operand:VF 0 "register_operand" "=x,x,x")
2051 [(match_operand:VF 1 "nonimmediate_operand" "%0, 0,x")
2052 (match_operand:VF 2 "nonimmediate_operand" "xm, x,xm")
2054 (match_operand:VF 3 "nonimmediate_operand" " x,xm,0"))]
2058 vfmsubadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2059 vfmsubadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2060 vfmsubadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2061 [(set_attr "type" "ssemuladd")
2062 (set_attr "mode" "<MODE>")])
2064 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2066 ;; Parallel single-precision floating point conversion operations
2068 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2ps: converts the V2SI value in operand 2 (constraint "ym") to
;; V2SF and places it in a V4SF destination whose other source, operand
;; 1, is tied to the destination register.
2070 (define_insn "sse_cvtpi2ps"
2071 [(set (match_operand:V4SF 0 "register_operand" "=x")
2074 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
2075 (match_operand:V4SF 1 "register_operand" "0")
2078 "cvtpi2ps\t{%2, %0|%0, %2}"
2079 [(set_attr "type" "ssecvt")
2080 (set_attr "mode" "V4SF")])
;; cvtps2pi: elements 0 and 1 of a V4SI unspec of the V4SF source
;; (register or memory) are written to a V2SI destination with constraint
;; "=y".  The "unit" attribute is "mmx".
2082 (define_insn "sse_cvtps2pi"
2083 [(set (match_operand:V2SI 0 "register_operand" "=y")
2085 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
2087 (parallel [(const_int 0) (const_int 1)])))]
2089 "cvtps2pi\t{%1, %0|%0, %1}"
2090 [(set_attr "type" "ssecvt")
2091 (set_attr "unit" "mmx")
2092 (set_attr "mode" "DI")])
;; cvttps2pi: like sse_cvtps2pi, but the conversion is expressed with the
;; RTL fix operator instead of an unspec.  Elements 0 and 1 of the V4SI
;; result are written to the V2SI destination.
2094 (define_insn "sse_cvttps2pi"
2095 [(set (match_operand:V2SI 0 "register_operand" "=y")
2097 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
2098 (parallel [(const_int 0) (const_int 1)])))]
2100 "cvttps2pi\t{%1, %0|%0, %1}"
2101 [(set_attr "type" "ssecvt")
2102 (set_attr "unit" "mmx")
2103 (set_attr "prefix_rep" "0")
2104 (set_attr "mode" "SF")])
;; cvtsi2ss: converts the SImode operand 2 to SF within a V4SF
;; destination.  Alternatives 0 and 1 (isa "noavx") take operand 2 from a
;; general register and from memory respectively and differ only in
;; their decode attributes; alternative 2 (isa "avx") is the
;; three-operand vcvtsi2ss form accepting either.
2106 (define_insn "sse_cvtsi2ss"
2107 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
2110 (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m,rm")))
2111 (match_operand:V4SF 1 "register_operand" "0,0,x")
2115 cvtsi2ss\t{%2, %0|%0, %2}
2116 cvtsi2ss\t{%2, %0|%0, %2}
2117 vcvtsi2ss\t{%2, %1, %0|%0, %1, %2}"
2118 [(set_attr "isa" "noavx,noavx,avx")
2119 (set_attr "type" "sseicvt")
2120 (set_attr "athlon_decode" "vector,double,*")
2121 (set_attr "amdfam10_decode" "vector,double,*")
2122 (set_attr "bdver1_decode" "double,direct,*")
2123 (set_attr "prefix" "orig,orig,vex")
2124 (set_attr "mode" "SF")])
;; 64-bit variant of sse_cvtsi2ss: operand 2 is DImode and the pattern is
;; enabled only under TARGET_SSE && TARGET_64BIT.  The noavx alternatives
;; set prefix_rex to 1; the avx alternative sets length_vex to 4.
2126 (define_insn "sse_cvtsi2ssq"
2127 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
2130 (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,m,rm")))
2131 (match_operand:V4SF 1 "register_operand" "0,0,x")
2133 "TARGET_SSE && TARGET_64BIT"
2135 cvtsi2ssq\t{%2, %0|%0, %2}
2136 cvtsi2ssq\t{%2, %0|%0, %2}
2137 vcvtsi2ssq\t{%2, %1, %0|%0, %1, %2}"
2138 [(set_attr "isa" "noavx,noavx,avx")
2139 (set_attr "type" "sseicvt")
2140 (set_attr "athlon_decode" "vector,double,*")
2141 (set_attr "amdfam10_decode" "vector,double,*")
2142 (set_attr "bdver1_decode" "double,direct,*")
2143 (set_attr "length_vex" "*,*,4")
2144 (set_attr "prefix_rex" "1,1,*")
2145 (set_attr "prefix" "orig,orig,vex")
2146 (set_attr "mode" "SF")])
;; cvtss2si: UNSPEC_FIX_NOTRUNC conversion of element 0 of a V4SF operand
;; (register or memory) to an SImode general register.
;; NOTE(review): unlike sse_cvtss2si_2 this pattern sets no
;; amdfam10_decode attribute -- confirm that is intentional.
2148 (define_insn "sse_cvtss2si"
2149 [(set (match_operand:SI 0 "register_operand" "=r,r")
2152 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2153 (parallel [(const_int 0)]))]
2154 UNSPEC_FIX_NOTRUNC))]
2156 "%vcvtss2si\t{%1, %0|%0, %1}"
2157 [(set_attr "type" "sseicvt")
2158 (set_attr "athlon_decode" "double,vector")
2159 (set_attr "bdver1_decode" "double,double")
2160 (set_attr "prefix_rep" "1")
2161 (set_attr "prefix" "maybe_vex")
2162 (set_attr "mode" "SI")])
;; Variant of sse_cvtss2si whose source (operand 1) is a plain SFmode value
;; rather than a V4SF vector; it emits the same cvtss2si template.
2164 (define_insn "sse_cvtss2si_2"
2165 [(set (match_operand:SI 0 "register_operand" "=r,r")
2166 (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
2167 UNSPEC_FIX_NOTRUNC))]
2169 "%vcvtss2si\t{%1, %0|%0, %1}"
2170 [(set_attr "type" "sseicvt")
2171 (set_attr "athlon_decode" "double,vector")
2172 (set_attr "amdfam10_decode" "double,double")
2173 (set_attr "bdver1_decode" "double,double")
2174 (set_attr "prefix_rep" "1")
2175 (set_attr "prefix" "maybe_vex")
2176 (set_attr "mode" "SI")])
;; cvtss2si{q}: DImode-destination form of sse_cvtss2si (UNSPEC_FIX_NOTRUNC,
;; selector element 0 on the V4SF operand 1).  Enabled only for
;; TARGET_SSE && TARGET_64BIT.
2178 (define_insn "sse_cvtss2siq"
2179 [(set (match_operand:DI 0 "register_operand" "=r,r")
2182 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2183 (parallel [(const_int 0)]))]
2184 UNSPEC_FIX_NOTRUNC))]
2185 "TARGET_SSE && TARGET_64BIT"
2186 "%vcvtss2si{q}\t{%1, %0|%0, %1}"
2187 [(set_attr "type" "sseicvt")
2188 (set_attr "athlon_decode" "double,vector")
2189 (set_attr "bdver1_decode" "double,double")
2190 (set_attr "prefix_rep" "1")
2191 (set_attr "prefix" "maybe_vex")
2192 (set_attr "mode" "DI")])
;; Variant of sse_cvtss2siq whose source (operand 1) is a plain SFmode value;
;; DImode result, enabled only for TARGET_SSE && TARGET_64BIT.
2194 (define_insn "sse_cvtss2siq_2"
2195 [(set (match_operand:DI 0 "register_operand" "=r,r")
2196 (unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
2197 UNSPEC_FIX_NOTRUNC))]
2198 "TARGET_SSE && TARGET_64BIT"
2199 "%vcvtss2si{q}\t{%1, %0|%0, %1}"
2200 [(set_attr "type" "sseicvt")
2201 (set_attr "athlon_decode" "double,vector")
2202 (set_attr "amdfam10_decode" "double,double")
2203 (set_attr "bdver1_decode" "double,double")
2204 (set_attr "prefix_rep" "1")
2205 (set_attr "prefix" "maybe_vex")
2206 (set_attr "mode" "DI")])
;; cvttss2si: SI register destination; selector element 0 is applied to the
;; V4SF operand 1 (register or memory alternatives).  Unlike sse_cvtss2si no
;; UNSPEC_FIX_NOTRUNC appears in this pattern.
2208 (define_insn "sse_cvttss2si"
2209 [(set (match_operand:SI 0 "register_operand" "=r,r")
2212 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2213 (parallel [(const_int 0)]))))]
2215 "%vcvttss2si\t{%1, %0|%0, %1}"
2216 [(set_attr "type" "sseicvt")
2217 (set_attr "athlon_decode" "double,vector")
2218 (set_attr "amdfam10_decode" "double,double")
2219 (set_attr "bdver1_decode" "double,double")
2220 (set_attr "prefix_rep" "1")
2221 (set_attr "prefix" "maybe_vex")
2222 (set_attr "mode" "SI")])
;; cvttss2si{q}: DImode-destination form of sse_cvttss2si; enabled only for
;; TARGET_SSE && TARGET_64BIT.
2224 (define_insn "sse_cvttss2siq"
2225 [(set (match_operand:DI 0 "register_operand" "=r,r")
2228 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2229 (parallel [(const_int 0)]))))]
2230 "TARGET_SSE && TARGET_64BIT"
2231 "%vcvttss2si{q}\t{%1, %0|%0, %1}"
2232 [(set_attr "type" "sseicvt")
2233 (set_attr "athlon_decode" "double,vector")
2234 (set_attr "amdfam10_decode" "double,double")
2235 (set_attr "bdver1_decode" "double,double")
2236 (set_attr "prefix_rep" "1")
2237 (set_attr "prefix" "maybe_vex")
2238 (set_attr "mode" "DI")])
;; vcvtdq2ps on eight elements: float:V8SF conversion of the V8SI operand 1
;; (register or memory) into a V8SF register.  Prefix attribute is "vex".
2240 (define_insn "avx_cvtdq2ps256"
2241 [(set (match_operand:V8SF 0 "register_operand" "=x")
2242 (float:V8SF (match_operand:V8SI 1 "nonimmediate_operand" "xm")))]
2244 "vcvtdq2ps\t{%1, %0|%0, %1}"
2245 [(set_attr "type" "ssecvt")
2246 (set_attr "prefix" "vex")
2247 (set_attr "mode" "V8SF")])
;; cvtdq2ps: float:V4SF conversion of the V4SI operand 1 (register or memory)
;; into a V4SF register.  The "%v" template prefix and "maybe_vex" attribute
;; cover both the legacy and the VEX spelling.
2249 (define_insn "sse2_cvtdq2ps"
2250 [(set (match_operand:V4SF 0 "register_operand" "=x")
2251 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
2253 "%vcvtdq2ps\t{%1, %0|%0, %1}"
2254 [(set_attr "type" "ssecvt")
2255 (set_attr "prefix" "maybe_vex")
2256 (set_attr "mode" "V4SF")])
;; Expander built from four visible steps: a float:V4SF conversion of the
;; V4SI operand 1, an lt comparison of operand 5 against operand 3, an AND of
;; operand 6 with operand 4, and a final add of operands 5 and 7 into
;; operand 0.  The preparation code sets operand 3 to a zero V4SF vector in a
;; register, operand 4 to a register holding the vector that
;; ix86_build_const_vector makes from the SFmode constant 1.0 * 2**32, and
;; operands 5..7 to fresh V4SF pseudos.
;; NOTE(review): the name suggests an unsigned int -> float fixup sequence;
;; confirm against the full pattern.
2258 (define_expand "sse2_cvtudq2ps"
2260 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "")))
2262 (lt:V4SF (match_dup 5) (match_dup 3)))
2264 (and:V4SF (match_dup 6) (match_dup 4)))
2265 (set (match_operand:V4SF 0 "register_operand" "")
2266 (plus:V4SF (match_dup 5) (match_dup 7)))]
2269 REAL_VALUE_TYPE TWO32r;
2273 real_ldexp (&TWO32r, &dconst1, 32);
2274 x = const_double_from_real_value (TWO32r, SFmode);
2276 operands[3] = force_reg (V4SFmode, CONST0_RTX (V4SFmode));
2277 operands[4] = force_reg (V4SFmode,
2278 ix86_build_const_vector (V4SFmode, 1, x));
2280 for (i = 5; i < 8; i++)
2281 operands[i] = gen_reg_rtx (V4SFmode);
;; vcvtps2dq on eight elements: UNSPEC_FIX_NOTRUNC conversion of the V8SF
;; operand 1 (register or memory) into a V8SI register.  Prefix is "vex".
2284 (define_insn "avx_cvtps2dq256"
2285 [(set (match_operand:V8SI 0 "register_operand" "=x")
2286 (unspec:V8SI [(match_operand:V8SF 1 "nonimmediate_operand" "xm")]
2287 UNSPEC_FIX_NOTRUNC))]
2289 "vcvtps2dq\t{%1, %0|%0, %1}"
2290 [(set_attr "type" "ssecvt")
2291 (set_attr "prefix" "vex")
2292 (set_attr "mode" "OI")])
;; cvtps2dq: UNSPEC_FIX_NOTRUNC conversion of the V4SF operand 1 (register or
;; memory) into a V4SI register.  The prefix_data16 attribute is computed from
;; a TARGET_AVX test; the last arm of that expression is "1".
2294 (define_insn "sse2_cvtps2dq"
2295 [(set (match_operand:V4SI 0 "register_operand" "=x")
2296 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
2297 UNSPEC_FIX_NOTRUNC))]
2299 "%vcvtps2dq\t{%1, %0|%0, %1}"
2300 [(set_attr "type" "ssecvt")
2301 (set (attr "prefix_data16")
2303 (match_test "TARGET_AVX")
2305 (const_string "1")))
2306 (set_attr "prefix" "maybe_vex")
2307 (set_attr "mode" "TI")])
;; vcvttps2dq on eight elements: fix:V8SI conversion of the V8SF operand 1
;; (register or memory) into a V8SI register.  Prefix is "vex".
2309 (define_insn "avx_cvttps2dq256"
2310 [(set (match_operand:V8SI 0 "register_operand" "=x")
2311 (fix:V8SI (match_operand:V8SF 1 "nonimmediate_operand" "xm")))]
2313 "vcvttps2dq\t{%1, %0|%0, %1}"
2314 [(set_attr "type" "ssecvt")
2315 (set_attr "prefix" "vex")
2316 (set_attr "mode" "OI")])
;; cvttps2dq: fix:V4SI conversion of the V4SF operand 1 (register or memory)
;; into a V4SI register.  prefix_rep and prefix_data16 are each computed from
;; a TARGET_AVX test (last arms "1" and "0" respectively).
;; NOTE(review): "prefix_data16" is set twice in this attribute list -- once
;; via the TARGET_AVX-dependent expression and once unconditionally to "0".
;; Confirm which setting is intended and whether the other is redundant.
2318 (define_insn "sse2_cvttps2dq"
2319 [(set (match_operand:V4SI 0 "register_operand" "=x")
2320 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
2322 "%vcvttps2dq\t{%1, %0|%0, %1}"
2323 [(set_attr "type" "ssecvt")
2324 (set (attr "prefix_rep")
2326 (match_test "TARGET_AVX")
2328 (const_string "1")))
2329 (set (attr "prefix_data16")
2331 (match_test "TARGET_AVX")
2333 (const_string "0")))
2334 (set_attr "prefix_data16" "0")
2335 (set_attr "prefix" "maybe_vex")
2336 (set_attr "mode" "TI")])
2338 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2340 ;; Parallel double-precision floating point conversion operations
2342 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2pd: float:V2DF conversion of the V2SI operand 1 into a V2DF register.
;; Alternative 0 takes the source with constraint "y" (unit "mmx",
;; prefix_data16 "1"); alternative 1 takes it from memory.
2344 (define_insn "sse2_cvtpi2pd"
2345 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2346 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
2348 "cvtpi2pd\t{%1, %0|%0, %1}"
2349 [(set_attr "type" "ssecvt")
2350 (set_attr "unit" "mmx,*")
2351 (set_attr "prefix_data16" "1,*")
2352 (set_attr "mode" "V2DF")])
;; cvtpd2pi: UNSPEC_FIX_NOTRUNC conversion of the V2DF operand 1 (register or
;; memory) into a V2SI register with constraint "y"; unit "mmx".
2354 (define_insn "sse2_cvtpd2pi"
2355 [(set (match_operand:V2SI 0 "register_operand" "=y")
2356 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2357 UNSPEC_FIX_NOTRUNC))]
2359 "cvtpd2pi\t{%1, %0|%0, %1}"
2360 [(set_attr "type" "ssecvt")
2361 (set_attr "unit" "mmx")
2362 (set_attr "bdver1_decode" "double")
2363 (set_attr "prefix_data16" "1")
2364 (set_attr "mode" "DI")])
;; cvttpd2pi: fix:V2SI conversion of the V2DF operand 1 (register or memory)
;; into a V2SI register with constraint "y"; unit "mmx".
2366 (define_insn "sse2_cvttpd2pi"
2367 [(set (match_operand:V2SI 0 "register_operand" "=y")
2368 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
2370 "cvttpd2pi\t{%1, %0|%0, %1}"
2371 [(set_attr "type" "ssecvt")
2372 (set_attr "unit" "mmx")
2373 (set_attr "bdver1_decode" "double")
2374 (set_attr "prefix_data16" "1")
2375 (set_attr "mode" "TI")])
;; cvtsi2sd / vcvtsi2sd: float:DF conversion of the SI operand 2 (register or
;; memory) for a V2DF destination.  The two "noavx" alternatives tie operand 1
;; to the destination ("0"); the "avx" alternative takes operand 1 as a
;; separate register and uses the three-operand template.
2377 (define_insn "sse2_cvtsi2sd"
2378 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
2381 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m,rm")))
2382 (match_operand:V2DF 1 "register_operand" "0,0,x")
2386 cvtsi2sd\t{%2, %0|%0, %2}
2387 cvtsi2sd\t{%2, %0|%0, %2}
2388 vcvtsi2sd\t{%2, %1, %0|%0, %1, %2}"
2389 [(set_attr "isa" "noavx,noavx,avx")
2390 (set_attr "type" "sseicvt")
2391 (set_attr "athlon_decode" "double,direct,*")
2392 (set_attr "amdfam10_decode" "vector,double,*")
2393 (set_attr "bdver1_decode" "double,direct,*")
2394 (set_attr "prefix" "orig,orig,vex")
2395 (set_attr "mode" "DF")])
;; cvtsi2sdq / vcvtsi2sdq: same shape as sse2_cvtsi2sd but the integer source
;; (operand 2) is DImode, and the insn is only enabled for
;; TARGET_SSE2 && TARGET_64BIT.  The legacy alternatives set prefix_rex to
;; "1"; the VEX alternative sets length_vex to "4".
2397 (define_insn "sse2_cvtsi2sdq"
2398 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
2401 (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m,rm")))
2402 (match_operand:V2DF 1 "register_operand" "0,0,x")
2404 "TARGET_SSE2 && TARGET_64BIT"
2406 cvtsi2sdq\t{%2, %0|%0, %2}
2407 cvtsi2sdq\t{%2, %0|%0, %2}
2408 vcvtsi2sdq\t{%2, %1, %0|%0, %1, %2}"
2409 [(set_attr "isa" "noavx,noavx,avx")
2410 (set_attr "type" "sseicvt")
2411 (set_attr "athlon_decode" "double,direct,*")
2412 (set_attr "amdfam10_decode" "vector,double,*")
2413 (set_attr "bdver1_decode" "double,direct,*")
2414 (set_attr "length_vex" "*,*,4")
2415 (set_attr "prefix_rex" "1,1,*")
2416 (set_attr "prefix" "orig,orig,vex")
2417 (set_attr "mode" "DF")])
;; cvtsd2si: UNSPEC_FIX_NOTRUNC conversion to an SI register ("r"), with
;; selector element 0 applied to the V2DF operand 1.  Alternative 0 takes
;; operand 1 in a register ("x"), alternative 1 from memory ("m").
2419 (define_insn "sse2_cvtsd2si"
2420 [(set (match_operand:SI 0 "register_operand" "=r,r")
2423 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2424 (parallel [(const_int 0)]))]
2425 UNSPEC_FIX_NOTRUNC))]
2427 "%vcvtsd2si\t{%1, %0|%0, %1}"
2428 [(set_attr "type" "sseicvt")
2429 (set_attr "athlon_decode" "double,vector")
2430 (set_attr "bdver1_decode" "double,double")
2431 (set_attr "prefix_rep" "1")
2432 (set_attr "prefix" "maybe_vex")
2433 (set_attr "mode" "SI")])
;; Variant of sse2_cvtsd2si whose source (operand 1) is a plain DFmode value
;; rather than a V2DF vector; it emits the same cvtsd2si template.
2435 (define_insn "sse2_cvtsd2si_2"
2436 [(set (match_operand:SI 0 "register_operand" "=r,r")
2437 (unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2438 UNSPEC_FIX_NOTRUNC))]
2440 "%vcvtsd2si\t{%1, %0|%0, %1}"
2441 [(set_attr "type" "sseicvt")
2442 (set_attr "athlon_decode" "double,vector")
2443 (set_attr "amdfam10_decode" "double,double")
2444 (set_attr "bdver1_decode" "double,double")
2445 (set_attr "prefix_rep" "1")
2446 (set_attr "prefix" "maybe_vex")
2447 (set_attr "mode" "SI")])
;; cvtsd2si{q}: DImode-destination form of sse2_cvtsd2si (UNSPEC_FIX_NOTRUNC,
;; selector element 0 on the V2DF operand 1).  Enabled only for
;; TARGET_SSE2 && TARGET_64BIT.
2449 (define_insn "sse2_cvtsd2siq"
2450 [(set (match_operand:DI 0 "register_operand" "=r,r")
2453 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2454 (parallel [(const_int 0)]))]
2455 UNSPEC_FIX_NOTRUNC))]
2456 "TARGET_SSE2 && TARGET_64BIT"
2457 "%vcvtsd2si{q}\t{%1, %0|%0, %1}"
2458 [(set_attr "type" "sseicvt")
2459 (set_attr "athlon_decode" "double,vector")
2460 (set_attr "bdver1_decode" "double,double")
2461 (set_attr "prefix_rep" "1")
2462 (set_attr "prefix" "maybe_vex")
2463 (set_attr "mode" "DI")])
;; Variant of sse2_cvtsd2siq whose source (operand 1) is a plain DFmode value;
;; DImode result, enabled only for TARGET_SSE2 && TARGET_64BIT.
2465 (define_insn "sse2_cvtsd2siq_2"
2466 [(set (match_operand:DI 0 "register_operand" "=r,r")
2467 (unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2468 UNSPEC_FIX_NOTRUNC))]
2469 "TARGET_SSE2 && TARGET_64BIT"
2470 "%vcvtsd2si{q}\t{%1, %0|%0, %1}"
2471 [(set_attr "type" "sseicvt")
2472 (set_attr "athlon_decode" "double,vector")
2473 (set_attr "amdfam10_decode" "double,double")
2474 (set_attr "bdver1_decode" "double,double")
2475 (set_attr "prefix_rep" "1")
2476 (set_attr "prefix" "maybe_vex")
2477 (set_attr "mode" "DI")])
;; cvttsd2si: SI register destination; selector element 0 is applied to the
;; V2DF operand 1 (register or memory alternatives).  Unlike sse2_cvtsd2si no
;; UNSPEC_FIX_NOTRUNC appears in this pattern.
2479 (define_insn "sse2_cvttsd2si"
2480 [(set (match_operand:SI 0 "register_operand" "=r,r")
2483 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2484 (parallel [(const_int 0)]))))]
2486 "%vcvttsd2si\t{%1, %0|%0, %1}"
2487 [(set_attr "type" "sseicvt")
2488 (set_attr "athlon_decode" "double,vector")
2489 (set_attr "amdfam10_decode" "double,double")
2490 (set_attr "bdver1_decode" "double,double")
2491 (set_attr "prefix_rep" "1")
2492 (set_attr "prefix" "maybe_vex")
2493 (set_attr "mode" "SI")])
;; cvttsd2si{q}: DImode-destination form of sse2_cvttsd2si; enabled only for
;; TARGET_SSE2 && TARGET_64BIT.
2495 (define_insn "sse2_cvttsd2siq"
2496 [(set (match_operand:DI 0 "register_operand" "=r,r")
2499 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2500 (parallel [(const_int 0)]))))]
2501 "TARGET_SSE2 && TARGET_64BIT"
2502 "%vcvttsd2si{q}\t{%1, %0|%0, %1}"
2503 [(set_attr "type" "sseicvt")
2504 (set_attr "athlon_decode" "double,vector")
2505 (set_attr "amdfam10_decode" "double,double")
2506 (set_attr "bdver1_decode" "double,double")
2507 (set_attr "prefix_rep" "1")
2508 (set_attr "prefix" "maybe_vex")
2509 (set_attr "mode" "DI")])
;; vcvtdq2pd: float:V4DF conversion of the V4SI operand 1 (register or
;; memory) into a V4DF register.  Prefix is "vex".
2511 (define_insn "avx_cvtdq2pd256"
2512 [(set (match_operand:V4DF 0 "register_operand" "=x")
2513 (float:V4DF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
2515 "vcvtdq2pd\t{%1, %0|%0, %1}"
2516 [(set_attr "type" "ssecvt")
2517 (set_attr "prefix" "vex")
2518 (set_attr "mode" "V4DF")])
;; Unnamed-to-expanders variant of avx_cvtdq2pd256 whose source is a V8SI
;; operand with selector elements 0..3.  The template prints operand 1 with
;; the "%x" modifier.
2520 (define_insn "*avx_cvtdq2pd256_2"
2521 [(set (match_operand:V4DF 0 "register_operand" "=x")
2524 (match_operand:V8SI 1 "nonimmediate_operand" "xm")
2525 (parallel [(const_int 0) (const_int 1)
2526 (const_int 2) (const_int 3)]))))]
2528 "vcvtdq2pd\t{%x1, %0|%0, %x1}"
2529 [(set_attr "type" "ssecvt")
2530 (set_attr "prefix" "vex")
2531 (set_attr "mode" "V4DF")])
;; cvtdq2pd: V2DF register destination; selector elements 0 and 1 are applied
;; to the V4SI operand 1 (register or memory).
2533 (define_insn "sse2_cvtdq2pd"
2534 [(set (match_operand:V2DF 0 "register_operand" "=x")
2537 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
2538 (parallel [(const_int 0) (const_int 1)]))))]
2540 "%vcvtdq2pd\t{%1, %0|%0, %1}"
2541 [(set_attr "type" "ssecvt")
2542 (set_attr "prefix" "maybe_vex")
2543 (set_attr "mode" "V2DF")])
;; vcvtpd2dq{y}: UNSPEC_FIX_NOTRUNC conversion of the V4DF operand 1
;; (register or memory) into a V4SI register.  Prefix is "vex".
2545 (define_insn "avx_cvtpd2dq256"
2546 [(set (match_operand:V4SI 0 "register_operand" "=x")
2547 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")]
2548 UNSPEC_FIX_NOTRUNC))]
2550 "vcvtpd2dq{y}\t{%1, %0|%0, %1}"
2551 [(set_attr "type" "ssecvt")
2552 (set_attr "prefix" "vex")
2553 (set_attr "mode" "OI")])
;; Expander for the V2DF -> V4SI pattern: it wraps a unspec:V2SI of the V2DF
;; operand 1 and supplies operand 2 as the V2SImode zero constant.
2555 (define_expand "sse2_cvtpd2dq"
2556 [(set (match_operand:V4SI 0 "register_operand" "")
2558 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "")]
2562 "operands[2] = CONST0_RTX (V2SImode);")
;; Matching insn for sse2_cvtpd2dq: a unspec:V2SI of the V2DF operand 1
;; together with a V2SI operand 2 that must satisfy const0_operand.  The output
;; code returns either the "vcvtpd2dq{x}" or the "cvtpd2dq" template.
2564 (define_insn "*sse2_cvtpd2dq"
2565 [(set (match_operand:V4SI 0 "register_operand" "=x")
2567 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2569 (match_operand:V2SI 2 "const0_operand" "")))]
2573 return "vcvtpd2dq{x}\t{%1, %0|%0, %1}";
2575 return "cvtpd2dq\t{%1, %0|%0, %1}";
2577 [(set_attr "type" "ssecvt")
2578 (set_attr "prefix_rep" "1")
2579 (set_attr "prefix_data16" "0")
2580 (set_attr "prefix" "maybe_vex")
2581 (set_attr "mode" "TI")
2582 (set_attr "amdfam10_decode" "double")
2583 (set_attr "athlon_decode" "vector")
2584 (set_attr "bdver1_decode" "double")])
;; vcvttpd2dq{y}: fix:V4SI conversion of the V4DF operand 1 (register or
;; memory) into a V4SI register.  Prefix is "vex".
2586 (define_insn "avx_cvttpd2dq256"
2587 [(set (match_operand:V4SI 0 "register_operand" "=x")
2588 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
2590 "vcvttpd2dq{y}\t{%1, %0|%0, %1}"
2591 [(set_attr "type" "ssecvt")
2592 (set_attr "prefix" "vex")
2593 (set_attr "mode" "OI")])
;; Expander for the truncating V2DF -> V4SI pattern: it wraps fix:V2SI of the
;; V2DF operand 1 and supplies operand 2 as the V2SImode zero constant.
2595 (define_expand "sse2_cvttpd2dq"
2596 [(set (match_operand:V4SI 0 "register_operand" "")
2598 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" ""))
2601 "operands[2] = CONST0_RTX (V2SImode);")
;; Matching insn for sse2_cvttpd2dq: fix:V2SI of the V2DF operand 1 together
;; with a V2SI operand 2 that must satisfy const0_operand.  The output code
;; returns either the "vcvttpd2dq{x}" or the "cvttpd2dq" template.
2603 (define_insn "*sse2_cvttpd2dq"
2604 [(set (match_operand:V4SI 0 "register_operand" "=x")
2606 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2607 (match_operand:V2SI 2 "const0_operand" "")))]
2611 return "vcvttpd2dq{x}\t{%1, %0|%0, %1}";
2613 return "cvttpd2dq\t{%1, %0|%0, %1}";
2615 [(set_attr "type" "ssecvt")
2616 (set_attr "amdfam10_decode" "double")
2617 (set_attr "athlon_decode" "vector")
2618 (set_attr "bdver1_decode" "double")
2619 (set_attr "prefix" "maybe_vex")
2620 (set_attr "mode" "TI")])
;; cvtsd2ss / vcvtsd2ss: float_truncate:V2SF of the V2DF operand 2 for a V4SF
;; destination.  The two "noavx" alternatives tie operand 1 to the destination
;; ("0"); the "avx" alternative takes operand 1 as a separate register and
;; uses the three-operand template.
2622 (define_insn "sse2_cvtsd2ss"
2623 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
2626 (float_truncate:V2SF
2627 (match_operand:V2DF 2 "nonimmediate_operand" "x,m,xm")))
2628 (match_operand:V4SF 1 "register_operand" "0,0,x")
2632 cvtsd2ss\t{%2, %0|%0, %2}
2633 cvtsd2ss\t{%2, %0|%0, %2}
2634 vcvtsd2ss\t{%2, %1, %0|%0, %1, %2}"
2635 [(set_attr "isa" "noavx,noavx,avx")
2636 (set_attr "type" "ssecvt")
2637 (set_attr "athlon_decode" "vector,double,*")
2638 (set_attr "amdfam10_decode" "vector,double,*")
2639 (set_attr "bdver1_decode" "direct,direct,*")
2640 (set_attr "prefix" "orig,orig,vex")
2641 (set_attr "mode" "SF")])
;; cvtss2sd / vcvtss2sd: V2DF destination; selector elements 0 and 1 are
;; applied to the V4SF operand 2.  The two "noavx" alternatives tie operand 1
;; to the destination ("0"); the "avx" alternative takes operand 1 as a
;; separate register and uses the three-operand template.
2643 (define_insn "sse2_cvtss2sd"
2644 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
2648 (match_operand:V4SF 2 "nonimmediate_operand" "x,m,xm")
2649 (parallel [(const_int 0) (const_int 1)])))
2650 (match_operand:V2DF 1 "register_operand" "0,0,x")
2654 cvtss2sd\t{%2, %0|%0, %2}
2655 cvtss2sd\t{%2, %0|%0, %2}
2656 vcvtss2sd\t{%2, %1, %0|%0, %1, %2}"
2657 [(set_attr "isa" "noavx,noavx,avx")
2658 (set_attr "type" "ssecvt")
2659 (set_attr "amdfam10_decode" "vector,double,*")
2660 (set_attr "athlon_decode" "direct,direct,*")
2661 (set_attr "bdver1_decode" "direct,direct,*")
2662 (set_attr "prefix" "orig,orig,vex")
2663 (set_attr "mode" "DF")])
;; vcvtpd2ps{y}: float_truncate:V4SF of the V4DF operand 1 (register or
;; memory) into a V4SF register.  Prefix is "vex".
2665 (define_insn "avx_cvtpd2ps256"
2666 [(set (match_operand:V4SF 0 "register_operand" "=x")
2667 (float_truncate:V4SF
2668 (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
2670 "vcvtpd2ps{y}\t{%1, %0|%0, %1}"
2671 [(set_attr "type" "ssecvt")
2672 (set_attr "prefix" "vex")
2673 (set_attr "mode" "V4SF")])
;; Expander for the V2DF -> V4SF pattern: it wraps float_truncate:V2SF of the
;; V2DF operand 1 and supplies operand 2 as the V2SFmode zero constant.
2675 (define_expand "sse2_cvtpd2ps"
2676 [(set (match_operand:V4SF 0 "register_operand" "")
2678 (float_truncate:V2SF
2679 (match_operand:V2DF 1 "nonimmediate_operand" ""))
2682 "operands[2] = CONST0_RTX (V2SFmode);")
;; Matching insn for sse2_cvtpd2ps: float_truncate:V2SF of the V2DF operand 1
;; together with a V2SF operand 2 that must satisfy const0_operand.  The
;; output code returns either the "vcvtpd2ps{x}" or the "cvtpd2ps" template.
2684 (define_insn "*sse2_cvtpd2ps"
2685 [(set (match_operand:V4SF 0 "register_operand" "=x")
2687 (float_truncate:V2SF
2688 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2689 (match_operand:V2SF 2 "const0_operand" "")))]
2693 return "vcvtpd2ps{x}\t{%1, %0|%0, %1}";
2695 return "cvtpd2ps\t{%1, %0|%0, %1}";
2697 [(set_attr "type" "ssecvt")
2698 (set_attr "amdfam10_decode" "double")
2699 (set_attr "athlon_decode" "vector")
2700 (set_attr "bdver1_decode" "double")
2701 (set_attr "prefix_data16" "1")
2702 (set_attr "prefix" "maybe_vex")
2703 (set_attr "mode" "V4SF")])
;; vcvtps2pd: V4DF register destination computed from the V4SF operand 1
;; (register or memory).  Prefix is "vex".
2705 (define_insn "avx_cvtps2pd256"
2706 [(set (match_operand:V4DF 0 "register_operand" "=x")
2708 (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
2710 "vcvtps2pd\t{%1, %0|%0, %1}"
2711 [(set_attr "type" "ssecvt")
2712 (set_attr "prefix" "vex")
2713 (set_attr "mode" "V4DF")])
;; Variant of avx_cvtps2pd256 whose source is a V8SF operand with selector
;; elements 0..3.  The template prints operand 1 with the "%x" modifier.
2715 (define_insn "*avx_cvtps2pd256_2"
2716 [(set (match_operand:V4DF 0 "register_operand" "=x")
2719 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
2720 (parallel [(const_int 0) (const_int 1)
2721 (const_int 2) (const_int 3)]))))]
2723 "vcvtps2pd\t{%x1, %0|%0, %x1}"
2724 [(set_attr "type" "ssecvt")
2725 (set_attr "prefix" "vex")
2726 (set_attr "mode" "V4DF")])
;; cvtps2pd: V2DF register destination; selector elements 0 and 1 are applied
;; to the V4SF operand 1 (register or memory).
2728 (define_insn "sse2_cvtps2pd"
2729 [(set (match_operand:V2DF 0 "register_operand" "=x")
2732 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
2733 (parallel [(const_int 0) (const_int 1)]))))]
2735 "%vcvtps2pd\t{%1, %0|%0, %1}"
2736 [(set_attr "type" "ssecvt")
2737 (set_attr "amdfam10_decode" "direct")
2738 (set_attr "athlon_decode" "double")
2739 (set_attr "bdver1_decode" "double")
2740 (set_attr "prefix_data16" "0")
2741 (set_attr "prefix" "maybe_vex")
2742 (set_attr "mode" "V2DF")])
;; Two-step expander for V4SF operand 1 -> V2DF operand 0.  The first visible
;; selector is (6 7 2 3); the second step applies selector (0 1) and sets
;; operand 0.  Operand 2 is a fresh V4SF pseudo created by the preparation
;; statement.
2744 (define_expand "vec_unpacks_hi_v4sf"
2749 (match_operand:V4SF 1 "nonimmediate_operand" ""))
2750 (parallel [(const_int 6) (const_int 7)
2751 (const_int 2) (const_int 3)])))
2752 (set (match_operand:V2DF 0 "register_operand" "")
2756 (parallel [(const_int 0) (const_int 1)]))))]
2758 "operands[2] = gen_reg_rtx (V4SFmode);")
;; Two-step expander for V8SF operand 1 -> V4DF operand 0.  The first step
;; applies selector (4 5 6 7) to operand 1; the second sets operand 0.
;; Operand 2 is a fresh V4SF pseudo created by the preparation statement.
2760 (define_expand "vec_unpacks_hi_v8sf"
2763 (match_operand:V8SF 1 "nonimmediate_operand" "")
2764 (parallel [(const_int 4) (const_int 5)
2765 (const_int 6) (const_int 7)])))
2766 (set (match_operand:V4DF 0 "register_operand" "")
2770 "operands[2] = gen_reg_rtx (V4SFmode);")
;; Single-set expander: V2DF operand 0 is computed from the V4SF operand 1
;; with selector elements 0 and 1.
2772 (define_expand "vec_unpacks_lo_v4sf"
2773 [(set (match_operand:V2DF 0 "register_operand" "")
2776 (match_operand:V4SF 1 "nonimmediate_operand" "")
2777 (parallel [(const_int 0) (const_int 1)]))))]
;; Single-set expander: V4DF operand 0 is computed from the V8SF operand 1
;; with selector elements 0..3.
2780 (define_expand "vec_unpacks_lo_v8sf"
2781 [(set (match_operand:V4DF 0 "register_operand" "")
2784 (match_operand:V8SF 1 "nonimmediate_operand" "")
2785 (parallel [(const_int 0) (const_int 1)
2786 (const_int 2) (const_int 3)]))))]
;; V8HI operand 1 -> V4SF operand 0 in two emitted insns: vec_unpacks_hi_v8hi
;; into a fresh V4SI temporary, then sse2_cvtdq2ps from that temporary into
;; operand 0.
2789 (define_expand "vec_unpacks_float_hi_v8hi"
2790 [(match_operand:V4SF 0 "register_operand" "")
2791 (match_operand:V8HI 1 "register_operand" "")]
2794 rtx tmp = gen_reg_rtx (V4SImode);
2796 emit_insn (gen_vec_unpacks_hi_v8hi (tmp, operands[1]));
2797 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; V8HI operand 1 -> V4SF operand 0 in two emitted insns: vec_unpacks_lo_v8hi
;; into a fresh V4SI temporary, then sse2_cvtdq2ps from that temporary into
;; operand 0.
2801 (define_expand "vec_unpacks_float_lo_v8hi"
2802 [(match_operand:V4SF 0 "register_operand" "")
2803 (match_operand:V8HI 1 "register_operand" "")]
2806 rtx tmp = gen_reg_rtx (V4SImode);
2808 emit_insn (gen_vec_unpacks_lo_v8hi (tmp, operands[1]));
2809 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; V8HI operand 1 -> V4SF operand 0 in two emitted insns: vec_unpacku_hi_v8hi
;; into a fresh V4SI temporary, then sse2_cvtdq2ps from that temporary into
;; operand 0.
2813 (define_expand "vec_unpacku_float_hi_v8hi"
2814 [(match_operand:V4SF 0 "register_operand" "")
2815 (match_operand:V8HI 1 "register_operand" "")]
2818 rtx tmp = gen_reg_rtx (V4SImode);
2820 emit_insn (gen_vec_unpacku_hi_v8hi (tmp, operands[1]));
2821 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; V8HI operand 1 -> V4SF operand 0 in two emitted insns: vec_unpacku_lo_v8hi
;; into a fresh V4SI temporary, then sse2_cvtdq2ps from that temporary into
;; operand 0.
2825 (define_expand "vec_unpacku_float_lo_v8hi"
2826 [(match_operand:V4SF 0 "register_operand" "")
2827 (match_operand:V8HI 1 "register_operand" "")]
2830 rtx tmp = gen_reg_rtx (V4SImode);
2832 emit_insn (gen_vec_unpacku_lo_v8hi (tmp, operands[1]));
2833 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; Two-step expander for V4SI operand 1 -> V2DF operand 0.  The first step
;; applies selector (2 3 2 3) to operand 1; the second applies selector (0 1)
;; and sets operand 0.  Operand 2 is a fresh V4SI pseudo created by the
;; preparation statement.
2837 (define_expand "vec_unpacks_float_hi_v4si"
2840 (match_operand:V4SI 1 "nonimmediate_operand" "")
2841 (parallel [(const_int 2) (const_int 3)
2842 (const_int 2) (const_int 3)])))
2843 (set (match_operand:V2DF 0 "register_operand" "")
2847 (parallel [(const_int 0) (const_int 1)]))))]
2849 "operands[2] = gen_reg_rtx (V4SImode);")
;; Single-set expander: V2DF operand 0 is computed from the V4SI operand 1
;; with selector elements 0 and 1.
2851 (define_expand "vec_unpacks_float_lo_v4si"
2852 [(set (match_operand:V2DF 0 "register_operand" "")
2855 (match_operand:V4SI 1 "nonimmediate_operand" "")
2856 (parallel [(const_int 0) (const_int 1)]))))]
;; Two-step expander for V8SI operand 1 -> V4DF operand 0.  The first step
;; applies selector (4 5 6 7) to operand 1; the second sets operand 0.
;; Operand 2 is a fresh V4SI pseudo created by the preparation statement.
2859 (define_expand "vec_unpacks_float_hi_v8si"
2862 (match_operand:V8SI 1 "nonimmediate_operand" "")
2863 (parallel [(const_int 4) (const_int 5)
2864 (const_int 6) (const_int 7)])))
2865 (set (match_operand:V4DF 0 "register_operand" "")
2869 "operands[2] = gen_reg_rtx (V4SImode);")
;; Single-set expander: V4DF operand 0 is computed from the V8SI operand 1
;; with selector elements 0..3.
2871 (define_expand "vec_unpacks_float_lo_v8si"
2872 [(set (match_operand:V4DF 0 "register_operand" "")
2875 (match_operand:V8SI 1 "nonimmediate_operand" "")
2876 (parallel [(const_int 0) (const_int 1)]))))]
;; Multi-step expander for V4SI operand 1 -> V2DF operand 0.  Visible steps:
;; selector (2 3 2 3) on operand 1, a selector (0 1) step, an lt comparison of
;; operand 6 against operand 3, an AND of operand 7 with operand 4, and a
;; final add of operands 6 and 8 into operand 0.  The preparation code sets
;; operand 3 to a zero V2DF vector in a register, operand 4 to a register
;; holding the vector that ix86_build_const_vector makes from the DFmode
;; constant 1.0 * 2**32, operand 5 to a fresh V4SI pseudo and operands 6..8 to
;; fresh V2DF pseudos.
;; NOTE(review): presumably the lt/and/plus tail corrects a signed conversion
;; for unsigned inputs -- confirm against the full pattern.
2880 (define_expand "vec_unpacku_float_hi_v4si"
2883 (match_operand:V4SI 1 "nonimmediate_operand" "")
2884 (parallel [(const_int 2) (const_int 3)
2885 (const_int 2) (const_int 3)])))
2890 (parallel [(const_int 0) (const_int 1)]))))
2892 (lt:V2DF (match_dup 6) (match_dup 3)))
2894 (and:V2DF (match_dup 7) (match_dup 4)))
2895 (set (match_operand:V2DF 0 "register_operand" "")
2896 (plus:V2DF (match_dup 6) (match_dup 8)))]
2899 REAL_VALUE_TYPE TWO32r;
2903 real_ldexp (&TWO32r, &dconst1, 32);
2904 x = const_double_from_real_value (TWO32r, DFmode);
2906 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
2907 operands[4] = force_reg (V2DFmode,
2908 ix86_build_const_vector (V2DFmode, 1, x));
2910 operands[5] = gen_reg_rtx (V4SImode);
2912 for (i = 6; i < 9; i++)
2913 operands[i] = gen_reg_rtx (V2DFmode);
;; Multi-step expander for V4SI operand 1 -> V2DF operand 0.  Visible steps:
;; a selector (0 1) step on operand 1, an lt comparison of operand 5 against
;; operand 3, an AND of operand 6 with operand 4, and a final add of operands
;; 5 and 7 into operand 0.  The preparation code sets operand 3 to a zero
;; V2DF vector in a register, operand 4 to a register holding the vector that
;; ix86_build_const_vector makes from the DFmode constant 1.0 * 2**32, and
;; operands 5..7 to fresh V2DF pseudos.
;; NOTE(review): presumably the lt/and/plus tail corrects a signed conversion
;; for unsigned inputs -- confirm against the full pattern.
2916 (define_expand "vec_unpacku_float_lo_v4si"
2920 (match_operand:V4SI 1 "nonimmediate_operand" "")
2921 (parallel [(const_int 0) (const_int 1)]))))
2923 (lt:V2DF (match_dup 5) (match_dup 3)))
2925 (and:V2DF (match_dup 6) (match_dup 4)))
2926 (set (match_operand:V2DF 0 "register_operand" "")
2927 (plus:V2DF (match_dup 5) (match_dup 7)))]
2930 REAL_VALUE_TYPE TWO32r;
2934 real_ldexp (&TWO32r, &dconst1, 32);
2935 x = const_double_from_real_value (TWO32r, DFmode);
2937 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
2938 operands[4] = force_reg (V2DFmode,
2939 ix86_build_const_vector (V2DFmode, 1, x));
2941 for (i = 5; i < 8; i++)
2942 operands[i] = gen_reg_rtx (V2DFmode);
;; Expander taking two V4DF inputs (operands 1 and 2) and producing a V8SF
;; operand 0.  Each input goes through float_truncate:V4SF; operands 3 and 4
;; are fresh V4SF pseudos created by the preparation code.
2945 (define_expand "vec_pack_trunc_v4df"
2947 (float_truncate:V4SF
2948 (match_operand:V4DF 1 "nonimmediate_operand" "")))
2950 (float_truncate:V4SF
2951 (match_operand:V4DF 2 "nonimmediate_operand" "")))
2952 (set (match_operand:V8SF 0 "register_operand" "")
2958 operands[3] = gen_reg_rtx (V4SFmode);
2959 operands[4] = gen_reg_rtx (V4SFmode);
;; Expander taking two V2DF inputs (operands 1 and 2) and producing a V4SF
;; operand 0.  It emits sse2_cvtpd2ps for each input into fresh V4SF
;; temporaries r1 and r2, then sse_movlhps (operands[0], r1, r2).
2962 (define_expand "vec_pack_trunc_v2df"
2963 [(match_operand:V4SF 0 "register_operand" "")
2964 (match_operand:V2DF 1 "nonimmediate_operand" "")
2965 (match_operand:V2DF 2 "nonimmediate_operand" "")]
2970 r1 = gen_reg_rtx (V4SFmode);
2971 r2 = gen_reg_rtx (V4SFmode);
2973 emit_insn (gen_sse2_cvtpd2ps (r1, operands[1]));
2974 emit_insn (gen_sse2_cvtpd2ps (r2, operands[2]));
2975 emit_insn (gen_sse_movlhps (operands[0], r1, r2));
;; Expander taking two V2DF inputs (operands 1 and 2) and producing a V4SI
;; operand 0.  It emits sse2_cvttpd2dq for each input into fresh V4SI
;; temporaries r1 and r2, then vec_interleave_lowv2di on the V2DImode
;; lowparts of operands[0], r1 and r2.
2979 (define_expand "vec_pack_sfix_trunc_v2df"
2980 [(match_operand:V4SI 0 "register_operand" "")
2981 (match_operand:V2DF 1 "nonimmediate_operand" "")
2982 (match_operand:V2DF 2 "nonimmediate_operand" "")]
2987 r1 = gen_reg_rtx (V4SImode);
2988 r2 = gen_reg_rtx (V4SImode);
2990 emit_insn (gen_sse2_cvttpd2dq (r1, operands[1]));
2991 emit_insn (gen_sse2_cvttpd2dq (r2, operands[2]));
2992 emit_insn (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]),
2993 gen_lowpart (V2DImode, r1),
2994 gen_lowpart (V2DImode, r2)));
;; Same structure as vec_pack_sfix_trunc_v2df, but each V2DF input is
;; converted with sse2_cvtpd2dq instead of sse2_cvttpd2dq before the
;; vec_interleave_lowv2di on the V2DImode lowparts.
2998 (define_expand "vec_pack_sfix_v2df"
2999 [(match_operand:V4SI 0 "register_operand" "")
3000 (match_operand:V2DF 1 "nonimmediate_operand" "")
3001 (match_operand:V2DF 2 "nonimmediate_operand" "")]
3006 r1 = gen_reg_rtx (V4SImode);
3007 r2 = gen_reg_rtx (V4SImode);
3009 emit_insn (gen_sse2_cvtpd2dq (r1, operands[1]));
3010 emit_insn (gen_sse2_cvtpd2dq (r2, operands[2]));
3011 emit_insn (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]),
3012 gen_lowpart (V2DImode, r1),
3013 gen_lowpart (V2DImode, r2)));
3017 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3019 ;; Parallel single-precision floating point element swizzling
3021 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander front end for sse_movhlps.  All three V4SF operands may be
;; register or memory.  The preparation code passes the operands through
;; ix86_fixup_binary_operands, emits sse_movhlps into the returned destination
;; and, when that destination is not operands[0], copies it there afterwards.
3023 (define_expand "sse_movhlps_exp"
3024 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3027 (match_operand:V4SF 1 "nonimmediate_operand" "")
3028 (match_operand:V4SF 2 "nonimmediate_operand" ""))
3029 (parallel [(const_int 6)
3035 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
3037 emit_insn (gen_sse_movhlps (dst, operands[1], operands[2]));
3039 /* Fix up the destination if needed. */
3040 if (dst != operands[0])
3041 emit_move_insn (operands[0], dst);
;; Five-alternative insn, enabled for TARGET_SSE when operands 1 and 2 are
;; not both memory:
;;   0: movhlps   (noavx, register operand 2, operand 1 tied to destination)
;;   1: vmovhlps  (avx, three-operand form)
;;   2: movlps    (noavx, operand 2 constraint "o", printed with %H)
;;   3: vmovlps   (avx, operand 2 constraint "o", printed with %H)
;;   4: %vmovhps  with a memory ("m") destination
;; The last three alternatives are attributed mode V2SF.
3046 (define_insn "sse_movhlps"
3047 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,m")
3050 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
3051 (match_operand:V4SF 2 "nonimmediate_operand" " x,x,o,o,x"))
3052 (parallel [(const_int 6)
3056 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3058 movhlps\t{%2, %0|%0, %2}
3059 vmovhlps\t{%2, %1, %0|%0, %1, %2}
3060 movlps\t{%H2, %0|%0, %H2}
3061 vmovlps\t{%H2, %1, %0|%0, %1, %H2}
3062 %vmovhps\t{%2, %0|%0, %2}"
3063 [(set_attr "isa" "noavx,avx,noavx,avx,*")
3064 (set_attr "type" "ssemov")
3065 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
3066 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
;; Expander front end for sse_movlhps.  All three V4SF operands may be
;; register or memory.  The preparation code passes the operands through
;; ix86_fixup_binary_operands, emits sse_movlhps into the returned destination
;; and, when that destination is not operands[0], copies it there afterwards.
3068 (define_expand "sse_movlhps_exp"
3069 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3072 (match_operand:V4SF 1 "nonimmediate_operand" "")
3073 (match_operand:V4SF 2 "nonimmediate_operand" ""))
3074 (parallel [(const_int 0)
3080 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
3082 emit_insn (gen_sse_movlhps (dst, operands[1], operands[2]));
3084 /* Fix up the destination if needed. */
3085 if (dst != operands[0])
3086 emit_move_insn (operands[0], dst);
;; Five-alternative insn, enabled for TARGET_SSE when
;; ix86_binary_operator_ok accepts the operands:
;;   0: movlhps   (noavx, register operand 2, operand 1 tied to destination)
;;   1: vmovlhps  (avx, three-operand form)
;;   2: movhps    (noavx, memory operand 2)
;;   3: vmovhps   (avx)
;;   4: %vmovlps  storing operand 2 to the %H0 part of an "o" destination
;; The last three alternatives are attributed mode V2SF.
;; NOTE(review): alternative 3 (vmovhps) gives operand 2 the constraint "x",
;; whereas its noavx counterpart (alternative 2, movhps) uses "m".  Confirm
;; whether a memory constraint was intended for alternative 3.
3091 (define_insn "sse_movlhps"
3092 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,o")
3095 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
3096 (match_operand:V4SF 2 "nonimmediate_operand" " x,x,m,x,x"))
3097 (parallel [(const_int 0)
3101 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
3103 movlhps\t{%2, %0|%0, %2}
3104 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3105 movhps\t{%2, %0|%0, %2}
3106 vmovhps\t{%2, %1, %0|%0, %1, %2}
3107 %vmovlps\t{%2, %H0|%H0, %2}"
3108 [(set_attr "isa" "noavx,avx,noavx,avx,*")
3109 (set_attr "type" "ssemov")
3110 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
3111 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
3113 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; vunpckhps on V8SF: operand 1 is a register, operand 2 register or memory.
;; The selector is (2 10 3 11 6 14 7 15), i.e. it alternates an index below 8
;; with the same index plus 8.
3114 (define_insn "avx_unpckhps256"
3115 [(set (match_operand:V8SF 0 "register_operand" "=x")
3118 (match_operand:V8SF 1 "register_operand" "x")
3119 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3120 (parallel [(const_int 2) (const_int 10)
3121 (const_int 3) (const_int 11)
3122 (const_int 6) (const_int 14)
3123 (const_int 7) (const_int 15)])))]
3125 "vunpckhps\t{%2, %1, %0|%0, %1, %2}"
3126 [(set_attr "type" "sselog")
3127 (set_attr "prefix" "vex")
3128 (set_attr "mode" "V8SF")])
;; Three-step expander on V8SF operands 1 and 2.  The visible selectors are
;; (0 8 1 9 4 12 5 13), then (2 10 3 11 6 14 7 15), and finally
;; (4 5 6 7 12 13 14 15) for the step that sets operand 0.  Operands 3 and 4
;; are fresh V8SF pseudos created by the preparation code.
3130 (define_expand "vec_interleave_highv8sf"
3134 (match_operand:V8SF 1 "register_operand" "x")
3135 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3136 (parallel [(const_int 0) (const_int 8)
3137 (const_int 1) (const_int 9)
3138 (const_int 4) (const_int 12)
3139 (const_int 5) (const_int 13)])))
3145 (parallel [(const_int 2) (const_int 10)
3146 (const_int 3) (const_int 11)
3147 (const_int 6) (const_int 14)
3148 (const_int 7) (const_int 15)])))
3149 (set (match_operand:V8SF 0 "register_operand" "")
3154 (parallel [(const_int 4) (const_int 5)
3155 (const_int 6) (const_int 7)
3156 (const_int 12) (const_int 13)
3157 (const_int 14) (const_int 15)])))]
3160 operands[3] = gen_reg_rtx (V8SFmode);
3161 operands[4] = gen_reg_rtx (V8SFmode);
;; unpckhps / vunpckhps on V4SF with selector (2 6 3 7).  The "noavx"
;; alternative ties operand 1 to the destination; the "avx" alternative uses
;; the three-operand template.  Operand 2 may be register or memory.
3164 (define_insn "vec_interleave_highv4sf"
3165 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3168 (match_operand:V4SF 1 "register_operand" "0,x")
3169 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
3170 (parallel [(const_int 2) (const_int 6)
3171 (const_int 3) (const_int 7)])))]
3174 unpckhps\t{%2, %0|%0, %2}
3175 vunpckhps\t{%2, %1, %0|%0, %1, %2}"
3176 [(set_attr "isa" "noavx,avx")
3177 (set_attr "type" "sselog")
3178 (set_attr "prefix" "orig,vex")
3179 (set_attr "mode" "V4SF")])
3181 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; vunpcklps on V8SF: operand 1 is a register, operand 2 register or memory.
;; The selector is (0 8 1 9 4 12 5 13), i.e. it alternates an index below 8
;; with the same index plus 8.
3182 (define_insn "avx_unpcklps256"
3183 [(set (match_operand:V8SF 0 "register_operand" "=x")
3186 (match_operand:V8SF 1 "register_operand" "x")
3187 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3188 (parallel [(const_int 0) (const_int 8)
3189 (const_int 1) (const_int 9)
3190 (const_int 4) (const_int 12)
3191 (const_int 5) (const_int 13)])))]
3193 "vunpcklps\t{%2, %1, %0|%0, %1, %2}"
3194 [(set_attr "type" "sselog")
3195 (set_attr "prefix" "vex")
3196 (set_attr "mode" "V8SF")])
;; Three-step expander on V8SF operands 1 and 2.  The visible selectors are
;; (0 8 1 9 4 12 5 13), then (2 10 3 11 6 14 7 15), and finally
;; (0 1 2 3 8 9 10 11) for the step that sets operand 0.  Operands 3 and 4
;; are fresh V8SF pseudos created by the preparation code.
3198 (define_expand "vec_interleave_lowv8sf"
3202 (match_operand:V8SF 1 "register_operand" "x")
3203 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3204 (parallel [(const_int 0) (const_int 8)
3205 (const_int 1) (const_int 9)
3206 (const_int 4) (const_int 12)
3207 (const_int 5) (const_int 13)])))
3213 (parallel [(const_int 2) (const_int 10)
3214 (const_int 3) (const_int 11)
3215 (const_int 6) (const_int 14)
3216 (const_int 7) (const_int 15)])))
3217 (set (match_operand:V8SF 0 "register_operand" "")
3222 (parallel [(const_int 0) (const_int 1)
3223 (const_int 2) (const_int 3)
3224 (const_int 8) (const_int 9)
3225 (const_int 10) (const_int 11)])))]
3228 operands[3] = gen_reg_rtx (V8SFmode);
3229 operands[4] = gen_reg_rtx (V8SFmode);
;; unpcklps / vunpcklps on V4SF with selector (0 4 1 5).  The "noavx"
;; alternative ties operand 1 to the destination; the "avx" alternative uses
;; the three-operand template.  Operand 2 may be register or memory.
3232 (define_insn "vec_interleave_lowv4sf"
3233 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3236 (match_operand:V4SF 1 "register_operand" "0,x")
3237 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
3238 (parallel [(const_int 0) (const_int 4)
3239 (const_int 1) (const_int 5)])))]
3242 unpcklps\t{%2, %0|%0, %2}
3243 vunpcklps\t{%2, %1, %0|%0, %1, %2}"
3244 [(set_attr "isa" "noavx,avx")
3245 (set_attr "type" "sselog")
3246 (set_attr "prefix" "orig,vex")
3247 (set_attr "mode" "V4SF")])
3249 ;; These are modeled with the same vec_concat as the others so that we
3250 ;; capture users of shufps that can use the new instructions
;; vmovshdup on V8SF: single source operand 1 (register or memory) with
;; selector (1 1 3 3 5 5 7 7), i.e. every odd-indexed element appears twice.
3251 (define_insn "avx_movshdup256"
3252 [(set (match_operand:V8SF 0 "register_operand" "=x")
3255 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
3257 (parallel [(const_int 1) (const_int 1)
3258 (const_int 3) (const_int 3)
3259 (const_int 5) (const_int 5)
3260 (const_int 7) (const_int 7)])))]
3262 "vmovshdup\t{%1, %0|%0, %1}"
3263 [(set_attr "type" "sse")
3264 (set_attr "prefix" "vex")
3265 (set_attr "mode" "V8SF")])
;; movshdup on V4SF: single source operand 1 (register or memory); the
;; selector starts with element 1.  prefix_rep is "1" and the "%v" template
;; prefix with "maybe_vex" covers both the legacy and the VEX spelling.
3267 (define_insn "sse3_movshdup"
3268 [(set (match_operand:V4SF 0 "register_operand" "=x")
3271 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3273 (parallel [(const_int 1)
3278 "%vmovshdup\t{%1, %0|%0, %1}"
3279 [(set_attr "type" "sse")
3280 (set_attr "prefix_rep" "1")
3281 (set_attr "prefix" "maybe_vex")
3282 (set_attr "mode" "V4SF")])
;; vmovsldup on V8SF: single source operand 1 (register or memory) with
;; selector (0 0 2 2 4 4 6 6), i.e. every even-indexed element appears twice.
3284 (define_insn "avx_movsldup256"
3285 [(set (match_operand:V8SF 0 "register_operand" "=x")
3288 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
3290 (parallel [(const_int 0) (const_int 0)
3291 (const_int 2) (const_int 2)
3292 (const_int 4) (const_int 4)
3293 (const_int 6) (const_int 6)])))]
3295 "vmovsldup\t{%1, %0|%0, %1}"
3296 [(set_attr "type" "sse")
3297 (set_attr "prefix" "vex")
3298 (set_attr "mode" "V8SF")])
;; sse3_movsldup: V4SF counterpart of avx_movsldup256; emits %vmovsldup from a
;; register-or-memory source.  Attributes mark a rep prefix and a "maybe_vex"
;; prefix.
3300 (define_insn "sse3_movsldup"
3301 [(set (match_operand:V4SF 0 "register_operand" "=x")
3304 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3306 (parallel [(const_int 0)
3311 "%vmovsldup\t{%1, %0|%0, %1}"
3312 [(set_attr "type" "sse")
3313 (set_attr "prefix_rep" "1")
3314 (set_attr "prefix" "maybe_vex")
3315 (set_attr "mode" "V4SF")])
;; avx_shufps256: expander taking V8SF operands 0-2 and an SImode constant
;; mask (operand 3).  The mask is split into four 2-bit fields; each field is
;; passed to gen_avx_shufps256_1 twice, once with offset 0 or 8 and once with
;; that offset plus 4 (4 or 12).
3317 (define_expand "avx_shufps256"
3318 [(match_operand:V8SF 0 "register_operand" "")
3319 (match_operand:V8SF 1 "register_operand" "")
3320 (match_operand:V8SF 2 "nonimmediate_operand" "")
3321 (match_operand:SI 3 "const_int_operand" "")]
3324 int mask = INTVAL (operands[3]);
3325 emit_insn (gen_avx_shufps256_1 (operands[0], operands[1], operands[2],
3326 GEN_INT ((mask >> 0) & 3),
3327 GEN_INT ((mask >> 2) & 3),
3328 GEN_INT (((mask >> 4) & 3) + 8),
3329 GEN_INT (((mask >> 6) & 3) + 8),
3330 GEN_INT (((mask >> 0) & 3) + 4),
3331 GEN_INT (((mask >> 2) & 3) + 4),
3332 GEN_INT (((mask >> 4) & 3) + 12),
3333 GEN_INT (((mask >> 6) & 3) + 12)));
3337 ;; Each 2-bit field of the mask selects 2 elements (selectors N and N + 4).
;; avx_shufps256_1: V8SF shuffle with eight explicit selector operands (3-10).
;; The condition requires operands 7-10 to equal operands 3-6 plus 4.
;; The output code rebuilds the 8-bit immediate from operands 3-6
;; (op3 | op4 << 2 | (op5 - 8) << 4 | (op6 - 8) << 6), stores it back into
;; operands[3], and returns the vshufps template.
3338 (define_insn "avx_shufps256_1"
3339 [(set (match_operand:V8SF 0 "register_operand" "=x")
3342 (match_operand:V8SF 1 "register_operand" "x")
3343 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3344 (parallel [(match_operand 3 "const_0_to_3_operand" "")
3345 (match_operand 4 "const_0_to_3_operand" "")
3346 (match_operand 5 "const_8_to_11_operand" "")
3347 (match_operand 6 "const_8_to_11_operand" "")
3348 (match_operand 7 "const_4_to_7_operand" "")
3349 (match_operand 8 "const_4_to_7_operand" "")
3350 (match_operand 9 "const_12_to_15_operand" "")
3351 (match_operand 10 "const_12_to_15_operand" "")])))]
3353 && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
3354 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
3355 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
3356 && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4))"
3359 mask = INTVAL (operands[3]);
3360 mask |= INTVAL (operands[4]) << 2;
3361 mask |= (INTVAL (operands[5]) - 8) << 4;
3362 mask |= (INTVAL (operands[6]) - 8) << 6;
3363 operands[3] = GEN_INT (mask);
3365 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3367 [(set_attr "type" "sselog")
3368 (set_attr "length_immediate" "1")
3369 (set_attr "prefix" "vex")
3370 (set_attr "mode" "V8SF")])
;; sse_shufps: expander taking V4SF operands 0-2 and an SImode constant mask
;; (operand 3).  The four 2-bit mask fields become the selector arguments of
;; gen_sse_shufps_v4sf; the upper two fields are offset by 4.
3372 (define_expand "sse_shufps"
3373 [(match_operand:V4SF 0 "register_operand" "")
3374 (match_operand:V4SF 1 "register_operand" "")
3375 (match_operand:V4SF 2 "nonimmediate_operand" "")
3376 (match_operand:SI 3 "const_int_operand" "")]
3379 int mask = INTVAL (operands[3]);
3380 emit_insn (gen_sse_shufps_v4sf (operands[0], operands[1], operands[2],
3381 GEN_INT ((mask >> 0) & 3),
3382 GEN_INT ((mask >> 2) & 3),
3383 GEN_INT (((mask >> 4) & 3) + 4),
3384 GEN_INT (((mask >> 6) & 3) + 4)));
;; sse_shufps_<mode>: shuffle over the VI4F_128 iterator modes.  Selects four
;; elements from the concatenation of operands 1 and 2: selectors 3 and 4 are
;; in 0..3, selectors 5 and 6 are in 4..7.
;; The output code folds the selectors back into an immediate
;; (op3 | op4 << 2 | (op5 - 4) << 4 | (op6 - 4) << 6) stored in operands[3],
;; then picks the shufps or vshufps template by which_alternative.
3388 (define_insn "sse_shufps_<mode>"
3389 [(set (match_operand:VI4F_128 0 "register_operand" "=x,x")
3390 (vec_select:VI4F_128
3391 (vec_concat:<ssedoublevecmode>
3392 (match_operand:VI4F_128 1 "register_operand" "0,x")
3393 (match_operand:VI4F_128 2 "nonimmediate_operand" "xm,xm"))
3394 (parallel [(match_operand 3 "const_0_to_3_operand" "")
3395 (match_operand 4 "const_0_to_3_operand" "")
3396 (match_operand 5 "const_4_to_7_operand" "")
3397 (match_operand 6 "const_4_to_7_operand" "")])))]
3401 mask |= INTVAL (operands[3]) << 0;
3402 mask |= INTVAL (operands[4]) << 2;
3403 mask |= (INTVAL (operands[5]) - 4) << 4;
3404 mask |= (INTVAL (operands[6]) - 4) << 6;
3405 operands[3] = GEN_INT (mask);
3407 switch (which_alternative)
3410 return "shufps\t{%3, %2, %0|%0, %2, %3}";
3412 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3417 [(set_attr "isa" "noavx,avx")
3418 (set_attr "type" "sselog")
3419 (set_attr "length_immediate" "1")
3420 (set_attr "prefix" "orig,vex")
3421 (set_attr "mode" "V4SF")])
;; sse_storehps: V2SF destination receives elements 2 and 3 of V4SF operand 1.
;; Alternatives: memory <- register via %vmovhps; register <- register via
;; %vmovhlps; register <- offsettable memory via %vmovlps on %H1.
;; NOTE(review): %H1 presumably addresses the upper 8 bytes of the memory
;; operand -- confirm against the i386 operand-printing code.
3423 (define_insn "sse_storehps"
3424 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3426 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
3427 (parallel [(const_int 2) (const_int 3)])))]
3430 %vmovhps\t{%1, %0|%0, %1}
3431 %vmovhlps\t{%1, %d0|%d0, %1}
3432 %vmovlps\t{%H1, %d0|%d0, %H1}"
3433 [(set_attr "type" "ssemov")
3434 (set_attr "prefix" "maybe_vex")
3435 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; sse_loadhps_exp: expander wrapping sse_loadhps.  It obtains a destination
;; from ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands), emits
;; gen_sse_loadhps into it, and copies the result to operands[0] when the
;; returned destination differs from operands[0].
3437 (define_expand "sse_loadhps_exp"
3438 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3441 (match_operand:V4SF 1 "nonimmediate_operand" "")
3442 (parallel [(const_int 0) (const_int 1)]))
3443 (match_operand:V2SF 2 "nonimmediate_operand" "")))]
3446 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
3448 emit_insn (gen_sse_loadhps (dst, operands[1], operands[2]));
3450 /* Fix up the destination if needed. */
3451 if (dst != operands[0])
3452 emit_move_insn (operands[0], dst);
;; sse_loadhps: V4SF destination built from elements 0 and 1 of V4SF operand 1
;; and V2SF operand 2.
;; Alternatives 0/1 take operand 2 from memory (movhps / vmovhps), 2/3 take
;; it from a register (movlhps / vmovlhps), and alternative 4 has an
;; offsettable-memory destination tied to operand 1 and emits %vmovlps to %H0.
3457 (define_insn "sse_loadhps"
3458 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,o")
3461 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
3462 (parallel [(const_int 0) (const_int 1)]))
3463 (match_operand:V2SF 2 "nonimmediate_operand" " m,m,x,x,x")))]
3466 movhps\t{%2, %0|%0, %2}
3467 vmovhps\t{%2, %1, %0|%0, %1, %2}
3468 movlhps\t{%2, %0|%0, %2}
3469 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3470 %vmovlps\t{%2, %H0|%H0, %2}"
3471 [(set_attr "isa" "noavx,avx,noavx,avx,*")
3472 (set_attr "type" "ssemov")
3473 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
3474 (set_attr "mode" "V2SF,V2SF,V4SF,V4SF,V2SF")])
;; sse_storelps: V2SF destination receives elements 0 and 1 of V4SF operand 1.
;; Alternatives: memory <- register via %vmovlps; register <- register via
;; %vmovaps; register <- memory via %vmovlps.
3476 (define_insn "sse_storelps"
3477 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3479 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,m")
3480 (parallel [(const_int 0) (const_int 1)])))]
3483 %vmovlps\t{%1, %0|%0, %1}
3484 %vmovaps\t{%1, %0|%0, %1}
3485 %vmovlps\t{%1, %d0|%d0, %1}"
3486 [(set_attr "type" "ssemov")
3487 (set_attr "prefix" "maybe_vex")
3488 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; sse_loadlps_exp: expander wrapping sse_loadlps.  It obtains a destination
;; from ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands), emits
;; gen_sse_loadlps into it, and copies the result to operands[0] when the
;; returned destination differs from operands[0].
3490 (define_expand "sse_loadlps_exp"
3491 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3493 (match_operand:V2SF 2 "nonimmediate_operand" "")
3495 (match_operand:V4SF 1 "nonimmediate_operand" "")
3496 (parallel [(const_int 2) (const_int 3)]))))]
3499 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
3501 emit_insn (gen_sse_loadlps (dst, operands[1], operands[2]));
3503 /* Fix up the destination if needed. */
3504 if (dst != operands[0])
3505 emit_move_insn (operands[0], dst);
;; sse_loadlps: V4SF destination built from V2SF operand 2 and elements 2 and
;; 3 of V4SF operand 1.
;; Alternatives 0/1 keep everything in registers and use shufps / vshufps
;; with immediate 0xe4; alternatives 2/3 take operand 2 from memory or a
;; register with movlps / vmovlps; alternative 4 has a memory destination
;; tied to operand 1 and emits %vmovlps of operand 2.
3510 (define_insn "sse_loadlps"
3511 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,m")
3513 (match_operand:V2SF 2 "nonimmediate_operand" " 0,x,m,x,x")
3515 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,0,x,0")
3516 (parallel [(const_int 2) (const_int 3)]))))]
3519 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
3520 vshufps\t{$0xe4, %1, %2, %0|%0, %2, %1, 0xe4}
3521 movlps\t{%2, %0|%0, %2}
3522 vmovlps\t{%2, %1, %0|%0, %1, %2}
3523 %vmovlps\t{%2, %0|%0, %2}"
3524 [(set_attr "isa" "noavx,avx,noavx,avx,*")
3525 (set_attr "type" "sselog,sselog,ssemov,ssemov,ssemov")
3526 (set_attr "length_immediate" "1,1,*,*,*")
3527 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
3528 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
;; sse_movss: register-only V4SF pattern over operands 2 and 1.
;; Alternative 0 (noavx, operand 1 tied to the destination) emits movss;
;; alternative 1 (avx) emits three-operand vmovss.  The insn mode is SF.
3530 (define_insn "sse_movss"
3531 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3533 (match_operand:V4SF 2 "register_operand" " x,x")
3534 (match_operand:V4SF 1 "register_operand" " 0,x")
3538 movss\t{%2, %0|%0, %2}
3539 vmovss\t{%2, %1, %0|%0, %1, %2}"
3540 [(set_attr "isa" "noavx,avx")
3541 (set_attr "type" "ssemov")
3542 (set_attr "prefix" "orig,vex")
3543 (set_attr "mode" "SF")])
;; vec_dupv4sf: expander with a V4SF register destination and an SFmode
;; register-or-memory source.  The preparation code can force operand 1 into
;; an SFmode register.
3545 (define_expand "vec_dupv4sf"
3546 [(set (match_operand:V4SF 0 "register_operand" "")
3548 (match_operand:SF 1 "nonimmediate_operand" "")))]
3552 operands[1] = force_reg (SFmode, operands[1]);
;; avx2_vec_dupv4sf: emits vbroadcastss into a V4SF register; the source is
;; element 0 of V4SF register operand 1.
3555 (define_insn "avx2_vec_dupv4sf"
3556 [(set (match_operand:V4SF 0 "register_operand" "=x")
3559 (match_operand:V4SF 1 "register_operand" "x")
3560 (parallel [(const_int 0)]))))]
3562 "vbroadcastss\t{%1, %0|%0, %1}"
3563 [(set_attr "type" "sselog1")
3564 (set_attr "prefix" "vex")
3565 (set_attr "mode" "V4SF")])
;; *vec_dupv4sf_avx: V4SF destination from an SFmode source.
;; Alternative 0 (register source) emits vshufps with immediate 0 and the
;; source used for both inputs; alternative 1 (memory source) emits
;; vbroadcastss.
3567 (define_insn "*vec_dupv4sf_avx"
3568 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3570 (match_operand:SF 1 "nonimmediate_operand" "x,m")))]
3573 vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0}
3574 vbroadcastss\t{%1, %0|%0, %1}"
3575 [(set_attr "type" "sselog1,ssemov")
3576 (set_attr "length_immediate" "1,0")
3577 (set_attr "prefix_extra" "0,1")
3578 (set_attr "prefix" "vex")
3579 (set_attr "mode" "V4SF")])
;; avx2_vec_dupv8sf: emits vbroadcastss into a V8SF register; the source is
;; element 0 of V4SF register operand 1.
3581 (define_insn "avx2_vec_dupv8sf"
3582 [(set (match_operand:V8SF 0 "register_operand" "=x")
3585 (match_operand:V4SF 1 "register_operand" "x")
3586 (parallel [(const_int 0)]))))]
3588 "vbroadcastss\t{%1, %0|%0, %1}"
3589 [(set_attr "type" "sselog1")
3590 (set_attr "prefix" "vex")
3591 (set_attr "mode" "V8SF")])
;; *vec_dupv4sf: V4SF destination from an SFmode register that is tied to the
;; destination (constraint "0"); emits shufps with immediate 0 using the
;; destination register for both inputs.
3593 (define_insn "*vec_dupv4sf"
3594 [(set (match_operand:V4SF 0 "register_operand" "=x")
3596 (match_operand:SF 1 "register_operand" "0")))]
3598 "shufps\t{$0, %0, %0|%0, %0, 0}"
3599 [(set_attr "type" "sselog1")
3600 (set_attr "length_immediate" "1")
3601 (set_attr "mode" "V4SF")])
3603 ;; Although insertps takes register source, we prefer
3604 ;; unpcklps with register source since it is shorter.
;; *vec_concatv2sf_sse4_1: V2SF register destination from SFmode operands 1
;; and 2, with seven alternatives:
;;   0/1  register operand 2: unpcklps / vunpcklps
;;   2/3  memory operand 2: insertps / vinsertps with immediate 0x10
;;   4    memory operand 1, operand 2 matching constraint C: %vmovss
;;   5/6  "y"-class destination (types mmxcvt / mmxmov): punpckldq / movd
3605 (define_insn "*vec_concatv2sf_sse4_1"
3606 [(set (match_operand:V2SF 0 "register_operand" "=x,x,x,x,x,*y ,*y")
3608 (match_operand:SF 1 "nonimmediate_operand" " 0,x,0,x,m, 0 , m")
3609 (match_operand:SF 2 "vector_move_operand" " x,x,m,m,C,*ym, C")))]
3612 unpcklps\t{%2, %0|%0, %2}
3613 vunpcklps\t{%2, %1, %0|%0, %1, %2}
3614 insertps\t{$0x10, %2, %0|%0, %2, 0x10}
3615 vinsertps\t{$0x10, %2, %1, %0|%0, %1, %2, 0x10}
3616 %vmovss\t{%1, %0|%0, %1}
3617 punpckldq\t{%2, %0|%0, %2}
3618 movd\t{%1, %0|%0, %1}"
3619 [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
3620 (set_attr "type" "sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov")
3621 (set_attr "prefix_data16" "*,*,1,*,*,*,*")
3622 (set_attr "prefix_extra" "*,*,1,1,*,*,*")
3623 (set_attr "length_immediate" "*,*,1,1,*,*,*")
3624 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig")
3625 (set_attr "mode" "V4SF,V4SF,V4SF,V4SF,SF,DI,DI")])
3627 ;; ??? In theory we can match memory for the MMX alternative, but allowing
3628 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
3629 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; *vec_concatv2sf_sse: V2SF register destination from SFmode operands 1 and
;; 2 (operand 2 is reg_or_0_operand), with four alternatives:
;;   0  "x" registers, operand 1 tied to the destination: unpcklps
;;   1  memory operand 1, operand 2 matching constraint C: movss
;;   2  "y" registers, operand 1 tied to the destination: punpckldq
;;   3  memory operand 1 into a "y" register, operand 2 matching C: movd
3630 (define_insn "*vec_concatv2sf_sse"
3631 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
3633 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
3634 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
3637 unpcklps\t{%2, %0|%0, %2}
3638 movss\t{%1, %0|%0, %1}
3639 punpckldq\t{%2, %0|%0, %2}
3640 movd\t{%1, %0|%0, %1}"
3641 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
3642 (set_attr "mode" "V4SF,SF,DI,DI")])
;; *vec_concatv4sf: V4SF register destination from V2SF operands 1 and 2.
;; Alternatives 0/1 take operand 2 from a register (movlhps / vmovlhps);
;; alternatives 2/3 take it from memory (movhps / vmovhps).  The non-AVX
;; alternatives tie operand 1 to the destination.
3644 (define_insn "*vec_concatv4sf"
3645 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x,x")
3647 (match_operand:V2SF 1 "register_operand" " 0,x,0,x")
3648 (match_operand:V2SF 2 "nonimmediate_operand" " x,x,m,m")))]
3651 movlhps\t{%2, %0|%0, %2}
3652 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3653 movhps\t{%2, %0|%0, %2}
3654 vmovhps\t{%2, %1, %0|%0, %1, %2}"
3655 [(set_attr "isa" "noavx,avx,noavx,avx")
3656 (set_attr "type" "ssemov")
3657 (set_attr "prefix" "orig,vex,orig,vex")
3658 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF")])
;; vec_init<mode>: expander over the V_128 iterator modes.  It delegates to
;; ix86_expand_vector_init (false, operands[0], operands[1]); operand 1 has no
;; predicate or mode restriction in the pattern.
3660 (define_expand "vec_init<mode>"
3661 [(match_operand:V_128 0 "register_operand" "")
3662 (match_operand 1 "" "")]
3665 ix86_expand_vector_init (false, operands[0], operands[1]);
3669 ;; Avoid combining registers from different units in a single alternative,
3670 ;; see comment above inline_secondary_memory_needed function in i386.c
;; vec_set<mode>_0: pattern over the VI4F_128 iterator modes that combines a
;; vec_duplicate of scalar operand 2 with vector operand 1.  There are eleven
;; alternatives:
;;   0-3   operand 1 matches constraint C
;;   4-7   operand 1 is a register, tied to the destination or separate
;;   8-10  memory destination tied to operand 1
;; The "type" attribute is sselog for alternatives 0, 6 and 7, fmov for 9 and
;; imov for 10; the remaining value listed is ssemov.
3671 (define_insn "vec_set<mode>_0"
3672 [(set (match_operand:VI4F_128 0 "nonimmediate_operand"
3673 "=x,x,x ,x,x,x,x ,x ,m,m ,m")
3675 (vec_duplicate:VI4F_128
3676 (match_operand:<ssescalarmode> 2 "general_operand"
3677 " x,m,*r,m,x,x,*rm,*rm,x,fF,*r"))
3678 (match_operand:VI4F_128 1 "vector_move_operand"
3679 " C,C,C ,C,0,x,0 ,x ,0,0 ,0")
3683 %vinsertps\t{$0xe, %d2, %0|%0, %d2, 0xe}
3684 %vmov<ssescalarmodesuffix>\t{%2, %0|%0, %2}
3685 %vmovd\t{%2, %0|%0, %2}
3686 movss\t{%2, %0|%0, %2}
3687 movss\t{%2, %0|%0, %2}
3688 vmovss\t{%2, %1, %0|%0, %1, %2}
3689 pinsrd\t{$0, %2, %0|%0, %2, 0}
3690 vpinsrd\t{$0, %2, %1, %0|%0, %1, %2, 0}
3694 [(set_attr "isa" "sse4,sse2,sse2,noavx,noavx,avx,sse4_noavx,avx,*,*,*")
3696 (cond [(eq_attr "alternative" "0,6,7")
3697 (const_string "sselog")
3698 (eq_attr "alternative" "9")
3699 (const_string "fmov")
3700 (eq_attr "alternative" "10")
3701 (const_string "imov")
3703 (const_string "ssemov")))
3704 (set_attr "prefix_extra" "*,*,*,*,*,*,1,1,*,*,*")
3705 (set_attr "length_immediate" "*,*,*,*,*,*,1,1,*,*,*")
3706 (set_attr "prefix" "maybe_vex,maybe_vex,maybe_vex,orig,orig,vex,orig,vex,*,*,*")
3707 (set_attr "mode" "SF,<ssescalarmode>,SI,SF,SF,SF,TI,TI,*,*,*")])
3709 ;; A subset is vec_setv4sf.
;; *vec_setv4sf_sse4_1: inserts SFmode operand 2 into V4SF operand 1 at the
;; position given by constant operand 3.  The condition requires
;; exact_log2 (operand 3), taken as unsigned, to be below the number of
;; V4SF elements.  The output code replaces operand 3 by that log2 value
;; shifted left by 4 and returns insertps or vinsertps by which_alternative.
3710 (define_insn "*vec_setv4sf_sse4_1"
3711 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3714 (match_operand:SF 2 "nonimmediate_operand" "xm,xm"))
3715 (match_operand:V4SF 1 "register_operand" "0,x")
3716 (match_operand:SI 3 "const_int_operand" "")))]
3718 && ((unsigned) exact_log2 (INTVAL (operands[3]))
3719 < GET_MODE_NUNITS (V4SFmode))"
3721 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
3722 switch (which_alternative)
3725 return "insertps\t{%3, %2, %0|%0, %2, %3}";
3727 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3732 [(set_attr "isa" "noavx,avx")
3733 (set_attr "type" "sselog")
3734 (set_attr "prefix_data16" "1,*")
3735 (set_attr "prefix_extra" "1")
3736 (set_attr "length_immediate" "1")
3737 (set_attr "prefix" "orig,vex")
3738 (set_attr "mode" "V4SF")])
;; sse4_1_insertps: unspec-based insertps with V4SF operands 2 and 1 and an
;; immediate operand 3 in 0..255.
;; When operand 2 is in memory, the output code takes count_s from the
;; immediate's bits 6 and up, can reduce the immediate to its low six bits,
;; and re-addresses operand 2 as SFmode at byte offset count_s * 4.
;; The template is then insertps or vinsertps by which_alternative.
3740 (define_insn "sse4_1_insertps"
3741 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3742 (unspec:V4SF [(match_operand:V4SF 2 "nonimmediate_operand" "xm,xm")
3743 (match_operand:V4SF 1 "register_operand" "0,x")
3744 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
3748 if (MEM_P (operands[2]))
3750 unsigned count_s = INTVAL (operands[3]) >> 6;
3752 operands[3] = GEN_INT (INTVAL (operands[3]) & 0x3f);
3753 operands[2] = adjust_address_nv (operands[2], SFmode, count_s * 4);
3755 switch (which_alternative)
3758 return "insertps\t{%3, %2, %0|%0, %2, %3}";
3760 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3765 [(set_attr "isa" "noavx,avx")
3766 (set_attr "type" "sselog")
3767 (set_attr "prefix_data16" "1,*")
3768 (set_attr "prefix_extra" "1")
3769 (set_attr "length_immediate" "1")
3770 (set_attr "prefix" "orig,vex")
3771 (set_attr "mode" "V4SF")])
;; Post-reload pattern (TARGET_SSE && reload_completed) for a VI4F_128 memory
;; destination fed by a vec_duplicate of non-memory scalar operand 1.  The
;; replacement emits a move whose destination is operand 0 re-addressed in
;; <ssescalarmode> at offset 0.
3774 [(set (match_operand:VI4F_128 0 "memory_operand" "")
3776 (vec_duplicate:VI4F_128
3777 (match_operand:<ssescalarmode> 1 "nonmemory_operand" ""))
3780 "TARGET_SSE && reload_completed"
3783 emit_move_insn (adjust_address (operands[0], <ssescalarmode>mode, 0),
;; vec_set<mode>: expander over the V_128 iterator modes.  It delegates to
;; ix86_expand_vector_set (false, vector operand 0, scalar operand 1, and the
;; integer value of constant operand 2).
3788 (define_expand "vec_set<mode>"
3789 [(match_operand:V_128 0 "register_operand" "")
3790 (match_operand:<ssescalarmode> 1 "register_operand" "")
3791 (match_operand 2 "const_int_operand" "")]
3794 ix86_expand_vector_set (false, operands[0], operands[1],
3795 INTVAL (operands[2]));
;; *vec_extractv4sf_0: extracts element 0 of V4SF operand 1 into SFmode
;; operand 0; operands may not both be memory.  After reload the insn is
;; split: operand 1 is re-expressed in SFmode (via gen_rtx_REG on its
;; register number or via gen_lowpart) and a plain move is emitted.
3799 (define_insn_and_split "*vec_extractv4sf_0"
3800 [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,f,r")
3802 (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m,m")
3803 (parallel [(const_int 0)])))]
3804 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
3806 "&& reload_completed"
3809 rtx op1 = operands[1];
3811 op1 = gen_rtx_REG (SFmode, REGNO (op1));
3813 op1 = gen_lowpart (SFmode, op1);
3814 emit_move_insn (operands[0], op1);
;; avx_vextractf128<mode>: expander over the V_256 iterator modes.  It
;; switches on constant operand 2 (0 or 1) to pick gen_vec_extract_lo_<mode>
;; or gen_vec_extract_hi_<mode>, then emits the chosen generator on operands
;; 0 and 1.
;; NOTE(review): presumably 0 selects the low half and 1 the high half --
;; confirm against the case labels.
3818 (define_expand "avx_vextractf128<mode>"
3819 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "")
3820 (match_operand:V_256 1 "register_operand" "")
3821 (match_operand:SI 2 "const_0_to_1_operand" "")]
3824 rtx (*insn)(rtx, rtx);
3826 switch (INTVAL (operands[2]))
3829 insn = gen_vec_extract_lo_<mode>;
3832 insn = gen_vec_extract_hi_<mode>;
3838 emit_insn (insn (operands[0], operands[1]));
;; vec_extract_lo_<mode> (VI8F_256 modes): selects elements 0 and 1 of
;; operand 1 into a half-width destination; operands may not both be memory.
;; After reload the insn is split into a plain move from operand 1
;; re-expressed in the half-width mode (gen_rtx_REG or gen_lowpart).
3842 (define_insn_and_split "vec_extract_lo_<mode>"
3843 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
3844 (vec_select:<ssehalfvecmode>
3845 (match_operand:VI8F_256 1 "nonimmediate_operand" "xm,x")
3846 (parallel [(const_int 0) (const_int 1)])))]
3847 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
3849 "&& reload_completed"
3852 rtx op1 = operands[1];
3854 op1 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op1));
3856 op1 = gen_lowpart (<ssehalfvecmode>mode, op1);
3857 emit_move_insn (operands[0], op1);
;; vec_extract_hi_<mode> (VI8F_256 modes): selects elements 2 and 3 of
;; register operand 1 into a half-width register or memory destination,
;; emitting vextract<i128> with immediate 1.
3861 (define_insn "vec_extract_hi_<mode>"
3862 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
3863 (vec_select:<ssehalfvecmode>
3864 (match_operand:VI8F_256 1 "register_operand" "x,x")
3865 (parallel [(const_int 2) (const_int 3)])))]
3867 "vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}"
3868 [(set_attr "type" "sselog")
3869 (set_attr "prefix_extra" "1")
3870 (set_attr "length_immediate" "1")
3871 (set_attr "memory" "none,store")
3872 (set_attr "prefix" "vex")
3873 (set_attr "mode" "<sseinsnmode>")])
;; vec_extract_lo_<mode> (VI4F_256 modes): selects elements 0-3 of operand 1
;; into a half-width destination; operands may not both be memory.
;; After reload the insn is split into a plain move from operand 1
;; re-expressed in the half-width mode (gen_rtx_REG or gen_lowpart).
3875 (define_insn_and_split "vec_extract_lo_<mode>"
3876 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
3877 (vec_select:<ssehalfvecmode>
3878 (match_operand:VI4F_256 1 "nonimmediate_operand" "xm,x")
3879 (parallel [(const_int 0) (const_int 1)
3880 (const_int 2) (const_int 3)])))]
3881 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
3883 "&& reload_completed"
3886 rtx op1 = operands[1];
3888 op1 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op1));
3890 op1 = gen_lowpart (<ssehalfvecmode>mode, op1);
3891 emit_move_insn (operands[0], op1);
;; vec_extract_hi_<mode> (VI4F_256 modes): selects elements 4-7 of register
;; operand 1 into a half-width register or memory destination, emitting
;; vextract<i128> with immediate 1.
3895 (define_insn "vec_extract_hi_<mode>"
3896 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
3897 (vec_select:<ssehalfvecmode>
3898 (match_operand:VI4F_256 1 "register_operand" "x,x")
3899 (parallel [(const_int 4) (const_int 5)
3900 (const_int 6) (const_int 7)])))]
3902 "vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}"
3903 [(set_attr "type" "sselog")
3904 (set_attr "prefix_extra" "1")
3905 (set_attr "length_immediate" "1")
3906 (set_attr "memory" "none,store")
3907 (set_attr "prefix" "vex")
3908 (set_attr "mode" "<sseinsnmode>")])
;; vec_extract_lo_v16hi: selects elements 0-7 of V16HI operand 1 into a V8HI
;; destination; operands may not both be memory.  After reload the insn is
;; split into a plain move from operand 1 re-expressed in V8HImode
;; (gen_rtx_REG or gen_lowpart).
3910 (define_insn_and_split "vec_extract_lo_v16hi"
3911 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
3913 (match_operand:V16HI 1 "nonimmediate_operand" "xm,x")
3914 (parallel [(const_int 0) (const_int 1)
3915 (const_int 2) (const_int 3)
3916 (const_int 4) (const_int 5)
3917 (const_int 6) (const_int 7)])))]
3918 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
3920 "&& reload_completed"
3923 rtx op1 = operands[1];
3925 op1 = gen_rtx_REG (V8HImode, REGNO (op1));
3927 op1 = gen_lowpart (V8HImode, op1);
3928 emit_move_insn (operands[0], op1);
;; vec_extract_hi_v16hi: selects elements 8-15 of V16HI register operand 1
;; into a V8HI register or memory destination, emitting vextract%~128 with
;; immediate 1.  The insn mode is OI.
3932 (define_insn "vec_extract_hi_v16hi"
3933 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
3935 (match_operand:V16HI 1 "register_operand" "x,x")
3936 (parallel [(const_int 8) (const_int 9)
3937 (const_int 10) (const_int 11)
3938 (const_int 12) (const_int 13)
3939 (const_int 14) (const_int 15)])))]
3941 "vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}"
3942 [(set_attr "type" "sselog")
3943 (set_attr "prefix_extra" "1")
3944 (set_attr "length_immediate" "1")
3945 (set_attr "memory" "none,store")
3946 (set_attr "prefix" "vex")
3947 (set_attr "mode" "OI")])
;; vec_extract_lo_v32qi: selects elements 0-15 of V32QI operand 1 into a
;; V16QI destination; operands may not both be memory.  After reload the insn
;; is split into a plain move from operand 1 re-expressed in V16QImode
;; (gen_rtx_REG or gen_lowpart).
3949 (define_insn_and_split "vec_extract_lo_v32qi"
3950 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
3952 (match_operand:V32QI 1 "nonimmediate_operand" "xm,x")
3953 (parallel [(const_int 0) (const_int 1)
3954 (const_int 2) (const_int 3)
3955 (const_int 4) (const_int 5)
3956 (const_int 6) (const_int 7)
3957 (const_int 8) (const_int 9)
3958 (const_int 10) (const_int 11)
3959 (const_int 12) (const_int 13)
3960 (const_int 14) (const_int 15)])))]
3961 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
3963 "&& reload_completed"
3966 rtx op1 = operands[1];
3968 op1 = gen_rtx_REG (V16QImode, REGNO (op1));
3970 op1 = gen_lowpart (V16QImode, op1);
3971 emit_move_insn (operands[0], op1);
;; vec_extract_hi_v32qi: selects elements 16-31 of V32QI register operand 1
;; into a V16QI register or memory destination, emitting vextract%~128 with
;; immediate 1.  The insn mode is OI.
3975 (define_insn "vec_extract_hi_v32qi"
3976 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
3978 (match_operand:V32QI 1 "register_operand" "x,x")
3979 (parallel [(const_int 16) (const_int 17)
3980 (const_int 18) (const_int 19)
3981 (const_int 20) (const_int 21)
3982 (const_int 22) (const_int 23)
3983 (const_int 24) (const_int 25)
3984 (const_int 26) (const_int 27)
3985 (const_int 28) (const_int 29)
3986 (const_int 30) (const_int 31)])))]
3988 "vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}"
3989 [(set_attr "type" "sselog")
3990 (set_attr "prefix_extra" "1")
3991 (set_attr "length_immediate" "1")
3992 (set_attr "memory" "none,store")
3993 (set_attr "prefix" "vex")
3994 (set_attr "mode" "OI")])
;; *sse4_1_extractps: extracts the element of V4SF register operand 1 chosen
;; by constant operand 2 (0..3) into SFmode operand 0.  Alternative 0
;; (general register or memory destination) emits %vextractps.
;; When the destination is an SSE register the insn is split after reload:
;; the destination is viewed as V4SF and, depending on the element index,
;; either a shufps with all four selectors naming that element or
;; vec_interleave_highv4sf of operand 1 with itself is emitted.
3996 (define_insn_and_split "*sse4_1_extractps"
3997 [(set (match_operand:SF 0 "nonimmediate_operand" "=rm,x,x")
3999 (match_operand:V4SF 1 "register_operand" "x,0,x")
4000 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n,n,n")])))]
4003 %vextractps\t{%2, %1, %0|%0, %1, %2}
4006 "&& reload_completed && SSE_REG_P (operands[0])"
4009 rtx dest = gen_rtx_REG (V4SFmode, REGNO (operands[0]));
4010 switch (INTVAL (operands[2]))
4014 emit_insn (gen_sse_shufps_v4sf (dest, operands[1], operands[1],
4015 operands[2], operands[2],
4016 GEN_INT (INTVAL (operands[2]) + 4),
4017 GEN_INT (INTVAL (operands[2]) + 4)));
4020 emit_insn (gen_vec_interleave_highv4sf (dest, operands[1], operands[1]));
4023 /* 0 should be handled by the *vec_extractv4sf_0 pattern above. */
4028 [(set_attr "isa" "*,noavx,avx")
4029 (set_attr "type" "sselog,*,*")
4030 (set_attr "prefix_data16" "1,*,*")
4031 (set_attr "prefix_extra" "1,*,*")
4032 (set_attr "length_immediate" "1,*,*")
4033 (set_attr "prefix" "maybe_vex,*,*")
4034 (set_attr "mode" "V4SF,*,*")])
;; *vec_extract_v4sf_mem: extracts element operand 2 (0..3) of a V4SF
;; offsettable-memory operand into an SFmode register.  After reload it is
;; split into a move from the memory operand re-addressed as SFmode at byte
;; offset 4 * index.
4036 (define_insn_and_split "*vec_extract_v4sf_mem"
4037 [(set (match_operand:SF 0 "register_operand" "=x*rf")
4039 (match_operand:V4SF 1 "memory_operand" "o")
4040 (parallel [(match_operand 2 "const_0_to_3_operand" "n")])))]
4043 "&& reload_completed"
4046 int i = INTVAL (operands[2]);
4048 emit_move_insn (operands[0], adjust_address (operands[1], SFmode, i*4));
4052 ;; Modes handled by vec_extract patterns.
;; The 256-bit entries are enabled only under TARGET_AVX; the 128-bit entries
;; carry no condition of their own.
4053 (define_mode_iterator VEC_EXTRACT_MODE
4054 [(V32QI "TARGET_AVX") V16QI
4055 (V16HI "TARGET_AVX") V8HI
4056 (V8SI "TARGET_AVX") V4SI
4057 (V4DI "TARGET_AVX") V2DI
4058 (V8SF "TARGET_AVX") V4SF
4059 (V4DF "TARGET_AVX") V2DF])
;; vec_extract<mode>: expander over VEC_EXTRACT_MODE.  It delegates to
;; ix86_expand_vector_extract (false, scalar operand 0, vector operand 1, and
;; the integer value of constant operand 2).
4061 (define_expand "vec_extract<mode>"
4062 [(match_operand:<ssescalarmode> 0 "register_operand" "")
4063 (match_operand:VEC_EXTRACT_MODE 1 "register_operand" "")
4064 (match_operand 2 "const_int_operand" "")]
4067 ix86_expand_vector_extract (false, operands[0], operands[1],
4068 INTVAL (operands[2]));
4072 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4074 ;; Parallel double-precision floating point element swizzling
4076 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4078 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; avx_unpckhpd256: emits vunpckhpd on V4DF operands 1 (register) and 2
;; (register or memory), with selection indices 1, 5, 3, 7.
4079 (define_insn "avx_unpckhpd256"
4080 [(set (match_operand:V4DF 0 "register_operand" "=x")
4083 (match_operand:V4DF 1 "register_operand" "x")
4084 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4085 (parallel [(const_int 1) (const_int 5)
4086 (const_int 3) (const_int 7)])))]
4088 "vunpckhpd\t{%2, %1, %0|%0, %1, %2}"
4089 [(set_attr "type" "sselog")
4090 (set_attr "prefix" "vex")
4091 (set_attr "mode" "V4DF")])
;; vec_interleave_highv4df: multi-set expander on V4DF operands 1 and 2.
;; The visible selections are 0,4,2,6 and 1,5,3,7, followed by a final set of
;; operand 0 with selection 2,3,6,7.  operands[3] and operands[4] are fresh
;; V4DF pseudo registers created in the preparation code.
4093 (define_expand "vec_interleave_highv4df"
4097 (match_operand:V4DF 1 "register_operand" "x")
4098 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4099 (parallel [(const_int 0) (const_int 4)
4100 (const_int 2) (const_int 6)])))
4106 (parallel [(const_int 1) (const_int 5)
4107 (const_int 3) (const_int 7)])))
4108 (set (match_operand:V4DF 0 "register_operand" "")
4113 (parallel [(const_int 2) (const_int 3)
4114 (const_int 6) (const_int 7)])))]
4117 operands[3] = gen_reg_rtx (V4DFmode);
4118 operands[4] = gen_reg_rtx (V4DFmode);
;; vec_interleave_highv2df: expander on V2DF operands 1 and 2 (register or
;; memory).  When ix86_vec_interleave_v2df_operator_ok (operands, 1) rejects
;; the operands, operand 2 is forced into a V2DF register.
4122 (define_expand "vec_interleave_highv2df"
4123 [(set (match_operand:V2DF 0 "register_operand" "")
4126 (match_operand:V2DF 1 "nonimmediate_operand" "")
4127 (match_operand:V2DF 2 "nonimmediate_operand" ""))
4128 (parallel [(const_int 1)
4132 if (!ix86_vec_interleave_v2df_operator_ok (operands, 1))
4133 operands[2] = force_reg (V2DFmode, operands[2]);
;; *vec_interleave_highv2df: insn matched under TARGET_SSE2 when
;; ix86_vec_interleave_v2df_operator_ok (operands, 1) holds.  Alternatives:
;;   0/1  register operands: unpckhpd / vunpckhpd
;;   2    offsettable-memory operand 1 with operand 2 matching it: %vmovddup
;;        from %H1 (isa sse3)
;;   3/4  offsettable-memory operand 1: movlpd / vmovlpd from %H1
;;   5    memory destination tied to operand 2: %vmovhpd store of operand 1
4136 (define_insn "*vec_interleave_highv2df"
4137 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m")
4140 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,o,o,o,x")
4141 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,1,0,x,0"))
4142 (parallel [(const_int 1)
4144 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
4146 unpckhpd\t{%2, %0|%0, %2}
4147 vunpckhpd\t{%2, %1, %0|%0, %1, %2}
4148 %vmovddup\t{%H1, %0|%0, %H1}
4149 movlpd\t{%H1, %0|%0, %H1}
4150 vmovlpd\t{%H1, %2, %0|%0, %2, %H1}
4151 %vmovhpd\t{%1, %0|%0, %1}"
4152 [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
4153 (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
4154 (set_attr "prefix_data16" "*,*,*,1,*,1")
4155 (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex")
4156 (set_attr "mode" "V2DF,V2DF,V2DF,V1DF,V1DF,V1DF")])
4158 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; avx_movddup256: expander with a V4DF register destination and a single
;; V4DF register-or-memory source, using selection indices 0, 4, 2, 6.
4159 (define_expand "avx_movddup256"
4160 [(set (match_operand:V4DF 0 "register_operand" "")
4163 (match_operand:V4DF 1 "nonimmediate_operand" "")
4165 (parallel [(const_int 0) (const_int 4)
4166 (const_int 2) (const_int 6)])))]
;; avx_unpcklpd256: expander on V4DF operands 1 (register) and 2 (register or
;; memory), using selection indices 0, 4, 2, 6.
4169 (define_expand "avx_unpcklpd256"
4170 [(set (match_operand:V4DF 0 "register_operand" "")
4173 (match_operand:V4DF 1 "register_operand" "")
4174 (match_operand:V4DF 2 "nonimmediate_operand" ""))
4175 (parallel [(const_int 0) (const_int 4)
4176 (const_int 2) (const_int 6)])))]
;; *avx_unpcklpd256: V4DF selection 0, 4, 2, 6 from operands 1 and 2.  The
;; condition allows a memory operand 1 only when it is rtx-equal to operand 2.
;; Alternative 0 (operand 2 matching operand 1) emits vmovddup of operand 1;
;; alternative 1 emits vunpcklpd.
4179 (define_insn "*avx_unpcklpd256"
4180 [(set (match_operand:V4DF 0 "register_operand" "=x,x")
4183 (match_operand:V4DF 1 "nonimmediate_operand" "xm,x")
4184 (match_operand:V4DF 2 "nonimmediate_operand" " 1,xm"))
4185 (parallel [(const_int 0) (const_int 4)
4186 (const_int 2) (const_int 6)])))]
4188 && (!MEM_P (operands[1]) || rtx_equal_p (operands[1], operands[2]))"
4190 vmovddup\t{%1, %0|%0, %1}
4191 vunpcklpd\t{%2, %1, %0|%0, %1, %2}"
4192 [(set_attr "type" "sselog")
4193 (set_attr "prefix" "vex")
4194 (set_attr "mode" "V4DF")])
;; vec_interleave_lowv4df: multi-set expander on V4DF operands 1 and 2.
;; The visible selections are 0,4,2,6 and 1,5,3,7, followed by a final set of
;; operand 0 with selection 0,1,4,5.  operands[3] and operands[4] are fresh
;; V4DF pseudo registers created in the preparation code.
4196 (define_expand "vec_interleave_lowv4df"
4200 (match_operand:V4DF 1 "register_operand" "x")
4201 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4202 (parallel [(const_int 0) (const_int 4)
4203 (const_int 2) (const_int 6)])))
4209 (parallel [(const_int 1) (const_int 5)
4210 (const_int 3) (const_int 7)])))
4211 (set (match_operand:V4DF 0 "register_operand" "")
4216 (parallel [(const_int 0) (const_int 1)
4217 (const_int 4) (const_int 5)])))]
4220 operands[3] = gen_reg_rtx (V4DFmode);
4221 operands[4] = gen_reg_rtx (V4DFmode);
;; vec_interleave_lowv2df: expander on V2DF operands 1 and 2 (register or
;; memory).  When ix86_vec_interleave_v2df_operator_ok (operands, 0) rejects
;; the operands, operand 1 is forced into a V2DF register.
4224 (define_expand "vec_interleave_lowv2df"
4225 [(set (match_operand:V2DF 0 "register_operand" "")
4228 (match_operand:V2DF 1 "nonimmediate_operand" "")
4229 (match_operand:V2DF 2 "nonimmediate_operand" ""))
4230 (parallel [(const_int 0)
4234 if (!ix86_vec_interleave_v2df_operator_ok (operands, 0))
4235 operands[1] = force_reg (V2DFmode, operands[1]);
;; *vec_interleave_lowv2df: insn matched under TARGET_SSE2 when
;; ix86_vec_interleave_v2df_operator_ok (operands, 0) holds.  Alternatives:
;;   0/1  register operands: unpcklpd / vunpcklpd
;;   2    memory operand 1 with operand 2 matching it: %vmovddup (isa sse3)
;;   3/4  memory operand 2: movhpd / vmovhpd
;;   5    offsettable-memory destination tied to operand 1: %vmovlpd store of
;;        operand 2 to %H0
4238 (define_insn "*vec_interleave_lowv2df"
4239 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,o")
4242 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,m,0,x,0")
4243 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,1,m,m,x"))
4244 (parallel [(const_int 0)
4246 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
4248 unpcklpd\t{%2, %0|%0, %2}
4249 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4250 %vmovddup\t{%1, %0|%0, %1}
4251 movhpd\t{%2, %0|%0, %2}
4252 vmovhpd\t{%2, %1, %0|%0, %1, %2}
4253 %vmovlpd\t{%2, %H0|%H0, %2}"
4254 [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
4255 (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
4256 (set_attr "prefix_data16" "*,*,*,1,*,1")
4257 (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex")
4258 (set_attr "mode" "V2DF,V2DF,V2DF,V1DF,V1DF,V1DF")])
;; Post-reload pattern (TARGET_SSE3 && reload_completed) for a V2DF memory
;; destination and a V2DF register source.  The replacement views the source
;; register as DFmode and stores that value twice, at byte offsets 0 and 8
;; of the destination.
4261 [(set (match_operand:V2DF 0 "memory_operand" "")
4264 (match_operand:V2DF 1 "register_operand" "")
4266 (parallel [(const_int 0)
4268 "TARGET_SSE3 && reload_completed"
4271 rtx low = gen_rtx_REG (DFmode, REGNO (operands[1]));
4272 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
4273 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
;; Pattern for a V2DF register destination and a V2DF memory source with two
;; constant selectors.  It applies under TARGET_SSE3 when selector operand 3
;; equals selector operand 2 plus 2, and is replaced by a vec_duplicate of
;; operand 1 re-addressed as DFmode at byte offset 8 * operand 2.
4278 [(set (match_operand:V2DF 0 "register_operand" "")
4281 (match_operand:V2DF 1 "memory_operand" "")
4283 (parallel [(match_operand:SI 2 "const_0_to_1_operand" "")
4284 (match_operand:SI 3 "const_int_operand" "")])))]
4285 "TARGET_SSE3 && INTVAL (operands[2]) + 2 == INTVAL (operands[3])"
4286 [(set (match_dup 0) (vec_duplicate:V2DF (match_dup 1)))]
4288 operands[1] = adjust_address (operands[1], DFmode, INTVAL (operands[2]) * 8);
;; avx_shufpd256: expander taking V4DF operands 0-2 and an SImode constant
;; mask (operand 3).  Individual mask bits are turned into selector constants
;; for gen_avx_shufpd256_1: bit 1 picks 5 or 4, bit 2 picks 3 or 2, and bit 3
;; picks 7 or 6.
4291 (define_expand "avx_shufpd256"
4292 [(match_operand:V4DF 0 "register_operand" "")
4293 (match_operand:V4DF 1 "register_operand" "")
4294 (match_operand:V4DF 2 "nonimmediate_operand" "")
4295 (match_operand:SI 3 "const_int_operand" "")]
4298 int mask = INTVAL (operands[3]);
4299 emit_insn (gen_avx_shufpd256_1 (operands[0], operands[1], operands[2],
4301 GEN_INT (mask & 2 ? 5 : 4),
4302 GEN_INT (mask & 4 ? 3 : 2),
4303 GEN_INT (mask & 8 ? 7 : 6)));
;; avx_shufpd256_1: V4DF shuffle with four explicit selectors (operands 3-6,
;; in ranges 0..1, 4..5, 2..3 and 6..7).  The output code rebuilds the
;; immediate (op3 | (op4 - 4) << 1 | (op5 - 2) << 2 | (op6 - 6) << 3),
;; stores it in operands[3], and returns the vshufpd template.
4307 (define_insn "avx_shufpd256_1"
4308 [(set (match_operand:V4DF 0 "register_operand" "=x")
4311 (match_operand:V4DF 1 "register_operand" "x")
4312 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4313 (parallel [(match_operand 3 "const_0_to_1_operand" "")
4314 (match_operand 4 "const_4_to_5_operand" "")
4315 (match_operand 5 "const_2_to_3_operand" "")
4316 (match_operand 6 "const_6_to_7_operand" "")])))]
4320 mask = INTVAL (operands[3]);
4321 mask |= (INTVAL (operands[4]) - 4) << 1;
4322 mask |= (INTVAL (operands[5]) - 2) << 2;
4323 mask |= (INTVAL (operands[6]) - 6) << 3;
4324 operands[3] = GEN_INT (mask);
4326 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4328 [(set_attr "type" "sselog")
4329 (set_attr "length_immediate" "1")
4330 (set_attr "prefix" "vex")
4331 (set_attr "mode" "V4DF")])
;; sse2_shufpd: expander taking V2DF operands 0-2 and an SImode constant mask
;; (operand 3).  Mask bits become selector constants for
;; gen_sse2_shufpd_v2df; bit 1 picks 3 or 2.
4333 (define_expand "sse2_shufpd"
4334 [(match_operand:V2DF 0 "register_operand" "")
4335 (match_operand:V2DF 1 "register_operand" "")
4336 (match_operand:V2DF 2 "nonimmediate_operand" "")
4337 (match_operand:SI 3 "const_int_operand" "")]
4340 int mask = INTVAL (operands[3]);
4341 emit_insn (gen_sse2_shufpd_v2df (operands[0], operands[1], operands[2],
4343 GEN_INT (mask & 2 ? 3 : 2)));
4347 ;; Modes handled by vec_extract_even/odd pattern.
;; 256-bit integer modes require TARGET_AVX2 and 256-bit float modes require
;; TARGET_AVX.  The 128-bit modes require TARGET_SSE2, except V4SF which
;; carries no condition of its own.
4348 (define_mode_iterator VEC_EXTRACT_EVENODD_MODE
4349 [(V32QI "TARGET_AVX2") (V16QI "TARGET_SSE2")
4350 (V16HI "TARGET_AVX2") (V8HI "TARGET_SSE2")
4351 (V8SI "TARGET_AVX2") (V4SI "TARGET_SSE2")
4352 (V4DI "TARGET_AVX2") (V2DI "TARGET_SSE2")
4353 (V8SF "TARGET_AVX") V4SF
4354 (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
;; vec_extract_even<mode>: expander over VEC_EXTRACT_EVENODD_MODE with three
;; register operands.  It delegates to ix86_expand_vec_extract_even_odd with
;; 0 as the final argument.
4356 (define_expand "vec_extract_even<mode>"
4357 [(match_operand:VEC_EXTRACT_EVENODD_MODE 0 "register_operand" "")
4358 (match_operand:VEC_EXTRACT_EVENODD_MODE 1 "register_operand" "")
4359 (match_operand:VEC_EXTRACT_EVENODD_MODE 2 "register_operand" "")]
4362 ix86_expand_vec_extract_even_odd (operands[0], operands[1], operands[2], 0);
;; vec_extract_odd<mode>: expander over VEC_EXTRACT_EVENODD_MODE with three
;; register operands.  It delegates to ix86_expand_vec_extract_even_odd with
;; 1 as the final argument.
4366 (define_expand "vec_extract_odd<mode>"
4367 [(match_operand:VEC_EXTRACT_EVENODD_MODE 0 "register_operand" "")
4368 (match_operand:VEC_EXTRACT_EVENODD_MODE 1 "register_operand" "")
4369 (match_operand:VEC_EXTRACT_EVENODD_MODE 2 "register_operand" "")]
4372 ix86_expand_vec_extract_even_odd (operands[0], operands[1], operands[2], 1);
4376 ;; punpcklqdq and punpckhqdq are shorter than shufpd.
;; avx2_interleave_highv4di: emits vpunpckhqdq on V4DI operands 1 (register)
;; and 2 (register or memory); the selection list starts at index 1.  The
;; insn mode is OI.
4377 (define_insn "avx2_interleave_highv4di"
4378 [(set (match_operand:V4DI 0 "register_operand" "=x")
4381 (match_operand:V4DI 1 "register_operand" "x")
4382 (match_operand:V4DI 2 "nonimmediate_operand" "xm"))
4383 (parallel [(const_int 1)
4388 "vpunpckhqdq\t{%2, %1, %0|%0, %1, %2}"
4389 [(set_attr "type" "sselog")
4390 (set_attr "prefix" "vex")
4391 (set_attr "mode" "OI")])
;; V2DI high-quadword interleave.  Two alternatives, selected by the "isa"
;; attribute: the non-AVX one ties operand 1 to the destination ("0") and
;; prints the two-operand punpckhqdq; the AVX one prints the three-operand
;; vpunpckhqdq.  The element selector begins at index 1 (remainder not
;; visible in this listing).
4393 (define_insn "vec_interleave_highv2di"
4394 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
4397 (match_operand:V2DI 1 "register_operand" "0,x")
4398 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm"))
4399 (parallel [(const_int 1)
4403 punpckhqdq\t{%2, %0|%0, %2}
4404 vpunpckhqdq\t{%2, %1, %0|%0, %1, %2}"
4405 [(set_attr "isa" "noavx,avx")
4406 (set_attr "type" "sselog")
4407 (set_attr "prefix_data16" "1,*")
4408 (set_attr "prefix" "orig,vex")
4409 (set_attr "mode" "TI")])
;; V4DI low-quadword interleave, printed as the three-operand
;; vpunpcklqdq.  Operand 1 must be a register; operand 2 may be memory.
;; The element selector begins at index 0; the rest of the selector list
;; and the insn condition are not visible in this listing.
4411 (define_insn "avx2_interleave_lowv4di"
4412 [(set (match_operand:V4DI 0 "register_operand" "=x")
4415 (match_operand:V4DI 1 "register_operand" "x")
4416 (match_operand:V4DI 2 "nonimmediate_operand" "xm"))
4417 (parallel [(const_int 0)
4422 "vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}"
4423 [(set_attr "type" "sselog")
4424 (set_attr "prefix" "vex")
4425 (set_attr "mode" "OI")])
;; V2DI low-quadword interleave.  Two alternatives, selected by the "isa"
;; attribute: the non-AVX one ties operand 1 to the destination ("0") and
;; prints the two-operand punpcklqdq; the AVX one prints the three-operand
;; vpunpcklqdq.  The element selector begins at index 0 (remainder not
;; visible in this listing).
4427 (define_insn "vec_interleave_lowv2di"
4428 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
4431 (match_operand:V2DI 1 "register_operand" "0,x")
4432 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm"))
4433 (parallel [(const_int 0)
4437 punpcklqdq\t{%2, %0|%0, %2}
4438 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}"
4439 [(set_attr "isa" "noavx,avx")
4440 (set_attr "type" "sselog")
4441 (set_attr "prefix_data16" "1,*")
4442 (set_attr "prefix" "orig,vex")
4443 (set_attr "mode" "TI")])
;; shufpd over the VI8F_128 modes.  The result picks two elements out of
;; the concatenation of operands 1 and 2: operand 3 is the first index
;; (predicate const_0_to_1_operand) and operand 4 the second (predicate
;; const_2_to_3_operand).  The output code packs both into the instruction
;; immediate -- bit 0 is operand 3, bit 1 is operand 4 minus 2 -- and
;; stores it back into operands[3] so that %3 prints the immediate.
;; The non-AVX alternative prints shufpd, the AVX alternative vshufpd.
4445 (define_insn "sse2_shufpd_<mode>"
4446 [(set (match_operand:VI8F_128 0 "register_operand" "=x,x")
4447 (vec_select:VI8F_128
4448 (vec_concat:<ssedoublevecmode>
4449 (match_operand:VI8F_128 1 "register_operand" "0,x")
4450 (match_operand:VI8F_128 2 "nonimmediate_operand" "xm,xm"))
4451 (parallel [(match_operand 3 "const_0_to_1_operand" "")
4452 (match_operand 4 "const_2_to_3_operand" "")])))]
4456 mask = INTVAL (operands[3]);
4457 mask |= (INTVAL (operands[4]) - 2) << 1;
4458 operands[3] = GEN_INT (mask);
4460 switch (which_alternative)
4463 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
4465 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4470 [(set_attr "isa" "noavx,avx")
4471 (set_attr "type" "sselog")
4472 (set_attr "length_immediate" "1")
4473 (set_attr "prefix" "orig,vex")
4474 (set_attr "mode" "V2DF")])
4476 ;; Avoid combining registers from different units in a single alternative,
4477 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Extract element 1 of a V2DF (operand 1) into a DF destination.
;; Requires SSE2 and rejects the case where both operands are in memory.
;; There are six alternatives; the last three take the source from
;; offsettable memory ("o") and have types ssemov, fmov and imov.
;; Only two of the output templates (%vmovhpd and vunpckhpd) are visible
;; in this listing.  prefix_data16 is 1 only for alternative 0 when AVX is
;; not enabled.
4478 (define_insn "sse2_storehpd"
4479 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,x,*f,r")
4481 (match_operand:V2DF 1 "nonimmediate_operand" " x,0,x,o,o,o")
4482 (parallel [(const_int 1)])))]
4483 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4485 %vmovhpd\t{%1, %0|%0, %1}
4487 vunpckhpd\t{%d1, %0|%0, %d1}
4491 [(set_attr "isa" "*,noavx,avx,*,*,*")
4492 (set_attr "type" "ssemov,sselog1,sselog1,ssemov,fmov,imov")
4493 (set (attr "prefix_data16")
4495 (and (eq_attr "alternative" "0")
4496 (not (match_test "TARGET_AVX")))
4498 (const_string "*")))
4499 (set_attr "prefix" "maybe_vex,orig,vex,*,*,*")
4500 (set_attr "mode" "V1DF,V1DF,V2DF,DF,DF,DF")])
;; NOTE(review): the opening line of this definition is not visible in
;; this listing; its shape (pattern, condition, replacement, preparation
;; statement) is that of a define_split.
;; After reload, extracting element 1 of a V2DF that lives in memory into
;; a DF register becomes a plain DF move from the same address plus 8.
4503 [(set (match_operand:DF 0 "register_operand" "")
4505 (match_operand:V2DF 1 "memory_operand" "")
4506 (parallel [(const_int 1)])))]
4507 "TARGET_SSE2 && reload_completed"
4508 [(set (match_dup 0) (match_dup 1))]
4509 "operands[1] = adjust_address (operands[1], DFmode, 8);")
;; Extract element 1 of a V2DF when only SSE (not SSE2) is available, so
;; single-precision move instructions are used.  Three alternatives:
;; register to memory (movhps), register to register (movhlps), and
;; offsettable memory to register (movlps on %H1).  Both operands in
;; memory is rejected by the condition.
4511 (define_insn "*vec_extractv2df_1_sse"
4512 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
4514 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
4515 (parallel [(const_int 1)])))]
4516 "!TARGET_SSE2 && TARGET_SSE
4517 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4519 movhps\t{%1, %0|%0, %1}
4520 movhlps\t{%1, %0|%0, %1}
4521 movlps\t{%H1, %0|%0, %H1}"
4522 [(set_attr "type" "ssemov")
4523 (set_attr "mode" "V2SF,V4SF,V2SF")])
4525 ;; Avoid combining registers from different units in a single alternative,
4526 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Extract element 0 of a V2DF (operand 1) into a DF destination.
;; Requires SSE2 and rejects the case where both operands are in memory.
;; Five alternatives; only the first output template (%vmovlpd, register
;; source to memory destination) is visible in this listing.
4527 (define_insn "sse2_storelpd"
4528 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
4530 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m,m,m")
4531 (parallel [(const_int 0)])))]
4532 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4534 %vmovlpd\t{%1, %0|%0, %1}
4539 [(set_attr "type" "ssemov,ssemov,ssemov,fmov,imov")
4540 (set_attr "prefix_data16" "1,*,*,*,*")
4541 (set_attr "prefix" "maybe_vex")
4542 (set_attr "mode" "V1DF,DF,DF,DF,DF")])
;; NOTE(review): the opening line of this definition is not visible in
;; this listing; its shape is that of a define_split.
;; After reload, extracting element 0 of a V2DF into a DF register is
;; rewritten as an ordinary DF move.  The source is re-expressed in DFmode
;; in one of two ways (a DFmode REG with the same register number, or
;; gen_lowpart); the test that chooses between them is not visible here.
4545 [(set (match_operand:DF 0 "register_operand" "")
4547 (match_operand:V2DF 1 "nonimmediate_operand" "")
4548 (parallel [(const_int 0)])))]
4549 "TARGET_SSE2 && reload_completed"
4552 rtx op1 = operands[1];
4554 op1 = gen_rtx_REG (DFmode, REGNO (op1));
4556 op1 = gen_lowpart (DFmode, op1);
4557 emit_move_insn (operands[0], op1);
;; Extract element 0 of a V2DF when only SSE (not SSE2) is available.
;; Three alternatives: register to memory (movlps), register to register
;; (movaps, a full-register copy), and memory to register (movlps).
;; Both operands in memory is rejected by the condition.
4561 (define_insn "*vec_extractv2df_0_sse"
4562 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
4564 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
4565 (parallel [(const_int 0)])))]
4566 "!TARGET_SSE2 && TARGET_SSE
4567 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4569 movlps\t{%1, %0|%0, %1}
4570 movaps\t{%1, %0|%0, %1}
4571 movlps\t{%1, %0|%0, %1}"
4572 [(set_attr "type" "ssemov")
4573 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander front end for sse2_loadhpd.  The result keeps element 0 of
;; V2DF operand 1 and takes DF operand 2 as the other element.  All
;; operands may be memory here, so ix86_fixup_binary_operands is asked for
;; a destination (dst) to use; sse2_loadhpd is emitted into dst, and dst is
;; copied to operands[0] afterwards when the two differ.
4575 (define_expand "sse2_loadhpd_exp"
4576 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
4579 (match_operand:V2DF 1 "nonimmediate_operand" "")
4580 (parallel [(const_int 0)]))
4581 (match_operand:DF 2 "nonimmediate_operand" "")))]
4584 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
4586 emit_insn (gen_sse2_loadhpd (dst, operands[1], operands[2]));
4588 /* Fix up the destination if needed. */
4589 if (dst != operands[0])
4590 emit_move_insn (operands[0], dst);
4595 ;; Avoid combining registers from different units in a single alternative,
4596 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Replace the high element of a V2DF: the result keeps element 0 of
;; operand 1 and takes DF operand 2 as the other element.  Requires SSE2
;; and rejects operands 1 and 2 both being in memory.  Seven alternatives:
;; the first four print movhpd/vmovhpd (operand 2 in memory) and
;; unpcklpd/vunpcklpd (operand 2 in a register); the templates of the last
;; three (types ssemov, fmov, imov) are not visible in this listing, nor
;; are the constraint strings of operands 0 and 1.
4597 (define_insn "sse2_loadhpd"
4598 [(set (match_operand:V2DF 0 "nonimmediate_operand"
4602 (match_operand:V2DF 1 "nonimmediate_operand"
4604 (parallel [(const_int 0)]))
4605 (match_operand:DF 2 "nonimmediate_operand"
4606 " m,m,x,x,x,*f,r")))]
4607 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4609 movhpd\t{%2, %0|%0, %2}
4610 vmovhpd\t{%2, %1, %0|%0, %1, %2}
4611 unpcklpd\t{%2, %0|%0, %2}
4612 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4616 [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
4617 (set_attr "type" "ssemov,ssemov,sselog,sselog,ssemov,fmov,imov")
4618 (set_attr "prefix_data16" "1,*,*,*,*,*,*")
4619 (set_attr "prefix" "orig,vex,orig,vex,*,*,*")
4620 (set_attr "mode" "V1DF,V1DF,V2DF,V2DF,DF,DF,DF")])
;; NOTE(review): the opening line of this definition is not visible in
;; this listing; its shape is that of a define_split.
;; Memory-destination case of the high-element replacement: element 0 of
;; the destination is kept (vec_select of match_dup 0) and DF register
;; operand 1 supplies the other element.  After reload this is a plain DF
;; store to the destination address plus 8.
4623 [(set (match_operand:V2DF 0 "memory_operand" "")
4625 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
4626 (match_operand:DF 1 "register_operand" "")))]
4627 "TARGET_SSE2 && reload_completed"
4628 [(set (match_dup 0) (match_dup 1))]
4629 "operands[0] = adjust_address (operands[0], DFmode, 8);")
;; Expander front end for sse2_loadlpd.  The result takes DF operand 2 as
;; one element and keeps element 1 of V2DF operand 1.  All operands may be
;; memory here, so ix86_fixup_binary_operands is asked for a destination
;; (dst) to use; sse2_loadlpd is emitted into dst, and dst is copied to
;; operands[0] afterwards when the two differ.
4631 (define_expand "sse2_loadlpd_exp"
4632 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
4634 (match_operand:DF 2 "nonimmediate_operand" "")
4636 (match_operand:V2DF 1 "nonimmediate_operand" "")
4637 (parallel [(const_int 1)]))))]
4640 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
4642 emit_insn (gen_sse2_loadlpd (dst, operands[1], operands[2]));
4644 /* Fix up the destination if needed. */
4645 if (dst != operands[0])
4646 emit_move_insn (operands[0], dst);
4651 ;; Avoid combining registers from different units in a single alternative,
4652 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Replace the low element of a V2DF: the result takes DF operand 2 as one
;; element and keeps element 1 of operand 1.  Requires SSE2 and rejects
;; operands 1 and 2 both being in memory.  Eleven alternatives (numbered
;; from 0):
;;   0      operand 1 is the "C" constant: %vmovsd load from memory
;;   1,2    operand 2 in memory: movlpd / vmovlpd
;;   3,4    operand 2 in a register: movsd / vmovsd
;;   5      operand 2 tied to the destination: shufpd with immediate 2
;;   6,7    operand 1 in offsettable memory: movhpd / vmovhpd on %H1
;;   8-10   memory destination; templates not visible in this listing
;; The "type" attribute is sselog for 5, fmov for 9, imov for 10 and
;; ssemov for everything else.
4653 (define_insn "sse2_loadlpd"
4654 [(set (match_operand:V2DF 0 "nonimmediate_operand"
4655 "=x,x,x,x,x,x,x,x,m,m ,m")
4657 (match_operand:DF 2 "nonimmediate_operand"
4658 " m,m,m,x,x,0,0,x,x,*f,r")
4660 (match_operand:V2DF 1 "vector_move_operand"
4661 " C,0,x,0,x,x,o,o,0,0 ,0")
4662 (parallel [(const_int 1)]))))]
4663 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4665 %vmovsd\t{%2, %0|%0, %2}
4666 movlpd\t{%2, %0|%0, %2}
4667 vmovlpd\t{%2, %1, %0|%0, %1, %2}
4668 movsd\t{%2, %0|%0, %2}
4669 vmovsd\t{%2, %1, %0|%0, %1, %2}
4670 shufpd\t{$2, %1, %0|%0, %1, 2}
4671 movhpd\t{%H1, %0|%0, %H1}
4672 vmovhpd\t{%H1, %2, %0|%0, %2, %H1}
4676 [(set_attr "isa" "*,noavx,avx,noavx,avx,noavx,noavx,avx,*,*,*")
4678 (cond [(eq_attr "alternative" "5")
4679 (const_string "sselog")
4680 (eq_attr "alternative" "9")
4681 (const_string "fmov")
4682 (eq_attr "alternative" "10")
4683 (const_string "imov")
4685 (const_string "ssemov")))
4686 (set_attr "prefix_data16" "*,1,*,*,*,*,1,*,*,*,*")
4687 (set_attr "length_immediate" "*,*,*,*,*,1,*,*,*,*,*")
4688 (set_attr "prefix" "maybe_vex,orig,vex,orig,vex,orig,orig,vex,*,*,*")
4689 (set_attr "mode" "DF,V1DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,DF,DF,DF")])
;; NOTE(review): the opening line of this definition is not visible in
;; this listing; its shape is that of a define_split.
;; Memory-destination case of the low-element replacement: DF register
;; operand 1 becomes the first (low) element and element 1 of the
;; destination is kept (vec_select of match_dup 0).  After reload this is
;; a plain DF store to the low half of the destination, i.e. byte offset 0.
;; Offset 8 would overwrite the element that must be preserved and leave
;; the element being replaced untouched; offset 8 is only right for the
;; high-element (loadhpd) counterpart.
4692 [(set (match_operand:V2DF 0 "memory_operand" "")
4694 (match_operand:DF 1 "register_operand" "")
4695 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
4696 "TARGET_SSE2 && reload_completed"
4697 [(set (match_dup 0) (match_dup 1))]
4698 "operands[0] = adjust_address (operands[0], DFmode, 0);")
;; Combine two V2DF values (operands 2 and 1) into operand 0.  The RTL
;; operator that combines them and the insn condition are not visible in
;; this listing.  Nine alternatives (numbered from 0):
;;   0,1  register/register: movsd / vmovsd
;;   2,3  operand 2 in memory: movlpd / vmovlpd
;;   4    memory destination, operand 2 in a register: %vmovlpd store
;;   5    shufpd with immediate 2
;;   6,7  operand 1 in offsettable memory: movhps / vmovhps on %H1
;;   8    offsettable memory destination: %vmovhps store to %H0
;; Type is sselog for alternative 5 and ssemov otherwise; prefix_data16 is
;; 1 for alternatives 2 and 4 when AVX is not enabled.
4700 (define_insn "sse2_movsd"
4701 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,m,x,x,x,o")
4703 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,m,m,x,0,0,x,0")
4704 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,0,x,0,x,o,o,x")
4708 movsd\t{%2, %0|%0, %2}
4709 vmovsd\t{%2, %1, %0|%0, %1, %2}
4710 movlpd\t{%2, %0|%0, %2}
4711 vmovlpd\t{%2, %1, %0|%0, %1, %2}
4712 %vmovlpd\t{%2, %0|%0, %2}
4713 shufpd\t{$2, %1, %0|%0, %1, 2}
4714 movhps\t{%H1, %0|%0, %H1}
4715 vmovhps\t{%H1, %2, %0|%0, %2, %H1}
4716 %vmovhps\t{%1, %H0|%H0, %1}"
4717 [(set_attr "isa" "noavx,avx,noavx,avx,*,noavx,noavx,avx,*")
4720 (eq_attr "alternative" "5")
4721 (const_string "sselog")
4722 (const_string "ssemov")))
4723 (set (attr "prefix_data16")
4725 (and (eq_attr "alternative" "2,4")
4726 (not (match_test "TARGET_AVX")))
4728 (const_string "*")))
4729 (set_attr "length_immediate" "*,*,*,*,*,1,*,*,*")
4730 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig,vex,maybe_vex")
4731 (set_attr "mode" "DF,DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,V1DF")])
;; Expander producing a V2DF from a DF operand (presumably a duplicate,
;; going by the name; the RTL operator line is not visible in this
;; listing).  The visible preparation statement forces operand 1 into a DF
;; register; any guard on that statement is not visible here.
4733 (define_expand "vec_dupv2df"
4734 [(set (match_operand:V2DF 0 "register_operand" "")
4736 (match_operand:DF 1 "nonimmediate_operand" "")))]
4740 operands[1] = force_reg (DFmode, operands[1]);
;; V2DF result from a DF operand in a register or memory, printed as
;; movddup (vmovddup under AVX, via the %v prefix and the maybe_vex
;; attribute).  The RTL operator and the insn condition are not visible in
;; this listing.
4743 (define_insn "*vec_dupv2df_sse3"
4744 [(set (match_operand:V2DF 0 "register_operand" "=x")
4746 (match_operand:DF 1 "nonimmediate_operand" "xm")))]
4748 "%vmovddup\t{%1, %0|%0, %1}"
4749 [(set_attr "type" "sselog1")
4750 (set_attr "prefix" "maybe_vex")
4751 (set_attr "mode" "DF")])
;; V2DF result from a DF register operand that is tied to the destination
;; register ("0").  The RTL operator, insn condition and output template
;; are not visible in this listing.
4753 (define_insn "*vec_dupv2df"
4754 [(set (match_operand:V2DF 0 "register_operand" "=x")
4756 (match_operand:DF 1 "register_operand" "0")))]
4759 [(set_attr "type" "sselog1")
4760 (set_attr "mode" "V2DF")])
;; V2DF result built from DF operand 1 (register or memory) and printed
;; as a single movddup/vmovddup of that operand.  The second half of the
;; RTL pattern and the insn condition are not visible in this listing.
4762 (define_insn "*vec_concatv2df_sse3"
4763 [(set (match_operand:V2DF 0 "register_operand" "=x")
4765 (match_operand:DF 1 "nonimmediate_operand" "xm")
4768 "%vmovddup\t{%1, %0|%0, %1}"
4769 [(set_attr "type" "sselog1")
4770 (set_attr "prefix" "maybe_vex")
4771 (set_attr "mode" "DF")])
;; Build a V2DF from two DF operands.  Seven alternatives (numbered from
;; 0), gated per alternative by the "isa" attribute:
;;   0,1  both in registers: unpcklpd / vunpcklpd
;;   2,3  operand 2 in memory: movhpd / vmovhpd
;;   4    operand 1 in memory, operand 2 the "C" constant: %vmovsd load
;;   5,6  non-AVX forms using movlhps (register) and movhps (memory);
;;        these two are not restricted to SSE2 by "isa"
;; The insn condition is not visible in this listing.
4773 (define_insn "*vec_concatv2df"
4774 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x,x,x,x,x")
4776 (match_operand:DF 1 "nonimmediate_operand" " 0,x,0,x,m,0,0")
4777 (match_operand:DF 2 "vector_move_operand" " x,x,m,m,C,x,m")))]
4780 unpcklpd\t{%2, %0|%0, %2}
4781 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4782 movhpd\t{%2, %0|%0, %2}
4783 vmovhpd\t{%2, %1, %0|%0, %1, %2}
4784 %vmovsd\t{%1, %0|%0, %1}
4785 movlhps\t{%2, %0|%0, %2}
4786 movhps\t{%2, %0|%0, %2}"
4787 [(set_attr "isa" "sse2_noavx,avx,sse2_noavx,avx,sse2,noavx,noavx")
4790 (eq_attr "alternative" "0,1")
4791 (const_string "sselog")
4792 (const_string "ssemov")))
4793 (set_attr "prefix_data16" "*,*,1,*,*,*,*")
4794 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig")
4795 (set_attr "mode" "V2DF,V2DF,V1DF,V1DF,DF,V4SF,V2SF")])
4797 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4799 ;; Parallel integral arithmetic
4801 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Negation expander for the VI_128 modes.  The preparation statement
;; creates operand 2 as a register holding the all-zero constant of the
;; mode; presumably the template subtracts operand 1 from it (the RTL
;; operator lines and the condition are not visible in this listing).
4803 (define_expand "neg<mode>2"
4804 [(set (match_operand:VI_128 0 "register_operand" "")
4807 (match_operand:VI_128 1 "nonimmediate_operand" "")))]
4809 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
;; Add/subtract expander for the VI_AVX2 modes (one instance per code of
;; the plusminus iterator, per the <plusminus_insn> name).  Both inputs
;; may be memory here; ix86_fixup_binary_operands_no_copy adjusts the
;; operands before the insn is generated.
4811 (define_expand "<plusminus_insn><mode>3"
4812 [(set (match_operand:VI_AVX2 0 "register_operand" "")
4814 (match_operand:VI_AVX2 1 "nonimmediate_operand" "")
4815 (match_operand:VI_AVX2 2 "nonimmediate_operand" "")))]
4817 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Matching insn for vector integer add/subtract on the VI_AVX2 modes.
;; Requires SSE2 and operands accepted by ix86_binary_operator_ok.
;; Alternative 0 (non-AVX) ties operand 1 to the destination and prints
;; the two-operand p<op><suffix>; alternative 1 (AVX) prints the
;; three-operand vp<op><suffix>.  <comm> supplies the constraint modifier
;; for operand 1 depending on the operation.
4819 (define_insn "*<plusminus_insn><mode>3"
4820 [(set (match_operand:VI_AVX2 0 "register_operand" "=x,x")
4822 (match_operand:VI_AVX2 1 "nonimmediate_operand" "<comm>0,x")
4823 (match_operand:VI_AVX2 2 "nonimmediate_operand" "xm,xm")))]
4824 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
4826 p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
4827 vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
4828 [(set_attr "isa" "noavx,avx")
4829 (set_attr "type" "sseiadd")
4830 (set_attr "prefix_data16" "1,*")
4831 (set_attr "prefix" "orig,vex")
4832 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the sat_plusminus codes on the VI12_AVX2 modes.
;; Both inputs may be memory here; ix86_fixup_binary_operands_no_copy
;; adjusts the operands before the insn is generated.
4834 (define_expand "<sse2_avx2>_<plusminus_insn><mode>3"
4835 [(set (match_operand:VI12_AVX2 0 "register_operand" "")
4836 (sat_plusminus:VI12_AVX2
4837 (match_operand:VI12_AVX2 1 "nonimmediate_operand" "")
4838 (match_operand:VI12_AVX2 2 "nonimmediate_operand" "")))]
4840 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Matching insn for the sat_plusminus codes on the VI12_AVX2 modes.
;; Requires SSE2 and operands accepted by ix86_binary_operator_ok.
;; Alternative 0 (non-AVX) ties operand 1 to the destination and prints
;; the two-operand form; alternative 1 (AVX) prints the three-operand
;; v-prefixed form.
;; NOTE(review): "mode" is the fixed string "TI" here, whereas the
;; non-saturating insn uses <sseinsnmode>; confirm this is intended for the
;; 256-bit modes in VI12_AVX2.
4842 (define_insn "*<sse2_avx2>_<plusminus_insn><mode>3"
4843 [(set (match_operand:VI12_AVX2 0 "register_operand" "=x,x")
4844 (sat_plusminus:VI12_AVX2
4845 (match_operand:VI12_AVX2 1 "nonimmediate_operand" "<comm>0,x")
4846 (match_operand:VI12_AVX2 2 "nonimmediate_operand" "xm,xm")))]
4847 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
4849 p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
4850 vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
4851 [(set_attr "isa" "noavx,avx")
4852 (set_attr "type" "sseiadd")
4853 (set_attr "prefix_data16" "1,*")
4854 (set_attr "prefix" "orig,vex")
4855 (set_attr "mode" "TI")])
;; Byte-element vector multiply (VI1_AVX2 modes), implemented as a
;; pre-reload split (can_create_pseudo_p) into word multiplies:
;;   1. interleave the high and low halves of each input into t[0]..t[3];
;;   2. multiply t[0]*t[1] and t[2]*t[3] in <sseunpackmode>, storing the
;;      results into t[4] and t[5];
;;   3. extract the even bytes of t[5]/t[4] into operand 0.
;; A REG_EQUAL note with the original MULT is attached to the last insn.
;; The second argument of each interleave call is not visible in this
;; listing.
4857 (define_insn_and_split "mul<mode>3"
4858 [(set (match_operand:VI1_AVX2 0 "register_operand" "")
4859 (mult:VI1_AVX2 (match_operand:VI1_AVX2 1 "register_operand" "")
4860 (match_operand:VI1_AVX2 2 "register_operand" "")))]
4862 && can_create_pseudo_p ()"
4869 enum machine_mode mulmode = <sseunpackmode>mode;
4871 for (i = 0; i < 6; ++i)
4872 t[i] = gen_reg_rtx (<MODE>mode);
4874 /* Unpack data such that we've got a source byte in each low byte of
4875 each word. We don't care what goes into the high byte of each word.
4876 Rather than trying to get zero in there, most convenient is to let
4877 it be a copy of the low byte. */
4878 emit_insn (gen_<vec_avx2>_interleave_high<mode> (t[0], operands[1],
4880 emit_insn (gen_<vec_avx2>_interleave_high<mode> (t[1], operands[2],
4882 emit_insn (gen_<vec_avx2>_interleave_low<mode> (t[2], operands[1],
4884 emit_insn (gen_<vec_avx2>_interleave_low<mode> (t[3], operands[2],
4887 /* Multiply words. The end-of-line annotations here give a picture of what
4888 the output of that instruction looks like. Dot means don't care; the
4889 letters are the bytes of the result with A being the most significant. */
4890 emit_insn (gen_rtx_SET (VOIDmode, gen_lowpart (mulmode, t[4]),
4891 gen_rtx_MULT (mulmode, /* .A.B.C.D.E.F.G.H */
4892 gen_lowpart (mulmode, t[0]),
4893 gen_lowpart (mulmode, t[1]))));
4894 emit_insn (gen_rtx_SET (VOIDmode, gen_lowpart (mulmode, t[5]),
4895 gen_rtx_MULT (mulmode, /* .I.J.K.L.M.N.O.P */
4896 gen_lowpart (mulmode, t[2]),
4897 gen_lowpart (mulmode, t[3]))));
4899 /* Extract the even bytes and merge them back together. */
4900 ix86_expand_vec_extract_even_odd (operands[0], t[5], t[4], 0);
4902 set_unique_reg_note (get_last_insn (), REG_EQUAL,
4903 gen_rtx_MULT (<MODE>mode, operands[1], operands[2]));
;; Word-element vector multiply expander (VI2_AVX2 modes).  Both inputs
;; may be memory here; ix86_fixup_binary_operands_no_copy adjusts the
;; operands for MULT before the insn is generated.
4907 (define_expand "mul<mode>3"
4908 [(set (match_operand:VI2_AVX2 0 "register_operand" "")
4909 (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "")
4910 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "")))]
4912 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; Matching insn for the VI2_AVX2 multiply: pmullw (non-AVX, operand 1
;; tied to the destination) or vpmullw (AVX, three operands).  Operand 1
;; is marked commutative with "%".  Requires SSE2 and operands accepted by
;; ix86_binary_operator_ok.
4914 (define_insn "*mul<mode>3"
4915 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
4916 (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,x")
4917 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,xm")))]
4918 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
4920 pmullw\t{%2, %0|%0, %2}
4921 vpmullw\t{%2, %1, %0|%0, %1, %2}"
4922 [(set_attr "isa" "noavx,avx")
4923 (set_attr "type" "sseimul")
4924 (set_attr "prefix_data16" "1,*")
4925 (set_attr "prefix" "orig,vex")
4926 (set_attr "mode" "<sseinsnmode>")])
;; High-part multiply expander for the VI2_AVX2 modes, for both extension
;; codes of any_extend: each input is extended to <ssedoublemode>, the
;; extended values are multiplied, and the product is shifted right
;; logically.  The shift count and the outer operator are not visible in
;; this listing.  ix86_fixup_binary_operands_no_copy adjusts the operands
;; for MULT before the insn is generated.
4928 (define_expand "<s>mul<mode>3_highpart"
4929 [(set (match_operand:VI2_AVX2 0 "register_operand" "")
4931 (lshiftrt:<ssedoublemode>
4932 (mult:<ssedoublemode>
4933 (any_extend:<ssedoublemode>
4934 (match_operand:VI2_AVX2 1 "nonimmediate_operand" ""))
4935 (any_extend:<ssedoublemode>
4936 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "")))
4939 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; Matching insn for the high-part multiply: pmulh<u>w (non-AVX, operand
;; 1 tied to the destination) or vpmulh<u>w (AVX, three operands), where
;; <u> follows the any_extend code.  Operand 1 is marked commutative with
;; "%".  Requires SSE2 and operands accepted by ix86_binary_operator_ok.
4941 (define_insn "*<s>mul<mode>3_highpart"
4942 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
4944 (lshiftrt:<ssedoublemode>
4945 (mult:<ssedoublemode>
4946 (any_extend:<ssedoublemode>
4947 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,x"))
4948 (any_extend:<ssedoublemode>
4949 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,xm")))
4951 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
4953 pmulh<u>w\t{%2, %0|%0, %2}
4954 vpmulh<u>w\t{%2, %1, %0|%0, %1, %2}"
4955 [(set_attr "isa" "noavx,avx")
4956 (set_attr "type" "sseimul")
4957 (set_attr "prefix_data16" "1,*")
4958 (set_attr "prefix" "orig,vex")
4959 (set_attr "mode" "<sseinsnmode>")])
;; Expander for a V4DI product built from elements 0, 2, 4 and 6 of two
;; V8SI inputs.  The extension and multiply operator lines are not visible
;; in this listing (the name indicates an unsigned multiply).
;; ix86_fixup_binary_operands_no_copy adjusts the operands for MULT
;; before the insn is generated.
4961 (define_expand "avx2_umulv4siv4di3"
4962 [(set (match_operand:V4DI 0 "register_operand" "")
4966 (match_operand:V8SI 1 "nonimmediate_operand" "")
4967 (parallel [(const_int 0) (const_int 2)
4968 (const_int 4) (const_int 6)])))
4971 (match_operand:V8SI 2 "nonimmediate_operand" "")
4972 (parallel [(const_int 0) (const_int 2)
4973 (const_int 4) (const_int 6)])))))]
4975 "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")
;; Matching insn for avx2_umulv4siv4di3: multiplies elements 0, 2, 4 and 6
;; of two V8SI inputs into a V4DI result, printed as vpmuludq.  Operand 1
;; is marked commutative with "%".  Requires AVX2 and operands accepted by
;; ix86_binary_operator_ok.
;; The name carries the avx2 prefix to agree with its TARGET_AVX2 condition
;; and with the expander; "*" names generate no gen_ function, so the name
;; only appears in dumps.
4977 (define_insn "*avx2_umulv4siv4di3"
4978 [(set (match_operand:V4DI 0 "register_operand" "=x")
4982 (match_operand:V8SI 1 "nonimmediate_operand" "%x")
4983 (parallel [(const_int 0) (const_int 2)
4984 (const_int 4) (const_int 6)])))
4987 (match_operand:V8SI 2 "nonimmediate_operand" "xm")
4988 (parallel [(const_int 0) (const_int 2)
4989 (const_int 4) (const_int 6)])))))]
4990 "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V8SImode, operands)"
4991 "vpmuludq\t{%2, %1, %0|%0, %1, %2}"
4992 [(set_attr "type" "sseimul")
4993 (set_attr "prefix" "vex")
4994 (set_attr "mode" "OI")])
;; Expander for a V2DI product built from elements 0 and 2 of two V4SI
;; inputs.  The extension and multiply operator lines are not visible in
;; this listing (the name indicates an unsigned multiply).
;; ix86_fixup_binary_operands_no_copy adjusts the operands for MULT
;; before the insn is generated.
4996 (define_expand "sse2_umulv2siv2di3"
4997 [(set (match_operand:V2DI 0 "register_operand" "")
5001 (match_operand:V4SI 1 "nonimmediate_operand" "")
5002 (parallel [(const_int 0) (const_int 2)])))
5005 (match_operand:V4SI 2 "nonimmediate_operand" "")
5006 (parallel [(const_int 0) (const_int 2)])))))]
5008 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; Matching insn for sse2_umulv2siv2di3: multiplies elements 0 and 2 of
;; two V4SI inputs into a V2DI result.  Prints pmuludq (non-AVX, operand 1
;; tied to the destination) or vpmuludq (AVX).  Operand 1 is marked
;; commutative with "%".  Requires SSE2 and operands accepted by
;; ix86_binary_operator_ok.
5010 (define_insn "*sse2_umulv2siv2di3"
5011 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
5015 (match_operand:V4SI 1 "nonimmediate_operand" "%0,x")
5016 (parallel [(const_int 0) (const_int 2)])))
5019 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
5020 (parallel [(const_int 0) (const_int 2)])))))]
5021 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5023 pmuludq\t{%2, %0|%0, %2}
5024 vpmuludq\t{%2, %1, %0|%0, %1, %2}"
5025 [(set_attr "isa" "noavx,avx")
5026 (set_attr "type" "sseimul")
5027 (set_attr "prefix_data16" "1,*")
5028 (set_attr "prefix" "orig,vex")
5029 (set_attr "mode" "TI")])
;; Expander for a V4DI product built from elements 0, 2, 4 and 6 of two
;; V8SI inputs.  The extension and multiply operator lines are not visible
;; in this listing (the matching insn prints vpmuldq).
;; ix86_fixup_binary_operands_no_copy adjusts the operands for MULT
;; before the insn is generated.
5031 (define_expand "avx2_mulv4siv4di3"
5032 [(set (match_operand:V4DI 0 "register_operand" "")
5036 (match_operand:V8SI 1 "nonimmediate_operand" "")
5037 (parallel [(const_int 0) (const_int 2)
5038 (const_int 4) (const_int 6)])))
5041 (match_operand:V8SI 2 "nonimmediate_operand" "")
5042 (parallel [(const_int 0) (const_int 2)
5043 (const_int 4) (const_int 6)])))))]
5045 "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")
;; Matching insn for avx2_mulv4siv4di3: multiplies elements 0, 2, 4 and 6
;; of two V8SI inputs into a V4DI result, printed as vpmuldq.  Requires
;; AVX2 and operands accepted by ix86_binary_operator_ok.
;; Operand 1 carries the "%" commutativity modifier, as in the other
;; widening-multiply insns in this file, so the two inputs may be swapped
;; when that lets a memory operand land in operand 2.
5047 (define_insn "*avx2_mulv4siv4di3"
5048 [(set (match_operand:V4DI 0 "register_operand" "=x")
5052 (match_operand:V8SI 1 "nonimmediate_operand" "%x")
5053 (parallel [(const_int 0) (const_int 2)
5054 (const_int 4) (const_int 6)])))
5057 (match_operand:V8SI 2 "nonimmediate_operand" "xm")
5058 (parallel [(const_int 0) (const_int 2)
5059 (const_int 4) (const_int 6)])))))]
5060 "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V8SImode, operands)"
5061 "vpmuldq\t{%2, %1, %0|%0, %1, %2}"
5062 [(set_attr "isa" "avx")
5063 (set_attr "type" "sseimul")
5064 (set_attr "prefix_extra" "1")
5065 (set_attr "prefix" "vex")
5066 (set_attr "mode" "OI")])
;; Expander for a V2DI product built from elements 0 and 2 of two V4SI
;; inputs.  The extension and multiply operator lines are not visible in
;; this listing (the matching insn prints pmuldq).
;; ix86_fixup_binary_operands_no_copy adjusts the operands for MULT
;; before the insn is generated.
5068 (define_expand "sse4_1_mulv2siv2di3"
5069 [(set (match_operand:V2DI 0 "register_operand" "")
5073 (match_operand:V4SI 1 "nonimmediate_operand" "")
5074 (parallel [(const_int 0) (const_int 2)])))
5077 (match_operand:V4SI 2 "nonimmediate_operand" "")
5078 (parallel [(const_int 0) (const_int 2)])))))]
5080 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; Matching insn for sse4_1_mulv2siv2di3: multiplies elements 0 and 2 of
;; two V4SI inputs into a V2DI result.  Prints pmuldq (non-AVX, operand 1
;; tied to the destination) or vpmuldq (AVX).  Operand 1 is marked
;; commutative with "%".  Requires SSE4.1 and operands accepted by
;; ix86_binary_operator_ok.
5082 (define_insn "*sse4_1_mulv2siv2di3"
5083 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
5087 (match_operand:V4SI 1 "nonimmediate_operand" "%0,x")
5088 (parallel [(const_int 0) (const_int 2)])))
5091 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
5092 (parallel [(const_int 0) (const_int 2)])))))]
5093 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5095 pmuldq\t{%2, %0|%0, %2}
5096 vpmuldq\t{%2, %1, %0|%0, %1, %2}"
5097 [(set_attr "isa" "noavx,avx")
5098 (set_attr "type" "sseimul")
5099 (set_attr "prefix_data16" "1,*")
5100 (set_attr "prefix_extra" "1")
5101 (set_attr "prefix" "orig,vex")
5102 (set_attr "mode" "TI")])
;; Expander for the 256-bit pmaddwd pattern: a V8SI result computed from
;; two V16HI inputs.  Each input is selected twice, once with a selector
;; list starting at index 0 and once (as V8HI) with a list starting at 1
;; and ending at 15.  Most selector entries and the arithmetic operators
;; are not visible in this listing.  ix86_fixup_binary_operands_no_copy
;; adjusts the operands for MULT before the insn is generated.
5104 (define_expand "avx2_pmaddwd"
5105 [(set (match_operand:V8SI 0 "register_operand" "")
5110 (match_operand:V16HI 1 "nonimmediate_operand" "")
5111 (parallel [(const_int 0)
5121 (match_operand:V16HI 2 "nonimmediate_operand" "")
5122 (parallel [(const_int 0)
5132 (vec_select:V8HI (match_dup 1)
5133 (parallel [(const_int 1)
5142 (vec_select:V8HI (match_dup 2)
5143 (parallel [(const_int 1)
5150 (const_int 15)]))))))]
5152 "ix86_fixup_binary_operands_no_copy (MULT, V16HImode, operands);")
;; Expander for the 128-bit pmaddwd pattern: a V4SI result computed from
;; two V8HI inputs.  Each input is selected twice, once with a selector
;; list starting at index 0 and once (as V4HI) with a list starting at 1
;; and ending at 7.  Most selector entries and the arithmetic operators
;; are not visible in this listing.  ix86_fixup_binary_operands_no_copy
;; adjusts the operands for MULT before the insn is generated.
5154 (define_expand "sse2_pmaddwd"
5155 [(set (match_operand:V4SI 0 "register_operand" "")
5160 (match_operand:V8HI 1 "nonimmediate_operand" "")
5161 (parallel [(const_int 0)
5167 (match_operand:V8HI 2 "nonimmediate_operand" "")
5168 (parallel [(const_int 0)
5174 (vec_select:V4HI (match_dup 1)
5175 (parallel [(const_int 1)
5180 (vec_select:V4HI (match_dup 2)
5181 (parallel [(const_int 1)
5184 (const_int 7)]))))))]
5186 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; Matching insn for avx2_pmaddwd, printed as vpmaddwd.  Operand 1 is
;; marked commutative with "%"; operand 2 may be memory.  Requires AVX2 and
;; operands accepted by ix86_binary_operator_ok.  Most of the selector
;; entries and the arithmetic operators are not visible in this listing.
5188 (define_insn "*avx2_pmaddwd"
5189 [(set (match_operand:V8SI 0 "register_operand" "=x")
5194 (match_operand:V16HI 1 "nonimmediate_operand" "%x")
5195 (parallel [(const_int 0)
5205 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
5206 (parallel [(const_int 0)
5216 (vec_select:V8HI (match_dup 1)
5217 (parallel [(const_int 1)
5226 (vec_select:V8HI (match_dup 2)
5227 (parallel [(const_int 1)
5234 (const_int 15)]))))))]
5235 "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V16HImode, operands)"
5236 "vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
5237 [(set_attr "type" "sseiadd")
5238 (set_attr "prefix" "vex")
5239 (set_attr "mode" "OI")])
;; Matching insn for sse2_pmaddwd.  Prints pmaddwd (non-AVX, operand 1
;; tied to the destination) or vpmaddwd (AVX).  Operand 1 is marked
;; commutative with "%".  Requires SSE2 and operands accepted by
;; ix86_binary_operator_ok.  Most of the selector entries and the
;; arithmetic operators are not visible in this listing.
5241 (define_insn "*sse2_pmaddwd"
5242 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
5247 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x")
5248 (parallel [(const_int 0)
5254 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
5255 (parallel [(const_int 0)
5261 (vec_select:V4HI (match_dup 1)
5262 (parallel [(const_int 1)
5267 (vec_select:V4HI (match_dup 2)
5268 (parallel [(const_int 1)
5271 (const_int 7)]))))))]
5272 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5274 pmaddwd\t{%2, %0|%0, %2}
5275 vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
5276 [(set_attr "isa" "noavx,avx")
5277 (set_attr "type" "sseiadd")
5278 (set_attr "atom_unit" "simul")
5279 (set_attr "prefix_data16" "1,*")
5280 (set_attr "prefix" "orig,vex")
5281 (set_attr "mode" "TI")])
;; Doubleword-element vector multiply expander (VI4_AVX2 modes); both
;; inputs must be registers.  The operands are adjusted with
;; ix86_fixup_binary_operands_no_copy only when SSE4.1 or AVX is enabled;
;; nothing is done here otherwise.  The expander condition is not visible
;; in this listing.
5283 (define_expand "mul<mode>3"
5284 [(set (match_operand:VI4_AVX2 0 "register_operand" "")
5285 (mult:VI4_AVX2 (match_operand:VI4_AVX2 1 "register_operand" "")
5286 (match_operand:VI4_AVX2 2 "register_operand" "")))]
5289 if (TARGET_SSE4_1 || TARGET_AVX)
5290 ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
;; Doubleword-element vector multiply insn for the VI4_AVX2 modes:
;; pmulld (non-AVX, operand 1 tied to the destination) or vpmulld (AVX).
;; Operand 1 is marked commutative with "%".  Requires SSE4.1 and operands
;; accepted by ix86_binary_operator_ok.
5293 (define_insn "*<sse4_1_avx2>_mul<mode>3"
5294 [(set (match_operand:VI4_AVX2 0 "register_operand" "=x,x")
5295 (mult:VI4_AVX2 (match_operand:VI4_AVX2 1 "nonimmediate_operand" "%0,x")
5296 (match_operand:VI4_AVX2 2 "nonimmediate_operand" "xm,xm")))]
5297 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
5299 pmulld\t{%2, %0|%0, %2}
5300 vpmulld\t{%2, %1, %0|%0, %1, %2}"
5301 [(set_attr "isa" "noavx,avx")
5302 (set_attr "type" "sseimul")
5303 (set_attr "prefix_extra" "1")
5304 (set_attr "prefix" "orig,vex")
5305 (set_attr "mode" "<sseinsnmode>")])
;; V4SI multiply for SSE2 targets that have neither SSE4.1 nor AVX,
;; implemented as a pre-reload split (can_create_pseudo_p):
;;   1. widening multiply of elements 2 and 0 into t1;
;;   2. shift both inputs down one element (V1TI logical right shift) into
;;      t2 and t3;
;;   3. widening multiply of what were elements 3 and 1 into t4;
;;   4. pshufd each product so its element 2 moves to element 1 (t5, t6);
;;   5. interleave the low parts of t5 and t6 into the destination.
;; A REG_EQUAL note with the original MULT is attached to the last insn.
;; The assignments of op0/op1/op2 and some call arguments are not visible
;; in this listing.
5307 (define_insn_and_split "*sse2_mulv4si3"
5308 [(set (match_operand:V4SI 0 "register_operand" "")
5309 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
5310 (match_operand:V4SI 2 "register_operand" "")))]
5311 "TARGET_SSE2 && !TARGET_SSE4_1 && !TARGET_AVX
5312 && can_create_pseudo_p ()"
5317 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
5323 t1 = gen_reg_rtx (V4SImode);
5324 t2 = gen_reg_rtx (V4SImode);
5325 t3 = gen_reg_rtx (V4SImode);
5326 t4 = gen_reg_rtx (V4SImode);
5327 t5 = gen_reg_rtx (V4SImode);
5328 t6 = gen_reg_rtx (V4SImode);
5329 thirtytwo = GEN_INT (32);
5331 /* Multiply elements 2 and 0. */
5332 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t1),
5335 /* Shift both input vectors down one element, so that elements 3
5336 and 1 are now in the slots for elements 2 and 0. For K8, at
5337 least, this is faster than using a shuffle. */
5338 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t2),
5339 gen_lowpart (V1TImode, op1),
5341 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t3),
5342 gen_lowpart (V1TImode, op2),
5344 /* Multiply elements 3 and 1. */
5345 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t4),
5348 /* Move the results in element 2 down to element 1; we don't care
5349 what goes in elements 2 and 3. */
5350 emit_insn (gen_sse2_pshufd_1 (t5, t1, const0_rtx, const2_rtx,
5351 const0_rtx, const0_rtx));
5352 emit_insn (gen_sse2_pshufd_1 (t6, t4, const0_rtx, const2_rtx,
5353 const0_rtx, const0_rtx));
5355 /* Merge the parts back together. */
5356 emit_insn (gen_vec_interleave_lowv4si (op0, t5, t6));
5358 set_unique_reg_note (get_last_insn (), REG_EQUAL,
5359 gen_rtx_MULT (V4SImode, operands[1], operands[2]));
;; V2DI multiply, implemented as a pre-reload split (can_create_pseudo_p)
;; into 32-bit operations.  Two code sequences are visible below; the test
;; that chooses between them is not visible in this listing.
;;  - The first uses XOP helpers: swap the 32-bit halves of op1 (pshufd),
;;    form the cross products with mulv4si3, add each pair with phadddq,
;;    shift the sums left by 32, then multiply-accumulate the low products
;;    on top with pmacsdql.
;;  - The second is generic SSE2: low*low, plus the two high*low cross
;;    products shifted left by 32, using sse2_umulv2siv2di3, 64-bit shifts
;;    and adds.
;; A REG_EQUAL note with the original MULT is attached to the last insn.
5363 (define_insn_and_split "mulv2di3"
5364 [(set (match_operand:V2DI 0 "register_operand" "")
5365 (mult:V2DI (match_operand:V2DI 1 "register_operand" "")
5366 (match_operand:V2DI 2 "register_operand" "")))]
5368 && can_create_pseudo_p ()"
5373 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
5382 /* op1: A,B,C,D, op2: E,F,G,H */
5383 op1 = gen_lowpart (V4SImode, op1);
5384 op2 = gen_lowpart (V4SImode, op2);
5386 t1 = gen_reg_rtx (V4SImode);
5387 t2 = gen_reg_rtx (V4SImode);
5388 t3 = gen_reg_rtx (V2DImode);
5389 t4 = gen_reg_rtx (V2DImode);
5392 emit_insn (gen_sse2_pshufd_1 (t1, op1,
5398 /* t2: (B*E),(A*F),(D*G),(C*H) */
5399 emit_insn (gen_mulv4si3 (t2, t1, op2));
5401 /* t3: (B*E)+(A*F), (D*G)+(C*H) */
5402 emit_insn (gen_xop_phadddq (t3, t2));
5404 /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
5405 emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));
5407 /* op0: (((B*E)+(A*F))<<32)+(B*F), (((D*G)+(C*H))<<32)+(D*H) */
5408 emit_insn (gen_xop_pmacsdql (op0, op1, op2, t4));
5412 t1 = gen_reg_rtx (V2DImode);
5413 t2 = gen_reg_rtx (V2DImode);
5414 t3 = gen_reg_rtx (V2DImode);
5415 t4 = gen_reg_rtx (V2DImode);
5416 t5 = gen_reg_rtx (V2DImode);
5417 t6 = gen_reg_rtx (V2DImode);
5418 thirtytwo = GEN_INT (32);
5420 /* Multiply low parts. */
5421 emit_insn (gen_sse2_umulv2siv2di3 (t1, gen_lowpart (V4SImode, op1),
5422 gen_lowpart (V4SImode, op2)));
5424 /* Shift input vectors right 32 bits so we can multiply high parts. */
5425 emit_insn (gen_lshrv2di3 (t2, op1, thirtytwo));
5426 emit_insn (gen_lshrv2di3 (t3, op2, thirtytwo));
5428 /* Multiply high parts by low parts. */
5429 emit_insn (gen_sse2_umulv2siv2di3 (t4, gen_lowpart (V4SImode, op1),
5430 gen_lowpart (V4SImode, t3)));
5431 emit_insn (gen_sse2_umulv2siv2di3 (t5, gen_lowpart (V4SImode, op2),
5432 gen_lowpart (V4SImode, t2)));
5434 /* Shift them back. */
5435 emit_insn (gen_ashlv2di3 (t4, t4, thirtytwo));
5436 emit_insn (gen_ashlv2di3 (t5, t5, thirtytwo));
5438 /* Add the three parts together. */
5439 emit_insn (gen_addv2di3 (t6, t1, t4));
5440 emit_insn (gen_addv2di3 (op0, t6, t5));
5443 set_unique_reg_note (get_last_insn (), REG_EQUAL,
5444 gen_rtx_MULT (V2DImode, operands[1], operands[2]));
;; Widening signed multiply, high half: V8HI x V8HI -> V4SI.
;; t1 receives the V8HI multiply (mulv8hi3) and t2 the signed high-part
;; multiply (smulv8hi3_highpart); the high halves of t1 and t2 are then
;; interleaved into the destination viewed as V8HI.  The assignments of
;; op1/op2 and the expander condition are not visible in this listing.
5448 (define_expand "vec_widen_smult_hi_v8hi"
5449 [(match_operand:V4SI 0 "register_operand" "")
5450 (match_operand:V8HI 1 "register_operand" "")
5451 (match_operand:V8HI 2 "register_operand" "")]
5454 rtx op1, op2, t1, t2, dest;
5458 t1 = gen_reg_rtx (V8HImode);
5459 t2 = gen_reg_rtx (V8HImode);
5460 dest = gen_lowpart (V8HImode, operands[0]);
5462 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5463 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
5464 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
;; Widening signed multiply, low half: V8HI x V8HI -> V4SI.
;; t1 receives the V8HI multiply (mulv8hi3) and t2 the signed high-part
;; multiply (smulv8hi3_highpart); the low halves of t1 and t2 are then
;; interleaved into the destination viewed as V8HI.  The assignments of
;; op1/op2 and the expander condition are not visible in this listing.
5468 (define_expand "vec_widen_smult_lo_v8hi"
5469 [(match_operand:V4SI 0 "register_operand" "")
5470 (match_operand:V8HI 1 "register_operand" "")
5471 (match_operand:V8HI 2 "register_operand" "")]
5474 rtx op1, op2, t1, t2, dest;
5478 t1 = gen_reg_rtx (V8HImode);
5479 t2 = gen_reg_rtx (V8HImode);
5480 dest = gen_lowpart (V8HImode, operands[0]);
5482 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5483 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
5484 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
;; Widening unsigned multiply, high half: V8HI x V8HI -> V4SI.
;; t1 receives the V8HI multiply (mulv8hi3) and t2 the unsigned high-part
;; multiply (umulv8hi3_highpart); the high halves of t1 and t2 are then
;; interleaved into the destination viewed as V8HI.  The assignments of
;; op1/op2 and the expander condition are not visible in this listing.
5488 (define_expand "vec_widen_umult_hi_v8hi"
5489 [(match_operand:V4SI 0 "register_operand" "")
5490 (match_operand:V8HI 1 "register_operand" "")
5491 (match_operand:V8HI 2 "register_operand" "")]
5494 rtx op1, op2, t1, t2, dest;
5498 t1 = gen_reg_rtx (V8HImode);
5499 t2 = gen_reg_rtx (V8HImode);
5500 dest = gen_lowpart (V8HImode, operands[0]);
5502 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5503 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
5504 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
;; Widening unsigned multiply, low half: V8HI x V8HI -> V4SI.
;; t1 receives the V8HI multiply (mulv8hi3) and t2 the unsigned high-part
;; multiply (umulv8hi3_highpart); the low halves of t1 and t2 are then
;; interleaved into the destination viewed as V8HI.  The assignments of
;; op1/op2 and the expander condition are not visible in this listing.
5508 (define_expand "vec_widen_umult_lo_v8hi"
5509 [(match_operand:V4SI 0 "register_operand" "")
5510 (match_operand:V8HI 1 "register_operand" "")
5511 (match_operand:V8HI 2 "register_operand" "")]
5514 rtx op1, op2, t1, t2, dest;
5518 t1 = gen_reg_rtx (V8HImode);
5519 t2 = gen_reg_rtx (V8HImode);
5520 dest = gen_lowpart (V8HImode, operands[0]);
5522 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5523 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
5524 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
;; Widening signed multiply, high half: V4SI x V4SI -> V2DI.
;; Each input is first rearranged with pshufd into a temporary (t1, t2);
;; the shuffle selectors are not visible in this listing.  The product is
;; then produced by xop_mulv2div2di3_high, so this expander presumably
;; requires XOP (its condition is not visible here).
5528 (define_expand "vec_widen_smult_hi_v4si"
5529 [(match_operand:V2DI 0 "register_operand" "")
5530 (match_operand:V4SI 1 "register_operand" "")
5531 (match_operand:V4SI 2 "register_operand" "")]
5536 t1 = gen_reg_rtx (V4SImode);
5537 t2 = gen_reg_rtx (V4SImode);
5539 emit_insn (gen_sse2_pshufd_1 (t1, operands[1],
5544 emit_insn (gen_sse2_pshufd_1 (t2, operands[2],
5549 emit_insn (gen_xop_mulv2div2di3_high (operands[0], t1, t2));
;; Widening signed multiply, low half: V4SI x V4SI -> V2DI.
;; Mirrors vec_widen_smult_hi_v4si: both inputs are permuted with
;; sse2_pshufd_1 into V4SI temporaries, which are then multiplied with
;; xop_mulv2div2di3_low into operand 0.
;; NOTE(review): presumably XOP-only, like the _hi variant; confirm.
5553 (define_expand "vec_widen_smult_lo_v4si"
5554 [(match_operand:V2DI 0 "register_operand" "")
5555 (match_operand:V4SI 1 "register_operand" "")
5556 (match_operand:V4SI 2 "register_operand" "")]
5561 t1 = gen_reg_rtx (V4SImode);
5562 t2 = gen_reg_rtx (V4SImode);
5564 emit_insn (gen_sse2_pshufd_1 (t1, operands[1],
5569 emit_insn (gen_sse2_pshufd_1 (t2, operands[2],
5574 emit_insn (gen_xop_mulv2div2di3_low (operands[0], t1, t2));
;; Widening unsigned multiply, high half: V4SI x V4SI -> V2DI.
;; Each input is interleaved with itself via vec_interleave_highv4si
;; (duplicating its high elements) and the two results are fed to
;; sse2_umulv2siv2di3.
;; NOTE(review): presumably the duplication lines the wanted elements up
;; with the lanes sse2_umulv2siv2di3 reads; confirm against that insn.
5578 (define_expand "vec_widen_umult_hi_v4si"
5579 [(match_operand:V2DI 0 "register_operand" "")
5580 (match_operand:V4SI 1 "register_operand" "")
5581 (match_operand:V4SI 2 "register_operand" "")]
5584 rtx op1, op2, t1, t2;
5588 t1 = gen_reg_rtx (V4SImode);
5589 t2 = gen_reg_rtx (V4SImode);
5591 emit_insn (gen_vec_interleave_highv4si (t1, op1, op1));
5592 emit_insn (gen_vec_interleave_highv4si (t2, op2, op2));
5593 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Widening unsigned multiply, low half: V4SI x V4SI -> V2DI.
;; Each input is interleaved with itself via vec_interleave_lowv4si
;; (duplicating its low elements) and the two results are fed to
;; sse2_umulv2siv2di3.
5597 (define_expand "vec_widen_umult_lo_v4si"
5598 [(match_operand:V2DI 0 "register_operand" "")
5599 (match_operand:V4SI 1 "register_operand" "")
5600 (match_operand:V4SI 2 "register_operand" "")]
5603 rtx op1, op2, t1, t2;
5607 t1 = gen_reg_rtx (V4SImode);
5608 t2 = gen_reg_rtx (V4SImode);
5610 emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1));
5611 emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2));
5612 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Signed dot-product accumulate: operands 1 and 2 are V8HI, operand 3
;; is the V4SI accumulator, operand 0 the V4SI result.
;; Emits sse2_pmaddwd of operands 1 and 2 into a temporary, then adds
;; that temporary to operand 3 with addv4si3.
5616 (define_expand "sdot_prodv8hi"
5617 [(match_operand:V4SI 0 "register_operand" "")
5618 (match_operand:V8HI 1 "register_operand" "")
5619 (match_operand:V8HI 2 "register_operand" "")
5620 (match_operand:V4SI 3 "register_operand" "")]
5623 rtx t = gen_reg_rtx (V4SImode);
5624 emit_insn (gen_sse2_pmaddwd (t, operands[1], operands[2]));
5625 emit_insn (gen_addv4si3 (operands[0], operands[3], t));
;; Unsigned dot-product accumulate: operands 1 and 2 are V4SI, operand 3
;; is the V2DI accumulator, operand 0 the V2DI result.
;; Step 1: t1 = sse2_umulv2siv2di3 (op1, op2) + op3.
;; Step 2: t2/t3 = op1/op2 shifted right as whole V1TI values
;;         (sse2_lshrv1ti3), then t4 = sse2_umulv2siv2di3 (t2, t3).
;; Step 3: operand 0 = t1 + t4.
;; NOTE(review): presumably the whole-register shift moves the elements
;; not covered by the first multiply into the lanes it reads; confirm.
5629 (define_expand "udot_prodv4si"
5630 [(match_operand:V2DI 0 "register_operand" "")
5631 (match_operand:V4SI 1 "register_operand" "")
5632 (match_operand:V4SI 2 "register_operand" "")
5633 (match_operand:V2DI 3 "register_operand" "")]
5638 t1 = gen_reg_rtx (V2DImode);
5639 emit_insn (gen_sse2_umulv2siv2di3 (t1, operands[1], operands[2]));
5640 emit_insn (gen_addv2di3 (t1, t1, operands[3]));
5642 t2 = gen_reg_rtx (V4SImode);
5643 t3 = gen_reg_rtx (V4SImode);
5644 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t2),
5645 gen_lowpart (V1TImode, operands[1]),
5647 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t3),
5648 gen_lowpart (V1TImode, operands[2]),
5651 t4 = gen_reg_rtx (V2DImode);
5652 emit_insn (gen_sse2_umulv2siv2di3 (t4, t2, t3));
5654 emit_insn (gen_addv2di3 (operands[0], t1, t4));
;; Per-element right shift for VI24_AVX2 modes, emitted as psra/vpsra.
;; Operand 2 is an SImode count, either an xmm register or a constant
;; ("xN").  Alternative 0 is the two-operand legacy form (destination
;; tied to operand 1, isa noavx); alternative 1 is the three-operand
;; VEX form (isa avx).  length_immediate depends on whether operand 2 is
;; a const_int.
5658 (define_insn "ashr<mode>3"
5659 [(set (match_operand:VI24_AVX2 0 "register_operand" "=x,x")
5661 (match_operand:VI24_AVX2 1 "register_operand" "0,x")
5662 (match_operand:SI 2 "nonmemory_operand" "xN,xN")))]
5665 psra<ssemodesuffix>\t{%2, %0|%0, %2}
5666 vpsra<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5667 [(set_attr "isa" "noavx,avx")
5668 (set_attr "type" "sseishft")
5669 (set (attr "length_immediate")
5670 (if_then_else (match_operand 2 "const_int_operand" "")
5672 (const_string "0")))
5673 (set_attr "prefix_data16" "1,*")
5674 (set_attr "prefix" "orig,vex")
5675 (set_attr "mode" "<sseinsnmode>")])
;; Per-element logical right shift (lshiftrt) for VI248_AVX2 modes,
;; emitted as psrl/vpsrl.  Operand 2 is an SImode count in an xmm
;; register or a constant ("xN").  Alternative 0 is the legacy
;; two-operand form, alternative 1 the VEX three-operand form;
;; length_immediate depends on whether operand 2 is a const_int.
5677 (define_insn "lshr<mode>3"
5678 [(set (match_operand:VI248_AVX2 0 "register_operand" "=x,x")
5679 (lshiftrt:VI248_AVX2
5680 (match_operand:VI248_AVX2 1 "register_operand" "0,x")
5681 (match_operand:SI 2 "nonmemory_operand" "xN,xN")))]
5684 psrl<ssemodesuffix>\t{%2, %0|%0, %2}
5685 vpsrl<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5686 [(set_attr "isa" "noavx,avx")
5687 (set_attr "type" "sseishft")
5688 (set (attr "length_immediate")
5689 (if_then_else (match_operand 2 "const_int_operand" "")
5691 (const_string "0")))
5692 (set_attr "prefix_data16" "1,*")
5693 (set_attr "prefix" "orig,vex")
5694 (set_attr "mode" "<sseinsnmode>")])
;; 256-bit per-element left shift for VI248_256 modes, emitted as the
;; three-operand VEX instruction vpsll.  Operand 2 is an SImode count in
;; an xmm register or a constant ("xN").  The insn has a single
;; alternative and is tagged with mode OI.
5696 (define_insn "avx2_lshl<mode>3"
5697 [(set (match_operand:VI248_256 0 "register_operand" "=x")
5699 (match_operand:VI248_256 1 "register_operand" "x")
5700 (match_operand:SI 2 "nonmemory_operand" "xN")))]
5702 "vpsll<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5703 [(set_attr "type" "sseishft")
5704 (set_attr "prefix" "vex")
5705 (set (attr "length_immediate")
5706 (if_then_else (match_operand 2 "const_int_operand" "")
5708 (const_string "0")))
5709 (set_attr "mode" "OI")])
;; 128-bit per-element left shift for VI248_128 modes, emitted as
;; psll (legacy, destination tied to operand 1) or vpsll (VEX,
;; three-operand).  Operand 2 is an SImode count in an xmm register or a
;; constant ("xN"); the insn is tagged with mode TI.
5711 (define_insn "ashl<mode>3"
5712 [(set (match_operand:VI248_128 0 "register_operand" "=x,x")
5714 (match_operand:VI248_128 1 "register_operand" "0,x")
5715 (match_operand:SI 2 "nonmemory_operand" "xN,xN")))]
5718 psll<ssemodesuffix>\t{%2, %0|%0, %2}
5719 vpsll<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5720 [(set_attr "isa" "noavx,avx")
5721 (set_attr "type" "sseishft")
5722 (set (attr "length_immediate")
5723 (if_then_else (match_operand 2 "const_int_operand" "")
5725 (const_string "0")))
5726 (set_attr "prefix_data16" "1,*")
5727 (set_attr "prefix" "orig,vex")
5728 (set_attr "mode" "TI")])
;; Whole-vector shift expander for VI_128 modes (pattern name
;; vec_shl_<mode>).  The count must satisfy
;; const_0_to_255_mul_8_operand.  The preparation code re-views
;; operands 0 and 1 as V1TImode so the shift acts on the register as a
;; single 128-bit value.
5730 (define_expand "vec_shl_<mode>"
5731 [(set (match_operand:VI_128 0 "register_operand" "")
5733 (match_operand:VI_128 1 "register_operand" "")
5734 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
5737 operands[0] = gen_lowpart (V1TImode, operands[0]);
5738 operands[1] = gen_lowpart (V1TImode, operands[1]);
;; Whole-register left shift for VIMAX_AVX2 modes, emitted as
;; pslldq/vpslldq.  Operand 2 arrives as a constant accepted by
;; const_0_to_255_mul_8_operand and is divided by 8 before output, so
;; the instruction receives the scaled-down count.  The alternative
;; selects the legacy two-operand or VEX three-operand template.
5741 (define_insn "<sse2_avx2>_ashl<mode>3"
5742 [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,x")
5744 (match_operand:VIMAX_AVX2 1 "register_operand" "0,x")
5745 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
5748 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
5750 switch (which_alternative)
5753 return "pslldq\t{%2, %0|%0, %2}";
5755 return "vpslldq\t{%2, %1, %0|%0, %1, %2}";
5760 [(set_attr "isa" "noavx,avx")
5761 (set_attr "type" "sseishft")
5762 (set_attr "length_immediate" "1")
5763 (set_attr "prefix_data16" "1,*")
5764 (set_attr "prefix" "orig,vex")
5765 (set_attr "mode" "<sseinsnmode>")])
;; Whole-vector shift expander for VI_128 modes (pattern name
;; vec_shr_<mode>).  The count must satisfy
;; const_0_to_255_mul_8_operand.  The preparation code re-views
;; operands 0 and 1 as V1TImode, as vec_shl_<mode> does.
5767 (define_expand "vec_shr_<mode>"
5768 [(set (match_operand:VI_128 0 "register_operand" "")
5770 (match_operand:VI_128 1 "register_operand" "")
5771 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
5774 operands[0] = gen_lowpart (V1TImode, operands[0]);
5775 operands[1] = gen_lowpart (V1TImode, operands[1]);
;; Whole-register logical right shift (lshiftrt) for VIMAX_AVX2 modes,
;; emitted as psrldq/vpsrldq.  Operand 2 arrives as a constant accepted
;; by const_0_to_255_mul_8_operand and is divided by 8 before output.
;; Unlike the matching left-shift insn, this pattern also sets
;; atom_unit to "sishuf".
5778 (define_insn "<sse2_avx2>_lshr<mode>3"
5779 [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,x")
5780 (lshiftrt:VIMAX_AVX2
5781 (match_operand:VIMAX_AVX2 1 "register_operand" "0,x")
5782 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
5785 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
5787 switch (which_alternative)
5790 return "psrldq\t{%2, %0|%0, %2}";
5792 return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
5797 [(set_attr "isa" "noavx,avx")
5798 (set_attr "type" "sseishft")
5799 (set_attr "length_immediate" "1")
5800 (set_attr "atom_unit" "sishuf")
5801 (set_attr "prefix_data16" "1,*")
5802 (set_attr "prefix" "orig,vex")
5803 (set_attr "mode" "<sseinsnmode>")])
;; Binary-operation expander for VI124_256 modes.  Its only preparation
;; step is ix86_fixup_binary_operands_no_copy on the operand array.
;; NOTE(review): presumably the max/min expander paired with
;; *avx2_<code><mode>3; confirm against the code iterator in the
;; RTL template.
5806 (define_expand "<code><mode>3"
5807 [(set (match_operand:VI124_256 0 "register_operand" "")
5809 (match_operand:VI124_256 1 "nonimmediate_operand" "")
5810 (match_operand:VI124_256 2 "nonimmediate_operand" "")))]
5812 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; AVX2 integer max/min insn for VI124_256 modes, emitted as
;; vp<maxmin_int><ssemodesuffix>.  Operand 1 is marked commutative
;; ("%x"); operand 2 may be a register or memory.  Enabled for
;; TARGET_AVX2 when ix86_binary_operator_ok accepts the operands.
5814 (define_insn "*avx2_<code><mode>3"
5815 [(set (match_operand:VI124_256 0 "register_operand" "=x")
5817 (match_operand:VI124_256 1 "nonimmediate_operand" "%x")
5818 (match_operand:VI124_256 2 "nonimmediate_operand" "xm")))]
5819 "TARGET_AVX2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5820 "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5821 [(set_attr "type" "sseiadd")
5822 (set_attr "prefix_extra" "1")
5823 (set_attr "prefix" "vex")
5824 (set_attr "mode" "OI")])
;; Max/min expander (maxmin) for VI8_AVX2 modes, lowered to a vector
;; conditional move through ix86_expand_int_vcond.
;; xops layout: [0] destination, [1]/[2] the two values to select
;; between, [3] the comparison, [4]/[5] the comparison operands.
;; The values are taken in operand order when the code is SMAX or UMAX;
;; the second pair of assignments swaps them (presumably the min case).
;; The comparison is GTU for UMAX/UMIN and GT otherwise, always on
;; (operands[1], operands[2]).
5826 (define_expand "<code><mode>3"
5827 [(set (match_operand:VI8_AVX2 0 "register_operand" "")
5828 (maxmin:VI8_AVX2 (match_operand:VI8_AVX2 1 "register_operand" "")
5829 (match_operand:VI8_AVX2 2 "register_operand" "")))]
5836 xops[0] = operands[0];
5838 if (<CODE> == SMAX || <CODE> == UMAX)
5840 xops[1] = operands[1];
5841 xops[2] = operands[2];
5845 xops[1] = operands[2];
5846 xops[2] = operands[1];
5849 code = (<CODE> == UMAX || <CODE> == UMIN) ? GTU : GT;
5851 xops[3] = gen_rtx_fmt_ee (code, VOIDmode, operands[1], operands[2]);
5852 xops[4] = operands[1];
5853 xops[5] = operands[2];
5855 ok = ix86_expand_int_vcond (xops);
;; Signed max/min expander (smaxmin) for VI124_128 modes.
;; With TARGET_SSE4_1, or for V8HImode, the operands are only fixed up
;; with ix86_fixup_binary_operands_no_copy.  The remaining code forces
;; both inputs into registers and builds a GT-based vector conditional
;; for ix86_expand_int_vcond: xops[1]/[2] are the values to select (one
;; ordering per max/min case), xops[3] is the GT comparison of
;; (operands[1], operands[2]), and xops[4]/[5] are its operands.
5860 (define_expand "<code><mode>3"
5861 [(set (match_operand:VI124_128 0 "register_operand" "")
5862 (smaxmin:VI124_128 (match_operand:VI124_128 1 "nonimmediate_operand" "")
5863 (match_operand:VI124_128 2 "nonimmediate_operand" "")))]
5866 if (TARGET_SSE4_1 || <MODE>mode == V8HImode)
5867 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
5873 xops[0] = operands[0];
5874 operands[1] = force_reg (<MODE>mode, operands[1]);
5875 operands[2] = force_reg (<MODE>mode, operands[2]);
5879 xops[1] = operands[1];
5880 xops[2] = operands[2];
5884 xops[1] = operands[2];
5885 xops[2] = operands[1];
5888 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
5889 xops[4] = operands[1];
5890 xops[5] = operands[2];
5892 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 integer max/min insn for VI14_128 modes, emitted as
;; p<maxmin_int><ssemodesuffix> (legacy, destination tied to operand 1)
;; or the VEX three-operand vp... form.  Operand 1 is marked
;; commutative ("%").  Enabled for TARGET_SSE4_1 when
;; ix86_binary_operator_ok accepts the operands.
5898 (define_insn "*sse4_1_<code><mode>3"
5899 [(set (match_operand:VI14_128 0 "register_operand" "=x,x")
5901 (match_operand:VI14_128 1 "nonimmediate_operand" "%0,x")
5902 (match_operand:VI14_128 2 "nonimmediate_operand" "xm,xm")))]
5903 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5905 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
5906 vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5907 [(set_attr "isa" "noavx,avx")
5908 (set_attr "type" "sseiadd")
5909 (set_attr "prefix_extra" "1,*")
5910 (set_attr "prefix" "orig,vex")
5911 (set_attr "mode" "TI")])
;; SSE2 max/min insn for V8HI, emitted as p<maxmin_int>w or
;; vp<maxmin_int>w.  Operand 1 is marked commutative ("%").  Enabled for
;; TARGET_SSE2 when ix86_binary_operator_ok accepts the operands.
5913 (define_insn "*<code>v8hi3"
5914 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
5916 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x")
5917 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")))]
5918 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V8HImode, operands)"
5920 p<maxmin_int>w\t{%2, %0|%0, %2}
5921 vp<maxmin_int>w\t{%2, %1, %0|%0, %1, %2}"
5922 [(set_attr "isa" "noavx,avx")
5923 (set_attr "type" "sseiadd")
5924 (set_attr "prefix_data16" "1,*")
5925 (set_attr "prefix_extra" "*,1")
5926 (set_attr "prefix" "orig,vex")
5927 (set_attr "mode" "TI")])
;; Unsigned max/min expander (umaxmin) for VI124_128 modes.
;; Three strategies are visible:
;;  - TARGET_SSE4_1, or V16QImode: only fix up the operands with
;;    ix86_fixup_binary_operands_no_copy.
;;  - UMAX on V8HImode: compute sse2_ussubv8hi3 (op1, op2) and add op2
;;    back with addv8hi3.  A fresh temporary holds the intermediate when
;;    the destination is rtx_equal_p to operand 2, so operand 2 is not
;;    overwritten before the add reads it.
;;  - otherwise: force both inputs into registers and build a GTU-based
;;    vector conditional for ix86_expand_int_vcond (xops[1]/[2] are the
;;    values to select, xops[3] the comparison, xops[4]/[5] its operands).
5929 (define_expand "<code><mode>3"
5930 [(set (match_operand:VI124_128 0 "register_operand" "")
5931 (umaxmin:VI124_128 (match_operand:VI124_128 1 "nonimmediate_operand" "")
5932 (match_operand:VI124_128 2 "nonimmediate_operand" "")))]
5935 if (TARGET_SSE4_1 || <MODE>mode == V16QImode)
5936 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
5937 else if (<CODE> == UMAX && <MODE>mode == V8HImode)
5939 rtx op0 = operands[0], op2 = operands[2], op3 = op0;
5940 operands[1] = force_reg (<MODE>mode, operands[1]);
5941 if (rtx_equal_p (op3, op2))
5942 op3 = gen_reg_rtx (V8HImode);
5943 emit_insn (gen_sse2_ussubv8hi3 (op3, operands[1], op2));
5944 emit_insn (gen_addv8hi3 (op0, op3, op2));
5952 operands[1] = force_reg (<MODE>mode, operands[1]);
5953 operands[2] = force_reg (<MODE>mode, operands[2]);
5955 xops[0] = operands[0];
5959 xops[1] = operands[1];
5960 xops[2] = operands[2];
5964 xops[1] = operands[2];
5965 xops[2] = operands[1];
5968 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
5969 xops[4] = operands[1];
5970 xops[5] = operands[2];
5972 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 integer max/min insn for VI24_128 modes, emitted as
;; p<maxmin_int><ssemodesuffix> (legacy) or the VEX three-operand
;; vp... form.  Operand 1 is marked commutative ("%").  Enabled for
;; TARGET_SSE4_1 when ix86_binary_operator_ok accepts the operands.
5978 (define_insn "*sse4_1_<code><mode>3"
5979 [(set (match_operand:VI24_128 0 "register_operand" "=x,x")
5981 (match_operand:VI24_128 1 "nonimmediate_operand" "%0,x")
5982 (match_operand:VI24_128 2 "nonimmediate_operand" "xm,xm")))]
5983 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5985 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
5986 vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5987 [(set_attr "isa" "noavx,avx")
5988 (set_attr "type" "sseiadd")
5989 (set_attr "prefix_extra" "1,*")
5990 (set_attr "prefix" "orig,vex")
5991 (set_attr "mode" "TI")])
;; SSE2 max/min insn for V16QI, emitted as p<maxmin_int>b or
;; vp<maxmin_int>b.  Operand 1 is marked commutative ("%").  Enabled for
;; TARGET_SSE2 when ix86_binary_operator_ok accepts the operands.
5993 (define_insn "*<code>v16qi3"
5994 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
5996 (match_operand:V16QI 1 "nonimmediate_operand" "%0,x")
5997 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")))]
5998 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V16QImode, operands)"
6000 p<maxmin_int>b\t{%2, %0|%0, %2}
6001 vp<maxmin_int>b\t{%2, %1, %0|%0, %1, %2}"
6002 [(set_attr "isa" "noavx,avx")
6003 (set_attr "type" "sseiadd")
6004 (set_attr "prefix_data16" "1,*")
6005 (set_attr "prefix_extra" "*,1")
6006 (set_attr "prefix" "orig,vex")
6007 (set_attr "mode" "TI")])
6009 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6011 ;; Parallel integral comparisons
6013 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Named expander for 256-bit integer equality comparison (VI_256).
;; Its preparation step calls ix86_fixup_binary_operands_no_copy with
;; code EQ so the operands suit the matching *avx2_eq<mode>3 insn.
6015 (define_expand "avx2_eq<mode>3"
6016 [(set (match_operand:VI_256 0 "register_operand" "")
6018 (match_operand:VI_256 1 "nonimmediate_operand" "")
6019 (match_operand:VI_256 2 "nonimmediate_operand" "")))]
6021 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
;; AVX2 per-element equality comparison for VI_256 modes, emitted as
;; vpcmpeq<ssemodesuffix>.  Operand 1 is marked commutative ("%x").
;; Enabled for TARGET_AVX2 when ix86_binary_operator_ok accepts the
;; operands.
6023 (define_insn "*avx2_eq<mode>3"
6024 [(set (match_operand:VI_256 0 "register_operand" "=x")
6026 (match_operand:VI_256 1 "nonimmediate_operand" "%x")
6027 (match_operand:VI_256 2 "nonimmediate_operand" "xm")))]
6028 "TARGET_AVX2 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
6029 "vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
6030 [(set_attr "type" "ssecmp")
6031 (set_attr "prefix_extra" "1")
6032 (set_attr "prefix" "vex")
6033 (set_attr "mode" "OI")])
;; SSE4.1 equality comparison for V2DI, emitted as pcmpeqq/vpcmpeqq.
;; Operand 1 is marked commutative ("%").  Enabled for TARGET_SSE4_1
;; when ix86_binary_operator_ok accepts the operands.
6035 (define_insn "*sse4_1_eqv2di3"
6036 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
6038 (match_operand:V2DI 1 "nonimmediate_operand" "%0,x")
6039 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")))]
6040 "TARGET_SSE4_1 && ix86_binary_operator_ok (EQ, V2DImode, operands)"
6042 pcmpeqq\t{%2, %0|%0, %2}
6043 vpcmpeqq\t{%2, %1, %0|%0, %1, %2}"
6044 [(set_attr "isa" "noavx,avx")
6045 (set_attr "type" "ssecmp")
6046 (set_attr "prefix_extra" "1")
6047 (set_attr "prefix" "orig,vex")
6048 (set_attr "mode" "TI")])
;; SSE2 per-element equality comparison for VI124_128 modes, emitted as
;; pcmpeq<ssemodesuffix> or its VEX form.  Operand 1 is marked
;; commutative ("%").  Enabled for TARGET_SSE2 with XOP disabled
;; (!TARGET_XOP), when ix86_binary_operator_ok accepts the operands.
6050 (define_insn "*sse2_eq<mode>3"
6051 [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
6053 (match_operand:VI124_128 1 "nonimmediate_operand" "%0,x")
6054 (match_operand:VI124_128 2 "nonimmediate_operand" "xm,xm")))]
6055 "TARGET_SSE2 && !TARGET_XOP
6056 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
6058 pcmpeq<ssemodesuffix>\t{%2, %0|%0, %2}
6059 vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
6060 [(set_attr "isa" "noavx,avx")
6061 (set_attr "type" "ssecmp")
6062 (set_attr "prefix_data16" "1,*")
6063 (set_attr "prefix" "orig,vex")
6064 (set_attr "mode" "TI")])
;; Named expander for the SSE2 equality comparison on VI124_128 modes.
;; Available under TARGET_SSE2 && !TARGET_XOP; its preparation step
;; calls ix86_fixup_binary_operands_no_copy with code EQ.
6066 (define_expand "sse2_eq<mode>3"
6067 [(set (match_operand:VI124_128 0 "register_operand" "")
6069 (match_operand:VI124_128 1 "nonimmediate_operand" "")
6070 (match_operand:VI124_128 2 "nonimmediate_operand" "")))]
6071 "TARGET_SSE2 && !TARGET_XOP "
6072 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
;; Named expander for the V2DI equality comparison.  Its preparation
;; step calls ix86_fixup_binary_operands_no_copy with code EQ and
;; V2DImode.
6074 (define_expand "sse4_1_eqv2di3"
6075 [(set (match_operand:V2DI 0 "register_operand" "")
6077 (match_operand:V2DI 1 "nonimmediate_operand" "")
6078 (match_operand:V2DI 2 "nonimmediate_operand" "")))]
6080 "ix86_fixup_binary_operands_no_copy (EQ, V2DImode, operands);")
;; Greater-than comparison for V2DI, emitted as pcmpgtq/vpcmpgtq.
;; Operand 1 must be a register and carries no commutative marker;
;; operand 2 may be a register or memory.
6082 (define_insn "sse4_2_gtv2di3"
6083 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
6085 (match_operand:V2DI 1 "register_operand" "0,x")
6086 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")))]
6089 pcmpgtq\t{%2, %0|%0, %2}
6090 vpcmpgtq\t{%2, %1, %0|%0, %1, %2}"
6091 [(set_attr "isa" "noavx,avx")
6092 (set_attr "type" "ssecmp")
6093 (set_attr "prefix_extra" "1")
6094 (set_attr "prefix" "orig,vex")
6095 (set_attr "mode" "TI")])
;; 256-bit greater-than comparison for VI_256 modes, emitted as
;; vpcmpgt<ssemodesuffix> (VEX only, single alternative).  Operand 1
;; must be a register; operand 2 may be a register or memory.
6097 (define_insn "avx2_gt<mode>3"
6098 [(set (match_operand:VI_256 0 "register_operand" "=x")
6100 (match_operand:VI_256 1 "register_operand" "x")
6101 (match_operand:VI_256 2 "nonimmediate_operand" "xm")))]
6103 "vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
6104 [(set_attr "type" "ssecmp")
6105 (set_attr "prefix_extra" "1")
6106 (set_attr "prefix" "vex")
6107 (set_attr "mode" "OI")])
;; SSE2 greater-than comparison for VI124_128 modes, emitted as
;; pcmpgt<ssemodesuffix> or its VEX form.  Enabled for TARGET_SSE2 with
;; XOP disabled.  Operand 1 must be a register and carries no
;; commutative marker.
6109 (define_insn "sse2_gt<mode>3"
6110 [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
6112 (match_operand:VI124_128 1 "register_operand" "0,x")
6113 (match_operand:VI124_128 2 "nonimmediate_operand" "xm,xm")))]
6114 "TARGET_SSE2 && !TARGET_XOP"
6116 pcmpgt<ssemodesuffix>\t{%2, %0|%0, %2}
6117 vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
6118 [(set_attr "isa" "noavx,avx")
6119 (set_attr "type" "ssecmp")
6120 (set_attr "prefix_data16" "1,*")
6121 (set_attr "prefix" "orig,vex")
6122 (set_attr "mode" "TI")])
;; Vector conditional select for 256-bit modes: the data mode iterates
;; over V_256 and the comparison mode over VI_256.  Operand 3 is the
;; comparison operator applied to operands 4 and 5; operands 1 and 2
;; are the values selected between.  The condition requires both modes
;; to have the same number of elements.  Expansion is delegated to
;; ix86_expand_int_vcond.
6124 (define_expand "vcond<V_256:mode><VI_256:mode>"
6125 [(set (match_operand:V_256 0 "register_operand" "")
6127 (match_operator 3 ""
6128 [(match_operand:VI_256 4 "nonimmediate_operand" "")
6129 (match_operand:VI_256 5 "nonimmediate_operand" "")])
6130 (match_operand:V_256 1 "general_operand" "")
6131 (match_operand:V_256 2 "general_operand" "")))]
6133 && (GET_MODE_NUNITS (<V_256:MODE>mode)
6134 == GET_MODE_NUNITS (<VI_256:MODE>mode))"
6136 bool ok = ix86_expand_int_vcond (operands);
;; Vector conditional select for 128-bit modes: the data mode iterates
;; over V_128 and the comparison mode over VI124_128.  The condition
;; requires equal element counts in the two modes; expansion is
;; delegated to ix86_expand_int_vcond.
6141 (define_expand "vcond<V_128:mode><VI124_128:mode>"
6142 [(set (match_operand:V_128 0 "register_operand" "")
6144 (match_operator 3 ""
6145 [(match_operand:VI124_128 4 "nonimmediate_operand" "")
6146 (match_operand:VI124_128 5 "nonimmediate_operand" "")])
6147 (match_operand:V_128 1 "general_operand" "")
6148 (match_operand:V_128 2 "general_operand" "")))]
6150 && (GET_MODE_NUNITS (<V_128:MODE>mode)
6151 == GET_MODE_NUNITS (<VI124_128:MODE>mode))"
6153 bool ok = ix86_expand_int_vcond (operands);
;; Vector conditional select whose comparison operands are V2DI and
;; whose data mode iterates over VI8F_128 (if_then_else:VI8F_128).
;; Expansion is delegated to ix86_expand_int_vcond.
6158 (define_expand "vcond<VI8F_128:mode>v2di"
6159 [(set (match_operand:VI8F_128 0 "register_operand" "")
6160 (if_then_else:VI8F_128
6161 (match_operator 3 ""
6162 [(match_operand:V2DI 4 "nonimmediate_operand" "")
6163 (match_operand:V2DI 5 "nonimmediate_operand" "")])
6164 (match_operand:VI8F_128 1 "general_operand" "")
6165 (match_operand:VI8F_128 2 "general_operand" "")))]
6168 bool ok = ix86_expand_int_vcond (operands);
;; vcondu counterpart of the 256-bit vcond expander: data mode V_256,
;; comparison mode VI_256, equal element counts required.  Expansion is
;; delegated to the same ix86_expand_int_vcond helper.
6173 (define_expand "vcondu<V_256:mode><VI_256:mode>"
6174 [(set (match_operand:V_256 0 "register_operand" "")
6176 (match_operator 3 ""
6177 [(match_operand:VI_256 4 "nonimmediate_operand" "")
6178 (match_operand:VI_256 5 "nonimmediate_operand" "")])
6179 (match_operand:V_256 1 "general_operand" "")
6180 (match_operand:V_256 2 "general_operand" "")))]
6182 && (GET_MODE_NUNITS (<V_256:MODE>mode)
6183 == GET_MODE_NUNITS (<VI_256:MODE>mode))"
6185 bool ok = ix86_expand_int_vcond (operands);
;; vcondu counterpart of the 128-bit vcond expander: data mode V_128,
;; comparison mode VI124_128, equal element counts required.  Expansion
;; is delegated to ix86_expand_int_vcond.
6190 (define_expand "vcondu<V_128:mode><VI124_128:mode>"
6191 [(set (match_operand:V_128 0 "register_operand" "")
6193 (match_operator 3 ""
6194 [(match_operand:VI124_128 4 "nonimmediate_operand" "")
6195 (match_operand:VI124_128 5 "nonimmediate_operand" "")])
6196 (match_operand:V_128 1 "general_operand" "")
6197 (match_operand:V_128 2 "general_operand" "")))]
6199 && (GET_MODE_NUNITS (<V_128:MODE>mode)
6200 == GET_MODE_NUNITS (<VI124_128:MODE>mode))"
6202 bool ok = ix86_expand_int_vcond (operands);
;; vcondu counterpart of vcond<VI8F_128:mode>v2di: V2DI comparison
;; operands select between VI8F_128 values.  Expansion is delegated to
;; ix86_expand_int_vcond.
6207 (define_expand "vcondu<VI8F_128:mode>v2di"
6208 [(set (match_operand:VI8F_128 0 "register_operand" "")
6209 (if_then_else:VI8F_128
6210 (match_operator 3 ""
6211 [(match_operand:V2DI 4 "nonimmediate_operand" "")
6212 (match_operand:V2DI 5 "nonimmediate_operand" "")])
6213 (match_operand:VI8F_128 1 "general_operand" "")
6214 (match_operand:VI8F_128 2 "general_operand" "")))]
6217 bool ok = ix86_expand_int_vcond (operands);
;; Modes accepted by vec_perm<mode>: every 128-bit integer and float
;; vector mode unconditionally, plus the 256-bit modes when TARGET_AVX2
;; is enabled.
6222 (define_mode_iterator VEC_PERM_AVX2
6223 [V16QI V8HI V4SI V2DI V4SF V2DF
6224 (V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
6225 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
6226 (V8SF "TARGET_AVX2") (V4DF "TARGET_AVX2")])
;; Variable permute expander.  Operands 1 and 2 are the input vectors,
;; operand 3 is the selector in the integer vector mode
;; <sseintvecmode>, operand 0 the result.  Available when any of SSSE3,
;; AVX or XOP is enabled; all work is delegated to
;; ix86_expand_vec_perm.
6228 (define_expand "vec_perm<mode>"
6229 [(match_operand:VEC_PERM_AVX2 0 "register_operand" "")
6230 (match_operand:VEC_PERM_AVX2 1 "register_operand" "")
6231 (match_operand:VEC_PERM_AVX2 2 "register_operand" "")
6232 (match_operand:<sseintvecmode> 3 "register_operand" "")]
6233 "TARGET_SSSE3 || TARGET_AVX || TARGET_XOP"
6235 ix86_expand_vec_perm (operands);
6239 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6241 ;; Parallel bitwise logical operations
6243 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; One's complement for VI modes, expressed as an XOR of operand 1 with
;; an all-ones vector.  The preparation code builds a CONST_VECTOR with
;; every element set to constm1_rtx, forces it into a register and
;; installs it as operands[2].
6245 (define_expand "one_cmpl<mode>2"
6246 [(set (match_operand:VI 0 "register_operand" "")
6247 (xor:VI (match_operand:VI 1 "nonimmediate_operand" "")
6251 int i, n = GET_MODE_NUNITS (<MODE>mode);
6252 rtvec v = rtvec_alloc (n);
6254 for (i = 0; i < n; ++i)
6255 RTVEC_ELT (v, i) = constm1_rtx;
6257 operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v));
;; Named and-not expander for VI_AVX2 modes.  Operand 1 (a register) is
;; the input that gets complemented (not:VI_AVX2) before being combined
;; with operand 2, which may be a register or memory.
6260 (define_expand "<sse2_avx2>_andnot<mode>3"
6261 [(set (match_operand:VI_AVX2 0 "register_operand" "")
6263 (not:VI_AVX2 (match_operand:VI_AVX2 1 "register_operand" ""))
6264 (match_operand:VI_AVX2 2 "nonimmediate_operand" "")))]
;; And-not insn for all VI modes.  The output template is assembled at
;; run time: a switch on the insn's mode attribute asserts the matching
;; target flag (AVX2, SSE2, AVX or SSE), a switch on the alternative
;; picks the two-operand or "v"-prefixed three-operand format string,
;; and snprintf combines format and mnemonic into a static 32-byte
;; buffer.
;; The mode attribute evaluates to V8SF for vectors wider than 16 bytes
;; without AVX2, to V4SF without SSE2, and to <sseinsnmode> otherwise.
;; NOTE(review): presumably the V8SF/V4SF cases select the
;; floating-point mnemonic; confirm in the mode switch.
6267 (define_insn "*andnot<mode>3"
6268 [(set (match_operand:VI 0 "register_operand" "=x,x")
6270 (not:VI (match_operand:VI 1 "register_operand" "0,x"))
6271 (match_operand:VI 2 "nonimmediate_operand" "xm,xm")))]
6274 static char buf[32];
6278 switch (get_attr_mode (insn))
6281 gcc_assert (TARGET_AVX2);
6283 gcc_assert (TARGET_SSE2);
6289 gcc_assert (TARGET_AVX);
6291 gcc_assert (TARGET_SSE);
6300 switch (which_alternative)
6303 ops = "%s\t{%%2, %%0|%%0, %%2}";
6306 ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
6312 snprintf (buf, sizeof (buf), ops, tmp);
6315 [(set_attr "isa" "noavx,avx")
6316 (set_attr "type" "sselog")
6317 (set (attr "prefix_data16")
6319 (and (eq_attr "alternative" "0")
6320 (eq_attr "mode" "TI"))
6322 (const_string "*")))
6323 (set_attr "prefix" "orig,vex")
6325 (cond [(and (not (match_test "TARGET_AVX2"))
6326 (match_test "GET_MODE_SIZE (<MODE>mode) > 16"))
6327 (const_string "V8SF")
6328 (not (match_test "TARGET_SSE2"))
6329 (const_string "V4SF")
6331 (const_string "<sseinsnmode>")))])
;; Binary-operation expander for all VI modes.  Its only preparation
;; step is ix86_fixup_binary_operands_no_copy on the operand array.
;; NOTE(review): this sits in the "Parallel bitwise logical operations"
;; section, so presumably <code> iterates over the logical operators;
;; confirm against the code iterator in the RTL template.
6333 (define_expand "<code><mode>3"
6334 [(set (match_operand:VI 0 "register_operand" "")
6336 (match_operand:VI 1 "nonimmediate_operand" "")
6337 (match_operand:VI 2 "nonimmediate_operand" "")))]
6339 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Commutative binary insn for all VI modes (operand 1 is marked "%"),
;; gated on ix86_binary_operator_ok.  As in *andnot<mode>3, the output
;; template is assembled at run time: a switch on the insn's mode
;; attribute asserts the matching target flag, a switch on the
;; alternative picks the two-operand or "v"-prefixed three-operand
;; format, and snprintf writes the result into a static 32-byte buffer.
;; The mode attribute evaluates to V8SF for vectors wider than 16 bytes
;; without AVX2, to V4SF without SSE2, and to <sseinsnmode> otherwise.
6341 (define_insn "*<code><mode>3"
6342 [(set (match_operand:VI 0 "register_operand" "=x,x")
6344 (match_operand:VI 1 "nonimmediate_operand" "%0,x")
6345 (match_operand:VI 2 "nonimmediate_operand" "xm,xm")))]
6347 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6349 static char buf[32];
6353 switch (get_attr_mode (insn))
6356 gcc_assert (TARGET_AVX2);
6358 gcc_assert (TARGET_SSE2);
6364 gcc_assert (TARGET_AVX);
6366 gcc_assert (TARGET_SSE);
6375 switch (which_alternative)
6378 ops = "%s\t{%%2, %%0|%%0, %%2}";
6381 ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
6387 snprintf (buf, sizeof (buf), ops, tmp);
6390 [(set_attr "isa" "noavx,avx")
6391 (set_attr "type" "sselog")
6392 (set (attr "prefix_data16")
6394 (and (eq_attr "alternative" "0")
6395 (eq_attr "mode" "TI"))
6397 (const_string "*")))
6398 (set_attr "prefix" "orig,vex")
6400 (cond [(and (not (match_test "TARGET_AVX2"))
6401 (match_test "GET_MODE_SIZE (<MODE>mode) > 16"))
6402 (const_string "V8SF")
6403 (not (match_test "TARGET_SSE2"))
6404 (const_string "V4SF")
6406 (const_string "<sseinsnmode>")))])
;; And-not on a TFmode value held in an xmm register, emitted as
;; pandn/vpandn.  Operand 1 (register only) is the complemented input;
;; operand 2 may be a register or memory.
6408 (define_insn "*andnottf3"
6409 [(set (match_operand:TF 0 "register_operand" "=x,x")
6411 (not:TF (match_operand:TF 1 "register_operand" "0,x"))
6412 (match_operand:TF 2 "nonimmediate_operand" "xm,xm")))]
6415 pandn\t{%2, %0|%0, %2}
6416 vpandn\t{%2, %1, %0|%0, %1, %2}"
6417 [(set_attr "isa" "noavx,avx")
6418 (set_attr "type" "sselog")
6419 (set_attr "prefix_data16" "1,*")
6420 (set_attr "prefix" "orig,vex")
6421 (set_attr "mode" "TI")])
;; Binary-operation expander on TFmode values.  Its only preparation
;; step is ix86_fixup_binary_operands_no_copy with TFmode.
6423 (define_expand "<code>tf3"
6424 [(set (match_operand:TF 0 "register_operand" "")
6426 (match_operand:TF 1 "nonimmediate_operand" "")
6427 (match_operand:TF 2 "nonimmediate_operand" "")))]
6429 "ix86_fixup_binary_operands_no_copy (<CODE>, TFmode, operands);")
;; Commutative logical insn on TFmode values (operand 1 is marked "%"),
;; emitted as p<logic> or vp<logic>.  Gated on ix86_binary_operator_ok
;; for TFmode.
6431 (define_insn "*<code>tf3"
6432 [(set (match_operand:TF 0 "register_operand" "=x,x")
6434 (match_operand:TF 1 "nonimmediate_operand" "%0,x")
6435 (match_operand:TF 2 "nonimmediate_operand" "xm,xm")))]
6437 && ix86_binary_operator_ok (<CODE>, TFmode, operands)"
6439 p<logic>\t{%2, %0|%0, %2}
6440 vp<logic>\t{%2, %1, %0|%0, %1, %2}"
6441 [(set_attr "isa" "noavx,avx")
6442 (set_attr "type" "sselog")
6443 (set_attr "prefix_data16" "1,*")
6444 (set_attr "prefix" "orig,vex")
6445 (set_attr "mode" "TI")])
6447 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6449 ;; Parallel integral element swizzling
6451 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Truncating pack of two VI248_AVX2 vectors into one <ssepackmode>
;; result.  Both inputs are re-viewed in <ssepackmode> and handed to
;; ix86_expand_vec_extract_even_odd with a final argument of 0.
;; NOTE(review): presumably 0 selects the even elements, i.e. the low
;; half of each wide element; confirm against that helper.
6453 (define_expand "vec_pack_trunc_<mode>"
6454 [(match_operand:<ssepackmode> 0 "register_operand" "")
6455 (match_operand:VI248_AVX2 1 "register_operand" "")
6456 (match_operand:VI248_AVX2 2 "register_operand" "")]
6459 rtx op1 = gen_lowpart (<ssepackmode>mode, operands[1]);
6460 rtx op2 = gen_lowpart (<ssepackmode>mode, operands[2]);
6461 ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
;; Pack with signed saturation, emitted as packsswb/vpacksswb.  The
;; result (VI1_AVX2) is the concatenation of the ss_truncate of
;; operand 1 and the ss_truncate of operand 2, each producing a
;; <ssehalfvecmode> half from a <sseunpackmode> input.
6465 (define_insn "<sse2_avx2>_packsswb"
6466 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
6467 (vec_concat:VI1_AVX2
6468 (ss_truncate:<ssehalfvecmode>
6469 (match_operand:<sseunpackmode> 1 "register_operand" "0,x"))
6470 (ss_truncate:<ssehalfvecmode>
6471 (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,xm"))))]
6474 packsswb\t{%2, %0|%0, %2}
6475 vpacksswb\t{%2, %1, %0|%0, %1, %2}"
6476 [(set_attr "isa" "noavx,avx")
6477 (set_attr "type" "sselog")
6478 (set_attr "prefix_data16" "1,*")
6479 (set_attr "prefix" "orig,vex")
6480 (set_attr "mode" "<sseinsnmode>")])
;; Pack with signed saturation, emitted as packssdw/vpackssdw.  The
;; result (VI2_AVX2) is the concatenation of the ss_truncate of
;; operand 1 and the ss_truncate of operand 2, each producing a
;; <ssehalfvecmode> half from a <sseunpackmode> input.
6482 (define_insn "<sse2_avx2>_packssdw"
6483 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
6484 (vec_concat:VI2_AVX2
6485 (ss_truncate:<ssehalfvecmode>
6486 (match_operand:<sseunpackmode> 1 "register_operand" "0,x"))
6487 (ss_truncate:<ssehalfvecmode>
6488 (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,xm"))))]
6491 packssdw\t{%2, %0|%0, %2}
6492 vpackssdw\t{%2, %1, %0|%0, %1, %2}"
6493 [(set_attr "isa" "noavx,avx")
6494 (set_attr "type" "sselog")
6495 (set_attr "prefix_data16" "1,*")
6496 (set_attr "prefix" "orig,vex")
6497 (set_attr "mode" "<sseinsnmode>")])
;; Pack with unsigned saturation, emitted as packuswb/vpackuswb.  Same
;; shape as the packsswb pattern, but each half is a us_truncate of its
;; <sseunpackmode> input.
6499 (define_insn "<sse2_avx2>_packuswb"
6500 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
6501 (vec_concat:VI1_AVX2
6502 (us_truncate:<ssehalfvecmode>
6503 (match_operand:<sseunpackmode> 1 "register_operand" "0,x"))
6504 (us_truncate:<ssehalfvecmode>
6505 (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,xm"))))]
6508 packuswb\t{%2, %0|%0, %2}
6509 vpackuswb\t{%2, %1, %0|%0, %1, %2}"
6510 [(set_attr "isa" "noavx,avx")
6511 (set_attr "type" "sselog")
6512 (set_attr "prefix_data16" "1,*")
6513 (set_attr "prefix" "orig,vex")
6514 (set_attr "mode" "<sseinsnmode>")])
;; 256-bit high byte interleave, emitted as vpunpckhbw.  The selector
;; alternates index n with n+32 (n being an operand 1 element, n+32 the
;; same element of operand 2) for n = 8..15 and then n = 24..31, i.e.
;; the upper eight bytes of each 16-byte half of both inputs.
6516 (define_insn "avx2_interleave_highv32qi"
6517 [(set (match_operand:V32QI 0 "register_operand" "=x")
6520 (match_operand:V32QI 1 "register_operand" "x")
6521 (match_operand:V32QI 2 "nonimmediate_operand" "xm"))
6522 (parallel [(const_int 8) (const_int 40)
6523 (const_int 9) (const_int 41)
6524 (const_int 10) (const_int 42)
6525 (const_int 11) (const_int 43)
6526 (const_int 12) (const_int 44)
6527 (const_int 13) (const_int 45)
6528 (const_int 14) (const_int 46)
6529 (const_int 15) (const_int 47)
6530 (const_int 24) (const_int 56)
6531 (const_int 25) (const_int 57)
6532 (const_int 26) (const_int 58)
6533 (const_int 27) (const_int 59)
6534 (const_int 28) (const_int 60)
6535 (const_int 29) (const_int 61)
6536 (const_int 30) (const_int 62)
6537 (const_int 31) (const_int 63)])))]
6539 "vpunpckhbw\t{%2, %1, %0|%0, %1, %2}"
6540 [(set_attr "type" "sselog")
6541 (set_attr "prefix" "vex")
6542 (set_attr "mode" "OI")])
;; 128-bit high byte interleave, emitted as punpckhbw/vpunpckhbw.  The
;; selector alternates index n with n+16 for n = 8..15, i.e. the upper
;; eight bytes of operand 1 interleaved with those of operand 2.
6544 (define_insn "vec_interleave_highv16qi"
6545 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
6548 (match_operand:V16QI 1 "register_operand" "0,x")
6549 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm"))
6550 (parallel [(const_int 8) (const_int 24)
6551 (const_int 9) (const_int 25)
6552 (const_int 10) (const_int 26)
6553 (const_int 11) (const_int 27)
6554 (const_int 12) (const_int 28)
6555 (const_int 13) (const_int 29)
6556 (const_int 14) (const_int 30)
6557 (const_int 15) (const_int 31)])))]
6560 punpckhbw\t{%2, %0|%0, %2}
6561 vpunpckhbw\t{%2, %1, %0|%0, %1, %2}"
6562 [(set_attr "isa" "noavx,avx")
6563 (set_attr "type" "sselog")
6564 (set_attr "prefix_data16" "1,*")
6565 (set_attr "prefix" "orig,vex")
6566 (set_attr "mode" "TI")])
;; 256-bit low byte interleave, emitted as vpunpcklbw.  The selector
;; alternates index n with n+32 for n = 0..7 and then n = 16..23, i.e.
;; the lower eight bytes of each 16-byte half of both inputs.
6568 (define_insn "avx2_interleave_lowv32qi"
6569 [(set (match_operand:V32QI 0 "register_operand" "=x")
6572 (match_operand:V32QI 1 "register_operand" "x")
6573 (match_operand:V32QI 2 "nonimmediate_operand" "xm"))
6574 (parallel [(const_int 0) (const_int 32)
6575 (const_int 1) (const_int 33)
6576 (const_int 2) (const_int 34)
6577 (const_int 3) (const_int 35)
6578 (const_int 4) (const_int 36)
6579 (const_int 5) (const_int 37)
6580 (const_int 6) (const_int 38)
6581 (const_int 7) (const_int 39)
6582 (const_int 16) (const_int 48)
6583 (const_int 17) (const_int 49)
6584 (const_int 18) (const_int 50)
6585 (const_int 19) (const_int 51)
6586 (const_int 20) (const_int 52)
6587 (const_int 21) (const_int 53)
6588 (const_int 22) (const_int 54)
6589 (const_int 23) (const_int 55)])))]
6591 "vpunpcklbw\t{%2, %1, %0|%0, %1, %2}"
6592 [(set_attr "type" "sselog")
6593 (set_attr "prefix" "vex")
6594 (set_attr "mode" "OI")])
;; 128-bit low byte interleave, emitted as punpcklbw/vpunpcklbw.  The
;; selector alternates index n with n+16 for n = 0..7, i.e. the lower
;; eight bytes of operand 1 interleaved with those of operand 2.
6596 (define_insn "vec_interleave_lowv16qi"
6597 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
6600 (match_operand:V16QI 1 "register_operand" "0,x")
6601 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm"))
6602 (parallel [(const_int 0) (const_int 16)
6603 (const_int 1) (const_int 17)
6604 (const_int 2) (const_int 18)
6605 (const_int 3) (const_int 19)
6606 (const_int 4) (const_int 20)
6607 (const_int 5) (const_int 21)
6608 (const_int 6) (const_int 22)
6609 (const_int 7) (const_int 23)])))]
6612 punpcklbw\t{%2, %0|%0, %2}
6613 vpunpcklbw\t{%2, %1, %0|%0, %1, %2}"
6614 [(set_attr "isa" "noavx,avx")
6615 (set_attr "type" "sselog")
6616 (set_attr "prefix_data16" "1,*")
6617 (set_attr "prefix" "orig,vex")
6618 (set_attr "mode" "TI")])
;; 256-bit high word interleave, emitted as vpunpckhwd.  The selector
;; alternates index n with n+16 for n = 4..7 and then n = 12..15, i.e.
;; the upper four words of each 8-word half of both inputs.
6620 (define_insn "avx2_interleave_highv16hi"
6621 [(set (match_operand:V16HI 0 "register_operand" "=x")
6624 (match_operand:V16HI 1 "register_operand" "x")
6625 (match_operand:V16HI 2 "nonimmediate_operand" "xm"))
6626 (parallel [(const_int 4) (const_int 20)
6627 (const_int 5) (const_int 21)
6628 (const_int 6) (const_int 22)
6629 (const_int 7) (const_int 23)
6630 (const_int 12) (const_int 28)
6631 (const_int 13) (const_int 29)
6632 (const_int 14) (const_int 30)
6633 (const_int 15) (const_int 31)])))]
6635 "vpunpckhwd\t{%2, %1, %0|%0, %1, %2}"
6636 [(set_attr "type" "sselog")
6637 (set_attr "prefix" "vex")
6638 (set_attr "mode" "OI")])
;; 128-bit high word interleave, emitted as punpckhwd/vpunpckhwd.  The
;; selector alternates index n with n+8 for n = 4..7, i.e. the upper
;; four words of operand 1 interleaved with those of operand 2.
6640 (define_insn "vec_interleave_highv8hi"
6641 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
6644 (match_operand:V8HI 1 "register_operand" "0,x")
6645 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm"))
6646 (parallel [(const_int 4) (const_int 12)
6647 (const_int 5) (const_int 13)
6648 (const_int 6) (const_int 14)
6649 (const_int 7) (const_int 15)])))]
6652 punpckhwd\t{%2, %0|%0, %2}
6653 vpunpckhwd\t{%2, %1, %0|%0, %1, %2}"
6654 [(set_attr "isa" "noavx,avx")
6655 (set_attr "type" "sselog")
6656 (set_attr "prefix_data16" "1,*")
6657 (set_attr "prefix" "orig,vex")
6658 (set_attr "mode" "TI")])
;; 256-bit low word interleave, emitted as vpunpcklwd.  The selector
;; alternates index n with n+16 for n = 0..3 and then n = 8..11, i.e.
;; the lower four words of each 8-word half of both inputs.
6660 (define_insn "avx2_interleave_lowv16hi"
6661 [(set (match_operand:V16HI 0 "register_operand" "=x")
6664 (match_operand:V16HI 1 "register_operand" "x")
6665 (match_operand:V16HI 2 "nonimmediate_operand" "xm"))
6666 (parallel [(const_int 0) (const_int 16)
6667 (const_int 1) (const_int 17)
6668 (const_int 2) (const_int 18)
6669 (const_int 3) (const_int 19)
6670 (const_int 8) (const_int 24)
6671 (const_int 9) (const_int 25)
6672 (const_int 10) (const_int 26)
6673 (const_int 11) (const_int 27)])))]
6675 "vpunpcklwd\t{%2, %1, %0|%0, %1, %2}"
6676 [(set_attr "type" "sselog")
6677 (set_attr "prefix" "vex")
6678 (set_attr "mode" "OI")])
;; 128-bit "interleave low" of 16-bit elements (selector 0,8,1,9,2,10,3,11).
;; Alternative 0 is the two-operand punpcklwd form with operand 1 tied to
;; the destination (constraint "0"); alternative 1 is the three-operand
;; vpunpcklwd form.  The "isa" attribute marks them noavx / avx.
;; NOTE(review): the vec_select/vec_concat wrapper, the insn condition and
;; the template opener are not visible in this listing.
6680 (define_insn "vec_interleave_lowv8hi"
6681 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
6684 (match_operand:V8HI 1 "register_operand" "0,x")
6685 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm"))
6686 (parallel [(const_int 0) (const_int 8)
6687 (const_int 1) (const_int 9)
6688 (const_int 2) (const_int 10)
6689 (const_int 3) (const_int 11)])))]
6692 punpcklwd\t{%2, %0|%0, %2}
6693 vpunpcklwd\t{%2, %1, %0|%0, %1, %2}"
6694 [(set_attr "isa" "noavx,avx")
6695 (set_attr "type" "sselog")
6696 (set_attr "prefix_data16" "1,*")
6697 (set_attr "prefix" "orig,vex")
6698 (set_attr "mode" "TI")])
;; 256-bit "interleave high" of 32-bit elements, emitted as vpunpckhdq.
;; The selector pairs elements 2,3 and 6,7 with 10,11 and 14,15, i.e. the
;; upper two dwords of each 4-element half of the V8SI operands.
;; NOTE(review): the vec_select/vec_concat wrapper and the insn condition
;; are not visible in this listing; indices >= 8 presumably address
;; operand 2 -- confirm against the full pattern.
6700 (define_insn "avx2_interleave_highv8si"
6701 [(set (match_operand:V8SI 0 "register_operand" "=x")
6704 (match_operand:V8SI 1 "register_operand" "x")
6705 (match_operand:V8SI 2 "nonimmediate_operand" "xm"))
6706 (parallel [(const_int 2) (const_int 10)
6707 (const_int 3) (const_int 11)
6708 (const_int 6) (const_int 14)
6709 (const_int 7) (const_int 15)])))]
6711 "vpunpckhdq\t{%2, %1, %0|%0, %1, %2}"
6712 [(set_attr "type" "sselog")
6713 (set_attr "prefix" "vex")
6714 (set_attr "mode" "OI")])
;; 128-bit "interleave high" of 32-bit elements (selector 2,6,3,7).
;; Alternative 0 is the two-operand punpckhdq form with operand 1 tied to
;; the destination (constraint "0"); alternative 1 is the three-operand
;; vpunpckhdq form.  The "isa" attribute marks them noavx / avx.
;; NOTE(review): the vec_select/vec_concat wrapper, the insn condition and
;; the template opener are not visible in this listing.
6716 (define_insn "vec_interleave_highv4si"
6717 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
6720 (match_operand:V4SI 1 "register_operand" "0,x")
6721 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm"))
6722 (parallel [(const_int 2) (const_int 6)
6723 (const_int 3) (const_int 7)])))]
6726 punpckhdq\t{%2, %0|%0, %2}
6727 vpunpckhdq\t{%2, %1, %0|%0, %1, %2}"
6728 [(set_attr "isa" "noavx,avx")
6729 (set_attr "type" "sselog")
6730 (set_attr "prefix_data16" "1,*")
6731 (set_attr "prefix" "orig,vex")
6732 (set_attr "mode" "TI")])
;; 256-bit "interleave low" of 32-bit elements, emitted as vpunpckldq.
;; The selector pairs elements 0,1 and 4,5 with 8,9 and 12,13, i.e. the
;; lower two dwords of each 4-element half of the V8SI operands.
;; NOTE(review): the vec_select/vec_concat wrapper and the insn condition
;; are not visible in this listing; indices >= 8 presumably address
;; operand 2 -- confirm against the full pattern.
6734 (define_insn "avx2_interleave_lowv8si"
6735 [(set (match_operand:V8SI 0 "register_operand" "=x")
6738 (match_operand:V8SI 1 "register_operand" "x")
6739 (match_operand:V8SI 2 "nonimmediate_operand" "xm"))
6740 (parallel [(const_int 0) (const_int 8)
6741 (const_int 1) (const_int 9)
6742 (const_int 4) (const_int 12)
6743 (const_int 5) (const_int 13)])))]
6745 "vpunpckldq\t{%2, %1, %0|%0, %1, %2}"
6746 [(set_attr "type" "sselog")
6747 (set_attr "prefix" "vex")
6748 (set_attr "mode" "OI")])
;; 128-bit "interleave low" of 32-bit elements (selector 0,4,1,5).
;; Alternative 0 is the two-operand punpckldq form with operand 1 tied to
;; the destination (constraint "0"); alternative 1 is the three-operand
;; vpunpckldq form.  The "isa" attribute marks them noavx / avx.
;; NOTE(review): the vec_select/vec_concat wrapper, the insn condition and
;; the template opener are not visible in this listing.
6750 (define_insn "vec_interleave_lowv4si"
6751 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
6754 (match_operand:V4SI 1 "register_operand" "0,x")
6755 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm"))
6756 (parallel [(const_int 0) (const_int 4)
6757 (const_int 1) (const_int 5)])))]
6760 punpckldq\t{%2, %0|%0, %2}
6761 vpunpckldq\t{%2, %1, %0|%0, %1, %2}"
6762 [(set_attr "isa" "noavx,avx")
6763 (set_attr "type" "sselog")
6764 (set_attr "prefix_data16" "1,*")
6765 (set_attr "prefix" "orig,vex")
6766 (set_attr "mode" "TI")])
;; V8HI is listed unconditionally; V16QI and V4SI are enabled only under
;; TARGET_SSE4_1, and V2DI additionally requires TARGET_64BIT.
6768 ;; Modes handled by pinsr patterns.
6769 (define_mode_iterator PINSR_MODE
6770 [(V16QI "TARGET_SSE4_1") V8HI
6771 (V4SI "TARGET_SSE4_1")
6772 (V2DI "TARGET_SSE4_1 && TARGET_64BIT")])
;; Per-mode name prefix: "sse2" for V8HI, "sse4_1" for V16QI, V4SI and V2DI.
6774 (define_mode_attr sse2p4_1
6775 [(V16QI "sse4_1") (V8HI "sse2")
6776 (V4SI "sse4_1") (V2DI "sse4_1")])
;; Insert scalar operand 2 (register or memory) into one element of vector
;; operand 1 via vec_merge.  Operand 3 arrives as the vec_merge mask; the
;; condition requires exact_log2 of it to be a valid element number
;; (< GET_MODE_NUNITS), and the output code replaces operands[3] with that
;; element number before printing.  When the scalar mode is narrower than
;; SImode, the template prints operand 2 with the %k modifier.
;; Alternatives 0-1 are the two-operand forms (operand 1 tied to operand 0),
;; alternatives 2-3 the three-operand vpinsr forms.
;; NOTE(review): the leading part of the condition, the braces, the switch
;; case labels and the if_then_else heads/arms of the computed attributes
;; are not visible in this listing.
6778 ;; sse4_1_pinsrd must come before sse2_loadld since it is preferred.
6779 (define_insn "<sse2p4_1>_pinsr<ssemodesuffix>"
6780 [(set (match_operand:PINSR_MODE 0 "register_operand" "=x,x,x,x")
6781 (vec_merge:PINSR_MODE
6782 (vec_duplicate:PINSR_MODE
6783 (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "r,m,r,m"))
6784 (match_operand:PINSR_MODE 1 "register_operand" "0,0,x,x")
6785 (match_operand:SI 3 "const_int_operand" "")))]
6787 && ((unsigned) exact_log2 (INTVAL (operands[3]))
6788 < GET_MODE_NUNITS (<MODE>mode))"
6790 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6792 switch (which_alternative)
6795 if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
6796 return "pinsr<ssemodesuffix>\t{%3, %k2, %0|%0, %k2, %3}";
6799 return "pinsr<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}";
6801 if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
6802 return "vpinsr<ssemodesuffix>\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
6805 return "vpinsr<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
6810 [(set_attr "isa" "noavx,noavx,avx,avx")
6811 (set_attr "type" "sselog")
6812 (set (attr "prefix_rex")
6814 (and (not (match_test "TARGET_AVX"))
6815 (eq (const_string "<MODE>mode") (const_string "V2DImode")))
6817 (const_string "*")))
6818 (set (attr "prefix_data16")
6820 (and (not (match_test "TARGET_AVX"))
6821 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
6823 (const_string "*")))
6824 (set (attr "prefix_extra")
6826 (and (not (match_test "TARGET_AVX"))
6827 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
6829 (const_string "1")))
6830 (set_attr "length_immediate" "1")
6831 (set_attr "prefix" "orig,orig,vex,vex")
6832 (set_attr "mode" "TI")])
;; Extract one byte of V16QI operand 1 (index operand 2, 0..15) into a
;; general register of an SWI48 mode; the destination is printed as %k0.
;; NOTE(review): the extension/vec_select wrapper lines and the insn
;; condition are not visible in this listing -- presumably a zero_extend
;; of a QImode vec_select; confirm.
6834 (define_insn "*sse4_1_pextrb_<mode>"
6835 [(set (match_operand:SWI48 0 "register_operand" "=r")
6838 (match_operand:V16QI 1 "register_operand" "x")
6839 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")]))))]
6841 "%vpextrb\t{%2, %1, %k0|%k0, %1, %2}"
6842 [(set_attr "type" "sselog")
6843 (set_attr "prefix_extra" "1")
6844 (set_attr "length_immediate" "1")
6845 (set_attr "prefix" "maybe_vex")
6846 (set_attr "mode" "TI")])
;; Extract one byte of V16QI operand 1 (index operand 2, 0..15) straight
;; into a QImode memory destination using pextrb.
;; NOTE(review): the vec_select line and the insn condition are not
;; visible in this listing.
6848 (define_insn "*sse4_1_pextrb_memory"
6849 [(set (match_operand:QI 0 "memory_operand" "=m")
6851 (match_operand:V16QI 1 "register_operand" "x")
6852 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")])))]
6854 "%vpextrb\t{%2, %1, %0|%0, %1, %2}"
6855 [(set_attr "type" "sselog")
6856 (set_attr "prefix_extra" "1")
6857 (set_attr "length_immediate" "1")
6858 (set_attr "prefix" "maybe_vex")
6859 (set_attr "mode" "TI")])
;; Extract one 16-bit element of V8HI operand 1 (index operand 2, 0..7)
;; into a general register of an SWI48 mode; the destination is printed
;; as %k0.  Unlike the memory form, this one sets prefix_data16 rather
;; than prefix_extra.
;; NOTE(review): the extension/vec_select wrapper lines and the insn
;; condition are not visible in this listing -- presumably a zero_extend
;; of an HImode vec_select; confirm.
6861 (define_insn "*sse2_pextrw_<mode>"
6862 [(set (match_operand:SWI48 0 "register_operand" "=r")
6865 (match_operand:V8HI 1 "register_operand" "x")
6866 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")]))))]
6868 "%vpextrw\t{%2, %1, %k0|%k0, %1, %2}"
6869 [(set_attr "type" "sselog")
6870 (set_attr "prefix_data16" "1")
6871 (set_attr "length_immediate" "1")
6872 (set_attr "prefix" "maybe_vex")
6873 (set_attr "mode" "TI")])
;; Extract one 16-bit element of V8HI operand 1 (index operand 2, 0..7)
;; straight into an HImode memory destination using pextrw.
;; NOTE(review): the vec_select line and the insn condition are not
;; visible in this listing.
6875 (define_insn "*sse4_1_pextrw_memory"
6876 [(set (match_operand:HI 0 "memory_operand" "=m")
6878 (match_operand:V8HI 1 "register_operand" "x")
6879 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")])))]
6881 "%vpextrw\t{%2, %1, %0|%0, %1, %2}"
6882 [(set_attr "type" "sselog")
6883 (set_attr "prefix_extra" "1")
6884 (set_attr "length_immediate" "1")
6885 (set_attr "prefix" "maybe_vex")
6886 (set_attr "mode" "TI")])
;; Extract one 32-bit element of V4SI operand 1 (index operand 2, 0..3)
;; into an SImode register or memory destination using pextrd.
;; NOTE(review): the vec_select line and the insn condition are not
;; visible in this listing.
6888 (define_insn "*sse4_1_pextrd"
6889 [(set (match_operand:SI 0 "nonimmediate_operand" "=rm")
6891 (match_operand:V4SI 1 "register_operand" "x")
6892 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
6894 "%vpextrd\t{%2, %1, %0|%0, %1, %2}"
6895 [(set_attr "type" "sselog")
6896 (set_attr "prefix_extra" "1")
6897 (set_attr "length_immediate" "1")
6898 (set_attr "prefix" "maybe_vex")
6899 (set_attr "mode" "TI")])
;; 64-bit-only variant of pextrd whose destination is a DImode register;
;; the template still prints the destination as %k0.
;; NOTE(review): the wrapper lines are not visible in this listing --
;; judging by the name this is a zero_extend of the SImode vec_select;
;; confirm.
6901 (define_insn "*sse4_1_pextrd_zext"
6902 [(set (match_operand:DI 0 "register_operand" "=r")
6905 (match_operand:V4SI 1 "register_operand" "x")
6906 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")]))))]
6907 "TARGET_64BIT && TARGET_SSE4_1"
6908 "%vpextrd\t{%2, %1, %k0|%k0, %1, %2}"
6909 [(set_attr "type" "sselog")
6910 (set_attr "prefix_extra" "1")
6911 (set_attr "length_immediate" "1")
6912 (set_attr "prefix" "maybe_vex")
6913 (set_attr "mode" "TI")])
;; Extract one 64-bit element of V2DI operand 1 (index operand 2, 0..1)
;; into a DImode register or memory destination using pextrq; enabled only
;; for TARGET_SSE4_1 && TARGET_64BIT and always flagged prefix_rex.
;; NOTE(review): the vec_select line is not visible in this listing.
6915 ;; It must come before *vec_extractv2di_1_rex64 since it is preferred.
6916 (define_insn "*sse4_1_pextrq"
6917 [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
6919 (match_operand:V2DI 1 "register_operand" "x")
6920 (parallel [(match_operand:SI 2 "const_0_to_1_operand" "n")])))]
6921 "TARGET_SSE4_1 && TARGET_64BIT"
6922 "%vpextrq\t{%2, %1, %0|%0, %1, %2}"
6923 [(set_attr "type" "sselog")
6924 (set_attr "prefix_rex" "1")
6925 (set_attr "prefix_extra" "1")
6926 (set_attr "length_immediate" "1")
6927 (set_attr "prefix" "maybe_vex")
6928 (set_attr "mode" "TI")])
;; Expander for the 256-bit pshufd with an 8-bit immediate (operand 2).
;; The immediate is split into four 2-bit selectors that are passed to
;; avx2_pshufd_1 twice: unchanged (0..3) and again with 4 added (4..7).
;; NOTE(review): the expander condition, braces and trailing DONE are not
;; visible in this listing.
6930 (define_expand "avx2_pshufdv3"
6931 [(match_operand:V8SI 0 "register_operand" "")
6932 (match_operand:V8SI 1 "nonimmediate_operand" "")
6933 (match_operand:SI 2 "const_0_to_255_operand" "")]
6936 int mask = INTVAL (operands[2]);
6937 emit_insn (gen_avx2_pshufd_1 (operands[0], operands[1],
6938 GEN_INT ((mask >> 0) & 3),
6939 GEN_INT ((mask >> 2) & 3),
6940 GEN_INT ((mask >> 4) & 3),
6941 GEN_INT ((mask >> 6) & 3),
6942 GEN_INT (((mask >> 0) & 3) + 4),
6943 GEN_INT (((mask >> 2) & 3) + 4),
6944 GEN_INT (((mask >> 4) & 3) + 4),
6945 GEN_INT (((mask >> 6) & 3) + 4)));
;; Matches a V8SI element selection in which operands 2-5 lie in 0..3,
;; operands 6-9 lie in 4..7, and each of operands 6-9 equals the
;; corresponding operand 2-5 plus 4.  The output code packs operands 2-5
;; into an 8-bit immediate (2 bits each, operand 2 in the low bits),
;; overwrites operands[2] with it and prints vpshufd.
;; NOTE(review): the vec_select line, the first condition line, the braces
;; and the declaration of `mask' are not visible in this listing.
6949 (define_insn "avx2_pshufd_1"
6950 [(set (match_operand:V8SI 0 "register_operand" "=x")
6952 (match_operand:V8SI 1 "nonimmediate_operand" "xm")
6953 (parallel [(match_operand 2 "const_0_to_3_operand" "")
6954 (match_operand 3 "const_0_to_3_operand" "")
6955 (match_operand 4 "const_0_to_3_operand" "")
6956 (match_operand 5 "const_0_to_3_operand" "")
6957 (match_operand 6 "const_4_to_7_operand" "")
6958 (match_operand 7 "const_4_to_7_operand" "")
6959 (match_operand 8 "const_4_to_7_operand" "")
6960 (match_operand 9 "const_4_to_7_operand" "")])))]
6962 && INTVAL (operands[2]) + 4 == INTVAL (operands[6])
6963 && INTVAL (operands[3]) + 4 == INTVAL (operands[7])
6964 && INTVAL (operands[4]) + 4 == INTVAL (operands[8])
6965 && INTVAL (operands[5]) + 4 == INTVAL (operands[9])"
6968 mask |= INTVAL (operands[2]) << 0;
6969 mask |= INTVAL (operands[3]) << 2;
6970 mask |= INTVAL (operands[4]) << 4;
6971 mask |= INTVAL (operands[5]) << 6;
6972 operands[2] = GEN_INT (mask);
6974 return "vpshufd\t{%2, %1, %0|%0, %1, %2}";
6976 [(set_attr "type" "sselog1")
6977 (set_attr "prefix" "vex")
6978 (set_attr "length_immediate" "1")
6979 (set_attr "mode" "OI")])
;; Expander for the 128-bit pshufd.  The integer immediate (operand 2) is
;; split into four 2-bit selectors (masked with 3) and handed to
;; sse2_pshufd_1.  Operand 2 is only required to be a const_int here.
;; NOTE(review): the expander condition, braces and trailing DONE are not
;; visible in this listing.
6981 (define_expand "sse2_pshufd"
6982 [(match_operand:V4SI 0 "register_operand" "")
6983 (match_operand:V4SI 1 "nonimmediate_operand" "")
6984 (match_operand:SI 2 "const_int_operand" "")]
6987 int mask = INTVAL (operands[2]);
6988 emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
6989 GEN_INT ((mask >> 0) & 3),
6990 GEN_INT ((mask >> 2) & 3),
6991 GEN_INT ((mask >> 4) & 3),
6992 GEN_INT ((mask >> 6) & 3)));
;; Matches a V4SI element selection with four selectors in 0..3
;; (operands 2-5).  The output code packs them into an 8-bit immediate
;; (2 bits each, operand 2 in the low bits), overwrites operands[2] with
;; it and prints pshufd (with the %v prefix marker).
;; NOTE(review): the vec_select line, the insn condition, the braces and
;; the declaration of `mask' are not visible in this listing.
6996 (define_insn "sse2_pshufd_1"
6997 [(set (match_operand:V4SI 0 "register_operand" "=x")
6999 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
7000 (parallel [(match_operand 2 "const_0_to_3_operand" "")
7001 (match_operand 3 "const_0_to_3_operand" "")
7002 (match_operand 4 "const_0_to_3_operand" "")
7003 (match_operand 5 "const_0_to_3_operand" "")])))]
7007 mask |= INTVAL (operands[2]) << 0;
7008 mask |= INTVAL (operands[3]) << 2;
7009 mask |= INTVAL (operands[4]) << 4;
7010 mask |= INTVAL (operands[5]) << 6;
7011 operands[2] = GEN_INT (mask);
7013 return "%vpshufd\t{%2, %1, %0|%0, %1, %2}";
7015 [(set_attr "type" "sselog1")
7016 (set_attr "prefix_data16" "1")
7017 (set_attr "prefix" "maybe_vex")
7018 (set_attr "length_immediate" "1")
7019 (set_attr "mode" "TI")])
;; Expander for the 256-bit pshuflw with an 8-bit immediate (operand 2).
;; The immediate is split into four 2-bit selectors that are passed to
;; avx2_pshuflw_1 twice: unchanged (0..3) and again with 8 added (8..11).
;; NOTE(review): the expander condition, braces and trailing DONE are not
;; visible in this listing.
7021 (define_expand "avx2_pshuflwv3"
7022 [(match_operand:V16HI 0 "register_operand" "")
7023 (match_operand:V16HI 1 "nonimmediate_operand" "")
7024 (match_operand:SI 2 "const_0_to_255_operand" "")]
7027 int mask = INTVAL (operands[2]);
7028 emit_insn (gen_avx2_pshuflw_1 (operands[0], operands[1],
7029 GEN_INT ((mask >> 0) & 3),
7030 GEN_INT ((mask >> 2) & 3),
7031 GEN_INT ((mask >> 4) & 3),
7032 GEN_INT ((mask >> 6) & 3),
7033 GEN_INT (((mask >> 0) & 3) + 8),
7034 GEN_INT (((mask >> 2) & 3) + 8),
7035 GEN_INT (((mask >> 4) & 3) + 8),
7036 GEN_INT (((mask >> 6) & 3) + 8)));
;; Matches a V16HI element selection in which operands 2-5 lie in 0..3,
;; operands 6-9 lie in 8..11, and each of operands 6-9 equals the
;; corresponding operand 2-5 plus 8.  The output code packs operands 2-5
;; into an 8-bit immediate (2 bits each, operand 2 in the low bits),
;; overwrites operands[2] with it and prints vpshuflw.
;; NOTE(review): the vec_select line, the fixed (const_int) selector
;; entries between and after the matched operands, the first condition
;; line, the braces and the declaration of `mask' are not visible in this
;; listing.
7040 (define_insn "avx2_pshuflw_1"
7041 [(set (match_operand:V16HI 0 "register_operand" "=x")
7043 (match_operand:V16HI 1 "nonimmediate_operand" "xm")
7044 (parallel [(match_operand 2 "const_0_to_3_operand" "")
7045 (match_operand 3 "const_0_to_3_operand" "")
7046 (match_operand 4 "const_0_to_3_operand" "")
7047 (match_operand 5 "const_0_to_3_operand" "")
7052 (match_operand 6 "const_8_to_11_operand" "")
7053 (match_operand 7 "const_8_to_11_operand" "")
7054 (match_operand 8 "const_8_to_11_operand" "")
7055 (match_operand 9 "const_8_to_11_operand" "")
7061 && INTVAL (operands[2]) + 8 == INTVAL (operands[6])
7062 && INTVAL (operands[3]) + 8 == INTVAL (operands[7])
7063 && INTVAL (operands[4]) + 8 == INTVAL (operands[8])
7064 && INTVAL (operands[5]) + 8 == INTVAL (operands[9])"
7067 mask |= INTVAL (operands[2]) << 0;
7068 mask |= INTVAL (operands[3]) << 2;
7069 mask |= INTVAL (operands[4]) << 4;
7070 mask |= INTVAL (operands[5]) << 6;
7071 operands[2] = GEN_INT (mask);
7073 return "vpshuflw\t{%2, %1, %0|%0, %1, %2}";
7075 [(set_attr "type" "sselog")
7076 (set_attr "prefix" "vex")
7077 (set_attr "length_immediate" "1")
7078 (set_attr "mode" "OI")])
;; Expander for the 128-bit pshuflw.  The integer immediate (operand 2) is
;; split into four 2-bit selectors (masked with 3) and handed to
;; sse2_pshuflw_1.
;; NOTE(review): the expander condition, braces and trailing DONE are not
;; visible in this listing.
7080 (define_expand "sse2_pshuflw"
7081 [(match_operand:V8HI 0 "register_operand" "")
7082 (match_operand:V8HI 1 "nonimmediate_operand" "")
7083 (match_operand:SI 2 "const_int_operand" "")]
7086 int mask = INTVAL (operands[2]);
7087 emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
7088 GEN_INT ((mask >> 0) & 3),
7089 GEN_INT ((mask >> 2) & 3),
7090 GEN_INT ((mask >> 4) & 3),
7091 GEN_INT ((mask >> 6) & 3)));
;; Matches a V8HI element selection whose first four selectors (operands
;; 2-5) lie in 0..3.  The output code packs them into an 8-bit immediate
;; (2 bits each, operand 2 in the low bits), overwrites operands[2] with
;; it and prints pshuflw.  Attributes: prefix_rep set, prefix_data16
;; explicitly cleared.
;; NOTE(review): the vec_select line, the remaining fixed selector
;; entries, the insn condition, the braces and the declaration of `mask'
;; are not visible in this listing.
7095 (define_insn "sse2_pshuflw_1"
7096 [(set (match_operand:V8HI 0 "register_operand" "=x")
7098 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7099 (parallel [(match_operand 2 "const_0_to_3_operand" "")
7100 (match_operand 3 "const_0_to_3_operand" "")
7101 (match_operand 4 "const_0_to_3_operand" "")
7102 (match_operand 5 "const_0_to_3_operand" "")
7110 mask |= INTVAL (operands[2]) << 0;
7111 mask |= INTVAL (operands[3]) << 2;
7112 mask |= INTVAL (operands[4]) << 4;
7113 mask |= INTVAL (operands[5]) << 6;
7114 operands[2] = GEN_INT (mask);
7116 return "%vpshuflw\t{%2, %1, %0|%0, %1, %2}";
7118 [(set_attr "type" "sselog")
7119 (set_attr "prefix_data16" "0")
7120 (set_attr "prefix_rep" "1")
7121 (set_attr "prefix" "maybe_vex")
7122 (set_attr "length_immediate" "1")
7123 (set_attr "mode" "TI")])
;; Expander for the 256-bit pshufhw with an 8-bit immediate (operand 2).
;; The immediate is split into four 2-bit selectors that are passed to
;; avx2_pshufhw_1 twice: with 4 added (4..7) and with 12 added (12..15).
;; NOTE(review): the expander condition, braces and trailing DONE are not
;; visible in this listing.
7125 (define_expand "avx2_pshufhwv3"
7126 [(match_operand:V16HI 0 "register_operand" "")
7127 (match_operand:V16HI 1 "nonimmediate_operand" "")
7128 (match_operand:SI 2 "const_0_to_255_operand" "")]
7131 int mask = INTVAL (operands[2]);
7132 emit_insn (gen_avx2_pshufhw_1 (operands[0], operands[1],
7133 GEN_INT (((mask >> 0) & 3) + 4),
7134 GEN_INT (((mask >> 2) & 3) + 4),
7135 GEN_INT (((mask >> 4) & 3) + 4),
7136 GEN_INT (((mask >> 6) & 3) + 4),
7137 GEN_INT (((mask >> 0) & 3) + 12),
7138 GEN_INT (((mask >> 2) & 3) + 12),
7139 GEN_INT (((mask >> 4) & 3) + 12),
7140 GEN_INT (((mask >> 6) & 3) + 12)));
;; Matches a V16HI element selection in which operands 2-5 lie in 4..7,
;; operands 6-9 lie in 12..15, and each of operands 6-9 equals the
;; corresponding operand 2-5 plus 8.  The output code subtracts 4 from
;; operands 2-5 to recover the 2-bit selectors, packs them into an 8-bit
;; immediate (operand 2 in the low bits), overwrites operands[2] with it
;; and prints vpshufhw.
;; NOTE(review): the vec_select line, most fixed (const_int) selector
;; entries, the first condition line, the braces and the declaration of
;; `mask' are not visible in this listing.
7144 (define_insn "avx2_pshufhw_1"
7145 [(set (match_operand:V16HI 0 "register_operand" "=x")
7147 (match_operand:V16HI 1 "nonimmediate_operand" "xm")
7148 (parallel [(const_int 0)
7152 (match_operand 2 "const_4_to_7_operand" "")
7153 (match_operand 3 "const_4_to_7_operand" "")
7154 (match_operand 4 "const_4_to_7_operand" "")
7155 (match_operand 5 "const_4_to_7_operand" "")
7160 (match_operand 6 "const_12_to_15_operand" "")
7161 (match_operand 7 "const_12_to_15_operand" "")
7162 (match_operand 8 "const_12_to_15_operand" "")
7163 (match_operand 9 "const_12_to_15_operand" "")])))]
7165 && INTVAL (operands[2]) + 8 == INTVAL (operands[6])
7166 && INTVAL (operands[3]) + 8 == INTVAL (operands[7])
7167 && INTVAL (operands[4]) + 8 == INTVAL (operands[8])
7168 && INTVAL (operands[5]) + 8 == INTVAL (operands[9])"
7171 mask |= (INTVAL (operands[2]) - 4) << 0;
7172 mask |= (INTVAL (operands[3]) - 4) << 2;
7173 mask |= (INTVAL (operands[4]) - 4) << 4;
7174 mask |= (INTVAL (operands[5]) - 4) << 6;
7175 operands[2] = GEN_INT (mask);
7177 return "vpshufhw\t{%2, %1, %0|%0, %1, %2}";
7179 [(set_attr "type" "sselog")
7180 (set_attr "prefix" "vex")
7181 (set_attr "length_immediate" "1")
7182 (set_attr "mode" "OI")])
;; Expander for the 128-bit pshufhw.  The integer immediate (operand 2) is
;; split into four 2-bit selectors, each offset by 4 so that it lies in
;; 4..7, and handed to sse2_pshufhw_1.
;; NOTE(review): the expander condition, braces and trailing DONE are not
;; visible in this listing.
7184 (define_expand "sse2_pshufhw"
7185 [(match_operand:V8HI 0 "register_operand" "")
7186 (match_operand:V8HI 1 "nonimmediate_operand" "")
7187 (match_operand:SI 2 "const_int_operand" "")]
7190 int mask = INTVAL (operands[2]);
7191 emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
7192 GEN_INT (((mask >> 0) & 3) + 4),
7193 GEN_INT (((mask >> 2) & 3) + 4),
7194 GEN_INT (((mask >> 4) & 3) + 4),
7195 GEN_INT (((mask >> 6) & 3) + 4)));
;; Matches a V8HI element selection whose last four selectors (operands
;; 2-5) lie in 4..7.  The output code subtracts 4 from each, packs the
;; results into an 8-bit immediate (operand 2 in the low bits), overwrites
;; operands[2] with it and prints pshufhw.  Attributes: prefix_rep set,
;; prefix_data16 explicitly cleared.
;; NOTE(review): the vec_select line, the fixed selector entries after
;; (const_int 0), the insn condition, the braces and the declaration of
;; `mask' are not visible in this listing.
7199 (define_insn "sse2_pshufhw_1"
7200 [(set (match_operand:V8HI 0 "register_operand" "=x")
7202 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7203 (parallel [(const_int 0)
7207 (match_operand 2 "const_4_to_7_operand" "")
7208 (match_operand 3 "const_4_to_7_operand" "")
7209 (match_operand 4 "const_4_to_7_operand" "")
7210 (match_operand 5 "const_4_to_7_operand" "")])))]
7214 mask |= (INTVAL (operands[2]) - 4) << 0;
7215 mask |= (INTVAL (operands[3]) - 4) << 2;
7216 mask |= (INTVAL (operands[4]) - 4) << 4;
7217 mask |= (INTVAL (operands[5]) - 4) << 6;
7218 operands[2] = GEN_INT (mask);
7220 return "%vpshufhw\t{%2, %1, %0|%0, %1, %2}";
7222 [(set_attr "type" "sselog")
7223 (set_attr "prefix_rep" "1")
7224 (set_attr "prefix_data16" "0")
7225 (set_attr "prefix" "maybe_vex")
7226 (set_attr "length_immediate" "1")
7227 (set_attr "mode" "TI")])
;; Expander that loads SImode operand 1 into a V4SI register; the
;; preparation statement supplies operands[2] as the V4SImode zero vector.
;; NOTE(review): the vec_merge/vec_duplicate wrapper, the match_dup of
;; operand 2 and the expander condition are not visible in this listing.
7229 (define_expand "sse2_loadd"
7230 [(set (match_operand:V4SI 0 "register_operand" "")
7233 (match_operand:SI 1 "nonimmediate_operand" ""))
7237 "operands[2] = CONST0_RTX (V4SImode);")
;; Places SImode operand 2 into a V4SI destination, combined with
;; operand 1 (a register or zero, per reg_or_0_operand).
;; Alternatives: 0-1 movd from memory / general register, 2 movss from
;; memory, 3 movss from a register with operand 1 tied to the destination,
;; 4 three-operand vmovss.  Alternatives 0-2 use constraint "C" for
;; operand 1.
;; NOTE(review): the vec_merge/vec_duplicate wrapper, the merge mask, the
;; insn condition and the template opener are not visible in this listing.
7239 (define_insn "sse2_loadld"
7240 [(set (match_operand:V4SI 0 "register_operand" "=x,Yi,x,x,x")
7243 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x,x"))
7244 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,C,0,x")
7248 %vmovd\t{%2, %0|%0, %2}
7249 %vmovd\t{%2, %0|%0, %2}
7250 movss\t{%2, %0|%0, %2}
7251 movss\t{%2, %0|%0, %2}
7252 vmovss\t{%2, %1, %0|%0, %1, %2}"
7253 [(set_attr "isa" "sse2,*,noavx,noavx,avx")
7254 (set_attr "type" "ssemov")
7255 (set_attr "prefix" "maybe_vex,maybe_vex,orig,orig,vex")
7256 (set_attr "mode" "TI,TI,V4SF,SF,SF")])
;; Extract element 0 of V4SI operand 1 into an SImode destination.
;; After reload the insn is split into a plain SImode move from the same
;; hard register (operand 1 is re-created as an SImode REG with the same
;; REGNO), provided inter-unit moves are enabled, or the destination is
;; memory, or the destination is not a general register.
;; NOTE(review): the vec_select line, the insn condition and the output
;; template are not visible in this listing.
7258 (define_insn_and_split "sse2_stored"
7259 [(set (match_operand:SI 0 "nonimmediate_operand" "=xm,r")
7261 (match_operand:V4SI 1 "register_operand" "x,Yi")
7262 (parallel [(const_int 0)])))]
7265 "&& reload_completed
7266 && (TARGET_INTER_UNIT_MOVES
7267 || MEM_P (operands [0])
7268 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
7269 [(set (match_dup 0) (match_dup 1))]
7270 "operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));")
;; Extract element `operand 2' (0..3) of a V4SI value that lives in
;; offsettable memory (constraint "o").  The split replaces it with an
;; ordinary SImode move from the vector's address plus i*4 bytes.
;; NOTE(review): the vec_select line, the insn and split conditions, the
;; output template and the trailing DONE are not visible in this listing.
7272 (define_insn_and_split "*vec_ext_v4si_mem"
7273 [(set (match_operand:SI 0 "register_operand" "=r")
7275 (match_operand:V4SI 1 "memory_operand" "o")
7276 (parallel [(match_operand 2 "const_0_to_3_operand" "")])))]
7282 int i = INTVAL (operands[2]);
7284 emit_move_insn (operands[0], adjust_address (operands[1], SImode, i*4));
;; Expander: extract element 0 of V2DI operand 1 into a DImode
;; register or memory destination.
;; NOTE(review): the vec_select line and the expander condition are not
;; visible in this listing.
7288 (define_expand "sse_storeq"
7289 [(set (match_operand:DI 0 "nonimmediate_operand" "")
7291 (match_operand:V2DI 1 "register_operand" "")
7292 (parallel [(const_int 0)])))]
;; 64-bit variant of the element-0 V2DI extract; rejected when both
;; operands are memory.  Only the third alternative (offsettable memory
;; source to general register) has a visible template, a plain mov{q};
;; the first two alternatives carry "*" for both type and mode.
;; NOTE(review): the vec_select line and the first two template lines are
;; not visible in this listing.
7295 (define_insn "*sse2_storeq_rex64"
7296 [(set (match_operand:DI 0 "nonimmediate_operand" "=xm,*r,r")
7298 (match_operand:V2DI 1 "nonimmediate_operand" "x,Yi,o")
7299 (parallel [(const_int 0)])))]
7300 "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7304 mov{q}\t{%1, %0|%0, %1}"
7305 [(set_attr "type" "*,*,imov")
7306 (set_attr "mode" "*,*,DI")])
;; Element-0 V2DI extract into an SSE register or memory, followed by a
;; second pattern with the same shape that rewrites it as a plain DImode
;; move from the same hard register (operand 1 is re-created as a DImode
;; REG with the same REGNO).  The rewrite applies when inter-unit moves
;; are enabled, or the destination is memory, or the destination is not a
;; general register.
;; NOTE(review): the vec_select lines, the insn condition/template of the
;; first pattern, the header line of the second pattern (presumably a
;; define_split) and the first line of its condition are not visible in
;; this listing.
7308 (define_insn "*sse2_storeq"
7309 [(set (match_operand:DI 0 "nonimmediate_operand" "=xm")
7311 (match_operand:V2DI 1 "register_operand" "x")
7312 (parallel [(const_int 0)])))]
7317 [(set (match_operand:DI 0 "nonimmediate_operand" "")
7319 (match_operand:V2DI 1 "register_operand" "")
7320 (parallel [(const_int 0)])))]
7323 && (TARGET_INTER_UNIT_MOVES
7324 || MEM_P (operands [0])
7325 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
7326 [(set (match_dup 0) (match_dup 1))]
7327 "operands[1] = gen_rtx_REG (DImode, REGNO (operands[1]));")
;; 64-bit extract of element 1 of V2DI operand 1; rejected when both
;; operands are memory.  Alternatives:
;;   0  register -> memory via movhps
;;   1  in place (operand 1 tied to destination) via psrldq $8
;;   2  three-operand vpsrldq $8
;;   3  offsettable memory -> SSE register via movq of %H1
;;   4  offsettable memory -> general register via mov{q} of %H1
;; NOTE(review): the vec_select line and the template opener are not
;; visible in this listing.
7329 (define_insn "*vec_extractv2di_1_rex64"
7330 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x,x,r")
7332 (match_operand:V2DI 1 "nonimmediate_operand" " x,0,x,o,o")
7333 (parallel [(const_int 1)])))]
7334 "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7336 %vmovhps\t{%1, %0|%0, %1}
7337 psrldq\t{$8, %0|%0, 8}
7338 vpsrldq\t{$8, %1, %0|%0, %1, 8}
7339 %vmovq\t{%H1, %0|%0, %H1}
7340 mov{q}\t{%H1, %0|%0, %H1}"
7341 [(set_attr "isa" "*,noavx,avx,*,*")
7342 (set_attr "type" "ssemov,sseishft1,sseishft1,ssemov,imov")
7343 (set_attr "length_immediate" "*,1,1,*,*")
7344 (set_attr "memory" "*,none,none,*,*")
7345 (set_attr "prefix" "maybe_vex,orig,vex,maybe_vex,orig")
7346 (set_attr "mode" "V2SF,TI,TI,TI,DI")])
;; 32-bit (non-TARGET_64BIT) extract of element 1 of V2DI operand 1;
;; requires TARGET_SSE and rejects memory-to-memory.  Alternatives:
;;   0  register -> memory via movhps
;;   1  in place via psrldq $8            (isa sse2_noavx)
;;   2  three-operand vpsrldq $8          (isa avx)
;;   3  offsettable memory via movq %H1   (isa sse2)
;;   4  register -> register via movhlps  (isa noavx)
;;   5  offsettable memory via movlps %H1 (isa noavx)
;; NOTE(review): the vec_select line and the template opener are not
;; visible in this listing.
7348 (define_insn "*vec_extractv2di_1"
7349 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x,x,x,x")
7351 (match_operand:V2DI 1 "nonimmediate_operand" " x,0,x,o,x,o")
7352 (parallel [(const_int 1)])))]
7353 "!TARGET_64BIT && TARGET_SSE
7354 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7356 %vmovhps\t{%1, %0|%0, %1}
7357 psrldq\t{$8, %0|%0, 8}
7358 vpsrldq\t{$8, %1, %0|%0, %1, 8}
7359 %vmovq\t{%H1, %0|%0, %H1}
7360 movhlps\t{%1, %0|%0, %1}
7361 movlps\t{%H1, %0|%0, %H1}"
7362 [(set_attr "isa" "*,sse2_noavx,avx,sse2,noavx,noavx")
7363 (set_attr "type" "ssemov,sseishft1,sseishft1,ssemov,ssemov,ssemov")
7364 (set_attr "length_immediate" "*,1,1,*,*,*")
7365 (set_attr "memory" "*,none,none,*,*,*")
7366 (set_attr "prefix" "maybe_vex,orig,vex,maybe_vex,orig,orig")
7367 (set_attr "mode" "V2SF,TI,TI,TI,V4SF,V2SF")])
;; VEX-encoded V4SI pattern taking SImode operand 1: from a register it
;; prints vpshufd with immediate 0, from memory it prints vbroadcastss.
;; NOTE(review): the wrapper line (presumably vec_duplicate, judging by
;; the name), the insn condition and the template opener are not visible
;; in this listing.
7369 (define_insn "*vec_dupv4si_avx"
7370 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
7372 (match_operand:SI 1 "nonimmediate_operand" " x,m")))]
7375 vpshufd\t{$0, %1, %0|%0, %1, 0}
7376 vbroadcastss\t{%1, %0|%0, %1}"
7377 [(set_attr "type" "sselog1,ssemov")
7378 (set_attr "length_immediate" "1,0")
7379 (set_attr "prefix_extra" "0,1")
7380 (set_attr "prefix" "vex")
7381 (set_attr "mode" "TI,V4SF")])
;; Non-VEX V4SI pattern taking SImode register operand 1: alternative 0
;; prints pshufd with immediate 0 (isa sse2); alternative 1 has operand 1
;; tied to the destination and prints shufps with immediate 0 on %0.
;; NOTE(review): the wrapper line (presumably vec_duplicate, judging by
;; the name), the insn condition and the template opener are not visible
;; in this listing.
7383 (define_insn "*vec_dupv4si"
7384 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
7386 (match_operand:SI 1 "register_operand" " x,0")))]
7389 pshufd\t{$0, %1, %0|%0, %1, 0}
7390 shufps\t{$0, %0, %0|%0, %0, 0}"
7391 [(set_attr "isa" "sse2,*")
7392 (set_attr "type" "sselog1")
7393 (set_attr "length_immediate" "1")
7394 (set_attr "mode" "TI,V4SF")])
;; V2DI pattern taking DImode operand 1 with three alternatives (isa
;; noavx, avx, any).  The visible templates are vpunpcklqdq printing
;; operand 1 with the %d modifier (alternative 1) and movddup from memory
;; (alternative 2).
;; NOTE(review): the wrapper line (presumably vec_duplicate, judging by
;; the name), the insn condition and the template for alternative 0 are
;; not visible in this listing.
7396 (define_insn "*vec_dupv2di_sse3"
7397 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x")
7399 (match_operand:DI 1 "nonimmediate_operand" " 0,x,m")))]
7403 vpunpcklqdq\t{%d1, %0|%0, %d1}
7404 %vmovddup\t{%1, %0|%0, %1}"
7405 [(set_attr "isa" "noavx,avx,*")
7406 (set_attr "type" "sselog1")
7407 (set_attr "prefix" "orig,vex,maybe_vex")
7408 (set_attr "mode" "TI,TI,DF")])
;; V2DI pattern taking DImode register operand 1, tied to the destination
;; in both alternatives (isa sse2 / any; types sselog1 / ssemov).
;; NOTE(review): the wrapper line, the insn condition and both output
;; templates are not visible in this listing.
7410 (define_insn "*vec_dupv2di"
7411 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
7413 (match_operand:DI 1 "register_operand" " 0,0")))]
7418 [(set_attr "isa" "sse2,*")
7419 (set_attr "type" "sselog1,ssemov")
7420 (set_attr "mode" "TI,V4SF")])
;; Builds a V2SI value from SImode operands 1 and 2.  Alternatives:
;;   0-1  (v)pinsrd of operand 2 at index 1
;;   2-3  (v)punpckldq with operand 2 in an SSE register
;;   4    movd of operand 1 when operand 2 matches constraint "C"
;;   5-6  MMX-register forms (punpckldq / movd)
;; NOTE(review): the vec_concat line, the insn condition and the template
;; opener are not visible in this listing.
7422 (define_insn "*vec_concatv2si_sse4_1"
7423 [(set (match_operand:V2SI 0 "register_operand" "=x, x,x,x, x, *y,*y")
7425 (match_operand:SI 1 "nonimmediate_operand" " 0, x,0,x,rm, 0,rm")
7426 (match_operand:SI 2 "vector_move_operand" "rm,rm,x,x, C,*ym, C")))]
7429 pinsrd\t{$1, %2, %0|%0, %2, 1}
7430 vpinsrd\t{$1, %2, %1, %0|%0, %1, %2, 1}
7431 punpckldq\t{%2, %0|%0, %2}
7432 vpunpckldq\t{%2, %1, %0|%0, %1, %2}
7433 %vmovd\t{%1, %0|%0, %1}
7434 punpckldq\t{%2, %0|%0, %2}
7435 movd\t{%1, %0|%0, %1}"
7436 [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
7437 (set_attr "type" "sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov")
7438 (set_attr "prefix_extra" "1,1,*,*,*,*,*")
7439 (set_attr "length_immediate" "1,1,*,*,*,*,*")
7440 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig")
7441 (set_attr "mode" "TI,TI,TI,TI,TI,DI,DI")])
;; Builds a V2SI value from SImode operands 1 and 2; operand 2 must be a
;; register or zero.  Alternatives 0-1 target SSE registers (punpckldq,
;; movd), alternatives 2-3 target MMX registers (punpckldq, movd).
;; NOTE(review): the vec_concat line, the insn condition and the template
;; opener are not visible in this listing.
7443 ;; ??? In theory we can match memory for the MMX alternative, but allowing
7444 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
7445 ;; alternatives pretty much forces the MMX alternative to be chosen.
7446 (define_insn "*vec_concatv2si_sse2"
7447 [(set (match_operand:V2SI 0 "register_operand" "=x,x ,*y,*y")
7449 (match_operand:SI 1 "nonimmediate_operand" " 0,rm, 0,rm")
7450 (match_operand:SI 2 "reg_or_0_operand" " x,C ,*y, C")))]
7453 punpckldq\t{%2, %0|%0, %2}
7454 movd\t{%1, %0|%0, %1}
7455 punpckldq\t{%2, %0|%0, %2}
7456 movd\t{%1, %0|%0, %1}"
7457 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
7458 (set_attr "mode" "TI,TI,DI,DI")])
;; Builds a V2SI value from SImode operands 1 and 2 using single-precision
;; SSE instructions for the SSE-register alternatives (unpcklps, movss from
;; memory) and MMX instructions for the MMX-register alternatives
;; (punpckldq, movd).
;; NOTE(review): the vec_concat line, the insn condition and the template
;; opener are not visible in this listing.
7460 (define_insn "*vec_concatv2si_sse"
7461 [(set (match_operand:V2SI 0 "register_operand" "=x,x,*y,*y")
7463 (match_operand:SI 1 "nonimmediate_operand" " 0,m, 0,*rm")
7464 (match_operand:SI 2 "reg_or_0_operand" " x,C,*y,C")))]
7467 unpcklps\t{%2, %0|%0, %2}
7468 movss\t{%1, %0|%0, %1}
7469 punpckldq\t{%2, %0|%0, %2}
7470 movd\t{%1, %0|%0, %1}"
7471 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
7472 (set_attr "mode" "V4SF,V4SF,DI,DI")])
;; Builds a V4SI value from two V2SI operands.  Alternatives:
;;   0-1  (v)punpcklqdq, register operand 2 (isa sse2_noavx / avx)
;;   2    movlhps, register operand 2
;;   3-4  (v)movhps, memory operand 2
;; NOTE(review): the vec_concat line, the insn condition and the template
;; opener are not visible in this listing.
7474 (define_insn "*vec_concatv4si"
7475 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x,x,x")
7477 (match_operand:V2SI 1 "register_operand" " 0,x,0,0,x")
7478 (match_operand:V2SI 2 "nonimmediate_operand" " x,x,x,m,m")))]
7481 punpcklqdq\t{%2, %0|%0, %2}
7482 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7483 movlhps\t{%2, %0|%0, %2}
7484 movhps\t{%2, %0|%0, %2}
7485 vmovhps\t{%2, %1, %0|%0, %1, %2}"
7486 [(set_attr "isa" "sse2_noavx,avx,noavx,noavx,avx")
7487 (set_attr "type" "sselog,sselog,ssemov,ssemov,ssemov")
7488 (set_attr "prefix" "orig,vex,orig,orig,vex")
7489 (set_attr "mode" "TI,TI,V4SF,V2SF,V2SF")])
;; Builds a V2DI value from DImode operands 1 and 2.  Alternatives:
;;   0-1  (v)pinsrq of operand 2 at index 1
;;   2    movq of operand 1 (SSE register or memory) when operand 2 is "C"
;;   3    movd of operand 1 from a general register when operand 2 is "C"
;;   4    movq2dq from an MMX register when operand 2 is "C"
;;   5-6  (v)punpcklqdq with register operand 2
;;   7-8  (v)movhps with memory operand 2
;; Type is sselog for alternatives 0,1,5,6 and ssemov otherwise.
;; NOTE(review): the vec_concat line, the insn condition, the template
;; opener and the heads of the computed "type"/"prefix_rex" attributes are
;; not visible in this listing.
7491 ;; movd instead of movq is required to handle broken assemblers.
7492 (define_insn "*vec_concatv2di_rex64"
7493 [(set (match_operand:V2DI 0 "register_operand"
7494 "=x,x ,x ,Yi,!x,x,x,x,x")
7496 (match_operand:DI 1 "nonimmediate_operand"
7497 " 0,x ,xm,r ,*y,0,x,0,x")
7498 (match_operand:DI 2 "vector_move_operand"
7499 "rm,rm,C ,C ,C ,x,x,m,m")))]
7502 pinsrq\t{$1, %2, %0|%0, %2, 1}
7503 vpinsrq\t{$1, %2, %1, %0|%0, %1, %2, 1}
7504 %vmovq\t{%1, %0|%0, %1}
7505 %vmovd\t{%1, %0|%0, %1}
7506 movq2dq\t{%1, %0|%0, %1}
7507 punpcklqdq\t{%2, %0|%0, %2}
7508 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7509 movhps\t{%2, %0|%0, %2}
7510 vmovhps\t{%2, %1, %0|%0, %1, %2}"
7511 [(set_attr "isa" "sse4_noavx,avx,*,*,*,noavx,avx,noavx,avx")
7514 (eq_attr "alternative" "0,1,5,6")
7515 (const_string "sselog")
7516 (const_string "ssemov")))
7517 (set (attr "prefix_rex")
7519 (and (eq_attr "alternative" "0,3")
7520 (not (match_test "TARGET_AVX")))
7522 (const_string "*")))
7523 (set_attr "prefix_extra" "1,1,*,*,*,*,*,*,*")
7524 (set_attr "length_immediate" "1,1,*,*,*,*,*,*,*")
7525 (set_attr "prefix" "orig,vex,maybe_vex,maybe_vex,orig,orig,vex,orig,vex")
7526 (set_attr "mode" "TI,TI,TI,TI,TI,TI,TI,V2SF,V2SF")])
;; 32-bit (non-TARGET_64BIT) V2DI construction from DImode operands 1 and
;; 2; requires TARGET_SSE.  Alternatives:
;;   0    movq of operand 1 when operand 2 is "C"         (isa sse2)
;;   1    movq2dq from an MMX register, operand 2 "C"     (isa sse2)
;;   2-3  (v)punpcklqdq with register operand 2
;;   4    movlhps with register operand 2
;;   5-6  (v)movhps with memory operand 2
;; NOTE(review): the vec_concat line and the template opener are not
;; visible in this listing.
7528 (define_insn "vec_concatv2di"
7529 [(set (match_operand:V2DI 0 "register_operand" "=x,?x,x,x,x,x,x")
7531 (match_operand:DI 1 "nonimmediate_operand" "xm,*y,0,x,0,0,x")
7532 (match_operand:DI 2 "vector_move_operand" " C, C,x,x,x,m,m")))]
7533 "!TARGET_64BIT && TARGET_SSE"
7535 %vmovq\t{%1, %0|%0, %1}
7536 movq2dq\t{%1, %0|%0, %1}
7537 punpcklqdq\t{%2, %0|%0, %2}
7538 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7539 movlhps\t{%2, %0|%0, %2}
7540 movhps\t{%2, %0|%0, %2}
7541 vmovhps\t{%2, %1, %0|%0, %1, %2}"
7542 [(set_attr "isa" "sse2,sse2,sse2_noavx,avx,noavx,noavx,avx")
7543 (set_attr "type" "ssemov,ssemov,sselog,sselog,ssemov,ssemov,ssemov")
7544 (set_attr "prefix" "maybe_vex,orig,orig,vex,orig,orig,vex")
7545 (set_attr "mode" "TI,TI,TI,TI,V4SF,V2SF,V2SF")])
;; Expander for VI124_AVX2 modes; the result has mode <sseunpackmode>.
;; All work is delegated to ix86_expand_sse_unpack (operands, false, false).
;; NOTE(review): judging by the pattern names the two flags are presumably
;; (unsigned_p, high_p) -- confirm against the helper's definition.  The
;; expander condition is not visible in this listing.
7547 (define_expand "vec_unpacks_lo_<mode>"
7548 [(match_operand:<sseunpackmode> 0 "register_operand" "")
7549 (match_operand:VI124_AVX2 1 "register_operand" "")]
7551 "ix86_expand_sse_unpack (operands, false, false); DONE;")
;; Expander for VI124_AVX2 modes; the result has mode <sseunpackmode>.
;; All work is delegated to ix86_expand_sse_unpack (operands, false, true).
;; NOTE(review): judging by the pattern names the two flags are presumably
;; (unsigned_p, high_p) -- confirm against the helper's definition.  The
;; expander condition is not visible in this listing.
7553 (define_expand "vec_unpacks_hi_<mode>"
7554 [(match_operand:<sseunpackmode> 0 "register_operand" "")
7555 (match_operand:VI124_AVX2 1 "register_operand" "")]
7557 "ix86_expand_sse_unpack (operands, false, true); DONE;")
;; Expander for VI124_AVX2 modes; the result has mode <sseunpackmode>.
;; All work is delegated to ix86_expand_sse_unpack (operands, true, false).
;; NOTE(review): judging by the pattern names the two flags are presumably
;; (unsigned_p, high_p) -- confirm against the helper's definition.  The
;; expander condition is not visible in this listing.
7559 (define_expand "vec_unpacku_lo_<mode>"
7560 [(match_operand:<sseunpackmode> 0 "register_operand" "")
7561 (match_operand:VI124_AVX2 1 "register_operand" "")]
7563 "ix86_expand_sse_unpack (operands, true, false); DONE;")
;; Expander for VI124_AVX2 modes; the result has mode <sseunpackmode>.
;; All work is delegated to ix86_expand_sse_unpack (operands, true, true).
;; NOTE(review): judging by the pattern names the two flags are presumably
;; (unsigned_p, high_p) -- confirm against the helper's definition.  The
;; expander condition is not visible in this listing.
7565 (define_expand "vec_unpacku_hi_<mode>"
7566 [(match_operand:<sseunpackmode> 0 "register_operand" "")
7567 (match_operand:VI124_AVX2 1 "register_operand" "")]
7569 "ix86_expand_sse_unpack (operands, true, true); DONE;")
7571 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7575 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander for the V32QI unsigned-average pattern.  The visible RTL
;; combines operands 1 and 2 with a 32-element constant vector of ones;
;; the preparation statement calls ix86_fixup_binary_operands_no_copy
;; with PLUS on the operands.
;; NOTE(review): the arithmetic wrapper lines and the expander condition
;; are not visible in this listing; the ones vector is presumably the
;; rounding term of (a + b + 1) >> 1 computed in a wider mode -- confirm.
7577 (define_expand "avx2_uavgv32qi3"
7578 [(set (match_operand:V32QI 0 "register_operand" "")
7584 (match_operand:V32QI 1 "nonimmediate_operand" ""))
7586 (match_operand:V32QI 2 "nonimmediate_operand" "")))
7587 (const_vector:V32QI [(const_int 1) (const_int 1)
7588 (const_int 1) (const_int 1)
7589 (const_int 1) (const_int 1)
7590 (const_int 1) (const_int 1)
7591 (const_int 1) (const_int 1)
7592 (const_int 1) (const_int 1)
7593 (const_int 1) (const_int 1)
7594 (const_int 1) (const_int 1)
7595 (const_int 1) (const_int 1)
7596 (const_int 1) (const_int 1)
7597 (const_int 1) (const_int 1)
7598 (const_int 1) (const_int 1)
7599 (const_int 1) (const_int 1)
7600 (const_int 1) (const_int 1)
7601 (const_int 1) (const_int 1)
7602 (const_int 1) (const_int 1)]))
7605 "ix86_fixup_binary_operands_no_copy (PLUS, V32QImode, operands);")
;; Expander for the V16QI unsigned-average pattern.  The visible RTL
;; combines operands 1 and 2 with a 16-element constant vector of ones;
;; the preparation statement calls ix86_fixup_binary_operands_no_copy
;; with PLUS on the operands.
;; NOTE(review): the arithmetic wrapper lines and the expander condition
;; are not visible in this listing; the ones vector is presumably the
;; rounding term of (a + b + 1) >> 1 computed in a wider mode -- confirm.
7607 (define_expand "sse2_uavgv16qi3"
7608 [(set (match_operand:V16QI 0 "register_operand" "")
7614 (match_operand:V16QI 1 "nonimmediate_operand" ""))
7616 (match_operand:V16QI 2 "nonimmediate_operand" "")))
7617 (const_vector:V16QI [(const_int 1) (const_int 1)
7618 (const_int 1) (const_int 1)
7619 (const_int 1) (const_int 1)
7620 (const_int 1) (const_int 1)
7621 (const_int 1) (const_int 1)
7622 (const_int 1) (const_int 1)
7623 (const_int 1) (const_int 1)
7624 (const_int 1) (const_int 1)]))
7627 "ix86_fixup_binary_operands_no_copy (PLUS, V16QImode, operands);")
;; Insn for the V32QI unsigned average, printed as three-operand vpavgb.
;; Enabled for TARGET_AVX2 when ix86_binary_operator_ok accepts the
;; operands for PLUS; operand 1 is marked commutative ("%").
;; NOTE(review): the arithmetic wrapper lines around the operands and the
;; ones vector are not visible in this listing; presumably they express
;; (a + b + 1) >> 1 in a wider mode -- confirm.
7629 (define_insn "*avx2_uavgv32qi3"
7630 [(set (match_operand:V32QI 0 "register_operand" "=x")
7636 (match_operand:V32QI 1 "nonimmediate_operand" "%x"))
7638 (match_operand:V32QI 2 "nonimmediate_operand" "xm")))
7639 (const_vector:V32QI [(const_int 1) (const_int 1)
7640 (const_int 1) (const_int 1)
7641 (const_int 1) (const_int 1)
7642 (const_int 1) (const_int 1)
7643 (const_int 1) (const_int 1)
7644 (const_int 1) (const_int 1)
7645 (const_int 1) (const_int 1)
7646 (const_int 1) (const_int 1)
7647 (const_int 1) (const_int 1)
7648 (const_int 1) (const_int 1)
7649 (const_int 1) (const_int 1)
7650 (const_int 1) (const_int 1)
7651 (const_int 1) (const_int 1)
7652 (const_int 1) (const_int 1)
7653 (const_int 1) (const_int 1)
7654 (const_int 1) (const_int 1)]))
7656 "TARGET_AVX2 && ix86_binary_operator_ok (PLUS, V32QImode, operands)"
7657 "vpavgb\t{%2, %1, %0|%0, %1, %2}"
7658 [(set_attr "type" "sseiadd")
7659 (set_attr "prefix" "vex")
7660 (set_attr "mode" "OI")])
;; Insn for the V16QI unsigned average.  Alternative 0 is two-operand
;; pavgb with operand 1 tied to the destination; alternative 1 is
;; three-operand vpavgb.  Enabled for TARGET_SSE2 when
;; ix86_binary_operator_ok accepts the operands for PLUS; operand 1 is
;; marked commutative ("%").
;; NOTE(review): the arithmetic wrapper lines and the template opener are
;; not visible in this listing; the wrappers presumably express
;; (a + b + 1) >> 1 in a wider mode -- confirm.
7662 (define_insn "*sse2_uavgv16qi3"
7663 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
7669 (match_operand:V16QI 1 "nonimmediate_operand" "%0,x"))
7671 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")))
7672 (const_vector:V16QI [(const_int 1) (const_int 1)
7673 (const_int 1) (const_int 1)
7674 (const_int 1) (const_int 1)
7675 (const_int 1) (const_int 1)
7676 (const_int 1) (const_int 1)
7677 (const_int 1) (const_int 1)
7678 (const_int 1) (const_int 1)
7679 (const_int 1) (const_int 1)]))
7681 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
7683 pavgb\t{%2, %0|%0, %2}
7684 vpavgb\t{%2, %1, %0|%0, %1, %2}"
7685 [(set_attr "isa" "noavx,avx")
7686 (set_attr "type" "sseiadd")
7687 (set_attr "prefix_data16" "1,*")
7688 (set_attr "prefix" "orig,vex")
7689 (set_attr "mode" "TI")])
;; Expander for the V16HI unsigned-average pattern.  The visible RTL
;; combines operands 1 and 2 with a 16-element constant vector of ones;
;; the preparation statement calls ix86_fixup_binary_operands_no_copy
;; with PLUS on the operands.
;; NOTE(review): the arithmetic wrapper lines and the expander condition
;; are not visible in this listing; the ones vector is presumably the
;; rounding term of (a + b + 1) >> 1 computed in a wider mode -- confirm.
7691 (define_expand "avx2_uavgv16hi3"
7692 [(set (match_operand:V16HI 0 "register_operand" "")
7698 (match_operand:V16HI 1 "nonimmediate_operand" ""))
7700 (match_operand:V16HI 2 "nonimmediate_operand" "")))
7701 (const_vector:V16HI [(const_int 1) (const_int 1)
7702 (const_int 1) (const_int 1)
7703 (const_int 1) (const_int 1)
7704 (const_int 1) (const_int 1)
7705 (const_int 1) (const_int 1)
7706 (const_int 1) (const_int 1)
7707 (const_int 1) (const_int 1)
7708 (const_int 1) (const_int 1)]))
7711 "ix86_fixup_binary_operands_no_copy (PLUS, V16HImode, operands);")
;; Expander for the V8HI average.  Same shape as the V16HI expander but with
;; a const_vector of eight 1s; the preparation statement runs
;; ix86_fixup_binary_operands_no_copy (PLUS, V8HImode, operands).
7713 (define_expand "sse2_uavgv8hi3"
7714 [(set (match_operand:V8HI 0 "register_operand" "")
7720 (match_operand:V8HI 1 "nonimmediate_operand" ""))
7722 (match_operand:V8HI 2 "nonimmediate_operand" "")))
7723 (const_vector:V8HI [(const_int 1) (const_int 1)
7724 (const_int 1) (const_int 1)
7725 (const_int 1) (const_int 1)
7726 (const_int 1) (const_int 1)]))
7729 "ix86_fixup_binary_operands_no_copy (PLUS, V8HImode, operands);")
;; V16HI average, matched form.  Single alternative: always the
;; three-operand VEX-encoded vpavgw, insn mode OI.  Requires TARGET_AVX2
;; and ix86_binary_operator_ok (PLUS, ...).  "%" makes operand 1
;; commutative with operand 2; only operand 2 has a memory alternative.
7731 (define_insn "*avx2_uavgv16hi3"
7732 [(set (match_operand:V16HI 0 "register_operand" "=x")
7738 (match_operand:V16HI 1 "nonimmediate_operand" "%x"))
7740 (match_operand:V16HI 2 "nonimmediate_operand" "xm")))
7741 (const_vector:V16HI [(const_int 1) (const_int 1)
7742 (const_int 1) (const_int 1)
7743 (const_int 1) (const_int 1)
7744 (const_int 1) (const_int 1)
7745 (const_int 1) (const_int 1)
7746 (const_int 1) (const_int 1)
7747 (const_int 1) (const_int 1)
7748 (const_int 1) (const_int 1)]))
7750 "TARGET_AVX2 && ix86_binary_operator_ok (PLUS, V16HImode, operands)"
7751 "vpavgw\t{%2, %1, %0|%0, %1, %2}"
7752 [(set_attr "type" "sseiadd")
7753 (set_attr "prefix" "vex")
7754 (set_attr "mode" "OI")])
;; V8HI average, matched form.  Alternative 0 ("noavx") is the destructive
;; pavgw with operand 1 tied to the destination; alternative 1 ("avx") is
;; the three-operand vpavgw.  Requires TARGET_SSE2 and
;; ix86_binary_operator_ok (PLUS, V8HImode, operands).
7756 (define_insn "*sse2_uavgv8hi3"
7757 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
7763 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x"))
7765 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")))
7766 (const_vector:V8HI [(const_int 1) (const_int 1)
7767 (const_int 1) (const_int 1)
7768 (const_int 1) (const_int 1)
7769 (const_int 1) (const_int 1)]))
7771 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
7773 pavgw\t{%2, %0|%0, %2}
7774 vpavgw\t{%2, %1, %0|%0, %1, %2}"
7775 [(set_attr "isa" "noavx,avx")
7776 (set_attr "type" "sseiadd")
7777 (set_attr "prefix_data16" "1,*")
7778 (set_attr "prefix" "orig,vex")
7779 (set_attr "mode" "TI")])
;; psadbw / vpsadbw.  The operation is modelled as an opaque unspec over the
;; two byte-vector inputs (mode <ssebytemode>) rather than as explicit RTL
;; arithmetic; the result mode iterates over VI8_AVX2.  Alternative 0 is the
;; destructive legacy form, alternative 1 the three-operand VEX form.
7781 ;; The correct representation for this is absolutely enormous, and
7782 ;; surely not generally useful.
7783 (define_insn "<sse2_avx2>_psadbw"
7784 [(set (match_operand:VI8_AVX2 0 "register_operand" "=x,x")
7785 (unspec:VI8_AVX2 [(match_operand:<ssebytemode> 1 "register_operand" "0,x")
7786 (match_operand:<ssebytemode> 2 "nonimmediate_operand" "xm,xm")]
7790 psadbw\t{%2, %0|%0, %2}
7791 vpsadbw\t{%2, %1, %0|%0, %1, %2}"
7792 [(set_attr "isa" "noavx,avx")
7793 (set_attr "type" "sseiadd")
7794 (set_attr "atom_unit" "simul")
7795 (set_attr "prefix_data16" "1,*")
7796 (set_attr "prefix" "orig,vex")
7797 (set_attr "mode" "<sseinsnmode>")])
;; movmskps / movmskpd family, one insn per mode of the VF iterator.  The
;; vector source must be in an SSE register ("x"); the SImode result goes to
;; a general register ("r").  The insn "mode" attribute is the vector mode
;; itself.  prefix is "maybe_vex" (the %v in the template presumably adds
;; the "v" mnemonic prefix when AVX is enabled -- see print_operand).
7799 (define_insn "<sse>_movmsk<ssemodesuffix><avxsizesuffix>"
7800 [(set (match_operand:SI 0 "register_operand" "=r")
7802 [(match_operand:VF 1 "register_operand" "x")]
7805 "%vmovmsk<ssemodesuffix>\t{%1, %0|%0, %1}"
7806 [(set_attr "type" "ssemov")
7807 (set_attr "prefix" "maybe_vex")
7808 (set_attr "mode" "<MODE>")])
;; 256-bit vpmovmskb: V32QI source in an SSE register, SImode result in a
;; general register, modelled as an unspec.  Always VEX encoded.
;; NOTE(review): the "mode" attribute is "DI" here while the 128-bit
;; sse2_pmovmskb pattern uses "SI" for the same SImode destination --
;; confirm whether "DI" is intentional.
7810 (define_insn "avx2_pmovmskb"
7811 [(set (match_operand:SI 0 "register_operand" "=r")
7812 (unspec:SI [(match_operand:V32QI 1 "register_operand" "x")]
7815 "vpmovmskb\t{%1, %0|%0, %1}"
7816 [(set_attr "type" "ssemov")
7817 (set_attr "prefix" "vex")
7818 (set_attr "mode" "DI")])
;; 128-bit pmovmskb: V16QI source in an SSE register, SImode result in a
;; general register, modelled as an unspec.  prefix is "maybe_vex", so the
;; same pattern serves the legacy and VEX encodings; prefix_data16 is set
;; to "1" unconditionally.
7820 (define_insn "sse2_pmovmskb"
7821 [(set (match_operand:SI 0 "register_operand" "=r")
7822 (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
7825 "%vpmovmskb\t{%1, %0|%0, %1}"
7826 [(set_attr "type" "ssemov")
7827 (set_attr "prefix_data16" "1")
7828 (set_attr "prefix" "maybe_vex")
7829 (set_attr "mode" "SI")])
;; Expander for maskmovdqu.  The destination is a V16QI memory operand and
;; the stored value is an unspec whose inputs include two V16QI register
;; operands (1 and 2).
7831 (define_expand "sse2_maskmovdqu"
7832 [(set (match_operand:V16QI 0 "memory_operand" "")
7833 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "")
7834 (match_operand:V16QI 2 "register_operand" "")
;; maskmovdqu, matched form.  Operand 0 is the *address* of the V16QI
;; destination, mode-iterated over P and pinned by the "D" constraint; it is
;; not printed in the template because the instruction uses it implicitly.
;; The unspec reads operands 1 and 2 and also the previous contents of the
;; destination (mem (match_dup 0)), so the store depends on the old memory.
;; length_vex is computed by hand: 3 plus one when operand 2 is a REX SSE
;; register.
7839 (define_insn "*sse2_maskmovdqu"
7840 [(set (mem:V16QI (match_operand:P 0 "register_operand" "D"))
7841 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
7842 (match_operand:V16QI 2 "register_operand" "x")
7843 (mem:V16QI (match_dup 0))]
7846 "%vmaskmovdqu\t{%2, %1|%1, %2}"
7847 [(set_attr "type" "ssemov")
7848 (set_attr "prefix_data16" "1")
7849 ;; The implicit %rdi operand confuses default length_vex computation.
7850 (set (attr "length_vex")
7851 (symbol_ref ("3 + REX_SSE_REGNO_P (REGNO (operands[2]))")))
7852 (set_attr "prefix" "maybe_vex")
7853 (set_attr "mode" "TI")])
;; Load MXCSR.  An unspec_volatile whose only input is an SImode memory
;; operand; the "memory" attribute is "load" and atom_sse_attr is "mxcsr".
7855 (define_insn "sse_ldmxcsr"
7856 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
7860 [(set_attr "type" "sse")
7861 (set_attr "atom_sse_attr" "mxcsr")
7862 (set_attr "prefix" "maybe_vex")
7863 (set_attr "memory" "load")])
;; Store MXCSR.  Sets an SImode memory operand from an unspec_volatile
;; (UNSPECV_STMXCSR); the (const_int 0) is only a placeholder input.  The
;; "memory" attribute is "store" and atom_sse_attr is "mxcsr".
7865 (define_insn "sse_stmxcsr"
7866 [(set (match_operand:SI 0 "memory_operand" "=m")
7867 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
7870 [(set_attr "type" "sse")
7871 (set_attr "atom_sse_attr" "mxcsr")
7872 (set_attr "prefix" "maybe_vex")
7873 (set_attr "memory" "store")])
;; Expander for sfence, enabled for TARGET_SSE or TARGET_3DNOW_A.  Operand 0
;; is not supplied by the caller: the preparation code creates a BLKmode MEM
;; on a scratch address, marks it volatile, and the pattern applies
;; UNSPEC_SFENCE to it.
7875 (define_expand "sse_sfence"
7877 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
7878 "TARGET_SSE || TARGET_3DNOW_A"
7880 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
7881 MEM_VOLATILE_P (operands[0]) = 1;
;; sfence, matched form: a BLKmode operand set to UNSPEC_SFENCE of itself.
;; Operand 0 has an empty predicate and constraint.  length_address is
;; forced to "0", and "memory" is "unknown".
7884 (define_insn "*sse_sfence"
7885 [(set (match_operand:BLK 0 "" "")
7886 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
7887 "TARGET_SSE || TARGET_3DNOW_A"
7889 [(set_attr "type" "sse")
7890 (set_attr "length_address" "0")
7891 (set_attr "atom_sse_attr" "fence")
7892 (set_attr "memory" "unknown")])
;; clflush.  An unspec_volatile over a modeless address operand (predicate
;; address_operand, constraint "p").  Classified with the fences for Atom
;; scheduling (atom_sse_attr "fence"); "memory" is "unknown".
7894 (define_insn "sse2_clflush"
7895 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
7899 [(set_attr "type" "sse")
7900 (set_attr "atom_sse_attr" "fence")
7901 (set_attr "memory" "unknown")])
;; Expander for mfence.  As with the other fence expanders, operand 0 is
;; built in the preparation code: a volatile BLKmode MEM on a scratch
;; address, to which UNSPEC_MFENCE is applied.
7903 (define_expand "sse2_mfence"
7905 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
7908 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
7909 MEM_VOLATILE_P (operands[0]) = 1;
;; mfence, matched form: a BLKmode operand set to UNSPEC_MFENCE of itself.
;; Enabled when either TARGET_64BIT or TARGET_SSE2 holds.  length_address is
;; forced to "0", and "memory" is "unknown".
7912 (define_insn "*sse2_mfence"
7913 [(set (match_operand:BLK 0 "" "")
7914 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
7915 "TARGET_64BIT || TARGET_SSE2"
7917 [(set_attr "type" "sse")
7918 (set_attr "length_address" "0")
7919 (set_attr "atom_sse_attr" "fence")
7920 (set_attr "memory" "unknown")])
;; Expander for lfence.  Operand 0 is built in the preparation code: a
;; volatile BLKmode MEM on a scratch address, to which UNSPEC_LFENCE is
;; applied.
7922 (define_expand "sse2_lfence"
7924 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
7927 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
7928 MEM_VOLATILE_P (operands[0]) = 1;
;; lfence, matched form: a BLKmode operand set to UNSPEC_LFENCE of itself.
;; Unlike the sfence/mfence patterns, atom_sse_attr is "lfence" rather than
;; "fence".  length_address is forced to "0", and "memory" is "unknown".
7931 (define_insn "*sse2_lfence"
7932 [(set (match_operand:BLK 0 "" "")
7933 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
7936 [(set_attr "type" "sse")
7937 (set_attr "length_address" "0")
7938 (set_attr "atom_sse_attr" "lfence")
7939 (set_attr "memory" "unknown")])
;; mwait.  An unspec_volatile over two SImode register operands pinned by
;; the "a" and "c" constraints.  The same SImode pattern serves 32-bit and
;; 64-bit targets (see the comment inside).  Length is fixed at 3 bytes.
7941 (define_insn "sse3_mwait"
7942 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
7943 (match_operand:SI 1 "register_operand" "c")]
7946 ;; 64bit version is "mwait %rax,%rcx". But only lower 32bits are used.
7947 ;; Since 32bit register operands are implicitly zero extended to 64bit,
7948 ;; we only need to set up 32bit registers.
7950 [(set_attr "length" "3")])
;; monitor, 32-bit variant (TARGET_SSE3 && !TARGET_64BIT).  Three SImode
;; register operands pinned by the "a", "c" and "d" constraints; all three
;; are printed in the template.  Length is fixed at 3 bytes.
7952 (define_insn "sse3_monitor"
7953 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
7954 (match_operand:SI 1 "register_operand" "c")
7955 (match_operand:SI 2 "register_operand" "d")]
7957 "TARGET_SSE3 && !TARGET_64BIT"
7958 "monitor\t%0, %1, %2"
7959 [(set_attr "length" "3")])
;; monitor, 64-bit variant (TARGET_SSE3 && TARGET_64BIT).  Operand 0 (the
;; "a" register) is DImode; operands 1 and 2 ("c" and "d") stay SImode for
;; the reason given in the comment inside.  Length is fixed at 3 bytes.
7961 (define_insn "sse3_monitor64"
7962 [(unspec_volatile [(match_operand:DI 0 "register_operand" "a")
7963 (match_operand:SI 1 "register_operand" "c")
7964 (match_operand:SI 2 "register_operand" "d")]
7966 "TARGET_SSE3 && TARGET_64BIT"
7967 ;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in
7968 ;; RCX and RDX are used. Since 32bit register operands are implicitly
7969 ;; zero extended to 64bit, we only need to set up 32bit registers.
7971 [(set_attr "length" "3")])
7973 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7975 ;; SSSE3 instructions
7977 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; AVX2 vphaddw on V16HI: single VEX-only alternative, insn mode OI.
;; The RTL pairs up adjacent HI elements (0,1), (2,3) ... (14,15) of
;; operand 1 (register only), followed by the same pairs of operand 2
;; (register or memory).
;; NOTE(review): the pairs are listed as all of operand 1 and then all of
;; operand 2, whereas the 256-bit instruction is documented as operating
;; within each 128-bit lane -- confirm this element order is intended.
7979 (define_insn "avx2_phaddwv16hi3"
7980 [(set (match_operand:V16HI 0 "register_operand" "=x")
7987 (match_operand:V16HI 1 "register_operand" "x")
7988 (parallel [(const_int 0)]))
7989 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
7991 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
7992 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
7995 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
7996 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
7998 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
7999 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8003 (vec_select:HI (match_dup 1) (parallel [(const_int 8)]))
8004 (vec_select:HI (match_dup 1) (parallel [(const_int 9)])))
8006 (vec_select:HI (match_dup 1) (parallel [(const_int 10)]))
8007 (vec_select:HI (match_dup 1) (parallel [(const_int 11)]))))
8010 (vec_select:HI (match_dup 1) (parallel [(const_int 12)]))
8011 (vec_select:HI (match_dup 1) (parallel [(const_int 13)])))
8013 (vec_select:HI (match_dup 1) (parallel [(const_int 14)]))
8014 (vec_select:HI (match_dup 1) (parallel [(const_int 15)]))))))
8020 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
8021 (parallel [(const_int 0)]))
8022 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8024 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8025 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8028 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8029 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8031 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8032 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))
8036 (vec_select:HI (match_dup 2) (parallel [(const_int 8)]))
8037 (vec_select:HI (match_dup 2) (parallel [(const_int 9)])))
8039 (vec_select:HI (match_dup 2) (parallel [(const_int 10)]))
8040 (vec_select:HI (match_dup 2) (parallel [(const_int 11)]))))
8043 (vec_select:HI (match_dup 2) (parallel [(const_int 12)]))
8044 (vec_select:HI (match_dup 2) (parallel [(const_int 13)])))
8046 (vec_select:HI (match_dup 2) (parallel [(const_int 14)]))
8047 (vec_select:HI (match_dup 2) (parallel [(const_int 15)]))))))))]
8049 "vphaddw\t{%2, %1, %0|%0, %1, %2}"
8050 [(set_attr "type" "sseiadd")
8051 (set_attr "prefix_extra" "1")
8052 (set_attr "prefix" "vex")
8053 (set_attr "mode" "OI")])
;; SSSE3 phaddw on V8HI.  Alternative 0 ("noavx") is the destructive form
;; with operand 1 tied to the destination; alternative 1 ("avx") is the
;; three-operand vphaddw.  The RTL pairs up adjacent HI elements (0,1) ...
;; (6,7) of operand 1, followed by the same pairs of operand 2 (register or
;; memory).
8055 (define_insn "ssse3_phaddwv8hi3"
8056 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
8062 (match_operand:V8HI 1 "register_operand" "0,x")
8063 (parallel [(const_int 0)]))
8064 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8066 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8067 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8070 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8071 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8073 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8074 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8079 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
8080 (parallel [(const_int 0)]))
8081 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8083 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8084 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8087 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8088 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8090 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8091 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8094 phaddw\t{%2, %0|%0, %2}
8095 vphaddw\t{%2, %1, %0|%0, %1, %2}"
8096 [(set_attr "isa" "noavx,avx")
8097 (set_attr "type" "sseiadd")
8098 (set_attr "atom_unit" "complex")
8099 (set_attr "prefix_data16" "1,*")
8100 (set_attr "prefix_extra" "1")
8101 (set_attr "prefix" "orig,vex")
8102 (set_attr "mode" "TI")])
;; phaddw on V4HI using the "y" register class, insn mode DI.  Destructive
;; only: operand 1 is tied to the destination.  The RTL pairs up HI elements
;; (0,1) and (2,3) of operand 1, then of operand 2 (register or memory).
;; prefix_rex is not a constant: it is computed per insn by
;; x86_extended_reg_mentioned_p.
8104 (define_insn "ssse3_phaddwv4hi3"
8105 [(set (match_operand:V4HI 0 "register_operand" "=y")
8110 (match_operand:V4HI 1 "register_operand" "0")
8111 (parallel [(const_int 0)]))
8112 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8114 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8115 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8119 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8120 (parallel [(const_int 0)]))
8121 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8123 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8124 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8126 "phaddw\t{%2, %0|%0, %2}"
8127 [(set_attr "type" "sseiadd")
8128 (set_attr "atom_unit" "complex")
8129 (set_attr "prefix_extra" "1")
8130 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8131 (set_attr "mode" "DI")])
;; AVX2 vphaddd on V8SI: single VEX-only alternative, insn mode OI.
;; The RTL pairs up adjacent SI elements (0,1) ... (6,7) of operand 1
;; (register only), followed by the same pairs of operand 2 (register or
;; memory).
;; NOTE(review): the pairs are listed as all of operand 1 and then all of
;; operand 2, whereas the 256-bit instruction is documented as operating
;; within each 128-bit lane -- confirm this element order is intended.
8133 (define_insn "avx2_phadddv8si3"
8134 [(set (match_operand:V8SI 0 "register_operand" "=x")
8140 (match_operand:V8SI 1 "register_operand" "x")
8141 (parallel [(const_int 0)]))
8142 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8144 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8145 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8148 (vec_select:SI (match_dup 1) (parallel [(const_int 4)]))
8149 (vec_select:SI (match_dup 1) (parallel [(const_int 5)])))
8151 (vec_select:SI (match_dup 1) (parallel [(const_int 6)]))
8152 (vec_select:SI (match_dup 1) (parallel [(const_int 7)])))))
8157 (match_operand:V8SI 2 "nonimmediate_operand" "xm")
8158 (parallel [(const_int 0)]))
8159 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8161 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8162 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))
8165 (vec_select:SI (match_dup 2) (parallel [(const_int 4)]))
8166 (vec_select:SI (match_dup 2) (parallel [(const_int 5)])))
8168 (vec_select:SI (match_dup 2) (parallel [(const_int 6)]))
8169 (vec_select:SI (match_dup 2) (parallel [(const_int 7)])))))))]
8171 "vphaddd\t{%2, %1, %0|%0, %1, %2}"
8172 [(set_attr "type" "sseiadd")
8173 (set_attr "prefix_extra" "1")
8174 (set_attr "prefix" "vex")
8175 (set_attr "mode" "OI")])
;; SSSE3 phaddd on V4SI.  Alternative 0 ("noavx") is the destructive form
;; with operand 1 tied to the destination; alternative 1 ("avx") is the
;; three-operand vphaddd.  The RTL pairs up SI elements (0,1) and (2,3) of
;; operand 1, then of operand 2 (register or memory).
8177 (define_insn "ssse3_phadddv4si3"
8178 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
8183 (match_operand:V4SI 1 "register_operand" "0,x")
8184 (parallel [(const_int 0)]))
8185 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8187 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8188 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8192 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
8193 (parallel [(const_int 0)]))
8194 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8196 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8197 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
8200 phaddd\t{%2, %0|%0, %2}
8201 vphaddd\t{%2, %1, %0|%0, %1, %2}"
8202 [(set_attr "isa" "noavx,avx")
8203 (set_attr "type" "sseiadd")
8204 (set_attr "atom_unit" "complex")
8205 (set_attr "prefix_data16" "1,*")
8206 (set_attr "prefix_extra" "1")
8207 (set_attr "prefix" "orig,vex")
8208 (set_attr "mode" "TI")])
;; phaddd on V2SI using the "y" register class, insn mode DI.  Destructive
;; only: operand 1 is tied to the destination.  The RTL pairs elements 0 and
;; 1 of operand 1, then elements 0 and 1 of operand 2 (register or memory).
;; prefix_rex is computed per insn by x86_extended_reg_mentioned_p.
8210 (define_insn "ssse3_phadddv2si3"
8211 [(set (match_operand:V2SI 0 "register_operand" "=y")
8215 (match_operand:V2SI 1 "register_operand" "0")
8216 (parallel [(const_int 0)]))
8217 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8220 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
8221 (parallel [(const_int 0)]))
8222 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
8224 "phaddd\t{%2, %0|%0, %2}"
8225 [(set_attr "type" "sseiadd")
8226 (set_attr "atom_unit" "complex")
8227 (set_attr "prefix_extra" "1")
8228 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8229 (set_attr "mode" "DI")])
;; AVX2 vphaddsw on V16HI: single VEX-only alternative, insn mode OI.
;; The RTL pairs up adjacent HI elements (0,1) ... (14,15) of operand 1
;; (register only), followed by the same pairs of operand 2 (register or
;; memory).
;; NOTE(review): the pairs are listed as all of operand 1 and then all of
;; operand 2, whereas the 256-bit instruction is documented as operating
;; within each 128-bit lane -- confirm this element order is intended.
8231 (define_insn "avx2_phaddswv16hi3"
8232 [(set (match_operand:V16HI 0 "register_operand" "=x")
8239 (match_operand:V16HI 1 "register_operand" "x")
8240 (parallel [(const_int 0)]))
8241 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8243 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8244 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8247 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8248 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8250 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8251 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8255 (vec_select:HI (match_dup 1) (parallel [(const_int 8)]))
8256 (vec_select:HI (match_dup 1) (parallel [(const_int 9)])))
8258 (vec_select:HI (match_dup 1) (parallel [(const_int 10)]))
8259 (vec_select:HI (match_dup 1) (parallel [(const_int 11)]))))
8262 (vec_select:HI (match_dup 1) (parallel [(const_int 12)]))
8263 (vec_select:HI (match_dup 1) (parallel [(const_int 13)])))
8265 (vec_select:HI (match_dup 1) (parallel [(const_int 14)]))
8266 (vec_select:HI (match_dup 1) (parallel [(const_int 15)]))))))
8272 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
8273 (parallel [(const_int 0)]))
8274 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8276 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8277 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8280 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8281 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8283 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8284 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))
8288 (vec_select:HI (match_dup 2) (parallel [(const_int 8)]))
8289 (vec_select:HI (match_dup 2) (parallel [(const_int 9)])))
8291 (vec_select:HI (match_dup 2) (parallel [(const_int 10)]))
8292 (vec_select:HI (match_dup 2) (parallel [(const_int 11)]))))
8295 (vec_select:HI (match_dup 2) (parallel [(const_int 12)]))
8296 (vec_select:HI (match_dup 2) (parallel [(const_int 13)])))
8298 (vec_select:HI (match_dup 2) (parallel [(const_int 14)]))
8299 (vec_select:HI (match_dup 2) (parallel [(const_int 15)]))))))))]
8301 "vphaddsw\t{%2, %1, %0|%0, %1, %2}"
8302 [(set_attr "type" "sseiadd")
8303 (set_attr "prefix_extra" "1")
8304 (set_attr "prefix" "vex")
8305 (set_attr "mode" "OI")])
;; SSSE3 phaddsw on V8HI.  Alternative 0 ("noavx") is the destructive form
;; with operand 1 tied to the destination; alternative 1 ("avx") is the
;; three-operand vphaddsw.  The RTL pairs up adjacent HI elements (0,1) ...
;; (6,7) of operand 1, then of operand 2 (register or memory).
8307 (define_insn "ssse3_phaddswv8hi3"
8308 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
8314 (match_operand:V8HI 1 "register_operand" "0,x")
8315 (parallel [(const_int 0)]))
8316 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8318 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8319 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8322 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8323 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8325 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8326 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8331 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
8332 (parallel [(const_int 0)]))
8333 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8335 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8336 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8339 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8340 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8342 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8343 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8346 phaddsw\t{%2, %0|%0, %2}
8347 vphaddsw\t{%2, %1, %0|%0, %1, %2}"
8348 [(set_attr "isa" "noavx,avx")
8349 (set_attr "type" "sseiadd")
8350 (set_attr "atom_unit" "complex")
8351 (set_attr "prefix_data16" "1,*")
8352 (set_attr "prefix_extra" "1")
8353 (set_attr "prefix" "orig,vex")
8354 (set_attr "mode" "TI")])
;; phaddsw on V4HI using the "y" register class, insn mode DI.  Destructive
;; only: operand 1 is tied to the destination.  The RTL pairs up HI elements
;; (0,1) and (2,3) of operand 1, then of operand 2 (register or memory).
;; prefix_rex is computed per insn by x86_extended_reg_mentioned_p.
8356 (define_insn "ssse3_phaddswv4hi3"
8357 [(set (match_operand:V4HI 0 "register_operand" "=y")
8362 (match_operand:V4HI 1 "register_operand" "0")
8363 (parallel [(const_int 0)]))
8364 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8366 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8367 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8371 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8372 (parallel [(const_int 0)]))
8373 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8375 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8376 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8378 "phaddsw\t{%2, %0|%0, %2}"
8379 [(set_attr "type" "sseiadd")
8380 (set_attr "atom_unit" "complex")
8381 (set_attr "prefix_extra" "1")
8382 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8383 (set_attr "mode" "DI")])
;; AVX2 vphsubw on V16HI: single VEX-only alternative, insn mode OI.
;; The RTL pairs up adjacent HI elements (0,1) ... (14,15) of operand 1
;; (register only), followed by the same pairs of operand 2 (register or
;; memory).
;; NOTE(review): the pairs are listed as all of operand 1 and then all of
;; operand 2, whereas the 256-bit instruction is documented as operating
;; within each 128-bit lane -- confirm this element order is intended.
8385 (define_insn "avx2_phsubwv16hi3"
8386 [(set (match_operand:V16HI 0 "register_operand" "=x")
8393 (match_operand:V16HI 1 "register_operand" "x")
8394 (parallel [(const_int 0)]))
8395 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8397 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8398 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8401 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8402 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8404 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8405 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8409 (vec_select:HI (match_dup 1) (parallel [(const_int 8)]))
8410 (vec_select:HI (match_dup 1) (parallel [(const_int 9)])))
8412 (vec_select:HI (match_dup 1) (parallel [(const_int 10)]))
8413 (vec_select:HI (match_dup 1) (parallel [(const_int 11)]))))
8416 (vec_select:HI (match_dup 1) (parallel [(const_int 12)]))
8417 (vec_select:HI (match_dup 1) (parallel [(const_int 13)])))
8419 (vec_select:HI (match_dup 1) (parallel [(const_int 14)]))
8420 (vec_select:HI (match_dup 1) (parallel [(const_int 15)]))))))
8426 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
8427 (parallel [(const_int 0)]))
8428 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8430 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8431 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8434 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8435 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8437 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8438 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))
8442 (vec_select:HI (match_dup 2) (parallel [(const_int 8)]))
8443 (vec_select:HI (match_dup 2) (parallel [(const_int 9)])))
8445 (vec_select:HI (match_dup 2) (parallel [(const_int 10)]))
8446 (vec_select:HI (match_dup 2) (parallel [(const_int 11)]))))
8449 (vec_select:HI (match_dup 2) (parallel [(const_int 12)]))
8450 (vec_select:HI (match_dup 2) (parallel [(const_int 13)])))
8452 (vec_select:HI (match_dup 2) (parallel [(const_int 14)]))
8453 (vec_select:HI (match_dup 2) (parallel [(const_int 15)]))))))))]
8455 "vphsubw\t{%2, %1, %0|%0, %1, %2}"
8456 [(set_attr "type" "sseiadd")
8457 (set_attr "prefix_extra" "1")
8458 (set_attr "prefix" "vex")
8459 (set_attr "mode" "OI")])
;; SSSE3 phsubw on V8HI.  Alternative 0 ("noavx") is the destructive form
;; with operand 1 tied to the destination; alternative 1 ("avx") is the
;; three-operand vphsubw.  The RTL pairs up adjacent HI elements (0,1) ...
;; (6,7) of operand 1, then of operand 2 (register or memory).
8461 (define_insn "ssse3_phsubwv8hi3"
8462 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
8468 (match_operand:V8HI 1 "register_operand" "0,x")
8469 (parallel [(const_int 0)]))
8470 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8472 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8473 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8476 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8477 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8479 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8480 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8485 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
8486 (parallel [(const_int 0)]))
8487 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8489 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8490 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8493 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8494 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8496 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8497 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8500 phsubw\t{%2, %0|%0, %2}
8501 vphsubw\t{%2, %1, %0|%0, %1, %2}"
8502 [(set_attr "isa" "noavx,avx")
8503 (set_attr "type" "sseiadd")
8504 (set_attr "atom_unit" "complex")
8505 (set_attr "prefix_data16" "1,*")
8506 (set_attr "prefix_extra" "1")
8507 (set_attr "prefix" "orig,vex")
8508 (set_attr "mode" "TI")])
;; phsubw on V4HI using the "y" register class, insn mode DI.  Destructive
;; only: operand 1 is tied to the destination.  The RTL pairs up HI elements
;; (0,1) and (2,3) of operand 1, then of operand 2 (register or memory).
;; prefix_rex is computed per insn by x86_extended_reg_mentioned_p.
8510 (define_insn "ssse3_phsubwv4hi3"
8511 [(set (match_operand:V4HI 0 "register_operand" "=y")
8516 (match_operand:V4HI 1 "register_operand" "0")
8517 (parallel [(const_int 0)]))
8518 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8520 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8521 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8525 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8526 (parallel [(const_int 0)]))
8527 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8529 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8530 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8532 "phsubw\t{%2, %0|%0, %2}"
8533 [(set_attr "type" "sseiadd")
8534 (set_attr "atom_unit" "complex")
8535 (set_attr "prefix_extra" "1")
8536 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8537 (set_attr "mode" "DI")])
;; AVX2 vphsubd on V8SI: single VEX-only alternative, insn mode OI.
;; The RTL pairs up adjacent SI elements (0,1) ... (6,7) of operand 1
;; (register only), followed by the same pairs of operand 2 (register or
;; memory).
;; NOTE(review): the pairs are listed as all of operand 1 and then all of
;; operand 2, whereas the 256-bit instruction is documented as operating
;; within each 128-bit lane -- confirm this element order is intended.
8539 (define_insn "avx2_phsubdv8si3"
8540 [(set (match_operand:V8SI 0 "register_operand" "=x")
8546 (match_operand:V8SI 1 "register_operand" "x")
8547 (parallel [(const_int 0)]))
8548 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8550 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8551 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8554 (vec_select:SI (match_dup 1) (parallel [(const_int 4)]))
8555 (vec_select:SI (match_dup 1) (parallel [(const_int 5)])))
8557 (vec_select:SI (match_dup 1) (parallel [(const_int 6)]))
8558 (vec_select:SI (match_dup 1) (parallel [(const_int 7)])))))
8563 (match_operand:V8SI 2 "nonimmediate_operand" "xm")
8564 (parallel [(const_int 0)]))
8565 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8567 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8568 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))
8571 (vec_select:SI (match_dup 2) (parallel [(const_int 4)]))
8572 (vec_select:SI (match_dup 2) (parallel [(const_int 5)])))
8574 (vec_select:SI (match_dup 2) (parallel [(const_int 6)]))
8575 (vec_select:SI (match_dup 2) (parallel [(const_int 7)])))))))]
8577 "vphsubd\t{%2, %1, %0|%0, %1, %2}"
8578 [(set_attr "type" "sseiadd")
8579 (set_attr "prefix_extra" "1")
8580 (set_attr "prefix" "vex")
8581 (set_attr "mode" "OI")])
;; SSSE3 phsubd on V4SI.  Alternative 0 ("noavx") is the destructive form
;; with operand 1 tied to the destination; alternative 1 ("avx") is the
;; three-operand vphsubd.  The RTL pairs up SI elements (0,1) and (2,3) of
;; operand 1, then of operand 2 (register or memory).
8583 (define_insn "ssse3_phsubdv4si3"
8584 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
8589 (match_operand:V4SI 1 "register_operand" "0,x")
8590 (parallel [(const_int 0)]))
8591 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8593 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8594 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8598 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
8599 (parallel [(const_int 0)]))
8600 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8602 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8603 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
8606 phsubd\t{%2, %0|%0, %2}
8607 vphsubd\t{%2, %1, %0|%0, %1, %2}"
8609 [(set_attr "isa" "noavx,avx")
8610 (set_attr "type" "sseiadd")
8611 (set_attr "atom_unit" "complex")
8612 (set_attr "prefix_data16" "1,*")
8613 (set_attr "prefix_extra" "1")
8614 (set_attr "prefix" "orig,vex")
8615 (set_attr "mode" "TI")])
;; phsubd on V2SI using the "y" register class, insn mode DI.  Destructive
;; only: operand 1 is tied to the destination.  The RTL pairs elements 0 and
;; 1 of operand 1, then elements 0 and 1 of operand 2 (register or memory).
;; prefix_rex is computed per insn by x86_extended_reg_mentioned_p.
8617 (define_insn "ssse3_phsubdv2si3"
8618 [(set (match_operand:V2SI 0 "register_operand" "=y")
8622 (match_operand:V2SI 1 "register_operand" "0")
8623 (parallel [(const_int 0)]))
8624 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8627 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
8628 (parallel [(const_int 0)]))
8629 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
8631 "phsubd\t{%2, %0|%0, %2}"
8632 [(set_attr "type" "sseiadd")
8633 (set_attr "atom_unit" "complex")
8634 (set_attr "prefix_extra" "1")
8635 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8636 (set_attr "mode" "DI")])
;; AVX2 vphsubsw on V16HI: single VEX-only alternative, insn mode OI.
;; The RTL pairs up adjacent HI elements (0,1) ... (14,15) of operand 1
;; (register only), followed by the same pairs of operand 2 (register or
;; memory).
;; NOTE(review): the pairs are listed as all of operand 1 and then all of
;; operand 2, whereas the 256-bit instruction is documented as operating
;; within each 128-bit lane -- confirm this element order is intended.
8638 (define_insn "avx2_phsubswv16hi3"
8639 [(set (match_operand:V16HI 0 "register_operand" "=x")
8646 (match_operand:V16HI 1 "register_operand" "x")
8647 (parallel [(const_int 0)]))
8648 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8650 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8651 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8654 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8655 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8657 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8658 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8662 (vec_select:HI (match_dup 1) (parallel [(const_int 8)]))
8663 (vec_select:HI (match_dup 1) (parallel [(const_int 9)])))
8665 (vec_select:HI (match_dup 1) (parallel [(const_int 10)]))
8666 (vec_select:HI (match_dup 1) (parallel [(const_int 11)]))))
8669 (vec_select:HI (match_dup 1) (parallel [(const_int 12)]))
8670 (vec_select:HI (match_dup 1) (parallel [(const_int 13)])))
8672 (vec_select:HI (match_dup 1) (parallel [(const_int 14)]))
8673 (vec_select:HI (match_dup 1) (parallel [(const_int 15)]))))))
8679 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
8680 (parallel [(const_int 0)]))
8681 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8683 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8684 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8687 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8688 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8690 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8691 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))
8695 (vec_select:HI (match_dup 2) (parallel [(const_int 8)]))
8696 (vec_select:HI (match_dup 2) (parallel [(const_int 9)])))
8698 (vec_select:HI (match_dup 2) (parallel [(const_int 10)]))
8699 (vec_select:HI (match_dup 2) (parallel [(const_int 11)]))))
8702 (vec_select:HI (match_dup 2) (parallel [(const_int 12)]))
8703 (vec_select:HI (match_dup 2) (parallel [(const_int 13)])))
8705 (vec_select:HI (match_dup 2) (parallel [(const_int 14)]))
8706 (vec_select:HI (match_dup 2) (parallel [(const_int 15)]))))))))]
8708 "vphsubsw\t{%2, %1, %0|%0, %1, %2}"
8709 [(set_attr "type" "sseiadd")
8710 (set_attr "prefix_extra" "1")
8711 (set_attr "prefix" "vex")
8712 (set_attr "mode" "OI")])
;; SSSE3 phsubsw on V8HI.  Alternative 0 ("noavx") is the destructive form
;; with operand 1 tied to the destination; alternative 1 ("avx") is the
;; three-operand vphsubsw.  The RTL pairs up adjacent HI elements (0,1) ...
;; (6,7) of operand 1, then of operand 2 (register or memory).
8714 (define_insn "ssse3_phsubswv8hi3"
8715 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
8721 (match_operand:V8HI 1 "register_operand" "0,x")
8722 (parallel [(const_int 0)]))
8723 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8725 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8726 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8729 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8730 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8732 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8733 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8738 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
8739 (parallel [(const_int 0)]))
8740 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8742 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8743 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8746 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8747 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8749 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8750 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8753 phsubsw\t{%2, %0|%0, %2}
8754 vphsubsw\t{%2, %1, %0|%0, %1, %2}"
8755 [(set_attr "isa" "noavx,avx")
8756 (set_attr "type" "sseiadd")
8757 (set_attr "atom_unit" "complex")
8758 (set_attr "prefix_data16" "1,*")
8759 (set_attr "prefix_extra" "1")
8760 (set_attr "prefix" "orig,vex")
8761 (set_attr "mode" "TI")])
;; phsubsw on V4HI using the "y" register class, insn mode DI.  Destructive
;; only: operand 1 is tied to the destination.  The RTL pairs up HI elements
;; (0,1) and (2,3) of operand 1, then of operand 2 (register or memory).
;; prefix_rex is computed per insn by x86_extended_reg_mentioned_p.
8763 (define_insn "ssse3_phsubswv4hi3"
8764 [(set (match_operand:V4HI 0 "register_operand" "=y")
8769 (match_operand:V4HI 1 "register_operand" "0")
8770 (parallel [(const_int 0)]))
8771 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8773 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8774 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8778 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8779 (parallel [(const_int 0)]))
8780 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8782 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8783 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8785 "phsubsw\t{%2, %0|%0, %2}"
8786 [(set_attr "type" "sseiadd")
8787 (set_attr "atom_unit" "complex")
8788 (set_attr "prefix_extra" "1")
8789 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8790 (set_attr "mode" "DI")])
;; vpmaddubsw with V32QI sources and a V16HI destination.  The RTL takes
;; V16QI vec_selects of operands 1 and 2 (one selection list starts at
;; element 0, the other at element 1 and ends at element 31).  VEX-only,
;; three-operand form; operand 2 may be memory.
8792 (define_insn "avx2_pmaddubsw256"
8793 [(set (match_operand:V16HI 0 "register_operand" "=x")
8798 (match_operand:V32QI 1 "register_operand" "x")
8799 (parallel [(const_int 0)
8817 (match_operand:V32QI 2 "nonimmediate_operand" "xm")
8818 (parallel [(const_int 0)
8836 (vec_select:V16QI (match_dup 1)
8837 (parallel [(const_int 1)
8854 (vec_select:V16QI (match_dup 2)
8855 (parallel [(const_int 1)
8870 (const_int 31)]))))))]
8872 "vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
8873 [(set_attr "type" "sseiadd")
8874 (set_attr "prefix_extra" "1")
8875 (set_attr "prefix" "vex")
8876 (set_attr "mode" "OI")])
;; pmaddubsw with V16QI sources and a V8HI destination.  Alternative 0 is
;; the legacy two-operand encoding (operand 1 tied to the destination);
;; alternative 1 is the three-operand VEX form.
8878 (define_insn "ssse3_pmaddubsw128"
8879 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
8884 (match_operand:V16QI 1 "register_operand" "0,x")
8885 (parallel [(const_int 0)
8895 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")
8896 (parallel [(const_int 0)
8906 (vec_select:V8QI (match_dup 1)
8907 (parallel [(const_int 1)
8916 (vec_select:V8QI (match_dup 2)
8917 (parallel [(const_int 1)
8924 (const_int 15)]))))))]
8927 pmaddubsw\t{%2, %0|%0, %2}
8928 vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
8929 [(set_attr "isa" "noavx,avx")
8930 (set_attr "type" "sseiadd")
8931 (set_attr "atom_unit" "simul")
8932 (set_attr "prefix_data16" "1,*")
8933 (set_attr "prefix_extra" "1")
8934 (set_attr "prefix" "orig,vex")
8935 (set_attr "mode" "TI")])
;; pmaddubsw with V8QI sources and a V4HI destination in "y"-constraint
;; (MMX) registers.  Two-operand encoding only; operand 1 is tied to the
;; destination and operand 2 may be memory.
8937 (define_insn "ssse3_pmaddubsw"
8938 [(set (match_operand:V4HI 0 "register_operand" "=y")
8943 (match_operand:V8QI 1 "register_operand" "0")
8944 (parallel [(const_int 0)
8950 (match_operand:V8QI 2 "nonimmediate_operand" "ym")
8951 (parallel [(const_int 0)
8957 (vec_select:V4QI (match_dup 1)
8958 (parallel [(const_int 1)
8963 (vec_select:V4QI (match_dup 2)
8964 (parallel [(const_int 1)
8967 (const_int 7)]))))))]
8969 "pmaddubsw\t{%2, %0|%0, %2}"
8970 [(set_attr "type" "sseiadd")
8971 (set_attr "atom_unit" "simul")
8972 (set_attr "prefix_extra" "1")
8973 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8974 (set_attr "mode" "DI")])
;; Expander for the V16HI multiply pattern matched by *avx2_umulhrswv16hi3.
;; Both inputs are accepted as nonimmediate operands; the preparation
;; statement passes them through ix86_fixup_binary_operands_no_copy.
;; The pattern contains a V16HI constant vector of ones (presumably the
;; rounding increment -- confirm against the full pattern).
8976 (define_expand "avx2_umulhrswv16hi3"
8977 [(set (match_operand:V16HI 0 "register_operand" "")
8984 (match_operand:V16HI 1 "nonimmediate_operand" ""))
8986 (match_operand:V16HI 2 "nonimmediate_operand" "")))
8988 (const_vector:V16HI [(const_int 1) (const_int 1)
8989 (const_int 1) (const_int 1)
8990 (const_int 1) (const_int 1)
8991 (const_int 1) (const_int 1)
8992 (const_int 1) (const_int 1)
8993 (const_int 1) (const_int 1)
8994 (const_int 1) (const_int 1)
8995 (const_int 1) (const_int 1)]))
8998 "ix86_fixup_binary_operands_no_copy (MULT, V16HImode, operands);")
;; Matcher that emits vpmulhrsw for V16HI.  Operand 1 carries the "%"
;; commutative marker; the insn condition requires TARGET_AVX2 and
;; ix86_binary_operator_ok for the operands.  VEX three-operand form only.
9000 (define_insn "*avx2_umulhrswv16hi3"
9001 [(set (match_operand:V16HI 0 "register_operand" "=x")
9008 (match_operand:V16HI 1 "nonimmediate_operand" "%x"))
9010 (match_operand:V16HI 2 "nonimmediate_operand" "xm")))
9012 (const_vector:V16HI [(const_int 1) (const_int 1)
9013 (const_int 1) (const_int 1)
9014 (const_int 1) (const_int 1)
9015 (const_int 1) (const_int 1)
9016 (const_int 1) (const_int 1)
9017 (const_int 1) (const_int 1)
9018 (const_int 1) (const_int 1)
9019 (const_int 1) (const_int 1)]))
9021 "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V16HImode, operands)"
9022 "vpmulhrsw\t{%2, %1, %0|%0, %1, %2}"
9023 [(set_attr "type" "sseimul")
9024 (set_attr "prefix_extra" "1")
9025 (set_attr "prefix" "vex")
9026 (set_attr "mode" "OI")])
;; Expander for the V8HI multiply pattern matched by *ssse3_pmulhrswv8hi3.
;; The preparation statement passes the operands through
;; ix86_fixup_binary_operands_no_copy.
9028 (define_expand "ssse3_pmulhrswv8hi3"
9029 [(set (match_operand:V8HI 0 "register_operand" "")
9036 (match_operand:V8HI 1 "nonimmediate_operand" ""))
9038 (match_operand:V8HI 2 "nonimmediate_operand" "")))
9040 (const_vector:V8HI [(const_int 1) (const_int 1)
9041 (const_int 1) (const_int 1)
9042 (const_int 1) (const_int 1)
9043 (const_int 1) (const_int 1)]))
9046 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; Matcher that emits pmulhrsw / vpmulhrsw for V8HI.  Operand 1 is marked
;; commutative ("%"); in alternative 0 it is tied to the destination
;; (legacy encoding), in alternative 1 it is a separate register (VEX).
;; Requires TARGET_SSSE3 and ix86_binary_operator_ok for the operands.
9048 (define_insn "*ssse3_pmulhrswv8hi3"
9049 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
9056 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x"))
9058 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")))
9060 (const_vector:V8HI [(const_int 1) (const_int 1)
9061 (const_int 1) (const_int 1)
9062 (const_int 1) (const_int 1)
9063 (const_int 1) (const_int 1)]))
9065 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
9067 pmulhrsw\t{%2, %0|%0, %2}
9068 vpmulhrsw\t{%2, %1, %0|%0, %1, %2}"
9069 [(set_attr "isa" "noavx,avx")
9070 (set_attr "type" "sseimul")
9071 (set_attr "prefix_data16" "1,*")
9072 (set_attr "prefix_extra" "1")
9073 (set_attr "prefix" "orig,vex")
9074 (set_attr "mode" "TI")])
;; Expander for the V4HI multiply pattern matched by *ssse3_pmulhrswv4hi3.
;; The preparation statement passes the operands through
;; ix86_fixup_binary_operands_no_copy.
9076 (define_expand "ssse3_pmulhrswv4hi3"
9077 [(set (match_operand:V4HI 0 "register_operand" "")
9084 (match_operand:V4HI 1 "nonimmediate_operand" ""))
9086 (match_operand:V4HI 2 "nonimmediate_operand" "")))
9088 (const_vector:V4HI [(const_int 1) (const_int 1)
9089 (const_int 1) (const_int 1)]))
9092 "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
;; Matcher that emits pmulhrsw for V4HI in "y"-constraint (MMX) registers.
;; Operand 1 is commutative and tied to the destination.  Requires
;; TARGET_SSSE3 and ix86_binary_operator_ok for the operands.
9094 (define_insn "*ssse3_pmulhrswv4hi3"
9095 [(set (match_operand:V4HI 0 "register_operand" "=y")
9102 (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
9104 (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
9106 (const_vector:V4HI [(const_int 1) (const_int 1)
9107 (const_int 1) (const_int 1)]))
9109 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
9110 "pmulhrsw\t{%2, %0|%0, %2}"
9111 [(set_attr "type" "sseimul")
9112 (set_attr "prefix_extra" "1")
9113 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9114 (set_attr "mode" "DI")])
;; pshufb / vpshufb over the VI1_AVX2 mode iterator, modelled as an unspec
;; of operands 1 and 2.  Alternative 0 is the legacy two-operand encoding
;; (operand 1 tied to the destination); alternative 1 is the VEX form.
9116 (define_insn "<ssse3_avx2>_pshufb<mode>3"
9117 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
9118 (unspec:VI1_AVX2 [(match_operand:VI1_AVX2 1 "register_operand" "0,x")
9119 (match_operand:VI1_AVX2 2 "nonimmediate_operand" "xm,xm")]
9123 pshufb\t{%2, %0|%0, %2}
9124 vpshufb\t{%2, %1, %0|%0, %1, %2}"
9125 [(set_attr "isa" "noavx,avx")
9126 (set_attr "type" "sselog1")
9127 (set_attr "prefix_data16" "1,*")
9128 (set_attr "prefix_extra" "1")
9129 (set_attr "prefix" "orig,vex")
9130 (set_attr "mode" "<sseinsnmode>")])
;; pshufb on V8QI operands in "y"-constraint (MMX) registers, modelled as
;; an unspec of operands 1 and 2.  Two-operand encoding only; operand 1 is
;; tied to the destination.
;; The output template used to be followed by a stray ";".  In a machine
;; description ";" begins a comment, so it had no effect, but it read like
;; a C statement terminator and has been dropped.
9132 (define_insn "ssse3_pshufbv8qi3"
9133 [(set (match_operand:V8QI 0 "register_operand" "=y")
9134 (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0")
9135 (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
9138 "pshufb\t{%2, %0|%0, %2}"
9139 [(set_attr "type" "sselog1")
9140 (set_attr "prefix_extra" "1")
9141 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9142 (set_attr "mode" "DI")])
;; psign / vpsign over the VI124_AVX2 mode iterator; the mnemonic suffix
;; comes from <ssemodesuffix>.  Alternative 0 is the legacy two-operand
;; encoding (operand 1 tied to the destination); alternative 1 is VEX.
9144 (define_insn "<ssse3_avx2>_psign<mode>3"
9145 [(set (match_operand:VI124_AVX2 0 "register_operand" "=x,x")
9147 [(match_operand:VI124_AVX2 1 "register_operand" "0,x")
9148 (match_operand:VI124_AVX2 2 "nonimmediate_operand" "xm,xm")]
9152 psign<ssemodesuffix>\t{%2, %0|%0, %2}
9153 vpsign<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
9154 [(set_attr "isa" "noavx,avx")
9155 (set_attr "type" "sselog1")
9156 (set_attr "prefix_data16" "1,*")
9157 (set_attr "prefix_extra" "1")
9158 (set_attr "prefix" "orig,vex")
9159 (set_attr "mode" "<sseinsnmode>")])
;; psign over the MMXMODEI iterator in "y"-constraint (MMX) registers; the
;; mnemonic suffix comes from <mmxvecsize>.  Two-operand encoding only;
;; operand 1 is tied to the destination.
;; The output template used to be followed by a stray ";".  In a machine
;; description ";" begins a comment, so it had no effect, but it read like
;; a C statement terminator and has been dropped.
9161 (define_insn "ssse3_psign<mode>3"
9162 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
9164 [(match_operand:MMXMODEI 1 "register_operand" "0")
9165 (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
9168 "psign<mmxvecsize>\t{%2, %0|%0, %2}"
9169 [(set_attr "type" "sselog1")
9170 (set_attr "prefix_extra" "1")
9171 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9172 (set_attr "mode" "DI")])
;; palignr / vpalignr over the SSESCALARMODE iterator.  Operand 3 must
;; satisfy const_0_to_255_mul_8_operand; the output code divides it by 8
;; before printing, so the assembler immediate is the RTL value / 8.
;; which_alternative picks the legacy two-operand form or the VEX form.
9174 (define_insn "<ssse3_avx2>_palignr<mode>"
9175 [(set (match_operand:SSESCALARMODE 0 "register_operand" "=x,x")
9176 (unspec:SSESCALARMODE [(match_operand:SSESCALARMODE 1 "register_operand" "0,x")
9177 (match_operand:SSESCALARMODE 2 "nonimmediate_operand" "xm,xm")
9178 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n,n")]
9182 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
9184 switch (which_alternative)
9187 return "palignr\t{%3, %2, %0|%0, %2, %3}";
9189 return "vpalignr\t{%3, %2, %1, %0|%0, %1, %2, %3}";
9194 [(set_attr "isa" "noavx,avx")
9195 (set_attr "type" "sseishft")
9196 (set_attr "atom_unit" "sishuf")
9197 (set_attr "prefix_data16" "1,*")
9198 (set_attr "prefix_extra" "1")
9199 (set_attr "length_immediate" "1")
9200 (set_attr "prefix" "orig,vex")
9201 (set_attr "mode" "<sseinsnmode>")])
;; palignr on DI operands in "y"-constraint (MMX) registers.  As in the
;; SSE variant, operand 3 satisfies const_0_to_255_mul_8_operand and is
;; divided by 8 before the instruction is printed.
9203 (define_insn "ssse3_palignrdi"
9204 [(set (match_operand:DI 0 "register_operand" "=y")
9205 (unspec:DI [(match_operand:DI 1 "register_operand" "0")
9206 (match_operand:DI 2 "nonimmediate_operand" "ym")
9207 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
9211 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
9212 return "palignr\t{%3, %2, %0|%0, %2, %3}";
9214 [(set_attr "type" "sseishft")
9215 (set_attr "atom_unit" "sishuf")
9216 (set_attr "prefix_extra" "1")
9217 (set_attr "length_immediate" "1")
9218 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9219 (set_attr "mode" "DI")])
;; Named abs<mode>2 pattern over the VI124_AVX2 iterator; emits pabs with
;; the <ssemodesuffix> suffix.  The "%v" template prefix together with
;; prefix "maybe_vex" lets one template serve both encodings.
9221 (define_insn "abs<mode>2"
9222 [(set (match_operand:VI124_AVX2 0 "register_operand" "=x")
9224 (match_operand:VI124_AVX2 1 "nonimmediate_operand" "xm")))]
9226 "%vpabs<ssemodesuffix>\t{%1, %0|%0, %1}"
9227 [(set_attr "type" "sselog1")
9228 (set_attr "prefix_data16" "1")
9229 (set_attr "prefix_extra" "1")
9230 (set_attr "prefix" "maybe_vex")
9231 (set_attr "mode" "<sseinsnmode>")])
;; Named abs<mode>2 pattern over the MMXMODEI iterator in "y"-constraint
;; (MMX) registers; emits pabs with the <mmxvecsize> suffix.
;; The output template used to be followed by a stray ";".  In a machine
;; description ";" begins a comment, so it had no effect, but it read like
;; a C statement terminator and has been dropped.
9233 (define_insn "abs<mode>2"
9234 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
9236 (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
9238 "pabs<mmxvecsize>\t{%1, %0|%0, %1}"
9239 [(set_attr "type" "sselog1")
9240 (set_attr "prefix_rep" "0")
9241 (set_attr "prefix_extra" "1")
9242 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9243 (set_attr "mode" "DI")])
9245 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9247 ;; AMD SSE4A instructions
9249 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; movnt<ssemodesuffix> over the MODEF iterator: stores an "x"-constraint
;; register (operand 1) to a memory destination (operand 0).
9251 (define_insn "sse4a_movnt<mode>"
9252 [(set (match_operand:MODEF 0 "memory_operand" "=m")
9254 [(match_operand:MODEF 1 "register_operand" "x")]
9257 "movnt<ssemodesuffix>\t{%1, %0|%0, %1}"
9258 [(set_attr "type" "ssemov")
9259 (set_attr "mode" "<MODE>")])
;; Vector-register form of the movnt store: the value stored is element 0
;; (vec_select with index 0) of a VF_128 register, written to a memory
;; operand of the corresponding scalar mode.
9261 (define_insn "sse4a_vmmovnt<mode>"
9262 [(set (match_operand:<ssescalarmode> 0 "memory_operand" "=m")
9263 (unspec:<ssescalarmode>
9264 [(vec_select:<ssescalarmode>
9265 (match_operand:VF_128 1 "register_operand" "x")
9266 (parallel [(const_int 0)]))]
9269 "movnt<ssescalarmodesuffix>\t{%1, %0|%0, %1}"
9270 [(set_attr "type" "ssemov")
9271 (set_attr "mode" "<ssescalarmode>")])
;; extrq, immediate form: operand 1 is tied to the destination and
;; operands 2 and 3 are constants in 0..255, both printed as immediates
;; (hence length_immediate 2).
9273 (define_insn "sse4a_extrqi"
9274 [(set (match_operand:V2DI 0 "register_operand" "=x")
9275 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9276 (match_operand 2 "const_0_to_255_operand" "")
9277 (match_operand 3 "const_0_to_255_operand" "")]
9280 "extrq\t{%3, %2, %0|%0, %2, %3}"
9281 [(set_attr "type" "sse")
9282 (set_attr "prefix_data16" "1")
9283 (set_attr "length_immediate" "2")
9284 (set_attr "mode" "TI")])
;; extrq, register form: operand 1 is tied to the destination and the
;; second source is a V16QI register (operand 2).
9286 (define_insn "sse4a_extrq"
9287 [(set (match_operand:V2DI 0 "register_operand" "=x")
9288 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9289 (match_operand:V16QI 2 "register_operand" "x")]
9292 "extrq\t{%2, %0|%0, %2}"
9293 [(set_attr "type" "sse")
9294 (set_attr "prefix_data16" "1")
9295 (set_attr "mode" "TI")])
;; insertq, immediate form: operand 1 is tied to the destination, operand 2
;; is a second V2DI register, and operands 3 and 4 are constants in 0..255
;; printed as immediates (hence length_immediate 2).
9297 (define_insn "sse4a_insertqi"
9298 [(set (match_operand:V2DI 0 "register_operand" "=x")
9299 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9300 (match_operand:V2DI 2 "register_operand" "x")
9301 (match_operand 3 "const_0_to_255_operand" "")
9302 (match_operand 4 "const_0_to_255_operand" "")]
9305 "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
9306 [(set_attr "type" "sseins")
9307 (set_attr "prefix_data16" "0")
9308 (set_attr "prefix_rep" "1")
9309 (set_attr "length_immediate" "2")
9310 (set_attr "mode" "TI")])
;; insertq, register form: operand 1 is tied to the destination and
;; operand 2 is a second V2DI register.
9312 (define_insn "sse4a_insertq"
9313 [(set (match_operand:V2DI 0 "register_operand" "=x")
9314 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9315 (match_operand:V2DI 2 "register_operand" "x")]
9318 "insertq\t{%2, %0|%0, %2}"
9319 [(set_attr "type" "sseins")
9320 (set_attr "prefix_data16" "0")
9321 (set_attr "prefix_rep" "1")
9322 (set_attr "mode" "TI")])
9324 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9326 ;; Intel SSE4.1 instructions
9328 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; blend / vblend over the VF iterator with an immediate selector
;; (operand 3, bounded by const_0_to_<blendbits>_operand).  Note that
;; operand 2 is listed before operand 1 in the RTL.  Alternative 0 is the
;; legacy encoding (operand 1 tied to the destination); alternative 1 is
;; the VEX form.
9330 (define_insn "<sse4_1>_blend<ssemodesuffix><avxsizesuffix>"
9331 [(set (match_operand:VF 0 "register_operand" "=x,x")
9333 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")
9334 (match_operand:VF 1 "register_operand" "0,x")
9335 (match_operand:SI 3 "const_0_to_<blendbits>_operand" "")))]
9338 blend<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
9339 vblend<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9340 [(set_attr "isa" "noavx,avx")
9341 (set_attr "type" "ssemov")
9342 (set_attr "length_immediate" "1")
9343 (set_attr "prefix_data16" "1,*")
9344 (set_attr "prefix_extra" "1")
9345 (set_attr "prefix" "orig,vex")
9346 (set_attr "mode" "<MODE>")])
;; blendv / vblendv over the VF iterator with a register selector
;; (operand 3).  In the legacy alternative operand 3 uses constraint "Yz"
;; and operand 1 is tied to the destination; in the VEX alternative
;; operand 3 may be any "x" register.  Operands 0-2 use the
;; *_not_xmm0_*_maybe_avx predicates.
9348 (define_insn "<sse4_1>_blendv<ssemodesuffix><avxsizesuffix>"
9349 [(set (match_operand:VF 0 "reg_not_xmm0_operand_maybe_avx" "=x,x")
9351 [(match_operand:VF 1 "reg_not_xmm0_operand_maybe_avx" "0,x")
9352 (match_operand:VF 2 "nonimm_not_xmm0_operand_maybe_avx" "xm,xm")
9353 (match_operand:VF 3 "register_operand" "Yz,x")]
9357 blendv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
9358 vblendv<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9359 [(set_attr "isa" "noavx,avx")
9360 (set_attr "type" "ssemov")
9361 (set_attr "length_immediate" "1")
9362 (set_attr "prefix_data16" "1,*")
9363 (set_attr "prefix_extra" "1")
9364 (set_attr "prefix" "orig,vex")
9365 (set_attr "mode" "<MODE>")])
;; dp / vdp over the VF iterator with an 8-bit immediate (operand 3).
;; Operand 1 is marked commutative ("%"); it is tied to the destination in
;; the legacy alternative and is a separate register in the VEX one.
9367 (define_insn "<sse4_1>_dp<ssemodesuffix><avxsizesuffix>"
9368 [(set (match_operand:VF 0 "register_operand" "=x,x")
9370 [(match_operand:VF 1 "nonimmediate_operand" "%0,x")
9371 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")
9372 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
9376 dp<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
9377 vdp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9378 [(set_attr "isa" "noavx,avx")
9379 (set_attr "type" "ssemul")
9380 (set_attr "length_immediate" "1")
9381 (set_attr "prefix_data16" "1,*")
9382 (set_attr "prefix_extra" "1")
9383 (set_attr "prefix" "orig,vex")
9384 (set_attr "mode" "<MODE>")])
;; movntdqa over the VI8_AVX2 iterator: loads from a memory operand
;; (operand 1) into a register.  Modelled as an unspec; one "%v" template
;; covers both the legacy and VEX encodings.
9386 (define_insn "<sse4_1_avx2>_movntdqa"
9387 [(set (match_operand:VI8_AVX2 0 "register_operand" "=x")
9388 (unspec:VI8_AVX2 [(match_operand:VI8_AVX2 1 "memory_operand" "m")]
9391 "%vmovntdqa\t{%1, %0|%0, %1}"
9392 [(set_attr "type" "ssemov")
9393 (set_attr "prefix_extra" "1")
9394 (set_attr "prefix" "maybe_vex")
9395 (set_attr "mode" "<sseinsnmode>")])
;; mpsadbw / vmpsadbw over the VI1_AVX2 iterator with an 8-bit immediate
;; (operand 3), modelled as an unspec.  Alternative 0 is the legacy
;; encoding (operand 1 tied to the destination); alternative 1 is VEX.
9397 (define_insn "<sse4_1_avx2>_mpsadbw"
9398 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
9399 (unspec:VI1_AVX2 [(match_operand:VI1_AVX2 1 "register_operand" "0,x")
9400 (match_operand:VI1_AVX2 2 "nonimmediate_operand" "xm,xm")
9401 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
9405 mpsadbw\t{%3, %2, %0|%0, %2, %3}
9406 vmpsadbw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9407 [(set_attr "isa" "noavx,avx")
9408 (set_attr "type" "sselog1")
9409 (set_attr "length_immediate" "1")
9410 (set_attr "prefix_extra" "1")
9411 (set_attr "prefix" "orig,vex")
9412 (set_attr "mode" "<sseinsnmode>")])
;; vpackusdw: two V8SI sources produce a V16HI destination.  VEX-only,
;; three-operand form; operand 2 may be memory.
9414 (define_insn "avx2_packusdw"
9415 [(set (match_operand:V16HI 0 "register_operand" "=x")
9418 (match_operand:V8SI 1 "register_operand" "x"))
9420 (match_operand:V8SI 2 "nonimmediate_operand" "xm"))))]
9422 "vpackusdw\t{%2, %1, %0|%0, %1, %2}"
9423 [(set_attr "type" "sselog")
9424 (set_attr "prefix_extra" "1")
9425 (set_attr "prefix" "vex")
9426 (set_attr "mode" "OI")])
;; packusdw / vpackusdw: two V4SI sources produce a V8HI destination.
;; Alternative 0 is the legacy encoding (operand 1 tied to the
;; destination); alternative 1 is the VEX form.
9428 (define_insn "sse4_1_packusdw"
9429 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
9432 (match_operand:V4SI 1 "register_operand" "0,x"))
9434 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm"))))]
9437 packusdw\t{%2, %0|%0, %2}
9438 vpackusdw\t{%2, %1, %0|%0, %1, %2}"
9439 [(set_attr "isa" "noavx,avx")
9440 (set_attr "type" "sselog")
9441 (set_attr "prefix_extra" "1")
9442 (set_attr "prefix" "orig,vex")
9443 (set_attr "mode" "TI")])
;; pblendvb / vpblendvb over the VI1_AVX2 iterator with a register
;; selector (operand 3).  In the legacy alternative operand 3 uses
;; constraint "Yz" and operand 1 is tied to the destination; in the VEX
;; alternative operand 3 may be any "x" register and is encoded through
;; an immediate byte (length_immediate "*,1").
;; Operand 0 uses reg_not_xmm0_operand_maybe_avx, matching operand 1 here
;; and operand 0 of the floating-point blendv pattern: the VEX alternative
;; ("=x") can legitimately be allocated xmm0, which the plain
;; reg_not_xmm0_operand predicate would reject.
;; NOTE(review): assumes reg_not_xmm0_operand_maybe_avx degrades to
;; reg_not_xmm0_operand when AVX is off -- confirm in predicates.md.
9445 (define_insn "<sse4_1_avx2>_pblendvb"
9446 [(set (match_operand:VI1_AVX2 0 "reg_not_xmm0_operand_maybe_avx" "=x,x")
9448 [(match_operand:VI1_AVX2 1 "reg_not_xmm0_operand_maybe_avx" "0,x")
9449 (match_operand:VI1_AVX2 2 "nonimm_not_xmm0_operand_maybe_avx" "xm,xm")
9450 (match_operand:VI1_AVX2 3 "register_operand" "Yz,x")]
9454 pblendvb\t{%3, %2, %0|%0, %2, %3}
9455 vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9456 [(set_attr "isa" "noavx,avx")
9457 (set_attr "type" "ssemov")
9458 (set_attr "prefix_extra" "1")
9459 (set_attr "length_immediate" "*,1")
9460 (set_attr "prefix" "orig,vex")
9461 (set_attr "mode" "<sseinsnmode>")])
;; pblendw / vpblendw on V8HI with an 8-bit immediate selector
;; (operand 3).  Operand 2 is listed before operand 1 in the RTL.
;; Alternative 0 is the legacy encoding (operand 1 tied to the
;; destination); alternative 1 is the VEX form.
9463 (define_insn "sse4_1_pblendw"
9464 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
9466 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
9467 (match_operand:V8HI 1 "register_operand" "0,x")
9468 (match_operand:SI 3 "const_0_to_255_operand" "n,n")))]
9471 pblendw\t{%3, %2, %0|%0, %2, %3}
9472 vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9473 [(set_attr "isa" "noavx,avx")
9474 (set_attr "type" "ssemov")
9475 (set_attr "prefix_extra" "1")
9476 (set_attr "length_immediate" "1")
9477 (set_attr "prefix" "orig,vex")
9478 (set_attr "mode" "TI")])
9480 ;; The builtin passes an 8-bit immediate.  The expander replicates it into
;; both bytes of a 16-bit value (val << 8 | val) and stores that back in
;; operands[3], presumably so the V16HI pattern has one selector bit per
;; element -- confirm against avx2_pblendw_operand.
9481 (define_expand "avx2_pblendw"
9482 [(set (match_operand:V16HI 0 "register_operand" "")
9484 (match_operand:V16HI 2 "nonimmediate_operand" "")
9485 (match_operand:V16HI 1 "register_operand" "")
9486 (match_operand:SI 3 "const_0_to_255_operand" "")))]
9489 HOST_WIDE_INT val = INTVAL (operands[3]) & 0xff;
9490 operands[3] = GEN_INT (val << 8 | val);
;; Matcher that emits vpblendw for V16HI.  Operand 3 must satisfy
;; avx2_pblendw_operand; the output code masks it to its low 8 bits
;; before printing the immediate.
9493 (define_insn "*avx2_pblendw"
9494 [(set (match_operand:V16HI 0 "register_operand" "=x")
9496 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
9497 (match_operand:V16HI 1 "register_operand" "x")
9498 (match_operand:SI 3 "avx2_pblendw_operand" "n")))]
9501 operands[3] = GEN_INT (INTVAL (operands[3]) & 0xff);
9502 return "vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}";
9504 [(set_attr "type" "ssemov")
9505 (set_attr "prefix_extra" "1")
9506 (set_attr "length_immediate" "1")
9507 (set_attr "prefix" "vex")
9508 (set_attr "mode" "OI")])
;; vpblendd over the VI4_AVX2 iterator with an 8-bit immediate selector
;; (operand 3).  Operand 2 is listed before operand 1 in the RTL.
;; VEX-only, three-operand form.
9510 (define_insn "avx2_pblendd<mode>"
9511 [(set (match_operand:VI4_AVX2 0 "register_operand" "=x")
9513 (match_operand:VI4_AVX2 2 "nonimmediate_operand" "xm")
9514 (match_operand:VI4_AVX2 1 "register_operand" "x")
9515 (match_operand:SI 3 "const_0_to_255_operand" "n")))]
9517 "vpblendd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9518 [(set_attr "type" "ssemov")
9519 (set_attr "prefix_extra" "1")
9520 (set_attr "length_immediate" "1")
9521 (set_attr "prefix" "vex")
9522 (set_attr "mode" "<sseinsnmode>")])
;; phminposuw on V8HI, modelled as UNSPEC_PHMINPOSUW of one source
;; operand that may be a register or memory.  One "%v" template covers
;; both the legacy and VEX encodings.
9524 (define_insn "sse4_1_phminposuw"
9525 [(set (match_operand:V8HI 0 "register_operand" "=x")
9526 (unspec:V8HI [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
9527 UNSPEC_PHMINPOSUW))]
9529 "%vphminposuw\t{%1, %0|%0, %1}"
9530 [(set_attr "type" "sselog1")
9531 (set_attr "prefix_extra" "1")
9532 (set_attr "prefix" "maybe_vex")
9533 (set_attr "mode" "TI")])
;; vpmov<extsuffix>bw: widens a whole V16QI source to V16HI.  <code> and
;; <extsuffix> come from the code iterator used in the pattern.  VEX-only.
9535 (define_insn "avx2_<code>v16qiv16hi2"
9536 [(set (match_operand:V16HI 0 "register_operand" "=x")
9538 (match_operand:V16QI 1 "nonimmediate_operand" "xm")))]
9540 "vpmov<extsuffix>bw\t{%1, %0|%0, %1}"
9541 [(set_attr "type" "ssemov")
9542 (set_attr "prefix_extra" "1")
9543 (set_attr "prefix" "vex")
9544 (set_attr "mode" "OI")])
;; pmov<extsuffix>bw: widens selected QI elements of a V16QI source
;; (selection list starting at element 0) to V8HI.  One "%v" template
;; covers both the legacy and VEX encodings.
9546 (define_insn "sse4_1_<code>v8qiv8hi2"
9547 [(set (match_operand:V8HI 0 "register_operand" "=x")
9550 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9551 (parallel [(const_int 0)
9560 "%vpmov<extsuffix>bw\t{%1, %0|%0, %1}"
9561 [(set_attr "type" "ssemov")
9562 (set_attr "prefix_extra" "1")
9563 (set_attr "prefix" "maybe_vex")
9564 (set_attr "mode" "TI")])
;; vpmov<extsuffix>bd: widens selected QI elements of a V16QI source
;; (selection list starting at element 0) to V8SI.  VEX-only.
9566 (define_insn "avx2_<code>v8qiv8si2"
9567 [(set (match_operand:V8SI 0 "register_operand" "=x")
9570 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9571 (parallel [(const_int 0)
9580 "vpmov<extsuffix>bd\t{%1, %0|%0, %1}"
9581 [(set_attr "type" "ssemov")
9582 (set_attr "prefix_extra" "1")
9583 (set_attr "prefix" "vex")
9584 (set_attr "mode" "OI")])
;; pmov<extsuffix>bd: widens selected QI elements of a V16QI source
;; (selection list starting at element 0) to V4SI.  One "%v" template
;; covers both the legacy and VEX encodings.
9586 (define_insn "sse4_1_<code>v4qiv4si2"
9587 [(set (match_operand:V4SI 0 "register_operand" "=x")
9590 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9591 (parallel [(const_int 0)
9596 "%vpmov<extsuffix>bd\t{%1, %0|%0, %1}"
9597 [(set_attr "type" "ssemov")
9598 (set_attr "prefix_extra" "1")
9599 (set_attr "prefix" "maybe_vex")
9600 (set_attr "mode" "TI")])
;; vpmov<extsuffix>wd: widens a whole V8HI source to V8SI.  VEX-only.
9602 (define_insn "avx2_<code>v8hiv8si2"
9603 [(set (match_operand:V8SI 0 "register_operand" "=x")
9605 (match_operand:V8HI 1 "nonimmediate_operand" "xm")))]
9607 "vpmov<extsuffix>wd\t{%1, %0|%0, %1}"
9608 [(set_attr "type" "ssemov")
9609 (set_attr "prefix_extra" "1")
9610 (set_attr "prefix" "vex")
9611 (set_attr "mode" "OI")])
;; pmov<extsuffix>wd: widens selected HI elements of a V8HI source
;; (selection list starting at element 0) to V4SI.  One "%v" template
;; covers both the legacy and VEX encodings.
9613 (define_insn "sse4_1_<code>v4hiv4si2"
9614 [(set (match_operand:V4SI 0 "register_operand" "=x")
9617 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
9618 (parallel [(const_int 0)
9623 "%vpmov<extsuffix>wd\t{%1, %0|%0, %1}"
9624 [(set_attr "type" "ssemov")
9625 (set_attr "prefix_extra" "1")
9626 (set_attr "prefix" "maybe_vex")
9627 (set_attr "mode" "TI")])
;; vpmov<extsuffix>bq: widens selected QI elements of a V16QI source
;; (selection list starting at element 0) to V4DI.  VEX-only.
9629 (define_insn "avx2_<code>v4qiv4di2"
9630 [(set (match_operand:V4DI 0 "register_operand" "=x")
9633 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9634 (parallel [(const_int 0)
9639 "vpmov<extsuffix>bq\t{%1, %0|%0, %1}"
9640 [(set_attr "type" "ssemov")
9641 (set_attr "prefix_extra" "1")
9642 (set_attr "prefix" "vex")
9643 (set_attr "mode" "OI")])
;; pmov<extsuffix>bq: widens selected QI elements of a V16QI source
;; (selection list starting at element 0) to V2DI.  One "%v" template
;; covers both the legacy and VEX encodings.
9645 (define_insn "sse4_1_<code>v2qiv2di2"
9646 [(set (match_operand:V2DI 0 "register_operand" "=x")
9649 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9650 (parallel [(const_int 0)
9653 "%vpmov<extsuffix>bq\t{%1, %0|%0, %1}"
9654 [(set_attr "type" "ssemov")
9655 (set_attr "prefix_extra" "1")
9656 (set_attr "prefix" "maybe_vex")
9657 (set_attr "mode" "TI")])
;; vpmov<extsuffix>wq: widens selected HI elements of a V8HI source
;; (selection list starting at element 0) to V4DI.  VEX-only.
9659 (define_insn "avx2_<code>v4hiv4di2"
9660 [(set (match_operand:V4DI 0 "register_operand" "=x")
9663 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
9664 (parallel [(const_int 0)
9669 "vpmov<extsuffix>wq\t{%1, %0|%0, %1}"
9670 [(set_attr "type" "ssemov")
9671 (set_attr "prefix_extra" "1")
9672 (set_attr "prefix" "vex")
9673 (set_attr "mode" "OI")])
;; pmov<extsuffix>wq: widens selected HI elements of a V8HI source
;; (selection list starting at element 0) to V2DI.  One "%v" template
;; covers both the legacy and VEX encodings.
9675 (define_insn "sse4_1_<code>v2hiv2di2"
9676 [(set (match_operand:V2DI 0 "register_operand" "=x")
9679 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
9680 (parallel [(const_int 0)
9683 "%vpmov<extsuffix>wq\t{%1, %0|%0, %1}"
9684 [(set_attr "type" "ssemov")
9685 (set_attr "prefix_extra" "1")
9686 (set_attr "prefix" "maybe_vex")
9687 (set_attr "mode" "TI")])
;; vpmov<extsuffix>dq: widens a whole V4SI source to V4DI.  VEX-only.
;; The "prefix" attribute is now stated explicitly as "vex", as every
;; other avx2_* extension pattern in this group does.
;; NOTE(review): the attribute default may already yield "vex" for mode
;; OI, in which case this is a consistency change only -- confirm against
;; the define_attr for "prefix".
9689 (define_insn "avx2_<code>v4siv4di2"
9690 [(set (match_operand:V4DI 0 "register_operand" "=x")
9692 (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
9694 "vpmov<extsuffix>dq\t{%1, %0|%0, %1}"
9695 [(set_attr "type" "ssemov")
9696 (set_attr "prefix_extra" "1")
(set_attr "prefix" "vex")
9697 (set_attr "mode" "OI")])
;; pmov<extsuffix>dq: widens selected SI elements of a V4SI source
;; (selection list starting at element 0) to V2DI.  One "%v" template
;; covers both the legacy and VEX encodings.
9699 (define_insn "sse4_1_<code>v2siv2di2"
9700 [(set (match_operand:V2DI 0 "register_operand" "=x")
9703 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
9704 (parallel [(const_int 0)
9707 "%vpmov<extsuffix>dq\t{%1, %0|%0, %1}"
9708 [(set_attr "type" "ssemov")
9709 (set_attr "prefix_extra" "1")
9710 (set_attr "prefix" "maybe_vex")
9711 (set_attr "mode" "TI")])
9713 ;; vtestps/vtestpd set FLAGS_REG in much the same way as comiss and
9714 ;; ucomiss, but they are not really compare instructions.
;; The pattern is an unspec over the VF iterator whose only output is
;; FLAGS_REG in CC mode; operand 1 may be memory.  VEX-only.
9715 (define_insn "avx_vtest<ssemodesuffix><avxsizesuffix>"
9716 [(set (reg:CC FLAGS_REG)
9717 (unspec:CC [(match_operand:VF 0 "register_operand" "x")
9718 (match_operand:VF 1 "nonimmediate_operand" "xm")]
9721 "vtest<ssemodesuffix>\t{%1, %0|%0, %1}"
9722 [(set_attr "type" "ssecomi")
9723 (set_attr "prefix_extra" "1")
9724 (set_attr "prefix" "vex")
9725 (set_attr "mode" "<MODE>")])
9727 ;; ptest sets FLAGS_REG in much the same way as comiss and ucomiss,
9728 ;; but it is not really a compare instruction.
;; 256-bit form on V4DI operands; the only output is FLAGS_REG in CC mode.
;; VEX-only.
9729 (define_insn "avx_ptest256"
9730 [(set (reg:CC FLAGS_REG)
9731 (unspec:CC [(match_operand:V4DI 0 "register_operand" "x")
9732 (match_operand:V4DI 1 "nonimmediate_operand" "xm")]
9735 "vptest\t{%1, %0|%0, %1}"
9736 [(set_attr "type" "ssecomi")
9737 (set_attr "prefix_extra" "1")
9738 (set_attr "prefix" "vex")
9739 (set_attr "mode" "OI")])
;; 128-bit ptest on V2DI operands; the only output is FLAGS_REG in CC
;; mode.  One "%v" template covers both the legacy and VEX encodings.
9741 (define_insn "sse4_1_ptest"
9742 [(set (reg:CC FLAGS_REG)
9743 (unspec:CC [(match_operand:V2DI 0 "register_operand" "x")
9744 (match_operand:V2DI 1 "nonimmediate_operand" "xm")]
9747 "%vptest\t{%1, %0|%0, %1}"
9748 [(set_attr "type" "ssecomi")
9749 (set_attr "prefix_extra" "1")
9750 (set_attr "prefix" "maybe_vex")
9751 (set_attr "mode" "TI")])
;; Packed round over the VF iterator; operand 2 is an immediate in 0..15.
;; One "%v" template covers both encodings, and prefix_data16 is computed
;; with a test on TARGET_AVX instead of being a per-alternative constant.
9753 (define_insn "<sse4_1>_round<ssemodesuffix><avxsizesuffix>"
9754 [(set (match_operand:VF 0 "register_operand" "=x")
9756 [(match_operand:VF 1 "nonimmediate_operand" "xm")
9757 (match_operand:SI 2 "const_0_to_15_operand" "n")]
9760 "%vround<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
9761 [(set_attr "type" "ssecvt")
9762 (set (attr "prefix_data16")
9764 (match_test "TARGET_AVX")
9766 (const_string "1")))
9767 (set_attr "prefix_extra" "1")
9768 (set_attr "length_immediate" "1")
9769 (set_attr "prefix" "maybe_vex")
9770 (set_attr "mode" "<MODE>")])
;; Scalar round over the VF_128 iterator; operand 3 is an immediate in
;; 0..15.  Operand 2 is the value rounded and operand 1 is a second
;; vector input, tied to the destination in the legacy alternative and a
;; separate register in the VEX alternative.
9772 (define_insn "sse4_1_round<ssescalarmodesuffix>"
9773 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
9776 [(match_operand:VF_128 2 "register_operand" "x,x")
9777 (match_operand:SI 3 "const_0_to_15_operand" "n,n")]
9779 (match_operand:VF_128 1 "register_operand" "0,x")
9783 round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
9784 vround<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9785 [(set_attr "isa" "noavx,avx")
9786 (set_attr "type" "ssecvt")
9787 (set_attr "length_immediate" "1")
9788 (set_attr "prefix_data16" "1,*")
9789 (set_attr "prefix_extra" "1")
9790 (set_attr "prefix" "orig,vex")
9791 (set_attr "mode" "<MODE>")])
;; Expander for round<mode>2 over the VF iterator, enabled only when
;; TARGET_ROUND && !flag_trapping_math.  The preparation code:
;;   - computes pred_half = 0.5 - 2**(-p - 1) for the element format,
;;     i.e. nextafter (0.5, 0.0), and broadcasts it into a vector register;
;;   - copies the sign of operand 1 onto that vector with copysign<mode>3,
;;     leaving the result in operands[3];
;;   - allocates operands[4] as a fresh register and sets operands[5] to
;;     ROUND_TRUNC, which the second set of the pattern consumes.
9793 (define_expand "round<mode>2"
9796 (match_operand:VF 1 "nonimmediate_operand" "")
9798 (set (match_operand:VF 0 "register_operand" "")
9800 [(match_dup 4) (match_dup 5)]
9802 "TARGET_ROUND && !flag_trapping_math"
9804 enum machine_mode scalar_mode;
9805 const struct real_format *fmt;
9806 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
9809 scalar_mode = GET_MODE_INNER (<MODE>mode);
9811 /* load nextafter (0.5, 0.0) */
9812 fmt = REAL_MODE_FORMAT (scalar_mode);
9813 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, scalar_mode);
9814 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
9815 half = const_double_from_real_value (pred_half, scalar_mode);
9817 vec_half = ix86_build_const_vector (<MODE>mode, true, half);
9818 vec_half = force_reg (<MODE>mode, vec_half);
9820 operands[3] = gen_reg_rtx (<MODE>mode);
9821 emit_insn (gen_copysign<mode>3 (operands[3], vec_half, operands[1]));
9823 operands[4] = gen_reg_rtx (<MODE>mode);
9824 operands[5] = GEN_INT (ROUND_TRUNC);
9827 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9829 ;; Intel SSE4.2 string/text processing instructions
9831 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Combined pcmpestr pattern with three outputs: operand 0 (SI,
;; constraint "c"), operand 1 (V16QI, constraint "Yz") and FLAGS_REG.
;; It is split while pseudos can still be created.  The split code checks
;; curr_insn for REG_UNUSED notes on each output and emits
;; gen_sse4_2_pcmpestri for operand 0 and/or gen_sse4_2_pcmpestrm for
;; operand 1.  If only the flags are live it emits the _cconly form, and
;; if nothing is live it emits a NOTE_INSN_DELETED note.
9833 (define_insn_and_split "sse4_2_pcmpestr"
9834 [(set (match_operand:SI 0 "register_operand" "=c,c")
9836 [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
9837 (match_operand:SI 3 "register_operand" "a,a")
9838 (match_operand:V16QI 4 "nonimm_not_xmm0_operand" "x,m")
9839 (match_operand:SI 5 "register_operand" "d,d")
9840 (match_operand:SI 6 "const_0_to_255_operand" "n,n")]
9842 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
9850 (set (reg:CC FLAGS_REG)
9859 && can_create_pseudo_p ()"
9864 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
9865 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
9866 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
9869 emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
9870 operands[3], operands[4],
9871 operands[5], operands[6]));
9873 emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
9874 operands[3], operands[4],
9875 operands[5], operands[6]));
9876 if (flags && !(ecx || xmm0))
9877 emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
9878 operands[2], operands[3],
9879 operands[4], operands[5],
9881 if (!(flags || ecx || xmm0))
9882 emit_note (NOTE_INSN_DELETED);
9886 [(set_attr "type" "sselog")
9887 (set_attr "prefix_data16" "1")
9888 (set_attr "prefix_extra" "1")
9889 (set_attr "length_immediate" "1")
9890 (set_attr "memory" "none,load")
9891 (set_attr "mode" "TI")])
;; pcmpestri: the result goes to operand 0 (SI, constraint "c") and
;; FLAGS_REG is also set.  Operands 2 and 4 are SI registers pinned by
;; constraints "a" and "d"; they are not printed in the template.
;; Alternative 1 takes operand 3 from memory ("memory" attr "none,load").
9893 (define_insn "sse4_2_pcmpestri"
9894 [(set (match_operand:SI 0 "register_operand" "=c,c")
9896 [(match_operand:V16QI 1 "register_operand" "x,x")
9897 (match_operand:SI 2 "register_operand" "a,a")
9898 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
9899 (match_operand:SI 4 "register_operand" "d,d")
9900 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
9902 (set (reg:CC FLAGS_REG)
9911 "%vpcmpestri\t{%5, %3, %1|%1, %3, %5}"
9912 [(set_attr "type" "sselog")
9913 (set_attr "prefix_data16" "1")
9914 (set_attr "prefix_extra" "1")
9915 (set_attr "prefix" "maybe_vex")
9916 (set_attr "length_immediate" "1")
9917 (set_attr "memory" "none,load")
9918 (set_attr "mode" "TI")])
;; pcmpestrm: the result goes to operand 0 (V16QI, constraint "Yz") and
;; FLAGS_REG is also set.  Operands 2 and 4 are SI registers pinned by
;; constraints "a" and "d"; they are not printed in the template.
;; Alternative 1 takes operand 3 from memory.
9920 (define_insn "sse4_2_pcmpestrm"
9921 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
9923 [(match_operand:V16QI 1 "register_operand" "x,x")
9924 (match_operand:SI 2 "register_operand" "a,a")
9925 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
9926 (match_operand:SI 4 "register_operand" "d,d")
9927 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
9929 (set (reg:CC FLAGS_REG)
9938 "%vpcmpestrm\t{%5, %3, %1|%1, %3, %5}"
9939 [(set_attr "type" "sselog")
9940 (set_attr "prefix_data16" "1")
9941 (set_attr "prefix_extra" "1")
9942 (set_attr "length_immediate" "1")
9943 (set_attr "prefix" "maybe_vex")
9944 (set_attr "memory" "none,load")
9945 (set_attr "mode" "TI")])
;; Flags-only pcmpestr: FLAGS_REG is the sole real output.  There are four
;; alternatives.  The first two clobber a V16QI scratch ("Yz") and emit
;; pcmpestrm; the last two clobber an SI scratch ("c") and emit pcmpestri.
;; Within each pair, operand 4 is a register and then a memory operand.
9947 (define_insn "sse4_2_pcmpestr_cconly"
9948 [(set (reg:CC FLAGS_REG)
9950 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
9951 (match_operand:SI 3 "register_operand" "a,a,a,a")
9952 (match_operand:V16QI 4 "nonimmediate_operand" "x,m,x,m")
9953 (match_operand:SI 5 "register_operand" "d,d,d,d")
9954 (match_operand:SI 6 "const_0_to_255_operand" "n,n,n,n")]
9956 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
9957 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
9960 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
9961 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
9962 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}
9963 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}"
9964 [(set_attr "type" "sselog")
9965 (set_attr "prefix_data16" "1")
9966 (set_attr "prefix_extra" "1")
9967 (set_attr "length_immediate" "1")
9968 (set_attr "memory" "none,load,none,load")
9969 (set_attr "prefix" "maybe_vex")
9970 (set_attr "mode" "TI")])
;; Combined pcmpistr pattern with three outputs: operand 0 (SI,
;; constraint "c"), operand 1 (V16QI, constraint "Yz") and FLAGS_REG.
;; It is split while pseudos can still be created.  The split code checks
;; curr_insn for REG_UNUSED notes on each output and emits
;; gen_sse4_2_pcmpistri for operand 0 and/or gen_sse4_2_pcmpistrm for
;; operand 1.  If only the flags are live it emits the _cconly form, and
;; if nothing is live it emits a NOTE_INSN_DELETED note.
9972 (define_insn_and_split "sse4_2_pcmpistr"
9973 [(set (match_operand:SI 0 "register_operand" "=c,c")
9975 [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
9976 (match_operand:V16QI 3 "nonimm_not_xmm0_operand" "x,m")
9977 (match_operand:SI 4 "const_0_to_255_operand" "n,n")]
9979 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
9985 (set (reg:CC FLAGS_REG)
9992 && can_create_pseudo_p ()"
9997 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
9998 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
9999 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
10002 emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
10003 operands[3], operands[4]));
10005 emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
10006 operands[3], operands[4]));
10007 if (flags && !(ecx || xmm0))
10008 emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
10009 operands[2], operands[3],
10011 if (!(flags || ecx || xmm0))
10012 emit_note (NOTE_INSN_DELETED);
10016 [(set_attr "type" "sselog")
10017 (set_attr "prefix_data16" "1")
10018 (set_attr "prefix_extra" "1")
10019 (set_attr "length_immediate" "1")
10020 (set_attr "memory" "none,load")
10021 (set_attr "mode" "TI")])
;; Implicit-length string compare, index form: sets SI operand 0 (constraint
;; "c") and FLAGS_REG, and emits pcmpistri.  Operand 2 may be a register or
;; memory; operand 3 is an 8-bit immediate.
10023 (define_insn "sse4_2_pcmpistri"
10024 [(set (match_operand:SI 0 "register_operand" "=c,c")
10026 [(match_operand:V16QI 1 "register_operand" "x,x")
10027 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
10028 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
10030 (set (reg:CC FLAGS_REG)
10037 "%vpcmpistri\t{%3, %2, %1|%1, %2, %3}"
10038 [(set_attr "type" "sselog")
10039 (set_attr "prefix_data16" "1")
10040 (set_attr "prefix_extra" "1")
10041 (set_attr "length_immediate" "1")
10042 (set_attr "prefix" "maybe_vex")
10043 (set_attr "memory" "none,load")
10044 (set_attr "mode" "TI")])
;; Implicit-length string compare, mask form: sets V16QI operand 0
;; (constraint "Yz") and FLAGS_REG, and emits pcmpistrm.  Operand 2 may be a
;; register or memory; operand 3 is an 8-bit immediate.
10046 (define_insn "sse4_2_pcmpistrm"
10047 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
10049 [(match_operand:V16QI 1 "register_operand" "x,x")
10050 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
10051 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
10053 (set (reg:CC FLAGS_REG)
10060 "%vpcmpistrm\t{%3, %2, %1|%1, %2, %3}"
10061 [(set_attr "type" "sselog")
10062 (set_attr "prefix_data16" "1")
10063 (set_attr "prefix_extra" "1")
10064 (set_attr "length_immediate" "1")
10065 (set_attr "prefix" "maybe_vex")
10066 (set_attr "memory" "none,load")
10067 (set_attr "mode" "TI")])
;; Flags-only form of the implicit-length string compare: sets FLAGS_REG and
;; only clobbers scratch operands 0 and 1.  Alternatives 0-1 use a V16QI "Yz"
;; scratch and emit pcmpistrm; alternatives 2-3 use an SI "c" scratch and
;; emit pcmpistri.  In the odd alternatives operand 3 comes from memory.
10069 (define_insn "sse4_2_pcmpistr_cconly"
10070 [(set (reg:CC FLAGS_REG)
10072 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
10073 (match_operand:V16QI 3 "nonimmediate_operand" "x,m,x,m")
10074 (match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")]
10076 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
10077 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
10080 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
10081 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
10082 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}
10083 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}"
10084 [(set_attr "type" "sselog")
10085 (set_attr "prefix_data16" "1")
10086 (set_attr "prefix_extra" "1")
10087 (set_attr "length_immediate" "1")
10088 (set_attr "memory" "none,load,none,load")
10089 (set_attr "prefix" "maybe_vex")
10090 (set_attr "mode" "TI")])
10092 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10094 ;; XOP instructions
10096 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10098 ;; XOP parallel integer multiply/add instructions.
10099 ;; Note the XOP multiply/add instructions
10100 ;; a[i] = b[i] * c[i] + d[i];
10101 ;; do not allow the value being added to be a memory operation.
;; V8HI multiply/add, emitted as vpmacsww.  Operands 1 and 2 are the
;; multiplicands (operand 1 is marked commutative with "%"); operand 3 is the
;; addend.  Only operand 2 may be in memory ("xm"); the addend is
;; register-only ("x").
10102 (define_insn "xop_pmacsww"
10103 [(set (match_operand:V8HI 0 "register_operand" "=x")
10106 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10107 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
10108 (match_operand:V8HI 3 "nonimmediate_operand" "x")))]
10110 "vpmacsww\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10111 [(set_attr "type" "ssemuladd")
10112 (set_attr "mode" "TI")])
;; V8HI multiply/add, emitted as vpmacssww.  Same operand layout as
;; xop_pmacsww: operands 1 and 2 feed the mult:V8HI, operand 3 is the
;; register-only addend.
;; NOTE(review): the outer RTL code that combines product and addend is not
;; visible in this excerpt -- presumably a saturating add; confirm.
10114 (define_insn "xop_pmacssww"
10115 [(set (match_operand:V8HI 0 "register_operand" "=x")
10117 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10118 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
10119 (match_operand:V8HI 3 "nonimmediate_operand" "x")))]
10121 "vpmacssww\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10122 [(set_attr "type" "ssemuladd")
10123 (set_attr "mode" "TI")])
;; V4SI multiply/add, emitted as vpmacsdd.  Operands 1 and 2 are the
;; multiplicands (operand 1 commutative), operand 3 is the register-only
;; addend; only operand 2 may be in memory.
10125 (define_insn "xop_pmacsdd"
10126 [(set (match_operand:V4SI 0 "register_operand" "=x")
10129 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10130 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
10131 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10133 "vpmacsdd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10134 [(set_attr "type" "ssemuladd")
10135 (set_attr "mode" "TI")])
;; V4SI multiply/add, emitted as vpmacssdd.  Same operand layout as
;; xop_pmacsdd: operands 1 and 2 feed the mult:V4SI, operand 3 is the
;; register-only addend.
;; NOTE(review): the outer RTL code is not visible in this excerpt --
;; presumably a saturating add; confirm.
10137 (define_insn "xop_pmacssdd"
10138 [(set (match_operand:V4SI 0 "register_operand" "=x")
10140 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10141 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
10142 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10144 "vpmacssdd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10145 [(set_attr "type" "ssemuladd")
10146 (set_attr "mode" "TI")])
;; Widening multiply/add emitted as vpmacssdql: selected elements of V4SI
;; operands 1 and 2 are multiplied and combined with V2DI addend operand 3
;; into a V2DI result.  The element selectors visible here start at index 1.
10148 (define_insn "xop_pmacssdql"
10149 [(set (match_operand:V2DI 0 "register_operand" "=x")
10154 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10155 (parallel [(const_int 1)
10158 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10159 (parallel [(const_int 1)
10161 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
10163 "vpmacssdql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10164 [(set_attr "type" "ssemuladd")
10165 (set_attr "mode" "TI")])
;; Widening multiply/add emitted as vpmacssdqh: selected elements of V4SI
;; operands 1 and 2 are multiplied and combined with V2DI addend operand 3
;; into a V2DI result.  The element selectors visible here start at index 0.
10167 (define_insn "xop_pmacssdqh"
10168 [(set (match_operand:V2DI 0 "register_operand" "=x")
10173 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10174 (parallel [(const_int 0)
10178 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10179 (parallel [(const_int 0)
10181 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
10183 "vpmacssdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10184 [(set_attr "type" "ssemuladd")
10185 (set_attr "mode" "TI")])
;; Widening multiply/add emitted as vpmacsdql: selected elements of V4SI
;; operands 1 and 2 are multiplied and combined with V2DI addend operand 3
;; into a V2DI result.  The element selectors visible here start at index 1.
10187 (define_insn "xop_pmacsdql"
10188 [(set (match_operand:V2DI 0 "register_operand" "=x")
10193 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10194 (parallel [(const_int 1)
10198 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10199 (parallel [(const_int 1)
10201 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
10203 "vpmacsdql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10204 [(set_attr "type" "ssemuladd")
10205 (set_attr "mode" "TI")])
10207 ;; We don't have a straight 32-bit parallel multiply and extend on XOP, so
10208 ;; fake it with a multiply/add. In general, we expect the define_split to
10209 ;; occur before register allocation, so we have to handle the corner case where
10210 ;; the target is the same as operands 1/2
;; Widening multiply of V4SI elements 1 and 3 of operands 1 and 2 into a
;; V2DI result.  The destination is early-clobber ("=&x").  The insn is split
;; once reload has completed; the split's preparation code sets operands[3]
;; to the V2DI zero vector.
10211 (define_insn_and_split "xop_mulv2div2di3_low"
10212 [(set (match_operand:V2DI 0 "register_operand" "=&x")
10216 (match_operand:V4SI 1 "register_operand" "%x")
10217 (parallel [(const_int 1)
10221 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10222 (parallel [(const_int 1)
10223 (const_int 3)])))))]
10226 "&& reload_completed"
10227 [(set (match_dup 0)
10235 (parallel [(const_int 1)
10240 (parallel [(const_int 1)
10244 operands[3] = CONST0_RTX (V2DImode);
10246 [(set_attr "type" "ssemul")
10247 (set_attr "mode" "TI")])
;; Widening multiply/add emitted as vpmacsdqh: selected elements of V4SI
;; operands 1 and 2 are multiplied and combined with V2DI addend operand 3
;; into a V2DI result.  The element selectors visible here start at index 0.
10249 (define_insn "xop_pmacsdqh"
10250 [(set (match_operand:V2DI 0 "register_operand" "=x")
10255 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10256 (parallel [(const_int 0)
10260 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10261 (parallel [(const_int 0)
10263 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
10265 "vpmacsdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10266 [(set_attr "type" "ssemuladd")
10267 (set_attr "mode" "TI")])
10269 ;; We don't have a straight 32-bit parallel multiply and extend on XOP, so
10270 ;; fake it with a multiply/add. In general, we expect the define_split to
10271 ;; occur before register allocation, so we have to handle the corner case where
10272 ;; the target is the same as either operands[1] or operands[2]
;; Widening multiply of V4SI elements 0 and 2 of operands 1 and 2 into a
;; V2DI result.  The destination is early-clobber ("=&x").  The insn is split
;; once reload has completed; the split's preparation code sets operands[3]
;; to the V2DI zero vector.
10273 (define_insn_and_split "xop_mulv2div2di3_high"
10274 [(set (match_operand:V2DI 0 "register_operand" "=&x")
10278 (match_operand:V4SI 1 "register_operand" "%x")
10279 (parallel [(const_int 0)
10283 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10284 (parallel [(const_int 0)
10285 (const_int 2)])))))]
10288 "&& reload_completed"
10289 [(set (match_dup 0)
10297 (parallel [(const_int 0)
10302 (parallel [(const_int 0)
10306 operands[3] = CONST0_RTX (V2DImode);
10308 [(set_attr "type" "ssemul")
10309 (set_attr "mode" "TI")])
10311 ;; XOP parallel integer multiply/add instructions for the intrinsics
;; Widening multiply/add emitted as vpmacsswd: selected elements of V8HI
;; operands 1 and 2 are multiplied and combined with V4SI addend operand 3
;; into a V4SI result.  The element selectors visible here start at index 1.
10312 (define_insn "xop_pmacsswd"
10313 [(set (match_operand:V4SI 0 "register_operand" "=x")
10318 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10319 (parallel [(const_int 1)
10325 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
10326 (parallel [(const_int 1)
10330 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10332 "vpmacsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10333 [(set_attr "type" "ssemuladd")
10334 (set_attr "mode" "TI")])
;; Widening multiply/add emitted as vpmacswd: selected elements of V8HI
;; operands 1 and 2 are multiplied and combined with V4SI addend operand 3
;; into a V4SI result.  The element selectors visible here start at index 1.
10336 (define_insn "xop_pmacswd"
10337 [(set (match_operand:V4SI 0 "register_operand" "=x")
10342 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10343 (parallel [(const_int 1)
10349 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
10350 (parallel [(const_int 1)
10354 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10356 "vpmacswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10357 [(set_attr "type" "ssemuladd")
10358 (set_attr "mode" "TI")])
;; Multiply/add/accumulate emitted as vpmadcsswd.  The pattern uses two
;; groups of element selections from V8HI operands 1 and 2 (one group
;; starting at index 0, the other starting at index 1 and ending at index 7)
;; plus V4SI addend operand 3, producing a V4SI result.
10360 (define_insn "xop_pmadcsswd"
10361 [(set (match_operand:V4SI 0 "register_operand" "=x")
10367 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10368 (parallel [(const_int 0)
10374 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
10375 (parallel [(const_int 0)
10383 (parallel [(const_int 1)
10390 (parallel [(const_int 1)
10393 (const_int 7)])))))
10394 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10396 "vpmadcsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10397 [(set_attr "type" "ssemuladd")
10398 (set_attr "mode" "TI")])
;; Multiply/add/accumulate emitted as vpmadcswd.  The pattern uses two
;; groups of element selections from V8HI operands 1 and 2 (one group
;; starting at index 0, the other starting at index 1 and ending at index 7)
;; plus V4SI addend operand 3, producing a V4SI result.
10400 (define_insn "xop_pmadcswd"
10401 [(set (match_operand:V4SI 0 "register_operand" "=x")
10407 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10408 (parallel [(const_int 0)
10414 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
10415 (parallel [(const_int 0)
10423 (parallel [(const_int 1)
10430 (parallel [(const_int 1)
10433 (const_int 7)])))))
10434 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10436 "vpmadcswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10437 [(set_attr "type" "ssemuladd")
10438 (set_attr "mode" "TI")])
10440 ;; XOP parallel XMM conditional moves
;; Conditional move emitted as vpcmov, instantiated for every mode of the V
;; iterator.  Operand 3 is printed first in the AT&T template; operands 1 and
;; 2 are the other two sources.  The two alternatives allow either operand 3
;; or operand 2 to be in memory, never both.
10441 (define_insn "xop_pcmov_<mode><avxsizesuffix>"
10442 [(set (match_operand:V 0 "register_operand" "=x,x")
10444 (match_operand:V 3 "nonimmediate_operand" "x,m")
10445 (match_operand:V 1 "register_operand" "x,x")
10446 (match_operand:V 2 "nonimmediate_operand" "xm,x")))]
10448 "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10449 [(set_attr "type" "sse4arg")])
10451 ;; XOP horizontal add/subtract instructions
;; Horizontal add emitted as vphaddbw: V16QI source operand 1 (register or
;; memory) to V8HI destination.  The pattern selects two element groups from
;; operand 1, one starting at index 0 and one running from index 1 to 15.
10452 (define_insn "xop_phaddbw"
10453 [(set (match_operand:V8HI 0 "register_operand" "=x")
10457 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10458 (parallel [(const_int 0)
10469 (parallel [(const_int 1)
10476 (const_int 15)])))))]
10478 "vphaddbw\t{%1, %0|%0, %1}"
10479 [(set_attr "type" "sseiadd1")])
;; Horizontal add emitted as vphaddbd: V16QI source operand 1 (register or
;; memory) to V4SI destination.  Four element groups are selected from
;; operand 1, starting at indices 0, 1, 2 and 3.
10481 (define_insn "xop_phaddbd"
10482 [(set (match_operand:V4SI 0 "register_operand" "=x")
10487 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10488 (parallel [(const_int 0)
10495 (parallel [(const_int 1)
10498 (const_int 13)]))))
10503 (parallel [(const_int 2)
10510 (parallel [(const_int 3)
10513 (const_int 15)]))))))]
10515 "vphaddbd\t{%1, %0|%0, %1}"
10516 [(set_attr "type" "sseiadd1")])
;; Horizontal add emitted as vphaddbq: V16QI source operand 1 (register or
;; memory) to V2DI destination.  Eight element groups are selected from
;; operand 1; the visible ones start at indices 0, 1, 2, 3, 8, 9, 10 and 11.
10518 (define_insn "xop_phaddbq"
10519 [(set (match_operand:V2DI 0 "register_operand" "=x")
10525 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10526 (parallel [(const_int 0)
10531 (parallel [(const_int 1)
10537 (parallel [(const_int 2)
10542 (parallel [(const_int 3)
10543 (const_int 7)])))))
10549 (parallel [(const_int 8)
10554 (parallel [(const_int 9)
10555 (const_int 13)]))))
10560 (parallel [(const_int 10)
10565 (parallel [(const_int 11)
10566 (const_int 15)])))))))]
10568 "vphaddbq\t{%1, %0|%0, %1}"
10569 [(set_attr "type" "sseiadd1")])
;; Horizontal add emitted as vphaddwd: V8HI source operand 1 (register or
;; memory) to V4SI destination.  Two element groups are selected from
;; operand 1, one starting at index 0 and one running from index 1 to 7.
10571 (define_insn "xop_phaddwd"
10572 [(set (match_operand:V4SI 0 "register_operand" "=x")
10576 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10577 (parallel [(const_int 0)
10584 (parallel [(const_int 1)
10587 (const_int 7)])))))]
10589 "vphaddwd\t{%1, %0|%0, %1}"
10590 [(set_attr "type" "sseiadd1")])
;; Horizontal add emitted as vphaddwq: V8HI source operand 1 (register or
;; memory) to V2DI destination.  Four element groups are selected from
;; operand 1, starting at indices 0, 1, 2 and 3.
10592 (define_insn "xop_phaddwq"
10593 [(set (match_operand:V2DI 0 "register_operand" "=x")
10598 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10599 (parallel [(const_int 0)
10604 (parallel [(const_int 1)
10610 (parallel [(const_int 2)
10615 (parallel [(const_int 3)
10616 (const_int 7)]))))))]
10618 "vphaddwq\t{%1, %0|%0, %1}"
10619 [(set_attr "type" "sseiadd1")])
;; Horizontal add emitted as vphadddq: V4SI source operand 1 (register or
;; memory) to V2DI destination.  Two element groups are selected from
;; operand 1: one starting at index 0, the other being elements 1 and 3.
10621 (define_insn "xop_phadddq"
10622 [(set (match_operand:V2DI 0 "register_operand" "=x")
10626 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
10627 (parallel [(const_int 0)
10632 (parallel [(const_int 1)
10633 (const_int 3)])))))]
10635 "vphadddq\t{%1, %0|%0, %1}"
10636 [(set_attr "type" "sseiadd1")])
;; Horizontal add emitted as vphaddubw: V16QI source operand 1 (register or
;; memory) to V8HI destination, with the same visible element selection as
;; xop_phaddbw.
;; NOTE(review): the extension code is not visible in this excerpt --
;; presumably zero_extend, matching the "u" in the mnemonic; confirm.
10638 (define_insn "xop_phaddubw"
10639 [(set (match_operand:V8HI 0 "register_operand" "=x")
10643 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10644 (parallel [(const_int 0)
10655 (parallel [(const_int 1)
10662 (const_int 15)])))))]
10664 "vphaddubw\t{%1, %0|%0, %1}"
10665 [(set_attr "type" "sseiadd1")])
;; Horizontal add emitted as vphaddubd: V16QI source operand 1 (register or
;; memory) to V4SI destination.  Four element groups are selected from
;; operand 1, starting at indices 0, 1, 2 and 3.
;; NOTE(review): extension code not visible here -- presumably zero_extend.
10667 (define_insn "xop_phaddubd"
10668 [(set (match_operand:V4SI 0 "register_operand" "=x")
10673 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10674 (parallel [(const_int 0)
10681 (parallel [(const_int 1)
10684 (const_int 13)]))))
10689 (parallel [(const_int 2)
10696 (parallel [(const_int 3)
10699 (const_int 15)]))))))]
10701 "vphaddubd\t{%1, %0|%0, %1}"
10702 [(set_attr "type" "sseiadd1")])
;; Horizontal add emitted as vphaddubq: V16QI source operand 1 (register or
;; memory) to V2DI destination.  Eight element groups are selected from
;; operand 1; the visible ones start at indices 0, 1, 2, 3, 8, 9, 10 and 11.
;; NOTE(review): extension code not visible here -- presumably zero_extend.
10704 (define_insn "xop_phaddubq"
10705 [(set (match_operand:V2DI 0 "register_operand" "=x")
10711 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10712 (parallel [(const_int 0)
10717 (parallel [(const_int 1)
10723 (parallel [(const_int 2)
10728 (parallel [(const_int 3)
10729 (const_int 7)])))))
10735 (parallel [(const_int 8)
10740 (parallel [(const_int 9)
10741 (const_int 13)]))))
10746 (parallel [(const_int 10)
10751 (parallel [(const_int 11)
10752 (const_int 15)])))))))]
10754 "vphaddubq\t{%1, %0|%0, %1}"
10755 [(set_attr "type" "sseiadd1")])
;; Horizontal add emitted as vphadduwd: V8HI source operand 1 (register or
;; memory) to V4SI destination.  Two element groups are selected from
;; operand 1, one starting at index 0 and one running from index 1 to 7.
;; NOTE(review): extension code not visible here -- presumably zero_extend.
10757 (define_insn "xop_phadduwd"
10758 [(set (match_operand:V4SI 0 "register_operand" "=x")
10762 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10763 (parallel [(const_int 0)
10770 (parallel [(const_int 1)
10773 (const_int 7)])))))]
10775 "vphadduwd\t{%1, %0|%0, %1}"
10776 [(set_attr "type" "sseiadd1")])
;; Horizontal add emitted as vphadduwq: V8HI source operand 1 (register or
;; memory) to V2DI destination.  Four element groups are selected from
;; operand 1, starting at indices 0, 1, 2 and 3.
;; NOTE(review): extension code not visible here -- presumably zero_extend.
10778 (define_insn "xop_phadduwq"
10779 [(set (match_operand:V2DI 0 "register_operand" "=x")
10784 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10785 (parallel [(const_int 0)
10790 (parallel [(const_int 1)
10796 (parallel [(const_int 2)
10801 (parallel [(const_int 3)
10802 (const_int 7)]))))))]
10804 "vphadduwq\t{%1, %0|%0, %1}"
10805 [(set_attr "type" "sseiadd1")])
;; Horizontal add emitted as vphaddudq: V4SI source operand 1 (register or
;; memory) to V2DI destination.  Two element groups are selected from
;; operand 1: one starting at index 0, the other being elements 1 and 3.
;; NOTE(review): extension code not visible here -- presumably zero_extend.
10807 (define_insn "xop_phaddudq"
10808 [(set (match_operand:V2DI 0 "register_operand" "=x")
10812 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
10813 (parallel [(const_int 0)
10818 (parallel [(const_int 1)
10819 (const_int 3)])))))]
10821 "vphaddudq\t{%1, %0|%0, %1}"
10822 [(set_attr "type" "sseiadd1")])
;; Horizontal subtract emitted as vphsubbw: V16QI source operand 1 (register
;; or memory) to V8HI destination.  Two element groups are selected from
;; operand 1, one starting at index 0 and one running from index 1 to 15.
10824 (define_insn "xop_phsubbw"
10825 [(set (match_operand:V8HI 0 "register_operand" "=x")
10829 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10830 (parallel [(const_int 0)
10841 (parallel [(const_int 1)
10848 (const_int 15)])))))]
10850 "vphsubbw\t{%1, %0|%0, %1}"
10851 [(set_attr "type" "sseiadd1")])
;; Horizontal subtract emitted as vphsubwd: V8HI source operand 1 (register
;; or memory) to V4SI destination.  Two element groups are selected from
;; operand 1, one starting at index 0 and one running from index 1 to 7.
10853 (define_insn "xop_phsubwd"
10854 [(set (match_operand:V4SI 0 "register_operand" "=x")
10858 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10859 (parallel [(const_int 0)
10866 (parallel [(const_int 1)
10869 (const_int 7)])))))]
10871 "vphsubwd\t{%1, %0|%0, %1}"
10872 [(set_attr "type" "sseiadd1")])
;; Horizontal subtract emitted as vphsubdq: V4SI source operand 1 (register
;; or memory) to V2DI destination.  Two element groups are selected from
;; operand 1: one starting at index 0, the other being elements 1 and 3.
10874 (define_insn "xop_phsubdq"
10875 [(set (match_operand:V2DI 0 "register_operand" "=x")
10879 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
10880 (parallel [(const_int 0)
10885 (parallel [(const_int 1)
10886 (const_int 3)])))))]
10888 "vphsubdq\t{%1, %0|%0, %1}"
10889 [(set_attr "type" "sseiadd1")])
10891 ;; XOP permute instructions
;; Byte permute emitted as vpperm, modelled as UNSPEC_XOP_PERMUTE over three
;; V16QI inputs.  Operand 1 must be a register; either operand 2 or operand 3
;; may be in memory, but the insn condition rejects both being MEMs.
10892 (define_insn "xop_pperm"
10893 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
10895 [(match_operand:V16QI 1 "register_operand" "x,x")
10896 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
10897 (match_operand:V16QI 3 "nonimmediate_operand" "xm,x")]
10898 UNSPEC_XOP_PERMUTE))]
10899 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
10900 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10901 [(set_attr "type" "sse4arg")
10902 (set_attr "mode" "TI")])
10904 ;; XOP pack instructions that combine two vectors into a smaller vector
;; Pack two V2DI vectors (operands 1 and 2) into one V4SI result with vpperm.
;; Operand 3 is the V16QI selector and appears in the pattern only as a
;; (use ...).  Operands 2 and 3 may not both be in memory.
10905 (define_insn "xop_pperm_pack_v2di_v4si"
10906 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
10909 (match_operand:V2DI 1 "register_operand" "x,x"))
10911 (match_operand:V2DI 2 "nonimmediate_operand" "x,m"))))
10912 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
10913 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
10914 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10915 [(set_attr "type" "sse4arg")
10916 (set_attr "mode" "TI")])
;; Pack two V4SI vectors (operands 1 and 2) into one V8HI result with vpperm.
;; Operand 3 is the V16QI selector and appears in the pattern only as a
;; (use ...).  Operands 2 and 3 may not both be in memory.
10918 (define_insn "xop_pperm_pack_v4si_v8hi"
10919 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
10922 (match_operand:V4SI 1 "register_operand" "x,x"))
10924 (match_operand:V4SI 2 "nonimmediate_operand" "x,m"))))
10925 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
10926 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
10927 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10928 [(set_attr "type" "sse4arg")
10929 (set_attr "mode" "TI")])
;; Pack two V8HI vectors (operands 1 and 2) into one V16QI result with
;; vpperm.  Operand 3 is the V16QI selector and appears in the pattern only
;; as a (use ...).  Operands 2 and 3 may not both be in memory.
10931 (define_insn "xop_pperm_pack_v8hi_v16qi"
10932 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
10935 (match_operand:V8HI 1 "register_operand" "x,x"))
10937 (match_operand:V8HI 2 "nonimmediate_operand" "x,m"))))
10938 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
10939 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
10940 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10941 [(set_attr "type" "sse4arg")
10942 (set_attr "mode" "TI")])
10944 ;; XOP packed rotate instructions
;; Expander for a VI_128 vector rotate-left by an SI count (operand 2).
;; When operand 2 is not accepted by const_0_to_<sserotatemax>_operand, the
;; count is converted to the element mode if its mode differs, placed in all
;; <ssescalarnum> elements of a vector via vec_init, and the variable-count
;; xop_vrotl<mode>3 pattern is emitted.
;; NOTE(review): handling of the immediate case is not visible in this
;; excerpt -- presumably it falls through to the expander's own pattern.
10945 (define_expand "rotl<mode>3"
10946 [(set (match_operand:VI_128 0 "register_operand" "")
10948 (match_operand:VI_128 1 "nonimmediate_operand" "")
10949 (match_operand:SI 2 "general_operand")))]
10952 /* If we were given a scalar, convert it to parallel */
10953 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
10955 rtvec vs = rtvec_alloc (<ssescalarnum>);
10956 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
10957 rtx reg = gen_reg_rtx (<MODE>mode);
10958 rtx op2 = operands[2];
10961 if (GET_MODE (op2) != <ssescalarmode>mode)
10963 op2 = gen_reg_rtx (<ssescalarmode>mode);
10964 convert_move (op2, operands[2], false);
10967 for (i = 0; i < <ssescalarnum>; i++)
10968 RTVEC_ELT (vs, i) = op2;
10970 emit_insn (gen_vec_init<mode> (reg, par));
10971 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
;; Expander for a VI_128 vector rotate-right by an SI count (operand 2).
;; When operand 2 is not accepted by const_0_to_<sserotatemax>_operand, the
;; count is converted to the element mode if its mode differs and placed in
;; all elements of a vector via vec_init.  That vector is negated and passed
;; to xop_vrotl<mode>3, so the right rotate is done as a left rotate by the
;; negated per-element count.
;; NOTE(review): handling of the immediate case is not visible in this
;; excerpt.
10976 (define_expand "rotr<mode>3"
10977 [(set (match_operand:VI_128 0 "register_operand" "")
10979 (match_operand:VI_128 1 "nonimmediate_operand" "")
10980 (match_operand:SI 2 "general_operand")))]
10983 /* If we were given a scalar, convert it to parallel */
10984 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
10986 rtvec vs = rtvec_alloc (<ssescalarnum>);
10987 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
10988 rtx neg = gen_reg_rtx (<MODE>mode);
10989 rtx reg = gen_reg_rtx (<MODE>mode);
10990 rtx op2 = operands[2];
10993 if (GET_MODE (op2) != <ssescalarmode>mode)
10995 op2 = gen_reg_rtx (<ssescalarmode>mode);
10996 convert_move (op2, operands[2], false);
10999 for (i = 0; i < <ssescalarnum>; i++)
11000 RTVEC_ELT (vs, i) = op2;
11002 emit_insn (gen_vec_init<mode> (reg, par));
11003 emit_insn (gen_neg<mode>2 (neg, reg));
11004 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], neg));
;; Rotate-left of a VI_128 vector by an immediate count.  Operand 2 must
;; satisfy const_0_to_<sserotatemax>_operand and is printed directly as the
;; vprot immediate; operand 1 may be a register or memory.
11009 (define_insn "xop_rotl<mode>3"
11010 [(set (match_operand:VI_128 0 "register_operand" "=x")
11012 (match_operand:VI_128 1 "nonimmediate_operand" "xm")
11013 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
11015 "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11016 [(set_attr "type" "sseishft")
11017 (set_attr "length_immediate" "1")
11018 (set_attr "mode" "TI")])
;; Rotate-right of a VI_128 vector by an immediate count (operand 2).
;; The insn is emitted as vprot with a left-rotate immediate, so the count is
;; rewritten into operands[3] as (element width in bits) - count.
;; The element width has to come from the scalar element mode.  Using
;; <ssescalarnum> * 8 would be wrong: <ssescalarnum> is the number of
;; elements, so that expression equals the element width only for V4SI
;; (4 * 8 == 32) and yields 128/64/16 instead of 8/16/64 for
;; V16QI/V8HI/V2DI.
11020 (define_insn "xop_rotr<mode>3"
11021 [(set (match_operand:VI_128 0 "register_operand" "=x")
11023 (match_operand:VI_128 1 "nonimmediate_operand" "xm")
11024 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
11027 operands[3] = GEN_INT (GET_MODE_BITSIZE (<ssescalarmode>mode) - INTVAL (operands[2]));
11028 return \"vprot<ssemodesuffix>\t{%3, %1, %0|%0, %1, %3}\";
11030 [(set_attr "type" "sseishft")
11031 (set_attr "length_immediate" "1")
11032 (set_attr "mode" "TI")])
;; Expander for a per-element variable rotate-right: negates the vector of
;; counts (operand 2) into a fresh register and emits xop_vrotl<mode>3 with
;; the negated counts.
11034 (define_expand "vrotr<mode>3"
11035 [(match_operand:VI_128 0 "register_operand" "")
11036 (match_operand:VI_128 1 "register_operand" "")
11037 (match_operand:VI_128 2 "register_operand" "")]
11040 rtx reg = gen_reg_rtx (<MODE>mode);
11041 emit_insn (gen_neg<mode>2 (reg, operands[2]));
11042 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
;; Expander for a per-element variable rotate-left: forwards its three
;; operands unchanged to xop_vrotl<mode>3.
11046 (define_expand "vrotl<mode>3"
11047 [(match_operand:VI_128 0 "register_operand" "")
11048 (match_operand:VI_128 1 "register_operand" "")
11049 (match_operand:VI_128 2 "register_operand" "")]
11052 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], operands[2]));
;; Per-element variable rotate emitted as vprot with a vector count
;; (operand 2).  The RTL is an if_then_else on operand 2 whose visible arm
;; uses (neg (match_dup 2)).  Either operand 1 or operand 2 may be in memory,
;; but the insn condition rejects both being MEMs.
11056 (define_insn "xop_vrotl<mode>3"
11057 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
11058 (if_then_else:VI_128
11060 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
11063 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
11067 (neg:VI_128 (match_dup 2)))))]
11068 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11069 "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11070 [(set_attr "type" "sseishft")
11071 (set_attr "prefix_data16" "0")
11072 (set_attr "prefix_extra" "2")
11073 (set_attr "mode" "TI")])
11075 ;; XOP packed shift instructions.
11076 ;; FIXME: add V2DI back in
;; Expander for a per-element logical shift right (VI124_128 modes): negates
;; the vector of counts into a fresh register and emits xop_lshl<mode>3 with
;; the negated counts.
11077 (define_expand "vlshr<mode>3"
11078 [(match_operand:VI124_128 0 "register_operand" "")
11079 (match_operand:VI124_128 1 "register_operand" "")
11080 (match_operand:VI124_128 2 "register_operand" "")]
11083 rtx neg = gen_reg_rtx (<MODE>mode);
11084 emit_insn (gen_neg<mode>2 (neg, operands[2]));
11085 emit_insn (gen_xop_lshl<mode>3 (operands[0], operands[1], neg));
;; Expander for a per-element arithmetic shift right (VI124_128 modes):
;; negates the vector of counts into a fresh register and emits
;; xop_ashl<mode>3 with the negated counts.
11089 (define_expand "vashr<mode>3"
11090 [(match_operand:VI124_128 0 "register_operand" "")
11091 (match_operand:VI124_128 1 "register_operand" "")
11092 (match_operand:VI124_128 2 "register_operand" "")]
11095 rtx neg = gen_reg_rtx (<MODE>mode);
11096 emit_insn (gen_neg<mode>2 (neg, operands[2]));
11097 emit_insn (gen_xop_ashl<mode>3 (operands[0], operands[1], neg));
;; Expander for a per-element shift left (VI124_128 modes): forwards its
;; three operands unchanged to xop_ashl<mode>3.
11101 (define_expand "vashl<mode>3"
11102 [(match_operand:VI124_128 0 "register_operand" "")
11103 (match_operand:VI124_128 1 "register_operand" "")
11104 (match_operand:VI124_128 2 "register_operand" "")]
11107 emit_insn (gen_xop_ashl<mode>3 (operands[0], operands[1], operands[2]));
;; Per-element variable arithmetic shift emitted as vpsha with a vector count
;; (operand 2).  The RTL is an if_then_else on operand 2 whose visible arm
;; uses (neg (match_dup 2)).  Either operand 1 or operand 2 may be in memory,
;; but the insn condition rejects both being MEMs.
11111 (define_insn "xop_ashl<mode>3"
11112 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
11113 (if_then_else:VI_128
11115 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
11118 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
11122 (neg:VI_128 (match_dup 2)))))]
11123 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11124 "vpsha<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11125 [(set_attr "type" "sseishft")
11126 (set_attr "prefix_data16" "0")
11127 (set_attr "prefix_extra" "2")
11128 (set_attr "mode" "TI")])
;; Per-element variable logical shift emitted as vpshl with a vector count
;; (operand 2).  The RTL is an if_then_else on operand 2 whose visible arm
;; uses (neg (match_dup 2)).  Either operand 1 or operand 2 may be in memory,
;; but the insn condition rejects both being MEMs.
11130 (define_insn "xop_lshl<mode>3"
11131 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
11132 (if_then_else:VI_128
11134 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
11137 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
11141 (neg:VI_128 (match_dup 2)))))]
11142 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11143 "vpshl<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11144 [(set_attr "type" "sseishft")
11145 (set_attr "prefix_data16" "0")
11146 (set_attr "prefix_extra" "2")
11147 (set_attr "mode" "TI")])
11149 ;; SSE2 doesn't have some shift variants, so define versions for XOP
;; Expander for V16QI shift-left by a scalar SI count: the count (operand 2)
;; is placed in all 16 elements of a vector via vec_initv16qi and the
;; variable-count xop_ashlv16qi3 pattern is emitted.
;; NOTE(review): operand 2 is SImode but is stored directly as the elements
;; of a V16QI PARALLEL; unlike rotl<mode>3 no conversion to the element mode
;; is visible here -- confirm vec_init handles this as intended.
11150 (define_expand "ashlv16qi3"
11151 [(match_operand:V16QI 0 "register_operand" "")
11152 (match_operand:V16QI 1 "register_operand" "")
11153 (match_operand:SI 2 "nonmemory_operand" "")]
11156 rtvec vs = rtvec_alloc (16);
11157 rtx par = gen_rtx_PARALLEL (V16QImode, vs);
11158 rtx reg = gen_reg_rtx (V16QImode);
11160 for (i = 0; i < 16; i++)
11161 RTVEC_ELT (vs, i) = operands[2];
11163 emit_insn (gen_vec_initv16qi (reg, par));
11164 emit_insn (gen_xop_ashlv16qi3 (operands[0], operands[1], reg));
;; Expander for V16QI logical shift by a scalar SI count: the count
;; (operand 2) is placed in all 16 elements of a vector via vec_initv16qi and
;; the variable-count xop_lshlv16qi3 pattern is emitted.
;; NOTE(review): operand 2 is SImode but is stored directly as the elements
;; of a V16QI PARALLEL; no conversion to the element mode is visible here --
;; confirm vec_init handles this as intended.
11168 (define_expand "lshlv16qi3"
11169 [(match_operand:V16QI 0 "register_operand" "")
11170 (match_operand:V16QI 1 "register_operand" "")
11171 (match_operand:SI 2 "nonmemory_operand" "")]
11174 rtvec vs = rtvec_alloc (16);
11175 rtx par = gen_rtx_PARALLEL (V16QImode, vs);
11176 rtx reg = gen_reg_rtx (V16QImode);
11178 for (i = 0; i < 16; i++)
11179 RTVEC_ELT (vs, i) = operands[2];
11181 emit_insn (gen_vec_initv16qi (reg, par));
11182 emit_insn (gen_xop_lshlv16qi3 (operands[0], operands[1], reg));
;; Expander for V16QI arithmetic shift-right by a scalar SI count, done as an
;; xop_ashlv16qi3 shift with a negated count.  A CONST_INT count is negated
;; at expand time before being placed in all 16 elements.  A non-constant
;; count is placed in the vector first, and the vector is then negated with
;; negv16qi2.
11186 (define_expand "ashrv16qi3"
11187 [(match_operand:V16QI 0 "register_operand" "")
11188 (match_operand:V16QI 1 "register_operand" "")
11189 (match_operand:SI 2 "nonmemory_operand" "")]
11192 rtvec vs = rtvec_alloc (16);
11193 rtx par = gen_rtx_PARALLEL (V16QImode, vs);
11194 rtx reg = gen_reg_rtx (V16QImode);
11196 rtx ele = ((CONST_INT_P (operands[2]))
11197 ? GEN_INT (- INTVAL (operands[2]))
11200 for (i = 0; i < 16; i++)
11201 RTVEC_ELT (vs, i) = ele;
11203 emit_insn (gen_vec_initv16qi (reg, par));
11205 if (!CONST_INT_P (operands[2]))
11207 rtx neg = gen_reg_rtx (V16QImode);
11208 emit_insn (gen_negv16qi2 (neg, reg));
11209 emit_insn (gen_xop_ashlv16qi3 (operands[0], operands[1], neg));
11212 emit_insn (gen_xop_ashlv16qi3 (operands[0], operands[1], reg));
;; Expander for V2DI arithmetic shift-right by a scalar DI count, done as an
;; xop_ashlv2di3 shift with a negated count.  The negated count `ele' is
;; obtained in one of three ways: a CONST_INT is negated at expand time; a
;; value whose mode is not DImode is converted to DImode and then negated
;; with negdi2; otherwise the operand is negated with negdi2 directly.
;; `ele' is then placed in both vector elements via vec_initv2di.
11217 (define_expand "ashrv2di3"
11218 [(match_operand:V2DI 0 "register_operand" "")
11219 (match_operand:V2DI 1 "register_operand" "")
11220 (match_operand:DI 2 "nonmemory_operand" "")]
11223 rtvec vs = rtvec_alloc (2);
11224 rtx par = gen_rtx_PARALLEL (V2DImode, vs);
11225 rtx reg = gen_reg_rtx (V2DImode);
11228 if (CONST_INT_P (operands[2]))
11229 ele = GEN_INT (- INTVAL (operands[2]));
11230 else if (GET_MODE (operands[2]) != DImode)
11232 rtx move = gen_reg_rtx (DImode);
11233 ele = gen_reg_rtx (DImode);
11234 convert_move (move, operands[2], false);
11235 emit_insn (gen_negdi2 (ele, move));
11239 ele = gen_reg_rtx (DImode);
11240 emit_insn (gen_negdi2 (ele, operands[2]));
11243 RTVEC_ELT (vs, 0) = ele;
11244 RTVEC_ELT (vs, 1) = ele;
11245 emit_insn (gen_vec_initv2di (reg, par));
11246 emit_insn (gen_xop_ashlv2di3 (operands[0], operands[1], reg));
11250 ;; XOP FRCZ support
;; FRCZ on every mode of the FMAMODE iterator: one source operand (register
;; or memory) wrapped in an unspec, emitted as vfrcz<ssemodesuffix>.
11251 (define_insn "xop_frcz<mode>2"
11252 [(set (match_operand:FMAMODE 0 "register_operand" "=x")
11254 [(match_operand:FMAMODE 1 "nonimmediate_operand" "xm")]
11257 "vfrcz<ssemodesuffix>\t{%1, %0|%0, %1}"
11258 [(set_attr "type" "ssecvt1")
11259 (set_attr "mode" "<MODE>")])
;; Expander for the scalar ("vm") FRCZ form on VF_128 modes.  Its preparation
;; code sets operands[3] to the zero vector of <MODE>mode for use by the
;; pattern.
11262 (define_expand "xop_vmfrcz<mode>2"
11263 [(set (match_operand:VF_128 0 "register_operand")
11266 [(match_operand:VF_128 1 "nonimmediate_operand")]
11272 operands[3] = CONST0_RTX (<MODE>mode);
;; Insn for the scalar FRCZ form on VF_128 modes, emitted as
;; vfrcz<ssescalarmodesuffix>.  Operand 2 must be a zero constant
;; (const0_operand) and does not appear in the output template.
11275 (define_insn "*xop_vmfrcz_<mode>"
11276 [(set (match_operand:VF_128 0 "register_operand" "=x")
11279 [(match_operand:VF_128 1 "nonimmediate_operand" "xm")]
11281 (match_operand:VF_128 2 "const0_operand")
11284 "vfrcz<ssescalarmodesuffix>\t{%1, %0|%0, %1}"
11285 [(set_attr "type" "ssecvt1")
11286 (set_attr "mode" "<MODE>")])
;; Vector compare producing a mask, emitted as vpcom<cond><suffix>.  The
;; comparison is operator 1 (ix86_comparison_int_operator) applied to
;; operands 2 and 3, and is printed into the mnemonic with %Y1.
;; NOTE(review): "type" is "sse4arg" here but "ssecmp" in the unsigned
;; variants below -- confirm this difference is intended.
11288 (define_insn "xop_maskcmp<mode>3"
11289 [(set (match_operand:VI_128 0 "register_operand" "=x")
11290 (match_operator:VI_128 1 "ix86_comparison_int_operator"
11291 [(match_operand:VI_128 2 "register_operand" "x")
11292 (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
11294 "vpcom%Y1<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
11295 [(set_attr "type" "sse4arg")
11296 (set_attr "prefix_data16" "0")
11297 (set_attr "prefix_rep" "0")
11298 (set_attr "prefix_extra" "2")
11299 (set_attr "length_immediate" "1")
11300 (set_attr "mode" "TI")])
;; Unsigned vector compare producing a mask, emitted as
;; vpcom<cond>u<suffix>.  The comparison is operator 1
;; (ix86_comparison_uns_operator) applied to operands 2 and 3.
11302 (define_insn "xop_maskcmp_uns<mode>3"
11303 [(set (match_operand:VI_128 0 "register_operand" "=x")
11304 (match_operator:VI_128 1 "ix86_comparison_uns_operator"
11305 [(match_operand:VI_128 2 "register_operand" "x")
11306 (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
11308 "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
11309 [(set_attr "type" "ssecmp")
11310 (set_attr "prefix_data16" "0")
11311 (set_attr "prefix_rep" "0")
11312 (set_attr "prefix_extra" "2")
11313 (set_attr "length_immediate" "1")
11314 (set_attr "mode" "TI")])
11316 ;; Version of pcom*u* that is called from the intrinsics that allows pcomequ*
11317 ;; and pcomneu* not to be converted to the signed ones in case somebody needs
11318 ;; the exact instruction generated for the intrinsic.
;; Same comparison and output template as xop_maskcmp_uns<mode>3, but the
;; comparison is wrapped in UNSPEC_XOP_UNSIGNED_CMP.
;; NOTE(review): unlike the other two maskcmp patterns this one sets no
;; "prefix_rep" attribute -- confirm that is intended.
11319 (define_insn "xop_maskcmp_uns2<mode>3"
11320 [(set (match_operand:VI_128 0 "register_operand" "=x")
11322 [(match_operator:VI_128 1 "ix86_comparison_uns_operator"
11323 [(match_operand:VI_128 2 "register_operand" "x")
11324 (match_operand:VI_128 3 "nonimmediate_operand" "xm")])]
11325 UNSPEC_XOP_UNSIGNED_CMP))]
11327 "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
11328 [(set_attr "type" "ssecmp")
11329 (set_attr "prefix_data16" "0")
11330 (set_attr "prefix_extra" "2")
11331 (set_attr "length_immediate" "1")
11332 (set_attr "mode" "TI")])
11334 ;; Pcomtrue and pcomfalse support. These are useless instructions, but are
11335 ;; being added here to be complete.
;; vpcomtrue / vpcomfalse, modelled as UNSPEC_XOP_TRUEFALSE.  Integer-constant
;; operand 3 selects the mnemonic: nonzero emits vpcomtrue<suffix>, zero
;; emits vpcomfalse<suffix>.  Operand 3 itself is not printed.
11336 (define_insn "xop_pcom_tf<mode>3"
11337 [(set (match_operand:VI_128 0 "register_operand" "=x")
11339 [(match_operand:VI_128 1 "register_operand" "x")
11340 (match_operand:VI_128 2 "nonimmediate_operand" "xm")
11341 (match_operand:SI 3 "const_int_operand" "n")]
11342 UNSPEC_XOP_TRUEFALSE))]
11345 return ((INTVAL (operands[3]) != 0)
11346 ? "vpcomtrue<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11347 : "vpcomfalse<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}");
11349 [(set_attr "type" "ssecmp")
11350 (set_attr "prefix_data16" "0")
11351 (set_attr "prefix_extra" "2")
11352 (set_attr "length_immediate" "1")
11353 (set_attr "mode" "TI")])
;; Two-source permute on every mode of the VF iterator, emitted as
;; vpermil2<ssemodesuffix>.  Operands 1 and 2 are the data sources, operand 3
;; is the selector in the matching integer vector mode (<sseintvecmode>) and
;; may be in memory, and operand 4 is an immediate in the range 0..3.
11355 (define_insn "xop_vpermil2<mode>3"
11356 [(set (match_operand:VF 0 "register_operand" "=x")
11358 [(match_operand:VF 1 "register_operand" "x")
11359 (match_operand:VF 2 "nonimmediate_operand" "%x")
11360 (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "xm")
11361 (match_operand:SI 4 "const_0_to_3_operand" "n")]
11364 "vpermil2<ssemodesuffix>\t{%4, %3, %2, %1, %0|%0, %1, %2, %3, %4}"
11365 [(set_attr "type" "sse4arg")
11366 (set_attr "length_immediate" "1")
11367 (set_attr "mode" "<MODE>")])
11369 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; AES encryption round, modelled as a V2DI unspec of operands 1 and 2.
;; Alternative 0 ("noavx") is the two-operand form with operand 1 tied to the
;; destination ("0"); alternative 1 ("avx") is the three-operand vaesenc.
11371 (define_insn "aesenc"
11372 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
11373 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
11374 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
11378 aesenc\t{%2, %0|%0, %2}
11379 vaesenc\t{%2, %1, %0|%0, %1, %2}"
11380 [(set_attr "isa" "noavx,avx")
11381 (set_attr "type" "sselog1")
11382 (set_attr "prefix_extra" "1")
11383 (set_attr "prefix" "orig,vex")
11384 (set_attr "mode" "TI")])
;; Last AES encryption round, modelled as UNSPEC_AESENCLAST of operands 1 and
;; 2.  Alternative 0 ("noavx") is the two-operand form with operand 1 tied to
;; the destination; alternative 1 ("avx") is the three-operand vaesenclast.
11386 (define_insn "aesenclast"
11387 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
11388 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
11389 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
11390 UNSPEC_AESENCLAST))]
11393 aesenclast\t{%2, %0|%0, %2}
11394 vaesenclast\t{%2, %1, %0|%0, %1, %2}"
11395 [(set_attr "isa" "noavx,avx")
11396 (set_attr "type" "sselog1")
11397 (set_attr "prefix_extra" "1")
11398 (set_attr "prefix" "orig,vex")
11399 (set_attr "mode" "TI")])
;; AES decryption round, modelled as a V2DI unspec of operands 1 and 2.
;; Alternative 0 ("noavx") is the two-operand form with operand 1 tied to the
;; destination ("0"); alternative 1 ("avx") is the three-operand vaesdec.
11401 (define_insn "aesdec"
11402 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
11403 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
11404 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
11408 aesdec\t{%2, %0|%0, %2}
11409 vaesdec\t{%2, %1, %0|%0, %1, %2}"
11410 [(set_attr "isa" "noavx,avx")
11411 (set_attr "type" "sselog1")
11412 (set_attr "prefix_extra" "1")
11413 (set_attr "prefix" "orig,vex")
11414 (set_attr "mode" "TI")])
;; Emits aesdeclast (UNSPEC_AESDECLAST) on V2DI operands.  Alternative 0 is
;; the two-operand non-AVX form with operand 1 tied to the destination;
;; alternative 1 is the three-operand VEX form vaesdeclast.
11416 (define_insn "aesdeclast"
11417 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
11418 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
11419 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
11420 UNSPEC_AESDECLAST))]
11423 aesdeclast\t{%2, %0|%0, %2}
11424 vaesdeclast\t{%2, %1, %0|%0, %1, %2}"
11425 [(set_attr "isa" "noavx,avx")
11426 (set_attr "type" "sselog1")
11427 (set_attr "prefix_extra" "1")
11428 (set_attr "prefix" "orig,vex")
11429 (set_attr "mode" "TI")])
;; Emits aesimc with a single V2DI source (register or memory).  The "%v"
;; in the template together with prefix "maybe_vex" lets one pattern cover
;; both the legacy and the VEX encoding.
11431 (define_insn "aesimc"
11432 [(set (match_operand:V2DI 0 "register_operand" "=x")
11433 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")]
11436 "%vaesimc\t{%1, %0|%0, %1}"
11437 [(set_attr "type" "sselog1")
11438 (set_attr "prefix_extra" "1")
11439 (set_attr "prefix" "maybe_vex")
11440 (set_attr "mode" "TI")])
;; Emits aeskeygenassist (UNSPEC_AESKEYGENASSIST): V2DI source in register
;; or memory plus an 8-bit immediate (operand 2, range 0..255).  Uses the
;; "%v" / "maybe_vex" convention to cover legacy and VEX encodings.
11442 (define_insn "aeskeygenassist"
11443 [(set (match_operand:V2DI 0 "register_operand" "=x")
11444 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")
11445 (match_operand:SI 2 "const_0_to_255_operand" "n")]
11446 UNSPEC_AESKEYGENASSIST))]
11448 "%vaeskeygenassist\t{%2, %1, %0|%0, %1, %2}"
11449 [(set_attr "type" "sselog1")
11450 (set_attr "prefix_extra" "1")
11451 (set_attr "length_immediate" "1")
11452 (set_attr "prefix" "maybe_vex")
11453 (set_attr "mode" "TI")])
;; Emits pclmulqdq on V2DI operands with an 8-bit immediate (operand 3).
;; Alternative 0 is the non-AVX form with operand 1 tied to the destination;
;; alternative 1 is the VEX form vpclmulqdq with a separate destination.
11455 (define_insn "pclmulqdq"
11456 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
11457 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
11458 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")
11459 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
11463 pclmulqdq\t{%3, %2, %0|%0, %2, %3}
11464 vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11465 [(set_attr "isa" "noavx,avx")
11466 (set_attr "type" "sselog1")
11467 (set_attr "prefix_extra" "1")
11468 (set_attr "length_immediate" "1")
11469 (set_attr "prefix" "orig,vex")
11470 (set_attr "mode" "TI")])
;; Expander that builds the PARALLEL used for vzeroall.  Element 0 is an
;; UNSPEC_VOLATILE wrapping const0_rtx; each following element is a SET of
;; one V8SImode SSE register to the V8SImode zero constant.  The number of
;; registers is 16 when TARGET_64BIT and 8 otherwise, so the PARALLEL has
;; nregs + 1 elements.
11472 (define_expand "avx_vzeroall"
11473 [(match_par_dup 0 [(const_int 0)])]
11476 int nregs = TARGET_64BIT ? 16 : 8;
11479 operands[0] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs + 1));
11481 XVECEXP (operands[0], 0, 0)
11482 = gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, const0_rtx),
11485 for (regno = 0; regno < nregs; regno++)
11486 XVECEXP (operands[0], 0, regno + 1)
11487 = gen_rtx_SET (VOIDmode,
11488 gen_rtx_REG (V8SImode, SSE_REGNO (regno)),
11489 CONST0_RTX (V8SImode));
;; Matches a PARALLEL accepted by the vzeroall_operation predicate whose
;; first element is the UNSPECV_VZEROALL unspec_volatile.  The attributes
;; describe a VEX-prefixed insn with no modrm byte and no memory access.
11492 (define_insn "*avx_vzeroall"
11493 [(match_parallel 0 "vzeroall_operation"
11494 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROALL)])]
11497 [(set_attr "type" "sse")
11498 (set_attr "modrm" "0")
11499 (set_attr "memory" "none")
11500 (set_attr "prefix" "vex")
11501 (set_attr "mode" "OI")])
;; Operand 0 is a const_int carried inside the UNSPECV_VZEROUPPER
;; unspec_volatile.  NOTE(review): what that integer encodes is not visible
;; in this pattern -- confirm against the code that generates it.
11503 ;; Clear the upper 128bits of AVX registers, equivalent to a NOP
11504 ;; if the upper 128bits are unused.
11505 (define_insn "avx_vzeroupper"
11506 [(unspec_volatile [(match_operand 0 "const_int_operand" "")]
11507 UNSPECV_VZEROUPPER)]
11510 [(set_attr "type" "sse")
11511 (set_attr "modrm" "0")
11512 (set_attr "memory" "none")
11513 (set_attr "prefix" "vex")
11514 (set_attr "mode" "OI")])
;; Maps each integer vector mode to the 128-bit mode with the same element
;; type: a 256-bit mode maps to its half-width counterpart and a 128-bit
;; mode maps to itself (e.g. V4DI -> V2DI, V2DI -> V2DI).
11516 (define_mode_attr AVXTOSSEMODE
11517 [(V4DI "V2DI") (V2DI "V2DI")
11518 (V8SI "V4SI") (V4SI "V4SI")
11519 (V16HI "V8HI") (V8HI "V8HI")
11520 (V32QI "V16QI") (V16QI "V16QI")])
;; Selects element 0 of operand 1 (mode <AVXTOSSEMODE>, register or memory)
;; and emits vpbroadcast<ssemodesuffix> into the VI-mode destination.
11522 (define_insn "avx2_pbroadcast<mode>"
11523 [(set (match_operand:VI 0 "register_operand" "=x")
11525 (vec_select:<ssescalarmode>
11526 (match_operand:<AVXTOSSEMODE> 1 "nonimmediate_operand" "xm")
11527 (parallel [(const_int 0)]))))]
11529 "vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %1}"
11530 [(set_attr "type" "ssemov")
11531 (set_attr "prefix_extra" "1")
11532 (set_attr "prefix" "vex")
11533 (set_attr "mode" "<sseinsnmode>")])
;; Emits vpermd: V8SI register operand 1 and V8SI register-or-memory
;; operand 2 produce a V8SI result.
11535 (define_insn "avx2_permvarv8si"
11536 [(set (match_operand:V8SI 0 "register_operand" "=x")
11538 [(match_operand:V8SI 1 "register_operand" "x")
11539 (match_operand:V8SI 2 "nonimmediate_operand" "xm")]
11542 "vpermd\t{%2, %1, %0|%0, %1, %2}"
11543 [(set_attr "type" "sselog")
11544 (set_attr "prefix" "vex")
11545 (set_attr "mode" "OI")])
;; Emits vpermpd: V4DF source operand 1 permuted under the 8-bit immediate
;; operand 2.  Operand 1 uses nonimmediate_operand so that the "m" half of
;; its "xm" constraint can match a memory source directly; with
;; register_operand the predicate rejected memory and the alternative was
;; only reachable through reload.  This matches avx2_permv4di_1, which
;; pairs nonimmediate_operand with "xm" for its source.
11547 (define_insn "avx2_permv4df"
11548 [(set (match_operand:V4DF 0 "register_operand" "=x")
11550 [(match_operand:V4DF 1 "nonimmediate_operand" "xm")
11551 (match_operand:SI 2 "const_0_to_255_operand" "n")]
11554 "vpermpd\t{%2, %1, %0|%0, %1, %2}"
11555 [(set_attr "type" "sselog")
11556 (set_attr "prefix_extra" "1")
11557 (set_attr "prefix" "vex")
11558 (set_attr "mode" "OI")])
;; Emits vpermps: V8SF register operand 1 and V8SF register-or-memory
;; operand 2 produce a V8SF result.
11560 (define_insn "avx2_permvarv8sf"
11561 [(set (match_operand:V8SF 0 "register_operand" "=x")
11563 [(match_operand:V8SF 1 "register_operand" "x")
11564 (match_operand:V8SF 2 "nonimmediate_operand" "xm")]
11567 "vpermps\t{%2, %1, %0|%0, %1, %2}"
11568 [(set_attr "type" "sselog")
11569 (set_attr "prefix" "vex")
11570 (set_attr "mode" "OI")])
;; Expander that splits the 8-bit immediate (operand 2) into four 2-bit
;; fields -- bits 1:0, 3:2, 5:4 and 7:6 -- and passes them as separate
;; const_int operands to avx2_permv4di_1.
11572 (define_expand "avx2_permv4di"
11573 [(match_operand:V4DI 0 "register_operand" "")
11574 (match_operand:V4DI 1 "nonimmediate_operand" "")
11575 (match_operand:SI 2 "const_0_to_255_operand" "")]
11578 int mask = INTVAL (operands[2]);
11579 emit_insn (gen_avx2_permv4di_1 (operands[0], operands[1],
11580 GEN_INT ((mask >> 0) & 3),
11581 GEN_INT ((mask >> 2) & 3),
11582 GEN_INT ((mask >> 4) & 3),
11583 GEN_INT ((mask >> 6) & 3)));
;; Operands 2..5 are the four per-element selectors, each in the range 0..3.
;; The output code packs them back into a single immediate (two bits per
;; selector, operand 2 in the lowest bits), stores it in operands[2] and
;; emits vpermq.
11587 (define_insn "avx2_permv4di_1"
11588 [(set (match_operand:V4DI 0 "register_operand" "=x")
11590 (match_operand:V4DI 1 "nonimmediate_operand" "xm")
11591 (parallel [(match_operand 2 "const_0_to_3_operand" "")
11592 (match_operand 3 "const_0_to_3_operand" "")
11593 (match_operand 4 "const_0_to_3_operand" "")
11594 (match_operand 5 "const_0_to_3_operand" "")])))]
11598 mask |= INTVAL (operands[2]) << 0;
11599 mask |= INTVAL (operands[3]) << 2;
11600 mask |= INTVAL (operands[4]) << 4;
11601 mask |= INTVAL (operands[5]) << 6;
11602 operands[2] = GEN_INT (mask);
11603 return "vpermq\t{%2, %1, %0|%0, %1, %2}";
11605 [(set_attr "type" "sselog")
11606 (set_attr "prefix" "vex")
11607 (set_attr "mode" "OI")])
;; Emits vperm2i128 on two V4DI sources (operand 2 may be in memory) with an
;; 8-bit immediate selector in operand 3.
11609 (define_insn "avx2_permv2ti"
11610 [(set (match_operand:V4DI 0 "register_operand" "=x")
11612 [(match_operand:V4DI 1 "register_operand" "x")
11613 (match_operand:V4DI 2 "nonimmediate_operand" "xm")
11614 (match_operand:SI 3 "const_0_to_255_operand" "n")]
11617 "vperm2i128\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11618 [(set_attr "type" "sselog")
11619 (set_attr "prefix" "vex")
11620 (set_attr "mode" "OI")])
;; Duplicates into a V4DF destination the element chosen by the visible
;; selector (element 0) of V2DF register operand 1; emits vbroadcastsd.
11622 (define_insn "avx2_vec_dupv4df"
11623 [(set (match_operand:V4DF 0 "register_operand" "=x")
11624 (vec_duplicate:V4DF
11626 (match_operand:V2DF 1 "register_operand" "x")
11627 (parallel [(const_int 0)]))))]
11629 "vbroadcastsd\t{%1, %0|%0, %1}"
11630 [(set_attr "type" "sselog1")
11631 (set_attr "prefix" "vex")
11632 (set_attr "mode" "V4DF")])
;; The four 256-bit modes with 32-bit or 64-bit elements, integer and float.
11634 ;; Modes handled by AVX vec_dup patterns.
11635 (define_mode_iterator AVX_VEC_DUP_MODE
11636 [V8SI V8SF V4DI V4DF])
;; Duplicates a scalar into every element of an AVX_VEC_DUP_MODE vector.
;; Alternative 0 takes the scalar from memory and emits
;; vbroadcast<ssescalarmodesuffix>.  Alternative 1 takes it from a register
;; and is discouraged by "?"; its output template is not visible here.
11638 (define_insn "vec_dup<mode>"
11639 [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand" "=x,x")
11640 (vec_duplicate:AVX_VEC_DUP_MODE
11641 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "m,?x")))]
11644 vbroadcast<ssescalarmodesuffix>\t{%1, %0|%0, %1}
11646 [(set_attr "type" "ssemov")
11647 (set_attr "prefix_extra" "1")
11648 (set_attr "prefix" "vex")
11649 (set_attr "mode" "V8SF")])
;; Emits vbroadcasti128 from a memory operand of the half-width vector mode
;; into a VI_256 register destination.
11651 (define_insn "avx2_vbroadcasti128_<mode>"
11652 [(set (match_operand:VI_256 0 "register_operand" "=x")
11654 (match_operand:<ssehalfvecmode> 1 "memory_operand" "m")
11657 "vbroadcasti128\t{%1, %0|%0, %1}"
11658 [(set_attr "type" "ssemov")
11659 (set_attr "prefix_extra" "1")
11660 (set_attr "prefix" "vex")
11661 (set_attr "mode" "OI")])
;; NOTE(review): the opening line of this definition is not visible in this
;; copy; it reads as the body of a split -- confirm.
;; Applies under TARGET_AVX after reload to a vec_duplicate of a register
;; scalar.  The replacement first duplicates the scalar into operands[2],
;; which is the destination hard register viewed in the half-width vector
;; mode, and then concatenates that half with itself.
11664 [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand" "")
11665 (vec_duplicate:AVX_VEC_DUP_MODE
11666 (match_operand:<ssescalarmode> 1 "register_operand" "")))]
11667 "TARGET_AVX && reload_completed"
11668 [(set (match_dup 2)
11669 (vec_duplicate:<ssehalfvecmode> (match_dup 1)))
11671 (vec_concat:AVX_VEC_DUP_MODE (match_dup 2) (match_dup 2)))]
11672 "operands[2] = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (operands[0]));")
;; Broadcast of a half-width vector into a V_256 destination, with three
;; alternatives:
;;   0: source in memory -> vbroadcast<i128>
;;   1: source already in the destination register (constraint "0")
;;      -> vinsert<i128> with immediate 1
;;   2: source in another register (discouraged by "?")
;;      -> vperm2<i128> with immediate 0
11674 (define_insn "avx_vbroadcastf128_<mode>"
11675 [(set (match_operand:V_256 0 "register_operand" "=x,x,x")
11677 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "m,0,?x")
11681 vbroadcast<i128>\t{%1, %0|%0, %1}
11682 vinsert<i128>\t{$1, %1, %0, %0|%0, %0, %1, 1}
11683 vperm2<i128>\t{$0, %t1, %t1, %0|%0, %t1, %t1, 0}"
11684 [(set_attr "type" "ssemov,sselog1,sselog1")
11685 (set_attr "prefix_extra" "1")
11686 (set_attr "length_immediate" "0,1,1")
11687 (set_attr "prefix" "vex")
11688 (set_attr "mode" "<sseinsnmode>")])
;; In the V4SF pattern below, operand 3 is the index of the broadcast
;; element (elt).  On the vbroadcastss path the memory address is advanced
;; by elt * 4 bytes and re-typed to SFmode; on the vpermilps path the
;; immediate is elt * 0x55, which repeats a 2-bit index in all four 2-bit
;; fields of the byte.
11690 ;; Recognize broadcast as a vec_select as produced by builtin_vec_perm.
11691 ;; If it so happens that the input is in memory, use vbroadcast.
11692 ;; Otherwise use vpermilp (and in the case of 256-bit modes, vperm2f128).
11693 (define_insn "*avx_vperm_broadcast_v4sf"
11694 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
11696 (match_operand:V4SF 1 "nonimmediate_operand" "m,o,x")
11697 (match_parallel 2 "avx_vbroadcast_operand"
11698 [(match_operand 3 "const_int_operand" "C,n,n")])))]
11701 int elt = INTVAL (operands[3]);
11702 switch (which_alternative)
11706 operands[1] = adjust_address_nv (operands[1], SFmode, elt * 4);
11707 return "vbroadcastss\t{%1, %0|%0, %1}";
11709 operands[2] = GEN_INT (elt * 0x55);
11710 return "vpermilps\t{%2, %1, %0|%0, %1, %2}";
11712 gcc_unreachable ();
11715 [(set_attr "type" "ssemov,ssemov,sselog1")
11716 (set_attr "prefix_extra" "1")
11717 (set_attr "length_immediate" "0,0,1")
11718 (set_attr "prefix" "vex")
11719 (set_attr "mode" "SF,SF,V4SF")])
;; 256-bit float counterpart of the V4SF broadcast pattern; split after
;; reload into a vec_duplicate of operand 1.  The visible split code has two
;; pieces (the conditions choosing between them are not shown here):
;;  - a two-step shuffle: avx_vpermil<mode> replicates the wanted element
;;    inside each 128-bit lane, then avx_vperm2f128<mode>3 with mask
;;    (lane index) * 0x11 copies the wanted lane into both lanes;
;;  - an address adjustment that re-types operand 1 to the scalar mode at
;;    byte offset elt * GET_MODE_SIZE (scalar mode).
11721 (define_insn_and_split "*avx_vperm_broadcast_<mode>"
11722 [(set (match_operand:VF_256 0 "register_operand" "=x,x,x")
11724 (match_operand:VF_256 1 "nonimmediate_operand" "m,o,?x")
11725 (match_parallel 2 "avx_vbroadcast_operand"
11726 [(match_operand 3 "const_int_operand" "C,n,n")])))]
11729 "&& reload_completed"
11730 [(set (match_dup 0) (vec_duplicate:VF_256 (match_dup 1)))]
11732 rtx op0 = operands[0], op1 = operands[1];
11733 int elt = INTVAL (operands[3]);
11739 /* Shuffle element we care about into all elements of the 128-bit lane.
11740 The other lane gets shuffled too, but we don't care. */
11741 if (<MODE>mode == V4DFmode)
11742 mask = (elt & 1 ? 15 : 0);
11744 mask = (elt & 3) * 0x55;
11745 emit_insn (gen_avx_vpermil<mode> (op0, op1, GEN_INT (mask)));
11747 /* Shuffle the lane we care about into both lanes of the dest. */
11748 mask = (elt / (<ssescalarnum> / 2)) * 0x11;
11749 emit_insn (gen_avx_vperm2f128<mode>3 (op0, op0, op0, GEN_INT (mask)));
11753 operands[1] = adjust_address_nv (op1, <ssescalarmode>mode,
11754 elt * GET_MODE_SIZE (<ssescalarmode>mode));
;; Expander for the DF-element modes (VF2).  Converts the immediate into an
;; explicit selector PARALLEL with one mask bit per element: elements 0 and
;; 1 come from bits 0 and 1; for V4DFmode, elements 2 and 3 come from bits 2
;; and 3 with 2 added to each.
11757 (define_expand "avx_vpermil<mode>"
11758 [(set (match_operand:VF2 0 "register_operand" "")
11760 (match_operand:VF2 1 "nonimmediate_operand" "")
11761 (match_operand:SI 2 "const_0_to_255_operand" "")))]
11764 int mask = INTVAL (operands[2]);
11765 rtx perm[<ssescalarnum>];
11767 perm[0] = GEN_INT (mask & 1);
11768 perm[1] = GEN_INT ((mask >> 1) & 1);
11769 if (<MODE>mode == V4DFmode)
11771 perm[2] = GEN_INT (((mask >> 2) & 1) + 2);
11772 perm[3] = GEN_INT (((mask >> 3) & 1) + 2);
11776 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
;; Expander for the SF-element modes (VF1).  Converts the immediate into an
;; explicit selector PARALLEL with two mask bits per element for elements
;; 0..3.  For V8SFmode the same four 2-bit fields are reused for elements
;; 4..7 with 4 added to each.
11779 (define_expand "avx_vpermil<mode>"
11780 [(set (match_operand:VF1 0 "register_operand" "")
11782 (match_operand:VF1 1 "nonimmediate_operand" "")
11783 (match_operand:SI 2 "const_0_to_255_operand" "")))]
11786 int mask = INTVAL (operands[2]);
11787 rtx perm[<ssescalarnum>];
11789 perm[0] = GEN_INT (mask & 3);
11790 perm[1] = GEN_INT ((mask >> 2) & 3);
11791 perm[2] = GEN_INT ((mask >> 4) & 3);
11792 perm[3] = GEN_INT ((mask >> 6) & 3);
11793 if (<MODE>mode == V8SFmode)
11795 perm[4] = GEN_INT ((mask & 3) + 4);
11796 perm[5] = GEN_INT (((mask >> 2) & 3) + 4);
11797 perm[6] = GEN_INT (((mask >> 4) & 3) + 4);
11798 perm[7] = GEN_INT (((mask >> 6) & 3) + 4);
11802 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
;; Immediate-form vpermil.  The condition calls avx_vpermilp_parallel on the
;; selector PARALLEL (operand 2); the output code uses that helper's return
;; value minus 1 as the immediate and emits vpermil<ssemodesuffix>.
11805 (define_insn "*avx_vpermilp<mode>"
11806 [(set (match_operand:VF 0 "register_operand" "=x")
11808 (match_operand:VF 1 "nonimmediate_operand" "xm")
11809 (match_parallel 2 ""
11810 [(match_operand 3 "const_int_operand" "")])))]
11812 && avx_vpermilp_parallel (operands[2], <MODE>mode)"
11814 int mask = avx_vpermilp_parallel (operands[2], <MODE>mode) - 1;
11815 operands[2] = GEN_INT (mask);
11816 return "vpermil<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}";
11818 [(set_attr "type" "sselog")
11819 (set_attr "prefix_extra" "1")
11820 (set_attr "length_immediate" "1")
11821 (set_attr "prefix" "vex")
11822 (set_attr "mode" "<MODE>")])
;; Variable-selector vpermil: operand 1 is the VF source register and
;; operand 2 the selector in the matching integer vector mode (register or
;; memory).  Emits vpermil<ssemodesuffix>.
11824 (define_insn "avx_vpermilvar<mode>3"
11825 [(set (match_operand:VF 0 "register_operand" "=x")
11827 [(match_operand:VF 1 "register_operand" "x")
11828 (match_operand:<sseintvecmode> 2 "nonimmediate_operand" "xm")]
11831 "vpermil<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11832 [(set_attr "type" "sselog")
11833 (set_attr "prefix_extra" "1")
11834 (set_attr "prefix" "vex")
11835 (set_attr "mode" "<MODE>")])
;; Expander for vperm2f128.  The default form is the UNSPEC_VPERMIL2F128
;; unspec.  When neither bit 3 nor bit 7 of the immediate is set
;; ((mask & 0x88) == 0), the code instead builds a vec_select over the
;; vec_concat of operands 1 and 2: the low half of the result takes the
;; 128-bit half numbered (mask & 3) and the high half the one numbered
;; ((mask >> 4) & 3), counting halves across the concatenated pair.
11837 (define_expand "avx_vperm2f128<mode>3"
11838 [(set (match_operand:AVX256MODE2P 0 "register_operand" "")
11839 (unspec:AVX256MODE2P
11840 [(match_operand:AVX256MODE2P 1 "register_operand" "")
11841 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "")
11842 (match_operand:SI 3 "const_0_to_255_operand" "")]
11843 UNSPEC_VPERMIL2F128))]
11846 int mask = INTVAL (operands[3]);
11847 if ((mask & 0x88) == 0)
11849 rtx perm[<ssescalarnum>], t1, t2;
11850 int i, base, nelt = <ssescalarnum>, nelt2 = nelt / 2;
11852 base = (mask & 3) * nelt2;
11853 for (i = 0; i < nelt2; ++i)
11854 perm[i] = GEN_INT (base + i);
11856 base = ((mask >> 4) & 3) * nelt2;
11857 for (i = 0; i < nelt2; ++i)
11858 perm[i + nelt2] = GEN_INT (base + i);
11860 t2 = gen_rtx_VEC_CONCAT (<ssedoublevecmode>mode,
11861 operands[1], operands[2]);
11862 t1 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, perm));
11863 t2 = gen_rtx_VEC_SELECT (<MODE>mode, t2, t1);
11864 t2 = gen_rtx_SET (VOIDmode, operands[0], t2);
;; Unspec form of vperm2<i128>: accepts any 8-bit immediate in operand 3 and
;; passes it through unchanged.
11870 ;; Note that bits 7 and 3 of the imm8 allow lanes to be zeroed, which
11871 ;; means that in order to represent this properly in rtl we'd have to
11872 ;; nest *another* vec_concat with a zero operand and do the select from
11873 ;; a 4x wide vector. That doesn't seem very nice.
11874 (define_insn "*avx_vperm2f128<mode>_full"
11875 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
11876 (unspec:AVX256MODE2P
11877 [(match_operand:AVX256MODE2P 1 "register_operand" "x")
11878 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm")
11879 (match_operand:SI 3 "const_0_to_255_operand" "n")]
11880 UNSPEC_VPERMIL2F128))]
11882 "vperm2<i128>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11883 [(set_attr "type" "sselog")
11884 (set_attr "prefix_extra" "1")
11885 (set_attr "length_immediate" "1")
11886 (set_attr "prefix" "vex")
11887 (set_attr "mode" "<sseinsnmode>")])
;; vec_select form of vperm2<i128>: selects from the concatenation of
;; operands 1 and 2 using the PARALLEL in operand 3.  The condition calls
;; avx_vperm2f128_parallel on that selector; the output code uses the
;; helper's return value minus 1 as the immediate.
11889 (define_insn "*avx_vperm2f128<mode>_nozero"
11890 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
11891 (vec_select:AVX256MODE2P
11892 (vec_concat:<ssedoublevecmode>
11893 (match_operand:AVX256MODE2P 1 "register_operand" "x")
11894 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm"))
11895 (match_parallel 3 ""
11896 [(match_operand 4 "const_int_operand" "")])))]
11898 && avx_vperm2f128_parallel (operands[3], <MODE>mode)"
11900 int mask = avx_vperm2f128_parallel (operands[3], <MODE>mode) - 1;
11901 operands[3] = GEN_INT (mask);
11902 return "vperm2<i128>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
11904 [(set_attr "type" "sselog")
11905 (set_attr "prefix_extra" "1")
11906 (set_attr "length_immediate" "1")
11907 (set_attr "prefix" "vex")
11908 (set_attr "mode" "<sseinsnmode>")])
;; Expander that inserts the half-width vector operand 2 into the 256-bit
;; operand 1.  It switches on the 0/1 immediate in operand 3 to choose
;; between gen_vec_set_lo_<mode> and gen_vec_set_hi_<mode>, then emits the
;; chosen insn with operands 0..2.
11910 (define_expand "avx_vinsertf128<mode>"
11911 [(match_operand:V_256 0 "register_operand" "")
11912 (match_operand:V_256 1 "register_operand" "")
11913 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "")
11914 (match_operand:SI 3 "const_0_to_1_operand" "")]
11917 rtx (*insn)(rtx, rtx, rtx);
11919 switch (INTVAL (operands[3]))
11922 insn = gen_vec_set_lo_<mode>;
11925 insn = gen_vec_set_hi_<mode>;
11928 gcc_unreachable ();
11931 emit_insn (insn (operands[0], operands[1], operands[2]));
;; Low-half insert for V4DI: combines V2DI operand 2 with elements 2 and 3
;; of operand 1 and emits vinserti128 with immediate 0.
11935 (define_insn "avx2_vec_set_lo_v4di"
11936 [(set (match_operand:V4DI 0 "register_operand" "=x")
11938 (match_operand:V2DI 2 "nonimmediate_operand" "xm")
11940 (match_operand:V4DI 1 "register_operand" "x")
11941 (parallel [(const_int 2) (const_int 3)]))))]
11943 "vinserti128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
11944 [(set_attr "type" "sselog")
11945 (set_attr "prefix_extra" "1")
11946 (set_attr "length_immediate" "1")
11947 (set_attr "prefix" "vex")
11948 (set_attr "mode" "OI")])
;; High-half insert for V4DI: combines elements 0 and 1 of operand 1 with
;; V2DI operand 2 and emits vinserti128 with immediate 1.
11950 (define_insn "avx2_vec_set_hi_v4di"
11951 [(set (match_operand:V4DI 0 "register_operand" "=x")
11954 (match_operand:V4DI 1 "register_operand" "x")
11955 (parallel [(const_int 0) (const_int 1)]))
11956 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
11958 "vinserti128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
11959 [(set_attr "type" "sselog")
11960 (set_attr "prefix_extra" "1")
11961 (set_attr "length_immediate" "1")
11962 (set_attr "prefix" "vex")
11963 (set_attr "mode" "OI")])
;; Low-half insert for the VI8F_256 modes: the result is operand 2
;; concatenated with elements 2 and 3 of operand 1.  Emits vinsert<i128>
;; with immediate 0.
11965 (define_insn "vec_set_lo_<mode>"
11966 [(set (match_operand:VI8F_256 0 "register_operand" "=x")
11967 (vec_concat:VI8F_256
11968 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")
11969 (vec_select:<ssehalfvecmode>
11970 (match_operand:VI8F_256 1 "register_operand" "x")
11971 (parallel [(const_int 2) (const_int 3)]))))]
11973 "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
11974 [(set_attr "type" "sselog")
11975 (set_attr "prefix_extra" "1")
11976 (set_attr "length_immediate" "1")
11977 (set_attr "prefix" "vex")
11978 (set_attr "mode" "<sseinsnmode>")])
;; High-half insert for the VI8F_256 modes: the result is elements 0 and 1
;; of operand 1 concatenated with operand 2.  Emits vinsert<i128> with
;; immediate 1.
11980 (define_insn "vec_set_hi_<mode>"
11981 [(set (match_operand:VI8F_256 0 "register_operand" "=x")
11982 (vec_concat:VI8F_256
11983 (vec_select:<ssehalfvecmode>
11984 (match_operand:VI8F_256 1 "register_operand" "x")
11985 (parallel [(const_int 0) (const_int 1)]))
11986 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")))]
11988 "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
11989 [(set_attr "type" "sselog")
11990 (set_attr "prefix_extra" "1")
11991 (set_attr "length_immediate" "1")
11992 (set_attr "prefix" "vex")
11993 (set_attr "mode" "<sseinsnmode>")])
;; Low-half insert for the VI4F_256 modes: the result is operand 2
;; concatenated with elements 4..7 of operand 1.  Emits vinsert<i128> with
;; immediate 0.
11995 (define_insn "vec_set_lo_<mode>"
11996 [(set (match_operand:VI4F_256 0 "register_operand" "=x")
11997 (vec_concat:VI4F_256
11998 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")
11999 (vec_select:<ssehalfvecmode>
12000 (match_operand:VI4F_256 1 "register_operand" "x")
12001 (parallel [(const_int 4) (const_int 5)
12002 (const_int 6) (const_int 7)]))))]
12004 "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
12005 [(set_attr "type" "sselog")
12006 (set_attr "prefix_extra" "1")
12007 (set_attr "length_immediate" "1")
12008 (set_attr "prefix" "vex")
12009 (set_attr "mode" "<sseinsnmode>")])
;; High-half insert for the VI4F_256 modes: the result is elements 0..3 of
;; operand 1 concatenated with operand 2.  Emits vinsert<i128> with
;; immediate 1.
12011 (define_insn "vec_set_hi_<mode>"
12012 [(set (match_operand:VI4F_256 0 "register_operand" "=x")
12013 (vec_concat:VI4F_256
12014 (vec_select:<ssehalfvecmode>
12015 (match_operand:VI4F_256 1 "register_operand" "x")
12016 (parallel [(const_int 0) (const_int 1)
12017 (const_int 2) (const_int 3)]))
12018 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")))]
12020 "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
12021 [(set_attr "type" "sselog")
12022 (set_attr "prefix_extra" "1")
12023 (set_attr "length_immediate" "1")
12024 (set_attr "prefix" "vex")
12025 (set_attr "mode" "<sseinsnmode>")])
;; Low-half insert for V16HI: combines V8HI operand 2 with elements 8..15 of
;; operand 1; immediate 0.  The mnemonic is written "vinsert%~128"; the "%~"
;; output modifier is resolved by the back end's operand printer, which is
;; not part of this file.
12027 (define_insn "vec_set_lo_v16hi"
12028 [(set (match_operand:V16HI 0 "register_operand" "=x")
12030 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
12032 (match_operand:V16HI 1 "register_operand" "x")
12033 (parallel [(const_int 8) (const_int 9)
12034 (const_int 10) (const_int 11)
12035 (const_int 12) (const_int 13)
12036 (const_int 14) (const_int 15)]))))]
12038 "vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
12039 [(set_attr "type" "sselog")
12040 (set_attr "prefix_extra" "1")
12041 (set_attr "length_immediate" "1")
12042 (set_attr "prefix" "vex")
12043 (set_attr "mode" "OI")])
;; High-half insert for V16HI: combines elements 0..7 of operand 1 with V8HI
;; operand 2; immediate 1.  Uses the "%~" output modifier in the mnemonic.
12045 (define_insn "vec_set_hi_v16hi"
12046 [(set (match_operand:V16HI 0 "register_operand" "=x")
12049 (match_operand:V16HI 1 "register_operand" "x")
12050 (parallel [(const_int 0) (const_int 1)
12051 (const_int 2) (const_int 3)
12052 (const_int 4) (const_int 5)
12053 (const_int 6) (const_int 7)]))
12054 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
12056 "vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
12057 [(set_attr "type" "sselog")
12058 (set_attr "prefix_extra" "1")
12059 (set_attr "length_immediate" "1")
12060 (set_attr "prefix" "vex")
12061 (set_attr "mode" "OI")])
;; Low-half insert for V32QI: combines V16QI operand 2 with elements 16..31
;; of operand 1; immediate 0.  Uses the "%~" output modifier in the
;; mnemonic.
12063 (define_insn "vec_set_lo_v32qi"
12064 [(set (match_operand:V32QI 0 "register_operand" "=x")
12066 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
12068 (match_operand:V32QI 1 "register_operand" "x")
12069 (parallel [(const_int 16) (const_int 17)
12070 (const_int 18) (const_int 19)
12071 (const_int 20) (const_int 21)
12072 (const_int 22) (const_int 23)
12073 (const_int 24) (const_int 25)
12074 (const_int 26) (const_int 27)
12075 (const_int 28) (const_int 29)
12076 (const_int 30) (const_int 31)]))))]
12078 "vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
12079 [(set_attr "type" "sselog")
12080 (set_attr "prefix_extra" "1")
12081 (set_attr "length_immediate" "1")
12082 (set_attr "prefix" "vex")
12083 (set_attr "mode" "OI")])
;; High-half insert for V32QI: combines elements 0..15 of operand 1 with
;; V16QI operand 2; immediate 1.  Uses the "%~" output modifier in the
;; mnemonic.
12085 (define_insn "vec_set_hi_v32qi"
12086 [(set (match_operand:V32QI 0 "register_operand" "=x")
12089 (match_operand:V32QI 1 "register_operand" "x")
12090 (parallel [(const_int 0) (const_int 1)
12091 (const_int 2) (const_int 3)
12092 (const_int 4) (const_int 5)
12093 (const_int 6) (const_int 7)
12094 (const_int 8) (const_int 9)
12095 (const_int 10) (const_int 11)
12096 (const_int 12) (const_int 13)
12097 (const_int 14) (const_int 15)]))
12098 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
12100 "vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
12101 [(set_attr "type" "sselog")
12102 (set_attr "prefix_extra" "1")
12103 (set_attr "length_immediate" "1")
12104 (set_attr "prefix" "vex")
12105 (set_attr "mode" "OI")])
;; Masked-load expander: operand 0 is the register destination, operand 1
;; the memory source and operand 2 the mask register in the matching
;; integer vector mode.
12107 (define_expand "<avx_avx2>_maskload<ssemodesuffix><avxsizesuffix>"
12108 [(set (match_operand:V48_AVX2 0 "register_operand" "")
12110 [(match_operand:<sseintvecmode> 2 "register_operand" "")
12111 (match_operand:V48_AVX2 1 "memory_operand" "")
;; Masked-store expander: operand 0 is the memory destination, operand 1 the
;; mask register in the matching integer vector mode and operand 2 the
;; register holding the data.
12116 (define_expand "<avx_avx2>_maskstore<ssemodesuffix><avxsizesuffix>"
12117 [(set (match_operand:V48_AVX2 0 "memory_operand" "")
12119 [(match_operand:<sseintvecmode> 1 "register_operand" "")
12120 (match_operand:V48_AVX2 2 "register_operand" "")
;; Integer masked move (vpmaskmov).  Alternative 0 is a load (register
;; destination, memory operand 2) and alternative 1 a store (memory
;; destination, register operand 2).  The condition enforces that pairing:
;; operand 0 is a register exactly when operand 2 is memory.
12125 (define_insn "*avx2_maskmov<ssemodesuffix><avxsizesuffix>"
12126 [(set (match_operand:VI48_AVX2 0 "nonimmediate_operand" "=x,m")
12128 [(match_operand:<sseintvecmode> 1 "register_operand" "x,x")
12129 (match_operand:VI48_AVX2 2 "nonimmediate_operand" "m,x")
12133 && (REG_P (operands[0]) == MEM_P (operands[2]))"
12134 "vpmaskmov<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
12135 [(set_attr "type" "sselog1")
12136 (set_attr "prefix_extra" "1")
12137 (set_attr "prefix" "vex")
12138 (set_attr "mode" "<sseinsnmode>")])
;; Float masked move (vmaskmov).  Alternative 0 is a load (register
;; destination, memory operand 2) and alternative 1 a store (memory
;; destination, register operand 2).  The condition enforces that pairing:
;; operand 0 is a register exactly when operand 2 is memory.
12140 (define_insn "*avx_maskmov<ssemodesuffix><avxsizesuffix>"
12141 [(set (match_operand:VF 0 "nonimmediate_operand" "=x,m")
12143 [(match_operand:<sseintvecmode> 1 "register_operand" "x,x")
12144 (match_operand:VF 2 "nonimmediate_operand" "m,x")
12148 && (REG_P (operands[0]) == MEM_P (operands[2]))"
12149 "vmaskmov<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
12150 [(set_attr "type" "sselog1")
12151 (set_attr "prefix_extra" "1")
12152 (set_attr "prefix" "vex")
12153 (set_attr "mode" "<MODE>")])
;; Cast from a half-width vector to the 256-bit mode, kept as an unspec
;; until reload has completed.  The split code then either re-expresses op0
;; as a hard register in the half-width mode or op1 as a hard register in
;; the full mode (the condition choosing between the two is not visible
;; here) and emits an ordinary move.
12155 (define_insn_and_split "avx_<castmode><avxsizesuffix>_<castmode>"
12156 [(set (match_operand:AVX256MODE2P 0 "nonimmediate_operand" "=x,m")
12157 (unspec:AVX256MODE2P
12158 [(match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "xm,x")]
12162 "&& reload_completed"
12165 rtx op0 = operands[0];
12166 rtx op1 = operands[1];
12168 op0 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op0));
12170 op1 = gen_rtx_REG (<MODE>mode, REGNO (op1));
12171 emit_move_insn (op0, op1);
;; Vector-initialisation expander for the 256-bit modes; delegates to
;; ix86_expand_vector_init, passing false as its first argument.
12175 (define_expand "vec_init<mode>"
12176 [(match_operand:V_256 0 "register_operand" "")
12177 (match_operand 1 "" "")]
12180 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Expander that extracts one V2DI half of V4DI operand 1.  It switches on
;; the 0/1 immediate in operand 2 to choose between gen_vec_extract_lo_v4di
;; and gen_vec_extract_hi_v4di.
12184 (define_expand "avx2_extracti128"
12185 [(match_operand:V2DI 0 "nonimmediate_operand" "")
12186 (match_operand:V4DI 1 "register_operand" "")
12187 (match_operand:SI 2 "const_0_to_1_operand" "")]
12190 rtx (*insn)(rtx, rtx);
12192 switch (INTVAL (operands[2]))
12195 insn = gen_vec_extract_lo_v4di;
12198 insn = gen_vec_extract_hi_v4di;
12201 gcc_unreachable ();
12204 emit_insn (insn (operands[0], operands[1]));
;; Expander that inserts V2DI operand 2 into one half of V4DI operand 1.  It
;; switches on the 0/1 immediate in operand 3 to choose between
;; gen_avx2_vec_set_lo_v4di and gen_avx2_vec_set_hi_v4di.
12208 (define_expand "avx2_inserti128"
12209 [(match_operand:V4DI 0 "register_operand" "")
12210 (match_operand:V4DI 1 "register_operand" "")
12211 (match_operand:V2DI 2 "nonimmediate_operand" "")
12212 (match_operand:SI 3 "const_0_to_1_operand" "")]
12215 rtx (*insn)(rtx, rtx, rtx);
12217 switch (INTVAL (operands[3]))
12220 insn = gen_avx2_vec_set_lo_v4di;
12223 insn = gen_avx2_vec_set_hi_v4di;
12226 gcc_unreachable ();
12229 emit_insn (insn (operands[0], operands[1], operands[2]));
;; Per-element variable shift for V8SI, emitted as vpsravd: result element i
;; is built from element i of operand 1 and element i of operand 2.  The
;; pattern is written as two four-element halves.  The first half selects
;; elements 0..3; the second half must select elements 4..7 so that the RTL
;; covers all eight lanes (it previously repeated 0..3, which described a
;; result whose upper half duplicated the lower half).
12233 (define_insn "avx2_ashrvv8si"
12234 [(set (match_operand:V8SI 0 "register_operand" "=x")
12240 (match_operand:V8SI 1 "register_operand" "x")
12241 (parallel [(const_int 0)]))
12243 (match_operand:V8SI 2 "nonimmediate_operand" "xm")
12244 (parallel [(const_int 0)])))
12248 (parallel [(const_int 1)]))
12251 (parallel [(const_int 1)]))))
12256 (parallel [(const_int 2)]))
12259 (parallel [(const_int 2)])))
12263 (parallel [(const_int 3)]))
12266 (parallel [(const_int 3)])))))
12272 (parallel [(const_int 4)]))
12275 (parallel [(const_int 4)])))
12279 (parallel [(const_int 5)]))
12282 (parallel [(const_int 5)]))))
12287 (parallel [(const_int 6)]))
12290 (parallel [(const_int 6)])))
12294 (parallel [(const_int 7)]))
12297 (parallel [(const_int 7)])))))))]
12299 "vpsravd\t{%2, %1, %0|%0, %1, %2}"
12300 [(set_attr "type" "sseishft")
12301 (set_attr "prefix" "vex")
12302 (set_attr "mode" "OI")])
;; Per-element variable shift for V4SI, emitted as vpsravd: the visible
;; selectors pair element i of operand 1 with element i of operand 2 for
;; i = 0..3.
12304 (define_insn "avx2_ashrvv4si"
12305 [(set (match_operand:V4SI 0 "register_operand" "=x")
12310 (match_operand:V4SI 1 "register_operand" "x")
12311 (parallel [(const_int 0)]))
12313 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
12314 (parallel [(const_int 0)])))
12318 (parallel [(const_int 1)]))
12321 (parallel [(const_int 1)]))))
12326 (parallel [(const_int 2)]))
12329 (parallel [(const_int 2)])))
12333 (parallel [(const_int 3)]))
12336 (parallel [(const_int 3)]))))))]
12338 "vpsravd\t{%2, %1, %0|%0, %1, %2}"
12339 [(set_attr "type" "sseishft")
12340 (set_attr "prefix" "vex")
12341 (set_attr "mode" "TI")])
;; Per-element variable shift for V8SI, emitted as vp<lshift_insn>vd: result
;; element i is built from element i of operand 1 and element i of operand
;; 2.  The pattern is written as two four-element halves.  The first half
;; selects elements 0..3; the second half must select elements 4..7 so that
;; the RTL covers all eight lanes (it previously repeated 0..3).
12343 (define_insn "avx2_<lshift>vv8si"
12344 [(set (match_operand:V8SI 0 "register_operand" "=x")
12350 (match_operand:V8SI 1 "register_operand" "x")
12351 (parallel [(const_int 0)]))
12353 (match_operand:V8SI 2 "nonimmediate_operand" "xm")
12354 (parallel [(const_int 0)])))
12358 (parallel [(const_int 1)]))
12361 (parallel [(const_int 1)]))))
12366 (parallel [(const_int 2)]))
12369 (parallel [(const_int 2)])))
12373 (parallel [(const_int 3)]))
12376 (parallel [(const_int 3)])))))
12382 (parallel [(const_int 4)]))
12385 (parallel [(const_int 4)])))
12389 (parallel [(const_int 5)]))
12392 (parallel [(const_int 5)]))))
12397 (parallel [(const_int 6)]))
12400 (parallel [(const_int 6)])))
12404 (parallel [(const_int 7)]))
12407 (parallel [(const_int 7)])))))))]
12409 "vp<lshift_insn>vd\t{%2, %1, %0|%0, %1, %2}"
12410 [(set_attr "type" "sseishft")
12411 (set_attr "prefix" "vex")
12412 (set_attr "mode" "OI")])
;; Per-element variable logical shift for the VI4SD_AVX2 modes, written out
;; as four scalar <lshift> operations: element i of operand 1 shifted by
;; element i of operand 2 for i = 0..3, concatenated pairwise into the
;; result.  Emits vp<lshift_insn>v<ssemodesuffix>.
12414 (define_insn "avx2_<lshift>v<mode>"
12415 [(set (match_operand:VI4SD_AVX2 0 "register_operand" "=x")
12416 (vec_concat:VI4SD_AVX2
12417 (vec_concat:<ssehalfvecmode>
12418 (lshift:<ssescalarmode>
12419 (vec_select:<ssescalarmode>
12420 (match_operand:VI4SD_AVX2 1 "register_operand" "x")
12421 (parallel [(const_int 0)]))
12422 (vec_select:<ssescalarmode>
12423 (match_operand:VI4SD_AVX2 2 "nonimmediate_operand" "xm")
12424 (parallel [(const_int 0)])))
12425 (lshift:<ssescalarmode>
12426 (vec_select:<ssescalarmode>
12428 (parallel [(const_int 1)]))
12429 (vec_select:<ssescalarmode>
12431 (parallel [(const_int 1)]))))
12432 (vec_concat:<ssehalfvecmode>
12433 (lshift:<ssescalarmode>
12434 (vec_select:<ssescalarmode>
12436 (parallel [(const_int 2)]))
12437 (vec_select:<ssescalarmode>
12439 (parallel [(const_int 2)])))
12440 (lshift:<ssescalarmode>
12441 (vec_select:<ssescalarmode>
12443 (parallel [(const_int 3)]))
12444 (vec_select:<ssescalarmode>
12446 (parallel [(const_int 3)]))))))]
12448 "vp<lshift_insn>v<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
12449 [(set_attr "type" "sseishft")
12450 (set_attr "prefix" "vex")
12451 (set_attr "mode" "<sseinsnmode>")])
;; Per-element variable logical shift for V2DI, emitted as
;; vp<lshift_insn>vq: the visible selectors pair element i of operand 1 with
;; element i of operand 2 for i = 0 and 1.
12453 (define_insn "avx2_<lshift>vv2di"
12454 [(set (match_operand:V2DI 0 "register_operand" "=x")
12458 (match_operand:V2DI 1 "register_operand" "x")
12459 (parallel [(const_int 0)]))
12461 (match_operand:V2DI 2 "nonimmediate_operand" "xm")
12462 (parallel [(const_int 0)])))
12466 (parallel [(const_int 1)]))
12469 (parallel [(const_int 1)])))))]
12471 "vp<lshift_insn>vq\t{%2, %1, %0|%0, %1, %2}"
12472 [(set_attr "type" "sseishft")
12473 (set_attr "prefix" "vex")
12474 (set_attr "mode" "TI")])
;; Builds a 256-bit vector from two half-width operands.  The visible output
;; code has two paths selected by which_alternative:
;;  - vinsert<i128> with immediate 1, inserting operand 2 on top of operand
;;    1 viewed as a 256-bit register (%t1);
;;  - a 128-bit move of operand 1 into the low part of the destination
;;    (%x0), using vmovaps, vmovapd or vmovdqa according to the insn's mode
;;    attribute.  This pairs with the alternative where operand 2 matches
;;    constraint "C".
12476 (define_insn "avx_vec_concat<mode>"
12477 [(set (match_operand:V_256 0 "register_operand" "=x,x")
12479 (match_operand:<ssehalfvecmode> 1 "register_operand" "x,x")
12480 (match_operand:<ssehalfvecmode> 2 "vector_move_operand" "xm,C")))]
12483 switch (which_alternative)
12486 return "vinsert<i128>\t{$0x1, %2, %t1, %0|%0, %t1, %2, 0x1}";
12488 switch (get_attr_mode (insn))
12491 return "vmovaps\t{%1, %x0|%x0, %1}";
12493 return "vmovapd\t{%1, %x0|%x0, %1}";
12495 return "vmovdqa\t{%1, %x0|%x0, %1}";
12498 gcc_unreachable ();
12501 [(set_attr "type" "sselog,ssemov")
12502 (set_attr "prefix_extra" "1,*")
12503 (set_attr "length_immediate" "1,*")
12504 (set_attr "prefix" "vex")
12505 (set_attr "mode" "<sseinsnmode>")])
;; Register form of vcvtph2ps producing V4SF.  The conversion is modelled as
;; a V8SF unspec over the V8HI source from which four elements are selected.
;; The selector must name elements 0, 1, 2 and 3, the low four converted
;; values; it previously read 0, 1, 1, 2, which duplicated element 1 and
;; dropped element 3.
12507 (define_insn "vcvtph2ps"
12508 [(set (match_operand:V4SF 0 "register_operand" "=x")
12510 (unspec:V8SF [(match_operand:V8HI 1 "register_operand" "x")]
12512 (parallel [(const_int 0) (const_int 1)
12513 (const_int 2) (const_int 3)])))]
12515 "vcvtph2ps\t{%1, %0|%0, %1}"
12516 [(set_attr "type" "ssecvt")
12517 (set_attr "prefix" "vex")
12518 (set_attr "mode" "V4SF")])
;; Memory-source form of vcvtph2ps: a V4HI memory operand is converted to a
;; V4SF register result (UNSPEC_VCVTPH2PS).
12520 (define_insn "*vcvtph2ps_load"
12521 [(set (match_operand:V4SF 0 "register_operand" "=x")
12522 (unspec:V4SF [(match_operand:V4HI 1 "memory_operand" "m")]
12523 UNSPEC_VCVTPH2PS))]
12525 "vcvtph2ps\t{%1, %0|%0, %1}"
12526 [(set_attr "type" "ssecvt")
12527 (set_attr "prefix" "vex")
12528 (set_attr "mode" "V8SF")])
;; 256-bit vcvtph2ps: a V8HI source in register or memory is converted to a
;; V8SF register result (UNSPEC_VCVTPH2PS).
12530 (define_insn "vcvtph2ps256"
12531 [(set (match_operand:V8SF 0 "register_operand" "=x")
12532 (unspec:V8SF [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
12533 UNSPEC_VCVTPH2PS))]
12535 "vcvtph2ps\t{%1, %0|%0, %1}"
12536 [(set_attr "type" "ssecvt")
12537 (set_attr "prefix" "vex")
12538 (set_attr "mode" "V8SF")])
;; Expander for the register form of vcvtps2ph: a V4SF source plus an 8-bit
;; immediate yields a V4HI unspec placed in a V8HI destination.  The
;; preparation statement sets operands[3] to the V4HImode zero constant.
12540 (define_expand "vcvtps2ph"
12541 [(set (match_operand:V8HI 0 "register_operand" "")
12543 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "")
12544 (match_operand:SI 2 "const_0_to_255_operand" "")]
12548 "operands[3] = CONST0_RTX (V4HImode);")
;; Register form of vcvtps2ph: the V4HI conversion result is combined with
;; operand 3, which must satisfy const0_operand in V4HI mode, to form the
;; V8HI destination.
12550 (define_insn "*vcvtps2ph"
12551 [(set (match_operand:V8HI 0 "register_operand" "=x")
12553 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")
12554 (match_operand:SI 2 "const_0_to_255_operand" "N")]
12556 (match_operand:V4HI 3 "const0_operand" "")))]
12558 "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
12559 [(set_attr "type" "ssecvt")
12560 (set_attr "prefix" "vex")
12561 (set_attr "mode" "V4SF")])
;; Memory-destination vcvtps2ph: the V4HI unspec result is stored
;; directly to a V4HI memory operand ("=m"), so no zero-filled half is
;; needed.
;;   operand 1: V4SF register source
;;   operand 2: SImode immediate in 0..255 (constraint "N")
12563 (define_insn "*vcvtps2ph_store"
12564 [(set (match_operand:V4HI 0 "memory_operand" "=m")
12565 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")
12566 (match_operand:SI 2 "const_0_to_255_operand" "N")]
12567 UNSPEC_VCVTPS2PH))]
12569 "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
12570 [(set_attr "type" "ssecvt")
12571 (set_attr "prefix" "vex")
12572 (set_attr "mode" "V4SF")])
;; 256-bit source form of vcvtps2ph.
;;   operand 0: V8HI destination, register or memory ("=xm")
;;   operand 1: V8SF register source
;;   operand 2: SImode immediate in 0..255 (constraint "N")
;; The V8HI unspec result fills the whole destination.
12574 (define_insn "vcvtps2ph256"
12575 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=xm")
12576 (unspec:V8HI [(match_operand:V8SF 1 "register_operand" "x")
12577 (match_operand:SI 2 "const_0_to_255_operand" "N")]
12578 UNSPEC_VCVTPS2PH))]
12580 "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
12581 [(set_attr "type" "ssecvt")
12582 (set_attr "prefix" "vex")
12583 (set_attr "mode" "V8SF")])
12585 ;; For gather* insn patterns
;; Data modes the gather patterns are instantiated for: integer and
;; float vectors with two, four or eight elements (V2DI/V2DF, V4DI/V4DF,
;; V4SI/V4SF, V8SI/V8SF).
12586 (define_mode_iterator VEC_GATHER_MODE
12587 [V2DI V2DF V4DI V4DF V4SI V4SF V8SI V8SF])
;; Mode attribute sharing the iterator's name: <VEC_GATHER_MODE> maps a
;; gather data mode to the SImode vector mode used for the index operand
;; of the avx2_gathersi patterns.  Every mode with two or four elements
;; maps to V4SI; the eight-element modes map to V8SI.
12588 (define_mode_attr VEC_GATHER_MODE
12589 [(V2DI "V4SI") (V2DF "V4SI")
12590 (V4DI "V4SI") (V4DF "V4SI")
12591 (V4SI "V4SI") (V4SF "V4SI")
12592 (V8SI "V8SI") (V8SF "V8SI")])
;; Expander for gathers whose index vector has SImode elements.
;;   operand 0: destination vector register
;;   operand 1: vector register input in the same mode as the destination
;;   operand 2: register operand with no mode specified here
;;   operand 3: index vector register, mode <VEC_GATHER_MODE>
;;   operand 4: vector register input in the destination mode
;;   operand 5: SImode constant accepted by const1248_operand
;;   operand 6: scratch in the destination mode, clobbered by the insn
;; (mem:BLK (scratch)) stands for the memory read by the gather, whose
;; addresses are not expressed in the RTL.
;; The predicate name for operand 5 is spelled exactly
;; "const1248_operand", with no trailing blank, so that it matches the
;; spelling used by "*avx2_gathersi<mode>".
12594 (define_expand "avx2_gathersi<mode>"
12595 [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "")
12596 (unspec:VEC_GATHER_MODE
12597 [(match_operand:VEC_GATHER_MODE 1 "register_operand" "")
12598 (match_operand 2 "register_operand" "")
12599 (mem:BLK (scratch))
12600 (match_operand:<VEC_GATHER_MODE> 3 "register_operand" "")
12601 (match_operand:VEC_GATHER_MODE 4 "register_operand" "")
12602 (match_operand:SI 5 "const1248_operand" "")]
12604 (clobber (match_scratch:VEC_GATHER_MODE 6 ""))])]
;; Gather insn with an SImode-element index vector.
;;   operand 0: destination, earlyclobber ("=&x")
;;   operand 1: scratch register, earlyclobber; printed in the template
;;   operand 2: input tied to operand 0 (constraint "0")
;;   operand 3: Pmode general register ("r"), printed as the address base
;;   operand 4: index vector register, mode <VEC_GATHER_MODE>
;;   operand 5: input tied to the scratch operand 1 (constraint "1")
;;   operand 6: constant accepted by const1248_operand, printed with %p
;;              as the last component of the address
;; The earlyclobbers keep operands 0 and 1 out of the registers holding
;; the untied inputs (operands 3 and 4).
12607 (define_insn "*avx2_gathersi<mode>"
12608 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
12609 (unspec:VEC_GATHER_MODE
12610 [(match_operand:VEC_GATHER_MODE 2 "register_operand" "0")
12611 (match_operand:P 3 "register_operand" "r")
12612 (mem:BLK (scratch))
12613 (match_operand:<VEC_GATHER_MODE> 4 "register_operand" "x")
12614 (match_operand:VEC_GATHER_MODE 5 "register_operand" "1")
12615 (match_operand:SI 6 "const1248_operand" "n")]
12617 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
12619 "v<gthrfirstp>gatherd<gthrlastp>\t{%1, (%3, %4, %p6), %0|%0, (%3, %4, %p6), %1}"
12620 [(set_attr "type" "ssemov")
12621 (set_attr "prefix" "vex")
12622 (set_attr "mode" "<sseinsnmode>")])
;; Expander for gathers whose index vector has the mode given by
;; <AVXMODE48P_DI>.
;;   operand 0: destination vector register
;;   operand 1: vector register input in the same mode as the destination
;;   operand 2: register operand with no mode specified here
;;   operand 3: index vector register, mode <AVXMODE48P_DI>
;;   operand 4: vector register input in the destination mode
;;   operand 5: SImode constant accepted by const1248_operand
;;   operand 6: scratch in the destination mode, clobbered by the insn
;; (mem:BLK (scratch)) stands for the memory read by the gather, whose
;; addresses are not expressed in the RTL.
;; The predicate name for operand 5 is spelled exactly
;; "const1248_operand", with no trailing blank, so that it matches the
;; spelling used by "*avx2_gatherdi<mode>".
12624 (define_expand "avx2_gatherdi<mode>"
12625 [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "")
12626 (unspec:VEC_GATHER_MODE
12627 [(match_operand:VEC_GATHER_MODE 1 "register_operand" "")
12628 (match_operand 2 "register_operand" "")
12629 (mem:BLK (scratch))
12630 (match_operand:<AVXMODE48P_DI> 3 "register_operand" "")
12631 (match_operand:VEC_GATHER_MODE 4 "register_operand" "")
12632 (match_operand:SI 5 "const1248_operand" "")]
12634 (clobber (match_scratch:VEC_GATHER_MODE 6 ""))])]
;; Gather insn iterated over AVXMODE48P_DI, emitting the "gatherq" form
;; of the mnemonic.
;;   operand 0: destination, earlyclobber ("=&x")
;;   operand 1: scratch register, earlyclobber; printed in the template
;;   operand 2: input tied to operand 0 (constraint "0")
;;   operand 3: Pmode general register ("r"), printed as the address base
;;   operand 4: index vector register, mode <AVXMODE48P_DI>
;;   operand 5: input tied to the scratch operand 1 (constraint "1")
;;   operand 6: constant accepted by const1248_operand, printed with %p
;;              as the last component of the address
12637 (define_insn "*avx2_gatherdi<mode>"
12638 [(set (match_operand:AVXMODE48P_DI 0 "register_operand" "=&x")
12639 (unspec:AVXMODE48P_DI
12640 [(match_operand:AVXMODE48P_DI 2 "register_operand" "0")
12641 (match_operand:P 3 "register_operand" "r")
12642 (mem:BLK (scratch))
12643 (match_operand:<AVXMODE48P_DI> 4 "register_operand" "x")
12644 (match_operand:AVXMODE48P_DI 5 "register_operand" "1")
12645 (match_operand:SI 6 "const1248_operand" "n")]
12647 (clobber (match_scratch:AVXMODE48P_DI 1 "=&x"))]
12649 "v<gthrfirstp>gatherq<gthrlastp>\t{%1, (%3, %4, %p6), %0|%0, (%3, %4, %p6), %1}"
12650 [(set_attr "type" "ssemov")
12651 (set_attr "prefix" "vex")
12652 (set_attr "mode" "<sseinsnmode>")])
12654 ;; Special handling for VEX.256 gathers with 32-bit element data:
12655 ;; the data operands are still xmm-sized (VI4F_128) while the index is V4DI.
;;   operand 0: VI4F_128 destination register
;;   operand 1: VI4F_128 register input
;;   operand 2: register operand with no mode specified here
;;   operand 3: V4DI index vector register
;;   operand 4: VI4F_128 register input
;;   operand 5: SImode constant accepted by const1248_operand
;;   operand 6: VI4F_128 scratch, clobbered by the insn
;; The predicate name for operand 5 is spelled exactly
;; "const1248_operand", with no trailing blank, so that it matches the
;; spelling used by "*avx2_gatherdi<mode>256".
12656 (define_expand "avx2_gatherdi<mode>256"
12657 [(parallel [(set (match_operand:VI4F_128 0 "register_operand" "")
12659 [(match_operand:VI4F_128 1 "register_operand" "")
12660 (match_operand 2 "register_operand" "")
12661 (mem:BLK (scratch))
12662 (match_operand:V4DI 3 "register_operand" "")
12663 (match_operand:VI4F_128 4 "register_operand" "")
12664 (match_operand:SI 5 "const1248_operand" "")]
12666 (clobber (match_scratch:VI4F_128 6 ""))])]
;; Gather insn with VI4F_128 data operands and a V4DI index vector.
;;   operand 0: destination, earlyclobber ("=&x")
;;   operand 1: scratch register, earlyclobber; printed in the template
;;   operand 2: input tied to operand 0 (constraint "0")
;;   operand 3: Pmode general register ("r"), printed as the address base
;;   operand 4: V4DI index vector register
;;   operand 5: input tied to the scratch operand 1 (constraint "1")
;;   operand 6: constant accepted by const1248_operand, printed with %p
;;              as the last component of the address
;; Operand 0 carries the earlyclobber modifier, as in the other gather
;; insns, so the register allocator cannot place the destination in the
;; register holding the index vector (operand 4).
12669 (define_insn "*avx2_gatherdi<mode>256"
12670 [(set (match_operand:VI4F_128 0 "register_operand" "=&x")
12672 [(match_operand:VI4F_128 2 "register_operand" "0")
12673 (match_operand:P 3 "register_operand" "r")
12674 (mem:BLK (scratch))
12675 (match_operand:V4DI 4 "register_operand" "x")
12676 (match_operand:VI4F_128 5 "register_operand" "1")
12677 (match_operand:SI 6 "const1248_operand" "n")]
12679 (clobber (match_scratch:VI4F_128 1 "=&x"))]
12681 "v<gthrfirstp>gatherq<gthrlastp>\t{%1, (%3, %4, %p6), %0|%0, (%3, %4, %p6), %1}"
12682 [(set_attr "type" "ssemov")
12683 (set_attr "prefix" "vex")
12684 (set_attr "mode" "<sseinsnmode>")])