1 ;; GCC machine description for SSE instructions
2 ;; Copyright (C) 2005-2013 Free Software Foundation, Inc.
4 ;; This file is part of GCC.
6 ;; GCC is free software; you can redistribute it and/or modify
7 ;; it under the terms of the GNU General Public License as published by
8 ;; the Free Software Foundation; either version 3, or (at your option)
11 ;; GCC is distributed in the hope that it will be useful,
12 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
13 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 ;; GNU General Public License for more details.
16 ;; You should have received a copy of the GNU General Public License
17 ;; along with GCC; see the file COPYING3. If not see
18 ;; <http://www.gnu.org/licenses/>.
;; Entries added to the "unspec" C enum.  Only two enumerators of the
;; list are visible in this excerpt, and its closing bracket is not.
20 (define_c_enum "unspec" [
55 UNSPEC_XOP_UNSIGNED_CMP
66 UNSPEC_AESKEYGENASSIST
;; Entries added to the "unspecv" C enum; none are visible in this excerpt.
88 (define_c_enum "unspecv" [
98 ;; All vector modes including V?TImode, used in move patterns.
;; Each row pairs a 256-bit mode with its 128-bit counterpart.  The
;; 256-bit entries are enabled only under TARGET_AVX; the 128-bit
;; entries carry no condition.
99 (define_mode_iterator V16
100 [(V32QI "TARGET_AVX") V16QI
101 (V16HI "TARGET_AVX") V8HI
102 (V8SI "TARGET_AVX") V4SI
103 (V4DI "TARGET_AVX") V2DI
104 (V2TI "TARGET_AVX") V1TI
105 (V8SF "TARGET_AVX") V4SF
106 (V4DF "TARGET_AVX") V2DF])
;; Same integer and float vector modes as V16 but without the
;; V2TI/V1TI row; here V2DF is additionally conditional on TARGET_SSE2.
109 (define_mode_iterator V
110 [(V32QI "TARGET_AVX") V16QI
111 (V16HI "TARGET_AVX") V8HI
112 (V8SI "TARGET_AVX") V4SI
113 (V4DI "TARGET_AVX") V2DI
114 (V8SF "TARGET_AVX") V4SF
115 (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
117 ;; All 128bit vector modes
;; V2DF is the only entry with a condition (TARGET_SSE2).
118 (define_mode_iterator V_128
119 [V16QI V8HI V4SI V2DI V4SF (V2DF "TARGET_SSE2")])
121 ;; All 256bit vector modes
;; No entry carries a target condition.
122 (define_mode_iterator V_256
123 [V32QI V16HI V8SI V4DI V8SF V4DF])
125 ;; All vector float modes
;; 256-bit float modes need TARGET_AVX, V2DF needs TARGET_SSE2, and
;; V4SF is unconditional.
126 (define_mode_iterator VF
127 [(V8SF "TARGET_AVX") V4SF
128 (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
130 ;; All SFmode vector float modes
;; V8SF needs TARGET_AVX; V4SF is unconditional.
131 (define_mode_iterator VF1
132 [(V8SF "TARGET_AVX") V4SF])
134 ;; All DFmode vector float modes
;; V4DF needs TARGET_AVX; unlike in VF, V2DF carries no condition here.
135 (define_mode_iterator VF2
136 [(V4DF "TARGET_AVX") V2DF])
138 ;; All 128bit vector float modes
;; V2DF needs TARGET_SSE2; V4SF is unconditional.
139 (define_mode_iterator VF_128
140 [V4SF (V2DF "TARGET_SSE2")])
142 ;; All 256bit vector float modes
;; NOTE(review): the mode list of VF_256 is not visible in this excerpt.
143 (define_mode_iterator VF_256
146 ;; All vector integer modes
;; 256-bit integer modes need TARGET_AVX; 128-bit ones are unconditional.
147 (define_mode_iterator VI
148 [(V32QI "TARGET_AVX") V16QI
149 (V16HI "TARGET_AVX") V8HI
150 (V8SI "TARGET_AVX") V4SI
151 (V4DI "TARGET_AVX") V2DI])
;; Same modes as VI, but the 256-bit entries require TARGET_AVX2
;; instead of TARGET_AVX.
153 (define_mode_iterator VI_AVX2
154 [(V32QI "TARGET_AVX2") V16QI
155 (V16HI "TARGET_AVX2") V8HI
156 (V8SI "TARGET_AVX2") V4SI
157 (V4DI "TARGET_AVX2") V2DI])
159 ;; All QImode vector integer modes
;; V32QI needs TARGET_AVX; V16QI is unconditional.
160 (define_mode_iterator VI1
161 [(V32QI "TARGET_AVX") V16QI])
163 ;; All DImode vector integer modes
;; V4DI needs TARGET_AVX; V2DI is unconditional.
164 (define_mode_iterator VI8
165 [(V4DI "TARGET_AVX") V2DI])
;; The four *_AVX2 iterators below each pair one element width's
;; 256-bit mode (enabled under TARGET_AVX2) with its unconditional
;; 128-bit mode: QI, HI, SI and DI elements respectively.
167 (define_mode_iterator VI1_AVX2
168 [(V32QI "TARGET_AVX2") V16QI])
170 (define_mode_iterator VI2_AVX2
171 [(V16HI "TARGET_AVX2") V8HI])
173 (define_mode_iterator VI4_AVX2
174 [(V8SI "TARGET_AVX2") V4SI])
176 (define_mode_iterator VI8_AVX2
177 [(V4DI "TARGET_AVX2") V2DI])
179 ;; ??? We should probably use TImode instead.
;; V2TI needs TARGET_AVX2; V1TI is unconditional.
180 (define_mode_iterator VIMAX_AVX2
181 [(V2TI "TARGET_AVX2") V1TI])
183 ;; ??? This should probably be dropped in favor of VIMAX_AVX2.
;; Differs from VIMAX_AVX2 only in using scalar TI rather than V1TI for
;; the 128-bit entry.
184 (define_mode_iterator SSESCALARMODE
185 [(V2TI "TARGET_AVX2") TI])
;; Combinations of integer element widths.  The digits in each iterator
;; name match the element modes listed (1 = QI, 2 = HI, 4 = SI, 8 = DI).
;; In every one of them the 256-bit mode needs TARGET_AVX2 and the
;; 128-bit mode is unconditional.
187 (define_mode_iterator VI12_AVX2
188 [(V32QI "TARGET_AVX2") V16QI
189 (V16HI "TARGET_AVX2") V8HI])
191 (define_mode_iterator VI24_AVX2
192 [(V16HI "TARGET_AVX2") V8HI
193 (V8SI "TARGET_AVX2") V4SI])
195 (define_mode_iterator VI124_AVX2
196 [(V32QI "TARGET_AVX2") V16QI
197 (V16HI "TARGET_AVX2") V8HI
198 (V8SI "TARGET_AVX2") V4SI])
200 (define_mode_iterator VI248_AVX2
201 [(V16HI "TARGET_AVX2") V8HI
202 (V8SI "TARGET_AVX2") V4SI
203 (V4DI "TARGET_AVX2") V2DI])
205 (define_mode_iterator VI48_AVX2
206 [(V8SI "TARGET_AVX2") V4SI
207 (V4DI "TARGET_AVX2") V2DI])
;; In the visible entries both the 128-bit and the 256-bit SI/DI integer
;; modes are conditional on TARGET_AVX2.
;; NOTE(review): the start of this list is not visible in this excerpt.
209 (define_mode_iterator V48_AVX2
212 (V4SI "TARGET_AVX2") (V2DI "TARGET_AVX2")
213 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")])
;; Maps 128-bit integer modes (including V1TI) to "sse2" and their
;; 256-bit counterparts to "avx2".
215 (define_mode_attr sse2_avx2
216 [(V16QI "sse2") (V32QI "avx2")
217 (V8HI "sse2") (V16HI "avx2")
218 (V4SI "sse2") (V8SI "avx2")
219 (V2DI "sse2") (V4DI "avx2")
220 (V1TI "sse2") (V2TI "avx2")])
;; Maps 64-bit V4HI, the 128-bit integer modes and scalar TI to "ssse3",
;; and the 256-bit modes to "avx2".
222 (define_mode_attr ssse3_avx2
223 [(V16QI "ssse3") (V32QI "avx2")
224 (V4HI "ssse3") (V8HI "ssse3") (V16HI "avx2")
225 (V4SI "ssse3") (V8SI "avx2")
226 (V2DI "ssse3") (V4DI "avx2")
227 (TI "ssse3") (V2TI "avx2")])
;; Maps 128-bit integer modes to "sse4_1" and 256-bit ones to "avx2".
229 (define_mode_attr sse4_1_avx2
230 [(V16QI "sse4_1") (V32QI "avx2")
231 (V8HI "sse4_1") (V16HI "avx2")
232 (V4SI "sse4_1") (V8SI "avx2")
233 (V2DI "sse4_1") (V4DI "avx2")])
;; Keyed on element kind rather than width: every float vector mode maps
;; to "avx", every SI/DI integer vector mode to "avx2".
235 (define_mode_attr avx_avx2
236 [(V4SF "avx") (V2DF "avx")
237 (V8SF "avx") (V4DF "avx")
238 (V4SI "avx2") (V2DI "avx2")
239 (V8SI "avx2") (V4DI "avx2")])
;; Maps 128-bit integer modes to "vec" and 256-bit ones to "avx2".
241 (define_mode_attr vec_avx2
242 [(V16QI "vec") (V32QI "avx2")
243 (V8HI "vec") (V16HI "avx2")
244 (V4SI "vec") (V8SI "avx2")
245 (V2DI "vec") (V4DI "avx2")])
;; Maps an HI- or QI-element vector mode to the mode with the same
;; number of elements and the next wider element (e.g. V8HI -> V8SI).
247 (define_mode_attr ssedoublemode
248 [(V16HI "V16SI") (V8HI "V8SI") (V4HI "V4SI")
249 (V32QI "V32HI") (V16QI "V16HI")])
;; Maps a DI-element vector mode to the QI-element vector mode of the
;; same total size.
251 (define_mode_attr ssebytemode
252 [(V4DI "V32QI") (V2DI "V16QI")])
254 ;; All 128bit vector integer modes
;; None of the iterators from here through VI8F_256 attaches a target
;; condition to any of its modes.
255 (define_mode_iterator VI_128 [V16QI V8HI V4SI V2DI])
257 ;; All 256bit vector integer modes
258 (define_mode_iterator VI_256 [V32QI V16HI V8SI V4DI])
260 ;; Random 128bit vector integer mode combinations
;; The digits in each name match the element modes listed
;; (1 = QI, 2 = HI, 4 = SI, 8 = DI).
261 (define_mode_iterator VI12_128 [V16QI V8HI])
262 (define_mode_iterator VI14_128 [V16QI V4SI])
263 (define_mode_iterator VI124_128 [V16QI V8HI V4SI])
264 (define_mode_iterator VI128_128 [V16QI V8HI V2DI])
265 (define_mode_iterator VI24_128 [V8HI V4SI])
266 (define_mode_iterator VI248_128 [V8HI V4SI V2DI])
267 (define_mode_iterator VI48_128 [V4SI V2DI])
269 ;; Random 256bit vector integer mode combinations
270 (define_mode_iterator VI124_256 [V32QI V16HI V8SI])
271 (define_mode_iterator VI48_256 [V8SI V4DI])
273 ;; Int-float size matches
;; Each iterator pairs an integer and a float vector mode that have the
;; same element count (e.g. V4SI with V4SF).
274 (define_mode_iterator VI4F_128 [V4SI V4SF])
275 (define_mode_iterator VI8F_128 [V2DI V2DF])
276 (define_mode_iterator VI4F_256 [V8SI V8SF])
277 (define_mode_iterator VI8F_256 [V4DI V4DF])
279 ;; Mapping from float mode to required SSE level
;; SF-based modes map to "sse", DF-based 128-bit/scalar modes to "sse2",
;; and both 256-bit float modes to "avx".  Patterns in this file use the
;; value as a name prefix, e.g. "<sse>_loadu...".
280 (define_mode_attr sse
281 [(SF "sse") (DF "sse2")
282 (V4SF "sse") (V2DF "sse2")
283 (V8SF "avx") (V4DF "avx")])
;; Name prefix for QI/DI integer vector patterns (e.g. "<sse2>_loaddqu..."):
;; "sse2" for the 128-bit mode, "avx" for the 256-bit mode.
285 (define_mode_attr sse2
286 [(V16QI "sse2") (V32QI "avx")
287 (V2DI "sse2") (V4DI "avx")])
;; Name prefix used by "<sse3>_lddqu...": "sse3" for V16QI, "avx" for V32QI.
289 (define_mode_attr sse3
290 [(V16QI "sse3") (V32QI "avx")])
;; "sse4_1" for 128-bit float modes, "avx" for 256-bit float modes.
292 (define_mode_attr sse4_1
293 [(V4SF "sse4_1") (V2DF "sse4_1")
294 (V8SF "avx") (V4DF "avx")])
;; Pattern-name suffix: "256" for 256-bit modes, empty for 128-bit modes.
296 (define_mode_attr avxsizesuffix
297 [(V32QI "256") (V16HI "256") (V8SI "256") (V4DI "256")
298 (V16QI "") (V8HI "") (V4SI "") (V2DI "")
299 (V8SF "256") (V4DF "256")
300 (V4SF "") (V2DF "")])
302 ;; SSE instruction mode
;; Integer vector modes map to OI (256-bit) or TI (128-bit); float vector
;; modes map to themselves.  Used as the "mode" attribute value by the
;; move patterns in this file.
;; NOTE(review): the end of this list is not visible in this excerpt.
303 (define_mode_attr sseinsnmode
304 [(V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI") (V2TI "OI")
305 (V16QI "TI") (V8HI "TI") (V4SI "TI") (V2DI "TI") (V1TI "TI")
306 (V8SF "V8SF") (V4DF "V4DF")
307 (V4SF "V4SF") (V2DF "V2DF")
310 ;; Mapping of vector float modes to an integer mode of the same size
;; Float modes map to the integer vector mode with the same element
;; count and width; integer vector modes map to themselves.
311 (define_mode_attr sseintvecmode
312 [(V8SF "V8SI") (V4DF "V4DI")
313 (V4SF "V4SI") (V2DF "V2DI")
314 (V8SI "V8SI") (V4DI "V4DI")
315 (V4SI "V4SI") (V2DI "V2DI")
316 (V16HI "V16HI") (V8HI "V8HI")
317 (V32QI "V32QI") (V16QI "V16QI")])
;; The same mapping as sseintvecmode with the result spelled in lower case.
319 (define_mode_attr sseintvecmodelower
320 [(V8SF "v8si") (V4DF "v4di")
321 (V4SF "v4si") (V2DF "v2di")
322 (V8SI "v8si") (V4DI "v4di")
323 (V4SI "v4si") (V2DI "v2di")
324 (V16HI "v16hi") (V8HI "v8hi")
325 (V32QI "v32qi") (V16QI "v16qi")])
327 ;; Mapping of vector modes to a vector mode of double size
;; The element mode is kept and the element count is doubled
;; (e.g. V4SI -> V8SI, V8SF -> V16SF).
328 (define_mode_attr ssedoublevecmode
329 [(V32QI "V64QI") (V16HI "V32HI") (V8SI "V16SI") (V4DI "V8DI")
330 (V16QI "V32QI") (V8HI "V16HI") (V4SI "V8SI") (V2DI "V4DI")
331 (V8SF "V16SF") (V4DF "V8DF")
332 (V4SF "V8SF") (V2DF "V4DF")])
334 ;; Mapping of vector modes to a vector mode of half size
;; The element mode is kept and the element count is halved
;; (e.g. V8SI -> V4SI, V16QI -> V8QI).
;; NOTE(review): the end of this list is not visible in this excerpt.
335 (define_mode_attr ssehalfvecmode
336 [(V32QI "V16QI") (V16HI "V8HI") (V8SI "V4SI") (V4DI "V2DI")
337 (V16QI "V8QI") (V8HI "V4HI") (V4SI "V2SI")
338 (V8SF "V4SF") (V4DF "V2DF")
341 ;; Mapping of vector modes to packed single mode of the same size
;; Every 256-bit mode maps to V8SF and every 128-bit mode to V4SF,
;; regardless of element type.
342 (define_mode_attr ssePSmode
343 [(V32QI "V8SF") (V16QI "V4SF")
344 (V16HI "V8SF") (V8HI "V4SF")
345 (V8SI "V8SF") (V4SI "V4SF")
346 (V4DI "V8SF") (V2DI "V4SF")
347 (V2TI "V8SF") (V1TI "V4SF")
348 (V8SF "V8SF") (V4SF "V4SF")
349 (V4DF "V8SF") (V2DF "V4SF")])
351 ;; Mapping of vector modes back to the scalar modes
;; The result is the element mode of the vector (e.g. V8HI -> HI).
352 (define_mode_attr ssescalarmode
353 [(V32QI "QI") (V16HI "HI") (V8SI "SI") (V4DI "DI")
354 (V16QI "QI") (V8HI "HI") (V4SI "SI") (V2DI "DI")
355 (V8SF "SF") (V4DF "DF")
356 (V4SF "SF") (V2DF "DF")])
358 ;; Pointer size override for scalar modes (Intel asm dialect)
;; One letter per element mode: QI -> "b", HI -> "w", SI/SF -> "k",
;; DI/DF -> "q".  Scalar ("vm") patterns in this file apply it to the
;; Intel-syntax operand as %<iptr>2 or %<iptr>1.
;; NOTE(review): the end of this list is not visible in this excerpt.
359 (define_mode_attr iptr
360 [(V32QI "b") (V16HI "w") (V8SI "k") (V4DI "q")
361 (V16QI "b") (V8HI "w") (V4SI "k") (V2DI "q")
362 (V8SF "k") (V4DF "q")
363 (V4SF "k") (V2DF "q")
366 ;; Number of scalar elements in each vector type
;; The value is the element count as a decimal string.
367 (define_mode_attr ssescalarnum
368 [(V32QI "32") (V16HI "16") (V8SI "8") (V4DI "4")
369 (V16QI "16") (V8HI "8") (V4SI "4") (V2DI "2")
370 (V8SF "8") (V4DF "4")
371 (V4SF "4") (V2DF "2")])
373 ;; Mask of scalar elements in each vector type
;; Each value is the corresponding ssescalarnum value minus one.
374 (define_mode_attr ssescalarnummask
375 [(V32QI "31") (V16HI "15") (V8SI "7") (V4DI "3")
376 (V16QI "15") (V8HI "7") (V4SI "3") (V2DI "1")
377 (V8SF "7") (V4DF "3")
378 (V4SF "3") (V2DF "1")])
380 ;; SSE prefix for integer vector modes
;; In the visible entries integer modes map to "p" and float modes to
;; the empty string.
;; NOTE(review): the middle of this list is not visible in this excerpt.
381 (define_mode_attr sseintprefix
382 [(V2DI "p") (V2DF "")
385 (V8SI "p") (V8SF "")])
387 ;; SSE scalar suffix for vector modes
;; In the visible entries SF/SI-element modes map to "ss" and
;; DF/DI-element modes to "sd".  The "vm" patterns in this file append
;; the value to the mnemonic.
;; NOTE(review): the first and last lines of this list are not visible
;; in this excerpt.
388 (define_mode_attr ssescalarmodesuffix
390 (V8SF "ss") (V4DF "sd")
391 (V4SF "ss") (V2DF "sd")
392 (V8SI "ss") (V4DI "sd")
395 ;; Pack/unpack vector modes
;; Unpack: same total size, half as many elements of the next wider
;; element mode (e.g. V16QI -> V8HI).
396 (define_mode_attr sseunpackmode
397 [(V16QI "V8HI") (V8HI "V4SI") (V4SI "V2DI")
398 (V32QI "V16HI") (V16HI "V8SI") (V8SI "V4DI")])
;; Pack: the inverse of sseunpackmode (e.g. V8HI -> V16QI).
400 (define_mode_attr ssepackmode
401 [(V8HI "V16QI") (V4SI "V8HI") (V2DI "V4SI")
402 (V16HI "V32QI") (V8SI "V16HI") (V4DI "V8SI")])
404 ;; Mapping of the max integer size for xop rotate immediate constraint
;; Each value is the element width in bits minus one.
405 (define_mode_attr sserotatemax
406 [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")])
408 ;; Mapping of mode to cast intrinsic name
;; Defined for three 256-bit modes only: V8SI -> "si", V8SF -> "ps",
;; V4DF -> "pd".
409 (define_mode_attr castmode [(V8SI "si") (V8SF "ps") (V4DF "pd")])
411 ;; Instruction suffix for sign and zero extensions.
;; A code attribute, keyed on the rtx code rather than on a mode.
412 (define_code_attr extsuffix [(sign_extend "sx") (zero_extend "zx")])
414 ;; i128 for integer vectors and TARGET_AVX2, f128 otherwise.
;; Float modes map to the literal "f128"; integer modes map to "%~128",
;; leaving the leading letter to the %~ output modifier.
415 (define_mode_attr i128
416 [(V8SF "f128") (V4DF "f128") (V32QI "%~128") (V16HI "%~128")
417 (V8SI "%~128") (V4DI "%~128")])
;; Three 256-bit modes with no target condition attached.
420 (define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF])
422 ;; Mapping of immediate bits for blend instructions
;; Each value equals 2^N - 1 for a mode with N elements, i.e. one mask
;; bit per vector element.
423 (define_mode_attr blendbits
424 [(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")])
426 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
428 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
432 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
434 ;; All of these patterns are enabled for SSE1 as well as SSE2.
435 ;; This is essential for maintaining stable calling conventions.
;; Expander for vector moves over every mode in V16.  Both operands use
;; the "nonimmediate_operand" predicate; the expansion is delegated to
;; ix86_expand_vector_move.
;; NOTE(review): the enabling condition and the rest of the preparation
;; statements are not visible in this excerpt.
437 (define_expand "mov<mode>"
438 [(set (match_operand:V16 0 "nonimmediate_operand")
439 (match_operand:V16 1 "nonimmediate_operand"))]
442 ix86_expand_vector_move (<MODE>mode, operands);
;; Move insn for the V16 modes with three alternatives:
;;   0: constraint-C source into an SSE register,
;;   1: SSE register or memory into an SSE register,
;;   2: SSE register into memory.
;; The visible part of the condition requires at least one operand to
;; be a register.
;;
;; Output selection: one branch of the which_alternative switch returns
;; standard_sse_constant_opcode (presumably alternative 0; the case
;; labels are not visible in this excerpt).  The remaining branches
;; switch on the insn's "mode" attribute and pick a ps, pd or dq move.
;; Each picks the unaligned v-prefixed form when either operand is a
;; misaligned_operand (the first half of that test is not visible) and
;; the aligned %v form otherwise.
;;
;; The "mode" attribute is the first match of:
;;   TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL            -> <ssePSmode>
;;   alternative 2 and TARGET_SSE_TYPELESS_STORES     -> <ssePSmode>
;;   TARGET_AVX                                       -> <sseinsnmode>
;;   !TARGET_SSE2 or optimizing the function for size -> V4SF
;;   alternative 0 and TARGET_SSE_LOAD0_BY_PXOR       -> (value not visible)
;;   otherwise                                        -> <sseinsnmode>
446 (define_insn "*mov<mode>_internal"
447 [(set (match_operand:V16 0 "nonimmediate_operand" "=x,x ,m")
448 (match_operand:V16 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
450 && (register_operand (operands[0], <MODE>mode)
451 || register_operand (operands[1], <MODE>mode))"
453 switch (which_alternative)
456 return standard_sse_constant_opcode (insn, operands[1]);
459 switch (get_attr_mode (insn))
464 && (misaligned_operand (operands[0], <MODE>mode)
465 || misaligned_operand (operands[1], <MODE>mode)))
466 return "vmovups\t{%1, %0|%0, %1}";
468 return "%vmovaps\t{%1, %0|%0, %1}";
473 && (misaligned_operand (operands[0], <MODE>mode)
474 || misaligned_operand (operands[1], <MODE>mode)))
475 return "vmovupd\t{%1, %0|%0, %1}";
477 return "%vmovapd\t{%1, %0|%0, %1}";
482 && (misaligned_operand (operands[0], <MODE>mode)
483 || misaligned_operand (operands[1], <MODE>mode)))
484 return "vmovdqu\t{%1, %0|%0, %1}";
486 return "%vmovdqa\t{%1, %0|%0, %1}";
495 [(set_attr "type" "sselog1,ssemov,ssemov")
496 (set_attr "prefix" "maybe_vex")
498 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
499 (const_string "<ssePSmode>")
500 (and (eq_attr "alternative" "2")
501 (match_test "TARGET_SSE_TYPELESS_STORES"))
502 (const_string "<ssePSmode>")
503 (match_test "TARGET_AVX")
504 (const_string "<sseinsnmode>")
505 (ior (not (match_test "TARGET_SSE2"))
506 (match_test "optimize_function_for_size_p (cfun)"))
507 (const_string "V4SF")
508 (and (eq_attr "alternative" "0")
509 (match_test "TARGET_SSE_LOAD0_BY_PXOR"))
512 (const_string "<sseinsnmode>")))])
;; Emits %vmovq from a V2DI register-or-memory operand into a V2DI SSE
;; register.  The visible RTL selects element 0 of operand 1; the
;; Intel-syntax side prints the source with the %q1 modifier.
;; NOTE(review): the outer RTL operators and the enabling condition are
;; not visible in this excerpt.
514 (define_insn "sse2_movq128"
515 [(set (match_operand:V2DI 0 "register_operand" "=x")
518 (match_operand:V2DI 1 "nonimmediate_operand" "xm")
519 (parallel [(const_int 0)]))
522 "%vmovq\t{%1, %0|%0, %q1}"
523 [(set_attr "type" "ssemov")
524 (set_attr "prefix" "maybe_vex")
525 (set_attr "mode" "TI")])
527 ;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
528 ;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
529 ;; from memory, we'd prefer to load the memory directly into the %xmm
530 ;; register. To facilitate this happy circumstance, this pattern won't
531 ;; split until after register allocation. If the 64-bit value didn't
532 ;; come from memory, this is the best we can do. This is much better
533 ;; than storing %edx:%eax into a stack temporary and loading an %xmm register from it.
;; Operand 1 is a DImode value in general registers (alternative 0) or
;; in memory (alternative 1), viewed as V4SI through a subreg.  Operand 2
;; is a V4SI scratch: an early-clobbered SSE register for the register
;; alternative, unused ("X") for the memory one.
;; Enabled for 32-bit SSE2 targets with TARGET_INTER_UNIT_MOVES_TO_VEC;
;; the split runs only after reload.
;;
;; Split code:
;;  - register source: load the SImode word at byte offset 0 into
;;    operand 0 and the word at offset 4 into the scratch (both via
;;    gen_sse2_loadld with a zero vector), then combine them with
;;    gen_vec_interleave_lowv4si;
;;  - memory source: gen_vec_concatv2di of the memory operand with
;;    const0_rtx into the V2DI low part of operand 0.
536 (define_insn_and_split "movdi_to_sse"
538 [(set (match_operand:V4SI 0 "register_operand" "=?x,x")
539 (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
540 (clobber (match_scratch:V4SI 2 "=&x,X"))])]
541 "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_TO_VEC"
543 "&& reload_completed"
546 if (register_operand (operands[1], DImode))
548 /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
549 Assemble the 64-bit DImode value in an xmm register. */
550 emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
551 gen_rtx_SUBREG (SImode, operands[1], 0)));
552 emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
553 gen_rtx_SUBREG (SImode, operands[1], 4)));
554 emit_insn (gen_vec_interleave_lowv4si (operands[0], operands[0],
557 else if (memory_operand (operands[1], DImode))
558 emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]),
559 operands[1], const0_rtx));
;; NOTE(review): the header lines of the two definitions below are not
;; visible in this excerpt; they look like post-reload splitters.
;;
;; V4SF case: matches a register set from a
;; "zero_extended_scalar_load_operand" under TARGET_SSE after reload.
;; Operand 1 is narrowed to its SFmode subreg at offset 0 and operand 2
;; becomes the V4SF zero vector; the replacement RTL uses a
;; vec_duplicate of operand 1.
565 [(set (match_operand:V4SF 0 "register_operand")
566 (match_operand:V4SF 1 "zero_extended_scalar_load_operand"))]
567 "TARGET_SSE && reload_completed"
570 (vec_duplicate:V4SF (match_dup 1))
574 operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
575 operands[2] = CONST0_RTX (V4SFmode);
;; V2DF case: the same idea under TARGET_SSE2.  Operand 1 is narrowed to
;; its DFmode subreg at offset 0 and concatenated with a DFmode zero
;; (operand 2) to form the V2DF result.
579 [(set (match_operand:V2DF 0 "register_operand")
580 (match_operand:V2DF 1 "zero_extended_scalar_load_operand"))]
581 "TARGET_SSE2 && reload_completed"
582 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
584 operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
585 operands[2] = CONST0_RTX (DFmode);
;; Push expander for the V16 modes: takes one register operand and
;; delegates to ix86_expand_push.
;; NOTE(review): the enabling condition is not visible in this excerpt.
588 (define_expand "push<mode>1"
589 [(match_operand:V16 0 "register_operand")]
592 ix86_expand_push (<MODE>mode, operands[0]);
;; Misaligned-move expander for the V16 modes: both operands are
;; "nonimmediate_operand"; delegates to ix86_expand_vector_move_misalign.
;; NOTE(review): the enabling condition is not visible in this excerpt.
596 (define_expand "movmisalign<mode>"
597 [(set (match_operand:V16 0 "nonimmediate_operand")
598 (match_operand:V16 1 "nonimmediate_operand"))]
601 ix86_expand_vector_move_misalign (<MODE>mode, operands);
;; Unaligned float-vector load: memory operand 1 into SSE register
;; operand 0, for every mode in VF.  The output code switches on the
;; insn's "mode" attribute and returns either %vmovups or
;; %vmovu<ssemodesuffix> (the case labels are not visible in this
;; excerpt).
;; The "mode" attribute is the first match of:
;;   TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL -> <ssePSmode>
;;   TARGET_AVX                            -> <MODE>
;;   optimizing the function for size      -> V4SF
;;   otherwise                             -> <MODE>
605 (define_insn "<sse>_loadu<ssemodesuffix><avxsizesuffix>"
606 [(set (match_operand:VF 0 "register_operand" "=x")
608 [(match_operand:VF 1 "memory_operand" "m")]
612 switch (get_attr_mode (insn))
616 return "%vmovups\t{%1, %0|%0, %1}";
618 return "%vmovu<ssemodesuffix>\t{%1, %0|%0, %1}";
621 [(set_attr "type" "ssemov")
622 (set_attr "movu" "1")
623 (set_attr "prefix" "maybe_vex")
625 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
626 (const_string "<ssePSmode>")
627 (match_test "TARGET_AVX")
628 (const_string "<MODE>")
629 (match_test "optimize_function_for_size_p (cfun)")
630 (const_string "V4SF")
632 (const_string "<MODE>")))])
;; Unaligned float-vector store: SSE register operand 1 into memory
;; operand 0.  Mirrors the load pattern, except that the first "mode"
;; test also selects <ssePSmode> when TARGET_SSE_TYPELESS_STORES is set.
634 (define_insn "<sse>_storeu<ssemodesuffix><avxsizesuffix>"
635 [(set (match_operand:VF 0 "memory_operand" "=m")
637 [(match_operand:VF 1 "register_operand" "x")]
641 switch (get_attr_mode (insn))
645 return "%vmovups\t{%1, %0|%0, %1}";
647 return "%vmovu<ssemodesuffix>\t{%1, %0|%0, %1}";
650 [(set_attr "type" "ssemov")
651 (set_attr "movu" "1")
652 (set_attr "prefix" "maybe_vex")
654 (cond [(ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
655 (match_test "TARGET_SSE_TYPELESS_STORES"))
656 (const_string "<ssePSmode>")
657 (match_test "TARGET_AVX")
658 (const_string "<MODE>")
659 (match_test "optimize_function_for_size_p (cfun)")
660 (const_string "V4SF")
662 (const_string "<MODE>")))])
;; Unaligned integer-vector load for the QI-element modes in VI1:
;; memory operand 1 into SSE register operand 0, wrapped in an unspec.
;; The output code switches on the insn's "mode" attribute and returns
;; either %vmovups or %vmovdqu (the case labels are not visible in this
;; excerpt).  "prefix_data16" is computed from a TARGET_AVX test whose
;; result values are not visible here.
;; The "mode" attribute is the first match of:
;;   TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL -> <ssePSmode>
;;   TARGET_AVX                            -> <sseinsnmode>
;;   optimizing the function for size      -> V4SF
;;   otherwise                             -> <sseinsnmode>
664 (define_insn "<sse2>_loaddqu<avxsizesuffix>"
665 [(set (match_operand:VI1 0 "register_operand" "=x")
666 (unspec:VI1 [(match_operand:VI1 1 "memory_operand" "m")]
670 switch (get_attr_mode (insn))
674 return "%vmovups\t{%1, %0|%0, %1}";
676 return "%vmovdqu\t{%1, %0|%0, %1}";
679 [(set_attr "type" "ssemov")
680 (set_attr "movu" "1")
681 (set (attr "prefix_data16")
683 (match_test "TARGET_AVX")
686 (set_attr "prefix" "maybe_vex")
688 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
689 (const_string "<ssePSmode>")
690 (match_test "TARGET_AVX")
691 (const_string "<sseinsnmode>")
692 (match_test "optimize_function_for_size_p (cfun)")
693 (const_string "V4SF")
695 (const_string "<sseinsnmode>")))])
;; Unaligned integer-vector store: SSE register operand 1 into memory
;; operand 0.  Mirrors the load pattern, except that the first "mode"
;; test also selects <ssePSmode> when TARGET_SSE_TYPELESS_STORES is set.
697 (define_insn "<sse2>_storedqu<avxsizesuffix>"
698 [(set (match_operand:VI1 0 "memory_operand" "=m")
699 (unspec:VI1 [(match_operand:VI1 1 "register_operand" "x")]
703 switch (get_attr_mode (insn))
707 return "%vmovups\t{%1, %0|%0, %1}";
709 return "%vmovdqu\t{%1, %0|%0, %1}";
712 [(set_attr "type" "ssemov")
713 (set_attr "movu" "1")
714 (set (attr "prefix_data16")
716 (match_test "TARGET_AVX")
719 (set_attr "prefix" "maybe_vex")
721 (cond [(ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
722 (match_test "TARGET_SSE_TYPELESS_STORES"))
723 (const_string "<ssePSmode>")
724 (match_test "TARGET_AVX")
725 (const_string "<sseinsnmode>")
726 (match_test "optimize_function_for_size_p (cfun)")
727 (const_string "V4SF")
729 (const_string "<sseinsnmode>")))])
;; Emits %vlddqu: loads memory operand 1 into SSE register operand 0 for
;; the QI-element modes in VI1.  Unlike the loaddqu pattern the template
;; is fixed and the "mode" attribute is always <sseinsnmode>.
;; "prefix_data16" and "prefix_rep" are both computed from TARGET_AVX
;; tests whose result values are not visible in this excerpt.
731 (define_insn "<sse3>_lddqu<avxsizesuffix>"
732 [(set (match_operand:VI1 0 "register_operand" "=x")
733 (unspec:VI1 [(match_operand:VI1 1 "memory_operand" "m")]
736 "%vlddqu\t{%1, %0|%0, %1}"
737 [(set_attr "type" "ssemov")
738 (set_attr "movu" "1")
739 (set (attr "prefix_data16")
741 (match_test "TARGET_AVX")
744 (set (attr "prefix_rep")
746 (match_test "TARGET_AVX")
749 (set_attr "prefix" "maybe_vex")
750 (set_attr "mode" "<sseinsnmode>")])
;; Emits movnti: stores general register operand 1 (constraint "r") to
;; memory operand 0, for the modes in SWI48.  There is no %v prefix on
;; the mnemonic and "prefix_data16" is fixed at "0".
;; NOTE(review): the unspec code and the enabling condition are not
;; visible in this excerpt.
752 (define_insn "sse2_movnti<mode>"
753 [(set (match_operand:SWI48 0 "memory_operand" "=m")
754 (unspec:SWI48 [(match_operand:SWI48 1 "register_operand" "r")]
757 "movnti\t{%1, %0|%0, %1}"
758 [(set_attr "type" "ssemov")
759 (set_attr "prefix_data16" "0")
760 (set_attr "mode" "<MODE>")])
;; Emits %vmovnt<ssemodesuffix>: stores SSE register operand 1 to memory
;; operand 0, for every float vector mode in VF.
762 (define_insn "<sse>_movnt<mode>"
763 [(set (match_operand:VF 0 "memory_operand" "=m")
764 (unspec:VF [(match_operand:VF 1 "register_operand" "x")]
767 "%vmovnt<ssemodesuffix>\t{%1, %0|%0, %1}"
768 [(set_attr "type" "ssemov")
769 (set_attr "prefix" "maybe_vex")
770 (set_attr "mode" "<MODE>")])
;; Emits %vmovntdq: stores SSE register operand 1 to memory operand 0,
;; for the DI-element modes in VI8.
;; NOTE(review): "type" is "ssecvt" here while the two sibling movnt
;; patterns above use "ssemov" -- confirm this is intentional.
772 (define_insn "<sse2>_movnt<mode>"
773 [(set (match_operand:VI8 0 "memory_operand" "=m")
774 (unspec:VI8 [(match_operand:VI8 1 "register_operand" "x")]
777 "%vmovntdq\t{%1, %0|%0, %1}"
778 [(set_attr "type" "ssecvt")
779 (set (attr "prefix_data16")
781 (match_test "TARGET_AVX")
784 (set_attr "prefix" "maybe_vex")
785 (set_attr "mode" "<sseinsnmode>")])
787 ; Expand patterns for non-temporal stores. At the moment, only those
788 ; that directly map to insns are defined; it would be possible to
789 ; define patterns for other modes that would expand to several insns.
791 ;; Modes handled by storent patterns.
;; Per-mode enabling conditions:
;;   DI           TARGET_SSE2 && TARGET_64BIT
;;   SI           TARGET_SSE2
;;   SF, DF       TARGET_SSE4A
;;   V4DI, V8SF, V4DF   TARGET_AVX
;;   V2DI, V2DF   TARGET_SSE2
;;   V4SF         unconditional
792 (define_mode_iterator STORENT_MODE
793 [(DI "TARGET_SSE2 && TARGET_64BIT") (SI "TARGET_SSE2")
794 (SF "TARGET_SSE4A") (DF "TARGET_SSE4A")
795 (V4DI "TARGET_AVX") (V2DI "TARGET_SSE2")
796 (V8SF "TARGET_AVX") V4SF
797 (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
;; Expander over STORENT_MODE: register operand 1 is stored to memory
;; operand 0.
;; NOTE(review): the unspec wrapper and the enabling condition are not
;; visible in this excerpt.
799 (define_expand "storent<mode>"
800 [(set (match_operand:STORENT_MODE 0 "memory_operand")
802 [(match_operand:STORENT_MODE 1 "register_operand")]
806 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
808 ;; Parallel floating point arithmetic
810 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Unary float-vector expander over VF.  All work is delegated to
;; ix86_expand_fp_absneg_operator, followed by DONE.
;; NOTE(review): the rtx code iterator and the enabling condition are
;; not visible in this excerpt.
812 (define_expand "<code><mode>2"
813 [(set (match_operand:VF 0 "register_operand")
815 (match_operand:VF 1 "register_operand")))]
817 "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
;; Matches an "absneg_operator" (operand 3) applied to operand 1, with
;; operand 2 appearing in a (use ...).  Four alternatives: two tie
;; operand 0 to one of the inputs (isa noavx), two do not (isa avx).
;;
;; Split after reload: if operand 1 is in memory the two inputs are
;; swapped, so that op1 = operand 2 and op2 = operand 1.  The insn then
;; becomes a SET of operand 0 to (XOR op1 op2) when operand 3 is NEG,
;; or to (AND op1 op2) otherwise.
;; NOTE(review): the branch guarded by rtx_equal_p (operands[0],
;; operands[1]) is not visible in this excerpt.
819 (define_insn_and_split "*absneg<mode>2"
820 [(set (match_operand:VF 0 "register_operand" "=x,x,x,x")
821 (match_operator:VF 3 "absneg_operator"
822 [(match_operand:VF 1 "nonimmediate_operand" "0, xm,x, m")]))
823 (use (match_operand:VF 2 "nonimmediate_operand" "xm,0, xm,x"))]
826 "&& reload_completed"
829 enum rtx_code absneg_op;
835 if (MEM_P (operands[1]))
836 op1 = operands[2], op2 = operands[1];
838 op1 = operands[1], op2 = operands[2];
843 if (rtx_equal_p (operands[0], operands[1]))
849 absneg_op = GET_CODE (operands[3]) == NEG ? XOR : AND;
850 t = gen_rtx_fmt_ee (absneg_op, <MODE>mode, op1, op2);
851 t = gen_rtx_SET (VOIDmode, operands[0], t);
855 [(set_attr "isa" "noavx,noavx,avx,avx")])
;; Add/subtract expander over VF.  Both inputs may be memory at this
;; point; ix86_fixup_binary_operands_no_copy adjusts the operands before
;; the pattern is emitted.
857 (define_expand "<plusminus_insn><mode>3"
858 [(set (match_operand:VF 0 "register_operand")
860 (match_operand:VF 1 "nonimmediate_operand")
861 (match_operand:VF 2 "nonimmediate_operand")))]
863 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Packed add/subtract insn.  Alternative 0 (noavx) is the two-operand
;; form with operand 1 tied to the destination; alternative 1 (avx) is
;; the three-operand v-prefixed form.  <comm> supplies the constraint
;; prefix for operand 1.  Requires TARGET_SSE and
;; ix86_binary_operator_ok.
865 (define_insn "*<plusminus_insn><mode>3"
866 [(set (match_operand:VF 0 "register_operand" "=x,x")
868 (match_operand:VF 1 "nonimmediate_operand" "<comm>0,x")
869 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
870 "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
872 <plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
873 v<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
874 [(set_attr "isa" "noavx,avx")
875 (set_attr "type" "sseadd")
876 (set_attr "prefix" "orig,vex")
877 (set_attr "mode" "<MODE>")])
;; Scalar-suffixed ("ss"/"sd") add/subtract on the 128-bit float modes.
;; Operand 1 must be a register; in Intel syntax operand 2 is printed
;; with the <iptr> size override.  The "mode" attribute is the element
;; mode.
;; NOTE(review): the RTL around the operands and the enabling condition
;; are not visible in this excerpt.
879 (define_insn "<sse>_vm<plusminus_insn><mode>3"
880 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
883 (match_operand:VF_128 1 "register_operand" "0,x")
884 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
889 <plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
890 v<plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %<iptr>2}"
891 [(set_attr "isa" "noavx,avx")
892 (set_attr "type" "sseadd")
893 (set_attr "prefix" "orig,vex")
894 (set_attr "mode" "<ssescalarmode>")])
;; Multiply expander over VF; operands are adjusted by
;; ix86_fixup_binary_operands_no_copy with code MULT.
896 (define_expand "mul<mode>3"
897 [(set (match_operand:VF 0 "register_operand")
899 (match_operand:VF 1 "nonimmediate_operand")
900 (match_operand:VF 2 "nonimmediate_operand")))]
902 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; Packed multiply insn.  The "%" on operand 1 marks operands 1 and 2 as
;; commutative.  Alternative 0 (noavx) is the two-operand form,
;; alternative 1 (avx) the three-operand v-prefixed form.
904 (define_insn "*mul<mode>3"
905 [(set (match_operand:VF 0 "register_operand" "=x,x")
907 (match_operand:VF 1 "nonimmediate_operand" "%0,x")
908 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
909 "TARGET_SSE && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
911 mul<ssemodesuffix>\t{%2, %0|%0, %2}
912 vmul<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
913 [(set_attr "isa" "noavx,avx")
914 (set_attr "type" "ssemul")
915 (set_attr "prefix" "orig,vex")
916 (set_attr "btver2_decode" "direct,double")
917 (set_attr "mode" "<MODE>")])
;; Scalar-suffixed multiply on the 128-bit float modes.  Operand 1 must
;; be a register; in Intel syntax operand 2 is printed with the <iptr>
;; size override.
;; NOTE(review): the RTL around the operands and the enabling condition
;; are not visible in this excerpt.
919 (define_insn "<sse>_vmmul<mode>3"
920 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
923 (match_operand:VF_128 1 "register_operand" "0,x")
924 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
929 mul<ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
930 vmul<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %<iptr>2}"
931 [(set_attr "isa" "noavx,avx")
932 (set_attr "type" "ssemul")
933 (set_attr "prefix" "orig,vex")
934 (set_attr "mode" "<ssescalarmode>")])
;; Division expander for the DF-element modes (VF2): only fixes up the
;; operands with code DIV.
936 (define_expand "div<mode>3"
937 [(set (match_operand:VF2 0 "register_operand")
938 (div:VF2 (match_operand:VF2 1 "register_operand")
939 (match_operand:VF2 2 "nonimmediate_operand")))]
941 "ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);")
;; Division expander for the SF-element modes (VF1).  After the operand
;; fixup it calls ix86_emit_swdivsf instead of emitting the pattern when
;; TARGET_RECIP_VEC_DIV is set, the insn is not optimized for size, and
;; finite-math-only, non-trapping math and unsafe math optimizations
;; are all in effect.
;; NOTE(review): the first term of that condition is not visible in
;; this excerpt.
943 (define_expand "div<mode>3"
944 [(set (match_operand:VF1 0 "register_operand")
945 (div:VF1 (match_operand:VF1 1 "register_operand")
946 (match_operand:VF1 2 "nonimmediate_operand")))]
949 ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);
952 && TARGET_RECIP_VEC_DIV
953 && !optimize_insn_for_size_p ()
954 && flag_finite_math_only && !flag_trapping_math
955 && flag_unsafe_math_optimizations)
957 ix86_emit_swdivsf (operands[0], operands[1], operands[2], <MODE>mode);
;; Packed divide insn.  Operand 1 must be a register and has no
;; commutative marker.  Alternative 0 (noavx) is the two-operand form,
;; alternative 1 (avx) the three-operand v-prefixed form.
962 (define_insn "<sse>_div<mode>3"
963 [(set (match_operand:VF 0 "register_operand" "=x,x")
965 (match_operand:VF 1 "register_operand" "0,x")
966 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
969 div<ssemodesuffix>\t{%2, %0|%0, %2}
970 vdiv<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
971 [(set_attr "isa" "noavx,avx")
972 (set_attr "type" "ssediv")
973 (set_attr "prefix" "orig,vex")
974 (set_attr "mode" "<MODE>")])
;; Scalar-suffixed divide on the 128-bit float modes; in Intel syntax
;; operand 2 is printed with the <iptr> size override.
;; NOTE(review): the RTL around the operands and the enabling condition
;; are not visible in this excerpt.
976 (define_insn "<sse>_vmdiv<mode>3"
977 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
980 (match_operand:VF_128 1 "register_operand" "0,x")
981 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
986 div<ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
987 vdiv<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %<iptr>2}"
988 [(set_attr "isa" "noavx,avx")
989 (set_attr "type" "ssediv")
990 (set_attr "prefix" "orig,vex")
991 (set_attr "btver2_decode" "direct,double")
992 (set_attr "mode" "<ssescalarmode>")])
;; Emits %vrcpps for the SF-element modes (VF1); the operation is
;; represented as UNSPEC_RCP of operand 1.
;; NOTE(review): the enabling condition is not visible in this excerpt.
994 (define_insn "<sse>_rcp<mode>2"
995 [(set (match_operand:VF1 0 "register_operand" "=x")
997 [(match_operand:VF1 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
999 "%vrcpps\t{%1, %0|%0, %1}"
1000 [(set_attr "type" "sse")
1001 (set_attr "atom_sse_attr" "rcp")
1002 (set_attr "btver2_sse_attr" "rcp")
1003 (set_attr "prefix" "maybe_vex")
1004 (set_attr "mode" "<MODE>")])
;; Emits rcpss / vrcpss on V4SF.  Operand 1 is the source of the
;; operation; operand 2 is a second register input that is tied to the
;; destination in the noavx alternative and passed as the extra source
;; in the avx alternative.  In Intel syntax operand 1 is printed with
;; %k1.
;; NOTE(review): the merge RTL and the enabling condition are not
;; visible in this excerpt.
1006 (define_insn "sse_vmrcpv4sf2"
1007 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1009 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
1011 (match_operand:V4SF 2 "register_operand" "0,x")
1015 rcpss\t{%1, %0|%0, %k1}
1016 vrcpss\t{%1, %2, %0|%0, %2, %k1}"
1017 [(set_attr "isa" "noavx,avx")
1018 (set_attr "type" "sse")
1019 (set_attr "atom_sse_attr" "rcp")
1020 (set_attr "btver2_sse_attr" "rcp")
1021 (set_attr "prefix" "orig,vex")
1022 (set_attr "mode" "SF")])
;; Square-root expander for the DF-element modes (VF2); no preparation
;; code is visible for it in this excerpt.
1024 (define_expand "sqrt<mode>2"
1025 [(set (match_operand:VF2 0 "register_operand")
1026 (sqrt:VF2 (match_operand:VF2 1 "nonimmediate_operand")))]
;; Square-root expander for the SF-element modes (VF1).  Calls
;; ix86_emit_swsqrtsf (last argument false) when TARGET_RECIP_VEC_SQRT
;; is set, the insn is not optimized for size, and finite-math-only,
;; non-trapping math and unsafe math optimizations are all in effect.
;; NOTE(review): the first term of that condition is not visible in
;; this excerpt.
1029 (define_expand "sqrt<mode>2"
1030 [(set (match_operand:VF1 0 "register_operand")
1031 (sqrt:VF1 (match_operand:VF1 1 "nonimmediate_operand")))]
1035 && TARGET_RECIP_VEC_SQRT
1036 && !optimize_insn_for_size_p ()
1037 && flag_finite_math_only && !flag_trapping_math
1038 && flag_unsafe_math_optimizations)
1040 ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, false);
;; Packed square-root insn over VF: emits %vsqrt<ssemodesuffix> with a
;; single alternative.
1045 (define_insn "<sse>_sqrt<mode>2"
1046 [(set (match_operand:VF 0 "register_operand" "=x")
1047 (sqrt:VF (match_operand:VF 1 "nonimmediate_operand" "xm")))]
1049 "%vsqrt<ssemodesuffix>\t{%1, %0|%0, %1}"
1050 [(set_attr "type" "sse")
1051 (set_attr "atom_sse_attr" "sqrt")
1052 (set_attr "btver2_sse_attr" "sqrt")
1053 (set_attr "prefix" "maybe_vex")
1054 (set_attr "mode" "<MODE>")])
;; Scalar-suffixed square root on the 128-bit float modes.  Operand 1 is
;; the source of the operation; operand 2 is a second register input,
;; tied to the destination in the noavx alternative.  In Intel syntax
;; operand 1 is printed with the <iptr> size override.
;; NOTE(review): the merge RTL and the enabling condition are not
;; visible in this excerpt.
1056 (define_insn "<sse>_vmsqrt<mode>2"
1057 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1060 (match_operand:VF_128 1 "nonimmediate_operand" "xm,xm"))
1061 (match_operand:VF_128 2 "register_operand" "0,x")
1065 sqrt<ssescalarmodesuffix>\t{%1, %0|%0, %<iptr>1}
1066 vsqrt<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %<iptr>1}"
1067 [(set_attr "isa" "noavx,avx")
1068 (set_attr "type" "sse")
1069 (set_attr "atom_sse_attr" "sqrt")
1070 (set_attr "btver2_sse_attr" "sqrt")
1071 (set_attr "prefix" "orig,vex")
1072 (set_attr "mode" "<ssescalarmode>")])
;; Reciprocal-square-root expander for the SF-element modes (VF1).  The
;; visible preparation code calls ix86_emit_swsqrtsf with its last
;; argument true.
;; NOTE(review): the enabling condition is not visible in this excerpt.
1074 (define_expand "rsqrt<mode>2"
1075 [(set (match_operand:VF1 0 "register_operand")
1077 [(match_operand:VF1 1 "nonimmediate_operand")] UNSPEC_RSQRT))]
1080 ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, true);
;; Emits %vrsqrtps; the operation is represented as UNSPEC_RSQRT of
;; operand 1.
1084 (define_insn "<sse>_rsqrt<mode>2"
1085 [(set (match_operand:VF1 0 "register_operand" "=x")
1087 [(match_operand:VF1 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
1089 "%vrsqrtps\t{%1, %0|%0, %1}"
1090 [(set_attr "type" "sse")
1091 (set_attr "prefix" "maybe_vex")
1092 (set_attr "mode" "<MODE>")])
;; Emits rsqrtss / vrsqrtss on V4SF, with the same operand layout as
;; sse_vmrcpv4sf2: operand 1 is the source, operand 2 a second register
;; input tied to the destination in the noavx alternative.
;; NOTE(review): the merge RTL and the enabling condition are not
;; visible in this excerpt.
1094 (define_insn "sse_vmrsqrtv4sf2"
1095 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1097 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
1099 (match_operand:V4SF 2 "register_operand" "0,x")
1103 rsqrtss\t{%1, %0|%0, %k1}
1104 vrsqrtss\t{%1, %2, %0|%0, %2, %k1}"
1105 [(set_attr "isa" "noavx,avx")
1106 (set_attr "type" "sse")
1107 (set_attr "prefix" "orig,vex")
1108 (set_attr "mode" "SF")])
1110 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
1111 ;; isn't really correct, as those rtl operators aren't defined when
1112 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
;; Min/max expander over VF.  Unless flag_finite_math_only is set,
;; operand 1 is forced into a register before the generic
;; binary-operand fixup runs.
1114 (define_expand "<code><mode>3"
1115 [(set (match_operand:VF 0 "register_operand")
1117 (match_operand:VF 1 "nonimmediate_operand")
1118 (match_operand:VF 2 "nonimmediate_operand")))]
1121 if (!flag_finite_math_only)
1122 operands[1] = force_reg (<MODE>mode, operands[1]);
1123 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
;; Finite-math variant: enabled only with flag_finite_math_only.
;; Operand 1 carries the "%" commutative marker and may be memory.
1126 (define_insn "*<code><mode>3_finite"
1127 [(set (match_operand:VF 0 "register_operand" "=x,x")
1129 (match_operand:VF 1 "nonimmediate_operand" "%0,x")
1130 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
1131 "TARGET_SSE && flag_finite_math_only
1132 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1134 <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
1135 v<maxmin_float><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1136 [(set_attr "isa" "noavx,avx")
1137 (set_attr "type" "sseadd")
1138 (set_attr "btver2_sse_attr" "maxmin")
1139 (set_attr "prefix" "orig,vex")
1140 (set_attr "mode" "<MODE>")])
;; Non-finite variant: enabled only without flag_finite_math_only.
;; Operand 1 must be a register and has no commutative marker, so the
;; operand order is kept as given.
1142 (define_insn "*<code><mode>3"
1143 [(set (match_operand:VF 0 "register_operand" "=x,x")
1145 (match_operand:VF 1 "register_operand" "0,x")
1146 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
1147 "TARGET_SSE && !flag_finite_math_only"
1149 <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
1150 v<maxmin_float><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1151 [(set_attr "isa" "noavx,avx")
1152 (set_attr "type" "sseadd")
1153 (set_attr "btver2_sse_attr" "maxmin")
1154 (set_attr "prefix" "orig,vex")
1155 (set_attr "mode" "<MODE>")])
;; Scalar-suffixed min/max on the 128-bit float modes; in Intel syntax
;; operand 2 is printed with the <iptr> size override.
;; NOTE(review): the RTL around the operands and the enabling condition
;; are not visible in this excerpt.
1157 (define_insn "<sse>_vm<code><mode>3"
1158 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1161 (match_operand:VF_128 1 "register_operand" "0,x")
1162 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
1167 <maxmin_float><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
1168 v<maxmin_float><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %<iptr>2}"
1169 [(set_attr "isa" "noavx,avx")
1170 (set_attr "type" "sse")
1171 (set_attr "btver2_sse_attr" "maxmin")
1172 (set_attr "prefix" "orig,vex")
1173 (set_attr "mode" "<ssescalarmode>")])
1175 ;; These versions of the min/max patterns implement exactly the operations
1176 ;; min = (op1 < op2 ? op1 : op2)
1177 ;; max = (!(op1 < op2) ? op1 : op2)
1178 ;; Their operands are not commutative, and thus they may be used in the
1179 ;; presence of -0.0 and NaN.
;; Order-preserving packed minimum over the VF modes; emits min/vmin with
;; the mode suffix.  Operand 1 is a register without a commutative marker,
;; so the operand order given in the RTL is the order emitted.
;; NOTE(review): the wrapping RTL code and insn condition lines are not
;; visible in this excerpt.
1181 (define_insn "*ieee_smin<mode>3"
1182 [(set (match_operand:VF 0 "register_operand" "=x,x")
1184 [(match_operand:VF 1 "register_operand" "0,x")
1185 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]
1189 min<ssemodesuffix>\t{%2, %0|%0, %2}
1190 vmin<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1191 [(set_attr "isa" "noavx,avx")
1192 (set_attr "type" "sseadd")
1193 (set_attr "prefix" "orig,vex")
1194 (set_attr "mode" "<MODE>")])
;; Order-preserving packed maximum over the VF modes; emits max/vmax with
;; the mode suffix.  Operand 1 is a register without a commutative marker,
;; so the operand order given in the RTL is the order emitted.
;; NOTE(review): the wrapping RTL code and insn condition lines are not
;; visible in this excerpt.
1196 (define_insn "*ieee_smax<mode>3"
1197 [(set (match_operand:VF 0 "register_operand" "=x,x")
1199 [(match_operand:VF 1 "register_operand" "0,x")
1200 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]
1204 max<ssemodesuffix>\t{%2, %0|%0, %2}
1205 vmax<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1206 [(set_attr "isa" "noavx,avx")
1207 (set_attr "type" "sseadd")
1208 (set_attr "prefix" "orig,vex")
1209 (set_attr "mode" "<MODE>")])
;; 256-bit double-precision addsub; emits vaddsubpd (VEX only, a single
;; alternative).  The visible RTL pairs an expression of operands 1 and 2
;; with a minus of the same two operands.
;; NOTE(review): the combining RTL code, its selector and the insn
;; condition are not visible in this excerpt.
1211 (define_insn "avx_addsubv4df3"
1212 [(set (match_operand:V4DF 0 "register_operand" "=x")
1215 (match_operand:V4DF 1 "register_operand" "x")
1216 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
1217 (minus:V4DF (match_dup 1) (match_dup 2))
1220 "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1221 [(set_attr "type" "sseadd")
1222 (set_attr "prefix" "vex")
1223 (set_attr "mode" "V4DF")])
;; 128-bit double-precision addsub; emits addsubpd (alternative 0, operand 1
;; tied to the destination) or vaddsubpd (alternative 1, three operands).
;; The visible RTL pairs an expression of operands 1 and 2 with a minus of
;; the same two operands.
;; NOTE(review): the combining RTL code, its selector and the insn
;; condition are not visible in this excerpt.
1225 (define_insn "sse3_addsubv2df3"
1226 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1229 (match_operand:V2DF 1 "register_operand" "0,x")
1230 (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm"))
1231 (minus:V2DF (match_dup 1) (match_dup 2))
1235 addsubpd\t{%2, %0|%0, %2}
1236 vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1237 [(set_attr "isa" "noavx,avx")
1238 (set_attr "type" "sseadd")
1239 (set_attr "atom_unit" "complex")
1240 (set_attr "prefix" "orig,vex")
1241 (set_attr "mode" "V2DF")])
;; 256-bit single-precision addsub; emits vaddsubps (VEX only, a single
;; alternative).  The visible RTL pairs an expression of operands 1 and 2
;; with a minus of the same two operands.
;; NOTE(review): the combining RTL code, its selector and the insn
;; condition are not visible in this excerpt.
1243 (define_insn "avx_addsubv8sf3"
1244 [(set (match_operand:V8SF 0 "register_operand" "=x")
1247 (match_operand:V8SF 1 "register_operand" "x")
1248 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
1249 (minus:V8SF (match_dup 1) (match_dup 2))
1252 "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1253 [(set_attr "type" "sseadd")
1254 (set_attr "prefix" "vex")
1255 (set_attr "mode" "V8SF")])
;; 128-bit single-precision addsub; emits addsubps (alternative 0, operand 1
;; tied to the destination) or vaddsubps (alternative 1, three operands).
;; prefix_rep is set to 1 for the non-AVX alternative only.
;; NOTE(review): the combining RTL code, its selector and the insn
;; condition are not visible in this excerpt.
1257 (define_insn "sse3_addsubv4sf3"
1258 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1261 (match_operand:V4SF 1 "register_operand" "0,x")
1262 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
1263 (minus:V4SF (match_dup 1) (match_dup 2))
1267 addsubps\t{%2, %0|%0, %2}
1268 vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1269 [(set_attr "isa" "noavx,avx")
1270 (set_attr "type" "sseadd")
1271 (set_attr "prefix" "orig,vex")
1272 (set_attr "prefix_rep" "1,*")
1273 (set_attr "mode" "V4SF")])
;; 256-bit double-precision horizontal add/sub; emits
;; vh<plusminus_mnemonic>pd.  The RTL selects element pairs (0,1) and (2,3)
;; from each of operands 1 and 2 with fixed constant indices.
;; NOTE(review): the vec_concat/plusminus wrapper lines and the insn
;; condition are not visible in this excerpt.
1275 (define_insn "avx_h<plusminus_insn>v4df3"
1276 [(set (match_operand:V4DF 0 "register_operand" "=x")
1281 (match_operand:V4DF 1 "register_operand" "x")
1282 (parallel [(const_int 0)]))
1283 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1286 (match_operand:V4DF 2 "nonimmediate_operand" "xm")
1287 (parallel [(const_int 0)]))
1288 (vec_select:DF (match_dup 2) (parallel [(const_int 1)]))))
1291 (vec_select:DF (match_dup 1) (parallel [(const_int 2)]))
1292 (vec_select:DF (match_dup 1) (parallel [(const_int 3)])))
1294 (vec_select:DF (match_dup 2) (parallel [(const_int 2)]))
1295 (vec_select:DF (match_dup 2) (parallel [(const_int 3)]))))))]
1297 "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
1298 [(set_attr "type" "sseadd")
1299 (set_attr "prefix" "vex")
1300 (set_attr "mode" "V4DF")])
;; Named expander for the V2DF horizontal add.  It builds RTL that selects
;; elements 0 and 1 of operand 1 and elements 0 and 1 of operand 2 using
;; fixed constant indices.
;; NOTE(review): the combining RTL codes and the expander condition are not
;; visible in this excerpt.
1302 (define_expand "sse3_haddv2df3"
1303 [(set (match_operand:V2DF 0 "register_operand")
1307 (match_operand:V2DF 1 "register_operand")
1308 (parallel [(const_int 0)]))
1309 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1312 (match_operand:V2DF 2 "nonimmediate_operand")
1313 (parallel [(const_int 0)]))
1314 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
;; Matcher for the V2DF horizontal add; emits haddpd/vhaddpd.  The element
;; indices are themselves operands (3..6, each 0 or 1), and the condition
;; requires operand 3 to differ from operand 4 and operand 5 to differ from
;; operand 6, so either ordering of the two lanes within a pair matches.
;; NOTE(review): the first line of the condition is not visible here.
1317 (define_insn "*sse3_haddv2df3"
1318 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1322 (match_operand:V2DF 1 "register_operand" "0,x")
1323 (parallel [(match_operand:SI 3 "const_0_to_1_operand")]))
1326 (parallel [(match_operand:SI 4 "const_0_to_1_operand")])))
1329 (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm")
1330 (parallel [(match_operand:SI 5 "const_0_to_1_operand")]))
1333 (parallel [(match_operand:SI 6 "const_0_to_1_operand")])))))]
1335 && INTVAL (operands[3]) != INTVAL (operands[4])
1336 && INTVAL (operands[5]) != INTVAL (operands[6])"
1338 haddpd\t{%2, %0|%0, %2}
1339 vhaddpd\t{%2, %1, %0|%0, %1, %2}"
1340 [(set_attr "isa" "noavx,avx")
1341 (set_attr "type" "sseadd")
1342 (set_attr "prefix" "orig,vex")
1343 (set_attr "mode" "V2DF")])
;; V2DF horizontal subtract; emits hsubpd/vhsubpd.  Unlike the hadd matcher,
;; the element indices are fixed constants (0 then 1) for both operands.
;; NOTE(review): the combining RTL codes and the insn condition are not
;; visible in this excerpt.
1345 (define_insn "sse3_hsubv2df3"
1346 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1350 (match_operand:V2DF 1 "register_operand" "0,x")
1351 (parallel [(const_int 0)]))
1352 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1355 (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm")
1356 (parallel [(const_int 0)]))
1357 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1360 hsubpd\t{%2, %0|%0, %2}
1361 vhsubpd\t{%2, %1, %0|%0, %1, %2}"
1362 [(set_attr "isa" "noavx,avx")
1363 (set_attr "type" "sseadd")
1364 (set_attr "prefix" "orig,vex")
1365 (set_attr "mode" "V2DF")])
;; DFmode result formed from two elements of a single V2DF register
;; (operand 1).  The two element indices are operands 2 and 3 and must
;; differ.  The templates pass the same register as both sources of
;; haddpd/vhaddpd.
;; NOTE(review): the combining RTL code and the first line of the condition
;; are not visible in this excerpt.
1367 (define_insn "*sse3_haddv2df3_low"
1368 [(set (match_operand:DF 0 "register_operand" "=x,x")
1371 (match_operand:V2DF 1 "register_operand" "0,x")
1372 (parallel [(match_operand:SI 2 "const_0_to_1_operand")]))
1375 (parallel [(match_operand:SI 3 "const_0_to_1_operand")]))))]
1377 && INTVAL (operands[2]) != INTVAL (operands[3])"
1379 haddpd\t{%0, %0|%0, %0}
1380 vhaddpd\t{%1, %1, %0|%0, %1, %1}"
1381 [(set_attr "isa" "noavx,avx")
1382 (set_attr "type" "sseadd1")
1383 (set_attr "prefix" "orig,vex")
1384 (set_attr "mode" "V2DF")])
;; DFmode result formed from elements 0 and 1 (fixed indices) of a single
;; V2DF register (operand 1).  The templates pass the same register as both
;; sources of hsubpd/vhsubpd.
;; NOTE(review): the combining RTL code and the insn condition are not
;; visible in this excerpt.
1386 (define_insn "*sse3_hsubv2df3_low"
1387 [(set (match_operand:DF 0 "register_operand" "=x,x")
1390 (match_operand:V2DF 1 "register_operand" "0,x")
1391 (parallel [(const_int 0)]))
1394 (parallel [(const_int 1)]))))]
1397 hsubpd\t{%0, %0|%0, %0}
1398 vhsubpd\t{%1, %1, %0|%0, %1, %1}"
1399 [(set_attr "isa" "noavx,avx")
1400 (set_attr "type" "sseadd1")
1401 (set_attr "prefix" "orig,vex")
1402 (set_attr "mode" "V2DF")])
;; 256-bit single-precision horizontal add/sub; emits
;; vh<plusminus_mnemonic>ps.  The RTL selects elements 0..3 of operand 1,
;; then 0..3 of operand 2, then 4..7 of operand 1, then 4..7 of operand 2,
;; always as adjacent pairs with fixed constant indices.
;; NOTE(review): the vec_concat/plusminus wrapper lines and the insn
;; condition are not visible in this excerpt.
1404 (define_insn "avx_h<plusminus_insn>v8sf3"
1405 [(set (match_operand:V8SF 0 "register_operand" "=x")
1411 (match_operand:V8SF 1 "register_operand" "x")
1412 (parallel [(const_int 0)]))
1413 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1415 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1416 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1420 (match_operand:V8SF 2 "nonimmediate_operand" "xm")
1421 (parallel [(const_int 0)]))
1422 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1424 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1425 (vec_select:SF (match_dup 2) (parallel [(const_int 3)])))))
1429 (vec_select:SF (match_dup 1) (parallel [(const_int 4)]))
1430 (vec_select:SF (match_dup 1) (parallel [(const_int 5)])))
1432 (vec_select:SF (match_dup 1) (parallel [(const_int 6)]))
1433 (vec_select:SF (match_dup 1) (parallel [(const_int 7)]))))
1436 (vec_select:SF (match_dup 2) (parallel [(const_int 4)]))
1437 (vec_select:SF (match_dup 2) (parallel [(const_int 5)])))
1439 (vec_select:SF (match_dup 2) (parallel [(const_int 6)]))
1440 (vec_select:SF (match_dup 2) (parallel [(const_int 7)])))))))]
1442 "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1443 [(set_attr "type" "sseadd")
1444 (set_attr "prefix" "vex")
1445 (set_attr "mode" "V8SF")])
;; 128-bit single-precision horizontal add/sub; emits
;; h<plusminus_mnemonic>ps (alternative 0, operand 1 tied to the
;; destination) or the VEX three-operand form (alternative 1).  The RTL
;; selects adjacent element pairs (0,1) and (2,3) of operand 1, then of
;; operand 2.  prefix_rep is 1 for the non-AVX alternative only.
;; NOTE(review): the vec_concat/plusminus wrapper lines and the insn
;; condition are not visible in this excerpt.
1447 (define_insn "sse3_h<plusminus_insn>v4sf3"
1448 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1453 (match_operand:V4SF 1 "register_operand" "0,x")
1454 (parallel [(const_int 0)]))
1455 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1457 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1458 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1462 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm")
1463 (parallel [(const_int 0)]))
1464 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1466 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1467 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
1470 h<plusminus_mnemonic>ps\t{%2, %0|%0, %2}
1471 vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1472 [(set_attr "isa" "noavx,avx")
1473 (set_attr "type" "sseadd")
1474 (set_attr "atom_unit" "complex")
1475 (set_attr "prefix" "orig,vex")
1476 (set_attr "prefix_rep" "1,*")
1477 (set_attr "mode" "V4SF")])
;; Sum-reduction expander for V4DF.  Steps emitted: a horizontal add of
;; operand 1 with itself into tmp, a vperm2f128 of tmp with immediate 1 into
;; tmp2, then an element-wise add of tmp and tmp2 into operand 0.
;; NOTE(review): presumably the permute swaps the 128-bit halves so the
;; final add combines both lane sums - confirm.  The expander condition is
;; not visible in this excerpt.
1479 (define_expand "reduc_splus_v4df"
1480 [(match_operand:V4DF 0 "register_operand")
1481 (match_operand:V4DF 1 "register_operand")]
1484 rtx tmp = gen_reg_rtx (V4DFmode);
1485 rtx tmp2 = gen_reg_rtx (V4DFmode);
1486 emit_insn (gen_avx_haddv4df3 (tmp, operands[1], operands[1]));
1487 emit_insn (gen_avx_vperm2f128v4df3 (tmp2, tmp, tmp, GEN_INT (1)));
1488 emit_insn (gen_addv4df3 (operands[0], tmp, tmp2));
;; Sum-reduction expander for V2DF: a single horizontal add of operand 1
;; with itself, written directly to operand 0.
;; NOTE(review): the expander condition is not visible in this excerpt.
1492 (define_expand "reduc_splus_v2df"
1493 [(match_operand:V2DF 0 "register_operand")
1494 (match_operand:V2DF 1 "register_operand")]
1497 emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
;; Sum-reduction expander for V8SF.  Steps emitted: two rounds of horizontal
;; add (operand 1 with itself into tmp, then tmp with itself into tmp2), a
;; vperm2f128 of tmp2 with immediate 1 back into tmp, and an element-wise
;; add of tmp and tmp2 into operand 0.
;; NOTE(review): presumably the permute swaps the 128-bit halves so the
;; final add combines both lane sums - confirm.  The expander condition is
;; not visible in this excerpt.
1501 (define_expand "reduc_splus_v8sf"
1502 [(match_operand:V8SF 0 "register_operand")
1503 (match_operand:V8SF 1 "register_operand")]
1506 rtx tmp = gen_reg_rtx (V8SFmode);
1507 rtx tmp2 = gen_reg_rtx (V8SFmode);
1508 emit_insn (gen_avx_haddv8sf3 (tmp, operands[1], operands[1]));
1509 emit_insn (gen_avx_haddv8sf3 (tmp2, tmp, tmp));
1510 emit_insn (gen_avx_vperm2f128v8sf3 (tmp, tmp2, tmp2, GEN_INT (1)));
1511 emit_insn (gen_addv8sf3 (operands[0], tmp, tmp2));
;; Sum-reduction expander for V4SF.  Two code paths are visible: one emits
;; two horizontal adds (operand 1 with itself into tmp, then tmp with itself
;; into operand 0); the other calls ix86_expand_reduc with gen_addv4sf3.
;; NOTE(review): the test that selects between the two paths, and the
;; expander condition, are not visible in this excerpt.
1515 (define_expand "reduc_splus_v4sf"
1516 [(match_operand:V4SF 0 "register_operand")
1517 (match_operand:V4SF 1 "register_operand")]
1522 rtx tmp = gen_reg_rtx (V4SFmode);
1523 emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
1524 emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
1527 ix86_expand_reduc (gen_addv4sf3, operands[0], operands[1]);
1531 ;; Modes handled by reduc_sm{in,ax}* patterns.
;; Per-mode enabling conditions: the four 256-bit integer modes require
;; TARGET_AVX2, the two 256-bit float modes require TARGET_AVX, and V4SF
;; requires TARGET_SSE.
1532 (define_mode_iterator REDUC_SMINMAX_MODE
1533 [(V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
1534 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
1535 (V8SF "TARGET_AVX") (V4DF "TARGET_AVX")
1536 (V4SF "TARGET_SSE")])
;; Signed min/max reduction expander, instantiated for each smaxmin code and
;; each REDUC_SMINMAX_MODE mode.  It delegates to ix86_expand_reduc, passing
;; the generator of the matching element-wise <code><mode>3 pattern.
;; NOTE(review): the expander condition is not visible in this excerpt.
1538 (define_expand "reduc_<code>_<mode>"
1539 [(smaxmin:REDUC_SMINMAX_MODE
1540 (match_operand:REDUC_SMINMAX_MODE 0 "register_operand")
1541 (match_operand:REDUC_SMINMAX_MODE 1 "register_operand"))]
1544 ix86_expand_reduc (gen_<code><mode>3, operands[0], operands[1]);
;; Reduction expander over the VI_256 modes.  It delegates to
;; ix86_expand_reduc, passing the generator of the matching element-wise
;; <code><mode>3 pattern.
;; NOTE(review): the code iterator line and the expander condition are not
;; visible in this excerpt; presumably this is the unsigned min/max
;; variant - confirm.
1548 (define_expand "reduc_<code>_<mode>"
1550 (match_operand:VI_256 0 "register_operand")
1551 (match_operand:VI_256 1 "register_operand"))]
1554 ix86_expand_reduc (gen_<code><mode>3, operands[0], operands[1]);
;; Unsigned-minimum reduction expander for V8HI.  It delegates to
;; ix86_expand_reduc with gen_uminv8hi3 as the element-wise operation.
;; NOTE(review): the RTL code line and the expander condition are not
;; visible in this excerpt.
1558 (define_expand "reduc_umin_v8hi"
1560 (match_operand:V8HI 0 "register_operand")
1561 (match_operand:V8HI 1 "register_operand"))]
1564 ix86_expand_reduc (gen_uminv8hi3, operands[0], operands[1]);
1568 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1570 ;; Parallel floating point comparisons
1572 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; VEX packed float compare with an explicit predicate immediate: operand 3
;; is a constant in the range 0..31 and is emitted as the immediate of
;; vcmp<ssemodesuffix>.  Single alternative, three register/memory operands.
;; NOTE(review): the unspec name and the insn condition are not visible in
;; this excerpt.
1574 (define_insn "avx_cmp<mode>3"
1575 [(set (match_operand:VF 0 "register_operand" "=x")
1577 [(match_operand:VF 1 "register_operand" "x")
1578 (match_operand:VF 2 "nonimmediate_operand" "xm")
1579 (match_operand:SI 3 "const_0_to_31_operand" "n")]
1582 "vcmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1583 [(set_attr "type" "ssecmp")
1584 (set_attr "length_immediate" "1")
1585 (set_attr "prefix" "vex")
1586 (set_attr "mode" "<MODE>")])
;; Scalar-suffixed variant of the VEX compare-with-immediate on 128-bit
;; float vectors: emits vcmp<ssescalarmodesuffix> with operand 3 (0..31) as
;; the predicate immediate; the insn mode attribute is <ssescalarmode>.
;; NOTE(review): the enclosing RTL and the insn condition are not visible
;; in this excerpt.
1588 (define_insn "avx_vmcmp<mode>3"
1589 [(set (match_operand:VF_128 0 "register_operand" "=x")
1592 [(match_operand:VF_128 1 "register_operand" "x")
1593 (match_operand:VF_128 2 "nonimmediate_operand" "xm")
1594 (match_operand:SI 3 "const_0_to_31_operand" "n")]
1599 "vcmp<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %3}"
1600 [(set_attr "type" "ssecmp")
1601 (set_attr "length_immediate" "1")
1602 (set_attr "prefix" "vex")
1603 (set_attr "mode" "<ssescalarmode>")])
;; Packed float mask compare restricted to commutative comparison codes:
;; the condition requires the operator (operand 3) to be of class
;; RTX_COMM_COMPARE, and operand 1 carries the % commutative marker so the
;; two inputs may be swapped.  The %D3 modifier prints the predicate as
;; part of the mnemonic.
;; NOTE(review): the first line of the condition is not visible here.
1605 (define_insn "*<sse>_maskcmp<mode>3_comm"
1606 [(set (match_operand:VF 0 "register_operand" "=x,x")
1607 (match_operator:VF 3 "sse_comparison_operator"
1608 [(match_operand:VF 1 "register_operand" "%0,x")
1609 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]))]
1611 && GET_RTX_CLASS (GET_CODE (operands[3])) == RTX_COMM_COMPARE"
1613 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
1614 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1615 [(set_attr "isa" "noavx,avx")
1616 (set_attr "type" "ssecmp")
1617 (set_attr "length_immediate" "1")
1618 (set_attr "prefix" "orig,vex")
1619 (set_attr "mode" "<MODE>")])
;; General packed float mask compare over the VF modes for any
;; sse_comparison_operator.  Operand 1 has no commutative marker, so the
;; operand order is preserved.  The %D3 modifier prints the predicate as
;; part of the mnemonic.
;; NOTE(review): the insn condition is not visible in this excerpt.
1621 (define_insn "<sse>_maskcmp<mode>3"
1622 [(set (match_operand:VF 0 "register_operand" "=x,x")
1623 (match_operator:VF 3 "sse_comparison_operator"
1624 [(match_operand:VF 1 "register_operand" "0,x")
1625 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]))]
1628 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
1629 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1630 [(set_attr "isa" "noavx,avx")
1631 (set_attr "type" "ssecmp")
1632 (set_attr "length_immediate" "1")
1633 (set_attr "prefix" "orig,vex")
1634 (set_attr "mode" "<MODE>")])
;; Scalar-suffixed mask compare on 128-bit float vectors: emits
;; cmp%D3<ssescalarmodesuffix> or its VEX form; the insn mode attribute is
;; <ssescalarmode>.  length_immediate is given explicitly only for the
;; non-AVX alternative.
;; NOTE(review): the enclosing RTL and the insn condition are not visible
;; in this excerpt.
1636 (define_insn "<sse>_vmmaskcmp<mode>3"
1637 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1639 (match_operator:VF_128 3 "sse_comparison_operator"
1640 [(match_operand:VF_128 1 "register_operand" "0,x")
1641 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm")])
1646 cmp%D3<ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
1647 vcmp%D3<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %<iptr>2}"
1648 [(set_attr "isa" "noavx,avx")
1649 (set_attr "type" "ssecmp")
1650 (set_attr "length_immediate" "1,*")
1651 (set_attr "prefix" "orig,vex")
1652 (set_attr "mode" "<ssescalarmode>")])
;; Scalar ordered compare that writes FLAGS_REG in CCFP mode.  Both inputs
;; are taken from element 0 of <ssevecmode> vectors (operands 0 and 1).
;; Emits comi with the optional v prefix (%v).  prefix_data16 is chosen by
;; testing whether the insn mode attribute is DF.
;; NOTE(review): the compare/vec_select wrapper lines are not visible in
;; this excerpt.
1654 (define_insn "<sse>_comi"
1655 [(set (reg:CCFP FLAGS_REG)
1658 (match_operand:<ssevecmode> 0 "register_operand" "x")
1659 (parallel [(const_int 0)]))
1661 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
1662 (parallel [(const_int 0)]))))]
1663 "SSE_FLOAT_MODE_P (<MODE>mode)"
1664 "%vcomi<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}"
1665 [(set_attr "type" "ssecomi")
1666 (set_attr "prefix" "maybe_vex")
1667 (set_attr "prefix_rep" "0")
1668 (set (attr "prefix_data16")
1669 (if_then_else (eq_attr "mode" "DF")
1671 (const_string "0")))
1672 (set_attr "mode" "<MODE>")])
;; Scalar unordered compare that writes FLAGS_REG in CCFPU mode.  Both
;; inputs are taken from element 0 of <ssevecmode> vectors (operands 0 and
;; 1).  Emits ucomi with the optional v prefix (%v).  prefix_data16 is
;; chosen by testing whether the insn mode attribute is DF.
;; NOTE(review): the compare/vec_select wrapper lines are not visible in
;; this excerpt.
1674 (define_insn "<sse>_ucomi"
1675 [(set (reg:CCFPU FLAGS_REG)
1678 (match_operand:<ssevecmode> 0 "register_operand" "x")
1679 (parallel [(const_int 0)]))
1681 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
1682 (parallel [(const_int 0)]))))]
1683 "SSE_FLOAT_MODE_P (<MODE>mode)"
1684 "%vucomi<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}"
1685 [(set_attr "type" "ssecomi")
1686 (set_attr "prefix" "maybe_vex")
1687 (set_attr "prefix_rep" "0")
1688 (set (attr "prefix_data16")
1689 (if_then_else (eq_attr "mode" "DF")
1691 (const_string "0")))
1692 (set_attr "mode" "<MODE>")])
;; Vector conditional-select expander for 256-bit data modes (V_256) driven
;; by a comparison of 256-bit float vectors (VF_256).  Operand 3 is the
;; comparison of operands 4 and 5; operands 1 and 2 are the two values to
;; choose between.  The condition requires the data and comparison modes to
;; have the same number of elements.  Expansion is delegated to
;; ix86_expand_fp_vcond.
;; NOTE(review): the first line of the condition and the code that consumes
;; the ok result are not visible in this excerpt.
1694 (define_expand "vcond<V_256:mode><VF_256:mode>"
1695 [(set (match_operand:V_256 0 "register_operand")
1697 (match_operator 3 ""
1698 [(match_operand:VF_256 4 "nonimmediate_operand")
1699 (match_operand:VF_256 5 "nonimmediate_operand")])
1700 (match_operand:V_256 1 "general_operand")
1701 (match_operand:V_256 2 "general_operand")))]
1703 && (GET_MODE_NUNITS (<V_256:MODE>mode)
1704 == GET_MODE_NUNITS (<VF_256:MODE>mode))"
1706 bool ok = ix86_expand_fp_vcond (operands);
;; Vector conditional-select expander for 128-bit data modes (V_128) driven
;; by a comparison of 128-bit float vectors (VF_128).  Operand 3 is the
;; comparison of operands 4 and 5; operands 1 and 2 are the two values to
;; choose between.  The condition requires the data and comparison modes to
;; have the same number of elements.  Expansion is delegated to
;; ix86_expand_fp_vcond.
;; NOTE(review): the first line of the condition and the code that consumes
;; the ok result are not visible in this excerpt.
1711 (define_expand "vcond<V_128:mode><VF_128:mode>"
1712 [(set (match_operand:V_128 0 "register_operand")
1714 (match_operator 3 ""
1715 [(match_operand:VF_128 4 "nonimmediate_operand")
1716 (match_operand:VF_128 5 "nonimmediate_operand")])
1717 (match_operand:V_128 1 "general_operand")
1718 (match_operand:V_128 2 "general_operand")))]
1720 && (GET_MODE_NUNITS (<V_128:MODE>mode)
1721 == GET_MODE_NUNITS (<VF_128:MODE>mode))"
1723 bool ok = ix86_expand_fp_vcond (operands);
1728 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1730 ;; Parallel floating point logical operations
1732 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Packed float and-not over the VF modes.  The output template is built at
;; run time: a mnemonic suffix is chosen from the insn mode attribute, a
;; format string is chosen from the matched alternative (andn for non-AVX,
;; vandn for AVX), and the two are combined with snprintf into a static
;; 32-byte buffer.  The mode attribute is computed by a cond that tests, in
;; order, TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL, TARGET_AVX and
;; optimize_function_for_size_p.
;; NOTE(review): the RTL operator lines, the insn condition and the switch
;; case labels are not visible in this excerpt.
1734 (define_insn "<sse>_andnot<mode>3"
1735 [(set (match_operand:VF 0 "register_operand" "=x,x")
1738 (match_operand:VF 1 "register_operand" "0,x"))
1739 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
1742 static char buf[32];
1746 switch (get_attr_mode (insn))
1753 suffix = "<ssemodesuffix>";
1756 switch (which_alternative)
1759 ops = "andn%s\t{%%2, %%0|%%0, %%2}";
1762 ops = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1768 snprintf (buf, sizeof (buf), ops, suffix);
1771 [(set_attr "isa" "noavx,avx")
1772 (set_attr "type" "sselog")
1773 (set_attr "prefix" "orig,vex")
1775 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
1776 (const_string "<ssePSmode>")
1777 (match_test "TARGET_AVX")
1778 (const_string "<MODE>")
1779 (match_test "optimize_function_for_size_p (cfun)")
1780 (const_string "V4SF")
1782 (const_string "<MODE>")))])
;; Named expander for the packed float bitwise logic operations over the VF
;; modes.  Both inputs may be registers or memory; the preparation statement
;; calls ix86_fixup_binary_operands_no_copy to legitimize them.
;; NOTE(review): the code iterator line and the expander condition are not
;; visible in this excerpt.
1784 (define_expand "<code><mode>3"
1785 [(set (match_operand:VF 0 "register_operand")
1787 (match_operand:VF 1 "nonimmediate_operand")
1788 (match_operand:VF 2 "nonimmediate_operand")))]
1790 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Matcher for the packed float bitwise logic operations over the VF modes.
;; Operand 1 carries the % commutative marker, and the condition requires
;; ix86_binary_operator_ok.  The output template is built at run time: a
;; suffix is chosen from the insn mode attribute, a <logic> or v<logic>
;; format string from the matched alternative, and the two are combined
;; with snprintf into a static 32-byte buffer.
;; NOTE(review): the code iterator line and the switch case labels are not
;; visible in this excerpt.
1792 (define_insn "*<code><mode>3"
1793 [(set (match_operand:VF 0 "register_operand" "=x,x")
1795 (match_operand:VF 1 "nonimmediate_operand" "%0,x")
1796 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
1797 "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1799 static char buf[32];
1803 switch (get_attr_mode (insn))
1810 suffix = "<ssemodesuffix>";
1813 switch (which_alternative)
1816 ops = "<logic>%s\t{%%2, %%0|%%0, %%2}";
1819 ops = "v<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1825 snprintf (buf, sizeof (buf), ops, suffix);
1828 [(set_attr "isa" "noavx,avx")
1829 (set_attr "type" "sselog")
1830 (set_attr "prefix" "orig,vex")
1832 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
1833 (const_string "<ssePSmode>")
1834 (match_test "TARGET_AVX")
1835 (const_string "<MODE>")
1836 (match_test "optimize_function_for_size_p (cfun)")
1837 (const_string "V4SF")
1839 (const_string "<MODE>")))])
;; Vector copysign expander over the VF modes.  Operand 3 is a mask produced
;; by ix86_build_signbit_mask.  The visible RTL forms (not mask) combined
;; with operand 1, and mask AND operand 2, then ORs two fresh temporaries
;; (operands 4 and 5) into operand 0.
;; NOTE(review): the set lines that assign operands 4 and 5 and the
;; expander condition are not visible in this excerpt; presumably the two
;; masked values are stored to those temporaries - confirm.
1841 (define_expand "copysign<mode>3"
1844 (not:VF (match_dup 3))
1845 (match_operand:VF 1 "nonimmediate_operand")))
1847 (and:VF (match_dup 3)
1848 (match_operand:VF 2 "nonimmediate_operand")))
1849 (set (match_operand:VF 0 "register_operand")
1850 (ior:VF (match_dup 4) (match_dup 5)))]
1853 operands[3] = ix86_build_signbit_mask (<MODE>mode, 1, 0);
1855 operands[4] = gen_reg_rtx (<MODE>mode);
1856 operands[5] = gen_reg_rtx (<MODE>mode);
1859 ;; Also define scalar versions. These are used for abs, neg, and
1860 ;; conditional move. Using subregs into vector modes causes register
1861 ;; allocation lossage. These patterns do not allow memory operands
1862 ;; because the native instructions read the full 128 bits.
;; Scalar (MODEF) and-not.  All operands must be registers; no memory
;; alternative is offered.  The output template is built at run time: the
;; suffix is ps when the insn mode attribute is V4SF and <ssevecmodesuffix>
;; otherwise, and the andn/vandn format string is chosen from the matched
;; alternative, then combined with snprintf into a static 32-byte buffer.
;; NOTE(review): the RTL operator lines and the switch case labels are not
;; visible in this excerpt.
1864 (define_insn "*andnot<mode>3"
1865 [(set (match_operand:MODEF 0 "register_operand" "=x,x")
1868 (match_operand:MODEF 1 "register_operand" "0,x"))
1869 (match_operand:MODEF 2 "register_operand" "x,x")))]
1870 "SSE_FLOAT_MODE_P (<MODE>mode)"
1872 static char buf[32];
1875 = (get_attr_mode (insn) == MODE_V4SF) ? "ps" : "<ssevecmodesuffix>";
1877 switch (which_alternative)
1880 ops = "andn%s\t{%%2, %%0|%%0, %%2}";
1883 ops = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1889 snprintf (buf, sizeof (buf), ops, suffix);
1892 [(set_attr "isa" "noavx,avx")
1893 (set_attr "type" "sselog")
1894 (set_attr "prefix" "orig,vex")
1896 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
1897 (const_string "V4SF")
1898 (match_test "TARGET_AVX")
1899 (const_string "<ssevecmode>")
1900 (match_test "optimize_function_for_size_p (cfun)")
1901 (const_string "V4SF")
1903 (const_string "<ssevecmode>")))])
;; TFmode and-not.  Operand 1 is wrapped in not; operand 2 may be a register
;; or memory.  The mnemonic is andnps when the insn mode attribute is V4SF
;; and pandn otherwise; a plain or v-prefixed format string is chosen from
;; the matched alternative and combined with snprintf into a static 32-byte
;; buffer.  prefix_data16 depends on alternative 0 combined with mode TI.
;; NOTE(review): the outer RTL code, the insn condition and the switch case
;; labels are not visible in this excerpt.
1905 (define_insn "*andnottf3"
1906 [(set (match_operand:TF 0 "register_operand" "=x,x")
1908 (not:TF (match_operand:TF 1 "register_operand" "0,x"))
1909 (match_operand:TF 2 "nonimmediate_operand" "xm,xm")))]
1912 static char buf[32];
1915 = (get_attr_mode (insn) == MODE_V4SF) ? "andnps" : "pandn";
1917 switch (which_alternative)
1920 ops = "%s\t{%%2, %%0|%%0, %%2}";
1923 ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1929 snprintf (buf, sizeof (buf), ops, tmp);
1932 [(set_attr "isa" "noavx,avx")
1933 (set_attr "type" "sselog")
1934 (set (attr "prefix_data16")
1936 (and (eq_attr "alternative" "0")
1937 (eq_attr "mode" "TI"))
1939 (const_string "*")))
1940 (set_attr "prefix" "orig,vex")
1942 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
1943 (const_string "V4SF")
1944 (match_test "TARGET_AVX")
1946 (ior (not (match_test "TARGET_SSE2"))
1947 (match_test "optimize_function_for_size_p (cfun)"))
1948 (const_string "V4SF")
1950 (const_string "TI")))])
;; Scalar (MODEF) bitwise logic operations.  All operands must be registers,
;; and operand 1 carries the % commutative marker.  The output template is
;; built at run time: the suffix is ps when the insn mode attribute is V4SF
;; and <ssevecmodesuffix> otherwise, and the <logic> or v<logic> format
;; string is chosen from the matched alternative, then combined with
;; snprintf into a static 32-byte buffer.
;; NOTE(review): the code iterator line and the switch case labels are not
;; visible in this excerpt.
1952 (define_insn "*<code><mode>3"
1953 [(set (match_operand:MODEF 0 "register_operand" "=x,x")
1955 (match_operand:MODEF 1 "register_operand" "%0,x")
1956 (match_operand:MODEF 2 "register_operand" "x,x")))]
1957 "SSE_FLOAT_MODE_P (<MODE>mode)"
1959 static char buf[32];
1962 = (get_attr_mode (insn) == MODE_V4SF) ? "ps" : "<ssevecmodesuffix>";
1964 switch (which_alternative)
1967 ops = "<logic>%s\t{%%2, %%0|%%0, %%2}";
1970 ops = "v<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1976 snprintf (buf, sizeof (buf), ops, suffix);
1979 [(set_attr "isa" "noavx,avx")
1980 (set_attr "type" "sselog")
1981 (set_attr "prefix" "orig,vex")
1983 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
1984 (const_string "V4SF")
1985 (match_test "TARGET_AVX")
1986 (const_string "<ssevecmode>")
1987 (match_test "optimize_function_for_size_p (cfun)")
1988 (const_string "V4SF")
1990 (const_string "<ssevecmode>")))])
;; Named expander for TFmode bitwise logic operations.  Both inputs may be
;; registers or memory; the preparation statement calls
;; ix86_fixup_binary_operands_no_copy to legitimize them.
;; NOTE(review): the code iterator line and the expander condition are not
;; visible in this excerpt.
1992 (define_expand "<code>tf3"
1993 [(set (match_operand:TF 0 "register_operand")
1995 (match_operand:TF 1 "nonimmediate_operand")
1996 (match_operand:TF 2 "nonimmediate_operand")))]
1998 "ix86_fixup_binary_operands_no_copy (<CODE>, TFmode, operands);")
;; Matcher for TFmode bitwise logic operations.  Operand 1 carries the %
;; commutative marker and the condition requires ix86_binary_operator_ok.
;; The mnemonic is <logic>ps when the insn mode attribute is V4SF and
;; p<logic> otherwise; a plain or v-prefixed format string is chosen from
;; the matched alternative and combined with snprintf into a static 32-byte
;; buffer.  prefix_data16 depends on alternative 0 combined with mode TI.
;; NOTE(review): the code iterator line, the first line of the condition
;; and the switch case labels are not visible in this excerpt.
2000 (define_insn "*<code>tf3"
2001 [(set (match_operand:TF 0 "register_operand" "=x,x")
2003 (match_operand:TF 1 "nonimmediate_operand" "%0,x")
2004 (match_operand:TF 2 "nonimmediate_operand" "xm,xm")))]
2006 && ix86_binary_operator_ok (<CODE>, TFmode, operands)"
2008 static char buf[32];
2011 = (get_attr_mode (insn) == MODE_V4SF) ? "<logic>ps" : "p<logic>";
2013 switch (which_alternative)
2016 ops = "%s\t{%%2, %%0|%%0, %%2}";
2019 ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
2025 snprintf (buf, sizeof (buf), ops, tmp);
2028 [(set_attr "isa" "noavx,avx")
2029 (set_attr "type" "sselog")
2030 (set (attr "prefix_data16")
2032 (and (eq_attr "alternative" "0")
2033 (eq_attr "mode" "TI"))
2035 (const_string "*")))
2036 (set_attr "prefix" "orig,vex")
2038 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
2039 (const_string "V4SF")
2040 (match_test "TARGET_AVX")
2042 (ior (not (match_test "TARGET_SSE2"))
2043 (match_test "optimize_function_for_size_p (cfun)"))
2044 (const_string "V4SF")
2046 (const_string "TI")))])
2048 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2050 ;; FMA floating point multiply/accumulate instructions. These include
2051 ;; scalar versions of the instructions as well as vector versions.
2053 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2055 ;; The standard names for scalar FMA are only available with SSE math enabled.
;; SF and DF take part only under TARGET_SSE_MATH; the four vector float
;; modes (V4SF, V2DF, V8SF, V4DF) are unconditional.
2056 (define_mode_iterator FMAMODEM [(SF "TARGET_SSE_MATH")
2057 (DF "TARGET_SSE_MATH")
2058 V4SF V2DF V8SF V4DF])
;; Standard-name fused multiply-add expander over the FMAMODEM modes, with
;; all three inputs unnegated.  Available when either TARGET_FMA or
;; TARGET_FMA4 is set.  There is no preparation code; the RTL template is
;; emitted as written.
;; NOTE(review): the fma RTL code line is not visible in this excerpt.
2060 (define_expand "fma<mode>4"
2061 [(set (match_operand:FMAMODEM 0 "register_operand")
2063 (match_operand:FMAMODEM 1 "nonimmediate_operand")
2064 (match_operand:FMAMODEM 2 "nonimmediate_operand")
2065 (match_operand:FMAMODEM 3 "nonimmediate_operand")))]
2066 "TARGET_FMA || TARGET_FMA4")
;; Standard-name fused multiply-subtract expander over the FMAMODEM modes:
;; operand 3 (the addend) is wrapped in neg.  Available when either
;; TARGET_FMA or TARGET_FMA4 is set.
;; NOTE(review): the fma RTL code line is not visible in this excerpt.
2068 (define_expand "fms<mode>4"
2069 [(set (match_operand:FMAMODEM 0 "register_operand")
2071 (match_operand:FMAMODEM 1 "nonimmediate_operand")
2072 (match_operand:FMAMODEM 2 "nonimmediate_operand")
2073 (neg:FMAMODEM (match_operand:FMAMODEM 3 "nonimmediate_operand"))))]
2074 "TARGET_FMA || TARGET_FMA4")
;; Standard-name negated fused multiply-add expander over the FMAMODEM
;; modes: operand 1 (a multiplicand) is wrapped in neg.  Available when
;; either TARGET_FMA or TARGET_FMA4 is set.
;; NOTE(review): the fma RTL code line is not visible in this excerpt.
2076 (define_expand "fnma<mode>4"
2077 [(set (match_operand:FMAMODEM 0 "register_operand")
2079 (neg:FMAMODEM (match_operand:FMAMODEM 1 "nonimmediate_operand"))
2080 (match_operand:FMAMODEM 2 "nonimmediate_operand")
2081 (match_operand:FMAMODEM 3 "nonimmediate_operand")))]
2082 "TARGET_FMA || TARGET_FMA4")
;; Standard-name negated fused multiply-subtract expander over the FMAMODEM
;; modes: both operand 1 (a multiplicand) and operand 3 (the addend) are
;; wrapped in neg.  Available when either TARGET_FMA or TARGET_FMA4 is set.
;; NOTE(review): the fma RTL code line is not visible in this excerpt.
2084 (define_expand "fnms<mode>4"
2085 [(set (match_operand:FMAMODEM 0 "register_operand")
2087 (neg:FMAMODEM (match_operand:FMAMODEM 1 "nonimmediate_operand"))
2088 (match_operand:FMAMODEM 2 "nonimmediate_operand")
2089 (neg:FMAMODEM (match_operand:FMAMODEM 3 "nonimmediate_operand"))))]
2090 "TARGET_FMA || TARGET_FMA4")
2092 ;; The builtins for intrinsics are not constrained by SSE math enabled.
;; Same six float modes as FMAMODEM, but every entry is unconditional.
2093 (define_mode_iterator FMAMODE [SF DF V4SF V2DF V8SF V4DF])
;; Named fused multiply-add expander over the FMAMODE modes, whose scalar
;; entries carry no TARGET_SSE_MATH condition.  Available when either
;; TARGET_FMA or TARGET_FMA4 is set.  There is no preparation code.
;; NOTE(review): the fma RTL code line is not visible in this excerpt.
2095 (define_expand "fma4i_fmadd_<mode>"
2096 [(set (match_operand:FMAMODE 0 "register_operand")
2098 (match_operand:FMAMODE 1 "nonimmediate_operand")
2099 (match_operand:FMAMODE 2 "nonimmediate_operand")
2100 (match_operand:FMAMODE 3 "nonimmediate_operand")))]
2101 "TARGET_FMA || TARGET_FMA4")
;; Fused multiply-add matcher with five alternatives.  Per the isa
;; attribute, alternatives 0-2 are FMA3 and emit the 132, 213 and 231
;; encodings with one input tied to the destination; alternatives 3-4 are
;; FMA4 and emit the four-operand form.  The constraints allow at most one
;; memory operand in each alternative.
;; NOTE(review): the fma RTL code line is not visible in this excerpt.
2103 (define_insn "*fma_fmadd_<mode>"
2104 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x,x,x")
2106 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x, x,x")
2107 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm,x,m")
2108 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0,xm,x")))]
2109 "TARGET_FMA || TARGET_FMA4"
2111 vfmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2112 vfmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2113 vfmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
2114 vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
2115 vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2116 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
2117 (set_attr "type" "ssemuladd")
2118 (set_attr "mode" "<MODE>")])
;; Fused multiply-subtract matcher with five alternatives.  Per the isa
;; attribute, alternatives 0-2 are FMA3 and emit the 132, 213 and 231
;; encodings of vfmsub; alternatives 3-4 are FMA4 and emit the four-operand
;; form.
;; NOTE(review): the fma code line and the wrapper around operand 3
;; (presumably neg - confirm) are not visible in this excerpt.
2120 (define_insn "*fma_fmsub_<mode>"
2121 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x,x,x")
2123 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x, x,x")
2124 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm,x,m")
2126 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0,xm,x"))))]
2127 "TARGET_FMA || TARGET_FMA4"
2129 vfmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2130 vfmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2131 vfmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
2132 vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
2133 vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2134 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
2135 (set_attr "type" "ssemuladd")
2136 (set_attr "mode" "<MODE>")])
;; Negated fused multiply-add matcher with five alternatives.  Per the isa
;; attribute, alternatives 0-2 are FMA3 and emit the 132, 213 and 231
;; encodings of vfnmadd; alternatives 3-4 are FMA4 and emit the
;; four-operand form.
;; NOTE(review): the fma code line and the wrapper around operand 1
;; (presumably neg - confirm) are not visible in this excerpt.
2138 (define_insn "*fma_fnmadd_<mode>"
2139 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x,x,x")
2142 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x, x,x"))
2143 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm,x,m")
2144 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0,xm,x")))]
2145 "TARGET_FMA || TARGET_FMA4"
2147 vfnmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2148 vfnmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2149 vfnmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
2150 vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
2151 vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2152 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
2153 (set_attr "type" "ssemuladd")
2154 (set_attr "mode" "<MODE>")])
;; Negated fused multiply-subtract matcher with five alternatives.  Per the
;; isa attribute, alternatives 0-2 are FMA3 and emit the 132, 213 and 231
;; encodings of vfnmsub; alternatives 3-4 are FMA4 and emit the
;; four-operand form.
;; NOTE(review): the fma code line and the wrappers around operands 1 and 3
;; (presumably neg - confirm) are not visible in this excerpt.
2156 (define_insn "*fma_fnmsub_<mode>"
2157 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x,x,x")
2160 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x, x,x"))
2161 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm,x,m")
2163 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0,xm,x"))))]
2164 "TARGET_FMA || TARGET_FMA4"
2166 vfnmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2167 vfnmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2168 vfnmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
2169 vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
2170 vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2171 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
2172 (set_attr "type" "ssemuladd")
2173 (set_attr "mode" "<MODE>")])
2175 ;; FMA parallel floating point multiply addsub and subadd operations.
2177 ;; It would be possible to represent these without the UNSPEC as
2180 ;; (fma op1 op2 op3)
2181 ;; (fma op1 op2 (neg op3))
2184 ;; But this doesn't seem useful in practice.
;; Named expander for the packed multiply-addsub operation over the VF
;; modes; the three inputs are grouped in an operand vector.  Available
;; when either TARGET_FMA or TARGET_FMA4 is set.  There is no preparation
;; code.
;; NOTE(review): the unspec wrapper line and its name are not visible in
;; this excerpt.
2186 (define_expand "fmaddsub_<mode>"
2187 [(set (match_operand:VF 0 "register_operand")
2189 [(match_operand:VF 1 "nonimmediate_operand")
2190 (match_operand:VF 2 "nonimmediate_operand")
2191 (match_operand:VF 3 "nonimmediate_operand")]
2193 "TARGET_FMA || TARGET_FMA4")
;; Packed multiply-addsub matcher over the VF modes with five alternatives.
;; Per the isa attribute, alternatives 0-2 are FMA3 and emit the 132, 213
;; and 231 encodings of vfmaddsub; alternatives 3-4 are FMA4 and emit the
;; four-operand form.
;; NOTE(review): the unspec wrapper line and its name are not visible in
;; this excerpt.
2195 (define_insn "*fma_fmaddsub_<mode>"
2196 [(set (match_operand:VF 0 "register_operand" "=x,x,x,x,x")
2198 [(match_operand:VF 1 "nonimmediate_operand" "%0, 0,x, x,x")
2199 (match_operand:VF 2 "nonimmediate_operand" "xm, x,xm,x,m")
2200 (match_operand:VF 3 "nonimmediate_operand" " x,xm,0,xm,x")]
2202 "TARGET_FMA || TARGET_FMA4"
2204 vfmaddsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2205 vfmaddsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2206 vfmaddsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
2207 vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
2208 vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2209 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
2210 (set_attr "type" "ssemuladd")
2211 (set_attr "mode" "<MODE>")])
;; Packed multiply-subadd matcher over the VF modes with five alternatives.
;; Per the isa attribute, alternatives 0-2 are FMA3 and emit the 132, 213
;; and 231 encodings of vfmsubadd; alternatives 3-4 are FMA4 and emit the
;; four-operand form.
;; NOTE(review): the unspec wrapper line and the wrapper around operand 3
;; (presumably neg - confirm) are not visible in this excerpt.
2213 (define_insn "*fma_fmsubadd_<mode>"
2214 [(set (match_operand:VF 0 "register_operand" "=x,x,x,x,x")
2216 [(match_operand:VF 1 "nonimmediate_operand" "%0, 0,x, x,x")
2217 (match_operand:VF 2 "nonimmediate_operand" "xm, x,xm,x,m")
2219 (match_operand:VF 3 "nonimmediate_operand" " x,xm,0,xm,x"))]
2221 "TARGET_FMA || TARGET_FMA4"
2223 vfmsubadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2224 vfmsubadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2225 vfmsubadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
2226 vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
2227 vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2228 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
2229 (set_attr "type" "ssemuladd")
2230 (set_attr "mode" "<MODE>")])
2232 ;; FMA3 floating point scalar intrinsics. These merge result with
2233 ;; high-order elements from the destination register.
;; Named expander for the scalar-in-vector multiply-add on 128-bit float
;; vector modes; all three inputs may be registers or memory.
;; NOTE(review): the vec_merge/fma wrapper lines, the merge source and the
;; expander condition are not visible in this excerpt.
2235 (define_expand "fmai_vmfmadd_<mode>"
2236 [(set (match_operand:VF_128 0 "register_operand")
2239 (match_operand:VF_128 1 "nonimmediate_operand")
2240 (match_operand:VF_128 2 "nonimmediate_operand")
2241 (match_operand:VF_128 3 "nonimmediate_operand"))
;; Scalar-in-vector multiply-add matcher.  Operand 1 is tied to the
;; destination in both alternatives.  Alternative 0 emits the 132 encoding
;; (operand 2 may be memory); alternative 1 emits the 213 encoding (operand
;; 3 may be memory).  Templates use the scalar mode suffix.
;; NOTE(review): the vec_merge/fma wrapper lines and the insn condition are
;; not visible in this excerpt.
2246 (define_insn "*fmai_fmadd_<mode>"
2247 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
2250 (match_operand:VF_128 1 "nonimmediate_operand" " 0, 0")
2251 (match_operand:VF_128 2 "nonimmediate_operand" "xm, x")
2252 (match_operand:VF_128 3 "nonimmediate_operand" " x,xm"))
2257 vfmadd132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %<iptr>3, %<iptr>2}
2258 vfmadd213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %<iptr>2, %<iptr>3}"
2259 [(set_attr "type" "ssemuladd")
2260 (set_attr "mode" "<MODE>")])
;; Scalar-in-vector multiply-subtract matcher.  Operand 1 is tied to the
;; destination in both alternatives.  Alternative 0 emits the 132 encoding
;; of vfmsub; alternative 1 emits the 213 encoding.  Templates use the
;; scalar mode suffix.
;; NOTE(review): the vec_merge/fma wrapper lines, the wrapper around
;; operand 3 (presumably neg - confirm) and the insn condition are not
;; visible in this excerpt.
2262 (define_insn "*fmai_fmsub_<mode>"
2263 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
2266 (match_operand:VF_128 1 "nonimmediate_operand" " 0, 0")
2267 (match_operand:VF_128 2 "nonimmediate_operand" "xm, x")
2269 (match_operand:VF_128 3 "nonimmediate_operand" " x,xm")))
2274 vfmsub132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %<iptr>3, %<iptr>2}
2275 vfmsub213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %<iptr>2, %<iptr>3}"
2276 [(set_attr "type" "ssemuladd")
2277 (set_attr "mode" "<MODE>")])
;; Scalar-in-vector negated multiply-add matcher.  Here operand 2 appears
;; first in the RTL and operand 1 (tied to the destination in both
;; alternatives) second.  Alternative 0 emits the 132 encoding of vfnmadd;
;; alternative 1 emits the 213 encoding.
;; NOTE(review): the vec_merge/fma wrapper lines, the wrapper around
;; operand 2 (presumably neg - confirm) and the insn condition are not
;; visible in this excerpt.
2279 (define_insn "*fmai_fnmadd_<mode>"
2280 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
2284 (match_operand:VF_128 2 "nonimmediate_operand" "xm, x"))
2285 (match_operand:VF_128 1 "nonimmediate_operand" " 0, 0")
2286 (match_operand:VF_128 3 "nonimmediate_operand" " x,xm"))
2291 vfnmadd132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %<iptr>3, %<iptr>2}
2292 vfnmadd213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %<iptr>2, %<iptr>3}"
2293 [(set_attr "type" "ssemuladd")
2294 (set_attr "mode" "<MODE>")])
;; Scalar-in-vector negated multiply-subtract matcher.  Operand 2 appears
;; first in the RTL and operand 1 (tied to the destination in both
;; alternatives) second.  Alternative 0 emits the 132 encoding of vfnmsub;
;; alternative 1 emits the 213 encoding.
;; NOTE(review): the vec_merge/fma wrapper lines, the wrappers around
;; operands 2 and 3 (presumably neg - confirm) and the insn condition are
;; not visible in this excerpt.
2296 (define_insn "*fmai_fnmsub_<mode>"
2297 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
2301 (match_operand:VF_128 2 "nonimmediate_operand" "xm, x"))
2302 (match_operand:VF_128 1 "nonimmediate_operand" " 0, 0")
2304 (match_operand:VF_128 3 "nonimmediate_operand" " x,xm")))
2309 vfnmsub132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %<iptr>3, %<iptr>2}
2310 vfnmsub213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %<iptr>2, %<iptr>3}"
2311 [(set_attr "type" "ssemuladd")
2312 (set_attr "mode" "<MODE>")])
2314 ;; FMA4 floating point scalar intrinsics. These write the
2315 ;; entire destination register, with the high-order elements zeroed.
;; Expander for the FMA4 scalar multiply-add over the VF_128 modes.
;; Takes three nonimmediate source operands; the preparation statement
;; supplies operand 4 as the all-zero constant of the pattern's mode.
2317 (define_expand "fma4i_vmfmadd_<mode>"
2318 [(set (match_operand:VF_128 0 "register_operand")
2321 (match_operand:VF_128 1 "nonimmediate_operand")
2322 (match_operand:VF_128 2 "nonimmediate_operand")
2323 (match_operand:VF_128 3 "nonimmediate_operand"))
2327 "operands[4] = CONST0_RTX (<MODE>mode);")
;; Four-operand vfmadd<suffix> insn (separate destination).  Operand 1
;; carries the '%' commutative marker; alternative 0 allows memory for
;; operand 3, alternative 1 for operand 2.  Operand 4 must satisfy
;; const0_operand.
2329 (define_insn "*fma4i_vmfmadd_<mode>"
2330 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
2333 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
2334 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
2335 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
2336 (match_operand:VF_128 4 "const0_operand")
2339 "vfmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
2340 [(set_attr "type" "ssemuladd")
2341 (set_attr "mode" "<MODE>")])
;; Four-operand vfmsub<suffix> insn.  Same operand layout and constraints
;; as the vfmadd form: '%' on operand 1, memory allowed for operand 3
;; (alt 0) or operand 2 (alt 1), operand 4 restricted to const0_operand.
;; NOTE(review): the operator wrapping operand 3 is on a line not visible
;; in this listing.
2343 (define_insn "*fma4i_vmfmsub_<mode>"
2344 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
2347 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
2348 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
2350 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
2351 (match_operand:VF_128 4 "const0_operand")
2354 "vfmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
2355 [(set_attr "type" "ssemuladd")
2356 (set_attr "mode" "<MODE>")])
;; Four-operand vfnmadd<suffix> insn.  Operand 1 sits inside a wrapper
;; whose opening line is not visible here; memory is allowed for operand 3
;; (alt 0) or operand 2 (alt 1).  Operand 4 must satisfy const0_operand.
2358 (define_insn "*fma4i_vmfnmadd_<mode>"
2359 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
2363 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
2364 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
2365 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
2366 (match_operand:VF_128 4 "const0_operand")
2369 "vfnmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
2370 [(set_attr "type" "ssemuladd")
2371 (set_attr "mode" "<MODE>")])
;; Four-operand vfnmsub<suffix> insn.  Operands 1 and 3 each sit inside a
;; wrapper whose opening line is not visible here; memory is allowed for
;; operand 3 (alt 0) or operand 2 (alt 1).  Operand 4 must satisfy
;; const0_operand.
2373 (define_insn "*fma4i_vmfnmsub_<mode>"
2374 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
2378 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
2379 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
2381 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
2382 (match_operand:VF_128 4 "const0_operand")
2385 "vfnmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
2386 [(set_attr "type" "ssemuladd")
2387 (set_attr "mode" "<MODE>")])
2389 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2391 ;; Parallel single-precision floating point conversion operations
2393 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2ps: converts V2SI operand 2 ("ym" constraint) to V2SF.  The V4SF
;; destination is tied to operand 1 ("0").
;; NOTE(review): the RTL that combines the converted value with operand 1
;; and the insn condition are not visible in this listing.
2395 (define_insn "sse_cvtpi2ps"
2396 [(set (match_operand:V4SF 0 "register_operand" "=x")
2399 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
2400 (match_operand:V4SF 1 "register_operand" "0")
2403 "cvtpi2ps\t{%2, %0|%0, %2}"
2404 [(set_attr "type" "ssecvt")
2405 (set_attr "mode" "V4SF")])
;; cvtps2pi: V4SF operand 1 goes through UNSPEC-style conversion to V4SI and
;; elements 0 and 1 form the V2SI result, placed in a "y" register.
;; NOTE(review): the unspec code name and the selecting operator are on
;; lines not visible in this listing.
2407 (define_insn "sse_cvtps2pi"
2408 [(set (match_operand:V2SI 0 "register_operand" "=y")
2410 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
2412 (parallel [(const_int 0) (const_int 1)])))]
2414 "cvtps2pi\t{%1, %0|%0, %q1}"
2415 [(set_attr "type" "ssecvt")
2416 (set_attr "unit" "mmx")
2417 (set_attr "mode" "DI")])
;; cvttps2pi: applies fix:V4SI to V4SF operand 1 and uses elements 0 and 1
;; as the V2SI result, placed in a "y" register.  prefix_rep is forced
;; to "0" for this insn.
2419 (define_insn "sse_cvttps2pi"
2420 [(set (match_operand:V2SI 0 "register_operand" "=y")
2422 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
2423 (parallel [(const_int 0) (const_int 1)])))]
2425 "cvttps2pi\t{%1, %0|%0, %q1}"
2426 [(set_attr "type" "ssecvt")
2427 (set_attr "unit" "mmx")
2428 (set_attr "prefix_rep" "0")
2429 (set_attr "mode" "SF")])
;; cvtsi2ss: converts SI operand 2 to SF for a V4SF destination.
;; Alternatives 0 and 1 are the non-AVX forms (register / memory source)
;; with the destination tied to operand 1; alternative 2 is the AVX
;; three-operand form with operand 1 in any "x" register.  The per-CPU
;; decode attributes are listed per alternative in the same order.
2431 (define_insn "sse_cvtsi2ss"
2432 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
2435 (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m,rm")))
2436 (match_operand:V4SF 1 "register_operand" "0,0,x")
2440 cvtsi2ss\t{%2, %0|%0, %2}
2441 cvtsi2ss\t{%2, %0|%0, %2}
2442 vcvtsi2ss\t{%2, %1, %0|%0, %1, %2}"
2443 [(set_attr "isa" "noavx,noavx,avx")
2444 (set_attr "type" "sseicvt")
2445 (set_attr "athlon_decode" "vector,double,*")
2446 (set_attr "amdfam10_decode" "vector,double,*")
2447 (set_attr "bdver1_decode" "double,direct,*")
2448 (set_attr "btver2_decode" "double,double,double")
2449 (set_attr "prefix" "orig,orig,vex")
2450 (set_attr "mode" "SF")])
;; cvtsi2ssq: DImode-source counterpart of cvtsi2ss; enabled only when
;; TARGET_SSE && TARGET_64BIT.  Alternatives 0/1 are non-AVX with the
;; destination tied to operand 1 and prefix_rex "1"; alternative 2 is the
;; AVX form with length_vex "4".
2452 (define_insn "sse_cvtsi2ssq"
2453 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
2456 (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,m,rm")))
2457 (match_operand:V4SF 1 "register_operand" "0,0,x")
2459 "TARGET_SSE && TARGET_64BIT"
2461 cvtsi2ssq\t{%2, %0|%0, %2}
2462 cvtsi2ssq\t{%2, %0|%0, %2}
2463 vcvtsi2ssq\t{%2, %1, %0|%0, %1, %2}"
2464 [(set_attr "isa" "noavx,noavx,avx")
2465 (set_attr "type" "sseicvt")
2466 (set_attr "athlon_decode" "vector,double,*")
2467 (set_attr "amdfam10_decode" "vector,double,*")
2468 (set_attr "bdver1_decode" "double,direct,*")
2469 (set_attr "btver2_decode" "double,double,double")
2470 (set_attr "length_vex" "*,*,4")
2471 (set_attr "prefix_rex" "1,1,*")
2472 (set_attr "prefix" "orig,orig,vex")
2473 (set_attr "mode" "SF")])
;; cvtss2si: takes element 0 of V4SF operand 1 (register or memory) and
;; converts it to an SI register under UNSPEC_FIX_NOTRUNC.
;; NOTE(review): the unspec/selection opening lines and the insn condition
;; are not visible in this listing.
2475 (define_insn "sse_cvtss2si"
2476 [(set (match_operand:SI 0 "register_operand" "=r,r")
2479 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2480 (parallel [(const_int 0)]))]
2481 UNSPEC_FIX_NOTRUNC))]
2483 "%vcvtss2si\t{%1, %0|%0, %k1}"
2484 [(set_attr "type" "sseicvt")
2485 (set_attr "athlon_decode" "double,vector")
2486 (set_attr "bdver1_decode" "double,double")
2487 (set_attr "prefix_rep" "1")
2488 (set_attr "prefix" "maybe_vex")
2489 (set_attr "mode" "SI")])
;; cvtss2si on a plain SFmode operand (register or memory) rather than a
;; vector element; result is an SI register via UNSPEC_FIX_NOTRUNC.
2491 (define_insn "sse_cvtss2si_2"
2492 [(set (match_operand:SI 0 "register_operand" "=r,r")
2493 (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
2494 UNSPEC_FIX_NOTRUNC))]
2496 "%vcvtss2si\t{%1, %0|%0, %k1}"
2497 [(set_attr "type" "sseicvt")
2498 (set_attr "athlon_decode" "double,vector")
2499 (set_attr "amdfam10_decode" "double,double")
2500 (set_attr "bdver1_decode" "double,double")
2501 (set_attr "prefix_rep" "1")
2502 (set_attr "prefix" "maybe_vex")
2503 (set_attr "mode" "SI")])
;; 64-bit cvtss2si: element 0 of V4SF operand 1 converted to a DI register
;; under UNSPEC_FIX_NOTRUNC.  Enabled only when TARGET_SSE && TARGET_64BIT;
;; the AT&T template carries a {q} suffix.
2505 (define_insn "sse_cvtss2siq"
2506 [(set (match_operand:DI 0 "register_operand" "=r,r")
2509 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2510 (parallel [(const_int 0)]))]
2511 UNSPEC_FIX_NOTRUNC))]
2512 "TARGET_SSE && TARGET_64BIT"
2513 "%vcvtss2si{q}\t{%1, %0|%0, %k1}"
2514 [(set_attr "type" "sseicvt")
2515 (set_attr "athlon_decode" "double,vector")
2516 (set_attr "bdver1_decode" "double,double")
2517 (set_attr "prefix_rep" "1")
2518 (set_attr "prefix" "maybe_vex")
2519 (set_attr "mode" "DI")])
;; 64-bit cvtss2si on a plain SFmode operand; result is a DI register via
;; UNSPEC_FIX_NOTRUNC.  Enabled only when TARGET_SSE && TARGET_64BIT.
2521 (define_insn "sse_cvtss2siq_2"
2522 [(set (match_operand:DI 0 "register_operand" "=r,r")
2523 (unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
2524 UNSPEC_FIX_NOTRUNC))]
2525 "TARGET_SSE && TARGET_64BIT"
2526 "%vcvtss2si{q}\t{%1, %0|%0, %k1}"
2527 [(set_attr "type" "sseicvt")
2528 (set_attr "athlon_decode" "double,vector")
2529 (set_attr "amdfam10_decode" "double,double")
2530 (set_attr "bdver1_decode" "double,double")
2531 (set_attr "prefix_rep" "1")
2532 (set_attr "prefix" "maybe_vex")
2533 (set_attr "mode" "DI")])
;; cvttss2si: operates on element 0 of V4SF operand 1 and produces an SI
;; register.
;; NOTE(review): the conversion and selection operator lines, and the insn
;; condition, are not visible in this listing.
2535 (define_insn "sse_cvttss2si"
2536 [(set (match_operand:SI 0 "register_operand" "=r,r")
2539 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2540 (parallel [(const_int 0)]))))]
2542 "%vcvttss2si\t{%1, %0|%0, %k1}"
2543 [(set_attr "type" "sseicvt")
2544 (set_attr "athlon_decode" "double,vector")
2545 (set_attr "amdfam10_decode" "double,double")
2546 (set_attr "bdver1_decode" "double,double")
2547 (set_attr "prefix_rep" "1")
2548 (set_attr "prefix" "maybe_vex")
2549 (set_attr "mode" "SI")])
;; 64-bit cvttss2si: operates on element 0 of V4SF operand 1 and produces a
;; DI register.  Enabled only when TARGET_SSE && TARGET_64BIT.
2551 (define_insn "sse_cvttss2siq"
2552 [(set (match_operand:DI 0 "register_operand" "=r,r")
2555 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2556 (parallel [(const_int 0)]))))]
2557 "TARGET_SSE && TARGET_64BIT"
2558 "%vcvttss2si{q}\t{%1, %0|%0, %k1}"
2559 [(set_attr "type" "sseicvt")
2560 (set_attr "athlon_decode" "double,vector")
2561 (set_attr "amdfam10_decode" "double,double")
2562 (set_attr "bdver1_decode" "double,double")
2563 (set_attr "prefix_rep" "1")
2564 (set_attr "prefix" "maybe_vex")
2565 (set_attr "mode" "DI")])
;; cvtdq2ps: integer vector (<sseintvecmode>) to single-precision float
;; vector, instantiated for each mode of the VF1 iterator.
;; NOTE(review): the conversion operator line and the insn condition are
;; not visible in this listing.
2567 (define_insn "float<sseintvecmodelower><mode>2"
2568 [(set (match_operand:VF1 0 "register_operand" "=x")
2570 (match_operand:<sseintvecmode> 1 "nonimmediate_operand" "xm")))]
2572 "%vcvtdq2ps\t{%1, %0|%0, %1}"
2573 [(set_attr "type" "ssecvt")
2574 (set_attr "prefix" "maybe_vex")
2575 (set_attr "mode" "<sseinsnmode>")])
;; Integer-vector to VF1 float-vector expander (floatuns name).  Available
;; when TARGET_SSE2 and either the mode is V4SF or TARGET_AVX2 is set.  All
;; work is delegated to ix86_expand_vector_convert_uns_vsivsf.
2577 (define_expand "floatuns<sseintvecmodelower><mode>2"
2578 [(match_operand:VF1 0 "register_operand")
2579 (match_operand:<sseintvecmode> 1 "register_operand")]
2580 "TARGET_SSE2 && (<MODE>mode == V4SFmode || TARGET_AVX2)"
2582 ix86_expand_vector_convert_uns_vsivsf (operands[0], operands[1]);
;; 256-bit vcvtps2dq: V8SF operand 1 (register or memory) to V8SI via
;; UNSPEC_FIX_NOTRUNC.  Always VEX-encoded.
2586 (define_insn "avx_cvtps2dq256"
2587 [(set (match_operand:V8SI 0 "register_operand" "=x")
2588 (unspec:V8SI [(match_operand:V8SF 1 "nonimmediate_operand" "xm")]
2589 UNSPEC_FIX_NOTRUNC))]
2591 "vcvtps2dq\t{%1, %0|%0, %1}"
2592 [(set_attr "type" "ssecvt")
2593 (set_attr "prefix" "vex")
2594 (set_attr "mode" "OI")])
;; 128-bit cvtps2dq: V4SF operand 1 to V4SI via UNSPEC_FIX_NOTRUNC.
;; prefix_data16 is computed from a TARGET_AVX test; one arm of that
;; selection is "1" (the other arm is on a line not visible here).
2596 (define_insn "sse2_cvtps2dq"
2597 [(set (match_operand:V4SI 0 "register_operand" "=x")
2598 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
2599 UNSPEC_FIX_NOTRUNC))]
2601 "%vcvtps2dq\t{%1, %0|%0, %1}"
2602 [(set_attr "type" "ssecvt")
2603 (set (attr "prefix_data16")
2605 (match_test "TARGET_AVX")
2607 (const_string "1")))
2608 (set_attr "prefix" "maybe_vex")
2609 (set_attr "mode" "TI")])
;; 256-bit vcvttps2dq: fix (truncating conversion) of V8SF operand 1 to
;; V8SI.  Always VEX-encoded.
2611 (define_insn "fix_truncv8sfv8si2"
2612 [(set (match_operand:V8SI 0 "register_operand" "=x")
2613 (fix:V8SI (match_operand:V8SF 1 "nonimmediate_operand" "xm")))]
2615 "vcvttps2dq\t{%1, %0|%0, %1}"
2616 [(set_attr "type" "ssecvt")
2617 (set_attr "prefix" "vex")
2618 (set_attr "mode" "OI")])
;; 128-bit cvttps2dq: fix (truncating conversion) of V4SF operand 1 to
;; V4SI.  prefix_rep and prefix_data16 are each computed from a TARGET_AVX
;; test (arms "1" and "0" respectively are visible).
;; NOTE(review): prefix_data16 is assigned twice in this attribute list --
;; once by the computed (set (attr ...)) form and again by the constant
;; (set_attr "prefix_data16" "0").  One of the two looks redundant; confirm
;; which is intended before touching it.
2620 (define_insn "fix_truncv4sfv4si2"
2621 [(set (match_operand:V4SI 0 "register_operand" "=x")
2622 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
2624 "%vcvttps2dq\t{%1, %0|%0, %1}"
2625 [(set_attr "type" "ssecvt")
2626 (set (attr "prefix_rep")
2628 (match_test "TARGET_AVX")
2630 (const_string "1")))
2631 (set (attr "prefix_data16")
2633 (match_test "TARGET_AVX")
2635 (const_string "0")))
2636 (set_attr "prefix_data16" "0")
2637 (set_attr "prefix" "maybe_vex")
2638 (set_attr "mode" "TI")])
;; VF1 float vector to integer vector expander (fixuns_trunc name).
;; Steps visible here: ix86_expand_adjust_ufix_to_sfix_si produces an
;; adjusted input (tmp[0]) and fills tmp[2]; the signed fix_trunc pattern
;; converts tmp[0] into tmp[1]; the result is tmp[1] XOR tmp[2].
;; NOTE(review): the insn condition and the declaration of tmp[] are on
;; lines not visible in this listing.
2640 (define_expand "fixuns_trunc<mode><sseintvecmodelower>2"
2641 [(match_operand:<sseintvecmode> 0 "register_operand")
2642 (match_operand:VF1 1 "register_operand")]
2646 tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]);
2647 tmp[1] = gen_reg_rtx (<sseintvecmode>mode);
2648 emit_insn (gen_fix_trunc<mode><sseintvecmodelower>2 (tmp[1], tmp[0]));
2649 emit_insn (gen_xor<sseintvecmodelower>3 (operands[0], tmp[1], tmp[2]));
2653 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2655 ;; Parallel double-precision floating point conversion operations
2657 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2pd: float conversion of V2SI operand 1 to V2DF.  Alternative 0
;; takes the source from a "y" register (unit "mmx", prefix_data16 "1");
;; alternative 1 takes it from memory.
2659 (define_insn "sse2_cvtpi2pd"
2660 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2661 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
2663 "cvtpi2pd\t{%1, %0|%0, %1}"
2664 [(set_attr "type" "ssecvt")
2665 (set_attr "unit" "mmx,*")
2666 (set_attr "prefix_data16" "1,*")
2667 (set_attr "mode" "V2DF")])
;; cvtpd2pi: V2DF operand 1 to V2SI via UNSPEC_FIX_NOTRUNC, with the result
;; in a "y" register (unit "mmx").
2669 (define_insn "sse2_cvtpd2pi"
2670 [(set (match_operand:V2SI 0 "register_operand" "=y")
2671 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2672 UNSPEC_FIX_NOTRUNC))]
2674 "cvtpd2pi\t{%1, %0|%0, %1}"
2675 [(set_attr "type" "ssecvt")
2676 (set_attr "unit" "mmx")
2677 (set_attr "bdver1_decode" "double")
2678 (set_attr "btver2_decode" "direct")
2679 (set_attr "prefix_data16" "1")
2680 (set_attr "mode" "DI")])
;; cvttpd2pi: fix (truncating conversion) of V2DF operand 1 to V2SI, with
;; the result in a "y" register (unit "mmx").
2682 (define_insn "sse2_cvttpd2pi"
2683 [(set (match_operand:V2SI 0 "register_operand" "=y")
2684 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
2686 "cvttpd2pi\t{%1, %0|%0, %1}"
2687 [(set_attr "type" "ssecvt")
2688 (set_attr "unit" "mmx")
2689 (set_attr "bdver1_decode" "double")
2690 (set_attr "prefix_data16" "1")
2691 (set_attr "mode" "TI")])
;; cvtsi2sd: converts SI operand 2 to DF for a V2DF destination.
;; Alternatives 0 and 1 are the non-AVX forms (register / memory source)
;; with the destination tied to operand 1; alternative 2 is the AVX
;; three-operand form.
;; NOTE(review): the merge RTL and the insn condition are on lines not
;; visible in this listing.
2693 (define_insn "sse2_cvtsi2sd"
2694 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
2697 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m,rm")))
2698 (match_operand:V2DF 1 "register_operand" "0,0,x")
2702 cvtsi2sd\t{%2, %0|%0, %2}
2703 cvtsi2sd\t{%2, %0|%0, %2}
2704 vcvtsi2sd\t{%2, %1, %0|%0, %1, %2}"
2705 [(set_attr "isa" "noavx,noavx,avx")
2706 (set_attr "type" "sseicvt")
2707 (set_attr "athlon_decode" "double,direct,*")
2708 (set_attr "amdfam10_decode" "vector,double,*")
2709 (set_attr "bdver1_decode" "double,direct,*")
2710 (set_attr "btver2_decode" "double,double,double")
2711 (set_attr "prefix" "orig,orig,vex")
2712 (set_attr "mode" "DF")])
;; cvtsi2sdq: DImode-source counterpart of cvtsi2sd; enabled only when
;; TARGET_SSE2 && TARGET_64BIT.  Alternatives 0/1 are non-AVX with the
;; destination tied to operand 1 and prefix_rex "1"; alternative 2 is the
;; AVX form with length_vex "4".
2714 (define_insn "sse2_cvtsi2sdq"
2715 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
2718 (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m,rm")))
2719 (match_operand:V2DF 1 "register_operand" "0,0,x")
2721 "TARGET_SSE2 && TARGET_64BIT"
2723 cvtsi2sdq\t{%2, %0|%0, %2}
2724 cvtsi2sdq\t{%2, %0|%0, %2}
2725 vcvtsi2sdq\t{%2, %1, %0|%0, %1, %2}"
2726 [(set_attr "isa" "noavx,noavx,avx")
2727 (set_attr "type" "sseicvt")
2728 (set_attr "athlon_decode" "double,direct,*")
2729 (set_attr "amdfam10_decode" "vector,double,*")
2730 (set_attr "bdver1_decode" "double,direct,*")
2731 (set_attr "length_vex" "*,*,4")
2732 (set_attr "prefix_rex" "1,1,*")
2733 (set_attr "prefix" "orig,orig,vex")
2734 (set_attr "mode" "DF")])
;; cvtsd2si: takes element 0 of V2DF operand 1 (register or memory) and
;; converts it to an SI register under UNSPEC_FIX_NOTRUNC.
;; NOTE(review): the unspec/selection opening lines and the insn condition
;; are not visible in this listing.
2736 (define_insn "sse2_cvtsd2si"
2737 [(set (match_operand:SI 0 "register_operand" "=r,r")
2740 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2741 (parallel [(const_int 0)]))]
2742 UNSPEC_FIX_NOTRUNC))]
2744 "%vcvtsd2si\t{%1, %0|%0, %q1}"
2745 [(set_attr "type" "sseicvt")
2746 (set_attr "athlon_decode" "double,vector")
2747 (set_attr "bdver1_decode" "double,double")
2748 (set_attr "btver2_decode" "double,double")
2749 (set_attr "prefix_rep" "1")
2750 (set_attr "prefix" "maybe_vex")
2751 (set_attr "mode" "SI")])
;; cvtsd2si on a plain DFmode operand (register or memory) rather than a
;; vector element; result is an SI register via UNSPEC_FIX_NOTRUNC.
2753 (define_insn "sse2_cvtsd2si_2"
2754 [(set (match_operand:SI 0 "register_operand" "=r,r")
2755 (unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2756 UNSPEC_FIX_NOTRUNC))]
2758 "%vcvtsd2si\t{%1, %0|%0, %q1}"
2759 [(set_attr "type" "sseicvt")
2760 (set_attr "athlon_decode" "double,vector")
2761 (set_attr "amdfam10_decode" "double,double")
2762 (set_attr "bdver1_decode" "double,double")
2763 (set_attr "prefix_rep" "1")
2764 (set_attr "prefix" "maybe_vex")
2765 (set_attr "mode" "SI")])
;; 64-bit cvtsd2si: element 0 of V2DF operand 1 converted to a DI register
;; under UNSPEC_FIX_NOTRUNC.  Enabled only when TARGET_SSE2 && TARGET_64BIT.
2767 (define_insn "sse2_cvtsd2siq"
2768 [(set (match_operand:DI 0 "register_operand" "=r,r")
2771 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2772 (parallel [(const_int 0)]))]
2773 UNSPEC_FIX_NOTRUNC))]
2774 "TARGET_SSE2 && TARGET_64BIT"
2775 "%vcvtsd2si{q}\t{%1, %0|%0, %q1}"
2776 [(set_attr "type" "sseicvt")
2777 (set_attr "athlon_decode" "double,vector")
2778 (set_attr "bdver1_decode" "double,double")
2779 (set_attr "prefix_rep" "1")
2780 (set_attr "prefix" "maybe_vex")
2781 (set_attr "mode" "DI")])
;; 64-bit cvtsd2si on a plain DFmode operand; result is a DI register via
;; UNSPEC_FIX_NOTRUNC.  Enabled only when TARGET_SSE2 && TARGET_64BIT.
2783 (define_insn "sse2_cvtsd2siq_2"
2784 [(set (match_operand:DI 0 "register_operand" "=r,r")
2785 (unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2786 UNSPEC_FIX_NOTRUNC))]
2787 "TARGET_SSE2 && TARGET_64BIT"
2788 "%vcvtsd2si{q}\t{%1, %0|%0, %q1}"
2789 [(set_attr "type" "sseicvt")
2790 (set_attr "athlon_decode" "double,vector")
2791 (set_attr "amdfam10_decode" "double,double")
2792 (set_attr "bdver1_decode" "double,double")
2793 (set_attr "prefix_rep" "1")
2794 (set_attr "prefix" "maybe_vex")
2795 (set_attr "mode" "DI")])
;; cvttsd2si: operates on element 0 of V2DF operand 1 and produces an SI
;; register.
;; NOTE(review): the conversion and selection operator lines, and the insn
;; condition, are not visible in this listing.
2797 (define_insn "sse2_cvttsd2si"
2798 [(set (match_operand:SI 0 "register_operand" "=r,r")
2801 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2802 (parallel [(const_int 0)]))))]
2804 "%vcvttsd2si\t{%1, %0|%0, %q1}"
2805 [(set_attr "type" "sseicvt")
2806 (set_attr "athlon_decode" "double,vector")
2807 (set_attr "amdfam10_decode" "double,double")
2808 (set_attr "bdver1_decode" "double,double")
2809 (set_attr "btver2_decode" "double,double")
2810 (set_attr "prefix_rep" "1")
2811 (set_attr "prefix" "maybe_vex")
2812 (set_attr "mode" "SI")])
;; 64-bit cvttsd2si: operates on element 0 of V2DF operand 1 and produces a
;; DI register.  Enabled only when TARGET_SSE2 && TARGET_64BIT.
2814 (define_insn "sse2_cvttsd2siq"
2815 [(set (match_operand:DI 0 "register_operand" "=r,r")
2818 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2819 (parallel [(const_int 0)]))))]
2820 "TARGET_SSE2 && TARGET_64BIT"
2821 "%vcvttsd2si{q}\t{%1, %0|%0, %q1}"
2822 [(set_attr "type" "sseicvt")
2823 (set_attr "athlon_decode" "double,vector")
2824 (set_attr "amdfam10_decode" "double,double")
2825 (set_attr "bdver1_decode" "double,double")
2826 (set_attr "prefix_rep" "1")
2827 (set_attr "prefix" "maybe_vex")
2828 (set_attr "mode" "DI")])
;; vcvtdq2pd (256-bit result): float conversion of V4SI operand 1 to V4DF.
;; Always VEX-encoded.
2830 (define_insn "floatv4siv4df2"
2831 [(set (match_operand:V4DF 0 "register_operand" "=x")
2832 (float:V4DF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
2834 "vcvtdq2pd\t{%1, %0|%0, %1}"
2835 [(set_attr "type" "ssecvt")
2836 (set_attr "prefix" "vex")
2837 (set_attr "mode" "V4DF")])
;; vcvtdq2pd on elements 0..3 of V8SI operand 1, producing V4DF.  The
;; template prints the source with the %x modifier.
;; NOTE(review): the conversion/selection operator lines and the insn
;; condition are not visible in this listing.
2839 (define_insn "avx_cvtdq2pd256_2"
2840 [(set (match_operand:V4DF 0 "register_operand" "=x")
2843 (match_operand:V8SI 1 "nonimmediate_operand" "xm")
2844 (parallel [(const_int 0) (const_int 1)
2845 (const_int 2) (const_int 3)]))))]
2847 "vcvtdq2pd\t{%x1, %0|%0, %x1}"
2848 [(set_attr "type" "ssecvt")
2849 (set_attr "prefix" "vex")
2850 (set_attr "mode" "V4DF")])
;; cvtdq2pd on elements 0 and 1 of V4SI operand 1, producing V2DF.
;; NOTE(review): the conversion/selection operator lines and the insn
;; condition are not visible in this listing.
2852 (define_insn "sse2_cvtdq2pd"
2853 [(set (match_operand:V2DF 0 "register_operand" "=x")
2856 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
2857 (parallel [(const_int 0) (const_int 1)]))))]
2859 "%vcvtdq2pd\t{%1, %0|%0, %q1}"
2860 [(set_attr "type" "ssecvt")
2861 (set_attr "prefix" "maybe_vex")
2862 (set_attr "mode" "V2DF")])
;; vcvtpd2dq (256-bit source): V4DF operand 1 to V4SI via
;; UNSPEC_FIX_NOTRUNC.  The AT&T template carries a {y} suffix.
2864 (define_insn "avx_cvtpd2dq256"
2865 [(set (match_operand:V4SI 0 "register_operand" "=x")
2866 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")]
2867 UNSPEC_FIX_NOTRUNC))]
2869 "vcvtpd2dq{y}\t{%1, %0|%0, %1}"
2870 [(set_attr "type" "ssecvt")
2871 (set_attr "prefix" "vex")
2872 (set_attr "mode" "OI")])
;; Expander producing a V8SI result from a V4DF-to-V4SI unspec conversion;
;; the preparation statement supplies operand 2 as the V4SI zero vector.
;; NOTE(review): how operand 2 is combined with the converted value is on
;; lines not visible in this listing.
2874 (define_expand "avx_cvtpd2dq256_2"
2875 [(set (match_operand:V8SI 0 "register_operand")
2877 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand")]
2881 "operands[2] = CONST0_RTX (V4SImode);")
;; Insn form of the V8SI-result vcvtpd2dq: the V4DF-to-V4SI unspec
;; conversion is paired with operand 2, which must satisfy const0_operand.
;; The template prints the destination with the %x modifier.
2883 (define_insn "*avx_cvtpd2dq256_2"
2884 [(set (match_operand:V8SI 0 "register_operand" "=x")
2886 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")]
2888 (match_operand:V4SI 2 "const0_operand")))]
2890 "vcvtpd2dq{y}\t{%1, %x0|%x0, %1}"
2891 [(set_attr "type" "ssecvt")
2892 (set_attr "prefix" "vex")
2893 (set_attr "btver2_decode" "vector")
2894 (set_attr "mode" "OI")])
;; Expander producing a V4SI result from a V2DF-to-V2SI unspec conversion;
;; the preparation statement supplies operand 2 as the V2SI zero vector.
2896 (define_expand "sse2_cvtpd2dq"
2897 [(set (match_operand:V4SI 0 "register_operand")
2899 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand")]
2903 "operands[2] = CONST0_RTX (V2SImode);")
;; Insn form of cvtpd2dq with a V4SI destination: the V2DF-to-V2SI unspec
;; conversion is paired with operand 2, which must satisfy const0_operand.
;; The C output code returns either the vcvtpd2dq{x} or the plain cvtpd2dq
;; template.
;; NOTE(review): the test choosing between the two returns is on a line
;; not visible in this listing.
2905 (define_insn "*sse2_cvtpd2dq"
2906 [(set (match_operand:V4SI 0 "register_operand" "=x")
2908 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2910 (match_operand:V2SI 2 "const0_operand")))]
2914 return "vcvtpd2dq{x}\t{%1, %0|%0, %1}";
2916 return "cvtpd2dq\t{%1, %0|%0, %1}";
2918 [(set_attr "type" "ssecvt")
2919 (set_attr "prefix_rep" "1")
2920 (set_attr "prefix_data16" "0")
2921 (set_attr "prefix" "maybe_vex")
2922 (set_attr "mode" "TI")
2923 (set_attr "amdfam10_decode" "double")
2924 (set_attr "athlon_decode" "vector")
2925 (set_attr "bdver1_decode" "double")])
;; vcvttpd2dq (256-bit source): fix (truncating conversion) of V4DF
;; operand 1 to V4SI.  The AT&T template carries a {y} suffix.
2927 (define_insn "fix_truncv4dfv4si2"
2928 [(set (match_operand:V4SI 0 "register_operand" "=x")
2929 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
2931 "vcvttpd2dq{y}\t{%1, %0|%0, %1}"
2932 [(set_attr "type" "ssecvt")
2933 (set_attr "prefix" "vex")
2934 (set_attr "mode" "OI")])
;; Expander producing a V8SI result from fix:V4SI of a V4DF operand; the
;; preparation statement supplies operand 2 as the V4SI zero vector.
2936 (define_expand "avx_cvttpd2dq256_2"
2937 [(set (match_operand:V8SI 0 "register_operand")
2939 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand"))
2942 "operands[2] = CONST0_RTX (V4SImode);")
;; Insn form of the V8SI-result vcvttpd2dq: fix:V4SI of V4DF operand 1 is
;; paired with operand 2, which must satisfy const0_operand.  The template
;; prints the destination with the %x modifier.
2944 (define_insn "*avx_cvttpd2dq256_2"
2945 [(set (match_operand:V8SI 0 "register_operand" "=x")
2947 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "xm"))
2948 (match_operand:V4SI 2 "const0_operand")))]
2950 "vcvttpd2dq{y}\t{%1, %x0|%x0, %1}"
2951 [(set_attr "type" "ssecvt")
2952 (set_attr "prefix" "vex")
2953 (set_attr "btver2_decode" "vector")
2954 (set_attr "mode" "OI")])
;; Expander producing a V4SI result from fix:V2SI of a V2DF operand; the
;; preparation statement supplies operand 2 as the V2SI zero vector.
2956 (define_expand "sse2_cvttpd2dq"
2957 [(set (match_operand:V4SI 0 "register_operand")
2959 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand"))
2962 "operands[2] = CONST0_RTX (V2SImode);")
;; Insn form of cvttpd2dq with a V4SI destination: fix:V2SI of V2DF
;; operand 1 is paired with operand 2, which must satisfy const0_operand.
;; The C output code returns either the vcvttpd2dq{x} or the plain
;; cvttpd2dq template.
;; NOTE(review): the test choosing between the two returns is on a line
;; not visible in this listing.
2964 (define_insn "*sse2_cvttpd2dq"
2965 [(set (match_operand:V4SI 0 "register_operand" "=x")
2967 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2968 (match_operand:V2SI 2 "const0_operand")))]
2972 return "vcvttpd2dq{x}\t{%1, %0|%0, %1}";
2974 return "cvttpd2dq\t{%1, %0|%0, %1}";
2976 [(set_attr "type" "ssecvt")
2977 (set_attr "amdfam10_decode" "double")
2978 (set_attr "athlon_decode" "vector")
2979 (set_attr "bdver1_decode" "double")
2980 (set_attr "prefix" "maybe_vex")
2981 (set_attr "mode" "TI")])
;; cvtsd2ss: float_truncate:V2SF of V2DF operand 2, for a V4SF destination.
;; Alternatives 0 and 1 are the non-AVX forms (register / memory source)
;; with the destination tied to operand 1; alternative 2 is the AVX
;; three-operand form.
;; NOTE(review): the merge RTL and the insn condition are on lines not
;; visible in this listing.
2983 (define_insn "sse2_cvtsd2ss"
2984 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
2987 (float_truncate:V2SF
2988 (match_operand:V2DF 2 "nonimmediate_operand" "x,m,xm")))
2989 (match_operand:V4SF 1 "register_operand" "0,0,x")
2993 cvtsd2ss\t{%2, %0|%0, %2}
2994 cvtsd2ss\t{%2, %0|%0, %q2}
2995 vcvtsd2ss\t{%2, %1, %0|%0, %1, %q2}"
2996 [(set_attr "isa" "noavx,noavx,avx")
2997 (set_attr "type" "ssecvt")
2998 (set_attr "athlon_decode" "vector,double,*")
2999 (set_attr "amdfam10_decode" "vector,double,*")
3000 (set_attr "bdver1_decode" "direct,direct,*")
3001 (set_attr "btver2_decode" "double,double,double")
3002 (set_attr "prefix" "orig,orig,vex")
3003 (set_attr "mode" "SF")])
;; cvtss2sd: operates on elements 0 and 1 of V4SF operand 2, for a V2DF
;; destination.  Alternatives 0 and 1 are the non-AVX forms with the
;; destination tied to operand 1; alternative 2 is the AVX three-operand
;; form.
;; NOTE(review): the extension/selection/merge operator lines and the insn
;; condition are not visible in this listing.
3005 (define_insn "sse2_cvtss2sd"
3006 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
3010 (match_operand:V4SF 2 "nonimmediate_operand" "x,m,xm")
3011 (parallel [(const_int 0) (const_int 1)])))
3012 (match_operand:V2DF 1 "register_operand" "0,0,x")
3016 cvtss2sd\t{%2, %0|%0, %2}
3017 cvtss2sd\t{%2, %0|%0, %k2}
3018 vcvtss2sd\t{%2, %1, %0|%0, %1, %k2}"
3019 [(set_attr "isa" "noavx,noavx,avx")
3020 (set_attr "type" "ssecvt")
3021 (set_attr "amdfam10_decode" "vector,double,*")
3022 (set_attr "athlon_decode" "direct,direct,*")
3023 (set_attr "bdver1_decode" "direct,direct,*")
3024 (set_attr "btver2_decode" "double,double,double")
3025 (set_attr "prefix" "orig,orig,vex")
3026 (set_attr "mode" "DF")])
;; vcvtpd2ps (256-bit source): float_truncate of V4DF operand 1 to V4SF.
;; The AT&T template carries a {y} suffix; always VEX-encoded.
3028 (define_insn "avx_cvtpd2ps256"
3029 [(set (match_operand:V4SF 0 "register_operand" "=x")
3030 (float_truncate:V4SF
3031 (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
3033 "vcvtpd2ps{y}\t{%1, %0|%0, %1}"
3034 [(set_attr "type" "ssecvt")
3035 (set_attr "prefix" "vex")
3036 (set_attr "btver2_decode" "vector")
3037 (set_attr "mode" "V4SF")])
;; Expander producing a V4SF result from float_truncate:V2SF of a V2DF
;; operand; the preparation statement supplies operand 2 as the V2SF zero
;; vector.
3039 (define_expand "sse2_cvtpd2ps"
3040 [(set (match_operand:V4SF 0 "register_operand")
3042 (float_truncate:V2SF
3043 (match_operand:V2DF 1 "nonimmediate_operand"))
3046 "operands[2] = CONST0_RTX (V2SFmode);")
;; Insn form of cvtpd2ps with a V4SF destination: float_truncate:V2SF of
;; V2DF operand 1 is paired with operand 2, which must satisfy
;; const0_operand.  The C output code returns either the vcvtpd2ps{x} or
;; the plain cvtpd2ps template.
;; NOTE(review): the test choosing between the two returns is on a line
;; not visible in this listing.
3048 (define_insn "*sse2_cvtpd2ps"
3049 [(set (match_operand:V4SF 0 "register_operand" "=x")
3051 (float_truncate:V2SF
3052 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
3053 (match_operand:V2SF 2 "const0_operand")))]
3057 return "vcvtpd2ps{x}\t{%1, %0|%0, %1}";
3059 return "cvtpd2ps\t{%1, %0|%0, %1}";
3061 [(set_attr "type" "ssecvt")
3062 (set_attr "amdfam10_decode" "double")
3063 (set_attr "athlon_decode" "vector")
3064 (set_attr "bdver1_decode" "double")
3065 (set_attr "prefix_data16" "1")
3066 (set_attr "prefix" "maybe_vex")
3067 (set_attr "mode" "V4SF")])
;; vcvtps2pd (256-bit result): V4SF operand 1 to V4DF.  Always VEX-encoded.
;; NOTE(review): the conversion operator line and the insn condition are
;; not visible in this listing.
3069 (define_insn "avx_cvtps2pd256"
3070 [(set (match_operand:V4DF 0 "register_operand" "=x")
3072 (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
3074 "vcvtps2pd\t{%1, %0|%0, %1}"
3075 [(set_attr "type" "ssecvt")
3076 (set_attr "prefix" "vex")
3077 (set_attr "mode" "V4DF")])
;; vcvtps2pd on elements 0..3 of V8SF operand 1, producing V4DF.  The
;; template prints the source with the %x modifier.
;; NOTE(review): the conversion/selection operator lines and the insn
;; condition are not visible in this listing.
3079 (define_insn "*avx_cvtps2pd256_2"
3080 [(set (match_operand:V4DF 0 "register_operand" "=x")
3083 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
3084 (parallel [(const_int 0) (const_int 1)
3085 (const_int 2) (const_int 3)]))))]
3087 "vcvtps2pd\t{%x1, %0|%0, %x1}"
3088 [(set_attr "type" "ssecvt")
3089 (set_attr "prefix" "vex")
3090 (set_attr "mode" "V4DF")])
;; cvtps2pd on elements 0 and 1 of V4SF operand 1, producing V2DF.
;; NOTE(review): the conversion/selection operator lines and the insn
;; condition are not visible in this listing.
3092 (define_insn "sse2_cvtps2pd"
3093 [(set (match_operand:V2DF 0 "register_operand" "=x")
3096 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3097 (parallel [(const_int 0) (const_int 1)]))))]
3099 "%vcvtps2pd\t{%1, %0|%0, %q1}"
3100 [(set_attr "type" "ssecvt")
3101 (set_attr "amdfam10_decode" "direct")
3102 (set_attr "athlon_decode" "double")
3103 (set_attr "bdver1_decode" "double")
3104 (set_attr "prefix_data16" "0")
3105 (set_attr "prefix" "maybe_vex")
3106 (set_attr "mode" "V2DF")])
;; Two-step expander from V4SF operand 1 to V2DF operand 0.  The first set
;; performs a selection with indices 6,7,2,3 involving operand 1; the
;; second set builds the V2DF result using elements 0 and 1.  Operand 2 is
;; a fresh V4SF pseudo created by the preparation statement.
;; NOTE(review): the set destinations and operator lines of both steps are
;; not visible in this listing.
3108 (define_expand "vec_unpacks_hi_v4sf"
3113 (match_operand:V4SF 1 "nonimmediate_operand"))
3114 (parallel [(const_int 6) (const_int 7)
3115 (const_int 2) (const_int 3)])))
3116 (set (match_operand:V2DF 0 "register_operand")
3120 (parallel [(const_int 0) (const_int 1)]))))]
3122 "operands[2] = gen_reg_rtx (V4SFmode);")
;; Two-step expander from V8SF operand 1 to V4DF operand 0.  The first
;; step selects elements 4..7 of operand 1; the second sets the V4DF
;; result.  Operand 2 is a fresh V4SF pseudo created by the preparation
;; statement.
3124 (define_expand "vec_unpacks_hi_v8sf"
3127 (match_operand:V8SF 1 "nonimmediate_operand")
3128 (parallel [(const_int 4) (const_int 5)
3129 (const_int 6) (const_int 7)])))
3130 (set (match_operand:V4DF 0 "register_operand")
3134 "operands[2] = gen_reg_rtx (V4SFmode);")
;; Expander setting V2DF operand 0 from elements 0 and 1 of V4SF
;; operand 1.
;; NOTE(review): the operator lines and condition are not visible here.
3136 (define_expand "vec_unpacks_lo_v4sf"
3137 [(set (match_operand:V2DF 0 "register_operand")
3140 (match_operand:V4SF 1 "nonimmediate_operand")
3141 (parallel [(const_int 0) (const_int 1)]))))]
;; Expander setting V4DF operand 0 from elements 0..3 of V8SF operand 1.
;; NOTE(review): the operator lines and condition are not visible here.
3144 (define_expand "vec_unpacks_lo_v8sf"
3145 [(set (match_operand:V4DF 0 "register_operand")
3148 (match_operand:V8SF 1 "nonimmediate_operand")
3149 (parallel [(const_int 0) (const_int 1)
3150 (const_int 2) (const_int 3)]))))]
;; Maps an integer vector mode to a float vector mode with half as many
;; elements, each twice as wide (V8HI->V4SF, V4SI->V2DF, V16HI->V8SF,
;; V8SI->V4DF).
3153 (define_mode_attr sseunpackfltmode
3154 [(V8HI "V4SF") (V4SI "V2DF") (V16HI "V8SF") (V8SI "V4DF")])
;; Expander over the VI2_AVX2 modes: runs vec_unpacks_hi_<mode> on
;; operand 1 into a temporary of <sseunpackmode>, then emits a SET of
;; operand 0 to FLOAT of that temporary in <sseunpackfltmode>.
3156 (define_expand "vec_unpacks_float_hi_<mode>"
3157 [(match_operand:<sseunpackfltmode> 0 "register_operand")
3158 (match_operand:VI2_AVX2 1 "register_operand")]
3161 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
3163 emit_insn (gen_vec_unpacks_hi_<mode> (tmp, operands[1]));
3164 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
3165 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
;; Expander over the VI2_AVX2 modes: runs vec_unpacks_lo_<mode> on
;; operand 1 into a temporary of <sseunpackmode>, then emits a SET of
;; operand 0 to FLOAT of that temporary in <sseunpackfltmode>.
3169 (define_expand "vec_unpacks_float_lo_<mode>"
3170 [(match_operand:<sseunpackfltmode> 0 "register_operand")
3171 (match_operand:VI2_AVX2 1 "register_operand")]
3174 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
3176 emit_insn (gen_vec_unpacks_lo_<mode> (tmp, operands[1]));
3177 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
3178 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
;; Expander over the VI2_AVX2 modes: runs vec_unpacku_hi_<mode> on
;; operand 1 into a temporary of <sseunpackmode>, then emits a SET of
;; operand 0 to FLOAT of that temporary in <sseunpackfltmode>.
3182 (define_expand "vec_unpacku_float_hi_<mode>"
3183 [(match_operand:<sseunpackfltmode> 0 "register_operand")
3184 (match_operand:VI2_AVX2 1 "register_operand")]
3187 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
3189 emit_insn (gen_vec_unpacku_hi_<mode> (tmp, operands[1]));
3190 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
3191 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
;; Expander over the VI2_AVX2 modes: runs vec_unpacku_lo_<mode> on
;; operand 1 into a temporary of <sseunpackmode>, then emits a SET of
;; operand 0 to FLOAT of that temporary in <sseunpackfltmode>.
3195 (define_expand "vec_unpacku_float_lo_<mode>"
3196 [(match_operand:<sseunpackfltmode> 0 "register_operand")
3197 (match_operand:VI2_AVX2 1 "register_operand")]
3200 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
3202 emit_insn (gen_vec_unpacku_lo_<mode> (tmp, operands[1]));
3203 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
3204 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
;; Two-step expander from V4SI operand 1 to V2DF operand 0.  The first
;; step selects elements 2,3,2,3 of operand 1; the second builds the V2DF
;; result using elements 0 and 1.  Operand 2 is a fresh V4SI pseudo
;; created by the preparation statement.
;; NOTE(review): the set destination of step one and the operator lines
;; are not visible in this listing.
3208 (define_expand "vec_unpacks_float_hi_v4si"
3211 (match_operand:V4SI 1 "nonimmediate_operand")
3212 (parallel [(const_int 2) (const_int 3)
3213 (const_int 2) (const_int 3)])))
3214 (set (match_operand:V2DF 0 "register_operand")
3218 (parallel [(const_int 0) (const_int 1)]))))]
3220 "operands[2] = gen_reg_rtx (V4SImode);")
;; Expander setting V2DF operand 0 from elements 0 and 1 of V4SI
;; operand 1.
;; NOTE(review): the operator lines and condition are not visible here.
3222 (define_expand "vec_unpacks_float_lo_v4si"
3223 [(set (match_operand:V2DF 0 "register_operand")
3226 (match_operand:V4SI 1 "nonimmediate_operand")
3227 (parallel [(const_int 0) (const_int 1)]))))]
;; Two-step expander from V8SI operand 1 to V4DF operand 0.  The first
;; step selects elements 4..7 of operand 1; the second sets the V4DF
;; result.  Operand 2 is a fresh V4SI pseudo created by the preparation
;; statement.
3230 (define_expand "vec_unpacks_float_hi_v8si"
3233 (match_operand:V8SI 1 "nonimmediate_operand")
3234 (parallel [(const_int 4) (const_int 5)
3235 (const_int 6) (const_int 7)])))
3236 (set (match_operand:V4DF 0 "register_operand")
3240 "operands[2] = gen_reg_rtx (V4SImode);")
;; Expander setting V4DF operand 0 from elements 0..3 of V8SI operand 1.
;; NOTE(review): the operator lines and condition are not visible here.
3242 (define_expand "vec_unpacks_float_lo_v8si"
3243 [(set (match_operand:V4DF 0 "register_operand")
3246 (match_operand:V8SI 1 "nonimmediate_operand")
3247 (parallel [(const_int 0) (const_int 1)
3248 (const_int 2) (const_int 3)]))))]
;; Multi-step expander from V4SI operand 1 to V2DF operand 0.  Visible
;; steps: select elements 2,3,2,3 of operand 1; form a V2DF value from
;; elements 0 and 1; compare operand 6 < operand 3; AND operand 7 with
;; operand 4; finally operand 0 = operand 6 + operand 8.
;; Preparation: operand 3 is the V2DF zero vector, operand 4 a V2DF vector
;; built from the constant 2^32 (real_ldexp of 1.0 by 32), operand 5 a
;; V4SI temporary and operands 6..8 V2DF temporaries.
;; NOTE(review): presumably this adds 2^32 to lanes whose signed
;; conversion came out negative; the intermediate set destinations are on
;; lines not visible in this listing -- confirm against the full pattern.
3251 (define_expand "vec_unpacku_float_hi_v4si"
3254 (match_operand:V4SI 1 "nonimmediate_operand")
3255 (parallel [(const_int 2) (const_int 3)
3256 (const_int 2) (const_int 3)])))
3261 (parallel [(const_int 0) (const_int 1)]))))
3263 (lt:V2DF (match_dup 6) (match_dup 3)))
3265 (and:V2DF (match_dup 7) (match_dup 4)))
3266 (set (match_operand:V2DF 0 "register_operand")
3267 (plus:V2DF (match_dup 6) (match_dup 8)))]
3270 REAL_VALUE_TYPE TWO32r;
3274 real_ldexp (&TWO32r, &dconst1, 32);
3275 x = const_double_from_real_value (TWO32r, DFmode);
3277 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
3278 operands[4] = force_reg (V2DFmode,
3279 ix86_build_const_vector (V2DFmode, 1, x));
3281 operands[5] = gen_reg_rtx (V4SImode);
3283 for (i = 6; i < 9; i++)
3284 operands[i] = gen_reg_rtx (V2DFmode);
;; Multi-step expander from V4SI operand 1 to V2DF operand 0.  Visible
;; steps: form a V2DF value from elements 0 and 1 of operand 1; compare
;; operand 5 < operand 3; AND operand 6 with operand 4; finally
;; operand 0 = operand 5 + operand 7.
;; Preparation: operand 3 is the V2DF zero vector, operand 4 a V2DF vector
;; built from the constant 2^32 (real_ldexp of 1.0 by 32), and operands
;; 5..7 are V2DF temporaries.
;; NOTE(review): presumably this adds 2^32 to lanes whose signed
;; conversion came out negative; the intermediate set destinations are on
;; lines not visible in this listing -- confirm against the full pattern.
3287 (define_expand "vec_unpacku_float_lo_v4si"
3291 (match_operand:V4SI 1 "nonimmediate_operand")
3292 (parallel [(const_int 0) (const_int 1)]))))
3294 (lt:V2DF (match_dup 5) (match_dup 3)))
3296 (and:V2DF (match_dup 6) (match_dup 4)))
3297 (set (match_operand:V2DF 0 "register_operand")
3298 (plus:V2DF (match_dup 5) (match_dup 7)))]
3301 REAL_VALUE_TYPE TWO32r;
3305 real_ldexp (&TWO32r, &dconst1, 32);
3306 x = const_double_from_real_value (TWO32r, DFmode);
3308 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
3309 operands[4] = force_reg (V2DFmode,
3310 ix86_build_const_vector (V2DFmode, 1, x));
3312 for (i = 5; i < 8; i++)
3313 operands[i] = gen_reg_rtx (V2DFmode);
;; C-coded expander from V8SI operand 1 to V4DF operand 0.  Sequence:
;;   tmp[5] = high half of operand 1 (gen_vec_extract_hi_v8si)
;;   tmp[2] = floatv4siv4df2 (tmp[5])
;;   tmp[3] = tmp[2] < tmp[0], where tmp[0] is the V4DF zero vector
;;   tmp[4] = tmp[3] & tmp[1], where tmp[1] is built from the constant 2^32
;;   operand 0 = tmp[2] + tmp[4]
;; i.e. 2^32 is added to exactly those lanes that compare below zero.
;; NOTE(review): the insn condition and the declarations of tmp[], x and i
;; are on lines not visible in this listing.
3316 (define_expand "vec_unpacku_float_hi_v8si"
3317 [(match_operand:V4DF 0 "register_operand")
3318 (match_operand:V8SI 1 "register_operand")]
3321 REAL_VALUE_TYPE TWO32r;
3325 real_ldexp (&TWO32r, &dconst1, 32);
3326 x = const_double_from_real_value (TWO32r, DFmode);
3328 tmp[0] = force_reg (V4DFmode, CONST0_RTX (V4DFmode));
3329 tmp[1] = force_reg (V4DFmode, ix86_build_const_vector (V4DFmode, 1, x));
3330 tmp[5] = gen_reg_rtx (V4SImode);
3332 for (i = 2; i < 5; i++)
3333 tmp[i] = gen_reg_rtx (V4DFmode);
3334 emit_insn (gen_vec_extract_hi_v8si (tmp[5], operands[1]));
3335 emit_insn (gen_floatv4siv4df2 (tmp[2], tmp[5]));
3336 emit_insn (gen_rtx_SET (VOIDmode, tmp[3],
3337 gen_rtx_LT (V4DFmode, tmp[2], tmp[0])));
3338 emit_insn (gen_andv4df3 (tmp[4], tmp[3], tmp[1]));
3339 emit_insn (gen_addv4df3 (operands[0], tmp[2], tmp[4]));
;; C-coded expander from V8SI operand 1 to V4DF operand 0.  Sequence:
;;   tmp[2] = avx_cvtdq2pd256_2 (operand 1)
;;   tmp[3] = tmp[2] < tmp[0], where tmp[0] is the V4DF zero vector
;;   tmp[4] = tmp[3] & tmp[1], where tmp[1] is built from the constant 2^32
;;   operand 0 = tmp[2] + tmp[4]
;; i.e. 2^32 is added to exactly those lanes that compare below zero.
;; NOTE(review): the insn condition and the declarations of tmp[], x and i
;; are on lines not visible in this listing.
3343 (define_expand "vec_unpacku_float_lo_v8si"
3344 [(match_operand:V4DF 0 "register_operand")
3345 (match_operand:V8SI 1 "nonimmediate_operand")]
3348 REAL_VALUE_TYPE TWO32r;
3352 real_ldexp (&TWO32r, &dconst1, 32);
3353 x = const_double_from_real_value (TWO32r, DFmode);
3355 tmp[0] = force_reg (V4DFmode, CONST0_RTX (V4DFmode));
3356 tmp[1] = force_reg (V4DFmode, ix86_build_const_vector (V4DFmode, 1, x));
3358 for (i = 2; i < 5; i++)
3359 tmp[i] = gen_reg_rtx (V4DFmode);
3360 emit_insn (gen_avx_cvtdq2pd256_2 (tmp[2], operands[1]));
3361 emit_insn (gen_rtx_SET (VOIDmode, tmp[3],
3362 gen_rtx_LT (V4DFmode, tmp[2], tmp[0])));
3363 emit_insn (gen_andv4df3 (tmp[4], tmp[3], tmp[1]));
3364 emit_insn (gen_addv4df3 (operands[0], tmp[2], tmp[4]));
;; Expander packing two V4DF operands into V8SF operand 0: each input goes
;; through float_truncate:V4SF, and operands 3 and 4 are fresh V4SF
;; pseudos created by the preparation code.
;; NOTE(review): the set destinations of the two truncations and the RTL
;; that forms the V8SF result are on lines not visible in this listing.
3368 (define_expand "vec_pack_trunc_v4df"
3370 (float_truncate:V4SF
3371 (match_operand:V4DF 1 "nonimmediate_operand")))
3373 (float_truncate:V4SF
3374 (match_operand:V4DF 2 "nonimmediate_operand")))
3375 (set (match_operand:V8SF 0 "register_operand")
3381 operands[3] = gen_reg_rtx (V4SFmode);
3382 operands[4] = gen_reg_rtx (V4SFmode);
;; Expand a pack of two V2DF operands into V4SF operand 0.
;; With TARGET_AVX and !TARGET_PREFER_AVX128 the inputs are concatenated
;; into one V4DF temporary and converted by a single avx_cvtpd2ps256.
;; The other visible sequence converts each input with sse2_cvtpd2ps and
;; joins the two results with sse_movlhps.
3385 (define_expand "vec_pack_trunc_v2df"
3386 [(match_operand:V4SF 0 "register_operand")
3387 (match_operand:V2DF 1 "nonimmediate_operand")
3388 (match_operand:V2DF 2 "nonimmediate_operand")]
3393 if (TARGET_AVX && !TARGET_PREFER_AVX128)
3395 tmp0 = gen_reg_rtx (V4DFmode);
3396 tmp1 = force_reg (V2DFmode, operands[1]);
3398 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
3399 emit_insn (gen_avx_cvtpd2ps256 (operands[0], tmp0));
3403 tmp0 = gen_reg_rtx (V4SFmode);
3404 tmp1 = gen_reg_rtx (V4SFmode);
3406 emit_insn (gen_sse2_cvtpd2ps (tmp0, operands[1]));
3407 emit_insn (gen_sse2_cvtpd2ps (tmp1, operands[2]));
3408 emit_insn (gen_sse_movlhps (operands[0], tmp0, tmp1));
;; Expand a signed truncating pack of two V4DF operands into V8SI
;; operand 0: each input is converted to a V4SI temporary with
;; fix_truncv4dfv4si2 and the two halves are joined by avx_vec_concatv8si.
3413 (define_expand "vec_pack_sfix_trunc_v4df"
3414 [(match_operand:V8SI 0 "register_operand")
3415 (match_operand:V4DF 1 "nonimmediate_operand")
3416 (match_operand:V4DF 2 "nonimmediate_operand")]
3421 r1 = gen_reg_rtx (V4SImode);
3422 r2 = gen_reg_rtx (V4SImode);
3424 emit_insn (gen_fix_truncv4dfv4si2 (r1, operands[1]));
3425 emit_insn (gen_fix_truncv4dfv4si2 (r2, operands[2]));
3426 emit_insn (gen_avx_vec_concatv8si (operands[0], r1, r2));
;; Expand a signed truncating pack of two V2DF operands into V4SI
;; operand 0.  With TARGET_AVX and !TARGET_PREFER_AVX128 the inputs are
;; concatenated into a V4DF temporary and converted once with
;; fix_truncv4dfv4si2.  The other visible sequence converts each input
;; with sse2_cvttpd2dq and merges the results through
;; vec_interleave_lowv2di on V2DI lowpart views.
3430 (define_expand "vec_pack_sfix_trunc_v2df"
3431 [(match_operand:V4SI 0 "register_operand")
3432 (match_operand:V2DF 1 "nonimmediate_operand")
3433 (match_operand:V2DF 2 "nonimmediate_operand")]
3438 if (TARGET_AVX && !TARGET_PREFER_AVX128)
3440 tmp0 = gen_reg_rtx (V4DFmode);
3441 tmp1 = force_reg (V2DFmode, operands[1]);
3443 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
3444 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp0));
3448 tmp0 = gen_reg_rtx (V4SImode);
3449 tmp1 = gen_reg_rtx (V4SImode);
3451 emit_insn (gen_sse2_cvttpd2dq (tmp0, operands[1]));
3452 emit_insn (gen_sse2_cvttpd2dq (tmp1, operands[2]));
3454 (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]),
3455 gen_lowpart (V2DImode, tmp0),
3456 gen_lowpart (V2DImode, tmp1)));
;; Maps a double-precision vector mode to the SImode vector mode with
;; twice as many elements; used as the result mode of
;; vec_pack_ufix_trunc_<mode>.
3461 (define_mode_attr ssepackfltmode
3462 [(V4DF "V8SI") (V2DF "V4SI")])
;; Expand an unsigned truncating pack of two VF2 operands into
;; <ssepackfltmode> operand 0.  Each input goes through
;; ix86_expand_adjust_ufix_to_sfix_si, which returns the value that is
;; packed with the signed vec_pack_sfix_trunc_<mode> pattern and also
;; hands back a second rtx (tmp[2] / tmp[3]).  Elements of those second
;; values are gathered by ix86_expand_vec_extract_even_odd and XORed
;; into the packed result.  When the result is V8SI and AVX2 is not
;; available, the gather is performed in V8SFmode through lowpart views.
3464 (define_expand "vec_pack_ufix_trunc_<mode>"
3465 [(match_operand:<ssepackfltmode> 0 "register_operand")
3466 (match_operand:VF2 1 "register_operand")
3467 (match_operand:VF2 2 "register_operand")]
3471 tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]);
3472 tmp[1] = ix86_expand_adjust_ufix_to_sfix_si (operands[2], &tmp[3]);
3473 tmp[4] = gen_reg_rtx (<ssepackfltmode>mode);
3474 emit_insn (gen_vec_pack_sfix_trunc_<mode> (tmp[4], tmp[0], tmp[1]));
3475 if (<ssepackfltmode>mode == V4SImode || TARGET_AVX2)
3477 tmp[5] = gen_reg_rtx (<ssepackfltmode>mode);
3478 ix86_expand_vec_extract_even_odd (tmp[5], tmp[2], tmp[3], 0);
3482 tmp[5] = gen_reg_rtx (V8SFmode);
3483 ix86_expand_vec_extract_even_odd (tmp[5], gen_lowpart (V8SFmode, tmp[2]),
3484 gen_lowpart (V8SFmode, tmp[3]), 0);
3485 tmp[5] = gen_lowpart (V8SImode, tmp[5]);
3487 tmp[6] = expand_simple_binop (<ssepackfltmode>mode, XOR, tmp[4], tmp[5],
3488 operands[0], 0, OPTAB_DIRECT);
3489 if (tmp[6] != operands[0])
3490 emit_move_insn (operands[0], tmp[6]);
;; Expand a signed pack of two V4DF operands into V8SI operand 0 using
;; avx_cvtpd2dq256 for each input (into V4SI temporaries) followed by
;; avx_vec_concatv8si.
3494 (define_expand "vec_pack_sfix_v4df"
3495 [(match_operand:V8SI 0 "register_operand")
3496 (match_operand:V4DF 1 "nonimmediate_operand")
3497 (match_operand:V4DF 2 "nonimmediate_operand")]
3502 r1 = gen_reg_rtx (V4SImode);
3503 r2 = gen_reg_rtx (V4SImode);
3505 emit_insn (gen_avx_cvtpd2dq256 (r1, operands[1]));
3506 emit_insn (gen_avx_cvtpd2dq256 (r2, operands[2]));
3507 emit_insn (gen_avx_vec_concatv8si (operands[0], r1, r2));
;; Expand a signed pack of two V2DF operands into V4SI operand 0.
;; With TARGET_AVX and !TARGET_PREFER_AVX128 the inputs are concatenated
;; into a V4DF temporary and converted once with avx_cvtpd2dq256.  The
;; other visible sequence converts each input with sse2_cvtpd2dq and
;; merges the results through vec_interleave_lowv2di on V2DI lowparts.
3511 (define_expand "vec_pack_sfix_v2df"
3512 [(match_operand:V4SI 0 "register_operand")
3513 (match_operand:V2DF 1 "nonimmediate_operand")
3514 (match_operand:V2DF 2 "nonimmediate_operand")]
3519 if (TARGET_AVX && !TARGET_PREFER_AVX128)
3521 tmp0 = gen_reg_rtx (V4DFmode);
3522 tmp1 = force_reg (V2DFmode, operands[1]);
3524 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
3525 emit_insn (gen_avx_cvtpd2dq256 (operands[0], tmp0));
3529 tmp0 = gen_reg_rtx (V4SImode);
3530 tmp1 = gen_reg_rtx (V4SImode);
3532 emit_insn (gen_sse2_cvtpd2dq (tmp0, operands[1]));
3533 emit_insn (gen_sse2_cvtpd2dq (tmp1, operands[2]));
3535 (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]),
3536 gen_lowpart (V2DImode, tmp0),
3537 gen_lowpart (V2DImode, tmp1)));
3542 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3544 ;; Parallel single-precision floating point element swizzling
3546 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander wrapper around the sse_movhlps insn.  The operands are first
;; legitimized by ix86_fixup_binary_operands, the insn is emitted into
;; the destination it returns, and the result is copied to operand 0
;; when that destination is a different rtx.
3548 (define_expand "sse_movhlps_exp"
3549 [(set (match_operand:V4SF 0 "nonimmediate_operand")
3552 (match_operand:V4SF 1 "nonimmediate_operand")
3553 (match_operand:V4SF 2 "nonimmediate_operand"))
3554 (parallel [(const_int 6)
3560 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
3562 emit_insn (gen_sse_movhlps (dst, operands[1], operands[2]));
3564 /* Fix up the destination if needed. */
3565 if (dst != operands[0])
3566 emit_move_insn (operands[0], dst);
;; V4SF movhlps-style insn with five alternatives: register forms
;; (movhlps / vmovhlps), forms reading offsettable memory operand 2
;; through %H2 (movlps / vmovlps), and a store of register operand 2 to
;; memory operand 0 (%vmovhps).  The insn condition rejects the case
;; where operands 1 and 2 are both memory.
3571 (define_insn "sse_movhlps"
3572 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,m")
3575 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
3576 (match_operand:V4SF 2 "nonimmediate_operand" " x,x,o,o,x"))
3577 (parallel [(const_int 6)
3581 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3583 movhlps\t{%2, %0|%0, %2}
3584 vmovhlps\t{%2, %1, %0|%0, %1, %2}
3585 movlps\t{%H2, %0|%0, %H2}
3586 vmovlps\t{%H2, %1, %0|%0, %1, %H2}
3587 %vmovhps\t{%2, %0|%q0, %2}"
3588 [(set_attr "isa" "noavx,avx,noavx,avx,*")
3589 (set_attr "type" "ssemov")
3590 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
3591 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
;; Expander wrapper around the sse_movlhps insn.  The operands are first
;; legitimized by ix86_fixup_binary_operands, the insn is emitted into
;; the destination it returns, and the result is copied to operand 0
;; when that destination is a different rtx.
3593 (define_expand "sse_movlhps_exp"
3594 [(set (match_operand:V4SF 0 "nonimmediate_operand")
3597 (match_operand:V4SF 1 "nonimmediate_operand")
3598 (match_operand:V4SF 2 "nonimmediate_operand"))
3599 (parallel [(const_int 0)
3605 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
3607 emit_insn (gen_sse_movlhps (dst, operands[1], operands[2]));
3609 /* Fix up the destination if needed. */
3610 if (dst != operands[0])
3611 emit_move_insn (operands[0], dst);
;; V4SF movlhps-style insn with five alternatives: register forms
;; (movlhps / vmovlhps), forms taking operand 2 from memory
;; (movhps / vmovhps), and a store of register operand 2 into %H0 of an
;; offsettable memory destination (%vmovlps).  Operand validity is
;; checked with ix86_binary_operator_ok.
3616 (define_insn "sse_movlhps"
3617 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,o")
3620 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
3621 (match_operand:V4SF 2 "nonimmediate_operand" " x,x,m,m,x"))
3622 (parallel [(const_int 0)
3626 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
3628 movlhps\t{%2, %0|%0, %2}
3629 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3630 movhps\t{%2, %0|%0, %q2}
3631 vmovhps\t{%2, %1, %0|%0, %1, %q2}
3632 %vmovlps\t{%2, %H0|%H0, %2}"
3633 [(set_attr "isa" "noavx,avx,noavx,avx,*")
3634 (set_attr "type" "ssemov")
3635 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
3636 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
3638 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; Emits vunpckhps on V8SF operands; the selection is elements
;; 2,10,3,11 and 6,14,7,15 of the two-operand combination, i.e. the
;; upper pair of each 4-element group interleaved.
3639 (define_insn "avx_unpckhps256"
3640 [(set (match_operand:V8SF 0 "register_operand" "=x")
3643 (match_operand:V8SF 1 "register_operand" "x")
3644 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3645 (parallel [(const_int 2) (const_int 10)
3646 (const_int 3) (const_int 11)
3647 (const_int 6) (const_int 14)
3648 (const_int 7) (const_int 15)])))]
3650 "vunpckhps\t{%2, %1, %0|%0, %1, %2}"
3651 [(set_attr "type" "sselog")
3652 (set_attr "prefix" "vex")
3653 (set_attr "mode" "V8SF")])
;; Expand a full-width high interleave of two V8SF operands.  The
;; pattern contains the two in-lane selections also used by
;; avx_unpcklps256 (0,8,1,9,4,12,5,13) and avx_unpckhps256
;; (2,10,3,11,6,14,7,15), followed by a final selection of elements
;; 4-7 and 12-15 into operand 0.  Operands 3 and 4 are fresh V8SF
;; pseudos (presumably the destinations of the two in-lane steps).
3655 (define_expand "vec_interleave_highv8sf"
3659 (match_operand:V8SF 1 "register_operand" "x")
3660 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3661 (parallel [(const_int 0) (const_int 8)
3662 (const_int 1) (const_int 9)
3663 (const_int 4) (const_int 12)
3664 (const_int 5) (const_int 13)])))
3670 (parallel [(const_int 2) (const_int 10)
3671 (const_int 3) (const_int 11)
3672 (const_int 6) (const_int 14)
3673 (const_int 7) (const_int 15)])))
3674 (set (match_operand:V8SF 0 "register_operand")
3679 (parallel [(const_int 4) (const_int 5)
3680 (const_int 6) (const_int 7)
3681 (const_int 12) (const_int 13)
3682 (const_int 14) (const_int 15)])))]
3685 operands[3] = gen_reg_rtx (V8SFmode);
3686 operands[4] = gen_reg_rtx (V8SFmode);
;; High interleave of two V4SF operands (selection 2,6,3,7); emitted as
;; unpckhps in the two-operand non-AVX alternative and vunpckhps in the
;; three-operand AVX alternative.
3689 (define_insn "vec_interleave_highv4sf"
3690 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3693 (match_operand:V4SF 1 "register_operand" "0,x")
3694 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
3695 (parallel [(const_int 2) (const_int 6)
3696 (const_int 3) (const_int 7)])))]
3699 unpckhps\t{%2, %0|%0, %2}
3700 vunpckhps\t{%2, %1, %0|%0, %1, %2}"
3701 [(set_attr "isa" "noavx,avx")
3702 (set_attr "type" "sselog")
3703 (set_attr "prefix" "orig,vex")
3704 (set_attr "mode" "V4SF")])
3706 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; Emits vunpcklps on V8SF operands; the selection is elements
;; 0,8,1,9 and 4,12,5,13 of the two-operand combination, i.e. the
;; lower pair of each 4-element group interleaved.
3707 (define_insn "avx_unpcklps256"
3708 [(set (match_operand:V8SF 0 "register_operand" "=x")
3711 (match_operand:V8SF 1 "register_operand" "x")
3712 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3713 (parallel [(const_int 0) (const_int 8)
3714 (const_int 1) (const_int 9)
3715 (const_int 4) (const_int 12)
3716 (const_int 5) (const_int 13)])))]
3718 "vunpcklps\t{%2, %1, %0|%0, %1, %2}"
3719 [(set_attr "type" "sselog")
3720 (set_attr "prefix" "vex")
3721 (set_attr "mode" "V8SF")])
;; Expand a full-width low interleave of two V8SF operands.  The
;; pattern contains the two in-lane selections also used by
;; avx_unpcklps256 (0,8,1,9,4,12,5,13) and avx_unpckhps256
;; (2,10,3,11,6,14,7,15), followed by a final selection of elements
;; 0-3 and 8-11 into operand 0.  Operands 3 and 4 are fresh V8SF
;; pseudos (presumably the destinations of the two in-lane steps).
3723 (define_expand "vec_interleave_lowv8sf"
3727 (match_operand:V8SF 1 "register_operand" "x")
3728 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3729 (parallel [(const_int 0) (const_int 8)
3730 (const_int 1) (const_int 9)
3731 (const_int 4) (const_int 12)
3732 (const_int 5) (const_int 13)])))
3738 (parallel [(const_int 2) (const_int 10)
3739 (const_int 3) (const_int 11)
3740 (const_int 6) (const_int 14)
3741 (const_int 7) (const_int 15)])))
3742 (set (match_operand:V8SF 0 "register_operand")
3747 (parallel [(const_int 0) (const_int 1)
3748 (const_int 2) (const_int 3)
3749 (const_int 8) (const_int 9)
3750 (const_int 10) (const_int 11)])))]
3753 operands[3] = gen_reg_rtx (V8SFmode);
3754 operands[4] = gen_reg_rtx (V8SFmode);
;; Low interleave of two V4SF operands (selection 0,4,1,5); emitted as
;; unpcklps in the two-operand non-AVX alternative and vunpcklps in the
;; three-operand AVX alternative.
3757 (define_insn "vec_interleave_lowv4sf"
3758 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3761 (match_operand:V4SF 1 "register_operand" "0,x")
3762 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
3763 (parallel [(const_int 0) (const_int 4)
3764 (const_int 1) (const_int 5)])))]
3767 unpcklps\t{%2, %0|%0, %2}
3768 vunpcklps\t{%2, %1, %0|%0, %1, %2}"
3769 [(set_attr "isa" "noavx,avx")
3770 (set_attr "type" "sselog")
3771 (set_attr "prefix" "orig,vex")
3772 (set_attr "mode" "V4SF")])
3774 ;; These are modeled with the same vec_concat as the others so that we
3775 ;; capture users of shufps that can use the new instructions
;; 256-bit form: every odd-indexed element (1,3,5,7) of V8SF operand 1
;; is selected twice; emitted as vmovshdup.
3776 (define_insn "avx_movshdup256"
3777 [(set (match_operand:V8SF 0 "register_operand" "=x")
3780 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
3782 (parallel [(const_int 1) (const_int 1)
3783 (const_int 3) (const_int 3)
3784 (const_int 5) (const_int 5)
3785 (const_int 7) (const_int 7)])))]
3787 "vmovshdup\t{%1, %0|%0, %1}"
3788 [(set_attr "type" "sse")
3789 (set_attr "prefix" "vex")
3790 (set_attr "mode" "V8SF")])
;; 128-bit V4SF counterpart of avx_movshdup256; the selection starts at
;; element 1 and the insn is emitted as %vmovshdup (VEX-encoded when
;; the "maybe_vex" prefix attribute applies).
3792 (define_insn "sse3_movshdup"
3793 [(set (match_operand:V4SF 0 "register_operand" "=x")
3796 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3798 (parallel [(const_int 1)
3803 "%vmovshdup\t{%1, %0|%0, %1}"
3804 [(set_attr "type" "sse")
3805 (set_attr "prefix_rep" "1")
3806 (set_attr "prefix" "maybe_vex")
3807 (set_attr "mode" "V4SF")])
;; 256-bit form: every even-indexed element (0,2,4,6) of V8SF operand 1
;; is selected twice; emitted as vmovsldup.
3809 (define_insn "avx_movsldup256"
3810 [(set (match_operand:V8SF 0 "register_operand" "=x")
3813 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
3815 (parallel [(const_int 0) (const_int 0)
3816 (const_int 2) (const_int 2)
3817 (const_int 4) (const_int 4)
3818 (const_int 6) (const_int 6)])))]
3820 "vmovsldup\t{%1, %0|%0, %1}"
3821 [(set_attr "type" "sse")
3822 (set_attr "prefix" "vex")
3823 (set_attr "mode" "V8SF")])
;; 128-bit V4SF counterpart of avx_movsldup256; the selection starts at
;; element 0 and the insn is emitted as %vmovsldup (VEX-encoded when
;; the "maybe_vex" prefix attribute applies).
3825 (define_insn "sse3_movsldup"
3826 [(set (match_operand:V4SF 0 "register_operand" "=x")
3829 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3831 (parallel [(const_int 0)
3836 "%vmovsldup\t{%1, %0|%0, %1}"
3837 [(set_attr "type" "sse")
3838 (set_attr "prefix_rep" "1")
3839 (set_attr "prefix" "maybe_vex")
3840 (set_attr "mode" "V4SF")])
;; Expand a V8SF shuffle given an 8-bit immediate (operand 3).  The
;; immediate is split into four 2-bit fields and turned into the eight
;; explicit element indices expected by avx_shufps256_1: fields 0 and 1
;; index the first source (+0, and +4 for the second group of four),
;; fields 2 and 3 index the second source (+8, and +12 likewise).
3842 (define_expand "avx_shufps256"
3843 [(match_operand:V8SF 0 "register_operand")
3844 (match_operand:V8SF 1 "register_operand")
3845 (match_operand:V8SF 2 "nonimmediate_operand")
3846 (match_operand:SI 3 "const_int_operand")]
3849 int mask = INTVAL (operands[3]);
3850 emit_insn (gen_avx_shufps256_1 (operands[0], operands[1], operands[2],
3851 GEN_INT ((mask >> 0) & 3),
3852 GEN_INT ((mask >> 2) & 3),
3853 GEN_INT (((mask >> 4) & 3) + 8),
3854 GEN_INT (((mask >> 6) & 3) + 8),
3855 GEN_INT (((mask >> 0) & 3) + 4),
3856 GEN_INT (((mask >> 2) & 3) + 4),
3857 GEN_INT (((mask >> 4) & 3) + 12),
3858 GEN_INT (((mask >> 6) & 3) + 12)));
3862 ;; One bit in mask selects 2 elements.
;; Matches a V8SF selection given as eight explicit indices (operands
;; 3-10).  The insn condition requires operands 7-10 to equal operands
;; 3-6 plus 4, so a single 8-bit immediate describes both groups of
;; four.  The output code rebuilds that immediate from operands 3-6,
;; stores it back into operands[3] and emits vshufps.
3863 (define_insn "avx_shufps256_1"
3864 [(set (match_operand:V8SF 0 "register_operand" "=x")
3867 (match_operand:V8SF 1 "register_operand" "x")
3868 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3869 (parallel [(match_operand 3 "const_0_to_3_operand" )
3870 (match_operand 4 "const_0_to_3_operand" )
3871 (match_operand 5 "const_8_to_11_operand" )
3872 (match_operand 6 "const_8_to_11_operand" )
3873 (match_operand 7 "const_4_to_7_operand" )
3874 (match_operand 8 "const_4_to_7_operand" )
3875 (match_operand 9 "const_12_to_15_operand")
3876 (match_operand 10 "const_12_to_15_operand")])))]
3878 && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
3879 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
3880 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
3881 && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4))"
3884 mask = INTVAL (operands[3]);
3885 mask |= INTVAL (operands[4]) << 2;
3886 mask |= (INTVAL (operands[5]) - 8) << 4;
3887 mask |= (INTVAL (operands[6]) - 8) << 6;
3888 operands[3] = GEN_INT (mask);
3890 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3892 [(set_attr "type" "sseshuf")
3893 (set_attr "length_immediate" "1")
3894 (set_attr "prefix" "vex")
3895 (set_attr "mode" "V8SF")])
;; Expand a V4SF shuffle given an 8-bit immediate (operand 3).  The
;; immediate is split into four 2-bit fields: fields 0 and 1 become
;; indices 0-3 (first source), fields 2 and 3 become indices 4-7
;; (second source), and the result is passed to sse_shufps_v4sf.
3897 (define_expand "sse_shufps"
3898 [(match_operand:V4SF 0 "register_operand")
3899 (match_operand:V4SF 1 "register_operand")
3900 (match_operand:V4SF 2 "nonimmediate_operand")
3901 (match_operand:SI 3 "const_int_operand")]
3904 int mask = INTVAL (operands[3]);
3905 emit_insn (gen_sse_shufps_v4sf (operands[0], operands[1], operands[2],
3906 GEN_INT ((mask >> 0) & 3),
3907 GEN_INT ((mask >> 2) & 3),
3908 GEN_INT (((mask >> 4) & 3) + 4),
3909 GEN_INT (((mask >> 6) & 3) + 4)));
;; Shuffle over the VI4F_128 modes expressed as a vec_select from the
;; vec_concat of operands 1 and 2, with four explicit indices
;; (operands 3-6).  The output code folds the indices back into the
;; 8-bit immediate (operands 5 and 6 are rebased by -4), stores it in
;; operands[3] and returns the shufps or vshufps template depending on
;; which_alternative.
3913 (define_insn "sse_shufps_<mode>"
3914 [(set (match_operand:VI4F_128 0 "register_operand" "=x,x")
3915 (vec_select:VI4F_128
3916 (vec_concat:<ssedoublevecmode>
3917 (match_operand:VI4F_128 1 "register_operand" "0,x")
3918 (match_operand:VI4F_128 2 "nonimmediate_operand" "xm,xm"))
3919 (parallel [(match_operand 3 "const_0_to_3_operand")
3920 (match_operand 4 "const_0_to_3_operand")
3921 (match_operand 5 "const_4_to_7_operand")
3922 (match_operand 6 "const_4_to_7_operand")])))]
3926 mask |= INTVAL (operands[3]) << 0;
3927 mask |= INTVAL (operands[4]) << 2;
3928 mask |= (INTVAL (operands[5]) - 4) << 4;
3929 mask |= (INTVAL (operands[6]) - 4) << 6;
3930 operands[3] = GEN_INT (mask);
3932 switch (which_alternative)
3935 return "shufps\t{%3, %2, %0|%0, %2, %3}";
3937 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3942 [(set_attr "isa" "noavx,avx")
3943 (set_attr "type" "sseshuf")
3944 (set_attr "length_immediate" "1")
3945 (set_attr "prefix" "orig,vex")
3946 (set_attr "mode" "V4SF")])
;; Extract elements 2 and 3 of V4SF operand 1 into V2SF operand 0.
;; Alternatives: store from a register to memory (%vmovhps),
;; register-to-register (%vmovhlps), and load from %H1 of an
;; offsettable memory source (%vmovlps).
3948 (define_insn "sse_storehps"
3949 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3951 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
3952 (parallel [(const_int 2) (const_int 3)])))]
3955 %vmovhps\t{%1, %0|%q0, %1}
3956 %vmovhlps\t{%1, %d0|%d0, %1}
3957 %vmovlps\t{%H1, %d0|%d0, %H1}"
3958 [(set_attr "type" "ssemov")
3959 (set_attr "prefix" "maybe_vex")
3960 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander wrapper around the sse_loadhps insn.  The operands are first
;; legitimized by ix86_fixup_binary_operands, the insn is emitted into
;; the destination it returns, and the result is copied to operand 0
;; when that destination is a different rtx.
3962 (define_expand "sse_loadhps_exp"
3963 [(set (match_operand:V4SF 0 "nonimmediate_operand")
3966 (match_operand:V4SF 1 "nonimmediate_operand")
3967 (parallel [(const_int 0) (const_int 1)]))
3968 (match_operand:V2SF 2 "nonimmediate_operand")))]
3971 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
3973 emit_insn (gen_sse_loadhps (dst, operands[1], operands[2]));
3975 /* Fix up the destination if needed. */
3976 if (dst != operands[0])
3977 emit_move_insn (operands[0], dst);
;; Combine elements 0 and 1 of V4SF operand 1 with V2SF operand 2 into
;; V4SF operand 0.  Alternatives: operand 2 from memory
;; (movhps / vmovhps), operand 2 in a register (movlhps / vmovlhps),
;; and a store of register operand 2 into %H0 of an offsettable memory
;; destination (%vmovlps).
3982 (define_insn "sse_loadhps"
3983 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,o")
3986 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
3987 (parallel [(const_int 0) (const_int 1)]))
3988 (match_operand:V2SF 2 "nonimmediate_operand" " m,m,x,x,x")))]
3991 movhps\t{%2, %0|%0, %q2}
3992 vmovhps\t{%2, %1, %0|%0, %1, %q2}
3993 movlhps\t{%2, %0|%0, %2}
3994 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3995 %vmovlps\t{%2, %H0|%H0, %2}"
3996 [(set_attr "isa" "noavx,avx,noavx,avx,*")
3997 (set_attr "type" "ssemov")
3998 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
3999 (set_attr "mode" "V2SF,V2SF,V4SF,V4SF,V2SF")])
;; Extract elements 0 and 1 of V4SF operand 1 into V2SF operand 0.
;; Alternatives: store from a register to memory (%vmovlps),
;; register-to-register copy (%vmovaps), and load from memory
;; (%vmovlps).
4001 (define_insn "sse_storelps"
4002 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
4004 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,m")
4005 (parallel [(const_int 0) (const_int 1)])))]
4008 %vmovlps\t{%1, %0|%q0, %1}
4009 %vmovaps\t{%1, %0|%0, %1}
4010 %vmovlps\t{%1, %d0|%d0, %q1}"
4011 [(set_attr "type" "ssemov")
4012 (set_attr "prefix" "maybe_vex")
4013 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander wrapper around the sse_loadlps insn.  The operands are first
;; legitimized by ix86_fixup_binary_operands, the insn is emitted into
;; the destination it returns, and the result is copied to operand 0
;; when that destination is a different rtx.
4015 (define_expand "sse_loadlps_exp"
4016 [(set (match_operand:V4SF 0 "nonimmediate_operand")
4018 (match_operand:V2SF 2 "nonimmediate_operand")
4020 (match_operand:V4SF 1 "nonimmediate_operand")
4021 (parallel [(const_int 2) (const_int 3)]))))]
4024 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
4026 emit_insn (gen_sse_loadlps (dst, operands[1], operands[2]));
4028 /* Fix up the destination if needed. */
4029 if (dst != operands[0])
4030 emit_move_insn (operands[0], dst);
;; Combine V2SF operand 2 with elements 2 and 3 of V4SF operand 1 into
;; V4SF operand 0.  Alternatives: register-only forms using shufps /
;; vshufps with immediate 0xe4, operand 2 from memory
;; (movlps / vmovlps), and a store of register operand 2 to memory
;; operand 0 (%vmovlps).
4035 (define_insn "sse_loadlps"
4036 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,m")
4038 (match_operand:V2SF 2 "nonimmediate_operand" " 0,x,m,m,x")
4040 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,0,x,0")
4041 (parallel [(const_int 2) (const_int 3)]))))]
4044 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
4045 vshufps\t{$0xe4, %1, %2, %0|%0, %2, %1, 0xe4}
4046 movlps\t{%2, %0|%0, %q2}
4047 vmovlps\t{%2, %1, %0|%0, %1, %q2}
4048 %vmovlps\t{%2, %0|%q0, %2}"
4049 [(set_attr "isa" "noavx,avx,noavx,avx,*")
4050 (set_attr "type" "sseshuf,sseshuf,ssemov,ssemov,ssemov")
4051 (set_attr "length_immediate" "1,1,*,*,*")
4052 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
4053 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
;; Register-only V4SF insn emitted as movss (two-operand, operand 1
;; tied to the destination) or vmovss (three-operand AVX form).
4055 (define_insn "sse_movss"
4056 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
4058 (match_operand:V4SF 2 "register_operand" " x,x")
4059 (match_operand:V4SF 1 "register_operand" " 0,x")
4063 movss\t{%2, %0|%0, %2}
4064 vmovss\t{%2, %1, %0|%0, %1, %2}"
4065 [(set_attr "isa" "noavx,avx")
4066 (set_attr "type" "ssemov")
4067 (set_attr "prefix" "orig,vex")
4068 (set_attr "mode" "SF")])
;; Broadcast element 0 of V4SF register operand 1 into a VF1-mode
;; destination; emitted as vbroadcastss with a register source.
4070 (define_insn "avx2_vec_dup<mode>"
4071 [(set (match_operand:VF1 0 "register_operand" "=x")
4074 (match_operand:V4SF 1 "register_operand" "x")
4075 (parallel [(const_int 0)]))))]
4077 "vbroadcastss\t{%1, %0|%0, %1}"
4078 [(set_attr "type" "sselog1")
4079 (set_attr "prefix" "vex")
4080 (set_attr "mode" "<MODE>")])
;; Broadcast element 0 of V8SF register operand 1 into V8SF operand 0;
;; emitted as vbroadcastss with the source printed through the %x
;; operand modifier.
4082 (define_insn "avx2_vec_dupv8sf_1"
4083 [(set (match_operand:V8SF 0 "register_operand" "=x")
4086 (match_operand:V8SF 1 "register_operand" "x")
4087 (parallel [(const_int 0)]))))]
4089 "vbroadcastss\t{%x1, %0|%0, %x1}"
4090 [(set_attr "type" "sselog1")
4091 (set_attr "prefix" "vex")
4092 (set_attr "mode" "V8SF")])
;; Duplicate SF operand 1 into all elements of V4SF operand 0.
;; Alternatives: AVX register source (vshufps with immediate 0), AVX
;; memory source (vbroadcastss), and non-AVX with the source tied to
;; the destination (shufps with immediate 0).
4094 (define_insn "vec_dupv4sf"
4095 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
4097 (match_operand:SF 1 "nonimmediate_operand" "x,m,0")))]
4100 vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0}
4101 vbroadcastss\t{%1, %0|%0, %1}
4102 shufps\t{$0, %0, %0|%0, %0, 0}"
4103 [(set_attr "isa" "avx,avx,noavx")
4104 (set_attr "type" "sseshuf1,ssemov,sseshuf1")
4105 (set_attr "length_immediate" "1,0,1")
4106 (set_attr "prefix_extra" "0,1,*")
4107 (set_attr "prefix" "vex,vex,orig")
4108 (set_attr "mode" "V4SF")])
4110 ;; Although insertps takes register source, we prefer
4111 ;; unpcklps with register source since it is shorter.
;; Build a V2SF value from two SF operands.  SSE-register alternatives
;; use unpcklps / vunpcklps (register operand 2), insertps / vinsertps
;; with immediate 0x10 (memory operand 2), or %vmovss when operand 1 is
;; in memory and operand 2 satisfies the "C" constraint.  The two "*y"
;; alternatives use punpckldq and movd.
4112 (define_insn "*vec_concatv2sf_sse4_1"
4113 [(set (match_operand:V2SF 0 "register_operand" "=x,x,x,x,x,*y ,*y")
4115 (match_operand:SF 1 "nonimmediate_operand" " 0,x,0,x,m, 0 , m")
4116 (match_operand:SF 2 "vector_move_operand" " x,x,m,m,C,*ym, C")))]
4119 unpcklps\t{%2, %0|%0, %2}
4120 vunpcklps\t{%2, %1, %0|%0, %1, %2}
4121 insertps\t{$0x10, %2, %0|%0, %2, 0x10}
4122 vinsertps\t{$0x10, %2, %1, %0|%0, %1, %2, 0x10}
4123 %vmovss\t{%1, %0|%0, %1}
4124 punpckldq\t{%2, %0|%0, %2}
4125 movd\t{%1, %0|%0, %1}"
4126 [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
4127 (set_attr "type" "sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov")
4128 (set_attr "prefix_data16" "*,*,1,*,*,*,*")
4129 (set_attr "prefix_extra" "*,*,1,1,*,*,*")
4130 (set_attr "length_immediate" "*,*,1,1,*,*,*")
4131 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig")
4132 (set_attr "mode" "V4SF,V4SF,V4SF,V4SF,SF,DI,DI")])
4134 ;; ??? In theory we can match memory for the MMX alternative, but allowing
4135 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
4136 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Build a V2SF value from two SF operands with four alternatives:
;; unpcklps and movss on "x" registers, punpckldq and movd on "*y"
;; registers.  Operand 2 is restricted to reg_or_0_operand.
4137 (define_insn "*vec_concatv2sf_sse"
4138 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
4140 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
4141 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
4144 unpcklps\t{%2, %0|%0, %2}
4145 movss\t{%1, %0|%0, %1}
4146 punpckldq\t{%2, %0|%0, %2}
4147 movd\t{%1, %0|%0, %1}"
4148 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
4149 (set_attr "mode" "V4SF,SF,DI,DI")])
;; Build a V4SF value from two V2SF operands: movlhps / vmovlhps when
;; operand 2 is a register, movhps / vmovhps when it is in memory.
4151 (define_insn "*vec_concatv4sf"
4152 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x,x")
4154 (match_operand:V2SF 1 "register_operand" " 0,x,0,x")
4155 (match_operand:V2SF 2 "nonimmediate_operand" " x,x,m,m")))]
4158 movlhps\t{%2, %0|%0, %2}
4159 vmovlhps\t{%2, %1, %0|%0, %1, %2}
4160 movhps\t{%2, %0|%0, %q2}
4161 vmovhps\t{%2, %1, %0|%0, %1, %q2}"
4162 [(set_attr "isa" "noavx,avx,noavx,avx")
4163 (set_attr "type" "ssemov")
4164 (set_attr "prefix" "orig,vex,orig,vex")
4165 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF")])
;; Vector initialization expander for the V_128 modes; all work is
;; delegated to ix86_expand_vector_init, called with false as its first
;; argument.
4167 (define_expand "vec_init<mode>"
4168 [(match_operand:V_128 0 "register_operand")
4172 ix86_expand_vector_init (false, operands[0], operands[1]);
4176 ;; Avoid combining registers from different units in a single alternative,
4177 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Insn over the VI4F_128 modes combining a duplicated scalar
;; (operand 2) with vector operand 1 into operand 0, with eleven
;; alternatives.  Alternatives 0-3 take a "C"-constrained operand 1;
;; alternatives 4-7 take operand 1 in a register (tied to the
;; destination or separate); alternatives 8-10 have a memory
;; destination tied to operand 1.
4178 (define_insn "vec_set<mode>_0"
4179 [(set (match_operand:VI4F_128 0 "nonimmediate_operand"
4180 "=x,x,x ,x,x,x,x ,x ,m ,m ,m")
4182 (vec_duplicate:VI4F_128
4183 (match_operand:<ssescalarmode> 2 "general_operand"
4184 " x,m,*r,m,x,x,*rm,*rm,!x,!*re,!*fF"))
4185 (match_operand:VI4F_128 1 "vector_move_operand"
4186 " C,C,C ,C,0,x,0 ,x ,0 ,0 ,0")
4190 %vinsertps\t{$0xe, %d2, %0|%0, %d2, 0xe}
4191 %vmov<ssescalarmodesuffix>\t{%2, %0|%0, %2}
4192 %vmovd\t{%2, %0|%0, %2}
4193 movss\t{%2, %0|%0, %2}
4194 movss\t{%2, %0|%0, %2}
4195 vmovss\t{%2, %1, %0|%0, %1, %2}
4196 pinsrd\t{$0, %2, %0|%0, %2, 0}
4197 vpinsrd\t{$0, %2, %1, %0|%0, %1, %2, 0}
4201 [(set_attr "isa" "sse4,sse2,sse2,noavx,noavx,avx,sse4_noavx,avx,*,*,*")
4203 (cond [(eq_attr "alternative" "0,6,7")
4204 (const_string "sselog")
4205 (eq_attr "alternative" "9")
4206 (const_string "imov")
4207 (eq_attr "alternative" "10")
4208 (const_string "fmov")
4210 (const_string "ssemov")))
4211 (set_attr "prefix_extra" "*,*,*,*,*,*,1,1,*,*,*")
4212 (set_attr "length_immediate" "*,*,*,*,*,*,1,1,*,*,*")
4213 (set_attr "prefix" "maybe_vex,maybe_vex,maybe_vex,orig,orig,vex,orig,vex,*,*,*")
4214 (set_attr "mode" "SF,<ssescalarmode>,SI,SF,SF,SF,TI,TI,*,*,*")])
4216 ;; A subset is vec_setv4sf.
;; Insert SF operand 2 into V4SF operand 1 using insertps / vinsertps.
;; Operand 3 must be a power of two whose log2 is below the number of
;; V4SF elements (checked in the insn condition); the output code
;; turns it into the instruction immediate as log2 (operand 3) << 4.
4217 (define_insn "*vec_setv4sf_sse4_1"
4218 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
4221 (match_operand:SF 2 "nonimmediate_operand" "xm,xm"))
4222 (match_operand:V4SF 1 "register_operand" "0,x")
4223 (match_operand:SI 3 "const_int_operand")))]
4225 && ((unsigned) exact_log2 (INTVAL (operands[3]))
4226 < GET_MODE_NUNITS (V4SFmode))"
4228 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
4229 switch (which_alternative)
4232 return "insertps\t{%3, %2, %0|%0, %2, %3}";
4234 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4239 [(set_attr "isa" "noavx,avx")
4240 (set_attr "type" "sselog")
4241 (set_attr "prefix_data16" "1,*")
4242 (set_attr "prefix_extra" "1")
4243 (set_attr "length_immediate" "1")
4244 (set_attr "prefix" "orig,vex")
4245 (set_attr "mode" "V4SF")])
;; insertps / vinsertps with the full 8-bit immediate, modeled as an
;; unspec.  When operand 2 is in memory, the top two bits of the
;; immediate (count_s) are folded into the address as an SFmode access
;; at offset count_s * 4, and only the low six bits are kept in the
;; immediate.
4247 (define_insn "sse4_1_insertps"
4248 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
4249 (unspec:V4SF [(match_operand:V4SF 2 "nonimmediate_operand" "xm,xm")
4250 (match_operand:V4SF 1 "register_operand" "0,x")
4251 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
4255 if (MEM_P (operands[2]))
4257 unsigned count_s = INTVAL (operands[3]) >> 6;
4259 operands[3] = GEN_INT (INTVAL (operands[3]) & 0x3f);
4260 operands[2] = adjust_address_nv (operands[2], SFmode, count_s * 4);
4262 switch (which_alternative)
4265 return "insertps\t{%3, %2, %0|%0, %2, %3}";
4267 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4272 [(set_attr "isa" "noavx,avx")
4273 (set_attr "type" "sselog")
4274 (set_attr "prefix_data16" "1,*")
4275 (set_attr "prefix_extra" "1")
4276 (set_attr "length_immediate" "1")
4277 (set_attr "prefix" "orig,vex")
4278 (set_attr "mode" "V4SF")])
;; NOTE(review): this looks like a post-reload split for a memory
;; destination -- confirm against the enclosing form.  The replacement
;; is a plain move of nonmemory operand 1 into operand 0 after operand 0
;; has been re-addressed in <ssescalarmode> at offset 0.
4281 [(set (match_operand:VI4F_128 0 "memory_operand")
4283 (vec_duplicate:VI4F_128
4284 (match_operand:<ssescalarmode> 1 "nonmemory_operand"))
4287 "TARGET_SSE && reload_completed"
4288 [(set (match_dup 0) (match_dup 1))]
4289 "operands[0] = adjust_address (operands[0], <ssescalarmode>mode, 0);")
;; Element-set expander for all modes in the V iterator: operand 1 is
;; the scalar value and operand 2 the constant element index.  All work
;; is delegated to ix86_expand_vector_set, called with false as its
;; first argument.
4291 (define_expand "vec_set<mode>"
4292 [(match_operand:V 0 "register_operand")
4293 (match_operand:<ssescalarmode> 1 "register_operand")
4294 (match_operand 2 "const_int_operand")]
4297 ix86_expand_vector_set (false, operands[0], operands[1],
4298 INTVAL (operands[2]));
;; Extract element 0 of V4SF operand 1 into SF operand 0.  After reload
;; this is split into a plain SFmode move: a register source is
;; re-created as an SFmode register with the same register number, any
;; other source is re-addressed as SFmode at offset 0.
4302 (define_insn_and_split "*vec_extractv4sf_0"
4303 [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,f,r")
4305 (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m,m")
4306 (parallel [(const_int 0)])))]
4307 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4309 "&& reload_completed"
4310 [(set (match_dup 0) (match_dup 1))]
4312 if (REG_P (operands[1]))
4313 operands[1] = gen_rtx_REG (SFmode, REGNO (operands[1]));
4315 operands[1] = adjust_address (operands[1], SFmode, 0);
;; Extract element operand 2 (0-3) of V4SF register operand 1 into SF
;; operand 0.  The "rm" destination alternative emits %vextractps.
;; When the destination is an SSE register the insn is split after
;; reload: depending on the index, the preparation code emits either
;; sse_shufps_v4sf with that index replicated, or
;; vec_interleave_highv4sf of operand 1 with itself, writing the V4SF
;; view of the destination register.
4318 (define_insn_and_split "*sse4_1_extractps"
4319 [(set (match_operand:SF 0 "nonimmediate_operand" "=rm,x,x")
4321 (match_operand:V4SF 1 "register_operand" "x,0,x")
4322 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n,n,n")])))]
4325 %vextractps\t{%2, %1, %0|%0, %1, %2}
4328 "&& reload_completed && SSE_REG_P (operands[0])"
4331 rtx dest = gen_rtx_REG (V4SFmode, REGNO (operands[0]));
4332 switch (INTVAL (operands[2]))
4336 emit_insn (gen_sse_shufps_v4sf (dest, operands[1], operands[1],
4337 operands[2], operands[2],
4338 GEN_INT (INTVAL (operands[2]) + 4),
4339 GEN_INT (INTVAL (operands[2]) + 4)));
4342 emit_insn (gen_vec_interleave_highv4sf (dest, operands[1], operands[1]));
4345 /* 0 should be handled by the *vec_extractv4sf_0 pattern above. */
4350 [(set_attr "isa" "*,noavx,avx")
4351 (set_attr "type" "sselog,*,*")
4352 (set_attr "prefix_data16" "1,*,*")
4353 (set_attr "prefix_extra" "1,*,*")
4354 (set_attr "length_immediate" "1,*,*")
4355 (set_attr "prefix" "maybe_vex,*,*")
4356 (set_attr "mode" "V4SF,*,*")])
;; Extract element operand 2 (0-3) from a V4SF value in offsettable
;; memory.  After reload this becomes a plain SFmode load from the
;; source address plus 4 * index.
4358 (define_insn_and_split "*vec_extractv4sf_mem"
4359 [(set (match_operand:SF 0 "register_operand" "=x,*r,f")
4361 (match_operand:V4SF 1 "memory_operand" "o,o,o")
4362 (parallel [(match_operand 2 "const_0_to_3_operand" "n,n,n")])))]
4365 "&& reload_completed"
4366 [(set (match_dup 0) (match_dup 1))]
4368 operands[1] = adjust_address (operands[1], SFmode, INTVAL (operands[2]) * 4);
;; Expand extraction of one half of a V_256-mode register (operand 1)
;; into operand 0.  Operand 2 (0 or 1) selects, through a switch,
;; between the vec_extract_lo_<mode> and vec_extract_hi_<mode>
;; generators, and the chosen one is emitted.
4371 (define_expand "avx_vextractf128<mode>"
4372 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
4373 (match_operand:V_256 1 "register_operand")
4374 (match_operand:SI 2 "const_0_to_1_operand")]
4377 rtx (*insn)(rtx, rtx);
4379 switch (INTVAL (operands[2]))
4382 insn = gen_vec_extract_lo_<mode>;
4385 insn = gen_vec_extract_hi_<mode>;
4391 emit_insn (insn (operands[0], operands[1]));
;; Extract the low half (elements 0 and 1) of a VI8F_256 operand.
;; After reload this is split into a plain half-width move: a register
;; source is re-created in <ssehalfvecmode> with the same register
;; number, any other source is re-addressed at offset 0.
4395 (define_insn_and_split "vec_extract_lo_<mode>"
4396 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
4397 (vec_select:<ssehalfvecmode>
4398 (match_operand:VI8F_256 1 "nonimmediate_operand" "xm,x")
4399 (parallel [(const_int 0) (const_int 1)])))]
4400 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4402 "&& reload_completed"
4403 [(set (match_dup 0) (match_dup 1))]
4405 if (REG_P (operands[1]))
4406 operands[1] = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (operands[1]));
4408 operands[1] = adjust_address (operands[1], <ssehalfvecmode>mode, 0);
;; Extract the high half (elements 2 and 3) of a VI8F_256 register into
;; a register or memory destination; emitted as vextract<i128> with
;; immediate 1.
4411 (define_insn "vec_extract_hi_<mode>"
4412 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
4413 (vec_select:<ssehalfvecmode>
4414 (match_operand:VI8F_256 1 "register_operand" "x,x")
4415 (parallel [(const_int 2) (const_int 3)])))]
4417 "vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}"
4418 [(set_attr "type" "sselog")
4419 (set_attr "prefix_extra" "1")
4420 (set_attr "length_immediate" "1")
4421 (set_attr "memory" "none,store")
4422 (set_attr "prefix" "vex")
4423 (set_attr "mode" "<sseinsnmode>")])
;; Extract the low half (elements 0-3) of a VI4F_256 operand.  After
;; reload this is split into a plain half-width move: a register source
;; is re-created in <ssehalfvecmode> with the same register number, any
;; other source is re-addressed at offset 0.
4425 (define_insn_and_split "vec_extract_lo_<mode>"
4426 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
4427 (vec_select:<ssehalfvecmode>
4428 (match_operand:VI4F_256 1 "nonimmediate_operand" "xm,x")
4429 (parallel [(const_int 0) (const_int 1)
4430 (const_int 2) (const_int 3)])))]
4431 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4433 "&& reload_completed"
4434 [(set (match_dup 0) (match_dup 1))]
4436 if (REG_P (operands[1]))
4437 operands[1] = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (operands[1]));
4439 operands[1] = adjust_address (operands[1], <ssehalfvecmode>mode, 0);
;; Extract the high half (elements 4-7) of a VI4F_256 register into a
;; register or memory destination; emitted as vextract<i128> with
;; immediate 1.
4442 (define_insn "vec_extract_hi_<mode>"
4443 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
4444 (vec_select:<ssehalfvecmode>
4445 (match_operand:VI4F_256 1 "register_operand" "x,x")
4446 (parallel [(const_int 4) (const_int 5)
4447 (const_int 6) (const_int 7)])))]
4449 "vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}"
4450 [(set_attr "type" "sselog")
4451 (set_attr "prefix_extra" "1")
4452 (set_attr "length_immediate" "1")
4453 (set_attr "memory" "none,store")
4454 (set_attr "prefix" "vex")
4455 (set_attr "mode" "<sseinsnmode>")])
;; Extract the low half (elements 0-7) of a V16HI operand into V8HI.
;; After reload this is split into a plain V8HImode move: a register
;; source is re-created as a V8HI register with the same register
;; number, any other source is re-addressed at offset 0.
4457 (define_insn_and_split "vec_extract_lo_v16hi"
4458 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
4460 (match_operand:V16HI 1 "nonimmediate_operand" "xm,x")
4461 (parallel [(const_int 0) (const_int 1)
4462 (const_int 2) (const_int 3)
4463 (const_int 4) (const_int 5)
4464 (const_int 6) (const_int 7)])))]
4465 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4467 "&& reload_completed"
4468 [(set (match_dup 0) (match_dup 1))]
4470 if (REG_P (operands[1]))
4471 operands[1] = gen_rtx_REG (V8HImode, REGNO (operands[1]));
4473 operands[1] = adjust_address (operands[1], V8HImode, 0);
;; Extract the high half (elements 8-15) of a V16HI register into a
;; V8HI register or memory destination; emitted as vextract%~128 with
;; immediate 1.
4476 (define_insn "vec_extract_hi_v16hi"
4477 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
4479 (match_operand:V16HI 1 "register_operand" "x,x")
4480 (parallel [(const_int 8) (const_int 9)
4481 (const_int 10) (const_int 11)
4482 (const_int 12) (const_int 13)
4483 (const_int 14) (const_int 15)])))]
4485 "vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}"
4486 [(set_attr "type" "sselog")
4487 (set_attr "prefix_extra" "1")
4488 (set_attr "length_immediate" "1")
4489 (set_attr "memory" "none,store")
4490 (set_attr "prefix" "vex")
4491 (set_attr "mode" "OI")])
;; Select elements 0-15 of a V32QI operand as a V16QI value.  Requires
;; TARGET_AVX and rejects a memory-to-memory form.  After reload the
;; insn is split into a plain move: a register source is re-typed as
;; V16QImode with the same register number, and the memory form is
;; re-addressed as V16QImode at offset 0.
4493 (define_insn_and_split "vec_extract_lo_v32qi"
4494 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
4496 (match_operand:V32QI 1 "nonimmediate_operand" "xm,x")
4497 (parallel [(const_int 0) (const_int 1)
4498 (const_int 2) (const_int 3)
4499 (const_int 4) (const_int 5)
4500 (const_int 6) (const_int 7)
4501 (const_int 8) (const_int 9)
4502 (const_int 10) (const_int 11)
4503 (const_int 12) (const_int 13)
4504 (const_int 14) (const_int 15)])))]
4505 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4507 "&& reload_completed"
4508 [(set (match_dup 0) (match_dup 1))]
4510 if (REG_P (operands[1]))
4511 operands[1] = gen_rtx_REG (V16QImode, REGNO (operands[1]));
4513 operands[1] = adjust_address (operands[1], V16QImode, 0);
;; Select elements 16-31 of a V32QI register as a V16QI value, written
;; to a register or to memory, using vextract%~128 with immediate 1.
4516 (define_insn "vec_extract_hi_v32qi"
4517 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
4519 (match_operand:V32QI 1 "register_operand" "x,x")
4520 (parallel [(const_int 16) (const_int 17)
4521 (const_int 18) (const_int 19)
4522 (const_int 20) (const_int 21)
4523 (const_int 22) (const_int 23)
4524 (const_int 24) (const_int 25)
4525 (const_int 26) (const_int 27)
4526 (const_int 28) (const_int 29)
4527 (const_int 30) (const_int 31)])))]
4529 "vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}"
4530 [(set_attr "type" "sselog")
4531 (set_attr "prefix_extra" "1")
4532 (set_attr "length_immediate" "1")
4533 (set_attr "memory" "none,store")
4534 (set_attr "prefix" "vex")
4535 (set_attr "mode" "OI")])
4537 ;; Modes handled by vec_extract patterns.
;; Each row pairs a 256-bit mode, enabled only under TARGET_AVX, with
;; its 128-bit counterpart, which is listed unconditionally.
4538 (define_mode_iterator VEC_EXTRACT_MODE
4539 [(V32QI "TARGET_AVX") V16QI
4540 (V16HI "TARGET_AVX") V8HI
4541 (V8SI "TARGET_AVX") V4SI
4542 (V4DI "TARGET_AVX") V2DI
4543 (V8SF "TARGET_AVX") V4SF
4544 (V4DF "TARGET_AVX") V2DF])
;; Expander for extracting one scalar element from a vector register.
;; Operand 0 is the <ssescalarmode> destination register, operand 1 the
;; source vector and operand 2 a constant integer that is passed on as
;; INTVAL to ix86_expand_vector_extract, which does the actual work.
4546 (define_expand "vec_extract<mode>"
4547 [(match_operand:<ssescalarmode> 0 "register_operand")
4548 (match_operand:VEC_EXTRACT_MODE 1 "register_operand")
4549 (match_operand 2 "const_int_operand")]
4552 ix86_expand_vector_extract (false, operands[0], operands[1],
4553 INTVAL (operands[2]));
4557 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4559 ;; Parallel double-precision floating point element swizzling
4561 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4563 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; vunpckhpd on V4DF operands: operand 1 is a register, operand 2 a
;; register or memory.  The selection indices are 1, 5, 3, 7.
4564 (define_insn "avx_unpckhpd256"
4565 [(set (match_operand:V4DF 0 "register_operand" "=x")
4568 (match_operand:V4DF 1 "register_operand" "x")
4569 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4570 (parallel [(const_int 1) (const_int 5)
4571 (const_int 3) (const_int 7)])))]
4573 "vunpckhpd\t{%2, %1, %0|%0, %1, %2}"
4574 [(set_attr "type" "sselog")
4575 (set_attr "prefix" "vex")
4576 (set_attr "mode" "V4DF")])
;; Expander built from three selections: indices {0,4,2,6} and
;; {1,5,3,7} applied to operands 1 and 2, followed by a final selection
;; with indices {2,3,6,7} that sets operand 0.  Operands 3 and 4 are
;; fresh V4DF pseudo registers allocated by the preparation code.
4578 (define_expand "vec_interleave_highv4df"
4582 (match_operand:V4DF 1 "register_operand" "x")
4583 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4584 (parallel [(const_int 0) (const_int 4)
4585 (const_int 2) (const_int 6)])))
4591 (parallel [(const_int 1) (const_int 5)
4592 (const_int 3) (const_int 7)])))
4593 (set (match_operand:V4DF 0 "register_operand")
4598 (parallel [(const_int 2) (const_int 3)
4599 (const_int 6) (const_int 7)])))]
4602 operands[3] = gen_reg_rtx (V4DFmode);
4603 operands[4] = gen_reg_rtx (V4DFmode);
;; Expander for the V2DF high interleave.  When
;; ix86_vec_interleave_v2df_operator_ok (operands, 1) rejects the
;; operand combination, operand 2 is forced into a register.
4607 (define_expand "vec_interleave_highv2df"
4608 [(set (match_operand:V2DF 0 "register_operand")
4611 (match_operand:V2DF 1 "nonimmediate_operand")
4612 (match_operand:V2DF 2 "nonimmediate_operand"))
4613 (parallel [(const_int 1)
4617 if (!ix86_vec_interleave_v2df_operator_ok (operands, 1))
4618 operands[2] = force_reg (V2DFmode, operands[2]);
;; Matching insn for the V2DF high interleave, valid under TARGET_SSE2
;; when ix86_vec_interleave_v2df_operator_ok (operands, 1) holds.
;; Six alternatives, in order: unpckhpd, vunpckhpd, movddup from the
;; high half of a memory operand 1 (%H1), movlpd and vmovlpd loading
;; %H1, and a movhpd store to a memory destination.
4621 (define_insn "*vec_interleave_highv2df"
4622 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m")
4625 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,o,o,o,x")
4626 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,1,0,x,0"))
4627 (parallel [(const_int 1)
4629 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
4631 unpckhpd\t{%2, %0|%0, %2}
4632 vunpckhpd\t{%2, %1, %0|%0, %1, %2}
4633 %vmovddup\t{%H1, %0|%0, %H1}
4634 movlpd\t{%H1, %0|%0, %H1}
4635 vmovlpd\t{%H1, %2, %0|%0, %2, %H1}
4636 %vmovhpd\t{%1, %0|%q0, %1}"
4637 [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
4638 (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
4639 (set_attr "prefix_data16" "*,*,*,1,*,1")
4640 (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex")
4641 (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,V1DF")])
4643 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; Expander with a single V4DF source operand 1 (register or memory)
;; and selection indices 0, 4, 2, 6 -- the same index list used by the
;; unpcklpd256 patterns.
4644 (define_expand "avx_movddup256"
4645 [(set (match_operand:V4DF 0 "register_operand")
4648 (match_operand:V4DF 1 "nonimmediate_operand")
4650 (parallel [(const_int 0) (const_int 4)
4651 (const_int 2) (const_int 6)])))]
;; Named expander for the V4DF low unpack: register operand 1,
;; register-or-memory operand 2, selection indices 0, 4, 2, 6.
4654 (define_expand "avx_unpcklpd256"
4655 [(set (match_operand:V4DF 0 "register_operand")
4658 (match_operand:V4DF 1 "register_operand")
4659 (match_operand:V4DF 2 "nonimmediate_operand"))
4660 (parallel [(const_int 0) (const_int 4)
4661 (const_int 2) (const_int 6)])))]
;; Matching insn for selection indices 0, 4, 2, 6 on V4DF operands.
;; Alternative 0 emits vunpcklpd with a register operand 1.
;; Alternative 1 covers a memory operand 1 with operand 2 tied to it
;; (constraint "1") and emits vmovddup of that single source.
4664 (define_insn "*avx_unpcklpd256"
4665 [(set (match_operand:V4DF 0 "register_operand" "=x,x")
4668 (match_operand:V4DF 1 "nonimmediate_operand" " x,m")
4669 (match_operand:V4DF 2 "nonimmediate_operand" "xm,1"))
4670 (parallel [(const_int 0) (const_int 4)
4671 (const_int 2) (const_int 6)])))]
4674 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4675 vmovddup\t{%1, %0|%0, %1}"
4676 [(set_attr "type" "sselog")
4677 (set_attr "prefix" "vex")
4678 (set_attr "mode" "V4DF")])
;; Expander built from three selections: indices {0,4,2,6} and
;; {1,5,3,7} applied to operands 1 and 2, followed by a final selection
;; with indices {0,1,4,5} that sets operand 0.  Operands 3 and 4 are
;; fresh V4DF pseudo registers allocated by the preparation code.
4680 (define_expand "vec_interleave_lowv4df"
4684 (match_operand:V4DF 1 "register_operand" "x")
4685 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4686 (parallel [(const_int 0) (const_int 4)
4687 (const_int 2) (const_int 6)])))
4693 (parallel [(const_int 1) (const_int 5)
4694 (const_int 3) (const_int 7)])))
4695 (set (match_operand:V4DF 0 "register_operand")
4700 (parallel [(const_int 0) (const_int 1)
4701 (const_int 4) (const_int 5)])))]
4704 operands[3] = gen_reg_rtx (V4DFmode);
4705 operands[4] = gen_reg_rtx (V4DFmode);
;; Expander for the V2DF low interleave.  When
;; ix86_vec_interleave_v2df_operator_ok (operands, 0) rejects the
;; operand combination, operand 1 is forced into a register.
4708 (define_expand "vec_interleave_lowv2df"
4709 [(set (match_operand:V2DF 0 "register_operand")
4712 (match_operand:V2DF 1 "nonimmediate_operand")
4713 (match_operand:V2DF 2 "nonimmediate_operand"))
4714 (parallel [(const_int 0)
4718 if (!ix86_vec_interleave_v2df_operator_ok (operands, 0))
4719 operands[1] = force_reg (V2DFmode, operands[1]);
;; Matching insn for the V2DF low interleave, valid under TARGET_SSE2
;; when ix86_vec_interleave_v2df_operator_ok (operands, 0) holds.
;; Six alternatives, in order: unpcklpd, vunpcklpd, movddup of a memory
;; operand 1, movhpd and vmovhpd loading operand 2 from memory, and a
;; movlpd store of operand 2 into the high half (%H0) of a memory
;; destination.
4722 (define_insn "*vec_interleave_lowv2df"
4723 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,o")
4726 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,m,0,x,0")
4727 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,1,m,m,x"))
4728 (parallel [(const_int 0)
4730 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
4732 unpcklpd\t{%2, %0|%0, %2}
4733 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4734 %vmovddup\t{%1, %0|%0, %q1}
4735 movhpd\t{%2, %0|%0, %q2}
4736 vmovhpd\t{%2, %1, %0|%0, %1, %q2}
4737 %vmovlpd\t{%2, %H0|%H0, %2}"
4738 [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
4739 (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
4740 (set_attr "prefix_data16" "*,*,*,1,*,1")
4741 (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex")
4742 (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,V1DF")])
;; Post-reload split (TARGET_SSE3) for a V2DF memory destination fed
;; from a V2DF register with a selection starting at index 0.  The
;; replacement code stores the DFmode low part of the source register
;; twice, at byte offsets 0 and 8 of the destination.
;; NOTE(review): the opening line of this definition is not visible in
;; this excerpt -- confirm it is a define_split.
4745 [(set (match_operand:V2DF 0 "memory_operand")
4748 (match_operand:V2DF 1 "register_operand")
4750 (parallel [(const_int 0)
4752 "TARGET_SSE3 && reload_completed"
4755 rtx low = gen_rtx_REG (DFmode, REGNO (operands[1]));
4756 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
4757 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
;; Split (TARGET_SSE3) for a V2DF register destination whose source is
;; a V2DF memory operand selected with indices (operand 2, operand 3),
;; where operand 3 equals operand 2 + 2.  It is rewritten as a
;; vec_duplicate of the DFmode memory word at byte offset
;; INTVAL (operands[2]) * 8.
;; NOTE(review): the opening line of this definition is not visible in
;; this excerpt -- confirm it is a define_split.
4762 [(set (match_operand:V2DF 0 "register_operand")
4765 (match_operand:V2DF 1 "memory_operand")
4767 (parallel [(match_operand:SI 2 "const_0_to_1_operand")
4768 (match_operand:SI 3 "const_int_operand")])))]
4769 "TARGET_SSE3 && INTVAL (operands[2]) + 2 == INTVAL (operands[3])"
4770 [(set (match_dup 0) (vec_duplicate:V2DF (match_dup 1)))]
4772 operands[1] = adjust_address (operands[1], DFmode, INTVAL (operands[2]) * 8);
;; Expander taking the shuffle immediate as operand 3 and forwarding to
;; gen_avx_shufpd256_1 with one explicit selector constant per element.
;; In the visible arguments, mask bit 1 chooses 5 or 4, bit 2 chooses
;; 3 or 2, and bit 3 chooses 7 or 6.
4775 (define_expand "avx_shufpd256"
4776 [(match_operand:V4DF 0 "register_operand")
4777 (match_operand:V4DF 1 "register_operand")
4778 (match_operand:V4DF 2 "nonimmediate_operand")
4779 (match_operand:SI 3 "const_int_operand")]
4782 int mask = INTVAL (operands[3]);
4783 emit_insn (gen_avx_shufpd256_1 (operands[0], operands[1], operands[2],
4785 GEN_INT (mask & 2 ? 5 : 4),
4786 GEN_INT (mask & 4 ? 3 : 2),
4787 GEN_INT (mask & 8 ? 7 : 6)));
;; vshufpd on V4DF operands with the four selectors given as separate
;; constant operands 3-6, restricted to the ranges 0-1, 4-5, 2-3 and
;; 6-7.  The output code folds them back into a 4-bit immediate
;; (operand 3 supplies bit 0; operands 4, 5 and 6, less their range
;; base, supply bits 1, 2 and 3) and stores it in operands[3] for
;; printing.
4791 (define_insn "avx_shufpd256_1"
4792 [(set (match_operand:V4DF 0 "register_operand" "=x")
4795 (match_operand:V4DF 1 "register_operand" "x")
4796 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4797 (parallel [(match_operand 3 "const_0_to_1_operand")
4798 (match_operand 4 "const_4_to_5_operand")
4799 (match_operand 5 "const_2_to_3_operand")
4800 (match_operand 6 "const_6_to_7_operand")])))]
4804 mask = INTVAL (operands[3]);
4805 mask |= (INTVAL (operands[4]) - 4) << 1;
4806 mask |= (INTVAL (operands[5]) - 2) << 2;
4807 mask |= (INTVAL (operands[6]) - 6) << 3;
4808 operands[3] = GEN_INT (mask);
4810 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4812 [(set_attr "type" "sseshuf")
4813 (set_attr "length_immediate" "1")
4814 (set_attr "prefix" "vex")
4815 (set_attr "mode" "V4DF")])
;; Expander taking the shuffle immediate as operand 3 and forwarding to
;; gen_sse2_shufpd_v2df with explicit selector constants.  In the
;; visible argument, mask bit 1 chooses 3 or 2.
4817 (define_expand "sse2_shufpd"
4818 [(match_operand:V2DF 0 "register_operand")
4819 (match_operand:V2DF 1 "register_operand")
4820 (match_operand:V2DF 2 "nonimmediate_operand")
4821 (match_operand:SI 3 "const_int_operand")]
4824 int mask = INTVAL (operands[3]);
4825 emit_insn (gen_sse2_shufpd_v2df (operands[0], operands[1], operands[2],
4827 GEN_INT (mask & 2 ? 3 : 2)));
4831 ;; punpcklqdq and punpckhqdq are shorter than shufpd.
;; vpunpckhqdq on V4DI operands: operand 1 is a register, operand 2 a
;; register or memory.  The selection list starts at index 1.
4832 (define_insn "avx2_interleave_highv4di"
4833 [(set (match_operand:V4DI 0 "register_operand" "=x")
4836 (match_operand:V4DI 1 "register_operand" "x")
4837 (match_operand:V4DI 2 "nonimmediate_operand" "xm"))
4838 (parallel [(const_int 1)
4843 "vpunpckhqdq\t{%2, %1, %0|%0, %1, %2}"
4844 [(set_attr "type" "sselog")
4845 (set_attr "prefix" "vex")
4846 (set_attr "mode" "OI")])
;; V2DI high interleave.  Alternative 0 is the two-operand punpckhqdq
;; with operand 1 tied to the destination (noavx); alternative 1 is the
;; three-operand vpunpckhqdq (avx).
4848 (define_insn "vec_interleave_highv2di"
4849 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
4852 (match_operand:V2DI 1 "register_operand" "0,x")
4853 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm"))
4854 (parallel [(const_int 1)
4858 punpckhqdq\t{%2, %0|%0, %2}
4859 vpunpckhqdq\t{%2, %1, %0|%0, %1, %2}"
4860 [(set_attr "isa" "noavx,avx")
4861 (set_attr "type" "sselog")
4862 (set_attr "prefix_data16" "1,*")
4863 (set_attr "prefix" "orig,vex")
4864 (set_attr "mode" "TI")])
;; vpunpcklqdq on V4DI operands: operand 1 is a register, operand 2 a
;; register or memory.  The selection list starts at index 0.
4866 (define_insn "avx2_interleave_lowv4di"
4867 [(set (match_operand:V4DI 0 "register_operand" "=x")
4870 (match_operand:V4DI 1 "register_operand" "x")
4871 (match_operand:V4DI 2 "nonimmediate_operand" "xm"))
4872 (parallel [(const_int 0)
4877 "vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}"
4878 [(set_attr "type" "sselog")
4879 (set_attr "prefix" "vex")
4880 (set_attr "mode" "OI")])
;; V2DI low interleave.  Alternative 0 is the two-operand punpcklqdq
;; with operand 1 tied to the destination (noavx); alternative 1 is the
;; three-operand vpunpcklqdq (avx).
4882 (define_insn "vec_interleave_lowv2di"
4883 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
4886 (match_operand:V2DI 1 "register_operand" "0,x")
4887 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm"))
4888 (parallel [(const_int 0)
4892 punpcklqdq\t{%2, %0|%0, %2}
4893 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}"
4894 [(set_attr "isa" "noavx,avx")
4895 (set_attr "type" "sselog")
4896 (set_attr "prefix_data16" "1,*")
4897 (set_attr "prefix" "orig,vex")
4898 (set_attr "mode" "TI")])
;; shufpd/vshufpd over the VI8F_128 modes: selects two elements from
;; the concatenation of operands 1 and 2.  Operand 3 (range 0-1) picks
;; from the first source and operand 4 (range 2-3) from the second.
;; The output code folds both selectors into a 2-bit immediate stored
;; in operands[3], then emits the two-operand or the VEX three-operand
;; form depending on which_alternative.
4900 (define_insn "sse2_shufpd_<mode>"
4901 [(set (match_operand:VI8F_128 0 "register_operand" "=x,x")
4902 (vec_select:VI8F_128
4903 (vec_concat:<ssedoublevecmode>
4904 (match_operand:VI8F_128 1 "register_operand" "0,x")
4905 (match_operand:VI8F_128 2 "nonimmediate_operand" "xm,xm"))
4906 (parallel [(match_operand 3 "const_0_to_1_operand")
4907 (match_operand 4 "const_2_to_3_operand")])))]
4911 mask = INTVAL (operands[3]);
4912 mask |= (INTVAL (operands[4]) - 2) << 1;
4913 operands[3] = GEN_INT (mask);
4915 switch (which_alternative)
4918 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
4920 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4925 [(set_attr "isa" "noavx,avx")
4926 (set_attr "type" "sseshuf")
4927 (set_attr "length_immediate" "1")
4928 (set_attr "prefix" "orig,vex")
4929 (set_attr "mode" "V2DF")])
4931 ;; Avoid combining registers from different units in a single alternative,
4932 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Extract element 1 of a V2DF operand as a DF value.  Requires
;; TARGET_SSE2 and rejects a memory-to-memory form.  The destination
;; alternatives are memory, SSE registers, an x87 register (*f) and a
;; general register (r); the last three alternatives take the source
;; from offsettable memory.
;; The prefix_data16 attribute is conditional on alternative 0 with
;; TARGET_AVX disabled.
4933 (define_insn "sse2_storehpd"
4934 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,x,*f,r")
4936 (match_operand:V2DF 1 "nonimmediate_operand" " x,0,x,o,o,o")
4937 (parallel [(const_int 1)])))]
4938 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4940 %vmovhpd\t{%1, %0|%0, %1}
4942 vunpckhpd\t{%d1, %0|%0, %d1}
4946 [(set_attr "isa" "*,noavx,avx,*,*,*")
4947 (set_attr "type" "ssemov,sselog1,sselog1,ssemov,fmov,imov")
4948 (set (attr "prefix_data16")
4950 (and (eq_attr "alternative" "0")
4951 (not (match_test "TARGET_AVX")))
4953 (const_string "*")))
4954 (set_attr "prefix" "maybe_vex,orig,vex,*,*,*")
4955 (set_attr "mode" "V1DF,V1DF,V2DF,DF,DF,DF")])
;; Post-reload split (TARGET_SSE2): extracting element 1 of a V2DF
;; memory operand into a register becomes a plain DFmode load from byte
;; offset 8 of that memory.
;; NOTE(review): the opening line of this definition is not visible in
;; this excerpt -- confirm it is a define_split.
4958 [(set (match_operand:DF 0 "register_operand")
4960 (match_operand:V2DF 1 "memory_operand")
4961 (parallel [(const_int 1)])))]
4962 "TARGET_SSE2 && reload_completed"
4963 [(set (match_dup 0) (match_dup 1))]
4964 "operands[1] = adjust_address (operands[1], DFmode, 8);")
;; Element-1 extraction from V2DF for targets with SSE but without
;; SSE2, implemented with single-precision moves: movhps to store to
;; memory, movhlps between registers, and movlps loading the high half
;; (%H1) of an offsettable memory source.
4966 (define_insn "*vec_extractv2df_1_sse"
4967 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
4969 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
4970 (parallel [(const_int 1)])))]
4971 "!TARGET_SSE2 && TARGET_SSE
4972 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4974 movhps\t{%1, %0|%q0, %1}
4975 movhlps\t{%1, %0|%0, %1}
4976 movlps\t{%H1, %0|%0, %H1}"
4977 [(set_attr "type" "ssemov")
4978 (set_attr "mode" "V2SF,V4SF,V2SF")])
4980 ;; Avoid combining registers from different units in a single alternative,
4981 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Extract element 0 of a V2DF operand as a DF value.  Requires
;; TARGET_SSE2 and rejects a memory-to-memory form.  The destination
;; alternatives are memory, SSE registers, an x87 register (*f) and a
;; general register (r); the last three alternatives take the source
;; from memory.  Only the first output template (a movlpd store) is
;; visible in this excerpt.
4982 (define_insn "sse2_storelpd"
4983 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
4985 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m,m,m")
4986 (parallel [(const_int 0)])))]
4987 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4989 %vmovlpd\t{%1, %0|%0, %1}
4994 [(set_attr "type" "ssemov,ssemov,ssemov,fmov,imov")
4995 (set_attr "prefix_data16" "1,*,*,*,*")
4996 (set_attr "prefix" "maybe_vex")
4997 (set_attr "mode" "V1DF,DF,DF,DF,DF")])
;; Post-reload split (TARGET_SSE2): extracting element 0 of a V2DF
;; operand into a register becomes a plain DFmode move.  A register
;; source is re-typed as DFmode with the same register number, and the
;; memory form is re-addressed as DFmode at offset 0.
;; NOTE(review): the opening line of this definition is not visible in
;; this excerpt -- confirm it is a define_split.
5000 [(set (match_operand:DF 0 "register_operand")
5002 (match_operand:V2DF 1 "nonimmediate_operand")
5003 (parallel [(const_int 0)])))]
5004 "TARGET_SSE2 && reload_completed"
5005 [(set (match_dup 0) (match_dup 1))]
5007 if (REG_P (operands[1]))
5008 operands[1] = gen_rtx_REG (DFmode, REGNO (operands[1]));
5010 operands[1] = adjust_address (operands[1], DFmode, 0);
;; Element-0 extraction from V2DF for targets with SSE but without
;; SSE2, implemented with single-precision moves: movlps to store to
;; memory, movaps between registers, and movlps to load from memory.
5013 (define_insn "*vec_extractv2df_0_sse"
5014 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
5016 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
5017 (parallel [(const_int 0)])))]
5018 "!TARGET_SSE2 && TARGET_SSE
5019 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
5021 movlps\t{%1, %0|%0, %1}
5022 movaps\t{%1, %0|%0, %1}
5023 movlps\t{%1, %0|%0, %q1}"
5024 [(set_attr "type" "ssemov")
5025 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander wrapper around sse2_loadhpd.  The pattern pairs element 0
;; of V2DF operand 1 with DF operand 2.  Operands are first legitimized
;; by ix86_fixup_binary_operands, which returns the destination to use;
;; if that differs from operand 0, the result is copied to operand 0
;; afterwards.
5027 (define_expand "sse2_loadhpd_exp"
5028 [(set (match_operand:V2DF 0 "nonimmediate_operand")
5031 (match_operand:V2DF 1 "nonimmediate_operand")
5032 (parallel [(const_int 0)]))
5033 (match_operand:DF 2 "nonimmediate_operand")))]
5036 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
5038 emit_insn (gen_sse2_loadhpd (dst, operands[1], operands[2]));
5040 /* Fix up the destination if needed. */
5041 if (dst != operands[0])
5042 emit_move_insn (operands[0], dst);
5047 ;; Avoid combining registers from different units in a single alternative,
5048 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Pairs element 0 of V2DF operand 1 with DF operand 2.  Requires
;; TARGET_SSE2 and rejects the case where operands 1 and 2 are both in
;; memory.  Seven alternatives; the visible templates are movhpd and
;; vmovhpd (operand 2 in memory) and unpcklpd and vunpcklpd (operand 2
;; in an SSE register).  The last two alternatives take operand 2 from
;; an x87 register (*f) or a general register (r).
5049 (define_insn "sse2_loadhpd"
5050 [(set (match_operand:V2DF 0 "nonimmediate_operand"
5054 (match_operand:V2DF 1 "nonimmediate_operand"
5056 (parallel [(const_int 0)]))
5057 (match_operand:DF 2 "nonimmediate_operand"
5058 " m,m,x,x,x,*f,r")))]
5059 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
5061 movhpd\t{%2, %0|%0, %2}
5062 vmovhpd\t{%2, %1, %0|%0, %1, %2}
5063 unpcklpd\t{%2, %0|%0, %2}
5064 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
5068 [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
5069 (set_attr "type" "ssemov,ssemov,sselog,sselog,ssemov,fmov,imov")
5070 (set_attr "prefix_data16" "1,*,*,*,*,*,*")
5071 (set_attr "prefix" "orig,vex,orig,vex,*,*,*")
5072 (set_attr "mode" "V1DF,V1DF,V2DF,V2DF,DF,DF,DF")])
;; Post-reload split (TARGET_SSE2): when the V2DF destination is in
;; memory and its element 0 is kept (match_dup 0), only the other half
;; changes, so the insn becomes a DFmode store of register operand 1 at
;; byte offset 8.
;; NOTE(review): the opening line of this definition is not visible in
;; this excerpt -- confirm it is a define_split.
5075 [(set (match_operand:V2DF 0 "memory_operand")
5077 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
5078 (match_operand:DF 1 "register_operand")))]
5079 "TARGET_SSE2 && reload_completed"
5080 [(set (match_dup 0) (match_dup 1))]
5081 "operands[0] = adjust_address (operands[0], DFmode, 8);")
;; Expander wrapper around sse2_loadlpd.  The pattern pairs DF
;; operand 2 with element 1 of V2DF operand 1.  Operands are first
;; legitimized by ix86_fixup_binary_operands, which returns the
;; destination to use; if that differs from operand 0, the result is
;; copied to operand 0 afterwards.
5083 (define_expand "sse2_loadlpd_exp"
5084 [(set (match_operand:V2DF 0 "nonimmediate_operand")
5086 (match_operand:DF 2 "nonimmediate_operand")
5088 (match_operand:V2DF 1 "nonimmediate_operand")
5089 (parallel [(const_int 1)]))))]
5092 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
5094 emit_insn (gen_sse2_loadlpd (dst, operands[1], operands[2]));
5096 /* Fix up the destination if needed. */
5097 if (dst != operands[0])
5098 emit_move_insn (operands[0], dst);
5103 ;; Avoid combining registers from different units in a single alternative,
5104 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Pairs DF operand 2 with element 1 of V2DF operand 1.  Requires
;; TARGET_SSE2 and rejects the case where operands 1 and 2 are both in
;; memory.  Eleven alternatives: the first eight write an SSE register
;; (movsd, movlpd, shufpd and movhpd forms, plus their VEX variants);
;; the last three write a memory destination from an SSE, x87 (*f) or
;; general (r) register.  Operand 1 may also be the constant matched by
;; constraint "C" (alternative 0).
5105 (define_insn "sse2_loadlpd"
5106 [(set (match_operand:V2DF 0 "nonimmediate_operand"
5107 "=x,x,x,x,x,x,x,x,m,m ,m")
5109 (match_operand:DF 2 "nonimmediate_operand"
5110 " m,m,m,x,x,0,0,x,x,*f,r")
5112 (match_operand:V2DF 1 "vector_move_operand"
5113 " C,0,x,0,x,x,o,o,0,0 ,0")
5114 (parallel [(const_int 1)]))))]
5115 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
5117 %vmovsd\t{%2, %0|%0, %2}
5118 movlpd\t{%2, %0|%0, %2}
5119 vmovlpd\t{%2, %1, %0|%0, %1, %2}
5120 movsd\t{%2, %0|%0, %2}
5121 vmovsd\t{%2, %1, %0|%0, %1, %2}
5122 shufpd\t{$2, %1, %0|%0, %1, 2}
5123 movhpd\t{%H1, %0|%0, %H1}
5124 vmovhpd\t{%H1, %2, %0|%0, %2, %H1}
5128 [(set_attr "isa" "*,noavx,avx,noavx,avx,noavx,noavx,avx,*,*,*")
5130 (cond [(eq_attr "alternative" "5")
5131 (const_string "sselog")
5132 (eq_attr "alternative" "9")
5133 (const_string "fmov")
5134 (eq_attr "alternative" "10")
5135 (const_string "imov")
5137 (const_string "ssemov")))
5138 (set_attr "prefix_data16" "*,1,*,*,*,*,1,*,*,*,*")
5139 (set_attr "length_immediate" "*,*,*,*,*,1,*,*,*,*,*")
5140 (set_attr "prefix" "maybe_vex,orig,vex,orig,vex,orig,orig,vex,*,*,*")
5141 (set_attr "mode" "DF,V1DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,DF,DF,DF")])
;; Post-reload split (TARGET_SSE2): when the V2DF destination is in
;; memory and its element 1 is kept (match_dup 0), only the other half
;; changes, so the insn becomes a DFmode store of register operand 1 at
;; byte offset 0.
;; NOTE(review): the opening line of this definition is not visible in
;; this excerpt -- confirm it is a define_split.
5144 [(set (match_operand:V2DF 0 "memory_operand")
5146 (match_operand:DF 1 "register_operand")
5147 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
5148 "TARGET_SSE2 && reload_completed"
5149 [(set (match_dup 0) (match_dup 1))]
5150 "operands[0] = adjust_address (operands[0], DFmode, 0);")
;; Combines V2DF operands 2 and 1 into a V2DF result, with nine
;; alternatives: movsd/vmovsd between registers, movlpd/vmovlpd loading
;; operand 2 from memory, a movlpd store to memory, shufpd, and
;; movhps/vmovhps forms that read the high half (%H1) of a memory
;; operand 1 or store into the high half (%H0) of a memory destination.
;; The prefix_data16 attribute is conditional on alternatives 2 and 4
;; with TARGET_AVX disabled.
5152 (define_insn "sse2_movsd"
5153 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,m,x,x,x,o")
5155 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,m,m,x,0,0,x,0")
5156 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,0,x,0,x,o,o,x")
5160 movsd\t{%2, %0|%0, %2}
5161 vmovsd\t{%2, %1, %0|%0, %1, %2}
5162 movlpd\t{%2, %0|%0, %q2}
5163 vmovlpd\t{%2, %1, %0|%0, %1, %q2}
5164 %vmovlpd\t{%2, %0|%q0, %2}
5165 shufpd\t{$2, %1, %0|%0, %1, 2}
5166 movhps\t{%H1, %0|%0, %H1}
5167 vmovhps\t{%H1, %2, %0|%0, %2, %H1}
5168 %vmovhps\t{%1, %H0|%H0, %1}"
5169 [(set_attr "isa" "noavx,avx,noavx,avx,*,noavx,noavx,avx,*")
5172 (eq_attr "alternative" "5")
5173 (const_string "sselog")
5174 (const_string "ssemov")))
5175 (set (attr "prefix_data16")
5177 (and (eq_attr "alternative" "2,4")
5178 (not (match_test "TARGET_AVX")))
5180 (const_string "*")))
5181 (set_attr "length_immediate" "*,*,*,*,*,1,*,*,*")
5182 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig,vex,maybe_vex")
5183 (set_attr "mode" "DF,DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,V1DF")])
;; Builds a V2DF register from a single DF operand 1.  Alternative 0
;; (noavx) has operand 1 tied to the destination; alternative 1 (sse3)
;; accepts a register or memory source and emits movddup.  Only the
;; second output template is visible in this excerpt.
5185 (define_insn "vec_dupv2df"
5186 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
5188 (match_operand:DF 1 "nonimmediate_operand" " 0,xm")))]
5192 %vmovddup\t{%1, %0|%0, %1}"
5193 [(set_attr "isa" "noavx,sse3")
5194 (set_attr "type" "sselog1")
5195 (set_attr "prefix" "orig,maybe_vex")
5196 (set_attr "mode" "V2DF,DF")])
;; Builds a V2DF register from DF operands 1 and 2, with eight
;; alternatives: unpcklpd/vunpcklpd for two registers, movddup when
;; operand 2 is tied to a memory operand 1, movhpd/vmovhpd when
;; operand 2 is in memory, movsd when operand 2 is the constant matched
;; by "C", and movlhps/movhps as the noavx fallbacks.
5198 (define_insn "*vec_concatv2df"
5199 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x,x,x,x,x,x")
5201 (match_operand:DF 1 "nonimmediate_operand" " 0,x,m,0,x,m,0,0")
5202 (match_operand:DF 2 "vector_move_operand" " x,x,1,m,m,C,x,m")))]
5205 unpcklpd\t{%2, %0|%0, %2}
5206 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
5207 %vmovddup\t{%1, %0|%0, %1}
5208 movhpd\t{%2, %0|%0, %2}
5209 vmovhpd\t{%2, %1, %0|%0, %1, %2}
5210 %vmovsd\t{%1, %0|%0, %1}
5211 movlhps\t{%2, %0|%0, %2}
5212 movhps\t{%2, %0|%0, %2}"
5213 [(set_attr "isa" "sse2_noavx,avx,sse3,sse2_noavx,avx,sse2,noavx,noavx")
5216 (eq_attr "alternative" "0,1,2")
5217 (const_string "sselog")
5218 (const_string "ssemov")))
5219 (set_attr "prefix_data16" "*,*,*,1,*,*,*,*")
5220 (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex,orig,orig")
5221 (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,DF,V4SF,V2SF")])
5223 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5225 ;; Parallel integral arithmetic
5227 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Integer vector negation expander over the VI_AVX2 modes.  The
;; preparation code sets operands[2] to a register holding the zero
;; vector of <MODE>mode for the pattern to use.
5229 (define_expand "neg<mode>2"
5230 [(set (match_operand:VI_AVX2 0 "register_operand")
5233 (match_operand:VI_AVX2 1 "nonimmediate_operand")))]
5235 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
;; Named expander for integer vector add/subtract over the VI_AVX2
;; modes.  Operands are legitimized in place by
;; ix86_fixup_binary_operands_no_copy for operation <CODE>.
5237 (define_expand "<plusminus_insn><mode>3"
5238 [(set (match_operand:VI_AVX2 0 "register_operand")
5240 (match_operand:VI_AVX2 1 "nonimmediate_operand")
5241 (match_operand:VI_AVX2 2 "nonimmediate_operand")))]
5243 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Matching insn for integer vector add/subtract, valid under
;; TARGET_SSE2 when ix86_binary_operator_ok accepts the operands.
;; Alternative 0 is the two-operand legacy form with operand 1 tied to
;; the destination; alternative 1 is the three-operand VEX form.
5245 (define_insn "*<plusminus_insn><mode>3"
5246 [(set (match_operand:VI_AVX2 0 "register_operand" "=x,x")
5248 (match_operand:VI_AVX2 1 "nonimmediate_operand" "<comm>0,x")
5249 (match_operand:VI_AVX2 2 "nonimmediate_operand" "xm,xm")))]
5250 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5252 p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
5253 vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5254 [(set_attr "isa" "noavx,avx")
5255 (set_attr "type" "sseiadd")
5256 (set_attr "prefix_data16" "1,*")
5257 (set_attr "prefix" "orig,vex")
5258 (set_attr "mode" "<sseinsnmode>")])
;; Named expander for the sat_plusminus operations over the VI12_AVX2
;; modes.  Operands are legitimized in place by
;; ix86_fixup_binary_operands_no_copy for operation <CODE>.
5260 (define_expand "<sse2_avx2>_<plusminus_insn><mode>3"
5261 [(set (match_operand:VI12_AVX2 0 "register_operand")
5262 (sat_plusminus:VI12_AVX2
5263 (match_operand:VI12_AVX2 1 "nonimmediate_operand")
5264 (match_operand:VI12_AVX2 2 "nonimmediate_operand")))]
5266 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Matching insn for the sat_plusminus operations, valid under
;; TARGET_SSE2 when ix86_binary_operator_ok accepts the operands.
;; Alternative 0 is the two-operand legacy form with operand 1 tied to
;; the destination; alternative 1 is the three-operand VEX form.
5268 (define_insn "*<sse2_avx2>_<plusminus_insn><mode>3"
5269 [(set (match_operand:VI12_AVX2 0 "register_operand" "=x,x")
5270 (sat_plusminus:VI12_AVX2
5271 (match_operand:VI12_AVX2 1 "nonimmediate_operand" "<comm>0,x")
5272 (match_operand:VI12_AVX2 2 "nonimmediate_operand" "xm,xm")))]
5273 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5275 p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
5276 vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5277 [(set_attr "isa" "noavx,avx")
5278 (set_attr "type" "sseiadd")
5279 (set_attr "prefix_data16" "1,*")
5280 (set_attr "prefix" "orig,vex")
5281 (set_attr "mode" "TI")])
;; Multiplication expander over the VI1_AVX2 modes.  There is no
;; matching insn in this excerpt; expansion is delegated entirely to
;; ix86_expand_vecop_qihi (MULT, ...).  Both sources must be registers.
5283 (define_expand "mul<mode>3"
5284 [(set (match_operand:VI1_AVX2 0 "register_operand")
5285 (mult:VI1_AVX2 (match_operand:VI1_AVX2 1 "register_operand")
5286 (match_operand:VI1_AVX2 2 "register_operand")))]
5289 ix86_expand_vecop_qihi (MULT, operands[0], operands[1], operands[2]);
;; Multiplication expander over the VI2_AVX2 modes.  Operands are
;; legitimized in place by ix86_fixup_binary_operands_no_copy (MULT).
5293 (define_expand "mul<mode>3"
5294 [(set (match_operand:VI2_AVX2 0 "register_operand")
5295 (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "nonimmediate_operand")
5296 (match_operand:VI2_AVX2 2 "nonimmediate_operand")))]
5298 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; Matching insn for VI2_AVX2 multiplication: pmullw (two-operand,
;; noavx) or vpmullw (three-operand, avx).  Operand 1 carries the "%"
;; commutative modifier.
5300 (define_insn "*mul<mode>3"
5301 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
5302 (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,x")
5303 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,xm")))]
5304 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
5306 pmullw\t{%2, %0|%0, %2}
5307 vpmullw\t{%2, %1, %0|%0, %1, %2}"
5308 [(set_attr "isa" "noavx,avx")
5309 (set_attr "type" "sseimul")
5310 (set_attr "prefix_data16" "1,*")
5311 (set_attr "prefix" "orig,vex")
5312 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the high-part multiply over the VI2_AVX2 modes: both
;; operands are extended (any_extend) to <ssedoublemode>, multiplied,
;; and the product is shifted right logically.  Operands are
;; legitimized in place by ix86_fixup_binary_operands_no_copy (MULT).
5314 (define_expand "<s>mul<mode>3_highpart"
5315 [(set (match_operand:VI2_AVX2 0 "register_operand")
5317 (lshiftrt:<ssedoublemode>
5318 (mult:<ssedoublemode>
5319 (any_extend:<ssedoublemode>
5320 (match_operand:VI2_AVX2 1 "nonimmediate_operand"))
5321 (any_extend:<ssedoublemode>
5322 (match_operand:VI2_AVX2 2 "nonimmediate_operand")))
5325 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; Matching insn for the high-part multiply: pmulh<u>w (two-operand,
;; noavx) or vpmulh<u>w (three-operand, avx).  Operand 1 carries the
;; "%" commutative modifier.
5327 (define_insn "*<s>mul<mode>3_highpart"
5328 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
5330 (lshiftrt:<ssedoublemode>
5331 (mult:<ssedoublemode>
5332 (any_extend:<ssedoublemode>
5333 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,x"))
5334 (any_extend:<ssedoublemode>
5335 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,xm")))
5337 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
5339 pmulh<u>w\t{%2, %0|%0, %2}
5340 vpmulh<u>w\t{%2, %1, %0|%0, %1, %2}"
5341 [(set_attr "isa" "noavx,avx")
5342 (set_attr "type" "sseimul")
5343 (set_attr "prefix_data16" "1,*")
5344 (set_attr "prefix" "orig,vex")
5345 (set_attr "mode" "<sseinsnmode>")])
;; Expander producing a V4DI result from the even-indexed elements
;; (0, 2, 4, 6) of two V8SI operands.  Operands are legitimized in
;; place by ix86_fixup_binary_operands_no_copy (MULT).
5347 (define_expand "vec_widen_umult_even_v8si"
5348 [(set (match_operand:V4DI 0 "register_operand")
5352 (match_operand:V8SI 1 "nonimmediate_operand")
5353 (parallel [(const_int 0) (const_int 2)
5354 (const_int 4) (const_int 6)])))
5357 (match_operand:V8SI 2 "nonimmediate_operand")
5358 (parallel [(const_int 0) (const_int 2)
5359 (const_int 4) (const_int 6)])))))]
5361 "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")
;; Matching insn (TARGET_AVX2): vpmuludq on the even-indexed elements
;; (0, 2, 4, 6) of two V8SI operands, giving a V4DI result.  Operand 1
;; carries the "%" commutative modifier.
5363 (define_insn "*vec_widen_umult_even_v8si"
5364 [(set (match_operand:V4DI 0 "register_operand" "=x")
5368 (match_operand:V8SI 1 "nonimmediate_operand" "%x")
5369 (parallel [(const_int 0) (const_int 2)
5370 (const_int 4) (const_int 6)])))
5373 (match_operand:V8SI 2 "nonimmediate_operand" "xm")
5374 (parallel [(const_int 0) (const_int 2)
5375 (const_int 4) (const_int 6)])))))]
5376 "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V8SImode, operands)"
5377 "vpmuludq\t{%2, %1, %0|%0, %1, %2}"
5378 [(set_attr "type" "sseimul")
5379 (set_attr "prefix" "vex")
5380 (set_attr "mode" "OI")])
;; Expander producing a V2DI result from the even-indexed elements
;; (0, 2) of two V4SI operands.  Operands are legitimized in place by
;; ix86_fixup_binary_operands_no_copy (MULT).
5382 (define_expand "vec_widen_umult_even_v4si"
5383 [(set (match_operand:V2DI 0 "register_operand")
5387 (match_operand:V4SI 1 "nonimmediate_operand")
5388 (parallel [(const_int 0) (const_int 2)])))
5391 (match_operand:V4SI 2 "nonimmediate_operand")
5392 (parallel [(const_int 0) (const_int 2)])))))]
5394 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; Matching insn (TARGET_SSE2): pmuludq (noavx) or vpmuludq (avx) on
;; the even-indexed elements (0, 2) of two V4SI operands, giving a
;; V2DI result.  Operand 1 carries the "%" commutative modifier.
5396 (define_insn "*vec_widen_umult_even_v4si"
5397 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
5401 (match_operand:V4SI 1 "nonimmediate_operand" "%0,x")
5402 (parallel [(const_int 0) (const_int 2)])))
5405 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
5406 (parallel [(const_int 0) (const_int 2)])))))]
5407 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5409 pmuludq\t{%2, %0|%0, %2}
5410 vpmuludq\t{%2, %1, %0|%0, %1, %2}"
5411 [(set_attr "isa" "noavx,avx")
5412 (set_attr "type" "sseimul")
5413 (set_attr "prefix_data16" "1,*")
5414 (set_attr "prefix" "orig,vex")
5415 (set_attr "mode" "TI")])
;; Expander producing a V4DI result from the even-indexed elements
;; (0, 2, 4, 6) of two V8SI operands; counterpart of
;; vec_widen_umult_even_v8si.  Operands are legitimized in place by
;; ix86_fixup_binary_operands_no_copy (MULT).
5417 (define_expand "vec_widen_smult_even_v8si"
5418 [(set (match_operand:V4DI 0 "register_operand")
5422 (match_operand:V8SI 1 "nonimmediate_operand")
5423 (parallel [(const_int 0) (const_int 2)
5424 (const_int 4) (const_int 6)])))
5427 (match_operand:V8SI 2 "nonimmediate_operand")
5428 (parallel [(const_int 0) (const_int 2)
5429 (const_int 4) (const_int 6)])))))]
5431 "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")
;; Matching insn (TARGET_AVX2): vpmuldq on the even-indexed elements
;; (0, 2, 4, 6) of two V8SI operands, giving a V4DI result.
;; Operand 1 carries the "%" commutative modifier, as in the unsigned
;; counterpart and the other multiply insns here: the insn condition
;; uses ix86_binary_operator_ok (MULT, ...), and the modifier lets the
;; register allocator swap operands 1 and 2 when that helps, e.g. when
;; operand 1 is the one in memory.
5433 (define_insn "*vec_widen_smult_even_v8si"
5434 [(set (match_operand:V4DI 0 "register_operand" "=x")
5438 (match_operand:V8SI 1 "nonimmediate_operand" "%x")
5439 (parallel [(const_int 0) (const_int 2)
5440 (const_int 4) (const_int 6)])))
5443 (match_operand:V8SI 2 "nonimmediate_operand" "xm")
5444 (parallel [(const_int 0) (const_int 2)
5445 (const_int 4) (const_int 6)])))))]
5446 "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V8SImode, operands)"
5447 "vpmuldq\t{%2, %1, %0|%0, %1, %2}"
5448 [(set_attr "isa" "avx")
5449 (set_attr "type" "sseimul")
5450 (set_attr "prefix_extra" "1")
5451 (set_attr "prefix" "vex")
5452 (set_attr "mode" "OI")])
;; Expander producing a V2DI result from the even-indexed elements
;; (0, 2) of two V4SI operands.  Operands are legitimized in place by
;; ix86_fixup_binary_operands_no_copy (MULT).
5454 (define_expand "sse4_1_mulv2siv2di3"
5455 [(set (match_operand:V2DI 0 "register_operand")
5459 (match_operand:V4SI 1 "nonimmediate_operand")
5460 (parallel [(const_int 0) (const_int 2)])))
5463 (match_operand:V4SI 2 "nonimmediate_operand")
5464 (parallel [(const_int 0) (const_int 2)])))))]
5466 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; Matching insn (TARGET_SSE4_1): pmuldq (noavx) or vpmuldq (avx) on
;; the even-indexed elements (0, 2) of two V4SI operands, giving a
;; V2DI result.  Operand 1 carries the "%" commutative modifier.
5468 (define_insn "*sse4_1_mulv2siv2di3"
5469 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
5473 (match_operand:V4SI 1 "nonimmediate_operand" "%0,x")
5474 (parallel [(const_int 0) (const_int 2)])))
5477 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
5478 (parallel [(const_int 0) (const_int 2)])))))]
5479 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5481 pmuldq\t{%2, %0|%0, %2}
5482 vpmuldq\t{%2, %1, %0|%0, %1, %2}"
5483 [(set_attr "isa" "noavx,avx")
5484 (set_attr "type" "sseimul")
5485 (set_attr "prefix_data16" "1,*")
5486 (set_attr "prefix_extra" "1")
5487 (set_attr "prefix" "orig,vex")
5488 (set_attr "mode" "TI")])
;; Expander for the 256-bit pmaddwd pattern: V16HI operands 1 and 2
;; produce a V8SI result.  The pattern selects the even-indexed
;; elements (0, 2, ..., 14) of both operands in one half and the
;; odd-indexed elements (1, 3, ..., 15) of the same operands
;; (match_dup) in the other.  Operands are legitimized in place by
;; ix86_fixup_binary_operands_no_copy (MULT).
5490 (define_expand "avx2_pmaddwd"
5491 [(set (match_operand:V8SI 0 "register_operand")
5496 (match_operand:V16HI 1 "nonimmediate_operand")
5497 (parallel [(const_int 0) (const_int 2)
5498 (const_int 4) (const_int 6)
5499 (const_int 8) (const_int 10)
5500 (const_int 12) (const_int 14)])))
5503 (match_operand:V16HI 2 "nonimmediate_operand")
5504 (parallel [(const_int 0) (const_int 2)
5505 (const_int 4) (const_int 6)
5506 (const_int 8) (const_int 10)
5507 (const_int 12) (const_int 14)]))))
5510 (vec_select:V8HI (match_dup 1)
5511 (parallel [(const_int 1) (const_int 3)
5512 (const_int 5) (const_int 7)
5513 (const_int 9) (const_int 11)
5514 (const_int 13) (const_int 15)])))
5516 (vec_select:V8HI (match_dup 2)
5517 (parallel [(const_int 1) (const_int 3)
5518 (const_int 5) (const_int 7)
5519 (const_int 9) (const_int 11)
5520 (const_int 13) (const_int 15)]))))))]
5522 "ix86_fixup_binary_operands_no_copy (MULT, V16HImode, operands);")
;; Matching insn (TARGET_AVX2) emitting vpmaddwd: V16HI operands 1 and
;; 2 produce a V8SI result from the even-indexed and odd-indexed
;; element selections of both operands.  Operand 1 carries the "%"
;; commutative modifier.
5524 (define_insn "*avx2_pmaddwd"
5525 [(set (match_operand:V8SI 0 "register_operand" "=x")
5530 (match_operand:V16HI 1 "nonimmediate_operand" "%x")
5531 (parallel [(const_int 0) (const_int 2)
5532 (const_int 4) (const_int 6)
5533 (const_int 8) (const_int 10)
5534 (const_int 12) (const_int 14)])))
5537 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
5538 (parallel [(const_int 0) (const_int 2)
5539 (const_int 4) (const_int 6)
5540 (const_int 8) (const_int 10)
5541 (const_int 12) (const_int 14)]))))
5544 (vec_select:V8HI (match_dup 1)
5545 (parallel [(const_int 1) (const_int 3)
5546 (const_int 5) (const_int 7)
5547 (const_int 9) (const_int 11)
5548 (const_int 13) (const_int 15)])))
5550 (vec_select:V8HI (match_dup 2)
5551 (parallel [(const_int 1) (const_int 3)
5552 (const_int 5) (const_int 7)
5553 (const_int 9) (const_int 11)
5554 (const_int 13) (const_int 15)]))))))]
5555 "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V16HImode, operands)"
5556 "vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
5557 [(set_attr "type" "sseiadd")
5558 (set_attr "prefix" "vex")
5559 (set_attr "mode" "OI")])
;; 128-bit counterpart of the avx2_pmaddwd expander: V8HI inputs
;; (operands 1 and 2) and a V4SI register result (operand 0).
;; The template selects even elements (0, 2, 4, 6) and odd elements
;; (1, 3, 5, 7) of both inputs.
;; The preparation statement calls ix86_fixup_binary_operands_no_copy
;; with MULT and V8HImode; presumably to legitimize the operand pair for
;; the insn pattern -- confirm in i386.c.
5561 (define_expand "sse2_pmaddwd"
5562 [(set (match_operand:V4SI 0 "register_operand")
5567 (match_operand:V8HI 1 "nonimmediate_operand")
5568 (parallel [(const_int 0) (const_int 2)
5569 (const_int 4) (const_int 6)])))
5572 (match_operand:V8HI 2 "nonimmediate_operand")
5573 (parallel [(const_int 0) (const_int 2)
5574 (const_int 4) (const_int 6)]))))
5577 (vec_select:V4HI (match_dup 1)
5578 (parallel [(const_int 1) (const_int 3)
5579 (const_int 5) (const_int 7)])))
5581 (vec_select:V4HI (match_dup 2)
5582 (parallel [(const_int 1) (const_int 3)
5583 (const_int 5) (const_int 7)]))))))]
5585 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; Insn matched for the 128-bit packed multiply-add.  Requires
;; TARGET_SSE2 and ix86_binary_operator_ok.
;; Two alternatives, selected by the "isa" attribute:
;;   0 (noavx): operand 1 is tied to the destination ("%0"); emits the
;;              two-operand pmaddwd.
;;   1 (avx):   operand 1 is any SSE register; emits the three-operand
;;              vpmaddwd.
;; Operand 2 may be register or memory in both alternatives.
5587 (define_insn "*sse2_pmaddwd"
5588 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
5593 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x")
5594 (parallel [(const_int 0) (const_int 2)
5595 (const_int 4) (const_int 6)])))
5598 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
5599 (parallel [(const_int 0) (const_int 2)
5600 (const_int 4) (const_int 6)]))))
5603 (vec_select:V4HI (match_dup 1)
5604 (parallel [(const_int 1) (const_int 3)
5605 (const_int 5) (const_int 7)])))
5607 (vec_select:V4HI (match_dup 2)
5608 (parallel [(const_int 1) (const_int 3)
5609 (const_int 5) (const_int 7)]))))))]
5610 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5612 pmaddwd\t{%2, %0|%0, %2}
5613 vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
5614 [(set_attr "isa" "noavx,avx")
5615 (set_attr "type" "sseiadd")
5616 (set_attr "atom_unit" "simul")
5617 (set_attr "prefix_data16" "1,*")
5618 (set_attr "prefix" "orig,vex")
5619 (set_attr "mode" "TI")])
;; Named expander for multiplication over the VI4_AVX2 modes.  Inputs
;; are accepted as general_vector_operand.
;; Two strategies appear in the body:
;;   - force any input that is not a nonimmediate_operand into a
;;     register, then run ix86_fixup_binary_operands_no_copy (MULT) so
;;     the template can be emitted as written;
;;   - hand operands 0-2 to ix86_expand_sse2_mulv4si3.
;; NOTE(review): presumably the first path is taken when the pmulld insn
;; (TARGET_SSE4_1, see the insn pattern) is available -- confirm.
5621 (define_expand "mul<mode>3"
5622 [(set (match_operand:VI4_AVX2 0 "register_operand")
5624 (match_operand:VI4_AVX2 1 "general_vector_operand")
5625 (match_operand:VI4_AVX2 2 "general_vector_operand")))]
5630 if (!nonimmediate_operand (operands[1], <MODE>mode))
5631 operands[1] = force_reg (<MODE>mode, operands[1]);
5632 if (!nonimmediate_operand (operands[2], <MODE>mode))
5633 operands[2] = force_reg (<MODE>mode, operands[2]);
5634 ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
5638 ix86_expand_sse2_mulv4si3 (operands[0], operands[1], operands[2]);
;; Insn for multiplication over the VI4_AVX2 modes.  Requires
;; TARGET_SSE4_1 and ix86_binary_operator_ok.
;; Alternative 0 (noavx) ties operand 1 to the destination and emits
;; pmulld; alternative 1 (avx) emits the three-operand vpmulld.
;; Operand 1 is marked commutative ("%"); operand 2 may be memory.
5643 (define_insn "*<sse4_1_avx2>_mul<mode>3"
5644 [(set (match_operand:VI4_AVX2 0 "register_operand" "=x,x")
5646 (match_operand:VI4_AVX2 1 "nonimmediate_operand" "%0,x")
5647 (match_operand:VI4_AVX2 2 "nonimmediate_operand" "xm,xm")))]
5648 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
5650 pmulld\t{%2, %0|%0, %2}
5651 vpmulld\t{%2, %1, %0|%0, %1, %2}"
5652 [(set_attr "isa" "noavx,avx")
5653 (set_attr "type" "sseimul")
5654 (set_attr "prefix_extra" "1")
5655 (set_attr "prefix" "orig,vex")
5656 (set_attr "btver2_decode" "vector,vector")
5657 (set_attr "mode" "<sseinsnmode>")])
;; Named expander for multiplication over the VI8_AVX2 modes.  All three
;; operands must be registers.  No insn is emitted from the template
;; here; the work is delegated to ix86_expand_sse2_mulvxdi3, which
;; receives the destination and both inputs.
5659 (define_expand "mul<mode>3"
5660 [(set (match_operand:VI8_AVX2 0 "register_operand")
5661 (mult:VI8_AVX2 (match_operand:VI8_AVX2 1 "register_operand")
5662 (match_operand:VI8_AVX2 2 "register_operand")))]
5665 ix86_expand_sse2_mulvxdi3 (operands[0], operands[1], operands[2]);
;; Widening multiply, "hi" variant, for the VI124_AVX2 modes.  The
;; result (operand 0) has the wider <sseunpackmode>; both inputs are
;; registers.  The any_extend wrapper instantiates the pattern for each
;; extension code of that iterator (the <s> in the name).
;; Expansion is delegated to ix86_expand_mul_widen_hilo, which receives
;; operands 0-2 followed by further arguments.
5669 (define_expand "vec_widen_<s>mult_hi_<mode>"
5670 [(match_operand:<sseunpackmode> 0 "register_operand")
5671 (any_extend:<sseunpackmode>
5672 (match_operand:VI124_AVX2 1 "register_operand"))
5673 (match_operand:VI124_AVX2 2 "register_operand")]
5676 ix86_expand_mul_widen_hilo (operands[0], operands[1], operands[2],
;; Widening multiply, "lo" variant, for the VI124_AVX2 modes.  Same
;; operand layout as the "hi" variant: a <sseunpackmode> result and two
;; register inputs, instantiated per any_extend code.
;; Expansion is delegated to ix86_expand_mul_widen_hilo, which receives
;; operands 0-2 followed by further arguments.
5681 (define_expand "vec_widen_<s>mult_lo_<mode>"
5682 [(match_operand:<sseunpackmode> 0 "register_operand")
5683 (any_extend:<sseunpackmode>
5684 (match_operand:VI124_AVX2 1 "register_operand"))
5685 (match_operand:VI124_AVX2 2 "register_operand")]
5688 ix86_expand_mul_widen_hilo (operands[0], operands[1], operands[2],
5693 ;; Most widen_<s>mult_even_<mode> can be handled directly from other
5694 ;; named patterns, but signed V4SI needs special help for plain SSE2.
;; Operands: V2DI register result (0) and two V4SI inputs (1, 2) that
;; may be register or memory.  Expansion is delegated to
;; ix86_expand_mul_widen_evenodd, which receives operands 0-2 followed
;; by further arguments.
5695 (define_expand "vec_widen_smult_even_v4si"
5696 [(match_operand:V2DI 0 "register_operand")
5697 (match_operand:V4SI 1 "nonimmediate_operand")
5698 (match_operand:V4SI 2 "nonimmediate_operand")]
5701 ix86_expand_mul_widen_evenodd (operands[0], operands[1], operands[2],
;; Widening multiply, "odd" variant, for the VI4_AVX2 modes,
;; instantiated per any_extend code.  The result (operand 0) is a
;; <sseunpackmode> register; the inputs are general_vector_operand.
;; Expansion is delegated to ix86_expand_mul_widen_evenodd, which
;; receives operands 0-2 followed by further arguments.
5706 (define_expand "vec_widen_<s>mult_odd_<mode>"
5707 [(match_operand:<sseunpackmode> 0 "register_operand")
5708 (any_extend:<sseunpackmode>
5709 (match_operand:VI4_AVX2 1 "general_vector_operand"))
5710 (match_operand:VI4_AVX2 2 "general_vector_operand")]
5713 ix86_expand_mul_widen_evenodd (operands[0], operands[1], operands[2],
;; Signed dot-product expander for the VI2_AVX2 modes.
;; Operands: 0 = result and 3 = a further input, both <sseunpackmode>
;; registers; 1 and 2 = the VI2_AVX2 register inputs.
;; The body allocates a fresh <sseunpackmode> pseudo T, emits the
;; <sse2_avx2>_pmaddwd pattern to compute T from operands 1 and 2, and
;; then sets operand 0 to a PLUS in <sseunpackmode>.
;; NOTE(review): the PLUS presumably adds T and operand 3 (the
;; accumulator) -- confirm against the full body.
5718 (define_expand "sdot_prod<mode>"
5719 [(match_operand:<sseunpackmode> 0 "register_operand")
5720 (match_operand:VI2_AVX2 1 "register_operand")
5721 (match_operand:VI2_AVX2 2 "register_operand")
5722 (match_operand:<sseunpackmode> 3 "register_operand")]
5725 rtx t = gen_reg_rtx (<sseunpackmode>mode);
5726 emit_insn (gen_<sse2_avx2>_pmaddwd (t, operands[1], operands[2]));
5727 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
5728 gen_rtx_PLUS (<sseunpackmode>mode,
5733 ;; Normally we use widen_mul_even/odd, but combine can't quite get it all
5734 ;; back together when madd is available.
;; Operands: 0 = V2DI result, 1 and 2 = V4SI inputs, 3 = V2DI input; all
;; registers.  The body chains two XOP patterns through a fresh V2DI
;; pseudo T:
;;   1. gen_xop_pmacsdqh writes T from operands 1, 2 and 3;
;;   2. gen_xop_pmacsdql writes operand 0 from operands 1, 2 and T.
;; The first result is thus the addend of the second.
5735 (define_expand "sdot_prodv4si"
5736 [(match_operand:V2DI 0 "register_operand")
5737 (match_operand:V4SI 1 "register_operand")
5738 (match_operand:V4SI 2 "register_operand")
5739 (match_operand:V2DI 3 "register_operand")]
5742 rtx t = gen_reg_rtx (V2DImode);
5743 emit_insn (gen_xop_pmacsdqh (t, operands[1], operands[2], operands[3]));
5744 emit_insn (gen_xop_pmacsdql (operands[0], operands[1], operands[2], t));
;; Shift insn for the VI24_AVX2 modes that emits psra<suffix> (noavx,
;; destination tied to operand 1) or vpsra<suffix> (avx, three-operand).
;; Operand 2 is the SImode shift count: a register or a constant
;; accepted by the "N" constraint; memory is excluded by
;; nonmemory_operand.
;; The length_immediate attribute depends on whether operand 2 is a
;; const_int; it is "0" otherwise.
5748 (define_insn "ashr<mode>3"
5749 [(set (match_operand:VI24_AVX2 0 "register_operand" "=x,x")
5751 (match_operand:VI24_AVX2 1 "register_operand" "0,x")
5752 (match_operand:SI 2 "nonmemory_operand" "xN,xN")))]
5755 psra<ssemodesuffix>\t{%2, %0|%0, %2}
5756 vpsra<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5757 [(set_attr "isa" "noavx,avx")
5758 (set_attr "type" "sseishft")
5759 (set (attr "length_immediate")
5760 (if_then_else (match_operand 2 "const_int_operand")
5762 (const_string "0")))
5763 (set_attr "prefix_data16" "1,*")
5764 (set_attr "prefix" "orig,vex")
5765 (set_attr "mode" "<sseinsnmode>")])
;; Shift insn for the VI248_AVX2 modes, instantiated once per code of
;; the any_lshift iterator; <vshift> supplies the mnemonic stem.
;; Alternative 0 (noavx) ties operand 1 to the destination and emits the
;; two-operand p<vshift><suffix>; alternative 1 (avx) emits the
;; three-operand vp<vshift><suffix>.
;; Operand 2 is the SImode shift count (register or "N" constant).
;; The length_immediate attribute depends on whether operand 2 is a
;; const_int; it is "0" otherwise.
5767 (define_insn "<shift_insn><mode>3"
5768 [(set (match_operand:VI248_AVX2 0 "register_operand" "=x,x")
5769 (any_lshift:VI248_AVX2
5770 (match_operand:VI248_AVX2 1 "register_operand" "0,x")
5771 (match_operand:SI 2 "nonmemory_operand" "xN,xN")))]
5774 p<vshift><ssemodesuffix>\t{%2, %0|%0, %2}
5775 vp<vshift><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5776 [(set_attr "isa" "noavx,avx")
5777 (set_attr "type" "sseishft")
5778 (set (attr "length_immediate")
5779 (if_then_else (match_operand 2 "const_int_operand")
5781 (const_string "0")))
5782 (set_attr "prefix_data16" "1,*")
5783 (set_attr "prefix" "orig,vex")
5784 (set_attr "mode" "<sseinsnmode>")])
;; Whole-vector shift expander for the VI_128 modes.  The shift count
;; (operand 2) must satisfy const_0_to_255_mul_8_operand.
;; The body replaces operands 0 and 1 with their V1TImode lowparts, so
;; the emitted RTL operates on the vector as a single 128-bit value
;; regardless of the original element mode.
5786 (define_expand "vec_shl_<mode>"
5787 [(set (match_operand:VI_128 0 "register_operand")
5789 (match_operand:VI_128 1 "register_operand")
5790 (match_operand:SI 2 "const_0_to_255_mul_8_operand")))]
5793 operands[0] = gen_lowpart (V1TImode, operands[0]);
5794 operands[1] = gen_lowpart (V1TImode, operands[1]);
;; Shift insn for the VIMAX_AVX2 modes, emitting pslldq (noavx) or
;; vpslldq (avx).
;; Operand 2 must satisfy const_0_to_255_mul_8_operand.  Before
;; printing, the output code divides it by 8 and prints the quotient as
;; the instruction's immediate.
5797 (define_insn "<sse2_avx2>_ashl<mode>3"
5798 [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,x")
5800 (match_operand:VIMAX_AVX2 1 "register_operand" "0,x")
5801 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
5804 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
5806 switch (which_alternative)
5809 return "pslldq\t{%2, %0|%0, %2}";
5811 return "vpslldq\t{%2, %1, %0|%0, %1, %2}";
5816 [(set_attr "isa" "noavx,avx")
5817 (set_attr "type" "sseishft")
5818 (set_attr "length_immediate" "1")
5819 (set_attr "prefix_data16" "1,*")
5820 (set_attr "prefix" "orig,vex")
5821 (set_attr "mode" "<sseinsnmode>")])
;; Whole-vector shift expander for the VI_128 modes; the counterpart of
;; vec_shl_<mode>.  The shift count (operand 2) must satisfy
;; const_0_to_255_mul_8_operand.
;; The body replaces operands 0 and 1 with their V1TImode lowparts so
;; the emitted RTL treats the vector as one 128-bit value.
5823 (define_expand "vec_shr_<mode>"
5824 [(set (match_operand:VI_128 0 "register_operand")
5826 (match_operand:VI_128 1 "register_operand")
5827 (match_operand:SI 2 "const_0_to_255_mul_8_operand")))]
5830 operands[0] = gen_lowpart (V1TImode, operands[0]);
5831 operands[1] = gen_lowpart (V1TImode, operands[1]);
;; Logical right shift insn for the VIMAX_AVX2 modes, emitting psrldq
;; (noavx) or vpsrldq (avx).
;; Operand 2 must satisfy const_0_to_255_mul_8_operand.  Before
;; printing, the output code divides it by 8 and prints the quotient as
;; the instruction's immediate.
5834 (define_insn "<sse2_avx2>_lshr<mode>3"
5835 [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,x")
5836 (lshiftrt:VIMAX_AVX2
5837 (match_operand:VIMAX_AVX2 1 "register_operand" "0,x")
5838 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
5841 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
5843 switch (which_alternative)
5846 return "psrldq\t{%2, %0|%0, %2}";
5848 return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
5853 [(set_attr "isa" "noavx,avx")
5854 (set_attr "type" "sseishft")
5855 (set_attr "length_immediate" "1")
5856 (set_attr "atom_unit" "sishuf")
5857 (set_attr "prefix_data16" "1,*")
5858 (set_attr "prefix" "orig,vex")
5859 (set_attr "mode" "<sseinsnmode>")])
;; Named expander, instantiated per <code>, over the VI124_256 modes.
;; The matching insn (*avx2_<code><mode>3) prints vp<maxmin_int>, so
;; <code> presumably ranges over the integer min/max codes -- confirm
;; against the iterator definition.
;; The preparation statement runs ix86_fixup_binary_operands_no_copy
;; with <CODE> on the operands.
5862 (define_expand "<code><mode>3"
5863 [(set (match_operand:VI124_256 0 "register_operand")
5865 (match_operand:VI124_256 1 "nonimmediate_operand")
5866 (match_operand:VI124_256 2 "nonimmediate_operand")))]
5868 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; 256-bit integer min/max insn for the VI124_256 modes.  Requires
;; TARGET_AVX2 and ix86_binary_operator_ok.  It has a single alternative
;; that prints vp<maxmin_int><ssemodesuffix> in three-operand VEX form.
;; Operand 1 is marked commutative ("%"); operand 2 may be memory.
5870 (define_insn "*avx2_<code><mode>3"
5871 [(set (match_operand:VI124_256 0 "register_operand" "=x")
5873 (match_operand:VI124_256 1 "nonimmediate_operand" "%x")
5874 (match_operand:VI124_256 2 "nonimmediate_operand" "xm")))]
5875 "TARGET_AVX2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5876 "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5877 [(set_attr "type" "sseiadd")
5878 (set_attr "prefix_extra" "1")
5879 (set_attr "prefix" "vex")
5880 (set_attr "mode" "OI")])
;; Min/max expander for the VI8_AVX2 modes.  It is lowered to a vector
;; conditional select through ix86_expand_int_vcond instead of emitting
;; a min/max instruction.
;; The xops array is filled like the operands of the vcond expanders in
;; this file:
;;   xops[0]    destination
;;   xops[1..2] the two values to select between: operands 1, 2 in that
;;              order for SMAX/UMAX, swapped in the other assignment
;;   xops[3]    the comparison "operand 1 > operand 2", using GTU for
;;              UMAX/UMIN and GT otherwise
;;   xops[4..5] the comparison inputs (operands 1 and 2)
;; So a max selects operand 1 where it is greater, and a min selects
;; operand 2 there.
5882 (define_expand "<code><mode>3"
5883 [(set (match_operand:VI8_AVX2 0 "register_operand")
5885 (match_operand:VI8_AVX2 1 "register_operand")
5886 (match_operand:VI8_AVX2 2 "register_operand")))]
5893 xops[0] = operands[0];
5895 if (<CODE> == SMAX || <CODE> == UMAX)
5897 xops[1] = operands[1];
5898 xops[2] = operands[2];
5902 xops[1] = operands[2];
5903 xops[2] = operands[1];
5906 code = (<CODE> == UMAX || <CODE> == UMIN) ? GTU : GT;
5908 xops[3] = gen_rtx_fmt_ee (code, VOIDmode, operands[1], operands[2]);
5909 xops[4] = operands[1];
5910 xops[5] = operands[2];
5912 ok = ix86_expand_int_vcond (xops);
;; Min/max expander for the VI124_128 modes that compares with GT.
;; Two paths:
;;   - With TARGET_SSE4_1, or for V8HImode, only
;;     ix86_fixup_binary_operands_no_copy is run on the operands.
;;   - Otherwise both inputs are forced into registers and the operation
;;     is lowered through ix86_expand_int_vcond.  xops[3] is the
;;     comparison "operand 1 > operand 2", xops[4..5] are its inputs,
;;     and xops[1..2] are operands 1 and 2 either in order or swapped.
;; NOTE(review): the swap presumably distinguishes max from min, as in
;; the VI8_AVX2 expander -- confirm.
5917 (define_expand "<code><mode>3"
5918 [(set (match_operand:VI124_128 0 "register_operand")
5920 (match_operand:VI124_128 1 "nonimmediate_operand")
5921 (match_operand:VI124_128 2 "nonimmediate_operand")))]
5924 if (TARGET_SSE4_1 || <MODE>mode == V8HImode)
5925 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
5931 xops[0] = operands[0];
5932 operands[1] = force_reg (<MODE>mode, operands[1]);
5933 operands[2] = force_reg (<MODE>mode, operands[2]);
5937 xops[1] = operands[1];
5938 xops[2] = operands[2];
5942 xops[1] = operands[2];
5943 xops[2] = operands[1];
5946 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
5947 xops[4] = operands[1];
5948 xops[5] = operands[2];
5950 ok = ix86_expand_int_vcond (xops);
;; 128-bit integer min/max insn for the VI14_128 modes.  Requires
;; TARGET_SSE4_1 and ix86_binary_operator_ok.
;; Alternative 0 (noavx) ties operand 1 to the destination and prints
;; p<maxmin_int><suffix>; alternative 1 (avx) prints the three-operand
;; vp<maxmin_int><suffix>.  Operand 1 is marked commutative ("%").
5956 (define_insn "*sse4_1_<code><mode>3"
5957 [(set (match_operand:VI14_128 0 "register_operand" "=x,x")
5959 (match_operand:VI14_128 1 "nonimmediate_operand" "%0,x")
5960 (match_operand:VI14_128 2 "nonimmediate_operand" "xm,xm")))]
5961 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5963 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
5964 vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5965 [(set_attr "isa" "noavx,avx")
5966 (set_attr "type" "sseiadd")
5967 (set_attr "prefix_extra" "1,*")
5968 (set_attr "prefix" "orig,vex")
5969 (set_attr "mode" "TI")])
;; V8HI integer min/max insn.  It needs only TARGET_SSE2 plus
;; ix86_binary_operator_ok.
;; Alternative 0 (noavx) prints p<maxmin_int>w with operand 1 tied to
;; the destination; alternative 1 (avx) prints the three-operand
;; vp<maxmin_int>w.  Operand 1 is marked commutative ("%").
5971 (define_insn "*<code>v8hi3"
5972 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
5974 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x")
5975 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")))]
5976 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V8HImode, operands)"
5978 p<maxmin_int>w\t{%2, %0|%0, %2}
5979 vp<maxmin_int>w\t{%2, %1, %0|%0, %1, %2}"
5980 [(set_attr "isa" "noavx,avx")
5981 (set_attr "type" "sseiadd")
5982 (set_attr "prefix_data16" "1,*")
5983 (set_attr "prefix_extra" "*,1")
5984 (set_attr "prefix" "orig,vex")
5985 (set_attr "mode" "TI")])
;; Min/max expander for the VI124_128 modes that compares with GTU
;; (the unsigned comparison).  Three paths:
;;   - With TARGET_SSE4_1, or for V16QImode, only
;;     ix86_fixup_binary_operands_no_copy is run on the operands.
;;   - UMAX on V8HImode is synthesized from two insns: sse2_ussubv8hi3
;;     computes OP3 from operand 1 and OP2, then addv8hi3 adds OP2 back
;;     into the destination.  OP3 defaults to the destination; a fresh
;;     pseudo is used when the destination is rtx_equal_p to OP2, so the
;;     first insn does not overwrite OP2 before the add reads it.
;;   - Otherwise both inputs are forced into registers and the operation
;;     is lowered through ix86_expand_int_vcond.  xops[3] is
;;     "operand 1 >u operand 2", xops[4..5] are its inputs, and
;;     xops[1..2] are operands 1 and 2 either in order or swapped.
;; NOTE(review): the swap presumably distinguishes UMAX from UMIN --
;; confirm.
5987 (define_expand "<code><mode>3"
5988 [(set (match_operand:VI124_128 0 "register_operand")
5990 (match_operand:VI124_128 1 "nonimmediate_operand")
5991 (match_operand:VI124_128 2 "nonimmediate_operand")))]
5994 if (TARGET_SSE4_1 || <MODE>mode == V16QImode)
5995 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
5996 else if (<CODE> == UMAX && <MODE>mode == V8HImode)
5998 rtx op0 = operands[0], op2 = operands[2], op3 = op0;
5999 operands[1] = force_reg (<MODE>mode, operands[1]);
6000 if (rtx_equal_p (op3, op2))
6001 op3 = gen_reg_rtx (V8HImode);
6002 emit_insn (gen_sse2_ussubv8hi3 (op3, operands[1], op2));
6003 emit_insn (gen_addv8hi3 (op0, op3, op2));
6011 operands[1] = force_reg (<MODE>mode, operands[1]);
6012 operands[2] = force_reg (<MODE>mode, operands[2]);
6014 xops[0] = operands[0];
6018 xops[1] = operands[1];
6019 xops[2] = operands[2];
6023 xops[1] = operands[2];
6024 xops[2] = operands[1];
6027 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
6028 xops[4] = operands[1];
6029 xops[5] = operands[2];
6031 ok = ix86_expand_int_vcond (xops);
;; 128-bit integer min/max insn for the VI24_128 modes.  Requires
;; TARGET_SSE4_1 and ix86_binary_operator_ok.
;; Alternative 0 (noavx) ties operand 1 to the destination and prints
;; p<maxmin_int><suffix>; alternative 1 (avx) prints the three-operand
;; vp<maxmin_int><suffix>.  Operand 1 is marked commutative ("%").
6037 (define_insn "*sse4_1_<code><mode>3"
6038 [(set (match_operand:VI24_128 0 "register_operand" "=x,x")
6040 (match_operand:VI24_128 1 "nonimmediate_operand" "%0,x")
6041 (match_operand:VI24_128 2 "nonimmediate_operand" "xm,xm")))]
6042 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6044 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
6045 vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
6046 [(set_attr "isa" "noavx,avx")
6047 (set_attr "type" "sseiadd")
6048 (set_attr "prefix_extra" "1,*")
6049 (set_attr "prefix" "orig,vex")
6050 (set_attr "mode" "TI")])
;; V16QI integer min/max insn.  It needs only TARGET_SSE2 plus
;; ix86_binary_operator_ok.
;; Alternative 0 (noavx) prints p<maxmin_int>b with operand 1 tied to
;; the destination; alternative 1 (avx) prints the three-operand
;; vp<maxmin_int>b.  Operand 1 is marked commutative ("%").
6052 (define_insn "*<code>v16qi3"
6053 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
6055 (match_operand:V16QI 1 "nonimmediate_operand" "%0,x")
6056 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")))]
6057 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V16QImode, operands)"
6059 p<maxmin_int>b\t{%2, %0|%0, %2}
6060 vp<maxmin_int>b\t{%2, %1, %0|%0, %1, %2}"
6061 [(set_attr "isa" "noavx,avx")
6062 (set_attr "type" "sseiadd")
6063 (set_attr "prefix_data16" "1,*")
6064 (set_attr "prefix_extra" "*,1")
6065 (set_attr "prefix" "orig,vex")
6066 (set_attr "mode" "TI")])
6068 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6070 ;; Parallel integral comparisons
6072 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander for the comparison over the VI_256 modes that the
;; *avx2_eq<mode>3 insn implements.  Both inputs may be register or
;; memory; the preparation statement runs
;; ix86_fixup_binary_operands_no_copy with rtx code EQ on the operands.
6074 (define_expand "avx2_eq<mode>3"
6075 [(set (match_operand:VI_256 0 "register_operand")
6077 (match_operand:VI_256 1 "nonimmediate_operand")
6078 (match_operand:VI_256 2 "nonimmediate_operand")))]
6080 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
;; 256-bit compare-equal insn for the VI_256 modes.  Requires
;; TARGET_AVX2 and ix86_binary_operator_ok (EQ).  It has a single
;; alternative that prints vpcmpeq<ssemodesuffix> in three-operand VEX
;; form.  Operand 1 is marked commutative ("%"); operand 2 may be memory.
6082 (define_insn "*avx2_eq<mode>3"
6083 [(set (match_operand:VI_256 0 "register_operand" "=x")
6085 (match_operand:VI_256 1 "nonimmediate_operand" "%x")
6086 (match_operand:VI_256 2 "nonimmediate_operand" "xm")))]
6087 "TARGET_AVX2 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
6088 "vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
6089 [(set_attr "type" "ssecmp")
6090 (set_attr "prefix_extra" "1")
6091 (set_attr "prefix" "vex")
6092 (set_attr "mode" "OI")])
;; V2DI compare-equal insn.  Requires TARGET_SSE4_1 and
;; ix86_binary_operator_ok (EQ).
;; Alternative 0 (noavx) prints pcmpeqq with operand 1 tied to the
;; destination; alternative 1 (avx) prints the three-operand vpcmpeqq.
;; Operand 1 is marked commutative ("%").
6094 (define_insn "*sse4_1_eqv2di3"
6095 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
6097 (match_operand:V2DI 1 "nonimmediate_operand" "%0,x")
6098 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")))]
6099 "TARGET_SSE4_1 && ix86_binary_operator_ok (EQ, V2DImode, operands)"
6101 pcmpeqq\t{%2, %0|%0, %2}
6102 vpcmpeqq\t{%2, %1, %0|%0, %1, %2}"
6103 [(set_attr "isa" "noavx,avx")
6104 (set_attr "type" "ssecmp")
6105 (set_attr "prefix_extra" "1")
6106 (set_attr "prefix" "orig,vex")
6107 (set_attr "mode" "TI")])
;; Compare-equal insn for the VI124_128 modes.  Requires TARGET_SSE2
;; with XOP disabled (!TARGET_XOP), plus ix86_binary_operator_ok (EQ).
;; Alternative 0 (noavx) prints pcmpeq<suffix> with operand 1 tied to
;; the destination; alternative 1 (avx) prints the three-operand
;; vpcmpeq<suffix>.  Operand 1 is marked commutative ("%").
6109 (define_insn "*sse2_eq<mode>3"
6110 [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
6112 (match_operand:VI124_128 1 "nonimmediate_operand" "%0,x")
6113 (match_operand:VI124_128 2 "nonimmediate_operand" "xm,xm")))]
6114 "TARGET_SSE2 && !TARGET_XOP
6115 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
6117 pcmpeq<ssemodesuffix>\t{%2, %0|%0, %2}
6118 vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
6119 [(set_attr "isa" "noavx,avx")
6120 (set_attr "type" "ssecmp")
6121 (set_attr "prefix_data16" "1,*")
6122 (set_attr "prefix" "orig,vex")
6123 (set_attr "mode" "TI")])
;; Expander for the comparison over the VI124_128 modes that the
;; *sse2_eq<mode>3 insn implements.  It is enabled under the same
;; target condition as that insn (TARGET_SSE2 with XOP disabled).
;; The preparation statement runs ix86_fixup_binary_operands_no_copy
;; with rtx code EQ on the operands.
6125 (define_expand "sse2_eq<mode>3"
6126 [(set (match_operand:VI124_128 0 "register_operand")
6128 (match_operand:VI124_128 1 "nonimmediate_operand")
6129 (match_operand:VI124_128 2 "nonimmediate_operand")))]
6130 "TARGET_SSE2 && !TARGET_XOP "
6131 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
;; Expander for the V2DI comparison that the *sse4_1_eqv2di3 insn
;; implements.  Both inputs may be register or memory; the preparation
;; statement runs ix86_fixup_binary_operands_no_copy with rtx code EQ
;; and V2DImode on the operands.
6133 (define_expand "sse4_1_eqv2di3"
6134 [(set (match_operand:V2DI 0 "register_operand")
6136 (match_operand:V2DI 1 "nonimmediate_operand")
6137 (match_operand:V2DI 2 "nonimmediate_operand")))]
6139 "ix86_fixup_binary_operands_no_copy (EQ, V2DImode, operands);")
;; V2DI compare insn that prints pcmpgtq (noavx) or vpcmpgtq (avx).
;; Unlike the compare-equal patterns, operand 1 must be a register and
;; has no "%" modifier, so the two inputs are not interchangeable.
;; Alternative 0 ties operand 1 to the destination; operand 2 may be
;; memory in both alternatives.
6141 (define_insn "sse4_2_gtv2di3"
6142 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
6144 (match_operand:V2DI 1 "register_operand" "0,x")
6145 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")))]
6148 pcmpgtq\t{%2, %0|%0, %2}
6149 vpcmpgtq\t{%2, %1, %0|%0, %1, %2}"
6150 [(set_attr "isa" "noavx,avx")
6151 (set_attr "type" "ssecmp")
6152 (set_attr "prefix_extra" "1")
6153 (set_attr "prefix" "orig,vex")
6154 (set_attr "mode" "TI")])
;; 256-bit compare insn for the VI_256 modes that prints
;; vpcmpgt<ssemodesuffix> in three-operand VEX form.
;; Operand 1 must be a register and has no "%" modifier, so the inputs
;; are not interchangeable; operand 2 may be register or memory.
6156 (define_insn "avx2_gt<mode>3"
6157 [(set (match_operand:VI_256 0 "register_operand" "=x")
6159 (match_operand:VI_256 1 "register_operand" "x")
6160 (match_operand:VI_256 2 "nonimmediate_operand" "xm")))]
6162 "vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
6163 [(set_attr "type" "ssecmp")
6164 (set_attr "prefix_extra" "1")
6165 (set_attr "prefix" "vex")
6166 (set_attr "mode" "OI")])
;; Compare insn for the VI124_128 modes that prints pcmpgt<suffix>
;; (noavx) or vpcmpgt<suffix> (avx).  Requires TARGET_SSE2 with XOP
;; disabled.
;; Operand 1 must be a register and has no "%" modifier, so the inputs
;; are not interchangeable.  Alternative 0 ties it to the destination.
6168 (define_insn "sse2_gt<mode>3"
6169 [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
6171 (match_operand:VI124_128 1 "register_operand" "0,x")
6172 (match_operand:VI124_128 2 "nonimmediate_operand" "xm,xm")))]
6173 "TARGET_SSE2 && !TARGET_XOP"
6175 pcmpgt<ssemodesuffix>\t{%2, %0|%0, %2}
6176 vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
6177 [(set_attr "isa" "noavx,avx")
6178 (set_attr "type" "ssecmp")
6179 (set_attr "prefix_data16" "1,*")
6180 (set_attr "prefix" "orig,vex")
6181 (set_attr "mode" "TI")])
;; Vector conditional select for 256-bit data of any V_256 mode, driven
;; by a comparison over VI_256 integer vectors.
;; Operands:
;;   0     destination (V_256 register)
;;   1, 2  the two values to select between (V_256, no predicate)
;;   3     the comparison operator, applied to operands 4 and 5
;;   4, 5  comparison inputs (VI_256; operand 5 is general_operand)
;; The visible part of the condition requires the data mode and the
;; comparison mode to have the same number of elements.
;; Expansion is delegated to ix86_expand_int_vcond, whose boolean result
;; is captured in "ok".
6183 (define_expand "vcond<V_256:mode><VI_256:mode>"
6184 [(set (match_operand:V_256 0 "register_operand")
6186 (match_operator 3 ""
6187 [(match_operand:VI_256 4 "nonimmediate_operand")
6188 (match_operand:VI_256 5 "general_operand")])
6189 (match_operand:V_256 1)
6190 (match_operand:V_256 2)))]
6192 && (GET_MODE_NUNITS (<V_256:MODE>mode)
6193 == GET_MODE_NUNITS (<VI_256:MODE>mode))"
6195 bool ok = ix86_expand_int_vcond (operands);
;; Vector conditional select for 128-bit data of any V_128 mode, driven
;; by a comparison over VI124_128 integer vectors.
;; Operands:
;;   0     destination (V_128 register)
;;   1, 2  the two values to select between (V_128, no predicate)
;;   3     the comparison operator, applied to operands 4 and 5
;;   4, 5  comparison inputs (VI124_128; operand 5 is general_operand)
;; The visible part of the condition requires the data mode and the
;; comparison mode to have the same number of elements.
;; Expansion is delegated to ix86_expand_int_vcond, whose boolean result
;; is captured in "ok".
6200 (define_expand "vcond<V_128:mode><VI124_128:mode>"
6201 [(set (match_operand:V_128 0 "register_operand")
6203 (match_operator 3 ""
6204 [(match_operand:VI124_128 4 "nonimmediate_operand")
6205 (match_operand:VI124_128 5 "general_operand")])
6206 (match_operand:V_128 1)
6207 (match_operand:V_128 2)))]
6209 && (GET_MODE_NUNITS (<V_128:MODE>mode)
6210 == GET_MODE_NUNITS (<VI124_128:MODE>mode))"
6212 bool ok = ix86_expand_int_vcond (operands);
;; Vector conditional select for the VI8F_128 data modes, driven by a
;; V2DI comparison.
;; Operands:
;;   0     destination (VI8F_128 register)
;;   1, 2  the two values to select between (VI8F_128, no predicate)
;;   3     the comparison operator, applied to operands 4 and 5
;;   4, 5  V2DI comparison inputs (operand 5 is general_operand)
;; Expansion is delegated to ix86_expand_int_vcond, whose boolean result
;; is captured in "ok".
6217 (define_expand "vcond<VI8F_128:mode>v2di"
6218 [(set (match_operand:VI8F_128 0 "register_operand")
6219 (if_then_else:VI8F_128
6220 (match_operator 3 ""
6221 [(match_operand:V2DI 4 "nonimmediate_operand")
6222 (match_operand:V2DI 5 "general_operand")])
6223 (match_operand:VI8F_128 1)
6224 (match_operand:VI8F_128 2)))]
6227 bool ok = ix86_expand_int_vcond (operands);
;; "vcondu" counterpart of the 256-bit vcond expander (by the standard
;; pattern name, the variant for unsigned comparisons).
;; The operand layout is the same as vcond: 0 = destination, 1 and 2 =
;; values to select between, 3 = comparison operator over operands 4
;; and 5.  The predicates differ: both comparison inputs must be
;; nonimmediate_operand, and the selected values are general_operand.
;; The visible part of the condition requires equal element counts in
;; the data and comparison modes.
;; Expansion is delegated to ix86_expand_int_vcond.
6232 (define_expand "vcondu<V_256:mode><VI_256:mode>"
6233 [(set (match_operand:V_256 0 "register_operand")
6235 (match_operator 3 ""
6236 [(match_operand:VI_256 4 "nonimmediate_operand")
6237 (match_operand:VI_256 5 "nonimmediate_operand")])
6238 (match_operand:V_256 1 "general_operand")
6239 (match_operand:V_256 2 "general_operand")))]
6241 && (GET_MODE_NUNITS (<V_256:MODE>mode)
6242 == GET_MODE_NUNITS (<VI_256:MODE>mode))"
6244 bool ok = ix86_expand_int_vcond (operands);
;; "vcondu" counterpart of the 128-bit vcond expander (by the standard
;; pattern name, the variant for unsigned comparisons).
;; The operand layout is the same as vcond: 0 = destination, 1 and 2 =
;; values to select between, 3 = comparison operator over operands 4
;; and 5.  Both comparison inputs must be nonimmediate_operand, and the
;; selected values are general_operand.
;; The visible part of the condition requires equal element counts in
;; the data and comparison modes.
;; Expansion is delegated to ix86_expand_int_vcond.
6249 (define_expand "vcondu<V_128:mode><VI124_128:mode>"
6250 [(set (match_operand:V_128 0 "register_operand")
6252 (match_operator 3 ""
6253 [(match_operand:VI124_128 4 "nonimmediate_operand")
6254 (match_operand:VI124_128 5 "nonimmediate_operand")])
6255 (match_operand:V_128 1 "general_operand")
6256 (match_operand:V_128 2 "general_operand")))]
6258 && (GET_MODE_NUNITS (<V_128:MODE>mode)
6259 == GET_MODE_NUNITS (<VI124_128:MODE>mode))"
6261 bool ok = ix86_expand_int_vcond (operands);
;; "vcondu" counterpart of vcond<VI8F_128:mode>v2di (by the standard
;; pattern name, the variant for unsigned comparisons).
;; Operands: 0 = VI8F_128 destination, 1 and 2 = values to select
;; between (general_operand), 3 = comparison operator over the V2DI
;; operands 4 and 5 (both nonimmediate_operand).
;; Expansion is delegated to ix86_expand_int_vcond.
6266 (define_expand "vcondu<VI8F_128:mode>v2di"
6267 [(set (match_operand:VI8F_128 0 "register_operand")
6268 (if_then_else:VI8F_128
6269 (match_operator 3 ""
6270 [(match_operand:V2DI 4 "nonimmediate_operand")
6271 (match_operand:V2DI 5 "nonimmediate_operand")])
6272 (match_operand:VI8F_128 1 "general_operand")
6273 (match_operand:VI8F_128 2 "general_operand")))]
6276 bool ok = ix86_expand_int_vcond (operands);
;; Modes handled by the variable-selector vec_perm<mode> expander.
;; The six 128-bit modes are listed unconditionally; the six 256-bit
;; modes are included only when TARGET_AVX2 holds.
6281 (define_mode_iterator VEC_PERM_AVX2
6282 [V16QI V8HI V4SI V2DI V4SF V2DF
6283 (V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
6284 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
6285 (V8SF "TARGET_AVX2") (V4DF "TARGET_AVX2")])
;; Permutation expander with a run-time selector.
;; Operands: 0 = result, 1 and 2 = the two input vectors, 3 = the
;; selector, an integer vector of mode <sseintvecmode>; all must be
;; registers.
;; Enabled when any of SSSE3, AVX or XOP is available.  The whole
;; expansion is delegated to ix86_expand_vec_perm.
6287 (define_expand "vec_perm<mode>"
6288 [(match_operand:VEC_PERM_AVX2 0 "register_operand")
6289 (match_operand:VEC_PERM_AVX2 1 "register_operand")
6290 (match_operand:VEC_PERM_AVX2 2 "register_operand")
6291 (match_operand:<sseintvecmode> 3 "register_operand")]
6292 "TARGET_SSSE3 || TARGET_AVX || TARGET_XOP"
6294 ix86_expand_vec_perm (operands);
;; Modes handled by the vec_perm_const<mode> expander, each gated on the
;; target feature written next to it: TARGET_SSE for V4SF, V4SI, V2DF
;; and V2DI; TARGET_SSE2 for V16QI and V8HI; TARGET_AVX for V8SF, V4DF,
;; V8SI and V4DI; TARGET_AVX2 for V32QI and V16HI.
6298 (define_mode_iterator VEC_PERM_CONST
6299 [(V4SF "TARGET_SSE") (V4SI "TARGET_SSE")
6300 (V2DF "TARGET_SSE") (V2DI "TARGET_SSE")
6301 (V16QI "TARGET_SSE2") (V8HI "TARGET_SSE2")
6302 (V8SF "TARGET_AVX") (V4DF "TARGET_AVX")
6303 (V8SI "TARGET_AVX") (V4DI "TARGET_AVX")
6304 (V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")])
;; Permutation expander for the VEC_PERM_CONST modes.
;; Operands: 0 = result, 1 and 2 = input vectors (registers), 3 = the
;; selector in <sseintvecmode>.  Operand 3 has no predicate, so any rtx
;; of that mode is accepted here.
;; The body branches on the result of ix86_expand_vec_perm_const.
;; NOTE(review): presumably DONE on success and FAIL otherwise --
;; confirm against the full body.
6306 (define_expand "vec_perm_const<mode>"
6307 [(match_operand:VEC_PERM_CONST 0 "register_operand")
6308 (match_operand:VEC_PERM_CONST 1 "register_operand")
6309 (match_operand:VEC_PERM_CONST 2 "register_operand")
6310 (match_operand:<sseintvecmode> 3)]
6313 if (ix86_expand_vec_perm_const (operands))
6319 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6321 ;; Parallel bitwise logical operations
6323 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; One's complement for the VI modes, expressed as an XOR of operand 1
;; with a second value.
;; The body builds that value: a CONST_VECTOR with one constm1_rtx
;; (-1, i.e. all bits set) per element of <MODE>, forced into a
;; register and stored as operands[2].
6325 (define_expand "one_cmpl<mode>2"
6326 [(set (match_operand:VI 0 "register_operand")
6327 (xor:VI (match_operand:VI 1 "nonimmediate_operand")
6331 int i, n = GET_MODE_NUNITS (<MODE>mode);
6332 rtvec v = rtvec_alloc (n);
6334 for (i = 0; i < n; ++i)
6335 RTVEC_ELT (v, i) = constm1_rtx;
6337 operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v));
;; Named expander for and-not over the VI_AVX2 modes.  Operand 1 must be
;; a register and appears inverted (wrapped in "not"); operand 2 may be
;; register or memory.  No preparation code is visible for this
;; pattern.
6340 (define_expand "<sse2_avx2>_andnot<mode>3"
6341 [(set (match_operand:VI_AVX2 0 "register_operand")
6343 (not:VI_AVX2 (match_operand:VI_AVX2 1 "register_operand"))
6344 (match_operand:VI_AVX2 2 "nonimmediate_operand")))]
;; And-not insn for all VI modes; operand 1 is the inverted input.
;; The output code builds the assembler template at run time in a
;; static 32-byte buffer.  It switches on the insn's "mode" attribute,
;; asserting the target feature each case needs (AVX2, SSE2, AVX or
;; SSE), and then picks the operand format by alternative: two-operand
;; "%s" for alternative 0 (noavx) or "v"-prefixed three-operand for
;; alternative 1 (avx).  snprintf combines format and mnemonic.
;; The "mode" attribute is itself computed.  The visible tests are
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL (giving <ssePSmode>),
;; TARGET_AVX2, TARGET_AVX with a vector-size check (giving V8SF for
;; vectors wider than 16 bytes), and no-SSE2 or optimize-for-size
;; (giving V4SF); the default is <sseinsnmode>.
;; prefix_data16 is tested only for alternative 0 in TI mode and is "*"
;; otherwise.
6347 (define_insn "*andnot<mode>3"
6348 [(set (match_operand:VI 0 "register_operand" "=x,x")
6350 (not:VI (match_operand:VI 1 "register_operand" "0,x"))
6351 (match_operand:VI 2 "nonimmediate_operand" "xm,xm")))]
6354 static char buf[32];
6358 switch (get_attr_mode (insn))
6361 gcc_assert (TARGET_AVX2);
6363 gcc_assert (TARGET_SSE2);
6369 gcc_assert (TARGET_AVX);
6371 gcc_assert (TARGET_SSE);
6380 switch (which_alternative)
6383 ops = "%s\t{%%2, %%0|%%0, %%2}";
6386 ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
6392 snprintf (buf, sizeof (buf), ops, tmp);
6395 [(set_attr "isa" "noavx,avx")
6396 (set_attr "type" "sselog")
6397 (set (attr "prefix_data16")
6399 (and (eq_attr "alternative" "0")
6400 (eq_attr "mode" "TI"))
6402 (const_string "*")))
6403 (set_attr "prefix" "orig,vex")
6405 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
6406 (const_string "<ssePSmode>")
6407 (match_test "TARGET_AVX2")
6408 (const_string "<sseinsnmode>")
6409 (match_test "TARGET_AVX")
6411 (match_test "GET_MODE_SIZE (<MODE>mode) > 16")
6412 (const_string "V8SF")
6413 (const_string "<sseinsnmode>"))
6414 (ior (not (match_test "TARGET_SSE2"))
6415 (match_test "optimize_function_for_size_p (cfun)"))
6416 (const_string "V4SF")
6418 (const_string "<sseinsnmode>")))])
;; Named expander, instantiated per <code>, for the VI modes.  Inputs
;; may be register, memory or a constant vector
;; (nonimmediate_or_const_vector_operand).
;; Expansion is delegated to ix86_expand_vector_logical_operator, which
;; receives <CODE>, the mode and the operand array.
6420 (define_expand "<code><mode>3"
6421 [(set (match_operand:VI 0 "register_operand")
6423 (match_operand:VI 1 "nonimmediate_or_const_vector_operand")
6424 (match_operand:VI 2 "nonimmediate_or_const_vector_operand")))]
6427 ix86_expand_vector_logical_operator (<CODE>, <MODE>mode, operands);
;; Insn for the commutative <code> operations on VI modes (operand 1
;; carries "%").  The visible part of the condition requires
;; ix86_binary_operator_ok.
;; The output code has the same structure as *andnot<mode>3.  It
;; switches on the insn's "mode" attribute, asserting the target
;; feature each case needs, and then picks a two-operand format for
;; alternative 0 (noavx) or a "v"-prefixed three-operand format for
;; alternative 1 (avx).  snprintf writes the final template into a
;; static 32-byte buffer.
;; The "mode" attribute is computed.  The visible tests are
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL (giving <ssePSmode>),
;; TARGET_AVX2, TARGET_AVX with a vector-size check (giving V8SF for
;; vectors wider than 16 bytes), and no-SSE2 or optimize-for-size
;; (giving V4SF); the default is <sseinsnmode>.
6431 (define_insn "*<code><mode>3"
6432 [(set (match_operand:VI 0 "register_operand" "=x,x")
6434 (match_operand:VI 1 "nonimmediate_operand" "%0,x")
6435 (match_operand:VI 2 "nonimmediate_operand" "xm,xm")))]
6437 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6439 static char buf[32];
6443 switch (get_attr_mode (insn))
6446 gcc_assert (TARGET_AVX2);
6448 gcc_assert (TARGET_SSE2);
6454 gcc_assert (TARGET_AVX);
6456 gcc_assert (TARGET_SSE);
6465 switch (which_alternative)
6468 ops = "%s\t{%%2, %%0|%%0, %%2}";
6471 ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
6477 snprintf (buf, sizeof (buf), ops, tmp);
6480 [(set_attr "isa" "noavx,avx")
6481 (set_attr "type" "sselog")
6482 (set (attr "prefix_data16")
6484 (and (eq_attr "alternative" "0")
6485 (eq_attr "mode" "TI"))
6487 (const_string "*")))
6488 (set_attr "prefix" "orig,vex")
6490 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
6491 (const_string "<ssePSmode>")
6492 (match_test "TARGET_AVX2")
6493 (const_string "<sseinsnmode>")
6494 (match_test "TARGET_AVX")
6496 (match_test "GET_MODE_SIZE (<MODE>mode) > 16")
6497 (const_string "V8SF")
6498 (const_string "<sseinsnmode>"))
6499 (ior (not (match_test "TARGET_SSE2"))
6500 (match_test "optimize_function_for_size_p (cfun)"))
6501 (const_string "V4SF")
6503 (const_string "<sseinsnmode>")))])
6505 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6507 ;; Parallel integral element swizzling
6509 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Pack expander for the VI248_AVX2 modes: two input vectors (operands
;; 1 and 2) produce one result of mode <ssepackmode> (operand 0).
;; Both inputs are reinterpreted in <ssepackmode> with gen_lowpart, and
;; ix86_expand_vec_extract_even_odd is then called on the two views
;; with a final argument of 0.
;; NOTE(review): 0 presumably selects the even-indexed elements, i.e.
;; the low half of each original wide element -- confirm.
6511 (define_expand "vec_pack_trunc_<mode>"
6512 [(match_operand:<ssepackmode> 0 "register_operand")
6513 (match_operand:VI248_AVX2 1 "register_operand")
6514 (match_operand:VI248_AVX2 2 "register_operand")]
6517 rtx op1 = gen_lowpart (<ssepackmode>mode, operands[1]);
6518 rtx op2 = gen_lowpart (<ssepackmode>mode, operands[2]);
6519 ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
;; Pack with signed saturation.  Each <sseunpackmode> input is narrowed
;; with ss_truncate to <ssehalfvecmode>, and the two halves are
;; concatenated into the VI1_AVX2 result.
;; Alternative 0 (noavx) ties operand 1 to the destination and prints
;; packsswb; alternative 1 (avx) prints the three-operand vpacksswb.
;; Operand 2 may be memory.
6523 (define_insn "<sse2_avx2>_packsswb"
6524 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
6525 (vec_concat:VI1_AVX2
6526 (ss_truncate:<ssehalfvecmode>
6527 (match_operand:<sseunpackmode> 1 "register_operand" "0,x"))
6528 (ss_truncate:<ssehalfvecmode>
6529 (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,xm"))))]
6532 packsswb\t{%2, %0|%0, %2}
6533 vpacksswb\t{%2, %1, %0|%0, %1, %2}"
6534 [(set_attr "isa" "noavx,avx")
6535 (set_attr "type" "sselog")
6536 (set_attr "prefix_data16" "1,*")
6537 (set_attr "prefix" "orig,vex")
6538 (set_attr "mode" "<sseinsnmode>")])
;; Pack with signed saturation.  Each <sseunpackmode> input is narrowed
;; with ss_truncate to <ssehalfvecmode>, and the two halves are
;; concatenated into the VI2_AVX2 result.
;; Alternative 0 (noavx) ties operand 1 to the destination and prints
;; packssdw; alternative 1 (avx) prints the three-operand vpackssdw.
;; Operand 2 may be memory.
6540 (define_insn "<sse2_avx2>_packssdw"
6541 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
6542 (vec_concat:VI2_AVX2
6543 (ss_truncate:<ssehalfvecmode>
6544 (match_operand:<sseunpackmode> 1 "register_operand" "0,x"))
6545 (ss_truncate:<ssehalfvecmode>
6546 (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,xm"))))]
6549 packssdw\t{%2, %0|%0, %2}
6550 vpackssdw\t{%2, %1, %0|%0, %1, %2}"
6551 [(set_attr "isa" "noavx,avx")
6552 (set_attr "type" "sselog")
6553 (set_attr "prefix_data16" "1,*")
6554 (set_attr "prefix" "orig,vex")
6555 (set_attr "mode" "<sseinsnmode>")])
;; Pack with unsigned saturation.  Each <sseunpackmode> input is
;; narrowed with us_truncate to <ssehalfvecmode>, and the two halves are
;; concatenated into the VI1_AVX2 result.
;; Alternative 0 (noavx) ties operand 1 to the destination and prints
;; packuswb; alternative 1 (avx) prints the three-operand vpackuswb.
;; Operand 2 may be memory.
6557 (define_insn "<sse2_avx2>_packuswb"
6558 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
6559 (vec_concat:VI1_AVX2
6560 (us_truncate:<ssehalfvecmode>
6561 (match_operand:<sseunpackmode> 1 "register_operand" "0,x"))
6562 (us_truncate:<ssehalfvecmode>
6563 (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,xm"))))]
6566 packuswb\t{%2, %0|%0, %2}
6567 vpackuswb\t{%2, %1, %0|%0, %1, %2}"
6568 [(set_attr "isa" "noavx,avx")
6569 (set_attr "type" "sselog")
6570 (set_attr "prefix_data16" "1,*")
6571 (set_attr "prefix" "orig,vex")
6572 (set_attr "mode" "<sseinsnmode>")])
;; 256-bit byte interleave, printed as vpunpckhbw.
;; The selection list alternates an index in 0-31 with one in 32-63;
;; presumably these address operand 1 and operand 2 of a 64-element
;; concatenation -- confirm.  The indices cover 8-15 and 24-31 (plus
;; 32), i.e. the upper eight bytes of each 16-byte half of both inputs,
;; so the interleave works per 128-bit lane, not across the whole
;; vector.
6574 (define_insn "avx2_interleave_highv32qi"
6575 [(set (match_operand:V32QI 0 "register_operand" "=x")
6578 (match_operand:V32QI 1 "register_operand" "x")
6579 (match_operand:V32QI 2 "nonimmediate_operand" "xm"))
6580 (parallel [(const_int 8) (const_int 40)
6581 (const_int 9) (const_int 41)
6582 (const_int 10) (const_int 42)
6583 (const_int 11) (const_int 43)
6584 (const_int 12) (const_int 44)
6585 (const_int 13) (const_int 45)
6586 (const_int 14) (const_int 46)
6587 (const_int 15) (const_int 47)
6588 (const_int 24) (const_int 56)
6589 (const_int 25) (const_int 57)
6590 (const_int 26) (const_int 58)
6591 (const_int 27) (const_int 59)
6592 (const_int 28) (const_int 60)
6593 (const_int 29) (const_int 61)
6594 (const_int 30) (const_int 62)
6595 (const_int 31) (const_int 63)])))]
6597 "vpunpckhbw\t{%2, %1, %0|%0, %1, %2}"
6598 [(set_attr "type" "sselog")
6599 (set_attr "prefix" "vex")
6600 (set_attr "mode" "OI")])
;; 128-bit byte interleave of the high halves, printed as punpckhbw
;; (noavx, operand 1 tied to the destination) or vpunpckhbw (avx).
;; The selection list pairs indices 8-15 with 24-31; presumably the
;; upper eight bytes of operand 1 alternating with the upper eight
;; bytes of operand 2 in a 32-element concatenation -- confirm.
6602 (define_insn "vec_interleave_highv16qi"
6603 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
6606 (match_operand:V16QI 1 "register_operand" "0,x")
6607 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm"))
6608 (parallel [(const_int 8) (const_int 24)
6609 (const_int 9) (const_int 25)
6610 (const_int 10) (const_int 26)
6611 (const_int 11) (const_int 27)
6612 (const_int 12) (const_int 28)
6613 (const_int 13) (const_int 29)
6614 (const_int 14) (const_int 30)
6615 (const_int 15) (const_int 31)])))]
6618 punpckhbw\t{%2, %0|%0, %2}
6619 vpunpckhbw\t{%2, %1, %0|%0, %1, %2}"
6620 [(set_attr "isa" "noavx,avx")
6621 (set_attr "type" "sselog")
6622 (set_attr "prefix_data16" "1,*")
6623 (set_attr "prefix" "orig,vex")
6624 (set_attr "mode" "TI")])
;; 256-bit byte interleave, printed as vpunpcklbw.
;; The selection list alternates an index in 0-31 with one in 32-63;
;; presumably these address operand 1 and operand 2 of a 64-element
;; concatenation -- confirm.  The indices cover 0-7 and 16-23 (plus
;; 32), i.e. the lower eight bytes of each 16-byte half of both inputs,
;; so the interleave works per 128-bit lane.
6626 (define_insn "avx2_interleave_lowv32qi"
6627 [(set (match_operand:V32QI 0 "register_operand" "=x")
6630 (match_operand:V32QI 1 "register_operand" "x")
6631 (match_operand:V32QI 2 "nonimmediate_operand" "xm"))
6632 (parallel [(const_int 0) (const_int 32)
6633 (const_int 1) (const_int 33)
6634 (const_int 2) (const_int 34)
6635 (const_int 3) (const_int 35)
6636 (const_int 4) (const_int 36)
6637 (const_int 5) (const_int 37)
6638 (const_int 6) (const_int 38)
6639 (const_int 7) (const_int 39)
6640 (const_int 16) (const_int 48)
6641 (const_int 17) (const_int 49)
6642 (const_int 18) (const_int 50)
6643 (const_int 19) (const_int 51)
6644 (const_int 20) (const_int 52)
6645 (const_int 21) (const_int 53)
6646 (const_int 22) (const_int 54)
6647 (const_int 23) (const_int 55)])))]
6649 "vpunpcklbw\t{%2, %1, %0|%0, %1, %2}"
6650 [(set_attr "type" "sselog")
6651 (set_attr "prefix" "vex")
6652 (set_attr "mode" "OI")])
;; 128-bit byte interleave of the low halves, printed as punpcklbw
;; (noavx, operand 1 tied to the destination) or vpunpcklbw (avx).
;; The selection list pairs indices 0-7 with 16-23; presumably the
;; lower eight bytes of operand 1 alternating with the lower eight
;; bytes of operand 2 in a 32-element concatenation -- confirm.
6654 (define_insn "vec_interleave_lowv16qi"
6655 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
6658 (match_operand:V16QI 1 "register_operand" "0,x")
6659 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm"))
6660 (parallel [(const_int 0) (const_int 16)
6661 (const_int 1) (const_int 17)
6662 (const_int 2) (const_int 18)
6663 (const_int 3) (const_int 19)
6664 (const_int 4) (const_int 20)
6665 (const_int 5) (const_int 21)
6666 (const_int 6) (const_int 22)
6667 (const_int 7) (const_int 23)])))]
6670 punpcklbw\t{%2, %0|%0, %2}
6671 vpunpcklbw\t{%2, %1, %0|%0, %1, %2}"
6672 [(set_attr "isa" "noavx,avx")
6673 (set_attr "type" "sselog")
6674 (set_attr "prefix_data16" "1,*")
6675 (set_attr "prefix" "orig,vex")
6676 (set_attr "mode" "TI")])
;; 256-bit halfword interleave, printed as vpunpckhwd.
;; The selection list alternates an index in 0-15 with one in 16-31;
;; presumably these address operand 1 and operand 2 of a 32-element
;; concatenation -- confirm.  The indices cover 4-7 and 12-15 (plus
;; 16), i.e. the upper four halfwords of each 8-element half of both
;; inputs, so the interleave works per 128-bit lane.
6678 (define_insn "avx2_interleave_highv16hi"
6679 [(set (match_operand:V16HI 0 "register_operand" "=x")
6682 (match_operand:V16HI 1 "register_operand" "x")
6683 (match_operand:V16HI 2 "nonimmediate_operand" "xm"))
6684 (parallel [(const_int 4) (const_int 20)
6685 (const_int 5) (const_int 21)
6686 (const_int 6) (const_int 22)
6687 (const_int 7) (const_int 23)
6688 (const_int 12) (const_int 28)
6689 (const_int 13) (const_int 29)
6690 (const_int 14) (const_int 30)
6691 (const_int 15) (const_int 31)])))]
6693 "vpunpckhwd\t{%2, %1, %0|%0, %1, %2}"
6694 [(set_attr "type" "sselog")
6695 (set_attr "prefix" "vex")
6696 (set_attr "mode" "OI")])
;; Word interleave for V8HI.  The selector pairs index i with index i+8
;; for i = 4..7.  Alternative 0 is the two-operand punpckhwd with
;; operand 1 tied to the destination; alternative 1 is the three-operand
;; vpunpckhwd.
6698 (define_insn "vec_interleave_highv8hi"
6699 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
6702 (match_operand:V8HI 1 "register_operand" "0,x")
6703 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm"))
6704 (parallel [(const_int 4) (const_int 12)
6705 (const_int 5) (const_int 13)
6706 (const_int 6) (const_int 14)
6707 (const_int 7) (const_int 15)])))]
6710 punpckhwd\t{%2, %0|%0, %2}
6711 vpunpckhwd\t{%2, %1, %0|%0, %1, %2}"
6712 [(set_attr "isa" "noavx,avx")
6713 (set_attr "type" "sselog")
6714 (set_attr "prefix_data16" "1,*")
6715 (set_attr "prefix" "orig,vex")
6716 (set_attr "mode" "TI")])
;; Word interleave for V16HI, emitted as vpunpcklwd (VEX prefix, OImode).
;; The selector pairs index i with index i+16 for i = 0..3 and again for
;; i = 8..11.
6718 (define_insn "avx2_interleave_lowv16hi"
6719 [(set (match_operand:V16HI 0 "register_operand" "=x")
6722 (match_operand:V16HI 1 "register_operand" "x")
6723 (match_operand:V16HI 2 "nonimmediate_operand" "xm"))
6724 (parallel [(const_int 0) (const_int 16)
6725 (const_int 1) (const_int 17)
6726 (const_int 2) (const_int 18)
6727 (const_int 3) (const_int 19)
6728 (const_int 8) (const_int 24)
6729 (const_int 9) (const_int 25)
6730 (const_int 10) (const_int 26)
6731 (const_int 11) (const_int 27)])))]
6733 "vpunpcklwd\t{%2, %1, %0|%0, %1, %2}"
6734 [(set_attr "type" "sselog")
6735 (set_attr "prefix" "vex")
6736 (set_attr "mode" "OI")])
;; Word interleave for V8HI.  The selector pairs index i with index i+8
;; for i = 0..3.  Alternative 0 is the two-operand punpcklwd with
;; operand 1 tied to the destination; alternative 1 is the three-operand
;; vpunpcklwd.
6738 (define_insn "vec_interleave_lowv8hi"
6739 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
6742 (match_operand:V8HI 1 "register_operand" "0,x")
6743 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm"))
6744 (parallel [(const_int 0) (const_int 8)
6745 (const_int 1) (const_int 9)
6746 (const_int 2) (const_int 10)
6747 (const_int 3) (const_int 11)])))]
6750 punpcklwd\t{%2, %0|%0, %2}
6751 vpunpcklwd\t{%2, %1, %0|%0, %1, %2}"
6752 [(set_attr "isa" "noavx,avx")
6753 (set_attr "type" "sselog")
6754 (set_attr "prefix_data16" "1,*")
6755 (set_attr "prefix" "orig,vex")
6756 (set_attr "mode" "TI")])
;; Doubleword interleave for V8SI, emitted as vpunpckhdq (VEX prefix,
;; OImode).  The selector pairs index i with index i+8 for i = 2, 3, 6, 7.
6758 (define_insn "avx2_interleave_highv8si"
6759 [(set (match_operand:V8SI 0 "register_operand" "=x")
6762 (match_operand:V8SI 1 "register_operand" "x")
6763 (match_operand:V8SI 2 "nonimmediate_operand" "xm"))
6764 (parallel [(const_int 2) (const_int 10)
6765 (const_int 3) (const_int 11)
6766 (const_int 6) (const_int 14)
6767 (const_int 7) (const_int 15)])))]
6769 "vpunpckhdq\t{%2, %1, %0|%0, %1, %2}"
6770 [(set_attr "type" "sselog")
6771 (set_attr "prefix" "vex")
6772 (set_attr "mode" "OI")])
;; Doubleword interleave for V4SI.  The selector is (2 6 3 7).
;; Alternative 0 is the two-operand punpckhdq with operand 1 tied to the
;; destination; alternative 1 is the three-operand vpunpckhdq.
6774 (define_insn "vec_interleave_highv4si"
6775 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
6778 (match_operand:V4SI 1 "register_operand" "0,x")
6779 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm"))
6780 (parallel [(const_int 2) (const_int 6)
6781 (const_int 3) (const_int 7)])))]
6784 punpckhdq\t{%2, %0|%0, %2}
6785 vpunpckhdq\t{%2, %1, %0|%0, %1, %2}"
6786 [(set_attr "isa" "noavx,avx")
6787 (set_attr "type" "sselog")
6788 (set_attr "prefix_data16" "1,*")
6789 (set_attr "prefix" "orig,vex")
6790 (set_attr "mode" "TI")])
;; Doubleword interleave for V8SI, emitted as vpunpckldq (VEX prefix,
;; OImode).  The selector pairs index i with index i+8 for i = 0, 1, 4, 5.
6792 (define_insn "avx2_interleave_lowv8si"
6793 [(set (match_operand:V8SI 0 "register_operand" "=x")
6796 (match_operand:V8SI 1 "register_operand" "x")
6797 (match_operand:V8SI 2 "nonimmediate_operand" "xm"))
6798 (parallel [(const_int 0) (const_int 8)
6799 (const_int 1) (const_int 9)
6800 (const_int 4) (const_int 12)
6801 (const_int 5) (const_int 13)])))]
6803 "vpunpckldq\t{%2, %1, %0|%0, %1, %2}"
6804 [(set_attr "type" "sselog")
6805 (set_attr "prefix" "vex")
6806 (set_attr "mode" "OI")])
;; Doubleword interleave for V4SI.  The selector is (0 4 1 5).
;; Alternative 0 is the two-operand punpckldq with operand 1 tied to the
;; destination; alternative 1 is the three-operand vpunpckldq.
6808 (define_insn "vec_interleave_lowv4si"
6809 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
6812 (match_operand:V4SI 1 "register_operand" "0,x")
6813 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm"))
6814 (parallel [(const_int 0) (const_int 4)
6815 (const_int 1) (const_int 5)])))]
6818 punpckldq\t{%2, %0|%0, %2}
6819 vpunpckldq\t{%2, %1, %0|%0, %1, %2}"
6820 [(set_attr "isa" "noavx,avx")
6821 (set_attr "type" "sselog")
6822 (set_attr "prefix_data16" "1,*")
6823 (set_attr "prefix" "orig,vex")
6824 (set_attr "mode" "TI")])
;; Expander for the VI_256 modes.  It emits avx2_interleave_low<mode> and
;; avx2_interleave_high<mode> into two fresh temporaries, then combines
;; them with avx2_permv2ti on the V4DImode lowparts using the immediate
;; 1 + (3 << 4), i.e. 0x31.
;; NOTE(review): 0x31 presumably selects the upper 128-bit half of each
;; temporary -- confirm against the vperm2i128 immediate encoding.
6826 (define_expand "vec_interleave_high<mode>"
6827 [(match_operand:VI_256 0 "register_operand" "=x")
6828 (match_operand:VI_256 1 "register_operand" "x")
6829 (match_operand:VI_256 2 "nonimmediate_operand" "xm")]
6832 rtx t1 = gen_reg_rtx (<MODE>mode);
6833 rtx t2 = gen_reg_rtx (<MODE>mode);
6834 emit_insn (gen_avx2_interleave_low<mode> (t1, operands[1], operands[2]));
6835 emit_insn (gen_avx2_interleave_high<mode> (t2, operands[1], operands[2]));
6836 emit_insn (gen_avx2_permv2ti
6837 (gen_lowpart (V4DImode, operands[0]),
6838 gen_lowpart (V4DImode, t1),
6839 gen_lowpart (V4DImode, t2), GEN_INT (1 + (3 << 4))));
;; Expander for the VI_256 modes.  It emits avx2_interleave_low<mode> and
;; avx2_interleave_high<mode> into two fresh temporaries, then combines
;; them with avx2_permv2ti on the V4DImode lowparts using the immediate
;; 0 + (2 << 4), i.e. 0x20.
;; NOTE(review): 0x20 presumably selects the lower 128-bit half of each
;; temporary -- confirm against the vperm2i128 immediate encoding.
6843 (define_expand "vec_interleave_low<mode>"
6844 [(match_operand:VI_256 0 "register_operand" "=x")
6845 (match_operand:VI_256 1 "register_operand" "x")
6846 (match_operand:VI_256 2 "nonimmediate_operand" "xm")]
6849 rtx t1 = gen_reg_rtx (<MODE>mode);
6850 rtx t2 = gen_reg_rtx (<MODE>mode);
6851 emit_insn (gen_avx2_interleave_low<mode> (t1, operands[1], operands[2]));
6852 emit_insn (gen_avx2_interleave_high<mode> (t2, operands[1], operands[2]));
6853 emit_insn (gen_avx2_permv2ti
6854 (gen_lowpart (V4DImode, operands[0]),
6855 gen_lowpart (V4DImode, t1),
6856 gen_lowpart (V4DImode, t2), GEN_INT (0 + (2 << 4))));
6860 ;; Modes handled by pinsr patterns.
;; V16QI and V4SI are enabled only under TARGET_SSE4_1, V2DI only under
;; TARGET_SSE4_1 && TARGET_64BIT; V8HI carries no extra condition here.
6861 (define_mode_iterator PINSR_MODE
6862 [(V16QI "TARGET_SSE4_1") V8HI
6863 (V4SI "TARGET_SSE4_1")
6864 (V2DI "TARGET_SSE4_1 && TARGET_64BIT")])
;; Pattern-name prefix used by the pinsr insn below: "sse2" for V8HI and
;; "sse4_1" for V16QI, V4SI and V2DI.
6866 (define_mode_attr sse2p4_1
6867 [(V16QI "sse4_1") (V8HI "sse2")
6868 (V4SI "sse4_1") (V2DI "sse4_1")])
6870 ;; sse4_1_pinsrd must come before sse2_loadld since it is preferred.
;; Inserts scalar operand 2 (GPR or memory) into vector operand 1 as a
;; vec_merge of a vec_duplicate.  Operand 3 is the merge mask: the insn
;; condition requires exact_log2 of it to be a valid element index, and
;; the output code replaces it with that index before printing.
;; Alternatives 0-1 tie operand 1 to the destination (isa noavx) and use
;; pinsr*; alternatives 2-3 use the three-operand vpinsr* (isa avx).
;; When the scalar mode is narrower than SImode the %k2 operand form is
;; printed instead of %2.
6871 (define_insn "<sse2p4_1>_pinsr<ssemodesuffix>"
6872 [(set (match_operand:PINSR_MODE 0 "register_operand" "=x,x,x,x")
6873 (vec_merge:PINSR_MODE
6874 (vec_duplicate:PINSR_MODE
6875 (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "r,m,r,m"))
6876 (match_operand:PINSR_MODE 1 "register_operand" "0,0,x,x")
6877 (match_operand:SI 3 "const_int_operand")))]
6879 && ((unsigned) exact_log2 (INTVAL (operands[3]))
6880 < GET_MODE_NUNITS (<MODE>mode))"
6882 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6884 switch (which_alternative)
6887 if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
6888 return "pinsr<ssemodesuffix>\t{%3, %k2, %0|%0, %k2, %3}";
6891 return "pinsr<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}";
6893 if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
6894 return "vpinsr<ssemodesuffix>\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
6897 return "vpinsr<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
6902 [(set_attr "isa" "noavx,noavx,avx,avx")
6903 (set_attr "type" "sselog")
6904 (set (attr "prefix_rex")
6906 (and (not (match_test "TARGET_AVX"))
6907 (eq (const_string "<MODE>mode") (const_string "V2DImode")))
6909 (const_string "*")))
6910 (set (attr "prefix_data16")
6912 (and (not (match_test "TARGET_AVX"))
6913 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
6915 (const_string "*")))
6916 (set (attr "prefix_extra")
6918 (and (not (match_test "TARGET_AVX"))
6919 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
6921 (const_string "1")))
6922 (set_attr "length_immediate" "1")
6923 (set_attr "prefix" "orig,orig,vex,vex")
6924 (set_attr "mode" "TI")])
;; Expander taking the 8-bit shuffle immediate in operand 2.  It splits
;; the immediate into four 2-bit selectors and passes avx2_pshufd_1 those
;; four values followed by the same four values plus 4.
6926 (define_expand "avx2_pshufdv3"
6927 [(match_operand:V8SI 0 "register_operand")
6928 (match_operand:V8SI 1 "nonimmediate_operand")
6929 (match_operand:SI 2 "const_0_to_255_operand")]
6932 int mask = INTVAL (operands[2]);
6933 emit_insn (gen_avx2_pshufd_1 (operands[0], operands[1],
6934 GEN_INT ((mask >> 0) & 3),
6935 GEN_INT ((mask >> 2) & 3),
6936 GEN_INT ((mask >> 4) & 3),
6937 GEN_INT ((mask >> 6) & 3),
6938 GEN_INT (((mask >> 0) & 3) + 4),
6939 GEN_INT (((mask >> 2) & 3) + 4),
6940 GEN_INT (((mask >> 4) & 3) + 4),
6941 GEN_INT (((mask >> 6) & 3) + 4)));
;; V8SI shuffle with explicit per-element selectors.  Operands 2-5 are in
;; 0..3 and operands 6-9 in 4..7; the condition additionally requires
;; operand N+4 == operand N + 4 for N = 2..5.  The output code packs
;; operands 2-5 into one immediate, two bits each, stores it in
;; operands[2] and prints vpshufd.
6945 (define_insn "avx2_pshufd_1"
6946 [(set (match_operand:V8SI 0 "register_operand" "=x")
6948 (match_operand:V8SI 1 "nonimmediate_operand" "xm")
6949 (parallel [(match_operand 2 "const_0_to_3_operand")
6950 (match_operand 3 "const_0_to_3_operand")
6951 (match_operand 4 "const_0_to_3_operand")
6952 (match_operand 5 "const_0_to_3_operand")
6953 (match_operand 6 "const_4_to_7_operand")
6954 (match_operand 7 "const_4_to_7_operand")
6955 (match_operand 8 "const_4_to_7_operand")
6956 (match_operand 9 "const_4_to_7_operand")])))]
6958 && INTVAL (operands[2]) + 4 == INTVAL (operands[6])
6959 && INTVAL (operands[3]) + 4 == INTVAL (operands[7])
6960 && INTVAL (operands[4]) + 4 == INTVAL (operands[8])
6961 && INTVAL (operands[5]) + 4 == INTVAL (operands[9])"
6964 mask |= INTVAL (operands[2]) << 0;
6965 mask |= INTVAL (operands[3]) << 2;
6966 mask |= INTVAL (operands[4]) << 4;
6967 mask |= INTVAL (operands[5]) << 6;
6968 operands[2] = GEN_INT (mask);
6970 return "vpshufd\t{%2, %1, %0|%0, %1, %2}";
6972 [(set_attr "type" "sselog1")
6973 (set_attr "prefix" "vex")
6974 (set_attr "length_immediate" "1")
6975 (set_attr "mode" "OI")])
;; Expander taking the shuffle immediate in operand 2.  It splits the low
;; eight bits into four 2-bit selectors and passes them to sse2_pshufd_1.
;; Only bits 0..7 of the immediate are used (each field is masked with 3).
6977 (define_expand "sse2_pshufd"
6978 [(match_operand:V4SI 0 "register_operand")
6979 (match_operand:V4SI 1 "nonimmediate_operand")
6980 (match_operand:SI 2 "const_int_operand")]
6983 int mask = INTVAL (operands[2]);
6984 emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
6985 GEN_INT ((mask >> 0) & 3),
6986 GEN_INT ((mask >> 2) & 3),
6987 GEN_INT ((mask >> 4) & 3),
6988 GEN_INT ((mask >> 6) & 3)));
;; V4SI shuffle with four explicit selectors (operands 2-5, each 0..3).
;; The output code packs them into one immediate, two bits each, stores
;; it in operands[2] and prints %vpshufd (prefix attribute maybe_vex).
6992 (define_insn "sse2_pshufd_1"
6993 [(set (match_operand:V4SI 0 "register_operand" "=x")
6995 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
6996 (parallel [(match_operand 2 "const_0_to_3_operand")
6997 (match_operand 3 "const_0_to_3_operand")
6998 (match_operand 4 "const_0_to_3_operand")
6999 (match_operand 5 "const_0_to_3_operand")])))]
7003 mask |= INTVAL (operands[2]) << 0;
7004 mask |= INTVAL (operands[3]) << 2;
7005 mask |= INTVAL (operands[4]) << 4;
7006 mask |= INTVAL (operands[5]) << 6;
7007 operands[2] = GEN_INT (mask);
7009 return "%vpshufd\t{%2, %1, %0|%0, %1, %2}";
7011 [(set_attr "type" "sselog1")
7012 (set_attr "prefix_data16" "1")
7013 (set_attr "prefix" "maybe_vex")
7014 (set_attr "length_immediate" "1")
7015 (set_attr "mode" "TI")])
;; Expander taking the 8-bit shuffle immediate in operand 2.  It splits
;; the immediate into four 2-bit selectors and passes avx2_pshuflw_1
;; those four values followed by the same four values plus 8.
7017 (define_expand "avx2_pshuflwv3"
7018 [(match_operand:V16HI 0 "register_operand")
7019 (match_operand:V16HI 1 "nonimmediate_operand")
7020 (match_operand:SI 2 "const_0_to_255_operand")]
7023 int mask = INTVAL (operands[2]);
7024 emit_insn (gen_avx2_pshuflw_1 (operands[0], operands[1],
7025 GEN_INT ((mask >> 0) & 3),
7026 GEN_INT ((mask >> 2) & 3),
7027 GEN_INT ((mask >> 4) & 3),
7028 GEN_INT ((mask >> 6) & 3),
7029 GEN_INT (((mask >> 0) & 3) + 8),
7030 GEN_INT (((mask >> 2) & 3) + 8),
7031 GEN_INT (((mask >> 4) & 3) + 8),
7032 GEN_INT (((mask >> 6) & 3) + 8)));
;; V16HI shuffle with explicit selectors.  Operands 2-5 are in 0..3 and
;; operands 6-9 in 8..11; the condition additionally requires
;; operand N+4 == operand N + 8 for N = 2..5.  The output code packs
;; operands 2-5 into one immediate, two bits each, stores it in
;; operands[2] and prints vpshuflw.
7036 (define_insn "avx2_pshuflw_1"
7037 [(set (match_operand:V16HI 0 "register_operand" "=x")
7039 (match_operand:V16HI 1 "nonimmediate_operand" "xm")
7040 (parallel [(match_operand 2 "const_0_to_3_operand")
7041 (match_operand 3 "const_0_to_3_operand")
7042 (match_operand 4 "const_0_to_3_operand")
7043 (match_operand 5 "const_0_to_3_operand")
7048 (match_operand 6 "const_8_to_11_operand")
7049 (match_operand 7 "const_8_to_11_operand")
7050 (match_operand 8 "const_8_to_11_operand")
7051 (match_operand 9 "const_8_to_11_operand")
7057 && INTVAL (operands[2]) + 8 == INTVAL (operands[6])
7058 && INTVAL (operands[3]) + 8 == INTVAL (operands[7])
7059 && INTVAL (operands[4]) + 8 == INTVAL (operands[8])
7060 && INTVAL (operands[5]) + 8 == INTVAL (operands[9])"
7063 mask |= INTVAL (operands[2]) << 0;
7064 mask |= INTVAL (operands[3]) << 2;
7065 mask |= INTVAL (operands[4]) << 4;
7066 mask |= INTVAL (operands[5]) << 6;
7067 operands[2] = GEN_INT (mask);
7069 return "vpshuflw\t{%2, %1, %0|%0, %1, %2}";
7071 [(set_attr "type" "sselog")
7072 (set_attr "prefix" "vex")
7073 (set_attr "length_immediate" "1")
7074 (set_attr "mode" "OI")])
;; Expander taking the shuffle immediate in operand 2.  It splits the low
;; eight bits into four 2-bit selectors and passes them to
;; sse2_pshuflw_1.
7076 (define_expand "sse2_pshuflw"
7077 [(match_operand:V8HI 0 "register_operand")
7078 (match_operand:V8HI 1 "nonimmediate_operand")
7079 (match_operand:SI 2 "const_int_operand")]
7082 int mask = INTVAL (operands[2]);
7083 emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
7084 GEN_INT ((mask >> 0) & 3),
7085 GEN_INT ((mask >> 2) & 3),
7086 GEN_INT ((mask >> 4) & 3),
7087 GEN_INT ((mask >> 6) & 3)));
;; V8HI shuffle with four explicit selectors (operands 2-5, each 0..3).
;; The output code packs them into one immediate, two bits each, stores
;; it in operands[2] and prints %vpshuflw.  Encoding attributes:
;; prefix_rep 1, prefix_data16 0.
7091 (define_insn "sse2_pshuflw_1"
7092 [(set (match_operand:V8HI 0 "register_operand" "=x")
7094 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7095 (parallel [(match_operand 2 "const_0_to_3_operand")
7096 (match_operand 3 "const_0_to_3_operand")
7097 (match_operand 4 "const_0_to_3_operand")
7098 (match_operand 5 "const_0_to_3_operand")
7106 mask |= INTVAL (operands[2]) << 0;
7107 mask |= INTVAL (operands[3]) << 2;
7108 mask |= INTVAL (operands[4]) << 4;
7109 mask |= INTVAL (operands[5]) << 6;
7110 operands[2] = GEN_INT (mask);
7112 return "%vpshuflw\t{%2, %1, %0|%0, %1, %2}";
7114 [(set_attr "type" "sselog")
7115 (set_attr "prefix_data16" "0")
7116 (set_attr "prefix_rep" "1")
7117 (set_attr "prefix" "maybe_vex")
7118 (set_attr "length_immediate" "1")
7119 (set_attr "mode" "TI")])
;; Expander taking the 8-bit shuffle immediate in operand 2.  It splits
;; the immediate into four 2-bit selectors and passes avx2_pshufhw_1
;; those values plus 4, followed by the same values plus 12.
7121 (define_expand "avx2_pshufhwv3"
7122 [(match_operand:V16HI 0 "register_operand")
7123 (match_operand:V16HI 1 "nonimmediate_operand")
7124 (match_operand:SI 2 "const_0_to_255_operand")]
7127 int mask = INTVAL (operands[2]);
7128 emit_insn (gen_avx2_pshufhw_1 (operands[0], operands[1],
7129 GEN_INT (((mask >> 0) & 3) + 4),
7130 GEN_INT (((mask >> 2) & 3) + 4),
7131 GEN_INT (((mask >> 4) & 3) + 4),
7132 GEN_INT (((mask >> 6) & 3) + 4),
7133 GEN_INT (((mask >> 0) & 3) + 12),
7134 GEN_INT (((mask >> 2) & 3) + 12),
7135 GEN_INT (((mask >> 4) & 3) + 12),
7136 GEN_INT (((mask >> 6) & 3) + 12)));
;; V16HI shuffle with explicit selectors.  Operands 2-5 are in 4..7 and
;; operands 6-9 in 12..15; the condition additionally requires
;; operand N+4 == operand N + 8 for N = 2..5.  The output code subtracts
;; 4 from each of operands 2-5, packs the results into one immediate
;; (two bits each), stores it in operands[2] and prints vpshufhw.
7140 (define_insn "avx2_pshufhw_1"
7141 [(set (match_operand:V16HI 0 "register_operand" "=x")
7143 (match_operand:V16HI 1 "nonimmediate_operand" "xm")
7144 (parallel [(const_int 0)
7148 (match_operand 2 "const_4_to_7_operand")
7149 (match_operand 3 "const_4_to_7_operand")
7150 (match_operand 4 "const_4_to_7_operand")
7151 (match_operand 5 "const_4_to_7_operand")
7156 (match_operand 6 "const_12_to_15_operand")
7157 (match_operand 7 "const_12_to_15_operand")
7158 (match_operand 8 "const_12_to_15_operand")
7159 (match_operand 9 "const_12_to_15_operand")])))]
7161 && INTVAL (operands[2]) + 8 == INTVAL (operands[6])
7162 && INTVAL (operands[3]) + 8 == INTVAL (operands[7])
7163 && INTVAL (operands[4]) + 8 == INTVAL (operands[8])
7164 && INTVAL (operands[5]) + 8 == INTVAL (operands[9])"
7167 mask |= (INTVAL (operands[2]) - 4) << 0;
7168 mask |= (INTVAL (operands[3]) - 4) << 2;
7169 mask |= (INTVAL (operands[4]) - 4) << 4;
7170 mask |= (INTVAL (operands[5]) - 4) << 6;
7171 operands[2] = GEN_INT (mask);
7173 return "vpshufhw\t{%2, %1, %0|%0, %1, %2}";
7175 [(set_attr "type" "sselog")
7176 (set_attr "prefix" "vex")
7177 (set_attr "length_immediate" "1")
7178 (set_attr "mode" "OI")])
;; Expander taking the shuffle immediate in operand 2.  It splits the low
;; eight bits into four 2-bit selectors, adds 4 to each and passes them
;; to sse2_pshufhw_1.
7180 (define_expand "sse2_pshufhw"
7181 [(match_operand:V8HI 0 "register_operand")
7182 (match_operand:V8HI 1 "nonimmediate_operand")
7183 (match_operand:SI 2 "const_int_operand")]
7186 int mask = INTVAL (operands[2]);
7187 emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
7188 GEN_INT (((mask >> 0) & 3) + 4),
7189 GEN_INT (((mask >> 2) & 3) + 4),
7190 GEN_INT (((mask >> 4) & 3) + 4),
7191 GEN_INT (((mask >> 6) & 3) + 4)));
;; V8HI shuffle with four explicit selectors (operands 2-5, each 4..7).
;; The output code subtracts 4 from each selector, packs the results
;; into one immediate (two bits each), stores it in operands[2] and
;; prints %vpshufhw.  Encoding attributes: prefix_rep 1, prefix_data16 0.
7195 (define_insn "sse2_pshufhw_1"
7196 [(set (match_operand:V8HI 0 "register_operand" "=x")
7198 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7199 (parallel [(const_int 0)
7203 (match_operand 2 "const_4_to_7_operand")
7204 (match_operand 3 "const_4_to_7_operand")
7205 (match_operand 4 "const_4_to_7_operand")
7206 (match_operand 5 "const_4_to_7_operand")])))]
7210 mask |= (INTVAL (operands[2]) - 4) << 0;
7211 mask |= (INTVAL (operands[3]) - 4) << 2;
7212 mask |= (INTVAL (operands[4]) - 4) << 4;
7213 mask |= (INTVAL (operands[5]) - 4) << 6;
7214 operands[2] = GEN_INT (mask);
7216 return "%vpshufhw\t{%2, %1, %0|%0, %1, %2}";
7218 [(set_attr "type" "sselog")
7219 (set_attr "prefix_rep" "1")
7220 (set_attr "prefix_data16" "0")
7221 (set_attr "prefix" "maybe_vex")
7222 (set_attr "length_immediate" "1")
7223 (set_attr "mode" "TI")])
;; Expander that builds a V4SI value from SImode operand 1.  Its
;; preparation statement sets operands[2] to the V4SImode zero constant.
7225 (define_expand "sse2_loadd"
7226 [(set (match_operand:V4SI 0 "register_operand")
7229 (match_operand:SI 1 "nonimmediate_operand"))
7233 "operands[2] = CONST0_RTX (V4SImode);")
;; Places SImode operand 2 into a V4SI register whose other input is
;; operand 1 (a register or zero, per reg_or_0_operand).  Five
;; alternatives: %vmovd from memory, %vmovd from a general register,
;; movss from memory, movss from an SSE register with operand 1 tied to
;; the destination, and the three-operand vmovss.
;; NOTE(review): the "C" constraint on alternatives 0-2 presumably means
;; operand 1 is the zero vector -- confirm against the i386 constraints.
7235 (define_insn "sse2_loadld"
7236 [(set (match_operand:V4SI 0 "register_operand" "=x,Yi,x,x,x")
7239 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x,x"))
7240 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,C,0,x")
7244 %vmovd\t{%2, %0|%0, %2}
7245 %vmovd\t{%2, %0|%0, %2}
7246 movss\t{%2, %0|%0, %2}
7247 movss\t{%2, %0|%0, %2}
7248 vmovss\t{%2, %1, %0|%0, %1, %2}"
7249 [(set_attr "isa" "sse2,*,noavx,noavx,avx")
7250 (set_attr "type" "ssemov")
7251 (set_attr "prefix" "maybe_vex,maybe_vex,orig,orig,vex")
7252 (set_attr "mode" "TI,TI,V4SF,SF,SF")])
;; Extracts one element of a VI12_128 register (index in operand 2) into
;; a general register or memory using %vpextr<ssemodesuffix>.  The
;; register-destination alternative prints the destination as %k0.
;; prefix_data16 and prefix_extra are computed from the alternative
;; number and from whether the mode is V8HImode.
7254 (define_insn "*vec_extract<mode>"
7255 [(set (match_operand:<ssescalarmode> 0 "nonimmediate_operand" "=r,m")
7256 (vec_select:<ssescalarmode>
7257 (match_operand:VI12_128 1 "register_operand" "x,x")
7259 [(match_operand:SI 2 "const_0_to_<ssescalarnummask>_operand")])))]
7262 %vpextr<ssemodesuffix>\t{%2, %1, %k0|%k0, %1, %2}
7263 %vpextr<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
7264 [(set_attr "type" "sselog1")
7265 (set (attr "prefix_data16")
7267 (and (eq_attr "alternative" "0")
7268 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
7270 (const_string "*")))
7271 (set (attr "prefix_extra")
7273 (and (eq_attr "alternative" "0")
7274 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
7276 (const_string "1")))
7277 (set_attr "length_immediate" "1")
7278 (set_attr "prefix" "maybe_vex")
7279 (set_attr "mode" "TI")])
;; V8HI element extract into a general register using pextrw.  Enabled
;; only when TARGET_SSE2 holds and TARGET_SSE4_1 does not; the
;; destination is printed as %k0.
7281 (define_insn "*vec_extractv8hi_sse2"
7282 [(set (match_operand:HI 0 "register_operand" "=r")
7284 (match_operand:V8HI 1 "register_operand" "x")
7286 [(match_operand:SI 2 "const_0_to_7_operand")])))]
7287 "TARGET_SSE2 && !TARGET_SSE4_1"
7288 "pextrw\t{%2, %1, %k0|%k0, %1, %2}"
7289 [(set_attr "type" "sselog1")
7290 (set_attr "prefix_data16" "1")
7291 (set_attr "length_immediate" "1")
7292 (set_attr "mode" "TI")])
;; V16QI element extract (index 0..15 in operand 2) into an SWI48
;; general register using %vpextrb; the destination is printed as %k0.
7294 (define_insn "*vec_extractv16qi_zext"
7295 [(set (match_operand:SWI48 0 "register_operand" "=r")
7298 (match_operand:V16QI 1 "register_operand" "x")
7300 [(match_operand:SI 2 "const_0_to_15_operand")]))))]
7302 "%vpextrb\t{%2, %1, %k0|%k0, %1, %2}"
7303 [(set_attr "type" "sselog1")
7304 (set_attr "prefix_extra" "1")
7305 (set_attr "length_immediate" "1")
7306 (set_attr "prefix" "maybe_vex")
7307 (set_attr "mode" "TI")])
;; V8HI element extract (index 0..7 in operand 2) into an SWI48 general
;; register using %vpextrw; the destination is printed as %k0.
7309 (define_insn "*vec_extractv8hi_zext"
7310 [(set (match_operand:SWI48 0 "register_operand" "=r")
7313 (match_operand:V8HI 1 "register_operand" "x")
7315 [(match_operand:SI 2 "const_0_to_7_operand")]))))]
7317 "%vpextrw\t{%2, %1, %k0|%k0, %1, %2}"
7318 [(set_attr "type" "sselog1")
7319 (set_attr "prefix_data16" "1")
7320 (set_attr "length_immediate" "1")
7321 (set_attr "prefix" "maybe_vex")
7322 (set_attr "mode" "TI")])
;; Element extract from a VI12_128 vector that lives in memory
;; (constraint "o") into a general register.
7324 (define_insn "*vec_extract<mode>_mem"
7325 [(set (match_operand:<ssescalarmode> 0 "register_operand" "=r")
7326 (vec_select:<ssescalarmode>
7327 (match_operand:VI12_128 1 "memory_operand" "o")
7329 [(match_operand 2 "const_0_to_<ssescalarnummask>_operand")])))]
;; Extracts element 0 of a vector into an SWI48 destination.  Enabled
;; under TARGET_SSE as long as source and destination are not both
;; memory.  The second alternative (general-register destination from an
;; SSE register) is restricted to isa sse4.
7333 (define_insn "*vec_extract<ssevecmodelower>_0"
7334 [(set (match_operand:SWI48 0 "nonimmediate_operand" "=r ,r,x ,m")
7336 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "mYj,x,xm,x")
7337 (parallel [(const_int 0)])))]
7338 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7340 [(set_attr "isa" "*,sse4,*,*")])
;; Element 0 of a V4SI register into a DImode general register.  Enabled
;; for TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_FROM_VEC.
;; After reload it is split into a zero_extend:DI of operand 1, which the
;; preparation statement rewrites as an SImode register with the same
;; register number.
7342 (define_insn_and_split "*vec_extractv4si_0_zext"
7343 [(set (match_operand:DI 0 "register_operand" "=r")
7346 (match_operand:V4SI 1 "register_operand" "x")
7347 (parallel [(const_int 0)]))))]
7348 "TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_FROM_VEC"
7350 "&& reload_completed"
7351 [(set (match_dup 0) (zero_extend:DI (match_dup 1)))]
7352 "operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));")
;; Element 0 of a V2DI operand into a DImode SSE register or memory.
;; Enabled only for TARGET_SSE on non-64-bit targets, and only when
;; source and destination are not both memory.
7354 (define_insn "*vec_extractv2di_0_sse"
7355 [(set (match_operand:DI 0 "nonimmediate_operand" "=x,m")
7357 (match_operand:V2DI 1 "nonimmediate_operand" "xm,x")
7358 (parallel [(const_int 0)])))]
7359 "TARGET_SSE && !TARGET_64BIT
7360 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
;; Post-reload rewrite (TARGET_SSE && reload_completed) of an element-0
;; extract from a vector register: it becomes a plain move from
;; operand 1, which is re-expressed as a <MODE>mode register with the
;; same register number.
7364 [(set (match_operand:SWI48x 0 "nonimmediate_operand")
7366 (match_operand:<ssevecmode> 1 "register_operand")
7367 (parallel [(const_int 0)])))]
7368 "TARGET_SSE && reload_completed"
7369 [(set (match_dup 0) (match_dup 1))]
7370 "operands[1] = gen_rtx_REG (<MODE>mode, REGNO (operands[1]));")
;; V4SI element extract (index 0..3 in operand 2).  Alternative 0 writes
;; a general register or memory with %vpextrd.  Alternatives 1 and 2 keep
;; the result in an SSE register: the index is multiplied by 4 to get a
;; byte count and psrldq (operand 1 tied to the destination, isa noavx)
;; or vpsrldq (isa avx) is printed.
7372 (define_insn "*vec_extractv4si"
7373 [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,x,x")
7375 (match_operand:V4SI 1 "register_operand" "x,0,x")
7376 (parallel [(match_operand:SI 2 "const_0_to_3_operand")])))]
7379 switch (which_alternative)
7382 return "%vpextrd\t{%2, %1, %0|%0, %1, %2}";
7385 operands [2] = GEN_INT (INTVAL (operands[2]) * 4);
7386 return "psrldq\t{%2, %0|%0, %2}";
7389 operands [2] = GEN_INT (INTVAL (operands[2]) * 4);
7390 return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
7396 [(set_attr "isa" "*,noavx,avx")
7397 (set_attr "type" "sselog1,sseishft1,sseishft1")
7398 (set_attr "prefix_extra" "1,*,*")
7399 (set_attr "length_immediate" "1")
7400 (set_attr "prefix" "maybe_vex,orig,vex")
7401 (set_attr "mode" "TI")])
;; V4SI element extract into a DImode general register using %vpextrd,
;; with the destination printed as %k0.  Enabled for
;; TARGET_64BIT && TARGET_SSE4_1.
7403 (define_insn "*vec_extractv4si_zext"
7404 [(set (match_operand:DI 0 "register_operand" "=r")
7407 (match_operand:V4SI 1 "register_operand" "x")
7408 (parallel [(match_operand:SI 2 "const_0_to_3_operand")]))))]
7409 "TARGET_64BIT && TARGET_SSE4_1"
7410 "%vpextrd\t{%2, %1, %k0|%k0, %1, %2}"
7411 [(set_attr "type" "sselog1")
7412 (set_attr "prefix_extra" "1")
7413 (set_attr "length_immediate" "1")
7414 (set_attr "prefix" "maybe_vex")
7415 (set_attr "mode" "TI")])
;; Element extract from a V4SI vector that lives in memory
;; (constraint "o") into an SSE or general register.
7417 (define_insn "*vec_extractv4si_mem"
7418 [(set (match_operand:SI 0 "register_operand" "=x,r")
7420 (match_operand:V4SI 1 "memory_operand" "o,o")
7421 (parallel [(match_operand 2 "const_0_to_3_operand")])))]
;; Element extract from a V4SI vector in memory into a DImode register,
;; enabled for TARGET_64BIT && TARGET_SSE.  After reload it is split into
;; a zero_extend:DI of an SImode memory reference obtained by offsetting
;; operand 1 by (index * 4) bytes.
7425 (define_insn_and_split "*vec_extractv4si_zext_mem"
7426 [(set (match_operand:DI 0 "register_operand" "=x,r")
7429 (match_operand:V4SI 1 "memory_operand" "o,o")
7430 (parallel [(match_operand:SI 2 "const_0_to_3_operand")]))))]
7431 "TARGET_64BIT && TARGET_SSE"
7433 "&& reload_completed"
7434 [(set (match_dup 0) (zero_extend:DI (match_dup 1)))]
7436 operands[1] = adjust_address (operands[1], SImode, INTVAL (operands[2]) * 4);
;; Extracts element 1 of a V2DI operand.  Seven alternatives, enabled
;; under TARGET_SSE when source and destination are not both memory.
;; Per-alternative availability is given by the "isa" attribute
;; (x64_sse4, *, sse2_noavx, avx, noavx, *, x64); the byte-shift forms
;; use a fixed shift count of 8.
7439 (define_insn "*vec_extractv2di_1"
7440 [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,m,x,x,x,x,r")
7442 (match_operand:V2DI 1 "nonimmediate_operand" "x ,x,0,x,x,o,o")
7443 (parallel [(const_int 1)])))]
7444 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7446 %vpextrq\t{$1, %1, %0|%0, %1, 1}
7447 %vmovhps\t{%1, %0|%0, %1}
7448 psrldq\t{$8, %0|%0, 8}
7449 vpsrldq\t{$8, %1, %0|%0, %1, 8}
7450 movhlps\t{%1, %0|%0, %1}
7453 [(set_attr "isa" "x64_sse4,*,sse2_noavx,avx,noavx,*,x64")
7454 (set_attr "type" "sselog1,ssemov,sseishft1,sseishft1,ssemov,ssemov,imov")
7455 (set_attr "length_immediate" "1,*,1,1,*,*,*")
7456 (set_attr "prefix_rex" "1,*,*,*,*,*,*")
7457 (set_attr "prefix_extra" "1,*,*,*,*,*,*")
7458 (set_attr "prefix" "maybe_vex,maybe_vex,orig,vex,orig,*,*")
7459 (set_attr "mode" "TI,V2SF,TI,TI,V4SF,DI,DI")])
;; Post-reload rewrite (TARGET_SSE && reload_completed) of an element
;; extract from a VI_128 vector in memory: it becomes a scalar load from
;; operand 1 offset by index * GET_MODE_SIZE (<ssescalarmode>mode) bytes.
7462 [(set (match_operand:<ssescalarmode> 0 "register_operand")
7463 (vec_select:<ssescalarmode>
7464 (match_operand:VI_128 1 "memory_operand")
7466 [(match_operand 2 "const_0_to_<ssescalarnummask>_operand")])))]
7467 "TARGET_SSE && reload_completed"
7468 [(set (match_dup 0) (match_dup 1))]
7470 int offs = INTVAL (operands[2]) * GET_MODE_SIZE (<ssescalarmode>mode);
7472 operands[1] = adjust_address (operands[1], <ssescalarmode>mode, offs);
;; Builds a V4SI from SImode operand 1.  Three alternatives: %vpshufd
;; with immediate 0 from an SSE register (isa sse2), vbroadcastss from
;; memory (isa avx), and shufps with immediate 0 where operand 1 is tied
;; to the destination (isa noavx).
7475 (define_insn "*vec_dupv4si"
7476 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
7478 (match_operand:SI 1 "nonimmediate_operand" " x,m,0")))]
7481 %vpshufd\t{$0, %1, %0|%0, %1, 0}
7482 vbroadcastss\t{%1, %0|%0, %1}
7483 shufps\t{$0, %0, %0|%0, %0, 0}"
7484 [(set_attr "isa" "sse2,avx,noavx")
7485 (set_attr "type" "sselog1,ssemov,sselog1")
7486 (set_attr "length_immediate" "1,0,1")
7487 (set_attr "prefix_extra" "0,1,*")
7488 (set_attr "prefix" "maybe_vex,vex,orig")
7489 (set_attr "mode" "TI,V4SF,V4SF")])
;; Builds a V2DI from DImode operand 1.  Four alternatives whose
;; availability is sse2_noavx, avx, sse3 and noavx respectively; the
;; memory-source alternative is the sse3 one (%vmovddup).
7491 (define_insn "*vec_dupv2di"
7492 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x,x")
7494 (match_operand:DI 1 "nonimmediate_operand" " 0,x,m,0")))]
7498 vpunpcklqdq\t{%d1, %0|%0, %d1}
7499 %vmovddup\t{%1, %0|%0, %1}
7501 [(set_attr "isa" "sse2_noavx,avx,sse3,noavx")
7502 (set_attr "type" "sselog1,sselog1,sselog1,ssemov")
7503 (set_attr "prefix" "orig,vex,maybe_vex,orig")
7504 (set_attr "mode" "TI,TI,DF,V4SF")])
;; Forms a V2SI from SImode operands 1 and 2.  Seven alternatives:
;; pinsrd/vpinsrd with immediate 1, punpckldq/vpunpckldq on SSE
;; registers, %vmovd when operand 2 is constrained "C", and two MMX
;; register forms (punpckldq, movd).
7506 (define_insn "*vec_concatv2si_sse4_1"
7507 [(set (match_operand:V2SI 0 "register_operand" "=x, x,x,x, x, *y,*y")
7509 (match_operand:SI 1 "nonimmediate_operand" " 0, x,0,x,rm, 0,rm")
7510 (match_operand:SI 2 "vector_move_operand" "rm,rm,x,x, C,*ym, C")))]
7513 pinsrd\t{$1, %2, %0|%0, %2, 1}
7514 vpinsrd\t{$1, %2, %1, %0|%0, %1, %2, 1}
7515 punpckldq\t{%2, %0|%0, %2}
7516 vpunpckldq\t{%2, %1, %0|%0, %1, %2}
7517 %vmovd\t{%1, %0|%0, %1}
7518 punpckldq\t{%2, %0|%0, %2}
7519 movd\t{%1, %0|%0, %1}"
7520 [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
7521 (set_attr "type" "sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov")
7522 (set_attr "prefix_extra" "1,1,*,*,*,*,*")
7523 (set_attr "length_immediate" "1,1,*,*,*,*,*")
7524 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig")
7525 (set_attr "mode" "TI,TI,TI,TI,TI,DI,DI")])
7527 ;; ??? In theory we can match memory for the MMX alternative, but allowing
7528 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
7529 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Forms a V2SI from SImode operands 1 and 2 when TARGET_SSE holds and
;; TARGET_SSE4_1 does not.  Seven alternatives; the first three are
;; restricted to isa sse2.  Operand 2 is a register or zero
;; (reg_or_0_operand).
7530 (define_insn "*vec_concatv2si"
7531 [(set (match_operand:V2SI 0 "register_operand" "=x,x ,*y,x,x,*y,*y")
7533 (match_operand:SI 1 "nonimmediate_operand" " 0,rm,rm,0,m, 0,*rm")
7534 (match_operand:SI 2 "reg_or_0_operand" " x,C ,C, x,C,*y,C")))]
7535 "TARGET_SSE && !TARGET_SSE4_1"
7537 punpckldq\t{%2, %0|%0, %2}
7538 movd\t{%1, %0|%0, %1}
7539 movd\t{%1, %0|%0, %1}
7540 unpcklps\t{%2, %0|%0, %2}
7541 movss\t{%1, %0|%0, %1}
7542 punpckldq\t{%2, %0|%0, %2}
7543 movd\t{%1, %0|%0, %1}"
7544 [(set_attr "isa" "sse2,sse2,sse2,*,*,*,*")
7545 (set_attr "type" "sselog,ssemov,mmxmov,sselog,ssemov,mmxcvt,mmxmov")
7546 (set_attr "mode" "TI,TI,DI,V4SF,SF,DI,DI")])
;; Forms a V4SI from two V2SI operands.  Five alternatives:
;; punpcklqdq/vpunpcklqdq and movlhps for a register operand 2, and
;; movhps/vmovhps for a memory operand 2 (printed as %q2 in the Intel
;; syntax half of the template).
7548 (define_insn "*vec_concatv4si"
7549 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x,x,x")
7551 (match_operand:V2SI 1 "register_operand" " 0,x,0,0,x")
7552 (match_operand:V2SI 2 "nonimmediate_operand" " x,x,x,m,m")))]
7555 punpcklqdq\t{%2, %0|%0, %2}
7556 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7557 movlhps\t{%2, %0|%0, %2}
7558 movhps\t{%2, %0|%0, %q2}
7559 vmovhps\t{%2, %1, %0|%0, %1, %q2}"
7560 [(set_attr "isa" "sse2_noavx,avx,noavx,noavx,avx")
7561 (set_attr "type" "sselog,sselog,ssemov,ssemov,ssemov")
7562 (set_attr "prefix" "orig,vex,orig,orig,vex")
7563 (set_attr "mode" "TI,TI,V4SF,V2SF,V2SF")])
7565 ;; movd instead of movq is required to handle broken assemblers.
;; Forms a V2DI from DImode operands 1 and 2.  Ten alternatives:
;; pinsrq/vpinsrq with immediate 1, %vmovd from a general register,
;; %vmovq, movq2dq from an MMX register, punpcklqdq/vpunpcklqdq,
;; movlhps, and movhps/vmovhps from memory.  Alternatives 0, 1, 5 and 6
;; have type sselog; the rest have type ssemov.
7566 (define_insn "vec_concatv2di"
7567 [(set (match_operand:V2DI 0 "register_operand"
7568 "=x,x ,Yi,x ,!x,x,x,x,x,x")
7570 (match_operand:DI 1 "nonimmediate_operand"
7571 " 0,x ,r ,xm,*y,0,x,0,0,x")
7572 (match_operand:DI 2 "vector_move_operand"
7573 "rm,rm,C ,C ,C ,x,x,x,m,m")))]
7576 pinsrq\t{$1, %2, %0|%0, %2, 1}
7577 vpinsrq\t{$1, %2, %1, %0|%0, %1, %2, 1}
7578 %vmovd\t{%1, %0|%0, %1}
7579 %vmovq\t{%1, %0|%0, %1}
7580 movq2dq\t{%1, %0|%0, %1}
7581 punpcklqdq\t{%2, %0|%0, %2}
7582 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7583 movlhps\t{%2, %0|%0, %2}
7584 movhps\t{%2, %0|%0, %2}
7585 vmovhps\t{%2, %1, %0|%0, %1, %2}"
7586 [(set_attr "isa" "x64_sse4_noavx,x64_avx,x64,sse2,sse2,sse2_noavx,avx,noavx,noavx,avx")
7589 (eq_attr "alternative" "0,1,5,6")
7590 (const_string "sselog")
7591 (const_string "ssemov")))
7592 (set_attr "prefix_rex" "1,1,1,*,*,*,*,*,*,*")
7593 (set_attr "prefix_extra" "1,1,*,*,*,*,*,*,*,*")
7594 (set_attr "length_immediate" "1,1,*,*,*,*,*,*,*,*")
7595 (set_attr "prefix" "orig,vex,maybe_vex,maybe_vex,orig,orig,vex,orig,orig,vex")
7596 (set_attr "mode" "TI,TI,TI,TI,TI,TI,TI,V4SF,V2SF,V2SF")])
;; Expander for the VI124_AVX2 modes; the result has mode
;; <sseunpackmode>.  All work is delegated to ix86_expand_sse_unpack,
;; called with (false, false) as its last two arguments.
7598 (define_expand "vec_unpacks_lo_<mode>"
7599 [(match_operand:<sseunpackmode> 0 "register_operand")
7600 (match_operand:VI124_AVX2 1 "register_operand")]
7602 "ix86_expand_sse_unpack (operands[0], operands[1], false, false); DONE;")
;; Expander for the VI124_AVX2 modes; the result has mode
;; <sseunpackmode>.  All work is delegated to ix86_expand_sse_unpack,
;; called with (false, true) as its last two arguments.
7604 (define_expand "vec_unpacks_hi_<mode>"
7605 [(match_operand:<sseunpackmode> 0 "register_operand")
7606 (match_operand:VI124_AVX2 1 "register_operand")]
7608 "ix86_expand_sse_unpack (operands[0], operands[1], false, true); DONE;")
;; Expander for the VI124_AVX2 modes; the result has mode
;; <sseunpackmode>.  All work is delegated to ix86_expand_sse_unpack,
;; called with (true, false) as its last two arguments.
7610 (define_expand "vec_unpacku_lo_<mode>"
7611 [(match_operand:<sseunpackmode> 0 "register_operand")
7612 (match_operand:VI124_AVX2 1 "register_operand")]
7614 "ix86_expand_sse_unpack (operands[0], operands[1], true, false); DONE;")
;; Expander for the VI124_AVX2 modes; the result has mode
;; <sseunpackmode>.  All work is delegated to ix86_expand_sse_unpack,
;; called with (true, true) as its last two arguments.
7616 (define_expand "vec_unpacku_hi_<mode>"
7617 [(match_operand:<sseunpackmode> 0 "register_operand")
7618 (match_operand:VI124_AVX2 1 "register_operand")]
7620 "ix86_expand_sse_unpack (operands[0], operands[1], true, true); DONE;")
7622 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7626 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander for the VI12_AVX2 averaging pattern.  The RTL zero-extends
;; both inputs to <ssedoublemode>, adds them, adds a further term and
;; shifts right logically.  The preparation code sets operands[3] to the
;; vector constant 1 and then calls ix86_fixup_binary_operands_no_copy
;; for PLUS.
7628 (define_expand "<sse2_avx2>_uavg<mode>3"
7629 [(set (match_operand:VI12_AVX2 0 "register_operand")
7631 (lshiftrt:<ssedoublemode>
7632 (plus:<ssedoublemode>
7633 (plus:<ssedoublemode>
7634 (zero_extend:<ssedoublemode>
7635 (match_operand:VI12_AVX2 1 "nonimmediate_operand"))
7636 (zero_extend:<ssedoublemode>
7637 (match_operand:VI12_AVX2 2 "nonimmediate_operand")))
7642 operands[3] = CONST1_RTX(<MODE>mode);
7643 ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);
;; Matching insn for the averaging pattern: zero-extended operands 1 and
;; 2 are added, constant-1 operand 3 is added, and the sum is shifted
;; right logically.  Operands 1 and 2 are commutative ("%").  Enabled
;; when TARGET_SSE2 holds and ix86_binary_operator_ok accepts the
;; operands.  Prints pavg<ssemodesuffix> (operand 1 tied to the
;; destination) or the three-operand vpavg<ssemodesuffix>.
7646 (define_insn "*<sse2_avx2>_uavg<mode>3"
7647 [(set (match_operand:VI12_AVX2 0 "register_operand" "=x,x")
7649 (lshiftrt:<ssedoublemode>
7650 (plus:<ssedoublemode>
7651 (plus:<ssedoublemode>
7652 (zero_extend:<ssedoublemode>
7653 (match_operand:VI12_AVX2 1 "nonimmediate_operand" "%0,x"))
7654 (zero_extend:<ssedoublemode>
7655 (match_operand:VI12_AVX2 2 "nonimmediate_operand" "xm,xm")))
7656 (match_operand:VI12_AVX2 3 "const1_operand"))
7658 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
7660 pavg<ssemodesuffix>\t{%2, %0|%0, %2}
7661 vpavg<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
7662 [(set_attr "isa" "noavx,avx")
7663 (set_attr "type" "sseiadd")
7664 (set_attr "prefix_data16" "1,*")
7665 (set_attr "prefix" "orig,vex")
7666 (set_attr "mode" "<sseinsnmode>")])
7668 ;; The correct representation for this is absolutely enormous, and
7669 ;; surely not generally useful.
;; The operation is therefore written as an operand vector over two
;; <ssebytemode> inputs rather than as explicit arithmetic RTL.  The
;; result mode is VI8_AVX2.  Prints psadbw (operand 1 tied to the
;; destination) or the three-operand vpsadbw.
7670 (define_insn "<sse2_avx2>_psadbw"
7671 [(set (match_operand:VI8_AVX2 0 "register_operand" "=x,x")
7673 [(match_operand:<ssebytemode> 1 "register_operand" "0,x")
7674 (match_operand:<ssebytemode> 2 "nonimmediate_operand" "xm,xm")]
7678 psadbw\t{%2, %0|%0, %2}
7679 vpsadbw\t{%2, %1, %0|%0, %1, %2}"
7680 [(set_attr "isa" "noavx,avx")
7681 (set_attr "type" "sseiadd")
7682 (set_attr "atom_unit" "simul")
7683 (set_attr "prefix_data16" "1,*")
7684 (set_attr "prefix" "orig,vex")
7685 (set_attr "mode" "<sseinsnmode>")])
;; Moves a mask derived from a VF (vector float) register into an SImode
;; general register using %vmovmsk<ssemodesuffix>.
7687 (define_insn "<sse>_movmsk<ssemodesuffix><avxsizesuffix>"
7688 [(set (match_operand:SI 0 "register_operand" "=r")
7690 [(match_operand:VF 1 "register_operand" "x")]
7693 "%vmovmsk<ssemodesuffix>\t{%1, %0|%0, %1}"
7694 [(set_attr "type" "ssemov")
7695 (set_attr "prefix" "maybe_vex")
7696 (set_attr "mode" "<MODE>")])
;; 256-bit vpmovmskb: V32QI register source, SImode general register
;; destination, VEX encoding only.
;; NOTE(review): the "mode" attribute is "DI" here while sse2_pmovmskb
;; uses "SI" for the same SImode destination -- confirm this is intended.
7698 (define_insn "avx2_pmovmskb"
7699 [(set (match_operand:SI 0 "register_operand" "=r")
7700 (unspec:SI [(match_operand:V32QI 1 "register_operand" "x")]
7703 "vpmovmskb\t{%1, %0|%0, %1}"
7704 [(set_attr "type" "ssemov")
7705 (set_attr "prefix" "vex")
7706 (set_attr "mode" "DI")])
;; 128-bit pmovmskb: V16QI register source, SImode general register
;; destination.  Emitted as pmovmskb or vpmovmskb ("%v" template marker,
;; "maybe_vex" prefix attribute).
7708 (define_insn "sse2_pmovmskb"
7709 [(set (match_operand:SI 0 "register_operand" "=r")
7710 (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
7713 "%vpmovmskb\t{%1, %0|%0, %1}"
7714 [(set_attr "type" "ssemov")
7715 (set_attr "prefix_data16" "1")
7716 (set_attr "prefix" "maybe_vex")
7717 (set_attr "mode" "SI")])
;; Named expander for maskmovdqu.  Operand 0 is the V16QI memory
;; destination; operands 1 and 2 are V16QI registers.  The remainder of
;; the pattern is not visible in this excerpt.
7719 (define_expand "sse2_maskmovdqu"
7720 [(set (match_operand:V16QI 0 "memory_operand")
7721 (unspec:V16QI [(match_operand:V16QI 1 "register_operand")
7722 (match_operand:V16QI 2 "register_operand")
;; Matcher for maskmovdqu.  Operand 0 is the store address (pointer mode P,
;; constraint "D"); the destination memory is also listed as an unspec
;; input through (match_dup 0).  Operands 1 and 2 are V16QI registers.
;; "length_vex" is computed explicitly as 3 plus one when operand 2 is a
;; REX SSE register; the comment inside the attribute list gives the reason.
7727 (define_insn "*sse2_maskmovdqu"
7728 [(set (mem:V16QI (match_operand:P 0 "register_operand" "D"))
7729 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
7730 (match_operand:V16QI 2 "register_operand" "x")
7731 (mem:V16QI (match_dup 0))]
7734 "%vmaskmovdqu\t{%2, %1|%1, %2}"
7735 [(set_attr "type" "ssemov")
7736 (set_attr "prefix_data16" "1")
7737 ;; The implicit %rdi operand confuses default length_vex computation.
7738 (set (attr "length_vex")
7739 (symbol_ref ("3 + REX_SSE_REGNO_P (REGNO (operands[2]))")))
7740 (set_attr "prefix" "maybe_vex")
7741 (set_attr "mode" "TI")])
;; unspec_volatile with a single SImode memory input (operand 0).  The
;; attributes classify it as an MXCSR access ("atom_sse_attr" "mxcsr")
;; that loads from memory.  The output template is not visible here.
7743 (define_insn "sse_ldmxcsr"
7744 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
7748 [(set_attr "type" "sse")
7749 (set_attr "atom_sse_attr" "mxcsr")
7750 (set_attr "prefix" "maybe_vex")
7751 (set_attr "memory" "load")])
;; Stores the value of unspec_volatile UNSPECV_STMXCSR into the SImode
;; memory operand 0.  The attributes classify it as an MXCSR access that
;; stores to memory.  The output template is not visible here.
7753 (define_insn "sse_stmxcsr"
7754 [(set (match_operand:SI 0 "memory_operand" "=m")
7755 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
7758 [(set_attr "type" "sse")
7759 (set_attr "atom_sse_attr" "mxcsr")
7760 (set_attr "prefix" "maybe_vex")
7761 (set_attr "memory" "store")])
;; unspec_volatile whose only input is an address (address_operand,
;; constraint "p"), not a memory reference.  The "memory" attribute is
;; "unknown" and "atom_sse_attr" is "fence".
7763 (define_insn "sse2_clflush"
7764 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
7768 [(set_attr "type" "sse")
7769 (set_attr "atom_sse_attr" "fence")
7770 (set_attr "memory" "unknown")])
;; unspec_volatile with two SImode register inputs pinned by constraints
;; "a" (operand 0) and "c" (operand 1).  The insn length is fixed at 3.
7773 (define_insn "sse3_mwait"
7774 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
7775 (match_operand:SI 1 "register_operand" "c")]
7778 ;; 64bit version is "mwait %rax,%rcx". But only lower 32bits are used.
7779 ;; Since 32bit register operands are implicitly zero extended to 64bit,
7780 ;; we only need to set up 32bit registers.
7782 [(set_attr "length" "3")])
;; 32-bit-only form (condition requires !TARGET_64BIT).  All three inputs
;; are SImode registers pinned by constraints "a", "c" and "d"; they are
;; printed explicitly in the template.  The insn length is fixed at 3.
7784 (define_insn "sse3_monitor"
7785 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
7786 (match_operand:SI 1 "register_operand" "c")
7787 (match_operand:SI 2 "register_operand" "d")]
7789 "TARGET_SSE3 && !TARGET_64BIT"
7790 "monitor\t%0, %1, %2"
7791 [(set_attr "length" "3")])
;; 64-bit form (condition requires TARGET_64BIT).  Operand 0 is in pointer
;; mode P (constraint "a"); operands 1 and 2 stay SImode (constraints "c"
;; and "d") for the reason given in the comment below.  Length fixed at 3.
7793 (define_insn "sse3_monitor64_<mode>"
7794 [(unspec_volatile [(match_operand:P 0 "register_operand" "a")
7795 (match_operand:SI 1 "register_operand" "c")
7796 (match_operand:SI 2 "register_operand" "d")]
7798 "TARGET_SSE3 && TARGET_64BIT"
7799 ;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in
7800 ;; RCX and RDX are used. Since 32bit register operands are implicitly
7801 ;; zero extended to 64bit, we only need to set up 32bit registers.
7803 [(set_attr "length" "3")])
7805 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7807 ;; SSSE3 instructions
7809 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; RTL codes iterated over: plain and signed-saturating add and subtract.
7811 (define_code_iterator ssse3_plusminus [plus ss_plus minus ss_minus])
;; 256-bit horizontal word operation (vph<plusminus_mnemonic>w).  The
;; visible RTL selects HImode elements 0..15 of operand 1 and then 0..15
;; of operand 2, grouped in adjacent pairs (0,1), (2,3), ...  The RTL code
;; that combines each pair sits on lines not shown in this excerpt.
;; Single alternative, VEX-encoded three-operand form.
7813 (define_insn "avx2_ph<plusminus_mnemonic>wv16hi3"
7814 [(set (match_operand:V16HI 0 "register_operand" "=x")
7821 (match_operand:V16HI 1 "register_operand" "x")
7822 (parallel [(const_int 0)]))
7823 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
7825 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
7826 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
7829 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
7830 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
7832 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
7833 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
7837 (vec_select:HI (match_dup 1) (parallel [(const_int 8)]))
7838 (vec_select:HI (match_dup 1) (parallel [(const_int 9)])))
7840 (vec_select:HI (match_dup 1) (parallel [(const_int 10)]))
7841 (vec_select:HI (match_dup 1) (parallel [(const_int 11)]))))
7844 (vec_select:HI (match_dup 1) (parallel [(const_int 12)]))
7845 (vec_select:HI (match_dup 1) (parallel [(const_int 13)])))
7847 (vec_select:HI (match_dup 1) (parallel [(const_int 14)]))
7848 (vec_select:HI (match_dup 1) (parallel [(const_int 15)]))))))
7854 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
7855 (parallel [(const_int 0)]))
7856 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
7858 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
7859 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
7862 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
7863 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
7865 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
7866 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))
7870 (vec_select:HI (match_dup 2) (parallel [(const_int 8)]))
7871 (vec_select:HI (match_dup 2) (parallel [(const_int 9)])))
7873 (vec_select:HI (match_dup 2) (parallel [(const_int 10)]))
7874 (vec_select:HI (match_dup 2) (parallel [(const_int 11)]))))
7877 (vec_select:HI (match_dup 2) (parallel [(const_int 12)]))
7878 (vec_select:HI (match_dup 2) (parallel [(const_int 13)])))
7880 (vec_select:HI (match_dup 2) (parallel [(const_int 14)]))
7881 (vec_select:HI (match_dup 2) (parallel [(const_int 15)]))))))))]
7883 "vph<plusminus_mnemonic>w\t{%2, %1, %0|%0, %1, %2}"
7884 [(set_attr "type" "sseiadd")
7885 (set_attr "prefix_extra" "1")
7886 (set_attr "prefix" "vex")
7887 (set_attr "mode" "OI")])
;; 128-bit horizontal word operation.  The visible RTL selects HImode
;; elements 0..7 of operand 1 and then 0..7 of operand 2 in adjacent
;; pairs; the combining RTL code is on lines not shown here.
;; Alternative 0: ph<...>w with operand 1 tied to the destination.
;; Alternative 1: three-operand vph<...>w.
7889 (define_insn "ssse3_ph<plusminus_mnemonic>wv8hi3"
7890 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
7896 (match_operand:V8HI 1 "register_operand" "0,x")
7897 (parallel [(const_int 0)]))
7898 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
7900 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
7901 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
7904 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
7905 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
7907 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
7908 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
7913 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
7914 (parallel [(const_int 0)]))
7915 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
7917 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
7918 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
7921 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
7922 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
7924 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
7925 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
7928 ph<plusminus_mnemonic>w\t{%2, %0|%0, %2}
7929 vph<plusminus_mnemonic>w\t{%2, %1, %0|%0, %1, %2}"
7930 [(set_attr "isa" "noavx,avx")
7931 (set_attr "type" "sseiadd")
7932 (set_attr "atom_unit" "complex")
7933 (set_attr "prefix_data16" "1,*")
7934 (set_attr "prefix_extra" "1")
7935 (set_attr "prefix" "orig,vex")
7936 (set_attr "mode" "TI")])
;; 64-bit horizontal word operation on "y"-constrained registers.  The
;; visible RTL selects HImode elements 0..3 of operand 1 and then 0..3 of
;; operand 2 in adjacent pairs; the combining RTL code is not shown here.
;; Operand 1 is tied to the destination (two-operand form only).
7938 (define_insn "ssse3_ph<plusminus_mnemonic>wv4hi3"
7939 [(set (match_operand:V4HI 0 "register_operand" "=y")
7944 (match_operand:V4HI 1 "register_operand" "0")
7945 (parallel [(const_int 0)]))
7946 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
7948 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
7949 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
7953 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
7954 (parallel [(const_int 0)]))
7955 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
7957 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
7958 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
7960 "ph<plusminus_mnemonic>w\t{%2, %0|%0, %2}"
7961 [(set_attr "type" "sseiadd")
7962 (set_attr "atom_unit" "complex")
7963 (set_attr "prefix_extra" "1")
7964 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
7965 (set_attr "mode" "DI")])
;; 256-bit horizontal doubleword operation (vph<plusminus_mnemonic>d).
;; The visible RTL selects SImode elements 0..7 of operand 1 and then
;; 0..7 of operand 2 in adjacent pairs; the combining RTL code is on lines
;; not shown here.  Single alternative, VEX-encoded three-operand form.
7967 (define_insn "avx2_ph<plusminus_mnemonic>dv8si3"
7968 [(set (match_operand:V8SI 0 "register_operand" "=x")
7974 (match_operand:V8SI 1 "register_operand" "x")
7975 (parallel [(const_int 0)]))
7976 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
7978 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
7979 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
7982 (vec_select:SI (match_dup 1) (parallel [(const_int 4)]))
7983 (vec_select:SI (match_dup 1) (parallel [(const_int 5)])))
7985 (vec_select:SI (match_dup 1) (parallel [(const_int 6)]))
7986 (vec_select:SI (match_dup 1) (parallel [(const_int 7)])))))
7991 (match_operand:V8SI 2 "nonimmediate_operand" "xm")
7992 (parallel [(const_int 0)]))
7993 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
7995 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
7996 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))
7999 (vec_select:SI (match_dup 2) (parallel [(const_int 4)]))
8000 (vec_select:SI (match_dup 2) (parallel [(const_int 5)])))
8002 (vec_select:SI (match_dup 2) (parallel [(const_int 6)]))
8003 (vec_select:SI (match_dup 2) (parallel [(const_int 7)])))))))]
8005 "vph<plusminus_mnemonic>d\t{%2, %1, %0|%0, %1, %2}"
8006 [(set_attr "type" "sseiadd")
8007 (set_attr "prefix_extra" "1")
8008 (set_attr "prefix" "vex")
8009 (set_attr "mode" "OI")])
;; 128-bit horizontal doubleword operation.  The visible RTL selects
;; SImode elements 0..3 of operand 1 and then 0..3 of operand 2 in
;; adjacent pairs; the combining RTL code is on lines not shown here.
;; Alternative 0: ph<...>d with operand 1 tied to the destination.
;; Alternative 1: three-operand vph<...>d.
8011 (define_insn "ssse3_ph<plusminus_mnemonic>dv4si3"
8012 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
8017 (match_operand:V4SI 1 "register_operand" "0,x")
8018 (parallel [(const_int 0)]))
8019 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8021 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8022 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8026 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
8027 (parallel [(const_int 0)]))
8028 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8030 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8031 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
8034 ph<plusminus_mnemonic>d\t{%2, %0|%0, %2}
8035 vph<plusminus_mnemonic>d\t{%2, %1, %0|%0, %1, %2}"
8036 [(set_attr "isa" "noavx,avx")
8037 (set_attr "type" "sseiadd")
8038 (set_attr "atom_unit" "complex")
8039 (set_attr "prefix_data16" "1,*")
8040 (set_attr "prefix_extra" "1")
8041 (set_attr "prefix" "orig,vex")
8042 (set_attr "mode" "TI")])
;; 64-bit horizontal doubleword operation on "y"-constrained registers.
;; The visible RTL selects SImode elements 0 and 1 of operand 1 and of
;; operand 2; the combining RTL code is on lines not shown here.
;; Operand 1 is tied to the destination (two-operand form only).
8044 (define_insn "ssse3_ph<plusminus_mnemonic>dv2si3"
8045 [(set (match_operand:V2SI 0 "register_operand" "=y")
8049 (match_operand:V2SI 1 "register_operand" "0")
8050 (parallel [(const_int 0)]))
8051 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8054 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
8055 (parallel [(const_int 0)]))
8056 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
8058 "ph<plusminus_mnemonic>d\t{%2, %0|%0, %2}"
8059 [(set_attr "type" "sseiadd")
8060 (set_attr "atom_unit" "complex")
8061 (set_attr "prefix_extra" "1")
8062 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8063 (set_attr "mode" "DI")])
;; 256-bit vpmaddubsw.  The visible RTL selects the even-indexed bytes
;; (0, 2, ..., 30) of operands 1 and 2, then the odd-indexed bytes
;; (1, 3, ..., 31) of the same operands via match_dup.  The extension,
;; multiply and add codes wrapping these selections are on lines not
;; shown in this excerpt.  Single alternative, VEX-encoded.
8065 (define_insn "avx2_pmaddubsw256"
8066 [(set (match_operand:V16HI 0 "register_operand" "=x")
8071 (match_operand:V32QI 1 "register_operand" "x")
8072 (parallel [(const_int 0) (const_int 2)
8073 (const_int 4) (const_int 6)
8074 (const_int 8) (const_int 10)
8075 (const_int 12) (const_int 14)
8076 (const_int 16) (const_int 18)
8077 (const_int 20) (const_int 22)
8078 (const_int 24) (const_int 26)
8079 (const_int 28) (const_int 30)])))
8082 (match_operand:V32QI 2 "nonimmediate_operand" "xm")
8083 (parallel [(const_int 0) (const_int 2)
8084 (const_int 4) (const_int 6)
8085 (const_int 8) (const_int 10)
8086 (const_int 12) (const_int 14)
8087 (const_int 16) (const_int 18)
8088 (const_int 20) (const_int 22)
8089 (const_int 24) (const_int 26)
8090 (const_int 28) (const_int 30)]))))
8093 (vec_select:V16QI (match_dup 1)
8094 (parallel [(const_int 1) (const_int 3)
8095 (const_int 5) (const_int 7)
8096 (const_int 9) (const_int 11)
8097 (const_int 13) (const_int 15)
8098 (const_int 17) (const_int 19)
8099 (const_int 21) (const_int 23)
8100 (const_int 25) (const_int 27)
8101 (const_int 29) (const_int 31)])))
8103 (vec_select:V16QI (match_dup 2)
8104 (parallel [(const_int 1) (const_int 3)
8105 (const_int 5) (const_int 7)
8106 (const_int 9) (const_int 11)
8107 (const_int 13) (const_int 15)
8108 (const_int 17) (const_int 19)
8109 (const_int 21) (const_int 23)
8110 (const_int 25) (const_int 27)
8111 (const_int 29) (const_int 31)]))))))]
8113 "vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
8114 [(set_attr "type" "sseiadd")
8115 (set_attr "prefix_extra" "1")
8116 (set_attr "prefix" "vex")
8117 (set_attr "mode" "OI")])
;; 128-bit pmaddubsw.  The visible RTL selects the even-indexed bytes
;; (0, 2, ..., 14) of operands 1 and 2, then the odd-indexed bytes
;; (1, 3, ..., 15) via match_dup; the wrapping arithmetic codes are on
;; lines not shown here.  Alternative 0 ties operand 1 to the destination
;; (pmaddubsw); alternative 1 is the three-operand vpmaddubsw.
8119 (define_insn "ssse3_pmaddubsw128"
8120 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
8125 (match_operand:V16QI 1 "register_operand" "0,x")
8126 (parallel [(const_int 0) (const_int 2)
8127 (const_int 4) (const_int 6)
8128 (const_int 8) (const_int 10)
8129 (const_int 12) (const_int 14)])))
8132 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")
8133 (parallel [(const_int 0) (const_int 2)
8134 (const_int 4) (const_int 6)
8135 (const_int 8) (const_int 10)
8136 (const_int 12) (const_int 14)]))))
8139 (vec_select:V8QI (match_dup 1)
8140 (parallel [(const_int 1) (const_int 3)
8141 (const_int 5) (const_int 7)
8142 (const_int 9) (const_int 11)
8143 (const_int 13) (const_int 15)])))
8145 (vec_select:V8QI (match_dup 2)
8146 (parallel [(const_int 1) (const_int 3)
8147 (const_int 5) (const_int 7)
8148 (const_int 9) (const_int 11)
8149 (const_int 13) (const_int 15)]))))))]
8152 pmaddubsw\t{%2, %0|%0, %2}
8153 vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
8154 [(set_attr "isa" "noavx,avx")
8155 (set_attr "type" "sseiadd")
8156 (set_attr "atom_unit" "simul")
8157 (set_attr "prefix_data16" "1,*")
8158 (set_attr "prefix_extra" "1")
8159 (set_attr "prefix" "orig,vex")
8160 (set_attr "mode" "TI")])
;; 64-bit pmaddubsw on "y"-constrained registers.  The visible RTL selects
;; the even-indexed bytes (0, 2, 4, 6) of operands 1 and 2, then the
;; odd-indexed bytes (1, 3, 5, 7) via match_dup; the wrapping arithmetic
;; codes are on lines not shown here.  Operand 1 is tied to the destination.
8162 (define_insn "ssse3_pmaddubsw"
8163 [(set (match_operand:V4HI 0 "register_operand" "=y")
8168 (match_operand:V8QI 1 "register_operand" "0")
8169 (parallel [(const_int 0) (const_int 2)
8170 (const_int 4) (const_int 6)])))
8173 (match_operand:V8QI 2 "nonimmediate_operand" "ym")
8174 (parallel [(const_int 0) (const_int 2)
8175 (const_int 4) (const_int 6)]))))
8178 (vec_select:V4QI (match_dup 1)
8179 (parallel [(const_int 1) (const_int 3)
8180 (const_int 5) (const_int 7)])))
8182 (vec_select:V4QI (match_dup 2)
8183 (parallel [(const_int 1) (const_int 3)
8184 (const_int 5) (const_int 7)]))))))]
8186 "pmaddubsw\t{%2, %0|%0, %2}"
8187 [(set_attr "type" "sseiadd")
8188 (set_attr "atom_unit" "simul")
8189 (set_attr "prefix_extra" "1")
8190 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8191 (set_attr "mode" "DI")])
;; Modes accepted by the pmulhrsw expander: V4HI, V8HI, and V16HI when
;; TARGET_AVX2 holds.
8193 (define_mode_iterator PMULHRSW
8194 [V4HI V8HI (V16HI "TARGET_AVX2")])
;; Expander for pmulhrsw over the PMULHRSW modes.  The visible RTL
;; sign-extends operands 1 and 2 to <ssedoublemode>, multiplies them,
;; shifts right, adds, and shifts right again; shift counts and the
;; addend are on lines not shown here.  The preparation code sets
;; operands[3] to CONST1_RTX of the mode and calls
;; ix86_fixup_binary_operands_no_copy for MULT.
8196 (define_expand "<ssse3_avx2>_pmulhrsw<mode>3"
8197 [(set (match_operand:PMULHRSW 0 "register_operand")
8199 (lshiftrt:<ssedoublemode>
8200 (plus:<ssedoublemode>
8201 (lshiftrt:<ssedoublemode>
8202 (mult:<ssedoublemode>
8203 (sign_extend:<ssedoublemode>
8204 (match_operand:PMULHRSW 1 "nonimmediate_operand"))
8205 (sign_extend:<ssedoublemode>
8206 (match_operand:PMULHRSW 2 "nonimmediate_operand")))
8212 operands[3] = CONST1_RTX(<MODE>mode);
8213 ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
;; Matcher for pmulhrsw on the VI2_AVX2 modes.  Operands 1 and 2 are
;; commutative ("%") and are sign-extended before the multiply; operand 3
;; must satisfy const1_operand.  Alternative 0 ties operand 1 to the
;; destination (pmulhrsw); alternative 1 is the three-operand vpmulhrsw.
8216 (define_insn "*<ssse3_avx2>_pmulhrsw<mode>3"
8217 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
8219 (lshiftrt:<ssedoublemode>
8220 (plus:<ssedoublemode>
8221 (lshiftrt:<ssedoublemode>
8222 (mult:<ssedoublemode>
8223 (sign_extend:<ssedoublemode>
8224 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,x"))
8225 (sign_extend:<ssedoublemode>
8226 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,xm")))
8228 (match_operand:VI2_AVX2 3 "const1_operand"))
8230 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
8232 pmulhrsw\t{%2, %0|%0, %2}
8233 vpmulhrsw\t{%2, %1, %0|%0, %1, %2}"
8234 [(set_attr "isa" "noavx,avx")
8235 (set_attr "type" "sseimul")
8236 (set_attr "prefix_data16" "1,*")
8237 (set_attr "prefix_extra" "1")
8238 (set_attr "prefix" "orig,vex")
8239 (set_attr "mode" "<sseinsnmode>")])
;; V4HI matcher for pmulhrsw on "y"-constrained registers.  Operands 1 and
;; 2 are commutative ("%"), operand 1 is tied to the destination, and
;; operand 3 must satisfy const1_operand.  Most of the RTL body is on
;; lines not shown in this excerpt.
8241 (define_insn "*ssse3_pmulhrswv4hi3"
8242 [(set (match_operand:V4HI 0 "register_operand" "=y")
8249 (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
8251 (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
8253 (match_operand:V4HI 3 "const1_operand"))
8255 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
8256 "pmulhrsw\t{%2, %0|%0, %2}"
8257 [(set_attr "type" "sseimul")
8258 (set_attr "prefix_extra" "1")
8259 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8260 (set_attr "mode" "DI")])
;; pshufb on the VI1_AVX2 modes, expressed with two vector inputs.
;; Alternative 0 ties operand 1 to the destination (pshufb); alternative 1
;; is the three-operand vpshufb.  Both alternatives are marked "vector"
;; for btver2 decode.
8262 (define_insn "<ssse3_avx2>_pshufb<mode>3"
8263 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
8265 [(match_operand:VI1_AVX2 1 "register_operand" "0,x")
8266 (match_operand:VI1_AVX2 2 "nonimmediate_operand" "xm,xm")]
8270 pshufb\t{%2, %0|%0, %2}
8271 vpshufb\t{%2, %1, %0|%0, %1, %2}"
8272 [(set_attr "isa" "noavx,avx")
8273 (set_attr "type" "sselog1")
8274 (set_attr "prefix_data16" "1,*")
8275 (set_attr "prefix_extra" "1")
8276 (set_attr "prefix" "orig,vex")
8277 (set_attr "btver2_decode" "vector,vector")
8278 (set_attr "mode" "<sseinsnmode>")])
;; V8QI pshufb on "y"-constrained registers; operand 1 is tied to the
;; destination.  The ';' following the template string begins a comment
;; in this file's syntax.
8280 (define_insn "ssse3_pshufbv8qi3"
8281 [(set (match_operand:V8QI 0 "register_operand" "=y")
8282 (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0")
8283 (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
8286 "pshufb\t{%2, %0|%0, %2}";
8287 [(set_attr "type" "sselog1")
8288 (set_attr "prefix_extra" "1")
8289 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8290 (set_attr "mode" "DI")])
;; psign{b,w,d} on the VI124_AVX2 modes, expressed with two vector inputs.
;; Alternative 0 ties operand 1 to the destination (psign); alternative 1
;; is the three-operand vpsign.
8292 (define_insn "<ssse3_avx2>_psign<mode>3"
8293 [(set (match_operand:VI124_AVX2 0 "register_operand" "=x,x")
8295 [(match_operand:VI124_AVX2 1 "register_operand" "0,x")
8296 (match_operand:VI124_AVX2 2 "nonimmediate_operand" "xm,xm")]
8300 psign<ssemodesuffix>\t{%2, %0|%0, %2}
8301 vpsign<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
8302 [(set_attr "isa" "noavx,avx")
8303 (set_attr "type" "sselog1")
8304 (set_attr "prefix_data16" "1,*")
8305 (set_attr "prefix_extra" "1")
8306 (set_attr "prefix" "orig,vex")
8307 (set_attr "mode" "<sseinsnmode>")])
;; psign on the MMXMODEI modes using "y"-constrained registers; operand 1
;; is tied to the destination.  The mnemonic suffix comes from
;; <mmxvecsize>.  The ';' following the template string begins a comment.
8309 (define_insn "ssse3_psign<mode>3"
8310 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
8312 [(match_operand:MMXMODEI 1 "register_operand" "0")
8313 (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
8316 "psign<mmxvecsize>\t{%2, %0|%0, %2}";
8317 [(set_attr "type" "sselog1")
8318 (set_attr "prefix_extra" "1")
8319 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8320 (set_attr "mode" "DI")])
;; palignr on the SSESCALARMODE modes.  Operand 3 must satisfy
;; const_0_to_255_mul_8_operand; the output code divides it by 8 before
;; printing, then selects palignr or vpalignr from which_alternative.
;; Alternative 0 ties operand 1 to the destination.
8322 (define_insn "<ssse3_avx2>_palignr<mode>"
8323 [(set (match_operand:SSESCALARMODE 0 "register_operand" "=x,x")
8324 (unspec:SSESCALARMODE
8325 [(match_operand:SSESCALARMODE 1 "register_operand" "0,x")
8326 (match_operand:SSESCALARMODE 2 "nonimmediate_operand" "xm,xm")
8327 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n,n")]
8331 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
8333 switch (which_alternative)
8336 return "palignr\t{%3, %2, %0|%0, %2, %3}";
8338 return "vpalignr\t{%3, %2, %1, %0|%0, %1, %2, %3}";
8343 [(set_attr "isa" "noavx,avx")
8344 (set_attr "type" "sseishft")
8345 (set_attr "atom_unit" "sishuf")
8346 (set_attr "prefix_data16" "1,*")
8347 (set_attr "prefix_extra" "1")
8348 (set_attr "length_immediate" "1")
8349 (set_attr "prefix" "orig,vex")
8350 (set_attr "mode" "<sseinsnmode>")])
;; DImode palignr on "y"-constrained registers; operand 1 is tied to the
;; destination.  Operand 3 must satisfy const_0_to_255_mul_8_operand and
;; the output code divides it by 8 before printing.
8352 (define_insn "ssse3_palignrdi"
8353 [(set (match_operand:DI 0 "register_operand" "=y")
8354 (unspec:DI [(match_operand:DI 1 "register_operand" "0")
8355 (match_operand:DI 2 "nonimmediate_operand" "ym")
8356 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
8360 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
8361 return "palignr\t{%3, %2, %0|%0, %2, %3}";
8363 [(set_attr "type" "sseishft")
8364 (set_attr "atom_unit" "sishuf")
8365 (set_attr "prefix_extra" "1")
8366 (set_attr "length_immediate" "1")
8367 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8368 (set_attr "mode" "DI")])
;; abs<mode>2 for the VI124_AVX2 modes, emitted as [v]pabs{b,w,d}.  The
;; source may be a register or memory; the destination is not tied to it.
8370 (define_insn "abs<mode>2"
8371 [(set (match_operand:VI124_AVX2 0 "register_operand" "=x")
8373 (match_operand:VI124_AVX2 1 "nonimmediate_operand" "xm")))]
8375 "%vpabs<ssemodesuffix>\t{%1, %0|%0, %1}"
8376 [(set_attr "type" "sselog1")
8377 (set_attr "prefix_data16" "1")
8378 (set_attr "prefix_extra" "1")
8379 (set_attr "prefix" "maybe_vex")
8380 (set_attr "mode" "<sseinsnmode>")])
;; abs<mode>2 for the MMXMODEI modes on "y"-constrained registers, emitted
;; as pabs with the <mmxvecsize> suffix.  The ';' following the template
;; string begins a comment.
8382 (define_insn "abs<mode>2"
8383 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
8385 (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
8387 "pabs<mmxvecsize>\t{%1, %0|%0, %1}";
8388 [(set_attr "type" "sselog1")
8389 (set_attr "prefix_rep" "0")
8390 (set_attr "prefix_extra" "1")
8391 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8392 (set_attr "mode" "DI")])
8394 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8396 ;; AMD SSE4A instructions
8398 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; movntss/movntsd: stores a MODEF value held in an "x" register
;; (operand 1) to memory (operand 0).
8400 (define_insn "sse4a_movnt<mode>"
8401 [(set (match_operand:MODEF 0 "memory_operand" "=m")
8403 [(match_operand:MODEF 1 "register_operand" "x")]
8406 "movnt<ssemodesuffix>\t{%1, %0|%0, %1}"
8407 [(set_attr "type" "ssemov")
8408 (set_attr "mode" "<MODE>")])
;; Vector-register form of movnt: the stored value is element 0 of the
;; VF_128 register operand 1, written to a <ssescalarmode> memory operand.
8410 (define_insn "sse4a_vmmovnt<mode>"
8411 [(set (match_operand:<ssescalarmode> 0 "memory_operand" "=m")
8412 (unspec:<ssescalarmode>
8413 [(vec_select:<ssescalarmode>
8414 (match_operand:VF_128 1 "register_operand" "x")
8415 (parallel [(const_int 0)]))]
8418 "movnt<ssescalarmodesuffix>\t{%1, %0|%0, %1}"
8419 [(set_attr "type" "ssemov")
8420 (set_attr "mode" "<ssescalarmode>")])
;; Immediate form of extrq.  Operand 1 is tied to the destination;
;; operands 2 and 3 are constants in 0..255, hence "length_immediate" 2.
8422 (define_insn "sse4a_extrqi"
8423 [(set (match_operand:V2DI 0 "register_operand" "=x")
8424 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
8425 (match_operand 2 "const_0_to_255_operand")
8426 (match_operand 3 "const_0_to_255_operand")]
8429 "extrq\t{%3, %2, %0|%0, %2, %3}"
8430 [(set_attr "type" "sse")
8431 (set_attr "prefix_data16" "1")
8432 (set_attr "length_immediate" "2")
8433 (set_attr "mode" "TI")])
;; Register form of extrq.  Operand 1 is tied to the destination; the
;; second input is a V16QI register (operand 2).
8435 (define_insn "sse4a_extrq"
8436 [(set (match_operand:V2DI 0 "register_operand" "=x")
8437 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
8438 (match_operand:V16QI 2 "register_operand" "x")]
8441 "extrq\t{%2, %0|%0, %2}"
8442 [(set_attr "type" "sse")
8443 (set_attr "prefix_data16" "1")
8444 (set_attr "mode" "TI")])
;; Immediate form of insertq.  Operand 1 is tied to the destination,
;; operand 2 is a V2DI register, and operands 3 and 4 are constants in
;; 0..255, hence "length_immediate" 2.  The attributes set "prefix_rep"
;; to 1 and "prefix_data16" to 0.
8446 (define_insn "sse4a_insertqi"
8447 [(set (match_operand:V2DI 0 "register_operand" "=x")
8448 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
8449 (match_operand:V2DI 2 "register_operand" "x")
8450 (match_operand 3 "const_0_to_255_operand")
8451 (match_operand 4 "const_0_to_255_operand")]
8454 "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
8455 [(set_attr "type" "sseins")
8456 (set_attr "prefix_data16" "0")
8457 (set_attr "prefix_rep" "1")
8458 (set_attr "length_immediate" "2")
8459 (set_attr "mode" "TI")])
;; Register form of insertq.  Operand 1 is tied to the destination and
;; operand 2 is a V2DI register.  The attributes set "prefix_rep" to 1 and
;; "prefix_data16" to 0.
8461 (define_insn "sse4a_insertq"
8462 [(set (match_operand:V2DI 0 "register_operand" "=x")
8463 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
8464 (match_operand:V2DI 2 "register_operand" "x")]
8467 "insertq\t{%2, %0|%0, %2}"
8468 [(set_attr "type" "sseins")
8469 (set_attr "prefix_data16" "0")
8470 (set_attr "prefix_rep" "1")
8471 (set_attr "mode" "TI")])
8473 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8475 ;; Intel SSE4.1 instructions
8477 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Immediate blend on the VF modes.  Note the RTL lists operand 2 before
;; operand 1.  Operand 3 is an immediate bounded by <blendbits>.
;; Alternative 0 ties operand 1 to the destination (blendps/blendpd);
;; alternative 1 is the three-operand vblend form.
8479 (define_insn "<sse4_1>_blend<ssemodesuffix><avxsizesuffix>"
8480 [(set (match_operand:VF 0 "register_operand" "=x,x")
8482 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")
8483 (match_operand:VF 1 "register_operand" "0,x")
8484 (match_operand:SI 3 "const_0_to_<blendbits>_operand")))]
8487 blend<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
8488 vblend<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8489 [(set_attr "isa" "noavx,avx")
8490 (set_attr "type" "ssemov")
8491 (set_attr "length_immediate" "1")
8492 (set_attr "prefix_data16" "1,*")
8493 (set_attr "prefix_extra" "1")
8494 (set_attr "prefix" "orig,vex")
8495 (set_attr "mode" "<MODE>")])
;; Variable blend on the VF modes; the selector is register operand 3.
;; Alternative 0 ties operand 1 to the destination and constrains the
;; selector to "Yz"; alternative 1 is the three-operand vblendv form with
;; the selector in any "x" register.
;; NOTE(review): "length_immediate" is "1" for both alternatives here,
;; whereas <sse4_1_avx2>_pblendvb uses "*,1" -- confirm which is intended
;; for the non-VEX alternative.
8497 (define_insn "<sse4_1>_blendv<ssemodesuffix><avxsizesuffix>"
8498 [(set (match_operand:VF 0 "register_operand" "=x,x")
8500 [(match_operand:VF 1 "register_operand" "0,x")
8501 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")
8502 (match_operand:VF 3 "register_operand" "Yz,x")]
8506 blendv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
8507 vblendv<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8508 [(set_attr "isa" "noavx,avx")
8509 (set_attr "type" "ssemov")
8510 (set_attr "length_immediate" "1")
8511 (set_attr "prefix_data16" "1,*")
8512 (set_attr "prefix_extra" "1")
8513 (set_attr "prefix" "orig,vex")
8514 (set_attr "btver2_decode" "vector,vector")
8515 (set_attr "mode" "<MODE>")])
;; dpps/dppd on the VF modes.  Operands 1 and 2 are commutative ("%");
;; operand 3 is an immediate in 0..255.  Alternative 0 ties operand 1 to
;; the destination; alternative 1 is the three-operand vdp form.
8517 (define_insn "<sse4_1>_dp<ssemodesuffix><avxsizesuffix>"
8518 [(set (match_operand:VF 0 "register_operand" "=x,x")
8520 [(match_operand:VF 1 "nonimmediate_operand" "%0,x")
8521 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")
8522 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
8526 dp<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
8527 vdp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8528 [(set_attr "isa" "noavx,avx")
8529 (set_attr "type" "ssemul")
8530 (set_attr "length_immediate" "1")
8531 (set_attr "prefix_data16" "1,*")
8532 (set_attr "prefix_extra" "1")
8533 (set_attr "prefix" "orig,vex")
8534 (set_attr "btver2_decode" "vector,vector")
8535 (set_attr "mode" "<MODE>")])
;; [v]movntdqa: loads a VI8_AVX2 vector from memory (operand 1) into a
;; register (operand 0).  The source must be a memory operand.
8537 (define_insn "<sse4_1_avx2>_movntdqa"
8538 [(set (match_operand:VI8_AVX2 0 "register_operand" "=x")
8539 (unspec:VI8_AVX2 [(match_operand:VI8_AVX2 1 "memory_operand" "m")]
8542 "%vmovntdqa\t{%1, %0|%0, %1}"
8543 [(set_attr "type" "ssemov")
8544 (set_attr "prefix_extra" "1")
8545 (set_attr "prefix" "maybe_vex")
8546 (set_attr "mode" "<sseinsnmode>")])
;; mpsadbw on the VI1_AVX2 modes with an immediate (operand 3) in 0..255.
;; Alternative 0 ties operand 1 to the destination; alternative 1 is the
;; three-operand vmpsadbw.
8548 (define_insn "<sse4_1_avx2>_mpsadbw"
8549 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
8551 [(match_operand:VI1_AVX2 1 "register_operand" "0,x")
8552 (match_operand:VI1_AVX2 2 "nonimmediate_operand" "xm,xm")
8553 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
8557 mpsadbw\t{%3, %2, %0|%0, %2, %3}
8558 vmpsadbw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8559 [(set_attr "isa" "noavx,avx")
8560 (set_attr "type" "sselog1")
8561 (set_attr "length_immediate" "1")
8562 (set_attr "prefix_extra" "1")
8563 (set_attr "prefix" "orig,vex")
8564 (set_attr "btver2_decode" "vector,vector")
8565 (set_attr "mode" "<sseinsnmode>")])
;; 256-bit vpackusdw: two V8SI inputs produce one V16HI result.  The RTL
;; codes wrapping the operands are on lines not shown in this excerpt.
8567 (define_insn "avx2_packusdw"
8568 [(set (match_operand:V16HI 0 "register_operand" "=x")
8571 (match_operand:V8SI 1 "register_operand" "x"))
8573 (match_operand:V8SI 2 "nonimmediate_operand" "xm"))))]
8575 "vpackusdw\t{%2, %1, %0|%0, %1, %2}"
8576 [(set_attr "type" "sselog")
8577 (set_attr "prefix_extra" "1")
8578 (set_attr "prefix" "vex")
8579 (set_attr "mode" "OI")])
;; 128-bit packusdw: two V4SI inputs produce one V8HI result.
;; Alternative 0 ties operand 1 to the destination; alternative 1 is the
;; three-operand vpackusdw.  The wrapping RTL codes are not shown here.
8581 (define_insn "sse4_1_packusdw"
8582 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
8585 (match_operand:V4SI 1 "register_operand" "0,x"))
8587 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm"))))]
8590 packusdw\t{%2, %0|%0, %2}
8591 vpackusdw\t{%2, %1, %0|%0, %1, %2}"
8592 [(set_attr "isa" "noavx,avx")
8593 (set_attr "type" "sselog")
8594 (set_attr "prefix_extra" "1")
8595 (set_attr "prefix" "orig,vex")
8596 (set_attr "mode" "TI")])
;; Byte-wise variable blend on the VI1_AVX2 modes; the selector is
;; register operand 3.  Alternative 0 ties operand 1 to the destination
;; and constrains the selector to "Yz"; alternative 1 is the three-operand
;; vpblendvb.  Only the VEX alternative is given a "length_immediate".
8598 (define_insn "<sse4_1_avx2>_pblendvb"
8599 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
8601 [(match_operand:VI1_AVX2 1 "register_operand" "0,x")
8602 (match_operand:VI1_AVX2 2 "nonimmediate_operand" "xm,xm")
8603 (match_operand:VI1_AVX2 3 "register_operand" "Yz,x")]
8607 pblendvb\t{%3, %2, %0|%0, %2, %3}
8608 vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8609 [(set_attr "isa" "noavx,avx")
8610 (set_attr "type" "ssemov")
8611 (set_attr "prefix_extra" "1")
8612 (set_attr "length_immediate" "*,1")
8613 (set_attr "prefix" "orig,vex")
8614 (set_attr "btver2_decode" "vector,vector")
8615 (set_attr "mode" "<sseinsnmode>")])
;; 128-bit pblendw with an immediate (operand 3) in 0..255.  Note the RTL
;; lists operand 2 before operand 1.  Alternative 0 ties operand 1 to the
;; destination; alternative 1 is the three-operand vpblendw.
8617 (define_insn "sse4_1_pblendw"
8618 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
8620 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
8621 (match_operand:V8HI 1 "register_operand" "0,x")
8622 (match_operand:SI 3 "const_0_to_255_operand" "n,n")))]
8625 pblendw\t{%3, %2, %0|%0, %2, %3}
8626 vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8627 [(set_attr "isa" "noavx,avx")
8628 (set_attr "type" "ssemov")
8629 (set_attr "prefix_extra" "1")
8630 (set_attr "length_immediate" "1")
8631 (set_attr "prefix" "orig,vex")
8632 (set_attr "mode" "TI")])
8634 ;; The builtin uses an 8-bit immediate. Expand that.
;; The preparation code keeps the low 8 bits of operand 3 and replicates
;; them into bits 8..15 (val << 8 | val), giving a 16-bit selector for the
;; V16HI pattern.
8635 (define_expand "avx2_pblendw"
8636 [(set (match_operand:V16HI 0 "register_operand")
8638 (match_operand:V16HI 2 "nonimmediate_operand")
8639 (match_operand:V16HI 1 "register_operand")
8640 (match_operand:SI 3 "const_0_to_255_operand")))]
8643 HOST_WIDE_INT val = INTVAL (operands[3]) & 0xff;
8644 operands[3] = GEN_INT (val << 8 | val);
;; Matcher for the 256-bit vpblendw.  Operand 3 must satisfy
;; avx2_pblendw_operand; the output code masks it back down to its low
;; 8 bits before printing the instruction's immediate.
8647 (define_insn "*avx2_pblendw"
8648 [(set (match_operand:V16HI 0 "register_operand" "=x")
8650 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
8651 (match_operand:V16HI 1 "register_operand" "x")
8652 (match_operand:SI 3 "avx2_pblendw_operand" "n")))]
8655 operands[3] = GEN_INT (INTVAL (operands[3]) & 0xff);
8656 return "vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}";
8658 [(set_attr "type" "ssemov")
8659 (set_attr "prefix_extra" "1")
8660 (set_attr "length_immediate" "1")
8661 (set_attr "prefix" "vex")
8662 (set_attr "mode" "OI")])
;; vpblendd on the VI4_AVX2 modes with an immediate (operand 3) in 0..255.
;; Note the RTL lists operand 2 before operand 1.  VEX-only,
;; three-operand form.
8664 (define_insn "avx2_pblendd<mode>"
8665 [(set (match_operand:VI4_AVX2 0 "register_operand" "=x")
8667 (match_operand:VI4_AVX2 2 "nonimmediate_operand" "xm")
8668 (match_operand:VI4_AVX2 1 "register_operand" "x")
8669 (match_operand:SI 3 "const_0_to_255_operand" "n")))]
8671 "vpblendd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8672 [(set_attr "type" "ssemov")
8673 (set_attr "prefix_extra" "1")
8674 (set_attr "length_immediate" "1")
8675 (set_attr "prefix" "vex")
8676 (set_attr "mode" "<sseinsnmode>")])
;; [v]phminposuw: unary V8HI operation expressed as UNSPEC_PHMINPOSUW.
;; The source may be a register or memory.
8678 (define_insn "sse4_1_phminposuw"
8679 [(set (match_operand:V8HI 0 "register_operand" "=x")
8680 (unspec:V8HI [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
8681 UNSPEC_PHMINPOSUW))]
8683 "%vphminposuw\t{%1, %0|%0, %1}"
8684 [(set_attr "type" "sselog1")
8685 (set_attr "prefix_extra" "1")
8686 (set_attr "prefix" "maybe_vex")
8687 (set_attr "mode" "TI")])
;; vpmov{s,z}xbw (suffix from <extsuffix>): widens a whole V16QI source to
;; V16HI.  The extension code itself is on a line not shown here.
8689 (define_insn "avx2_<code>v16qiv16hi2"
8690 [(set (match_operand:V16HI 0 "register_operand" "=x")
8692 (match_operand:V16QI 1 "nonimmediate_operand" "xm")))]
8694 "vpmov<extsuffix>bw\t{%1, %0|%0, %1}"
8695 [(set_attr "type" "ssemov")
8696 (set_attr "prefix_extra" "1")
8697 (set_attr "prefix" "vex")
8698 (set_attr "mode" "OI")])
;; [v]pmov<extsuffix>bw: widens elements 0..7 of a V16QI source to V8HI.
;; The Intel-syntax half of the template prints the source with the "q"
;; operand modifier (%q1).
8700 (define_insn "sse4_1_<code>v8qiv8hi2"
8701 [(set (match_operand:V8HI 0 "register_operand" "=x")
8704 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
8705 (parallel [(const_int 0) (const_int 1)
8706 (const_int 2) (const_int 3)
8707 (const_int 4) (const_int 5)
8708 (const_int 6) (const_int 7)]))))]
8710 "%vpmov<extsuffix>bw\t{%1, %0|%0, %q1}"
8711 [(set_attr "type" "ssemov")
8712 (set_attr "prefix_extra" "1")
8713 (set_attr "prefix" "maybe_vex")
8714 (set_attr "mode" "TI")])
;; vpmov<extsuffix>bd: widens elements 0..7 of a V16QI source to V8SI.
;; The Intel-syntax half of the template prints the source with the "q"
;; operand modifier (%q1).
8716 (define_insn "avx2_<code>v8qiv8si2"
8717 [(set (match_operand:V8SI 0 "register_operand" "=x")
8720 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
8721 (parallel [(const_int 0) (const_int 1)
8722 (const_int 2) (const_int 3)
8723 (const_int 4) (const_int 5)
8724 (const_int 6) (const_int 7)]))))]
8726 "vpmov<extsuffix>bd\t{%1, %0|%0, %q1}"
8727 [(set_attr "type" "ssemov")
8728 (set_attr "prefix_extra" "1")
8729 (set_attr "prefix" "vex")
8730 (set_attr "mode" "OI")])
;; %vpmov<extsuffix>bd, 128-bit form: elements 0..3 of the V16QI operand
;; (see the parallel) are converted into a V4SI register.  The Intel-syntax
;; template prints the source with the %k modifier.
;; NOTE(review): the extension/vec_select wrapper lines and the enabling
;; condition are not visible in this excerpt.
8732 (define_insn "sse4_1_<code>v4qiv4si2"
8733 [(set (match_operand:V4SI 0 "register_operand" "=x")
8736 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
8737 (parallel [(const_int 0) (const_int 1)
8738 (const_int 2) (const_int 3)]))))]
8740 "%vpmov<extsuffix>bd\t{%1, %0|%0, %k1}"
8741 [(set_attr "type" "ssemov")
8742 (set_attr "prefix_extra" "1")
8743 (set_attr "prefix" "maybe_vex")
8744 (set_attr "mode" "TI")])
;; vpmov<extsuffix>wd, 256-bit form: a whole V8HI register-or-memory source
;; is converted into a V8SI register.
;; NOTE(review): the extension operator and the enabling condition are not
;; visible in this excerpt.
8746 (define_insn "avx2_<code>v8hiv8si2"
8747 [(set (match_operand:V8SI 0 "register_operand" "=x")
8749 (match_operand:V8HI 1 "nonimmediate_operand" "xm")))]
8751 "vpmov<extsuffix>wd\t{%1, %0|%0, %1}"
8752 [(set_attr "type" "ssemov")
8753 (set_attr "prefix_extra" "1")
8754 (set_attr "prefix" "vex")
8755 (set_attr "mode" "OI")])
;; %vpmov<extsuffix>wd, 128-bit form: elements 0..3 of the V8HI operand
;; (see the parallel) are converted into a V4SI register.  The Intel-syntax
;; template prints the source with the %q modifier.
;; NOTE(review): the extension/vec_select wrapper lines and the enabling
;; condition are not visible in this excerpt.
8757 (define_insn "sse4_1_<code>v4hiv4si2"
8758 [(set (match_operand:V4SI 0 "register_operand" "=x")
8761 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
8762 (parallel [(const_int 0) (const_int 1)
8763 (const_int 2) (const_int 3)]))))]
8765 "%vpmov<extsuffix>wd\t{%1, %0|%0, %q1}"
8766 [(set_attr "type" "ssemov")
8767 (set_attr "prefix_extra" "1")
8768 (set_attr "prefix" "maybe_vex")
8769 (set_attr "mode" "TI")])
;; vpmov<extsuffix>bq, 256-bit form: elements 0..3 of the V16QI operand
;; (see the parallel) are converted into a V4DI register.  The Intel-syntax
;; template prints the source with the %k modifier.
;; NOTE(review): the extension/vec_select wrapper lines and the enabling
;; condition are not visible in this excerpt.
8771 (define_insn "avx2_<code>v4qiv4di2"
8772 [(set (match_operand:V4DI 0 "register_operand" "=x")
8775 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
8776 (parallel [(const_int 0) (const_int 1)
8777 (const_int 2) (const_int 3)]))))]
8779 "vpmov<extsuffix>bq\t{%1, %0|%0, %k1}"
8780 [(set_attr "type" "ssemov")
8781 (set_attr "prefix_extra" "1")
8782 (set_attr "prefix" "vex")
8783 (set_attr "mode" "OI")])
;; %vpmov<extsuffix>bq, 128-bit form: elements 0..1 of the V16QI operand
;; (see the parallel) are converted into a V2DI register.  The Intel-syntax
;; template prints the source with the %w modifier.
;; NOTE(review): the extension/vec_select wrapper lines and the enabling
;; condition are not visible in this excerpt.
8785 (define_insn "sse4_1_<code>v2qiv2di2"
8786 [(set (match_operand:V2DI 0 "register_operand" "=x")
8789 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
8790 (parallel [(const_int 0) (const_int 1)]))))]
8792 "%vpmov<extsuffix>bq\t{%1, %0|%0, %w1}"
8793 [(set_attr "type" "ssemov")
8794 (set_attr "prefix_extra" "1")
8795 (set_attr "prefix" "maybe_vex")
8796 (set_attr "mode" "TI")])
;; vpmov<extsuffix>wq, 256-bit form: elements 0..3 of the V8HI operand
;; (see the parallel) are converted into a V4DI register.  The Intel-syntax
;; template prints the source with the %q modifier.
;; NOTE(review): the extension/vec_select wrapper lines and the enabling
;; condition are not visible in this excerpt.
8798 (define_insn "avx2_<code>v4hiv4di2"
8799 [(set (match_operand:V4DI 0 "register_operand" "=x")
8802 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
8803 (parallel [(const_int 0) (const_int 1)
8804 (const_int 2) (const_int 3)]))))]
8806 "vpmov<extsuffix>wq\t{%1, %0|%0, %q1}"
8807 [(set_attr "type" "ssemov")
8808 (set_attr "prefix_extra" "1")
8809 (set_attr "prefix" "vex")
8810 (set_attr "mode" "OI")])
;; %vpmov<extsuffix>wq, 128-bit form: elements 0..1 of the V8HI operand
;; (see the parallel) are converted into a V2DI register.  The Intel-syntax
;; template prints the source with the %k modifier.
;; NOTE(review): the extension/vec_select wrapper lines and the enabling
;; condition are not visible in this excerpt.
8812 (define_insn "sse4_1_<code>v2hiv2di2"
8813 [(set (match_operand:V2DI 0 "register_operand" "=x")
8816 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
8817 (parallel [(const_int 0) (const_int 1)]))))]
8819 "%vpmov<extsuffix>wq\t{%1, %0|%0, %k1}"
8820 [(set_attr "type" "ssemov")
8821 (set_attr "prefix_extra" "1")
8822 (set_attr "prefix" "maybe_vex")
8823 (set_attr "mode" "TI")])
;; vpmov<extsuffix>dq, 256-bit form: a whole V4SI register-or-memory source
;; is converted into a V4DI register.
;; The prefix attribute is set to vex here, matching every other
;; avx2_<code>* extension pattern in this group; the template is an
;; unconditional v-prefixed mnemonic.
;; NOTE(review): the extension operator and the enabling condition are not
;; visible in this excerpt.
8825 (define_insn "avx2_<code>v4siv4di2"
8826 [(set (match_operand:V4DI 0 "register_operand" "=x")
8828 (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
8830 "vpmov<extsuffix>dq\t{%1, %0|%0, %1}"
8831 [(set_attr "type" "ssemov")
8832 (set_attr "prefix_extra" "1")
(set_attr "prefix" "vex")
8833 (set_attr "mode" "OI")])
;; %vpmov<extsuffix>dq, 128-bit form: elements 0..1 of the V4SI operand
;; (see the parallel) are converted into a V2DI register.  The Intel-syntax
;; template prints the source with the %q modifier.
;; NOTE(review): the extension/vec_select wrapper lines and the enabling
;; condition are not visible in this excerpt.
8835 (define_insn "sse4_1_<code>v2siv2di2"
8836 [(set (match_operand:V2DI 0 "register_operand" "=x")
8839 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
8840 (parallel [(const_int 0) (const_int 1)]))))]
8842 "%vpmov<extsuffix>dq\t{%1, %0|%0, %q1}"
8843 [(set_attr "type" "ssemov")
8844 (set_attr "prefix_extra" "1")
8845 (set_attr "prefix" "maybe_vex")
8846 (set_attr "mode" "TI")])
8848 ;; vtestps/vtestpd are very similar to comiss and ucomiss when
8849 ;; setting FLAGS_REG, but they are not really compare instructions.
;; The pattern sets FLAGS_REG (CC mode) from an unspec over a VF register
;; (operand 0) and a VF register-or-memory operand (operand 1).
;; NOTE(review): the unspec name and the enabling condition are not visible
;; in this excerpt.
8850 (define_insn "avx_vtest<ssemodesuffix><avxsizesuffix>"
8851 [(set (reg:CC FLAGS_REG)
8852 (unspec:CC [(match_operand:VF 0 "register_operand" "x")
8853 (match_operand:VF 1 "nonimmediate_operand" "xm")]
8856 "vtest<ssemodesuffix>\t{%1, %0|%0, %1}"
8857 [(set_attr "type" "ssecomi")
8858 (set_attr "prefix_extra" "1")
8859 (set_attr "prefix" "vex")
8860 (set_attr "mode" "<MODE>")])
8862 ;; ptest is very similar to comiss and ucomiss when setting FLAGS_REG.
8863 ;; But it is not really a compare instruction.
;; 256-bit form: sets FLAGS_REG (CC mode) from an unspec over a V4DI
;; register (operand 0) and a V4DI register-or-memory operand (operand 1).
;; NOTE(review): the unspec name and the enabling condition are not visible
;; in this excerpt.
8864 (define_insn "avx_ptest256"
8865 [(set (reg:CC FLAGS_REG)
8866 (unspec:CC [(match_operand:V4DI 0 "register_operand" "x")
8867 (match_operand:V4DI 1 "nonimmediate_operand" "xm")]
8870 "vptest\t{%1, %0|%0, %1}"
8871 [(set_attr "type" "ssecomi")
8872 (set_attr "prefix_extra" "1")
8873 (set_attr "prefix" "vex")
8874 (set_attr "btver2_decode" "vector")
8875 (set_attr "mode" "OI")])
;; 128-bit ptest: sets FLAGS_REG (CC mode) from an unspec over a V2DI
;; register (operand 0) and a V2DI register-or-memory operand (operand 1).
;; The template uses the %v mnemonic prefix with prefix attribute maybe_vex.
;; NOTE(review): the unspec name and the enabling condition are not visible
;; in this excerpt.
8877 (define_insn "sse4_1_ptest"
8878 [(set (reg:CC FLAGS_REG)
8879 (unspec:CC [(match_operand:V2DI 0 "register_operand" "x")
8880 (match_operand:V2DI 1 "nonimmediate_operand" "xm")]
8883 "%vptest\t{%1, %0|%0, %1}"
8884 [(set_attr "type" "ssecomi")
8885 (set_attr "prefix_extra" "1")
8886 (set_attr "prefix" "maybe_vex")
8887 (set_attr "mode" "TI")])
;; Packed round: %vround<ssemodesuffix> of a VF register-or-memory source
;; (operand 1) into a VF register (operand 0), with the rounding control
;; given by an immediate in 0..15 (operand 2).
;; prefix_data16 is a computed attribute involving a TARGET_AVX test; only
;; part of that conditional is visible in this excerpt.
;; NOTE(review): the unspec wrapper and the enabling condition are not
;; visible here.
8889 (define_insn "<sse4_1>_round<ssemodesuffix><avxsizesuffix>"
8890 [(set (match_operand:VF 0 "register_operand" "=x")
8892 [(match_operand:VF 1 "nonimmediate_operand" "xm")
8893 (match_operand:SI 2 "const_0_to_15_operand" "n")]
8896 "%vround<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
8897 [(set_attr "type" "ssecvt")
8898 (set (attr "prefix_data16")
8900 (match_test "TARGET_AVX")
8902 (const_string "1")))
8903 (set_attr "prefix_extra" "1")
8904 (set_attr "length_immediate" "1")
8905 (set_attr "prefix" "maybe_vex")
8906 (set_attr "mode" "<MODE>")])
;; Round-then-convert expander for VF1 inputs.  It rounds operand 1 with the
;; immediate in operand 2 into a fresh <MODE>mode temporary (via the packed
;; round pattern), then converts the temporary into the integer vector
;; operand 0 with fix_trunc<mode><sseintvecmodelower>2.
;; NOTE(review): the enabling condition and some body lines are not visible
;; in this excerpt.
8908 (define_expand "<sse4_1>_round<ssemodesuffix>_sfix<avxsizesuffix>"
8909 [(match_operand:<sseintvecmode> 0 "register_operand")
8910 (match_operand:VF1 1 "nonimmediate_operand")
8911 (match_operand:SI 2 "const_0_to_15_operand")]
8914 rtx tmp = gen_reg_rtx (<MODE>mode);
8917 (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp, operands[1],
8920 (gen_fix_trunc<mode><sseintvecmodelower>2 (operands[0], tmp));
;; Round two VF2 inputs (operands 1 and 2) with the immediate in operand 3
;; and pack the converted results into operand 0.
;; For V2DFmode with TARGET_AVX and !TARGET_PREFER_AVX128 the two inputs are
;; concatenated into one V4DF value, rounded once with avx_roundpd256 and
;; converted with fix_truncv4dfv4si2.  The other visible path rounds each
;; input into its own temporary and combines them with
;; vec_pack_sfix_trunc_<mode>.
;; NOTE(review): the enabling condition, braces and else keyword are not
;; visible in this excerpt.
8924 (define_expand "<sse4_1>_round<ssemodesuffix>_vec_pack_sfix<avxsizesuffix>"
8925 [(match_operand:<ssepackfltmode> 0 "register_operand")
8926 (match_operand:VF2 1 "nonimmediate_operand")
8927 (match_operand:VF2 2 "nonimmediate_operand")
8928 (match_operand:SI 3 "const_0_to_15_operand")]
8933 if (<MODE>mode == V2DFmode
8934 && TARGET_AVX && !TARGET_PREFER_AVX128)
8936 rtx tmp2 = gen_reg_rtx (V4DFmode);
8938 tmp0 = gen_reg_rtx (V4DFmode);
8939 tmp1 = force_reg (V2DFmode, operands[1]);
8941 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
8942 emit_insn (gen_avx_roundpd256 (tmp2, tmp0, operands[3]));
8943 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp2));
8947 tmp0 = gen_reg_rtx (<MODE>mode);
8948 tmp1 = gen_reg_rtx (<MODE>mode);
8951 (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp0, operands[1],
8954 (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp1, operands[2],
8957 (gen_vec_pack_sfix_trunc_<mode> (operands[0], tmp0, tmp1));
;; Scalar round on VF_128 registers with two alternatives selected by the
;; isa attribute:
;;   noavx - round<ssescalarmodesuffix>, operand 1 tied to operand 0;
;;   avx   - vround<ssescalarmodesuffix>, with operand 1 printed as an
;;           additional source.
;; Operand 2 is the register source and operand 3 the immediate (0..15).
;; NOTE(review): the RTL wrapper lines and the enabling condition are not
;; visible in this excerpt.
8962 (define_insn "sse4_1_round<ssescalarmodesuffix>"
8963 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
8966 [(match_operand:VF_128 2 "register_operand" "x,x")
8967 (match_operand:SI 3 "const_0_to_15_operand" "n,n")]
8969 (match_operand:VF_128 1 "register_operand" "0,x")
8973 round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
8974 vround<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8975 [(set_attr "isa" "noavx,avx")
8976 (set_attr "type" "ssecvt")
8977 (set_attr "length_immediate" "1")
8978 (set_attr "prefix_data16" "1,*")
8979 (set_attr "prefix_extra" "1")
8980 (set_attr "prefix" "orig,vex")
8981 (set_attr "mode" "<MODE>")])
;; Vector round expander for VF modes, enabled when TARGET_ROUND is set and
;; flag_trapping_math is clear.
;; The preparation code builds a vector constant from 0.5 minus
;; 2^(-(p) - 1) (the in-body comment calls it nextafter (0.5, 0.0)), forces
;; it into a register and copies the sign of operand 1 onto it, storing the
;; result in operands[3].  operands[4] is a fresh register and operands[5]
;; is the ROUND_TRUNC immediate used by the final pattern.
;; NOTE(review): several RTL template and declaration lines are not visible
;; in this excerpt.
8983 (define_expand "round<mode>2"
8986 (match_operand:VF 1 "register_operand")
8988 (set (match_operand:VF 0 "register_operand")
8990 [(match_dup 4) (match_dup 5)]
8992 "TARGET_ROUND && !flag_trapping_math"
8994 enum machine_mode scalar_mode;
8995 const struct real_format *fmt;
8996 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
8999 scalar_mode = GET_MODE_INNER (<MODE>mode);
9001 /* load nextafter (0.5, 0.0) */
9002 fmt = REAL_MODE_FORMAT (scalar_mode);
9003 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, scalar_mode);
9004 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
9005 half = const_double_from_real_value (pred_half, scalar_mode);
9007 vec_half = ix86_build_const_vector (<MODE>mode, true, half);
9008 vec_half = force_reg (<MODE>mode, vec_half);
9010 operands[3] = gen_reg_rtx (<MODE>mode);
9011 emit_insn (gen_copysign<mode>3 (operands[3], vec_half, operands[1]));
9013 operands[4] = gen_reg_rtx (<MODE>mode);
9014 operands[5] = GEN_INT (ROUND_TRUNC);
;; Round a VF1 register (operand 1) with round<mode>2 into a temporary and
;; convert the temporary into the integer vector operand 0 with
;; fix_trunc<mode><sseintvecmodelower>2.  Enabled under the same condition
;; as round<mode>2 (TARGET_ROUND && !flag_trapping_math).
9017 (define_expand "round<mode>2_sfix"
9018 [(match_operand:<sseintvecmode> 0 "register_operand")
9019 (match_operand:VF1 1 "register_operand")]
9020 "TARGET_ROUND && !flag_trapping_math"
9022 rtx tmp = gen_reg_rtx (<MODE>mode);
9024 emit_insn (gen_round<mode>2 (tmp, operands[1]));
9027 (gen_fix_trunc<mode><sseintvecmodelower>2 (operands[0], tmp));
;; Round two VF2 registers (operands 1 and 2) and pack the converted results
;; into operand 0.  Enabled when TARGET_ROUND && !flag_trapping_math.
;; For V2DFmode with TARGET_AVX and !TARGET_PREFER_AVX128 the inputs are
;; concatenated into one V4DF value, rounded with roundv4df2 and converted
;; with fix_truncv4dfv4si2.  The other visible path rounds each input
;; separately with round<mode>2 and combines them with
;; vec_pack_sfix_trunc_<mode>.
;; NOTE(review): braces, declarations and the else keyword are not visible
;; in this excerpt.
9031 (define_expand "round<mode>2_vec_pack_sfix"
9032 [(match_operand:<ssepackfltmode> 0 "register_operand")
9033 (match_operand:VF2 1 "register_operand")
9034 (match_operand:VF2 2 "register_operand")]
9035 "TARGET_ROUND && !flag_trapping_math"
9039 if (<MODE>mode == V2DFmode
9040 && TARGET_AVX && !TARGET_PREFER_AVX128)
9042 rtx tmp2 = gen_reg_rtx (V4DFmode);
9044 tmp0 = gen_reg_rtx (V4DFmode);
9045 tmp1 = force_reg (V2DFmode, operands[1]);
9047 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
9048 emit_insn (gen_roundv4df2 (tmp2, tmp0));
9049 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp2));
9053 tmp0 = gen_reg_rtx (<MODE>mode);
9054 tmp1 = gen_reg_rtx (<MODE>mode);
9056 emit_insn (gen_round<mode>2 (tmp0, operands[1]));
9057 emit_insn (gen_round<mode>2 (tmp1, operands[2]));
9060 (gen_vec_pack_sfix_trunc_<mode> (operands[0], tmp0, tmp1));
9065 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9067 ;; Intel SSE4.2 string/text processing instructions
9069 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Combined pcmpestr pattern with three results: an SI value in constraint
;; class c (operand 0), a V16QI value in constraint class Yz (operand 1) and
;; FLAGS_REG.  Inputs are a V16QI register (2), an SI register in class a
;; (3), a V16QI register-or-memory operand (4), an SI register in class d
;; (5) and an immediate in 0..255 (6).
;; The split code inspects the REG_UNUSED notes on curr_insn to see which
;; results are live.  It can emit sse4_2_pcmpestri and/or sse4_2_pcmpestrm,
;; emits the cconly form when only the flags are live, and emits a
;; NOTE_INSN_DELETED when nothing is live.
;; NOTE(review): the guards on the first two emits, the unspec names and
;; part of the condition are not visible in this excerpt.
9071 (define_insn_and_split "sse4_2_pcmpestr"
9072 [(set (match_operand:SI 0 "register_operand" "=c,c")
9074 [(match_operand:V16QI 2 "register_operand" "x,x")
9075 (match_operand:SI 3 "register_operand" "a,a")
9076 (match_operand:V16QI 4 "nonimmediate_operand" "x,m")
9077 (match_operand:SI 5 "register_operand" "d,d")
9078 (match_operand:SI 6 "const_0_to_255_operand" "n,n")]
9080 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
9088 (set (reg:CC FLAGS_REG)
9097 && can_create_pseudo_p ()"
9102 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
9103 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
9104 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
9107 emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
9108 operands[3], operands[4],
9109 operands[5], operands[6]));
9111 emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
9112 operands[3], operands[4],
9113 operands[5], operands[6]));
9114 if (flags && !(ecx || xmm0))
9115 emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
9116 operands[2], operands[3],
9117 operands[4], operands[5],
9119 if (!(flags || ecx || xmm0))
9120 emit_note (NOTE_INSN_DELETED);
9124 [(set_attr "type" "sselog")
9125 (set_attr "prefix_data16" "1")
9126 (set_attr "prefix_extra" "1")
9127 (set_attr "length_immediate" "1")
9128 (set_attr "memory" "none,load")
9129 (set_attr "mode" "TI")])
;; Variant of sse4_2_pcmpestr whose V16QI operand 4 is a memory operand
;; wrapped in UNSPEC_LOADU (see the match_dup 4 uses below).  The split code
;; is the same as in sse4_2_pcmpestr: it checks REG_UNUSED notes for
;; operand 0, operand 1 and FLAGS_REG, and emits pcmpestri, pcmpestrm, the
;; cconly form, or a NOTE_INSN_DELETED accordingly.
;; NOTE(review): the guards on the first two emits and several RTL lines
;; are not visible in this excerpt.
9131 (define_insn_and_split "*sse4_2_pcmpestr_unaligned"
9132 [(set (match_operand:SI 0 "register_operand" "=c")
9134 [(match_operand:V16QI 2 "register_operand" "x")
9135 (match_operand:SI 3 "register_operand" "a")
9137 [(match_operand:V16QI 4 "memory_operand" "m")]
9139 (match_operand:SI 5 "register_operand" "d")
9140 (match_operand:SI 6 "const_0_to_255_operand" "n")]
9142 (set (match_operand:V16QI 1 "register_operand" "=Yz")
9146 (unspec:V16QI [(match_dup 4)] UNSPEC_LOADU)
9150 (set (reg:CC FLAGS_REG)
9154 (unspec:V16QI [(match_dup 4)] UNSPEC_LOADU)
9159 && can_create_pseudo_p ()"
9164 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
9165 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
9166 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
9169 emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
9170 operands[3], operands[4],
9171 operands[5], operands[6]));
9173 emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
9174 operands[3], operands[4],
9175 operands[5], operands[6]));
9176 if (flags && !(ecx || xmm0))
9177 emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
9178 operands[2], operands[3],
9179 operands[4], operands[5],
9181 if (!(flags || ecx || xmm0))
9182 emit_note (NOTE_INSN_DELETED);
9186 [(set_attr "type" "sselog")
9187 (set_attr "prefix_data16" "1")
9188 (set_attr "prefix_extra" "1")
9189 (set_attr "length_immediate" "1")
9190 (set_attr "memory" "load")
9191 (set_attr "mode" "TI")])
;; %vpcmpestri: produces an SI result in constraint class c (operand 0) and
;; also sets FLAGS_REG.  Inputs are a V16QI register (1), an SI register in
;; class a (2), a V16QI register-or-memory operand (3), an SI register in
;; class d (4) and an immediate in 0..255 (5).  The two alternatives differ
;; only in whether operand 3 is a register or memory (memory attribute
;; none,load).
;; NOTE(review): the unspec names and the enabling condition are not
;; visible in this excerpt.
9193 (define_insn "sse4_2_pcmpestri"
9194 [(set (match_operand:SI 0 "register_operand" "=c,c")
9196 [(match_operand:V16QI 1 "register_operand" "x,x")
9197 (match_operand:SI 2 "register_operand" "a,a")
9198 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
9199 (match_operand:SI 4 "register_operand" "d,d")
9200 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
9202 (set (reg:CC FLAGS_REG)
9211 "%vpcmpestri\t{%5, %3, %1|%1, %3, %5}"
9212 [(set_attr "type" "sselog")
9213 (set_attr "prefix_data16" "1")
9214 (set_attr "prefix_extra" "1")
9215 (set_attr "prefix" "maybe_vex")
9216 (set_attr "length_immediate" "1")
9217 (set_attr "btver2_decode" "vector")
9218 (set_attr "memory" "none,load")
9219 (set_attr "mode" "TI")])
;; %vpcmpestrm: produces a V16QI result in constraint class Yz (operand 0)
;; and also sets FLAGS_REG.  Inputs match sse4_2_pcmpestri: a V16QI register
;; (1), an SI register in class a (2), a V16QI register-or-memory operand
;; (3), an SI register in class d (4) and an immediate in 0..255 (5).
;; NOTE(review): the unspec names and the enabling condition are not
;; visible in this excerpt.
9221 (define_insn "sse4_2_pcmpestrm"
9222 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
9224 [(match_operand:V16QI 1 "register_operand" "x,x")
9225 (match_operand:SI 2 "register_operand" "a,a")
9226 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
9227 (match_operand:SI 4 "register_operand" "d,d")
9228 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
9230 (set (reg:CC FLAGS_REG)
9239 "%vpcmpestrm\t{%5, %3, %1|%1, %3, %5}"
9240 [(set_attr "type" "sselog")
9241 (set_attr "prefix_data16" "1")
9242 (set_attr "prefix_extra" "1")
9243 (set_attr "length_immediate" "1")
9244 (set_attr "prefix" "maybe_vex")
9245 (set_attr "btver2_decode" "vector")
9246 (set_attr "memory" "none,load")
9247 (set_attr "mode" "TI")])
;; Flags-only pcmpestr: sets FLAGS_REG and clobbers two scratch operands.
;; Four alternatives: the first two emit %vpcmpestrm (V16QI scratch in class
;; Yz, SI scratch unconstrained), the last two emit %vpcmpestri (SI scratch
;; in class c, V16QI scratch unconstrained).  Within each pair, operand 4 is
;; a register or memory (memory attribute none,load,none,load).
;; NOTE(review): the unspec name and the enabling condition are not visible
;; in this excerpt.
9249 (define_insn "sse4_2_pcmpestr_cconly"
9250 [(set (reg:CC FLAGS_REG)
9252 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
9253 (match_operand:SI 3 "register_operand" "a,a,a,a")
9254 (match_operand:V16QI 4 "nonimmediate_operand" "x,m,x,m")
9255 (match_operand:SI 5 "register_operand" "d,d,d,d")
9256 (match_operand:SI 6 "const_0_to_255_operand" "n,n,n,n")]
9258 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
9259 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
9262 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
9263 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
9264 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}
9265 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}"
9266 [(set_attr "type" "sselog")
9267 (set_attr "prefix_data16" "1")
9268 (set_attr "prefix_extra" "1")
9269 (set_attr "length_immediate" "1")
9270 (set_attr "memory" "none,load,none,load")
9271 (set_attr "btver2_decode" "vector,vector,vector,vector")
9272 (set_attr "prefix" "maybe_vex")
9273 (set_attr "mode" "TI")])
;; Combined pcmpistr pattern with three results: an SI value in constraint
;; class c (operand 0), a V16QI value in constraint class Yz (operand 1) and
;; FLAGS_REG.  Inputs are a V16QI register (2), a V16QI register-or-memory
;; operand (3) and an immediate in 0..255 (4).
;; The split code inspects the REG_UNUSED notes on curr_insn to see which
;; results are live.  It can emit sse4_2_pcmpistri and/or sse4_2_pcmpistrm,
;; emits the cconly form when only the flags are live, and emits a
;; NOTE_INSN_DELETED when nothing is live.
;; NOTE(review): the guards on the first two emits, the unspec names and
;; part of the condition are not visible in this excerpt.
9275 (define_insn_and_split "sse4_2_pcmpistr"
9276 [(set (match_operand:SI 0 "register_operand" "=c,c")
9278 [(match_operand:V16QI 2 "register_operand" "x,x")
9279 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
9280 (match_operand:SI 4 "const_0_to_255_operand" "n,n")]
9282 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
9288 (set (reg:CC FLAGS_REG)
9295 && can_create_pseudo_p ()"
9300 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
9301 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
9302 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
9305 emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
9306 operands[3], operands[4]));
9308 emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
9309 operands[3], operands[4]));
9310 if (flags && !(ecx || xmm0))
9311 emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
9312 operands[2], operands[3],
9314 if (!(flags || ecx || xmm0))
9315 emit_note (NOTE_INSN_DELETED);
9319 [(set_attr "type" "sselog")
9320 (set_attr "prefix_data16" "1")
9321 (set_attr "prefix_extra" "1")
9322 (set_attr "length_immediate" "1")
9323 (set_attr "memory" "none,load")
9324 (set_attr "mode" "TI")])
;; Variant of sse4_2_pcmpistr whose V16QI operand 3 is a memory operand
;; wrapped in UNSPEC_LOADU (see the match_dup 3 uses below).  The split code
;; is the same as in sse4_2_pcmpistr: it checks REG_UNUSED notes for
;; operand 0, operand 1 and FLAGS_REG, and emits pcmpistri, pcmpistrm, the
;; cconly form, or a NOTE_INSN_DELETED accordingly.
;; NOTE(review): the guards on the first two emits and several RTL lines
;; are not visible in this excerpt.
9326 (define_insn_and_split "*sse4_2_pcmpistr_unaligned"
9327 [(set (match_operand:SI 0 "register_operand" "=c")
9329 [(match_operand:V16QI 2 "register_operand" "x")
9331 [(match_operand:V16QI 3 "memory_operand" "m")]
9333 (match_operand:SI 4 "const_0_to_255_operand" "n")]
9335 (set (match_operand:V16QI 1 "register_operand" "=Yz")
9338 (unspec:V16QI [(match_dup 3)] UNSPEC_LOADU)
9341 (set (reg:CC FLAGS_REG)
9344 (unspec:V16QI [(match_dup 3)] UNSPEC_LOADU)
9348 && can_create_pseudo_p ()"
9353 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
9354 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
9355 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
9358 emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
9359 operands[3], operands[4]));
9361 emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
9362 operands[3], operands[4]));
9363 if (flags && !(ecx || xmm0))
9364 emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
9365 operands[2], operands[3],
9367 if (!(flags || ecx || xmm0))
9368 emit_note (NOTE_INSN_DELETED);
9372 [(set_attr "type" "sselog")
9373 (set_attr "prefix_data16" "1")
9374 (set_attr "prefix_extra" "1")
9375 (set_attr "length_immediate" "1")
9376 (set_attr "memory" "load")
9377 (set_attr "mode" "TI")])
;; %vpcmpistri: produces an SI result in constraint class c (operand 0) and
;; also sets FLAGS_REG.  Inputs are a V16QI register (1), a V16QI
;; register-or-memory operand (2) and an immediate in 0..255 (3).  The two
;; alternatives differ only in whether operand 2 is a register or memory.
;; NOTE(review): the unspec names and the enabling condition are not
;; visible in this excerpt.
9379 (define_insn "sse4_2_pcmpistri"
9380 [(set (match_operand:SI 0 "register_operand" "=c,c")
9382 [(match_operand:V16QI 1 "register_operand" "x,x")
9383 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
9384 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
9386 (set (reg:CC FLAGS_REG)
9393 "%vpcmpistri\t{%3, %2, %1|%1, %2, %3}"
9394 [(set_attr "type" "sselog")
9395 (set_attr "prefix_data16" "1")
9396 (set_attr "prefix_extra" "1")
9397 (set_attr "length_immediate" "1")
9398 (set_attr "prefix" "maybe_vex")
9399 (set_attr "memory" "none,load")
9400 (set_attr "btver2_decode" "vector")
9401 (set_attr "mode" "TI")])
;; %vpcmpistrm: produces a V16QI result in constraint class Yz (operand 0)
;; and also sets FLAGS_REG.  Inputs match sse4_2_pcmpistri: a V16QI register
;; (1), a V16QI register-or-memory operand (2) and an immediate in 0..255
;; (3).
;; NOTE(review): the unspec names and the enabling condition are not
;; visible in this excerpt.
9403 (define_insn "sse4_2_pcmpistrm"
9404 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
9406 [(match_operand:V16QI 1 "register_operand" "x,x")
9407 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
9408 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
9410 (set (reg:CC FLAGS_REG)
9417 "%vpcmpistrm\t{%3, %2, %1|%1, %2, %3}"
9418 [(set_attr "type" "sselog")
9419 (set_attr "prefix_data16" "1")
9420 (set_attr "prefix_extra" "1")
9421 (set_attr "length_immediate" "1")
9422 (set_attr "prefix" "maybe_vex")
9423 (set_attr "memory" "none,load")
9424 (set_attr "btver2_decode" "vector")
9425 (set_attr "mode" "TI")])
;; Flags-only pcmpistr: sets FLAGS_REG and clobbers two scratch operands.
;; Four alternatives: the first two emit %vpcmpistrm (V16QI scratch in class
;; Yz, SI scratch unconstrained), the last two emit %vpcmpistri (SI scratch
;; in class c, V16QI scratch unconstrained).  Within each pair, operand 3 is
;; a register or memory (memory attribute none,load,none,load).
;; NOTE(review): the unspec name and the enabling condition are not visible
;; in this excerpt.
9427 (define_insn "sse4_2_pcmpistr_cconly"
9428 [(set (reg:CC FLAGS_REG)
9430 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
9431 (match_operand:V16QI 3 "nonimmediate_operand" "x,m,x,m")
9432 (match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")]
9434 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
9435 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
9438 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
9439 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
9440 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}
9441 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}"
9442 [(set_attr "type" "sselog")
9443 (set_attr "prefix_data16" "1")
9444 (set_attr "prefix_extra" "1")
9445 (set_attr "length_immediate" "1")
9446 (set_attr "memory" "none,load,none,load")
9447 (set_attr "prefix" "maybe_vex")
9448 (set_attr "btver2_decode" "vector,vector,vector,vector")
9449 (set_attr "mode" "TI")])
9451 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9455 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Code iterator over plus and ss_plus, with two code attributes giving the
;; mnemonic stems used by the XOP multiply/accumulate patterns that follow:
;; macs/macss and madcs/madcss respectively.
9457 (define_code_iterator xop_plus [plus ss_plus])
9459 (define_code_attr macs [(plus "macs") (ss_plus "macss")])
9460 (define_code_attr madcs [(plus "madcs") (ss_plus "madcss")])
9462 ;; XOP parallel integer multiply/add instructions.
;; vp<macs><ssemodesuffix><ssemodesuffix> on VI24_128 vectors: operands 1
;; and 2 are the paired sources (operand 1 carries the % commutative
;; marker, operand 2 may be memory) and operand 3 is a further register
;; operand of the same mode.
;; NOTE(review): the arithmetic wrapper lines and the enabling condition
;; are not visible in this excerpt.
9464 (define_insn "xop_p<macs><ssemodesuffix><ssemodesuffix>"
9465 [(set (match_operand:VI24_128 0 "register_operand" "=x")
9468 (match_operand:VI24_128 1 "nonimmediate_operand" "%x")
9469 (match_operand:VI24_128 2 "nonimmediate_operand" "xm"))
9470 (match_operand:VI24_128 3 "register_operand" "x")))]
9472 "vp<macs><ssemodesuffix><ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9473 [(set_attr "type" "ssemuladd")
9474 (set_attr "mode" "TI")])
;; vp<macs>dql: uses elements 0 and 2 of each V4SI source (operands 1 and
;; 2, see the parallels) together with a V2DI register (operand 3), writing
;; a V2DI register.
;; NOTE(review): the extension/multiply wrapper lines and the enabling
;; condition are not visible in this excerpt.
9476 (define_insn "xop_p<macs>dql"
9477 [(set (match_operand:V2DI 0 "register_operand" "=x")
9482 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
9483 (parallel [(const_int 0) (const_int 2)])))
9486 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
9487 (parallel [(const_int 0) (const_int 2)]))))
9488 (match_operand:V2DI 3 "register_operand" "x")))]
9490 "vp<macs>dql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9491 [(set_attr "type" "ssemuladd")
9492 (set_attr "mode" "TI")])
;; vp<macs>dqh: uses elements 1 and 3 of each V4SI source (operands 1 and
;; 2, see the parallels) together with a V2DI register (operand 3), writing
;; a V2DI register.
;; NOTE(review): the extension/multiply wrapper lines and the enabling
;; condition are not visible in this excerpt.
9494 (define_insn "xop_p<macs>dqh"
9495 [(set (match_operand:V2DI 0 "register_operand" "=x")
9500 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
9501 (parallel [(const_int 1) (const_int 3)])))
9504 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
9505 (parallel [(const_int 1) (const_int 3)]))))
9506 (match_operand:V2DI 3 "register_operand" "x")))]
9508 "vp<macs>dqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9509 [(set_attr "type" "ssemuladd")
9510 (set_attr "mode" "TI")])
9512 ;; XOP parallel integer multiply/add instructions for the intrinsics
;; vp<macs>wd: uses elements 1, 3, 5 and 7 of each V8HI source (operands 1
;; and 2, see the parallels) together with a V4SI register (operand 3),
;; writing a V4SI register.
;; NOTE(review): the extension/multiply wrapper lines and the enabling
;; condition are not visible in this excerpt.
9513 (define_insn "xop_p<macs>wd"
9514 [(set (match_operand:V4SI 0 "register_operand" "=x")
9519 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
9520 (parallel [(const_int 1) (const_int 3)
9521 (const_int 5) (const_int 7)])))
9524 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
9525 (parallel [(const_int 1) (const_int 3)
9526 (const_int 5) (const_int 7)]))))
9527 (match_operand:V4SI 3 "register_operand" "x")))]
9529 "vp<macs>wd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9530 [(set_attr "type" "ssemuladd")
9531 (set_attr "mode" "TI")])
;; vp<madcs>wd: the RTL selects both the even elements (0, 2, 4, 6) and the
;; odd elements (1, 3, 5, 7) of the V8HI sources (operands 1 and 2) and
;; combines them with a V4SI register (operand 3), writing a V4SI register.
;; NOTE(review): the extension/multiply/add wrapper lines, the match_dup
;; lines and the enabling condition are not visible in this excerpt.
9533 (define_insn "xop_p<madcs>wd"
9534 [(set (match_operand:V4SI 0 "register_operand" "=x")
9540 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
9541 (parallel [(const_int 0) (const_int 2)
9542 (const_int 4) (const_int 6)])))
9545 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
9546 (parallel [(const_int 0) (const_int 2)
9547 (const_int 4) (const_int 6)]))))
9552 (parallel [(const_int 1) (const_int 3)
9553 (const_int 5) (const_int 7)])))
9557 (parallel [(const_int 1) (const_int 3)
9558 (const_int 5) (const_int 7)])))))
9559 (match_operand:V4SI 3 "register_operand" "x")))]
9561 "vp<madcs>wd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9562 [(set_attr "type" "ssemuladd")
9563 (set_attr "mode" "TI")])
9565 ;; XOP parallel XMM conditional moves
;; vpcmov over every mode in the V iterator.  Operand 3 is listed first in
;; the RTL, followed by operands 1 and 2.  The two alternatives allow
;; either operand 2 or operand 3 to be memory, but not both.
;; NOTE(review): the RTL wrapper and the enabling condition are not visible
;; in this excerpt.
9566 (define_insn "xop_pcmov_<mode><avxsizesuffix>"
9567 [(set (match_operand:V 0 "register_operand" "=x,x")
9569 (match_operand:V 3 "nonimmediate_operand" "x,m")
9570 (match_operand:V 1 "register_operand" "x,x")
9571 (match_operand:V 2 "nonimmediate_operand" "xm,x")))]
9573 "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9574 [(set_attr "type" "sse4arg")])
9576 ;; XOP horizontal add/subtract instructions
;; vphadd<u>bw: the RTL selects the even elements (0, 2, ..., 14) and the
;; odd elements (1, 3, ..., 15) of the V16QI source and writes a V8HI
;; register.
;; NOTE(review): the plus/extension wrapper lines, the match_dup lines and
;; the enabling condition are not visible in this excerpt.
9577 (define_insn "xop_phadd<u>bw"
9578 [(set (match_operand:V8HI 0 "register_operand" "=x")
9582 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9583 (parallel [(const_int 0) (const_int 2)
9584 (const_int 4) (const_int 6)
9585 (const_int 8) (const_int 10)
9586 (const_int 12) (const_int 14)])))
9590 (parallel [(const_int 1) (const_int 3)
9591 (const_int 5) (const_int 7)
9592 (const_int 9) (const_int 11)
9593 (const_int 13) (const_int 15)])))))]
9595 "vphadd<u>bw\t{%1, %0|%0, %1}"
9596 [(set_attr "type" "sseiadd1")])
;; vphadd<u>bd: the RTL selects four groups of V16QI elements, with strides
;; of four starting at 0, 1, 2 and 3, and writes a V4SI register.
;; NOTE(review): the plus/extension wrapper lines, the match_dup lines and
;; the enabling condition are not visible in this excerpt.
9598 (define_insn "xop_phadd<u>bd"
9599 [(set (match_operand:V4SI 0 "register_operand" "=x")
9604 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9605 (parallel [(const_int 0) (const_int 4)
9606 (const_int 8) (const_int 12)])))
9610 (parallel [(const_int 1) (const_int 5)
9611 (const_int 9) (const_int 13)]))))
9616 (parallel [(const_int 2) (const_int 6)
9617 (const_int 10) (const_int 14)])))
9621 (parallel [(const_int 3) (const_int 7)
9622 (const_int 11) (const_int 15)]))))))]
9624 "vphadd<u>bd\t{%1, %0|%0, %1}"
9625 [(set_attr "type" "sseiadd1")])
;; vphadd<u>bq: the RTL selects eight element pairs (i, i+8) for i = 0..7
;; from the V16QI source and writes a V2DI register.
;; NOTE(review): the plus/extension wrapper lines, the match_dup lines and
;; the enabling condition are not visible in this excerpt.
9627 (define_insn "xop_phadd<u>bq"
9628 [(set (match_operand:V2DI 0 "register_operand" "=x")
9634 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9635 (parallel [(const_int 0) (const_int 8)])))
9639 (parallel [(const_int 1) (const_int 9)]))))
9644 (parallel [(const_int 2) (const_int 10)])))
9648 (parallel [(const_int 3) (const_int 11)])))))
9654 (parallel [(const_int 4) (const_int 12)])))
9658 (parallel [(const_int 5) (const_int 13)]))))
9663 (parallel [(const_int 6) (const_int 14)])))
9667 (parallel [(const_int 7) (const_int 15)])))))))]
9669 "vphadd<u>bq\t{%1, %0|%0, %1}"
9670 [(set_attr "type" "sseiadd1")])
;; vphadd<u>wd: the RTL selects the even elements (0, 2, 4, 6) and the odd
;; elements (1, 3, 5, 7) of the V8HI source and writes a V4SI register.
;; NOTE(review): the plus/extension wrapper lines, the match_dup line and
;; the enabling condition are not visible in this excerpt.
9672 (define_insn "xop_phadd<u>wd"
9673 [(set (match_operand:V4SI 0 "register_operand" "=x")
9677 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
9678 (parallel [(const_int 0) (const_int 2)
9679 (const_int 4) (const_int 6)])))
9683 (parallel [(const_int 1) (const_int 3)
9684 (const_int 5) (const_int 7)])))))]
9686 "vphadd<u>wd\t{%1, %0|%0, %1}"
9687 [(set_attr "type" "sseiadd1")])
;; vphadd<u>wq: the RTL selects four element pairs (i, i+4) for i = 0..3
;; from the V8HI source and writes a V2DI register.
;; NOTE(review): the plus/extension wrapper lines, the match_dup lines and
;; the enabling condition are not visible in this excerpt.
9689 (define_insn "xop_phadd<u>wq"
9690 [(set (match_operand:V2DI 0 "register_operand" "=x")
9695 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
9696 (parallel [(const_int 0) (const_int 4)])))
9700 (parallel [(const_int 1) (const_int 5)]))))
9705 (parallel [(const_int 2) (const_int 6)])))
9709 (parallel [(const_int 3) (const_int 7)]))))))]
9711 "vphadd<u>wq\t{%1, %0|%0, %1}"
9712 [(set_attr "type" "sseiadd1")])
;; vphadd<u>dq: the RTL selects elements (0, 2) and (1, 3) of the V4SI
;; source and writes a V2DI register.
;; NOTE(review): the plus/extension wrapper lines, the match_dup line and
;; the enabling condition are not visible in this excerpt.
9714 (define_insn "xop_phadd<u>dq"
9715 [(set (match_operand:V2DI 0 "register_operand" "=x")
9719 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
9720 (parallel [(const_int 0) (const_int 2)])))
9724 (parallel [(const_int 1) (const_int 3)])))))]
9726 "vphadd<u>dq\t{%1, %0|%0, %1}"
9727 [(set_attr "type" "sseiadd1")])
;; vphsubbw: the RTL selects the even elements (0, 2, ..., 14) and the odd
;; elements (1, 3, ..., 15) of the V16QI source and writes a V8HI register.
;; NOTE(review): the minus/extension wrapper lines, the match_dup line and
;; the enabling condition are not visible in this excerpt.
9729 (define_insn "xop_phsubbw"
9730 [(set (match_operand:V8HI 0 "register_operand" "=x")
9734 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9735 (parallel [(const_int 0) (const_int 2)
9736 (const_int 4) (const_int 6)
9737 (const_int 8) (const_int 10)
9738 (const_int 12) (const_int 14)])))
9742 (parallel [(const_int 1) (const_int 3)
9743 (const_int 5) (const_int 7)
9744 (const_int 9) (const_int 11)
9745 (const_int 13) (const_int 15)])))))]
9747 "vphsubbw\t{%1, %0|%0, %1}"
9748 [(set_attr "type" "sseiadd1")])
;; vphsubwd: the RTL selects the even elements (0, 2, 4, 6) and the odd
;; elements (1, 3, 5, 7) of the V8HI source and writes a V4SI register.
;; NOTE(review): the minus/extension wrapper lines, the match_dup line and
;; the enabling condition are not visible in this excerpt.
9750 (define_insn "xop_phsubwd"
9751 [(set (match_operand:V4SI 0 "register_operand" "=x")
9755 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
9756 (parallel [(const_int 0) (const_int 2)
9757 (const_int 4) (const_int 6)])))
9761 (parallel [(const_int 1) (const_int 3)
9762 (const_int 5) (const_int 7)])))))]
9764 "vphsubwd\t{%1, %0|%0, %1}"
9765 [(set_attr "type" "sseiadd1")])
;; vphsubdq: the RTL selects elements (0, 2) and (1, 3) of the V4SI source
;; and writes a V2DI register.
;; NOTE(review): the minus/extension wrapper lines, the match_dup line and
;; the enabling condition are not visible in this excerpt.
9767 (define_insn "xop_phsubdq"
9768 [(set (match_operand:V2DI 0 "register_operand" "=x")
9772 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
9773 (parallel [(const_int 0) (const_int 2)])))
9777 (parallel [(const_int 1) (const_int 3)])))))]
9779 "vphsubdq\t{%1, %0|%0, %1}"
9780 [(set_attr "type" "sseiadd1")])
9782 ;; XOP permute instructions
;; vpperm, modelled as UNSPEC_XOP_PERMUTE over three V16QI inputs.  Operand
;; 1 must be a register; operands 2 and 3 may each be memory, but the insn
;; condition rejects the case where both are memory.
;; NOTE(review): the unspec opening line is not visible in this excerpt.
9783 (define_insn "xop_pperm"
9784 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
9786 [(match_operand:V16QI 1 "register_operand" "x,x")
9787 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
9788 (match_operand:V16QI 3 "nonimmediate_operand" "xm,x")]
9789 UNSPEC_XOP_PERMUTE))]
9790 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
9791 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9792 [(set_attr "type" "sse4arg")
9793 (set_attr "mode" "TI")])
9795 ;; XOP pack instructions that combine two vectors into a smaller vector
;; Packs two V2DI inputs (operands 1 and 2) into a V4SI register using
;; vpperm.  The V16QI operand 3 appears only in a use and is printed as the
;; first AT&T-syntax operand of vpperm.  The condition rejects operands 2
;; and 3 both being memory.
;; NOTE(review): the concat/truncate wrapper lines are not visible in this
;; excerpt.
9796 (define_insn "xop_pperm_pack_v2di_v4si"
9797 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
9800 (match_operand:V2DI 1 "register_operand" "x,x"))
9802 (match_operand:V2DI 2 "nonimmediate_operand" "x,m"))))
9803 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
9804 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
9805 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9806 [(set_attr "type" "sse4arg")
9807 (set_attr "mode" "TI")])
;; Packs two V4SI inputs (operands 1 and 2) into a V8HI register using
;; vpperm.  The V16QI operand 3 appears only in a use and is printed as the
;; first AT&T-syntax operand of vpperm.  The condition rejects operands 2
;; and 3 both being memory.
;; NOTE(review): the concat/truncate wrapper lines are not visible in this
;; excerpt.
9809 (define_insn "xop_pperm_pack_v4si_v8hi"
9810 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
9813 (match_operand:V4SI 1 "register_operand" "x,x"))
9815 (match_operand:V4SI 2 "nonimmediate_operand" "x,m"))))
9816 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
9817 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
9818 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9819 [(set_attr "type" "sse4arg")
9820 (set_attr "mode" "TI")])
;; Packs two V8HI inputs (operands 1 and 2) into a V16QI register using
;; vpperm.  The V16QI operand 3 appears only in a use and is printed as the
;; first AT&T-syntax operand of vpperm.  The condition rejects operands 2
;; and 3 both being memory.
;; NOTE(review): the concat/truncate wrapper lines are not visible in this
;; excerpt.
9822 (define_insn "xop_pperm_pack_v8hi_v16qi"
9823 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
9826 (match_operand:V8HI 1 "register_operand" "x,x"))
9828 (match_operand:V8HI 2 "nonimmediate_operand" "x,m"))))
9829 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
9830 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
9831 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9832 [(set_attr "type" "sse4arg")
9833 (set_attr "mode" "TI")])
9835 ;; XOP packed rotate instructions
;; Rotate-left expander for VI_128 with an SImode count (operand 2).
;; When the count is not an immediate accepted by
;; const_0_to_<sserotatemax>_operand, it is converted to the element mode
;; if needed, replicated into every lane with vec_init<mode>, and the
;; rotate is emitted through xop_vrotl<mode>3.
;; NOTE(review): the rotate wrapper, the enabling condition, braces and the
;; final DONE are not visible in this excerpt.
9836 (define_expand "rotl<mode>3"
9837 [(set (match_operand:VI_128 0 "register_operand")
9839 (match_operand:VI_128 1 "nonimmediate_operand")
9840 (match_operand:SI 2 "general_operand")))]
9843 /* If we were given a scalar, convert it to parallel */
9844 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
9846 rtvec vs = rtvec_alloc (<ssescalarnum>);
9847 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
9848 rtx reg = gen_reg_rtx (<MODE>mode);
9849 rtx op2 = operands[2];
9852 if (GET_MODE (op2) != <ssescalarmode>mode)
9854 op2 = gen_reg_rtx (<ssescalarmode>mode);
9855 convert_move (op2, operands[2], false);
9858 for (i = 0; i < <ssescalarnum>; i++)
9859 RTVEC_ELT (vs, i) = op2;
9861 emit_insn (gen_vec_init<mode> (reg, par));
9862 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
;; Rotate-right expander for VI_128 with an SImode count (operand 2).
;; When the count is not an immediate accepted by
;; const_0_to_<sserotatemax>_operand, it is converted to the element mode
;; if needed and replicated into every lane with vec_init<mode>.  The
;; vector is then negated and the rotate is emitted through
;; xop_vrotl<mode>3 with the negated counts.
;; NOTE(review): the rotate wrapper, the enabling condition, braces and the
;; final DONE are not visible in this excerpt.
9867 (define_expand "rotr<mode>3"
9868 [(set (match_operand:VI_128 0 "register_operand")
9870 (match_operand:VI_128 1 "nonimmediate_operand")
9871 (match_operand:SI 2 "general_operand")))]
9874 /* If we were given a scalar, convert it to parallel */
9875 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
9877 rtvec vs = rtvec_alloc (<ssescalarnum>);
9878 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
9879 rtx neg = gen_reg_rtx (<MODE>mode);
9880 rtx reg = gen_reg_rtx (<MODE>mode);
9881 rtx op2 = operands[2];
9884 if (GET_MODE (op2) != <ssescalarmode>mode)
9886 op2 = gen_reg_rtx (<ssescalarmode>mode);
9887 convert_move (op2, operands[2], false);
9890 for (i = 0; i < <ssescalarnum>; i++)
9891 RTVEC_ELT (vs, i) = op2;
9893 emit_insn (gen_vec_init<mode> (reg, par));
9894 emit_insn (gen_neg<mode>2 (neg, reg));
9895 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], neg));
;; vprot<ssemodesuffix> with an immediate count: operand 1 is a VI_128
;; register-or-memory source and operand 2 an immediate accepted by
;; const_0_to_<sserotatemax>_operand.
;; NOTE(review): the rotate wrapper and the enabling condition are not
;; visible in this excerpt.
9900 (define_insn "xop_rotl<mode>3"
9901 [(set (match_operand:VI_128 0 "register_operand" "=x")
9903 (match_operand:VI_128 1 "nonimmediate_operand" "xm")
9904 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
9906 "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
9907 [(set_attr "type" "sseishft")
9908 (set_attr "length_immediate" "1")
9909 (set_attr "mode" "TI")])
;; Rotate right by an immediate, emitted as vprot<ssemodesuffix> with a
;; recomputed count: the output code builds an immediate equal to the
;; element bit size minus operand 2 and prints it as %3.
;; NOTE(review): the rotate wrapper, the enabling condition and the line
;; carrying the assignment target are not visible in this excerpt.
9911 (define_insn "xop_rotr<mode>3"
9912 [(set (match_operand:VI_128 0 "register_operand" "=x")
9914 (match_operand:VI_128 1 "nonimmediate_operand" "xm")
9915 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
9919 = GEN_INT (GET_MODE_BITSIZE (<ssescalarmode>mode) - INTVAL (operands[2]));
9920 return \"vprot<ssemodesuffix>\t{%3, %1, %0|%0, %1, %3}\";
9922 [(set_attr "type" "sseishft")
9923 (set_attr "length_immediate" "1")
9924 (set_attr "mode" "TI")])
;; vrotr<mode>3: rotate with a per-element vector count (all operands are
;; VI_128 registers).  The count vector is negated into a fresh register and
;; the result is passed to xop_vrotl<mode>3.
9926 (define_expand "vrotr<mode>3"
9927 [(match_operand:VI_128 0 "register_operand")
9928 (match_operand:VI_128 1 "register_operand")
9929 (match_operand:VI_128 2 "register_operand")]
9932 rtx reg = gen_reg_rtx (<MODE>mode);
9933 emit_insn (gen_neg<mode>2 (reg, operands[2]));
9934 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
;; vrotl<mode>3: rotate with a per-element vector count; forwards its three
;; operands unchanged to xop_vrotl<mode>3.
9938 (define_expand "vrotl<mode>3"
9939 [(match_operand:VI_128 0 "register_operand")
9940 (match_operand:VI_128 1 "register_operand")
9941 (match_operand:VI_128 2 "register_operand")]
9944 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], operands[2]));
;; xop_vrotl<mode>3: variable-count vprot.  The RTL is an if_then_else over
;; operand 2 whose last visible arm uses (neg (match_dup 2)).
;; Two alternatives: operand 1 in register-or-memory with operand 2 in a
;; register, or operand 1 in a register with operand 2 in memory; the
;; condition additionally rejects both operands being memory.
;; NOTE(review): the comparison and the two rotate arms are not visible in
;; this excerpt.
9948 (define_insn "xop_vrotl<mode>3"
9949 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
9950 (if_then_else:VI_128
9952 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
9955 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
9959 (neg:VI_128 (match_dup 2)))))]
9960 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
9961 "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
9962 [(set_attr "type" "sseishft")
9963 (set_attr "prefix_data16" "0")
9964 (set_attr "prefix_extra" "2")
9965 (set_attr "mode" "TI")])
9967 ;; XOP packed shift instructions.
;; vlshr<mode>3 for VI12_128: the per-element count vector (operand 2) is
;; negated into a fresh register and passed to xop_shl<mode>3.
9968 (define_expand "vlshr<mode>3"
9969 [(set (match_operand:VI12_128 0 "register_operand")
9971 (match_operand:VI12_128 1 "register_operand")
9972 (match_operand:VI12_128 2 "nonimmediate_operand")))]
9975 rtx neg = gen_reg_rtx (<MODE>mode);
9976 emit_insn (gen_neg<mode>2 (neg, operands[2]));
9977 emit_insn (gen_xop_shl<mode>3 (operands[0], operands[1], neg));
;; vlshr<mode>3 for VI48_128, enabled for TARGET_AVX2 or TARGET_XOP.
;; The visible preparation code negates operand 2 and emits xop_shl<mode>3.
;; NOTE(review): the guard selecting when this XOP sequence is used (versus
;; matching the pattern directly) is not visible in this excerpt.
9981 (define_expand "vlshr<mode>3"
9982 [(set (match_operand:VI48_128 0 "register_operand")
9984 (match_operand:VI48_128 1 "register_operand")
9985 (match_operand:VI48_128 2 "nonimmediate_operand")))]
9986 "TARGET_AVX2 || TARGET_XOP"
9990 rtx neg = gen_reg_rtx (<MODE>mode);
9991 emit_insn (gen_neg<mode>2 (neg, operands[2]));
9992 emit_insn (gen_xop_shl<mode>3 (operands[0], operands[1], neg));
;; vlshr<mode>3 for VI48_256: no preparation code is visible, only the
;; operand pattern.
;; NOTE(review): the RTL shift code and the condition string are not visible
;; in this excerpt.
9997 (define_expand "vlshr<mode>3"
9998 [(set (match_operand:VI48_256 0 "register_operand")
10000 (match_operand:VI48_256 1 "register_operand")
10001 (match_operand:VI48_256 2 "nonimmediate_operand")))]
;; vashr<mode>3 for VI128_128: arithmetic right shift (ashiftrt) by a
;; per-element count vector.  The count is negated into a fresh register and
;; passed to xop_sha<mode>3.
10004 (define_expand "vashr<mode>3"
10005 [(set (match_operand:VI128_128 0 "register_operand")
10006 (ashiftrt:VI128_128
10007 (match_operand:VI128_128 1 "register_operand")
10008 (match_operand:VI128_128 2 "nonimmediate_operand")))]
10011 rtx neg = gen_reg_rtx (<MODE>mode);
10012 emit_insn (gen_neg<mode>2 (neg, operands[2]));
10013 emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], neg));
;; vashrv4si3: V4SI arithmetic right shift by a per-element count vector,
;; enabled for TARGET_AVX2 or TARGET_XOP.  The visible preparation code
;; negates operand 2 and emits xop_shav4si3.
;; NOTE(review): the guard selecting when this XOP sequence is used is not
;; visible in this excerpt.
10017 (define_expand "vashrv4si3"
10018 [(set (match_operand:V4SI 0 "register_operand")
10019 (ashiftrt:V4SI (match_operand:V4SI 1 "register_operand")
10020 (match_operand:V4SI 2 "nonimmediate_operand")))]
10021 "TARGET_AVX2 || TARGET_XOP"
10025 rtx neg = gen_reg_rtx (V4SImode);
10026 emit_insn (gen_negv4si2 (neg, operands[2]));
10027 emit_insn (gen_xop_shav4si3 (operands[0], operands[1], neg));
;; vashrv8si3: V8SI arithmetic right shift by a per-element count vector.
;; No preparation code is visible.
;; NOTE(review): the condition string is not visible in this excerpt.
10032 (define_expand "vashrv8si3"
10033 [(set (match_operand:V8SI 0 "register_operand")
10034 (ashiftrt:V8SI (match_operand:V8SI 1 "register_operand")
10035 (match_operand:V8SI 2 "nonimmediate_operand")))]
;; vashl<mode>3 for VI12_128: forwards the operands unchanged to
;; xop_sha<mode>3 (no negation of the count).
10038 (define_expand "vashl<mode>3"
10039 [(set (match_operand:VI12_128 0 "register_operand")
10041 (match_operand:VI12_128 1 "register_operand")
10042 (match_operand:VI12_128 2 "nonimmediate_operand")))]
10045 emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], operands[2]));
;; vashl<mode>3 for VI48_128, enabled for TARGET_AVX2 or TARGET_XOP.
;; The visible preparation code forces operand 2 into a register and emits
;; xop_sha<mode>3.
;; NOTE(review): the guard selecting when this XOP sequence is used is not
;; visible in this excerpt.
10049 (define_expand "vashl<mode>3"
10050 [(set (match_operand:VI48_128 0 "register_operand")
10052 (match_operand:VI48_128 1 "register_operand")
10053 (match_operand:VI48_128 2 "nonimmediate_operand")))]
10054 "TARGET_AVX2 || TARGET_XOP"
10058 operands[2] = force_reg (<MODE>mode, operands[2]);
10059 emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], operands[2]));
;; vashl<mode>3 for VI48_256: no preparation code is visible, only the
;; operand pattern.
;; NOTE(review): the RTL shift code and the condition string are not visible
;; in this excerpt.
10064 (define_expand "vashl<mode>3"
10065 [(set (match_operand:VI48_256 0 "register_operand")
10067 (match_operand:VI48_256 1 "register_operand")
10068 (match_operand:VI48_256 2 "nonimmediate_operand")))]
;; xop_sha<mode>3: variable-count vpsha.  The RTL is an if_then_else over
;; operand 2 whose last visible arm uses (neg (match_dup 2)).
;; Two alternatives (operand 1 "xm" with operand 2 "x", or operand 1 "x"
;; with operand 2 "m"); the condition rejects both operands being memory.
;; NOTE(review): the comparison and the two shift arms are not visible in
;; this excerpt.
10071 (define_insn "xop_sha<mode>3"
10072 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
10073 (if_then_else:VI_128
10075 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
10078 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
10082 (neg:VI_128 (match_dup 2)))))]
10083 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
10084 "vpsha<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10085 [(set_attr "type" "sseishft")
10086 (set_attr "prefix_data16" "0")
10087 (set_attr "prefix_extra" "2")
10088 (set_attr "mode" "TI")])
;; xop_shl<mode>3: variable-count vpshl.  Same operand layout, constraints
;; and condition as xop_sha<mode>3; only the mnemonic differs in the visible
;; lines.
;; NOTE(review): the comparison and the two shift arms of the if_then_else
;; are not visible in this excerpt.
10090 (define_insn "xop_shl<mode>3"
10091 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
10092 (if_then_else:VI_128
10094 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
10097 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
10101 (neg:VI_128 (match_dup 2)))))]
10102 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
10103 "vpshl<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10104 [(set_attr "type" "sseishft")
10105 (set_attr "prefix_data16" "0")
10106 (set_attr "prefix_extra" "2")
10107 (set_attr "mode" "TI")])
;; <shift_insn><mode>3 for VI1_AVX2: byte-element vector shift by a scalar
;; SImode count, for every code in any_shift.
;; With TARGET_XOP and V16QImode the count is broadcast into a V16QI
;; register via vec_init and the shift is emitted through xop_shlv16qi3
;; (for LSHIFTRT) or xop_shav16qi3 (other codes).  For codes other than
;; ASHIFT a constant count is negated directly with GEN_INT.
;; ix86_expand_vecop_qihi is also called with the original code and operands.
;; NOTE(review): the braces/else lines are not visible in this excerpt;
;; presumably `negate' is set for non-constant counts, gen_negv16qi2 is
;; emitted only when it is set, and ix86_expand_vecop_qihi is the non-XOP
;; fallback -- confirm against the full file.
10109 (define_expand "<shift_insn><mode>3"
10110 [(set (match_operand:VI1_AVX2 0 "register_operand")
10111 (any_shift:VI1_AVX2
10112 (match_operand:VI1_AVX2 1 "register_operand")
10113 (match_operand:SI 2 "nonmemory_operand")))]
10116 if (TARGET_XOP && <MODE>mode == V16QImode)
10118 bool negate = false;
10119 rtx (*gen) (rtx, rtx, rtx);
10123 if (<CODE> != ASHIFT)
10125 if (CONST_INT_P (operands[2]))
10126 operands[2] = GEN_INT (-INTVAL (operands[2]));
10130 par = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
10131 for (i = 0; i < 16; i++)
10132 XVECEXP (par, 0, i) = operands[2];
10134 tmp = gen_reg_rtx (V16QImode);
10135 emit_insn (gen_vec_initv16qi (tmp, par));
10138 emit_insn (gen_negv16qi2 (tmp, tmp));
10140 gen = (<CODE> == LSHIFTRT ? gen_xop_shlv16qi3 : gen_xop_shav16qi3);
10141 emit_insn (gen (operands[0], operands[1], tmp));
10144 ix86_expand_vecop_qihi (<CODE>, operands[0], operands[1], operands[2]);
;; ashrv2di3: V2DI shift by a scalar DImode count, emitted through
;; xop_shav2di3.  A constant count is negated directly with GEN_INT; the
;; count is then replicated into both elements of a PARALLEL and loaded into
;; a V2DI register with vec_init.
;; NOTE(review): the RTL shift code, the condition string and the guards
;; around `negate' / gen_negv2di2 are not visible in this excerpt;
;; presumably the vector negate is emitted only for non-constant counts.
10148 (define_expand "ashrv2di3"
10149 [(set (match_operand:V2DI 0 "register_operand")
10151 (match_operand:V2DI 1 "register_operand")
10152 (match_operand:DI 2 "nonmemory_operand")))]
10155 rtx reg = gen_reg_rtx (V2DImode);
10157 bool negate = false;
10160 if (CONST_INT_P (operands[2]))
10161 operands[2] = GEN_INT (-INTVAL (operands[2]));
10165 par = gen_rtx_PARALLEL (V2DImode, rtvec_alloc (2));
10166 for (i = 0; i < 2; i++)
10167 XVECEXP (par, 0, i) = operands[2];
10169 emit_insn (gen_vec_initv2di (reg, par));
10172 emit_insn (gen_negv2di2 (reg, reg));
10174 emit_insn (gen_xop_shav2di3 (operands[0], operands[1], reg));
10178 ;; XOP FRCZ support
;; xop_frcz<mode>2: one-input vfrcz over every mode in FMAMODE; the source
;; may be a register or memory.
10179 (define_insn "xop_frcz<mode>2"
10180 [(set (match_operand:FMAMODE 0 "register_operand" "=x")
10182 [(match_operand:FMAMODE 1 "nonimmediate_operand" "xm")]
10185 "vfrcz<ssemodesuffix>\t{%1, %0|%0, %1}"
10186 [(set_attr "type" "ssecvt1")
10187 (set_attr "mode" "<MODE>")])
;; xop_vmfrcz<mode>2: expander for the VF_128 vfrcz form; its preparation
;; code sets operands[3] to the all-zero constant of the vector mode.
;; NOTE(review): the rest of the RTL template, including how operand 3 is
;; used, is not visible in this excerpt.
10190 (define_expand "xop_vmfrcz<mode>2"
10191 [(set (match_operand:VF_128 0 "register_operand")
10194 [(match_operand:VF_128 1 "nonimmediate_operand")]
10200 operands[3] = CONST0_RTX (<MODE>mode);
;; *xop_vmfrcz_<mode>: insn for the VF_128 vfrcz form.  Operand 2 must be a
;; zero constant (const0_operand); the mnemonic uses the scalar mode suffix
;; and the Intel-syntax memory operand is printed with <iptr>.
;; NOTE(review): the surrounding RTL operators and the condition string are
;; not visible in this excerpt.
10203 (define_insn "*xop_vmfrcz_<mode>"
10204 [(set (match_operand:VF_128 0 "register_operand" "=x")
10207 [(match_operand:VF_128 1 "nonimmediate_operand" "xm")]
10209 (match_operand:VF_128 2 "const0_operand")
10212 "vfrcz<ssescalarmodesuffix>\t{%1, %0|%0, %<iptr>1}"
10213 [(set_attr "type" "ssecvt1")
10214 (set_attr "mode" "<MODE>")])
;; xop_maskcmp<mode>3: VI_128 vector compare whose operator (operand 1) must
;; satisfy ix86_comparison_int_operator.  Printed as vpcom with the
;; condition produced by %Y1; the second source may be memory.
10216 (define_insn "xop_maskcmp<mode>3"
10217 [(set (match_operand:VI_128 0 "register_operand" "=x")
10218 (match_operator:VI_128 1 "ix86_comparison_int_operator"
10219 [(match_operand:VI_128 2 "register_operand" "x")
10220 (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
10222 "vpcom%Y1<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
10223 [(set_attr "type" "sse4arg")
10224 (set_attr "prefix_data16" "0")
10225 (set_attr "prefix_rep" "0")
10226 (set_attr "prefix_extra" "2")
10227 (set_attr "length_immediate" "1")
10228 (set_attr "mode" "TI")])
;; xop_maskcmp_uns<mode>3: same shape as xop_maskcmp<mode>3 but the operator
;; must satisfy ix86_comparison_uns_operator, and the mnemonic carries a "u"
;; before the mode suffix (vpcom<cond>u<suffix>).
10230 (define_insn "xop_maskcmp_uns<mode>3"
10231 [(set (match_operand:VI_128 0 "register_operand" "=x")
10232 (match_operator:VI_128 1 "ix86_comparison_uns_operator"
10233 [(match_operand:VI_128 2 "register_operand" "x")
10234 (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
10236 "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
10237 [(set_attr "type" "ssecmp")
10238 (set_attr "prefix_data16" "0")
10239 (set_attr "prefix_rep" "0")
10240 (set_attr "prefix_extra" "2")
10241 (set_attr "length_immediate" "1")
10242 (set_attr "mode" "TI")])
10244 ;; Version of pcom*u* that is called from the intrinsics that allows pcomequ*
10245 ;; and pcomneu* not to be converted to the signed ones in case somebody needs
10246 ;; the exact instruction generated for the intrinsic.
;; The unsigned comparison is wrapped in UNSPEC_XOP_UNSIGNED_CMP; the
;; printed instruction is the same vpcom<cond>u<suffix> form.
10247 (define_insn "xop_maskcmp_uns2<mode>3"
10248 [(set (match_operand:VI_128 0 "register_operand" "=x")
10250 [(match_operator:VI_128 1 "ix86_comparison_uns_operator"
10251 [(match_operand:VI_128 2 "register_operand" "x")
10252 (match_operand:VI_128 3 "nonimmediate_operand" "xm")])]
10253 UNSPEC_XOP_UNSIGNED_CMP))]
10255 "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
10256 [(set_attr "type" "ssecmp")
10257 (set_attr "prefix_data16" "0")
10258 (set_attr "prefix_extra" "2")
10259 (set_attr "length_immediate" "1")
10260 (set_attr "mode" "TI")])
10262 ;; Pcomtrue and pcomfalse support. These are useless instructions, but are
10263 ;; being added here to be complete.
;; Operand 3 is a constant selector: a non-zero value prints vpcomtrue,
;; zero prints vpcomfalse.  Operands 1 and 2 are the two sources and are
;; kept inside UNSPEC_XOP_TRUEFALSE.
10264 (define_insn "xop_pcom_tf<mode>3"
10265 [(set (match_operand:VI_128 0 "register_operand" "=x")
10267 [(match_operand:VI_128 1 "register_operand" "x")
10268 (match_operand:VI_128 2 "nonimmediate_operand" "xm")
10269 (match_operand:SI 3 "const_int_operand" "n")]
10270 UNSPEC_XOP_TRUEFALSE))]
10273 return ((INTVAL (operands[3]) != 0)
10274 ? "vpcomtrue<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10275 : "vpcomfalse<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}");
10277 [(set_attr "type" "ssecmp")
10278 (set_attr "prefix_data16" "0")
10279 (set_attr "prefix_extra" "2")
10280 (set_attr "length_immediate" "1")
10281 (set_attr "mode" "TI")])
;; xop_vpermil2<mode>3: vpermil2 over the VF modes.  Operands 1 and 2 are
;; the two float sources, operand 3 is an integer-vector selector
;; (<sseintvecmode>, register or memory) and operand 4 is an immediate in
;; the range accepted by const_0_to_3_operand.
10283 (define_insn "xop_vpermil2<mode>3"
10284 [(set (match_operand:VF 0 "register_operand" "=x")
10286 [(match_operand:VF 1 "register_operand" "x")
10287 (match_operand:VF 2 "nonimmediate_operand" "%x")
10288 (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "xm")
10289 (match_operand:SI 4 "const_0_to_3_operand" "n")]
10292 "vpermil2<ssemodesuffix>\t{%4, %3, %2, %1, %0|%0, %1, %2, %3, %4}"
10293 [(set_attr "type" "sse4arg")
10294 (set_attr "length_immediate" "1")
10295 (set_attr "mode" "<MODE>")])
10297 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; aesenc: V2DI unspec with two alternatives.  The "noavx" alternative ties
;; operand 1 to the destination (constraint "0") and prints the two-operand
;; aesenc; the "avx" alternative prints the three-operand vaesenc.
10299 (define_insn "aesenc"
10300 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
10301 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
10302 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
10306 aesenc\t{%2, %0|%0, %2}
10307 vaesenc\t{%2, %1, %0|%0, %1, %2}"
10308 [(set_attr "isa" "noavx,avx")
10309 (set_attr "type" "sselog1")
10310 (set_attr "prefix_extra" "1")
10311 (set_attr "prefix" "orig,vex")
10312 (set_attr "btver2_decode" "double,double")
10313 (set_attr "mode" "TI")])
;; aesenclast: V2DI UNSPEC_AESENCLAST with two alternatives.  The "noavx"
;; alternative ties operand 1 to the destination and prints the two-operand
;; aesenclast; the "avx" alternative prints the three-operand vaesenclast.
10315 (define_insn "aesenclast"
10316 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
10317 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
10318 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
10319 UNSPEC_AESENCLAST))]
10322 aesenclast\t{%2, %0|%0, %2}
10323 vaesenclast\t{%2, %1, %0|%0, %1, %2}"
10324 [(set_attr "isa" "noavx,avx")
10325 (set_attr "type" "sselog1")
10326 (set_attr "prefix_extra" "1")
10327 (set_attr "prefix" "orig,vex")
10328 (set_attr "btver2_decode" "double,double")
10329 (set_attr "mode" "TI")])
;; aesdec: V2DI unspec with two alternatives.  The "noavx" alternative ties
;; operand 1 to the destination and prints the two-operand aesdec; the "avx"
;; alternative prints the three-operand vaesdec.
10331 (define_insn "aesdec"
10332 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
10333 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
10334 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
10338 aesdec\t{%2, %0|%0, %2}
10339 vaesdec\t{%2, %1, %0|%0, %1, %2}"
10340 [(set_attr "isa" "noavx,avx")
10341 (set_attr "type" "sselog1")
10342 (set_attr "prefix_extra" "1")
10343 (set_attr "prefix" "orig,vex")
10344 (set_attr "btver2_decode" "double,double")
10345 (set_attr "mode" "TI")])
;; aesdeclast: V2DI UNSPEC_AESDECLAST with two alternatives.  The "noavx"
;; alternative ties operand 1 to the destination and prints the two-operand
;; aesdeclast; the "avx" alternative prints the three-operand vaesdeclast.
10347 (define_insn "aesdeclast"
10348 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
10349 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
10350 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
10351 UNSPEC_AESDECLAST))]
10354 aesdeclast\t{%2, %0|%0, %2}
10355 vaesdeclast\t{%2, %1, %0|%0, %1, %2}"
10356 [(set_attr "isa" "noavx,avx")
10357 (set_attr "type" "sselog1")
10358 (set_attr "prefix_extra" "1")
10359 (set_attr "prefix" "orig,vex")
10360 (set_attr "btver2_decode" "double,double")
10361 (set_attr "mode" "TI")])
;; aesimc: single-source V2DI unspec (register or memory source).  The
;; template starts with %v and the prefix attribute is "maybe_vex", so one
;; alternative serves both the legacy and the VEX encoding.
10363 (define_insn "aesimc"
10364 [(set (match_operand:V2DI 0 "register_operand" "=x")
10365 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")]
10368 "%vaesimc\t{%1, %0|%0, %1}"
10369 [(set_attr "type" "sselog1")
10370 (set_attr "prefix_extra" "1")
10371 (set_attr "prefix" "maybe_vex")
10372 (set_attr "mode" "TI")])
;; aeskeygenassist: V2DI UNSPEC_AESKEYGENASSIST taking a register-or-memory
;; source and an immediate accepted by const_0_to_255_operand.  Uses the %v
;; / "maybe_vex" scheme and a one-byte immediate.
10374 (define_insn "aeskeygenassist"
10375 [(set (match_operand:V2DI 0 "register_operand" "=x")
10376 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")
10377 (match_operand:SI 2 "const_0_to_255_operand" "n")]
10378 UNSPEC_AESKEYGENASSIST))]
10380 "%vaeskeygenassist\t{%2, %1, %0|%0, %1, %2}"
10381 [(set_attr "type" "sselog1")
10382 (set_attr "prefix_extra" "1")
10383 (set_attr "length_immediate" "1")
10384 (set_attr "prefix" "maybe_vex")
10385 (set_attr "mode" "TI")])
;; pclmulqdq: V2DI unspec with two sources and an immediate accepted by
;; const_0_to_255_operand.  The "noavx" alternative ties operand 1 to the
;; destination and prints pclmulqdq; the "avx" alternative prints the
;; non-destructive vpclmulqdq.
10387 (define_insn "pclmulqdq"
10388 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
10389 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
10390 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")
10391 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
10395 pclmulqdq\t{%3, %2, %0|%0, %2, %3}
10396 vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10397 [(set_attr "isa" "noavx,avx")
10398 (set_attr "type" "sselog1")
10399 (set_attr "prefix_extra" "1")
10400 (set_attr "length_immediate" "1")
10401 (set_attr "prefix" "orig,vex")
10402 (set_attr "mode" "TI")])
;; avx_vzeroall: builds operand 0 as a PARALLEL of nregs + 1 elements, where
;; nregs is 16 for TARGET_64BIT and 8 otherwise.  Element 0 is an
;; UNSPEC_VOLATILE over const0_rtx; elements 1..nregs are SETs that store
;; the V8SImode zero constant into each SSE register SSE_REGNO (regno).
;; NOTE(review): the unspec code passed to gen_rtx_UNSPEC_VOLATILE and the
;; condition string are not visible in this excerpt.
10404 (define_expand "avx_vzeroall"
10405 [(match_par_dup 0 [(const_int 0)])]
10408 int nregs = TARGET_64BIT ? 16 : 8;
10411 operands[0] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs + 1));
10413 XVECEXP (operands[0], 0, 0)
10414 = gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, const0_rtx),
10417 for (regno = 0; regno < nregs; regno++)
10418 XVECEXP (operands[0], 0, regno + 1)
10419 = gen_rtx_SET (VOIDmode,
10420 gen_rtx_REG (V8SImode, SSE_REGNO (regno)),
10421 CONST0_RTX (V8SImode));
;; *avx_vzeroall: matches a PARALLEL accepted by the vzeroall_operation
;; predicate whose first element is the UNSPECV_VZEROALL unspec_volatile.
;; The attributes declare no ModRM byte and no memory access.
;; NOTE(review): the condition and output template are not visible in this
;; excerpt.
10424 (define_insn "*avx_vzeroall"
10425 [(match_parallel 0 "vzeroall_operation"
10426 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROALL)])]
10429 [(set_attr "type" "sse")
10430 (set_attr "modrm" "0")
10431 (set_attr "memory" "none")
10432 (set_attr "prefix" "vex")
10433 (set_attr "btver2_decode" "vector")
10434 (set_attr "mode" "OI")])
10436 ;; Clear the upper 128bits of AVX registers, equivalent to a NOP
10437 ;; if the upper 128bits are unused.
;; Modelled as a bare UNSPECV_VZEROUPPER unspec_volatile with no operands;
;; the attributes declare no ModRM byte and no memory access.
;; NOTE(review): the condition and output template are not visible in this
;; excerpt.
10438 (define_insn "avx_vzeroupper"
10439 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)]
10442 [(set_attr "type" "sse")
10443 (set_attr "modrm" "0")
10444 (set_attr "memory" "none")
10445 (set_attr "prefix" "vex")
10446 (set_attr "btver2_decode" "vector")
10447 (set_attr "mode" "OI")])
;; Maps each integer vector mode to the 128-bit integer vector mode with the
;; same element type; the 128-bit modes map to themselves.
10449 (define_mode_attr AVXTOSSEMODE
10450 [(V4DI "V2DI") (V2DI "V2DI")
10451 (V8SI "V4SI") (V4SI "V4SI")
10452 (V16HI "V8HI") (V8HI "V8HI")
10453 (V32QI "V16QI") (V16QI "V16QI")])
;; avx2_pbroadcast<mode>: vpbroadcast of element 0 of operand 1 into every
;; mode in VI.  The source has mode <AVXTOSSEMODE> and may be a register or
;; memory; the Intel-syntax operand is printed with <iptr>.
;; NOTE(review): the outer RTL operator around the vec_select and the
;; condition string are not visible in this excerpt.
10455 (define_insn "avx2_pbroadcast<mode>"
10456 [(set (match_operand:VI 0 "register_operand" "=x")
10458 (vec_select:<ssescalarmode>
10459 (match_operand:<AVXTOSSEMODE> 1 "nonimmediate_operand" "xm")
10460 (parallel [(const_int 0)]))))]
10462 "vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}"
10463 [(set_attr "type" "ssemov")
10464 (set_attr "prefix_extra" "1")
10465 (set_attr "prefix" "vex")
10466 (set_attr "mode" "<sseinsnmode>")])
;; avx2_pbroadcast<mode>_1: vec_duplicate of element 0 of a VI_256 source.
;; Alternative 0 takes the source from memory; alternative 1 takes it from a
;; register and prints it with the %x modifier.
10468 (define_insn "avx2_pbroadcast<mode>_1"
10469 [(set (match_operand:VI_256 0 "register_operand" "=x,x")
10470 (vec_duplicate:VI_256
10471 (vec_select:<ssescalarmode>
10472 (match_operand:VI_256 1 "nonimmediate_operand" "m,x")
10473 (parallel [(const_int 0)]))))]
10476 vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}
10477 vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %x1}"
10478 [(set_attr "type" "ssemov")
10479 (set_attr "prefix_extra" "1")
10480 (set_attr "prefix" "vex")
10481 (set_attr "mode" "<sseinsnmode>")])
;; avx2_permvar<mode>: vperm over VI4F_256 with a V8SI register selector
;; (operand 2).  In the template the data operand %1 is printed first in
;; AT&T syntax and last in Intel syntax; %2 is the middle operand in both.
10483 (define_insn "avx2_permvar<mode>"
10484 [(set (match_operand:VI4F_256 0 "register_operand" "=x")
10486 [(match_operand:VI4F_256 1 "nonimmediate_operand" "xm")
10487 (match_operand:V8SI 2 "register_operand" "x")]
10490 "vperm<ssemodesuffix>\t{%1, %2, %0|%0, %2, %1}"
10491 [(set_attr "type" "sselog")
10492 (set_attr "prefix" "vex")
10493 (set_attr "mode" "OI")])
;; avx2_perm<mode>: splits the 8-bit immediate (operand 2) into four 2-bit
;; fields, lowest bits first, and passes them as separate constants to
;; avx2_perm<mode>_1.
10495 (define_expand "avx2_perm<mode>"
10496 [(match_operand:VI8F_256 0 "register_operand")
10497 (match_operand:VI8F_256 1 "nonimmediate_operand")
10498 (match_operand:SI 2 "const_0_to_255_operand")]
10501 int mask = INTVAL (operands[2]);
10502 emit_insn (gen_avx2_perm<mode>_1 (operands[0], operands[1],
10503 GEN_INT ((mask >> 0) & 3),
10504 GEN_INT ((mask >> 2) & 3),
10505 GEN_INT ((mask >> 4) & 3),
10506 GEN_INT ((mask >> 6) & 3)));
;; avx2_perm<mode>_1: vec_select of four elements of a VI8F_256 source, each
;; index constrained by const_0_to_3_operand.  The output code packs
;; operands 2..5 into an 8-bit mask (2 bits each, operand 2 lowest),
;; overwrites operands[2] with it and prints vperm with that immediate.
;; NOTE(review): the declaration/initialisation of `mask' is not visible in
;; this excerpt.
10510 (define_insn "avx2_perm<mode>_1"
10511 [(set (match_operand:VI8F_256 0 "register_operand" "=x")
10512 (vec_select:VI8F_256
10513 (match_operand:VI8F_256 1 "nonimmediate_operand" "xm")
10514 (parallel [(match_operand 2 "const_0_to_3_operand")
10515 (match_operand 3 "const_0_to_3_operand")
10516 (match_operand 4 "const_0_to_3_operand")
10517 (match_operand 5 "const_0_to_3_operand")])))]
10521 mask |= INTVAL (operands[2]) << 0;
10522 mask |= INTVAL (operands[3]) << 2;
10523 mask |= INTVAL (operands[4]) << 4;
10524 mask |= INTVAL (operands[5]) << 6;
10525 operands[2] = GEN_INT (mask);
10526 return "vperm<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}";
10528 [(set_attr "type" "sselog")
10529 (set_attr "prefix" "vex")
10530 (set_attr "mode" "<sseinsnmode>")])
;; avx2_permv2ti: vperm2i128 on two V4DI sources (the second may be memory)
;; with an immediate accepted by const_0_to_255_operand.
10532 (define_insn "avx2_permv2ti"
10533 [(set (match_operand:V4DI 0 "register_operand" "=x")
10535 [(match_operand:V4DI 1 "register_operand" "x")
10536 (match_operand:V4DI 2 "nonimmediate_operand" "xm")
10537 (match_operand:SI 3 "const_0_to_255_operand" "n")]
10540 "vperm2i128\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10541 [(set_attr "type" "sselog")
10542 (set_attr "prefix" "vex")
10543 (set_attr "mode" "OI")])
;; avx2_vec_dupv4df: vec_duplicate into V4DF of element 0 selected from a
;; V2DF register; printed as vbroadcastsd with a register source.
;; NOTE(review): the vec_select line and the condition string are not
;; visible in this excerpt.
10545 (define_insn "avx2_vec_dupv4df"
10546 [(set (match_operand:V4DF 0 "register_operand" "=x")
10547 (vec_duplicate:V4DF
10549 (match_operand:V2DF 1 "register_operand" "x")
10550 (parallel [(const_int 0)]))))]
10552 "vbroadcastsd\t{%1, %0|%0, %1}"
10553 [(set_attr "type" "sselog1")
10554 (set_attr "prefix" "vex")
10555 (set_attr "mode" "V4DF")])
10557 ;; Modes handled by AVX vec_dup patterns.
;; The iterator lists the eight-element SI/SF and four-element DI/DF modes.
10558 (define_mode_iterator AVX_VEC_DUP_MODE
10559 [V8SI V8SF V4DI V4DF])
;; vec_dup<mode>: vec_duplicate of a scalar into the AVX_VEC_DUP_MODE modes.
;; Three alternatives: a memory source (any ISA) printed as vbroadcast; a
;; register source restricted to "avx2" and printed with %x1; and a
;; discouraged ("?x") register source restricted to "noavx2".
;; NOTE(review): the template line for the third alternative and the
;; condition string are not visible in this excerpt.
10561 (define_insn "vec_dup<mode>"
10562 [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand" "=x,x,x")
10563 (vec_duplicate:AVX_VEC_DUP_MODE
10564 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "m,x,?x")))]
10567 vbroadcast<ssescalarmodesuffix>\t{%1, %0|%0, %1}
10568 vbroadcast<ssescalarmodesuffix>\t{%x1, %0|%0, %x1}
10570 [(set_attr "type" "ssemov")
10571 (set_attr "prefix_extra" "1")
10572 (set_attr "prefix" "vex")
10573 (set_attr "isa" "*,avx2,noavx2")
10574 (set_attr "mode" "V8SF")])
;; avx2_vbroadcasti128_<mode>: vbroadcasti128 into the VI_256 modes from a
;; half-width (<ssehalfvecmode>) source that must be in memory.
;; NOTE(review): the RTL operator combining the source and the condition
;; string are not visible in this excerpt.
10576 (define_insn "avx2_vbroadcasti128_<mode>"
10577 [(set (match_operand:VI_256 0 "register_operand" "=x")
10579 (match_operand:<ssehalfvecmode> 1 "memory_operand" "m")
10582 "vbroadcasti128\t{%1, %0|%0, %1}"
10583 [(set_attr "type" "ssemov")
10584 (set_attr "prefix_extra" "1")
10585 (set_attr "prefix" "vex")
10586 (set_attr "mode" "OI")])
;; NOTE(review): the header line of this definition is not visible in this
;; excerpt; its shape (pattern, condition, replacement, preparation code)
;; is that of a define_split -- confirm against the full file.
;; On AVX without AVX2, after reload, a vec_duplicate from a register is
;; replaced by two sets: a vec_duplicate into operand 2 in the half-width
;; mode, then a vec_concat of operand 2 with itself.  Operand 2 is created
;; as a half-width register with the same REGNO as operand 0.
10589 [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand")
10590 (vec_duplicate:AVX_VEC_DUP_MODE
10591 (match_operand:<ssescalarmode> 1 "register_operand")))]
10592 "TARGET_AVX && !TARGET_AVX2 && reload_completed"
10593 [(set (match_dup 2)
10594 (vec_duplicate:<ssehalfvecmode> (match_dup 1)))
10596 (vec_concat:AVX_VEC_DUP_MODE (match_dup 2) (match_dup 2)))]
10597 "operands[2] = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (operands[0]));")
;; avx_vbroadcastf128_<mode>: fills a V_256 register from a half-width
;; source, with three alternatives:
;;   memory source            -> vbroadcast<i128>
;;   source tied to operand 0 -> vinsert<i128> with immediate 1
;;   other register ("?x")    -> vperm2<i128> with immediate 0, using %t1
;; NOTE(review): the RTL operator combining the source and the condition
;; string are not visible in this excerpt.
10599 (define_insn "avx_vbroadcastf128_<mode>"
10600 [(set (match_operand:V_256 0 "register_operand" "=x,x,x")
10602 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "m,0,?x")
10606 vbroadcast<i128>\t{%1, %0|%0, %1}
10607 vinsert<i128>\t{$1, %1, %0, %0|%0, %0, %1, 1}
10608 vperm2<i128>\t{$0, %t1, %t1, %0|%0, %t1, %t1, 0}"
10609 [(set_attr "type" "ssemov,sselog1,sselog1")
10610 (set_attr "prefix_extra" "1")
10611 (set_attr "length_immediate" "0,1,1")
10612 (set_attr "prefix" "vex")
10613 (set_attr "mode" "<sseinsnmode>")])
10615 ;; Recognize broadcast as a vec_select as produced by builtin_vec_perm.
10616 ;; If it so happens that the input is in memory, use vbroadcast.
10617 ;; Otherwise use vpermilp (and in the case of 256-bit modes, vperm2f128).
;; V4SF case.  Operand 3 is the selected element index (elt).  One output
;; path re-addresses the memory source to the SFmode element at byte offset
;; elt * 4 and prints vbroadcastss; another prints vpermilps with the
;; immediate elt * 0x55.
;; NOTE(review): the case labels tying each path to an alternative are not
;; visible in this excerpt; the attribute lists (length_immediate "0,0,1")
;; suggest the two memory alternatives broadcast and the register
;; alternative uses vpermilps -- confirm against the full file.
10618 (define_insn "*avx_vperm_broadcast_v4sf"
10619 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
10621 (match_operand:V4SF 1 "nonimmediate_operand" "m,o,x")
10622 (match_parallel 2 "avx_vbroadcast_operand"
10623 [(match_operand 3 "const_int_operand" "C,n,n")])))]
10626 int elt = INTVAL (operands[3]);
10627 switch (which_alternative)
10631 operands[1] = adjust_address_nv (operands[1], SFmode, elt * 4);
10632 return "vbroadcastss\t{%1, %0|%0, %k1}";
10634 operands[2] = GEN_INT (elt * 0x55);
10635 return "vpermilps\t{%2, %1, %0|%0, %1, %2}";
10637 gcc_unreachable ();
10640 [(set_attr "type" "ssemov,ssemov,sselog1")
10641 (set_attr "prefix_extra" "1")
10642 (set_attr "length_immediate" "0,0,1")
10643 (set_attr "prefix" "vex")
10644 (set_attr "mode" "SF,SF,V4SF")])
;; *avx_vperm_broadcast_<mode>: VF_256 broadcast of element elt (operand 3),
;; split after reload unless the mode is V4DF and AVX2 is available.  The
;; replacement pattern is a vec_duplicate of operand 1.
;; Visible split code: with AVX2 and elt == 0 it emits vec_dup<mode> on the
;; low scalar part; otherwise it emits avx_vpermil<mode> to replicate the
;; element within each 128-bit lane, then avx_vperm2f128<mode>3 to copy the
;; wanted lane to both halves.  operands[1] is also re-addressed to the
;; scalar element at byte offset elt * element size.
;; NOTE(review): the branch structure (which of these paths apply to
;; register versus memory sources) is not visible in this excerpt.
10646 (define_insn_and_split "*avx_vperm_broadcast_<mode>"
10647 [(set (match_operand:VF_256 0 "register_operand" "=x,x,x")
10649 (match_operand:VF_256 1 "nonimmediate_operand" "m,o,?x")
10650 (match_parallel 2 "avx_vbroadcast_operand"
10651 [(match_operand 3 "const_int_operand" "C,n,n")])))]
10654 "&& reload_completed && (<MODE>mode != V4DFmode || !TARGET_AVX2)"
10655 [(set (match_dup 0) (vec_duplicate:VF_256 (match_dup 1)))]
10657 rtx op0 = operands[0], op1 = operands[1];
10658 int elt = INTVAL (operands[3]);
10664 if (TARGET_AVX2 && elt == 0)
10666 emit_insn (gen_vec_dup<mode> (op0, gen_lowpart (<ssescalarmode>mode,
10671 /* Shuffle element we care about into all elements of the 128-bit lane.
10672 The other lane gets shuffled too, but we don't care. */
10673 if (<MODE>mode == V4DFmode)
10674 mask = (elt & 1 ? 15 : 0);
10676 mask = (elt & 3) * 0x55;
10677 emit_insn (gen_avx_vpermil<mode> (op0, op1, GEN_INT (mask)));
10679 /* Shuffle the lane we care about into both lanes of the dest. */
10680 mask = (elt / (<ssescalarnum> / 2)) * 0x11;
10681 emit_insn (gen_avx_vperm2f128<mode>3 (op0, op0, op0, GEN_INT (mask)));
10685 operands[1] = adjust_address (op1, <ssescalarmode>mode,
10686 elt * GET_MODE_SIZE (<ssescalarmode>mode));
;; avx_vpermil<mode> for VF2: decodes the immediate one bit per element into
;; an array of element indices.  Bits 0 and 1 select within elements 0..1;
;; for V4DF, bits 2 and 3 select within elements 2..3 (hence the + 2).  The
;; indices are wrapped in a PARALLEL.
;; NOTE(review): the assignment target of the PARALLEL is not visible in
;; this excerpt (presumably operands[2]).
10689 (define_expand "avx_vpermil<mode>"
10690 [(set (match_operand:VF2 0 "register_operand")
10692 (match_operand:VF2 1 "nonimmediate_operand")
10693 (match_operand:SI 2 "const_0_to_255_operand")))]
10696 int mask = INTVAL (operands[2]);
10697 rtx perm[<ssescalarnum>];
10699 perm[0] = GEN_INT (mask & 1);
10700 perm[1] = GEN_INT ((mask >> 1) & 1);
10701 if (<MODE>mode == V4DFmode)
10703 perm[2] = GEN_INT (((mask >> 2) & 1) + 2);
10704 perm[3] = GEN_INT (((mask >> 3) & 1) + 2);
10708 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
;; avx_vpermil<mode> for VF1: decodes the immediate two bits per element
;; into an array of element indices.  The four 2-bit fields give indices
;; for elements 0..3; for V8SF the same four fields are reused for elements
;; 4..7 with 4 added.  The indices are wrapped in a PARALLEL.
;; NOTE(review): the assignment target of the PARALLEL is not visible in
;; this excerpt (presumably operands[2]).
10711 (define_expand "avx_vpermil<mode>"
10712 [(set (match_operand:VF1 0 "register_operand")
10714 (match_operand:VF1 1 "nonimmediate_operand")
10715 (match_operand:SI 2 "const_0_to_255_operand")))]
10718 int mask = INTVAL (operands[2]);
10719 rtx perm[<ssescalarnum>];
10721 perm[0] = GEN_INT (mask & 3);
10722 perm[1] = GEN_INT ((mask >> 2) & 3);
10723 perm[2] = GEN_INT ((mask >> 4) & 3);
10724 perm[3] = GEN_INT ((mask >> 6) & 3);
10725 if (<MODE>mode == V8SFmode)
10727 perm[4] = GEN_INT ((mask & 3) + 4);
10728 perm[5] = GEN_INT (((mask >> 2) & 3) + 4);
10729 perm[6] = GEN_INT (((mask >> 4) & 3) + 4);
10730 perm[7] = GEN_INT (((mask >> 6) & 3) + 4);
10734 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
;; *avx_vpermilp<mode>: matches a selection PARALLEL (operand 2) that
;; avx_vpermilp_parallel accepts.  The output code calls the same helper,
;; subtracts 1 from its result to obtain the immediate, overwrites
;; operands[2] with it and prints vpermil with that immediate.
;; NOTE(review): the first part of the condition string is not visible in
;; this excerpt.
10737 (define_insn "*avx_vpermilp<mode>"
10738 [(set (match_operand:VF 0 "register_operand" "=x")
10740 (match_operand:VF 1 "nonimmediate_operand" "xm")
10741 (match_parallel 2 ""
10742 [(match_operand 3 "const_int_operand")])))]
10744 && avx_vpermilp_parallel (operands[2], <MODE>mode)"
10746 int mask = avx_vpermilp_parallel (operands[2], <MODE>mode) - 1;
10747 operands[2] = GEN_INT (mask);
10748 return "vpermil<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}";
10750 [(set_attr "type" "sselog")
10751 (set_attr "prefix_extra" "1")
10752 (set_attr "length_immediate" "1")
10753 (set_attr "prefix" "vex")
10754 (set_attr "mode" "<MODE>")])
;; avx_vpermilvar<mode>3: vpermil with a variable selector.  Operand 1 is
;; the float source register; operand 2 is an integer vector
;; (<sseintvecmode>) selector that may be a register or memory.
10756 (define_insn "avx_vpermilvar<mode>3"
10757 [(set (match_operand:VF 0 "register_operand" "=x")
10759 [(match_operand:VF 1 "register_operand" "x")
10760 (match_operand:<sseintvecmode> 2 "nonimmediate_operand" "xm")]
10763 "vpermil<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10764 [(set_attr "type" "sselog")
10765 (set_attr "prefix_extra" "1")
10766 (set_attr "prefix" "vex")
10767 (set_attr "btver2_decode" "vector")
10768 (set_attr "mode" "<MODE>")])
;; avx_vperm2f128<mode>3: the default pattern is an UNSPEC_VPERMIL2F128 of
;; the two sources and the immediate.  When neither bit of 0x88 is set in
;; the immediate, the preparation code instead builds a SET of operand 0
;; from a vec_select over the vec_concat of operands 1 and 2: the low half
;; of the result takes nelt/2 consecutive elements starting at
;; (mask & 3) * nelt/2, the high half at ((mask >> 4) & 3) * nelt/2.
;; NOTE(review): the lines that emit the constructed SET and finish the
;; expander are not visible in this excerpt.
10770 (define_expand "avx_vperm2f128<mode>3"
10771 [(set (match_operand:AVX256MODE2P 0 "register_operand")
10772 (unspec:AVX256MODE2P
10773 [(match_operand:AVX256MODE2P 1 "register_operand")
10774 (match_operand:AVX256MODE2P 2 "nonimmediate_operand")
10775 (match_operand:SI 3 "const_0_to_255_operand")]
10776 UNSPEC_VPERMIL2F128))]
10779 int mask = INTVAL (operands[3]);
10780 if ((mask & 0x88) == 0)
10782 rtx perm[<ssescalarnum>], t1, t2;
10783 int i, base, nelt = <ssescalarnum>, nelt2 = nelt / 2;
10785 base = (mask & 3) * nelt2;
10786 for (i = 0; i < nelt2; ++i)
10787 perm[i] = GEN_INT (base + i);
10789 base = ((mask >> 4) & 3) * nelt2;
10790 for (i = 0; i < nelt2; ++i)
10791 perm[i + nelt2] = GEN_INT (base + i);
10793 t2 = gen_rtx_VEC_CONCAT (<ssedoublevecmode>mode,
10794 operands[1], operands[2]);
10795 t1 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, perm));
10796 t2 = gen_rtx_VEC_SELECT (<MODE>mode, t2, t1);
10797 t2 = gen_rtx_SET (VOIDmode, operands[0], t2);
10803 ;; Note that bits 7 and 3 of the imm8 allow lanes to be zeroed, which
10804 ;; means that in order to represent this properly in rtl we'd have to
10805 ;; nest *another* vec_concat with a zero operand and do the select from
10806 ;; a 4x wide vector. That doesn't seem very nice.
;; Unspec form: accepts any immediate allowed by const_0_to_255_operand and
;; prints vperm2<i128> with it unchanged.
10807 (define_insn "*avx_vperm2f128<mode>_full"
10808 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
10809 (unspec:AVX256MODE2P
10810 [(match_operand:AVX256MODE2P 1 "register_operand" "x")
10811 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm")
10812 (match_operand:SI 3 "const_0_to_255_operand" "n")]
10813 UNSPEC_VPERMIL2F128))]
10815 "vperm2<i128>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10816 [(set_attr "type" "sselog")
10817 (set_attr "prefix_extra" "1")
10818 (set_attr "length_immediate" "1")
10819 (set_attr "prefix" "vex")
10820 (set_attr "mode" "<sseinsnmode>")])
;; *avx_vperm2f128<mode>_nozero: vec_select over the vec_concat of the two
;; sources, matched when avx_vperm2f128_parallel accepts the selection
;; PARALLEL (operand 3).  The output code recovers the immediate as the
;; helper's result minus 1; two paths print vinsert<i128> with immediate 0
;; or 1, and the general path prints vperm2<i128> with the recovered mask.
;; NOTE(review): the conditions selecting the two vinsert paths and the
;; first part of the condition string are not visible in this excerpt.
10822 (define_insn "*avx_vperm2f128<mode>_nozero"
10823 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
10824 (vec_select:AVX256MODE2P
10825 (vec_concat:<ssedoublevecmode>
10826 (match_operand:AVX256MODE2P 1 "register_operand" "x")
10827 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm"))
10828 (match_parallel 3 ""
10829 [(match_operand 4 "const_int_operand")])))]
10831 && avx_vperm2f128_parallel (operands[3], <MODE>mode)"
10833 int mask = avx_vperm2f128_parallel (operands[3], <MODE>mode) - 1;
10835 return "vinsert<i128>\t{$0, %x2, %1, %0|%0, %1, %x2, 0}";
10837 return "vinsert<i128>\t{$1, %x2, %1, %0|%0, %1, %x2, 1}";
10838 operands[3] = GEN_INT (mask);
10839 return "vperm2<i128>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
10841 [(set_attr "type" "sselog")
10842 (set_attr "prefix_extra" "1")
10843 (set_attr "length_immediate" "1")
10844 (set_attr "prefix" "vex")
10845 (set_attr "mode" "<sseinsnmode>")])
;; avx_vinsertf128<mode>: inserts a half-width vector (operand 2) into a
;; V_256 vector (operand 1).  Operand 3 is accepted by
;; const_0_to_1_operand; the switch picks gen_vec_set_lo_<mode> or
;; gen_vec_set_hi_<mode> and the chosen generator is emitted with operands
;; 0..2.
;; NOTE(review): the case labels are not visible in this excerpt;
;; presumably 0 selects the low half and 1 the high half.
10847 (define_expand "avx_vinsertf128<mode>"
10848 [(match_operand:V_256 0 "register_operand")
10849 (match_operand:V_256 1 "register_operand")
10850 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand")
10851 (match_operand:SI 3 "const_0_to_1_operand")]
10854 rtx (*insn)(rtx, rtx, rtx);
10856 switch (INTVAL (operands[3]))
10859 insn = gen_vec_set_lo_<mode>;
10862 insn = gen_vec_set_hi_<mode>;
10865 gcc_unreachable ();
10868 emit_insn (insn (operands[0], operands[1], operands[2]));
;; avx2_vec_set_lo_v4di: vinserti128 with immediate 0.  Operand 2 (V2DI,
;; register or memory) is combined with elements 2 and 3 selected from
;; operand 1.
;; NOTE(review): the vec_concat/vec_select operator lines and the condition
;; string are not visible in this excerpt.
10872 (define_insn "avx2_vec_set_lo_v4di"
10873 [(set (match_operand:V4DI 0 "register_operand" "=x")
10875 (match_operand:V2DI 2 "nonimmediate_operand" "xm")
10877 (match_operand:V4DI 1 "register_operand" "x")
10878 (parallel [(const_int 2) (const_int 3)]))))]
10880 "vinserti128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
10881 [(set_attr "type" "sselog")
10882 (set_attr "prefix_extra" "1")
10883 (set_attr "length_immediate" "1")
10884 (set_attr "prefix" "vex")
10885 (set_attr "mode" "OI")])
;; avx2_vec_set_hi_v4di: vinserti128 with immediate 1.  Elements 0 and 1
;; selected from operand 1 are combined with operand 2 (V2DI, register or
;; memory).
;; NOTE(review): the vec_concat/vec_select operator lines and the condition
;; string are not visible in this excerpt.
10887 (define_insn "avx2_vec_set_hi_v4di"
10888 [(set (match_operand:V4DI 0 "register_operand" "=x")
10891 (match_operand:V4DI 1 "register_operand" "x")
10892 (parallel [(const_int 0) (const_int 1)]))
10893 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
10895 "vinserti128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
10896 [(set_attr "type" "sselog")
10897 (set_attr "prefix_extra" "1")
10898 (set_attr "length_immediate" "1")
10899 (set_attr "prefix" "vex")
10900 (set_attr "mode" "OI")])
;; vec_set_lo_<mode> for VI8F_256: the result is the vec_concat of operand 2
;; (new low half) with elements 2 and 3 of operand 1 (kept high half);
;; printed as vinsert<i128> with immediate 0.
10902 (define_insn "vec_set_lo_<mode>"
10903 [(set (match_operand:VI8F_256 0 "register_operand" "=x")
10904 (vec_concat:VI8F_256
10905 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")
10906 (vec_select:<ssehalfvecmode>
10907 (match_operand:VI8F_256 1 "register_operand" "x")
10908 (parallel [(const_int 2) (const_int 3)]))))]
10910 "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
10911 [(set_attr "type" "sselog")
10912 (set_attr "prefix_extra" "1")
10913 (set_attr "length_immediate" "1")
10914 (set_attr "prefix" "vex")
10915 (set_attr "mode" "<sseinsnmode>")])
;; vec_set_hi_<mode> for VI8F_256: the result is the vec_concat of elements
;; 0 and 1 of operand 1 (kept low half) with operand 2 (new high half);
;; printed as vinsert<i128> with immediate 1.
10917 (define_insn "vec_set_hi_<mode>"
10918 [(set (match_operand:VI8F_256 0 "register_operand" "=x")
10919 (vec_concat:VI8F_256
10920 (vec_select:<ssehalfvecmode>
10921 (match_operand:VI8F_256 1 "register_operand" "x")
10922 (parallel [(const_int 0) (const_int 1)]))
10923 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")))]
10925 "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
10926 [(set_attr "type" "sselog")
10927 (set_attr "prefix_extra" "1")
10928 (set_attr "length_immediate" "1")
10929 (set_attr "prefix" "vex")
10930 (set_attr "mode" "<sseinsnmode>")])
;; vec_set_lo_<mode> for VI4F_256: the result is the vec_concat of operand 2
;; (new low half) with elements 4..7 of operand 1 (kept high half); printed
;; as vinsert<i128> with immediate 0.
10932 (define_insn "vec_set_lo_<mode>"
10933 [(set (match_operand:VI4F_256 0 "register_operand" "=x")
10934 (vec_concat:VI4F_256
10935 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")
10936 (vec_select:<ssehalfvecmode>
10937 (match_operand:VI4F_256 1 "register_operand" "x")
10938 (parallel [(const_int 4) (const_int 5)
10939 (const_int 6) (const_int 7)]))))]
10941 "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
10942 [(set_attr "type" "sselog")
10943 (set_attr "prefix_extra" "1")
10944 (set_attr "length_immediate" "1")
10945 (set_attr "prefix" "vex")
10946 (set_attr "mode" "<sseinsnmode>")])
;; vec_set_hi_<mode> for VI4F_256: the result is the vec_concat of elements
;; 0..3 of operand 1 (kept low half) with operand 2 (new high half); printed
;; as vinsert<i128> with immediate 1.
10948 (define_insn "vec_set_hi_<mode>"
10949 [(set (match_operand:VI4F_256 0 "register_operand" "=x")
10950 (vec_concat:VI4F_256
10951 (vec_select:<ssehalfvecmode>
10952 (match_operand:VI4F_256 1 "register_operand" "x")
10953 (parallel [(const_int 0) (const_int 1)
10954 (const_int 2) (const_int 3)]))
10955 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")))]
10957 "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
10958 [(set_attr "type" "sselog")
10959 (set_attr "prefix_extra" "1")
10960 (set_attr "length_immediate" "1")
10961 (set_attr "prefix" "vex")
10962 (set_attr "mode" "<sseinsnmode>")])
;; Low-half insert for V16HI: the V8HI operand 2 is combined with
;; elements 8-15 selected from operand 1.  The mnemonic is written as
;; vinsert%~128 with immediate 0 and the insn mode attribute is OI.
;; NOTE(review): %~ presumably selects the integer vs. float spelling
;; of the instruction depending on the target -- confirm against the
;; operand-printing code in i386.c.
10964 (define_insn "vec_set_lo_v16hi"
10965 [(set (match_operand:V16HI 0 "register_operand" "=x")
10967 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
10969 (match_operand:V16HI 1 "register_operand" "x")
10970 (parallel [(const_int 8) (const_int 9)
10971 (const_int 10) (const_int 11)
10972 (const_int 12) (const_int 13)
10973 (const_int 14) (const_int 15)]))))]
10975 "vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
10976 [(set_attr "type" "sselog")
10977 (set_attr "prefix_extra" "1")
10978 (set_attr "length_immediate" "1")
10979 (set_attr "prefix" "vex")
10980 (set_attr "mode" "OI")])
;; High-half insert for V16HI: elements 0-7 selected from operand 1 are
;; combined with the V8HI operand 2.  The mnemonic is written as
;; vinsert%~128 with immediate 1 and the insn mode attribute is OI.
10982 (define_insn "vec_set_hi_v16hi"
10983 [(set (match_operand:V16HI 0 "register_operand" "=x")
10986 (match_operand:V16HI 1 "register_operand" "x")
10987 (parallel [(const_int 0) (const_int 1)
10988 (const_int 2) (const_int 3)
10989 (const_int 4) (const_int 5)
10990 (const_int 6) (const_int 7)]))
10991 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
10993 "vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
10994 [(set_attr "type" "sselog")
10995 (set_attr "prefix_extra" "1")
10996 (set_attr "length_immediate" "1")
10997 (set_attr "prefix" "vex")
10998 (set_attr "mode" "OI")])
;; Low-half insert for V32QI: the V16QI operand 2 is combined with
;; elements 16-31 selected from operand 1.  The mnemonic is written as
;; vinsert%~128 with immediate 0 and the insn mode attribute is OI.
11000 (define_insn "vec_set_lo_v32qi"
11001 [(set (match_operand:V32QI 0 "register_operand" "=x")
11003 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
11005 (match_operand:V32QI 1 "register_operand" "x")
11006 (parallel [(const_int 16) (const_int 17)
11007 (const_int 18) (const_int 19)
11008 (const_int 20) (const_int 21)
11009 (const_int 22) (const_int 23)
11010 (const_int 24) (const_int 25)
11011 (const_int 26) (const_int 27)
11012 (const_int 28) (const_int 29)
11013 (const_int 30) (const_int 31)]))))]
11015 "vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
11016 [(set_attr "type" "sselog")
11017 (set_attr "prefix_extra" "1")
11018 (set_attr "length_immediate" "1")
11019 (set_attr "prefix" "vex")
11020 (set_attr "mode" "OI")])
;; High-half insert for V32QI: elements 0-15 selected from operand 1 are
;; combined with the V16QI operand 2.  The mnemonic is written as
;; vinsert%~128 with immediate 1 and the insn mode attribute is OI.
11022 (define_insn "vec_set_hi_v32qi"
11023 [(set (match_operand:V32QI 0 "register_operand" "=x")
11026 (match_operand:V32QI 1 "register_operand" "x")
11027 (parallel [(const_int 0) (const_int 1)
11028 (const_int 2) (const_int 3)
11029 (const_int 4) (const_int 5)
11030 (const_int 6) (const_int 7)
11031 (const_int 8) (const_int 9)
11032 (const_int 10) (const_int 11)
11033 (const_int 12) (const_int 13)
11034 (const_int 14) (const_int 15)]))
11035 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
11037 "vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
11038 [(set_attr "type" "sselog")
11039 (set_attr "prefix_extra" "1")
11040 (set_attr "length_immediate" "1")
11041 (set_attr "prefix" "vex")
11042 (set_attr "mode" "OI")])
;; v*maskmov load form.  Operand 0 (register) is set from memory
;; operand 1 together with operand 2, a register of the matching
;; integer vector mode (<sseintvecmode>).
;; NOTE(review): operand 2 is presumably the per-element mask that
;; selects which elements are loaded -- confirm against the ISA manual.
11044 (define_insn "<avx_avx2>_maskload<ssemodesuffix><avxsizesuffix>"
11045 [(set (match_operand:V48_AVX2 0 "register_operand" "=x")
11047 [(match_operand:<sseintvecmode> 2 "register_operand" "x")
11048 (match_operand:V48_AVX2 1 "memory_operand" "m")]
11051 "v<sseintprefix>maskmov<ssemodesuffix>\t{%1, %2, %0|%0, %2, %1}"
11052 [(set_attr "type" "sselog1")
11053 (set_attr "prefix_extra" "1")
11054 (set_attr "prefix" "vex")
11055 (set_attr "btver2_decode" "vector")
11056 (set_attr "mode" "<sseinsnmode>")])
;; v*maskmov store form.  Memory operand 0 is written from register
;; operand 2 together with operand 1, a register of the matching integer
;; vector mode (<sseintvecmode>).  The "+m" constraint marks the memory
;; destination as both read and written.
;; NOTE(review): presumably "+m" is used because elements that are
;; masked off keep their previous memory contents -- confirm.
11058 (define_insn "<avx_avx2>_maskstore<ssemodesuffix><avxsizesuffix>"
11059 [(set (match_operand:V48_AVX2 0 "memory_operand" "+m")
11061 [(match_operand:<sseintvecmode> 1 "register_operand" "x")
11062 (match_operand:V48_AVX2 2 "register_operand" "x")
11066 "v<sseintprefix>maskmov<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11067 [(set_attr "type" "sselog1")
11068 (set_attr "prefix_extra" "1")
11069 (set_attr "prefix" "vex")
11070 (set_attr "btver2_decode" "vector")
11071 (set_attr "mode" "<sseinsnmode>")])
;; Cast from a half-width vector (operand 1, <ssehalfvecmode>) to a
;; 256-bit AVX256MODE2P value (operand 0), expressed as an unspec.
;; Two alternatives exist: register destination with register-or-memory
;; source, and memory destination with register source.  The split
;; condition adds "reload_completed", so the insn is only split after
;; reload; the split code then emits an ordinary move, using
;; gen_rtx_REG to re-express a register operand in the other operand's
;; mode so that both sides of the move agree.
11073 (define_insn_and_split "avx_<castmode><avxsizesuffix>_<castmode>"
11074 [(set (match_operand:AVX256MODE2P 0 "nonimmediate_operand" "=x,m")
11075 (unspec:AVX256MODE2P
11076 [(match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "xm,x")]
11080 "&& reload_completed"
11083 rtx op0 = operands[0];
11084 rtx op1 = operands[1];
11086 op0 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op0));
11088 op1 = gen_rtx_REG (<MODE>mode, REGNO (op1));
11089 emit_move_insn (op0, op1);
;; vec_init expander for the 256-bit vector modes in V_256.  All work is
;; delegated to ix86_expand_vector_init, called with `false' as its
;; first argument, the destination register (operand 0) and operand 1.
11093 (define_expand "vec_init<mode>"
11094 [(match_operand:V_256 0 "register_operand")
11098 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Extract one V2DI half from the V4DI operand 1 into operand 0.
;; Operand 2 is an immediate restricted to 0 or 1 by
;; const_0_to_1_operand; the switch on its value picks either
;; gen_vec_extract_lo_v4di or gen_vec_extract_hi_v4di, and any other
;; value is treated as unreachable.  The chosen generator is then
;; invoked on operands 0 and 1.
11102 (define_expand "avx2_extracti128"
11103 [(match_operand:V2DI 0 "nonimmediate_operand")
11104 (match_operand:V4DI 1 "register_operand")
11105 (match_operand:SI 2 "const_0_to_1_operand")]
11108 rtx (*insn)(rtx, rtx);
11110 switch (INTVAL (operands[2]))
11113 insn = gen_vec_extract_lo_v4di;
11116 insn = gen_vec_extract_hi_v4di;
11119 gcc_unreachable ();
11122 emit_insn (insn (operands[0], operands[1]));
;; Insert the V2DI operand 2 into one half of the V4DI operand 1,
;; producing operand 0.  Operand 3 is an immediate restricted to 0 or 1
;; by const_0_to_1_operand; the switch on its value picks either
;; gen_avx2_vec_set_lo_v4di or gen_avx2_vec_set_hi_v4di, and any other
;; value is treated as unreachable.  The chosen generator is then
;; invoked on operands 0, 1 and 2.
11126 (define_expand "avx2_inserti128"
11127 [(match_operand:V4DI 0 "register_operand")
11128 (match_operand:V4DI 1 "register_operand")
11129 (match_operand:V2DI 2 "nonimmediate_operand")
11130 (match_operand:SI 3 "const_0_to_1_operand")]
11133 rtx (*insn)(rtx, rtx, rtx);
11135 switch (INTVAL (operands[3]))
11138 insn = gen_avx2_vec_set_lo_v4di;
11141 insn = gen_avx2_vec_set_hi_v4di;
11144 gcc_unreachable ();
11147 emit_insn (insn (operands[0], operands[1], operands[2]));
;; Two-input vector shift on VI4_AVX2 modes emitted as vpsravd:
;; operand 1 is a register, operand 2 may be a register or memory.
;; NOTE(review): per the pattern name and mnemonic this is presumably
;; the per-element variable arithmetic right shift, with operand 2
;; holding the shift counts -- confirm against the ISA manual.
11151 (define_insn "avx2_ashrv<mode>"
11152 [(set (match_operand:VI4_AVX2 0 "register_operand" "=x")
11154 (match_operand:VI4_AVX2 1 "register_operand" "x")
11155 (match_operand:VI4_AVX2 2 "nonimmediate_operand" "xm")))]
11157 "vpsravd\t{%2, %1, %0|%0, %1, %2}"
11158 [(set_attr "type" "sseishft")
11159 (set_attr "prefix" "vex")
11160 (set_attr "mode" "<sseinsnmode>")])
;; Vector-by-vector shifts for every code in the any_lshift iterator,
;; over the VI48_AVX2 modes.  Operand 1 (register) is shifted by
;; operand 2 (register or memory, same vector mode); the mnemonic is
;; assembled from <vshift> and <ssemodesuffix>.
11162 (define_insn "avx2_<shift_insn>v<mode>"
11163 [(set (match_operand:VI48_AVX2 0 "register_operand" "=x")
11164 (any_lshift:VI48_AVX2
11165 (match_operand:VI48_AVX2 1 "register_operand" "x")
11166 (match_operand:VI48_AVX2 2 "nonimmediate_operand" "xm")))]
11168 "vp<vshift>v<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11169 [(set_attr "type" "sseishft")
11170 (set_attr "prefix" "vex")
11171 (set_attr "mode" "<sseinsnmode>")])
;; Build a 256-bit V_256 value from two half-width vectors (operands 1
;; and 2).  There are two alternatives, dispatched on which_alternative:
;;  - operand 2 in a register or memory ("xm"): a vinsert<i128> with
;;    immediate 1 that inserts operand 2 into %t1 (operand 1 printed
;;    with the `t' modifier);
;;  - operand 2 matching constraint "C": a plain vmovaps/vmovapd/vmovdqa
;;    of operand 1 into %x0, with the mnemonic chosen from the insn's
;;    mode attribute.
;; NOTE(review): "C" is presumably the all-zeros vector constant, in
;; which case the move relies on the upper half being cleared by the
;; VEX-encoded 128-bit move -- confirm.
11173 (define_insn "avx_vec_concat<mode>"
11174 [(set (match_operand:V_256 0 "register_operand" "=x,x")
11176 (match_operand:<ssehalfvecmode> 1 "register_operand" "x,x")
11177 (match_operand:<ssehalfvecmode> 2 "vector_move_operand" "xm,C")))]
11180 switch (which_alternative)
11183 return "vinsert<i128>\t{$0x1, %2, %t1, %0|%0, %t1, %2, 0x1}";
11185 switch (get_attr_mode (insn))
11188 return "vmovaps\t{%1, %x0|%x0, %1}";
11190 return "vmovapd\t{%1, %x0|%x0, %1}";
11192 return "vmovdqa\t{%1, %x0|%x0, %1}";
11195 gcc_unreachable ();
11198 [(set_attr "type" "sselog,ssemov")
11199 (set_attr "prefix_extra" "1,*")
11200 (set_attr "length_immediate" "1,*")
11201 (set_attr "prefix" "vex")
11202 (set_attr "mode" "<sseinsnmode>")])
;; Register form of vcvtph2ps with a V4SF result.  The conversion is
;; modelled as a V8SF unspec of the V8HI register operand 1 from which
;; elements 0-3 are selected for operand 0.
11204 (define_insn "vcvtph2ps"
11205 [(set (match_operand:V4SF 0 "register_operand" "=x")
11207 (unspec:V8SF [(match_operand:V8HI 1 "register_operand" "x")]
11209 (parallel [(const_int 0) (const_int 1)
11210 (const_int 2) (const_int 3)])))]
11212 "vcvtph2ps\t{%1, %0|%0, %1}"
11213 [(set_attr "type" "ssecvt")
11214 (set_attr "prefix" "vex")
11215 (set_attr "mode" "V4SF")])
;; Memory form of vcvtph2ps with a V4SF result: operand 1 is a V4HI
;; memory operand and the conversion is an UNSPEC_VCVTPH2PS directly in
;; V4SF mode.
;; NOTE(review): the "mode" attribute below is V8SF although the
;; destination is V4SF, whereas the register form uses V4SF -- confirm
;; whether this difference is intentional.
11217 (define_insn "*vcvtph2ps_load"
11218 [(set (match_operand:V4SF 0 "register_operand" "=x")
11219 (unspec:V4SF [(match_operand:V4HI 1 "memory_operand" "m")]
11220 UNSPEC_VCVTPH2PS))]
11222 "vcvtph2ps\t{%1, %0|%0, %1}"
11223 [(set_attr "type" "ssecvt")
11224 (set_attr "prefix" "vex")
11225 (set_attr "mode" "V8SF")])
;; 256-bit vcvtph2ps: the V8HI operand 1 (register or memory) is
;; converted to the V8SF register operand 0 via UNSPEC_VCVTPH2PS.
11227 (define_insn "vcvtph2ps256"
11228 [(set (match_operand:V8SF 0 "register_operand" "=x")
11229 (unspec:V8SF [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
11230 UNSPEC_VCVTPH2PS))]
11232 "vcvtph2ps\t{%1, %0|%0, %1}"
11233 [(set_attr "type" "ssecvt")
11234 (set_attr "prefix" "vex")
11235 (set_attr "btver2_decode" "double")
11236 (set_attr "mode" "V8SF")])
;; Expander for the register form of vcvtps2ph.  The V4SF register
;; operand 1 and the 0-255 immediate operand 2 feed a V4HI unspec, and
;; the destination operand 0 is a V8HI register.  The preparation
;; statement sets operands[3] to the V4HI zero vector.
;; NOTE(review): operand 3 presumably supplies the other half of the
;; V8HI result, as in the matching "*vcvtps2ph" insn -- confirm.
11238 (define_expand "vcvtps2ph"
11239 [(set (match_operand:V8HI 0 "register_operand")
11241 (unspec:V4HI [(match_operand:V4SF 1 "register_operand")
11242 (match_operand:SI 2 "const_0_to_255_operand")]
11246 "operands[3] = CONST0_RTX (V4HImode);")
;; Register form of vcvtps2ph.  A V4HI unspec of the V4SF register
;; operand 1 and the immediate operand 2 (0-255, constraint "N") is
;; combined with operand 3, which must satisfy const0_operand in V4HI
;; mode, to form the V8HI register result.
11248 (define_insn "*vcvtps2ph"
11249 [(set (match_operand:V8HI 0 "register_operand" "=x")
11251 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")
11252 (match_operand:SI 2 "const_0_to_255_operand" "N")]
11254 (match_operand:V4HI 3 "const0_operand")))]
11256 "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
11257 [(set_attr "type" "ssecvt")
11258 (set_attr "prefix" "vex")
11259 (set_attr "mode" "V4SF")])
;; Store form of vcvtps2ph: the V4HI result of UNSPEC_VCVTPS2PH applied
;; to the V4SF register operand 1 and the immediate operand 2 (0-255)
;; is written directly to the V4HI memory operand 0.
11261 (define_insn "*vcvtps2ph_store"
11262 [(set (match_operand:V4HI 0 "memory_operand" "=m")
11263 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")
11264 (match_operand:SI 2 "const_0_to_255_operand" "N")]
11265 UNSPEC_VCVTPS2PH))]
11267 "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
11268 [(set_attr "type" "ssecvt")
11269 (set_attr "prefix" "vex")
11270 (set_attr "mode" "V4SF")])
;; 256-bit vcvtps2ph: the V8SF register operand 1 and the immediate
;; operand 2 (0-255) are converted via UNSPEC_VCVTPS2PH into the V8HI
;; operand 0, which may be a register or memory ("=xm").
11272 (define_insn "vcvtps2ph256"
11273 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=xm")
11274 (unspec:V8HI [(match_operand:V8SF 1 "register_operand" "x")
11275 (match_operand:SI 2 "const_0_to_255_operand" "N")]
11276 UNSPEC_VCVTPS2PH))]
11278 "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
11279 [(set_attr "type" "ssecvt")
11280 (set_attr "prefix" "vex")
11281 (set_attr "btver2_decode" "vector")
11282 (set_attr "mode" "V8SF")])
11284 ;; For gather* insn patterns
;; VEC_GATHER_MODE lists the data vector modes the gather patterns are
;; instantiated for: 128-bit and 256-bit vectors of 32-bit and 64-bit
;; integer and floating-point elements.
11285 (define_mode_iterator VEC_GATHER_MODE
11286 [V2DI V2DF V4DI V4DF V4SI V4SF V8SI V8SF])
;; Mode of the index vector operand used by the avx2_gathersi patterns
;; for each data mode: V4SI for every data mode except V8SI/V8SF, which
;; use V8SI.
11287 (define_mode_attr VEC_GATHER_IDXSI
11288 [(V2DI "V4SI") (V2DF "V4SI")
11289 (V4DI "V4SI") (V4DF "V4SI")
11290 (V4SI "V4SI") (V4SF "V4SI")
11291 (V8SI "V8SI") (V8SF "V8SI")])
;; Mode of the index vector operand used by the avx2_gatherdi patterns
;; for each data mode: V2DI for the 128-bit data modes and V4DI for the
;; 256-bit data modes.
11292 (define_mode_attr VEC_GATHER_IDXDI
11293 [(V2DI "V2DI") (V2DF "V2DF")
11294 (V4DI "V4DI") (V4DF "V4DI")
11295 (V4SI "V2DI") (V4SF "V2DI")
11296 (V8SI "V4DI") (V8SF "V4DI")])
;; Mode of the non-address input operands (and, in the _3/_4 variants,
;; of the destination) in the avx2_gatherdi patterns.  It equals the
;; data mode except for V8SI and V8SF, which map to the 128-bit V4SI
;; and V4SF.
11297 (define_mode_attr VEC_GATHER_SRCDI
11298 [(V2DI "V2DI") (V2DF "V2DF")
11299 (V4DI "V4DI") (V4DF "V4DF")
11300 (V4SI "V4SI") (V4SF "V4SF")
11301 (V8SI "V4SI") (V8SF "V4SF")])
;; Expander for the AVX2 gather patterns that take an index vector of
;; mode <VEC_GATHER_IDXSI>.
;;   operand 0: destination register (VEC_GATHER_MODE)
;;   operand 1: register input in the destination's mode
;;   operand 2: address, checked by vsib_address_operand
;;   operand 3: index vector register
;;   operand 4: second register input in the destination's mode
;;   operand 5: SImode constant accepted by const1248_operand
;;   operand 6: scratch register that the insn clobbers
;; The preparation code wraps operands 2, 3 and 5 into a Pmode
;; UNSPEC_VSIBADDR, which forms the address of the gathered memory
;; reference.  A (mem:BLK (scratch)) element is part of the unspec
;; inputs.
;; NOTE(review): operand 4 is presumably the gather mask and operand 5
;; the VSIB scale factor -- confirm against the ISA manual.
11303 (define_expand "avx2_gathersi<mode>"
11304 [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand")
11305 (unspec:VEC_GATHER_MODE
11306 [(match_operand:VEC_GATHER_MODE 1 "register_operand")
11307 (mem:<ssescalarmode>
11309 [(match_operand 2 "vsib_address_operand")
11310 (match_operand:<VEC_GATHER_IDXSI>
11311 3 "register_operand")
11312 (match_operand:SI 5 "const1248_operand")]))
11313 (mem:BLK (scratch))
11314 (match_operand:VEC_GATHER_MODE 4 "register_operand")]
11316 (clobber (match_scratch:VEC_GATHER_MODE 6))])]
11320 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
11321 operands[5]), UNSPEC_VSIBADDR);
;; Insn for the gatherd form (index vector of mode <VEC_GATHER_IDXSI>).
;; Operand 0 is an early-clobber destination; operand 2 is tied to it
;; through the "0" constraint.  Operand 5 is tied through "1" to the
;; scratch register operand 1, which the insn clobbers.  Operand 7 is
;; the memory reference, matched by vsib_mem_operator, whose address is
;; built from operands 3, 4 and 6.  The assembler template prints
;; %1, the memory operand %7, and the destination %0.
11324 (define_insn "*avx2_gathersi<mode>"
11325 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
11326 (unspec:VEC_GATHER_MODE
11327 [(match_operand:VEC_GATHER_MODE 2 "register_operand" "0")
11328 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
11330 [(match_operand:P 3 "vsib_address_operand" "p")
11331 (match_operand:<VEC_GATHER_IDXSI> 4 "register_operand" "x")
11332 (match_operand:SI 6 "const1248_operand" "n")]
11334 (mem:BLK (scratch))
11335 (match_operand:VEC_GATHER_MODE 5 "register_operand" "1")]
11337 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
11339 "v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %7, %0|%0, %7, %1}"
11340 [(set_attr "type" "ssemov")
11341 (set_attr "prefix" "vex")
11342 (set_attr "mode" "<sseinsnmode>")])
;; Variant of the gatherd insn with one fewer matched operand: no
;; register input is tied to operand 0 here.  Operand 4 is tied through
;; "1" to the scratch register operand 1, which the insn clobbers, and
;; operand 6 is the vsib_mem_operator memory reference built from
;; operands 2, 3 and 5.  The template prints %1, %6 and %0.
11344 (define_insn "*avx2_gathersi<mode>_2"
11345 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
11346 (unspec:VEC_GATHER_MODE
11348 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
11350 [(match_operand:P 2 "vsib_address_operand" "p")
11351 (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand" "x")
11352 (match_operand:SI 5 "const1248_operand" "n")]
11354 (mem:BLK (scratch))
11355 (match_operand:VEC_GATHER_MODE 4 "register_operand" "1")]
11357 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
11359 "v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %6, %0|%0, %6, %1}"
11360 [(set_attr "type" "ssemov")
11361 (set_attr "prefix" "vex")
11362 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the AVX2 gather patterns that take an index vector of
;; mode <VEC_GATHER_IDXDI>.
;;   operand 0: destination register (VEC_GATHER_MODE)
;;   operand 1: register input of mode <VEC_GATHER_SRCDI>
;;   operand 2: address, checked by vsib_address_operand
;;   operand 3: index vector register
;;   operand 4: second register input of mode <VEC_GATHER_SRCDI>
;;   operand 5: SImode constant accepted by const1248_operand
;;   operand 6: scratch register that the insn clobbers
;; Operands 1 and 4 use <VEC_GATHER_SRCDI>, which is narrower than the
;; destination mode for V8SI/V8SF.  The preparation code wraps
;; operands 2, 3 and 5 into a Pmode UNSPEC_VSIBADDR, which forms the
;; address of the gathered memory reference.
;; NOTE(review): operand 4 is presumably the gather mask and operand 5
;; the VSIB scale factor -- confirm against the ISA manual.
11364 (define_expand "avx2_gatherdi<mode>"
11365 [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand")
11366 (unspec:VEC_GATHER_MODE
11367 [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand")
11368 (mem:<ssescalarmode>
11370 [(match_operand 2 "vsib_address_operand")
11371 (match_operand:<VEC_GATHER_IDXDI>
11372 3 "register_operand")
11373 (match_operand:SI 5 "const1248_operand")]))
11374 (mem:BLK (scratch))
11375 (match_operand:<VEC_GATHER_SRCDI>
11376 4 "register_operand")]
11378 (clobber (match_scratch:VEC_GATHER_MODE 6))])]
11382 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
11383 operands[5]), UNSPEC_VSIBADDR);
;; Insn for the gatherq form (index vector of mode <VEC_GATHER_IDXDI>).
;; Operand 2 (mode <VEC_GATHER_SRCDI>) is tied to the early-clobber
;; destination through "0", and operand 5 (same mode) is tied through
;; "1" to the clobbered scratch operand 1.  The template prints the
;; tied inputs %5 and %2 rather than %1 and %0, so the register names
;; are printed in the <VEC_GATHER_SRCDI> mode of those operands.
11386 (define_insn "*avx2_gatherdi<mode>"
11387 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
11388 (unspec:VEC_GATHER_MODE
11389 [(match_operand:<VEC_GATHER_SRCDI> 2 "register_operand" "0")
11390 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
11392 [(match_operand:P 3 "vsib_address_operand" "p")
11393 (match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x")
11394 (match_operand:SI 6 "const1248_operand" "n")]
11396 (mem:BLK (scratch))
11397 (match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" "1")]
11399 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
11401 "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %2|%2, %7, %5}"
11402 [(set_attr "type" "ssemov")
11403 (set_attr "prefix" "vex")
11404 (set_attr "mode" "<sseinsnmode>")])
;; Variant of the gatherq insn with one fewer matched operand: no
;; register input is tied to operand 0 here.  Operand 4 is tied through
;; "1" to the clobbered scratch operand 1, and operand 6 is the
;; vsib_mem_operator memory reference.  When the data mode differs from
;; <VEC_GATHER_SRCDI>, the destination is printed with the `x' modifier
;; (%x0); otherwise it is printed as plain %0.
11406 (define_insn "*avx2_gatherdi<mode>_2"
11407 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
11408 (unspec:VEC_GATHER_MODE
11410 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
11412 [(match_operand:P 2 "vsib_address_operand" "p")
11413 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x")
11414 (match_operand:SI 5 "const1248_operand" "n")]
11416 (mem:BLK (scratch))
11417 (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")]
11419 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
11422 if (<MODE>mode != <VEC_GATHER_SRCDI>mode)
11423 return "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %x0|%x0, %6, %4}";
11424 return "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}";
11426 [(set_attr "type" "ssemov")
11427 (set_attr "prefix" "vex")
11428 (set_attr "mode" "<sseinsnmode>")])
;; gatherq variant whose destination has mode <VEC_GATHER_SRCDI>: the
;; result is a vec_select of elements 0-3 of the gather.  Operand 2 is
;; tied to the destination through "0", operand 5 is tied through "1"
;; to the scratch operand 1, and that scratch is a VI4F_256 register
;; that the insn clobbers.  The template prints %5, the memory operand
;; %7 and the destination %0.
11430 (define_insn "*avx2_gatherdi<mode>_3"
11431 [(set (match_operand:<VEC_GATHER_SRCDI> 0 "register_operand" "=&x")
11432 (vec_select:<VEC_GATHER_SRCDI>
11434 [(match_operand:<VEC_GATHER_SRCDI> 2 "register_operand" "0")
11435 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
11437 [(match_operand:P 3 "vsib_address_operand" "p")
11438 (match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x")
11439 (match_operand:SI 6 "const1248_operand" "n")]
11441 (mem:BLK (scratch))
11442 (match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" "1")]
11444 (parallel [(const_int 0) (const_int 1)
11445 (const_int 2) (const_int 3)])))
11446 (clobber (match_scratch:VI4F_256 1 "=&x"))]
11448 "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %0|%0, %7, %5}"
11449 [(set_attr "type" "ssemov")
11450 (set_attr "prefix" "vex")
11451 (set_attr "mode" "<sseinsnmode>")])
;; gatherq variant whose destination has mode <VEC_GATHER_SRCDI> and
;; which has one fewer matched operand: the result is a vec_select of
;; elements 0-3 of the gather, and no register input is tied to
;; operand 0.  Operand 4 is tied through "1" to the scratch operand 1,
;; a VI4F_256 register that the insn clobbers.  The template prints %4,
;; the memory operand %6 and the destination %0.
11453 (define_insn "*avx2_gatherdi<mode>_4"
11454 [(set (match_operand:<VEC_GATHER_SRCDI> 0 "register_operand" "=&x")
11455 (vec_select:<VEC_GATHER_SRCDI>
11458 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
11460 [(match_operand:P 2 "vsib_address_operand" "p")
11461 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x")
11462 (match_operand:SI 5 "const1248_operand" "n")]
11464 (mem:BLK (scratch))
11465 (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")]
11467 (parallel [(const_int 0) (const_int 1)
11468 (const_int 2) (const_int 3)])))
11469 (clobber (match_scratch:VI4F_256 1 "=&x"))]
11471 "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}"
11472 [(set_attr "type" "ssemov")
11473 (set_attr "prefix" "vex")
11474 (set_attr "mode" "<sseinsnmode>")])