;; Copyright (C) 2009-2020 Free Software Foundation, Inc.
;; Contributed by Michael Meissner <meissner@linux.vnet.ibm.com>

;; This file is part of GCC.

;; GCC is free software; you can redistribute it and/or modify it
;; under the terms of the GNU General Public License as published
;; by the Free Software Foundation; either version 3, or (at your
;; option) any later version.

;; GCC is distributed in the hope that it will be useful, but WITHOUT
;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
;; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
;; License for more details.

;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3.  If not see
;; <http://www.gnu.org/licenses/>.
;; Iterator for comparison types
(define_code_iterator CMP_TEST [eq lt gt unordered])

;; Mode attribute for vector floate and floato conversions
(define_mode_attr VF_sxddp [(V2DI "sxd") (V2DF "dp")])

;; Iterator for both scalar and vector floating point types supported by VSX
(define_mode_iterator VSX_B [DF V4SF V2DF])

;; Iterator for the 2 64-bit vector types
(define_mode_iterator VSX_D [V2DF V2DI])
;; Mode iterator to handle swapping words on little endian for the 128-bit
;; types that go in a single vector register.
(define_mode_iterator VSX_LE_128 [(KF "FLOAT128_VECTOR_P (KFmode)")
                                  (TF "FLOAT128_VECTOR_P (TFmode)")
;; Iterator for 128-bit integer types that go in a single vector register.
(define_mode_iterator VSX_TI [TI V1TI])

;; Iterator for the 2 32-bit vector types
(define_mode_iterator VSX_W [V4SF V4SI])

;; Iterator for the DF types
(define_mode_iterator VSX_DF [V2DF DF])

;; Iterator for vector floating point types supported by VSX
(define_mode_iterator VSX_F [V4SF V2DF])
;; Iterator for logical types supported by VSX
(define_mode_iterator VSX_L [V16QI
                             (KF "FLOAT128_VECTOR_P (KFmode)")
                             (TF "FLOAT128_VECTOR_P (TFmode)")])

;; Iterator for memory moves.
(define_mode_iterator VSX_M [V16QI
                             (KF "FLOAT128_VECTOR_P (KFmode)")
                             (TF "FLOAT128_VECTOR_P (TFmode)")

(define_mode_attr VSX_XXBR [(V8HI "h")
;; Map into the appropriate load/store name based on the type
(define_mode_attr VSm [(V16QI "vw4")

;; Map the register class used
(define_mode_attr VSr [(V16QI "v")

;; What value we need in the "isa" field, to make the IEEE QP float work.
(define_mode_attr VSisa [(V16QI "*")
;; A mode attribute to disparage use of GPR registers, except for scalar
;; integer modes.
(define_mode_attr ??r [(V16QI "??r")
;; A mode attribute used for 128-bit constant values.
(define_mode_attr nW [(V16QI "W")

;; Same size integer type for floating point data
(define_mode_attr VSi [(V4SF "v4si")

(define_mode_attr VSI [(V4SF "V4SI")

;; Word size for same size conversion
(define_mode_attr VSc [(V4SF "w")
;; Map into either s or v, depending on whether this is a scalar or vector
;; instruction.
(define_mode_attr VSv [(V16QI "v")
;; Appropriate type for add ops (and other simple FP ops)
(define_mode_attr VStype_simple [(V2DF "vecdouble")

;; Appropriate type for multiply ops
(define_mode_attr VStype_mul [(V2DF "vecdouble")

;; Appropriate type for divide ops.
(define_mode_attr VStype_div [(V2DF "vecdiv")

;; Map the scalar mode for a vector type
(define_mode_attr VS_scalar [(V1TI "TI")

;; Map to a double-sized vector mode
(define_mode_attr VS_double [(V4SI "V8SI")

;; Iterators for loading constants with xxspltib
(define_mode_iterator VSINT_84 [V4SI V2DI DI SI])
(define_mode_iterator VSINT_842 [V8HI V4SI V2DI])

;; Vector reverse byte modes
(define_mode_iterator VEC_REVB [V8HI V4SI V2DI V4SF V2DF V1TI])
;; Iterator for ISA 3.0 vector extract/insert of small integer vectors.
;; VSX_EXTRACT_I2 doesn't include V4SImode because SI extracts can be
;; done on ISA 2.07 and not just ISA 3.0.
(define_mode_iterator VSX_EXTRACT_I [V16QI V8HI V4SI])
(define_mode_iterator VSX_EXTRACT_I2 [V16QI V8HI])
(define_mode_iterator VSX_EXTRACT_I4 [V16QI V8HI V4SI V2DI])

(define_mode_attr VSX_EXTRACT_WIDTH [(V16QI "b")

;; Mode attribute to give the correct predicate for ISA 3.0 vector extract and
;; insert to validate the operand number.
(define_mode_attr VSX_EXTRACT_PREDICATE [(V16QI "const_0_to_15_operand")
                                         (V8HI "const_0_to_7_operand")
                                         (V4SI "const_0_to_3_operand")])
;; Mode attribute to give the constraint for vector extract and insert
;; operations.
(define_mode_attr VSX_EX [(V16QI "v")
;; Mode iterator for binary floating types other than double to
;; optimize convert to that floating point type from an extract
;; of an integer type
(define_mode_iterator VSX_EXTRACT_FL [SF
                                      (IF "FLOAT128_2REG_P (IFmode)")
                                      (KF "TARGET_FLOAT128_HW")
                                      (TF "FLOAT128_2REG_P (TFmode)
                                           || (FLOAT128_IEEE_P (TFmode)
                                               && TARGET_FLOAT128_HW)")])

;; Mode iterator for binary floating types that have a direct conversion
;; from 64-bit integer to floating point
(define_mode_iterator FL_CONV [SF
                               (KF "TARGET_FLOAT128_HW")
                               (TF "TARGET_FLOAT128_HW
                                    && FLOAT128_IEEE_P (TFmode)")])

;; Iterator for the 2 short vector types to do a splat from an integer
(define_mode_iterator VSX_SPLAT_I [V16QI V8HI])

;; Mode attribute to give the count for the splat instruction to splat
;; the value in the 64-bit integer slot
(define_mode_attr VSX_SPLAT_COUNT [(V16QI "7") (V8HI "3")])

;; Mode attribute to give the suffix for the splat instruction
(define_mode_attr VSX_SPLAT_SUFFIX [(V16QI "b") (V8HI "h")])

;; Iterator for the move to mask instructions
(define_mode_iterator VSX_MM [V16QI V8HI V4SI V2DI V1TI])
(define_mode_iterator VSX_MM4 [V16QI V8HI V4SI V2DI])
;; Constants for creating unspecs
(define_c_enum "unspec"
   UNSPEC_VSX_UNS_FLOAT2
   UNSPEC_VSX_UNS_FLOATE
   UNSPEC_VSX_UNS_FLOATO
   UNSPEC_VSX_SIGN_EXTEND
   UNSPEC_VSX_XVCVBF16SPN
   UNSPEC_VSX_XVCVSPBF16
   UNSPEC_VSX_XVCVSPSXDS
   UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTH
   UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTL
   UNSPEC_VSX_FIRST_MATCH_INDEX
   UNSPEC_VSX_FIRST_MATCH_EOS_INDEX
   UNSPEC_VSX_FIRST_MISMATCH_INDEX
   UNSPEC_VSX_FIRST_MISMATCH_EOS_INDEX

(define_int_iterator XVCVBF16 [UNSPEC_VSX_XVCVSPBF16
                               UNSPEC_VSX_XVCVBF16SPN])

(define_int_attr xvcvbf16 [(UNSPEC_VSX_XVCVSPBF16 "xvcvspbf16")
                           (UNSPEC_VSX_XVCVBF16SPN "xvcvbf16spn")])

;; Like VI, defined in vector.md, but add ISA 2.07 integer vector ops
(define_mode_iterator VI2 [V4SI V8HI V16QI V2DI])
;; Vector replace_elt iterator/attrs for 32-bit and 64-bit elements
(define_mode_iterator REPLACE_ELT [V4SI V4SF V2DI V2DF])
(define_mode_attr REPLACE_ELT_char [(V4SI "w") (V4SF "w")
                                    (V2DI "d") (V2DF "d")])
(define_mode_attr REPLACE_ELT_sh [(V4SI "2") (V4SF "2")
                                  (V2DI "3") (V2DF "3")])
(define_mode_attr REPLACE_ELT_max [(V4SI "12") (V4SF "12")
                                   (V2DI "8") (V2DF "8")])
;; The patterns for LE permuted loads and stores come before the general
;; VSX moves so they match first.
(define_insn_and_split "*vsx_le_perm_load_<mode>"
  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
        (match_operand:VSX_D 1 "indexed_or_indirect_operand" "Z"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
          (parallel [(const_int 1) (const_int 0)])))
          (parallel [(const_int 1) (const_int 0)])))]
  rtx mem = operands[1];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register destination is not in the altivec
     range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && (!HARD_REGISTER_NUM_P (reg_or_subregno (operands[0]))
          || ALTIVEC_REGNO_P (reg_or_subregno (operands[0]))))
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);

      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
          /* Replace the source memory address with masked address.  */
          rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem);
          emit_insn (lvx_set_expr);
      else if (rs6000_quadword_masked_address_p (mem_address))
          /* This rtl is already in the form that matches lvx
             instruction, so leave it alone.  */
  /* Otherwise, fall through to transform into a swapping load.  */
  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
  [(set_attr "type" "vecload")
   (set_attr "length" "8")])
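
;; As an illustration (a sketch, not taken from GCC's own sources or test
;; suite): with the AltiVec/VSX extensions from <altivec.h>, a simple load
;;
;;   vector double load_v2df (vector double *p) { return *p; }
;;
;; compiled for little endian without ISA 3.0 would typically become an
;; element-reversing load plus a doubleword swap, roughly:
;;
;;   lxvd2x 34,0,3
;;   xxpermdi 34,34,34,2
;;
;; whereas the split above rewrites a 128-bit-aligned access into a plain
;; lvx when the destination is (or can be) an Altivec register, avoiding
;; the swap entirely.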
(define_insn_and_split "*vsx_le_perm_load_<mode>"
  [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
        (match_operand:VSX_W 1 "indexed_or_indirect_operand" "Z"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
          (parallel [(const_int 2) (const_int 3)
                     (const_int 0) (const_int 1)])))
          (parallel [(const_int 2) (const_int 3)
                     (const_int 0) (const_int 1)])))]
  rtx mem = operands[1];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register destination is not in the altivec
     range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && (!HARD_REGISTER_P (operands[0])
          || ALTIVEC_REGNO_P (REGNO (operands[0]))))
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);

      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
          /* Replace the source memory address with masked address.  */
          rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem);
          emit_insn (lvx_set_expr);
      else if (rs6000_quadword_masked_address_p (mem_address))
          /* This rtl is already in the form that matches lvx
             instruction, so leave it alone.  */
  /* Otherwise, fall through to transform into a swapping load.  */
  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
  [(set_attr "type" "vecload")
   (set_attr "length" "8")])
(define_insn_and_split "*vsx_le_perm_load_v8hi"
  [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
        (match_operand:V8HI 1 "indexed_or_indirect_operand" "Z"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
          (parallel [(const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))
          (parallel [(const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))]
  rtx mem = operands[1];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register destination is not in the altivec
     range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && (!HARD_REGISTER_P (operands[0])
          || ALTIVEC_REGNO_P (REGNO (operands[0]))))
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);

      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
          /* Replace the source memory address with masked address.  */
          rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem);
          emit_insn (lvx_set_expr);
      else if (rs6000_quadword_masked_address_p (mem_address))
          /* This rtl is already in the form that matches lvx
             instruction, so leave it alone.  */
  /* Otherwise, fall through to transform into a swapping load.  */
  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
  [(set_attr "type" "vecload")
   (set_attr "length" "8")])
(define_insn_and_split "*vsx_le_perm_load_v16qi"
  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
        (match_operand:V16QI 1 "indexed_or_indirect_operand" "Z"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
          (parallel [(const_int 8) (const_int 9)
                     (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13)
                     (const_int 14) (const_int 15)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)])))
          (parallel [(const_int 8) (const_int 9)
                     (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13)
                     (const_int 14) (const_int 15)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)])))]
  rtx mem = operands[1];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register destination is not in the altivec
     range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && (!HARD_REGISTER_P (operands[0])
          || ALTIVEC_REGNO_P (REGNO (operands[0]))))
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);

      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
          /* Replace the source memory address with masked address.  */
          rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem);
          emit_insn (lvx_set_expr);
      else if (rs6000_quadword_masked_address_p (mem_address))
          /* This rtl is already in the form that matches lvx
             instruction, so leave it alone.  */
  /* Otherwise, fall through to transform into a swapping load.  */
  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
  [(set_attr "type" "vecload")
   (set_attr "length" "8")])
(define_insn "*vsx_le_perm_store_<mode>"
  [(set (match_operand:VSX_D 0 "indexed_or_indirect_operand" "=Z")
        (match_operand:VSX_D 1 "vsx_register_operand" "+wa"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  [(set_attr "type" "vecstore")
   (set_attr "length" "12")])

  [(set (match_operand:VSX_D 0 "indexed_or_indirect_operand")
        (match_operand:VSX_D 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
          (parallel [(const_int 1) (const_int 0)])))
          (parallel [(const_int 1) (const_int 0)])))]
  rtx mem = operands[0];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register source is not in the altivec range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && (!HARD_REGISTER_NUM_P (reg_or_subregno (operands[1]))
          || ALTIVEC_REGNO_P (reg_or_subregno (operands[1]))))
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);
      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
          rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]);
          emit_insn (stvx_set_expr);
      else if (rs6000_quadword_masked_address_p (mem_address))
          /* This rtl is already in the form that matches stvx instruction,
             so leave it alone.  */
  /* Otherwise, fall through to transform into a swapping store.  */
  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])

;; The post-reload split requires that we re-permute the source
;; register in case it is still live.
  [(set (match_operand:VSX_D 0 "indexed_or_indirect_operand")
        (match_operand:VSX_D 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
          (parallel [(const_int 1) (const_int 0)])))
          (parallel [(const_int 1) (const_int 0)])))
          (parallel [(const_int 1) (const_int 0)])))]
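
;; To see why the re-permute is needed, consider (as a hypothetical
;; post-reload sequence, not lifted from real compiler output) storing
;; vs0 while vs0 is still live afterwards:
;;
;;   xxpermdi 0,0,0,2    # swap the doublewords in place
;;   stxvd2x 0,0,9       # element-reversing store yields memory order
;;   xxpermdi 0,0,0,2    # restore the original value for later uses
;;
;; Without the third instruction the source register would be left in
;; swapped order after the store.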
(define_insn "*vsx_le_perm_store_<mode>"
  [(set (match_operand:VSX_W 0 "indexed_or_indirect_operand" "=Z")
        (match_operand:VSX_W 1 "vsx_register_operand" "+wa"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  [(set_attr "type" "vecstore")
   (set_attr "length" "12")])

  [(set (match_operand:VSX_W 0 "indexed_or_indirect_operand")
        (match_operand:VSX_W 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
          (parallel [(const_int 2) (const_int 3)
                     (const_int 0) (const_int 1)])))
          (parallel [(const_int 2) (const_int 3)
                     (const_int 0) (const_int 1)])))]
  rtx mem = operands[0];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register source is not in the altivec range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && (!HARD_REGISTER_NUM_P (reg_or_subregno (operands[1]))
          || ALTIVEC_REGNO_P (reg_or_subregno (operands[1]))))
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);
      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
          rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]);
          emit_insn (stvx_set_expr);
      else if (rs6000_quadword_masked_address_p (mem_address))
          /* This rtl is already in the form that matches stvx instruction,
             so leave it alone.  */
  /* Otherwise, fall through to transform into a swapping store.  */
  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])

;; The post-reload split requires that we re-permute the source
;; register in case it is still live.
  [(set (match_operand:VSX_W 0 "indexed_or_indirect_operand")
        (match_operand:VSX_W 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
          (parallel [(const_int 2) (const_int 3)
                     (const_int 0) (const_int 1)])))
          (parallel [(const_int 2) (const_int 3)
                     (const_int 0) (const_int 1)])))
          (parallel [(const_int 2) (const_int 3)
                     (const_int 0) (const_int 1)])))]
(define_insn "*vsx_le_perm_store_v8hi"
  [(set (match_operand:V8HI 0 "indexed_or_indirect_operand" "=Z")
        (match_operand:V8HI 1 "vsx_register_operand" "+wa"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  [(set_attr "type" "vecstore")
   (set_attr "length" "12")])

  [(set (match_operand:V8HI 0 "indexed_or_indirect_operand")
        (match_operand:V8HI 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
          (parallel [(const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))
          (parallel [(const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))]
  rtx mem = operands[0];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register source is not in the altivec range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && (!HARD_REGISTER_NUM_P (reg_or_subregno (operands[1]))
          || ALTIVEC_REGNO_P (reg_or_subregno (operands[1]))))
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);
      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
          rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]);
          emit_insn (stvx_set_expr);
      else if (rs6000_quadword_masked_address_p (mem_address))
          /* This rtl is already in the form that matches stvx instruction,
             so leave it alone.  */
  /* Otherwise, fall through to transform into a swapping store.  */
  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])

;; The post-reload split requires that we re-permute the source
;; register in case it is still live.
  [(set (match_operand:V8HI 0 "indexed_or_indirect_operand")
        (match_operand:V8HI 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
          (parallel [(const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))
          (parallel [(const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))
          (parallel [(const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))]
(define_insn "*vsx_le_perm_store_v16qi"
  [(set (match_operand:V16QI 0 "indexed_or_indirect_operand" "=Z")
        (match_operand:V16QI 1 "vsx_register_operand" "+wa"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  [(set_attr "type" "vecstore")
   (set_attr "length" "12")])

  [(set (match_operand:V16QI 0 "indexed_or_indirect_operand")
        (match_operand:V16QI 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
          (parallel [(const_int 8) (const_int 9)
                     (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13)
                     (const_int 14) (const_int 15)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)])))
          (parallel [(const_int 8) (const_int 9)
                     (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13)
                     (const_int 14) (const_int 15)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)])))]
  rtx mem = operands[0];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register source is not in the altivec range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && (!HARD_REGISTER_NUM_P (reg_or_subregno (operands[1]))
          || ALTIVEC_REGNO_P (reg_or_subregno (operands[1]))))
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);
      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
          rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]);
          emit_insn (stvx_set_expr);
      else if (rs6000_quadword_masked_address_p (mem_address))
          /* This rtl is already in the form that matches stvx instruction,
             so leave it alone.  */
  /* Otherwise, fall through to transform into a swapping store.  */
  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])

;; The post-reload split requires that we re-permute the source
;; register in case it is still live.
  [(set (match_operand:V16QI 0 "indexed_or_indirect_operand")
        (match_operand:V16QI 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
          (parallel [(const_int 8) (const_int 9)
                     (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13)
                     (const_int 14) (const_int 15)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)])))
          (parallel [(const_int 8) (const_int 9)
                     (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13)
                     (const_int 14) (const_int 15)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)])))
          (parallel [(const_int 8) (const_int 9)
                     (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13)
                     (const_int 14) (const_int 15)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)])))]
;; Little endian word swapping for 128-bit types that are either scalars or the
;; special V1TI container class, for which it is not appropriate to use
;; vec_select.
(define_insn "*vsx_le_permute_<mode>"
  [(set (match_operand:VSX_TI 0 "nonimmediate_operand" "=wa,wa,Z,&r,&r,Q")
          (match_operand:VSX_TI 1 "input_operand" "wa,Z,wa,r,Q,r")
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
   xxpermdi %x0,%x1,%x1,2
   ld%U1%X1 %0,%L1\;ld%U1%X1 %L0,%1
   std%U0%X0 %L1,%0\;std%U0%X0 %1,%L0"
  [(set_attr "length" "*,*,*,8,8,8")
   (set_attr "type" "vecperm,vecload,vecstore,*,load,store")])

(define_insn_and_split "*vsx_le_undo_permute_<mode>"
  [(set (match_operand:VSX_TI 0 "vsx_register_operand" "=wa,wa")
          (match_operand:VSX_TI 1 "vsx_register_operand" "0,wa")
  "!BYTES_BIG_ENDIAN && TARGET_VSX"
  [(set (match_dup 0) (match_dup 1))]
  if (reload_completed && REGNO (operands[0]) == REGNO (operands[1]))
      emit_note (NOTE_INSN_DELETED);
  [(set_attr "length" "0,4")
   (set_attr "type" "veclogical")])
(define_insn_and_split "*vsx_le_perm_load_<mode>"
  [(set (match_operand:VSX_LE_128 0 "vsx_register_operand" "=wa,r")
        (match_operand:VSX_LE_128 1 "memory_operand" "Z,Q"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  rtx tmp = (can_create_pseudo_p ()
             ? gen_reg_rtx_and_attrs (operands[0])
  rs6000_emit_le_vsx_permute (tmp, operands[1], <MODE>mode);
  rs6000_emit_le_vsx_permute (operands[0], tmp, <MODE>mode);
  [(set_attr "type" "vecload,load")
   (set_attr "length" "8,8")
   (set_attr "isa" "<VSisa>,*")])

(define_insn "*vsx_le_perm_store_<mode>"
  [(set (match_operand:VSX_LE_128 0 "memory_operand" "=Z,Q")
        (match_operand:VSX_LE_128 1 "vsx_register_operand" "+wa,r"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  [(set_attr "type" "vecstore,store")
   (set_attr "length" "12,8")
   (set_attr "isa" "<VSisa>,*")])

  [(set (match_operand:VSX_LE_128 0 "memory_operand")
        (match_operand:VSX_LE_128 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !reload_completed && !TARGET_P9_VECTOR"
  rtx tmp = (can_create_pseudo_p ()
             ? gen_reg_rtx_and_attrs (operands[0])
  rs6000_emit_le_vsx_permute (tmp, operands[1], <MODE>mode);
  rs6000_emit_le_vsx_permute (operands[0], tmp, <MODE>mode);
;; Peepholes to catch loads and stores for TImode if TImode landed in
;; GPR registers on a little endian system.
  [(set (match_operand:VSX_TI 0 "int_reg_operand")
        (rotate:VSX_TI (match_operand:VSX_TI 1 "memory_operand")
   (set (match_operand:VSX_TI 2 "int_reg_operand")
        (rotate:VSX_TI (match_dup 0)
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
   && (rtx_equal_p (operands[0], operands[2])
       || peep2_reg_dead_p (2, operands[0]))"
  [(set (match_dup 2) (match_dup 1))])

  [(set (match_operand:VSX_TI 0 "int_reg_operand")
        (rotate:VSX_TI (match_operand:VSX_TI 1 "int_reg_operand")
   (set (match_operand:VSX_TI 2 "memory_operand")
        (rotate:VSX_TI (match_dup 0)
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
   && peep2_reg_dead_p (2, operands[0])"
  [(set (match_dup 2) (match_dup 1))])

;; Peephole to catch memory to memory transfers for TImode if TImode landed in
;; VSX registers on a little endian system.  The vector types and IEEE 128-bit
;; floating point are handled by the more generic swap elimination pass.
  [(set (match_operand:TI 0 "vsx_register_operand")
        (rotate:TI (match_operand:TI 1 "vsx_register_operand")
   (set (match_operand:TI 2 "vsx_register_operand")
        (rotate:TI (match_dup 0)
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
   && (rtx_equal_p (operands[0], operands[2])
       || peep2_reg_dead_p (2, operands[0]))"
  [(set (match_dup 2) (match_dup 1))])
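
;; For example (an assumed peephole2 scenario, not taken from the sources),
;; a TImode copy routed through registers can leave a pair of rotates:
;;
;;   (set (reg:TI 0) (rotate:TI (reg:TI 1) (const_int 64)))
;;   (set (reg:TI 2) (rotate:TI (reg:TI 0) (const_int 64)))
;;
;; Rotating a 128-bit value by 64 twice is the identity, so when the
;; intermediate register dies (or is the same register) the pair collapses
;; to the single move that the peepholes above substitute.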
;; The post-reload split requires that we re-permute the source
;; register in case it is still live.
  [(set (match_operand:VSX_LE_128 0 "memory_operand")
        (match_operand:VSX_LE_128 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && reload_completed && !TARGET_P9_VECTOR"
  rs6000_emit_le_vsx_permute (operands[1], operands[1], <MODE>mode);
  rs6000_emit_le_vsx_permute (operands[0], operands[1], <MODE>mode);
  rs6000_emit_le_vsx_permute (operands[1], operands[1], <MODE>mode);
;; Vector constants that can be generated with XXSPLTIB that was added in ISA
;; 3.0.  Both (const_vector [..]) and (vec_duplicate ...) forms are recognized.
(define_insn "xxspltib_v16qi"
  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
        (vec_duplicate:V16QI (match_operand:SI 1 "s8bit_cint_operand" "n")))]
  operands[2] = GEN_INT (INTVAL (operands[1]) & 0xff);
  return "xxspltib %x0,%2";
  [(set_attr "type" "vecperm")])
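
;; For instance (illustrative only, not from the sources), with the
;; vec_splats intrinsic from <altivec.h>:
;;
;;   vector signed char splat_m5 (void)
;;   {
;;     return vec_splats ((signed char) -5);
;;   }
;;
;; can be generated on ISA 3.0 as a single instruction, with the immediate
;; masked to 8 bits as above (-5 & 0xff == 251):
;;
;;   xxspltib 34,251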
(define_insn "xxspltib_<mode>_nosplit"
  [(set (match_operand:VSINT_842 0 "vsx_register_operand" "=wa,wa")
        (match_operand:VSINT_842 1 "xxspltib_constant_nosplit" "jwM,wE"))]
  rtx op1 = operands[1];

  if (!xxspltib_constant_p (op1, <MODE>mode, &num_insns, &value)
  operands[2] = GEN_INT (value & 0xff);
  return "xxspltib %x0,%2";
  [(set_attr "type" "vecperm")])
(define_insn_and_split "*xxspltib_<mode>_split"
  [(set (match_operand:VSINT_842 0 "altivec_register_operand" "=v")
        (match_operand:VSINT_842 1 "xxspltib_constant_split" "wS"))]
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  rtx tmp = ((can_create_pseudo_p ())
             ? gen_reg_rtx (V16QImode)
             : gen_lowpart (V16QImode, op0));

  if (!xxspltib_constant_p (op1, <MODE>mode, &num_insns, &value)
  emit_insn (gen_xxspltib_v16qi (tmp, GEN_INT (value)));

  if (<MODE>mode == V2DImode)
    emit_insn (gen_vsx_sign_extend_qi_v2di (op0, tmp));

  else if (<MODE>mode == V4SImode)
    emit_insn (gen_vsx_sign_extend_qi_v4si (op0, tmp));

  else if (<MODE>mode == V8HImode)
    emit_insn (gen_altivec_vupkhsb (op0, tmp));

  [(set_attr "type" "vecperm")
   (set_attr "length" "8")])
;; Prefer using vector registers over GPRs.  Prefer using ISA 3.0's XXSPLTIB
;; or Altivec VSPLTISW 0/-1 over XXLXOR/XXLORC to set a register to all 0's or
;; all 1's, since the machine does not have to wait for the previous
;; instruction using the register being set (such as a store waiting on a slow
;; instruction).  But generate XXLXOR/XXLORC if it will avoid a register move.

;;              VSX store  VSX load   VSX move  VSX->GPR   GPR->VSX    LQ (GPR)
;;              STQ (GPR)  GPR load   GPR store GPR move   XXSPLTIB    VSPLTISW
;;              VSX 0/-1   VMX const  GPR const LVX (VMX)  STVX (VMX)
1158 (define_insn "vsx_mov<mode>_64bit"
1159 [(set (match_operand:VSX_M 0 "nonimmediate_operand"
1160 "=ZwO, wa, wa, r, we, ?wQ,
1161 ?&r, ??r, ??Y, <??r>, wa, v,
1162 ?wa, v, <??r>, wZ, v")
1164 (match_operand:VSX_M 1 "input_operand"
1165 "wa, ZwO, wa, we, r, r,
1166 wQ, Y, r, r, wE, jwM,
1167 ?jwM, W, <nW>, v, wZ"))]
1169 "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)
1170 && (register_operand (operands[0], <MODE>mode)
1171 || register_operand (operands[1], <MODE>mode))"
1173 return rs6000_output_move_128bit (operands);
1176 "vecstore, vecload, vecsimple, mtvsr, mfvsr, load,
1177 store, load, store, *, vecsimple, vecsimple,
1178 vecsimple, *, *, vecstore, vecload")
1179 (set_attr "num_insns"
1183 (set_attr "max_prefixed_insns"
1192 "<VSisa>, <VSisa>, <VSisa>, *, *, *,
1194 <VSisa>, *, *, *, *")])
;;              VSX store  VSX load   VSX move  GPR load   GPR store  GPR move
;;              XXSPLTIB   VSPLTISW   VSX 0/-1  VMX const  GPR const
;;              LVX (VMX)  STVX (VMX)
(define_insn "*vsx_mov<mode>_32bit"
  [(set (match_operand:VSX_M 0 "nonimmediate_operand"
               "=ZwO,      wa,        wa,       ??r,       ??Y,       <??r>,
                wa,        v,         ?wa,      v,         <??r>,
        (match_operand:VSX_M 1 "input_operand"
               "wa,        ZwO,       wa,       Y,         r,         r,
                wE,        jwM,       ?jwM,     W,         <nW>,
  "!TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[1], <MODE>mode))"
  return rs6000_output_move_128bit (operands);
               "vecstore,  vecload,   vecsimple, load,     store,     *,
                vecsimple, vecsimple, vecsimple, *,        *,
               "*,         *,         *,         16,       16,        16,
               "<VSisa>,   <VSisa>,   <VSisa>,   *,        *,         *,
                p9v,       *,         <VSisa>,   *,        *,
;; Explicit load/store expanders for the builtin functions
(define_expand "vsx_load_<mode>"
  [(set (match_operand:VSX_M 0 "vsx_register_operand")
        (match_operand:VSX_M 1 "memory_operand"))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
  /* Expand to swaps if needed, prior to swap optimization.  */
  if (!BYTES_BIG_ENDIAN && !TARGET_P9_VECTOR)
      rs6000_emit_le_vsx_move (operands[0], operands[1], <MODE>mode);

(define_expand "vsx_store_<mode>"
  [(set (match_operand:VSX_M 0 "memory_operand")
        (match_operand:VSX_M 1 "vsx_register_operand"))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
  /* Expand to swaps if needed, prior to swap optimization.  */
  if (!BYTES_BIG_ENDIAN && !TARGET_P9_VECTOR)
      rs6000_emit_le_vsx_move (operands[0], operands[1], <MODE>mode);
;; Explicit load/store expanders for the builtin functions for lxvd2x, etc.,
;; when you really want their element-reversing behavior.
(define_insn "vsx_ld_elemrev_v2di"
  [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
          (match_operand:V2DI 1 "memory_operand" "Z")
          (parallel [(const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN"
  [(set_attr "type" "vecload")])

(define_insn "vsx_ld_elemrev_v1ti"
  [(set (match_operand:V1TI 0 "vsx_register_operand" "=wa")
          (match_operand:V1TI 1 "memory_operand" "Z")
          (parallel [(const_int 0)])))]
  "VECTOR_MEM_VSX_P (V1TImode) && !BYTES_BIG_ENDIAN"
  return "lxvd2x %x0,%y1\;xxpermdi %x0,%x0,%x0,2";
  [(set_attr "type" "vecload")])
1278 (define_insn "vsx_ld_elemrev_v2df"
1279 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
1281 (match_operand:V2DF 1 "memory_operand" "Z")
1282 (parallel [(const_int 1) (const_int 0)])))]
1283 "VECTOR_MEM_VSX_P (V2DFmode) && !BYTES_BIG_ENDIAN"
1285 [(set_attr "type" "vecload")])
1287 (define_insn "vsx_ld_elemrev_v4si"
1288 [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa")
1290 (match_operand:V4SI 1 "memory_operand" "Z")
1291 (parallel [(const_int 3) (const_int 2)
1292 (const_int 1) (const_int 0)])))]
1293 "VECTOR_MEM_VSX_P (V4SImode) && !BYTES_BIG_ENDIAN"
1295 [(set_attr "type" "vecload")])
1297 (define_insn "vsx_ld_elemrev_v4sf"
1298 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
1300 (match_operand:V4SF 1 "memory_operand" "Z")
1301 (parallel [(const_int 3) (const_int 2)
1302 (const_int 1) (const_int 0)])))]
1303 "VECTOR_MEM_VSX_P (V4SFmode) && !BYTES_BIG_ENDIAN"
1305 [(set_attr "type" "vecload")])
1307 (define_expand "vsx_ld_elemrev_v8hi"
1308 [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
1310 (match_operand:V8HI 1 "memory_operand" "Z")
1311 (parallel [(const_int 7) (const_int 6)
1312 (const_int 5) (const_int 4)
1313 (const_int 3) (const_int 2)
1314 (const_int 1) (const_int 0)])))]
1315 "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN"
1317 if (!TARGET_P9_VECTOR)
1319 rtx tmp = gen_reg_rtx (V4SImode);
1320 rtx subreg, subreg2, perm[16], pcv;
1321 /* 2 is leftmost element in register */
1322 unsigned int reorder[16] = {13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2};
1325 subreg = simplify_gen_subreg (V4SImode, operands[1], V8HImode, 0);
1326 emit_insn (gen_vsx_ld_elemrev_v4si (tmp, subreg));
1327 subreg2 = simplify_gen_subreg (V8HImode, tmp, V4SImode, 0);
1329 for (i = 0; i < 16; ++i)
1330 perm[i] = GEN_INT (reorder[i]);
1332 pcv = force_reg (V16QImode,
1333 gen_rtx_CONST_VECTOR (V16QImode,
1334 gen_rtvec_v (16, perm)));
1335 emit_insn (gen_altivec_vperm_v8hi_direct (operands[0], subreg2,
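
;; To unpack the reorder table above (informally): the V4SI element-reversing
;; load already places the four 32-bit words correctly, but within each word
;; the two 16-bit halves are still in the wrong order for a V8HI view.
;; Byte-index pairs such as {13,12} and {15,14} in the permute control vector
;; select the bytes of adjacent halfwords in exchanged positions, so the
;; vperm performs that intra-word halfword swap.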
1341 (define_insn "*vsx_ld_elemrev_v8hi_internal"
1342 [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
1344 (match_operand:V8HI 1 "memory_operand" "Z")
1345 (parallel [(const_int 7) (const_int 6)
1346 (const_int 5) (const_int 4)
1347 (const_int 3) (const_int 2)
1348 (const_int 1) (const_int 0)])))]
1349 "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
1351 [(set_attr "type" "vecload")])
1353 (define_expand "vsx_ld_elemrev_v16qi"
1354 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
1356 (match_operand:V16QI 1 "memory_operand" "Z")
1357 (parallel [(const_int 15) (const_int 14)
1358 (const_int 13) (const_int 12)
1359 (const_int 11) (const_int 10)
1360 (const_int 9) (const_int 8)
1361 (const_int 7) (const_int 6)
1362 (const_int 5) (const_int 4)
1363 (const_int 3) (const_int 2)
1364 (const_int 1) (const_int 0)])))]
1365 "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN"
1367 if (!TARGET_P9_VECTOR)
1369 rtx tmp = gen_reg_rtx (V4SImode);
1370 rtx subreg, subreg2, perm[16], pcv;
1371 /* 3 is leftmost element in register */
1372 unsigned int reorder[16] = {12,13,14,15,8,9,10,11,4,5,6,7,0,1,2,3};
1375 subreg = simplify_gen_subreg (V4SImode, operands[1], V16QImode, 0);
1376 emit_insn (gen_vsx_ld_elemrev_v4si (tmp, subreg));
1377 subreg2 = simplify_gen_subreg (V16QImode, tmp, V4SImode, 0);
1379 for (i = 0; i < 16; ++i)
1380 perm[i] = GEN_INT (reorder[i]);
1382 pcv = force_reg (V16QImode,
1383 gen_rtx_CONST_VECTOR (V16QImode,
1384 gen_rtvec_v (16, perm)));
1385 emit_insn (gen_altivec_vperm_v16qi_direct (operands[0], subreg2,
1391 (define_insn "vsx_ld_elemrev_v16qi_internal"
1392 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
1394 (match_operand:V16QI 1 "memory_operand" "Z")
1395 (parallel [(const_int 15) (const_int 14)
1396 (const_int 13) (const_int 12)
1397 (const_int 11) (const_int 10)
1398 (const_int 9) (const_int 8)
1399 (const_int 7) (const_int 6)
1400 (const_int 5) (const_int 4)
1401 (const_int 3) (const_int 2)
1402 (const_int 1) (const_int 0)])))]
1403 "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
1405 [(set_attr "type" "vecload")])
1407 (define_insn "vsx_st_elemrev_v1ti"
1408 [(set (match_operand:V1TI 0 "memory_operand" "=Z")
1410 (match_operand:V1TI 1 "vsx_register_operand" "+wa")
1411 (parallel [(const_int 0)])))
1412 (clobber (match_dup 1))]
1413 "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN"
1415 return "xxpermdi %x1,%x1,%x1,2\;stxvd2x %x1,%y0";
1417 [(set_attr "type" "vecstore")])
1419 (define_insn "vsx_st_elemrev_v2df"
1420 [(set (match_operand:V2DF 0 "memory_operand" "=Z")
1422 (match_operand:V2DF 1 "vsx_register_operand" "wa")
1423 (parallel [(const_int 1) (const_int 0)])))]
1424 "VECTOR_MEM_VSX_P (V2DFmode) && !BYTES_BIG_ENDIAN"
1426 [(set_attr "type" "vecstore")])
1428 (define_insn "vsx_st_elemrev_v2di"
1429 [(set (match_operand:V2DI 0 "memory_operand" "=Z")
1431 (match_operand:V2DI 1 "vsx_register_operand" "wa")
1432 (parallel [(const_int 1) (const_int 0)])))]
1433 "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN"
1435 [(set_attr "type" "vecstore")])
1437 (define_insn "vsx_st_elemrev_v4sf"
1438 [(set (match_operand:V4SF 0 "memory_operand" "=Z")
1440 (match_operand:V4SF 1 "vsx_register_operand" "wa")
1441 (parallel [(const_int 3) (const_int 2)
1442 (const_int 1) (const_int 0)])))]
1443 "VECTOR_MEM_VSX_P (V4SFmode) && !BYTES_BIG_ENDIAN"
1445 [(set_attr "type" "vecstore")])
1447 (define_insn "vsx_st_elemrev_v4si"
1448 [(set (match_operand:V4SI 0 "memory_operand" "=Z")
1450 (match_operand:V4SI 1 "vsx_register_operand" "wa")
1451 (parallel [(const_int 3) (const_int 2)
1452 (const_int 1) (const_int 0)])))]
1453 "VECTOR_MEM_VSX_P (V4SImode) && !BYTES_BIG_ENDIAN"
1455 [(set_attr "type" "vecstore")])
1457 (define_expand "vsx_st_elemrev_v8hi"
1458 [(set (match_operand:V8HI 0 "memory_operand" "=Z")
1460 (match_operand:V8HI 1 "vsx_register_operand" "wa")
1461 (parallel [(const_int 7) (const_int 6)
1462 (const_int 5) (const_int 4)
1463 (const_int 3) (const_int 2)
1464 (const_int 1) (const_int 0)])))]
1465 "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN"
1467 if (!TARGET_P9_VECTOR)
1469 rtx mem_subreg, subreg, perm[16], pcv;
1470 rtx tmp = gen_reg_rtx (V8HImode);
1471 /* 2 is leftmost element in register */
1472 unsigned int reorder[16] = {13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2};
1475 for (i = 0; i < 16; ++i)
1476 perm[i] = GEN_INT (reorder[i]);
1478 pcv = force_reg (V16QImode,
1479 gen_rtx_CONST_VECTOR (V16QImode,
1480 gen_rtvec_v (16, perm)));
1481 emit_insn (gen_altivec_vperm_v8hi_direct (tmp, operands[1],
1483 subreg = simplify_gen_subreg (V4SImode, tmp, V8HImode, 0);
1484 mem_subreg = simplify_gen_subreg (V4SImode, operands[0], V8HImode, 0);
1485 emit_insn (gen_vsx_st_elemrev_v4si (mem_subreg, subreg));
1490 (define_insn "*vsx_st_elemrev_v2di_internal"
1491 [(set (match_operand:V2DI 0 "memory_operand" "=Z")
1493 (match_operand:V2DI 1 "vsx_register_operand" "wa")
1494 (parallel [(const_int 1) (const_int 0)])))]
1495 "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
1497 [(set_attr "type" "vecstore")])
1499 (define_insn "*vsx_st_elemrev_v8hi_internal"
1500 [(set (match_operand:V8HI 0 "memory_operand" "=Z")
1502 (match_operand:V8HI 1 "vsx_register_operand" "wa")
1503 (parallel [(const_int 7) (const_int 6)
1504 (const_int 5) (const_int 4)
1505 (const_int 3) (const_int 2)
1506 (const_int 1) (const_int 0)])))]
1507 "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
1509 [(set_attr "type" "vecstore")])
1511 (define_expand "vsx_st_elemrev_v16qi"
1512 [(set (match_operand:V16QI 0 "memory_operand" "=Z")
1514 (match_operand:V16QI 1 "vsx_register_operand" "wa")
1515 (parallel [(const_int 15) (const_int 14)
1516 (const_int 13) (const_int 12)
1517 (const_int 11) (const_int 10)
1518 (const_int 9) (const_int 8)
1519 (const_int 7) (const_int 6)
1520 (const_int 5) (const_int 4)
1521 (const_int 3) (const_int 2)
1522 (const_int 1) (const_int 0)])))]
1523 "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN"
1525 if (!TARGET_P9_VECTOR)
1527 rtx mem_subreg, subreg, perm[16], pcv;
1528 rtx tmp = gen_reg_rtx (V16QImode);
1529 /* 3 is leftmost element in register */
1530 unsigned int reorder[16] = {12,13,14,15,8,9,10,11,4,5,6,7,0,1,2,3};
1533 for (i = 0; i < 16; ++i)
1534 perm[i] = GEN_INT (reorder[i]);
1536 pcv = force_reg (V16QImode,
1537 gen_rtx_CONST_VECTOR (V16QImode,
1538 gen_rtvec_v (16, perm)));
1539 emit_insn (gen_altivec_vperm_v16qi_direct (tmp, operands[1],
1541 subreg = simplify_gen_subreg (V4SImode, tmp, V16QImode, 0);
1542 mem_subreg = simplify_gen_subreg (V4SImode, operands[0], V16QImode, 0);
1543 emit_insn (gen_vsx_st_elemrev_v4si (mem_subreg, subreg));
1548 (define_insn "*vsx_st_elemrev_v16qi_internal"
1549 [(set (match_operand:V16QI 0 "memory_operand" "=Z")
1551 (match_operand:V16QI 1 "vsx_register_operand" "wa")
1552 (parallel [(const_int 15) (const_int 14)
1553 (const_int 13) (const_int 12)
1554 (const_int 11) (const_int 10)
1555 (const_int 9) (const_int 8)
1556 (const_int 7) (const_int 6)
1557 (const_int 5) (const_int 4)
1558 (const_int 3) (const_int 2)
1559 (const_int 1) (const_int 0)])))]
1560 "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
1562 [(set_attr "type" "vecstore")])
;; VSX vector floating point arithmetic instructions.  The VSX scalar
;; instructions are now combined with the insns for the traditional floating
;; point unit.
1568 (define_insn "*vsx_add<mode>3"
1569 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1570 (plus:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
1571 (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
1572 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1573 "xvadd<sd>p %x0,%x1,%x2"
1574 [(set_attr "type" "<VStype_simple>")])
1576 (define_insn "*vsx_sub<mode>3"
1577 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa>")
1578 (minus:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
1579 (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
1580 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1581 "xvsub<sd>p %x0,%x1,%x2"
1582 [(set_attr "type" "<VStype_simple>")])
1584 (define_insn "*vsx_mul<mode>3"
1585 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1586 (mult:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
1587 (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
1588 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1589 "xvmul<sd>p %x0,%x1,%x2"
1590 [(set_attr "type" "<VStype_simple>")])
1592 ; Emulate vector with scalar for vec_mul in V2DImode
1593 (define_insn_and_split "vsx_mul_v2di"
1594 [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
1595 (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa")
1596 (match_operand:V2DI 2 "vsx_register_operand" "wa")]
1598 "VECTOR_MEM_VSX_P (V2DImode)"
1600 "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed"
1603 rtx op0 = operands[0];
1604 rtx op1 = operands[1];
1605 rtx op2 = operands[2];
1606 rtx op3 = gen_reg_rtx (DImode);
1607 rtx op4 = gen_reg_rtx (DImode);
1608 rtx op5 = gen_reg_rtx (DImode);
1609 emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0)));
1610 emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0)));
1611 if (TARGET_POWERPC64)
1612 emit_insn (gen_muldi3 (op5, op3, op4));
1615 rtx ret = expand_mult (DImode, op3, op4, NULL, 0, false);
1616 emit_move_insn (op5, ret);
1618 emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1)));
1619 emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1)));
1620 if (TARGET_POWERPC64)
1621 emit_insn (gen_muldi3 (op3, op3, op4));
1624 rtx ret = expand_mult (DImode, op3, op4, NULL, 0, false);
1625 emit_move_insn (op3, ret);
1627 emit_insn (gen_vsx_concat_v2di (op0, op5, op3));
1630 [(set_attr "type" "mul")])
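
;; In other words (an illustrative sketch of what the split produces,
;; assuming a 64-bit target), multiplying two "vector long long" values
;;
;;   v2 = v0 * v1;
;;
;; becomes two vsx_extract_v2di/mulld pairs followed by a vsx_concat_v2di,
;; since pre-ISA 3.1 VSX has no doubleword integer vector multiply.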
1632 (define_insn "*vsx_div<mode>3"
1633 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1634 (div:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
1635 (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
1636 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1637 "xvdiv<sd>p %x0,%x1,%x2"
1638 [(set_attr "type" "<VStype_div>")])
1640 ; Emulate vector with scalar for vec_div in V2DImode
1641 (define_insn_and_split "vsx_div_v2di"
1642 [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
1643 (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa")
1644 (match_operand:V2DI 2 "vsx_register_operand" "wa")]
1646 "VECTOR_MEM_VSX_P (V2DImode)"
1648 "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed"
1651 rtx op0 = operands[0];
1652 rtx op1 = operands[1];
1653 rtx op2 = operands[2];
1654 rtx op3 = gen_reg_rtx (DImode);
1655 rtx op4 = gen_reg_rtx (DImode);
1656 rtx op5 = gen_reg_rtx (DImode);
1657 emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0)));
1658 emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0)));
1659 if (TARGET_POWERPC64)
1660 emit_insn (gen_divdi3 (op5, op3, op4));
1663 rtx libfunc = optab_libfunc (sdiv_optab, DImode);
1664 rtx target = emit_library_call_value (libfunc,
1665 op5, LCT_NORMAL, DImode,
1668 emit_move_insn (op5, target);
1670 emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1)));
1671 emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1)));
1672 if (TARGET_POWERPC64)
1673 emit_insn (gen_divdi3 (op3, op3, op4));
1676 rtx libfunc = optab_libfunc (sdiv_optab, DImode);
1677 rtx target = emit_library_call_value (libfunc,
1678 op3, LCT_NORMAL, DImode,
1681 emit_move_insn (op3, target);
1683 emit_insn (gen_vsx_concat_v2di (op0, op5, op3));
1686 [(set_attr "type" "div")])
1688 (define_insn_and_split "vsx_udiv_v2di"
1689 [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
1690 (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa")
1691 (match_operand:V2DI 2 "vsx_register_operand" "wa")]
1693 "VECTOR_MEM_VSX_P (V2DImode)"
1695 "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed"
1698 rtx op0 = operands[0];
1699 rtx op1 = operands[1];
1700 rtx op2 = operands[2];
1701 rtx op3 = gen_reg_rtx (DImode);
1702 rtx op4 = gen_reg_rtx (DImode);
1703 rtx op5 = gen_reg_rtx (DImode);
1704 emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0)));
1705 emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0)));
1706 if (TARGET_POWERPC64)
1707 emit_insn (gen_udivdi3 (op5, op3, op4));
1710 rtx libfunc = optab_libfunc (udiv_optab, DImode);
1711 rtx target = emit_library_call_value (libfunc,
1712 op5, LCT_NORMAL, DImode,
1715 emit_move_insn (op5, target);
1717 emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1)));
1718 emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1)));
1719 if (TARGET_POWERPC64)
1720 emit_insn (gen_udivdi3 (op3, op3, op4));
1723 rtx libfunc = optab_libfunc (udiv_optab, DImode);
1724 rtx target = emit_library_call_value (libfunc,
1725 op3, LCT_NORMAL, DImode,
1728 emit_move_insn (op3, target);
1730 emit_insn (gen_vsx_concat_v2di (op0, op5, op3));
1733 [(set_attr "type" "div")])
;; *tdiv* instruction returning the FG flag
(define_expand "vsx_tdiv<mode>3_fg"
        (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand")
                      (match_operand:VSX_B 2 "vsx_register_operand")]
   (set (match_operand:SI 0 "gpc_reg_operand")
        (gt:SI (match_dup 3)
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  operands[3] = gen_reg_rtx (CCFPmode);

;; *tdiv* instruction returning the FE flag
(define_expand "vsx_tdiv<mode>3_fe"
        (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand")
                      (match_operand:VSX_B 2 "vsx_register_operand")]
   (set (match_operand:SI 0 "gpc_reg_operand")
        (eq:SI (match_dup 3)
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  operands[3] = gen_reg_rtx (CCFPmode);

(define_insn "*vsx_tdiv<mode>3_internal"
  [(set (match_operand:CCFP 0 "cc_reg_operand" "=x")
        (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "wa")
                      (match_operand:VSX_B 2 "vsx_register_operand" "wa")]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "x<VSv>tdiv<sd>p %0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")])
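
;; The two expanders above differ only in how the CCFP result is read back:
;; the _fg form tests the FG (greater-than) bit with a gt:SI, the _fe form
;; tests the FE (equal) bit with an eq:SI.  A sketch of the generated RTL
;; (assumed shape; details are elided in this excerpt):
;;
;;   (set (reg:CCFP cc) (unspec:CCFP [op1 op2] ...))
;;   (set (reg:SI r) (gt:SI (reg:CCFP cc) (const_int 0)))
;;
;; so the caller receives a 0/1 value in a GPR.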
1772 (define_insn "vsx_fre<mode>2"
1773 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1774 (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
1776 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1778 [(set_attr "type" "<VStype_simple>")])
1780 (define_insn "*vsx_neg<mode>2"
1781 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1782 (neg:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")))]
1783 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1784 "xvneg<sd>p %x0,%x1"
1785 [(set_attr "type" "<VStype_simple>")])
1787 (define_insn "*vsx_abs<mode>2"
1788 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1789 (abs:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")))]
1790 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1791 "xvabs<sd>p %x0,%x1"
1792 [(set_attr "type" "<VStype_simple>")])
1794 (define_insn "vsx_nabs<mode>2"
1795 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1798 (match_operand:VSX_F 1 "vsx_register_operand" "wa"))))]
1799 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1800 "xvnabs<sd>p %x0,%x1"
1801 [(set_attr "type" "<VStype_simple>")])
1803 (define_insn "vsx_smax<mode>3"
1804 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1805 (smax:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
1806 (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
1807 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1808 "xvmax<sd>p %x0,%x1,%x2"
1809 [(set_attr "type" "<VStype_simple>")])
1811 (define_insn "*vsx_smin<mode>3"
1812 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1813 (smin:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
1814 (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
1815 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1816 "xvmin<sd>p %x0,%x1,%x2"
1817 [(set_attr "type" "<VStype_simple>")])
1819 (define_insn "*vsx_sqrt<mode>2"
1820 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1821 (sqrt:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")))]
1822 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1823 "xvsqrt<sd>p %x0,%x1"
1824 [(set_attr "type" "<sd>sqrt")])
1826 (define_insn "*vsx_rsqrte<mode>2"
1827 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1828 (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
1830 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1831 "xvrsqrte<sd>p %x0,%x1"
1832 [(set_attr "type" "<VStype_simple>")])
;; *tsqrt* returning the fg flag
(define_expand "vsx_tsqrt<mode>2_fg"
        (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand")]
   (set (match_operand:SI 0 "gpc_reg_operand")
        (gt:SI (match_dup 2)
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  operands[2] = gen_reg_rtx (CCFPmode);

;; *tsqrt* returning the fe flag
(define_expand "vsx_tsqrt<mode>2_fe"
        (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand")]
   (set (match_operand:SI 0 "gpc_reg_operand")
        (eq:SI (match_dup 2)
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  operands[2] = gen_reg_rtx (CCFPmode);

(define_insn "*vsx_tsqrt<mode>2_internal"
  [(set (match_operand:CCFP 0 "cc_reg_operand" "=x")
        (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "wa")]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "x<VSv>tsqrt<sd>p %0,%x1"
  [(set_attr "type" "<VStype_simple>")])
;; Fused vector multiply/add instructions.  Support the classical Altivec
;; versions of fma, which allows the target to be a separate register from the
;; 3 inputs.  Under VSX, the target must be either the addend or the first
;; multiplicand.
1873 (define_insn "*vsx_fmav4sf4"
1874 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa,wa,v")
1876 (match_operand:V4SF 1 "vsx_register_operand" "%wa,wa,v")
1877 (match_operand:V4SF 2 "vsx_register_operand" "wa,0,v")
1878 (match_operand:V4SF 3 "vsx_register_operand" "0,wa,v")))]
1879 "VECTOR_UNIT_VSX_P (V4SFmode)"
1881 xvmaddasp %x0,%x1,%x2
1882 xvmaddmsp %x0,%x1,%x3
1883 vmaddfp %0,%1,%2,%3"
1884 [(set_attr "type" "vecfloat")])
1886 (define_insn "*vsx_fmav2df4"
1887 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa,wa")
1889 (match_operand:V2DF 1 "vsx_register_operand" "%wa,wa")
1890 (match_operand:V2DF 2 "vsx_register_operand" "wa,0")
1891 (match_operand:V2DF 3 "vsx_register_operand" "0,wa")))]
1892 "VECTOR_UNIT_VSX_P (V2DFmode)"
1894 xvmaddadp %x0,%x1,%x2
1895 xvmaddmdp %x0,%x1,%x3"
1896 [(set_attr "type" "vecdouble")])
1898 (define_insn "*vsx_fms<mode>4"
1899 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa,wa")
1901 (match_operand:VSX_F 1 "vsx_register_operand" "%wa,wa")
1902 (match_operand:VSX_F 2 "vsx_register_operand" "wa,0")
1904 (match_operand:VSX_F 3 "vsx_register_operand" "0,wa"))))]
1905 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1907 xvmsuba<sd>p %x0,%x1,%x2
1908 xvmsubm<sd>p %x0,%x1,%x3"
1909 [(set_attr "type" "<VStype_mul>")])
1911 (define_insn "*vsx_nfma<mode>4"
1912 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa,wa")
1915 (match_operand:VSX_F 1 "vsx_register_operand" "wa,wa")
1916 (match_operand:VSX_F 2 "vsx_register_operand" "wa,0")
1917 (match_operand:VSX_F 3 "vsx_register_operand" "0,wa"))))]
1918 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1920 xvnmadda<sd>p %x0,%x1,%x2
1921 xvnmaddm<sd>p %x0,%x1,%x3"
1922 [(set_attr "type" "<VStype_mul>")])
1924 (define_insn "*vsx_nfmsv4sf4"
1925 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa,wa,v")
1928 (match_operand:V4SF 1 "vsx_register_operand" "%wa,wa,v")
1929 (match_operand:V4SF 2 "vsx_register_operand" "wa,0,v")
1931 (match_operand:V4SF 3 "vsx_register_operand" "0,wa,v")))))]
1932 "VECTOR_UNIT_VSX_P (V4SFmode)"
1934 xvnmsubasp %x0,%x1,%x2
1935 xvnmsubmsp %x0,%x1,%x3
1936 vnmsubfp %0,%1,%2,%3"
1937 [(set_attr "type" "vecfloat")])
1939 (define_insn "*vsx_nfmsv2df4"
1940 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa,wa")
1943 (match_operand:V2DF 1 "vsx_register_operand" "%wa,wa")
1944 (match_operand:V2DF 2 "vsx_register_operand" "wa,0")
1946 (match_operand:V2DF 3 "vsx_register_operand" "0,wa")))))]
1947 "VECTOR_UNIT_VSX_P (V2DFmode)"
1949 xvnmsubadp %x0,%x1,%x2
1950 xvnmsubmdp %x0,%x1,%x3"
1951 [(set_attr "type" "vecdouble")])
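
;; For example (illustrative register allocation, not from the sources):
;; for a = b * c + a the allocator can keep everything in place with the
;; "A" form, while a = b * c + d with b dying afterwards lets the "M" form
;; overwrite b:
;;
;;   xvmaddadp vsA,vsB,vsC   # vsA = vsB * vsC + vsA
;;   xvmaddmdp vsB,vsC,vsD   # vsB = vsB * vsC + vsD
;;
;; which is why each alternative ties operand 0 to either the addend or a
;; multiplicand.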
;; Vector conditional expressions (no scalar version for these instructions)
(define_insn "vsx_eq<mode>"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
        (eq:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
                  (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvcmpeq<sd>p %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")])

(define_insn "vsx_gt<mode>"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
        (gt:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
                  (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvcmpgt<sd>p %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")])

(define_insn "*vsx_ge<mode>"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
        (ge:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
                  (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvcmpge<sd>p %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")])

;; Compare vectors producing a vector result and a predicate, setting CR6 to
;; indicate a combined status
(define_insn "*vsx_eq_<mode>_p"
  [(set (reg:CC CR6_REGNO)
          [(eq:CC (match_operand:VSX_F 1 "vsx_register_operand" "wa")
                  (match_operand:VSX_F 2 "vsx_register_operand" "wa"))]
   (set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
        (eq:VSX_F (match_dup 1)
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvcmpeq<sd>p. %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")])

(define_insn "*vsx_gt_<mode>_p"
  [(set (reg:CC CR6_REGNO)
          [(gt:CC (match_operand:VSX_F 1 "vsx_register_operand" "wa")
                  (match_operand:VSX_F 2 "vsx_register_operand" "wa"))]
   (set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
        (gt:VSX_F (match_dup 1)
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvcmpgt<sd>p. %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")])
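
;; The dot forms set CR6, so the vec_all_*/vec_any_* predicates need no
;; separate compare.  As an illustration (a sketch using <altivec.h>):
;;
;;   int all_gt (vector double a, vector double b)
;;   {
;;     return vec_all_gt (a, b);
;;   }
;;
;; can compile to xvcmpgtdp. followed by a single CR6 test, with the mask
;; result in the vector destination available for free.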
2007 ;; Set the CR field BF to indicate if the lowest bit (bit 7) of every byte
2008 ;; element in VSR[XB] is equal to 1 (ALL_TRUE) or equal to 0 (ALL_FALSE).
2009 (define_insn "*xvtlsbb_internal"
2010 [(set (match_operand:CC 0 "cc_reg_operand" "=y")
2011 (unspec:CC [(match_operand:V16QI 1 "vsx_register_operand" "wa")]
2015 [(set_attr "type" "logical")])
2017 ;; Vector Test Least Significant Bit by Byte
2018 ;; for the implementation of the builtin
2019 ;; __builtin_vec_test_lsbb_all_ones
2020 ;; int vec_test_lsbb_all_ones (vector unsigned char);
2022 ;; __builtin_vec_test_lsbb_all_zeros
2023 ;; int vec_test_lsbb_all_zeros (vector unsigned char);
2024 (define_expand "xvtlsbbo"
2026 (unspec:CC [(match_operand:V16QI 1 "vsx_register_operand" "v")]
2028 (set (match_operand:SI 0 "gpc_reg_operand" "=r")
2029 (lt:SI (match_dup 2) (const_int 0)))]
2032 operands[2] = gen_reg_rtx (CCmode);
2034 (define_expand "xvtlsbbz"
2036 (unspec:CC [(match_operand:V16QI 1 "vsx_register_operand" "v")]
2038 (set (match_operand:SI 0 "gpc_reg_operand" "=r")
2039 (eq:SI (match_dup 2) (const_int 0)))]
2042 operands[2] = gen_reg_rtx (CCmode);
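/* Usage sketch for the two expanders above, using the prototypes quoted
   in the comment before them (illustrative only):

     #include <altivec.h>

     int ones (vector unsigned char v)
     {
       return vec_test_lsbb_all_ones (v);    // LSB of every byte is 1
     }

     int zeros (vector unsigned char v)
     {
       return vec_test_lsbb_all_zeros (v);   // LSB of every byte is 0
     }  */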
2045 (define_insn "*vsx_ge_<mode>_p"
2046 [(set (reg:CC CR6_REGNO)
2048 [(ge:CC (match_operand:VSX_F 1 "vsx_register_operand" "wa")
2049 (match_operand:VSX_F 2 "vsx_register_operand" "wa"))]
2051 (set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
2052 (ge:VSX_F (match_dup 1)
2054 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2055 "xvcmpge<sd>p. %x0,%x1,%x2"
2056 [(set_attr "type" "<VStype_simple>")])
2059 (define_insn "*vsx_xxsel<mode>"
2060 [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?wa")
2062 (ne:CC (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,wa")
2063 (match_operand:VSX_L 4 "zero_constant" ""))
2064 (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,wa")
2065 (match_operand:VSX_L 3 "vsx_register_operand" "<VSr>,wa")))]
2066 "VECTOR_MEM_VSX_P (<MODE>mode)"
2067 "xxsel %x0,%x3,%x2,%x1"
2068 [(set_attr "type" "vecmove")
2069 (set_attr "isa" "<VSisa>")])
2071 (define_insn "*vsx_xxsel<mode>_uns"
2072 [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?wa")
2074 (ne:CCUNS (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,wa")
2075 (match_operand:VSX_L 4 "zero_constant" ""))
2076 (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,wa")
2077 (match_operand:VSX_L 3 "vsx_register_operand" "<VSr>,wa")))]
2078 "VECTOR_MEM_VSX_P (<MODE>mode)"
2079 "xxsel %x0,%x3,%x2,%x1"
2080 [(set_attr "type" "vecmove")
2081 (set_attr "isa" "<VSisa>")])
2084 (define_insn "vsx_copysign<mode>3"
2085 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
2087 [(match_operand:VSX_F 1 "vsx_register_operand" "wa")
2088 (match_operand:VSX_F 2 "vsx_register_operand" "wa")]
2090 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2091 "xvcpsgn<sd>p %x0,%x2,%x1"
2092 [(set_attr "type" "<VStype_simple>")])
2094 ;; For the conversions, limit the register class for the integer value to be
2095 ;; the fprs because we don't want to add the altivec registers to movdi/movsi.
2096 ;; For the unsigned tests, there isn't a generic double -> unsigned conversion
2097 ;; in rs6000.md so don't test VECTOR_UNIT_VSX_P, just test against VSX.
2098 ;; Don't use vsx_register_operand here, use gpc_reg_operand to match rs6000.md
2099 ;; in allowing virtual registers.
2100 (define_insn "vsx_float<VSi><mode>2"
2101 [(set (match_operand:VSX_F 0 "gpc_reg_operand" "=wa")
2102 (float:VSX_F (match_operand:<VSI> 1 "gpc_reg_operand" "wa")))]
2103 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2104 "xvcvsx<VSc><sd>p %x0,%x1"
2105 [(set_attr "type" "<VStype_simple>")])
2107 (define_insn "vsx_floatuns<VSi><mode>2"
2108 [(set (match_operand:VSX_F 0 "gpc_reg_operand" "=wa")
2109 (unsigned_float:VSX_F (match_operand:<VSI> 1 "gpc_reg_operand" "wa")))]
2110 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2111 "xvcvux<VSc><sd>p %x0,%x1"
2112 [(set_attr "type" "<VStype_simple>")])
2114 (define_insn "vsx_fix_trunc<mode><VSi>2"
2115 [(set (match_operand:<VSI> 0 "gpc_reg_operand" "=wa")
2116 (fix:<VSI> (match_operand:VSX_F 1 "gpc_reg_operand" "wa")))]
2117 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2118 "x<VSv>cv<sd>psx<VSc>s %x0,%x1"
2119 [(set_attr "type" "<VStype_simple>")])
2121 (define_insn "vsx_fixuns_trunc<mode><VSi>2"
2122 [(set (match_operand:<VSI> 0 "gpc_reg_operand" "=wa")
2123 (unsigned_fix:<VSI> (match_operand:VSX_F 1 "gpc_reg_operand" "wa")))]
2124 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2125 "x<VSv>cv<sd>pux<VSc>s %x0,%x1"
2126 [(set_attr "type" "<VStype_simple>")])
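;; Illustrative sketch (not part of the port): with GCC 9 or later, the
;; element-wise conversions implemented by the patterns above can be
;; written portably via __builtin_convertvector, e.g.:
;;
;;   typedef signed int v4si __attribute__ ((vector_size (16)));
;;   typedef float v4sf __attribute__ ((vector_size (16)));
;;
;;   v4sf to_float (v4si x)
;;   {
;;     return __builtin_convertvector (x, v4sf);	/* xvcvsxwsp */
;;   }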
2128 ;; Math rounding functions
2129 (define_insn "vsx_x<VSv>r<sd>pi"
2130 [(set (match_operand:VSX_B 0 "vsx_register_operand" "=wa")
2131 (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "wa")]
2132 UNSPEC_VSX_ROUND_I))]
2133 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2134 "x<VSv>r<sd>pi %x0,%x1"
2135 [(set_attr "type" "<VStype_simple>")])
2137 (define_insn "vsx_x<VSv>r<sd>pic"
2138 [(set (match_operand:VSX_B 0 "vsx_register_operand" "=wa")
2139 (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "wa")]
2140 UNSPEC_VSX_ROUND_IC))]
2141 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2142 "x<VSv>r<sd>pic %x0,%x1"
2143 [(set_attr "type" "<VStype_simple>")])
2145 (define_insn "vsx_btrunc<mode>2"
2146 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
2147 (fix:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")))]
2148 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2149 "xvr<sd>piz %x0,%x1"
2150 [(set_attr "type" "<VStype_simple>")])
2152 (define_insn "*vsx_b2trunc<mode>2"
2153 [(set (match_operand:VSX_B 0 "vsx_register_operand" "=wa")
2154 (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "wa")]
2156 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2157 "x<VSv>r<sd>piz %x0,%x1"
2158 [(set_attr "type" "<VStype_simple>")])
2160 (define_insn "vsx_floor<mode>2"
2161 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
2162 (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
2164 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2165 "xvr<sd>pim %x0,%x1"
2166 [(set_attr "type" "<VStype_simple>")])
2168 (define_insn "vsx_ceil<mode>2"
2169 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
2170 (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
2172 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2173 "xvr<sd>pip %x0,%x1"
2174 [(set_attr "type" "<VStype_simple>")])
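;; Usage sketch (assuming the usual altivec.h bindings): the rounding
;; patterns above back the vec_floor/vec_ceil/vec_trunc intrinsics.
;;
;;   #include <altivec.h>
;;
;;   vector double down (vector double x) { return vec_floor (x); } /* xvrdpim */
;;   vector double up   (vector double x) { return vec_ceil (x);  } /* xvrdpip */
;;   vector double chop (vector double x) { return vec_trunc (x); } /* xvrdpiz */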
2177 ;; VSX convert to/from double vector
2179 ;; Convert between single and double precision
2180 ;; Don't use xscvspdp and xscvdpsp for scalar conversions, since the normal
2181 ;; scalar single precision instructions internally use the double format.
2182 ;; Prefer the altivec registers, since we likely will need to do a vperm
2183 (define_insn "vsx_xscvdpsp"
2184 [(set (match_operand:V4SF 0 "vsx_register_operand" "=f,?wa")
2185 (unspec:V4SF [(match_operand:DF 1 "vsx_register_operand" "f,wa")]
2186 UNSPEC_VSX_CVSPDP))]
2187 "VECTOR_UNIT_VSX_P (DFmode)"
2189 [(set_attr "type" "fp")])
2191 (define_insn "vsx_xvcvspdp_be"
2192 [(set (match_operand:V2DF 0 "vsx_register_operand" "=v,?wa")
2194 (vec_select:V2SF (match_operand:V4SF 1 "vsx_register_operand" "wa,wa")
2195 (parallel [(const_int 0) (const_int 2)]))))]
2196 "VECTOR_UNIT_VSX_P (V4SFmode) && BYTES_BIG_ENDIAN"
2198 [(set_attr "type" "vecdouble")])
2200 (define_insn "vsx_xvcvspdp_le"
2201 [(set (match_operand:V2DF 0 "vsx_register_operand" "=v,?wa")
2203 (vec_select:V2SF (match_operand:V4SF 1 "vsx_register_operand" "wa,wa")
2204 (parallel [(const_int 1) (const_int 3)]))))]
2205 "VECTOR_UNIT_VSX_P (V4SFmode) && !BYTES_BIG_ENDIAN"
2207 [(set_attr "type" "vecdouble")])
2209 (define_expand "vsx_xvcvspdp"
2210 [(match_operand:V2DF 0 "vsx_register_operand")
2211 (match_operand:V4SF 1 "vsx_register_operand")]
2212 "VECTOR_UNIT_VSX_P (V4SFmode)"
2214 if (BYTES_BIG_ENDIAN)
2215 emit_insn (gen_vsx_xvcvspdp_be (operands[0], operands[1]));
2217 emit_insn (gen_vsx_xvcvspdp_le (operands[0], operands[1]));
2221 (define_insn "vsx_xvcvdpsp"
2222 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa,?wa")
2223 (unspec:V4SF [(match_operand:V2DF 1 "vsx_register_operand" "v,wa")]
2224 UNSPEC_VSX_CVSPDP))]
2225 "VECTOR_UNIT_VSX_P (V2DFmode)"
2227 [(set_attr "type" "vecdouble")])
;; xscvspdp, representing the scalar SF type as V4SF
2230 (define_insn "vsx_xscvspdp"
2231 [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
2232 (unspec:DF [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
2233 UNSPEC_VSX_CVSPDP))]
2234 "VECTOR_UNIT_VSX_P (V4SFmode)"
2236 [(set_attr "type" "fp")])
2238 ;; Same as vsx_xscvspdp, but use SF as the type
2239 (define_insn "vsx_xscvspdp_scalar2"
2240 [(set (match_operand:SF 0 "vsx_register_operand" "=wa")
2241 (unspec:SF [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
2242 UNSPEC_VSX_CVSPDP))]
2243 "VECTOR_UNIT_VSX_P (V4SFmode)"
2245 [(set_attr "type" "fp")])
2247 ;; Generate xvcvhpsp instruction
2248 (define_insn "vsx_xvcvhpsp"
2249 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
(unspec:V4SF [(match_operand:V16QI 1 "vsx_register_operand" "wa")]
2251 UNSPEC_VSX_CVHPSP))]
2254 [(set_attr "type" "vecfloat")])
2256 ;; Generate xvcvsphp
2257 (define_insn "vsx_xvcvsphp"
2258 [(set (match_operand:V4SI 0 "register_operand" "=wa")
2259 (unspec:V4SI [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
2260 UNSPEC_VSX_XVCVSPHP))]
2263 [(set_attr "type" "vecfloat")])
2265 ;; xscvdpsp used for splat'ing a scalar to V4SF, knowing that the internal SF
2266 ;; format of scalars is actually DF.
2267 (define_insn "vsx_xscvdpsp_scalar"
2268 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2269 (unspec:V4SF [(match_operand:SF 1 "vsx_register_operand" "wa")]
2270 UNSPEC_VSX_CVSPDP))]
2271 "VECTOR_UNIT_VSX_P (V4SFmode)"
2273 [(set_attr "type" "fp")])
;; ISA 2.07 xscvdpspn/xscvspdpn that do not raise an error on signalling NaNs
2276 (define_insn "vsx_xscvdpspn"
2277 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2278 (unspec:V4SF [(match_operand:DF 1 "vsx_register_operand" "wa")]
2279 UNSPEC_VSX_CVDPSPN))]
2282 [(set_attr "type" "fp")])
2284 (define_insn "vsx_xscvspdpn"
2285 [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
2286 (unspec:DF [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
2287 UNSPEC_VSX_CVSPDPN))]
2290 [(set_attr "type" "fp")])
2292 (define_insn "vsx_xscvdpspn_scalar"
2293 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2294 (unspec:V4SF [(match_operand:SF 1 "vsx_register_operand" "wa")]
2295 UNSPEC_VSX_CVDPSPN))]
2298 [(set_attr "type" "fp")])
;; Used by direct move to move an SFmode value from a GPR to a VSX register
2301 (define_insn "vsx_xscvspdpn_directmove"
2302 [(set (match_operand:SF 0 "vsx_register_operand" "=wa")
2303 (unspec:SF [(match_operand:SF 1 "vsx_register_operand" "wa")]
2304 UNSPEC_VSX_CVSPDPN))]
2307 [(set_attr "type" "fp")])
2309 ;; Convert and scale (used by vec_ctf, vec_cts, vec_ctu for double/long long)
2311 (define_insn "vsx_xvcv<su>xwsp"
2312 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2313 (any_float:V4SF (match_operand:V4SI 1 "vsx_register_operand" "wa")))]
2314 "VECTOR_UNIT_VSX_P (V4SFmode)"
2315 "xvcv<su>xwsp %x0,%x1"
2316 [(set_attr "type" "vecfloat")])
2318 (define_insn "vsx_xvcv<su>xddp"
2319 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
2320 (any_float:V2DF (match_operand:V2DI 1 "vsx_register_operand" "wa")))]
2321 "VECTOR_UNIT_VSX_P (V2DFmode)"
2322 "xvcv<su>xddp %x0,%x1"
2323 [(set_attr "type" "vecdouble")])
2325 (define_insn "vsx_xvcvsp<su>xws"
2326 [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa")
2327 (any_fix:V4SI (match_operand:V4SF 1 "vsx_register_operand" "wa")))]
2328 "VECTOR_UNIT_VSX_P (V4SFmode)"
2329 "xvcvsp<su>xws %x0,%x1"
2330 [(set_attr "type" "vecfloat")])
2332 (define_insn "vsx_xvcvdp<su>xds"
2333 [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
2334 (any_fix:V2DI (match_operand:V2DF 1 "vsx_register_operand" "wa")))]
2335 "VECTOR_UNIT_VSX_P (V2DFmode)"
2336 "xvcvdp<su>xds %x0,%x1"
2337 [(set_attr "type" "vecdouble")])
2339 (define_expand "vsx_xvcvsxddp_scale"
2340 [(match_operand:V2DF 0 "vsx_register_operand")
2341 (match_operand:V2DI 1 "vsx_register_operand")
2342 (match_operand:QI 2 "immediate_operand")]
2343 "VECTOR_UNIT_VSX_P (V2DFmode)"
2345 rtx op0 = operands[0];
2346 rtx op1 = operands[1];
int scale = INTVAL (operands[2]);
2348 emit_insn (gen_vsx_xvcvsxddp (op0, op1));
2350 rs6000_scale_v2df (op0, op0, -scale);
2354 (define_expand "vsx_xvcvuxddp_scale"
2355 [(match_operand:V2DF 0 "vsx_register_operand")
2356 (match_operand:V2DI 1 "vsx_register_operand")
2357 (match_operand:QI 2 "immediate_operand")]
2358 "VECTOR_UNIT_VSX_P (V2DFmode)"
2360 rtx op0 = operands[0];
2361 rtx op1 = operands[1];
int scale = INTVAL (operands[2]);
2363 emit_insn (gen_vsx_xvcvuxddp (op0, op1));
2365 rs6000_scale_v2df (op0, op0, -scale);
2369 (define_expand "vsx_xvcvdpsxds_scale"
2370 [(match_operand:V2DI 0 "vsx_register_operand")
2371 (match_operand:V2DF 1 "vsx_register_operand")
2372 (match_operand:QI 2 "immediate_operand")]
2373 "VECTOR_UNIT_VSX_P (V2DFmode)"
2375 rtx op0 = operands[0];
2376 rtx op1 = operands[1];
2378 int scale = INTVAL (operands[2]);
2383 tmp = gen_reg_rtx (V2DFmode);
2384 rs6000_scale_v2df (tmp, op1, scale);
2386 emit_insn (gen_vsx_xvcvdpsxds (op0, tmp));
;; Convert vector of 64-bit floating point numbers to vector of
;; 64-bit unsigned integers
2392 (define_expand "vsx_xvcvdpuxds_scale"
2393 [(match_operand:V2DI 0 "vsx_register_operand")
2394 (match_operand:V2DF 1 "vsx_register_operand")
2395 (match_operand:QI 2 "immediate_operand")]
2396 "VECTOR_UNIT_VSX_P (V2DFmode)"
2398 rtx op0 = operands[0];
2399 rtx op1 = operands[1];
2401 int scale = INTVAL (operands[2]);
2406 tmp = gen_reg_rtx (V2DFmode);
2407 rs6000_scale_v2df (tmp, op1, scale);
2409 emit_insn (gen_vsx_xvcvdpuxds (op0, tmp));
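/* Usage sketch for the convert-and-scale expanders (a hedged example,
   assuming the usual altivec.h bindings of vec_ctf/vec_cts/vec_ctu for
   vector long long and vector double):

     #include <altivec.h>

     vector double from_fixed (vector signed long long x)
     {
       return vec_ctf (x, 4);   // (double) x[i] * 2**-4
     }  */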
2413 ;; Convert from 64-bit to 32-bit types
2414 ;; Note, favor the Altivec registers since the usual use of these instructions
2415 ;; is in vector converts and we need to use the Altivec vperm instruction.
2417 (define_insn "vsx_xvcvdpsxws"
2418 [(set (match_operand:V4SI 0 "vsx_register_operand" "=v,?wa")
2419 (unspec:V4SI [(match_operand:V2DF 1 "vsx_register_operand" "wa,wa")]
2420 UNSPEC_VSX_CVDPSXWS))]
2421 "VECTOR_UNIT_VSX_P (V2DFmode)"
2422 "xvcvdpsxws %x0,%x1"
2423 [(set_attr "type" "vecdouble")])
2425 (define_insn "vsx_xvcvdpuxws"
2426 [(set (match_operand:V4SI 0 "vsx_register_operand" "=v,?wa")
2427 (unspec:V4SI [(match_operand:V2DF 1 "vsx_register_operand" "wa,wa")]
2428 UNSPEC_VSX_CVDPUXWS))]
2429 "VECTOR_UNIT_VSX_P (V2DFmode)"
2430 "xvcvdpuxws %x0,%x1"
2431 [(set_attr "type" "vecdouble")])
2433 (define_insn "vsx_xvcvsxdsp"
2434 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2435 (unspec:V4SF [(match_operand:V2DI 1 "vsx_register_operand" "wa")]
2436 UNSPEC_VSX_CVSXDSP))]
2437 "VECTOR_UNIT_VSX_P (V2DFmode)"
2439 [(set_attr "type" "vecfloat")])
2441 (define_insn "vsx_xvcvuxdsp"
2442 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2443 (unspec:V4SF [(match_operand:V2DI 1 "vsx_register_operand" "wa")]
2444 UNSPEC_VSX_CVUXDSP))]
2445 "VECTOR_UNIT_VSX_P (V2DFmode)"
2447 [(set_attr "type" "vecdouble")])
2449 ;; Convert vector of 32-bit signed/unsigned integers to vector of
2450 ;; 64-bit floating point numbers.
2451 (define_insn "vsx_xvcv<su>xwdp_be"
2452 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
2454 (vec_select:V2SI (match_operand:V4SI 1 "vsx_register_operand" "wa")
2455 (parallel [(const_int 0) (const_int 2)]))))]
2456 "VECTOR_UNIT_VSX_P (V2DFmode) && BYTES_BIG_ENDIAN"
2457 "xvcv<su>xwdp %x0,%x1"
2458 [(set_attr "type" "vecdouble")])
2460 (define_insn "vsx_xvcv<su>xwdp_le"
2461 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
2463 (vec_select:V2SI (match_operand:V4SI 1 "vsx_register_operand" "wa")
2464 (parallel [(const_int 1) (const_int 3)]))))]
2465 "VECTOR_UNIT_VSX_P (V2DFmode) && !BYTES_BIG_ENDIAN"
2466 "xvcv<su>xwdp %x0,%x1"
2467 [(set_attr "type" "vecdouble")])
2469 (define_expand "vsx_xvcv<su>xwdp"
2470 [(match_operand:V2DF 0 "vsx_register_operand")
2471 (match_operand:V4SI 1 "vsx_register_operand")
2473 "VECTOR_UNIT_VSX_P (V2DFmode)"
2475 if (BYTES_BIG_ENDIAN)
2476 emit_insn (gen_vsx_xvcv<su>xwdp_be (operands[0], operands[1]));
2478 emit_insn (gen_vsx_xvcv<su>xwdp_le (operands[0], operands[1]));
2482 (define_insn "vsx_xvcvsxwdp_df"
2483 [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
2484 (unspec:DF [(match_operand:V4SI 1 "vsx_register_operand" "wa")]
2485 UNSPEC_VSX_CVSXWDP))]
2488 [(set_attr "type" "vecdouble")])
2490 (define_insn "vsx_xvcvuxwdp_df"
2491 [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
2492 (unspec:DF [(match_operand:V4SI 1 "vsx_register_operand" "wa")]
2493 UNSPEC_VSX_CVUXWDP))]
2496 [(set_attr "type" "vecdouble")])
2498 ;; Convert vector of 32-bit floating point numbers to vector of
2499 ;; 64-bit signed/unsigned integers.
2500 (define_insn "vsx_xvcvsp<su>xds_be"
2501 [(set (match_operand:V2DI 0 "vsx_register_operand" "=v,?wa")
2503 (vec_select:V2SF (match_operand:V4SF 1 "vsx_register_operand" "wa,wa")
2504 (parallel [(const_int 0) (const_int 2)]))))]
2505 "VECTOR_UNIT_VSX_P (V2DFmode) && BYTES_BIG_ENDIAN"
2506 "xvcvsp<su>xds %x0,%x1"
2507 [(set_attr "type" "vecdouble")])
2509 (define_insn "vsx_xvcvsp<su>xds_le"
2510 [(set (match_operand:V2DI 0 "vsx_register_operand" "=v,?wa")
2512 (vec_select:V2SF (match_operand:V4SF 1 "vsx_register_operand" "wa,wa")
2513 (parallel [(const_int 1) (const_int 3)]))))]
2514 "VECTOR_UNIT_VSX_P (V2DFmode) && !BYTES_BIG_ENDIAN"
2515 "xvcvsp<su>xds %x0,%x1"
2516 [(set_attr "type" "vecdouble")])
2518 (define_expand "vsx_xvcvsp<su>xds"
2519 [(match_operand:V2DI 0 "vsx_register_operand")
2520 (match_operand:V4SF 1 "vsx_register_operand")
2522 "VECTOR_UNIT_VSX_P (V2DFmode)"
2524 if (BYTES_BIG_ENDIAN)
2525 emit_insn (gen_vsx_xvcvsp<su>xds_be (operands[0], operands[1]));
2527 emit_insn (gen_vsx_xvcvsp<su>xds_le (operands[0], operands[1]));
;; Generate float2_v2df
;; convert two vectors of double to a vector of float
2533 (define_expand "float2_v2df"
2534 [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2535 (use (match_operand:V2DF 1 "register_operand" "wa"))
2536 (use (match_operand:V2DF 2 "register_operand" "wa"))]
2537 "VECTOR_UNIT_VSX_P (V4SFmode)"
2539 rtx rtx_src1, rtx_src2, rtx_dst;
2541 rtx_dst = operands[0];
2542 rtx_src1 = operands[1];
2543 rtx_src2 = operands[2];
2545 rs6000_generate_float2_double_code (rtx_dst, rtx_src1, rtx_src2);
2550 ;; convert two long long signed ints to float
2551 (define_expand "float2_v2di"
2552 [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2553 (use (match_operand:V2DI 1 "register_operand" "wa"))
2554 (use (match_operand:V2DI 2 "register_operand" "wa"))]
2555 "VECTOR_UNIT_VSX_P (V4SFmode)"
2557 rtx rtx_src1, rtx_src2, rtx_dst;
2559 rtx_dst = operands[0];
2560 rtx_src1 = operands[1];
2561 rtx_src2 = operands[2];
2563 rs6000_generate_float2_code (true, rtx_dst, rtx_src1, rtx_src2);
2567 ;; Generate uns_float2
2568 ;; convert two long long unsigned ints to float
2569 (define_expand "uns_float2_v2di"
2570 [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2571 (use (match_operand:V2DI 1 "register_operand" "wa"))
2572 (use (match_operand:V2DI 2 "register_operand" "wa"))]
2573 "VECTOR_UNIT_VSX_P (V4SFmode)"
2575 rtx rtx_src1, rtx_src2, rtx_dst;
2577 rtx_dst = operands[0];
2578 rtx_src1 = operands[1];
2579 rtx_src2 = operands[2];
rs6000_generate_float2_code (false, rtx_dst, rtx_src1, rtx_src2);
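/* Usage sketch for the float2/uns_float2 expanders (illustrative,
   assuming the vec_float2 binding from altivec.h):

     #include <altivec.h>

     vector float pack4 (vector signed long long a,
                         vector signed long long b)
     {
       return vec_float2 (a, b);   // four floats from 2 + 2 long longs
     }  */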
2586 ;; convert double or long long signed to float
2587 ;; (Only even words are valid, BE numbering)
2588 (define_expand "floate<mode>"
2589 [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2590 (use (match_operand:VSX_D 1 "register_operand" "wa"))]
2591 "VECTOR_UNIT_VSX_P (V4SFmode)"
2593 if (BYTES_BIG_ENDIAN)
/* Shift left one word to put the even words in the correct location.  */
2597 rtx rtx_val = GEN_INT (4);
2599 rtx_tmp = gen_reg_rtx (V4SFmode);
2600 emit_insn (gen_vsx_xvcv<VF_sxddp>sp (rtx_tmp, operands[1]));
2601 emit_insn (gen_altivec_vsldoi_v4sf (operands[0],
2602 rtx_tmp, rtx_tmp, rtx_val));
2605 emit_insn (gen_vsx_xvcv<VF_sxddp>sp (operands[0], operands[1]));
2610 ;; Generate uns_floate
2611 ;; convert long long unsigned to float
2612 ;; (Only even words are valid, BE numbering)
2613 (define_expand "unsfloatev2di"
2614 [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2615 (use (match_operand:V2DI 1 "register_operand" "wa"))]
2616 "VECTOR_UNIT_VSX_P (V4SFmode)"
2618 if (BYTES_BIG_ENDIAN)
/* Shift left one word to put the even words in the correct location.  */
2622 rtx rtx_val = GEN_INT (4);
2624 rtx_tmp = gen_reg_rtx (V4SFmode);
2625 emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp, operands[1]));
2626 emit_insn (gen_altivec_vsldoi_v4sf (operands[0],
2627 rtx_tmp, rtx_tmp, rtx_val));
2630 emit_insn (gen_vsx_xvcvuxdsp (operands[0], operands[1]));
2636 ;; convert double or long long signed to float
;; (Only odd words are valid, BE numbering)
2638 (define_expand "floato<mode>"
2639 [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2640 (use (match_operand:VSX_D 1 "register_operand" "wa"))]
2641 "VECTOR_UNIT_VSX_P (V4SFmode)"
2643 if (BYTES_BIG_ENDIAN)
2644 emit_insn (gen_vsx_xvcv<VF_sxddp>sp (operands[0], operands[1]));
/* Shift left one word to put the odd words in the correct location.  */
2649 rtx rtx_val = GEN_INT (4);
2651 rtx_tmp = gen_reg_rtx (V4SFmode);
2652 emit_insn (gen_vsx_xvcv<VF_sxddp>sp (rtx_tmp, operands[1]));
2653 emit_insn (gen_altivec_vsldoi_v4sf (operands[0],
2654 rtx_tmp, rtx_tmp, rtx_val));
2659 ;; Generate uns_floato
2660 ;; convert long long unsigned to float
2661 ;; (Only odd words are valid, BE numbering)
2662 (define_expand "unsfloatov2di"
2663 [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2664 (use (match_operand:V2DI 1 "register_operand" "wa"))]
2665 "VECTOR_UNIT_VSX_P (V4SFmode)"
2667 if (BYTES_BIG_ENDIAN)
2668 emit_insn (gen_vsx_xvcvuxdsp (operands[0], operands[1]));
/* Shift left one word to put the odd words in the correct location.  */
2673 rtx rtx_val = GEN_INT (4);
2675 rtx_tmp = gen_reg_rtx (V4SFmode);
2676 emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp, operands[1]));
2677 emit_insn (gen_altivec_vsldoi_v4sf (operands[0],
2678 rtx_tmp, rtx_tmp, rtx_val));
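/* Usage sketch for the floate/floato expanders above (illustrative,
   assuming the vec_floate/vec_floato bindings from altivec.h).  Only
   half of the V4SF result words carry converted values; the others
   are undefined:

     #include <altivec.h>

     vector float evens (vector double d) { return vec_floate (d); }
     vector float odds  (vector double d) { return vec_floato (d); }  */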
2683 ;; Generate vsigned2
2684 ;; convert two double float vectors to a vector of single precision ints
2685 (define_expand "vsigned2_v2df"
2686 [(match_operand:V4SI 0 "register_operand" "=wa")
2687 (unspec:V4SI [(match_operand:V2DF 1 "register_operand" "wa")
2688 (match_operand:V2DF 2 "register_operand" "wa")]
2689 UNSPEC_VSX_VSIGNED2)]
2692 rtx rtx_src1, rtx_src2, rtx_dst;
bool signed_convert = true;
2695 rtx_dst = operands[0];
2696 rtx_src1 = operands[1];
2697 rtx_src2 = operands[2];
2699 rs6000_generate_vsigned2_code (signed_convert, rtx_dst, rtx_src1, rtx_src2);
2703 ;; Generate vsignedo_v2df
;; signed double float to int conversion, odd words (BE numbering)
2705 (define_expand "vsignedo_v2df"
2706 [(set (match_operand:V4SI 0 "register_operand" "=wa")
2707 (match_operand:V2DF 1 "register_operand" "wa"))]
2710 if (BYTES_BIG_ENDIAN)
2713 rtx rtx_val = GEN_INT (12);
2714 rtx_tmp = gen_reg_rtx (V4SImode);
2716 emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp, operands[1]));
2718 /* Big endian word numbering for words in operand is 0 1 2 3.
Take (operand[1] operand[1]) and shift left one word:
0 1 2 3 0 1 2 3 => 1 2 3 0
Words 1 and 3 are now where they need to be for the result.  */
2723 emit_insn (gen_altivec_vsldoi_v4si (operands[0], rtx_tmp,
2727 /* Little endian word numbering for operand is 3 2 1 0.
2728 Result words 3 and 1 are where they need to be. */
2729 emit_insn (gen_vsx_xvcvdpsxws (operands[0], operands[1]));
2733 [(set_attr "type" "veccomplex")])
2735 ;; Generate vsignede_v2df
;; signed double float to int conversion, even words (BE numbering)
2737 (define_expand "vsignede_v2df"
2738 [(set (match_operand:V4SI 0 "register_operand" "=v")
2739 (match_operand:V2DF 1 "register_operand" "v"))]
2742 if (BYTES_BIG_ENDIAN)
/* Big endian word numbering for the result is 0 1 2 3.
Result words 0 and 2 are already where they need to be.  */
2745 emit_insn (gen_vsx_xvcvdpsxws (operands[0], operands[1]));
2750 rtx rtx_val = GEN_INT (12);
2751 rtx_tmp = gen_reg_rtx (V4SImode);
2753 emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp, operands[1]));
2755 /* Little endian word numbering for operand is 3 2 1 0.
Take (operand[1] operand[1]) and shift left three words:
2757 0 1 2 3 0 1 2 3 => 3 0 1 2
2758 Words 0 and 2 are now where they need to be for the result. */
2759 emit_insn (gen_altivec_vsldoi_v4si (operands[0], rtx_tmp,
2764 [(set_attr "type" "veccomplex")])
;; Generate vunsigned2
;; convert two double float vectors to a vector of single precision
;; unsigned ints
2769 (define_expand "vunsigned2_v2df"
2770 [(match_operand:V4SI 0 "register_operand" "=v")
2771 (unspec:V4SI [(match_operand:V2DF 1 "register_operand" "v")
2772 (match_operand:V2DF 2 "register_operand" "v")]
2773 UNSPEC_VSX_VSIGNED2)]
2776 rtx rtx_src1, rtx_src2, rtx_dst;
bool signed_convert = false;
2779 rtx_dst = operands[0];
2780 rtx_src1 = operands[1];
2781 rtx_src2 = operands[2];
2783 rs6000_generate_vsigned2_code (signed_convert, rtx_dst, rtx_src1, rtx_src2);
2787 ;; Generate vunsignedo_v2df
;; unsigned double float to int conversion, odd words (BE numbering)
2789 (define_expand "vunsignedo_v2df"
2790 [(set (match_operand:V4SI 0 "register_operand" "=v")
2791 (match_operand:V2DF 1 "register_operand" "v"))]
2794 if (BYTES_BIG_ENDIAN)
2797 rtx rtx_val = GEN_INT (12);
2798 rtx_tmp = gen_reg_rtx (V4SImode);
2800 emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp, operands[1]));
2802 /* Big endian word numbering for words in operand is 0 1 2 3.
Take (operand[1] operand[1]) and shift left one word:
0 1 2 3 0 1 2 3 => 1 2 3 0
Words 1 and 3 are now where they need to be for the result.  */
2807 emit_insn (gen_altivec_vsldoi_v4si (operands[0], rtx_tmp,
2811 /* Little endian word numbering for operand is 3 2 1 0.
2812 Result words 3 and 1 are where they need to be. */
2813 emit_insn (gen_vsx_xvcvdpuxws (operands[0], operands[1]));
2817 [(set_attr "type" "veccomplex")])
2819 ;; Generate vunsignede_v2df
;; unsigned double float to int conversion, even words (BE numbering)
2821 (define_expand "vunsignede_v2df"
2822 [(set (match_operand:V4SI 0 "register_operand" "=v")
2823 (match_operand:V2DF 1 "register_operand" "v"))]
2826 if (BYTES_BIG_ENDIAN)
/* Big endian word numbering for the result is 0 1 2 3.
Result words 0 and 2 are already where they need to be.  */
2829 emit_insn (gen_vsx_xvcvdpuxws (operands[0], operands[1]));
2834 rtx rtx_val = GEN_INT (12);
2835 rtx_tmp = gen_reg_rtx (V4SImode);
2837 emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp, operands[1]));
2839 /* Little endian word numbering for operand is 3 2 1 0.
Take (operand[1] operand[1]) and shift left three words:
2841 0 1 2 3 0 1 2 3 => 3 0 1 2
2842 Words 0 and 2 are now where they need to be for the result. */
2843 emit_insn (gen_altivec_vsldoi_v4si (operands[0], rtx_tmp,
2848 [(set_attr "type" "veccomplex")])
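;; Usage sketch (assuming the vec_signed2/vec_unsigned2 bindings from
;; altivec.h): the vsigned2/vunsigned2 expanders pack the converted
;; words of two V2DF inputs into a single V4SI result.
;;
;;   #include <altivec.h>
;;
;;   vector signed int s2 (vector double a, vector double b)
;;   {
;;     return vec_signed2 (a, b);	/* 4 ints from 2 + 2 doubles */
;;   }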
;; Only optimize (float (fix x)) -> frz if we are in fast-math mode, since
;; the xvrdpiz instruction does not truncate the value if the floating
2852 ;; point value is < LONG_MIN or > LONG_MAX.
2853 (define_insn "*vsx_float_fix_v2df2"
2854 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa,?wa")
2857 (match_operand:V2DF 1 "vsx_register_operand" "wa,?wa"))))]
2859 && VECTOR_UNIT_VSX_P (V2DFmode) && flag_unsafe_math_optimizations
2860 && !flag_trapping_math && TARGET_FRIZ"
2862 [(set_attr "type" "vecdouble")])
2865 ;; Permute operations
2867 ;; Build a V2DF/V2DI vector from two scalars
2868 (define_insn "vsx_concat_<mode>"
2869 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa,we")
2871 (match_operand:<VS_scalar> 1 "gpc_reg_operand" "wa,b")
2872 (match_operand:<VS_scalar> 2 "gpc_reg_operand" "wa,b")))]
2873 "VECTOR_MEM_VSX_P (<MODE>mode)"
2875 if (which_alternative == 0)
2876 return (BYTES_BIG_ENDIAN
2877 ? "xxpermdi %x0,%x1,%x2,0"
2878 : "xxpermdi %x0,%x2,%x1,0");
2880 else if (which_alternative == 1)
2881 return (BYTES_BIG_ENDIAN
2882 ? "mtvsrdd %x0,%1,%2"
2883 : "mtvsrdd %x0,%2,%1");
2888 [(set_attr "type" "vecperm,vecmove")])
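;; Illustration: vsx_concat_<mode> is the pattern behind building a
;; two-element vector from scalars with GCC's generic vector extension,
;; e.g.:
;;
;;   typedef double v2df __attribute__ ((vector_size (16)));
;;
;;   v2df pair (double a, double b)
;;   {
;;     return (v2df) { a, b };	/* xxpermdi or mtvsrdd */
;;   }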
2890 ;; Combiner patterns to allow creating XXPERMDI's to access either double
2891 ;; word element in a vector register.
2892 (define_insn "*vsx_concat_<mode>_1"
2893 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
2895 (vec_select:<VS_scalar>
2896 (match_operand:VSX_D 1 "gpc_reg_operand" "wa")
2897 (parallel [(match_operand:QI 2 "const_0_to_1_operand" "n")]))
2898 (match_operand:<VS_scalar> 3 "gpc_reg_operand" "wa")))]
2899 "VECTOR_MEM_VSX_P (<MODE>mode)"
2901 HOST_WIDE_INT dword = INTVAL (operands[2]);
2902 if (BYTES_BIG_ENDIAN)
operands[4] = GEN_INT (2 * dword);
2905 return "xxpermdi %x0,%x1,%x3,%4";
2909 operands[4] = GEN_INT (!dword);
2910 return "xxpermdi %x0,%x3,%x1,%4";
2913 [(set_attr "type" "vecperm")])
2915 (define_insn "*vsx_concat_<mode>_2"
2916 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
2918 (match_operand:<VS_scalar> 1 "gpc_reg_operand" "wa")
2919 (vec_select:<VS_scalar>
2920 (match_operand:VSX_D 2 "gpc_reg_operand" "wa")
2921 (parallel [(match_operand:QI 3 "const_0_to_1_operand" "n")]))))]
2922 "VECTOR_MEM_VSX_P (<MODE>mode)"
2924 HOST_WIDE_INT dword = INTVAL (operands[3]);
2925 if (BYTES_BIG_ENDIAN)
2927 operands[4] = GEN_INT (dword);
2928 return "xxpermdi %x0,%x1,%x2,%4";
2932 operands[4] = GEN_INT (2 * !dword);
2933 return "xxpermdi %x0,%x2,%x1,%4";
2936 [(set_attr "type" "vecperm")])
2938 (define_insn "*vsx_concat_<mode>_3"
2939 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
2941 (vec_select:<VS_scalar>
2942 (match_operand:VSX_D 1 "gpc_reg_operand" "wa")
2943 (parallel [(match_operand:QI 2 "const_0_to_1_operand" "n")]))
2944 (vec_select:<VS_scalar>
2945 (match_operand:VSX_D 3 "gpc_reg_operand" "wa")
2946 (parallel [(match_operand:QI 4 "const_0_to_1_operand" "n")]))))]
2947 "VECTOR_MEM_VSX_P (<MODE>mode)"
2949 HOST_WIDE_INT dword1 = INTVAL (operands[2]);
2950 HOST_WIDE_INT dword2 = INTVAL (operands[4]);
2951 if (BYTES_BIG_ENDIAN)
2953 operands[5] = GEN_INT ((2 * dword1) + dword2);
2954 return "xxpermdi %x0,%x1,%x3,%5";
2958 operands[5] = GEN_INT ((2 * !dword2) + !dword1);
2959 return "xxpermdi %x0,%x3,%x1,%5";
2962 [(set_attr "type" "vecperm")])
2964 ;; Special purpose concat using xxpermdi to glue two single precision values
2965 ;; together, relying on the fact that internally scalar floats are represented
;; as doubles. This is used to initialize a V4SF vector with 4 floats.
2967 (define_insn "vsx_concat_v2sf"
2968 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
2970 [(match_operand:SF 1 "vsx_register_operand" "wa")
2971 (match_operand:SF 2 "vsx_register_operand" "wa")]
2972 UNSPEC_VSX_CONCAT))]
2973 "VECTOR_MEM_VSX_P (V2DFmode)"
2975 if (BYTES_BIG_ENDIAN)
2976 return "xxpermdi %x0,%x1,%x2,0";
2978 return "xxpermdi %x0,%x2,%x1,0";
2980 [(set_attr "type" "vecperm")])
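;; Sketch of the C-level construct this serves (using GCC's generic
;; vector extension), so the four floats need not bounce through memory:
;;
;;   typedef float v4sf __attribute__ ((vector_size (16)));
;;
;;   v4sf quad (float a, float b, float c, float d)
;;   {
;;     return (v4sf) { a, b, c, d };
;;   }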
2982 ;; Concatenate 4 SImode elements into a V4SImode reg.
2983 (define_expand "vsx_init_v4si"
2984 [(use (match_operand:V4SI 0 "gpc_reg_operand"))
2985 (use (match_operand:SI 1 "gpc_reg_operand"))
2986 (use (match_operand:SI 2 "gpc_reg_operand"))
2987 (use (match_operand:SI 3 "gpc_reg_operand"))
2988 (use (match_operand:SI 4 "gpc_reg_operand"))]
2989 "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
2991 rtx a = gen_reg_rtx (DImode);
2992 rtx b = gen_reg_rtx (DImode);
2993 rtx c = gen_reg_rtx (DImode);
2994 rtx d = gen_reg_rtx (DImode);
2995 emit_insn (gen_zero_extendsidi2 (a, operands[1]));
2996 emit_insn (gen_zero_extendsidi2 (b, operands[2]));
2997 emit_insn (gen_zero_extendsidi2 (c, operands[3]));
2998 emit_insn (gen_zero_extendsidi2 (d, operands[4]));
2999 if (!BYTES_BIG_ENDIAN)
3005 rtx aa = gen_reg_rtx (DImode);
3006 rtx ab = gen_reg_rtx (DImode);
3007 rtx cc = gen_reg_rtx (DImode);
3008 rtx cd = gen_reg_rtx (DImode);
3009 emit_insn (gen_ashldi3 (aa, a, GEN_INT (32)));
3010 emit_insn (gen_ashldi3 (cc, c, GEN_INT (32)));
3011 emit_insn (gen_iordi3 (ab, aa, b));
3012 emit_insn (gen_iordi3 (cd, cc, d));
3014 rtx abcd = gen_reg_rtx (V2DImode);
3015 emit_insn (gen_vsx_concat_v2di (abcd, ab, cd));
3016 emit_move_insn (operands[0], gen_lowpart (V4SImode, abcd));
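/* In scalar terms the shift/or sequence above computes

     unsigned long long ab = ((unsigned long long) a << 32) | b;
     unsigned long long cd = ((unsigned long long) c << 32) | d;

   (with a/b and c/d interchanged for little endian); vsx_concat_v2di
   then glues AB and CD into the final 128-bit value.  */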
3020 ;; xxpermdi for little endian loads and stores. We need several of
3021 ;; these since the form of the PARALLEL differs by mode.
3022 (define_insn "*vsx_xxpermdi2_le_<mode>"
3023 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
3025 (match_operand:VSX_D 1 "vsx_register_operand" "wa")
3026 (parallel [(const_int 1) (const_int 0)])))]
3027 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)"
3028 "xxpermdi %x0,%x1,%x1,2"
3029 [(set_attr "type" "vecperm")])
3031 (define_insn "xxswapd_v16qi"
3032 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
3034 (match_operand:V16QI 1 "vsx_register_operand" "wa")
3035 (parallel [(const_int 8) (const_int 9)
3036 (const_int 10) (const_int 11)
3037 (const_int 12) (const_int 13)
3038 (const_int 14) (const_int 15)
3039 (const_int 0) (const_int 1)
3040 (const_int 2) (const_int 3)
3041 (const_int 4) (const_int 5)
3042 (const_int 6) (const_int 7)])))]
3044 ;; AIX does not support the extended mnemonic xxswapd. Use the basic
3045 ;; mnemonic xxpermdi instead.
3046 "xxpermdi %x0,%x1,%x1,2"
3047 [(set_attr "type" "vecperm")])
3049 (define_insn "xxswapd_v8hi"
3050 [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
3052 (match_operand:V8HI 1 "vsx_register_operand" "wa")
3053 (parallel [(const_int 4) (const_int 5)
3054 (const_int 6) (const_int 7)
3055 (const_int 0) (const_int 1)
3056 (const_int 2) (const_int 3)])))]
3058 ;; AIX does not support the extended mnemonic xxswapd. Use the basic
3059 ;; mnemonic xxpermdi instead.
3060 "xxpermdi %x0,%x1,%x1,2"
3061 [(set_attr "type" "vecperm")])
3063 (define_insn "xxswapd_<mode>"
3064 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
3066 (match_operand:VSX_W 1 "vsx_register_operand" "wa")
3067 (parallel [(const_int 2) (const_int 3)
3068 (const_int 0) (const_int 1)])))]
;; AIX does not support the extended mnemonic xxswapd. Use the basic
3071 ;; mnemonic xxpermdi instead.
3072 "xxpermdi %x0,%x1,%x1,2"
3073 [(set_attr "type" "vecperm")])
3075 (define_insn "xxswapd_<mode>"
3076 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
3078 (match_operand:VSX_D 1 "vsx_register_operand" "wa")
3079 (parallel [(const_int 1) (const_int 0)])))]
;; AIX does not support the extended mnemonic xxswapd. Use the basic
3082 ;; mnemonic xxpermdi instead.
3083 "xxpermdi %x0,%x1,%x1,2"
3084 [(set_attr "type" "vecperm")])
3086 (define_insn "xxgenpcvm_<mode>_internal"
3087 [(set (match_operand:VSX_EXTRACT_I4 0 "altivec_register_operand" "=wa")
3088 (unspec:VSX_EXTRACT_I4
3089 [(match_operand:VSX_EXTRACT_I4 1 "altivec_register_operand" "v")
3090 (match_operand:QI 2 "const_0_to_3_operand" "n")]
3092 "TARGET_POWER10 && TARGET_64BIT"
3093 "xxgenpcv<wd>m %x0,%1,%2"
3094 [(set_attr "type" "vecsimple")])
3096 (define_expand "xxgenpcvm_<mode>"
3097 [(use (match_operand:VSX_EXTRACT_I4 0 "register_operand"))
3098 (use (match_operand:VSX_EXTRACT_I4 1 "register_operand"))
3099 (use (match_operand:QI 2 "immediate_operand"))]
3102 if (!BYTES_BIG_ENDIAN)
/* gen_xxgenpcvm assumes Big Endian order.  If LE,
swap the upper and lower double words.  */
3106 rtx tmp = gen_reg_rtx (<MODE>mode);
3108 emit_insn (gen_xxswapd_<mode> (tmp, operands[1]));
3111 emit_insn (gen_xxgenpcvm_<mode>_internal (operands[0], operands[1],
3116 ;; lxvd2x for little endian loads. We need several of
3117 ;; these since the form of the PARALLEL differs by mode.
3118 (define_insn "*vsx_lxvd2x2_le_<mode>"
3119 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
3121 (match_operand:VSX_D 1 "memory_operand" "Z")
3122 (parallel [(const_int 1) (const_int 0)])))]
3123 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
3125 [(set_attr "type" "vecload")])
3127 (define_insn "*vsx_lxvd2x4_le_<mode>"
3128 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
3130 (match_operand:VSX_W 1 "memory_operand" "Z")
3131 (parallel [(const_int 2) (const_int 3)
3132 (const_int 0) (const_int 1)])))]
3133 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
3135 [(set_attr "type" "vecload")])
3137 (define_insn "*vsx_lxvd2x8_le_V8HI"
3138 [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
3140 (match_operand:V8HI 1 "memory_operand" "Z")
3141 (parallel [(const_int 4) (const_int 5)
3142 (const_int 6) (const_int 7)
3143 (const_int 0) (const_int 1)
3144 (const_int 2) (const_int 3)])))]
3145 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode) && !TARGET_P9_VECTOR"
3147 [(set_attr "type" "vecload")])
3149 (define_insn "*vsx_lxvd2x16_le_V16QI"
3150 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
3152 (match_operand:V16QI 1 "memory_operand" "Z")
3153 (parallel [(const_int 8) (const_int 9)
3154 (const_int 10) (const_int 11)
3155 (const_int 12) (const_int 13)
3156 (const_int 14) (const_int 15)
3157 (const_int 0) (const_int 1)
3158 (const_int 2) (const_int 3)
3159 (const_int 4) (const_int 5)
3160 (const_int 6) (const_int 7)])))]
3161 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode) && !TARGET_P9_VECTOR"
3163 [(set_attr "type" "vecload")])
3165 ;; stxvd2x for little endian stores. We need several of
3166 ;; these since the form of the PARALLEL differs by mode.
3167 (define_insn "*vsx_stxvd2x2_le_<mode>"
3168 [(set (match_operand:VSX_D 0 "memory_operand" "=Z")
3170 (match_operand:VSX_D 1 "vsx_register_operand" "wa")
3171 (parallel [(const_int 1) (const_int 0)])))]
3172 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
3174 [(set_attr "type" "vecstore")])
3176 (define_insn "*vsx_stxvd2x4_le_<mode>"
3177 [(set (match_operand:VSX_W 0 "memory_operand" "=Z")
3179 (match_operand:VSX_W 1 "vsx_register_operand" "wa")
3180 (parallel [(const_int 2) (const_int 3)
3181 (const_int 0) (const_int 1)])))]
3182 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
3184 [(set_attr "type" "vecstore")])
3186 (define_insn "*vsx_stxvd2x8_le_V8HI"
3187 [(set (match_operand:V8HI 0 "memory_operand" "=Z")
3189 (match_operand:V8HI 1 "vsx_register_operand" "wa")
3190 (parallel [(const_int 4) (const_int 5)
3191 (const_int 6) (const_int 7)
3192 (const_int 0) (const_int 1)
3193 (const_int 2) (const_int 3)])))]
3194 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode) && !TARGET_P9_VECTOR"
3196 [(set_attr "type" "vecstore")])
3198 (define_insn "*vsx_stxvd2x16_le_V16QI"
3199 [(set (match_operand:V16QI 0 "memory_operand" "=Z")
3201 (match_operand:V16QI 1 "vsx_register_operand" "wa")
3202 (parallel [(const_int 8) (const_int 9)
3203 (const_int 10) (const_int 11)
3204 (const_int 12) (const_int 13)
3205 (const_int 14) (const_int 15)
3206 (const_int 0) (const_int 1)
3207 (const_int 2) (const_int 3)
3208 (const_int 4) (const_int 5)
3209 (const_int 6) (const_int 7)])))]
3210 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode) && !TARGET_P9_VECTOR"
3212 [(set_attr "type" "vecstore")])
3214 ;; Convert a TImode value into V1TImode
3215 (define_expand "vsx_set_v1ti"
3216 [(match_operand:V1TI 0 "nonimmediate_operand")
3217 (match_operand:V1TI 1 "nonimmediate_operand")
3218 (match_operand:TI 2 "input_operand")
3219 (match_operand:QI 3 "u5bit_cint_operand")]
3220 "VECTOR_MEM_VSX_P (V1TImode)"
3222 if (operands[3] != const0_rtx)
3225 emit_move_insn (operands[0], gen_lowpart (V1TImode, operands[1]));
3229 ;; Rewrite V2DF/V2DI set in terms of VEC_CONCAT
3230 (define_expand "vsx_set_<mode>"
3231 [(use (match_operand:VSX_D 0 "vsx_register_operand"))
3232 (use (match_operand:VSX_D 1 "vsx_register_operand"))
3233 (use (match_operand:<VS_scalar> 2 "gpc_reg_operand"))
3234 (use (match_operand:QI 3 "const_0_to_1_operand"))]
3235 "VECTOR_MEM_VSX_P (<MODE>mode)"
3237 rtx dest = operands[0];
3238 rtx vec_reg = operands[1];
3239 rtx value = operands[2];
3240 rtx ele = operands[3];
3241 rtx tmp = gen_reg_rtx (<VS_scalar>mode);
3243 if (ele == const0_rtx)
3245 emit_insn (gen_vsx_extract_<mode> (tmp, vec_reg, const1_rtx));
3246 emit_insn (gen_vsx_concat_<mode> (dest, value, tmp));
3249 else if (ele == const1_rtx)
3251 emit_insn (gen_vsx_extract_<mode> (tmp, vec_reg, const0_rtx));
3252 emit_insn (gen_vsx_concat_<mode> (dest, tmp, value));
3259 ;; Extract a DF/DI element from V2DF/V2DI
;; Optimize cases where we can do a simple or direct move.
3261 ;; Or see if we can avoid doing the move at all
3263 ;; There are some unresolved problems with reload that show up if an Altivec
3264 ;; register was picked. Limit the scalar value to FPRs for now.
3266 (define_insn "vsx_extract_<mode>"
3267 [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=d, d, wr, wr")
3268 (vec_select:<VS_scalar>
3269 (match_operand:VSX_D 1 "gpc_reg_operand" "wa, wa, wa, wa")
3271 [(match_operand:QI 2 "const_0_to_1_operand" "wD, n, wD, n")])))]
3272 "VECTOR_MEM_VSX_P (<MODE>mode)"
3274 int element = INTVAL (operands[2]);
3275 int op0_regno = REGNO (operands[0]);
3276 int op1_regno = REGNO (operands[1]);
3279 gcc_assert (IN_RANGE (element, 0, 1));
3280 gcc_assert (VSX_REGNO_P (op1_regno));
3282 if (element == VECTOR_ELEMENT_SCALAR_64BIT)
3284 if (op0_regno == op1_regno)
3285 return ASM_COMMENT_START " vec_extract to same register";
3287 else if (INT_REGNO_P (op0_regno) && TARGET_DIRECT_MOVE
3288 && TARGET_POWERPC64)
3289 return "mfvsrd %0,%x1";
3291 else if (FP_REGNO_P (op0_regno) && FP_REGNO_P (op1_regno))
3294 else if (VSX_REGNO_P (op0_regno))
3295 return "xxlor %x0,%x1,%x1";
3301 else if (element == VECTOR_ELEMENT_MFVSRLD_64BIT && INT_REGNO_P (op0_regno)
3302 && TARGET_P9_VECTOR && TARGET_POWERPC64 && TARGET_DIRECT_MOVE)
3303 return "mfvsrld %0,%x1";
3305 else if (VSX_REGNO_P (op0_regno))
3307 fldDM = element << 1;
3308 if (!BYTES_BIG_ENDIAN)
3310 operands[3] = GEN_INT (fldDM);
3311 return "xxpermdi %x0,%x1,%x1,%3";
3317 [(set_attr "type" "veclogical,mfvsr,mfvsr,vecperm")
3318 (set_attr "isa" "*,*,p8v,p9v")])
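;; Usage sketch (assuming the altivec.h vec_extract binding); as the
;; code above shows, extracting the element that already sits in the
;; scalar position of the register needs no instruction at all:
;;
;;   #include <altivec.h>
;;
;;   double first (vector double v)
;;   {
;;     return vec_extract (v, 0);
;;   }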
3320 ;; Optimize extracting a single scalar element from memory.
3321 (define_insn_and_split "*vsx_extract_<P:mode>_<VSX_D:mode>_load"
3322 [(set (match_operand:<VS_scalar> 0 "register_operand" "=wa,wr")
3323 (vec_select:<VSX_D:VS_scalar>
3324 (match_operand:VSX_D 1 "memory_operand" "m,m")
3325 (parallel [(match_operand:QI 2 "const_0_to_1_operand" "n,n")])))
3326 (clobber (match_scratch:P 3 "=&b,&b"))]
3327 "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<VSX_D:MODE>mode)"
3329 "&& reload_completed"
3330 [(set (match_dup 0) (match_dup 4))]
3332 operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
3333 operands[3], <VSX_D:VS_scalar>mode);
3335 [(set_attr "type" "fpload,load")
3336 (set_attr "length" "8")])
;; Optimize storing a single scalar element that is already in the right
;; location to store
3340 (define_insn "*vsx_extract_<mode>_store"
3341 [(set (match_operand:<VS_scalar> 0 "memory_operand" "=m,Z,wY")
3342 (vec_select:<VS_scalar>
3343 (match_operand:VSX_D 1 "register_operand" "d,v,v")
3344 (parallel [(match_operand:QI 2 "vsx_scalar_64bit" "wD,wD,wD")])))]
3345 "VECTOR_MEM_VSX_P (<MODE>mode)"
3350 [(set_attr "type" "fpstore")
3351 (set_attr "isa" "*,p7v,p9v")])
3353 ;; Variable V2DI/V2DF extract shift
3354 (define_insn "vsx_vslo_<mode>"
3355 [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=v")
3356 (unspec:<VS_scalar> [(match_operand:VSX_D 1 "gpc_reg_operand" "v")
3357 (match_operand:V2DI 2 "gpc_reg_operand" "v")]
3359 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3361 [(set_attr "type" "vecperm")])
3363 ;; Variable V2DI/V2DF extract from a register
3364 (define_insn_and_split "vsx_extract_<mode>_var"
3365 [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=v")
3366 (unspec:<VS_scalar> [(match_operand:VSX_D 1 "gpc_reg_operand" "v")
3367 (match_operand:DI 2 "gpc_reg_operand" "r")]
3368 UNSPEC_VSX_EXTRACT))
3369 (clobber (match_scratch:DI 3 "=r"))
3370 (clobber (match_scratch:V2DI 4 "=&v"))]
3371 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3373 "&& reload_completed"
3376 rs6000_split_vec_extract_var (operands[0], operands[1], operands[2],
3377 operands[3], operands[4]);
3381 ;; Variable V2DI/V2DF extract from memory
3382 (define_insn_and_split "*vsx_extract_<mode>_var_load"
3383 [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=wa,r")
3384 (unspec:<VS_scalar> [(match_operand:VSX_D 1 "memory_operand" "Q,Q")
3385 (match_operand:DI 2 "gpc_reg_operand" "r,r")]
3386 UNSPEC_VSX_EXTRACT))
3387 (clobber (match_scratch:DI 3 "=&b,&b"))]
3388 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3390 "&& reload_completed"
3391 [(set (match_dup 0) (match_dup 4))]
3393 operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
3394 operands[3], <VS_scalar>mode);
3396 [(set_attr "type" "fpload,load")])
3398 ;; Extract a SF element from V4SF
3399 (define_insn_and_split "vsx_extract_v4sf"
3400 [(set (match_operand:SF 0 "vsx_register_operand" "=wa")
3402 (match_operand:V4SF 1 "vsx_register_operand" "wa")
3403 (parallel [(match_operand:QI 2 "u5bit_cint_operand" "n")])))
3404 (clobber (match_scratch:V4SF 3 "=0"))]
3405 "VECTOR_UNIT_VSX_P (V4SFmode)"
3410 rtx op0 = operands[0];
3411 rtx op1 = operands[1];
3412 rtx op2 = operands[2];
3413 rtx op3 = operands[3];
3415 HOST_WIDE_INT ele = BYTES_BIG_ENDIAN ? INTVAL (op2) : 3 - INTVAL (op2);
3421 if (GET_CODE (op3) == SCRATCH)
3422 op3 = gen_reg_rtx (V4SFmode);
3423 emit_insn (gen_vsx_xxsldwi_v4sf (op3, op1, op1, GEN_INT (ele)));
3426 emit_insn (gen_vsx_xscvspdp_scalar2 (op0, tmp));
3429 [(set_attr "length" "8")
3430 (set_attr "type" "fp")])
3432 (define_insn_and_split "*vsx_extract_v4sf_<mode>_load"
3433 [(set (match_operand:SF 0 "register_operand" "=f,v,v,?r")
3435 (match_operand:V4SF 1 "memory_operand" "m,Z,m,m")
3436 (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n,n,n")])))
3437 (clobber (match_scratch:P 3 "=&b,&b,&b,&b"))]
3438 "VECTOR_MEM_VSX_P (V4SFmode)"
3440 "&& reload_completed"
3441 [(set (match_dup 0) (match_dup 4))]
3443 operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
3444 operands[3], SFmode);
3446 [(set_attr "type" "fpload,fpload,fpload,load")
3447 (set_attr "length" "8")
3448 (set_attr "isa" "*,p7v,p9v,*")])
3450 ;; Variable V4SF extract from a register
3451 (define_insn_and_split "vsx_extract_v4sf_var"
3452 [(set (match_operand:SF 0 "gpc_reg_operand" "=wa")
3453 (unspec:SF [(match_operand:V4SF 1 "gpc_reg_operand" "v")
3454 (match_operand:DI 2 "gpc_reg_operand" "r")]
3455 UNSPEC_VSX_EXTRACT))
3456 (clobber (match_scratch:DI 3 "=r"))
3457 (clobber (match_scratch:V2DI 4 "=&v"))]
3458 "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_DIRECT_MOVE_64BIT"
3460 "&& reload_completed"
3463 rs6000_split_vec_extract_var (operands[0], operands[1], operands[2],
3464 operands[3], operands[4]);
3468 ;; Variable V4SF extract from memory
3469 (define_insn_and_split "*vsx_extract_v4sf_var_load"
3470 [(set (match_operand:SF 0 "gpc_reg_operand" "=wa,?r")
3471 (unspec:SF [(match_operand:V4SF 1 "memory_operand" "Q,Q")
3472 (match_operand:DI 2 "gpc_reg_operand" "r,r")]
3473 UNSPEC_VSX_EXTRACT))
3474 (clobber (match_scratch:DI 3 "=&b,&b"))]
3475 "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_DIRECT_MOVE_64BIT"
3477 "&& reload_completed"
3478 [(set (match_dup 0) (match_dup 4))]
3480 operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
3481 operands[3], SFmode);
3483 [(set_attr "type" "fpload,load")])
3485 ;; Expand the builtin form of xxpermdi to canonical rtl.
3486 (define_expand "vsx_xxpermdi_<mode>"
3487 [(match_operand:VSX_L 0 "vsx_register_operand")
3488 (match_operand:VSX_L 1 "vsx_register_operand")
3489 (match_operand:VSX_L 2 "vsx_register_operand")
3490 (match_operand:QI 3 "u5bit_cint_operand")]
3491 "VECTOR_MEM_VSX_P (<MODE>mode)"
3493 rtx target = operands[0];
3494 rtx op0 = operands[1];
3495 rtx op1 = operands[2];
3496 int mask = INTVAL (operands[3]);
3497 rtx perm0 = GEN_INT ((mask >> 1) & 1);
3498 rtx perm1 = GEN_INT ((mask & 1) + 2);
3499 rtx (*gen) (rtx, rtx, rtx, rtx, rtx);
3501 if (<MODE>mode == V2DFmode)
3502 gen = gen_vsx_xxpermdi2_v2df_1;
3505 gen = gen_vsx_xxpermdi2_v2di_1;
3506 if (<MODE>mode != V2DImode)
3508 target = gen_lowpart (V2DImode, target);
3509 op0 = gen_lowpart (V2DImode, op0);
3510 op1 = gen_lowpart (V2DImode, op1);
3513 emit_insn (gen (target, op0, op1, perm0, perm1));
3517 ;; Special version of xxpermdi that retains big-endian semantics.
3518 (define_expand "vsx_xxpermdi_<mode>_be"
3519 [(match_operand:VSX_L 0 "vsx_register_operand")
3520 (match_operand:VSX_L 1 "vsx_register_operand")
3521 (match_operand:VSX_L 2 "vsx_register_operand")
3522 (match_operand:QI 3 "u5bit_cint_operand")]
3523 "VECTOR_MEM_VSX_P (<MODE>mode)"
3525 rtx target = operands[0];
3526 rtx op0 = operands[1];
3527 rtx op1 = operands[2];
3528 int mask = INTVAL (operands[3]);
3529 rtx perm0 = GEN_INT ((mask >> 1) & 1);
3530 rtx perm1 = GEN_INT ((mask & 1) + 2);
3531 rtx (*gen) (rtx, rtx, rtx, rtx, rtx);
3533 if (<MODE>mode == V2DFmode)
3534 gen = gen_vsx_xxpermdi2_v2df_1;
3537 gen = gen_vsx_xxpermdi2_v2di_1;
3538 if (<MODE>mode != V2DImode)
3540 target = gen_lowpart (V2DImode, target);
3541 op0 = gen_lowpart (V2DImode, op0);
3542 op1 = gen_lowpart (V2DImode, op1);
3545 /* In little endian mode, vsx_xxpermdi2_<mode>_1 will perform a
3546 transformation we don't want; it is necessary for
3547 rs6000_expand_vec_perm_const_1 but not for this use. So we
3548 prepare for that by reversing the transformation here. */
3549 if (BYTES_BIG_ENDIAN)
3550 emit_insn (gen (target, op0, op1, perm0, perm1));
3553 rtx p0 = GEN_INT (3 - INTVAL (perm1));
3554 rtx p1 = GEN_INT (3 - INTVAL (perm0));
3555 emit_insn (gen (target, op1, op0, p0, p1));
3560 (define_insn "vsx_xxpermdi2_<mode>_1"
3561 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
3563 (vec_concat:<VS_double>
3564 (match_operand:VSX_D 1 "vsx_register_operand" "wa")
3565 (match_operand:VSX_D 2 "vsx_register_operand" "wa"))
3566 (parallel [(match_operand 3 "const_0_to_1_operand" "")
3567 (match_operand 4 "const_2_to_3_operand" "")])))]
3568 "VECTOR_MEM_VSX_P (<MODE>mode)"
3572 /* For little endian, swap operands and invert/swap selectors
3573 to get the correct xxpermdi. The operand swap sets up the
3574 inputs as a little endian array. The selectors are swapped
3575 because they are defined to use big endian ordering. The
3576 selectors are inverted to get the correct doublewords for
3577 little endian ordering. */
3578 if (BYTES_BIG_ENDIAN)
3580 op3 = INTVAL (operands[3]);
3581 op4 = INTVAL (operands[4]);
3585 op3 = 3 - INTVAL (operands[4]);
3586 op4 = 3 - INTVAL (operands[3]);
3589 mask = (op3 << 1) | (op4 - 2);
3590 operands[3] = GEN_INT (mask);
3592 if (BYTES_BIG_ENDIAN)
3593 return "xxpermdi %x0,%x1,%x2,%3";
3595 return "xxpermdi %x0,%x2,%x1,%3";
3597 [(set_attr "type" "vecperm")])
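;; Worked example of the selector encoding above: for big endian with
;; operands[3] = 1 and operands[4] = 2,
;;
;;   mask = (1 << 1) | (2 - 2) = 2
;;
;; so "xxpermdi %x0,%x1,%x2,2" selects doubleword 1 of operand 1 and
;; doubleword 0 of operand 2.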
3599 ;; Extraction of a single element in a small integer vector. Until ISA 3.0,
3600 ;; none of the small types were allowed in a vector register, so we had to
3601 ;; extract to a DImode and either do a direct move or store.
3602 (define_expand "vsx_extract_<mode>"
3603 [(parallel [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand")
3604 (vec_select:<VS_scalar>
3605 (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand")
3606 (parallel [(match_operand:QI 2 "const_int_operand")])))
3607 (clobber (match_scratch:VSX_EXTRACT_I 3))])]
3608 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
/* If we have ISA 3.0, we can do an xxextractuw/vextractu{b,h}.  */
3611 if (TARGET_P9_VECTOR)
3613 emit_insn (gen_vsx_extract_<mode>_p9 (operands[0], operands[1],
3619 (define_insn "vsx_extract_<mode>_p9"
3620 [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=r,<VSX_EX>")
3621 (vec_select:<VS_scalar>
3622 (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v,<VSX_EX>")
3623 (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n,n")])))
3624 (clobber (match_scratch:SI 3 "=r,X"))]
3625 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB"
3627 if (which_alternative == 0)
3632 HOST_WIDE_INT elt = INTVAL (operands[2]);
3633 HOST_WIDE_INT elt_adj = (!BYTES_BIG_ENDIAN
3634 ? GET_MODE_NUNITS (<MODE>mode) - 1 - elt
3637 HOST_WIDE_INT unit_size = GET_MODE_UNIT_SIZE (<MODE>mode);
3638 HOST_WIDE_INT offset = unit_size * elt_adj;
3640 operands[2] = GEN_INT (offset);
3642 return "xxextractuw %x0,%x1,%2";
3644 return "vextractu<wd> %0,%1,%2";
3647 [(set_attr "type" "vecsimple")
3648 (set_attr "isa" "p9v,*")])
3651 [(set (match_operand:<VS_scalar> 0 "int_reg_operand")
3652 (vec_select:<VS_scalar>
3653 (match_operand:VSX_EXTRACT_I 1 "altivec_register_operand")
3654 (parallel [(match_operand:QI 2 "const_int_operand")])))
3655 (clobber (match_operand:SI 3 "int_reg_operand"))]
3656 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB && reload_completed"
3659 rtx op0_si = gen_rtx_REG (SImode, REGNO (operands[0]));
3660 rtx op1 = operands[1];
3661 rtx op2 = operands[2];
3662 rtx op3 = operands[3];
3663 HOST_WIDE_INT offset = INTVAL (op2) * GET_MODE_UNIT_SIZE (<MODE>mode);
3665 emit_move_insn (op3, GEN_INT (offset));
3666 if (BYTES_BIG_ENDIAN)
3667 emit_insn (gen_vextu<wd>lx (op0_si, op3, op1));
3669 emit_insn (gen_vextu<wd>rx (op0_si, op3, op1));
3673 ;; Optimize zero extracts to eliminate the AND after the extract.
3674 (define_insn_and_split "*vsx_extract_<mode>_di_p9"
3675 [(set (match_operand:DI 0 "gpc_reg_operand" "=r,<VSX_EX>")
3677 (vec_select:<VS_scalar>
3678 (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v,<VSX_EX>")
3679 (parallel [(match_operand:QI 2 "const_int_operand" "n,n")]))))
3680 (clobber (match_scratch:SI 3 "=r,X"))]
3681 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB"
3683 "&& reload_completed"
3684 [(parallel [(set (match_dup 4)
3685 (vec_select:<VS_scalar>
3687 (parallel [(match_dup 2)])))
3688 (clobber (match_dup 3))])]
3690 operands[4] = gen_rtx_REG (<VS_scalar>mode, REGNO (operands[0]));
3692 [(set_attr "isa" "p9v,*")])
3694 ;; Optimize stores to use the ISA 3.0 scalar store instructions
3695 (define_insn_and_split "*vsx_extract_<mode>_store_p9"
3696 [(set (match_operand:<VS_scalar> 0 "memory_operand" "=Z,m")
3697 (vec_select:<VS_scalar>
3698 (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "<VSX_EX>,v")
3699 (parallel [(match_operand:QI 2 "const_int_operand" "n,n")])))
3700 (clobber (match_scratch:<VS_scalar> 3 "=<VSX_EX>,&r"))
3701 (clobber (match_scratch:SI 4 "=X,&r"))]
3702 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB"
3704 "&& reload_completed"
3705 [(parallel [(set (match_dup 3)
3706 (vec_select:<VS_scalar>
3708 (parallel [(match_dup 2)])))
3709 (clobber (match_dup 4))])
3713 (define_insn_and_split "*vsx_extract_si"
3714 [(set (match_operand:SI 0 "nonimmediate_operand" "=r,wa,Z")
3716 (match_operand:V4SI 1 "gpc_reg_operand" "v,v,v")
3717 (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n,n")])))
3718 (clobber (match_scratch:V4SI 3 "=v,v,v"))]
3719 "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT && !TARGET_P9_VECTOR"
3721 "&& reload_completed"
3724 rtx dest = operands[0];
3725 rtx src = operands[1];
3726 rtx element = operands[2];
3727 rtx vec_tmp = operands[3];
3730 if (!BYTES_BIG_ENDIAN)
3731 element = GEN_INT (GET_MODE_NUNITS (V4SImode) - 1 - INTVAL (element));
/* If the value is in the correct position, we can avoid doing the VSPLT<x>
instruction.  */
3735 value = INTVAL (element);
3737 emit_insn (gen_altivec_vspltw_direct (vec_tmp, src, element));
3741 if (MEM_P (operands[0]))
3743 if (can_create_pseudo_p ())
3744 dest = rs6000_force_indexed_or_indirect_mem (dest);
3746 if (TARGET_P8_VECTOR)
3747 emit_move_insn (dest, gen_rtx_REG (SImode, REGNO (vec_tmp)));
3749 emit_insn (gen_stfiwx (dest, gen_rtx_REG (DImode, REGNO (vec_tmp))));
3752 else if (TARGET_P8_VECTOR)
3753 emit_move_insn (dest, gen_rtx_REG (SImode, REGNO (vec_tmp)));
3755 emit_move_insn (gen_rtx_REG (DImode, REGNO (dest)),
3756 gen_rtx_REG (DImode, REGNO (vec_tmp)));
3760 [(set_attr "type" "mfvsr,vecperm,fpstore")
3761 (set_attr "length" "8")
3762 (set_attr "isa" "*,p8v,*")])
3764 (define_insn_and_split "*vsx_extract_<mode>_p8"
3765 [(set (match_operand:<VS_scalar> 0 "nonimmediate_operand" "=r")
3766 (vec_select:<VS_scalar>
3767 (match_operand:VSX_EXTRACT_I2 1 "gpc_reg_operand" "v")
3768 (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n")])))
3769 (clobber (match_scratch:VSX_EXTRACT_I2 3 "=v"))]
3770 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT
3771 && !TARGET_P9_VECTOR"
3773 "&& reload_completed"
3776 rtx dest = operands[0];
3777 rtx src = operands[1];
3778 rtx element = operands[2];
3779 rtx vec_tmp = operands[3];
3782 if (!BYTES_BIG_ENDIAN)
3783 element = GEN_INT (GET_MODE_NUNITS (<MODE>mode) - 1 - INTVAL (element));
/* If the value is in the correct position, we can avoid doing the VSPLT<x>
instruction.  */
3787 value = INTVAL (element);
3788 if (<MODE>mode == V16QImode)
3791 emit_insn (gen_altivec_vspltb_direct (vec_tmp, src, element));
3795 else if (<MODE>mode == V8HImode)
3798 emit_insn (gen_altivec_vsplth_direct (vec_tmp, src, element));
3805 emit_move_insn (gen_rtx_REG (DImode, REGNO (dest)),
3806 gen_rtx_REG (DImode, REGNO (vec_tmp)));
3809 [(set_attr "type" "mfvsr")])
3811 ;; Optimize extracting a single scalar element from memory.
3812 (define_insn_and_split "*vsx_extract_<mode>_load"
3813 [(set (match_operand:<VS_scalar> 0 "register_operand" "=r")
3814 (vec_select:<VS_scalar>
3815 (match_operand:VSX_EXTRACT_I 1 "memory_operand" "m")
3816 (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n")])))
3817 (clobber (match_scratch:DI 3 "=&b"))]
3818 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3820 "&& reload_completed"
3821 [(set (match_dup 0) (match_dup 4))]
3823 operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
3824 operands[3], <VS_scalar>mode);
3826 [(set_attr "type" "load")
3827 (set_attr "length" "8")])
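
;; For example, an extract whose source is in memory; the splitter above
;; turns it into a single scalar load (a sketch; the final address depends
;; on the element number and endianness):
;;
;;   #include <altivec.h>
;;   short load_elem3 (vector short *p) { return vec_extract (*p, 3); }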
3829 ;; Variable V16QI/V8HI/V4SI extract from a register
3830 (define_insn_and_split "vsx_extract_<mode>_var"
3831 [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=r,r")
3833 [(match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v,v")
3834 (match_operand:DI 2 "gpc_reg_operand" "r,r")]
3835 UNSPEC_VSX_EXTRACT))
3836 (clobber (match_scratch:DI 3 "=r,r"))
3837 (clobber (match_scratch:V2DI 4 "=X,&v"))]
3838 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3840 "&& reload_completed"
3843 rs6000_split_vec_extract_var (operands[0], operands[1], operands[2],
3844 operands[3], operands[4]);
3847 [(set_attr "isa" "p9v,*")])
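
;; A variable-index extract such as this sketch maps onto the pattern
;; above; the index need not be a compile-time constant:
;;
;;   #include <altivec.h>
;;   unsigned char extract_var (vector unsigned char v, long n)
;;   { return vec_extract (v, n); }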
3849 ;; Variable V16QI/V8HI/V4SI extract from memory
3850 (define_insn_and_split "*vsx_extract_<mode>_var_load"
3851 [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=r")
3853 [(match_operand:VSX_EXTRACT_I 1 "memory_operand" "Q")
3854 (match_operand:DI 2 "gpc_reg_operand" "r")]
3855 UNSPEC_VSX_EXTRACT))
3856 (clobber (match_scratch:DI 3 "=&b"))]
3857 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3859 "&& reload_completed"
3860 [(set (match_dup 0) (match_dup 4))]
3862 operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
3863 operands[3], <VS_scalar>mode);
3865 [(set_attr "type" "load")])
3868 (define_expand "vextractl<mode>"
3869 [(set (match_operand:V2DI 0 "altivec_register_operand")
3870 (unspec:V2DI [(match_operand:VI2 1 "altivec_register_operand")
3871 (match_operand:VI2 2 "altivec_register_operand")
3872 (match_operand:SI 3 "register_operand")]
3876 if (BYTES_BIG_ENDIAN)
3878 emit_insn (gen_vextractl<mode>_internal (operands[0], operands[1],
3879 operands[2], operands[3]));
3880 emit_insn (gen_xxswapd_v2di (operands[0], operands[0]));
3883 emit_insn (gen_vextractr<mode>_internal (operands[0], operands[2],
3884 operands[1], operands[3]));
3888 (define_insn "vextractl<mode>_internal"
3889 [(set (match_operand:V2DI 0 "altivec_register_operand" "=v")
3890 (unspec:V2DI [(match_operand:VEC_I 1 "altivec_register_operand" "v")
3891 (match_operand:VEC_I 2 "altivec_register_operand" "v")
3892 (match_operand:SI 3 "register_operand" "r")]
3895 "vext<du_or_d><wd>vlx %0,%1,%2,%3"
3896 [(set_attr "type" "vecsimple")])
3898 (define_expand "vextractr<mode>"
3899 [(set (match_operand:V2DI 0 "altivec_register_operand")
3900 (unspec:V2DI [(match_operand:VI2 1 "altivec_register_operand")
3901 (match_operand:VI2 2 "altivec_register_operand")
3902 (match_operand:SI 3 "register_operand")]
3906 if (BYTES_BIG_ENDIAN)
3908 emit_insn (gen_vextractr<mode>_internal (operands[0], operands[1],
3909 operands[2], operands[3]));
3910 emit_insn (gen_xxswapd_v2di (operands[0], operands[0]));
3913 emit_insn (gen_vextractl<mode>_internal (operands[0], operands[2],
3914 operands[1], operands[3]));
3918 (define_insn "vextractr<mode>_internal"
3919 [(set (match_operand:V2DI 0 "altivec_register_operand" "=v")
3920 (unspec:V2DI [(match_operand:VEC_I 1 "altivec_register_operand" "v")
3921 (match_operand:VEC_I 2 "altivec_register_operand" "v")
3922 (match_operand:SI 3 "register_operand" "r")]
3925 "vext<du_or_d><wd>vrx %0,%1,%2,%3"
3926 [(set_attr "type" "vecsimple")])
3928 (define_expand "vinsertvl_<mode>"
3929 [(set (match_operand:VI2 0 "altivec_register_operand")
3930 (unspec:VI2 [(match_operand:VI2 1 "altivec_register_operand")
3931 (match_operand:VI2 2 "altivec_register_operand")
3932 (match_operand:SI 3 "register_operand" "r")]
3936 if (BYTES_BIG_ENDIAN)
3937 emit_insn (gen_vinsertvl_internal_<mode> (operands[0], operands[3],
3938 operands[1], operands[2]));
3940 emit_insn (gen_vinsertvr_internal_<mode> (operands[0], operands[3],
3941 operands[1], operands[2]));
3945 (define_insn "vinsertvl_internal_<mode>"
3946 [(set (match_operand:VEC_I 0 "altivec_register_operand" "=v")
3947 (unspec:VEC_I [(match_operand:SI 1 "register_operand" "r")
3948 (match_operand:VEC_I 2 "altivec_register_operand" "v")
3949 (match_operand:VEC_I 3 "altivec_register_operand" "0")]
3952 "vins<wd>vlx %0,%1,%2"
3953 [(set_attr "type" "vecsimple")])
3955 (define_expand "vinsertvr_<mode>"
3956 [(set (match_operand:VI2 0 "altivec_register_operand")
3957 (unspec:VI2 [(match_operand:VI2 1 "altivec_register_operand")
3958 (match_operand:VI2 2 "altivec_register_operand")
3959 (match_operand:SI 3 "register_operand" "r")]
3963 if (BYTES_BIG_ENDIAN)
3964 emit_insn (gen_vinsertvr_internal_<mode> (operands[0], operands[3],
3965 operands[1], operands[2]));
3967 emit_insn (gen_vinsertvl_internal_<mode> (operands[0], operands[3],
3968 operands[1], operands[2]));
3972 (define_insn "vinsertvr_internal_<mode>"
3973 [(set (match_operand:VEC_I 0 "altivec_register_operand" "=v")
3974 (unspec:VEC_I [(match_operand:SI 1 "register_operand" "r")
3975 (match_operand:VEC_I 2 "altivec_register_operand" "v")
3976 (match_operand:VEC_I 3 "altivec_register_operand" "0")]
3979 "vins<wd>vrx %0,%1,%2"
3980 [(set_attr "type" "vecsimple")])
3982 (define_expand "vinsertgl_<mode>"
3983 [(set (match_operand:VI2 0 "altivec_register_operand")
3984 (unspec:VI2 [(match_operand:SI 1 "register_operand")
3985 (match_operand:VI2 2 "altivec_register_operand")
3986 (match_operand:SI 3 "register_operand")]
3990 if (BYTES_BIG_ENDIAN)
3991 emit_insn (gen_vinsertgl_internal_<mode> (operands[0], operands[3],
3992 operands[1], operands[2]));
3994 emit_insn (gen_vinsertgr_internal_<mode> (operands[0], operands[3],
3995 operands[1], operands[2]));
3999 (define_insn "vinsertgl_internal_<mode>"
4000 [(set (match_operand:VEC_I 0 "altivec_register_operand" "=v")
4001 (unspec:VEC_I [(match_operand:SI 1 "register_operand" "r")
4002 (match_operand:SI 2 "register_operand" "r")
4003 (match_operand:VEC_I 3 "altivec_register_operand" "0")]
4006 "vins<wd>lx %0,%1,%2"
4007 [(set_attr "type" "vecsimple")])
4009 (define_expand "vinsertgr_<mode>"
4010 [(set (match_operand:VI2 0 "altivec_register_operand")
4011 (unspec:VI2 [(match_operand:SI 1 "register_operand")
4012 (match_operand:VI2 2 "altivec_register_operand")
4013 (match_operand:SI 3 "register_operand")]
4017 if (BYTES_BIG_ENDIAN)
4018 emit_insn (gen_vinsertgr_internal_<mode> (operands[0], operands[3],
4019 operands[1], operands[2]));
4021 emit_insn (gen_vinsertgl_internal_<mode> (operands[0], operands[3],
4022 operands[1], operands[2]));
4026 (define_insn "vinsertgr_internal_<mode>"
4027 [(set (match_operand:VEC_I 0 "altivec_register_operand" "=v")
4028 (unspec:VEC_I [(match_operand:SI 1 "register_operand" "r")
4029 (match_operand:SI 2 "register_operand" "r")
4030 (match_operand:VEC_I 3 "altivec_register_operand" "0")]
4033 "vins<wd>rx %0,%1,%2"
4034 [(set_attr "type" "vecsimple")])
4036 (define_expand "vreplace_elt_<mode>"
4037 [(set (match_operand:REPLACE_ELT 0 "register_operand")
4038 (unspec:REPLACE_ELT [(match_operand:REPLACE_ELT 1 "register_operand")
4039 (match_operand:<VS_scalar> 2 "register_operand")
4040 (match_operand:QI 3 "const_0_to_3_operand")]
4041 UNSPEC_REPLACE_ELT))]
4045 /* Immediate value is the word index; convert it to a byte index and
4046 adjust for endianness if needed. */
4047 if (BYTES_BIG_ENDIAN)
4048 index = INTVAL (operands[3]) << <REPLACE_ELT_sh>;
4051 index = <REPLACE_ELT_max> - (INTVAL (operands[3]) << <REPLACE_ELT_sh>);
4053 emit_insn (gen_vreplace_elt_<mode>_inst (operands[0], operands[1],
4058 [(set_attr "type" "vecsimple")])
4060 (define_expand "vreplace_un_<mode>"
4061 [(set (match_operand:REPLACE_ELT 0 "register_operand")
4062 (unspec:REPLACE_ELT [(match_operand:REPLACE_ELT 1 "register_operand")
4063 (match_operand:<VS_scalar> 2 "register_operand")
4064 (match_operand:QI 3 "const_0_to_12_operand")]
4065 UNSPEC_REPLACE_UN))]
4068 /* Immediate value is the byte index in big-endian numbering. */
4069 emit_insn (gen_vreplace_elt_<mode>_inst (operands[0], operands[1],
4070 operands[2], operands[3]));
4073 [(set_attr "type" "vecsimple")])
4075 (define_insn "vreplace_elt_<mode>_inst"
4076 [(set (match_operand:REPLACE_ELT 0 "register_operand" "=v")
4077 (unspec:REPLACE_ELT [(match_operand:REPLACE_ELT 1 "register_operand" "0")
4078 (match_operand:<VS_scalar> 2 "register_operand" "r")
4079 (match_operand:QI 3 "const_0_to_12_operand" "n")]
4080 UNSPEC_REPLACE_ELT))]
4082 "vins<REPLACE_ELT_char> %0,%2,%3"
4083 [(set_attr "type" "vecsimple")])
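
;; The vreplace expanders above back the vec_replace_elt and
;; vec_replace_unaligned builtins; a sketch of a use (assuming an
;; ISA 3.1 style target):
;;
;;   #include <altivec.h>
;;   vector unsigned int replace_word2 (vector unsigned int v, unsigned int x)
;;   { return vec_replace_elt (v, x, 2); }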
4085 ;; VSX_EXTRACT optimizations
4086 ;; Optimize double d = (double) vec_extract (vi, <n>)
4087 ;; Get the element into the top position and use XVCVSWDP/XVCVUWDP
4088 (define_insn_and_split "*vsx_extract_si_<uns>float_df"
4089 [(set (match_operand:DF 0 "gpc_reg_operand" "=wa")
4092 (match_operand:V4SI 1 "gpc_reg_operand" "v")
4093 (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n")]))))
4094 (clobber (match_scratch:V4SI 3 "=v"))]
4095 "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
4100 rtx dest = operands[0];
4101 rtx src = operands[1];
4102 rtx element = operands[2];
4103 rtx v4si_tmp = operands[3];
4106 if (!BYTES_BIG_ENDIAN)
4107 element = GEN_INT (GET_MODE_NUNITS (V4SImode) - 1 - INTVAL (element));
4109 /* If the value is in the correct position, we can avoid doing the VSPLT<x>
4110 instruction. */
4111 value = INTVAL (element);
4114 if (GET_CODE (v4si_tmp) == SCRATCH)
4115 v4si_tmp = gen_reg_rtx (V4SImode);
4116 emit_insn (gen_altivec_vspltw_direct (v4si_tmp, src, element));
4121 emit_insn (gen_vsx_xvcv<su>xwdp_df (dest, v4si_tmp));
4125 ;; Optimize <type> f = (<type>) vec_extract (vi, <n>)
4126 ;; where <type> is a floating point type supported by the hardware that is
4127 ;; not double. First convert the value to double, and then to the desired
4129 (define_insn_and_split "*vsx_extract_si_<uns>float_<mode>"
4130 [(set (match_operand:VSX_EXTRACT_FL 0 "gpc_reg_operand" "=wa")
4131 (any_float:VSX_EXTRACT_FL
4133 (match_operand:V4SI 1 "gpc_reg_operand" "v")
4134 (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n")]))))
4135 (clobber (match_scratch:V4SI 3 "=v"))
4136 (clobber (match_scratch:DF 4 "=wa"))]
4137 "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
4142 rtx dest = operands[0];
4143 rtx src = operands[1];
4144 rtx element = operands[2];
4145 rtx v4si_tmp = operands[3];
4146 rtx df_tmp = operands[4];
4149 if (!BYTES_BIG_ENDIAN)
4150 element = GEN_INT (GET_MODE_NUNITS (V4SImode) - 1 - INTVAL (element));
4152 /* If the value is in the correct position, we can avoid doing the VSPLT<x>
4153 instruction. */
4154 value = INTVAL (element);
4157 if (GET_CODE (v4si_tmp) == SCRATCH)
4158 v4si_tmp = gen_reg_rtx (V4SImode);
4159 emit_insn (gen_altivec_vspltw_direct (v4si_tmp, src, element));
4164 if (GET_CODE (df_tmp) == SCRATCH)
4165 df_tmp = gen_reg_rtx (DFmode);
4167 emit_insn (gen_vsx_xvcv<su>xwdp_df (df_tmp, v4si_tmp));
4169 if (<MODE>mode == SFmode)
4170 emit_insn (gen_truncdfsf2 (dest, df_tmp));
4171 else if (<MODE>mode == TFmode && FLOAT128_IBM_P (TFmode))
4172 emit_insn (gen_extenddftf2_vsx (dest, df_tmp));
4173 else if (<MODE>mode == TFmode && FLOAT128_IEEE_P (TFmode)
4174 && TARGET_FLOAT128_HW)
4175 emit_insn (gen_extenddftf2_hw (dest, df_tmp));
4176 else if (<MODE>mode == IFmode && FLOAT128_IBM_P (IFmode))
4177 emit_insn (gen_extenddfif2 (dest, df_tmp));
4178 else if (<MODE>mode == KFmode && TARGET_FLOAT128_HW)
4179 emit_insn (gen_extenddfkf2_hw (dest, df_tmp));
4186 ;; Optimize <type> f = (<ftype>) vec_extract (<vtype>, <n>)
4187 ;; Where <ftype> is SFmode, DFmode (and KFmode/TFmode if those types are IEEE
4188 ;; 128-bit hardware types) and <vtype> is vector char, vector unsigned char,
4189 ;; vector short or vector unsigned short.
4190 (define_insn_and_split "*vsx_ext_<VSX_EXTRACT_I:VS_scalar>_fl_<FL_CONV:mode>"
4191 [(set (match_operand:FL_CONV 0 "gpc_reg_operand" "=wa")
4193 (vec_select:<VSX_EXTRACT_I:VS_scalar>
4194 (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v")
4195 (parallel [(match_operand:QI 2 "const_int_operand" "n")]))))
4196 (clobber (match_scratch:<VSX_EXTRACT_I:VS_scalar> 3 "=v"))]
4197 "VECTOR_MEM_VSX_P (<VSX_EXTRACT_I:MODE>mode) && TARGET_DIRECT_MOVE_64BIT
4198 && TARGET_P9_VECTOR"
4200 "&& reload_completed"
4201 [(parallel [(set (match_dup 3)
4202 (vec_select:<VSX_EXTRACT_I:VS_scalar>
4204 (parallel [(match_dup 2)])))
4205 (clobber (scratch:SI))])
4207 (sign_extend:DI (match_dup 3)))
4209 (float:<FL_CONV:MODE> (match_dup 4)))]
4211 operands[4] = gen_rtx_REG (DImode, REGNO (operands[3]));
4213 [(set_attr "isa" "<FL_CONV:VSisa>")])
4215 (define_insn_and_split "*vsx_ext_<VSX_EXTRACT_I:VS_scalar>_ufl_<FL_CONV:mode>"
4216 [(set (match_operand:FL_CONV 0 "gpc_reg_operand" "=wa")
4217 (unsigned_float:FL_CONV
4218 (vec_select:<VSX_EXTRACT_I:VS_scalar>
4219 (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v")
4220 (parallel [(match_operand:QI 2 "const_int_operand" "n")]))))
4221 (clobber (match_scratch:<VSX_EXTRACT_I:VS_scalar> 3 "=v"))]
4222 "VECTOR_MEM_VSX_P (<VSX_EXTRACT_I:MODE>mode) && TARGET_DIRECT_MOVE_64BIT
4223 && TARGET_P9_VECTOR"
4225 "&& reload_completed"
4226 [(parallel [(set (match_dup 3)
4227 (vec_select:<VSX_EXTRACT_I:VS_scalar>
4229 (parallel [(match_dup 2)])))
4230 (clobber (scratch:SI))])
4232 (float:<FL_CONV:MODE> (match_dup 4)))]
4234 operands[4] = gen_rtx_REG (DImode, REGNO (operands[3]));
4236 [(set_attr "isa" "<FL_CONV:VSisa>")])
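
;; Source along these lines exercises the two patterns above, fusing the
;; extract, the extension, and the integer-to-float conversion (a sketch,
;; assuming a power9-style target):
;;
;;   #include <altivec.h>
;;   float convert_elem5 (vector signed char v)
;;   { return (float) vec_extract (v, 5); }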
4238 ;; V4SI/V8HI/V16QI set operation on ISA 3.0
4239 (define_insn "vsx_set_<mode>_p9"
4240 [(set (match_operand:VSX_EXTRACT_I 0 "gpc_reg_operand" "=<VSX_EX>")
4241 (unspec:VSX_EXTRACT_I
4242 [(match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "0")
4243 (match_operand:<VS_scalar> 2 "gpc_reg_operand" "<VSX_EX>")
4244 (match_operand:QI 3 "<VSX_EXTRACT_PREDICATE>" "n")]
4246 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_P9_VECTOR && TARGET_POWERPC64"
4248 int ele = INTVAL (operands[3]);
4249 int nunits = GET_MODE_NUNITS (<MODE>mode);
4251 if (!BYTES_BIG_ENDIAN)
4252 ele = nunits - 1 - ele;
4254 operands[3] = GEN_INT (GET_MODE_SIZE (<VS_scalar>mode) * ele);
4255 if (<MODE>mode == V4SImode)
4256 return "xxinsertw %x0,%x2,%3";
4258 return "vinsert<wd> %0,%2,%3";
4260 [(set_attr "type" "vecperm")])
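
;; For illustration, a constant-position vec_insert such as the sketch
;; below can use this single xxinsertw/vinsert<wd> form:
;;
;;   #include <altivec.h>
;;   vector int insert_elem3 (vector int v, int x)
;;   { return vec_insert (x, v, 3); }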
4262 (define_insn_and_split "vsx_set_v4sf_p9"
4263 [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
4265 [(match_operand:V4SF 1 "gpc_reg_operand" "0")
4266 (match_operand:SF 2 "gpc_reg_operand" "wa")
4267 (match_operand:QI 3 "const_0_to_3_operand" "n")]
4269 (clobber (match_scratch:SI 4 "=&wa"))]
4270 "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR && TARGET_POWERPC64"
4272 "&& reload_completed"
4274 (unspec:V4SF [(match_dup 2)]
4275 UNSPEC_VSX_CVDPSPN))
4276 (parallel [(set (match_dup 4)
4277 (vec_select:SI (match_dup 6)
4278 (parallel [(match_dup 7)])))
4279 (clobber (scratch:SI))])
4281 (unspec:V4SI [(match_dup 8)
4286 unsigned int tmp_regno = reg_or_subregno (operands[4]);
4288 operands[5] = gen_rtx_REG (V4SFmode, tmp_regno);
4289 operands[6] = gen_rtx_REG (V4SImode, tmp_regno);
4290 operands[7] = GEN_INT (BYTES_BIG_ENDIAN ? 0 : 3);
4291 operands[8] = gen_rtx_REG (V4SImode, reg_or_subregno (operands[0]));
4293 [(set_attr "type" "vecperm")
4294 (set_attr "length" "12")
4295 (set_attr "isa" "p9v")])
4297 ;; Special case setting 0.0f to a V4SF element
4298 (define_insn_and_split "*vsx_set_v4sf_p9_zero"
4299 [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
4301 [(match_operand:V4SF 1 "gpc_reg_operand" "0")
4302 (match_operand:SF 2 "zero_fp_constant" "j")
4303 (match_operand:QI 3 "const_0_to_3_operand" "n")]
4305 (clobber (match_scratch:SI 4 "=&wa"))]
4306 "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR && TARGET_POWERPC64"
4308 "&& reload_completed"
4312 (unspec:V4SI [(match_dup 5)
4317 operands[5] = gen_rtx_REG (V4SImode, reg_or_subregno (operands[0]));
4319 [(set_attr "type" "vecperm")
4320 (set_attr "length" "8")
4321 (set_attr "isa" "p9v")])
4323 ;; Optimize x = vec_insert (vec_extract (v2, n), v1, m) if n is the element
4324 ;; that is in the default scalar position (1 for big endian, 2 for little
4325 ;; endian). We just need to do an xxinsertw since the element is in the
4326 ;; correct location.
4328 (define_insn "*vsx_insert_extract_v4sf_p9"
4329 [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
4331 [(match_operand:V4SF 1 "gpc_reg_operand" "0")
4332 (vec_select:SF (match_operand:V4SF 2 "gpc_reg_operand" "wa")
4334 [(match_operand:QI 3 "const_0_to_3_operand" "n")]))
4335 (match_operand:QI 4 "const_0_to_3_operand" "n")]
4337 "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR && TARGET_POWERPC64
4338 && (INTVAL (operands[3]) == (BYTES_BIG_ENDIAN ? 1 : 2))"
4340 int ele = INTVAL (operands[4]);
4342 if (!BYTES_BIG_ENDIAN)
4343 ele = GET_MODE_NUNITS (V4SFmode) - 1 - ele;
4345 operands[4] = GEN_INT (GET_MODE_SIZE (SFmode) * ele);
4346 return "xxinsertw %x0,%x2,%4";
4348 [(set_attr "type" "vecperm")])
4350 ;; Optimize x = vec_insert (vec_extract (v2, n), v1, m) if n is not the element
4351 ;; that is in the default scalar position (1 for big endian, 2 for little
4352 ;; endian). Doing the insert/extract in SImode avoids the float conversion.
4354 (define_insn_and_split "*vsx_insert_extract_v4sf_p9_2"
4355 [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
4357 [(match_operand:V4SF 1 "gpc_reg_operand" "0")
4358 (vec_select:SF (match_operand:V4SF 2 "gpc_reg_operand" "wa")
4360 [(match_operand:QI 3 "const_0_to_3_operand" "n")]))
4361 (match_operand:QI 4 "const_0_to_3_operand" "n")]
4363 (clobber (match_scratch:SI 5 "=&wa"))]
4364 "VECTOR_MEM_VSX_P (V4SFmode) && VECTOR_MEM_VSX_P (V4SImode)
4365 && TARGET_P9_VECTOR && TARGET_POWERPC64
4366 && (INTVAL (operands[3]) != (BYTES_BIG_ENDIAN ? 1 : 2))"
4369 [(parallel [(set (match_dup 5)
4370 (vec_select:SI (match_dup 6)
4371 (parallel [(match_dup 3)])))
4372 (clobber (scratch:SI))])
4374 (unspec:V4SI [(match_dup 8)
4379 if (GET_CODE (operands[5]) == SCRATCH)
4380 operands[5] = gen_reg_rtx (SImode);
4382 operands[6] = gen_lowpart (V4SImode, operands[2]);
4383 operands[7] = gen_lowpart (V4SImode, operands[0]);
4384 operands[8] = gen_lowpart (V4SImode, operands[1]);
4386 [(set_attr "type" "vecperm")
4387 (set_attr "isa" "p9v")])
4389 ;; Expanders for builtins
4390 (define_expand "vsx_mergel_<mode>"
4391 [(use (match_operand:VSX_D 0 "vsx_register_operand"))
4392 (use (match_operand:VSX_D 1 "vsx_register_operand"))
4393 (use (match_operand:VSX_D 2 "vsx_register_operand"))]
4394 "VECTOR_MEM_VSX_P (<MODE>mode)"
4396 rtvec v = gen_rtvec (2, GEN_INT (1), GEN_INT (3));
4397 rtx x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[1], operands[2]);
4398 x = gen_rtx_VEC_SELECT (<MODE>mode, x, gen_rtx_PARALLEL (VOIDmode, v));
4399 emit_insn (gen_rtx_SET (operands[0], x));
4403 (define_expand "vsx_mergeh_<mode>"
4404 [(use (match_operand:VSX_D 0 "vsx_register_operand"))
4405 (use (match_operand:VSX_D 1 "vsx_register_operand"))
4406 (use (match_operand:VSX_D 2 "vsx_register_operand"))]
4407 "VECTOR_MEM_VSX_P (<MODE>mode)"
4409 rtvec v = gen_rtvec (2, GEN_INT (0), GEN_INT (2));
4410 rtx x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[1], operands[2]);
4411 x = gen_rtx_VEC_SELECT (<MODE>mode, x, gen_rtx_PARALLEL (VOIDmode, v));
4412 emit_insn (gen_rtx_SET (operands[0], x));
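
;; The two expanders above implement the vec_mergeh and vec_mergel
;; builtins for the 64-bit element types; a sketch of a use:
;;
;;   #include <altivec.h>
;;   vector double merge_high (vector double a, vector double b)
;;   { return vec_mergeh (a, b); }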
4417 ;; We separate the register splat insn from the memory splat insn to force the
4418 ;; register allocator to generate the indexed form of the SPLAT when it is
4419 ;; given an offsettable memory reference. Otherwise, if the register and
4420 ;; memory insns were combined into a single insn, the register allocator would
4421 ;; load the value into a register and then do a double-word permute.
4422 (define_expand "vsx_splat_<mode>"
4423 [(set (match_operand:VSX_D 0 "vsx_register_operand")
4424 (vec_duplicate:VSX_D
4425 (match_operand:<VS_scalar> 1 "input_operand")))]
4426 "VECTOR_MEM_VSX_P (<MODE>mode)"
4428 rtx op1 = operands[1];
4430 operands[1] = rs6000_force_indexed_or_indirect_mem (op1);
4431 else if (!REG_P (op1))
4432 op1 = force_reg (<VSX_D:VS_scalar>mode, op1);
4435 (define_insn "vsx_splat_<mode>_reg"
4436 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa,we")
4437 (vec_duplicate:VSX_D
4438 (match_operand:<VS_scalar> 1 "gpc_reg_operand" "wa,b")))]
4439 "VECTOR_MEM_VSX_P (<MODE>mode)"
4441 xxpermdi %x0,%x1,%x1,0
4443 [(set_attr "type" "vecperm,vecmove")])
4445 (define_insn "vsx_splat_<mode>_mem"
4446 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
4447 (vec_duplicate:VSX_D
4448 (match_operand:<VSX_D:VS_scalar> 1 "memory_operand" "Z")))]
4449 "VECTOR_MEM_VSX_P (<MODE>mode)"
4451 [(set_attr "type" "vecload")])
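
;; A splat from a scalar, as in the sketch below, goes through the
;; expander above and then picks the register or memory form:
;;
;;   #include <altivec.h>
;;   vector double splat_double (double d) { return vec_splats (d); }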
4453 ;; V4SI splat support
4454 (define_insn "vsx_splat_v4si"
4455 [(set (match_operand:V4SI 0 "vsx_register_operand" "=we,we")
4457 (match_operand:SI 1 "splat_input_operand" "r,Z")))]
4462 [(set_attr "type" "vecperm,vecload")])
4464 ;; SImode is not currently allowed in vector registers. This pattern
4465 ;; allows us to use direct move to get the value in a vector register
4466 ;; so that we can use XXSPLTW.
4467 (define_insn "vsx_splat_v4si_di"
4468 [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa,we")
4471 (match_operand:DI 1 "gpc_reg_operand" "wa,r"))))]
4472 "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
4476 [(set_attr "type" "vecperm")
4477 (set_attr "isa" "p8v,*")])
4479 ;; V4SF splat (ISA 3.0)
4480 (define_insn_and_split "vsx_splat_v4sf"
4481 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa,wa,wa")
4483 (match_operand:SF 1 "splat_input_operand" "Z,wa,r")))]
4489 "&& reload_completed && vsx_register_operand (operands[1], SFmode)"
4491 (unspec:V4SF [(match_dup 1)] UNSPEC_VSX_CVDPSPN))
4493 (unspec:V4SF [(match_dup 0)
4494 (const_int 0)] UNSPEC_VSX_XXSPLTW))]
4496 [(set_attr "type" "vecload,vecperm,vecperm")
4497 (set_attr "length" "*,8,*")
4498 (set_attr "isa" "*,p8v,*")])
4500 ;; V4SF/V4SI splat from a vector element
4501 (define_insn "vsx_xxspltw_<mode>"
4502 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
4503 (vec_duplicate:VSX_W
4504 (vec_select:<VS_scalar>
4505 (match_operand:VSX_W 1 "vsx_register_operand" "wa")
4507 [(match_operand:QI 2 "u5bit_cint_operand" "n")]))))]
4508 "VECTOR_MEM_VSX_P (<MODE>mode)"
4510 if (!BYTES_BIG_ENDIAN)
4511 operands[2] = GEN_INT (3 - INTVAL (operands[2]));
4513 return "xxspltw %x0,%x1,%2";
4515 [(set_attr "type" "vecperm")])
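
;; The element splat above corresponds to the vec_splat builtin, as in
;; this sketch:
;;
;;   #include <altivec.h>
;;   vector float splat_elem2 (vector float v) { return vec_splat (v, 2); }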
4517 (define_insn "vsx_xxspltw_<mode>_direct"
4518 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
4519 (unspec:VSX_W [(match_operand:VSX_W 1 "vsx_register_operand" "wa")
4520 (match_operand:QI 2 "u5bit_cint_operand" "i")]
4521 UNSPEC_VSX_XXSPLTW))]
4522 "VECTOR_MEM_VSX_P (<MODE>mode)"
4523 "xxspltw %x0,%x1,%2"
4524 [(set_attr "type" "vecperm")])
4526 ;; V16QI/V8HI splat support on ISA 2.07
4527 (define_insn "vsx_vsplt<VSX_SPLAT_SUFFIX>_di"
4528 [(set (match_operand:VSX_SPLAT_I 0 "altivec_register_operand" "=v")
4529 (vec_duplicate:VSX_SPLAT_I
4530 (truncate:<VS_scalar>
4531 (match_operand:DI 1 "altivec_register_operand" "v"))))]
4532 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
4533 "vsplt<VSX_SPLAT_SUFFIX> %0,%1,<VSX_SPLAT_COUNT>"
4534 [(set_attr "type" "vecperm")])
4536 ;; V2DF/V2DI splat for use by vec_splat builtin
4537 (define_insn "vsx_xxspltd_<mode>"
4538 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
4539 (unspec:VSX_D [(match_operand:VSX_D 1 "vsx_register_operand" "wa")
4540 (match_operand:QI 2 "u5bit_cint_operand" "i")]
4541 UNSPEC_VSX_XXSPLTD))]
4542 "VECTOR_MEM_VSX_P (<MODE>mode)"
4544 if ((BYTES_BIG_ENDIAN && INTVAL (operands[2]) == 0)
4545 || (!BYTES_BIG_ENDIAN && INTVAL (operands[2]) == 1))
4546 return "xxpermdi %x0,%x1,%x1,0";
4548 return "xxpermdi %x0,%x1,%x1,3";
4550 [(set_attr "type" "vecperm")])
4552 ;; V4SF/V4SI interleave
4553 (define_insn "vsx_xxmrghw_<mode>"
4554 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
4556 (vec_concat:<VS_double>
4557 (match_operand:VSX_W 1 "vsx_register_operand" "wa")
4558 (match_operand:VSX_W 2 "vsx_register_operand" "wa"))
4559 (parallel [(const_int 0) (const_int 4)
4560 (const_int 1) (const_int 5)])))]
4561 "VECTOR_MEM_VSX_P (<MODE>mode)"
4563 if (BYTES_BIG_ENDIAN)
4564 return "xxmrghw %x0,%x1,%x2";
4566 return "xxmrglw %x0,%x2,%x1";
4568 [(set_attr "type" "vecperm")])
4570 (define_insn "vsx_xxmrglw_<mode>"
4571 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
4573 (vec_concat:<VS_double>
4574 (match_operand:VSX_W 1 "vsx_register_operand" "wa")
4575 (match_operand:VSX_W 2 "vsx_register_operand" "wa"))
4576 (parallel [(const_int 2) (const_int 6)
4577 (const_int 3) (const_int 7)])))]
4578 "VECTOR_MEM_VSX_P (<MODE>mode)"
4580 if (BYTES_BIG_ENDIAN)
4581 return "xxmrglw %x0,%x1,%x2";
4583 return "xxmrghw %x0,%x2,%x1";
4585 [(set_attr "type" "vecperm")])
4587 ;; Shift left double by word immediate
4588 (define_insn "vsx_xxsldwi_<mode>"
4589 [(set (match_operand:VSX_L 0 "vsx_register_operand" "=wa")
4590 (unspec:VSX_L [(match_operand:VSX_L 1 "vsx_register_operand" "wa")
4591 (match_operand:VSX_L 2 "vsx_register_operand" "wa")
4592 (match_operand:QI 3 "u5bit_cint_operand" "i")]
4594 "VECTOR_MEM_VSX_P (<MODE>mode)"
4595 "xxsldwi %x0,%x1,%x2,%3"
4596 [(set_attr "type" "vecperm")
4597 (set_attr "isa" "<VSisa>")])
4600 ;; Vector reduction insns and splitters
4602 (define_insn_and_split "vsx_reduc_<VEC_reduc_name>_v2df"
4603 [(set (match_operand:V2DF 0 "vfloat_operand" "=&wa,wa")
4607 (match_operand:V2DF 1 "vfloat_operand" "wa,wa")
4608 (parallel [(const_int 1)]))
4611 (parallel [(const_int 0)])))
4613 (clobber (match_scratch:V2DF 2 "=0,&wa"))]
4614 "VECTOR_UNIT_VSX_P (V2DFmode)"
4619 rtx tmp = (GET_CODE (operands[2]) == SCRATCH)
4620 ? gen_reg_rtx (V2DFmode)
4622 emit_insn (gen_vsx_xxsldwi_v2df (tmp, operands[1], operands[1], const2_rtx));
4623 emit_insn (gen_<VEC_reduc_rtx>v2df3 (operands[0], tmp, operands[1]));
4626 [(set_attr "length" "8")
4627 (set_attr "type" "veccomplex")])
4629 (define_insn_and_split "vsx_reduc_<VEC_reduc_name>_v4sf"
4630 [(set (match_operand:V4SF 0 "vfloat_operand" "=wa")
4632 (unspec:V4SF [(const_int 0)] UNSPEC_REDUC)
4633 (match_operand:V4SF 1 "vfloat_operand" "wa")))
4634 (clobber (match_scratch:V4SF 2 "=&wa"))
4635 (clobber (match_scratch:V4SF 3 "=&wa"))]
4636 "VECTOR_UNIT_VSX_P (V4SFmode)"
4641 rtx op0 = operands[0];
4642 rtx op1 = operands[1];
4643 rtx tmp2, tmp3, tmp4;
4645 if (can_create_pseudo_p ())
4647 tmp2 = gen_reg_rtx (V4SFmode);
4648 tmp3 = gen_reg_rtx (V4SFmode);
4649 tmp4 = gen_reg_rtx (V4SFmode);
4658 emit_insn (gen_vsx_xxsldwi_v4sf (tmp2, op1, op1, const2_rtx));
4659 emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp3, tmp2, op1));
4660 emit_insn (gen_vsx_xxsldwi_v4sf (tmp4, tmp3, tmp3, GEN_INT (3)));
4661 emit_insn (gen_<VEC_reduc_rtx>v4sf3 (op0, tmp4, tmp3));
4664 [(set_attr "length" "16")
4665 (set_attr "type" "veccomplex")])
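
;; In pseudo-code, the V4SF reduction above performs the following, where
;; "op" is the reduction operation and xxsldwi with identical inputs acts
;; as a rotate by words:
;;   t2 = xxsldwi (v, v, 2)      ; rotate by two words
;;   t3 = t2 op v                ; combine opposite pairs
;;   t4 = xxsldwi (t3, t3, 3)    ; rotate by one word
;;   r  = t4 op t3               ; every element now holds the reduction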
4667 ;; Combiner patterns with the vector reduction patterns that know we can get
4668 ;; to the top element of the V2DF array without doing an extract.
4670 (define_insn_and_split "*vsx_reduc_<VEC_reduc_name>_v2df_scalar"
4671 [(set (match_operand:DF 0 "vfloat_operand" "=&wa,wa")
4676 (match_operand:V2DF 1 "vfloat_operand" "wa,wa")
4677 (parallel [(const_int 1)]))
4680 (parallel [(const_int 0)])))
4682 (parallel [(const_int 1)])))
4683 (clobber (match_scratch:DF 2 "=0,&wa"))]
4684 "BYTES_BIG_ENDIAN && VECTOR_UNIT_VSX_P (V2DFmode)"
4689 rtx hi = gen_highpart (DFmode, operands[1]);
4690 rtx lo = (GET_CODE (operands[2]) == SCRATCH)
4691 ? gen_reg_rtx (DFmode)
4694 emit_insn (gen_vsx_extract_v2df (lo, operands[1], const1_rtx));
4695 emit_insn (gen_<VEC_reduc_rtx>df3 (operands[0], hi, lo));
4698 [(set_attr "length" "8")
4699 (set_attr "type" "veccomplex")])
4701 (define_insn_and_split "*vsx_reduc_<VEC_reduc_name>_v4sf_scalar"
4702 [(set (match_operand:SF 0 "vfloat_operand" "=f")
4705 (unspec:V4SF [(const_int 0)] UNSPEC_REDUC)
4706 (match_operand:V4SF 1 "vfloat_operand" "wa"))
4707 (parallel [(const_int 3)])))
4708 (clobber (match_scratch:V4SF 2 "=&wa"))
4709 (clobber (match_scratch:V4SF 3 "=&wa"))
4710 (clobber (match_scratch:V4SF 4 "=0"))]
4711 "BYTES_BIG_ENDIAN && VECTOR_UNIT_VSX_P (V4SFmode)"
4716 rtx op0 = operands[0];
4717 rtx op1 = operands[1];
4718 rtx tmp2, tmp3, tmp4, tmp5;
4720 if (can_create_pseudo_p ())
4722 tmp2 = gen_reg_rtx (V4SFmode);
4723 tmp3 = gen_reg_rtx (V4SFmode);
4724 tmp4 = gen_reg_rtx (V4SFmode);
4725 tmp5 = gen_reg_rtx (V4SFmode);
4735 emit_insn (gen_vsx_xxsldwi_v4sf (tmp2, op1, op1, const2_rtx));
4736 emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp3, tmp2, op1));
4737 emit_insn (gen_vsx_xxsldwi_v4sf (tmp4, tmp3, tmp3, GEN_INT (3)));
4738 emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp5, tmp4, tmp3));
4739 emit_insn (gen_vsx_xscvspdp_scalar2 (op0, tmp5));
4742 [(set_attr "length" "20")
4743 (set_attr "type" "veccomplex")])
4746 ;; Power8 Vector fusion. The fused ops must be physically adjacent.
4748 [(set (match_operand:P 0 "base_reg_operand")
4749 (match_operand:P 1 "short_cint_operand"))
4750 (set (match_operand:VSX_M 2 "vsx_register_operand")
4751 (mem:VSX_M (plus:P (match_dup 0)
4752 (match_operand:P 3 "int_reg_operand"))))]
4753 "TARGET_VSX && TARGET_P8_FUSION && !TARGET_P9_VECTOR"
4754 "li %0,%1\;lx<VSX_M:VSm>x %x2,%0,%3\t\t\t# vector load fusion"
4755 [(set_attr "length" "8")
4756 (set_attr "type" "vecload")])
4759 [(set (match_operand:P 0 "base_reg_operand")
4760 (match_operand:P 1 "short_cint_operand"))
4761 (set (match_operand:VSX_M 2 "vsx_register_operand")
4762 (mem:VSX_M (plus:P (match_operand:P 3 "int_reg_operand")
4764 "TARGET_VSX && TARGET_P8_FUSION && !TARGET_P9_VECTOR"
4765 "li %0,%1\;lx<VSX_M:VSm>x %x2,%0,%3\t\t\t# vector load fusion"
4766 [(set_attr "length" "8")
4767 (set_attr "type" "vecload")])
4770 ;; ISA 3.0 vector extend sign support
4772 (define_insn "vsx_sign_extend_qi_<mode>"
4773 [(set (match_operand:VSINT_84 0 "vsx_register_operand" "=v")
4775 [(match_operand:V16QI 1 "vsx_register_operand" "v")]
4776 UNSPEC_VSX_SIGN_EXTEND))]
4779 [(set_attr "type" "vecexts")])
4781 (define_insn "vsx_sign_extend_hi_<mode>"
4782 [(set (match_operand:VSINT_84 0 "vsx_register_operand" "=v")
4784 [(match_operand:V8HI 1 "vsx_register_operand" "v")]
4785 UNSPEC_VSX_SIGN_EXTEND))]
4788 [(set_attr "type" "vecexts")])
4790 (define_insn "*vsx_sign_extend_si_v2di"
4791 [(set (match_operand:V2DI 0 "vsx_register_operand" "=v")
4792 (unspec:V2DI [(match_operand:V4SI 1 "vsx_register_operand" "v")]
4793 UNSPEC_VSX_SIGN_EXTEND))]
4796 [(set_attr "type" "vecexts")])
4799 ;; ISA 3.0 Binary Floating-Point Support
4801 ;; VSX Scalar Extract Exponent Quad-Precision
4802 (define_insn "xsxexpqp_<mode>"
4803 [(set (match_operand:DI 0 "altivec_register_operand" "=v")
4804 (unspec:DI [(match_operand:IEEE128 1 "altivec_register_operand" "v")]
4805 UNSPEC_VSX_SXEXPDP))]
4808 [(set_attr "type" "vecmove")])
4810 ;; VSX Scalar Extract Exponent Double-Precision
4811 (define_insn "xsxexpdp"
4812 [(set (match_operand:DI 0 "register_operand" "=r")
4813 (unspec:DI [(match_operand:DF 1 "vsx_register_operand" "wa")]
4814 UNSPEC_VSX_SXEXPDP))]
4815 "TARGET_P9_VECTOR && TARGET_64BIT"
4817 [(set_attr "type" "integer")])
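
;; This pattern serves the scalar_extract_exp builtin; a sketch of a use
;; (the builtin returns the biased exponent field of the double):
;;
;;   #include <altivec.h>
;;   unsigned int get_exp (double d) { return scalar_extract_exp (d); }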
4819 ;; VSX Scalar Extract Significand Quad-Precision
4820 (define_insn "xsxsigqp_<mode>"
4821 [(set (match_operand:TI 0 "altivec_register_operand" "=v")
4822 (unspec:TI [(match_operand:IEEE128 1 "altivec_register_operand" "v")]
4826 [(set_attr "type" "vecmove")])
4828 ;; VSX Scalar Extract Significand Double-Precision
4829 (define_insn "xsxsigdp"
4830 [(set (match_operand:DI 0 "register_operand" "=r")
4831 (unspec:DI [(match_operand:DF 1 "vsx_register_operand" "wa")]
4833 "TARGET_P9_VECTOR && TARGET_64BIT"
4835 [(set_attr "type" "integer")])
4837 ;; VSX Scalar Insert Exponent Quad-Precision Floating Point Argument
4838 (define_insn "xsiexpqpf_<mode>"
4839 [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v")
4841 [(match_operand:IEEE128 1 "altivec_register_operand" "v")
4842 (match_operand:DI 2 "altivec_register_operand" "v")]
4843 UNSPEC_VSX_SIEXPQP))]
4846 [(set_attr "type" "vecmove")])
4848 ;; VSX Scalar Insert Exponent Quad-Precision
4849 (define_insn "xsiexpqp_<mode>"
4850 [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v")
4851 (unspec:IEEE128 [(match_operand:TI 1 "altivec_register_operand" "v")
4852 (match_operand:DI 2 "altivec_register_operand" "v")]
4853 UNSPEC_VSX_SIEXPQP))]
4856 [(set_attr "type" "vecmove")])
4858 ;; VSX Scalar Insert Exponent Double-Precision
4859 (define_insn "xsiexpdp"
4860 [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
4861 (unspec:DF [(match_operand:DI 1 "register_operand" "r")
4862 (match_operand:DI 2 "register_operand" "r")]
4863 UNSPEC_VSX_SIEXPDP))]
4864 "TARGET_P9_VECTOR && TARGET_64BIT"
4865 "xsiexpdp %x0,%1,%2"
4866 [(set_attr "type" "fpsimple")])
4868 ;; VSX Scalar Insert Exponent Double-Precision Floating Point Argument
4869 (define_insn "xsiexpdpf"
4870 [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
4871 (unspec:DF [(match_operand:DF 1 "register_operand" "r")
4872 (match_operand:DI 2 "register_operand" "r")]
4873 UNSPEC_VSX_SIEXPDP))]
4874 "TARGET_P9_VECTOR && TARGET_64BIT"
4875 "xsiexpdp %x0,%1,%2"
4876 [(set_attr "type" "fpsimple")])
4878 ;; VSX Scalar Compare Exponents Double-Precision
4879 (define_expand "xscmpexpdp_<code>"
4883 [(match_operand:DF 1 "vsx_register_operand" "wa")
4884 (match_operand:DF 2 "vsx_register_operand" "wa")]
4885 UNSPEC_VSX_SCMPEXPDP)
4887 (set (match_operand:SI 0 "register_operand" "=r")
4888 (CMP_TEST:SI (match_dup 3)
4892 if (<CODE> == UNORDERED && !HONOR_NANS (DFmode))
4894 emit_move_insn (operands[0], const0_rtx);
4898 operands[3] = gen_reg_rtx (CCFPmode);
4901 (define_insn "*xscmpexpdp"
4902 [(set (match_operand:CCFP 0 "cc_reg_operand" "=y")
4904 (unspec:DF [(match_operand:DF 1 "vsx_register_operand" "wa")
4905 (match_operand:DF 2 "vsx_register_operand" "wa")]
4906 UNSPEC_VSX_SCMPEXPDP)
4907 (match_operand:SI 3 "zero_constant" "j")))]
4909 "xscmpexpdp %0,%x1,%x2"
4910 [(set_attr "type" "fpcompare")])
4912 ;; VSX Scalar Compare Exponents Quad-Precision
4913 (define_expand "xscmpexpqp_<code>_<mode>"
4917 [(match_operand:IEEE128 1 "vsx_register_operand" "v")
4918 (match_operand:IEEE128 2 "vsx_register_operand" "v")]
4919 UNSPEC_VSX_SCMPEXPQP)
4921 (set (match_operand:SI 0 "register_operand" "=r")
4922 (CMP_TEST:SI (match_dup 3)
4926 if (<CODE> == UNORDERED && !HONOR_NANS (<MODE>mode))
4928 emit_move_insn (operands[0], const0_rtx);
4932 operands[3] = gen_reg_rtx (CCFPmode);
4935 (define_insn "*xscmpexpqp"
4936 [(set (match_operand:CCFP 0 "cc_reg_operand" "=y")
4938 (unspec:IEEE128 [(match_operand:IEEE128 1 "altivec_register_operand" "v")
4939 (match_operand:IEEE128 2 "altivec_register_operand" "v")]
4940 UNSPEC_VSX_SCMPEXPQP)
4941 (match_operand:SI 3 "zero_constant" "j")))]
4943 "xscmpexpqp %0,%1,%2"
4944 [(set_attr "type" "fpcompare")])
4946 ;; VSX Scalar Test Data Class Quad-Precision
4947 ;; (Expansion for scalar_test_data_class (__ieee128, int))
4948 ;; (Has side effect of setting the lt bit if operand 1 is negative,
4949 ;; setting the eq bit if any of the conditions tested by operand 2
4950 ;; are satisfied, and clearing the gt and unordered bits to zero.)
4951 (define_expand "xststdcqp_<mode>"
4955 [(match_operand:IEEE128 1 "altivec_register_operand" "v")
4956 (match_operand:SI 2 "u7bit_cint_operand" "n")]
4959 (set (match_operand:SI 0 "register_operand" "=r")
4960 (eq:SI (match_dup 3)
4964 operands[3] = gen_reg_rtx (CCFPmode);
4967 ;; VSX Scalar Test Data Class Double- and Single-Precision
4968 ;; (The lt bit is set if operand 1 is negative. The eq bit is set
4969 ;; if any of the conditions tested by operand 2 are satisfied.
4970 ;; The gt and unordered bits are cleared to zero.)
4971 (define_expand "xststdc<sd>p"
4975 [(match_operand:SFDF 1 "vsx_register_operand" "wa")
4976 (match_operand:SI 2 "u7bit_cint_operand" "n")]
4979 (set (match_operand:SI 0 "register_operand" "=r")
4980 (eq:SI (match_dup 3)
4984 operands[3] = gen_reg_rtx (CCFPmode);
4985 operands[4] = CONST0_RTX (SImode);
4988 ;; The VSX Scalar Test Negative Quad-Precision
4989 (define_expand "xststdcnegqp_<mode>"
4993 [(match_operand:IEEE128 1 "altivec_register_operand" "v")
4997 (set (match_operand:SI 0 "register_operand" "=r")
4998 (lt:SI (match_dup 2)
5002 operands[2] = gen_reg_rtx (CCFPmode);
5005 ;; The VSX Scalar Test Negative Double- and Single-Precision
5006 (define_expand "xststdcneg<sd>p"
5010 [(match_operand:SFDF 1 "vsx_register_operand" "wa")
5014 (set (match_operand:SI 0 "register_operand" "=r")
5015 (lt:SI (match_dup 2)
5019 operands[2] = gen_reg_rtx (CCFPmode);
5020 operands[3] = CONST0_RTX (SImode);
5023 (define_insn "*xststdcqp_<mode>"
5024 [(set (match_operand:CCFP 0 "" "=y")
5027 [(match_operand:IEEE128 1 "altivec_register_operand" "v")
5028 (match_operand:SI 2 "u7bit_cint_operand" "n")]
5032 "xststdcqp %0,%1,%2"
5033 [(set_attr "type" "fpcompare")])
5035 (define_insn "*xststdc<sd>p"
5036 [(set (match_operand:CCFP 0 "" "=y")
5038 (unspec:SFDF [(match_operand:SFDF 1 "vsx_register_operand" "wa")
5039 (match_operand:SI 2 "u7bit_cint_operand" "n")]
5041 (match_operand:SI 3 "zero_constant" "j")))]
5043 "xststdc<sd>p %0,%x1,%2"
5044 [(set_attr "type" "fpcompare")])
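
;; These test-data-class patterns serve the scalar_test_data_class
;; builtin. A sketch of a use; the 7-bit mask selects the conditions to
;; test, and 0x40 is assumed here to be the NaN bit of the DCMX mask:
;;
;;   #include <altivec.h>
;;   int is_nan_dc (double d) { return scalar_test_data_class (d, 0x40); }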
5046 ;; VSX Vector Extract Exponent Double and Single Precision
5047 (define_insn "xvxexp<sd>p"
5048 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
5050 [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
5053 "xvxexp<sd>p %x0,%x1"
5054 [(set_attr "type" "vecsimple")])
5056 ;; VSX Vector Extract Significand Double and Single Precision
5057 (define_insn "xvxsig<sd>p"
5058 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
5060 [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
5063 "xvxsig<sd>p %x0,%x1"
5064 [(set_attr "type" "vecsimple")])
5066 ;; VSX Vector Insert Exponent Double and Single Precision
5067 (define_insn "xviexp<sd>p"
5068 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
5070 [(match_operand:VSX_F 1 "vsx_register_operand" "wa")
5071 (match_operand:VSX_F 2 "vsx_register_operand" "wa")]
5074 "xviexp<sd>p %x0,%x1,%x2"
5075 [(set_attr "type" "vecsimple")])
5077 ;; VSX Vector Test Data Class Double and Single Precision
5078 ;; The corresponding elements of the result vector are all ones
5079 ;; if any of the conditions tested by operand 3 are satisfied.
5080 (define_insn "xvtstdc<sd>p"
5081 [(set (match_operand:<VSI> 0 "vsx_register_operand" "=wa")
5083 [(match_operand:VSX_F 1 "vsx_register_operand" "wa")
5084 (match_operand:SI 2 "u7bit_cint_operand" "n")]
5085 UNSPEC_VSX_VTSTDC))]
5087 "xvtstdc<sd>p %x0,%x1,%2"
5088 [(set_attr "type" "vecsimple")])
5090 ;; ISA 3.0 String Operations Support
5092 ;; Compare vectors producing a vector result and a predicate, setting CR6
5093 ;; to indicate a combined status. This pattern matches v16qi, v8hi, and
5094 ;; v4si modes. It does not need to match the v4sf, v2df, or v2di modes,
5095 ;; because those are expanded to use Power8 instructions.
5097 (define_insn "*vsx_ne_<mode>_p"
5098 [(set (reg:CC CR6_REGNO)
5100 [(ne:CC (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v")
5101 (match_operand:VSX_EXTRACT_I 2 "gpc_reg_operand" "v"))]
5103 (set (match_operand:VSX_EXTRACT_I 0 "gpc_reg_operand" "=v")
5104 (ne:VSX_EXTRACT_I (match_dup 1)
5107 "vcmpne<VSX_EXTRACT_WIDTH>. %0,%1,%2"
5108 [(set_attr "type" "vecsimple")])
5110 (define_insn "*vector_nez_<mode>_p"
5111 [(set (reg:CC CR6_REGNO)
5112 (unspec:CC [(unspec:VI
5113 [(match_operand:VI 1 "gpc_reg_operand" "v")
5114 (match_operand:VI 2 "gpc_reg_operand" "v")]
5117 (set (match_operand:VI 0 "gpc_reg_operand" "=v")
5118 (unspec:VI [(match_dup 1)
5122 "vcmpnez<VSX_EXTRACT_WIDTH>. %0,%1,%2"
5123 [(set_attr "type" "vecsimple")])
5125 ;; Return first position of match between vectors using natural element
5126 ;; order for both LE and BE execution modes.
5127 (define_expand "first_match_index_<mode>"
5128 [(match_operand:SI 0 "register_operand")
5129 (unspec:SI [(match_operand:VSX_EXTRACT_I 1 "register_operand")
5130 (match_operand:VSX_EXTRACT_I 2 "register_operand")]
5131 UNSPEC_VSX_FIRST_MATCH_INDEX)]
5136 rtx cmp_result = gen_reg_rtx (<MODE>mode);
5137 rtx not_result = gen_reg_rtx (<MODE>mode);
5139 emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmp_result, operands[1],
5141 emit_insn (gen_one_cmpl<mode>2 (not_result, cmp_result));
5143 sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2;
5145 if (<MODE>mode == V16QImode)
5147 if (!BYTES_BIG_ENDIAN)
5148 emit_insn (gen_vctzlsbb_<mode> (operands[0], not_result));
5150 emit_insn (gen_vclzlsbb_<mode> (operands[0], not_result));
5154 rtx tmp = gen_reg_rtx (SImode);
5155 if (!BYTES_BIG_ENDIAN)
5156 emit_insn (gen_vctzlsbb_<mode> (tmp, not_result));
5158 emit_insn (gen_vclzlsbb_<mode> (tmp, not_result));
5159 emit_insn (gen_lshrsi3 (operands[0], tmp, GEN_INT (sh)));
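
;; The expander above backs the vec_first_match_index builtin; a sketch
;; of a use:
;;
;;   #include <altivec.h>
;;   int first_match (vector unsigned char a, vector unsigned char b)
;;   { return vec_first_match_index (a, b); }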
5164 ;; Return first position of match between vectors or end of string (EOS) using
5165 ;; natural element order for both LE and BE execution modes.
5166 (define_expand "first_match_or_eos_index_<mode>"
5167 [(match_operand:SI 0 "register_operand")
5168 (unspec: SI [(match_operand:VSX_EXTRACT_I 1 "register_operand")
5169 (match_operand:VSX_EXTRACT_I 2 "register_operand")]
5170 UNSPEC_VSX_FIRST_MATCH_EOS_INDEX)]
5174 rtx cmpz1_result = gen_reg_rtx (<MODE>mode);
5175 rtx cmpz2_result = gen_reg_rtx (<MODE>mode);
5176 rtx cmpz_result = gen_reg_rtx (<MODE>mode);
5177 rtx and_result = gen_reg_rtx (<MODE>mode);
5178 rtx result = gen_reg_rtx (<MODE>mode);
5179 rtx vzero = gen_reg_rtx (<MODE>mode);
5181 /* Vector with zeros in elements that correspond to zeros in operands. */
5182 emit_move_insn (vzero, CONST0_RTX (<MODE>mode));
5183 emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz1_result, operands[1], vzero));
5184 emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz2_result, operands[2], vzero));
5185 emit_insn (gen_and<mode>3 (and_result, cmpz1_result, cmpz2_result));
5187 /* Vector with ones in elements that do not match. */
5188 emit_insn (gen_vcmpnez<VSX_EXTRACT_WIDTH> (cmpz_result, operands[1],
5191 /* Create vector with ones in elements where there was a zero in one of
5192 the source elements or where the elements match. */
5193 emit_insn (gen_nand<mode>3 (result, and_result, cmpz_result));
5194 sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2;
5196 if (<MODE>mode == V16QImode)
5198 if (!BYTES_BIG_ENDIAN)
5199 emit_insn (gen_vctzlsbb_<mode> (operands[0], result));
5201 emit_insn (gen_vclzlsbb_<mode> (operands[0], result));
5205 rtx tmp = gen_reg_rtx (SImode);
5206 if (!BYTES_BIG_ENDIAN)
5207 emit_insn (gen_vctzlsbb_<mode> (tmp, result));
5209 emit_insn (gen_vclzlsbb_<mode> (tmp, result));
5210 emit_insn (gen_lshrsi3 (operands[0], tmp, GEN_INT (sh)));
5215 ;; Return first position of mismatch between vectors using natural
5216 ;; element order for both LE and BE execution modes.
5217 (define_expand "first_mismatch_index_<mode>"
5218 [(match_operand:SI 0 "register_operand")
5219 (unspec: SI [(match_operand:VSX_EXTRACT_I 1 "register_operand")
5220 (match_operand:VSX_EXTRACT_I 2 "register_operand")]
5221 UNSPEC_VSX_FIRST_MISMATCH_INDEX)]
5225 rtx cmp_result = gen_reg_rtx (<MODE>mode);
5227 emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmp_result, operands[1],
5229 sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2;
5231 if (<MODE>mode == V16QImode)
5233 if (!BYTES_BIG_ENDIAN)
5234 emit_insn (gen_vctzlsbb_<mode> (operands[0], cmp_result));
5236 emit_insn (gen_vclzlsbb_<mode> (operands[0], cmp_result));
5240 rtx tmp = gen_reg_rtx (SImode);
5241 if (!BYTES_BIG_ENDIAN)
5242 emit_insn (gen_vctzlsbb_<mode> (tmp, cmp_result));
5244 emit_insn (gen_vclzlsbb_<mode> (tmp, cmp_result));
5245 emit_insn (gen_lshrsi3 (operands[0], tmp, GEN_INT (sh)));
5250 ;; Return first position of mismatch between vectors or end of string (EOS)
5251 ;; using natural element order for both LE and BE execution modes.
5252 (define_expand "first_mismatch_or_eos_index_<mode>"
5253 [(match_operand:SI 0 "register_operand")
5254 (unspec: SI [(match_operand:VSX_EXTRACT_I 1 "register_operand")
5255 (match_operand:VSX_EXTRACT_I 2 "register_operand")]
5256 UNSPEC_VSX_FIRST_MISMATCH_EOS_INDEX)]
5260 rtx cmpz1_result = gen_reg_rtx (<MODE>mode);
5261 rtx cmpz2_result = gen_reg_rtx (<MODE>mode);
5262 rtx cmpz_result = gen_reg_rtx (<MODE>mode);
5263 rtx not_cmpz_result = gen_reg_rtx (<MODE>mode);
5264 rtx and_result = gen_reg_rtx (<MODE>mode);
5265 rtx result = gen_reg_rtx (<MODE>mode);
5266 rtx vzero = gen_reg_rtx (<MODE>mode);
5268 /* Vector with zeros in elements that correspond to zeros in operands. */
5269 emit_move_insn (vzero, CONST0_RTX (<MODE>mode));
5271 emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz1_result, operands[1], vzero));
5272 emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz2_result, operands[2], vzero));
5273 emit_insn (gen_and<mode>3 (and_result, cmpz1_result, cmpz2_result));
5275 /* Vector with ones in elements that match. */
5276 emit_insn (gen_vcmpnez<VSX_EXTRACT_WIDTH> (cmpz_result, operands[1],
5278 emit_insn (gen_one_cmpl<mode>2 (not_cmpz_result, cmpz_result));
5280 /* Create vector with ones in elements where there was a zero in one of
5281 the source elements or where the elements did not match. */
5282 emit_insn (gen_nand<mode>3 (result, and_result, not_cmpz_result));
5283 sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2;
5285 if (<MODE>mode == V16QImode)
5287 if (!BYTES_BIG_ENDIAN)
5288 emit_insn (gen_vctzlsbb_<mode> (operands[0], result));
5290 emit_insn (gen_vclzlsbb_<mode> (operands[0], result));
5294 rtx tmp = gen_reg_rtx (SImode);
5295 if (!BYTES_BIG_ENDIAN)
5296 emit_insn (gen_vctzlsbb_<mode> (tmp, result));
5298 emit_insn (gen_vclzlsbb_<mode> (tmp, result));
5299 emit_insn (gen_lshrsi3 (operands[0], tmp, GEN_INT (sh)));
5304 ;; Load VSX Vector with Length
5305 (define_expand "lxvl"
5307 (ashift:DI (match_operand:DI 2 "register_operand")
5309 (set (match_operand:V16QI 0 "vsx_register_operand")
5311 [(match_operand:DI 1 "gpc_reg_operand")
5312 (mem:V16QI (match_dup 1))
5315 "TARGET_P9_VECTOR && TARGET_64BIT"
5317 operands[3] = gen_reg_rtx (DImode);
5320 (define_insn "*lxvl"
5321 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
5323 [(match_operand:DI 1 "gpc_reg_operand" "b")
5324 (mem:V16QI (match_dup 1))
5325 (match_operand:DI 2 "register_operand" "r")]
5327 "TARGET_P9_VECTOR && TARGET_64BIT"
5329 [(set_attr "type" "vecload")])
5331 (define_insn "lxvll"
5332 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
5333 (unspec:V16QI [(match_operand:DI 1 "gpc_reg_operand" "b")
5334 (mem:V16QI (match_dup 1))
5335 (match_operand:DI 2 "register_operand" "r")]
5339 [(set_attr "type" "vecload")])
5341 ;; Expand for builtin xl_len_r
5342 (define_expand "xl_len_r"
5343 [(match_operand:V16QI 0 "vsx_register_operand")
5344 (match_operand:DI 1 "register_operand")
5345 (match_operand:DI 2 "register_operand")]
5348 rtx shift_mask = gen_reg_rtx (V16QImode);
5349 rtx rtx_vtmp = gen_reg_rtx (V16QImode);
5350 rtx tmp = gen_reg_rtx (DImode);
5352 emit_insn (gen_altivec_lvsl_reg (shift_mask, operands[2]));
5353 emit_insn (gen_ashldi3 (tmp, operands[2], GEN_INT (56)));
5354 emit_insn (gen_lxvll (rtx_vtmp, operands[1], tmp));
5355 emit_insn (gen_altivec_vperm_v8hiv16qi (operands[0], rtx_vtmp, rtx_vtmp,
5360 (define_insn "stxvll"
5361 [(set (mem:V16QI (match_operand:DI 1 "gpc_reg_operand" "b"))
5362 (unspec:V16QI [(match_operand:V16QI 0 "vsx_register_operand" "wa")
5363 (mem:V16QI (match_dup 1))
5364 (match_operand:DI 2 "register_operand" "r")]
5368 [(set_attr "type" "vecstore")])
5370 ;; Store VSX Vector with Length
5371 (define_expand "stxvl"
5373 (ashift:DI (match_operand:DI 2 "register_operand")
5375 (set (mem:V16QI (match_operand:DI 1 "gpc_reg_operand"))
5377 [(match_operand:V16QI 0 "vsx_register_operand")
5378 (mem:V16QI (match_dup 1))
5381 "TARGET_P9_VECTOR && TARGET_64BIT"
5383 operands[3] = gen_reg_rtx (DImode);
5386 ;; Define optabs for vector access with length, for the vectorizer to exploit.
5387 (define_expand "len_load_v16qi"
5388 [(match_operand:V16QI 0 "vlogical_operand")
5389 (match_operand:V16QI 1 "memory_operand")
5390 (match_operand:QI 2 "gpc_reg_operand")]
5391 "TARGET_P9_VECTOR && TARGET_64BIT"
5393 rtx mem = XEXP (operands[1], 0);
5394 mem = force_reg (DImode, mem);
5395 rtx len = gen_lowpart (DImode, operands[2]);
5396 emit_insn (gen_lxvl (operands[0], mem, len));
5400 (define_expand "len_store_v16qi"
5401 [(match_operand:V16QI 0 "memory_operand")
5402 (match_operand:V16QI 1 "vlogical_operand")
5403 (match_operand:QI 2 "gpc_reg_operand")
5405 "TARGET_P9_VECTOR && TARGET_64BIT"
5407 rtx mem = XEXP (operands[0], 0);
5408 mem = force_reg (DImode, mem);
5409 rtx len = gen_lowpart (DImode, operands[2]);
5410 emit_insn (gen_stxvl (operands[1], mem, len));
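
;; Semantically, len_load/len_store touch only the first LEN bytes of the
;; 16-byte location; roughly, as a scalar sketch of the intent:
;;
;;   for (i = 0; i < len && i < 16; i++)
;;     dst[i] = src[i];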
5414 (define_insn "*stxvl"
5415 [(set (mem:V16QI (match_operand:DI 1 "gpc_reg_operand" "b"))
5417 [(match_operand:V16QI 0 "vsx_register_operand" "wa")
5418 (mem:V16QI (match_dup 1))
5419 (match_operand:DI 2 "register_operand" "r")]
5421 "TARGET_P9_VECTOR && TARGET_64BIT"
5423 [(set_attr "type" "vecstore")])
5425 ;; Expand for builtin xst_len_r
5426 (define_expand "xst_len_r"
5427 [(match_operand:V16QI 0 "vsx_register_operand" "=wa")
5428 (match_operand:DI 1 "register_operand" "b")
5429 (match_operand:DI 2 "register_operand" "r")]
5432 rtx shift_mask = gen_reg_rtx (V16QImode);
5433 rtx rtx_vtmp = gen_reg_rtx (V16QImode);
5434 rtx tmp = gen_reg_rtx (DImode);
5436 emit_insn (gen_altivec_lvsr_reg (shift_mask, operands[2]));
5437 emit_insn (gen_altivec_vperm_v8hiv16qi (rtx_vtmp, operands[0], operands[0],
5439 emit_insn (gen_ashldi3 (tmp, operands[2], GEN_INT (56)));
5440 emit_insn (gen_stxvll (rtx_vtmp, operands[1], tmp));
5444 ;; Vector Compare Not Equal Byte (expressed as not (eq ...))
5445 (define_insn "vcmpneb"
5446 [(set (match_operand:V16QI 0 "altivec_register_operand" "=v")
5448 (eq:V16QI (match_operand:V16QI 1 "altivec_register_operand" "v")
5449 (match_operand:V16QI 2 "altivec_register_operand" "v"))))]
5452 [(set_attr "type" "vecsimple")])
5454 ;; Vector Compare Not Equal or Zero Byte
5455 (define_insn "vcmpnezb"
5456 [(set (match_operand:V16QI 0 "altivec_register_operand" "=v")
5458 [(match_operand:V16QI 1 "altivec_register_operand" "v")
5459 (match_operand:V16QI 2 "altivec_register_operand" "v")]
5463 [(set_attr "type" "vecsimple")])
5465 ;; Vector Compare Not Equal or Zero Byte predicate or record-form
5466 (define_insn "vcmpnezb_p"
5467 [(set (reg:CC CR6_REGNO)
5469 [(match_operand:V16QI 1 "altivec_register_operand" "v")
5470 (match_operand:V16QI 2 "altivec_register_operand" "v")]
5472 (set (match_operand:V16QI 0 "altivec_register_operand" "=v")
5478 "vcmpnezb. %0,%1,%2"
5479 [(set_attr "type" "vecsimple")])
5481 ;; Vector Compare Not Equal Half Word (expressed as not (eq ...))
5482 (define_insn "vcmpneh"
5483 [(set (match_operand:V8HI 0 "altivec_register_operand" "=v")
5485 (eq:V8HI (match_operand:V8HI 1 "altivec_register_operand" "v")
5486 (match_operand:V8HI 2 "altivec_register_operand" "v"))))]
5489 [(set_attr "type" "vecsimple")])
5491 ;; Vector Compare Not Equal or Zero Half Word
5492 (define_insn "vcmpnezh"
5493 [(set (match_operand:V8HI 0 "altivec_register_operand" "=v")
5494 (unspec:V8HI [(match_operand:V8HI 1 "altivec_register_operand" "v")
5495 (match_operand:V8HI 2 "altivec_register_operand" "v")]
5499 [(set_attr "type" "vecsimple")])
5501 ;; Vector Compare Not Equal Word (expressed as not (eq ...))
5502 (define_insn "vcmpnew"
5503 [(set (match_operand:V4SI 0 "altivec_register_operand" "=v")
5505 (eq:V4SI (match_operand:V4SI 1 "altivec_register_operand" "v")
5506 (match_operand:V4SI 2 "altivec_register_operand" "v"))))]
5509 [(set_attr "type" "vecsimple")])
5511 ;; Vector Compare Not Equal or Zero Word
5512 (define_insn "vcmpnezw"
5513 [(set (match_operand:V4SI 0 "altivec_register_operand" "=v")
5514 (unspec:V4SI [(match_operand:V4SI 1 "altivec_register_operand" "v")
5515 (match_operand:V4SI 2 "altivec_register_operand" "v")]
5519 [(set_attr "type" "vecsimple")])
5521 ;; Vector Count Leading Zero Least-Significant Bits Byte
5522 (define_insn "vclzlsbb_<mode>"
5523 [(set (match_operand:SI 0 "register_operand" "=r")
5525 [(match_operand:VSX_EXTRACT_I 1 "altivec_register_operand" "v")]
5529 [(set_attr "type" "vecsimple")])
5531 ;; Vector Count Trailing Zero Least-Significant Bits Byte
5532 (define_insn "vctzlsbb_<mode>"
5533 [(set (match_operand:SI 0 "register_operand" "=r")
5535 [(match_operand:VSX_EXTRACT_I 1 "altivec_register_operand" "v")]
5539 [(set_attr "type" "vecsimple")])
5541 ;; Vector Extract Unsigned Byte Left-Indexed
5542 (define_insn "vextublx"
5543 [(set (match_operand:SI 0 "register_operand" "=r")
5545 [(match_operand:SI 1 "register_operand" "r")
5546 (match_operand:V16QI 2 "altivec_register_operand" "v")]
5550 [(set_attr "type" "vecsimple")])
5552 ;; Vector Extract Unsigned Byte Right-Indexed
5553 (define_insn "vextubrx"
5554 [(set (match_operand:SI 0 "register_operand" "=r")
5556 [(match_operand:SI 1 "register_operand" "r")
5557 (match_operand:V16QI 2 "altivec_register_operand" "v")]
5561 [(set_attr "type" "vecsimple")])
5563 ;; Vector Extract Unsigned Half Word Left-Indexed
5564 (define_insn "vextuhlx"
5565 [(set (match_operand:SI 0 "register_operand" "=r")
5567 [(match_operand:SI 1 "register_operand" "r")
5568 (match_operand:V8HI 2 "altivec_register_operand" "v")]
5572 [(set_attr "type" "vecsimple")])
5574 ;; Vector Extract Unsigned Half Word Right-Indexed
5575 (define_insn "vextuhrx"
5576 [(set (match_operand:SI 0 "register_operand" "=r")
5578 [(match_operand:SI 1 "register_operand" "r")
5579 (match_operand:V8HI 2 "altivec_register_operand" "v")]
5583 [(set_attr "type" "vecsimple")])
5585 ;; Vector Extract Unsigned Word Left-Indexed
5586 (define_insn "vextuwlx"
5587 [(set (match_operand:SI 0 "register_operand" "=r")
5589 [(match_operand:SI 1 "register_operand" "r")
5590 (match_operand:V4SI 2 "altivec_register_operand" "v")]
5594 [(set_attr "type" "vecsimple")])
5596 ;; Vector Extract Unsigned Word Right-Indexed
5597 (define_insn "vextuwrx"
5598 [(set (match_operand:SI 0 "register_operand" "=r")
5600 [(match_operand:SI 1 "register_operand" "r")
5601 (match_operand:V4SI 2 "altivec_register_operand" "v")]
5605 [(set_attr "type" "vecsimple")])
5607 ;; Vector insert/extract word at arbitrary byte values. Note, the little
5608 ;; endian version needs to adjust the byte number, and the V4SI element in
5609 ;; insert4b must first be swapped into the expected position.
5610 (define_insn "extract4b"
5611 [(set (match_operand:V2DI 0 "vsx_register_operand")
5612 (unspec:V2DI [(match_operand:V16QI 1 "vsx_register_operand" "wa")
5613 (match_operand:QI 2 "const_0_to_12_operand" "n")]
5614 UNSPEC_XXEXTRACTUW))]
5617 if (!BYTES_BIG_ENDIAN)
5618 operands[2] = GEN_INT (12 - INTVAL (operands[2]));
5620 return "xxextractuw %x0,%x1,%2";
5623 (define_expand "insert4b"
5624 [(set (match_operand:V16QI 0 "vsx_register_operand")
5625 (unspec:V16QI [(match_operand:V4SI 1 "vsx_register_operand")
5626 (match_operand:V16QI 2 "vsx_register_operand")
5627 (match_operand:QI 3 "const_0_to_12_operand")]
5631 if (!BYTES_BIG_ENDIAN)
5633 rtx op1 = operands[1];
5634 rtx v4si_tmp = gen_reg_rtx (V4SImode);
5635 emit_insn (gen_vsx_xxpermdi_v4si_be (v4si_tmp, op1, op1, const1_rtx));
5636 operands[1] = v4si_tmp;
5637 operands[3] = GEN_INT (12 - INTVAL (operands[3]));
5641 (define_insn "*insert4b_internal"
5642 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
5643 (unspec:V16QI [(match_operand:V4SI 1 "vsx_register_operand" "wa")
5644 (match_operand:V16QI 2 "vsx_register_operand" "0")
5645 (match_operand:QI 3 "const_0_to_12_operand" "n")]
5648 "xxinsertw %x0,%x1,%3"
5649 [(set_attr "type" "vecperm")])
5652 ;; Generate a vector extract of four float-32 values from the left four
5653 ;; elements of an eight-element vector of float-16 values.
5654 (define_expand "vextract_fp_from_shorth"
5655 [(set (match_operand:V4SF 0 "register_operand" "=wa")
5656 (unspec:V4SF [(match_operand:V8HI 1 "register_operand" "wa")]
5657 UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTH))]
5661 int vals_le[16] = {15, 14, 0, 0, 13, 12, 0, 0, 11, 10, 0, 0, 9, 8, 0, 0};
5662 int vals_be[16] = {0, 0, 0, 1, 0, 0, 2, 3, 0, 0, 4, 5, 0, 0, 6, 7};
5665 rtx mask = gen_reg_rtx (V16QImode);
5666 rtx tmp = gen_reg_rtx (V16QImode);
5669 for (i = 0; i < 16; i++)
5670 if (!BYTES_BIG_ENDIAN)
5671 rvals[i] = GEN_INT (vals_le[i]);
5673 rvals[i] = GEN_INT (vals_be[i]);
5675 /* xvcvhpsp - vector convert F16 to vector F32 requires the four F16
5676 inputs in half words 1,3,5,7 (IBM numbering). Use xxperm to move
5677 src half words 0,1,2,3 (LE), src half words 4,5,6,7 (BE) for the
5678 conversion instruction. */
5679 v = gen_rtvec_v (16, rvals);
5680 emit_insn (gen_vec_initv16qiqi (mask, gen_rtx_PARALLEL (V16QImode, v)));
5681 emit_insn (gen_altivec_vperm_v8hiv16qi (tmp, operands[1],
5682 operands[1], mask));
5683 emit_insn (gen_vsx_xvcvhpsp (operands[0], tmp));
5687 ;; Generate a vector extract of four float-32 values from the right four
5688 ;; elements of an eight-element vector of float-16 values.
5689 (define_expand "vextract_fp_from_shortl"
5690 [(set (match_operand:V4SF 0 "register_operand" "=wa")
5691 (unspec:V4SF [(match_operand:V8HI 1 "register_operand" "wa")]
5692 UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTL))]
5695 int vals_le[16] = {7, 6, 0, 0, 5, 4, 0, 0, 3, 2, 0, 0, 1, 0, 0, 0};
5696 int vals_be[16] = {0, 0, 8, 9, 0, 0, 10, 11, 0, 0, 12, 13, 0, 0, 14, 15};
5700 rtx mask = gen_reg_rtx (V16QImode);
5701 rtx tmp = gen_reg_rtx (V16QImode);
5704 for (i = 0; i < 16; i++)
5705 if (!BYTES_BIG_ENDIAN)
5706 rvals[i] = GEN_INT (vals_le[i]);
5708 rvals[i] = GEN_INT (vals_be[i]);
5710 /* xvcvhpsp - vector convert F16 to vector F32 requires the four F16
5711 inputs in half words 1,3,5,7 (IBM numbering). Use xxperm to move
5712 src half words 4,5,6,7 (LE), src half words 0,1,2,3 (BE) for the
5713 conversion instruction. */
5714 v = gen_rtvec_v (16, rvals);
5715 emit_insn (gen_vec_initv16qiqi (mask, gen_rtx_PARALLEL (V16QImode, v)));
5716 emit_insn (gen_altivec_vperm_v8hiv16qi (tmp, operands[1],
5717 operands[1], mask));
5718 emit_insn (gen_vsx_xvcvhpsp (operands[0], tmp));
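;; A hedged C model of the net effect of the two expanders above, under one
;; plausible reading where "left" means the low-numbered vector lanes (the
;; fp16_to_f32 helper is assumed, not part of GCC):
;;
;;	#include <stdint.h>
;;
;;	extern float fp16_to_f32 (uint16_t);	/* assumed helper */
;;
;;	void extract_fp_from_shorth (float out[4], const uint16_t in[8])
;;	{
;;	  for (int i = 0; i < 4; i++)		/* left four elements */
;;	    out[i] = fp16_to_f32 (in[i]);
;;	}
;;
;;	void extract_fp_from_shortl (float out[4], const uint16_t in[8])
;;	{
;;	  for (int i = 0; i < 4; i++)		/* right four elements */
;;	    out[i] = fp16_to_f32 (in[i + 4]);
;;	}
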
;; Support for ISA 3.0 vector byte reverse

;; Swap all bytes within a vector
(define_insn "p9_xxbrq_v1ti"
  [(set (match_operand:V1TI 0 "vsx_register_operand" "=wa")
	(bswap:V1TI (match_operand:V1TI 1 "vsx_register_operand" "wa")))]
  "TARGET_P9_VECTOR"
  "xxbrq %x0,%x1"
  [(set_attr "type" "vecperm")])

(define_expand "p9_xxbrq_v16qi"
  [(use (match_operand:V16QI 0 "vsx_register_operand" "=wa"))
   (use (match_operand:V16QI 1 "vsx_register_operand" "wa"))]
  "TARGET_P9_VECTOR"
{
  rtx op0 = gen_reg_rtx (V1TImode);
  rtx op1 = gen_lowpart (V1TImode, operands[1]);
  emit_insn (gen_p9_xxbrq_v1ti (op0, op1));
  emit_move_insn (operands[0], gen_lowpart (V16QImode, op0));
  DONE;
})

;; Swap all bytes in each 64-bit element
(define_insn "p9_xxbrd_v2di"
  [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
	(bswap:V2DI (match_operand:V2DI 1 "vsx_register_operand" "wa")))]
  "TARGET_P9_VECTOR"
  "xxbrd %x0,%x1"
  [(set_attr "type" "vecperm")])

(define_expand "p9_xxbrd_v2df"
  [(use (match_operand:V2DF 0 "vsx_register_operand" "=wa"))
   (use (match_operand:V2DF 1 "vsx_register_operand" "wa"))]
  "TARGET_P9_VECTOR"
{
  rtx op0 = gen_reg_rtx (V2DImode);
  rtx op1 = gen_lowpart (V2DImode, operands[1]);
  emit_insn (gen_p9_xxbrd_v2di (op0, op1));
  emit_move_insn (operands[0], gen_lowpart (V2DFmode, op0));
  DONE;
})

;; Swap all bytes in each 32-bit element
(define_insn "p9_xxbrw_v4si"
  [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa")
	(bswap:V4SI (match_operand:V4SI 1 "vsx_register_operand" "wa")))]
  "TARGET_P9_VECTOR"
  "xxbrw %x0,%x1"
  [(set_attr "type" "vecperm")])

(define_expand "p9_xxbrw_v4sf"
  [(use (match_operand:V4SF 0 "vsx_register_operand" "=wa"))
   (use (match_operand:V4SF 1 "vsx_register_operand" "wa"))]
  "TARGET_P9_VECTOR"
{
  rtx op0 = gen_reg_rtx (V4SImode);
  rtx op1 = gen_lowpart (V4SImode, operands[1]);
  emit_insn (gen_p9_xxbrw_v4si (op0, op1));
  emit_move_insn (operands[0], gen_lowpart (V4SFmode, op0));
  DONE;
})

;; Swap all bytes in each element of vector
(define_expand "revb_<mode>"
  [(use (match_operand:VEC_REVB 0 "vsx_register_operand"))
   (use (match_operand:VEC_REVB 1 "vsx_register_operand"))]
  ""
{
  if (TARGET_P9_VECTOR)
    emit_insn (gen_p9_xxbr<VSX_XXBR>_<mode> (operands[0], operands[1]));
  else
    {
      /* Want to have the elements in reverse order relative
	 to the endian mode in use, i.e. in LE mode, put elements
	 in BE order.  */
      rtx sel = swap_endian_selector_for_mode (<MODE>mode);
      emit_insn (gen_altivec_vperm_<mode> (operands[0], operands[1],
					   operands[1], sel));
    }

  DONE;
})

;; Reversing bytes in vector char is just a NOP.
(define_expand "revb_v16qi"
  [(set (match_operand:V16QI 0 "vsx_register_operand")
	(bswap:V16QI (match_operand:V16QI 1 "vsx_register_operand")))]
  ""
{
  emit_move_insn (operands[0], operands[1]);
  DONE;
})

;; Swap all bytes in each 16-bit element
(define_insn "p9_xxbrh_v8hi"
  [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
	(bswap:V8HI (match_operand:V8HI 1 "vsx_register_operand" "wa")))]
  "TARGET_P9_VECTOR"
  "xxbrh %x0,%x1"
  [(set_attr "type" "vecperm")])

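;; For reference, a hedged C model of one member of the family: xxbrw
;; byte-reverses each 32-bit element independently (GCC's __builtin_bswap32
;; is used purely for illustration; the function name is ours):
;;
;;	#include <stdint.h>
;;
;;	void xxbrw_model (uint32_t v[4])
;;	{
;;	  for (int i = 0; i < 4; i++)
;;	    v[i] = __builtin_bswap32 (v[i]);	/* swap within each word */
;;	}
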
;; Operand numbers for the following peephole2
(define_constants
  [(SFBOOL_TMP_GPR	 0)		;; GPR temporary
   (SFBOOL_TMP_VSX	 1)		;; vector temporary
   (SFBOOL_MFVSR_D	 2)		;; move to gpr dest
   (SFBOOL_MFVSR_A	 3)		;; move to gpr src
   (SFBOOL_BOOL_D	 4)		;; and/ior/xor dest
   (SFBOOL_BOOL_A1	 5)		;; and/ior/xor arg1
   (SFBOOL_BOOL_A2	 6)		;; and/ior/xor arg2
   (SFBOOL_SHL_D	 7)		;; shift left dest
   (SFBOOL_SHL_A	 8)		;; shift left arg
   (SFBOOL_MTVSR_D	 9)		;; move to vector dest
   (SFBOOL_MFVSR_A_V4SF	10)		;; SFBOOL_MFVSR_A as V4SFmode
   (SFBOOL_BOOL_A_DI	11)		;; SFBOOL_BOOL_A1/A2 as DImode
   (SFBOOL_TMP_VSX_DI	12)		;; SFBOOL_TMP_VSX as DImode
   (SFBOOL_MTVSR_D_V4SF	13)])		;; SFBOOL_MTVSR_D as V4SFmode

;; Attempt to optimize some common GLIBC operations using logical operations to
;; pick apart SFmode operations.  For example, there is code from e_powf.c
;; after macro expansion that looks like:
;;
;;	typedef union {
;;	  float value;
;;	  uint32_t word;
;;	} ieee_float_shape_type;
;;
;;	float t1;
;;	int32_t is;
;;
;;	do {
;;	  ieee_float_shape_type gf_u;
;;	  gf_u.value = (t1);
;;	  (is) = gf_u.word;
;;	} while (0);
;;
;;	do {
;;	  ieee_float_shape_type sf_u;
;;	  sf_u.word = (is & 0xfffff000);
;;	  (t1) = sf_u.value;
;;	} while (0);
;;
;; This would result in two direct move operations (convert to memory format,
;; direct move to GPR, do the AND operation, direct move to VSX, convert to
;; scalar format).  With this peephole, we eliminate the direct move to the
;; GPR, and instead move the integer mask value to the vector register after a
;; shift and do the VSX logical operation.

;; The insns for dealing with SFmode in GPR registers look like:
;; (set (reg:V4SF reg2) (unspec:V4SF [(reg:SF reg1)] UNSPEC_VSX_CVDPSPN))
;;
;; (set (reg:DI reg3) (unspec:DI [(reg:V4SF reg2)] UNSPEC_P8V_RELOAD_FROM_VSX))
;;
;; (set (reg:DI reg4) (and:DI (reg:DI reg3) (reg:DI reg3)))
;;
;; (set (reg:DI reg5) (ashift:DI (reg:DI reg4) (const_int 32)))
;;
;; (set (reg:SF reg6) (unspec:SF [(reg:DI reg5)] UNSPEC_P8V_MTVSRD))
;;
;; (set (reg:SF reg6) (unspec:SF [(reg:SF reg6)] UNSPEC_VSX_CVSPDPN))
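;;
;; For reference, a self-contained, compilable form of the GLIBC idiom the
;; peephole targets (a sketch; the function name is ours, and GLIBC's
;; GET/SET_FLOAT_WORD macros expand to the do/while form shown above):
;;
;;	#include <stdint.h>
;;
;;	typedef union { float value; uint32_t word; } ieee_float_shape_type;
;;
;;	float mask_mantissa (float t1)
;;	{
;;	  ieee_float_shape_type u;
;;	  u.value = t1;
;;	  u.word &= 0xfffff000;		/* the AND done in integer regs */
;;	  return u.value;
;;	}
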
(define_peephole2
  [(match_scratch:DI SFBOOL_TMP_GPR "r")
   (match_scratch:V4SF SFBOOL_TMP_VSX "wa")

   ;; MFVSRWZ (aka zero_extend)
   (set (match_operand:DI SFBOOL_MFVSR_D "int_reg_operand")
	(zero_extend:DI
	 (match_operand:SI SFBOOL_MFVSR_A "vsx_register_operand")))

   ;; AND/IOR/XOR operation on int
   (set (match_operand:SI SFBOOL_BOOL_D "int_reg_operand")
	(and_ior_xor:SI (match_operand:SI SFBOOL_BOOL_A1 "int_reg_operand")
			(match_operand:SI SFBOOL_BOOL_A2 "reg_or_cint_operand")))

   ;; SLDI
   (set (match_operand:DI SFBOOL_SHL_D "int_reg_operand")
	(ashift:DI (match_operand:DI SFBOOL_SHL_A "int_reg_operand")
		   (const_int 32)))

   ;; MTVSRD
   (set (match_operand:SF SFBOOL_MTVSR_D "vsx_register_operand")
	(unspec:SF [(match_dup SFBOOL_SHL_D)] UNSPEC_P8V_MTVSRD))]

  "TARGET_POWERPC64 && TARGET_DIRECT_MOVE
   /* The REG_P (xxx) tests prevent SUBREGs, which allows us to use REGNO
      to compare registers, when the mode is different.  */
   && REG_P (operands[SFBOOL_MFVSR_D]) && REG_P (operands[SFBOOL_BOOL_D])
   && REG_P (operands[SFBOOL_BOOL_A1]) && REG_P (operands[SFBOOL_SHL_D])
   && REG_P (operands[SFBOOL_SHL_A]) && REG_P (operands[SFBOOL_MTVSR_D])
   && (REG_P (operands[SFBOOL_BOOL_A2])
       || CONST_INT_P (operands[SFBOOL_BOOL_A2]))
   && (REGNO (operands[SFBOOL_BOOL_D]) == REGNO (operands[SFBOOL_MFVSR_D])
       || peep2_reg_dead_p (2, operands[SFBOOL_MFVSR_D]))
   && (REGNO (operands[SFBOOL_MFVSR_D]) == REGNO (operands[SFBOOL_BOOL_A1])
       || (REG_P (operands[SFBOOL_BOOL_A2])
	   && REGNO (operands[SFBOOL_MFVSR_D])
		== REGNO (operands[SFBOOL_BOOL_A2])))
   && REGNO (operands[SFBOOL_BOOL_D]) == REGNO (operands[SFBOOL_SHL_A])
   && (REGNO (operands[SFBOOL_SHL_D]) == REGNO (operands[SFBOOL_BOOL_D])
       || peep2_reg_dead_p (3, operands[SFBOOL_BOOL_D]))
   && peep2_reg_dead_p (4, operands[SFBOOL_SHL_D])"
  [(set (match_dup SFBOOL_TMP_GPR)
	(ashift:DI (match_dup SFBOOL_BOOL_A_DI)
		   (const_int 32)))

   (set (match_dup SFBOOL_TMP_VSX_DI)
	(match_dup SFBOOL_TMP_GPR))

   (set (match_dup SFBOOL_MTVSR_D_V4SF)
	(and_ior_xor:V4SF (match_dup SFBOOL_MFVSR_A_V4SF)
			  (match_dup SFBOOL_TMP_VSX)))]
{
  rtx bool_a1 = operands[SFBOOL_BOOL_A1];
  rtx bool_a2 = operands[SFBOOL_BOOL_A2];
  int regno_mfvsr_d = REGNO (operands[SFBOOL_MFVSR_D]);
  int regno_mfvsr_a = REGNO (operands[SFBOOL_MFVSR_A]);
  int regno_tmp_vsx = REGNO (operands[SFBOOL_TMP_VSX]);
  int regno_mtvsr_d = REGNO (operands[SFBOOL_MTVSR_D]);

  if (CONST_INT_P (bool_a2))
    {
      rtx tmp_gpr = operands[SFBOOL_TMP_GPR];
      emit_move_insn (tmp_gpr, bool_a2);
      operands[SFBOOL_BOOL_A_DI] = tmp_gpr;
    }
  else
    {
      int regno_bool_a1 = REGNO (bool_a1);
      int regno_bool_a2 = REGNO (bool_a2);
      int regno_bool_a = (regno_mfvsr_d == regno_bool_a1
			  ? regno_bool_a2 : regno_bool_a1);
      operands[SFBOOL_BOOL_A_DI] = gen_rtx_REG (DImode, regno_bool_a);
    }

  operands[SFBOOL_MFVSR_A_V4SF] = gen_rtx_REG (V4SFmode, regno_mfvsr_a);
  operands[SFBOOL_TMP_VSX_DI] = gen_rtx_REG (DImode, regno_tmp_vsx);
  operands[SFBOOL_MTVSR_D_V4SF] = gen_rtx_REG (V4SFmode, regno_mtvsr_d);
})

;; Support signed/unsigned long long to float conversion vectorization.
;; Note that any_float (pc) here is just for code attribute <su>.
(define_expand "vec_pack<su>_float_v2di"
  [(match_operand:V4SF 0 "vfloat_operand")
   (match_operand:V2DI 1 "vint_operand")
   (match_operand:V2DI 2 "vint_operand")
   (any_float (pc))]
  "TARGET_VSX"
{
  rtx r1 = gen_reg_rtx (V4SFmode);
  rtx r2 = gen_reg_rtx (V4SFmode);
  emit_insn (gen_vsx_xvcv<su>xdsp (r1, operands[1]));
  emit_insn (gen_vsx_xvcv<su>xdsp (r2, operands[2]));
  rs6000_expand_extract_even (operands[0], r1, r2);
  DONE;
})

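;; Net semantics, as a hedged C model (the signed flavor; each V2DI input is
;; converted and the results are packed into one V4SF; function name ours):
;;
;;	#include <stdint.h>
;;
;;	void vec_pack_float_v2di_model (float out[4], const int64_t a[2],
;;					const int64_t b[2])
;;	{
;;	  out[0] = (float) a[0];
;;	  out[1] = (float) a[1];
;;	  out[2] = (float) b[0];
;;	  out[3] = (float) b[1];
;;	}
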
;; Support float to signed/unsigned long long conversion vectorization.
;; Note that any_fix (pc) here is just for code attribute <su>.
(define_expand "vec_unpack_<su>fix_trunc_hi_v4sf"
  [(match_operand:V2DI 0 "vint_operand")
   (match_operand:V4SF 1 "vfloat_operand")
   (any_fix (pc))]
  "TARGET_VSX"
{
  rtx reg = gen_reg_rtx (V4SFmode);
  rs6000_expand_interleave (reg, operands[1], operands[1], BYTES_BIG_ENDIAN);
  emit_insn (gen_vsx_xvcvsp<su>xds (operands[0], reg));
  DONE;
})

;; Note that any_fix (pc) here is just for code attribute <su>.
(define_expand "vec_unpack_<su>fix_trunc_lo_v4sf"
  [(match_operand:V2DI 0 "vint_operand")
   (match_operand:V4SF 1 "vfloat_operand")
   (any_fix (pc))]
  "TARGET_VSX"
{
  rtx reg = gen_reg_rtx (V4SFmode);
  rs6000_expand_interleave (reg, operands[1], operands[1], !BYTES_BIG_ENDIAN);
  emit_insn (gen_vsx_xvcvsp<su>xds (operands[0], reg));
  DONE;
})

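;; Hedged C models of the two expanders above, assuming the convention that
;; the "hi" optab converts the low-numbered lanes and "lo" the rest (the
;; BYTES_BIG_ENDIAN argument to the interleave keeps the lane choice
;; consistent across endiannesses; function names ours):
;;
;;	#include <stdint.h>
;;
;;	void unpack_fix_trunc_hi (int64_t out[2], const float in[4])
;;	{
;;	  out[0] = (int64_t) in[0];
;;	  out[1] = (int64_t) in[1];
;;	}
;;
;;	void unpack_fix_trunc_lo (int64_t out[2], const float in[4])
;;	{
;;	  out[0] = (int64_t) in[2];
;;	  out[1] = (int64_t) in[3];
;;	}
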
(define_insn "vsx_<xvcvbf16>"
  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
	(unspec:V16QI [(match_operand:V16QI 1 "vsx_register_operand" "wa")]
		      XVCVBF16))]
  "TARGET_POWER10"
  "<xvcvbf16> %x0,%x1"
  [(set_attr "type" "vecfloat")])

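;; bfloat16 is the high half of an IEEE single.  A hedged C model of the
;; widening direction only (the narrowing direction also rounds, so it is
;; not just a shift; function name ours):
;;
;;	#include <stdint.h>
;;	#include <string.h>
;;
;;	float bf16_to_f32 (uint16_t b)
;;	{
;;	  uint32_t w = (uint32_t) b << 16;	/* restore low-order zeros */
;;	  float f;
;;	  memcpy (&f, &w, sizeof f);
;;	  return f;
;;	}
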
(define_insn "vec_mtvsrbmi"
  [(set (match_operand:V16QI 0 "altivec_register_operand" "=v")
	(unspec:V16QI [(match_operand:QI 1 "u6bit_cint_operand" "n")]
	UNSPEC_MTVSBM))]
  "TARGET_POWER10"
  "mtvsrbmi %0,%1"
)

(define_insn "vec_mtvsr_<mode>"
  [(set (match_operand:VSX_MM 0 "altivec_register_operand" "=v")
	(unspec:VSX_MM [(match_operand:DI 1 "gpc_reg_operand" "r")]
	UNSPEC_MTVSBM))]
  "TARGET_POWER10"
  "mtvsr<VSX_MM_SUFFIX>m %0,%1"
  [(set_attr "type" "vecsimple")])

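;; A hedged C model of the byte form: each of the 16 low-order mask bits
;; expands to one byte of the result, with element 0 taking the most
;; significant bit of the 16-bit field (function name ours):
;;
;;	#include <stdint.h>
;;
;;	void mtvsrbm_model (uint8_t out[16], uint16_t mask)
;;	{
;;	  for (int i = 0; i < 16; i++)
;;	    out[i] = ((mask >> (15 - i)) & 1) ? 0xff : 0x00;
;;	}
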
(define_insn "vec_cntmb_<mode>"
  [(set (match_operand:DI 0 "gpc_reg_operand" "=r")
	(unspec:DI [(match_operand:VSX_MM4 1 "altivec_register_operand" "v")
		    (match_operand:QI 2 "const_0_to_1_operand" "n")]
	UNSPEC_VCNTMB))]
  "TARGET_POWER10"
  "vcntmb<VSX_MM_SUFFIX> %0,%1,%2"
  [(set_attr "type" "vecsimple")])

(define_insn "vec_extract_<mode>"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(unspec:SI [(match_operand:VSX_MM 1 "altivec_register_operand" "v")]
	UNSPEC_VEXTRACT))]
  "TARGET_POWER10"
  "vextract<VSX_MM_SUFFIX>m %0,%1"
  [(set_attr "type" "vecsimple")])

(define_insn "vec_expand_<mode>"
  [(set (match_operand:VSX_MM 0 "vsx_register_operand" "=v")
	(unspec:VSX_MM [(match_operand:VSX_MM 1 "vsx_register_operand" "v")]
	UNSPEC_VEXPAND))]
  "TARGET_POWER10"
  "vexpand<VSX_MM_SUFFIX>m %0,%1"
  [(set_attr "type" "vecsimple")])
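
;; Hedged C models of the byte forms of the mask extract/expand pair,
;; assuming element 0's bit occupies the most significant position of the
;; extracted mask field (function names ours):
;;
;;	#include <stdint.h>
;;
;;	uint32_t vextractbm_model (const uint8_t vr[16])
;;	{
;;	  uint32_t mask = 0;
;;	  for (int i = 0; i < 16; i++)
;;	    mask = (mask << 1) | (vr[i] >> 7);	/* gather sign bits */
;;	  return mask;
;;	}
;;
;;	void vexpandbm_model (uint8_t out[16], const uint8_t in[16])
;;	{
;;	  for (int i = 0; i < 16; i++)
;;	    out[i] = (in[i] & 0x80) ? 0xff : 0x00;	/* broadcast sign bit */
;;	}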