2 ;; Copyright (C) 2009-2018 Free Software Foundation, Inc.
3 ;; Contributed by Michael Meissner <meissner@linux.vnet.ibm.com>
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify it
8 ;; under the terms of the GNU General Public License as published
9 ;; by the Free Software Foundation; either version 3, or (at your
10 ;; option) any later version.
12 ;; GCC is distributed in the hope that it will be useful, but WITHOUT
13 ;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 ;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
15 ;; License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
21 ;; Iterator for comparison types
22 (define_code_iterator CMP_TEST [eq lt gt unordered])
24 ;; Mode attribute for vector floate and floato conversions
25 (define_mode_attr VF_sxddp [(V2DI "sxd") (V2DF "dp")])
27 ;; Iterator for both scalar and vector floating point types supported by VSX
28 (define_mode_iterator VSX_B [DF V4SF V2DF])
30 ;; Iterator for the 2 64-bit vector types
31 (define_mode_iterator VSX_D [V2DF V2DI])
33 ;; Mode iterator to handle swapping words on little endian for the 128-bit
34 ;; types that go in a single vector register.
35 (define_mode_iterator VSX_LE_128 [(KF "FLOAT128_VECTOR_P (KFmode)")
36 (TF "FLOAT128_VECTOR_P (TFmode)")
40 ;; Iterator for 128-bit integer types that go in a single vector register.
41 (define_mode_iterator VSX_TI [TI V1TI])
43 ;; Iterator for the 2 32-bit vector types
44 (define_mode_iterator VSX_W [V4SF V4SI])
46 ;; Iterator for the DF types
47 (define_mode_iterator VSX_DF [V2DF DF])
49 ;; Iterator for vector floating point types supported by VSX
50 (define_mode_iterator VSX_F [V4SF V2DF])
52 ;; Iterator for logical types supported by VSX
53 (define_mode_iterator VSX_L [V16QI
61 (KF "FLOAT128_VECTOR_P (KFmode)")
62 (TF "FLOAT128_VECTOR_P (TFmode)")])
64 ;; Iterator for memory moves.
65 (define_mode_iterator VSX_M [V16QI
72 (KF "FLOAT128_VECTOR_P (KFmode)")
73 (TF "FLOAT128_VECTOR_P (TFmode)")
76 (define_mode_attr VSX_XXBR [(V8HI "h")
83 ;; Map into the appropriate load/store name based on the type
84 (define_mode_attr VSm [(V16QI "vw4")
96 ;; Map into the appropriate suffix based on the type
97 (define_mode_attr VSs [(V16QI "sp")
110 ;; Map the register class used
111 (define_mode_attr VSr [(V16QI "v")
125 ;; Map the register class used for float<->int conversions (floating point side)
126 ;; VSr2 is the preferred register class, VSr3 is any register class that will
128 (define_mode_attr VSr2 [(V2DF "wd")
136 (define_mode_attr VSr3 [(V2DF "wa")
144 ;; Map the register class for sp<->dp float conversions, destination
145 (define_mode_attr VSr4 [(SF "ws")
150 ;; Map the register class for sp<->dp float conversions, source
151 (define_mode_attr VSr5 [(SF "ws")
156 ;; The VSX register class that a type can occupy, even if it is not the
157 ;; preferred register class (VSr is the preferred register class that will get
159 (define_mode_attr VSa [(V16QI "wa")
173 ;; Same size integer type for floating point data
174 (define_mode_attr VSi [(V4SF "v4si")
178 (define_mode_attr VSI [(V4SF "V4SI")
182 ;; Word size for same size conversion
183 (define_mode_attr VSc [(V4SF "w")
187 ;; Map into either s or v, depending on whether this is a scalar or vector
189 (define_mode_attr VSv [(V16QI "v")
199 ;; Appropriate type for add ops (and other simple FP ops)
200 (define_mode_attr VStype_simple [(V2DF "vecdouble")
204 (define_mode_attr VSfptype_simple [(V2DF "fp_addsub_d")
208 ;; Appropriate type for multiply ops
209 (define_mode_attr VStype_mul [(V2DF "vecdouble")
213 (define_mode_attr VSfptype_mul [(V2DF "fp_mul_d")
217 ;; Appropriate type for divide ops.
218 (define_mode_attr VStype_div [(V2DF "vecdiv")
222 (define_mode_attr VSfptype_div [(V2DF "fp_div_d")
226 ;; Appropriate type for sqrt ops. For now, just lump the vector sqrt with
228 (define_mode_attr VStype_sqrt [(V2DF "dsqrt")
232 (define_mode_attr VSfptype_sqrt [(V2DF "fp_sqrt_d")
236 ;; Iterator and modes for sp<->dp conversions
237 ;; Because scalar SF values are represented internally as double, use the
238 ;; V4SF type to represent this rather than SF.
239 (define_mode_iterator VSX_SPDP [DF V4SF V2DF])
241 (define_mode_attr VS_spdp_res [(DF "V4SF")
245 (define_mode_attr VS_spdp_insn [(DF "xscvdpsp")
249 (define_mode_attr VS_spdp_type [(DF "fp")
253 ;; Map the scalar mode for a vector type
254 (define_mode_attr VS_scalar [(V1TI "TI")
262 ;; Map to a double-sized vector mode
263 (define_mode_attr VS_double [(V4SI "V8SI")
269 ;; Map register class for 64-bit element in 128-bit vector for direct moves
271 (define_mode_attr VS_64dm [(V2DF "wk")
274 ;; Map register class for 64-bit element in 128-bit vector for normal register
276 (define_mode_attr VS_64reg [(V2DF "ws")
279 ;; Iterators for loading constants with xxspltib
280 (define_mode_iterator VSINT_84 [V4SI V2DI DI SI])
281 (define_mode_iterator VSINT_842 [V8HI V4SI V2DI])
283 ;; Vector reverse byte modes
284 (define_mode_iterator VEC_REVB [V8HI V4SI V2DI V4SF V2DF V1TI])
286 ;; Iterator for ISA 3.0 vector extract/insert of small integer vectors.
287 ;; VSX_EXTRACT_I2 doesn't include V4SImode because SI extracts can be
288 ;; done on ISA 2.07 and not just ISA 3.0.
289 (define_mode_iterator VSX_EXTRACT_I [V16QI V8HI V4SI])
290 (define_mode_iterator VSX_EXTRACT_I2 [V16QI V8HI])
292 (define_mode_attr VSX_EXTRACT_WIDTH [(V16QI "b")
296 ;; Mode attribute to give the correct predicate for ISA 3.0 vector extract and
297 ;; insert to validate the operand number.
298 (define_mode_attr VSX_EXTRACT_PREDICATE [(V16QI "const_0_to_15_operand")
299 (V8HI "const_0_to_7_operand")
300 (V4SI "const_0_to_3_operand")])
302 ;; Mode attribute to give the constraint for vector extract and insert
304 (define_mode_attr VSX_EX [(V16QI "v")
308 ;; Mode iterator for binary floating types other than double to
309 ;; optimize converting to that floating point type from an extract
310 ;; of an integer type
311 (define_mode_iterator VSX_EXTRACT_FL [SF
312 (IF "FLOAT128_2REG_P (IFmode)")
313 (KF "TARGET_FLOAT128_HW")
314 (TF "FLOAT128_2REG_P (TFmode)
315 || (FLOAT128_IEEE_P (TFmode)
316 && TARGET_FLOAT128_HW)")])
318 ;; Mode iterator for binary floating types that have a direct conversion
319 ;; from 64-bit integer to floating point
320 (define_mode_iterator FL_CONV [SF
322 (KF "TARGET_FLOAT128_HW")
323 (TF "TARGET_FLOAT128_HW
324 && FLOAT128_IEEE_P (TFmode)")])
326 ;; Iterator for the 2 short vector types to do a splat from an integer
327 (define_mode_iterator VSX_SPLAT_I [V16QI V8HI])
329 ;; Mode attribute to give the count for the splat instruction to splat
330 ;; the value in the 64-bit integer slot
331 (define_mode_attr VSX_SPLAT_COUNT [(V16QI "7") (V8HI "3")])
333 ;; Mode attribute to give the suffix for the splat instruction
334 (define_mode_attr VSX_SPLAT_SUFFIX [(V16QI "b") (V8HI "h")])
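;; For illustration (a hypothetical template, not a pattern from this file):
;; an output string that used these attributes, such as
;; "vsplt<VSX_SPLAT_SUFFIX> %0,%1,<VSX_SPLAT_COUNT>", would expand to
;; "vspltb %0,%1,7" for V16QI and "vsplth %0,%1,3" for V8HI, splatting the
;; element that overlaps the 64-bit integer slot.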
336 ;; Constants for creating unspecs
337 (define_c_enum "unspec"
354 UNSPEC_VSX_UNS_FLOAT2
356 UNSPEC_VSX_UNS_FLOATE
358 UNSPEC_VSX_UNS_FLOATO
374 UNSPEC_VSX_XVCVDPSXDS
376 UNSPEC_VSX_XVCVDPUXDS
377 UNSPEC_VSX_SIGN_EXTEND
378 UNSPEC_VSX_XVCVSPSXWS
379 UNSPEC_VSX_XVCVSPSXDS
388 UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTH
389 UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTL
422 UNSPEC_VSX_FIRST_MATCH_INDEX
423 UNSPEC_VSX_FIRST_MATCH_EOS_INDEX
424 UNSPEC_VSX_FIRST_MISMATCH_INDEX
425 UNSPEC_VSX_FIRST_MISMATCH_EOS_INDEX
430 ;; The patterns for LE permuted loads and stores come before the general
431 ;; VSX moves so they match first.
432 (define_insn_and_split "*vsx_le_perm_load_<mode>"
433 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=<VSa>")
434 (match_operand:VSX_D 1 "indexed_or_indirect_operand" "Z"))]
435 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
441 (parallel [(const_int 1) (const_int 0)])))
445 (parallel [(const_int 1) (const_int 0)])))]
447 rtx mem = operands[1];
449 /* Don't apply the swap optimization if we've already performed register
450 allocation and the hard register destination is not in the altivec
452 if ((MEM_ALIGN (mem) >= 128)
453 && ((reg_or_subregno (operands[0]) >= FIRST_PSEUDO_REGISTER)
454 || ALTIVEC_REGNO_P (reg_or_subregno (operands[0]))))
456 rtx mem_address = XEXP (mem, 0);
457 enum machine_mode mode = GET_MODE (mem);
459 if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
461 /* Replace the source memory address with masked address. */
462 rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem);
463 emit_insn (lvx_set_expr);
466 else if (rs6000_quadword_masked_address_p (mem_address))
468 /* This rtl is already in the form that matches lvx
469 instruction, so leave it alone. */
472 /* Otherwise, fall through to transform into a swapping load. */
474 operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
477 [(set_attr "type" "vecload")
478 (set_attr "length" "8")])
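;; The split above turns a single LE load into roughly this two-insn
;; sequence, which is why the pattern's length is 8:
;;     lxvd2x   reg,addr        load with doublewords in BE element order
;;     xxpermdi reg,reg,reg,2   swap the two doublewords back
;; (illustrative sketch; the exact insns come from the matched patterns)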
480 (define_insn_and_split "*vsx_le_perm_load_<mode>"
481 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=<VSa>")
482 (match_operand:VSX_W 1 "indexed_or_indirect_operand" "Z"))]
483 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
489 (parallel [(const_int 2) (const_int 3)
490 (const_int 0) (const_int 1)])))
494 (parallel [(const_int 2) (const_int 3)
495 (const_int 0) (const_int 1)])))]
497 rtx mem = operands[1];
499 /* Don't apply the swap optimization if we've already performed register
500 allocation and the hard register destination is not in the altivec
502 if ((MEM_ALIGN (mem) >= 128)
503 && ((REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER)
504 || ALTIVEC_REGNO_P (REGNO (operands[0]))))
506 rtx mem_address = XEXP (mem, 0);
507 enum machine_mode mode = GET_MODE (mem);
509 if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
511 /* Replace the source memory address with masked address. */
512 rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem);
513 emit_insn (lvx_set_expr);
516 else if (rs6000_quadword_masked_address_p (mem_address))
518 /* This rtl is already in the form that matches lvx
519 instruction, so leave it alone. */
522 /* Otherwise, fall through to transform into a swapping load. */
524 operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
527 [(set_attr "type" "vecload")
528 (set_attr "length" "8")])
530 (define_insn_and_split "*vsx_le_perm_load_v8hi"
531 [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
532 (match_operand:V8HI 1 "indexed_or_indirect_operand" "Z"))]
533 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
539 (parallel [(const_int 4) (const_int 5)
540 (const_int 6) (const_int 7)
541 (const_int 0) (const_int 1)
542 (const_int 2) (const_int 3)])))
546 (parallel [(const_int 4) (const_int 5)
547 (const_int 6) (const_int 7)
548 (const_int 0) (const_int 1)
549 (const_int 2) (const_int 3)])))]
551 rtx mem = operands[1];
553 /* Don't apply the swap optimization if we've already performed register
554 allocation and the hard register destination is not in the altivec
556 if ((MEM_ALIGN (mem) >= 128)
557 && ((REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER)
558 || ALTIVEC_REGNO_P (REGNO (operands[0]))))
560 rtx mem_address = XEXP (mem, 0);
561 enum machine_mode mode = GET_MODE (mem);
563 if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
565 /* Replace the source memory address with masked address. */
566 rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem);
567 emit_insn (lvx_set_expr);
570 else if (rs6000_quadword_masked_address_p (mem_address))
572 /* This rtl is already in the form that matches lvx
573 instruction, so leave it alone. */
576 /* Otherwise, fall through to transform into a swapping load. */
578 operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
581 [(set_attr "type" "vecload")
582 (set_attr "length" "8")])
584 (define_insn_and_split "*vsx_le_perm_load_v16qi"
585 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
586 (match_operand:V16QI 1 "indexed_or_indirect_operand" "Z"))]
587 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
593 (parallel [(const_int 8) (const_int 9)
594 (const_int 10) (const_int 11)
595 (const_int 12) (const_int 13)
596 (const_int 14) (const_int 15)
597 (const_int 0) (const_int 1)
598 (const_int 2) (const_int 3)
599 (const_int 4) (const_int 5)
600 (const_int 6) (const_int 7)])))
604 (parallel [(const_int 8) (const_int 9)
605 (const_int 10) (const_int 11)
606 (const_int 12) (const_int 13)
607 (const_int 14) (const_int 15)
608 (const_int 0) (const_int 1)
609 (const_int 2) (const_int 3)
610 (const_int 4) (const_int 5)
611 (const_int 6) (const_int 7)])))]
613 rtx mem = operands[1];
615 /* Don't apply the swap optimization if we've already performed register
616 allocation and the hard register destination is not in the altivec
618 if ((MEM_ALIGN (mem) >= 128)
619 && ((REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER)
620 || ALTIVEC_REGNO_P (REGNO (operands[0]))))
622 rtx mem_address = XEXP (mem, 0);
623 enum machine_mode mode = GET_MODE (mem);
625 if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
627 /* Replace the source memory address with masked address. */
628 rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem);
629 emit_insn (lvx_set_expr);
632 else if (rs6000_quadword_masked_address_p (mem_address))
634 /* This rtl is already in the form that matches lvx
635 instruction, so leave it alone. */
638 /* Otherwise, fall through to transform into a swapping load. */
640 operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
643 [(set_attr "type" "vecload")
644 (set_attr "length" "8")])
646 (define_insn "*vsx_le_perm_store_<mode>"
647 [(set (match_operand:VSX_D 0 "indexed_or_indirect_operand" "=Z")
648 (match_operand:VSX_D 1 "vsx_register_operand" "+<VSa>"))]
649 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
651 [(set_attr "type" "vecstore")
652 (set_attr "length" "12")])
655 [(set (match_operand:VSX_D 0 "indexed_or_indirect_operand")
656 (match_operand:VSX_D 1 "vsx_register_operand"))]
657 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
661 (parallel [(const_int 1) (const_int 0)])))
665 (parallel [(const_int 1) (const_int 0)])))]
667 rtx mem = operands[0];
669 /* Don't apply the swap optimization if we've already performed register
670 allocation and the hard register source is not in the altivec range. */
671 if ((MEM_ALIGN (mem) >= 128)
672 && ((reg_or_subregno (operands[1]) >= FIRST_PSEUDO_REGISTER)
673 || ALTIVEC_REGNO_P (reg_or_subregno (operands[1]))))
675 rtx mem_address = XEXP (mem, 0);
676 enum machine_mode mode = GET_MODE (mem);
677 if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
679 rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]);
680 emit_insn (stvx_set_expr);
683 else if (rs6000_quadword_masked_address_p (mem_address))
685 /* This rtl is already in the form that matches stvx instruction,
686 so leave it alone. */
689 /* Otherwise, fall through to transform into a swapping store. */
692 operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
696 ;; The post-reload split requires that we re-permute the source
697 ;; register in case it is still live.
699 [(set (match_operand:VSX_D 0 "indexed_or_indirect_operand")
700 (match_operand:VSX_D 1 "vsx_register_operand"))]
701 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
705 (parallel [(const_int 1) (const_int 0)])))
709 (parallel [(const_int 1) (const_int 0)])))
713 (parallel [(const_int 1) (const_int 0)])))]
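;; An illustrative sketch of the post-reload sequence this split produces
;; (hence "length" 12 on the insn above): permute the source, store it, then
;; permute it back in case the register is still live:
;;     xxpermdi vr,vr,vr,2
;;     stxvd2x  vr,addr
;;     xxpermdi vr,vr,vr,2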
716 (define_insn "*vsx_le_perm_store_<mode>"
717 [(set (match_operand:VSX_W 0 "indexed_or_indirect_operand" "=Z")
718 (match_operand:VSX_W 1 "vsx_register_operand" "+<VSa>"))]
719 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
721 [(set_attr "type" "vecstore")
722 (set_attr "length" "12")])
725 [(set (match_operand:VSX_W 0 "indexed_or_indirect_operand")
726 (match_operand:VSX_W 1 "vsx_register_operand"))]
727 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
731 (parallel [(const_int 2) (const_int 3)
732 (const_int 0) (const_int 1)])))
736 (parallel [(const_int 2) (const_int 3)
737 (const_int 0) (const_int 1)])))]
739 rtx mem = operands[0];
741 /* Don't apply the swap optimization if we've already performed register
742 allocation and the hard register source is not in the altivec range. */
743 if ((MEM_ALIGN (mem) >= 128)
744 && ((reg_or_subregno (operands[1]) >= FIRST_PSEUDO_REGISTER)
745 || ALTIVEC_REGNO_P (reg_or_subregno (operands[1]))))
747 rtx mem_address = XEXP (mem, 0);
748 enum machine_mode mode = GET_MODE (mem);
749 if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
751 rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]);
752 emit_insn (stvx_set_expr);
755 else if (rs6000_quadword_masked_address_p (mem_address))
757 /* This rtl is already in the form that matches stvx instruction,
758 so leave it alone. */
761 /* Otherwise, fall through to transform into a swapping store. */
764 operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
768 ;; The post-reload split requires that we re-permute the source
769 ;; register in case it is still live.
771 [(set (match_operand:VSX_W 0 "indexed_or_indirect_operand")
772 (match_operand:VSX_W 1 "vsx_register_operand"))]
773 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
777 (parallel [(const_int 2) (const_int 3)
778 (const_int 0) (const_int 1)])))
782 (parallel [(const_int 2) (const_int 3)
783 (const_int 0) (const_int 1)])))
787 (parallel [(const_int 2) (const_int 3)
788 (const_int 0) (const_int 1)])))]
791 (define_insn "*vsx_le_perm_store_v8hi"
792 [(set (match_operand:V8HI 0 "indexed_or_indirect_operand" "=Z")
793 (match_operand:V8HI 1 "vsx_register_operand" "+wa"))]
794 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
796 [(set_attr "type" "vecstore")
797 (set_attr "length" "12")])
800 [(set (match_operand:V8HI 0 "indexed_or_indirect_operand")
801 (match_operand:V8HI 1 "vsx_register_operand"))]
802 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
806 (parallel [(const_int 4) (const_int 5)
807 (const_int 6) (const_int 7)
808 (const_int 0) (const_int 1)
809 (const_int 2) (const_int 3)])))
813 (parallel [(const_int 4) (const_int 5)
814 (const_int 6) (const_int 7)
815 (const_int 0) (const_int 1)
816 (const_int 2) (const_int 3)])))]
818 rtx mem = operands[0];
820 /* Don't apply the swap optimization if we've already performed register
821 allocation and the hard register source is not in the altivec range. */
822 if ((MEM_ALIGN (mem) >= 128)
823 && ((reg_or_subregno (operands[1]) >= FIRST_PSEUDO_REGISTER)
824 || ALTIVEC_REGNO_P (reg_or_subregno (operands[1]))))
826 rtx mem_address = XEXP (mem, 0);
827 enum machine_mode mode = GET_MODE (mem);
828 if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
830 rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]);
831 emit_insn (stvx_set_expr);
834 else if (rs6000_quadword_masked_address_p (mem_address))
836 /* This rtl is already in the form that matches stvx instruction,
837 so leave it alone. */
840 /* Otherwise, fall through to transform into a swapping store. */
843 operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
847 ;; The post-reload split requires that we re-permute the source
848 ;; register in case it is still live.
850 [(set (match_operand:V8HI 0 "indexed_or_indirect_operand")
851 (match_operand:V8HI 1 "vsx_register_operand"))]
852 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
856 (parallel [(const_int 4) (const_int 5)
857 (const_int 6) (const_int 7)
858 (const_int 0) (const_int 1)
859 (const_int 2) (const_int 3)])))
863 (parallel [(const_int 4) (const_int 5)
864 (const_int 6) (const_int 7)
865 (const_int 0) (const_int 1)
866 (const_int 2) (const_int 3)])))
870 (parallel [(const_int 4) (const_int 5)
871 (const_int 6) (const_int 7)
872 (const_int 0) (const_int 1)
873 (const_int 2) (const_int 3)])))]
876 (define_insn "*vsx_le_perm_store_v16qi"
877 [(set (match_operand:V16QI 0 "indexed_or_indirect_operand" "=Z")
878 (match_operand:V16QI 1 "vsx_register_operand" "+wa"))]
879 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
881 [(set_attr "type" "vecstore")
882 (set_attr "length" "12")])
885 [(set (match_operand:V16QI 0 "indexed_or_indirect_operand")
886 (match_operand:V16QI 1 "vsx_register_operand"))]
887 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
891 (parallel [(const_int 8) (const_int 9)
892 (const_int 10) (const_int 11)
893 (const_int 12) (const_int 13)
894 (const_int 14) (const_int 15)
895 (const_int 0) (const_int 1)
896 (const_int 2) (const_int 3)
897 (const_int 4) (const_int 5)
898 (const_int 6) (const_int 7)])))
902 (parallel [(const_int 8) (const_int 9)
903 (const_int 10) (const_int 11)
904 (const_int 12) (const_int 13)
905 (const_int 14) (const_int 15)
906 (const_int 0) (const_int 1)
907 (const_int 2) (const_int 3)
908 (const_int 4) (const_int 5)
909 (const_int 6) (const_int 7)])))]
911 rtx mem = operands[0];
913 /* Don't apply the swap optimization if we've already performed register
914 allocation and the hard register source is not in the altivec range. */
915 if ((MEM_ALIGN (mem) >= 128)
916 && ((reg_or_subregno (operands[1]) >= FIRST_PSEUDO_REGISTER)
917 || ALTIVEC_REGNO_P (reg_or_subregno (operands[1]))))
919 rtx mem_address = XEXP (mem, 0);
920 enum machine_mode mode = GET_MODE (mem);
921 if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
923 rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]);
924 emit_insn (stvx_set_expr);
927 else if (rs6000_quadword_masked_address_p (mem_address))
929 /* This rtl is already in the form that matches stvx instruction,
930 so leave it alone. */
933 /* Otherwise, fall through to transform into a swapping store. */
936 operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
940 ;; The post-reload split requires that we re-permute the source
941 ;; register in case it is still live.
943 [(set (match_operand:V16QI 0 "indexed_or_indirect_operand")
944 (match_operand:V16QI 1 "vsx_register_operand"))]
945 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
949 (parallel [(const_int 8) (const_int 9)
950 (const_int 10) (const_int 11)
951 (const_int 12) (const_int 13)
952 (const_int 14) (const_int 15)
953 (const_int 0) (const_int 1)
954 (const_int 2) (const_int 3)
955 (const_int 4) (const_int 5)
956 (const_int 6) (const_int 7)])))
960 (parallel [(const_int 8) (const_int 9)
961 (const_int 10) (const_int 11)
962 (const_int 12) (const_int 13)
963 (const_int 14) (const_int 15)
964 (const_int 0) (const_int 1)
965 (const_int 2) (const_int 3)
966 (const_int 4) (const_int 5)
967 (const_int 6) (const_int 7)])))
971 (parallel [(const_int 8) (const_int 9)
972 (const_int 10) (const_int 11)
973 (const_int 12) (const_int 13)
974 (const_int 14) (const_int 15)
975 (const_int 0) (const_int 1)
976 (const_int 2) (const_int 3)
977 (const_int 4) (const_int 5)
978 (const_int 6) (const_int 7)])))]
981 ;; Little endian word swapping for 128-bit types that are either scalars or the
982 ;; special V1TI container class, for which it is not appropriate to use vec_select
984 (define_insn "*vsx_le_permute_<mode>"
985 [(set (match_operand:VSX_TI 0 "nonimmediate_operand" "=<VSa>,<VSa>,Z,&r,&r,Q")
987 (match_operand:VSX_TI 1 "input_operand" "<VSa>,Z,<VSa>,r,Q,r")
989 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
991 xxpermdi %x0,%x1,%x1,2
995 ld%U1%X1 %0,%L1\;ld%U1%X1 %L0,%1
996 std%U0%X0 %L1,%0\;std%U0%X0 %1,%L0"
997 [(set_attr "length" "4,4,4,8,8,8")
998 (set_attr "type" "vecperm,vecload,vecstore,*,load,store")])
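;; Note: "xxpermdi %x0,%x1,%x1,2" with immediate 2 places doubleword 1 of the
;; first input in the result's doubleword 0 and doubleword 0 of the second
;; input in doubleword 1; with both inputs the same register this simply
;; swaps the two 64-bit halves, which is the whole LE permute for these modes.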
1000 (define_insn_and_split "*vsx_le_undo_permute_<mode>"
1001 [(set (match_operand:VSX_TI 0 "vsx_register_operand" "=<VSa>,<VSa>")
1004 (match_operand:VSX_TI 1 "vsx_register_operand" "0,<VSa>")
1007 "!BYTES_BIG_ENDIAN && TARGET_VSX"
1012 [(set (match_dup 0) (match_dup 1))]
1014 if (reload_completed && REGNO (operands[0]) == REGNO (operands[1]))
1016 emit_note (NOTE_INSN_DELETED);
1020 [(set_attr "length" "0,4")
1021 (set_attr "type" "veclogical")])
1023 (define_insn_and_split "*vsx_le_perm_load_<mode>"
1024 [(set (match_operand:VSX_LE_128 0 "vsx_register_operand" "=<VSa>,r")
1025 (match_operand:VSX_LE_128 1 "memory_operand" "Z,Q"))]
1026 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
1030 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
1033 rtx tmp = (can_create_pseudo_p ()
1034 ? gen_reg_rtx_and_attrs (operands[0])
1036 rs6000_emit_le_vsx_permute (tmp, operands[1], <MODE>mode);
1037 rs6000_emit_le_vsx_permute (operands[0], tmp, <MODE>mode);
1040 [(set_attr "type" "vecload,load")
1041 (set_attr "length" "8,8")])
1043 (define_insn "*vsx_le_perm_store_<mode>"
1044 [(set (match_operand:VSX_LE_128 0 "memory_operand" "=Z,Q")
1045 (match_operand:VSX_LE_128 1 "vsx_register_operand" "+<VSa>,r"))]
1046 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
1050 [(set_attr "type" "vecstore,store")
1051 (set_attr "length" "12,8")])
1054 [(set (match_operand:VSX_LE_128 0 "memory_operand")
1055 (match_operand:VSX_LE_128 1 "vsx_register_operand"))]
1056 "!BYTES_BIG_ENDIAN && TARGET_VSX && !reload_completed && !TARGET_P9_VECTOR"
1059 rtx tmp = (can_create_pseudo_p ()
1060 ? gen_reg_rtx_and_attrs (operands[0])
1062 rs6000_emit_le_vsx_permute (tmp, operands[1], <MODE>mode);
1063 rs6000_emit_le_vsx_permute (operands[0], tmp, <MODE>mode);
1067 ;; Peepholes to catch loads and stores for TImode if TImode landed in
1068 ;; GPR registers on a little endian system.
1070 [(set (match_operand:VSX_TI 0 "int_reg_operand")
1071 (rotate:VSX_TI (match_operand:VSX_TI 1 "memory_operand")
1073 (set (match_operand:VSX_TI 2 "int_reg_operand")
1074 (rotate:VSX_TI (match_dup 0)
1076 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
1077 && (rtx_equal_p (operands[0], operands[2])
1078 || peep2_reg_dead_p (2, operands[0]))"
1079 [(set (match_dup 2) (match_dup 1))])
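;; Sketch: two back-to-back 64-bit rotates of a 128-bit value are an identity,
;; so a permuting load whose result is immediately permuted back (and whose
;; intermediate register dies) collapses to a plain load:
;;     (set r0 (rotate mem 64))
;;     (set r2 (rotate r0 64))    ==>    (set r2 mem)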
1082 [(set (match_operand:VSX_TI 0 "int_reg_operand")
1083 (rotate:VSX_TI (match_operand:VSX_TI 1 "int_reg_operand")
1085 (set (match_operand:VSX_TI 2 "memory_operand")
1086 (rotate:VSX_TI (match_dup 0)
1088 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
1089 && peep2_reg_dead_p (2, operands[0])"
1090 [(set (match_dup 2) (match_dup 1))])
1092 ;; Peephole to catch memory to memory transfers for TImode if TImode landed in
1093 ;; VSX registers on a little endian system. The vector types and IEEE 128-bit
1094 ;; floating point are handled by the more generic swap elimination pass.
1096 [(set (match_operand:TI 0 "vsx_register_operand")
1097 (rotate:TI (match_operand:TI 1 "vsx_register_operand")
1099 (set (match_operand:TI 2 "vsx_register_operand")
1100 (rotate:TI (match_dup 0)
1102 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
1103 && (rtx_equal_p (operands[0], operands[2])
1104 || peep2_reg_dead_p (2, operands[0]))"
1105 [(set (match_dup 2) (match_dup 1))])
1107 ;; The post-reload split requires that we re-permute the source
1108 ;; register in case it is still live.
1110 [(set (match_operand:VSX_LE_128 0 "memory_operand")
1111 (match_operand:VSX_LE_128 1 "vsx_register_operand"))]
1112 "!BYTES_BIG_ENDIAN && TARGET_VSX && reload_completed && !TARGET_P9_VECTOR"
1115 rs6000_emit_le_vsx_permute (operands[1], operands[1], <MODE>mode);
1116 rs6000_emit_le_vsx_permute (operands[0], operands[1], <MODE>mode);
1117 rs6000_emit_le_vsx_permute (operands[1], operands[1], <MODE>mode);
1121 ;; Vector constants that can be generated with XXSPLTIB, which was added in ISA
1122 ;; 3.0. Both (const_vector [..]) and (vec_duplicate ...) forms are recognized.
1123 (define_insn "xxspltib_v16qi"
1124 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
1125 (vec_duplicate:V16QI (match_operand:SI 1 "s8bit_cint_operand" "n")))]
1128 operands[2] = GEN_INT (INTVAL (operands[1]) & 0xff);
1129 return "xxspltib %x0,%2";
1131 [(set_attr "type" "vecperm")])
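;; A hedged usage example (C, assuming ISA 3.0 and the Altivec API): a splat
;; of a small literal can be emitted as one XXSPLTIB:
;;     #include <altivec.h>
;;     vector signed char splat5 (void) { return vec_splats ((signed char) 5); }
;; which this pattern renders as "xxspltib %x0,5".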
1133 (define_insn "xxspltib_<mode>_nosplit"
1134 [(set (match_operand:VSINT_842 0 "vsx_register_operand" "=wa,wa")
1135 (match_operand:VSINT_842 1 "xxspltib_constant_nosplit" "jwM,wE"))]
1138 rtx op1 = operands[1];
1142 if (!xxspltib_constant_p (op1, <MODE>mode, &num_insns, &value)
1146 operands[2] = GEN_INT (value & 0xff);
1147 return "xxspltib %x0,%2";
1149 [(set_attr "type" "vecperm")])
1151 (define_insn_and_split "*xxspltib_<mode>_split"
1152 [(set (match_operand:VSINT_842 0 "altivec_register_operand" "=v")
1153 (match_operand:VSINT_842 1 "xxspltib_constant_split" "wS"))]
1161 rtx op0 = operands[0];
1162 rtx op1 = operands[1];
1163 rtx tmp = ((can_create_pseudo_p ())
1164 ? gen_reg_rtx (V16QImode)
1165 : gen_lowpart (V16QImode, op0));
1167 if (!xxspltib_constant_p (op1, <MODE>mode, &num_insns, &value)
1171 emit_insn (gen_xxspltib_v16qi (tmp, GEN_INT (value)));
1173 if (<MODE>mode == V2DImode)
1174 emit_insn (gen_vsx_sign_extend_qi_v2di (op0, tmp));
1176 else if (<MODE>mode == V4SImode)
1177 emit_insn (gen_vsx_sign_extend_qi_v4si (op0, tmp));
1179 else if (<MODE>mode == V8HImode)
1180 emit_insn (gen_altivec_vupkhsb (op0, tmp));
1187 [(set_attr "type" "vecperm")
1188 (set_attr "length" "8")])
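;; Sketch of the two-insn split (hence "length" 8): for, say, a V8HI vector
;; of 5s, "xxspltib tmp,5" first splats the byte, then "vupkhsb %0,tmp"
;; sign-extends the high bytes to halfwords, reproducing the constant.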
1191 ;; Prefer using vector registers over GPRs. Prefer using ISA 3.0's XXSPLTIB
1192 ;; or Altivec VSPLTISW 0/-1 over XXLXOR/XXLORC to set a register to all 0's or
1193 ;; all 1's, since the machine does not have to wait for the previous
1194 ;; instruction using the register being set (such as a store waiting on a slow
1195 ;; instruction). But generate XXLXOR/XXLORC if it will avoid a register move.
1197 ;; VSX store VSX load VSX move VSX->GPR GPR->VSX LQ (GPR)
1198 ;; STQ (GPR) GPR load GPR store GPR move XXSPLTIB VSPLTISW
1199 ;; VSX 0/-1 GPR 0/-1 VMX const GPR const LVX (VMX) STVX (VMX)
1200 (define_insn "*vsx_mov<mode>_64bit"
1201 [(set (match_operand:VSX_M 0 "nonimmediate_operand"
1202 "=ZwO, <VSa>, <VSa>, r, we, ?wQ,
1203 ?&r, ??r, ??Y, ??r, wo, v,
1204 ?<VSa>, *r, v, ??r, wZ, v")
1206 (match_operand:VSX_M 1 "input_operand"
1207 "<VSa>, ZwO, <VSa>, we, r, r,
1208 wQ, Y, r, r, wE, jwM,
1209 ?jwM, jwM, W, W, v, wZ"))]
1211 "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)
1212 && (register_operand (operands[0], <MODE>mode)
1213 || register_operand (operands[1], <MODE>mode))"
1215 return rs6000_output_move_128bit (operands);
1218 "vecstore, vecload, vecsimple, mffgpr, mftgpr, load,
1219 store, load, store, *, vecsimple, vecsimple,
1220 vecsimple, *, *, *, vecstore, vecload")
1225 4, 8, 20, 20, 4, 4")])
1227 ;; VSX store VSX load VSX move GPR load GPR store GPR move
1228 ;; XXSPLTIB VSPLTISW VSX 0/-1 GPR 0/-1 VMX const GPR const
1229 ;; LVX (VMX) STVX (VMX)
1230 (define_insn "*vsx_mov<mode>_32bit"
1231 [(set (match_operand:VSX_M 0 "nonimmediate_operand"
1232 "=ZwO, <VSa>, <VSa>, ??r, ??Y, ??r,
1233 wo, v, ?<VSa>, *r, v, ??r,
1236 (match_operand:VSX_M 1 "input_operand"
1237 "<VSa>, ZwO, <VSa>, Y, r, r,
1238 wE, jwM, ?jwM, jwM, W, W,
1241 "!TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)
1242 && (register_operand (operands[0], <MODE>mode)
1243 || register_operand (operands[1], <MODE>mode))"
1245 return rs6000_output_move_128bit (operands);
1248 "vecstore, vecload, vecsimple, load, store, *,
1249 vecsimple, vecsimple, vecsimple, *, *, *,
1253 "4, 4, 4, 16, 16, 16,
1254 4, 4, 4, 16, 20, 32,
1257 ;; Explicit load/store expanders for the builtin functions
1258 (define_expand "vsx_load_<mode>"
1259 [(set (match_operand:VSX_M 0 "vsx_register_operand")
1260 (match_operand:VSX_M 1 "memory_operand"))]
1261 "VECTOR_MEM_VSX_P (<MODE>mode)"
1263 /* Expand to swaps if needed, prior to swap optimization. */
1264 if (!BYTES_BIG_ENDIAN && !TARGET_P9_VECTOR)
1266 rs6000_emit_le_vsx_move (operands[0], operands[1], <MODE>mode);
1271 (define_expand "vsx_store_<mode>"
1272 [(set (match_operand:VSX_M 0 "memory_operand")
1273 (match_operand:VSX_M 1 "vsx_register_operand"))]
1274 "VECTOR_MEM_VSX_P (<MODE>mode)"
1276 /* Expand to swaps if needed, prior to swap optimization. */
1277 if (!BYTES_BIG_ENDIAN && !TARGET_P9_VECTOR)
1279 rs6000_emit_le_vsx_move (operands[0], operands[1], <MODE>mode);
1284 ;; Explicit load/store expanders for the builtin functions for lxvd2x, etc.,
1285 ;; when you really want their element-reversing behavior.
1286 (define_insn "vsx_ld_elemrev_v2di"
1287 [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
1289 (match_operand:V2DI 1 "memory_operand" "Z")
1290 (parallel [(const_int 1) (const_int 0)])))]
1291 "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN"
1293 [(set_attr "type" "vecload")])
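;; Note: on little endian, lxvd2x/stxvd2x themselves access the two
;; doublewords in big-endian element order, so for 64-bit element types the
;; element-reversing builtins map to a single instruction here, with no extra
;; permute needed.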
1295 (define_insn "vsx_ld_elemrev_v1ti"
1296 [(set (match_operand:V1TI 0 "vsx_register_operand" "=wa")
1298 (match_operand:V1TI 1 "memory_operand" "Z")
1299 (parallel [(const_int 0)])))]
1300 "VECTOR_MEM_VSX_P (V1TImode) && !BYTES_BIG_ENDIAN"
1302 return "lxvd2x %x0,%y1\;xxpermdi %x0,%x0,%x0,2";
1304 [(set_attr "type" "vecload")])
1306 (define_insn "vsx_ld_elemrev_v2df"
1307 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
1309 (match_operand:V2DF 1 "memory_operand" "Z")
1310 (parallel [(const_int 1) (const_int 0)])))]
1311 "VECTOR_MEM_VSX_P (V2DFmode) && !BYTES_BIG_ENDIAN"
1313 [(set_attr "type" "vecload")])
1315 (define_insn "vsx_ld_elemrev_v4si"
1316 [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa")
1318 (match_operand:V4SI 1 "memory_operand" "Z")
1319 (parallel [(const_int 3) (const_int 2)
1320 (const_int 1) (const_int 0)])))]
1321 "VECTOR_MEM_VSX_P (V4SImode) && !BYTES_BIG_ENDIAN"
1323 [(set_attr "type" "vecload")])
1325 (define_insn "vsx_ld_elemrev_v4sf"
1326 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
1328 (match_operand:V4SF 1 "memory_operand" "Z")
1329 (parallel [(const_int 3) (const_int 2)
1330 (const_int 1) (const_int 0)])))]
1331 "VECTOR_MEM_VSX_P (V4SFmode) && !BYTES_BIG_ENDIAN"
1333 [(set_attr "type" "vecload")])
1335 (define_expand "vsx_ld_elemrev_v8hi"
1336 [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
1338 (match_operand:V8HI 1 "memory_operand" "Z")
1339 (parallel [(const_int 7) (const_int 6)
1340 (const_int 5) (const_int 4)
1341 (const_int 3) (const_int 2)
1342 (const_int 1) (const_int 0)])))]
1343 "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN"
1345 if (!TARGET_P9_VECTOR)
1347 rtx tmp = gen_reg_rtx (V4SImode);
1348 rtx subreg, subreg2, perm[16], pcv;
1349 /* 2 is leftmost element in register */
1350 unsigned int reorder[16] = {13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2};
1353 subreg = simplify_gen_subreg (V4SImode, operands[1], V8HImode, 0);
1354 emit_insn (gen_vsx_ld_elemrev_v4si (tmp, subreg));
1355 subreg2 = simplify_gen_subreg (V8HImode, tmp, V4SImode, 0);
1357 for (i = 0; i < 16; ++i)
1358 perm[i] = GEN_INT (reorder[i]);
1360 pcv = force_reg (V16QImode,
1361 gen_rtx_CONST_VECTOR (V16QImode,
1362 gen_rtvec_v (16, perm)));
1363 emit_insn (gen_altivec_vperm_v8hi_direct (operands[0], subreg2,
1369 (define_insn "*vsx_ld_elemrev_v8hi_internal"
1370 [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
1372 (match_operand:V8HI 1 "memory_operand" "Z")
1373 (parallel [(const_int 7) (const_int 6)
1374 (const_int 5) (const_int 4)
1375 (const_int 3) (const_int 2)
1376 (const_int 1) (const_int 0)])))]
1377 "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
1379 [(set_attr "type" "vecload")])
1381 (define_expand "vsx_ld_elemrev_v16qi"
1382 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
1384 (match_operand:V16QI 1 "memory_operand" "Z")
1385 (parallel [(const_int 15) (const_int 14)
1386 (const_int 13) (const_int 12)
1387 (const_int 11) (const_int 10)
1388 (const_int 9) (const_int 8)
1389 (const_int 7) (const_int 6)
1390 (const_int 5) (const_int 4)
1391 (const_int 3) (const_int 2)
1392 (const_int 1) (const_int 0)])))]
1393 "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN"
1395 if (!TARGET_P9_VECTOR)
1397 rtx tmp = gen_reg_rtx (V4SImode);
1398 rtx subreg, subreg2, perm[16], pcv;
1399 /* 3 is leftmost element in register */
1400 unsigned int reorder[16] = {12,13,14,15,8,9,10,11,4,5,6,7,0,1,2,3};
1403 subreg = simplify_gen_subreg (V4SImode, operands[1], V16QImode, 0);
1404 emit_insn (gen_vsx_ld_elemrev_v4si (tmp, subreg));
1405 subreg2 = simplify_gen_subreg (V16QImode, tmp, V4SImode, 0);
1407 for (i = 0; i < 16; ++i)
1408 perm[i] = GEN_INT (reorder[i]);
1410 pcv = force_reg (V16QImode,
1411 gen_rtx_CONST_VECTOR (V16QImode,
1412 gen_rtvec_v (16, perm)));
1413 emit_insn (gen_altivec_vperm_v16qi_direct (operands[0], subreg2,
1419 (define_insn "*vsx_ld_elemrev_v16qi_internal"
1420 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
1422 (match_operand:V16QI 1 "memory_operand" "Z")
1423 (parallel [(const_int 15) (const_int 14)
1424 (const_int 13) (const_int 12)
1425 (const_int 11) (const_int 10)
1426 (const_int 9) (const_int 8)
1427 (const_int 7) (const_int 6)
1428 (const_int 5) (const_int 4)
1429 (const_int 3) (const_int 2)
1430 (const_int 1) (const_int 0)])))]
1431 "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
1433 [(set_attr "type" "vecload")])
1435 (define_insn "vsx_st_elemrev_v1ti"
1436 [(set (match_operand:V1TI 0 "memory_operand" "=Z")
1438 (match_operand:V1TI 1 "vsx_register_operand" "+wa")
1439 (parallel [(const_int 0)])))
1440 (clobber (match_dup 1))]
1441 "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN"
1443 return "xxpermdi %x1,%x1,%x1,2\;stxvd2x %x1,%y0";
1445 [(set_attr "type" "vecstore")])
1447 (define_insn "vsx_st_elemrev_v2df"
1448 [(set (match_operand:V2DF 0 "memory_operand" "=Z")
1450 (match_operand:V2DF 1 "vsx_register_operand" "wa")
1451 (parallel [(const_int 1) (const_int 0)])))]
1452 "VECTOR_MEM_VSX_P (V2DFmode) && !BYTES_BIG_ENDIAN"
1454 [(set_attr "type" "vecstore")])
1456 (define_insn "vsx_st_elemrev_v2di"
1457 [(set (match_operand:V2DI 0 "memory_operand" "=Z")
1459 (match_operand:V2DI 1 "vsx_register_operand" "wa")
1460 (parallel [(const_int 1) (const_int 0)])))]
1461 "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN"
1463 [(set_attr "type" "vecstore")])
1465 (define_insn "vsx_st_elemrev_v4sf"
1466 [(set (match_operand:V4SF 0 "memory_operand" "=Z")
1468 (match_operand:V4SF 1 "vsx_register_operand" "wa")
1469 (parallel [(const_int 3) (const_int 2)
1470 (const_int 1) (const_int 0)])))]
1471 "VECTOR_MEM_VSX_P (V4SFmode) && !BYTES_BIG_ENDIAN"
1473 [(set_attr "type" "vecstore")])
1475 (define_insn "vsx_st_elemrev_v4si"
1476 [(set (match_operand:V4SI 0 "memory_operand" "=Z")
1478 (match_operand:V4SI 1 "vsx_register_operand" "wa")
1479 (parallel [(const_int 3) (const_int 2)
1480 (const_int 1) (const_int 0)])))]
1481 "VECTOR_MEM_VSX_P (V4SImode) && !BYTES_BIG_ENDIAN"
1483 [(set_attr "type" "vecstore")])
1485 (define_expand "vsx_st_elemrev_v8hi"
1486 [(set (match_operand:V8HI 0 "memory_operand" "=Z")
1488 (match_operand:V8HI 1 "vsx_register_operand" "wa")
1489 (parallel [(const_int 7) (const_int 6)
1490 (const_int 5) (const_int 4)
1491 (const_int 3) (const_int 2)
1492 (const_int 1) (const_int 0)])))]
1493 "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN"
1495 if (!TARGET_P9_VECTOR)
1497 rtx mem_subreg, subreg, perm[16], pcv;
1498 rtx tmp = gen_reg_rtx (V8HImode);
1499 /* 2 is leftmost element in register */
1500 unsigned int reorder[16] = {13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2};
1503 for (i = 0; i < 16; ++i)
1504 perm[i] = GEN_INT (reorder[i]);
1506 pcv = force_reg (V16QImode,
1507 gen_rtx_CONST_VECTOR (V16QImode,
1508 gen_rtvec_v (16, perm)));
1509 emit_insn (gen_altivec_vperm_v8hi_direct (tmp, operands[1],
1511 subreg = simplify_gen_subreg (V4SImode, tmp, V8HImode, 0);
1512 mem_subreg = simplify_gen_subreg (V4SImode, operands[0], V8HImode, 0);
1513 emit_insn (gen_vsx_st_elemrev_v4si (mem_subreg, subreg));
1518 (define_insn "*vsx_st_elemrev_v2di_internal"
1519 [(set (match_operand:V2DI 0 "memory_operand" "=Z")
1521 (match_operand:V2DI 1 "vsx_register_operand" "wa")
1522 (parallel [(const_int 1) (const_int 0)])))]
1523 "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
1525 [(set_attr "type" "vecstore")])
1527 (define_insn "*vsx_st_elemrev_v8hi_internal"
1528 [(set (match_operand:V8HI 0 "memory_operand" "=Z")
1530 (match_operand:V8HI 1 "vsx_register_operand" "wa")
1531 (parallel [(const_int 7) (const_int 6)
1532 (const_int 5) (const_int 4)
1533 (const_int 3) (const_int 2)
1534 (const_int 1) (const_int 0)])))]
1535 "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
1537 [(set_attr "type" "vecstore")])
1539 (define_expand "vsx_st_elemrev_v16qi"
1540 [(set (match_operand:V16QI 0 "memory_operand" "=Z")
1542 (match_operand:V16QI 1 "vsx_register_operand" "wa")
1543 (parallel [(const_int 15) (const_int 14)
1544 (const_int 13) (const_int 12)
1545 (const_int 11) (const_int 10)
1546 (const_int 9) (const_int 8)
1547 (const_int 7) (const_int 6)
1548 (const_int 5) (const_int 4)
1549 (const_int 3) (const_int 2)
1550 (const_int 1) (const_int 0)])))]
1551 "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN"
1553 if (!TARGET_P9_VECTOR)
1555 rtx mem_subreg, subreg, perm[16], pcv;
1556 rtx tmp = gen_reg_rtx (V16QImode);
1557 /* 3 is leftmost element in register */
1558 unsigned int reorder[16] = {12,13,14,15,8,9,10,11,4,5,6,7,0,1,2,3};
1561 for (i = 0; i < 16; ++i)
1562 perm[i] = GEN_INT (reorder[i]);
1564 pcv = force_reg (V16QImode,
1565 gen_rtx_CONST_VECTOR (V16QImode,
1566 gen_rtvec_v (16, perm)));
1567 emit_insn (gen_altivec_vperm_v16qi_direct (tmp, operands[1],
1569 subreg = simplify_gen_subreg (V4SImode, tmp, V16QImode, 0);
1570 mem_subreg = simplify_gen_subreg (V4SImode, operands[0], V16QImode, 0);
1571 emit_insn (gen_vsx_st_elemrev_v4si (mem_subreg, subreg));
1576 (define_insn "*vsx_st_elemrev_v16qi_internal"
1577 [(set (match_operand:V16QI 0 "memory_operand" "=Z")
1579 (match_operand:V16QI 1 "vsx_register_operand" "wa")
1580 (parallel [(const_int 15) (const_int 14)
1581 (const_int 13) (const_int 12)
1582 (const_int 11) (const_int 10)
1583 (const_int 9) (const_int 8)
1584 (const_int 7) (const_int 6)
1585 (const_int 5) (const_int 4)
1586 (const_int 3) (const_int 2)
1587 (const_int 1) (const_int 0)])))]
1588 "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
1590 [(set_attr "type" "vecstore")])
1593 ;; VSX vector floating point arithmetic instructions. The VSX scalar
1594 ;; instructions are now combined with the insn for the traditional floating
1596 (define_insn "*vsx_add<mode>3"
1597 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1598 (plus:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
1599 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
1600 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1601 "xvadd<VSs> %x0,%x1,%x2"
1602 [(set_attr "type" "<VStype_simple>")
1603 (set_attr "fp_type" "<VSfptype_simple>")])
1605 (define_insn "*vsx_sub<mode>3"
1606 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1607 (minus:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
1608 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
1609 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1610 "xvsub<VSs> %x0,%x1,%x2"
1611 [(set_attr "type" "<VStype_simple>")
1612 (set_attr "fp_type" "<VSfptype_simple>")])
1614 (define_insn "*vsx_mul<mode>3"
1615 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1616 (mult:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
1617 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
1618 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1619 "xvmul<VSs> %x0,%x1,%x2"
1620 [(set_attr "type" "<VStype_simple>")
1621 (set_attr "fp_type" "<VSfptype_mul>")])
1623 ;; Emulate vector with scalar for vec_mul in V2DImode
1624 (define_insn_and_split "vsx_mul_v2di"
1625 [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
1626 (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa")
1627 (match_operand:V2DI 2 "vsx_register_operand" "wa")]
1629 "VECTOR_MEM_VSX_P (V2DImode)"
1631 "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed"
1634 rtx op0 = operands[0];
1635 rtx op1 = operands[1];
1636 rtx op2 = operands[2];
1637 rtx op3 = gen_reg_rtx (DImode);
1638 rtx op4 = gen_reg_rtx (DImode);
1639 rtx op5 = gen_reg_rtx (DImode);
1640 emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0)));
1641 emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0)));
1642 if (TARGET_POWERPC64)
1643 emit_insn (gen_muldi3 (op5, op3, op4));
1646 rtx ret = expand_mult (DImode, op3, op4, NULL, 0, false);
1647 emit_move_insn (op5, ret);
1649 emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1)));
1650 emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1)));
1651 if (TARGET_POWERPC64)
1652 emit_insn (gen_muldi3 (op3, op3, op4));
1655 rtx ret = expand_mult (DImode, op3, op4, NULL, 0, false);
1656 emit_move_insn (op3, ret);
1658 emit_insn (gen_vsx_concat_v2di (op0, op5, op3));
1661 [(set_attr "type" "mul")])
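;; A rough C model of the split above (illustrative only, not the builtin API):
;;     vector long long mul_v2di (vector long long a, vector long long b)
;;     {
;;       vector long long r;
;;       r[0] = a[0] * b[0];   /* mulld on 64-bit targets */
;;       r[1] = a[1] * b[1];   /* otherwise a library multiply */
;;       return r;
;;     }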
1663 (define_insn "*vsx_div<mode>3"
1664 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1665 (div:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
1666 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
1667 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1668 "xvdiv<VSs> %x0,%x1,%x2"
1669 [(set_attr "type" "<VStype_div>")
1670 (set_attr "fp_type" "<VSfptype_div>")])
1672 ;; Emulate vector with scalar for vec_div in V2DImode
1673 (define_insn_and_split "vsx_div_v2di"
1674 [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
1675 (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa")
1676 (match_operand:V2DI 2 "vsx_register_operand" "wa")]
1678 "VECTOR_MEM_VSX_P (V2DImode)"
1680 "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed"
1683 rtx op0 = operands[0];
1684 rtx op1 = operands[1];
1685 rtx op2 = operands[2];
1686 rtx op3 = gen_reg_rtx (DImode);
1687 rtx op4 = gen_reg_rtx (DImode);
1688 rtx op5 = gen_reg_rtx (DImode);
1689 emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0)));
1690 emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0)));
1691 if (TARGET_POWERPC64)
1692 emit_insn (gen_divdi3 (op5, op3, op4));
1695 rtx libfunc = optab_libfunc (sdiv_optab, DImode);
1696 rtx target = emit_library_call_value (libfunc,
1697 op5, LCT_NORMAL, DImode,
1700 emit_move_insn (op5, target);
1702 emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1)));
1703 emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1)));
1704 if (TARGET_POWERPC64)
1705 emit_insn (gen_divdi3 (op3, op3, op4));
1708 rtx libfunc = optab_libfunc (sdiv_optab, DImode);
1709 rtx target = emit_library_call_value (libfunc,
1710 op3, LCT_NORMAL, DImode,
1713 emit_move_insn (op3, target);
1715 emit_insn (gen_vsx_concat_v2di (op0, op5, op3));
1718 [(set_attr "type" "div")])
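;; As above, but for signed division; note that on 32-bit targets each 64-bit
;; element divide becomes a library call (the sdiv_optab libfunc, normally
;; __divdi3 from libgcc) rather than an inline divd.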
1720 (define_insn_and_split "vsx_udiv_v2di"
1721 [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
1722 (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa")
1723 (match_operand:V2DI 2 "vsx_register_operand" "wa")]
1725 "VECTOR_MEM_VSX_P (V2DImode)"
1727 "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed"
1730 rtx op0 = operands[0];
1731 rtx op1 = operands[1];
1732 rtx op2 = operands[2];
1733 rtx op3 = gen_reg_rtx (DImode);
1734 rtx op4 = gen_reg_rtx (DImode);
1735 rtx op5 = gen_reg_rtx (DImode);
1736 emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0)));
1737 emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0)));
1738 if (TARGET_POWERPC64)
1739 emit_insn (gen_udivdi3 (op5, op3, op4));
1742 rtx libfunc = optab_libfunc (udiv_optab, DImode);
1743 rtx target = emit_library_call_value (libfunc,
1744 op5, LCT_NORMAL, DImode,
1747 emit_move_insn (op5, target);
1749 emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1)));
1750 emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1)));
1751 if (TARGET_POWERPC64)
1752 emit_insn (gen_udivdi3 (op3, op3, op4));
1755 rtx libfunc = optab_libfunc (udiv_optab, DImode);
1756 rtx target = emit_library_call_value (libfunc,
1757 op3, LCT_NORMAL, DImode,
1760 emit_move_insn (op3, target);
1762 emit_insn (gen_vsx_concat_v2di (op0, op5, op3));
1765 [(set_attr "type" "div")])
1767 ;; *tdiv* instruction returning the FG flag
1768 (define_expand "vsx_tdiv<mode>3_fg"
1770 (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand")
1771 (match_operand:VSX_B 2 "vsx_register_operand")]
1773 (set (match_operand:SI 0 "gpc_reg_operand")
1774 (gt:SI (match_dup 3)
1776 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1778 operands[3] = gen_reg_rtx (CCFPmode);
1781 ;; *tdiv* instruction returning the FE flag
1782 (define_expand "vsx_tdiv<mode>3_fe"
1784 (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand")
1785 (match_operand:VSX_B 2 "vsx_register_operand")]
1787 (set (match_operand:SI 0 "gpc_reg_operand")
1788 (eq:SI (match_dup 3)
1790 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1792 operands[3] = gen_reg_rtx (CCFPmode);
1795 (define_insn "*vsx_tdiv<mode>3_internal"
1796 [(set (match_operand:CCFP 0 "cc_reg_operand" "=x,x")
1797 (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSa>")
1798 (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,<VSa>")]
1800 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1801 "x<VSv>tdiv<VSs> %0,%x1,%x2"
1802 [(set_attr "type" "<VStype_simple>")
1803 (set_attr "fp_type" "<VSfptype_simple>")])
1805 (define_insn "vsx_fre<mode>2"
1806 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1807 (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")]
1809 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1811 [(set_attr "type" "<VStype_simple>")
1812 (set_attr "fp_type" "<VSfptype_simple>")])
1814 (define_insn "*vsx_neg<mode>2"
1815 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1816 (neg:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")))]
1817 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1818 "xvneg<VSs> %x0,%x1"
1819 [(set_attr "type" "<VStype_simple>")
1820 (set_attr "fp_type" "<VSfptype_simple>")])
1822 (define_insn "*vsx_abs<mode>2"
1823 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1824 (abs:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")))]
1825 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1826 "xvabs<VSs> %x0,%x1"
1827 [(set_attr "type" "<VStype_simple>")
1828 (set_attr "fp_type" "<VSfptype_simple>")])
1830 (define_insn "vsx_nabs<mode>2"
1831 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1834 (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>"))))]
1835 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1836 "xvnabs<VSs> %x0,%x1"
1837 [(set_attr "type" "<VStype_simple>")
1838 (set_attr "fp_type" "<VSfptype_simple>")])
1840 (define_insn "vsx_smax<mode>3"
1841 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1842 (smax:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
1843 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
1844 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1845 "xvmax<VSs> %x0,%x1,%x2"
1846 [(set_attr "type" "<VStype_simple>")
1847 (set_attr "fp_type" "<VSfptype_simple>")])
1849 (define_insn "*vsx_smin<mode>3"
1850 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1851 (smin:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
1852 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
1853 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1854 "xvmin<VSs> %x0,%x1,%x2"
1855 [(set_attr "type" "<VStype_simple>")
1856 (set_attr "fp_type" "<VSfptype_simple>")])
1858 (define_insn "*vsx_sqrt<mode>2"
1859 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1860 (sqrt:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")))]
1861 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1862 "xvsqrt<VSs> %x0,%x1"
1863 [(set_attr "type" "<VStype_sqrt>")
1864 (set_attr "fp_type" "<VSfptype_sqrt>")])
1866 (define_insn "*vsx_rsqrte<mode>2"
1867 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1868 (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")]
1870 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1871 "xvrsqrte<VSs> %x0,%x1"
1872 [(set_attr "type" "<VStype_simple>")
1873 (set_attr "fp_type" "<VSfptype_simple>")])
1875 ;; *tsqrt* returning the fg flag
1876 (define_expand "vsx_tsqrt<mode>2_fg"
1878 (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand")]
1880 (set (match_operand:SI 0 "gpc_reg_operand")
1881 (gt:SI (match_dup 2)
1883 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1885 operands[2] = gen_reg_rtx (CCFPmode);
1888 ;; *tsqrt* returning the fe flag
1889 (define_expand "vsx_tsqrt<mode>2_fe"
1891 (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand")]
1893 (set (match_operand:SI 0 "gpc_reg_operand")
1894 (eq:SI (match_dup 2)
1896 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1898 operands[2] = gen_reg_rtx (CCFPmode);
1901 (define_insn "*vsx_tsqrt<mode>2_internal"
1902 [(set (match_operand:CCFP 0 "cc_reg_operand" "=x,x")
1903 (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSa>")]
1905 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1906 "x<VSv>tsqrt<VSs> %0,%x1"
1907 [(set_attr "type" "<VStype_simple>")
1908 (set_attr "fp_type" "<VSfptype_simple>")])
1910 ;; Fused vector multiply/add instructions. Support the classical Altivec
1911 ;; versions of fma, which allow the target to be a separate register from the
1912 ;; 3 inputs. Under VSX, the target must be either the addend or the first
1915 (define_insn "*vsx_fmav4sf4"
1916 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wf,wf,?wa,?wa,v")
1918 (match_operand:V4SF 1 "vsx_register_operand" "%wf,wf,wa,wa,v")
1919 (match_operand:V4SF 2 "vsx_register_operand" "wf,0,wa,0,v")
1920 (match_operand:V4SF 3 "vsx_register_operand" "0,wf,0,wa,v")))]
1921 "VECTOR_UNIT_VSX_P (V4SFmode)"
1923 xvmaddasp %x0,%x1,%x2
1924 xvmaddmsp %x0,%x1,%x3
1925 xvmaddasp %x0,%x1,%x2
1926 xvmaddmsp %x0,%x1,%x3
1927 vmaddfp %0,%1,%2,%3"
1928 [(set_attr "type" "vecfloat")])
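;; Note on the "a" vs. "m" forms above: xvmaddasp overwrites the addend
;; (operand 3 is tied to the output), while xvmaddmsp overwrites the second
;; multiplicand (operand 2 is tied); that is how the constraint alternatives
;; satisfy the VSX requirement described in the comment above.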
1930 (define_insn "*vsx_fmav2df4"
1931 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,wd,?wa,?wa")
1933 (match_operand:V2DF 1 "vsx_register_operand" "%wd,wd,wa,wa")
1934 (match_operand:V2DF 2 "vsx_register_operand" "wd,0,wa,0")
1935 (match_operand:V2DF 3 "vsx_register_operand" "0,wd,0,wa")))]
1936 "VECTOR_UNIT_VSX_P (V2DFmode)"
1938 xvmaddadp %x0,%x1,%x2
1939 xvmaddmdp %x0,%x1,%x3
1940 xvmaddadp %x0,%x1,%x2
1941 xvmaddmdp %x0,%x1,%x3"
1942 [(set_attr "type" "vecdouble")])
1944 (define_insn "*vsx_fms<mode>4"
1945 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,<VSr>,?<VSa>,?<VSa>")
1947 (match_operand:VSX_F 1 "vsx_register_operand" "%<VSr>,<VSr>,<VSa>,<VSa>")
1948 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,0,<VSa>,0")
1950 (match_operand:VSX_F 3 "vsx_register_operand" "0,<VSr>,0,<VSa>"))))]
1951 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1953 xvmsuba<VSs> %x0,%x1,%x2
1954 xvmsubm<VSs> %x0,%x1,%x3
1955 xvmsuba<VSs> %x0,%x1,%x2
1956 xvmsubm<VSs> %x0,%x1,%x3"
1957 [(set_attr "type" "<VStype_mul>")])
1959 (define_insn "*vsx_nfma<mode>4"
1960 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,<VSr>,?<VSa>,?<VSa>")
1963 (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSr>,<VSa>,<VSa>")
1964 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,0,<VSa>,0")
1965 (match_operand:VSX_F 3 "vsx_register_operand" "0,<VSr>,0,<VSa>"))))]
1966 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1968 xvnmadda<VSs> %x0,%x1,%x2
1969 xvnmaddm<VSs> %x0,%x1,%x3
1970 xvnmadda<VSs> %x0,%x1,%x2
1971 xvnmaddm<VSs> %x0,%x1,%x3"
1972 [(set_attr "type" "<VStype_mul>")
1973 (set_attr "fp_type" "<VSfptype_mul>")])
1975 (define_insn "*vsx_nfmsv4sf4"
1976 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wf,wf,?wa,?wa,v")
1979 (match_operand:V4SF 1 "vsx_register_operand" "%wf,wf,wa,wa,v")
1980 (match_operand:V4SF 2 "vsx_register_operand" "wf,0,wa,0,v")
1982 (match_operand:V4SF 3 "vsx_register_operand" "0,wf,0,wa,v")))))]
1983 "VECTOR_UNIT_VSX_P (V4SFmode)"
1985 xvnmsubasp %x0,%x1,%x2
1986 xvnmsubmsp %x0,%x1,%x3
1987 xvnmsubasp %x0,%x1,%x2
1988 xvnmsubmsp %x0,%x1,%x3
1989 vnmsubfp %0,%1,%2,%3"
1990 [(set_attr "type" "vecfloat")])
1992 (define_insn "*vsx_nfmsv2df4"
1993 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,wd,?wa,?wa")
1996 (match_operand:V2DF 1 "vsx_register_operand" "%wd,wd,wa,wa")
1997 (match_operand:V2DF 2 "vsx_register_operand" "wd,0,wa,0")
1999 (match_operand:V2DF 3 "vsx_register_operand" "0,wd,0,wa")))))]
2000 "VECTOR_UNIT_VSX_P (V2DFmode)"
2002 xvnmsubadp %x0,%x1,%x2
2003 xvnmsubmdp %x0,%x1,%x3
2004 xvnmsubadp %x0,%x1,%x2
2005 xvnmsubmdp %x0,%x1,%x3"
2006 [(set_attr "type" "vecdouble")])
2008 ;; Vector conditional expressions (no scalar version for these instructions)
2009 (define_insn "vsx_eq<mode>"
2010 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
2011 (eq:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
2012 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
2013 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2014 "xvcmpeq<VSs> %x0,%x1,%x2"
2015 [(set_attr "type" "<VStype_simple>")
2016 (set_attr "fp_type" "<VSfptype_simple>")])
2018 (define_insn "vsx_gt<mode>"
2019 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
2020 (gt:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
2021 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
2022 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2023 "xvcmpgt<VSs> %x0,%x1,%x2"
2024 [(set_attr "type" "<VStype_simple>")
2025 (set_attr "fp_type" "<VSfptype_simple>")])
2027 (define_insn "*vsx_ge<mode>"
2028 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
2029 (ge:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
2030 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
2031 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2032 "xvcmpge<VSs> %x0,%x1,%x2"
2033 [(set_attr "type" "<VStype_simple>")
2034 (set_attr "fp_type" "<VSfptype_simple>")])
2036 ;; Compare vectors producing a vector result and a predicate, setting CR6 to
2037 ;; indicate a combined status
2038 (define_insn "*vsx_eq_<mode>_p"
2039 [(set (reg:CC CR6_REGNO)
2041 [(eq:CC (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,?<VSa>")
2042 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,?<VSa>"))]
2044 (set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
2045 (eq:VSX_F (match_dup 1)
2047 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2048 "xvcmpeq<VSs>. %x0,%x1,%x2"
2049 [(set_attr "type" "<VStype_simple>")])
2051 (define_insn "*vsx_gt_<mode>_p"
2052 [(set (reg:CC CR6_REGNO)
2054 [(gt:CC (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,?<VSa>")
2055 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,?<VSa>"))]
2057 (set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
2058 (gt:VSX_F (match_dup 1)
2060 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2061 "xvcmpgt<VSs>. %x0,%x1,%x2"
2062 [(set_attr "type" "<VStype_simple>")])
2064 (define_insn "*vsx_ge_<mode>_p"
2065 [(set (reg:CC CR6_REGNO)
2067 [(ge:CC (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,?<VSa>")
2068 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,?<VSa>"))]
2070 (set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
2071 (ge:VSX_F (match_dup 1)
2073 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2074 "xvcmpge<VSs>. %x0,%x1,%x2"
2075 [(set_attr "type" "<VStype_simple>")])
2078 (define_insn "*vsx_xxsel<mode>"
2079 [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?<VSa>")
2081 (ne:CC (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,<VSa>")
2082 (match_operand:VSX_L 4 "zero_constant" ""))
2083 (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,<VSa>")
2084 (match_operand:VSX_L 3 "vsx_register_operand" "<VSr>,<VSa>")))]
2085 "VECTOR_MEM_VSX_P (<MODE>mode)"
2086 "xxsel %x0,%x3,%x2,%x1"
2087 [(set_attr "type" "vecmove")])
2089 (define_insn "*vsx_xxsel<mode>_uns"
2090 [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?<VSa>")
2092 (ne:CCUNS (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,<VSa>")
2093 (match_operand:VSX_L 4 "zero_constant" ""))
2094 (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,<VSa>")
2095 (match_operand:VSX_L 3 "vsx_register_operand" "<VSr>,<VSa>")))]
2096 "VECTOR_MEM_VSX_P (<MODE>mode)"
2097 "xxsel %x0,%x3,%x2,%x1"
2098 [(set_attr "type" "vecmove")])
2101 (define_insn "vsx_copysign<mode>3"
2102 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
2104 [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
2105 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")]
2107 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2108 "xvcpsgn<VSs> %x0,%x2,%x1"
2109 [(set_attr "type" "<VStype_simple>")
2110 (set_attr "fp_type" "<VSfptype_simple>")])
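;; Illustrative note (not md syntax): lane-wise, the copysign pattern above
;; behaves like the C sketch below.  The template swaps %x2 and %x1 because
;; xvcpsgn takes the sign from its first source and the magnitude from its
;; second.
;;
;;   for (int i = 0; i < n; i++)        /* n = lanes; a = op1, b = op2 */
;;     dst[i] = copysignf (a[i], b[i]);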
2112 ;; For the conversions, limit the register class for the integer value to be
2113 ;; the fprs because we don't want to add the altivec registers to movdi/movsi.
2114 ;; For the unsigned tests, there isn't a generic double -> unsigned conversion
2115 ;; in rs6000.md so don't test VECTOR_UNIT_VSX_P, just test against VSX.
2116 ;; Don't use vsx_register_operand here, use gpc_reg_operand to match rs6000.md
2117 ;; in allowing virtual registers.
2118 (define_insn "vsx_float<VSi><mode>2"
2119 [(set (match_operand:VSX_F 0 "gpc_reg_operand" "=<VSr>,?<VSa>")
2120 (float:VSX_F (match_operand:<VSI> 1 "gpc_reg_operand" "<VSr2>,<VSr3>")))]
2121 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2122 "xvcvsx<VSc><VSs> %x0,%x1"
2123 [(set_attr "type" "<VStype_simple>")
2124 (set_attr "fp_type" "<VSfptype_simple>")])
2126 (define_insn "vsx_floatuns<VSi><mode>2"
2127 [(set (match_operand:VSX_F 0 "gpc_reg_operand" "=<VSr>,?<VSa>")
2128 (unsigned_float:VSX_F (match_operand:<VSI> 1 "gpc_reg_operand" "<VSr2>,<VSr3>")))]
2129 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2130 "xvcvux<VSc><VSs> %x0,%x1"
2131 [(set_attr "type" "<VStype_simple>")
2132 (set_attr "fp_type" "<VSfptype_simple>")])
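;; Illustrative sketch (not md syntax): the two conversion patterns above are
;; lane-wise int -> float converts, e.g. for V4SI -> V4SF:
;;
;;   for (int i = 0; i < 4; i++)
;;     dst[i] = (float) src[i];         /* xvcvsxwsp; xvcvuxwsp for unsigned */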
2134 (define_insn "vsx_fix_trunc<mode><VSi>2"
2135 [(set (match_operand:<VSI> 0 "gpc_reg_operand" "=<VSr2>,?<VSr3>")
2136 (fix:<VSI> (match_operand:VSX_F 1 "gpc_reg_operand" "<VSr>,<VSa>")))]
2137 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2138 "x<VSv>cv<VSs>sx<VSc>s %x0,%x1"
2139 [(set_attr "type" "<VStype_simple>")
2140 (set_attr "fp_type" "<VSfptype_simple>")])
2142 (define_insn "vsx_fixuns_trunc<mode><VSi>2"
2143 [(set (match_operand:<VSI> 0 "gpc_reg_operand" "=<VSr2>,?<VSr3>")
2144 (unsigned_fix:<VSI> (match_operand:VSX_F 1 "gpc_reg_operand" "<VSr>,<VSa>")))]
2145 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2146 "x<VSv>cv<VSs>ux<VSc>s %x0,%x1"
2147 [(set_attr "type" "<VStype_simple>")
2148 (set_attr "fp_type" "<VSfptype_simple>")])
2150 ;; Math rounding functions
2151 (define_insn "vsx_x<VSv>r<VSs>i"
2152 [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?<VSa>")
2153 (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSa>")]
2154 UNSPEC_VSX_ROUND_I))]
2155 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2156 "x<VSv>r<VSs>i %x0,%x1"
2157 [(set_attr "type" "<VStype_simple>")
2158 (set_attr "fp_type" "<VSfptype_simple>")])
2160 (define_insn "vsx_x<VSv>r<VSs>ic"
2161 [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?<VSa>")
2162 (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSa>")]
2163 UNSPEC_VSX_ROUND_IC))]
2164 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2165 "x<VSv>r<VSs>ic %x0,%x1"
2166 [(set_attr "type" "<VStype_simple>")
2167 (set_attr "fp_type" "<VSfptype_simple>")])
2169 (define_insn "vsx_btrunc<mode>2"
2170 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
2171 (fix:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")))]
2172 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2173 "xvr<VSs>iz %x0,%x1"
2174 [(set_attr "type" "<VStype_simple>")
2175 (set_attr "fp_type" "<VSfptype_simple>")])
2177 (define_insn "*vsx_b2trunc<mode>2"
2178 [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?<VSa>")
2179 (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSa>")]
2181 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2182 "x<VSv>r<VSs>iz %x0,%x1"
2183 [(set_attr "type" "<VStype_simple>")
2184 (set_attr "fp_type" "<VSfptype_simple>")])
2186 (define_insn "vsx_floor<mode>2"
2187 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
2188 (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")]
2190 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2191 "xvr<VSs>im %x0,%x1"
2192 [(set_attr "type" "<VStype_simple>")
2193 (set_attr "fp_type" "<VSfptype_simple>")])
2195 (define_insn "vsx_ceil<mode>2"
2196 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
2197 (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")]
2199 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2200 "xvr<VSs>ip %x0,%x1"
2201 [(set_attr "type" "<VStype_simple>")
2202 (set_attr "fp_type" "<VSfptype_simple>")])
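;; For reference (illustrative only), the round-to-integral patterns above map
;; lane-wise onto C roughly as:
;;
;;   dst[i] = truncf (src[i]);          /* xvr<VSs>iz (btrunc) */
;;   dst[i] = floorf (src[i]);          /* xvr<VSs>im (floor)  */
;;   dst[i] = ceilf (src[i]);           /* xvr<VSs>ip (ceil)   */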
2205 ;; VSX convert to/from double vector
2207 ;; Convert between single and double precision
2208 ;; Don't use xscvspdp and xscvdpsp for scalar conversions, since the normal
2209 ;; scalar single precision instructions internally use the double format.
2210 ;; Prefer the altivec registers, since we likely will need to do a vperm
2211 (define_insn "vsx_<VS_spdp_insn>"
2212 [(set (match_operand:<VS_spdp_res> 0 "vsx_register_operand" "=<VSr4>,?<VSa>")
2213 (unspec:<VS_spdp_res> [(match_operand:VSX_SPDP 1 "vsx_register_operand" "<VSr5>,<VSa>")]
2214 UNSPEC_VSX_CVSPDP))]
2215 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2216 "<VS_spdp_insn> %x0,%x1"
2217 [(set_attr "type" "<VS_spdp_type>")])
2219 ;; xscvspdp, with the scalar single precision source represented as V4SF
2220 (define_insn "vsx_xscvspdp"
2221 [(set (match_operand:DF 0 "vsx_register_operand" "=ws")
2222 (unspec:DF [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
2223 UNSPEC_VSX_CVSPDP))]
2224 "VECTOR_UNIT_VSX_P (V4SFmode)"
2226 [(set_attr "type" "fp")])
2228 ;; Same as vsx_xscvspdp, but use SF as the type
2229 (define_insn "vsx_xscvspdp_scalar2"
2230 [(set (match_operand:SF 0 "vsx_register_operand" "=ww")
2231 (unspec:SF [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
2232 UNSPEC_VSX_CVSPDP))]
2233 "VECTOR_UNIT_VSX_P (V4SFmode)"
2235 [(set_attr "type" "fp")])
2237 ;; Generate xvcvhpsp instruction
2238 (define_insn "vsx_xvcvhpsp"
2239 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2240 (unspec:V4SF [(match_operand:V16QI 1 "vsx_register_operand" "wa")]
2241 UNSPEC_VSX_CVHPSP))]
2244 [(set_attr "type" "vecfloat")])
2246 ;; xscvdpsp used for splat'ing a scalar to V4SF, knowing that the internal SF
2247 ;; format of scalars is actually DF.
2248 (define_insn "vsx_xscvdpsp_scalar"
2249 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2250 (unspec:V4SF [(match_operand:SF 1 "vsx_register_operand" "ww")]
2251 UNSPEC_VSX_CVSPDP))]
2252 "VECTOR_UNIT_VSX_P (V4SFmode)"
2254 [(set_attr "type" "fp")])
2256 ;; ISA 2.07 xscvdpspn/xscvspdpn that do not raise an error on signalling NaNs
2257 (define_insn "vsx_xscvdpspn"
2258 [(set (match_operand:V4SF 0 "vsx_register_operand" "=ww")
2259 (unspec:V4SF [(match_operand:DF 1 "vsx_register_operand" "ws")]
2260 UNSPEC_VSX_CVDPSPN))]
2263 [(set_attr "type" "fp")])
2265 (define_insn "vsx_xscvspdpn"
2266 [(set (match_operand:DF 0 "vsx_register_operand" "=ws")
2267 (unspec:DF [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
2268 UNSPEC_VSX_CVSPDPN))]
2271 [(set_attr "type" "fp")])
2273 (define_insn "vsx_xscvdpspn_scalar"
2274 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2275 (unspec:V4SF [(match_operand:SF 1 "vsx_register_operand" "ww")]
2276 UNSPEC_VSX_CVDPSPN))]
2279 [(set_attr "type" "fp")])
2281 ;; Used by direct move to move a SFmode value from GPR to VSX register
2282 (define_insn "vsx_xscvspdpn_directmove"
2283 [(set (match_operand:SF 0 "vsx_register_operand" "=wa")
2284 (unspec:SF [(match_operand:SF 1 "vsx_register_operand" "wa")]
2285 UNSPEC_VSX_CVSPDPN))]
2288 [(set_attr "type" "fp")])
2290 ;; Convert and scale (used by vec_ctf, vec_cts, vec_ctu for double/long long)
2292 (define_expand "vsx_xvcvsxddp_scale"
2293 [(match_operand:V2DF 0 "vsx_register_operand")
2294 (match_operand:V2DI 1 "vsx_register_operand")
2295 (match_operand:QI 2 "immediate_operand")]
2296 "VECTOR_UNIT_VSX_P (V2DFmode)"
2298 rtx op0 = operands[0];
2299 rtx op1 = operands[1];
2300 int scale = INTVAL (operands[2]);
2301 emit_insn (gen_vsx_xvcvsxddp (op0, op1));
2303 rs6000_scale_v2df (op0, op0, -scale);
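;; Illustrative C sketch (not md syntax) of the scaled conversion above, as
;; used by vec_ctf: convert each signed doubleword to double, then multiply
;; by 2**-scale.
;;
;;   for (int i = 0; i < 2; i++)
;;     dst[i] = ldexp ((double) src[i], -scale);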
2307 (define_insn "vsx_xvcvsxddp"
2308 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
2309 (unspec:V2DF [(match_operand:V2DI 1 "vsx_register_operand" "wa")]
2310 UNSPEC_VSX_XVCVSXDDP))]
2311 "VECTOR_UNIT_VSX_P (V2DFmode)"
2313 [(set_attr "type" "vecdouble")])
2315 (define_expand "vsx_xvcvuxddp_scale"
2316 [(match_operand:V2DF 0 "vsx_register_operand")
2317 (match_operand:V2DI 1 "vsx_register_operand")
2318 (match_operand:QI 2 "immediate_operand")]
2319 "VECTOR_UNIT_VSX_P (V2DFmode)"
2321 rtx op0 = operands[0];
2322 rtx op1 = operands[1];
2323 int scale = INTVAL (operands[2]);
2324 emit_insn (gen_vsx_xvcvuxddp (op0, op1));
2326 rs6000_scale_v2df (op0, op0, -scale);
2330 (define_insn "vsx_xvcvuxddp"
2331 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
2332 (unspec:V2DF [(match_operand:V2DI 1 "vsx_register_operand" "wa")]
2333 UNSPEC_VSX_XVCVUXDDP))]
2334 "VECTOR_UNIT_VSX_P (V2DFmode)"
2336 [(set_attr "type" "vecdouble")])
2338 (define_expand "vsx_xvcvdpsxds_scale"
2339 [(match_operand:V2DI 0 "vsx_register_operand")
2340 (match_operand:V2DF 1 "vsx_register_operand")
2341 (match_operand:QI 2 "immediate_operand")]
2342 "VECTOR_UNIT_VSX_P (V2DFmode)"
2344 rtx op0 = operands[0];
2345 rtx op1 = operands[1];
2347 int scale = INTVAL (operands[2]);
2352 tmp = gen_reg_rtx (V2DFmode);
2353 rs6000_scale_v2df (tmp, op1, scale);
2355 emit_insn (gen_vsx_xvcvdpsxds (op0, tmp));
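;; Illustrative C sketch (not md syntax) of the vec_cts-style conversion
;; above: scale each double by 2**scale first, then truncate to a signed
;; doubleword.
;;
;;   for (int i = 0; i < 2; i++)
;;     dst[i] = (long long) ldexp (src[i], scale);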
2359 ;; convert vector of 64-bit floating point numbers to vector of
2360 ;; 64-bit signed integer
2361 (define_insn "vsx_xvcvdpsxds"
2362 [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
2363 (unspec:V2DI [(match_operand:V2DF 1 "vsx_register_operand" "wa")]
2364 UNSPEC_VSX_XVCVDPSXDS))]
2365 "VECTOR_UNIT_VSX_P (V2DFmode)"
2366 "xvcvdpsxds %x0,%x1"
2367 [(set_attr "type" "vecdouble")])
2369 ;; convert vector of 32-bit floating point numbers to vector of
2370 ;; 32-bit signed integer
2371 (define_insn "vsx_xvcvspsxws"
2372 [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa")
2373 (unspec:V4SI [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
2374 UNSPEC_VSX_XVCVSPSXWS))]
2375 "VECTOR_UNIT_VSX_P (V4SFmode)"
2376 "xvcvspsxws %x0,%x1"
2377 [(set_attr "type" "vecfloat")])
2379 ;; convert vector of 64-bit floating point numbers to vector of
2380 ;; 64-bit unsigned integer
2381 (define_expand "vsx_xvcvdpuxds_scale"
2382 [(match_operand:V2DI 0 "vsx_register_operand")
2383 (match_operand:V2DF 1 "vsx_register_operand")
2384 (match_operand:QI 2 "immediate_operand")]
2385 "VECTOR_UNIT_VSX_P (V2DFmode)"
2387 rtx op0 = operands[0];
2388 rtx op1 = operands[1];
2390 int scale = INTVAL (operands[2]);
2395 tmp = gen_reg_rtx (V2DFmode);
2396 rs6000_scale_v2df (tmp, op1, scale);
2398 emit_insn (gen_vsx_xvcvdpuxds (op0, tmp));
2402 ;; convert vector of 32-bit floating point numbers to vector of
2403 ;; 32-bit unsigned integer
2404 (define_insn "vsx_xvcvspuxws"
2405 [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa")
2406 (unspec:V4SI [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
2407 UNSPEC_VSX_XVCVSPSXWS))]
2408 "VECTOR_UNIT_VSX_P (V4SFmode)"
2409 "xvcvspuxws %x0,%x1"
2410 [(set_attr "type" "vecfloat")])
2412 (define_insn "vsx_xvcvdpuxds"
2413 [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
2414 (unspec:V2DI [(match_operand:V2DF 1 "vsx_register_operand" "wa")]
2415 UNSPEC_VSX_XVCVDPUXDS))]
2416 "VECTOR_UNIT_VSX_P (V2DFmode)"
2417 "xvcvdpuxds %x0,%x1"
2418 [(set_attr "type" "vecdouble")])
2420 ;; Convert from 64-bit to 32-bit types
2421 ;; Note, favor the Altivec registers since the usual use of these instructions
2422 ;; is in vector converts and we need to use the Altivec vperm instruction.
2424 (define_insn "vsx_xvcvdpsxws"
2425 [(set (match_operand:V4SI 0 "vsx_register_operand" "=v,?wa")
2426 (unspec:V4SI [(match_operand:V2DF 1 "vsx_register_operand" "wd,wa")]
2427 UNSPEC_VSX_CVDPSXWS))]
2428 "VECTOR_UNIT_VSX_P (V2DFmode)"
2429 "xvcvdpsxws %x0,%x1"
2430 [(set_attr "type" "vecdouble")])
2432 (define_insn "vsx_xvcvdpuxws"
2433 [(set (match_operand:V4SI 0 "vsx_register_operand" "=v,?wa")
2434 (unspec:V4SI [(match_operand:V2DF 1 "vsx_register_operand" "wd,wa")]
2435 UNSPEC_VSX_CVDPUXWS))]
2436 "VECTOR_UNIT_VSX_P (V2DFmode)"
2437 "xvcvdpuxws %x0,%x1"
2438 [(set_attr "type" "vecdouble")])
2440 (define_insn "vsx_xvcvsxdsp"
2441 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wd,?wa")
2442 (unspec:V4SF [(match_operand:V2DI 1 "vsx_register_operand" "wf,wa")]
2443 UNSPEC_VSX_CVSXDSP))]
2444 "VECTOR_UNIT_VSX_P (V2DFmode)"
2446 [(set_attr "type" "vecfloat")])
2448 (define_insn "vsx_xvcvuxdsp"
2449 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wd,?wa")
2450 (unspec:V4SF [(match_operand:V2DI 1 "vsx_register_operand" "wf,wa")]
2451 UNSPEC_VSX_CVUXDSP))]
2452 "VECTOR_UNIT_VSX_P (V2DFmode)"
2454 [(set_attr "type" "vecdouble")])
2456 (define_insn "vsx_xvcdpsp"
2457 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wd,?wa")
2458 (unspec:V4SF [(match_operand:V2DF 1 "vsx_register_operand" "wf,wa")]
2459 UNSPEC_VSX_XVCDPSP))]
2460 "VECTOR_UNIT_VSX_P (V2DFmode)"
2462 [(set_attr "type" "vecdouble")])
2464 ;; Convert from 32-bit to 64-bit types
2465 ;; Provide both vector and scalar targets
2466 (define_insn "vsx_xvcvsxwdp"
2467 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,?wa")
2468 (unspec:V2DF [(match_operand:V4SI 1 "vsx_register_operand" "wf,wa")]
2469 UNSPEC_VSX_CVSXWDP))]
2470 "VECTOR_UNIT_VSX_P (V2DFmode)"
2472 [(set_attr "type" "vecdouble")])
2474 (define_insn "vsx_xvcvsxwdp_df"
2475 [(set (match_operand:DF 0 "vsx_register_operand" "=ws")
2476 (unspec:DF [(match_operand:V4SI 1 "vsx_register_operand" "wa")]
2477 UNSPEC_VSX_CVSXWDP))]
2480 [(set_attr "type" "vecdouble")])
2482 (define_insn "vsx_xvcvuxwdp"
2483 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,?wa")
2484 (unspec:V2DF [(match_operand:V4SI 1 "vsx_register_operand" "wf,wa")]
2485 UNSPEC_VSX_CVUXWDP))]
2486 "VECTOR_UNIT_VSX_P (V2DFmode)"
2488 [(set_attr "type" "vecdouble")])
2490 (define_insn "vsx_xvcvuxwdp_df"
2491 [(set (match_operand:DF 0 "vsx_register_operand" "=ws")
2492 (unspec:DF [(match_operand:V4SI 1 "vsx_register_operand" "wa")]
2493 UNSPEC_VSX_CVUXWDP))]
2496 [(set_attr "type" "vecdouble")])
2498 (define_insn "vsx_xvcvspsxds"
2499 [(set (match_operand:V2DI 0 "vsx_register_operand" "=v,?wa")
2500 (unspec:V2DI [(match_operand:V4SF 1 "vsx_register_operand" "wd,wa")]
2501 UNSPEC_VSX_CVSPSXDS))]
2502 "VECTOR_UNIT_VSX_P (V2DFmode)"
2503 "xvcvspsxds %x0,%x1"
2504 [(set_attr "type" "vecdouble")])
2506 (define_insn "vsx_xvcvspuxds"
2507 [(set (match_operand:V2DI 0 "vsx_register_operand" "=v,?wa")
2508 (unspec:V2DI [(match_operand:V4SF 1 "vsx_register_operand" "wd,wa")]
2509 UNSPEC_VSX_CVSPUXDS))]
2510 "VECTOR_UNIT_VSX_P (V2DFmode)"
2511 "xvcvspuxds %x0,%x1"
2512 [(set_attr "type" "vecdouble")])
2514 (define_insn "vsx_xvcvsxwsp"
2515 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2516 (unspec:V4SF [(match_operand:V4SI 1 "vsx_register_operand" "wa")]
2517 UNSPEC_VSX_CVSXWSP))]
2518 "VECTOR_UNIT_VSX_P (V4SFmode)"
2520 [(set_attr "type" "vecfloat")])
2522 (define_insn "vsx_xvcvuxwsp"
2523 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2524 (unspec:V4SF [(match_operand:V4SI 1 "vsx_register_operand" "wa")]
2525 UNSPEC_VSX_CVUXWSP))]
2526 "VECTOR_UNIT_VSX_P (V4SFmode)"
2528 [(set_attr "type" "vecfloat")])
2530 ;; Generate float2_v2df
2531 ;; convert two vectors of double to a vector of float
2532 (define_expand "float2_v2df"
2533 [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2534 (use (match_operand:V2DF 1 "register_operand" "wa"))
2535 (use (match_operand:V2DF 2 "register_operand" "wa"))]
2536 "VECTOR_UNIT_VSX_P (V4SFmode)"
2538 rtx rtx_src1, rtx_src2, rtx_dst;
2540 rtx_dst = operands[0];
2541 rtx_src1 = operands[1];
2542 rtx_src2 = operands[2];
2544 rs6000_generate_float2_double_code (rtx_dst, rtx_src1, rtx_src2);
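;; Illustrative sketch (not md syntax): float2_v2df conceptually narrows two
;; V2DF vectors into one V4SF result:
;;
;;   dst[0] = (float) a[0];  dst[1] = (float) a[1];
;;   dst[2] = (float) b[0];  dst[3] = (float) b[1];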
2549 ;; convert two long long signed ints to float
2550 (define_expand "float2_v2di"
2551 [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2552 (use (match_operand:V2DI 1 "register_operand" "wa"))
2553 (use (match_operand:V2DI 2 "register_operand" "wa"))]
2554 "VECTOR_UNIT_VSX_P (V4SFmode)"
2556 rtx rtx_src1, rtx_src2, rtx_dst;
2558 rtx_dst = operands[0];
2559 rtx_src1 = operands[1];
2560 rtx_src2 = operands[2];
2562 rs6000_generate_float2_code (true, rtx_dst, rtx_src1, rtx_src2);
2566 ;; Generate uns_float2
2567 ;; convert two long long unsigned ints to float
2568 (define_expand "uns_float2_v2di"
2569 [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2570 (use (match_operand:V2DI 1 "register_operand" "wa"))
2571 (use (match_operand:V2DI 2 "register_operand" "wa"))]
2572 "VECTOR_UNIT_VSX_P (V4SFmode)"
2574 rtx rtx_src1, rtx_src2, rtx_dst;
2576 rtx_dst = operands[0];
2577 rtx_src1 = operands[1];
2578 rtx_src2 = operands[2];
2580 rs6000_generate_float2_code (false, rtx_dst, rtx_src1, rtx_src2);
2585 ;; convert double or long long signed to float
2586 ;; (Only even words are valid, BE numbering)
2587 (define_expand "floate<mode>"
2588 [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2589 (use (match_operand:VSX_D 1 "register_operand" "wa"))]
2590 "VECTOR_UNIT_VSX_P (V4SFmode)"
2592 if (VECTOR_ELT_ORDER_BIG)
2594 /* Shift left one word to put the even words in the correct location. */
2596 rtx rtx_val = GEN_INT (4);
2598 rtx_tmp = gen_reg_rtx (V4SFmode);
2599 emit_insn (gen_vsx_xvcv<VF_sxddp>sp (rtx_tmp, operands[1]));
2600 emit_insn (gen_altivec_vsldoi_v4sf (operands[0],
2601 rtx_tmp, rtx_tmp, rtx_val));
2604 emit_insn (gen_vsx_xvcv<VF_sxddp>sp (operands[0], operands[1]));
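;; Illustrative sketch (not md syntax): floate converts each doubleword
;; element to float and leaves the results in the even words of the result
;; (BE numbering); the odd words are undefined.  floato below fills the odd
;; words instead.
;;
;;   dst[0] = (float) src[0];  dst[2] = (float) src[1];  /* even words */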
2609 ;; Generate uns_floate
2610 ;; convert long long unsigned to float
2611 ;; (Only even words are valid, BE numbering)
2612 (define_expand "unsfloatev2di"
2613 [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2614 (use (match_operand:V2DI 1 "register_operand" "wa"))]
2615 "VECTOR_UNIT_VSX_P (V4SFmode)"
2617 if (VECTOR_ELT_ORDER_BIG)
2619 /* Shift left one word to put the even words in the correct location. */
2621 rtx rtx_val = GEN_INT (4);
2623 rtx_tmp = gen_reg_rtx (V4SFmode);
2624 emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp, operands[1]));
2625 emit_insn (gen_altivec_vsldoi_v4sf (operands[0],
2626 rtx_tmp, rtx_tmp, rtx_val));
2629 emit_insn (gen_vsx_xvcvuxdsp (operands[0], operands[1]));
2635 ;; convert double or long long signed to float
2636 ;; (Only odd words are valid, BE numbering)
2637 (define_expand "floato<mode>"
2638 [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2639 (use (match_operand:VSX_D 1 "register_operand" "wa"))]
2640 "VECTOR_UNIT_VSX_P (V4SFmode)"
2642 if (VECTOR_ELT_ORDER_BIG)
2643 emit_insn (gen_vsx_xvcv<VF_sxddp>sp (operands[0], operands[1]));
2646 /* Shift left one word to put the odd words in the correct location. */
2648 rtx rtx_val = GEN_INT (4);
2650 rtx_tmp = gen_reg_rtx (V4SFmode);
2651 emit_insn (gen_vsx_xvcv<VF_sxddp>sp (rtx_tmp, operands[1]));
2652 emit_insn (gen_altivec_vsldoi_v4sf (operands[0],
2653 rtx_tmp, rtx_tmp, rtx_val));
2658 ;; Generate uns_floato
2659 ;; convert long long unsigned to float
2660 ;; (Only odd words are valid, BE numbering)
2661 (define_expand "unsfloatov2di"
2662 [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2663 (use (match_operand:V2DI 1 "register_operand" "wa"))]
2664 "VECTOR_UNIT_VSX_P (V4SFmode)"
2666 if (VECTOR_ELT_ORDER_BIG)
2667 emit_insn (gen_vsx_xvcvuxdsp (operands[0], operands[1]));
2670 /* Shift left one word to put the odd words in the correct location. */
2672 rtx rtx_val = GEN_INT (4);
2674 rtx_tmp = gen_reg_rtx (V4SFmode);
2675 emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp, operands[1]));
2676 emit_insn (gen_altivec_vsldoi_v4sf (operands[0],
2677 rtx_tmp, rtx_tmp, rtx_val));
2682 ;; Generate vsigned2
2683 ;; convert two double float vectors to a vector of single precision ints
2684 (define_expand "vsigned2_v2df"
2685 [(match_operand:V4SI 0 "register_operand" "=wa")
2686 (unspec:V4SI [(match_operand:V2DF 1 "register_operand" "wa")
2687 (match_operand:V2DF 2 "register_operand" "wa")]
2688 UNSPEC_VSX_VSIGNED2)]
2691 rtx rtx_src1, rtx_src2, rtx_dst;
2692 bool signed_convert = true;
2694 rtx_dst = operands[0];
2695 rtx_src1 = operands[1];
2696 rtx_src2 = operands[2];
2698 rs6000_generate_vsigned2_code (signed_convert, rtx_dst, rtx_src1, rtx_src2);
2702 ;; Generate vsignedo_v2df
2703 ;; signed double float to int convert, odd words
2704 (define_expand "vsignedo_v2df"
2705 [(set (match_operand:V4SI 0 "register_operand" "=wa")
2706 (match_operand:V2DF 1 "register_operand" "wa"))]
2709 if (VECTOR_ELT_ORDER_BIG)
2712 rtx rtx_val = GEN_INT (12);
2713 rtx_tmp = gen_reg_rtx (V4SImode);
2715 emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp, operands[1]));
2717 /* Big endian word numbering for words in operand is 0 1 2 3.
2718 Take (operand[1] operand[1]) and shift left three words (12 bytes):
2719 0 1 2 3 0 1 2 3 => 3 0 1 2
2720 Words 1 and 3 are now where they need to be for the result. */
2722 emit_insn (gen_altivec_vsldoi_v4si (operands[0], rtx_tmp,
2726 /* Little endian word numbering for operand is 3 2 1 0.
2727 Result words 3 and 1 are where they need to be. */
2728 emit_insn (gen_vsx_xvcvdpsxws (operands[0], operands[1]));
2732 [(set_attr "type" "veccomplex")])
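;; Worked example (illustrative) of the vsldoi rotate used above: a 12-byte
;; (three-word) shift takes bytes 12..27 of (tmp || tmp):
;;
;;   tmp = { w0, w1, w2, w3 }           /* w0 and w2 hold converted ints */
;;   out = { w3, w0, w1, w2 }           /* valid results in words 1 and 3 */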
2734 ;; Generate vsignede_v2df
2735 ;; signed double float to int convert, even words
2736 (define_expand "vsignede_v2df"
2737 [(set (match_operand:V4SI 0 "register_operand" "=v")
2738 (match_operand:V2DF 1 "register_operand" "v"))]
2741 if (VECTOR_ELT_ORDER_BIG)
2742 /* Big endian word numbering for words in operand is 0 1 2 3.
2743 Result words 0 and 2 are already where they need to be. */
2744 emit_insn (gen_vsx_xvcvdpsxws (operands[0], operands[1]));
2749 rtx rtx_val = GEN_INT (12);
2750 rtx_tmp = gen_reg_rtx (V4SImode);
2752 emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp, operands[1]));
2754 /* Little endian word numbering for operand is 3 2 1 0.
2755 take (operand[1] operand[1]) and shift left three words
2756 0 1 2 3 0 1 2 3 => 3 0 1 2
2757 Words 0 and 2 are now where they need to be for the result. */
2758 emit_insn (gen_altivec_vsldoi_v4si (operands[0], rtx_tmp,
2763 [(set_attr "type" "veccomplex")])
2765 ;; Generate vunsigned2
2766 ;; convert two double float vectors to a vector of single precision
2767 ;; unsigned ints
2768 (define_expand "vunsigned2_v2df"
2769 [(match_operand:V4SI 0 "register_operand" "=v")
2770 (unspec:V4SI [(match_operand:V2DF 1 "register_operand" "v")
2771 (match_operand:V2DF 2 "register_operand" "v")]
2772 UNSPEC_VSX_VSIGNED2)]
2775 rtx rtx_src1, rtx_src2, rtx_dst;
2776 bool signed_convert = false;
2778 rtx_dst = operands[0];
2779 rtx_src1 = operands[1];
2780 rtx_src2 = operands[2];
2782 rs6000_generate_vsigned2_code (signed_convert, rtx_dst, rtx_src1, rtx_src2);
2786 ;; Generate vunsignedo_v2df
2787 ;; unsigned double float to int convert, odd words
2788 (define_expand "vunsignedo_v2df"
2789 [(set (match_operand:V4SI 0 "register_operand" "=v")
2790 (match_operand:V2DF 1 "register_operand" "v"))]
2793 if (VECTOR_ELT_ORDER_BIG)
2796 rtx rtx_val = GEN_INT (12);
2797 rtx_tmp = gen_reg_rtx (V4SImode);
2799 emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp, operands[1]));
2801 /* Big endian word numbering for words in operand is 0 1 2 3.
2802 Take (operand[1] operand[1]) and shift left three words (12 bytes):
2803 0 1 2 3 0 1 2 3 => 3 0 1 2
2804 Words 1 and 3 are now where they need to be for the result. */
2806 emit_insn (gen_altivec_vsldoi_v4si (operands[0], rtx_tmp,
2810 /* Little endian word numbering for operand is 3 2 1 0.
2811 Result words 3 and 1 are where they need to be. */
2812 emit_insn (gen_vsx_xvcvdpuxws (operands[0], operands[1]));
2816 [(set_attr "type" "veccomplex")])
2818 ;; Generate vunsignede_v2df
2819 ;; unsigned double float to int convert, even words
2820 (define_expand "vunsignede_v2df"
2821 [(set (match_operand:V4SI 0 "register_operand" "=v")
2822 (match_operand:V2DF 1 "register_operand" "v"))]
2825 if (VECTOR_ELT_ORDER_BIG)
2826 /* Big endian word numbering for words in operand is 0 1 2 3.
2827 Result words 0 and 2 are already where they need to be. */
2828 emit_insn (gen_vsx_xvcvdpuxws (operands[0], operands[1]));
2833 rtx rtx_val = GEN_INT (12);
2834 rtx_tmp = gen_reg_rtx (V4SImode);
2836 emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp, operands[1]));
2838 /* Little endian word numbering for operand is 3 2 1 0.
2839 take (operand[1] operand[1]) and shift left three words
2840 0 1 2 3 0 1 2 3 => 3 0 1 2
2841 Words 0 and 2 are now where they need to be for the result. */
2842 emit_insn (gen_altivec_vsldoi_v4si (operands[0], rtx_tmp,
2847 [(set_attr "type" "veccomplex")])
2849 ;; Only optimize (float (fix x)) -> frz if we are in fast-math mode, since
2850 ;; the xvrdpiz instruction does not truncate the value if the floating
2851 ;; point value is < LONG_MIN or > LONG_MAX.
2852 (define_insn "*vsx_float_fix_v2df2"
2853 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,?wa")
2856 (match_operand:V2DF 1 "vsx_register_operand" "wd,?wa"))))]
2857 "TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
2858 && VECTOR_UNIT_VSX_P (V2DFmode) && flag_unsafe_math_optimizations
2859 && !flag_trapping_math && TARGET_FRIZ"
2861 [(set_attr "type" "vecdouble")
2862 (set_attr "fp_type" "fp_addsub_d")])
2865 ;; Permute operations
2867 ;; Build a V2DF/V2DI vector from two scalars
2868 (define_insn "vsx_concat_<mode>"
2869 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa,we")
2871 (match_operand:<VS_scalar> 1 "gpc_reg_operand" "wa,b")
2872 (match_operand:<VS_scalar> 2 "gpc_reg_operand" "wa,b")))]
2873 "VECTOR_MEM_VSX_P (<MODE>mode)"
2875 if (which_alternative == 0)
2876 return (BYTES_BIG_ENDIAN
2877 ? "xxpermdi %x0,%x1,%x2,0"
2878 : "xxpermdi %x0,%x2,%x1,0");
2880 else if (which_alternative == 1)
2881 return (BYTES_BIG_ENDIAN
2882 ? "mtvsrdd %x0,%1,%2"
2883 : "mtvsrdd %x0,%2,%1");
2888 [(set_attr "type" "vecperm")])
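;; Illustrative note: xxpermdi with immediate 0 takes the high doubleword of
;; each input, so building the vector {a, b} (element order) uses swapped
;; operands on little endian:
;;
;;   BE: xxpermdi dst,a,b,0             LE: xxpermdi dst,b,a,0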
2890 ;; Combiner patterns to allow creating XXPERMDI's to access either double
2891 ;; word element in a vector register.
2892 (define_insn "*vsx_concat_<mode>_1"
2893 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
2895 (vec_select:<VS_scalar>
2896 (match_operand:VSX_D 1 "gpc_reg_operand" "wa")
2897 (parallel [(match_operand:QI 2 "const_0_to_1_operand" "n")]))
2898 (match_operand:<VS_scalar> 3 "gpc_reg_operand" "wa")))]
2899 "VECTOR_MEM_VSX_P (<MODE>mode)"
2901 HOST_WIDE_INT dword = INTVAL (operands[2]);
2902 if (BYTES_BIG_ENDIAN)
2904 operands[4] = GEN_INT (2*dword);
2905 return "xxpermdi %x0,%x1,%x3,%4";
2909 operands[4] = GEN_INT (!dword);
2910 return "xxpermdi %x0,%x3,%x1,%4";
2913 [(set_attr "type" "vecperm")])
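;; Worked example (illustrative) for the immediate computed above: on big
;; endian with dword == 1, operands[4] = 2 (binary 10), so the emitted
;; "xxpermdi %x0,%x1,%x3,2" takes doubleword 1 of operand 1 for the high
;; half and doubleword 0 of operand 3 for the low half.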
2915 (define_insn "*vsx_concat_<mode>_2"
2916 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
2918 (match_operand:<VS_scalar> 1 "gpc_reg_operand" "wa")
2919 (vec_select:<VS_scalar>
2920 (match_operand:VSX_D 2 "gpc_reg_operand" "wa")
2921 (parallel [(match_operand:QI 3 "const_0_to_1_operand" "n")]))))]
2922 "VECTOR_MEM_VSX_P (<MODE>mode)"
2924 HOST_WIDE_INT dword = INTVAL (operands[3]);
2925 if (BYTES_BIG_ENDIAN)
2927 operands[4] = GEN_INT (dword);
2928 return "xxpermdi %x0,%x1,%x2,%4";
2932 operands[4] = GEN_INT (2 * !dword);
2933 return "xxpermdi %x0,%x2,%x1,%4";
2936 [(set_attr "type" "vecperm")])
2938 (define_insn "*vsx_concat_<mode>_3"
2939 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
2941 (vec_select:<VS_scalar>
2942 (match_operand:VSX_D 1 "gpc_reg_operand" "wa")
2943 (parallel [(match_operand:QI 2 "const_0_to_1_operand" "n")]))
2944 (vec_select:<VS_scalar>
2945 (match_operand:VSX_D 3 "gpc_reg_operand" "wa")
2946 (parallel [(match_operand:QI 4 "const_0_to_1_operand" "n")]))))]
2947 "VECTOR_MEM_VSX_P (<MODE>mode)"
2949 HOST_WIDE_INT dword1 = INTVAL (operands[2]);
2950 HOST_WIDE_INT dword2 = INTVAL (operands[4]);
2951 if (BYTES_BIG_ENDIAN)
2953 operands[5] = GEN_INT ((2 * dword1) + dword2);
2954 return "xxpermdi %x0,%x1,%x3,%5";
2958 operands[5] = GEN_INT ((2 * !dword2) + !dword1);
2959 return "xxpermdi %x0,%x3,%x1,%5";
2962 [(set_attr "type" "vecperm")])
2964 ;; Special purpose concat using xxpermdi to glue two single precision values
2965 ;; together, relying on the fact that internally scalar floats are represented
2966 ;; as doubles. This is used to initialize a V4SF vector with 4 floats
2967 (define_insn "vsx_concat_v2sf"
2968 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
2970 [(match_operand:SF 1 "vsx_register_operand" "ww")
2971 (match_operand:SF 2 "vsx_register_operand" "ww")]
2972 UNSPEC_VSX_CONCAT))]
2973 "VECTOR_MEM_VSX_P (V2DFmode)"
2975 if (BYTES_BIG_ENDIAN)
2976 return "xxpermdi %x0,%x1,%x2,0";
2978 return "xxpermdi %x0,%x2,%x1,0";
2980 [(set_attr "type" "vecperm")])
2982 ;; V4SImode initialization splitter
2983 (define_insn_and_split "vsx_init_v4si"
2984 [(set (match_operand:V4SI 0 "gpc_reg_operand" "=&r")
2986 [(match_operand:SI 1 "reg_or_cint_operand" "rn")
2987 (match_operand:SI 2 "reg_or_cint_operand" "rn")
2988 (match_operand:SI 3 "reg_or_cint_operand" "rn")
2989 (match_operand:SI 4 "reg_or_cint_operand" "rn")]
2990 UNSPEC_VSX_VEC_INIT))
2991 (clobber (match_scratch:DI 5 "=&r"))
2992 (clobber (match_scratch:DI 6 "=&r"))]
2993 "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
2995 "&& reload_completed"
2998 rs6000_split_v4si_init (operands);
3002 ;; xxpermdi for little endian loads and stores. We need several of
3003 ;; these since the form of the PARALLEL differs by mode.
3004 (define_insn "*vsx_xxpermdi2_le_<mode>"
3005 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=<VSa>")
3007 (match_operand:VSX_D 1 "vsx_register_operand" "<VSa>")
3008 (parallel [(const_int 1) (const_int 0)])))]
3009 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)"
3010 "xxpermdi %x0,%x1,%x1,2"
3011 [(set_attr "type" "vecperm")])
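;; Illustrative note: "xxpermdi %x0,%x1,%x1,2" (DM = binary 10) selects
;; doubleword 1 of the source for the high half and doubleword 0 for the low
;; half, i.e. it swaps the two 64-bit halves:
;;
;;   dst = { src[1], src[0] }           /* doubleword view */
;;
;; The little endian load/store patterns below rely on the same swap.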
3013 (define_insn "*vsx_xxpermdi4_le_<mode>"
3014 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=<VSa>")
3016 (match_operand:VSX_W 1 "vsx_register_operand" "<VSa>")
3017 (parallel [(const_int 2) (const_int 3)
3018 (const_int 0) (const_int 1)])))]
3019 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)"
3020 "xxpermdi %x0,%x1,%x1,2"
3021 [(set_attr "type" "vecperm")])
3023 (define_insn "*vsx_xxpermdi8_le_V8HI"
3024 [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
3026 (match_operand:V8HI 1 "vsx_register_operand" "wa")
3027 (parallel [(const_int 4) (const_int 5)
3028 (const_int 6) (const_int 7)
3029 (const_int 0) (const_int 1)
3030 (const_int 2) (const_int 3)])))]
3031 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode)"
3032 "xxpermdi %x0,%x1,%x1,2"
3033 [(set_attr "type" "vecperm")])
3035 (define_insn "*vsx_xxpermdi16_le_V16QI"
3036 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
3038 (match_operand:V16QI 1 "vsx_register_operand" "wa")
3039 (parallel [(const_int 8) (const_int 9)
3040 (const_int 10) (const_int 11)
3041 (const_int 12) (const_int 13)
3042 (const_int 14) (const_int 15)
3043 (const_int 0) (const_int 1)
3044 (const_int 2) (const_int 3)
3045 (const_int 4) (const_int 5)
3046 (const_int 6) (const_int 7)])))]
3047 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode)"
3048 "xxpermdi %x0,%x1,%x1,2"
3049 [(set_attr "type" "vecperm")])
3051 ;; lxvd2x for little endian loads. We need several of
3052 ;; these since the form of the PARALLEL differs by mode.
3053 (define_insn "*vsx_lxvd2x2_le_<mode>"
3054 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=<VSa>")
3056 (match_operand:VSX_D 1 "memory_operand" "Z")
3057 (parallel [(const_int 1) (const_int 0)])))]
3058 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
3060 [(set_attr "type" "vecload")])
3062 (define_insn "*vsx_lxvd2x4_le_<mode>"
3063 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=<VSa>")
3065 (match_operand:VSX_W 1 "memory_operand" "Z")
3066 (parallel [(const_int 2) (const_int 3)
3067 (const_int 0) (const_int 1)])))]
3068 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
3070 [(set_attr "type" "vecload")])
3072 (define_insn "*vsx_lxvd2x8_le_V8HI"
3073 [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
3075 (match_operand:V8HI 1 "memory_operand" "Z")
3076 (parallel [(const_int 4) (const_int 5)
3077 (const_int 6) (const_int 7)
3078 (const_int 0) (const_int 1)
3079 (const_int 2) (const_int 3)])))]
3080 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode) && !TARGET_P9_VECTOR"
3082 [(set_attr "type" "vecload")])
3084 (define_insn "*vsx_lxvd2x16_le_V16QI"
3085 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
3087 (match_operand:V16QI 1 "memory_operand" "Z")
3088 (parallel [(const_int 8) (const_int 9)
3089 (const_int 10) (const_int 11)
3090 (const_int 12) (const_int 13)
3091 (const_int 14) (const_int 15)
3092 (const_int 0) (const_int 1)
3093 (const_int 2) (const_int 3)
3094 (const_int 4) (const_int 5)
3095 (const_int 6) (const_int 7)])))]
3096 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode) && !TARGET_P9_VECTOR"
3098 [(set_attr "type" "vecload")])
3100 ;; stxvd2x for little endian stores. We need several of
3101 ;; these since the form of the PARALLEL differs by mode.
3102 (define_insn "*vsx_stxvd2x2_le_<mode>"
3103 [(set (match_operand:VSX_D 0 "memory_operand" "=Z")
3105 (match_operand:VSX_D 1 "vsx_register_operand" "<VSa>")
3106 (parallel [(const_int 1) (const_int 0)])))]
3107 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
3109 [(set_attr "type" "vecstore")])
3111 (define_insn "*vsx_stxvd2x4_le_<mode>"
3112 [(set (match_operand:VSX_W 0 "memory_operand" "=Z")
3114 (match_operand:VSX_W 1 "vsx_register_operand" "<VSa>")
3115 (parallel [(const_int 2) (const_int 3)
3116 (const_int 0) (const_int 1)])))]
3117 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
3119 [(set_attr "type" "vecstore")])
3121 (define_insn "*vsx_stxvd2x8_le_V8HI"
3122 [(set (match_operand:V8HI 0 "memory_operand" "=Z")
3124 (match_operand:V8HI 1 "vsx_register_operand" "wa")
3125 (parallel [(const_int 4) (const_int 5)
3126 (const_int 6) (const_int 7)
3127 (const_int 0) (const_int 1)
3128 (const_int 2) (const_int 3)])))]
3129 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode) && !TARGET_P9_VECTOR"
3131 [(set_attr "type" "vecstore")])
3133 (define_insn "*vsx_stxvd2x16_le_V16QI"
3134 [(set (match_operand:V16QI 0 "memory_operand" "=Z")
3136 (match_operand:V16QI 1 "vsx_register_operand" "wa")
3137 (parallel [(const_int 8) (const_int 9)
3138 (const_int 10) (const_int 11)
3139 (const_int 12) (const_int 13)
3140 (const_int 14) (const_int 15)
3141 (const_int 0) (const_int 1)
3142 (const_int 2) (const_int 3)
3143 (const_int 4) (const_int 5)
3144 (const_int 6) (const_int 7)])))]
3145 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode) && !TARGET_P9_VECTOR"
3147 [(set_attr "type" "vecstore")])
3149 ;; Convert a TImode value into V1TImode
3150 (define_expand "vsx_set_v1ti"
3151 [(match_operand:V1TI 0 "nonimmediate_operand")
3152 (match_operand:V1TI 1 "nonimmediate_operand")
3153 (match_operand:TI 2 "input_operand")
3154 (match_operand:QI 3 "u5bit_cint_operand")]
3155 "VECTOR_MEM_VSX_P (V1TImode)"
3157 if (operands[3] != const0_rtx)
3160 emit_move_insn (operands[0], gen_lowpart (V1TImode, operands[1]));
3164 ;; Rewrite V2DF/V2DI set in terms of VEC_CONCAT
3165 (define_expand "vsx_set_<mode>"
3166 [(use (match_operand:VSX_D 0 "vsx_register_operand"))
3167 (use (match_operand:VSX_D 1 "vsx_register_operand"))
3168 (use (match_operand:<VS_scalar> 2 "gpc_reg_operand"))
3169 (use (match_operand:QI 3 "const_0_to_1_operand"))]
3170 "VECTOR_MEM_VSX_P (<MODE>mode)"
3172 rtx dest = operands[0];
3173 rtx vec_reg = operands[1];
3174 rtx value = operands[2];
3175 rtx ele = operands[3];
3176 rtx tmp = gen_reg_rtx (<VS_scalar>mode);
3178 if (ele == const0_rtx)
3180 emit_insn (gen_vsx_extract_<mode> (tmp, vec_reg, const1_rtx));
3181 emit_insn (gen_vsx_concat_<mode> (dest, value, tmp));
3184 else if (ele == const1_rtx)
3186 emit_insn (gen_vsx_extract_<mode> (tmp, vec_reg, const0_rtx));
3187 emit_insn (gen_vsx_concat_<mode> (dest, tmp, value));
3194 ;; Extract a DF/DI element from V2DF/V2DI
3195 ;; Optimize cases where we can do a simple or direct move,
3196 ;; or see if we can avoid doing the move at all.
3198 ;; There are some unresolved problems with reload that show up if an Altivec
3199 ;; register was picked. Limit the scalar value to FPRs for now.
3201 (define_insn "vsx_extract_<mode>"
3202 [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=d, d, wr, wr")
3204 (vec_select:<VS_scalar>
3205 (match_operand:VSX_D 1 "gpc_reg_operand" "<VSa>, <VSa>, wm, wo")
3208 [(match_operand:QI 2 "const_0_to_1_operand" "wD, n, wD, n")])))]
3209 "VECTOR_MEM_VSX_P (<MODE>mode)"
3211 int element = INTVAL (operands[2]);
3212 int op0_regno = REGNO (operands[0]);
3213 int op1_regno = REGNO (operands[1]);
3216 gcc_assert (IN_RANGE (element, 0, 1));
3217 gcc_assert (VSX_REGNO_P (op1_regno));
3219 if (element == VECTOR_ELEMENT_SCALAR_64BIT)
3221 if (op0_regno == op1_regno)
3222 return ASM_COMMENT_START " vec_extract to same register";
3224 else if (INT_REGNO_P (op0_regno) && TARGET_DIRECT_MOVE
3225 && TARGET_POWERPC64)
3226 return "mfvsrd %0,%x1";
3228 else if (FP_REGNO_P (op0_regno) && FP_REGNO_P (op1_regno))
3231 else if (VSX_REGNO_P (op0_regno))
3232 return "xxlor %x0,%x1,%x1";
3238 else if (element == VECTOR_ELEMENT_MFVSRLD_64BIT && INT_REGNO_P (op0_regno)
3239 && TARGET_P9_VECTOR && TARGET_POWERPC64 && TARGET_DIRECT_MOVE)
3240 return "mfvsrld %0,%x1";
3242 else if (VSX_REGNO_P (op0_regno))
3244 fldDM = element << 1;
3245 if (!BYTES_BIG_ENDIAN)
3247 operands[3] = GEN_INT (fldDM);
3248 return "xxpermdi %x0,%x1,%x1,%3";
3254 [(set_attr "type" "veclogical,mftgpr,mftgpr,vecperm")])
3256 ;; Optimize extracting a single scalar element from memory.
3257 (define_insn_and_split "*vsx_extract_<P:mode>_<VSX_D:mode>_load"
3258 [(set (match_operand:<VS_scalar> 0 "register_operand" "=<VSX_D:VS_64reg>,wr")
3259 (vec_select:<VSX_D:VS_scalar>
3260 (match_operand:VSX_D 1 "memory_operand" "m,m")
3261 (parallel [(match_operand:QI 2 "const_0_to_1_operand" "n,n")])))
3262 (clobber (match_scratch:P 3 "=&b,&b"))]
3263 "VECTOR_MEM_VSX_P (<VSX_D:MODE>mode)"
3265 "&& reload_completed"
3266 [(set (match_dup 0) (match_dup 4))]
3268 operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
3269 operands[3], <VSX_D:VS_scalar>mode);
3271 [(set_attr "type" "fpload,load")
3272 (set_attr "length" "8")])
3274 ;; Optimize storing a single scalar element that is in the right location
3275 ;; to be stored directly
3276 (define_insn "*vsx_extract_<mode>_store"
3277 [(set (match_operand:<VS_scalar> 0 "memory_operand" "=m,Z,wY")
3278 (vec_select:<VS_scalar>
3279 (match_operand:VSX_D 1 "register_operand" "d,wv,wb")
3280 (parallel [(match_operand:QI 2 "vsx_scalar_64bit" "wD,wD,wD")])))]
3281 "VECTOR_MEM_VSX_P (<MODE>mode)"
3286 [(set_attr "type" "fpstore")
3287 (set_attr "length" "4")])
3289 ;; Variable V2DI/V2DF extract shift
3290 (define_insn "vsx_vslo_<mode>"
3291 [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=v")
3292 (unspec:<VS_scalar> [(match_operand:VSX_D 1 "gpc_reg_operand" "v")
3293 (match_operand:V2DI 2 "gpc_reg_operand" "v")]
3295 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3297 [(set_attr "type" "vecperm")])
3299 ;; Variable V2DI/V2DF extract
3300 (define_insn_and_split "vsx_extract_<mode>_var"
3301 [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=v,<VSa>,r")
3302 (unspec:<VS_scalar> [(match_operand:VSX_D 1 "input_operand" "v,m,m")
3303 (match_operand:DI 2 "gpc_reg_operand" "r,r,r")]
3304 UNSPEC_VSX_EXTRACT))
3305 (clobber (match_scratch:DI 3 "=r,&b,&b"))
3306 (clobber (match_scratch:V2DI 4 "=&v,X,X"))]
3307 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3309 "&& reload_completed"
3312 rs6000_split_vec_extract_var (operands[0], operands[1], operands[2],
3313 operands[3], operands[4]);
3317 ;; Extract a SF element from V4SF
3318 (define_insn_and_split "vsx_extract_v4sf"
3319 [(set (match_operand:SF 0 "vsx_register_operand" "=ww")
3321 (match_operand:V4SF 1 "vsx_register_operand" "wa")
3322 (parallel [(match_operand:QI 2 "u5bit_cint_operand" "n")])))
3323 (clobber (match_scratch:V4SF 3 "=0"))]
3324 "VECTOR_UNIT_VSX_P (V4SFmode)"
3329 rtx op0 = operands[0];
3330 rtx op1 = operands[1];
3331 rtx op2 = operands[2];
3332 rtx op3 = operands[3];
3334 HOST_WIDE_INT ele = BYTES_BIG_ENDIAN ? INTVAL (op2) : 3 - INTVAL (op2);
3340 if (GET_CODE (op3) == SCRATCH)
3341 op3 = gen_reg_rtx (V4SFmode);
3342 emit_insn (gen_vsx_xxsldwi_v4sf (op3, op1, op1, GEN_INT (ele)));
3345 emit_insn (gen_vsx_xscvspdp_scalar2 (op0, tmp));
3348 [(set_attr "length" "8")
3349 (set_attr "type" "fp")])
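;; Illustrative sketch (not md syntax) of the extract above: rotate the
;; selected word into element 0 with xxsldwi, then convert that word to the
;; scalar (double-format) representation with xscvspdp.
;;
;;   tmp = xxsldwi (v, v, ele);         /* element `ele' -> word 0 */
;;   dst = tmp[0];                      /* xscvspdp_scalar2 */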
3351 (define_insn_and_split "*vsx_extract_v4sf_<mode>_load"
3352 [(set (match_operand:SF 0 "register_operand" "=f,wv,wb,?r")
3354 (match_operand:V4SF 1 "memory_operand" "m,Z,m,m")
3355 (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n,n,n")])))
3356 (clobber (match_scratch:P 3 "=&b,&b,&b,&b"))]
3357 "VECTOR_MEM_VSX_P (V4SFmode)"
3359 "&& reload_completed"
3360 [(set (match_dup 0) (match_dup 4))]
3362 operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
3363 operands[3], SFmode);
3365 [(set_attr "type" "fpload,fpload,fpload,load")
3366 (set_attr "length" "8")])
3368 ;; Variable V4SF extract
3369 (define_insn_and_split "vsx_extract_v4sf_var"
3370 [(set (match_operand:SF 0 "gpc_reg_operand" "=ww,ww,?r")
3371 (unspec:SF [(match_operand:V4SF 1 "input_operand" "v,m,m")
3372 (match_operand:DI 2 "gpc_reg_operand" "r,r,r")]
3373 UNSPEC_VSX_EXTRACT))
3374 (clobber (match_scratch:DI 3 "=r,&b,&b"))
3375 (clobber (match_scratch:V2DI 4 "=&v,X,X"))]
3376 "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_DIRECT_MOVE_64BIT"
3378 "&& reload_completed"
3381 rs6000_split_vec_extract_var (operands[0], operands[1], operands[2],
3382 operands[3], operands[4]);
3386 ;; Expand the builtin form of xxpermdi to canonical rtl.
3387 (define_expand "vsx_xxpermdi_<mode>"
3388 [(match_operand:VSX_L 0 "vsx_register_operand")
3389 (match_operand:VSX_L 1 "vsx_register_operand")
3390 (match_operand:VSX_L 2 "vsx_register_operand")
3391 (match_operand:QI 3 "u5bit_cint_operand")]
3392 "VECTOR_MEM_VSX_P (<MODE>mode)"
3394 rtx target = operands[0];
3395 rtx op0 = operands[1];
3396 rtx op1 = operands[2];
3397 int mask = INTVAL (operands[3]);
3398 rtx perm0 = GEN_INT ((mask >> 1) & 1);
3399 rtx perm1 = GEN_INT ((mask & 1) + 2);
3400 rtx (*gen) (rtx, rtx, rtx, rtx, rtx);
3402 if (<MODE>mode == V2DFmode)
3403 gen = gen_vsx_xxpermdi2_v2df_1;
3406 gen = gen_vsx_xxpermdi2_v2di_1;
3407 if (<MODE>mode != V2DImode)
3409 target = gen_lowpart (V2DImode, target);
3410 op0 = gen_lowpart (V2DImode, op0);
3411 op1 = gen_lowpart (V2DImode, op1);
3414 emit_insn (gen (target, op0, op1, perm0, perm1));
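;; Worked example (illustrative) for the mask decoding above: mask == 2
;; gives perm0 == 1 and perm1 == 2, so the canonical rtl selects
;;
;;   dst = { op0[1], op1[0] }           /* doubleword elements, BE order */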
3418 ;; Special version of xxpermdi that retains big-endian semantics.
3419 (define_expand "vsx_xxpermdi_<mode>_be"
3420 [(match_operand:VSX_L 0 "vsx_register_operand")
3421 (match_operand:VSX_L 1 "vsx_register_operand")
3422 (match_operand:VSX_L 2 "vsx_register_operand")
3423 (match_operand:QI 3 "u5bit_cint_operand")]
3424 "VECTOR_MEM_VSX_P (<MODE>mode)"
3426 rtx target = operands[0];
3427 rtx op0 = operands[1];
3428 rtx op1 = operands[2];
3429 int mask = INTVAL (operands[3]);
3430 rtx perm0 = GEN_INT ((mask >> 1) & 1);
3431 rtx perm1 = GEN_INT ((mask & 1) + 2);
3432 rtx (*gen) (rtx, rtx, rtx, rtx, rtx);
3434 if (<MODE>mode == V2DFmode)
3435 gen = gen_vsx_xxpermdi2_v2df_1;
3438 gen = gen_vsx_xxpermdi2_v2di_1;
3439 if (<MODE>mode != V2DImode)
3441 target = gen_lowpart (V2DImode, target);
3442 op0 = gen_lowpart (V2DImode, op0);
3443 op1 = gen_lowpart (V2DImode, op1);
3446 /* In little endian mode, vsx_xxpermdi2_<mode>_1 will perform a
3447 transformation we don't want; it is necessary for
3448 rs6000_expand_vec_perm_const_1 but not for this use. So we
3449 prepare for that by reversing the transformation here. */
3450 if (BYTES_BIG_ENDIAN)
3451 emit_insn (gen (target, op0, op1, perm0, perm1));
3454 rtx p0 = GEN_INT (3 - INTVAL (perm1));
3455 rtx p1 = GEN_INT (3 - INTVAL (perm0));
3456 emit_insn (gen (target, op1, op0, p0, p1));
3461 (define_insn "vsx_xxpermdi2_<mode>_1"
3462 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wd")
3464 (vec_concat:<VS_double>
3465 (match_operand:VSX_D 1 "vsx_register_operand" "wd")
3466 (match_operand:VSX_D 2 "vsx_register_operand" "wd"))
3467 (parallel [(match_operand 3 "const_0_to_1_operand" "")
3468 (match_operand 4 "const_2_to_3_operand" "")])))]
3469 "VECTOR_MEM_VSX_P (<MODE>mode)"
3473 /* For little endian, swap operands and invert/swap selectors
3474 to get the correct xxpermdi. The operand swap sets up the
3475 inputs as a little endian array. The selectors are swapped
3476 because they are defined to use big endian ordering. The
3477 selectors are inverted to get the correct doublewords for
3478 little endian ordering. */
3479 if (BYTES_BIG_ENDIAN)
3481 op3 = INTVAL (operands[3]);
3482 op4 = INTVAL (operands[4]);
3486 op3 = 3 - INTVAL (operands[4]);
3487 op4 = 3 - INTVAL (operands[3]);
3490 mask = (op3 << 1) | (op4 - 2);
3491 operands[3] = GEN_INT (mask);
3493 if (BYTES_BIG_ENDIAN)
3494 return "xxpermdi %x0,%x1,%x2,%3";
3496 return "xxpermdi %x0,%x2,%x1,%3";
3498 [(set_attr "type" "vecperm")])
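;; Worked example (illustrative) for the immediate encoding above: on big
;; endian, selectors 1 and 2 give mask = (1 << 1) | (2 - 2) = 2, so
;; "xxpermdi %x0,%x1,%x2,2" takes doubleword 1 of operand 1 and doubleword 0
;; of operand 2.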
3500 ;; Extraction of a single element from a small integer vector. Until ISA 3.0,
3501 ;; none of the small types were allowed in a vector register, so we had to
3502 ;; extract to DImode and either do a direct move or store.
3503 (define_expand "vsx_extract_<mode>"
3504 [(parallel [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand")
3505 (vec_select:<VS_scalar>
3506 (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand")
3507 (parallel [(match_operand:QI 2 "const_int_operand")])))
3508 (clobber (match_scratch:VSX_EXTRACT_I 3))])]
3509 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3511 /* If we have ISA 3.0, we can use xxextractuw/vextractu{b,h}. */
3512 if (TARGET_P9_VECTOR)
3514 emit_insn (gen_vsx_extract_<mode>_p9 (operands[0], operands[1],
3520 (define_insn "vsx_extract_<mode>_p9"
3521 [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=r,<VSX_EX>")
3522 (vec_select:<VS_scalar>
3523 (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "wK,<VSX_EX>")
3524 (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n,n")])))
3525 (clobber (match_scratch:SI 3 "=r,X"))]
3526 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB"
3528 if (which_alternative == 0)
3533 HOST_WIDE_INT elt = INTVAL (operands[2]);
3534 HOST_WIDE_INT elt_adj = (!VECTOR_ELT_ORDER_BIG
3535 ? GET_MODE_NUNITS (<MODE>mode) - 1 - elt
3538 HOST_WIDE_INT unit_size = GET_MODE_UNIT_SIZE (<MODE>mode);
3539 HOST_WIDE_INT offset = unit_size * elt_adj;
3541 operands[2] = GEN_INT (offset);
3543 return "xxextractuw %x0,%x1,%2";
3545 return "vextractu<wd> %0,%1,%2";
3548 [(set_attr "type" "vecsimple")])
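;; Worked example (illustrative) for the offset computed above: extracting
;; element 3 of a V8HI on little endian gives elt_adj = 8 - 1 - 3 = 4, so
;; the byte offset for vextractuh is 2 * 4 = 8.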
3551 [(set (match_operand:<VS_scalar> 0 "int_reg_operand")
3552 (vec_select:<VS_scalar>
3553 (match_operand:VSX_EXTRACT_I 1 "altivec_register_operand")
3554 (parallel [(match_operand:QI 2 "const_int_operand")])))
3555 (clobber (match_operand:SI 3 "int_reg_operand"))]
3556 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB && reload_completed"
3559 rtx op0_si = gen_rtx_REG (SImode, REGNO (operands[0]));
3560 rtx op1 = operands[1];
3561 rtx op2 = operands[2];
3562 rtx op3 = operands[3];
3563 HOST_WIDE_INT offset = INTVAL (op2) * GET_MODE_UNIT_SIZE (<MODE>mode);
3565 emit_move_insn (op3, GEN_INT (offset));
3566 if (VECTOR_ELT_ORDER_BIG)
3567 emit_insn (gen_vextu<wd>lx (op0_si, op3, op1));
3569 emit_insn (gen_vextu<wd>rx (op0_si, op3, op1));
3573 ;; Optimize zero extracts to eliminate the AND after the extract.
3574 (define_insn_and_split "*vsx_extract_<mode>_di_p9"
3575 [(set (match_operand:DI 0 "gpc_reg_operand" "=r,<VSX_EX>")
3577 (vec_select:<VS_scalar>
3578 (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "wK,<VSX_EX>")
3579 (parallel [(match_operand:QI 2 "const_int_operand" "n,n")]))))
3580 (clobber (match_scratch:SI 3 "=r,X"))]
3581 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB"
3583 "&& reload_completed"
3584 [(parallel [(set (match_dup 4)
3585 (vec_select:<VS_scalar>
3587 (parallel [(match_dup 2)])))
3588 (clobber (match_dup 3))])]
3590 operands[4] = gen_rtx_REG (<VS_scalar>mode, REGNO (operands[0]));
3593 ;; Optimize stores to use the ISA 3.0 scalar store instructions
3594 (define_insn_and_split "*vsx_extract_<mode>_store_p9"
3595 [(set (match_operand:<VS_scalar> 0 "memory_operand" "=Z,m")
3596 (vec_select:<VS_scalar>
3597 (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "<VSX_EX>,v")
3598 (parallel [(match_operand:QI 2 "const_int_operand" "n,n")])))
3599 (clobber (match_scratch:<VS_scalar> 3 "=<VSX_EX>,&r"))
3600 (clobber (match_scratch:SI 4 "=X,&r"))]
3601 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB"
3603 "&& reload_completed"
3604 [(parallel [(set (match_dup 3)
3605 (vec_select:<VS_scalar>
3607 (parallel [(match_dup 2)])))
3608 (clobber (match_dup 4))])
3612 (define_insn_and_split "*vsx_extract_si"
3613 [(set (match_operand:SI 0 "nonimmediate_operand" "=r,wHwI,Z")
3615 (match_operand:V4SI 1 "gpc_reg_operand" "wJv,wJv,wJv")
3616 (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n,n")])))
3617 (clobber (match_scratch:V4SI 3 "=wJv,wJv,wJv"))]
3618 "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT && !TARGET_P9_VECTOR"
3620 "&& reload_completed"
3623 rtx dest = operands[0];
3624 rtx src = operands[1];
3625 rtx element = operands[2];
3626 rtx vec_tmp = operands[3];
3629 if (!VECTOR_ELT_ORDER_BIG)
3630 element = GEN_INT (GET_MODE_NUNITS (V4SImode) - 1 - INTVAL (element));
3632 /* If the value is in the correct position, we can avoid doing the VSPLT<x>
3633 instruction. */
3634 value = INTVAL (element);
3636 emit_insn (gen_altivec_vspltw_direct (vec_tmp, src, element));
3640 if (MEM_P (operands[0]))
3642 if (can_create_pseudo_p ())
3643 dest = rs6000_address_for_fpconvert (dest);
3645 if (TARGET_P8_VECTOR)
3646 emit_move_insn (dest, gen_rtx_REG (SImode, REGNO (vec_tmp)));
3648 emit_insn (gen_stfiwx (dest, gen_rtx_REG (DImode, REGNO (vec_tmp))));
3651 else if (TARGET_P8_VECTOR)
3652 emit_move_insn (dest, gen_rtx_REG (SImode, REGNO (vec_tmp)));
3654 emit_move_insn (gen_rtx_REG (DImode, REGNO (dest)),
3655 gen_rtx_REG (DImode, REGNO (vec_tmp)));
3659 [(set_attr "type" "mftgpr,vecperm,fpstore")
3660 (set_attr "length" "8")])
3662 (define_insn_and_split "*vsx_extract_<mode>_p8"
3663 [(set (match_operand:<VS_scalar> 0 "nonimmediate_operand" "=r")
3664 (vec_select:<VS_scalar>
3665 (match_operand:VSX_EXTRACT_I2 1 "gpc_reg_operand" "v")
3666 (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n")])))
3667 (clobber (match_scratch:VSX_EXTRACT_I2 3 "=v"))]
3668 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT
3669 && !TARGET_P9_VECTOR"
3671 "&& reload_completed"
3674 rtx dest = operands[0];
3675 rtx src = operands[1];
3676 rtx element = operands[2];
3677 rtx vec_tmp = operands[3];
3680 if (!VECTOR_ELT_ORDER_BIG)
3681 element = GEN_INT (GET_MODE_NUNITS (<MODE>mode) - 1 - INTVAL (element));
3683 /* If the value is in the correct position, we can avoid doing the VSPLT<x>
3684 instruction. */
3685 value = INTVAL (element);
3686 if (<MODE>mode == V16QImode)
3689 emit_insn (gen_altivec_vspltb_direct (vec_tmp, src, element));
3693 else if (<MODE>mode == V8HImode)
3696 emit_insn (gen_altivec_vsplth_direct (vec_tmp, src, element));
3703 emit_move_insn (gen_rtx_REG (DImode, REGNO (dest)),
3704 gen_rtx_REG (DImode, REGNO (vec_tmp)));
3707 [(set_attr "type" "mftgpr")])
3709 ;; Optimize extracting a single scalar element from memory.
3710 (define_insn_and_split "*vsx_extract_<mode>_load"
3711 [(set (match_operand:<VS_scalar> 0 "register_operand" "=r")
3712 (vec_select:<VS_scalar>
3713 (match_operand:VSX_EXTRACT_I 1 "memory_operand" "m")
3714 (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n")])))
3715 (clobber (match_scratch:DI 3 "=&b"))]
3716 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3718 "&& reload_completed"
3719 [(set (match_dup 0) (match_dup 4))]
3721 operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
3722 operands[3], <VS_scalar>mode);
3724 [(set_attr "type" "load")
3725 (set_attr "length" "8")])
3727 ;; Variable V16QI/V8HI/V4SI extract
3728 (define_insn_and_split "vsx_extract_<mode>_var"
3729 [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=r,r,r")
3731 [(match_operand:VSX_EXTRACT_I 1 "input_operand" "wK,v,m")
3732 (match_operand:DI 2 "gpc_reg_operand" "r,r,r")]
3733 UNSPEC_VSX_EXTRACT))
3734 (clobber (match_scratch:DI 3 "=r,r,&b"))
3735 (clobber (match_scratch:V2DI 4 "=X,&v,X"))]
3736 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3738 "&& reload_completed"
3741 rs6000_split_vec_extract_var (operands[0], operands[1], operands[2],
3742 operands[3], operands[4]);
3746 (define_insn_and_split "*vsx_extract_<VSX_EXTRACT_I:mode>_<SDI:mode>_var"
3747 [(set (match_operand:SDI 0 "gpc_reg_operand" "=r,r,r")
3749 (unspec:<VSX_EXTRACT_I:VS_scalar>
3750 [(match_operand:VSX_EXTRACT_I 1 "input_operand" "wK,v,m")
3751 (match_operand:DI 2 "gpc_reg_operand" "r,r,r")]
3752 UNSPEC_VSX_EXTRACT)))
3753 (clobber (match_scratch:DI 3 "=r,r,&b"))
3754 (clobber (match_scratch:V2DI 4 "=X,&v,X"))]
3755 "VECTOR_MEM_VSX_P (<VSX_EXTRACT_I:MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3757 "&& reload_completed"
3760 machine_mode smode = <VSX_EXTRACT_I:MODE>mode;
3761 rs6000_split_vec_extract_var (gen_rtx_REG (smode, REGNO (operands[0])),
3762 operands[1], operands[2],
3763 operands[3], operands[4]);
3767 ;; VSX_EXTRACT optimizations
3768 ;; Optimize double d = (double) vec_extract (vi, <n>)
3769 ;; Get the element into the top position and use XVCVSXWDP/XVCVUXWDP
3770 (define_insn_and_split "*vsx_extract_si_<uns>float_df"
3771 [(set (match_operand:DF 0 "gpc_reg_operand" "=ws")
3774 (match_operand:V4SI 1 "gpc_reg_operand" "v")
3775 (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n")]))))
3776 (clobber (match_scratch:V4SI 3 "=v"))]
3777 "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
3782 rtx dest = operands[0];
3783 rtx src = operands[1];
3784 rtx element = operands[2];
3785 rtx v4si_tmp = operands[3];
3788 if (!VECTOR_ELT_ORDER_BIG)
3789 element = GEN_INT (GET_MODE_NUNITS (V4SImode) - 1 - INTVAL (element));
3791   /* If the value is in the correct position, we can avoid doing the VSPLT<x>
     instruction.  */
3793 value = INTVAL (element);
3796 if (GET_CODE (v4si_tmp) == SCRATCH)
3797 v4si_tmp = gen_reg_rtx (V4SImode);
3798 emit_insn (gen_altivec_vspltw_direct (v4si_tmp, src, element));
3803 emit_insn (gen_vsx_xvcv<su>xwdp_df (dest, v4si_tmp));
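;; A hedged C sketch of the optimization described above (assuming
;; <altivec.h>); the splat moves the element into the top position and the
;; XVCVSXWDP/XVCVUXWDP conversion does the rest.
;;
;;	#include <altivec.h>
;;	double to_double (vector int v)
;;	{
;;	  return (double) vec_extract (v, 1);
;;	}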
3807 ;; Optimize <type> f = (<type>) vec_extract (vi, <n>)
3808 ;; where <type> is a floating point type supported by the hardware that is
3809 ;; not double. First convert the value to double, and then to the desired
3811 (define_insn_and_split "*vsx_extract_si_<uns>float_<mode>"
3812 [(set (match_operand:VSX_EXTRACT_FL 0 "gpc_reg_operand" "=ww")
3813 (any_float:VSX_EXTRACT_FL
3815 (match_operand:V4SI 1 "gpc_reg_operand" "v")
3816 (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n")]))))
3817 (clobber (match_scratch:V4SI 3 "=v"))
3818 (clobber (match_scratch:DF 4 "=ws"))]
3819 "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
3824 rtx dest = operands[0];
3825 rtx src = operands[1];
3826 rtx element = operands[2];
3827 rtx v4si_tmp = operands[3];
3828 rtx df_tmp = operands[4];
3831 if (!VECTOR_ELT_ORDER_BIG)
3832 element = GEN_INT (GET_MODE_NUNITS (V4SImode) - 1 - INTVAL (element));
3834   /* If the value is in the correct position, we can avoid doing the VSPLT<x>
     instruction.  */
3836 value = INTVAL (element);
3839 if (GET_CODE (v4si_tmp) == SCRATCH)
3840 v4si_tmp = gen_reg_rtx (V4SImode);
3841 emit_insn (gen_altivec_vspltw_direct (v4si_tmp, src, element));
3846 if (GET_CODE (df_tmp) == SCRATCH)
3847 df_tmp = gen_reg_rtx (DFmode);
3849 emit_insn (gen_vsx_xvcv<su>xwdp_df (df_tmp, v4si_tmp));
3851 if (<MODE>mode == SFmode)
3852 emit_insn (gen_truncdfsf2 (dest, df_tmp));
3853 else if (<MODE>mode == TFmode && FLOAT128_IBM_P (TFmode))
3854 emit_insn (gen_extenddftf2_vsx (dest, df_tmp));
3855 else if (<MODE>mode == TFmode && FLOAT128_IEEE_P (TFmode)
3856 && TARGET_FLOAT128_HW)
3857 emit_insn (gen_extenddftf2_hw (dest, df_tmp));
3858 else if (<MODE>mode == IFmode && FLOAT128_IBM_P (IFmode))
3859 emit_insn (gen_extenddfif2 (dest, df_tmp));
3860 else if (<MODE>mode == KFmode && TARGET_FLOAT128_HW)
3861 emit_insn (gen_extenddfkf2_hw (dest, df_tmp));
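;; A hedged C sketch of the non-double case handled here (assuming
;; <altivec.h>): the value is converted to double first and then truncated
;; or extended to the requested type.
;;
;;	#include <altivec.h>
;;	float to_float (vector unsigned int v)
;;	{
;;	  return (float) vec_extract (v, 2);	/* via DFmode, then frsp */
;;	}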
3868 ;; Optimize <type> f = (<ftype>) vec_extract (<vtype>, <n>)
3869 ;; Where <ftype> is SFmode, DFmode (and KFmode/TFmode if those types are IEEE
3870 ;; 128-bit hardware types) and <vtype> is vector char, vector unsigned char,
3871 ;; vector short or vector unsigned short.
3872 (define_insn_and_split "*vsx_ext_<VSX_EXTRACT_I:VS_scalar>_fl_<FL_CONV:mode>"
3873 [(set (match_operand:FL_CONV 0 "gpc_reg_operand" "=<FL_CONV:VSr3>")
3875 (vec_select:<VSX_EXTRACT_I:VS_scalar>
3876 (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v")
3877 (parallel [(match_operand:QI 2 "const_int_operand" "n")]))))
3878 (clobber (match_scratch:<VSX_EXTRACT_I:VS_scalar> 3 "=v"))]
3879 "VECTOR_MEM_VSX_P (<VSX_EXTRACT_I:MODE>mode) && TARGET_DIRECT_MOVE_64BIT
3880 && TARGET_P9_VECTOR"
3882 "&& reload_completed"
3883 [(parallel [(set (match_dup 3)
3884 (vec_select:<VSX_EXTRACT_I:VS_scalar>
3886 (parallel [(match_dup 2)])))
3887 (clobber (scratch:SI))])
3889 (sign_extend:DI (match_dup 3)))
3891 (float:<FL_CONV:MODE> (match_dup 4)))]
3893 operands[4] = gen_rtx_REG (DImode, REGNO (operands[3]));
3896 (define_insn_and_split "*vsx_ext_<VSX_EXTRACT_I:VS_scalar>_ufl_<FL_CONV:mode>"
3897 [(set (match_operand:FL_CONV 0 "gpc_reg_operand" "=<FL_CONV:VSr3>")
3898 (unsigned_float:FL_CONV
3899 (vec_select:<VSX_EXTRACT_I:VS_scalar>
3900 (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v")
3901 (parallel [(match_operand:QI 2 "const_int_operand" "n")]))))
3902 (clobber (match_scratch:<VSX_EXTRACT_I:VS_scalar> 3 "=v"))]
3903 "VECTOR_MEM_VSX_P (<VSX_EXTRACT_I:MODE>mode) && TARGET_DIRECT_MOVE_64BIT
3904 && TARGET_P9_VECTOR"
3906 "&& reload_completed"
3907 [(parallel [(set (match_dup 3)
3908 (vec_select:<VSX_EXTRACT_I:VS_scalar>
3910 (parallel [(match_dup 2)])))
3911 (clobber (scratch:SI))])
3913 (float:<FL_CONV:MODE> (match_dup 4)))]
3915 operands[4] = gen_rtx_REG (DImode, REGNO (operands[3]));
3918 ;; V4SI/V8HI/V16QI set operation on ISA 3.0
3919 (define_insn "vsx_set_<mode>_p9"
3920 [(set (match_operand:VSX_EXTRACT_I 0 "gpc_reg_operand" "=<VSX_EX>")
3921 (unspec:VSX_EXTRACT_I
3922 [(match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "0")
3923 (match_operand:<VS_scalar> 2 "gpc_reg_operand" "<VSX_EX>")
3924 (match_operand:QI 3 "<VSX_EXTRACT_PREDICATE>" "n")]
3926 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_P9_VECTOR && TARGET_POWERPC64"
3928 int ele = INTVAL (operands[3]);
3929 int nunits = GET_MODE_NUNITS (<MODE>mode);
3931 if (!VECTOR_ELT_ORDER_BIG)
3932 ele = nunits - 1 - ele;
3934 operands[3] = GEN_INT (GET_MODE_SIZE (<VS_scalar>mode) * ele);
3935 if (<MODE>mode == V4SImode)
3936 return "xxinsertw %x0,%x2,%3";
3938 return "vinsert<wd> %0,%2,%3";
3940 [(set_attr "type" "vecperm")])
3942 (define_insn_and_split "vsx_set_v4sf_p9"
3943 [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
3945 [(match_operand:V4SF 1 "gpc_reg_operand" "0")
3946 (match_operand:SF 2 "gpc_reg_operand" "ww")
3947 (match_operand:QI 3 "const_0_to_3_operand" "n")]
3949 (clobber (match_scratch:SI 4 "=&wJwK"))]
3950 "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR && TARGET_POWERPC64"
3952 "&& reload_completed"
3954 (unspec:V4SF [(match_dup 2)]
3955 UNSPEC_VSX_CVDPSPN))
3956 (parallel [(set (match_dup 4)
3957 (vec_select:SI (match_dup 6)
3958 (parallel [(match_dup 7)])))
3959 (clobber (scratch:SI))])
3961 (unspec:V4SI [(match_dup 8)
3966 unsigned int tmp_regno = reg_or_subregno (operands[4]);
3968 operands[5] = gen_rtx_REG (V4SFmode, tmp_regno);
3969 operands[6] = gen_rtx_REG (V4SImode, tmp_regno);
3970 operands[7] = GEN_INT (VECTOR_ELT_ORDER_BIG ? 1 : 2);
3971 operands[8] = gen_rtx_REG (V4SImode, reg_or_subregno (operands[0]));
3973 [(set_attr "type" "vecperm")
3974 (set_attr "length" "12")])
3976 ;; Special case setting 0.0f to a V4SF element
3977 (define_insn_and_split "*vsx_set_v4sf_p9_zero"
3978 [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
3980 [(match_operand:V4SF 1 "gpc_reg_operand" "0")
3981 (match_operand:SF 2 "zero_fp_constant" "j")
3982 (match_operand:QI 3 "const_0_to_3_operand" "n")]
3984 (clobber (match_scratch:SI 4 "=&wJwK"))]
3985 "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR && TARGET_POWERPC64"
3987 "&& reload_completed"
3991 (unspec:V4SI [(match_dup 5)
3996 operands[5] = gen_rtx_REG (V4SImode, reg_or_subregno (operands[0]));
3998 [(set_attr "type" "vecperm")
3999 (set_attr "length" "8")])
4001 ;; Optimize x = vec_insert (vec_extract (v2, n), v1, m) if n is the element
4002 ;; that is in the default scalar position (1 for big endian, 2 for little
4003 ;; endian). We just need to do an xxinsertw since the element is in the
4004 ;; correct location.
4006 (define_insn "*vsx_insert_extract_v4sf_p9"
4007 [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
4009 [(match_operand:V4SF 1 "gpc_reg_operand" "0")
4010 (vec_select:SF (match_operand:V4SF 2 "gpc_reg_operand" "wa")
4012 [(match_operand:QI 3 "const_0_to_3_operand" "n")]))
4013 (match_operand:QI 4 "const_0_to_3_operand" "n")]
4015 "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR && TARGET_POWERPC64
4016 && (INTVAL (operands[3]) == (VECTOR_ELT_ORDER_BIG ? 1 : 2))"
4018 int ele = INTVAL (operands[4]);
4020 if (!VECTOR_ELT_ORDER_BIG)
4021 ele = GET_MODE_NUNITS (V4SFmode) - 1 - ele;
4023 operands[4] = GEN_INT (GET_MODE_SIZE (SFmode) * ele);
4024 return "xxinsertw %x0,%x2,%4";
4026 [(set_attr "type" "vecperm")])
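;; An illustrative C form of the insert/extract combination matched above
;; (assuming <altivec.h>); on little endian, element 2 of the source is
;; already in the scalar slot, so only an xxinsertw is needed.
;;
;;	#include <altivec.h>
;;	vector float move_elem (vector float dst, vector float src)
;;	{
;;	  return vec_insert (vec_extract (src, 2), dst, 0);
;;	}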
4028 ;; Optimize x = vec_insert (vec_extract (v2, n), v1, m) if n is not the element
4029 ;; that is in the default scalar position (1 for big endian, 2 for little
4030 ;; endian). Convert the insert/extract to int and avoid doing the conversion.
4032 (define_insn_and_split "*vsx_insert_extract_v4sf_p9_2"
4033 [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
4035 [(match_operand:V4SF 1 "gpc_reg_operand" "0")
4036 (vec_select:SF (match_operand:V4SF 2 "gpc_reg_operand" "wa")
4038 [(match_operand:QI 3 "const_0_to_3_operand" "n")]))
4039 (match_operand:QI 4 "const_0_to_3_operand" "n")]
4041 (clobber (match_scratch:SI 5 "=&wJwK"))]
4042 "VECTOR_MEM_VSX_P (V4SFmode) && VECTOR_MEM_VSX_P (V4SImode)
4043 && TARGET_P9_VECTOR && TARGET_POWERPC64
4044 && (INTVAL (operands[3]) != (VECTOR_ELT_ORDER_BIG ? 1 : 2))"
4047 [(parallel [(set (match_dup 5)
4048 (vec_select:SI (match_dup 6)
4049 (parallel [(match_dup 3)])))
4050 (clobber (scratch:SI))])
4052 (unspec:V4SI [(match_dup 8)
4057 if (GET_CODE (operands[5]) == SCRATCH)
4058 operands[5] = gen_reg_rtx (SImode);
4060 operands[6] = gen_lowpart (V4SImode, operands[2]);
4061 operands[7] = gen_lowpart (V4SImode, operands[0]);
4062 operands[8] = gen_lowpart (V4SImode, operands[1]);
4064 [(set_attr "type" "vecperm")])
4066 ;; Expanders for builtins
4067 (define_expand "vsx_mergel_<mode>"
4068 [(use (match_operand:VSX_D 0 "vsx_register_operand"))
4069 (use (match_operand:VSX_D 1 "vsx_register_operand"))
4070 (use (match_operand:VSX_D 2 "vsx_register_operand"))]
4071 "VECTOR_MEM_VSX_P (<MODE>mode)"
4076 /* Special handling for LE with -maltivec=be. */
4077 if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
4079 v = gen_rtvec (2, GEN_INT (0), GEN_INT (2));
4080 x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[2], operands[1]);
4084 v = gen_rtvec (2, GEN_INT (1), GEN_INT (3));
4085 x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[1], operands[2]);
4088 x = gen_rtx_VEC_SELECT (<MODE>mode, x, gen_rtx_PARALLEL (VOIDmode, v));
4089 emit_insn (gen_rtx_SET (operands[0], x));
4093 (define_expand "vsx_mergeh_<mode>"
4094 [(use (match_operand:VSX_D 0 "vsx_register_operand"))
4095 (use (match_operand:VSX_D 1 "vsx_register_operand"))
4096 (use (match_operand:VSX_D 2 "vsx_register_operand"))]
4097 "VECTOR_MEM_VSX_P (<MODE>mode)"
4102 /* Special handling for LE with -maltivec=be. */
4103 if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
4105 v = gen_rtvec (2, GEN_INT (1), GEN_INT (3));
4106 x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[2], operands[1]);
4110 v = gen_rtvec (2, GEN_INT (0), GEN_INT (2));
4111 x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[1], operands[2]);
4114 x = gen_rtx_VEC_SELECT (<MODE>mode, x, gen_rtx_PARALLEL (VOIDmode, v));
4115 emit_insn (gen_rtx_SET (operands[0], x));
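;; These expanders back the merge built-ins; a minimal C sketch (assuming
;; <altivec.h> and VSX):
;;
;;	#include <altivec.h>
;;	vector double hi_pair (vector double a, vector double b)
;;	{
;;	  return vec_mergeh (a, b);	/* { a[0], b[0] } */
;;	}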
4120 ;; We separate the register splat insn from the memory splat insn to force the
4121 ;; register allocator to generate the indexed form of the SPLAT when it is
4122 ;; given an offsettable memory reference. Otherwise, if the register and
4123 ;; memory insns were combined into a single insn, the register allocator would
4124 ;; load the value into a register, and then do a double word permute.
4125 (define_expand "vsx_splat_<mode>"
4126 [(set (match_operand:VSX_D 0 "vsx_register_operand")
4127 (vec_duplicate:VSX_D
4128 (match_operand:<VS_scalar> 1 "input_operand")))]
4129 "VECTOR_MEM_VSX_P (<MODE>mode)"
4131 rtx op1 = operands[1];
4133 operands[1] = rs6000_address_for_fpconvert (op1);
4134 else if (!REG_P (op1))
4135     operands[1] = force_reg (<VSX_D:VS_scalar>mode, op1);
4138 (define_insn "vsx_splat_<mode>_reg"
4139 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=<VSX_D:VSa>,?we")
4140 (vec_duplicate:VSX_D
4141 (match_operand:<VS_scalar> 1 "gpc_reg_operand" "<VSX_D:VS_64reg>,b")))]
4142 "VECTOR_MEM_VSX_P (<MODE>mode)"
4144 xxpermdi %x0,%x1,%x1,0
4146 [(set_attr "type" "vecperm")])
4148 (define_insn "vsx_splat_<VSX_D:mode>_mem"
4149 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=<VSX_D:VSa>")
4150 (vec_duplicate:VSX_D
4151 (match_operand:<VSX_D:VS_scalar> 1 "memory_operand" "Z")))]
4152 "VECTOR_MEM_VSX_P (<MODE>mode)"
4154 [(set_attr "type" "vecload")])
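;; A hedged C example of splatting a 64-bit scalar (assuming <altivec.h>);
;; splatting directly from memory lets the indexed-form load-and-splat be
;; used, as described above.
;;
;;	#include <altivec.h>
;;	vector double dup (double x)      { return vec_splats (x); }
;;	vector double dup_mem (double *p) { return vec_splats (*p); }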
4156 ;; V4SI splat support
4157 (define_insn "vsx_splat_v4si"
4158 [(set (match_operand:V4SI 0 "vsx_register_operand" "=we,we")
4160 (match_operand:SI 1 "splat_input_operand" "r,Z")))]
4165 [(set_attr "type" "vecperm,vecload")])
4167 ;; SImode is not currently allowed in vector registers. This pattern
4168 ;; allows us to use direct move to get the value in a vector register
4169 ;; so that we can use XXSPLTW
4170 (define_insn "vsx_splat_v4si_di"
4171 [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa,we")
4174 (match_operand:DI 1 "gpc_reg_operand" "wj,r"))))]
4175 "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
4179 [(set_attr "type" "vecperm")])
4181 ;; V4SF splat (ISA 3.0)
4182 (define_insn_and_split "vsx_splat_v4sf"
4183 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa,wa,wa")
4185 (match_operand:SF 1 "splat_input_operand" "Z,wy,r")))]
4191 "&& reload_completed && vsx_register_operand (operands[1], SFmode)"
4193 (unspec:V4SF [(match_dup 1)] UNSPEC_VSX_CVDPSPN))
4195 (unspec:V4SF [(match_dup 0)
4196 (const_int 0)] UNSPEC_VSX_XXSPLTW))]
4198 [(set_attr "type" "vecload,vecperm,mftgpr")
4199 (set_attr "length" "4,8,4")])
4201 ;; V4SF/V4SI splat from a vector element
4202 (define_insn "vsx_xxspltw_<mode>"
4203 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=<VSa>")
4204 (vec_duplicate:VSX_W
4205 (vec_select:<VS_scalar>
4206 (match_operand:VSX_W 1 "vsx_register_operand" "<VSa>")
4208 [(match_operand:QI 2 "u5bit_cint_operand" "n")]))))]
4209 "VECTOR_MEM_VSX_P (<MODE>mode)"
4211 if (!BYTES_BIG_ENDIAN)
4212 operands[2] = GEN_INT (3 - INTVAL (operands[2]));
4214 return "xxspltw %x0,%x1,%2";
4216 [(set_attr "type" "vecperm")])
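;; A minimal C sketch of splatting from a vector element (assuming
;; <altivec.h>):
;;
;;	#include <altivec.h>
;;	vector int dup1 (vector int v)
;;	{
;;	  return vec_splat (v, 1);	/* xxspltw */
;;	}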
4218 (define_insn "vsx_xxspltw_<mode>_direct"
4219 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=<VSa>")
4220 (unspec:VSX_W [(match_operand:VSX_W 1 "vsx_register_operand" "<VSa>")
4221 (match_operand:QI 2 "u5bit_cint_operand" "i")]
4222 UNSPEC_VSX_XXSPLTW))]
4223 "VECTOR_MEM_VSX_P (<MODE>mode)"
4224 "xxspltw %x0,%x1,%2"
4225 [(set_attr "type" "vecperm")])
4227 ;; V16QI/V8HI splat support on ISA 2.07
4228 (define_insn "vsx_vsplt<VSX_SPLAT_SUFFIX>_di"
4229 [(set (match_operand:VSX_SPLAT_I 0 "altivec_register_operand" "=v")
4230 (vec_duplicate:VSX_SPLAT_I
4231 (truncate:<VS_scalar>
4232 (match_operand:DI 1 "altivec_register_operand" "v"))))]
4233 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
4234 "vsplt<VSX_SPLAT_SUFFIX> %0,%1,<VSX_SPLAT_COUNT>"
4235 [(set_attr "type" "vecperm")])
4237 ;; V2DF/V2DI splat for use by vec_splat builtin
4238 (define_insn "vsx_xxspltd_<mode>"
4239 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
4240 (unspec:VSX_D [(match_operand:VSX_D 1 "vsx_register_operand" "wa")
4241 (match_operand:QI 2 "u5bit_cint_operand" "i")]
4242 UNSPEC_VSX_XXSPLTD))]
4243 "VECTOR_MEM_VSX_P (<MODE>mode)"
4245 if ((VECTOR_ELT_ORDER_BIG && INTVAL (operands[2]) == 0)
4246 || (!VECTOR_ELT_ORDER_BIG && INTVAL (operands[2]) == 1))
4247 return "xxpermdi %x0,%x1,%x1,0";
4249 return "xxpermdi %x0,%x1,%x1,3";
4251 [(set_attr "type" "vecperm")])
4253 ;; V4SF/V4SI interleave
4254 (define_insn "vsx_xxmrghw_<mode>"
4255 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wf,?<VSa>")
4257 (vec_concat:<VS_double>
4258 (match_operand:VSX_W 1 "vsx_register_operand" "wf,<VSa>")
4259 (match_operand:VSX_W 2 "vsx_register_operand" "wf,<VSa>"))
4260 (parallel [(const_int 0) (const_int 4)
4261 (const_int 1) (const_int 5)])))]
4262 "VECTOR_MEM_VSX_P (<MODE>mode)"
4264 if (BYTES_BIG_ENDIAN)
4265 return "xxmrghw %x0,%x1,%x2";
4267 return "xxmrglw %x0,%x2,%x1";
4269 [(set_attr "type" "vecperm")])
4271 (define_insn "vsx_xxmrglw_<mode>"
4272 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wf,?<VSa>")
4274 (vec_concat:<VS_double>
4275 (match_operand:VSX_W 1 "vsx_register_operand" "wf,<VSa>")
4276 (match_operand:VSX_W 2 "vsx_register_operand" "wf,?<VSa>"))
4277 (parallel [(const_int 2) (const_int 6)
4278 (const_int 3) (const_int 7)])))]
4279 "VECTOR_MEM_VSX_P (<MODE>mode)"
4281 if (BYTES_BIG_ENDIAN)
4282 return "xxmrglw %x0,%x1,%x2";
4284 return "xxmrghw %x0,%x2,%x1";
4286 [(set_attr "type" "vecperm")])
4288 ;; Shift left double by word immediate
4289 (define_insn "vsx_xxsldwi_<mode>"
4290 [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSa>")
4291 (unspec:VSX_L [(match_operand:VSX_L 1 "vsx_register_operand" "<VSa>")
4292 (match_operand:VSX_L 2 "vsx_register_operand" "<VSa>")
4293 (match_operand:QI 3 "u5bit_cint_operand" "i")]
4295 "VECTOR_MEM_VSX_P (<MODE>mode)"
4296 "xxsldwi %x0,%x1,%x2,%3"
4297 [(set_attr "type" "vecperm")])
4300 ;; Vector reduction insns and splitters
4302 (define_insn_and_split "vsx_reduc_<VEC_reduc_name>_v2df"
4303 [(set (match_operand:V2DF 0 "vfloat_operand" "=&wd,&?wa,wd,?wa")
4307 (match_operand:V2DF 1 "vfloat_operand" "wd,wa,wd,wa")
4308 (parallel [(const_int 1)]))
4311 (parallel [(const_int 0)])))
4313 (clobber (match_scratch:V2DF 2 "=0,0,&wd,&wa"))]
4314 "VECTOR_UNIT_VSX_P (V2DFmode)"
4319 rtx tmp = (GET_CODE (operands[2]) == SCRATCH)
4320 ? gen_reg_rtx (V2DFmode)
4322 emit_insn (gen_vsx_xxsldwi_v2df (tmp, operands[1], operands[1], const2_rtx));
4323 emit_insn (gen_<VEC_reduc_rtx>v2df3 (operands[0], tmp, operands[1]));
4326 [(set_attr "length" "8")
4327 (set_attr "type" "veccomplex")])
4329 (define_insn_and_split "vsx_reduc_<VEC_reduc_name>_v4sf"
4330 [(set (match_operand:V4SF 0 "vfloat_operand" "=wf,?wa")
4332 (unspec:V4SF [(const_int 0)] UNSPEC_REDUC)
4333 (match_operand:V4SF 1 "vfloat_operand" "wf,wa")))
4334 (clobber (match_scratch:V4SF 2 "=&wf,&wa"))
4335 (clobber (match_scratch:V4SF 3 "=&wf,&wa"))]
4336 "VECTOR_UNIT_VSX_P (V4SFmode)"
4341 rtx op0 = operands[0];
4342 rtx op1 = operands[1];
4343 rtx tmp2, tmp3, tmp4;
4345 if (can_create_pseudo_p ())
4347 tmp2 = gen_reg_rtx (V4SFmode);
4348 tmp3 = gen_reg_rtx (V4SFmode);
4349 tmp4 = gen_reg_rtx (V4SFmode);
4358 emit_insn (gen_vsx_xxsldwi_v4sf (tmp2, op1, op1, const2_rtx));
4359 emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp3, tmp2, op1));
4360 emit_insn (gen_vsx_xxsldwi_v4sf (tmp4, tmp3, tmp3, GEN_INT (3)));
4361 emit_insn (gen_<VEC_reduc_rtx>v4sf3 (op0, tmp4, tmp3));
4364 [(set_attr "length" "16")
4365 (set_attr "type" "veccomplex")])
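;; These reduction patterns are used by the vectorizer; a hedged C sketch of
;; code that can end up here (assuming -O3 and -ffast-math so floating-point
;; reassociation is permitted):
;;
;;	float sum (const float *a)
;;	{
;;	  float s = 0.0f;
;;	  for (int i = 0; i < 1024; i++)
;;	    s += a[i];
;;	  return s;
;;	}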
4367 ;; Combiner patterns for the vector reduction patterns that know we can get
4368 ;; to the top element of the V2DF array without doing an extract.
4370 (define_insn_and_split "*vsx_reduc_<VEC_reduc_name>_v2df_scalar"
4371 [(set (match_operand:DF 0 "vfloat_operand" "=&ws,&?ws,ws,?ws")
4376 (match_operand:V2DF 1 "vfloat_operand" "wd,wa,wd,wa")
4377 (parallel [(const_int 1)]))
4380 (parallel [(const_int 0)])))
4382 (parallel [(const_int 1)])))
4383 (clobber (match_scratch:DF 2 "=0,0,&wd,&wa"))]
4384 "VECTOR_UNIT_VSX_P (V2DFmode)"
4389 rtx hi = gen_highpart (DFmode, operands[1]);
4390 rtx lo = (GET_CODE (operands[2]) == SCRATCH)
4391 ? gen_reg_rtx (DFmode)
4394 emit_insn (gen_vsx_extract_v2df (lo, operands[1], const1_rtx));
4395 emit_insn (gen_<VEC_reduc_rtx>df3 (operands[0], hi, lo));
4398 [(set_attr "length" "8")
4399 (set_attr "type" "veccomplex")])
4401 (define_insn_and_split "*vsx_reduc_<VEC_reduc_name>_v4sf_scalar"
4402 [(set (match_operand:SF 0 "vfloat_operand" "=f,?f")
4405 (unspec:V4SF [(const_int 0)] UNSPEC_REDUC)
4406 (match_operand:V4SF 1 "vfloat_operand" "wf,wa"))
4407 (parallel [(const_int 3)])))
4408 (clobber (match_scratch:V4SF 2 "=&wf,&wa"))
4409 (clobber (match_scratch:V4SF 3 "=&wf,&wa"))
4410 (clobber (match_scratch:V4SF 4 "=0,0"))]
4411 "VECTOR_UNIT_VSX_P (V4SFmode)"
4416 rtx op0 = operands[0];
4417 rtx op1 = operands[1];
4418 rtx tmp2, tmp3, tmp4, tmp5;
4420 if (can_create_pseudo_p ())
4422 tmp2 = gen_reg_rtx (V4SFmode);
4423 tmp3 = gen_reg_rtx (V4SFmode);
4424 tmp4 = gen_reg_rtx (V4SFmode);
4425 tmp5 = gen_reg_rtx (V4SFmode);
4435 emit_insn (gen_vsx_xxsldwi_v4sf (tmp2, op1, op1, const2_rtx));
4436 emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp3, tmp2, op1));
4437 emit_insn (gen_vsx_xxsldwi_v4sf (tmp4, tmp3, tmp3, GEN_INT (3)));
4438 emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp5, tmp4, tmp3));
4439 emit_insn (gen_vsx_xscvspdp_scalar2 (op0, tmp5));
4442 [(set_attr "length" "20")
4443 (set_attr "type" "veccomplex")])
4446 ;; Power8 Vector fusion. The fused ops must be physically adjacent.
4448 [(set (match_operand:P 0 "base_reg_operand")
4449 (match_operand:P 1 "short_cint_operand"))
4450 (set (match_operand:VSX_M 2 "vsx_register_operand")
4451 (mem:VSX_M (plus:P (match_dup 0)
4452 (match_operand:P 3 "int_reg_operand"))))]
4453 "TARGET_VSX && TARGET_P8_FUSION && !TARGET_P9_VECTOR"
4454 "li %0,%1\;lx<VSX_M:VSm>x %x2,%0,%3\t\t\t# vector load fusion"
4455 [(set_attr "length" "8")
4456 (set_attr "type" "vecload")])
4459 [(set (match_operand:P 0 "base_reg_operand")
4460 (match_operand:P 1 "short_cint_operand"))
4461 (set (match_operand:VSX_M 2 "vsx_register_operand")
4462 (mem:VSX_M (plus:P (match_operand:P 3 "int_reg_operand")
4464 "TARGET_VSX && TARGET_P8_FUSION && !TARGET_P9_VECTOR"
4465 "li %0,%1\;lx<VSX_M:VSm>x %x2,%0,%3\t\t\t# vector load fusion"
4466 [(set_attr "length" "8")
4467 (set_attr "type" "vecload")])
4470 ;; ISA 3.0 vector extend sign support
4472 (define_insn "vsx_sign_extend_qi_<mode>"
4473 [(set (match_operand:VSINT_84 0 "vsx_register_operand" "=v")
4475 [(match_operand:V16QI 1 "vsx_register_operand" "v")]
4476 UNSPEC_VSX_SIGN_EXTEND))]
4479 [(set_attr "type" "vecexts")])
4481 (define_insn "vsx_sign_extend_hi_<mode>"
4482 [(set (match_operand:VSINT_84 0 "vsx_register_operand" "=v")
4484 [(match_operand:V8HI 1 "vsx_register_operand" "v")]
4485 UNSPEC_VSX_SIGN_EXTEND))]
4488 [(set_attr "type" "vecexts")])
4490 (define_insn "*vsx_sign_extend_si_v2di"
4491 [(set (match_operand:V2DI 0 "vsx_register_operand" "=v")
4492 (unspec:V2DI [(match_operand:V4SI 1 "vsx_register_operand" "v")]
4493 UNSPEC_VSX_SIGN_EXTEND))]
4496 [(set_attr "type" "vecexts")])
4499 ;; ISA 3.0 Binary Floating-Point Support
4501 ;; VSX Scalar Extract Exponent Quad-Precision
4502 (define_insn "xsxexpqp_<mode>"
4503 [(set (match_operand:DI 0 "altivec_register_operand" "=v")
4504 (unspec:DI [(match_operand:IEEE128 1 "altivec_register_operand" "v")]
4505 UNSPEC_VSX_SXEXPDP))]
4508 [(set_attr "type" "vecmove")])
4510 ;; VSX Scalar Extract Exponent Double-Precision
4511 (define_insn "xsxexpdp"
4512 [(set (match_operand:DI 0 "register_operand" "=r")
4513 (unspec:DI [(match_operand:DF 1 "vsx_register_operand" "wa")]
4514 UNSPEC_VSX_SXEXPDP))]
4515 "TARGET_P9_VECTOR && TARGET_64BIT"
4517 [(set_attr "type" "integer")])
4519 ;; VSX Scalar Extract Significand Quad-Precision
4520 (define_insn "xsxsigqp_<mode>"
4521 [(set (match_operand:TI 0 "altivec_register_operand" "=v")
4522 (unspec:TI [(match_operand:IEEE128 1 "altivec_register_operand" "v")]
4526 [(set_attr "type" "vecmove")])
4528 ;; VSX Scalar Extract Significand Double-Precision
4529 (define_insn "xsxsigdp"
4530 [(set (match_operand:DI 0 "register_operand" "=r")
4531 (unspec:DI [(match_operand:DF 1 "vsx_register_operand" "wa")]
4533 "TARGET_P9_VECTOR && TARGET_64BIT"
4535 [(set_attr "type" "integer")])
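;; Hedged C examples of the built-ins these extract patterns implement
;; (assuming -mcpu=power9 -m64; see the GCC manual entries for
;; scalar_extract_exp and scalar_extract_sig — no header is required):
;;
;;	unsigned int exp_bits (double x)
;;	{ return scalar_extract_exp (x); }	/* xsxexpdp */
;;	unsigned long long sig_bits (double x)
;;	{ return scalar_extract_sig (x); }	/* xsxsigdp */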
4537 ;; VSX Scalar Insert Exponent Quad-Precision Floating Point Argument
4538 (define_insn "xsiexpqpf_<mode>"
4539 [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v")
4541 [(match_operand:IEEE128 1 "altivec_register_operand" "v")
4542 (match_operand:DI 2 "altivec_register_operand" "v")]
4543 UNSPEC_VSX_SIEXPQP))]
4546 [(set_attr "type" "vecmove")])
4548 ;; VSX Scalar Insert Exponent Quad-Precision
4549 (define_insn "xsiexpqp_<mode>"
4550 [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v")
4551 (unspec:IEEE128 [(match_operand:TI 1 "altivec_register_operand" "v")
4552 (match_operand:DI 2 "altivec_register_operand" "v")]
4553 UNSPEC_VSX_SIEXPQP))]
4556 [(set_attr "type" "vecmove")])
4558 ;; VSX Scalar Insert Exponent Double-Precision
4559 (define_insn "xsiexpdp"
4560 [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
4561 (unspec:DF [(match_operand:DI 1 "register_operand" "r")
4562 (match_operand:DI 2 "register_operand" "r")]
4563 UNSPEC_VSX_SIEXPDP))]
4564 "TARGET_P9_VECTOR && TARGET_64BIT"
4565 "xsiexpdp %x0,%1,%2"
4566 [(set_attr "type" "fpsimple")])
4568 ;; VSX Scalar Insert Exponent Double-Precision Floating Point Argument
4569 (define_insn "xsiexpdpf"
4570 [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
4571 (unspec:DF [(match_operand:DF 1 "register_operand" "r")
4572 (match_operand:DI 2 "register_operand" "r")]
4573 UNSPEC_VSX_SIEXPDP))]
4574 "TARGET_P9_VECTOR && TARGET_64BIT"
4575 "xsiexpdp %x0,%1,%2"
4576 [(set_attr "type" "fpsimple")])
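;; A hedged C example of the insert-exponent built-in (assuming
;; -mcpu=power9 -m64; see the GCC manual for scalar_insert_exp):
;;
;;	double with_exp (unsigned long long sig, unsigned long long exp)
;;	{
;;	  return scalar_insert_exp (sig, exp);	/* xsiexpdp */
;;	}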
4578 ;; VSX Scalar Compare Exponents Double-Precision
4579 (define_expand "xscmpexpdp_<code>"
4583 [(match_operand:DF 1 "vsx_register_operand" "wa")
4584 (match_operand:DF 2 "vsx_register_operand" "wa")]
4585 UNSPEC_VSX_SCMPEXPDP)
4587 (set (match_operand:SI 0 "register_operand" "=r")
4588 (CMP_TEST:SI (match_dup 3)
4592 operands[3] = gen_reg_rtx (CCFPmode);
4595 (define_insn "*xscmpexpdp"
4596 [(set (match_operand:CCFP 0 "cc_reg_operand" "=y")
4598 (unspec:DF [(match_operand:DF 1 "vsx_register_operand" "wa")
4599 (match_operand:DF 2 "vsx_register_operand" "wa")]
4600 UNSPEC_VSX_SCMPEXPDP)
4601 (match_operand:SI 3 "zero_constant" "j")))]
4603 "xscmpexpdp %0,%x1,%x2"
4604 [(set_attr "type" "fpcompare")])
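;; A hedged C example of the exponent-compare built-ins this expands
;; (assuming -mcpu=power9; see the GCC manual for scalar_cmp_exp_gt and
;; friends):
;;
;;	int exp_gt (double a, double b)
;;	{
;;	  return scalar_cmp_exp_gt (a, b);	/* xscmpexpdp */
;;	}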
4606 ;; VSX Scalar Test Data Class Quad-Precision
4607 ;; (Expansion for scalar_test_data_class (__ieee128, int))
4608 ;; (Has side effect of setting the lt bit if operand 1 is negative,
4609 ;; setting the eq bit if any of the conditions tested by operand 2
4610 ;; are satisfied, and clearing the gt and unordered bits to zero.)
4611 (define_expand "xststdcqp_<mode>"
4615 [(match_operand:IEEE128 1 "altivec_register_operand" "v")
4616 (match_operand:SI 2 "u7bit_cint_operand" "n")]
4619 (set (match_operand:SI 0 "register_operand" "=r")
4620 (eq:SI (match_dup 3)
4624 operands[3] = gen_reg_rtx (CCFPmode);
4627 ;; VSX Scalar Test Data Class Double- and Single-Precision
4628 ;; (The lt bit is set if operand 1 is negative. The eq bit is set
4629 ;; if any of the conditions tested by operand 2 are satisfied.
4630 ;; The gt and unordered bits are cleared to zero.)
4631 (define_expand "xststdc<Fvsx>"
4635 [(match_operand:SFDF 1 "vsx_register_operand" "wa")
4636 (match_operand:SI 2 "u7bit_cint_operand" "n")]
4639 (set (match_operand:SI 0 "register_operand" "=r")
4640 (eq:SI (match_dup 3)
4644 operands[3] = gen_reg_rtx (CCFPmode);
4645 operands[4] = CONST0_RTX (SImode);
4648 ;; The VSX Scalar Test Negative Quad-Precision
4649 (define_expand "xststdcnegqp_<mode>"
4653 [(match_operand:IEEE128 1 "altivec_register_operand" "v")
4657 (set (match_operand:SI 0 "register_operand" "=r")
4658 (lt:SI (match_dup 2)
4662 operands[2] = gen_reg_rtx (CCFPmode);
4665 ;; The VSX Scalar Test Negative Double- and Single-Precision
4666 (define_expand "xststdcneg<Fvsx>"
4670 [(match_operand:SFDF 1 "vsx_register_operand" "wa")
4674 (set (match_operand:SI 0 "register_operand" "=r")
4675 (lt:SI (match_dup 2)
4679 operands[2] = gen_reg_rtx (CCFPmode);
4680 operands[3] = CONST0_RTX (SImode);
4683 (define_insn "*xststdcqp_<mode>"
4684 [(set (match_operand:CCFP 0 "" "=y")
4687 [(match_operand:IEEE128 1 "altivec_register_operand" "v")
4688 (match_operand:SI 2 "u7bit_cint_operand" "n")]
4692 "xststdcqp %0,%1,%2"
4693 [(set_attr "type" "fpcompare")])
4695 (define_insn "*xststdc<Fvsx>"
4696 [(set (match_operand:CCFP 0 "" "=y")
4698 (unspec:SFDF [(match_operand:SFDF 1 "vsx_register_operand" "wa")
4699 (match_operand:SI 2 "u7bit_cint_operand" "n")]
4701 (match_operand:SI 3 "zero_constant" "j")))]
4703 "xststdc<Fvsx> %0,%x1,%2"
4704 [(set_attr "type" "fpcompare")])
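;; Hedged C examples of the test built-ins these expanders implement
;; (assuming -mcpu=power9; the DCMX mask bits are NaN=0x40, +inf=0x20,
;; -inf=0x10, +0=0x08, -0=0x04, +denormal=0x02, -denormal=0x01):
;;
;;	int is_nan_or_inf (double x)
;;	{ return scalar_test_data_class (x, 0x70); }
;;	int is_neg (double x)
;;	{ return scalar_test_neg (x); }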
4706 ;; VSX Vector Extract Exponent Double and Single Precision
4707 (define_insn "xvxexp<VSs>"
4708 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
4710 [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
4713 "xvxexp<VSs> %x0,%x1"
4714 [(set_attr "type" "vecsimple")])
4716 ;; VSX Vector Extract Significand Double and Single Precision
4717 (define_insn "xvxsig<VSs>"
4718 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
4720 [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
4723 "xvxsig<VSs> %x0,%x1"
4724 [(set_attr "type" "vecsimple")])
4726 ;; VSX Vector Insert Exponent Double and Single Precision
4727 (define_insn "xviexp<VSs>"
4728 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
4730 [(match_operand:VSX_F 1 "vsx_register_operand" "wa")
4731 (match_operand:VSX_F 2 "vsx_register_operand" "wa")]
4734 "xviexp<VSs> %x0,%x1,%x2"
4735 [(set_attr "type" "vecsimple")])
4737 ;; VSX Vector Test Data Class Double and Single Precision
4738 ;; The corresponding elements of the result vector are all ones
4739 ;; if any of the conditions tested by operand 3 are satisfied.
4740 (define_insn "xvtstdc<VSs>"
4741 [(set (match_operand:<VSI> 0 "vsx_register_operand" "=wa")
4743 [(match_operand:VSX_F 1 "vsx_register_operand" "wa")
4744 (match_operand:SI 2 "u7bit_cint_operand" "n")]
4745 UNSPEC_VSX_VTSTDC))]
4747 "xvtstdc<VSs> %x0,%x1,%2"
4748 [(set_attr "type" "vecsimple")])
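;; Hedged C examples of the vector forms (assuming <altivec.h> and
;; -mcpu=power9; see the GCC manual for vec_extract_exp and
;; vec_test_data_class):
;;
;;	#include <altivec.h>
;;	vector unsigned int exps (vector float v)
;;	{ return vec_extract_exp (v); }		/* xvxexpsp */
;;	vector bool int denorm (vector float v)
;;	{ return vec_test_data_class (v, 0x03); }	/* +/- denormal */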
4750 ;; ISA 3.0 String Operations Support
4752 ;; Compare vectors producing a vector result and a predicate, setting CR6
4753 ;; to indicate a combined status. This pattern matches v16qi, v8hi, and
4754 ;; v4si modes.  There is no need to match v4sf, v2df, or v2di modes
4755 ;; because those are expanded to use Power8 instructions.
4757 (define_insn "*vsx_ne_<mode>_p"
4758 [(set (reg:CC CR6_REGNO)
4760 [(ne:CC (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v")
4761 (match_operand:VSX_EXTRACT_I 2 "gpc_reg_operand" "v"))]
4763 (set (match_operand:VSX_EXTRACT_I 0 "gpc_reg_operand" "=v")
4764 (ne:VSX_EXTRACT_I (match_dup 1)
4767 "vcmpne<VSX_EXTRACT_WIDTH>. %0,%1,%2"
4768 [(set_attr "type" "vecsimple")])
4770 (define_insn "*vector_nez_<mode>_p"
4771 [(set (reg:CC CR6_REGNO)
4772 (unspec:CC [(unspec:VI
4773 [(match_operand:VI 1 "gpc_reg_operand" "v")
4774 (match_operand:VI 2 "gpc_reg_operand" "v")]
4777 (set (match_operand:VI 0 "gpc_reg_operand" "=v")
4778 (unspec:VI [(match_dup 1)
4782 "vcmpnez<VSX_EXTRACT_WIDTH>. %0,%1,%2"
4783 [(set_attr "type" "vecsimple")])
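;; A minimal C sketch of the compare-not-equal-or-zero built-in that uses
;; this pattern (assuming <altivec.h> and -mcpu=power9):
;;
;;	#include <altivec.h>
;;	vector bool char ne_or_zero (vector signed char a, vector signed char b)
;;	{
;;	  return vec_cmpnez (a, b);	/* vcmpnezb */
;;	}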
4785 ;; Return first position of match between vectors
4786 (define_expand "first_match_index_<mode>"
4787 [(match_operand:SI 0 "register_operand")
4788 (unspec:SI [(match_operand:VSX_EXTRACT_I 1 "register_operand")
4789 (match_operand:VSX_EXTRACT_I 2 "register_operand")]
4790 UNSPEC_VSX_FIRST_MATCH_INDEX)]
4795 rtx cmp_result = gen_reg_rtx (<MODE>mode);
4796 rtx not_result = gen_reg_rtx (<MODE>mode);
4798 emit_insn (gen_vcmpnez<VSX_EXTRACT_WIDTH> (cmp_result, operands[1],
4800 emit_insn (gen_one_cmpl<mode>2 (not_result, cmp_result));
4802 sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2;
4804 if (<MODE>mode == V16QImode)
4805 emit_insn (gen_vctzlsbb_<mode> (operands[0], not_result));
4808 rtx tmp = gen_reg_rtx (SImode);
4809 emit_insn (gen_vctzlsbb_<mode> (tmp, not_result));
4810 emit_insn (gen_ashrsi3 (operands[0], tmp, GEN_INT (sh)));
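;; A hedged C example of the built-in this expander implements (assuming
;; <altivec.h> and -mcpu=power9):
;;
;;	#include <altivec.h>
;;	int first_match (vector signed char a, vector signed char b)
;;	{
;;	  return vec_first_match_index (a, b);
;;	}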
4815 ;; Return first position of match between vectors or end of string (EOS)
4816 (define_expand "first_match_or_eos_index_<mode>"
4817 [(match_operand:SI 0 "register_operand")
4818 (unspec: SI [(match_operand:VSX_EXTRACT_I 1 "register_operand")
4819 (match_operand:VSX_EXTRACT_I 2 "register_operand")]
4820 UNSPEC_VSX_FIRST_MATCH_EOS_INDEX)]
4824 rtx cmpz1_result = gen_reg_rtx (<MODE>mode);
4825 rtx cmpz2_result = gen_reg_rtx (<MODE>mode);
4826 rtx cmpz_result = gen_reg_rtx (<MODE>mode);
4827 rtx and_result = gen_reg_rtx (<MODE>mode);
4828 rtx result = gen_reg_rtx (<MODE>mode);
4829 rtx vzero = gen_reg_rtx (<MODE>mode);
4831 /* Vector with zeros in elements that correspond to zeros in operands. */
4832 emit_move_insn (vzero, CONST0_RTX (<MODE>mode));
4833 emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz1_result, operands[1], vzero));
4834 emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz2_result, operands[2], vzero));
4835 emit_insn (gen_and<mode>3 (and_result, cmpz1_result, cmpz2_result));
4837   /* Vector with ones in elements that do not match.  */
4838 emit_insn (gen_vcmpnez<VSX_EXTRACT_WIDTH> (cmpz_result, operands[1],
4841 /* Create vector with ones in elements where there was a zero in one of
4842 the source elements or the elements that match. */
4843 emit_insn (gen_nand<mode>3 (result, and_result, cmpz_result));
4844 sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2;
4846 if (<MODE>mode == V16QImode)
4847 emit_insn (gen_vctzlsbb_<mode> (operands[0], result));
4850 rtx tmp = gen_reg_rtx (SImode);
4851 emit_insn (gen_vctzlsbb_<mode> (tmp, result));
4852 emit_insn (gen_ashrsi3 (operands[0], tmp, GEN_INT (sh)));
4857 ;; Return first position of mismatch between vectors
4858 (define_expand "first_mismatch_index_<mode>"
4859 [(match_operand:SI 0 "register_operand")
4860 (unspec: SI [(match_operand:VSX_EXTRACT_I 1 "register_operand")
4861 (match_operand:VSX_EXTRACT_I 2 "register_operand")]
4862 UNSPEC_VSX_FIRST_MISMATCH_INDEX)]
4866 rtx cmp_result = gen_reg_rtx (<MODE>mode);
4868 emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmp_result, operands[1],
4870 sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2;
4872 if (<MODE>mode == V16QImode)
4873 emit_insn (gen_vctzlsbb_<mode> (operands[0], cmp_result));
4876 rtx tmp = gen_reg_rtx (SImode);
4877 emit_insn (gen_vctzlsbb_<mode> (tmp, cmp_result));
4878 emit_insn (gen_ashrsi3 (operands[0], tmp, GEN_INT (sh)));
4883 ;; Return first position of mismatch between vectors or end of string (EOS)
4884 (define_expand "first_mismatch_or_eos_index_<mode>"
4885 [(match_operand:SI 0 "register_operand")
4886 (unspec: SI [(match_operand:VSX_EXTRACT_I 1 "register_operand")
4887 (match_operand:VSX_EXTRACT_I 2 "register_operand")]
4888 UNSPEC_VSX_FIRST_MISMATCH_EOS_INDEX)]
4892 rtx cmpz1_result = gen_reg_rtx (<MODE>mode);
4893 rtx cmpz2_result = gen_reg_rtx (<MODE>mode);
4894 rtx cmpz_result = gen_reg_rtx (<MODE>mode);
4895 rtx not_cmpz_result = gen_reg_rtx (<MODE>mode);
4896 rtx and_result = gen_reg_rtx (<MODE>mode);
4897 rtx result = gen_reg_rtx (<MODE>mode);
4898 rtx vzero = gen_reg_rtx (<MODE>mode);
4900 /* Vector with zeros in elements that correspond to zeros in operands. */
4901 emit_move_insn (vzero, CONST0_RTX (<MODE>mode));
4903 emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz1_result, operands[1], vzero));
4904 emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz2_result, operands[2], vzero));
4905 emit_insn (gen_and<mode>3 (and_result, cmpz1_result, cmpz2_result));
4907   /* Vector with ones in elements that match.  */
4908 emit_insn (gen_vcmpnez<VSX_EXTRACT_WIDTH> (cmpz_result, operands[1],
4910 emit_insn (gen_one_cmpl<mode>2 (not_cmpz_result, cmpz_result));
4912 /* Create vector with ones in elements where there was a zero in one of
4913 the source elements or the elements did not match. */
4914 emit_insn (gen_nand<mode>3 (result, and_result, not_cmpz_result));
4915 sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2;
4917 if (<MODE>mode == V16QImode)
4918 emit_insn (gen_vctzlsbb_<mode> (operands[0], result));
4921 rtx tmp = gen_reg_rtx (SImode);
4922 emit_insn (gen_vctzlsbb_<mode> (tmp, result));
4923 emit_insn (gen_ashrsi3 (operands[0], tmp, GEN_INT (sh)));
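;; Hedged C examples of the remaining built-ins in this family (assuming
;; <altivec.h> and -mcpu=power9):
;;
;;	#include <altivec.h>
;;	int f1 (vector signed char a, vector signed char b)
;;	{ return vec_first_match_or_eos_index (a, b); }
;;	int f2 (vector signed char a, vector signed char b)
;;	{ return vec_first_mismatch_index (a, b); }
;;	int f3 (vector signed char a, vector signed char b)
;;	{ return vec_first_mismatch_or_eos_index (a, b); }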
4928 ;; Load VSX Vector with Length
4929 (define_expand "lxvl"
4931 (ashift:DI (match_operand:DI 2 "register_operand")
4933 (set (match_operand:V16QI 0 "vsx_register_operand")
4935 [(match_operand:DI 1 "gpc_reg_operand")
4936 (mem:V16QI (match_dup 1))
4939 "TARGET_P9_VECTOR && TARGET_64BIT"
4941 operands[3] = gen_reg_rtx (DImode);
4944 (define_insn "*lxvl"
4945 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
4947 [(match_operand:DI 1 "gpc_reg_operand" "b")
4948 (mem:V16QI (match_dup 1))
4949 (match_operand:DI 2 "register_operand" "r")]
4951 "TARGET_P9_VECTOR && TARGET_64BIT"
4953 [(set_attr "type" "vecload")])
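;; A hedged C example of the load-with-length built-in (assuming
;; <altivec.h>, -mcpu=power9 and -m64; vec_xl_len_r is the byte-reversed
;; variant handled by the xl_len_r expander below):
;;
;;	#include <altivec.h>
;;	#include <stddef.h>
;;	vector unsigned char head (unsigned char *p, size_t n)
;;	{
;;	  return vec_xl_len (p, n);	/* loads only n bytes */
;;	}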
4955 (define_insn "lxvll"
4956 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
4957 (unspec:V16QI [(match_operand:DI 1 "gpc_reg_operand" "b")
4958 (mem:V16QI (match_dup 1))
4959 (match_operand:DI 2 "register_operand" "r")]
4963 [(set_attr "type" "vecload")])
4965 ;; Expand for builtin xl_len_r
4966 (define_expand "xl_len_r"
4967 [(match_operand:V16QI 0 "vsx_register_operand")
4968 (match_operand:DI 1 "register_operand")
4969 (match_operand:DI 2 "register_operand")]
4972 rtx shift_mask = gen_reg_rtx (V16QImode);
4973 rtx rtx_vtmp = gen_reg_rtx (V16QImode);
4974 rtx tmp = gen_reg_rtx (DImode);
4976 emit_insn (gen_altivec_lvsl_reg (shift_mask, operands[2]));
4977 emit_insn (gen_ashldi3 (tmp, operands[2], GEN_INT (56)));
4978 emit_insn (gen_lxvll (rtx_vtmp, operands[1], tmp));
4979 emit_insn (gen_altivec_vperm_v8hiv16qi (operands[0], rtx_vtmp, rtx_vtmp,
4984 (define_insn "stxvll"
4985 [(set (mem:V16QI (match_operand:DI 1 "gpc_reg_operand" "b"))
4986 (unspec:V16QI [(match_operand:V16QI 0 "vsx_register_operand" "wa")
4987 (mem:V16QI (match_dup 1))
4988 (match_operand:DI 2 "register_operand" "r")]
4992 [(set_attr "type" "vecstore")])
4994 ;; Store VSX Vector with Length
4995 (define_expand "stxvl"
4997 (ashift:DI (match_operand:DI 2 "register_operand")
4999 (set (mem:V16QI (match_operand:DI 1 "gpc_reg_operand"))
5001 [(match_operand:V16QI 0 "vsx_register_operand")
5002 (mem:V16QI (match_dup 1))
5005 "TARGET_P9_VECTOR && TARGET_64BIT"
5007 operands[3] = gen_reg_rtx (DImode);
5010 (define_insn "*stxvl"
5011 [(set (mem:V16QI (match_operand:DI 1 "gpc_reg_operand" "b"))
5013 [(match_operand:V16QI 0 "vsx_register_operand" "wa")
5014 (mem:V16QI (match_dup 1))
5015 (match_operand:DI 2 "register_operand" "r")]
5017 "TARGET_P9_VECTOR && TARGET_64BIT"
5019 [(set_attr "type" "vecstore")])
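;; A hedged C example of the store-with-length built-in (assuming
;; <altivec.h>, -mcpu=power9 and -m64):
;;
;;	#include <altivec.h>
;;	#include <stddef.h>
;;	void store_head (vector unsigned char v, unsigned char *p, size_t n)
;;	{
;;	  vec_xst_len (v, p, n);	/* stores only n bytes */
;;	}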
5021 ;; Expand for builtin xst_len_r
5022 (define_expand "xst_len_r"
5023 [(match_operand:V16QI 0 "vsx_register_operand" "=wa")
5024 (match_operand:DI 1 "register_operand" "b")
5025 (match_operand:DI 2 "register_operand" "r")]
5028 rtx shift_mask = gen_reg_rtx (V16QImode);
5029 rtx rtx_vtmp = gen_reg_rtx (V16QImode);
5030 rtx tmp = gen_reg_rtx (DImode);
5032 emit_insn (gen_altivec_lvsr_reg (shift_mask, operands[2]));
5033 emit_insn (gen_altivec_vperm_v8hiv16qi (rtx_vtmp, operands[0], operands[0],
5035 emit_insn (gen_ashldi3 (tmp, operands[2], GEN_INT (56)));
5036 emit_insn (gen_stxvll (rtx_vtmp, operands[1], tmp));
5040 ;; Vector Compare Not Equal Byte (specified/not+eq:)
5041 (define_insn "vcmpneb"
5042 [(set (match_operand:V16QI 0 "altivec_register_operand" "=v")
5044 (eq:V16QI (match_operand:V16QI 1 "altivec_register_operand" "v")
5045 (match_operand:V16QI 2 "altivec_register_operand" "v"))))]
5048 [(set_attr "type" "vecsimple")])
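;; A minimal C sketch of the compare-not-equal built-in backed by this insn
;; (assuming <altivec.h> and -mcpu=power9):
;;
;;	#include <altivec.h>
;;	vector bool char ne (vector signed char a, vector signed char b)
;;	{
;;	  return vec_cmpne (a, b);	/* vcmpneb */
;;	}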
5050 ;; Vector Compare Not Equal or Zero Byte
5051 (define_insn "vcmpnezb"
5052 [(set (match_operand:V16QI 0 "altivec_register_operand" "=v")
5054 [(match_operand:V16QI 1 "altivec_register_operand" "v")
5055 (match_operand:V16QI 2 "altivec_register_operand" "v")]
5059 [(set_attr "type" "vecsimple")])
5061 ;; Vector Compare Not Equal Half Word (specified/not+eq:)
5062 (define_insn "vcmpneh"
5063 [(set (match_operand:V8HI 0 "altivec_register_operand" "=v")
5065 (eq:V8HI (match_operand:V8HI 1 "altivec_register_operand" "v")
5066 (match_operand:V8HI 2 "altivec_register_operand" "v"))))]
5069 [(set_attr "type" "vecsimple")])
5071 ;; Vector Compare Not Equal or Zero Half Word
5072 (define_insn "vcmpnezh"
5073 [(set (match_operand:V8HI 0 "altivec_register_operand" "=v")
5074 (unspec:V8HI [(match_operand:V8HI 1 "altivec_register_operand" "v")
5075 (match_operand:V8HI 2 "altivec_register_operand" "v")]
5079 [(set_attr "type" "vecsimple")])
5081 ;; Vector Compare Not Equal Word (specified/not+eq:)
5082 (define_insn "vcmpnew"
5083 [(set (match_operand:V4SI 0 "altivec_register_operand" "=v")
5085 (eq:V4SI (match_operand:V4SI 1 "altivec_register_operand" "v")
5086 (match_operand:V4SI 2 "altivec_register_operand" "v"))))]
5089 [(set_attr "type" "vecsimple")])
5091 ;; Vector Compare Not Equal or Zero Word
5092 (define_insn "vcmpnezw"
5093 [(set (match_operand:V4SI 0 "altivec_register_operand" "=v")
5094 (unspec:V4SI [(match_operand:V4SI 1 "altivec_register_operand" "v")
5095 (match_operand:V4SI 2 "altivec_register_operand" "v")]
5099 [(set_attr "type" "vecsimple")])
5101 ;; Vector Count Leading Zero Least-Significant Bits Byte
5102 (define_insn "vclzlsbb"
5103 [(set (match_operand:SI 0 "register_operand" "=r")
5105 [(match_operand:V16QI 1 "altivec_register_operand" "v")]
5109 [(set_attr "type" "vecsimple")])
5111 ;; Vector Count Trailing Zero Least-Significant Bits Byte
5112 (define_insn "vctzlsbb_<mode>"
5113 [(set (match_operand:SI 0 "register_operand" "=r")
5115 [(match_operand:VSX_EXTRACT_I 1 "altivec_register_operand" "v")]
5119 [(set_attr "type" "vecsimple")])
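;; Hedged C examples of the count leading/trailing zero least-significant
;; bits built-ins (assuming <altivec.h> and -mcpu=power9):
;;
;;	#include <altivec.h>
;;	int lead (vector unsigned char v)  { return vec_cntlz_lsbb (v); }
;;	int trail (vector unsigned char v) { return vec_cnttz_lsbb (v); }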
5121 ;; Vector Extract Unsigned Byte Left-Indexed
5122 (define_insn "vextublx"
5123 [(set (match_operand:SI 0 "register_operand" "=r")
5125 [(match_operand:SI 1 "register_operand" "r")
5126 (match_operand:V16QI 2 "altivec_register_operand" "v")]
5130 [(set_attr "type" "vecsimple")])
5132 ;; Vector Extract Unsigned Byte Right-Indexed
5133 (define_insn "vextubrx"
5134 [(set (match_operand:SI 0 "register_operand" "=r")
5136 [(match_operand:SI 1 "register_operand" "r")
5137 (match_operand:V16QI 2 "altivec_register_operand" "v")]
5141 [(set_attr "type" "vecsimple")])
5143 ;; Vector Extract Unsigned Half Word Left-Indexed
5144 (define_insn "vextuhlx"
5145 [(set (match_operand:SI 0 "register_operand" "=r")
5147 [(match_operand:SI 1 "register_operand" "r")
5148 (match_operand:V8HI 2 "altivec_register_operand" "v")]
5152 [(set_attr "type" "vecsimple")])
5154 ;; Vector Extract Unsigned Half Word Right-Indexed
5155 (define_insn "vextuhrx"
5156 [(set (match_operand:SI 0 "register_operand" "=r")
5158 [(match_operand:SI 1 "register_operand" "r")
5159 (match_operand:V8HI 2 "altivec_register_operand" "v")]
5163 [(set_attr "type" "vecsimple")])
5165 ;; Vector Extract Unsigned Word Left-Indexed
5166 (define_insn "vextuwlx"
5167 [(set (match_operand:SI 0 "register_operand" "=r")
5169 [(match_operand:SI 1 "register_operand" "r")
5170 (match_operand:V4SI 2 "altivec_register_operand" "v")]
5174 [(set_attr "type" "vecsimple")])
5176 ;; Vector Extract Unsigned Word Right-Indexed
5177 (define_insn "vextuwrx"
5178 [(set (match_operand:SI 0 "register_operand" "=r")
5180 [(match_operand:SI 1 "register_operand" "r")
5181 (match_operand:V4SI 2 "altivec_register_operand" "v")]
5185 [(set_attr "type" "vecsimple")])
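;; These left/right-indexed extracts implement vec_extract with a variable
;; index on ISA 3.0; an illustrative C sketch (assuming <altivec.h>):
;;
;;	#include <altivec.h>
;;	int get (vector int v, int i)
;;	{
;;	  return vec_extract (v, i);	/* vextuwlx / vextuwrx */
;;	}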
5187 ;; Vector insert/extract word at arbitrary byte values. Note, the little
5188 ;; endian version needs to adjust the byte number, and the V4SI element in
;; the insert.
5190 (define_insn "extract4b"
5191 [(set (match_operand:V2DI 0 "vsx_register_operand")
5192 (unspec:V2DI [(match_operand:V16QI 1 "vsx_register_operand" "wa")
5193 (match_operand:QI 2 "const_0_to_12_operand" "n")]
5194 UNSPEC_XXEXTRACTUW))]
5197 if (!VECTOR_ELT_ORDER_BIG)
5198 operands[2] = GEN_INT (12 - INTVAL (operands[2]));
5200 return "xxextractuw %x0,%x1,%2";
5203 (define_expand "insert4b"
5204 [(set (match_operand:V16QI 0 "vsx_register_operand")
5205 (unspec:V16QI [(match_operand:V4SI 1 "vsx_register_operand")
5206 (match_operand:V16QI 2 "vsx_register_operand")
5207 (match_operand:QI 3 "const_0_to_12_operand")]
5211 if (!VECTOR_ELT_ORDER_BIG)
5213 rtx op1 = operands[1];
5214 rtx v4si_tmp = gen_reg_rtx (V4SImode);
5215 emit_insn (gen_vsx_xxpermdi_v4si_be (v4si_tmp, op1, op1, const1_rtx));
5216 operands[1] = v4si_tmp;
5217 operands[3] = GEN_INT (12 - INTVAL (operands[3]));
5221 (define_insn "*insert4b_internal"
5222 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
5223 (unspec:V16QI [(match_operand:V4SI 1 "vsx_register_operand" "wa")
5224 (match_operand:V16QI 2 "vsx_register_operand" "0")
5225 (match_operand:QI 3 "const_0_to_12_operand" "n")]
5228 "xxinsertw %x0,%x1,%3"
5229 [(set_attr "type" "vecperm")])
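;; Hedged C examples of the 4-byte insert/extract built-ins (assuming
;; <altivec.h> and -mcpu=power9; see the GCC manual for vec_extract4b and
;; vec_insert4b):
;;
;;	#include <altivec.h>
;;	vector unsigned long long take4 (vector unsigned char v)
;;	{ return vec_extract4b (v, 4); }	/* xxextractuw */
;;	vector unsigned char put4 (vector signed int w, vector unsigned char v)
;;	{ return vec_insert4b (w, v, 4); }	/* xxinsertw */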
5232 ;; Generate vector extract four float 32 values from left four elements
5233 ;; of eight element vector of float 16 values.
5234 (define_expand "vextract_fp_from_shorth"
5235 [(set (match_operand:V4SF 0 "register_operand" "=wa")
5236 (unspec:V4SF [(match_operand:V8HI 1 "register_operand" "wa")]
5237 UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTH))]
5240 int vals[16] = {15, 14, 0, 0, 13, 12, 0, 0, 11, 10, 0, 0, 9, 8, 0, 0};
5244 rtx mask = gen_reg_rtx (V16QImode);
5245 rtx tmp = gen_reg_rtx (V16QImode);
5248 for (i = 0; i < 16; i++)
5249 rvals[i] = GEN_INT (vals[i]);
5251 /* xvcvhpsp - vector convert F16 to vector F32 requires the four F16
5252 inputs in half words 1,3,5,7 (IBM numbering). Use xxperm to move
5253 src half words 0,1,2,3 for the conversion instruction. */
5254 v = gen_rtvec_v (16, rvals);
5255 emit_insn (gen_vec_initv16qiqi (mask, gen_rtx_PARALLEL (V16QImode, v)));
5256 emit_insn (gen_altivec_vperm_v8hiv16qi (tmp, operands[1],
5257 operands[1], mask));
5258 emit_insn (gen_vsx_xvcvhpsp (operands[0], tmp));
5262 ;; Generate vector extract four float 32 values from right four elements
5263 ;; of eight element vector of float 16 values.
5264 (define_expand "vextract_fp_from_shortl"
5265 [(set (match_operand:V4SF 0 "register_operand" "=wa")
5266 (unspec:V4SF [(match_operand:V8HI 1 "register_operand" "wa")]
5267 UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTL))]
5270 int vals[16] = {7, 6, 0, 0, 5, 4, 0, 0, 3, 2, 0, 0, 1, 0, 0, 0};
5273 rtx mask = gen_reg_rtx (V16QImode);
5274 rtx tmp = gen_reg_rtx (V16QImode);
5277 for (i = 0; i < 16; i++)
5278 rvals[i] = GEN_INT (vals[i]);
5280 /* xvcvhpsp - vector convert F16 to vector F32 requires the four F16
5281 inputs in half words 1,3,5,7 (IBM numbering). Use xxperm to move
5282 src half words 4,5,6,7 for the conversion instruction. */
5283 v = gen_rtvec_v (16, rvals);
5284 emit_insn (gen_vec_initv16qiqi (mask, gen_rtx_PARALLEL (V16QImode, v)));
5285 emit_insn (gen_altivec_vperm_v8hiv16qi (tmp, operands[1],
5286 operands[1], mask));
5287 emit_insn (gen_vsx_xvcvhpsp (operands[0], tmp));
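;; Hedged C examples of the F16-to-F32 extract built-ins these expanders
;; implement (assuming <altivec.h> and -mcpu=power9):
;;
;;	#include <altivec.h>
;;	vector float left4 (vector unsigned short v)
;;	{ return vec_extract_fp32_from_shorth (v); }
;;	vector float right4 (vector unsigned short v)
;;	{ return vec_extract_fp32_from_shortl (v); }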
5291 ;; Support for ISA 3.0 vector byte reverse
5293 ;; Swap all bytes within a vector
5294 (define_insn "p9_xxbrq_v1ti"
5295 [(set (match_operand:V1TI 0 "vsx_register_operand" "=wa")
5296 (bswap:V1TI (match_operand:V1TI 1 "vsx_register_operand" "wa")))]
5299 [(set_attr "type" "vecperm")])
5301 (define_expand "p9_xxbrq_v16qi"
5302 [(use (match_operand:V16QI 0 "vsx_register_operand" "=wa"))
5303 (use (match_operand:V16QI 1 "vsx_register_operand" "wa"))]
5306 rtx op0 = gen_reg_rtx (V1TImode);
5307 rtx op1 = gen_lowpart (V1TImode, operands[1]);
5308 emit_insn (gen_p9_xxbrq_v1ti (op0, op1));
5309 emit_move_insn (operands[0], gen_lowpart (V16QImode, op0));
5313 ;; Swap all bytes in each 64-bit element
5314 (define_insn "p9_xxbrd_v2di"
5315 [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
5316 (bswap:V2DI (match_operand:V2DI 1 "vsx_register_operand" "wa")))]
5319 [(set_attr "type" "vecperm")])
5321 (define_expand "p9_xxbrd_v2df"
5322 [(use (match_operand:V2DF 0 "vsx_register_operand" "=wa"))
5323 (use (match_operand:V2DF 1 "vsx_register_operand" "wa"))]
5326 rtx op0 = gen_reg_rtx (V2DImode);
5327 rtx op1 = gen_lowpart (V2DImode, operands[1]);
5328 emit_insn (gen_p9_xxbrd_v2di (op0, op1));
5329 emit_move_insn (operands[0], gen_lowpart (V2DFmode, op0));
5333 ;; Swap all bytes in each 32-bit element
5334 (define_insn "p9_xxbrw_v4si"
5335 [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa")
5336 (bswap:V4SI (match_operand:V4SI 1 "vsx_register_operand" "wa")))]
5339 [(set_attr "type" "vecperm")])
5341 (define_expand "p9_xxbrw_v4sf"
5342 [(use (match_operand:V4SF 0 "vsx_register_operand" "=wa"))
5343 (use (match_operand:V4SF 1 "vsx_register_operand" "wa"))]
5346 rtx op0 = gen_reg_rtx (V4SImode);
5347 rtx op1 = gen_lowpart (V4SImode, operands[1]);
5348 emit_insn (gen_p9_xxbrw_v4si (op0, op1));
5349 emit_move_insn (operands[0], gen_lowpart (V4SFmode, op0));
5353 ;; Swap all bytes in each element of vector
5354 (define_expand "revb_<mode>"
5355 [(use (match_operand:VEC_REVB 0 "vsx_register_operand"))
5356 (use (match_operand:VEC_REVB 1 "vsx_register_operand"))]
5359 if (TARGET_P9_VECTOR)
5360 emit_insn (gen_p9_xxbr<VSX_XXBR>_<mode> (operands[0], operands[1]));
5363 /* Want to have the elements in reverse order relative
5364      to the endian mode in use, i.e. in LE mode, put elements
	 in BE order.  */
5366   rtx sel = swap_endian_selector_for_mode (<MODE>mode);
5367 emit_insn (gen_altivec_vperm_<mode> (operands[0], operands[1],
5374 ;; Reversing bytes in vector char is just a NOP.
5375 (define_expand "revb_v16qi"
5376 [(set (match_operand:V16QI 0 "vsx_register_operand")
5377 (bswap:V16QI (match_operand:V16QI 1 "vsx_register_operand")))]
5380 emit_move_insn (operands[0], operands[1]);
5384 ;; Swap all bytes in each 16-bit element
5385 (define_insn "p9_xxbrh_v8hi"
5386 [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
5387 (bswap:V8HI (match_operand:V8HI 1 "vsx_register_operand" "wa")))]
5390 [(set_attr "type" "vecperm")])
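;; A minimal C sketch of the byte-reverse built-in served by this section
;; (assuming <altivec.h>; xxbr[hwdq] on ISA 3.0, a vperm otherwise):
;;
;;	#include <altivec.h>
;;	vector unsigned int byteswap (vector unsigned int v)
;;	{
;;	  return vec_revb (v);
;;	}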
5393 ;; Operand numbers for the following peephole2
5395 [(SFBOOL_TMP_GPR 0) ;; GPR temporary
5396 (SFBOOL_TMP_VSX 1) ;; vector temporary
5397 (SFBOOL_MFVSR_D 2) ;; move to gpr dest
5398 (SFBOOL_MFVSR_A 3) ;; move to gpr src
5399 (SFBOOL_BOOL_D 4) ;; and/ior/xor dest
5400 (SFBOOL_BOOL_A1 5) ;; and/ior/xor arg1
5401 (SFBOOL_BOOL_A2 6) ;; and/ior/xor arg2
5402 (SFBOOL_SHL_D 7) ;; shift left dest
5403 (SFBOOL_SHL_A 8) ;; shift left arg
5404 (SFBOOL_MTVSR_D 9) ;; move to vector dest
5405 (SFBOOL_MFVSR_A_V4SF 10) ;; SFBOOL_MFVSR_A as V4SFmode
5406 (SFBOOL_BOOL_A_DI 11) ;; SFBOOL_BOOL_A1/A2 as DImode
5407 (SFBOOL_TMP_VSX_DI 12) ;; SFBOOL_TMP_VSX as DImode
5408 (SFBOOL_MTVSR_D_V4SF 13)]) ;; SFBOOL_MTVSR_D as V4SFmode
5410 ;; Attempt to optimize some common GLIBC operations using logical operations to
5411 ;; pick apart SFmode operations. For example, there is code from e_powf.c
5412 ;; after macro expansion that looks like:
;;	typedef union
;;	{
;;	  float value;
;;	  unsigned int word;
5417 ;;	} ieee_float_shape_type;
;;
;;	do {
5423 ;;	  ieee_float_shape_type gf_u;
5424 ;;	  gf_u.value = (t1);
5425 ;;	  (is) = gf_u.word;
;;	} while (0);
;;
;;	do {
5429 ;;	  ieee_float_shape_type sf_u;
5430 ;;	  sf_u.word = (is & 0xfffff000);
5431 ;;	  (t1) = sf_u.value;
;;	} while (0);
5435 ;; This would result in two direct move operations (convert to memory format,
5436 ;; direct move to GPR, do the AND operation, direct move to VSX, convert to
5437 ;; scalar format). With this peephole, we eliminate the direct move to the
5438 ;; GPR, and instead move the integer mask value to the vector register after a
5439 ;; shift and do the VSX logical operation.
5441 ;; The insns for dealing with SFmode in GPR registers looks like:
5442 ;; (set (reg:V4SF reg2) (unspec:V4SF [(reg:SF reg1)] UNSPEC_VSX_CVDPSPN))
5444 ;; (set (reg:DI reg3) (unspec:DI [(reg:V4SF reg2)] UNSPEC_P8V_RELOAD_FROM_VSX))
5446 ;; (set (reg:DI reg4) (and:DI (reg:DI reg3) (reg:DI reg3)))
5448 ;; (set (reg:DI reg5) (ashift:DI (reg:DI reg4) (const_int 32)))
5450 ;; (set (reg:SF reg6) (unspec:SF [(reg:DI reg5)] UNSPEC_P8V_MTVSRD))
5452 ;; (set (reg:SF reg6) (unspec:SF [(reg:SF reg6)] UNSPEC_VSX_CVSPDPN))
5455 [(match_scratch:DI SFBOOL_TMP_GPR "r")
5456 (match_scratch:V4SF SFBOOL_TMP_VSX "wa")
5458 ;; MFVSRWZ (aka zero_extend)
5459 (set (match_operand:DI SFBOOL_MFVSR_D "int_reg_operand")
5461 (match_operand:SI SFBOOL_MFVSR_A "vsx_register_operand")))
5463 ;; AND/IOR/XOR operation on int
5464 (set (match_operand:SI SFBOOL_BOOL_D "int_reg_operand")
5465 (and_ior_xor:SI (match_operand:SI SFBOOL_BOOL_A1 "int_reg_operand")
5466 (match_operand:SI SFBOOL_BOOL_A2 "reg_or_cint_operand")))
5469 (set (match_operand:DI SFBOOL_SHL_D "int_reg_operand")
5470 (ashift:DI (match_operand:DI SFBOOL_SHL_A "int_reg_operand")
5474 (set (match_operand:SF SFBOOL_MTVSR_D "vsx_register_operand")
5475 (unspec:SF [(match_dup SFBOOL_SHL_D)] UNSPEC_P8V_MTVSRD))]
5477 "TARGET_POWERPC64 && TARGET_DIRECT_MOVE
5478 /* The REG_P (xxx) tests prevents SUBREG's, which allows us to use REGNO
5479 to compare registers, when the mode is different. */
5480 && REG_P (operands[SFBOOL_MFVSR_D]) && REG_P (operands[SFBOOL_BOOL_D])
5481 && REG_P (operands[SFBOOL_BOOL_A1]) && REG_P (operands[SFBOOL_SHL_D])
5482 && REG_P (operands[SFBOOL_SHL_A]) && REG_P (operands[SFBOOL_MTVSR_D])
5483 && (REG_P (operands[SFBOOL_BOOL_A2])
5484 || CONST_INT_P (operands[SFBOOL_BOOL_A2]))
5485 && (REGNO (operands[SFBOOL_BOOL_D]) == REGNO (operands[SFBOOL_MFVSR_D])
5486 || peep2_reg_dead_p (2, operands[SFBOOL_MFVSR_D]))
5487 && (REGNO (operands[SFBOOL_MFVSR_D]) == REGNO (operands[SFBOOL_BOOL_A1])
5488 || (REG_P (operands[SFBOOL_BOOL_A2])
5489 && REGNO (operands[SFBOOL_MFVSR_D])
5490 == REGNO (operands[SFBOOL_BOOL_A2])))
5491 && REGNO (operands[SFBOOL_BOOL_D]) == REGNO (operands[SFBOOL_SHL_A])
5492 && (REGNO (operands[SFBOOL_SHL_D]) == REGNO (operands[SFBOOL_BOOL_D])
5493 || peep2_reg_dead_p (3, operands[SFBOOL_BOOL_D]))
5494 && peep2_reg_dead_p (4, operands[SFBOOL_SHL_D])"
5495 [(set (match_dup SFBOOL_TMP_GPR)
5496 (ashift:DI (match_dup SFBOOL_BOOL_A_DI)
5499 (set (match_dup SFBOOL_TMP_VSX_DI)
5500 (match_dup SFBOOL_TMP_GPR))
5502 (set (match_dup SFBOOL_MTVSR_D_V4SF)
5503 (and_ior_xor:V4SF (match_dup SFBOOL_MFVSR_A_V4SF)
5504 (match_dup SFBOOL_TMP_VSX)))]
5506 rtx bool_a1 = operands[SFBOOL_BOOL_A1];
5507 rtx bool_a2 = operands[SFBOOL_BOOL_A2];
5508 int regno_mfvsr_d = REGNO (operands[SFBOOL_MFVSR_D]);
5509 int regno_mfvsr_a = REGNO (operands[SFBOOL_MFVSR_A]);
5510 int regno_tmp_vsx = REGNO (operands[SFBOOL_TMP_VSX]);
5511 int regno_mtvsr_d = REGNO (operands[SFBOOL_MTVSR_D]);
5513 if (CONST_INT_P (bool_a2))
5515 rtx tmp_gpr = operands[SFBOOL_TMP_GPR];
5516 emit_move_insn (tmp_gpr, bool_a2);
5517 operands[SFBOOL_BOOL_A_DI] = tmp_gpr;
5521 int regno_bool_a1 = REGNO (bool_a1);
5522 int regno_bool_a2 = REGNO (bool_a2);
5523 int regno_bool_a = (regno_mfvsr_d == regno_bool_a1
5524 ? regno_bool_a2 : regno_bool_a1);
5525 operands[SFBOOL_BOOL_A_DI] = gen_rtx_REG (DImode, regno_bool_a);
5528 operands[SFBOOL_MFVSR_A_V4SF] = gen_rtx_REG (V4SFmode, regno_mfvsr_a);
5529 operands[SFBOOL_TMP_VSX_DI] = gen_rtx_REG (DImode, regno_tmp_vsx);
5530 operands[SFBOOL_MTVSR_D_V4SF] = gen_rtx_REG (V4SFmode, regno_mtvsr_d);