;; Copyright (C) 2009-2018 Free Software Foundation, Inc.
;; Contributed by Michael Meissner <meissner@linux.vnet.ibm.com>

;; This file is part of GCC.

;; GCC is free software; you can redistribute it and/or modify it
;; under the terms of the GNU General Public License as published
;; by the Free Software Foundation; either version 3, or (at your
;; option) any later version.

;; GCC is distributed in the hope that it will be useful, but WITHOUT
;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
;; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
;; License for more details.

;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3.  If not see
;; <http://www.gnu.org/licenses/>.
;; Iterator for comparison types
(define_code_iterator CMP_TEST [eq lt gt unordered])

;; Mode attribute for vector floate and floato conversions
(define_mode_attr VF_sxddp [(V2DI "sxd") (V2DF "dp")])

;; Iterator for both scalar and vector floating point types supported by VSX
(define_mode_iterator VSX_B [DF V4SF V2DF])

;; Iterator for the 2 64-bit vector types
(define_mode_iterator VSX_D [V2DF V2DI])
;; Mode iterator to handle swapping words on little endian for the 128-bit
;; types that go in a single vector register.
(define_mode_iterator VSX_LE_128 [(KF "FLOAT128_VECTOR_P (KFmode)")
                                  (TF "FLOAT128_VECTOR_P (TFmode)")

;; Iterator for 128-bit integer types that go in a single vector register.
(define_mode_iterator VSX_TI [TI V1TI])
;; Iterator for the 2 32-bit vector types
(define_mode_iterator VSX_W [V4SF V4SI])

;; Iterator for the DF types
(define_mode_iterator VSX_DF [V2DF DF])

;; Iterator for vector floating point types supported by VSX
(define_mode_iterator VSX_F [V4SF V2DF])

;; Iterator for logical types supported by VSX
(define_mode_iterator VSX_L [V16QI
                             (KF "FLOAT128_VECTOR_P (KFmode)")
                             (TF "FLOAT128_VECTOR_P (TFmode)")])
;; Iterator for memory moves.
(define_mode_iterator VSX_M [V16QI
                             (KF "FLOAT128_VECTOR_P (KFmode)")
                             (TF "FLOAT128_VECTOR_P (TFmode)")

(define_mode_attr VSX_XXBR [(V8HI "h")

;; Map into the appropriate load/store name based on the type
(define_mode_attr VSm [(V16QI "vw4")

;; Map into the appropriate suffix based on the type
(define_mode_attr VSs [(V16QI "sp")

;; Map the register class used
(define_mode_attr VSr [(V16QI "v")
;; Map the register class used for float<->int conversions (floating point side)
;; VSr2 is the preferred register class, VSr3 is any register class that will
(define_mode_attr VSr2 [(V2DF "wd")

(define_mode_attr VSr3 [(V2DF "wa")

;; Map the register class for sp<->dp float conversions, destination
(define_mode_attr VSr4 [(SF "ws")

;; Map the register class for sp<->dp float conversions, source
(define_mode_attr VSr5 [(SF "ws")

;; The VSX register class that a type can occupy, even if it is not the
;; preferred register class (VSr is the preferred register class that will get
(define_mode_attr VSa [(V16QI "wa")

;; Same size integer type for floating point data
(define_mode_attr VSi [(V4SF "v4si")

(define_mode_attr VSI [(V4SF "V4SI")

;; Word size for same size conversion
(define_mode_attr VSc [(V4SF "w")

;; Map into either s or v, depending on whether this is a scalar or vector
(define_mode_attr VSv [(V16QI "v")

;; Appropriate type for add ops (and other simple FP ops)
(define_mode_attr VStype_simple [(V2DF "vecdouble")

(define_mode_attr VSfptype_simple [(V2DF "fp_addsub_d")

;; Appropriate type for multiply ops
(define_mode_attr VStype_mul [(V2DF "vecdouble")

(define_mode_attr VSfptype_mul [(V2DF "fp_mul_d")

;; Appropriate type for divide ops.
(define_mode_attr VStype_div [(V2DF "vecdiv")

(define_mode_attr VSfptype_div [(V2DF "fp_div_d")

;; Appropriate type for sqrt ops.  For now, just lump the vector sqrt with
(define_mode_attr VStype_sqrt [(V2DF "dsqrt")

(define_mode_attr VSfptype_sqrt [(V2DF "fp_sqrt_d")
;; Iterator and modes for sp<->dp conversions
;; Because scalar SF values are represented internally as double, use the
;; V4SF type to represent this rather than SF.
(define_mode_iterator VSX_SPDP [DF V4SF V2DF])
(define_mode_attr VS_spdp_res [(DF "V4SF")

(define_mode_attr VS_spdp_insn [(DF "xscvdpsp")

(define_mode_attr VS_spdp_type [(DF "fp")

;; Map the scalar mode for a vector type
(define_mode_attr VS_scalar [(V1TI "TI")

;; Map to a double-sized vector mode
(define_mode_attr VS_double [(V4SI "V8SI")

;; Map register class for 64-bit element in 128-bit vector for direct moves
(define_mode_attr VS_64dm [(V2DF "wk")

;; Map register class for 64-bit element in 128-bit vector for normal register
(define_mode_attr VS_64reg [(V2DF "ws")

;; Iterators for loading constants with xxspltib
(define_mode_iterator VSINT_84 [V4SI V2DI DI SI])
(define_mode_iterator VSINT_842 [V8HI V4SI V2DI])

;; Vector reverse byte modes
(define_mode_iterator VEC_REVB [V8HI V4SI V2DI V4SF V2DF V1TI])

;; Iterator for ISA 3.0 vector extract/insert of small integer vectors.
;; VSX_EXTRACT_I2 doesn't include V4SImode because SI extracts can be
;; done on ISA 2.07 and not just ISA 3.0.
(define_mode_iterator VSX_EXTRACT_I [V16QI V8HI V4SI])
(define_mode_iterator VSX_EXTRACT_I2 [V16QI V8HI])

(define_mode_attr VSX_EXTRACT_WIDTH [(V16QI "b")

;; Mode attribute to give the correct predicate for ISA 3.0 vector extract and
;; insert to validate the operand number.
(define_mode_attr VSX_EXTRACT_PREDICATE [(V16QI "const_0_to_15_operand")
                                         (V8HI "const_0_to_7_operand")
                                         (V4SI "const_0_to_3_operand")])
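
;; For illustration only (not part of the machine description): a user-level
;; C sketch, assuming <altivec.h> and VSX enabled, of the extracts these
;; iterators cover.  With a constant element number (validated by
;; VSX_EXTRACT_PREDICATE above), ISA 3.0 can extract directly from the
;; vector register instead of going through memory.
;;
;;   #include <altivec.h>
;;
;;   short
;;   third_halfword (vector short v)
;;   {
;;     return vec_extract (v, 3);   /* constant index 0..7 for V8HI */
;;   }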
;; Mode attribute to give the constraint for vector extract and insert
(define_mode_attr VSX_EX [(V16QI "v")

;; Mode iterator for binary floating types other than double, used to
;; optimize converting to that floating point type from an extract
;; of an integer type
(define_mode_iterator VSX_EXTRACT_FL [SF
                                      (IF "FLOAT128_2REG_P (IFmode)")
                                      (KF "TARGET_FLOAT128_HW")
                                      (TF "FLOAT128_2REG_P (TFmode)
                                           || (FLOAT128_IEEE_P (TFmode)
                                               && TARGET_FLOAT128_HW)")])
;; Mode iterator for binary floating types that have a direct conversion
;; from 64-bit integer to floating point
(define_mode_iterator FL_CONV [SF
                               (KF "TARGET_FLOAT128_HW")
                               (TF "TARGET_FLOAT128_HW
                                    && FLOAT128_IEEE_P (TFmode)")])

;; Iterator for the 2 short vector types to do a splat from an integer
(define_mode_iterator VSX_SPLAT_I [V16QI V8HI])

;; Mode attribute to give the count for the splat instruction to splat
;; the value in the 64-bit integer slot
(define_mode_attr VSX_SPLAT_COUNT [(V16QI "7") (V8HI "3")])

;; Mode attribute to give the suffix for the splat instruction
(define_mode_attr VSX_SPLAT_SUFFIX [(V16QI "b") (V8HI "h")])
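
;; For illustration (hypothetical user code, assuming <altivec.h>): these
;; attributes describe splatting an integer value across a V16QI/V8HI
;; vector, with VSX_SPLAT_COUNT and VSX_SPLAT_SUFFIX selecting the
;; vsplt{b,h} element count and suffix.
;;
;;   #include <altivec.h>
;;
;;   vector signed char
;;   splat_char (signed char c)
;;   {
;;     return vec_splats (c);   /* all 16 elements become c */
;;   }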
;; Constants for creating unspecs
(define_c_enum "unspec"
   UNSPEC_VSX_UNS_FLOAT2
   UNSPEC_VSX_UNS_FLOATE
   UNSPEC_VSX_UNS_FLOATO
   UNSPEC_VSX_XVCVDPSXDS
   UNSPEC_VSX_XVCVDPUXDS
   UNSPEC_VSX_SIGN_EXTEND
   UNSPEC_VSX_XVCVSPSXWS
   UNSPEC_VSX_XVCVSPSXDS
   UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTH
   UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTL
   UNSPEC_VSX_FIRST_MATCH_INDEX
   UNSPEC_VSX_FIRST_MATCH_EOS_INDEX
   UNSPEC_VSX_FIRST_MISMATCH_INDEX
   UNSPEC_VSX_FIRST_MISMATCH_EOS_INDEX
;; The patterns for LE permuted loads and stores come before the general
;; VSX moves so they match first.
(define_insn_and_split "*vsx_le_perm_load_<mode>"
  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=<VSa>")
        (match_operand:VSX_D 1 "indexed_or_indirect_operand" "Z"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
              (parallel [(const_int 1) (const_int 0)])))
              (parallel [(const_int 1) (const_int 0)])))]
  rtx mem = operands[1];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register destination is not in the altivec
     range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && ((reg_or_subregno (operands[0]) >= FIRST_PSEUDO_REGISTER)
          || ALTIVEC_REGNO_P (reg_or_subregno (operands[0]))))
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);

      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
          /* Replace the source memory address with masked address.  */
          rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem);
          emit_insn (lvx_set_expr);
      else if (rs6000_quadword_masked_address_p (mem_address))
          /* This rtl is already in the form that matches lvx
             instruction, so leave it alone.  */
      /* Otherwise, fall through to transform into a swapping load.  */

  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
  [(set_attr "type" "vecload")
   (set_attr "length" "8")])
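
;; For illustration (a sketch, assuming -mcpu=power8 -O2 on little endian):
;; a plain vector load like the one below is typically expanded through the
;; pattern above as an lxvd2x plus an xxpermdi doubleword swap, unless the
;; split can rewrite a sufficiently aligned access to use lvx instead, or
;; the later swap-elimination pass removes the permute.
;;
;;   #include <altivec.h>
;;
;;   vector double
;;   load_v2df (const vector double *p)
;;   {
;;     return *p;   /* lxvd2x + xxpermdi, or lvx when the split applies */
;;   }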
(define_insn_and_split "*vsx_le_perm_load_<mode>"
  [(set (match_operand:VSX_W 0 "vsx_register_operand" "=<VSa>")
        (match_operand:VSX_W 1 "indexed_or_indirect_operand" "Z"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
              (parallel [(const_int 2) (const_int 3)
                         (const_int 0) (const_int 1)])))
              (parallel [(const_int 2) (const_int 3)
                         (const_int 0) (const_int 1)])))]
  rtx mem = operands[1];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register destination is not in the altivec
     range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && ((REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER)
          || ALTIVEC_REGNO_P (REGNO (operands[0]))))
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);

      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
          /* Replace the source memory address with masked address.  */
          rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem);
          emit_insn (lvx_set_expr);
      else if (rs6000_quadword_masked_address_p (mem_address))
          /* This rtl is already in the form that matches lvx
             instruction, so leave it alone.  */
      /* Otherwise, fall through to transform into a swapping load.  */

  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
  [(set_attr "type" "vecload")
   (set_attr "length" "8")])
(define_insn_and_split "*vsx_le_perm_load_v8hi"
  [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
        (match_operand:V8HI 1 "indexed_or_indirect_operand" "Z"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
              (parallel [(const_int 4) (const_int 5)
                         (const_int 6) (const_int 7)
                         (const_int 0) (const_int 1)
                         (const_int 2) (const_int 3)])))
              (parallel [(const_int 4) (const_int 5)
                         (const_int 6) (const_int 7)
                         (const_int 0) (const_int 1)
                         (const_int 2) (const_int 3)])))]
  rtx mem = operands[1];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register destination is not in the altivec
     range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && ((REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER)
          || ALTIVEC_REGNO_P (REGNO (operands[0]))))
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);

      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
          /* Replace the source memory address with masked address.  */
          rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem);
          emit_insn (lvx_set_expr);
      else if (rs6000_quadword_masked_address_p (mem_address))
          /* This rtl is already in the form that matches lvx
             instruction, so leave it alone.  */
      /* Otherwise, fall through to transform into a swapping load.  */

  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
  [(set_attr "type" "vecload")
   (set_attr "length" "8")])
(define_insn_and_split "*vsx_le_perm_load_v16qi"
  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
        (match_operand:V16QI 1 "indexed_or_indirect_operand" "Z"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
              (parallel [(const_int 8) (const_int 9)
                         (const_int 10) (const_int 11)
                         (const_int 12) (const_int 13)
                         (const_int 14) (const_int 15)
                         (const_int 0) (const_int 1)
                         (const_int 2) (const_int 3)
                         (const_int 4) (const_int 5)
                         (const_int 6) (const_int 7)])))
              (parallel [(const_int 8) (const_int 9)
                         (const_int 10) (const_int 11)
                         (const_int 12) (const_int 13)
                         (const_int 14) (const_int 15)
                         (const_int 0) (const_int 1)
                         (const_int 2) (const_int 3)
                         (const_int 4) (const_int 5)
                         (const_int 6) (const_int 7)])))]
  rtx mem = operands[1];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register destination is not in the altivec
     range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && ((REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER)
          || ALTIVEC_REGNO_P (REGNO (operands[0]))))
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);

      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
          /* Replace the source memory address with masked address.  */
          rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem);
          emit_insn (lvx_set_expr);
      else if (rs6000_quadword_masked_address_p (mem_address))
          /* This rtl is already in the form that matches lvx
             instruction, so leave it alone.  */
      /* Otherwise, fall through to transform into a swapping load.  */

  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
  [(set_attr "type" "vecload")
   (set_attr "length" "8")])
(define_insn "*vsx_le_perm_store_<mode>"
  [(set (match_operand:VSX_D 0 "indexed_or_indirect_operand" "=Z")
        (match_operand:VSX_D 1 "vsx_register_operand" "+<VSa>"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  [(set_attr "type" "vecstore")
   (set_attr "length" "12")])

  [(set (match_operand:VSX_D 0 "indexed_or_indirect_operand" "")
        (match_operand:VSX_D 1 "vsx_register_operand" ""))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
              (parallel [(const_int 1) (const_int 0)])))
              (parallel [(const_int 1) (const_int 0)])))]
  rtx mem = operands[0];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register source is not in the altivec range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && ((reg_or_subregno (operands[1]) >= FIRST_PSEUDO_REGISTER)
          || ALTIVEC_REGNO_P (reg_or_subregno (operands[1]))))
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);
      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
          rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]);
          emit_insn (stvx_set_expr);
      else if (rs6000_quadword_masked_address_p (mem_address))
          /* This rtl is already in the form that matches stvx instruction,
             so leave it alone.  */
      /* Otherwise, fall through to transform into a swapping store.  */

  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])

;; The post-reload split requires that we re-permute the source
;; register in case it is still live.
  [(set (match_operand:VSX_D 0 "indexed_or_indirect_operand" "")
        (match_operand:VSX_D 1 "vsx_register_operand" ""))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
              (parallel [(const_int 1) (const_int 0)])))
              (parallel [(const_int 1) (const_int 0)])))
              (parallel [(const_int 1) (const_int 0)])))]
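
;; For illustration: a hand-written sketch (hypothetical helper using GNU
;; inline asm) of the sequence the post-reload split above emits: permute
;; the source, store it, then permute it back so the register still holds
;; its original value in case it is live afterwards.
;;
;;   #include <altivec.h>
;;
;;   static inline void
;;   store_v2df_le (vector double *mem, vector double v)
;;   {
;;     __asm__ ("xxpermdi %x0,%x0,%x0,2\n\t"   /* swap doublewords */
;;              "stxvd2x %x0,%y1\n\t"          /* store swapped value */
;;              "xxpermdi %x0,%x0,%x0,2"       /* restore original */
;;              : "+wa" (v), "=Z" (*mem));
;;   }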
(define_insn "*vsx_le_perm_store_<mode>"
  [(set (match_operand:VSX_W 0 "indexed_or_indirect_operand" "=Z")
        (match_operand:VSX_W 1 "vsx_register_operand" "+<VSa>"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  [(set_attr "type" "vecstore")
   (set_attr "length" "12")])

  [(set (match_operand:VSX_W 0 "indexed_or_indirect_operand" "")
        (match_operand:VSX_W 1 "vsx_register_operand" ""))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
              (parallel [(const_int 2) (const_int 3)
                         (const_int 0) (const_int 1)])))
              (parallel [(const_int 2) (const_int 3)
                         (const_int 0) (const_int 1)])))]
  rtx mem = operands[0];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register source is not in the altivec range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && ((reg_or_subregno (operands[1]) >= FIRST_PSEUDO_REGISTER)
          || ALTIVEC_REGNO_P (reg_or_subregno (operands[1]))))
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);
      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
          rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]);
          emit_insn (stvx_set_expr);
      else if (rs6000_quadword_masked_address_p (mem_address))
          /* This rtl is already in the form that matches stvx instruction,
             so leave it alone.  */
      /* Otherwise, fall through to transform into a swapping store.  */

  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])

;; The post-reload split requires that we re-permute the source
;; register in case it is still live.
  [(set (match_operand:VSX_W 0 "indexed_or_indirect_operand" "")
        (match_operand:VSX_W 1 "vsx_register_operand" ""))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
              (parallel [(const_int 2) (const_int 3)
                         (const_int 0) (const_int 1)])))
              (parallel [(const_int 2) (const_int 3)
                         (const_int 0) (const_int 1)])))
              (parallel [(const_int 2) (const_int 3)
                         (const_int 0) (const_int 1)])))]
(define_insn "*vsx_le_perm_store_v8hi"
  [(set (match_operand:V8HI 0 "indexed_or_indirect_operand" "=Z")
        (match_operand:V8HI 1 "vsx_register_operand" "+wa"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  [(set_attr "type" "vecstore")
   (set_attr "length" "12")])

  [(set (match_operand:V8HI 0 "indexed_or_indirect_operand" "")
        (match_operand:V8HI 1 "vsx_register_operand" ""))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
              (parallel [(const_int 4) (const_int 5)
                         (const_int 6) (const_int 7)
                         (const_int 0) (const_int 1)
                         (const_int 2) (const_int 3)])))
              (parallel [(const_int 4) (const_int 5)
                         (const_int 6) (const_int 7)
                         (const_int 0) (const_int 1)
                         (const_int 2) (const_int 3)])))]
  rtx mem = operands[0];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register source is not in the altivec range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && ((reg_or_subregno (operands[1]) >= FIRST_PSEUDO_REGISTER)
          || ALTIVEC_REGNO_P (reg_or_subregno (operands[1]))))
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);
      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
          rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]);
          emit_insn (stvx_set_expr);
      else if (rs6000_quadword_masked_address_p (mem_address))
          /* This rtl is already in the form that matches stvx instruction,
             so leave it alone.  */
      /* Otherwise, fall through to transform into a swapping store.  */

  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])

;; The post-reload split requires that we re-permute the source
;; register in case it is still live.
  [(set (match_operand:V8HI 0 "indexed_or_indirect_operand" "")
        (match_operand:V8HI 1 "vsx_register_operand" ""))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
              (parallel [(const_int 4) (const_int 5)
                         (const_int 6) (const_int 7)
                         (const_int 0) (const_int 1)
                         (const_int 2) (const_int 3)])))
              (parallel [(const_int 4) (const_int 5)
                         (const_int 6) (const_int 7)
                         (const_int 0) (const_int 1)
                         (const_int 2) (const_int 3)])))
              (parallel [(const_int 4) (const_int 5)
                         (const_int 6) (const_int 7)
                         (const_int 0) (const_int 1)
                         (const_int 2) (const_int 3)])))]
(define_insn "*vsx_le_perm_store_v16qi"
  [(set (match_operand:V16QI 0 "indexed_or_indirect_operand" "=Z")
        (match_operand:V16QI 1 "vsx_register_operand" "+wa"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  [(set_attr "type" "vecstore")
   (set_attr "length" "12")])

  [(set (match_operand:V16QI 0 "indexed_or_indirect_operand" "")
        (match_operand:V16QI 1 "vsx_register_operand" ""))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
              (parallel [(const_int 8) (const_int 9)
                         (const_int 10) (const_int 11)
                         (const_int 12) (const_int 13)
                         (const_int 14) (const_int 15)
                         (const_int 0) (const_int 1)
                         (const_int 2) (const_int 3)
                         (const_int 4) (const_int 5)
                         (const_int 6) (const_int 7)])))
              (parallel [(const_int 8) (const_int 9)
                         (const_int 10) (const_int 11)
                         (const_int 12) (const_int 13)
                         (const_int 14) (const_int 15)
                         (const_int 0) (const_int 1)
                         (const_int 2) (const_int 3)
                         (const_int 4) (const_int 5)
                         (const_int 6) (const_int 7)])))]
  rtx mem = operands[0];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register source is not in the altivec range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && ((reg_or_subregno (operands[1]) >= FIRST_PSEUDO_REGISTER)
          || ALTIVEC_REGNO_P (reg_or_subregno (operands[1]))))
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);
      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
          rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]);
          emit_insn (stvx_set_expr);
      else if (rs6000_quadword_masked_address_p (mem_address))
          /* This rtl is already in the form that matches stvx instruction,
             so leave it alone.  */
      /* Otherwise, fall through to transform into a swapping store.  */

  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])

;; The post-reload split requires that we re-permute the source
;; register in case it is still live.
  [(set (match_operand:V16QI 0 "indexed_or_indirect_operand" "")
        (match_operand:V16QI 1 "vsx_register_operand" ""))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
              (parallel [(const_int 8) (const_int 9)
                         (const_int 10) (const_int 11)
                         (const_int 12) (const_int 13)
                         (const_int 14) (const_int 15)
                         (const_int 0) (const_int 1)
                         (const_int 2) (const_int 3)
                         (const_int 4) (const_int 5)
                         (const_int 6) (const_int 7)])))
              (parallel [(const_int 8) (const_int 9)
                         (const_int 10) (const_int 11)
                         (const_int 12) (const_int 13)
                         (const_int 14) (const_int 15)
                         (const_int 0) (const_int 1)
                         (const_int 2) (const_int 3)
                         (const_int 4) (const_int 5)
                         (const_int 6) (const_int 7)])))
              (parallel [(const_int 8) (const_int 9)
                         (const_int 10) (const_int 11)
                         (const_int 12) (const_int 13)
                         (const_int 14) (const_int 15)
                         (const_int 0) (const_int 1)
                         (const_int 2) (const_int 3)
                         (const_int 4) (const_int 5)
                         (const_int 6) (const_int 7)])))]
;; Little endian word swapping for 128-bit types that are either scalars or the
;; special V1TI container class, which it is not appropriate to use vec_select
;; for the type.
(define_insn "*vsx_le_permute_<mode>"
  [(set (match_operand:VSX_TI 0 "nonimmediate_operand" "=<VSa>,<VSa>,Z,&r,&r,Q")
          (match_operand:VSX_TI 1 "input_operand" "<VSa>,Z,<VSa>,r,Q,r")
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
   xxpermdi %x0,%x1,%x1,2
   mr %0,%L1\;mr %L0,%1
   ld%U1%X1 %0,%L1\;ld%U1%X1 %L0,%1
   std%U0%X0 %L1,%0\;std%U0%X0 %1,%L0"
  [(set_attr "length" "4,4,4,8,8,8")
   (set_attr "type" "vecperm,vecload,vecstore,*,load,store")])

(define_insn_and_split "*vsx_le_undo_permute_<mode>"
  [(set (match_operand:VSX_TI 0 "vsx_register_operand" "=<VSa>,<VSa>")
          (match_operand:VSX_TI 1 "vsx_register_operand" "0,<VSa>")
  "!BYTES_BIG_ENDIAN && TARGET_VSX"
  [(set (match_dup 0) (match_dup 1))]
  if (reload_completed && REGNO (operands[0]) == REGNO (operands[1]))
    emit_note (NOTE_INSN_DELETED);
  [(set_attr "length" "0,4")
   (set_attr "type" "veclogical")])

(define_insn_and_split "*vsx_le_perm_load_<mode>"
  [(set (match_operand:VSX_LE_128 0 "vsx_register_operand" "=<VSa>,r")
        (match_operand:VSX_LE_128 1 "memory_operand" "Z,Q"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  rtx tmp = (can_create_pseudo_p ()
             ? gen_reg_rtx_and_attrs (operands[0])
  rs6000_emit_le_vsx_permute (tmp, operands[1], <MODE>mode);
  rs6000_emit_le_vsx_permute (operands[0], tmp, <MODE>mode);
  [(set_attr "type" "vecload,load")
   (set_attr "length" "8,8")])
(define_insn "*vsx_le_perm_store_<mode>"
  [(set (match_operand:VSX_LE_128 0 "memory_operand" "=Z,Q")
        (match_operand:VSX_LE_128 1 "vsx_register_operand" "+<VSa>,r"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  [(set_attr "type" "vecstore,store")
   (set_attr "length" "12,8")])

  [(set (match_operand:VSX_LE_128 0 "memory_operand" "")
        (match_operand:VSX_LE_128 1 "vsx_register_operand" ""))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !reload_completed && !TARGET_P9_VECTOR"
  rtx tmp = (can_create_pseudo_p ()
             ? gen_reg_rtx_and_attrs (operands[0])
  rs6000_emit_le_vsx_permute (tmp, operands[1], <MODE>mode);
  rs6000_emit_le_vsx_permute (operands[0], tmp, <MODE>mode);

;; Peepholes to catch loads and stores for TImode if TImode landed in
;; GPR registers on a little endian system.
  [(set (match_operand:VSX_TI 0 "int_reg_operand")
        (rotate:VSX_TI (match_operand:VSX_TI 1 "memory_operand")
   (set (match_operand:VSX_TI 2 "int_reg_operand")
        (rotate:VSX_TI (match_dup 0)
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
   && (rtx_equal_p (operands[0], operands[2])
       || peep2_reg_dead_p (2, operands[0]))"
  [(set (match_dup 2) (match_dup 1))])

  [(set (match_operand:VSX_TI 0 "int_reg_operand")
        (rotate:VSX_TI (match_operand:VSX_TI 1 "int_reg_operand")
   (set (match_operand:VSX_TI 2 "memory_operand")
        (rotate:VSX_TI (match_dup 0)
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
   && peep2_reg_dead_p (2, operands[0])"
  [(set (match_dup 2) (match_dup 1))])

;; Peephole to catch memory to memory transfers for TImode if TImode landed in
;; VSX registers on a little endian system.  The vector types and IEEE 128-bit
;; floating point are handled by the more generic swap elimination pass.
  [(set (match_operand:TI 0 "vsx_register_operand" "")
        (rotate:TI (match_operand:TI 1 "vsx_register_operand" "")
   (set (match_operand:TI 2 "vsx_register_operand" "")
        (rotate:TI (match_dup 0)
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
   && (rtx_equal_p (operands[0], operands[2])
       || peep2_reg_dead_p (2, operands[0]))"
  [(set (match_dup 2) (match_dup 1))])

;; The post-reload split requires that we re-permute the source
;; register in case it is still live.
  [(set (match_operand:VSX_LE_128 0 "memory_operand" "")
        (match_operand:VSX_LE_128 1 "vsx_register_operand" ""))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && reload_completed && !TARGET_P9_VECTOR"
  rs6000_emit_le_vsx_permute (operands[1], operands[1], <MODE>mode);
  rs6000_emit_le_vsx_permute (operands[0], operands[1], <MODE>mode);
  rs6000_emit_le_vsx_permute (operands[1], operands[1], <MODE>mode);
;; Vector constants that can be generated with XXSPLTIB, which was added in
;; ISA 3.0.  Both (const_vector [..]) and (vec_duplicate ...) forms are
;; recognized.
(define_insn "xxspltib_v16qi"
  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
        (vec_duplicate:V16QI (match_operand:SI 1 "s8bit_cint_operand" "n")))]
  operands[2] = GEN_INT (INTVAL (operands[1]) & 0xff);
  return "xxspltib %x0,%2";
  [(set_attr "type" "vecperm")])
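
;; For illustration (a sketch, assuming -mcpu=power9): the pattern above is
;; what a constant byte splat compiles to on ISA 3.0, one xxspltib instead
;; of a load from the constant pool.
;;
;;   #include <altivec.h>
;;
;;   vector signed char
;;   all_minus_five (void)
;;   {
;;     return vec_splats ((signed char) -5);   /* xxspltib with imm 251 */
;;   }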
(define_insn "xxspltib_<mode>_nosplit"
  [(set (match_operand:VSINT_842 0 "vsx_register_operand" "=wa,wa")
        (match_operand:VSINT_842 1 "xxspltib_constant_nosplit" "jwM,wE"))]
  rtx op1 = operands[1];

  if (!xxspltib_constant_p (op1, <MODE>mode, &num_insns, &value)
  operands[2] = GEN_INT (value & 0xff);
  return "xxspltib %x0,%2";
  [(set_attr "type" "vecperm")])

(define_insn_and_split "*xxspltib_<mode>_split"
  [(set (match_operand:VSINT_842 0 "altivec_register_operand" "=v")
        (match_operand:VSINT_842 1 "xxspltib_constant_split" "wS"))]
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  rtx tmp = ((can_create_pseudo_p ())
             ? gen_reg_rtx (V16QImode)
             : gen_lowpart (V16QImode, op0));

  if (!xxspltib_constant_p (op1, <MODE>mode, &num_insns, &value)

  emit_insn (gen_xxspltib_v16qi (tmp, GEN_INT (value)));

  if (<MODE>mode == V2DImode)
    emit_insn (gen_vsx_sign_extend_qi_v2di (op0, tmp));
  else if (<MODE>mode == V4SImode)
    emit_insn (gen_vsx_sign_extend_qi_v4si (op0, tmp));
  else if (<MODE>mode == V8HImode)
    emit_insn (gen_altivec_vupkhsb (op0, tmp));
  [(set_attr "type" "vecperm")
   (set_attr "length" "8")])
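
;; For illustration (a sketch, assuming -mcpu=power9): for the wider
;; element modes the split above emits xxspltib into a temporary followed
;; by a sign extension (gen_vsx_sign_extend_qi_v2di/v4si, or vupkhsb for
;; V8HI), e.g.:
;;
;;   #include <altivec.h>
;;
;;   vector long long
;;   all_minus_five_di (void)
;;   {
;;     return vec_splats (-5LL);   /* xxspltib then byte sign-extend */
;;   }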
;; Prefer using vector registers over GPRs.  Prefer using ISA 3.0's XXSPLTIB
;; or Altivec VSPLTISW 0/-1 over XXLXOR/XXLORC to set a register to all 0's or
;; all 1's, since the machine does not have to wait for the previous
;; instruction using the register being set (such as a store waiting on a slow
;; instruction).  But generate XXLXOR/XXLORC if it will avoid a register move.

;;              VSX store  VSX load   VSX move  VSX->GPR   GPR->VSX    LQ (GPR)
;;              STQ (GPR)  GPR load   GPR store GPR move   XXSPLTIB    VSPLTISW
;;              VSX 0/-1   GPR 0/-1   VMX const GPR const  LVX (VMX)   STVX (VMX)
(define_insn "*vsx_mov<mode>_64bit"
  [(set (match_operand:VSX_M 0 "nonimmediate_operand"
               "=ZwO,      <VSa>,     <VSa>,    r,         we,        ?wQ,
                ?&r,       ??r,       ??Y,      ??r,       wo,        v,
                ?<VSa>,    *r,        v,        ??r,       wZ,        v")
        (match_operand:VSX_M 1 "input_operand"
               "<VSa>,     ZwO,       <VSa>,    we,        r,         r,
                wQ,        Y,         r,        r,         wE,        jwM,
                ?jwM,      jwM,       W,        W,         v,         wZ"))]
  "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[1], <MODE>mode))"
  return rs6000_output_move_128bit (operands);
               "vecstore,  vecload,   vecsimple, mffgpr,   mftgpr,    load,
                store,     load,      store,     *,        vecsimple, vecsimple,
                vecsimple, *,         *,         *,        vecstore,  vecload")
                4,         8,         20,        20,       4,         4")])
;;              VSX store  VSX load   VSX move   GPR load  GPR store  GPR move
;;              XXSPLTIB   VSPLTISW   VSX 0/-1   GPR 0/-1  VMX const  GPR const
;;              LVX (VMX)  STVX (VMX)
(define_insn "*vsx_mov<mode>_32bit"
  [(set (match_operand:VSX_M 0 "nonimmediate_operand"
               "=ZwO,      <VSa>,     <VSa>,     ??r,      ??Y,       ??r,
                wo,        v,         ?<VSa>,    *r,       v,         ??r,
        (match_operand:VSX_M 1 "input_operand"
               "<VSa>,     ZwO,       <VSa>,     Y,        r,         r,
                wE,        jwM,       ?jwM,      jwM,      W,         W,
  "!TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[1], <MODE>mode))"
  return rs6000_output_move_128bit (operands);
               "vecstore,  vecload,   vecsimple, load,     store,     *,
                vecsimple, vecsimple, vecsimple, *,        *,         *,
               "4,         4,         4,         16,       16,        16,
                4,         4,         4,         16,       20,        32,

;; Explicit load/store expanders for the builtin functions
(define_expand "vsx_load_<mode>"
  [(set (match_operand:VSX_M 0 "vsx_register_operand" "")
        (match_operand:VSX_M 1 "memory_operand" ""))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
  /* Expand to swaps if needed, prior to swap optimization.  */
  if (!BYTES_BIG_ENDIAN && !TARGET_P9_VECTOR)
      rs6000_emit_le_vsx_move (operands[0], operands[1], <MODE>mode);

(define_expand "vsx_store_<mode>"
  [(set (match_operand:VSX_M 0 "memory_operand" "")
        (match_operand:VSX_M 1 "vsx_register_operand" ""))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
  /* Expand to swaps if needed, prior to swap optimization.  */
  if (!BYTES_BIG_ENDIAN && !TARGET_P9_VECTOR)
      rs6000_emit_le_vsx_move (operands[0], operands[1], <MODE>mode);
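
;; For illustration (hypothetical user code): these expanders back the
;; explicit VSX load/store built-ins reachable through vec_vsx_ld and
;; vec_vsx_st, which access elements in array order on either endianness;
;; any swap needed on little endian is emitted here, before the swap
;; optimization pass runs.
;;
;;   #include <altivec.h>
;;
;;   vector int
;;   copy_v4si (const vector int *src, vector int *dst)
;;   {
;;     vector int v = vec_vsx_ld (0, src);
;;     vec_vsx_st (v, 0, dst);
;;     return v;
;;   }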
;; Explicit load/store expanders for the builtin functions for lxvd2x, etc.,
;; when you really want their element-reversing behavior.
(define_insn "vsx_ld_elemrev_v2di"
  [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
          (match_operand:V2DI 1 "memory_operand" "Z")
          (parallel [(const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN"
  [(set_attr "type" "vecload")])

(define_insn "vsx_ld_elemrev_v1ti"
  [(set (match_operand:V1TI 0 "vsx_register_operand" "=wa")
          (match_operand:V1TI 1 "memory_operand" "Z")
          (parallel [(const_int 0)])))]
  "VECTOR_MEM_VSX_P (V1TImode) && !BYTES_BIG_ENDIAN"
  return "lxvd2x %x0,%y1\;xxpermdi %x0,%x0,%x0,2";
  [(set_attr "type" "vecload")])

(define_insn "vsx_ld_elemrev_v2df"
  [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
          (match_operand:V2DF 1 "memory_operand" "Z")
          (parallel [(const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V2DFmode) && !BYTES_BIG_ENDIAN"
  [(set_attr "type" "vecload")])

(define_insn "vsx_ld_elemrev_v4si"
  [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa")
          (match_operand:V4SI 1 "memory_operand" "Z")
          (parallel [(const_int 3) (const_int 2)
                     (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V4SImode) && !BYTES_BIG_ENDIAN"
  [(set_attr "type" "vecload")])

(define_insn "vsx_ld_elemrev_v4sf"
  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
          (match_operand:V4SF 1 "memory_operand" "Z")
          (parallel [(const_int 3) (const_int 2)
                     (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V4SFmode) && !BYTES_BIG_ENDIAN"
  [(set_attr "type" "vecload")])
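
;; For illustration (a sketch, assuming GCC's vec_xl_be intrinsic): the
;; ld_elemrev patterns implement element-reversing loads, i.e. on little
;; endian the elements end up in the register in big-endian element order.
;;
;;   #include <altivec.h>
;;
;;   vector int
;;   load_be_order (const signed int *p)
;;   {
;;     return vec_xl_be (0, p);   /* elements in big-endian order */
;;   }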
(define_expand "vsx_ld_elemrev_v8hi"
  [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
          (match_operand:V8HI 1 "memory_operand" "Z")
          (parallel [(const_int 7) (const_int 6)
                     (const_int 5) (const_int 4)
                     (const_int 3) (const_int 2)
                     (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN"
  if (!TARGET_P9_VECTOR)
      rtx tmp = gen_reg_rtx (V4SImode);
      rtx subreg, subreg2, perm[16], pcv;
      /* 2 is leftmost element in register */
      unsigned int reorder[16] = {13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2};

      subreg = simplify_gen_subreg (V4SImode, operands[1], V8HImode, 0);
      emit_insn (gen_vsx_ld_elemrev_v4si (tmp, subreg));
      subreg2 = simplify_gen_subreg (V8HImode, tmp, V4SImode, 0);

      for (i = 0; i < 16; ++i)
        perm[i] = GEN_INT (reorder[i]);

      pcv = force_reg (V16QImode,
                       gen_rtx_CONST_VECTOR (V16QImode,
                                             gen_rtvec_v (16, perm)));
      emit_insn (gen_altivec_vperm_v8hi_direct (operands[0], subreg2,

(define_insn "*vsx_ld_elemrev_v8hi_internal"
  [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
          (match_operand:V8HI 1 "memory_operand" "Z")
          (parallel [(const_int 7) (const_int 6)
                     (const_int 5) (const_int 4)
                     (const_int 3) (const_int 2)
                     (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
  [(set_attr "type" "vecload")])

(define_expand "vsx_ld_elemrev_v16qi"
  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
          (match_operand:V16QI 1 "memory_operand" "Z")
          (parallel [(const_int 15) (const_int 14)
                     (const_int 13) (const_int 12)
                     (const_int 11) (const_int 10)
                     (const_int 9) (const_int 8)
                     (const_int 7) (const_int 6)
                     (const_int 5) (const_int 4)
                     (const_int 3) (const_int 2)
                     (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN"
  if (!TARGET_P9_VECTOR)
      rtx tmp = gen_reg_rtx (V4SImode);
      rtx subreg, subreg2, perm[16], pcv;
      /* 3 is leftmost element in register */
      unsigned int reorder[16] = {12,13,14,15,8,9,10,11,4,5,6,7,0,1,2,3};

      subreg = simplify_gen_subreg (V4SImode, operands[1], V16QImode, 0);
      emit_insn (gen_vsx_ld_elemrev_v4si (tmp, subreg));
      subreg2 = simplify_gen_subreg (V16QImode, tmp, V4SImode, 0);

      for (i = 0; i < 16; ++i)
        perm[i] = GEN_INT (reorder[i]);

      pcv = force_reg (V16QImode,
                       gen_rtx_CONST_VECTOR (V16QImode,
                                             gen_rtvec_v (16, perm)));
      emit_insn (gen_altivec_vperm_v16qi_direct (operands[0], subreg2,

(define_insn "*vsx_ld_elemrev_v16qi_internal"
  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
          (match_operand:V16QI 1 "memory_operand" "Z")
          (parallel [(const_int 15) (const_int 14)
                     (const_int 13) (const_int 12)
                     (const_int 11) (const_int 10)
                     (const_int 9) (const_int 8)
                     (const_int 7) (const_int 6)
                     (const_int 5) (const_int 4)
                     (const_int 3) (const_int 2)
                     (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
  [(set_attr "type" "vecload")])

(define_insn "vsx_st_elemrev_v1ti"
  [(set (match_operand:V1TI 0 "memory_operand" "=Z")
          (match_operand:V1TI 1 "vsx_register_operand" "+wa")
          (parallel [(const_int 0)])))
   (clobber (match_dup 1))]
  "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN"
  return "xxpermdi %x1,%x1,%x1,2\;stxvd2x %x1,%y0";
  [(set_attr "type" "vecstore")])

(define_insn "vsx_st_elemrev_v2df"
  [(set (match_operand:V2DF 0 "memory_operand" "=Z")
          (match_operand:V2DF 1 "vsx_register_operand" "wa")
          (parallel [(const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V2DFmode) && !BYTES_BIG_ENDIAN"
  [(set_attr "type" "vecstore")])

(define_insn "vsx_st_elemrev_v2di"
  [(set (match_operand:V2DI 0 "memory_operand" "=Z")
          (match_operand:V2DI 1 "vsx_register_operand" "wa")
          (parallel [(const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN"
  [(set_attr "type" "vecstore")])

(define_insn "vsx_st_elemrev_v4sf"
  [(set (match_operand:V4SF 0 "memory_operand" "=Z")
          (match_operand:V4SF 1 "vsx_register_operand" "wa")
          (parallel [(const_int 3) (const_int 2)
                     (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V4SFmode) && !BYTES_BIG_ENDIAN"
  [(set_attr "type" "vecstore")])

(define_insn "vsx_st_elemrev_v4si"
  [(set (match_operand:V4SI 0 "memory_operand" "=Z")
          (match_operand:V4SI 1 "vsx_register_operand" "wa")
          (parallel [(const_int 3) (const_int 2)
                     (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V4SImode) && !BYTES_BIG_ENDIAN"
  [(set_attr "type" "vecstore")])

(define_expand "vsx_st_elemrev_v8hi"
  [(set (match_operand:V8HI 0 "memory_operand" "=Z")
          (match_operand:V8HI 1 "vsx_register_operand" "wa")
          (parallel [(const_int 7) (const_int 6)
                     (const_int 5) (const_int 4)
                     (const_int 3) (const_int 2)
                     (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN"
  if (!TARGET_P9_VECTOR)
      rtx mem_subreg, subreg, perm[16], pcv;
      rtx tmp = gen_reg_rtx (V8HImode);
      /* 2 is leftmost element in register */
      unsigned int reorder[16] = {13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2};

      for (i = 0; i < 16; ++i)
        perm[i] = GEN_INT (reorder[i]);

      pcv = force_reg (V16QImode,
                       gen_rtx_CONST_VECTOR (V16QImode,
                                             gen_rtvec_v (16, perm)));
      emit_insn (gen_altivec_vperm_v8hi_direct (tmp, operands[1],
      subreg = simplify_gen_subreg (V4SImode, tmp, V8HImode, 0);
      mem_subreg = simplify_gen_subreg (V4SImode, operands[0], V8HImode, 0);
      emit_insn (gen_vsx_st_elemrev_v4si (mem_subreg, subreg));

(define_insn "*vsx_st_elemrev_v2di_internal"
  [(set (match_operand:V2DI 0 "memory_operand" "=Z")
          (match_operand:V2DI 1 "vsx_register_operand" "wa")
          (parallel [(const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
  [(set_attr "type" "vecstore")])

(define_insn "*vsx_st_elemrev_v8hi_internal"
  [(set (match_operand:V8HI 0 "memory_operand" "=Z")
          (match_operand:V8HI 1 "vsx_register_operand" "wa")
          (parallel [(const_int 7) (const_int 6)
                     (const_int 5) (const_int 4)
                     (const_int 3) (const_int 2)
                     (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
  [(set_attr "type" "vecstore")])

(define_expand "vsx_st_elemrev_v16qi"
  [(set (match_operand:V16QI 0 "memory_operand" "=Z")
          (match_operand:V16QI 1 "vsx_register_operand" "wa")
          (parallel [(const_int 15) (const_int 14)
                     (const_int 13) (const_int 12)
                     (const_int 11) (const_int 10)
                     (const_int 9) (const_int 8)
                     (const_int 7) (const_int 6)
                     (const_int 5) (const_int 4)
                     (const_int 3) (const_int 2)
                     (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN"
  if (!TARGET_P9_VECTOR)
      rtx mem_subreg, subreg, perm[16], pcv;
      rtx tmp = gen_reg_rtx (V16QImode);
      /* 3 is leftmost element in register */
      unsigned int reorder[16] = {12,13,14,15,8,9,10,11,4,5,6,7,0,1,2,3};

      for (i = 0; i < 16; ++i)
        perm[i] = GEN_INT (reorder[i]);

      pcv = force_reg (V16QImode,
                       gen_rtx_CONST_VECTOR (V16QImode,
                                             gen_rtvec_v (16, perm)));
      emit_insn (gen_altivec_vperm_v16qi_direct (tmp, operands[1],
      subreg = simplify_gen_subreg (V4SImode, tmp, V16QImode, 0);
      mem_subreg = simplify_gen_subreg (V4SImode, operands[0], V16QImode, 0);
      emit_insn (gen_vsx_st_elemrev_v4si (mem_subreg, subreg));

(define_insn "*vsx_st_elemrev_v16qi_internal"
  [(set (match_operand:V16QI 0 "memory_operand" "=Z")
          (match_operand:V16QI 1 "vsx_register_operand" "wa")
          (parallel [(const_int 15) (const_int 14)
                     (const_int 13) (const_int 12)
                     (const_int 11) (const_int 10)
                     (const_int 9) (const_int 8)
                     (const_int 7) (const_int 6)
                     (const_int 5) (const_int 4)
                     (const_int 3) (const_int 2)
                     (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
  [(set_attr "type" "vecstore")])
;; VSX vector floating point arithmetic instructions.  The VSX scalar
;; instructions are now combined with the insns for the traditional floating
;; point unit.
(define_insn "*vsx_add<mode>3"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (plus:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
                    (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvadd<VSs> %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")
   (set_attr "fp_type" "<VSfptype_simple>")])

(define_insn "*vsx_sub<mode>3"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (minus:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
                     (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvsub<VSs> %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")
   (set_attr "fp_type" "<VSfptype_simple>")])

(define_insn "*vsx_mul<mode>3"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (mult:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
                    (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvmul<VSs> %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")
   (set_attr "fp_type" "<VSfptype_mul>")])

; Emulate vector with scalar for vec_mul in V2DImode
(define_insn_and_split "vsx_mul_v2di"
  [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
        (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa")
                      (match_operand:V2DI 2 "vsx_register_operand" "wa")]
  "VECTOR_MEM_VSX_P (V2DImode)"
  "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed"
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  rtx op2 = operands[2];
  rtx op3 = gen_reg_rtx (DImode);
  rtx op4 = gen_reg_rtx (DImode);
  rtx op5 = gen_reg_rtx (DImode);
  emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0)));
  emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0)));
  if (TARGET_POWERPC64)
    emit_insn (gen_muldi3 (op5, op3, op4));
      rtx ret = expand_mult (DImode, op3, op4, NULL, 0, false);
      emit_move_insn (op5, ret);
  emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1)));
  emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1)));
  if (TARGET_POWERPC64)
    emit_insn (gen_muldi3 (op3, op3, op4));
      rtx ret = expand_mult (DImode, op3, op4, NULL, 0, false);
      emit_move_insn (op3, ret);
  emit_insn (gen_vsx_concat_v2di (op0, op5, op3));
  [(set_attr "type" "mul")])
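
;; For illustration: what the split above computes, as plain GNU C.  There
;; is no VSX vector doubleword multiply instruction here, so each element
;; is extracted and multiplied with a scalar muldi3 (or a multiply
;; sequence on 32-bit), then the results are re-concatenated.
;;
;;   #include <altivec.h>
;;
;;   vector long long
;;   mul_v2di (vector long long a, vector long long b)
;;   {
;;     return (vector long long) { a[0] * b[0], a[1] * b[1] };
;;   }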
(define_insn "*vsx_div<mode>3"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (div:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
                   (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvdiv<VSs> %x0,%x1,%x2"
  [(set_attr "type" "<VStype_div>")
   (set_attr "fp_type" "<VSfptype_div>")])

; Emulate vector with scalar for vec_div in V2DImode
(define_insn_and_split "vsx_div_v2di"
  [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
        (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa")
                      (match_operand:V2DI 2 "vsx_register_operand" "wa")]
  "VECTOR_MEM_VSX_P (V2DImode)"
  "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed"
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  rtx op2 = operands[2];
  rtx op3 = gen_reg_rtx (DImode);
  rtx op4 = gen_reg_rtx (DImode);
  rtx op5 = gen_reg_rtx (DImode);
  emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0)));
  emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0)));
  if (TARGET_POWERPC64)
    emit_insn (gen_divdi3 (op5, op3, op4));
      rtx libfunc = optab_libfunc (sdiv_optab, DImode);
      rtx target = emit_library_call_value (libfunc,
                                            op5, LCT_NORMAL, DImode,
      emit_move_insn (op5, target);
  emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1)));
  emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1)));
  if (TARGET_POWERPC64)
    emit_insn (gen_divdi3 (op3, op3, op4));
      rtx libfunc = optab_libfunc (sdiv_optab, DImode);
      rtx target = emit_library_call_value (libfunc,
                                            op3, LCT_NORMAL, DImode,
      emit_move_insn (op3, target);
  emit_insn (gen_vsx_concat_v2di (op0, op5, op3));
  [(set_attr "type" "div")])

(define_insn_and_split "vsx_udiv_v2di"
  [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
        (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa")
                      (match_operand:V2DI 2 "vsx_register_operand" "wa")]
  "VECTOR_MEM_VSX_P (V2DImode)"
  "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed"
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  rtx op2 = operands[2];
  rtx op3 = gen_reg_rtx (DImode);
  rtx op4 = gen_reg_rtx (DImode);
  rtx op5 = gen_reg_rtx (DImode);
  emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0)));
  emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0)));
  if (TARGET_POWERPC64)
    emit_insn (gen_udivdi3 (op5, op3, op4));
      rtx libfunc = optab_libfunc (udiv_optab, DImode);
      rtx target = emit_library_call_value (libfunc,
                                            op5, LCT_NORMAL, DImode,
      emit_move_insn (op5, target);
  emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1)));
  emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1)));
  if (TARGET_POWERPC64)
    emit_insn (gen_udivdi3 (op3, op3, op4));
      rtx libfunc = optab_libfunc (udiv_optab, DImode);
      rtx target = emit_library_call_value (libfunc,
                                            op3, LCT_NORMAL, DImode,
      emit_move_insn (op3, target);
  emit_insn (gen_vsx_concat_v2di (op0, op5, op3));
  [(set_attr "type" "div")])
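
;; For illustration: the signed and unsigned division splits above likewise
;; compute one scalar divdi3/udivdi3 per element (a libgcc call when the
;; target has no 64-bit divide instruction), as in this plain GNU C
;; equivalent:
;;
;;   #include <altivec.h>
;;
;;   vector unsigned long long
;;   udiv_v2di (vector unsigned long long a, vector unsigned long long b)
;;   {
;;     return (vector unsigned long long) { a[0] / b[0], a[1] / b[1] };
;;   }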
;; *tdiv* instruction returning the FG flag
(define_expand "vsx_tdiv<mode>3_fg"
   (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "")
                 (match_operand:VSX_B 2 "vsx_register_operand" "")]
   (set (match_operand:SI 0 "gpc_reg_operand" "")
        (gt:SI (match_dup 3)
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  operands[3] = gen_reg_rtx (CCFPmode);

;; *tdiv* instruction returning the FE flag
(define_expand "vsx_tdiv<mode>3_fe"
   (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "")
                 (match_operand:VSX_B 2 "vsx_register_operand" "")]
   (set (match_operand:SI 0 "gpc_reg_operand" "")
        (eq:SI (match_dup 3)
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  operands[3] = gen_reg_rtx (CCFPmode);

(define_insn "*vsx_tdiv<mode>3_internal"
  [(set (match_operand:CCFP 0 "cc_reg_operand" "=x,x")
        (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSa>")
                      (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,<VSa>")]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "x<VSv>tdiv<VSs> %0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")
   (set_attr "fp_type" "<VSfptype_simple>")])

(define_insn "vsx_fre<mode>2"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  [(set_attr "type" "<VStype_simple>")
   (set_attr "fp_type" "<VSfptype_simple>")])

(define_insn "*vsx_neg<mode>2"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (neg:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvneg<VSs> %x0,%x1"
  [(set_attr "type" "<VStype_simple>")
   (set_attr "fp_type" "<VSfptype_simple>")])

(define_insn "*vsx_abs<mode>2"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (abs:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvabs<VSs> %x0,%x1"
  [(set_attr "type" "<VStype_simple>")
   (set_attr "fp_type" "<VSfptype_simple>")])

(define_insn "vsx_nabs<mode>2"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
            (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>"))))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvnabs<VSs> %x0,%x1"
  [(set_attr "type" "<VStype_simple>")
   (set_attr "fp_type" "<VSfptype_simple>")])

(define_insn "vsx_smax<mode>3"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (smax:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
                    (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvmax<VSs> %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")
   (set_attr "fp_type" "<VSfptype_simple>")])

(define_insn "*vsx_smin<mode>3"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (smin:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
                    (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvmin<VSs> %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")
   (set_attr "fp_type" "<VSfptype_simple>")])

(define_insn "*vsx_sqrt<mode>2"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (sqrt:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvsqrt<VSs> %x0,%x1"
  [(set_attr "type" "<VStype_sqrt>")
   (set_attr "fp_type" "<VSfptype_sqrt>")])

(define_insn "*vsx_rsqrte<mode>2"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvrsqrte<VSs> %x0,%x1"
  [(set_attr "type" "<VStype_simple>")
   (set_attr "fp_type" "<VSfptype_simple>")])

;; *tsqrt* returning the fg flag
(define_expand "vsx_tsqrt<mode>2_fg"
   (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "")]
   (set (match_operand:SI 0 "gpc_reg_operand" "")
        (gt:SI (match_dup 2)
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  operands[2] = gen_reg_rtx (CCFPmode);

;; *tsqrt* returning the fe flag
(define_expand "vsx_tsqrt<mode>2_fe"
   (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "")]
   (set (match_operand:SI 0 "gpc_reg_operand" "")
        (eq:SI (match_dup 2)
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  operands[2] = gen_reg_rtx (CCFPmode);

(define_insn "*vsx_tsqrt<mode>2_internal"
  [(set (match_operand:CCFP 0 "cc_reg_operand" "=x,x")
        (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSa>")]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "x<VSv>tsqrt<VSs> %0,%x1"
  [(set_attr "type" "<VStype_simple>")
   (set_attr "fp_type" "<VSfptype_simple>")])
;; Fused vector multiply/add instructions.  Support the classical Altivec
;; versions of fma, which allows the target to be a separate register from the
;; 3 inputs.  Under VSX, the target must be either the addend or the first
;; multiply operand.
(define_insn "*vsx_fmav4sf4"
  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wf,wf,?wa,?wa,v")
          (match_operand:V4SF 1 "vsx_register_operand" "%wf,wf,wa,wa,v")
          (match_operand:V4SF 2 "vsx_register_operand" "wf,0,wa,0,v")
          (match_operand:V4SF 3 "vsx_register_operand" "0,wf,0,wa,v")))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
   xvmaddasp %x0,%x1,%x2
   xvmaddmsp %x0,%x1,%x3
   xvmaddasp %x0,%x1,%x2
   xvmaddmsp %x0,%x1,%x3
   vmaddfp %0,%1,%2,%3"
  [(set_attr "type" "vecfloat")])
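
;; For illustration (hypothetical user code): vec_madd maps onto these fma
;; patterns.  Whether the "a" form (xvmaddasp, result replaces the addend)
;; or the "m" form (xvmaddmsp, result replaces a multiply operand) is
;; emitted depends on which input the register allocator ties to the
;; output.
;;
;;   #include <altivec.h>
;;
;;   vector float
;;   muladd (vector float a, vector float b, vector float c)
;;   {
;;     return vec_madd (a, b, c);   /* one fused multiply-add */
;;   }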
(define_insn "*vsx_fmav2df4"
  [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,wd,?wa,?wa")
          (match_operand:V2DF 1 "vsx_register_operand" "%wd,wd,wa,wa")
          (match_operand:V2DF 2 "vsx_register_operand" "wd,0,wa,0")
          (match_operand:V2DF 3 "vsx_register_operand" "0,wd,0,wa")))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
   xvmaddadp %x0,%x1,%x2
   xvmaddmdp %x0,%x1,%x3
   xvmaddadp %x0,%x1,%x2
   xvmaddmdp %x0,%x1,%x3"
  [(set_attr "type" "vecdouble")])

(define_insn "*vsx_fms<mode>4"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,<VSr>,?<VSa>,?<VSa>")
          (match_operand:VSX_F 1 "vsx_register_operand" "%<VSr>,<VSr>,<VSa>,<VSa>")
          (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,0,<VSa>,0")
            (match_operand:VSX_F 3 "vsx_register_operand" "0,<VSr>,0,<VSa>"))))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
   xvmsuba<VSs> %x0,%x1,%x2
   xvmsubm<VSs> %x0,%x1,%x3
   xvmsuba<VSs> %x0,%x1,%x2
   xvmsubm<VSs> %x0,%x1,%x3"
  [(set_attr "type" "<VStype_mul>")])

(define_insn "*vsx_nfma<mode>4"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,<VSr>,?<VSa>,?<VSa>")
            (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSr>,<VSa>,<VSa>")
            (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,0,<VSa>,0")
            (match_operand:VSX_F 3 "vsx_register_operand" "0,<VSr>,0,<VSa>"))))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
   xvnmadda<VSs> %x0,%x1,%x2
   xvnmaddm<VSs> %x0,%x1,%x3
   xvnmadda<VSs> %x0,%x1,%x2
   xvnmaddm<VSs> %x0,%x1,%x3"
  [(set_attr "type" "<VStype_mul>")
   (set_attr "fp_type" "<VSfptype_mul>")])

(define_insn "*vsx_nfmsv4sf4"
  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wf,wf,?wa,?wa,v")
            (match_operand:V4SF 1 "vsx_register_operand" "%wf,wf,wa,wa,v")
            (match_operand:V4SF 2 "vsx_register_operand" "wf,0,wa,0,v")
              (match_operand:V4SF 3 "vsx_register_operand" "0,wf,0,wa,v")))))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
   xvnmsubasp %x0,%x1,%x2
   xvnmsubmsp %x0,%x1,%x3
   xvnmsubasp %x0,%x1,%x2
   xvnmsubmsp %x0,%x1,%x3
   vnmsubfp %0,%1,%2,%3"
  [(set_attr "type" "vecfloat")])

(define_insn "*vsx_nfmsv2df4"
  [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,wd,?wa,?wa")
            (match_operand:V2DF 1 "vsx_register_operand" "%wd,wd,wa,wa")
            (match_operand:V2DF 2 "vsx_register_operand" "wd,0,wa,0")
              (match_operand:V2DF 3 "vsx_register_operand" "0,wd,0,wa")))))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
   xvnmsubadp %x0,%x1,%x2
   xvnmsubmdp %x0,%x1,%x3
   xvnmsubadp %x0,%x1,%x2
   xvnmsubmdp %x0,%x1,%x3"
  [(set_attr "type" "vecdouble")])

;; Vector conditional expressions (no scalar version for these instructions)
(define_insn "vsx_eq<mode>"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (eq:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
                  (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvcmpeq<VSs> %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")
   (set_attr "fp_type" "<VSfptype_simple>")])

(define_insn "vsx_gt<mode>"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (gt:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
                  (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvcmpgt<VSs> %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")
   (set_attr "fp_type" "<VSfptype_simple>")])

(define_insn "*vsx_ge<mode>"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (ge:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
                  (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvcmpge<VSs> %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")
   (set_attr "fp_type" "<VSfptype_simple>")])
;; Compare vectors producing a vector result and a predicate, setting CR6 to
;; indicate a combined status
(define_insn "*vsx_eq_<mode>_p"
  [(set (reg:CC CR6_REGNO)
	(unspec:CC
	 [(eq:CC (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,?<VSa>")
		 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,?<VSa>"))]
	 UNSPEC_PREDICATE))
   (set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
	(eq:VSX_F (match_dup 1)
		  (match_dup 2)))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvcmpeq<VSs>. %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")])

(define_insn "*vsx_gt_<mode>_p"
  [(set (reg:CC CR6_REGNO)
	(unspec:CC
	 [(gt:CC (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,?<VSa>")
		 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,?<VSa>"))]
	 UNSPEC_PREDICATE))
   (set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
	(gt:VSX_F (match_dup 1)
		  (match_dup 2)))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvcmpgt<VSs>. %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")])

(define_insn "*vsx_ge_<mode>_p"
  [(set (reg:CC CR6_REGNO)
	(unspec:CC
	 [(ge:CC (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,?<VSa>")
		 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,?<VSa>"))]
	 UNSPEC_PREDICATE))
   (set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
	(ge:VSX_F (match_dup 1)
		  (match_dup 2)))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvcmpge<VSs>. %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")])
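;; Illustrative note (added, not original text): because the dot forms above
;; also set CR6, a predicate such as vec_all_gt (x, y) on vector double is
;; expected to compile to "xvcmpgtdp. vD,vA,vB" plus a CR6 test, while the
;; vector mask result remains available in vD.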
(define_insn "*vsx_xxsel<mode>"
  [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?<VSa>")
	(if_then_else:VSX_L
	 (ne:CC (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,<VSa>")
		(match_operand:VSX_L 4 "zero_constant" ""))
	 (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,<VSa>")
	 (match_operand:VSX_L 3 "vsx_register_operand" "<VSr>,<VSa>")))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
  "xxsel %x0,%x3,%x2,%x1"
  [(set_attr "type" "vecmove")])

(define_insn "*vsx_xxsel<mode>_uns"
  [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?<VSa>")
	(if_then_else:VSX_L
	 (ne:CCUNS (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,<VSa>")
		   (match_operand:VSX_L 4 "zero_constant" ""))
	 (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,<VSa>")
	 (match_operand:VSX_L 3 "vsx_register_operand" "<VSr>,<VSa>")))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
  "xxsel %x0,%x3,%x2,%x1"
  [(set_attr "type" "vecmove")])

(define_insn "vsx_copysign<mode>3"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
	(unspec:VSX_F
	 [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
	  (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")]
	 UNSPEC_COPYSIGN))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvcpsgn<VSs> %x0,%x2,%x1"
  [(set_attr "type" "<VStype_simple>")
   (set_attr "fp_type" "<VSfptype_simple>")])
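;; Illustrative note (added; operand roles inferred from the template above):
;; copysign (x, y) keeps the magnitude of x and the sign of y, and xvcpsgn
;; takes the sign from its first source operand, so the template prints the
;; operands swapped: "xvcpsgn<VSs> %x0,%x2,%x1".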
;; For the conversions, limit the register class for the integer value to be
;; the fprs because we don't want to add the altivec registers to movdi/movsi.
;; For the unsigned tests, there isn't a generic double -> unsigned conversion
;; in rs6000.md so don't test VECTOR_UNIT_VSX_P, just test against VSX.
;; Don't use vsx_register_operand here, use gpc_reg_operand to match rs6000.md
;; in allowing virtual registers.
(define_insn "vsx_float<VSi><mode>2"
  [(set (match_operand:VSX_F 0 "gpc_reg_operand" "=<VSr>,?<VSa>")
	(float:VSX_F (match_operand:<VSI> 1 "gpc_reg_operand" "<VSr2>,<VSr3>")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvcvsx<VSc><VSs> %x0,%x1"
  [(set_attr "type" "<VStype_simple>")
   (set_attr "fp_type" "<VSfptype_simple>")])

(define_insn "vsx_floatuns<VSi><mode>2"
  [(set (match_operand:VSX_F 0 "gpc_reg_operand" "=<VSr>,?<VSa>")
	(unsigned_float:VSX_F (match_operand:<VSI> 1 "gpc_reg_operand" "<VSr2>,<VSr3>")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvcvux<VSc><VSs> %x0,%x1"
  [(set_attr "type" "<VStype_simple>")
   (set_attr "fp_type" "<VSfptype_simple>")])

(define_insn "vsx_fix_trunc<mode><VSi>2"
  [(set (match_operand:<VSI> 0 "gpc_reg_operand" "=<VSr2>,?<VSr3>")
	(fix:<VSI> (match_operand:VSX_F 1 "gpc_reg_operand" "<VSr>,<VSa>")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "x<VSv>cv<VSs>sx<VSc>s %x0,%x1"
  [(set_attr "type" "<VStype_simple>")
   (set_attr "fp_type" "<VSfptype_simple>")])

(define_insn "vsx_fixuns_trunc<mode><VSi>2"
  [(set (match_operand:<VSI> 0 "gpc_reg_operand" "=<VSr2>,?<VSr3>")
	(unsigned_fix:<VSI> (match_operand:VSX_F 1 "gpc_reg_operand" "<VSr>,<VSa>")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "x<VSv>cv<VSs>ux<VSc>s %x0,%x1"
  [(set_attr "type" "<VStype_simple>")
   (set_attr "fp_type" "<VSfptype_simple>")])

;; Math rounding functions
(define_insn "vsx_x<VSv>r<VSs>i"
  [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?<VSa>")
	(unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSa>")]
		      UNSPEC_VSX_ROUND_I))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "x<VSv>r<VSs>i %x0,%x1"
  [(set_attr "type" "<VStype_simple>")
   (set_attr "fp_type" "<VSfptype_simple>")])

(define_insn "vsx_x<VSv>r<VSs>ic"
  [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?<VSa>")
	(unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSa>")]
		      UNSPEC_VSX_ROUND_IC))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "x<VSv>r<VSs>ic %x0,%x1"
  [(set_attr "type" "<VStype_simple>")
   (set_attr "fp_type" "<VSfptype_simple>")])

(define_insn "vsx_btrunc<mode>2"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
	(fix:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvr<VSs>iz %x0,%x1"
  [(set_attr "type" "<VStype_simple>")
   (set_attr "fp_type" "<VSfptype_simple>")])
(define_insn "*vsx_b2trunc<mode>2"
  [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?<VSa>")
	(unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSa>")]
		      UNSPEC_FRIZ))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "x<VSv>r<VSs>iz %x0,%x1"
  [(set_attr "type" "<VStype_simple>")
   (set_attr "fp_type" "<VSfptype_simple>")])

(define_insn "vsx_floor<mode>2"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
	(unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")]
		      UNSPEC_FRIM))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvr<VSs>im %x0,%x1"
  [(set_attr "type" "<VStype_simple>")
   (set_attr "fp_type" "<VSfptype_simple>")])

(define_insn "vsx_ceil<mode>2"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
	(unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")]
		      UNSPEC_FRIP))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvr<VSs>ip %x0,%x1"
  [(set_attr "type" "<VStype_simple>")
   (set_attr "fp_type" "<VSfptype_simple>")])
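;; Illustrative mapping (added note): these patterns follow the C library
;; rounding functions, assuming the usual xvr* semantics: btrunc/trunc uses
;; xvr<VSs>iz (toward zero), floor uses xvr<VSs>im (toward -inf), ceil uses
;; xvr<VSs>ip (toward +inf), and x<VSv>r<VSs>i rounds to the nearest
;; integral value.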
;; VSX convert to/from double vector

;; Convert between single and double precision
;; Don't use xscvspdp and xscvdpsp for scalar conversions, since the normal
;; scalar single precision instructions internally use the double format.
;; Prefer the altivec registers, since we likely will need to do a vperm
(define_insn "vsx_<VS_spdp_insn>"
  [(set (match_operand:<VS_spdp_res> 0 "vsx_register_operand" "=<VSr4>,?<VSa>")
	(unspec:<VS_spdp_res> [(match_operand:VSX_SPDP 1 "vsx_register_operand" "<VSr5>,<VSa>")]
			      UNSPEC_VSX_CVSPDP))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "<VS_spdp_insn> %x0,%x1"
  [(set_attr "type" "<VS_spdp_type>")])
;; xscvspdp, represent the scalar SF type as V4SF
(define_insn "vsx_xscvspdp"
  [(set (match_operand:DF 0 "vsx_register_operand" "=ws")
	(unspec:DF [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
		   UNSPEC_VSX_CVSPDP))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
  "xscvspdp %x0,%x1"
  [(set_attr "type" "fp")])

;; Same as vsx_xscvspdp, but use SF as the type
(define_insn "vsx_xscvspdp_scalar2"
  [(set (match_operand:SF 0 "vsx_register_operand" "=ww")
	(unspec:SF [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
		   UNSPEC_VSX_CVSPDP))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
  "xscvspdp %x0,%x1"
  [(set_attr "type" "fp")])

;; Generate xvcvhpsp instruction
(define_insn "vsx_xvcvhpsp"
  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
	(unspec:V4SF [(match_operand:V16QI 1 "vsx_register_operand" "wa")]
		     UNSPEC_VSX_CVHPSP))]
  "TARGET_P9_VECTOR"
  "xvcvhpsp %x0,%x1"
  [(set_attr "type" "vecfloat")])

;; xscvdpsp used for splat'ing a scalar to V4SF, knowing that the internal SF
;; format of scalars is actually DF.
(define_insn "vsx_xscvdpsp_scalar"
  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
	(unspec:V4SF [(match_operand:SF 1 "vsx_register_operand" "ww")]
		     UNSPEC_VSX_CVSPDP))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
  "xscvdpsp %x0,%x1"
  [(set_attr "type" "fp")])

;; ISA 2.07 xscvdpspn/xscvspdpn that does not raise an error on signalling NaNs
(define_insn "vsx_xscvdpspn"
  [(set (match_operand:V4SF 0 "vsx_register_operand" "=ww")
	(unspec:V4SF [(match_operand:DF 1 "vsx_register_operand" "ws")]
		     UNSPEC_VSX_CVDPSPN))]
  "TARGET_XSCVDPSPN"
  "xscvdpspn %x0,%x1"
  [(set_attr "type" "fp")])

(define_insn "vsx_xscvspdpn"
  [(set (match_operand:DF 0 "vsx_register_operand" "=ws")
	(unspec:DF [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
		   UNSPEC_VSX_CVSPDPN))]
  "TARGET_XSCVSPDPN"
  "xscvspdpn %x0,%x1"
  [(set_attr "type" "fp")])

(define_insn "vsx_xscvdpspn_scalar"
  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
	(unspec:V4SF [(match_operand:SF 1 "vsx_register_operand" "ww")]
		     UNSPEC_VSX_CVDPSPN))]
  "TARGET_XSCVDPSPN"
  "xscvdpspn %x0,%x1"
  [(set_attr "type" "fp")])

;; Used by direct move to move a SFmode value from GPR to VSX register
(define_insn "vsx_xscvspdpn_directmove"
  [(set (match_operand:SF 0 "vsx_register_operand" "=wa")
	(unspec:SF [(match_operand:SF 1 "vsx_register_operand" "wa")]
		   UNSPEC_VSX_CVSPDPN))]
  "TARGET_XSCVSPDPN"
  "xscvspdpn %x0,%x1"
  [(set_attr "type" "fp")])

;; Convert and scale (used by vec_ctf, vec_cts, vec_ctu for double/long long)

(define_expand "vsx_xvcvsxddp_scale"
  [(match_operand:V2DF 0 "vsx_register_operand" "")
   (match_operand:V2DI 1 "vsx_register_operand" "")
   (match_operand:QI 2 "immediate_operand" "")]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
{
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  int scale = INTVAL (operands[2]);
  emit_insn (gen_vsx_xvcvsxddp (op0, op1));
  if (scale != 0)
    rs6000_scale_v2df (op0, op0, -scale);
  DONE;
})
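;; Illustration (assumed builtin semantics, added note): for vec_ctf on
;; vector long long, operand 2 is the scale, so the expander above emits
;; xvcvsxddp and then has rs6000_scale_v2df multiply the result by
;; 2**-scale; e.g. vec_ctf (v, 3) divides each converted element by 8.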
(define_insn "vsx_xvcvsxddp"
  [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
	(unspec:V2DF [(match_operand:V2DI 1 "vsx_register_operand" "wa")]
		     UNSPEC_VSX_XVCVSXDDP))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "xvcvsxddp %x0,%x1"
  [(set_attr "type" "vecdouble")])

(define_expand "vsx_xvcvuxddp_scale"
  [(match_operand:V2DF 0 "vsx_register_operand" "")
   (match_operand:V2DI 1 "vsx_register_operand" "")
   (match_operand:QI 2 "immediate_operand" "")]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
{
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  int scale = INTVAL (operands[2]);
  emit_insn (gen_vsx_xvcvuxddp (op0, op1));
  if (scale != 0)
    rs6000_scale_v2df (op0, op0, -scale);
  DONE;
})

(define_insn "vsx_xvcvuxddp"
  [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
	(unspec:V2DF [(match_operand:V2DI 1 "vsx_register_operand" "wa")]
		     UNSPEC_VSX_XVCVUXDDP))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "xvcvuxddp %x0,%x1"
  [(set_attr "type" "vecdouble")])

(define_expand "vsx_xvcvdpsxds_scale"
  [(match_operand:V2DI 0 "vsx_register_operand" "")
   (match_operand:V2DF 1 "vsx_register_operand" "")
   (match_operand:QI 2 "immediate_operand" "")]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
{
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  rtx tmp;
  int scale = INTVAL (operands[2]);
  if (scale == 0)
    tmp = op1;
  else
    {
      tmp = gen_reg_rtx (V2DFmode);
      rs6000_scale_v2df (tmp, op1, scale);
    }
  emit_insn (gen_vsx_xvcvdpsxds (op0, tmp));
  DONE;
})
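;; Illustration (assumed builtin semantics, added note): vec_cts scales
;; before converting, so the expander above multiplies the input by
;; 2**scale (e.g. by 8 for a scale of 3) and then emits xvcvdpsxds to
;; truncate to signed long long.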
;; convert vector of 64-bit floating point numbers to vector of
;; 64-bit signed integer
(define_insn "vsx_xvcvdpsxds"
  [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
	(unspec:V2DI [(match_operand:V2DF 1 "vsx_register_operand" "wa")]
		     UNSPEC_VSX_XVCVDPSXDS))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "xvcvdpsxds %x0,%x1"
  [(set_attr "type" "vecdouble")])

;; convert vector of 32-bit floating point numbers to vector of
;; 32-bit signed integer
(define_insn "vsx_xvcvspsxws"
  [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa")
	(unspec:V4SI [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
		     UNSPEC_VSX_XVCVSPSXWS))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
  "xvcvspsxws %x0,%x1"
  [(set_attr "type" "vecfloat")])
;; convert vector of 64-bit floating point numbers to vector of
;; 64-bit unsigned integer
(define_expand "vsx_xvcvdpuxds_scale"
  [(match_operand:V2DI 0 "vsx_register_operand" "")
   (match_operand:V2DF 1 "vsx_register_operand" "")
   (match_operand:QI 2 "immediate_operand" "")]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
{
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  rtx tmp;
  int scale = INTVAL (operands[2]);
  if (scale == 0)
    tmp = op1;
  else
    {
      tmp = gen_reg_rtx (V2DFmode);
      rs6000_scale_v2df (tmp, op1, scale);
    }
  emit_insn (gen_vsx_xvcvdpuxds (op0, tmp));
  DONE;
})
;; convert vector of 32-bit floating point numbers to vector of
;; 32-bit unsigned integer
(define_insn "vsx_xvcvspuxws"
  [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa")
	(unspec:V4SI [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
		     UNSPEC_VSX_XVCVSPSXWS))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
  "xvcvspuxws %x0,%x1"
  [(set_attr "type" "vecfloat")])

(define_insn "vsx_xvcvdpuxds"
  [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
	(unspec:V2DI [(match_operand:V2DF 1 "vsx_register_operand" "wa")]
		     UNSPEC_VSX_XVCVDPUXDS))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "xvcvdpuxds %x0,%x1"
  [(set_attr "type" "vecdouble")])

;; Convert from 64-bit to 32-bit types
;; Note, favor the Altivec registers since the usual use of these instructions
;; is in vector converts and we need to use the Altivec vperm instruction.

(define_insn "vsx_xvcvdpsxws"
  [(set (match_operand:V4SI 0 "vsx_register_operand" "=v,?wa")
	(unspec:V4SI [(match_operand:V2DF 1 "vsx_register_operand" "wd,wa")]
		     UNSPEC_VSX_CVDPSXWS))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "xvcvdpsxws %x0,%x1"
  [(set_attr "type" "vecdouble")])

(define_insn "vsx_xvcvdpuxws"
  [(set (match_operand:V4SI 0 "vsx_register_operand" "=v,?wa")
	(unspec:V4SI [(match_operand:V2DF 1 "vsx_register_operand" "wd,wa")]
		     UNSPEC_VSX_CVDPUXWS))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "xvcvdpuxws %x0,%x1"
  [(set_attr "type" "vecdouble")])
(define_insn "vsx_xvcvsxdsp"
  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wd,?wa")
	(unspec:V4SF [(match_operand:V2DI 1 "vsx_register_operand" "wf,wa")]
		     UNSPEC_VSX_CVSXDSP))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "xvcvsxdsp %x0,%x1"
  [(set_attr "type" "vecfloat")])

(define_insn "vsx_xvcvuxdsp"
  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wd,?wa")
	(unspec:V4SF [(match_operand:V2DI 1 "vsx_register_operand" "wf,wa")]
		     UNSPEC_VSX_CVUXDSP))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "xvcvuxdsp %x0,%x1"
  [(set_attr "type" "vecdouble")])

(define_insn "vsx_xvcdpsp"
  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wd,?wa")
	(unspec:V4SF [(match_operand:V2DF 1 "vsx_register_operand" "wf,wa")]
		     UNSPEC_VSX_XVCDPSP))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "xvcvdpsp %x0,%x1"
  [(set_attr "type" "vecdouble")])

;; Convert from 32-bit to 64-bit types
;; Provide both vector and scalar targets
(define_insn "vsx_xvcvsxwdp"
  [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,?wa")
	(unspec:V2DF [(match_operand:V4SI 1 "vsx_register_operand" "wf,wa")]
		     UNSPEC_VSX_CVSXWDP))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "xvcvsxwdp %x0,%x1"
  [(set_attr "type" "vecdouble")])

(define_insn "vsx_xvcvsxwdp_df"
  [(set (match_operand:DF 0 "vsx_register_operand" "=ws")
	(unspec:DF [(match_operand:V4SI 1 "vsx_register_operand" "wa")]
		   UNSPEC_VSX_CVSXWDP))]
  "TARGET_VSX"
  "xvcvsxwdp %x0,%x1"
  [(set_attr "type" "vecdouble")])

(define_insn "vsx_xvcvuxwdp"
  [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,?wa")
	(unspec:V2DF [(match_operand:V4SI 1 "vsx_register_operand" "wf,wa")]
		     UNSPEC_VSX_CVUXWDP))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "xvcvuxwdp %x0,%x1"
  [(set_attr "type" "vecdouble")])

(define_insn "vsx_xvcvuxwdp_df"
  [(set (match_operand:DF 0 "vsx_register_operand" "=ws")
	(unspec:DF [(match_operand:V4SI 1 "vsx_register_operand" "wa")]
		   UNSPEC_VSX_CVUXWDP))]
  "TARGET_VSX"
  "xvcvuxwdp %x0,%x1"
  [(set_attr "type" "vecdouble")])

(define_insn "vsx_xvcvspsxds"
  [(set (match_operand:V2DI 0 "vsx_register_operand" "=v,?wa")
	(unspec:V2DI [(match_operand:V4SF 1 "vsx_register_operand" "wd,wa")]
		     UNSPEC_VSX_CVSPSXDS))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "xvcvspsxds %x0,%x1"
  [(set_attr "type" "vecdouble")])

(define_insn "vsx_xvcvspuxds"
  [(set (match_operand:V2DI 0 "vsx_register_operand" "=v,?wa")
	(unspec:V2DI [(match_operand:V4SF 1 "vsx_register_operand" "wd,wa")]
		     UNSPEC_VSX_CVSPUXDS))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "xvcvspuxds %x0,%x1"
  [(set_attr "type" "vecdouble")])

(define_insn "vsx_xvcvsxwsp"
  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
	(unspec:V4SF [(match_operand:V4SI 1 "vsx_register_operand" "wa")]
		     UNSPEC_VSX_CVSXWSP))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
  "xvcvsxwsp %x0,%x1"
  [(set_attr "type" "vecfloat")])

(define_insn "vsx_xvcvuxwsp"
  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
	(unspec:V4SF [(match_operand:V4SI 1 "vsx_register_operand" "wa")]
		     UNSPEC_VSX_CVUXWSP))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
  "xvcvuxwsp %x0,%x1"
  [(set_attr "type" "vecfloat")])
;; Generate float2 double
;; convert two double to float
(define_expand "float2_v2df"
  [(use (match_operand:V4SF 0 "register_operand" "=wa"))
   (use (match_operand:V2DF 1 "register_operand" "wa"))
   (use (match_operand:V2DF 2 "register_operand" "wa"))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
{
  rtx rtx_src1, rtx_src2, rtx_dst;

  rtx_dst = operands[0];
  rtx_src1 = operands[1];
  rtx_src2 = operands[2];

  rs6000_generate_float2_double_code (rtx_dst, rtx_src1, rtx_src2);
  DONE;
})

;; Generate float2
;; convert two long long signed ints to float
(define_expand "float2_v2di"
  [(use (match_operand:V4SF 0 "register_operand" "=wa"))
   (use (match_operand:V2DI 1 "register_operand" "wa"))
   (use (match_operand:V2DI 2 "register_operand" "wa"))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
{
  rtx rtx_src1, rtx_src2, rtx_dst;

  rtx_dst = operands[0];
  rtx_src1 = operands[1];
  rtx_src2 = operands[2];

  rs6000_generate_float2_code (true, rtx_dst, rtx_src1, rtx_src2);
  DONE;
})

;; Generate uns_float2
;; convert two long long unsigned ints to float
(define_expand "uns_float2_v2di"
  [(use (match_operand:V4SF 0 "register_operand" "=wa"))
   (use (match_operand:V2DI 1 "register_operand" "wa"))
   (use (match_operand:V2DI 2 "register_operand" "wa"))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
{
  rtx rtx_src1, rtx_src2, rtx_dst;

  rtx_dst = operands[0];
  rtx_src1 = operands[1];
  rtx_src2 = operands[2];

  rs6000_generate_float2_code (true, rtx_dst, rtx_src1, rtx_src2);
  DONE;
})

;; Generate floate
;; convert double or long long signed to float
;; (Only even words are valid, BE numbering)
(define_expand "floate<mode>"
  [(use (match_operand:V4SF 0 "register_operand" "=wa"))
   (use (match_operand:VSX_D 1 "register_operand" "wa"))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
{
  if (VECTOR_ELT_ORDER_BIG)
    {
      /* Shift left one word to put the even word in the correct
	 location.  */
      rtx rtx_tmp;
      rtx rtx_val = GEN_INT (4);

      rtx_tmp = gen_reg_rtx (V4SFmode);
      emit_insn (gen_vsx_xvcv<VF_sxddp>sp (rtx_tmp, operands[1]));
      emit_insn (gen_altivec_vsldoi_v4sf (operands[0],
		 rtx_tmp, rtx_tmp, rtx_val));
    }
  else
    emit_insn (gen_vsx_xvcv<VF_sxddp>sp (operands[0], operands[1]));

  DONE;
})
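;; Illustration (assumed semantics, added note): with input {d0, d1},
;; floate leaves {(float) d0, ?, (float) d1, ?} using big endian element
;; numbering, where ? is undefined; the vsldoi above is a 4-byte rotate
;; that moves the converted values into those even slots.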
;; Generate uns_floate
;; convert long long unsigned to float
;; (Only even words are valid, BE numbering)
(define_expand "unsfloatev2di"
  [(use (match_operand:V4SF 0 "register_operand" "=wa"))
   (use (match_operand:V2DI 1 "register_operand" "wa"))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
{
  if (VECTOR_ELT_ORDER_BIG)
    {
      /* Shift left one word to put the even word in the correct
	 location.  */
      rtx rtx_tmp;
      rtx rtx_val = GEN_INT (4);

      rtx_tmp = gen_reg_rtx (V4SFmode);
      emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp, operands[1]));
      emit_insn (gen_altivec_vsldoi_v4sf (operands[0],
		 rtx_tmp, rtx_tmp, rtx_val));
    }
  else
    emit_insn (gen_vsx_xvcvuxdsp (operands[0], operands[1]));

  DONE;
})

;; Generate floato
;; convert double or long long signed to float
;; (Only odd words are valid, BE numbering)
(define_expand "floato<mode>"
  [(use (match_operand:V4SF 0 "register_operand" "=wa"))
   (use (match_operand:VSX_D 1 "register_operand" "wa"))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
{
  if (VECTOR_ELT_ORDER_BIG)
    emit_insn (gen_vsx_xvcv<VF_sxddp>sp (operands[0], operands[1]));
  else
    {
      /* Shift left one word to put the odd word in the correct
	 location.  */
      rtx rtx_tmp;
      rtx rtx_val = GEN_INT (4);

      rtx_tmp = gen_reg_rtx (V4SFmode);
      emit_insn (gen_vsx_xvcv<VF_sxddp>sp (rtx_tmp, operands[1]));
      emit_insn (gen_altivec_vsldoi_v4sf (operands[0],
		 rtx_tmp, rtx_tmp, rtx_val));
    }
  DONE;
})

;; Generate uns_floato
;; convert long long unsigned to float
;; (Only odd words are valid, BE numbering)
(define_expand "unsfloatov2di"
  [(use (match_operand:V4SF 0 "register_operand" "=wa"))
   (use (match_operand:V2DI 1 "register_operand" "wa"))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
{
  if (VECTOR_ELT_ORDER_BIG)
    emit_insn (gen_vsx_xvcvuxdsp (operands[0], operands[1]));
  else
    {
      /* Shift left one word to put the odd word in the correct
	 location.  */
      rtx rtx_tmp;
      rtx rtx_val = GEN_INT (4);

      rtx_tmp = gen_reg_rtx (V4SFmode);
      emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp, operands[1]));
      emit_insn (gen_altivec_vsldoi_v4sf (operands[0],
		 rtx_tmp, rtx_tmp, rtx_val));
    }
  DONE;
})

;; Generate vsigned2
;; convert two double float vectors to a vector of single precision ints
(define_expand "vsigned2_v2df"
  [(match_operand:V4SI 0 "register_operand" "=wa")
   (unspec:V4SI [(match_operand:V2DF 1 "register_operand" "wa")
		 (match_operand:V2DF 2 "register_operand" "wa")]
		UNSPEC_VSX_VSIGNED2)]
  "TARGET_VSX"
{
  rtx rtx_src1, rtx_src2, rtx_dst;
  bool signed_convert = true;

  rtx_dst = operands[0];
  rtx_src1 = operands[1];
  rtx_src2 = operands[2];

  rs6000_generate_vsigned2_code (signed_convert, rtx_dst, rtx_src1, rtx_src2);
  DONE;
})
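;; Illustration (assumed builtin semantics, added note): vec_signed2 (a, b)
;; on vector double is expected to produce
;;   { (int) a[0], (int) a[1], (int) b[0], (int) b[1] }
;; with rs6000_generate_vsigned2_code emitting the two xvcvdpsxws
;; conversions plus the shuffle that interleaves them.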
;; Generate vsignedo_v2df
;; signed double float to int convert odd word
(define_expand "vsignedo_v2df"
  [(set (match_operand:V4SI 0 "register_operand" "=wa")
	(match_operand:V2DF 1 "register_operand" "wa"))]
  "TARGET_VSX"
{
  if (VECTOR_ELT_ORDER_BIG)
    {
      rtx rtx_tmp;
      rtx rtx_val = GEN_INT (12);
      rtx_tmp = gen_reg_rtx (V4SImode);

      emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp, operands[1]));

      /* Big endian word numbering for words in operand is 0 1 2 3.
	 Take (operand[1] operand[1]) and shift left three words
	 0 1 2 3    0 1 2 3  =>  3 0 1 2.
	 Words 1 and 3 are now where they need to be for the result.  */

      emit_insn (gen_altivec_vsldoi_v4si (operands[0], rtx_tmp,
		 rtx_tmp, rtx_val));
    }
  else
    /* Little endian word numbering for operand is 3 2 1 0.
       Result words 3 and 1 are where they need to be.  */
    emit_insn (gen_vsx_xvcvdpsxws (operands[0], operands[1]));

  DONE;
}
  [(set_attr "type" "veccomplex")])

;; Generate vsignede_v2df
;; signed double float to int even word
(define_expand "vsignede_v2df"
  [(set (match_operand:V4SI 0 "register_operand" "=v")
	(match_operand:V2DF 1 "register_operand" "v"))]
  "TARGET_VSX"
{
  if (VECTOR_ELT_ORDER_BIG)
    /* Big endian word numbering for words in operand is 0 1 2 3.
       Words 0 and 2 are where they need to be for the result.  */
    emit_insn (gen_vsx_xvcvdpsxws (operands[0], operands[1]));
  else
    {
      rtx rtx_tmp;
      rtx rtx_val = GEN_INT (12);
      rtx_tmp = gen_reg_rtx (V4SImode);

      emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp, operands[1]));

      /* Little endian word numbering for operand is 3 2 1 0.
	 Take (operand[1] operand[1]) and shift left three words
	 0 1 2 3    0 1 2 3  =>  3 0 1 2.
	 Words 0 and 2 are now where they need to be for the result.  */
      emit_insn (gen_altivec_vsldoi_v4si (operands[0], rtx_tmp,
		 rtx_tmp, rtx_val));
    }
  DONE;
}
  [(set_attr "type" "veccomplex")])

;; Generate vunsigned2
;; convert two double float vectors to a vector of single precision
;; unsigned ints
(define_expand "vunsigned2_v2df"
  [(match_operand:V4SI 0 "register_operand" "=v")
   (unspec:V4SI [(match_operand:V2DF 1 "register_operand" "v")
		 (match_operand:V2DF 2 "register_operand" "v")]
		UNSPEC_VSX_VSIGNED2)]
  "TARGET_VSX"
{
  rtx rtx_src1, rtx_src2, rtx_dst;
  bool signed_convert = false;

  rtx_dst = operands[0];
  rtx_src1 = operands[1];
  rtx_src2 = operands[2];

  rs6000_generate_vsigned2_code (signed_convert, rtx_dst, rtx_src1, rtx_src2);
  DONE;
})

;; Generate vunsignedo_v2df
;; unsigned double float to int convert odd word
(define_expand "vunsignedo_v2df"
  [(set (match_operand:V4SI 0 "register_operand" "=v")
	(match_operand:V2DF 1 "register_operand" "v"))]
  "TARGET_VSX"
{
  if (VECTOR_ELT_ORDER_BIG)
    {
      rtx rtx_tmp;
      rtx rtx_val = GEN_INT (12);
      rtx_tmp = gen_reg_rtx (V4SImode);

      emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp, operands[1]));

      /* Big endian word numbering for words in operand is 0 1 2 3.
	 Take (operand[1] operand[1]) and shift left three words
	 0 1 2 3    0 1 2 3  =>  3 0 1 2.
	 Words 1 and 3 are now where they need to be for the result.  */

      emit_insn (gen_altivec_vsldoi_v4si (operands[0], rtx_tmp,
		 rtx_tmp, rtx_val));
    }
  else
    /* Little endian word numbering for operand is 3 2 1 0.
       Result words 3 and 1 are where they need to be.  */
    emit_insn (gen_vsx_xvcvdpuxws (operands[0], operands[1]));

  DONE;
}
  [(set_attr "type" "veccomplex")])

;; Generate vunsignede_v2df
;; unsigned double float to int even word
(define_expand "vunsignede_v2df"
  [(set (match_operand:V4SI 0 "register_operand" "=v")
	(match_operand:V2DF 1 "register_operand" "v"))]
  "TARGET_VSX"
{
  if (VECTOR_ELT_ORDER_BIG)
    /* Big endian word numbering for words in operand is 0 1 2 3.
       Words 0 and 2 are where they need to be for the result.  */
    emit_insn (gen_vsx_xvcvdpuxws (operands[0], operands[1]));
  else
    {
      rtx rtx_tmp;
      rtx rtx_val = GEN_INT (12);
      rtx_tmp = gen_reg_rtx (V4SImode);

      emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp, operands[1]));

      /* Little endian word numbering for operand is 3 2 1 0.
	 Take (operand[1] operand[1]) and shift left three words
	 0 1 2 3    0 1 2 3  =>  3 0 1 2.
	 Words 0 and 2 are now where they need to be for the result.  */
      emit_insn (gen_altivec_vsldoi_v4si (operands[0], rtx_tmp,
		 rtx_tmp, rtx_val));
    }
  DONE;
}
  [(set_attr "type" "veccomplex")])
;; Only optimize (float (fix x)) -> frz if we are in fast-math mode, since
;; the xvrdpiz instruction does not truncate the value if the floating
;; point value is < LONG_MIN or > LONG_MAX.
(define_insn "*vsx_float_fix_v2df2"
  [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,?wa")
	(float:V2DF
	 (fix:V2DI
	  (match_operand:V2DF 1 "vsx_register_operand" "wd,?wa"))))]
  "TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
   && VECTOR_UNIT_VSX_P (V2DFmode) && flag_unsafe_math_optimizations
   && !flag_trapping_math && TARGET_FRIZ"
  "xvrdpiz %x0,%x1"
  [(set_attr "type" "vecdouble")
   (set_attr "fp_type" "fp_addsub_d")])
;; Permute operations

;; Build a V2DF/V2DI vector from two scalars
(define_insn "vsx_concat_<mode>"
  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa,we")
	(vec_concat:VSX_D
	 (match_operand:<VS_scalar> 1 "gpc_reg_operand" "wa,b")
	 (match_operand:<VS_scalar> 2 "gpc_reg_operand" "wa,b")))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  if (which_alternative == 0)
    return (BYTES_BIG_ENDIAN
	    ? "xxpermdi %x0,%x1,%x2,0"
	    : "xxpermdi %x0,%x2,%x1,0");

  else if (which_alternative == 1)
    return (BYTES_BIG_ENDIAN
	    ? "mtvsrdd %x0,%1,%2"
	    : "mtvsrdd %x0,%2,%1");

  else
    gcc_unreachable ();
}
  [(set_attr "type" "vecperm")])

;; Combiner patterns to allow creating XXPERMDI's to access either double
;; word element in a vector register.
(define_insn "*vsx_concat_<mode>_1"
  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
	(vec_concat:VSX_D
	 (vec_select:<VS_scalar>
	  (match_operand:VSX_D 1 "gpc_reg_operand" "wa")
	  (parallel [(match_operand:QI 2 "const_0_to_1_operand" "n")]))
	 (match_operand:<VS_scalar> 3 "gpc_reg_operand" "wa")))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  HOST_WIDE_INT dword = INTVAL (operands[2]);
  if (BYTES_BIG_ENDIAN)
    {
      operands[4] = GEN_INT (2*dword);
      return "xxpermdi %x0,%x1,%x3,%4";
    }
  else
    {
      operands[4] = GEN_INT (!dword);
      return "xxpermdi %x0,%x3,%x1,%4";
    }
}
  [(set_attr "type" "vecperm")])
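;; Worked example (derived from the code above): selecting doubleword 1 of
;; operand 1 on big endian gives GEN_INT (2*1) = 2, so the output is
;; "xxpermdi %x0,%x1,%x3,2"; on little endian the immediate is !1 = 0 and
;; the operands are printed swapped.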
(define_insn "*vsx_concat_<mode>_2"
  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
	(vec_concat:VSX_D
	 (match_operand:<VS_scalar> 1 "gpc_reg_operand" "wa")
	 (vec_select:<VS_scalar>
	  (match_operand:VSX_D 2 "gpc_reg_operand" "wa")
	  (parallel [(match_operand:QI 3 "const_0_to_1_operand" "n")]))))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  HOST_WIDE_INT dword = INTVAL (operands[3]);
  if (BYTES_BIG_ENDIAN)
    {
      operands[4] = GEN_INT (dword);
      return "xxpermdi %x0,%x1,%x2,%4";
    }
  else
    {
      operands[4] = GEN_INT (2 * !dword);
      return "xxpermdi %x0,%x2,%x1,%4";
    }
}
  [(set_attr "type" "vecperm")])

(define_insn "*vsx_concat_<mode>_3"
  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
	(vec_concat:VSX_D
	 (vec_select:<VS_scalar>
	  (match_operand:VSX_D 1 "gpc_reg_operand" "wa")
	  (parallel [(match_operand:QI 2 "const_0_to_1_operand" "n")]))
	 (vec_select:<VS_scalar>
	  (match_operand:VSX_D 3 "gpc_reg_operand" "wa")
	  (parallel [(match_operand:QI 4 "const_0_to_1_operand" "n")]))))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  HOST_WIDE_INT dword1 = INTVAL (operands[2]);
  HOST_WIDE_INT dword2 = INTVAL (operands[4]);
  if (BYTES_BIG_ENDIAN)
    {
      operands[5] = GEN_INT ((2 * dword1) + dword2);
      return "xxpermdi %x0,%x1,%x3,%5";
    }
  else
    {
      operands[5] = GEN_INT ((2 * !dword2) + !dword1);
      return "xxpermdi %x0,%x3,%x1,%5";
    }
}
  [(set_attr "type" "vecperm")])

;; Special purpose concat using xxpermdi to glue two single precision values
;; together, relying on the fact that internally scalar floats are represented
;; as doubles.  This is used to initialize a V4SF vector with 4 floats
(define_insn "vsx_concat_v2sf"
  [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
	(unspec:V2DF
	 [(match_operand:SF 1 "vsx_register_operand" "ww")
	  (match_operand:SF 2 "vsx_register_operand" "ww")]
	 UNSPEC_VSX_CONCAT))]
  "VECTOR_MEM_VSX_P (V2DFmode)"
{
  if (BYTES_BIG_ENDIAN)
    return "xxpermdi %x0,%x1,%x2,0";
  else
    return "xxpermdi %x0,%x2,%x1,0";
}
  [(set_attr "type" "vecperm")])

;; V4SImode initialization splitter
(define_insn_and_split "vsx_init_v4si"
  [(set (match_operand:V4SI 0 "gpc_reg_operand" "=&r")
	(unspec:V4SI
	 [(match_operand:SI 1 "reg_or_cint_operand" "rn")
	  (match_operand:SI 2 "reg_or_cint_operand" "rn")
	  (match_operand:SI 3 "reg_or_cint_operand" "rn")
	  (match_operand:SI 4 "reg_or_cint_operand" "rn")]
	 UNSPEC_VSX_VEC_INIT))
   (clobber (match_scratch:DI 5 "=&r"))
   (clobber (match_scratch:DI 6 "=&r"))]
  "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rs6000_split_v4si_init (operands);
  DONE;
})
;; xxpermdi for little endian loads and stores.  We need several of
;; these since the form of the PARALLEL differs by mode.
(define_insn "*vsx_xxpermdi2_le_<mode>"
  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=<VSa>")
	(vec_select:VSX_D
	 (match_operand:VSX_D 1 "vsx_register_operand" "<VSa>")
	 (parallel [(const_int 1) (const_int 0)])))]
  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)"
  "xxpermdi %x0,%x1,%x1,2"
  [(set_attr "type" "vecperm")])

(define_insn "*vsx_xxpermdi4_le_<mode>"
  [(set (match_operand:VSX_W 0 "vsx_register_operand" "=<VSa>")
	(vec_select:VSX_W
	 (match_operand:VSX_W 1 "vsx_register_operand" "<VSa>")
	 (parallel [(const_int 2) (const_int 3)
		    (const_int 0) (const_int 1)])))]
  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)"
  "xxpermdi %x0,%x1,%x1,2"
  [(set_attr "type" "vecperm")])

(define_insn "*vsx_xxpermdi8_le_V8HI"
  [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
	(vec_select:V8HI
	 (match_operand:V8HI 1 "vsx_register_operand" "wa")
	 (parallel [(const_int 4) (const_int 5)
		    (const_int 6) (const_int 7)
		    (const_int 0) (const_int 1)
		    (const_int 2) (const_int 3)])))]
  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode)"
  "xxpermdi %x0,%x1,%x1,2"
  [(set_attr "type" "vecperm")])

(define_insn "*vsx_xxpermdi16_le_V16QI"
  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
	(vec_select:V16QI
	 (match_operand:V16QI 1 "vsx_register_operand" "wa")
	 (parallel [(const_int 8) (const_int 9)
		    (const_int 10) (const_int 11)
		    (const_int 12) (const_int 13)
		    (const_int 14) (const_int 15)
		    (const_int 0) (const_int 1)
		    (const_int 2) (const_int 3)
		    (const_int 4) (const_int 5)
		    (const_int 6) (const_int 7)])))]
  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode)"
  "xxpermdi %x0,%x1,%x1,2"
  [(set_attr "type" "vecperm")])
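;; Illustrative note (added): with both sources equal, permute immediate 2
;; selects { src.dword[1], src.dword[0] }, so "xxpermdi %x0,%x1,%x1,2"
;; swaps the two doublewords; that is exactly the fixup the little endian
;; lxvd2x/stxvd2x patterns below rely on.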
;; lxvd2x for little endian loads.  We need several of
;; these since the form of the PARALLEL differs by mode.
(define_insn "*vsx_lxvd2x2_le_<mode>"
  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=<VSa>")
	(vec_select:VSX_D
	 (match_operand:VSX_D 1 "memory_operand" "Z")
	 (parallel [(const_int 1) (const_int 0)])))]
  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
  "lxvd2x %x0,%y1"
  [(set_attr "type" "vecload")])

(define_insn "*vsx_lxvd2x4_le_<mode>"
  [(set (match_operand:VSX_W 0 "vsx_register_operand" "=<VSa>")
	(vec_select:VSX_W
	 (match_operand:VSX_W 1 "memory_operand" "Z")
	 (parallel [(const_int 2) (const_int 3)
		    (const_int 0) (const_int 1)])))]
  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
  "lxvd2x %x0,%y1"
  [(set_attr "type" "vecload")])

(define_insn "*vsx_lxvd2x8_le_V8HI"
  [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
	(vec_select:V8HI
	 (match_operand:V8HI 1 "memory_operand" "Z")
	 (parallel [(const_int 4) (const_int 5)
		    (const_int 6) (const_int 7)
		    (const_int 0) (const_int 1)
		    (const_int 2) (const_int 3)])))]
  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode) && !TARGET_P9_VECTOR"
  "lxvd2x %x0,%y1"
  [(set_attr "type" "vecload")])

(define_insn "*vsx_lxvd2x16_le_V16QI"
  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
	(vec_select:V16QI
	 (match_operand:V16QI 1 "memory_operand" "Z")
	 (parallel [(const_int 8) (const_int 9)
		    (const_int 10) (const_int 11)
		    (const_int 12) (const_int 13)
		    (const_int 14) (const_int 15)
		    (const_int 0) (const_int 1)
		    (const_int 2) (const_int 3)
		    (const_int 4) (const_int 5)
		    (const_int 6) (const_int 7)])))]
  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode) && !TARGET_P9_VECTOR"
  "lxvd2x %x0,%y1"
  [(set_attr "type" "vecload")])

;; stxvd2x for little endian stores.  We need several of
;; these since the form of the PARALLEL differs by mode.
(define_insn "*vsx_stxvd2x2_le_<mode>"
  [(set (match_operand:VSX_D 0 "memory_operand" "=Z")
	(vec_select:VSX_D
	 (match_operand:VSX_D 1 "vsx_register_operand" "<VSa>")
	 (parallel [(const_int 1) (const_int 0)])))]
  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
  "stxvd2x %x1,%y0"
  [(set_attr "type" "vecstore")])

(define_insn "*vsx_stxvd2x4_le_<mode>"
  [(set (match_operand:VSX_W 0 "memory_operand" "=Z")
	(vec_select:VSX_W
	 (match_operand:VSX_W 1 "vsx_register_operand" "<VSa>")
	 (parallel [(const_int 2) (const_int 3)
		    (const_int 0) (const_int 1)])))]
  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
  "stxvd2x %x1,%y0"
  [(set_attr "type" "vecstore")])

(define_insn "*vsx_stxvd2x8_le_V8HI"
  [(set (match_operand:V8HI 0 "memory_operand" "=Z")
	(vec_select:V8HI
	 (match_operand:V8HI 1 "vsx_register_operand" "wa")
	 (parallel [(const_int 4) (const_int 5)
		    (const_int 6) (const_int 7)
		    (const_int 0) (const_int 1)
		    (const_int 2) (const_int 3)])))]
  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode) && !TARGET_P9_VECTOR"
  "stxvd2x %x1,%y0"
  [(set_attr "type" "vecstore")])

(define_insn "*vsx_stxvd2x16_le_V16QI"
  [(set (match_operand:V16QI 0 "memory_operand" "=Z")
	(vec_select:V16QI
	 (match_operand:V16QI 1 "vsx_register_operand" "wa")
	 (parallel [(const_int 8) (const_int 9)
		    (const_int 10) (const_int 11)
		    (const_int 12) (const_int 13)
		    (const_int 14) (const_int 15)
		    (const_int 0) (const_int 1)
		    (const_int 2) (const_int 3)
		    (const_int 4) (const_int 5)
		    (const_int 6) (const_int 7)])))]
  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode) && !TARGET_P9_VECTOR"
  "stxvd2x %x1,%y0"
  [(set_attr "type" "vecstore")])
;; Convert a TImode value into V1TImode
(define_expand "vsx_set_v1ti"
  [(match_operand:V1TI 0 "nonimmediate_operand" "")
   (match_operand:V1TI 1 "nonimmediate_operand" "")
   (match_operand:TI 2 "input_operand" "")
   (match_operand:QI 3 "u5bit_cint_operand" "")]
  "VECTOR_MEM_VSX_P (V1TImode)"
{
  if (operands[3] != const0_rtx)
    gcc_unreachable ();

  emit_move_insn (operands[0], gen_lowpart (V1TImode, operands[2]));
  DONE;
})

;; Rewrite V2DF/V2DI set in terms of VEC_CONCAT
(define_expand "vsx_set_<mode>"
  [(use (match_operand:VSX_D 0 "vsx_register_operand"))
   (use (match_operand:VSX_D 1 "vsx_register_operand"))
   (use (match_operand:<VS_scalar> 2 "gpc_reg_operand"))
   (use (match_operand:QI 3 "const_0_to_1_operand"))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  rtx dest = operands[0];
  rtx vec_reg = operands[1];
  rtx value = operands[2];
  rtx ele = operands[3];
  rtx tmp = gen_reg_rtx (<VS_scalar>mode);

  if (ele == const0_rtx)
    {
      emit_insn (gen_vsx_extract_<mode> (tmp, vec_reg, const1_rtx));
      emit_insn (gen_vsx_concat_<mode> (dest, value, tmp));
      DONE;
    }
  else if (ele == const1_rtx)
    {
      emit_insn (gen_vsx_extract_<mode> (tmp, vec_reg, const0_rtx));
      emit_insn (gen_vsx_concat_<mode> (dest, tmp, value));
      DONE;
    }
  else
    gcc_unreachable ();
})
;; Extract a DF/DI element from V2DF/V2DI
;; Optimize cases where we can do a simple or direct move.
;; Or see if we can avoid doing the move at all

;; There are some unresolved problems with reload that show up if an Altivec
;; register was picked.  Limit the scalar value to FPRs for now.

(define_insn "vsx_extract_<mode>"
  [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=d, d, wr, wr")
	(vec_select:<VS_scalar>
	 (match_operand:VSX_D 1 "gpc_reg_operand" "<VSa>, <VSa>, wm, wo")
	 (parallel
	  [(match_operand:QI 2 "const_0_to_1_operand" "wD, n, wD, n")])))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  int element = INTVAL (operands[2]);
  int op0_regno = REGNO (operands[0]);
  int op1_regno = REGNO (operands[1]);
  int fldDM;

  gcc_assert (IN_RANGE (element, 0, 1));
  gcc_assert (VSX_REGNO_P (op1_regno));

  if (element == VECTOR_ELEMENT_SCALAR_64BIT)
    {
      if (op0_regno == op1_regno)
	return ASM_COMMENT_START " vec_extract to same register";

      else if (INT_REGNO_P (op0_regno) && TARGET_DIRECT_MOVE
	       && TARGET_POWERPC64)
	return "mfvsrd %0,%x1";

      else if (FP_REGNO_P (op0_regno) && FP_REGNO_P (op1_regno))
	return "fmr %0,%1";

      else if (VSX_REGNO_P (op0_regno))
	return "xxlor %x0,%x1,%x1";

      else
	gcc_unreachable ();
    }

  else if (element == VECTOR_ELEMENT_MFVSRLD_64BIT && INT_REGNO_P (op0_regno)
	   && TARGET_P9_VECTOR && TARGET_POWERPC64 && TARGET_DIRECT_MOVE)
    return "mfvsrld %0,%x1";

  else if (VSX_REGNO_P (op0_regno))
    {
      fldDM = element << 1;
      if (!BYTES_BIG_ENDIAN)
	fldDM = 3 - fldDM;
      operands[3] = GEN_INT (fldDM);
      return "xxpermdi %x0,%x1,%x1,%3";
    }

  else
    gcc_unreachable ();
}
  [(set_attr "type" "veclogical,mftgpr,mftgpr,vecperm")])
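;; Worked example (derived from the code above): extracting element 1 on
;; big endian gives fldDM = 1 << 1 = 2, printing "xxpermdi %x0,%x1,%x1,2";
;; on little endian the field is flipped to 3 - 2 = 1 before printing.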
;; Optimize extracting a single scalar element from memory.
(define_insn_and_split "*vsx_extract_<P:mode>_<VSX_D:mode>_load"
  [(set (match_operand:<VS_scalar> 0 "register_operand" "=<VSX_D:VS_64reg>,wr")
	(vec_select:<VSX_D:VS_scalar>
	 (match_operand:VSX_D 1 "memory_operand" "m,m")
	 (parallel [(match_operand:QI 2 "const_0_to_1_operand" "n,n")])))
   (clobber (match_scratch:P 3 "=&b,&b"))]
  "VECTOR_MEM_VSX_P (<VSX_D:MODE>mode)"
  "#"
  "&& reload_completed"
  [(set (match_dup 0) (match_dup 4))]
{
  operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
					   operands[3], <VSX_D:VS_scalar>mode);
}
  [(set_attr "type" "fpload,load")
   (set_attr "length" "8")])

;; Optimize storing a single scalar element that is the right location to
;; store
(define_insn "*vsx_extract_<mode>_store"
  [(set (match_operand:<VS_scalar> 0 "memory_operand" "=m,Z,wY")
	(vec_select:<VS_scalar>
	 (match_operand:VSX_D 1 "register_operand" "d,wv,wb")
	 (parallel [(match_operand:QI 2 "vsx_scalar_64bit" "wD,wD,wD")])))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
  "@
   stfd%U0%X0 %1,%0
   stxsdx %x1,%y0
   stxsd %1,%0"
  [(set_attr "type" "fpstore")
   (set_attr "length" "4")])

;; Variable V2DI/V2DF extract shift
(define_insn "vsx_vslo_<mode>"
  [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=v")
	(unspec:<VS_scalar> [(match_operand:VSX_D 1 "gpc_reg_operand" "v")
			     (match_operand:V2DI 2 "gpc_reg_operand" "v")]
			    UNSPEC_VSX_VSLO))]
  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
  "vslo %0,%1,%2"
  [(set_attr "type" "vecperm")])

;; Variable V2DI/V2DF extract
(define_insn_and_split "vsx_extract_<mode>_var"
  [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=v,<VSa>,r")
	(unspec:<VS_scalar> [(match_operand:VSX_D 1 "input_operand" "v,m,m")
			     (match_operand:DI 2 "gpc_reg_operand" "r,r,r")]
			    UNSPEC_VSX_EXTRACT))
   (clobber (match_scratch:DI 3 "=r,&b,&b"))
   (clobber (match_scratch:V2DI 4 "=&v,X,X"))]
  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rs6000_split_vec_extract_var (operands[0], operands[1], operands[2],
				operands[3], operands[4]);
  DONE;
})
;; Extract a SF element from V4SF
(define_insn_and_split "vsx_extract_v4sf"
  [(set (match_operand:SF 0 "vsx_register_operand" "=ww")
	(vec_select:SF
	 (match_operand:V4SF 1 "vsx_register_operand" "wa")
	 (parallel [(match_operand:QI 2 "u5bit_cint_operand" "n")])))
   (clobber (match_scratch:V4SF 3 "=0"))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
  "#"
  "&& 1"
  [(const_int 0)]
{
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  rtx op2 = operands[2];
  rtx op3 = operands[3];
  rtx tmp;
  HOST_WIDE_INT ele = BYTES_BIG_ENDIAN ? INTVAL (op2) : 3 - INTVAL (op2);

  if (ele == 0)
    tmp = op1;
  else
    {
      if (GET_CODE (op3) == SCRATCH)
	op3 = gen_reg_rtx (V4SFmode);
      emit_insn (gen_vsx_xxsldwi_v4sf (op3, op1, op1, GEN_INT (ele)));
      tmp = op3;
    }
  emit_insn (gen_vsx_xscvspdp_scalar2 (op0, tmp));
  DONE;
}
  [(set_attr "length" "8")
   (set_attr "type" "fp")])

(define_insn_and_split "*vsx_extract_v4sf_<mode>_load"
  [(set (match_operand:SF 0 "register_operand" "=f,wv,wb,?r")
	(vec_select:SF
	 (match_operand:V4SF 1 "memory_operand" "m,Z,m,m")
	 (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n,n,n")])))
   (clobber (match_scratch:P 3 "=&b,&b,&b,&b"))]
  "VECTOR_MEM_VSX_P (V4SFmode)"
  "#"
  "&& reload_completed"
  [(set (match_dup 0) (match_dup 4))]
{
  operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
					   operands[3], SFmode);
}
  [(set_attr "type" "fpload,fpload,fpload,load")
   (set_attr "length" "8")])

;; Variable V4SF extract
(define_insn_and_split "vsx_extract_v4sf_var"
  [(set (match_operand:SF 0 "gpc_reg_operand" "=ww,ww,?r")
	(unspec:SF [(match_operand:V4SF 1 "input_operand" "v,m,m")
		    (match_operand:DI 2 "gpc_reg_operand" "r,r,r")]
		   UNSPEC_VSX_EXTRACT))
   (clobber (match_scratch:DI 3 "=r,&b,&b"))
   (clobber (match_scratch:V2DI 4 "=&v,X,X"))]
  "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_DIRECT_MOVE_64BIT"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rs6000_split_vec_extract_var (operands[0], operands[1], operands[2],
				operands[3], operands[4]);
  DONE;
})
;; Expand the builtin form of xxpermdi to canonical rtl.
(define_expand "vsx_xxpermdi_<mode>"
  [(match_operand:VSX_L 0 "vsx_register_operand")
   (match_operand:VSX_L 1 "vsx_register_operand")
   (match_operand:VSX_L 2 "vsx_register_operand")
   (match_operand:QI 3 "u5bit_cint_operand")]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  rtx target = operands[0];
  rtx op0 = operands[1];
  rtx op1 = operands[2];
  int mask = INTVAL (operands[3]);
  rtx perm0 = GEN_INT ((mask >> 1) & 1);
  rtx perm1 = GEN_INT ((mask & 1) + 2);
  rtx (*gen) (rtx, rtx, rtx, rtx, rtx);

  if (<MODE>mode == V2DFmode)
    gen = gen_vsx_xxpermdi2_v2df_1;
  else
    {
      gen = gen_vsx_xxpermdi2_v2di_1;
      if (<MODE>mode != V2DImode)
	{
	  target = gen_lowpart (V2DImode, target);
	  op0 = gen_lowpart (V2DImode, op0);
	  op1 = gen_lowpart (V2DImode, op1);
	}
    }
  emit_insn (gen (target, op0, op1, perm0, perm1));
  DONE;
})

;; Special version of xxpermdi that retains big-endian semantics.
(define_expand "vsx_xxpermdi_<mode>_be"
  [(match_operand:VSX_L 0 "vsx_register_operand")
   (match_operand:VSX_L 1 "vsx_register_operand")
   (match_operand:VSX_L 2 "vsx_register_operand")
   (match_operand:QI 3 "u5bit_cint_operand")]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  rtx target = operands[0];
  rtx op0 = operands[1];
  rtx op1 = operands[2];
  int mask = INTVAL (operands[3]);
  rtx perm0 = GEN_INT ((mask >> 1) & 1);
  rtx perm1 = GEN_INT ((mask & 1) + 2);
  rtx (*gen) (rtx, rtx, rtx, rtx, rtx);

  if (<MODE>mode == V2DFmode)
    gen = gen_vsx_xxpermdi2_v2df_1;
  else
    {
      gen = gen_vsx_xxpermdi2_v2di_1;
      if (<MODE>mode != V2DImode)
	{
	  target = gen_lowpart (V2DImode, target);
	  op0 = gen_lowpart (V2DImode, op0);
	  op1 = gen_lowpart (V2DImode, op1);
	}
    }

  /* In little endian mode, vsx_xxpermdi2_<mode>_1 will perform a
     transformation we don't want; it is necessary for
     rs6000_expand_vec_perm_const_1 but not for this use.  So we
     prepare for that by reversing the transformation here.  */
  if (BYTES_BIG_ENDIAN)
    emit_insn (gen (target, op0, op1, perm0, perm1));
  else
    {
      rtx p0 = GEN_INT (3 - INTVAL (perm1));
      rtx p1 = GEN_INT (3 - INTVAL (perm0));
      emit_insn (gen (target, op1, op0, p0, p1));
    }
  DONE;
})

(define_insn "vsx_xxpermdi2_<mode>_1"
  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wd")
	(vec_select:VSX_D
	 (vec_concat:<VS_double>
	  (match_operand:VSX_D 1 "vsx_register_operand" "wd")
	  (match_operand:VSX_D 2 "vsx_register_operand" "wd"))
	 (parallel [(match_operand 3 "const_0_to_1_operand" "")
		    (match_operand 4 "const_2_to_3_operand" "")])))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  int op3, op4, mask;

  /* For little endian, swap operands and invert/swap selectors
     to get the correct xxpermdi.  The operand swap sets up the
     inputs as a little endian array.  The selectors are swapped
     because they are defined to use big endian ordering.  The
     selectors are inverted to get the correct doublewords for
     little endian ordering.  */
  if (BYTES_BIG_ENDIAN)
    {
      op3 = INTVAL (operands[3]);
      op4 = INTVAL (operands[4]);
    }
  else
    {
      op3 = 3 - INTVAL (operands[4]);
      op4 = 3 - INTVAL (operands[3]);
    }

  mask = (op3 << 1) | (op4 - 2);
  operands[3] = GEN_INT (mask);

  if (BYTES_BIG_ENDIAN)
    return "xxpermdi %x0,%x1,%x2,%3";
  else
    return "xxpermdi %x0,%x2,%x1,%3";
}
  [(set_attr "type" "vecperm")])
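;; Worked example (derived from the code above): for selectors (1,2) on
;; little endian, op3 = 3 - 2 = 1 and op4 = 3 - 1 = 2, so
;; mask = (1 << 1) | (2 - 2) = 2 and the insn prints
;; "xxpermdi %x0,%x2,%x1,2" with the input operands swapped.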
;; Extraction of a single element in a small integer vector.  Until ISA 3.0,
;; none of the small types were allowed in a vector register, so we had to
;; extract to a DImode and either do a direct move or store.
(define_expand "vsx_extract_<mode>"
  [(parallel [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand")
		   (vec_select:<VS_scalar>
		    (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand")
		    (parallel [(match_operand:QI 2 "const_int_operand")])))
	      (clobber (match_scratch:VSX_EXTRACT_I 3))])]
  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
{
  /* If we have ISA 3.0, we can do a xxextractuw/vextractu{b,h}.  */
  if (TARGET_P9_VECTOR)
    {
      emit_insn (gen_vsx_extract_<mode>_p9 (operands[0], operands[1],
					    operands[2]));
      DONE;
    }
})

(define_insn "vsx_extract_<mode>_p9"
  [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=r,<VSX_EX>")
	(vec_select:<VS_scalar>
	 (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "wK,<VSX_EX>")
	 (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n,n")])))
   (clobber (match_scratch:SI 3 "=r,X"))]
  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB"
{
  if (which_alternative == 0)
    return "#";

  else
    {
      HOST_WIDE_INT elt = INTVAL (operands[2]);
      HOST_WIDE_INT elt_adj = (!VECTOR_ELT_ORDER_BIG
			       ? GET_MODE_NUNITS (<MODE>mode) - 1 - elt
			       : elt);

      HOST_WIDE_INT unit_size = GET_MODE_UNIT_SIZE (<MODE>mode);
      HOST_WIDE_INT offset = unit_size * elt_adj;

      operands[2] = GEN_INT (offset);
      if (unit_size == 4)
	return "xxextractuw %x0,%x1,%2";
      else
	return "vextractu<wd> %0,%1,%2";
    }
}
  [(set_attr "type" "vecsimple")])

(define_split
  [(set (match_operand:<VS_scalar> 0 "int_reg_operand")
	(vec_select:<VS_scalar>
	 (match_operand:VSX_EXTRACT_I 1 "altivec_register_operand")
	 (parallel [(match_operand:QI 2 "const_int_operand")])))
   (clobber (match_operand:SI 3 "int_reg_operand"))]
  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB && reload_completed"
  [(const_int 0)]
{
  rtx op0_si = gen_rtx_REG (SImode, REGNO (operands[0]));
  rtx op1 = operands[1];
  rtx op2 = operands[2];
  rtx op3 = operands[3];
  HOST_WIDE_INT offset = INTVAL (op2) * GET_MODE_UNIT_SIZE (<MODE>mode);

  emit_move_insn (op3, GEN_INT (offset));
  if (VECTOR_ELT_ORDER_BIG)
    emit_insn (gen_vextu<wd>lx (op0_si, op3, op1));
  else
    emit_insn (gen_vextu<wd>rx (op0_si, op3, op1));
  DONE;
})
;; Optimize zero extracts to eliminate the AND after the extract.
(define_insn_and_split "*vsx_extract_<mode>_di_p9"
  [(set (match_operand:DI 0 "gpc_reg_operand" "=r,<VSX_EX>")
	(zero_extend:DI
	 (vec_select:<VS_scalar>
	  (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "wK,<VSX_EX>")
	  (parallel [(match_operand:QI 2 "const_int_operand" "n,n")]))))
   (clobber (match_scratch:SI 3 "=r,X"))]
  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB"
  "#"
  "&& reload_completed"
  [(parallel [(set (match_dup 4)
		   (vec_select:<VS_scalar>
		    (match_dup 1)
		    (parallel [(match_dup 2)])))
	      (clobber (match_dup 3))])]
{
  operands[4] = gen_rtx_REG (<VS_scalar>mode, REGNO (operands[0]));
})

;; Optimize stores to use the ISA 3.0 scalar store instructions
(define_insn_and_split "*vsx_extract_<mode>_store_p9"
  [(set (match_operand:<VS_scalar> 0 "memory_operand" "=Z,m")
	(vec_select:<VS_scalar>
	 (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "<VSX_EX>,v")
	 (parallel [(match_operand:QI 2 "const_int_operand" "n,n")])))
   (clobber (match_scratch:<VS_scalar> 3 "=<VSX_EX>,&r"))
   (clobber (match_scratch:SI 4 "=X,&r"))]
  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB"
  "#"
  "&& reload_completed"
  [(parallel [(set (match_dup 3)
		   (vec_select:<VS_scalar>
		    (match_dup 1)
		    (parallel [(match_dup 2)])))
	      (clobber (match_dup 4))])
   (set (match_dup 0)
	(match_dup 3))])
(define_insn_and_split "*vsx_extract_si"
  [(set (match_operand:SI 0 "nonimmediate_operand" "=r,wHwI,Z")
	(vec_select:SI
	 (match_operand:V4SI 1 "gpc_reg_operand" "wJv,wJv,wJv")
	 (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n,n")])))
   (clobber (match_scratch:V4SI 3 "=wJv,wJv,wJv"))]
  "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT && !TARGET_P9_VECTOR"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rtx dest = operands[0];
  rtx src = operands[1];
  rtx element = operands[2];
  rtx vec_tmp = operands[3];
  int value;

  if (!VECTOR_ELT_ORDER_BIG)
    element = GEN_INT (GET_MODE_NUNITS (V4SImode) - 1 - INTVAL (element));

  /* If the value is in the correct position, we can avoid doing the VSPLT<x>
     instruction.  */
  value = INTVAL (element);
  if (value != 1)
    emit_insn (gen_altivec_vspltw_direct (vec_tmp, src, element));
  else
    vec_tmp = src;

  if (MEM_P (operands[0]))
    {
      if (can_create_pseudo_p ())
	dest = rs6000_address_for_fpconvert (dest);

      if (TARGET_P8_VECTOR)
	emit_move_insn (dest, gen_rtx_REG (SImode, REGNO (vec_tmp)));
      else
	emit_insn (gen_stfiwx (dest, gen_rtx_REG (DImode, REGNO (vec_tmp))));
    }

  else if (TARGET_P8_VECTOR)
    emit_move_insn (dest, gen_rtx_REG (SImode, REGNO (vec_tmp)));
  else
    emit_move_insn (gen_rtx_REG (DImode, REGNO (dest)),
		    gen_rtx_REG (DImode, REGNO (vec_tmp)));

  DONE;
}
  [(set_attr "type" "mftgpr,vecperm,fpstore")
   (set_attr "length" "8")])

(define_insn_and_split "*vsx_extract_<mode>_p8"
  [(set (match_operand:<VS_scalar> 0 "nonimmediate_operand" "=r")
	(vec_select:<VS_scalar>
	 (match_operand:VSX_EXTRACT_I2 1 "gpc_reg_operand" "v")
	 (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n")])))
   (clobber (match_scratch:VSX_EXTRACT_I2 3 "=v"))]
  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT
   && !TARGET_P9_VECTOR"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rtx dest = operands[0];
  rtx src = operands[1];
  rtx element = operands[2];
  rtx vec_tmp = operands[3];
  int value;

  if (!VECTOR_ELT_ORDER_BIG)
    element = GEN_INT (GET_MODE_NUNITS (<MODE>mode) - 1 - INTVAL (element));

  /* If the value is in the correct position, we can avoid doing the VSPLT<x>
     instruction.  */
  value = INTVAL (element);
  if (<MODE>mode == V16QImode)
    {
      if (value != 7)
	emit_insn (gen_altivec_vspltb_direct (vec_tmp, src, element));
      else
	vec_tmp = src;
    }
  else if (<MODE>mode == V8HImode)
    {
      if (value != 3)
	emit_insn (gen_altivec_vsplth_direct (vec_tmp, src, element));
      else
	vec_tmp = src;
    }
  else
    gcc_unreachable ();

  emit_move_insn (gen_rtx_REG (DImode, REGNO (dest)),
		  gen_rtx_REG (DImode, REGNO (vec_tmp)));
  DONE;
}
  [(set_attr "type" "mftgpr")])
;; Optimize extracting a single scalar element from memory.
(define_insn_and_split "*vsx_extract_<mode>_load"
  [(set (match_operand:<VS_scalar> 0 "register_operand" "=r")
	(vec_select:<VS_scalar>
	 (match_operand:VSX_EXTRACT_I 1 "memory_operand" "m")
	 (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n")])))
   (clobber (match_scratch:DI 3 "=&b"))]
  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
  "#"
  "&& reload_completed"
  [(set (match_dup 0) (match_dup 4))]
{
  operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
					   operands[3], <VS_scalar>mode);
}
  [(set_attr "type" "load")
   (set_attr "length" "8")])

;; Variable V16QI/V8HI/V4SI extract
(define_insn_and_split "vsx_extract_<mode>_var"
  [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=r,r,r")
	(unspec:<VS_scalar>
	 [(match_operand:VSX_EXTRACT_I 1 "input_operand" "wK,v,m")
	  (match_operand:DI 2 "gpc_reg_operand" "r,r,r")]
	 UNSPEC_VSX_EXTRACT))
   (clobber (match_scratch:DI 3 "=r,r,&b"))
   (clobber (match_scratch:V2DI 4 "=X,&v,X"))]
  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rs6000_split_vec_extract_var (operands[0], operands[1], operands[2],
				operands[3], operands[4]);
  DONE;
})

(define_insn_and_split "*vsx_extract_<VSX_EXTRACT_I:mode>_<SDI:mode>_var"
  [(set (match_operand:SDI 0 "gpc_reg_operand" "=r,r,r")
	(zero_extend:SDI
	 (unspec:<VSX_EXTRACT_I:VS_scalar>
	  [(match_operand:VSX_EXTRACT_I 1 "input_operand" "wK,v,m")
	   (match_operand:DI 2 "gpc_reg_operand" "r,r,r")]
	  UNSPEC_VSX_EXTRACT)))
   (clobber (match_scratch:DI 3 "=r,r,&b"))
   (clobber (match_scratch:V2DI 4 "=X,&v,X"))]
  "VECTOR_MEM_VSX_P (<VSX_EXTRACT_I:MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  machine_mode smode = <VSX_EXTRACT_I:MODE>mode;
  rs6000_split_vec_extract_var (gen_rtx_REG (smode, REGNO (operands[0])),
				operands[1], operands[2],
				operands[3], operands[4]);
  DONE;
})
3780 ;; VSX_EXTRACT optimizations
3781 ;; Optimize double d = (double) vec_extract (vi, <n>)
3782 ;; Get the element into the top position and use XVCVSWDP/XVCVUWDP
3783 (define_insn_and_split "*vsx_extract_si_<uns>float_df"
3784 [(set (match_operand:DF 0 "gpc_reg_operand" "=ws")
3787 (match_operand:V4SI 1 "gpc_reg_operand" "v")
3788 (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n")]))))
3789 (clobber (match_scratch:V4SI 3 "=v"))]
3790 "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
3795 rtx dest = operands[0];
3796 rtx src = operands[1];
3797 rtx element = operands[2];
3798 rtx v4si_tmp = operands[3];
3801 if (!VECTOR_ELT_ORDER_BIG)
3802 element = GEN_INT (GET_MODE_NUNITS (V4SImode) - 1 - INTVAL (element));
  /* If the value is in the correct position, we can avoid doing the VSPLT<x>
     instruction.  */
3806 value = INTVAL (element);
3809 if (GET_CODE (v4si_tmp) == SCRATCH)
3810 v4si_tmp = gen_reg_rtx (V4SImode);
3811 emit_insn (gen_altivec_vspltw_direct (v4si_tmp, src, element));
3816 emit_insn (gen_vsx_xvcv<su>xwdp_df (dest, v4si_tmp));
3820 ;; Optimize <type> f = (<type>) vec_extract (vi, <n>)
;; where <type> is a floating point type that is supported by the hardware
;; and is not double.  First convert the value to double, and then to the
;; desired type.
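;; For example (illustrative sketch, assuming <altivec.h>):
;;
;;	#include <altivec.h>
;;	float f (vector int vi) { return (float) vec_extract (vi, 1); }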
3824 (define_insn_and_split "*vsx_extract_si_<uns>float_<mode>"
3825 [(set (match_operand:VSX_EXTRACT_FL 0 "gpc_reg_operand" "=ww")
3826 (any_float:VSX_EXTRACT_FL
3828 (match_operand:V4SI 1 "gpc_reg_operand" "v")
3829 (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n")]))))
3830 (clobber (match_scratch:V4SI 3 "=v"))
3831 (clobber (match_scratch:DF 4 "=ws"))]
3832 "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
3837 rtx dest = operands[0];
3838 rtx src = operands[1];
3839 rtx element = operands[2];
3840 rtx v4si_tmp = operands[3];
3841 rtx df_tmp = operands[4];
3844 if (!VECTOR_ELT_ORDER_BIG)
3845 element = GEN_INT (GET_MODE_NUNITS (V4SImode) - 1 - INTVAL (element));
  /* If the value is in the correct position, we can avoid doing the VSPLT<x>
     instruction.  */
3849 value = INTVAL (element);
3852 if (GET_CODE (v4si_tmp) == SCRATCH)
3853 v4si_tmp = gen_reg_rtx (V4SImode);
3854 emit_insn (gen_altivec_vspltw_direct (v4si_tmp, src, element));
3859 if (GET_CODE (df_tmp) == SCRATCH)
3860 df_tmp = gen_reg_rtx (DFmode);
3862 emit_insn (gen_vsx_xvcv<su>xwdp_df (df_tmp, v4si_tmp));
3864 if (<MODE>mode == SFmode)
3865 emit_insn (gen_truncdfsf2 (dest, df_tmp));
3866 else if (<MODE>mode == TFmode && FLOAT128_IBM_P (TFmode))
3867 emit_insn (gen_extenddftf2_vsx (dest, df_tmp));
3868 else if (<MODE>mode == TFmode && FLOAT128_IEEE_P (TFmode)
3869 && TARGET_FLOAT128_HW)
3870 emit_insn (gen_extenddftf2_hw (dest, df_tmp));
3871 else if (<MODE>mode == IFmode && FLOAT128_IBM_P (IFmode))
3872 emit_insn (gen_extenddfif2 (dest, df_tmp));
3873 else if (<MODE>mode == KFmode && TARGET_FLOAT128_HW)
3874 emit_insn (gen_extenddfkf2_hw (dest, df_tmp));
;; Optimize <ftype> f = (<ftype>) vec_extract (<vtype>, <n>)
3882 ;; Where <ftype> is SFmode, DFmode (and KFmode/TFmode if those types are IEEE
3883 ;; 128-bit hardware types) and <vtype> is vector char, vector unsigned char,
3884 ;; vector short or vector unsigned short.
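;; For example (illustrative sketch, assuming <altivec.h>):
;;
;;	#include <altivec.h>
;;	float f (vector unsigned char v) { return (float) vec_extract (v, 5); }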
3885 (define_insn_and_split "*vsx_ext_<VSX_EXTRACT_I:VS_scalar>_fl_<FL_CONV:mode>"
3886 [(set (match_operand:FL_CONV 0 "gpc_reg_operand" "=<FL_CONV:VSr3>")
3888 (vec_select:<VSX_EXTRACT_I:VS_scalar>
3889 (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v")
3890 (parallel [(match_operand:QI 2 "const_int_operand" "n")]))))
3891 (clobber (match_scratch:<VSX_EXTRACT_I:VS_scalar> 3 "=v"))]
3892 "VECTOR_MEM_VSX_P (<VSX_EXTRACT_I:MODE>mode) && TARGET_DIRECT_MOVE_64BIT
3893 && TARGET_P9_VECTOR"
3895 "&& reload_completed"
3896 [(parallel [(set (match_dup 3)
3897 (vec_select:<VSX_EXTRACT_I:VS_scalar>
3899 (parallel [(match_dup 2)])))
3900 (clobber (scratch:SI))])
3902 (sign_extend:DI (match_dup 3)))
3904 (float:<FL_CONV:MODE> (match_dup 4)))]
3906 operands[4] = gen_rtx_REG (DImode, REGNO (operands[3]));
3909 (define_insn_and_split "*vsx_ext_<VSX_EXTRACT_I:VS_scalar>_ufl_<FL_CONV:mode>"
3910 [(set (match_operand:FL_CONV 0 "gpc_reg_operand" "=<FL_CONV:VSr3>")
3911 (unsigned_float:FL_CONV
3912 (vec_select:<VSX_EXTRACT_I:VS_scalar>
3913 (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v")
3914 (parallel [(match_operand:QI 2 "const_int_operand" "n")]))))
3915 (clobber (match_scratch:<VSX_EXTRACT_I:VS_scalar> 3 "=v"))]
3916 "VECTOR_MEM_VSX_P (<VSX_EXTRACT_I:MODE>mode) && TARGET_DIRECT_MOVE_64BIT
3917 && TARGET_P9_VECTOR"
3919 "&& reload_completed"
3920 [(parallel [(set (match_dup 3)
3921 (vec_select:<VSX_EXTRACT_I:VS_scalar>
3923 (parallel [(match_dup 2)])))
3924 (clobber (scratch:SI))])
3926 (float:<FL_CONV:MODE> (match_dup 4)))]
3928 operands[4] = gen_rtx_REG (DImode, REGNO (operands[3]));
3931 ;; V4SI/V8HI/V16QI set operation on ISA 3.0
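;; This is what a constant-index vec_insert on a small integer vector
;; becomes, e.g. (illustrative sketch, assuming <altivec.h> and ISA 3.0):
;;
;;	#include <altivec.h>
;;	vector int set1 (vector int v, int x) { return vec_insert (x, v, 1); }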
3932 (define_insn "vsx_set_<mode>_p9"
3933 [(set (match_operand:VSX_EXTRACT_I 0 "gpc_reg_operand" "=<VSX_EX>")
3934 (unspec:VSX_EXTRACT_I
3935 [(match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "0")
3936 (match_operand:<VS_scalar> 2 "gpc_reg_operand" "<VSX_EX>")
3937 (match_operand:QI 3 "<VSX_EXTRACT_PREDICATE>" "n")]
3939 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_P9_VECTOR && TARGET_POWERPC64"
3941 int ele = INTVAL (operands[3]);
3942 int nunits = GET_MODE_NUNITS (<MODE>mode);
3944 if (!VECTOR_ELT_ORDER_BIG)
3945 ele = nunits - 1 - ele;
3947 operands[3] = GEN_INT (GET_MODE_SIZE (<VS_scalar>mode) * ele);
3948 if (<MODE>mode == V4SImode)
3949 return "xxinsertw %x0,%x2,%3";
3951 return "vinsert<wd> %0,%2,%3";
3953 [(set_attr "type" "vecperm")])
3955 (define_insn_and_split "vsx_set_v4sf_p9"
3956 [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
3958 [(match_operand:V4SF 1 "gpc_reg_operand" "0")
3959 (match_operand:SF 2 "gpc_reg_operand" "ww")
3960 (match_operand:QI 3 "const_0_to_3_operand" "n")]
3962 (clobber (match_scratch:SI 4 "=&wJwK"))]
3963 "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR && TARGET_POWERPC64"
3965 "&& reload_completed"
3967 (unspec:V4SF [(match_dup 2)]
3968 UNSPEC_VSX_CVDPSPN))
3969 (parallel [(set (match_dup 4)
3970 (vec_select:SI (match_dup 6)
3971 (parallel [(match_dup 7)])))
3972 (clobber (scratch:SI))])
3974 (unspec:V4SI [(match_dup 8)
3979 unsigned int tmp_regno = reg_or_subregno (operands[4]);
3981 operands[5] = gen_rtx_REG (V4SFmode, tmp_regno);
3982 operands[6] = gen_rtx_REG (V4SImode, tmp_regno);
3983 operands[7] = GEN_INT (VECTOR_ELT_ORDER_BIG ? 1 : 2);
3984 operands[8] = gen_rtx_REG (V4SImode, reg_or_subregno (operands[0]));
3986 [(set_attr "type" "vecperm")
3987 (set_attr "length" "12")])
3989 ;; Special case setting 0.0f to a V4SF element
3990 (define_insn_and_split "*vsx_set_v4sf_p9_zero"
3991 [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
3993 [(match_operand:V4SF 1 "gpc_reg_operand" "0")
3994 (match_operand:SF 2 "zero_fp_constant" "j")
3995 (match_operand:QI 3 "const_0_to_3_operand" "n")]
3997 (clobber (match_scratch:SI 4 "=&wJwK"))]
3998 "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR && TARGET_POWERPC64"
4000 "&& reload_completed"
4004 (unspec:V4SI [(match_dup 5)
4009 operands[5] = gen_rtx_REG (V4SImode, reg_or_subregno (operands[0]));
4011 [(set_attr "type" "vecperm")
4012 (set_attr "length" "8")])
4014 ;; Optimize x = vec_insert (vec_extract (v2, n), v1, m) if n is the element
4015 ;; that is in the default scalar position (1 for big endian, 2 for little
4016 ;; endian). We just need to do an xxinsertw since the element is in the
4017 ;; correct location.
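;; For example, on a big-endian target (illustrative sketch, assuming
;; <altivec.h>):
;;
;;	#include <altivec.h>
;;	vector float f (vector float a, vector float b)
;;	{ return vec_insert (vec_extract (b, 1), a, 0); }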
4019 (define_insn "*vsx_insert_extract_v4sf_p9"
4020 [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
4022 [(match_operand:V4SF 1 "gpc_reg_operand" "0")
4023 (vec_select:SF (match_operand:V4SF 2 "gpc_reg_operand" "wa")
4025 [(match_operand:QI 3 "const_0_to_3_operand" "n")]))
4026 (match_operand:QI 4 "const_0_to_3_operand" "n")]
4028 "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR && TARGET_POWERPC64
4029 && (INTVAL (operands[3]) == (VECTOR_ELT_ORDER_BIG ? 1 : 2))"
4031 int ele = INTVAL (operands[4]);
4033 if (!VECTOR_ELT_ORDER_BIG)
4034 ele = GET_MODE_NUNITS (V4SFmode) - 1 - ele;
4036 operands[4] = GEN_INT (GET_MODE_SIZE (SFmode) * ele);
4037 return "xxinsertw %x0,%x2,%4";
4039 [(set_attr "type" "vecperm")])
4041 ;; Optimize x = vec_insert (vec_extract (v2, n), v1, m) if n is not the element
4042 ;; that is in the default scalar position (1 for big endian, 2 for little
4043 ;; endian). Convert the insert/extract to int and avoid doing the conversion.
4045 (define_insn_and_split "*vsx_insert_extract_v4sf_p9_2"
4046 [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
4048 [(match_operand:V4SF 1 "gpc_reg_operand" "0")
4049 (vec_select:SF (match_operand:V4SF 2 "gpc_reg_operand" "wa")
4051 [(match_operand:QI 3 "const_0_to_3_operand" "n")]))
4052 (match_operand:QI 4 "const_0_to_3_operand" "n")]
4054 (clobber (match_scratch:SI 5 "=&wJwK"))]
4055 "VECTOR_MEM_VSX_P (V4SFmode) && VECTOR_MEM_VSX_P (V4SImode)
4056 && TARGET_P9_VECTOR && TARGET_POWERPC64
4057 && (INTVAL (operands[3]) != (VECTOR_ELT_ORDER_BIG ? 1 : 2))"
4060 [(parallel [(set (match_dup 5)
4061 (vec_select:SI (match_dup 6)
4062 (parallel [(match_dup 3)])))
4063 (clobber (scratch:SI))])
4065 (unspec:V4SI [(match_dup 8)
4070 if (GET_CODE (operands[5]) == SCRATCH)
4071 operands[5] = gen_reg_rtx (SImode);
4073 operands[6] = gen_lowpart (V4SImode, operands[2]);
4074 operands[7] = gen_lowpart (V4SImode, operands[0]);
4075 operands[8] = gen_lowpart (V4SImode, operands[1]);
4077 [(set_attr "type" "vecperm")])
4079 ;; Expanders for builtins
4080 (define_expand "vsx_mergel_<mode>"
4081 [(use (match_operand:VSX_D 0 "vsx_register_operand" ""))
4082 (use (match_operand:VSX_D 1 "vsx_register_operand" ""))
4083 (use (match_operand:VSX_D 2 "vsx_register_operand" ""))]
4084 "VECTOR_MEM_VSX_P (<MODE>mode)"
4089 /* Special handling for LE with -maltivec=be. */
4090 if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
4092 v = gen_rtvec (2, GEN_INT (0), GEN_INT (2));
4093 x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[2], operands[1]);
4097 v = gen_rtvec (2, GEN_INT (1), GEN_INT (3));
4098 x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[1], operands[2]);
4101 x = gen_rtx_VEC_SELECT (<MODE>mode, x, gen_rtx_PARALLEL (VOIDmode, v));
4102 emit_insn (gen_rtx_SET (operands[0], x));
4106 (define_expand "vsx_mergeh_<mode>"
4107 [(use (match_operand:VSX_D 0 "vsx_register_operand" ""))
4108 (use (match_operand:VSX_D 1 "vsx_register_operand" ""))
4109 (use (match_operand:VSX_D 2 "vsx_register_operand" ""))]
4110 "VECTOR_MEM_VSX_P (<MODE>mode)"
4115 /* Special handling for LE with -maltivec=be. */
4116 if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
4118 v = gen_rtvec (2, GEN_INT (1), GEN_INT (3));
4119 x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[2], operands[1]);
4123 v = gen_rtvec (2, GEN_INT (0), GEN_INT (2));
4124 x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[1], operands[2]);
4127 x = gen_rtx_VEC_SELECT (<MODE>mode, x, gen_rtx_PARALLEL (VOIDmode, v));
4128 emit_insn (gen_rtx_SET (operands[0], x));
4133 ;; We separate the register splat insn from the memory splat insn to force the
4134 ;; register allocator to generate the indexed form of the SPLAT when it is
4135 ;; given an offsettable memory reference. Otherwise, if the register and
4136 ;; memory insns were combined into a single insn, the register allocator will
4137 ;; load the value into a register, and then do a double word permute.
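;; For example, a splat from an indexed memory reference (illustrative
;; sketch, assuming <altivec.h>):
;;
;;	#include <altivec.h>
;;	vector double splat (double *p, long i) { return vec_splats (p[i]); }
;;
;; should become a single indexed splat-load rather than a load followed
;; by an xxpermdi.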
4138 (define_expand "vsx_splat_<mode>"
4139 [(set (match_operand:VSX_D 0 "vsx_register_operand")
4140 (vec_duplicate:VSX_D
4141 (match_operand:<VS_scalar> 1 "input_operand")))]
4142 "VECTOR_MEM_VSX_P (<MODE>mode)"
4144 rtx op1 = operands[1];
4146 operands[1] = rs6000_address_for_fpconvert (op1);
4147 else if (!REG_P (op1))
4148 op1 = force_reg (<VSX_D:VS_scalar>mode, op1);
4151 (define_insn "vsx_splat_<mode>_reg"
4152 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=<VSX_D:VSa>,?we")
4153 (vec_duplicate:VSX_D
4154 (match_operand:<VS_scalar> 1 "gpc_reg_operand" "<VSX_D:VS_64reg>,b")))]
4155 "VECTOR_MEM_VSX_P (<MODE>mode)"
4157 xxpermdi %x0,%x1,%x1,0
4159 [(set_attr "type" "vecperm")])
4161 (define_insn "vsx_splat_<VSX_D:mode>_mem"
4162 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=<VSX_D:VSa>")
4163 (vec_duplicate:VSX_D
4164 (match_operand:<VSX_D:VS_scalar> 1 "memory_operand" "Z")))]
4165 "VECTOR_MEM_VSX_P (<MODE>mode)"
4167 [(set_attr "type" "vecload")])
4169 ;; V4SI splat support
4170 (define_insn "vsx_splat_v4si"
4171 [(set (match_operand:V4SI 0 "vsx_register_operand" "=we,we")
4173 (match_operand:SI 1 "splat_input_operand" "r,Z")))]
4178 [(set_attr "type" "vecperm,vecload")])
4180 ;; SImode is not currently allowed in vector registers. This pattern
4181 ;; allows us to use direct move to get the value in a vector register
4182 ;; so that we can use XXSPLTW
4183 (define_insn "vsx_splat_v4si_di"
4184 [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa,we")
4187 (match_operand:DI 1 "gpc_reg_operand" "wj,r"))))]
4188 "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
4192 [(set_attr "type" "vecperm")])
4194 ;; V4SF splat (ISA 3.0)
4195 (define_insn_and_split "vsx_splat_v4sf"
4196 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa,wa,wa")
4198 (match_operand:SF 1 "splat_input_operand" "Z,wy,r")))]
4204 "&& reload_completed && vsx_register_operand (operands[1], SFmode)"
4206 (unspec:V4SF [(match_dup 1)] UNSPEC_VSX_CVDPSPN))
4208 (unspec:V4SF [(match_dup 0)
4209 (const_int 0)] UNSPEC_VSX_XXSPLTW))]
4211 [(set_attr "type" "vecload,vecperm,mftgpr")
4212 (set_attr "length" "4,8,4")])
4214 ;; V4SF/V4SI splat from a vector element
4215 (define_insn "vsx_xxspltw_<mode>"
4216 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=<VSa>")
4217 (vec_duplicate:VSX_W
4218 (vec_select:<VS_scalar>
4219 (match_operand:VSX_W 1 "vsx_register_operand" "<VSa>")
4221 [(match_operand:QI 2 "u5bit_cint_operand" "n")]))))]
4222 "VECTOR_MEM_VSX_P (<MODE>mode)"
4224 if (!BYTES_BIG_ENDIAN)
4225 operands[2] = GEN_INT (3 - INTVAL (operands[2]));
4227 return "xxspltw %x0,%x1,%2";
4229 [(set_attr "type" "vecperm")])
4231 (define_insn "vsx_xxspltw_<mode>_direct"
4232 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=<VSa>")
4233 (unspec:VSX_W [(match_operand:VSX_W 1 "vsx_register_operand" "<VSa>")
4234 (match_operand:QI 2 "u5bit_cint_operand" "i")]
4235 UNSPEC_VSX_XXSPLTW))]
4236 "VECTOR_MEM_VSX_P (<MODE>mode)"
4237 "xxspltw %x0,%x1,%2"
4238 [(set_attr "type" "vecperm")])
4240 ;; V16QI/V8HI splat support on ISA 2.07
4241 (define_insn "vsx_vsplt<VSX_SPLAT_SUFFIX>_di"
4242 [(set (match_operand:VSX_SPLAT_I 0 "altivec_register_operand" "=v")
4243 (vec_duplicate:VSX_SPLAT_I
4244 (truncate:<VS_scalar>
4245 (match_operand:DI 1 "altivec_register_operand" "v"))))]
4246 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
4247 "vsplt<VSX_SPLAT_SUFFIX> %0,%1,<VSX_SPLAT_COUNT>"
4248 [(set_attr "type" "vecperm")])
4250 ;; V2DF/V2DI splat for use by vec_splat builtin
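;; E.g. (illustrative sketch, assuming <altivec.h>):
;;
;;	#include <altivec.h>
;;	vector double dup (vector double v) { return vec_splat (v, 0); }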
4251 (define_insn "vsx_xxspltd_<mode>"
4252 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
4253 (unspec:VSX_D [(match_operand:VSX_D 1 "vsx_register_operand" "wa")
4254 (match_operand:QI 2 "u5bit_cint_operand" "i")]
4255 UNSPEC_VSX_XXSPLTD))]
4256 "VECTOR_MEM_VSX_P (<MODE>mode)"
4258 if ((VECTOR_ELT_ORDER_BIG && INTVAL (operands[2]) == 0)
4259 || (!VECTOR_ELT_ORDER_BIG && INTVAL (operands[2]) == 1))
4260 return "xxpermdi %x0,%x1,%x1,0";
4262 return "xxpermdi %x0,%x1,%x1,3";
4264 [(set_attr "type" "vecperm")])
4266 ;; V4SF/V4SI interleave
4267 (define_insn "vsx_xxmrghw_<mode>"
4268 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wf,?<VSa>")
4270 (vec_concat:<VS_double>
4271 (match_operand:VSX_W 1 "vsx_register_operand" "wf,<VSa>")
4272 (match_operand:VSX_W 2 "vsx_register_operand" "wf,<VSa>"))
4273 (parallel [(const_int 0) (const_int 4)
4274 (const_int 1) (const_int 5)])))]
4275 "VECTOR_MEM_VSX_P (<MODE>mode)"
4277 if (BYTES_BIG_ENDIAN)
4278 return "xxmrghw %x0,%x1,%x2";
4280 return "xxmrglw %x0,%x2,%x1";
4282 [(set_attr "type" "vecperm")])
4284 (define_insn "vsx_xxmrglw_<mode>"
4285 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wf,?<VSa>")
4287 (vec_concat:<VS_double>
4288 (match_operand:VSX_W 1 "vsx_register_operand" "wf,<VSa>")
4289 (match_operand:VSX_W 2 "vsx_register_operand" "wf,?<VSa>"))
4290 (parallel [(const_int 2) (const_int 6)
4291 (const_int 3) (const_int 7)])))]
4292 "VECTOR_MEM_VSX_P (<MODE>mode)"
4294 if (BYTES_BIG_ENDIAN)
4295 return "xxmrglw %x0,%x1,%x2";
4297 return "xxmrghw %x0,%x2,%x1";
4299 [(set_attr "type" "vecperm")])
4301 ;; Shift left double by word immediate
4302 (define_insn "vsx_xxsldwi_<mode>"
4303 [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSa>")
4304 (unspec:VSX_L [(match_operand:VSX_L 1 "vsx_register_operand" "<VSa>")
4305 (match_operand:VSX_L 2 "vsx_register_operand" "<VSa>")
4306 (match_operand:QI 3 "u5bit_cint_operand" "i")]
4308 "VECTOR_MEM_VSX_P (<MODE>mode)"
4309 "xxsldwi %x0,%x1,%x2,%3"
4310 [(set_attr "type" "vecperm")])
4313 ;; Vector reduction insns and splitters
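;; Schematically, the V4SF splitter below emits a rotate-and-combine
;; sequence; shown here for the add case (the min/max reductions substitute
;; the corresponding vector op):
;;
;;	xxsldwi tmp2,op1,op1,2		; rotate by two words
;;	xvaddsp tmp3,tmp2,op1		; combine pairs
;;	xxsldwi tmp4,tmp3,tmp3,3	; rotate by three words
;;	xvaddsp op0,tmp4,tmp3		; final combine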
4315 (define_insn_and_split "vsx_reduc_<VEC_reduc_name>_v2df"
4316 [(set (match_operand:V2DF 0 "vfloat_operand" "=&wd,&?wa,wd,?wa")
4320 (match_operand:V2DF 1 "vfloat_operand" "wd,wa,wd,wa")
4321 (parallel [(const_int 1)]))
4324 (parallel [(const_int 0)])))
4326 (clobber (match_scratch:V2DF 2 "=0,0,&wd,&wa"))]
4327 "VECTOR_UNIT_VSX_P (V2DFmode)"
4333 rtx tmp = (GET_CODE (operands[2]) == SCRATCH)
4334 ? gen_reg_rtx (V2DFmode)
4336 emit_insn (gen_vsx_xxsldwi_v2df (tmp, operands[1], operands[1], const2_rtx));
4337 emit_insn (gen_<VEC_reduc_rtx>v2df3 (operands[0], tmp, operands[1]));
4340 [(set_attr "length" "8")
4341 (set_attr "type" "veccomplex")])
4343 (define_insn_and_split "vsx_reduc_<VEC_reduc_name>_v4sf"
4344 [(set (match_operand:V4SF 0 "vfloat_operand" "=wf,?wa")
4346 (unspec:V4SF [(const_int 0)] UNSPEC_REDUC)
4347 (match_operand:V4SF 1 "vfloat_operand" "wf,wa")))
4348 (clobber (match_scratch:V4SF 2 "=&wf,&wa"))
4349 (clobber (match_scratch:V4SF 3 "=&wf,&wa"))]
4350 "VECTOR_UNIT_VSX_P (V4SFmode)"
4356 rtx op0 = operands[0];
4357 rtx op1 = operands[1];
4358 rtx tmp2, tmp3, tmp4;
4360 if (can_create_pseudo_p ())
4362 tmp2 = gen_reg_rtx (V4SFmode);
4363 tmp3 = gen_reg_rtx (V4SFmode);
4364 tmp4 = gen_reg_rtx (V4SFmode);
4373 emit_insn (gen_vsx_xxsldwi_v4sf (tmp2, op1, op1, const2_rtx));
4374 emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp3, tmp2, op1));
4375 emit_insn (gen_vsx_xxsldwi_v4sf (tmp4, tmp3, tmp3, GEN_INT (3)));
4376 emit_insn (gen_<VEC_reduc_rtx>v4sf3 (op0, tmp4, tmp3));
4379 [(set_attr "length" "16")
4380 (set_attr "type" "veccomplex")])
;; Combiner patterns with the vector reduction patterns that know we can get
4383 ;; to the top element of the V2DF array without doing an extract.
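;; i.e. source such as (illustrative sketch for the add case, using GCC's
;; vector subscripting extension):
;;
;;	#include <altivec.h>
;;	double sum (vector double v) { return v[0] + v[1]; }
;;
;; combines the reduction with the final scalar extract.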
4385 (define_insn_and_split "*vsx_reduc_<VEC_reduc_name>_v2df_scalar"
4386 [(set (match_operand:DF 0 "vfloat_operand" "=&ws,&?ws,ws,?ws")
4391 (match_operand:V2DF 1 "vfloat_operand" "wd,wa,wd,wa")
4392 (parallel [(const_int 1)]))
4395 (parallel [(const_int 0)])))
4397 (parallel [(const_int 1)])))
4398 (clobber (match_scratch:DF 2 "=0,0,&wd,&wa"))]
4399 "VECTOR_UNIT_VSX_P (V2DFmode)"
4405 rtx hi = gen_highpart (DFmode, operands[1]);
4406 rtx lo = (GET_CODE (operands[2]) == SCRATCH)
4407 ? gen_reg_rtx (DFmode)
4410 emit_insn (gen_vsx_extract_v2df (lo, operands[1], const1_rtx));
4411 emit_insn (gen_<VEC_reduc_rtx>df3 (operands[0], hi, lo));
4414 [(set_attr "length" "8")
4415 (set_attr "type" "veccomplex")])
4417 (define_insn_and_split "*vsx_reduc_<VEC_reduc_name>_v4sf_scalar"
4418 [(set (match_operand:SF 0 "vfloat_operand" "=f,?f")
4421 (unspec:V4SF [(const_int 0)] UNSPEC_REDUC)
4422 (match_operand:V4SF 1 "vfloat_operand" "wf,wa"))
4423 (parallel [(const_int 3)])))
4424 (clobber (match_scratch:V4SF 2 "=&wf,&wa"))
4425 (clobber (match_scratch:V4SF 3 "=&wf,&wa"))
4426 (clobber (match_scratch:V4SF 4 "=0,0"))]
4427 "VECTOR_UNIT_VSX_P (V4SFmode)"
4433 rtx op0 = operands[0];
4434 rtx op1 = operands[1];
4435 rtx tmp2, tmp3, tmp4, tmp5;
4437 if (can_create_pseudo_p ())
4439 tmp2 = gen_reg_rtx (V4SFmode);
4440 tmp3 = gen_reg_rtx (V4SFmode);
4441 tmp4 = gen_reg_rtx (V4SFmode);
4442 tmp5 = gen_reg_rtx (V4SFmode);
4452 emit_insn (gen_vsx_xxsldwi_v4sf (tmp2, op1, op1, const2_rtx));
4453 emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp3, tmp2, op1));
4454 emit_insn (gen_vsx_xxsldwi_v4sf (tmp4, tmp3, tmp3, GEN_INT (3)));
4455 emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp5, tmp4, tmp3));
4456 emit_insn (gen_vsx_xscvspdp_scalar2 (op0, tmp5));
4459 [(set_attr "length" "20")
4460 (set_attr "type" "veccomplex")])
4463 ;; Power8 Vector fusion. The fused ops must be physically adjacent.
(define_peephole
  [(set (match_operand:P 0 "base_reg_operand" "")
4466 (match_operand:P 1 "short_cint_operand" ""))
4467 (set (match_operand:VSX_M 2 "vsx_register_operand" "")
4468 (mem:VSX_M (plus:P (match_dup 0)
4469 (match_operand:P 3 "int_reg_operand" ""))))]
4470 "TARGET_VSX && TARGET_P8_FUSION && !TARGET_P9_VECTOR"
4471 "li %0,%1\;lx<VSX_M:VSm>x %x2,%0,%3\t\t\t# vector load fusion"
4472 [(set_attr "length" "8")
4473 (set_attr "type" "vecload")])
(define_peephole
  [(set (match_operand:P 0 "base_reg_operand" "")
4477 (match_operand:P 1 "short_cint_operand" ""))
4478 (set (match_operand:VSX_M 2 "vsx_register_operand" "")
4479 (mem:VSX_M (plus:P (match_operand:P 3 "int_reg_operand" "")
4481 "TARGET_VSX && TARGET_P8_FUSION && !TARGET_P9_VECTOR"
4482 "li %0,%1\;lx<VSX_M:VSm>x %x2,%0,%3\t\t\t# vector load fusion"
4483 [(set_attr "length" "8")
4484 (set_attr "type" "vecload")])
4487 ;; ISA 3.0 vector extend sign support
4489 (define_insn "vsx_sign_extend_qi_<mode>"
4490 [(set (match_operand:VSINT_84 0 "vsx_register_operand" "=v")
4492 [(match_operand:V16QI 1 "vsx_register_operand" "v")]
4493 UNSPEC_VSX_SIGN_EXTEND))]
4496 [(set_attr "type" "vecexts")])
4498 (define_insn "vsx_sign_extend_hi_<mode>"
4499 [(set (match_operand:VSINT_84 0 "vsx_register_operand" "=v")
4501 [(match_operand:V8HI 1 "vsx_register_operand" "v")]
4502 UNSPEC_VSX_SIGN_EXTEND))]
4505 [(set_attr "type" "vecexts")])
4507 (define_insn "*vsx_sign_extend_si_v2di"
4508 [(set (match_operand:V2DI 0 "vsx_register_operand" "=v")
4509 (unspec:V2DI [(match_operand:V4SI 1 "vsx_register_operand" "v")]
4510 UNSPEC_VSX_SIGN_EXTEND))]
4513 [(set_attr "type" "vecexts")])
4516 ;; ISA 3.0 Binary Floating-Point Support
4518 ;; VSX Scalar Extract Exponent Quad-Precision
4519 (define_insn "xsxexpqp_<mode>"
4520 [(set (match_operand:DI 0 "altivec_register_operand" "=v")
4521 (unspec:DI [(match_operand:IEEE128 1 "altivec_register_operand" "v")]
4522 UNSPEC_VSX_SXEXPDP))]
4525 [(set_attr "type" "vecmove")])
4527 ;; VSX Scalar Extract Exponent Double-Precision
4528 (define_insn "xsxexpdp"
4529 [(set (match_operand:DI 0 "register_operand" "=r")
4530 (unspec:DI [(match_operand:DF 1 "vsx_register_operand" "wa")]
4531 UNSPEC_VSX_SXEXPDP))]
4532 "TARGET_P9_VECTOR && TARGET_64BIT"
4534 [(set_attr "type" "integer")])
4536 ;; VSX Scalar Extract Significand Quad-Precision
4537 (define_insn "xsxsigqp_<mode>"
4538 [(set (match_operand:TI 0 "altivec_register_operand" "=v")
4539 (unspec:TI [(match_operand:IEEE128 1 "altivec_register_operand" "v")]
4543 [(set_attr "type" "vecmove")])
4545 ;; VSX Scalar Extract Significand Double-Precision
4546 (define_insn "xsxsigdp"
4547 [(set (match_operand:DI 0 "register_operand" "=r")
4548 (unspec:DI [(match_operand:DF 1 "vsx_register_operand" "wa")]
4550 "TARGET_P9_VECTOR && TARGET_64BIT"
4552 [(set_attr "type" "integer")])
4554 ;; VSX Scalar Insert Exponent Quad-Precision Floating Point Argument
4555 (define_insn "xsiexpqpf_<mode>"
4556 [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v")
4558 [(match_operand:IEEE128 1 "altivec_register_operand" "v")
4559 (match_operand:DI 2 "altivec_register_operand" "v")]
4560 UNSPEC_VSX_SIEXPQP))]
4563 [(set_attr "type" "vecmove")])
4565 ;; VSX Scalar Insert Exponent Quad-Precision
4566 (define_insn "xsiexpqp_<mode>"
4567 [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v")
4568 (unspec:IEEE128 [(match_operand:TI 1 "altivec_register_operand" "v")
4569 (match_operand:DI 2 "altivec_register_operand" "v")]
4570 UNSPEC_VSX_SIEXPQP))]
4573 [(set_attr "type" "vecmove")])
4575 ;; VSX Scalar Insert Exponent Double-Precision
4576 (define_insn "xsiexpdp"
4577 [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
4578 (unspec:DF [(match_operand:DI 1 "register_operand" "r")
4579 (match_operand:DI 2 "register_operand" "r")]
4580 UNSPEC_VSX_SIEXPDP))]
4581 "TARGET_P9_VECTOR && TARGET_64BIT"
4582 "xsiexpdp %x0,%1,%2"
4583 [(set_attr "type" "fpsimple")])
4585 ;; VSX Scalar Insert Exponent Double-Precision Floating Point Argument
4586 (define_insn "xsiexpdpf"
4587 [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
4588 (unspec:DF [(match_operand:DF 1 "register_operand" "r")
4589 (match_operand:DI 2 "register_operand" "r")]
4590 UNSPEC_VSX_SIEXPDP))]
4591 "TARGET_P9_VECTOR && TARGET_64BIT"
4592 "xsiexpdp %x0,%1,%2"
4593 [(set_attr "type" "fpsimple")])
4595 ;; VSX Scalar Compare Exponents Double-Precision
4596 (define_expand "xscmpexpdp_<code>"
4600 [(match_operand:DF 1 "vsx_register_operand" "wa")
4601 (match_operand:DF 2 "vsx_register_operand" "wa")]
4602 UNSPEC_VSX_SCMPEXPDP)
4604 (set (match_operand:SI 0 "register_operand" "=r")
4605 (CMP_TEST:SI (match_dup 3)
4609 operands[3] = gen_reg_rtx (CCFPmode);
4612 (define_insn "*xscmpexpdp"
4613 [(set (match_operand:CCFP 0 "cc_reg_operand" "=y")
4615 (unspec:DF [(match_operand:DF 1 "vsx_register_operand" "wa")
4616 (match_operand:DF 2 "vsx_register_operand" "wa")]
4617 UNSPEC_VSX_SCMPEXPDP)
4618 (match_operand:SI 3 "zero_constant" "j")))]
4620 "xscmpexpdp %0,%x1,%x2"
4621 [(set_attr "type" "fpcompare")])
4623 ;; VSX Scalar Test Data Class Quad-Precision
4624 ;; (Expansion for scalar_test_data_class (__ieee128, int))
4625 ;; (Has side effect of setting the lt bit if operand 1 is negative,
4626 ;; setting the eq bit if any of the conditions tested by operand 2
;; are satisfied, and clearing the gt and unordered bits to zero.)
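;; E.g. (illustrative sketch; 0x40 is the DCMX mask bit selecting the NaN
;; test, and __ieee128 assumes -mfloat128):
;;
;;	#include <altivec.h>
;;	int is_nan (__ieee128 x) { return scalar_test_data_class (x, 0x40); }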
4628 (define_expand "xststdcqp_<mode>"
4632 [(match_operand:IEEE128 1 "altivec_register_operand" "v")
4633 (match_operand:SI 2 "u7bit_cint_operand" "n")]
4636 (set (match_operand:SI 0 "register_operand" "=r")
4637 (eq:SI (match_dup 3)
4641 operands[3] = gen_reg_rtx (CCFPmode);
4644 ;; VSX Scalar Test Data Class Double- and Single-Precision
4645 ;; (The lt bit is set if operand 1 is negative. The eq bit is set
4646 ;; if any of the conditions tested by operand 2 are satisfied.
4647 ;; The gt and unordered bits are cleared to zero.)
4648 (define_expand "xststdc<Fvsx>"
4652 [(match_operand:SFDF 1 "vsx_register_operand" "wa")
4653 (match_operand:SI 2 "u7bit_cint_operand" "n")]
4656 (set (match_operand:SI 0 "register_operand" "=r")
4657 (eq:SI (match_dup 3)
4661 operands[3] = gen_reg_rtx (CCFPmode);
4662 operands[4] = CONST0_RTX (SImode);
4665 ;; The VSX Scalar Test Negative Quad-Precision
4666 (define_expand "xststdcnegqp_<mode>"
4670 [(match_operand:IEEE128 1 "altivec_register_operand" "v")
4674 (set (match_operand:SI 0 "register_operand" "=r")
4675 (lt:SI (match_dup 2)
4679 operands[2] = gen_reg_rtx (CCFPmode);
4682 ;; The VSX Scalar Test Negative Double- and Single-Precision
4683 (define_expand "xststdcneg<Fvsx>"
4687 [(match_operand:SFDF 1 "vsx_register_operand" "wa")
4691 (set (match_operand:SI 0 "register_operand" "=r")
4692 (lt:SI (match_dup 2)
4696 operands[2] = gen_reg_rtx (CCFPmode);
4697 operands[3] = CONST0_RTX (SImode);
4700 (define_insn "*xststdcqp_<mode>"
4701 [(set (match_operand:CCFP 0 "" "=y")
4704 [(match_operand:IEEE128 1 "altivec_register_operand" "v")
4705 (match_operand:SI 2 "u7bit_cint_operand" "n")]
4709 "xststdcqp %0,%1,%2"
4710 [(set_attr "type" "fpcompare")])
4712 (define_insn "*xststdc<Fvsx>"
4713 [(set (match_operand:CCFP 0 "" "=y")
4715 (unspec:SFDF [(match_operand:SFDF 1 "vsx_register_operand" "wa")
4716 (match_operand:SI 2 "u7bit_cint_operand" "n")]
4718 (match_operand:SI 3 "zero_constant" "j")))]
4720 "xststdc<Fvsx> %0,%x1,%2"
4721 [(set_attr "type" "fpcompare")])
4723 ;; VSX Vector Extract Exponent Double and Single Precision
4724 (define_insn "xvxexp<VSs>"
4725 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
4727 [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
4730 "xvxexp<VSs> %x0,%x1"
4731 [(set_attr "type" "vecsimple")])
4733 ;; VSX Vector Extract Significand Double and Single Precision
4734 (define_insn "xvxsig<VSs>"
4735 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
4737 [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
4740 "xvxsig<VSs> %x0,%x1"
4741 [(set_attr "type" "vecsimple")])
4743 ;; VSX Vector Insert Exponent Double and Single Precision
4744 (define_insn "xviexp<VSs>"
4745 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
4747 [(match_operand:VSX_F 1 "vsx_register_operand" "wa")
4748 (match_operand:VSX_F 2 "vsx_register_operand" "wa")]
4751 "xviexp<VSs> %x0,%x1,%x2"
4752 [(set_attr "type" "vecsimple")])
4754 ;; VSX Vector Test Data Class Double and Single Precision
4755 ;; The corresponding elements of the result vector are all ones
4756 ;; if any of the conditions tested by operand 3 are satisfied.
4757 (define_insn "xvtstdc<VSs>"
4758 [(set (match_operand:<VSI> 0 "vsx_register_operand" "=wa")
4760 [(match_operand:VSX_F 1 "vsx_register_operand" "wa")
4761 (match_operand:SI 2 "u7bit_cint_operand" "n")]
4762 UNSPEC_VSX_VTSTDC))]
4764 "xvtstdc<VSs> %x0,%x1,%2"
4765 [(set_attr "type" "vecsimple")])
4767 ;; ISA 3.0 String Operations Support
4769 ;; Compare vectors producing a vector result and a predicate, setting CR6
;; to indicate a combined status.  This pattern matches v16qi, v8hi, and
;; v4si modes; there is no need to match v4sf, v2df, or v2di modes because
;; those are expanded to use Power8 instructions.
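;; This predicate form backs comparisons such as vec_all_ne, e.g.
;; (illustrative sketch, assuming <altivec.h>):
;;
;;	#include <altivec.h>
;;	int differ (vector signed char a, vector signed char b)
;;	{ return vec_all_ne (a, b); }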
4774 (define_insn "*vsx_ne_<mode>_p"
4775 [(set (reg:CC CR6_REGNO)
4777 [(ne:CC (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v")
4778 (match_operand:VSX_EXTRACT_I 2 "gpc_reg_operand" "v"))]
4780 (set (match_operand:VSX_EXTRACT_I 0 "gpc_reg_operand" "=v")
4781 (ne:VSX_EXTRACT_I (match_dup 1)
4784 "vcmpne<VSX_EXTRACT_WIDTH>. %0,%1,%2"
4785 [(set_attr "type" "vecsimple")])
4787 (define_insn "*vector_nez_<mode>_p"
4788 [(set (reg:CC CR6_REGNO)
4789 (unspec:CC [(unspec:VI
4790 [(match_operand:VI 1 "gpc_reg_operand" "v")
4791 (match_operand:VI 2 "gpc_reg_operand" "v")]
4794 (set (match_operand:VI 0 "gpc_reg_operand" "=v")
4795 (unspec:VI [(match_dup 1)
4799 "vcmpnez<VSX_EXTRACT_WIDTH>. %0,%1,%2"
4800 [(set_attr "type" "vecsimple")])
4802 ;; Return first position of match between vectors
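;; This backs the vec_first_match_index built-in, e.g. (illustrative
;; sketch, assuming <altivec.h> and ISA 3.0):
;;
;;	#include <altivec.h>
;;	int idx (vector unsigned char a, vector unsigned char b)
;;	{ return vec_first_match_index (a, b); }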
4803 (define_expand "first_match_index_<mode>"
4804 [(match_operand:SI 0 "register_operand")
4805 (unspec:SI [(match_operand:VSX_EXTRACT_I 1 "register_operand")
4806 (match_operand:VSX_EXTRACT_I 2 "register_operand")]
4807 UNSPEC_VSX_FIRST_MATCH_INDEX)]
4812 rtx cmp_result = gen_reg_rtx (<MODE>mode);
4813 rtx not_result = gen_reg_rtx (<MODE>mode);
4815 emit_insn (gen_vcmpnez<VSX_EXTRACT_WIDTH> (cmp_result, operands[1],
4817 emit_insn (gen_one_cmpl<mode>2 (not_result, cmp_result));
4819 sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2;
4821 if (<MODE>mode == V16QImode)
4822 emit_insn (gen_vctzlsbb_<mode> (operands[0], not_result));
4825 rtx tmp = gen_reg_rtx (SImode);
4826 emit_insn (gen_vctzlsbb_<mode> (tmp, not_result));
4827 emit_insn (gen_ashrsi3 (operands[0], tmp, GEN_INT (sh)));
4832 ;; Return first position of match between vectors or end of string (EOS)
4833 (define_expand "first_match_or_eos_index_<mode>"
4834 [(match_operand:SI 0 "register_operand")
4835 (unspec: SI [(match_operand:VSX_EXTRACT_I 1 "register_operand")
4836 (match_operand:VSX_EXTRACT_I 2 "register_operand")]
4837 UNSPEC_VSX_FIRST_MATCH_EOS_INDEX)]
4841 rtx cmpz1_result = gen_reg_rtx (<MODE>mode);
4842 rtx cmpz2_result = gen_reg_rtx (<MODE>mode);
4843 rtx cmpz_result = gen_reg_rtx (<MODE>mode);
4844 rtx and_result = gen_reg_rtx (<MODE>mode);
4845 rtx result = gen_reg_rtx (<MODE>mode);
4846 rtx vzero = gen_reg_rtx (<MODE>mode);
4848 /* Vector with zeros in elements that correspond to zeros in operands. */
4849 emit_move_insn (vzero, CONST0_RTX (<MODE>mode));
4850 emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz1_result, operands[1], vzero));
4851 emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz2_result, operands[2], vzero));
4852 emit_insn (gen_and<mode>3 (and_result, cmpz1_result, cmpz2_result));
  /* Vector with ones in elements that do not match.  */
4855 emit_insn (gen_vcmpnez<VSX_EXTRACT_WIDTH> (cmpz_result, operands[1],
4858 /* Create vector with ones in elements where there was a zero in one of
     the source elements, or where the elements match.  */
4860 emit_insn (gen_nand<mode>3 (result, and_result, cmpz_result));
4861 sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2;
4863 if (<MODE>mode == V16QImode)
4864 emit_insn (gen_vctzlsbb_<mode> (operands[0], result));
4867 rtx tmp = gen_reg_rtx (SImode);
4868 emit_insn (gen_vctzlsbb_<mode> (tmp, result));
4869 emit_insn (gen_ashrsi3 (operands[0], tmp, GEN_INT (sh)));
4874 ;; Return first position of mismatch between vectors
4875 (define_expand "first_mismatch_index_<mode>"
4876 [(match_operand:SI 0 "register_operand")
4877 (unspec: SI [(match_operand:VSX_EXTRACT_I 1 "register_operand")
4878 (match_operand:VSX_EXTRACT_I 2 "register_operand")]
4879 UNSPEC_VSX_FIRST_MISMATCH_INDEX)]
4883 rtx cmp_result = gen_reg_rtx (<MODE>mode);
4885 emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmp_result, operands[1],
4887 sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2;
4889 if (<MODE>mode == V16QImode)
4890 emit_insn (gen_vctzlsbb_<mode> (operands[0], cmp_result));
4893 rtx tmp = gen_reg_rtx (SImode);
4894 emit_insn (gen_vctzlsbb_<mode> (tmp, cmp_result));
4895 emit_insn (gen_ashrsi3 (operands[0], tmp, GEN_INT (sh)));
4900 ;; Return first position of mismatch between vectors or end of string (EOS)
4901 (define_expand "first_mismatch_or_eos_index_<mode>"
4902 [(match_operand:SI 0 "register_operand")
4903 (unspec: SI [(match_operand:VSX_EXTRACT_I 1 "register_operand")
4904 (match_operand:VSX_EXTRACT_I 2 "register_operand")]
4905 UNSPEC_VSX_FIRST_MISMATCH_EOS_INDEX)]
4909 rtx cmpz1_result = gen_reg_rtx (<MODE>mode);
4910 rtx cmpz2_result = gen_reg_rtx (<MODE>mode);
4911 rtx cmpz_result = gen_reg_rtx (<MODE>mode);
4912 rtx not_cmpz_result = gen_reg_rtx (<MODE>mode);
4913 rtx and_result = gen_reg_rtx (<MODE>mode);
4914 rtx result = gen_reg_rtx (<MODE>mode);
4915 rtx vzero = gen_reg_rtx (<MODE>mode);
4917 /* Vector with zeros in elements that correspond to zeros in operands. */
4918 emit_move_insn (vzero, CONST0_RTX (<MODE>mode));
4920 emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz1_result, operands[1], vzero));
4921 emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz2_result, operands[2], vzero));
4922 emit_insn (gen_and<mode>3 (and_result, cmpz1_result, cmpz2_result));
  /* Vector with ones in elements that match.  */
4925 emit_insn (gen_vcmpnez<VSX_EXTRACT_WIDTH> (cmpz_result, operands[1],
4927 emit_insn (gen_one_cmpl<mode>2 (not_cmpz_result, cmpz_result));
4929 /* Create vector with ones in elements where there was a zero in one of
     the source elements, or where the elements did not match.  */
4931 emit_insn (gen_nand<mode>3 (result, and_result, not_cmpz_result));
4932 sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2;
4934 if (<MODE>mode == V16QImode)
4935 emit_insn (gen_vctzlsbb_<mode> (operands[0], result));
4938 rtx tmp = gen_reg_rtx (SImode);
4939 emit_insn (gen_vctzlsbb_<mode> (tmp, result));
4940 emit_insn (gen_ashrsi3 (operands[0], tmp, GEN_INT (sh)));
4945 ;; Load VSX Vector with Length
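;; This underlies the vec_xl_len built-in, e.g. (illustrative sketch,
;; assuming <altivec.h> and a 64-bit ISA 3.0 target):
;;
;;	#include <altivec.h>
;;	vector unsigned char head (unsigned char *p, unsigned long n)
;;	{ return vec_xl_len (p, n); }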
4946 (define_expand "lxvl"
4948 (ashift:DI (match_operand:DI 2 "register_operand")
4950 (set (match_operand:V16QI 0 "vsx_register_operand")
4952 [(match_operand:DI 1 "gpc_reg_operand")
4953 (mem:V16QI (match_dup 1))
4956 "TARGET_P9_VECTOR && TARGET_64BIT"
4958 operands[3] = gen_reg_rtx (DImode);
4961 (define_insn "*lxvl"
4962 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
4964 [(match_operand:DI 1 "gpc_reg_operand" "b")
4965 (mem:V16QI (match_dup 1))
4966 (match_operand:DI 2 "register_operand" "r")]
4968 "TARGET_P9_VECTOR && TARGET_64BIT"
4970 [(set_attr "type" "vecload")])
4972 (define_insn "lxvll"
4973 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
4974 (unspec:V16QI [(match_operand:DI 1 "gpc_reg_operand" "b")
4975 (mem:V16QI (match_dup 1))
4976 (match_operand:DI 2 "register_operand" "r")]
4980 [(set_attr "type" "vecload")])
4982 ;; Expand for builtin xl_len_r
4983 (define_expand "xl_len_r"
4984 [(match_operand:V16QI 0 "vsx_register_operand")
4985 (match_operand:DI 1 "register_operand")
4986 (match_operand:DI 2 "register_operand")]
4989 rtx shift_mask = gen_reg_rtx (V16QImode);
4990 rtx rtx_vtmp = gen_reg_rtx (V16QImode);
4991 rtx tmp = gen_reg_rtx (DImode);
4993 emit_insn (gen_altivec_lvsl_reg (shift_mask, operands[2]));
4994 emit_insn (gen_ashldi3 (tmp, operands[2], GEN_INT (56)));
4995 emit_insn (gen_lxvll (rtx_vtmp, operands[1], tmp));
4996 emit_insn (gen_altivec_vperm_v8hiv16qi (operands[0], rtx_vtmp, rtx_vtmp,
5001 (define_insn "stxvll"
5002 [(set (mem:V16QI (match_operand:DI 1 "gpc_reg_operand" "b"))
5003 (unspec:V16QI [(match_operand:V16QI 0 "vsx_register_operand" "wa")
5004 (mem:V16QI (match_dup 1))
5005 (match_operand:DI 2 "register_operand" "r")]
5009 [(set_attr "type" "vecstore")])
5011 ;; Store VSX Vector with Length
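;; The store companion of lxvl above, used by vec_xst_len, e.g.
;; (illustrative sketch, assuming <altivec.h> and a 64-bit ISA 3.0 target):
;;
;;	#include <altivec.h>
;;	void tail (vector unsigned char v, unsigned char *p, unsigned long n)
;;	{ vec_xst_len (v, p, n); }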
5012 (define_expand "stxvl"
5014 (ashift:DI (match_operand:DI 2 "register_operand")
5016 (set (mem:V16QI (match_operand:DI 1 "gpc_reg_operand"))
5018 [(match_operand:V16QI 0 "vsx_register_operand")
5019 (mem:V16QI (match_dup 1))
5022 "TARGET_P9_VECTOR && TARGET_64BIT"
5024 operands[3] = gen_reg_rtx (DImode);
5027 (define_insn "*stxvl"
5028 [(set (mem:V16QI (match_operand:DI 1 "gpc_reg_operand" "b"))
5030 [(match_operand:V16QI 0 "vsx_register_operand" "wa")
5031 (mem:V16QI (match_dup 1))
5032 (match_operand:DI 2 "register_operand" "r")]
5034 "TARGET_P9_VECTOR && TARGET_64BIT"
5036 [(set_attr "type" "vecstore")])
5038 ;; Expand for builtin xst_len_r
5039 (define_expand "xst_len_r"
5040 [(match_operand:V16QI 0 "vsx_register_operand" "=wa")
5041 (match_operand:DI 1 "register_operand" "b")
5042 (match_operand:DI 2 "register_operand" "r")]
5045 rtx shift_mask = gen_reg_rtx (V16QImode);
5046 rtx rtx_vtmp = gen_reg_rtx (V16QImode);
5047 rtx tmp = gen_reg_rtx (DImode);
5049 emit_insn (gen_altivec_lvsr_reg (shift_mask, operands[2]));
5050 emit_insn (gen_altivec_vperm_v8hiv16qi (rtx_vtmp, operands[0], operands[0],
5052 emit_insn (gen_ashldi3 (tmp, operands[2], GEN_INT (56)));
5053 emit_insn (gen_stxvll (rtx_vtmp, operands[1], tmp));
5057 ;; Vector Compare Not Equal Byte (specified/not+eq:)
5058 (define_insn "vcmpneb"
5059 [(set (match_operand:V16QI 0 "altivec_register_operand" "=v")
5061 (eq:V16QI (match_operand:V16QI 1 "altivec_register_operand" "v")
5062 (match_operand:V16QI 2 "altivec_register_operand" "v"))))]
5065 [(set_attr "type" "vecsimple")])
5067 ;; Vector Compare Not Equal or Zero Byte
5068 (define_insn "vcmpnezb"
5069 [(set (match_operand:V16QI 0 "altivec_register_operand" "=v")
5071 [(match_operand:V16QI 1 "altivec_register_operand" "v")
5072 (match_operand:V16QI 2 "altivec_register_operand" "v")]
5076 [(set_attr "type" "vecsimple")])
5078 ;; Vector Compare Not Equal Half Word (specified/not+eq:)
5079 (define_insn "vcmpneh"
5080 [(set (match_operand:V8HI 0 "altivec_register_operand" "=v")
5082 (eq:V8HI (match_operand:V8HI 1 "altivec_register_operand" "v")
5083 (match_operand:V8HI 2 "altivec_register_operand" "v"))))]
5086 [(set_attr "type" "vecsimple")])
5088 ;; Vector Compare Not Equal or Zero Half Word
5089 (define_insn "vcmpnezh"
5090 [(set (match_operand:V8HI 0 "altivec_register_operand" "=v")
5091 (unspec:V8HI [(match_operand:V8HI 1 "altivec_register_operand" "v")
5092 (match_operand:V8HI 2 "altivec_register_operand" "v")]
5096 [(set_attr "type" "vecsimple")])
5098 ;; Vector Compare Not Equal Word (specified/not+eq:)
5099 (define_insn "vcmpnew"
5100 [(set (match_operand:V4SI 0 "altivec_register_operand" "=v")
5102 (eq:V4SI (match_operand:V4SI 1 "altivec_register_operand" "v")
5103 (match_operand:V4SI 2 "altivec_register_operand" "v"))))]
5106 [(set_attr "type" "vecsimple")])
5108 ;; Vector Compare Not Equal or Zero Word
5109 (define_insn "vcmpnezw"
5110 [(set (match_operand:V4SI 0 "altivec_register_operand" "=v")
5111 (unspec:V4SI [(match_operand:V4SI 1 "altivec_register_operand" "v")
5112 (match_operand:V4SI 2 "altivec_register_operand" "v")]
5116 [(set_attr "type" "vecsimple")])
5118 ;; Vector Count Leading Zero Least-Significant Bits Byte
5119 (define_insn "vclzlsbb"
5120 [(set (match_operand:SI 0 "register_operand" "=r")
5122 [(match_operand:V16QI 1 "altivec_register_operand" "v")]
5126 [(set_attr "type" "vecsimple")])
5128 ;; Vector Count Trailing Zero Least-Significant Bits Byte
5129 (define_insn "vctzlsbb_<mode>"
5130 [(set (match_operand:SI 0 "register_operand" "=r")
5132 [(match_operand:VSX_EXTRACT_I 1 "altivec_register_operand" "v")]
5136 [(set_attr "type" "vecsimple")])
5138 ;; Vector Extract Unsigned Byte Left-Indexed
5139 (define_insn "vextublx"
5140 [(set (match_operand:SI 0 "register_operand" "=r")
5142 [(match_operand:SI 1 "register_operand" "r")
5143 (match_operand:V16QI 2 "altivec_register_operand" "v")]
5147 [(set_attr "type" "vecsimple")])
5149 ;; Vector Extract Unsigned Byte Right-Indexed
5150 (define_insn "vextubrx"
5151 [(set (match_operand:SI 0 "register_operand" "=r")
5153 [(match_operand:SI 1 "register_operand" "r")
5154 (match_operand:V16QI 2 "altivec_register_operand" "v")]
5158 [(set_attr "type" "vecsimple")])
5160 ;; Vector Extract Unsigned Half Word Left-Indexed
5161 (define_insn "vextuhlx"
5162 [(set (match_operand:SI 0 "register_operand" "=r")
5164 [(match_operand:SI 1 "register_operand" "r")
5165 (match_operand:V8HI 2 "altivec_register_operand" "v")]
5169 [(set_attr "type" "vecsimple")])
5171 ;; Vector Extract Unsigned Half Word Right-Indexed
5172 (define_insn "vextuhrx"
5173 [(set (match_operand:SI 0 "register_operand" "=r")
5175 [(match_operand:SI 1 "register_operand" "r")
5176 (match_operand:V8HI 2 "altivec_register_operand" "v")]
5180 [(set_attr "type" "vecsimple")])
5182 ;; Vector Extract Unsigned Word Left-Indexed
5183 (define_insn "vextuwlx"
5184 [(set (match_operand:SI 0 "register_operand" "=r")
5186 [(match_operand:SI 1 "register_operand" "r")
5187 (match_operand:V4SI 2 "altivec_register_operand" "v")]
5191 [(set_attr "type" "vecsimple")])
5193 ;; Vector Extract Unsigned Word Right-Indexed
5194 (define_insn "vextuwrx"
5195 [(set (match_operand:SI 0 "register_operand" "=r")
5197 [(match_operand:SI 1 "register_operand" "r")
5198 (match_operand:V4SI 2 "altivec_register_operand" "v")]
5202 [(set_attr "type" "vecsimple")])
5204 ;; Vector insert/extract word at arbitrary byte values. Note, the little
;; endian version needs to adjust the byte number, and the V4SI element in
;; the vector.
5207 (define_expand "vextract4b"
5208 [(set (match_operand:DI 0 "gpc_reg_operand")
5209 (unspec:DI [(match_operand:V16QI 1 "vsx_register_operand")
5210 (match_operand:QI 2 "const_0_to_12_operand")]
5211 UNSPEC_XXEXTRACTUW))]
5214 if (!VECTOR_ELT_ORDER_BIG)
5215 operands[2] = GEN_INT (12 - INTVAL (operands[2]));
5218 (define_insn_and_split "*vextract4b_internal"
5219 [(set (match_operand:DI 0 "gpc_reg_operand" "=wj,r")
5220 (unspec:DI [(match_operand:V16QI 1 "vsx_register_operand" "wa,v")
5221 (match_operand:QI 2 "const_0_to_12_operand" "n,n")]
5222 UNSPEC_XXEXTRACTUW))]
5225 xxextractuw %x0,%x1,%2
5227 "&& reload_completed && int_reg_operand (operands[0], DImode)"
5230 rtx op0 = operands[0];
5231 rtx op1 = operands[1];
5232 rtx op2 = operands[2];
5233 rtx op0_si = gen_rtx_REG (SImode, REGNO (op0));
5234 rtx op1_v4si = gen_rtx_REG (V4SImode, REGNO (op1));
5236 emit_move_insn (op0, op2);
5237 if (VECTOR_ELT_ORDER_BIG)
5238 emit_insn (gen_vextuwlx (op0_si, op0_si, op1_v4si));
5240 emit_insn (gen_vextuwrx (op0_si, op0_si, op1_v4si));
5243 [(set_attr "type" "vecperm")])
5245 (define_expand "vinsert4b"
5246 [(set (match_operand:V16QI 0 "vsx_register_operand")
5247 (unspec:V16QI [(match_operand:V4SI 1 "vsx_register_operand")
5248 (match_operand:V16QI 2 "vsx_register_operand")
5249 (match_operand:QI 3 "const_0_to_12_operand")]
5253 if (!VECTOR_ELT_ORDER_BIG)
5255 rtx op1 = operands[1];
5256 rtx v4si_tmp = gen_reg_rtx (V4SImode);
5257 emit_insn (gen_vsx_xxpermdi_v4si_be (v4si_tmp, op1, op1, const1_rtx));
5258 operands[1] = v4si_tmp;
5259 operands[3] = GEN_INT (12 - INTVAL (operands[3]));
5263 (define_insn "*vinsert4b_internal"
5264 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
5265 (unspec:V16QI [(match_operand:V4SI 1 "vsx_register_operand" "wa")
5266 (match_operand:V16QI 2 "vsx_register_operand" "0")
5267 (match_operand:QI 3 "const_0_to_12_operand" "n")]
5270 "xxinsertw %x0,%x1,%3"
5271 [(set_attr "type" "vecperm")])
5273 (define_expand "vinsert4b_di"
5274 [(set (match_operand:V16QI 0 "vsx_register_operand")
5275 (unspec:V16QI [(match_operand:DI 1 "vsx_register_operand")
5276 (match_operand:V16QI 2 "vsx_register_operand")
5277 (match_operand:QI 3 "const_0_to_12_operand")]
5281 if (!VECTOR_ELT_ORDER_BIG)
5282 operands[3] = GEN_INT (12 - INTVAL (operands[3]));
5285 (define_insn "*vinsert4b_di_internal"
5286 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
5287 (unspec:V16QI [(match_operand:DI 1 "vsx_register_operand" "wj")
5288 (match_operand:V16QI 2 "vsx_register_operand" "0")
5289 (match_operand:QI 3 "const_0_to_12_operand" "n")]
5292 "xxinsertw %x0,%x1,%3"
5293 [(set_attr "type" "vecperm")])
5295 ;; Generate vector extract four float 32 values from left four elements
5296 ;; of eight element vector of float 16 values.
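;; E.g. (illustrative sketch; vec_extract_fp32_from_shorth follows the
;; built-in naming in GCC's PowerPC documentation, and assumes <altivec.h>
;; and ISA 3.0):
;;
;;	#include <altivec.h>
;;	vector float up (vector unsigned short v)
;;	{ return vec_extract_fp32_from_shorth (v); }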
5297 (define_expand "vextract_fp_from_shorth"
5298 [(set (match_operand:V4SF 0 "register_operand" "=wa")
5299 (unspec:V4SF [(match_operand:V8HI 1 "register_operand" "wa")]
5300 UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTH))]
5303 int vals[16] = {15, 14, 0, 0, 13, 12, 0, 0, 11, 10, 0, 0, 9, 8, 0, 0};
5307 rtx mask = gen_reg_rtx (V16QImode);
5308 rtx tmp = gen_reg_rtx (V16QImode);
5311 for (i = 0; i < 16; i++)
5312 rvals[i] = GEN_INT (vals[i]);
5314 /* xvcvhpsp - vector convert F16 to vector F32 requires the four F16
5315 inputs in half words 1,3,5,7 (IBM numbering). Use xxperm to move
5316 src half words 0,1,2,3 for the conversion instruction. */
5317 v = gen_rtvec_v (16, rvals);
5318 emit_insn (gen_vec_initv16qiqi (mask, gen_rtx_PARALLEL (V16QImode, v)));
5319 emit_insn (gen_altivec_vperm_v8hiv16qi (tmp, operands[1],
5320 operands[1], mask));
5321 emit_insn (gen_vsx_xvcvhpsp (operands[0], tmp));
5325 ;; Generate vector extract four float 32 values from right four elements
5326 ;; of eight element vector of float 16 values.
5327 (define_expand "vextract_fp_from_shortl"
5328 [(set (match_operand:V4SF 0 "register_operand" "=wa")
5329 (unspec:V4SF [(match_operand:V8HI 1 "register_operand" "wa")]
5330 UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTL))]
5333 int vals[16] = {7, 6, 0, 0, 5, 4, 0, 0, 3, 2, 0, 0, 1, 0, 0, 0};
5336 rtx mask = gen_reg_rtx (V16QImode);
5337 rtx tmp = gen_reg_rtx (V16QImode);
5340 for (i = 0; i < 16; i++)
5341 rvals[i] = GEN_INT (vals[i]);
5343 /* xvcvhpsp - vector convert F16 to vector F32 requires the four F16
5344 inputs in half words 1,3,5,7 (IBM numbering). Use xxperm to move
5345 src half words 4,5,6,7 for the conversion instruction. */
5346 v = gen_rtvec_v (16, rvals);
5347 emit_insn (gen_vec_initv16qiqi (mask, gen_rtx_PARALLEL (V16QImode, v)));
5348 emit_insn (gen_altivec_vperm_v8hiv16qi (tmp, operands[1],
5349 operands[1], mask));
5350 emit_insn (gen_vsx_xvcvhpsp (operands[0], tmp));
5354 ;; Support for ISA 3.0 vector byte reverse
;; Swap all bytes within a vector
5357 (define_insn "p9_xxbrq_v1ti"
5358 [(set (match_operand:V1TI 0 "vsx_register_operand" "=wa")
5359 (bswap:V1TI (match_operand:V1TI 1 "vsx_register_operand" "wa")))]
5362 [(set_attr "type" "vecperm")])
5364 (define_expand "p9_xxbrq_v16qi"
5365 [(use (match_operand:V16QI 0 "vsx_register_operand" "=wa"))
5366 (use (match_operand:V16QI 1 "vsx_register_operand" "wa"))]
5369 rtx op0 = gen_reg_rtx (V1TImode);
5370 rtx op1 = gen_lowpart (V1TImode, operands[1]);
5371 emit_insn (gen_p9_xxbrq_v1ti (op0, op1));
5372 emit_move_insn (operands[0], gen_lowpart (V16QImode, op0));
5376 ;; Swap all bytes in each 64-bit element
5377 (define_insn "p9_xxbrd_v2di"
5378 [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
5379 (bswap:V2DI (match_operand:V2DI 1 "vsx_register_operand" "wa")))]
5382 [(set_attr "type" "vecperm")])
5384 (define_expand "p9_xxbrd_v2df"
5385 [(use (match_operand:V2DF 0 "vsx_register_operand" "=wa"))
5386 (use (match_operand:V2DF 1 "vsx_register_operand" "wa"))]
5389 rtx op0 = gen_reg_rtx (V2DImode);
5390 rtx op1 = gen_lowpart (V2DImode, operands[1]);
5391 emit_insn (gen_p9_xxbrd_v2di (op0, op1));
5392 emit_move_insn (operands[0], gen_lowpart (V2DFmode, op0));
5396 ;; Swap all bytes in each 32-bit element
5397 (define_insn "p9_xxbrw_v4si"
5398 [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa")
5399 (bswap:V4SI (match_operand:V4SI 1 "vsx_register_operand" "wa")))]
5402 [(set_attr "type" "vecperm")])
5404 (define_expand "p9_xxbrw_v4sf"
5405 [(use (match_operand:V4SF 0 "vsx_register_operand" "=wa"))
5406 (use (match_operand:V4SF 1 "vsx_register_operand" "wa"))]
5409 rtx op0 = gen_reg_rtx (V4SImode);
5410 rtx op1 = gen_lowpart (V4SImode, operands[1]);
5411 emit_insn (gen_p9_xxbrw_v4si (op0, op1));
5412 emit_move_insn (operands[0], gen_lowpart (V4SFmode, op0));
5416 ;; Swap all bytes in each element of vector
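;; This is the expander behind vec_revb, e.g. (illustrative sketch,
;; assuming <altivec.h>):
;;
;;	#include <altivec.h>
;;	vector unsigned int r (vector unsigned int v) { return vec_revb (v); }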
5417 (define_expand "revb_<mode>"
5418 [(use (match_operand:VEC_REVB 0 "vsx_register_operand"))
5419 (use (match_operand:VEC_REVB 1 "vsx_register_operand"))]
5422 if (TARGET_P9_VECTOR)
5423 emit_insn (gen_p9_xxbr<VSX_XXBR>_<mode> (operands[0], operands[1]));
5426 /* Want to have the elements in reverse order relative
	 to the endian mode in use, i.e. in LE mode, put elements
	 in BE order.  */
      rtx sel = swap_endian_selector_for_mode (<MODE>mode);
5430 emit_insn (gen_altivec_vperm_<mode> (operands[0], operands[1],
5437 ;; Reversing bytes in vector char is just a NOP.
5438 (define_expand "revb_v16qi"
5439 [(set (match_operand:V16QI 0 "vsx_register_operand")
5440 (bswap:V16QI (match_operand:V16QI 1 "vsx_register_operand")))]
5443 emit_move_insn (operands[0], operands[1]);
5447 ;; Swap all bytes in each 16-bit element
5448 (define_insn "p9_xxbrh_v8hi"
5449 [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
5450 (bswap:V8HI (match_operand:V8HI 1 "vsx_register_operand" "wa")))]
5453 [(set_attr "type" "vecperm")])
5456 ;; Operand numbers for the following peephole2
(define_constants
  [(SFBOOL_TMP_GPR	 0)		;; GPR temporary
5459 (SFBOOL_TMP_VSX 1) ;; vector temporary
5460 (SFBOOL_MFVSR_D 2) ;; move to gpr dest
5461 (SFBOOL_MFVSR_A 3) ;; move to gpr src
5462 (SFBOOL_BOOL_D 4) ;; and/ior/xor dest
5463 (SFBOOL_BOOL_A1 5) ;; and/ior/xor arg1
   (SFBOOL_BOOL_A2	 6)		;; and/ior/xor arg2
5465 (SFBOOL_SHL_D 7) ;; shift left dest
5466 (SFBOOL_SHL_A 8) ;; shift left arg
   (SFBOOL_MTVSR_D	 9)		;; move to vector dest
5468 (SFBOOL_MFVSR_A_V4SF 10) ;; SFBOOL_MFVSR_A as V4SFmode
5469 (SFBOOL_BOOL_A_DI 11) ;; SFBOOL_BOOL_A1/A2 as DImode
5470 (SFBOOL_TMP_VSX_DI 12) ;; SFBOOL_TMP_VSX as DImode
   (SFBOOL_MTVSR_D_V4SF	13)])	;; SFBOOL_MTVSR_D as V4SFmode
5473 ;; Attempt to optimize some common GLIBC operations using logical operations to
5474 ;; pick apart SFmode operations. For example, there is code from e_powf.c
5475 ;; after macro expansion that looks like:
;;	typedef union {
;;	  float value;
;;	  unsigned int word;
;;	} ieee_float_shape_type;
5486 ;; ieee_float_shape_type gf_u;
5487 ;; gf_u.value = (t1);
5488 ;; (is) = gf_u.word;
5492 ;; ieee_float_shape_type sf_u;
5493 ;; sf_u.word = (is & 0xfffff000);
5494 ;; (t1) = sf_u.value;
5498 ;; This would result in two direct move operations (convert to memory format,
5499 ;; direct move to GPR, do the AND operation, direct move to VSX, convert to
5500 ;; scalar format). With this peephole, we eliminate the direct move to the
5501 ;; GPR, and instead move the integer mask value to the vector register after a
5502 ;; shift and do the VSX logical operation.
;; The insns for dealing with SFmode in GPR registers look like:
5505 ;; (set (reg:V4SF reg2) (unspec:V4SF [(reg:SF reg1)] UNSPEC_VSX_CVDPSPN))
5507 ;; (set (reg:DI reg3) (unspec:DI [(reg:V4SF reg2)] UNSPEC_P8V_RELOAD_FROM_VSX))
5509 ;; (set (reg:DI reg4) (and:DI (reg:DI reg3) (reg:DI reg3)))
5511 ;; (set (reg:DI reg5) (ashift:DI (reg:DI reg4) (const_int 32)))
5513 ;; (set (reg:SF reg6) (unspec:SF [(reg:DI reg5)] UNSPEC_P8V_MTVSRD))
5515 ;; (set (reg:SF reg6) (unspec:SF [(reg:SF reg6)] UNSPEC_VSX_CVSPDPN))
(define_peephole2
  [(match_scratch:DI SFBOOL_TMP_GPR "r")
5519 (match_scratch:V4SF SFBOOL_TMP_VSX "wa")
5521 ;; MFVSRWZ (aka zero_extend)
5522 (set (match_operand:DI SFBOOL_MFVSR_D "int_reg_operand")
5524 (match_operand:SI SFBOOL_MFVSR_A "vsx_register_operand")))
5526 ;; AND/IOR/XOR operation on int
5527 (set (match_operand:SI SFBOOL_BOOL_D "int_reg_operand")
5528 (and_ior_xor:SI (match_operand:SI SFBOOL_BOOL_A1 "int_reg_operand")
5529 (match_operand:SI SFBOOL_BOOL_A2 "reg_or_cint_operand")))
5532 (set (match_operand:DI SFBOOL_SHL_D "int_reg_operand")
5533 (ashift:DI (match_operand:DI SFBOOL_SHL_A "int_reg_operand")
5537 (set (match_operand:SF SFBOOL_MTVSR_D "vsx_register_operand")
5538 (unspec:SF [(match_dup SFBOOL_SHL_D)] UNSPEC_P8V_MTVSRD))]
5540 "TARGET_POWERPC64 && TARGET_DIRECT_MOVE
   /* The REG_P (xxx) tests prevent SUBREGs, which allows us to use REGNO
      to compare registers when the modes are different.  */
5543 && REG_P (operands[SFBOOL_MFVSR_D]) && REG_P (operands[SFBOOL_BOOL_D])
5544 && REG_P (operands[SFBOOL_BOOL_A1]) && REG_P (operands[SFBOOL_SHL_D])
5545 && REG_P (operands[SFBOOL_SHL_A]) && REG_P (operands[SFBOOL_MTVSR_D])
5546 && (REG_P (operands[SFBOOL_BOOL_A2])
5547 || CONST_INT_P (operands[SFBOOL_BOOL_A2]))
5548 && (REGNO (operands[SFBOOL_BOOL_D]) == REGNO (operands[SFBOOL_MFVSR_D])
5549 || peep2_reg_dead_p (2, operands[SFBOOL_MFVSR_D]))
5550 && (REGNO (operands[SFBOOL_MFVSR_D]) == REGNO (operands[SFBOOL_BOOL_A1])
5551 || (REG_P (operands[SFBOOL_BOOL_A2])
5552 && REGNO (operands[SFBOOL_MFVSR_D])
5553 == REGNO (operands[SFBOOL_BOOL_A2])))
5554 && REGNO (operands[SFBOOL_BOOL_D]) == REGNO (operands[SFBOOL_SHL_A])
5555 && (REGNO (operands[SFBOOL_SHL_D]) == REGNO (operands[SFBOOL_BOOL_D])
5556 || peep2_reg_dead_p (3, operands[SFBOOL_BOOL_D]))
5557 && peep2_reg_dead_p (4, operands[SFBOOL_SHL_D])"
5558 [(set (match_dup SFBOOL_TMP_GPR)
5559 (ashift:DI (match_dup SFBOOL_BOOL_A_DI)
5562 (set (match_dup SFBOOL_TMP_VSX_DI)
5563 (match_dup SFBOOL_TMP_GPR))
5565 (set (match_dup SFBOOL_MTVSR_D_V4SF)
5566 (and_ior_xor:V4SF (match_dup SFBOOL_MFVSR_A_V4SF)
5567 (match_dup SFBOOL_TMP_VSX)))]
5569 rtx bool_a1 = operands[SFBOOL_BOOL_A1];
5570 rtx bool_a2 = operands[SFBOOL_BOOL_A2];
5571 int regno_mfvsr_d = REGNO (operands[SFBOOL_MFVSR_D]);
5572 int regno_mfvsr_a = REGNO (operands[SFBOOL_MFVSR_A]);
5573 int regno_tmp_vsx = REGNO (operands[SFBOOL_TMP_VSX]);
5574 int regno_mtvsr_d = REGNO (operands[SFBOOL_MTVSR_D]);
5576 if (CONST_INT_P (bool_a2))
5578 rtx tmp_gpr = operands[SFBOOL_TMP_GPR];
5579 emit_move_insn (tmp_gpr, bool_a2);
5580 operands[SFBOOL_BOOL_A_DI] = tmp_gpr;
5584 int regno_bool_a1 = REGNO (bool_a1);
5585 int regno_bool_a2 = REGNO (bool_a2);
5586 int regno_bool_a = (regno_mfvsr_d == regno_bool_a1
5587 ? regno_bool_a2 : regno_bool_a1);
5588 operands[SFBOOL_BOOL_A_DI] = gen_rtx_REG (DImode, regno_bool_a);
5591 operands[SFBOOL_MFVSR_A_V4SF] = gen_rtx_REG (V4SFmode, regno_mfvsr_a);
5592 operands[SFBOOL_TMP_VSX_DI] = gen_rtx_REG (DImode, regno_tmp_vsx);
5593 operands[SFBOOL_MTVSR_D_V4SF] = gen_rtx_REG (V4SFmode, regno_mtvsr_d);