2 ;; Copyright (C) 2009-2021 Free Software Foundation, Inc.
3 ;; Contributed by Michael Meissner <meissner@linux.vnet.ibm.com>
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify it
8 ;; under the terms of the GNU General Public License as published
9 ;; by the Free Software Foundation; either version 3, or (at your
10 ;; option) any later version.
12 ;; GCC is distributed in the hope that it will be useful, but WITHOUT
13 ;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 ;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
15 ;; License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
21 ;; Iterator for comparison types
22 (define_code_iterator CMP_TEST [eq lt gt unordered])
24 ;; Mode attribute for vector floate and floato conversions
25 (define_mode_attr VF_sxddp [(V2DI "sxd") (V2DF "dp")])
27 ;; Iterator for both scalar and vector floating point types supported by VSX
28 (define_mode_iterator VSX_B [DF V4SF V2DF])
30 ;; Iterator for the 2 64-bit vector types
31 (define_mode_iterator VSX_D [V2DF V2DI])
33 ;; Mode iterator to handle swapping words on little endian for the 128-bit
34 ;; types that go in a single vector register.
35 (define_mode_iterator VSX_LE_128 [(KF "FLOAT128_VECTOR_P (KFmode)")
36 (TF "FLOAT128_VECTOR_P (TFmode)")
40 ;; Iterator for 128-bit integer types that go in a single vector register.
41 (define_mode_iterator VSX_TI [TI V1TI])
43 ;; Iterator for the 2 32-bit vector types
44 (define_mode_iterator VSX_W [V4SF V4SI])
46 ;; Iterator for the DF types
47 (define_mode_iterator VSX_DF [V2DF DF])
49 ;; Iterator for vector floating point types supported by VSX
50 (define_mode_iterator VSX_F [V4SF V2DF])
52 ;; Iterator for logical types supported by VSX
53 (define_mode_iterator VSX_L [V16QI
61 (KF "FLOAT128_VECTOR_P (KFmode)")
62 (TF "FLOAT128_VECTOR_P (TFmode)")])
64 ;; Iterator for memory moves.
65 (define_mode_iterator VSX_M [V16QI
72 (KF "FLOAT128_VECTOR_P (KFmode)")
73 (TF "FLOAT128_VECTOR_P (TFmode)")
76 (define_mode_attr VSX_XXBR [(V8HI "h")
83 ;; Map into the appropriate load/store name based on the type
84 (define_mode_attr VSm [(V16QI "vw4")
96 ;; Map the register class used
97 (define_mode_attr VSr [(V16QI "v")
111 ;; What value we need in the "isa" field, to make the IEEE QP float work.
112 (define_mode_attr VSisa [(V16QI "*")
126 ;; A mode attribute to disparage use of GPR registers, except for scalar
128 (define_mode_attr ??r [(V16QI "??r")
139 ;; A mode attribute used for 128-bit constant values.
140 (define_mode_attr nW [(V16QI "W")
151 ;; Same size integer type for floating point data
152 (define_mode_attr VSi [(V4SF "v4si")
156 (define_mode_attr VSI [(V4SF "V4SI")
160 ;; Word size for same size conversion
161 (define_mode_attr VSc [(V4SF "w")
165 ;; Map into either s or v, depending on whether this is a scalar or vector
167 (define_mode_attr VSv [(V16QI "v")
177 ;; Appropriate type for add ops (and other simple FP ops)
178 (define_mode_attr VStype_simple [(V2DF "vecdouble")
182 ;; Appropriate type for multiply ops
183 (define_mode_attr VStype_mul [(V2DF "vecdouble")
187 ;; Appropriate type for divide ops.
188 (define_mode_attr VStype_div [(V2DF "vecdiv")
192 ;; Map the scalar mode for a vector type
193 (define_mode_attr VS_scalar [(V1TI "TI")
201 ;; Map to a double-sized vector mode
202 (define_mode_attr VS_double [(V4SI "V8SI")
208 ;; Iterators for loading constants with xxspltib
209 (define_mode_iterator VSINT_84 [V4SI V2DI DI SI])
210 (define_mode_iterator VSINT_842 [V8HI V4SI V2DI])
212 ;; Vector reverse byte modes
213 (define_mode_iterator VEC_REVB [V8HI V4SI V2DI V4SF V2DF V1TI])
215 ;; Iterator for ISA 3.0 vector extract/insert of small integer vectors.
216 ;; VSX_EXTRACT_I2 doesn't include V4SImode because SI extracts can be
217 ;; done on ISA 2.07 and not just ISA 3.0.
218 (define_mode_iterator VSX_EXTRACT_I [V16QI V8HI V4SI])
219 (define_mode_iterator VSX_EXTRACT_I2 [V16QI V8HI])
220 (define_mode_iterator VSX_EXTRACT_I4 [V16QI V8HI V4SI V2DI])
222 (define_mode_attr VSX_EXTRACT_WIDTH [(V16QI "b")
226 ;; Mode attribute to give the correct predicate for ISA 3.0 vector extract and
227 ;; insert to validate the operand number.
228 (define_mode_attr VSX_EXTRACT_PREDICATE [(V16QI "const_0_to_15_operand")
229 (V8HI "const_0_to_7_operand")
230 (V4SI "const_0_to_3_operand")])
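;; As an illustrative example (not from the original sources): a V8HI element
;; number is checked against const_0_to_7_operand, so something like
;;   vec_extract ((vector unsigned short) x, 7)
;; uses the highest element number these ISA 3.0 extract patterns accept.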
232 ;; Mode attribute to give the constraint for vector extract and insert
234 (define_mode_attr VSX_EX [(V16QI "v")
238 ;; Mode iterator for binary floating types other than double to
239 ;; optimize converting to that floating point type from an extract
240 ;; of an integer type
241 (define_mode_iterator VSX_EXTRACT_FL [SF
242 (IF "FLOAT128_2REG_P (IFmode)")
243 (KF "TARGET_FLOAT128_HW")
244 (TF "FLOAT128_2REG_P (TFmode)
245 || (FLOAT128_IEEE_P (TFmode)
246 && TARGET_FLOAT128_HW)")])
248 ;; Mode iterator for binary floating types that have a direct conversion
249 ;; from 64-bit integer to floating point
250 (define_mode_iterator FL_CONV [SF
252 (KF "TARGET_FLOAT128_HW")
253 (TF "TARGET_FLOAT128_HW
254 && FLOAT128_IEEE_P (TFmode)")])
256 ;; Iterator for the 2 short vector types to do a splat from an integer
257 (define_mode_iterator VSX_SPLAT_I [V16QI V8HI])
259 ;; Mode attribute to give the count for the splat instruction to splat
260 ;; the value in the 64-bit integer slot
261 (define_mode_attr VSX_SPLAT_COUNT [(V16QI "7") (V8HI "3")])
263 ;; Mode attribute to give the suffix for the splat instruction
264 (define_mode_attr VSX_SPLAT_SUFFIX [(V16QI "b") (V8HI "h")])
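;; Illustrative sketch of how the two attributes combine (assuming the usual
;; mtvsrdd + vspltb/vsplth sequence): V16QI yields vspltb ...,7 and V8HI
;; yields vsplth ...,3, i.e. the count picks the low-order element of the
;; 64-bit value, which is then replicated across the vector.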
266 ;; Iterator for the move to mask instructions
267 (define_mode_iterator VSX_MM [V16QI V8HI V4SI V2DI V1TI])
268 (define_mode_iterator VSX_MM4 [V16QI V8HI V4SI V2DI])
270 ;; Longer vec int modes for rotate/mask ops
271 ;; and Vector Integer Multiply/Divide/Modulo Instructions
272 (define_mode_iterator VIlong [V2DI V4SI])
274 ;; Constants for creating unspecs
275 (define_c_enum "unspec"
288 UNSPEC_VSX_UNS_FLOAT2
290 UNSPEC_VSX_UNS_FLOATE
292 UNSPEC_VSX_UNS_FLOATO
306 UNSPEC_VSX_SIGN_EXTEND
307 UNSPEC_VSX_XVCVBF16SPN
308 UNSPEC_VSX_XVCVSPBF16
309 UNSPEC_VSX_XVCVSPSXDS
320 UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTH
321 UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTL
353 UNSPEC_VSX_FIRST_MATCH_INDEX
354 UNSPEC_VSX_FIRST_MATCH_EOS_INDEX
355 UNSPEC_VSX_FIRST_MISMATCH_INDEX
356 UNSPEC_VSX_FIRST_MISMATCH_EOS_INDEX
360 UNSPEC_MTVSRD_DITI_W1
374 (define_int_iterator XVCVBF16 [UNSPEC_VSX_XVCVSPBF16
375 UNSPEC_VSX_XVCVBF16SPN])
377 (define_int_attr xvcvbf16 [(UNSPEC_VSX_XVCVSPBF16 "xvcvspbf16")
378 (UNSPEC_VSX_XVCVBF16SPN "xvcvbf16spn")])
380 ;; Like VI, defined in vector.md, but add ISA 2.07 integer vector ops
381 (define_mode_iterator VI2 [V4SI V8HI V16QI V2DI])
384 ;; Vector replace_elt iterator/attrs for 32-bit and 64-bit elements
384 (define_mode_iterator REPLACE_ELT [V4SI V4SF V2DI V2DF])
385 (define_mode_attr REPLACE_ELT_char [(V4SI "w") (V4SF "w")
386 (V2DI "d") (V2DF "d")])
387 (define_mode_attr REPLACE_ELT_sh [(V4SI "2") (V4SF "2")
388 (V2DI "3") (V2DF "3")])
389 (define_mode_attr REPLACE_ELT_max [(V4SI "12") (V4SF "12")
390 (V2DI "8") (V2DF "8")])
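;; Sanity check of the values above (illustrative): REPLACE_ELT_sh is log2 of
;; the element size in bytes and REPLACE_ELT_max is 16 minus that size, so a
;; V4SI element number n maps to byte offset n << 2, e.g. element 3 -> byte 12,
;; the largest offset allowed for word elements.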
394 ;; The patterns for LE permuted loads and stores come before the general
395 ;; VSX moves so they match first.
396 (define_insn_and_split "*vsx_le_perm_load_<mode>"
397 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
398 (match_operand:VSX_D 1 "indexed_or_indirect_operand" "Z"))]
399 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
405 (parallel [(const_int 1) (const_int 0)])))
409 (parallel [(const_int 1) (const_int 0)])))]
411 rtx mem = operands[1];
413 /* Don't apply the swap optimization if we've already performed register
414 allocation and the hard register destination is not in the altivec
416 if ((MEM_ALIGN (mem) >= 128)
417 && (!HARD_REGISTER_NUM_P (reg_or_subregno (operands[0]))
418 || ALTIVEC_REGNO_P (reg_or_subregno (operands[0]))))
420 rtx mem_address = XEXP (mem, 0);
421 enum machine_mode mode = GET_MODE (mem);
423 if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
425 /* Replace the source memory address with masked address. */
426 rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem);
427 emit_insn (lvx_set_expr);
430 else if (rs6000_quadword_masked_address_p (mem_address))
432 /* This rtl is already in the form that matches lvx
433 instruction, so leave it alone. */
436 /* Otherwise, fall through to transform into a swapping load. */
438 operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
441 [(set_attr "type" "vecload")
442 (set_attr "length" "8")])
444 (define_insn_and_split "*vsx_le_perm_load_<mode>"
445 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
446 (match_operand:VSX_W 1 "indexed_or_indirect_operand" "Z"))]
447 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
453 (parallel [(const_int 2) (const_int 3)
454 (const_int 0) (const_int 1)])))
458 (parallel [(const_int 2) (const_int 3)
459 (const_int 0) (const_int 1)])))]
461 rtx mem = operands[1];
463 /* Don't apply the swap optimization if we've already performed register
464 allocation and the hard register destination is not in the altivec
466 if ((MEM_ALIGN (mem) >= 128)
467 && (!HARD_REGISTER_P (operands[0])
468 || ALTIVEC_REGNO_P (REGNO (operands[0]))))
470 rtx mem_address = XEXP (mem, 0);
471 enum machine_mode mode = GET_MODE (mem);
473 if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
475 /* Replace the source memory address with masked address. */
476 rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem);
477 emit_insn (lvx_set_expr);
480 else if (rs6000_quadword_masked_address_p (mem_address))
482 /* This rtl is already in the form that matches lvx
483 instruction, so leave it alone. */
486 /* Otherwise, fall through to transform into a swapping load. */
488 operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
491 [(set_attr "type" "vecload")
492 (set_attr "length" "8")])
494 (define_insn_and_split "*vsx_le_perm_load_v8hi"
495 [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
496 (match_operand:V8HI 1 "indexed_or_indirect_operand" "Z"))]
497 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
503 (parallel [(const_int 4) (const_int 5)
504 (const_int 6) (const_int 7)
505 (const_int 0) (const_int 1)
506 (const_int 2) (const_int 3)])))
510 (parallel [(const_int 4) (const_int 5)
511 (const_int 6) (const_int 7)
512 (const_int 0) (const_int 1)
513 (const_int 2) (const_int 3)])))]
515 rtx mem = operands[1];
517 /* Don't apply the swap optimization if we've already performed register
518 allocation and the hard register destination is not in the altivec
520 if ((MEM_ALIGN (mem) >= 128)
521 && (!HARD_REGISTER_P (operands[0])
522 || ALTIVEC_REGNO_P (REGNO (operands[0]))))
524 rtx mem_address = XEXP (mem, 0);
525 enum machine_mode mode = GET_MODE (mem);
527 if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
529 /* Replace the source memory address with masked address. */
530 rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem);
531 emit_insn (lvx_set_expr);
534 else if (rs6000_quadword_masked_address_p (mem_address))
536 /* This rtl is already in the form that matches lvx
537 instruction, so leave it alone. */
540 /* Otherwise, fall through to transform into a swapping load. */
542 operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
545 [(set_attr "type" "vecload")
546 (set_attr "length" "8")])
548 (define_insn_and_split "*vsx_le_perm_load_v16qi"
549 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
550 (match_operand:V16QI 1 "indexed_or_indirect_operand" "Z"))]
551 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
557 (parallel [(const_int 8) (const_int 9)
558 (const_int 10) (const_int 11)
559 (const_int 12) (const_int 13)
560 (const_int 14) (const_int 15)
561 (const_int 0) (const_int 1)
562 (const_int 2) (const_int 3)
563 (const_int 4) (const_int 5)
564 (const_int 6) (const_int 7)])))
568 (parallel [(const_int 8) (const_int 9)
569 (const_int 10) (const_int 11)
570 (const_int 12) (const_int 13)
571 (const_int 14) (const_int 15)
572 (const_int 0) (const_int 1)
573 (const_int 2) (const_int 3)
574 (const_int 4) (const_int 5)
575 (const_int 6) (const_int 7)])))]
577 rtx mem = operands[1];
579 /* Don't apply the swap optimization if we've already performed register
580 allocation and the hard register destination is not in the altivec
582 if ((MEM_ALIGN (mem) >= 128)
583 && (!HARD_REGISTER_P (operands[0])
584 || ALTIVEC_REGNO_P (REGNO (operands[0]))))
586 rtx mem_address = XEXP (mem, 0);
587 enum machine_mode mode = GET_MODE (mem);
589 if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
591 /* Replace the source memory address with masked address. */
592 rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem);
593 emit_insn (lvx_set_expr);
596 else if (rs6000_quadword_masked_address_p (mem_address))
598 /* This rtl is already in the form that matches lvx
599 instruction, so leave it alone. */
602 /* Otherwise, fall through to transform into a swapping load. */
604 operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
607 [(set_attr "type" "vecload")
608 (set_attr "length" "8")])
610 (define_insn "*vsx_le_perm_store_<mode>"
611 [(set (match_operand:VSX_D 0 "indexed_or_indirect_operand" "=Z")
612 (match_operand:VSX_D 1 "vsx_register_operand" "+wa"))]
613 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
615 [(set_attr "type" "vecstore")
616 (set_attr "length" "12")])
619 [(set (match_operand:VSX_D 0 "indexed_or_indirect_operand")
620 (match_operand:VSX_D 1 "vsx_register_operand"))]
621 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
625 (parallel [(const_int 1) (const_int 0)])))
629 (parallel [(const_int 1) (const_int 0)])))]
631 rtx mem = operands[0];
633 /* Don't apply the swap optimization if we've already performed register
634 allocation and the hard register source is not in the altivec range. */
635 if ((MEM_ALIGN (mem) >= 128)
636 && (!HARD_REGISTER_NUM_P (reg_or_subregno (operands[1]))
637 || ALTIVEC_REGNO_P (reg_or_subregno (operands[1]))))
639 rtx mem_address = XEXP (mem, 0);
640 enum machine_mode mode = GET_MODE (mem);
641 if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
643 rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]);
644 emit_insn (stvx_set_expr);
647 else if (rs6000_quadword_masked_address_p (mem_address))
649 /* This rtl is already in the form that matches stvx instruction,
650 so leave it alone. */
653 /* Otherwise, fall through to transform into a swapping store. */
656 operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
660 ;; The post-reload split requires that we re-permute the source
661 ;; register in case it is still live.
663 [(set (match_operand:VSX_D 0 "indexed_or_indirect_operand")
664 (match_operand:VSX_D 1 "vsx_register_operand"))]
665 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
669 (parallel [(const_int 1) (const_int 0)])))
673 (parallel [(const_int 1) (const_int 0)])))
677 (parallel [(const_int 1) (const_int 0)])))]
680 (define_insn "*vsx_le_perm_store_<mode>"
681 [(set (match_operand:VSX_W 0 "indexed_or_indirect_operand" "=Z")
682 (match_operand:VSX_W 1 "vsx_register_operand" "+wa"))]
683 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
685 [(set_attr "type" "vecstore")
686 (set_attr "length" "12")])
689 [(set (match_operand:VSX_W 0 "indexed_or_indirect_operand")
690 (match_operand:VSX_W 1 "vsx_register_operand"))]
691 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
695 (parallel [(const_int 2) (const_int 3)
696 (const_int 0) (const_int 1)])))
700 (parallel [(const_int 2) (const_int 3)
701 (const_int 0) (const_int 1)])))]
703 rtx mem = operands[0];
705 /* Don't apply the swap optimization if we've already performed register
706 allocation and the hard register source is not in the altivec range. */
707 if ((MEM_ALIGN (mem) >= 128)
708 && (!HARD_REGISTER_NUM_P (reg_or_subregno (operands[1]))
709 || ALTIVEC_REGNO_P (reg_or_subregno (operands[1]))))
711 rtx mem_address = XEXP (mem, 0);
712 enum machine_mode mode = GET_MODE (mem);
713 if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
715 rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]);
716 emit_insn (stvx_set_expr);
719 else if (rs6000_quadword_masked_address_p (mem_address))
721 /* This rtl is already in the form that matches stvx instruction,
722 so leave it alone. */
725 /* Otherwise, fall through to transform into a swapping store. */
728 operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
732 ;; The post-reload split requires that we re-permute the source
733 ;; register in case it is still live.
735 [(set (match_operand:VSX_W 0 "indexed_or_indirect_operand")
736 (match_operand:VSX_W 1 "vsx_register_operand"))]
737 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
741 (parallel [(const_int 2) (const_int 3)
742 (const_int 0) (const_int 1)])))
746 (parallel [(const_int 2) (const_int 3)
747 (const_int 0) (const_int 1)])))
751 (parallel [(const_int 2) (const_int 3)
752 (const_int 0) (const_int 1)])))]
755 (define_insn "*vsx_le_perm_store_v8hi"
756 [(set (match_operand:V8HI 0 "indexed_or_indirect_operand" "=Z")
757 (match_operand:V8HI 1 "vsx_register_operand" "+wa"))]
758 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
760 [(set_attr "type" "vecstore")
761 (set_attr "length" "12")])
764 [(set (match_operand:V8HI 0 "indexed_or_indirect_operand")
765 (match_operand:V8HI 1 "vsx_register_operand"))]
766 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
770 (parallel [(const_int 4) (const_int 5)
771 (const_int 6) (const_int 7)
772 (const_int 0) (const_int 1)
773 (const_int 2) (const_int 3)])))
777 (parallel [(const_int 4) (const_int 5)
778 (const_int 6) (const_int 7)
779 (const_int 0) (const_int 1)
780 (const_int 2) (const_int 3)])))]
782 rtx mem = operands[0];
784 /* Don't apply the swap optimization if we've already performed register
785 allocation and the hard register source is not in the altivec range. */
786 if ((MEM_ALIGN (mem) >= 128)
787 && (!HARD_REGISTER_NUM_P (reg_or_subregno (operands[1]))
788 || ALTIVEC_REGNO_P (reg_or_subregno (operands[1]))))
790 rtx mem_address = XEXP (mem, 0);
791 enum machine_mode mode = GET_MODE (mem);
792 if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
794 rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]);
795 emit_insn (stvx_set_expr);
798 else if (rs6000_quadword_masked_address_p (mem_address))
800 /* This rtl is already in the form that matches stvx instruction,
801 so leave it alone. */
804 /* Otherwise, fall through to transform into a swapping store. */
807 operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
811 ;; The post-reload split requires that we re-permute the source
812 ;; register in case it is still live.
814 [(set (match_operand:V8HI 0 "indexed_or_indirect_operand")
815 (match_operand:V8HI 1 "vsx_register_operand"))]
816 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
820 (parallel [(const_int 4) (const_int 5)
821 (const_int 6) (const_int 7)
822 (const_int 0) (const_int 1)
823 (const_int 2) (const_int 3)])))
827 (parallel [(const_int 4) (const_int 5)
828 (const_int 6) (const_int 7)
829 (const_int 0) (const_int 1)
830 (const_int 2) (const_int 3)])))
834 (parallel [(const_int 4) (const_int 5)
835 (const_int 6) (const_int 7)
836 (const_int 0) (const_int 1)
837 (const_int 2) (const_int 3)])))]
840 (define_insn "*vsx_le_perm_store_v16qi"
841 [(set (match_operand:V16QI 0 "indexed_or_indirect_operand" "=Z")
842 (match_operand:V16QI 1 "vsx_register_operand" "+wa"))]
843 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
845 [(set_attr "type" "vecstore")
846 (set_attr "length" "12")])
849 [(set (match_operand:V16QI 0 "indexed_or_indirect_operand")
850 (match_operand:V16QI 1 "vsx_register_operand"))]
851 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
855 (parallel [(const_int 8) (const_int 9)
856 (const_int 10) (const_int 11)
857 (const_int 12) (const_int 13)
858 (const_int 14) (const_int 15)
859 (const_int 0) (const_int 1)
860 (const_int 2) (const_int 3)
861 (const_int 4) (const_int 5)
862 (const_int 6) (const_int 7)])))
866 (parallel [(const_int 8) (const_int 9)
867 (const_int 10) (const_int 11)
868 (const_int 12) (const_int 13)
869 (const_int 14) (const_int 15)
870 (const_int 0) (const_int 1)
871 (const_int 2) (const_int 3)
872 (const_int 4) (const_int 5)
873 (const_int 6) (const_int 7)])))]
875 rtx mem = operands[0];
877 /* Don't apply the swap optimization if we've already performed register
878 allocation and the hard register source is not in the altivec range. */
879 if ((MEM_ALIGN (mem) >= 128)
880 && (!HARD_REGISTER_NUM_P (reg_or_subregno (operands[1]))
881 || ALTIVEC_REGNO_P (reg_or_subregno (operands[1]))))
883 rtx mem_address = XEXP (mem, 0);
884 enum machine_mode mode = GET_MODE (mem);
885 if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
887 rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]);
888 emit_insn (stvx_set_expr);
891 else if (rs6000_quadword_masked_address_p (mem_address))
893 /* This rtl is already in the form that matches stvx instruction,
894 so leave it alone. */
897 /* Otherwise, fall through to transform into a swapping store. */
900 operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
904 ;; The post-reload split requires that we re-permute the source
905 ;; register in case it is still live.
907 [(set (match_operand:V16QI 0 "indexed_or_indirect_operand")
908 (match_operand:V16QI 1 "vsx_register_operand"))]
909 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
913 (parallel [(const_int 8) (const_int 9)
914 (const_int 10) (const_int 11)
915 (const_int 12) (const_int 13)
916 (const_int 14) (const_int 15)
917 (const_int 0) (const_int 1)
918 (const_int 2) (const_int 3)
919 (const_int 4) (const_int 5)
920 (const_int 6) (const_int 7)])))
924 (parallel [(const_int 8) (const_int 9)
925 (const_int 10) (const_int 11)
926 (const_int 12) (const_int 13)
927 (const_int 14) (const_int 15)
928 (const_int 0) (const_int 1)
929 (const_int 2) (const_int 3)
930 (const_int 4) (const_int 5)
931 (const_int 6) (const_int 7)])))
935 (parallel [(const_int 8) (const_int 9)
936 (const_int 10) (const_int 11)
937 (const_int 12) (const_int 13)
938 (const_int 14) (const_int 15)
939 (const_int 0) (const_int 1)
940 (const_int 2) (const_int 3)
941 (const_int 4) (const_int 5)
942 (const_int 6) (const_int 7)])))]
945 ;; Little endian word swapping for 128-bit types that are either scalars or the
946 ;; special V1TI container class, for which vec_select is not appropriate.
948 (define_insn "*vsx_le_permute_<mode>"
949 [(set (match_operand:VSX_TI 0 "nonimmediate_operand" "=wa,wa,Z,&r,&r,Q")
951 (match_operand:VSX_TI 1 "input_operand" "wa,Z,wa,r,Q,r")
953 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
955 xxpermdi %x0,%x1,%x1,2
959 ld%U1%X1 %0,%L1\;ld%U1%X1 %L0,%1
960 std%U0%X0 %L1,%0\;std%U0%X0 %1,%L0"
961 [(set_attr "length" "*,*,*,8,8,8")
962 (set_attr "type" "vecperm,vecload,vecstore,*,load,store")])
964 (define_insn_and_split "*vsx_le_undo_permute_<mode>"
965 [(set (match_operand:VSX_TI 0 "vsx_register_operand" "=wa,wa")
968 (match_operand:VSX_TI 1 "vsx_register_operand" "0,wa")
971 "!BYTES_BIG_ENDIAN && TARGET_VSX"
976 [(set (match_dup 0) (match_dup 1))]
978 if (reload_completed && REGNO (operands[0]) == REGNO (operands[1]))
980 emit_note (NOTE_INSN_DELETED);
984 [(set_attr "length" "0,4")
985 (set_attr "type" "veclogical")])
987 (define_insn_and_split "*vsx_le_perm_load_<mode>"
988 [(set (match_operand:VSX_LE_128 0 "vsx_register_operand" "=wa,r")
989 (match_operand:VSX_LE_128 1 "memory_operand" "Z,Q"))]
990 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
991 && !altivec_indexed_or_indirect_operand (operands[1], <MODE>mode)"
995 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
996 && !altivec_indexed_or_indirect_operand (operands[1], <MODE>mode)"
999 rtx tmp = (can_create_pseudo_p ()
1000 ? gen_reg_rtx_and_attrs (operands[0])
1002 rs6000_emit_le_vsx_permute (tmp, operands[1], <MODE>mode);
1003 rs6000_emit_le_vsx_permute (operands[0], tmp, <MODE>mode);
1006 [(set_attr "type" "vecload,load")
1007 (set_attr "length" "8,8")
1008 (set_attr "isa" "<VSisa>,*")])
1010 (define_insn "*vsx_le_perm_store_<mode>"
1011 [(set (match_operand:VSX_LE_128 0 "memory_operand" "=Z,Q")
1012 (match_operand:VSX_LE_128 1 "vsx_register_operand" "+wa,r"))]
1013 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
1014 && !altivec_indexed_or_indirect_operand (operands[0], <MODE>mode)"
1018 [(set_attr "type" "vecstore,store")
1019 (set_attr "length" "12,8")
1020 (set_attr "isa" "<VSisa>,*")])
1023 [(set (match_operand:VSX_LE_128 0 "memory_operand")
1024 (match_operand:VSX_LE_128 1 "vsx_register_operand"))]
1025 "!BYTES_BIG_ENDIAN && TARGET_VSX && !reload_completed && !TARGET_P9_VECTOR
1026 && !altivec_indexed_or_indirect_operand (operands[0], <MODE>mode)"
1029 rtx tmp = (can_create_pseudo_p ()
1030 ? gen_reg_rtx_and_attrs (operands[0])
1032 rs6000_emit_le_vsx_permute (tmp, operands[1], <MODE>mode);
1033 rs6000_emit_le_vsx_permute (operands[0], tmp, <MODE>mode);
1037 ;; Peepholes to catch loads and stores for TImode if TImode landed in
1038 ;; GPR registers on a little endian system.
1040 [(set (match_operand:VSX_TI 0 "int_reg_operand")
1041 (rotate:VSX_TI (match_operand:VSX_TI 1 "memory_operand")
1043 (set (match_operand:VSX_TI 2 "int_reg_operand")
1044 (rotate:VSX_TI (match_dup 0)
1046 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
1047 && (rtx_equal_p (operands[0], operands[2])
1048 || peep2_reg_dead_p (2, operands[0]))"
1049 [(set (match_dup 2) (match_dup 1))])
1052 [(set (match_operand:VSX_TI 0 "int_reg_operand")
1053 (rotate:VSX_TI (match_operand:VSX_TI 1 "int_reg_operand")
1055 (set (match_operand:VSX_TI 2 "memory_operand")
1056 (rotate:VSX_TI (match_dup 0)
1058 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
1059 && peep2_reg_dead_p (2, operands[0])"
1060 [(set (match_dup 2) (match_dup 1))])
1062 ;; Peephole to catch memory to memory transfers for TImode if TImode landed in
1063 ;; VSX registers on a little endian system. The vector types and IEEE 128-bit
1064 ;; floating point are handled by the more generic swap elimination pass.
1066 [(set (match_operand:TI 0 "vsx_register_operand")
1067 (rotate:TI (match_operand:TI 1 "vsx_register_operand")
1069 (set (match_operand:TI 2 "vsx_register_operand")
1070 (rotate:TI (match_dup 0)
1072 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
1073 && (rtx_equal_p (operands[0], operands[2])
1074 || peep2_reg_dead_p (2, operands[0]))"
1075 [(set (match_dup 2) (match_dup 1))])
1077 ;; The post-reload split requires that we re-permute the source
1078 ;; register in case it is still live.
1080 [(set (match_operand:VSX_LE_128 0 "memory_operand")
1081 (match_operand:VSX_LE_128 1 "vsx_register_operand"))]
1082 "!BYTES_BIG_ENDIAN && TARGET_VSX && reload_completed && !TARGET_P9_VECTOR
1083 && !altivec_indexed_or_indirect_operand (operands[0], <MODE>mode)"
1086 rs6000_emit_le_vsx_permute (operands[1], operands[1], <MODE>mode);
1087 rs6000_emit_le_vsx_permute (operands[0], operands[1], <MODE>mode);
1088 rs6000_emit_le_vsx_permute (operands[1], operands[1], <MODE>mode);
1092 ;; Vector constants that can be generated with XXSPLTIB, which was added in ISA
1093 ;; 3.0. Both (const_vector [..]) and (vec_duplicate ...) forms are recognized.
1094 (define_insn "xxspltib_v16qi"
1095 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
1096 (vec_duplicate:V16QI (match_operand:SI 1 "s8bit_cint_operand" "n")))]
1099 operands[2] = GEN_INT (INTVAL (operands[1]) & 0xff);
1100 return "xxspltib %x0,%2";
1102 [(set_attr "type" "vecperm")])
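;; Illustrative use (an assumption, not taken from this file): because the
;; code above masks the constant with 0xff, a splat of -5 comes out as
;;   xxspltib %xN,251
;; for C source along the lines of
;;   vector signed char v = vec_splats ((signed char) -5);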
1104 (define_insn "xxspltib_<mode>_nosplit"
1105 [(set (match_operand:VSINT_842 0 "vsx_register_operand" "=wa,wa")
1106 (match_operand:VSINT_842 1 "xxspltib_constant_nosplit" "jwM,wE"))]
1109 rtx op1 = operands[1];
1113 if (!xxspltib_constant_p (op1, <MODE>mode, &num_insns, &value)
1117 operands[2] = GEN_INT (value & 0xff);
1118 return "xxspltib %x0,%2";
1120 [(set_attr "type" "vecperm")])
1122 (define_insn_and_split "*xxspltib_<mode>_split"
1123 [(set (match_operand:VSINT_842 0 "altivec_register_operand" "=v")
1124 (match_operand:VSINT_842 1 "xxspltib_constant_split" "wS"))]
1132 rtx op0 = operands[0];
1133 rtx op1 = operands[1];
1134 rtx tmp = ((can_create_pseudo_p ())
1135 ? gen_reg_rtx (V16QImode)
1136 : gen_lowpart (V16QImode, op0));
1138 if (!xxspltib_constant_p (op1, <MODE>mode, &num_insns, &value)
1142 emit_insn (gen_xxspltib_v16qi (tmp, GEN_INT (value)));
1144 if (<MODE>mode == V2DImode)
1145 emit_insn (gen_vsx_sign_extend_qi_v2di (op0, tmp));
1147 else if (<MODE>mode == V4SImode)
1148 emit_insn (gen_vsx_sign_extend_qi_v4si (op0, tmp));
1150 else if (<MODE>mode == V8HImode)
1151 emit_insn (gen_altivec_vupkhsb (op0, tmp));
1158 [(set_attr "type" "vecperm")
1159 (set_attr "length" "8")])
1162 ;; Prefer using vector registers over GPRs. Prefer using ISA 3.0's XXSPLTIB
1163 ;; or Altivec VSPLTISW 0/-1 over XXLXOR/XXLORC to set a register to all 0's or
1164 ;; all 1's, since the machine does not have to wait for the previous
1165 ;; instruction using the register being set (such as a store waiting on a slow
1166 ;; instruction). But generate XXLXOR/XXLORC if it will avoid a register move.
1168 ;; VSX store VSX load VSX move VSX->GPR GPR->VSX LQ (GPR)
1169 ;; STQ (GPR) GPR load GPR store GPR move XXSPLTIB VSPLTISW
1170 ;; VSX 0/-1 VMX const GPR const LVX (VMX) STVX (VMX)
1171 (define_insn "vsx_mov<mode>_64bit"
1172 [(set (match_operand:VSX_M 0 "nonimmediate_operand"
1173 "=ZwO, wa, wa, r, we, ?wQ,
1174 ?&r, ??r, ??Y, <??r>, wa, v,
1175 ?wa, v, <??r>, wZ, v")
1177 (match_operand:VSX_M 1 "input_operand"
1178 "wa, ZwO, wa, we, r, r,
1179 wQ, Y, r, r, wE, jwM,
1180 ?jwM, W, <nW>, v, wZ"))]
1182 "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)
1183 && (register_operand (operands[0], <MODE>mode)
1184 || register_operand (operands[1], <MODE>mode))"
1186 return rs6000_output_move_128bit (operands);
1189 "vecstore, vecload, vecsimple, mtvsr, mfvsr, load,
1190 store, load, store, *, vecsimple, vecsimple,
1191 vecsimple, *, *, vecstore, vecload")
1192 (set_attr "num_insns"
1196 (set_attr "max_prefixed_insns"
1205 "<VSisa>, <VSisa>, <VSisa>, *, *, *,
1207 <VSisa>, *, *, *, *")])
1209 ;; VSX store VSX load VSX move GPR load GPR store GPR move
1210 ;; XXSPLTIB VSPLTISW VSX 0/-1 VMX const GPR const
1211 ;; LVX (VMX) STVX (VMX)
1212 (define_insn "*vsx_mov<mode>_32bit"
1213 [(set (match_operand:VSX_M 0 "nonimmediate_operand"
1214 "=ZwO, wa, wa, ??r, ??Y, <??r>,
1215 wa, v, ?wa, v, <??r>,
1218 (match_operand:VSX_M 1 "input_operand"
1219 "wa, ZwO, wa, Y, r, r,
1220 wE, jwM, ?jwM, W, <nW>,
1223 "!TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)
1224 && (register_operand (operands[0], <MODE>mode)
1225 || register_operand (operands[1], <MODE>mode))"
1227 return rs6000_output_move_128bit (operands);
1230 "vecstore, vecload, vecsimple, load, store, *,
1231 vecsimple, vecsimple, vecsimple, *, *,
1234 "*, *, *, 16, 16, 16,
1238 "<VSisa>, <VSisa>, <VSisa>, *, *, *,
1239 p9v, *, <VSisa>, *, *,
1242 ;; Explicit load/store expanders for the builtin functions
1243 (define_expand "vsx_load_<mode>"
1244 [(set (match_operand:VSX_M 0 "vsx_register_operand")
1245 (match_operand:VSX_M 1 "memory_operand"))]
1246 "VECTOR_MEM_VSX_P (<MODE>mode)"
1248 /* Expand to swaps if needed, prior to swap optimization. */
1249 if (!BYTES_BIG_ENDIAN && !TARGET_P9_VECTOR
1250 && !altivec_indexed_or_indirect_operand(operands[1], <MODE>mode))
1252 rs6000_emit_le_vsx_move (operands[0], operands[1], <MODE>mode);
1257 (define_expand "vsx_store_<mode>"
1258 [(set (match_operand:VSX_M 0 "memory_operand")
1259 (match_operand:VSX_M 1 "vsx_register_operand"))]
1260 "VECTOR_MEM_VSX_P (<MODE>mode)"
1262 /* Expand to swaps if needed, prior to swap optimization. */
1263 if (!BYTES_BIG_ENDIAN && !TARGET_P9_VECTOR
1264 && !altivec_indexed_or_indirect_operand(operands[0], <MODE>mode))
1266 rs6000_emit_le_vsx_move (operands[0], operands[1], <MODE>mode);
1271 ;; Load rightmost element from load_data
1272 ;; using lxvrbx, lxvrhx, lxvrwx, lxvrdx.
1273 (define_insn "vsx_lxvr<wd>x"
1274 [(set (match_operand:TI 0 "vsx_register_operand" "=wa")
1275 (zero_extend:TI (match_operand:INT_ISA3 1 "memory_operand" "Z")))]
1278 [(set_attr "type" "vecload")])
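;; Illustrative note: the zero_extend:TI above reflects that these loads
;; (e.g. lxvrwx) place the element in the low-order bytes of the vector
;; register and clear the remaining bytes, so the 128-bit result is the
;; memory value zero-extended to TImode.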
1280 ;; Store rightmost element into store_data
1281 ;; using stxvrbx, stxvrhx, stxvrwx, stxvrdx.
1282 (define_insn "vsx_stxvr<wd>x"
1283 [(set (match_operand:INT_ISA3 0 "memory_operand" "=Z")
1284 (truncate:INT_ISA3 (match_operand:TI 1 "vsx_register_operand" "wa")))]
1286 "stxvr<wd>x %x1,%y0"
1287 [(set_attr "type" "vecstore")])
1289 ;; Explicit load/store expanders for the builtin functions for lxvd2x, etc.,
1290 ;; when you really want their element-reversing behavior.
1291 (define_insn "vsx_ld_elemrev_v2di"
1292 [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
1294 (match_operand:V2DI 1 "memory_operand" "Z")
1295 (parallel [(const_int 1) (const_int 0)])))]
1296 "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN"
1298 [(set_attr "type" "vecload")])
1300 (define_insn "vsx_ld_elemrev_v1ti"
1301 [(set (match_operand:V1TI 0 "vsx_register_operand" "=wa")
1303 (match_operand:V1TI 1 "memory_operand" "Z")
1304 (parallel [(const_int 0)])))]
1305 "VECTOR_MEM_VSX_P (V1TImode) && !BYTES_BIG_ENDIAN"
1307 return "lxvd2x %x0,%y1\;xxpermdi %x0,%x0,%x0,2";
1309 [(set_attr "type" "vecload")])
1311 (define_insn "vsx_ld_elemrev_v2df"
1312 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
1314 (match_operand:V2DF 1 "memory_operand" "Z")
1315 (parallel [(const_int 1) (const_int 0)])))]
1316 "VECTOR_MEM_VSX_P (V2DFmode) && !BYTES_BIG_ENDIAN"
1318 [(set_attr "type" "vecload")])
1320 (define_insn "vsx_ld_elemrev_v4si"
1321 [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa")
1323 (match_operand:V4SI 1 "memory_operand" "Z")
1324 (parallel [(const_int 3) (const_int 2)
1325 (const_int 1) (const_int 0)])))]
1326 "VECTOR_MEM_VSX_P (V4SImode) && !BYTES_BIG_ENDIAN"
1328 [(set_attr "type" "vecload")])
1330 (define_insn "vsx_ld_elemrev_v4sf"
1331 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
1333 (match_operand:V4SF 1 "memory_operand" "Z")
1334 (parallel [(const_int 3) (const_int 2)
1335 (const_int 1) (const_int 0)])))]
1336 "VECTOR_MEM_VSX_P (V4SFmode) && !BYTES_BIG_ENDIAN"
1338 [(set_attr "type" "vecload")])
1340 (define_expand "vsx_ld_elemrev_v8hi"
1341 [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
1343 (match_operand:V8HI 1 "memory_operand" "Z")
1344 (parallel [(const_int 7) (const_int 6)
1345 (const_int 5) (const_int 4)
1346 (const_int 3) (const_int 2)
1347 (const_int 1) (const_int 0)])))]
1348 "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN"
1350 if (!TARGET_P9_VECTOR)
1352 rtx tmp = gen_reg_rtx (V4SImode);
1353 rtx subreg, subreg2, perm[16], pcv;
1354 /* 2 is leftmost element in register */
1355 unsigned int reorder[16] = {13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2};
1358 subreg = simplify_gen_subreg (V4SImode, operands[1], V8HImode, 0);
1359 emit_insn (gen_vsx_ld_elemrev_v4si (tmp, subreg));
1360 subreg2 = simplify_gen_subreg (V8HImode, tmp, V4SImode, 0);
1362 for (i = 0; i < 16; ++i)
1363 perm[i] = GEN_INT (reorder[i]);
1365 pcv = force_reg (V16QImode,
1366 gen_rtx_CONST_VECTOR (V16QImode,
1367 gen_rtvec_v (16, perm)));
1368 emit_insn (gen_altivec_vperm_v8hi_direct (operands[0], subreg2,
1374 (define_insn "*vsx_ld_elemrev_v8hi_internal"
1375 [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
1377 (match_operand:V8HI 1 "memory_operand" "Z")
1378 (parallel [(const_int 7) (const_int 6)
1379 (const_int 5) (const_int 4)
1380 (const_int 3) (const_int 2)
1381 (const_int 1) (const_int 0)])))]
1382 "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
1384 [(set_attr "type" "vecload")])
1386 (define_expand "vsx_ld_elemrev_v16qi"
1387 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
1389 (match_operand:V16QI 1 "memory_operand" "Z")
1390 (parallel [(const_int 15) (const_int 14)
1391 (const_int 13) (const_int 12)
1392 (const_int 11) (const_int 10)
1393 (const_int 9) (const_int 8)
1394 (const_int 7) (const_int 6)
1395 (const_int 5) (const_int 4)
1396 (const_int 3) (const_int 2)
1397 (const_int 1) (const_int 0)])))]
1398 "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN"
1400 if (!TARGET_P9_VECTOR)
1402 rtx tmp = gen_reg_rtx (V4SImode);
1403 rtx subreg, subreg2, perm[16], pcv;
1404 /* 3 is leftmost element in register */
1405 unsigned int reorder[16] = {12,13,14,15,8,9,10,11,4,5,6,7,0,1,2,3};
1408 subreg = simplify_gen_subreg (V4SImode, operands[1], V16QImode, 0);
1409 emit_insn (gen_vsx_ld_elemrev_v4si (tmp, subreg));
1410 subreg2 = simplify_gen_subreg (V16QImode, tmp, V4SImode, 0);
1412 for (i = 0; i < 16; ++i)
1413 perm[i] = GEN_INT (reorder[i]);
1415 pcv = force_reg (V16QImode,
1416 gen_rtx_CONST_VECTOR (V16QImode,
1417 gen_rtvec_v (16, perm)));
1418 emit_insn (gen_altivec_vperm_v16qi_direct (operands[0], subreg2,
1424 (define_insn "vsx_ld_elemrev_v16qi_internal"
1425 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
1427 (match_operand:V16QI 1 "memory_operand" "Z")
1428 (parallel [(const_int 15) (const_int 14)
1429 (const_int 13) (const_int 12)
1430 (const_int 11) (const_int 10)
1431 (const_int 9) (const_int 8)
1432 (const_int 7) (const_int 6)
1433 (const_int 5) (const_int 4)
1434 (const_int 3) (const_int 2)
1435 (const_int 1) (const_int 0)])))]
1436 "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
1438 [(set_attr "type" "vecload")])
1440 (define_insn "vsx_st_elemrev_v1ti"
1441 [(set (match_operand:V1TI 0 "memory_operand" "=Z")
1443 (match_operand:V1TI 1 "vsx_register_operand" "+wa")
1444 (parallel [(const_int 0)])))
1445 (clobber (match_dup 1))]
1446 "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN"
1448 return "xxpermdi %x1,%x1,%x1,2\;stxvd2x %x1,%y0";
1450 [(set_attr "type" "vecstore")])
1452 (define_insn "vsx_st_elemrev_v2df"
1453 [(set (match_operand:V2DF 0 "memory_operand" "=Z")
1455 (match_operand:V2DF 1 "vsx_register_operand" "wa")
1456 (parallel [(const_int 1) (const_int 0)])))]
1457 "VECTOR_MEM_VSX_P (V2DFmode) && !BYTES_BIG_ENDIAN"
1459 [(set_attr "type" "vecstore")])
1461 (define_insn "vsx_st_elemrev_v2di"
1462 [(set (match_operand:V2DI 0 "memory_operand" "=Z")
1464 (match_operand:V2DI 1 "vsx_register_operand" "wa")
1465 (parallel [(const_int 1) (const_int 0)])))]
1466 "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN"
1468 [(set_attr "type" "vecstore")])
1470 (define_insn "vsx_st_elemrev_v4sf"
1471 [(set (match_operand:V4SF 0 "memory_operand" "=Z")
1473 (match_operand:V4SF 1 "vsx_register_operand" "wa")
1474 (parallel [(const_int 3) (const_int 2)
1475 (const_int 1) (const_int 0)])))]
1476 "VECTOR_MEM_VSX_P (V4SFmode) && !BYTES_BIG_ENDIAN"
1478 [(set_attr "type" "vecstore")])
1480 (define_insn "vsx_st_elemrev_v4si"
1481 [(set (match_operand:V4SI 0 "memory_operand" "=Z")
1483 (match_operand:V4SI 1 "vsx_register_operand" "wa")
1484 (parallel [(const_int 3) (const_int 2)
1485 (const_int 1) (const_int 0)])))]
1486 "VECTOR_MEM_VSX_P (V4SImode) && !BYTES_BIG_ENDIAN"
1488 [(set_attr "type" "vecstore")])
1490 (define_expand "vsx_st_elemrev_v8hi"
1491 [(set (match_operand:V8HI 0 "memory_operand" "=Z")
1493 (match_operand:V8HI 1 "vsx_register_operand" "wa")
1494 (parallel [(const_int 7) (const_int 6)
1495 (const_int 5) (const_int 4)
1496 (const_int 3) (const_int 2)
1497 (const_int 1) (const_int 0)])))]
1498 "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN"
1500 if (!TARGET_P9_VECTOR)
1502 rtx mem_subreg, subreg, perm[16], pcv;
1503 rtx tmp = gen_reg_rtx (V8HImode);
1504 /* 2 is leftmost element in register */
1505 unsigned int reorder[16] = {13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2};
1508 for (i = 0; i < 16; ++i)
1509 perm[i] = GEN_INT (reorder[i]);
1511 pcv = force_reg (V16QImode,
1512 gen_rtx_CONST_VECTOR (V16QImode,
1513 gen_rtvec_v (16, perm)));
1514 emit_insn (gen_altivec_vperm_v8hi_direct (tmp, operands[1],
1516 subreg = simplify_gen_subreg (V4SImode, tmp, V8HImode, 0);
1517 mem_subreg = simplify_gen_subreg (V4SImode, operands[0], V8HImode, 0);
1518 emit_insn (gen_vsx_st_elemrev_v4si (mem_subreg, subreg));
1523 (define_insn "*vsx_st_elemrev_v2di_internal"
1524 [(set (match_operand:V2DI 0 "memory_operand" "=Z")
1526 (match_operand:V2DI 1 "vsx_register_operand" "wa")
1527 (parallel [(const_int 1) (const_int 0)])))]
1528 "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
1530 [(set_attr "type" "vecstore")])
1532 (define_insn "*vsx_st_elemrev_v8hi_internal"
1533 [(set (match_operand:V8HI 0 "memory_operand" "=Z")
1535 (match_operand:V8HI 1 "vsx_register_operand" "wa")
1536 (parallel [(const_int 7) (const_int 6)
1537 (const_int 5) (const_int 4)
1538 (const_int 3) (const_int 2)
1539 (const_int 1) (const_int 0)])))]
1540 "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
1542 [(set_attr "type" "vecstore")])
1544 (define_expand "vsx_st_elemrev_v16qi"
1545 [(set (match_operand:V16QI 0 "memory_operand" "=Z")
1547 (match_operand:V16QI 1 "vsx_register_operand" "wa")
1548 (parallel [(const_int 15) (const_int 14)
1549 (const_int 13) (const_int 12)
1550 (const_int 11) (const_int 10)
1551 (const_int 9) (const_int 8)
1552 (const_int 7) (const_int 6)
1553 (const_int 5) (const_int 4)
1554 (const_int 3) (const_int 2)
1555 (const_int 1) (const_int 0)])))]
1556 "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN"
1558 if (!TARGET_P9_VECTOR)
1560 rtx mem_subreg, subreg, perm[16], pcv;
1561 rtx tmp = gen_reg_rtx (V16QImode);
1562 /* 3 is leftmost element in register */
1563 unsigned int reorder[16] = {12,13,14,15,8,9,10,11,4,5,6,7,0,1,2,3};
1566 for (i = 0; i < 16; ++i)
1567 perm[i] = GEN_INT (reorder[i]);
1569 pcv = force_reg (V16QImode,
1570 gen_rtx_CONST_VECTOR (V16QImode,
1571 gen_rtvec_v (16, perm)));
1572 emit_insn (gen_altivec_vperm_v16qi_direct (tmp, operands[1],
1574 subreg = simplify_gen_subreg (V4SImode, tmp, V16QImode, 0);
1575 mem_subreg = simplify_gen_subreg (V4SImode, operands[0], V16QImode, 0);
1576 emit_insn (gen_vsx_st_elemrev_v4si (mem_subreg, subreg));
1581 (define_insn "*vsx_st_elemrev_v16qi_internal"
1582 [(set (match_operand:V16QI 0 "memory_operand" "=Z")
1584 (match_operand:V16QI 1 "vsx_register_operand" "wa")
1585 (parallel [(const_int 15) (const_int 14)
1586 (const_int 13) (const_int 12)
1587 (const_int 11) (const_int 10)
1588 (const_int 9) (const_int 8)
1589 (const_int 7) (const_int 6)
1590 (const_int 5) (const_int 4)
1591 (const_int 3) (const_int 2)
1592 (const_int 1) (const_int 0)])))]
1593 "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
1595 [(set_attr "type" "vecstore")])
1598 ;; VSX vector floating point arithmetic instructions. The VSX scalar
1599 ;; instructions are now combined with the insn for the traditional floating
1601 (define_insn "*vsx_add<mode>3"
1602 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1603 (plus:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
1604 (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
1605 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1606 "xvadd<sd>p %x0,%x1,%x2"
1607 [(set_attr "type" "<VStype_simple>")])
1609 (define_insn "*vsx_sub<mode>3"
1610 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1611 (minus:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
1612 (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
1613 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1614 "xvsub<sd>p %x0,%x1,%x2"
1615 [(set_attr "type" "<VStype_simple>")])
1617 (define_insn "*vsx_mul<mode>3"
1618 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1619 (mult:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
1620 (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
1621 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1622 "xvmul<sd>p %x0,%x1,%x2"
1623 [(set_attr "type" "<VStype_simple>")])
1625 ;; Emulate vector with scalar for vec_mul in V2DImode
1626 (define_insn_and_split "vsx_mul_v2di"
1627 [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
1628 (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa")
1629 (match_operand:V2DI 2 "vsx_register_operand" "wa")]
1631 "VECTOR_MEM_VSX_P (V2DImode)"
1633 "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed"
1636 rtx op0 = operands[0];
1637 rtx op1 = operands[1];
1638 rtx op2 = operands[2];
1641 emit_insn (gen_mulv2di3 (op0, op1, op2));
1645 rtx op3 = gen_reg_rtx (DImode);
1646 rtx op4 = gen_reg_rtx (DImode);
1647 rtx op5 = gen_reg_rtx (DImode);
1648 emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0)));
1649 emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0)));
1650 if (TARGET_POWERPC64)
1651 emit_insn (gen_muldi3 (op5, op3, op4));
1654 rtx ret = expand_mult (DImode, op3, op4, NULL, 0, false);
1655 emit_move_insn (op5, ret);
1657 emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1)));
1658 emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1)));
1659 if (TARGET_POWERPC64)
1660 emit_insn (gen_muldi3 (op3, op3, op4));
1663 rtx ret = expand_mult (DImode, op3, op4, NULL, 0, false);
1664 emit_move_insn (op3, ret);
1666 emit_insn (gen_vsx_concat_v2di (op0, op5, op3));
1670 [(set_attr "type" "mul")])
1672 (define_insn "*vsx_div<mode>3"
1673 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1674 (div:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
1675 (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
1676 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1677 "xvdiv<sd>p %x0,%x1,%x2"
1678 [(set_attr "type" "<VStype_div>")])
1680 ;; Emulate vector with scalar for vec_div in V2DImode
1681 (define_insn_and_split "vsx_div_v2di"
1682 [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
1683 (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa")
1684 (match_operand:V2DI 2 "vsx_register_operand" "wa")]
1686 "VECTOR_MEM_VSX_P (V2DImode)"
1688 "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed"
1691 rtx op0 = operands[0];
1692 rtx op1 = operands[1];
1693 rtx op2 = operands[2];
1694 rtx op3 = gen_reg_rtx (DImode);
1695 rtx op4 = gen_reg_rtx (DImode);
1696 rtx op5 = gen_reg_rtx (DImode);
1697 emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0)));
1698 emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0)));
1699 if (TARGET_POWERPC64)
1700 emit_insn (gen_divdi3 (op5, op3, op4));
1703 rtx libfunc = optab_libfunc (sdiv_optab, DImode);
1704 rtx target = emit_library_call_value (libfunc,
1705 op5, LCT_NORMAL, DImode,
1708 emit_move_insn (op5, target);
1710 emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1)));
1711 emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1)));
1712 if (TARGET_POWERPC64)
1713 emit_insn (gen_divdi3 (op3, op3, op4));
1716 rtx libfunc = optab_libfunc (sdiv_optab, DImode);
1717 rtx target = emit_library_call_value (libfunc,
1718 op3, LCT_NORMAL, DImode,
1721 emit_move_insn (op3, target);
1723 emit_insn (gen_vsx_concat_v2di (op0, op5, op3));
1726 [(set_attr "type" "div")])
1728 (define_insn_and_split "vsx_udiv_v2di"
1729 [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
1730 (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa")
1731 (match_operand:V2DI 2 "vsx_register_operand" "wa")]
1733 "VECTOR_MEM_VSX_P (V2DImode)"
1735 "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed"
1738 rtx op0 = operands[0];
1739 rtx op1 = operands[1];
1740 rtx op2 = operands[2];
1743 emit_insn (gen_udivv2di3 (op0, op1, op2));
1746 rtx op3 = gen_reg_rtx (DImode);
1747 rtx op4 = gen_reg_rtx (DImode);
1748 rtx op5 = gen_reg_rtx (DImode);
1750 emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0)));
1751 emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0)));
1753 if (TARGET_POWERPC64)
1754 emit_insn (gen_udivdi3 (op5, op3, op4));
1757 rtx libfunc = optab_libfunc (udiv_optab, DImode);
1758 rtx target = emit_library_call_value (libfunc,
1759 op5, LCT_NORMAL, DImode,
1762 emit_move_insn (op5, target);
1764 emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1)));
1765 emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1)));
1767 if (TARGET_POWERPC64)
1768 emit_insn (gen_udivdi3 (op3, op3, op4));
1771 rtx libfunc = optab_libfunc (udiv_optab, DImode);
1772 rtx target = emit_library_call_value (libfunc,
1773 op3, LCT_NORMAL, DImode,
1776 emit_move_insn (op3, target);
1778 emit_insn (gen_vsx_concat_v2di (op0, op5, op3));
1782 [(set_attr "type" "div")])
1784 ;; *tdiv* instruction returning the FG flag
1785 (define_expand "vsx_tdiv<mode>3_fg"
1787 (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand")
1788 (match_operand:VSX_B 2 "vsx_register_operand")]
1790 (set (match_operand:SI 0 "gpc_reg_operand")
1791 (gt:SI (match_dup 3)
1793 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1795 operands[3] = gen_reg_rtx (CCFPmode);
1798 ;; *tdiv* instruction returning the FE flag
1799 (define_expand "vsx_tdiv<mode>3_fe"
1801 (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand")
1802 (match_operand:VSX_B 2 "vsx_register_operand")]
1804 (set (match_operand:SI 0 "gpc_reg_operand")
1805 (eq:SI (match_dup 3)
1807 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1809 operands[3] = gen_reg_rtx (CCFPmode);
1812 (define_insn "*vsx_tdiv<mode>3_internal"
1813 [(set (match_operand:CCFP 0 "cc_reg_operand" "=x")
1814 (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "wa")
1815 (match_operand:VSX_B 2 "vsx_register_operand" "wa")]
1817 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1818 "x<VSv>tdiv<sd>p %0,%x1,%x2"
1819 [(set_attr "type" "<VStype_simple>")])
1821 (define_insn "vsx_fre<mode>2"
1822 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1823 (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
1825 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1827 [(set_attr "type" "<VStype_simple>")])
1829 (define_insn "*vsx_neg<mode>2"
1830 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1831 (neg:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")))]
1832 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1833 "xvneg<sd>p %x0,%x1"
1834 [(set_attr "type" "<VStype_simple>")])
1836 (define_insn "*vsx_abs<mode>2"
1837 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1838 (abs:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")))]
1839 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1840 "xvabs<sd>p %x0,%x1"
1841 [(set_attr "type" "<VStype_simple>")])
1843 (define_insn "vsx_nabs<mode>2"
1844 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1847 (match_operand:VSX_F 1 "vsx_register_operand" "wa"))))]
1848 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1849 "xvnabs<sd>p %x0,%x1"
1850 [(set_attr "type" "<VStype_simple>")])
1852 (define_insn "vsx_smax<mode>3"
1853 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1854 (smax:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
1855 (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
1856 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1857 "xvmax<sd>p %x0,%x1,%x2"
1858 [(set_attr "type" "<VStype_simple>")])
1860 (define_insn "*vsx_smin<mode>3"
1861 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1862 (smin:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
1863 (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
1864 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1865 "xvmin<sd>p %x0,%x1,%x2"
1866 [(set_attr "type" "<VStype_simple>")])
1868 (define_insn "*vsx_sqrt<mode>2"
1869 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1870 (sqrt:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")))]
1871 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1872 "xvsqrt<sd>p %x0,%x1"
1873 [(set_attr "type" "<sd>sqrt")])
1875 (define_insn "*vsx_rsqrte<mode>2"
1876 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1877 (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
1879 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1880 "xvrsqrte<sd>p %x0,%x1"
1881 [(set_attr "type" "<VStype_simple>")])
1883 ;; *tsqrt* returning the fg flag
1884 (define_expand "vsx_tsqrt<mode>2_fg"
1886 (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand")]
1888 (set (match_operand:SI 0 "gpc_reg_operand")
1889 (gt:SI (match_dup 2)
1891 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1893 operands[2] = gen_reg_rtx (CCFPmode);
1896 ;; *tsqrt* returning the fe flag
1897 (define_expand "vsx_tsqrt<mode>2_fe"
1899 (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand")]
1901 (set (match_operand:SI 0 "gpc_reg_operand")
1902 (eq:SI (match_dup 2)
1904 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1906 operands[2] = gen_reg_rtx (CCFPmode);
1909 (define_insn "*vsx_tsqrt<mode>2_internal"
1910 [(set (match_operand:CCFP 0 "cc_reg_operand" "=x")
1911 (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "wa")]
1913 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1914 "x<VSv>tsqrt<sd>p %0,%x1"
1915 [(set_attr "type" "<VStype_simple>")])
1917 ;; Fused vector multiply/add instructions. Support the classical Altivec
1918 ;; versions of fma, which allow the target to be a separate register from the
1919 ;; 3 inputs. Under VSX, the target must be the addend or the first multiply input.
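;; Worked example (illustrative): in the "a" form the target is the
;; accumulator, so xvmaddadp x0,x1,x2 computes x0 = x1*x2 + x0, while in the
;; "m" form the target is a multiply input, so xvmaddmdp x0,x1,x3 computes
;; x0 = x1*x0 + x3; the "0" constraints below tie operand 3 or operand 2 to
;; the output accordingly.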
1922 (define_insn "*vsx_fmav4sf4"
1923 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa,wa,v")
1925 (match_operand:V4SF 1 "vsx_register_operand" "%wa,wa,v")
1926 (match_operand:V4SF 2 "vsx_register_operand" "wa,0,v")
1927 (match_operand:V4SF 3 "vsx_register_operand" "0,wa,v")))]
1928 "VECTOR_UNIT_VSX_P (V4SFmode)"
1930 xvmaddasp %x0,%x1,%x2
1931 xvmaddmsp %x0,%x1,%x3
1932 vmaddfp %0,%1,%2,%3"
1933 [(set_attr "type" "vecfloat")])
1935 (define_insn "*vsx_fmav2df4"
1936 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa,wa")
1938 (match_operand:V2DF 1 "vsx_register_operand" "%wa,wa")
1939 (match_operand:V2DF 2 "vsx_register_operand" "wa,0")
1940 (match_operand:V2DF 3 "vsx_register_operand" "0,wa")))]
1941 "VECTOR_UNIT_VSX_P (V2DFmode)"
1943 xvmaddadp %x0,%x1,%x2
1944 xvmaddmdp %x0,%x1,%x3"
1945 [(set_attr "type" "vecdouble")])
1947 (define_insn "*vsx_fms<mode>4"
1948 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa,wa")
1950 (match_operand:VSX_F 1 "vsx_register_operand" "%wa,wa")
1951 (match_operand:VSX_F 2 "vsx_register_operand" "wa,0")
1953 (match_operand:VSX_F 3 "vsx_register_operand" "0,wa"))))]
1954 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1956 xvmsuba<sd>p %x0,%x1,%x2
1957 xvmsubm<sd>p %x0,%x1,%x3"
1958 [(set_attr "type" "<VStype_mul>")])
1960 (define_insn "*vsx_nfma<mode>4"
1961 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa,wa")
1964 (match_operand:VSX_F 1 "vsx_register_operand" "wa,wa")
1965 (match_operand:VSX_F 2 "vsx_register_operand" "wa,0")
1966 (match_operand:VSX_F 3 "vsx_register_operand" "0,wa"))))]
1967 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1969 xvnmadda<sd>p %x0,%x1,%x2
1970 xvnmaddm<sd>p %x0,%x1,%x3"
1971 [(set_attr "type" "<VStype_mul>")])
1973 (define_insn "*vsx_nfmsv4sf4"
1974 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa,wa,v")
1977 (match_operand:V4SF 1 "vsx_register_operand" "%wa,wa,v")
1978 (match_operand:V4SF 2 "vsx_register_operand" "wa,0,v")
1980 (match_operand:V4SF 3 "vsx_register_operand" "0,wa,v")))))]
1981 "VECTOR_UNIT_VSX_P (V4SFmode)"
1983 xvnmsubasp %x0,%x1,%x2
1984 xvnmsubmsp %x0,%x1,%x3
1985 vnmsubfp %0,%1,%2,%3"
1986 [(set_attr "type" "vecfloat")])
1988 (define_insn "*vsx_nfmsv2df4"
1989 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa,wa")
1992 (match_operand:V2DF 1 "vsx_register_operand" "%wa,wa")
1993 (match_operand:V2DF 2 "vsx_register_operand" "wa,0")
1995 (match_operand:V2DF 3 "vsx_register_operand" "0,wa")))))]
1996 "VECTOR_UNIT_VSX_P (V2DFmode)"
1998 xvnmsubadp %x0,%x1,%x2
1999 xvnmsubmdp %x0,%x1,%x3"
2000 [(set_attr "type" "vecdouble")])
2002 ;; Vector conditional expressions (no scalar version for these instructions)
2003 (define_insn "vsx_eq<mode>"
2004 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
2005 (eq:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
2006 (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
2007 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2008 "xvcmpeq<sd>p %x0,%x1,%x2"
2009 [(set_attr "type" "<VStype_simple>")])
2011 (define_insn "vsx_gt<mode>"
2012 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
2013 (gt:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
2014 (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
2015 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2016 "xvcmpgt<sd>p %x0,%x1,%x2"
2017 [(set_attr "type" "<VStype_simple>")])
2019 (define_insn "*vsx_ge<mode>"
2020 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
2021 (ge:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
2022 (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
2023 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2024 "xvcmpge<sd>p %x0,%x1,%x2"
2025 [(set_attr "type" "<VStype_simple>")])
2027 ;; Compare vectors producing a vector result and a predicate, setting CR6 to
2028 ;; indicate a combined status
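;; A hedged usage sketch (not part of this file): predicates such as
;; vec_all_eq from <altivec.h> are built on these dot-form compares, reading
;; the combined status back out of CR6.  For example:
;;
;;   #include <altivec.h>
;;   int all_equal (vector float a, vector float b)
;;   {
;;     return vec_all_eq (a, b);   /* xvcmpeqsp. followed by a CR6 test.  */
;;   }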
2029 (define_insn "*vsx_eq_<mode>_p"
2030 [(set (reg:CC CR6_REGNO)
2032 [(eq:CC (match_operand:VSX_F 1 "vsx_register_operand" "wa")
2033 (match_operand:VSX_F 2 "vsx_register_operand" "wa"))]
2035 (set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
2036 (eq:VSX_F (match_dup 1)
2038 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2039 "xvcmpeq<sd>p. %x0,%x1,%x2"
2040 [(set_attr "type" "<VStype_simple>")])
2042 (define_insn "*vsx_gt_<mode>_p"
2043 [(set (reg:CC CR6_REGNO)
2045 [(gt:CC (match_operand:VSX_F 1 "vsx_register_operand" "wa")
2046 (match_operand:VSX_F 2 "vsx_register_operand" "wa"))]
2048 (set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
2049 (gt:VSX_F (match_dup 1)
2051 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2052 "xvcmpgt<sd>p. %x0,%x1,%x2"
2053 [(set_attr "type" "<VStype_simple>")])
2056 ;; Set the CR field BF to indicate if the lowest bit (bit 7) of every byte
2057 ;; element in VSR[XB] is equal to 1 (ALL_TRUE) or equal to 0 (ALL_FALSE).
2058 (define_insn "*xvtlsbb_internal"
2059 [(set (match_operand:CC 0 "cc_reg_operand" "=y")
2060 (unspec:CC [(match_operand:V16QI 1 "vsx_register_operand" "wa")]
2064 [(set_attr "type" "logical")])
2066 ;; Vector Test Least Significant Bit by Byte
2067 ;; for the implementation of the builtin
2068 ;; __builtin_vec_test_lsbb_all_ones
2069 ;; int vec_test_lsbb_all_ones (vector unsigned char);
2071 ;; __builtin_vec_test_lsbb_all_zeros
2072 ;; int vec_test_lsbb_all_zeros (vector unsigned char);
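;; A hedged usage sketch of the builtins named above (assumes a Power10
;; target, -mcpu=power10):
;;
;;   #include <altivec.h>
;;   int lsbb_all_ones (vector unsigned char v)
;;   {
;;     /* Non-zero when bit 7 of every byte element of V is 1.  */
;;     return vec_test_lsbb_all_ones (v);
;;   }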
2073 (define_expand "xvtlsbbo"
2075 (unspec:CC [(match_operand:V16QI 1 "vsx_register_operand" "v")]
2077 (set (match_operand:SI 0 "gpc_reg_operand" "=r")
2078 (lt:SI (match_dup 2) (const_int 0)))]
2081 operands[2] = gen_reg_rtx (CCmode);
2083 (define_expand "xvtlsbbz"
2085 (unspec:CC [(match_operand:V16QI 1 "vsx_register_operand" "v")]
2087 (set (match_operand:SI 0 "gpc_reg_operand" "=r")
2088 (eq:SI (match_dup 2) (const_int 0)))]
2091 operands[2] = gen_reg_rtx (CCmode);
2094 (define_insn "*vsx_ge_<mode>_p"
2095 [(set (reg:CC CR6_REGNO)
2097 [(ge:CC (match_operand:VSX_F 1 "vsx_register_operand" "wa")
2098 (match_operand:VSX_F 2 "vsx_register_operand" "wa"))]
2100 (set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
2101 (ge:VSX_F (match_dup 1)
2103 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2104 "xvcmpge<sd>p. %x0,%x1,%x2"
2105 [(set_attr "type" "<VStype_simple>")])
2108 (define_insn "*vsx_xxsel<mode>"
2109 [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?wa")
2111 (ne:CC (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,wa")
2112 (match_operand:VSX_L 4 "zero_constant" ""))
2113 (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,wa")
2114 (match_operand:VSX_L 3 "vsx_register_operand" "<VSr>,wa")))]
2115 "VECTOR_MEM_VSX_P (<MODE>mode)"
2116 "xxsel %x0,%x3,%x2,%x1"
2117 [(set_attr "type" "vecmove")
2118 (set_attr "isa" "<VSisa>")])
2120 (define_insn "*vsx_xxsel<mode>_uns"
2121 [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?wa")
2123 (ne:CCUNS (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,wa")
2124 (match_operand:VSX_L 4 "zero_constant" ""))
2125 (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,wa")
2126 (match_operand:VSX_L 3 "vsx_register_operand" "<VSr>,wa")))]
2127 "VECTOR_MEM_VSX_P (<MODE>mode)"
2128 "xxsel %x0,%x3,%x2,%x1"
2129 [(set_attr "type" "vecmove")
2130 (set_attr "isa" "<VSisa>")])
2133 (define_insn "vsx_copysign<mode>3"
2134 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
2136 [(match_operand:VSX_F 1 "vsx_register_operand" "wa")
2137 (match_operand:VSX_F 2 "vsx_register_operand" "wa")]
2139 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2140 "xvcpsgn<sd>p %x0,%x2,%x1"
2141 [(set_attr "type" "<VStype_simple>")])
2143 ;; For the conversions, limit the register class for the integer value to be
2144 ;; the fprs because we don't want to add the altivec registers to movdi/movsi.
2145 ;; For the unsigned tests, there isn't a generic double -> unsigned conversion
2146 ;; in rs6000.md so don't test VECTOR_UNIT_VSX_P, just test against VSX.
2147 ;; Don't use vsx_register_operand here, use gpc_reg_operand to match rs6000.md
2148 ;; in allowing virtual registers.
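;; As a hedged source-level sketch, this is roughly what a signed
;; V4SI -> V4SF conversion turns into on a VSX target (the typedefs below
;; use the GCC generic vector extension and are not part of this file):
;;
;;   typedef int   v4si __attribute__ ((vector_size (16)));
;;   typedef float v4sf __attribute__ ((vector_size (16)));
;;
;;   v4sf int_to_float (v4si x)
;;   {
;;     return __builtin_convertvector (x, v4sf);   /* xvcvsxwsp */
;;   }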
2149 (define_insn "vsx_float<VSi><mode>2"
2150 [(set (match_operand:VSX_F 0 "gpc_reg_operand" "=wa")
2151 (float:VSX_F (match_operand:<VSI> 1 "gpc_reg_operand" "wa")))]
2152 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2153 "xvcvsx<VSc><sd>p %x0,%x1"
2154 [(set_attr "type" "<VStype_simple>")])
2156 (define_insn "vsx_floatuns<VSi><mode>2"
2157 [(set (match_operand:VSX_F 0 "gpc_reg_operand" "=wa")
2158 (unsigned_float:VSX_F (match_operand:<VSI> 1 "gpc_reg_operand" "wa")))]
2159 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2160 "xvcvux<VSc><sd>p %x0,%x1"
2161 [(set_attr "type" "<VStype_simple>")])
2163 (define_insn "vsx_fix_trunc<mode><VSi>2"
2164 [(set (match_operand:<VSI> 0 "gpc_reg_operand" "=wa")
2165 (fix:<VSI> (match_operand:VSX_F 1 "gpc_reg_operand" "wa")))]
2166 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2167 "x<VSv>cv<sd>psx<VSc>s %x0,%x1"
2168 [(set_attr "type" "<VStype_simple>")])
2170 (define_insn "vsx_fixuns_trunc<mode><VSi>2"
2171 [(set (match_operand:<VSI> 0 "gpc_reg_operand" "=wa")
2172 (unsigned_fix:<VSI> (match_operand:VSX_F 1 "gpc_reg_operand" "wa")))]
2173 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2174 "x<VSv>cv<sd>pux<VSc>s %x0,%x1"
2175 [(set_attr "type" "<VStype_simple>")])
2177 ;; Math rounding functions
2178 (define_insn "vsx_x<VSv>r<sd>pi"
2179 [(set (match_operand:VSX_B 0 "vsx_register_operand" "=wa")
2180 (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "wa")]
2181 UNSPEC_VSX_ROUND_I))]
2182 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2183 "x<VSv>r<sd>pi %x0,%x1"
2184 [(set_attr "type" "<VStype_simple>")])
2186 (define_insn "vsx_x<VSv>r<sd>pic"
2187 [(set (match_operand:VSX_B 0 "vsx_register_operand" "=wa")
2188 (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "wa")]
2189 UNSPEC_VSX_ROUND_IC))]
2190 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2191 "x<VSv>r<sd>pic %x0,%x1"
2192 [(set_attr "type" "<VStype_simple>")])
2194 (define_insn "vsx_btrunc<mode>2"
2195 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
2196 (fix:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")))]
2197 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2198 "xvr<sd>piz %x0,%x1"
2199 [(set_attr "type" "<VStype_simple>")])
2201 (define_insn "*vsx_b2trunc<mode>2"
2202 [(set (match_operand:VSX_B 0 "vsx_register_operand" "=wa")
2203 (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "wa")]
2205 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2206 "x<VSv>r<sd>piz %x0,%x1"
2207 [(set_attr "type" "<VStype_simple>")])
2209 (define_insn "vsx_floor<mode>2"
2210 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
2211 (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
2213 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2214 "xvr<sd>pim %x0,%x1"
2215 [(set_attr "type" "<VStype_simple>")])
2217 (define_insn "vsx_ceil<mode>2"
2218 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
2219 (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
2221 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2222 "xvr<sd>pip %x0,%x1"
2223 [(set_attr "type" "<VStype_simple>")])
2226 ;; VSX convert to/from double vector
2228 ;; Convert between single and double precision
2229 ;; Don't use xscvspdp and xscvdpsp for scalar conversions, since the normal
2230 ;; scalar single precision instructions internally use the double format.
2231 ;; Prefer the altivec registers, since we likely will need to do a vperm
2232 (define_insn "vsx_xscvdpsp"
2233 [(set (match_operand:V4SF 0 "vsx_register_operand" "=f,?wa")
2234 (unspec:V4SF [(match_operand:DF 1 "vsx_register_operand" "f,wa")]
2235 UNSPEC_VSX_CVSPDP))]
2236 "VECTOR_UNIT_VSX_P (DFmode)"
2238 [(set_attr "type" "fp")])
2240 (define_insn "vsx_xvcvspdp_be"
2241 [(set (match_operand:V2DF 0 "vsx_register_operand" "=v,?wa")
2243 (vec_select:V2SF (match_operand:V4SF 1 "vsx_register_operand" "wa,wa")
2244 (parallel [(const_int 0) (const_int 2)]))))]
2245 "VECTOR_UNIT_VSX_P (V4SFmode) && BYTES_BIG_ENDIAN"
2247 [(set_attr "type" "vecdouble")])
2249 (define_insn "vsx_xvcvspdp_le"
2250 [(set (match_operand:V2DF 0 "vsx_register_operand" "=v,?wa")
2252 (vec_select:V2SF (match_operand:V4SF 1 "vsx_register_operand" "wa,wa")
2253 (parallel [(const_int 1) (const_int 3)]))))]
2254 "VECTOR_UNIT_VSX_P (V4SFmode) && !BYTES_BIG_ENDIAN"
2256 [(set_attr "type" "vecdouble")])
2258 (define_expand "vsx_xvcvspdp"
2259 [(match_operand:V2DF 0 "vsx_register_operand")
2260 (match_operand:V4SF 1 "vsx_register_operand")]
2261 "VECTOR_UNIT_VSX_P (V4SFmode)"
2263 if (BYTES_BIG_ENDIAN)
2264 emit_insn (gen_vsx_xvcvspdp_be (operands[0], operands[1]));
2266 emit_insn (gen_vsx_xvcvspdp_le (operands[0], operands[1]));
2270 (define_insn "vsx_xvcvdpsp"
2271 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa,?wa")
2272 (unspec:V4SF [(match_operand:V2DF 1 "vsx_register_operand" "v,wa")]
2273 UNSPEC_VSX_CVSPDP))]
2274 "VECTOR_UNIT_VSX_P (V2DFmode)"
2276 [(set_attr "type" "vecdouble")])
2278 ;; xscvspdp, represent the scalar SF type as V4SF
2279 (define_insn "vsx_xscvspdp"
2280 [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
2281 (unspec:DF [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
2282 UNSPEC_VSX_CVSPDP))]
2283 "VECTOR_UNIT_VSX_P (V4SFmode)"
2285 [(set_attr "type" "fp")])
2287 ;; Same as vsx_xscvspdp, but use SF as the type
2288 (define_insn "vsx_xscvspdp_scalar2"
2289 [(set (match_operand:SF 0 "vsx_register_operand" "=wa")
2290 (unspec:SF [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
2291 UNSPEC_VSX_CVSPDP))]
2292 "VECTOR_UNIT_VSX_P (V4SFmode)"
2294 [(set_attr "type" "fp")])
2296 ;; Generate xvcvhpsp instruction
2297 (define_insn "vsx_xvcvhpsp"
2298 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2299 (unspec:V4SF [(match_operand: V16QI 1 "vsx_register_operand" "wa")]
2300 UNSPEC_VSX_CVHPSP))]
2303 [(set_attr "type" "vecfloat")])
2305 ;; Generate xvcvsphp
2306 (define_insn "vsx_xvcvsphp"
2307 [(set (match_operand:V4SI 0 "register_operand" "=wa")
2308 (unspec:V4SI [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
2309 UNSPEC_VSX_XVCVSPHP))]
2312 [(set_attr "type" "vecfloat")])
2314 ;; xscvdpsp used for splat'ing a scalar to V4SF, knowing that the internal SF
2315 ;; format of scalars is actually DF.
2316 (define_insn "vsx_xscvdpsp_scalar"
2317 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2318 (unspec:V4SF [(match_operand:SF 1 "vsx_register_operand" "wa")]
2319 UNSPEC_VSX_CVSPDP))]
2320 "VECTOR_UNIT_VSX_P (V4SFmode)"
2322 [(set_attr "type" "fp")])
2324 ;; ISA 2.07 xscvdpspn/xscvspdpn that does not raise an error on signalling NaNs
2325 (define_insn "vsx_xscvdpspn"
2326 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2327 (unspec:V4SF [(match_operand:DF 1 "vsx_register_operand" "wa")]
2328 UNSPEC_VSX_CVDPSPN))]
2331 [(set_attr "type" "fp")])
2333 (define_insn "vsx_xscvspdpn"
2334 [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
2335 (unspec:DF [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
2336 UNSPEC_VSX_CVSPDPN))]
2339 [(set_attr "type" "fp")])
2341 (define_insn "vsx_xscvdpspn_scalar"
2342 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2343 (unspec:V4SF [(match_operand:SF 1 "vsx_register_operand" "wa")]
2344 UNSPEC_VSX_CVDPSPN))]
2347 [(set_attr "type" "fp")])
2349 ;; Used by direct move to move a SFmode value from GPR to VSX register
2350 (define_insn "vsx_xscvspdpn_directmove"
2351 [(set (match_operand:SF 0 "vsx_register_operand" "=wa")
2352 (unspec:SF [(match_operand:SF 1 "vsx_register_operand" "wa")]
2353 UNSPEC_VSX_CVSPDPN))]
2356 [(set_attr "type" "fp")])
2358 ;; Convert and scale (used by vec_ctf, vec_cts, vec_ctu for double/long long)
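;; A hedged usage sketch of the convert-and-scale path (assuming the
;; <altivec.h> vec_ctf overload for 64-bit elements):
;;
;;   #include <altivec.h>
;;   vector double to_scaled_double (vector signed long long x)
;;   {
;;     /* Roughly xvcvsxddp followed by a multiply by 2**-4.  */
;;     return vec_ctf (x, 4);
;;   }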
2360 (define_insn "vsx_xvcv<su>xwsp"
2361 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2362 (any_float:V4SF (match_operand:V4SI 1 "vsx_register_operand" "wa")))]
2363 "VECTOR_UNIT_VSX_P (V4SFmode)"
2364 "xvcv<su>xwsp %x0,%x1"
2365 [(set_attr "type" "vecfloat")])
2367 (define_insn "vsx_xvcv<su>xddp"
2368 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
2369 (any_float:V2DF (match_operand:V2DI 1 "vsx_register_operand" "wa")))]
2370 "VECTOR_UNIT_VSX_P (V2DFmode)"
2371 "xvcv<su>xddp %x0,%x1"
2372 [(set_attr "type" "vecdouble")])
2374 (define_insn "vsx_xvcvsp<su>xws"
2375 [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa")
2376 (any_fix:V4SI (match_operand:V4SF 1 "vsx_register_operand" "wa")))]
2377 "VECTOR_UNIT_VSX_P (V4SFmode)"
2378 "xvcvsp<su>xws %x0,%x1"
2379 [(set_attr "type" "vecfloat")])
2381 (define_insn "vsx_xvcvdp<su>xds"
2382 [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
2383 (any_fix:V2DI (match_operand:V2DF 1 "vsx_register_operand" "wa")))]
2384 "VECTOR_UNIT_VSX_P (V2DFmode)"
2385 "xvcvdp<su>xds %x0,%x1"
2386 [(set_attr "type" "vecdouble")])
2388 (define_expand "vsx_xvcvsxddp_scale"
2389 [(match_operand:V2DF 0 "vsx_register_operand")
2390 (match_operand:V2DI 1 "vsx_register_operand")
2391 (match_operand:QI 2 "immediate_operand")]
2392 "VECTOR_UNIT_VSX_P (V2DFmode)"
2394 rtx op0 = operands[0];
2395 rtx op1 = operands[1];
2396 int scale = INTVAL(operands[2]);
2397 emit_insn (gen_vsx_xvcvsxddp (op0, op1));
2399 rs6000_scale_v2df (op0, op0, -scale);
2403 (define_expand "vsx_xvcvuxddp_scale"
2404 [(match_operand:V2DF 0 "vsx_register_operand")
2405 (match_operand:V2DI 1 "vsx_register_operand")
2406 (match_operand:QI 2 "immediate_operand")]
2407 "VECTOR_UNIT_VSX_P (V2DFmode)"
2409 rtx op0 = operands[0];
2410 rtx op1 = operands[1];
2411 int scale = INTVAL(operands[2]);
2412 emit_insn (gen_vsx_xvcvuxddp (op0, op1));
2414 rs6000_scale_v2df (op0, op0, -scale);
2418 (define_expand "vsx_xvcvdpsxds_scale"
2419 [(match_operand:V2DI 0 "vsx_register_operand")
2420 (match_operand:V2DF 1 "vsx_register_operand")
2421 (match_operand:QI 2 "immediate_operand")]
2422 "VECTOR_UNIT_VSX_P (V2DFmode)"
2424 rtx op0 = operands[0];
2425 rtx op1 = operands[1];
2427 int scale = INTVAL (operands[2]);
2432 tmp = gen_reg_rtx (V2DFmode);
2433 rs6000_scale_v2df (tmp, op1, scale);
2435 emit_insn (gen_vsx_xvcvdpsxds (op0, tmp));
2439 ;; convert vector of 64-bit floating point numbers to vector of
2440 ;; 64-bit unsigned integer
2441 (define_expand "vsx_xvcvdpuxds_scale"
2442 [(match_operand:V2DI 0 "vsx_register_operand")
2443 (match_operand:V2DF 1 "vsx_register_operand")
2444 (match_operand:QI 2 "immediate_operand")]
2445 "VECTOR_UNIT_VSX_P (V2DFmode)"
2447 rtx op0 = operands[0];
2448 rtx op1 = operands[1];
2450 int scale = INTVAL (operands[2]);
2455 tmp = gen_reg_rtx (V2DFmode);
2456 rs6000_scale_v2df (tmp, op1, scale);
2458 emit_insn (gen_vsx_xvcvdpuxds (op0, tmp));
2462 ;; Convert from 64-bit to 32-bit types
2463 ;; Note, favor the Altivec registers since the usual use of these instructions
2464 ;; is in vector converts and we need to use the Altivec vperm instruction.
2466 (define_insn "vsx_xvcvdpsxws"
2467 [(set (match_operand:V4SI 0 "vsx_register_operand" "=v,?wa")
2468 (unspec:V4SI [(match_operand:V2DF 1 "vsx_register_operand" "wa,wa")]
2469 UNSPEC_VSX_CVDPSXWS))]
2470 "VECTOR_UNIT_VSX_P (V2DFmode)"
2471 "xvcvdpsxws %x0,%x1"
2472 [(set_attr "type" "vecdouble")])
2474 (define_insn "vsx_xvcvdpuxws"
2475 [(set (match_operand:V4SI 0 "vsx_register_operand" "=v,?wa")
2476 (unspec:V4SI [(match_operand:V2DF 1 "vsx_register_operand" "wa,wa")]
2477 UNSPEC_VSX_CVDPUXWS))]
2478 "VECTOR_UNIT_VSX_P (V2DFmode)"
2479 "xvcvdpuxws %x0,%x1"
2480 [(set_attr "type" "vecdouble")])
2482 (define_insn "vsx_xvcvsxdsp"
2483 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2484 (unspec:V4SF [(match_operand:V2DI 1 "vsx_register_operand" "wa")]
2485 UNSPEC_VSX_CVSXDSP))]
2486 "VECTOR_UNIT_VSX_P (V2DFmode)"
2488 [(set_attr "type" "vecfloat")])
2490 (define_insn "vsx_xvcvuxdsp"
2491 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2492 (unspec:V4SF [(match_operand:V2DI 1 "vsx_register_operand" "wa")]
2493 UNSPEC_VSX_CVUXDSP))]
2494 "VECTOR_UNIT_VSX_P (V2DFmode)"
2496 [(set_attr "type" "vecdouble")])
2498 ;; Convert vector of 32-bit signed/unsigned integers to vector of
2499 ;; 64-bit floating point numbers.
2500 (define_insn "vsx_xvcv<su>xwdp_be"
2501 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
2503 (vec_select:V2SI (match_operand:V4SI 1 "vsx_register_operand" "wa")
2504 (parallel [(const_int 0) (const_int 2)]))))]
2505 "VECTOR_UNIT_VSX_P (V2DFmode) && BYTES_BIG_ENDIAN"
2506 "xvcv<su>xwdp %x0,%x1"
2507 [(set_attr "type" "vecdouble")])
2509 (define_insn "vsx_xvcv<su>xwdp_le"
2510 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
2512 (vec_select:V2SI (match_operand:V4SI 1 "vsx_register_operand" "wa")
2513 (parallel [(const_int 1) (const_int 3)]))))]
2514 "VECTOR_UNIT_VSX_P (V2DFmode) && !BYTES_BIG_ENDIAN"
2515 "xvcv<su>xwdp %x0,%x1"
2516 [(set_attr "type" "vecdouble")])
2518 (define_expand "vsx_xvcv<su>xwdp"
2519 [(match_operand:V2DF 0 "vsx_register_operand")
2520 (match_operand:V4SI 1 "vsx_register_operand")
2522 "VECTOR_UNIT_VSX_P (V2DFmode)"
2524 if (BYTES_BIG_ENDIAN)
2525 emit_insn (gen_vsx_xvcv<su>xwdp_be (operands[0], operands[1]));
2527 emit_insn (gen_vsx_xvcv<su>xwdp_le (operands[0], operands[1]));
2531 (define_insn "vsx_xvcvsxwdp_df"
2532 [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
2533 (unspec:DF [(match_operand:V4SI 1 "vsx_register_operand" "wa")]
2534 UNSPEC_VSX_CVSXWDP))]
2537 [(set_attr "type" "vecdouble")])
2539 (define_insn "vsx_xvcvuxwdp_df"
2540 [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
2541 (unspec:DF [(match_operand:V4SI 1 "vsx_register_operand" "wa")]
2542 UNSPEC_VSX_CVUXWDP))]
2545 [(set_attr "type" "vecdouble")])
2547 ;; Convert vector of 32-bit floating point numbers to vector of
2548 ;; 64-bit signed/unsigned integers.
2549 (define_insn "vsx_xvcvsp<su>xds_be"
2550 [(set (match_operand:V2DI 0 "vsx_register_operand" "=v,?wa")
2552 (vec_select:V2SF (match_operand:V4SF 1 "vsx_register_operand" "wa,wa")
2553 (parallel [(const_int 0) (const_int 2)]))))]
2554 "VECTOR_UNIT_VSX_P (V2DFmode) && BYTES_BIG_ENDIAN"
2555 "xvcvsp<su>xds %x0,%x1"
2556 [(set_attr "type" "vecdouble")])
2558 (define_insn "vsx_xvcvsp<su>xds_le"
2559 [(set (match_operand:V2DI 0 "vsx_register_operand" "=v,?wa")
2561 (vec_select:V2SF (match_operand:V4SF 1 "vsx_register_operand" "wa,wa")
2562 (parallel [(const_int 1) (const_int 3)]))))]
2563 "VECTOR_UNIT_VSX_P (V2DFmode) && !BYTES_BIG_ENDIAN"
2564 "xvcvsp<su>xds %x0,%x1"
2565 [(set_attr "type" "vecdouble")])
2567 (define_expand "vsx_xvcvsp<su>xds"
2568 [(match_operand:V2DI 0 "vsx_register_operand")
2569 (match_operand:V4SF 1 "vsx_register_operand")
2571 "VECTOR_UNIT_VSX_P (V2DFmode)"
2573 if (BYTES_BIG_ENDIAN)
2574 emit_insn (gen_vsx_xvcvsp<su>xds_be (operands[0], operands[1]));
2576 emit_insn (gen_vsx_xvcvsp<su>xds_le (operands[0], operands[1]));
2580 ;; Generate float2 double
2581 ;; Convert two vectors of double to one vector of float.
2582 (define_expand "float2_v2df"
2583 [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2584 (use (match_operand:V2DF 1 "register_operand" "wa"))
2585 (use (match_operand:V2DF 2 "register_operand" "wa"))]
2586 "VECTOR_UNIT_VSX_P (V4SFmode)"
2588 rtx rtx_src1, rtx_src2, rtx_dst;
2590 rtx_dst = operands[0];
2591 rtx_src1 = operands[1];
2592 rtx_src2 = operands[2];
2594 rs6000_generate_float2_double_code (rtx_dst, rtx_src1, rtx_src2);
2599 ;; Convert two vectors of signed long long to one vector of float.
2600 (define_expand "float2_v2di"
2601 [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2602 (use (match_operand:V2DI 1 "register_operand" "wa"))
2603 (use (match_operand:V2DI 2 "register_operand" "wa"))]
2604 "VECTOR_UNIT_VSX_P (V4SFmode)"
2606 rtx rtx_src1, rtx_src2, rtx_dst;
2608 rtx_dst = operands[0];
2609 rtx_src1 = operands[1];
2610 rtx_src2 = operands[2];
2612 rs6000_generate_float2_code (true, rtx_dst, rtx_src1, rtx_src2);
2616 ;; Generate uns_float2
2617 ;; Convert two vectors of unsigned long long to one vector of float.
2618 (define_expand "uns_float2_v2di"
2619 [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2620 (use (match_operand:V2DI 1 "register_operand" "wa"))
2621 (use (match_operand:V2DI 2 "register_operand" "wa"))]
2622 "VECTOR_UNIT_VSX_P (V4SFmode)"
2624 rtx rtx_src1, rtx_src2, rtx_dst;
2626 rtx_dst = operands[0];
2627 rtx_src1 = operands[1];
2628 rtx_src2 = operands[2];
2630 rs6000_generate_float2_code (true, rtx_dst, rtx_src1, rtx_src2);
2635 ;; Convert a vector of double or signed long long to a vector of float.
2636 ;; (Only even words are valid, BE numbering)
2637 (define_expand "floate<mode>"
2638 [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2639 (use (match_operand:VSX_D 1 "register_operand" "wa"))]
2640 "VECTOR_UNIT_VSX_P (V4SFmode)"
2642 if (BYTES_BIG_ENDIAN)
2644 /* Shift left one word to put the even words in the correct location.  */
2646 rtx rtx_val = GEN_INT (4);
2648 rtx_tmp = gen_reg_rtx (V4SFmode);
2649 emit_insn (gen_vsx_xvcv<VF_sxddp>sp (rtx_tmp, operands[1]));
2650 emit_insn (gen_altivec_vsldoi_v4sf (operands[0],
2651 rtx_tmp, rtx_tmp, rtx_val));
2654 emit_insn (gen_vsx_xvcv<VF_sxddp>sp (operands[0], operands[1]));
2659 ;; Generate uns_floate
2660 ;; convert long long unsigned to float
2661 ;; (Only even words are valid, BE numbering)
2662 (define_expand "unsfloatev2di"
2663 [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2664 (use (match_operand:V2DI 1 "register_operand" "wa"))]
2665 "VECTOR_UNIT_VSX_P (V4SFmode)"
2667 if (BYTES_BIG_ENDIAN)
2669 /* Shift left one word to put the even words in the correct location.  */
2671 rtx rtx_val = GEN_INT (4);
2673 rtx_tmp = gen_reg_rtx (V4SFmode);
2674 emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp, operands[1]));
2675 emit_insn (gen_altivec_vsldoi_v4sf (operands[0],
2676 rtx_tmp, rtx_tmp, rtx_val));
2679 emit_insn (gen_vsx_xvcvuxdsp (operands[0], operands[1]));
2685 ;; Convert a vector of double or signed long long to a vector of float.
2686 ;; (Only odd words are valid, BE numbering)
2687 (define_expand "floato<mode>"
2688 [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2689 (use (match_operand:VSX_D 1 "register_operand" "wa"))]
2690 "VECTOR_UNIT_VSX_P (V4SFmode)"
2692 if (BYTES_BIG_ENDIAN)
2693 emit_insn (gen_vsx_xvcv<VF_sxddp>sp (operands[0], operands[1]));
2696 /* Shift left one word to put the odd words in the correct location.  */
2698 rtx rtx_val = GEN_INT (4);
2700 rtx_tmp = gen_reg_rtx (V4SFmode);
2701 emit_insn (gen_vsx_xvcv<VF_sxddp>sp (rtx_tmp, operands[1]));
2702 emit_insn (gen_altivec_vsldoi_v4sf (operands[0],
2703 rtx_tmp, rtx_tmp, rtx_val));
2708 ;; Generate uns_floato
2709 ;; convert long long unsigned to float
2710 ;; (Only odd words are valid, BE numbering)
2711 (define_expand "unsfloatov2di"
2712 [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2713 (use (match_operand:V2DI 1 "register_operand" "wa"))]
2714 "VECTOR_UNIT_VSX_P (V4SFmode)"
2716 if (BYTES_BIG_ENDIAN)
2717 emit_insn (gen_vsx_xvcvuxdsp (operands[0], operands[1]));
2720 /* Shift left one word to put the odd words in the correct location.  */
2722 rtx rtx_val = GEN_INT (4);
2724 rtx_tmp = gen_reg_rtx (V4SFmode);
2725 emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp, operands[1]));
2726 emit_insn (gen_altivec_vsldoi_v4sf (operands[0],
2727 rtx_tmp, rtx_tmp, rtx_val));
2732 ;; Generate vsigned2
2733 ;; convert two double float vectors to a vector of single precision ints
2734 (define_expand "vsigned2_v2df"
2735 [(match_operand:V4SI 0 "register_operand" "=wa")
2736 (unspec:V4SI [(match_operand:V2DF 1 "register_operand" "wa")
2737 (match_operand:V2DF 2 "register_operand" "wa")]
2738 UNSPEC_VSX_VSIGNED2)]
2741 rtx rtx_src1, rtx_src2, rtx_dst;
2742 bool signed_convert=true;
2744 rtx_dst = operands[0];
2745 rtx_src1 = operands[1];
2746 rtx_src2 = operands[2];
2748 rs6000_generate_vsigned2_code (signed_convert, rtx_dst, rtx_src1, rtx_src2);
2752 ;; Generate vsignedo_v2df
2753 ;; Convert signed double float to int, odd words only.
2754 (define_expand "vsignedo_v2df"
2755 [(set (match_operand:V4SI 0 "register_operand" "=wa")
2756 (match_operand:V2DF 1 "register_operand" "wa"))]
2759 if (BYTES_BIG_ENDIAN)
2762 rtx rtx_val = GEN_INT (12);
2763 rtx_tmp = gen_reg_rtx (V4SImode);
2765 emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp, operands[1]));
2767 /* Big endian word numbering for words in operand is 0 1 2 3.
2768 take (operand[1] operand[1]) and shift left one word
2769 0 1 2 3 0 1 2 3 => 1 2 3 0
2770 Words 1 and 3 are now where they need to be for the result.  */
2772 emit_insn (gen_altivec_vsldoi_v4si (operands[0], rtx_tmp,
2776 /* Little endian word numbering for operand is 3 2 1 0.
2777 Result words 3 and 1 are where they need to be. */
2778 emit_insn (gen_vsx_xvcvdpsxws (operands[0], operands[1]));
2782 [(set_attr "type" "veccomplex")])
2784 ;; Generate vsignede_v2df
2785 ;; Convert signed double float to int, even words only.
2786 (define_expand "vsignede_v2df"
2787 [(set (match_operand:V4SI 0 "register_operand" "=v")
2788 (match_operand:V2DF 1 "register_operand" "v"))]
2791 if (BYTES_BIG_ENDIAN)
2792 /* Big endian word numbering for words in operand is 0 1 2 3.
2793 Result words 0 and 2 are where they need to be.  */
2794 emit_insn (gen_vsx_xvcvdpsxws (operands[0], operands[1]));
2799 rtx rtx_val = GEN_INT (12);
2800 rtx_tmp = gen_reg_rtx (V4SImode);
2802 emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp, operands[1]));
2804 /* Little endian word numbering for operand is 3 2 1 0.
2805 take (operand[1] operand[1]) and shift left three words
2806 0 1 2 3 0 1 2 3 => 3 0 1 2
2807 Words 0 and 2 are now where they need to be for the result. */
2808 emit_insn (gen_altivec_vsldoi_v4si (operands[0], rtx_tmp,
2813 [(set_attr "type" "veccomplex")])
2815 ;; Generate vunsigned2
2816 ;; convert two double float vectors to a vector of single precision
2817 ;; unsigned ints
2818 (define_expand "vunsigned2_v2df"
2819 [(match_operand:V4SI 0 "register_operand" "=v")
2820 (unspec:V4SI [(match_operand:V2DF 1 "register_operand" "v")
2821 (match_operand:V2DF 2 "register_operand" "v")]
2822 UNSPEC_VSX_VSIGNED2)]
2825 rtx rtx_src1, rtx_src2, rtx_dst;
2826 bool signed_convert=false;
2828 rtx_dst = operands[0];
2829 rtx_src1 = operands[1];
2830 rtx_src2 = operands[2];
2832 rs6000_generate_vsigned2_code (signed_convert, rtx_dst, rtx_src1, rtx_src2);
2836 ;; Generate vunsignedo_v2df
2837 ;; Convert double float to unsigned int, odd words only.
2838 (define_expand "vunsignedo_v2df"
2839 [(set (match_operand:V4SI 0 "register_operand" "=v")
2840 (match_operand:V2DF 1 "register_operand" "v"))]
2843 if (BYTES_BIG_ENDIAN)
2846 rtx rtx_val = GEN_INT (12);
2847 rtx_tmp = gen_reg_rtx (V4SImode);
2849 emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp, operands[1]));
2851 /* Big endian word numbering for words in operand is 0 1 2 3.
2852 take (operand[1] operand[1]) and shift left one word
2853 0 1 2 3 0 1 2 3 => 1 2 3 0
2854 Words 1 and 3 are now where they need to be for the result.  */
2856 emit_insn (gen_altivec_vsldoi_v4si (operands[0], rtx_tmp,
2860 /* Little endian word numbering for operand is 3 2 1 0.
2861 Result words 3 and 1 are where they need to be. */
2862 emit_insn (gen_vsx_xvcvdpuxws (operands[0], operands[1]));
2866 [(set_attr "type" "veccomplex")])
2868 ;; Generate vunsignede_v2df
2869 ;; Convert double float to unsigned int, even words only.
2870 (define_expand "vunsignede_v2df"
2871 [(set (match_operand:V4SI 0 "register_operand" "=v")
2872 (match_operand:V2DF 1 "register_operand" "v"))]
2875 if (BYTES_BIG_ENDIAN)
2876 /* Big endian word numbering for words in operand is 0 1 2 3.
2877 Result words 0 and 2 are where they need to be.  */
2878 emit_insn (gen_vsx_xvcvdpuxws (operands[0], operands[1]));
2883 rtx rtx_val = GEN_INT (12);
2884 rtx_tmp = gen_reg_rtx (V4SImode);
2886 emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp, operands[1]));
2888 /* Little endian word numbering for operand is 3 2 1 0.
2889 take (operand[1] operand[1]) and shift left three words
2890 0 1 2 3 0 1 2 3 => 3 0 1 2
2891 Words 0 and 2 are now where they need to be for the result. */
2892 emit_insn (gen_altivec_vsldoi_v4si (operands[0], rtx_tmp,
2897 [(set_attr "type" "veccomplex")])
2899 ;; Only optimize (float (fix x)) -> frz if we are in fast-math mode, since
2900 ;; the xvrdpiz instruction does not truncate the value if the floating
2901 ;; point value is < LONG_MIN or > LONG_MAX.
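;; A hedged source-level sketch of the (float (fix x)) shape this pattern
;; matches (GCC generic vector types, not part of this file); the collapse to
;; xvrdpiz is only valid under -ffast-math for the reason given above:
;;
;;   typedef double    v2df __attribute__ ((vector_size (16)));
;;   typedef long long v2di __attribute__ ((vector_size (16)));
;;
;;   v2df trunc_via_int (v2df x)
;;   {
;;     v2di t = __builtin_convertvector (x, v2di);   /* fix    */
;;     return __builtin_convertvector (t, v2df);     /* float  */
;;   }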
2902 (define_insn "*vsx_float_fix_v2df2"
2903 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa,?wa")
2906 (match_operand:V2DF 1 "vsx_register_operand" "wa,?wa"))))]
2908 && VECTOR_UNIT_VSX_P (V2DFmode) && flag_unsafe_math_optimizations
2909 && !flag_trapping_math && TARGET_FRIZ"
2911 [(set_attr "type" "vecdouble")])
2914 ;; Permute operations
2916 ;; Build a V2DF/V2DI vector from two scalars
2917 (define_insn "vsx_concat_<mode>"
2918 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa,we")
2920 (match_operand:<VS_scalar> 1 "gpc_reg_operand" "wa,b")
2921 (match_operand:<VS_scalar> 2 "gpc_reg_operand" "wa,b")))]
2922 "VECTOR_MEM_VSX_P (<MODE>mode)"
2924 if (which_alternative == 0)
2925 return (BYTES_BIG_ENDIAN
2926 ? "xxpermdi %x0,%x1,%x2,0"
2927 : "xxpermdi %x0,%x2,%x1,0");
2929 else if (which_alternative == 1)
2930 return (BYTES_BIG_ENDIAN
2931 ? "mtvsrdd %x0,%1,%2"
2932 : "mtvsrdd %x0,%2,%1");
2937 [(set_attr "type" "vecperm,vecmove")])
2939 ;; Combiner patterns to allow creating XXPERMDIs to access either doubleword
2940 ;; element in a vector register.
2941 (define_insn "*vsx_concat_<mode>_1"
2942 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
2944 (vec_select:<VS_scalar>
2945 (match_operand:VSX_D 1 "gpc_reg_operand" "wa")
2946 (parallel [(match_operand:QI 2 "const_0_to_1_operand" "n")]))
2947 (match_operand:<VS_scalar> 3 "gpc_reg_operand" "wa")))]
2948 "VECTOR_MEM_VSX_P (<MODE>mode)"
2950 HOST_WIDE_INT dword = INTVAL (operands[2]);
2951 if (BYTES_BIG_ENDIAN)
2953 operands[4] = GEN_INT (2*dword);
2954 return "xxpermdi %x0,%x1,%x3,%4";
2958 operands[4] = GEN_INT (!dword);
2959 return "xxpermdi %x0,%x3,%x1,%4";
2962 [(set_attr "type" "vecperm")])
2964 (define_insn "*vsx_concat_<mode>_2"
2965 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
2967 (match_operand:<VS_scalar> 1 "gpc_reg_operand" "wa")
2968 (vec_select:<VS_scalar>
2969 (match_operand:VSX_D 2 "gpc_reg_operand" "wa")
2970 (parallel [(match_operand:QI 3 "const_0_to_1_operand" "n")]))))]
2971 "VECTOR_MEM_VSX_P (<MODE>mode)"
2973 HOST_WIDE_INT dword = INTVAL (operands[3]);
2974 if (BYTES_BIG_ENDIAN)
2976 operands[4] = GEN_INT (dword);
2977 return "xxpermdi %x0,%x1,%x2,%4";
2981 operands[4] = GEN_INT (2 * !dword);
2982 return "xxpermdi %x0,%x2,%x1,%4";
2985 [(set_attr "type" "vecperm")])
2987 (define_insn "*vsx_concat_<mode>_3"
2988 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
2990 (vec_select:<VS_scalar>
2991 (match_operand:VSX_D 1 "gpc_reg_operand" "wa")
2992 (parallel [(match_operand:QI 2 "const_0_to_1_operand" "n")]))
2993 (vec_select:<VS_scalar>
2994 (match_operand:VSX_D 3 "gpc_reg_operand" "wa")
2995 (parallel [(match_operand:QI 4 "const_0_to_1_operand" "n")]))))]
2996 "VECTOR_MEM_VSX_P (<MODE>mode)"
2998 HOST_WIDE_INT dword1 = INTVAL (operands[2]);
2999 HOST_WIDE_INT dword2 = INTVAL (operands[4]);
3000 if (BYTES_BIG_ENDIAN)
3002 operands[5] = GEN_INT ((2 * dword1) + dword2);
3003 return "xxpermdi %x0,%x1,%x3,%5";
3007 operands[5] = GEN_INT ((2 * !dword2) + !dword1);
3008 return "xxpermdi %x0,%x3,%x1,%5";
3011 [(set_attr "type" "vecperm")])
3013 ;; Special purpose concat using xxpermdi to glue two single precision values
3014 ;; together, relying on the fact that internally scalar floats are represented
3015 ;; as doubles.  This is used to initialize a V4SF vector with 4 floats.
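;; A hedged source-level sketch of the initializer this concat helps expand
;; (GCC generic vector type, not part of this file):
;;
;;   typedef float v4sf __attribute__ ((vector_size (16)));
;;
;;   v4sf build (float a, float b, float c, float d)
;;   {
;;     /* Roughly two vsx_concat_v2sf operations plus a final merge.  */
;;     return (v4sf) { a, b, c, d };
;;   }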
3016 (define_insn "vsx_concat_v2sf"
3017 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
3019 [(match_operand:SF 1 "vsx_register_operand" "wa")
3020 (match_operand:SF 2 "vsx_register_operand" "wa")]
3021 UNSPEC_VSX_CONCAT))]
3022 "VECTOR_MEM_VSX_P (V2DFmode)"
3024 if (BYTES_BIG_ENDIAN)
3025 return "xxpermdi %x0,%x1,%x2,0";
3027 return "xxpermdi %x0,%x2,%x1,0";
3029 [(set_attr "type" "vecperm")])
3031 ;; Concatenate 4 SImode elements into a V4SImode reg.
3032 (define_expand "vsx_init_v4si"
3033 [(use (match_operand:V4SI 0 "gpc_reg_operand"))
3034 (use (match_operand:SI 1 "gpc_reg_operand"))
3035 (use (match_operand:SI 2 "gpc_reg_operand"))
3036 (use (match_operand:SI 3 "gpc_reg_operand"))
3037 (use (match_operand:SI 4 "gpc_reg_operand"))]
3038 "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
3040 rtx a = gen_lowpart_SUBREG (DImode, operands[1]);
3041 rtx b = gen_lowpart_SUBREG (DImode, operands[2]);
3042 rtx c = gen_lowpart_SUBREG (DImode, operands[3]);
3043 rtx d = gen_lowpart_SUBREG (DImode, operands[4]);
3044 if (!BYTES_BIG_ENDIAN)
3050 rtx ab = gen_reg_rtx (DImode);
3051 rtx cd = gen_reg_rtx (DImode);
3052 emit_insn (gen_rotldi3_insert_3 (ab, a, GEN_INT (32), b,
3053 GEN_INT (0xffffffff)));
3054 emit_insn (gen_rotldi3_insert_3 (cd, c, GEN_INT (32), d,
3055 GEN_INT (0xffffffff)));
3057 rtx abcd = gen_reg_rtx (V2DImode);
3058 emit_insn (gen_vsx_concat_v2di (abcd, ab, cd));
3059 emit_move_insn (operands[0], gen_lowpart (V4SImode, abcd));
3063 ;; xxpermdi for little endian loads and stores. We need several of
3064 ;; these since the form of the PARALLEL differs by mode.
3065 (define_insn "*vsx_xxpermdi2_le_<mode>"
3066 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
3068 (match_operand:VSX_D 1 "vsx_register_operand" "wa")
3069 (parallel [(const_int 1) (const_int 0)])))]
3070 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)"
3071 "xxpermdi %x0,%x1,%x1,2"
3072 [(set_attr "type" "vecperm")])
3074 (define_insn "xxswapd_v16qi"
3075 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
3077 (match_operand:V16QI 1 "vsx_register_operand" "wa")
3078 (parallel [(const_int 8) (const_int 9)
3079 (const_int 10) (const_int 11)
3080 (const_int 12) (const_int 13)
3081 (const_int 14) (const_int 15)
3082 (const_int 0) (const_int 1)
3083 (const_int 2) (const_int 3)
3084 (const_int 4) (const_int 5)
3085 (const_int 6) (const_int 7)])))]
3087 ;; AIX does not support the extended mnemonic xxswapd. Use the basic
3088 ;; mnemonic xxpermdi instead.
3089 "xxpermdi %x0,%x1,%x1,2"
3090 [(set_attr "type" "vecperm")])
3092 (define_insn "xxswapd_v8hi"
3093 [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
3095 (match_operand:V8HI 1 "vsx_register_operand" "wa")
3096 (parallel [(const_int 4) (const_int 5)
3097 (const_int 6) (const_int 7)
3098 (const_int 0) (const_int 1)
3099 (const_int 2) (const_int 3)])))]
3101 ;; AIX does not support the extended mnemonic xxswapd. Use the basic
3102 ;; mnemonic xxpermdi instead.
3103 "xxpermdi %x0,%x1,%x1,2"
3104 [(set_attr "type" "vecperm")])
3106 (define_insn "xxswapd_<mode>"
3107 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
3109 (match_operand:VSX_W 1 "vsx_register_operand" "wa")
3110 (parallel [(const_int 2) (const_int 3)
3111 (const_int 0) (const_int 1)])))]
3113 ;; AIX does not support extended mnemonic xxswapd. Use the basic
3114 ;; mnemonic xxpermdi instead.
3115 "xxpermdi %x0,%x1,%x1,2"
3116 [(set_attr "type" "vecperm")])
3118 (define_insn "xxswapd_<mode>"
3119 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
3121 (match_operand:VSX_D 1 "vsx_register_operand" "wa")
3122 (parallel [(const_int 1) (const_int 0)])))]
3124 ;; AIX does not support extended mnemonic xxswapd. Use the basic
3125 ;; mnemonic xxpermdi instead.
3126 "xxpermdi %x0,%x1,%x1,2"
3127 [(set_attr "type" "vecperm")])
3129 (define_insn "xxgenpcvm_<mode>_internal"
3130 [(set (match_operand:VSX_EXTRACT_I4 0 "altivec_register_operand" "=wa")
3131 (unspec:VSX_EXTRACT_I4
3132 [(match_operand:VSX_EXTRACT_I4 1 "altivec_register_operand" "v")
3133 (match_operand:QI 2 "const_0_to_3_operand" "n")]
3136 "xxgenpcv<wd>m %x0,%1,%2"
3137 [(set_attr "type" "vecsimple")])
3139 (define_expand "xxgenpcvm_<mode>"
3140 [(use (match_operand:VSX_EXTRACT_I4 0 "register_operand"))
3141 (use (match_operand:VSX_EXTRACT_I4 1 "register_operand"))
3142 (use (match_operand:QI 2 "immediate_operand"))]
3145 if (!BYTES_BIG_ENDIAN)
3147 /* gen_xxgenpcvm assumes Big Endian order.  If LE,
3148 swap the upper and lower doublewords first.  */
3149 rtx tmp = gen_reg_rtx (<MODE>mode);
3151 emit_insn (gen_xxswapd_<mode> (tmp, operands[1]));
3154 emit_insn (gen_xxgenpcvm_<mode>_internal (operands[0], operands[1],
3159 ;; lxvd2x for little endian loads. We need several of
3160 ;; these since the form of the PARALLEL differs by mode.
3161 (define_insn "*vsx_lxvd2x2_le_<mode>"
3162 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
3164 (match_operand:VSX_D 1 "memory_operand" "Z")
3165 (parallel [(const_int 1) (const_int 0)])))]
3166 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
3168 [(set_attr "type" "vecload")])
3170 (define_insn "*vsx_lxvd2x4_le_<mode>"
3171 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
3173 (match_operand:VSX_W 1 "memory_operand" "Z")
3174 (parallel [(const_int 2) (const_int 3)
3175 (const_int 0) (const_int 1)])))]
3176 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
3178 [(set_attr "type" "vecload")])
3180 (define_insn "*vsx_lxvd2x8_le_V8HI"
3181 [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
3183 (match_operand:V8HI 1 "memory_operand" "Z")
3184 (parallel [(const_int 4) (const_int 5)
3185 (const_int 6) (const_int 7)
3186 (const_int 0) (const_int 1)
3187 (const_int 2) (const_int 3)])))]
3188 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode) && !TARGET_P9_VECTOR"
3190 [(set_attr "type" "vecload")])
3192 (define_insn "*vsx_lxvd2x16_le_V16QI"
3193 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
3195 (match_operand:V16QI 1 "memory_operand" "Z")
3196 (parallel [(const_int 8) (const_int 9)
3197 (const_int 10) (const_int 11)
3198 (const_int 12) (const_int 13)
3199 (const_int 14) (const_int 15)
3200 (const_int 0) (const_int 1)
3201 (const_int 2) (const_int 3)
3202 (const_int 4) (const_int 5)
3203 (const_int 6) (const_int 7)])))]
3204 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode) && !TARGET_P9_VECTOR"
3206 [(set_attr "type" "vecload")])
3208 ;; stxvd2x for little endian stores. We need several of
3209 ;; these since the form of the PARALLEL differs by mode.
3210 (define_insn "*vsx_stxvd2x2_le_<mode>"
3211 [(set (match_operand:VSX_D 0 "memory_operand" "=Z")
3213 (match_operand:VSX_D 1 "vsx_register_operand" "wa")
3214 (parallel [(const_int 1) (const_int 0)])))]
3215 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
3217 [(set_attr "type" "vecstore")])
3219 (define_insn "*vsx_stxvd2x4_le_<mode>"
3220 [(set (match_operand:VSX_W 0 "memory_operand" "=Z")
3222 (match_operand:VSX_W 1 "vsx_register_operand" "wa")
3223 (parallel [(const_int 2) (const_int 3)
3224 (const_int 0) (const_int 1)])))]
3225 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
3227 [(set_attr "type" "vecstore")])
3229 (define_insn "*vsx_stxvd2x8_le_V8HI"
3230 [(set (match_operand:V8HI 0 "memory_operand" "=Z")
3232 (match_operand:V8HI 1 "vsx_register_operand" "wa")
3233 (parallel [(const_int 4) (const_int 5)
3234 (const_int 6) (const_int 7)
3235 (const_int 0) (const_int 1)
3236 (const_int 2) (const_int 3)])))]
3237 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode) && !TARGET_P9_VECTOR"
3239 [(set_attr "type" "vecstore")])
3241 (define_insn "*vsx_stxvd2x16_le_V16QI"
3242 [(set (match_operand:V16QI 0 "memory_operand" "=Z")
3244 (match_operand:V16QI 1 "vsx_register_operand" "wa")
3245 (parallel [(const_int 8) (const_int 9)
3246 (const_int 10) (const_int 11)
3247 (const_int 12) (const_int 13)
3248 (const_int 14) (const_int 15)
3249 (const_int 0) (const_int 1)
3250 (const_int 2) (const_int 3)
3251 (const_int 4) (const_int 5)
3252 (const_int 6) (const_int 7)])))]
3253 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode) && !TARGET_P9_VECTOR"
3255 [(set_attr "type" "vecstore")])
3257 ;; Convert a TImode value into V1TImode
3258 (define_expand "vsx_set_v1ti"
3259 [(match_operand:V1TI 0 "nonimmediate_operand")
3260 (match_operand:V1TI 1 "nonimmediate_operand")
3261 (match_operand:TI 2 "input_operand")
3262 (match_operand:QI 3 "u5bit_cint_operand")]
3263 "VECTOR_MEM_VSX_P (V1TImode)"
3265 if (operands[3] != const0_rtx)
3268 emit_move_insn (operands[0], gen_lowpart (V1TImode, operands[1]));
3272 ;; Rewrite V2DF/V2DI set in terms of VEC_CONCAT
3273 (define_expand "vsx_set_<mode>"
3274 [(use (match_operand:VSX_D 0 "vsx_register_operand"))
3275 (use (match_operand:VSX_D 1 "vsx_register_operand"))
3276 (use (match_operand:<VS_scalar> 2 "gpc_reg_operand"))
3277 (use (match_operand:QI 3 "const_0_to_1_operand"))]
3278 "VECTOR_MEM_VSX_P (<MODE>mode)"
3280 rtx dest = operands[0];
3281 rtx vec_reg = operands[1];
3282 rtx value = operands[2];
3283 rtx ele = operands[3];
3284 rtx tmp = gen_reg_rtx (<VS_scalar>mode);
3286 if (ele == const0_rtx)
3288 emit_insn (gen_vsx_extract_<mode> (tmp, vec_reg, const1_rtx));
3289 emit_insn (gen_vsx_concat_<mode> (dest, value, tmp));
3292 else if (ele == const1_rtx)
3294 emit_insn (gen_vsx_extract_<mode> (tmp, vec_reg, const0_rtx));
3295 emit_insn (gen_vsx_concat_<mode> (dest, tmp, value));
3302 ;; Extract a DF/DI element from V2DF/V2DI
3303 ;; Optimize cases where we can do a simple or direct move,
3304 ;; or see if we can avoid doing the move at all.
3306 ;; There are some unresolved problems with reload that show up if an Altivec
3307 ;; register was picked. Limit the scalar value to FPRs for now.
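;; A hedged usage sketch (assumes <altivec.h> on a VSX target):
;;
;;   #include <altivec.h>
;;   double first_element (vector double v)
;;   {
;;     return vec_extract (v, 0);   /* ideally no move at all */
;;   }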
3309 (define_insn "vsx_extract_<mode>"
3310 [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=d, d, wr, wr")
3311 (vec_select:<VS_scalar>
3312 (match_operand:VSX_D 1 "gpc_reg_operand" "wa, wa, wa, wa")
3314 [(match_operand:QI 2 "const_0_to_1_operand" "wD, n, wD, n")])))]
3315 "VECTOR_MEM_VSX_P (<MODE>mode)"
3317 int element = INTVAL (operands[2]);
3318 int op0_regno = REGNO (operands[0]);
3319 int op1_regno = REGNO (operands[1]);
3322 gcc_assert (IN_RANGE (element, 0, 1));
3323 gcc_assert (VSX_REGNO_P (op1_regno));
3325 if (element == VECTOR_ELEMENT_SCALAR_64BIT)
3327 if (op0_regno == op1_regno)
3328 return ASM_COMMENT_START " vec_extract to same register";
3330 else if (INT_REGNO_P (op0_regno) && TARGET_DIRECT_MOVE
3331 && TARGET_POWERPC64)
3332 return "mfvsrd %0,%x1";
3334 else if (FP_REGNO_P (op0_regno) && FP_REGNO_P (op1_regno))
3337 else if (VSX_REGNO_P (op0_regno))
3338 return "xxlor %x0,%x1,%x1";
3344 else if (element == VECTOR_ELEMENT_MFVSRLD_64BIT && INT_REGNO_P (op0_regno)
3345 && TARGET_P9_VECTOR && TARGET_POWERPC64 && TARGET_DIRECT_MOVE)
3346 return "mfvsrld %0,%x1";
3348 else if (VSX_REGNO_P (op0_regno))
3350 fldDM = element << 1;
3351 if (!BYTES_BIG_ENDIAN)
3353 operands[3] = GEN_INT (fldDM);
3354 return "xxpermdi %x0,%x1,%x1,%3";
3360 [(set_attr "type" "veclogical,mfvsr,mfvsr,vecperm")
3361 (set_attr "isa" "*,*,p8v,p9v")])
3363 ;; Optimize extracting a single scalar element from memory.
3364 (define_insn_and_split "*vsx_extract_<P:mode>_<VSX_D:mode>_load"
3365 [(set (match_operand:<VS_scalar> 0 "register_operand" "=wa,wr")
3366 (vec_select:<VSX_D:VS_scalar>
3367 (match_operand:VSX_D 1 "memory_operand" "m,m")
3368 (parallel [(match_operand:QI 2 "const_0_to_1_operand" "n,n")])))
3369 (clobber (match_scratch:P 3 "=&b,&b"))]
3370 "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<VSX_D:MODE>mode)"
3372 "&& reload_completed"
3373 [(set (match_dup 0) (match_dup 4))]
3375 operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
3376 operands[3], <VSX_D:VS_scalar>mode);
3378 [(set_attr "type" "fpload,load")
3379 (set_attr "length" "8")])
3381 ;; Optimize storing a single scalar element that is already in the right position to store.
3383 (define_insn "*vsx_extract_<mode>_store"
3384 [(set (match_operand:<VS_scalar> 0 "memory_operand" "=m,Z,wY")
3385 (vec_select:<VS_scalar>
3386 (match_operand:VSX_D 1 "register_operand" "d,v,v")
3387 (parallel [(match_operand:QI 2 "vsx_scalar_64bit" "wD,wD,wD")])))]
3388 "VECTOR_MEM_VSX_P (<MODE>mode)"
3393 [(set_attr "type" "fpstore")
3394 (set_attr "isa" "*,p7v,p9v")])
3396 ;; Variable V2DI/V2DF extract shift
3397 (define_insn "vsx_vslo_<mode>"
3398 [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=v")
3399 (unspec:<VS_scalar> [(match_operand:VSX_D 1 "gpc_reg_operand" "v")
3400 (match_operand:V2DI 2 "gpc_reg_operand" "v")]
3402 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3404 [(set_attr "type" "vecperm")])
3406 ;; Variable V2DI/V2DF extract from a register
3407 (define_insn_and_split "vsx_extract_<mode>_var"
3408 [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=v")
3409 (unspec:<VS_scalar> [(match_operand:VSX_D 1 "gpc_reg_operand" "v")
3410 (match_operand:DI 2 "gpc_reg_operand" "r")]
3411 UNSPEC_VSX_EXTRACT))
3412 (clobber (match_scratch:DI 3 "=r"))
3413 (clobber (match_scratch:V2DI 4 "=&v"))]
3414 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3416 "&& reload_completed"
3419 rs6000_split_vec_extract_var (operands[0], operands[1], operands[2],
3420 operands[3], operands[4]);
3424 ;; Variable V2DI/V2DF extract from memory
3425 (define_insn_and_split "*vsx_extract_<mode>_var_load"
3426 [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=wa,r")
3427 (unspec:<VS_scalar> [(match_operand:VSX_D 1 "memory_operand" "Q,Q")
3428 (match_operand:DI 2 "gpc_reg_operand" "r,r")]
3429 UNSPEC_VSX_EXTRACT))
3430 (clobber (match_scratch:DI 3 "=&b,&b"))]
3431 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3433 "&& reload_completed"
3434 [(set (match_dup 0) (match_dup 4))]
3436 operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
3437 operands[3], <VS_scalar>mode);
3439 [(set_attr "type" "fpload,load")])
3441 ;; Extract a SF element from V4SF
3442 (define_insn_and_split "vsx_extract_v4sf"
3443 [(set (match_operand:SF 0 "vsx_register_operand" "=wa")
3445 (match_operand:V4SF 1 "vsx_register_operand" "wa")
3446 (parallel [(match_operand:QI 2 "u5bit_cint_operand" "n")])))
3447 (clobber (match_scratch:V4SF 3 "=0"))]
3448 "VECTOR_UNIT_VSX_P (V4SFmode)"
3453 rtx op0 = operands[0];
3454 rtx op1 = operands[1];
3455 rtx op2 = operands[2];
3456 rtx op3 = operands[3];
3458 HOST_WIDE_INT ele = BYTES_BIG_ENDIAN ? INTVAL (op2) : 3 - INTVAL (op2);
3464 if (GET_CODE (op3) == SCRATCH)
3465 op3 = gen_reg_rtx (V4SFmode);
3466 emit_insn (gen_vsx_xxsldwi_v4sf (op3, op1, op1, GEN_INT (ele)));
3469 emit_insn (gen_vsx_xscvspdp_scalar2 (op0, tmp));
3472 [(set_attr "length" "8")
3473 (set_attr "type" "fp")])
3475 (define_insn_and_split "*vsx_extract_v4sf_<mode>_load"
3476 [(set (match_operand:SF 0 "register_operand" "=f,v,v,?r")
3478 (match_operand:V4SF 1 "memory_operand" "m,Z,m,m")
3479 (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n,n,n")])))
3480 (clobber (match_scratch:P 3 "=&b,&b,&b,&b"))]
3481 "VECTOR_MEM_VSX_P (V4SFmode)"
3483 "&& reload_completed"
3484 [(set (match_dup 0) (match_dup 4))]
3486 operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
3487 operands[3], SFmode);
3489 [(set_attr "type" "fpload,fpload,fpload,load")
3490 (set_attr "length" "8")
3491 (set_attr "isa" "*,p7v,p9v,*")])
3493 ;; Variable V4SF extract from a register
3494 (define_insn_and_split "vsx_extract_v4sf_var"
3495 [(set (match_operand:SF 0 "gpc_reg_operand" "=wa")
3496 (unspec:SF [(match_operand:V4SF 1 "gpc_reg_operand" "v")
3497 (match_operand:DI 2 "gpc_reg_operand" "r")]
3498 UNSPEC_VSX_EXTRACT))
3499 (clobber (match_scratch:DI 3 "=r"))
3500 (clobber (match_scratch:V2DI 4 "=&v"))]
3501 "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_DIRECT_MOVE_64BIT"
3503 "&& reload_completed"
3506 rs6000_split_vec_extract_var (operands[0], operands[1], operands[2],
3507 operands[3], operands[4]);
3511 ;; Variable V4SF extract from memory
3512 (define_insn_and_split "*vsx_extract_v4sf_var_load"
3513 [(set (match_operand:SF 0 "gpc_reg_operand" "=wa,?r")
3514 (unspec:SF [(match_operand:V4SF 1 "memory_operand" "Q,Q")
3515 (match_operand:DI 2 "gpc_reg_operand" "r,r")]
3516 UNSPEC_VSX_EXTRACT))
3517 (clobber (match_scratch:DI 3 "=&b,&b"))]
3518 "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_DIRECT_MOVE_64BIT"
3520 "&& reload_completed"
3521 [(set (match_dup 0) (match_dup 4))]
3523 operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
3524 operands[3], SFmode);
3526 [(set_attr "type" "fpload,load")])
3528 ;; Expand the builtin form of xxpermdi to canonical rtl.
3529 (define_expand "vsx_xxpermdi_<mode>"
3530 [(match_operand:VSX_L 0 "vsx_register_operand")
3531 (match_operand:VSX_L 1 "vsx_register_operand")
3532 (match_operand:VSX_L 2 "vsx_register_operand")
3533 (match_operand:QI 3 "u5bit_cint_operand")]
3534 "VECTOR_MEM_VSX_P (<MODE>mode)"
3536 rtx target = operands[0];
3537 rtx op0 = operands[1];
3538 rtx op1 = operands[2];
3539 int mask = INTVAL (operands[3]);
3540 rtx perm0 = GEN_INT ((mask >> 1) & 1);
3541 rtx perm1 = GEN_INT ((mask & 1) + 2);
3542 rtx (*gen) (rtx, rtx, rtx, rtx, rtx);
3544 if (<MODE>mode == V2DFmode)
3545 gen = gen_vsx_xxpermdi2_v2df_1;
3548 gen = gen_vsx_xxpermdi2_v2di_1;
3549 if (<MODE>mode != V2DImode)
3551 target = gen_lowpart (V2DImode, target);
3552 op0 = gen_lowpart (V2DImode, op0);
3553 op1 = gen_lowpart (V2DImode, op1);
3556 emit_insn (gen (target, op0, op1, perm0, perm1));
3560 ;; Special version of xxpermdi that retains big-endian semantics.
3561 (define_expand "vsx_xxpermdi_<mode>_be"
3562 [(match_operand:VSX_L 0 "vsx_register_operand")
3563 (match_operand:VSX_L 1 "vsx_register_operand")
3564 (match_operand:VSX_L 2 "vsx_register_operand")
3565 (match_operand:QI 3 "u5bit_cint_operand")]
3566 "VECTOR_MEM_VSX_P (<MODE>mode)"
3568 rtx target = operands[0];
3569 rtx op0 = operands[1];
3570 rtx op1 = operands[2];
3571 int mask = INTVAL (operands[3]);
3572 rtx perm0 = GEN_INT ((mask >> 1) & 1);
3573 rtx perm1 = GEN_INT ((mask & 1) + 2);
3574 rtx (*gen) (rtx, rtx, rtx, rtx, rtx);
3576 if (<MODE>mode == V2DFmode)
3577 gen = gen_vsx_xxpermdi2_v2df_1;
3580 gen = gen_vsx_xxpermdi2_v2di_1;
3581 if (<MODE>mode != V2DImode)
3583 target = gen_lowpart (V2DImode, target);
3584 op0 = gen_lowpart (V2DImode, op0);
3585 op1 = gen_lowpart (V2DImode, op1);
3588 /* In little endian mode, vsx_xxpermdi2_<mode>_1 will perform a
3589 transformation we don't want; it is necessary for
3590 rs6000_expand_vec_perm_const_1 but not for this use. So we
3591 prepare for that by reversing the transformation here. */
3592 if (BYTES_BIG_ENDIAN)
3593 emit_insn (gen (target, op0, op1, perm0, perm1));
3596 rtx p0 = GEN_INT (3 - INTVAL (perm1));
3597 rtx p1 = GEN_INT (3 - INTVAL (perm0));
3598 emit_insn (gen (target, op1, op0, p0, p1));
3603 (define_insn "vsx_xxpermdi2_<mode>_1"
3604 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
3606 (vec_concat:<VS_double>
3607 (match_operand:VSX_D 1 "vsx_register_operand" "wa")
3608 (match_operand:VSX_D 2 "vsx_register_operand" "wa"))
3609 (parallel [(match_operand 3 "const_0_to_1_operand" "")
3610 (match_operand 4 "const_2_to_3_operand" "")])))]
3611 "VECTOR_MEM_VSX_P (<MODE>mode)"
3615 /* For little endian, swap operands and invert/swap selectors
3616 to get the correct xxpermdi. The operand swap sets up the
3617 inputs as a little endian array. The selectors are swapped
3618 because they are defined to use big endian ordering. The
3619 selectors are inverted to get the correct doublewords for
3620 little endian ordering. */
3621 if (BYTES_BIG_ENDIAN)
3623 op3 = INTVAL (operands[3]);
3624 op4 = INTVAL (operands[4]);
3628 op3 = 3 - INTVAL (operands[4]);
3629 op4 = 3 - INTVAL (operands[3]);
3632 mask = (op3 << 1) | (op4 - 2);
3633 operands[3] = GEN_INT (mask);
3635 if (BYTES_BIG_ENDIAN)
3636 return "xxpermdi %x0,%x1,%x2,%3";
3638 return "xxpermdi %x0,%x2,%x1,%3";
3640 [(set_attr "type" "vecperm")])
3642 ;; Extraction of a single element in a small integer vector. Until ISA 3.0,
3643 ;; none of the small types were allowed in a vector register, so we had to
3644 ;; extract to a DImode and either do a direct move or store.
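;; A hedged usage sketch (assumes <altivec.h>); with a constant index and ISA
;; 3.0 this can use vextractub directly instead of the DImode path:
;;
;;   #include <altivec.h>
;;   unsigned char seventh_byte (vector unsigned char v)
;;   {
;;     return vec_extract (v, 7);
;;   }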
3645 (define_expand "vsx_extract_<mode>"
3646 [(parallel [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand")
3647 (vec_select:<VS_scalar>
3648 (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand")
3649 (parallel [(match_operand:QI 2 "const_int_operand")])))
3650 (clobber (match_scratch:VSX_EXTRACT_I 3))])]
3651 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3653 /* If we have ISA 3.0, we can do an xxextractuw/vextractu{b,h}.  */
3654 if (TARGET_P9_VECTOR)
3656 emit_insn (gen_vsx_extract_<mode>_p9 (operands[0], operands[1],
3662 (define_insn "vsx_extract_<mode>_p9"
3663 [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=r,<VSX_EX>")
3664 (vec_select:<VS_scalar>
3665 (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v,<VSX_EX>")
3666 (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n,n")])))
3667 (clobber (match_scratch:SI 3 "=r,X"))]
3668 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB"
3670 if (which_alternative == 0)
3675 HOST_WIDE_INT elt = INTVAL (operands[2]);
3676 HOST_WIDE_INT elt_adj = (!BYTES_BIG_ENDIAN
3677 ? GET_MODE_NUNITS (<MODE>mode) - 1 - elt
3680 HOST_WIDE_INT unit_size = GET_MODE_UNIT_SIZE (<MODE>mode);
3681 HOST_WIDE_INT offset = unit_size * elt_adj;
3683 operands[2] = GEN_INT (offset);
3685 return "xxextractuw %x0,%x1,%2";
3687 return "vextractu<wd> %0,%1,%2";
3690 [(set_attr "type" "vecsimple")
3691 (set_attr "isa" "p9v,*")])
3694 [(set (match_operand:<VS_scalar> 0 "int_reg_operand")
3695 (vec_select:<VS_scalar>
3696 (match_operand:VSX_EXTRACT_I 1 "altivec_register_operand")
3697 (parallel [(match_operand:QI 2 "const_int_operand")])))
3698 (clobber (match_operand:SI 3 "int_reg_operand"))]
3699 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB && reload_completed"
3702 rtx op0_si = gen_rtx_REG (SImode, REGNO (operands[0]));
3703 rtx op1 = operands[1];
3704 rtx op2 = operands[2];
3705 rtx op3 = operands[3];
3706 HOST_WIDE_INT offset = INTVAL (op2) * GET_MODE_UNIT_SIZE (<MODE>mode);
3708 emit_move_insn (op3, GEN_INT (offset));
3709 if (BYTES_BIG_ENDIAN)
3710 emit_insn (gen_vextu<wd>lx (op0_si, op3, op1));
3712 emit_insn (gen_vextu<wd>rx (op0_si, op3, op1));
3716 ;; Optimize zero extracts to eliminate the AND after the extract.
3717 (define_insn_and_split "*vsx_extract_<mode>_di_p9"
3718 [(set (match_operand:DI 0 "gpc_reg_operand" "=r,<VSX_EX>")
3720 (vec_select:<VS_scalar>
3721 (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v,<VSX_EX>")
3722 (parallel [(match_operand:QI 2 "const_int_operand" "n,n")]))))
3723 (clobber (match_scratch:SI 3 "=r,X"))]
3724 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB"
3726 "&& reload_completed"
3727 [(parallel [(set (match_dup 4)
3728 (vec_select:<VS_scalar>
3730 (parallel [(match_dup 2)])))
3731 (clobber (match_dup 3))])]
3733 operands[4] = gen_rtx_REG (<VS_scalar>mode, REGNO (operands[0]));
3735 [(set_attr "isa" "p9v,*")])
3737 ;; Optimize stores to use the ISA 3.0 scalar store instructions
3738 (define_insn_and_split "*vsx_extract_<mode>_store_p9"
3739 [(set (match_operand:<VS_scalar> 0 "memory_operand" "=Z,m")
3740 (vec_select:<VS_scalar>
3741 (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "<VSX_EX>,v")
3742 (parallel [(match_operand:QI 2 "const_int_operand" "n,n")])))
3743 (clobber (match_scratch:<VS_scalar> 3 "=<VSX_EX>,&*r"))
3744 (clobber (match_scratch:SI 4 "=X,&r"))]
3745 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB"
3747 "&& reload_completed"
3748 [(parallel [(set (match_dup 3)
3749 (vec_select:<VS_scalar>
3751 (parallel [(match_dup 2)])))
3752 (clobber (match_dup 4))])
3756 (define_insn_and_split "*vsx_extract_si"
3757 [(set (match_operand:SI 0 "nonimmediate_operand" "=r,wa,Z")
3759 (match_operand:V4SI 1 "gpc_reg_operand" "v,v,v")
3760 (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n,n")])))
3761 (clobber (match_scratch:V4SI 3 "=v,v,v"))]
3762 "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT && !TARGET_P9_VECTOR"
3764 "&& reload_completed"
3767 rtx dest = operands[0];
3768 rtx src = operands[1];
3769 rtx element = operands[2];
3770 rtx vec_tmp = operands[3];
3773 if (!BYTES_BIG_ENDIAN)
3774 element = GEN_INT (GET_MODE_NUNITS (V4SImode) - 1 - INTVAL (element));
3776 /* If the value is in the correct position, we can avoid doing the VSPLT<x>
3778 value = INTVAL (element);
3780 emit_insn (gen_altivec_vspltw_direct (vec_tmp, src, element));
3784 if (MEM_P (operands[0]))
3786 if (can_create_pseudo_p ())
3787 dest = rs6000_force_indexed_or_indirect_mem (dest);
3789 if (TARGET_P8_VECTOR)
3790 emit_move_insn (dest, gen_rtx_REG (SImode, REGNO (vec_tmp)));
3792 emit_insn (gen_stfiwx (dest, gen_rtx_REG (DImode, REGNO (vec_tmp))));
3795 else if (TARGET_P8_VECTOR)
3796 emit_move_insn (dest, gen_rtx_REG (SImode, REGNO (vec_tmp)));
3798 emit_move_insn (gen_rtx_REG (DImode, REGNO (dest)),
3799 gen_rtx_REG (DImode, REGNO (vec_tmp)));
3803 [(set_attr "type" "mfvsr,vecperm,fpstore")
3804 (set_attr "length" "8")
3805 (set_attr "isa" "*,p8v,*")])
3807 (define_insn_and_split "*vsx_extract_<mode>_p8"
3808 [(set (match_operand:<VS_scalar> 0 "nonimmediate_operand" "=r")
3809 (vec_select:<VS_scalar>
3810 (match_operand:VSX_EXTRACT_I2 1 "gpc_reg_operand" "v")
3811 (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n")])))
3812 (clobber (match_scratch:VSX_EXTRACT_I2 3 "=v"))]
3813 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT
3814 && !TARGET_P9_VECTOR"
3816 "&& reload_completed"
3819 rtx dest = operands[0];
3820 rtx src = operands[1];
3821 rtx element = operands[2];
3822 rtx vec_tmp = operands[3];
3825 if (!BYTES_BIG_ENDIAN)
3826 element = GEN_INT (GET_MODE_NUNITS (<MODE>mode) - 1 - INTVAL (element));
3828 /* If the value is in the correct position, we can avoid doing the VSPLT<x> instruction.  */
3830 value = INTVAL (element);
3831 if (<MODE>mode == V16QImode)
3834 emit_insn (gen_altivec_vspltb_direct (vec_tmp, src, element));
3838 else if (<MODE>mode == V8HImode)
3841 emit_insn (gen_altivec_vsplth_direct (vec_tmp, src, element));
3848 emit_move_insn (gen_rtx_REG (DImode, REGNO (dest)),
3849 gen_rtx_REG (DImode, REGNO (vec_tmp)));
3852 [(set_attr "type" "mfvsr")])
3854 ;; Optimize extracting a single scalar element from memory.
3855 (define_insn_and_split "*vsx_extract_<mode>_load"
3856 [(set (match_operand:<VS_scalar> 0 "register_operand" "=r")
3857 (vec_select:<VS_scalar>
3858 (match_operand:VSX_EXTRACT_I 1 "memory_operand" "m")
3859 (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n")])))
3860 (clobber (match_scratch:DI 3 "=&b"))]
3861 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3863 "&& reload_completed"
3864 [(set (match_dup 0) (match_dup 4))]
3866 operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
3867 operands[3], <VS_scalar>mode);
3869 [(set_attr "type" "load")
3870 (set_attr "length" "8")])
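;; As a rough illustration (assumed usage): when the vector operand lives in
;; memory, the pattern above folds the extract into a single scalar load instead
;; of loading the whole vector into a register first.
;;
;;   #include <altivec.h>
;;
;;   short
;;   extract_from_mem (vector short *p)
;;   {
;;     return vec_extract (*p, 2);
;;   }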
3872 ;; Variable V16QI/V8HI/V4SI extract from a register
3873 (define_insn_and_split "vsx_extract_<mode>_var"
3874 [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=r,r")
3876 [(match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v,v")
3877 (match_operand:DI 2 "gpc_reg_operand" "r,r")]
3878 UNSPEC_VSX_EXTRACT))
3879 (clobber (match_scratch:DI 3 "=r,r"))
3880 (clobber (match_scratch:V2DI 4 "=X,&v"))]
3881 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3883 "&& reload_completed"
3886 rs6000_split_vec_extract_var (operands[0], operands[1], operands[2],
3887 operands[3], operands[4]);
3890 [(set_attr "isa" "p9v,*")])
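;; As a rough illustration (assumed usage): the variable-index form above is
;; reached when the element number is only known at run time.
;;
;;   #include <altivec.h>
;;
;;   unsigned char
;;   extract_var (vector unsigned char v, unsigned long i)
;;   {
;;     return vec_extract (v, i);   /* i is not a compile-time constant */
;;   }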
3892 ;; Variable V16QI/V8HI/V4SI extract from memory
3893 (define_insn_and_split "*vsx_extract_<mode>_var_load"
3894 [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=r")
3896 [(match_operand:VSX_EXTRACT_I 1 "memory_operand" "Q")
3897 (match_operand:DI 2 "gpc_reg_operand" "r")]
3898 UNSPEC_VSX_EXTRACT))
3899 (clobber (match_scratch:DI 3 "=&b"))]
3900 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3902 "&& reload_completed"
3903 [(set (match_dup 0) (match_dup 4))]
3905 operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
3906 operands[3], <VS_scalar>mode);
3908 [(set_attr "type" "load")])
3911 (define_expand "vextractl<mode>"
3912 [(set (match_operand:V2DI 0 "altivec_register_operand")
3913 (unspec:V2DI [(match_operand:VI2 1 "altivec_register_operand")
3914 (match_operand:VI2 2 "altivec_register_operand")
3915 (match_operand:SI 3 "register_operand")]
3919 if (BYTES_BIG_ENDIAN)
3921 emit_insn (gen_vextractl<mode>_internal (operands[0], operands[1],
3922 operands[2], operands[3]));
3923 emit_insn (gen_xxswapd_v2di (operands[0], operands[0]));
3926 emit_insn (gen_vextractr<mode>_internal (operands[0], operands[2],
3927 operands[1], operands[3]));
3931 (define_insn "vextractl<mode>_internal"
3932 [(set (match_operand:V2DI 0 "altivec_register_operand" "=v")
3933 (unspec:V2DI [(match_operand:VEC_I 1 "altivec_register_operand" "v")
3934 (match_operand:VEC_I 2 "altivec_register_operand" "v")
3935 (match_operand:SI 3 "register_operand" "r")]
3938 "vext<du_or_d><wd>vlx %0,%1,%2,%3"
3939 [(set_attr "type" "vecsimple")])
3941 (define_expand "vextractr<mode>"
3942 [(set (match_operand:V2DI 0 "altivec_register_operand")
3943 (unspec:V2DI [(match_operand:VI2 1 "altivec_register_operand")
3944 (match_operand:VI2 2 "altivec_register_operand")
3945 (match_operand:SI 3 "register_operand")]
3949 if (BYTES_BIG_ENDIAN)
3951 emit_insn (gen_vextractr<mode>_internal (operands[0], operands[1],
3952 operands[2], operands[3]));
3953 emit_insn (gen_xxswapd_v2di (operands[0], operands[0]));
3956 emit_insn (gen_vextractl<mode>_internal (operands[0], operands[2],
3957 operands[1], operands[3]));
3961 (define_insn "vextractr<mode>_internal"
3962 [(set (match_operand:V2DI 0 "altivec_register_operand" "=v")
3963 (unspec:V2DI [(match_operand:VEC_I 1 "altivec_register_operand" "v")
3964 (match_operand:VEC_I 2 "altivec_register_operand" "v")
3965 (match_operand:SI 3 "register_operand" "r")]
3968 "vext<du_or_d><wd>vrx %0,%1,%2,%3"
3969 [(set_attr "type" "vecsimple")])
3971 (define_expand "vinsertvl_<mode>"
3972 [(set (match_operand:VI2 0 "altivec_register_operand")
3973 (unspec:VI2 [(match_operand:VI2 1 "altivec_register_operand")
3974 (match_operand:VI2 2 "altivec_register_operand")
3975 (match_operand:SI 3 "register_operand" "r")]
3979 if (BYTES_BIG_ENDIAN)
3980 emit_insn (gen_vinsertvl_internal_<mode> (operands[0], operands[3],
3981 operands[1], operands[2]));
3983 emit_insn (gen_vinsertvr_internal_<mode> (operands[0], operands[3],
3984 operands[1], operands[2]));
3988 (define_insn "vinsertvl_internal_<mode>"
3989 [(set (match_operand:VEC_I 0 "altivec_register_operand" "=v")
3990 (unspec:VEC_I [(match_operand:SI 1 "register_operand" "r")
3991 (match_operand:VEC_I 2 "altivec_register_operand" "v")
3992 (match_operand:VEC_I 3 "altivec_register_operand" "0")]
3995 "vins<wd>vlx %0,%1,%2"
3996 [(set_attr "type" "vecsimple")])
3998 (define_expand "vinsertvr_<mode>"
3999 [(set (match_operand:VI2 0 "altivec_register_operand")
4000 (unspec:VI2 [(match_operand:VI2 1 "altivec_register_operand")
4001 (match_operand:VI2 2 "altivec_register_operand")
4002 (match_operand:SI 3 "register_operand" "r")]
4006 if (BYTES_BIG_ENDIAN)
4007 emit_insn (gen_vinsertvr_internal_<mode> (operands[0], operands[3],
4008 operands[1], operands[2]));
4010 emit_insn (gen_vinsertvl_internal_<mode> (operands[0], operands[3],
4011 operands[1], operands[2]));
4015 (define_insn "vinsertvr_internal_<mode>"
4016 [(set (match_operand:VEC_I 0 "altivec_register_operand" "=v")
4017 (unspec:VEC_I [(match_operand:SI 1 "register_operand" "r")
4018 (match_operand:VEC_I 2 "altivec_register_operand" "v")
4019 (match_operand:VEC_I 3 "altivec_register_operand" "0")]
4022 "vins<wd>vrx %0,%1,%2"
4023 [(set_attr "type" "vecsimple")])
4025 (define_expand "vinsertgl_<mode>"
4026 [(set (match_operand:VI2 0 "altivec_register_operand")
4027 (unspec:VI2 [(match_operand:SI 1 "register_operand")
4028 (match_operand:VI2 2 "altivec_register_operand")
4029 (match_operand:SI 3 "register_operand")]
4033 if (BYTES_BIG_ENDIAN)
4034 emit_insn (gen_vinsertgl_internal_<mode> (operands[0], operands[3],
4035 operands[1], operands[2]));
4037 emit_insn (gen_vinsertgr_internal_<mode> (operands[0], operands[3],
4038 operands[1], operands[2]));
4042 (define_insn "vinsertgl_internal_<mode>"
4043 [(set (match_operand:VEC_I 0 "altivec_register_operand" "=v")
4044 (unspec:VEC_I [(match_operand:SI 1 "register_operand" "r")
4045 (match_operand:SI 2 "register_operand" "r")
4046 (match_operand:VEC_I 3 "altivec_register_operand" "0")]
4049 "vins<wd>lx %0,%1,%2"
4050 [(set_attr "type" "vecsimple")])
4052 (define_expand "vinsertgr_<mode>"
4053 [(set (match_operand:VI2 0 "altivec_register_operand")
4054 (unspec:VI2 [(match_operand:SI 1 "register_operand")
4055 (match_operand:VI2 2 "altivec_register_operand")
4056 (match_operand:SI 3 "register_operand")]
4060 if (BYTES_BIG_ENDIAN)
4061 emit_insn (gen_vinsertgr_internal_<mode> (operands[0], operands[3],
4062 operands[1], operands[2]));
4064 emit_insn (gen_vinsertgl_internal_<mode> (operands[0], operands[3],
4065 operands[1], operands[2]));
4069 (define_insn "vinsertgr_internal_<mode>"
4070 [(set (match_operand:VEC_I 0 "altivec_register_operand" "=v")
4071 (unspec:VEC_I [(match_operand:SI 1 "register_operand" "r")
4072 (match_operand:SI 2 "register_operand" "r")
4073 (match_operand:VEC_I 3 "altivec_register_operand" "0")]
4076 "vins<wd>rx %0,%1,%2"
4077 [(set_attr "type" "vecsimple")])
4079 (define_expand "vreplace_elt_<mode>"
4080 [(set (match_operand:REPLACE_ELT 0 "register_operand")
4081 (unspec:REPLACE_ELT [(match_operand:REPLACE_ELT 1 "register_operand")
4082 (match_operand:<VS_scalar> 2 "register_operand")
4083 (match_operand:QI 3 "const_0_to_3_operand")]
4084 UNSPEC_REPLACE_ELT))]
4088 /* The immediate value is the word index; convert it to a byte index and
4089 adjust for endianness if needed.  */
4090 if (BYTES_BIG_ENDIAN)
4091 index = INTVAL (operands[3]) << <REPLACE_ELT_sh>;
4094 index = <REPLACE_ELT_max> - (INTVAL (operands[3]) << <REPLACE_ELT_sh>);
4096 emit_insn (gen_vreplace_elt_<mode>_inst (operands[0], operands[1],
4101 [(set_attr "type" "vecsimple")])
4103 (define_expand "vreplace_un_<mode>"
4104 [(set (match_operand:REPLACE_ELT 0 "register_operand")
4105 (unspec:REPLACE_ELT [(match_operand:REPLACE_ELT 1 "register_operand")
4106 (match_operand:<VS_scalar> 2 "register_operand")
4107 (match_operand:QI 3 "const_0_to_12_operand")]
4108 UNSPEC_REPLACE_UN))]
4111 /* The immediate value is the byte index, using big-endian numbering.  */
4112 emit_insn (gen_vreplace_elt_<mode>_inst (operands[0], operands[1],
4113 operands[2], operands[3]));
4116 [(set_attr "type" "vecsimple")])
4118 (define_insn "vreplace_elt_<mode>_inst"
4119 [(set (match_operand:REPLACE_ELT 0 "register_operand" "=v")
4120 (unspec:REPLACE_ELT [(match_operand:REPLACE_ELT 1 "register_operand" "0")
4121 (match_operand:<VS_scalar> 2 "register_operand" "r")
4122 (match_operand:QI 3 "const_0_to_12_operand" "n")]
4123 UNSPEC_REPLACE_ELT))]
4125 "vins<REPLACE_ELT_char> %0,%2,%3"
4126 [(set_attr "type" "vecsimple")])
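;; As a rough illustration (assumed mapping; ISA 3.1 only): the vreplace_elt
;; expanders above are expected to back the vec_replace_elt built-in.
;;
;;   #include <altivec.h>
;;
;;   vector unsigned int
;;   replace_word (vector unsigned int v, unsigned int x)
;;   {
;;     return vec_replace_elt (v, x, 2);   /* replace word element 2 with x */
;;   }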
4128 ;; VSX_EXTRACT optimizations
4129 ;; Optimize double d = (double) vec_extract (vi, <n>)
4130 ;; Get the element into the top position and use XVCVSWDP/XVCVUWDP
4131 (define_insn_and_split "*vsx_extract_si_<uns>float_df"
4132 [(set (match_operand:DF 0 "gpc_reg_operand" "=wa")
4135 (match_operand:V4SI 1 "gpc_reg_operand" "v")
4136 (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n")]))))
4137 (clobber (match_scratch:V4SI 3 "=v"))]
4138 "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
4143 rtx dest = operands[0];
4144 rtx src = operands[1];
4145 rtx element = operands[2];
4146 rtx v4si_tmp = operands[3];
4149 if (!BYTES_BIG_ENDIAN)
4150 element = GEN_INT (GET_MODE_NUNITS (V4SImode) - 1 - INTVAL (element));
4152 /* If the value is in the correct position, we can avoid doing the VSPLT<x> instruction.  */
4154 value = INTVAL (element);
4157 if (GET_CODE (v4si_tmp) == SCRATCH)
4158 v4si_tmp = gen_reg_rtx (V4SImode);
4159 emit_insn (gen_altivec_vspltw_direct (v4si_tmp, src, element));
4164 emit_insn (gen_vsx_xvcv<su>xwdp_df (dest, v4si_tmp));
4168 ;; Optimize <type> f = (<type>) vec_extract (vi, <n>)
4169 ;; where <type> is a floating point type supported by the hardware that is
4170 ;; not double.  First convert the value to double, and then to the desired type.
4172 (define_insn_and_split "*vsx_extract_si_<uns>float_<mode>"
4173 [(set (match_operand:VSX_EXTRACT_FL 0 "gpc_reg_operand" "=wa")
4174 (any_float:VSX_EXTRACT_FL
4176 (match_operand:V4SI 1 "gpc_reg_operand" "v")
4177 (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n")]))))
4178 (clobber (match_scratch:V4SI 3 "=v"))
4179 (clobber (match_scratch:DF 4 "=wa"))]
4180 "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
4185 rtx dest = operands[0];
4186 rtx src = operands[1];
4187 rtx element = operands[2];
4188 rtx v4si_tmp = operands[3];
4189 rtx df_tmp = operands[4];
4192 if (!BYTES_BIG_ENDIAN)
4193 element = GEN_INT (GET_MODE_NUNITS (V4SImode) - 1 - INTVAL (element));
4195 /* If the value is in the correct position, we can avoid doing the VSPLT<x> instruction.  */
4197 value = INTVAL (element);
4200 if (GET_CODE (v4si_tmp) == SCRATCH)
4201 v4si_tmp = gen_reg_rtx (V4SImode);
4202 emit_insn (gen_altivec_vspltw_direct (v4si_tmp, src, element));
4207 if (GET_CODE (df_tmp) == SCRATCH)
4208 df_tmp = gen_reg_rtx (DFmode);
4210 emit_insn (gen_vsx_xvcv<su>xwdp_df (df_tmp, v4si_tmp));
4212 if (<MODE>mode == SFmode)
4213 emit_insn (gen_truncdfsf2 (dest, df_tmp));
4214 else if (<MODE>mode == TFmode && FLOAT128_IBM_P (TFmode))
4215 emit_insn (gen_extenddftf2_vsx (dest, df_tmp));
4216 else if (<MODE>mode == TFmode && FLOAT128_IEEE_P (TFmode)
4217 && TARGET_FLOAT128_HW)
4218 emit_insn (gen_extenddftf2_hw (dest, df_tmp));
4219 else if (<MODE>mode == IFmode && FLOAT128_IBM_P (IFmode))
4220 emit_insn (gen_extenddfif2 (dest, df_tmp));
4221 else if (<MODE>mode == KFmode && TARGET_FLOAT128_HW)
4222 emit_insn (gen_extenddfkf2_hw (dest, df_tmp));
4229 ;; Optimize <ftype> f = (<ftype>) vec_extract (<vtype>, <n>)
4230 ;; Where <ftype> is SFmode, DFmode (and KFmode/TFmode if those types are IEEE
4231 ;; 128-bit hardware types) and <vtype> is vector char, vector unsigned char,
4232 ;; vector short or vector unsigned short.
4233 (define_insn_and_split "*vsx_ext_<VSX_EXTRACT_I:VS_scalar>_fl_<FL_CONV:mode>"
4234 [(set (match_operand:FL_CONV 0 "gpc_reg_operand" "=wa")
4236 (vec_select:<VSX_EXTRACT_I:VS_scalar>
4237 (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v")
4238 (parallel [(match_operand:QI 2 "const_int_operand" "n")]))))
4239 (clobber (match_scratch:<VSX_EXTRACT_I:VS_scalar> 3 "=v"))]
4240 "VECTOR_MEM_VSX_P (<VSX_EXTRACT_I:MODE>mode) && TARGET_DIRECT_MOVE_64BIT
4241 && TARGET_P9_VECTOR"
4243 "&& reload_completed"
4244 [(parallel [(set (match_dup 3)
4245 (vec_select:<VSX_EXTRACT_I:VS_scalar>
4247 (parallel [(match_dup 2)])))
4248 (clobber (scratch:SI))])
4250 (sign_extend:DI (match_dup 3)))
4252 (float:<FL_CONV:MODE> (match_dup 4)))]
4254 operands[4] = gen_rtx_REG (DImode, REGNO (operands[3]));
4256 [(set_attr "isa" "<FL_CONV:VSisa>")])
4258 (define_insn_and_split "*vsx_ext_<VSX_EXTRACT_I:VS_scalar>_ufl_<FL_CONV:mode>"
4259 [(set (match_operand:FL_CONV 0 "gpc_reg_operand" "=wa")
4260 (unsigned_float:FL_CONV
4261 (vec_select:<VSX_EXTRACT_I:VS_scalar>
4262 (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v")
4263 (parallel [(match_operand:QI 2 "const_int_operand" "n")]))))
4264 (clobber (match_scratch:<VSX_EXTRACT_I:VS_scalar> 3 "=v"))]
4265 "VECTOR_MEM_VSX_P (<VSX_EXTRACT_I:MODE>mode) && TARGET_DIRECT_MOVE_64BIT
4266 && TARGET_P9_VECTOR"
4268 "&& reload_completed"
4269 [(parallel [(set (match_dup 3)
4270 (vec_select:<VSX_EXTRACT_I:VS_scalar>
4272 (parallel [(match_dup 2)])))
4273 (clobber (scratch:SI))])
4275 (float:<FL_CONV:MODE> (match_dup 4)))]
4277 operands[4] = gen_rtx_REG (DImode, REGNO (operands[3]));
4279 [(set_attr "isa" "<FL_CONV:VSisa>")])
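;; As a rough illustration (assumed usage): the two patterns above cover
;; converting an extracted char/short element directly to a floating point
;; value without a separate move through the GPRs.
;;
;;   #include <altivec.h>
;;
;;   float
;;   extract_to_float (vector signed char v)
;;   {
;;     return (float) vec_extract (v, 3);
;;   }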
4281 ;; V4SI/V8HI/V16QI set operation on ISA 3.0
4282 (define_insn "vsx_set_<mode>_p9"
4283 [(set (match_operand:VSX_EXTRACT_I 0 "gpc_reg_operand" "=<VSX_EX>")
4284 (unspec:VSX_EXTRACT_I
4285 [(match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "0")
4286 (match_operand:<VS_scalar> 2 "gpc_reg_operand" "<VSX_EX>")
4287 (match_operand:QI 3 "<VSX_EXTRACT_PREDICATE>" "n")]
4289 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_P9_VECTOR && TARGET_POWERPC64"
4291 int ele = INTVAL (operands[3]);
4292 int nunits = GET_MODE_NUNITS (<MODE>mode);
4294 if (!BYTES_BIG_ENDIAN)
4295 ele = nunits - 1 - ele;
4297 operands[3] = GEN_INT (GET_MODE_SIZE (<VS_scalar>mode) * ele);
4298 if (<MODE>mode == V4SImode)
4299 return "xxinsertw %x0,%x2,%3";
4301 return "vinsert<wd> %0,%2,%3";
4303 [(set_attr "type" "vecperm")])
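;; As a rough illustration (assumed usage): the ISA 3.0 set pattern above
;; handles inserting a scalar element with a single xxinsertw/vinsert*.
;;
;;   #include <altivec.h>
;;
;;   vector int
;;   set_word (vector int v, int x)
;;   {
;;     return vec_insert (x, v, 3);   /* set element 3 to x */
;;   }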
4305 (define_insn_and_split "vsx_set_v4sf_p9"
4306 [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
4308 [(match_operand:V4SF 1 "gpc_reg_operand" "0")
4309 (match_operand:SF 2 "gpc_reg_operand" "wa")
4310 (match_operand:QI 3 "const_0_to_3_operand" "n")]
4312 (clobber (match_scratch:SI 4 "=&wa"))]
4313 "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR && TARGET_POWERPC64"
4315 "&& reload_completed"
4317 (unspec:V4SF [(match_dup 2)]
4318 UNSPEC_VSX_CVDPSPN))
4319 (parallel [(set (match_dup 4)
4320 (vec_select:SI (match_dup 6)
4321 (parallel [(match_dup 7)])))
4322 (clobber (scratch:SI))])
4324 (unspec:V4SI [(match_dup 8)
4329 unsigned int tmp_regno = reg_or_subregno (operands[4]);
4331 operands[5] = gen_rtx_REG (V4SFmode, tmp_regno);
4332 operands[6] = gen_rtx_REG (V4SImode, tmp_regno);
4333 operands[7] = GEN_INT (BYTES_BIG_ENDIAN ? 0 : 3);
4334 operands[8] = gen_rtx_REG (V4SImode, reg_or_subregno (operands[0]));
4336 [(set_attr "type" "vecperm")
4337 (set_attr "length" "12")
4338 (set_attr "isa" "p9v")])
4340 ;; Special case setting 0.0f to a V4SF element
4341 (define_insn_and_split "*vsx_set_v4sf_p9_zero"
4342 [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
4344 [(match_operand:V4SF 1 "gpc_reg_operand" "0")
4345 (match_operand:SF 2 "zero_fp_constant" "j")
4346 (match_operand:QI 3 "const_0_to_3_operand" "n")]
4348 (clobber (match_scratch:SI 4 "=&wa"))]
4349 "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR && TARGET_POWERPC64"
4351 "&& reload_completed"
4355 (unspec:V4SI [(match_dup 5)
4360 operands[5] = gen_rtx_REG (V4SImode, reg_or_subregno (operands[0]));
4362 [(set_attr "type" "vecperm")
4363 (set_attr "length" "8")
4364 (set_attr "isa" "p9v")])
4366 ;; Optimize x = vec_insert (vec_extract (v2, n), v1, m) if n is the element
4367 ;; that is in the default scalar position (1 for big endian, 2 for little
4368 ;; endian). We just need to do an xxinsertw since the element is in the
4369 ;; correct location.
4371 (define_insn "*vsx_insert_extract_v4sf_p9"
4372 [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
4374 [(match_operand:V4SF 1 "gpc_reg_operand" "0")
4375 (vec_select:SF (match_operand:V4SF 2 "gpc_reg_operand" "wa")
4377 [(match_operand:QI 3 "const_0_to_3_operand" "n")]))
4378 (match_operand:QI 4 "const_0_to_3_operand" "n")]
4380 "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR && TARGET_POWERPC64
4381 && (INTVAL (operands[3]) == (BYTES_BIG_ENDIAN ? 1 : 2))"
4383 int ele = INTVAL (operands[4]);
4385 if (!BYTES_BIG_ENDIAN)
4386 ele = GET_MODE_NUNITS (V4SFmode) - 1 - ele;
4388 operands[4] = GEN_INT (GET_MODE_SIZE (SFmode) * ele);
4389 return "xxinsertw %x0,%x2,%4";
4391 [(set_attr "type" "vecperm")])
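;; As a rough illustration (assumed usage): the insert-of-extract patterns
;; match code that copies one element of a vector into another vector; whether
;; the one-insn or two-insn variant fires depends on whether the extracted
;; element already sits in the default scalar position.
;;
;;   #include <altivec.h>
;;
;;   vector float
;;   copy_elem (vector float dst, vector float src)
;;   {
;;     return vec_insert (vec_extract (src, 2), dst, 0);
;;   }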
4393 ;; Optimize x = vec_insert (vec_extract (v2, n), v1, m) if n is not the element
4394 ;; that is in the default scalar position (1 for big endian, 2 for little
4395 ;; endian). Convert the insert/extract to int and avoid doing the conversion.
4397 (define_insn_and_split "*vsx_insert_extract_v4sf_p9_2"
4398 [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
4400 [(match_operand:V4SF 1 "gpc_reg_operand" "0")
4401 (vec_select:SF (match_operand:V4SF 2 "gpc_reg_operand" "wa")
4403 [(match_operand:QI 3 "const_0_to_3_operand" "n")]))
4404 (match_operand:QI 4 "const_0_to_3_operand" "n")]
4406 (clobber (match_scratch:SI 5 "=&wa"))]
4407 "VECTOR_MEM_VSX_P (V4SFmode) && VECTOR_MEM_VSX_P (V4SImode)
4408 && TARGET_P9_VECTOR && TARGET_POWERPC64
4409 && (INTVAL (operands[3]) != (BYTES_BIG_ENDIAN ? 1 : 2))"
4412 [(parallel [(set (match_dup 5)
4413 (vec_select:SI (match_dup 6)
4414 (parallel [(match_dup 3)])))
4415 (clobber (scratch:SI))])
4417 (unspec:V4SI [(match_dup 8)
4422 if (GET_CODE (operands[5]) == SCRATCH)
4423 operands[5] = gen_reg_rtx (SImode);
4425 operands[6] = gen_lowpart (V4SImode, operands[2]);
4426 operands[7] = gen_lowpart (V4SImode, operands[0]);
4427 operands[8] = gen_lowpart (V4SImode, operands[1]);
4429 [(set_attr "type" "vecperm")
4430 (set_attr "isa" "p9v")])
4432 ;; Expanders for builtins
4433 (define_expand "vsx_mergel_<mode>"
4434 [(use (match_operand:VSX_D 0 "vsx_register_operand"))
4435 (use (match_operand:VSX_D 1 "vsx_register_operand"))
4436 (use (match_operand:VSX_D 2 "vsx_register_operand"))]
4437 "VECTOR_MEM_VSX_P (<MODE>mode)"
4439 rtvec v = gen_rtvec (2, GEN_INT (1), GEN_INT (3));
4440 rtx x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[1], operands[2]);
4441 x = gen_rtx_VEC_SELECT (<MODE>mode, x, gen_rtx_PARALLEL (VOIDmode, v));
4442 emit_insn (gen_rtx_SET (operands[0], x));
4446 (define_expand "vsx_mergeh_<mode>"
4447 [(use (match_operand:VSX_D 0 "vsx_register_operand"))
4448 (use (match_operand:VSX_D 1 "vsx_register_operand"))
4449 (use (match_operand:VSX_D 2 "vsx_register_operand"))]
4450 "VECTOR_MEM_VSX_P (<MODE>mode)"
4452 rtvec v = gen_rtvec (2, GEN_INT (0), GEN_INT (2));
4453 rtx x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[1], operands[2]);
4454 x = gen_rtx_VEC_SELECT (<MODE>mode, x, gen_rtx_PARALLEL (VOIDmode, v));
4455 emit_insn (gen_rtx_SET (operands[0], x));
4460 ;; We separate the register splat insn from the memory splat insn to force the
4461 ;; register allocator to generate the indexed form of the SPLAT when it is
4462 ;; given an offsettable memory reference. Otherwise, if the register and
4463 ;; memory insns were combined into a single insn, the register allocator will
4464 ;; load the value into a register, and then do a double word permute.
4465 (define_expand "vsx_splat_<mode>"
4466 [(set (match_operand:VSX_D 0 "vsx_register_operand")
4467 (vec_duplicate:VSX_D
4468 (match_operand:<VS_scalar> 1 "input_operand")))]
4469 "VECTOR_MEM_VSX_P (<MODE>mode)"
4471 rtx op1 = operands[1];
4473 operands[1] = rs6000_force_indexed_or_indirect_mem (op1);
4474 else if (!REG_P (op1))
4475 op1 = force_reg (<VSX_D:VS_scalar>mode, op1);
4478 (define_insn "vsx_splat_<mode>_reg"
4479 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa,we")
4480 (vec_duplicate:VSX_D
4481 (match_operand:<VS_scalar> 1 "gpc_reg_operand" "wa,b")))]
4482 "VECTOR_MEM_VSX_P (<MODE>mode)"
4484 xxpermdi %x0,%x1,%x1,0
4486 [(set_attr "type" "vecperm,vecmove")])
4488 (define_insn "vsx_splat_<mode>_mem"
4489 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
4490 (vec_duplicate:VSX_D
4491 (match_operand:<VSX_D:VS_scalar> 1 "memory_operand" "Z")))]
4492 "VECTOR_MEM_VSX_P (<MODE>mode)"
4494 [(set_attr "type" "vecload")])
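;; As a rough illustration (assumed usage): keeping the memory splat as its own
;; insn lets the register allocator pick the indexed load form directly for
;; code such as:
;;
;;   #include <altivec.h>
;;
;;   vector double
;;   splat_from_mem (double *p, long i)
;;   {
;;     return vec_splats (p[i]);   /* indexed load feeds the splat */
;;   }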
4496 ;; V4SI splat support
4497 (define_insn "vsx_splat_v4si"
4498 [(set (match_operand:V4SI 0 "vsx_register_operand" "=we,we")
4500 (match_operand:SI 1 "splat_input_operand" "r,Z")))]
4505 [(set_attr "type" "vecperm,vecload")])
4507 ;; SImode is not currently allowed in vector registers. This pattern
4508 ;; allows us to use direct move to get the value in a vector register
4509 ;; so that we can use XXSPLTW
4510 (define_insn "vsx_splat_v4si_di"
4511 [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa,we")
4514 (match_operand:DI 1 "gpc_reg_operand" "wa,r"))))]
4515 "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
4519 [(set_attr "type" "vecperm")
4520 (set_attr "isa" "p8v,*")])
4522 ;; V4SF splat (ISA 3.0)
4523 (define_insn_and_split "vsx_splat_v4sf"
4524 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa,wa,wa")
4526 (match_operand:SF 1 "splat_input_operand" "Z,wa,r")))]
4532 "&& reload_completed && vsx_register_operand (operands[1], SFmode)"
4534 (unspec:V4SF [(match_dup 1)] UNSPEC_VSX_CVDPSPN))
4536 (unspec:V4SF [(match_dup 0)
4537 (const_int 0)] UNSPEC_VSX_XXSPLTW))]
4539 [(set_attr "type" "vecload,vecperm,vecperm")
4540 (set_attr "length" "*,8,*")
4541 (set_attr "isa" "*,p8v,*")])
4543 ;; V4SF/V4SI splat from a vector element
4544 (define_insn "vsx_xxspltw_<mode>"
4545 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
4546 (vec_duplicate:VSX_W
4547 (vec_select:<VS_scalar>
4548 (match_operand:VSX_W 1 "vsx_register_operand" "wa")
4550 [(match_operand:QI 2 "u5bit_cint_operand" "n")]))))]
4551 "VECTOR_MEM_VSX_P (<MODE>mode)"
4553 if (!BYTES_BIG_ENDIAN)
4554 operands[2] = GEN_INT (3 - INTVAL (operands[2]));
4556 return "xxspltw %x0,%x1,%2";
4558 [(set_attr "type" "vecperm")])
4560 (define_insn "vsx_xxspltw_<mode>_direct"
4561 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
4562 (unspec:VSX_W [(match_operand:VSX_W 1 "vsx_register_operand" "wa")
4563 (match_operand:QI 2 "u5bit_cint_operand" "i")]
4564 UNSPEC_VSX_XXSPLTW))]
4565 "VECTOR_MEM_VSX_P (<MODE>mode)"
4566 "xxspltw %x0,%x1,%2"
4567 [(set_attr "type" "vecperm")])
4569 ;; V16QI/V8HI splat support on ISA 2.07
4570 (define_insn "vsx_vsplt<VSX_SPLAT_SUFFIX>_di"
4571 [(set (match_operand:VSX_SPLAT_I 0 "altivec_register_operand" "=v")
4572 (vec_duplicate:VSX_SPLAT_I
4573 (truncate:<VS_scalar>
4574 (match_operand:DI 1 "altivec_register_operand" "v"))))]
4575 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
4576 "vsplt<VSX_SPLAT_SUFFIX> %0,%1,<VSX_SPLAT_COUNT>"
4577 [(set_attr "type" "vecperm")])
4579 ;; V2DF/V2DI splat for use by vec_splat builtin
4580 (define_insn "vsx_xxspltd_<mode>"
4581 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
4582 (unspec:VSX_D [(match_operand:VSX_D 1 "vsx_register_operand" "wa")
4583 (match_operand:QI 2 "u5bit_cint_operand" "i")]
4584 UNSPEC_VSX_XXSPLTD))]
4585 "VECTOR_MEM_VSX_P (<MODE>mode)"
4587 if ((BYTES_BIG_ENDIAN && INTVAL (operands[2]) == 0)
4588 || (!BYTES_BIG_ENDIAN && INTVAL (operands[2]) == 1))
4589 return "xxpermdi %x0,%x1,%x1,0";
4591 return "xxpermdi %x0,%x1,%x1,3";
4593 [(set_attr "type" "vecperm")])
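;; As a rough illustration (assumed usage): the xxspltd pattern above serves
;; the vec_splat built-in on doubleword vectors.
;;
;;   #include <altivec.h>
;;
;;   vector double
;;   splat_elem1 (vector double v)
;;   {
;;     return vec_splat (v, 1);   /* duplicate element 1 into both lanes */
;;   }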
4595 ;; V4SF/V4SI interleave
4596 (define_insn "vsx_xxmrghw_<mode>"
4597 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
4599 (vec_concat:<VS_double>
4600 (match_operand:VSX_W 1 "vsx_register_operand" "wa")
4601 (match_operand:VSX_W 2 "vsx_register_operand" "wa"))
4602 (parallel [(const_int 0) (const_int 4)
4603 (const_int 1) (const_int 5)])))]
4604 "VECTOR_MEM_VSX_P (<MODE>mode)"
4606 if (BYTES_BIG_ENDIAN)
4607 return "xxmrghw %x0,%x1,%x2";
4609 return "xxmrglw %x0,%x2,%x1";
4611 [(set_attr "type" "vecperm")])
4613 (define_insn "vsx_xxmrglw_<mode>"
4614 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
4616 (vec_concat:<VS_double>
4617 (match_operand:VSX_W 1 "vsx_register_operand" "wa")
4618 (match_operand:VSX_W 2 "vsx_register_operand" "wa"))
4619 (parallel [(const_int 2) (const_int 6)
4620 (const_int 3) (const_int 7)])))]
4621 "VECTOR_MEM_VSX_P (<MODE>mode)"
4623 if (BYTES_BIG_ENDIAN)
4624 return "xxmrglw %x0,%x1,%x2";
4626 return "xxmrghw %x0,%x2,%x1";
4628 [(set_attr "type" "vecperm")])
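;; As a rough illustration (assumed usage): the word interleave patterns above
;; implement the vec_mergeh/vec_mergel built-ins for the 4x32-bit vector modes.
;;
;;   #include <altivec.h>
;;
;;   vector int
;;   merge_high (vector int a, vector int b)
;;   {
;;     return vec_mergeh (a, b);   /* interleave the high halves of a and b */
;;   }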
4630 ;; Shift left double by word immediate
4631 (define_insn "vsx_xxsldwi_<mode>"
4632 [(set (match_operand:VSX_L 0 "vsx_register_operand" "=wa")
4633 (unspec:VSX_L [(match_operand:VSX_L 1 "vsx_register_operand" "wa")
4634 (match_operand:VSX_L 2 "vsx_register_operand" "wa")
4635 (match_operand:QI 3 "u5bit_cint_operand" "i")]
4637 "VECTOR_MEM_VSX_P (<MODE>mode)"
4638 "xxsldwi %x0,%x1,%x2,%3"
4639 [(set_attr "type" "vecperm")
4640 (set_attr "isa" "<VSisa>")])
4643 ;; Vector reduction insns and splitters
4645 (define_insn_and_split "vsx_reduc_<VEC_reduc_name>_v2df"
4646 [(set (match_operand:V2DF 0 "vfloat_operand" "=&wa,wa")
4650 (match_operand:V2DF 1 "vfloat_operand" "wa,wa")
4651 (parallel [(const_int 1)]))
4654 (parallel [(const_int 0)])))
4656 (clobber (match_scratch:V2DF 2 "=0,&wa"))]
4657 "VECTOR_UNIT_VSX_P (V2DFmode)"
4662 rtx tmp = (GET_CODE (operands[2]) == SCRATCH)
4663 ? gen_reg_rtx (V2DFmode)
4665 emit_insn (gen_vsx_xxsldwi_v2df (tmp, operands[1], operands[1], const2_rtx));
4666 emit_insn (gen_<VEC_reduc_rtx>v2df3 (operands[0], tmp, operands[1]));
4669 [(set_attr "length" "8")
4670 (set_attr "type" "veccomplex")])
4672 (define_insn_and_split "vsx_reduc_<VEC_reduc_name>_v4sf"
4673 [(set (match_operand:V4SF 0 "vfloat_operand" "=wa")
4675 (unspec:V4SF [(const_int 0)] UNSPEC_REDUC)
4676 (match_operand:V4SF 1 "vfloat_operand" "wa")))
4677 (clobber (match_scratch:V4SF 2 "=&wa"))
4678 (clobber (match_scratch:V4SF 3 "=&wa"))]
4679 "VECTOR_UNIT_VSX_P (V4SFmode)"
4684 rtx op0 = operands[0];
4685 rtx op1 = operands[1];
4686 rtx tmp2, tmp3, tmp4;
4688 if (can_create_pseudo_p ())
4690 tmp2 = gen_reg_rtx (V4SFmode);
4691 tmp3 = gen_reg_rtx (V4SFmode);
4692 tmp4 = gen_reg_rtx (V4SFmode);
4701 emit_insn (gen_vsx_xxsldwi_v4sf (tmp2, op1, op1, const2_rtx));
4702 emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp3, tmp2, op1));
4703 emit_insn (gen_vsx_xxsldwi_v4sf (tmp4, tmp3, tmp3, GEN_INT (3)));
4704 emit_insn (gen_<VEC_reduc_rtx>v4sf3 (op0, tmp4, tmp3));
4707 [(set_attr "length" "16")
4708 (set_attr "type" "veccomplex")])
4710 ;; Combiner patterns with the vector reduction patterns that know we can get
4711 ;; to the top element of the V2DF array without doing an extract.
4713 (define_insn_and_split "*vsx_reduc_<VEC_reduc_name>_v2df_scalar"
4714 [(set (match_operand:DF 0 "vfloat_operand" "=&wa,wa")
4719 (match_operand:V2DF 1 "vfloat_operand" "wa,wa")
4720 (parallel [(const_int 1)]))
4723 (parallel [(const_int 0)])))
4725 (parallel [(const_int 1)])))
4726 (clobber (match_scratch:DF 2 "=0,&wa"))]
4727 "BYTES_BIG_ENDIAN && VECTOR_UNIT_VSX_P (V2DFmode)"
4732 rtx hi = gen_highpart (DFmode, operands[1]);
4733 rtx lo = (GET_CODE (operands[2]) == SCRATCH)
4734 ? gen_reg_rtx (DFmode)
4737 emit_insn (gen_vsx_extract_v2df (lo, operands[1], const1_rtx));
4738 emit_insn (gen_<VEC_reduc_rtx>df3 (operands[0], hi, lo));
4741 [(set_attr "length" "8")
4742 (set_attr "type" "veccomplex")])
4744 (define_insn_and_split "*vsx_reduc_<VEC_reduc_name>_v4sf_scalar"
4745 [(set (match_operand:SF 0 "vfloat_operand" "=f")
4748 (unspec:V4SF [(const_int 0)] UNSPEC_REDUC)
4749 (match_operand:V4SF 1 "vfloat_operand" "wa"))
4750 (parallel [(const_int 3)])))
4751 (clobber (match_scratch:V4SF 2 "=&wa"))
4752 (clobber (match_scratch:V4SF 3 "=&wa"))
4753 (clobber (match_scratch:V4SF 4 "=0"))]
4754 "BYTES_BIG_ENDIAN && VECTOR_UNIT_VSX_P (V4SFmode)"
4759 rtx op0 = operands[0];
4760 rtx op1 = operands[1];
4761 rtx tmp2, tmp3, tmp4, tmp5;
4763 if (can_create_pseudo_p ())
4765 tmp2 = gen_reg_rtx (V4SFmode);
4766 tmp3 = gen_reg_rtx (V4SFmode);
4767 tmp4 = gen_reg_rtx (V4SFmode);
4768 tmp5 = gen_reg_rtx (V4SFmode);
4778 emit_insn (gen_vsx_xxsldwi_v4sf (tmp2, op1, op1, const2_rtx));
4779 emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp3, tmp2, op1));
4780 emit_insn (gen_vsx_xxsldwi_v4sf (tmp4, tmp3, tmp3, GEN_INT (3)));
4781 emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp5, tmp4, tmp3));
4782 emit_insn (gen_vsx_xscvspdp_scalar2 (op0, tmp5));
4785 [(set_attr "length" "20")
4786 (set_attr "type" "veccomplex")])
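;; As a rough illustration (assumed usage; FP reassociation such as -ffast-math
;; is needed before the vectorizer forms such a reduction): the splitters above
;; reduce a V4SF vector with shift/op steps, as for a loop like:
;;
;;   float
;;   sum4 (const float *a)
;;   {
;;     float s = 0.0f;
;;     for (int i = 0; i < 4; i++)
;;       s += a[i];
;;     return s;
;;   }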
4789 ;; Power8 Vector fusion. The fused ops must be physically adjacent.
4791 [(set (match_operand:P 0 "base_reg_operand")
4792 (match_operand:P 1 "short_cint_operand"))
4793 (set (match_operand:VSX_M 2 "vsx_register_operand")
4794 (mem:VSX_M (plus:P (match_dup 0)
4795 (match_operand:P 3 "int_reg_operand"))))]
4796 "TARGET_VSX && TARGET_P8_FUSION && !TARGET_P9_VECTOR"
4797 "li %0,%1\;lx<VSX_M:VSm>x %x2,%0,%3\t\t\t# vector load fusion"
4798 [(set_attr "length" "8")
4799 (set_attr "type" "vecload")])
4802 [(set (match_operand:P 0 "base_reg_operand")
4803 (match_operand:P 1 "short_cint_operand"))
4804 (set (match_operand:VSX_M 2 "vsx_register_operand")
4805 (mem:VSX_M (plus:P (match_operand:P 3 "int_reg_operand")
4807 "TARGET_VSX && TARGET_P8_FUSION && !TARGET_P9_VECTOR"
4808 "li %0,%1\;lx<VSX_M:VSm>x %x2,%0,%3\t\t\t# vector load fusion"
4809 [(set_attr "length" "8")
4810 (set_attr "type" "vecload")])
4813 ;; ISA 3.0 vector extend sign support
4815 (define_insn "vsx_sign_extend_qi_<mode>"
4816 [(set (match_operand:VSINT_84 0 "vsx_register_operand" "=v")
4818 [(match_operand:V16QI 1 "vsx_register_operand" "v")]
4819 UNSPEC_VSX_SIGN_EXTEND))]
4822 [(set_attr "type" "vecexts")])
4824 (define_insn "vsx_sign_extend_hi_<mode>"
4825 [(set (match_operand:VSINT_84 0 "vsx_register_operand" "=v")
4827 [(match_operand:V8HI 1 "vsx_register_operand" "v")]
4828 UNSPEC_VSX_SIGN_EXTEND))]
4831 [(set_attr "type" "vecexts")])
4833 (define_insn "*vsx_sign_extend_si_v2di"
4834 [(set (match_operand:V2DI 0 "vsx_register_operand" "=v")
4835 (unspec:V2DI [(match_operand:V4SI 1 "vsx_register_operand" "v")]
4836 UNSPEC_VSX_SIGN_EXTEND))]
4839 [(set_attr "type" "vecexts")])
4841 ;; ISA 3.1 vector sign extend
4842 ;; Move DI value from GPR to TI mode in VSX register, word 1.
4843 (define_insn "mtvsrdd_diti_w1"
4844 [(set (match_operand:TI 0 "register_operand" "=wa")
4845 (unspec:TI [(match_operand:DI 1 "register_operand" "r")]
4846 UNSPEC_MTVSRD_DITI_W1))]
4847 "TARGET_POWERPC64 && TARGET_DIRECT_MOVE"
4849 [(set_attr "type" "vecmove")])
4851 ;; Sign extend 64-bit value in TI reg, word 1, to 128-bit value in TI reg
4852 (define_insn "extendditi2_vector"
4853 [(set (match_operand:TI 0 "gpc_reg_operand" "=v")
4854 (unspec:TI [(match_operand:TI 1 "gpc_reg_operand" "v")]
4855 UNSPEC_EXTENDDITI2))]
4858 [(set_attr "type" "vecexts")])
4860 (define_expand "extendditi2"
4861 [(set (match_operand:TI 0 "gpc_reg_operand")
4862 (sign_extend:DI (match_operand:DI 1 "gpc_reg_operand")))]
4865 /* Move 64-bit src from GPR to vector reg and sign extend to 128-bits. */
4866 rtx temp = gen_reg_rtx (TImode);
4867 emit_insn (gen_mtvsrdd_diti_w1 (temp, operands[1]));
4868 emit_insn (gen_extendditi2_vector (operands[0], temp));
4873 ;; ISA 3.0 Binary Floating-Point Support
4875 ;; VSX Scalar Extract Exponent Quad-Precision
4876 (define_insn "xsxexpqp_<mode>"
4877 [(set (match_operand:DI 0 "altivec_register_operand" "=v")
4878 (unspec:DI [(match_operand:IEEE128 1 "altivec_register_operand" "v")]
4879 UNSPEC_VSX_SXEXPDP))]
4882 [(set_attr "type" "vecmove")])
4884 ;; VSX Scalar Extract Exponent Double-Precision
4885 (define_insn "xsxexpdp"
4886 [(set (match_operand:DI 0 "register_operand" "=r")
4887 (unspec:DI [(match_operand:DF 1 "vsx_register_operand" "wa")]
4888 UNSPEC_VSX_SXEXPDP))]
4889 "TARGET_P9_VECTOR && TARGET_64BIT"
4891 [(set_attr "type" "integer")])
4893 ;; VSX Scalar Extract Significand Quad-Precision
4894 (define_insn "xsxsigqp_<mode>"
4895 [(set (match_operand:TI 0 "altivec_register_operand" "=v")
4896 (unspec:TI [(match_operand:IEEE128 1 "altivec_register_operand" "v")]
4900 [(set_attr "type" "vecmove")])
4902 ;; VSX Scalar Extract Significand Double-Precision
4903 (define_insn "xsxsigdp"
4904 [(set (match_operand:DI 0 "register_operand" "=r")
4905 (unspec:DI [(match_operand:DF 1 "vsx_register_operand" "wa")]
4907 "TARGET_P9_VECTOR && TARGET_64BIT"
4909 [(set_attr "type" "integer")])
4911 ;; VSX Scalar Insert Exponent Quad-Precision Floating Point Argument
4912 (define_insn "xsiexpqpf_<mode>"
4913 [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v")
4915 [(match_operand:IEEE128 1 "altivec_register_operand" "v")
4916 (match_operand:DI 2 "altivec_register_operand" "v")]
4917 UNSPEC_VSX_SIEXPQP))]
4920 [(set_attr "type" "vecmove")])
4922 ;; VSX Scalar Insert Exponent Quad-Precision
4923 (define_insn "xsiexpqp_<mode>"
4924 [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v")
4925 (unspec:IEEE128 [(match_operand:TI 1 "altivec_register_operand" "v")
4926 (match_operand:DI 2 "altivec_register_operand" "v")]
4927 UNSPEC_VSX_SIEXPQP))]
4930 [(set_attr "type" "vecmove")])
4932 ;; VSX Scalar Insert Exponent Double-Precision
4933 (define_insn "xsiexpdp"
4934 [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
4935 (unspec:DF [(match_operand:DI 1 "register_operand" "r")
4936 (match_operand:DI 2 "register_operand" "r")]
4937 UNSPEC_VSX_SIEXPDP))]
4938 "TARGET_P9_VECTOR && TARGET_64BIT"
4939 "xsiexpdp %x0,%1,%2"
4940 [(set_attr "type" "fpsimple")])
4942 ;; VSX Scalar Insert Exponent Double-Precision Floating Point Argument
4943 (define_insn "xsiexpdpf"
4944 [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
4945 (unspec:DF [(match_operand:DF 1 "register_operand" "r")
4946 (match_operand:DI 2 "register_operand" "r")]
4947 UNSPEC_VSX_SIEXPDP))]
4948 "TARGET_P9_VECTOR && TARGET_64BIT"
4949 "xsiexpdp %x0,%1,%2"
4950 [(set_attr "type" "fpsimple")])
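;; As a rough illustration (built-in name and return type assumed; 64-bit,
;; ISA 3.0): the scalar exponent/significand insns above are expected to back
;; the scalar_extract_exp/scalar_extract_sig/scalar_insert_exp built-ins.
;;
;;   #include <altivec.h>
;;
;;   unsigned int
;;   get_exp (double x)
;;   {
;;     return scalar_extract_exp (x);   /* assumed to map to xsxexpdp */
;;   }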
4952 ;; VSX Scalar Compare Exponents Double-Precision
4953 (define_expand "xscmpexpdp_<code>"
4957 [(match_operand:DF 1 "vsx_register_operand" "wa")
4958 (match_operand:DF 2 "vsx_register_operand" "wa")]
4959 UNSPEC_VSX_SCMPEXPDP)
4961 (set (match_operand:SI 0 "register_operand" "=r")
4962 (CMP_TEST:SI (match_dup 3)
4966 if (<CODE> == UNORDERED && !HONOR_NANS (DFmode))
4968 emit_move_insn (operands[0], const0_rtx);
4972 operands[3] = gen_reg_rtx (CCFPmode);
4975 (define_insn "*xscmpexpdp"
4976 [(set (match_operand:CCFP 0 "cc_reg_operand" "=y")
4978 (unspec:DF [(match_operand:DF 1 "vsx_register_operand" "wa")
4979 (match_operand:DF 2 "vsx_register_operand" "wa")]
4980 UNSPEC_VSX_SCMPEXPDP)
4981 (match_operand:SI 3 "zero_constant" "j")))]
4983 "xscmpexpdp %0,%x1,%x2"
4984 [(set_attr "type" "fpcompare")])
4986 ;; VSX Scalar Compare Exponents Quad-Precision
4987 (define_expand "xscmpexpqp_<code>_<mode>"
4991 [(match_operand:IEEE128 1 "vsx_register_operand" "v")
4992 (match_operand:IEEE128 2 "vsx_register_operand" "v")]
4993 UNSPEC_VSX_SCMPEXPQP)
4995 (set (match_operand:SI 0 "register_operand" "=r")
4996 (CMP_TEST:SI (match_dup 3)
5000 if (<CODE> == UNORDERED && !HONOR_NANS (<MODE>mode))
5002 emit_move_insn (operands[0], const0_rtx);
5006 operands[3] = gen_reg_rtx (CCFPmode);
5009 (define_insn "*xscmpexpqp"
5010 [(set (match_operand:CCFP 0 "cc_reg_operand" "=y")
5012 (unspec:IEEE128 [(match_operand:IEEE128 1 "altivec_register_operand" "v")
5013 (match_operand:IEEE128 2 "altivec_register_operand" "v")]
5014 UNSPEC_VSX_SCMPEXPQP)
5015 (match_operand:SI 3 "zero_constant" "j")))]
5017 "xscmpexpqp %0,%1,%2"
5018 [(set_attr "type" "fpcompare")])
5020 ;; VSX Scalar Test Data Class Quad-Precision
5021 ;; (Expansion for scalar_test_data_class (__ieee128, int))
5022 ;; (Has side effect of setting the lt bit if operand 1 is negative,
5023 ;; setting the eq bit if any of the conditions tested by operand 2
5024 ;; are satisfied, and clearing the gt and unordered bits to zero.)
5025 (define_expand "xststdcqp_<mode>"
5029 [(match_operand:IEEE128 1 "altivec_register_operand" "v")
5030 (match_operand:SI 2 "u7bit_cint_operand" "n")]
5033 (set (match_operand:SI 0 "register_operand" "=r")
5034 (eq:SI (match_dup 3)
5038 operands[3] = gen_reg_rtx (CCFPmode);
5041 ;; VSX Scalar Test Data Class Double- and Single-Precision
5042 ;; (The lt bit is set if operand 1 is negative. The eq bit is set
5043 ;; if any of the conditions tested by operand 2 are satisfied.
5044 ;; The gt and unordered bits are cleared to zero.)
5045 (define_expand "xststdc<sd>p"
5049 [(match_operand:SFDF 1 "vsx_register_operand" "wa")
5050 (match_operand:SI 2 "u7bit_cint_operand" "n")]
5053 (set (match_operand:SI 0 "register_operand" "=r")
5054 (eq:SI (match_dup 3)
5058 operands[3] = gen_reg_rtx (CCFPmode);
5059 operands[4] = CONST0_RTX (SImode);
5062 ;; The VSX Scalar Test Negative Quad-Precision
5063 (define_expand "xststdcnegqp_<mode>"
5067 [(match_operand:IEEE128 1 "altivec_register_operand" "v")
5071 (set (match_operand:SI 0 "register_operand" "=r")
5072 (lt:SI (match_dup 2)
5076 operands[2] = gen_reg_rtx (CCFPmode);
5079 ;; The VSX Scalar Test Negative Double- and Single-Precision
5080 (define_expand "xststdcneg<sd>p"
5084 [(match_operand:SFDF 1 "vsx_register_operand" "wa")
5088 (set (match_operand:SI 0 "register_operand" "=r")
5089 (lt:SI (match_dup 2)
5093 operands[2] = gen_reg_rtx (CCFPmode);
5094 operands[3] = CONST0_RTX (SImode);
5097 (define_insn "*xststdcqp_<mode>"
5098 [(set (match_operand:CCFP 0 "" "=y")
5101 [(match_operand:IEEE128 1 "altivec_register_operand" "v")
5102 (match_operand:SI 2 "u7bit_cint_operand" "n")]
5106 "xststdcqp %0,%1,%2"
5107 [(set_attr "type" "fpcompare")])
5109 (define_insn "*xststdc<sd>p"
5110 [(set (match_operand:CCFP 0 "" "=y")
5112 (unspec:SFDF [(match_operand:SFDF 1 "vsx_register_operand" "wa")
5113 (match_operand:SI 2 "u7bit_cint_operand" "n")]
5115 (match_operand:SI 3 "zero_constant" "j")))]
5117 "xststdc<sd>p %0,%x1,%2"
5118 [(set_attr "type" "fpcompare")])
5120 ;; VSX Vector Extract Exponent Double and Single Precision
5121 (define_insn "xvxexp<sd>p"
5122 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
5124 [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
5127 "xvxexp<sd>p %x0,%x1"
5128 [(set_attr "type" "vecsimple")])
5130 ;; VSX Vector Extract Significand Double and Single Precision
5131 (define_insn "xvxsig<sd>p"
5132 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
5134 [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
5137 "xvxsig<sd>p %x0,%x1"
5138 [(set_attr "type" "vecsimple")])
5140 ;; VSX Vector Insert Exponent Double and Single Precision
5141 (define_insn "xviexp<sd>p"
5142 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
5144 [(match_operand:VSX_F 1 "vsx_register_operand" "wa")
5145 (match_operand:VSX_F 2 "vsx_register_operand" "wa")]
5148 "xviexp<sd>p %x0,%x1,%x2"
5149 [(set_attr "type" "vecsimple")])
5151 ;; VSX Vector Test Data Class Double and Single Precision
5152 ;; The corresponding elements of the result vector are all ones
5153 ;; if any of the conditions tested by operand 3 are satisfied.
5154 (define_insn "xvtstdc<sd>p"
5155 [(set (match_operand:<VSI> 0 "vsx_register_operand" "=wa")
5157 [(match_operand:VSX_F 1 "vsx_register_operand" "wa")
5158 (match_operand:SI 2 "u7bit_cint_operand" "n")]
5159 UNSPEC_VSX_VTSTDC))]
5161 "xvtstdc<sd>p %x0,%x1,%2"
5162 [(set_attr "type" "vecsimple")])
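;; As a rough illustration (mask encoding assumed): the vector test data class
;; pattern above backs vec_test_data_class, e.g. testing each element for NaN.
;;
;;   #include <altivec.h>
;;
;;   vector bool int
;;   is_nan (vector float v)
;;   {
;;     return vec_test_data_class (v, 0x40);   /* 0x40 assumed to be the NaN class bit */
;;   }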
5164 ;; ISA 3.0 String Operations Support
5166 ;; Compare vectors producing a vector result and a predicate, setting CR6
5167 ;; to indicate a combined status. This pattern matches v16qi, v8hi, and
5168 ;; v4si modes.  It does not match v2df, v4sf, or v2di modes; there is no need
5169 ;; to match those modes because they are expanded to use Power8 instructions.
5171 (define_insn "*vsx_ne_<mode>_p"
5172 [(set (reg:CC CR6_REGNO)
5174 [(ne:CC (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v")
5175 (match_operand:VSX_EXTRACT_I 2 "gpc_reg_operand" "v"))]
5177 (set (match_operand:VSX_EXTRACT_I 0 "gpc_reg_operand" "=v")
5178 (ne:VSX_EXTRACT_I (match_dup 1)
5181 "vcmpne<VSX_EXTRACT_WIDTH>. %0,%1,%2"
5182 [(set_attr "type" "vecsimple")])
5184 (define_insn "*vector_nez_<mode>_p"
5185 [(set (reg:CC CR6_REGNO)
5186 (unspec:CC [(unspec:VI
5187 [(match_operand:VI 1 "gpc_reg_operand" "v")
5188 (match_operand:VI 2 "gpc_reg_operand" "v")]
5191 (set (match_operand:VI 0 "gpc_reg_operand" "=v")
5192 (unspec:VI [(match_dup 1)
5196 "vcmpnez<VSX_EXTRACT_WIDTH>. %0,%1,%2"
5197 [(set_attr "type" "vecsimple")])
5199 ;; Return first position of match between vectors using natural order
5200 ;; for both LE and BE execution modes.
5201 (define_expand "first_match_index_<mode>"
5202 [(match_operand:SI 0 "register_operand")
5203 (unspec:SI [(match_operand:VSX_EXTRACT_I 1 "register_operand")
5204 (match_operand:VSX_EXTRACT_I 2 "register_operand")]
5205 UNSPEC_VSX_FIRST_MATCH_INDEX)]
5210 rtx cmp_result = gen_reg_rtx (<MODE>mode);
5211 rtx not_result = gen_reg_rtx (<MODE>mode);
5213 emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmp_result, operands[1],
5215 emit_insn (gen_one_cmpl<mode>2 (not_result, cmp_result));
5217 sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2;
5219 if (<MODE>mode == V16QImode)
5221 if (!BYTES_BIG_ENDIAN)
5222 emit_insn (gen_vctzlsbb_<mode> (operands[0], not_result));
5224 emit_insn (gen_vclzlsbb_<mode> (operands[0], not_result));
5228 rtx tmp = gen_reg_rtx (SImode);
5229 if (!BYTES_BIG_ENDIAN)
5230 emit_insn (gen_vctzlsbb_<mode> (tmp, not_result));
5232 emit_insn (gen_vclzlsbb_<mode> (tmp, not_result));
5233 emit_insn (gen_lshrsi3 (operands[0], tmp, GEN_INT (sh)));
5238 ;; Return first position of match between vectors or end of string (EOS) using
5239 ;; natural element order for both LE and BE execution modes.
5240 (define_expand "first_match_or_eos_index_<mode>"
5241 [(match_operand:SI 0 "register_operand")
5242 (unspec: SI [(match_operand:VSX_EXTRACT_I 1 "register_operand")
5243 (match_operand:VSX_EXTRACT_I 2 "register_operand")]
5244 UNSPEC_VSX_FIRST_MATCH_EOS_INDEX)]
5248 rtx cmpz1_result = gen_reg_rtx (<MODE>mode);
5249 rtx cmpz2_result = gen_reg_rtx (<MODE>mode);
5250 rtx cmpz_result = gen_reg_rtx (<MODE>mode);
5251 rtx and_result = gen_reg_rtx (<MODE>mode);
5252 rtx result = gen_reg_rtx (<MODE>mode);
5253 rtx vzero = gen_reg_rtx (<MODE>mode);
5255 /* Vector with zeros in elements that correspond to zeros in operands. */
5256 emit_move_insn (vzero, CONST0_RTX (<MODE>mode));
5257 emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz1_result, operands[1], vzero));
5258 emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz2_result, operands[2], vzero));
5259 emit_insn (gen_and<mode>3 (and_result, cmpz1_result, cmpz2_result));
5261 /* Vector with ones in elements that do not match.  */
5262 emit_insn (gen_vcmpnez<VSX_EXTRACT_WIDTH> (cmpz_result, operands[1],
5265 /* Create vector with ones in elements where there was a zero in one of
5266 the source elements or where the elements match.  */
5267 emit_insn (gen_nand<mode>3 (result, and_result, cmpz_result));
5268 sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2;
5270 if (<MODE>mode == V16QImode)
5272 if (!BYTES_BIG_ENDIAN)
5273 emit_insn (gen_vctzlsbb_<mode> (operands[0], result));
5275 emit_insn (gen_vclzlsbb_<mode> (operands[0], result));
5279 rtx tmp = gen_reg_rtx (SImode);
5280 if (!BYTES_BIG_ENDIAN)
5281 emit_insn (gen_vctzlsbb_<mode> (tmp, result));
5283 emit_insn (gen_vclzlsbb_<mode> (tmp, result));
5284 emit_insn (gen_lshrsi3 (operands[0], tmp, GEN_INT (sh)));
5289 ;; Return first position of mismatch between vectors using natural
5290 ;; element order for both LE and BE execution modes.
5291 (define_expand "first_mismatch_index_<mode>"
5292 [(match_operand:SI 0 "register_operand")
5293 (unspec: SI [(match_operand:VSX_EXTRACT_I 1 "register_operand")
5294 (match_operand:VSX_EXTRACT_I 2 "register_operand")]
5295 UNSPEC_VSX_FIRST_MISMATCH_INDEX)]
5299 rtx cmp_result = gen_reg_rtx (<MODE>mode);
5301 emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmp_result, operands[1],
5303 sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2;
5305 if (<MODE>mode == V16QImode)
5307 if (!BYTES_BIG_ENDIAN)
5308 emit_insn (gen_vctzlsbb_<mode> (operands[0], cmp_result));
5310 emit_insn (gen_vclzlsbb_<mode> (operands[0], cmp_result));
5314 rtx tmp = gen_reg_rtx (SImode);
5315 if (!BYTES_BIG_ENDIAN)
5316 emit_insn (gen_vctzlsbb_<mode> (tmp, cmp_result));
5318 emit_insn (gen_vclzlsbb_<mode> (tmp, cmp_result));
5319 emit_insn (gen_lshrsi3 (operands[0], tmp, GEN_INT (sh)));
5324 ;; Return first position of mismatch between vectors or end of string (EOS)
5325 ;; using natural element order for both LE and BE execution modes.
5326 (define_expand "first_mismatch_or_eos_index_<mode>"
5327 [(match_operand:SI 0 "register_operand")
5328 (unspec: SI [(match_operand:VSX_EXTRACT_I 1 "register_operand")
5329 (match_operand:VSX_EXTRACT_I 2 "register_operand")]
5330 UNSPEC_VSX_FIRST_MISMATCH_EOS_INDEX)]
5334 rtx cmpz1_result = gen_reg_rtx (<MODE>mode);
5335 rtx cmpz2_result = gen_reg_rtx (<MODE>mode);
5336 rtx cmpz_result = gen_reg_rtx (<MODE>mode);
5337 rtx not_cmpz_result = gen_reg_rtx (<MODE>mode);
5338 rtx and_result = gen_reg_rtx (<MODE>mode);
5339 rtx result = gen_reg_rtx (<MODE>mode);
5340 rtx vzero = gen_reg_rtx (<MODE>mode);
5342 /* Vector with zeros in elements that correspond to zeros in operands. */
5343 emit_move_insn (vzero, CONST0_RTX (<MODE>mode));
5345 emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz1_result, operands[1], vzero));
5346 emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz2_result, operands[2], vzero));
5347 emit_insn (gen_and<mode>3 (and_result, cmpz1_result, cmpz2_result));
5349 /* Vector with ones in elements that match.  */
5350 emit_insn (gen_vcmpnez<VSX_EXTRACT_WIDTH> (cmpz_result, operands[1],
5352 emit_insn (gen_one_cmpl<mode>2 (not_cmpz_result, cmpz_result));
5354 /* Create vector with ones in elements where there was a zero in one of
5355 the source elements or the elements did not match. */
5356 emit_insn (gen_nand<mode>3 (result, and_result, not_cmpz_result));
5357 sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2;
5359 if (<MODE>mode == V16QImode)
5361 if (!BYTES_BIG_ENDIAN)
5362 emit_insn (gen_vctzlsbb_<mode> (operands[0], result));
5364 emit_insn (gen_vclzlsbb_<mode> (operands[0], result));
5368 rtx tmp = gen_reg_rtx (SImode);
5369 if (!BYTES_BIG_ENDIAN)
5370 emit_insn (gen_vctzlsbb_<mode> (tmp, result));
5372 emit_insn (gen_vclzlsbb_<mode> (tmp, result));
5373 emit_insn (gen_lshrsi3 (operands[0], tmp, GEN_INT (sh)));
5378 ;; Load VSX Vector with Length
5379 (define_expand "lxvl"
5381 (ashift:DI (match_operand:DI 2 "register_operand")
5383 (set (match_operand:V16QI 0 "vsx_register_operand")
5385 [(match_operand:DI 1 "gpc_reg_operand")
5386 (mem:V16QI (match_dup 1))
5389 "TARGET_P9_VECTOR && TARGET_64BIT"
5391 operands[3] = gen_reg_rtx (DImode);
5394 (define_insn "*lxvl"
5395 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
5397 [(match_operand:DI 1 "gpc_reg_operand" "b")
5398 (mem:V16QI (match_dup 1))
5399 (match_operand:DI 2 "register_operand" "r")]
5401 "TARGET_P9_VECTOR && TARGET_64BIT"
5403 [(set_attr "type" "vecload")])
5405 (define_insn "lxvll"
5406 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
5407 (unspec:V16QI [(match_operand:DI 1 "gpc_reg_operand" "b")
5408 (mem:V16QI (match_dup 1))
5409 (match_operand:DI 2 "register_operand" "r")]
5413 [(set_attr "type" "vecload")])
5415 ;; Expand for builtin xl_len_r
5416 (define_expand "xl_len_r"
5417 [(match_operand:V16QI 0 "vsx_register_operand")
5418 (match_operand:DI 1 "register_operand")
5419 (match_operand:DI 2 "register_operand")]
5422 rtx shift_mask = gen_reg_rtx (V16QImode);
5423 rtx rtx_vtmp = gen_reg_rtx (V16QImode);
5424 rtx tmp = gen_reg_rtx (DImode);
5426 emit_insn (gen_altivec_lvsl_reg_di (shift_mask, operands[2]));
5427 emit_insn (gen_ashldi3 (tmp, operands[2], GEN_INT (56)));
5428 emit_insn (gen_lxvll (rtx_vtmp, operands[1], tmp));
5429 emit_insn (gen_altivec_vperm_v8hiv16qi (operands[0], rtx_vtmp, rtx_vtmp,
5434 (define_insn "stxvll"
5435 [(set (mem:V16QI (match_operand:DI 1 "gpc_reg_operand" "b"))
5436 (unspec:V16QI [(match_operand:V16QI 0 "vsx_register_operand" "wa")
5437 (mem:V16QI (match_dup 1))
5438 (match_operand:DI 2 "register_operand" "r")]
5442 [(set_attr "type" "vecstore")])
5444 ;; Store VSX Vector with Length
5445 (define_expand "stxvl"
5447 (ashift:DI (match_operand:DI 2 "register_operand")
5449 (set (mem:V16QI (match_operand:DI 1 "gpc_reg_operand"))
5451 [(match_operand:V16QI 0 "vsx_register_operand")
5452 (mem:V16QI (match_dup 1))
5455 "TARGET_P9_VECTOR && TARGET_64BIT"
5457 operands[3] = gen_reg_rtx (DImode);
5460 ;; Define optab for vector access with length vectorization exploitation.
5461 (define_expand "len_load_v16qi"
5462 [(match_operand:V16QI 0 "vlogical_operand")
5463 (match_operand:V16QI 1 "memory_operand")
5464 (match_operand:QI 2 "gpc_reg_operand")]
5465 "TARGET_P9_VECTOR && TARGET_64BIT"
5467 rtx mem = XEXP (operands[1], 0);
5468 mem = force_reg (DImode, mem);
5469 rtx len = gen_lowpart (DImode, operands[2]);
5470 emit_insn (gen_lxvl (operands[0], mem, len));
5474 (define_expand "len_store_v16qi"
5475 [(match_operand:V16QI 0 "memory_operand")
5476 (match_operand:V16QI 1 "vlogical_operand")
5477 (match_operand:QI 2 "gpc_reg_operand")
5479 "TARGET_P9_VECTOR && TARGET_64BIT"
5481 rtx mem = XEXP (operands[0], 0);
5482 mem = force_reg (DImode, mem);
5483 rtx len = gen_lowpart (DImode, operands[2]);
5484 emit_insn (gen_stxvl (operands[1], mem, len));
5488 (define_insn "*stxvl"
5489 [(set (mem:V16QI (match_operand:DI 1 "gpc_reg_operand" "b"))
5491 [(match_operand:V16QI 0 "vsx_register_operand" "wa")
5492 (mem:V16QI (match_dup 1))
5493 (match_operand:DI 2 "register_operand" "r")]
5495 "TARGET_P9_VECTOR && TARGET_64BIT"
5497 [(set_attr "type" "vecstore")])
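;; As a rough illustration (assumed usage; 64-bit, ISA 3.0): lxvl/stxvl back
;; the vec_xl_len/vec_xst_len built-ins, which load and store only the first
;; n bytes of a vector.
;;
;;   #include <stddef.h>
;;   #include <altivec.h>
;;
;;   void
;;   copy_up_to_16 (unsigned char *dst, unsigned char *src, size_t n)
;;   {
;;     vector unsigned char v = vec_xl_len (src, n);   /* n <= 16 */
;;     vec_xst_len (v, dst, n);
;;   }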
5499 ;; Expand for builtin xst_len_r
5500 (define_expand "xst_len_r"
5501 [(match_operand:V16QI 0 "vsx_register_operand" "=wa")
5502 (match_operand:DI 1 "register_operand" "b")
5503 (match_operand:DI 2 "register_operand" "r")]
5506 rtx shift_mask = gen_reg_rtx (V16QImode);
5507 rtx rtx_vtmp = gen_reg_rtx (V16QImode);
5508 rtx tmp = gen_reg_rtx (DImode);
5510 emit_insn (gen_altivec_lvsr_reg_di (shift_mask, operands[2]));
5511 emit_insn (gen_altivec_vperm_v8hiv16qi (rtx_vtmp, operands[0], operands[0],
5513 emit_insn (gen_ashldi3 (tmp, operands[2], GEN_INT (56)));
5514 emit_insn (gen_stxvll (rtx_vtmp, operands[1], tmp));
5518 ;; Vector Compare Not Equal Byte (specified/not+eq:)
5519 (define_insn "vcmpneb"
5520 [(set (match_operand:V16QI 0 "altivec_register_operand" "=v")
5522 (eq:V16QI (match_operand:V16QI 1 "altivec_register_operand" "v")
5523 (match_operand:V16QI 2 "altivec_register_operand" "v"))))]
5526 [(set_attr "type" "vecsimple")])
5528 ;; Vector Compare Not Equal or Zero Byte
5529 (define_insn "vcmpnezb"
5530 [(set (match_operand:V16QI 0 "altivec_register_operand" "=v")
5532 [(match_operand:V16QI 1 "altivec_register_operand" "v")
5533 (match_operand:V16QI 2 "altivec_register_operand" "v")]
5537 [(set_attr "type" "vecsimple")])
5539 ;; Vector Compare Not Equal or Zero Byte predicate or record-form
5540 (define_insn "vcmpnezb_p"
5541 [(set (reg:CC CR6_REGNO)
5543 [(match_operand:V16QI 1 "altivec_register_operand" "v")
5544 (match_operand:V16QI 2 "altivec_register_operand" "v")]
5546 (set (match_operand:V16QI 0 "altivec_register_operand" "=v")
5552 "vcmpnezb. %0,%1,%2"
5553 [(set_attr "type" "vecsimple")])
5555 ;; Vector Compare Not Equal Half Word (specified/not+eq:)
5556 (define_insn "vcmpneh"
5557 [(set (match_operand:V8HI 0 "altivec_register_operand" "=v")
5559 (eq:V8HI (match_operand:V8HI 1 "altivec_register_operand" "v")
5560 (match_operand:V8HI 2 "altivec_register_operand" "v"))))]
5563 [(set_attr "type" "vecsimple")])
5565 ;; Vector Compare Not Equal or Zero Half Word
5566 (define_insn "vcmpnezh"
5567 [(set (match_operand:V8HI 0 "altivec_register_operand" "=v")
5568 (unspec:V8HI [(match_operand:V8HI 1 "altivec_register_operand" "v")
5569 (match_operand:V8HI 2 "altivec_register_operand" "v")]
5573 [(set_attr "type" "vecsimple")])
5575 ;; Vector Compare Not Equal Word (specified/not+eq:)
5576 (define_insn "vcmpnew"
5577 [(set (match_operand:V4SI 0 "altivec_register_operand" "=v")
5579 (eq:V4SI (match_operand:V4SI 1 "altivec_register_operand" "v")
5580 (match_operand:V4SI 2 "altivec_register_operand" "v"))))]
5583 [(set_attr "type" "vecsimple")])
5585 ;; Vector Compare Not Equal or Zero Word
5586 (define_insn "vcmpnezw"
5587 [(set (match_operand:V4SI 0 "altivec_register_operand" "=v")
5588 (unspec:V4SI [(match_operand:V4SI 1 "altivec_register_operand" "v")
5589 (match_operand:V4SI 2 "altivec_register_operand" "v")]
5593 [(set_attr "type" "vecsimple")])
5595 ;; Vector Count Leading Zero Least-Significant Bits Byte
5596 (define_insn "vclzlsbb_<mode>"
5597 [(set (match_operand:SI 0 "register_operand" "=r")
5599 [(match_operand:VSX_EXTRACT_I 1 "altivec_register_operand" "v")]
5603 [(set_attr "type" "vecsimple")])
5605 ;; Vector Count Trailing Zero Least-Significant Bits Byte
5606 (define_insn "vctzlsbb_<mode>"
5607 [(set (match_operand:SI 0 "register_operand" "=r")
5609 [(match_operand:VSX_EXTRACT_I 1 "altivec_register_operand" "v")]
5613 [(set_attr "type" "vecsimple")])
5615 ;; Vector Extract Unsigned Byte Left-Indexed
5616 (define_insn "vextublx"
5617 [(set (match_operand:SI 0 "register_operand" "=r")
5619 [(match_operand:SI 1 "register_operand" "r")
5620 (match_operand:V16QI 2 "altivec_register_operand" "v")]
5624 [(set_attr "type" "vecsimple")])
5626 ;; Vector Extract Unsigned Byte Right-Indexed
5627 (define_insn "vextubrx"
5628 [(set (match_operand:SI 0 "register_operand" "=r")
5630 [(match_operand:SI 1 "register_operand" "r")
5631 (match_operand:V16QI 2 "altivec_register_operand" "v")]
5635 [(set_attr "type" "vecsimple")])
5637 ;; Vector Extract Unsigned Half Word Left-Indexed
5638 (define_insn "vextuhlx"
5639 [(set (match_operand:SI 0 "register_operand" "=r")
5641 [(match_operand:SI 1 "register_operand" "r")
5642 (match_operand:V8HI 2 "altivec_register_operand" "v")]
5646 [(set_attr "type" "vecsimple")])
5648 ;; Vector Extract Unsigned Half Word Right-Indexed
5649 (define_insn "vextuhrx"
5650 [(set (match_operand:SI 0 "register_operand" "=r")
5652 [(match_operand:SI 1 "register_operand" "r")
5653 (match_operand:V8HI 2 "altivec_register_operand" "v")]
5657 [(set_attr "type" "vecsimple")])
5659 ;; Vector Extract Unsigned Word Left-Indexed
5660 (define_insn "vextuwlx"
5661 [(set (match_operand:SI 0 "register_operand" "=r")
5663 [(match_operand:SI 1 "register_operand" "r")
5664 (match_operand:V4SI 2 "altivec_register_operand" "v")]
5668 [(set_attr "type" "vecsimple")])
5670 ;; Vector Extract Unsigned Word Right-Indexed
5671 (define_insn "vextuwrx"
5672 [(set (match_operand:SI 0 "register_operand" "=r")
5674 [(match_operand:SI 1 "register_operand" "r")
5675 (match_operand:V4SI 2 "altivec_register_operand" "v")]
5679 [(set_attr "type" "vecsimple")])
5681 ;; Vector insert/extract word at arbitrary byte values. Note, the little
5682 ;; endian version needs to adjust the byte number and the V4SI element used by insert4b.
5684 (define_insn "extract4b"
5685 [(set (match_operand:V2DI 0 "vsx_register_operand")
5686 (unspec:V2DI [(match_operand:V16QI 1 "vsx_register_operand" "wa")
5687 (match_operand:QI 2 "const_0_to_12_operand" "n")]
5688 UNSPEC_XXEXTRACTUW))]
5691 if (!BYTES_BIG_ENDIAN)
5692 operands[2] = GEN_INT (12 - INTVAL (operands[2]));
5694 return "xxextractuw %x0,%x1,%2";
5697 (define_expand "insert4b"
5698 [(set (match_operand:V16QI 0 "vsx_register_operand")
5699 (unspec:V16QI [(match_operand:V4SI 1 "vsx_register_operand")
5700 (match_operand:V16QI 2 "vsx_register_operand")
5701 (match_operand:QI 3 "const_0_to_12_operand")]
5705 if (!BYTES_BIG_ENDIAN)
5707 rtx op1 = operands[1];
5708 rtx v4si_tmp = gen_reg_rtx (V4SImode);
5709 emit_insn (gen_vsx_xxpermdi_v4si_be (v4si_tmp, op1, op1, const1_rtx));
5710 operands[1] = v4si_tmp;
5711 operands[3] = GEN_INT (12 - INTVAL (operands[3]));
5715 (define_insn "*insert4b_internal"
5716 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
5717 (unspec:V16QI [(match_operand:V4SI 1 "vsx_register_operand" "wa")
5718 (match_operand:V16QI 2 "vsx_register_operand" "0")
5719 (match_operand:QI 3 "const_0_to_12_operand" "n")]
5722 "xxinsertw %x0,%x1,%3"
5723 [(set_attr "type" "vecperm")])
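;; Usage illustration (not part of the machine description): extract4b and
;; insert4b back the GCC built-ins vec_extract4b and vec_insert4b
;; (xxextractuw/xxinsertw with a byte offset of 0..12).  A minimal sketch;
;; the overload signatures are an assumption taken from the ISA 3.0
;; built-in documentation:
;;
;;	#include <altivec.h>
;;
;;	/* Copy the 4 bytes starting at byte 4 of SRC and insert them at
;;	   byte 8 of DST.  */
;;	vector unsigned char
;;	move_word (vector unsigned char src, vector unsigned char dst)
;;	{
;;	  vector unsigned long long w = vec_extract4b (src, 4);
;;	  return vec_insert4b ((vector signed int) w, dst, 8);
;;	}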
5726 ;; Generate a vector of four 32-bit float values extracted from the left
5727 ;; four elements of an eight-element vector of 16-bit float values.
5728 (define_expand "vextract_fp_from_shorth"
5729 [(set (match_operand:V4SF 0 "register_operand" "=wa")
5730 (unspec:V4SF [(match_operand:V8HI 1 "register_operand" "wa")]
5731 UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTH))]
5735 int vals_le[16] = {15, 14, 0, 0, 13, 12, 0, 0, 11, 10, 0, 0, 9, 8, 0, 0};
5736 int vals_be[16] = {0, 0, 0, 1, 0, 0, 2, 3, 0, 0, 4, 5, 0, 0, 6, 7};
5739 rtx mask = gen_reg_rtx (V16QImode);
5740 rtx tmp = gen_reg_rtx (V16QImode);
5743 for (i = 0; i < 16; i++)
5744 if (!BYTES_BIG_ENDIAN)
5745 rvals[i] = GEN_INT (vals_le[i]);
5746 else
5747 rvals[i] = GEN_INT (vals_be[i]);
5749 /* xvcvhpsp - vector convert F16 to vector F32 requires the four F16
5750 inputs in half words 1,3,5,7 (IBM numbering). Use xxperm to move
5751 src half words 0,1,2,3 (LE), src half words 4,5,6,7 (BE) for the
5752 conversion instruction. */
5753 v = gen_rtvec_v (16, rvals);
5754 emit_insn (gen_vec_initv16qiqi (mask, gen_rtx_PARALLEL (V16QImode, v)));
5755 emit_insn (gen_altivec_vperm_v8hiv16qi (tmp, operands[1],
5756 operands[1], mask));
5757 emit_insn (gen_vsx_xvcvhpsp (operands[0], tmp));
5761 ;; Generate a vector of four 32-bit float values extracted from the right
5762 ;; four elements of an eight-element vector of 16-bit float values.
5763 (define_expand "vextract_fp_from_shortl"
5764 [(set (match_operand:V4SF 0 "register_operand" "=wa")
5765 (unspec:V4SF [(match_operand:V8HI 1 "register_operand" "wa")]
5766 UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTL))]
5769 int vals_le[16] = {7, 6, 0, 0, 5, 4, 0, 0, 3, 2, 0, 0, 1, 0, 0, 0};
5770 int vals_be[16] = {0, 0, 8, 9, 0, 0, 10, 11, 0, 0, 12, 13, 0, 0, 14, 15};
5774 rtx mask = gen_reg_rtx (V16QImode);
5775 rtx tmp = gen_reg_rtx (V16QImode);
5778 for (i = 0; i < 16; i++)
5779 if (!BYTES_BIG_ENDIAN)
5780 rvals[i] = GEN_INT (vals_le[i]);
5781 else
5782 rvals[i] = GEN_INT (vals_be[i]);
5784 /* xvcvhpsp - vector convert F16 to vector F32 requires the four F16
5785 inputs in half words 1,3,5,7 (IBM numbering). Use xxperm to move
5786 src half words 4,5,6,7 (LE), src half words 0,1,2,3 (BE) for the
5787 conversion instruction. */
5788 v = gen_rtvec_v (16, rvals);
5789 emit_insn (gen_vec_initv16qiqi (mask, gen_rtx_PARALLEL (V16QImode, v)));
5790 emit_insn (gen_altivec_vperm_v8hiv16qi (tmp, operands[1],
5791 operands[1], mask));
5792 emit_insn (gen_vsx_xvcvhpsp (operands[0], tmp));
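;; Usage illustration (not part of the machine description): these two
;; expanders back the ISA 3.0 built-ins vec_extract_fp32_from_shorth and
;; vec_extract_fp32_from_shortl, which widen half of a vector of IEEE
;; 16-bit floats to 32-bit floats via xvcvhpsp.  A minimal sketch; the
;; built-in names are taken from the GCC PowerPC built-in documentation:
;;
;;	#include <altivec.h>
;;
;;	/* Widen all eight F16 values in H into two V4SF vectors.  */
;;	void
;;	widen_f16 (vector unsigned short h, vector float *left,
;;		   vector float *right)
;;	{
;;	  *left  = vec_extract_fp32_from_shorth (h);
;;	  *right = vec_extract_fp32_from_shortl (h);
;;	}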
5796 ;; Support for ISA 3.0 vector byte reverse
5798 ;; Swap all bytes within a vector
5799 (define_insn "p9_xxbrq_v1ti"
5800 [(set (match_operand:V1TI 0 "vsx_register_operand" "=wa")
5801 (bswap:V1TI (match_operand:V1TI 1 "vsx_register_operand" "wa")))]
5802 "TARGET_P9_VECTOR"
5803 "xxbrq %x0,%x1"
5804 [(set_attr "type" "vecperm")])
5806 (define_expand "p9_xxbrq_v16qi"
5807 [(use (match_operand:V16QI 0 "vsx_register_operand" "=wa"))
5808 (use (match_operand:V16QI 1 "vsx_register_operand" "wa"))]
5811 rtx op0 = gen_reg_rtx (V1TImode);
5812 rtx op1 = gen_lowpart (V1TImode, operands[1]);
5813 emit_insn (gen_p9_xxbrq_v1ti (op0, op1));
5814 emit_move_insn (operands[0], gen_lowpart (V16QImode, op0));
5818 ;; Swap all bytes in each 64-bit element
5819 (define_insn "p9_xxbrd_v2di"
5820 [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
5821 (bswap:V2DI (match_operand:V2DI 1 "vsx_register_operand" "wa")))]
5822 "TARGET_P9_VECTOR"
5823 "xxbrd %x0,%x1"
5824 [(set_attr "type" "vecperm")])
5826 (define_expand "p9_xxbrd_v2df"
5827 [(use (match_operand:V2DF 0 "vsx_register_operand" "=wa"))
5828 (use (match_operand:V2DF 1 "vsx_register_operand" "wa"))]
5831 rtx op0 = gen_reg_rtx (V2DImode);
5832 rtx op1 = gen_lowpart (V2DImode, operands[1]);
5833 emit_insn (gen_p9_xxbrd_v2di (op0, op1));
5834 emit_move_insn (operands[0], gen_lowpart (V2DFmode, op0));
5838 ;; Swap all bytes in each 32-bit element
5839 (define_insn "p9_xxbrw_v4si"
5840 [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa")
5841 (bswap:V4SI (match_operand:V4SI 1 "vsx_register_operand" "wa")))]
5842 "TARGET_P9_VECTOR"
5843 "xxbrw %x0,%x1"
5844 [(set_attr "type" "vecperm")])
5846 (define_expand "p9_xxbrw_v4sf"
5847 [(use (match_operand:V4SF 0 "vsx_register_operand" "=wa"))
5848 (use (match_operand:V4SF 1 "vsx_register_operand" "wa"))]
5851 rtx op0 = gen_reg_rtx (V4SImode);
5852 rtx op1 = gen_lowpart (V4SImode, operands[1]);
5853 emit_insn (gen_p9_xxbrw_v4si (op0, op1));
5854 emit_move_insn (operands[0], gen_lowpart (V4SFmode, op0));
5858 ;; Swap all bytes in each element of vector
5859 (define_expand "revb_<mode>"
5860 [(use (match_operand:VEC_REVB 0 "vsx_register_operand"))
5861 (use (match_operand:VEC_REVB 1 "vsx_register_operand"))]
5864 if (TARGET_P9_VECTOR)
5865 emit_insn (gen_p9_xxbr<VSX_XXBR>_<mode> (operands[0], operands[1]));
5866 else
5867 {
5868 /* Want to have the elements in reverse order relative
5869 to the endian mode in use, i.e. in LE mode, put elements
5870 in BE order.  */
5871 rtx sel = swap_endian_selector_for_mode (<MODE>mode);
5872 emit_insn (gen_altivec_vperm_<mode> (operands[0], operands[1],
5873 operands[1], sel));
5874 }
5876 DONE;
5877 })
5879 ;; Reversing bytes in vector char is just a NOP.
5880 (define_expand "revb_v16qi"
5881 [(set (match_operand:V16QI 0 "vsx_register_operand")
5882 (bswap:V16QI (match_operand:V16QI 1 "vsx_register_operand")))]
5885 emit_move_insn (operands[0], operands[1]);
5889 ;; Swap all bytes in each 16-bit element
5890 (define_insn "p9_xxbrh_v8hi"
5891 [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
5892 (bswap:V8HI (match_operand:V8HI 1 "vsx_register_operand" "wa")))]
5893 "TARGET_P9_VECTOR"
5894 "xxbrh %x0,%x1"
5895 [(set_attr "type" "vecperm")])
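;; Usage illustration (not part of the machine description): the revb
;; expanders and the p9_xxbr* insns above implement per-element byte
;; reversal; at the source level this is reachable through the vec_revb
;; built-in (ISA 3.0 xxbr* instructions with -mcpu=power9, a vperm
;; fallback otherwise).  A minimal sketch:
;;
;;	#include <altivec.h>
;;
;;	/* Byte-swap each 32-bit element, e.g. to convert a vector of
;;	   big-endian words loaded from a network buffer.  */
;;	vector unsigned int
;;	bswap_words (vector unsigned int v)
;;	{
;;	  return vec_revb (v);
;;	}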
5898 ;; Operand numbers for the following peephole2
5899 (define_constants
5900 [(SFBOOL_TMP_GPR 0) ;; GPR temporary
5901 (SFBOOL_TMP_VSX 1) ;; vector temporary
5902 (SFBOOL_MFVSR_D 2) ;; move to gpr dest
5903 (SFBOOL_MFVSR_A 3) ;; move to gpr src
5904 (SFBOOL_BOOL_D 4) ;; and/ior/xor dest
5905 (SFBOOL_BOOL_A1 5) ;; and/ior/xor arg1
5906 (SFBOOL_BOOL_A2 6) ;; and/ior/xor arg2
5907 (SFBOOL_SHL_D 7) ;; shift left dest
5908 (SFBOOL_SHL_A 8) ;; shift left arg
5909 (SFBOOL_MTVSR_D 9) ;; move to vector dest
5910 (SFBOOL_MFVSR_A_V4SF 10) ;; SFBOOL_MFVSR_A as V4SFmode
5911 (SFBOOL_BOOL_A_DI 11) ;; SFBOOL_BOOL_A1/A2 as DImode
5912 (SFBOOL_TMP_VSX_DI 12) ;; SFBOOL_TMP_VSX as DImode
5913 (SFBOOL_MTVSR_D_V4SF 13)]) ;; SFBOOL_MTVSR_D as V4SFmode
5915 ;; Attempt to optimize some common GLIBC operations using logical operations to
5916 ;; pick apart SFmode operations. For example, there is code from e_powf.c
5917 ;; after macro expansion that looks like:
5918 ;;
5919 ;; typedef union {
5920 ;;   float value;
5921 ;;   unsigned int word;
5922 ;; } ieee_float_shape_type;
5926 ;;
5927 ;; do {
5928 ;;   ieee_float_shape_type gf_u;
5929 ;;   gf_u.value = (t1);
5930 ;;   (is) = gf_u.word;
5931 ;; } while (0);
5932 ;;
5933 ;; do {
5934 ;;   ieee_float_shape_type sf_u;
5935 ;;   sf_u.word = (is & 0xfffff000);
5936 ;;   (t1) = sf_u.value;
5937 ;; } while (0);
5940 ;; This would result in two direct move operations (convert to memory format,
5941 ;; direct move to GPR, do the AND operation, direct move to VSX, convert to
5942 ;; scalar format). With this peephole, we eliminate the direct move to the
5943 ;; GPR, and instead move the integer mask value to the vector register after a
5944 ;; shift and do the VSX logical operation.
5946 ;; The insns for dealing with SFmode in GPR registers look like:
5947 ;; (set (reg:V4SF reg2) (unspec:V4SF [(reg:SF reg1)] UNSPEC_VSX_CVDPSPN))
5949 ;; (set (reg:DI reg3) (unspec:DI [(reg:V4SF reg2)] UNSPEC_P8V_RELOAD_FROM_VSX))
5951 ;; (set (reg:DI reg4) (and:DI (reg:DI reg3) (reg:DI reg3)))
5953 ;; (set (reg:DI reg5) (ashift:DI (reg:DI reg4) (const_int 32)))
5955 ;; (set (reg:SF reg6) (unspec:SF [(reg:DI reg5)] UNSPEC_P8V_MTVSRD))
5957 ;; (set (reg:SF reg6) (unspec:SF [(reg:SF reg6)] UNSPEC_VSX_CVSPDPN))
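;; For reference (not part of the machine description), the macro-expanded
;; code above is the glibc GET_FLOAT_WORD/SET_FLOAT_WORD idiom; a
;; stand-alone C sketch of the same source-level pattern, assuming nothing
;; beyond ISO C, would be:
;;
;;	static inline float
;;	mask_sf_bits (float t1)
;;	{
;;	  union { float value; unsigned int word; } u;
;;	  unsigned int is;
;;	  u.value = t1;			/* GET_FLOAT_WORD (is, t1)  */
;;	  is = u.word;
;;	  u.word = is & 0xfffff000;	/* SET_FLOAT_WORD (t1, is & mask)  */
;;	  return u.value;
;;	}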
5959 (define_peephole2
5960 [(match_scratch:DI SFBOOL_TMP_GPR "r")
5961 (match_scratch:V4SF SFBOOL_TMP_VSX "wa")
5963 ;; MFVSRWZ (aka zero_extend)
5964 (set (match_operand:DI SFBOOL_MFVSR_D "int_reg_operand")
5965 (zero_extend:DI
5966 (match_operand:SI SFBOOL_MFVSR_A "vsx_register_operand")))
5968 ;; AND/IOR/XOR operation on int
5969 (set (match_operand:SI SFBOOL_BOOL_D "int_reg_operand")
5970 (and_ior_xor:SI (match_operand:SI SFBOOL_BOOL_A1 "int_reg_operand")
5971 (match_operand:SI SFBOOL_BOOL_A2 "reg_or_cint_operand")))
5974 (set (match_operand:DI SFBOOL_SHL_D "int_reg_operand")
5975 (ashift:DI (match_operand:DI SFBOOL_SHL_A "int_reg_operand")
5976 (const_int 32)))
5979 (set (match_operand:SF SFBOOL_MTVSR_D "vsx_register_operand")
5980 (unspec:SF [(match_dup SFBOOL_SHL_D)] UNSPEC_P8V_MTVSRD))]
5982 "TARGET_POWERPC64 && TARGET_DIRECT_MOVE
5983 /* The REG_P (xxx) tests prevent SUBREGs, which allows us to use REGNO
5984 to compare registers when the modes are different.  */
5985 && REG_P (operands[SFBOOL_MFVSR_D]) && REG_P (operands[SFBOOL_BOOL_D])
5986 && REG_P (operands[SFBOOL_BOOL_A1]) && REG_P (operands[SFBOOL_SHL_D])
5987 && REG_P (operands[SFBOOL_SHL_A]) && REG_P (operands[SFBOOL_MTVSR_D])
5988 && (REG_P (operands[SFBOOL_BOOL_A2])
5989 || CONST_INT_P (operands[SFBOOL_BOOL_A2]))
5990 && (REGNO (operands[SFBOOL_BOOL_D]) == REGNO (operands[SFBOOL_MFVSR_D])
5991 || peep2_reg_dead_p (2, operands[SFBOOL_MFVSR_D]))
5992 && (REGNO (operands[SFBOOL_MFVSR_D]) == REGNO (operands[SFBOOL_BOOL_A1])
5993 || (REG_P (operands[SFBOOL_BOOL_A2])
5994 && REGNO (operands[SFBOOL_MFVSR_D])
5995 == REGNO (operands[SFBOOL_BOOL_A2])))
5996 && REGNO (operands[SFBOOL_BOOL_D]) == REGNO (operands[SFBOOL_SHL_A])
5997 && (REGNO (operands[SFBOOL_SHL_D]) == REGNO (operands[SFBOOL_BOOL_D])
5998 || peep2_reg_dead_p (3, operands[SFBOOL_BOOL_D]))
5999 && peep2_reg_dead_p (4, operands[SFBOOL_SHL_D])"
6000 [(set (match_dup SFBOOL_TMP_GPR)
6001 (ashift:DI (match_dup SFBOOL_BOOL_A_DI)
6002 (const_int 32)))
6004 (set (match_dup SFBOOL_TMP_VSX_DI)
6005 (match_dup SFBOOL_TMP_GPR))
6007 (set (match_dup SFBOOL_MTVSR_D_V4SF)
6008 (and_ior_xor:V4SF (match_dup SFBOOL_MFVSR_A_V4SF)
6009 (match_dup SFBOOL_TMP_VSX)))]
6011 rtx bool_a1 = operands[SFBOOL_BOOL_A1];
6012 rtx bool_a2 = operands[SFBOOL_BOOL_A2];
6013 int regno_mfvsr_d = REGNO (operands[SFBOOL_MFVSR_D]);
6014 int regno_mfvsr_a = REGNO (operands[SFBOOL_MFVSR_A]);
6015 int regno_tmp_vsx = REGNO (operands[SFBOOL_TMP_VSX]);
6016 int regno_mtvsr_d = REGNO (operands[SFBOOL_MTVSR_D]);
6018 if (CONST_INT_P (bool_a2))
6020 rtx tmp_gpr = operands[SFBOOL_TMP_GPR];
6021 emit_move_insn (tmp_gpr, bool_a2);
6022 operands[SFBOOL_BOOL_A_DI] = tmp_gpr;
6026 int regno_bool_a1 = REGNO (bool_a1);
6027 int regno_bool_a2 = REGNO (bool_a2);
6028 int regno_bool_a = (regno_mfvsr_d == regno_bool_a1
6029 ? regno_bool_a2 : regno_bool_a1);
6030 operands[SFBOOL_BOOL_A_DI] = gen_rtx_REG (DImode, regno_bool_a);
6033 operands[SFBOOL_MFVSR_A_V4SF] = gen_rtx_REG (V4SFmode, regno_mfvsr_a);
6034 operands[SFBOOL_TMP_VSX_DI] = gen_rtx_REG (DImode, regno_tmp_vsx);
6035 operands[SFBOOL_MTVSR_D_V4SF] = gen_rtx_REG (V4SFmode, regno_mtvsr_d);
6038 ;; Support signed/unsigned long long to float conversion vectorization.
6039 ;; Note that any_float (pc) here is just for code attribute <su>.
6040 (define_expand "vec_pack<su>_float_v2di"
6041 [(match_operand:V4SF 0 "vfloat_operand")
6042 (match_operand:V2DI 1 "vint_operand")
6043 (match_operand:V2DI 2 "vint_operand")
6047 rtx r1 = gen_reg_rtx (V4SFmode);
6048 rtx r2 = gen_reg_rtx (V4SFmode);
6049 emit_insn (gen_vsx_xvcv<su>xdsp (r1, operands[1]));
6050 emit_insn (gen_vsx_xvcv<su>xdsp (r2, operands[2]));
6051 rs6000_expand_extract_even (operands[0], r1, r2);
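;; Usage illustration (not part of the machine description): this expander
;; lets the vectorizer narrow two V2DI vectors of 64-bit integers into one
;; V4SF vector of floats.  A plain C loop of this shape is the kind of
;; code it is meant to vectorize (a sketch, nothing GCC-specific assumed):
;;
;;	void
;;	ll_to_float (float *restrict out, const long long *restrict in, int n)
;;	{
;;	  for (int i = 0; i < n; i++)
;;	    out[i] = (float) in[i];	/* xvcvsxdsp + extract-even  */
;;	}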
6055 ;; Support float to signed/unsigned long long conversion vectorization.
6056 ;; Note that any_fix (pc) here is just for code attribute <su>.
6057 (define_expand "vec_unpack_<su>fix_trunc_hi_v4sf"
6058 [(match_operand:V2DI 0 "vint_operand")
6059 (match_operand:V4SF 1 "vfloat_operand")
6063 rtx reg = gen_reg_rtx (V4SFmode);
6064 rs6000_expand_interleave (reg, operands[1], operands[1], BYTES_BIG_ENDIAN);
6065 emit_insn (gen_vsx_xvcvsp<su>xds (operands[0], reg));
6069 ;; Note that any_fix (pc) here is just for code attribute <su>.
6070 (define_expand "vec_unpack_<su>fix_trunc_lo_v4sf"
6071 [(match_operand:V2DI 0 "vint_operand")
6072 (match_operand:V4SF 1 "vfloat_operand")
6076 rtx reg = gen_reg_rtx (V4SFmode);
6077 rs6000_expand_interleave (reg, operands[1], operands[1], !BYTES_BIG_ENDIAN);
6078 emit_insn (gen_vsx_xvcvsp<su>xds (operands[0], reg));
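;; Usage illustration (not part of the machine description): the hi/lo
;; unpack expanders let the vectorizer widen a V4SF vector into two V2DI
;; vectors of truncated integer results.  A plain C loop of this shape is
;; the target (a sketch, nothing GCC-specific assumed):
;;
;;	void
;;	float_to_ll (long long *restrict out, const float *restrict in, int n)
;;	{
;;	  for (int i = 0; i < n; i++)
;;	    out[i] = (long long) in[i];	/* interleave + xvcvspsxds  */
;;	}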
6082 (define_insn "vsx_<xvcvbf16>"
6083 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
6084 (unspec:V16QI [(match_operand:V16QI 1 "vsx_register_operand" "wa")]
6087 "<xvcvbf16> %x0,%x1"
6088 [(set_attr "type" "vecfloat")])
6090 (define_insn "vec_mtvsrbmi"
6091 [(set (match_operand:V16QI 0 "altivec_register_operand" "=v")
6092 (unspec:V16QI [(match_operand:QI 1 "u6bit_cint_operand" "n")]
6098 (define_insn "vec_mtvsr_<mode>"
6099 [(set (match_operand:VSX_MM 0 "altivec_register_operand" "=v")
6100 (unspec:VSX_MM [(match_operand:DI 1 "gpc_reg_operand" "r")]
6104 [(set_attr "type" "vecsimple")])
6106 (define_insn "vec_cntmb_<mode>"
6107 [(set (match_operand:DI 0 "gpc_reg_operand" "=r")
6108 (unspec:DI [(match_operand:VSX_MM4 1 "altivec_register_operand" "v")
6109 (match_operand:QI 2 "const_0_to_1_operand" "n")]
6112 "vcntmb<wd> %0,%1,%2"
6113 [(set_attr "type" "vecsimple")])
6115 (define_insn "vec_extract_<mode>"
6116 [(set (match_operand:SI 0 "register_operand" "=r")
6117 (unspec:SI [(match_operand:VSX_MM 1 "altivec_register_operand" "v")]
6120 "vextract<wd>m %0,%1"
6121 [(set_attr "type" "vecsimple")])
6123 (define_insn "vec_expand_<mode>"
6124 [(set (match_operand:VSX_MM 0 "vsx_register_operand" "=v")
6125 (unspec:VSX_MM [(match_operand:VSX_MM 1 "vsx_register_operand" "v")]
6128 "vexpand<wd>m %0,%1"
6129 [(set_attr "type" "vecsimple")])
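;; Usage illustration (not part of the machine description): the mtvsrbm /
;; vcntmb / vextract*m / vexpand*m patterns above back the ISA 3.1 vector
;; mask built-ins.  A minimal sketch with -mcpu=power10; the names
;; vec_genbm, vec_expandm and vec_cntm follow the Power Vector Intrinsic
;; Programming Reference and are an assumption here:
;;
;;	#include <altivec.h>
;;
;;	/* Build a byte-mask vector from a 16-bit mask, expand each mask
;;	   bit to a full 0x00/0xff byte, then count the set mask bytes.  */
;;	unsigned long long
;;	mask_demo (unsigned long long bits)
;;	{
;;	  vector unsigned char m = vec_genbm (bits);	/* mtvsrbm    */
;;	  m = vec_expandm (m);				/* vexpandbm  */
;;	  return vec_cntm (m, 1);			/* vcntmb     */
;;	}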
6131 (define_insn "dives_<mode>"
6132 [(set (match_operand:VIlong 0 "vsx_register_operand" "=v")
6133 (unspec:VIlong [(match_operand:VIlong 1 "vsx_register_operand" "v")
6134 (match_operand:VIlong 2 "vsx_register_operand" "v")]
6137 "vdives<wd> %0,%1,%2"
6138 [(set_attr "type" "vecdiv")
6139 (set_attr "size" "<bits>")])
6141 (define_insn "diveu_<mode>"
6142 [(set (match_operand:VIlong 0 "vsx_register_operand" "=v")
6143 (unspec:VIlong [(match_operand:VIlong 1 "vsx_register_operand" "v")
6144 (match_operand:VIlong 2 "vsx_register_operand" "v")]
6147 "vdiveu<wd> %0,%1,%2"
6148 [(set_attr "type" "vecdiv")
6149 (set_attr "size" "<bits>")])
6151 (define_insn "div<mode>3"
6152 [(set (match_operand:VIlong 0 "vsx_register_operand" "=v")
6153 (div:VIlong (match_operand:VIlong 1 "vsx_register_operand" "v")
6154 (match_operand:VIlong 2 "vsx_register_operand" "v")))]
6156 "vdivs<wd> %0,%1,%2"
6157 [(set_attr "type" "vecdiv")
6158 (set_attr "size" "<bits>")])
6160 (define_insn "udiv<mode>3"
6161 [(set (match_operand:VIlong 0 "vsx_register_operand" "=v")
6162 (udiv:VIlong (match_operand:VIlong 1 "vsx_register_operand" "v")
6163 (match_operand:VIlong 2 "vsx_register_operand" "v")))]
6165 "vdivu<wd> %0,%1,%2"
6166 [(set_attr "type" "vecdiv")
6167 (set_attr "size" "<bits>")])
6169 (define_insn "mods_<mode>"
6170 [(set (match_operand:VIlong 0 "vsx_register_operand" "=v")
6171 (mod:VIlong (match_operand:VIlong 1 "vsx_register_operand" "v")
6172 (match_operand:VIlong 2 "vsx_register_operand" "v")))]
6174 "vmods<wd> %0,%1,%2"
6175 [(set_attr "type" "vecdiv")
6176 (set_attr "size" "<bits>")])
6178 (define_insn "modu_<mode>"
6179 [(set (match_operand:VIlong 0 "vsx_register_operand" "=v")
6180 (umod:VIlong (match_operand:VIlong 1 "vsx_register_operand" "v")
6181 (match_operand:VIlong 2 "vsx_register_operand" "v")))]
6183 "vmodu<wd> %0,%1,%2"
6184 [(set_attr "type" "vecdiv")
6185 (set_attr "size" "<bits>")])
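;; Usage illustration (not part of the machine description): with
;; -mcpu=power10 the element-wise divide and modulo patterns above can be
;; reached through the vec_div and vec_mod built-ins (or through plain
;; vector arithmetic).  A minimal sketch; vec_mod on 64-bit elements is an
;; ISA 3.1 addition, so its availability is an assumption here:
;;
;;	#include <altivec.h>
;;
;;	vector signed long long
;;	div_mod (vector signed long long a, vector signed long long b,
;;		 vector signed long long *rem)
;;	{
;;	  *rem = vec_mod (a, b);	/* vmodsd  */
;;	  return vec_div (a, b);	/* vdivsd  */
;;	}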
6187 (define_insn "mulhs_<mode>"
6188 [(set (match_operand:VIlong 0 "vsx_register_operand" "=v")
6189 (mult:VIlong (ashiftrt
6190 (match_operand:VIlong 1 "vsx_register_operand" "v")
6193 (match_operand:VIlong 2 "vsx_register_operand" "v")
6196 "vmulhs<wd> %0,%1,%2"
6197 [(set_attr "type" "veccomplex")])
6199 (define_insn "mulhu_<mode>"
6200 [(set (match_operand:VIlong 0 "vsx_register_operand" "=v")
6201 (us_mult:VIlong (ashiftrt
6202 (match_operand:VIlong 1 "vsx_register_operand" "v")
6205 (match_operand:VIlong 2 "vsx_register_operand" "v")
6208 "vmulhu<wd> %0,%1,%2"
6209 [(set_attr "type" "veccomplex")])
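;; Usage illustration (not part of the machine description): mulhs/mulhu
;; return the high half of the double-width product of each element pair.
;; With -mcpu=power10 this is reachable through the vec_mulh built-in; the
;; overload used below is an assumption based on the ISA 3.1 built-in
;; documentation:
;;
;;	#include <altivec.h>
;;
;;	/* High 64 bits of each 128-bit product a[i] * b[i].  */
;;	vector signed long long
;;	mul_high (vector signed long long a, vector signed long long b)
;;	{
;;	  return vec_mulh (a, b);	/* vmulhsd  */
;;	}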
6211 ;; Vector multiply low double word
6212 (define_insn "mulv2di3"
6213 [(set (match_operand:V2DI 0 "vsx_register_operand" "=v")
6214 (mult:V2DI (match_operand:V2DI 1 "vsx_register_operand" "v")
6215 (match_operand:V2DI 2 "vsx_register_operand" "v")))]
6216 "TARGET_POWER10"
6217 "vmulld %0,%1,%2"
6218 [(set_attr "type" "veccomplex")])
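;; Usage illustration (not part of the machine description): the low-part
;; doubleword multiply is what ordinary 64-bit element multiplication
;; lowers to (vmulld on power10).  A plain C sketch the vectorizer can map
;; onto it:
;;
;;	void
;;	mul_i64 (long long *restrict out, const long long *restrict a,
;;		 const long long *restrict b, int n)
;;	{
;;	  for (int i = 0; i < n; i++)
;;	    out[i] = a[i] * b[i];
;;	}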