;; Copyright (C) 2009-2024 Free Software Foundation, Inc.
;; Contributed by Michael Meissner <meissner@linux.vnet.ibm.com>

;; This file is part of GCC.

;; GCC is free software; you can redistribute it and/or modify it
;; under the terms of the GNU General Public License as published
;; by the Free Software Foundation; either version 3, or (at your
;; option) any later version.

;; GCC is distributed in the hope that it will be useful, but WITHOUT
;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
;; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
;; License for more details.

;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3.  If not see
;; <http://www.gnu.org/licenses/>.
;; Iterator for comparison types
(define_code_iterator CMP_TEST [eq lt gt unordered])

;; Mode attribute for vector floate and floato conversions
(define_mode_attr VF_sxddp [(V2DI "sxd") (V2DF "dp")])

;; Iterator for both scalar and vector floating point types supported by VSX
(define_mode_iterator VSX_B [DF V4SF V2DF])

;; Iterator for the 2 64-bit vector types
(define_mode_iterator VSX_D [V2DF V2DI])
;; Mode iterator to handle swapping words on little endian for the 128-bit
;; types that go in a single vector register.
(define_mode_iterator VSX_LE_128 [(KF "FLOAT128_VECTOR_P (KFmode)")
                                  (TF "FLOAT128_VECTOR_P (TFmode)")
;; Iterator for the 2 32-bit vector types
(define_mode_iterator VSX_W [V4SF V4SI])

;; Iterator for the DF types
(define_mode_iterator VSX_DF [V2DF DF])

;; Iterator for vector floating point types supported by VSX
(define_mode_iterator VSX_F [V4SF V2DF])

;; Iterator for logical types supported by VSX
(define_mode_iterator VSX_L [V16QI
                             (KF "FLOAT128_VECTOR_P (KFmode)")
                             (TF "FLOAT128_VECTOR_P (TFmode)")])
;; Iterator for memory moves.
(define_mode_iterator VSX_M [V16QI
                             (KF "FLOAT128_VECTOR_P (KFmode)")
                             (TF "FLOAT128_VECTOR_P (TFmode)")

(define_mode_attr VSX_XXBR [(V8HI "h")

;; Map into the appropriate load/store name based on the type
(define_mode_attr VSm [(V16QI "vw4")

;; Map the register class used
(define_mode_attr VSr [(V16QI "v")

;; What value we need in the "isa" field, to make the IEEE QP float work.
(define_mode_attr VSisa [(V16QI "*")
;; A mode attribute to disparage use of GPR registers, except for scalar
;; integer modes.
(define_mode_attr ??r [(V16QI "??r")
;; A mode attribute used for 128-bit constant values.
(define_mode_attr nW [(V16QI "W")

;; Same size integer type for floating point data
(define_mode_attr VSi [(V4SF "v4si")

(define_mode_attr VSI [(V4SF "V4SI")

;; Word size for same size conversion
(define_mode_attr VSc [(V4SF "w")
;; Map into either s or v, depending on whether this is a scalar or vector
;; operation
(define_mode_attr VSv [(V16QI "v")
;; Appropriate type for add ops (and other simple FP ops)
(define_mode_attr VStype_simple [(V2DF "vecdouble")

;; Appropriate type for multiply ops
(define_mode_attr VStype_mul [(V2DF "vecdouble")

;; Appropriate type for divide ops.
(define_mode_attr VStype_div [(V2DF "vecdiv")

;; Map to a double-sized vector mode
(define_mode_attr VS_double [(V4SI "V8SI")

;; Iterators for loading constants with xxspltib
(define_mode_iterator VSINT_84 [V4SI V2DI DI SI])
(define_mode_iterator VSINT_842 [V8HI V4SI V2DI])

;; Vector reverse byte modes
(define_mode_iterator VEC_REVB [V8HI V4SI V2DI V4SF V2DF V1TI])
;; Iterator for ISA 3.0 vector extract/insert of small integer vectors.
;; VSX_EXTRACT_I2 doesn't include V4SImode because SI extracts can be
;; done on ISA 2.07 and not just ISA 3.0.
(define_mode_iterator VSX_EXTRACT_I [V16QI V8HI V4SI])
(define_mode_iterator VSX_EXTRACT_I2 [V16QI V8HI])
(define_mode_iterator VSX_EXTRACT_I4 [V16QI V8HI V4SI V2DI])

(define_mode_attr VSX_EXTRACT_WIDTH [(V16QI "b")
;; Mode attribute to give the correct predicate for ISA 3.0 vector extract and
;; insert to validate the operand number.
(define_mode_attr VSX_EXTRACT_PREDICATE [(V16QI "const_0_to_15_operand")
                                         (V8HI "const_0_to_7_operand")
                                         (V4SI "const_0_to_3_operand")])
;; Mode attribute to give the constraint for vector extract and insert
(define_mode_attr VSX_EX [(V16QI "v")
;; Mode iterator for binary floating types other than double, to
;; optimize converting to those floating point types from an extract
;; of an integer type
(define_mode_iterator VSX_EXTRACT_FL [SF
                                      (IF "FLOAT128_2REG_P (IFmode)")
                                      (KF "TARGET_FLOAT128_HW")
                                      (TF "FLOAT128_2REG_P (TFmode)
                                           || (FLOAT128_IEEE_P (TFmode)
                                               && TARGET_FLOAT128_HW)")])
;; Mode iterator for binary floating types that have a direct conversion
;; from 64-bit integer to floating point
(define_mode_iterator FL_CONV [SF
                               (KF "TARGET_FLOAT128_HW")
                               (TF "TARGET_FLOAT128_HW
                                    && FLOAT128_IEEE_P (TFmode)")])

;; Iterator for the 2 short vector types to do a splat from an integer
(define_mode_iterator VSX_SPLAT_I [V16QI V8HI])

;; Mode attribute to give the count for the splat instruction to splat
;; the value in the 64-bit integer slot
(define_mode_attr VSX_SPLAT_COUNT [(V16QI "7") (V8HI "3")])

;; Mode attribute to give the suffix for the splat instruction
(define_mode_attr VSX_SPLAT_SUFFIX [(V16QI "b") (V8HI "h")])
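
;; For example, splatting the value sitting in the 64-bit integer slot across
;; a V8HI register uses count 3 and suffix "h" (a sketch of the emitted
;; assembly, register numbers arbitrary):
;;   vsplth 0,0,3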
;; Iterator for the move to mask instructions
(define_mode_iterator VSX_MM [V16QI V8HI V4SI V2DI V1TI])
(define_mode_iterator VSX_MM4 [V16QI V8HI V4SI V2DI])

;; Longer vec int modes for rotate/mask ops
;; and Vector Integer Multiply/Divide/Modulo Instructions
(define_mode_iterator VIlong [V2DI V4SI])
;; Constants for creating unspecs
(define_c_enum "unspec"
   UNSPEC_VSX_UNS_FLOAT2
   UNSPEC_VSX_UNS_FLOATE
   UNSPEC_VSX_UNS_FLOATO
   UNSPEC_VSX_SIGN_EXTEND
   UNSPEC_VSX_XVCVBF16SPN
   UNSPEC_VSX_XVCVSPBF16
   UNSPEC_VSX_XVCVSPSXDS
   UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTH
   UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTL
   UNSPEC_VSX_FIRST_MATCH_INDEX
   UNSPEC_VSX_FIRST_MATCH_EOS_INDEX
   UNSPEC_VSX_FIRST_MISMATCH_INDEX
   UNSPEC_VSX_FIRST_MISMATCH_EOS_INDEX
(define_int_iterator XVCVBF16 [UNSPEC_VSX_XVCVSPBF16
                               UNSPEC_VSX_XVCVBF16SPN])

(define_int_attr xvcvbf16 [(UNSPEC_VSX_XVCVSPBF16 "xvcvspbf16")
                           (UNSPEC_VSX_XVCVBF16SPN "xvcvbf16spn")])

;; Like VI, defined in vector.md, but add ISA 2.07 integer vector ops
(define_mode_iterator VI2 [V4SI V8HI V16QI V2DI])

;; Aligned and unaligned vector replace iterator/attr for 32-bit and
;; 64-bit elements
(define_mode_iterator REPLACE_ELT_V [V4SI V4SF V2DI V2DF])
(define_mode_iterator REPLACE_ELT [SI SF DI DF])
(define_mode_attr REPLACE_ELT_char [(V4SI "w") (V4SF "w")
                                    (V2DI "d") (V2DF "d")

(define_mode_attr REPLACE_ELT_sh [(V4SI "2") (V4SF "2")
                                  (V2DI "3") (V2DF "3")])
(define_mode_attr REPLACE_ELT_max [(V4SI "12") (V4SF "12")
                                   (V2DI "8") (V2DF "8")])

;; Like VM2 in altivec.md, just do char, short, int, long, float and double
(define_mode_iterator VM3 [V4SI

(define_mode_iterator V2DI_DI [V2DI DI])
(define_mode_attr DI_to_TI [(V2DI "V1TI")

(define_mode_attr VM3_char [(V2DI "d")

;; Iterator and attribute for vector count leading/trailing
;; zero least-significant bits byte
(define_int_iterator VCZLSBB [UNSPEC_VCLZLSBB
                              UNSPEC_VCTZLSBB])
(define_int_attr vczlsbb_char [(UNSPEC_VCLZLSBB "l")
                               (UNSPEC_VCTZLSBB "t")])
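
;; Usage sketch (C, ISA 3.0): these unspecs back the vec_cntlz_lsbb and
;; vec_cnttz_lsbb intrinsics, which count leading/trailing bytes whose
;; least-significant bit is clear:
;;
;;   #include <altivec.h>
;;   int lead (vector unsigned char v)
;;   {
;;     return vec_cntlz_lsbb (v);	/* vclzlsbb, or vctzlsbb on LE */
;;   }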
;; TImode memory to memory move optimization on LE with p8vector
(define_insn_and_split "*vsx_le_mem_to_mem_mov_ti"
  [(set (match_operand:TI 0 "indexed_or_indirect_operand" "=Z")
        (match_operand:TI 1 "indexed_or_indirect_operand" "Z"))]
   && can_create_pseudo_p ()"
  rtx tmp = gen_reg_rtx (V2DImode);
  rtx src = adjust_address (operands[1], V2DImode, 0);
  emit_insn (gen_vsx_ld_elemrev_v2di (tmp, src));
  rtx dest = adjust_address (operands[0], V2DImode, 0);
  emit_insn (gen_vsx_st_elemrev_v2di (dest, tmp));
  [(set_attr "length" "16")])
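
;; The point of the split above: loading and storing with the
;; element-reversing patterns makes the two little-endian reversals cancel,
;; so a TImode memory-to-memory copy needs no xxpermdi swaps at all, e.g.
;; (a sketch, register numbers arbitrary):
;;   lxvd2x 0,0,3
;;   stxvd2x 0,0,4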
;; The patterns for LE permuted loads and stores come before the general
;; VSX moves so they match first.
(define_insn_and_split "*vsx_le_perm_load_<mode>"
  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
        (match_operand:VSX_D 1 "indexed_or_indirect_operand" "Z"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
           (parallel [(const_int 1) (const_int 0)])))
           (parallel [(const_int 1) (const_int 0)])))]
  rtx mem = operands[1];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register destination is not in the altivec
     range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && (!HARD_REGISTER_NUM_P (reg_or_subregno (operands[0]))
          || ALTIVEC_REGNO_P (reg_or_subregno (operands[0]))))
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);

      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
          /* Replace the source memory address with masked address.  */
          rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem);
          emit_insn (lvx_set_expr);
      else if (rs6000_quadword_masked_address_p (mem_address))
          /* This rtl is already in the form that matches lvx
             instruction, so leave it alone.  */
  /* Otherwise, fall through to transform into a swapping load.  */
  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
  [(set_attr "type" "vecload")
   (set_attr "length" "8")])
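
;; When the lvx transformation does not apply, the swapping load above
;; typically becomes an element-reversing load plus a doubleword swap
;; (a sketch):
;;   lxvd2x 0,0,3
;;   xxpermdi 0,0,0,2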
(define_insn_and_split "*vsx_le_perm_load_<mode>"
  [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
        (match_operand:VSX_W 1 "indexed_or_indirect_operand" "Z"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
           (parallel [(const_int 2) (const_int 3)
                      (const_int 0) (const_int 1)])))
           (parallel [(const_int 2) (const_int 3)
                      (const_int 0) (const_int 1)])))]
  rtx mem = operands[1];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register destination is not in the altivec
     range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && (!HARD_REGISTER_P (operands[0])
          || ALTIVEC_REGNO_P (REGNO (operands[0]))))
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);

      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
          /* Replace the source memory address with masked address.  */
          rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem);
          emit_insn (lvx_set_expr);
      else if (rs6000_quadword_masked_address_p (mem_address))
          /* This rtl is already in the form that matches lvx
             instruction, so leave it alone.  */
  /* Otherwise, fall through to transform into a swapping load.  */
  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
  [(set_attr "type" "vecload")
   (set_attr "length" "8")])
(define_insn_and_split "*vsx_le_perm_load_v8hi"
  [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
        (match_operand:V8HI 1 "indexed_or_indirect_operand" "Z"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
           (parallel [(const_int 4) (const_int 5)
                      (const_int 6) (const_int 7)
                      (const_int 0) (const_int 1)
                      (const_int 2) (const_int 3)])))
           (parallel [(const_int 4) (const_int 5)
                      (const_int 6) (const_int 7)
                      (const_int 0) (const_int 1)
                      (const_int 2) (const_int 3)])))]
  rtx mem = operands[1];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register destination is not in the altivec
     range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && (!HARD_REGISTER_P (operands[0])
          || ALTIVEC_REGNO_P (REGNO (operands[0]))))
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);

      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
          /* Replace the source memory address with masked address.  */
          rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem);
          emit_insn (lvx_set_expr);
      else if (rs6000_quadword_masked_address_p (mem_address))
          /* This rtl is already in the form that matches lvx
             instruction, so leave it alone.  */
  /* Otherwise, fall through to transform into a swapping load.  */
  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
  [(set_attr "type" "vecload")
   (set_attr "length" "8")])
(define_insn_and_split "*vsx_le_perm_load_v16qi"
  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
        (match_operand:V16QI 1 "indexed_or_indirect_operand" "Z"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
           (parallel [(const_int 8) (const_int 9)
                      (const_int 10) (const_int 11)
                      (const_int 12) (const_int 13)
                      (const_int 14) (const_int 15)
                      (const_int 0) (const_int 1)
                      (const_int 2) (const_int 3)
                      (const_int 4) (const_int 5)
                      (const_int 6) (const_int 7)])))
           (parallel [(const_int 8) (const_int 9)
                      (const_int 10) (const_int 11)
                      (const_int 12) (const_int 13)
                      (const_int 14) (const_int 15)
                      (const_int 0) (const_int 1)
                      (const_int 2) (const_int 3)
                      (const_int 4) (const_int 5)
                      (const_int 6) (const_int 7)])))]
  rtx mem = operands[1];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register destination is not in the altivec
     range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && (!HARD_REGISTER_P (operands[0])
          || ALTIVEC_REGNO_P (REGNO (operands[0]))))
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);

      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
          /* Replace the source memory address with masked address.  */
          rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem);
          emit_insn (lvx_set_expr);
      else if (rs6000_quadword_masked_address_p (mem_address))
          /* This rtl is already in the form that matches lvx
             instruction, so leave it alone.  */
  /* Otherwise, fall through to transform into a swapping load.  */
  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
  [(set_attr "type" "vecload")
   (set_attr "length" "8")])
(define_insn "*vsx_le_perm_store_<mode>"
  [(set (match_operand:VSX_D 0 "indexed_or_indirect_operand" "=Z")
        (match_operand:VSX_D 1 "vsx_register_operand" "+wa"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  [(set_attr "type" "vecstore")
   (set_attr "length" "12")])

  [(set (match_operand:VSX_D 0 "indexed_or_indirect_operand")
        (match_operand:VSX_D 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
           (parallel [(const_int 1) (const_int 0)])))
           (parallel [(const_int 1) (const_int 0)])))]
  rtx mem = operands[0];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register source is not in the altivec range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && (!HARD_REGISTER_NUM_P (reg_or_subregno (operands[1]))
          || ALTIVEC_REGNO_P (reg_or_subregno (operands[1]))))
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);
      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
          rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]);
          emit_insn (stvx_set_expr);
      else if (rs6000_quadword_masked_address_p (mem_address))
          /* This rtl is already in the form that matches stvx instruction,
             so leave it alone.  */
  /* Otherwise, fall through to transform into a swapping store.  */
  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
;; The post-reload split requires that we re-permute the source
;; register in case it is still live.
  [(set (match_operand:VSX_D 0 "indexed_or_indirect_operand")
        (match_operand:VSX_D 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
           (parallel [(const_int 1) (const_int 0)])))
           (parallel [(const_int 1) (const_int 0)])))
           (parallel [(const_int 1) (const_int 0)])))]
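
;; That is, the post-reload store becomes three instructions (a sketch):
;;   xxpermdi 0,0,0,2	; swap the source doublewords
;;   stxvd2x 0,0,3	; element-reversing store
;;   xxpermdi 0,0,0,2	; undo the swap in case the source is still live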
(define_insn "*vsx_le_perm_store_<mode>"
  [(set (match_operand:VSX_W 0 "indexed_or_indirect_operand" "=Z")
        (match_operand:VSX_W 1 "vsx_register_operand" "+wa"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  [(set_attr "type" "vecstore")
   (set_attr "length" "12")])

  [(set (match_operand:VSX_W 0 "indexed_or_indirect_operand")
        (match_operand:VSX_W 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
           (parallel [(const_int 2) (const_int 3)
                      (const_int 0) (const_int 1)])))
           (parallel [(const_int 2) (const_int 3)
                      (const_int 0) (const_int 1)])))]
  rtx mem = operands[0];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register source is not in the altivec range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && (!HARD_REGISTER_NUM_P (reg_or_subregno (operands[1]))
          || ALTIVEC_REGNO_P (reg_or_subregno (operands[1]))))
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);
      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
          rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]);
          emit_insn (stvx_set_expr);
      else if (rs6000_quadword_masked_address_p (mem_address))
          /* This rtl is already in the form that matches stvx instruction,
             so leave it alone.  */
  /* Otherwise, fall through to transform into a swapping store.  */
  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])

;; The post-reload split requires that we re-permute the source
;; register in case it is still live.
  [(set (match_operand:VSX_W 0 "indexed_or_indirect_operand")
        (match_operand:VSX_W 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
           (parallel [(const_int 2) (const_int 3)
                      (const_int 0) (const_int 1)])))
           (parallel [(const_int 2) (const_int 3)
                      (const_int 0) (const_int 1)])))
           (parallel [(const_int 2) (const_int 3)
                      (const_int 0) (const_int 1)])))]
(define_insn "*vsx_le_perm_store_v8hi"
  [(set (match_operand:V8HI 0 "indexed_or_indirect_operand" "=Z")
        (match_operand:V8HI 1 "vsx_register_operand" "+wa"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  [(set_attr "type" "vecstore")
   (set_attr "length" "12")])

  [(set (match_operand:V8HI 0 "indexed_or_indirect_operand")
        (match_operand:V8HI 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
           (parallel [(const_int 4) (const_int 5)
                      (const_int 6) (const_int 7)
                      (const_int 0) (const_int 1)
                      (const_int 2) (const_int 3)])))
           (parallel [(const_int 4) (const_int 5)
                      (const_int 6) (const_int 7)
                      (const_int 0) (const_int 1)
                      (const_int 2) (const_int 3)])))]
  rtx mem = operands[0];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register source is not in the altivec range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && (!HARD_REGISTER_NUM_P (reg_or_subregno (operands[1]))
          || ALTIVEC_REGNO_P (reg_or_subregno (operands[1]))))
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);
      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
          rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]);
          emit_insn (stvx_set_expr);
      else if (rs6000_quadword_masked_address_p (mem_address))
          /* This rtl is already in the form that matches stvx instruction,
             so leave it alone.  */
  /* Otherwise, fall through to transform into a swapping store.  */
  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])

;; The post-reload split requires that we re-permute the source
;; register in case it is still live.
  [(set (match_operand:V8HI 0 "indexed_or_indirect_operand")
        (match_operand:V8HI 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
           (parallel [(const_int 4) (const_int 5)
                      (const_int 6) (const_int 7)
                      (const_int 0) (const_int 1)
                      (const_int 2) (const_int 3)])))
           (parallel [(const_int 4) (const_int 5)
                      (const_int 6) (const_int 7)
                      (const_int 0) (const_int 1)
                      (const_int 2) (const_int 3)])))
           (parallel [(const_int 4) (const_int 5)
                      (const_int 6) (const_int 7)
                      (const_int 0) (const_int 1)
                      (const_int 2) (const_int 3)])))]
(define_insn "*vsx_le_perm_store_v16qi"
  [(set (match_operand:V16QI 0 "indexed_or_indirect_operand" "=Z")
        (match_operand:V16QI 1 "vsx_register_operand" "+wa"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  [(set_attr "type" "vecstore")
   (set_attr "length" "12")])

  [(set (match_operand:V16QI 0 "indexed_or_indirect_operand")
        (match_operand:V16QI 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
           (parallel [(const_int 8) (const_int 9)
                      (const_int 10) (const_int 11)
                      (const_int 12) (const_int 13)
                      (const_int 14) (const_int 15)
                      (const_int 0) (const_int 1)
                      (const_int 2) (const_int 3)
                      (const_int 4) (const_int 5)
                      (const_int 6) (const_int 7)])))
           (parallel [(const_int 8) (const_int 9)
                      (const_int 10) (const_int 11)
                      (const_int 12) (const_int 13)
                      (const_int 14) (const_int 15)
                      (const_int 0) (const_int 1)
                      (const_int 2) (const_int 3)
                      (const_int 4) (const_int 5)
                      (const_int 6) (const_int 7)])))]
  rtx mem = operands[0];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register source is not in the altivec range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && (!HARD_REGISTER_NUM_P (reg_or_subregno (operands[1]))
          || ALTIVEC_REGNO_P (reg_or_subregno (operands[1]))))
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);
      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
          rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]);
          emit_insn (stvx_set_expr);
      else if (rs6000_quadword_masked_address_p (mem_address))
          /* This rtl is already in the form that matches stvx instruction,
             so leave it alone.  */
  /* Otherwise, fall through to transform into a swapping store.  */
  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])

;; The post-reload split requires that we re-permute the source
;; register in case it is still live.
  [(set (match_operand:V16QI 0 "indexed_or_indirect_operand")
        (match_operand:V16QI 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
           (parallel [(const_int 8) (const_int 9)
                      (const_int 10) (const_int 11)
                      (const_int 12) (const_int 13)
                      (const_int 14) (const_int 15)
                      (const_int 0) (const_int 1)
                      (const_int 2) (const_int 3)
                      (const_int 4) (const_int 5)
                      (const_int 6) (const_int 7)])))
           (parallel [(const_int 8) (const_int 9)
                      (const_int 10) (const_int 11)
                      (const_int 12) (const_int 13)
                      (const_int 14) (const_int 15)
                      (const_int 0) (const_int 1)
                      (const_int 2) (const_int 3)
                      (const_int 4) (const_int 5)
                      (const_int 6) (const_int 7)])))
           (parallel [(const_int 8) (const_int 9)
                      (const_int 10) (const_int 11)
                      (const_int 12) (const_int 13)
                      (const_int 14) (const_int 15)
                      (const_int 0) (const_int 1)
                      (const_int 2) (const_int 3)
                      (const_int 4) (const_int 5)
                      (const_int 6) (const_int 7)])))]
;; Little endian word swapping for 128-bit types that are either scalars or
;; the special V1TI container class, for which it is not appropriate to use
;; vec_select
(define_insn "*vsx_le_permute_<mode>"
  [(set (match_operand:VEC_TI 0 "nonimmediate_operand" "=wa,wa,Z,&r,&r,Q")
        (match_operand:VEC_TI 1 "input_operand" "wa,Z,wa,r,Q,r")
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
   xxpermdi %x0,%x1,%x1,2
   mr %0,%L1\;mr %L0,%1
   ld%U1%X1 %0,%L1\;ld%U1%X1 %L0,%1
   std%U0%X0 %L1,%0\;std%U0%X0 %1,%L0"
  [(set_attr "length" "*,*,*,8,8,8")
   (set_attr "type" "vecperm,vecload,vecstore,*,load,store")])
(define_insn_and_split "*vsx_le_undo_permute_<mode>"
  [(set (match_operand:VEC_TI 0 "vsx_register_operand" "=wa,wa")
        (match_operand:VEC_TI 1 "vsx_register_operand" "0,wa")
  "!BYTES_BIG_ENDIAN && TARGET_VSX"
  [(set (match_dup 0) (match_dup 1))]
  if (reload_completed && REGNO (operands[0]) == REGNO (operands[1]))
    emit_note (NOTE_INSN_DELETED);
  [(set_attr "length" "0,4")
   (set_attr "type" "veclogical")])
(define_insn_and_split "*vsx_le_perm_load_<mode>"
  [(set (match_operand:VSX_LE_128 0 "vsx_register_operand" "=wa,r")
        (match_operand:VSX_LE_128 1 "memory_operand" "Z,Q"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
   && !altivec_indexed_or_indirect_operand (operands[1], <MODE>mode)"
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
   && !altivec_indexed_or_indirect_operand (operands[1], <MODE>mode)"
  rtx tmp = (can_create_pseudo_p ()
             ? gen_reg_rtx_and_attrs (operands[0])
  rs6000_emit_le_vsx_permute (tmp, operands[1], <MODE>mode);
  rs6000_emit_le_vsx_permute (operands[0], tmp, <MODE>mode);
  [(set_attr "type" "vecload,load")
   (set_attr "length" "8,8")
   (set_attr "isa" "<VSisa>,*")])
(define_insn "*vsx_le_perm_store_<mode>"
  [(set (match_operand:VSX_LE_128 0 "memory_operand" "=Z,Q")
        (match_operand:VSX_LE_128 1 "vsx_register_operand" "+wa,r"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
   && !altivec_indexed_or_indirect_operand (operands[0], <MODE>mode)"
  [(set_attr "type" "vecstore,store")
   (set_attr "length" "12,8")
   (set_attr "isa" "<VSisa>,*")])

  [(set (match_operand:VSX_LE_128 0 "memory_operand")
        (match_operand:VSX_LE_128 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !reload_completed && !TARGET_P9_VECTOR
   && !altivec_indexed_or_indirect_operand (operands[0], <MODE>mode)"
  rtx tmp = (can_create_pseudo_p ()
             ? gen_reg_rtx_and_attrs (operands[0])
  rs6000_emit_le_vsx_permute (tmp, operands[1], <MODE>mode);
  rs6000_emit_le_vsx_permute (operands[0], tmp, <MODE>mode);
;; Peepholes to catch loads and stores for TImode if TImode landed in
;; GPR registers on a little endian system.
  [(set (match_operand:VEC_TI 0 "int_reg_operand")
        (rotate:VEC_TI (match_operand:VEC_TI 1 "memory_operand")
   (set (match_operand:VEC_TI 2 "int_reg_operand")
        (rotate:VEC_TI (match_dup 0)
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
   && (rtx_equal_p (operands[0], operands[2])
       || peep2_reg_dead_p (2, operands[0]))"
  [(set (match_dup 2) (match_dup 1))])

  [(set (match_operand:VEC_TI 0 "int_reg_operand")
        (rotate:VEC_TI (match_operand:VEC_TI 1 "int_reg_operand")
   (set (match_operand:VEC_TI 2 "memory_operand")
        (rotate:VEC_TI (match_dup 0)
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
   && peep2_reg_dead_p (2, operands[0])"
  [(set (match_dup 2) (match_dup 1))])
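
;; Roughly speaking, these let a GPR __int128 access that was expanded as a
;; rotate-load (or rotate-store) plus the undoing rotate collapse back into a
;; plain move once the intermediate register is known dead, e.g. (C sketch):
;;   __int128 get (__int128 *p) { return *p; }	/* just a pair of ld insns */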
;; Peephole to catch memory to memory transfers for TImode if TImode landed in
;; VSX registers on a little endian system.  The vector types and IEEE 128-bit
;; floating point are handled by the more generic swap elimination pass.
  [(set (match_operand:TI 0 "vsx_register_operand")
        (rotate:TI (match_operand:TI 1 "vsx_register_operand")
   (set (match_operand:TI 2 "vsx_register_operand")
        (rotate:TI (match_dup 0)
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
   && (rtx_equal_p (operands[0], operands[2])
       || peep2_reg_dead_p (2, operands[0]))"
  [(set (match_dup 2) (match_dup 1))])
;; The post-reload split requires that we re-permute the source
;; register in case it is still live.
  [(set (match_operand:VSX_LE_128 0 "memory_operand")
        (match_operand:VSX_LE_128 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && reload_completed && !TARGET_P9_VECTOR
   && !altivec_indexed_or_indirect_operand (operands[0], <MODE>mode)"
  rs6000_emit_le_vsx_permute (operands[1], operands[1], <MODE>mode);
  rs6000_emit_le_vsx_permute (operands[0], operands[1], <MODE>mode);
  rs6000_emit_le_vsx_permute (operands[1], operands[1], <MODE>mode);
;; Vector constants that can be generated with XXSPLTIB, which was added in
;; ISA 3.0.  Both (const_vector [..]) and (vec_duplicate ...) forms are
;; recognized.
(define_insn "xxspltib_v16qi"
  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
        (vec_duplicate:V16QI (match_operand:SI 1 "s8bit_cint_operand" "n")))]
  operands[2] = GEN_INT (INTVAL (operands[1]) & 0xff);
  return "xxspltib %x0,%2";
  [(set_attr "type" "vecperm")])
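
;; Usage sketch (C, ISA 3.0): splatting a small constant into every byte
;; lane takes a single instruction:
;;
;;   #include <altivec.h>
;;   vector signed char all_m5 (void)
;;   {
;;     return vec_splats ((signed char) -5);	/* xxspltib 34,251 */
;;   }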
(define_insn "xxspltib_<mode>_nosplit"
  [(set (match_operand:VSINT_842 0 "vsx_register_operand" "=wa,wa")
        (match_operand:VSINT_842 1 "xxspltib_constant_nosplit" "jwM,wE"))]
  rtx op1 = operands[1];

  if (!xxspltib_constant_p (op1, <MODE>mode, &num_insns, &value)
  operands[2] = GEN_INT (value & 0xff);
  return "xxspltib %x0,%2";
  [(set_attr "type" "vecperm")])

(define_insn_and_split "*xxspltib_<mode>_split"
  [(set (match_operand:VSINT_842 0 "altivec_register_operand" "=v")
        (match_operand:VSINT_842 1 "xxspltib_constant_split" "wS"))]
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  rtx tmp = ((can_create_pseudo_p ())
             ? gen_reg_rtx (V16QImode)
             : gen_lowpart (V16QImode, op0));

  if (!xxspltib_constant_p (op1, <MODE>mode, &num_insns, &value)
  emit_insn (gen_xxspltib_v16qi (tmp, GEN_INT (value)));

  if (<MODE>mode == V2DImode)
    emit_insn (gen_vsx_sign_extend_v16qi_v2di (op0, tmp));

  else if (<MODE>mode == V4SImode)
    emit_insn (gen_vsx_sign_extend_v16qi_v4si (op0, tmp));

  else if (<MODE>mode == V8HImode)
    emit_insn (gen_altivec_vupkhsb (op0, tmp));

  [(set_attr "type" "vecperm")
   (set_attr "length" "8")])
(define_insn_and_split "*vspltisw_v2di_split"
  [(set (match_operand:V2DI 0 "altivec_register_operand" "=v")
        (match_operand:V2DI 1 "vspltisw_vupkhsw_constant_split" "W"))]
  "TARGET_P8_VECTOR && vspltisw_vupkhsw_constant_split (operands[1], V2DImode)"
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  rtx tmp = can_create_pseudo_p ()
            ? gen_reg_rtx (V4SImode)
            : gen_lowpart (V4SImode, op0);

  vspltisw_vupkhsw_constant_p (op1, V2DImode, &value);
  emit_insn (gen_altivec_vspltisw (tmp, GEN_INT (value)));
  emit_insn (gen_altivec_vupkhsw_direct (op0, tmp));

  [(set_attr "type" "vecperm")
   (set_attr "length" "8")])
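
;; A sketch of the split above: a V2DI constant such as { 12, 12 } can be
;; built without a memory load on Power8 as
;;   vspltisw 0,12	; V4SI { 12, 12, 12, 12 }
;;   vupkhsw 0,0	; sign-extend the high words to V2DI { 12, 12 }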
;; Prefer using vector registers over GPRs.  Prefer using ISA 3.0's XXSPLTISB
;; or Altivec VSPLTISW 0/-1 over XXLXOR/XXLORC to set a register to all 0's or
;; all 1's, since the machine does not have to wait for the previous
;; instruction using the register being set (such as a store waiting on a slow
;; instruction).  But generate XXLXOR/XXLORC if it will avoid a register move.

;;              VSX store  VSX load   VSX move  VSX->GPR   GPR->VSX    LQ (GPR)
;;              STQ (GPR)  GPR load   GPR store GPR move   XXSPLTIB    VSPLTISW
;;              VSX 0/-1   VMX const  GPR const LVX (VMX)  STVX (VMX)
(define_insn "vsx_mov<mode>_64bit"
  [(set (match_operand:VSX_M 0 "nonimmediate_operand"
               "=ZwO,      wa,        wa,       r,         we,        ?wQ,
                ?&r,       ??r,       ??Y,      <??r>,     wa,        v,
                ?wa,       v,         <??r>,    wZ,        v")
        (match_operand:VSX_M 1 "input_operand"
               "wa,        ZwO,       wa,       we,        r,         r,
                wQ,        Y,         r,        r,         wE,        jwM,
                ?jwM,      W,         <nW>,     v,         wZ"))]
  "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[1], <MODE>mode))"
  return rs6000_output_move_128bit (operands);
    "vecstore,  vecload,   vecsimple, mtvsr,    mfvsr,     load,
     store,     load,      store,     *,        vecsimple, vecsimple,
     vecsimple, *,         *,         vecstore, vecload")
  (set_attr "num_insns"
  (set_attr "max_prefixed_insns"
    "<VSisa>,   <VSisa>,   <VSisa>,   *,        *,         *,
     <VSisa>,   *,         *,         *,        *")
  (set_attr "prefixed"
;;              VSX store  VSX load   VSX move  GPR load   GPR store  GPR move
;;              XXSPLTIB   VSPLTISW   VSX 0/-1  VMX const  GPR const
;;              LVX (VMX)  STVX (VMX)
(define_insn "*vsx_mov<mode>_32bit"
  [(set (match_operand:VSX_M 0 "nonimmediate_operand"
               "=ZwO,      wa,        wa,       ??r,       ??Y,       <??r>,
                wa,        v,         ?wa,      v,         <??r>,
        (match_operand:VSX_M 1 "input_operand"
               "wa,        ZwO,       wa,       Y,         r,         r,
                wE,        jwM,       ?jwM,     W,         <nW>,
  "!TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[1], <MODE>mode))"
  return rs6000_output_move_128bit (operands);
    "vecstore,  vecload,   vecsimple, load,     store,     *,
     vecsimple, vecsimple, vecsimple, *,        *,
    "*,         *,         *,         16,       16,        16,
    "<VSisa>,   <VSisa>,   <VSisa>,   *,        *,         *,
     p9v,       *,         <VSisa>,   *,        *,
  (set_attr "prefixed"
;; Explicit load/store expanders for the builtin functions
(define_expand "vsx_load_<mode>"
  [(set (match_operand:VSX_M 0 "vsx_register_operand")
        (match_operand:VSX_M 1 "memory_operand"))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
  /* Expand to swaps if needed, prior to swap optimization.  */
  if (!BYTES_BIG_ENDIAN && !TARGET_P9_VECTOR
      && !altivec_indexed_or_indirect_operand (operands[1], <MODE>mode))
      rs6000_emit_le_vsx_move (operands[0], operands[1], <MODE>mode);

(define_expand "vsx_store_<mode>"
  [(set (match_operand:VSX_M 0 "memory_operand")
        (match_operand:VSX_M 1 "vsx_register_operand"))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
  /* Expand to swaps if needed, prior to swap optimization.  */
  if (!BYTES_BIG_ENDIAN && !TARGET_P9_VECTOR
      && !altivec_indexed_or_indirect_operand (operands[0], <MODE>mode))
      rs6000_emit_le_vsx_move (operands[0], operands[1], <MODE>mode);
;; Load rightmost element from load_data
;; using lxvrbx, lxvrhx, lxvrwx, lxvrdx.
(define_insn "vsx_lxvr<wd>x"
  [(set (match_operand:TI 0 "vsx_register_operand" "=wa")
        (zero_extend:TI (match_operand:INT_ISA3 1 "memory_operand" "Z")))]
  [(set_attr "type" "vecload")])

;; Store rightmost element into store_data
;; using stxvrbx, stxvrhx, stxvrwx, stxvrdx.
(define_insn "vsx_stxvr<wd>x"
  [(set (match_operand:INT_ISA3 0 "memory_operand" "=Z")
        (truncate:INT_ISA3 (match_operand:TI 1 "vsx_register_operand" "wa")))]
  "stxvr<wd>x %x1,%y0"
  [(set_attr "type" "vecstore")])
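
;; For instance, the byte form truncates the 128-bit register value and
;; stores only its rightmost byte (an ISA 3.1 sketch, register numbers
;; arbitrary):
;;   stxvrbx 32,0,3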
;; Explicit load/store expanders for the builtin functions for lxvd2x, etc.,
;; when you really want their element-reversing behavior.
(define_insn "vsx_ld_elemrev_v2di"
  [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
        (match_operand:V2DI 1 "memory_operand" "Z")
        (parallel [(const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN"
  [(set_attr "type" "vecload")])

(define_insn "vsx_ld_elemrev_v1ti"
  [(set (match_operand:V1TI 0 "vsx_register_operand" "=wa")
        (match_operand:V1TI 1 "memory_operand" "Z")
        (parallel [(const_int 0)])))]
  "VECTOR_MEM_VSX_P (V1TImode) && !BYTES_BIG_ENDIAN"
  return "lxvd2x %x0,%y1\;xxpermdi %x0,%x0,%x0,2";
  [(set_attr "type" "vecload")])
(define_insn "vsx_ld_elemrev_v2df"
  [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
        (match_operand:V2DF 1 "memory_operand" "Z")
        (parallel [(const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V2DFmode) && !BYTES_BIG_ENDIAN"
  [(set_attr "type" "vecload")])

(define_insn "vsx_ld_elemrev_v4si"
  [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa")
        (match_operand:V4SI 1 "memory_operand" "Z")
        (parallel [(const_int 3) (const_int 2)
                   (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V4SImode) && !BYTES_BIG_ENDIAN"
  [(set_attr "type" "vecload")])

(define_insn "vsx_ld_elemrev_v4sf"
  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
        (match_operand:V4SF 1 "memory_operand" "Z")
        (parallel [(const_int 3) (const_int 2)
                   (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V4SFmode) && !BYTES_BIG_ENDIAN"
  [(set_attr "type" "vecload")])
(define_expand "vsx_ld_elemrev_v8hi"
  [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
        (match_operand:V8HI 1 "memory_operand" "Z")
        (parallel [(const_int 7) (const_int 6)
                   (const_int 5) (const_int 4)
                   (const_int 3) (const_int 2)
                   (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN"
  if (!TARGET_P9_VECTOR)
      rtx tmp = gen_reg_rtx (V4SImode);
      rtx subreg, subreg2, perm[16], pcv;
      /* 2 is leftmost element in register */
      unsigned int reorder[16] = {13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2};

      subreg = simplify_gen_subreg (V4SImode, operands[1], V8HImode, 0);
      emit_insn (gen_vsx_ld_elemrev_v4si (tmp, subreg));
      subreg2 = simplify_gen_subreg (V8HImode, tmp, V4SImode, 0);

      for (i = 0; i < 16; ++i)
        perm[i] = GEN_INT (reorder[i]);

      pcv = force_reg (V16QImode,
                       gen_rtx_CONST_VECTOR (V16QImode,
                                             gen_rtvec_v (16, perm)));
      emit_insn (gen_altivec_vperm_v8hi_direct (operands[0], subreg2,
(define_insn "*vsx_ld_elemrev_v8hi_internal"
  [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
        (match_operand:V8HI 1 "memory_operand" "Z")
        (parallel [(const_int 7) (const_int 6)
                   (const_int 5) (const_int 4)
                   (const_int 3) (const_int 2)
                   (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
  [(set_attr "type" "vecload")])
(define_expand "vsx_ld_elemrev_v16qi"
  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
        (match_operand:V16QI 1 "memory_operand" "Z")
        (parallel [(const_int 15) (const_int 14)
                   (const_int 13) (const_int 12)
                   (const_int 11) (const_int 10)
                   (const_int 9) (const_int 8)
                   (const_int 7) (const_int 6)
                   (const_int 5) (const_int 4)
                   (const_int 3) (const_int 2)
                   (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN"
  if (!TARGET_P9_VECTOR)
      rtx tmp = gen_reg_rtx (V4SImode);
      rtx subreg, subreg2, perm[16], pcv;
      /* 3 is leftmost element in register */
      unsigned int reorder[16] = {12,13,14,15,8,9,10,11,4,5,6,7,0,1,2,3};

      subreg = simplify_gen_subreg (V4SImode, operands[1], V16QImode, 0);
      emit_insn (gen_vsx_ld_elemrev_v4si (tmp, subreg));
      subreg2 = simplify_gen_subreg (V16QImode, tmp, V4SImode, 0);

      for (i = 0; i < 16; ++i)
        perm[i] = GEN_INT (reorder[i]);

      pcv = force_reg (V16QImode,
                       gen_rtx_CONST_VECTOR (V16QImode,
                                             gen_rtvec_v (16, perm)));
      emit_insn (gen_altivec_vperm_v16qi_direct (operands[0], subreg2,
(define_insn "vsx_ld_elemrev_v16qi_internal"
  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
        (match_operand:V16QI 1 "memory_operand" "Z")
        (parallel [(const_int 15) (const_int 14)
                   (const_int 13) (const_int 12)
                   (const_int 11) (const_int 10)
                   (const_int 9) (const_int 8)
                   (const_int 7) (const_int 6)
                   (const_int 5) (const_int 4)
                   (const_int 3) (const_int 2)
                   (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
  [(set_attr "type" "vecload")])
(define_insn "vsx_st_elemrev_v1ti"
  [(set (match_operand:V1TI 0 "memory_operand" "=Z")
        (match_operand:V1TI 1 "vsx_register_operand" "+wa")
        (parallel [(const_int 0)])))
   (clobber (match_dup 1))]
  "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN"
  return "xxpermdi %x1,%x1,%x1,2\;stxvd2x %x1,%y0";
  [(set_attr "type" "vecstore")])

(define_insn "vsx_st_elemrev_v2df"
  [(set (match_operand:V2DF 0 "memory_operand" "=Z")
        (match_operand:V2DF 1 "vsx_register_operand" "wa")
        (parallel [(const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V2DFmode) && !BYTES_BIG_ENDIAN"
  [(set_attr "type" "vecstore")])

(define_insn "vsx_st_elemrev_v2di"
  [(set (match_operand:V2DI 0 "memory_operand" "=Z")
        (match_operand:V2DI 1 "vsx_register_operand" "wa")
        (parallel [(const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN"
  [(set_attr "type" "vecstore")])

(define_insn "vsx_st_elemrev_v4sf"
  [(set (match_operand:V4SF 0 "memory_operand" "=Z")
        (match_operand:V4SF 1 "vsx_register_operand" "wa")
        (parallel [(const_int 3) (const_int 2)
                   (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V4SFmode) && !BYTES_BIG_ENDIAN"
  [(set_attr "type" "vecstore")])

(define_insn "vsx_st_elemrev_v4si"
  [(set (match_operand:V4SI 0 "memory_operand" "=Z")
        (match_operand:V4SI 1 "vsx_register_operand" "wa")
        (parallel [(const_int 3) (const_int 2)
                   (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V4SImode) && !BYTES_BIG_ENDIAN"
  [(set_attr "type" "vecstore")])
(define_expand "vsx_st_elemrev_v8hi"
  [(set (match_operand:V8HI 0 "memory_operand" "=Z")
        (match_operand:V8HI 1 "vsx_register_operand" "wa")
        (parallel [(const_int 7) (const_int 6)
                   (const_int 5) (const_int 4)
                   (const_int 3) (const_int 2)
                   (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN"
  if (!TARGET_P9_VECTOR)
      rtx mem_subreg, subreg, perm[16], pcv;
      rtx tmp = gen_reg_rtx (V8HImode);
      /* 2 is leftmost element in register */
      unsigned int reorder[16] = {13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2};

      for (i = 0; i < 16; ++i)
        perm[i] = GEN_INT (reorder[i]);

      pcv = force_reg (V16QImode,
                       gen_rtx_CONST_VECTOR (V16QImode,
                                             gen_rtvec_v (16, perm)));
      emit_insn (gen_altivec_vperm_v8hi_direct (tmp, operands[1],
      subreg = simplify_gen_subreg (V4SImode, tmp, V8HImode, 0);
      mem_subreg = simplify_gen_subreg (V4SImode, operands[0], V8HImode, 0);
      emit_insn (gen_vsx_st_elemrev_v4si (mem_subreg, subreg));
(define_insn "*vsx_st_elemrev_v2di_internal"
  [(set (match_operand:V2DI 0 "memory_operand" "=Z")
        (match_operand:V2DI 1 "vsx_register_operand" "wa")
        (parallel [(const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
  [(set_attr "type" "vecstore")])

(define_insn "*vsx_st_elemrev_v8hi_internal"
  [(set (match_operand:V8HI 0 "memory_operand" "=Z")
        (match_operand:V8HI 1 "vsx_register_operand" "wa")
        (parallel [(const_int 7) (const_int 6)
                   (const_int 5) (const_int 4)
                   (const_int 3) (const_int 2)
                   (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
  [(set_attr "type" "vecstore")])
(define_expand "vsx_st_elemrev_v16qi"
  [(set (match_operand:V16QI 0 "memory_operand" "=Z")
        (match_operand:V16QI 1 "vsx_register_operand" "wa")
        (parallel [(const_int 15) (const_int 14)
                   (const_int 13) (const_int 12)
                   (const_int 11) (const_int 10)
                   (const_int 9) (const_int 8)
                   (const_int 7) (const_int 6)
                   (const_int 5) (const_int 4)
                   (const_int 3) (const_int 2)
                   (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN"
  if (!TARGET_P9_VECTOR)
      rtx mem_subreg, subreg, perm[16], pcv;
      rtx tmp = gen_reg_rtx (V16QImode);
      /* 3 is leftmost element in register */
      unsigned int reorder[16] = {12,13,14,15,8,9,10,11,4,5,6,7,0,1,2,3};

      for (i = 0; i < 16; ++i)
        perm[i] = GEN_INT (reorder[i]);

      pcv = force_reg (V16QImode,
                       gen_rtx_CONST_VECTOR (V16QImode,
                                             gen_rtvec_v (16, perm)));
      emit_insn (gen_altivec_vperm_v16qi_direct (tmp, operands[1],
      subreg = simplify_gen_subreg (V4SImode, tmp, V16QImode, 0);
      mem_subreg = simplify_gen_subreg (V4SImode, operands[0], V16QImode, 0);
      emit_insn (gen_vsx_st_elemrev_v4si (mem_subreg, subreg));
(define_insn "*vsx_st_elemrev_v16qi_internal"
  [(set (match_operand:V16QI 0 "memory_operand" "=Z")
        (match_operand:V16QI 1 "vsx_register_operand" "wa")
        (parallel [(const_int 15) (const_int 14)
                   (const_int 13) (const_int 12)
                   (const_int 11) (const_int 10)
                   (const_int 9) (const_int 8)
                   (const_int 7) (const_int 6)
                   (const_int 5) (const_int 4)
                   (const_int 3) (const_int 2)
                   (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
  [(set_attr "type" "vecstore")])
;; VSX vector floating point arithmetic instructions.  The VSX scalar
;; instructions are now combined with the insn for the traditional floating
;; point unit.
(define_insn "*vsx_add<mode>3"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
        (plus:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
                    (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvadd<sd>p %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")])
(define_insn "*vsx_sub<mode>3"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
        (minus:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
                     (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvsub<sd>p %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")])
(define_insn "*vsx_mul<mode>3"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
        (mult:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
                    (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvmul<sd>p %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")])

;; Emulate vector with scalar for vec_mul in V2DImode
(define_insn_and_split "vsx_mul_v2di"
  [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
        (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa")
                      (match_operand:V2DI 2 "vsx_register_operand" "wa")]
  "VECTOR_MEM_VSX_P (V2DImode)"
  "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed"
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  rtx op2 = operands[2];

      emit_insn (gen_mulv2di3 (op0, op1, op2));

  rtx op3 = gen_reg_rtx (DImode);
  rtx op4 = gen_reg_rtx (DImode);
  rtx op5 = gen_reg_rtx (DImode);
  emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0)));
  emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0)));
  if (TARGET_POWERPC64)
    emit_insn (gen_muldi3 (op5, op3, op4));
      rtx ret = expand_mult (DImode, op3, op4, NULL, 0, false);
      emit_move_insn (op5, ret);
  emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1)));
  emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1)));
  if (TARGET_POWERPC64)
    emit_insn (gen_muldi3 (op3, op3, op4));
      rtx ret = expand_mult (DImode, op3, op4, NULL, 0, false);
      emit_move_insn (op3, ret);
  emit_insn (gen_vsx_concat_v2di (op0, op5, op3));
  [(set_attr "type" "mul")])
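
;; Usage sketch (C): vec_mul on vector long long reaches the pattern above;
;; before Power10's vector doubleword multiply it splits into two scalar
;; 64-bit multiplies as shown:
;;
;;   #include <altivec.h>
;;   vector long long mul2 (vector long long a, vector long long b)
;;   {
;;     return vec_mul (a, b);
;;   }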
(define_insn "*vsx_div<mode>3"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
        (div:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
                   (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvdiv<sd>p %x0,%x1,%x2"
  [(set_attr "type" "<VStype_div>")])

;; Emulate vector with scalar for vec_div in V2DImode
(define_insn_and_split "vsx_div_v2di"
  [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
        (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa")
                      (match_operand:V2DI 2 "vsx_register_operand" "wa")]
  "VECTOR_MEM_VSX_P (V2DImode)"
  "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed"
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  rtx op2 = operands[2];
  rtx op3 = gen_reg_rtx (DImode);
  rtx op4 = gen_reg_rtx (DImode);
  rtx op5 = gen_reg_rtx (DImode);
  emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0)));
  emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0)));
  if (TARGET_POWERPC64)
    emit_insn (gen_divdi3 (op5, op3, op4));
      rtx libfunc = optab_libfunc (sdiv_optab, DImode);
      rtx target = emit_library_call_value (libfunc,
                                            op5, LCT_NORMAL, DImode,
      emit_move_insn (op5, target);
  emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1)));
  emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1)));
  if (TARGET_POWERPC64)
    emit_insn (gen_divdi3 (op3, op3, op4));
      rtx libfunc = optab_libfunc (sdiv_optab, DImode);
      rtx target = emit_library_call_value (libfunc,
                                            op3, LCT_NORMAL, DImode,
      emit_move_insn (op3, target);
  emit_insn (gen_vsx_concat_v2di (op0, op5, op3));
  [(set_attr "type" "div")])
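
;; Usage sketch (C): likewise vec_div on vector long long splits into two
;; scalar divides (or __divdi3 libcalls when !TARGET_POWERPC64):
;;
;;   #include <altivec.h>
;;   vector long long div2 (vector long long a, vector long long b)
;;   {
;;     return vec_div (a, b);
;;   }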
(define_insn_and_split "vsx_udiv_v2di"
  [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
        (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa")
                      (match_operand:V2DI 2 "vsx_register_operand" "wa")]
  "VECTOR_MEM_VSX_P (V2DImode)"
  "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed"
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  rtx op2 = operands[2];

      emit_insn (gen_udivv2di3 (op0, op1, op2));

  rtx op3 = gen_reg_rtx (DImode);
  rtx op4 = gen_reg_rtx (DImode);
  rtx op5 = gen_reg_rtx (DImode);

  emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0)));
  emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0)));

  if (TARGET_POWERPC64)
    emit_insn (gen_udivdi3 (op5, op3, op4));
      rtx libfunc = optab_libfunc (udiv_optab, DImode);
      rtx target = emit_library_call_value (libfunc,
                                            op5, LCT_NORMAL, DImode,
      emit_move_insn (op5, target);

  emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1)));
  emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1)));

  if (TARGET_POWERPC64)
    emit_insn (gen_udivdi3 (op3, op3, op4));
      rtx libfunc = optab_libfunc (udiv_optab, DImode);
      rtx target = emit_library_call_value (libfunc,
                                            op3, LCT_NORMAL, DImode,
      emit_move_insn (op3, target);

  emit_insn (gen_vsx_concat_v2di (op0, op5, op3));
  [(set_attr "type" "div")])
;; Vector integer signed/unsigned divide
(define_insn "vsx_div_v1ti"
  [(set (match_operand:V1TI 0 "vsx_register_operand" "=v")
        (unspec:V1TI [(match_operand:V1TI 1 "vsx_register_operand" "v")
                      (match_operand:V1TI 2 "vsx_register_operand" "v")]
  [(set_attr "type" "div")])

(define_insn "vsx_udiv_v1ti"
  [(set (match_operand:V1TI 0 "vsx_register_operand" "=v")
        (unspec:V1TI [(match_operand:V1TI 1 "vsx_register_operand" "v")
                      (match_operand:V1TI 2 "vsx_register_operand" "v")]
  [(set_attr "type" "div")])

(define_insn "vsx_dives_v1ti"
  [(set (match_operand:V1TI 0 "vsx_register_operand" "=v")
        (unspec:V1TI [(match_operand:V1TI 1 "vsx_register_operand" "v")
                      (match_operand:V1TI 2 "vsx_register_operand" "v")]
                     UNSPEC_VSX_DIVESQ))]
  [(set_attr "type" "div")])

(define_insn "vsx_diveu_v1ti"
  [(set (match_operand:V1TI 0 "vsx_register_operand" "=v")
        (unspec:V1TI [(match_operand:V1TI 1 "vsx_register_operand" "v")
                      (match_operand:V1TI 2 "vsx_register_operand" "v")]
                     UNSPEC_VSX_DIVEUQ))]
  [(set_attr "type" "div")])

(define_insn "vsx_mods_v1ti"
  [(set (match_operand:V1TI 0 "vsx_register_operand" "=v")
        (unspec:V1TI [(match_operand:V1TI 1 "vsx_register_operand" "v")
                      (match_operand:V1TI 2 "vsx_register_operand" "v")]
  [(set_attr "type" "div")])

(define_insn "vsx_modu_v1ti"
  [(set (match_operand:V1TI 0 "vsx_register_operand" "=v")
        (unspec:V1TI [(match_operand:V1TI 1 "vsx_register_operand" "v")
                      (match_operand:V1TI 2 "vsx_register_operand" "v")]
  [(set_attr "type" "div")])
;; *tdiv* instruction returning the FG flag
(define_expand "vsx_tdiv<mode>3_fg"
        (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand")
                      (match_operand:VSX_B 2 "vsx_register_operand")]
   (set (match_operand:SI 0 "gpc_reg_operand")
        (gt:SI (match_dup 3)
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  operands[3] = gen_reg_rtx (CCFPmode);

;; *tdiv* instruction returning the FE flag
(define_expand "vsx_tdiv<mode>3_fe"
        (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand")
                      (match_operand:VSX_B 2 "vsx_register_operand")]
   (set (match_operand:SI 0 "gpc_reg_operand")
        (eq:SI (match_dup 3)
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  operands[3] = gen_reg_rtx (CCFPmode);

(define_insn "*vsx_tdiv<mode>3_internal"
  [(set (match_operand:CCFP 0 "cc_reg_operand" "=x")
        (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "wa")
                      (match_operand:VSX_B 2 "vsx_register_operand" "wa")]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "x<VSv>tdiv<sd>p %0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")])
(define_insn "vsx_fre<mode>2"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
        (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  [(set_attr "type" "<VStype_simple>")])

(define_insn "*vsx_neg<mode>2"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
        (neg:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvneg<sd>p %x0,%x1"
  [(set_attr "type" "<VStype_simple>")])

(define_insn "*vsx_abs<mode>2"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
        (abs:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvabs<sd>p %x0,%x1"
  [(set_attr "type" "<VStype_simple>")])

(define_insn "vsx_nabs<mode>2"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
          (match_operand:VSX_F 1 "vsx_register_operand" "wa"))))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvnabs<sd>p %x0,%x1"
  [(set_attr "type" "<VStype_simple>")])

(define_insn "vsx_smax<mode>3"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
        (smax:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
                    (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvmax<sd>p %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")])

(define_insn "*vsx_smin<mode>3"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
        (smin:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
                    (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvmin<sd>p %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")])

(define_insn "*vsx_sqrt<mode>2"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
        (sqrt:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvsqrt<sd>p %x0,%x1"
  [(set_attr "type" "<sd>sqrt")])

(define_insn "*vsx_rsqrte<mode>2"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
        (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvrsqrte<sd>p %x0,%x1"
  [(set_attr "type" "<VStype_simple>")])
;; *tsqrt* returning the FG flag
(define_expand "vsx_tsqrt<mode>2_fg"
        (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand")]
   (set (match_operand:SI 0 "gpc_reg_operand")
        (gt:SI (match_dup 2)
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  operands[2] = gen_reg_rtx (CCFPmode);

;; *tsqrt* returning the FE flag
(define_expand "vsx_tsqrt<mode>2_fe"
        (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand")]
   (set (match_operand:SI 0 "gpc_reg_operand")
        (eq:SI (match_dup 2)
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  operands[2] = gen_reg_rtx (CCFPmode);

(define_insn "*vsx_tsqrt<mode>2_internal"
  [(set (match_operand:CCFP 0 "cc_reg_operand" "=x")
        (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "wa")]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "x<VSv>tsqrt<sd>p %0,%x1"
  [(set_attr "type" "<VStype_simple>")])
;; Fused vector multiply/add instructions.  Do not generate the AltiVec
;; versions of fma (vmaddfp and vnmsubfp).  These instructions allow the
;; target to be a separate register from the 3 inputs, which can possibly
;; save an extra move being generated (assuming all registers are AltiVec
;; registers).  However, vmaddfp and vnmsubfp can have different behaviors
;; than the VSX instructions in some corner cases due to VSCR[NJ] being set
;; or if the addend is +0.0.
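
;; For example (C sketch, with FP contraction enabled), both alternatives of
;; the patterns below come from code like:
;;
;;   vector float f (vector float a, vector float b, vector float c)
;;   {
;;     return a * b + c;	/* xvmaddasp or xvmaddmsp, not vmaddfp */
;;   }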

(define_insn "*vsx_fmav4sf4"
  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa,wa")
        (fma:V4SF
          (match_operand:V4SF 1 "vsx_register_operand" "%wa,wa")
          (match_operand:V4SF 2 "vsx_register_operand" "wa,0")
          (match_operand:V4SF 3 "vsx_register_operand" "0,wa")))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
  "@
   xvmaddasp %x0,%x1,%x2
   xvmaddmsp %x0,%x1,%x3"
  [(set_attr "type" "vecfloat")])

(define_insn "*vsx_fmav2df4"
  [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa,wa")
        (fma:V2DF
          (match_operand:V2DF 1 "vsx_register_operand" "%wa,wa")
          (match_operand:V2DF 2 "vsx_register_operand" "wa,0")
          (match_operand:V2DF 3 "vsx_register_operand" "0,wa")))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "@
   xvmaddadp %x0,%x1,%x2
   xvmaddmdp %x0,%x1,%x3"
  [(set_attr "type" "vecdouble")])
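
;; Added commentary (not in the original file; names and flags are
;; illustrative): a hedged C sketch of source that normally maps onto the
;; fma patterns above when compiled with VSX enabled (e.g. -O2 -mvsx):
;;
;;   #include <altivec.h>
;;   vector float
;;   fma_v4sf (vector float a, vector float b, vector float c)
;;   {
;;     return vec_madd (a, b, c);   /* typically xvmaddasp/xvmaddmsp  */
;;   }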

(define_insn "*vsx_fms<mode>4"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa,wa")
        (fma:VSX_F
          (match_operand:VSX_F 1 "vsx_register_operand" "%wa,wa")
          (match_operand:VSX_F 2 "vsx_register_operand" "wa,0")
          (neg:VSX_F
            (match_operand:VSX_F 3 "vsx_register_operand" "0,wa"))))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "@
   xvmsuba<sd>p %x0,%x1,%x2
   xvmsubm<sd>p %x0,%x1,%x3"
  [(set_attr "type" "<VStype_mul>")])

(define_insn "*vsx_nfma<mode>4"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa,wa")
        (neg:VSX_F
         (fma:VSX_F
          (match_operand:VSX_F 1 "vsx_register_operand" "wa,wa")
          (match_operand:VSX_F 2 "vsx_register_operand" "wa,0")
          (match_operand:VSX_F 3 "vsx_register_operand" "0,wa"))))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "@
   xvnmadda<sd>p %x0,%x1,%x2
   xvnmaddm<sd>p %x0,%x1,%x3"
  [(set_attr "type" "<VStype_mul>")])

(define_insn "*vsx_nfmsv4sf4"
  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa,wa")
        (neg:V4SF
         (fma:V4SF
           (match_operand:V4SF 1 "vsx_register_operand" "%wa,wa")
           (match_operand:V4SF 2 "vsx_register_operand" "wa,0")
           (neg:V4SF
             (match_operand:V4SF 3 "vsx_register_operand" "0,wa")))))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
  "@
   xvnmsubasp %x0,%x1,%x2
   xvnmsubmsp %x0,%x1,%x3"
  [(set_attr "type" "vecfloat")])

(define_insn "*vsx_nfmsv2df4"
  [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa,wa")
        (neg:V2DF
         (fma:V2DF
           (match_operand:V2DF 1 "vsx_register_operand" "%wa,wa")
           (match_operand:V2DF 2 "vsx_register_operand" "wa,0")
           (neg:V2DF
             (match_operand:V2DF 3 "vsx_register_operand" "0,wa")))))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "@
   xvnmsubadp %x0,%x1,%x2
   xvnmsubmdp %x0,%x1,%x3"
  [(set_attr "type" "vecdouble")])

;; Vector conditional expressions (no scalar version for these instructions)
(define_insn "vsx_eq<mode>"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
        (eq:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
                  (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvcmpeq<sd>p %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")])

(define_insn "vsx_gt<mode>"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
        (gt:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
                  (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvcmpgt<sd>p %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")])

(define_insn "*vsx_ge<mode>"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
        (ge:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
                  (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvcmpge<sd>p %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")])
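
;; Added commentary (illustrative, not from the original): each compare
;; yields a per-element all-ones/all-zeros mask, e.g. in C:
;;
;;   #include <altivec.h>
;;   vector bool long long
;;   gt_v2df (vector double a, vector double b)
;;   {
;;     return vec_cmpgt (a, b);   /* typically xvcmpgtdp  */
;;   }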

;; Compare vectors producing a vector result and a predicate, setting CR6 to
;; indicate a combined status
(define_insn "*vsx_eq_<mode>_p"
  [(set (reg:CC CR6_REGNO)
        (unspec:CC
         [(eq:CC (match_operand:VSX_F 1 "vsx_register_operand" "wa")
                 (match_operand:VSX_F 2 "vsx_register_operand" "wa"))]
         UNSPEC_PREDICATE))
   (set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
        (eq:VSX_F (match_dup 1)
                  (match_dup 2)))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvcmpeq<sd>p. %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")])

(define_insn "*vsx_gt_<mode>_p"
  [(set (reg:CC CR6_REGNO)
        (unspec:CC
         [(gt:CC (match_operand:VSX_F 1 "vsx_register_operand" "wa")
                 (match_operand:VSX_F 2 "vsx_register_operand" "wa"))]
         UNSPEC_PREDICATE))
   (set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
        (gt:VSX_F (match_dup 1)
                  (match_dup 2)))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvcmpgt<sd>p. %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")])
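
;; Added commentary (illustrative): the "." forms also set CR6, which is what
;; the vec_all_*/vec_any_* predicates read, e.g. in C:
;;
;;   #include <altivec.h>
;;   int all_gt (vector double a, vector double b)
;;   {
;;     return vec_all_gt (a, b);   /* typically xvcmpgtdp. plus a CR6 test  */
;;   }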

;; Set the CR field BF to indicate if the lowest bit (bit 7) of every byte
;; element in VSR[XB] is equal to 1 (ALL_TRUE) or equal to 0 (ALL_FALSE).
(define_insn "*xvtlsbb_internal"
  [(set (match_operand:CC 0 "cc_reg_operand" "=y")
        (unspec:CC [(match_operand:V16QI 1 "vsx_register_operand" "wa")]
                   UNSPEC_XVTLSBB))]
  "TARGET_POWER10"
  "xvtlsbb %0,%x1"
  [(set_attr "type" "logical")])

;; Vector Test Least Significant Bit by Byte
;; for the implementation of the builtins
;; __builtin_vec_test_lsbb_all_ones
;; int vec_test_lsbb_all_ones (vector unsigned char);
;; and
;; __builtin_vec_test_lsbb_all_zeros
;; int vec_test_lsbb_all_zeros (vector unsigned char);
(define_expand "xvtlsbbo"
  [(set (match_dup 2)
        (unspec:CC [(match_operand:V16QI 1 "vsx_register_operand" "v")]
                   UNSPEC_XVTLSBB))
   (set (match_operand:SI 0 "gpc_reg_operand" "=r")
        (lt:SI (match_dup 2) (const_int 0)))]
  "TARGET_POWER10"
{
  operands[2] = gen_reg_rtx (CCmode);
})

(define_expand "xvtlsbbz"
  [(set (match_dup 2)
        (unspec:CC [(match_operand:V16QI 1 "vsx_register_operand" "v")]
                   UNSPEC_XVTLSBB))
   (set (match_operand:SI 0 "gpc_reg_operand" "=r")
        (eq:SI (match_dup 2) (const_int 0)))]
  "TARGET_POWER10"
{
  operands[2] = gen_reg_rtx (CCmode);
})
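
;; Added commentary (illustrative; assumes a Power10 target, -mcpu=power10):
;;
;;   #include <altivec.h>
;;   int lsbb_ones (vector unsigned char v)
;;   {
;;     return vec_test_lsbb_all_ones (v);   /* xvtlsbb + CR field test  */
;;   }
;;   int lsbb_zeros (vector unsigned char v)
;;   {
;;     return vec_test_lsbb_all_zeros (v);
;;   }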

(define_insn "*vsx_ge_<mode>_p"
  [(set (reg:CC CR6_REGNO)
        (unspec:CC
         [(ge:CC (match_operand:VSX_F 1 "vsx_register_operand" "wa")
                 (match_operand:VSX_F 2 "vsx_register_operand" "wa"))]
         UNSPEC_PREDICATE))
   (set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
        (ge:VSX_F (match_dup 1)
                  (match_dup 2)))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvcmpge<sd>p. %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")])

(define_insn "vsx_copysign<mode>3"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
        (copysign:VSX_F
         (match_operand:VSX_F 1 "vsx_register_operand" "wa")
         (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvcpsgn<sd>p %x0,%x2,%x1"
  [(set_attr "type" "<VStype_simple>")])
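
;; Added commentary (illustrative): per-element semantics of the copysign
;; pattern, written as a scalar C loop; the result takes its magnitude from
;; operand 1 and its sign from operand 2, hence the swapped %x2,%x1 above:
;;
;;   #include <math.h>
;;   void copysign_v2df (double r[2], const double mag[2], const double sgn[2])
;;   {
;;     for (int i = 0; i < 2; i++)
;;       r[i] = copysign (mag[i], sgn[i]);
;;   }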

;; For the conversions, limit the register class for the integer value to be
;; the fprs because we don't want to add the altivec registers to movdi/movsi.
;; For the unsigned tests, there isn't a generic double -> unsigned conversion
;; in rs6000.md, so don't test VECTOR_UNIT_VSX_P, just test against VSX.
;; Don't use vsx_register_operand here, use gpc_reg_operand to match rs6000.md
;; in allowing virtual registers.
(define_insn "vsx_float<VSi><mode>2"
  [(set (match_operand:VSX_F 0 "gpc_reg_operand" "=wa")
        (float:VSX_F (match_operand:<VSI> 1 "gpc_reg_operand" "wa")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvcvsx<VSc><sd>p %x0,%x1"
  [(set_attr "type" "<VStype_simple>")])

(define_insn "vsx_floatuns<VSi><mode>2"
  [(set (match_operand:VSX_F 0 "gpc_reg_operand" "=wa")
        (unsigned_float:VSX_F (match_operand:<VSI> 1 "gpc_reg_operand" "wa")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvcvux<VSc><sd>p %x0,%x1"
  [(set_attr "type" "<VStype_simple>")])

(define_insn "vsx_fix_trunc<mode><VSi>2"
  [(set (match_operand:<VSI> 0 "gpc_reg_operand" "=wa")
        (fix:<VSI> (match_operand:VSX_F 1 "gpc_reg_operand" "wa")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "x<VSv>cv<sd>psx<VSc>s %x0,%x1"
  [(set_attr "type" "<VStype_simple>")])

(define_insn "vsx_fixuns_trunc<mode><VSi>2"
  [(set (match_operand:<VSI> 0 "gpc_reg_operand" "=wa")
        (unsigned_fix:<VSI> (match_operand:VSX_F 1 "gpc_reg_operand" "wa")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "x<VSv>cv<sd>pux<VSc>s %x0,%x1"
  [(set_attr "type" "<VStype_simple>")])
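
;; Added commentary (illustrative): C intrinsics that normally map onto the
;; V4SF/V4SI forms of the conversions above:
;;
;;   #include <altivec.h>
;;   vector float to_f (vector signed int v) { return vec_float (v); }
;;   vector signed int to_i (vector float v) { return vec_signed (v); }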

;; Math rounding functions
(define_insn "vsx_x<VSv>r<sd>pi"
  [(set (match_operand:VSX_B 0 "vsx_register_operand" "=wa")
        (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "wa")]
                      UNSPEC_VSX_ROUND_I))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "x<VSv>r<sd>pi %x0,%x1"
  [(set_attr "type" "<VStype_simple>")])

(define_insn "vsx_x<VSv>r<sd>pic"
  [(set (match_operand:VSX_B 0 "vsx_register_operand" "=wa")
        (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "wa")]
                      UNSPEC_VSX_ROUND_IC))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "x<VSv>r<sd>pic %x0,%x1"
  [(set_attr "type" "<VStype_simple>")])

(define_insn "vsx_btrunc<mode>2"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
        (fix:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvr<sd>piz %x0,%x1"
  [(set_attr "type" "<VStype_simple>")])

(define_insn "*vsx_b2trunc<mode>2"
  [(set (match_operand:VSX_B 0 "vsx_register_operand" "=wa")
        (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "wa")]
                      UNSPEC_FRIZ))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "x<VSv>r<sd>piz %x0,%x1"
  [(set_attr "type" "<VStype_simple>")])

(define_insn "vsx_floor<mode>2"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
        (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
                      UNSPEC_FRIM))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvr<sd>pim %x0,%x1"
  [(set_attr "type" "<VStype_simple>")])

(define_insn "vsx_ceil<mode>2"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
        (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
                      UNSPEC_FRIP))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvr<sd>pip %x0,%x1"
  [(set_attr "type" "<VStype_simple>")])
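
;; Added commentary (illustrative): the rounding intrinsics these patterns
;; implement for V2DF:
;;
;;   #include <altivec.h>
;;   vector double tz (vector double v) { return vec_trunc (v); }  /* xvrdpiz */
;;   vector double fl (vector double v) { return vec_floor (v); }  /* xvrdpim */
;;   vector double cl (vector double v) { return vec_ceil (v); }   /* xvrdpip */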

;; VSX convert to/from double vector

;; Convert between single and double precision
;; Don't use xscvspdp and xscvdpsp for scalar conversions, since the normal
;; scalar single precision instructions internally use the double format.
;; Prefer the altivec registers, since we likely will need to do a vperm
(define_insn "vsx_xscvdpsp"
  [(set (match_operand:V4SF 0 "vsx_register_operand" "=f,?wa")
        (unspec:V4SF [(match_operand:DF 1 "vsx_register_operand" "f,wa")]
                     UNSPEC_VSX_CVSPDP))]
  "VECTOR_UNIT_VSX_P (DFmode)"
  "xscvdpsp %x0,%x1"
  [(set_attr "type" "fp")])

(define_insn "vsx_xvcvspdp_be"
  [(set (match_operand:V2DF 0 "vsx_register_operand" "=v,?wa")
        (float_extend:V2DF
         (vec_select:V2SF (match_operand:V4SF 1 "vsx_register_operand" "wa,wa")
                          (parallel [(const_int 0) (const_int 2)]))))]
  "VECTOR_UNIT_VSX_P (V4SFmode) && BYTES_BIG_ENDIAN"
  "xvcvspdp %x0,%x1"
  [(set_attr "type" "vecdouble")])

(define_insn "vsx_xvcvspdp_le"
  [(set (match_operand:V2DF 0 "vsx_register_operand" "=v,?wa")
        (float_extend:V2DF
         (vec_select:V2SF (match_operand:V4SF 1 "vsx_register_operand" "wa,wa")
                          (parallel [(const_int 1) (const_int 3)]))))]
  "VECTOR_UNIT_VSX_P (V4SFmode) && !BYTES_BIG_ENDIAN"
  "xvcvspdp %x0,%x1"
  [(set_attr "type" "vecdouble")])

(define_expand "vsx_xvcvspdp"
  [(match_operand:V2DF 0 "vsx_register_operand")
   (match_operand:V4SF 1 "vsx_register_operand")]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
{
  if (BYTES_BIG_ENDIAN)
    emit_insn (gen_vsx_xvcvspdp_be (operands[0], operands[1]));
  else
    emit_insn (gen_vsx_xvcvspdp_le (operands[0], operands[1]));
  DONE;
})

(define_insn "vsx_xvcvdpsp"
  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa,?wa")
        (unspec:V4SF [(match_operand:V2DF 1 "vsx_register_operand" "v,wa")]
                     UNSPEC_VSX_CVSPDP))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "xvcvdpsp %x0,%x1"
  [(set_attr "type" "vecdouble")])

;; xscvspdp, represent the scalar SF type as V4SF
(define_insn "vsx_xscvspdp"
  [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
        (unspec:DF [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
                   UNSPEC_VSX_CVSPDP))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
  "xscvspdp %x0,%x1"
  [(set_attr "type" "fp")])

;; Same as vsx_xscvspdp, but use SF as the type
(define_insn "vsx_xscvspdp_scalar2"
  [(set (match_operand:SF 0 "vsx_register_operand" "=wa")
        (unspec:SF [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
                   UNSPEC_VSX_CVSPDP))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
  "xscvspdp %x0,%x1"
  [(set_attr "type" "fp")])

;; Generate xvcvhpsp instruction
(define_insn "vsx_xvcvhpsp"
  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
        (unspec:V4SF [(match_operand:V16QI 1 "vsx_register_operand" "wa")]
                     UNSPEC_VSX_CVHPSP))]
  "TARGET_P9_VECTOR"
  "xvcvhpsp %x0,%x1"
  [(set_attr "type" "vecfloat")])

;; Generate xvcvsphp
(define_insn "vsx_xvcvsphp"
  [(set (match_operand:V4SI 0 "register_operand" "=wa")
        (unspec:V4SI [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
                     UNSPEC_VSX_XVCVSPHP))]
  "TARGET_P9_VECTOR"
  "xvcvsphp %x0,%x1"
  [(set_attr "type" "vecfloat")])

;; xscvdpsp used for splat'ing a scalar to V4SF, knowing that the internal SF
;; format of scalars is actually DF.
(define_insn "vsx_xscvdpsp_scalar"
  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
        (unspec:V4SF [(match_operand:SF 1 "vsx_register_operand" "wa")]
                     UNSPEC_VSX_CVSPDP))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
  "xscvdpsp %x0,%x1"
  [(set_attr "type" "fp")])

;; ISA 2.07 xscvdpspn/xscvspdpn that does not raise an error on signalling NaNs
(define_insn "vsx_xscvdpspn"
  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
        (unspec:V4SF [(match_operand:DF 1 "vsx_register_operand" "wa")]
                     UNSPEC_VSX_CVDPSPN))]
  "TARGET_XSCVDPSPN"
  "xscvdpspn %x0,%x1"
  [(set_attr "type" "fp")])

(define_insn "vsx_xscvspdpn"
  [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
        (unspec:DF [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
                   UNSPEC_VSX_CVSPDPN))]
  "TARGET_XSCVSPDPN"
  "xscvspdpn %x0,%x1"
  [(set_attr "type" "fp")])

(define_insn "vsx_xscvdpspn_scalar"
  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
        (unspec:V4SF [(match_operand:SF 1 "vsx_register_operand" "wa")]
                     UNSPEC_VSX_CVDPSPN))]
  "TARGET_XSCVDPSPN"
  "xscvdpspn %x0,%x1"
  [(set_attr "type" "fp")])

;; Used by direct move to move a SFmode value from GPR to VSX register
(define_insn "vsx_xscvspdpn_directmove"
  [(set (match_operand:SF 0 "vsx_register_operand" "=wa")
        (unspec:SF [(match_operand:SF 1 "vsx_register_operand" "wa")]
                   UNSPEC_VSX_CVSPDPN))]
  "TARGET_XSCVSPDPN"
  "xscvspdpn %x0,%x1"
  [(set_attr "type" "fp")])

;; Convert and scale (used by vec_ctf, vec_cts, vec_ctu for double/long long)

(define_insn "vsx_xvcv<su>xwsp"
  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
        (any_float:V4SF (match_operand:V4SI 1 "vsx_register_operand" "wa")))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
  "xvcv<su>xwsp %x0,%x1"
  [(set_attr "type" "vecfloat")])

(define_insn "vsx_xvcv<su>xddp"
  [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
        (any_float:V2DF (match_operand:V2DI 1 "vsx_register_operand" "wa")))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "xvcv<su>xddp %x0,%x1"
  [(set_attr "type" "vecdouble")])

(define_insn "vsx_xvcvsp<su>xws"
  [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa")
        (any_fix:V4SI (match_operand:V4SF 1 "vsx_register_operand" "wa")))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
  "xvcvsp<su>xws %x0,%x1"
  [(set_attr "type" "vecfloat")])

(define_insn "vsx_xvcvdp<su>xds"
  [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
        (any_fix:V2DI (match_operand:V2DF 1 "vsx_register_operand" "wa")))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "xvcvdp<su>xds %x0,%x1"
  [(set_attr "type" "vecdouble")])

(define_expand "vsx_xvcvsxddp_scale"
  [(match_operand:V2DF 0 "vsx_register_operand")
   (match_operand:V2DI 1 "vsx_register_operand")
   (match_operand:QI 2 "immediate_operand")]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
{
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  int scale = INTVAL (operands[2]);
  emit_insn (gen_vsx_xvcvsxddp (op0, op1));
  if (scale != 0)
    rs6000_scale_v2df (op0, op0, -scale);
  DONE;
})

(define_expand "vsx_xvcvuxddp_scale"
  [(match_operand:V2DF 0 "vsx_register_operand")
   (match_operand:V2DI 1 "vsx_register_operand")
   (match_operand:QI 2 "immediate_operand")]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
{
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  int scale = INTVAL (operands[2]);
  emit_insn (gen_vsx_xvcvuxddp (op0, op1));
  if (scale != 0)
    rs6000_scale_v2df (op0, op0, -scale);
  DONE;
})

(define_expand "vsx_xvcvdpsxds_scale"
  [(match_operand:V2DI 0 "vsx_register_operand")
   (match_operand:V2DF 1 "vsx_register_operand")
   (match_operand:QI 2 "immediate_operand")]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
{
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  rtx tmp;
  int scale = INTVAL (operands[2]);
  if (scale == 0)
    tmp = op1;
  else
    {
      tmp = gen_reg_rtx (V2DFmode);
      rs6000_scale_v2df (tmp, op1, scale);
    }
  emit_insn (gen_vsx_xvcvdpsxds (op0, tmp));
  DONE;
})

;; convert vector of 64-bit floating point numbers to vector of
;; 64-bit unsigned integer
(define_expand "vsx_xvcvdpuxds_scale"
  [(match_operand:V2DI 0 "vsx_register_operand")
   (match_operand:V2DF 1 "vsx_register_operand")
   (match_operand:QI 2 "immediate_operand")]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
{
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  rtx tmp;
  int scale = INTVAL (operands[2]);
  if (scale == 0)
    tmp = op1;
  else
    {
      tmp = gen_reg_rtx (V2DFmode);
      rs6000_scale_v2df (tmp, op1, scale);
    }
  emit_insn (gen_vsx_xvcvdpuxds (op0, tmp));
  DONE;
})
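
;; Added commentary (illustrative): vec_ctf/vec_cts with a nonzero scale use
;; the *_scale expanders above; the scale is a power of two applied around
;; the conversion:
;;
;;   #include <altivec.h>
;;   vector double ctf (vector signed long long v)
;;   {
;;     return vec_ctf (v, 3);   /* (double) v[i] * 2^-3  */
;;   }
;;   vector signed long long cts (vector double v)
;;   {
;;     return vec_cts (v, 3);   /* (long long) (v[i] * 2^3)  */
;;   }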

;; Convert from 64-bit to 32-bit types
;; Note, favor the Altivec registers since the usual use of these instructions
;; is in vector converts and we need to use the Altivec vperm instruction.

(define_insn "vsx_xvcvdpsxws"
  [(set (match_operand:V4SI 0 "vsx_register_operand" "=v,?wa")
        (unspec:V4SI [(match_operand:V2DF 1 "vsx_register_operand" "wa,wa")]
                     UNSPEC_VSX_CVDPSXWS))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "xvcvdpsxws %x0,%x1"
  [(set_attr "type" "vecdouble")])

(define_insn "vsx_xvcvdpuxws"
  [(set (match_operand:V4SI 0 "vsx_register_operand" "=v,?wa")
        (unspec:V4SI [(match_operand:V2DF 1 "vsx_register_operand" "wa,wa")]
                     UNSPEC_VSX_CVDPUXWS))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "xvcvdpuxws %x0,%x1"
  [(set_attr "type" "vecdouble")])

(define_insn "vsx_xvcvsxdsp"
  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
        (unspec:V4SF [(match_operand:V2DI 1 "vsx_register_operand" "wa")]
                     UNSPEC_VSX_CVSXDSP))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "xvcvsxdsp %x0,%x1"
  [(set_attr "type" "vecfloat")])

(define_insn "vsx_xvcvuxdsp"
  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
        (unspec:V4SF [(match_operand:V2DI 1 "vsx_register_operand" "wa")]
                     UNSPEC_VSX_CVUXDSP))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "xvcvuxdsp %x0,%x1"
  [(set_attr "type" "vecdouble")])

;; Convert vector of 32-bit signed/unsigned integers to vector of
;; 64-bit floating point numbers.
(define_insn "vsx_xvcv<su>xwdp_be"
  [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
        (any_float:V2DF
         (vec_select:V2SI (match_operand:V4SI 1 "vsx_register_operand" "wa")
                          (parallel [(const_int 0) (const_int 2)]))))]
  "VECTOR_UNIT_VSX_P (V2DFmode) && BYTES_BIG_ENDIAN"
  "xvcv<su>xwdp %x0,%x1"
  [(set_attr "type" "vecdouble")])

(define_insn "vsx_xvcv<su>xwdp_le"
  [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
        (any_float:V2DF
         (vec_select:V2SI (match_operand:V4SI 1 "vsx_register_operand" "wa")
                          (parallel [(const_int 1) (const_int 3)]))))]
  "VECTOR_UNIT_VSX_P (V2DFmode) && !BYTES_BIG_ENDIAN"
  "xvcv<su>xwdp %x0,%x1"
  [(set_attr "type" "vecdouble")])

(define_expand "vsx_xvcv<su>xwdp"
  [(match_operand:V2DF 0 "vsx_register_operand")
   (match_operand:V4SI 1 "vsx_register_operand")
   (any_float:V2DF (pc))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
{
  if (BYTES_BIG_ENDIAN)
    emit_insn (gen_vsx_xvcv<su>xwdp_be (operands[0], operands[1]));
  else
    emit_insn (gen_vsx_xvcv<su>xwdp_le (operands[0], operands[1]));
  DONE;
})

(define_insn "vsx_xvcvsxwdp_df"
  [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
        (unspec:DF [(match_operand:V4SI 1 "vsx_register_operand" "wa")]
                   UNSPEC_VSX_CVSXWDP))]
  "TARGET_VSX"
  "xvcvsxwdp %x0,%x1"
  [(set_attr "type" "vecdouble")])

(define_insn "vsx_xvcvuxwdp_df"
  [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
        (unspec:DF [(match_operand:V4SI 1 "vsx_register_operand" "wa")]
                   UNSPEC_VSX_CVUXWDP))]
  "TARGET_VSX"
  "xvcvuxwdp %x0,%x1"
  [(set_attr "type" "vecdouble")])

;; Convert vector of 32-bit floating point numbers to vector of
;; 64-bit signed/unsigned integers.
(define_insn "vsx_xvcvsp<su>xds_be"
  [(set (match_operand:V2DI 0 "vsx_register_operand" "=v,?wa")
        (any_fix:V2DI
         (vec_select:V2SF (match_operand:V4SF 1 "vsx_register_operand" "wa,wa")
                          (parallel [(const_int 0) (const_int 2)]))))]
  "VECTOR_UNIT_VSX_P (V2DFmode) && BYTES_BIG_ENDIAN"
  "xvcvsp<su>xds %x0,%x1"
  [(set_attr "type" "vecdouble")])

(define_insn "vsx_xvcvsp<su>xds_le"
  [(set (match_operand:V2DI 0 "vsx_register_operand" "=v,?wa")
        (any_fix:V2DI
         (vec_select:V2SF (match_operand:V4SF 1 "vsx_register_operand" "wa,wa")
                          (parallel [(const_int 1) (const_int 3)]))))]
  "VECTOR_UNIT_VSX_P (V2DFmode) && !BYTES_BIG_ENDIAN"
  "xvcvsp<su>xds %x0,%x1"
  [(set_attr "type" "vecdouble")])

(define_expand "vsx_xvcvsp<su>xds"
  [(match_operand:V2DI 0 "vsx_register_operand")
   (match_operand:V4SF 1 "vsx_register_operand")
   (any_fix:V2DI (pc))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
{
  if (BYTES_BIG_ENDIAN)
    emit_insn (gen_vsx_xvcvsp<su>xds_be (operands[0], operands[1]));
  else
    emit_insn (gen_vsx_xvcvsp<su>xds_le (operands[0], operands[1]));
  DONE;
})

;; Generate float2 double
;; convert two double to float
(define_expand "float2_v2df"
  [(use (match_operand:V4SF 0 "register_operand" "=wa"))
   (use (match_operand:V2DF 1 "register_operand" "wa"))
   (use (match_operand:V2DF 2 "register_operand" "wa"))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
{
  rtx rtx_src1, rtx_src2, rtx_dst;

  rtx_dst = operands[0];
  rtx_src1 = operands[1];
  rtx_src2 = operands[2];

  rs6000_generate_float2_double_code (rtx_dst, rtx_src1, rtx_src2);
  DONE;
})

;; Generate float2
;; convert two long long signed ints to float
(define_expand "float2_v2di"
  [(use (match_operand:V4SF 0 "register_operand" "=wa"))
   (use (match_operand:V2DI 1 "register_operand" "wa"))
   (use (match_operand:V2DI 2 "register_operand" "wa"))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
{
  rtx rtx_src1, rtx_src2, rtx_dst;

  rtx_dst = operands[0];
  rtx_src1 = operands[1];
  rtx_src2 = operands[2];

  rs6000_generate_float2_code (true, rtx_dst, rtx_src1, rtx_src2);
  DONE;
})

;; Generate uns_float2
;; convert two long long unsigned ints to float
(define_expand "uns_float2_v2di"
  [(use (match_operand:V4SF 0 "register_operand" "=wa"))
   (use (match_operand:V2DI 1 "register_operand" "wa"))
   (use (match_operand:V2DI 2 "register_operand" "wa"))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
{
  rtx rtx_src1, rtx_src2, rtx_dst;

  rtx_dst = operands[0];
  rtx_src1 = operands[1];
  rtx_src2 = operands[2];

  /* Pass false here: this is the unsigned variant; passing true would
     generate the signed conversion.  */
  rs6000_generate_float2_code (false, rtx_dst, rtx_src1, rtx_src2);
  DONE;
})
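
;; Added commentary (illustrative): the float2 expanders back the vec_float2
;; intrinsic, which packs two V2DF (or V2DI) inputs into one V4SF:
;;
;;   #include <altivec.h>
;;   vector float pack2 (vector double a, vector double b)
;;   {
;;     return vec_float2 (a, b);
;;   }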

;; Generate floate
;; convert double or long long signed to float
;; (Only even words are valid, BE numbering)
(define_expand "floate<mode>"
  [(use (match_operand:V4SF 0 "register_operand" "=wa"))
   (use (match_operand:VSX_D 1 "register_operand" "wa"))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
{
  if (BYTES_BIG_ENDIAN)
    {
      /* Shift left one word to put the even words in the correct
         location.  */
      rtx rtx_tmp;
      rtx rtx_val = GEN_INT (4);

      rtx_tmp = gen_reg_rtx (V4SFmode);
      emit_insn (gen_vsx_xvcv<VF_sxddp>sp (rtx_tmp, operands[1]));
      emit_insn (gen_altivec_vsldoi_v4sf (operands[0],
                                          rtx_tmp, rtx_tmp, rtx_val));
    }
  else
    emit_insn (gen_vsx_xvcv<VF_sxddp>sp (operands[0], operands[1]));

  DONE;
})

;; Generate uns_floate
;; convert long long unsigned to float
;; (Only even words are valid, BE numbering)
(define_expand "unsfloatev2di"
  [(use (match_operand:V4SF 0 "register_operand" "=wa"))
   (use (match_operand:V2DI 1 "register_operand" "wa"))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
{
  if (BYTES_BIG_ENDIAN)
    {
      /* Shift left one word to put the even words in the correct
         location.  */
      rtx rtx_tmp;
      rtx rtx_val = GEN_INT (4);

      rtx_tmp = gen_reg_rtx (V4SFmode);
      emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp, operands[1]));
      emit_insn (gen_altivec_vsldoi_v4sf (operands[0],
                                          rtx_tmp, rtx_tmp, rtx_val));
    }
  else
    emit_insn (gen_vsx_xvcvuxdsp (operands[0], operands[1]));

  DONE;
})

;; Generate floato
;; convert double or long long signed to float
;; (Only odd words are valid, BE numbering)
(define_expand "floato<mode>"
  [(use (match_operand:V4SF 0 "register_operand" "=wa"))
   (use (match_operand:VSX_D 1 "register_operand" "wa"))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
{
  if (BYTES_BIG_ENDIAN)
    emit_insn (gen_vsx_xvcv<VF_sxddp>sp (operands[0], operands[1]));
  else
    {
      /* Shift left one word to put the odd words in the correct
         location.  */
      rtx rtx_tmp;
      rtx rtx_val = GEN_INT (4);

      rtx_tmp = gen_reg_rtx (V4SFmode);
      emit_insn (gen_vsx_xvcv<VF_sxddp>sp (rtx_tmp, operands[1]));
      emit_insn (gen_altivec_vsldoi_v4sf (operands[0],
                                          rtx_tmp, rtx_tmp, rtx_val));
    }

  DONE;
})

;; Generate uns_floato
;; convert long long unsigned to float
;; (Only odd words are valid, BE numbering)
(define_expand "unsfloatov2di"
  [(use (match_operand:V4SF 0 "register_operand" "=wa"))
   (use (match_operand:V2DI 1 "register_operand" "wa"))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
{
  if (BYTES_BIG_ENDIAN)
    emit_insn (gen_vsx_xvcvuxdsp (operands[0], operands[1]));
  else
    {
      /* Shift left one word to put the odd words in the correct
         location.  */
      rtx rtx_tmp;
      rtx rtx_val = GEN_INT (4);

      rtx_tmp = gen_reg_rtx (V4SFmode);
      emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp, operands[1]));
      emit_insn (gen_altivec_vsldoi_v4sf (operands[0],
                                          rtx_tmp, rtx_tmp, rtx_val));
    }

  DONE;
})
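
;; Added commentary (illustrative; hedged): floate/floato back the
;; vec_floate/vec_floato intrinsics, which place the two converted values in
;; the even or odd word positions (BE numbering) of the V4SF result:
;;
;;   #include <altivec.h>
;;   vector float fe (vector double v) { return vec_floate (v); }
;;   vector float fo (vector double v) { return vec_floato (v); }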

;; Generate vsigned2
;; convert two double float vectors to a vector of single precision ints
(define_expand "vsigned2_v2df"
  [(match_operand:V4SI 0 "register_operand" "=wa")
   (unspec:V4SI [(match_operand:V2DF 1 "register_operand" "wa")
                 (match_operand:V2DF 2 "register_operand" "wa")]
                UNSPEC_VSX_VSIGNED2)]
  "TARGET_VSX"
{
  rtx rtx_src1, rtx_src2, rtx_dst;
  bool signed_convert = true;

  rtx_dst = operands[0];
  rtx_src1 = operands[1];
  rtx_src2 = operands[2];

  rs6000_generate_vsigned2_code (signed_convert, rtx_dst, rtx_src1, rtx_src2);
  DONE;
})

;; Generate vsignedo_v2df
;; signed double float to int convert odd word
(define_expand "vsignedo_v2df"
  [(set (match_operand:V4SI 0 "register_operand" "=wa")
        (match_operand:V2DF 1 "register_operand" "wa"))]
  "TARGET_VSX"
{
  if (BYTES_BIG_ENDIAN)
    {
      rtx rtx_tmp;
      rtx rtx_val = GEN_INT (12);
      rtx_tmp = gen_reg_rtx (V4SImode);

      emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp, operands[1]));

      /* Big endian word numbering for words in operand is 0 1 2 3.
         Take (operand[1] operand[1]) and shift left one word:
         0 1 2 3    0 1 2 3  =>  1 2 3 0
         Words 1 and 3 are now where they need to be for the result.  */

      emit_insn (gen_altivec_vsldoi_v4si (operands[0], rtx_tmp,
                                          rtx_tmp, rtx_val));
    }
  else
    /* Little endian word numbering for operand is 3 2 1 0.
       Result words 3 and 1 are where they need to be.  */
    emit_insn (gen_vsx_xvcvdpsxws (operands[0], operands[1]));

  DONE;
}
  [(set_attr "type" "veccomplex")])

;; Generate vsignede_v2df
;; signed double float to int even word
(define_expand "vsignede_v2df"
  [(set (match_operand:V4SI 0 "register_operand" "=v")
        (match_operand:V2DF 1 "register_operand" "v"))]
  "TARGET_VSX"
{
  if (BYTES_BIG_ENDIAN)
    /* Big endian word numbering for words in operand is 0 1 2 3.
       Result words 0 and 2 are where they need to be.  */
    emit_insn (gen_vsx_xvcvdpsxws (operands[0], operands[1]));
  else
    {
      rtx rtx_tmp;
      rtx rtx_val = GEN_INT (12);
      rtx_tmp = gen_reg_rtx (V4SImode);

      emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp, operands[1]));

      /* Little endian word numbering for operand is 3 2 1 0.
         Take (operand[1] operand[1]) and shift left three words:
         0 1 2 3    0 1 2 3  =>  3 0 1 2
         Words 0 and 2 are now where they need to be for the result.  */
      emit_insn (gen_altivec_vsldoi_v4si (operands[0], rtx_tmp,
                                          rtx_tmp, rtx_val));
    }
  DONE;
}
  [(set_attr "type" "veccomplex")])

;; Generate vunsigned2
;; convert two double float vectors to a vector of single precision
;; unsigned ints
(define_expand "vunsigned2_v2df"
  [(match_operand:V4SI 0 "register_operand" "=v")
   (unspec:V4SI [(match_operand:V2DF 1 "register_operand" "v")
                 (match_operand:V2DF 2 "register_operand" "v")]
                UNSPEC_VSX_VSIGNED2)]
  "TARGET_VSX"
{
  rtx rtx_src1, rtx_src2, rtx_dst;
  bool signed_convert = false;

  rtx_dst = operands[0];
  rtx_src1 = operands[1];
  rtx_src2 = operands[2];

  rs6000_generate_vsigned2_code (signed_convert, rtx_dst, rtx_src1, rtx_src2);
  DONE;
})

;; Generate vunsignedo_v2df
;; unsigned double float to int convert odd word
(define_expand "vunsignedo_v2df"
  [(set (match_operand:V4SI 0 "register_operand" "=v")
        (match_operand:V2DF 1 "register_operand" "v"))]
  "TARGET_VSX"
{
  if (BYTES_BIG_ENDIAN)
    {
      rtx rtx_tmp;
      rtx rtx_val = GEN_INT (12);
      rtx_tmp = gen_reg_rtx (V4SImode);

      emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp, operands[1]));

      /* Big endian word numbering for words in operand is 0 1 2 3.
         Take (operand[1] operand[1]) and shift left one word:
         0 1 2 3    0 1 2 3  =>  1 2 3 0
         Words 1 and 3 are now where they need to be for the result.  */

      emit_insn (gen_altivec_vsldoi_v4si (operands[0], rtx_tmp,
                                          rtx_tmp, rtx_val));
    }
  else
    /* Little endian word numbering for operand is 3 2 1 0.
       Result words 3 and 1 are where they need to be.  */
    emit_insn (gen_vsx_xvcvdpuxws (operands[0], operands[1]));

  DONE;
}
  [(set_attr "type" "veccomplex")])

;; Generate vunsignede_v2df
;; unsigned double float to int even word
(define_expand "vunsignede_v2df"
  [(set (match_operand:V4SI 0 "register_operand" "=v")
        (match_operand:V2DF 1 "register_operand" "v"))]
  "TARGET_VSX"
{
  if (BYTES_BIG_ENDIAN)
    /* Big endian word numbering for words in operand is 0 1 2 3.
       Result words 0 and 2 are where they need to be.  */
    emit_insn (gen_vsx_xvcvdpuxws (operands[0], operands[1]));
  else
    {
      rtx rtx_tmp;
      rtx rtx_val = GEN_INT (12);
      rtx_tmp = gen_reg_rtx (V4SImode);

      emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp, operands[1]));

      /* Little endian word numbering for operand is 3 2 1 0.
         Take (operand[1] operand[1]) and shift left three words:
         0 1 2 3    0 1 2 3  =>  3 0 1 2
         Words 0 and 2 are now where they need to be for the result.  */
      emit_insn (gen_altivec_vsldoi_v4si (operands[0], rtx_tmp,
                                          rtx_tmp, rtx_val));
    }
  DONE;
}
  [(set_attr "type" "veccomplex")])
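
;; Added commentary (illustrative): the vsigned2/vunsigned2 expanders back
;; the vec_signed2/vec_unsigned2 intrinsics, which combine two V2DF inputs
;; into one V4SI result:
;;
;;   #include <altivec.h>
;;   vector signed int s2 (vector double a, vector double b)
;;   {
;;     return vec_signed2 (a, b);
;;   }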

;; Only optimize (float (fix x)) -> frz if we are in fast-math mode, since
;; the xvrdpiz instruction does not truncate the value if the floating
;; point value is < LONG_MIN or > LONG_MAX.
(define_insn "*vsx_float_fix_v2df2"
  [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa,?wa")
        (float:V2DF
         (fix:V2DI
          (match_operand:V2DF 1 "vsx_register_operand" "wa,?wa"))))]
  "TARGET_HARD_FLOAT
   && VECTOR_UNIT_VSX_P (V2DFmode) && flag_unsafe_math_optimizations
   && !flag_trapping_math && TARGET_FRIZ"
  "xvrdpiz %x0,%x1"
  [(set_attr "type" "vecdouble")])

;; Permute operations

;; Build a V2DF/V2DI vector from two scalars
(define_insn "vsx_concat_<mode>"
  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa,we")
        (vec_concat:VSX_D
         (match_operand:<VEC_base> 1 "gpc_reg_operand" "wa,b")
         (match_operand:<VEC_base> 2 "gpc_reg_operand" "wa,b")))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  if (which_alternative == 0)
    return (BYTES_BIG_ENDIAN
            ? "xxpermdi %x0,%x1,%x2,0"
            : "xxpermdi %x0,%x2,%x1,0");

  else if (which_alternative == 1)
    return (BYTES_BIG_ENDIAN
            ? "mtvsrdd %x0,%1,%2"
            : "mtvsrdd %x0,%2,%1");

  else
    gcc_unreachable ();
}
  [(set_attr "type" "vecperm,vecmove")])
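
;; Added commentary (illustrative): a V2DF built from two scalars, which is
;; what vsx_concat_<mode> implements:
;;
;;   vector double mk2 (double a, double b)
;;   {
;;     return (vector double) { a, b };   /* xxpermdi or mtvsrdd  */
;;   }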

;; Combiner patterns to allow creating XXPERMDI's to access either double
;; word element in a vector register.
(define_insn "*vsx_concat_<mode>_1"
  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
        (vec_concat:VSX_D
         (vec_select:<VEC_base>
          (match_operand:VSX_D 1 "gpc_reg_operand" "wa")
          (parallel [(match_operand:QI 2 "const_0_to_1_operand" "n")]))
         (match_operand:<VEC_base> 3 "gpc_reg_operand" "wa")))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  HOST_WIDE_INT dword = INTVAL (operands[2]);
  if (BYTES_BIG_ENDIAN)
    {
      operands[4] = GEN_INT (2 * dword);
      return "xxpermdi %x0,%x1,%x3,%4";
    }
  else
    {
      operands[4] = GEN_INT (!dword);
      return "xxpermdi %x0,%x3,%x1,%4";
    }
}
  [(set_attr "type" "vecperm")])

(define_insn "*vsx_concat_<mode>_2"
  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
        (vec_concat:VSX_D
         (match_operand:<VEC_base> 1 "gpc_reg_operand" "wa")
         (vec_select:<VEC_base>
          (match_operand:VSX_D 2 "gpc_reg_operand" "wa")
          (parallel [(match_operand:QI 3 "const_0_to_1_operand" "n")]))))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  HOST_WIDE_INT dword = INTVAL (operands[3]);
  if (BYTES_BIG_ENDIAN)
    {
      operands[4] = GEN_INT (dword);
      return "xxpermdi %x0,%x1,%x2,%4";
    }
  else
    {
      operands[4] = GEN_INT (2 * !dword);
      return "xxpermdi %x0,%x2,%x1,%4";
    }
}
  [(set_attr "type" "vecperm")])

(define_insn "*vsx_concat_<mode>_3"
  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
        (vec_concat:VSX_D
         (vec_select:<VEC_base>
          (match_operand:VSX_D 1 "gpc_reg_operand" "wa")
          (parallel [(match_operand:QI 2 "const_0_to_1_operand" "n")]))
         (vec_select:<VEC_base>
          (match_operand:VSX_D 3 "gpc_reg_operand" "wa")
          (parallel [(match_operand:QI 4 "const_0_to_1_operand" "n")]))))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  HOST_WIDE_INT dword1 = INTVAL (operands[2]);
  HOST_WIDE_INT dword2 = INTVAL (operands[4]);
  if (BYTES_BIG_ENDIAN)
    {
      operands[5] = GEN_INT ((2 * dword1) + dword2);
      return "xxpermdi %x0,%x1,%x3,%5";
    }
  else
    {
      operands[5] = GEN_INT ((2 * !dword2) + !dword1);
      return "xxpermdi %x0,%x3,%x1,%5";
    }
}
  [(set_attr "type" "vecperm")])

;; Special purpose concat using xxpermdi to glue two single precision values
;; together, relying on the fact that internally scalar floats are represented
;; as doubles.  This is used to initialize a V4SF vector with 4 floats
(define_insn "vsx_concat_v2sf"
  [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
        (unspec:V2DF
         [(match_operand:SF 1 "vsx_register_operand" "wa")
          (match_operand:SF 2 "vsx_register_operand" "wa")]
         UNSPEC_VSX_CONCAT))]
  "VECTOR_MEM_VSX_P (V2DFmode)"
{
  if (BYTES_BIG_ENDIAN)
    return "xxpermdi %x0,%x1,%x2,0";
  else
    return "xxpermdi %x0,%x2,%x1,0";
}
  [(set_attr "type" "vecperm")])

;; Concatenate 4 SImode elements into a V4SImode reg.
(define_expand "vsx_init_v4si"
  [(use (match_operand:V4SI 0 "gpc_reg_operand"))
   (use (match_operand:SI 1 "gpc_reg_operand"))
   (use (match_operand:SI 2 "gpc_reg_operand"))
   (use (match_operand:SI 3 "gpc_reg_operand"))
   (use (match_operand:SI 4 "gpc_reg_operand"))]
  "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
{
  rtx a = gen_lowpart_SUBREG (DImode, operands[1]);
  rtx b = gen_lowpart_SUBREG (DImode, operands[2]);
  rtx c = gen_lowpart_SUBREG (DImode, operands[3]);
  rtx d = gen_lowpart_SUBREG (DImode, operands[4]);
  if (!BYTES_BIG_ENDIAN)
    {
      std::swap (a, b);
      std::swap (c, d);
    }

  rtx ab = gen_reg_rtx (DImode);
  rtx cd = gen_reg_rtx (DImode);
  emit_insn (gen_rotldi3_insert_3 (ab, a, GEN_INT (32), b,
                                   GEN_INT (0xffffffff)));
  emit_insn (gen_rotldi3_insert_3 (cd, c, GEN_INT (32), d,
                                   GEN_INT (0xffffffff)));

  rtx abcd = gen_reg_rtx (V2DImode);
  emit_insn (gen_vsx_concat_v2di (abcd, ab, cd));
  emit_move_insn (operands[0], gen_lowpart (V4SImode, abcd));
  DONE;
})
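
;; Added commentary (illustrative): a V4SI built from four GPR values, which
;; vsx_init_v4si expands with two rotate-and-insert ops plus a concat:
;;
;;   vector int mk4 (int a, int b, int c, int d)
;;   {
;;     return (vector int) { a, b, c, d };
;;   }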

;; xxpermdi for little endian loads and stores.  We need several of
;; these since the form of the PARALLEL differs by mode.
(define_insn "*vsx_xxpermdi2_le_<mode>"
  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
        (vec_select:VSX_D
         (match_operand:VSX_D 1 "vsx_register_operand" "wa")
         (parallel [(const_int 1) (const_int 0)])))]
  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)"
  "xxpermdi %x0,%x1,%x1,2"
  [(set_attr "type" "vecperm")])

(define_insn "xxswapd_v16qi"
  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
        (vec_select:V16QI
         (match_operand:V16QI 1 "vsx_register_operand" "wa")
         (parallel [(const_int 8) (const_int 9)
                    (const_int 10) (const_int 11)
                    (const_int 12) (const_int 13)
                    (const_int 14) (const_int 15)
                    (const_int 0) (const_int 1)
                    (const_int 2) (const_int 3)
                    (const_int 4) (const_int 5)
                    (const_int 6) (const_int 7)])))]
  "TARGET_VSX"
;; AIX does not support the extended mnemonic xxswapd.  Use the basic
;; mnemonic xxpermdi instead.
  "xxpermdi %x0,%x1,%x1,2"
  [(set_attr "type" "vecperm")])

(define_insn "xxswapd_v8hi"
  [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
        (vec_select:V8HI
         (match_operand:V8HI 1 "vsx_register_operand" "wa")
         (parallel [(const_int 4) (const_int 5)
                    (const_int 6) (const_int 7)
                    (const_int 0) (const_int 1)
                    (const_int 2) (const_int 3)])))]
  "TARGET_VSX"
;; AIX does not support the extended mnemonic xxswapd.  Use the basic
;; mnemonic xxpermdi instead.
  "xxpermdi %x0,%x1,%x1,2"
  [(set_attr "type" "vecperm")])

(define_insn "xxswapd_<mode>"
  [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
        (vec_select:VSX_W
         (match_operand:VSX_W 1 "vsx_register_operand" "wa")
         (parallel [(const_int 2) (const_int 3)
                    (const_int 0) (const_int 1)])))]
  "TARGET_VSX"
;; AIX does not support the extended mnemonic xxswapd.  Use the basic
;; mnemonic xxpermdi instead.
  "xxpermdi %x0,%x1,%x1,2"
  [(set_attr "type" "vecperm")])

(define_insn "xxswapd_<mode>"
  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
        (vec_select:VSX_D
         (match_operand:VSX_D 1 "vsx_register_operand" "wa")
         (parallel [(const_int 1) (const_int 0)])))]
  "TARGET_VSX"
;; AIX does not support the extended mnemonic xxswapd.  Use the basic
;; mnemonic xxpermdi instead.
  "xxpermdi %x0,%x1,%x1,2"
  [(set_attr "type" "vecperm")])

;; Swap upper/lower 64-bit values in a 128-bit vector
(define_insn "xxswapd_v1ti"
  [(set (match_operand:V1TI 0 "vsx_register_operand" "=v")
        (subreg:V1TI
         (vec_select:V2DI
          (subreg:V2DI
           (match_operand:V1TI 1 "vsx_register_operand" "v") 0)
          (parallel [(const_int 1) (const_int 0)]))
         0))]
  "TARGET_POWER10"
;; AIX does not support the extended mnemonic xxswapd.  Use the basic
;; mnemonic xxpermdi instead.
  "xxpermdi %x0,%x1,%x1,2"
  [(set_attr "type" "vecperm")])

(define_insn "xxgenpcvm_<mode>_internal"
  [(set (match_operand:VSX_EXTRACT_I4 0 "altivec_register_operand" "=wa")
        (unspec:VSX_EXTRACT_I4
         [(match_operand:VSX_EXTRACT_I4 1 "altivec_register_operand" "v")
          (match_operand:QI 2 "const_0_to_3_operand" "n")]
         UNSPEC_XXGENPCV))]
  "TARGET_POWER10"
  "xxgenpcv<wd>m %x0,%1,%2"
  [(set_attr "type" "vecsimple")])

(define_expand "xxgenpcvm_<mode>"
  [(use (match_operand:VSX_EXTRACT_I4 0 "register_operand"))
   (use (match_operand:VSX_EXTRACT_I4 1 "register_operand"))
   (use (match_operand:QI 2 "immediate_operand"))]
  "TARGET_POWER10"
{
  if (!BYTES_BIG_ENDIAN)
    {
      /* gen_xxgenpcvm assumes big endian element order.  For little
         endian, swap the upper and lower doublewords first.  */
      rtx tmp = gen_reg_rtx (<MODE>mode);

      emit_insn (gen_xxswapd_<mode> (tmp, operands[1]));
      operands[1] = tmp;
    }
  emit_insn (gen_xxgenpcvm_<mode>_internal (operands[0], operands[1],
                                            operands[2]));
  DONE;
})

;; lxvd2x for little endian loads.  We need several of
;; these since the form of the PARALLEL differs by mode.
(define_insn "*vsx_lxvd2x2_le_<mode>"
  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
        (vec_select:VSX_D
         (match_operand:VSX_D 1 "memory_operand" "Z")
         (parallel [(const_int 1) (const_int 0)])))]
  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
  "lxvd2x %x0,%y1"
  [(set_attr "type" "vecload")])

(define_insn "*vsx_lxvd2x4_le_<mode>"
  [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
        (vec_select:VSX_W
         (match_operand:VSX_W 1 "memory_operand" "Z")
         (parallel [(const_int 2) (const_int 3)
                    (const_int 0) (const_int 1)])))]
  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
  "lxvd2x %x0,%y1"
  [(set_attr "type" "vecload")])

(define_insn "*vsx_lxvd2x8_le_V8HI"
  [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
        (vec_select:V8HI
         (match_operand:V8HI 1 "memory_operand" "Z")
         (parallel [(const_int 4) (const_int 5)
                    (const_int 6) (const_int 7)
                    (const_int 0) (const_int 1)
                    (const_int 2) (const_int 3)])))]
  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode) && !TARGET_P9_VECTOR"
  "lxvd2x %x0,%y1"
  [(set_attr "type" "vecload")])

(define_insn "*vsx_lxvd2x16_le_V16QI"
  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
        (vec_select:V16QI
         (match_operand:V16QI 1 "memory_operand" "Z")
         (parallel [(const_int 8) (const_int 9)
                    (const_int 10) (const_int 11)
                    (const_int 12) (const_int 13)
                    (const_int 14) (const_int 15)
                    (const_int 0) (const_int 1)
                    (const_int 2) (const_int 3)
                    (const_int 4) (const_int 5)
                    (const_int 6) (const_int 7)])))]
  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode) && !TARGET_P9_VECTOR"
  "lxvd2x %x0,%y1"
  [(set_attr "type" "vecload")])

;; stxvd2x for little endian stores.  We need several of
;; these since the form of the PARALLEL differs by mode.
(define_insn "*vsx_stxvd2x2_le_<mode>"
  [(set (match_operand:VSX_D 0 "memory_operand" "=Z")
        (vec_select:VSX_D
         (match_operand:VSX_D 1 "vsx_register_operand" "wa")
         (parallel [(const_int 1) (const_int 0)])))]
  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
  "stxvd2x %x1,%y0"
  [(set_attr "type" "vecstore")])

(define_insn "*vsx_stxvd2x4_le_<mode>"
  [(set (match_operand:VSX_W 0 "memory_operand" "=Z")
        (vec_select:VSX_W
         (match_operand:VSX_W 1 "vsx_register_operand" "wa")
         (parallel [(const_int 2) (const_int 3)
                    (const_int 0) (const_int 1)])))]
  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
  "stxvd2x %x1,%y0"
  [(set_attr "type" "vecstore")])

(define_insn_and_split "vsx_stxvd2x4_le_const_<mode>"
  [(set (match_operand:VSX_W 0 "memory_operand" "=Z")
        (match_operand:VSX_W 1 "immediate_operand" "W"))]
  "!BYTES_BIG_ENDIAN
   && VECTOR_MEM_VSX_P (<MODE>mode)
   && !TARGET_P9_VECTOR
   && const_vec_duplicate_p (operands[1])
   && can_create_pseudo_p ()"
  "#"
  "&& 1"
  [(set (match_dup 2)
        (match_dup 1))
   (set (match_dup 0)
        (vec_select:VSX_W
         (match_dup 2)
         (parallel [(const_int 2) (const_int 3)
                    (const_int 0) (const_int 1)])))]
{
  /* Here all the constants must be loaded without memory.  */
  gcc_assert (easy_altivec_constant (operands[1], <MODE>mode));
  operands[2] = gen_reg_rtx (<MODE>mode);
}
  [(set_attr "type" "vecstore")
   (set_attr "length" "8")])

(define_insn "*vsx_stxvd2x8_le_V8HI"
  [(set (match_operand:V8HI 0 "memory_operand" "=Z")
        (vec_select:V8HI
         (match_operand:V8HI 1 "vsx_register_operand" "wa")
         (parallel [(const_int 4) (const_int 5)
                    (const_int 6) (const_int 7)
                    (const_int 0) (const_int 1)
                    (const_int 2) (const_int 3)])))]
  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode) && !TARGET_P9_VECTOR"
  "stxvd2x %x1,%y0"
  [(set_attr "type" "vecstore")])

(define_insn "*vsx_stxvd2x16_le_V16QI"
  [(set (match_operand:V16QI 0 "memory_operand" "=Z")
        (vec_select:V16QI
         (match_operand:V16QI 1 "vsx_register_operand" "wa")
         (parallel [(const_int 8) (const_int 9)
                    (const_int 10) (const_int 11)
                    (const_int 12) (const_int 13)
                    (const_int 14) (const_int 15)
                    (const_int 0) (const_int 1)
                    (const_int 2) (const_int 3)
                    (const_int 4) (const_int 5)
                    (const_int 6) (const_int 7)])))]
  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode) && !TARGET_P9_VECTOR"
  "stxvd2x %x1,%y0"
  [(set_attr "type" "vecstore")])

;; Convert a TImode value into V1TImode
(define_expand "vsx_set_v1ti"
  [(match_operand:V1TI 0 "nonimmediate_operand")
   (match_operand:V1TI 1 "nonimmediate_operand")
   (match_operand:TI 2 "input_operand")
   (match_operand:QI 3 "u5bit_cint_operand")]
  "VECTOR_MEM_VSX_P (V1TImode)"
{
  if (operands[3] != const0_rtx)
    gcc_unreachable ();

  /* The new TI value is operand 2; a V1TI has only one element, so the
     result is just that value viewed as V1TI.  */
  emit_move_insn (operands[0], gen_lowpart (V1TImode, operands[2]));
  DONE;
})

;; Rewrite V2DF/V2DI set in terms of VEC_CONCAT
(define_expand "vsx_set_<mode>"
  [(use (match_operand:VSX_D 0 "vsx_register_operand"))
   (use (match_operand:VSX_D 1 "vsx_register_operand"))
   (use (match_operand:<VEC_base> 2 "gpc_reg_operand"))
   (use (match_operand:QI 3 "const_0_to_1_operand"))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  rtx dest = operands[0];
  rtx vec_reg = operands[1];
  rtx value = operands[2];
  rtx ele = operands[3];
  rtx tmp = gen_reg_rtx (<VEC_base>mode);

  if (ele == const0_rtx)
    {
      emit_insn (gen_vsx_extract_<mode> (tmp, vec_reg, const1_rtx));
      emit_insn (gen_vsx_concat_<mode> (dest, value, tmp));
      DONE;
    }
  else if (ele == const1_rtx)
    {
      emit_insn (gen_vsx_extract_<mode> (tmp, vec_reg, const0_rtx));
      emit_insn (gen_vsx_concat_<mode> (dest, tmp, value));
      DONE;
    }
  else
    gcc_unreachable ();
})

;; Extract a DF/DI element from V2DF/V2DI
;; Optimize cases where we can do a simple or direct move.
;; Or see if we can avoid doing the move at all

(define_expand "vsx_extract_<mode>"
  [(set (match_operand:<VEC_base> 0 "gpc_reg_operand")
        (vec_select:<VEC_base>
         (match_operand:VSX_D 1 "gpc_reg_operand")
         (parallel
          [(match_operand:QI 2 "const_0_to_1_operand")])))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
  "")

(define_insn "*vsx_extract_<mode>_0"
  [(set (match_operand:<VEC_base> 0 "gpc_reg_operand" "=wa,wa,wr")
        (vec_select:<VEC_base>
         (match_operand:VSX_D 1 "gpc_reg_operand" "0,wa,wa")
         (parallel
          [(match_operand:QI 2 "const_0_to_1_operand" "n,n,n")])))]
  "VECTOR_MEM_VSX_P (<MODE>mode)
   && INTVAL (operands[2]) == (BYTES_BIG_ENDIAN ? 0 : 1)"
{
  if (which_alternative == 0)
    return ASM_COMMENT_START " vec_extract to same register";

  if (which_alternative == 2)
    return "mfvsrd %0,%x1";

  return "xxlor %x0,%x1,%x1";
}
  [(set_attr "type" "*,veclogical,mfvsr")
   (set_attr "isa" "*,*,p8v")
   (set_attr "length" "0,*,*")])

(define_insn "*vsx_extract_<mode>_1"
  [(set (match_operand:<VEC_base> 0 "gpc_reg_operand" "=wa,wr")
        (vec_select:<VEC_base>
         (match_operand:VSX_D 1 "gpc_reg_operand" "wa,wa")
         (parallel
          [(match_operand:QI 2 "const_0_to_1_operand" "n,n")])))]
  "VECTOR_MEM_VSX_P (<MODE>mode)
   && INTVAL (operands[2]) == (BYTES_BIG_ENDIAN ? 1 : 0)"
{
  if (which_alternative == 1)
    return "mfvsrld %0,%x1";

  operands[3] = GEN_INT (BYTES_BIG_ENDIAN ? 2 : 3);
  return "xxpermdi %x0,%x1,%x1,%3";
}
  [(set_attr "type" "mfvsr,vecperm")
   (set_attr "isa" "*,p9v")])
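
;; Added commentary (illustrative): extracting a doubleword element, handled
;; by the patterns above:
;;
;;   #include <altivec.h>
;;   double get0 (vector double v)
;;   {
;;     return vec_extract (v, 0);   /* often no code, or a single mfvsrd  */
;;   }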

;; Optimize extracting a single scalar element from memory.
(define_insn_and_split "*vsx_extract_<P:mode>_<VSX_D:mode>_load"
  [(set (match_operand:<VSX_D:VEC_base> 0 "register_operand" "=wa,wr")
        (vec_select:<VSX_D:VEC_base>
         (match_operand:VSX_D 1 "memory_operand" "m,m")
         (parallel [(match_operand:QI 2 "const_0_to_1_operand" "n,n")])))
   (clobber (match_scratch:P 3 "=&b,&b"))]
  "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<VSX_D:MODE>mode)"
  "#"
  "&& reload_completed"
  [(set (match_dup 0) (match_dup 4))]
{
  operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
                                           operands[3], <VSX_D:VEC_base>mode);
}
  [(set_attr "type" "fpload,load")
   (set_attr "length" "8")])

;; Optimize storing a single scalar element that is in the right location
;; to store.
(define_insn "*vsx_extract_<mode>_store"
  [(set (match_operand:<VEC_base> 0 "memory_operand" "=m,Z,wY")
        (vec_select:<VEC_base>
         (match_operand:VSX_D 1 "register_operand" "d,v,v")
         (parallel [(match_operand:QI 2 "vsx_scalar_64bit" "n,n,n")])))]
  "VECTOR_MEM_VSX_P (<MODE>mode)
   && INTVAL (operands[2]) == (BYTES_BIG_ENDIAN ? 0 : 1)"
  "@
   stfd%U0%X0 %1,%0
   stxsdx %x1,%y0
   stxsd %1,%0"
  [(set_attr "type" "fpstore")
   (set_attr "isa" "*,p7v,p9v")])

;; Variable V2DI/V2DF extract shift
(define_insn "vsx_vslo_<mode>"
  [(set (match_operand:<VEC_base> 0 "gpc_reg_operand" "=v")
        (unspec:<VEC_base> [(match_operand:VSX_D 1 "gpc_reg_operand" "v")
                            (match_operand:V2DI 2 "gpc_reg_operand" "v")]
                           UNSPEC_VSX_VSLO))]
  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
  "vslo %0,%1,%2"
  [(set_attr "type" "vecperm")])

;; Variable V2DI/V2DF extract from a register
(define_insn_and_split "vsx_extract_<mode>_var"
  [(set (match_operand:<VEC_base> 0 "gpc_reg_operand" "=v")
        (unspec:<VEC_base> [(match_operand:VSX_D 1 "gpc_reg_operand" "v")
                            (match_operand:DI 2 "gpc_reg_operand" "r")]
                           UNSPEC_VSX_EXTRACT))
   (clobber (match_scratch:DI 3 "=r"))
   (clobber (match_scratch:V2DI 4 "=&v"))]
  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rs6000_split_vec_extract_var (operands[0], operands[1], operands[2],
                                operands[3], operands[4]);
  DONE;
})

;; Variable V2DI/V2DF extract from memory
(define_insn_and_split "*vsx_extract_<mode>_var_load"
  [(set (match_operand:<VEC_base> 0 "gpc_reg_operand" "=wa,r")
        (unspec:<VEC_base> [(match_operand:VSX_D 1 "memory_operand" "Q,Q")
                            (match_operand:DI 2 "gpc_reg_operand" "r,r")]
                           UNSPEC_VSX_EXTRACT))
   (clobber (match_scratch:DI 3 "=&b,&b"))]
  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
  "#"
  "&& reload_completed"
  [(set (match_dup 0) (match_dup 4))]
{
  operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
                                           operands[3], <VEC_base>mode);
}
  [(set_attr "type" "fpload,load")])

;; Extract a SF element from V4SF
(define_insn_and_split "vsx_extract_v4sf"
  [(set (match_operand:SF 0 "vsx_register_operand" "=wa")
        (vec_select:SF
         (match_operand:V4SF 1 "vsx_register_operand" "wa")
         (parallel [(match_operand:QI 2 "u5bit_cint_operand" "n")])))
   (clobber (match_scratch:V4SF 3 "=0"))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
  "#"
  "&& 1"
  [(const_int 0)]
{
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  rtx op2 = operands[2];
  rtx op3 = operands[3];
  rtx tmp;
  HOST_WIDE_INT ele = BYTES_BIG_ENDIAN ? INTVAL (op2) : 3 - INTVAL (op2);

  if (ele == 0)
    tmp = op1;
  else
    {
      if (GET_CODE (op3) == SCRATCH)
        op3 = gen_reg_rtx (V4SFmode);
      emit_insn (gen_vsx_xxsldwi_v4sf (op3, op1, op1, GEN_INT (ele)));
      tmp = op3;
    }
  emit_insn (gen_vsx_xscvspdp_scalar2 (op0, tmp));
  DONE;
}
  [(set_attr "length" "8")
   (set_attr "type" "fp")])

(define_insn_and_split "*vsx_extract_v4sf_<mode>_load"
  [(set (match_operand:SF 0 "register_operand" "=f,v,v,?r")
        (vec_select:SF
         (match_operand:V4SF 1 "memory_operand" "m,Z,m,m")
         (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n,n,n")])))
   (clobber (match_scratch:P 3 "=&b,&b,&b,&b"))]
  "VECTOR_MEM_VSX_P (V4SFmode)"
  "#"
  "&& reload_completed"
  [(set (match_dup 0) (match_dup 4))]
{
  operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
                                           operands[3], SFmode);
}
  [(set_attr "type" "fpload,fpload,fpload,load")
   (set_attr "length" "8")
   (set_attr "isa" "*,p7v,p9v,*")])

;; Variable V4SF extract from a register
(define_insn_and_split "vsx_extract_v4sf_var"
  [(set (match_operand:SF 0 "gpc_reg_operand" "=wa")
        (unspec:SF [(match_operand:V4SF 1 "gpc_reg_operand" "v")
                    (match_operand:DI 2 "gpc_reg_operand" "r")]
                   UNSPEC_VSX_EXTRACT))
   (clobber (match_scratch:DI 3 "=r"))
   (clobber (match_scratch:V2DI 4 "=&v"))]
  "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_DIRECT_MOVE_64BIT"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rs6000_split_vec_extract_var (operands[0], operands[1], operands[2],
                                operands[3], operands[4]);
  DONE;
})

;; Variable V4SF extract from memory
(define_insn_and_split "*vsx_extract_v4sf_var_load"
  [(set (match_operand:SF 0 "gpc_reg_operand" "=wa,?r")
        (unspec:SF [(match_operand:V4SF 1 "memory_operand" "Q,Q")
                    (match_operand:DI 2 "gpc_reg_operand" "r,r")]
                   UNSPEC_VSX_EXTRACT))
   (clobber (match_scratch:DI 3 "=&b,&b"))]
  "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_DIRECT_MOVE_64BIT"
  "#"
  "&& reload_completed"
  [(set (match_dup 0) (match_dup 4))]
{
  operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
                                           operands[3], SFmode);
}
  [(set_attr "type" "fpload,load")])

;; Expand the builtin form of xxpermdi to canonical rtl.
(define_expand "vsx_xxpermdi_<mode>"
  [(match_operand:VSX_L 0 "vsx_register_operand")
   (match_operand:VSX_L 1 "vsx_register_operand")
   (match_operand:VSX_L 2 "vsx_register_operand")
   (match_operand:QI 3 "u5bit_cint_operand")]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  rtx target = operands[0];
  rtx op0 = operands[1];
  rtx op1 = operands[2];
  int mask = INTVAL (operands[3]);
  rtx perm0 = GEN_INT ((mask >> 1) & 1);
  rtx perm1 = GEN_INT ((mask & 1) + 2);
  rtx (*gen) (rtx, rtx, rtx, rtx, rtx);

  if (<MODE>mode == V2DFmode)
    gen = gen_vsx_xxpermdi2_v2df_1;
  else
    {
      gen = gen_vsx_xxpermdi2_v2di_1;
      if (<MODE>mode != V2DImode)
        {
          target = gen_lowpart (V2DImode, target);
          op0 = gen_lowpart (V2DImode, op0);
          op1 = gen_lowpart (V2DImode, op1);
        }
    }
  emit_insn (gen (target, op0, op1, perm0, perm1));
  DONE;
})

;; Special version of xxpermdi that retains big-endian semantics.
(define_expand "vsx_xxpermdi_<mode>_be"
  [(match_operand:VSX_L 0 "vsx_register_operand")
   (match_operand:VSX_L 1 "vsx_register_operand")
   (match_operand:VSX_L 2 "vsx_register_operand")
   (match_operand:QI 3 "u5bit_cint_operand")]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  rtx target = operands[0];
  rtx op0 = operands[1];
  rtx op1 = operands[2];
  int mask = INTVAL (operands[3]);
  rtx perm0 = GEN_INT ((mask >> 1) & 1);
  rtx perm1 = GEN_INT ((mask & 1) + 2);
  rtx (*gen) (rtx, rtx, rtx, rtx, rtx);

  if (<MODE>mode == V2DFmode)
    gen = gen_vsx_xxpermdi2_v2df_1;
  else
    {
      gen = gen_vsx_xxpermdi2_v2di_1;
      if (<MODE>mode != V2DImode)
        {
          target = gen_lowpart (V2DImode, target);
          op0 = gen_lowpart (V2DImode, op0);
          op1 = gen_lowpart (V2DImode, op1);
        }
    }

  /* In little endian mode, vsx_xxpermdi2_<mode>_1 will perform a
     transformation we don't want; it is necessary for
     rs6000_expand_vec_perm_const_1 but not for this use.  So we
     prepare for that by reversing the transformation here.  */
  if (BYTES_BIG_ENDIAN)
    emit_insn (gen (target, op0, op1, perm0, perm1));
  else
    {
      rtx p0 = GEN_INT (3 - INTVAL (perm1));
      rtx p1 = GEN_INT (3 - INTVAL (perm0));
      emit_insn (gen (target, op1, op0, p0, p1));
    }
  DONE;
})

(define_insn "vsx_xxpermdi2_<mode>_1"
  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
        (vec_select:VSX_D
         (vec_concat:<VS_double>
          (match_operand:VSX_D 1 "vsx_register_operand" "wa")
          (match_operand:VSX_D 2 "vsx_register_operand" "wa"))
         (parallel [(match_operand 3 "const_0_to_1_operand" "")
                    (match_operand 4 "const_2_to_3_operand" "")])))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  int op3, op4, mask;

  /* For little endian, swap operands and invert/swap selectors
     to get the correct xxpermdi.  The operand swap sets up the
     inputs as a little endian array.  The selectors are swapped
     because they are defined to use big endian ordering.  The
     selectors are inverted to get the correct doublewords for
     little endian ordering.  */
  if (BYTES_BIG_ENDIAN)
    {
      op3 = INTVAL (operands[3]);
      op4 = INTVAL (operands[4]);
    }
  else
    {
      op3 = 3 - INTVAL (operands[4]);
      op4 = 3 - INTVAL (operands[3]);
    }

  mask = (op3 << 1) | (op4 - 2);
  operands[3] = GEN_INT (mask);

  if (BYTES_BIG_ENDIAN)
    return "xxpermdi %x0,%x1,%x2,%3";
  else
    return "xxpermdi %x0,%x2,%x1,%3";
}
  [(set_attr "type" "vecperm")])
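
;; Added commentary (worked example, not from the original): for selectors
;; op3 = 1 and op4 = 2 on a big endian target, mask = (1 << 1) | (2 - 2) = 2,
;; giving "xxpermdi %x0,%x1,%x2,2": doubleword 1 of the first input becomes
;; doubleword 0 of the result, and doubleword 0 of the second input becomes
;; doubleword 1.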
3801 ;; Extraction of a single element in a small integer vector. Until ISA 3.0,
3802 ;; none of the small types were allowed in a vector register, so we had to
3803 ;; extract to a DImode and either do a direct move or store.
3804 (define_expand "vsx_extract_<mode>"
3805 [(parallel [(set (match_operand:<VEC_base> 0 "gpc_reg_operand")
3806 (vec_select:<VEC_base>
3807 (match_operand:VSX_EXTRACT_I2 1 "gpc_reg_operand")
3808 (parallel [(match_operand:QI 2 "const_int_operand")])))
3809 (clobber (match_scratch:VSX_EXTRACT_I2 3))])]
3810 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3812 /* If we have ISA 3.0, we can do a xxextractuw/vextractu{b,h}. */
3813 if (TARGET_P9_VECTOR)
3815 emit_insn (gen_vsx_extract_<mode>_p9 (operands[0], operands[1],
3821 (define_expand "vsx_extract_v4si"
3822 [(parallel [(set (match_operand:SI 0 "gpc_reg_operand")
3824 (match_operand:V4SI 1 "gpc_reg_operand")
3825 (parallel [(match_operand:QI 2 "const_0_to_3_operand")])))
3826 (clobber (match_scratch:V4SI 3))])]
3827 "TARGET_DIRECT_MOVE_64BIT"
3829 /* The word 1 (BE order) can be extracted by mfvsrwz/stxsiwx. So just
3830 fall through to vsx_extract_v4si_w1. */
3831 if (TARGET_P9_VECTOR
3832 && INTVAL (operands[2]) != (BYTES_BIG_ENDIAN ? 1 : 2))
3834 emit_insn (gen_vsx_extract_v4si_p9 (operands[0], operands[1],
3840 ;; Extract from word 1 (BE order).
3841 (define_insn "vsx_extract_v4si_w1"
3842 [(set (match_operand:SI 0 "nonimmediate_operand" "=r,wa,Z,wa")
3844 (match_operand:V4SI 1 "gpc_reg_operand" "v,v,v,0")
3845 (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n,n,n")])))
3846 (clobber (match_scratch:V4SI 3 "=v,v,v,v"))]
3847 "TARGET_DIRECT_MOVE_64BIT
3848 && INTVAL (operands[2]) == (BYTES_BIG_ENDIAN ? 1 : 2)"
3850 if (which_alternative == 0)
3851 return "mfvsrwz %0,%x1";
3853 if (which_alternative == 1)
3854 return "xxlor %x0,%x1,%x1";
3856 if (which_alternative == 2)
3857 return "stxsiwx %x1,%y0";
3859 return ASM_COMMENT_START " vec_extract to same register";
3861 [(set_attr "type" "mfvsr,veclogical,fpstore,*")
3862 (set_attr "length" "4,4,4,0")
3863 (set_attr "isa" "p8v,*,p8v,*")])
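;; Hedged example for the word-1 fast path (hypothetical source): on little
;; endian,
;;
;;   int get_w2 (vector int v) { return vec_extract (v, 2); }
;;
;; selects BE word 1, so a single mfvsrwz (or stxsiwx if the result is
;; immediately stored) is expected, with no preceding permute.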
3865 (define_insn "*mfvsrwz"
3866 [(set (match_operand:DI 0 "register_operand" "=r")
3867 (zero_extend:DI
3868 (vec_select:SI
3869 (match_operand:V4SI 1 "vsx_register_operand" "wa")
3870 (parallel [(match_operand:QI 2 "const_int_operand" "n")]))))
3871 (clobber (match_scratch:V4SI 3 "=v"))]
3872 "TARGET_DIRECT_MOVE_64BIT
3873 && INTVAL (operands[2]) == (BYTES_BIG_ENDIAN ? 1 : 2)"
3874 "mfvsrwz %0,%x1"
3875 [(set_attr "type" "mfvsr")
3876 (set_attr "isa" "p8v")])
3878 (define_insn "vsx_extract_<mode>_p9"
3879 [(set (match_operand:<VEC_base> 0 "gpc_reg_operand" "=r,<VSX_EX>")
3880 (vec_select:<VEC_base>
3881 (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v,<VSX_EX>")
3882 (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n,n")])))
3883 (clobber (match_scratch:SI 3 "=r,X"))]
3884 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB"
3886 if (which_alternative == 0)
3891 HOST_WIDE_INT elt = INTVAL (operands[2]);
3892 HOST_WIDE_INT elt_adj = (!BYTES_BIG_ENDIAN
3893 ? GET_MODE_NUNITS (<MODE>mode) - 1 - elt
3896 HOST_WIDE_INT unit_size = GET_MODE_UNIT_SIZE (<MODE>mode);
3897 HOST_WIDE_INT offset = unit_size * elt_adj;
3899 operands[2] = GEN_INT (offset);
3901 return "xxextractuw %x0,%x1,%2";
3903 return "vextractu<wd> %0,%1,%2";
3906 [(set_attr "type" "vecsimple")
3907 (set_attr "isa" "p9v,*")])
3910 [(set (match_operand:<VEC_base> 0 "int_reg_operand")
3911 (vec_select:<VEC_base>
3912 (match_operand:VSX_EXTRACT_I 1 "altivec_register_operand")
3913 (parallel [(match_operand:QI 2 "const_int_operand")])))
3914 (clobber (match_operand:SI 3 "int_reg_operand"))]
3915 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB && reload_completed"
3918 rtx op0_si = gen_rtx_REG (SImode, REGNO (operands[0]));
3919 rtx op1 = operands[1];
3920 rtx op2 = operands[2];
3921 rtx op3 = operands[3];
3922 HOST_WIDE_INT offset = INTVAL (op2) * GET_MODE_UNIT_SIZE (<MODE>mode);
3924 emit_move_insn (op3, GEN_INT (offset));
3925 if (BYTES_BIG_ENDIAN)
3926 emit_insn (gen_vextu<wd>lx (op0_si, op3, op1));
3927 else
3928 emit_insn (gen_vextu<wd>rx (op0_si, op3, op1));
3929 DONE;
3930 })
3932 ;; Optimize zero extracts to eliminate the AND after the extract.
3933 (define_insn_and_split "*vsx_extract_<mode>_di_p9"
3934 [(set (match_operand:DI 0 "gpc_reg_operand" "=r,<VSX_EX>")
3936 (vec_select:<VEC_base>
3937 (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v,<VSX_EX>")
3938 (parallel [(match_operand:QI 2 "const_int_operand" "n,n")]))))
3939 (clobber (match_scratch:SI 3 "=r,X"))]
3940 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB"
3942 "&& reload_completed"
3943 [(parallel [(set (match_dup 4)
3944 (vec_select:<VEC_base>
3945 (match_dup 1)
3946 (parallel [(match_dup 2)])))
3947 (clobber (match_dup 3))])]
3949 gcc_assert (<MODE>mode != V4SImode
3950 || INTVAL (operands[2]) != (BYTES_BIG_ENDIAN ? 1 : 2));
3952 operands[4] = gen_rtx_REG (<VEC_base>mode, REGNO (operands[0]));
3954 [(set_attr "isa" "p9v,*")])
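;; A sketch of the zero-extend case this split covers (assumptions: ISA 3.0,
;; 64-bit):
;;
;;   unsigned long long get_ub5 (vector unsigned char v)
;;   {
;;     return vec_extract (v, 5);
;;   }
;;
;; The ISA 3.0 extract already delivers the byte zero extended in a GPR, so
;; the explicit AND masking that would otherwise follow the extract is
;; eliminated.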
3956 ;; Optimize stores to use the ISA 3.0 scalar store instructions
3957 (define_insn_and_split "*vsx_extract_<mode>_store_p9"
3958 [(set (match_operand:<VEC_base> 0 "memory_operand" "=Z,m")
3959 (vec_select:<VEC_base>
3960 (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "<VSX_EX>,v")
3961 (parallel [(match_operand:QI 2 "const_int_operand" "n,n")])))
3962 (clobber (match_scratch:<VEC_base> 3 "=<VSX_EX>,&*r"))
3963 (clobber (match_scratch:SI 4 "=X,&r"))]
3964 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB"
3966 "&& reload_completed"
3967 [(parallel [(set (match_dup 3)
3968 (vec_select:<VEC_base>
3969 (match_dup 1)
3970 (parallel [(match_dup 2)])))
3971 (clobber (match_dup 4))])
3975 if (which_alternative == 0
3976 && ((<MODE>mode == V16QImode
3977 && INTVAL (operands[2]) == (BYTES_BIG_ENDIAN ? 7 : 8))
3978 || (<MODE>mode == V8HImode
3979 && INTVAL (operands[2]) == (BYTES_BIG_ENDIAN ? 3 : 4))))
3980 {
3981 enum machine_mode dest_mode = GET_MODE (operands[0]);
3982 emit_move_insn (operands[0],
3983 gen_rtx_REG (dest_mode, REGNO (operands[3])));
3984 DONE;
3985 }
3986 })
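;; Hedged example of the store form (hypothetical source, ISA 3.0):
;;
;;   void store_h1 (short *p, vector short v)
;;   {
;;     *p = vec_extract (v, 1);
;;   }
;;
;; The element is moved within the vector register and stored with a scalar
;; store such as stxsihx, never passing through a GPR.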
3989 ;; Extract from word 0, 2, 3 (BE order).
3990 (define_insn_and_split "*vsx_extract_v4si_w023"
3991 [(set (match_operand:SI 0 "nonimmediate_operand" "=r,wa,Z")
3992 (vec_select:SI
3993 (match_operand:V4SI 1 "gpc_reg_operand" "v,v,v")
3994 (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n,n")])))
3995 (clobber (match_scratch:V4SI 3 "=v,v,v"))]
3996 "TARGET_DIRECT_MOVE_64BIT"
3998 "&& INTVAL (operands[2]) != (BYTES_BIG_ENDIAN ? 1 : 2)"
4001 gcc_assert (!TARGET_P9_VECTOR);
4003 rtx dest = operands[0];
4004 rtx src = operands[1];
4005 rtx element = operands[2];
4008 if (GET_CODE (operands[3]) == SCRATCH)
4009 vec_tmp = gen_reg_rtx (V4SImode);
4010 else
4011 vec_tmp = operands[3];
4013 /* Adjust the index for LE element ordering; the minuend 3 below is
4014 GET_MODE_NUNITS (V4SImode) - 1. */
4015 if (!BYTES_BIG_ENDIAN)
4016 element = GEN_INT (3 - INTVAL (element));
4018 emit_insn (gen_altivec_vspltw_direct (vec_tmp, src, element));
4020 int value = BYTES_BIG_ENDIAN ? 1 : 2;
4021 emit_insn (gen_vsx_extract_v4si_w1 (dest, vec_tmp, GEN_INT (value)));
4022 DONE;
4023 })
4026 (define_insn_and_split "*vsx_extract_<mode>_p8"
4027 [(set (match_operand:<VEC_base> 0 "nonimmediate_operand" "=r")
4028 (vec_select:<VEC_base>
4029 (match_operand:VSX_EXTRACT_I2 1 "gpc_reg_operand" "v")
4030 (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n")])))
4031 (clobber (match_scratch:VSX_EXTRACT_I2 3 "=v"))]
4032 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT
4033 && !TARGET_P9_VECTOR"
4035 "&& reload_completed"
4038 rtx dest = operands[0];
4039 rtx src = operands[1];
4040 rtx element = operands[2];
4041 rtx vec_tmp = operands[3];
4044 if (!BYTES_BIG_ENDIAN)
4045 element = GEN_INT (GET_MODE_NUNITS (<MODE>mode) - 1 - INTVAL (element));
4047 /* If the value is in the correct position, we can avoid doing the VSPLT<x>
4048 instruction. */
4049 value = INTVAL (element);
4050 if (<MODE>mode == V16QImode)
4051 {
4052 if (value != 7)
4053 emit_insn (gen_altivec_vspltb_direct (vec_tmp, src, element));
4054 else
4055 vec_tmp = src;
4056 }
4057 else if (<MODE>mode == V8HImode)
4058 {
4059 if (value != 3)
4060 emit_insn (gen_altivec_vsplth_direct (vec_tmp, src, element));
4061 else
4062 vec_tmp = src;
4063 }
4064 else
4065 gcc_unreachable ();
4067 emit_move_insn (gen_rtx_REG (DImode, REGNO (dest)),
4068 gen_rtx_REG (DImode, REGNO (vec_tmp)));
4069 DONE;
4070 }
4071 [(set_attr "type" "mfvsr")])
4073 ;; Optimize extracting a single scalar element from memory.
4074 (define_insn_and_split "*vsx_extract_<mode>_load"
4075 [(set (match_operand:<VEC_base> 0 "register_operand" "=r")
4076 (vec_select:<VEC_base>
4077 (match_operand:VSX_EXTRACT_I 1 "memory_operand" "m")
4078 (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n")])))
4079 (clobber (match_scratch:DI 3 "=&b"))]
4080 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
4082 "&& reload_completed"
4083 [(set (match_dup 0) (match_dup 4))]
4085 operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
4086 operands[3], <VEC_base>mode);
4088 [(set_attr "type" "load")
4089 (set_attr "length" "8")])
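;; A sketch of the in-memory case (hypothetical source): when the whole
;; vector lives in memory, as in
;;
;;   short get_m2 (vector short *p) { return vec_extract (*p, 2); }
;;
;; there is no need to load all 16 bytes; rs6000_adjust_vec_address folds
;; the element offset into the address and a single scalar load is emitted.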
4091 ;; Variable V16QI/V8HI/V4SI extract from a register
4092 (define_insn_and_split "vsx_extract_<mode>_var"
4093 [(set (match_operand:<VEC_base> 0 "gpc_reg_operand" "=r,r")
4095 [(match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v,v")
4096 (match_operand:DI 2 "gpc_reg_operand" "r,r")]
4097 UNSPEC_VSX_EXTRACT))
4098 (clobber (match_scratch:DI 3 "=r,r"))
4099 (clobber (match_scratch:V2DI 4 "=X,&v"))]
4100 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
4102 "&& reload_completed"
4105 rs6000_split_vec_extract_var (operands[0], operands[1], operands[2],
4106 operands[3], operands[4]);
4107 DONE;
4108 }
4109 [(set_attr "isa" "p9v,*")])
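;; Usage sketch for the variable-index form (assumed -mcpu=power8 or newer):
;;
;;   unsigned char get_dyn (vector unsigned char v, unsigned long i)
;;   {
;;     return vec_extract (v, i);
;;   }
;;
;; rs6000_split_vec_extract_var shifts the selected element into an
;; extractable position (or uses the ISA 3.0 vextu*lx/rx instructions when
;; they are available).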
4111 ;; Variable V16QI/V8HI/V4SI extract from memory
4112 (define_insn_and_split "*vsx_extract_<mode>_var_load"
4113 [(set (match_operand:<VEC_base> 0 "gpc_reg_operand" "=r")
4115 [(match_operand:VSX_EXTRACT_I 1 "memory_operand" "Q")
4116 (match_operand:DI 2 "gpc_reg_operand" "r")]
4117 UNSPEC_VSX_EXTRACT))
4118 (clobber (match_scratch:DI 3 "=&b"))]
4119 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
4121 "&& reload_completed"
4122 [(set (match_dup 0) (match_dup 4))]
4124 operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
4125 operands[3], <VEC_base>mode);
4127 [(set_attr "type" "load")])
4130 (define_expand "vextractl<mode>"
4131 [(set (match_operand:V2DI 0 "altivec_register_operand")
4132 (unspec:V2DI [(match_operand:VI2 1 "altivec_register_operand")
4133 (match_operand:VI2 2 "altivec_register_operand")
4134 (match_operand:SI 3 "register_operand")]
4137 {
4138 if (BYTES_BIG_ENDIAN)
4139 {
4140 emit_insn (gen_vextractl<mode>_internal (operands[0], operands[1],
4141 operands[2], operands[3]));
4142 emit_insn (gen_xxswapd_v2di (operands[0], operands[0]));
4143 }
4144 else
4145 emit_insn (gen_vextractr<mode>_internal (operands[0], operands[2],
4146 operands[1], operands[3]));
4147 DONE;
4148 })
4150 (define_insn "vextractl<mode>_internal"
4151 [(set (match_operand:V2DI 0 "altivec_register_operand" "=v")
4152 (unspec:V2DI [(match_operand:VEC_I 1 "altivec_register_operand" "v")
4153 (match_operand:VEC_I 2 "altivec_register_operand" "v")
4154 (match_operand:SI 3 "register_operand" "r")]
4157 "vext<du_or_d><wd>vlx %0,%1,%2,%3"
4158 [(set_attr "type" "vecsimple")])
4160 (define_expand "vextractr<mode>"
4161 [(set (match_operand:V2DI 0 "altivec_register_operand")
4162 (unspec:V2DI [(match_operand:VI2 1 "altivec_register_operand")
4163 (match_operand:VI2 2 "altivec_register_operand")
4164 (match_operand:SI 3 "register_operand")]
4167 {
4168 if (BYTES_BIG_ENDIAN)
4169 {
4170 emit_insn (gen_vextractr<mode>_internal (operands[0], operands[1],
4171 operands[2], operands[3]));
4172 emit_insn (gen_xxswapd_v2di (operands[0], operands[0]));
4173 }
4174 else
4175 emit_insn (gen_vextractl<mode>_internal (operands[0], operands[2],
4176 operands[1], operands[3]));
4177 DONE;
4178 })
4180 (define_insn "vextractr<mode>_internal"
4181 [(set (match_operand:V2DI 0 "altivec_register_operand" "=v")
4182 (unspec:V2DI [(match_operand:VEC_I 1 "altivec_register_operand" "v")
4183 (match_operand:VEC_I 2 "altivec_register_operand" "v")
4184 (match_operand:SI 3 "register_operand" "r")]
4187 "vext<du_or_d><wd>vrx %0,%1,%2,%3"
4188 [(set_attr "type" "vecsimple")])
4190 (define_expand "vinsertvl_<mode>"
4191 [(set (match_operand:VI2 0 "altivec_register_operand")
4192 (unspec:VI2 [(match_operand:VI2 1 "altivec_register_operand")
4193 (match_operand:VI2 2 "altivec_register_operand")
4194 (match_operand:SI 3 "register_operand" "r")]
4198 if (BYTES_BIG_ENDIAN)
4199 emit_insn (gen_vinsertvl_internal_<mode> (operands[0], operands[3],
4200 operands[1], operands[2]));
4201 else
4202 emit_insn (gen_vinsertvr_internal_<mode> (operands[0], operands[3],
4203 operands[1], operands[2]));
4204 DONE;
4205 })
4207 (define_insn "vinsertvl_internal_<mode>"
4208 [(set (match_operand:VEC_I 0 "altivec_register_operand" "=v")
4209 (unspec:VEC_I [(match_operand:SI 1 "register_operand" "r")
4210 (match_operand:VEC_I 2 "altivec_register_operand" "v")
4211 (match_operand:VEC_I 3 "altivec_register_operand" "0")]
4214 "vins<wd>vlx %0,%1,%2"
4215 [(set_attr "type" "vecsimple")])
4217 (define_expand "vinsertvr_<mode>"
4218 [(set (match_operand:VI2 0 "altivec_register_operand")
4219 (unspec:VI2 [(match_operand:VI2 1 "altivec_register_operand")
4220 (match_operand:VI2 2 "altivec_register_operand")
4221 (match_operand:SI 3 "register_operand" "r")]
4225 if (BYTES_BIG_ENDIAN)
4226 emit_insn (gen_vinsertvr_internal_<mode> (operands[0], operands[3],
4227 operands[1], operands[2]));
4228 else
4229 emit_insn (gen_vinsertvl_internal_<mode> (operands[0], operands[3],
4230 operands[1], operands[2]));
4231 DONE;
4232 })
4234 (define_insn "vinsertvr_internal_<mode>"
4235 [(set (match_operand:VEC_I 0 "altivec_register_operand" "=v")
4236 (unspec:VEC_I [(match_operand:SI 1 "register_operand" "r")
4237 (match_operand:VEC_I 2 "altivec_register_operand" "v")
4238 (match_operand:VEC_I 3 "altivec_register_operand" "0")]
4241 "vins<wd>vrx %0,%1,%2"
4242 [(set_attr "type" "vecsimple")])
4244 (define_expand "vinsertgl_<mode>"
4245 [(set (match_operand:VI2 0 "altivec_register_operand")
4246 (unspec:VI2 [(match_operand:SI 1 "register_operand")
4247 (match_operand:VI2 2 "altivec_register_operand")
4248 (match_operand:SI 3 "register_operand")]
4252 if (BYTES_BIG_ENDIAN)
4253 emit_insn (gen_vinsertgl_internal_<mode> (operands[0], operands[3],
4254 operands[1], operands[2]));
4255 else
4256 emit_insn (gen_vinsertgr_internal_<mode> (operands[0], operands[3],
4257 operands[1], operands[2]));
4258 DONE;
4259 })
4261 (define_insn "vinsertgl_internal_<mode>"
4262 [(set (match_operand:VEC_I 0 "altivec_register_operand" "=v")
4263 (unspec:VEC_I [(match_operand:SI 1 "register_operand" "r")
4264 (match_operand:SI 2 "register_operand" "r")
4265 (match_operand:VEC_I 3 "altivec_register_operand" "0")]
4268 "vins<wd>lx %0,%1,%2"
4269 [(set_attr "type" "vecsimple")])
4271 (define_expand "vinsertgr_<mode>"
4272 [(set (match_operand:VI2 0 "altivec_register_operand")
4273 (unspec:VI2 [(match_operand:SI 1 "register_operand")
4274 (match_operand:VI2 2 "altivec_register_operand")
4275 (match_operand:SI 3 "register_operand")]
4279 if (BYTES_BIG_ENDIAN)
4280 emit_insn (gen_vinsertgr_internal_<mode> (operands[0], operands[3],
4281 operands[1], operands[2]));
4282 else
4283 emit_insn (gen_vinsertgl_internal_<mode> (operands[0], operands[3],
4284 operands[1], operands[2]));
4285 DONE;
4286 })
4288 (define_insn "vinsertgr_internal_<mode>"
4289 [(set (match_operand:VEC_I 0 "altivec_register_operand" "=v")
4290 (unspec:VEC_I [(match_operand:SI 1 "register_operand" "r")
4291 (match_operand:SI 2 "register_operand" "r")
4292 (match_operand:VEC_I 3 "altivec_register_operand" "0")]
4295 "vins<wd>rx %0,%1,%2"
4296 [(set_attr "type" "vecsimple")])
4298 (define_expand "vreplace_elt_<mode>"
4299 [(set (match_operand:REPLACE_ELT_V 0 "register_operand")
4300 (unspec:REPLACE_ELT_V [(match_operand:REPLACE_ELT_V 1 "register_operand")
4301 (match_operand:<VEC_base> 2 "register_operand")
4302 (match_operand:QI 3 "const_0_to_3_operand")]
4303 UNSPEC_REPLACE_ELT))]
4307 /* Immediate value is the word index, convert to byte index and adjust for
4308 Endianness if needed. */
4309 if (BYTES_BIG_ENDIAN)
4310 index = INTVAL (operands[3]) << <REPLACE_ELT_sh>;
4312 else
4313 index = <REPLACE_ELT_max> - (INTVAL (operands[3]) << <REPLACE_ELT_sh>);
4315 emit_insn (gen_vreplace_elt_<mode>_inst (operands[0], operands[1],
4316 operands[2], GEN_INT (index)));
4317 DONE;
4318 }
4320 [(set_attr "type" "vecsimple")])
4322 (define_insn "vreplace_elt_<mode>_inst"
4323 [(set (match_operand:REPLACE_ELT_V 0 "register_operand" "=v")
4324 (unspec:REPLACE_ELT_V [(match_operand:REPLACE_ELT_V 1 "register_operand" "0")
4325 (match_operand:<VEC_base> 2 "register_operand" "r")
4326 (match_operand:QI 3 "const_0_to_12_operand" "n")]
4327 UNSPEC_REPLACE_ELT))]
4329 "vins<REPLACE_ELT_char> %0,%2,%3"
4330 [(set_attr "type" "vecsimple")])
4332 (define_insn "vreplace_un_<mode>"
4333 [(set (match_operand:V16QI 0 "register_operand" "=v")
4334 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
4335 (match_operand:REPLACE_ELT 2 "register_operand" "r")
4336 (match_operand:QI 3 "const_0_to_12_operand" "n")]
4337 UNSPEC_REPLACE_UN))]
4339 "vins<REPLACE_ELT_char> %0,%2,%3"
4340 [(set_attr "type" "vecsimple")])
4342 ;; VSX_EXTRACT optimizations
4343 ;; Optimize double d = (double) vec_extract (vi, <n>)
4344 ;; Get the element into the top position and use XVCVSWDP/XVCVUWDP
4345 (define_insn_and_split "*vsx_extract_si_<uns>float_df"
4346 [(set (match_operand:DF 0 "gpc_reg_operand" "=wa")
4347 (any_float:DF
4348 (vec_select:SI
4349 (match_operand:V4SI 1 "gpc_reg_operand" "v")
4350 (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n")]))))
4351 (clobber (match_scratch:V4SI 3 "=v"))]
4352 "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
4357 rtx dest = operands[0];
4358 rtx src = operands[1];
4359 rtx element = operands[2];
4360 rtx v4si_tmp = operands[3];
4363 /* Adjust the index for LE element ordering; the minuend 3 below is
4364 GET_MODE_NUNITS (V4SImode) - 1. */
4365 if (!BYTES_BIG_ENDIAN)
4366 element = GEN_INT (3 - INTVAL (element));
4368 /* If the value is in the correct position, we can avoid doing the VSPLT<x>
4369 instruction. */
4370 value = INTVAL (element);
4371 if (value != 0)
4372 {
4373 if (GET_CODE (v4si_tmp) == SCRATCH)
4374 v4si_tmp = gen_reg_rtx (V4SImode);
4375 emit_insn (gen_altivec_vspltw_direct (v4si_tmp, src, element));
4376 }
4377 else
4378 v4si_tmp = src;
4380 emit_insn (gen_vsx_xvcv<su>xwdp_df (dest, v4si_tmp));
4381 DONE;
4382 }
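;; A sketch of the source this combine pattern targets (not from the
;; original file):
;;
;;   double int_elt_to_df (vector int v)
;;   {
;;     return (double) vec_extract (v, 0);
;;   }
;;
;; Instead of moving the word to a GPR and back, the element is splatted to
;; the conversion slot and converted in place with xvcvsxwdp/xvcvuxwdp.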
4384 ;; Optimize <type> f = (<type>) vec_extract (vi, <n>)
4385 ;; where <type> is a floating point type supported by the hardware that is
4386 ;; not double. First convert the value to double, and then to the desired
4387 ;; type.
4388 (define_insn_and_split "*vsx_extract_si_<uns>float_<mode>"
4389 [(set (match_operand:VSX_EXTRACT_FL 0 "gpc_reg_operand" "=wa")
4390 (any_float:VSX_EXTRACT_FL
4391 (vec_select:SI
4392 (match_operand:V4SI 1 "gpc_reg_operand" "v")
4393 (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n")]))))
4394 (clobber (match_scratch:V4SI 3 "=v"))
4395 (clobber (match_scratch:DF 4 "=wa"))]
4396 "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
4401 rtx dest = operands[0];
4402 rtx src = operands[1];
4403 rtx element = operands[2];
4404 rtx v4si_tmp = operands[3];
4405 rtx df_tmp = operands[4];
4408 /* Adjust the index for LE element ordering; the minuend 3 below is
4409 GET_MODE_NUNITS (V4SImode) - 1. */
4410 if (!BYTES_BIG_ENDIAN)
4411 element = GEN_INT (3 - INTVAL (element));
4413 /* If the value is in the correct position, we can avoid doing the VSPLT<x>
4414 instruction. */
4415 value = INTVAL (element);
4416 if (value != 0)
4417 {
4418 if (GET_CODE (v4si_tmp) == SCRATCH)
4419 v4si_tmp = gen_reg_rtx (V4SImode);
4420 emit_insn (gen_altivec_vspltw_direct (v4si_tmp, src, element));
4421 }
4422 else
4423 v4si_tmp = src;
4425 if (GET_CODE (df_tmp) == SCRATCH)
4426 df_tmp = gen_reg_rtx (DFmode);
4428 emit_insn (gen_vsx_xvcv<su>xwdp_df (df_tmp, v4si_tmp));
4430 if (<MODE>mode == SFmode)
4431 emit_insn (gen_truncdfsf2 (dest, df_tmp));
4432 else if (<MODE>mode == TFmode && FLOAT128_IBM_P (TFmode))
4433 emit_insn (gen_extenddftf2_vsx (dest, df_tmp));
4434 else if (<MODE>mode == TFmode && FLOAT128_IEEE_P (TFmode)
4435 && TARGET_FLOAT128_HW)
4436 emit_insn (gen_extenddftf2_hw (dest, df_tmp));
4437 else if (<MODE>mode == IFmode && FLOAT128_IBM_P (IFmode))
4438 emit_insn (gen_extenddfif2 (dest, df_tmp));
4439 else if (<MODE>mode == KFmode && TARGET_FLOAT128_HW)
4440 emit_insn (gen_extenddfkf2_hw (dest, df_tmp));
4441 else
4442 gcc_unreachable ();
4444 DONE;
4445 }
4447 ;; Optimize <ftype> f = (<ftype>) vec_extract (<vtype>, <n>)
4448 ;; where <ftype> is SFmode, DFmode (and KFmode/TFmode if those types are IEEE
4449 ;; 128-bit hardware types) and <vtype> is vector char, vector unsigned char,
4450 ;; vector short or vector unsigned short.
4451 (define_insn_and_split "*vsx_ext_<VSX_EXTRACT_I:VEC_base>_fl_<FL_CONV:mode>"
4452 [(set (match_operand:FL_CONV 0 "gpc_reg_operand" "=wa")
4454 (vec_select:<VSX_EXTRACT_I:VEC_base>
4455 (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v")
4456 (parallel [(match_operand:QI 2 "const_int_operand" "n")]))))
4457 (clobber (match_scratch:<VSX_EXTRACT_I:VEC_base> 3 "=v"))]
4458 "VECTOR_MEM_VSX_P (<VSX_EXTRACT_I:MODE>mode) && TARGET_DIRECT_MOVE_64BIT
4459 && TARGET_P9_VECTOR"
4461 "&& reload_completed"
4462 [(parallel [(set (match_dup 3)
4463 (vec_select:<VSX_EXTRACT_I:VEC_base>
4464 (match_dup 1)
4465 (parallel [(match_dup 2)])))
4466 (clobber (scratch:SI))])
4467 (set (match_dup 4)
4468 (sign_extend:DI (match_dup 3)))
4469 (set (match_dup 0)
4470 (float:<FL_CONV:MODE> (match_dup 4)))]
4472 operands[4] = gen_rtx_REG (DImode, REGNO (operands[3]));
4474 [(set_attr "isa" "<FL_CONV:VSisa>")])
4476 (define_insn_and_split "*vsx_ext_<VSX_EXTRACT_I:VEC_base>_ufl_<FL_CONV:mode>"
4477 [(set (match_operand:FL_CONV 0 "gpc_reg_operand" "=wa")
4478 (unsigned_float:FL_CONV
4479 (vec_select:<VSX_EXTRACT_I:VEC_base>
4480 (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v")
4481 (parallel [(match_operand:QI 2 "const_int_operand" "n")]))))
4482 (clobber (match_scratch:<VSX_EXTRACT_I:VEC_base> 3 "=v"))]
4483 "VECTOR_MEM_VSX_P (<VSX_EXTRACT_I:MODE>mode) && TARGET_DIRECT_MOVE_64BIT
4484 && TARGET_P9_VECTOR"
4486 "&& reload_completed"
4487 [(parallel [(set (match_dup 3)
4488 (vec_select:<VSX_EXTRACT_I:VEC_base>
4489 (match_dup 1)
4490 (parallel [(match_dup 2)])))
4491 (clobber (scratch:SI))])
4492 (set (match_dup 0)
4493 (float:<FL_CONV:MODE> (match_dup 4)))]
4495 operands[4] = gen_rtx_REG (DImode, REGNO (operands[3]));
4497 [(set_attr "isa" "<FL_CONV:VSisa>")])
4499 ;; V4SI/V8HI/V16QI set operation on ISA 3.0
4500 (define_insn "vsx_set_<mode>_p9"
4501 [(set (match_operand:VSX_EXTRACT_I 0 "gpc_reg_operand" "=<VSX_EX>")
4502 (unspec:VSX_EXTRACT_I
4503 [(match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "0")
4504 (match_operand:<VEC_base> 2 "gpc_reg_operand" "<VSX_EX>")
4505 (match_operand:QI 3 "<VSX_EXTRACT_PREDICATE>" "n")]
4507 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_P9_VECTOR && TARGET_POWERPC64"
4509 int ele = INTVAL (operands[3]);
4510 int nunits = GET_MODE_NUNITS (<MODE>mode);
4512 if (!BYTES_BIG_ENDIAN)
4513 ele = nunits - 1 - ele;
4515 operands[3] = GEN_INT (GET_MODE_SIZE (<VEC_base>mode) * ele);
4516 if (<MODE>mode == V4SImode)
4517 return "xxinsertw %x0,%x2,%3";
4519 return "vinsert<wd> %0,%2,%3";
4521 [(set_attr "type" "vecperm")])
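;; Illustrative operand arithmetic for the insn above (assumed little
;; endian, V4SImode, C element 1): ele = 4 - 1 - 1 = 2, so operands[3]
;; becomes 4 * 2 = 8, the byte offset in "xxinsertw %x0,%x2,8".  A typical
;; source sketch:
;;
;;   vector int set1 (vector int v, int x) { return vec_insert (x, v, 1); }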
4523 (define_insn_and_split "vsx_set_v4sf_p9"
4524 [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
4526 [(match_operand:V4SF 1 "gpc_reg_operand" "0")
4527 (match_operand:SF 2 "gpc_reg_operand" "wa")
4528 (match_operand:QI 3 "const_0_to_3_operand" "n")]
4530 (clobber (match_scratch:SI 4 "=&wa"))]
4531 "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR && TARGET_POWERPC64"
4533 "&& reload_completed"
4535 (unspec:V4SF [(match_dup 2)]
4536 UNSPEC_VSX_CVDPSPN))
4537 (parallel [(set (match_dup 4)
4538 (vec_select:SI (match_dup 6)
4539 (parallel [(match_dup 7)])))
4540 (clobber (scratch:SI))])
4542 (unspec:V4SI [(match_dup 8)
4547 unsigned int tmp_regno = reg_or_subregno (operands[4]);
4549 operands[5] = gen_rtx_REG (V4SFmode, tmp_regno);
4550 operands[6] = gen_rtx_REG (V4SImode, tmp_regno);
4551 operands[7] = GEN_INT (BYTES_BIG_ENDIAN ? 0 : 3);
4552 operands[8] = gen_rtx_REG (V4SImode, reg_or_subregno (operands[0]));
4554 [(set_attr "type" "vecperm")
4555 (set_attr "length" "12")
4556 (set_attr "isa" "p9v")])
4558 ;; Special case setting 0.0f to a V4SF element
4559 (define_insn_and_split "*vsx_set_v4sf_p9_zero"
4560 [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
4562 [(match_operand:V4SF 1 "gpc_reg_operand" "0")
4563 (match_operand:SF 2 "zero_fp_constant" "j")
4564 (match_operand:QI 3 "const_0_to_3_operand" "n")]
4566 (clobber (match_scratch:SI 4 "=&wa"))]
4567 "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR && TARGET_POWERPC64"
4569 "&& reload_completed"
4573 (unspec:V4SI [(match_dup 5)
4578 operands[5] = gen_rtx_REG (V4SImode, reg_or_subregno (operands[0]));
4580 [(set_attr "type" "vecperm")
4581 (set_attr "length" "8")
4582 (set_attr "isa" "p9v")])
4584 ;; Optimize x = vec_insert (vec_extract (v2, n), v1, m) if n is the element
4585 ;; that is in the default scalar position (1 for big endian, 2 for little
4586 ;; endian). We just need to do an xxinsertw since the element is in the
4587 ;; correct location.
4589 (define_insn "*vsx_insert_extract_v4sf_p9"
4590 [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
4592 [(match_operand:V4SF 1 "gpc_reg_operand" "0")
4593 (vec_select:SF (match_operand:V4SF 2 "gpc_reg_operand" "wa")
4595 [(match_operand:QI 3 "const_0_to_3_operand" "n")]))
4596 (match_operand:QI 4 "const_0_to_3_operand" "n")]
4598 "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR && TARGET_POWERPC64
4599 && (INTVAL (operands[3]) == (BYTES_BIG_ENDIAN ? 1 : 2))"
4601 int ele = INTVAL (operands[4]);
4603 /* Adjust the index for LE element ordering; the minuend 3 below is
4604 GET_MODE_NUNITS (V4SFmode) - 1. */
4605 if (!BYTES_BIG_ENDIAN)
4606 ele = 3 - ele;
4608 operands[4] = GEN_INT (GET_MODE_SIZE (SFmode) * ele);
4609 return "xxinsertw %x0,%x2,%4";
4611 [(set_attr "type" "vecperm")])
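;; A hedged example of the combination matched above (assuming little
;; endian, where the no-op extract position is element 2):
;;
;;   vector float move_elt (vector float a, vector float b)
;;   {
;;     return vec_insert (vec_extract (b, 2), a, 0);
;;   }
;;
;; The SF value already sits in the scalar slot, so one xxinsertw suffices
;; and no xscvspdpn/xscvdpspn round trip is emitted.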
4613 ;; Optimize x = vec_insert (vec_extract (v2, n), v1, m) if n is not the element
4614 ;; that is in the default scalar position (1 for big endian, 2 for little
4615 ;; endian). Convert the insert/extract to int and avoid doing the conversion.
4617 (define_insn_and_split "*vsx_insert_extract_v4sf_p9_2"
4618 [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
4620 [(match_operand:V4SF 1 "gpc_reg_operand" "0")
4621 (vec_select:SF (match_operand:V4SF 2 "gpc_reg_operand" "wa")
4623 [(match_operand:QI 3 "const_0_to_3_operand" "n")]))
4624 (match_operand:QI 4 "const_0_to_3_operand" "n")]
4626 (clobber (match_scratch:SI 5 "=&wa"))]
4627 "VECTOR_MEM_VSX_P (V4SFmode) && VECTOR_MEM_VSX_P (V4SImode)
4628 && TARGET_P9_VECTOR && TARGET_POWERPC64
4629 && (INTVAL (operands[3]) != (BYTES_BIG_ENDIAN ? 1 : 2))"
4632 [(parallel [(set (match_dup 5)
4633 (vec_select:SI (match_dup 6)
4634 (parallel [(match_dup 3)])))
4635 (clobber (scratch:SI))])
4637 (unspec:V4SI [(match_dup 8)
4642 if (GET_CODE (operands[5]) == SCRATCH)
4643 operands[5] = gen_reg_rtx (SImode);
4645 operands[6] = gen_lowpart (V4SImode, operands[2]);
4646 operands[7] = gen_lowpart (V4SImode, operands[0]);
4647 operands[8] = gen_lowpart (V4SImode, operands[1]);
4649 [(set_attr "type" "vecperm")
4650 (set_attr "isa" "p9v")])
4652 ;; Expanders for builtins
4653 (define_expand "vsx_mergel_<mode>"
4654 [(use (match_operand:VSX_D 0 "vsx_register_operand"))
4655 (use (match_operand:VSX_D 1 "vsx_register_operand"))
4656 (use (match_operand:VSX_D 2 "vsx_register_operand"))]
4657 "VECTOR_MEM_VSX_P (<MODE>mode)"
4659 rtvec v = gen_rtvec (2, GEN_INT (1), GEN_INT (3));
4660 rtx x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[1], operands[2]);
4661 x = gen_rtx_VEC_SELECT (<MODE>mode, x, gen_rtx_PARALLEL (VOIDmode, v));
4662 emit_insn (gen_rtx_SET (operands[0], x));
4663 DONE;
4664 })
4666 (define_expand "vsx_mergeh_<mode>"
4667 [(use (match_operand:VSX_D 0 "vsx_register_operand"))
4668 (use (match_operand:VSX_D 1 "vsx_register_operand"))
4669 (use (match_operand:VSX_D 2 "vsx_register_operand"))]
4670 "VECTOR_MEM_VSX_P (<MODE>mode)"
4672 rtvec v = gen_rtvec (2, GEN_INT (0), GEN_INT (2));
4673 rtx x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[1], operands[2]);
4674 x = gen_rtx_VEC_SELECT (<MODE>mode, x, gen_rtx_PARALLEL (VOIDmode, v));
4675 emit_insn (gen_rtx_SET (operands[0], x));
4676 DONE;
4677 })
4680 ;; We separate the register splat insn from the memory splat insn to force the
4681 ;; register allocator to generate the indexed form of the SPLAT when it is
4682 ;; given an offsettable memory reference. Otherwise, if the register and
4683 ;; memory insns were combined into a single insn, the register allocator will
4684 ;; load the value into a register, and then do a double word permute.
4685 (define_expand "vsx_splat_<mode>"
4686 [(set (match_operand:VSX_D 0 "vsx_register_operand")
4687 (vec_duplicate:VSX_D
4688 (match_operand:<VEC_base> 1 "input_operand")))]
4689 "VECTOR_MEM_VSX_P (<MODE>mode)"
4691 rtx op1 = operands[1];
4692 if (MEM_P (op1))
4693 operands[1] = rs6000_force_indexed_or_indirect_mem (op1);
4694 else if (!REG_P (op1))
4695 operands[1] = force_reg (<VSX_D:VEC_base>mode, op1);
4696 })
4698 (define_insn "vsx_splat_<mode>_reg"
4699 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa,we")
4700 (vec_duplicate:VSX_D
4701 (match_operand:<VEC_base> 1 "gpc_reg_operand" "wa,b")))]
4702 "VECTOR_MEM_VSX_P (<MODE>mode)"
4703 "@
4704 xxpermdi %x0,%x1,%x1,0
4705 mtvsrdd %x0,%1,%1"
4706 [(set_attr "type" "vecperm,vecmove")])
4708 (define_insn "vsx_splat_<mode>_mem"
4709 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
4710 (vec_duplicate:VSX_D
4711 (match_operand:<VSX_D:VEC_base> 1 "memory_operand" "Z")))]
4712 "VECTOR_MEM_VSX_P (<MODE>mode)"
4714 [(set_attr "type" "vecload")])
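;; Sketch of the two splat flavors (hypothetical source, VSX assumed):
;;
;;   vector double splat_r (double x)  { return vec_splats (x); }
;;   vector double splat_m (double *p) { return vec_splats (*p); }
;;
;; The register form becomes xxpermdi (or mtvsrdd), while keeping the memory
;; form separate lets the allocator use the indexed load-and-splat lxvdsx
;; instead of a load followed by a doubleword permute.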
4716 ;; V4SI splat support
4717 (define_insn "vsx_splat_v4si"
4718 [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa,wa")
4719 (vec_duplicate:V4SI
4720 (match_operand:SI 1 "splat_input_operand" "r,Z")))]
4721 "TARGET_P9_VECTOR"
4722 "@
4723 mtvsrws %x0,%1
4724 lxvwsx %x0,%y1"
4725 [(set_attr "type" "vecperm,vecload")])
4727 ;; SImode is not currently allowed in vector registers. This pattern
4728 ;; allows us to use direct move to get the value in a vector register
4729 ;; so that we can use XXSPLTW
4730 (define_insn "vsx_splat_v4si_di"
4731 [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa,we")
4732 (vec_duplicate:V4SI
4733 (truncate:SI
4734 (match_operand:DI 1 "gpc_reg_operand" "wa,r"))))]
4735 "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
4736 "@
4737 xxspltw %x0,%x1,1
4738 mtvsrws %x0,%1"
4739 [(set_attr "type" "vecperm")
4740 (set_attr "isa" "p8v,*")])
4742 ;; V4SF splat (ISA 3.0)
4743 (define_insn_and_split "vsx_splat_v4sf"
4744 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa,wa,wa")
4745 (vec_duplicate:V4SF
4746 (match_operand:SF 1 "splat_input_operand" "Z,wa,r")))]
4747 "TARGET_P9_VECTOR"
4748 "@
4749 lxvwsx %x0,%y1
4750 #
4751 mtvsrws %x0,%1"
4752 "&& reload_completed && vsx_register_operand (operands[1], SFmode)"
4753 [(set (match_dup 0)
4754 (unspec:V4SF [(match_dup 1)] UNSPEC_VSX_CVDPSPN))
4755 (set (match_dup 0)
4756 (unspec:V4SF [(match_dup 0)
4757 (const_int 0)] UNSPEC_VSX_XXSPLTW))]
4759 [(set_attr "type" "vecload,vecperm,vecperm")
4760 (set_attr "length" "*,8,*")
4761 (set_attr "isa" "*,p8v,*")])
4763 ;; V4SF/V4SI splat from a vector element
4764 (define_insn "vsx_xxspltw_<mode>"
4765 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
4766 (vec_duplicate:VSX_W
4767 (vec_select:<VEC_base>
4768 (match_operand:VSX_W 1 "vsx_register_operand" "wa")
4770 [(match_operand:QI 2 "u5bit_cint_operand" "n")]))))]
4771 "VECTOR_MEM_VSX_P (<MODE>mode)"
4773 if (!BYTES_BIG_ENDIAN)
4774 operands[2] = GEN_INT (3 - INTVAL (operands[2]));
4776 return "xxspltw %x0,%x1,%2";
4778 [(set_attr "type" "vecperm")])
4780 (define_insn "vsx_xxspltw_<mode>_direct"
4781 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
4782 (unspec:VSX_W [(match_operand:VSX_W 1 "vsx_register_operand" "wa")
4783 (match_operand:QI 2 "u5bit_cint_operand" "i")]
4784 UNSPEC_VSX_XXSPLTW))]
4785 "VECTOR_MEM_VSX_P (<MODE>mode)"
4786 "xxspltw %x0,%x1,%2"
4787 [(set_attr "type" "vecperm")])
4789 ;; V16QI/V8HI splat support on ISA 2.07
4790 (define_insn "vsx_vsplt<VSX_SPLAT_SUFFIX>_di"
4791 [(set (match_operand:VSX_SPLAT_I 0 "altivec_register_operand" "=v")
4792 (vec_duplicate:VSX_SPLAT_I
4793 (truncate:<VEC_base>
4794 (match_operand:DI 1 "altivec_register_operand" "v"))))]
4795 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
4796 "vsplt<VSX_SPLAT_SUFFIX> %0,%1,<VSX_SPLAT_COUNT>"
4797 [(set_attr "type" "vecperm")])
4799 ;; V2DF/V2DI splat for use by vec_splat builtin
4800 (define_insn "vsx_xxspltd_<mode>"
4801 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
4802 (unspec:VSX_D [(match_operand:VSX_D 1 "vsx_register_operand" "wa")
4803 (match_operand:QI 2 "u5bit_cint_operand" "i")]
4804 UNSPEC_VSX_XXSPLTD))]
4805 "VECTOR_MEM_VSX_P (<MODE>mode)"
4807 if ((BYTES_BIG_ENDIAN && INTVAL (operands[2]) == 0)
4808 || (!BYTES_BIG_ENDIAN && INTVAL (operands[2]) == 1))
4809 return "xxpermdi %x0,%x1,%x1,0";
4810 else
4811 return "xxpermdi %x0,%x1,%x1,3";
4813 [(set_attr "type" "vecperm")])
4815 ;; V4SF/V4SI interleave
4816 (define_expand "vsx_xxmrghw_<mode>"
4817 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
4819 (vec_concat:<VS_double>
4820 (match_operand:VSX_W 1 "vsx_register_operand" "wa")
4821 (match_operand:VSX_W 2 "vsx_register_operand" "wa"))
4822 (parallel [(const_int 0) (const_int 4)
4823 (const_int 1) (const_int 5)])))]
4824 "VECTOR_MEM_VSX_P (<MODE>mode)"
4826 if (BYTES_BIG_ENDIAN)
4827 emit_insn (gen_altivec_vmrghw_direct_v4si_be (operands[0],
4828 operands[1],
4829 operands[2]));
4830 else
4831 emit_insn (gen_altivec_vmrglw_direct_v4si_le (operands[0],
4832 operands[2],
4833 operands[1]));
4834 DONE;
4835 }
4836 [(set_attr "type" "vecperm")])
4838 (define_expand "vsx_xxmrglw_<mode>"
4839 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
4841 (vec_concat:<VS_double>
4842 (match_operand:VSX_W 1 "vsx_register_operand" "wa")
4843 (match_operand:VSX_W 2 "vsx_register_operand" "wa"))
4844 (parallel [(const_int 2) (const_int 6)
4845 (const_int 3) (const_int 7)])))]
4846 "VECTOR_MEM_VSX_P (<MODE>mode)"
4848 if (BYTES_BIG_ENDIAN)
4849 emit_insn (gen_altivec_vmrglw_direct_v4si_be (operands[0],
4850 operands[1],
4851 operands[2]));
4852 else
4853 emit_insn (gen_altivec_vmrghw_direct_v4si_le (operands[0],
4854 operands[2],
4855 operands[1]));
4856 DONE;
4857 }
4858 [(set_attr "type" "vecperm")])
4860 ;; Shift left double by word immediate
4861 (define_insn "vsx_xxsldwi_<mode>"
4862 [(set (match_operand:VSX_L 0 "vsx_register_operand" "=wa")
4863 (unspec:VSX_L [(match_operand:VSX_L 1 "vsx_register_operand" "wa")
4864 (match_operand:VSX_L 2 "vsx_register_operand" "wa")
4865 (match_operand:QI 3 "u5bit_cint_operand" "i")]
4867 "VECTOR_MEM_VSX_P (<MODE>mode)"
4868 "xxsldwi %x0,%x1,%x2,%3"
4869 [(set_attr "type" "vecperm")
4870 (set_attr "isa" "<VSisa>")])
4873 ;; Vector reduction insns and splitters
4875 (define_insn_and_split "vsx_reduc_<VEC_reduc_name>_v2df"
4876 [(set (match_operand:V2DF 0 "vfloat_operand" "=&wa,wa")
4880 (match_operand:V2DF 1 "vfloat_operand" "wa,wa")
4881 (parallel [(const_int 1)]))
4884 (parallel [(const_int 0)])))
4886 (clobber (match_scratch:V2DF 2 "=0,&wa"))]
4887 "VECTOR_UNIT_VSX_P (V2DFmode)"
4892 rtx tmp = (GET_CODE (operands[2]) == SCRATCH)
4893 ? gen_reg_rtx (V2DFmode)
4895 emit_insn (gen_vsx_xxsldwi_v2df (tmp, operands[1], operands[1], const2_rtx));
4896 emit_insn (gen_<VEC_reduc_rtx>v2df3 (operands[0], tmp, operands[1]));
4899 [(set_attr "length" "8")
4900 (set_attr "type" "veccomplex")])
4902 (define_insn_and_split "vsx_reduc_<VEC_reduc_name>_v4sf"
4903 [(set (match_operand:V4SF 0 "vfloat_operand" "=wa")
4905 (unspec:V4SF [(const_int 0)] UNSPEC_REDUC)
4906 (match_operand:V4SF 1 "vfloat_operand" "wa")))
4907 (clobber (match_scratch:V4SF 2 "=&wa"))
4908 (clobber (match_scratch:V4SF 3 "=&wa"))]
4909 "VECTOR_UNIT_VSX_P (V4SFmode)"
4914 rtx op0 = operands[0];
4915 rtx op1 = operands[1];
4916 rtx tmp2, tmp3, tmp4;
4918 if (can_create_pseudo_p ())
4920 tmp2 = gen_reg_rtx (V4SFmode);
4921 tmp3 = gen_reg_rtx (V4SFmode);
4922 tmp4 = gen_reg_rtx (V4SFmode);
4931 emit_insn (gen_vsx_xxsldwi_v4sf (tmp2, op1, op1, const2_rtx));
4932 emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp3, tmp2, op1));
4933 emit_insn (gen_vsx_xxsldwi_v4sf (tmp4, tmp3, tmp3, GEN_INT (3)));
4934 emit_insn (gen_<VEC_reduc_rtx>v4sf3 (op0, tmp4, tmp3));
4937 [(set_attr "length" "16")
4938 (set_attr "type" "veccomplex")])
4940 ;; Combiner patterns with the vector reduction patterns that knows we can get
4941 ;; to the top element of the V2DF array without doing an extract.
4943 (define_insn_and_split "*vsx_reduc_<VEC_reduc_name>_v2df_scalar"
4944 [(set (match_operand:DF 0 "vfloat_operand" "=&wa,wa")
4949 (match_operand:V2DF 1 "vfloat_operand" "wa,wa")
4950 (parallel [(const_int 1)]))
4953 (parallel [(const_int 0)])))
4955 (parallel [(const_int 1)])))
4956 (clobber (match_scratch:DF 2 "=0,&wa"))]
4957 "BYTES_BIG_ENDIAN && VECTOR_UNIT_VSX_P (V2DFmode)"
4962 rtx hi = gen_highpart (DFmode, operands[1]);
4963 rtx lo = (GET_CODE (operands[2]) == SCRATCH)
4964 ? gen_reg_rtx (DFmode)
4967 emit_insn (gen_vsx_extract_v2df (lo, operands[1], const1_rtx));
4968 emit_insn (gen_<VEC_reduc_rtx>df3 (operands[0], hi, lo));
4971 [(set_attr "length" "8")
4972 (set_attr "type" "veccomplex")])
4974 (define_insn_and_split "*vsx_reduc_<VEC_reduc_name>_v4sf_scalar"
4975 [(set (match_operand:SF 0 "vfloat_operand" "=f")
4978 (unspec:V4SF [(const_int 0)] UNSPEC_REDUC)
4979 (match_operand:V4SF 1 "vfloat_operand" "wa"))
4980 (parallel [(const_int 3)])))
4981 (clobber (match_scratch:V4SF 2 "=&wa"))
4982 (clobber (match_scratch:V4SF 3 "=&wa"))
4983 (clobber (match_scratch:V4SF 4 "=0"))]
4984 "BYTES_BIG_ENDIAN && VECTOR_UNIT_VSX_P (V4SFmode)"
4989 rtx op0 = operands[0];
4990 rtx op1 = operands[1];
4991 rtx tmp2, tmp3, tmp4, tmp5;
4993 if (can_create_pseudo_p ())
4995 tmp2 = gen_reg_rtx (V4SFmode);
4996 tmp3 = gen_reg_rtx (V4SFmode);
4997 tmp4 = gen_reg_rtx (V4SFmode);
4998 tmp5 = gen_reg_rtx (V4SFmode);
5008 emit_insn (gen_vsx_xxsldwi_v4sf (tmp2, op1, op1, const2_rtx));
5009 emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp3, tmp2, op1));
5010 emit_insn (gen_vsx_xxsldwi_v4sf (tmp4, tmp3, tmp3, GEN_INT (3)));
5011 emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp5, tmp4, tmp3));
5012 emit_insn (gen_vsx_xscvspdp_scalar2 (op0, tmp5));
5015 [(set_attr "length" "20")
5016 (set_attr "type" "veccomplex")])
5019 ;; Power8 Vector fusion. The fused ops must be physically adjacent.
5021 [(set (match_operand:P 0 "base_reg_operand")
5022 (match_operand:P 1 "short_cint_operand"))
5023 (set (match_operand:VSX_M 2 "vsx_register_operand")
5024 (mem:VSX_M (plus:P (match_dup 0)
5025 (match_operand:P 3 "int_reg_operand"))))]
5026 "TARGET_VSX && TARGET_P8_FUSION && !TARGET_P9_VECTOR"
5027 "li %0,%1\;lx<VSX_M:VSm>x %x2,%0,%3\t\t\t# vector load fusion"
5028 [(set_attr "length" "8")
5029 (set_attr "type" "vecload")])
5032 [(set (match_operand:P 0 "base_reg_operand")
5033 (match_operand:P 1 "short_cint_operand"))
5034 (set (match_operand:VSX_M 2 "vsx_register_operand")
5035 (mem:VSX_M (plus:P (match_operand:P 3 "int_reg_operand")
5037 "TARGET_VSX && TARGET_P8_FUSION && !TARGET_P9_VECTOR"
5038 "li %0,%1\;lx<VSX_M:VSm>x %x2,%0,%3\t\t\t# vector load fusion"
5039 [(set_attr "length" "8")
5040 (set_attr "type" "vecload")])
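;; Illustrative fused sequence (assumed output, not taken from a compiler
;; run): the peepholes above keep the constant materialization and the
;; indexed load adjacent, e.g.
;;
;;   li r10,16
;;   lxvd2x vs34,r10,r9      # vector load fusion
;;
;; so the Power8 front end can fuse the pair.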
5043 ;; ISA 3.1 vector extend sign support
5044 (define_insn "vsx_sign_extend_v2di_v1ti"
5045 [(set (match_operand:V1TI 0 "vsx_register_operand" "=v")
5046 (unspec:V1TI [(match_operand:V2DI 1 "vsx_register_operand" "v")]
5047 UNSPEC_VSX_SIGN_EXTEND))]
5048 "TARGET_POWER10"
5049 "vextsd2q %0,%1"
5050 [(set_attr "type" "vecexts")])
5052 ;; ISA 3.0 vector extend sign support
5054 (define_insn "vsx_sign_extend_v16qi_<mode>"
5055 [(set (match_operand:VSINT_84 0 "vsx_register_operand" "=v")
5056 (unspec:VSINT_84
5057 [(match_operand:V16QI 1 "vsx_register_operand" "v")]
5058 UNSPEC_VSX_SIGN_EXTEND))]
5059 "TARGET_P9_VECTOR"
5060 "vextsb2<wd> %0,%1"
5061 [(set_attr "type" "vecexts")])
5063 (define_insn "vsx_sign_extend_v8hi_<mode>"
5064 [(set (match_operand:VSINT_84 0 "vsx_register_operand" "=v")
5065 (unspec:VSINT_84
5066 [(match_operand:V8HI 1 "vsx_register_operand" "v")]
5067 UNSPEC_VSX_SIGN_EXTEND))]
5068 "TARGET_P9_VECTOR"
5069 "vextsh2<wd> %0,%1"
5070 [(set_attr "type" "vecexts")])
5072 (define_insn "vsx_sign_extend_v4si_v2di"
5073 [(set (match_operand:V2DI 0 "vsx_register_operand" "=v")
5074 (unspec:V2DI [(match_operand:V4SI 1 "vsx_register_operand" "v")]
5075 UNSPEC_VSX_SIGN_EXTEND))]
5076 "TARGET_P9_VECTOR"
5077 "vextsw2d %0,%1"
5078 [(set_attr "type" "vecexts")])
5080 ;; Sign extend DI to TI. We provide both GPR targets and Altivec targets on
5081 ;; power10. On earlier systems, the machine independent code will generate an
5082 ;; arithmetic shift right to sign extend the 64-bit value to 128-bit.
5084 ;; If the register allocator prefers to use GPR registers, we will use an
5085 ;; arithmetic shift right (sradi) to sign extend the 64-bit value to 128-bit.
5087 ;; If the register allocator prefers to use Altivec registers on power10,
5088 ;; generate the vextsd2q instruction.
5089 (define_insn_and_split "extendditi2"
5090 [(set (match_operand:TI 0 "register_operand" "=r,r,v,v,v")
5091 (sign_extend:TI (match_operand:DI 1 "input_operand" "r,m,b,wa,Z")))
5092 (clobber (reg:DI CA_REGNO))]
5093 "TARGET_POWERPC64 && TARGET_POWER10"
5095 "&& reload_completed"
5098 rtx dest = operands[0];
5099 rtx src = operands[1];
5100 int dest_regno = reg_or_subregno (dest);
5102 /* Handle conversion to GPR registers. Load up the low part and then do
5103 a sign extension to the upper part. */
5104 if (INT_REGNO_P (dest_regno))
5105 {
5106 rtx dest_hi = gen_highpart (DImode, dest);
5107 rtx dest_lo = gen_lowpart (DImode, dest);
5109 emit_move_insn (dest_lo, src);
5110 /* In case src is a MEM, we have to use the destination, which is a
5111 register, instead of re-using the source. */
5112 rtx src2 = (REG_P (src) || SUBREG_P (src)) ? src : dest_lo;
5113 emit_insn (gen_ashrdi3 (dest_hi, src2, GEN_INT (63)));
5114 DONE;
5115 }
5117 /* For conversion to an Altivec register, generate either a splat operation
5118 or a load rightmost double word instruction. Both instructions get the
5119 DImode value into the lower 64 bits, and then do the vextsd2q
5120 instruction. */
5122 else if (ALTIVEC_REGNO_P (dest_regno))
5123 {
5124 if (MEM_P (src))
5125 emit_insn (gen_vsx_lxvrdx (dest, src));
5126 else
5127 {
5128 rtx dest_v2di = gen_rtx_REG (V2DImode, dest_regno);
5129 emit_insn (gen_vsx_splat_v2di (dest_v2di, src));
5130 }
5132 emit_insn (gen_extendditi2_vector (dest, dest));
5133 DONE;
5134 }
5136 else
5137 gcc_unreachable ();
5138 }
5139 [(set_attr "length" "8")
5140 (set_attr "type" "shift,load,vecmove,vecperm,load")])
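;; A sketch covering both register placements (hypothetical source,
;; -mcpu=power10):
;;
;;   __int128 widen (long x) { return x; }
;;
;; In GPRs this becomes a low-part copy plus sradi 63 to manufacture the
;; sign bits; in an Altivec register the value is splatted or loaded into
;; the low doubleword and vextsd2q completes the extension.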
5142 ;; Sign extend 64-bit value in TI reg, word 1, to 128-bit value in TI reg
5143 (define_insn "extendditi2_vector"
5144 [(set (match_operand:TI 0 "gpc_reg_operand" "=v")
5145 (unspec:TI [(match_operand:TI 1 "gpc_reg_operand" "v")]
5146 UNSPEC_EXTENDDITI2))]
5149 [(set_attr "type" "vecexts")])
5152 ;; ISA 3.0 Binary Floating-Point Support
5154 ;; VSX Scalar Extract Exponent Quad-Precision
5155 (define_insn "xsxexpqp_<IEEE128:mode>_<V2DI_DI:mode>"
5156 [(set (match_operand:V2DI_DI 0 "altivec_register_operand" "=v")
5157 (unspec:V2DI_DI
5158 [(match_operand:IEEE128 1 "altivec_register_operand" "v")]
5159 UNSPEC_VSX_SXEXPDP))]
5160 "TARGET_P9_VECTOR"
5161 "xsxexpqp %0,%1"
5162 [(set_attr "type" "vecmove")])
5164 ;; VSX Scalar Extract Exponent Double-Precision
5165 (define_insn "xsxexpdp_<mode>"
5166 [(set (match_operand:GPR 0 "register_operand" "=r")
5167 (unspec:GPR [(match_operand:DF 1 "vsx_register_operand" "wa")]
5168 UNSPEC_VSX_SXEXPDP))]
5171 [(set_attr "type" "integer")])
5173 ;; VSX Scalar Extract Significand Quad-Precision
5174 (define_insn "xsxsigqp_<IEEE128:mode>_<VEC_TI:mode>"
5175 [(set (match_operand:VEC_TI 0 "altivec_register_operand" "=v")
5176 (unspec:VEC_TI [(match_operand:IEEE128 1
5177 "altivec_register_operand" "v")]
5181 [(set_attr "type" "vecmove")])
5183 ;; VSX Scalar Extract Significand Double-Precision
5184 (define_insn "xsxsigdp"
5185 [(set (match_operand:DI 0 "register_operand" "=r")
5186 (unspec:DI [(match_operand:DF 1 "vsx_register_operand" "wa")]
5188 "TARGET_P9_VECTOR && TARGET_POWERPC64"
5190 [(set_attr "type" "integer")])
5192 ;; VSX Scalar Insert Exponent Quad-Precision Floating Point Argument
5193 (define_insn "xsiexpqpf_<mode>"
5194 [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v")
5196 [(match_operand:IEEE128 1 "altivec_register_operand" "v")
5197 (match_operand:DI 2 "altivec_register_operand" "v")]
5198 UNSPEC_VSX_SIEXPQP))]
5201 [(set_attr "type" "vecmove")])
5203 ;; VSX Scalar Insert Exponent Quad-Precision
5204 (define_insn "xsiexpqp_<IEEE128:mode>_<V2DI_DI:mode>"
5205 [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v")
5206 (unspec:IEEE128 [(match_operand:<DI_to_TI> 1
5207 "altivec_register_operand" "v")
5208 (match_operand:V2DI_DI 2
5209 "altivec_register_operand" "v")]
5210 UNSPEC_VSX_SIEXPQP))]
5213 [(set_attr "type" "vecmove")])
5215 ;; VSX Scalar Insert Exponent Double-Precision
5216 (define_insn "xsiexpdp_<mode>"
5217 [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
5218 (unspec:DF [(match_operand:DI 1 "register_operand" "r")
5219 (match_operand:GPR 2 "register_operand" "r")]
5220 UNSPEC_VSX_SIEXPDP))]
5221 "TARGET_P9_VECTOR && TARGET_POWERPC64"
5222 "xsiexpdp %x0,%1,%2"
5223 [(set_attr "type" "fpsimple")])
5225 ;; VSX Scalar Insert Exponent Double-Precision Floating Point Argument
5226 (define_insn "xsiexpdpf_<mode>"
5227 [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
5228 (unspec:DF [(match_operand:DF 1 "register_operand" "r")
5229 (match_operand:GPR 2 "register_operand" "r")]
5230 UNSPEC_VSX_SIEXPDP))]
5231 "TARGET_P9_VECTOR && TARGET_POWERPC64"
5232 "xsiexpdp %x0,%1,%2"
5233 [(set_attr "type" "fpsimple")])
5235 ;; VSX Scalar Compare Exponents Double-Precision
5236 (define_expand "xscmpexpdp_<code>"
5240 [(match_operand:DF 1 "vsx_register_operand" "wa")
5241 (match_operand:DF 2 "vsx_register_operand" "wa")]
5242 UNSPEC_VSX_SCMPEXPDP)
5244 (set (match_operand:SI 0 "register_operand" "=r")
5245 (CMP_TEST:SI (match_dup 3)
5249 if (<CODE> == UNORDERED && !HONOR_NANS (DFmode))
5251 emit_move_insn (operands[0], const0_rtx);
5255 operands[3] = gen_reg_rtx (CCFPmode);
5258 (define_insn "*xscmpexpdp"
5259 [(set (match_operand:CCFP 0 "cc_reg_operand" "=y")
5261 (unspec:DF [(match_operand:DF 1 "vsx_register_operand" "wa")
5262 (match_operand:DF 2 "vsx_register_operand" "wa")]
5263 UNSPEC_VSX_SCMPEXPDP)
5264 (match_operand:SI 3 "zero_constant" "j")))]
5265 "TARGET_P9_VECTOR"
5266 "xscmpexpdp %0,%x1,%x2"
5267 [(set_attr "type" "fpcompare")])
5269 ;; VSX Scalar Compare Exponents Quad-Precision
5270 (define_expand "xscmpexpqp_<code>_<mode>"
5274 [(match_operand:IEEE128 1 "vsx_register_operand" "v")
5275 (match_operand:IEEE128 2 "vsx_register_operand" "v")]
5276 UNSPEC_VSX_SCMPEXPQP)
5278 (set (match_operand:SI 0 "register_operand" "=r")
5279 (CMP_TEST:SI (match_dup 3)
5283 if (<CODE> == UNORDERED && !HONOR_NANS (<MODE>mode))
5285 emit_move_insn (operands[0], const0_rtx);
5289 operands[3] = gen_reg_rtx (CCFPmode);
5292 (define_insn "*xscmpexpqp"
5293 [(set (match_operand:CCFP 0 "cc_reg_operand" "=y")
5295 (unspec:IEEE128 [(match_operand:IEEE128 1 "altivec_register_operand" "v")
5296 (match_operand:IEEE128 2 "altivec_register_operand" "v")]
5297 UNSPEC_VSX_SCMPEXPQP)
5298 (match_operand:SI 3 "zero_constant" "j")))]
5299 "TARGET_P9_VECTOR"
5300 "xscmpexpqp %0,%1,%2"
5301 [(set_attr "type" "fpcompare")])
5303 ;; VSX Scalar Test Data Class Quad-Precision
5304 ;; (Expansion for scalar_test_data_class (__ieee128, int))
5305 ;; (Has side effect of setting the lt bit if operand 1 is negative,
5306 ;; setting the eq bit if any of the conditions tested by operand 2
5307 ;; are satisfied, and clearing the gt and unordered bits to zero.)
5308 (define_expand "xststdcqp_<mode>"
5312 [(match_operand:IEEE128 1 "altivec_register_operand" "v")
5313 (match_operand:SI 2 "u7bit_cint_operand" "n")]
5316 (set (match_operand:SI 0 "register_operand" "=r")
5317 (eq:SI (match_dup 3)
5321 operands[3] = gen_reg_rtx (CCFPmode);
5324 ;; VSX Scalar Test Data Class Double- and Single-Precision
5325 ;; (The lt bit is set if operand 1 is negative. The eq bit is set
5326 ;; if any of the conditions tested by operand 2 are satisfied.
5327 ;; The gt and unordered bits are cleared to zero.)
5328 (define_expand "xststdc<sd>p"
5332 [(match_operand:SFDF 1 "vsx_register_operand" "wa")
5333 (match_operand:SI 2 "u7bit_cint_operand" "n")]
5336 (set (match_operand:SI 0 "register_operand" "=r")
5337 (eq:SI (match_dup 3)
5341 operands[3] = gen_reg_rtx (CCFPmode);
5342 operands[4] = CONST0_RTX (SImode);
5345 ;; The VSX Scalar Test Negative Quad-Precision
5346 (define_expand "xststdcnegqp_<mode>"
5350 [(match_operand:IEEE128 1 "altivec_register_operand" "v")
5354 (set (match_operand:SI 0 "register_operand" "=r")
5355 (lt:SI (match_dup 2)
5359 operands[2] = gen_reg_rtx (CCFPmode);
5362 ;; The VSX Scalar Test Negative Double- and Single-Precision
5363 (define_expand "xststdcneg<sd>p"
5367 [(match_operand:SFDF 1 "vsx_register_operand" "wa")
5371 (set (match_operand:SI 0 "register_operand" "=r")
5372 (lt:SI (match_dup 2)
5376 operands[2] = gen_reg_rtx (CCFPmode);
5377 operands[3] = CONST0_RTX (SImode);
5380 (define_insn "*xststdcqp_<mode>"
5381 [(set (match_operand:CCFP 0 "" "=y")
5384 [(match_operand:IEEE128 1 "altivec_register_operand" "v")
5385 (match_operand:SI 2 "u7bit_cint_operand" "n")]
5389 "xststdcqp %0,%1,%2"
5390 [(set_attr "type" "fpcompare")])
5392 (define_insn "*xststdc<sd>p"
5393 [(set (match_operand:CCFP 0 "" "=y")
5395 (unspec:SFDF [(match_operand:SFDF 1 "vsx_register_operand" "wa")
5396 (match_operand:SI 2 "u7bit_cint_operand" "n")]
5398 (match_operand:SI 3 "zero_constant" "j")))]
5399 "TARGET_P9_VECTOR"
5400 "xststdc<sd>p %0,%x1,%2"
5401 [(set_attr "type" "fpcompare")])
5403 ;; VSX Vector Extract Exponent Double and Single Precision
5404 (define_insn "xvxexp<sd>p"
5405 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
5407 [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
5410 "xvxexp<sd>p %x0,%x1"
5411 [(set_attr "type" "vecsimple")])
5413 ;; VSX Vector Extract Significand Double and Single Precision
5414 (define_insn "xvxsig<sd>p"
5415 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
5417 [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
5420 "xvxsig<sd>p %x0,%x1"
5421 [(set_attr "type" "vecsimple")])
5423 ;; VSX Vector Insert Exponent Double and Single Precision
5424 (define_insn "xviexp<sd>p"
5425 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
5427 [(match_operand:VSX_F 1 "vsx_register_operand" "wa")
5428 (match_operand:VSX_F 2 "vsx_register_operand" "wa")]
5431 "xviexp<sd>p %x0,%x1,%x2"
5432 [(set_attr "type" "vecsimple")])
5434 ;; VSX Vector Test Data Class Double and Single Precision
5435 ;; The corresponding elements of the result vector are all ones
5436 ;; if any of the conditions tested by operand 3 are satisfied.
5437 (define_insn "xvtstdc<sd>p"
5438 [(set (match_operand:<VSI> 0 "vsx_register_operand" "=wa")
5440 [(match_operand:VSX_F 1 "vsx_register_operand" "wa")
5441 (match_operand:SI 2 "u7bit_cint_operand" "n")]
5442 UNSPEC_VSX_VTSTDC))]
5443 "TARGET_P9_VECTOR"
5444 "xvtstdc<sd>p %x0,%x1,%2"
5445 [(set_attr "type" "vecsimple")])
5447 ;; ISA 3.0 String Operations Support
5449 ;; Compare vectors producing a vector result and a predicate, setting CR6
5450 ;; to indicate a combined status. This pattern matches v16qi, v8hi, and
5451 ;; v4si modes. There is no need to match the v2df, v4sf, or v2di modes
5452 ;; because comparisons in those modes are expanded to use Power8
5453 ;; instructions.
5454 (define_insn "*vsx_ne_<mode>_p"
5455 [(set (reg:CC CR6_REGNO)
5457 [(ne:CC (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v")
5458 (match_operand:VSX_EXTRACT_I 2 "gpc_reg_operand" "v"))]
5460 (set (match_operand:VSX_EXTRACT_I 0 "gpc_reg_operand" "=v")
5461 (ne:VSX_EXTRACT_I (match_dup 1)
5464 "vcmpne<VSX_EXTRACT_WIDTH>. %0,%1,%2"
5465 [(set_attr "type" "vecsimple")])
5467 (define_insn "*vector_nez_<mode>_p"
5468 [(set (reg:CC CR6_REGNO)
5469 (unspec:CC [(unspec:VI
5470 [(match_operand:VI 1 "gpc_reg_operand" "v")
5471 (match_operand:VI 2 "gpc_reg_operand" "v")]
5474 (set (match_operand:VI 0 "gpc_reg_operand" "=v")
5475 (unspec:VI [(match_dup 1)
5479 "vcmpnez<VSX_EXTRACT_WIDTH>. %0,%1,%2"
5480 [(set_attr "type" "vecsimple")])
5482 ;; Return first position of match between vectors using natural order
5483 ;; for both LE and BE execution modes.
5484 (define_expand "first_match_index_<mode>"
5485 [(match_operand:SI 0 "register_operand")
5486 (unspec:SI [(match_operand:VSX_EXTRACT_I 1 "register_operand")
5487 (match_operand:VSX_EXTRACT_I 2 "register_operand")]
5488 UNSPEC_VSX_FIRST_MATCH_INDEX)]
5493 rtx cmp_result = gen_reg_rtx (<MODE>mode);
5494 rtx not_result = gen_reg_rtx (<MODE>mode);
5496 emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmp_result, operands[1],
5498 emit_insn (gen_one_cmpl<mode>2 (not_result, cmp_result));
5500 sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2;
5502 if (<MODE>mode == V16QImode)
5503 {
5504 if (!BYTES_BIG_ENDIAN)
5505 emit_insn (gen_vctzlsbb_<mode> (operands[0], not_result));
5506 else
5507 emit_insn (gen_vclzlsbb_<mode> (operands[0], not_result));
5508 }
5509 else
5510 {
5511 rtx tmp = gen_reg_rtx (SImode);
5512 if (!BYTES_BIG_ENDIAN)
5513 emit_insn (gen_vctzlsbb_<mode> (tmp, not_result));
5514 else
5515 emit_insn (gen_vclzlsbb_<mode> (tmp, not_result));
5516 emit_insn (gen_lshrsi3 (operands[0], tmp, GEN_INT (sh)));
5517 }
5518 DONE;
5519 })
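;; Hedged usage sketch (the vec_first_match_index built-in, ISA 3.0):
;;
;;   int first_eq (vector unsigned char a, vector unsigned char b)
;;   {
;;     return vec_first_match_index (a, b);
;;   }
;;
;; This expands to vcmpneb, a complement, and vctzlsbb (LE) or vclzlsbb
;; (BE); for halfword and word elements the resulting byte index is scaled
;; down by the shift computed above.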
5521 ;; Return first position of match between vectors or end of string (EOS) using
5522 ;; natural element order for both LE and BE execution modes.
5523 (define_expand "first_match_or_eos_index_<mode>"
5524 [(match_operand:SI 0 "register_operand")
5525 (unspec: SI [(match_operand:VSX_EXTRACT_I 1 "register_operand")
5526 (match_operand:VSX_EXTRACT_I 2 "register_operand")]
5527 UNSPEC_VSX_FIRST_MATCH_EOS_INDEX)]
5531 rtx cmpz1_result = gen_reg_rtx (<MODE>mode);
5532 rtx cmpz2_result = gen_reg_rtx (<MODE>mode);
5533 rtx cmpz_result = gen_reg_rtx (<MODE>mode);
5534 rtx and_result = gen_reg_rtx (<MODE>mode);
5535 rtx result = gen_reg_rtx (<MODE>mode);
5536 rtx vzero = gen_reg_rtx (<MODE>mode);
5538 /* Vector with zeros in elements that correspond to zeros in operands. */
5539 emit_move_insn (vzero, CONST0_RTX (<MODE>mode));
5540 emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz1_result, operands[1], vzero));
5541 emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz2_result, operands[2], vzero));
5542 emit_insn (gen_and<mode>3 (and_result, cmpz1_result, cmpz2_result));
5544 /* Vector with ones in elements that do not match. */
5545 emit_insn (gen_vcmpnez<VSX_EXTRACT_WIDTH> (cmpz_result, operands[1],
5548 /* Create vector with ones in elements where there was a zero in one of
5549 the source elements or the elements that match. */
5550 emit_insn (gen_nand<mode>3 (result, and_result, cmpz_result));
5551 sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2;
5553 if (<MODE>mode == V16QImode)
5554 {
5555 if (!BYTES_BIG_ENDIAN)
5556 emit_insn (gen_vctzlsbb_<mode> (operands[0], result));
5557 else
5558 emit_insn (gen_vclzlsbb_<mode> (operands[0], result));
5559 }
5560 else
5561 {
5562 rtx tmp = gen_reg_rtx (SImode);
5563 if (!BYTES_BIG_ENDIAN)
5564 emit_insn (gen_vctzlsbb_<mode> (tmp, result));
5565 else
5566 emit_insn (gen_vclzlsbb_<mode> (tmp, result));
5567 emit_insn (gen_lshrsi3 (operands[0], tmp, GEN_INT (sh)));
5568 }
5569 DONE;
5570 })
5572 ;; Return first position of mismatch between vectors using natural
5573 ;; element order for both LE and BE execution modes.
5574 (define_expand "first_mismatch_index_<mode>"
5575 [(match_operand:SI 0 "register_operand")
5576 (unspec: SI [(match_operand:VSX_EXTRACT_I 1 "register_operand")
5577 (match_operand:VSX_EXTRACT_I 2 "register_operand")]
5578 UNSPEC_VSX_FIRST_MISMATCH_INDEX)]
5582 rtx cmp_result = gen_reg_rtx (<MODE>mode);
5584 emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmp_result, operands[1],
5586 sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2;
5588 if (<MODE>mode == V16QImode)
5589 {
5590 if (!BYTES_BIG_ENDIAN)
5591 emit_insn (gen_vctzlsbb_<mode> (operands[0], cmp_result));
5592 else
5593 emit_insn (gen_vclzlsbb_<mode> (operands[0], cmp_result));
5594 }
5595 else
5596 {
5597 rtx tmp = gen_reg_rtx (SImode);
5598 if (!BYTES_BIG_ENDIAN)
5599 emit_insn (gen_vctzlsbb_<mode> (tmp, cmp_result));
5600 else
5601 emit_insn (gen_vclzlsbb_<mode> (tmp, cmp_result));
5602 emit_insn (gen_lshrsi3 (operands[0], tmp, GEN_INT (sh)));
5603 }
5604 DONE;
5605 })
5607 ;; Return first position of mismatch between vectors or end of string (EOS)
5608 ;; using natural element order for both LE and BE execution modes.
5609 (define_expand "first_mismatch_or_eos_index_<mode>"
5610 [(match_operand:SI 0 "register_operand")
5611 (unspec: SI [(match_operand:VSX_EXTRACT_I 1 "register_operand")
5612 (match_operand:VSX_EXTRACT_I 2 "register_operand")]
5613 UNSPEC_VSX_FIRST_MISMATCH_EOS_INDEX)]
5617 rtx cmpz1_result = gen_reg_rtx (<MODE>mode);
5618 rtx cmpz2_result = gen_reg_rtx (<MODE>mode);
5619 rtx cmpz_result = gen_reg_rtx (<MODE>mode);
5620 rtx not_cmpz_result = gen_reg_rtx (<MODE>mode);
5621 rtx and_result = gen_reg_rtx (<MODE>mode);
5622 rtx result = gen_reg_rtx (<MODE>mode);
5623 rtx vzero = gen_reg_rtx (<MODE>mode);
5625 /* Vector with zeros in elements that correspond to zeros in operands. */
5626 emit_move_insn (vzero, CONST0_RTX (<MODE>mode));
5628 emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz1_result, operands[1], vzero));
5629 emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz2_result, operands[2], vzero));
5630 emit_insn (gen_and<mode>3 (and_result, cmpz1_result, cmpz2_result));
5632 /* Vector with ones in elements that match. */
5633 emit_insn (gen_vcmpnez<VSX_EXTRACT_WIDTH> (cmpz_result, operands[1],
5635 emit_insn (gen_one_cmpl<mode>2 (not_cmpz_result, cmpz_result));
5637 /* Create vector with ones in elements where there was a zero in one of
5638 the source elements or the elements did not match. */
5639 emit_insn (gen_nand<mode>3 (result, and_result, not_cmpz_result));
5640 sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2;
5642 if (<MODE>mode == V16QImode)
5643 {
5644 if (!BYTES_BIG_ENDIAN)
5645 emit_insn (gen_vctzlsbb_<mode> (operands[0], result));
5646 else
5647 emit_insn (gen_vclzlsbb_<mode> (operands[0], result));
5648 }
5649 else
5650 {
5651 rtx tmp = gen_reg_rtx (SImode);
5652 if (!BYTES_BIG_ENDIAN)
5653 emit_insn (gen_vctzlsbb_<mode> (tmp, result));
5654 else
5655 emit_insn (gen_vclzlsbb_<mode> (tmp, result));
5656 emit_insn (gen_lshrsi3 (operands[0], tmp, GEN_INT (sh)));
5657 }
5658 DONE;
5659 })
5661 ;; Load VSX Vector with Length
5662 (define_expand "lxvl"
5663 [(set (match_dup 3)
5664 (ashift:DI (match_operand:DI 2 "register_operand")
5665 (const_int 56)))
5666 (set (match_operand:V16QI 0 "vsx_register_operand")
5668 [(match_operand:DI 1 "gpc_reg_operand")
5669 (mem:V16QI (match_dup 1))
5672 "TARGET_P9_VECTOR && TARGET_64BIT"
5674 operands[3] = gen_reg_rtx (DImode);
5675 })
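;; Usage sketch for the length-controlled load (vec_xl_len built-in,
;; assuming -mcpu=power9 -m64):
;;
;;   #include <altivec.h>
;;   vector unsigned char load_n (unsigned char *p, size_t n)
;;   {
;;     return vec_xl_len (p, n);
;;   }
;;
;; The byte count is shifted left by 56 because lxvl expects the length in
;; the top byte (bits 0:7) of the length register.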
5677 (define_insn "*lxvl"
5678 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
5680 [(match_operand:DI 1 "gpc_reg_operand" "b")
5681 (mem:V16QI (match_dup 1))
5682 (match_operand:DI 2 "register_operand" "r")]
5684 "TARGET_P9_VECTOR && TARGET_64BIT"
5686 [(set_attr "type" "vecload")])
5688 (define_insn "lxvll"
5689 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
5690 (unspec:V16QI [(match_operand:DI 1 "gpc_reg_operand" "b")
5691 (mem:V16QI (match_dup 1))
5692 (match_operand:DI 2 "register_operand" "r")]
5696 [(set_attr "type" "vecload")])
5698 ;; Expand for builtin xl_len_r
5699 (define_expand "xl_len_r"
5700 [(match_operand:V16QI 0 "vsx_register_operand")
5701 (match_operand:DI 1 "register_operand")
5702 (match_operand:DI 2 "register_operand")]
5705 rtx shift_mask = gen_reg_rtx (V16QImode);
5706 rtx rtx_vtmp = gen_reg_rtx (V16QImode);
5707 rtx tmp = gen_reg_rtx (DImode);
5709 emit_insn (gen_altivec_lvsl_reg_di (shift_mask, operands[2]));
5710 emit_insn (gen_ashldi3 (tmp, operands[2], GEN_INT (56)));
5711 emit_insn (gen_lxvll (rtx_vtmp, operands[1], tmp));
5712 emit_insn (gen_altivec_vperm_v8hiv16qi (operands[0], rtx_vtmp, rtx_vtmp,
5713 shift_mask));
5714 DONE;
5715 })
5717 (define_insn "stxvll"
5718 [(set (mem:V16QI (match_operand:DI 1 "gpc_reg_operand" "b"))
5719 (unspec:V16QI [(match_operand:V16QI 0 "vsx_register_operand" "wa")
5720 (mem:V16QI (match_dup 1))
5721 (match_operand:DI 2 "register_operand" "r")]
5725 [(set_attr "type" "vecstore")])
5727 ;; Store VSX Vector with Length
5728 (define_expand "stxvl"
5729 [(set (match_dup 3)
5730 (ashift:DI (match_operand:DI 2 "register_operand")
5731 (const_int 56)))
5732 (set (mem:V16QI (match_operand:DI 1 "gpc_reg_operand"))
5734 [(match_operand:V16QI 0 "vsx_register_operand")
5735 (mem:V16QI (match_dup 1))
5738 "TARGET_P9_VECTOR && TARGET_64BIT"
5740 operands[3] = gen_reg_rtx (DImode);
;; Define optab for vector access with length vectorization exploitation.
(define_expand "len_load_v16qi"
  [(match_operand:V16QI 0 "vlogical_operand")
   (match_operand:V16QI 1 "memory_operand")
   (match_operand:QI 2 "gpc_reg_operand")
   (match_operand:QI 3 "zero_constant")]
  "TARGET_P9_VECTOR && TARGET_64BIT"
{
  rtx mem = XEXP (operands[1], 0);
  mem = force_reg (DImode, mem);
  rtx len = gen_lowpart (DImode, operands[2]);
  emit_insn (gen_lxvl (operands[0], mem, len));
  DONE;
})

(define_expand "len_store_v16qi"
  [(match_operand:V16QI 0 "memory_operand")
   (match_operand:V16QI 1 "vlogical_operand")
   (match_operand:QI 2 "gpc_reg_operand")
   (match_operand:QI 3 "zero_constant")]
  "TARGET_P9_VECTOR && TARGET_64BIT"
{
  rtx mem = XEXP (operands[0], 0);
  mem = force_reg (DImode, mem);
  rtx len = gen_lowpart (DImode, operands[2]);
  emit_insn (gen_stxvl (operands[1], mem, len));
  DONE;
})
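
;; These optabs let the autovectorizer cover a loop remainder with a single
;; length-controlled load/store instead of a scalar epilogue.  A sketch of
;; the kind of loop that may exploit them (assuming -mcpu=power9 -O2 and
;; that partial-vector usage is enabled; whether the tail actually becomes
;; lxvl/stxvl depends on the cost model):
;;
;;   void copy_bytes (unsigned char *dst, unsigned char *src, int n)
;;   {
;;     for (int i = 0; i < n; i++)   /* tail may become lxvl/stxvl */
;;       dst[i] = src[i];
;;   }
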
(define_insn "*stxvl"
  [(set (mem:V16QI (match_operand:DI 1 "gpc_reg_operand" "b"))
	(unspec:V16QI
	 [(match_operand:V16QI 0 "vsx_register_operand" "wa")
	  (mem:V16QI (match_dup 1))
	  (match_operand:DI 2 "register_operand" "r")]
	 UNSPEC_STXVL))]
  "TARGET_P9_VECTOR && TARGET_64BIT"
  "stxvl %x0,%1,%2"
  [(set_attr "type" "vecstore")])
;; Expand for builtin xst_len_r
(define_expand "xst_len_r"
  [(match_operand:V16QI 0 "vsx_register_operand" "=wa")
   (match_operand:DI 1 "register_operand" "b")
   (match_operand:DI 2 "register_operand" "r")]
  "TARGET_P9_VECTOR && TARGET_64BIT"
{
  rtx shift_mask = gen_reg_rtx (V16QImode);
  rtx rtx_vtmp = gen_reg_rtx (V16QImode);
  rtx tmp = gen_reg_rtx (DImode);

  emit_insn (gen_altivec_lvsr_reg_di (shift_mask, operands[2]));
  emit_insn (gen_altivec_vperm_v8hiv16qi (rtx_vtmp, operands[0], operands[0],
					  shift_mask));
  emit_insn (gen_ashldi3 (tmp, operands[2], GEN_INT (56)));
  emit_insn (gen_stxvll (rtx_vtmp, operands[1], tmp));
  DONE;
})
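
;; Hedged sketch of the right-justified variants these two expanders
;; implement; vec_xl_len_r and vec_xst_len_r are the documented builtin
;; names (not defined in this file), and the wrapper is illustrative:
;;
;;   #include <altivec.h>
;;
;;   void move_tail (unsigned char *dst, unsigned char *src, size_t len)
;;   {
;;     vector unsigned char v = vec_xl_len_r (src, len);
;;     vec_xst_len_r (v, dst, len);
;;   }
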
;; Vector Compare Not Equal Byte (specified/not+eq:)
(define_insn "vcmpneb"
  [(set (match_operand:V16QI 0 "altivec_register_operand" "=v")
	(not:V16QI
	  (eq:V16QI (match_operand:V16QI 1 "altivec_register_operand" "v")
		    (match_operand:V16QI 2 "altivec_register_operand" "v"))))]
  "TARGET_P9_VECTOR"
  "vcmpneb %0,%1,%2"
  [(set_attr "type" "vecsimple")])

;; Vector Compare Not Equal v1ti (specified/not+eq:)
(define_expand "vcmpnet"
  [(set (match_operand:V1TI 0 "altivec_register_operand")
	(not:V1TI
	  (eq:V1TI (match_operand:V1TI 1 "altivec_register_operand")
		   (match_operand:V1TI 2 "altivec_register_operand"))))]
  "TARGET_POWER10"
{
  emit_insn (gen_eqvv1ti3 (operands[0], operands[1], operands[2]));
  emit_insn (gen_one_cmplv1ti2 (operands[0], operands[0]));
  DONE;
})
;; Vector Compare Not Equal or Zero Byte
(define_insn "vcmpnezb"
  [(set (match_operand:V16QI 0 "altivec_register_operand" "=v")
	(unspec:V16QI
	 [(match_operand:V16QI 1 "altivec_register_operand" "v")
	  (match_operand:V16QI 2 "altivec_register_operand" "v")]
	 UNSPEC_VCMPNEZB))]
  "TARGET_P9_VECTOR"
  "vcmpnezb %0,%1,%2"
  [(set_attr "type" "vecsimple")])

;; Vector Compare Not Equal or Zero Byte predicate or record-form
(define_insn "vcmpnezb_p"
  [(set (reg:CC CR6_REGNO)
	(unspec:CC
	 [(match_operand:V16QI 1 "altivec_register_operand" "v")
	  (match_operand:V16QI 2 "altivec_register_operand" "v")]
	 UNSPEC_VCMPNEZB))
   (set (match_operand:V16QI 0 "altivec_register_operand" "=v")
	(unspec:V16QI
	 [(match_dup 1)
	  (match_dup 2)]
	 UNSPEC_VCMPNEZB))]
  "TARGET_P9_VECTOR"
  "vcmpnezb. %0,%1,%2"
  [(set_attr "type" "vecsimple")])
;; Vector Compare Not Equal Half Word (specified/not+eq:)
(define_insn "vcmpneh"
  [(set (match_operand:V8HI 0 "altivec_register_operand" "=v")
	(not:V8HI
	  (eq:V8HI (match_operand:V8HI 1 "altivec_register_operand" "v")
		   (match_operand:V8HI 2 "altivec_register_operand" "v"))))]
  "TARGET_P9_VECTOR"
  "vcmpneh %0,%1,%2"
  [(set_attr "type" "vecsimple")])

;; Vector Compare Not Equal or Zero Half Word
(define_insn "vcmpnezh"
  [(set (match_operand:V8HI 0 "altivec_register_operand" "=v")
	(unspec:V8HI [(match_operand:V8HI 1 "altivec_register_operand" "v")
		      (match_operand:V8HI 2 "altivec_register_operand" "v")]
		     UNSPEC_VCMPNEZH))]
  "TARGET_P9_VECTOR"
  "vcmpnezh %0,%1,%2"
  [(set_attr "type" "vecsimple")])

;; Vector Compare Not Equal Word (specified/not+eq:)
(define_insn "vcmpnew"
  [(set (match_operand:V4SI 0 "altivec_register_operand" "=v")
	(not:V4SI
	  (eq:V4SI (match_operand:V4SI 1 "altivec_register_operand" "v")
		   (match_operand:V4SI 2 "altivec_register_operand" "v"))))]
  "TARGET_P9_VECTOR"
  "vcmpnew %0,%1,%2"
  [(set_attr "type" "vecsimple")])

;; Vector Compare Not Equal or Zero Word
(define_insn "vcmpnezw"
  [(set (match_operand:V4SI 0 "altivec_register_operand" "=v")
	(unspec:V4SI [(match_operand:V4SI 1 "altivec_register_operand" "v")
		      (match_operand:V4SI 2 "altivec_register_operand" "v")]
		     UNSPEC_VCMPNEZW))]
  "TARGET_P9_VECTOR"
  "vcmpnezw %0,%1,%2"
  [(set_attr "type" "vecsimple")])
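
;; A usage sketch for the vcmpne/vcmpnez family, assuming the documented
;; vec_cmpne and vec_cmpnez builtins map onto these patterns:
;;
;;   #include <altivec.h>
;;
;;   /* All-ones in each byte where a and b differ.  */
;;   vector bool char ne (vector unsigned char a, vector unsigned char b)
;;   {
;;     return vec_cmpne (a, b);
;;   }
;;
;;   /* As above, but also all-ones where either byte is zero (useful
;;      for string terminators).  */
;;   vector bool char nez (vector unsigned char a, vector unsigned char b)
;;   {
;;     return vec_cmpnez (a, b);
;;   }
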
;; Vector Count Leading/Trailing Zero Least-Significant Bits Byte
(define_insn "*vc<vczlsbb_char>zlsbb_zext_<mode>"
  [(set (match_operand:DI 0 "register_operand" "=r")
	(zero_extend:DI
	 (unspec:SI
	  [(match_operand:VSX_EXTRACT_I 1 "altivec_register_operand" "v")]
	  VCZLSBB)))]
  "TARGET_P9_VECTOR"
  "vc<vczlsbb_char>zlsbb %0,%1"
  [(set_attr "type" "vecsimple")])

(define_insn "vc<vczlsbb_char>zlsbb_<mode>"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(unspec:SI
	 [(match_operand:VSX_EXTRACT_I 1 "altivec_register_operand" "v")]
	 VCZLSBB))]
  "TARGET_P9_VECTOR"
  "vc<vczlsbb_char>zlsbb %0,%1"
  [(set_attr "type" "vecsimple")])
;; Vector Extract Unsigned Byte Left-Indexed
(define_insn "vextublx"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(unspec:SI [(match_operand:SI 1 "register_operand" "r")
		    (match_operand:V16QI 2 "altivec_register_operand" "v")]
		   UNSPEC_VEXTUBLX))]
  "TARGET_P9_VECTOR"
  "vextublx %0,%1,%2"
  [(set_attr "type" "vecsimple")])

;; Vector Extract Unsigned Byte Right-Indexed
(define_insn "vextubrx"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(unspec:SI [(match_operand:SI 1 "register_operand" "r")
		    (match_operand:V16QI 2 "altivec_register_operand" "v")]
		   UNSPEC_VEXTUBRX))]
  "TARGET_P9_VECTOR"
  "vextubrx %0,%1,%2"
  [(set_attr "type" "vecsimple")])

;; Vector Extract Unsigned Half Word Left-Indexed
(define_insn "vextuhlx"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(unspec:SI [(match_operand:SI 1 "register_operand" "r")
		    (match_operand:V8HI 2 "altivec_register_operand" "v")]
		   UNSPEC_VEXTUHLX))]
  "TARGET_P9_VECTOR"
  "vextuhlx %0,%1,%2"
  [(set_attr "type" "vecsimple")])

;; Vector Extract Unsigned Half Word Right-Indexed
(define_insn "vextuhrx"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(unspec:SI [(match_operand:SI 1 "register_operand" "r")
		    (match_operand:V8HI 2 "altivec_register_operand" "v")]
		   UNSPEC_VEXTUHRX))]
  "TARGET_P9_VECTOR"
  "vextuhrx %0,%1,%2"
  [(set_attr "type" "vecsimple")])

;; Vector Extract Unsigned Word Left-Indexed
(define_insn "vextuwlx"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(unspec:SI [(match_operand:SI 1 "register_operand" "r")
		    (match_operand:V4SI 2 "altivec_register_operand" "v")]
		   UNSPEC_VEXTUWLX))]
  "TARGET_P9_VECTOR"
  "vextuwlx %0,%1,%2"
  [(set_attr "type" "vecsimple")])

;; Vector Extract Unsigned Word Right-Indexed
(define_insn "vextuwrx"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(unspec:SI [(match_operand:SI 1 "register_operand" "r")
		    (match_operand:V4SI 2 "altivec_register_operand" "v")]
		   UNSPEC_VEXTUWRX))]
  "TARGET_P9_VECTOR"
  "vextuwrx %0,%1,%2"
  [(set_attr "type" "vecsimple")])
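
;; These left/right-indexed extracts back vec_extract with a variable
;; element number on Power9.  A hedged sketch (the insn actually chosen
;; depends on element type and endianness):
;;
;;   #include <altivec.h>
;;
;;   unsigned char nth_byte (vector unsigned char v, int i)
;;   {
;;     return vec_extract (v, i);   /* i need not be a constant */
;;   }
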
;; Vector insert/extract word at arbitrary byte values.  Note, the little
;; endian version needs to adjust the byte number, and the V4SI element in
;; vinsert4b.
(define_insn "extract4b"
  [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
	(unspec:V2DI [(match_operand:V16QI 1 "vsx_register_operand" "wa")
		      (match_operand:QI 2 "const_0_to_12_operand" "n")]
		     UNSPEC_XXEXTRACTUW))]
  "TARGET_P9_VECTOR"
{
  if (!BYTES_BIG_ENDIAN)
    operands[2] = GEN_INT (12 - INTVAL (operands[2]));

  return "xxextractuw %x0,%x1,%2";
})

(define_expand "insert4b"
  [(set (match_operand:V16QI 0 "vsx_register_operand")
	(unspec:V16QI [(match_operand:V4SI 1 "vsx_register_operand")
		       (match_operand:V16QI 2 "vsx_register_operand")
		       (match_operand:QI 3 "const_0_to_12_operand")]
		      UNSPEC_XXINSERTW))]
  "TARGET_P9_VECTOR"
{
  if (!BYTES_BIG_ENDIAN)
    {
      rtx op1 = operands[1];
      rtx v4si_tmp = gen_reg_rtx (V4SImode);
      emit_insn (gen_vsx_xxpermdi_v4si_be (v4si_tmp, op1, op1, const1_rtx));
      operands[1] = v4si_tmp;
      operands[3] = GEN_INT (12 - INTVAL (operands[3]));
    }
})

(define_insn "*insert4b_internal"
  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
	(unspec:V16QI [(match_operand:V4SI 1 "vsx_register_operand" "wa")
		       (match_operand:V16QI 2 "vsx_register_operand" "0")
		       (match_operand:QI 3 "const_0_to_12_operand" "n")]
		      UNSPEC_XXINSERTW))]
  "TARGET_P9_VECTOR"
  "xxinsertw %x0,%x1,%3"
  [(set_attr "type" "vecperm")])
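
;; A hedged sketch using the documented vec_insert4b builtin, which is
;; expected to map onto the insert4b expander above (the wrapper name
;; and offset are illustrative):
;;
;;   #include <altivec.h>
;;
;;   /* Insert the low 4-byte word of SRC at byte offset 8 of DST;
;;      the offset must be a constant in 0..12.  */
;;   vector unsigned char put4 (vector signed int src,
;;                              vector unsigned char dst)
;;   {
;;     return vec_insert4b (src, dst, 8);
;;   }
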
;; Generate vector extract four float 32 values from left four elements
;; of eight element vector of float 16 values.
(define_expand "vextract_fp_from_shorth"
  [(set (match_operand:V4SF 0 "register_operand" "=wa")
	(unspec:V4SF [(match_operand:V8HI 1 "register_operand" "wa")]
		     UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTH))]
  "TARGET_P9_VECTOR"
{
  int i;
  int vals_le[16] = {15, 14, 0, 0, 13, 12, 0, 0, 11, 10, 0, 0, 9, 8, 0, 0};
  int vals_be[16] = {0, 0, 0, 1, 0, 0, 2, 3, 0, 0, 4, 5, 0, 0, 6, 7};

  rtx rvals[16];
  rtx mask = gen_reg_rtx (V16QImode);
  rtx tmp = gen_reg_rtx (V16QImode);
  rtvec v;

  for (i = 0; i < 16; i++)
    if (!BYTES_BIG_ENDIAN)
      rvals[i] = GEN_INT (vals_le[i]);
    else
      rvals[i] = GEN_INT (vals_be[i]);

  /* xvcvhpsp - vector convert F16 to vector F32 requires the four F16
     inputs in half words 1,3,5,7 (IBM numbering).  Use xxperm to move
     src half words 0,1,2,3 (LE), src half words 4,5,6,7 (BE) for the
     conversion instruction.  */
  v = gen_rtvec_v (16, rvals);
  emit_insn (gen_vec_initv16qiqi (mask, gen_rtx_PARALLEL (V16QImode, v)));
  emit_insn (gen_altivec_vperm_v8hiv16qi (tmp, operands[1],
					  operands[1], mask));
  emit_insn (gen_vsx_xvcvhpsp (operands[0], tmp));
  DONE;
})
;; Generate vector extract four float 32 values from right four elements
;; of eight element vector of float 16 values.
(define_expand "vextract_fp_from_shortl"
  [(set (match_operand:V4SF 0 "register_operand" "=wa")
	(unspec:V4SF [(match_operand:V8HI 1 "register_operand" "wa")]
		     UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTL))]
  "TARGET_P9_VECTOR"
{
  int i;
  int vals_le[16] = {7, 6, 0, 0, 5, 4, 0, 0, 3, 2, 0, 0, 1, 0, 0, 0};
  int vals_be[16] = {0, 0, 8, 9, 0, 0, 10, 11, 0, 0, 12, 13, 0, 0, 14, 15};

  rtx rvals[16];
  rtx mask = gen_reg_rtx (V16QImode);
  rtx tmp = gen_reg_rtx (V16QImode);
  rtvec v;

  for (i = 0; i < 16; i++)
    if (!BYTES_BIG_ENDIAN)
      rvals[i] = GEN_INT (vals_le[i]);
    else
      rvals[i] = GEN_INT (vals_be[i]);

  /* xvcvhpsp - vector convert F16 to vector F32 requires the four F16
     inputs in half words 1,3,5,7 (IBM numbering).  Use xxperm to move
     src half words 4,5,6,7 (LE), src half words 0,1,2,3 (BE) for the
     conversion instruction.  */
  v = gen_rtvec_v (16, rvals);
  emit_insn (gen_vec_initv16qiqi (mask, gen_rtx_PARALLEL (V16QImode, v)));
  emit_insn (gen_altivec_vperm_v8hiv16qi (tmp, operands[1],
					  operands[1], mask));
  emit_insn (gen_vsx_xvcvhpsp (operands[0], tmp));
  DONE;
})
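
;; A usage sketch, assuming the documented vec_extract_fp32_from_shorth
;; and vec_extract_fp32_from_shortl builtins expand through these two
;; patterns:
;;
;;   #include <altivec.h>
;;
;;   /* Widen the first/last four IEEE half-precision values to float.  */
;;   vector float hi4 (vector unsigned short h)
;;   {
;;     return vec_extract_fp32_from_shorth (h);
;;   }
;;   vector float lo4 (vector unsigned short h)
;;   {
;;     return vec_extract_fp32_from_shortl (h);
;;   }
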
;; Support for ISA 3.0 vector byte reverse

;; Swap all bytes within a vector
(define_insn "p9_xxbrq_v1ti"
  [(set (match_operand:V1TI 0 "vsx_register_operand" "=wa")
	(bswap:V1TI (match_operand:V1TI 1 "vsx_register_operand" "wa")))]
  "TARGET_P9_VECTOR"
  "xxbrq %x0,%x1"
  [(set_attr "type" "vecperm")])

(define_expand "p9_xxbrq_v16qi"
  [(use (match_operand:V16QI 0 "vsx_register_operand" "=wa"))
   (use (match_operand:V16QI 1 "vsx_register_operand" "wa"))]
  "TARGET_P9_VECTOR"
{
  rtx op0 = gen_reg_rtx (V1TImode);
  rtx op1 = gen_lowpart (V1TImode, operands[1]);
  emit_insn (gen_p9_xxbrq_v1ti (op0, op1));
  emit_move_insn (operands[0], gen_lowpart (V16QImode, op0));
  DONE;
})
;; Swap all bytes in each 64-bit element
(define_insn "p9_xxbrd_v2di"
  [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
	(bswap:V2DI (match_operand:V2DI 1 "vsx_register_operand" "wa")))]
  "TARGET_P9_VECTOR"
  "xxbrd %x0,%x1"
  [(set_attr "type" "vecperm")])

(define_expand "p9_xxbrd_v2df"
  [(use (match_operand:V2DF 0 "vsx_register_operand" "=wa"))
   (use (match_operand:V2DF 1 "vsx_register_operand" "wa"))]
  "TARGET_P9_VECTOR"
{
  rtx op0 = gen_reg_rtx (V2DImode);
  rtx op1 = gen_lowpart (V2DImode, operands[1]);
  emit_insn (gen_p9_xxbrd_v2di (op0, op1));
  emit_move_insn (operands[0], gen_lowpart (V2DFmode, op0));
  DONE;
})

;; Swap all bytes in each 32-bit element
(define_insn "p9_xxbrw_v4si"
  [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa")
	(bswap:V4SI (match_operand:V4SI 1 "vsx_register_operand" "wa")))]
  "TARGET_P9_VECTOR"
  "xxbrw %x0,%x1"
  [(set_attr "type" "vecperm")])

(define_expand "p9_xxbrw_v4sf"
  [(use (match_operand:V4SF 0 "vsx_register_operand" "=wa"))
   (use (match_operand:V4SF 1 "vsx_register_operand" "wa"))]
  "TARGET_P9_VECTOR"
{
  rtx op0 = gen_reg_rtx (V4SImode);
  rtx op1 = gen_lowpart (V4SImode, operands[1]);
  emit_insn (gen_p9_xxbrw_v4si (op0, op1));
  emit_move_insn (operands[0], gen_lowpart (V4SFmode, op0));
  DONE;
})
;; Swap all bytes in each element of vector
(define_expand "revb_<mode>"
  [(use (match_operand:VEC_REVB 0 "vsx_register_operand"))
   (use (match_operand:VEC_REVB 1 "vsx_register_operand"))]
  ""
{
  if (TARGET_P9_VECTOR)
    emit_insn (gen_p9_xxbr<VSX_XXBR>_<mode> (operands[0], operands[1]));
  else
    {
      if (<MODE>mode == V8HImode)
	{
	  rtx splt = gen_reg_rtx (V8HImode);
	  emit_insn (gen_altivec_vspltish (splt, GEN_INT (8)));
	  emit_insn (gen_altivec_vrlh (operands[0], operands[1], splt));
	}
      else
	{
	  /* Want to have the elements in reverse order relative
	     to the endian mode in use, i.e. in LE mode, put elements
	     in BE order.  */
	  rtx sel = swap_endian_selector_for_mode (<MODE>mode);
	  emit_insn (gen_altivec_vperm_<mode>_direct (operands[0], operands[1],
						      operands[1], sel));
	}
    }

  DONE;
})

;; Reversing bytes in vector char is just a NOP.
(define_expand "revb_v16qi"
  [(set (match_operand:V16QI 0 "vsx_register_operand")
	(bswap:V16QI (match_operand:V16QI 1 "vsx_register_operand")))]
  ""
{
  emit_move_insn (operands[0], operands[1]);
  DONE;
})
;; Swap all bytes in each 16-bit element
(define_insn "p9_xxbrh_v8hi"
  [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
	(bswap:V8HI (match_operand:V8HI 1 "vsx_register_operand" "wa")))]
  "TARGET_P9_VECTOR"
  "xxbrh %x0,%x1"
  [(set_attr "type" "vecperm")])
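
;; The revb expanders implement the documented vec_revb builtin; a short
;; sketch for 32-bit elements:
;;
;;   #include <altivec.h>
;;
;;   /* Byte-swap each word, e.g. to convert the endianness of loaded
;;      data.  */
;;   vector unsigned int bswap4 (vector unsigned int v)
;;   {
;;     return vec_revb (v);   /* xxbrw on Power9, vperm fallback earlier */
;;   }
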
;; Operand numbers for the following peephole2
(define_constants
  [(SFBOOL_TMP_GPR	 0)	;; GPR temporary
   (SFBOOL_TMP_VSX	 1)	;; vector temporary
   (SFBOOL_MFVSR_D	 2)	;; move to gpr dest
   (SFBOOL_MFVSR_A	 3)	;; move to gpr src
   (SFBOOL_BOOL_D	 4)	;; and/ior/xor dest
   (SFBOOL_BOOL_A1	 5)	;; and/ior/xor arg1
   (SFBOOL_BOOL_A2	 6)	;; and/ior/xor arg2
   (SFBOOL_SHL_D	 7)	;; shift left dest
   (SFBOOL_SHL_A	 8)	;; shift left arg
   (SFBOOL_MTVSR_D	 9)	;; move to vector dest
   (SFBOOL_MFVSR_A_V4SF	10)	;; SFBOOL_MFVSR_A as V4SFmode
   (SFBOOL_BOOL_A_DI	11)	;; SFBOOL_BOOL_A1/A2 as DImode
   (SFBOOL_TMP_VSX_DI	12)	;; SFBOOL_TMP_VSX as DImode
   (SFBOOL_MTVSR_D_V4SF	13)])	;; SFBOOL_MTVSR_D as V4SFmode
;; Attempt to optimize some common GLIBC operations using logical operations to
;; pick apart SFmode operations.  For example, there is code from e_powf.c
;; after macro expansion that looks like:
;;
;;	typedef union {
;;	  float value;
;;	  uint32_t word;
;;	} ieee_float_shape_type;
;;
;;	float t1;
;;	int32_t is;
;;
;;	do {
;;	  ieee_float_shape_type gf_u;
;;	  gf_u.value = (t1);
;;	  (is) = gf_u.word;
;;	} while (0);
;;
;;	do {
;;	  ieee_float_shape_type sf_u;
;;	  sf_u.word = (is & 0xfffff000);
;;	  (t1) = sf_u.value;
;;	} while (0);
;;
;; This would result in two direct move operations (convert to memory format,
;; direct move to GPR, do the AND operation, direct move to VSX, convert to
;; scalar format).  With this peephole, we eliminate the direct move to the
;; GPR, and instead move the integer mask value to the vector register after a
;; shift and do the VSX logical operation.

;; The insns for dealing with SFmode in GPR registers look like:
;; (set (reg:V4SF reg2) (unspec:V4SF [(reg:SF reg1)] UNSPEC_VSX_CVDPSPN))
;;
;; (set (reg:DI reg3) (unspec:DI [(reg:V4SF reg2)] UNSPEC_P8V_RELOAD_FROM_VSX))
;;
;; (set (reg:DI reg4) (and:DI (reg:DI reg3) (reg:DI reg3)))
;;
;; (set (reg:DI reg5) (ashift:DI (reg:DI reg4) (const_int 32)))
;;
;; (set (reg:SF reg6) (unspec:SF [(reg:DI reg5)] UNSPEC_P8V_MTVSRD))
;;
;; (set (reg:SF reg6) (unspec:SF [(reg:SF reg6)] UNSPEC_VSX_CVSPDPN))
(define_peephole2
  [(match_scratch:DI SFBOOL_TMP_GPR "r")
   (match_scratch:V4SF SFBOOL_TMP_VSX "wa")

   ;; MFVSRWZ (aka zero_extend)
   (set (match_operand:DI SFBOOL_MFVSR_D "int_reg_operand")
	(zero_extend:DI
	 (match_operand:SI SFBOOL_MFVSR_A "vsx_register_operand")))

   ;; AND/IOR/XOR operation on int
   (set (match_operand:SI SFBOOL_BOOL_D "int_reg_operand")
	(and_ior_xor:SI (match_operand:SI SFBOOL_BOOL_A1 "int_reg_operand")
			(match_operand:SI SFBOOL_BOOL_A2 "reg_or_cint_operand")))

   ;; SLDI
   (set (match_operand:DI SFBOOL_SHL_D "int_reg_operand")
	(ashift:DI (match_operand:DI SFBOOL_SHL_A "int_reg_operand")
		   (const_int 32)))

   ;; MTVSRD
   (set (match_operand:SF SFBOOL_MTVSR_D "vsx_register_operand")
	(unspec:SF [(match_dup SFBOOL_SHL_D)] UNSPEC_P8V_MTVSRD))]

  "TARGET_POWERPC64 && TARGET_DIRECT_MOVE
   /* The REG_P (xxx) tests prevent SUBREGs, which allows us to use REGNO
      to compare registers, when the mode is different.  */
   && REG_P (operands[SFBOOL_MFVSR_D]) && REG_P (operands[SFBOOL_BOOL_D])
   && REG_P (operands[SFBOOL_BOOL_A1]) && REG_P (operands[SFBOOL_SHL_D])
   && REG_P (operands[SFBOOL_SHL_A]) && REG_P (operands[SFBOOL_MTVSR_D])
   && (REG_P (operands[SFBOOL_BOOL_A2])
       || CONST_INT_P (operands[SFBOOL_BOOL_A2]))
   && (REGNO (operands[SFBOOL_BOOL_D]) == REGNO (operands[SFBOOL_MFVSR_D])
       || peep2_reg_dead_p (2, operands[SFBOOL_MFVSR_D]))
   && (REGNO (operands[SFBOOL_MFVSR_D]) == REGNO (operands[SFBOOL_BOOL_A1])
       || (REG_P (operands[SFBOOL_BOOL_A2])
	   && REGNO (operands[SFBOOL_MFVSR_D])
		== REGNO (operands[SFBOOL_BOOL_A2])))
   && REGNO (operands[SFBOOL_BOOL_D]) == REGNO (operands[SFBOOL_SHL_A])
   && (REGNO (operands[SFBOOL_SHL_D]) == REGNO (operands[SFBOOL_BOOL_D])
       || peep2_reg_dead_p (3, operands[SFBOOL_BOOL_D]))
   && peep2_reg_dead_p (4, operands[SFBOOL_SHL_D])"

  [(set (match_dup SFBOOL_TMP_GPR)
	(ashift:DI (match_dup SFBOOL_BOOL_A_DI)
		   (const_int 32)))

   (set (match_dup SFBOOL_TMP_VSX_DI)
	(match_dup SFBOOL_TMP_GPR))

   (set (match_dup SFBOOL_MTVSR_D_V4SF)
	(and_ior_xor:V4SF (match_dup SFBOOL_MFVSR_A_V4SF)
			  (match_dup SFBOOL_TMP_VSX)))]
{
  rtx bool_a1 = operands[SFBOOL_BOOL_A1];
  rtx bool_a2 = operands[SFBOOL_BOOL_A2];
  int regno_mfvsr_d = REGNO (operands[SFBOOL_MFVSR_D]);
  int regno_mfvsr_a = REGNO (operands[SFBOOL_MFVSR_A]);
  int regno_tmp_vsx = REGNO (operands[SFBOOL_TMP_VSX]);
  int regno_mtvsr_d = REGNO (operands[SFBOOL_MTVSR_D]);

  if (CONST_INT_P (bool_a2))
    {
      rtx tmp_gpr = operands[SFBOOL_TMP_GPR];
      emit_move_insn (tmp_gpr, bool_a2);
      operands[SFBOOL_BOOL_A_DI] = tmp_gpr;
    }
  else
    {
      int regno_bool_a1 = REGNO (bool_a1);
      int regno_bool_a2 = REGNO (bool_a2);
      int regno_bool_a = (regno_mfvsr_d == regno_bool_a1
			  ? regno_bool_a2 : regno_bool_a1);
      operands[SFBOOL_BOOL_A_DI] = gen_rtx_REG (DImode, regno_bool_a);
    }

  operands[SFBOOL_MFVSR_A_V4SF] = gen_rtx_REG (V4SFmode, regno_mfvsr_a);
  operands[SFBOOL_TMP_VSX_DI] = gen_rtx_REG (DImode, regno_tmp_vsx);
  operands[SFBOOL_MTVSR_D_V4SF] = gen_rtx_REG (V4SFmode, regno_mtvsr_d);
})
;; Support signed/unsigned long long to float conversion vectorization.
;; Note that any_float (pc) here is just for code attribute <su>.
(define_expand "vec_pack<su>_float_v2di"
  [(match_operand:V4SF 0 "vfloat_operand")
   (match_operand:V2DI 1 "vint_operand")
   (match_operand:V2DI 2 "vint_operand")
   (any_float (pc))]
  "TARGET_VSX"
{
  rtx r1 = gen_reg_rtx (V4SFmode);
  rtx r2 = gen_reg_rtx (V4SFmode);
  emit_insn (gen_vsx_xvcv<su>xdsp (r1, operands[1]));
  emit_insn (gen_vsx_xvcv<su>xdsp (r2, operands[2]));
  rs6000_expand_extract_even (operands[0], r1, r2);
  DONE;
})

;; Support float to signed/unsigned long long conversion vectorization.
;; Note that any_fix (pc) here is just for code attribute <su>.
(define_expand "vec_unpack_<su>fix_trunc_hi_v4sf"
  [(match_operand:V2DI 0 "vint_operand")
   (match_operand:V4SF 1 "vfloat_operand")
   (any_fix (pc))]
  "TARGET_VSX"
{
  rtx reg = gen_reg_rtx (V4SFmode);
  rs6000_expand_interleave (reg, operands[1], operands[1], BYTES_BIG_ENDIAN);
  emit_insn (gen_vsx_xvcvsp<su>xds (operands[0], reg));
  DONE;
})

;; Note that any_fix (pc) here is just for code attribute <su>.
(define_expand "vec_unpack_<su>fix_trunc_lo_v4sf"
  [(match_operand:V2DI 0 "vint_operand")
   (match_operand:V4SF 1 "vfloat_operand")
   (any_fix (pc))]
  "TARGET_VSX"
{
  rtx reg = gen_reg_rtx (V4SFmode);
  rs6000_expand_interleave (reg, operands[1], operands[1], !BYTES_BIG_ENDIAN);
  emit_insn (gen_vsx_xvcvsp<su>xds (operands[0], reg));
  DONE;
})
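
;; A sketch of source the vectorizer can map onto the pack expander above
;; (two V2DI inputs converted and narrowed into one V4SF result); the
;; loop is illustrative, not a guarantee of the code generated:
;;
;;   void narrow (float *dst, long long *src, int n)
;;   {
;;     for (int i = 0; i < n; i++)
;;       dst[i] = (float) src[i];   /* vec_pack_float_v2di */
;;   }
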
(define_insn "vsx_<xvcvbf16>"
  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
	(unspec:V16QI [(match_operand:V16QI 1 "vsx_register_operand" "wa")]
		      XVCVBF16))]
  "TARGET_POWER10"
  "<xvcvbf16> %x0,%x1"
  [(set_attr "type" "vecfloat")])
(define_insn "vec_mtvsrbmi"
  [(set (match_operand:V16QI 0 "altivec_register_operand" "=v")
	(unspec:V16QI [(match_operand:QI 1 "u6bit_cint_operand" "n")]
		      UNSPEC_MTVSBM))]
  "TARGET_POWER10"
  "mtvsrbmi %0,%1"
)

(define_insn "vec_mtvsr_<mode>"
  [(set (match_operand:VSX_MM 0 "altivec_register_operand" "=v")
	(unspec:VSX_MM [(match_operand:DI 1 "gpc_reg_operand" "r")]
		       UNSPEC_MTVSBM))]
  "TARGET_POWER10"
  "mtvsr<wd>m %0,%1"
  [(set_attr "type" "vecsimple")])

(define_insn "vec_cntmb_<mode>"
  [(set (match_operand:DI 0 "gpc_reg_operand" "=r")
	(unspec:DI [(match_operand:VSX_MM4 1 "altivec_register_operand" "v")
		    (match_operand:QI 2 "const_0_to_1_operand" "n")]
		   UNSPEC_VCNTMB))]
  "TARGET_POWER10"
  "vcntmb<wd> %0,%1,%2"
  [(set_attr "type" "vecsimple")])

(define_insn "vec_extract_<mode>"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(unspec:SI [(match_operand:VSX_MM 1 "altivec_register_operand" "v")]
		   UNSPEC_VEXTRACT))]
  "TARGET_POWER10"
  "vextract<wd>m %0,%1"
  [(set_attr "type" "vecsimple")])

(define_insn "vec_expand_<mode>"
  [(set (match_operand:VSX_MM 0 "vsx_register_operand" "=v")
	(unspec:VSX_MM [(match_operand:VSX_MM 1 "vsx_register_operand" "v")]
		       UNSPEC_VEXPAND))]
  "TARGET_POWER10"
  "vexpand<wd>m %0,%1"
  [(set_attr "type" "vecsimple")])
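
;; Hedged usage sketch for the Power10 vector-mask operations above, using
;; the documented vec_genbm and vec_cntm builtins (the wrapper name is
;; illustrative):
;;
;;   #include <altivec.h>
;;
;;   unsigned long long ones (unsigned long long mask)
;;   {
;;     vector unsigned char m = vec_genbm (mask);   /* mtvsrbm */
;;     return vec_cntm (m, 1);                      /* vcntmb */
;;   }
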
(define_insn "dives_<mode>"
  [(set (match_operand:VIlong 0 "vsx_register_operand" "=v")
	(unspec:VIlong [(match_operand:VIlong 1 "vsx_register_operand" "v")
			(match_operand:VIlong 2 "vsx_register_operand" "v")]
		       UNSPEC_VDIVES))]
  "TARGET_POWER10"
  "vdives<wd> %0,%1,%2"
  [(set_attr "type" "vecdiv")
   (set_attr "size" "<bits>")])

(define_insn "diveu_<mode>"
  [(set (match_operand:VIlong 0 "vsx_register_operand" "=v")
	(unspec:VIlong [(match_operand:VIlong 1 "vsx_register_operand" "v")
			(match_operand:VIlong 2 "vsx_register_operand" "v")]
		       UNSPEC_VDIVEU))]
  "TARGET_POWER10"
  "vdiveu<wd> %0,%1,%2"
  [(set_attr "type" "vecdiv")
   (set_attr "size" "<bits>")])

(define_insn "div<mode>3"
  [(set (match_operand:VIlong 0 "vsx_register_operand" "=v")
	(div:VIlong (match_operand:VIlong 1 "vsx_register_operand" "v")
		    (match_operand:VIlong 2 "vsx_register_operand" "v")))]
  "TARGET_POWER10"
  "vdivs<wd> %0,%1,%2"
  [(set_attr "type" "vecdiv")
   (set_attr "size" "<bits>")])

(define_insn "udiv<mode>3"
  [(set (match_operand:VIlong 0 "vsx_register_operand" "=v")
	(udiv:VIlong (match_operand:VIlong 1 "vsx_register_operand" "v")
		     (match_operand:VIlong 2 "vsx_register_operand" "v")))]
  "TARGET_POWER10"
  "vdivu<wd> %0,%1,%2"
  [(set_attr "type" "vecdiv")
   (set_attr "size" "<bits>")])

(define_insn "mod<mode>3"
  [(set (match_operand:VIlong 0 "vsx_register_operand" "=v")
	(mod:VIlong (match_operand:VIlong 1 "vsx_register_operand" "v")
		    (match_operand:VIlong 2 "vsx_register_operand" "v")))]
  "TARGET_POWER10"
  "vmods<wd> %0,%1,%2"
  [(set_attr "type" "vecdiv")
   (set_attr "size" "<bits>")])

(define_insn "umod<mode>3"
  [(set (match_operand:VIlong 0 "vsx_register_operand" "=v")
	(umod:VIlong (match_operand:VIlong 1 "vsx_register_operand" "v")
		     (match_operand:VIlong 2 "vsx_register_operand" "v")))]
  "TARGET_POWER10"
  "vmodu<wd> %0,%1,%2"
  [(set_attr "type" "vecdiv")
   (set_attr "size" "<bits>")])
(define_insn "smul<mode>3_highpart"
  [(set (match_operand:VIlong 0 "vsx_register_operand" "=v")
	(mult:VIlong (ashiftrt
		      (match_operand:VIlong 1 "vsx_register_operand" "v")
		      (const_int 32))
		     (ashiftrt
		      (match_operand:VIlong 2 "vsx_register_operand" "v")
		      (const_int 32))))]
  "TARGET_POWER10"
  "vmulhs<wd> %0,%1,%2"
  [(set_attr "type" "veccomplex")])

(define_insn "umul<mode>3_highpart"
  [(set (match_operand:VIlong 0 "vsx_register_operand" "=v")
	(us_mult:VIlong (ashiftrt
			 (match_operand:VIlong 1 "vsx_register_operand" "v")
			 (const_int 32))
			(ashiftrt
			 (match_operand:VIlong 2 "vsx_register_operand" "v")
			 (const_int 32))))]
  "TARGET_POWER10"
  "vmulhu<wd> %0,%1,%2"
  [(set_attr "type" "veccomplex")])

;; Vector multiply low double word
(define_insn "mulv2di3"
  [(set (match_operand:V2DI 0 "vsx_register_operand" "=v")
	(mult:V2DI (match_operand:V2DI 1 "vsx_register_operand" "v")
		   (match_operand:V2DI 2 "vsx_register_operand" "v")))]
  "TARGET_POWER10"
  "vmulld %0,%1,%2"
  [(set_attr "type" "veccomplex")])
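
;; On Power10 the div/mod patterns above let ordinary C vector arithmetic
;; stay in vector registers; a minimal sketch using GCC's generic vector
;; extension (the type name is illustrative):
;;
;;   typedef long long v2di __attribute__ ((vector_size (16)));
;;
;;   v2di quot_plus_rem (v2di a, v2di b)
;;   {
;;     return a / b + a % b;   /* vdivsd and vmodsd */
;;   }
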
;; XXSPLTIW built-in function support
(define_insn "xxspltiw_v4si"
  [(set (match_operand:V4SI 0 "register_operand" "=wa")
	(unspec:V4SI [(match_operand:SI 1 "s32bit_cint_operand" "n")]
		     UNSPEC_XXSPLTIW))]
  "TARGET_POWER10"
  "xxspltiw %x0,%1"
  [(set_attr "type" "vecperm")
   (set_attr "prefixed" "yes")])

(define_expand "xxspltiw_v4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=wa")
	(unspec:V4SF [(match_operand:SF 1 "const_double_operand" "n")]
		     UNSPEC_XXSPLTIW))]
  "TARGET_POWER10"
{
  long value = rs6000_const_f32_to_i32 (operands[1]);
  emit_insn (gen_xxspltiw_v4sf_inst (operands[0], GEN_INT (value)));
  DONE;
})

(define_insn "xxspltiw_v4sf_inst"
  [(set (match_operand:V4SF 0 "register_operand" "=wa")
	(unspec:V4SF [(match_operand:SI 1 "c32bit_cint_operand" "n")]
		     UNSPEC_XXSPLTIW))]
  "TARGET_POWER10"
  "xxspltiw %x0,%1"
  [(set_attr "type" "vecperm")
   (set_attr "prefixed" "yes")])
;; XXSPLTIDP built-in function support
(define_expand "xxspltidp_v2df"
  [(set (match_operand:V2DF 0 "register_operand")
	(unspec:V2DF [(match_operand:SF 1 "const_double_operand")]
		     UNSPEC_XXSPLTIDP))]
  "TARGET_POWER10"
{
  long value = rs6000_const_f32_to_i32 (operands[1]);
  rs6000_emit_xxspltidp_v2df (operands[0], value);
  DONE;
})

(define_insn "xxspltidp_v2df_inst"
  [(set (match_operand:V2DF 0 "register_operand" "=wa")
	(unspec:V2DF [(match_operand:SI 1 "c32bit_cint_operand" "n")]
		     UNSPEC_XXSPLTIDP))]
  "TARGET_POWER10"
  "xxspltidp %x0,%1"
  [(set_attr "type" "vecperm")
   (set_attr "prefixed" "yes")])
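
;; Hedged sketch for the splat-immediate expanders above, assuming the
;; documented Power10 vec_splati and vec_splatid builtins reach them:
;;
;;   #include <altivec.h>
;;
;;   vector signed int four (void)
;;   {
;;     return vec_splati (4);       /* xxspltiw */
;;   }
;;   vector double half (void)
;;   {
;;     return vec_splatid (0.5f);   /* xxspltidp; SF value widened to DF */
;;   }
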
;; XXSPLTI32DX built-in function support
(define_expand "xxsplti32dx_v4si"
  [(set (match_operand:V4SI 0 "register_operand" "=wa")
	(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
		      (match_operand:QI 2 "u1bit_cint_operand" "n")
		      (match_operand:SI 3 "s32bit_cint_operand" "n")]
		     UNSPEC_XXSPLTI32DX))]
  "TARGET_POWER10"
{
  int index = INTVAL (operands[2]);

  if (!BYTES_BIG_ENDIAN)
    index = 1 - index;

  emit_insn (gen_xxsplti32dx_v4si_inst (operands[0], operands[1],
					GEN_INT (index), operands[3]));
  DONE;
}
  [(set_attr "type" "vecperm")])

(define_insn "xxsplti32dx_v4si_inst"
  [(set (match_operand:V4SI 0 "register_operand" "=wa")
	(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
		      (match_operand:QI 2 "u1bit_cint_operand" "n")
		      (match_operand:SI 3 "s32bit_cint_operand" "n")]
		     UNSPEC_XXSPLTI32DX))]
  "TARGET_POWER10"
  "xxsplti32dx %x0,%2,%3"
  [(set_attr "type" "vecperm")
   (set_attr "prefixed" "yes")])
(define_expand "xxsplti32dx_v4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=wa")
	(unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0")
		      (match_operand:QI 2 "u1bit_cint_operand" "n")
		      (match_operand:SF 3 "const_double_operand" "n")]
		     UNSPEC_XXSPLTI32DX))]
  "TARGET_POWER10"
{
  int index = INTVAL (operands[2]);
  long value = rs6000_const_f32_to_i32 (operands[3]);

  if (!BYTES_BIG_ENDIAN)
    index = 1 - index;

  emit_insn (gen_xxsplti32dx_v4sf_inst (operands[0], operands[1],
					GEN_INT (index), GEN_INT (value)));
  DONE;
})

(define_insn "xxsplti32dx_v4sf_inst"
  [(set (match_operand:V4SF 0 "register_operand" "=wa")
	(unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0")
		      (match_operand:QI 2 "u1bit_cint_operand" "n")
		      (match_operand:SI 3 "s32bit_cint_operand" "n")]
		     UNSPEC_XXSPLTI32DX))]
  "TARGET_POWER10"
  "xxsplti32dx %x0,%2,%3"
  [(set_attr "type" "vecperm")
   (set_attr "prefixed" "yes")])
;; XXBLEND built-in function support
(define_insn "xxblend_<mode>"
  [(set (match_operand:VM3 0 "register_operand" "=wa")
	(unspec:VM3 [(match_operand:VM3 1 "register_operand" "wa")
		     (match_operand:VM3 2 "register_operand" "wa")
		     (match_operand:VM3 3 "register_operand" "wa")]
		    UNSPEC_XXBLEND))]
  "TARGET_POWER10"
  "xxblendv<VM3_char> %x0,%x1,%x2,%x3"
  [(set_attr "type" "vecperm")
   (set_attr "prefixed" "yes")])
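
;; Usage sketch for xxblend, assuming the documented vec_blendv builtin:
;;
;;   #include <altivec.h>
;;
;;   /* Per-byte select: take b where the mask byte's sign bit is set.  */
;;   vector unsigned char sel (vector unsigned char a,
;;                             vector unsigned char b,
;;                             vector unsigned char mask)
;;   {
;;     return vec_blendv (a, b, mask);   /* xxblendvb */
;;   }
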
;; XXPERMX built-in function support
(define_expand "xxpermx"
  [(set (match_operand:V2DI 0 "register_operand" "+wa")
	(unspec:V2DI [(match_operand:V2DI 1 "register_operand" "wa")
		      (match_operand:V2DI 2 "register_operand" "wa")
		      (match_operand:V16QI 3 "register_operand" "wa")
		      (match_operand:QI 4 "u8bit_cint_operand" "n")]
		     UNSPEC_XXPERMX))]
  "TARGET_POWER10"
{
  if (BYTES_BIG_ENDIAN)
    emit_insn (gen_xxpermx_inst (operands[0], operands[1],
				 operands[2], operands[3],
				 operands[4]));
  else
    {
      /* Reverse value of byte element indexes by XORing with 0xFF.
	 Reverse the 32-byte section identifier match by subtracting bits
	 [0:2] of the element from 7.  */
      int value = INTVAL (operands[4]);
      rtx vreg = gen_reg_rtx (V16QImode);

      emit_insn (gen_xxspltib_v16qi (vreg, GEN_INT (-1)));
      emit_insn (gen_xorv16qi3 (operands[3], operands[3], vreg));
      value = 7 - value;
      emit_insn (gen_xxpermx_inst (operands[0], operands[2],
				   operands[1], operands[3],
				   GEN_INT (value)));
    }

  DONE;
}
  [(set_attr "type" "vecperm")])

(define_insn "xxpermx_inst"
  [(set (match_operand:V2DI 0 "register_operand" "+v")
	(unspec:V2DI [(match_operand:V2DI 1 "register_operand" "v")
		      (match_operand:V2DI 2 "register_operand" "v")
		      (match_operand:V16QI 3 "register_operand" "v")
		      (match_operand:QI 4 "u3bit_cint_operand" "n")]
		     UNSPEC_XXPERMX))]
  "TARGET_POWER10"
  "xxpermx %x0,%x1,%x2,%x3,%4"
  [(set_attr "type" "vecperm")
   (set_attr "prefixed" "yes")])
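
;; Usage sketch for xxpermx, assuming the documented vec_permx builtin;
;; the small immediate selects among the 32-byte sections:
;;
;;   #include <altivec.h>
;;
;;   vector unsigned char pick (vector unsigned char a,
;;                              vector unsigned char b,
;;                              vector unsigned char idx)
;;   {
;;     return vec_permx (a, b, idx, 0);
;;   }
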
;; XXEVAL built-in function support
(define_insn "xxeval"
  [(set (match_operand:V2DI 0 "register_operand" "=wa")
	(unspec:V2DI [(match_operand:V2DI 1 "register_operand" "wa")
		      (match_operand:V2DI 2 "register_operand" "wa")
		      (match_operand:V2DI 3 "register_operand" "wa")
		      (match_operand:QI 4 "u8bit_cint_operand" "n")]
		     UNSPEC_XXEVAL))]
  "TARGET_POWER10"
  "xxeval %x0,%x1,%x2,%x3,%4"
  [(set_attr "type" "vecperm")
   (set_attr "prefixed" "yes")])
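
;; Usage sketch for xxeval, assuming the documented vec_ternarylogic
;; builtin; the immediate is the 8-bit truth table applied bitwise:
;;
;;   #include <altivec.h>
;;
;;   /* 0xE8 is the majority function: (a & b) | (a & c) | (b & c).  */
;;   vector unsigned int majority (vector unsigned int a,
;;                                 vector unsigned int b,
;;                                 vector unsigned int c)
;;   {
;;     return vec_ternarylogic (a, b, c, 0xE8);
;;   }
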
;; Construct V1TI by vsx_concat_v2di
(define_split
  [(set (match_operand:V1TI 0 "vsx_register_operand")
	(subreg:V1TI
	 (match_operand:TI 1 "int_reg_operand") 0 ))]
  "TARGET_P9_VECTOR && !reload_completed"
  [(const_int 0)]
{
  rtx tmp1 = simplify_gen_subreg (DImode, operands[1], TImode, 0);
  rtx tmp2 = simplify_gen_subreg (DImode, operands[1], TImode, 8);
  rtx tmp3 = gen_reg_rtx (V2DImode);
  emit_insn (gen_vsx_concat_v2di (tmp3, tmp1, tmp2));
  rtx tmp4 = simplify_gen_subreg (V1TImode, tmp3, V2DImode, 0);
  emit_move_insn (operands[0], tmp4);
  DONE;
})
(define_insn "vmsumcud"
  [(set (match_operand:V1TI 0 "register_operand" "+v")
	(unspec:V1TI [(match_operand:V2DI 1 "register_operand" "v")
		      (match_operand:V2DI 2 "register_operand" "v")
		      (match_operand:V1TI 3 "register_operand" "v")]
		     UNSPEC_VMSUMCUD))]
  "TARGET_POWER10"
  "vmsumcud %0,%1,%2,%3"
  [(set_attr "type" "veccomplex")]
)
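
;; Usage sketch for vmsumcud, assuming the documented vec_msumc builtin:
;;
;;   #include <altivec.h>
;;
;;   /* Carry out of a[0]*b[0] + a[1]*b[1] + c, computed to 128 bits.  */
;;   vector unsigned __int128 carry (vector unsigned long long a,
;;                                   vector unsigned long long b,
;;                                   vector unsigned __int128 c)
;;   {
;;     return vec_msumc (a, b, c);
;;   }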